###################### # model=restore_model(os.path.join('/scratch/GW/pool0/gadelrab/multicut/output', 'yago_transE.pkl')) # Get vectors # print(model.ent_to_idx.items()) # print(target_entities.get_entities()[:30]) missing = list( filter(lambda e: e not in model.ent_to_idx, target_entities.get_entities())) exist = list( filter(lambda e: e in model.ent_to_idx, target_entities.get_entities())) print('missing: ', len(missing), '/', len(target_entities.get_entities())) print(exist) print(missing[:5]) target_entities_embedding_vectors = model.get_embeddings( target_entities.get_entities()) # cluster with whatever methods km = KMeans(n_clusters=number_of_clusters, n_init=20, n_jobs=8) y_pred = km.fit_predict(target_entities_embedding_vectors) # To make the results in triples format clustering_results_as_triples = EntityLabelsToTriples( np.column_stack((target_entities.get_entities(), y_pred))) # to save clustering results as triples write_triples(clustering_results_as_triples, os.path.join(experiment_dir, 'clustering.tsv')) # evaluate clustering using normal measures and add them to methods results current_method_result.update(
# Sanity-check the trained model on a few word pairs, then project the
# embeddings of every known token to 3-D with PCA and t-SNE for visualisation.
# Relies on names defined earlier in the file: predict, model, model2, tok2id,
# alle, np.

# First call uses the w1/w2 values left over from the preceding section.
predict(w1, w2)
# Related pair (light/shadow) and a translation pair (street/rue).
for w1, w2 in [('light', 'shadow'), ('street', 'rue')]:
    predict(w1, w2)

from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

print("Extracting Embeddings..")
# token string -> (embedding vector, token id), restricted to tokens the
# vocabulary actually contains. (dict comprehension instead of dict([...]) —
# avoids building a throwaway list, flake8-comprehensions C404.)
embedding_map = {
    str(a): (model.get_embeddings(str(tok2id[str(a)])), tok2id[str(a)])
    for a in alle if str(a) in tok2id
}
# Same mapping for the second model (same vocabulary filter).
embedding_map2 = {
    str(a): (model2.get_embeddings(str(tok2id[str(a)])), tok2id[str(a)])
    for a in alle if str(a) in tok2id
}

# Stack just the vectors (drop the token ids) into a 2-D array.
embeddings_array = np.array([vec for vec, _ in embedding_map.values()])
print("PCA")
embeddings_3d_pca = PCA(n_components=3).fit_transform(embeddings_array)
print("TSNE")
# NOTE(review): TSNE is stochastic — runs are not reproducible without a fixed
# random_state; confirm whether that matters downstream.
embeddings_3d_tsne = TSNE(n_components=3).fit_transform(embeddings_array)
print("k2")
embeddings_k2 = np.array([vec for vec, _ in embedding_map2.values()])
print(embeddings_3d_pca.shape)
# NOTE(review): this chunk was newline-collapsed in the source; reformatted for
# readability, code tokens unchanged. Relies on names defined earlier in the
# file (df, pd, np, model).
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import seaborn as sns
from adjustText import adjust_text
from incf.countryutils import transformations

print("Extracting Embeddings..")
# Map team id -> team name, merged from the home and away columns.
id_to_name_map = {
    **dict(zip(df.home_team_id, df.home_team)),
    **dict(zip(df.away_team_id, df.away_team))
}
# Distinct team ids appearing in the training split (home or away).
teams = pd.concat(
    (df.home_team_id[df["train"]], df.away_team_id[df["train"]])).unique()
# team id -> embedding vector for every training team.
team_embeddings = dict(zip(teams, model.get_embeddings(teams)))
# Project the embeddings to 2-D with PCA for plotting.
embeddings_2d = PCA(n_components=2).fit_transform(
    np.array([i for i in team_embeddings.values()]))
print(embeddings_2d)
first_embeddings = list(team_embeddings.values())[0]
print(first_embeddings)
print(first_embeddings.shape)
print(embeddings_2d.shape)

from ampligraph.discovery import find_clusters
from sklearn.cluster import KMeans

print("Clustering..")
# NOTE(review): call truncated in this chunk — the remaining KMeans arguments
# continue past the visible source.
clustering_algorithm = KMeans(n_clusters=6,