コード例 #1
0
######################
# model=restore_model(os.path.join('/scratch/GW/pool0/gadelrab/multicut/output', 'yago_transE.pkl'))

# Get vectors
# print(model.ent_to_idx.items())
# print(target_entities.get_entities()[:30])
# Resolve the target entity list once, then split it by whether the model
# has an index entry for each entity.
target_entity_list = target_entities.get_entities()
missing = [e for e in target_entity_list if e not in model.ent_to_idx]
exist = [e for e in target_entity_list if e in model.ent_to_idx]
print('missing:   ', len(missing), '/', len(target_entity_list))
print(exist)
print(missing[:5])

# NOTE(review): embeddings are requested for every target entity, including
# the ones reported as missing above -- confirm the model tolerates that.
target_entities_embedding_vectors = model.get_embeddings(target_entity_list)

# cluster with whatever methods
# `n_jobs` is deliberately not passed: scikit-learn deprecated it for KMeans
# in 0.23 and removed it in 1.0, where passing it raises TypeError.
km = KMeans(n_clusters=number_of_clusters, n_init=20)
y_pred = km.fit_predict(target_entities_embedding_vectors)

# To make the results in triples format: pair each entity with its
# predicted cluster label, one (entity, label) row per entity.
clustering_results_as_triples = EntityLabelsToTriples(
    np.column_stack((target_entity_list, y_pred)))

# to save clustering results as triples
write_triples(clustering_results_as_triples,
              os.path.join(experiment_dir, 'clustering.tsv'))

# evaluate clustering using normal measures and add them to methods results
current_method_result.update(
コード例 #2
0
predict(w1, w2)

# Run the prediction for each remaining word pair in turn. `w1` and `w2`
# are rebound on every iteration, so they end up holding the last pair,
# exactly as with the sequential assignments.
for w1, w2 in (('light', 'shadow'), ('street', 'rue')):
    predict(w1, w2)

from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

print("Extracting Embeddings..")

# For every item of `alle` whose string form is a key of `tok2id`, store
# (embedding looked up by the stringified id, id) under that string.
# `alle` is walked once per model, as before.
embedding_map = {
    token: (model.get_embeddings(str(tok2id[token])), tok2id[token])
    for token in map(str, alle)
    if token in tok2id
}
embedding_map2 = {
    token: (model2.get_embeddings(str(tok2id[token])), tok2id[token])
    for token in map(str, alle)
    if token in tok2id
}

# Stack the first model's embeddings (first element of each stored pair)
# and project them to three components with PCA and with t-SNE.
embeddings_array = np.array(
    [vector for vector, _ in embedding_map.values()])
print("PCA")
pca_projector = PCA(n_components=3)
embeddings_3d_pca = pca_projector.fit_transform(embeddings_array)
print("TSNE")
tsne_projector = TSNE(n_components=3)
embeddings_3d_tsne = tsne_projector.fit_transform(embeddings_array)
print("k2")
# Embeddings of the second model, stacked the same way but not projected.
embeddings_k2 = np.array(
    [vector for vector, _ in embedding_map2.values()])

print(embeddings_3d_pca.shape)
コード例 #3
0
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import seaborn as sns
from adjustText import adjust_text
from incf.countryutils import transformations

print("Extracting Embeddings..")

# Team id -> team name. Home teams are inserted first; away-team entries
# are applied afterwards and therefore win on duplicate ids.
id_to_name_map = dict(zip(df.home_team_id, df.home_team))
id_to_name_map.update(zip(df.away_team_id, df.away_team))

# Unique team ids appearing (as home or away side) in rows selected by
# the "train" column.
train_mask = df["train"]
home_ids = df.home_team_id[train_mask]
away_ids = df.away_team_id[train_mask]
teams = pd.concat((home_ids, away_ids)).unique()

# Pair each team id with the embedding returned for it.
team_embeddings = dict(zip(teams, model.get_embeddings(teams)))

# Materialise the embeddings once; reused for the PCA input and for the
# sample printed below.
embedding_rows = list(team_embeddings.values())
embeddings_2d = PCA(n_components=2).fit_transform(np.array(embedding_rows))

print(embeddings_2d)
first_embeddings = embedding_rows[0]
print(first_embeddings)
print(first_embeddings.shape)
print(embeddings_2d.shape)
from ampligraph.discovery import find_clusters
from sklearn.cluster import KMeans

print("Clustering..")

clustering_algorithm = KMeans(n_clusters=6,