# Report Hits@N at cut-offs 10, 3 and 1. Each score is printed right after it
# is computed, and the three values stay bound to hits_10 / hits_3 / hits_1.
hits_10 = hits_at_n_score(ranks, n=10)
print("Hits@10: %.2f" % hits_10)

hits_3 = hits_at_n_score(ranks, n=3)
print("Hits@3: %.2f" % hits_3)

hits_1 = hits_at_n_score(ranks, n=1)
print("Hits@1: %.2f" % hits_1)

# Load the triples and drop, in place, every row whose 'name' or 'prop' column
# holds the placeholder string instead of a real value.
data = pd.read_csv('triplet.csv')
for column, placeholder in (('name', 'no pc_item'), ('prop', 'no price')):
    placeholder_rows = data[data[column] == placeholder].index
    data.drop(placeholder_rows, inplace=True)
print(data.head())

import itertools
# Pair every distinct item name with the vector model.get_embeddings returns
# for it.
pcItem = data['name'].unique()
pcItem_embeddings = dict(zip(pcItem, model.get_embeddings(pcItem)))

# Split the mapping into two parallel lists; a dict iterates keys and values
# in the same (insertion) order, so ke[i] and val[i] belong together.
ke = list(pcItem_embeddings.keys())
val = list(pcItem_embeddings.values())
embed_df = pd.DataFrame({'name': ke, 'embed': val})


# Load the price table and drop, in place, rows whose 'item_name' or 'price'
# column holds the placeholder string.
price_df = pd.read_csv('item_price.csv')
for column, placeholder in (('item_name', 'no pc_item'), ('price', 'no price')):
    price_df.drop(price_df[price_df[column] == placeholder].index, inplace=True)

# Plain indexing (not .get): an item name without an entry in
# pcItem_embeddings raises KeyError here.
price_df['embed'] = price_df['item_name'].apply(
    lambda item: pcItem_embeddings[item])

# Spread each embedding vector over its own columns: embed_0, embed_1, ...
price_df1 = pd.DataFrame(price_df['embed'].values.tolist())
price_df1 = price_df1.add_prefix('embed_')
예제 #2
0
        train_y, dtype=np.int32), to_categorical(
            test_y, dtype=np.int32), positives_filter.to_numpy(dtype=np.int32)
    print(
        "Shape of train_y: %s;  Shape of test_y: %s;  Shape of positives_filter: %s"
        % (train_y.shape, test_y.shape, positives_filter.shape))

    # Feature Scaling: Normalize dataset via Generation of Embeddings
    # Fits a ComplEx model on positives_filter, then replaces every row by the
    # concatenated embeddings of its column-0 and column-2 entities.
    print("\nFeature Scaling: Embeddings Generation")
    embed_dim = 100  # passed to ComplEx as k and reused by the reshapes below
    embeds_model = ComplEx(k=embed_dim, verbose=True)
    # Set the TensorFlow v1 logger verbosity to ERROR before fitting.
    tf.compat.v1.logging.set_verbosity(
        tf.compat.v1.logging.ERROR
    )  # limits TensorFlow log output to the ERROR level
    embeds_model.fit(positives_filter)

    # Entity embeddings for column 0 and column 2 of positives_filter
    # (presumably the subject and object of each triple -- TODO confirm).
    embeds_source = embeds_model.get_embeddings(positives_filter[:, 0],
                                                embedding_type='entity')
    embeds_dest = embeds_model.get_embeddings(positives_filter[:, 2],
                                              embedding_type='entity')
    # Join the two embeddings side by side: one row per positives_filter row.
    embeds = np.concatenate((embeds_source, embeds_dest), axis=1)

    # Split the rows back into train/test by the row count of train_X_temp.
    # NOTE(review): this assumes positives_filter lists the train_X_temp rows
    # first and the test rows after them -- confirm where it is built.
    train_sz = train_X_temp.shape[0]
    train_X, test_X = embeds[:train_sz, :], embeds[train_sz:, :]
    # The reshapes only succeed if each row of embeds holds 4 * embed_dim
    # values, i.e. each entity embedding is 2 * embed_dim wide (presumably the
    # real and imaginary halves of ComplEx -- TODO confirm).
    train_X = train_X.reshape(
        train_X.shape[0], 4, embed_dim
    )  # (samples, n_timesteps, feat_per_timestep) # n_timesteps=4 -> embeds_source(:, 2) & embeds_dest(:, 2)
    test_X = test_X.reshape(
        test_X.shape[0], 4, embed_dim
    )  # (samples, n_timesteps, feat_per_timestep) # n_timesteps=4 -> embeds_source(:, 2) & embeds_dest(:, 2)
    print("Shape of train_X: %s;  Shape of train_y: %s" %
          (train_X.shape, train_y.shape))
    print("Shape of test_X: %s;  Shape of test_y: %s" %
예제 #3
0
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import seaborn as sns
from adjustText import adjust_text
from incf.countryutils import transformations

print("Extracting Embeddings..")

# Team id -> team name lookup. Away-team pairs are applied second, so they
# overwrite the home-team name when an id occurs in both columns.
id_to_name_map = dict(zip(df.home_team_id, df.home_team))
id_to_name_map.update(zip(df.away_team_id, df.away_team))

# Distinct team ids appearing in rows selected by the boolean "train" column.
train_rows = df["train"]
teams = pd.concat(
    [df.home_team_id[train_rows], df.away_team_id[train_rows]]).unique()
team_embeddings = dict(zip(teams, model.get_embeddings(teams)))

# Stack the per-team vectors into one matrix and reduce it to two components.
embedding_matrix = np.array(list(team_embeddings.values()))
embeddings_2d = PCA(n_components=2).fit_transform(embedding_matrix)

# Diagnostics: the 2-D projection, one raw embedding, and both shapes.
print(embeddings_2d)
first_embeddings = next(iter(team_embeddings.values()))
print(first_embeddings)
print(first_embeddings.shape)
print(embeddings_2d.shape)
from ampligraph.discovery import find_clusters
from sklearn.cluster import KMeans

print("Clustering..")

clustering_algorithm = KMeans(n_clusters=6,
예제 #4
0
                optimizer="adam",
                optimizer_params={"lr": 0.01})

# Fit on the 'train' split, then score the first five rows of the 'test' split.
model.fit(X['train'])

first_five = X['test'][:5, ]
y_pred = model.predict(first_five)

from scipy.special import expit

# expit is the logistic sigmoid; print the squashed scores.
probabilities = expit(y_pred)
print(probabilities)

# Rank the first ten test rows and summarise the ranks as MRR and Hits@10.
first_ten = X['test'][:10]
ranks = evaluate_performance(first_ten, model=model)
print(ranks)

mrr = mrr_score(ranks)
hits_10 = hits_at_n_score(ranks, n=10)
print("MRR: %f, Hits@10: %f" % (mrr, hits_10))

import matplotlib.pyplot as plt
from sklearn.manifold import TSNE

# Fetch the embeddings for embs_labels and project them to 2-D with t-SNE.
# NOTE(review): newer AmpliGraph releases appear to name this keyword
# `embedding_type` rather than `type` -- confirm against the installed version.
embs = model.get_embeddings(embs_labels, type='entity')
embs_2d = TSNE(n_components=2).fit_transform(embs)

# Scatter the projected points and write each label next to its point.
fig, ax = plt.subplots()
ax.scatter(embs_2d[:, 0], embs_2d[:, 1])
for i, lab in enumerate(embs_labels):
    ax.annotate(lab, (embs_2d[i, 0], embs_2d[i, 1]))

# pyplot.show() takes only the keyword-only `block` argument. Passing the
# figure positionally raises TypeError on current Matplotlib, and older
# versions merely treated it as a truthy `block` flag. show() displays every
# open figure, including `fig`.
plt.show()
    # Restore a second saved model; its path is ke_model_path with '2' appended.
    model2 = restore_model(model_name_path=ke_model_path + '2')
    # The pickle at ke_wnkeys_path holds a pair, unpacked as (tok2id, id2tok).
    # NOTE(review): pickle.load can execute arbitrary code -- only load files
    # from a trusted source.
    with open(ke_wnkeys_path, 'rb') as handle:
        tok2id, id2tok = pickle.load(handle)


def find_in_tok2id(w):
    """Print every key of the module-level ``tok2id`` that contains ``w``.

    Containment is tested with ``w in key``, so for string keys this is a
    substring match. One line is printed per matching key, in the mapping's
    iteration order; nothing is returned.
    """
    matching_keys = (key for key in tok2id if w in key)
    for key in matching_keys:
        print(w, key, "it is alphabetically there")


tok2id = OrderedDict(tok2id)

print("Extracting Embeddings..")
# Every value from the 'n1' column followed by every value from 'n2'.
alle = [*table['n1'].tolist(), *table['n2'].tolist()]

# For each value whose string form is a tok2id key, store the pair
# (embedding looked up by the stringified id, id). Repeated values simply
# overwrite their earlier entry. One map per model.
embedding_map = {
    str(node): (model.get_embeddings(str(tok2id[str(node)])),
                tok2id[str(node)])
    for node in alle
    if str(node) in tok2id
}
embedding_map2 = {
    str(node): (model2.get_embeddings(str(tok2id[str(node)])),
                tok2id[str(node)])
    for node in alle
    if str(node) in tok2id
}

# Keep only the embedding (first element) of each (embedding, id) pair.
embeddings_array = np.array([vector for vector, _ in embedding_map.values()])

print("PCA")
pca_reducer = PCA(n_components=3)
embeddings_3d_pca = pca_reducer.fit_transform(embeddings_array)

print("TSNE")
tsne_reducer = TSNE(n_components=3)
embeddings_3d_tsne = tsne_reducer.fit_transform(embeddings_array)

print("k2")
# Same extraction for the second model's map.
embeddings_k2 = np.array([vector for vector, _ in embedding_map2.values()])

# Check if second dimension is 3