def evaluate_embeddings(embeddings, label_path=None, tr_frac=0.8):
    """Train and evaluate a logistic-regression node classifier.

    Node labels are loaded with ``read_node_label``; a ``Classifier`` wrapping
    ``LogisticRegression`` is then asked to split the labelled nodes and
    evaluate itself via ``split_train_evaluate``.

    Args:
        embeddings: Node embeddings, handed to ``Classifier`` unchanged.
        label_path: Path of the node-label file. When ``None`` (the default),
            ``data/bello_kg/graph_labels_v1.1.txt`` under the parent of this
            file's directory is used.
        tr_frac: Training fraction forwarded to
            ``Classifier.split_train_evaluate`` and reported in the progress
            message. Defaults to ``0.8``.
    """
    if label_path is None:
        label_path = join_path(dirname(dirname(__file__)),
                               'data/bello_kg/graph_labels_v1.1.txt')

    # NOTE(review): the file is read without ``skip_head`` here, whereas
    # ``plot_embeddings`` passes ``skip_head=True`` for its label file --
    # confirm that this file really has no header row.
    X, Y = read_node_label(label_path)

    print("Training classifier using {:.2f}% nodes...".format(tr_frac * 100))
    clf = Classifier(embeddings=embeddings, clf=LogisticRegression())
    clf.split_train_evaluate(X, Y, tr_frac)
def plot_embeddings(embeddings, label_path=None):
    """Show a 2-D t-SNE scatter plot of the labelled nodes' embeddings.

    The nodes listed in the label file are looked up in ``embeddings``,
    projected to two dimensions with ``TSNE`` and drawn as one scatter series
    per label, followed by a legend. The figure is displayed with
    ``plt.show()``; nothing is returned.

    Args:
        embeddings: Mapping indexed by node key (``embeddings[node]``) that
            yields that node's embedding vector.
        label_path: Path of the node-label file. When ``None`` (the default),
            ``data/bello_kg/graph_labels_last_version.txt`` under the parent
            of this file's directory is used. The file is read with
            ``skip_head=True``.
    """
    if label_path is None:
        label_path = join_path(dirname(dirname(__file__)),
                               'data/bello_kg/graph_labels_last_version.txt')
    X, Y = read_node_label(label_path, skip_head=True)

    # Row i of the matrix is the embedding of node X[i].
    emb_matrix = np.array([embeddings[node] for node in X])
    node_pos = TSNE(n_components=2).fit_transform(emb_matrix)

    # Group row indices by each node's first label. Only labels[0] is used,
    # so a node carrying several labels is drawn once, under its first one.
    color_idx = {}
    for i, (_node, labels) in enumerate(zip(X, Y)):
        color_idx.setdefault(labels[0], []).append(i)

    # One scatter call per label so each gets its own colour and legend entry.
    for label, idx in color_idx.items():
        plt.scatter(node_pos[idx, 0], node_pos[idx, 1], label=label)
    plt.legend()
    plt.show()