예제 #1
0
def evaluate_embeddings(embeddings, tr_frac=0.8):
    """Fit a logistic-regression classifier on node embeddings and evaluate it.

    Nodes and labels are read from ``data/bello_kg/graph_labels_v1.1.txt``,
    located two directory levels above this file, and handed together with
    the embeddings to ``Classifier.split_train_evaluate``.

    Args:
        embeddings: Embedding lookup forwarded unchanged to ``Classifier``
            (presumably keyed by node id -- confirm against ``Classifier``).
        tr_frac: Fraction of the labelled nodes used for training. Defaults
            to 0.8, the value that used to be hard-coded.
    """
    # Only the v1.1 label file was ever read: an earlier assignment pointing
    # at 'graph_labels_last_version.txt' was overwritten before use, so that
    # dead store has been removed.
    data_path = join_path(dirname(dirname(__file__)),
                          'data/bello_kg/graph_labels_v1.1.txt')
    X, Y = read_node_label(data_path)
    print("Training classifier using {:.2f}% nodes...".format(
        tr_frac * 100))
    clf = Classifier(embeddings=embeddings, clf=LogisticRegression())
    clf.split_train_evaluate(X, Y, tr_frac)
예제 #2
0
def plot_embeddings(embeddings):
    """Project node embeddings to 2-D with t-SNE and display a scatter plot.

    Nodes and labels are read from
    ``data/bello_kg/graph_labels_last_version.txt`` (with ``skip_head=True``).
    Each node's embedding is looked up in ``embeddings``, the stacked vectors
    are reduced to two components, and the points are drawn as one scatter
    series per distinct value of ``Y[i][0]`` so the legend lists every group.

    Args:
        embeddings: Lookup indexed by the node keys returned by
            ``read_node_label``; each value is one embedding vector.
    """
    labels_file = join_path(dirname(dirname(__file__)),
                            'data/bello_kg/graph_labels_last_version.txt')
    X, Y = read_node_label(labels_file, skip_head=True)

    # Stack the embeddings in the same order as X so that row indices of the
    # projection line up with positions in X and Y.
    vectors = np.array([embeddings[node] for node in X])
    node_pos = TSNE(n_components=2).fit_transform(vectors)

    # Group row indices by the first element of each label entry.
    rows_by_label = {}
    for row, _ in enumerate(X):
        rows_by_label.setdefault(Y[row][0], []).append(row)

    for label, rows in rows_by_label.items():
        plt.scatter(node_pos[rows, 0], node_pos[rows, 1], label=label)
    plt.legend()
    plt.show()