Ejemplo n.º 1
0
def plot_embeddings(embeddings, ):
    """Reduce entity embeddings to 2-D with t-SNE and scatter-plot them by label.

    :param embeddings: mapping from node id to its embedding vector
    :return: None (displays the plot)
    """
    X, Y = read_node_label(
        '/Users/admin/Desktop/GraphEmbedding-deeplearning/data/XunYiWenYao/寻医问药category.txt'
    )
    vectors = np.array([embeddings[node] for node in X])

    # Project the high-dimensional embeddings down to 2 components.
    node_pos = TSNE(n_components=2).fit_transform(vectors)

    # Group node indices by their first label so each class gets one colour.
    label_to_indices = {}
    for idx in range(len(X)):
        label_to_indices.setdefault(Y[idx][0], []).append(idx)

    for label, indices in label_to_indices.items():
        plt.scatter(node_pos[indices, 0], node_pos[indices, 1], label=label)
    plt.legend()
    plt.show()
Ejemplo n.º 2
0
def plot(embeddings):
    """t-SNE visualisation of embeddings keyed '1'..'N', coloured by community."""
    vectors = np.array(
        [embeddings[str(node)] for node in range(1, len(embeddings) + 1)]
    )
    node_pos = TSNE(n_components=2).fit_transform(vectors)

    X, Y = read_node_label('../data/Net300/community.dat', is_net=True)

    # Palette retained from the author's notes (not passed to scatter directly).
    colors = ['r', 'b', 'g', 'y']

    # Build community label -> list of node indices.
    label_to_indices = {}
    for idx in range(len(X)):
        label_to_indices.setdefault(Y[idx][0], []).append(idx)

    for label, indices in label_to_indices.items():
        plt.scatter(node_pos[indices, 0], node_pos[indices, 1], label=label)
    plt.legend()  # legend
    plt.show()
Ejemplo n.º 3
0
def plot_embeddings(embeddings, ):
    """Project Brazil-airport node embeddings to 2-D via t-SNE and plot by label."""
    X, Y = read_node_label('../data/flight/labels-brazil-airports.txt',
                           skip_head=True)

    vectors = np.array([embeddings[node] for node in X])

    node_pos = TSNE(n_components=2).fit_transform(vectors)

    # One scatter call per label so matplotlib assigns one colour per class.
    label_to_indices = {}
    for idx in range(len(X)):
        label_to_indices.setdefault(Y[idx][0], []).append(idx)

    for label, indices in label_to_indices.items():
        plt.scatter(node_pos[indices, 0], node_pos[indices, 1],
                    label=label)  # c=node_colors)

    plt.legend()
    plt.show()
Ejemplo n.º 4
0
def evaluate_embeddings(embeddings):
    """Evaluate embedding quality via logistic-regression node classification
    on the Brazil airports label set (80% train / 20% evaluate)."""
    X, Y = read_node_label('../data/flight/labels-brazil-airports.txt',
                           skip_head=True)
    train_fraction = 0.8
    print("Training classifier using {:.2f}% nodes...".format(train_fraction * 100))
    classifier = Classifier(embeddings=embeddings, clf=LogisticRegression())
    classifier.split_train_evaluate(X, Y, train_fraction)
Ejemplo n.º 5
0
def evaluate_embeddings(embeddings):
    """Evaluate embedding quality by classifying wiki nodes with logistic
    regression, training on 80% of the labelled nodes."""
    X, Y = read_node_label('../data/wiki/wiki_labels.txt')
    train_fraction = 0.8
    print("Training classifier using {:.2f}% nodes...".format(
        train_fraction * 100))
    classifier = Classifier(embeddings=embeddings, clf=LogisticRegression())
    classifier.split_train_evaluate(X, Y, train_fraction)
Ejemplo n.º 6
0
def plot_embeddings_3D(embeddings,):
    """Reduce embeddings to 3-D with t-SNE and draw a labelled 3-D scatter plot.

    :param embeddings: mapping from node id to its embedding vector
    """
    X, Y = read_node_label('../data/wiki/wiki_labels.txt')
    vectors = np.array([embeddings[node] for node in X])

    node_pos = TSNE(n_components=3).fit_transform(vectors)  # (n_nodes, 3) matrix

    # Map each label to the indices of the nodes carrying it.
    label_to_indices = {}
    for idx in range(len(X)):
        label_to_indices.setdefault(Y[idx][0], []).append(idx)

    fig = plt.figure()
    ax = Axes3D(fig)
    for label, indices in label_to_indices.items():
        ax.scatter(node_pos[indices, 0], node_pos[indices, 1],
                   node_pos[indices, 2], label=label)
    plt.legend()  # legend
    plt.show()
Ejemplo n.º 7
0
def plot_embeddings(embeddings,):
    """Scatter raw 2-D embedding coordinates and annotate each point with its
    node id (no dimensionality reduction)."""
    X, Y = read_node_label('../data/wiki/karate.txt')
    embeddings = field_to_json(embeddings)
    node_ids = list(embeddings.keys())

    vectors = np.array([embeddings[node] for node in node_ids])

    # Draw each node at its raw embedding position with the id centred on it.
    for idx, node in enumerate(node_ids):
        plt.scatter(vectors[idx, 0], vectors[idx, 1], alpha=0.5, s=150, color='b')
        plt.text(vectors[idx, 0], vectors[idx, 1], node,
                 horizontalalignment='center', verticalalignment='center')
    plt.legend()
    plt.show()
Ejemplo n.º 8
0
def evaluate_embeddings(embeddings):
    """Assess embedding quality through multi-class node classification."""
    # Load the ground-truth class labels.
    X, Y = read_node_label('../data/wiki/wiki_labels.txt')
    train_fraction = 0.8  # 80% of nodes train the classifier, the rest test it
    print("Training classifier using {:.2f}% nodes...".format(train_fraction * 100))
    # Classify nodes from their vectors to judge how informative they are.
    classifier = Classifier(embeddings=embeddings, clf=LogisticRegression())
    classifier.split_train_evaluate(X, Y, train_fraction)
Ejemplo n.º 9
0
def plot_embeddings(embeddings,):
    """Plot t-SNE-reduced embeddings twice: once coloured by the true labels,
    once coloured by a k-means clustering of the embeddings.

    Saves two timestamped PNGs under ./pics/miserables/.

    :param embeddings: mapping from node id to its embedding vector
    :return: (color_dict, cluster) — node -> label colour, and the k-means
             cluster assignment
    """
    X, Y = read_node_label('./data/miserables/miserables_labels.txt')

    emb_list = np.array([embeddings[k] for k in X])

    model = TSNE(n_components=2)
    node_pos = model.fit_transform(emb_list)

    # Resolve each node's true-label colour through COLOR_MAP.
    # (The original also built a label->indices dict and an empty
    # cluster_color_list; both were unused and have been removed.)
    color_dict = {}
    for i in range(len(X)):
        color_dict[X[i]] = COLOR_MAP[int(Y[i][0])]

    plt.figure(figsize=(10, 10))
    ax = plt.axes([0.05, 0.05, 1 - 0.05 * 2, 1 - 0.05 * 2])
    for side in ("top", "right", "bottom", "left"):
        ax.spines[side].set_visible(False)

    # networkx needs a node -> (x, y) position mapping.
    pos = {x: node_pos[i] for i, x in enumerate(X)}

    t = datetime.datetime.now()

    def _save_and_clear(tag):
        # One timestamped output path per run; tag distinguishes the figures.
        plt.savefig("./pics/miserables/{}-{}-{}/dw-{}-{}-{}-emb-{}.png".format(
            t.year, t.month, t.day,
            t.hour, t.minute, int(t.second), tag
        ))
        plt.cla()

    color_list = [color_dict[node] for node in G.nodes]
    nx.draw_networkx_nodes(G, pos, node_size=80, node_color=color_list,
                           edgecolors='white', linewidths=0.7)
    _save_and_clear("real")

    cluster = kmeans(embeddings, K=8)
    cluster_color = [COLOR_MAP[cluster[node]] for node in G.nodes]
    nx.draw_networkx_nodes(G, pos, node_size=80, node_color=cluster_color,
                           edgecolors='white', linewidths=0.7)
    _save_and_clear("cluster")
    return color_dict, cluster
Ejemplo n.º 10
0
def evaluate_embeddings(embeddings):
    """Evaluate embeddings on the ETH phishing-node classification labels
    using a logistic-regression classifier trained on 80% of the nodes."""
    X, Y = read_node_label(
        '../data/ETH/Phishing node classification/label.txt', skip_head=True)

    train_fraction = 0.8
    print("Training classifier using {:.2f}% nodes...".format(train_fraction * 100))

    classifier = Classifier(embeddings=embeddings, clf=LogisticRegression())
    classifier.split_train_evaluate(X, Y, train_fraction)
Ejemplo n.º 11
0
def evaluate_embeddings(embeddings):
    """Split, train and evaluate a node classifier; return its metrics.

    :param embeddings: mapping from node id to its embedding vector
    :return: the metrics produced by ``split_train_evaluate``
    """
    # X, Y = read_node_label('../data/wiki/wiki_labels.txt')
    # X, Y = read_node_label('../data/flight/labels-brazil-airports.txt', True)
    # X, Y = read_node_label('../data/flight/labels-europe-airports.txt', True)
    X, Y = read_node_label('../data/flight/labels-usa-airports.txt', True)
    train_fraction = 0.8  # train/evaluate split ratio
    print("Training classifier using {:.2%} nodes...".format(train_fraction))
    classifier = Classifier(embeddings=embeddings, clf=LogisticRegression())
    return classifier.split_train_evaluate(X, Y, train_fraction)
Ejemplo n.º 12
0
def evaluate_embeddings(embeddings):
    """Judge embedding quality with a multi-class node classifier.

    Each entity has a ground-truth label, so a classifier trained on the
    vectors measures how much label information the embeddings capture.

    :param embeddings: mapping from node id to its embedding vector
    :return: None (metrics are reported by the classifier)
    """
    X, Y = read_node_label(
        '/Users/admin/Desktop/GraphEmbedding-deeplearning/data/XunYiWenYao/寻医问药category.txt'
    )
    train_fraction = 0.8
    print("Training classifier using {:.2f}% nodes...".format(train_fraction * 100))
    classifier = Classifier(embeddings=embeddings, clf=LogisticRegression())
    classifier.split_train_evaluate(X, Y, train_fraction)
Ejemplo n.º 13
0
def NMI_Q(embeddings, num_coms):
    """Cluster the embeddings with k-means and score the predicted partition
    against the ground-truth communities.

    :param embeddings: mapping from node id ('1'..'N') to its embedding vector
    :param num_coms: number of communities for k-means
    :return: (NMI score, modularity Q) of the predicted partition
    """
    # edgelist indices start at 1 / emb_list indices start at 0
    emb_list = [embeddings[str(i)] for i in range(1, len(embeddings) + 1)]

    predict = kmeans_from_vec(emb_list, num_coms)  # member ids start at 0
    # Normalise every member id to a string for the NMI computation.
    predict = [[str(x) for x in community] for community in predict]

    X, Y = read_node_label('../data/Net_mu/Net0.8/community.dat', is_net=True)

    # Group node indices by their ground-truth community label.
    comu_idx = {}
    for i in range(len(X)):
        comu_idx.setdefault(Y[i][0], []).append(i)

    real = [[str(x) for x in members] for members in comu_idx.values()]

    mni = NMI(predict, real)

    # BUG FIX: the original `predict_ = predict` merely aliased the list, so
    # the +1 re-indexing below also clobbered `predict`. Build a genuine copy
    # with shifted (1-based) member ids for the modularity computation.
    predict_ = [[str(int(x) + 1) for x in community] for community in predict]

    q = Q(predict_, G)

    return mni, q
Ejemplo n.º 14
0
def plot_embeddings(embeddings, ):
    """Scatter the first two raw embedding dimensions of every labelled node."""
    X, Y = read_node_label('../data/wiki/karate.txt', skip_head=True)

    vectors = np.array([embeddings[node] for node in X])

    # Plot each node individually at its raw 2-D embedding coordinates.
    for position in range(len(X)):
        plt.scatter(vectors[position, 0], vectors[position, 1])
    plt.legend()
    plt.show()
Ejemplo n.º 15
0
def evaluate_embeddings(embeddings):
    """Train a logistic-regression node classifier on the embeddings and print
    accuracy, recall and F1 on a held-out 40% test split.

    :param embeddings: mapping from node id to its embedding vector
    :return: None (metrics are printed)
    """
    X, Y = read_node_label('../data/flight/igraph2Label.txt', skip_head=True)

    # Feature matrix: each labelled node's embedding vector.
    # (Unified on the `np` alias; the original mixed `numpy` and `np`.)
    X = np.asarray([embeddings[x] for x in X])
    x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.4)

    clf = linear_model.LogisticRegression(solver='liblinear')

    clf.fit(x_train, y_train)

    y_pred = clf.predict(x_test)

    # Metrics need comparable integer labels on both sides.
    y_test = np.array(y_test).astype(np.int16)
    y_pred = y_pred.astype(np.int16)

    print("acc", accuracy_score(y_test, y_pred))
    print("recall", recall_score(y_test, y_pred))
    print("f1", f1_score(y_test, y_pred))
Ejemplo n.º 16
0
def plot_embeddings(embeddings, ):
    """Reduce friend-graph embeddings to 2-D with t-SNE and plot them by label."""
    X, Y = read_node_label('../data/wiki/friend_labels_baseline.txt')

    vectors = np.array([embeddings[node] for node in X])

    node_pos = TSNE(n_components=2).fit_transform(vectors)

    # Bucket node indices by their first label for per-class colouring.
    label_to_indices = {}
    for idx in range(len(X)):
        label_to_indices.setdefault(Y[idx][0], []).append(idx)

    for label, indices in label_to_indices.items():
        plt.scatter(node_pos[indices, 0], node_pos[indices, 1], label=label)
    plt.legend()
    plt.show()
Ejemplo n.º 17
0
def plot_embeddings(embeddings,):
    """High-resolution t-SNE scatter plot of wiki node embeddings by label."""
    X, Y = read_node_label('../data/wiki/wiki_labels.txt')

    vectors = np.array([embeddings[node] for node in X])

    node_pos = TSNE(n_components=2).fit_transform(vectors)

    # Group node indices by label so each class is drawn as one series.
    label_to_indices = {}
    for idx in range(len(X)):
        label_to_indices.setdefault(Y[idx][0], []).append(idx)

    # dpi controls resolution, figsize controls canvas size.
    plt.figure(dpi=300, figsize=(24, 12))
    for label, indices in label_to_indices.items():
        plt.scatter(node_pos[indices, 0], node_pos[indices, 1], label=label)
    plt.legend()
    plt.show()
def plot(embeddings):
    """t-SNE plot of embeddings keyed '1'..'N', coloured by community label."""
    # Turn the embedding dict into an ordered list of vectors.
    vectors = np.array(
        [embeddings[str(node)] for node in range(1, len(embeddings) + 1)]
    )
    # Project to 2-D coordinates with t-SNE.
    node_pos = TSNE(n_components=2).fit_transform(vectors)

    X, Y = read_node_label('../data/Net300/community.dat', is_net=True)
    # e.g. ['1', '2', '3', '4'], [['4'], ['2'], ['4'], ['1']]
    community_to_indices = {}
    for idx in range(len(X)):
        community_to_indices.setdefault(Y[idx][0], []).append(idx)

    for community, indices in community_to_indices.items():
        plt.scatter(node_pos[indices, 0], node_pos[indices, 1], label=community)

    plt.legend()
    plt.show()
Ejemplo n.º 19
0
def plot_embeddings(embeddings,):
    """Reduce embeddings to 2-D with t-SNE and scatter-plot them by label.

    :param embeddings: mapping from node id to its embedding vector
    """
    X, Y = read_node_label('../data/wiki/wiki_labels.txt')
    # X, Y = read_node_label('../data/flight/labels-brazil-airports.txt', True)
    # X, Y = read_node_label('../data/flight/labels-europe-airports.txt', True)
    # X, Y = read_node_label('../data/flight/labels-usa-airports.txt', True)

    vectors = np.array([embeddings[node] for node in X])

    model = TSNE(n_components=2)
    node_pos = model.fit_transform(vectors)  # returns an (n_nodes, 2) matrix

    # Build a label -> node-indices dictionary for per-class colouring.
    label_to_indices = {}
    for idx in range(len(X)):
        label_to_indices.setdefault(Y[idx][0], []).append(idx)

    # dpi controls resolution, figsize controls canvas size.
    plt.figure(dpi=300, figsize=(24, 12))
    for label, indices in label_to_indices.items():
        plt.scatter(node_pos[indices, 0], node_pos[indices, 1], label=label)
    # plt.legend()  # legend intentionally left disabled, as in the original
    plt.show()
Ejemplo n.º 20
0
        plt.scatter(node_pos[idx, 0], node_pos[idx, 1],
                    label=c)  # c=node_colors)
    plt.legend()
    plt.show()


# Datasets and embedding models selected for this run; the trailing comments
# preserve the full candidate sets the author chose from.
d_set = [
    "Facebook-Page2Page", "PubMed-Diabetes", "Terrorists-Relation"
]  # ["Cora", "CiteSeer", "Facebook-Page2Page", "PubMed-Diabetes", "Terrorists-Relation", "Zachary-Karate", "Internet-Industry-Partnerships"]  # [sparse, dense]
mdl = ["Node2Vec", "SDNE", "DeepWalk",
       "LINE"]  # ["Node2Vec", "SDNE", "Struc2Vec", "DeepWalk", "LINE"]

for i in range(len(d_set)):
    # Load/Prepare data
    graph_fname = "data/" + d_set[i] + "/" + d_set[i]
    X, Y = read_node_label(graph_fname + ".labels", skip_head=True)
    X = np.asarray(X)
    Y = np.asarray(Y)

    # Preserve ratio/percentage of samples per class using efficent data-splitting && data-resampling strageies
    train_frac = 0.8
    test_frac = round((1 - train_frac), 1)
    print("Training classifier using {:.2f}% nodes...".format(train_frac *
                                                              100))
    if not os.path.isfile(graph_fname + "_strat_train_test.splits"):
        stratified_data = StratifiedShuffleSplit(n_splits=1,
                                                 test_size=test_frac,
                                                 train_size=train_frac,
                                                 random_state=42)
        for train_index, test_index in stratified_data.split(X, Y):
            strat_X_train, strat_y_train = X[train_index], Y[train_index]