Example #1
    def get_embeddings(self, inst, th=1):
        G = nx.parse_edgelist(self._compose_edge_list(inst.dist_mat, th), create_using=nx.DiGraph(), nodetype=None,
                                data=[('weight', float)])
        if self._embedding == 'deepwalk':
            model = DeepWalk(G, walk_length=10, num_walks=80, workers=1)
            model.train(window_size=5, iter=3)
        elif self._embedding == 'node2vec':
            model = Node2Vec(G, walk_length=10, num_walks=80, p=0.25, q=4, workers=1)  # init model
            model.train(window_size=5, iter=3)  # train model
        elif self._embedding == 'line':
            model = LINE(G, embedding_size=128, order='second')  # init model,order can be ['first','second','all']
            model.train(batch_size=1024, epochs=50, verbose=2)  # train model
        elif self._embedding == 'sdne':
            model = SDNE(G, hidden_size=[256, 128])  # init model
            model.train(batch_size=3000, epochs=40, verbose=2)  # train model
        elif self._embedding == 'struc2vec':
            model = Struc2Vec(G, walk_length=10, num_walks=80, workers=4, verbose=40)  # init model
            model.train(window_size=5, iter=3)  # train model
        else:
            return self._normalise(inst)

        ebds = model.get_embeddings()
        coords = []
        for i in range(inst.n):
            coords.append(ebds[str(i)])
        return np.array(coords)
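The method above relies on a _compose_edge_list helper that is not shown. Below is a minimal sketch, assuming the helper emits "u v weight" strings for nx.parse_edgelist from a dense distance matrix and keeps only node pairs whose distance is within the threshold th; the exact filtering rule is an assumption.

    def _compose_edge_list(self, dist_mat, th=1):
        # Hypothetical helper: build "u v weight" strings for nx.parse_edgelist,
        # keeping only node pairs whose distance does not exceed the threshold th.
        edges = []
        n = len(dist_mat)
        for i in range(n):
            for j in range(n):
                if i != j and dist_mat[i][j] <= th:
                    edges.append(f"{i} {j} {dist_mat[i][j]}")
        return edges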
Example #2
def embedding(args, datadict):
    if not os.path.exists(f'./embedding/{args.dataset}.edgelist'):
        nx.write_edgelist(datadict['g'],
                          f'./embedding/{args.dataset}.edgelist',
                          data=[('weight', int)])
    datadict['g'] = nx.read_edgelist(f'./embedding/{args.dataset}.edgelist',
                                     create_using=nx.DiGraph(),
                                     nodetype=None,
                                     data=[('weight', int)])

    if not os.path.exists(f'./embedding/{args.dataset}_{args.emb_method}.emb'):
        model = DeepWalk(datadict['g'], walk_length=5, num_walks=50, workers=8)
        model.train(window_size=10, iter=10)
        dict_embeddings = model.get_embeddings()
        embeddings = np.zeros(
            (datadict['labels'].shape[0], dict_embeddings['0'].shape[0]))
        print('Saving the embeddings......')
        for key in tqdm(dict_embeddings):
            embeddings[int(key)] = dict_embeddings[key]
        np.savetxt(f'./embedding/{args.dataset}_{args.emb_method}.emb',
                   embeddings)
        print(f'{embeddings.shape[1]}-dims Embeddings saved.')
    else:
        print('Loading the embeddings')
        embeddings = np.loadtxt(
            f'./embedding/{args.dataset}_{args.emb_method}.emb')
        print(f'{embeddings.shape[1]}-dims Embeddings loaded.')

    return embeddings
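A minimal usage sketch for the function above, assuming args only needs the dataset and emb_method fields and datadict holds a NetworkX graph under 'g' plus a label array whose first dimension is the node count; the wiki edge list path is borrowed from the other examples and is an assumption here.

from types import SimpleNamespace
import networkx as nx
import numpy as np

# The ./embedding/ output directory is assumed to exist already.
args = SimpleNamespace(dataset='wiki', emb_method='deepwalk')
g = nx.read_edgelist('./data/wiki/Wiki_edgelist.txt', create_using=nx.DiGraph(),
                     nodetype=None, data=[('weight', int)])
datadict = {'g': g, 'labels': np.zeros((g.number_of_nodes(), 1))}
embeddings = embedding(args, datadict)  # returns an (n_nodes, emb_dim) array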
Example #3
    emb_list = []
    for k in X:
        emb_list.append(embeddings[k])
    emb_list = np.array(emb_list)

    model = TSNE(n_components=2)
    node_pos = model.fit_transform(emb_list)

    color_idx = {}
    for i in range(len(X)):
        color_idx.setdefault(Y[i][0], [])
        color_idx[Y[i][0]].append(i)

    for c, idx in color_idx.items():
        plt.scatter(node_pos[idx, 0], node_pos[idx, 1], label=c)
    plt.legend()
    plt.show()


if __name__ == "__main__":
    G = nx.read_edgelist('./data/wiki/Wiki_edgelist.txt',
                         create_using=nx.DiGraph(),
                         nodetype=None,
                         data=[('weight', int)])

    model = DeepWalk(G, walk_length=10, num_walks=80, workers=1)
    model.train(window_size=5, iter=3)
    embeddings = model.get_embeddings()
    evaluate_embeddings(embeddings)
    plot_embeddings(embeddings)
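evaluate_embeddings and plot_embeddings are defined elsewhere in the original script. Below is a hedged sketch of a classification-based evaluate_embeddings, assuming a label file with one "node_id label" pair per line; the wiki_labels.txt path and the logistic-regression choice are assumptions.

import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split


def evaluate_embeddings(embeddings, label_path='../data/wiki/wiki_labels.txt'):
    # Assumed label format: one "node_id label" pair per line.
    X, Y = [], []
    with open(label_path) as f:
        for line in f:
            node, label = line.strip().split()
            X.append(embeddings[node])
            Y.append(label)
    X_train, X_test, y_train, y_test = train_test_split(np.array(X), Y, test_size=0.2, random_state=0)
    clf = LogisticRegression(max_iter=1000).fit(X_train, y_train)
    print('micro-F1:', f1_score(y_test, clf.predict(X_test), average='micro'))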
Example #4
        color_idx[Y[i][0]].append(i)

    for c, idx in color_idx.items():
        plt.scatter(node_pos[idx, 0], node_pos[idx, 1], label=c)
    plt.legend()
    plt.show()


if __name__ == "__main__":
    G = nx.read_edgelist('../data/wiki/test_small.txt',
                         create_using=nx.DiGraph(),
                         nodetype=None,
                         data=[('weight', int)])

    model = DeepWalk(G, walk_length=4, num_walks=100, workers=30)
    model.train(window_size=50, iter=12)
    embeddings = model.get_embeddings()
    print(embeddings)
    f = open('../data/wiki/_test_vector.txt', 'w')  # 'wb' would mean writing a binary file
    j = 0
    for i in embeddings:
        f.write(i + " ")
        for q in embeddings[i]:
            f.write(str(q) + " ")
            # print(str(q))
        f.write('\n')
        j = j + 1
    f.close()
    # evaluate_embeddings(embeddings)
    plot_embeddings(embeddings)
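A companion sketch for reading the vector file written above back into a dict; it assumes each line is "node_id v1 v2 ... vd", which is exactly what the writing loop produces.

import numpy as np


def load_vectors(path='../data/wiki/_test_vector.txt'):
    # Each line: node id followed by its embedding values, separated by spaces.
    vectors = {}
    with open(path) as f:
        for line in f:
            parts = line.split()
            if parts:
                vectors[parts[0]] = np.array([float(v) for v in parts[1:]])
    return vectors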
Example #5

if __name__ == "__main__":
    # G = nx.read_edgelist('../data/wiki/Wiki_edgelist.txt',
    #                      create_using=nx.DiGraph(), nodetype=None, data=[('weight', int)])
    # nx.draw(G, node_size=10, font_size=10, font_color="blue", font_weight="bold")
    # plt.show()
    import pandas as pd
    df = pd.DataFrame()
    df['source'] = [str(i) for i in [0, 1, 2, 3, 4, 4, 6, 7, 7, 9]]
    df['target'] = [str(i) for i in [1, 4, 4, 4, 6, 7, 5, 8, 9, 8]]

    G = nx.from_pandas_edgelist(df, create_using=nx.Graph())

    model = DeepWalk(G, walk_length=50, num_walks=180, workers=1)
    model.train(window_size=10, iter=3, embed_size=2)
    # model = DeepWalk(G, walk_length=10, num_walks=80, workers=1)
    # model.train(window_size=5, iter=3,embed_size=128)
    embeddings = model.get_embeddings()
    #print(embeddings)
    x, y = [], []
    print(sorted(embeddings.items(), key=lambda x: x[0]))
    for k, i in embeddings.items():
        x.append(i[0])
        y.append(i[1])
    plt.scatter(x, y)
    plt.show()

    # evaluate_embeddings(embeddings)
    # plot_embeddings(embeddings)


Example #6

if __name__ == '__main__':
    train_df, vali_df, train_mat, vali_mat = get_network("ratings.csv")
    train_df.to_csv("network.txt", sep='@', mode='w', index=None, header=None, encoding='utf-16')
    appendMovie()
    G = buildGraph()
    #G = nx.read_edgelist("network.txt", encoding='utf-16',delimiter='@',
                        # create_using=nx.Graph(), nodetype=None, data=[('weight', float)])
    print("G done!")
    #model = DeepWalk(G, walk_length=10, num_walks=80, workers=1)
    model = DeepWalk(G, walk_length=100, num_walks=80, workers=1)
    print("begin to train")
    #model.train(window_size=5, iter=3)
    model.train(embed_size=200, window_size=30, iter=5)
    embeddings = model.get_embeddings()
    print("embeddings got, begin to evaluate")
    for N in range(5,21,5):
        print("top{}".format(N))
        precision, recall, precision_list, recall_list = evaluate(embeddings, train_mat, vali_mat, 6040, 3952, n=N)
        print("precision: {}%".format(precision * 100))
        print("recall: {}%".format(recall * 100))
        print("f1 score: {}".format((2 * precision * recall) / (precision + recall)))
        microPrecision = np.average(precision_list)
        microRecall = np.average(recall_list)
        microF1 = (2 * microPrecision * microRecall) / (microPrecision + microRecall)
        print("microPrecision: {}%".format(microPrecision * 100))
        print("microRecall: {}%".format(microRecall * 100))
        print("microF1: {}".format(microF1))
        np.savetxt("../results/precision_list_d_know_" + str(N) + ".txt", precision_list)
        np.savetxt("../results/recall_list_d_know_" + str(N) + ".txt", recall_list)
Example #7
    model = TSNE(n_components=2)  # reduce to 2D with t-SNE
    node_pos = model.fit_transform(emb_list)

    color_idx = {}
    for i in range(len(X)):
        color_idx.setdefault(Y[i][0], [])  # class label
        color_idx[Y[i][0]].append(i)  # node index

    for c, idx in color_idx.items():  # one color per class
        plt.scatter(node_pos[idx, 0], node_pos[idx, 1], label=c)
    plt.legend()
    plt.show()


if __name__ == "__main__":
    # Read in the edge list: each line of the file holds the two nodes joined by an edge.
    # NetworkX can read this format directly, which is very convenient.
    G = nx.read_edgelist('../data/wiki/Wiki_edgelist.txt',
                         create_using=nx.DiGraph(),
                         nodetype=None,
                         data=[('weight', int)])

    # Instantiate the model: walk ("sentence") length 10, 80 walks per node.
    model = DeepWalk(G, walk_length=10, num_walks=80, workers=1)
    model.train(window_size=5, iter=3)  # train the model; the gensim word2vec parameters keep their defaults inside train
    embeddings = model.get_embeddings()  # get the embedding vectors

    evaluate_embeddings(embeddings)  # evaluate embedding quality with node classification
    plot_embeddings(embeddings)  # reduce to 2D and plot for visualization
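X and Y in the plotting code above come from a label reader defined elsewhere in the script. A minimal sketch follows, assuming one "node_id label" pair per line and keeping each label in a one-element list so that Y[i][0] works as in the loop above; the file path is an assumption.

def read_node_label(filename='../data/wiki/wiki_labels.txt'):
    # Returns node ids X and per-node label lists Y (so Y[i][0] is the class).
    X, Y = [], []
    with open(filename) as f:
        for line in f:
            parts = line.strip().split()
            if len(parts) >= 2:
                X.append(parts[0])
                Y.append(parts[1:])
    return X, Y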
Example #8
    ge_features.to_csv(
        f'../save/graph_embedding_{name}_{graph_name}_svd{svd_dim}.csv')


if __name__ == "__main__":

    G = nx.read_edgelist(graph_path,
                         create_using=nx.DiGraph(),
                         nodetype=None,
                         data=[('weight', int)])

    model = DeepWalk(G,
                     walk_length=dw_walk_length,
                     num_walks=dw_num_walks,
                     workers=NUM_WORKER)  # init model
    model.train(window_size=dw_window_size, iter=dw_iter)  # train model
    embeddings = model.get_embeddings()  # get embedding vectors

    save_embedding(embeddings, name='dw')
    save_embedding_svd(embeddings, name='dw', svd_dim=svd_dim)

    model = Struc2Vec(G,
                      walk_length=s2v_walk_length,
                      num_walks=s2v_num_walks,
                      workers=NUM_WORKER,
                      verbose=40)  # init model
    model.train(window_size=s2v_window_size, iter=s2v_iter)  # train model
    embeddings = model.get_embeddings()  # get embedding vectors

    save_embedding(embeddings, name='s2v')
    save_embedding_svd(embeddings, name='s2v', svd_dim=svd_dim)
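save_embedding and save_embedding_svd are defined earlier in the original script; only the output path of the SVD variant is visible above. The sketch below is one way that variant could work, assuming graph_name and svd_dim are module-level settings and that TruncatedSVD is an acceptable reducer; this is an assumption, not the original implementation.

import pandas as pd
from sklearn.decomposition import TruncatedSVD


def save_embedding_svd(embeddings, name, svd_dim, graph_name='graph'):
    # Stack the embedding dict into a frame, reduce with truncated SVD, save as CSV.
    ge_features = pd.DataFrame.from_dict(embeddings, orient='index')
    svd = TruncatedSVD(n_components=svd_dim, random_state=0)
    reduced = pd.DataFrame(svd.fit_transform(ge_features.values), index=ge_features.index)
    reduced.columns = [f'{name}_svd_{i}' for i in range(svd_dim)]
    reduced.to_csv(f'../save/graph_embedding_{name}_{graph_name}_svd{svd_dim}.csv')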
Example #9
"""使用deepwalk模型获得节点向量"""
import sys

sys.path.append(r'D:\pythonplaces\citation-recommendation')

from ge import DeepWalk

import networkx as nx

if __name__ == "__main__":
    G = nx.read_edgelist('../data/aan/aan_normal_train.txt',
                         create_using=nx.DiGraph(),
                         nodetype=None,
                         data=[('weight', int)])
    model = DeepWalk(G, walk_length=5, num_walks=10, workers=12)
    model.train(embed_size=128,
                window_size=5,
                iter=3,
                emb_filepath="../embs/aan_deepwalk_test.emb")
    embeddings = model.get_embeddings()
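    # Note: emb_filepath is not a keyword the reference GraphEmbedding
    # DeepWalk.train knows about (extra kwargs there are forwarded to gensim's
    # Word2Vec), so the call above presumably relies on a locally modified
    # DeepWalk. With the stock library, the vectors can be written out after
    # training instead; reusing the same output path here is an assumption.
    with open('../embs/aan_deepwalk_test.emb', 'w') as out:
        for node, vec in embeddings.items():
            out.write(node + ' ' + ' '.join(str(v) for v in vec) + '\n')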