Esempio n. 1
0
def embedding(args, datadict):
    """Return node embeddings for ``datadict['g']``, cached under ``./embedding``.

    The graph is written to ``./embedding/{args.dataset}.edgelist`` if that
    file does not exist yet, then read back as a directed graph that replaces
    ``datadict['g']`` (the caller's mapping is mutated). Embeddings are loaded
    from ``./embedding/{args.dataset}_{args.emb_method}.emb`` when that file
    exists; otherwise they are trained with DeepWalk and saved there.

    Args:
        args: Object exposing ``dataset`` and ``emb_method``. In this function
            both are only used to build the cache file names; DeepWalk is the
            model trained regardless of ``emb_method``.
        datadict: Mapping holding the graph under ``'g'`` and an array under
            ``'labels'`` whose first dimension sets the number of rows of the
            embedding matrix.

    Returns:
        A 2-D NumPy array. When freshly trained, the row at index
        ``int(node)`` holds that node's vector and rows without a matching
        node stay zero.

    Raises:
        ValueError: If the trained model returns no embeddings at all.
    """
    cache_dir = './embedding'
    edgelist_path = f'{cache_dir}/{args.dataset}.edgelist'
    emb_path = f'{cache_dir}/{args.dataset}_{args.emb_method}.emb'

    # The cache directory may not exist on a first run; without it both the
    # edge-list write and the embedding save would fail.
    os.makedirs(cache_dir, exist_ok=True)

    if not os.path.exists(edgelist_path):
        # NOTE(review): the (name, type) pair form of `data` is the
        # read_edgelist convention; write_edgelist documents a bool or a list
        # of attribute keys. Confirm weights are actually written as intended.
        nx.write_edgelist(datadict['g'],
                          edgelist_path,
                          data=[('weight', int)])
    datadict['g'] = nx.read_edgelist(edgelist_path,
                                     create_using=nx.DiGraph(),
                                     nodetype=None,
                                     data=[('weight', int)])

    if os.path.exists(emb_path):
        print('Loading the embeddings')
        # ndmin=2 keeps the result 2-D even when the file holds one row.
        embeddings = np.loadtxt(emb_path, ndmin=2)
        print(f'{embeddings.shape[1]}-dims Embeddings load.')
        return embeddings

    model = DeepWalk(datadict['g'], walk_length=5, num_walks=50, workers=8)
    model.train(window_size=10, iter=10)
    dict_embeddings = model.get_embeddings()

    # Take the dimensionality from any vector instead of assuming that a node
    # labelled '0' exists in the graph.
    sample_vector = next(iter(dict_embeddings.values()), None)
    if sample_vector is None:
        raise ValueError('The embedding model returned no embeddings.')

    embeddings = np.zeros((datadict['labels'].shape[0],
                           sample_vector.shape[0]))
    print('Saving the embeddings......')
    for key in tqdm(dict_embeddings):
        # Node labels are strings after read_edgelist(nodetype=None); they are
        # converted to int to serve as row indices.
        embeddings[int(key)] = dict_embeddings[key]
    np.savetxt(emb_path, embeddings)
    print(f'{embeddings.shape[1]}-dims Embeddings saved.')

    return embeddings
Esempio n. 2
0
    def get_embeddings(self, inst, th=1):
        """Embed the nodes of ``inst`` using the model named by ``self._embedding``.

        A directed graph with float ``weight`` edge data is parsed from the
        edge list returned by ``self._compose_edge_list(inst.dist_mat, th)``.
        The selected model is trained on that graph and the per-node vectors
        are stacked in node order.

        Args:
            inst: Instance exposing ``dist_mat`` and ``n``.
            th: Value forwarded unchanged to ``self._compose_edge_list``.

        Returns:
            A NumPy array whose i-th entry is the vector stored under the key
            ``str(i)`` for ``i`` in ``range(inst.n)``. If ``self._embedding``
            is not one of the recognised names, the result of
            ``self._normalise(inst)`` is returned instead.
        """
        edge_lines = self._compose_edge_list(inst.dist_mat, th)
        graph = nx.parse_edgelist(
            edge_lines,
            create_using=nx.DiGraph(),
            nodetype=None,
            data=[('weight', float)],
        )

        method = self._embedding
        if method == 'line':
            # order can be 'first', 'second' or 'all'
            learner = LINE(graph, embedding_size=128, order='second')
            learner.train(batch_size=1024, epochs=50, verbose=2)
        elif method == 'sdne':
            learner = SDNE(graph, hidden_size=[256, 128])
            learner.train(batch_size=3000, epochs=40, verbose=2)
        elif method in ('deepwalk', 'node2vec', 'struc2vec'):
            # The random-walk models differ only in construction; they are all
            # trained with the same window size and iteration count.
            if method == 'deepwalk':
                learner = DeepWalk(graph, walk_length=10, num_walks=80,
                                   workers=1)
            elif method == 'node2vec':
                learner = Node2Vec(graph, walk_length=10, num_walks=80,
                                   p=0.25, q=4, workers=1)
            else:
                learner = Struc2Vec(graph, 10, 80, workers=4, verbose=40)
            learner.train(window_size=5, iter=3)
        else:
            # Unknown embedding name: fall back to the normalised instance.
            return self._normalise(inst)

        vectors = learner.get_embeddings()
        # Parsed node labels are strings, hence the str(i) lookup.
        return np.array([vectors[str(i)] for i in range(inst.n)])
Esempio n. 3
0
    emb_list = []
    for k in X:
        emb_list.append(embeddings[k])
    emb_list = np.array(emb_list)

    model = TSNE(n_components=2)
    node_pos = model.fit_transform(emb_list)

    color_idx = {}
    for i in range(len(X)):
        color_idx.setdefault(Y[i][0], [])
        color_idx[Y[i][0]].append(i)

    for c, idx in color_idx.items():
        plt.scatter(node_pos[idx, 0], node_pos[idx, 1], label=c)
    plt.legend()
    plt.show()


if __name__ == "__main__":
    # Script entry point: train DeepWalk on the Wiki edge list, then pass the
    # resulting embeddings to the evaluation and plotting helpers.

    # Read the edge list as a directed graph. Node labels are not converted
    # (nodetype=None) and an extra column, if present, is parsed as an
    # integer 'weight' attribute.
    G = nx.read_edgelist('./data/wiki/Wiki_edgelist.txt',
                         create_using=nx.DiGraph(),
                         nodetype=None,
                         data=[('weight', int)])

    # Build and train the DeepWalk model, then fetch the embeddings it
    # produced.
    model = DeepWalk(G, walk_length=10, num_walks=80, workers=1)
    model.train(window_size=5, iter=3)
    embeddings = model.get_embeddings()
    # evaluate_embeddings / plot_embeddings are defined elsewhere in the file.
    evaluate_embeddings(embeddings)
    plot_embeddings(embeddings)
    model = TSNE(n_components=2)  #用TSNE进行降维
    node_pos = model.fit_transform(emb_list)

    color_idx = {}
    for i in range(len(X)):
        color_idx.setdefault(Y[i][0], [])  #类别
        color_idx[Y[i][0]].append(i)  #id

    for c, idx in color_idx.items():  #不同类别不同颜色
        plt.scatter(node_pos[idx, 0], node_pos[idx, 1], label=c)
    plt.legend()
    plt.show()


if __name__ == "__main__":
    # Read the edge list: each line of the file holds two nodes and denotes
    # the edge connecting them.
    # networkx can read this format directly, which keeps loading simple.
    G = nx.read_edgelist('../data/wiki/Wiki_edgelist.txt',
                         create_using=nx.DiGraph(),
                         nodetype=None,
                         data=[('weight', int)])

    # Instantiate the model: "sentence" (walk) length 10, 80 walks.
    model = DeepWalk(G, walk_length=10, num_walks=80, workers=1)
    model.train(window_size=5, iter=3)  # Train the model; per the original note, the gensim word2vec parameters have defaults inside train
    embeddings = model.get_embeddings()  # Get the embedding vectors

    evaluate_embeddings(embeddings)  # Evaluate embedding quality via node classification
    plot_embeddings(embeddings)  # Reduce to 2-D and plot for visualisation
Esempio n. 5
0
        f'../save/graph_embedding_{name}_{graph_name}_svd{svd_dim}.csv')


if __name__ == "__main__":
    # Script entry point: train DeepWalk and then Struc2Vec on the same graph
    # and save each model's embeddings, both raw and via save_embedding_svd.
    # graph_path, NUM_WORKER, svd_dim and the dw_* / s2v_* settings are not
    # defined in this block; they must be provided elsewhere in the module.

    # Read the edge list as a directed graph. Node labels are not converted
    # (nodetype=None) and an extra column, if present, is parsed as an
    # integer 'weight' attribute.
    G = nx.read_edgelist(graph_path,
                         create_using=nx.DiGraph(),
                         nodetype=None,
                         data=[('weight', int)])

    # --- DeepWalk ---
    model = DeepWalk(G,
                     walk_length=dw_walk_length,
                     num_walks=dw_num_walks,
                     workers=NUM_WORKER)  # init model
    model.train(window_size=dw_window_size, iter=dw_iter)  # train model
    embeddings = model.get_embeddings()  # get embedding vectors

    save_embedding(embeddings, name='dw')
    save_embedding_svd(embeddings, name='dw', svd_dim=svd_dim)

    # --- Struc2Vec ---
    # `model` and `embeddings` are deliberately rebound; the DeepWalk results
    # have already been saved above.
    model = Struc2Vec(G,
                      walk_length=s2v_walk_length,
                      num_walks=s2v_num_walks,
                      workers=NUM_WORKER,
                      verbose=40)  # init model
    model.train(window_size=s2v_window_size, iter=s2v_iter)  # train model
    embeddings = model.get_embeddings()  # get embedding vectors

    save_embedding(embeddings, name='s2v')
    save_embedding_svd(embeddings, name='s2v', svd_dim=svd_dim)