def embedding(args, datadict):
    """Return the node-embedding matrix for ``args.dataset``, training it at most once.

    The graph in ``datadict['g']`` is written to
    ``./embedding/<dataset>.edgelist`` if that file is missing, then re-read
    from it and stored back into ``datadict['g']``. If
    ``./embedding/<dataset>_<emb_method>.emb`` already exists it is loaded;
    otherwise a DeepWalk model is trained on the graph and the result is
    saved to that file.

    Args:
        args: object exposing ``dataset`` and ``emb_method``; both are only
            used to build the cache file names.
        datadict: mapping with a graph under ``'g'`` (replaced in place by
            the re-read graph) and an array under ``'labels'`` whose first
            dimension sets the number of rows of the result.

    Returns:
        A 2-D ``numpy`` array. When trained here, row ``int(key)`` holds the
        vector of node ``key``; rows of nodes without a vector stay zero.

    Raises:
        ValueError: if the trained model returns no embeddings at all.
    """
    cache_dir = './embedding'
    edgelist_path = f'{cache_dir}/{args.dataset}.edgelist'
    emb_path = f'{cache_dir}/{args.dataset}_{args.emb_method}.emb'

    # The writes below fail with FileNotFoundError if the folder is missing.
    os.makedirs(cache_dir, exist_ok=True)

    if not os.path.exists(edgelist_path):
        # NOTE(review): NetworkX documents write_edgelist's `data` as a bool
        # or a list of attribute *keys*; (key, type) pairs are the
        # read_edgelist convention. Confirm that weights really get written.
        nx.write_edgelist(datadict['g'], edgelist_path,
                          data=[('weight', int)])
    # NOTE(review): the original formatting was lost; this re-read is assumed
    # to run unconditionally (not only right after the file is written).
    datadict['g'] = nx.read_edgelist(edgelist_path,
                                     create_using=nx.DiGraph(),
                                     nodetype=None,
                                     data=[('weight', int)])

    if os.path.exists(emb_path):
        print('Loading the embeddings')
        # ndmin=2 keeps a single-row / single-column cache two-dimensional,
        # so the shape[1] access below cannot raise IndexError.
        embeddings = np.loadtxt(emb_path, ndmin=2)
        print(f'{embeddings.shape[1]}-dims Embeddings load.')
        return embeddings

    # NOTE(review): args.emb_method only names the cache file; the model
    # trained here is always DeepWalk.
    model = DeepWalk(datadict['g'], walk_length=5, num_walks=50, workers=8)
    model.train(window_size=10, iter=10)
    dict_embeddings = model.get_embeddings()
    if not dict_embeddings:
        raise ValueError('the embedding model returned no vectors')

    # Take the width from any vector instead of assuming node '0' exists.
    emb_dim = next(iter(dict_embeddings.values())).shape[0]
    embeddings = np.zeros((datadict['labels'].shape[0], emb_dim))

    print('Saving the embeddings......')
    for key, vector in tqdm(dict_embeddings.items()):
        embeddings[int(key)] = vector
    np.savetxt(emb_path, embeddings)
    print(f'{embeddings.shape[1]}-dims Embeddings saved.')
    return embeddings
def get_embeddings(self, inst, th=1):
    """Embed the nodes of ``inst`` with the model named by ``self._embedding``.

    A directed graph with float ``weight`` edge data is parsed from the
    edge list produced by ``self._compose_edge_list(inst.dist_mat, th)``.
    The model matching ``self._embedding`` ('deepwalk', 'node2vec', 'line',
    'sdne' or 'struc2vec') is built on that graph and trained with its fixed
    settings. Any other value falls back to ``self._normalise(inst)``.

    Args:
        inst: object providing ``dist_mat`` and ``n``.
        th: forwarded unchanged to ``self._compose_edge_list``
            (presumably a threshold on the distance matrix; confirm there).

    Returns:
        ``numpy`` array whose i-th row is the vector of node ``str(i)`` for
        ``i`` in ``range(inst.n)``, or the result of ``self._normalise(inst)``
        for an unrecognised method.
    """
    edge_lines = self._compose_edge_list(inst.dist_mat, th)
    G = nx.parse_edgelist(
        edge_lines,
        create_using=nx.DiGraph(),
        nodetype=None,
        data=[('weight', float)],
    )

    # Shared train() settings of the random-walk based models.
    walk_training = {'window_size': 5, 'iter': 3}

    # (method name, model factory, train() keyword arguments). The factories
    # are lambdas so only the selected model class is looked up and built.
    recipes = (
        ('deepwalk',
         lambda: DeepWalk(G, walk_length=10, num_walks=80, workers=1),
         walk_training),
        ('node2vec',
         lambda: Node2Vec(G, walk_length=10, num_walks=80, p=0.25, q=4,
                          workers=1),
         walk_training),
        # order can be 'first', 'second' or 'all'
        ('line',
         lambda: LINE(G, embedding_size=128, order='second'),
         {'batch_size': 1024, 'epochs': 50, 'verbose': 2}),
        ('sdne',
         lambda: SDNE(G, hidden_size=[256, 128]),
         {'batch_size': 3000, 'epochs': 40, 'verbose': 2}),
        ('struc2vec',
         lambda: Struc2Vec(G, 10, 80, workers=4, verbose=40),
         walk_training),
    )

    for method_name, build_model, train_kwargs in recipes:
        if self._embedding == method_name:
            model = build_model()
            model.train(**train_kwargs)
            break
    else:
        # No graph-embedding method selected: use the normalised instance.
        return self._normalise(inst)

    ebds = model.get_embeddings()
    # Node ids are strings in the parsed graph, hence the str(i) lookup.
    return np.array([ebds[str(i)] for i in range(inst.n)])
# NOTE(review): the statements down to plt.show() use names (X, Y, embeddings)
# that are bound outside the visible chunk; they look like the remainder of a
# plotting routine whose header is out of view -- confirm their nesting level.

# Collect the vectors of the nodes listed in X, in that order.
emb_list = np.array([embeddings[k] for k in X])

# Project the vectors to two dimensions.
model = TSNE(n_components=2)
node_pos = model.fit_transform(emb_list)

# Group row positions by the first label of each entry of Y.
color_idx = {}
for i in range(len(X)):
    color_idx.setdefault(Y[i][0], []).append(i)

# One scatter series (and legend entry) per label.
for c, idx in color_idx.items():
    plt.scatter(node_pos[idx, 0], node_pos[idx, 1], label=c)
plt.legend()
plt.show()


if __name__ == "__main__":
    # Load the edge list as a directed graph with an integer 'weight'.
    G = nx.read_edgelist(
        './data/wiki/Wiki_edgelist.txt',
        create_using=nx.DiGraph(),
        nodetype=None,
        data=[('weight', int)],
    )

    # Train DeepWalk, then evaluate and plot the resulting vectors.
    model = DeepWalk(G, walk_length=10, num_walks=80, workers=1)
    model.train(window_size=5, iter=3)
    embeddings = model.get_embeddings()

    evaluate_embeddings(embeddings)
    plot_embeddings(embeddings)
# NOTE(review): the statements down to plt.show() use names (emb_list, X, Y)
# that are bound outside the visible chunk; they look like the remainder of a
# plotting routine whose header is out of view -- confirm their nesting level.

# Reduce the dimensionality with t-SNE.
model = TSNE(n_components=2)
node_pos = model.fit_transform(emb_list)

# Map each class label to the list of node indices that carry it.
color_idx = {}
for i in range(len(X)):
    color_idx.setdefault(Y[i][0], []).append(i)

# A different colour for each class.
for c, idx in color_idx.items():
    plt.scatter(node_pos[idx, 0], node_pos[idx, 1], label=c)
plt.legend()
plt.show()


if __name__ == "__main__":
    # Read the edge list: every line of the file holds two nodes, i.e. the
    # edge that connects them. networkx can load it directly, which is
    # convenient.
    G = nx.read_edgelist(
        '../data/wiki/Wiki_edgelist.txt',
        create_using=nx.DiGraph(),
        nodetype=None,
        data=[('weight', int)],
    )

    # Instantiate the model: "sentence" length 10, 80 walks.
    model = DeepWalk(G, walk_length=10, num_walks=80, workers=1)
    # Train the model; the gensim word2vec parameters take their defaults
    # inside train().
    model.train(window_size=5, iter=3)
    # Get the embedding vectors.
    embeddings = model.get_embeddings()

    # Evaluate the quality of the embeddings via node classification.
    evaluate_embeddings(embeddings)
    # Project to two dimensions and draw the plot for visualisation.
    plot_embeddings(embeddings)
f'../save/graph_embedding_{name}_{graph_name}_svd{svd_dim}.csv') if __name__ == "__main__": G = nx.read_edgelist(graph_path, create_using=nx.DiGraph(), nodetype=None, data=[('weight', int)]) model = DeepWalk(G, walk_length=dw_walk_length, num_walks=dw_num_walks, workers=NUM_WORKER) # init model model.train(window_size=dw_window_size, iter=dw_iter) # train model embeddings = model.get_embeddings() # get embedding vectors save_embedding(embeddings, name='dw') save_embedding_svd(embeddings, name='dw', svd_dim=svd_dim) model = model = Struc2Vec(G, walk_length=s2v_walk_length, num_walks=s2v_num_walks, workers=NUM_WORKER, verbose=40) # init model model.train(window_size=s2v_window_size, iter=s2v_iter) # train model embeddings = model.get_embeddings() # get embedding vectors save_embedding(embeddings, name='s2v') save_embedding_svd(embeddings, name='s2v', svd_dim=svd_dim)