def get_embeddings(self, inst, th=1):
    """Embed the nodes of ``inst`` with the method named by ``self._embedding``.

    A directed, float-weighted graph is parsed from the edge list returned by
    ``self._compose_edge_list(inst.dist_mat, th)``; the selected model is then
    trained on it.

    Args:
        inst: object providing ``dist_mat`` and ``n``.
        th: forwarded unchanged to ``_compose_edge_list``
            (presumably an edge threshold -- TODO confirm).

    Returns:
        ``np.ndarray`` with one row per node index ``0 .. inst.n - 1``, looked
        up by the string form of the index. If ``self._embedding`` is not one
        of the supported names, ``self._normalise(inst)`` is returned instead.
    """
    edge_lines = self._compose_edge_list(inst.dist_mat, th)
    G = nx.parse_edgelist(
        edge_lines,
        create_using=nx.DiGraph(),
        nodetype=None,
        data=[('weight', float)],
    )

    # The three random-walk models share the same training settings.
    walk_training = {'window_size': 5, 'iter': 3}

    method = self._embedding
    if method == 'deepwalk':
        model = DeepWalk(G, walk_length=10, num_walks=80, workers=1)
        training = walk_training
    elif method == 'node2vec':
        model = Node2Vec(G, walk_length=10, num_walks=80, p=0.25, q=4, workers=1)
        training = walk_training
    elif method == 'line':
        # order can be 'first', 'second' or 'all'
        model = LINE(G, embedding_size=128, order='second')
        training = {'batch_size': 1024, 'epochs': 50, 'verbose': 2}
    elif method == 'sdne':
        model = SDNE(G, hidden_size=[256, 128])
        training = {'batch_size': 3000, 'epochs': 40, 'verbose': 2}
    elif method == 'struc2vec':
        model = Struc2Vec(G, 10, 80, workers=4, verbose=40)
        training = walk_training
    else:
        # Unknown method: no model is trained.
        return self._normalise(inst)

    model.train(**training)
    vectors = model.get_embeddings()
    # Node ids were parsed as strings, so index by str(i).
    return np.array([vectors[str(i)] for i in range(inst.n)])
def embedding(args, datadict):
    """Return the node-embedding matrix for ``args.dataset``, training on a cache miss.

    The graph in ``datadict['g']`` is written to
    ``./embedding/<dataset>.edgelist`` if that file is missing, and is then
    always re-read from it as a directed graph, replacing ``datadict['g']``.
    Embeddings are cached as text in ``./embedding/<dataset>_<emb_method>.emb``.

    Args:
        args: object with ``dataset`` and ``emb_method`` attributes. Both are
            used only to build cache file names; DeepWalk is always the model
            that is trained.
        datadict: mapping holding the graph under ``'g'`` and an array under
            ``'labels'`` whose first dimension gives the number of rows.

    Returns:
        2-D ``np.ndarray``; row ``int(node_id)`` holds that node's vector.
        Rows for nodes without an embedding stay zero.
    """
    cache_dir = './embedding'
    edgelist_path = f'{cache_dir}/{args.dataset}.edgelist'
    emb_path = f'{cache_dir}/{args.dataset}_{args.emb_method}.emb'

    # The cache directory may not exist on a first run; without this the
    # writes below fail with FileNotFoundError.
    os.makedirs(cache_dir, exist_ok=True)

    if not os.path.exists(edgelist_path):
        # NOTE(review): write_edgelist expects plain attribute keys in `data`;
        # the (name, type) tuples used here look like read_edgelist syntax and
        # probably match no edge attribute, so weights may not be written.
        # Left unchanged because the cached files depend on it -- confirm.
        nx.write_edgelist(datadict['g'], edgelist_path,
                          data=[('weight', int)])
    datadict['g'] = nx.read_edgelist(edgelist_path,
                                     create_using=nx.DiGraph(),
                                     nodetype=None,
                                     data=[('weight', int)])

    if os.path.exists(emb_path):
        print('Loading the embeddings')
        # ndmin=2 keeps a single-row cache two-dimensional so shape[1] is valid.
        embeddings = np.loadtxt(emb_path, ndmin=2)
        print(f'{embeddings.shape[1]}-dims Embeddings load.')
        return embeddings

    model = DeepWalk(datadict['g'], walk_length=5, num_walks=50, workers=8)
    model.train(window_size=10, iter=10)
    dict_embeddings = model.get_embeddings()

    # Take the vector width from any embedded node: node '0' is absent when it
    # has no edges, because edge lists do not record isolated nodes.
    dim = next(iter(dict_embeddings.values())).shape[0]
    embeddings = np.zeros((datadict['labels'].shape[0], dim))

    print('Saving the embeddings......')
    for key in tqdm(dict_embeddings):
        embeddings[int(key)] = dict_embeddings[key]
    np.savetxt(emb_path, embeddings)
    print(f'{embeddings.shape[1]}-dims Embeddings saved.')
    return embeddings
emb_list = [] for k in X: emb_list.append(embeddings[k]) emb_list = np.array(emb_list) model = TSNE(n_components=2) node_pos = model.fit_transform(emb_list) color_idx = {} for i in range(len(X)): color_idx.setdefault(Y[i][0], []) color_idx[Y[i][0]].append(i) for c, idx in color_idx.items(): plt.scatter(node_pos[idx, 0], node_pos[idx, 1], label=c) plt.legend() plt.show() if __name__ == "__main__": G = nx.read_edgelist('./data/wiki/Wiki_edgelist.txt', create_using=nx.DiGraph(), nodetype=None, data=[('weight', int)]) model = DeepWalk(G, walk_length=10, num_walks=80, workers=1) model.train(window_size=5, iter=3) embeddings = model.get_embeddings() evaluate_embeddings(embeddings) plot_embeddings(embeddings)
color_idx[Y[i][0]].append(i) for c, idx in color_idx.items(): plt.scatter(node_pos[idx, 0], node_pos[idx, 1], label=c) plt.legend() plt.show() if __name__ == "__main__": G = nx.read_edgelist('../data/wiki/test_small.txt', create_using=nx.DiGraph(), nodetype=None, data=[('weight', int)]) model = DeepWalk(G, walk_length=4, num_walks=100, workers=30) model.train(window_size=50, iter=12) embeddings = model.get_embeddings() print(embeddings) f = open('../data/wiki/_test_vector.txt', 'w') # 若是'wb'就表示写二进制文件 j = 0 for i in embeddings: f.write(i + " ") for q in embeddings[i]: f.write(str(q) + " ") # print(str(q)) f.write('\n') j = j + 1 f.close() # evaluate_embeddings(embeddings) plot_embeddings(embeddings)
if __name__ == "__main__":
    import pandas as pd

    # Small hand-written undirected graph; node ids are strings.
    source_ids = [0, 1, 2, 3, 4, 4, 6, 7, 7, 9]
    target_ids = [1, 4, 4, 4, 6, 7, 5, 8, 9, 8]
    edges = pd.DataFrame({
        'source': [str(node) for node in source_ids],
        'target': [str(node) for node in target_ids],
    })
    graph = nx.from_pandas_edgelist(edges, create_using=nx.Graph())

    # embed_size=2 so each vector can be drawn directly as an (x, y) point.
    walker = DeepWalk(graph, walk_length=50, num_walks=180, workers=1)
    walker.train(window_size=10, iter=3, embed_size=2)
    vectors = walker.get_embeddings()

    # Show the vectors ordered by node id.
    print(sorted(vectors.items(), key=lambda item: item[0]))

    # Scatter the first two components of every vector.
    xs = [vec[0] for vec in vectors.values()]
    ys = [vec[1] for vec in vectors.values()]
    plt.scatter(xs, ys)
    plt.show()
return G if __name__ == '__main__': train_df, vali_df, train_mat, vali_mat = get_network("ratings.csv") train_df.to_csv("network.txt",sep='@',mode='w',index=None,header=None,encoding='utf-16') appendMovie() G = buildGraph() #G = nx.read_edgelist("network.txt", encoding='utf-16',delimiter='@', # create_using=nx.Graph(), nodetype=None, data=[('weight', float)]) print("G done!") #model = DeepWalk(G, walk_length=10, num_walks=80, workers=1) model = DeepWalk(G, walk_length=100, num_walks=80, workers=1) print("begin to train") #model.train(window_size=5, iter=3) model.train(embed_size=200, window_size=30, iter=5) embeddings = model.get_embeddings() print("embeddings got, begin to evaluate") for N in range(5,21,5): print("top{}".format(N)) precision, recall, precision_list, recall_list = evaluate(embeddings, train_mat, vali_mat, 6040, 3952,n=N) print("precision{}%".format(precision * 100)) print("recall{}%".format(recall * 100)) print("f1 score{}".format((2 * precision * recall) / (precision + recall))) microPrecision = np.average(precision_list) microRecall = np.average(recall_list) microF1 = (2 * microPrecision * microRecall) / (microPrecision + microRecall) print("microPrecision:{}%".format(microPrecision * 100)) print("mircroRecall:{}%".format(microRecall * 100)) np.savetxt("../results/precision_list_d_know_" + str(N) + ".txt", precision_list) np.savetxt("../results/recall_list_d_know_" + str(N) + ".txt", recall_list)
model = TSNE(n_components=2) #用TSNE进行降维 node_pos = model.fit_transform(emb_list) color_idx = {} for i in range(len(X)): color_idx.setdefault(Y[i][0], []) #类别 color_idx[Y[i][0]].append(i) #id for c, idx in color_idx.items(): #不同类别不同颜色 plt.scatter(node_pos[idx, 0], node_pos[idx, 1], label=c) plt.legend() plt.show()
if __name__ == "__main__":
    # Read the edge list: each line of the file names the two nodes joined by
    # an edge. networkx loads it directly.
    edge_file = '../data/wiki/Wiki_edgelist.txt'
    graph = nx.read_edgelist(
        edge_file,
        create_using=nx.DiGraph(),
        nodetype=None,
        data=[('weight', int)],
    )

    # Instantiate the model: "sentences" (walks) of length 10, 80 walks.
    deepwalk = DeepWalk(graph, walk_length=10, num_walks=80, workers=1)
    # Train; the remaining gensim word2vec parameters use train()'s defaults.
    deepwalk.train(window_size=5, iter=3)
    # Fetch the embedding vectors.
    node_vectors = deepwalk.get_embeddings()

    # Judge embedding quality with node classification.
    evaluate_embeddings(node_vectors)
    # Reduce to two dimensions and visualise.
    plot_embeddings(node_vectors)
ge_features.to_csv( f'../save/graph_embedding_{name}_{graph_name}_svd{svd_dim}.csv')
if __name__ == "__main__":
    # Load the graph once; both models below are trained on it.
    G = nx.read_edgelist(graph_path,
                         create_using=nx.DiGraph(), nodetype=None,
                         data=[('weight', int)])

    # DeepWalk: train, then save the raw and SVD-reduced embeddings as 'dw'.
    model = DeepWalk(G, walk_length=dw_walk_length,
                     num_walks=dw_num_walks, workers=NUM_WORKER)
    model.train(window_size=dw_window_size, iter=dw_iter)
    embeddings = model.get_embeddings()
    save_embedding(embeddings, name='dw')
    save_embedding_svd(embeddings, name='dw', svd_dim=svd_dim)

    # Struc2Vec: same pipeline, saved as 's2v'.
    # (Removed the redundant chained `model = model = ...` assignment.)
    model = Struc2Vec(G, walk_length=s2v_walk_length,
                      num_walks=s2v_num_walks, workers=NUM_WORKER,
                      verbose=40)
    model.train(window_size=s2v_window_size, iter=s2v_iter)
    embeddings = model.get_embeddings()
    save_embedding(embeddings, name='s2v')
    save_embedding_svd(embeddings, name='s2v', svd_dim=svd_dim)
"""Obtain node vectors with the DeepWalk model."""
import sys

# Make the local `ge` package importable; this must run before the import
# below.
# NOTE(review): machine-specific absolute path -- confirm it is still needed.
sys.path.append(r'D:\pythonplaces\citation-recommendation')

from ge import DeepWalk
import networkx as nx

if __name__ == "__main__":
    train_edges = '../data/aan/aan_normal_train.txt'
    # Passed to train() as emb_filepath; presumably where the trained vectors
    # are written -- confirm in ge.DeepWalk.
    emb_out = "../embs/aan_deepwalk_test.emb"

    # Directed graph with an integer 'weight' edge attribute.
    G = nx.read_edgelist(
        train_edges,
        create_using=nx.DiGraph(),
        nodetype=None,
        data=[('weight', int)],
    )

    model = DeepWalk(G, walk_length=5, num_walks=10, workers=12)
    model.train(
        embed_size=128,
        window_size=5,
        iter=3,
        emb_filepath=emb_out,
    )
    embeddings = model.get_embeddings()