def main(args):
    '''
    Pipeline for representational learning for all nodes in a graph.

    Walks are simulated on the graph returned by read_graph() and on one
    pruned copy per threshold, where a pruned copy keeps only the edges
    whose 'weight' is strictly greater than the threshold. All walks are
    concatenated and passed to learn_embeddings().
    '''
    # original graph nx_G
    nx_G = read_graph()
    G = node2vec.Graph(nx_G, args.directed, args.p, args.q)
    G.preprocess_transition_probs()
    walks = G.simulate_walks(args.num_walks, args.walk_length)

    # simplify graph
    simplify_threshold = [2.67, 4.15, 10.66]  # meeting_freq threshold
    # simplify_threshold = [7,10,24]
    for threshold in simplify_threshold:
        newG_edge = []
        for n, nbrs in nx_G.adj.items():
            for nbr, eattr in nbrs.items():
                wt = eattr['weight']
                if wt > threshold:
                    newG_edge.append((n, nbr, wt))

        new_nx_G = nx.DiGraph()
        new_nx_G.add_weighted_edges_from(newG_edge)
        if not args.directed:
            # to_undirected() returns a new graph; the original code dropped
            # the result, so the pruned graph silently stayed a DiGraph.
            # The conversion is skipped for directed runs so the graph type
            # matches the is_directed flag handed to node2vec.Graph.
            new_nx_G = new_nx_G.to_undirected()

        new_G = node2vec.Graph(new_nx_G, args.directed, args.p, args.q)
        new_G.preprocess_transition_probs()
        walks += new_G.simulate_walks(args.num_walks, args.walk_length)

    learn_embeddings(walks)
def main(args):
    '''
    Pipeline for representational learning for all nodes in a graph.

    Each entry of the directory args.input is read as an edge list and
    embedded on its own. The vectors are collected under their integer node
    id, sorted by id, appended column-wise to 'data/embeddings_base.npy'
    and written to 'data/embeddings_new.npy'.

    Raises AssertionError if the same node id appears in two edge lists.
    '''
    embeddings = {}
    edgelists = os.listdir(args.input)
    for idx, edgelist in enumerate(edgelists):
        # os.path.join is correct whether or not args.input ends with a
        # path separator; plain concatenation was not.
        nx_G = read_graph(os.path.join(args.input, edgelist), args)
        G = node2vec.Graph(nx_G, args.directed, args.p, args.q)
        G.preprocess_transition_probs()
        walks = G.simulate_walks(args.num_walks, args.walk_length)
        n2v = learn_embeddings(args, walks)
        for key in n2v.vocab.keys():
            node_id = int(key)
            assert node_id not in embeddings, (node_id, embeddings.keys())
            embeddings[node_id] = n2v.word_vec(key)
        if idx % 10000 == 0:
            print(idx, len(embeddings))

    # Order the vectors by node id so that row i belongs to the i-th
    # smallest id.
    keys = np.array(list(embeddings.keys()))
    order = np.argsort(keys)
    values = np.array(list(embeddings.values()))[order]

    emb_old = np.load('data/embeddings_base.npy')
    emb = np.concatenate([emb_old, values], axis=1)
    np.save('data/embeddings_new.npy', emb)
def main(args, c_args):
    """
    Pipeline for representational learning for all nodes in a graph.

    Simulates walks, writes them as a text corpus (one walk per line, nodes
    separated by single spaces), runs the external word2vec binary on that
    corpus and finally calls save_word_emb_with_name() on the result.

    c_args is accepted but not used in this function. source_name is read
    from module scope.
    """
    # generate corpus
    nx_G = read_graph()
    G = node2vec.Graph(nx_G, args.directed, args.p, args.q)
    G.preprocess_transition_probs()
    walks = G.simulate_walks(args.num_walks, args.walk_length)

    # create raw data
    print("walks converted")
    # join() builds the corpus in linear time; repeated += was quadratic.
    sentences = "".join(
        " ".join(str(node) for node in walk) + "\n" for walk in walks)

    # The file must be closed (and therefore flushed) before the external
    # trainer reads it; the original left the handle open.
    with open('../corpus/' + source_name + '.txt', 'w+') as corpus_file:
        corpus_file.write(sentences)

    # learn_embeddings(walks)
    # NOTE(review): source_name is interpolated into a shell command line, so
    # it must come from a trusted source.
    call(
        "./../Modified_DIVE/word2vec -train ../corpus/" + source_name +
        ".txt -output ../emb/" + source_name + ".emb " + source_name +
        "-vocab -alpha 0.10 -window 10 -cbow 0 -sample 1e-5 -threads 20 -binary 0 -iter 15",
        shell=True)
    save_word_emb_with_name("../emb/" + source_name + ".emb",
                            "../json/" + source_name + "_with_name.json",
                            "../graph/" + source_name + "_id_name.net")
def generate_node2vec_embeddings(A, emd_size=128, negative_injection=False, train_neg=None):
    """
    Train node2vec embeddings for the graph described by the sparse matrix A.

    Parameters:
        A: sparse adjacency matrix accepted by nx.from_scipy_sparse_matrix.
        emd_size: dimensionality of the returned vectors.
        negative_injection: when true, the pairs in train_neg are added to
            a copy of A as edges (in both directions) before training.
        train_neg: (row, col) index pair; only read when negative_injection
            is true.

    Returns:
        float32 array of shape (A.shape[0], emd_size). Nodes that received
        no vector from Word2Vec get the mean of the vectors that exist; if
        no node has a vector at all the rows stay zero.
    """
    if negative_injection:
        row, col = train_neg
        A = A.copy()
        A[row, col] = 1  # inject negative train
        A[col, row] = 1  # inject negative train
    nx_G = nx.from_scipy_sparse_matrix(A)
    G = node2vec.Graph(nx_G, is_directed=False, p=1, q=1)
    G.preprocess_transition_probs()
    walks = G.simulate_walks(num_walks=10, walk_length=80)
    # Each walk must be a real list: Word2Vec iterates the corpus more than
    # once, and a Python 3 map object is empty after the first pass.
    walks = [[str(node) for node in walk] for walk in walks]
    model = Word2Vec(walks, size=emd_size, window=10, min_count=0, sg=1,
                     workers=8, iter=1)
    wv = model.wv

    num_nodes = A.shape[0]
    embeddings = np.zeros([num_nodes, emd_size], dtype='float32')
    sum_embeddings = np.zeros(emd_size, dtype='float32')
    empty_list = []
    for i in range(num_nodes):
        if str(i) in wv:
            embeddings[i] = wv.word_vec(str(i))
            sum_embeddings += embeddings[i]
        else:
            empty_list.append(i)

    num_found = num_nodes - len(empty_list)
    # Guard against dividing by zero when Word2Vec produced no vectors.
    if empty_list and num_found:
        embeddings[empty_list] = sum_embeddings / num_found
    return embeddings
def main(args):
    '''
    Pipeline for representational learning for all nodes in a graph.

    Tries to open the walks cached at args.cached_walks_path. When that
    raises IOError the walks are generated from the graph, saved to the same
    path and loaded back. The walks are then passed to learn_embeddings().
    '''
    print("Args: {}".format(args))
    cache_path = args.cached_walks_path
    try:
        walks = Walks(cache_path)
    except IOError:
        print("Found no cached walks at {}".format(cache_path))
        print("Generating walks from graph.")
        source_graph = read_graph()
        walker = node2vec.Graph(source_graph, args.directed, args.p, args.q,
                                n_jobs=args.workers)
        print("Loaded graph: {}".format(walker))

        print("Processing transition probabilities.")
        walker.preprocess_transition_probs()

        print("Simulating walks.")
        raw_walks = walker.simulate_walks(args.num_walks, args.walk_length)
        walks = walks2str_list(raw_walks)

        print("Saving walks at {}".format(cache_path))
        save_walks(walks, cache_path)

        # Reload from disk so the walks come from the cache file.
        print("Loading walks")
        walks = load_walks(cache_path)

    print("Learning embeddings.")
    learn_embeddings(walks)
def main(args):
    """
    Pipeline for representational learning for all nodes in a graph.

    Wraps the graph from read_graph() in node2vec.Graph (using
    args.directed, args.p and args.q), simulates args.num_walks walks of
    length args.walk_length and passes them to learn_embeddings().
    """
    source_graph = read_graph()
    walker = node2vec.Graph(source_graph, args.directed, args.p, args.q)
    walker.preprocess_transition_probs()
    node_sequences = walker.simulate_walks(args.num_walks, args.walk_length)
    learn_embeddings(node_sequences)
def main(args):
    """
    Pipeline for representational learning for all nodes in a graph.

    Prints a progress line before every stage and reports the wall-clock
    time of walk simulation (total and per args.num_walks) and of
    learn_embeddings() (total and per args.iter).
    """
    print("Reading the graph...")
    source_graph = read_graph()

    print("Preprocessing the graph...")
    walker = node2vec.Graph(source_graph, args.directed, args.p, args.q)
    walker.preprocess_transition_probs()

    print("Simulating random walks...")
    walk_begin = time.time()
    walks = walker.simulate_walks(args.num_walks, args.walk_length)
    walk_seconds = time.time() - walk_begin
    walk_total = str(round(walk_seconds, 3))
    walk_average = str(round(walk_seconds / args.num_walks, 3))
    print("\n---- Random walks generation completed in " + walk_total +
          " seconds; average of " + walk_average +
          " seconds per set of walks\n")

    print("Learning node embedding function for {} SGD iterations...".format(
        args.iter))
    train_begin = time.time()
    learn_embeddings(walks)
    train_seconds = time.time() - train_begin
    train_total = str(round(train_seconds, 3))
    train_average = str(round(train_seconds / args.iter, 3))
    print("\n ---- Embedding learning completed in " + train_total +
          " seconds; average of " + train_average +
          " seconds per SGD iteration\n")
def main(args):
    '''
    Pipeline for representational learning for all nodes in a graph.

    Without args.resume the graph is read and its transition probabilities
    are computed; then either walks are simulated (args.end2end) or the
    alias tables are pickled to args.probs_graph + ".nodes" / ".edges".
    With args.resume the walks are unpickled from args.walk_list instead.
    learn_embeddings() runs when args.end2end or args.resume is set.

    args.resume is treated as a boolean flag.
    '''
    if not args.resume:
        print("Reading graph.")
        nx_G = read_graph()
        print("Passthrough graph and construct class.")
        G = node2vec.Graph(nx_G, args.directed, args.p, args.q)
        print("Generate probs.")
        G.preprocess_transition_probs()
        if args.end2end:
            print("Generate path.")
            walks = G.simulate_walks(args.num_walks, args.walk_length)
        else:
            # Context managers close the files; the original passed bare
            # open() results to pickle and never closed them.
            print("Dump probs of nodes.")
            with open(args.probs_graph + ".nodes", "wb") as nodes_file:
                pickle.dump(G.alias_nodes, nodes_file)
            print("Dump probs of edges.")
            with open(args.probs_graph + ".edges", "wb") as edges_file:
                pickle.dump(G.alias_edges, edges_file)
            # nx.write_weighted_edgelist(G, args.probs_graph)

    if args.end2end or args.resume:
        if args.resume:
            # NOTE(review): pickle.load executes code embedded in the file;
            # args.walk_list must point to a trusted file.
            with open(args.walk_list, "rb") as walks_file:
                walks = pickle.load(walks_file)
        print("Pass to word2vec.")
        learn_embeddings(walks)
def main(args):
    """
    Pipeline for representational learning for all nodes in a graph.

    Simulates walks on the graph, replaces every walk entry by its value in
    the index read from args.semantic, trains with J_ME.learn_embeddings()
    and combines the result of S_O.self_organization() with the embeddings
    read from "temp/fact_embeddings.txt".
    """
    print("==========Read Network and Semantic!===========")
    nx_G = read_graph()
    node_count = len(nx_G.nodes())
    print("The number of nodes in network is {}".format(node_count))
    index = read_index(args.semantic)

    print("==========Sampling for Fact!===========")
    sampler = node2vec.Graph(nx_G, args.directed, args.p, args.q)
    sampler.preprocess_transition_probs()
    walks = sampler.simulate_walks(args.epochs, args.walk_length)

    print("==========Semantic Alignment!===========")
    # Rewrite each walk in place, mapping every entry through the index.
    for walk in tqdm(walks):
        for position, node in enumerate(walk):
            walk[position] = index[node]

    print(
        "==========Jointly Learning Fact Embeddings in Word-Level and Fact-Level!==========="
    )
    word_model = J_ME.learn_embeddings(args.mode, walks, args.dimensions,
                                       args.window_size, args.workers,
                                       args.iter, index)

    print(
        "==========Combine Two Embeddings to Obtain the Final Fact Embeddings!==========="
    )
    fact_vec_from_word = S_O.self_organization(word_model, nx_G, index,
                                               args.dimensions)
    fact_vec_from_node = read_embeddings("temp/fact_embeddings.txt")
    sentenceemb_with_wordemb(fact_vec_from_word, fact_vec_from_node)
def main(datapath):
    '''
    Read the input graph data given on the command line, build sequence
    data with random walks and train a Skip-Gram model on it.
    '''
    print('Reading edgelist...')
    nx_G = nx.read_edgelist(datapath, nodetype=str,
                            create_using=nx.DiGraph())
    # Every edge gets weight 1.
    for src, dst in nx_G.edges():
        nx_G[src][dst]['weight'] = 1

    print('Organizing baiased-weighted graph...')
    # Instantiate the Graph class from node2vec.py. Because weight=1 is set
    # on every edge above, args.is_unweighted is not considered.
    walker = node2vec.Graph(nx_G, is_directed=True, p=1, q=1)
    # Prepare, per node, transition probabilities that reflect the weight
    # and direction of the input graph's links.
    walker.preprocess_transition_probs_deepwalk()

    print('Start random walk...')
    # Start the biased random walk.
    walks = walker.simulate_deepwalks(20, 120)

    print('Training the skip-gram model...')
    dataname = datapath.replace('.edgelist', '')
    # Train the skip-gram model on the node sequences obtained above.
    model = learn_embeddings(walks, dataname)

    del walks, model
    gc.collect()
def node_fun(args, nx_graph):
    """
    Simulate walks on nx_graph and return them with the node count.

    Returns a pair (walks, vocabulary_size): walks is the result of
    node_num() applied to the simulated walks, vocabulary_size is the
    number of nodes in nx_graph.
    """
    walker = node2vec.Graph(nx_graph, args.directed, args.p, args.q)
    walker.preprocess_transition_probs()
    raw_walks = walker.simulate_walks(args.num_walks, args.walk_length)
    numbered_walks = node_num(raw_walks)
    return numbered_walks, len(nx_graph.nodes())
def get_n2v_embeddings(emb_dim, nx_G):
    """
    Pipeline for representational learning for all nodes in a graph.

    Simulates 10 walks of length 80 per node on nx_G (undirected, p=q=1)
    and trains embeddings of dimension emb_dim.

    Returns whatever learn_embeddings(walks, emb_dim) returns. The original
    discarded that value, so a caller could never get the embeddings from
    this function.
    """
    G = node2vec.Graph(nx_G, is_directed=False, p=1, q=1)
    G.preprocess_transition_probs()
    walks = G.simulate_walks(10, 80)
    return learn_embeddings(walks, emb_dim)
def main(args):
    """
    Simulate node2vec walks and write them to args.output.

    The walks are written with np.savetxt; no embeddings are trained here.
    """
    source_graph = read_graph()
    walker = node2vec.Graph(source_graph, args.directed, args.p, args.q)
    walker.preprocess_transition_probs()
    node_sequences = walker.simulate_walks(args.num_walks, args.walk_length)
    np.savetxt(args.output, node_sequences)
def main(args):
    """
    Pipeline for representational learning for all nodes in a graph.

    node2vec.Graph receives args.directed, args.p and args.h; the walks are
    simulated with args.r and args.l and passed to learn_embeddings().
    """
    source_graph = read_graph()
    walker = node2vec.Graph(source_graph, args.directed, args.p, args.h)
    walker.preprocess_transition_probs()
    node_sequences = walker.simulate_walks(args.r, args.l)
    learn_embeddings(node_sequences)
def node_walk(args):
    """
    Read the graph described by args and return the simulated walks.

    The walks are node sequences produced by random walks; they are meant
    to be treated as text and fed to a Word2Vec model to produce vectors.
    """
    source_graph = read_graph(args)
    # args.directed is a bool marking the graph as directed or undirected;
    # args.p and args.q are the parameters p and q. This step builds the
    # graph object.
    walker = n2v.Graph(source_graph, args.directed, args.p, args.q)
    # Generate the transition probability vector of every node.
    walker.preprocess_transition_probs()
    # Random walk.
    return walker.simulate_walks(args.num_walks, args.walk_length)
def main(nx_G, args):
    """
    Pipeline for representational learning for all nodes in a graph.

    args is indexed like a mapping with the keys "is_directed", "p", "q",
    "num_walks" and "walk_length". Returns the result of
    learn_embeddings(walks, args).
    """
    # nx_G = read_graph()
    walker = node2vec.Graph(nx_G, args["is_directed"], args["p"], args["q"])
    walker.preprocess_transition_probs()
    node_sequences = walker.simulate_walks(args["num_walks"],
                                           args["walk_length"])
    return learn_embeddings(node_sequences, args)
def main(args):
    """
    Simulate the walks one round at a time.

    Calls simulate_walks(1, args.walk_length, i) once for every round i in
    0..args.num_walks-1. The return values are not used here.
    """
    source_graph = read_graph()
    walker = node2vec.Graph(source_graph, args.directed, args.p, args.q)
    walker.preprocess_transition_probs()
    for round_index in range(args.num_walks):
        walker.simulate_walks(1, args.walk_length, round_index)
def main(args):
    """
    Pipeline for representational learning for all nodes in a graph.

    Reads the graph, prepares the node2vec transition probabilities,
    simulates the walks and passes them to learn_embeddings().
    """
    # A SNAP-based loader (snap.LoadEdgeList on CA-AstroPh.txt) was tried
    # here before; read_graph() is the loader in use.
    graph_data = read_graph()
    n2v_graph = node2vec.Graph(graph_data, args.directed, args.p, args.q)
    n2v_graph.preprocess_transition_probs()
    sampled_walks = n2v_graph.simulate_walks(args.num_walks,
                                             args.walk_length)
    learn_embeddings(sampled_walks)
def graph2walks(self, method="", params=None):
    """
    Build a walk corpus from self.graph and store it in self.corpus.

    Parameters:
        method: "deepwalk" or "node2vec".
        params: mapping of walk settings. Both methods read
            'number_of_walks' and 'walk_length'; "deepwalk" also reads
            'alpha', "node2vec" also reads 'p' and 'q'. Defaults to a new
            empty dict.

    Returns:
        The generated corpus (also stored as self.corpus).

    Raises:
        ValueError: if method is neither "deepwalk" nor "node2vec".

    Side effects: self.params is set; "deepwalk" writes
    "./temp/graph.edgelist"; "node2vec" sets the 'weight' of every edge of
    self.graph to 1.
    """
    # A fresh dict per call: the previous mutable default ({}) was a single
    # object shared by every instance that relied on the default.
    self.params = {} if params is None else params

    if method == "deepwalk":
        number_of_walks = self.params['number_of_walks']
        walk_length = self.params['walk_length']
        alpha = self.params['alpha']

        # Temporarily generate the edge list
        with open("./temp/graph.edgelist", 'w') as f:
            for line in nx.generate_edgelist(self.graph, data=False):
                f.write("{}\n".format(line))

        dwg = deepwalk.load_edgelist("./temp/graph.edgelist",
                                     undirected=True)
        corpus = deepwalk.build_deepwalk_corpus(G=dwg,
                                                num_paths=number_of_walks,
                                                path_length=walk_length,
                                                alpha=alpha,
                                                rand=random.Random(0))

    elif method == "node2vec":
        number_of_walks = self.params['number_of_walks']
        walk_length = self.params['walk_length']
        p = self.params['p']
        q = self.params['q']

        for u, v in self.graph.edges():
            self.graph[u][v]['weight'] = 1
        G = node2vec.Graph(nx_G=self.graph, p=p, q=q, is_directed=False)
        G.preprocess_transition_probs()
        corpus = G.simulate_walks(num_walks=number_of_walks,
                                  walk_length=walk_length)

    else:
        raise ValueError("Invalid method name!")

    self.corpus = corpus
    return self.corpus
def main(args):
    """
    Pipeline for representational learning for all nodes in a graph.

    Uses fixed settings instead of args: node2vec.Graph is built with
    "true", 1 and 10 as its second to fourth arguments, and 10 walks of
    length 80 are simulated. args is not read.
    """
    source_graph = read_graph()
    # NOTE(review): the second argument is the string "true", not a bool.
    walker = node2vec.Graph(source_graph, "true", 1, 10)
    walker.preprocess_transition_probs()
    node_sequences = walker.simulate_walks(10, 80)
    learn_embeddings(node_sequences)
def main(args):
    """
    Pipeline for representational learning for all nodes in a graph.

    Reads 'data/citations', passes the graph through get_gaint_comp() and
    runs the walk simulation and learn_embeddings() on the result.
    """
    full_graph = read_graph('data/citations')
    component = get_gaint_comp(full_graph)
    walker = node2vec.Graph(component, args.directed, args.p, args.q)
    walker.preprocess_transition_probs()
    node_sequences = walker.simulate_walks(args.num_walks, args.walk_length)
    learn_embeddings(node_sequences)
def doNode2VecStuff(nx_graph, filename='sample.txt', p=1, q=2, num_walks=10,
                    walk_length=80, dimensions=2, window_size=10,
                    num_workers=10, num_iters=5):
    """
    Train node2vec embeddings for nx_graph and save them to filename.

    Parameters:
        nx_graph: graph handed to node2vec.Graph (treated as undirected).
        filename: output path, written in text word2vec format.
        p, q: node2vec walk parameters.
        num_walks, walk_length: walk simulation settings.
        dimensions, window_size, num_workers, num_iters: passed to Word2Vec
            as size, window, workers and iter.
    """
    Graph_n2v = node2vec.Graph(nx_graph, False, p, q)  # p and q param
    Graph_n2v.preprocess_transition_probs()
    walks = Graph_n2v.simulate_walks(num_walks, walk_length)
    # Each walk must be a real list: Word2Vec iterates the corpus more than
    # once, and a Python 3 map object is empty after the first pass.
    walks = [[str(node) for node in walk] for walk in walks]
    model = Word2Vec(walks, size=dimensions, window=window_size, min_count=0,
                     sg=1, workers=num_workers, iter=num_iters)
    model.wv.save_word2vec_format(filename, binary=False)
def generate_random_walks(input, num_walks, walk_length):
    """
    Simulate walks on the graph read from input and return them as an array.

    The graph is treated as undirected with p=1 and q=1 (the DeepWalk
    setting). Returns np.array(walks).
    """
    source_graph = read_graph(input)
    # DeepWalk
    walker = node2vec.Graph(source_graph, is_directed=False, p=1, q=1)
    walker.preprocess_transition_probs()
    node_sequences = walker.simulate_walks(num_walks, walk_length)
    return np.array(node_sequences)
def ns(nx_G, num_walks=100, walk_length=3):
    """
    Return a copy of nx_G with recomputed edge weights.

    Walks are simulated on nx_G (undirected, p=q=1) and summarised by
    calc_pvisit(). The 'weight' of each edge (u, v) in the copy is set to
    cossim() of the calc_pvisit entries of u and v. nx_G is not modified.
    """
    walker = node2vec.Graph(nx_G, False, 1, 1)
    walker.preprocess_transition_probs()
    node_sequences = walker.simulate_walks(num_walks, walk_length, False)
    visit_profile = calc_pvisit(node_sequences)

    reweighted = nx_G.copy()
    for src, dst in reweighted.edges():
        similarity = cossim(visit_profile[src], visit_profile[dst])
        reweighted[src][dst]['weight'] = similarity
    return reweighted
def _load_pickle(path):
    """Unpickle the object stored at path and close the file afterwards."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)


def main(args):
    """
    Train and/or evaluate embeddings for link prediction.

    Reads the full graph from args.input, the train/test edge samples and
    the training graph from the paths in config, and prints their sizes.
    When config.train is set, embeddings are learned from walks on the
    training graph (or training is resumed when config.resume_training is
    set). When config.evaluate is set, learn_embeddings() is called in
    evaluation mode with either an embeddings file or, for the 'rnn' model,
    a checkpoint file.
    """
    nx_G = read_graph(file=args.input, get_connected_graph=True,
                      remove_selfloops=True, get_directed=False)
    print('Original Graph: nodes: {}, edges: {}'.format(
        nx_G.number_of_nodes(), nx_G.number_of_edges()))
    print()

    # The helper closes each file; the original left four handles open.
    train_pos = _load_pickle(config.train_pos)
    test_pos = _load_pickle(config.test_pos)
    train_neg = _load_pickle(config.train_neg)
    test_neg = _load_pickle(config.test_neg)
    print('Number of positive training samples: ', len(train_pos))
    print('Number of negative training samples: ', len(train_neg))
    print('Number of positive testing samples: ', len(test_pos))
    print('Number of negative testing samples: ', len(test_neg))

    train_graph = read_graph(file=config.train_graph,
                             get_connected_graph=False,
                             remove_selfloops=False, get_directed=False)
    print('Train graph created: {} nodes, {} edges'.format(
        train_graph.number_of_nodes(), train_graph.number_of_edges()))
    print('Number of connected components: ',
          nx.number_connected_components(train_graph))

    if config.train:
        if config.resume_training:
            _ = learn_embeddings(walks=None)
        else:
            G = node2vec.Graph(train_graph, args.directed, args.p, args.q)
            G.preprocess_transition_probs()
            walks = G.simulate_walks(args.num_walks, args.walk_length)
            # learn the embeddings
            _ = learn_embeddings(walks)

    embeddings_file = None
    checkpoint_file = None
    if config.evaluate:
        # Compare by value: "is not" tests object identity, which is not a
        # reliable way to compare against a string literal.
        if config.model != 'rnn':
            embeddings_file = config.embeddings_dir + config.output_file
        else:
            checkpoint_file = config.checkpoint_dir + config.checkpoint_name
            print(checkpoint_file)
        # evaluate embeddings in link prediction
        _ = learn_embeddings(walks=None, train_pos=train_pos,
                             train_neg=train_neg, test_pos=test_pos,
                             test_neg=test_neg, eval_bool=True,
                             embeddings_file=embeddings_file,
                             checkpoint_file=checkpoint_file)
def ce(nx_G, k=3, num_walks=10, walk_length=3):
    """
    Return a copy of nx_G with edge weights derived from local walks.

    For every edge (u, v) the subgraph induced by all nodes within k hops
    of u or of v is built, walks are simulated on it (undirected, p=q=1)
    and summarised by calc_ce(). The edge weight in the copy is the mean of
    the calc_ce entries for (u, v) and (v, u). nx_G is not modified.

    Parameters:
        nx_G: input graph.
        k: hop cutoff for the neighbourhood of each endpoint.
        num_walks, walk_length: walk simulation settings.
    """
    newgraph = nx_G.copy()
    for u, v in tqdm(nx_G.edges()):
        s_u = set(nx.single_source_shortest_path_length(nx_G, u, cutoff=k))
        # Neighbourhood of v: the original used u here as well, which made
        # s_v a duplicate of s_u and the union below pointless.
        s_v = set(nx.single_source_shortest_path_length(nx_G, v, cutoff=k))
        sg = node2vec.Graph(nx_G.subgraph(s_u.union(s_v)), False, 1, 1)
        sg.preprocess_transition_probs()
        walks = sg.simulate_walks(num_walks, walk_length, False)
        probs = calc_ce(walks)
        newgraph[u][v]['weight'] = newgraph[v][u]['weight'] = 0.5 * (
            probs[(u, v)] + probs[(v, u)])
    return newgraph
def main(args, Samplegraph):
    """
    Pipeline for representational learning for all nodes in a graph.

    Works on the graph passed as Samplegraph. Prints its node count and its
    edges with data, then simulates walks and passes them to
    learn_embeddings().
    """
    print(Samplegraph.number_of_nodes())
    print(Samplegraph.edges(data=True))
    walker = node2vec.Graph(Samplegraph, args.directed, args.p, args.q)
    walker.preprocess_transition_probs()
    node_sequences = walker.simulate_walks(args.num_walks, args.walk_length)
    learn_embeddings(node_sequences)
def main():
    """
    Embed the log with Trace2Vec, Node2Vec and NGrams and cluster each result.

    For every method the vectors are printed, each algorithm in clustering
    is run on them and the assignment is handed to the method's
    endCluster(). The scores of all three methods are printed at the end,
    and every entry of embed is plotted when vectorsize is 2.

    logName, vectorsize, clustering and embed are read from module scope.
    """
    prepareInput.createInput(logName)

    # ---------- Trace2Vec
    Trace2Vec.learn(logName, vectorsize)
    labels = Trace2Vec.getY(logName)
    vectors, corpus = Trace2Vec.startCluster(logName, vectorsize)
    printMatrix(vectors, "Trace2Vec", "vectors")
    for algorithm in clustering:
        assignment = cluster(algorithm, vectors, labels)
        printVector(assignment, "Trace2Vec", "clusters", algorithm)
        Trace2Vec.endCluster(logName, assignment, vectorsize, algorithm,
                             corpus)

    # ---------- Node2Vec
    n2v_args = Node2Vec.parse_args()
    n2v_args.input = "input/" + logName + ".graph"
    n2v_args.output = ("output/" + logName + "N2VVS" + str(vectorsize) +
                       ".node2vec")
    source_graph = Node2Vec.read_graph(n2v_args)
    walker = node2vec.Graph(source_graph, True, n2v_args.p, n2v_args.q)
    walker.preprocess_transition_probs()
    walks = walker.simulate_walks(n2v_args.num_walks, n2v_args.walk_length)
    Node2Vec.learn_embeddings(n2v_args, logName, vectorsize, walks)
    Node2Vec.extract(logName, vectorsize)

    labels = Node2Vec.getY(logName)
    vectors, corpus = Node2Vec.startCluster(logName, vectorsize)
    printMatrix(vectors, "Node2Vec", "vectors")
    for algorithm in clustering:
        assignment = cluster(algorithm, vectors, labels)
        printVector(assignment, "Node2Vec", "clusters", algorithm)
        Node2Vec.endCluster(logName, assignment, vectorsize, algorithm,
                            corpus)

    # ---------- NGrams
    vectors, labels = NGrams.ngrams_BPI_2015(logName, vectorsize)
    printMatrix(vectors, "NGrams", "vectors")
    for algorithm in clustering:
        assignment = cluster(algorithm, vectors, labels)
        printVector(assignment, "NGrams", "clusters", algorithm)
        NGrams.endCluster(logName, assignment, vectorsize, algorithm,
                          [0] * len(vectors))

    # ---------- scores
    scores = []
    for method in ("Trace2Vec", "Node2Vec", "NGrams"):
        scores.append(get_scores(method))
    for score in scores:
        print_scores(score)

    if vectorsize == 2:
        for emb in embed:
            myPlot.plot(emb)
def main(args):
    '''
    Pipeline for representational learning for all nodes in a graph.
    '''
    # Read the graph from text.
    source_graph = read_graph()
    # args.directed is a bool marking the graph as directed or undirected;
    # args.p and args.q are the parameters p and q. This step builds the
    # graph object.
    walker = node2vec.Graph(source_graph, args.directed, args.p, args.q)
    # Generate the transition probability vector of every node.
    walker.preprocess_transition_probs()
    node_sequences = walker.simulate_walks(args.num_walks, args.walk_length)
    # The walks are node sequences produced by random walks; they are used
    # as text input for the Word2Vec model, which generates the vectors.
    learn_embeddings(node_sequences)
def main(args):
    """
    Pipeline for representational learning for all nodes in a graph.

    The value returned by learn_embeddings() is stored under the key
    "node_embs" and saved to args.output with torch.save.
    """
    source_graph = read_graph()
    walker = node2vec.Graph(source_graph, args.directed, args.p, args.q)
    walker.preprocess_transition_probs()
    node_sequences = walker.simulate_walks(args.num_walks, args.walk_length)
    learned = learn_embeddings(node_sequences)
    torch.save({"node_embs": learned}, args.output)