def regenerate_impropers(g: Graph, improper_def='smirnoff'):
    """Rebuild the ``n4_improper`` nodes of ``g`` for an improper definition.

    All existing ``n4_improper`` nodes (and therefore every edge and every
    piece of data attached to them, including impropers previously generated
    by JanossyPoolingImproper) are removed.  One new ``n4_improper`` node is
    then created per improper permutation returned by
    ``improper_torsion_indices(g.mol, improper_def)`` and wired to the ``n1``
    nodes and to the global ``g`` node.

    Parameters
    ----------
    g : Graph
        Graph whose ``heterograph`` attribute is replaced by the rebuilt one.
    improper_def : str
        Name of the improper permutation scheme, forwarded unchanged to
        ``improper_torsion_indices``.

    Returns
    -------
    Graph
        The same ``g`` object, with ``g.heterograph`` pointing at the
        rebuilt heterograph.
    """
    # The dgl.* calls below are used in their functional form: each returns
    # a new graph, so the result has to be written back to ``g`` explicitly.
    hg = g.heterograph
    hg = dgl.remove_nodes(hg, hg.nodes('n4_improper'), 'n4_improper')

    # Generate the new improper torsion permutations.
    idxs = improper_torsion_indices(g.mol, improper_def)
    if len(idxs) == 0:
        # No impropers under this definition: still store the pruned graph,
        # otherwise the stale n4_improper nodes would silently survive.
        g.heterograph = hg
        return g

    # One new n4_improper node per permutation.
    num_permuts = idxs.shape[0]
    hg = dgl.add_nodes(hg, num_permuts, ntype='n4_improper')
    permut_ids = np.arange(num_permuts)

    # Edges between each improper permutation and its four n1 nodes.
    for i in range(4):
        n1_ids = idxs[:, i]

        # improper node -> n1 node
        outgoing_etype = ('n4_improper', f'n4_improper_has_{i}_n1', 'n1')
        hg = dgl.add_edges(hg, permut_ids, n1_ids, etype=outgoing_etype)

        # n1 node -> improper node
        incoming_etype = ('n1', f'n1_as_{i}_in_n4_improper', 'n4_improper')
        hg = dgl.add_edges(hg, n1_ids, permut_ids, etype=incoming_etype)

    # Edges between the improper permutations and the graph-level node
    # (used for global pooling); every permutation connects to node 0 of 'g'.
    graph_ids = np.zeros_like(permut_ids)

    # improper node -> graph node
    outgoing_etype = ('n4_improper', 'n4_improper_in_g', 'g')
    hg = dgl.add_edges(hg, permut_ids, graph_ids, etype=outgoing_etype)

    # graph node -> improper node
    incoming_etype = ('g', 'g_has_n4_improper', 'n4_improper')
    hg = dgl.add_edges(hg, graph_ids, permut_ids, etype=incoming_etype)

    # Record which atom indices make up each improper permutation.
    hg.nodes['n4_improper'].data['idxs'] = torch.tensor(idxs)

    g.heterograph = hg
    return g
def sampler_frontier(self, block_id, g, seed_nodes, timestamp):
    """Sample a time-restricted neighbourhood around ``seed_nodes``.

    Steps:
      1. take the in-subgraph of ``g`` for the seeds and add one
         seed -> seed edge per seed node;
      2. keep only the edges whose ``'timestamp'`` is strictly smaller than
         ``timestamp`` or is ``<= 0``;
      3. translate the seeds to node ids of that temporal subgraph and run
         ``self.sampler`` on it.

    ``block_id`` is accepted but not used by this method.

    Returns whatever ``self.sampler(g=..., nodes=...)`` returns.
    """
    neighborhood = dgl.in_subgraph(g, seed_nodes)
    neighborhood = dgl.add_edges(neighborhood, seed_nodes, seed_nodes)

    # Edge filter.  The ``<= 0`` term presumably exists so that the
    # seed -> seed edges added above (assumed to carry a default timestamp
    # of 0 -- TODO confirm) are never dropped.
    edge_times = neighborhood.edata['timestamp']
    is_earlier = edge_times < timestamp
    is_non_positive = edge_times <= 0
    keep_edge = is_earlier + is_non_positive
    temporal_subgraph = dgl.edge_subgraph(neighborhood, keep_edge)

    # ids of the temporal subgraph's nodes in the graph it was cut from
    temp2origin = temporal_subgraph.ndata[dgl.NID]

    # lookup table: id before the edge_subgraph cut -> id inside it
    root2sub_dict = {
        origin_id: sub_id
        for origin_id, sub_id in zip(
            temp2origin.tolist(), temporal_subgraph.nodes().tolist()
        )
    }

    # Overwrite the stored ids with the ids that ``g`` itself carries.
    temporal_subgraph.ndata[dgl.NID] = g.ndata[dgl.NID][temp2origin]

    local_seeds = []
    for node in seed_nodes:
        local_seeds.append(root2sub_dict[int(node)])

    final_subgraph = self.sampler(g=temporal_subgraph, nodes=local_seeds)
    # NOTE(review): the return value is discarded.  If this is
    # DGLGraph.remove_self_loop (which returns a new graph), the call has
    # no effect on ``final_subgraph`` -- confirm the intent.
    final_subgraph.remove_self_loop()
    return final_subgraph
import torch as th

# Demo script: build a small heterogeneous graph, draw uniform negative
# samples for one edge type, then append one edge with dgl.add_edges.
# NOTE(review): ``dgl`` is used below but is not imported in the visible
# part of the file -- presumably imported elsewhere; confirm.

# Heterogeneous graph with 3 node types (drug, gene, disease) and
# 3 canonical edge types.
graph_data = {
    ('drug', 'interacts', 'drug'): (th.tensor([0, 1]), th.tensor([1, 2])),
    ('drug', 'interacts', 'gene'): (th.tensor([0, 1]), th.tensor([2, 3])),
    ('drug', 'treats', 'disease'): (th.tensor([1]), th.tensor([2])),
}
g = dgl.heterograph(graph_data)

# Notebook-style inspection: as plain statements the values are discarded.
g.ntypes
g.etypes
g.canonical_etypes

# Uniform negative sampler constructed with argument 4 (presumably the
# number of negatives drawn per positive edge -- see the DGL docs).
neg_sampler = dgl.dataloading.negative_sampler.Uniform(4)
print(g)

# Sample negatives for edge 0 of the ('drug', 'interacts', 'gene') relation
# and show the entry produced for that relation.
g1 = neg_sampler(g, {("drug", "interacts", "gene"): th.tensor([0])})
print(g1[('drug', 'interacts', 'gene')])

# Append a single drug 0 -> gene 3 edge; dgl.add_edges returns the result,
# which is bound back to ``g``.
g = dgl.add_edges(
    g,
    th.tensor([0]),
    th.tensor([3]),
    etype=('drug', 'interacts', 'gene'),
)
print(g)
def _edge_node_pairs(subgraph):
    """Return the edges of ``subgraph`` as a ``(num_edges, 2)`` array of (src, dst)."""
    src, dst = subgraph.edges()
    return torch.vstack([src, dst]).numpy().T


def _graph_with_only_edges(graph, node_pairs):
    """Copy ``graph``, drop every edge of the copy, then add ``node_pairs`` as edges."""
    stripped = graph.__copy__()
    stripped.remove_edges(torch.arange(graph.number_of_edges()))
    return dgl.add_edges(stripped, node_pairs.T[0], node_pairs.T[1])


def data_process(graph, val_frac=0.10, test_frac=0.05):
    """Split the edges of ``graph`` into train/val/test sets for link prediction.

    Produced pieces:
      * train positive edges -- as a graph, used for training and for
        inference;
      * val / test positive edges -- as graphs over the full node set, used
        for computing model metrics;
      * val / test negative edges -- node pairs obtained from
        ``neg_sampling`` and turned into graphs, used for computing model
        metrics.

    Side effects: boolean masks are stored on the input graph as
    ``graph.edata['train_mask']``, ``graph.edata['val_mask']`` and
    ``graph.edata['test_mask']``.  The shuffle uses NumPy's global random
    state, so seed ``np.random`` for reproducible splits.

    Parameters
    ----------
    graph :
        DGL graph whose edges are split.
    val_frac : float
        Fraction of edges assigned to validation (default 0.10).
    test_frac : float
        Fraction of edges assigned to test (default 0.05).

    Returns
    -------
    tuple
        ``(train_g, val_pos_graph, val_neg_graph, test_pos_graph,
        test_neg_graph)``; ``train_g`` has self-loops added.

    Raises
    ------
    ValueError
        If a fraction is negative or the two fractions sum to more than 1.
    """
    if val_frac < 0 or test_frac < 0 or val_frac + test_frac > 1:
        raise ValueError(
            "val_frac and test_frac must be non-negative and sum to at most 1"
        )

    num_edges = graph.number_of_edges()

    # Shuffle the edge ids directly.  Edge ids are 0..num_edges-1, so no
    # (src, dst) -> id lookup is needed; such a lookup yields a single id
    # per node pair and would therefore repeat ids on a multigraph.
    shuffled_ids = np.arange(num_edges)
    np.random.shuffle(shuffled_ids)

    num_val = int(num_edges * val_frac)
    num_test = int(num_edges * test_frac)
    split_ids = {
        'train': shuffled_ids[num_val + num_test:],
        'val': shuffled_ids[:num_val],
        'test': shuffled_ids[num_val:num_val + num_test],
    }

    # Boolean edge masks, stored on the graph (TODO: persist the masks).
    masks = {}
    for split, ids in split_ids.items():
        mask = torch.zeros_like(graph.edges()[0], dtype=torch.bool)
        mask[ids] = True
        masks[split] = mask
        graph.edata[f'{split}_mask'] = mask

    # Ids (in the original graph) of the edges in each split, ascending.
    all_edge_ids = torch.arange(num_edges)
    train_edges = all_edge_ids[masks['train']]
    val_edges = all_edge_ids[masks['val']]
    test_edges = all_edge_ids[masks['test']]

    # Training graph; node pairs are taken BEFORE self-loops are added so
    # that they do not contain self-loop edges.
    train_g = graph.edge_subgraph(train_edges, preserve_nodes=True)
    train_pos_nodepair = _edge_node_pairs(train_g)
    train_g = dgl.add_self_loop(train_g)

    # Positive validation graph (edges should be one-directional).
    val_pos_graph = graph.edge_subgraph(val_edges, preserve_nodes=True)
    val_pos_nodepair = _edge_node_pairs(val_pos_graph)

    # Positive test graph.
    test_pos_graph = graph.edge_subgraph(test_edges, preserve_nodes=True)
    test_pos_nodepair = _edge_node_pairs(test_pos_graph)

    # Negative sampling: given head/tail node indices, a pair that does not
    # occur among graph.edges() is considered a valid negative sample.
    val_neg_nodepair, test_neg_nodepair = neg_sampling(
        train_pos_nodepair, val_pos_nodepair, test_pos_nodepair, graph
    )

    # Turn the sampled negative pairs into graphs on the original node set.
    val_neg_graph = _graph_with_only_edges(graph, val_neg_nodepair)
    test_neg_graph = _graph_with_only_edges(graph, test_neg_nodepair)

    return train_g, val_pos_graph, val_neg_graph, test_pos_graph, test_neg_graph