Example #1
0
def regenerate_impropers(g: Graph, improper_def='smirnoff'):
    """Rebuild the ``n4_improper`` nodes of ``g`` for a permutation scheme.

    All existing ``n4_improper`` nodes are removed from ``g.heterograph``
    and a fresh set is created from the indices returned by
    ``improper_torsion_indices(g.mol, improper_def)``, one node per row.
    Each new node is wired to its four ``n1`` nodes and to the graph-level
    ``g`` node, in both directions.

    NOTE: This clears the data on all ``n4_improper`` nodes, including
    anything previously generated by JanossyPoolingImproper.

    Parameters
    ----------
    g : Graph
        Graph wrapper exposing ``heterograph`` and ``mol`` attributes. Its
        ``heterograph`` attribute is replaced by the rebuilt heterograph.
    improper_def : str
        Name of the improper permutation scheme, forwarded unchanged to
        ``improper_torsion_indices``.

    Returns
    -------
    Graph
        The same ``g`` object, with ``g.heterograph`` updated.
    """
    # Drop the old improper nodes. ``dgl.remove_nodes`` returns a new graph,
    # so nothing is visible on ``g`` until ``hg`` is assigned back.
    hg = g.heterograph
    hg = dgl.remove_nodes(hg, hg.nodes('n4_improper'), 'n4_improper')

    # Generate the new improper torsion permutations.
    idxs = improper_torsion_indices(g.mol, improper_def)
    if len(idxs) == 0:
        # No impropers under this definition: still commit the removal so
        # that stale improper nodes do not linger on the graph.
        g.heterograph = hg
        return g

    # One new n4_improper node per permutation.
    num_permuts = idxs.shape[0]
    hg = dgl.add_nodes(hg, num_permuts, ntype='n4_improper')
    permut_ids = np.arange(num_permuts)

    # Edges between each improper node and the n1 node at each of its four
    # positions, in both directions.
    for i in range(4):
        n1_ids = idxs[:, i]

        outgoing_etype = ('n4_improper', f'n4_improper_has_{i}_n1', 'n1')
        hg = dgl.add_edges(hg, permut_ids, n1_ids, etype=outgoing_etype)

        incoming_etype = ('n1', f'n1_as_{i}_in_n4_improper', 'n4_improper')
        hg = dgl.add_edges(hg, n1_ids, permut_ids, etype=incoming_etype)

    # Edges between every improper node and graph node 0 (used for global
    # pooling), in both directions.
    graph_ids = np.zeros_like(permut_ids)
    hg = dgl.add_edges(hg,
                       permut_ids,
                       graph_ids,
                       etype=('n4_improper', 'n4_improper_in_g', 'g'))
    hg = dgl.add_edges(hg,
                       graph_ids,
                       permut_ids,
                       etype=('g', 'g_has_n4_improper', 'n4_improper'))

    # Store the index rows on the improper nodes themselves.
    hg.nodes['n4_improper'].data['idxs'] = torch.tensor(idxs)

    g.heterograph = hg
    return g
Example #2
0
    def sampler_frontier(self,
                         block_id,
                         g,
                         seed_nodes,
                         timestamp):
        """Sample a temporal neighbourhood subgraph around ``seed_nodes``.

        The in-edges of the seeds are collected, one self loop is added per
        seed, and only edges whose ``'timestamp'`` is strictly below
        ``timestamp`` or is ``<= 0`` are kept. ``self.sampler`` is then run
        on that filtered subgraph with the seeds translated to its local
        node IDs. ``block_id`` is not used by this method.

        Returns whatever ``self.sampler`` returns for the filtered subgraph.
        """
        # In-edge neighbourhood of the seeds plus one self loop per seed.
        neighborhood = dgl.add_edges(dgl.in_subgraph(g, seed_nodes),
                                     seed_nodes, seed_nodes)

        # Keep edges older than ``timestamp`` or with a non-positive
        # timestamp. Presumably the freshly added self loops carry a zero
        # timestamp and survive via the second term -- TODO confirm.
        edge_time = neighborhood.edata['timestamp']
        keep_edge = (edge_time < timestamp) + (edge_time <= 0)
        past_graph = dgl.edge_subgraph(neighborhood, keep_edge)

        # ndata[dgl.NID] of the edge subgraph holds the node IDs of the
        # graph it was extracted from; build the reverse lookup.
        origin_ids = past_graph.ndata[dgl.NID]
        local_ids = past_graph.nodes()
        origin_to_local = {
            origin: local
            for origin, local in zip(origin_ids.tolist(), local_ids.tolist())
        }

        # Re-point the stored IDs at the values kept in g.ndata[dgl.NID].
        past_graph.ndata[dgl.NID] = g.ndata[dgl.NID][origin_ids]

        local_seeds = [origin_to_local[int(seed)] for seed in seed_nodes]
        final_subgraph = self.sampler(g=past_graph, nodes=local_seeds)
        # NOTE(review): the result of remove_self_loop() is discarded here;
        # if that call is not in-place the self loops remain -- verify
        # against the DGL version in use.
        final_subgraph.remove_self_loop()
        return final_subgraph
Example #3
0
import torch as th
# Build a heterogeneous graph with 3 node types and 3 edge types.
# NOTE(review): `dgl` is used below but no `import dgl` is visible in this
# snippet -- confirm it is imported elsewhere.
graph_data = {
    ('drug', 'interacts', 'drug'): (th.tensor([0, 1]), th.tensor([1, 2])),
    ('drug', 'interacts', 'gene'): (th.tensor([0, 1]), th.tensor([2, 3])),
    ('drug', 'treats', 'disease'): (th.tensor([1]), th.tensor([2]))
}
g = dgl.heterograph(graph_data)
# Bare attribute expressions: they only display a value in an interactive
# session (REPL/notebook) and have no effect when run as a script.
g.ntypes
g.etypes
g.canonical_etypes

# Uniform negative sampler constructed with the argument 4 (presumably the
# number of negatives drawn per positive edge -- verify against DGL docs).
neg_sampler = dgl.dataloading.negative_sampler.Uniform(4)
print(g)

# Draw negatives for edge ID 0 of the ('drug', 'interacts', 'gene') relation.
g1 = neg_sampler(g, {("drug", "interacts", "gene"): th.tensor([0])})

# The sampler result is looked up by canonical edge type.
print(g1[('drug', 'interacts', 'gene')])

# Related APIs: dgl.add_edges, dgl.remove_edges

# Earlier attempts, kept for reference (not executed):
#g = dgl.add_edges(g,{("drug","interacts", "gene"): (th.tensor([0]),th.tensor([0]))})

#g = dgl.add_edges(g, {("drug","interacts", "gene"):(th.tensor([1, 3]), th.tensor([0, 1])})
# Add one drug -> gene edge (drug 0 -> gene 3); dgl.add_edges returns the
# resulting graph, which is rebound to `g`.
g = dgl.add_edges(g,
                  th.tensor([0]),
                  th.tensor([3]),
                  etype=('drug', 'interacts', 'gene'))
print(g)

# Unrelated leftover, not executed:
#self.negatives = self.sample_negative(self.train_rating_info, self.sample_rate)
Example #4
0
def _edge_mask(template, edge_ids):
    """Return a boolean mask shaped like ``template`` that is True at ``edge_ids``."""
    mask = torch.zeros_like(template, dtype=torch.bool)
    mask[edge_ids] = True
    return mask


def _positive_subgraph(graph, mask):
    """Return the edge subgraph selected by ``mask`` and its (src, dst) pairs."""
    edge_ids = torch.arange(graph.number_of_edges())[mask]
    # All nodes are kept so node IDs stay aligned with the full graph.
    subgraph = graph.edge_subgraph(edge_ids, preserve_nodes=True)
    src, dst = subgraph.edges()
    node_pairs = torch.vstack([src, dst]).numpy().T
    return subgraph, node_pairs


def _negative_graph(graph, node_pairs):
    """Return a copy of ``graph`` whose only edges are the given node pairs."""
    neg_graph = graph.__copy__()
    neg_graph.remove_edges(torch.arange(graph.number_of_edges()))
    return dgl.add_edges(neg_graph, node_pairs.T[0], node_pairs.T[1])


def data_process(graph, val_ratio=0.10, test_ratio=0.05):
    """Split the edges of ``graph`` into train/val/test sets for link prediction.

    Edge IDs are shuffled with ``np.random`` and partitioned into a
    validation part, a test part and a training part (the remainder).
    The partition is stored on the input graph as the boolean edge features
    ``'train_mask'``, ``'val_mask'`` and ``'test_mask'``.

    Outputs:

    * train positive edges: a graph, used for training and for inference;
      self loops are added to it.
    * val / test positive edges: graphs over the held-out edges, used to
      compute model metrics.
    * val / test negative edges: graphs built from the node pairs returned
      by ``neg_sampling``, used to compute model metrics.

    Parameters
    ----------
    graph : DGLGraph
        Graph whose edges are split. It is modified in place: the three
        mask features are written to ``graph.edata``.
    val_ratio : float
        Fraction of edges held out for validation.
    test_ratio : float
        Fraction of edges held out for testing.

    Returns
    -------
    tuple
        ``(train_g, val_pos_graph, val_neg_graph, test_pos_graph,
        test_neg_graph)``.

    Raises
    ------
    ValueError
        If a ratio is negative or the two ratios sum to more than 1.
    """
    if val_ratio < 0 or test_ratio < 0 or val_ratio + test_ratio > 1:
        raise ValueError(
            "val_ratio and test_ratio must be non-negative and sum to at most 1")

    num_edges = graph.number_of_edges()

    # Shuffle every edge ID exactly once, then slice off val and test; the
    # rest is the training set.
    shuffled_ids = np.arange(num_edges)
    np.random.shuffle(shuffled_ids)
    num_val = int(num_edges * val_ratio)
    num_test = int(num_edges * test_ratio)
    val_idx = shuffled_ids[:num_val]
    test_idx = shuffled_ids[num_val:num_val + num_test]
    train_idx = shuffled_ids[num_val + num_test:]

    edge_src = graph.edges()[0]
    train_mask = _edge_mask(edge_src, train_idx)
    test_mask = _edge_mask(edge_src, test_idx)
    val_mask = _edge_mask(edge_src, val_idx)

    # TODO save the mask
    # The masks are attached before the subgraphs are extracted, as in the
    # original ordering.
    graph.edata['train_mask'] = train_mask
    graph.edata['val_mask'] = val_mask
    graph.edata['test_mask'] = test_mask

    # Training graph: node pairs are captured before self loops are added,
    # so they do not include the self-loop edges.
    train_g, train_pos_nodepair = _positive_subgraph(graph, train_mask)
    train_g = dgl.add_self_loop(train_g)

    val_pos_graph, val_pos_nodepair = _positive_subgraph(graph, val_mask)
    test_pos_graph, test_pos_nodepair = _positive_subgraph(graph, test_mask)

    # Negative sampling: per the original note, a node pair that does not
    # occur among graph.edges() is treated as a valid negative sample.
    val_neg_nodepair, test_neg_nodepair = neg_sampling(
        train_pos_nodepair, val_pos_nodepair, test_pos_nodepair, graph)

    # Build one negative graph each for validation and test.
    val_neg_graph = _negative_graph(graph, val_neg_nodepair)
    test_neg_graph = _negative_graph(graph, test_neg_nodepair)

    return train_g, val_pos_graph, val_neg_graph, test_pos_graph, test_neg_graph