Example #1
    def forward(
        self,
        g: dgl.DGLGraph,
        feats: Dict[str, torch.Tensor],
        norm_atom: Optional[torch.Tensor] = None,
        norm_bond: Optional[torch.Tensor] = None,
    ) -> Dict[str, torch.Tensor]:
        """
        Args:
            g: the graph
            feats: node features. Allowed node types are `atom`, `bond` and `global`.
            norm_atom: values used to normalize atom features as proposed in graph norm.
            norm_bond: values used to normalize bond features as proposed in graph norm.

        Returns:
            updated node features.
        """

        g = g.local_var()

        h = feats["atom"]
        e = feats["bond"]
        u = feats["global"]

        # for residual connection
        h_in = h
        e_in = e
        u_in = u

        g.nodes["atom"].data.update({"Ah": self.A(h), "Dh": self.D(h), "Eh": self.E(h)})
        g.nodes["bond"].data.update({"Be": self.B(e)})
        g.nodes["global"].data.update({"Cu": self.C(u), "Fu": self.F(u)})

        # update bond feature e
        g.multi_update_all(
            {
                "a2b": (fn.copy_u("Ah", "m"), fn.sum("m", "e")),  # A * (h_i + h_j)
                "b2b": (fn.copy_u("Be", "m"), fn.sum("m", "e")),  # B * e_ij
                "g2b": (fn.copy_u("Cu", "m"), fn.sum("m", "e")),  # C * u
            },
            "sum",
        )

        e = g.nodes["bond"].data["e"]
        if self.graph_norm:
            e = e * norm_bond
        if self.batch_norm:
            e = self.bn_node_e(e)
        e = self.activation(e)
        if self.residual:
            e = e_in + e
        g.nodes["bond"].data["e"] = e

        # update atom feature h

        # Copy Eh to bond nodes, without reduction.
        # This is the first arrow in: Eh_j -> bond node -> atom i node
        # The second arrow is done in self.message_fn and self.reduce_fn below
        g.update_all(fn.copy_u("Eh", "Eh_j"), self.reduce_fn_a2b, etype="a2b")

        g.multi_update_all(
            {
                "a2a": (fn.copy_u("Dh", "m"), fn.sum("m", "h")),  # D * h_i
                "b2a": (self.message_fn, self.reduce_fn),  # e_ij [Had] (E * hj)
                "g2a": (fn.copy_u("Fu", "m"), fn.sum("m", "h")),  # F * u
            },
            "sum",
        )

        h = g.nodes["atom"].data["h"]
        if self.graph_norm:
            h = h * norm_atom
        if self.batch_norm:
            h = self.bn_node_h(h)
        h = self.activation(h)
        if self.residual:
            h = h_in + h
        g.nodes["atom"].data["h"] = h

        # update global feature u
        g.nodes["atom"].data.update({"Gh": self.G(h)})
        g.nodes["bond"].data.update({"He": self.H(e)})
        g.nodes["global"].data.update({"Iu": self.I(u)})
        g.multi_update_all(
            {
                "a2g": (fn.copy_u("Gh", "m"), fn.mean("m", "u")),  # G * (mean_i h_i)
                "b2g": (fn.copy_u("He", "m"), fn.mean("m", "u")),  # H * (mean_ij e_ij)
                "g2g": (fn.copy_u("Iu", "m"), fn.sum("m", "u")),  # I * u
            },
            "sum",
        )
        u = g.nodes["global"].data["u"]
        # do not apply batch norm if there is only one graph
        if self.batch_norm and u.shape[0] > 1:
            u = self.bn_node_u(u)
        u = self.activation(u)
        if self.residual:
            u = u_in + u

        # dropout
        h = self.dropout(h)
        e = self.dropout(e)
        u = self.dropout(u)

        feats = {"atom": h, "bond": e, "global": u}

        return feats
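
The forward above assumes the molecule is a heterograph with `atom`, `bond`, and `global` node types, wired by the nine edge types that appear in the `multi_update_all` calls. A minimal sketch of that layout (illustrative; the real graph construction lives elsewhere in the repo):

import dgl
import torch

# two atoms joined by one bond node, plus a single global node (assumed layout)
g = dgl.heterograph({
    ("atom", "a2b", "bond"): ([0, 1], [0, 0]),
    ("bond", "b2a", "atom"): ([0, 0], [0, 1]),
    ("bond", "b2b", "bond"): ([0], [0]),
    ("atom", "a2a", "atom"): ([0, 1], [1, 0]),
    ("global", "g2a", "atom"): ([0, 0], [0, 1]),
    ("atom", "a2g", "global"): ([0, 1], [0, 0]),
    ("global", "g2b", "bond"): ([0], [0]),
    ("bond", "b2g", "global"): ([0], [0]),
    ("global", "g2g", "global"): ([0], [0]),
})
feats = {
    "atom": torch.randn(2, 16),
    "bond": torch.randn(1, 16),
    "global": torch.randn(1, 16),
}
# layer(g, feats) would then return the updated {"atom", "bond", "global"} dict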
Example #2
        features = bond_features(bond)
        bond_src.append(begin_idx)
        bond_dst.append(end_idx)
        bond_x.append(features)
        # set up the reverse direction
        bond_src.append(end_idx)
        bond_dst.append(begin_idx)
        bond_x.append(features)
    graph.add_edges(bond_src, bond_dst)

    n_edges += n_bonds
    return graph, torch.stack(atom_x), \
        torch.stack(bond_x) if len(bond_x) > 0 else torch.zeros(0)

mpn_loopy_bp_msg = DGLF.copy_src(src='msg', out='msg')
mpn_loopy_bp_reduce = DGLF.sum(msg='msg', out='accum_msg')

class LoopyBPUpdate(nn.Module):
    def __init__(self, hidden_size):
        super(LoopyBPUpdate, self).__init__()
        self.hidden_size = hidden_size

        self.W_h = nn.Linear(hidden_size, hidden_size, bias=False)

    def reset_parameters(self):
        """Reinitialize model parameters."""
        self.W_h.reset_parameters()

    def forward(self, nodes):
        msg_input = nodes.data['msg_input']
        msg_delta = self.W_h(nodes.data['accum_msg'])
        msg = torch.relu(msg_input + msg_delta)
        return {'msg': msg}
Example #3
#
# GCN implementation with DGL
# ``````````````````````````````````````````
# We first define the message and reduce function as usual.  Since the
# aggregation on a node :math:`u` only involves summing over the neighbors'
# representations :math:`h_v` (weighted by the edge feature ``'w'``), we can
# simply use builtin functions:

import dgl
import dgl.function as fn
import torch as th
import torch.nn as nn
import torch.nn.functional as F
from dgl import DGLGraph
import Processtest
gcn_msg = fn.u_mul_e('h', 'w', 'm')
gcn_reduce = fn.sum(msg='m', out='h')

###############################################################################
# We then proceed to define the GCNLayer module. A GCNLayer essentially performs
# message passing on all the nodes and then applies a fully-connected layer.


class GCNLayer(nn.Module):
    def __init__(self, in_feats, out_feats):
        super(GCNLayer, self).__init__()
        self.linear = nn.Linear(in_feats, out_feats)

    def forward(self, g, feature):
        # Creating a local scope so that all the stored ndata and edata
        # (such as the `'h'` ndata below) are automatically popped out
        # when the scope exits.
        with g.local_scope():
            g.ndata['h'] = feature
            g.update_all(gcn_msg, gcn_reduce)
            h = g.ndata['h']
            return self.linear(h)
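
###############################################################################
# A short usage sketch (illustrative, not part of the original tutorial). The
# edge weight ``'w'`` must be set because ``gcn_msg`` multiplies each source
# feature by it.

g = dgl.graph(([0, 1, 2], [1, 2, 0]))
g.edata['w'] = th.ones(g.num_edges(), 1)
layer = GCNLayer(in_feats=5, out_feats=2)
out = layer(g, th.randn(g.num_nodes(), 5))  # shape: (3, 2)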
Example #4
    def propagate(self, g, weight, incidence_in, incidence_out):

        self.aggregate_relation(g, weight, incidence_in, incidence_out)
        g.update_all(self.msg_func, fn.sum(msg='msg', out='h'),
                     self.apply_func)
        return self.weight
Example #5
 def forward(self, graph):
     graph.update_all(message_func=self.message_function,
                      reduce_func=fn.sum(msg='m', out='m_sum'),
                      apply_node_func=self.update_function)
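
A minimal sketch of the user-defined functions this forward assumes (the names match the call above; the bodies are illustrative, not from the original source):

def message_function(edges):
    # send each source node's feature along the edge
    return {'m': edges.src['h']}

def update_function(nodes):
    # combine a node's own feature with the summed messages
    return {'h': nodes.data['h'] + nodes.data['m_sum']}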
Example #6
    def run(self, cand_graphs, cand_line_graph, tree_mess_src_edges,
            tree_mess_tgt_edges, tree_mess_tgt_nodes, mol_tree_batch):
        n_nodes = cand_graphs.number_of_nodes()

        cand_graphs.apply_edges(func=lambda edges: {'src_x': edges.src['x']}, )

        bond_features = cand_line_graph.ndata['x']
        source_features = cand_line_graph.ndata['src_x']
        features = torch.cat([source_features, bond_features], 1)
        msg_input = self.W_i(features)
        cand_line_graph.ndata.update({
            'msg_input': msg_input,
            'msg': torch.relu(msg_input),
            'accum_msg': torch.zeros_like(msg_input),
        })
        zero_node_state = bond_features.new(n_nodes, self.hidden_size).zero_()
        cand_graphs.ndata.update({
            'm': zero_node_state.clone(),
            'h': zero_node_state.clone(),
        })

        cand_graphs.edata['alpha'] = \
                cuda(torch.zeros(cand_graphs.number_of_edges(), self.hidden_size))
        cand_graphs.ndata['alpha'] = zero_node_state
        if tree_mess_src_edges.shape[0] > 0:
            if PAPER:
                src_u, src_v = tree_mess_src_edges.unbind(1)
                tgt_u, tgt_v = tree_mess_tgt_edges.unbind(1)
                src_u = src_u.to(mol_tree_batch.device)
                src_v = src_v.to(mol_tree_batch.device)
                eid = mol_tree_batch.edge_ids(src_u, src_v)
                alpha = mol_tree_batch.edata['m'][eid]
                cand_graphs.edges[tgt_u, tgt_v].data['alpha'] = alpha
            else:
                src_u, src_v = tree_mess_src_edges.unbind(1)
                src_u = src_u.to(mol_tree_batch.device)
                src_v = src_v.to(mol_tree_batch.device)
                eid = mol_tree_batch.edge_ids(src_u, src_v)
                alpha = mol_tree_batch.edata['m'][eid]
                node_idx = (tree_mess_tgt_nodes.to(
                    device=zero_node_state.device)[:, None].expand_as(alpha))
                node_alpha = zero_node_state.clone().scatter_add(
                    0, node_idx, alpha)
                cand_graphs.ndata['alpha'] = node_alpha
                cand_graphs.apply_edges(
                    func=lambda edges: {'alpha': edges.src['alpha']}, )

        cand_line_graph.ndata.update(cand_graphs.edata)
        for i in range(self.depth - 1):
            cand_line_graph.update_all(DGLF.copy_u('msg', 'msg'),
                                       DGLF.sum('msg', 'accum_msg'))
            cand_line_graph.apply_nodes(self.loopy_bp_updater)

        cand_graphs.edata.update(cand_line_graph.ndata)

        cand_graphs.update_all(DGLF.copy_e('msg', 'msg'), DGLF.sum('msg', 'm'))
        if PAPER:
            cand_graphs.update_all(DGLF.copy_e('alpha', 'alpha'),
                                   DGLF.sum('alpha', 'accum_alpha'))
        cand_graphs.apply_nodes(self.gather_updater)

        return cand_graphs
Example #7
                bond_x,
                'src_x':
                atom_x.new(len(bond_feature_list), ATOM_FDIM).zero_()
            })
        cand_graphs.append(g)

    return cand_graphs, tree_mess_source_edges, tree_mess_target_edges, \
           tree_mess_target_nodes


# TODO: use SPMV
mpn_loopy_bp_msg = DGLF.copy_src(src='msg', out='msg')
#def mpn_loopy_bp_msg(src, edge):
#    return src['msg']

mpn_loopy_bp_reduce = DGLF.sum(msg='msg', out='accum_msg')
#def mpn_loopy_bp_reduce(node, msgs):
#    return {'accum_msg': torch.sum(msgs, 1)}


class LoopyBPUpdate(nn.Module):
    def __init__(self, hidden_size):
        super(LoopyBPUpdate, self).__init__()
        self.hidden_size = hidden_size

        self.W_h = nn.Linear(hidden_size, hidden_size, bias=False)

    def forward(self, node):
        msg_input = node['msg_input']
        msg_delta = self.W_h(node['accum_msg'] + node['alpha'])
        msg = torch.relu(msg_input + msg_delta)
        return {'msg': msg}
Example #8
 def reduce_sum(self, msg, out):
     res = fn.sum(msg, out)
     return res
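
Note that `fn.sum(msg, out)` does not reduce anything by itself: it returns a built-in reducer object that DGL executes later during message passing. A self-contained sketch (illustrative):

import dgl
import dgl.function as fn
import torch

g = dgl.graph(([0, 1], [2, 2]))
g.ndata['x'] = torch.ones(3, 4)
g.update_all(fn.copy_u('x', 'm'), fn.sum('m', 'h'))  # the reducer runs here
print(g.ndata['h'][2])  # tensor([2., 2., 2., 2.])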
Example #9
def test_copy():
    num_layers = 2
    g = generate_rand_graph(100)
    g.ndata['h'] = g.ndata['h1']
    nf = create_mini_batch(g, num_layers)
    nf.copy_from_parent()
    for i in range(nf.num_layers):
        assert len(g.ndata.keys()) == len(nf.layers[i].data.keys())
        for key in g.ndata.keys():
            assert key in nf.layers[i].data.keys()
            assert F.array_equal(nf.layers[i].data[key],
                                 g.ndata[key][nf.layer_parent_nid(i)])
    for i in range(nf.num_blocks):
        assert len(g.edata.keys()) == len(nf.blocks[i].data.keys())
        for key in g.edata.keys():
            assert key in nf.blocks[i].data.keys()
            assert F.array_equal(nf.blocks[i].data[key],
                                 g.edata[key][nf.block_parent_eid(i)])

    nf = create_mini_batch(g, num_layers)
    node_embed_names = [['h'], ['h1'], ['h']]
    edge_embed_names = [['h2'], ['h2']]
    nf.copy_from_parent(node_embed_names=node_embed_names,
                        edge_embed_names=edge_embed_names)
    for i in range(nf.num_layers):
        assert len(node_embed_names[i]) == len(nf.layers[i].data.keys())
        for key in node_embed_names[i]:
            assert key in nf.layers[i].data.keys()
            assert F.array_equal(nf.layers[i].data[key],
                                 g.ndata[key][nf.layer_parent_nid(i)])
    for i in range(nf.num_blocks):
        assert len(edge_embed_names[i]) == len(nf.blocks[i].data.keys())
        for key in edge_embed_names[i]:
            assert key in nf.blocks[i].data.keys()
            assert F.array_equal(nf.blocks[i].data[key],
                                 g.edata[key][nf.block_parent_eid(i)])

    nf = create_mini_batch(g, num_layers)
    g.ndata['h0'] = F.clone(g.ndata['h'])
    node_embed_names = [['h0'], [], []]
    nf.copy_from_parent(node_embed_names=node_embed_names,
                        edge_embed_names=None)
    for i in range(num_layers):
        nf.block_compute(i, fn.copy_src(src='h%d' % i, out='m'),
                         fn.sum(msg='m', out='t'),
                         lambda nodes: {'h%d' % (i + 1): nodes.data['t'] + 1})
        g.update_all(fn.copy_src(src='h', out='m'), fn.sum(msg='m', out='t'),
                     lambda nodes: {'h': nodes.data['t'] + 1})
        assert F.array_equal(nf.layers[i + 1].data['h%d' % (i + 1)],
                             g.ndata['h'][nf.layer_parent_nid(i + 1)])
    nf.copy_to_parent(node_embed_names=[['h0'], ['h1'], ['h2']])
    for i in range(num_layers + 1):
        assert F.array_equal(nf.layers[i].data['h%d' % i],
                             g.ndata['h%d' % i][nf.layer_parent_nid(i)])

    nf = create_mini_batch(g, num_layers)
    g.ndata['h0'] = F.clone(g.ndata['h'])
    g.ndata['h1'] = F.clone(g.ndata['h'])
    g.ndata['h2'] = F.clone(g.ndata['h'])
    node_embed_names = [['h0'], ['h1'], ['h2']]
    nf.copy_from_parent(node_embed_names=node_embed_names,
                        edge_embed_names=None)

    def msg_func(edge, ind):
        assert 'h%d' % ind in edge.src.keys()
        return {'m': edge.src['h%d' % ind]}

    def reduce_func(node, ind):
        assert 'h%d' % (ind + 1) in node.data.keys()
        return {
            'h': F.sum(node.mailbox['m'], 1) + node.data['h%d' % (ind + 1)]
        }

    for i in range(num_layers):
        nf.block_compute(i, partial(msg_func, ind=i),
                         partial(reduce_func, ind=i))
Example #10
import dgl
import dgl.function as fn
import torch
import scipy.sparse as spp
import matplotlib.pyplot as plt
import networkx as nx

edgelist = [(0, 4), (0, 1), (4, 1), (4, 3), (1, 2), (3, 2), (3, 5)]
g = nx.DiGraph(edgelist)

# add self-edge for each node
g.remove_edges_from(nx.selfloop_edges(g))
g.add_edges_from(zip(g.nodes(), g.nodes()))

#nx.draw(g, with_labels=True)
#plt.show()

g = dgl.DGLGraph(g)

degs = g.in_degrees().float()
norm = torch.pow(degs, -0.5)
norm[torch.isinf(norm)] = 0
g.ndata['norm'] = norm.unsqueeze(1)

h = torch.ones([6, 2])
# normalization by square root of src degree
h = h * g.ndata['norm']
g.ndata['h'] = h
g.update_all(fn.copy_src(src='h', out='m'), fn.sum(msg='m', out='h'))
h = g.ndata.pop('h')
# normalization by square root of dst degree
h = h * g.ndata['norm']

print(h)
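
For reference, the propagation above is exactly the symmetric normalization D^-1/2 A^T D^-1/2 h, with self-loops included and D the in-degree matrix. A dense sanity check (illustrative):

import numpy as np

A = np.zeros((6, 6))
for u, v in edgelist:
    A[u, v] = 1
np.fill_diagonal(A, 1)                       # the self-loops added above
D_inv_sqrt = np.diag(A.sum(axis=0) ** -0.5)  # in-degree^(-1/2)
h_dense = D_inv_sqrt @ A.T @ D_inv_sqrt @ np.ones((6, 2))
print(np.allclose(h_dense, h.numpy()))       # expected: True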
Example #11
def check_partition(g,
                    part_method,
                    reshuffle,
                    num_parts=4,
                    num_trainers_per_machine=1,
                    load_feats=True):
    g.ndata['labels'] = F.arange(0, g.number_of_nodes())
    g.ndata['feats'] = F.tensor(np.random.randn(g.number_of_nodes(), 10),
                                F.float32)
    g.edata['feats'] = F.tensor(np.random.randn(g.number_of_edges(), 10),
                                F.float32)
    g.update_all(fn.copy_src('feats', 'msg'), fn.sum('msg', 'h'))
    g.update_all(fn.copy_edge('feats', 'msg'), fn.sum('msg', 'eh'))
    num_hops = 2

    orig_nids, orig_eids = partition_graph(
        g,
        'test',
        num_parts,
        '/tmp/partition',
        num_hops=num_hops,
        part_method=part_method,
        reshuffle=reshuffle,
        return_mapping=True,
        num_trainers_per_machine=num_trainers_per_machine)
    part_sizes = []
    shuffled_labels = []
    shuffled_edata = []
    for i in range(num_parts):
        part_g, node_feats, edge_feats, gpb, _, ntypes, etypes = load_partition(
            '/tmp/partition/test.json', i, load_feats=load_feats)
        if not load_feats:
            assert not node_feats
            assert not edge_feats
            node_feats, edge_feats = load_partition_feats(
                '/tmp/partition/test.json', i)
        if num_trainers_per_machine > 1:
            for ntype in g.ntypes:
                name = ntype + '/trainer_id'
                assert name in node_feats
                part_ids = F.floor_div(node_feats[name],
                                       num_trainers_per_machine)
                assert np.all(F.asnumpy(part_ids) == i)

            for etype in g.etypes:
                name = etype + '/trainer_id'
                assert name in edge_feats
                part_ids = F.floor_div(edge_feats[name],
                                       num_trainers_per_machine)
                assert np.all(F.asnumpy(part_ids) == i)

        # Check the metadata
        assert gpb._num_nodes() == g.number_of_nodes()
        assert gpb._num_edges() == g.number_of_edges()

        assert gpb.num_partitions() == num_parts
        gpb_meta = gpb.metadata()
        assert len(gpb_meta) == num_parts
        assert len(gpb.partid2nids(i)) == gpb_meta[i]['num_nodes']
        assert len(gpb.partid2eids(i)) == gpb_meta[i]['num_edges']
        part_sizes.append((gpb_meta[i]['num_nodes'], gpb_meta[i]['num_edges']))

        nid = F.boolean_mask(part_g.ndata[dgl.NID], part_g.ndata['inner_node'])
        local_nid = gpb.nid2localnid(nid, i)
        assert F.dtype(local_nid) in (F.int64, F.int32)
        assert np.all(F.asnumpy(local_nid) == np.arange(0, len(local_nid)))
        eid = F.boolean_mask(part_g.edata[dgl.EID], part_g.edata['inner_edge'])
        local_eid = gpb.eid2localeid(eid, i)
        assert F.dtype(local_eid) in (F.int64, F.int32)
        assert np.all(F.asnumpy(local_eid) == np.arange(0, len(local_eid)))

        # Check the node map.
        local_nodes = F.boolean_mask(part_g.ndata[dgl.NID],
                                     part_g.ndata['inner_node'])
        llocal_nodes = F.nonzero_1d(part_g.ndata['inner_node'])
        local_nodes1 = gpb.partid2nids(i)
        assert F.dtype(local_nodes1) in (F.int32, F.int64)
        assert np.all(
            np.sort(F.asnumpy(local_nodes)) == np.sort(F.asnumpy(
                local_nodes1)))
        assert np.all(F.asnumpy(llocal_nodes) == np.arange(len(llocal_nodes)))

        # Check the edge map.
        local_edges = F.boolean_mask(part_g.edata[dgl.EID],
                                     part_g.edata['inner_edge'])
        llocal_edges = F.nonzero_1d(part_g.edata['inner_edge'])
        local_edges1 = gpb.partid2eids(i)
        assert F.dtype(local_edges1) in (F.int32, F.int64)
        assert np.all(
            np.sort(F.asnumpy(local_edges)) == np.sort(F.asnumpy(
                local_edges1)))
        assert np.all(F.asnumpy(llocal_edges) == np.arange(len(llocal_edges)))

        # Verify the mapping between the reshuffled IDs and the original IDs.
        part_src_ids, part_dst_ids = part_g.edges()
        part_src_ids = F.gather_row(part_g.ndata[dgl.NID], part_src_ids)
        part_dst_ids = F.gather_row(part_g.ndata[dgl.NID], part_dst_ids)
        part_eids = part_g.edata[dgl.EID]
        orig_src_ids = F.gather_row(orig_nids, part_src_ids)
        orig_dst_ids = F.gather_row(orig_nids, part_dst_ids)
        orig_eids1 = F.gather_row(orig_eids, part_eids)
        orig_eids2 = g.edge_ids(orig_src_ids, orig_dst_ids)
        assert F.shape(orig_eids1)[0] == F.shape(orig_eids2)[0]
        assert np.all(F.asnumpy(orig_eids1) == F.asnumpy(orig_eids2))

        if reshuffle:
            part_g.ndata['feats'] = F.gather_row(g.ndata['feats'],
                                                 part_g.ndata['orig_id'])
            part_g.edata['feats'] = F.gather_row(g.edata['feats'],
                                                 part_g.edata['orig_id'])
            # when we read node data from the original global graph, we should use orig_id.
            local_nodes = F.boolean_mask(part_g.ndata['orig_id'],
                                         part_g.ndata['inner_node'])
            local_edges = F.boolean_mask(part_g.edata['orig_id'],
                                         part_g.edata['inner_edge'])
        else:
            part_g.ndata['feats'] = F.gather_row(g.ndata['feats'],
                                                 part_g.ndata[dgl.NID])
            part_g.edata['feats'] = F.gather_row(g.edata['feats'],
                                                 part_g.edata[dgl.NID])

        part_g.update_all(fn.copy_src('feats', 'msg'), fn.sum('msg', 'h'))
        part_g.update_all(fn.copy_edge('feats', 'msg'), fn.sum('msg', 'eh'))
        assert F.allclose(F.gather_row(g.ndata['h'], local_nodes),
                          F.gather_row(part_g.ndata['h'], llocal_nodes))
        assert F.allclose(F.gather_row(g.ndata['eh'], local_nodes),
                          F.gather_row(part_g.ndata['eh'], llocal_nodes))

        for name in ['labels', 'feats']:
            assert '_N/' + name in node_feats
            assert node_feats['_N/' + name].shape[0] == len(local_nodes)
            true_feats = F.gather_row(g.ndata[name], local_nodes)
            ndata = F.gather_row(node_feats['_N/' + name], local_nid)
            assert np.all(F.asnumpy(true_feats) == F.asnumpy(ndata))
        for name in ['feats']:
            assert '_E/' + name in edge_feats
            assert edge_feats['_E/' + name].shape[0] == len(local_edges)
            true_feats = F.gather_row(g.edata[name], local_edges)
            edata = F.gather_row(edge_feats['_E/' + name], local_eid)
            assert np.all(F.asnumpy(true_feats) == F.asnumpy(edata))

        # This only works if node/edge IDs are shuffled.
        if reshuffle:
            shuffled_labels.append(node_feats['_N/labels'])
            shuffled_edata.append(edge_feats['_E/feats'])

    # Verify that we can reconstruct node/edge data for original IDs.
    if reshuffle:
        shuffled_labels = F.asnumpy(F.cat(shuffled_labels, 0))
        shuffled_edata = F.asnumpy(F.cat(shuffled_edata, 0))
        orig_labels = np.zeros(shuffled_labels.shape,
                               dtype=shuffled_labels.dtype)
        orig_edata = np.zeros(shuffled_edata.shape, dtype=shuffled_edata.dtype)
        orig_labels[F.asnumpy(orig_nids)] = shuffled_labels
        orig_edata[F.asnumpy(orig_eids)] = shuffled_edata
        assert np.all(orig_labels == F.asnumpy(g.ndata['labels']))
        assert np.all(orig_edata == F.asnumpy(g.edata['feats']))

    if reshuffle:
        node_map = []
        edge_map = []
        for i, (num_nodes, num_edges) in enumerate(part_sizes):
            node_map.append(np.ones(num_nodes) * i)
            edge_map.append(np.ones(num_edges) * i)
        node_map = np.concatenate(node_map)
        edge_map = np.concatenate(edge_map)
        nid2pid = gpb.nid2partid(F.arange(0, len(node_map)))
        assert F.dtype(nid2pid) in (F.int32, F.int64)
        assert np.all(F.asnumpy(nid2pid) == node_map)
        eid2pid = gpb.eid2partid(F.arange(0, len(edge_map)))
        assert F.dtype(eid2pid) in (F.int32, F.int64)
        assert np.all(F.asnumpy(eid2pid) == edge_map)
Example #12

import torch
import torch.nn as nn
from dgl import batch, unbatch, bfs_edges_generator
import dgl.function as DGLF
from .line_profiler_integration import profile
import numpy as np

MAX_NB = 8

def level_order(forest, roots):
    edges = bfs_edges_generator(forest, roots)
    _, leaves = forest.find_edges(edges[-1])
    edges_back = bfs_edges_generator(forest, roots, reverse=True)
    yield from reversed(edges_back)
    yield from edges

enc_tree_msg = [DGLF.copy_src(src='m', out='m'), DGLF.copy_src(src='rm', out='rm')]
enc_tree_reduce = [DGLF.sum(msg='m', out='s'), DGLF.sum(msg='rm', out='accum_rm')]
enc_tree_gather_msg = DGLF.copy_edge(edge='m', out='m')
enc_tree_gather_reduce = DGLF.sum(msg='m', out='m')

class EncoderGatherUpdate(nn.Module):
    def __init__(self, hidden_size):
        nn.Module.__init__(self)
        self.hidden_size = hidden_size

        self.W = nn.Linear(2 * hidden_size, hidden_size)

    def forward(self, nodes):
        x = nodes.data['x']
        m = nodes.data['m']
        return {
            'h': torch.relu(self.W(torch.cat([x, m], 1))),
        }
Example #13
    def forward(self, g, feats, norm_atom, norm_bond):

        g = g.local_var()

        h = feats["atom"]
        e = feats["bond"]
        # u = feats["global"]

        # for residual connection
        h_in = h
        e_in = e
        # u_in = u

        g.nodes["atom"].data.update({"Ah": self.A(h), "Dh": self.D(h), "Eh": self.E(h)})
        g.nodes["bond"].data.update({"Be": self.B(e)})
        # g.nodes["global"].data.update({"Cu": self.C(u), "Fu": self.F(u)})

        # update bond feature e
        g.multi_update_all(
            {
                "a2b": (fn.copy_u("Ah", "m"), fn.sum("m", "e")),  # A * (h_i + h_j)
                "b2b": (fn.copy_u("Be", "m"), fn.sum("m", "e")),  # B * e_ij
                # "g2b": (fn.copy_u("Cu", "m"), fn.sum("m", "e")),  # C * u
            },
            "sum",
        )
        e = g.nodes["bond"].data["e"]
        if self.graph_norm:
            e = e * norm_bond
        if self.batch_norm:
            e = self.bn_node_e(e)
        e = self.activation(e)
        if self.residual:
            e = e_in + e
        g.nodes["bond"].data["e"] = e

        # update atom feature h

        # Copy Eh to bond nodes, without reduction.
        # This is the first arrow in: Eh_j -> bond node -> atom i node
        # The second arrow is done in self.message_fn and self.reduce_fn below
        g.update_all(fn.copy_u("Eh", "Eh_j"), self.reduce_fn_a2b, etype="a2b")

        g.multi_update_all(
            {
                "a2a": (fn.copy_u("Dh", "m"), fn.sum("m", "h")),  # D * h_i
                "b2a": (self.message_fn, self.reduce_fn),  # e_ij [Had] (E * hj)
                # "g2a": (fn.copy_u("Fu", "m"), fn.sum("m", "h")),  # F * u
            },
            "sum",
        )
        h = g.nodes["atom"].data["h"]
        if self.graph_norm:
            h = h * norm_atom
        if self.batch_norm:
            h = self.bn_node_h(h)
        h = self.activation(h)
        if self.residual:
            h = h_in + h
        g.nodes["atom"].data["h"] = h

        # # update global feature u
        # g.nodes["atom"].data.update({"Gh": self.G(h)})
        # g.nodes["bond"].data.update({"He": self.H(e)})
        # g.nodes["global"].data.update({"Iu": self.I(u)})
        # g.multi_update_all(
        #     {
        #         "a2g": (fn.copy_u("Gh", "m"), fn.mean("m", "u")),  # G * (mean_i h_i)
        #         "b2g": (fn.copy_u("He", "m"), fn.mean("m", "u")),  # H * (mean_ij e_ij)
        #         "g2g": (fn.copy_u("Iu", "m"), fn.sum("m", "u")),  # I * u
        #     },
        #     "sum",
        # )
        # u = g.nodes["global"].data["u"]
        # if self.batch_norm:
        #     u = self.bn_node_u(u)
        # u = self.activation(u)
        # if self.residual:
        #     u = u_in + u

        # dropout
        h = self.dropout(h)
        e = self.dropout(e)
        # u = self.dropout(u)

        # feats = {"atom": h, "bond": e, "global": u}
        feats = {"atom": h, "bond": e}

        return feats
Example #14
    def forward(self, g, feats, norm_atom, norm_bond):

        g = g.local_var()

        h = feats["atom"]
        e = feats["bond"]
        u = feats["global"]

        # for residual connection
        h_in = h
        e_in = e
        u_in = u

        g.nodes["atom"].data.update({"Ah": self.A(h), "Dh": self.D(h), "Eh": self.E(h)})
        g.nodes["bond"].data.update({"Be": self.B(e)})
        g.nodes["global"].data.update({"Cu": self.C(u), "Fu": self.F(u)})

        # update bond feature e
        g.multi_update_all(
            {
                "a2b": (fn.copy_u("Ah", "m"), fn.sum("m", "e")),  # A * (h_i + h_j)
                "b2b": (fn.copy_u("Be", "m"), fn.sum("m", "e")),  # B * e_ij
                "g2b": (fn.copy_u("Cu", "m"), fn.sum("m", "e")),  # C * u
            },
            "sum",
        )
        e = g.nodes["bond"].data["e"]
        if self.graph_norm:
            e = e * norm_bond
        if self.batch_norm:
            e = self.bn_node_e(e)
        e = self.activation(e)
        if self.residual:
            e = e_in + e
        g.nodes["bond"].data["e"] = e

        # update atom feature h

        # Copy Eh to bond nodes, without reduction.
        # This is the first arrow in: Eh_j -> bond node -> atom i node
        # The second arrow is done in self.message_fn and self.reduce_fn below
        g.update_all(fn.copy_u("Eh", "Eh_j"), self.reduce_fn_a2b, etype="a2b")

        g.multi_update_all(
            {
                "a2a": (fn.copy_u("Dh", "m"), fn.sum("m", "h")),  # D * h_i
                "b2a": (self.message_fn, self.reduce_fn),  # e_ij [Had] (E * hj)
                "g2a": (fn.copy_u("Fu", "m"), fn.sum("m", "h")),  # F * u
            },
            "sum",
        )
        h = g.nodes["atom"].data["h"]
        if self.graph_norm:
            h = h * norm_atom
        if self.batch_norm:
            h = self.bn_node_h(h)
        h = self.activation(h)
        if self.residual:
            h = h_in + h
        g.nodes["atom"].data["h"] = h

        u = self.node_attn_layer(g, u, [h, e, u]).flatten(start_dim=1)
        if self.batch_norm:
            u = self.bn_node_u(u)
        u = self.activation(u)
        if self.residual:
            u = u_in + u

        # dropout
        h = self.dropout(h)
        e = self.dropout(e)
        u = self.dropout(u)

        feats = {"atom": h, "bond": e, "global": u}

        return feats
Example #15
 def propagate(self, g):
     g.update_all(self.msg_func, fn.sum(msg='msg', out='h'), self.apply_func)
Example #16
    def forward(self, graph, feat):
        r"""Compute graph attention network layer.

        Parameters
        ----------
        graph : DGLGraph
            The graph.
        feat : torch.Tensor or pair of torch.Tensor
            If a torch.Tensor is given, the input feature of shape :math:`(N, D_{in})` where
            :math:`D_{in}` is size of input feature, :math:`N` is the number of nodes.
            If a pair of torch.Tensor is given, the pair must contain two tensors of shape
            :math:`(N_{in}, D_{in_{src}})` and :math:`(N_{out}, D_{in_{dst}})`.

        Returns
        -------
        torch.Tensor
            The output feature of shape :math:`(N, H, D_{out})` where :math:`H`
            is the number of heads, and :math:`D_{out}` is size of output feature.
        """
        graph = graph.local_var()
        if isinstance(feat, tuple):
            h_src = self.feat_drop(feat[0])
            h_dst = self.feat_drop(feat[1])
            feat_src = self.fc_src(h_src).view(-1, self._num_heads,
                                               self._out_feats)
            feat_dst = self.fc_dst(h_dst).view(-1, self._num_heads,
                                               self._out_feats)
        else:
            h_src = h_dst = self.feat_drop(feat)
            feat_src = feat_dst = self.fc(h_src).view(-1, self._num_heads,
                                                      self._out_feats)

        if self.opt['att_type'] == "GAT":
            # NOTE: GAT paper uses "first concatenation then linear projection"
            # to compute attention scores, while ours is "first projection then
            # addition", the two approaches are mathematically equivalent:
            # We decompose the weight vector a mentioned in the paper into
            # [a_l || a_r], then
            # a^T [Wh_i || Wh_j] = a_l Wh_i + a_r Wh_j
            # Our implementation is more efficient because we do not need to
            # save [Wh_i || Wh_j] on edges, which is not memory-efficient. Plus,
            # the addition can be optimized with DGL's built-in function u_add_v,
            # which further speeds up computation and reduces the memory footprint.
            el = (feat_src * self.attn_l).sum(dim=-1).unsqueeze(-1)
            er = (feat_dst * self.attn_r).sum(dim=-1).unsqueeze(-1)
            graph.srcdata.update({'ft': feat_src, 'el': el})
            graph.dstdata.update({'er': er})
            # compute edge attention, el and er are a_l Wh_i and a_r Wh_j respectively.
            graph.apply_edges(fn.u_add_v('el', 'er', 'e'))
            e = self.leaky_relu(graph.edata.pop('e'))
        elif self.opt['att_type'] == "cosine":
            el = feat_src * self.attn_l
            er = feat_dst * self.attn_r
            graph.srcdata.update({'ft': feat_src, 'el': el})
            graph.dstdata.update({'er': er})
            graph.srcdata['norm_h'] = F.normalize(el, p=2, dim=-1)
            graph.dstdata['norm_h'] = F.normalize(er, p=2, dim=-1)
            # compute cosine distance
            graph.apply_edges(fn.u_dot_v('norm_h', 'norm_h', 'cos'))
            e = graph.edata.pop('cos')
        elif self.opt['att_type'] == "scaled_dot":
            el = feat_src * self.attn_l
            er = feat_dst * self.attn_r / th.sqrt(
                th.tensor(self.opt['num_hidden'] / self.opt['num_heads']))
            graph.srcdata.update({'ft': feat_src, 'el': el})
            graph.dstdata.update({'er': er})
            # compute dot
            graph.apply_edges(fn.u_dot_v('el', 'er', 'dot'))
            e = graph.edata.pop('dot')
        elif self.opt['att_type'] == "pearson":
            el = feat_src * self.attn_l
            er = feat_dst * self.attn_r
            graph.srcdata.update({'ft': feat_src, 'el': el})
            graph.dstdata.update({'er': er})
            src_mu = th.mean(el, dim=1, keepdim=True)
            graph.srcdata['norm_h'] = F.normalize(el - src_mu, p=2, dim=-1)
            dst_mu = th.mean(er, dim=1, keepdim=True)
            graph.dstdata['norm_h'] = F.normalize(er - dst_mu, p=2, dim=-1)
            # compute cosine distance
            graph.apply_edges(fn.u_dot_v('norm_h', 'norm_h', 'cos'))
            e = graph.edata.pop('cos')
        elif self.opt['att_type'] == "spearman":
            # TODO: check all these operations
            el = feat_src * self.attn_l
            er = feat_dst * self.attn_r
            graph.srcdata.update({'ft': feat_src, 'el': el})
            graph.dstdata.update({'er': er})

            el = el.view(-1, self._out_feats)
            er = er.view(-1, self._out_feats)

            el = soft_rank(el, regularization_strength=1.0)
            er = soft_rank(er, regularization_strength=1.0)

            ranked_src = soft_rank(
                1000 *
                F.normalize(el, p=2, dim=-1))  #, regularization_strength=0.1)
            ranked_dst = soft_rank(1000 * F.normalize(er, p=2, dim=-1),
                                   regularization_strength=0.1)
            src_mu = th.mean(ranked_src, dim=1, keepdim=True)
            dst_mu = th.mean(ranked_dst, dim=1, keepdim=True)

            el = F.normalize(ranked_src - src_mu, p=2, dim=-1)
            er = F.normalize(ranked_dst - dst_mu, p=2, dim=-1)
            el = el.view(-1, self._num_heads, self._out_feats)
            er = er.view(-1, self._num_heads, self._out_feats)
            graph.srcdata['norm_h'] = F.normalize(el, p=2, dim=-1)
            graph.dstdata['norm_h'] = F.normalize(er, p=2, dim=-1)
            # compute cosine distance
            graph.apply_edges(fn.u_dot_v('norm_h', 'norm_h', 'cos'))
            e = graph.edata.pop('cos')

        # compute softmax
        graph.edata['a'] = self.attn_drop(edge_softmax(graph, e))
        # message passing
        graph.update_all(fn.u_mul_e('ft', 'a', 'm'), fn.sum('m', 'ft'))
        rst = graph.dstdata['ft']

        # residual
        if self.res_fc is not None:
            resval = self.res_fc(h_dst).view(h_dst.shape[0], -1,
                                             self._out_feats)
            rst = rst + resval
        # activation
        if self.activation:
            rst = self.activation(rst)
        return rst
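
All of the attention variants above funnel their raw scores through `edge_softmax`, which normalizes scores over the incoming edges of each destination node. A standalone sketch (illustrative; the import path assumes a recent DGL):

import dgl
import torch as th
from dgl.nn.functional import edge_softmax

g = dgl.graph(([0, 1, 2], [2, 2, 2]))  # three edges into node 2
scores = th.zeros(3, 1)
print(edge_softmax(g, scores))         # each incoming edge of node 2 gets 1/3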
Example #17
        msg_delta = self.W_h(node.data['accum_msg'] + node.data['alpha'])
        msg = torch.relu(msg_input + msg_delta)
        return {'msg': msg}


if PAPER:
    mpn_gather_msg = [
        DGLF.copy_edge(edge='msg', out='msg'),
        DGLF.copy_edge(edge='alpha', out='alpha')
    ]
else:
    mpn_gather_msg = DGLF.copy_edge(edge='msg', out='msg')

if PAPER:
    mpn_gather_reduce = [
        DGLF.sum(msg='msg', out='m'),
        DGLF.sum(msg='alpha', out='accum_alpha'),
    ]
else:
    mpn_gather_reduce = DGLF.sum(msg='msg', out='m')


class GatherUpdate(nn.Module):
    def __init__(self, hidden_size):
        super(GatherUpdate, self).__init__()
        self.hidden_size = hidden_size

        self.W_o = nn.Linear(ATOM_FDIM + hidden_size, hidden_size)

    def forward(self, node):
        if PAPER:
            m = node['m'] + node['accum_alpha']
        else:
            m = node['m'] + node['alpha']
        return {'h': torch.relu(self.W_o(torch.cat([node['x'], m], 1)))}
Example #18
 def forward(self, graph, n_feat):
     graph = graph.local_var()
     graph.ndata['h'] = n_feat
     graph.update_all(fn.copy_u('h', 'm'), fn.sum('m', 'h'))
     n_feat += graph.ndata['h']
     return n_feat.view(graph.number_of_nodes() // 2, 2, -1).sum(1)
Example #19

import dgl.function as fn
import torch.nn as nn

from Constants import EDGE_FEATURE_NAME

in_out_key = 'h'
edge_layer_msg = fn.copy_edge(edge=in_out_key, out='m')
edge_layer_reduce = fn.sum(msg='m', out=in_out_key)


class EdgeLayer(nn.Module):
    def __init__(self, in_feats, out_feats):
        super(EdgeLayer, self).__init__()
        self.linear = nn.Linear(in_feats, out_feats)

    def forward(self, g_and_features):
        if isinstance(g_and_features, tuple):
            g, _ = g_and_features
        else:
            g = g_and_features

        features = g.edata[EDGE_FEATURE_NAME]
        # Creating a local scope so that all the stored ndata and edata
        # (such as the `'h'` ndata below) are automatically popped out
        # when the scope exits.
        with g.local_scope():
            g.edata[in_out_key] = features
            g.update_all(edge_layer_msg, edge_layer_reduce)
            h = g.ndata[in_out_key]
            return self.linear(h)
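
A toy usage sketch for `EdgeLayer` (illustrative; it assumes `EDGE_FEATURE_NAME` is simply a string key into `edata`):

import dgl
import torch

g = dgl.graph(([0, 1, 2], [1, 2, 0]))
g.edata[EDGE_FEATURE_NAME] = torch.randn(g.num_edges(), 4)
layer = EdgeLayer(in_feats=4, out_feats=8)
node_out = layer(g)  # (num_nodes, 8): incoming edge features summed, then projected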
Example #20
	return g, features, labels, train_mask, test_mask

def evaluate(model, g, features, labels, mask):
	model.eval()
	with torch.no_grad():
		logits = model(g, features)
		logits = logits[mask]
		labels = labels[mask]
		_, indices = torch.max(logits, dim = 1)
		correct = torch.sum(indices == labels)
		return correct.item() * 1.0 / len(labels)

if __name__ == "__main__":
	# Since the aggregation on a node u only involves summing the neighbors' representations h
	gcn_msg = fn.copy_src(src = 'h', out = 'm')
	gcn_reduce = fn.sum(msg = "m", out = "h")
	net = Net()
	g, features, labels, train_mask, test_mask = load_cora_data()
	optimizer = torch.optim.Adam(net.parameters(), lr = 0.01)
	criterion = nn.CrossEntropyLoss()
	for epoch in range(50):
		net.train()
		logits = net(g, features)
		output = criterion(logits[train_mask], labels[train_mask])
		optimizer.zero_grad()
		output.backward()
		optimizer.step()
		acc = evaluate(net, g, features, labels, test_mask)
		print("accurate:",acc)

Example #21
 def propagate(self, g, weight, incidence_in, incidence_out):
     g.update_all(self.msg_func, fn.sum(msg='msg', out='h'),
                  self.apply_func)
     return weight
Example #22
 def forward(self, g):
     g.apply_edges(self.update_edge)
     g.update_all(message_func=fn.u_mul_e('new_node', 'h', 'neighbor_info'),
                  reduce_func=fn.sum('neighbor_info', 'new_node'))
     return g.ndata["new_node"]
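
A minimal sketch of the `update_edge` callable this forward assumes (illustrative; the real implementation lives elsewhere in the source). It only needs to write an edge field `'h'` that `u_mul_e` can broadcast against `'new_node'`:

import torch

def update_edge(edges):
    # a scalar gate per edge derived from the endpoint features (assumed form)
    score = (edges.src['new_node'] * edges.dst['new_node']).sum(-1, keepdim=True)
    return {'h': torch.sigmoid(score)}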
Example #23
    def forward(self, graph, feat):
        r"""Compute graph attention network layer.

        Parameters
        ----------
        graph : DGLGraph
            The graph.
        feat : torch.Tensor or pair of torch.Tensor
            If a torch.Tensor is given, the input feature of shape :math:`(N, D_{in})` where
            :math:`D_{in}` is size of input feature, :math:`N` is the number of nodes.
            If a pair of torch.Tensor is given, the pair must contain two tensors of shape
            :math:`(N_{in}, D_{in_{src}})` and :math:`(N_{out}, D_{in_{dst}})`.

        Returns
        -------
        torch.Tensor
            The output feature of shape :math:`(N, H, D_{out})` where :math:`H`
            is the number of heads, and :math:`D_{out}` is size of output feature.
        """
        graph = graph.local_var()
        if isinstance(feat, tuple):
            h_src = self.feat_drop(feat[0])
            h_dst = self.feat_drop(feat[1])
            feat_src = self.fc_src(h_src).view(-1, self._num_heads, self._out_feats)
            feat_dst = self.fc_dst(h_dst).view(-1, self._num_heads, self._out_feats)
        else:
            h_src = h_dst = self.feat_drop(feat)
            feat_src = feat_dst = self.fc(h_src).view(
                -1, self._num_heads, self._out_feats)
        # NOTE: GAT paper uses "first concatenation then linear projection"
        # to compute attention scores, while ours is "first projection then
        # addition", the two approaches are mathematically equivalent:
        # We decompose the weight vector a mentioned in the paper into
        # [a_l || a_r], then
        # a^T [Wh_i || Wh_j] = a_l Wh_i + a_r Wh_j
        # Our implementation is more efficient because we do not need to
        # save [Wh_i || Wh_j] on edges, which is not memory-efficient. Plus,
        # the addition can be optimized with DGL's built-in function u_add_v,
        # which further speeds up computation and reduces the memory footprint.
        el = (feat_src * self.attn_l).sum(dim=-1).unsqueeze(-1)
        er = (feat_dst * self.attn_r).sum(dim=-1).unsqueeze(-1)
        graph.srcdata.update({'ft': feat_src, 'el': el})
        graph.dstdata.update({'er': er})
        # compute edge attention, el and er are a_l Wh_i and a_r Wh_j respectively.
        graph.apply_edges(fn.u_add_v('el', 'er', 'e'))
        e = self.leaky_relu(graph.edata.pop('e'))
        # compute softmax (keep e_soft so it can be returned alongside rst)
        e_soft = edge_softmax(graph, e)
        graph.edata['a'] = self.attn_drop(e_soft)
        # message passing
        graph.update_all(fn.u_mul_e('ft', 'a', 'm'),
                         fn.sum('m', 'ft'))
        rst = graph.dstdata['ft']
        # residual
        if self.res_fc is not None:
            resval = self.res_fc(h_dst).view(h_dst.shape[0], -1, self._out_feats)
            rst = rst + resval
        # activation
        if self.activation:
            rst = self.activation(rst)
        return rst, e_soft
Example #24
def test_pickling_graph():
    # graph structures and frames are pickled
    g = dgl.DGLGraph()
    g.add_nodes(3)
    src = F.tensor([0, 0])
    dst = F.tensor([1, 2])
    g.add_edges(src, dst)

    x = F.randn((3, 7))
    y = F.randn((3, 5))
    a = F.randn((2, 6))
    b = F.randn((2, 4))

    g.ndata['x'] = x
    g.ndata['y'] = y
    g.edata['a'] = a
    g.edata['b'] = b

    # registered functions are pickled
    g.register_message_func(_global_message_func)
    reduce_func = fn.sum('x', 'x')
    g.register_reduce_func(reduce_func)

    # custom attributes should be pickled
    g.foo = 2

    new_g = _reconstruct_pickle(g)

    _assert_is_identical(g, new_g)
    assert new_g.foo == 2
    assert new_g._message_func == _global_message_func
    assert isinstance(new_g._reduce_func, type(reduce_func))
    assert new_g._reduce_func._name == 'sum'
    assert new_g._reduce_func.msg_field == 'x'
    assert new_g._reduce_func.out_field == 'x'

    # test batched graph with partial set case
    g2 = dgl.DGLGraph()
    g2.add_nodes(4)
    src2 = F.tensor([0, 1])
    dst2 = F.tensor([2, 3])
    g2.add_edges(src2, dst2)

    x2 = F.randn((4, 7))
    y2 = F.randn((3, 5))
    a2 = F.randn((2, 6))
    b2 = F.randn((2, 4))

    g2.ndata['x'] = x2
    g2.nodes[[0, 1, 3]].data['y'] = y2
    g2.edata['a'] = a2
    g2.edata['b'] = b2

    bg = dgl.batch([g, g2])

    bg2 = _reconstruct_pickle(bg)

    _assert_is_identical(bg, bg2)
    new_g, new_g2 = dgl.unbatch(bg2)
    _assert_is_identical(g, new_g)
    _assert_is_identical(g2, new_g2)

    # readonly graph
    g = dgl.DGLGraph([(0, 1), (1, 2)], readonly=True)
    new_g = _reconstruct_pickle(g)
    _assert_is_identical(g, new_g)

    # multigraph
    g = dgl.DGLGraph([(0, 1), (0, 1), (1, 2)])
    new_g = _reconstruct_pickle(g)
    _assert_is_identical(g, new_g)

    # readonly multigraph
    g = dgl.DGLGraph([(0, 1), (0, 1), (1, 2)], readonly=True)
    new_g = _reconstruct_pickle(g)
    _assert_is_identical(g, new_g)
Example #25
def graphsage_cv_train(g, ctx, args, n_classes, train_nid, test_nid, n_test_samples):
    features = g.ndata['features']
    labels = g.ndata['labels']
    in_feats = g.ndata['features'].shape[1]

    norm = mx.nd.expand_dims(1./g.in_degrees().astype('float32'), 1)
    g.ndata['norm'] = norm.as_in_context(ctx)

    degs = g.in_degrees().astype('float32').asnumpy()
    degs[degs > args.num_neighbors] = args.num_neighbors
    g.ndata['subg_norm'] = mx.nd.expand_dims(mx.nd.array(1./degs, ctx=ctx), 1)

    g.update_all(fn.copy_src(src='features', out='m'),
                 fn.sum(msg='m', out='preprocess'),
                 lambda node : {'preprocess': node.data['preprocess'] * node.data['norm']})

    n_layers = args.n_layers
    for i in range(n_layers):
        g.ndata['h_{}'.format(i)] = mx.nd.zeros((features.shape[0], args.n_hidden), ctx=ctx)

    model = GraphSAGETrain(in_feats,
                           args.n_hidden,
                           n_classes,
                           n_layers,
                           args.dropout,
                           prefix='GraphSAGE')

    model.initialize(ctx=ctx)

    loss_fcn = gluon.loss.SoftmaxCELoss()

    infer_model = GraphSAGEInfer(in_feats,
                                 args.n_hidden,
                                 n_classes,
                                 n_layers,
                                 prefix='GraphSAGE')

    infer_model.initialize(ctx=ctx)

    # use optimizer
    print(model.collect_params())
    trainer = gluon.Trainer(model.collect_params(), 'adam',
                            {'learning_rate': args.lr, 'wd': args.weight_decay},
                            kvstore=mx.kv.create('local'))

    # initialize graph
    dur = []
    for epoch in range(args.n_epochs):
        for nf in dgl.contrib.sampling.NeighborSampler(g, args.batch_size,
                                                       args.num_neighbors,
                                                       neighbor_type='in',
                                                       shuffle=True,
                                                       num_workers=32,
                                                       num_hops=n_layers,
                                                       add_self_loop=True,
                                                       seed_nodes=train_nid):
            for i in range(n_layers):
                agg_history_str = 'agg_h_{}'.format(i)
                g.pull(nf.layer_parent_nid(i+1), fn.copy_src(src='h_{}'.format(i), out='m'),
                       fn.sum(msg='m', out=agg_history_str))

            node_embed_names = [['preprocess', 'features', 'h_0']]
            for i in range(1, n_layers):
                node_embed_names.append(['h_{}'.format(i), 'agg_h_{}'.format(i-1), 'subg_norm', 'norm'])
            node_embed_names.append(['agg_h_{}'.format(n_layers-1), 'subg_norm', 'norm'])

            nf.copy_from_parent(node_embed_names=node_embed_names)
            # forward
            with mx.autograd.record():
                pred = model(nf)
                batch_nids = nf.layer_parent_nid(-1).as_in_context(ctx)
                batch_labels = labels[batch_nids]
                loss = loss_fcn(pred, batch_labels)
                loss = loss.sum() / len(batch_nids)

            loss.backward()
            trainer.step(batch_size=1)

            node_embed_names = [['h_{}'.format(i)] for i in range(n_layers)]
            node_embed_names.append([])

            nf.copy_to_parent(node_embed_names=node_embed_names)

        infer_params = infer_model.collect_params()

        for key in infer_params:
            idx = trainer._param2idx[key]
            trainer._kvstore.pull(idx, out=infer_params[key].data())

        num_acc = 0.
        num_tests = 0

        for nf in dgl.contrib.sampling.NeighborSampler(g, args.test_batch_size,
                                                       g.number_of_nodes(),
                                                       neighbor_type='in',
                                                       num_hops=n_layers,
                                                       seed_nodes=test_nid,
                                                       add_self_loop=True):
            node_embed_names = [['preprocess', 'features']]
            for i in range(n_layers):
                node_embed_names.append(['norm', 'subg_norm'])
            nf.copy_from_parent(node_embed_names=node_embed_names)

            pred = infer_model(nf)
            batch_nids = nf.layer_parent_nid(-1).as_in_context(ctx)
            batch_labels = labels[batch_nids]
            num_acc += (pred.argmax(axis=1) == batch_labels).sum().asscalar()
            num_tests += nf.layer_size(-1)
            break

        print("Test Accuracy {:.4f}". format(num_acc/num_tests))
Example #26
 def forward(self, g):
     # g: graph whose node features (node_num x emb_size) are stored in
     # g.ndata['h']; sum-aggregate neighbor features into 'h', then apply
     # self.apply_func to the aggregated result
     g.update_all(fn.copy_src(src='h', out='m'), fn.sum(msg='m', out='h'), self.apply_func)
Example #27
    def forward(self, graph, feat):
        r"""

        Description
        -----------
        Compute GraphSAGE layer.

        Parameters
        ----------
        graph : DGLGraph
            The graph.
        feat : torch.Tensor or pair of torch.Tensor
            If a torch.Tensor is given, it represents the input feature of shape
            :math:`(N, D_{in})`
            where :math:`D_{in}` is size of input feature, :math:`N` is the number of nodes.
            If a pair of torch.Tensor is given, the pair must contain two tensors of shape
            :math:`(N_{in}, D_{in_{src}})` and :math:`(N_{out}, D_{in_{dst}})`.

        Returns
        -------
        torch.Tensor
            The output feature of shape :math:`(N, D_{out})` where :math:`D_{out}`
            is size of output feature.
        """
        with graph.local_scope():
            if isinstance(feat, tuple):
                feat_src = self.feat_drop(feat[0])
                feat_dst = self.feat_drop(feat[1])
            else:
                feat_src = feat_dst = self.feat_drop(feat)
                if graph.is_block:
                    feat_dst = feat_src[:graph.number_of_dst_nodes()]

            h_self = feat_dst

            # Handle the case of graphs without edges
            if graph.number_of_edges() == 0:
                graph.dstdata['neigh'] = torch.zeros(
                    feat_dst.shape[0], self._in_src_feats).to(feat_dst)

            if self._aggre_type == 'mean':
                graph.srcdata['h'] = feat_src
                graph.update_all(fn.copy_src('h', 'm'), fn.mean('m', 'neigh'))
                h_neigh = graph.dstdata['neigh']
            elif self._aggre_type == 'gcn':
                check_eq_shape(feat)
                graph.srcdata['h'] = feat_src
                graph.dstdata['h'] = feat_dst  # same as above if homogeneous
                graph.update_all(fn.copy_src('h', 'm'), fn.sum('m', 'neigh'))
                # divide in_degrees
                degs = graph.in_degrees().to(feat_dst)
                h_neigh = (graph.dstdata['neigh'] +
                           graph.dstdata['h']) / (degs.unsqueeze(-1) + 1)
            elif self._aggre_type == 'pool':
                graph.srcdata['h'] = F.relu(self.fc_pool(feat_src))
                graph.update_all(fn.copy_src('h', 'm'), fn.max('m', 'neigh'))
                h_neigh = graph.dstdata['neigh']
            elif self._aggre_type == 'lstm':
                graph.srcdata['h'] = feat_src
                graph.update_all(fn.copy_src('h', 'm'), self._lstm_reducer)
                h_neigh = graph.dstdata['neigh']
            elif self._aggre_type == 'ginmean':
                graph.srcdata['h'] = feat_src
                graph.update_all(fn.copy_src('h', 'm'),
                                 self._gin_reducer('m', 'neigh'))
                h_neigh = graph.dstdata['neigh']
            elif self._aggre_type == 'cheb':

                def unnLaplacian(feat, D_invsqrt_left, D_invsqrt_right, graph):
                    """ Operation Feat * D^-1/2 A D^-1/2 但是如果写成矩阵乘法:D^-1/2 A D^-1/2 Feat"""
                    #tmp = torch.zeros((D_invsqrt.shape[0],D_invsqrt.shape[0])).to(graph.device)
                    # sparse tensors have no broadcasting; this also relies on the src nodes being laid out contiguously from 0 in feat
                    #print("adj : ",graph.adj(transpose=False,ctx = graph.device).shape)
                    #graph.srcdata['h'] = (torch.mm((graph.adj(transpose=False,ctx = graph.device)),(feat * D_invsqrt)))*D_invsqrt[::graph.number_of_dst_nodes()]
                    #graph.update_all(fn.copy_src('h', 'm'), fn.sum('m', 'h'))
                    #return graph.srcdata['h']
                    graph.srcdata[
                        'h'] = feat * D_invsqrt_right  # feat is srcfeat
                    graph.update_all(fn.copy_u('h', 'm'), fn.sum('m', 'h'))
                    return graph.dstdata.pop('h') * D_invsqrt_left

                D_invsqrt_right = torch.pow(
                    graph.out_degrees().float().clamp(min=1),
                    -0.5).unsqueeze(-1)
                D_invsqrt_left = torch.pow(
                    graph.in_degrees().float().clamp(min=1),
                    -0.5).unsqueeze(-1)
                #print("D_invsqrt shape: ",D_invsqrt.shape)
                #print(graph.__dict__)
                #print(dir(graph))
                #graph.srcdata['h']=feat_src
                #graph.dstdata['h']=feat_dst
                #g = dgl.to_homogeneous(graph,ndata=['h'])
                #dgl._ffi.base.DGLError: Expect number of features to match number of nodes (len(u)). Got 70 and 76 instead.
                #print(g)
                # since the block is different every time, it is safe to call
                # DGL's method each time instead of computing lambda_max ahead of time
                try:
                    lambda_max = laplacian_lambda_max(graph)
                except BaseException:
                    # if the largest eigenvalue is not found
                    dgl_warning(
                        "Largest eigonvalue not found, using default value 2 for lambda_max",
                        RuntimeWarning)
                    lambda_max = torch.tensor(2)  # .to(feat.device)
                if isinstance(lambda_max, list):
                    lambda_max = torch.tensor(lambda_max)  # .to(feat.device)
                if lambda_max.dim() == 1:
                    lambda_max = lambda_max.unsqueeze(-1)  # (B,) to (B, 1)
                # broadcast from (B, 1) to (N, 1)
                # lambda_max = lambda_max * torch.ones((feat.shape[0],1))
                #re_norm = (2 / lambda_max ) * torch.ones((graph.number_of_dst_nodes(),1)).to(graph.device)
                re_norm = (2 / lambda_max.to(graph.device)) * torch.ones(
                    (graph.number_of_dst_nodes(), 1), device=graph.device)
                self._cheb_Xt = X_0 = feat_dst
                graph.srcdata[
                    'h'] = feat_src * D_invsqrt_right  # feat is srcfeat
                graph.update_all(fn.copy_u('h', 'm'), fn.sum('m', 'h'))
                X_1 = -re_norm * graph.dstdata['h'] * D_invsqrt_left + X_0 * (
                    re_norm - 1)
                self._cheb_Xt = torch.cat((self._cheb_Xt, X_1.float()), 1)
            else:
                raise KeyError('Aggregator type {} not recognized.'.format(
                    self._aggre_type))

            # GraphSAGE GCN does not require fc_self.
            if self._aggre_type == 'gcn':
                rst = self.fc_neigh(h_neigh)
            elif self._aggre_type == 'ginmean':
                rst = (1 + self.eps) * h_self + h_neigh
                rst = self.fc_gin(rst)
                if self.norm is not None:
                    rst = self.norm(rst)
                return rst
            elif self._aggre_type == 'cheb':
                rst = self._cheb_linear(self._cheb_Xt)
            else:
                rst = self.fc_self(h_self) + self.fc_neigh(h_neigh)

            # activation
            if self.activation is not None:
                rst = self.activation(rst)
            # normalization
            if self.norm is not None:
                rst = self.norm(rst)
            return rst
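
For reference, the `cheb` branch above performs the first step of the Chebyshev recurrence: with the normalized adjacency :math:`\hat{A} = D^{-1/2} A D^{-1/2}` (computed here with separate in- and out-degree scalings) and :math:`L = I - \hat{A}`, the code computes :math:`X_1 = \tilde{L} X_0` where :math:`\tilde{L} = (2 / \lambda_{max}) L - I`, i.e. ChebNet's rescaled Laplacian applied to the input features, which is then concatenated onto :math:`X_0` for the final linear layer.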
Example #28
def main(args):
    # load and preprocess dataset
    data = load_data(args)

    if args.self_loop and not args.dataset.startswith('reddit'):
        data.graph.add_edges_from([(i, i) for i in range(len(data.graph))])

    train_nid = np.nonzero(data.train_mask)[0].astype(np.int64)
    test_nid = np.nonzero(data.test_mask)[0].astype(np.int64)

    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    if hasattr(torch, 'BoolTensor'):
        train_mask = torch.BoolTensor(data.train_mask)
        val_mask = torch.BoolTensor(data.val_mask)
        test_mask = torch.BoolTensor(data.test_mask)
    else:
        train_mask = torch.ByteTensor(data.train_mask)
        val_mask = torch.ByteTensor(data.val_mask)
        test_mask = torch.ByteTensor(data.test_mask)
    in_feats = features.shape[1]
    n_classes = data.num_labels
    n_edges = data.graph.number_of_edges()

    n_train_samples = train_mask.sum().item()
    n_val_samples = val_mask.sum().item()
    n_test_samples = test_mask.sum().item()

    print("""----Data statistics------'
      #Edges %d
      #Classes %d
      #Train samples %d
      #Val samples %d
      #Test samples %d""" %
          (n_edges, n_classes, n_train_samples, n_val_samples, n_test_samples))

    # create GCN model
    g = DGLGraph(data.graph, readonly=True)
    norm = 1. / g.in_degrees().float().clamp(min=1).unsqueeze(1)  # clamp guards isolated nodes

    if args.gpu < 0:
        cuda = False
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()
        norm = norm.cuda()

    g.ndata['features'] = features

    num_neighbors = args.num_neighbors
    n_layers = args.n_layers

    g.ndata['norm'] = norm

    g.update_all(
        fn.copy_src(src='features', out='m'),
        fn.sum(msg='m', out='preprocess'),
        lambda node: {'preprocess': node.data['preprocess'] * node.data['norm']})
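    # The pass above precomputes preprocess = D^-1 A X on the full graph once,
    # so the sampled minibatches below never re-aggregate raw features.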

    for i in range(n_layers):
        g.ndata['h_{}'.format(i)] = torch.zeros(
            features.shape[0], args.n_hidden).to(device=features.device)

    g.ndata['h_{}'.format(n_layers - 1)] = torch.zeros(
        features.shape[0], 2 * args.n_hidden).to(device=features.device)
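    # Per-layer history buffers reused across minibatches; the last one is
    # twice as wide, presumably because the final layer concatenates two
    # n_hidden-dim blocks (an assumption, not verified here).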

    model = GCNSampling(in_feats, args.n_hidden, n_classes, n_layers, F.relu,
                        args.dropout)

    loss_fcn = nn.CrossEntropyLoss()

    infer_model = GCNInfer(in_feats, args.n_hidden, n_classes, n_layers,
                           F.relu)

    if cuda:
        model.cuda()
        infer_model.cuda()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    for epoch in range(args.n_epochs):
        for nf in dgl.contrib.sampling.NeighborSampler(g,
                                                       args.batch_size,
                                                       num_neighbors,
                                                       neighbor_type='in',
                                                       shuffle=True,
                                                       num_workers=32,
                                                       num_hops=n_layers,
                                                       seed_nodes=train_nid):
            for i in range(n_layers):
                agg_history_str = 'agg_h_{}'.format(i)
                g.pull(
                    nf.layer_parent_nid(i + 1).long(),
                    fn.copy_src(src='h_{}'.format(i), out='m'),
                    fn.sum(msg='m', out=agg_history_str),
                    lambda node: {
                        agg_history_str:
                        node.data[agg_history_str] * node.data['norm']
                    })
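            # The pull above refreshed the aggregated history for the sampled
            # nodes; copy_from_parent below materializes the named parent
            # tensors as per-layer data inside the NodeFlow.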

            node_embed_names = [['preprocess', 'h_0']]
            for i in range(1, n_layers):
                node_embed_names.append(
                    ['h_{}'.format(i), 'agg_h_{}'.format(i - 1)])
            node_embed_names.append(['agg_h_{}'.format(n_layers - 1)])
            nf.copy_from_parent(node_embed_names=node_embed_names)

            model.train()
            # forward
            pred = model(nf)
            batch_nids = nf.layer_parent_nid(-1).to(device=pred.device).long()
            batch_labels = labels[batch_nids]
            loss = loss_fcn(pred, batch_labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            node_embed_names = [['h_{}'.format(i)] for i in range(n_layers)]
            node_embed_names.append([])
            nf.copy_to_parent(node_embed_names=node_embed_names)

        for infer_param, param in zip(infer_model.parameters(),
                                      model.parameters()):
            infer_param.data.copy_(param.data)
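        # infer_model now holds the freshly trained weights; it evaluates
        # with full (unsampled) neighborhoods in the loop below.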

        num_acc = 0.

        for nf in dgl.contrib.sampling.NeighborSampler(g,
                                                       args.test_batch_size,
                                                       g.number_of_nodes(),
                                                       neighbor_type='in',
                                                       num_workers=32,
                                                       num_hops=n_layers,
                                                       seed_nodes=test_nid):
            node_embed_names = [['preprocess']]
            for i in range(n_layers):
                node_embed_names.append(['norm'])
            nf.copy_from_parent(node_embed_names=node_embed_names)

            infer_model.eval()
            with torch.no_grad():
                pred = infer_model(nf)
                batch_nids = nf.layer_parent_nid(-1).to(
                    device=pred.device).long()
                batch_labels = labels[batch_nids]
                num_acc += (pred.argmax(
                    dim=1) == batch_labels).sum().cpu().item()

        print("Test Accuracy {:.4f}".format(num_acc / n_test_samples))
Beispiel #29
0
def graphsage_cv_train(g, ctx, args, n_classes, train_nid, test_nid,
                       n_test_samples, distributed):
    features = g.ndata['features']
    labels = g.ndata['labels']
    in_feats = g.ndata['features'].shape[1]
    g_ctx = features.context

    norm = mx.nd.expand_dims(1. / g.in_degrees().astype('float32'), 1)
    g.ndata['norm'] = norm.as_in_context(g_ctx)
    degs = g.in_degrees().astype('float32').asnumpy()
    degs[degs > args.num_neighbors] = args.num_neighbors
    g.ndata['subg_norm'] = mx.nd.expand_dims(mx.nd.array(1. / degs, ctx=g_ctx),
                                             1)
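    # 'norm' is 1/deg on the full graph, while 'subg_norm' caps the degree at
    # num_neighbors so it matches the fan-in of a sampled subgraph.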
    n_layers = args.n_layers

    if distributed:
        g.dist_update_all(
            fn.copy_src(src='features', out='m'),
            fn.sum(msg='m', out='preprocess'),
            lambda node: {'preprocess': node.data['preprocess'] * node.data['norm']})
        for i in range(n_layers):
            g.init_ndata('h_{}'.format(i), (features.shape[0], args.n_hidden),
                         'float32')
            g.init_ndata('agg_h_{}'.format(i),
                         (features.shape[0], args.n_hidden), 'float32')
    else:
        g.update_all(
            fn.copy_src(src='features', out='m'),
            fn.sum(msg='m', out='preprocess'),
            lambda node: {'preprocess': node.data['preprocess'] * node.data['norm']})
        for i in range(n_layers):
            g.ndata['h_{}'.format(i)] = mx.nd.zeros(
                (features.shape[0], args.n_hidden), ctx=g_ctx)
            g.ndata['agg_h_{}'.format(i)] = mx.nd.zeros(
                (features.shape[0], args.n_hidden), ctx=g_ctx)

    model = GraphSAGETrain(in_feats,
                           args.n_hidden,
                           n_classes,
                           n_layers,
                           args.dropout,
                           prefix='GraphSAGE')

    model.initialize(ctx=ctx)

    loss_fcn = gluon.loss.SoftmaxCELoss()

    infer_model = GraphSAGEInfer(in_feats,
                                 args.n_hidden,
                                 n_classes,
                                 n_layers,
                                 prefix='GraphSAGE')

    infer_model.initialize(ctx=ctx)

    # use optimizer
    print(model.collect_params())
    kv_type = 'dist_sync' if distributed else 'local'
    trainer = gluon.Trainer(model.collect_params(),
                            'adam', {
                                'learning_rate': args.lr,
                                'wd': args.weight_decay
                            },
                            kvstore=mx.kv.create(kv_type))

    # initialize graph
    dur = []

    adj = g.adjacency_matrix().as_in_context(g_ctx)
    for epoch in range(args.n_epochs):
        start = time.time()
        if distributed:
            msg_head = "Worker {:d}, epoch {:d}".format(g.worker_id, epoch)
        else:
            msg_head = "epoch {:d}".format(epoch)
        for nf in dgl.contrib.sampling.NeighborSampler(g,
                                                       args.batch_size,
                                                       args.num_neighbors,
                                                       neighbor_type='in',
                                                       shuffle=True,
                                                       num_workers=32,
                                                       num_hops=n_layers,
                                                       add_self_loop=True,
                                                       seed_nodes=train_nid):
            for i in range(n_layers):
                agg_history_str = 'agg_h_{}'.format(i)
                dests = nf.layer_parent_nid(i + 1).as_in_context(g_ctx)
                # TODO we could use DGLGraph.pull to implement this, but the current
                # implementation of pull is very slow. Let's manually do it for now.
                g.ndata[agg_history_str][dests] = mx.nd.dot(
                    mx.nd.take(adj, dests), g.ndata['h_{}'.format(i)])
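                # i.e. agg_h[dests] = A[dests, :] @ h, a dense equivalent of
                # pull with copy_src + sum.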

            node_embed_names = [['preprocess', 'features', 'h_0']]
            for i in range(1, n_layers):
                node_embed_names.append([
                    'h_{}'.format(i), 'agg_h_{}'.format(i - 1), 'subg_norm',
                    'norm'
                ])
            node_embed_names.append(
                ['agg_h_{}'.format(n_layers - 1), 'subg_norm', 'norm'])

            nf.copy_from_parent(node_embed_names=node_embed_names, ctx=ctx)
            # forward
            with mx.autograd.record():
                pred = model(nf)
                batch_nids = nf.layer_parent_nid(-1)
                batch_labels = labels[batch_nids].as_in_context(ctx)
                loss = loss_fcn(pred, batch_labels)
                if distributed:
                    loss = loss.sum() / (len(batch_nids) * g.num_workers)
                else:
                    loss = loss.sum() / (len(batch_nids))

            loss.backward()
            trainer.step(batch_size=1)

            node_embed_names = [['h_{}'.format(i)] for i in range(n_layers)]
            node_embed_names.append([])

            nf.copy_to_parent(node_embed_names=node_embed_names)
        print(msg_head + ': training takes ' + str(time.time() - start))

        infer_params = infer_model.collect_params()

        for key in infer_params:
            idx = trainer._param2idx[key]
            trainer._kvstore.pull(idx, out=infer_params[key].data())

        num_acc = 0.
        num_tests = 0

        if not distributed or g.worker_id == 0:
            start = time.time()
            for nf in dgl.contrib.sampling.NeighborSampler(
                    g,
                    args.test_batch_size,
                    g.number_of_nodes(),
                    neighbor_type='in',
                    num_hops=n_layers,
                    seed_nodes=test_nid,
                    add_self_loop=True):
                node_embed_names = [['preprocess', 'features']]
                for i in range(n_layers):
                    node_embed_names.append(['norm', 'subg_norm'])
                nf.copy_from_parent(node_embed_names=node_embed_names, ctx=ctx)

                pred = infer_model(nf)
                batch_nids = nf.layer_parent_nid(-1)
                batch_labels = labels[batch_nids].as_in_context(ctx)
                num_acc += (pred.argmax(
                    axis=1) == batch_labels).sum().asscalar()
                num_tests += nf.layer_size(-1)
                if distributed:
                    g._sync_barrier()
                print(msg_head +
                      ": Test Accuracy {:.4f}".format(num_acc / num_tests))
                break
        elif distributed:
            g._sync_barrier()
Beispiel #30
0
def check_partition(g, part_method, reshuffle):
    g.ndata['labels'] = F.arange(0, g.number_of_nodes())
    g.ndata['feats'] = F.tensor(np.random.randn(g.number_of_nodes(), 10), F.float32)
    g.edata['feats'] = F.tensor(np.random.randn(g.number_of_edges(), 10), F.float32)
    g.update_all(fn.copy_src('feats', 'msg'), fn.sum('msg', 'h'))
    g.update_all(fn.copy_edge('feats', 'msg'), fn.sum('msg', 'eh'))
    num_parts = 4
    num_hops = 2

    partition_graph(g, 'test', num_parts, '/tmp/partition', num_hops=num_hops,
                    part_method=part_method, reshuffle=reshuffle)
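    # partition_graph writes a JSON metadata file plus one graph per part;
    # num_hops=2 also stores a 2-hop halo of remote nodes in each partition,
    # which lets the local aggregation below reproduce full-graph results.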
    part_sizes = []
    for i in range(num_parts):
        part_g, node_feats, edge_feats, gpb, _ = load_partition('/tmp/partition/test.json', i)

        # Check the metadata
        assert gpb._num_nodes() == g.number_of_nodes()
        assert gpb._num_edges() == g.number_of_edges()

        assert gpb.num_partitions() == num_parts
        gpb_meta = gpb.metadata()
        assert len(gpb_meta) == num_parts
        assert len(gpb.partid2nids(i)) == gpb_meta[i]['num_nodes']
        assert len(gpb.partid2eids(i)) == gpb_meta[i]['num_edges']
        part_sizes.append((gpb_meta[i]['num_nodes'], gpb_meta[i]['num_edges']))

        local_nid = gpb.nid2localnid(F.boolean_mask(part_g.ndata[dgl.NID], part_g.ndata['inner_node']), i)
        assert F.dtype(local_nid) in (F.int64, F.int32)
        assert np.all(F.asnumpy(local_nid) == np.arange(0, len(local_nid)))
        local_eid = gpb.eid2localeid(F.boolean_mask(part_g.edata[dgl.EID], part_g.edata['inner_edge']), i)
        assert F.dtype(local_eid) in (F.int64, F.int32)
        assert np.all(F.asnumpy(local_eid) == np.arange(0, len(local_eid)))

        # Check the node map.
        local_nodes = F.boolean_mask(part_g.ndata[dgl.NID], part_g.ndata['inner_node'])
        llocal_nodes = F.nonzero_1d(part_g.ndata['inner_node'])
        local_nodes1 = gpb.partid2nids(i)
        assert F.dtype(local_nodes1) in (F.int32, F.int64)
        assert np.all(np.sort(F.asnumpy(local_nodes)) == np.sort(F.asnumpy(local_nodes1)))

        # Check the edge map.
        local_edges = F.boolean_mask(part_g.edata[dgl.EID], part_g.edata['inner_edge'])
        local_edges1 = gpb.partid2eids(i)
        assert F.dtype(local_edges1) in (F.int32, F.int64)
        assert np.all(np.sort(F.asnumpy(local_edges)) == np.sort(F.asnumpy(local_edges1)))

        if reshuffle:
            part_g.ndata['feats'] = F.gather_row(g.ndata['feats'], part_g.ndata['orig_id'])
            part_g.edata['feats'] = F.gather_row(g.edata['feats'], part_g.edata['orig_id'])
            # when we read node data from the original global graph, we should use orig_id.
            local_nodes = F.boolean_mask(part_g.ndata['orig_id'], part_g.ndata['inner_node'])
            local_edges = F.boolean_mask(part_g.edata['orig_id'], part_g.edata['inner_edge'])
        else:
            part_g.ndata['feats'] = F.gather_row(g.ndata['feats'], part_g.ndata[dgl.NID])
            # use EID (not NID) to index edge features
            part_g.edata['feats'] = F.gather_row(g.edata['feats'], part_g.edata[dgl.EID])
        part_g.update_all(fn.copy_src('feats', 'msg'), fn.sum('msg', 'h'))
        part_g.update_all(fn.copy_edge('feats', 'msg'), fn.sum('msg', 'eh'))
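        # Re-running the same aggregations on the partition (whose halo holds
        # every neighbor of an inner node) must reproduce the full-graph
        # results for the inner nodes.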
        assert F.allclose(F.gather_row(g.ndata['h'], local_nodes),
                          F.gather_row(part_g.ndata['h'], llocal_nodes))
        assert F.allclose(F.gather_row(g.ndata['eh'], local_nodes),
                          F.gather_row(part_g.ndata['eh'], llocal_nodes))

        for name in ['labels', 'feats']:
            assert name in node_feats
            assert node_feats[name].shape[0] == len(local_nodes)
            assert np.all(F.asnumpy(g.ndata[name])[F.asnumpy(local_nodes)] == F.asnumpy(node_feats[name]))
        for name in ['feats']:
            assert name in edge_feats
            assert edge_feats[name].shape[0] == len(local_edges)
            assert np.all(F.asnumpy(g.edata[name])[F.asnumpy(local_edges)] == F.asnumpy(edge_feats[name]))

    if reshuffle:
        node_map = []
        edge_map = []
        for i, (num_nodes, num_edges) in enumerate(part_sizes):
            node_map.append(np.ones(num_nodes) * i)
            edge_map.append(np.ones(num_edges) * i)
        node_map = np.concatenate(node_map)
        edge_map = np.concatenate(edge_map)
        nid2pid = gpb.nid2partid(F.arange(0, len(node_map)))
        assert F.dtype(nid2pid) in (F.int32, F.int64)
        assert np.all(F.asnumpy(nid2pid) == node_map)
        eid2pid = gpb.eid2partid(F.arange(0, len(edge_map)))
        assert F.dtype(eid2pid) in (F.int32, F.int64)
        assert np.all(F.asnumpy(eid2pid) == edge_map)
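
A hedged usage sketch of the test above; it assumes g is a homogeneous DGLGraph (e.g. from dgl.data.CoraGraphDataset()[0]) and exercises both partition methods partition_graph accepts.

for method in ('metis', 'random'):
    for reshuffle in (True, False):
        check_partition(g, method, reshuffle)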