Example #1
def test_isolated_nodes(idtype):
    g = dgl.graph(([0, 1], [1, 2]), num_nodes=5, idtype=idtype, device=F.ctx())
    assert g.number_of_nodes() == 5

    g = dgl.heterograph({('user', 'plays', 'game'): ([0, 0, 1], [2, 3, 2])}, {
                             'user': 5,
                             'game': 7
                         },
                        idtype=idtype,
                        device=F.ctx())
    assert g.idtype == idtype
    assert g.number_of_nodes('user') == 5
    assert g.number_of_nodes('game') == 7

    # Test backward compatibility
    g = dgl.heterograph({('user', 'plays', 'game'): ([0, 0, 1], [2, 3, 2])}, {
                             'user': 5,
                             'game': 7
                         },
                        idtype=idtype,
                        device=F.ctx())
    assert g.idtype == idtype
    assert g.number_of_nodes('user') == 5
    assert g.number_of_nodes('game') == 7
Example #2
def test_pinsage_sampling():
    def _test_sampler(g, sampler, ntype):
        neighbor_g = sampler(F.tensor([0, 2], dtype=F.int64))
        assert neighbor_g.ntypes == [ntype]
        u, v = neighbor_g.all_edges(form='uv', order='eid')
        uv = list(zip(F.asnumpy(u).tolist(), F.asnumpy(v).tolist()))
        assert (1, 0) in uv or (0, 0) in uv
        assert (2, 2) in uv or (3, 2) in uv

    g = dgl.heterograph({
        ('item', 'bought-by', 'user'): [(0, 0), (0, 1), (1, 0), (1, 1), (2, 2),
                                        (2, 3), (3, 2), (3, 3)],
        ('user', 'bought', 'item'): [(0, 0), (1, 0), (0, 1), (1, 1), (2, 2),
                                     (3, 2), (2, 3), (3, 3)]
    })
    sampler = dgl.sampling.PinSAGESampler(g, 'item', 'user', 4, 0.5, 3, 2)
    _test_sampler(g, sampler, 'item')
    sampler = dgl.sampling.RandomWalkNeighborSampler(g, 4, 0.5, 3, 2,
                                                     ['bought-by', 'bought'])
    _test_sampler(g, sampler, 'item')
    sampler = dgl.sampling.RandomWalkNeighborSampler(
        g, 4, 0.5, 3, 2, [('item', 'bought-by', 'user'),
                          ('user', 'bought', 'item')])
    _test_sampler(g, sampler, 'item')
    g = dgl.graph([(0, 0), (0, 1), (1, 0), (1, 1), (2, 2), (2, 3), (3, 2),
                   (3, 3)])
    sampler = dgl.sampling.RandomWalkNeighborSampler(g, 4, 0.5, 3, 2)
    _test_sampler(g, sampler, g.ntypes[0])
    g = dgl.heterograph({
        ('A', 'AB', 'B'): [(0, 1), (2, 3)],
        ('B', 'BC', 'C'): [(1, 2), (3, 1)],
        ('C', 'CA', 'A'): [(2, 0), (1, 2)]
    })
    sampler = dgl.sampling.RandomWalkNeighborSampler(g, 4, 0.5, 3, 2,
                                                     ['AB', 'BC', 'CA'])
    _test_sampler(g, sampler, 'A')
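For orientation, here is a minimal stand-alone sketch of the same PinSAGE-style sampler outside the test harness. The two-type graph and seed IDs are invented for illustration, and it assumes a DGL version whose dgl.heterograph accepts (src-list, dst-list) pairs; the four numeric arguments mirror the call above (number of traversals, termination probability, number of random walks, number of neighbors).

import dgl
import torch

# hypothetical two-type interaction graph (IDs are made up)
g = dgl.heterograph({
    ('item', 'bought-by', 'user'): ([0, 0, 1], [0, 1, 1]),
    ('user', 'bought', 'item'): ([0, 1, 1], [0, 0, 1])})
sampler = dgl.sampling.PinSAGESampler(g, 'item', 'user', 4, 0.5, 3, 2)
# item-item neighbor graph induced by random walks from seed item 0
frontier = sampler(torch.LongTensor([0]))
print(frontier)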
Example #3
def test_group_apply_edges():
    def edge_udf(edges):
        h = F.sum(edges.data['feat'] * (edges.src['h'] + edges.dst['h']),
                  dim=2)
        normalized_feat = F.softmax(h, dim=1)
        return {"norm_feat": normalized_feat}

    elist = []
    for v in [1, 2, 3, 4, 5, 6, 7, 8]:
        elist.append((0, v))
    for v in [2, 3, 4, 6, 7, 8]:
        elist.append((1, v))
    for v in [2, 3, 4, 5, 6, 7, 8]:
        elist.append((2, v))
    g = dgl.graph(elist)

    g.ndata['h'] = F.randn((g.number_of_nodes(), D))
    g.edata['feat'] = F.randn((g.number_of_edges(), D))

    def _test(group_by):
        g.group_apply_edges(group_by=group_by, func=edge_udf)
        if group_by == 'src':
            u, v, eid = g.out_edges(1, form='all')
        else:
            u, v, eid = g.in_edges(5, form='all')
        out_feat = g.edges[eid].data['norm_feat']
        result = (g.nodes[u].data['h'] +
                  g.nodes[v].data['h']) * g.edges[eid].data['feat']
        result = F.softmax(F.sum(result, dim=1), dim=0)
        assert F.allclose(out_feat, result)

    # test group by source nodes
    _test('src')

    # test group by destination nodes
    _test('dst')
Example #4
def test_agnn_conv():
    g = dgl.DGLGraph(nx.erdos_renyi_graph(20, 0.3))
    ctx = F.ctx()

    agnn_conv = nn.AGNNConv(0.1, True)
    agnn_conv.initialize(ctx=ctx)
    print(agnn_conv)

    # test#1: basic
    feat = F.randn((20, 10))
    h = agnn_conv(g, feat)
    assert h.shape == (20, 10)

    g = dgl.bipartite(sp.sparse.random(100, 200, density=0.1))
    feat = (F.randn((100, 5)), F.randn((200, 5)))
    h = agnn_conv(g, feat)
    assert h.shape == (200, 5)

    g = dgl.graph(sp.sparse.random(100, 100, density=0.001))
    seed_nodes = np.unique(g.edges()[1].asnumpy())
    block = dgl.to_block(g, seed_nodes)
    feat = F.randn((block.number_of_src_nodes(), 5))
    h = agnn_conv(block, feat)
    assert h.shape == (block.number_of_dst_nodes(), 5)
Example #5
def test_neighbor_nonuniform(num_workers):
    g = dgl.graph(([1, 2, 3, 4, 5, 6, 7, 8], [0, 0, 0, 0, 1, 1, 1, 1]))
    g.edata['p'] = torch.FloatTensor([1, 1, 0, 0, 1, 1, 0, 0])
    sampler = dgl.dataloading.MultiLayerNeighborSampler([2], prob='p')
    dataloader = dgl.dataloading.NodeDataLoader(g, [0, 1], sampler, batch_size=1, device=F.ctx())
    for input_nodes, output_nodes, blocks in dataloader:
        seed = output_nodes.item()
        neighbors = set(input_nodes[1:].cpu().numpy())
        if seed == 1:
            assert neighbors == {5, 6}
        elif seed == 0:
            assert neighbors == {1, 2}

    g = dgl.heterograph({
        ('B', 'BA', 'A'): ([1, 2, 3, 4, 5, 6, 7, 8], [0, 0, 0, 0, 1, 1, 1, 1]),
        ('C', 'CA', 'A'): ([1, 2, 3, 4, 5, 6, 7, 8], [0, 0, 0, 0, 1, 1, 1, 1]),
        })
    g.edges['BA'].data['p'] = torch.FloatTensor([1, 1, 0, 0, 1, 1, 0, 0])
    g.edges['CA'].data['p'] = torch.FloatTensor([0, 0, 1, 1, 0, 0, 1, 1])
    sampler = dgl.dataloading.MultiLayerNeighborSampler([2], prob='p')
    dataloader = dgl.dataloading.NodeDataLoader(
        g, {'A': [0, 1]}, sampler, batch_size=1, device=F.ctx())
    for input_nodes, output_nodes, blocks in dataloader:
        seed = output_nodes['A'].item()
        # Seed and neighbors are of different node types so slicing is not necessary here.
        neighbors = set(input_nodes['B'].cpu().numpy())
        if seed == 1:
            assert neighbors == {5, 6}
        elif seed == 0:
            assert neighbors == {1, 2}

        neighbors = set(input_nodes['C'].cpu().numpy())
        if seed == 1:
            assert neighbors == {7, 8}
        elif seed == 0:
            assert neighbors == {3, 4}
Example #6
def generate_graph(idtype=F.int64, grad=False):
    '''
    s, d, eid
    0, 1, 0
    1, 9, 1
    0, 2, 2
    2, 9, 3
    0, 3, 4
    3, 9, 5
    0, 4, 6
    4, 9, 7
    0, 5, 8
    5, 9, 9
    0, 6, 10
    6, 9, 11
    0, 7, 12
    7, 9, 13
    0, 8, 14
    8, 9, 15
    9, 0, 16
    '''
    u = F.tensor([0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0, 8, 9])
    v = F.tensor([1, 9, 2, 9, 3, 9, 4, 9, 5, 9, 6, 9, 7, 9, 8, 9, 0])
    g = dgl.graph((u, v), idtype=idtype)
    assert g.device == F.ctx()
    ncol = F.randn((10, D))
    ecol = F.randn((17, D))
    if grad:
        ncol = F.attach_grad(ncol)
        ecol = F.attach_grad(ecol)

    g.ndata['h'] = ncol
    g.edata['w'] = ecol
    g.set_n_initializer(dgl.init.zero_initializer)
    g.set_e_initializer(dgl.init.zero_initializer)
    return g
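As a hedged follow-up, the star-shaped graph returned above can be exercised with a standard message-passing step; dgl.function primitives are part of DGL's public API, and D is the feature dimension defined elsewhere in the test module.

import dgl.function as fn

g = generate_graph()
# weight each source node feature by the edge feature, then sum over incoming edges
g.update_all(fn.u_mul_e('h', 'w', 'm'), fn.sum('m', 'h_new'))
print(g.ndata['h_new'].shape)  # (10, D)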
Example #7
def test_sage_conv(aggre_type):
    ctx = F.ctx()
    g = dgl.DGLGraph(sp.sparse.random(100, 100, density=0.1), readonly=True)
    sage = nn.SAGEConv(5, 10, aggre_type)
    feat = F.randn((100, 5))
    sage = sage.to(ctx)
    h = sage(g, feat)
    assert h.shape[-1] == 10

    g = dgl.graph(sp.sparse.random(100, 100, density=0.1))
    sage = nn.SAGEConv(5, 10, aggre_type)
    feat = F.randn((100, 5))
    sage = sage.to(ctx)
    h = sage(g, feat)
    assert h.shape[-1] == 10

    g = dgl.bipartite(sp.sparse.random(100, 200, density=0.1))
    dst_dim = 5 if aggre_type != 'gcn' else 10
    sage = nn.SAGEConv((10, dst_dim), 2, aggre_type)
    feat = (F.randn((100, 10)), F.randn((200, dst_dim)))
    sage = sage.to(ctx)
    h = sage(g, feat)
    assert h.shape[-1] == 2
    assert h.shape[0] == 200
Example #8
    def _create_ckg_graph(self, form='dgl', show_relation=False):
        user_num = self.user_num

        kg_tensor = self._dataframe_to_interaction(self.kg_feat)
        inter_tensor = self._dataframe_to_interaction(self.inter_feat)

        head_entity = kg_tensor[self.head_entity_field] + user_num
        tail_entity = kg_tensor[self.tail_entity_field] + user_num

        user = inter_tensor[self.uid_field]
        item = inter_tensor[self.iid_field] + user_num

        src = torch.cat([user, item, head_entity])
        tgt = torch.cat([item, user, tail_entity])

        if show_relation:
            ui_rel_num = user.shape[0]
            ui_rel_id = self.relation_num - 1
            assert self.field2id_token[self.relation_field][ui_rel_id] == '[UI-Relation]'
            kg_rel = kg_tensor[self.relation_field]
            ui_rel = torch.full((2 * ui_rel_num,), ui_rel_id, dtype=kg_rel.dtype)
            edge = torch.cat([ui_rel, kg_rel])

        if form == 'dgl':
            import dgl
            graph = dgl.graph((src, tgt))
            if show_relation:
                graph.edata[self.relation_field] = edge
            return graph
        elif form == 'pyg':
            from torch_geometric.data import Data
            edge_attr = edge if show_relation else None
            graph = Data(edge_index=torch.stack([src, tgt]), edge_attr=edge_attr)
            return graph
        else:
            raise NotImplementedError('graph format [{}] has not been implemented.'.format(form))
Example #9
def load_reddit(args):
    
    data = RedditDataset(self_loop=False)
    train_mask = data.train_mask
    test_mask = data.test_mask
    val_mask = data.val_mask
    
    features = torch.Tensor(data.features)
    in_feats = features.shape[1]
    labels = torch.LongTensor(data.labels)
    n_classes = data.num_labels
    
    # Construct graph
    g = dgl.graph(data.graph.all_edges())
    g.ndata['features'] = features
    prepare_mp(g)

    train_nid = torch.LongTensor(np.nonzero(train_mask)[0])
    val_nid = torch.LongTensor(np.nonzero(val_mask)[0])
    train_mask = torch.BoolTensor(train_mask).cuda()
    test_mask = torch.BoolTensor(test_mask).cuda()
    val_mask = torch.BoolTensor(val_mask).cuda()
   
    return g, features, labels, train_mask, val_mask, test_mask, train_nid
Example #10
    def forward(self, pos, centroids, feat=None):
        dev = pos.device
        group_idx = self.frnn(pos, centroids)
        B, N, _ = pos.shape
        glist = []
        for i in range(B):
            center = torch.zeros((N)).to(dev)
            center[centroids[i]] = 1
            src = group_idx[i].contiguous().view(-1)
            dst = centroids[i].view(-1, 1).repeat(1, self.n_neighbor).view(-1)

            unified = torch.cat([src, dst])
            uniq, inv_idx = torch.unique(unified, return_inverse=True)
            src_idx = inv_idx[:src.shape[0]]
            dst_idx = inv_idx[src.shape[0]:]

            g = dgl.graph((src_idx, dst_idx))
            g.ndata['pos'] = pos[i][uniq]
            g.ndata['center'] = center[uniq]
            if feat is not None:
                g.ndata['feat'] = feat[i][uniq]
            glist.append(g)
        bg = dgl.batch(glist)
        return bg
Example #11
def split_train_valid_test(g):
    u, v = g.edges()

    eids = np.arange(g.number_of_edges())
    eids = np.random.permutation(eids)

    valid_size = int(len(eids) * 0.1)
    test_size = int(len(eids) * 0.1)
    train_size = g.number_of_edges() - test_size - valid_size

    test_pos_u, test_pos_v = u[eids[:test_size]], v[eids[:test_size]]
    valid_pos_u, valid_pos_v =  u[eids[test_size:test_size+valid_size]], v[eids[test_size:test_size+valid_size]]
    train_pos_u, train_pos_v = u[eids[test_size+valid_size:]], v[eids[test_size+valid_size:]]

    # Find all negative edges and split them for training and testing
    adj = sp.coo_matrix((np.ones(len(u)), (u.numpy(), v.numpy())))
    adj_neg = 1 - adj.todense() - np.eye(g.number_of_nodes())
    neg_u, neg_v = np.where(adj_neg != 0)

    neg_eids = np.random.choice(len(neg_u), g.number_of_edges())
    test_neg_u, test_neg_v = neg_u[neg_eids[:test_size]], neg_v[neg_eids[:test_size]]
    valid_neg_u, valid_neg_v = neg_u[neg_eids[test_size:test_size+valid_size]], neg_v[neg_eids[test_size:test_size+valid_size]]
    train_neg_u, train_neg_v = neg_u[neg_eids[test_size+valid_size:]], neg_v[neg_eids[test_size+valid_size:]]

    train_g = dgl.remove_edges(g, eids[:test_size+valid_size])

    train_pos_g = dgl.graph((train_pos_u, train_pos_v), num_nodes=g.number_of_nodes())
    train_neg_g = dgl.graph((train_neg_u, train_neg_v), num_nodes=g.number_of_nodes())

    valid_pos_g = dgl.graph((valid_pos_u, valid_pos_v), num_nodes=g.number_of_nodes())
    valid_neg_g = dgl.graph((valid_neg_u, valid_neg_v), num_nodes=g.number_of_nodes())

    test_pos_g = dgl.graph((test_pos_u, test_pos_v), num_nodes=g.number_of_nodes())
    test_neg_g = dgl.graph((test_neg_u, test_neg_v), num_nodes=g.number_of_nodes())

    return train_g, train_pos_g, train_neg_g, valid_pos_g, valid_neg_g, test_pos_g, test_neg_g
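A minimal hedged usage sketch follows; the 10-node toy graph is invented for illustration, and it assumes the function's module already imports numpy as np, scipy.sparse as sp, and dgl as used in the snippet above.

import torch
import dgl

# toy graph: a 10-node cycle plus chords, 20 directed edges, no self-loops
src = torch.tensor(list(range(10)) + list(range(10)))
dst = torch.tensor([(i + 1) % 10 for i in range(10)] + [(i + 5) % 10 for i in range(10)])
g = dgl.graph((src, dst))
splits = split_train_valid_test(g)
train_g, train_pos_g, train_neg_g, valid_pos_g, valid_neg_g, test_pos_g, test_neg_g = splits
print(train_g.number_of_edges(), test_pos_g.number_of_edges())  # 16 and 2 with the 80/10/10 split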
Example #12
def mol_to_nearest_neighbor_graph(mol,
                                  coordinates,
                                  neighbor_cutoff,
                                  max_num_neighbors=None,
                                  p_distance=2,
                                  add_self_loop=False,
                                  node_featurizer=None,
                                  edge_featurizer=None,
                                  canonical_atom_order=True,
                                  keep_dists=False,
                                  dist_field='dist',
                                  explicit_hydrogens=False,
                                  num_virtual_nodes=0):
    """Convert an RDKit molecule into a nearest neighbor graph and featurize for it.

    Unlike the bigraph and complete graph, the nearest neighbor graph
    may not be symmetric, since i being among the closest neighbors of j
    does not necessarily imply the reverse.

    Parameters
    ----------
    mol : rdkit.Chem.rdchem.Mol
        RDKit molecule holder
    coordinates : numpy.ndarray of shape (N, D)
        The coordinates of atoms in the molecule. N for the number of atoms
        and D for the dimensions of the coordinates.
    neighbor_cutoff : float
        If the distance between a pair of nodes is larger than neighbor_cutoff,
        they will not be considered as neighboring nodes.
    max_num_neighbors : int or None.
        If not None, then this specifies the maximum number of neighbors
        allowed for each atom. Default to None.
    p_distance : int
        We compute the distance between neighbors using Minkowski (:math:`l_p`)
        distance. When ``p_distance = 1``, Minkowski distance is equivalent to
        Manhattan distance. When ``p_distance = 2``, Minkowski distance is
        equivalent to the standard Euclidean distance. Default to 2.
    add_self_loop : bool
        Whether to add self loops in DGLGraphs. Default to False.
    node_featurizer : callable, rdkit.Chem.rdchem.Mol -> dict
        Featurization for nodes like atoms in a molecule, which can be used to update
        ndata for a DGLGraph. Default to None.
    edge_featurizer : callable, rdkit.Chem.rdchem.Mol -> dict
        Featurization for edges like bonds in a molecule, which can be used to update
        edata for a DGLGraph. Default to None.
    canonical_atom_order : bool
        Whether to use a canonical order of atoms returned by RDKit. Setting it
        to true might change the order of atoms in the graph constructed. Default
        to True.
    keep_dists : bool
        Whether to store the distance between neighboring atoms in ``edata`` of the
        constructed DGLGraphs. Default to False.
    dist_field : str
        Field for storing distance between neighboring atoms in ``edata``. This comes
        into effect only when ``keep_dists=True``. Default to ``'dist'``.
    explicit_hydrogens : bool
        Whether to explicitly represent hydrogens as nodes in the graph. If True,
        it will call rdkit.Chem.AddHs(mol). Default to False.
    num_virtual_nodes : int
        The number of virtual nodes to add. The virtual nodes will be connected to
        all real nodes with virtual edges. If the returned graph has any node/edge
        feature, an additional column of binary values will be used for each feature
        to indicate the identity of virtual node/edges. The features of the virtual
        nodes/edges will be zero vectors except for the additional column. Default to 0.

    Returns
    -------
    DGLGraph or None
        Nearest neighbor DGLGraph for the molecule if :attr:`mol` is valid and None otherwise.

    Examples
    --------
    >>> from dgllife.utils import mol_to_nearest_neighbor_graph
    >>> from rdkit import Chem
    >>> from rdkit.Chem import AllChem

    >>> mol = Chem.MolFromSmiles('CC1(C(N2C(S1)C(C2=O)NC(=O)CC3=CC=CC=C3)C(=O)O)C')
    >>> AllChem.EmbedMolecule(mol)
    >>> AllChem.MMFFOptimizeMolecule(mol)
    >>> coords = get_mol_3d_coordinates(mol)
    >>> g = mol_to_nearest_neighbor_graph(mol, coords, neighbor_cutoff=1.25)
    >>> print(g)
    DGLGraph(num_nodes=23, num_edges=6,
             ndata_schemes={}
             edata_schemes={})

    Quite often we will want to use the distance between end atoms of edges, this can be
    achieved with

    >>> g = mol_to_nearest_neighbor_graph(mol, coords, neighbor_cutoff=1.25, keep_dists=True)
    >>> print(g.edata['dist'])
    tensor([[1.2024],
            [1.2024],
            [1.2270],
            [1.2270],
            [1.2259],
            [1.2259]])

    By default, we do not explicitly represent hydrogens as nodes, which can be done as follows.

    >>> mol = Chem.MolFromSmiles('CC1(C(N2C(S1)C(C2=O)NC(=O)CC3=CC=CC=C3)C(=O)O)C')
    >>> mol = Chem.AddHs(mol)
    >>> AllChem.EmbedMolecule(mol)
    >>> AllChem.MMFFOptimizeMolecule(mol)
    >>> coords = get_mol_3d_coordinates(mol)
    >>> g = mol_to_nearest_neighbor_graph(mol, coords, neighbor_cutoff=1.25,
    >>>                                   explicit_hydrogens=True)
    >>> print(g)
    DGLGraph(num_nodes=41, num_edges=42,
             ndata_schemes={}
             edata_schemes={})

    See Also
    --------
    get_mol_3d_coordinates
    k_nearest_neighbors
    smiles_to_nearest_neighbor_graph
    """
    if mol is None:
        print('Invalid mol found')
        return None

    if explicit_hydrogens:
        mol = Chem.AddHs(mol)

    num_atoms = mol.GetNumAtoms()
    num_coords = coordinates.shape[0]
    assert num_atoms == num_coords, \
        'Expect the number of atoms to match the first dimension of coordinates, ' \
        'got {:d} and {:d}'.format(num_atoms, num_coords)

    if canonical_atom_order:
        new_order = rdmolfiles.CanonicalRankAtoms(mol)
        mol = rdmolops.RenumberAtoms(mol, new_order)

    srcs, dsts, dists = k_nearest_neighbors(
        coordinates=coordinates,
        neighbor_cutoff=neighbor_cutoff,
        max_num_neighbors=max_num_neighbors,
        p_distance=p_distance,
        self_loops=add_self_loop)
    g = dgl.graph(([], []), idtype=torch.int32)

    # Add nodes first since some nodes may be completely isolated
    g.add_nodes(num_atoms)

    # Add edges
    g.add_edges(srcs, dsts)

    if node_featurizer is not None:
        g.ndata.update(node_featurizer(mol))

    if edge_featurizer is not None:
        g.edata.update(edge_featurizer(mol))

    if keep_dists:
        assert dist_field not in g.edata, \
            'Expect {} to be reserved for distance between ' \
            'neighboring atoms.'.format(dist_field)
        g.edata[dist_field] = torch.tensor(dists).float().reshape(-1, 1)

    if num_virtual_nodes > 0:
        num_real_nodes = g.num_nodes()
        real_nodes = list(range(num_real_nodes))
        g.add_nodes(num_virtual_nodes)

        # Change Topology
        virtual_src = []
        virtual_dst = []
        for count in range(num_virtual_nodes):
            virtual_node = num_real_nodes + count
            virtual_node_copy = [virtual_node] * num_real_nodes
            virtual_src.extend(real_nodes)
            virtual_src.extend(virtual_node_copy)
            virtual_dst.extend(virtual_node_copy)
            virtual_dst.extend(real_nodes)
        g.add_edges(virtual_src, virtual_dst)

        for nk, nv in g.ndata.items():
            nv = torch.cat([nv, torch.zeros(g.num_nodes(), 1)], dim=1)
            nv[:-num_virtual_nodes, -1] = 1
            g.ndata[nk] = nv

        for ek, ev in g.edata.items():
            ev = torch.cat([ev, torch.zeros(g.num_edges(), 1)], dim=1)
            ev[:-num_virtual_nodes * num_real_nodes * 2, -1] = 1
            g.edata[ek] = ev

    return g
Example #13
def construct_graph(training_dir,
                    edges,
                    nodes,
                    target_node_type,
                    heterogeneous=True):
    if heterogeneous:
        print("Getting relation graphs from the following edge lists : {} ".
              format(edges))
        edgelists, id_to_node = {}, {}
        for i, edge in enumerate(edges):
            edgelist, id_to_node, src, dst = parse_edgelist(os.path.join(
                training_dir, edge),
                                                            id_to_node,
                                                            header=True)
            if src == target_node_type:
                src = 'target'
            if dst == target_node_type:
                dst = 'target'
            edgelists[(src, 'relation{}'.format(i), dst)] = edgelist
            print("Read edges for relation{} from edgelist: {}".format(
                i, os.path.join(training_dir, edge)))

            # reverse edge list so that relation is undirected
            edgelists[(dst, 'reverse_relation{}'.format(i),
                       src)] = [(b, a) for a, b in edgelist]

        # get features for target nodes
        features, new_nodes = get_features(id_to_node[target_node_type],
                                           os.path.join(training_dir, nodes))
        print("Read in features for target nodes")
        # handle target nodes that have features but don't have any connections
        # if new_nodes:
        #     edgelists[('target', 'relation'.format(i+1), 'none')] = [(node, 0) for node in new_nodes]
        #     edgelists[('none', 'reverse_relation{}'.format(i + 1), 'target')] = [(0, node) for node in new_nodes]

        # add self relation
        edgelists[('target', 'self_relation', 'target')] = [
            (t, t) for t in id_to_node[target_node_type].values()
        ]

        g = dgl.heterograph(edgelists)
        print(
            "Constructed heterograph with the following metagraph structure: Node types {}, Edge types {}"
            .format(g.ntypes, g.canonical_etypes))
        print("Number of nodes of type target : {}".format(
            g.number_of_nodes('target')))

        # g.nodes['target'].data['features'] = features

        id_to_node = id_to_node[target_node_type]

    else:
        sources, sinks, features, id_to_node = read_edges(
            os.path.join(training_dir, edges[0]),
            os.path.join(training_dir, nodes))

        # add self relation
        all_nodes = sorted(id_to_node.values())
        sources.extend(all_nodes)
        sinks.extend(all_nodes)

        g = dgl.graph((sources, sinks))

        if features:
            g.ndata['features'] = np.array(features).astype('float32')

        print('read graph from node list and edge list')

        features = g.ndata['features']

    return g, features, id_to_node
Example #14
def test_subgraph1(idtype):
    g = create_test_heterograph(idtype)
    g_graph = g['follows']
    g_bipartite = g['plays']

    x = F.randn((3, 5))
    y = F.randn((2, 4))
    g.nodes['user'].data['h'] = x
    g.edges['follows'].data['h'] = y

    def _check_subgraph(g, sg):
        assert sg.idtype == g.idtype
        assert sg.device == g.device
        assert sg.ntypes == g.ntypes
        assert sg.etypes == g.etypes
        assert sg.canonical_etypes == g.canonical_etypes
        assert F.array_equal(F.tensor(sg.nodes['user'].data[dgl.NID]),
                             F.tensor([1, 2], g.idtype))
        assert F.array_equal(F.tensor(sg.nodes['game'].data[dgl.NID]),
                             F.tensor([0], g.idtype))
        assert F.array_equal(F.tensor(sg.edges['follows'].data[dgl.EID]),
                             F.tensor([1], g.idtype))
        assert F.array_equal(F.tensor(sg.edges['plays'].data[dgl.EID]),
                             F.tensor([1], g.idtype))
        assert F.array_equal(F.tensor(sg.edges['wishes'].data[dgl.EID]),
                             F.tensor([1], g.idtype))
        assert sg.number_of_nodes('developer') == 0
        assert sg.number_of_edges('develops') == 0
        assert F.array_equal(sg.nodes['user'].data['h'],
                             g.nodes['user'].data['h'][1:3])
        assert F.array_equal(sg.edges['follows'].data['h'],
                             g.edges['follows'].data['h'][1:2])

    sg1 = g.subgraph({'user': [1, 2], 'game': [0]})
    _check_subgraph(g, sg1)
    if F._default_context_str != 'gpu':
        # TODO(minjie): enable this later
        sg2 = g.edge_subgraph({'follows': [1], 'plays': [1], 'wishes': [1]})
        _check_subgraph(g, sg2)

    # backend tensor input
    sg1 = g.subgraph({
        'user': F.tensor([1, 2], dtype=idtype),
        'game': F.tensor([0], dtype=idtype)
    })
    _check_subgraph(g, sg1)
    if F._default_context_str != 'gpu':
        # TODO(minjie): enable this later
        sg2 = g.edge_subgraph({
            'follows': F.tensor([1], dtype=idtype),
            'plays': F.tensor([1], dtype=idtype),
            'wishes': F.tensor([1], dtype=idtype)
        })
        _check_subgraph(g, sg2)

    # numpy input
    sg1 = g.subgraph({'user': np.array([1, 2]), 'game': np.array([0])})
    _check_subgraph(g, sg1)
    if F._default_context_str != 'gpu':
        # TODO(minjie): enable this later
        sg2 = g.edge_subgraph({
            'follows': np.array([1]),
            'plays': np.array([1]),
            'wishes': np.array([1])
        })
        _check_subgraph(g, sg2)

    def _check_subgraph_single_ntype(g, sg, preserve_nodes=False):
        assert sg.idtype == g.idtype
        assert sg.device == g.device
        assert sg.ntypes == g.ntypes
        assert sg.etypes == g.etypes
        assert sg.canonical_etypes == g.canonical_etypes

        if not preserve_nodes:
            assert F.array_equal(F.tensor(sg.nodes['user'].data[dgl.NID]),
                                 F.tensor([1, 2], g.idtype))
        else:
            for ntype in sg.ntypes:
                assert g.number_of_nodes(ntype) == sg.number_of_nodes(ntype)

        assert F.array_equal(F.tensor(sg.edges['follows'].data[dgl.EID]),
                             F.tensor([1], g.idtype))

        if not preserve_nodes:
            assert F.array_equal(sg.nodes['user'].data['h'],
                                 g.nodes['user'].data['h'][1:3])
        assert F.array_equal(sg.edges['follows'].data['h'],
                             g.edges['follows'].data['h'][1:2])

    def _check_subgraph_single_etype(g, sg, preserve_nodes=False):
        assert sg.ntypes == g.ntypes
        assert sg.etypes == g.etypes
        assert sg.canonical_etypes == g.canonical_etypes

        if not preserve_nodes:
            assert F.array_equal(F.tensor(sg.nodes['user'].data[dgl.NID]),
                                 F.tensor([0, 1], g.idtype))
            assert F.array_equal(F.tensor(sg.nodes['game'].data[dgl.NID]),
                                 F.tensor([0], g.idtype))
        else:
            for ntype in sg.ntypes:
                assert g.number_of_nodes(ntype) == sg.number_of_nodes(ntype)

        assert F.array_equal(F.tensor(sg.edges['plays'].data[dgl.EID]),
                             F.tensor([0, 1], g.idtype))

    sg1_graph = g_graph.subgraph([1, 2])
    _check_subgraph_single_ntype(g_graph, sg1_graph)
    if F._default_context_str != 'gpu':
        # TODO(minjie): enable this later
        sg1_graph = g_graph.edge_subgraph([1])
        _check_subgraph_single_ntype(g_graph, sg1_graph)
        sg1_graph = g_graph.edge_subgraph([1], relabel_nodes=False)
        _check_subgraph_single_ntype(g_graph, sg1_graph, True)
        sg2_bipartite = g_bipartite.edge_subgraph([0, 1])
        _check_subgraph_single_etype(g_bipartite, sg2_bipartite)
        sg2_bipartite = g_bipartite.edge_subgraph([0, 1], relabel_nodes=False)
        _check_subgraph_single_etype(g_bipartite, sg2_bipartite, True)

    def _check_typed_subgraph1(g, sg):
        assert g.idtype == sg.idtype
        assert g.device == sg.device
        assert set(sg.ntypes) == {'user', 'game'}
        assert set(sg.etypes) == {'follows', 'plays', 'wishes'}
        for ntype in sg.ntypes:
            assert sg.number_of_nodes(ntype) == g.number_of_nodes(ntype)
        for etype in sg.etypes:
            src_sg, dst_sg = sg.all_edges(etype=etype, order='eid')
            src_g, dst_g = g.all_edges(etype=etype, order='eid')
            assert F.array_equal(src_sg, src_g)
            assert F.array_equal(dst_sg, dst_g)
        assert F.array_equal(sg.nodes['user'].data['h'],
                             g.nodes['user'].data['h'])
        assert F.array_equal(sg.edges['follows'].data['h'],
                             g.edges['follows'].data['h'])
        g.nodes['user'].data['h'] = F.scatter_row(g.nodes['user'].data['h'],
                                                  F.tensor([2]), F.randn(
                                                      (1, 5)))
        g.edges['follows'].data['h'] = F.scatter_row(
            g.edges['follows'].data['h'], F.tensor([1]), F.randn((1, 4)))
        assert F.array_equal(sg.nodes['user'].data['h'],
                             g.nodes['user'].data['h'])
        assert F.array_equal(sg.edges['follows'].data['h'],
                             g.edges['follows'].data['h'])

    def _check_typed_subgraph2(g, sg):
        assert set(sg.ntypes) == {'developer', 'game'}
        assert set(sg.etypes) == {'develops'}
        for ntype in sg.ntypes:
            assert sg.number_of_nodes(ntype) == g.number_of_nodes(ntype)
        for etype in sg.etypes:
            src_sg, dst_sg = sg.all_edges(etype=etype, order='eid')
            src_g, dst_g = g.all_edges(etype=etype, order='eid')
            assert F.array_equal(src_sg, src_g)
            assert F.array_equal(dst_sg, dst_g)

    sg3 = g.node_type_subgraph(['user', 'game'])
    _check_typed_subgraph1(g, sg3)
    sg4 = g.edge_type_subgraph(['develops'])
    _check_typed_subgraph2(g, sg4)
    sg5 = g.edge_type_subgraph(['follows', 'plays', 'wishes'])
    _check_typed_subgraph1(g, sg5)

    # Test for restricted format
    if F._default_context_str != 'gpu':
        # TODO(minjie): enable this later
        for fmt in ['csr', 'csc', 'coo']:
            g = dgl.graph(([0, 1], [1, 2])).formats(fmt)
            sg = g.subgraph({g.ntypes[0]: [1, 0]})
            nids = F.asnumpy(sg.ndata[dgl.NID])
            assert np.array_equal(nids, np.array([1, 0]))
            src, dst = sg.edges(order='eid')
            src = F.asnumpy(src)
            dst = F.asnumpy(dst)
            assert np.array_equal(src, np.array([1]))
Example #15
    def atom_dgl_multigraph(
        atoms=None,
        cutoff=8.0,
        max_neighbors=12,
        atom_features="cgcnn",
        enforce_undirected=False,
        max_attempts=3,
        include_prdf_angles=False,
        partial_rcut=4.0,
        id=None,
    ):
        """Obtain a DGLGraph for Atoms object."""
        dists = atoms.raw_distance_matrix

        def cos_formula(a, b, c):
            """Get angle between three edges for oblique triangles."""
            res = (a**2 + b**2 - c**2) / (2 * a * b)
            res = -1.0 if res < -1.0 else res
            res = 1.0 if res > 1.0 else res
            return np.arccos(res)

        def bond_to_bond_feats(nb):
            tmp = 0
            angles_tmp = []
            for ii, i in enumerate(nb):
                tmp = ii + 1
                if tmp > len(nb) - 1:
                    tmp = 0
                ang = 0
                try:
                    ang = cos_formula(i[2], nb[tmp][2], dists[i[1],
                                                              nb[tmp][1]])
                except Exception as exp:
                    # print("Setting angle zeros", id, exp)
                    pass
                angles_tmp.append(ang)
            return np.array(angles_tmp)

        if include_prdf_angles:
            (
                all_neighbors,
                prdf_arr,
                pangle_arr,
                pval,
                aval,
                nbor,
            ) = atoms.atomwise_angle_and_radial_distribution(r=cutoff)
            pval = np.fliplr(np.sort(pval))[:, 0:max_neighbors]
            aval = np.fliplr(np.sort(aval))[:, 0:max_neighbors]
        else:
            all_neighbors = atoms.get_all_neighbors(r=cutoff)
        # if a site has too few neighbors, increase the cutoff radius
        min_nbrs = min(len(neighborlist) for neighborlist in all_neighbors)
        # print('min_nbrs,max_neighbors=',min_nbrs,max_neighbors)

        attempt = 0
        while min_nbrs < max_neighbors:
            print("extending cutoff radius!", attempt, cutoff, id)
            lat = atoms.lattice
            r_cut = max(cutoff, lat.a, lat.b, lat.c)
            attempt += 1
            if attempt >= max_attempts:
                atoms = atoms.make_supercell([2, 2, 2])
                print(
                    "Making supercell, exceeded,attempts",
                    max_attempts,
                    "cutoff",
                    r_cut,
                    id,
                )
            cutoff = r_cut
            all_neighbors = atoms.get_all_neighbors(r=cutoff)
            min_nbrs = min(len(neighborlist) for neighborlist in all_neighbors)
            # return Graph.atom_dgl_multigraph(
            #    atoms, r_cut, max_neighbors, atom_features
            # )

        # build up edge list
        # Currently there's no guarantee that this creates undirected graphs
        # An undirected solution would build the full edge list where nodes are
        # keyed by (index,image), and ensure each edge has a complementary edge

        # indeed,JVASP-59628 is an example of a calculation where this produces
        # a graph where one site has no incident edges!

        # build an edge dictionary u -> v
        # so later we can run through the dictionary
        # and remove all pairs of edges
        # so what's left is the odd ones out
        edges = defaultdict(list)

        u, v, r, w, prdf, adf = [], [], [], [], [], []
        for site_idx, neighborlist in enumerate(all_neighbors):

            # sort on distance
            neighborlist = sorted(neighborlist, key=lambda x: x[2])

            ids = np.array([nbr[1] for nbr in neighborlist])
            distances = np.array([nbr[2] for nbr in neighborlist])
            c = np.array([nbr[3] for nbr in neighborlist])

            # find the distance to the k-th nearest neighbor
            max_dist = distances[max_neighbors - 1]

            # keep all edges out to the neighbor shell of the k-th neighbor
            ids = ids[distances <= max_dist]
            new_angles = bond_to_bond_feats(neighborlist)
            try:
                new_angles = new_angles[ids - 1]
            except Exception as exp:
                new_angles = np.zeros(len(ids))
                pass
            c = c[distances <= max_dist]
            distances = distances[distances <= max_dist]
            u.append([site_idx] * len(ids))
            v.append(ids)
            r.append(distances)
            w.append(new_angles)

            if include_prdf_angles:
                prdf.append(pval[site_idx])
                adf.append(aval[site_idx])
            # keep track of cell-resolved edges
            # to enforce undirected graph construction
            for dst, cell_id in zip(ids, c):
                u_key = f"{site_idx}-(0.0, 0.0, 0.0)"
                v_key = f"{dst}-{tuple(cell_id)}"
                edge_key = tuple(sorted((u_key, v_key)))
                edges[edge_key].append((site_idx, dst))

        if enforce_undirected:
            # add complementary edges to unpaired edges
            for edge_pair in edges.values():
                if len(edge_pair) == 1:
                    src, dst = edge_pair[0]
                    u.append(dst)  # swap the order!
                    v.append(src)
                    r.append(atoms.raw_distance_matrix[src, dst])

        u = np.hstack(u)
        v = np.hstack(v)
        r = np.hstack(r)
        w = np.hstack(w)
        u = torch.tensor(u)
        v = torch.tensor(v)
        w = torch.tensor(w)
        if include_prdf_angles:
            prdf = np.array(prdf)
            adf = np.array(adf)
            prdf = np.hstack(prdf)
            adf = np.cos(np.hstack(adf))
            if len(r) != len(prdf):
                prdf = np.append(prdf, np.zeros(len(r) - len(prdf)))
            if len(r) != len(adf):
                adf = np.append(adf, np.zeros(len(r) - len(adf)))
            prdf = torch.tensor(np.array(np.hstack(prdf))).type(
                torch.get_default_dtype())
            adf = torch.tensor(np.array(np.hstack(adf))).type(
                torch.get_default_dtype())

        r = torch.tensor(np.array(r)).type(torch.get_default_dtype())
        w = torch.tensor(np.array(w)).type(torch.get_default_dtype())
        # build up atom attribute tensor
        species = atoms.elements
        sps_features = []
        for ii, s in enumerate(species):
            feat = list(get_node_attributes(s, atom_features=atom_features))
            # if include_prdf_angles:
            #    feat=feat+list(prdf[ii])+list(adf[ii])
            sps_features.append(feat)
        sps_features = np.array(sps_features)
        node_features = torch.tensor(sps_features).type(
            torch.get_default_dtype())

        g = dgl.graph((u, v))
        g.ndata["atom_features"] = node_features
        g.edata["bondlength"] = r
        g.edata["bondangle"] = w
        if include_prdf_angles:
            g.edata["partial_distance"] = prdf
            g.edata["partial_angle"] = adf

        return g
Example #16
def test_to_simple(index_dtype):
    # homogeneous graph
    g = dgl.graph((F.tensor([0, 1, 2, 1]), F.tensor([1, 2, 0, 2])))
    g.ndata['h'] = F.tensor([[0.], [1.], [2.]])
    g.edata['h'] = F.tensor([[3.], [4.], [5.], [6.]])
    sg, wb = dgl.to_simple(g, writeback_mapping=True)
    u, v = g.all_edges(form='uv', order='eid')
    u = F.asnumpy(u).tolist()
    v = F.asnumpy(v).tolist()
    uv = list(zip(u, v))
    eid_map = F.asnumpy(wb)

    su, sv = sg.all_edges(form='uv', order='eid')
    su = F.asnumpy(su).tolist()
    sv = F.asnumpy(sv).tolist()
    suv = list(zip(su, sv))
    sc = F.asnumpy(sg.edata['count'])
    assert set(uv) == set(suv)
    for i, e in enumerate(suv):
        assert sc[i] == sum(e == _e for _e in uv)
    for i, e in enumerate(uv):
        assert eid_map[i] == suv.index(e)
    # shared ndata
    assert F.array_equal(sg.ndata['h'], g.ndata['h'])
    assert 'h' not in sg.edata
    # new ndata to sg
    sg.ndata['hh'] = F.tensor([[0.], [1.], [2.]])
    assert 'hh' not in g.ndata

    sg = dgl.to_simple(g, writeback_mapping=False, copy_ndata=False)
    assert 'h' not in sg.ndata
    assert 'h' not in sg.edata

    # heterogeneous graph
    g = dgl.heterograph({
        ('user', 'follow', 'user'): ([0, 1, 2, 1, 1, 1],
                                     [1, 3, 2, 3, 4, 4]),
        ('user', 'plays', 'game'): ([3, 2, 1, 1, 3, 2, 2], [5, 3, 4, 4, 5, 3, 3])},
        index_dtype=index_dtype)
    g.nodes['user'].data['h'] = F.tensor([0, 1, 2, 3, 4])
    g.nodes['user'].data['hh'] = F.tensor([0, 1, 2, 3, 4])
    g.edges['follow'].data['h'] = F.tensor([0, 1, 2, 3, 4, 5])
    sg, wb = dgl.to_simple(g, return_counts='weights', writeback_mapping=True, copy_edata=True)
    g.nodes['game'].data['h'] = F.tensor([0, 1, 2, 3, 4, 5])

    for etype in g.canonical_etypes:
        u, v = g.all_edges(form='uv', order='eid', etype=etype)
        u = F.asnumpy(u).tolist()
        v = F.asnumpy(v).tolist()
        uv = list(zip(u, v))
        eid_map = F.asnumpy(wb[etype])

        su, sv = sg.all_edges(form='uv', order='eid', etype=etype)
        su = F.asnumpy(su).tolist()
        sv = F.asnumpy(sv).tolist()
        suv = list(zip(su, sv))
        sw = F.asnumpy(sg.edges[etype].data['weights'])

        assert set(uv) == set(suv)
        for i, e in enumerate(suv):
            assert sw[i] == sum(e == _e for _e in uv)
        for i, e in enumerate(uv):
            assert eid_map[i] == suv.index(e)
    # shared ndata
    assert F.array_equal(sg.nodes['user'].data['h'], g.nodes['user'].data['h'])
    assert F.array_equal(sg.nodes['user'].data['hh'], g.nodes['user'].data['hh'])
    assert 'h' not in sg.nodes['game'].data
    # new ndata to sg
    sg.nodes['user'].data['hhh'] = F.tensor([0, 1, 2, 3, 4])
    assert 'hhh' not in g.nodes['user'].data
    # share edata
    feat_idx = F.asnumpy(wb[('user', 'follow', 'user')])
    _, indices = np.unique(feat_idx, return_index=True)
    assert np.array_equal(F.asnumpy(sg.edges['follow'].data['h']),
                          F.asnumpy(g.edges['follow'].data['h'])[indices])

    sg = dgl.to_simple(g, writeback_mapping=False, copy_ndata=False)
    for ntype in g.ntypes:
        assert g.number_of_nodes(ntype) == sg.number_of_nodes(ntype)
    assert 'h' not in sg.nodes['user'].data
    assert 'hh' not in sg.nodes['user'].data
Example #17
def test_to_bidirected():
    # homogeneous graph
    g = dgl.graph((F.tensor([0, 1, 3, 1]), F.tensor([1, 2, 0, 2])))
    g.ndata['h'] = F.tensor([[0.], [1.], [2.], [1.]])
    g.edata['h'] = F.tensor([[3.], [4.], [5.], [6.]])
    bg = dgl.to_bidirected(g, copy_ndata=True, copy_edata=True)
    u, v = g.edges()
    ub, vb = bg.edges()
    assert F.array_equal(F.cat([u, v], dim=0), ub)
    assert F.array_equal(F.cat([v, u], dim=0), vb)
    assert F.array_equal(g.ndata['h'], bg.ndata['h'])
    assert F.array_equal(F.cat([g.edata['h'], g.edata['h']], dim=0), bg.edata['h'])
    bg.ndata['hh'] = F.tensor([[0.], [1.], [2.], [1.]])
    assert ('hh' in g.ndata) is False
    bg.edata['hh'] = F.tensor([[0.], [1.], [2.], [1.], [0.], [1.], [2.], [1.]])
    assert ('hh' in g.edata) is False

    # do not share ndata and edata
    bg = dgl.to_bidirected(g, copy_ndata=False, copy_edata=False)
    ub, vb = bg.edges()
    assert F.array_equal(F.cat([u, v], dim=0), ub)
    assert F.array_equal(F.cat([v, u], dim=0), vb)
    assert ('h' in bg.ndata) is False
    assert ('h' in bg.edata) is False

    # zero edge graph
    g = dgl.graph([])
    bg = dgl.to_bidirected(g, copy_ndata=True, copy_edata=True)

    # heterogeneous graph
    g = dgl.heterograph({
        ('user', 'wins', 'user'): (F.tensor([0, 2, 0, 2, 2]), F.tensor([1, 1, 2, 1, 0])),
        ('user', 'plays', 'game'): (F.tensor([1, 2, 1]), F.tensor([2, 1, 1])),
        ('user', 'follows', 'user'): (F.tensor([1, 2, 1]), F.tensor([0, 0, 0]))
    })
    g.nodes['game'].data['hv'] = F.ones((3, 1))
    g.nodes['user'].data['hv'] = F.ones((3, 1))
    g.edges['wins'].data['h'] = F.tensor([0, 1, 2, 3, 4])
    bg = dgl.to_bidirected(g, copy_ndata=True, copy_edata=True, ignore_bipartite=True)
    assert F.array_equal(g.nodes['game'].data['hv'], bg.nodes['game'].data['hv'])
    assert F.array_equal(g.nodes['user'].data['hv'], bg.nodes['user'].data['hv'])
    u, v = g.all_edges(order='eid', etype=('user', 'wins', 'user'))
    ub, vb = bg.all_edges(order='eid', etype=('user', 'wins', 'user'))
    assert F.array_equal(F.cat([u, v], dim=0), ub)
    assert F.array_equal(F.cat([v, u], dim=0), vb)
    assert F.array_equal(F.cat([g.edges['wins'].data['h'], g.edges['wins'].data['h']], dim=0),
                         bg.edges['wins'].data['h'])
    u, v = g.all_edges(order='eid', etype=('user', 'follows', 'user'))
    ub, vb = bg.all_edges(order='eid', etype=('user', 'follows', 'user'))
    assert F.array_equal(F.cat([u, v], dim=0), ub)
    assert F.array_equal(F.cat([v, u], dim=0), vb)
    u, v = g.all_edges(order='eid', etype=('user', 'plays', 'game'))
    ub, vb = bg.all_edges(order='eid', etype=('user', 'plays', 'game'))
    assert F.array_equal(u, ub)
    assert F.array_equal(v, vb)
    assert len(bg.edges['plays'].data) == 0
    assert len(bg.edges['follows'].data) == 0

    # do not share ndata and edata
    bg = dgl.to_bidirected(g, copy_ndata=False, copy_edata=False, ignore_bipartite=True)
    assert len(bg.edges['wins'].data) == 0
    assert len(bg.edges['plays'].data) == 0
    assert len(bg.edges['follows'].data) == 0
    assert len(bg.nodes['game'].data) == 0
    assert len(bg.nodes['user'].data) == 0
    u, v = g.all_edges(order='eid', etype=('user', 'wins', 'user'))
    ub, vb = bg.all_edges(order='eid', etype=('user', 'wins', 'user'))
    assert F.array_equal(F.cat([u, v], dim=0), ub)
    assert F.array_equal(F.cat([v, u], dim=0), vb)
    u, v = g.all_edges(order='eid', etype=('user', 'follows', 'user'))
    ub, vb = bg.all_edges(order='eid', etype=('user', 'follows', 'user'))
    assert F.array_equal(F.cat([u, v], dim=0), ub)
    assert F.array_equal(F.cat([v, u], dim=0), vb)
    u, v = g.all_edges(order='eid', etype=('user', 'plays', 'game'))
    ub, vb = bg.all_edges(order='eid', etype=('user', 'plays', 'game'))
    assert F.array_equal(u, ub)
    assert F.array_equal(v, vb)
Example #18
def test_remove_edges(index_dtype):
    def check(g1, etype, g, edges_removed):
        src, dst, eid = g.edges(etype=etype, form='all')
        src1, dst1 = g1.edges(etype=etype, order='eid')
        if etype is not None:
            eid1 = g1.edges[etype].data[dgl.EID]
        else:
            eid1 = g1.edata[dgl.EID]
        src1 = F.asnumpy(src1)
        dst1 = F.asnumpy(dst1)
        eid1 = F.asnumpy(eid1)
        src = F.asnumpy(src)
        dst = F.asnumpy(dst)
        eid = F.asnumpy(eid)
        sde_set = set(zip(src, dst, eid))

        for s, d, e in zip(src1, dst1, eid1):
            assert (s, d, e) in sde_set
        assert not np.isin(edges_removed, eid1).any()
        assert g1.idtype == g.idtype

    for fmt in ['coo', 'csr', 'csc']:
        for edges_to_remove in [[2], [2, 2], [3, 2], [1, 3, 1, 2]]:
            g = dgl.graph([(0, 1), (2, 3), (1, 2), (3, 4)],
                          restrict_format=fmt,
                          index_dtype=index_dtype)
            g1 = dgl.remove_edges(
                g, F.tensor(edges_to_remove, getattr(F, index_dtype)))
            check(g1, None, g, edges_to_remove)

            g = dgl.graph(spsp.csr_matrix(
                ([1, 1, 1, 1], ([0, 2, 1, 3], [1, 3, 2, 4])), shape=(5, 5)),
                          restrict_format=fmt,
                          index_dtype=index_dtype)
            g1 = dgl.remove_edges(
                g, F.tensor(edges_to_remove, getattr(F, index_dtype)))
            check(g1, None, g, edges_to_remove)

    g = dgl.heterograph(
        {
            ('A', 'AA', 'A'): [(0, 1), (2, 3), (1, 2), (3, 4)],
            ('A', 'AB', 'B'): [(0, 1), (1, 3), (3, 5), (1, 6)],
            ('B', 'BA', 'A'): [(2, 3), (3, 2)]
        },
        index_dtype=index_dtype)
    g2 = dgl.remove_edges(
        g, {
            'AA': F.tensor([2], getattr(F, index_dtype)),
            'AB': F.tensor([3], getattr(F, index_dtype)),
            'BA': F.tensor([1], getattr(F, index_dtype))
        })
    check(g2, 'AA', g, [2])
    check(g2, 'AB', g, [3])
    check(g2, 'BA', g, [1])

    g3 = dgl.remove_edges(
        g, {
            'AA': F.tensor([], getattr(F, index_dtype)),
            'AB': F.tensor([3], getattr(F, index_dtype)),
            'BA': F.tensor([1], getattr(F, index_dtype))
        })
    check(g3, 'AA', g, [])
    check(g3, 'AB', g, [3])
    check(g3, 'BA', g, [1])

    g4 = dgl.remove_edges(
        g, {'AB': F.tensor([3, 1, 2, 0], getattr(F, index_dtype))})
    check(g4, 'AA', g, [])
    check(g4, 'AB', g, [3, 1, 2, 0])
    check(g4, 'BA', g, [])
Example #19
def test_local_scope(index_dtype):
    g = dgl.graph([(0, 1), (1, 2), (2, 3), (3, 4)], index_dtype=index_dtype)
    g.ndata['h'] = F.zeros((g.number_of_nodes(), 3))
    g.edata['w'] = F.zeros((g.number_of_edges(), 4))

    # test override
    def foo(g):
        with g.local_scope():
            g.ndata['h'] = F.ones((g.number_of_nodes(), 3))
            g.edata['w'] = F.ones((g.number_of_edges(), 4))

    foo(g)
    assert F.allclose(g.ndata['h'], F.zeros((g.number_of_nodes(), 3)))
    assert F.allclose(g.edata['w'], F.zeros((g.number_of_edges(), 4)))

    # test out-place update
    def foo(g):
        with g.local_scope():
            g.nodes[[2, 3]].data['h'] = F.ones((2, 3))
            g.edges[[2, 3]].data['w'] = F.ones((2, 4))

    foo(g)
    assert F.allclose(g.ndata['h'], F.zeros((g.number_of_nodes(), 3)))
    assert F.allclose(g.edata['w'], F.zeros((g.number_of_edges(), 4)))

    # test out-place update 2
    def foo(g):
        with g.local_scope():
            g.apply_nodes(lambda nodes: {'h': nodes.data['h'] + 10}, [2, 3])
            g.apply_edges(lambda edges: {'w': edges.data['w'] + 10}, [2, 3])

    foo(g)
    assert F.allclose(g.ndata['h'], F.zeros((g.number_of_nodes(), 3)))
    assert F.allclose(g.edata['w'], F.zeros((g.number_of_edges(), 4)))

    # test auto-pop
    def foo(g):
        with g.local_scope():
            g.ndata['hh'] = F.ones((g.number_of_nodes(), 3))
            g.edata['ww'] = F.ones((g.number_of_edges(), 4))

    foo(g)
    assert 'hh' not in g.ndata
    assert 'ww' not in g.edata

    # test nested scope
    def foo(g):
        with g.local_scope():
            g.ndata['hh'] = F.ones((g.number_of_nodes(), 3))
            g.edata['ww'] = F.ones((g.number_of_edges(), 4))
            with g.local_scope():
                g.ndata['hhh'] = F.ones((g.number_of_nodes(), 3))
                g.edata['www'] = F.ones((g.number_of_edges(), 4))
            assert 'hhh' not in g.ndata
            assert 'www' not in g.edata

    foo(g)
    assert 'hh' not in g.ndata
    assert 'ww' not in g.edata

    # test initializer1
    g = dgl.graph([(0, 1), (1, 1)], index_dtype=index_dtype)
    g.set_n_initializer(dgl.init.zero_initializer)

    def foo(g):
        with g.local_scope():
            g.nodes[0].data['h'] = F.ones((1, 1))
            assert F.allclose(g.ndata['h'], F.tensor([[1.], [0.]]))

    foo(g)

    # test initializer2
    def foo_e_initializer(shape, dtype, ctx, id_range):
        return F.ones(shape)

    g.set_e_initializer(foo_e_initializer, field='h')

    def foo(g):
        with g.local_scope():
            g.edges[0, 1].data['h'] = F.ones((1, 1))
            assert F.allclose(g.edata['h'], F.ones((2, 1)))
            g.edges[0, 1].data['w'] = F.ones((1, 1))
            assert F.allclose(g.edata['w'], F.tensor([[1.], [0.]]))

    foo(g)
Example #20
def atest_nx_conversion(index_dtype):
    # check conversion between networkx and DGLGraph

    def _check_nx_feature(nxg, nf, ef):
        # check node and edge feature of nxg
        # this is used to check to_networkx
        num_nodes = len(nxg)
        num_edges = nxg.size()
        if num_nodes > 0:
            node_feat = ddict(list)
            for nid, attr in nxg.nodes(data=True):
                assert len(attr) == len(nf)
                for k in nxg.nodes[nid]:
                    node_feat[k].append(F.unsqueeze(attr[k], 0))
            for k in node_feat:
                feat = F.cat(node_feat[k], 0)
                assert F.allclose(feat, nf[k])
        else:
            assert len(nf) == 0
        if num_edges > 0:
            edge_feat = ddict(lambda: [0] * num_edges)
            for u, v, attr in nxg.edges(data=True):
                assert len(attr) == len(ef) + 1  # extra id
                eid = attr['id']
                for k in ef:
                    edge_feat[k][eid] = F.unsqueeze(attr[k], 0)
            for k in edge_feat:
                feat = F.cat(edge_feat[k], 0)
                assert F.allclose(feat, ef[k])
        else:
            assert len(ef) == 0

    n1 = F.randn((5, 3))
    n2 = F.randn((5, 10))
    n3 = F.randn((5, 4))
    e1 = F.randn((4, 5))
    e2 = F.randn((4, 7))
    g = dgl.graph([(0, 2), (1, 4), (3, 0), (4, 3)], index_dtype=index_dtype)
    g.ndata.update({'n1': n1, 'n2': n2, 'n3': n3})
    g.edata.update({'e1': e1, 'e2': e2})

    # convert to networkx
    nxg = dgl.to_networkx(g, node_attrs=['n1', 'n3'], edge_attrs=['e1', 'e2'])
    assert len(nxg) == 5
    assert nxg.size() == 4
    _check_nx_feature(nxg, {'n1': n1, 'n3': n3}, {'e1': e1, 'e2': e2})

    # convert to DGLGraph, nx graph has id in edge feature
    # use id feature to test non-tensor copy
    g = dgl.graph(nxg,
                  node_attrs=['n1'],
                  edge_attrs=['e1', 'id'],
                  index_dtype=index_dtype)
    assert g._idtype_str == index_dtype
    # check graph size
    assert g.number_of_nodes() == 5
    assert g.number_of_edges() == 4
    # check number of features
    # test with existing dglgraph (so existing features should be cleared)
    assert len(g.ndata) == 1
    assert len(g.edata) == 2
    # check feature values
    assert F.allclose(g.ndata['n1'], n1)
    # with id in nx edge feature, e1 should follow original order
    assert F.allclose(g.edata['e1'], e1)
    assert F.array_equal(g.edata['id'], F.copy_to(F.arange(0, 4), F.cpu()))

    # test conversion after modifying DGLGraph
    # TODO(minjie): enable after mutation is supported
    #g.pop_e_repr('id') # pop id so we don't need to provide id when adding edges
    #new_n = F.randn((2, 3))
    #new_e = F.randn((3, 5))
    #g.add_nodes(2, data={'n1': new_n})
    ## add three edges, one is a multi-edge
    #g.add_edges([3, 6, 0], [4, 5, 2], data={'e1': new_e})
    #n1 = F.cat((n1, new_n), 0)
    #e1 = F.cat((e1, new_e), 0)
    ## convert to networkx again
    #nxg = g.to_networkx(node_attrs=['n1'], edge_attrs=['e1'])
    #assert len(nxg) == 7
    #assert nxg.size() == 7
    #_check_nx_feature(nxg, {'n1': n1}, {'e1': e1})

    # now test convert from networkx without id in edge feature
    # first pop id in edge feature
    for _, _, attr in nxg.edges(data=True):
        attr.pop('id')
    # test with a new graph
    g = dgl.graph(nxg, node_attrs=['n1'], edge_attrs=['e1'])
    # check graph size
    assert g.number_of_nodes() == 5
    assert g.number_of_edges() == 4
    # check number of features
    assert len(g.ndata) == 1
    assert len(g.edata) == 1
    # check feature values
    assert F.allclose(g.ndata['n1'], n1)
    # edge feature order follows nxg.edges()
    edge_feat = []
    for _, _, attr in nxg.edges(data=True):
        edge_feat.append(F.unsqueeze(attr['e1'], 0))
    edge_feat = F.cat(edge_feat, 0)
    assert F.allclose(g.edata['e1'], edge_feat)

    # Test converting from a networkx graph whose nodes are
    # not labeled with consecutive-integers.
    nxg = nx.cycle_graph(5)
    nxg.remove_nodes_from([0, 4])
    for u in nxg.nodes():
        nxg.nodes[u]['h'] = F.tensor([u])
    for u, v, d in nxg.edges(data=True):
        d['h'] = F.tensor([u, v])

    g = dgl.DGLGraph()
    g.from_networkx(nxg, node_attrs=['h'], edge_attrs=['h'])
    assert g.number_of_nodes() == 3
    assert g.number_of_edges() == 4
    assert g.has_edge_between(0, 1)
    assert g.has_edge_between(1, 2)
    assert F.allclose(g.ndata['h'], F.tensor([[1.], [2.], [3.]]))
    assert F.allclose(g.edata['h'],
                      F.tensor([[1., 2.], [1., 2.], [2., 3.], [2., 3.]]))
Example #21
def test_send_multigraph(index_dtype):
    g = dgl.graph([(0, 1), (0, 1), (0, 1), (2, 1)], index_dtype=index_dtype)

    def _message_a(edges):
        return {'a': edges.data['a']}

    def _message_b(edges):
        return {'a': edges.data['a'] * 3}

    def _reduce(nodes):
        return {'a': F.max(nodes.mailbox['a'], 1)}

    def answer(*args):
        return F.max(F.stack(args, 0), 0)

    assert g.is_multigraph

    # send by eid
    old_repr = F.randn((4, 5))
    g.ndata['a'] = F.zeros((3, 5))
    g.edata['a'] = old_repr
    g.send([0, 2], message_func=_message_a)
    g.recv(1, _reduce)
    new_repr = g.ndata['a']
    assert F.allclose(new_repr[1], answer(old_repr[0], old_repr[2]))

    g.ndata['a'] = F.zeros((3, 5))
    g.edata['a'] = old_repr
    g.send([0, 2, 3], message_func=_message_a)
    g.recv(1, _reduce)
    new_repr = g.ndata['a']
    assert F.allclose(new_repr[1], answer(old_repr[0], old_repr[2],
                                          old_repr[3]))

    # send on multigraph
    g.ndata['a'] = F.zeros((3, 5))
    g.edata['a'] = old_repr
    g.send(([0, 2], [1, 1]), _message_a)
    g.recv(1, _reduce)
    new_repr = g.ndata['a']
    assert F.allclose(new_repr[1], F.max(old_repr, 0))

    # consecutive send and send_on
    g.ndata['a'] = F.zeros((3, 5))
    g.edata['a'] = old_repr
    g.send((2, 1), _message_a)
    g.send([0, 1], message_func=_message_b)
    g.recv(1, _reduce)
    new_repr = g.ndata['a']
    assert F.allclose(new_repr[1],
                      answer(old_repr[0] * 3, old_repr[1] * 3, old_repr[3]))

    # consecutive send_on
    g.ndata['a'] = F.zeros((3, 5))
    g.edata['a'] = old_repr
    g.send(0, message_func=_message_a)
    g.send(1, message_func=_message_b)
    g.recv(1, _reduce)
    new_repr = g.ndata['a']
    assert F.allclose(new_repr[1], answer(old_repr[0], old_repr[1] * 3))

    # send_and_recv_on
    g.ndata['a'] = F.zeros((3, 5))
    g.edata['a'] = old_repr
    g.send_and_recv([0, 2, 3], message_func=_message_a, reduce_func=_reduce)
    new_repr = g.ndata['a']
    assert F.allclose(new_repr[1], answer(old_repr[0], old_repr[2],
                                          old_repr[3]))
    assert F.allclose(new_repr[[0, 2]], F.zeros((2, 5)))
Beispiel #22
0
def create_graph(kg_data, n_nodes):
    g = dgl.graph((kg_data['t'], kg_data['h']))
    g.ndata['id'] = torch.arange(n_nodes, dtype=torch.long)
    g.edata['type'] = torch.LongTensor(kg_data['r'])
    return g
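# A minimal usage sketch for create_graph (not part of the original source):
# it assumes kg_data provides head ('h'), tail ('t') and relation ('r') ID
# sequences of equal length; the toy values below are illustrative only.
import torch
import dgl

kg_data = {
    'h': torch.tensor([0, 1, 2]),   # head entity IDs
    'r': [0, 1, 0],                 # relation type IDs
    't': torch.tensor([1, 2, 0]),   # tail entity IDs
}
g = create_graph(kg_data, n_nodes=3)
print(g.ndata['id'])    # tensor([0, 1, 2])
print(g.edata['type'])  # tensor([0, 1, 0])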
Beispiel #23
0
import dgl
import numpy as np
from mxnet import nd

g = dgl.graph(([0,0,1,5],[1,2,2,0]))
print('g:\n',g)

g.ndata['x'] = nd.ones((g.num_nodes(),3)) # node features of length 3
g.edata['x'] = nd.ones(g.num_edges(),dtype=np.int32)
print('g:',g)
# different names can hold different features
g.ndata['y'] = nd.random.uniform(shape=(g.num_nodes(),5))
print('g:',g)

print('the feature of node 1 in x',g.ndata['x'][1])  # get the feature of node 1
print('\n the feature of edge 0 and 3 in x:',g.edata['x'][nd.array([0,3],dtype=np.int32)])

# For a weighted graph, store the weights as an edge feature
edges = nd.array([0,0,0,1],dtype=np.int64), nd.array([1,2,3,3],dtype=np.int64)
weights = nd.array([0.1, 0.6, 0.9, 0.7]) # edge weights
g = dgl.graph(edges)
g.edata['w'] = weights  # 'w' holds the weight feature
print('\n g with weight:',g)
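# A hedged sketch (not from the original snippet) of how the stored edge
# weights could be consumed in message passing with DGL built-in functions;
# it reuses the weighted graph above and assumes a toy node feature 'x'.
import dgl.function as fn

g.ndata['x'] = nd.ones((g.num_nodes(), 3))
g.edata['w'] = weights.reshape((-1, 1))   # (E, 1) so the weight broadcasts over the feature dim
g.update_all(fn.u_mul_e('x', 'w', 'm'),   # message: source feature scaled by edge weight
             fn.sum('m', 'h'))            # reduce: sum incoming weighted messages
print('weighted aggregation:', g.ndata['h'])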
Beispiel #24
0
    argparser.add_argument('--lr', type=float, default=0.003)
    argparser.add_argument('--dropout', type=float, default=0.5)
    argparser.add_argument(
        '--num-workers',
        type=int,
        default=0,
        help="Number of sampling processes. Use 0 for no extra process.")
    args = argparser.parse_args()

    if args.gpu >= 0:
        device = th.device('cuda:%d' % args.gpu)
    else:
        device = th.device('cpu')

    # load reddit data
    data = RedditDataset(self_loop=True)
    train_mask = data.train_mask
    val_mask = data.val_mask
    features = th.Tensor(data.features)
    in_feats = features.shape[1]
    labels = th.LongTensor(data.labels)
    n_classes = data.num_labels
    # Construct graph
    g = dgl.graph(data.graph.all_edges())
    g.ndata['features'] = features
    prepare_mp(g)
    # Pack data
    data = train_mask, val_mask, in_feats, labels, n_classes, g

    run(args, device, data)
Beispiel #25
0
sys.path.append(base_dir)

from graph_embeddings.plot_embedding import plot_embeddings

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
torch.set_num_threads(2)

edges = pd.read_table('{}/data/Wiki_edgelist.txt'.format(base_dir), sep=' ')
nodes = pd.read_table('{}/data/wiki_labels.txt'.format(base_dir), sep=' ')

u = edges['src'].to_numpy()
v = edges['dst'].to_numpy()
labels = nodes['label'].to_numpy()

g = dgl.graph((u,v))
g.ndata['label'] = torch.tensor(labels)

num_node = g.num_nodes()
C = 5  # context window
simple_num = 10000
walks = dgl.sampling.random_walk(g, torch.randint(0, 2405, (simple_num, )), length=C * 2)
# filter out walks containing -1; -1 means no next edge could be found
walks = list(filter(lambda item: (item < 0).sum().item() == 0, walks[0]))
walks = np.array(list(map(lambda item: item.tolist(), walks)))
walks_train = np.delete(walks, C, axis=1).reshape(-1, C * 2)
walks_label = walks[:, C:C+1].reshape(-1, 1)

random.seed(1)
np.random.seed(1)
torch.manual_seed(1)
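# A hedged sketch (not part of the original script) of wrapping the prepared
# context/center pairs in a PyTorch DataLoader for a skip-gram/CBOW-style
# embedding model; the batch size and names below are illustrative assumptions.
from torch.utils.data import DataLoader, TensorDataset

contexts = torch.from_numpy(walks_train).long()            # (N, 2*C) context node IDs
centers = torch.from_numpy(walks_label).long().squeeze(1)  # (N,) center node IDs
loader = DataLoader(TensorDataset(contexts, centers), batch_size=256, shuffle=True)
for ctx_batch, center_batch in loader:
    # feed ctx_batch / center_batch to an embedding model here
    break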
Beispiel #26
0
    (train_adj, train_fea) = sampler.randomedge_sampler(percent=args.sampling_percent, normalization=args.normalization,
                                                        cuda=args.cuda)
    if args.mixmode:
        train_adj = train_adj.cuda()

    sampling_t = time.time() - sampling_t

    (val_adj, val_fea) = sampler.get_test_set(normalization=args.normalization, cuda=args.cuda)
   
    # Construct feed data g
    if torch.cuda.is_available():
        train_edges = train_adj._indices().cpu().data
    else:
        train_edges = train_adj._indices().data
    train_edges = (train_edges[0], train_edges[1])
    train_g = dgl.graph(train_edges)
    train_g.ndata['features'] = train_fea
    prepare_mp(train_g)

    # Construct feed data g
    if torch.cuda.is_available():
        val_edges = val_adj._indices().cpu().data
    else:
        val_edges = val_adj._indices().data
    val_edges = (val_edges[0], val_edges[1])
    val_g = dgl.graph(val_edges)
    if sampler.dataset=='coauthor_phy':
        val_g.ndata['features'] = val_fea.cpu()
        idx_val = idx_val.cpu()
    else:
        val_g.ndata['features'] = val_fea
Beispiel #27
0
def test_compact(index_dtype):
    g1 = dgl.heterograph({
        ('user', 'follow', 'user'): [(1, 3), (3, 5)],
        ('user', 'plays', 'game'): [(2, 4), (3, 4), (2, 5)],
        ('game', 'wished-by', 'user'): [(6, 7), (5, 7)]},
        {'user': 20, 'game': 10}, index_dtype=index_dtype)

    g2 = dgl.heterograph({
        ('game', 'clicked-by', 'user'): [(3, 1)],
        ('user', 'likes', 'user'): [(1, 8), (8, 9)]},
        {'user': 20, 'game': 10}, index_dtype=index_dtype)

    g3 = dgl.graph([(0, 1), (1, 2)], num_nodes=10, ntype='user', index_dtype=index_dtype)
    g4 = dgl.graph([(1, 3), (3, 5)], num_nodes=10, ntype='user', index_dtype=index_dtype)

    def _check(g, new_g, induced_nodes):
        assert g.ntypes == new_g.ntypes
        assert g.canonical_etypes == new_g.canonical_etypes

        for ntype in g.ntypes:
            assert -1 not in induced_nodes[ntype]

        for etype in g.canonical_etypes:
            g_src, g_dst = g.all_edges(order='eid', etype=etype)
            g_src = F.asnumpy(g_src)
            g_dst = F.asnumpy(g_dst)
            new_g_src, new_g_dst = new_g.all_edges(order='eid', etype=etype)
            new_g_src_mapped = induced_nodes[etype[0]][F.asnumpy(new_g_src)]
            new_g_dst_mapped = induced_nodes[etype[2]][F.asnumpy(new_g_dst)]
            assert (g_src == new_g_src_mapped).all()
            assert (g_dst == new_g_dst_mapped).all()

    # Test default
    new_g1 = dgl.compact_graphs(g1)
    induced_nodes = {ntype: new_g1.nodes[ntype].data[dgl.NID] for ntype in new_g1.ntypes}
    induced_nodes = {k: F.asnumpy(v) for k, v in induced_nodes.items()}
    assert new_g1._idtype_str == index_dtype
    assert set(induced_nodes['user']) == set([1, 3, 5, 2, 7])
    assert set(induced_nodes['game']) == set([4, 5, 6])
    _check(g1, new_g1, induced_nodes)

    # Test with always_preserve given a dict
    new_g1 = dgl.compact_graphs(
        g1, always_preserve={'game': F.tensor([4, 7], dtype=getattr(F, index_dtype))})
    assert new_g1._idtype_str == index_dtype
    induced_nodes = {ntype: new_g1.nodes[ntype].data[dgl.NID] for ntype in new_g1.ntypes}
    induced_nodes = {k: F.asnumpy(v) for k, v in induced_nodes.items()}
    assert set(induced_nodes['user']) == set([1, 3, 5, 2, 7])
    assert set(induced_nodes['game']) == set([4, 5, 6, 7])
    _check(g1, new_g1, induced_nodes)

    # Test with always_preserve given a tensor
    new_g3 = dgl.compact_graphs(
        g3, always_preserve=F.tensor([1, 7], dtype=getattr(F, index_dtype)))
    induced_nodes = {ntype: new_g3.nodes[ntype].data[dgl.NID] for ntype in new_g3.ntypes}
    induced_nodes = {k: F.asnumpy(v) for k, v in induced_nodes.items()}

    assert new_g3._idtype_str == index_dtype
    assert set(induced_nodes['user']) == set([0, 1, 2, 7])
    _check(g3, new_g3, induced_nodes)

    # Test multiple graphs
    new_g1, new_g2 = dgl.compact_graphs([g1, g2])
    induced_nodes = {ntype: new_g1.nodes[ntype].data[dgl.NID] for ntype in new_g1.ntypes}
    induced_nodes = {k: F.asnumpy(v) for k, v in induced_nodes.items()}
    assert new_g1._idtype_str == index_dtype
    assert new_g2._idtype_str == index_dtype
    assert set(induced_nodes['user']) == set([1, 3, 5, 2, 7, 8, 9])
    assert set(induced_nodes['game']) == set([3, 4, 5, 6])
    _check(g1, new_g1, induced_nodes)
    _check(g2, new_g2, induced_nodes)

    # Test multiple graphs with always_preserve given a dict
    new_g1, new_g2 = dgl.compact_graphs(
        [g1, g2], always_preserve={'game': F.tensor([4, 7], dtype=getattr(F, index_dtype))})
    induced_nodes = {ntype: new_g1.nodes[ntype].data[dgl.NID] for ntype in new_g1.ntypes}
    induced_nodes = {k: F.asnumpy(v) for k, v in induced_nodes.items()}
    assert new_g1._idtype_str == index_dtype
    assert new_g2._idtype_str == index_dtype
    assert set(induced_nodes['user']) == set([1, 3, 5, 2, 7, 8, 9])
    assert set(induced_nodes['game']) == set([3, 4, 5, 6, 7])
    _check(g1, new_g1, induced_nodes)
    _check(g2, new_g2, induced_nodes)

    # Test multiple graphs with always_preserve given a tensor
    new_g3, new_g4 = dgl.compact_graphs(
        [g3, g4], always_preserve=F.tensor([1, 7], dtype=getattr(F, index_dtype)))
    induced_nodes = {ntype: new_g3.nodes[ntype].data[dgl.NID] for ntype in new_g3.ntypes}
    induced_nodes = {k: F.asnumpy(v) for k, v in induced_nodes.items()}

    assert new_g3._idtype_str == index_dtype
    assert new_g4._idtype_str == index_dtype
    assert set(induced_nodes['user']) == set([0, 1, 2, 3, 5, 7])
    _check(g3, new_g3, induced_nodes)
    _check(g4, new_g4, induced_nodes)
Beispiel #28
0
def test_node_dataloader(sampler_name):
    g1 = dgl.graph(([0, 0, 0, 1, 1], [1, 2, 3, 3, 4]))
    g1.ndata['feat'] = F.copy_to(F.randn((5, 8)), F.cpu())
    g1.ndata['label'] = F.copy_to(F.randn((g1.num_nodes(),)), F.cpu())

    for load_input, load_output in [(None, None), ({'feat': g1.ndata['feat']}, {'label': g1.ndata['label']})]:
        for async_load in [False, True]:
            for num_workers in [0, 1, 2]:
                sampler = {
                    'full': dgl.dataloading.MultiLayerFullNeighborSampler(2),
                    'neighbor': dgl.dataloading.MultiLayerNeighborSampler([3, 3]),
                    'neighbor2': dgl.dataloading.MultiLayerNeighborSampler([3, 3]),
                    'shadow': dgl.dataloading.ShaDowKHopSampler([3, 3])}[sampler_name]
                dataloader = dgl.dataloading.NodeDataLoader(
                    g1, g1.nodes(), sampler, device=F.ctx(),
                    load_input=load_input,
                    load_output=load_output,
                    async_load=async_load,
                    batch_size=g1.num_nodes(),
                    num_workers=num_workers)
                for input_nodes, output_nodes, blocks in dataloader:
                    _check_device(input_nodes)
                    _check_device(output_nodes)
                    _check_device(blocks)
                    if load_input:
                        _check_device(blocks[0].srcdata['feat'])
                        OPS.copy_u_sum(blocks[0], blocks[0].srcdata['feat'])
                    if load_output:
                        _check_device(blocks[-1].dstdata['label'])
                        OPS.copy_u_sum(blocks[-1], blocks[-1].dstdata['label'])

    g2 = dgl.heterograph({
         ('user', 'follow', 'user'): ([0, 0, 0, 1, 1, 1, 2], [1, 2, 3, 0, 2, 3, 0]),
         ('user', 'followed-by', 'user'): ([1, 2, 3, 0, 2, 3, 0], [0, 0, 0, 1, 1, 1, 2]),
         ('user', 'play', 'game'): ([0, 1, 1, 3, 5], [0, 1, 2, 0, 2]),
         ('game', 'played-by', 'user'): ([0, 1, 2, 0, 2], [0, 1, 1, 3, 5])
    })
    for ntype in g2.ntypes:
        g2.nodes[ntype].data['feat'] = F.copy_to(F.randn((g2.num_nodes(ntype), 8)), F.cpu())
    batch_size = max(g2.num_nodes(nty) for nty in g2.ntypes)
    sampler = {
        'full': dgl.dataloading.MultiLayerFullNeighborSampler(2),
        'neighbor': dgl.dataloading.MultiLayerNeighborSampler([{etype: 3 for etype in g2.etypes}] * 2),
        'neighbor2': dgl.dataloading.MultiLayerNeighborSampler([3, 3]),
        'shadow': dgl.dataloading.ShaDowKHopSampler([{etype: 3 for etype in g2.etypes}] * 2)}[sampler_name]

    for async_load in [False, True]:
        dataloader = dgl.dataloading.NodeDataLoader(
            g2, {nty: g2.nodes(nty) for nty in g2.ntypes},
            sampler, device=F.ctx(), async_load=async_load, batch_size=batch_size)
        assert isinstance(iter(dataloader), Iterator)
        for input_nodes, output_nodes, blocks in dataloader:
            _check_device(input_nodes)
            _check_device(output_nodes)
            _check_device(blocks)

    status = False
    try:
        dgl.dataloading.NodeDataLoader(
            g2, {nty: g2.nodes(nty) for nty in g2.ntypes},
            sampler, device=F.ctx(), load_input={'feat': g1.ndata['feat']}, batch_size=batch_size)
    except dgl.DGLError:
        status = True
    assert status
Beispiel #29
0
def test_reverse():
    g = dgl.DGLGraph()
    g.add_nodes(5)
    # The graph need not be completely connected.
    g.add_edges([0, 1, 2], [1, 2, 1])
    g.ndata['h'] = F.tensor([[0.], [1.], [2.], [3.], [4.]])
    g.edata['h'] = F.tensor([[5.], [6.], [7.]])
    rg = g.reverse()

    assert g.is_multigraph == rg.is_multigraph

    assert g.number_of_nodes() == rg.number_of_nodes()
    assert g.number_of_edges() == rg.number_of_edges()
    assert F.allclose(F.astype(rg.has_edges_between(
        [1, 2, 1], [0, 1, 2]), F.float32), F.ones((3,)))
    assert g.edge_id(0, 1) == rg.edge_id(1, 0)
    assert g.edge_id(1, 2) == rg.edge_id(2, 1)
    assert g.edge_id(2, 1) == rg.edge_id(1, 2)

    # test dgl.reverse_heterograph
    # test homogeneous graph
    g = dgl.graph((F.tensor([0, 1, 2]), F.tensor([1, 2, 0])))
    g.ndata['h'] = F.tensor([[0.], [1.], [2.]])
    g.edata['h'] = F.tensor([[3.], [4.], [5.]])
    g_r = dgl.reverse_heterograph(g)
    assert g.number_of_nodes() == g_r.number_of_nodes()
    assert g.number_of_edges() == g_r.number_of_edges()
    u_g, v_g, eids_g = g.all_edges(form='all')
    u_rg, v_rg, eids_rg = g_r.all_edges(form='all')
    assert F.array_equal(u_g, v_rg)
    assert F.array_equal(v_g, u_rg)
    assert F.array_equal(eids_g, eids_rg)
    assert F.array_equal(g.ndata['h'], g_r.ndata['h'])
    assert len(g_r.edata) == 0

    # without sharing ndata
    g_r = dgl.reverse_heterograph(g, copy_ndata=False)
    assert g.number_of_nodes() == g_r.number_of_nodes()
    assert g.number_of_edges() == g_r.number_of_edges()
    assert len(g_r.ndata) == 0
    assert len(g_r.edata) == 0

    # with shared ndata and edata
    g_r = dgl.reverse_heterograph(g, copy_ndata=True, copy_edata=True)
    assert g.number_of_nodes() == g_r.number_of_nodes()
    assert g.number_of_edges() == g_r.number_of_edges()
    assert F.array_equal(g.ndata['h'], g_r.ndata['h'])
    assert F.array_equal(g.edata['h'], g_r.edata['h'])

    # add new node feature to g_r
    g_r.ndata['hh'] = F.tensor([0, 1, 2])
    assert ('hh' in g.ndata) is False
    assert ('hh' in g_r.ndata) is True

    # add new edge feature to g_r
    g_r.edata['hh'] = F.tensor([0, 1, 2])
    assert ('hh' in g.edata) is False
    assert ('hh' in g_r.edata) is True

    # test heterogeneous graph
    g = dgl.heterograph({
        ('user', 'follows', 'user'): ([0, 1, 2, 4, 3, 1, 3], [1, 2, 3, 2, 0, 0, 1]),
        ('user', 'plays', 'game'): ([0, 0, 2, 3, 3, 4, 1], [1, 0, 1, 0, 1, 0, 0]),
        ('developer', 'develops', 'game'): ([0, 1, 1, 2], [0, 0, 1, 1])})
    g.nodes['user'].data['h'] = F.tensor([0, 1, 2, 3, 4])
    g.nodes['user'].data['hh'] = F.tensor([1, 1, 1, 1, 1])
    g.nodes['game'].data['h'] = F.tensor([0, 1])
    g.edges['follows'].data['h'] = F.tensor([0, 1, 2, 4, 3, 1, 3])
    g.edges['follows'].data['hh'] = F.tensor([1, 2, 3, 2, 0, 0, 1])
    g_r = dgl.reverse_heterograph(g)

    for etype_g, etype_gr in zip(g.canonical_etypes, g_r.canonical_etypes):
        assert etype_g[0] == etype_gr[2]
        assert etype_g[1] == etype_gr[1]
        assert etype_g[2] == etype_gr[0]
        assert g.number_of_edges(etype_g) == g_r.number_of_edges(etype_gr)
    for ntype in g.ntypes:
        assert g.number_of_nodes(ntype) == g_r.number_of_nodes(ntype)
    assert F.array_equal(g.nodes['user'].data['h'], g_r.nodes['user'].data['h'])
    assert F.array_equal(g.nodes['user'].data['hh'], g_r.nodes['user'].data['hh'])
    assert F.array_equal(g.nodes['game'].data['h'], g_r.nodes['game'].data['h'])
    assert len(g_r.edges['follows'].data) == 0
    u_g, v_g, eids_g = g.all_edges(form='all', etype=('user', 'follows', 'user'))
    u_rg, v_rg, eids_rg = g_r.all_edges(form='all', etype=('user', 'follows', 'user'))
    assert F.array_equal(u_g, v_rg)
    assert F.array_equal(v_g, u_rg)
    assert F.array_equal(eids_g, eids_rg)
    u_g, v_g, eids_g = g.all_edges(form='all', etype=('user', 'plays', 'game'))
    u_rg, v_rg, eids_rg = g_r.all_edges(form='all', etype=('game', 'plays', 'user'))
    assert F.array_equal(u_g, v_rg)
    assert F.array_equal(v_g, u_rg)
    assert F.array_equal(eids_g, eids_rg)
    u_g, v_g, eids_g = g.all_edges(form='all', etype=('developer', 'develops', 'game'))
    u_rg, v_rg, eids_rg = g_r.all_edges(form='all', etype=('game', 'develops', 'developer'))
    assert F.array_equal(u_g, v_rg)
    assert F.array_equal(v_g, u_rg)
    assert F.array_equal(eids_g, eids_rg)

    # without sharing ndata
    g_r = dgl.reverse_heterograph(g, copy_ndata=False)
    for etype_g, etype_gr in zip(g.canonical_etypes, g_r.canonical_etypes):
        assert etype_g[0] == etype_gr[2]
        assert etype_g[1] == etype_gr[1]
        assert etype_g[2] == etype_gr[0]
        assert g.number_of_edges(etype_g) == g_r.number_of_edges(etype_gr)
    for ntype in g.ntypes:
        assert g.number_of_nodes(ntype) == g_r.number_of_nodes(ntype)
    assert len(g_r.nodes['user'].data) == 0
    assert len(g_r.nodes['game'].data) == 0

    g_r = dgl.reverse_heterograph(g, copy_ndata=True, copy_edata=True)
    print(g_r)
    for etype_g, etype_gr in zip(g.canonical_etypes, g_r.canonical_etypes):
        assert etype_g[0] == etype_gr[2]
        assert etype_g[1] == etype_gr[1]
        assert etype_g[2] == etype_gr[0]
        assert g.number_of_edges(etype_g) == g_r.number_of_edges(etype_gr)
    assert F.array_equal(g.edges['follows'].data['h'], g_r.edges['follows'].data['h'])
    assert F.array_equal(g.edges['follows'].data['hh'], g_r.edges['follows'].data['hh'])

    # add new node feature to g_r
    g_r.nodes['user'].data['hhh'] = F.tensor([0, 1, 2, 3, 4])
    assert ('hhh' in g.nodes['user'].data) is False
    assert ('hhh' in g_r.nodes['user'].data) is True

    # add new edge feature to g_r
    g_r.edges['follows'].data['hhh'] = F.tensor([1, 2, 3, 2, 0, 0, 1])
    assert ('hhh' in g.edges['follows'].data) is False
    assert ('hhh' in g_r.edges['follows'].data) is True
Beispiel #30
0
def test_edge_dataloader(sampler_name):
    neg_sampler = dgl.dataloading.negative_sampler.Uniform(2)

    g1 = dgl.graph(([0, 0, 0, 1, 1], [1, 2, 3, 3, 4]))
    g1.ndata['feat'] = F.copy_to(F.randn((5, 8)), F.cpu())

    sampler = {
        'full': dgl.dataloading.MultiLayerFullNeighborSampler(2),
        'neighbor': dgl.dataloading.MultiLayerNeighborSampler([3, 3]),
        'shadow': dgl.dataloading.ShaDowKHopSampler([3, 3])}[sampler_name]

    # no negative sampler
    dataloader = dgl.dataloading.EdgeDataLoader(
        g1, g1.edges(form='eid'), sampler, device=F.ctx(), batch_size=g1.num_edges())
    for input_nodes, pos_pair_graph, blocks in dataloader:
        _check_device(input_nodes)
        _check_device(pos_pair_graph)
        _check_device(blocks)

    # negative sampler
    dataloader = dgl.dataloading.EdgeDataLoader(
        g1, g1.edges(form='eid'), sampler, device=F.ctx(),
        negative_sampler=neg_sampler, batch_size=g1.num_edges())
    for input_nodes, pos_pair_graph, neg_pair_graph, blocks in dataloader:
        _check_device(input_nodes)
        _check_device(pos_pair_graph)
        _check_device(neg_pair_graph)
        _check_device(blocks)

    g2 = dgl.heterograph({
         ('user', 'follow', 'user'): ([0, 0, 0, 1, 1, 1, 2], [1, 2, 3, 0, 2, 3, 0]),
         ('user', 'followed-by', 'user'): ([1, 2, 3, 0, 2, 3, 0], [0, 0, 0, 1, 1, 1, 2]),
         ('user', 'play', 'game'): ([0, 1, 1, 3, 5], [0, 1, 2, 0, 2]),
         ('game', 'played-by', 'user'): ([0, 1, 2, 0, 2], [0, 1, 1, 3, 5])
    })
    for ntype in g2.ntypes:
        g2.nodes[ntype].data['feat'] = F.copy_to(F.randn((g2.num_nodes(ntype), 8)), F.cpu())
    batch_size = max(g2.num_edges(ety) for ety in g2.canonical_etypes)
    sampler = {
        'full': dgl.dataloading.MultiLayerFullNeighborSampler(2),
        'neighbor': dgl.dataloading.MultiLayerNeighborSampler([{etype: 3 for etype in g2.etypes}] * 2),
        'shadow': dgl.dataloading.ShaDowKHopSampler([{etype: 3 for etype in g2.etypes}] * 2)}[sampler_name]

    # no negative sampler
    dataloader = dgl.dataloading.EdgeDataLoader(
        g2, {ety: g2.edges(form='eid', etype=ety) for ety in g2.canonical_etypes},
        sampler, device=F.ctx(), batch_size=batch_size)
    for input_nodes, pos_pair_graph, blocks in dataloader:
        _check_device(input_nodes)
        _check_device(pos_pair_graph)
        _check_device(blocks)

    # negative sampler
    dataloader = dgl.dataloading.EdgeDataLoader(
        g2, {ety: g2.edges(form='eid', etype=ety) for ety in g2.canonical_etypes},
        sampler, device=F.ctx(), negative_sampler=neg_sampler,
        batch_size=batch_size)

    assert isinstance(iter(dataloader), Iterator)
    for input_nodes, pos_pair_graph, neg_pair_graph, blocks in dataloader:
        _check_device(input_nodes)
        _check_device(pos_pair_graph)
        _check_device(neg_pair_graph)
        _check_device(blocks)