def test_isolated_nodes(idtype):
    """Check that explicitly requested node counts are honoured.

    A homogeneous graph and a ``user -plays-> game`` heterograph are built
    with more nodes requested than are referenced by any edge; the reported
    node counts must match the requested counts.
    """
    ctx = F.ctx()

    # Homogeneous case: edges only touch nodes 0..2 but 5 nodes are requested.
    homo = dgl.graph(([0, 1], [1, 2]), num_nodes=5, idtype=idtype, device=ctx)
    assert homo.number_of_nodes() == 5

    def _check_user_game_counts():
        # Heterogeneous case with per-type node counts given as a dict.
        hetero = dgl.heterograph(
            {('user', 'plays', 'game'): ([0, 0, 1], [2, 3, 2])},
            {'user': 5, 'game': 7},
            idtype=idtype,
            device=ctx)
        assert hetero.idtype == idtype
        assert hetero.number_of_nodes('user') == 5
        assert hetero.number_of_nodes('game') == 7

    _check_user_game_counts()
    # Test backward compatibility
    _check_user_game_counts()
def test_pinsage_sampling():
    """Smoke-test the PinSAGE / random-walk neighbor samplers on tiny graphs.

    Each sampler is called with the seed tensor ``[0, 2]``.  The graph it
    returns must expose exactly one node type, and its (u, v) edge list must
    contain ``(1, 0)`` or ``(0, 0)`` as well as ``(2, 2)`` or ``(3, 2)``.
    """
    # Positional settings shared by every sampler constructed below.
    walk_args = (4, 0.5, 3, 2)

    def _check(sampler, ntype):
        frontier = sampler(F.tensor([0, 2], dtype=F.int64))
        assert frontier.ntypes == [ntype]
        srcs, dsts = frontier.all_edges(form='uv', order='eid')
        pairs = set(zip(F.asnumpy(srcs).tolist(), F.asnumpy(dsts).tolist()))
        assert pairs & {(1, 0), (0, 0)}
        assert pairs & {(2, 2), (3, 2)}

    # Bipartite item/user graph with one relation per direction.
    item_user = dgl.heterograph({
        ('item', 'bought-by', 'user'): [
            (0, 0), (0, 1), (1, 0), (1, 1), (2, 2), (2, 3), (3, 2), (3, 3)],
        ('user', 'bought', 'item'): [
            (0, 0), (1, 0), (0, 1), (1, 1), (2, 2), (3, 2), (2, 3), (3, 3)],
    })
    _check(dgl.sampling.PinSAGESampler(item_user, 'item', 'user', *walk_args),
           'item')
    # Metapath given as edge type names ...
    _check(dgl.sampling.RandomWalkNeighborSampler(
        item_user, *walk_args, ['bought-by', 'bought']), 'item')
    # ... and as canonical edge type triples.
    _check(dgl.sampling.RandomWalkNeighborSampler(
        item_user, *walk_args,
        [('item', 'bought-by', 'user'), ('user', 'bought', 'item')]), 'item')

    # Homogeneous graph, no metapath argument.
    homo = dgl.graph(
        [(0, 0), (0, 1), (1, 0), (1, 1), (2, 2), (2, 3), (3, 2), (3, 3)])
    _check(dgl.sampling.RandomWalkNeighborSampler(homo, *walk_args),
           homo.ntypes[0])

    # Three node types connected in a cycle A -> B -> C -> A.
    cyclic = dgl.heterograph({
        ('A', 'AB', 'B'): [(0, 1), (2, 3)],
        ('B', 'BC', 'C'): [(1, 2), (3, 1)],
        ('C', 'CA', 'A'): [(2, 0), (1, 2)],
    })
    _check(dgl.sampling.RandomWalkNeighborSampler(
        cyclic, *walk_args, ['AB', 'BC', 'CA']), 'A')
def test_group_apply_edges():
    """Compare ``group_apply_edges`` against a manual per-node computation.

    The edge UDF sums ``feat * (src_h + dst_h)`` over the feature axis and
    applies a softmax across the edges of each group.  The same quantity is
    recomputed by hand for the out-edges of node 1 (grouping by source) and
    the in-edges of node 5 (grouping by destination).
    """
    def _normalize(edges):
        scores = F.sum(
            edges.data['feat'] * (edges.src['h'] + edges.dst['h']), dim=2)
        return {"norm_feat": F.softmax(scores, dim=1)}

    # Source node -> destinations; edge ids follow this insertion order.
    fan_out = (
        (0, [1, 2, 3, 4, 5, 6, 7, 8]),
        (1, [2, 3, 4, 6, 7, 8]),
        (2, [2, 3, 4, 5, 6, 7, 8]),
    )
    graph = dgl.graph([(s, d) for s, dsts in fan_out for d in dsts])
    graph.ndata['h'] = F.randn((graph.number_of_nodes(), D))
    graph.edata['feat'] = F.randn((graph.number_of_edges(), D))

    def _verify(group_by):
        graph.group_apply_edges(group_by=group_by, func=_normalize)
        if group_by == 'src':
            srcs, dsts, eids = graph.out_edges(1, form='all')
        else:
            srcs, dsts, eids = graph.in_edges(5, form='all')
        actual = graph.edges[eids].data['norm_feat']
        weighted = (graph.nodes[srcs].data['h'] + graph.nodes[dsts].data['h']) \
            * graph.edges[eids].data['feat']
        expected = F.softmax(F.sum(weighted, dim=1), dim=0)
        assert F.allclose(actual, expected)

    # test group by source nodes
    _verify('src')
    # test group by destination nodes
    _verify('dst')
def test_agnn_conv():
    """Check AGNNConv output shapes on a homogeneous graph, a bipartite graph
    and a block produced by ``dgl.to_block``."""
    homo = dgl.DGLGraph(nx.erdos_renyi_graph(20, 0.3))
    layer = nn.AGNNConv(0.1, True)
    layer.initialize(ctx=F.ctx())
    print(layer)

    # test#1: basic
    out = layer(homo, F.randn((20, 10)))
    assert out.shape == (20, 10)

    # Bipartite input: features are passed as a (source, destination) pair
    # and the output has one row per destination node.
    bipartite = dgl.bipartite(sp.sparse.random(100, 200, density=0.1))
    out = layer(bipartite, (F.randn((100, 5)), F.randn((200, 5))))
    assert out.shape == (200, 5)

    # Block input: seed nodes are the distinct edge destinations.
    sparse_g = dgl.graph(sp.sparse.random(100, 100, density=0.001))
    seeds = np.unique(sparse_g.edges()[1].asnumpy())
    block = dgl.to_block(sparse_g, seeds)
    out = layer(block, F.randn((block.number_of_src_nodes(), 5)))
    assert out.shape == (block.number_of_dst_nodes(), 5)
def test_neighbor_nonuniform(num_workers):
    """Check that edge-probability-weighted sampling never picks zero-weight edges.

    Nodes 0 and 1 each have four in-edges, two of which carry probability 0
    under the edge feature ``'p'``.  With a fan-out of 2 the sampled
    neighbors must therefore be exactly the two non-zero-probability ones.

    ``num_workers`` is accepted but is not forwarded to the data loaders.
    """
    def _edges():
        # Nodes 1-4 point at node 0, nodes 5-8 point at node 1.
        return [1, 2, 3, 4, 5, 6, 7, 8], [0, 0, 0, 0, 1, 1, 1, 1]

    def _assert_neighbors(seed, neighbors, expected_by_seed):
        if seed in expected_by_seed:
            assert neighbors == expected_by_seed[seed]

    # ---- homogeneous graph ----
    homo = dgl.graph(_edges())
    homo.edata['p'] = torch.FloatTensor([1, 1, 0, 0, 1, 1, 0, 0])
    sampler = dgl.dataloading.MultiLayerNeighborSampler([2], prob='p')
    loader = dgl.dataloading.NodeDataLoader(
        homo, [0, 1], sampler, batch_size=1, device=F.ctx())
    for input_nodes, output_nodes, _ in loader:
        seed = output_nodes.item()
        # The first input node is skipped: only the remaining entries are
        # compared against the expected neighbor set.
        sampled = set(input_nodes[1:].cpu().numpy())
        _assert_neighbors(seed, sampled, {1: {5, 6}, 0: {1, 2}})

    # ---- heterogeneous graph with two relations into node type 'A' ----
    hetero = dgl.heterograph({
        ('B', 'BA', 'A'): _edges(),
        ('C', 'CA', 'A'): _edges(),
    })
    hetero.edges['BA'].data['p'] = torch.FloatTensor([1, 1, 0, 0, 1, 1, 0, 0])
    hetero.edges['CA'].data['p'] = torch.FloatTensor([0, 0, 1, 1, 0, 0, 1, 1])
    sampler = dgl.dataloading.MultiLayerNeighborSampler([2], prob='p')
    loader = dgl.dataloading.NodeDataLoader(
        hetero, {'A': [0, 1]}, sampler, batch_size=1, device=F.ctx())
    for input_nodes, output_nodes, _ in loader:
        seed = output_nodes['A'].item()
        # Seed and neighbors are of different node types so slicing is not necessary here.
        sampled_b = set(input_nodes['B'].cpu().numpy())
        _assert_neighbors(seed, sampled_b, {1: {5, 6}, 0: {1, 2}})
        sampled_c = set(input_nodes['C'].cpu().numpy())
        _assert_neighbors(seed, sampled_c, {1: {7, 8}, 0: {3, 4}})
def generate_graph(idtype=F.int64, grad=False):
    '''Build the 10-node, 17-edge test graph with random features.

    Node 0 points at nodes 1..8, each of those points at node 9, and node 9
    points back at node 0.  Edge ids follow this table:

        s, d, eid
        0, 1, 0
        1, 9, 1
        0, 2, 2
        2, 9, 3
        0, 3, 4
        3, 9, 5
        0, 4, 6
        4, 9, 7
        0, 5, 8
        5, 9, 9
        0, 6, 10
        6, 9, 11
        0, 7, 12
        7, 9, 13
        0, 8, 14
        8, 9, 15
        9, 0, 16

    Random node features are stored under ``ndata['h']`` and random edge
    features under ``edata['w']``; when ``grad`` is true both are passed
    through ``F.attach_grad`` first.  Zero initializers are installed for
    node and edge features before the graph is returned.
    '''
    src = F.tensor([0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0, 8, 9])
    dst = F.tensor([1, 9, 2, 9, 3, 9, 4, 9, 5, 9, 6, 9, 7, 9, 8, 9, 0])
    graph = dgl.graph((src, dst), idtype=idtype)
    assert graph.device == F.ctx()

    node_feat = F.randn((10, D))
    edge_feat = F.randn((17, D))
    if grad:
        node_feat = F.attach_grad(node_feat)
        edge_feat = F.attach_grad(edge_feat)

    graph.ndata['h'] = node_feat
    graph.edata['w'] = edge_feat
    graph.set_n_initializer(dgl.init.zero_initializer)
    graph.set_e_initializer(dgl.init.zero_initializer)
    return graph
def test_sage_conv(aggre_type):
    """Check SAGEConv output shapes for the given aggregator type.

    Three inputs are exercised: a read-only ``DGLGraph``, a graph built with
    ``dgl.graph`` and a bipartite graph with separate source/destination
    feature sizes.
    """
    device = F.ctx()

    # Case 1: read-only DGLGraph built from a random sparse matrix.
    graph = dgl.DGLGraph(sp.sparse.random(100, 100, density=0.1), readonly=True)
    conv = nn.SAGEConv(5, 10, aggre_type)
    inputs = F.randn((100, 5))
    conv = conv.to(device)
    out = conv(graph, inputs)
    assert out.shape[-1] == 10

    # Case 2: same sizes, graph created through dgl.graph.
    graph = dgl.graph(sp.sparse.random(100, 100, density=0.1))
    conv = nn.SAGEConv(5, 10, aggre_type)
    inputs = F.randn((100, 5))
    conv = conv.to(device)
    out = conv(graph, inputs)
    assert out.shape[-1] == 10

    # Case 3: bipartite graph.  The test gives the destination side the same
    # feature size as the source side when the aggregator is 'gcn'.
    graph = dgl.bipartite(sp.sparse.random(100, 200, density=0.1))
    dst_dim = 10 if aggre_type == 'gcn' else 5
    conv = nn.SAGEConv((10, dst_dim), 2, aggre_type)
    inputs = (F.randn((100, 10)), F.randn((200, dst_dim)))
    conv = conv.to(device)
    out = conv(graph, inputs)
    assert out.shape[-1] == 2
    assert out.shape[0] == 200
def _create_ckg_graph(self, form='dgl', show_relation=False): user_num = self.user_num kg_tensor = self._dataframe_to_interaction(self.kg_feat) inter_tensor = self._dataframe_to_interaction(self.inter_feat) head_entity = kg_tensor[self.head_entity_field] + user_num tail_entity = kg_tensor[self.tail_entity_field] + user_num user = inter_tensor[self.uid_field] item = inter_tensor[self.iid_field] + user_num src = torch.cat([user, item, head_entity]) tgt = torch.cat([item, user, tail_entity]) if show_relation: ui_rel_num = user.shape[0] ui_rel_id = self.relation_num - 1 assert self.field2id_token[self.relation_field][ui_rel_id] == '[UI-Relation]' kg_rel = kg_tensor[self.relation_field] ui_rel = torch.full((2 * ui_rel_num,), ui_rel_id, dtype=kg_rel.dtype) edge = torch.cat([ui_rel, kg_rel]) if form == 'dgl': import dgl graph = dgl.graph((src, tgt)) if show_relation: graph.edata[self.relation_field] = edge return graph elif form == 'pyg': from torch_geometric.data import Data edge_attr = edge if show_relation else None graph = Data(edge_index=torch.stack([src, tgt]), edge_attr=edge_attr) return graph else: raise NotImplementedError('graph format [{}] has not been implemented.'.format(form))
def load_reddit(args):
    """Load the Reddit dataset and return the graph with its tensors.

    ``args`` is accepted but not used.  The three boolean masks are moved to
    CUDA, so a CUDA device is required.

    Returns:
        tuple: ``(g, features, labels, train_mask, val_mask, test_mask,
        train_nid)``.
    """
    dataset = RedditDataset(self_loop = False)
    raw_train, raw_test, raw_val = (
        dataset.train_mask, dataset.test_mask, dataset.val_mask)

    features = torch.Tensor(dataset.features)
    in_feats = features.shape[1]      # computed but not returned
    labels = torch.LongTensor(dataset.labels)
    n_classes = dataset.num_labels    # computed but not returned

    # Construct graph
    g = dgl.graph(dataset.graph.all_edges())
    g.ndata['features'] = features
    prepare_mp(g)

    # Indices of the nodes selected by the (still host-side) masks.
    train_nid = torch.LongTensor(np.nonzero(raw_train)[0])
    val_nid = torch.LongTensor(np.nonzero(raw_val)[0])  # computed but not returned

    train_mask = torch.BoolTensor(raw_train).cuda()
    test_mask = torch.BoolTensor(raw_test).cuda()
    val_mask = torch.BoolTensor(raw_val).cuda()

    return g, features, labels, train_mask, val_mask, test_mask, train_nid
def forward(self, pos, centroids, feat=None):
    """Build one graph per batch item linking grouped points to centroids.

    For every batch item, the indices returned by ``self.frnn`` become edge
    sources and each centroid index, repeated ``self.n_neighbor`` times,
    becomes the edge destination.  Node ids are compacted to the distinct
    indices that occur, and ``pos``, a 0/1 ``center`` flag and (optionally)
    ``feat`` are stored for those nodes.

    Returns the per-item graphs merged with ``dgl.batch``.
    """
    device = pos.device
    groups = self.frnn(pos, centroids)
    batch_size, num_points, _ = pos.shape

    graphs = []
    for b in range(batch_size):
        # Flag which of the original points are centroids.
        is_center = torch.zeros(num_points, device=device)
        is_center[centroids[b]] = 1

        src = groups[b].contiguous().view(-1)
        dst = centroids[b].view(-1, 1).repeat(1, self.n_neighbor).view(-1)
        n_src = src.shape[0]

        # Relabel the point indices that occur to consecutive node ids.
        kept, relabeled = torch.unique(torch.cat([src, dst]), return_inverse=True)

        graph = dgl.graph((relabeled[:n_src], relabeled[n_src:]))
        graph.ndata['pos'] = pos[b][kept]
        graph.ndata['center'] = is_center[kept]
        if feat is not None:
            graph.ndata['feat'] = feat[b][kept]
        graphs.append(graph)

    return dgl.batch(graphs)
def split_train_valid_test(g):
    """Split the edges of ``g`` into train/validation/test link-prediction sets.

    10% of the edges (rounded down) are held out for testing and another 10%
    for validation; the remainder is used for training.  Negative examples
    are node pairs ``(u, v)`` with ``u != v`` that are not connected by an
    edge, sampled with replacement so that each split has as many negatives
    as positives.

    Parameters
    ----------
    g : DGLGraph
        Input graph.  ``g.edges()`` must return tensors that support
        ``.numpy()``.

    Returns
    -------
    tuple
        ``(train_g, train_pos_g, train_neg_g, valid_pos_g, valid_neg_g,
        test_pos_g, test_neg_g)``.  ``train_g`` is ``g`` with the held-out
        edges removed; the other graphs contain only the listed example
        edges and have the same number of nodes as ``g``.

    Raises
    ------
    ValueError
        Raised by ``np.random.choice`` when ``g`` has edges but no candidate
        negative pair exists.
    """
    num_nodes = g.number_of_nodes()
    num_edges = g.number_of_edges()
    u, v = g.edges()

    eids = np.random.permutation(np.arange(num_edges))
    valid_size = int(len(eids) * 0.1)
    test_size = int(len(eids) * 0.1)
    holdout = test_size + valid_size

    test_part = slice(None, test_size)
    valid_part = slice(test_size, holdout)
    train_part = slice(holdout, None)

    test_pos_u, test_pos_v = u[eids[test_part]], v[eids[test_part]]
    valid_pos_u, valid_pos_v = u[eids[valid_part]], v[eids[valid_part]]
    train_pos_u, train_pos_v = u[eids[train_part]], v[eids[train_part]]

    # Find all negative edges and split them for training and testing.
    # The shape is given explicitly: without it the matrix is only as large
    # as the highest node id that appears in an edge, which breaks the
    # subtraction of the identity when trailing nodes are isolated.
    adj = sp.coo_matrix((np.ones(len(u)), (u.numpy(), v.numpy())),
                        shape=(num_nodes, num_nodes))
    adj_neg = 1 - adj.toarray() - np.eye(num_nodes)
    # Only strictly positive entries are true non-edges between distinct
    # nodes: existing self-loops yield -1 and duplicated edges yield values
    # below zero, neither of which is a valid negative example.
    neg_u, neg_v = np.where(adj_neg > 0)
    neg_eids = np.random.choice(len(neg_u), num_edges)

    test_neg_u, test_neg_v = neg_u[neg_eids[test_part]], neg_v[neg_eids[test_part]]
    valid_neg_u, valid_neg_v = neg_u[neg_eids[valid_part]], neg_v[neg_eids[valid_part]]
    train_neg_u, train_neg_v = neg_u[neg_eids[train_part]], neg_v[neg_eids[train_part]]

    def _example_graph(src, dst):
        # Keep the node set identical to ``g`` so node ids stay aligned.
        return dgl.graph((src, dst), num_nodes=num_nodes)

    train_g = dgl.remove_edges(g, eids[:holdout])
    train_pos_g = _example_graph(train_pos_u, train_pos_v)
    train_neg_g = _example_graph(train_neg_u, train_neg_v)
    valid_pos_g = _example_graph(valid_pos_u, valid_pos_v)
    valid_neg_g = _example_graph(valid_neg_u, valid_neg_v)
    test_pos_g = _example_graph(test_pos_u, test_pos_v)
    test_neg_g = _example_graph(test_neg_u, test_neg_v)

    return train_g, train_pos_g, train_neg_g, valid_pos_g, valid_neg_g, test_pos_g, test_neg_g
def mol_to_nearest_neighbor_graph(mol,
                                  coordinates,
                                  neighbor_cutoff,
                                  max_num_neighbors=None,
                                  p_distance=2,
                                  add_self_loop=False,
                                  node_featurizer=None,
                                  edge_featurizer=None,
                                  canonical_atom_order=True,
                                  keep_dists=False,
                                  dist_field='dist',
                                  explicit_hydrogens=False,
                                  num_virtual_nodes=0):
    """Convert an RDKit molecule into a nearest neighbor graph and featurize for it.

    Different from bigraph and complete graph, the nearest neighbor graph
    may not be symmetric since i is the closest neighbor of j does not
    necessarily suggest the other way.

    Parameters
    ----------
    mol : rdkit.Chem.rdchem.Mol
        RDKit molecule holder
    coordinates : numpy.ndarray of shape (N, D)
        The coordinates of atoms in the molecule. N for the number of atoms
        and D for the dimensions of the coordinates.
    neighbor_cutoff : float
        If the distance between a pair of nodes is larger than neighbor_cutoff,
        they will not be considered as neighboring nodes.
    max_num_neighbors : int or None.
        If not None, then this specifies the maximum number of neighbors
        allowed for each atom. Default to None.
    p_distance : int
        We compute the distance between neighbors using Minkowski (:math:`l_p`)
        distance. When ``p_distance = 1``, Minkowski distance is equivalent to
        Manhattan distance. When ``p_distance = 2``, Minkowski distance is
        equivalent to the standard Euclidean distance. Default to 2.
    add_self_loop : bool
        Whether to add self loops in DGLGraphs. Default to False.
    node_featurizer : callable, rdkit.Chem.rdchem.Mol -> dict
        Featurization for nodes like atoms in a molecule, which can be used to update
        ndata for a DGLGraph. Default to None.
    edge_featurizer : callable, rdkit.Chem.rdchem.Mol -> dict
        Featurization for edges like bonds in a molecule, which can be used to update
        edata for a DGLGraph. Default to None.
    canonical_atom_order : bool
        Whether to use a canonical order of atoms returned by RDKit. Setting it
        to true might change the order of atoms in the graph constructed. Default
        to True.
    keep_dists : bool
        Whether to store the distance between neighboring atoms in ``edata`` of the
        constructed DGLGraphs. Default to False.
    dist_field : str
        Field for storing distance between neighboring atoms in ``edata``. This comes
        into effect only when ``keep_dists=True``. Default to ``'dist'``.
    explicit_hydrogens : bool
        Whether to explicitly represent hydrogens as nodes in the graph. If True, it will
        call rdkit.Chem.AddHs(mol). Default to False.
    num_virtual_nodes : int
        The number of virtual nodes to add. The virtual nodes will be connected to
        all real nodes with virtual edges. If the returned graph has any node/edge
        feature, an additional column of binary values will be used for each feature
        to indicate the identity of virtual node/edges. The features of the virtual
        nodes/edges will be zero vectors except for the additional column. Default to 0.

    Returns
    -------
    DGLGraph or None
        Nearest neighbor DGLGraph for the molecule if :attr:`mol` is valid and
        None otherwise.

    Raises
    ------
    AssertionError
        If the number of atoms differs from ``coordinates.shape[0]``, or if
        ``keep_dists`` is True and ``dist_field`` is already present in the
        edge features.

    Examples
    --------
    >>> from dgllife.utils import mol_to_nearest_neighbor_graph
    >>> from rdkit import Chem
    >>> from rdkit.Chem import AllChem

    >>> mol = Chem.MolFromSmiles('CC1(C(N2C(S1)C(C2=O)NC(=O)CC3=CC=CC=C3)C(=O)O)C')
    >>> AllChem.EmbedMolecule(mol)
    >>> AllChem.MMFFOptimizeMolecule(mol)
    >>> coords = get_mol_3d_coordinates(mol)
    >>> g = mol_to_nearest_neighbor_graph(mol, coords, neighbor_cutoff=1.25)
    >>> print(g)
    DGLGraph(num_nodes=23, num_edges=6,
             ndata_schemes={}
             edata_schemes={})

    Quite often we will want to use the distance between end atoms of edges, this can be
    achieved with

    >>> g = mol_to_nearest_neighbor_graph(mol, coords, neighbor_cutoff=1.25, keep_dists=True)
    >>> print(g.edata['dist'])
    tensor([[1.2024],
            [1.2024],
            [1.2270],
            [1.2270],
            [1.2259],
            [1.2259]])

    By default, we do not explicitly represent hydrogens as nodes, which can be done as follows.

    >>> mol = Chem.MolFromSmiles('CC1(C(N2C(S1)C(C2=O)NC(=O)CC3=CC=CC=C3)C(=O)O)C')
    >>> mol = Chem.AddHs(mol)
    >>> AllChem.EmbedMolecule(mol)
    >>> AllChem.MMFFOptimizeMolecule(mol)
    >>> coords = get_mol_3d_coordinates(mol)
    >>> g = mol_to_nearest_neighbor_graph(mol, coords, neighbor_cutoff=1.25,
    ...                                   explicit_hydrogens=True)
    >>> print(g)
    DGLGraph(num_nodes=41, num_edges=42,
             ndata_schemes={}
             edata_schemes={})

    See Also
    --------
    get_mol_3d_coordinates
    k_nearest_neighbors
    smiles_to_nearest_neighbor_graph
    """
    if mol is None:
        print('Invalid mol found')
        return None

    if explicit_hydrogens:
        mol = Chem.AddHs(mol)

    # The atom count is checked after hydrogens were (optionally) added, so
    # ``coordinates`` must already cover the explicit hydrogens in that case.
    num_atoms = mol.GetNumAtoms()
    num_coords = coordinates.shape[0]
    assert num_atoms == num_coords, \
        'Expect the number of atoms to match the first dimension of coordinates, ' \
        'got {:d} and {:d}'.format(num_atoms, num_coords)

    if canonical_atom_order:
        # NOTE(review): only the molecule is renumbered here; ``coordinates``
        # is passed on in its original row order -- confirm this is intended.
        new_order = rdmolfiles.CanonicalRankAtoms(mol)
        mol = rdmolops.RenumberAtoms(mol, new_order)

    srcs, dsts, dists = k_nearest_neighbors(coordinates=coordinates,
                                            neighbor_cutoff=neighbor_cutoff,
                                            max_num_neighbors=max_num_neighbors,
                                            p_distance=p_distance,
                                            self_loops=add_self_loop)
    g = dgl.graph(([], []), idtype=torch.int32)

    # Add nodes first since some nodes may be completely isolated
    g.add_nodes(num_atoms)

    # Add edges
    g.add_edges(srcs, dsts)

    if node_featurizer is not None:
        g.ndata.update(node_featurizer(mol))

    if edge_featurizer is not None:
        g.edata.update(edge_featurizer(mol))

    if keep_dists:
        # The field name is now substituted into the message; previously the
        # literal '{}' placeholder was shown.
        assert dist_field not in g.edata, \
            'Expect {} to be reserved for distance between neighboring atoms.'.format(
                dist_field)
        g.edata[dist_field] = torch.tensor(dists).float().reshape(-1, 1)

    if num_virtual_nodes > 0:
        num_real_nodes = g.num_nodes()
        real_nodes = list(range(num_real_nodes))
        g.add_nodes(num_virtual_nodes)

        # Change Topology: connect every virtual node with every real node
        # in both directions.
        virtual_src = []
        virtual_dst = []
        for count in range(num_virtual_nodes):
            virtual_node = num_real_nodes + count
            virtual_node_copy = [virtual_node] * num_real_nodes
            virtual_src.extend(real_nodes)
            virtual_src.extend(virtual_node_copy)
            virtual_dst.extend(virtual_node_copy)
            virtual_dst.extend(real_nodes)
        g.add_edges(virtual_src, virtual_dst)

        # Append an indicator column to every feature: 1 for real nodes/edges
        # and 0 for the virtual ones, which were added last.
        for nk, nv in g.ndata.items():
            nv = torch.cat([nv, torch.zeros(g.num_nodes(), 1)], dim=1)
            nv[:-num_virtual_nodes, -1] = 1
            g.ndata[nk] = nv

        for ek, ev in g.edata.items():
            ev = torch.cat([ev, torch.zeros(g.num_edges(), 1)], dim=1)
            # Each virtual node contributed 2 * num_real_nodes trailing edges.
            ev[:-num_virtual_nodes * num_real_nodes * 2, -1] = 1
            g.edata[ek] = ev

    return g
def construct_graph(training_dir, edges, nodes, target_node_type, heterogeneous=True):
    """Build a DGL graph from edge-list and node-feature files.

    Parameters
    ----------
    training_dir : str
        Directory that contains the edge-list files and the node file.
    edges : list of str
        Edge-list file names.  All of them are read for a heterogeneous
        graph; only the first one is read otherwise.
    nodes : str
        Name of the node (feature) file.
    target_node_type : str
        Node type that is renamed to ``'target'`` in the heterograph.
    heterogeneous : bool
        Whether to build a heterograph (one relation plus one reversed
        relation per edge list, and a self relation on the target nodes) or
        a single homogeneous graph with self loops.

    Returns
    -------
    tuple
        ``(g, features, id_to_node)``.  In the heterogeneous case
        ``id_to_node`` is the mapping of the target node type only.
    """
    if not heterogeneous:
        sources, sinks, features, id_to_node = read_edges(
            os.path.join(training_dir, edges[0]),
            os.path.join(training_dir, nodes))

        # add self relation
        self_loop_ids = sorted(id_to_node.values())
        sources.extend(self_loop_ids)
        sinks.extend(self_loop_ids)

        g = dgl.graph((sources, sinks))
        if features:
            g.ndata['features'] = np.array(features).astype('float32')
        print('read graph from node list and edge list')

        features = g.ndata['features']
        return g, features, id_to_node

    print("Getting relation graphs from the following edge lists : {} ".format(edges))
    edgelists, id_to_node = {}, {}
    for idx, edge_file in enumerate(edges):
        edge_path = os.path.join(training_dir, edge_file)
        edgelist, id_to_node, src, dst = parse_edgelist(
            edge_path, id_to_node, header=True)
        src = 'target' if src == target_node_type else src
        dst = 'target' if dst == target_node_type else dst

        edgelists[(src, 'relation{}'.format(idx), dst)] = edgelist
        print("Read edges for relation{} from edgelist: {}".format(idx, edge_path))

        # reverse edge list so that relation is undirected
        edgelists[(dst, 'reverse_relation{}'.format(idx), src)] = [
            (b, a) for a, b in edgelist]

    # get features for target nodes
    features, _ = get_features(id_to_node[target_node_type],
                               os.path.join(training_dir, nodes))
    print("Read in features for target nodes")

    # Target nodes that have features but no connections are not given any
    # extra relation; they are only covered by the self relation below.

    # add self relation
    edgelists[('target', 'self_relation', 'target')] = [
        (t, t) for t in id_to_node[target_node_type].values()]

    g = dgl.heterograph(edgelists)
    print(
        "Constructed heterograph with the following metagraph structure: Node types {}, Edge types{}"
        .format(g.ntypes, g.canonical_etypes))
    print("Number of nodes of type target : {}".format(g.number_of_nodes('target')))

    # The features are returned to the caller; they are not attached to the
    # heterograph here.
    return g, features, id_to_node[target_node_type]
def test_subgraph1(idtype):
    """Exercise node, edge and type-based subgraph extraction on a heterograph.

    The graph comes from ``create_test_heterograph``; the assertions below
    rely on it having the node types 'user', 'game' and 'developer' and the
    edge types 'follows', 'plays', 'wishes' and 'develops'.  Node and edge
    ids are supplied as Python lists, backend tensors and numpy arrays.
    Edge-subgraph checks are skipped when the default context is 'gpu'.
    """
    g = create_test_heterograph(idtype)
    # Single-relation views of the heterograph.
    g_graph = g['follows']
    g_bipartite = g['plays']

    x = F.randn((3, 5))
    y = F.randn((2, 4))
    g.nodes['user'].data['h'] = x
    g.edges['follows'].data['h'] = y

    def _check_subgraph(g, sg):
        # Expected content for the subgraph induced by users {1, 2} and
        # game {0}, or by edge 1 of 'follows', 'plays' and 'wishes'.
        assert sg.idtype == g.idtype
        assert sg.device == g.device
        assert sg.ntypes == g.ntypes
        assert sg.etypes == g.etypes
        assert sg.canonical_etypes == g.canonical_etypes
        assert F.array_equal(F.tensor(sg.nodes['user'].data[dgl.NID]),
                             F.tensor([1, 2], g.idtype))
        assert F.array_equal(F.tensor(sg.nodes['game'].data[dgl.NID]),
                             F.tensor([0], g.idtype))
        assert F.array_equal(F.tensor(sg.edges['follows'].data[dgl.EID]),
                             F.tensor([1], g.idtype))
        assert F.array_equal(F.tensor(sg.edges['plays'].data[dgl.EID]),
                             F.tensor([1], g.idtype))
        assert F.array_equal(F.tensor(sg.edges['wishes'].data[dgl.EID]),
                             F.tensor([1], g.idtype))
        assert sg.number_of_nodes('developer') == 0
        assert sg.number_of_edges('develops') == 0
        # Features must be the matching rows of the parent's features.
        assert F.array_equal(sg.nodes['user'].data['h'], g.nodes['user'].data['h'][1:3])
        assert F.array_equal(sg.edges['follows'].data['h'], g.edges['follows'].data['h'][1:2])

    # Python list input
    sg1 = g.subgraph({'user': [1, 2], 'game': [0]})
    _check_subgraph(g, sg1)
    if F._default_context_str != 'gpu':
        # TODO(minjie): enable this later
        sg2 = g.edge_subgraph({'follows': [1], 'plays': [1], 'wishes': [1]})
        _check_subgraph(g, sg2)

    # backend tensor input
    sg1 = g.subgraph({
        'user': F.tensor([1, 2], dtype=idtype),
        'game': F.tensor([0], dtype=idtype)
    })
    _check_subgraph(g, sg1)
    if F._default_context_str != 'gpu':
        # TODO(minjie): enable this later
        sg2 = g.edge_subgraph({
            'follows': F.tensor([1], dtype=idtype),
            'plays': F.tensor([1], dtype=idtype),
            'wishes': F.tensor([1], dtype=idtype)
        })
        _check_subgraph(g, sg2)

    # numpy input
    sg1 = g.subgraph({'user': np.array([1, 2]), 'game': np.array([0])})
    _check_subgraph(g, sg1)
    if F._default_context_str != 'gpu':
        # TODO(minjie): enable this later
        sg2 = g.edge_subgraph({
            'follows': np.array([1]),
            'plays': np.array([1]),
            'wishes': np.array([1])
        })
        _check_subgraph(g, sg2)

    def _check_subgraph_single_ntype(g, sg, preserve_nodes=False):
        # Checks for subgraphs of the 'follows' view.  With preserve_nodes
        # the subgraph must keep the parent's node counts instead of
        # carrying the node ids [1, 2].
        assert sg.idtype == g.idtype
        assert sg.device == g.device
        assert sg.ntypes == g.ntypes
        assert sg.etypes == g.etypes
        assert sg.canonical_etypes == g.canonical_etypes

        if not preserve_nodes:
            assert F.array_equal(F.tensor(sg.nodes['user'].data[dgl.NID]),
                                 F.tensor([1, 2], g.idtype))
        else:
            for ntype in sg.ntypes:
                assert g.number_of_nodes(ntype) == sg.number_of_nodes(ntype)

        assert F.array_equal(F.tensor(sg.edges['follows'].data[dgl.EID]),
                             F.tensor([1], g.idtype))

        if not preserve_nodes:
            assert F.array_equal(sg.nodes['user'].data['h'], g.nodes['user'].data['h'][1:3])
        assert F.array_equal(sg.edges['follows'].data['h'], g.edges['follows'].data['h'][1:2])

    def _check_subgraph_single_etype(g, sg, preserve_nodes=False):
        # Checks for subgraphs of the 'plays' view built from edges 0 and 1.
        assert sg.ntypes == g.ntypes
        assert sg.etypes == g.etypes
        assert sg.canonical_etypes == g.canonical_etypes

        if not preserve_nodes:
            assert F.array_equal(F.tensor(sg.nodes['user'].data[dgl.NID]),
                                 F.tensor([0, 1], g.idtype))
            assert F.array_equal(F.tensor(sg.nodes['game'].data[dgl.NID]),
                                 F.tensor([0], g.idtype))
        else:
            for ntype in sg.ntypes:
                assert g.number_of_nodes(ntype) == sg.number_of_nodes(ntype)

        assert F.array_equal(F.tensor(sg.edges['plays'].data[dgl.EID]),
                             F.tensor([0, 1], g.idtype))

    sg1_graph = g_graph.subgraph([1, 2])
    _check_subgraph_single_ntype(g_graph, sg1_graph)
    if F._default_context_str != 'gpu':
        # TODO(minjie): enable this later
        sg1_graph = g_graph.edge_subgraph([1])
        _check_subgraph_single_ntype(g_graph, sg1_graph)
        # relabel_nodes=False is expected to keep every parent node.
        sg1_graph = g_graph.edge_subgraph([1], relabel_nodes=False)
        _check_subgraph_single_ntype(g_graph, sg1_graph, True)
        sg2_bipartite = g_bipartite.edge_subgraph([0, 1])
        _check_subgraph_single_etype(g_bipartite, sg2_bipartite)
        sg2_bipartite = g_bipartite.edge_subgraph([0, 1], relabel_nodes=False)
        _check_subgraph_single_etype(g_bipartite, sg2_bipartite, True)

    def _check_typed_subgraph1(g, sg):
        # Type-restricted subgraph over {'user', 'game'}: structure and
        # features must match the parent's.
        assert g.idtype == sg.idtype
        assert g.device == sg.device
        assert set(sg.ntypes) == {'user', 'game'}
        assert set(sg.etypes) == {'follows', 'plays', 'wishes'}
        for ntype in sg.ntypes:
            assert sg.number_of_nodes(ntype) == g.number_of_nodes(ntype)
        for etype in sg.etypes:
            src_sg, dst_sg = sg.all_edges(etype=etype, order='eid')
            src_g, dst_g = g.all_edges(etype=etype, order='eid')
            assert F.array_equal(src_sg, src_g)
            assert F.array_equal(dst_sg, dst_g)
        assert F.array_equal(sg.nodes['user'].data['h'], g.nodes['user'].data['h'])
        assert F.array_equal(sg.edges['follows'].data['h'], g.edges['follows'].data['h'])
        # Overwrite one feature row on the parent and check that the
        # subgraph still reports the same features as the parent.
        g.nodes['user'].data['h'] = F.scatter_row(g.nodes['user'].data['h'], F.tensor([2]), F.randn(
            (1, 5)))
        g.edges['follows'].data['h'] = F.scatter_row(
            g.edges['follows'].data['h'], F.tensor([1]), F.randn((1, 4)))
        assert F.array_equal(sg.nodes['user'].data['h'], g.nodes['user'].data['h'])
        assert F.array_equal(sg.edges['follows'].data['h'], g.edges['follows'].data['h'])

    def _check_typed_subgraph2(g, sg):
        # Type-restricted subgraph over the 'develops' relation only.
        assert set(sg.ntypes) == {'developer', 'game'}
        assert set(sg.etypes) == {'develops'}
        for ntype in sg.ntypes:
            assert sg.number_of_nodes(ntype) == g.number_of_nodes(ntype)
        for etype in sg.etypes:
            src_sg, dst_sg = sg.all_edges(etype=etype, order='eid')
            src_g, dst_g = g.all_edges(etype=etype, order='eid')
            assert F.array_equal(src_sg, src_g)
            assert F.array_equal(dst_sg, dst_g)

    sg3 = g.node_type_subgraph(['user', 'game'])
    _check_typed_subgraph1(g, sg3)
    sg4 = g.edge_type_subgraph(['develops'])
    _check_typed_subgraph2(g, sg4)
    sg5 = g.edge_type_subgraph(['follows', 'plays', 'wishes'])
    _check_typed_subgraph1(g, sg5)

    # Test for restricted format
    if F._default_context_str != 'gpu':
        # TODO(minjie): enable this later
        for fmt in ['csr', 'csc', 'coo']:
            g = dgl.graph(([0, 1], [1, 2])).formats(fmt)
            # Nodes are requested in the order [1, 0]; the reported node ids
            # must keep that order.
            sg = g.subgraph({g.ntypes[0]: [1, 0]})
            nids = F.asnumpy(sg.ndata[dgl.NID])
            assert np.array_equal(nids, np.array([1, 0]))
            src, dst = sg.edges(order='eid')
            src = F.asnumpy(src)
            dst = F.asnumpy(dst)
            assert np.array_equal(src, np.array([1]))
def atom_dgl_multigraph(
    atoms=None,
    cutoff=8.0,
    max_neighbors=12,
    atom_features="cgcnn",
    enforce_undirected=False,
    max_attempts=3,
    include_prdf_angles=False,
    partial_rcut=4.0,
    id=None,
):
    """Obtain a DGLGraph for Atoms object.

    Edges connect each site to its neighbors out to the distance of its
    ``max_neighbors``-th nearest neighbor.  The graph stores
    ``ndata["atom_features"]``, ``edata["bondlength"]`` and
    ``edata["bondangle"]``; with ``include_prdf_angles`` it additionally
    stores ``edata["partial_distance"]`` and ``edata["partial_angle"]``.

    Args:
        atoms: object providing ``raw_distance_matrix``, ``lattice``,
            ``elements``, ``get_all_neighbors``, ``make_supercell`` and
            ``atomwise_angle_and_radial_distribution``.
        cutoff: initial neighbor search radius.
        max_neighbors: neighbor count used to pick the per-site distance
            threshold.
        atom_features: passed through to ``get_node_attributes``.
        enforce_undirected: add a reversed edge for every edge that has no
            counterpart.
        max_attempts: number of radius-extension attempts after which the
            structure is replaced by a 2x2x2 supercell.
        include_prdf_angles: also collect the partial radial/angular values.
        partial_rcut: not referenced in the body.
        id: only used in diagnostic prints (the name shadows the builtin).
    """
    # NOTE(review): captured once here; if ``atoms`` is replaced by a
    # supercell below, this matrix still belongs to the original structure.
    dists = atoms.raw_distance_matrix

    def cos_formula(a, b, c):
        """Get angle between three edges for oblique triangles."""
        res = (a**2 + b**2 - c**2) / (2 * a * b)
        # Clamp to the valid arccos domain.
        res = -1.0 if res < -1.0 else res
        res = 1.0 if res > 1.0 else res
        return np.arccos(res)

    def bond_to_bond_feats(nb):
        # For each neighbor entry, compute the angle between it and the next
        # entry of the list (wrapping around to the first one at the end).
        tmp = 0
        angles_tmp = []
        for ii, i in enumerate(nb):
            tmp = ii + 1
            if tmp > len(nb) - 1:
                tmp = 0
            ang = 0
            try:
                ang = cos_formula(i[2], nb[tmp][2], dists[i[1], nb[tmp][1]])
            except Exception as exp:
                # Any failure leaves the angle at 0.
                # print("Setting angle zeros", id, exp)
                pass
            angles_tmp.append(ang)
        return np.array(angles_tmp)

    if include_prdf_angles:
        (
            all_neighbors,
            prdf_arr,
            pangle_arr,
            pval,
            aval,
            nbor,
        ) = atoms.atomwise_angle_and_radial_distribution(r=cutoff)
        # Sort each row in descending order and keep the first max_neighbors.
        pval = np.fliplr(np.sort(pval))[:, 0:max_neighbors]
        aval = np.fliplr(np.sort(aval))[:, 0:max_neighbors]
    else:
        all_neighbors = atoms.get_all_neighbors(r=cutoff)

    # if a site has too few neighbors, increase the cutoff radius
    min_nbrs = min(len(neighborlist) for neighborlist in all_neighbors)
    # print('min_nbrs,max_neighbors=',min_nbrs,max_neighbors)
    attempt = 0
    # NOTE(review): r_cut only grows while a lattice length exceeds the
    # current cutoff; confirm the loop always terminates for the structures
    # this is used on.
    while min_nbrs < max_neighbors:
        print("extending cutoff radius!", attempt, cutoff, id)
        lat = atoms.lattice
        r_cut = max(cutoff, lat.a, lat.b, lat.c)
        attempt += 1
        if attempt >= max_attempts:
            atoms = atoms.make_supercell([2, 2, 2])
            print(
                "Making supercell, exceeded,attempts",
                max_attempts,
                "cutoff",
                r_cut,
                id,
            )
        cutoff = r_cut
        all_neighbors = atoms.get_all_neighbors(r=cutoff)
        min_nbrs = min(len(neighborlist) for neighborlist in all_neighbors)
        # return Graph.atom_dgl_multigraph(
        #     atoms, r_cut, max_neighbors, atom_features
        # )

    # build up edge list
    # Currently there's no guarantee that this creates undirected graphs
    # An undirected solution would build the full edge list where nodes are
    # keyed by (index,image), and ensure each edge has a complementary edge

    # indeed,JVASP-59628 is an example of a calculation where this produces
    # a graph where one site has no incident edges!

    # build an edge dictionary u -> v
    # so later we can run through the dictionary
    # and remove all pairs of edges
    # so what's left is the odd ones out
    edges = defaultdict(list)

    u, v, r, w, prdf, adf = [], [], [], [], [], []
    for site_idx, neighborlist in enumerate(all_neighbors):

        # sort on distance
        neighborlist = sorted(neighborlist, key=lambda x: x[2])

        # Entry layout used below: [1] neighbor index, [2] distance, [3] cell.
        ids = np.array([nbr[1] for nbr in neighborlist])
        distances = np.array([nbr[2] for nbr in neighborlist])
        c = np.array([nbr[3] for nbr in neighborlist])

        # find the distance to the k-th nearest neighbor
        max_dist = distances[max_neighbors - 1]

        # keep all edges out to the neighbor shell of the k-th neighbor
        ids = ids[distances <= max_dist]
        new_angles = bond_to_bond_feats(neighborlist)
        try:
            # NOTE(review): the angles are indexed with neighbor *site* ids
            # minus one, not with positions in the neighbor list -- confirm.
            new_angles = new_angles[ids - 1]
        except Exception as exp:
            # Fall back to zero angles when the indexing fails.
            new_angles = np.zeros(len(ids))
            pass
        c = c[distances <= max_dist]
        distances = distances[distances <= max_dist]

        u.append([site_idx] * len(ids))
        v.append(ids)
        r.append(distances)
        w.append(new_angles)
        if include_prdf_angles:
            prdf.append(pval[site_idx])
            adf.append(aval[site_idx])

        # keep track of cell-resolved edges
        # to enforce undirected graph construction
        for dst, cell_id in zip(ids, c):
            u_key = f"{site_idx}-(0.0, 0.0, 0.0)"
            v_key = f"{dst}-{tuple(cell_id)}"
            edge_key = tuple(sorted((u_key, v_key)))
            edges[edge_key].append((site_idx, dst))

    if enforce_undirected:
        # add complementary edges to unpaired edges
        # NOTE(review): nothing is appended to ``w`` here, so the bond-angle
        # array can end up shorter than the edge list -- confirm.
        for edge_pair in edges.values():
            if len(edge_pair) == 1:
                src, dst = edge_pair[0]
                u.append(dst)  # swap the order!
                v.append(src)
                r.append(atoms.raw_distance_matrix[src, dst])

    # Flatten the per-site lists into one array per quantity.
    u = np.hstack(u)
    v = np.hstack(v)
    r = np.hstack(r)
    w = np.hstack(w)
    u = torch.tensor(u)
    v = torch.tensor(v)
    w = torch.tensor(w)
    if include_prdf_angles:
        prdf = np.array(prdf)
        adf = np.array(adf)
        prdf = np.hstack(prdf)
        adf = np.cos(np.hstack(adf))
        # Zero-pad so both arrays have one entry per edge.
        if len(r) != len(prdf):
            prdf = np.append(prdf, np.zeros(len(r) - len(prdf)))
        if len(r) != len(adf):
            adf = np.append(adf, np.zeros(len(r) - len(adf)))
        prdf = torch.tensor(np.array(np.hstack(prdf))).type(
            torch.get_default_dtype())
        adf = torch.tensor(np.array(np.hstack(adf))).type(
            torch.get_default_dtype())
    r = torch.tensor(np.array(r)).type(torch.get_default_dtype())
    w = torch.tensor(np.array(w)).type(torch.get_default_dtype())

    # build up atom attribute tensor
    species = atoms.elements
    sps_features = []
    for ii, s in enumerate(species):
        feat = list(get_node_attributes(s, atom_features=atom_features))
        # if include_prdf_angles:
        #    feat=feat+list(prdf[ii])+list(adf[ii])
        sps_features.append(feat)
    sps_features = np.array(sps_features)
    node_features = torch.tensor(sps_features).type(
        torch.get_default_dtype())
    g = dgl.graph((u, v))
    g.ndata["atom_features"] = node_features
    g.edata["bondlength"] = r
    g.edata["bondangle"] = w
    if include_prdf_angles:
        g.edata["partial_distance"] = prdf
        g.edata["partial_angle"] = adf
    return g
def test_to_simple(index_dtype):
    """Check ``dgl.to_simple`` on a homogeneous and a heterogeneous multigraph.

    For each graph the test asserts that the simplified graph has the same
    set of (src, dst) pairs as the original, that the per-edge counter
    (``'count'`` by default, ``'weights'`` when requested through
    ``return_counts``) equals the number of times that pair occurs in the
    original, and that the writeback mapping sends every original edge to the
    position of its pair in the simplified edge list. It also checks which
    node/edge features are carried over for the ``copy_ndata`` /
    ``copy_edata`` settings used here.

    ``index_dtype`` is only forwarded to ``dgl.heterograph``.
    """
    # homogeneous graph
    g = dgl.graph((F.tensor([0, 1, 2, 1]), F.tensor([1, 2, 0, 2])))
    g.ndata['h'] = F.tensor([[0.], [1.], [2.]])
    g.edata['h'] = F.tensor([[3.], [4.], [5.], [6.]])
    sg, wb = dgl.to_simple(g, writeback_mapping=True)
    u, v = g.all_edges(form='uv', order='eid')
    u = F.asnumpy(u).tolist()
    v = F.asnumpy(v).tolist()
    uv = list(zip(u, v))
    eid_map = F.asnumpy(wb)

    su, sv = sg.all_edges(form='uv', order='eid')
    su = F.asnumpy(su).tolist()
    sv = F.asnumpy(sv).tolist()
    suv = list(zip(su, sv))
    sc = F.asnumpy(sg.edata['count'])
    assert set(uv) == set(suv)
    # each simplified edge's count is the multiplicity of that pair in g
    for i, e in enumerate(suv):
        assert sc[i] == sum(e == _e for _e in uv)
    # the writeback mapping points each original edge at its simplified edge
    for i, e in enumerate(uv):
        assert eid_map[i] == suv.index(e)
    # shared ndata
    assert F.array_equal(sg.ndata['h'], g.ndata['h'])
    assert 'h' not in sg.edata
    # new ndata to sg
    sg.ndata['hh'] = F.tensor([[0.], [1.], [2.]])
    assert 'hh' not in g.ndata

    sg = dgl.to_simple(g, writeback_mapping=False, copy_ndata=False)
    assert 'h' not in sg.ndata
    assert 'h' not in sg.edata

    # heterogeneous graph
    g = dgl.heterograph({
        ('user', 'follow', 'user'): ([0, 1, 2, 1, 1, 1], [1, 3, 2, 3, 4, 4]),
        ('user', 'plays', 'game'): ([3, 2, 1, 1, 3, 2, 2], [5, 3, 4, 4, 5, 3, 3])},
        index_dtype=index_dtype)
    g.nodes['user'].data['h'] = F.tensor([0, 1, 2, 3, 4])
    g.nodes['user'].data['hh'] = F.tensor([0, 1, 2, 3, 4])
    g.edges['follow'].data['h'] = F.tensor([0, 1, 2, 3, 4, 5])
    sg, wb = dgl.to_simple(g, return_counts='weights', writeback_mapping=True, copy_edata=True)
    # set only after to_simple, so sg is expected not to have it (asserted below)
    g.nodes['game'].data['h'] = F.tensor([0, 1, 2, 3, 4, 5])

    for etype in g.canonical_etypes:
        u, v = g.all_edges(form='uv', order='eid', etype=etype)
        u = F.asnumpy(u).tolist()
        v = F.asnumpy(v).tolist()
        uv = list(zip(u, v))
        eid_map = F.asnumpy(wb[etype])

        su, sv = sg.all_edges(form='uv', order='eid', etype=etype)
        su = F.asnumpy(su).tolist()
        sv = F.asnumpy(sv).tolist()
        suv = list(zip(su, sv))
        sw = F.asnumpy(sg.edges[etype].data['weights'])

        assert set(uv) == set(suv)
        for i, e in enumerate(suv):
            assert sw[i] == sum(e == _e for _e in uv)
        for i, e in enumerate(uv):
            assert eid_map[i] == suv.index(e)
    # shared ndata
    assert F.array_equal(sg.nodes['user'].data['h'], g.nodes['user'].data['h'])
    assert F.array_equal(sg.nodes['user'].data['hh'], g.nodes['user'].data['hh'])
    assert 'h' not in sg.nodes['game'].data
    # new ndata to sg
    sg.nodes['user'].data['hhh'] = F.tensor([0, 1, 2, 3, 4])
    assert 'hhh' not in g.nodes['user'].data
    # share edata
    feat_idx = F.asnumpy(wb[('user', 'follow', 'user')])
    # first original edge id for every simplified 'follow' edge
    _, indices = np.unique(feat_idx, return_index=True)
    assert np.array_equal(F.asnumpy(sg.edges['follow'].data['h']),
                          F.asnumpy(g.edges['follow'].data['h'])[indices])

    sg = dgl.to_simple(g, writeback_mapping=False, copy_ndata=False)
    for ntype in g.ntypes:
        assert g.number_of_nodes(ntype) == sg.number_of_nodes(ntype)
    assert 'h' not in sg.nodes['user'].data
    assert 'hh' not in sg.nodes['user'].data
def test_to_bidirected():
    """Check ``dgl.to_bidirected`` on homogeneous, empty and heterogeneous graphs.

    The assertions expect the result's edge list to be the original
    (src, dst) pairs followed by the reversed (dst, src) pairs, with edge
    features duplicated in the same order when ``copy_edata=True``. For the
    heterogeneous graph with ``ignore_bipartite=True`` the
    ``('user', 'plays', 'game')`` relation is expected to be left unchanged.
    """
    # homogeneous graph
    g = dgl.graph((F.tensor([0, 1, 3, 1]), F.tensor([1, 2, 0, 2])))
    g.ndata['h'] = F.tensor([[0.], [1.], [2.], [1.]])
    g.edata['h'] = F.tensor([[3.], [4.], [5.], [6.]])
    bg = dgl.to_bidirected(g, copy_ndata=True, copy_edata=True)
    u, v = g.edges()
    ub, vb = bg.edges()
    assert F.array_equal(F.cat([u, v], dim=0), ub)
    assert F.array_equal(F.cat([v, u], dim=0), vb)
    assert F.array_equal(g.ndata['h'], bg.ndata['h'])
    assert F.array_equal(F.cat([g.edata['h'], g.edata['h']], dim=0), bg.edata['h'])
    # features added to the result must not appear on the input graph
    bg.ndata['hh'] = F.tensor([[0.], [1.], [2.], [1.]])
    assert ('hh' in g.ndata) is False
    bg.edata['hh'] = F.tensor([[0.], [1.], [2.], [1.], [0.], [1.], [2.], [1.]])
    assert ('hh' in g.edata) is False

    # do not share ndata and edata
    bg = dgl.to_bidirected(g, copy_ndata=False, copy_edata=False)
    ub, vb = bg.edges()
    assert F.array_equal(F.cat([u, v], dim=0), ub)
    assert F.array_equal(F.cat([v, u], dim=0), vb)
    assert ('h' in bg.ndata) is False
    assert ('h' in bg.edata) is False

    # zero edge graph (only checks that the call does not raise)
    g = dgl.graph([])
    bg = dgl.to_bidirected(g, copy_ndata=True, copy_edata=True)

    # heterogeneous graph
    g = dgl.heterograph({
        ('user', 'wins', 'user'): (F.tensor([0, 2, 0, 2, 2]), F.tensor([1, 1, 2, 1, 0])),
        ('user', 'plays', 'game'): (F.tensor([1, 2, 1]), F.tensor([2, 1, 1])),
        ('user', 'follows', 'user'): (F.tensor([1, 2, 1]), F.tensor([0, 0, 0]))
    })
    g.nodes['game'].data['hv'] = F.ones((3, 1))
    g.nodes['user'].data['hv'] = F.ones((3, 1))
    g.edges['wins'].data['h'] = F.tensor([0, 1, 2, 3, 4])
    bg = dgl.to_bidirected(g, copy_ndata=True, copy_edata=True, ignore_bipartite=True)
    assert F.array_equal(g.nodes['game'].data['hv'], bg.nodes['game'].data['hv'])
    assert F.array_equal(g.nodes['user'].data['hv'], bg.nodes['user'].data['hv'])
    u, v = g.all_edges(order='eid', etype=('user', 'wins', 'user'))
    ub, vb = bg.all_edges(order='eid', etype=('user', 'wins', 'user'))
    assert F.array_equal(F.cat([u, v], dim=0), ub)
    assert F.array_equal(F.cat([v, u], dim=0), vb)
    assert F.array_equal(F.cat([g.edges['wins'].data['h'], g.edges['wins'].data['h']], dim=0),
                         bg.edges['wins'].data['h'])
    u, v = g.all_edges(order='eid', etype=('user', 'follows', 'user'))
    ub, vb = bg.all_edges(order='eid', etype=('user', 'follows', 'user'))
    assert F.array_equal(F.cat([u, v], dim=0), ub)
    assert F.array_equal(F.cat([v, u], dim=0), vb)
    # the bipartite relation is expected to come back unchanged
    u, v = g.all_edges(order='eid', etype=('user', 'plays', 'game'))
    ub, vb = bg.all_edges(order='eid', etype=('user', 'plays', 'game'))
    assert F.array_equal(u, ub)
    assert F.array_equal(v, vb)
    # relations that had no edge features in g have none in bg either
    assert len(bg.edges['plays'].data) == 0
    assert len(bg.edges['follows'].data) == 0

    # do not share ndata and edata
    bg = dgl.to_bidirected(g, copy_ndata=False, copy_edata=False, ignore_bipartite=True)
    assert len(bg.edges['wins'].data) == 0
    assert len(bg.edges['plays'].data) == 0
    assert len(bg.edges['follows'].data) == 0
    assert len(bg.nodes['game'].data) == 0
    assert len(bg.nodes['user'].data) == 0
    u, v = g.all_edges(order='eid', etype=('user', 'wins', 'user'))
    ub, vb = bg.all_edges(order='eid', etype=('user', 'wins', 'user'))
    assert F.array_equal(F.cat([u, v], dim=0), ub)
    assert F.array_equal(F.cat([v, u], dim=0), vb)
    u, v = g.all_edges(order='eid', etype=('user', 'follows', 'user'))
    ub, vb = bg.all_edges(order='eid', etype=('user', 'follows', 'user'))
    assert F.array_equal(F.cat([u, v], dim=0), ub)
    assert F.array_equal(F.cat([v, u], dim=0), vb)
    u, v = g.all_edges(order='eid', etype=('user', 'plays', 'game'))
    ub, vb = bg.all_edges(order='eid', etype=('user', 'plays', 'game'))
    assert F.array_equal(u, ub)
    assert F.array_equal(v, vb)
def test_remove_edges(index_dtype):
    """Check ``dgl.remove_edges`` on homogeneous and heterogeneous graphs.

    Every surviving edge must be an (src, dst, original-eid) triple of the
    input graph, none of the removed ids may survive, and the id dtype of the
    result must match the input.
    """
    id_dtype = getattr(F, index_dtype)

    def as_ids(values):
        # Edge ids as a backend tensor of the dtype under test.
        return F.tensor(values, id_dtype)

    def check(g1, etype, g, edges_removed):
        full_src, full_dst, full_eid = g.edges(etype=etype, form='all')
        kept_src, kept_dst = g1.edges(etype=etype, order='eid')
        # dgl.EID holds, for each surviving edge, its id in the input graph
        frame = g1.edata if etype is None else g1.edges[etype].data
        kept_eid = F.asnumpy(frame[dgl.EID])
        kept_src = F.asnumpy(kept_src)
        kept_dst = F.asnumpy(kept_dst)
        original = set(zip(F.asnumpy(full_src), F.asnumpy(full_dst), F.asnumpy(full_eid)))

        for triple in zip(kept_src, kept_dst, kept_eid):
            assert triple in original
        assert not np.isin(edges_removed, kept_eid).any()
        assert g1.idtype == g.idtype

    # homogeneous graphs, built from an edge list and from a sparse matrix
    for fmt in ['coo', 'csr', 'csc']:
        for edges_to_remove in [[2], [2, 2], [3, 2], [1, 3, 1, 2]]:
            from_pairs = dgl.graph(
                [(0, 1), (2, 3), (1, 2), (3, 4)],
                restrict_format=fmt, index_dtype=index_dtype)
            pruned = dgl.remove_edges(from_pairs, as_ids(edges_to_remove))
            check(pruned, None, from_pairs, edges_to_remove)

            adjacency = spsp.csr_matrix(
                ([1, 1, 1, 1], ([0, 2, 1, 3], [1, 3, 2, 4])), shape=(5, 5))
            from_matrix = dgl.graph(
                adjacency, restrict_format=fmt, index_dtype=index_dtype)
            pruned = dgl.remove_edges(from_matrix, as_ids(edges_to_remove))
            check(pruned, None, from_matrix, edges_to_remove)

    # heterogeneous graph: removals are given per edge type
    g = dgl.heterograph(
        {
            ('A', 'AA', 'A'): [(0, 1), (2, 3), (1, 2), (3, 4)],
            ('A', 'AB', 'B'): [(0, 1), (1, 3), (3, 5), (1, 6)],
            ('B', 'BA', 'A'): [(2, 3), (3, 2)]
        }, index_dtype=index_dtype)
    removal_cases = [
        {'AA': [2], 'AB': [3], 'BA': [1]},
        {'AA': [], 'AB': [3], 'BA': [1]},
        {'AB': [3, 1, 2, 0]},
    ]
    for removals in removal_cases:
        pruned = dgl.remove_edges(
            g, {name: as_ids(ids) for name, ids in removals.items()})
        for name in ('AA', 'AB', 'BA'):
            # edge types absent from the request must lose nothing
            check(pruned, name, g, removals.get(name, []))
def test_local_scope(index_dtype):
    """Check that feature changes made inside ``g.local_scope()`` do not leak.

    Each section redefines a small ``foo`` that mutates node/edge features
    inside a ``with g.local_scope():`` block, calls it, and then asserts on
    the graph's features outside the scope. The last two sections assert,
    from inside the scope, the values produced by the node/edge initializers
    when only some rows are written.
    """
    g = dgl.graph([(0, 1), (1, 2), (2, 3), (3, 4)], index_dtype=index_dtype)
    g.ndata['h'] = F.zeros((g.number_of_nodes(), 3))
    g.edata['w'] = F.zeros((g.number_of_edges(), 4))
    # test override
    def foo(g):
        with g.local_scope():
            g.ndata['h'] = F.ones((g.number_of_nodes(), 3))
            g.edata['w'] = F.ones((g.number_of_edges(), 4))
    foo(g)
    assert F.allclose(g.ndata['h'], F.zeros((g.number_of_nodes(), 3)))
    assert F.allclose(g.edata['w'], F.zeros((g.number_of_edges(), 4)))
    # test out-place update
    def foo(g):
        with g.local_scope():
            g.nodes[[2, 3]].data['h'] = F.ones((2, 3))
            g.edges[[2, 3]].data['w'] = F.ones((2, 4))
    foo(g)
    assert F.allclose(g.ndata['h'], F.zeros((g.number_of_nodes(), 3)))
    assert F.allclose(g.edata['w'], F.zeros((g.number_of_edges(), 4)))
    # test out-place update 2
    def foo(g):
        with g.local_scope():
            g.apply_nodes(lambda nodes: {'h': nodes.data['h'] + 10}, [2, 3])
            g.apply_edges(lambda edges: {'w': edges.data['w'] + 10}, [2, 3])
    foo(g)
    assert F.allclose(g.ndata['h'], F.zeros((g.number_of_nodes(), 3)))
    assert F.allclose(g.edata['w'], F.zeros((g.number_of_edges(), 4)))
    # test auto-pop
    def foo(g):
        with g.local_scope():
            g.ndata['hh'] = F.ones((g.number_of_nodes(), 3))
            g.edata['ww'] = F.ones((g.number_of_edges(), 4))
    foo(g)
    assert 'hh' not in g.ndata
    assert 'ww' not in g.edata

    # test nested scope
    def foo(g):
        with g.local_scope():
            g.ndata['hh'] = F.ones((g.number_of_nodes(), 3))
            g.edata['ww'] = F.ones((g.number_of_edges(), 4))
            with g.local_scope():
                g.ndata['hhh'] = F.ones((g.number_of_nodes(), 3))
                g.edata['www'] = F.ones((g.number_of_edges(), 4))
            # features from the inner scope are gone once it exits
            assert 'hhh' not in g.ndata
            assert 'www' not in g.edata
    foo(g)
    assert 'hh' not in g.ndata
    assert 'ww' not in g.edata

    # test initializer1
    g = dgl.graph([(0, 1), (1, 1)], index_dtype=index_dtype)
    g.set_n_initializer(dgl.init.zero_initializer)
    def foo(g):
        with g.local_scope():
            # only node 0 is written; the other row is expected to be zero
            g.nodes[0].data['h'] = F.ones((1, 1))
            assert F.allclose(g.ndata['h'], F.tensor([[1.], [0.]]))
    foo(g)
    # test initializer2
    def foo_e_initializer(shape, dtype, ctx, id_range):
        return F.ones(shape)
    g.set_e_initializer(foo_e_initializer, field='h')
    def foo(g):
        with g.local_scope():
            # 'h' uses the custom all-ones initializer registered above
            g.edges[0, 1].data['h'] = F.ones((1, 1))
            assert F.allclose(g.edata['h'], F.ones((2, 1)))
            # 'w' has no field-specific initializer; zeros are expected
            g.edges[0, 1].data['w'] = F.ones((1, 1))
            assert F.allclose(g.edata['w'], F.tensor([[1.], [0.]]))
    foo(g)
def atest_nx_conversion(index_dtype):
    """Round-trip node/edge features between DGL graphs and networkx graphs.

    NOTE(review): the ``atest_`` prefix presumably keeps pytest from
    collecting this function (i.e. the test is disabled) — confirm before
    renaming.

    Covers: ``dgl.to_networkx`` with selected attributes, building a graph
    from a networkx graph with and without an ``'id'`` edge attribute, and
    converting a networkx graph whose node labels are not consecutive
    integers.
    """
    # check conversion between networkx and DGLGraph

    def _check_nx_feature(nxg, nf, ef):
        # check node and edge feature of nxg
        # this is used to check to_networkx
        # nf / ef: expected node / edge features, keyed by attribute name
        num_nodes = len(nxg)
        num_edges = nxg.size()
        if num_nodes > 0:
            node_feat = ddict(list)
            for nid, attr in nxg.nodes(data=True):
                assert len(attr) == len(nf)
                for k in nxg.nodes[nid]:
                    node_feat[k].append(F.unsqueeze(attr[k], 0))
            for k in node_feat:
                feat = F.cat(node_feat[k], 0)
                assert F.allclose(feat, nf[k])
        else:
            assert len(nf) == 0
        if num_edges > 0:
            # one slot per edge so features can be placed by edge id
            edge_feat = ddict(lambda: [0] * num_edges)
            for u, v, attr in nxg.edges(data=True):
                assert len(attr) == len(ef) + 1 # extra id
                eid = attr['id']
                for k in ef:
                    edge_feat[k][eid] = F.unsqueeze(attr[k], 0)
            for k in edge_feat:
                feat = F.cat(edge_feat[k], 0)
                assert F.allclose(feat, ef[k])
        else:
            assert len(ef) == 0

    n1 = F.randn((5, 3))
    n2 = F.randn((5, 10))
    n3 = F.randn((5, 4))
    e1 = F.randn((4, 5))
    e2 = F.randn((4, 7))
    g = dgl.graph([(0, 2), (1, 4), (3, 0), (4, 3)], index_dtype=index_dtype)
    g.ndata.update({'n1': n1, 'n2': n2, 'n3': n3})
    g.edata.update({'e1': e1, 'e2': e2})

    # convert to networkx
    nxg = dgl.to_networkx(g, node_attrs=['n1', 'n3'], edge_attrs=['e1', 'e2'])
    assert len(nxg) == 5
    assert nxg.size() == 4
    _check_nx_feature(nxg, {'n1': n1, 'n3': n3}, {'e1': e1, 'e2': e2})

    # convert to DGLGraph, nx graph has id in edge feature
    # use id feature to test non-tensor copy
    g = dgl.graph(nxg, node_attrs=['n1'], edge_attrs=['e1', 'id'], index_dtype=index_dtype)
    assert g._idtype_str == index_dtype
    # check graph size
    assert g.number_of_nodes() == 5
    assert g.number_of_edges() == 4
    # check number of features
    # test with existing dglgraph (so existing features should be cleared)
    assert len(g.ndata) == 1
    assert len(g.edata) == 2
    # check feature values
    assert F.allclose(g.ndata['n1'], n1)
    # with id in nx edge feature, e1 should follow original order
    assert F.allclose(g.edata['e1'], e1)
    assert F.array_equal(g.edata['id'], F.copy_to(F.arange(0, 4), F.cpu()))

    # test conversion after modifying DGLGraph
    # TODO(minjie): enable after mutation is supported
    #g.pop_e_repr('id') # pop id so we don't need to provide id when adding edges
    #new_n = F.randn((2, 3))
    #new_e = F.randn((3, 5))
    #g.add_nodes(2, data={'n1': new_n})
    ## add three edges, one is a multi-edge
    #g.add_edges([3, 6, 0], [4, 5, 2], data={'e1': new_e})
    #n1 = F.cat((n1, new_n), 0)
    #e1 = F.cat((e1, new_e), 0)
    ## convert to networkx again
    #nxg = g.to_networkx(node_attrs=['n1'], edge_attrs=['e1'])
    #assert len(nxg) == 7
    #assert nxg.size() == 7
    #_check_nx_feature(nxg, {'n1': n1}, {'e1': e1})

    # now test convert from networkx without id in edge feature
    # first pop id in edge feature
    for _, _, attr in nxg.edges(data=True):
        attr.pop('id')
    # test with a new graph
    g = dgl.graph(nxg, node_attrs=['n1'], edge_attrs=['e1'])
    # check graph size
    assert g.number_of_nodes() == 5
    assert g.number_of_edges() == 4
    # check number of features
    assert len(g.ndata) == 1
    assert len(g.edata) == 1
    # check feature values
    assert F.allclose(g.ndata['n1'], n1)
    # edge feature order follows nxg.edges()
    edge_feat = []
    for _, _, attr in nxg.edges(data=True):
        edge_feat.append(F.unsqueeze(attr['e1'], 0))
    edge_feat = F.cat(edge_feat, 0)
    assert F.allclose(g.edata['e1'], edge_feat)

    # Test converting from a networkx graph whose nodes are
    # not labeled with consecutive-integers.
    nxg = nx.cycle_graph(5)
    nxg.remove_nodes_from([0, 4])
    for u in nxg.nodes():
        nxg.nodes[u]['h'] = F.tensor([u])
    for u, v, d in nxg.edges(data=True):
        d['h'] = F.tensor([u, v])

    g = dgl.DGLGraph()
    g.from_networkx(nxg, node_attrs=['h'], edge_attrs=['h'])
    # remaining nx nodes 1, 2, 3 are expected to become DGL nodes 0, 1, 2
    assert g.number_of_nodes() == 3
    assert g.number_of_edges() == 4
    assert g.has_edge_between(0, 1)
    assert g.has_edge_between(1, 2)
    assert F.allclose(g.ndata['h'], F.tensor([[1.], [2.], [3.]]))
    assert F.allclose(g.edata['h'], F.tensor([[1., 2.], [1., 2.],
                                              [2., 3.], [2., 3.]]))
def test_send_multigraph(index_dtype):
    """Check send/recv message passing on a graph with parallel edges.

    Edges 0, 1 and 2 all go 0 -> 1 and edge 3 goes 2 -> 1. Messages carry the
    edge feature ``'a'`` (optionally tripled) and node 1 reduces its mailbox
    with an element-wise max; each section compares node 1's result with the
    max over the edges that were expected to deliver a message.
    """
    g = dgl.graph([(0, 1), (0, 1), (0, 1), (2, 1)], index_dtype=index_dtype)

    def _copy_edge(edges):
        return {'a': edges.data['a']}

    def _triple_edge(edges):
        return {'a': edges.data['a'] * 3}

    def _max_reduce(nodes):
        return {'a': F.max(nodes.mailbox['a'], 1)}

    def expected_max(*rows):
        return F.max(F.stack(rows, 0), 0)

    assert g.is_multigraph

    edge_feat = F.randn((4, 5))

    def _reset():
        # Start every scenario from zeroed nodes and the same edge features.
        g.ndata['a'] = F.zeros((3, 5))
        g.edata['a'] = edge_feat

    # send by eid
    _reset()
    g.send([0, 2], message_func=_copy_edge)
    g.recv(1, _max_reduce)
    result = g.ndata['a']
    assert F.allclose(result[1], expected_max(edge_feat[0], edge_feat[2]))

    _reset()
    g.send([0, 2, 3], message_func=_copy_edge)
    g.recv(1, _max_reduce)
    result = g.ndata['a']
    assert F.allclose(
        result[1], expected_max(edge_feat[0], edge_feat[2], edge_feat[3]))

    # send on multigraph
    _reset()
    g.send(([0, 2], [1, 1]), _copy_edge)
    g.recv(1, _max_reduce)
    result = g.ndata['a']
    assert F.allclose(result[1], F.max(edge_feat, 0))

    # consecutive send and send_on
    _reset()
    g.send((2, 1), _copy_edge)
    g.send([0, 1], message_func=_triple_edge)
    g.recv(1, _max_reduce)
    result = g.ndata['a']
    assert F.allclose(
        result[1],
        expected_max(edge_feat[0] * 3, edge_feat[1] * 3, edge_feat[3]))

    # consecutive send_on
    _reset()
    g.send(0, message_func=_copy_edge)
    g.send(1, message_func=_triple_edge)
    g.recv(1, _max_reduce)
    result = g.ndata['a']
    assert F.allclose(result[1], expected_max(edge_feat[0], edge_feat[1] * 3))

    # send_and_recv_on
    _reset()
    g.send_and_recv([0, 2, 3], message_func=_copy_edge, reduce_func=_max_reduce)
    result = g.ndata['a']
    assert F.allclose(
        result[1], expected_max(edge_feat[0], edge_feat[2], edge_feat[3]))
    # nodes 0 and 2 received nothing and must stay zero
    assert F.allclose(result[[0, 2]], F.zeros((2, 5)))
def create_graph(kg_data, n_nodes):
    """Build a DGL graph from knowledge-graph triples.

    Args:
        kg_data: Mapping-like object indexed with ``'h'``, ``'r'`` and
            ``'t'``. ``kg_data['t']`` and ``kg_data['h']`` are used as the
            source and destination node ids of the edges, and
            ``kg_data['r']`` as the per-edge relation type.
            (presumably a pandas DataFrame of head/relation/tail columns —
            verify against the caller)
        n_nodes: Total number of nodes the graph must contain.

    Returns:
        A ``dgl`` graph with ``n_nodes`` nodes, node feature ``'id'``
        (``0 .. n_nodes - 1``, int64) and edge feature ``'type'``.
    """
    # Pass the node count explicitly. Otherwise DGL infers it from the
    # largest id in the edge list, and the 'id' assignment below fails
    # whenever the highest-numbered nodes have no incident edge.
    g = dgl.graph((kg_data['t'], kg_data['h']), num_nodes=n_nodes)
    g.ndata['id'] = torch.arange(n_nodes, dtype=torch.long)
    g.edata['type'] = torch.LongTensor(kg_data['r'])
    return g
# Demo: attaching node and edge features to a DGL graph with the MXNet backend.
import dgl
import numpy as np
from mxnet import nd

g = dgl.graph(([0,0,1,5],[1,2,2,0]))
print('g:\n',g)

g.ndata['x'] = nd.ones((g.num_nodes(),3))  # node features of length 3
g.edata['x'] = nd.ones(g.num_edges(),dtype=np.int32)
print('g:',g)

# Different feature names can hold different features.
g.ndata['y'] = nd.random.uniform(shape=(g.num_nodes(),5))
print('g:',g)

print('the feature of node 1 in x',g.ndata['x'][1])  # feature of node 1
print('\n the feature of edge 0 and 3 in x:',g.edata['x'][nd.array([0,3],dtype=np.int32)])

# For a weighted graph, the weights can be stored as an edge feature.
# `np.int` was only an alias of the builtin `int` and was removed in
# NumPy 1.24, so the builtin is used directly.
edges = nd.array([0,0,0,1],dtype=int), nd.array([1,2,3,3],dtype=int)
weights = nd.array([0.1, 0.6, 0.9, 0.7])  # edge weights
g = dgl.graph(edges)
g.edata['w'] = weights  # 'w' holds the weight feature
print('\n g with weight:',g)
# Remaining command-line options.
argparser.add_argument('--lr', default=0.003, type=float)
argparser.add_argument('--dropout', default=0.5, type=float)
argparser.add_argument(
    '--num-workers',
    default=0,
    type=int,
    help="Number of sampling processes. Use 0 for no extra process.",
)
args = argparser.parse_args()

# A negative --gpu value selects the CPU.
device = th.device(('cuda:%d' % args.gpu) if args.gpu >= 0 else 'cpu')

# load reddit data
data = RedditDataset(self_loop=True)
train_mask, val_mask = data.train_mask, data.val_mask
features = th.Tensor(data.features)
in_feats = features.shape[1]
labels = th.LongTensor(data.labels)
n_classes = data.num_labels

# Construct graph
g = dgl.graph(data.graph.all_edges())
g.ndata['features'] = features
prepare_mp(g)

# Pack data
data = (train_mask, val_mask, in_feats, labels, n_classes, g)

run(args, device, data)
# Make the project root importable before pulling in the plotting helper.
sys.path.append(base_dir)
from graph_embeddings.plot_embedding import plot_embeddings

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
torch.set_num_threads(2)

# Load the edge list ('src', 'dst' columns) and the node labels ('label').
edges = pd.read_table('{}/data/Wiki_edgelist.txt'.format(base_dir), sep=' ')
nodes = pd.read_table('{}/data/wiki_labels.txt'.format(base_dir), sep=' ')
u = edges['src'].to_numpy()
v = edges['dst'].to_numpy()
labels = nodes['label'].to_numpy()
g = dgl.graph((u,v))
g.ndata['label'] = torch.tensor(labels)
num_node = g.num_nodes()

C = 5  # context window
simple_num = 10000
# Draw the start nodes from the graph's actual node range instead of a
# hard-coded node count, so the script keeps working if the data changes.
start_nodes = torch.randint(0, num_node, (simple_num, ))
walks = dgl.sampling.random_walk(g, start_nodes, length=C * 2)
# Drop walks that contain -1; -1 means no next edge could be found.
walks = np.array([
    walk.tolist() for walk in walks[0] if (walk < 0).sum().item() == 0
])
# The centre node (column C) is the label; the other 2*C nodes are the input.
walks_train = np.delete(walks, C, axis=1).reshape(-1, C * 2)
walks_label = walks[:, C:C+1].reshape(-1, 1)

# NOTE(review): the seeds are set only after the walks were sampled, so the
# sampling above is not covered by them — confirm this is intended.
random.seed(1)
np.random.seed(1)
torch.manual_seed(1)
# Sample a training adjacency/feature pair, then fetch the validation set.
(train_adj, train_fea) = sampler.randomedge_sampler(percent=args.sampling_percent, normalization=args.normalization, cuda=args.cuda)
if args.mixmode:
    train_adj = train_adj.cuda()

# sampling_t held a start timestamp; it now holds the elapsed sampling time
sampling_t = time.time() - sampling_t

(val_adj, val_fea) = sampler.get_test_set(normalization=args.normalization, cuda=args.cuda)

# Construct feed data g
# Build the training graph from the adjacency's index rows.
# NOTE(review): the branch keys on CUDA availability, not on where train_adj
# actually lives — confirm this is intended.
if torch.cuda.is_available():
    train_edges = train_adj._indices().cpu().data
else:
    train_edges = train_adj._indices().data
train_edges = (train_edges[0], train_edges[1])
train_g = dgl.graph(train_edges)
train_g.ndata['features'] = train_fea
prepare_mp(train_g)

# Construct feed data g
# Same construction for the validation graph.
if torch.cuda.is_available():
    val_edges = val_adj._indices().cpu().data
else:
    val_edges = val_adj._indices().data
val_edges = (val_edges[0], val_edges[1])
val_g = dgl.graph(val_edges)
# For 'coauthor_phy' the validation features and indices are moved to CPU.
if sampler.dataset=='coauthor_phy':
    val_g.ndata['features'] = val_fea.cpu()
    idx_val = idx_val.cpu()
else:
    val_g.ndata['features'] = val_fea
def test_compact(index_dtype):
    """Check ``dgl.compact_graphs`` for single and multiple graphs.

    Each section compacts one or several graphs, reads the induced original
    node ids from ``dgl.NID``, asserts the exact set of nodes that is kept
    (including those forced through ``always_preserve``), and uses ``_check``
    to verify that mapping the compacted edges back through the induced ids
    reproduces the original edges.
    """
    g1 = dgl.heterograph({
        ('user', 'follow', 'user'): [(1, 3), (3, 5)],
        ('user', 'plays', 'game'): [(2, 4), (3, 4), (2, 5)],
        ('game', 'wished-by', 'user'): [(6, 7), (5, 7)]},
        {'user': 20, 'game': 10}, index_dtype=index_dtype)

    g2 = dgl.heterograph({
        ('game', 'clicked-by', 'user'): [(3, 1)],
        ('user', 'likes', 'user'): [(1, 8), (8, 9)]},
        {'user': 20, 'game': 10}, index_dtype=index_dtype)

    g3 = dgl.graph([(0, 1), (1, 2)], num_nodes=10, ntype='user', index_dtype=index_dtype)
    g4 = dgl.graph([(1, 3), (3, 5)], num_nodes=10, ntype='user', index_dtype=index_dtype)

    def _check(g, new_g, induced_nodes):
        # induced_nodes: per node type, numpy array of original node ids
        assert g.ntypes == new_g.ntypes
        assert g.canonical_etypes == new_g.canonical_etypes

        for ntype in g.ntypes:
            assert -1 not in induced_nodes[ntype]

        for etype in g.canonical_etypes:
            g_src, g_dst = g.all_edges(order='eid', etype=etype)
            g_src = F.asnumpy(g_src)
            g_dst = F.asnumpy(g_dst)
            new_g_src, new_g_dst = new_g.all_edges(order='eid', etype=etype)
            # translate compacted endpoints back to original node ids
            new_g_src_mapped = induced_nodes[etype[0]][F.asnumpy(new_g_src)]
            new_g_dst_mapped = induced_nodes[etype[2]][F.asnumpy(new_g_dst)]
            assert (g_src == new_g_src_mapped).all()
            assert (g_dst == new_g_dst_mapped).all()

    # Test default
    new_g1 = dgl.compact_graphs(g1)
    induced_nodes = {ntype: new_g1.nodes[ntype].data[dgl.NID] for ntype in new_g1.ntypes}
    induced_nodes = {k: F.asnumpy(v) for k, v in induced_nodes.items()}
    assert new_g1._idtype_str == index_dtype
    assert set(induced_nodes['user']) == set([1, 3, 5, 2, 7])
    assert set(induced_nodes['game']) == set([4, 5, 6])
    _check(g1, new_g1, induced_nodes)

    # Test with always_preserve given a dict
    new_g1 = dgl.compact_graphs(
        g1, always_preserve={'game': F.tensor([4, 7], dtype=getattr(F, index_dtype))})
    assert new_g1._idtype_str == index_dtype
    induced_nodes = {ntype: new_g1.nodes[ntype].data[dgl.NID] for ntype in new_g1.ntypes}
    induced_nodes = {k: F.asnumpy(v) for k, v in induced_nodes.items()}
    assert set(induced_nodes['user']) == set([1, 3, 5, 2, 7])
    assert set(induced_nodes['game']) == set([4, 5, 6, 7])
    _check(g1, new_g1, induced_nodes)

    # Test with always_preserve given a tensor
    new_g3 = dgl.compact_graphs(
        g3, always_preserve=F.tensor([1, 7], dtype=getattr(F, index_dtype)))
    induced_nodes = {ntype: new_g3.nodes[ntype].data[dgl.NID] for ntype in new_g3.ntypes}
    induced_nodes = {k: F.asnumpy(v) for k, v in induced_nodes.items()}

    assert new_g3._idtype_str == index_dtype
    assert set(induced_nodes['user']) == set([0, 1, 2, 7])
    _check(g3, new_g3, induced_nodes)

    # Test multiple graphs
    new_g1, new_g2 = dgl.compact_graphs([g1, g2])
    # the same induced ids are used to check both compacted graphs
    induced_nodes = {ntype: new_g1.nodes[ntype].data[dgl.NID] for ntype in new_g1.ntypes}
    induced_nodes = {k: F.asnumpy(v) for k, v in induced_nodes.items()}
    assert new_g1._idtype_str == index_dtype
    assert new_g2._idtype_str == index_dtype
    assert set(induced_nodes['user']) == set([1, 3, 5, 2, 7, 8, 9])
    assert set(induced_nodes['game']) == set([3, 4, 5, 6])
    _check(g1, new_g1, induced_nodes)
    _check(g2, new_g2, induced_nodes)

    # Test multiple graphs with always_preserve given a dict
    new_g1, new_g2 = dgl.compact_graphs(
        [g1, g2], always_preserve={'game': F.tensor([4, 7], dtype=getattr(F, index_dtype))})
    induced_nodes = {ntype: new_g1.nodes[ntype].data[dgl.NID] for ntype in new_g1.ntypes}
    induced_nodes = {k: F.asnumpy(v) for k, v in induced_nodes.items()}
    assert new_g1._idtype_str == index_dtype
    assert new_g2._idtype_str == index_dtype
    assert set(induced_nodes['user']) == set([1, 3, 5, 2, 7, 8, 9])
    assert set(induced_nodes['game']) == set([3, 4, 5, 6, 7])
    _check(g1, new_g1, induced_nodes)
    _check(g2, new_g2, induced_nodes)

    # Test multiple graphs with always_preserve given a tensor
    new_g3, new_g4 = dgl.compact_graphs(
        [g3, g4], always_preserve=F.tensor([1, 7], dtype=getattr(F, index_dtype)))
    induced_nodes = {ntype: new_g3.nodes[ntype].data[dgl.NID] for ntype in new_g3.ntypes}
    induced_nodes = {k: F.asnumpy(v) for k, v in induced_nodes.items()}

    assert new_g3._idtype_str == index_dtype
    assert new_g4._idtype_str == index_dtype
    assert set(induced_nodes['user']) == set([0, 1, 2, 3, 5, 7])
    _check(g3, new_g3, induced_nodes)
    _check(g4, new_g4, induced_nodes)
def test_node_dataloader(sampler_name):
    """Iterate ``NodeDataLoader`` over a homogeneous and a heterogeneous graph.

    ``sampler_name`` selects one sampler from the dictionaries built below
    (``'full'``, ``'neighbor'``, ``'neighbor2'`` or ``'shadow'``). Every item
    the loader yields is passed to ``_check_device``. The final section
    expects constructing a loader for ``g2`` with ``load_input`` built from
    ``g1``'s features to raise ``dgl.DGLError``.
    """
    g1 = dgl.graph(([0, 0, 0, 1, 1], [1, 2, 3, 3, 4]))
    g1.ndata['feat'] = F.copy_to(F.randn((5, 8)), F.cpu())
    g1.ndata['label'] = F.copy_to(F.randn((g1.num_nodes(),)), F.cpu())

    # homogeneous graph: with/without prefetched features, sync/async
    # loading, and 0-2 worker processes
    for load_input, load_output in [(None, None), ({'feat': g1.ndata['feat']}, {'label': g1.ndata['label']})]:
        for async_load in [False, True]:
            for num_workers in [0, 1, 2]:
                sampler = {
                    'full': dgl.dataloading.MultiLayerFullNeighborSampler(2),
                    'neighbor': dgl.dataloading.MultiLayerNeighborSampler([3, 3]),
                    'neighbor2': dgl.dataloading.MultiLayerNeighborSampler([3, 3]),
                    'shadow': dgl.dataloading.ShaDowKHopSampler([3, 3])}[sampler_name]
                dataloader = dgl.dataloading.NodeDataLoader(
                    g1, g1.nodes(), sampler, device=F.ctx(),
                    load_input=load_input,
                    load_output=load_output,
                    async_load=async_load,
                    batch_size=g1.num_nodes(),
                    num_workers=num_workers)
                for input_nodes, output_nodes, blocks in dataloader:
                    _check_device(input_nodes)
                    _check_device(output_nodes)
                    _check_device(blocks)
                    # requested features must be present on the first/last
                    # block and usable in a message-passing op
                    if load_input:
                        _check_device(blocks[0].srcdata['feat'])
                        OPS.copy_u_sum(blocks[0], blocks[0].srcdata['feat'])
                    if load_output:
                        _check_device(blocks[-1].dstdata['label'])
                        OPS.copy_u_sum(blocks[-1], blocks[-1].dstdata['label'])

    # heterogeneous graph
    g2 = dgl.heterograph({
        ('user', 'follow', 'user'): ([0, 0, 0, 1, 1, 1, 2], [1, 2, 3, 0, 2, 3, 0]),
        ('user', 'followed-by', 'user'): ([1, 2, 3, 0, 2, 3, 0], [0, 0, 0, 1, 1, 1, 2]),
        ('user', 'play', 'game'): ([0, 1, 1, 3, 5], [0, 1, 2, 0, 2]),
        ('game', 'played-by', 'user'): ([0, 1, 2, 0, 2], [0, 1, 1, 3, 5])
    })
    for ntype in g2.ntypes:
        g2.nodes[ntype].data['feat'] = F.copy_to(F.randn((g2.num_nodes(ntype), 8)), F.cpu())
    batch_size = max(g2.num_nodes(nty) for nty in g2.ntypes)
    sampler = {
        'full': dgl.dataloading.MultiLayerFullNeighborSampler(2),
        'neighbor': dgl.dataloading.MultiLayerNeighborSampler([{etype: 3 for etype in g2.etypes}] * 2),
        'neighbor2': dgl.dataloading.MultiLayerNeighborSampler([3, 3]),
        'shadow': dgl.dataloading.ShaDowKHopSampler([{etype: 3 for etype in g2.etypes}] * 2)}[sampler_name]

    for async_load in [False, True]:
        dataloader = dgl.dataloading.NodeDataLoader(
            g2, {nty: g2.nodes(nty) for nty in g2.ntypes},
            sampler, device=F.ctx(), async_load=async_load, batch_size=batch_size)
        assert isinstance(iter(dataloader), Iterator)
        for input_nodes, output_nodes, blocks in dataloader:
            _check_device(input_nodes)
            _check_device(output_nodes)
            _check_device(blocks)

    # load_input taken from g1 is passed for the heterogeneous g2; the
    # constructor is expected to reject it with DGLError
    status = False
    try:
        dgl.dataloading.NodeDataLoader(
            g2, {nty: g2.nodes(nty) for nty in g2.ntypes},
            sampler, device=F.ctx(), load_input={'feat': g1.ndata['feat']}, batch_size=batch_size)
    except dgl.DGLError:
        status = True
    assert status
def test_reverse():
    """Check ``DGLGraph.reverse`` and ``dgl.reverse_heterograph``.

    The assertions expect the reversed graph to keep node and edge counts,
    to swap the source and destination of every edge while keeping edge ids,
    and to carry node/edge features over according to ``copy_ndata`` /
    ``copy_edata``. Features added to the reversed graph must not show up on
    the original.
    """
    g = dgl.DGLGraph()
    g.add_nodes(5)
    # The graph need not to be completely connected.
    g.add_edges([0, 1, 2], [1, 2, 1])
    g.ndata['h'] = F.tensor([[0.], [1.], [2.], [3.], [4.]])
    g.edata['h'] = F.tensor([[5.], [6.], [7.]])
    rg = g.reverse()

    assert g.is_multigraph == rg.is_multigraph

    assert g.number_of_nodes() == rg.number_of_nodes()
    assert g.number_of_edges() == rg.number_of_edges()
    assert F.allclose(F.astype(rg.has_edges_between(
        [1, 2, 1], [0, 1, 2]), F.float32), F.ones((3,)))
    # edge ids are preserved across the reversal
    assert g.edge_id(0, 1) == rg.edge_id(1, 0)
    assert g.edge_id(1, 2) == rg.edge_id(2, 1)
    assert g.edge_id(2, 1) == rg.edge_id(1, 2)

    # test dgl.reverse_heterograph
    # test homogeneous graph
    g = dgl.graph((F.tensor([0, 1, 2]), F.tensor([1, 2, 0])))
    g.ndata['h'] = F.tensor([[0.], [1.], [2.]])
    g.edata['h'] = F.tensor([[3.], [4.], [5.]])
    g_r = dgl.reverse_heterograph(g)
    assert g.number_of_nodes() == g_r.number_of_nodes()
    assert g.number_of_edges() == g_r.number_of_edges()
    u_g, v_g, eids_g = g.all_edges(form='all')
    u_rg, v_rg, eids_rg = g_r.all_edges(form='all')
    assert F.array_equal(u_g, v_rg)
    assert F.array_equal(v_g, u_rg)
    assert F.array_equal(eids_g, eids_rg)
    # with the default arguments node data is present, edge data is not
    assert F.array_equal(g.ndata['h'], g_r.ndata['h'])
    assert len(g_r.edata) == 0

    # without share ndata
    g_r = dgl.reverse_heterograph(g, copy_ndata=False)
    assert g.number_of_nodes() == g_r.number_of_nodes()
    assert g.number_of_edges() == g_r.number_of_edges()
    assert len(g_r.ndata) == 0
    assert len(g_r.edata) == 0

    # with share ndata and edata
    g_r = dgl.reverse_heterograph(g, copy_ndata=True, copy_edata=True)
    assert g.number_of_nodes() == g_r.number_of_nodes()
    assert g.number_of_edges() == g_r.number_of_edges()
    assert F.array_equal(g.ndata['h'], g_r.ndata['h'])
    assert F.array_equal(g.edata['h'], g_r.edata['h'])

    # add new node feature to g_r
    g_r.ndata['hh'] = F.tensor([0, 1, 2])
    assert ('hh' in g.ndata) is False
    assert ('hh' in g_r.ndata) is True

    # add new edge feature to g_r
    g_r.edata['hh'] = F.tensor([0, 1, 2])
    assert ('hh' in g.edata) is False
    assert ('hh' in g_r.edata) is True

    # test heterogeneous graph
    g = dgl.heterograph({
        ('user', 'follows', 'user'): ([0, 1, 2, 4, 3 ,1, 3], [1, 2, 3, 2, 0, 0, 1]),
        ('user', 'plays', 'game'): ([0, 0, 2, 3, 3, 4, 1], [1, 0, 1, 0, 1, 0, 0]),
        ('developer', 'develops', 'game'): ([0, 1, 1, 2], [0, 0, 1, 1])})
    g.nodes['user'].data['h'] = F.tensor([0, 1, 2, 3, 4])
    g.nodes['user'].data['hh'] = F.tensor([1, 1, 1, 1, 1])
    g.nodes['game'].data['h'] = F.tensor([0, 1])
    g.edges['follows'].data['h'] = F.tensor([0, 1, 2, 4, 3 ,1, 3])
    g.edges['follows'].data['hh'] = F.tensor([1, 2, 3, 2, 0, 0, 1])
    g_r = dgl.reverse_heterograph(g)

    # each canonical edge type keeps its name but swaps its endpoint types
    for etype_g, etype_gr in zip(g.canonical_etypes, g_r.canonical_etypes):
        assert etype_g[0] == etype_gr[2]
        assert etype_g[1] == etype_gr[1]
        assert etype_g[2] == etype_gr[0]
        assert g.number_of_edges(etype_g) == g_r.number_of_edges(etype_gr)
    for ntype in g.ntypes:
        assert g.number_of_nodes(ntype) == g_r.number_of_nodes(ntype)
    assert F.array_equal(g.nodes['user'].data['h'], g_r.nodes['user'].data['h'])
    assert F.array_equal(g.nodes['user'].data['hh'], g_r.nodes['user'].data['hh'])
    assert F.array_equal(g.nodes['game'].data['h'], g_r.nodes['game'].data['h'])
    assert len(g_r.edges['follows'].data) == 0
    u_g, v_g, eids_g = g.all_edges(form='all', etype=('user', 'follows', 'user'))
    u_rg, v_rg, eids_rg = g_r.all_edges(form='all', etype=('user', 'follows', 'user'))
    assert F.array_equal(u_g, v_rg)
    assert F.array_equal(v_g, u_rg)
    assert F.array_equal(eids_g, eids_rg)
    u_g, v_g, eids_g = g.all_edges(form='all', etype=('user', 'plays', 'game'))
    u_rg, v_rg, eids_rg = g_r.all_edges(form='all', etype=('game', 'plays', 'user'))
    assert F.array_equal(u_g, v_rg)
    assert F.array_equal(v_g, u_rg)
    assert F.array_equal(eids_g, eids_rg)
    u_g, v_g, eids_g = g.all_edges(form='all', etype=('developer', 'develops', 'game'))
    u_rg, v_rg, eids_rg = g_r.all_edges(form='all', etype=('game', 'develops', 'developer'))
    assert F.array_equal(u_g, v_rg)
    assert F.array_equal(v_g, u_rg)
    assert F.array_equal(eids_g, eids_rg)

    # without share ndata
    g_r = dgl.reverse_heterograph(g, copy_ndata=False)
    for etype_g, etype_gr in zip(g.canonical_etypes, g_r.canonical_etypes):
        assert etype_g[0] == etype_gr[2]
        assert etype_g[1] == etype_gr[1]
        assert etype_g[2] == etype_gr[0]
        assert g.number_of_edges(etype_g) == g_r.number_of_edges(etype_gr)
    for ntype in g.ntypes:
        assert g.number_of_nodes(ntype) == g_r.number_of_nodes(ntype)
    assert len(g_r.nodes['user'].data) == 0
    assert len(g_r.nodes['game'].data) == 0

    # with share ndata and edata
    g_r = dgl.reverse_heterograph(g, copy_ndata=True, copy_edata=True)
    print(g_r)
    for etype_g, etype_gr in zip(g.canonical_etypes, g_r.canonical_etypes):
        assert etype_g[0] == etype_gr[2]
        assert etype_g[1] == etype_gr[1]
        assert etype_g[2] == etype_gr[0]
        assert g.number_of_edges(etype_g) == g_r.number_of_edges(etype_gr)
    assert F.array_equal(g.edges['follows'].data['h'], g_r.edges['follows'].data['h'])
    assert F.array_equal(g.edges['follows'].data['hh'], g_r.edges['follows'].data['hh'])

    # add new node feature to g_r
    g_r.nodes['user'].data['hhh'] = F.tensor([0, 1, 2, 3, 4])
    assert ('hhh' in g.nodes['user'].data) is False
    assert ('hhh' in g_r.nodes['user'].data) is True

    # add new edge feature to g_r
    g_r.edges['follows'].data['hhh'] = F.tensor([1, 2, 3, 2, 0, 0, 1])
    assert ('hhh' in g.edges['follows'].data) is False
    assert ('hhh' in g_r.edges['follows'].data) is True
def test_edge_dataloader(sampler_name):
    """Iterate ``EdgeDataLoader`` over a homogeneous and a heterogeneous graph.

    Each graph is loaded twice -- first without, then with a uniform
    negative sampler -- and every element of every yielded batch is passed
    to ``_check_device``.

    Parameters
    ----------
    sampler_name : str
        Selects the block sampler under test: ``'full'``, ``'neighbor'`` or
        ``'shadow'``.  Any other key raises ``KeyError`` from the lookup.
    """
    neg_sampler = dgl.dataloading.negative_sampler.Uniform(2)

    # ------------------------------------------------------------------
    # Homogeneous graph
    # ------------------------------------------------------------------
    homo_g = dgl.graph(([0, 0, 0, 1, 1], [1, 2, 3, 3, 4]))
    homo_g.ndata['feat'] = F.copy_to(F.randn((5, 8)), F.cpu())

    # All three samplers are built up front; only the requested one is kept.
    homo_samplers = {
        'full': dgl.dataloading.MultiLayerFullNeighborSampler(2),
        'neighbor': dgl.dataloading.MultiLayerNeighborSampler([3, 3]),
        'shadow': dgl.dataloading.ShaDowKHopSampler([3, 3]),
    }
    sampler = homo_samplers[sampler_name]

    # no negative sampler
    loader = dgl.dataloading.EdgeDataLoader(
        homo_g,
        homo_g.edges(form='eid'),
        sampler,
        device=F.ctx(),
        batch_size=homo_g.num_edges())
    for input_nodes, pos_pair_graph, blocks in loader:
        for item in (input_nodes, pos_pair_graph, blocks):
            _check_device(item)

    # negative sampler
    loader = dgl.dataloading.EdgeDataLoader(
        homo_g,
        homo_g.edges(form='eid'),
        sampler,
        device=F.ctx(),
        negative_sampler=neg_sampler,
        batch_size=homo_g.num_edges())
    for input_nodes, pos_pair_graph, neg_pair_graph, blocks in loader:
        for item in (input_nodes, pos_pair_graph, neg_pair_graph, blocks):
            _check_device(item)

    # ------------------------------------------------------------------
    # Heterogeneous graph
    # ------------------------------------------------------------------
    hetero_g = dgl.heterograph({
        ('user', 'follow', 'user'): ([0, 0, 0, 1, 1, 1, 2], [1, 2, 3, 0, 2, 3, 0]),
        ('user', 'followed-by', 'user'): ([1, 2, 3, 0, 2, 3, 0], [0, 0, 0, 1, 1, 1, 2]),
        ('user', 'play', 'game'): ([0, 1, 1, 3, 5], [0, 1, 2, 0, 2]),
        ('game', 'played-by', 'user'): ([0, 1, 2, 0, 2], [0, 1, 1, 3, 5]),
    })
    for node_type in hetero_g.ntypes:
        feat_shape = (hetero_g.num_nodes(node_type), 8)
        hetero_g.nodes[node_type].data['feat'] = F.copy_to(F.randn(feat_shape), F.cpu())

    # Batch size is the edge count of the largest edge type.
    batch_size = max(map(hetero_g.num_edges, hetero_g.canonical_etypes))

    def _fanouts():
        # Two layers sharing one per-edge-type fanout mapping of 3.
        return [dict.fromkeys(hetero_g.etypes, 3)] * 2

    def _all_eids():
        # Edge IDs of every canonical edge type, keyed by that edge type.
        return {
            c_etype: hetero_g.edges(form='eid', etype=c_etype)
            for c_etype in hetero_g.canonical_etypes
        }

    hetero_samplers = {
        'full': dgl.dataloading.MultiLayerFullNeighborSampler(2),
        'neighbor': dgl.dataloading.MultiLayerNeighborSampler(_fanouts()),
        'shadow': dgl.dataloading.ShaDowKHopSampler(_fanouts()),
    }
    sampler = hetero_samplers[sampler_name]

    # no negative sampler
    loader = dgl.dataloading.EdgeDataLoader(
        hetero_g,
        _all_eids(),
        sampler,
        device=F.ctx(),
        batch_size=batch_size)
    for input_nodes, pos_pair_graph, blocks in loader:
        for item in (input_nodes, pos_pair_graph, blocks):
            _check_device(item)

    # negative sampler
    loader = dgl.dataloading.EdgeDataLoader(
        hetero_g,
        _all_eids(),
        sampler,
        device=F.ctx(),
        negative_sampler=neg_sampler,
        batch_size=batch_size)

    # The loader must hand out a genuine iterator, not just be iterable.
    assert isinstance(iter(loader), Iterator)

    for input_nodes, pos_pair_graph, neg_pair_graph, blocks in loader:
        for item in (input_nodes, pos_pair_graph, neg_pair_graph, blocks):
            _check_device(item)