def test_basic():
    """Sanity checks for full and mini-batch NodeFlow construction."""
    depth = 2
    graph = generate_rand_graph(100, connect_more=True)
    flow = create_full_nodeflow(graph, depth)
    # A full NodeFlow replicates every node once per layer and every
    # edge once per flow step.
    assert flow.number_of_nodes() == graph.number_of_nodes() * (depth + 1)
    assert flow.number_of_edges() == graph.number_of_edges() * depth
    assert flow.num_layers == depth + 1
    assert flow.layer_size(0) == graph.number_of_nodes()
    assert flow.layer_size(1) == graph.number_of_nodes()
    check_basic(graph, flow)
    # Layer 0 of a full NodeFlow maps parent node ids onto themselves.
    parent_ids = F.arange(0, graph.number_of_nodes())
    mapped = flow.map_from_parent_nid(0, parent_ids)
    assert F.array_equal(mapped, parent_ids)
    # A mini-batch flow still exposes the expected number of layers.
    graph = generate_rand_graph(100)
    flow = create_mini_batch(graph, depth)
    assert flow.num_layers == depth + 1
    check_basic(graph, flow)
def _test2():  # k > #neighbors
    # NOTE(review): `g` is not defined locally — presumably the graph from an
    # enclosing test scope; confirm against the surrounding code.
    # k = -1: select_topk keeps every incident edge, so the result must match
    # a plain out_edges query on the same seed nodes.
    subg = dgl.sampling.select_topk(g, -1, 'weight', [0, 2], edge_dir='out')
    assert subg.number_of_nodes() == g.number_of_nodes()
    u, v = subg.edges()
    u_ans, v_ans = subg.out_edges([0, 2])
    # Compare as sets: edge enumeration order is not asserted here.
    uv = set(zip(F.asnumpy(u), F.asnumpy(v)))
    uv_ans = set(zip(F.asnumpy(u_ans), F.asnumpy(v_ans)))
    assert uv == uv_ans
    # k = 2: keep at most the two largest-'weight' out-edges of nodes 0 and 2.
    subg = dgl.sampling.select_topk(g, 2, 'weight', [0, 2], edge_dir='out')
    assert subg.number_of_nodes() == g.number_of_nodes()
    assert subg.number_of_edges() == 3
    u, v = subg.edges()
    edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
    # Induced edge ids must map back to the parent graph's edge ids.
    assert F.array_equal(g.edge_ids(u, v), subg.edata[dgl.EID])
    assert edge_set == {(0, 2), (0, 1), (2, 0)}
def test_basic():
    """Sanity checks for full and mini-batch NodeFlow construction.

    Variant that exercises the internal ``dgl.graph_index`` id-mapping API.
    """
    num_layers = 2
    g = generate_rand_graph(100, connect_more=True)
    nf = create_full_node_flow(g, num_layers)
    # A full NodeFlow replicates every node once per layer and every
    # edge once per flow step.
    assert nf.number_of_nodes() == g.number_of_nodes() * (num_layers + 1)
    assert nf.number_of_edges() == g.number_of_edges() * num_layers
    assert nf.num_layers == num_layers + 1
    assert nf.layer_size(0) == g.number_of_nodes()
    assert nf.layer_size(1) == g.number_of_nodes()
    check_basic(g, nf)
    parent_nids = F.arange(0, g.number_of_nodes())
    # Layer 0 maps parent node ids onto themselves (identity mapping).
    # Uses the private graph-index API directly rather than the NodeFlow
    # wrapper method.
    nids = dgl.graph_index.map_to_nodeflow_nid(
        nf._graph, 0, utils.toindex(parent_nids)).tousertensor()
    assert F.array_equal(nids, parent_nids)
    # A mini-batch flow still exposes the expected number of layers.
    g = generate_rand_graph(100)
    nf = create_mini_batch(g, num_layers)
    assert nf.num_layers == num_layers + 1
    check_basic(g, nf)
def test_append2():
    """FrameRef.append semantics: a ref does not track its base frame."""
    frame = Frame(create_test_data())
    ref = FrameRef(frame)
    assert ref.is_contiguous()
    assert ref.is_span_whole_column()
    assert ref.num_rows == N
    # Growing the underlying frame must not be visible through the ref.
    frame.append(frame)
    assert ref.is_contiguous()
    assert not ref.is_span_whole_column()
    assert ref.num_rows == N
    # Appending through the ref itself does extend it.
    ref.append(frame)
    assert not ref.is_contiguous()
    assert not ref.is_span_whole_column()
    assert ref.num_rows == 3 * N
    # The ref's index now covers the original rows plus the newly
    # appended span of the (doubled) base frame.
    expected_index = list(range(N)) + list(range(2 * N, 4 * N))
    expected = F.copy_to(F.tensor(expected_index, dtype=F.int64), F.cpu())
    assert F.array_equal(ref._index.tousertensor(), expected)
    assert frame.num_rows == 4 * N
def _check_subgraph(g, sg):
    """Verify ``sg`` is the expected induced subgraph of ``g``
    (users 1-2, game 0; developers dropped)."""
    assert sg.ntypes == ['user', 'game', 'developer']
    assert sg.etypes == ['follows', 'plays', 'wishes', 'develops']
    # Induced node mappings back to the parent graph.
    assert F.array_equal(F.tensor(sg.nodes['user'].data[dgl.NID]),
                         F.tensor([1, 2], F.int64))
    assert F.array_equal(F.tensor(sg.nodes['game'].data[dgl.NID]),
                         F.tensor([0], F.int64))
    # Each retained relation keeps exactly parent edge #1.
    for rel in ('follows', 'plays', 'wishes'):
        assert F.array_equal(F.tensor(sg.edges[rel].data[dgl.EID]),
                             F.tensor([1], F.int64))
    # Developer nodes and develops edges are dropped entirely.
    assert sg.number_of_nodes('developer') == 0
    assert sg.number_of_edges('develops') == 0
    # Features are sliced consistently with the induced ids.
    assert F.array_equal(sg.nodes['user'].data['h'],
                         g.nodes['user'].data['h'][1:3])
    assert F.array_equal(sg.edges['follows'].data['h'],
                         g.edges['follows'].data['h'][1:2])
def _check_typed_subgraph1(g, sg):
    """Check a type-restricted subgraph mirrors ``g``'s structure and
    shares feature storage with it."""
    assert set(sg.ntypes) == {'user', 'game'}
    assert set(sg.etypes) == {'follows', 'plays', 'wishes'}
    # Node counts of the kept types are untouched.
    for nt in sg.ntypes:
        assert sg.number_of_nodes(nt) == g.number_of_nodes(nt)
    # Edge lists of the kept relations match end to end.
    for et in sg.etypes:
        s_sub, d_sub = sg.all_edges(etype=et, order='eid')
        s_par, d_par = g.all_edges(etype=et, order='eid')
        assert F.array_equal(s_sub, s_par)
        assert F.array_equal(d_sub, d_par)
    assert F.array_equal(sg.nodes['user'].data['h'], g.nodes['user'].data['h'])
    assert F.array_equal(sg.edges['follows'].data['h'], g.edges['follows'].data['h'])
    # Mutate the parent's features; the subgraph view must observe the change.
    g.nodes['user'].data['h'] = F.scatter_row(
        g.nodes['user'].data['h'], F.tensor([2]), F.randn((1, 5)))
    g.edges['follows'].data['h'] = F.scatter_row(
        g.edges['follows'].data['h'], F.tensor([1]), F.randn((1, 4)))
    assert F.array_equal(sg.nodes['user'].data['h'], g.nodes['user'].data['h'])
    assert F.array_equal(sg.edges['follows'].data['h'], g.edges['follows'].data['h'])
def test_graph_conv2(g, norm, weight, bias):
    """GraphConv forward pass on homogeneous and bipartite graphs."""
    layer = nn.GraphConv(5, 2, norm=norm, weight=weight, bias=bias)
    layer.initialize(ctx=F.ctx())
    external_weight = F.randn((5, 2)).as_in_context(F.ctx())
    homogeneous = isinstance(g, dgl.DGLGraph)
    n_src = g.number_of_nodes() if homogeneous else g.number_of_src_nodes()
    n_dst = g.number_of_nodes() if homogeneous else g.number_of_dst_nodes()
    feat_src = F.randn((n_src, 5)).as_in_context(F.ctx())
    feat_dst = F.randn((n_dst, 2)).as_in_context(F.ctx())
    # Use the module's own weight when it has one, otherwise pass one in.
    if weight:
        out = layer(g, feat_src)
    else:
        out = layer(g, feat_src, external_weight)
    assert out.shape == (n_dst, 2)
    if not homogeneous and len(g.ntypes) == 2:
        # A bipartite graph should also accept a (src, dst) feature pair
        # and produce the same result as the single-tensor call.
        if weight:
            out_pair = layer(g, (feat_src, feat_dst))
        else:
            out_pair = layer(g, (feat_src, feat_dst), external_weight)
        assert out_pair.shape == (n_dst, 2)
        assert F.array_equal(out, out_pair)
def test_chainjacobian(backendopt):
    """chainjacobian(x1, x2) must equal einsum('abc,bcd->ad', x1, x2)."""
    for backend in backendopt:
        T.set_backend(backend)
        a = ad.Variable(name="x1", shape=[2, 2, 2])
        b = ad.Variable(name="x2", shape=[2, 2, 2])
        # One "in" index on a, two on b: the chain contracts over the
        # shared (b, c) axes.
        a.set_in_indices_length(1)
        b.set_in_indices_length(2)
        chained = ad.chainjacobian(a, b)
        runner = ad.Executor([chained])
        a_val = T.tensor([[[1, 1], [1, 1]], [[1, 1], [1, 1]]])
        b_val = T.tensor([[[1, 1], [1, 1]], [[1, 1], [1, 1]]])
        result, = runner.run(feed_dict={a: a_val, b: b_val})
        expected = T.einsum("abc,bcd->ad", a_val, b_val)
        assert isinstance(chained, ad.Node)
        assert T.array_equal(result, expected)
def _check_subgraph(g, sg):
    """Verify ``sg`` is the expected induced subgraph of ``g``
    (idtype/device-aware variant)."""
    # Metadata must be inherited verbatim from the parent graph.
    assert sg.idtype == g.idtype
    assert sg.device == g.device
    assert sg.ntypes == g.ntypes
    assert sg.etypes == g.etypes
    assert sg.canonical_etypes == g.canonical_etypes
    # Induced node mappings back to the parent graph.
    assert F.array_equal(F.tensor(sg.nodes['user'].data[dgl.NID]),
                         F.tensor([1, 2], g.idtype))
    assert F.array_equal(F.tensor(sg.nodes['game'].data[dgl.NID]),
                         F.tensor([0], g.idtype))
    # Each retained relation keeps exactly parent edge #1.
    for rel in ('follows', 'plays', 'wishes'):
        assert F.array_equal(F.tensor(sg.edges[rel].data[dgl.EID]),
                             F.tensor([1], g.idtype))
    # Developer nodes and develops edges are dropped entirely.
    assert sg.number_of_nodes('developer') == 0
    assert sg.number_of_edges('develops') == 0
    # Features follow the induced ids.
    assert F.array_equal(sg.nodes['user'].data['h'],
                         g.nodes['user'].data['h'][1:3])
    assert F.array_equal(sg.edges['follows'].data['h'],
                         g.edges['follows'].data['h'][1:2])
def _test_DGLCSVDataset_multiple():
    """End-to-end DGLCSVDataset test with multiple node/edge CSV files,
    multiple graphs per file, and graph-level data, including the
    reload-from-cache path."""
    with tempfile.TemporaryDirectory() as test_dir:
        # generate YAML/CSVs
        meta_yaml_path = os.path.join(test_dir, "meta.yaml")
        edges_csv_path_0 = os.path.join(test_dir, "test_edges_0.csv")
        edges_csv_path_1 = os.path.join(test_dir, "test_edges_1.csv")
        nodes_csv_path_0 = os.path.join(test_dir, "test_nodes_0.csv")
        nodes_csv_path_1 = os.path.join(test_dir, "test_nodes_1.csv")
        graph_csv_path = os.path.join(test_dir, "test_graph.csv")
        # Two node types ('user', 'item') and two relations; the YAML only
        # references file basenames since all files live in test_dir.
        meta_yaml_data = {'version': '1.0.0', 'dataset_name': 'default_name',
                          'node_data': [{'file_name': os.path.basename(nodes_csv_path_0), 'ntype': 'user', },
                                        {'file_name': os.path.basename(nodes_csv_path_1), 'ntype': 'item', }],
                          'edge_data': [{'file_name': os.path.basename(edges_csv_path_0), 'etype': ['user', 'follow', 'user'], },
                                        {'file_name': os.path.basename(edges_csv_path_1), 'etype': ['user', 'like', 'item'], }],
                          'graph_data': {'file_name': os.path.basename(graph_csv_path)}
                          }
        with open(meta_yaml_path, 'w') as f:
            yaml.dump(meta_yaml_data, f, sort_keys=False)
        num_nodes = 100
        num_edges = 500
        num_graphs = 10
        num_dims = 3
        # Node CSVs: every graph reuses node ids 0..num_nodes-1, disambiguated
        # by the 'graph_id' column. Both node files share the same content.
        feat_ndata = np.random.rand(num_nodes*num_graphs, num_dims)
        label_ndata = np.random.randint(2, size=num_nodes*num_graphs)
        df = pd.DataFrame({'node_id': np.hstack([np.arange(num_nodes) for _ in range(num_graphs)]),
                           'label': label_ndata,
                           'feat': [line.tolist() for line in feat_ndata],
                           'graph_id': np.hstack([np.full(num_nodes, i) for i in range(num_graphs)])
                           })
        df.to_csv(nodes_csv_path_0, index=False)
        df.to_csv(nodes_csv_path_1, index=False)
        # Edge CSVs: random endpoints per graph, same layout in both files.
        feat_edata = np.random.rand(num_edges*num_graphs, num_dims)
        label_edata = np.random.randint(2, size=num_edges*num_graphs)
        df = pd.DataFrame({'src_id': np.hstack([np.random.randint(num_nodes, size=num_edges) for _ in range(num_graphs)]),
                           'dst_id': np.hstack([np.random.randint(num_nodes, size=num_edges) for _ in range(num_graphs)]),
                           'label': label_edata,
                           'feat': [line.tolist() for line in feat_edata],
                           'graph_id': np.hstack([np.full(num_edges, i) for i in range(num_graphs)])
                           })
        df.to_csv(edges_csv_path_0, index=False)
        df.to_csv(edges_csv_path_1, index=False)
        # Graph CSV: one row of graph-level feat/label per graph.
        feat_gdata = np.random.rand(num_graphs, num_dims)
        label_gdata = np.random.randint(2, size=num_graphs)
        df = pd.DataFrame({'label': label_gdata,
                           'feat': [line.tolist() for line in feat_gdata],
                           'graph_id': np.arange(num_graphs)
                           })
        df.to_csv(graph_csv_path, index=False)
        # load CSVDataset with default node/edge/graph_data_parser
        for force_reload in [True, False]:
            if not force_reload:
                # remove original node data file to verify reload from cached files
                os.remove(nodes_csv_path_0)
                assert not os.path.exists(nodes_csv_path_0)
            csv_dataset = data.DGLCSVDataset(
                test_dir, force_reload=force_reload)
            assert len(csv_dataset) == num_graphs
            assert csv_dataset.has_cache()
            # Graph-level data exposes exactly 'feat' and 'label'.
            assert len(csv_dataset.data) == 2
            assert 'feat' in csv_dataset.data
            assert 'label' in csv_dataset.data
            assert F.array_equal(F.tensor(feat_gdata), csv_dataset.data['feat'])
            for i, (g, label) in enumerate(csv_dataset):
                assert not g.is_homogeneous
                assert F.asnumpy(label) == label_gdata[i]
                # Both node types carry the same per-graph slice of the
                # node features/labels (both CSVs held identical content).
                for ntype in g.ntypes:
                    assert g.num_nodes(ntype) == num_nodes
                    assert F.array_equal(F.tensor(feat_ndata[i*num_nodes:(i+1)*num_nodes]),
                                         g.nodes[ntype].data['feat'])
                    assert np.array_equal(label_ndata[i*num_nodes:(i+1)*num_nodes],
                                          F.asnumpy(g.nodes[ntype].data['label']))
                for etype in g.etypes:
                    assert g.num_edges(etype) == num_edges
                    assert F.array_equal(F.tensor(feat_edata[i*num_edges:(i+1)*num_edges]),
                                         g.edges[etype].data['feat'])
                    assert np.array_equal(label_edata[i*num_edges:(i+1)*num_edges],
                                          F.asnumpy(g.edges[etype].data['label']))
def test_convert():
    """Round-trip conversion between heterographs and homogeneous graphs.

    Covers: to_homo feature concatenation, per-edge id/type bookkeeping,
    to_hetero reconstruction (with and without an explicit metagraph), and
    several smaller from-homo/to-homo corner cases.
    """
    hg = create_test_heterograph()
    hs = []
    for ntype in hg.ntypes:
        h = F.randn((hg.number_of_nodes(ntype), 5))
        hg.nodes[ntype].data['h'] = h
        hs.append(h)
    # 'x' exists only on one node type / one edge type, so it must be
    # dropped by to_homo (asserted below).
    hg.nodes['user'].data['x'] = F.randn((3, 3))
    ws = []
    for etype in hg.canonical_etypes:
        w = F.randn((hg.number_of_edges(etype), 5))
        hg.edges[etype].data['w'] = w
        ws.append(w)
    hg.edges['plays'].data['x'] = F.randn((4, 3))

    g = dgl.to_homo(hg)
    assert F.array_equal(F.cat(hs, dim=0), g.ndata['h'])
    assert 'x' not in g.ndata
    assert F.array_equal(F.cat(ws, dim=0), g.edata['w'])
    assert 'x' not in g.edata

    # Every homogeneous edge must map back to the same endpoints in hg
    # through the NTYPE/ETYPE/NID/EID bookkeeping tensors.
    src, dst = g.all_edges(order='eid')
    src = F.asnumpy(src)
    dst = F.asnumpy(dst)
    etype_id, eid = F.asnumpy(g.edata[dgl.ETYPE]), F.asnumpy(g.edata[dgl.EID])
    ntype_id, nid = F.asnumpy(g.ndata[dgl.NTYPE]), F.asnumpy(g.ndata[dgl.NID])
    for i in range(g.number_of_edges()):
        srctype = hg.ntypes[ntype_id[src[i]]]
        dsttype = hg.ntypes[ntype_id[dst[i]]]
        etype = hg.etypes[etype_id[i]]
        src_i, dst_i = hg.find_edges([eid[i]], (srctype, etype, dsttype))
        # np.asscalar() was deprecated in NumPy 1.16 and removed in 1.23;
        # ndarray.item() is the supported equivalent.
        assert F.asnumpy(src_i).item() == nid[src[i]]
        assert F.asnumpy(dst_i).item() == nid[dst[i]]

    mg = nx.MultiDiGraph([('user', 'user', 'follows'),
                          ('user', 'game', 'plays'),
                          ('user', 'game', 'wishes'),
                          ('developer', 'game', 'develops')])
    # Reconstruction must work both with and without an explicit metagraph.
    for _mg in [None, mg]:
        hg2 = dgl.to_hetero(g, ['user', 'game', 'developer'],
                            ['follows', 'plays', 'wishes', 'develops'],
                            ntype_field=dgl.NTYPE, etype_field=dgl.ETYPE,
                            metagraph=_mg)
        assert set(hg.ntypes) == set(hg2.ntypes)
        assert set(hg.canonical_etypes) == set(hg2.canonical_etypes)
        for ntype in hg.ntypes:
            assert hg.number_of_nodes(ntype) == hg2.number_of_nodes(ntype)
            assert F.array_equal(hg.nodes[ntype].data['h'],
                                 hg2.nodes[ntype].data['h'])
        for canonical_etype in hg.canonical_etypes:
            src, dst = hg.all_edges(etype=canonical_etype, order='eid')
            src2, dst2 = hg2.all_edges(etype=canonical_etype, order='eid')
            assert F.array_equal(src, src2)
            assert F.array_equal(dst, dst2)
            assert F.array_equal(hg.edges[canonical_etype].data['w'],
                                 hg2.edges[canonical_etype].data['w'])

    # hetero_from_homo test case 2
    g = dgl.graph([(0, 2), (1, 2), (2, 3), (0, 3)])
    g.ndata[dgl.NTYPE] = F.tensor([0, 0, 1, 2])
    g.edata[dgl.ETYPE] = F.tensor([0, 0, 1, 2])
    hg = dgl.to_hetero(g, ['l0', 'l1', 'l2'], ['e0', 'e1', 'e2'])
    assert set(hg.canonical_etypes) == set(
        [('l0', 'e0', 'l1'), ('l1', 'e1', 'l2'), ('l0', 'e2', 'l2')])
    assert hg.number_of_nodes('l0') == 2
    assert hg.number_of_nodes('l1') == 1
    assert hg.number_of_nodes('l2') == 1
    assert hg.number_of_edges('e0') == 2
    assert hg.number_of_edges('e1') == 1
    assert hg.number_of_edges('e2') == 1

    # hetero_from_homo test case 3: one edge type name shared by two
    # distinct canonical relations.
    mg = nx.MultiDiGraph([('user', 'movie', 'watches'), ('user', 'TV', 'watches')])
    g = dgl.graph([(0, 1), (0, 2)])
    g.ndata[dgl.NTYPE] = F.tensor([0, 1, 2])
    g.edata[dgl.ETYPE] = F.tensor([0, 0])
    for _mg in [None, mg]:
        hg = dgl.to_hetero(g, ['user', 'TV', 'movie'], ['watches'], metagraph=_mg)
        assert set(hg.canonical_etypes) == set(
            [('user', 'watches', 'movie'), ('user', 'watches', 'TV')])
        assert hg.number_of_nodes('user') == 1
        assert hg.number_of_nodes('TV') == 1
        assert hg.number_of_nodes('movie') == 1
        assert hg.number_of_edges(('user', 'watches', 'TV')) == 1
        assert hg.number_of_edges(('user', 'watches', 'movie')) == 1
        assert len(hg.etypes) == 2

    # hetero_to_homo test case 2: node count is the sum over both sides
    # of the bipartite graph.
    hg = dgl.bipartite([(0, 0), (1, 1)], card=(2, 3))
    g = dgl.to_homo(hg)
    assert g.number_of_nodes() == 5
def test_to_block(index_dtype):
    """Test dgl.to_block: structure, id mapping, and dst-node ordering for
    single relations, bipartite relations, and full heterographs."""

    def check(g, bg, ntype, etype, dst_nodes, include_dst_in_src=True):
        # Verify one relation of the block against the parent graph.
        if dst_nodes is not None:
            assert F.array_equal(bg.dstnodes[ntype].data[dgl.NID], dst_nodes)
        n_dst_nodes = bg.number_of_nodes('DST/' + ntype)
        if include_dst_in_src:
            # Destination nodes must appear as the leading src nodes.
            assert F.array_equal(
                bg.srcnodes[ntype].data[dgl.NID][:n_dst_nodes],
                bg.dstnodes[ntype].data[dgl.NID])

        g = g[etype]
        bg = bg[etype]
        induced_src = bg.srcdata[dgl.NID]
        induced_dst = bg.dstdata[dgl.NID]
        induced_eid = bg.edata[dgl.EID]
        bg_src, bg_dst = bg.all_edges(order='eid')
        src_ans, dst_ans = g.all_edges(order='eid')
        # Mapping block endpoints through the induced ids must reproduce
        # the parent endpoints addressed by the induced edge ids.
        induced_src_bg = F.gather_row(induced_src, bg_src)
        induced_dst_bg = F.gather_row(induced_dst, bg_dst)
        induced_src_ans = F.gather_row(src_ans, induced_eid)
        induced_dst_ans = F.gather_row(dst_ans, induced_eid)
        assert F.array_equal(induced_src_bg, induced_src_ans)
        assert F.array_equal(induced_dst_bg, induced_dst_ans)

    def checkall(g, bg, dst_nodes, include_dst_in_src=True):
        # Run `check` on every relation, passing the given dst nodes for
        # destination types that have them.
        for etype in g.etypes:
            ntype = g.to_canonical_etype(etype)[2]
            if dst_nodes is not None and ntype in dst_nodes:
                check(g, bg, ntype, etype, dst_nodes[ntype], include_dst_in_src)
            else:
                check(g, bg, ntype, etype, None, include_dst_in_src)

    g = dgl.heterograph({
        ('A', 'AA', 'A'): [(0, 1), (2, 3), (1, 2), (3, 4)],
        ('A', 'AB', 'B'): [(0, 1), (1, 3), (3, 5), (1, 6)],
        ('B', 'BA', 'A'): [(2, 3), (3, 2)]}, index_dtype=index_dtype)
    g_a = g['AA']

    # Default: dst nodes are included in src (5 src vs 4 dst here).
    bg = dgl.to_block(g_a)
    check(g_a, bg, 'A', 'AA', None)
    assert bg.number_of_src_nodes() == 5
    assert bg.number_of_dst_nodes() == 4

    bg = dgl.to_block(g_a, include_dst_in_src=False)
    check(g_a, bg, 'A', 'AA', None, False)
    assert bg.number_of_src_nodes() == 4
    assert bg.number_of_dst_nodes() == 4

    # Explicit dst-node ordering must be preserved verbatim.
    dst_nodes = F.tensor([4, 3, 2, 1], dtype=getattr(F, index_dtype))
    bg = dgl.to_block(g_a, dst_nodes)
    check(g_a, bg, 'A', 'AA', dst_nodes)

    g_ab = g['AB']

    bg = dgl.to_block(g_ab)
    assert bg._idtype_str == index_dtype
    assert bg.number_of_nodes('SRC/B') == 4
    assert F.array_equal(bg.srcnodes['B'].data[dgl.NID],
                         bg.dstnodes['B'].data[dgl.NID])
    assert bg.number_of_nodes('DST/A') == 0
    checkall(g_ab, bg, None)

    # Per-type dst-node dict on the full heterograph.
    dst_nodes = {'B': F.tensor([5, 6, 3, 1], dtype=getattr(F, index_dtype))}
    bg = dgl.to_block(g, dst_nodes)
    assert bg.number_of_nodes('SRC/B') == 4
    assert F.array_equal(bg.srcnodes['B'].data[dgl.NID],
                         bg.dstnodes['B'].data[dgl.NID])
    assert bg.number_of_nodes('DST/A') == 0
    checkall(g, bg, dst_nodes)

    dst_nodes = {'A': F.tensor([4, 3, 2, 1], dtype=getattr(F, index_dtype)),
                 'B': F.tensor([3, 5, 6, 1], dtype=getattr(F, index_dtype))}
    bg = dgl.to_block(g, dst_nodes=dst_nodes)
    checkall(g, bg, dst_nodes)
def test_to_bidirected():
    """Test dgl.to_bidirected on homogeneous, empty, and heterogeneous
    graphs, with and without ndata/edata copying."""
    # homogeneous graph
    g = dgl.graph((F.tensor([0, 1, 3, 1]), F.tensor([1, 2, 0, 2])))
    g.ndata['h'] = F.tensor([[0.], [1.], [2.], [1.]])
    g.edata['h'] = F.tensor([[3.], [4.], [5.], [6.]])
    bg = dgl.to_bidirected(g, copy_ndata=True, copy_edata=True)
    u, v = g.edges()
    ub, vb = bg.edges()
    # The bidirected graph lists the original edges first, then the reversed
    # copies; edge data is duplicated accordingly.
    assert F.array_equal(F.cat([u, v], dim=0), ub)
    assert F.array_equal(F.cat([v, u], dim=0), vb)
    assert F.array_equal(g.ndata['h'], bg.ndata['h'])
    assert F.array_equal(F.cat([g.edata['h'], g.edata['h']], dim=0), bg.edata['h'])
    # Writing new fields on bg must not leak back into g.
    bg.ndata['hh'] = F.tensor([[0.], [1.], [2.], [1.]])
    assert ('hh' in g.ndata) is False
    bg.edata['hh'] = F.tensor([[0.], [1.], [2.], [1.], [0.], [1.], [2.], [1.]])
    assert ('hh' in g.edata) is False

    # do not share ndata and edata
    bg = dgl.to_bidirected(g, copy_ndata=False, copy_edata=False)
    ub, vb = bg.edges()
    assert F.array_equal(F.cat([u, v], dim=0), ub)
    assert F.array_equal(F.cat([v, u], dim=0), vb)
    assert ('h' in bg.ndata) is False
    assert ('h' in bg.edata) is False

    # zero edge graph: conversion must simply not crash.
    g = dgl.graph([])
    bg = dgl.to_bidirected(g, copy_ndata=True, copy_edata=True)

    # heterogeneous graph
    g = dgl.heterograph({
        ('user', 'wins', 'user'): (F.tensor([0, 2, 0, 2, 2]), F.tensor([1, 1, 2, 1, 0])),
        ('user', 'plays', 'game'): (F.tensor([1, 2, 1]), F.tensor([2, 1, 1])),
        ('user', 'follows', 'user'): (F.tensor([1, 2, 1]), F.tensor([0, 0, 0]))
    })
    g.nodes['game'].data['hv'] = F.ones((3, 1))
    g.nodes['user'].data['hv'] = F.ones((3, 1))
    g.edges['wins'].data['h'] = F.tensor([0, 1, 2, 3, 4])
    # ignore_bipartite=True leaves 'plays' (user->game) untouched.
    bg = dgl.to_bidirected(g, copy_ndata=True, copy_edata=True, ignore_bipartite=True)
    assert F.array_equal(g.nodes['game'].data['hv'], bg.nodes['game'].data['hv'])
    assert F.array_equal(g.nodes['user'].data['hv'], bg.nodes['user'].data['hv'])
    u, v = g.all_edges(order='eid', etype=('user', 'wins', 'user'))
    ub, vb = bg.all_edges(order='eid', etype=('user', 'wins', 'user'))
    assert F.array_equal(F.cat([u, v], dim=0), ub)
    assert F.array_equal(F.cat([v, u], dim=0), vb)
    assert F.array_equal(F.cat([g.edges['wins'].data['h'], g.edges['wins'].data['h']], dim=0),
                         bg.edges['wins'].data['h'])
    u, v = g.all_edges(order='eid', etype=('user', 'follows', 'user'))
    ub, vb = bg.all_edges(order='eid', etype=('user', 'follows', 'user'))
    assert F.array_equal(F.cat([u, v], dim=0), ub)
    assert F.array_equal(F.cat([v, u], dim=0), vb)
    # The bipartite relation keeps its original edges and gets no copied data.
    u, v = g.all_edges(order='eid', etype=('user', 'plays', 'game'))
    ub, vb = bg.all_edges(order='eid', etype=('user', 'plays', 'game'))
    assert F.array_equal(u, ub)
    assert F.array_equal(v, vb)
    assert len(bg.edges['plays'].data) == 0
    assert len(bg.edges['follows'].data) == 0

    # do not share ndata and edata
    bg = dgl.to_bidirected(g, copy_ndata=False, copy_edata=False, ignore_bipartite=True)
    assert len(bg.edges['wins'].data) == 0
    assert len(bg.edges['plays'].data) == 0
    assert len(bg.edges['follows'].data) == 0
    assert len(bg.nodes['game'].data) == 0
    assert len(bg.nodes['user'].data) == 0
    u, v = g.all_edges(order='eid', etype=('user', 'wins', 'user'))
    ub, vb = bg.all_edges(order='eid', etype=('user', 'wins', 'user'))
    assert F.array_equal(F.cat([u, v], dim=0), ub)
    assert F.array_equal(F.cat([v, u], dim=0), vb)
    u, v = g.all_edges(order='eid', etype=('user', 'follows', 'user'))
    ub, vb = bg.all_edges(order='eid', etype=('user', 'follows', 'user'))
    assert F.array_equal(F.cat([u, v], dim=0), ub)
    assert F.array_equal(F.cat([v, u], dim=0), vb)
    u, v = g.all_edges(order='eid', etype=('user', 'plays', 'game'))
    ub, vb = bg.all_edges(order='eid', etype=('user', 'plays', 'game'))
    assert F.array_equal(u, ub)
    assert F.array_equal(v, vb)
def check_basics(g, ig):
    """Compare a graph index `g` against its immutable counterpart `ig`:
    same nodes, edges, neighborhoods, degrees, and edge-id queries."""
    assert g.number_of_nodes() == ig.number_of_nodes()
    assert g.number_of_edges() == ig.number_of_edges()
    # Full edge lists in both supported orders must agree element-wise.
    edges = g.edges("srcdst")
    iedges = ig.edges("srcdst")
    assert F.array_equal(edges[0].tousertensor(), iedges[0].tousertensor())
    assert F.array_equal(edges[1].tousertensor(), iedges[1].tousertensor())
    assert F.array_equal(edges[2].tousertensor(), iedges[2].tousertensor())
    edges = g.edges("eid")
    iedges = ig.edges("eid")
    assert F.array_equal(edges[0].tousertensor(), iedges[0].tousertensor())
    assert F.array_equal(edges[1].tousertensor(), iedges[1].tousertensor())
    assert F.array_equal(edges[2].tousertensor(), iedges[2].tousertensor())
    for i in range(g.number_of_nodes()):
        assert g.has_node(i) == ig.has_node(i)
    for i in range(g.number_of_nodes()):
        assert F.array_equal(
            g.predecessors(i).tousertensor(), ig.predecessors(i).tousertensor())
        assert F.array_equal(
            g.successors(i).tousertensor(), ig.successors(i).tousertensor())
    # Spot-check in/out edges on 10 random nodes; sort_edges normalizes
    # ordering before comparison.
    randv = np.random.randint(0, g.number_of_nodes(), 10)
    randv = utils.toindex(randv)
    in_src1, in_dst1, in_eids1 = sort_edges(g.in_edges(randv))
    in_src2, in_dst2, in_eids2 = sort_edges(ig.in_edges(randv))
    nnz = in_src2.shape[0]  # NOTE(review): unused; kept for parity with original
    assert F.array_equal(in_src1, in_src2)
    assert F.array_equal(in_dst1, in_dst2)
    assert F.array_equal(in_eids1, in_eids2)
    out_src1, out_dst1, out_eids1 = sort_edges(g.out_edges(randv))
    out_src2, out_dst2, out_eids2 = sort_edges(ig.out_edges(randv))
    nnz = out_dst2.shape[0]  # NOTE(review): unused; kept for parity with original
    assert F.array_equal(out_dst1, out_dst2)
    assert F.array_equal(out_src1, out_src2)
    assert F.array_equal(out_eids1, out_eids2)
    num_v = len(randv)  # NOTE(review): unused; kept for parity with original
    assert F.array_equal(
        g.in_degrees(randv).tousertensor(), ig.in_degrees(randv).tousertensor())
    assert F.array_equal(
        g.out_degrees(randv).tousertensor(), ig.out_degrees(randv).tousertensor())
    randv = randv.tousertensor()
    for v in F.asnumpy(randv):
        assert g.in_degree(v) == ig.in_degree(v)
        assert g.out_degree(v) == ig.out_degree(v)
    # Pairwise queries over the random node sample.
    for u in F.asnumpy(randv):
        for v in F.asnumpy(randv):
            # edge_id is only comparable directly when the pair has a
            # single connecting edge (multigraphs may return several ids).
            if len(g.edge_id(u, v)) == 1:
                assert g.edge_id(u, v).tonumpy() == ig.edge_id(u, v).tonumpy()
            assert g.has_edge_between(u, v) == ig.has_edge_between(u, v)
    randv = utils.toindex(randv)
    ids = g.edge_ids(randv, randv)[2].tonumpy()
    # Count matching positions: every returned id/flag must agree.
    assert sum(ig.edge_ids(randv, randv)[2].tonumpy() == ids, 0) == len(ids)
    assert sum(
        g.has_edges_between(randv, randv).tonumpy() == ig.has_edges_between(
            randv, randv).tonumpy(), 0) == len(randv)
def _check_neighbor_sampling_dataloader(g, nids, dl, mode, collator):
    """Validate every mini-batch produced by a neighbor-sampling dataloader.

    `mode` is one of 'node', 'edge', 'link' and controls how each item from
    `dl` is unpacked. Checks block/graph id consistency, feature slicing,
    edge existence in the parent graph, and finally that the union of all
    emitted seeds equals `nids`.
    NOTE(review): `collator` is accepted but never used here — confirm
    whether callers rely on the signature.
    """
    seeds = defaultdict(list)
    for item in dl:
        if mode == 'node':
            input_nodes, output_nodes, blocks = item
        elif mode == 'edge':
            input_nodes, pair_graph, blocks = item
            output_nodes = pair_graph.ndata[dgl.NID]
        elif mode == 'link':
            input_nodes, pair_graph, neg_graph, blocks = item
            output_nodes = pair_graph.ndata[dgl.NID]
            # Negative graph must cover exactly the same nodes as the
            # positive pair graph.
            for ntype in pair_graph.ntypes:
                assert F.array_equal(pair_graph.nodes[ntype].data[dgl.NID],
                                     neg_graph.nodes[ntype].data[dgl.NID])

        # Input/output node ids must match the first/last block boundaries.
        if len(g.ntypes) > 1:
            for ntype in g.ntypes:
                assert F.array_equal(input_nodes[ntype],
                                     blocks[0].srcnodes[ntype].data[dgl.NID])
                assert F.array_equal(output_nodes[ntype],
                                     blocks[-1].dstnodes[ntype].data[dgl.NID])
        else:
            assert F.array_equal(input_nodes, blocks[0].srcdata[dgl.NID])
            assert F.array_equal(output_nodes, blocks[-1].dstdata[dgl.NID])

        prev_dst = {ntype: None for ntype in g.ntypes}
        for block in blocks:
            for canonical_etype in block.canonical_etypes:
                utype, etype, vtype = canonical_etype
                uu, vv = block.all_edges(order='eid', etype=canonical_etype)
                src = block.srcnodes[utype].data[dgl.NID]
                dst = block.dstnodes[vtype].data[dgl.NID]
                # Block features are slices of the parent graph's features.
                assert F.array_equal(
                    block.srcnodes[utype].data['feat'],
                    g.nodes[utype].data['feat'][src])
                assert F.array_equal(
                    block.dstnodes[vtype].data['feat'],
                    g.nodes[vtype].data['feat'][dst])
                # Consecutive blocks chain: this block's src nodes are the
                # previous block's dst nodes.
                if prev_dst[utype] is not None:
                    assert F.array_equal(src, prev_dst[utype])
                u = src[uu]
                v = dst[vv]
                # Every sampled edge must exist in the parent graph with
                # matching endpoints and edge data.
                assert F.asnumpy(g.has_edges_between(u, v, etype=canonical_etype)).all()
                eid = block.edges[canonical_etype].data[dgl.EID]
                assert F.array_equal(
                    block.edges[canonical_etype].data['feat'],
                    g.edges[canonical_etype].data['feat'][eid])
                ufound, vfound = g.find_edges(eid, etype=canonical_etype)
                assert F.array_equal(ufound, u)
                assert F.array_equal(vfound, v)
            for ntype in block.dsttypes:
                src = block.srcnodes[ntype].data[dgl.NID]
                dst = block.dstnodes[ntype].data[dgl.NID]
                # Dst nodes are the leading prefix of the src nodes.
                assert F.array_equal(src[:block.number_of_dst_nodes(ntype)], dst)
                prev_dst[ntype] = dst

        # Accumulate the seeds emitted in this batch, keyed by node type
        # ('node' mode) or canonical edge type ('edge'/'link' modes).
        if mode == 'node':
            for ntype in blocks[-1].dsttypes:
                seeds[ntype].append(blocks[-1].dstnodes[ntype].data[dgl.NID])
        elif mode == 'edge' or mode == 'link':
            for etype in pair_graph.canonical_etypes:
                seeds[etype].append(pair_graph.edges[etype].data[dgl.EID])

    # Check if all nodes/edges are iterated
    seeds = {k: F.cat(v, 0) for k, v in seeds.items()}
    for k, v in seeds.items():
        if k in nids:
            seed_set = set(F.asnumpy(nids[k]))
        elif isinstance(k, tuple) and k[1] in nids:
            seed_set = set(F.asnumpy(nids[k[1]]))
        else:
            continue
        v_set = set(F.asnumpy(v))
        assert v_set == seed_set
def _assert_is_identical_index(i1, i2):
    """Assert two Index objects carry identical slice metadata and values."""
    meta1, meta2 = i1.slice_data(), i2.slice_data()
    assert meta1 == meta2
    assert F.array_equal(i1.tousertensor(), i2.tousertensor())
def test_level1():
    """Test per-edge-type send/recv and multi_recv on a heterograph,
    including cross-type reducers and expected failure modes."""
    #edges = {
    #    'follows': ([0, 1], [1, 2]),
    #    'plays': ([0, 1, 2, 1], [0, 0, 1, 1]),
    #    'wishes': ([0, 2], [1, 0]),
    #    'develops': ([0, 1], [0, 1]),
    #}
    g = create_test_heterograph()

    def rfunc(nodes):
        # Sum-reduce incoming messages.
        return {'y': F.sum(nodes.mailbox['m'], 1)}

    def rfunc2(nodes):
        # Max-reduce incoming messages.
        return {'y': F.max(nodes.mailbox['m'], 1)}

    def mfunc(edges):
        # Message = source node feature.
        return {'m': edges.src['h']}

    def afunc(nodes):
        # Apply function: add one to the reduced result.
        return {'y': nodes.data['y'] + 1}

    g.nodes['user'].data['h'] = F.ones((3, 2))
    # Send on edges 2 and 3 of 'plays' (both point at game 1), recv on
    # games 0 and 1: game 0 gets no message, game 1 sums two ones.
    g.send([2, 3], mfunc, etype='plays')
    g.recv([0, 1], rfunc, etype='plays')
    y = g.nodes['game'].data['y']
    assert F.array_equal(y, F.tensor([[0., 0.], [2., 2.]]))
    g.nodes['game'].data.pop('y')
    # only one type
    play_g = g['plays']
    play_g.send([2, 3], mfunc)
    play_g.recv([0, 1], rfunc)
    y = g.nodes['game'].data['y']
    assert F.array_equal(y, F.tensor([[0., 0.], [2., 2.]]))
    # TODO(minjie): following codes will fail because messages are
    # not shared with the base graph. However, since send and recv
    # are rarely used, no fix at the moment.
    # g['plays'].send([2, 3], mfunc)
    # g['plays'].recv([0, 1], mfunc)
    # test fail case
    # fail due to multiple types: etype must be given on a heterograph.
    fail = False
    try:
        g.send([2, 3], mfunc)
    except dgl.DGLError:
        fail = True
    assert fail
    fail = False
    try:
        g.recv([0, 1], rfunc)
    except dgl.DGLError:
        fail = True
    assert fail
    # test multi recv: messages from two relations combined with 'sum'.
    g.send(g.edges(etype='plays'), mfunc, etype='plays')
    g.send(g.edges(etype='wishes'), mfunc, etype='wishes')
    g.multi_recv([0, 1], {
        'plays': rfunc,
        ('user', 'wishes', 'game'): rfunc2
    }, 'sum')
    assert F.array_equal(g.nodes['game'].data['y'],
                         F.tensor([[3., 3.], [3., 3.]]))
    # test multi recv with apply function
    g.send(g.edges(etype='plays'), mfunc, etype='plays')
    g.send(g.edges(etype='wishes'), mfunc, etype='wishes')
    g.multi_recv([0, 1], {
        'plays': (rfunc, afunc),
        ('user', 'wishes', 'game'): rfunc2
    }, 'sum', afunc)
    assert F.array_equal(g.nodes['game'].data['y'],
                         F.tensor([[5., 5.], [5., 5.]]))
    # test cross reducer: multi_recv result must equal reducing the two
    # per-relation results manually with the same reducer.
    g.nodes['user'].data['h'] = F.randn((3, 2))
    for cred in ['sum', 'max', 'min', 'mean']:
        g.send(g.edges(etype='plays'), mfunc, etype='plays')
        g.send(g.edges(etype='wishes'), mfunc, etype='wishes')
        g.multi_recv([0, 1], {
            'plays': (rfunc, afunc),
            'wishes': rfunc2
        }, cred, afunc)
        y = g.nodes['game'].data['y']
        g1 = g['plays']
        g2 = g['wishes']
        g1.send(g1.edges(), mfunc)
        g1.recv(g1.nodes('game'), rfunc, afunc)
        y1 = g.nodes['game'].data['y']
        g2.send(g2.edges(), mfunc)
        g2.recv(g2.nodes('game'), rfunc2)
        y2 = g.nodes['game'].data['y']
        yy = get_redfn(cred)(F.stack([y1, y2], 0), 0)
        yy = yy + 1  # final afunc
        assert F.array_equal(y, yy)
    # test fail case
    # fail because cannot infer ntype: 'plays' and 'follows' have
    # different destination node types.
    fail = False
    try:
        g.multi_recv([0, 1], {'plays': rfunc, 'follows': rfunc2}, 'sum')
    except dgl.DGLError:
        fail = True
    assert fail
def _test_construct_graphs_multiple():
    """Test DGLGraphConstructor.construct_graphs with multiple graphs.

    Builds `num_graphs` homogeneous graphs whose node ids are random
    (non-contiguous) per graph, checks node/edge/graph data round-trips,
    and checks that graph ids present in node/edge data but missing from
    the graph data raise an error.
    """
    from dgl.data.csv_dataset_base import NodeData, EdgeData, GraphData, DGLGraphConstructor
    num_nodes = 100
    num_edges = 1000
    num_graphs = 10
    num_dims = 3
    # np.int was removed in NumPy 1.24; use the explicit np.int64 dtype.
    node_ids = np.array([], dtype=np.int64)
    src_ids = np.array([], dtype=np.int64)
    dst_ids = np.array([], dtype=np.int64)
    ngraph_ids = np.array([], dtype=np.int64)
    egraph_ids = np.array([], dtype=np.int64)
    u_indices = np.array([], dtype=np.int64)
    for i in range(num_graphs):
        # Random, non-contiguous node ids per graph; edges reference them.
        l_node_ids = np.random.choice(
            np.arange(num_nodes*2), size=num_nodes, replace=False)
        node_ids = np.append(node_ids, l_node_ids)
        # Indices that sort each graph's node ids (used to reorder the
        # expected node data below, since construction sorts by id).
        _, l_u_indices = np.unique(l_node_ids, return_index=True)
        u_indices = np.append(u_indices, l_u_indices)
        ngraph_ids = np.append(ngraph_ids, np.full(num_nodes, i))
        src_ids = np.append(src_ids, np.random.choice(
            l_node_ids, size=num_edges))
        dst_ids = np.append(dst_ids, np.random.choice(
            l_node_ids, size=num_edges))
        egraph_ids = np.append(egraph_ids, np.full(num_edges, i))
    ndata = {'feat': np.random.rand(num_nodes*num_graphs, num_dims),
             'label': np.random.randint(2, size=num_nodes*num_graphs)}
    node_data = NodeData(node_ids, ndata, graph_id=ngraph_ids)
    edata = {'feat': np.random.rand(
        num_edges*num_graphs, num_dims),
        'label': np.random.randint(2, size=num_edges*num_graphs)}
    edge_data = EdgeData(src_ids, dst_ids, edata, graph_id=egraph_ids)
    gdata = {'feat': np.random.rand(num_graphs, num_dims),
             'label': np.random.randint(2, size=num_graphs)}
    graph_data = GraphData(np.arange(num_graphs), gdata)
    graphs, data_dict = DGLGraphConstructor.construct_graphs(
        node_data, edge_data, graph_data)
    assert len(graphs) == num_graphs
    assert len(data_dict) == len(gdata)
    for k, v in data_dict.items():
        assert F.array_equal(F.tensor(gdata[k]), v)
    for i, g in enumerate(graphs):
        assert g.is_homogeneous
        assert g.num_nodes() == num_nodes
        assert g.num_edges() == num_edges

        def assert_data(lhs, rhs, size, node=False):
            # Compare this graph's slice of the raw data to the stored data;
            # node data must first be permuted into sorted-id order.
            for key, value in lhs.items():
                assert key in rhs
                value = value[i*size:(i+1)*size]
                if node:
                    indices = u_indices[i*size:(i+1)*size]
                    value = value[indices]
                assert F.array_equal(F.tensor(value), rhs[key])
        assert_data(ndata, g.ndata, num_nodes, node=True)
        assert_data(edata, g.edata, num_edges)

    # Graph IDs found in node/edge CSV but not in graph CSV
    graph_data = GraphData(np.arange(num_graphs-2), {})
    expect_except = False
    try:
        _, _ = DGLGraphConstructor.construct_graphs(
            node_data, edge_data, graph_data)
    except Exception:  # narrowed from bare except: don't swallow BaseException
        expect_except = True
    assert expect_except
def test_level2():
    """Exercise heterograph message passing: ``send_and_recv``, ``pull`` and
    ``update_all`` plus their multi-etype variants, including per-etype apply
    functions, cross-type reducers and expected failure cases (ambiguous
    node/edge types must raise ``dgl.DGLError``).
    """
    # Edge lists of the fixture graph, kept for reference:
    #edges = {
    #    'follows': ([0, 1], [1, 2]),
    #    'plays': ([0, 1, 2, 1], [0, 0, 1, 1]),
    #    'wishes': ([0, 2], [1, 0]),
    #    'develops': ([0, 1], [0, 1]),
    #}
    g = create_test_heterograph()

    def rfunc(nodes):
        # sum reducer
        return {'y': F.sum(nodes.mailbox['m'], 1)}

    def rfunc2(nodes):
        # max reducer
        return {'y': F.max(nodes.mailbox['m'], 1)}

    def mfunc(edges):
        # copy-src message function
        return {'m': edges.src['h']}

    def afunc(nodes):
        # apply function: add one to the reduced result
        return {'y': nodes.data['y'] + 1}

    #############################################################
    # send_and_recv
    #############################################################
    g.nodes['user'].data['h'] = F.ones((3, 2))
    g.send_and_recv([2, 3], mfunc, rfunc, etype='plays')
    y = g.nodes['game'].data['y']
    assert F.array_equal(y, F.tensor([[0., 0.], [2., 2.]]))

    # only one type
    g['plays'].send_and_recv([2, 3], mfunc, rfunc)
    y = g.nodes['game'].data['y']
    assert F.array_equal(y, F.tensor([[0., 0.], [2., 2.]]))

    # test fail case
    # fail due to multiple types
    fail = False
    try:
        g.send_and_recv([2, 3], mfunc, rfunc)
    except dgl.DGLError:
        fail = True
    assert fail

    # test multi
    g.multi_send_and_recv(
        {
            'plays': (g.edges(etype='plays'), mfunc, rfunc),
            ('user', 'wishes', 'game'): (g.edges(etype='wishes'), mfunc, rfunc2)
        },
        'sum')
    assert F.array_equal(g.nodes['game'].data['y'],
                         F.tensor([[3., 3.], [3., 3.]]))

    # test multi
    g.multi_send_and_recv(
        {
            'plays': (g.edges(etype='plays'), mfunc, rfunc, afunc),
            ('user', 'wishes', 'game'): (g.edges(etype='wishes'), mfunc, rfunc2)
        },
        'sum', afunc)
    assert F.array_equal(g.nodes['game'].data['y'],
                         F.tensor([[5., 5.], [5., 5.]]))

    # test cross reducer
    g.nodes['user'].data['h'] = F.randn((3, 2))
    for cred in ['sum', 'max', 'min', 'mean']:
        g.multi_send_and_recv(
            {
                'plays': (g.edges(etype='plays'), mfunc, rfunc, afunc),
                'wishes': (g.edges(etype='wishes'), mfunc, rfunc2)
            },
            cred, afunc)
        y = g.nodes['game'].data['y']
        # Recompute per-etype results one at a time and combine them manually
        # to obtain the expected cross-type reduction.
        g['plays'].send_and_recv(g.edges(etype='plays'), mfunc, rfunc, afunc)
        y1 = g.nodes['game'].data['y']
        g['wishes'].send_and_recv(g.edges(etype='wishes'), mfunc, rfunc2)
        y2 = g.nodes['game'].data['y']
        yy = get_redfn(cred)(F.stack([y1, y2], 0), 0)
        yy = yy + 1  # final afunc
        assert F.array_equal(y, yy)

    # test fail case
    # fail because cannot infer ntype
    fail = False
    try:
        g.multi_send_and_recv(
            {
                'plays': (g.edges(etype='plays'), mfunc, rfunc),
                'follows': (g.edges(etype='follows'), mfunc, rfunc2)
            },
            'sum')
    except dgl.DGLError:
        fail = True
    assert fail

    g.nodes['game'].data.clear()

    #############################################################
    # pull
    #############################################################
    g.nodes['user'].data['h'] = F.ones((3, 2))
    g.pull(1, mfunc, rfunc, etype='plays')
    y = g.nodes['game'].data['y']
    assert F.array_equal(y, F.tensor([[0., 0.], [2., 2.]]))

    # only one type
    g['plays'].pull(1, mfunc, rfunc)
    y = g.nodes['game'].data['y']
    assert F.array_equal(y, F.tensor([[0., 0.], [2., 2.]]))

    # test fail case
    fail = False
    try:
        g.pull(1, mfunc, rfunc)
    except dgl.DGLError:
        fail = True
    assert fail

    # test multi
    g.multi_pull(
        1,
        {
            'plays': (mfunc, rfunc),
            ('user', 'wishes', 'game'): (mfunc, rfunc2)
        },
        'sum')
    assert F.array_equal(g.nodes['game'].data['y'],
                         F.tensor([[0., 0.], [3., 3.]]))

    # test multi
    g.multi_pull(
        1,
        {
            'plays': (mfunc, rfunc, afunc),
            ('user', 'wishes', 'game'): (mfunc, rfunc2)
        },
        'sum', afunc)
    assert F.array_equal(g.nodes['game'].data['y'],
                         F.tensor([[0., 0.], [5., 5.]]))

    # test cross reducer
    g.nodes['user'].data['h'] = F.randn((3, 2))
    for cred in ['sum', 'max', 'min', 'mean']:
        g.multi_pull(
            1,
            {
                'plays': (mfunc, rfunc, afunc),
                'wishes': (mfunc, rfunc2)
            },
            cred, afunc)
        y = g.nodes['game'].data['y']
        g['plays'].pull(1, mfunc, rfunc, afunc)
        y1 = g.nodes['game'].data['y']
        g['wishes'].pull(1, mfunc, rfunc2)
        y2 = g.nodes['game'].data['y']
        g.nodes['game'].data['y'] = get_redfn(cred)(F.stack([y1, y2], 0), 0)
        # Apply afunc only on the pulled node to mirror multi_pull semantics.
        g.apply_nodes(afunc, 1, ntype='game')
        yy = g.nodes['game'].data['y']
        assert F.array_equal(y, yy)

    # test fail case
    # fail because cannot infer ntype
    fail = False
    try:
        g.multi_pull(
            1,
            {
                'plays': (mfunc, rfunc),
                'follows': (mfunc, rfunc2)
            },
            'sum')
    except dgl.DGLError:
        fail = True
    assert fail

    g.nodes['game'].data.clear()

    #############################################################
    # update_all
    #############################################################
    g.nodes['user'].data['h'] = F.ones((3, 2))
    g.update_all(mfunc, rfunc, etype='plays')
    y = g.nodes['game'].data['y']
    assert F.array_equal(y, F.tensor([[2., 2.], [2., 2.]]))

    # only one type
    g['plays'].update_all(mfunc, rfunc)
    y = g.nodes['game'].data['y']
    assert F.array_equal(y, F.tensor([[2., 2.], [2., 2.]]))

    # test fail case
    # fail due to multiple types
    fail = False
    try:
        g.update_all(mfunc, rfunc)
    except dgl.DGLError:
        fail = True
    assert fail

    # test multi
    g.multi_update_all(
        {
            'plays': (mfunc, rfunc),
            ('user', 'wishes', 'game'): (mfunc, rfunc2)
        },
        'sum')
    assert F.array_equal(g.nodes['game'].data['y'],
                         F.tensor([[3., 3.], [3., 3.]]))

    # test multi
    g.multi_update_all(
        {
            'plays': (mfunc, rfunc, afunc),
            ('user', 'wishes', 'game'): (mfunc, rfunc2)
        },
        'sum', afunc)
    assert F.array_equal(g.nodes['game'].data['y'],
                         F.tensor([[5., 5.], [5., 5.]]))

    # test cross reducer
    g.nodes['user'].data['h'] = F.randn((3, 2))
    for cred in ['sum', 'max', 'min', 'mean', 'stack']:
        g.multi_update_all(
            {
                'plays': (mfunc, rfunc, afunc),
                'wishes': (mfunc, rfunc2)
            },
            cred, afunc)
        y = g.nodes['game'].data['y']
        g['plays'].update_all(mfunc, rfunc, afunc)
        y1 = g.nodes['game'].data['y']
        g['wishes'].update_all(mfunc, rfunc2)
        y2 = g.nodes['game'].data['y']
        if cred == 'stack':
            # stack has two both correct outcomes (either etype order)
            yy1 = F.stack([F.unsqueeze(y1, 1), F.unsqueeze(y2, 1)], 1)
            yy1 = yy1 + 1  # final afunc
            yy2 = F.stack([F.unsqueeze(y2, 1), F.unsqueeze(y1, 1)], 1)
            yy2 = yy2 + 1  # final afunc
            assert F.array_equal(y, yy1) or F.array_equal(y, yy2)
        else:
            yy = get_redfn(cred)(F.stack([y1, y2], 0), 0)
            yy = yy + 1  # final afunc
            assert F.array_equal(y, yy)

    # test fail case
    # fail because cannot infer ntype
    fail = False
    try:
        g.update_all(
            {
                'plays': (mfunc, rfunc),
                'follows': (mfunc, rfunc2)
            },
            'sum')
    except dgl.DGLError:
        fail = True
    assert fail

    g.nodes['game'].data.clear()
def check_negative_sampler(mode, exclude_positive, neg_size):
    """Exercise ``dgl.contrib.sampling.EdgeSampler`` negative sampling.

    Verifies positive-edge id mapping, the ``false_neg`` flag, head/tail
    invariants, all four replacement/reset combinations, and the
    knowledge-graph path (``relations`` given per-edge types).

    Parameters (assumed from usage — confirm against callers):
        mode: negative sampling mode string passed as ``negative_mode``.
        exclude_positive: whether positives are excluded from negatives.
        neg_size: negative sample size per positive edge.
    """
    g = generate_rand_graph(100)
    num_edges = g.number_of_edges()
    etype = np.random.randint(0, 10, size=g.number_of_edges(), dtype=np.int64)
    g.edata['etype'] = F.copy_to(F.tensor(etype), F.cpu())

    # Map (dst, eid) -> src for every positive edge, used to validate
    # negative samples below.
    pos_gsrc, pos_gdst, pos_geid = g.all_edges(form='all', order='eid')
    pos_map = {}
    for i in range(len(pos_geid)):
        pos_d = int(F.asnumpy(pos_gdst[i]))
        pos_e = int(F.asnumpy(pos_geid[i]))
        pos_map[(pos_d, pos_e)] = int(F.asnumpy(pos_gsrc[i]))

    EdgeSampler = getattr(dgl.contrib.sampling, 'EdgeSampler')

    # Test the homogeneous graph.
    batch_size = 50
    total_samples = 0
    for pos_edges, neg_edges in EdgeSampler(g, batch_size,
                                            negative_mode=mode,
                                            reset=False,
                                            neg_sample_size=neg_size,
                                            exclude_positive=exclude_positive,
                                            return_false_neg=True):
        # Positive subgraph must map back to the parent graph's edge ids.
        pos_lsrc, pos_ldst, pos_leid = pos_edges.all_edges(form='all', order='eid')
        assert_array_equal(
            F.asnumpy(F.gather_row(pos_edges.parent_eid, pos_leid)),
            F.asnumpy(g.edge_ids(F.gather_row(pos_edges.parent_nid, pos_lsrc),
                                 F.gather_row(pos_edges.parent_nid, pos_ldst))))

        neg_lsrc, neg_ldst, neg_leid = neg_edges.all_edges(form='all', order='eid')
        neg_src = F.gather_row(neg_edges.parent_nid, neg_lsrc)
        neg_dst = F.gather_row(neg_edges.parent_nid, neg_ldst)
        neg_eid = F.gather_row(neg_edges.parent_eid, neg_leid)
        for i in range(len(neg_eid)):
            neg_d = int(F.asnumpy(neg_dst)[i])
            neg_e = int(F.asnumpy(neg_eid)[i])
            # Every negative must be derived from a real positive edge.
            assert (neg_d, neg_e) in pos_map
            if exclude_positive:
                assert int(F.asnumpy(neg_src[i])) != pos_map[(neg_d, neg_e)]

        check_head_tail(neg_edges)
        # Positive and negative batches must cover the same tail nodes.
        pos_tails = F.gather_row(pos_edges.parent_nid, pos_edges.tail_nid)
        neg_tails = F.gather_row(neg_edges.parent_nid, neg_edges.tail_nid)
        pos_tails = np.sort(F.asnumpy(pos_tails))
        neg_tails = np.sort(F.asnumpy(neg_tails))
        np.testing.assert_equal(pos_tails, neg_tails)

        exist = neg_edges.edata['false_neg']
        if exclude_positive:
            # With positives excluded no negative may be a false negative.
            assert np.sum(F.asnumpy(exist) == 0) == len(exist)
        else:
            assert F.array_equal(g.has_edges_between(neg_src, neg_dst), exist)
        total_samples += batch_size
    assert total_samples <= num_edges

    # check replacement = True
    # with reset = False (default setting)
    total_samples = 0
    for pos_edges, neg_edges in EdgeSampler(g, batch_size,
                                            replacement=True,
                                            reset=False,
                                            negative_mode=mode,
                                            neg_sample_size=neg_size,
                                            exclude_positive=exclude_positive,
                                            return_false_neg=True):
        _, _, pos_leid = pos_edges.all_edges(form='all', order='eid')
        assert len(pos_leid) == batch_size
        total_samples += len(pos_leid)
    assert total_samples == num_edges

    # check replacement = False
    # with reset = False (default setting)
    total_samples = 0
    for pos_edges, neg_edges in EdgeSampler(g, batch_size,
                                            replacement=False,
                                            reset=False,
                                            negative_mode=mode,
                                            neg_sample_size=neg_size,
                                            exclude_positive=exclude_positive,
                                            return_false_neg=True):
        _, _, pos_leid = pos_edges.all_edges(form='all', order='eid')
        assert len(pos_leid) == batch_size
        total_samples += len(pos_leid)
    assert total_samples == num_edges

    # check replacement = True
    # with reset = True — sampler keeps producing batches indefinitely.
    total_samples = 0
    max_samples = 2 * num_edges
    for pos_edges, neg_edges in EdgeSampler(g, batch_size,
                                            replacement=True,
                                            reset=True,
                                            negative_mode=mode,
                                            neg_sample_size=neg_size,
                                            exclude_positive=exclude_positive,
                                            return_false_neg=True):
        _, _, pos_leid = pos_edges.all_edges(form='all', order='eid')
        assert len(pos_leid) <= batch_size
        total_samples += len(pos_leid)
        if (total_samples >= max_samples):
            break
    assert total_samples >= max_samples

    # check replacement = False
    # with reset = True
    total_samples = 0
    max_samples = 2 * num_edges
    for pos_edges, neg_edges in EdgeSampler(g, batch_size,
                                            replacement=False,
                                            reset=True,
                                            negative_mode=mode,
                                            neg_sample_size=neg_size,
                                            exclude_positive=exclude_positive,
                                            return_false_neg=True):
        _, _, pos_leid = pos_edges.all_edges(form='all', order='eid')
        assert len(pos_leid) <= batch_size
        total_samples += len(pos_leid)
        if (total_samples >= max_samples):
            break
    assert total_samples >= max_samples

    # Test the knowledge graph.
    total_samples = 0
    for _, neg_edges in EdgeSampler(g, batch_size,
                                    negative_mode=mode,
                                    reset=False,
                                    neg_sample_size=neg_size,
                                    exclude_positive=exclude_positive,
                                    relations=g.edata['etype'],
                                    return_false_neg=True):
        neg_lsrc, neg_ldst, neg_leid = neg_edges.all_edges(form='all', order='eid')
        neg_src = F.gather_row(neg_edges.parent_nid, neg_lsrc)
        neg_dst = F.gather_row(neg_edges.parent_nid, neg_ldst)
        neg_eid = F.gather_row(neg_edges.parent_eid, neg_leid)
        exists = neg_edges.edata['false_neg']
        neg_edges.edata['etype'] = F.gather_row(g.edata['etype'], neg_eid)
        for i in range(len(neg_eid)):
            u, v = F.asnumpy(neg_src[i]), F.asnumpy(neg_dst[i])
            if g.has_edge_between(u, v):
                # A negative is a false negative only if the existing edge
                # has the same relation type.
                eid = g.edge_id(u, v)
                etype = g.edata['etype'][eid]
                exist = neg_edges.edata['etype'][i] == etype
                assert F.asnumpy(exists[i]) == F.asnumpy(exist)
        total_samples += batch_size
    assert total_samples <= num_edges
def test_to_block():
    """Verify ``dgl.to_block``: induced src/dst/edge id mappings, the
    rhs-node ordering guarantee, and behavior on both a single-etype slice
    and the full heterograph with explicit ``rhs_nodes``.
    """
    def check(g, bg, ntype, etype, rhs_nodes):
        # When rhs_nodes is given, the block must keep them in that order.
        if rhs_nodes is not None:
            assert F.array_equal(bg.nodes[ntype + '_r'].data[dgl.NID], rhs_nodes)
        # The first lhs nodes must coincide with the rhs nodes.
        n_rhs_nodes = bg.number_of_nodes(ntype + '_r')
        assert F.array_equal(
            bg.nodes[ntype + '_l'].data[dgl.NID][:n_rhs_nodes],
            bg.nodes[ntype + '_r'].data[dgl.NID])
        g = g[etype]
        bg = bg[etype]
        induced_src = bg.srcdata[dgl.NID]
        induced_dst = bg.dstdata[dgl.NID]
        induced_eid = bg.edata[dgl.EID]
        # Mapping the block's endpoints through the induced node ids must
        # reproduce the parent graph's endpoints for the induced edge ids.
        bg_src, bg_dst = bg.all_edges(order='eid')
        src_ans, dst_ans = g.all_edges(order='eid')
        induced_src_bg = F.gather_row(induced_src, bg_src)
        induced_dst_bg = F.gather_row(induced_dst, bg_dst)
        induced_src_ans = F.gather_row(src_ans, induced_eid)
        induced_dst_ans = F.gather_row(dst_ans, induced_eid)
        assert F.array_equal(induced_src_bg, induced_src_ans)
        assert F.array_equal(induced_dst_bg, induced_dst_ans)

    def checkall(g, bg, rhs_nodes):
        # Run check() for every etype, using the per-ntype rhs_nodes if given.
        for etype in g.etypes:
            ntype = g.to_canonical_etype(etype)[2]
            if rhs_nodes is not None and ntype in rhs_nodes:
                check(g, bg, ntype, etype, rhs_nodes[ntype])
            else:
                check(g, bg, ntype, etype, None)

    g = dgl.heterograph({
        ('A', 'AA', 'A'): [(0, 1), (2, 3), (1, 2), (3, 4)],
        ('A', 'AB', 'B'): [(0, 1), (1, 3), (3, 5), (1, 6)],
        ('B', 'BA', 'A'): [(2, 3), (3, 2)]})
    g_a = g['AA']

    # Single-etype slice, no explicit rhs nodes.
    bg = dgl.to_block(g_a)
    check(g_a, bg, 'A', 'AA', None)

    rhs_nodes = F.tensor([3, 4], dtype=F.int64)
    bg = dgl.to_block(g_a, rhs_nodes)
    check(g_a, bg, 'A', 'AA', rhs_nodes)

    # rhs nodes given in non-sorted order — order must be preserved.
    rhs_nodes = F.tensor([4, 3, 2, 1], dtype=F.int64)
    bg = dgl.to_block(g_a, rhs_nodes)
    check(g_a, bg, 'A', 'AA', rhs_nodes)

    g_ab = g['AB']

    bg = dgl.to_block(g_ab)
    assert bg.number_of_nodes('B_l') == 4
    assert F.array_equal(bg.nodes['B_l'].data[dgl.NID], bg.nodes['B_r'].data[dgl.NID])
    assert bg.number_of_nodes('A_r') == 0
    checkall(g_ab, bg, None)

    # Full heterograph with a per-ntype rhs_nodes dict.
    rhs_nodes = {'B': F.tensor([5, 6], dtype=F.int64)}
    bg = dgl.to_block(g, rhs_nodes)
    assert bg.number_of_nodes('B_l') == 2
    assert F.array_equal(bg.nodes['B_l'].data[dgl.NID], bg.nodes['B_r'].data[dgl.NID])
    assert bg.number_of_nodes('A_r') == 0
    checkall(g, bg, rhs_nodes)

    rhs_nodes = {'A': F.tensor([3, 4], dtype=F.int64), 'B': F.tensor([5, 6], dtype=F.int64)}
    bg = dgl.to_block(g, rhs_nodes)
    checkall(g, bg, rhs_nodes)

    rhs_nodes = {'A': F.tensor([4, 3, 2, 1], dtype=F.int64), 'B': F.tensor([3, 5, 6, 1], dtype=F.int64)}
    bg = dgl.to_block(g, rhs_nodes=rhs_nodes)
    checkall(g, bg, rhs_nodes)
def check_weighted_negative_sampler(mode, exclude_positive, neg_size):
    """Exercise ``EdgeSampler`` with edge/node weights.

    First runs correctness checks with uniform weights (id mappings,
    ``false_neg`` flags, replacement/reset behavior), then skews the
    weights and checks that the realized sampling rates follow them.

    Parameters (assumed from usage — confirm against callers):
        mode: negative sampling mode string; branches on ``'head' in mode``.
        exclude_positive: whether positives are excluded from negatives.
        neg_size: negative sample size per positive edge.
    """
    g = generate_rand_graph(100)
    num_edges = g.number_of_edges()
    num_nodes = g.number_of_nodes()
    # Uniform weights: the weighted sampler should behave like the plain one.
    edge_weight = F.copy_to(
        F.tensor(np.full((num_edges, ), 1, dtype=np.float32)), F.cpu())
    node_weight = F.copy_to(
        F.tensor(np.full((num_nodes, ), 1, dtype=np.float32)), F.cpu())
    etype = np.random.randint(0, 10, size=num_edges, dtype=np.int64)
    g.edata['etype'] = F.copy_to(F.tensor(etype), F.cpu())

    # Map (dst, eid) -> src for all positive edges.
    pos_gsrc, pos_gdst, pos_geid = g.all_edges(form='all', order='eid')
    pos_map = {}
    for i in range(len(pos_geid)):
        pos_d = int(F.asnumpy(pos_gdst[i]))
        pos_e = int(F.asnumpy(pos_geid[i]))
        pos_map[(pos_d, pos_e)] = int(F.asnumpy(pos_gsrc[i]))
    EdgeSampler = getattr(dgl.contrib.sampling, 'EdgeSampler')

    # Correctness check
    # Test the homogeneous graph.
    batch_size = 50
    # Test the graph with edge weight provided.
    total_samples = 0
    for pos_edges, neg_edges in EdgeSampler(g, batch_size,
                                            reset=False,
                                            edge_weight=edge_weight,
                                            negative_mode=mode,
                                            neg_sample_size=neg_size,
                                            exclude_positive=exclude_positive,
                                            return_false_neg=True):
        pos_lsrc, pos_ldst, pos_leid = pos_edges.all_edges(form='all', order='eid')
        assert_array_equal(
            F.asnumpy(F.gather_row(pos_edges.parent_eid, pos_leid)),
            F.asnumpy(g.edge_ids(F.gather_row(pos_edges.parent_nid, pos_lsrc),
                                 F.gather_row(pos_edges.parent_nid, pos_ldst))))

        neg_lsrc, neg_ldst, neg_leid = neg_edges.all_edges(form='all', order='eid')
        neg_src = F.gather_row(neg_edges.parent_nid, neg_lsrc)
        neg_dst = F.gather_row(neg_edges.parent_nid, neg_ldst)
        neg_eid = F.gather_row(neg_edges.parent_eid, neg_leid)
        for i in range(len(neg_eid)):
            neg_d = int(F.asnumpy(neg_dst[i]))
            neg_e = int(F.asnumpy(neg_eid[i]))
            assert (neg_d, neg_e) in pos_map
            if exclude_positive:
                assert int(F.asnumpy(neg_src[i])) != pos_map[(neg_d, neg_e)]

        check_head_tail(neg_edges)
        pos_tails = F.gather_row(pos_edges.parent_nid, pos_edges.tail_nid)
        neg_tails = F.gather_row(neg_edges.parent_nid, neg_edges.tail_nid)
        pos_tails = np.sort(F.asnumpy(pos_tails))
        neg_tails = np.sort(F.asnumpy(neg_tails))
        np.testing.assert_equal(pos_tails, neg_tails)

        exist = neg_edges.edata['false_neg']
        if exclude_positive:
            assert np.sum(F.asnumpy(exist) == 0) == len(exist)
        else:
            assert F.array_equal(g.has_edges_between(neg_src, neg_dst), exist)
        total_samples += batch_size
    assert total_samples <= num_edges

    # Test the knowledge graph with edge weight provided.
    total_samples = 0
    for pos_edges, neg_edges in EdgeSampler(g, batch_size,
                                            reset=False,
                                            edge_weight=edge_weight,
                                            negative_mode=mode,
                                            neg_sample_size=neg_size,
                                            exclude_positive=exclude_positive,
                                            relations=g.edata['etype'],
                                            return_false_neg=True):
        neg_lsrc, neg_ldst, neg_leid = neg_edges.all_edges(form='all', order='eid')
        neg_src = F.gather_row(neg_edges.parent_nid, neg_lsrc)
        neg_dst = F.gather_row(neg_edges.parent_nid, neg_ldst)
        neg_eid = F.gather_row(neg_edges.parent_eid, neg_leid)
        exists = neg_edges.edata['false_neg']
        neg_edges.edata['etype'] = F.gather_row(g.edata['etype'], neg_eid)
        for i in range(len(neg_eid)):
            u, v = F.asnumpy(neg_src[i]), F.asnumpy(neg_dst[i])
            if g.has_edge_between(u, v):
                # false negative only when the relation types also match
                eid = g.edge_id(u, v)
                etype = g.edata['etype'][eid]
                exist = neg_edges.edata['etype'][i] == etype
                assert F.asnumpy(exists[i]) == F.asnumpy(exist)
        total_samples += batch_size
    assert total_samples <= num_edges

    # Test the knowledge graph with edge/node weight provided.
    total_samples = 0
    for pos_edges, neg_edges in EdgeSampler(g, batch_size,
                                            reset=False,
                                            edge_weight=edge_weight,
                                            node_weight=node_weight,
                                            negative_mode=mode,
                                            neg_sample_size=neg_size,
                                            exclude_positive=exclude_positive,
                                            relations=g.edata['etype'],
                                            return_false_neg=True):
        neg_lsrc, neg_ldst, neg_leid = neg_edges.all_edges(form='all', order='eid')
        neg_src = F.gather_row(neg_edges.parent_nid, neg_lsrc)
        neg_dst = F.gather_row(neg_edges.parent_nid, neg_ldst)
        neg_eid = F.gather_row(neg_edges.parent_eid, neg_leid)
        exists = neg_edges.edata['false_neg']
        neg_edges.edata['etype'] = F.gather_row(g.edata['etype'], neg_eid)
        for i in range(len(neg_eid)):
            u, v = F.asnumpy(neg_src[i]), F.asnumpy(neg_dst[i])
            if g.has_edge_between(u, v):
                eid = g.edge_id(u, v)
                etype = g.edata['etype'][eid]
                exist = neg_edges.edata['etype'][i] == etype
                assert F.asnumpy(exists[i]) == F.asnumpy(exist)
        total_samples += batch_size
    assert total_samples <= num_edges

    # check replacement = True with pos edges no-uniform sample
    # with reset = False
    total_samples = 0
    for pos_edges, neg_edges in EdgeSampler(g, batch_size,
                                            replacement=True,
                                            reset=False,
                                            edge_weight=edge_weight,
                                            negative_mode=mode,
                                            neg_sample_size=neg_size,
                                            exclude_positive=exclude_positive,
                                            return_false_neg=True):
        _, _, pos_leid = pos_edges.all_edges(form='all', order='eid')
        assert len(pos_leid) == batch_size
        total_samples += len(pos_leid)
    assert total_samples == num_edges

    # check replacement = True with pos edges no-uniform sample
    # with reset = True
    total_samples = 0
    max_samples = 4 * num_edges
    for pos_edges, neg_edges in EdgeSampler(g, batch_size,
                                            replacement=True,
                                            reset=True,
                                            edge_weight=edge_weight,
                                            negative_mode=mode,
                                            neg_sample_size=neg_size,
                                            exclude_positive=exclude_positive,
                                            return_false_neg=True):
        _, _, pos_leid = pos_edges.all_edges(form='all', order='eid')
        assert len(pos_leid) == batch_size
        total_samples += len(pos_leid)
        if total_samples >= max_samples:
            break
    assert total_samples == max_samples

    # check replacement = False with pos/neg edges no-uniform sample
    # reset = False
    total_samples = 0
    for pos_edges, neg_edges in EdgeSampler(g, batch_size,
                                            replacement=False,
                                            reset=False,
                                            edge_weight=edge_weight,
                                            node_weight=node_weight,
                                            negative_mode=mode,
                                            neg_sample_size=neg_size,
                                            exclude_positive=exclude_positive,
                                            relations=g.edata['etype'],
                                            return_false_neg=True):
        _, _, pos_leid = pos_edges.all_edges(form='all', order='eid')
        assert len(pos_leid) == batch_size
        total_samples += len(pos_leid)
    assert total_samples == num_edges

    # check replacement = False with pos/neg edges no-uniform sample
    # reset = True
    total_samples = 0
    for pos_edges, neg_edges in EdgeSampler(g, batch_size,
                                            replacement=False,
                                            reset=True,
                                            edge_weight=edge_weight,
                                            node_weight=node_weight,
                                            negative_mode=mode,
                                            neg_sample_size=neg_size,
                                            exclude_positive=exclude_positive,
                                            relations=g.edata['etype'],
                                            return_false_neg=True):
        _, _, pos_leid = pos_edges.all_edges(form='all', order='eid')
        assert len(pos_leid) == batch_size
        total_samples += len(pos_leid)
        if total_samples >= max_samples:
            break
    assert total_samples == max_samples

    # Check Rate — skew the weights: edge 0 carries half the total edge
    # weight; the last node carries a boosted node weight.
    dgl.random.seed(0)
    g = generate_rand_graph(1000)
    num_edges = g.number_of_edges()
    num_nodes = g.number_of_nodes()
    edge_weight = F.copy_to(
        F.tensor(np.full((num_edges, ), 1, dtype=np.float32)), F.cpu())
    edge_weight[0] = F.sum(edge_weight, dim=0)
    node_weight = F.copy_to(
        F.tensor(np.full((num_nodes, ), 1, dtype=np.float32)), F.cpu())
    node_weight[-1] = F.sum(node_weight, dim=0) / 200
    etype = np.random.randint(0, 20, size=num_edges, dtype=np.int64)
    g.edata['etype'] = F.copy_to(F.tensor(etype), F.cpu())

    # Test w/o node weight.
    max_samples = num_edges // 5
    total_samples = 0
    # Test the knowledge graph with edge weight provided.
    edge_sampled = np.full((num_edges, ), 0, dtype=np.int32)
    node_sampled = np.full((num_nodes, ), 0, dtype=np.int32)
    for pos_edges, neg_edges in EdgeSampler(g, batch_size,
                                            replacement=True,
                                            edge_weight=edge_weight,
                                            shuffle=True,
                                            negative_mode=mode,
                                            neg_sample_size=neg_size,
                                            exclude_positive=False,
                                            relations=g.edata['etype'],
                                            return_false_neg=True):
        _, _, pos_leid = pos_edges.all_edges(form='all', order='eid')
        neg_lsrc, neg_ldst, _ = neg_edges.all_edges(form='all', order='eid')
        # Count sampled nodes on the corrupted side (head or tail).
        if 'head' in mode:
            neg_src = neg_edges.parent_nid[neg_lsrc]
            np.add.at(node_sampled, F.asnumpy(neg_src), 1)
        else:
            neg_dst = neg_edges.parent_nid[neg_ldst]
            np.add.at(node_sampled, F.asnumpy(neg_dst), 1)
        np.add.at(edge_sampled, F.asnumpy(pos_edges.parent_eid[pos_leid]), 1)
        total_samples += batch_size
        if total_samples > max_samples:
            break

    # Check rate here
    edge_rate_0 = edge_sampled[0] / edge_sampled.sum()
    edge_tail_half_cnt = edge_sampled[edge_sampled.shape[0] // 2:-1].sum()
    edge_rate_tail_half = edge_tail_half_cnt / edge_sampled.sum()
    assert np.allclose(edge_rate_0, 0.5, atol=0.05)
    assert np.allclose(edge_rate_tail_half, 0.25, atol=0.05)

    node_rate_0 = node_sampled[0] / node_sampled.sum()
    node_tail_half_cnt = node_sampled[node_sampled.shape[0] // 2:-1].sum()
    node_rate_tail_half = node_tail_half_cnt / node_sampled.sum()
    assert node_rate_0 < 0.02
    assert np.allclose(node_rate_tail_half, 0.5, atol=0.02)

    # Test the knowledge graph with edge/node weight provided.
    edge_sampled = np.full((num_edges, ), 0, dtype=np.int32)
    node_sampled = np.full((num_nodes, ), 0, dtype=np.int32)
    total_samples = 0
    for pos_edges, neg_edges in EdgeSampler(g, batch_size,
                                            replacement=True,
                                            edge_weight=edge_weight,
                                            node_weight=node_weight,
                                            shuffle=True,
                                            negative_mode=mode,
                                            neg_sample_size=neg_size,
                                            exclude_positive=False,
                                            relations=g.edata['etype'],
                                            return_false_neg=True):
        _, _, pos_leid = pos_edges.all_edges(form='all', order='eid')
        neg_lsrc, neg_ldst, _ = neg_edges.all_edges(form='all', order='eid')
        if 'head' in mode:
            neg_src = F.gather_row(neg_edges.parent_nid, neg_lsrc)
            np.add.at(node_sampled, F.asnumpy(neg_src), 1)
        else:
            neg_dst = F.gather_row(neg_edges.parent_nid, neg_ldst)
            np.add.at(node_sampled, F.asnumpy(neg_dst), 1)
        np.add.at(edge_sampled, F.asnumpy(pos_edges.parent_eid[pos_leid]), 1)
        total_samples += batch_size
        if total_samples > max_samples:
            break

    # Check rate here
    edge_rate_0 = edge_sampled[0] / edge_sampled.sum()
    edge_tail_half_cnt = edge_sampled[edge_sampled.shape[0] // 2:-1].sum()
    edge_rate_tail_half = edge_tail_half_cnt / edge_sampled.sum()
    assert np.allclose(edge_rate_0, 0.5, atol=0.05)
    assert np.allclose(edge_rate_tail_half, 0.25, atol=0.05)

    node_rate = node_sampled[-1] / node_sampled.sum()
    node_rate_a = np.average(node_sampled[:50]) / node_sampled.sum()
    node_rate_b = np.average(node_sampled[50:100]) / node_sampled.sum()
    # As neg sampling does not contain duplicate nodes,
    # this test takes some acceptable variation on the sample rate.
    assert np.allclose(node_rate, node_rate_a * 5, atol=0.002)
    assert np.allclose(node_rate_a, node_rate_b, atol=0.0002)
def test_out_subgraph(idtype):
    """Check ``dgl.out_subgraph`` on a heterograph.

    Verifies the induced edge set and EID mapping per etype, the
    ``store_ids=False`` option, and ``relabel_nodes=True`` with parent
    NID mappings and compacted node counts.
    """
    hg = dgl.heterograph({
        ('user', 'follow', 'user'): ([1, 2, 3, 0, 2, 3, 0],
                                     [0, 0, 0, 1, 1, 1, 2]),
        ('user', 'play', 'game'): ([0, 0, 1, 3], [0, 1, 2, 2]),
        ('game', 'liked-by', 'user'): ([2, 2, 2, 1, 1, 0],
                                       [0, 1, 2, 0, 3, 0]),
        ('user', 'flips', 'coin'): ([0, 1, 2, 3], [0, 0, 0, 0])
    }, idtype=idtype)
    # Out-subgraph of users {0, 1} and game 0.
    subg = dgl.out_subgraph(hg, {'user': [0, 1], 'game': 0})
    assert subg.idtype == idtype
    assert len(subg.ntypes) == 3
    assert len(subg.etypes) == 4
    u, v = subg['follow'].edges()
    edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
    assert edge_set == {(1, 0), (0, 1), (0, 2)}
    assert F.array_equal(hg['follow'].edge_ids(u, v), subg['follow'].edata[dgl.EID])
    u, v = subg['play'].edges()
    edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
    assert edge_set == {(0, 0), (0, 1), (1, 2)}
    assert F.array_equal(hg['play'].edge_ids(u, v), subg['play'].edata[dgl.EID])
    u, v = subg['liked-by'].edges()
    edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
    assert edge_set == {(0, 0)}
    assert F.array_equal(hg['liked-by'].edge_ids(u, v), subg['liked-by'].edata[dgl.EID])
    u, v = subg['flips'].edges()
    edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
    assert edge_set == {(0, 0), (1, 0)}
    assert F.array_equal(hg['flips'].edge_ids(u, v), subg['flips'].edata[dgl.EID])
    # Without relabeling, no node-id mapping should be stored.
    for ntype in subg.ntypes:
        assert dgl.NID not in subg.nodes[ntype].data

    # Test store_ids
    subg = dgl.out_subgraph(hg, {'user': [0, 1], 'game': 0}, store_ids=False)
    for etype in subg.canonical_etypes:
        assert dgl.EID not in subg.edges[etype].data
    for ntype in subg.ntypes:
        assert dgl.NID not in subg.nodes[ntype].data

    # Test relabel nodes
    subg = dgl.out_subgraph(hg, {'user': [1], 'game': 0}, relabel_nodes=True)
    assert subg.idtype == idtype
    assert len(subg.ntypes) == 3
    assert len(subg.etypes) == 4
    # Map relabeled endpoints back to parent ids before comparing.
    u, v = subg['follow'].edges()
    old_u = F.gather_row(subg.nodes['user'].data[dgl.NID], u)
    old_v = F.gather_row(subg.nodes['user'].data[dgl.NID], v)
    edge_set = set(zip(list(F.asnumpy(old_u)), list(F.asnumpy(old_v))))
    assert edge_set == {(1, 0)}
    assert F.array_equal(hg['follow'].edge_ids(old_u, old_v), subg['follow'].edata[dgl.EID])
    u, v = subg['play'].edges()
    old_u = F.gather_row(subg.nodes['user'].data[dgl.NID], u)
    old_v = F.gather_row(subg.nodes['game'].data[dgl.NID], v)
    edge_set = set(zip(list(F.asnumpy(old_u)), list(F.asnumpy(old_v))))
    assert edge_set == {(1, 2)}
    assert F.array_equal(hg['play'].edge_ids(old_u, old_v), subg['play'].edata[dgl.EID])
    u, v = subg['liked-by'].edges()
    old_u = F.gather_row(subg.nodes['game'].data[dgl.NID], u)
    old_v = F.gather_row(subg.nodes['user'].data[dgl.NID], v)
    edge_set = set(zip(list(F.asnumpy(old_u)), list(F.asnumpy(old_v))))
    assert edge_set == {(0, 0)}
    assert F.array_equal(hg['liked-by'].edge_ids(old_u, old_v), subg['liked-by'].edata[dgl.EID])
    u, v = subg['flips'].edges()
    old_u = F.gather_row(subg.nodes['user'].data[dgl.NID], u)
    old_v = F.gather_row(subg.nodes['coin'].data[dgl.NID], v)
    edge_set = set(zip(list(F.asnumpy(old_u)), list(F.asnumpy(old_v))))
    assert edge_set == {(1, 0)}
    assert F.array_equal(hg['flips'].edge_ids(old_u, old_v), subg['flips'].edata[dgl.EID])
    # Relabeling compacts node sets to the incident nodes only.
    assert subg.num_nodes('user') == 2
    assert subg.num_nodes('game') == 2
    assert subg.num_nodes('coin') == 1
def test_copy():
    """Check NodeFlow ``copy_from_parent`` / ``copy_to_parent``.

    Covers default copying of all parent features, copying a named subset
    per layer/block, computing on copied features layer by layer, and
    writing results back to the parent graph.
    """
    num_layers = 2
    g = generate_rand_graph(100)
    g.ndata['h'] = g.ndata['h1']
    nf = create_mini_batch(g, num_layers)
    # Default: every parent node/edge feature is copied into the NodeFlow.
    nf.copy_from_parent()
    for i in range(nf.num_layers):
        assert len(g.ndata.keys()) == len(nf.layers[i].data.keys())
        for key in g.ndata.keys():
            assert key in nf.layers[i].data.keys()
            assert F.array_equal(nf.layers[i].data[key],
                                 g.ndata[key][nf.layer_parent_nid(i)])
    for i in range(nf.num_blocks):
        assert len(g.edata.keys()) == len(nf.blocks[i].data.keys())
        for key in g.edata.keys():
            assert key in nf.blocks[i].data.keys()
            assert F.array_equal(nf.blocks[i].data[key],
                                 g.edata[key][nf.block_parent_eid(i)])

    # Copy only the named features, one list per layer/block.
    nf = create_mini_batch(g, num_layers)
    node_embed_names = [['h'], ['h1'], ['h']]
    edge_embed_names = [['h2'], ['h2']]
    nf.copy_from_parent(node_embed_names=node_embed_names,
                        edge_embed_names=edge_embed_names)
    for i in range(nf.num_layers):
        assert len(node_embed_names[i]) == len(nf.layers[i].data.keys())
        for key in node_embed_names[i]:
            assert key in nf.layers[i].data.keys()
            assert F.array_equal(nf.layers[i].data[key],
                                 g.ndata[key][nf.layer_parent_nid(i)])
    for i in range(nf.num_blocks):
        assert len(edge_embed_names[i]) == len(nf.blocks[i].data.keys())
        for key in edge_embed_names[i]:
            assert key in nf.blocks[i].data.keys()
            assert F.array_equal(nf.blocks[i].data[key],
                                 g.edata[key][nf.block_parent_eid(i)])

    # Copy h0 to layer 0 only, compute per block, and compare against the
    # equivalent full-graph update_all result.
    nf = create_mini_batch(g, num_layers)
    g.ndata['h0'] = F.clone(g.ndata['h'])
    node_embed_names = [['h0'], [], []]
    nf.copy_from_parent(node_embed_names=node_embed_names, edge_embed_names=None)
    for i in range(num_layers):
        nf.block_compute(i, fn.copy_src(src='h%d' % i, out='m'),
                         fn.sum(msg='m', out='t'),
                         lambda nodes: {'h%d' % (i + 1): nodes.data['t'] + 1})
        g.update_all(fn.copy_src(src='h', out='m'),
                     fn.sum(msg='m', out='t'),
                     lambda nodes: {'h': nodes.data['t'] + 1})
        assert F.array_equal(nf.layers[i + 1].data['h%d' % (i + 1)],
                             g.ndata['h'][nf.layer_parent_nid(i + 1)])
    # Write per-layer results back to distinct parent features.
    nf.copy_to_parent(node_embed_names=[['h0'], ['h1'], ['h2']])
    for i in range(num_layers + 1):
        assert F.array_equal(nf.layers[i].data['h%d' % i],
                             g.ndata['h%d' % i][nf.layer_parent_nid(i)])

    # Per-layer named features are visible to UDFs during block_compute.
    nf = create_mini_batch(g, num_layers)
    g.ndata['h0'] = F.clone(g.ndata['h'])
    g.ndata['h1'] = F.clone(g.ndata['h'])
    g.ndata['h2'] = F.clone(g.ndata['h'])
    node_embed_names = [['h0'], ['h1'], ['h2']]
    nf.copy_from_parent(node_embed_names=node_embed_names, edge_embed_names=None)

    def msg_func(edge, ind):
        # source layer must expose the feature copied for that layer
        assert 'h%d' % ind in edge.src.keys()
        return {'m': edge.src['h%d' % ind]}

    def reduce_func(node, ind):
        # destination layer must expose the next layer's feature
        assert 'h%d' % (ind + 1) in node.data.keys()
        return {'h': F.sum(node.mailbox['m'], 1) + node.data['h%d' % (ind + 1)]}

    for i in range(num_layers):
        nf.block_compute(i, partial(msg_func, ind=i), partial(reduce_func, ind=i))
def atest_nx_conversion(index_dtype):
    """Round-trip conversion between networkx and DGLGraph.

    Checks feature transfer in both directions, edge ordering with and
    without the 'id' edge attribute, and conversion from a networkx graph
    whose node labels are not consecutive integers.
    (Name prefixed with 'a' — presumably disabled as a pytest test; confirm.)
    """
    # check conversion between networkx and DGLGraph

    def _check_nx_feature(nxg, nf, ef):
        # check node and edge feature of nxg
        # this is used to check to_networkx
        num_nodes = len(nxg)
        num_edges = nxg.size()
        if num_nodes > 0:
            node_feat = ddict(list)
            for nid, attr in nxg.nodes(data=True):
                assert len(attr) == len(nf)
                for k in nxg.nodes[nid]:
                    node_feat[k].append(F.unsqueeze(attr[k], 0))
            for k in node_feat:
                feat = F.cat(node_feat[k], 0)
                assert F.allclose(feat, nf[k])
        else:
            assert len(nf) == 0
        if num_edges > 0:
            # put edge features into a fixed-length list indexed by edge id
            edge_feat = ddict(lambda: [0] * num_edges)
            for u, v, attr in nxg.edges(data=True):
                assert len(attr) == len(ef) + 1  # extra id
                eid = attr['id']
                for k in ef:
                    edge_feat[k][eid] = F.unsqueeze(attr[k], 0)
            for k in edge_feat:
                feat = F.cat(edge_feat[k], 0)
                assert F.allclose(feat, ef[k])
        else:
            assert len(ef) == 0

    n1 = F.randn((5, 3))
    n2 = F.randn((5, 10))
    n3 = F.randn((5, 4))
    e1 = F.randn((4, 5))
    e2 = F.randn((4, 7))
    g = dgl.graph([(0, 2), (1, 4), (3, 0), (4, 3)], index_dtype=index_dtype)
    g.ndata.update({'n1': n1, 'n2': n2, 'n3': n3})
    g.edata.update({'e1': e1, 'e2': e2})

    # convert to networkx
    nxg = dgl.to_networkx(g, node_attrs=['n1', 'n3'], edge_attrs=['e1', 'e2'])
    assert len(nxg) == 5
    assert nxg.size() == 4
    _check_nx_feature(nxg, {'n1': n1, 'n3': n3}, {'e1': e1, 'e2': e2})

    # convert to DGLGraph, nx graph has id in edge feature
    # use id feature to test non-tensor copy
    g = dgl.graph(nxg, node_attrs=['n1'], edge_attrs=['e1', 'id'], index_dtype=index_dtype)
    assert g._idtype_str == index_dtype
    # check graph size
    assert g.number_of_nodes() == 5
    assert g.number_of_edges() == 4
    # check number of features
    # test with existing dglgraph (so existing features should be cleared)
    assert len(g.ndata) == 1
    assert len(g.edata) == 2
    # check feature values
    assert F.allclose(g.ndata['n1'], n1)
    # with id in nx edge feature, e1 should follow original order
    assert F.allclose(g.edata['e1'], e1)
    assert F.array_equal(g.edata['id'], F.copy_to(F.arange(0, 4), F.cpu()))

    # test conversion after modifying DGLGraph
    # TODO(minjie): enable after mutation is supported
    #g.pop_e_repr('id') # pop id so we don't need to provide id when adding edges
    #new_n = F.randn((2, 3))
    #new_e = F.randn((3, 5))
    #g.add_nodes(2, data={'n1': new_n})
    ## add three edges, one is a multi-edge
    #g.add_edges([3, 6, 0], [4, 5, 2], data={'e1': new_e})
    #n1 = F.cat((n1, new_n), 0)
    #e1 = F.cat((e1, new_e), 0)
    ## convert to networkx again
    #nxg = g.to_networkx(node_attrs=['n1'], edge_attrs=['e1'])
    #assert len(nxg) == 7
    #assert nxg.size() == 7
    #_check_nx_feature(nxg, {'n1': n1}, {'e1': e1})

    # now test convert from networkx without id in edge feature
    # first pop id in edge feature
    for _, _, attr in nxg.edges(data=True):
        attr.pop('id')
    # test with a new graph
    g = dgl.graph(nxg, node_attrs=['n1'], edge_attrs=['e1'])
    # check graph size
    assert g.number_of_nodes() == 5
    assert g.number_of_edges() == 4
    # check number of features
    assert len(g.ndata) == 1
    assert len(g.edata) == 1
    # check feature values
    assert F.allclose(g.ndata['n1'], n1)
    # edge feature order follows nxg.edges()
    edge_feat = []
    for _, _, attr in nxg.edges(data=True):
        edge_feat.append(F.unsqueeze(attr['e1'], 0))
    edge_feat = F.cat(edge_feat, 0)
    assert F.allclose(g.edata['e1'], edge_feat)

    # Test converting from a networkx graph whose nodes are
    # not labeled with consecutive-integers.
    nxg = nx.cycle_graph(5)
    nxg.remove_nodes_from([0, 4])
    for u in nxg.nodes():
        nxg.nodes[u]['h'] = F.tensor([u])
    for u, v, d in nxg.edges(data=True):
        d['h'] = F.tensor([u, v])

    g = dgl.DGLGraph()
    g.from_networkx(nxg, node_attrs=['h'], edge_attrs=['h'])
    assert g.number_of_nodes() == 3
    assert g.number_of_edges() == 4
    assert g.has_edge_between(0, 1)
    assert g.has_edge_between(1, 2)
    assert F.allclose(g.ndata['h'], F.tensor([[1.], [2.], [3.]]))
    assert F.allclose(g.edata['h'], F.tensor([[1., 2.], [1., 2.],
                                              [2., 3.], [2., 3.]]))
def check_negative_sampler(mode, exclude_positive, neg_size):
    """Validate ``dgl.contrib.sampling.EdgeSampler`` negative sampling.

    Draws positive/negative edge minibatches from a random graph and checks:
      * positive subgraph parent edge ids match the parent graph's edge ids;
      * every negative edge corrupts a real positive (dst, eid) pair;
      * with ``exclude_positive`` the true source is never re-sampled and
        every 'false_neg' flag is zero;
      * in the knowledge-graph case (``relations=``) the 'false_neg' flag
        agrees with a per-edge relation-type comparison.
    """
    g = generate_rand_graph(100)
    rel = np.random.randint(0, 10, size=g.number_of_edges(), dtype=np.int64)
    g.edata['etype'] = F.tensor(rel)

    # Map (dst, eid) -> src over all positive edges so each sampled negative
    # can be traced back to the positive edge it corrupts.
    all_src, all_dst, all_eid = g.all_edges(form='all', order='eid')
    pos_map = {(int(d), int(e)): int(s)
               for s, d, e in zip(F.asnumpy(all_src),
                                  F.asnumpy(all_dst),
                                  F.asnumpy(all_eid))}

    EdgeSampler = getattr(dgl.contrib.sampling, 'EdgeSampler')

    # Test the homogeneous graph.
    for pos_edges, neg_edges in EdgeSampler(g, 50,
                                            negative_mode=mode,
                                            neg_sample_size=neg_size,
                                            exclude_positive=exclude_positive,
                                            return_false_neg=True):
        lsrc, ldst, leid = pos_edges.all_edges(form='all', order='eid')
        assert_array_equal(
            F.asnumpy(pos_edges.parent_eid[leid]),
            F.asnumpy(g.edge_ids(pos_edges.parent_nid[lsrc],
                                 pos_edges.parent_nid[ldst])))

        nsrc, ndst, neid = neg_edges.all_edges(form='all', order='eid')
        neg_src = neg_edges.parent_nid[nsrc]
        neg_dst = neg_edges.parent_nid[ndst]
        neg_eid = neg_edges.parent_eid[neid]
        for s, d, e in zip(F.asnumpy(neg_src),
                           F.asnumpy(neg_dst),
                           F.asnumpy(neg_eid)):
            key = (int(d), int(e))
            assert key in pos_map
            if exclude_positive:
                # The true source must never be drawn as a negative.
                assert int(s) != pos_map[key]

        false_neg = neg_edges.edata['false_neg']
        if exclude_positive:
            # With positives excluded, nothing should be flagged.
            assert np.sum(F.asnumpy(false_neg) == 0) == len(false_neg)
        else:
            assert F.array_equal(g.has_edges_between(neg_src, neg_dst), false_neg)

    # Test the knowledge graph.
    for _, neg_edges in EdgeSampler(g, 50,
                                    negative_mode=mode,
                                    neg_sample_size=neg_size,
                                    exclude_positive=exclude_positive,
                                    relations=g.edata['etype'],
                                    return_false_neg=True):
        nsrc, ndst, neid = neg_edges.all_edges(form='all', order='eid')
        neg_src = neg_edges.parent_nid[nsrc]
        neg_dst = neg_edges.parent_nid[ndst]
        neg_eid = neg_edges.parent_eid[neid]
        false_neg = neg_edges.edata['false_neg']
        neg_edges.edata['etype'] = g.edata['etype'][neg_eid]
        for i in range(len(neg_eid)):
            u, v = F.asnumpy(neg_src[i]), F.asnumpy(neg_dst[i])
            if g.has_edge_between(u, v):
                # A 'false negative' means the corrupted edge exists in the
                # parent graph with the same relation type.
                eid = g.edge_id(u, v)
                same_rel = neg_edges.edata['etype'][i] == g.edata['etype'][eid]
                assert F.asnumpy(false_neg[i]) == F.asnumpy(same_rel)
def test_to_simple(index_dtype):
    """Test ``dgl.to_simple`` on homogeneous and heterogeneous graphs.

    Checks that the simplified graph has deduplicated edges with correct
    multiplicity counts, that the writeback mapping sends each original edge
    to its simplified counterpart, and that node/edge data are shared (or
    not) according to ``copy_ndata``/``copy_edata``.
    """
    # homogeneous graph; edge (1, 2) appears twice
    g = dgl.graph((F.tensor([0, 1, 2, 1]), F.tensor([1, 2, 0, 2])))
    g.ndata['h'] = F.tensor([[0.], [1.], [2.]])
    g.edata['h'] = F.tensor([[3.], [4.], [5.], [6.]])
    sg, wb = dgl.to_simple(g, writeback_mapping=True)

    u, v = g.all_edges(form='uv', order='eid')
    u = F.asnumpy(u).tolist()
    v = F.asnumpy(v).tolist()
    uv = list(zip(u, v))
    eid_map = F.asnumpy(wb)

    su, sv = sg.all_edges(form='uv', order='eid')
    su = F.asnumpy(su).tolist()
    sv = F.asnumpy(sv).tolist()
    suv = list(zip(su, sv))
    sc = F.asnumpy(sg.edata['count'])

    assert set(uv) == set(suv)
    # 'count' records the multiplicity of each simplified edge
    for i, e in enumerate(suv):
        assert sc[i] == sum(e == _e for _e in uv)
    # the writeback mapping sends each original eid to its simplified eid
    for i, e in enumerate(uv):
        assert eid_map[i] == suv.index(e)

    # shared ndata
    assert F.array_equal(sg.ndata['h'], g.ndata['h'])
    assert 'h' not in sg.edata
    # new ndata to sg must not leak back into g
    sg.ndata['hh'] = F.tensor([[0.], [1.], [2.]])
    assert 'hh' not in g.ndata

    sg = dgl.to_simple(g, writeback_mapping=False, copy_ndata=False)
    assert 'h' not in sg.ndata
    assert 'h' not in sg.edata

    # heterogeneous graph
    g = dgl.heterograph({
        ('user', 'follow', 'user'): ([0, 1, 2, 1, 1, 1], [1, 3, 2, 3, 4, 4]),
        ('user', 'plays', 'game'): ([3, 2, 1, 1, 3, 2, 2], [5, 3, 4, 4, 5, 3, 3])},
        index_dtype=index_dtype)
    g.nodes['user'].data['h'] = F.tensor([0, 1, 2, 3, 4])
    g.nodes['user'].data['hh'] = F.tensor([0, 1, 2, 3, 4])
    g.edges['follow'].data['h'] = F.tensor([0, 1, 2, 3, 4, 5])
    sg, wb = dgl.to_simple(g, return_counts='weights', writeback_mapping=True, copy_edata=True)
    # deliberately assigned AFTER to_simple: must not show up in sg
    g.nodes['game'].data['h'] = F.tensor([0, 1, 2, 3, 4, 5])

    for etype in g.canonical_etypes:
        u, v = g.all_edges(form='uv', order='eid', etype=etype)
        u = F.asnumpy(u).tolist()
        v = F.asnumpy(v).tolist()
        uv = list(zip(u, v))
        eid_map = F.asnumpy(wb[etype])

        su, sv = sg.all_edges(form='uv', order='eid', etype=etype)
        su = F.asnumpy(su).tolist()
        sv = F.asnumpy(sv).tolist()
        suv = list(zip(su, sv))
        # multiplicities stored under the custom name 'weights'
        sw = F.asnumpy(sg.edges[etype].data['weights'])

        assert set(uv) == set(suv)
        for i, e in enumerate(suv):
            assert sw[i] == sum(e == _e for _e in uv)
        for i, e in enumerate(uv):
            assert eid_map[i] == suv.index(e)

    # shared ndata
    assert F.array_equal(sg.nodes['user'].data['h'], g.nodes['user'].data['h'])
    assert F.array_equal(sg.nodes['user'].data['hh'], g.nodes['user'].data['hh'])
    assert 'h' not in sg.nodes['game'].data
    # new ndata to sg must not leak back into g
    sg.nodes['user'].data['hhh'] = F.tensor([0, 1, 2, 3, 4])
    assert 'hhh' not in g.nodes['user'].data

    # share edata: with copy_edata, each simplified edge carries the feature
    # of the first original edge mapped onto it
    feat_idx = F.asnumpy(wb[('user', 'follow', 'user')])
    _, indices = np.unique(feat_idx, return_index=True)
    assert np.array_equal(
        F.asnumpy(sg.edges['follow'].data['h']),
        F.asnumpy(g.edges['follow'].data['h'])[indices])

    sg = dgl.to_simple(g, writeback_mapping=False, copy_ndata=False)
    for ntype in g.ntypes:
        assert g.number_of_nodes(ntype) == sg.number_of_nodes(ntype)
    assert 'h' not in sg.nodes['user'].data
    assert 'hh' not in sg.nodes['user'].data
def test_view1():
    """Test single-relation views (``HG[etype]``) of a heterograph.

    Runs a battery of query checks (edge existence, neighbors, degrees,
    edge ids, find_edges, all_edges) once with plain edge-type names and
    once with canonical etype tuples, then checks that ndata/edata written
    through a view are visible on the parent graph, and that ``.ndata`` /
    ``.edata`` raise on a graph with multiple node/edge types.
    """
    # test relation view
    HG = create_test_heterograph()
    ntypes = ['user', 'game', 'developer']  # NOTE(review): unused below
    canonical_etypes = [
        ('user', 'follows', 'user'),
        ('user', 'plays', 'game'),
        ('user', 'wishes', 'game'),
        ('developer', 'develops', 'game')]
    etypes = ['follows', 'plays', 'wishes', 'develops']

    def _test_query():
        # Closure: reads `etypes`, `edges`, `negative_edges` from the
        # enclosing scope, which are rebound before each call.
        for etype in etypes:
            utype, _, vtype = HG.to_canonical_etype(etype)
            g = HG[etype]
            srcs, dsts = edges[etype]
            for src, dst in zip(srcs, dsts):
                assert g.has_edge_between(src, dst)
            assert F.asnumpy(g.has_edges_between(srcs, dsts)).all()

            srcs, dsts = negative_edges[etype]
            for src, dst in zip(srcs, dsts):
                assert not g.has_edge_between(src, dst)
            assert not F.asnumpy(g.has_edges_between(srcs, dsts)).any()

            srcs, dsts = edges[etype]
            n_edges = len(srcs)

            # predecessors & in_edges & in_degree
            pred = [s for s, d in zip(srcs, dsts) if d == 0]
            assert set(F.asnumpy(g.predecessors(0)).tolist()) == set(pred)
            u, v = g.in_edges([0])
            assert F.asnumpy(v).tolist() == [0] * len(pred)
            assert set(F.asnumpy(u).tolist()) == set(pred)
            assert g.in_degree(0) == len(pred)

            # successors & out_edges & out_degree
            succ = [d for s, d in zip(srcs, dsts) if s == 0]
            assert set(F.asnumpy(g.successors(0)).tolist()) == set(succ)
            u, v = g.out_edges([0])
            assert F.asnumpy(u).tolist() == [0] * len(succ)
            assert set(F.asnumpy(v).tolist()) == set(succ)
            assert g.out_degree(0) == len(succ)

            # edge_id & edge_ids
            for i, (src, dst) in enumerate(zip(srcs, dsts)):
                assert g.edge_id(src, dst) == i
                assert F.asnumpy(g.edge_id(src, dst, force_multi=True)).tolist() == [i]
            assert F.asnumpy(g.edge_ids(srcs, dsts)).tolist() == list(range(n_edges))
            u, v, e = g.edge_ids(srcs, dsts, force_multi=True)
            assert F.asnumpy(u).tolist() == srcs
            assert F.asnumpy(v).tolist() == dsts
            assert F.asnumpy(e).tolist() == list(range(n_edges))

            # find_edges
            u, v = g.find_edges(list(range(n_edges)))
            assert F.asnumpy(u).tolist() == srcs
            assert F.asnumpy(v).tolist() == dsts

            # all_edges.
            for order in ['eid']:
                u, v, e = g.all_edges(form='all', order=order)
                assert F.asnumpy(u).tolist() == srcs
                assert F.asnumpy(v).tolist() == dsts
                assert F.asnumpy(e).tolist() == list(range(n_edges))

            # in_degrees & out_degrees
            in_degrees = F.asnumpy(g.in_degrees())
            out_degrees = F.asnumpy(g.out_degrees())
            src_count = Counter(srcs)
            dst_count = Counter(dsts)
            for i in range(g.number_of_nodes(utype)):
                assert out_degrees[i] == src_count[i]
            for i in range(g.number_of_nodes(vtype)):
                assert in_degrees[i] == dst_count[i]

    edges = {
        'follows': ([0, 1], [1, 2]),
        'plays': ([0, 1, 2, 1], [0, 0, 1, 1]),
        'wishes': ([0, 2], [1, 0]),
        'develops': ([0, 1], [0, 1]),
    }
    # edges that does not exist in the graph
    negative_edges = {
        'follows': ([0, 1], [0, 1]),
        'plays': ([0, 2], [1, 0]),
        'wishes': ([0, 1], [0, 1]),
        'develops': ([0, 1], [1, 0]),
    }
    _test_query()

    # Run the same query battery keyed by canonical etype tuples.
    etypes = canonical_etypes
    edges = {
        ('user', 'follows', 'user'): ([0, 1], [1, 2]),
        ('user', 'plays', 'game'): ([0, 1, 2, 1], [0, 0, 1, 1]),
        ('user', 'wishes', 'game'): ([0, 2], [1, 0]),
        ('developer', 'develops', 'game'): ([0, 1], [0, 1]),
    }
    # edges that does not exist in the graph
    negative_edges = {
        ('user', 'follows', 'user'): ([0, 1], [0, 1]),
        ('user', 'plays', 'game'): ([0, 2], [1, 0]),
        ('user', 'wishes', 'game'): ([0, 1], [0, 1]),
        ('developer', 'develops', 'game'): ([0, 1], [1, 0]),
    }
    _test_query()

    # test features
    HG.nodes['user'].data['h'] = F.ones((HG.number_of_nodes('user'), 5))
    HG.nodes['game'].data['m'] = F.ones((HG.number_of_nodes('game'), 3)) * 2

    # test only one node type
    g = HG['follows']
    assert g.number_of_nodes() == 3

    # test ndata and edata: writes through the view are visible on HG
    f1 = F.randn((3, 6))
    g.ndata['h'] = f1       # ok
    f2 = HG.nodes['user'].data['h']
    assert F.array_equal(f1, f2)
    assert F.array_equal(F.tensor(g.nodes()), F.arange(0, 3))

    f3 = F.randn((2, 4))
    g.edata['h'] = f3
    f4 = HG.edges['follows'].data['h']
    assert F.array_equal(f3, f4)
    assert F.array_equal(F.tensor(g.edges(form='eid')), F.arange(0, 2))

    # test fail case
    # fail due to multiple types
    fail = False
    try:
        HG.ndata['h']
    except dgl.DGLError:
        fail = True
    assert fail

    fail = False
    try:
        HG.edata['h']
    except dgl.DGLError:
        fail = True
    assert fail
def test_reverse():
    """Test graph reversal: ``DGLGraph.reverse`` and ``dgl.reverse_heterograph``.

    Checks that reversal flips edge direction while preserving node/edge
    counts and edge ids, and that node/edge features are shared or dropped
    according to ``copy_ndata``/``copy_edata``. Covers a homogeneous graph
    and a three-relation heterograph.

    Fixes vs. previous revision: removed a leftover debug ``print(g_r)``;
    behavior is otherwise unchanged.
    """
    g = dgl.DGLGraph()
    g.add_nodes(5)
    # The graph need not be completely connected.
    g.add_edges([0, 1, 2], [1, 2, 1])
    g.ndata['h'] = F.tensor([[0.], [1.], [2.], [3.], [4.]])
    g.edata['h'] = F.tensor([[5.], [6.], [7.]])
    rg = g.reverse()

    assert g.is_multigraph == rg.is_multigraph
    assert g.number_of_nodes() == rg.number_of_nodes()
    assert g.number_of_edges() == rg.number_of_edges()
    # every original edge exists flipped in the reverse graph
    assert F.allclose(F.astype(rg.has_edges_between([1, 2, 1], [0, 1, 2]), F.float32),
                      F.ones((3,)))
    # edge ids are preserved under reversal
    assert g.edge_id(0, 1) == rg.edge_id(1, 0)
    assert g.edge_id(1, 2) == rg.edge_id(2, 1)
    assert g.edge_id(2, 1) == rg.edge_id(1, 2)

    # test dgl.reverse_heterograph
    # test homogeneous graph
    g = dgl.graph((F.tensor([0, 1, 2]), F.tensor([1, 2, 0])))
    g.ndata['h'] = F.tensor([[0.], [1.], [2.]])
    g.edata['h'] = F.tensor([[3.], [4.], [5.]])
    g_r = dgl.reverse_heterograph(g)
    assert g.number_of_nodes() == g_r.number_of_nodes()
    assert g.number_of_edges() == g_r.number_of_edges()
    u_g, v_g, eids_g = g.all_edges(form='all')
    u_rg, v_rg, eids_rg = g_r.all_edges(form='all')
    assert F.array_equal(u_g, v_rg)
    assert F.array_equal(v_g, u_rg)
    assert F.array_equal(eids_g, eids_rg)
    # default: ndata shared, edata dropped
    assert F.array_equal(g.ndata['h'], g_r.ndata['h'])
    assert len(g_r.edata) == 0

    # without shared ndata
    g_r = dgl.reverse_heterograph(g, copy_ndata=False)
    assert g.number_of_nodes() == g_r.number_of_nodes()
    assert g.number_of_edges() == g_r.number_of_edges()
    assert len(g_r.ndata) == 0
    assert len(g_r.edata) == 0

    # with shared ndata and edata
    g_r = dgl.reverse_heterograph(g, copy_ndata=True, copy_edata=True)
    assert g.number_of_nodes() == g_r.number_of_nodes()
    assert g.number_of_edges() == g_r.number_of_edges()
    assert F.array_equal(g.ndata['h'], g_r.ndata['h'])
    assert F.array_equal(g.edata['h'], g_r.edata['h'])

    # new node feature added to g_r must not leak back into g
    g_r.ndata['hh'] = F.tensor([0, 1, 2])
    assert ('hh' in g.ndata) is False
    assert ('hh' in g_r.ndata) is True

    # new edge feature added to g_r must not leak back into g
    g_r.edata['hh'] = F.tensor([0, 1, 2])
    assert ('hh' in g.edata) is False
    assert ('hh' in g_r.edata) is True

    # test heterogeneous graph
    g = dgl.heterograph({
        ('user', 'follows', 'user'): ([0, 1, 2, 4, 3, 1, 3], [1, 2, 3, 2, 0, 0, 1]),
        ('user', 'plays', 'game'): ([0, 0, 2, 3, 3, 4, 1], [1, 0, 1, 0, 1, 0, 0]),
        ('developer', 'develops', 'game'): ([0, 1, 1, 2], [0, 0, 1, 1])})
    g.nodes['user'].data['h'] = F.tensor([0, 1, 2, 3, 4])
    g.nodes['user'].data['hh'] = F.tensor([1, 1, 1, 1, 1])
    g.nodes['game'].data['h'] = F.tensor([0, 1])
    g.edges['follows'].data['h'] = F.tensor([0, 1, 2, 4, 3, 1, 3])
    g.edges['follows'].data['hh'] = F.tensor([1, 2, 3, 2, 0, 0, 1])
    g_r = dgl.reverse_heterograph(g)

    # each canonical etype is flipped (src/dst types swapped, relation kept)
    for etype_g, etype_gr in zip(g.canonical_etypes, g_r.canonical_etypes):
        assert etype_g[0] == etype_gr[2]
        assert etype_g[1] == etype_gr[1]
        assert etype_g[2] == etype_gr[0]
        assert g.number_of_edges(etype_g) == g_r.number_of_edges(etype_gr)
    for ntype in g.ntypes:
        assert g.number_of_nodes(ntype) == g_r.number_of_nodes(ntype)
    assert F.array_equal(g.nodes['user'].data['h'], g_r.nodes['user'].data['h'])
    assert F.array_equal(g.nodes['user'].data['hh'], g_r.nodes['user'].data['hh'])
    assert F.array_equal(g.nodes['game'].data['h'], g_r.nodes['game'].data['h'])
    assert len(g_r.edges['follows'].data) == 0

    u_g, v_g, eids_g = g.all_edges(form='all', etype=('user', 'follows', 'user'))
    u_rg, v_rg, eids_rg = g_r.all_edges(form='all', etype=('user', 'follows', 'user'))
    assert F.array_equal(u_g, v_rg)
    assert F.array_equal(v_g, u_rg)
    assert F.array_equal(eids_g, eids_rg)
    u_g, v_g, eids_g = g.all_edges(form='all', etype=('user', 'plays', 'game'))
    u_rg, v_rg, eids_rg = g_r.all_edges(form='all', etype=('game', 'plays', 'user'))
    assert F.array_equal(u_g, v_rg)
    assert F.array_equal(v_g, u_rg)
    assert F.array_equal(eids_g, eids_rg)
    u_g, v_g, eids_g = g.all_edges(form='all', etype=('developer', 'develops', 'game'))
    u_rg, v_rg, eids_rg = g_r.all_edges(form='all', etype=('game', 'develops', 'developer'))
    assert F.array_equal(u_g, v_rg)
    assert F.array_equal(v_g, u_rg)
    assert F.array_equal(eids_g, eids_rg)

    # without shared ndata
    g_r = dgl.reverse_heterograph(g, copy_ndata=False)
    for etype_g, etype_gr in zip(g.canonical_etypes, g_r.canonical_etypes):
        assert etype_g[0] == etype_gr[2]
        assert etype_g[1] == etype_gr[1]
        assert etype_g[2] == etype_gr[0]
        assert g.number_of_edges(etype_g) == g_r.number_of_edges(etype_gr)
    for ntype in g.ntypes:
        assert g.number_of_nodes(ntype) == g_r.number_of_nodes(ntype)
    assert len(g_r.nodes['user'].data) == 0
    assert len(g_r.nodes['game'].data) == 0

    # with shared ndata and edata
    g_r = dgl.reverse_heterograph(g, copy_ndata=True, copy_edata=True)
    for etype_g, etype_gr in zip(g.canonical_etypes, g_r.canonical_etypes):
        assert etype_g[0] == etype_gr[2]
        assert etype_g[1] == etype_gr[1]
        assert etype_g[2] == etype_gr[0]
        assert g.number_of_edges(etype_g) == g_r.number_of_edges(etype_gr)
    assert F.array_equal(g.edges['follows'].data['h'], g_r.edges['follows'].data['h'])
    assert F.array_equal(g.edges['follows'].data['hh'], g_r.edges['follows'].data['hh'])

    # new node feature added to g_r must not leak back into g
    g_r.nodes['user'].data['hhh'] = F.tensor([0, 1, 2, 3, 4])
    assert ('hhh' in g.nodes['user'].data) is False
    assert ('hhh' in g_r.nodes['user'].data) is True

    # new edge feature added to g_r must not leak back into g
    g_r.edges['follows'].data['hhh'] = F.tensor([1, 2, 3, 2, 0, 0, 1])
    assert ('hhh' in g.edges['follows'].data) is False
    assert ('hhh' in g_r.edges['follows'].data) is True
def test_flatten():
    """Test flattening multiple relations into one via heterograph slicing.

    ``g[srctype, :, dsttype]`` merges all matching relations into a single
    relation graph; this checks the resulting ntypes/etypes names, feature
    propagation (intersection of feature names across merged relations),
    and the induced NID/EID/TYPE mappings back to the parent graph.
    """
    def check_mapping(g, fg):
        # Verify that each flattened edge maps back (via the induced
        # ETYPE/EID/NID/NTYPE features) to the correct parent edge.
        if len(fg.ntypes) == 1:
            SRC = DST = fg.ntypes[0]
        else:
            SRC = fg.ntypes[0]
            DST = fg.ntypes[1]

        etypes = F.asnumpy(fg.edata[dgl.ETYPE]).tolist()
        eids = F.asnumpy(fg.edata[dgl.EID]).tolist()

        for i, (etype, eid) in enumerate(zip(etypes, eids)):
            src_g, dst_g = g.find_edges([eid], g.canonical_etypes[etype])
            src_fg, dst_fg = fg.find_edges([i])
            # TODO(gq): I feel this code is quite redundant; can we just add new members (like
            # "induced_srcid") to returned heterograph object and not store them as features?
            assert src_g == fg.nodes[SRC].data[dgl.NID][src_fg]
            tid = F.asnumpy(fg.nodes[SRC].data[dgl.NTYPE][src_fg])[0]
            assert g.canonical_etypes[etype][0] == g.ntypes[tid]
            assert dst_g == fg.nodes[DST].data[dgl.NID][dst_fg]
            tid = F.asnumpy(fg.nodes[DST].data[dgl.NTYPE][dst_fg])[0]
            assert g.canonical_etypes[etype][2] == g.ntypes[tid]

    # check for wildcard slices
    g = create_test_heterograph()
    g.nodes['user'].data['h'] = F.ones((3, 5))
    g.nodes['game'].data['i'] = F.ones((2, 5))
    g.edges['plays'].data['e'] = F.ones((4, 4))
    g.edges['wishes'].data['e'] = F.ones((2, 4))
    g.edges['wishes'].data['f'] = F.ones((2, 4))

    fg = g['user', :, 'game']  # user--plays->game and user--wishes->game
    assert len(fg.ntypes) == 2
    assert fg.ntypes == ['user', 'game']
    assert fg.etypes == ['plays+wishes']

    assert F.array_equal(fg.nodes['user'].data['h'], F.ones((3, 5)))
    assert F.array_equal(fg.nodes['game'].data['i'], F.ones((2, 5)))
    # 'e' exists on both merged relations, so it propagates (4 + 2 edges);
    # 'f' exists only on 'wishes', so it is dropped
    assert F.array_equal(fg.edata['e'], F.ones((6, 4)))
    assert 'f' not in fg.edata

    etypes = F.asnumpy(fg.edata[dgl.ETYPE]).tolist()
    eids = F.asnumpy(fg.edata[dgl.EID]).tolist()
    assert set(zip(etypes, eids)) == set([(1, 0), (1, 1), (1, 2), (1, 3), (2, 0), (2, 1)])

    check_mapping(g, fg)

    fg = g['user', :, 'user']
    # NOTE(gq): The node/edge types from the parent graph is returned if there is only one
    # node/edge type. This differs from the behavior above.
    assert fg.ntypes == ['user']
    assert fg.etypes == ['follows']
    u1, v1 = g.edges(etype='follows', order='eid')
    u2, v2 = fg.edges(etype='follows', order='eid')
    assert F.array_equal(u1, u2)
    assert F.array_equal(v1, v2)

    fg = g['developer', :, 'game']
    assert fg.ntypes == ['developer', 'game']
    assert fg.etypes == ['develops']
    u1, v1 = g.edges(etype='develops', order='eid')
    u2, v2 = fg.edges(etype='develops', order='eid')
    assert F.array_equal(u1, u2)
    assert F.array_equal(v1, v2)

    # full wildcard: all node and edge types merged
    fg = g[:, :, :]
    assert fg.ntypes == ['developer+user', 'game+user']
    assert fg.etypes == ['develops+follows+plays+wishes']
    check_mapping(g, fg)

    # Test another heterograph
    g_x = dgl.graph(([0, 1, 2], [1, 2, 3]), 'user', 'follows')
    g_y = dgl.graph(([0, 2], [2, 3]), 'user', 'knows')
    g_x.nodes['user'].data['h'] = F.randn((4, 3))
    g_x.edges['follows'].data['w'] = F.randn((3, 2))
    g_y.nodes['user'].data['hh'] = F.randn((4, 5))
    g_y.edges['knows'].data['ww'] = F.randn((2, 10))
    g = dgl.hetero_from_relations([g_x, g_y])

    assert F.array_equal(g.ndata['h'], g_x.ndata['h'])
    assert F.array_equal(g.ndata['hh'], g_y.ndata['hh'])
    assert F.array_equal(g.edges['follows'].data['w'], g_x.edata['w'])
    assert F.array_equal(g.edges['knows'].data['ww'], g_y.edata['ww'])

    fg = g['user', :, 'user']
    assert fg.ntypes == ['user']
    assert fg.etypes == ['follows+knows']
    check_mapping(g, fg)

    # open-ended destination wildcard resolves to the same flattening here
    fg = g['user', :, :]
    assert fg.ntypes == ['user']
    assert fg.etypes == ['follows+knows']
    check_mapping(g, fg)