def check_hetero_partition(hg, part_method):
    hg.nodes['n1'].data['labels'] = F.arange(0, hg.number_of_nodes('n1'))
    hg.nodes['n1'].data['feats'] = F.tensor(
        np.random.randn(hg.number_of_nodes('n1'), 10), F.float32)
    hg.edges['r1'].data['feats'] = F.tensor(
        np.random.randn(hg.number_of_edges('r1'), 10), F.float32)
    num_parts = 4
    num_hops = 1

    orig_nids, orig_eids = partition_graph(
        hg, 'test', num_parts, '/tmp/partition', num_hops=num_hops,
        part_method=part_method, reshuffle=True, return_mapping=True)
    assert len(orig_nids) == len(hg.ntypes)
    assert len(orig_eids) == len(hg.etypes)
    for ntype in hg.ntypes:
        assert len(orig_nids[ntype]) == hg.number_of_nodes(ntype)
    for etype in hg.etypes:
        assert len(orig_eids[etype]) == hg.number_of_edges(etype)

    parts = []
    for i in range(num_parts):
        part_g, node_feats, edge_feats, gpb, _, ntypes, etypes = load_partition(
            '/tmp/partition/test.json', i)
        # Verify the mapping between the reshuffled IDs and the original IDs.
        # These are partition-local IDs.
        part_src_ids, part_dst_ids = part_g.edges()
        # These are reshuffled global homogeneous IDs.
        part_src_ids = F.gather_row(part_g.ndata[dgl.NID], part_src_ids)
        part_dst_ids = F.gather_row(part_g.ndata[dgl.NID], part_dst_ids)
        part_eids = part_g.edata[dgl.EID]
        # These are reshuffled per-type IDs.
        src_ntype_ids, part_src_ids = gpb.map_to_per_ntype(part_src_ids)
        dst_ntype_ids, part_dst_ids = gpb.map_to_per_ntype(part_dst_ids)
        etype_ids, part_eids = gpb.map_to_per_etype(part_eids)
        # These are original per-type IDs.
        for etype_id, etype in enumerate(hg.etypes):
            part_src_ids1 = F.boolean_mask(part_src_ids, etype_ids == etype_id)
            src_ntype_ids1 = F.boolean_mask(src_ntype_ids,
                                            etype_ids == etype_id)
            part_dst_ids1 = F.boolean_mask(part_dst_ids, etype_ids == etype_id)
            dst_ntype_ids1 = F.boolean_mask(dst_ntype_ids,
                                            etype_ids == etype_id)
            part_eids1 = F.boolean_mask(part_eids, etype_ids == etype_id)
            assert np.all(F.asnumpy(src_ntype_ids1 == src_ntype_ids1[0]))
            assert np.all(F.asnumpy(dst_ntype_ids1 == dst_ntype_ids1[0]))
            src_ntype = hg.ntypes[F.as_scalar(src_ntype_ids1[0])]
            dst_ntype = hg.ntypes[F.as_scalar(dst_ntype_ids1[0])]
            orig_src_ids1 = F.gather_row(orig_nids[src_ntype], part_src_ids1)
            orig_dst_ids1 = F.gather_row(orig_nids[dst_ntype], part_dst_ids1)
            orig_eids1 = F.gather_row(orig_eids[etype], part_eids1)
            orig_eids2 = hg.edge_ids(orig_src_ids1, orig_dst_ids1, etype=etype)
            assert len(orig_eids1) == len(orig_eids2)
            assert np.all(F.asnumpy(orig_eids1) == F.asnumpy(orig_eids2))
        parts.append(part_g)
        verify_graph_feats(hg, part_g, node_feats)
    verify_hetero_graph(hg, parts)
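# A minimal usage sketch (not part of the original test suite): the check above
# hard-codes the node type 'n1' and the edge type 'r1', so any input graph must
# provide them. The graph construction below is illustrative only.
def _example_check_hetero_partition():
    num_nodes = {'n1': 1000, 'n2': 1000}
    src = np.random.randint(0, num_nodes['n1'], 5000)
    dst = np.random.randint(0, num_nodes['n2'], 5000)
    hg = dgl.heterograph({('n1', 'r1', 'n2'): (src, dst)},
                         num_nodes_dict=num_nodes)
    check_hetero_partition(hg, 'metis')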
def test_spmm(idtype, g, shp, msg, reducer):
    g = g.astype(idtype).to(F.ctx())
    if dgl.backend.backend_name == 'tensorflow' and reducer in ['min', 'max']:
        # TensorFlow DLPack has problems writing into int32 arrays on GPU.
        pytest.skip()
    print(g)
    print(g.idtype)
    hu = F.tensor(np.random.rand(*((g.number_of_src_nodes(),) + shp[0])) + 1)
    he = F.tensor(np.random.rand(*((g.number_of_edges(),) + shp[1])) + 1)
    print('u shape: {}, e shape: {}'.format(F.shape(hu), F.shape(he)))
    g.srcdata['x'] = F.attach_grad(F.clone(hu))
    g.edata['w'] = F.attach_grad(F.clone(he))
    print('SpMM(message func: {}, reduce func: {})'.format(msg, reducer))
    u = F.attach_grad(F.clone(hu))
    e = F.attach_grad(F.clone(he))
    with F.record_grad():
        v = gspmm(g, msg, reducer, u, e)
        non_degree_indices = F.tensor(
            np.nonzero(F.asnumpy(g.in_degrees()) != 0)[0])
        v = F.gather_row(v, non_degree_indices)
        if g.number_of_edges() > 0:
            F.backward(F.reduce_sum(v))
            if msg != 'copy_rhs':
                grad_u = F.grad(u)
            if msg != 'copy_lhs':
                grad_e = F.grad(e)
    with F.record_grad():
        g.update_all(udf_msg[msg], udf_reduce[reducer])
        if g.number_of_edges() > 0:
            v1 = F.gather_row(g.dstdata['v'], non_degree_indices)
            assert F.allclose(v, v1)
            print('forward passed')
            F.backward(F.reduce_sum(v1))
            if msg != 'copy_rhs':
                if reducer in ['min', 'max']:
                    # There might be some numerical errors with min/max.
                    rate = F.reduce_sum(F.abs(F.grad(g.srcdata['x']) - grad_u)) /\
                        F.reduce_sum(F.abs(grad_u))
                    assert F.as_scalar(rate) < 1e-2, rate
                else:
                    assert F.allclose(F.grad(g.srcdata['x']), grad_u)
            if msg != 'copy_lhs':
                if reducer in ['min', 'max']:
                    rate = F.reduce_sum(F.abs(F.grad(g.edata['w']) - grad_e)) /\
                        F.reduce_sum(F.abs(grad_e))
                    assert F.as_scalar(rate) < 1e-2, rate
                else:
                    assert F.allclose(F.grad(g.edata['w']), grad_e)
            print('backward passed')
    g.srcdata.pop('x')
    g.edata.pop('w')
    if 'v' in g.dstdata:
        g.dstdata.pop('v')
def test_spmm(idtype, g, shp, msg, reducer):
    g = g.astype(idtype).to(F.ctx())
    print(g)
    print(g.idtype)
    hu = F.tensor(np.random.rand(*((g.number_of_src_nodes(),) + shp[0])) + 1)
    he = F.tensor(np.random.rand(*((g.number_of_edges(),) + shp[1])) + 1)
    print('u shape: {}, e shape: {}'.format(F.shape(hu), F.shape(he)))
    g.srcdata['x'] = F.attach_grad(F.clone(hu))
    g.edata['w'] = F.attach_grad(F.clone(he))
    print('SpMM(message func: {}, reduce func: {})'.format(msg, reducer))
    u = F.attach_grad(F.clone(hu))
    e = F.attach_grad(F.clone(he))
    with F.record_grad():
        v = gspmm(g, msg, reducer, u, e)
        if reducer in ['max', 'min']:
            v = F.replace_inf_with_zero(v)
        if g.number_of_edges() > 0:
            F.backward(F.reduce_sum(v))
            if msg != 'copy_rhs':
                grad_u = F.grad(u)
            if msg != 'copy_lhs':
                grad_e = F.grad(e)
    with F.record_grad():
        g.update_all(udf_msg[msg], udf_reduce[reducer])
        if g.number_of_edges() > 0:
            v1 = g.dstdata['v']
            assert F.allclose(v, v1)
            print('forward passed')
            F.backward(F.reduce_sum(v1))
            if msg != 'copy_rhs':
                if reducer in ['min', 'max']:
                    # There might be some numerical errors with min/max.
                    rate = F.reduce_sum(F.abs(F.grad(g.srcdata['x']) - grad_u)) /\
                        F.reduce_sum(F.abs(grad_u))
                    assert F.as_scalar(rate) < 1e-2, rate
                else:
                    assert F.allclose(F.grad(g.srcdata['x']), grad_u)
            if msg != 'copy_lhs':
                if reducer in ['min', 'max']:
                    rate = F.reduce_sum(F.abs(F.grad(g.edata['w']) - grad_e)) /\
                        F.reduce_sum(F.abs(grad_e))
                    assert F.as_scalar(rate) < 1e-2, rate
                else:
                    assert F.allclose(F.grad(g.edata['w']), grad_e)
            print('backward passed')
    g.srcdata.pop('x')
    g.edata.pop('w')
    if 'v' in g.dstdata:
        g.dstdata.pop('v')
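# Both test_spmm variants compare gspmm against UDF equivalents looked up from
# `udf_msg` and `udf_reduce`. A minimal sketch of those tables, assuming the
# backend shim `F` used throughout this file; the real tables cover more
# message/reduce combinations.
udf_msg = {
    'add': lambda edges: {'m': edges.src['x'] + edges.data['w']},
    'mul': lambda edges: {'m': edges.src['x'] * edges.data['w']},
    'copy_lhs': lambda edges: {'m': edges.src['x']},
    'copy_rhs': lambda edges: {'m': edges.data['w']},
}
udf_reduce = {
    # Reduce over the mailbox dimension (dim 1), one slot per in-edge.
    'sum': lambda nodes: {'v': F.sum(nodes.mailbox['m'], 1)},
    'max': lambda nodes: {'v': F.max(nodes.mailbox['m'], 1)},
    'min': lambda nodes: {'v': F.min(nodes.mailbox['m'], 1)},
}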
def verify_hetero_graph(g, parts):
    num_nodes = {ntype: 0 for ntype in g.ntypes}
    num_edges = {etype: 0 for etype in g.etypes}
    for part in parts:
        assert len(g.ntypes) == len(F.unique(part.ndata[dgl.NTYPE]))
        assert len(g.etypes) == len(F.unique(part.edata[dgl.ETYPE]))
        for ntype in g.ntypes:
            ntype_id = g.get_ntype_id(ntype)
            inner_node_mask = _get_inner_node_mask(part, ntype_id)
            num_inner_nodes = F.sum(F.astype(inner_node_mask, F.int64), 0)
            num_nodes[ntype] += num_inner_nodes
        for etype in g.etypes:
            etype_id = g.get_etype_id(etype)
            inner_edge_mask = _get_inner_edge_mask(part, etype_id)
            num_inner_edges = F.sum(F.astype(inner_edge_mask, F.int64), 0)
            num_edges[etype] += num_inner_edges
    # Verify that the number of nodes is correct.
    for ntype in g.ntypes:
        print('node {}: {}, {}'.format(
            ntype, g.number_of_nodes(ntype), num_nodes[ntype]))
        assert g.number_of_nodes(ntype) == num_nodes[ntype]
    # Verify that the number of edges is correct.
    for etype in g.etypes:
        print('edge {}: {}, {}'.format(
            etype, g.number_of_edges(etype), num_edges[etype]))
        assert g.number_of_edges(etype) == num_edges[etype]

    nids = {ntype: [] for ntype in g.ntypes}
    eids = {etype: [] for etype in g.etypes}
    for part in parts:
        src, dst, eid = part.edges(form='all')
        orig_src = F.gather_row(part.ndata['orig_id'], src)
        orig_dst = F.gather_row(part.ndata['orig_id'], dst)
        orig_eid = F.gather_row(part.edata['orig_id'], eid)
        etype_arr = F.gather_row(part.edata[dgl.ETYPE], eid)
        eid_type = F.gather_row(part.edata[dgl.EID], eid)
        for etype in g.etypes:
            etype_id = g.get_etype_id(etype)
            src1 = F.boolean_mask(orig_src, etype_arr == etype_id)
            dst1 = F.boolean_mask(orig_dst, etype_arr == etype_id)
            eid1 = F.boolean_mask(orig_eid, etype_arr == etype_id)
            exist = g.has_edges_between(src1, dst1, etype=etype)
            assert np.all(F.asnumpy(exist))
            eid2 = g.edge_ids(src1, dst1, etype=etype)
            assert np.all(F.asnumpy(eid1 == eid2))
            eids[etype].append(F.boolean_mask(eid_type, etype_arr == etype_id))
            # Make sure the inner edge IDs fall into a contiguous range.
            inner_edge_mask = _get_inner_edge_mask(part, etype_id)
            inner_eids = np.sort(
                F.asnumpy(F.boolean_mask(part.edata[dgl.EID],
                                         inner_edge_mask)))
            assert np.all(
                inner_eids == np.arange(inner_eids[0], inner_eids[-1] + 1))
        for ntype in g.ntypes:
            ntype_id = g.get_ntype_id(ntype)
            # Make sure the inner node IDs fall into a contiguous range.
            inner_node_mask = _get_inner_node_mask(part, ntype_id)
            inner_nids = F.boolean_mask(part.ndata[dgl.NID], inner_node_mask)
            assert np.all(
                F.asnumpy(
                    inner_nids == F.arange(F.as_scalar(inner_nids[0]),
                                           F.as_scalar(inner_nids[-1]) + 1)))
            nids[ntype].append(inner_nids)

    for ntype in nids:
        nids_type = F.cat(nids[ntype], 0)
        uniq_ids = F.unique(nids_type)
        # We should get all nodes.
        assert len(uniq_ids) == g.number_of_nodes(ntype)
    for etype in eids:
        eids_type = F.cat(eids[etype], 0)
        uniq_ids = F.unique(eids_type)
        assert len(uniq_ids) == g.number_of_edges(etype)
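# `verify_hetero_graph` relies on the helpers `_get_inner_node_mask` and
# `_get_inner_edge_mask` from dgl.distributed's partition module. A rough
# sketch of the node-side helper, assuming each partition carries an
# 'inner_node' ownership flag and per-node type IDs (the edge-side helper is
# analogous); this is illustrative, not the library's implementation.
def _sketch_inner_node_mask(part, ntype_id):
    if dgl.NTYPE in part.ndata:
        # Select nodes owned by this partition that also have the given type.
        dtype = F.dtype(part.ndata['inner_node'])
        type_match = F.astype(part.ndata[dgl.NTYPE] == ntype_id, dtype)
        return part.ndata['inner_node'] * type_match == 1
    return part.ndata['inner_node'] == 1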
def check_hetero_partition(hg, part_method, num_parts=4,
                           num_trainers_per_machine=1, load_feats=True):
    hg.nodes['n1'].data['labels'] = F.arange(0, hg.number_of_nodes('n1'))
    hg.nodes['n1'].data['feats'] = F.tensor(
        np.random.randn(hg.number_of_nodes('n1'), 10), F.float32)
    hg.edges['r1'].data['feats'] = F.tensor(
        np.random.randn(hg.number_of_edges('r1'), 10), F.float32)
    hg.edges['r1'].data['labels'] = F.arange(0, hg.number_of_edges('r1'))
    num_hops = 1

    orig_nids, orig_eids = partition_graph(
        hg, 'test', num_parts, '/tmp/partition', num_hops=num_hops,
        part_method=part_method, reshuffle=True, return_mapping=True,
        num_trainers_per_machine=num_trainers_per_machine)
    assert len(orig_nids) == len(hg.ntypes)
    assert len(orig_eids) == len(hg.etypes)
    for ntype in hg.ntypes:
        assert len(orig_nids[ntype]) == hg.number_of_nodes(ntype)
    for etype in hg.etypes:
        assert len(orig_eids[etype]) == hg.number_of_edges(etype)

    parts = []
    shuffled_labels = []
    shuffled_elabels = []
    for i in range(num_parts):
        part_g, node_feats, edge_feats, gpb, _, ntypes, etypes = load_partition(
            '/tmp/partition/test.json', i, load_feats=load_feats)
        if not load_feats:
            assert not node_feats
            assert not edge_feats
            node_feats, edge_feats = load_partition_feats(
                '/tmp/partition/test.json', i)
        if num_trainers_per_machine > 1:
            for ntype in hg.ntypes:
                name = ntype + '/trainer_id'
                assert name in node_feats
                part_ids = F.floor_div(node_feats[name],
                                       num_trainers_per_machine)
                assert np.all(F.asnumpy(part_ids) == i)
            for etype in hg.etypes:
                name = etype + '/trainer_id'
                assert name in edge_feats
                part_ids = F.floor_div(edge_feats[name],
                                       num_trainers_per_machine)
                assert np.all(F.asnumpy(part_ids) == i)
        # Verify the mapping between the reshuffled IDs and the original IDs.
        # These are partition-local IDs.
        part_src_ids, part_dst_ids = part_g.edges()
        # These are reshuffled global homogeneous IDs.
        part_src_ids = F.gather_row(part_g.ndata[dgl.NID], part_src_ids)
        part_dst_ids = F.gather_row(part_g.ndata[dgl.NID], part_dst_ids)
        part_eids = part_g.edata[dgl.EID]
        # These are reshuffled per-type IDs.
        src_ntype_ids, part_src_ids = gpb.map_to_per_ntype(part_src_ids)
        dst_ntype_ids, part_dst_ids = gpb.map_to_per_ntype(part_dst_ids)
        etype_ids, part_eids = gpb.map_to_per_etype(part_eids)
        # These are original per-type IDs.
        for etype_id, etype in enumerate(hg.etypes):
            part_src_ids1 = F.boolean_mask(part_src_ids, etype_ids == etype_id)
            src_ntype_ids1 = F.boolean_mask(src_ntype_ids,
                                            etype_ids == etype_id)
            part_dst_ids1 = F.boolean_mask(part_dst_ids, etype_ids == etype_id)
            dst_ntype_ids1 = F.boolean_mask(dst_ntype_ids,
                                            etype_ids == etype_id)
            part_eids1 = F.boolean_mask(part_eids, etype_ids == etype_id)
            assert np.all(F.asnumpy(src_ntype_ids1 == src_ntype_ids1[0]))
            assert np.all(F.asnumpy(dst_ntype_ids1 == dst_ntype_ids1[0]))
            src_ntype = hg.ntypes[F.as_scalar(src_ntype_ids1[0])]
            dst_ntype = hg.ntypes[F.as_scalar(dst_ntype_ids1[0])]
            orig_src_ids1 = F.gather_row(orig_nids[src_ntype], part_src_ids1)
            orig_dst_ids1 = F.gather_row(orig_nids[dst_ntype], part_dst_ids1)
            orig_eids1 = F.gather_row(orig_eids[etype], part_eids1)
            orig_eids2 = hg.edge_ids(orig_src_ids1, orig_dst_ids1, etype=etype)
            assert len(orig_eids1) == len(orig_eids2)
            assert np.all(F.asnumpy(orig_eids1) == F.asnumpy(orig_eids2))
        parts.append(part_g)
        verify_graph_feats(hg, gpb, part_g, node_feats, edge_feats)
        shuffled_labels.append(node_feats['n1/labels'])
        shuffled_elabels.append(edge_feats['r1/labels'])
    verify_hetero_graph(hg, parts)

    # Restore the shuffled label arrays via the ID mappings and check that
    # they match the originals.
    shuffled_labels = F.asnumpy(F.cat(shuffled_labels, 0))
    shuffled_elabels = F.asnumpy(F.cat(shuffled_elabels, 0))
    orig_labels = np.zeros(shuffled_labels.shape, dtype=shuffled_labels.dtype)
    orig_elabels = np.zeros(shuffled_elabels.shape,
                            dtype=shuffled_elabels.dtype)
    orig_labels[F.asnumpy(orig_nids['n1'])] = shuffled_labels
    orig_elabels[F.asnumpy(orig_eids['r1'])] = shuffled_elabels
    assert np.all(orig_labels == F.asnumpy(hg.nodes['n1'].data['labels']))
    assert np.all(orig_elabels == F.asnumpy(hg.edges['r1'].data['labels']))
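# A hedged sketch of how the checks above might be driven from pytest, assuming
# a create_random_hetero() helper (hypothetical here) that builds a heterograph
# with the 'n1'/'r1' names the checks hard-code:
@pytest.mark.parametrize('part_method', ['metis', 'random'])
def test_hetero_partition(part_method):
    hg = create_random_hetero()
    check_hetero_partition(hg, part_method)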