def construct_graph(p_p_g, a_a_g, p_a_g):
    """Assemble a paper/author DGL heterograph from three edge-index graphs.

    Args:
        p_p_g: Object whose ``edge_index`` holds paper-paper edges.
        a_a_g: Object whose ``edge_index`` holds author-author edges.
        p_a_g: Object whose ``edge_index`` holds paper-author edges
            (row 0 is used as paper IDs, row 1 as author IDs).

    Returns:
        The heterograph built from the relations ``aa``, ``ap``, ``pa``
        and ``pp``.
    """
    def _undirected_without_self_loops(edge_index):
        # Sort the edges, make them symmetric, then drop (i, i) edges.
        edge_index = utils.sort_edge_index(edge_index)[0]
        edge_index = utils.to_undirected(edge_index)
        return utils.remove_self_loops(edge_index)[0]

    p_p_edges = _undirected_without_self_loops(p_p_g.edge_index)
    a_a_edges = _undirected_without_self_loops(a_a_g.edge_index)

    # Paper and author IDs are separate index spaces (they are passed to
    # dgl.bipartite below), so an edge (i, i) links paper i to author i.
    # It is not a self loop and must not be removed.
    p_a_edges = utils.sort_edge_index(p_a_g.edge_index)[0]

    paper_paper_graph = dgl.graph((p_p_edges[0], p_p_edges[1]), 'paper', 'pp')
    author_author_graph = dgl.graph(
        (a_a_edges[0], a_a_edges[1]), 'author', 'aa')

    # Node counts are taken from the homogeneous graphs so that both
    # directions of the bipartite relation agree on them.
    num_papers = paper_paper_graph.number_of_nodes()
    num_authors = author_author_graph.number_of_nodes()

    paper_author_graph = dgl.bipartite(
        (p_a_edges[0], p_a_edges[1]), 'paper', 'pa', 'author',
        num_nodes=(num_papers, num_authors))
    author_paper_graph = dgl.bipartite(
        (p_a_edges[1], p_a_edges[0]), 'author', 'ap', 'paper',
        num_nodes=(num_authors, num_papers))

    hg = dgl.hetero_from_relations([
        author_author_graph,
        author_paper_graph,
        paper_author_graph,
        paper_paper_graph,
    ])
    return hg
def test_out_subgraph(index_dtype):
    """Check ``dgl.out_subgraph`` on a heterograph with four relations.

    For every edge type the test verifies the set of edges kept in the
    subgraph and that ``dgl.EID`` maps them back to the parent edge IDs.
    """
    follow_g = dgl.graph(
        [(1,0),(2,0),(3,0),(0,1),(2,1),(3,1),(0,2)],
        'user', 'follow', index_dtype=index_dtype)
    play_g = dgl.bipartite(
        [(0,0),(0,1),(1,2),(3,2)],
        'user', 'play', 'game', index_dtype=index_dtype)
    liked_by_g = dgl.bipartite(
        [(2,0),(2,1),(2,2),(1,0),(1,3),(0,0)],
        'game', 'liked-by', 'user', index_dtype=index_dtype)
    flips_g = dgl.bipartite(
        [(0,0),(1,0),(2,0),(3,0)],
        'user', 'flips', 'coin', index_dtype=index_dtype)
    hg = dgl.hetero_from_relations([follow_g, play_g, liked_by_g, flips_g])

    subg = dgl.out_subgraph(hg, {'user' : [0,1], 'game' : 0})
    assert subg._idtype_str == index_dtype
    assert len(subg.ntypes) == 3
    assert len(subg.etypes) == 4

    # Expected out-edges of the seed nodes, keyed by edge type.
    expected_edges = {
        'follow': {(1,0),(0,1),(0,2)},
        'play': {(0,0),(0,1),(1,2)},
        'liked-by': {(0,0)},
        'flips': {(0,0),(1,0)},
    }
    for etype, expected in expected_edges.items():
        src, dst = subg[etype].edges()
        found = set(zip(list(F.asnumpy(src)), list(F.asnumpy(dst))))
        assert found == expected
        assert F.array_equal(hg[etype].edge_ids(src, dst),
                             subg[etype].edata[dgl.EID])
def load_dblp(num_walks, metapaths):
    """Load the DBLP pickle, build the heterograph and sample metapath walks.

    Args:
        num_walks: How many times the list of author seeds is repeated
            before it is handed to the random-walk sampler.
        metapaths: Iterable of metapaths; each one is passed as ``metapath``
            to ``dgl.sampling.random_walk``.

    Returns:
        A tuple ``(hg, sentences, node_list)`` where ``hg`` is the
        heterograph with relations ``pa``, ``ap``, ``pc`` and ``cp``,
        ``sentences`` is a list of walks with every ID replaced by
        ``node_list[id]``, and ``node_list`` is the list read from the
        pickle.
    """
    # NOTE: pickle.load can execute arbitrary code; only use this loader on a
    # dataset file you trust.
    with open('../dataset/DBLP/output/DBLP_Metapath2vec.pickle', 'rb') as f:
        a_list, _, _, node_list = pickle.load(f)
        pa_list, pc_list = pickle.load(f)
    print(len(pa_list))
    print(len(pc_list))

    # Map every node to the position of its first occurrence once, instead of
    # calling node_list.index() per author (which is quadratic overall).
    first_position = {}
    for position, node in enumerate(node_list):
        first_position.setdefault(node, position)
    author_ids = [first_position[author] for author in a_list]

    # Build the heterogeneous network.
    pa = dgl.bipartite(pa_list, 'paper', 'pa', 'author')
    ap = dgl.bipartite(transpose(pa_list), 'author', 'ap', 'paper')
    pc = dgl.bipartite(pc_list, 'paper', 'pc', 'conf')
    cp = dgl.bipartite(transpose(pc_list), 'conf', 'cp', 'paper')
    hg = dgl.hetero_from_relations([pa, ap, pc, cp])

    # Random walks: one batch per metapath, seeded from every author.
    sentences = []
    for metapath in metapaths:
        traces, _ = dgl.sampling.random_walk(
            hg, author_ids * num_walks, metapath=metapath)
        sentences.extend(
            [node_list[i] for i in trace] for trace in traces.tolist())
    return hg, sentences, node_list
def test_pickling_batched_heterograph():
    """Round-trip a batch of two heterographs through pickle."""
    # Graph layout copied from test_heterograph.create_test_heterograph().
    plays_spmat = ssp.coo_matrix(([1, 1, 1, 1], ([0, 1, 2, 1], [0, 0, 1, 1])))

    wishes_nx = nx.DiGraph()
    wishes_nx.add_nodes_from(['u0', 'u1', 'u2'], bipartite=0)
    wishes_nx.add_nodes_from(['g0', 'g1'], bipartite=1)
    wishes_nx.add_edge('u0', 'g1', id=0)
    wishes_nx.add_edge('u2', 'g0', id=1)

    relations = [
        dgl.graph([(0, 1), (1, 2)], 'user', 'follows'),
        dgl.bipartite(plays_spmat, 'user', 'plays', 'game'),
        dgl.bipartite(wishes_nx, 'user', 'wishes', 'game'),
        dgl.bipartite([(0, 0), (1, 1)], 'developer', 'develops', 'game'),
    ]
    g = dgl.hetero_from_relations(relations)
    g2 = dgl.hetero_from_relations(relations)

    # Give both graphs the same feature layout with independent random values.
    for graph in (g, g2):
        graph.nodes['user'].data['u_h'] = F.randn((3, 4))
        graph.nodes['game'].data['g_h'] = F.randn((2, 5))
        graph.edges['plays'].data['p_h'] = F.randn((4, 6))

    bg = dgl.batch_hetero([g, g2])
    new_bg = _reconstruct_pickle(bg)
    test_utils.check_graph_equal(bg, new_bg)
def load_dblp(remove_self_loop):
    """Load the DBLP (HAN) pickle and prepare graph, features and masks.

    Args:
        remove_self_loop: Accepted but not read by this function.

    Returns:
        A tuple ``(hg, features, labels, num_class, train_mask, test_mask,
        eval_mask, node_list)``. Note that ``test_mask`` comes before
        ``eval_mask``.
    """
    with open('../dataset/DBLP/output/DBLP_HAN.pickle', 'rb') as f:
        a_list, p_list, c_list, node_list = pickle.load(f)
        pa_list, pc_list = pickle.load(f)
        author_features = pickle.load(f)
        labels = pickle.load(f)

    # Build the heterogeneous network from both directions of each relation.
    relations = [
        dgl.bipartite(pa_list, 'paper', 'pa', 'author'),
        dgl.bipartite(transpose(pa_list), 'author', 'ap', 'paper'),
        dgl.bipartite(pc_list, 'paper', 'pc', 'conf'),
        dgl.bipartite(transpose(pc_list), 'conf', 'cp', 'paper'),
    ]
    hg = dgl.hetero_from_relations(relations)

    features = torch.FloatTensor(author_features)
    labels = torch.LongTensor(labels)
    num_class = 4

    # Hold out 20% of the author indices, then halve the hold-out set into
    # evaluation and test indices.
    author_indices = list(range(len(a_list)))
    train_idx, holdout_idx, _, holdout_labels = train_test_split(
        author_indices, labels, test_size=0.2, random_state=52)
    eval_idx, test_idx, _, _ = train_test_split(
        holdout_idx, holdout_labels, test_size=0.5, random_state=40)

    num_nodes = hg.number_of_nodes('author')
    train_mask = get_binary_mask(num_nodes, train_idx)
    eval_mask = get_binary_mask(num_nodes, eval_idx)
    test_mask = get_binary_mask(num_nodes, test_idx)
    return (hg, features, labels, num_class,
            train_mask, test_mask, eval_mask, node_list)
def obtain_Bs(self, head_b, tail_b):
    """Build positive, reversed-positive and negative graphs for one batch.

    Args:
        head_b: User (source) IDs of the batch edges; ``head_b.shape[0]`` is
            taken as the number of edges.
        tail_b: Item (destination) IDs of the batch edges.

    Returns:
        A tuple ``(pos_graph, pos_graph_r, neg_graph)`` of ``edit`` graphs:
        user->item positives, item->user positives, and user->item negatives
        whose tails are drawn from ``self.weights`` (``self.num_negs`` per
        positive edge). Every graph spans all ``user`` and ``item`` nodes of
        ``self.g``.
    """
    n_edges = head_b.shape[0]
    heads, tails = head_b, tail_b

    # Each head is repeated num_negs times and paired with a sampled tail.
    neg_tails = self.weights.multinomial(self.num_negs * n_edges,
                                         replacement=True)
    neg_heads = torch.LongTensor(heads).view(-1, 1).expand(
        n_edges, self.num_negs).flatten()

    n_users = self.g.number_of_nodes('user')
    n_items = self.g.number_of_nodes('item')

    spmat_p = coo_matrix((np.ones(n_edges), (heads, tails)),
                         shape=(n_users, n_items))
    spmat_pr = coo_matrix((np.ones(n_edges), (tails, heads)),
                          shape=(n_items, n_users))
    # The data vector must be as long as the index arrays
    # (num_negs * n_edges). Sizing it by n_edges only works for num_negs == 1.
    spmat_neg = coo_matrix(
        (np.ones(neg_heads.shape[0]), (neg_heads, neg_tails)),
        shape=(n_users, n_items))

    pos_graph = dgl.bipartite(spmat_p, 'user', 'edit', 'item')
    pos_graph_r = dgl.bipartite(spmat_pr, 'item', 'edit', 'user')
    neg_graph = dgl.bipartite(spmat_neg, 'user', 'edit', 'item')

    # dgl.compact_graphs([pos_graph, pos_graph_r, neg_graph]) is deliberately
    # not used here: it removes nodes. Taking an edge subgraph over all edges
    # instead makes the node IDs (NID) readable on the result.
    pos_graph = pos_graph.edge_subgraph({
        ('user', 'edit', 'item'): list(range(pos_graph.number_of_edges()))
    })
    pos_graph_r = pos_graph_r.edge_subgraph({
        ('item', 'edit', 'user'): list(range(pos_graph_r.number_of_edges()))
    })
    neg_graph = neg_graph.edge_subgraph({
        ('user', 'edit', 'item'): list(range(neg_graph.number_of_edges()))
    })
    return pos_graph, pos_graph_r, neg_graph
def create_test_graph(idtype):
    """Return a small heterograph of users, games and developers.

    Relations: ``follows`` (user-user), ``plays`` and ``wishes`` (user-game)
    and ``develops`` (developer-game). Every relation graph is created with
    the given ``idtype``.
    """
    # 'plays' edges come from a scipy COO matrix.
    plays_spmat = ssp.coo_matrix(([1, 1, 1, 1], ([0, 1, 2, 1], [0, 0, 1, 1])))

    # 'wishes' edges come from a networkx graph with a bipartite node split.
    wishes_nx = nx.DiGraph()
    wishes_nx.add_nodes_from(['u0', 'u1', 'u2'], bipartite=0)
    wishes_nx.add_nodes_from(['g0', 'g1'], bipartite=1)
    wishes_nx.add_edge('u0', 'g1', id=0)
    wishes_nx.add_edge('u2', 'g0', id=1)

    relations = [
        dgl.graph([(0, 1), (1, 2)], 'user', 'follows', idtype=idtype),
        dgl.bipartite(plays_spmat, 'user', 'plays', 'game', idtype=idtype),
        dgl.bipartite(wishes_nx, 'user', 'wishes', 'game', idtype=idtype),
        dgl.bipartite([(0, 0), (1, 1)], 'developer', 'develops', 'game',
                      idtype=idtype),
    ]
    return dgl.hetero_from_relations(relations)
def test_pickling_heterograph():
    """Round-trip a heterograph and a block built from it through pickle."""
    # Graph layout copied from test_heterograph.create_test_heterograph().
    plays_spmat = ssp.coo_matrix(([1, 1, 1, 1], ([0, 1, 2, 1], [0, 0, 1, 1])))

    wishes_nx = nx.DiGraph()
    wishes_nx.add_nodes_from(['u0', 'u1', 'u2'], bipartite=0)
    wishes_nx.add_nodes_from(['g0', 'g1'], bipartite=1)
    wishes_nx.add_edge('u0', 'g1', id=0)
    wishes_nx.add_edge('u2', 'g0', id=1)

    g = dgl.hetero_from_relations([
        dgl.graph([(0, 1), (1, 2)], 'user', 'follows'),
        dgl.bipartite(plays_spmat, 'user', 'plays', 'game'),
        dgl.bipartite(wishes_nx, 'user', 'wishes', 'game'),
        dgl.bipartite([(0, 0), (1, 1)], 'developer', 'develops', 'game'),
    ])

    g.nodes['user'].data['u_h'] = F.randn((3, 4))
    g.nodes['game'].data['g_h'] = F.randn((2, 5))
    g.edges['plays'].data['p_h'] = F.randn((4, 6))

    # The plain heterograph must survive pickling unchanged.
    new_g = _reconstruct_pickle(g)
    _assert_is_identical_hetero(g, new_g)

    # The same must hold for a block, which also has to stay a block.
    seeds = {'user': [1, 2], 'game': [0, 1], 'developer': []}
    block = dgl.to_block(g, seeds)
    new_block = _reconstruct_pickle(block)
    _assert_is_identical_hetero(block, new_block)
    assert block.is_block
    assert new_block.is_block
def test_isolated_nodes(index_dtype):
    """Node counts given explicitly must be kept even for isolated nodes.

    Both the ``num_nodes`` keyword and the older ``card`` keyword are
    exercised, for a homogeneous graph and for a bipartite graph.
    """
    homo_edges = [(0, 1), (1, 2)]
    bi_edges = [(0, 2), (0, 3), (1, 2)]

    g = dgl.graph(homo_edges, num_nodes=5, index_dtype=index_dtype)
    assert g._idtype_str == index_dtype
    assert g.number_of_nodes() == 5

    # Backward compatibility: 'card' is accepted in place of 'num_nodes'.
    g = dgl.graph(homo_edges, card=5, index_dtype=index_dtype)
    assert g.number_of_nodes() == 5

    g = dgl.bipartite(bi_edges, 'user', 'plays', 'game',
                      num_nodes=(5, 7), index_dtype=index_dtype)
    assert g._idtype_str == index_dtype
    assert g.number_of_nodes('user') == 5
    assert g.number_of_nodes('game') == 7

    # Backward compatibility for the bipartite constructor as well.
    g = dgl.bipartite(bi_edges, 'user', 'plays', 'game',
                      card=(5, 7), index_dtype=index_dtype)
    assert g._idtype_str == index_dtype
    assert g.number_of_nodes('user') == 5
    assert g.number_of_nodes('game') == 7
def test_sage_conv(aggre_type):
    """Shape checks for ``nn.SAGEConv`` on several graph kinds.

    Covers a ``DGLGraph``, a ``dgl.graph``, a bipartite graph and a bipartite
    graph without any edges.
    """
    ctx = F.ctx()

    # Homogeneous inputs: legacy DGLGraph first, then dgl.graph.
    graph_builders = (
        lambda mat: dgl.DGLGraph(mat, readonly=True),
        dgl.graph,
    )
    for build in graph_builders:
        g = build(sp.sparse.random(100, 100, density=0.1))
        sage = nn.SAGEConv(5, 10, aggre_type)
        feat = F.randn((100, 5))
        h = sage(g, feat)
        assert h.shape[-1] == 10

    # Bipartite input: 'gcn' uses the same size on both sides here.
    g = dgl.bipartite(sp.sparse.random(100, 200, density=0.1))
    dst_dim = 10 if aggre_type == 'gcn' else 5
    sage = nn.SAGEConv((10, dst_dim), 2, aggre_type)
    feat = (F.randn((100, 10)), F.randn((200, dst_dim)))
    h = sage(g, feat)
    assert h.shape[-1] == 2
    assert h.shape[0] == 200

    # Graphs without edges: 'gcn' first, then the remaining aggregators.
    g = dgl.bipartite([], num_nodes=(5, 3))
    edgeless_cases = [('gcn', 3), ('mean', 1), ('pool', 1), ('lstm', 1)]
    for aggregator, dst_feats in edgeless_cases:
        sage = nn.SAGEConv((3, dst_feats), 2, aggregator)
        feat = (F.randn((5, 3)), F.randn((3, dst_feats)))
        h = sage(g, feat)
        assert h.shape[-1] == 2
        assert h.shape[0] == 3
def construct_graph():
    """Build the app/api heterograph from the text files under ``path``.

    Reads ``id_api_320.txt`` and ``id_app_320.txt`` (one ``<id> <name>`` pair
    per line) and the edge lists ``same_block_api_320.txt`` and
    ``api_app_320.txt`` (one ``<src id> <dst id>`` pair per line). Raw IDs are
    remapped to their position in the corresponding ID file.

    Returns:
        A tuple ``(hg, api_names, app_names)`` where ``hg`` contains the
        relations ``app_api`` (app -> api1), ``api_api`` (api1 -> api2) and
        ``api_app`` (api2 -> app).
    """
    def _read_id_names(filename):
        # Parse "<id> <name>" lines; 'with' closes the file even on errors.
        ids, names = [], []
        with open(os.path.join(path, filename), encoding='utf-8') as handle:
            for line in handle:
                fields = line.strip().split()
                ids.append(int(fields[0]))
                names.append(fields[1])
        return ids, names

    def _read_edges(filename, src_index, dst_index):
        # Parse "<src> <dst>" lines and remap both ends to dense indices.
        sources, targets = [], []
        with open(os.path.join(path, filename), "r") as handle:
            for line in handle:
                fields = line.split()
                raw_src = int(fields[0])
                raw_dst = int(fields[1])
                sources.append(src_index[raw_src])
                targets.append(dst_index[raw_dst])
        return sources, targets

    api_ids, api_names = _read_id_names("id_api_320.txt")
    app_ids, app_names = _read_id_names("id_app_320.txt")

    # Raw ID -> position in the ID file.
    api_ids_invmap = {x: i for i, x in enumerate(api_ids)}
    app_ids_invmap = {x: i for i, x in enumerate(app_ids)}

    # B matrix: api-api edges.
    api_api_src, api_api_dst = _read_edges(
        "same_block_api_320.txt", api_ids_invmap, api_ids_invmap)
    # A matrix: api-app edges.
    api_app_src, api_app_dst = _read_edges(
        "api_app_320.txt", api_ids_invmap, app_ids_invmap)

    app_api = dgl.bipartite((api_app_dst, api_app_src),
                            'app', 'app_api', 'api1')
    api_app = dgl.bipartite((api_app_src, api_app_dst),
                            'api2', 'api_app', 'app')
    api_api = dgl.bipartite((api_api_src, api_api_dst),
                            'api1', 'api_api', 'api2')
    hg = dgl.hetero_from_relations([app_api, api_api, api_app])
    return hg, api_names, app_names
def load_acm_raw(remove_self_loop):
    """Download ACM.mat and build the paper/author/field heterograph.

    Args:
        remove_self_loop: Must be falsy; the function asserts this.

    Returns:
        ``(hg, features, labels, num_classes, train_idx, val_idx, test_idx,
        train_mask, val_mask, test_mask)``.
    """
    assert not remove_self_loop

    data_path = get_download_dir() + '/ACM.mat'
    download(_get_dgl_url('dataset/ACM.mat'), path=data_path)
    data = sio.loadmat(data_path)

    paper_field = data['PvsL']   # paper-field?
    paper_author = data['PvsA']  # paper-author
    paper_term = data['PvsT']    # paper-term, bag of words
    paper_conf = data['PvsC']    # paper-conference, labels come from that

    # Class assignment:
    #   KDD papers                 -> class 0 (data mining)
    #   SIGMOD and VLDB papers     -> class 1 (database)
    #   SIGCOMM and MOBICOMM papers -> class 2 (communication)
    conf_ids = [0, 1, 9, 10, 13]
    label_ids = [0, 1, 2, 2, 1]

    # Keep only papers that appear in at least one of the chosen conferences.
    in_chosen_conf = paper_conf[:, conf_ids].sum(1) != 0
    p_selected = in_chosen_conf.A1.nonzero()[0]
    paper_field = paper_field[p_selected]
    paper_author = paper_author[p_selected]
    paper_term = paper_term[p_selected]
    paper_conf = paper_conf[p_selected]

    hg = dgl.hetero_from_relations([
        dgl.bipartite(paper_author, 'paper', 'pa', 'author'),
        dgl.bipartite(paper_author.transpose(), 'author', 'ap', 'paper'),
        dgl.bipartite(paper_field, 'paper', 'pf', 'field'),
        dgl.bipartite(paper_field.transpose(), 'field', 'fp', 'paper'),
    ])

    features = torch.FloatTensor(paper_term.toarray())

    pc_p, pc_c = paper_conf.nonzero()
    label_array = np.zeros(len(p_selected), dtype=np.int64)
    for conf_id, label_id in zip(conf_ids, label_ids):
        label_array[pc_p[pc_c == conf_id]] = label_id
    labels = torch.LongTensor(label_array)
    num_classes = 3

    # Per conference, spread the entries uniformly over [0, 1] in random
    # order; thresholds on that value define the train/val/test split.
    float_mask = np.zeros(len(pc_p))
    for conf_id in conf_ids:
        pc_c_mask = (pc_c == conf_id)
        float_mask[pc_c_mask] = np.random.permutation(
            np.linspace(0, 1, pc_c_mask.sum()))
    train_idx = np.where(float_mask <= 0.2)[0]
    val_idx = np.where((float_mask > 0.2) & (float_mask <= 0.3))[0]
    test_idx = np.where(float_mask > 0.3)[0]

    num_nodes = hg.number_of_nodes('paper')
    train_mask = get_binary_mask(num_nodes, train_idx)
    val_mask = get_binary_mask(num_nodes, val_idx)
    test_mask = get_binary_mask(num_nodes, test_idx)

    return (hg, features, labels, num_classes,
            train_idx, val_idx, test_idx,
            train_mask, val_mask, test_mask)
def load_acm_raw():
    """Download ACM.mat and build a paper->author / paper->field heterograph.

    The paper bag-of-words features are stored on the graph as
    ``hg.nodes["paper"].data["feat"]``.

    Returns:
        ``(hg, labels, num_classes, train_idx, val_idx, test_idx)``.
    """
    from dgl.data.utils import download, get_download_dir, _get_dgl_url
    from scipy import io as sio

    data_path = get_download_dir() + '/ACM.mat'
    download(_get_dgl_url('dataset/ACM.mat'), path=data_path)
    data = sio.loadmat(data_path)

    paper_field = data['PvsL']   # paper-field?
    paper_author = data['PvsA']  # paper-author
    paper_term = data['PvsT']    # paper-term, bag of words
    paper_conf = data['PvsC']    # paper-conference, labels come from that

    # Class assignment:
    #   KDD papers                 -> class 0 (data mining)
    #   SIGMOD and VLDB papers     -> class 1 (database)
    #   SIGCOMM and MOBICOMM papers -> class 2 (communication)
    conf_ids = [0, 1, 9, 10, 13]
    label_ids = [0, 1, 2, 2, 1]

    # Keep only papers that appear in at least one of the chosen conferences.
    in_chosen_conf = paper_conf[:, conf_ids].sum(1) != 0
    p_selected = in_chosen_conf.A1.nonzero()[0]
    paper_field = paper_field[p_selected]
    paper_author = paper_author[p_selected]
    paper_term = paper_term[p_selected]
    paper_conf = paper_conf[p_selected]

    relations = [
        dgl.bipartite(paper_author, 'paper', 'pa', 'author'),
        dgl.bipartite(paper_field, 'paper', 'pf', 'field'),
    ]
    hg = dgl.hetero_from_relations(relations)

    features = torch.FloatTensor(paper_term.toarray())

    pc_p, pc_c = paper_conf.nonzero()
    label_array = np.zeros(len(p_selected), dtype=np.int64)
    for conf_id, label_id in zip(conf_ids, label_ids):
        label_array[pc_p[pc_c == conf_id]] = label_id
    labels = torch.LongTensor(label_array)
    num_classes = 3

    # Per conference, spread the entries uniformly over [0, 1] in random
    # order; thresholds on that value define the train/val/test split.
    float_mask = np.zeros(len(pc_p))
    for conf_id in conf_ids:
        pc_c_mask = (pc_c == conf_id)
        float_mask[pc_c_mask] = np.random.permutation(
            np.linspace(0, 1, pc_c_mask.sum()))
    train_idx = np.where(float_mask <= 0.2)[0]
    val_idx = np.where((float_mask > 0.2) & (float_mask <= 0.3))[0]
    test_idx = np.where(float_mask > 0.3)[0]

    hg.nodes["paper"].data["feat"] = features
    return hg, labels, num_classes, train_idx, val_idx, test_idx
def _generate_enc_graph(self, rating_pairs, rating_values, add_support=False):
    """Build the encoder heterograph with one relation pair per rating value.

    For every value in ``self.possible_rating_values`` a ``user -> movie``
    relation named ``str(rating)`` and a ``movie -> user`` relation named
    ``'rev-<rating>'`` are created from the matching rating pairs.

    Args:
        rating_pairs: Tuple ``(rows, cols)`` of user and movie indices.
        rating_values: Array of ratings, compared element-wise against each
            possible rating value.
        add_support: When true, store the normalisation terms ``ci`` and
            ``cj`` as node data on ``user`` and ``movie`` nodes.

    Returns:
        The heterograph assembled from all rating relations.
    """
    # The original also filled a dense num_user x num_movie matrix here that
    # was never read; it is omitted to avoid the needless allocation.
    rating_row, rating_col = rating_pairs
    rating_graphs = []
    for rating in self.possible_rating_values:
        ridx = np.where(rating_values == rating)
        rrow = rating_row[ridx]
        rcol = rating_col[ridx]
        etype = str(rating)
        bg = dgl.bipartite((rrow, rcol), 'user', etype, 'movie',
                           card=(self._num_user, self._num_movie))
        rev_bg = dgl.bipartite((rcol, rrow), 'movie', 'rev-%s' % etype, 'user',
                               card=(self._num_movie, self._num_user))
        rating_graphs.append(bg)
        rating_graphs.append(rev_bg)
    graph = dgl.hetero_from_relations(rating_graphs)

    # Sanity check: every rating pair yields one forward and one reverse edge.
    assert len(rating_pairs[0]) == sum(
        [graph.number_of_edges(et) for et in graph.etypes]) // 2

    if add_support:
        def _calc_norm(x):
            # 1 / sqrt(degree); zero degrees become inf first, so they end up
            # as 0 instead of dividing by zero.
            x = x.numpy().astype('float32')
            x[x == 0.] = np.inf
            x = th.FloatTensor(1. / np.sqrt(x))
            return x.to(self._device).unsqueeze(1)

        rating_names = [str(r) for r in self.possible_rating_values]

        # ci: in-degrees summed over all rating relations.
        user_ci = _calc_norm(
            sum(graph['rev-%s' % r].in_degrees() for r in rating_names))
        movie_ci = _calc_norm(
            sum(graph[r].in_degrees() for r in rating_names))

        if self._symm:
            # cj: out-degrees summed over all rating relations.
            user_cj = _calc_norm(
                sum(graph[r].out_degrees() for r in rating_names))
            movie_cj = _calc_norm(
                sum(graph['rev-%s' % r].out_degrees() for r in rating_names))
        else:
            # Without symmetric normalisation cj is a vector of ones.
            user_cj = th.ones(self.num_user,).to(self._device)
            movie_cj = th.ones(self.num_movie,).to(self._device)

        graph.nodes['user'].data.update({'ci' : user_ci, 'cj' : user_cj})
        graph.nodes['movie'].data.update({'ci' : movie_ci, 'cj' : movie_cj})
    return graph
def test_batching_with_zero_nodes_edges(index_dtype):
    """Test the features of batched DGLHeteroGraphs"""
    follows_edges = [(0, 1), (1, 2)]

    def _set_shared_features(graph):
        # Features that both input graphs carry.
        graph.nodes['user'].data['h1'] = F.tensor([[0.], [1.], [2.]])
        graph.nodes['user'].data['h2'] = F.tensor([[3.], [4.], [5.]])
        graph.edges['follows'].data['h1'] = F.tensor([[0.], [1.]])
        graph.edges['follows'].data['h2'] = F.tensor([[2.], [3.]])

    # First graph: the 'plays' relation has no edges.
    g1 = dgl.heterograph({
        ('user', 'follows', 'user'): follows_edges,
        ('user', 'plays', 'game'): []
    }, index_dtype=index_dtype)
    _set_shared_features(g1)

    # Second graph: same 'follows' edges plus two 'plays' edges.
    g2 = dgl.heterograph({
        ('user', 'follows', 'user'): follows_edges,
        ('user', 'plays', 'game'): [(0, 0), (1, 0)]
    }, index_dtype=index_dtype)
    _set_shared_features(g2)
    g2.nodes['game'].data['h1'] = F.tensor([[0.]])
    g2.nodes['game'].data['h2'] = F.tensor([[1.]])
    g2.edges['plays'].data['h1'] = F.tensor([[0.], [1.]])

    bg = dgl.batch_hetero([g1, g2])

    # User features are the concatenation of both inputs.
    for key in ('h1', 'h2'):
        stacked = F.cat([g1.nodes['user'].data[key],
                         g2.nodes['user'].data[key]], dim=0)
        assert F.allclose(bg.nodes['user'].data[key], stacked)
    # Game features are compared against the second graph alone.
    for key in ('h1', 'h2'):
        assert F.allclose(bg.nodes['game'].data[key],
                          g2.nodes['game'].data[key])
    assert F.allclose(bg.edges['follows'].data['h1'],
                      F.cat([g1.edges['follows'].data['h1'],
                             g2.edges['follows'].data['h1']], dim=0))
    assert F.allclose(bg.edges['plays'].data['h1'],
                      g2.edges['plays'].data['h1'])

    # Test unbatching graphs
    g3, g4 = dgl.unbatch_hetero(bg)
    node_attrs = {'user': ['h1', 'h2'], 'game': ['h1', 'h2']}
    edge_attrs = {('user', 'follows', 'user'): ['h1']}
    check_equivalence_between_heterographs(
        g1, g3, node_attrs=node_attrs, edge_attrs=edge_attrs)
    check_equivalence_between_heterographs(
        g2, g4, node_attrs=node_attrs, edge_attrs=edge_attrs)

    # Test graphs without edges
    g1 = dgl.bipartite([], 'u', 'r', 'v', num_nodes=(0, 4))
    g2 = dgl.bipartite([], 'u', 'r', 'v', num_nodes=(1, 5))
    g2.nodes['u'].data['x'] = F.tensor([1])
    dgl.batch_hetero([g1, g2])
def test_sage_conv(aggre_type):
    """Shape checks for ``nn.SAGEConv`` with the module moved to ``F.ctx()``.

    Covers a ``DGLGraph``, a ``dgl.graph``, a bipartite graph, a block and a
    bipartite graph without any edges.
    """
    ctx = F.ctx()

    # Homogeneous inputs: legacy DGLGraph first, then dgl.graph.
    graph_builders = (
        lambda mat: dgl.DGLGraph(mat, readonly=True),
        dgl.graph,
    )
    for build in graph_builders:
        g = build(sp.sparse.random(100, 100, density=0.1))
        sage = nn.SAGEConv(5, 10, aggre_type)
        feat = F.randn((100, 5))
        sage = sage.to(ctx)
        h = sage(g, feat)
        assert h.shape[-1] == 10

    # Bipartite input: 'gcn' uses the same size on both sides here.
    g = dgl.bipartite(sp.sparse.random(100, 200, density=0.1))
    dst_dim = 10 if aggre_type == 'gcn' else 5
    sage = nn.SAGEConv((10, dst_dim), 2, aggre_type)
    feat = (F.randn((100, 10)), F.randn((200, dst_dim)))
    sage = sage.to(ctx)
    h = sage(g, feat)
    assert h.shape[-1] == 2
    assert h.shape[0] == 200

    # Block input: seeds are the unique destination nodes of a sparse graph.
    g = dgl.graph(sp.sparse.random(100, 100, density=0.001))
    seed_nodes = th.unique(g.edges()[1])
    block = dgl.to_block(g, seed_nodes)
    sage = nn.SAGEConv(5, 10, aggre_type)
    feat = F.randn((block.number_of_src_nodes(), 5))
    sage = sage.to(ctx)
    h = sage(block, feat)
    assert h.shape[0] == block.number_of_dst_nodes()
    assert h.shape[-1] == 10

    # Graphs without edges: 'gcn' first, then the remaining aggregators.
    g = dgl.bipartite([], num_nodes=(5, 3))
    edgeless_cases = [('gcn', 3), ('mean', 1), ('pool', 1), ('lstm', 1)]
    for aggregator, dst_feats in edgeless_cases:
        sage = nn.SAGEConv((3, dst_feats), 2, aggregator)
        feat = (F.randn((5, 3)), F.randn((3, dst_feats)))
        sage = sage.to(ctx)
        h = sage(g, feat)
        assert h.shape[-1] == 2
        assert h.shape[0] == 3
def generate_dec_graph(self, rating_users, rating_items):
    """Build the user->item ``rate`` graph with the rating as edge label.

    Args:
        rating_users: Mapping from rating value to the users of that rating.
        rating_items: Mapping from rating value to the items of that rating.

    Returns:
        A bipartite graph of shape ``(self.n_user, self.n_item)`` whose
        ``edata['label']`` holds one rating value per collected pair.
    """
    user_ids, item_ids, edge_labels = [], [], []
    for rating in self.rating_values:
        users_with_rating = rating_users[rating]
        user_ids += users_with_rating
        item_ids += rating_items[rating]
        # One label entry per user collected for this rating value.
        edge_labels += [rating] * len(users_with_rating)
    edge_labels = torch.Tensor(edge_labels)

    unit_weights = np.ones_like(user_ids)
    user_item_ratings_coo = sp.coo_matrix(
        (unit_weights, (user_ids, item_ids)),
        shape=(self.n_user, self.n_item),
        dtype=np.float32)
    G = dgl.bipartite(user_item_ratings_coo, 'user', 'rate', 'item')
    G.edata['label'] = edge_labels
    return G
def _generate_dec_graph(self, rating_pairs):
    """Return the user->movie ``rate`` graph for the given rating pairs.

    Args:
        rating_pairs: Tuple ``(rows, cols)`` of user and movie indices.

    Returns:
        A bipartite graph built from a float32 COO matrix of shape
        ``(self.num_user, self.num_movie)`` with a one at every pair.
    """
    unit_weights = np.ones_like(rating_pairs[0])
    matrix_shape = (self.num_user, self.num_movie)
    user_movie_ratings_coo = sp.coo_matrix(
        (unit_weights, rating_pairs), shape=matrix_shape, dtype=np.float32)
    decoder_graph = dgl.bipartite(
        user_movie_ratings_coo, 'user', 'rate', 'movie')
    return decoder_graph
def test_create():
    """Check several ways of constructing graphs and heterographs."""
    g0 = create_test_heterograph()
    g1 = create_test_heterograph1()
    g2 = create_test_heterograph2()
    # All three builders must agree on node and canonical edge types.
    assert set(g0.ntypes) == set(g1.ntypes) == set(g2.ntypes)
    assert (set(g0.canonical_etypes)
            == set(g1.canonical_etypes)
            == set(g2.canonical_etypes))

    # Construction from a networkx complete bipartite graph.
    complete_nx = nx.complete_bipartite_graph(3, 4)
    g = dgl.bipartite(complete_nx, 'user', 'plays', 'game')
    assert g.ntypes == ['user', 'game']
    assert g.etypes == ['plays']
    assert g.number_of_edges() == 12

    # Construction from a scipy sparse matrix.
    values = [1, 1, 1]
    rows, cols = [0, 0, 1], [2, 3, 2]
    spmat = ssp.coo_matrix((values, (rows, cols)), shape=(4, 4))
    g = dgl.graph(spmat)
    assert g.number_of_nodes() == 4
    assert g.number_of_edges() == 3

    # The number of nodes per type is inferred from the edge lists.
    g = dgl.heterograph({
        ('l0', 'e0', 'l1'): [(0, 1), (0, 2)],
        ('l0', 'e1', 'l2'): [(2, 2)],
        ('l2', 'e2', 'l2'): [(1, 1), (3, 3)],
    })
    expected_counts = [('l0', 3), ('l1', 3), ('l2', 4)]
    for ntype, count in expected_counts:
        assert g.number_of_nodes(ntype) == count
def test_gmm_conv():
    """Shape checks for ``nn.GMMConv`` on homogeneous and bipartite graphs."""
    ctx = F.ctx()

    # Homogeneous inputs: legacy DGLGraph first, then dgl.graph.
    for build in (dgl.DGLGraph, dgl.graph):
        g = build(sp.sparse.random(100, 100, density=0.1), readonly=True)
        gmmconv = nn.GMMConv(5, 10, 3, 4, 'mean')
        feat = F.randn((100, 5))
        pseudo = F.randn((g.number_of_edges(), 3))
        gmmconv = gmmconv.to(ctx)
        h = gmmconv(g, feat, pseudo)
        # currently we only do shape check
        assert h.shape[-1] == 10

    # Bipartite input with separate source and destination feature sizes.
    g = dgl.bipartite(sp.sparse.random(100, 50, density=0.1), readonly=True)
    gmmconv = nn.GMMConv((5, 2), 10, 3, 4, 'mean')
    feat_src = F.randn((100, 5))
    feat_dst = F.randn((50, 2))
    pseudo = F.randn((g.number_of_edges(), 3))
    gmmconv = gmmconv.to(ctx)
    h = gmmconv(g, (feat_src, feat_dst), pseudo)
    # currently we only do shape check
    assert h.shape[-1] == 10
def test_nn_conv():
    """Shape checks for ``nn.NNConv`` on homogeneous and bipartite graphs."""
    ctx = F.ctx()

    # Homogeneous inputs: legacy DGLGraph first, then dgl.graph.
    graph_builders = (
        lambda mat: dgl.DGLGraph(mat, readonly=True),
        dgl.graph,
    )
    for build in graph_builders:
        g = build(sp.sparse.random(100, 100, density=0.1))
        edge_func = th.nn.Linear(4, 5 * 10)
        nnconv = nn.NNConv(5, 10, edge_func, 'mean')
        feat = F.randn((100, 5))
        efeat = F.randn((g.number_of_edges(), 4))
        nnconv = nnconv.to(ctx)
        h = nnconv(g, feat, efeat)
        # currently we only do shape check
        assert h.shape[-1] == 10

    # Bipartite input with separate source and destination feature sizes.
    g = dgl.bipartite(sp.sparse.random(50, 100, density=0.1))
    edge_func = th.nn.Linear(4, 5 * 10)
    nnconv = nn.NNConv((5, 2), 10, edge_func, 'mean')
    feat_src = F.randn((50, 5))
    feat_dst = F.randn((100, 2))
    efeat = F.randn((g.number_of_edges(), 4))
    nnconv = nnconv.to(ctx)
    h = nnconv(g, (feat_src, feat_dst), efeat)
    # currently we only do shape check
    assert h.shape[-1] == 10
def test_gin_conv():
    """Shape checks for ``nn.GINConv`` with an identity apply function.

    Because the apply function is ``lambda x: x``, the output keeps the input
    feature size (5) and has one row per destination node.
    """
    g = dgl.DGLGraph(nx.erdos_renyi_graph(20, 0.3))
    ctx = F.ctx()

    gin_conv = nn.GINConv(lambda x: x, 'mean', 0.1)
    gin_conv.initialize(ctx=ctx)
    print(gin_conv)

    # test #1: basic
    feat = F.randn((g.number_of_nodes(), 5))
    h = gin_conv(g, feat)
    assert h.shape == (20, 5)

    # test #2: bipartite
    g = dgl.bipartite(sp.sparse.random(100, 200, density=0.1))
    feat = (F.randn((100, 5)), F.randn((200, 5)))
    h = gin_conv(g, feat)
    # This used to be `return h.shape == (20, 5)`, which asserted nothing and
    # made the block test below unreachable. The bipartite graph has 200
    # destination nodes.
    assert h.shape == (200, 5)

    # test #3: block
    g = dgl.graph(sp.sparse.random(100, 100, density=0.001))
    seed_nodes = np.unique(g.edges()[1].asnumpy())
    block = dgl.to_block(g, seed_nodes)
    feat = F.randn((block.number_of_src_nodes(), 5))
    h = gin_conv(block, feat)
    # The identity apply function keeps the feature size at 5 (not 12).
    assert h.shape == (block.number_of_dst_nodes(), 5)
def test_metapath_random_walk():
    """Every step of a sampled metapath walk must follow an existing edge."""
    ab_graph = dgl.bipartite(([0, 1, 2, 3], [0, 1, 2, 3]), 'a', 'ab', 'b')
    ba_graph = dgl.bipartite(
        ([0, 0, 1, 1, 2, 2, 3, 3], [1, 3, 2, 0, 3, 1, 0, 2]), 'b', 'ba', 'a')
    G = dgl.hetero_from_relations([ab_graph, ba_graph])

    seeds = [0, 1]
    metapath = ['ab', 'ba'] * 4
    traces = dgl.contrib.sampling.metapath_random_walk(G, metapath, seeds, 3)

    for seed, traces_per_seed in zip(seeds, traces):
        assert len(traces_per_seed) == 3
        for trace in traces_per_seed:
            assert len(trace) == 8
            # Put the seed in front so the path alternates a, b, a, b, ...
            path = np.insert(F.asnumpy(trace), 0, seed)
            # Consecutive (a, b, next a) triples along the 9-node path.
            hops = zip(path[0:8:2], path[1::2], path[2::2])
            for a_node, b_node, next_a_node in hops:
                assert ab_graph.has_edge_between(a_node, b_node)
                assert ba_graph.has_edge_between(b_node, next_a_node)
def test_nn_conv():
    """Shape checks for the MXNet ``nn.NNConv`` with an embedding edge function."""
    ctx = F.ctx()

    # Homogeneous inputs: legacy DGLGraph first, then dgl.graph.
    for build in (dgl.DGLGraph, dgl.graph):
        g = build(nx.erdos_renyi_graph(20, 0.3))
        nn_conv = nn.NNConv(5, 2, gluon.nn.Embedding(3, 5 * 2), 'max')
        nn_conv.initialize(ctx=ctx)
        h0 = F.randn((g.number_of_nodes(), 5))
        etypes = nd.random.randint(
            0, 4, g.number_of_edges()).as_in_context(ctx)
        h1 = nn_conv(g, h0, etypes)
        assert h1.shape == (g.number_of_nodes(), 2)

    # Bipartite input with separate source and destination feature sizes.
    g = dgl.bipartite(sp.sparse.random(20, 10, 0.3))
    nn_conv = nn.NNConv((5, 4), 2, gluon.nn.Embedding(3, 5 * 2), 'max')
    nn_conv.initialize(ctx=ctx)
    h_src = F.randn((g.number_of_src_nodes(), 5))
    h_dst = F.randn((g.number_of_dst_nodes(), 4))
    etypes = nd.random.randint(0, 4, g.number_of_edges()).as_in_context(ctx)
    h1 = nn_conv(g, (h_src, h_dst), etypes)
    assert h1.shape == (g.number_of_dst_nodes(), 2)
def test_gat_conv():
    """Shape checks for the MXNet ``nn.GATConv`` on three graph kinds."""
    ctx = F.ctx()

    # Case 1: homogeneous graph, 5 attention heads.
    g = dgl.DGLGraph(nx.erdos_renyi_graph(20, 0.3))
    gat = nn.GATConv(10, 20, 5)
    gat.initialize(ctx=ctx)
    print(gat)
    node_feat = F.randn((20, 10))
    out = gat(g, node_feat)
    assert out.shape == (20, 5, 20)

    # Case 2: bipartite graph with separate source/destination sizes.
    g = dgl.bipartite(sp.sparse.random(100, 200, density=0.1))
    gat = nn.GATConv((5, 10), 2, 4)
    gat.initialize(ctx=ctx)
    pair_feat = (F.randn((100, 5)), F.randn((200, 10)))
    out = gat(g, pair_feat)
    assert out.shape == (200, 4, 2)

    # Case 3: block whose seeds are the unique destination nodes.
    g = dgl.graph(sp.sparse.random(100, 100, density=0.001))
    destinations = g.edges()[1].asnumpy()
    seed_nodes = np.unique(destinations)
    block = dgl.to_block(g, seed_nodes)
    gat = nn.GATConv(5, 2, 4)
    gat.initialize(ctx=ctx)
    src_feat = F.randn((block.number_of_src_nodes(), 5))
    out = gat(block, src_feat)
    assert out.shape == (block.number_of_dst_nodes(), 4, 2)
def test_gmm_conv():
    """Shape checks for the MXNet ``nn.GMMConv``."""
    ctx = F.ctx()

    # Homogeneous inputs: legacy DGLGraph first, then dgl.graph.
    for build in (dgl.DGLGraph, dgl.graph):
        g = build(nx.erdos_renyi_graph(20, 0.3))
        gmm_conv = nn.GMMConv(5, 2, 5, 3, 'max')
        gmm_conv.initialize(ctx=ctx)
        h0 = F.randn((g.number_of_nodes(), 5))
        pseudo = F.randn((g.number_of_edges(), 5))
        h1 = gmm_conv(g, h0, pseudo)
        assert h1.shape == (g.number_of_nodes(), 2)

    # Bipartite input with separate source and destination feature sizes.
    g = dgl.bipartite(sp.sparse.random(20, 10, 0.1))
    gmm_conv = nn.GMMConv((5, 4), 2, 5, 3, 'max')
    gmm_conv.initialize(ctx=ctx)
    h_src = F.randn((g.number_of_src_nodes(), 5))
    h_dst = F.randn((g.number_of_dst_nodes(), 4))
    pseudo = F.randn((g.number_of_edges(), 5))
    h1 = gmm_conv(g, (h_src, h_dst), pseudo)
    assert h1.shape == (g.number_of_dst_nodes(), 2)
def test_sage_conv():
    """Shape checks for ``nn.SAGEConv`` across all four aggregator types."""
    graph_builders = (
        lambda mat: dgl.DGLGraph(mat, readonly=True),
        dgl.graph,
    )
    for aggre_type in ['mean', 'pool', 'gcn', 'lstm']:
        ctx = F.ctx()

        # Homogeneous inputs: legacy DGLGraph first, then dgl.graph.
        for build in graph_builders:
            g = build(sp.sparse.random(100, 100, density=0.1))
            sage = nn.SAGEConv(5, 10, aggre_type)
            feat = F.randn((100, 5))
            sage = sage.to(ctx)
            h = sage(g, feat)
            assert h.shape[-1] == 10

        # Bipartite input: 'gcn' uses the same size on both sides here.
        g = dgl.bipartite(sp.sparse.random(100, 200, density=0.1))
        dst_dim = 10 if aggre_type == 'gcn' else 5
        sage = nn.SAGEConv((10, dst_dim), 2, aggre_type)
        feat = (F.randn((100, 10)), F.randn((200, dst_dim)))
        sage = sage.to(ctx)
        h = sage(g, feat)
        assert h.shape[-1] == 2
        assert h.shape[0] == 200
def create_test_heterograph(idtype):
    """Return the shared test heterograph on device ``F.ctx()``.

    It is the heterograph from the docstring plus a user--wishes--game
    relation (3 users, 2 games, 2 developers). Metagraph:

        ('user', 'follows', 'user'),
        ('user', 'plays', 'game'),
        ('user', 'wishes', 'game'),
        ('developer', 'develops', 'game')

    The ID type of every relation graph and of the result is asserted to be
    ``idtype``.
    """
    plays_spmat = ssp.coo_matrix(([1, 1, 1, 1], ([0, 1, 2, 1], [0, 0, 1, 1])))

    wishes_nx = nx.DiGraph()
    wishes_nx.add_nodes_from(['u0', 'u1', 'u2'], bipartite=0)
    wishes_nx.add_nodes_from(['g0', 'g1'], bipartite=1)
    wishes_nx.add_edge('u0', 'g1', id=0)
    wishes_nx.add_edge('u2', 'g0', id=1)

    follows_g = dgl.graph(
        [(0, 1), (1, 2)], 'user', 'follows',
        idtype=idtype, device=F.ctx())
    plays_g = dgl.bipartite(
        plays_spmat, 'user', 'plays', 'game',
        idtype=idtype, device=F.ctx())
    wishes_g = dgl.bipartite(
        wishes_nx, 'user', 'wishes', 'game',
        idtype=idtype, device=F.ctx())
    develops_g = dgl.bipartite(
        [(0, 0), (1, 1)], 'developer', 'develops', 'game',
        idtype=idtype, device=F.ctx())

    relations = [follows_g, plays_g, wishes_g, develops_g]
    for relation in relations:
        assert relation.idtype == idtype

    g = dgl.hetero_from_relations(relations)
    assert g.idtype == idtype
    assert g.device == F.ctx()
    return g
def test_create():
    """Check graph construction paths and the ``validate`` flag."""
    g0 = create_test_heterograph()
    g1 = create_test_heterograph1()
    g2 = create_test_heterograph2()
    # All three builders must agree on node and canonical edge types.
    assert set(g0.ntypes) == set(g1.ntypes) == set(g2.ntypes)
    assert (set(g0.canonical_etypes)
            == set(g1.canonical_etypes)
            == set(g2.canonical_etypes))

    # Construction from a networkx complete bipartite graph.
    complete_nx = nx.complete_bipartite_graph(3, 4)
    g = dgl.bipartite(complete_nx, 'user', 'plays', 'game')
    assert g.ntypes == ['user', 'game']
    assert g.etypes == ['plays']
    assert g.number_of_edges() == 12

    # Construction from a scipy sparse matrix.
    values = [1,1,1]
    rows, cols = [0, 0, 1], [2, 3, 2]
    spmat = ssp.coo_matrix((values, (rows, cols)), shape=(4, 4))
    g = dgl.graph(spmat)
    assert g.number_of_nodes() == 4
    assert g.number_of_edges() == 3

    # The number of nodes per type is inferred from the edge lists.
    g = dgl.heterograph({
        ('l0', 'e0', 'l1'): [(0, 1), (0, 2)],
        ('l0', 'e1', 'l2'): [(2, 2)],
        ('l2', 'e2', 'l2'): [(1, 1), (3, 3)],
    })
    assert g.number_of_nodes('l0') == 3
    assert g.number_of_nodes('l1') == 3
    assert g.number_of_nodes('l2') == 4

    # validate=True on a homogeneous graph: node ID 2 exceeds card=2.
    rejected = False
    try:
        g = dgl.graph(
            ([0, 0, 0, 1, 1, 2], [0, 1, 2, 0, 1, 2]),
            card=2,
            validate=True
        )
    except DGLError:
        rejected = True
    finally:
        assert rejected, "should catch a DGLError because node ID is out of bound."

    # validate=True on a bipartite graph, once per out-of-range side.
    def _test_validate_bipartite(card):
        rejected = False
        try:
            g = dgl.bipartite(
                ([0, 0, 1, 1, 2], [1, 1, 2, 2, 3]),
                card=card,
                validate=True
            )
        except DGLError:
            rejected = True
        finally:
            assert rejected, "should catch a DGLError because node ID is out of bound."

    for card in ((3, 3), (2, 4)):
        _test_validate_bipartite(card)
def test_pickling_heterograph_index_compatibility():
    """A graph index from an old pickle must rebuild the reference graph.

    The index stored in ``tests/compute/hetero_pickle_old.pkl`` is wrapped in
    a ``DGLHeteroGraph`` using the node and edge types of a freshly built
    graph, and the two graphs are compared.
    """
    plays_spmat = ssp.coo_matrix(([1, 1, 1, 1], ([0, 1, 2, 1], [0, 0, 1, 1])))

    wishes_nx = nx.DiGraph()
    wishes_nx.add_nodes_from(['u0', 'u1', 'u2'], bipartite=0)
    wishes_nx.add_nodes_from(['g0', 'g1'], bipartite=1)
    wishes_nx.add_edge('u0', 'g1', id=0)
    wishes_nx.add_edge('u2', 'g0', id=1)

    follows_g = dgl.graph([(0, 1), (1, 2)], 'user', 'follows')
    plays_g = dgl.bipartite(plays_spmat, 'user', 'plays', 'game')
    wishes_g = dgl.bipartite(wishes_nx, 'user', 'wishes', 'game')
    develops_g = dgl.bipartite([(0, 0), (1, 1)], 'developer', 'develops', 'game')
    g = dgl.hetero_from_relations([follows_g, plays_g, wishes_g, develops_g])

    # The context manager closes the file; no explicit close() is needed.
    with open("tests/compute/hetero_pickle_old.pkl", "rb") as f:
        gi = pickle.load(f)

    new_g = dgl.DGLHeteroGraph(gi, g.ntypes, g.etypes)
    _assert_is_identical_hetero(g, new_g)