def test_pinsage_sampling():
    """Exercise the PinSAGE / random-walk neighbor samplers on small graphs.

    Every sampler is queried with seed nodes 0 and 2. The returned neighbor
    graph must have exactly one node type and contain at least one of the
    expected edges pointing at each seed.
    """
    # Positional sampler arguments shared by every sampler built below.
    walk_args = (4, 0.5, 3, 2)
    # Edge list connecting nodes {0, 1} among themselves and {2, 3} among
    # themselves.
    cluster_edges = [(0, 0), (0, 1), (1, 0), (1, 1),
                     (2, 2), (2, 3), (3, 2), (3, 3)]

    def _check(graph, sampler, expected_ntype):
        seeds = F.tensor([0, 2], dtype=F.int64)
        sampled = sampler(seeds)
        assert sampled.ntypes == [expected_ntype]
        src, dst = sampled.all_edges(form='uv', order='eid')
        pairs = list(zip(F.asnumpy(src).tolist(), F.asnumpy(dst).tolist()))
        assert (1, 0) in pairs or (0, 0) in pairs
        assert (2, 2) in pairs or (3, 2) in pairs

    # Bipartite item <-> user graph.
    bipartite = dgl.heterograph({
        ('item', 'bought-by', 'user'): cluster_edges,
        ('user', 'bought', 'item'): [(0, 0), (1, 0), (0, 1), (1, 1),
                                     (2, 2), (3, 2), (2, 3), (3, 3)],
    })
    _check(bipartite,
           dgl.sampling.PinSAGESampler(bipartite, 'item', 'user', *walk_args),
           'item')
    # Same graph, metapath given first as edge type names, then as
    # canonical edge type triples.
    metapaths = (
        ['bought-by', 'bought'],
        [('item', 'bought-by', 'user'), ('user', 'bought', 'item')],
    )
    for metapath in metapaths:
        _check(bipartite,
               dgl.sampling.RandomWalkNeighborSampler(
                   bipartite, *walk_args, metapath),
               'item')

    # Homogeneous graph, no metapath.
    homo = dgl.graph(cluster_edges)
    _check(homo,
           dgl.sampling.RandomWalkNeighborSampler(homo, *walk_args),
           homo.ntypes[0])

    # Three node types connected in a cycle A -> B -> C -> A.
    cyclic = dgl.heterograph({
        ('A', 'AB', 'B'): [(0, 1), (2, 3)],
        ('B', 'BC', 'C'): [(1, 2), (3, 1)],
        ('C', 'CA', 'A'): [(2, 0), (1, 2)],
    })
    _check(cyclic,
           dgl.sampling.RandomWalkNeighborSampler(
               cyclic, *walk_args, ['AB', 'BC', 'CA']),
           'A')
def test_sage_conv_bi_empty(idtype, aggre_type, out_dim):
    """Run SAGEConv on a bipartite graph that has no edges.

    The graph has 5 source nodes ('_U') and 3 destination nodes ('_V').
    For every aggregator the output must have one row per destination node
    and ``out_dim`` columns.

    ``aggre_type`` is accepted but not consulted: the aggregator types are
    enumerated explicitly in the body.
    """
    num_src, num_dst = 5, 3
    g = dgl.heterograph(
        {('_U', '_E', '_V'): ([], [])},
        {'_U': num_src, '_V': num_dst},
    ).to(F.ctx())
    g = g.astype(idtype).to(F.ctx())

    def _check(dst_feats, aggregator):
        # Build the layer before the features, then run and check shapes.
        conv = nn.SAGEConv((3, dst_feats), out_dim, aggregator)
        inputs = (F.randn((num_src, 3)), F.randn((num_dst, dst_feats)))
        out = conv(g, inputs)
        assert out.shape[-1] == out_dim
        assert out.shape[0] == num_dst

    _check(3, 'gcn')
    for agg in ['mean', 'pool', 'lstm']:
        _check(1, agg)
def _generate_dec_graph(self, rating_pairs):
    """Build the user -> movie graph that holds one edge per rating pair.

    :param rating_pairs: pair of index arrays (row indices, column indices)
        used as the coordinates of a ``num_user x num_movie`` COO matrix.
    :return: heterograph with a single ('user', 'rate', 'movie') relation
        and explicit node counts taken from ``self``.
    """
    shape = (self.num_user, self.num_movie)
    # One unit entry for every (user, movie) pair.
    values = np.ones_like(rating_pairs[0])
    coo = sp.coo_matrix((values, rating_pairs), shape=shape, dtype=np.float32)
    # Go through an anonymous bipartite graph to obtain the edge endpoints.
    bipartite = dgl.bipartite_from_scipy(
        coo, utype='_U', etype='_E', vtype='_V')
    node_counts = {'user': self.num_user, 'movie': self.num_movie}
    return dgl.heterograph(
        {('user', 'rate', 'movie'): bipartite.edges()},
        num_nodes_dict=node_counts)
def __init__(self, datadir, batch_size=128):
    """Load train/test edge lists from ``datadir`` and build the train graph.

    :param datadir: directory containing ``train.txt`` and ``test.txt``.
    :param batch_size: stored on the instance as ``self.batch_size``.
    """
    ui_relation = ('user', 'ui', 'item')
    train_path = os.path.join(datadir, 'train.txt')
    test_path = os.path.join(datadir, 'test.txt')

    self.train_items, self.train_edge_dict = self.get_edge_list(train_path)
    self.test_items, self.test_edge_dict = self.get_edge_list(test_path)
    self.n_train = len(self.train_edge_dict[ui_relation])
    self.n_test = len(self.test_edge_dict[ui_relation])

    # Only the training edges go into the graph.
    graph = dgl.heterograph(self.train_edge_dict)
    self.G = graph
    self.n_items = graph.number_of_nodes('item')
    self.n_users = graph.number_of_nodes('user')
    user_ids = graph.nodes('user')
    self.users = user_ids.detach().cpu().numpy().tolist()
    self.batch_size = batch_size
def get_mol_complete_graph(self, idx, e_start, e_end, pairwise_start, n_atoms):
    """Return the graph with 'bond' and 'complete' relations for one molecule.

    When ``self.prefetch_graphs`` is truthy the graph stored at
    ``self.mol_complete_graphs[idx]`` is returned. Otherwise the graph is
    built from column slices of ``self.edge_indices`` and of
    ``self.dist_dict['pairwise_indices']``.

    :param idx: index into the prefetched graph list.
    :param e_start: first column of this molecule in ``self.edge_indices``.
    :param e_end: one past the last column in ``self.edge_indices``.
    :param pairwise_start: first column of this molecule in the pairwise
        index array.
    :param n_atoms: atom count; ``n_atoms * (n_atoms - 1)`` pairwise
        columns are taken.
    """
    if self.prefetch_graphs:
        return self.mol_complete_graphs[idx]

    bonds = self.edge_indices[:, e_start:e_end]
    pairwise_end = pairwise_start + n_atoms * (n_atoms - 1)
    pairs = self.dist_dict['pairwise_indices'][:, pairwise_start:pairwise_end]
    return dgl.heterograph({
        ('atom', 'bond', 'atom'): (bonds[0], bonds[1]),
        ('atom', 'complete', 'atom'): (pairs[0], pairs[1]),
    })
def create_random_hetero():
    """Create a sparse random heterograph with three node types.

    Each of the three relations is sampled with ``scipy.sparse.random`` at
    density 0.001 and a fixed ``random_state`` of 100. Node type 'n1' and
    edge type 'r1' get a 'feat' column holding their own indices.
    """
    num_nodes = {'n1': 10000, 'n2': 10010, 'n3': 10020}
    relations = [('n1', 'r1', 'n2'), ('n1', 'r2', 'n3'), ('n2', 'r3', 'n3')]

    def _random_edges(src_type, dst_type):
        mat = spsp.random(num_nodes[src_type], num_nodes[dst_type],
                          density=0.001, format='coo', random_state=100)
        return mat.row, mat.col

    edges = {rel: _random_edges(rel[0], rel[2]) for rel in relations}
    g = dgl.heterograph(edges, num_nodes)

    n1_ids = F.arange(0, g.number_of_nodes('n1'))
    g.nodes['n1'].data['feat'] = F.unsqueeze(n1_ids, 1)
    r1_ids = F.arange(0, g.number_of_edges('r1'))
    g.edges['r1'].data['feat'] = F.unsqueeze(r1_ids, 1)
    return g
def _create_heterogeneous():
    """Build a random heterograph with explicit reverse relations.

    :return: tuple ``(g, reverse_etypes, always_exclude, seed_edges)`` where
        ``g`` has relations 'AA', 'AB' and their 'rev-' counterparts,
        ``reverse_etypes`` maps each edge type name to its reverse,
        ``always_exclude`` holds 50 random edge IDs per forward relation and
        ``seed_edges`` holds all 1000 edge IDs per forward relation.
    """
    device = F.ctx()
    n_nodes, n_edges, n_excluded = 200, 1000, 50

    edges = {}
    for utype, etype, vtype in [('A', 'AA', 'A'), ('A', 'AB', 'B')]:
        src = torch.randint(0, n_nodes, (n_edges,), device=device)
        dst = torch.randint(0, n_nodes, (n_edges,), device=device)
        edges[utype, etype, vtype] = (src, dst)
        # The reverse relation swaps the endpoints of the same edges.
        edges[vtype, 'rev-' + etype, utype] = (dst, src)
    g = dgl.heterograph(edges, num_nodes_dict={'A': n_nodes, 'B': n_nodes})

    reverse_etypes = {
        'AA': 'rev-AA',
        'AB': 'rev-AB',
        'rev-AA': 'AA',
        'rev-AB': 'AB',
    }
    always_exclude = {}
    for name in ('AA', 'AB'):
        always_exclude[name] = torch.randint(
            0, n_edges, (n_excluded,), device=device)
    seed_edges = {}
    for name in ('AA', 'AB'):
        seed_edges[name] = torch.arange(0, n_edges, device=device)
    return g, reverse_etypes, always_exclude, seed_edges
def create_test_heterograph2():
    """Build a test heterograph from four different kinds of edge input.

    The relations are supplied as a list of pairs ('follows'), a SciPy COO
    matrix ('plays'), a NetworkX bipartite digraph ('wishes') and an
    existing DGL bipartite graph ('develops').
    """
    # user -> game 'plays' edges as a sparse matrix.
    plays_rows = [0, 1, 2, 1]
    plays_cols = [0, 0, 1, 1]
    plays_spmat = ssp.coo_matrix(([1, 1, 1, 1], (plays_rows, plays_cols)))

    # user -> game 'wishes' edges as a NetworkX bipartite digraph.
    wishes_nx = nx.DiGraph()
    wishes_nx.add_nodes_from(['u0', 'u1', 'u2'], bipartite=0)
    wishes_nx.add_nodes_from(['g0', 'g1'], bipartite=1)
    wishes_nx.add_edge('u0', 'g1', id=0)
    wishes_nx.add_edge('u2', 'g0', id=1)

    # developer -> game 'develops' edges as a DGL bipartite graph.
    develops_g = dgl.bipartite(
        [(0, 0), (1, 1)], 'developer', 'develops', 'game')

    relations = {
        ('user', 'follows', 'user'): [(0, 1), (1, 2)],
        ('user', 'plays', 'game'): plays_spmat,
        ('user', 'wishes', 'game'): wishes_nx,
        ('developer', 'develops', 'game'): develops_g,
    }
    return dgl.heterograph(relations)
def heterograph0():
    """Return a small user/game/developer heterograph with random 'h' data.

    Every node type and edge type gets a random feature named 'h', copied
    to the CPU. The feature widths are: user 3, game 2, developer 3,
    plays 1, develops 5.
    """
    g = dgl.heterograph({
        ('user', 'plays', 'game'): ([0, 1, 1, 2], [0, 0, 1, 1]),
        ('developer', 'develops', 'game'): ([0, 1], [0, 1]),
    })

    def _random_on_cpu(rows, cols):
        return F.copy_to(F.randn((rows, cols)), F.cpu())

    for ntype, width in [('user', 3), ('game', 2), ('developer', 3)]:
        g.nodes[ntype].data['h'] = _random_on_cpu(
            g.number_of_nodes(ntype), width)
    for etype, width in [('plays', 1), ('develops', 5)]:
        g.edges[etype].data['h'] = _random_on_cpu(
            g.number_of_edges(etype), width)
    return g
def create_test_heterograph_large(idtype):
    """Create a heterograph with 2500 random edges in each of four relations.

    All four relations share the same random source/destination arrays,
    drawn from ``[0, 50)``.

    :param idtype: index dtype passed to ``dgl.heterograph``.
    :return: the graph, placed on ``F.ctx()``.
    """
    n_edges = 2500
    src = np.random.randint(0, 50, n_edges)
    dst = np.random.randint(0, 50, n_edges)
    canonical_etypes = [
        ('user', 'follows', 'user'),
        ('user', 'plays', 'game'),
        ('user', 'wishes', 'game'),
        ('developer', 'develops', 'game'),
    ]
    relations = {etype: (src, dst) for etype in canonical_etypes}
    g = dgl.heterograph(relations, idtype=idtype, device=F.ctx())
    assert g.idtype == idtype
    assert g.device == F.ctx()
    return g
def construct_negative_graph(self):
    """Build a heterograph from the edges produced by the negative sampler.

    The sampler is called with ``self.hg`` and ``self.train_eid_dict``. A
    non-mapping result is accepted only when the graph has exactly one edge
    type, and is then keyed by that canonical edge type. Edge types the
    sampler did not return get an empty (src, dst) pair.

    :return: heterograph with the same node counts per type as ``self.hg``.
    """
    hg = self.hg
    neg_srcdst = self.negative_sampler(hg, self.train_eid_dict)
    if not isinstance(neg_srcdst, Mapping):
        assert len(hg.etypes) == 1, \
            'graph has multiple or no edge types; ' \
            'please return a dict in negative sampler.'
        neg_srcdst = {hg.canonical_etypes[0]: neg_srcdst}

    neg_edges = {}
    for etype in hg.canonical_etypes:
        # NOTE(review): the placeholder tensors use torch's default dtype
        # rather than the dtype of the sampled IDs -- confirm that
        # dgl.heterograph accepts them for relations with no negatives.
        empty_pair = (th.tensor([]), th.tensor([]))
        neg_edges[etype] = neg_srcdst.get(etype, empty_pair)

    node_counts = {ntype: hg.number_of_nodes(ntype) for ntype in hg.ntypes}
    return dgl.heterograph(neg_edges, node_counts)
def create_random_hetero():
    """Create a sparse random heterograph with three node types.

    Each of the three relations is sampled with ``scipy.sparse.random`` at
    density 0.001 and a fixed ``random_state`` of 100. Node type 'n1' gets
    a 10-column float32 'feat' tensor of ones on the CPU.
    """
    num_nodes = {'n1': 1010, 'n2': 1000, 'n3': 1020}
    relations = [('n1', 'r1', 'n2'), ('n1', 'r2', 'n3'), ('n2', 'r3', 'n3')]

    def _random_edges(src_type, dst_type):
        mat = spsp.random(num_nodes[src_type], num_nodes[dst_type],
                          density=0.001, format='coo', random_state=100)
        return mat.row, mat.col

    edges = {rel: _random_edges(rel[0], rel[2]) for rel in relations}
    g = dgl.heterograph(edges, num_nodes)
    feat_shape = (g.number_of_nodes('n1'), 10)
    g.nodes['n1'].data['feat'] = F.ones(feat_shape, F.float32, F.cpu())
    return g
def retHeterographProdCat(df):
    """Build the product-category heterograph from the parsed tables in ``df``.

    ``df`` is indexed with the integers 0..6. Entries 0..5 feed the six
    category-to-category relations; entry 6 additionally provides the
    category -> product edges, grouped by the first 12 characters of the
    value in column 0 of each row.

    :param df: indexable collection of seven tables; ``df[6]`` must
        support ``iterrows()``.
    :return: ``dgl`` heterograph with the six 'has_categoryNM' relations
        plus one 'has_product' relation per category-name prefix.
    """
    # Collect every ID into one master dictionary.
    dictIDsMASTER = {}
    for x in range(0, 6):
        dictIDsMASTER.update(retIDsOneColumnATaTime(df[x], 0))
    dictIDsMASTER.update(retIDsOneColumnATaTime(df[6], 2))  # Also IDs

    # Known problem: when paired data is parsed from the CSV, one column
    # breaks whenever the next one has no value.
    # TODO: parse the columns and sub-columns separately, and only then the
    # last column -> item.
    # The seventh mapping (index 6) is computed but not used below; the call
    # is kept so that any error it raises still surfaces.
    mappedRels = [retMappedRelationCategories(df[x], dictIDsMASTER)
                  for x in range(0, 7)]

    # Map the relations prod_catX -> productID.
    # One pass that groups rows by category prefix. Because dicts keep
    # insertion order, relations appear in first-seen order and each edge
    # list is in row order. This replaces a nested rescan of the table for
    # every new prefix.
    product_edges = {}
    for _, row in df[6].iterrows():
        relation = (row[0][:12], 'has_product', 'prod_id')
        edge = (dictIDsMASTER[row[0]], dictIDsMASTER[row[2]])
        product_edges.setdefault(relation, []).append(edge)

    dataDict = {
        ('<prod_cat_1>', 'has_category12', '<prod_cat_2>'): mappedRels[0],
        ('<prod_cat_2>', 'has_category23', '<prod_cat_3>'): mappedRels[1],
        ('<prod_cat_3>', 'has_category34', '<prod_cat_4>'): mappedRels[2],
        ('<prod_cat_4>', 'has_category45', '<prod_cat_5>'): mappedRels[3],
        ('<prod_cat_5>', 'has_category56', '<prod_cat_6>'): mappedRels[4],
        ('<prod_cat_6>', 'has_category67', '<prod_cat_7>'): mappedRels[5],
    }
    dataDict.update(product_edges)
    return dgl.heterograph(dataDict)
def read_csv_heterograph_dgl(raw_dir, add_inverse_edge=False,
                             additional_node_files=None,
                             additional_edge_files=None):
    """Read raw CSV heterographs and convert each one to a DGL heterograph.

    :param raw_dir: forwarded to ``read_csv_heterograph_raw``.
    :param add_inverse_edge: forwarded to ``read_csv_heterograph_raw``.
    :param additional_node_files: names of extra per-node feature files;
        ``None`` (the default) means no extra files. Names without the
        'node_' substring are stored under 'node_' + name.
    :param additional_edge_files: names of extra per-edge feature files;
        ``None`` (the default) means no extra files. Names without the
        'edge_' substring are stored under 'edge_' + name.
    :return: list of DGL heterographs, one per raw graph.
    """
    # Use None sentinels instead of mutable default lists, so that no list
    # object is shared between calls.
    if additional_node_files is None:
        additional_node_files = []
    if additional_edge_files is None:
        additional_edge_files = []

    graph_list = read_csv_heterograph_raw(
        raw_dir, add_inverse_edge,
        additional_node_files=additional_node_files,
        additional_edge_files=additional_edge_files)
    dgl_graph_list = []

    print('Converting graphs into DGL objects...')

    for graph in tqdm(graph_list):
        # Edge connectivity: one list of (src, dst) pairs per relation.
        g_dict = {
            triplet: list(zip(edge_index[0], edge_index[1]))
            for triplet, edge_index in graph["edge_index_dict"].items()
        }
        dgl_hetero_graph = dgl.heterograph(g_dict)

        if graph["edge_feat_dict"] is not None:
            for triplet, feat in graph["edge_feat_dict"].items():
                dgl_hetero_graph.edges[triplet].data["feat"] = \
                    torch.from_numpy(feat)

        if graph["node_feat_dict"] is not None:
            for nodetype, feat in graph["node_feat_dict"].items():
                dgl_hetero_graph.nodes[nodetype].data["feat"] = \
                    torch.from_numpy(feat)

        for key in additional_node_files:
            feat_name = key if 'node_' in key else 'node_' + key
            for nodetype, feat in graph[feat_name].items():
                dgl_hetero_graph.nodes[nodetype].data[feat_name] = \
                    torch.from_numpy(feat)

        for key in additional_edge_files:
            feat_name = key if 'edge_' in key else 'edge_' + key
            for triplet, feat in graph[feat_name].items():
                dgl_hetero_graph.edges[triplet].data[feat_name] = \
                    torch.from_numpy(feat)

        dgl_graph_list.append(dgl_hetero_graph)

    return dgl_graph_list
def test_node_dataloader(sampler_name, pin_graph):
    """Check NodeDataLoader output devices on a homogeneous and a hetero graph.

    :param sampler_name: one of 'full', 'neighbor' or 'neighbor2'; selects
        the sampler built for each graph.
    :param pin_graph: when true and the test context is not the CPU, the
        homogeneous graph is pinned before use and unpinned at the end.
    """
    def _check_batches(loader):
        for input_nodes, output_nodes, blocks in loader:
            _check_device(input_nodes)
            _check_device(output_nodes)
            _check_device(blocks)

    # Homogeneous graph.
    g1 = dgl.graph(([0, 0, 0, 1, 1], [1, 2, 3, 3, 4]))
    if F.ctx() != F.cpu() and pin_graph:
        g1.create_formats_()
        g1.pin_memory_()
    g1.ndata['feat'] = F.copy_to(F.randn((5, 8)), F.cpu())
    g1.ndata['label'] = F.copy_to(F.randn((g1.num_nodes(),)), F.cpu())

    for num_workers in [0, 1, 2]:
        samplers = {
            'full': dgl.dataloading.MultiLayerFullNeighborSampler(2),
            'neighbor': dgl.dataloading.MultiLayerNeighborSampler([3, 3]),
            'neighbor2': dgl.dataloading.MultiLayerNeighborSampler([3, 3]),
        }
        sampler = samplers[sampler_name]
        dataloader = dgl.dataloading.NodeDataLoader(
            g1, g1.nodes(), sampler, device=F.ctx(),
            batch_size=g1.num_nodes(),
            num_workers=num_workers)
        _check_batches(dataloader)

    # Heterogeneous graph.
    g2 = dgl.heterograph({
        ('user', 'follow', 'user'):
            ([0, 0, 0, 1, 1, 1, 2], [1, 2, 3, 0, 2, 3, 0]),
        ('user', 'followed-by', 'user'):
            ([1, 2, 3, 0, 2, 3, 0], [0, 0, 0, 1, 1, 1, 2]),
        ('user', 'play', 'game'): ([0, 1, 1, 3, 5], [0, 1, 2, 0, 2]),
        ('game', 'played-by', 'user'): ([0, 1, 2, 0, 2], [0, 1, 1, 3, 5]),
    })
    for ntype in g2.ntypes:
        feat = F.randn((g2.num_nodes(ntype), 8))
        g2.nodes[ntype].data['feat'] = F.copy_to(feat, F.cpu())
    batch_size = max(g2.num_nodes(nty) for nty in g2.ntypes)
    samplers = {
        'full': dgl.dataloading.MultiLayerFullNeighborSampler(2),
        # Per-edge-type fanout dict, repeated for two layers.
        'neighbor': dgl.dataloading.MultiLayerNeighborSampler(
            [{etype: 3 for etype in g2.etypes}] * 2),
        'neighbor2': dgl.dataloading.MultiLayerNeighborSampler([3, 3]),
    }
    sampler = samplers[sampler_name]

    seed_nodes = {nty: g2.nodes(nty) for nty in g2.ntypes}
    dataloader = dgl.dataloading.NodeDataLoader(
        g2, seed_nodes, sampler, device=F.ctx(), batch_size=batch_size)
    assert isinstance(iter(dataloader), Iterator)
    _check_batches(dataloader)

    if g1.is_pinned():
        g1.unpin_memory_()
def load_ogb(dataset):
    """Load the 'ogbn-mag' dataset as a heterograph with reverse relations.

    For every canonical edge type of the original graph, a reverse relation
    named 'rev-<etype>' is added. The 'paper' nodes carry 'feat', 'labels'
    and boolean 'train_mask' / 'val_mask' / 'test_mask' data.

    :param dataset: dataset name; only 'ogbn-mag' is supported.
    :return: the heterograph.
    :raises ValueError: if ``dataset`` is not 'ogbn-mag'.
    """
    if dataset != 'ogbn-mag':
        # The previous code raised a bare string, which Python 3 rejects
        # with a TypeError. Raise a real exception with the same message.
        raise ValueError("Do not support other ogbn datasets.")

    dataset = DglNodePropPredDataset(name=dataset)
    split_idx = dataset.get_idx_split()
    train_idx = split_idx["train"]['paper']
    val_idx = split_idx["valid"]['paper']
    test_idx = split_idx["test"]['paper']
    hg_orig, labels = dataset[0]

    # Rebuild the graph with a reverse relation for every edge type.
    subgs = {}
    for etype in hg_orig.canonical_etypes:
        u, v = hg_orig.all_edges(etype=etype)
        subgs[etype] = (u, v)
        subgs[(etype[2], 'rev-' + etype[1], etype[0])] = (v, u)
    hg = dgl.heterograph(subgs)
    hg.nodes['paper'].data['feat'] = hg_orig.nodes['paper'].data['feat']
    paper_labels = labels['paper'].squeeze()

    num_rels = len(hg.canonical_etypes)
    num_classes = dataset.num_classes
    print('Number of relations: {}'.format(num_rels))
    print('Number of class: {}'.format(num_classes))
    print('Number of train: {}'.format(len(train_idx)))
    print('Number of valid: {}'.format(len(val_idx)))
    print('Number of test: {}'.format(len(test_idx)))

    num_papers = hg.number_of_nodes('paper')

    def _mask(indices):
        # Boolean mask over all paper nodes, True at the given indices.
        mask = th.zeros((num_papers,), dtype=th.bool)
        mask[indices] = True
        return mask

    paper_data = hg.nodes['paper'].data
    paper_data['train_mask'] = _mask(train_idx)
    paper_data['val_mask'] = _mask(val_idx)
    paper_data['test_mask'] = _mask(test_idx)
    paper_data['labels'] = paper_labels
    return hg
def test_remove_edges():
    """Check ``dgl.remove_edges`` on homogeneous and heterogeneous graphs.

    After removal, every remaining (src, dst, original edge ID) triple must
    exist in the original graph, and none of the removed edge IDs may
    remain.
    """
    def check(g1, etype, g, edges_removed):
        orig_src, orig_dst, orig_eid = g.edges(etype=etype, form='all')
        new_src, new_dst = g1.edges(etype=etype, order='eid')
        # Original edge IDs are stored under dgl.EID on the result graph.
        if etype is None:
            new_eid = g1.edata[dgl.EID]
        else:
            new_eid = g1.edges[etype].data[dgl.EID]

        new_src = F.asnumpy(new_src)
        new_dst = F.asnumpy(new_dst)
        new_eid = F.asnumpy(new_eid)
        original = set(zip(F.asnumpy(orig_src),
                           F.asnumpy(orig_dst),
                           F.asnumpy(orig_eid)))

        for triple in zip(new_src, new_dst, new_eid):
            assert triple in original
        assert not np.isin(edges_removed, new_eid).any()

    removal_cases = [[2], [2, 2], [3, 2], [1, 3, 1, 2]]
    for fmt in ['coo', 'csr', 'csc']:
        for edges_to_remove in removal_cases:
            # Graph built from a list of edge pairs.
            g = dgl.graph([(0, 1), (2, 3), (1, 2), (3, 4)],
                          restrict_format=fmt)
            g1 = dgl.remove_edges(g, F.tensor(edges_to_remove))
            check(g1, None, g, edges_to_remove)

            # Graph built from a SciPy CSR matrix.
            adj = spsp.csr_matrix(
                ([1, 1, 1, 1], ([0, 2, 1, 3], [1, 3, 2, 4])), shape=(5, 5))
            g = dgl.graph(adj, restrict_format=fmt)
            g1 = dgl.remove_edges(g, F.tensor(edges_to_remove))
            check(g1, None, g, edges_to_remove)

    # Heterogeneous graph: remove one edge per relation.
    g = dgl.heterograph({
        ('A', 'AA', 'A'): [(0, 1), (2, 3), (1, 2), (3, 4)],
        ('A', 'AB', 'B'): [(0, 1), (1, 3), (3, 5), (1, 6)],
        ('B', 'BA', 'A'): [(2, 3), (3, 2)],
    })
    g2 = dgl.remove_edges(g, {
        'AA': F.tensor([2]),
        'AB': F.tensor([3]),
        'BA': F.tensor([1]),
    })
    check(g2, 'AA', g, [2])
    check(g2, 'AB', g, [3])
    check(g2, 'BA', g, [1])
def process(self):
    """Build the paper/author/conf heterograph and store names and labels.

    Reads the raw tables through ``self._read_raw_data()``, creates the
    'pa'/'ap' and 'pc'/'cp' relations from the 'pid', 'aid' and 'cid'
    columns, attaches 'label' data to author and conf nodes, and stores the
    name and title lists on the instance.
    """
    authors, papers, confs, paper_author, paper_conf = self._read_raw_data()

    paper_of_author = paper_author['pid'].to_list()
    author_ids = paper_author['aid'].to_list()
    paper_of_conf = paper_conf['pid'].to_list()
    conf_ids = paper_conf['cid'].to_list()

    # Each relation is stored together with its reverse.
    graph = dgl.heterograph({
        ('paper', 'pa', 'author'): (paper_of_author, author_ids),
        ('author', 'ap', 'paper'): (author_ids, paper_of_author),
        ('paper', 'pc', 'conf'): (paper_of_conf, conf_ids),
        ('conf', 'cp', 'paper'): (conf_ids, paper_of_conf),
    })
    self.g = graph

    author_labels = authors['label'].to_numpy()
    conf_labels = confs['label'].to_numpy()
    graph.nodes['author'].data['label'] = torch.from_numpy(author_labels)
    graph.nodes['conf'].data['label'] = torch.from_numpy(conf_labels)

    self.author_names = authors['name'].tolist()
    self.paper_titles = papers['title'].tolist()
    self.conf_names = confs['name'].tolist()
def add_reverse_edges(g):
    """Add a reverse edge type for every edge type of a heterograph.

    For each canonical edge type ``(src, etype, dst)`` the result also has
    ``(dst, etype + '_rev', src)`` with swapped endpoints. Node frames are
    carried over from ``g`` through ``extract_node_subframes`` and
    ``set_new_frames``.

    :param g: DGLGraph, the heterograph
    :return: DGLGraph, a new heterograph with the reverse edges added
    """
    edge_data = {}
    for canonical in g.canonical_etypes:
        src_type, rel_name, dst_type = canonical
        src, dst = g.edges(etype=canonical)
        edge_data[canonical] = (src, dst)
        edge_data[(dst_type, rel_name + '_rev', src_type)] = (dst, src)

    # Keep the node counts so that nodes without edges are preserved.
    node_counts = {ntype: g.num_nodes(ntype) for ntype in g.ntypes}
    new_g = dgl.heterograph(edge_data, node_counts)
    set_new_frames(new_g, node_frames=extract_node_subframes(g, None))
    return new_g
def _generate_dec_graph(self, rating_pairs):
    """Build the user -> item graph that holds one edge per rating pair.

    :param rating_pairs: pair of index arrays (row indices, column indices)
        used as the coordinates of a ``num_user x num_item`` COO matrix.
    :return: heterograph with a single ('user', 'rate', 'item') relation
        and explicit node counts taken from ``self``.
    """
    shape = (self.num_user, self.num_item)
    # One unit entry marks an edge for every (user, item) pair.
    indicator = np.ones_like(rating_pairs[0])
    # The COO matrix is only an intermediate step for the graph conversion.
    coo = sp.coo_matrix(
        (indicator, rating_pairs), shape=shape, dtype=np.float32)
    bipartite = dgl.bipartite_from_scipy(
        coo, utype='_U', etype='_E', vtype='_V')
    node_counts = {'user': self.num_user, 'item': self.num_item}
    return dgl.heterograph(
        {('user', 'rate', 'item'): bipartite.edges()},
        num_nodes_dict=node_counts)
def get_hetero_graph(glass_tc_pos_path: str, control_tc_pos_path: str, t2t_threshold: float = 850.0, u2t_threshold: float = np.inf, weight: float = (1.0, 1.0, 1.0)):
    """Build a 'tc' / 'control' heterograph from two position CSV files.

    Edges are created between nodes whose weighted Euclidean distance is
    at most the corresponding threshold. Positions are min-max scaled and
    stored as 'position' node data.

    :param glass_tc_pos_path: CSV with Position_x/y/z columns.
    :param control_tc_pos_path: CSV with Position_x/y/z columns.
    :param t2t_threshold: maximum distance for an edge stored under ``t2t``.
    :param u2t_threshold: maximum distance for an edge stored under ``u2t``.
    :param weight: multiplied element-wise with the squared coordinate
        differences. NOTE(review): annotated as ``float`` but the default
        is a 3-tuple -- the annotation looks wrong.
    :return: the heterograph.
    """
    def dist_func(u, v):
        return np.sqrt(((u - v) ** 2 * weight).sum())

    POS_COLS = ['Position_x', 'Position_y', 'Position_z']

    g_tc_pos = pd.read_csv(glass_tc_pos_path)[POS_COLS].to_numpy()
    c_tc_pos = pd.read_csv(control_tc_pos_path)[POS_COLS].to_numpy()
    # 'tc' nodes are the glass rows followed by the control rows.
    tc_pos = np.concatenate([g_tc_pos, c_tc_pos], axis=0)
    n_tc = tc_pos.shape[0]

    def _edges_within(points_a, points_b, threshold):
        close = cdist(points_a, points_b, dist_func) <= threshold
        return torch.nonzero(torch.tensor(close).bool(), as_tuple=True)

    graph_data = dict()
    # NOTE(review): `t2t` and `u2t` are not defined in this function;
    # presumably module-level canonical edge type triples -- confirm.
    # construct 'tc' to 'tc' edges
    graph_data[t2t] = _edges_within(tc_pos, tc_pos, t2t_threshold)
    # construct 'control' to 'tc' edges
    graph_data[u2t] = _edges_within(c_tc_pos, tc_pos, u2t_threshold)
    g = dgl.heterograph(graph_data)

    # standardize positions (tc rows first, then the control rows again)
    stacked = np.concatenate([tc_pos, c_tc_pos], axis=0)
    pos_std = MinMaxScaler().fit_transform(stacked)
    g.nodes['tc'].data['position'] = torch.from_numpy(
        pos_std[:n_tc, :]).float()
    g.nodes['control'].data['position'] = torch.from_numpy(
        pos_std[n_tc:, :]).float()

    # Binary indicator stored under 'is-glass-tc'.
    # NOTE(review): glass rows are set to 0 and the remaining rows stay 1,
    # which looks inverted relative to the key name -- confirm intent.
    is_glass_tc = torch.ones(n_tc, 1)
    is_glass_tc[:g_tc_pos.shape[0], :] = 0
    g.nodes['tc'].data['is-glass-tc'] = is_glass_tc
    return g
def build_graph(relations_list, relations_data_list):
    """Create a heterograph from parallel relation / edge-data lists.

    :param relations_list: relation keys, one per entry of
        ``relations_data_list``.
    :param relations_data_list: edge data for the relation at the same
        position; must be at least as long as ``relations_list``.
    :return: the heterograph; a summary of its types and sizes is printed.
    """
    # Index explicitly so a too-short data list still raises IndexError.
    relations_data_dic = {
        relation: relations_data_list[position]
        for position, relation in enumerate(relations_list)
    }
    graph = dgl.heterograph(relations_data_dic)

    print('Node types:', graph.ntypes)
    print('Edge types:', graph.etypes)
    print('Canonical edge types:', graph.canonical_etypes)
    for canonical in graph.canonical_etypes:
        label = 'graph number edges--' + str(canonical) + ':'
        print(label, graph.number_of_edges(canonical))
    for ntype in graph.ntypes:
        label = 'graph number nodes--' + str(ntype) + ':'
        print(label, graph.number_of_nodes(ntype))
    return graph
def create_test_heterograph_2(idtype):
    """Create a random five-relation heterograph for tests.

    Three relations share one set of 25 random edges, 'wishes' uses 10
    random edges and 'develops' uses 1000 random edges.

    :param idtype: index dtype passed to ``dgl.heterograph``.
    :return: the graph, placed on ``F.ctx()``.
    """
    def _random_pair(upper, count):
        # Source IDs are drawn before destination IDs.
        sources = np.random.randint(0, upper, count)
        targets = np.random.randint(0, upper, count)
        return sources, targets

    shared = _random_pair(50, 25)
    wishes = _random_pair(25, 10)
    develops = _random_pair(100, 1000)

    relations = {
        ('user', 'becomes', 'player'): shared,
        ('user', 'follows', 'user'): shared,
        ('user', 'plays', 'game'): shared,
        ('user', 'wishes', 'game'): wishes,
        ('developer', 'develops', 'game'): develops,
    }
    g = dgl.heterograph(relations, idtype=idtype, device=F.ctx())
    assert g.idtype == idtype
    assert g.device == F.ctx()
    return g
def giveGraphs(self, batch_size, voxel_pos):
    """Build the batched PMT/voxel graph and the voxel-only graph.

    Edge lists are read from ``data/p2v_spec.npy`` and ``data/v2v.npy``;
    each file holds a nested list that is flattened by one level.

    :param batch_size: number of copies of the homogeneous graph to batch.
    :param voxel_pos: not used by this method; kept for interface
        compatibility.
    :return: tuple ``(G, G_vox)`` where ``G`` batches ``batch_size`` copies
        of the homogenised PMT/voxel graph with self loops, and ``G_vox``
        is the voxel-to-voxel graph with self loops.
    """
    def _load_flat(path):
        # Load a pickled nested list and flatten it by one level.
        nested = np.load(path, allow_pickle=True).tolist()
        return [item for sublist in nested for item in sublist]

    # The previous code also loaded data/p2p.npy and data/v2v_6.npy but
    # never used the results; those dead loads are dropped.
    p2v = _load_flat("data/p2v_spec.npy")
    v2v = _load_flat("data/v2v.npy")

    G_vox = dgl.add_self_loop(dgl.graph(v2v))

    graph_data = {('PMT', 'p2v', 'vox'): p2v, ('vox', 'v2v', 'vox'): v2v}
    g = dgl.heterograph(graph_data)
    g = dgl.to_homogeneous(g)
    g = dgl.add_self_loop(g)

    G = dgl.batch([g for _ in range(batch_size)])
    return G, G_vox
def _build_graph(self):
    """Build the movie/actor/director heterograph from ``self.data``.

    Every row contributes one movie-director pair and one movie-actor pair
    for each of the three actor columns whose value is in ``self.actors``.
    Pairs are de-duplicated, and each relation is stored with its reverse.
    """
    actor_columns = ('actor_1_name', 'actor_2_name', 'actor_3_name')
    movie_actor = set()
    movie_director = set()
    for movie_id, row in self.data.iterrows():
        director_id = self.directors.index(row['director_name'])
        movie_director.add((movie_id, director_id))
        for column in actor_columns:
            name = row[column]
            if name in self.actors:
                movie_actor.add((movie_id, self.actors.index(name)))

    movie_actor = list(movie_actor)
    movie_director = list(movie_director)
    ma_m = [pair[0] for pair in movie_actor]
    ma_a = [pair[1] for pair in movie_actor]
    md_m = [pair[0] for pair in movie_director]
    md_d = [pair[1] for pair in movie_director]

    relations = {
        ('movie', 'ma', 'actor'): (ma_m, ma_a),
        ('actor', 'am', 'movie'): (ma_a, ma_m),
        ('movie', 'md', 'director'): (md_m, md_d),
        ('director', 'dm', 'movie'): (md_d, md_m),
    }
    return dgl.heterograph(relations)
def create_test_heterograph(idtype):
    """Create the small user/game/developer heterograph used in tests.

    It is the heterograph from the docstring plus a user -- wishes -- game
    relation: 3 users, 2 games, 2 developers. The metagraph is
    ('user', 'follows', 'user'), ('user', 'plays', 'game'),
    ('user', 'wishes', 'game'), ('developer', 'develops', 'game').

    :param idtype: index dtype passed to ``dgl.heterograph``.
    :return: the graph, placed on ``F.ctx()``.
    """
    follows = ([0, 1, 2, 1], [0, 0, 1, 1])
    plays = ([0, 1, 2, 1], [0, 0, 1, 1])
    wishes = ([0, 1, 1], [0, 0, 1])
    develops = ([0, 1, 0], [0, 1, 1])

    relations = {
        ('user', 'follows', 'user'): follows,
        ('user', 'plays', 'game'): plays,
        ('user', 'wishes', 'game'): wishes,
        ('developer', 'develops', 'game'): develops,
    }
    g = dgl.heterograph(relations, idtype=idtype, device=F.ctx())
    assert g.idtype == idtype
    assert g.device == F.ctx()
    return g
def test_sage_conv_bi2(idtype, aggre_type):
    """Run SAGEConv on a bipartite graph that has no edges.

    The graph has 5 source nodes ('_U') and 3 destination nodes ('_V').
    For every aggregator the output must have 3 rows and 2 columns.

    ``aggre_type`` is accepted but not consulted: the aggregator types are
    enumerated explicitly in the body.
    """
    num_src, num_dst, out_feats = 5, 3, 2
    g = dgl.heterograph(
        {('_U', '_E', '_V'): ([], [])},
        {'_U': num_src, '_V': num_dst})
    g = g.astype(idtype).to(F.ctx())
    ctx = F.ctx()

    def _check(dst_feats, aggregator):
        # Create the layer, then the features, then initialise and run.
        conv = nn.SAGEConv((3, dst_feats), out_feats, aggregator)
        inputs = (F.randn((num_src, 3)), F.randn((num_dst, dst_feats)))
        conv.initialize(ctx=ctx)
        out = conv(g, inputs)
        assert out.shape[-1] == out_feats
        assert out.shape[0] == num_dst

    _check(3, 'gcn')
    for agg in ['mean', 'pool']:
        _check(1, agg)
def test_sort_with_tag_bipartite(idtype):
    """Check tag-based CSR/CSC sorting on a bipartite graph.

    After ``sort_csr_by_tag`` / ``sort_csc_by_tag`` the sorted adjacency
    must pass ``check_sort`` with the stored '_TAG_OFFSET' data, while the
    unsorted adjacency must fail it.
    """
    num_nodes, num_adj, num_tags = 200, [20, 50], 5
    base = create_test_heterograph(num_nodes, num_adj, idtype=idtype)
    g = dgl.heterograph({('_U', '_E', '_V'): base.edges()})
    # Source-node tags are drawn before destination-node tags.
    utag = F.tensor(np.random.choice(num_tags, g.number_of_nodes('_U')))
    vtag = F.tensor(np.random.choice(num_tags, g.number_of_nodes('_V')))

    # CSR: tags on '_V', offsets stored on '_U'.
    sorted_g = dgl.sort_csr_by_tag(g, vtag)
    unsorted_csr = g.adjacency_matrix(scipy_fmt='csr')
    sorted_csr = sorted_g.adjacency_matrix(scipy_fmt='csr')
    csr_offsets = sorted_g.nodes['_U'].data['_TAG_OFFSET']
    assert check_sort(sorted_csr, vtag, csr_offsets)
    assert not check_sort(unsorted_csr, vtag)

    # CSC: tags on '_U', offsets stored on '_V'.
    sorted_g = dgl.sort_csc_by_tag(g, utag)
    unsorted_csc = g.adjacency_matrix(transpose=True, scipy_fmt='csr')
    sorted_csc = sorted_g.adjacency_matrix(transpose=True, scipy_fmt='csr')
    csc_offsets = sorted_g.nodes['_V'].data['_TAG_OFFSET']
    assert check_sort(sorted_csc, utag, csc_offsets)
    assert not check_sort(unsorted_csc, utag)
def construct_graph(edges, nodes, target_node_type):
    """Build a heterograph from edge-list files around one target node type.

    Each edge list is parsed with ``parse_edgelist``. The target node type
    is renamed to 'target', and every relation is stored together with its
    reverse under the names 'src<>dst' and 'dst<>src'. Edge lists whose
    endpoints are both the target type are skipped; a 'self_relation' loop
    on every target node is added instead.

    :param edges: iterable of edge-list identifiers passed to
        ``parse_edgelist``.
    :param nodes: passed to ``get_features`` to read target node features.
    :param target_node_type: name of the node type to rename to 'target'.
    :return: tuple ``(g, features, target_id_to_node, id_to_node)``.
    """
    print("Getting relation graphs from the following edge lists : {} ".format(edges))
    edgelists = {}
    id_to_node = {}
    for edge in edges:
        edgelist, rev_edgelist, id_to_node, src, dst = parse_edgelist(
            edge, id_to_node, header=True)
        src = 'target' if src == target_node_type else src
        dst = 'target' if dst == target_node_type else dst

        if src == 'target' and dst == 'target':
            print("Will add self loop for target later......")
            continue

        relation = src + '<>' + dst
        forward_key = (src, relation, dst)
        reverse_key = (dst, dst + '<>' + src, src)
        if forward_key in edgelists:
            # Relation seen before: extend both directions with new lists.
            edgelists[forward_key] = edgelists[forward_key] + edgelist
            edgelists[reverse_key] = edgelists[reverse_key] + rev_edgelist
            print("Append edges for {} from edgelist: {}".format(relation, edge))
        else:
            edgelists[forward_key] = edgelist
            edgelists[reverse_key] = rev_edgelist
            print("Read edges for {} from edgelist: {}".format(relation, edge))

    # get features for target nodes
    target_id_to_node = id_to_node[target_node_type]
    features, _ = get_features(target_id_to_node, nodes)
    print("Read in features for target nodes")

    # add self relation
    edgelists[('target', 'self_relation', 'target')] = [
        (node_id, node_id) for node_id in target_id_to_node.values()]

    g = dgl.heterograph(edgelists)
    print(
        "Constructed heterograph with the following metagraph structure: Node types {}, Edge types{}".format(
            g.ntypes, g.canonical_etypes))
    print("Number of nodes of type target : {}".format(g.number_of_nodes('target')))

    g.nodes['target'].data['features'] = th.from_numpy(features)

    # Re-key the target mapping under 'target' and drop the old key.
    id_to_node['target'] = target_id_to_node
    del id_to_node[target_node_type]

    return g, features, target_id_to_node, id_to_node
def __init__(self, all_click_train, all_click_test, click_qtime, user_mapped_id, item_mapped_id, batch_size=128):
    """Map raw click data to remapped IDs and build the training graph.

    :param all_click_train: training clicks, remapped via ``map_id_df``.
    :param all_click_test: test clicks, remapped via ``map_id_df``.
    :param click_qtime: table with a 'user_id' column, joined against
        ``user_mapped_id`` on 'org_id'.
    :param user_mapped_id: table with 'org_id' and 'remap_id' columns.
    :param item_mapped_id: table with 'org_id' and 'remap_id' columns.
    :param batch_size: stored on the instance as ``self.batch_size``.
    """
    ui_relation = ('user', 'ui', 'item')

    self.mapped_train_df = self.map_id_df(
        all_click_train, user_mapped_id, item_mapped_id)
    self.mapped_test_df = self.map_id_df(
        all_click_test, user_mapped_id, item_mapped_id)

    # Attach the remapped user ID to every query-time row.
    qtime = pd.merge(left=click_qtime, right=user_mapped_id,
                     left_on='user_id', right_on='org_id')
    self.mapped_qtime_df = qtime
    qtime = qtime.drop(['org_id'], axis=1)
    self.mapped_qtime_df = qtime
    qtime = qtime.rename(columns={'remap_id': 'mapped_user_id'})
    self.mapped_qtime_df = qtime
    self.qtime_users = qtime['mapped_user_id'].tolist()

    self.train_items = self.get_users_items_dict(self.mapped_train_df)
    self.test_items = self.get_users_items_dict(self.mapped_test_df)

    self.train_edge_dict = self.get_edge_list(self.mapped_train_df)
    self.test_edge_dict = self.get_edge_list(self.mapped_test_df)
    self.n_train = len(self.train_edge_dict[ui_relation])
    self.n_test = len(self.test_edge_dict[ui_relation])

    # Reverse lookups from remapped IDs back to the original IDs.
    self.mapped_userId_to_org_userId = dict(
        zip(user_mapped_id.remap_id, user_mapped_id.org_id))
    self.mapped_itemId_to_org_itemId = dict(
        zip(item_mapped_id.remap_id, item_mapped_id.org_id))

    # Only the training edges go into the graph.
    graph = dgl.heterograph(self.train_edge_dict)
    self.G = graph
    self.n_items = graph.number_of_nodes('item')
    self.n_users = graph.number_of_nodes('user')
    self.users = graph.nodes('user').detach().cpu().numpy().tolist()
    self.items = graph.nodes('item').detach().cpu().numpy().tolist()
    self.batch_size = batch_size