def test_to_simple(index_dtype):
    g = dgl.heterograph({
        ('user', 'follow', 'user'): [(0, 1), (1, 3), (2, 2), (1, 3), (1, 4), (1, 4)],
        ('user', 'plays', 'game'): [(3, 5), (2, 3), (1, 4), (1, 4), (3, 5), (2, 3), (2, 3)]},
        index_dtype=index_dtype)
    sg = dgl.to_simple(g, return_counts='weights', writeback_mapping='new_eid')

    for etype in g.canonical_etypes:
        u, v = g.all_edges(form='uv', order='eid', etype=etype)
        u = F.asnumpy(u).tolist()
        v = F.asnumpy(v).tolist()
        uv = list(zip(u, v))
        eid_map = F.asnumpy(g.edges[etype].data['new_eid'])

        su, sv = sg.all_edges(form='uv', order='eid', etype=etype)
        su = F.asnumpy(su).tolist()
        sv = F.asnumpy(sv).tolist()
        suv = list(zip(su, sv))
        sw = F.asnumpy(sg.edges[etype].data['weights'])

        # Every unique edge survives, its count equals its multiplicity in g,
        # and the writeback mapping points each original edge at its copy in sg.
        assert set(uv) == set(suv)
        for i, e in enumerate(suv):
            assert sw[i] == sum(e == _e for _e in uv)
        for i, e in enumerate(uv):
            assert eid_map[i] == suv.index(e)
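
# A minimal sketch of the semantics the test exercises, on a tiny homogeneous
# graph. (The test above targets an older DGL API in which writeback_mapping
# was a feature name written into g.edata; recent DGL releases instead accept
# writeback_mapping=True and return the mapping alongside the simple graph,
# as sketched here.)
import dgl
import torch

g = dgl.graph((torch.tensor([0, 1, 1]), torch.tensor([1, 2, 2])))  # (1, 2) twice
sg, wb = dgl.to_simple(g, return_counts='count', writeback_mapping=True)
assert sg.num_edges() == 2                       # duplicates removed
assert sg.edata['count'].sum() == g.num_edges()  # counts preserve multiplicity
assert wb.shape[0] == g.num_edges()              # original eid -> new eid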
def to_bidirected_with_reverse_mapping(g):
    """Makes a graph bidirectional, and returns a mapping array ``mapping``
    where ``mapping[i]`` is the reverse edge of edge ID ``i``.

    Does not work with graphs that have self-loops.
    """
    g_simple, mapping = dgl.to_simple(
        dgl.add_reverse_edges(g), return_counts='count', writeback_mapping=True)
    c = g_simple.edata['count']
    num_edges = g.num_edges()
    mapping_offset = torch.zeros(g_simple.num_edges() + 1, dtype=g_simple.idtype)
    mapping_offset[1:] = c.cumsum(0)
    idx = mapping.argsort()
    idx_uniq = idx[mapping_offset[:-1]]
    reverse_idx = torch.where(
        idx_uniq >= num_edges, idx_uniq - num_edges, idx_uniq + num_edges)
    reverse_mapping = mapping[reverse_idx]

    # Correctness check
    src1, dst1 = g_simple.edges()
    src2, dst2 = g_simple.find_edges(reverse_mapping)
    assert torch.equal(src1, dst2)
    assert torch.equal(src2, dst1)
    return g_simple, reverse_mapping
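
# Hypothetical usage of the helper on a small self-loop-free directed cycle.
# The returned mapping must be an involution: the reverse of the reverse edge
# is the edge itself.
import dgl
import torch

g = dgl.graph((torch.tensor([0, 1, 2]), torch.tensor([1, 2, 0])))
g_bid, rev = to_bidirected_with_reverse_mapping(g)
assert torch.equal(rev[rev], torch.arange(g_bid.num_edges()))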
def edge2graph(self, edge_batch):
    # Consecutive node pairs in each walk become edges.
    u = edge_batch[:, :-1].reshape(-1)
    v = edge_batch[:, 1:].reshape(-1)
    if self.symmetric:
        tmp = u
        u = th.cat((u, v), dim=0)
        v = th.cat((v, tmp), dim=0)
    g = dgl.graph((u, v))
    # Deduplicate; 'w' records how many times each (u, v) pair occurred.
    sg = dgl.to_simple(g, return_counts='w')
    return sg
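
# A standalone sketch of what edge2graph computes, assuming edge_batch is a
# (num_walks, walk_length) tensor of node IDs and self.symmetric is False.
import dgl
import torch as th

edge_batch = th.tensor([[0, 1, 2], [0, 1, 3]])  # two walks of length 3
u = edge_batch[:, :-1].reshape(-1)
v = edge_batch[:, 1:].reshape(-1)
sg = dgl.to_simple(dgl.graph((u, v)), return_counts='w')
# (0, 1) occurs in both walks, so its 'w' is 2; the other edges get 1.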
def coalesce_graph(graph, aggr_type='sum', copy_data=False):
    """Coalesce a multi-edge graph, aggregating duplicate-edge weights.

    Args:
        graph (DGLGraph): input graph with 'edge_weight' in edata
        aggr_type (str): aggregator for duplicate-edge weights, 'sum' or 'mean'
        copy_data (bool): whether to copy ndata and edata into the new graph

    Returns:
        DGLGraph: the coalesced graph
    """
    src, dst = graph.edges()
    graph_df = pd.DataFrame({'src': src, 'dst': dst})
    graph_df['edge_weight'] = graph.edata['edge_weight'].numpy()

    if aggr_type == 'sum':
        tmp = graph_df.groupby(['src', 'dst'])['edge_weight'].sum().reset_index()
    elif aggr_type == 'mean':
        tmp = graph_df.groupby(['src', 'dst'])['edge_weight'].mean().reset_index()
    else:
        raise ValueError("Unknown aggr_type: {}".format(aggr_type))

    if copy_data:
        graph = dgl.to_simple(graph, copy_ndata=True, copy_edata=True)
    else:
        graph = dgl.to_simple(graph)

    # Align the aggregated weights with the deduplicated edge order.
    src, dst = graph.edges()
    graph_df = pd.DataFrame({'src': src, 'dst': dst})
    graph_df = pd.merge(graph_df, tmp, how='left', on=['src', 'dst'])
    graph.edata['edge_weight'] = torch.from_numpy(graph_df['edge_weight'].values).unsqueeze(1)
    graph.edata.pop('count')
    return graph
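
# Hypothetical usage of coalesce_graph: two parallel (0, 1) edges with weights
# 1 and 2 collapse into a single edge of weight 3 under aggr_type='sum'.
import dgl
import torch

g = dgl.graph((torch.tensor([0, 0, 1]), torch.tensor([1, 1, 2])))
g.edata['edge_weight'] = torch.tensor([1., 2., 5.])
g = coalesce_graph(g, aggr_type='sum')
assert g.num_edges() == 2
assert float(g.edata['edge_weight'].sum()) == 8.0  # 3 + 5, order-independent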
def __init__(self, g, split_edge, hop=1, neg_samples=1, subsample_ratio=1, prefix=None,
             save_dir=None, num_workers=32, shuffle=True, use_coalesce=True, print_fn=print):
    self.g = g
    self.hop = hop
    self.subsample_ratio = subsample_ratio
    self.prefix = prefix
    self.save_dir = save_dir
    self.print_fn = print_fn

    self.generator = PosNegEdgesGenerator(g=self.g,
                                          split_edge=split_edge,
                                          neg_samples=neg_samples,
                                          subsample_ratio=subsample_ratio,
                                          shuffle=shuffle)
    if use_coalesce:
        for k, v in g.edata.items():
            g.edata[k] = v.float()  # dgl.to_simple() with aggregator='sum' requires float edata
        self.g = dgl.to_simple(g, copy_ndata=True, copy_edata=True, aggregator='sum')

    self.ndata = {k: v for k, v in self.g.ndata.items()}
    self.edata = {k: v for k, v in self.g.edata.items()}
    self.g.ndata.clear()
    self.g.edata.clear()
    self.print_fn("Save ndata and edata in class.")
    self.print_fn("Clear ndata and edata in graph.")

    self.sampler = SEALSampler(graph=self.g,
                               hop=hop,
                               num_workers=num_workers,
                               print_fn=print_fn)
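
# The coalescing step above in isolation: with aggregator='sum', dgl.to_simple
# sums the edge features of duplicate edges, which (per the comment in the
# code above) requires float edata in the DGL version this code targets.
import dgl
import torch

g = dgl.graph((torch.tensor([0, 0]), torch.tensor([1, 1])))
g.edata['w'] = torch.tensor([1., 2.])
sg = dgl.to_simple(g, copy_edata=True, aggregator='sum')
assert float(sg.edata['w'][0]) == 3.0  # the two parallel edges are summed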
def build_hetgnn_graph(self, length, walks, restart_prob):
    # Note: [[[[], []]] * n] * n would alias the same inner lists, so build
    # the 3 x 3 table of (src, dst) list pairs explicitly.
    edges = [[[[], []] for _ in range(3)] for _ in range(3)]
    for i in range(self.g.number_of_nodes()):
        nodes = th.tensor([i]).repeat(walks)
        traces, types = dgl.sampling.random_walk(
            self.g, nodes, length=length, restart_prob=restart_prob)
        concat_vids, _, _, _ = dgl.sampling.pack_traces(traces, types)
        concat_types = th.index_select(self.NTYPE, 0, concat_vids)
        uid = concat_vids[0]
        utype = concat_types[0]
        for vid, vtype in zip(concat_vids, concat_types):
            edges[int(utype)][int(vtype)][0].append(self.NID[uid])
            edges[int(utype)][int(vtype)][1].append(self.NID[vid])

    edge_dict = {}
    k = {}
    num_ntypes = int(self.NTYPE.max()) + 1
    for i in range(num_ntypes):
        for j in range(num_ntypes):
            edge = (self.hg.ntypes[j],
                    self.hg.ntypes[j] + '-' + self.hg.ntypes[i],
                    self.hg.ntypes[i])
            edge_dict[edge] = (th.tensor(edges[i][j][1]), th.tensor(edges[i][j][0]))
            k[edge] = 3 if j == 2 else 10

    neighbor_graph = dgl.heterograph(edge_dict, self.num_nodes)
    # Deduplicate random-walk edges; the duplicate count becomes the weight.
    neighbor_graph = dgl.to_simple(neighbor_graph, return_counts=self.weight_column)
    neighbor_graph = select_topk(neighbor_graph, k, self.weight_column)
    return neighbor_graph
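
# The dedup-and-prune pattern above in isolation: duplicate random-walk edges
# collapse into one edge whose count acts as the neighbor weight, then each
# node keeps only its k heaviest incoming edges. A sketch assuming select_topk
# is dgl.sampling.select_topk.
import dgl
import torch as th

g = dgl.graph((th.tensor([0, 0, 0, 1]), th.tensor([1, 1, 2, 2])))
sg = dgl.to_simple(g, return_counts='weight')
topk = dgl.sampling.select_topk(sg, 1, 'weight')  # top-1 in-edge per node
assert topk.num_edges() == 2  # one in-edge kept for node 1, one for node 2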
def add_reverse_hetero(g, combine_like=True):
    r"""Add reverse edges to a heterogeneous graph.

    Parameters
    ----------
    g : DGLGraph
        The heterogeneous graph where reverse edges should be added.
    combine_like : bool, optional
        Whether reverse edges whose source and destination node types are
        identical should be merged into the existing edge type, rather than
        creating a new edge type. Default: True.
    """
    relations = {}
    num_nodes_dict = {ntype: g.num_nodes(ntype) for ntype in g.ntypes}
    for metapath in g.canonical_etypes:
        src_ntype, rel_type, dst_ntype = metapath
        src, dst = g.all_edges(etype=rel_type)
        if src_ntype == dst_ntype and combine_like:
            # Make the relation undirected instead of adding a reverse edge type
            relations[metapath] = (th.cat([src, dst], dim=0),
                                   th.cat([dst, src], dim=0))
        else:
            relations[metapath] = (src, dst)  # Original edges
            reverse_metapath = (dst_ntype, 'rev-' + rel_type, src_ntype)
            relations[reverse_metapath] = (dst, src)  # Reverse edges

    new_g = dgl.heterograph(relations, num_nodes_dict=num_nodes_dict)
    # Remove duplicate edges
    new_g = dgl.to_simple(new_g, return_counts=None, writeback_mapping=False, copy_ndata=True)
    # copy_ndata shares features with g; clone them so new_g owns its own copies
    for ntype in g.ntypes:
        for k, v in g.nodes[ntype].data.items():
            new_g.nodes[ntype].data[k] = v.detach().clone()
    return new_g
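
# Hypothetical usage of add_reverse_hetero: a same-type relation is made
# undirected in place, while a cross-type relation gains a 'rev-' edge type.
import dgl
import torch as th

g = dgl.heterograph({
    ('user', 'follows', 'user'): (th.tensor([0]), th.tensor([1])),
    ('user', 'plays', 'game'): (th.tensor([0]), th.tensor([0]))})
new_g = add_reverse_hetero(g)
assert ('game', 'rev-plays', 'user') in new_g.canonical_etypes
assert new_g.num_edges('follows') == 2  # both directions, deduplicated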
def test_to_simple(index_dtype):
    # homogeneous graph
    g = dgl.graph((F.tensor([0, 1, 2, 1]), F.tensor([1, 2, 0, 2])))
    g.ndata['h'] = F.tensor([[0.], [1.], [2.]])
    g.edata['h'] = F.tensor([[3.], [4.], [5.], [6.]])
    sg, wb = dgl.to_simple(g, writeback_mapping=True)
    u, v = g.all_edges(form='uv', order='eid')
    u = F.asnumpy(u).tolist()
    v = F.asnumpy(v).tolist()
    uv = list(zip(u, v))
    eid_map = F.asnumpy(wb)

    su, sv = sg.all_edges(form='uv', order='eid')
    su = F.asnumpy(su).tolist()
    sv = F.asnumpy(sv).tolist()
    suv = list(zip(su, sv))
    sc = F.asnumpy(sg.edata['count'])
    assert set(uv) == set(suv)
    for i, e in enumerate(suv):
        assert sc[i] == sum(e == _e for _e in uv)
    for i, e in enumerate(uv):
        assert eid_map[i] == suv.index(e)
    # shared ndata
    assert F.array_equal(sg.ndata['h'], g.ndata['h'])
    assert 'h' not in sg.edata
    # new ndata to sg
    sg.ndata['hh'] = F.tensor([[0.], [1.], [2.]])
    assert 'hh' not in g.ndata

    sg = dgl.to_simple(g, writeback_mapping=False, copy_ndata=False)
    assert 'h' not in sg.ndata
    assert 'h' not in sg.edata

    # heterogeneous graph
    g = dgl.heterograph({
        ('user', 'follow', 'user'): ([0, 1, 2, 1, 1, 1], [1, 3, 2, 3, 4, 4]),
        ('user', 'plays', 'game'): ([3, 2, 1, 1, 3, 2, 2], [5, 3, 4, 4, 5, 3, 3])},
        index_dtype=index_dtype)
    g.nodes['user'].data['h'] = F.tensor([0, 1, 2, 3, 4])
    g.nodes['user'].data['hh'] = F.tensor([0, 1, 2, 3, 4])
    g.edges['follow'].data['h'] = F.tensor([0, 1, 2, 3, 4, 5])
    sg, wb = dgl.to_simple(g, return_counts='weights', writeback_mapping=True, copy_edata=True)
    g.nodes['game'].data['h'] = F.tensor([0, 1, 2, 3, 4, 5])

    for etype in g.canonical_etypes:
        u, v = g.all_edges(form='uv', order='eid', etype=etype)
        u = F.asnumpy(u).tolist()
        v = F.asnumpy(v).tolist()
        uv = list(zip(u, v))
        eid_map = F.asnumpy(wb[etype])

        su, sv = sg.all_edges(form='uv', order='eid', etype=etype)
        su = F.asnumpy(su).tolist()
        sv = F.asnumpy(sv).tolist()
        suv = list(zip(su, sv))
        sw = F.asnumpy(sg.edges[etype].data['weights'])

        assert set(uv) == set(suv)
        for i, e in enumerate(suv):
            assert sw[i] == sum(e == _e for _e in uv)
        for i, e in enumerate(uv):
            assert eid_map[i] == suv.index(e)
    # shared ndata
    assert F.array_equal(sg.nodes['user'].data['h'], g.nodes['user'].data['h'])
    assert F.array_equal(sg.nodes['user'].data['hh'], g.nodes['user'].data['hh'])
    assert 'h' not in sg.nodes['game'].data
    # new ndata to sg
    sg.nodes['user'].data['hhh'] = F.tensor([0, 1, 2, 3, 4])
    assert 'hhh' not in g.nodes['user'].data
    # share edata
    feat_idx = F.asnumpy(wb[('user', 'follow', 'user')])
    _, indices = np.unique(feat_idx, return_index=True)
    assert np.array_equal(F.asnumpy(sg.edges['follow'].data['h']),
                          F.asnumpy(g.edges['follow'].data['h'])[indices])

    sg = dgl.to_simple(g, writeback_mapping=False, copy_ndata=False)
    for ntype in g.ntypes:
        assert g.number_of_nodes(ntype) == sg.number_of_nodes(ntype)
    assert 'h' not in sg.nodes['user'].data
    assert 'hh' not in sg.nodes['user'].data
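
# The sharing semantics the test checks, in brief: with copy_ndata=True (the
# default), sg initially shares g's node features, but keys added to either
# graph afterwards stay local to that graph.
import dgl
import torch

g = dgl.graph((torch.tensor([0, 1, 1]), torch.tensor([1, 2, 2])))
g.ndata['h'] = torch.tensor([[0.], [1.], [2.]])
sg = dgl.to_simple(g)              # copy_ndata defaults to True
assert torch.equal(sg.ndata['h'], g.ndata['h'])
sg.ndata['hh'] = torch.zeros(3, 1)
assert 'hh' not in g.ndata         # later additions do not propagate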
label_min_index = source_data.y.min()
label_max_index = source_data.y.max()
node_label_num = label_max_index - label_min_index + 1

## data processing
# To avoid missing (isolated) nodes, add a self-loop for every node to the
# edge index before building the graph.
self_loop = torch.arange(source_data.x.shape[0])
self_loop = self_loop.unsqueeze(1).repeat(1, 2)
src_edge_index_sl = torch.cat([source_data.edge_index.T, self_loop]).T  # [2, E + N]
self_loop = torch.arange(target_data.x.shape[0])
self_loop = self_loop.unsqueeze(1).repeat(1, 2)
tgt_edge_index_sl = torch.cat([target_data.edge_index.T, self_loop]).T  # [2, E + N]
del self_loop

## generate train graph
source_graph = dgl.to_simple(dgl.graph((src_edge_index_sl[0], src_edge_index_sl[1])))
target_graph = dgl.to_simple(dgl.graph((tgt_edge_index_sl[0], tgt_edge_index_sl[1])))
## make the edge index bidirected
source_graph = dgl.to_bidirected(source_graph)
target_graph = dgl.to_bidirected(target_graph)
src_edge_index_sl = torch.vstack([source_graph.edges()[0], source_graph.edges()[1]])
tgt_edge_index_sl = torch.vstack([target_graph.edges()[0], target_graph.edges()[1]])

## generate all node-pair labels
source_node_num = source_data.x.shape[0]
target_node_num = target_data.x.shape[0]
source_node_feat = source_data.x
target_node_feat = target_data.x
source_node_label = source_data.y
target_node_label = target_data.y
del source_data, target_data
src_all_node_pair, src_all_node_pair_label, max_np_label = generate_all_node_pair(
    source_node_num, src_edge_index_sl, source_node_label,
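
# An alternative to padding the edge index with per-node self-loops just to
# keep isolated nodes: dgl.graph accepts num_nodes directly, so the loops
# never need to be added and carried through to_simple / to_bidirected.
# A sketch with made-up sizes.
import dgl
import torch

edge_index = torch.tensor([[0, 1], [1, 2]])  # [2, E] edge index, PyG-style
g = dgl.graph((edge_index[0], edge_index[1]), num_nodes=5)
g = dgl.to_bidirected(dgl.to_simple(g))
assert g.num_nodes() == 5  # isolated nodes 3 and 4 survive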
def preprocess_data(dataset, train_ratio):
    if dataset in ['cora', 'citeseer', 'pubmed']:
        edge = np.loadtxt('../low_freq/{}.edge'.format(dataset), dtype=int).tolist()
        feat = np.loadtxt('../low_freq/{}.feature'.format(dataset))
        labels = np.loadtxt('../low_freq/{}.label'.format(dataset), dtype=int)
        train = np.loadtxt('../low_freq/{}.train'.format(dataset), dtype=int)
        val = np.loadtxt('../low_freq/{}.val'.format(dataset), dtype=int)
        test = np.loadtxt('../low_freq/{}.test'.format(dataset), dtype=int)
        nclass = len(set(labels.tolist()))
        print(dataset, nclass)

        U = [e[0] for e in edge]
        V = [e[1] for e in edge]
        g = dgl.graph((U, V))
        g = dgl.to_simple(g)
        g = dgl.remove_self_loop(g)
        g = dgl.to_bidirected(g)

        feat = normalize_features(feat)
        feat = torch.FloatTensor(feat)
        labels = torch.LongTensor(labels)
        train = torch.LongTensor(train)
        val = torch.LongTensor(val)
        test = torch.LongTensor(test)
        return g, nclass, feat, labels, train, val, test

    elif 'syn' in dataset:
        edge = np.loadtxt('../syn/{}.edge'.format(dataset), dtype=int).tolist()
        labels = np.loadtxt('../syn/{}.lab'.format(dataset), dtype=int)
        features = np.loadtxt('../syn/{}.feat'.format(dataset), dtype=float)
        n = labels.shape[0]
        idx = list(range(n))
        random.shuffle(idx)
        idx_train = np.array(idx[:100])
        idx_test = np.array(idx[100:])

        U = [e[0] for e in edge]
        V = [e[1] for e in edge]
        g = dgl.graph((U, V))

        # fraction of intra-class vs. inter-class edges
        c1 = 0
        c2 = 0
        lab = labels.tolist()
        for e in edge:
            if lab[e[0]] == lab[e[1]]:
                c1 += 1
            else:
                c2 += 1
        print(c1 / len(edge), c2 / len(edge))

        # normalization would degrade these features, so it is skipped
        # features = normalize_features(features)
        features = torch.FloatTensor(features)
        nclass = 2
        labels = torch.LongTensor(labels)
        train = torch.LongTensor(idx_train)
        test = torch.LongTensor(idx_test)
        print(dataset, nclass)
        return g, nclass, features, labels, train, train, test

    elif dataset in ['film']:
        graph_adjacency_list_file_path = '../high_freq/{}/out1_graph_edges.txt'.format(dataset)
        graph_node_features_and_labels_file_path = '../high_freq/{}/out1_node_feature_label.txt'.format(dataset)

        G = nx.DiGraph()
        graph_node_features_dict = {}
        graph_labels_dict = {}

        if dataset == 'film':
            with open(graph_node_features_and_labels_file_path) as graph_node_features_and_labels_file:
                graph_node_features_and_labels_file.readline()
                for line in graph_node_features_and_labels_file:
                    line = line.rstrip().split('\t')
                    assert len(line) == 3
                    assert (int(line[0]) not in graph_node_features_dict
                            and int(line[0]) not in graph_labels_dict)
                    feature_blank = np.zeros(932, dtype=np.uint16)
                    feature_blank[np.array(line[1].split(','), dtype=np.uint16)] = 1
                    graph_node_features_dict[int(line[0])] = feature_blank
                    graph_labels_dict[int(line[0])] = int(line[2])
        else:
            with open(graph_node_features_and_labels_file_path) as graph_node_features_and_labels_file:
                graph_node_features_and_labels_file.readline()
                for line in graph_node_features_and_labels_file:
                    line = line.rstrip().split('\t')
                    assert len(line) == 3
                    assert (int(line[0]) not in graph_node_features_dict
                            and int(line[0]) not in graph_labels_dict)
                    graph_node_features_dict[int(line[0])] = np.array(
                        line[1].split(','), dtype=np.uint8)
                    graph_labels_dict[int(line[0])] = int(line[2])

        with open(graph_adjacency_list_file_path) as graph_adjacency_list_file:
            graph_adjacency_list_file.readline()
            for line in graph_adjacency_list_file:
                line = line.rstrip().split('\t')
                assert len(line) == 2
                if int(line[0]) not in G:
                    G.add_node(int(line[0]),
                               features=graph_node_features_dict[int(line[0])],
                               label=graph_labels_dict[int(line[0])])
                if int(line[1]) not in G:
                    G.add_node(int(line[1]),
                               features=graph_node_features_dict[int(line[1])],
                               label=graph_labels_dict[int(line[1])])
                G.add_edge(int(line[0]), int(line[1]))

        adj = nx.adjacency_matrix(G, sorted(G.nodes()))
        row, col = np.where(adj.todense() > 0)
        U = row.tolist()
        V = col.tolist()
        g = dgl.graph((U, V))
        g = dgl.to_simple(g)
        g = dgl.to_bidirected(g)
        g = dgl.remove_self_loop(g)

        features = np.array([features for _, features in
                             sorted(G.nodes(data='features'), key=lambda x: x[0])],
                            dtype=float)
        labels = np.array([label for _, label in
                           sorted(G.nodes(data='label'), key=lambda x: x[0])],
                          dtype=int)

        n = labels.shape[0]
        idx = list(range(n))
        # random.shuffle(idx)
        r0 = int(n * train_ratio)
        r1 = int(n * 0.6)
        r2 = int(n * 0.8)
        idx_train = np.array(idx[:r0])
        idx_val = np.array(idx[r1:r2])
        idx_test = np.array(idx[r2:])

        features = normalize_features(features)
        features = torch.FloatTensor(features)
        nclass = 5
        labels = torch.LongTensor(labels)
        train = torch.LongTensor(idx_train)
        val = torch.LongTensor(idx_val)
        test = torch.LongTensor(idx_test)
        print(dataset, nclass)
        return g, nclass, features, labels, train, val, test

    # datasets in Geom-GCN
    elif dataset in ['cornell', 'texas', 'wisconsin', 'chameleon', 'squirrel']:
        graph_adjacency_list_file_path = '../high_freq/{}/out1_graph_edges.txt'.format(dataset)
        graph_node_features_and_labels_file_path = '../high_freq/{}/out1_node_feature_label.txt'.format(dataset)

        G = nx.DiGraph()
        graph_node_features_dict = {}
        graph_labels_dict = {}

        with open(graph_node_features_and_labels_file_path) as graph_node_features_and_labels_file:
            graph_node_features_and_labels_file.readline()
            for line in graph_node_features_and_labels_file:
                line = line.rstrip().split('\t')
                assert len(line) == 3
                assert (int(line[0]) not in graph_node_features_dict
                        and int(line[0]) not in graph_labels_dict)
                graph_node_features_dict[int(line[0])] = np.array(
                    line[1].split(','), dtype=np.uint8)
                graph_labels_dict[int(line[0])] = int(line[2])

        with open(graph_adjacency_list_file_path) as graph_adjacency_list_file:
            graph_adjacency_list_file.readline()
            for line in graph_adjacency_list_file:
                line = line.rstrip().split('\t')
                assert len(line) == 2
                if int(line[0]) not in G:
                    G.add_node(int(line[0]),
                               features=graph_node_features_dict[int(line[0])],
                               label=graph_labels_dict[int(line[0])])
                if int(line[1]) not in G:
                    G.add_node(int(line[1]),
                               features=graph_node_features_dict[int(line[1])],
                               label=graph_labels_dict[int(line[1])])
                G.add_edge(int(line[0]), int(line[1]))

        adj = nx.adjacency_matrix(G, sorted(G.nodes()))
        features = np.array([features for _, features in
                             sorted(G.nodes(data='features'), key=lambda x: x[0])])
        labels = np.array([label for _, label in
                           sorted(G.nodes(data='label'), key=lambda x: x[0])])
        features = normalize_features(features)

        g = DGLGraph(adj)
        g = dgl.to_simple(g)
        g = dgl.to_bidirected(g)
        g = dgl.remove_self_loop(g)

        n = len(labels.tolist())
        idx = list(range(n))
        # random.shuffle(idx)
        r0 = int(n * train_ratio)
        r1 = int(n * 0.6)
        r2 = int(n * 0.8)
        train = np.array(idx[:r0])
        val = np.array(idx[r1:r2])
        test = np.array(idx[r2:])

        nclass = len(set(labels.tolist()))
        features = torch.FloatTensor(features)
        labels = torch.LongTensor(labels)
        train = torch.LongTensor(train)
        val = torch.LongTensor(val)
        test = torch.LongTensor(test)
        print(dataset, nclass)
        return g, nclass, features, labels, train, val, test

    # datasets in FA-GCN
    elif dataset in ['new_chameleon', 'new_squirrel']:
        edge = np.loadtxt('../high_freq/{}/edges.txt'.format(dataset), dtype=int)
        labels = np.loadtxt('../high_freq/{}/labels.txt'.format(dataset), dtype=int).tolist()
        features = np.loadtxt('../high_freq/{}/features.txt'.format(dataset), dtype=float)

        U = [e[0] for e in edge]
        V = [e[1] for e in edge]
        g = dgl.graph((U, V))
        g = dgl.to_simple(g)
        g = dgl.to_bidirected(g)
        g = dgl.remove_self_loop(g)

        n = len(labels)
        idx = list(range(n))
        # random.shuffle(idx)
        r0 = int(n * train_ratio)
        r1 = int(n * 0.6)
        r2 = int(n * 0.8)
        train = np.array(idx[:r0])
        val = np.array(idx[r1:r2])
        test = np.array(idx[r2:])

        features = normalize_features(features)
        features = torch.FloatTensor(features)
        nclass = 3
        labels = torch.LongTensor(labels)
        train = torch.LongTensor(train)
        val = torch.LongTensor(val)
        test = torch.LongTensor(test)
        print(dataset, nclass)
        return g, nclass, features, labels, train, val, test
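
# Each branch above normalizes its raw graph with the same three DGL calls
# (modulo the order of the last two); a hypothetical helper (normalize_graph
# is not in the original code) states the intent once.
import dgl

def normalize_graph(g):
    g = dgl.to_simple(g)          # remove duplicate edges
    g = dgl.remove_self_loop(g)   # remove self-loops
    g = dgl.to_bidirected(g)      # add any missing reverse edges
    return g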
def preprocess_data(dataset, train_percentage):
    import dgl
    # Modified from AAAI'21 FA-GCN
    if dataset in ['cora', 'citeseer', 'pubmed']:
        load_default_split = train_percentage <= 0
        edge = np.loadtxt(f'{DATA_PATH}/{dataset}/{dataset}.edge', dtype=int).tolist()
        features = np.loadtxt(f'{DATA_PATH}/{dataset}/{dataset}.feature')
        labels = np.loadtxt(f'{DATA_PATH}/{dataset}/{dataset}.label', dtype=int)
        if load_default_split:
            train = np.loadtxt(f'{DATA_PATH}/{dataset}/{dataset}.train', dtype=int)
            val = np.loadtxt(f'{DATA_PATH}/{dataset}/{dataset}.val', dtype=int)
            test = np.loadtxt(f'{DATA_PATH}/{dataset}/{dataset}.test', dtype=int)
        else:
            train, val, test = stratified_train_test_split(
                np.arange(len(labels)), labels, len(labels), train_percentage)
        nclass = len(set(labels.tolist()))
        print(dataset, nclass)

        U = [e[0] for e in edge]
        V = [e[1] for e in edge]
        g = dgl.graph((U, V))
        g = dgl.to_simple(g)
        g = dgl.remove_self_loop(g)
        g = dgl.to_bidirected(g)

        features = normalize_features(features)
        features = th.FloatTensor(features)
        labels = th.LongTensor(labels)
        train = th.LongTensor(train)
        val = th.LongTensor(val)
        test = th.LongTensor(test)
    elif dataset in ['airport', 'blogcatalog', 'flickr']:
        load_default_split = train_percentage <= 0
        adj_orig = pickle.load(
            open(f'{DATA_PATH}/{dataset}/{dataset}_adj.pkl', 'rb'))  # sparse
        features = pickle.load(
            open(f'{DATA_PATH}/{dataset}/{dataset}_features.pkl', 'rb'))  # sparse
        labels = pickle.load(
            open(f'{DATA_PATH}/{dataset}/{dataset}_labels.pkl', 'rb'))  # tensor
        if th.is_tensor(labels):
            labels = labels.numpy()
        if load_default_split:
            tvt_nids = pickle.load(
                open(f'{DATA_PATH}/{dataset}/{dataset}_tvt_nids.pkl', 'rb'))  # 3 arrays
            train, val, test = tvt_nids[0], tvt_nids[1], tvt_nids[2]
        else:
            train, val, test = stratified_train_test_split(
                np.arange(len(labels)), labels, len(labels), train_percentage)
        nclass = len(set(labels.tolist()))
        print(dataset, nclass)

        adj_orig = adj_orig.tocoo()
        U = adj_orig.row.tolist()
        V = adj_orig.col.tolist()
        g = dgl.graph((U, V))
        g = dgl.to_simple(g)
        g = dgl.remove_self_loop(g)
        g = dgl.to_bidirected(g)

        if dataset in ['airport']:
            features = normalize_features(features)
        if sp.issparse(features):
            features = th.FloatTensor(features.toarray())
        else:
            features = th.FloatTensor(features)
        labels = th.LongTensor(labels)
        train = th.LongTensor(train)
        val = th.LongTensor(val)
        test = th.LongTensor(test)
    elif dataset in ['arxiv']:
        dataset = DglNodePropPredDataset(name='ogbn-arxiv', root='data/ogb_arxiv')
        split_idx = dataset.get_idx_split()
        train, val, test = split_idx["train"], split_idx["valid"], split_idx["test"]
        g, labels = dataset[0]
        features = g.ndata['feat']
        nclass = 40
        labels = labels.squeeze()
        g = dgl.to_bidirected(g)

    if dataset in ['citeseer']:
        g = dgl.add_self_loop(g)
    return g, features, features.shape[1], nclass, labels, train, val, test
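
# A small sanity check for the graphs these loaders return (check_normalized
# is a hypothetical helper; citeseer is excluded since it gets self-loops
# added above).
import dgl

def check_normalized(g):
    u, v = g.edges()
    assert not bool((u == v).any())                       # no self-loops
    assert bool(g.has_edges_between(v, u).all())          # bidirected
    assert dgl.to_simple(g).num_edges() == g.num_edges()  # no duplicate edges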