def load_data_default(dataset_name):
    """Load a graph dataset together with its features, labels and splits.

    ``'cora'``, ``'citeseer'`` and ``'pubmed'`` are served by the matching
    DGL built-in dataset class; any other name is handed to
    ``DglNodePropPredDataset``.

    Args:
        dataset_name: Name of the dataset to load.

    Returns:
        An 8-tuple ``(graph, features, labels, train_mask, val_mask,
        test_mask, num_feats, num_classes)``.

        * For the three built-in datasets the ``*_mask`` entries are read
          from ``graph.ndata['train_mask' | 'val_mask' | 'test_mask']``.
        * For every other dataset they are the ``'train'``, ``'valid'`` and
          ``'test'`` entries of ``get_idx_split()`` (presumably node-index
          tensors rather than boolean masks -- confirm against the OGB
          documentation).
        * ``num_feats`` is ``features.shape[1]`` and ``num_classes`` is the
          largest label value plus one.

    Side effects:
        Prints the graph to stdout when one of the built-in datasets is
        loaded.
    """
    if dataset_name not in ['cora', 'citeseer', 'pubmed']:
        ogb_dataset = DglNodePropPredDataset(name=dataset_name)
        split = ogb_dataset.get_idx_split()
        train_mask = split['train']
        val_mask = split['valid']
        test_mask = split['test']

        graph, labels = ogb_dataset[0]
        # Features are read before the graph is modified below.
        features = graph.ndata["feat"]
        num_feats = features.shape[1]
        num_classes = (labels.max() + 1).item()

        # Add the reverse of every existing edge; the graph is updated in
        # place (the return value of add_edges is not used).
        edge_src, edge_dst = graph.all_edges()
        graph.add_edges(edge_dst, edge_src)

        # Drop existing self-loops before adding them back.
        graph = graph.remove_self_loop().add_self_loop()
        return (graph, features, labels, train_mask, val_mask, test_mask,
                num_feats, num_classes)

    if dataset_name == 'cora':
        dataset = CoraGraphDataset()
    elif dataset_name == 'citeseer':
        dataset = CiteseerGraphDataset()
    elif dataset_name == 'pubmed':
        dataset = PubmedGraphDataset()

    # Drop existing self-loops before adding them back.
    graph = dataset[0].remove_self_loop().add_self_loop()
    print(graph)

    ndata = graph.ndata
    features = ndata['feat']
    labels = ndata['label']
    train_mask = ndata['train_mask']
    val_mask = ndata['val_mask']
    test_mask = ndata['test_mask']

    num_feats = features.shape[1]
    largest_label = labels.max().item()
    num_classes = int(largest_label + 1)
    return (graph, features, labels, train_mask, val_mask, test_mask,
            num_feats, num_classes)
def load_data(dataset, ogb_root):
    """Load a graph with its labels, class count and split indices.

    Args:
        dataset: Dataset name. ``'cora'`` and ``'reddit'`` select the
            corresponding DGL built-in dataset; every other value falls
            through to the OGB branch.
        ogb_root: Passed as the second positional argument to
            ``DglNodePropPredDataset`` (presumably the dataset root
            directory -- confirm against the OGB documentation). Unused for
            ``'cora'`` and ``'reddit'``.

    Returns:
        A 6-tuple ``(graph, labels, num_classes, train_idx, val_idx,
        test_idx)``. For ``'cora'`` / ``'reddit'`` the index tensors are the
        non-zero positions of the ``train_mask`` / ``val_mask`` /
        ``test_mask`` node data; otherwise they are the ``'train'``,
        ``'valid'`` and ``'test'`` entries of ``get_idx_split()`` and the
        labels have their last dimension squeezed.
    """
    if dataset not in ('cora', 'reddit'):
        # NOTE(review): the dataset name is hard-coded here, so any value of
        # `dataset` other than 'cora' / 'reddit' loads 'ogbn-products'.
        ogb_data = DglNodePropPredDataset('ogbn-products', ogb_root)
        graph, node_labels = ogb_data[0]
        splits = ogb_data.get_idx_split()
        return (graph, node_labels.squeeze(dim=-1), ogb_data.num_classes,
                splits['train'], splits['valid'], splits['test'])

    if dataset == 'cora':
        builtin = CoraGraphDataset()
    else:
        builtin = RedditDataset(self_loop=True)
    graph = builtin[0]

    # Turn each boolean node mask into a 1-D tensor of node indices.
    train_idx, val_idx, test_idx = (
        graph.ndata[mask_key].nonzero(as_tuple=True)[0]
        for mask_key in ('train_mask', 'val_mask', 'test_mask')
    )
    return (graph, graph.ndata['label'], builtin.num_classes,
            train_idx, val_idx, test_idx)