Beispiel #1
0
def load_data_default(dataset_name):
    """Load a node-classification dataset and return the graph plus metadata.

    ``'cora'``, ``'citeseer'`` and ``'pubmed'`` are loaded through the
    matching ``*GraphDataset`` class; any other name is passed to
    ``DglNodePropPredDataset(name=dataset_name)``.

    In both cases the returned graph has had its self-loops removed and then
    re-added via ``remove_self_loop().add_self_loop()``.  On the
    ``DglNodePropPredDataset`` path, a reversed copy of every edge is added
    to the graph first.

    Args:
        dataset_name: Name selecting the dataset to load.

    Returns:
        A tuple ``(graph, features, labels, train_mask, val_mask, test_mask,
        num_feats, num_classes)``.  For the three citation datasets the
        ``*_mask`` entries are read from ``graph.ndata``; otherwise they are
        the ``'train'`` / ``'valid'`` / ``'test'`` entries of
        ``get_idx_split()`` (presumably node-index tensors rather than masks
        -- confirm against the OGB documentation).  ``num_feats`` is
        ``features.shape[1]`` and ``num_classes`` is ``labels.max() + 1``.
    """
    if dataset_name not in ('cora', 'citeseer', 'pubmed'):
        ogb_data = DglNodePropPredDataset(name=dataset_name)
        split = ogb_data.get_idx_split()
        train_mask = split['train']
        val_mask = split['valid']
        test_mask = split['test']

        graph, labels = ogb_data[0]
        features = graph.ndata["feat"]
        num_feats = features.shape[1]
        num_classes = (labels.max() + 1).item()

        # Add a reversed copy of every existing edge.
        edge_src, edge_dst = graph.all_edges()
        graph.add_edges(edge_dst, edge_src)

        # Drop any existing self-loops, then add self-loops back.
        graph = graph.remove_self_loop().add_self_loop()
        return (graph, features, labels, train_mask, val_mask, test_mask,
                num_feats, num_classes)

    if dataset_name == 'cora':
        dataset = CoraGraphDataset()
    elif dataset_name == 'citeseer':
        dataset = CiteseerGraphDataset()
    elif dataset_name == 'pubmed':
        dataset = PubmedGraphDataset()

    graph = dataset[0].remove_self_loop().add_self_loop()
    print(graph)

    node_data = graph.ndata
    features = node_data['feat']
    labels = node_data['label']
    train_mask = node_data['train_mask']
    val_mask = node_data['val_mask']
    test_mask = node_data['test_mask']

    num_feats = features.shape[1]
    largest_label = labels.max().item()
    num_classes = int(largest_label + 1)

    return (graph, features, labels, train_mask, val_mask, test_mask,
            num_feats, num_classes)
Beispiel #2
0
def load_data(dataset, ogb_root):
    """Load a graph dataset together with its labels and split indices.

    ``'cora'`` loads ``CoraGraphDataset()`` and ``'reddit'`` loads
    ``RedditDataset(self_loop=True)``; for these, the split indices are
    obtained by calling ``nonzero`` on the ``train_mask`` / ``val_mask`` /
    ``test_mask`` node data.

    Any other value of ``dataset`` loads
    ``DglNodePropPredDataset('ogbn-products', ogb_root)`` -- the dataset name
    is fixed to ``'ogbn-products'`` and does not depend on ``dataset``.  On
    that path the labels are squeezed along their last dimension and the
    split comes from ``get_idx_split()``.

    Args:
        dataset: Dataset selector (``'cora'``, ``'reddit'``, or anything else
            for the ``'ogbn-products'`` fallback).
        ogb_root: Second positional argument forwarded to
            ``DglNodePropPredDataset``; unused for ``'cora'`` / ``'reddit'``.

    Returns:
        A tuple ``(g, labels, num_classes, train_idx, val_idx, test_idx)``.
    """
    if dataset not in ('cora', 'reddit'):
        # NOTE: the name is hard-coded; `dataset` is not consulted here.
        data = DglNodePropPredDataset('ogbn-products', ogb_root)
        g, labels = data[0]
        split_idx = data.get_idx_split()
        return (
            g,
            labels.squeeze(dim=-1),
            data.num_classes,
            split_idx['train'],
            split_idx['valid'],
            split_idx['test'],
        )

    if dataset == 'cora':
        data = CoraGraphDataset()
    else:
        data = RedditDataset(self_loop=True)
    g = data[0]

    # Turn each stored mask into the indices of its non-zero entries.
    train_idx, val_idx, test_idx = (
        g.ndata[mask_key].nonzero(as_tuple=True)[0]
        for mask_key in ('train_mask', 'val_mask', 'test_mask')
    )
    return g, g.ndata['label'], data.num_classes, train_idx, val_idx, test_idx