Example no. 1
    def _loadDataset(self, dataset: DataSet,
                     device: torch.device) -> torch_geometric.data.Data:
        """
            a loader function for the requested dataset
        """
        dataset_path = osp.join(getGitPath(), 'datasets')
        if dataset in (DataSet.PUBMED, DataSet.CORA, DataSet.CITESEER):
            dataset = Planetoid(dataset_path, dataset.string())
        elif dataset is DataSet.TWITTER:
            twitter_glove_path = osp.join(dataset_path, 'twitter', 'glove.pkl')
            if not osp.exists(twitter_glove_path):
                raise SystemExit(
                    "See the README and follow the download instructions "
                    "for the TWITTER dataset")
            dataset = TwitterDataset(osp.dirname(twitter_glove_path))
            with open(twitter_glove_path, 'rb') as file:
                glove_matrix = pickle.load(file)
            self.glove_matrix = torch.tensor(
                glove_matrix, dtype=torch.float32).to(device)

        data = dataset[0].to(device)  # use the device argument passed in
        setattr(data, 'num_classes', dataset.num_classes)

        self.num_features = data.num_features
        self.num_classes = dataset.num_classes
        return data
Example no. 2
    def test_resample_disjoint(self):
        pyg_dataset = Planetoid("./cora", "Cora")
        graphs = GraphDataset.pyg_to_graphs(pyg_dataset)
        graph = graphs[0]
        graph = Graph(node_label=graph.node_label,
                      node_feature=graph.node_feature,
                      edge_index=graph.edge_index,
                      edge_feature=graph.edge_feature,
                      directed=False)
        graphs = [graph]
        dataset = GraphDataset(graphs,
                               task="link_pred",
                               edge_train_mode="disjoint",
                               edge_message_ratio=0.8,
                               resample_disjoint=True,
                               resample_disjoint_period=1)
        dataset_train, _, _ = dataset.split(split_ratio=[0.5, 0.2, 0.3])
        graph_train_first = dataset_train[0]
        graph_train_second = dataset_train[0]

        self.assertEqual(graph_train_first.edge_label_index.shape[1],
                         graph_train_second.edge_label_index.shape[1])
        self.assertTrue(
            torch.equal(graph_train_first.edge_label,
                        graph_train_second.edge_label))
Example no. 3
def learning_methods_on_graphs():
    dataset = 'Cora'
    path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '..',
                        'data', dataset)
    dataset = Planetoid(root=path, name=dataset)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = Net(dataset).to(device)
    data = dataset[0].to(device)
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=0.01,
                                 weight_decay=5e-4)

    model.train()
    for epoch in range(1000):
        optimizer.zero_grad()
        out = model(data)
        loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
        loss.backward()
        optimizer.step()
        print("{} {}".format(epoch, loss.item()))

    model.eval()
    _, pred = model(data).max(dim=1)
    correct = float(pred[data.test_mask].eq(
        data.y[data.test_mask]).sum().item())
    acc = correct / data.test_mask.sum().item()
    print('Accuracy: {:.4f}'.format(acc))
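For context, a minimal sketch of the Net module this snippet assumes (a hypothetical two-layer GCN; this class is not part of the original source):

import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv

class Net(torch.nn.Module):
    # Hypothetical model matching the Net(dataset) and model(data) calls above.
    def __init__(self, dataset):
        super().__init__()
        self.conv1 = GCNConv(dataset.num_features, 16)
        self.conv2 = GCNConv(16, dataset.num_classes)

    def forward(self, data):
        x = F.relu(self.conv1(data.x, data.edge_index))
        x = F.dropout(x, training=self.training)
        return F.log_softmax(self.conv2(x, data.edge_index), dim=1)  # pairs with F.nll_loss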
Example no. 4
def load_dataset(dataset):
    path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', dataset)

    if dataset in ['cora', 'citeseer', 'pubmed']:
        dataset = Planetoid(path, dataset, transform=T.NormalizeFeatures())
        num_features = dataset.num_features
        num_classes = dataset.num_classes
        data = dataset[0]
        data.adj = torch.zeros((data.x.size(0), data.x.size(0)))
        col, row = data.edge_index
        data.adj[col, row] = 1
        return data, num_features, num_classes
    elif dataset == 'reddit':
        dataset = Reddit(path)
    elif dataset == 'corafull':
        dataset = CoraFull(path)
    num_features = dataset.num_features
    num_classes = dataset.num_classes
    data = dataset[0]

    data.train_mask, data.val_mask, data.test_mask = generate_split(
        data, num_classes)
    data.adj = torch.zeros((data.x.size(0), data.x.size(0)))
    col, row = data.edge_index
    data.adj[col, row] = 1
    return data, num_features, num_classes
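The snippet above assumes a generate_split helper that is not shown. A plausible sketch (an assumption, not the original helper) that samples a fixed number of training nodes per class and random validation/test nodes:

import torch

def generate_split(data, num_classes, train_per_class=20, num_val=500, num_test=1000):
    num_nodes = data.x.size(0)
    train_mask = torch.zeros(num_nodes, dtype=torch.bool)
    val_mask = torch.zeros(num_nodes, dtype=torch.bool)
    test_mask = torch.zeros(num_nodes, dtype=torch.bool)
    for c in range(num_classes):
        # sample `train_per_class` nodes of class c for training
        idx = (data.y == c).nonzero(as_tuple=False).view(-1)
        idx = idx[torch.randperm(idx.size(0))[:train_per_class]]
        train_mask[idx] = True
    remaining = (~train_mask).nonzero(as_tuple=False).view(-1)
    remaining = remaining[torch.randperm(remaining.size(0))]
    val_mask[remaining[:num_val]] = True
    test_mask[remaining[num_val:num_val + num_test]] = True
    return train_mask, val_mask, test_mask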
Example no. 5
def load_dataset(root: str, name: str, *args, **kwargs) -> Dataset:
    r"""Returns a variety of datasets according to :obj:`name`."""
    if 'karate' in name.lower():
        from torch_geometric.datasets import KarateClub
        return KarateClub(*args, **kwargs)
    if name.lower() in ['cora', 'citeseer', 'pubmed']:
        from torch_geometric.datasets import Planetoid
        path = osp.join(root, 'Planetoid', name)
        return Planetoid(path, name, *args, **kwargs)
    if name in ['BZR', 'ENZYMES', 'IMDB-BINARY', 'MUTAG']:
        from torch_geometric.datasets import TUDataset
        path = osp.join(root, 'TUDataset')
        return TUDataset(path, name, *args, **kwargs)
    if name in ['ego-facebook', 'soc-Slashdot0811', 'wiki-vote']:
        from torch_geometric.datasets import SNAPDataset
        path = osp.join(root, 'SNAPDataset')
        return SNAPDataset(path, name, *args, **kwargs)
    if name.lower() in ['bashapes']:
        from torch_geometric.datasets import BAShapes
        return BAShapes(*args, **kwargs)
    if name.lower() in ['dblp']:
        from torch_geometric.datasets import DBLP
        path = osp.join(root, 'DBLP')
        return DBLP(path, *args, **kwargs)
    if name in ['citationCiteseer', 'illc1850']:
        from torch_geometric.datasets import SuiteSparseMatrixCollection
        path = osp.join(root, 'SuiteSparseMatrixCollection')
        return SuiteSparseMatrixCollection(path, name=name, *args, **kwargs)
    if 'elliptic' in name.lower():
        from torch_geometric.datasets import EllipticBitcoinDataset
        path = osp.join(root, 'EllipticBitcoinDataset')
        return EllipticBitcoinDataset(path, *args, **kwargs)

    raise NotImplementedError
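Illustrative usage of the dispatcher (the root path and dataset choice are examples, not from the original source):

dataset = load_dataset('./data', 'Cora')  # stored under ./data/Planetoid/Cora
print(dataset.num_features, dataset.num_classes)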
Example no. 6
def load_data(
    dataset="Cora",
    supervised=True,
):
    '''
    Load a dataset in either the semi-supervised (public split) or the
    supervised setting; the supervised split reserves the last 1,000 nodes
    for validation and testing.
    :param dataset: dataset name ("Cora", "Citeseer", "Pubmed", "CS", ...)
    :param supervised: if True, overwrite the public split as described above
    :return: the Data object with masks and num_classes set
    '''
    path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', dataset)
    if dataset in ["CS", "Physics"]:
        dataset = Coauthor(path, dataset, transform=T.NormalizeFeatures())
    elif dataset in ["Computers", "Photo"]:
        dataset = Amazon(path, dataset, transform=T.NormalizeFeatures())
    elif dataset in ["Cora", "Citeseer", "Pubmed"]:
        dataset = Planetoid(path, dataset, transform=T.NormalizeFeatures())
    data = dataset[0]
    if supervised:
        data.train_mask = torch.zeros(data.num_nodes, dtype=torch.uint8)
        data.train_mask[:-1000] = 1
        data.val_mask = torch.zeros(data.num_nodes, dtype=torch.uint8)
        data.val_mask[-1000:-500] = 1
        data.test_mask = torch.zeros(data.num_nodes, dtype=torch.uint8)
        data.test_mask[-500:] = 1
    data.num_classes = data.y.max().item() + 1
    return data  # return the Data object the masks and num_classes were set on
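Illustrative usage of load_data (assuming the supervised split described in the docstring):

data = load_data('Cora', supervised=True)
print(data.train_mask.sum().item(),  # all but the last 1,000 nodes
      data.val_mask.sum().item(),    # 500
      data.test_mask.sum().item())   # 500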
Example no. 7
    def test_split(self):
        pyg_dataset = Planetoid("./cora", "Cora")
        dg = Graph.pyg_to_graph(pyg_dataset[0])

        dg_node = dg.split()
        dg_num_nodes = dg.num_nodes
        node_0 = int(0.8 * dg_num_nodes)
        node_1 = int(0.1 * dg_num_nodes)
        node_2 = dg_num_nodes - node_0 - node_1
        self.assertEqual(dg_node[0].node_label_index.shape[0], node_0)
        self.assertEqual(dg_node[1].node_label_index.shape[0], node_1)
        self.assertEqual(dg_node[2].node_label_index.shape[0], node_2)

        for split_ratio in [[0.1, 0.4, 0.5], [0.4, 0.3, 0.3], [0.7, 0.2, 0.1]]:
            dg_link_custom = (dg.split(task="link_pred",
                                       split_ratio=split_ratio))
            dg_num_edges = dg.num_edges
            edge_0 = 2 * int(split_ratio[0] * dg_num_edges)
            edge_1 = 2 * int(split_ratio[1] * dg_num_edges)
            edge_2 = 2 * (dg_num_edges - int(split_ratio[0] * dg_num_edges) -
                          int(split_ratio[1] * dg_num_edges))
            self.assertEqual(
                dg_link_custom[0].edge_label_index.shape[1],
                edge_0,
            )
            self.assertEqual(
                dg_link_custom[1].edge_label_index.shape[1],
                edge_1,
            )
            self.assertEqual(
                dg_link_custom[2].edge_label_index.shape[1],
                edge_2,
            )
Example no. 8
def get_dataset(name: str, use_lcc: bool = True) -> InMemoryDataset:
    path = os.path.join(DATA_PATH, name)
    if name in ['Cora', 'Citeseer', 'Pubmed']:
        dataset = Planetoid(path, name)
    elif name in ['Computers', 'Photo']:
        dataset = Amazon(path, name)
    elif name == 'CoauthorCS':
        dataset = Coauthor(path, 'CS')
    else:
        raise Exception('Unknown dataset.')

    if use_lcc:
        lcc = get_largest_connected_component(dataset)

        x_new = dataset.data.x[lcc]
        y_new = dataset.data.y[lcc]

        row, col = dataset.data.edge_index.numpy()
        edges = [[i, j] for i, j in zip(row, col) if i in lcc and j in lcc]
        edges = remap_edges(edges, get_node_mapper(lcc))

        data = Data(x=x_new,
                    edge_index=torch.LongTensor(edges),
                    y=y_new,
                    train_mask=torch.zeros(y_new.size()[0], dtype=torch.bool),
                    test_mask=torch.zeros(y_new.size()[0], dtype=torch.bool),
                    val_mask=torch.zeros(y_new.size()[0], dtype=torch.bool))
        dataset.data = data

    return dataset
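The use_lcc branch assumes get_largest_connected_component, get_node_mapper, and remap_edges helpers. Plausible sketches of the latter two (assumptions, not the original code):

def get_node_mapper(lcc):
    # map the original node ids in the LCC to consecutive new ids
    return {node: i for i, node in enumerate(lcc)}

def remap_edges(edges, mapper):
    # rewrite each [i, j] pair with the new ids; the result has shape [2, num_edges]
    row = [mapper[i] for i, j in edges]
    col = [mapper[j] for i, j in edges]
    return [row, col]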
Example no. 9
File: pyg.py  Project: xs-li/cogdl
    def __init__(self):
        dataset = "PubMed"
        path = osp.join(osp.dirname(osp.realpath(__file__)), "../..", "data",
                        dataset)
        if not osp.exists(path):
            Planetoid(path, dataset, transform=T.TargetIndegree())
        super(PubMedDataset, self).__init__(path, dataset,
                                            transform=T.TargetIndegree())
Example no. 10
def test_citeseer():
    root = osp.join('/', 'tmp', str(random.randrange(sys.maxsize)))
    dataset = Planetoid(root, 'Citeseer')
    loader = DataLoader(dataset, batch_size=len(dataset))

    assert len(dataset) == 1
    assert dataset.__repr__() == 'Citeseer()'

    for data in loader:
        assert data.num_graphs == 1
        assert data.num_nodes == 3327
        assert data.num_edges / 2 == 4552

        assert len(data) == 7
        assert list(data.x.size()) == [data.num_nodes, 3703]
        assert list(data.y.size()) == [data.num_nodes]
        assert data.y.max() + 1 == 6
        assert data.train_mask.sum() == 6 * 20
        assert data.val_mask.sum() == 500
        assert data.test_mask.sum() == 1000
        assert (data.train_mask & data.val_mask & data.test_mask).sum() == 0
        assert list(data.batch.size()) == [data.num_nodes]

        assert data.contains_isolated_nodes()
        assert not data.contains_self_loops()
        assert data.is_undirected()

    dataset = Planetoid(root, 'Citeseer', split='full')
    data = dataset[0]
    assert data.val_mask.sum() == 500
    assert data.test_mask.sum() == 1000
    assert data.train_mask.sum() == data.num_nodes - 1500
    assert (data.train_mask & data.val_mask & data.test_mask).sum() == 0

    dataset = Planetoid(root,
                        'Citeseer',
                        split='random',
                        num_train_per_class=11,
                        num_val=29,
                        num_test=41)
    data = dataset[0]
    assert data.train_mask.sum() == dataset.num_classes * 11
    assert data.val_mask.sum() == 29
    assert data.test_mask.sum() == 41
    assert (data.train_mask & data.val_mask & data.test_mask).sum() == 0

    shutil.rmtree(root)
Example no. 11
    def prepare_data(self):
        path = osp.join(
            osp.dirname(osp.realpath(__file__)), "data", self.NAME
        )
        print(path)
        self.dataset = Planetoid(path, self.NAME, transform=self._transform)
        self.data = self.dataset[0]
        print(self.dataset)
Example no. 12
def load_dataset(name):
    name = name.lower()

    if name in ['cora', 'citeseer', 'pubmed']:
        return Planetoid(root=name, name=name, pre_transform=pre_transform)
    elif name == 'reddit':
        # TODO:
        raise NotImplementedError
Example no. 13
def get_data2(folder="node_classify/cora", data_name="cora"):
    dataset = Planetoid(
        root=folder,
        name=data_name,
        # pre_transform=T.KNNGraph(k=6),
        # transform=T.NormalizeFeatures(),
        transform=T.TargetIndegree())
    return dataset
Example no. 14
def get_dataset(name):
    if name in ['Cora', 'Citeseer', 'Pubmed']:
        dataset = Planetoid(path + name, name)
    elif name in ['Computers', 'Photo']:
        dataset = Amazon(path + name, name)
    else:
        raise Exception('Unknown dataset.')
    return dataset
Example no. 15
def get_data(dataset_name, dataset_dir):
    full_names = {'cora': 'Cora', 'citeseer': 'CiteSeer', 'pubmed': 'PubMed'}
    dataset_name = full_names[dataset_name]
    dataset_path = path.join(dataset_dir, dataset_name)
    dataset = Planetoid(dataset_path,
                        dataset_name,
                        transform=T.NormalizeFeatures())
    return dataset
Example no. 16
def main():
    args = arg_parse()

    if args.dataset == 'cora':
        dataset = Planetoid(root='/tmp/Cora', name='Cora')
        train(dataset, 'node', args)
    else:
        raise RuntimeError('Unknown dataset')
Example no. 17
def load_pubmed():
    from torch_geometric.datasets import Planetoid
    dirpath = './data/pubmed'
    dataset = Planetoid(dirpath, 'Pubmed')
    data = dataset[0]
    G = nx.Graph()
    G.add_edges_from(data.edge_index.numpy().T.tolist())
    return G, nx.adjacency_matrix(G), data.y.numpy().tolist()
Example no. 18
def load_planetoid(dataset):
    data_name = ['Cora', 'CiteSeer', 'PubMed']
    assert dataset in data_name
    path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'Datasets',
                    'NodeData')
    transforms = T.Compose([T.AddSelfLoops()])
    dataset = Planetoid(path, dataset, transform=transforms)
    return dataset, dataset[0]
Example no. 19
    def __init__(self, name):
        path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data',
                        name)
        self.dataset = Planetoid(path, "Cora", transform=T.NormalizeFeatures())
        data = self.dataset[0]
        data.train_mask = data.val_mask = data.test_mask = data.y = None
        self.num_features = self.dataset.num_features
        self.reconstruction_loss = None
Example no. 20
def main():
    dataset = 'Cora'
    path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', dataset)
    dataset = Planetoid(path, dataset)
    data = dataset[0]

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model = Node2Vec(data.edge_index, embedding_dim=128, walk_length=20,
                     context_size=10, walks_per_node=10,
                     num_negative_samples=1, p=1, q=1, sparse=True).to(device)

    loader = model.loader(batch_size=128, shuffle=True, num_workers=4)
    optimizer = torch.optim.SparseAdam(list(model.parameters()), lr=0.01)

    def train():
        model.train()
        total_loss = 0
        for pos_rw, neg_rw in loader:
            optimizer.zero_grad()
            loss = model.loss(pos_rw.to(device), neg_rw.to(device))
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        return total_loss / len(loader)

    @torch.no_grad()
    def test():
        model.eval()
        z = model()
        acc = model.test(z[data.train_mask], data.y[data.train_mask],
                         z[data.test_mask], data.y[data.test_mask],
                         max_iter=150)
        return acc

    for epoch in range(1, 101):
        loss = train()
        acc = test()
        print(f'Epoch: {epoch:02d}, Loss: {loss:.4f}, Acc: {acc:.4f}')

    @torch.no_grad()
    def plot_points(colors):
        model.eval()
        z = model(torch.arange(data.num_nodes, device=device))
        z = TSNE(n_components=2).fit_transform(z.cpu().numpy())
        y = data.y.cpu().numpy()

        plt.figure(figsize=(8, 8))
        for i in range(dataset.num_classes):
            plt.scatter(z[y == i, 0], z[y == i, 1], s=20, color=colors[i])
        plt.axis('off')
        plt.show()

    colors = [
        '#ffc0cb', '#bada55', '#008080', '#420420', '#7fe5f0', '#065535',
        '#ffd700'
    ]
    plot_points(colors)
Example no. 21
def test_lightning_node_data(strategy, loader):
    import pytorch_lightning as pl

    root = osp.join('/', 'tmp', str(random.randrange(sys.maxsize)))
    dataset = Planetoid(root, name='Cora')
    data = dataset[0]
    data_repr = ('Data(x=[2708, 1433], edge_index=[2, 10556], y=[2708], '
                 'train_mask=[2708], val_mask=[2708], test_mask=[2708])')
    shutil.rmtree(root)

    model = LinearNodeModule(dataset.num_features, dataset.num_classes)

    if strategy is None or loader == 'full':
        gpus = 1
    else:
        gpus = torch.cuda.device_count()

    if strategy == 'ddp_spawn' and loader == 'full':
        data = data.cuda()  # This is necessary to test sharing of data.

    if strategy == 'ddp_spawn':
        strategy = pl.plugins.DDPSpawnPlugin(find_unused_parameters=False)

    batch_size = 1 if loader == 'full' else 32
    num_workers = 0 if loader == 'full' else 3
    kwargs, kwargs_repr = {}, ''
    if loader == 'neighbor':
        kwargs['num_neighbors'] = [5]
        kwargs_repr += 'num_neighbors=[5], '

    trainer = pl.Trainer(strategy=strategy,
                         gpus=gpus,
                         max_epochs=5,
                         log_every_n_steps=1)
    datamodule = LightningNodeData(data,
                                   loader=loader,
                                   batch_size=batch_size,
                                   num_workers=num_workers,
                                   **kwargs)
    old_x = data.x.clone().cpu()
    assert str(datamodule) == (f'LightningNodeData(data={data_repr}, '
                               f'loader={loader}, batch_size={batch_size}, '
                               f'num_workers={num_workers}, {kwargs_repr}'
                               f'pin_memory={loader != "full"}, '
                               f'persistent_workers={loader != "full"})')
    trainer.fit(model, datamodule)
    new_x = data.x.cpu()
    if loader == 'full':
        offset = 5 + 5 + 1  # `train_steps` + `val_steps` + `sanity`
    else:
        offset = 0
        offset += gpus * 2  # `sanity`
        offset += 5 * gpus * math.ceil(140 / (gpus * batch_size))  # `train`
        offset += 5 * gpus * math.ceil(500 / (gpus * batch_size))  # `val`
    assert torch.all(new_x > (old_x + offset - 4))  # Ensure shared data.
    assert trainer._data_connector._val_dataloader_source.is_defined()
    assert trainer._data_connector._test_dataloader_source.is_defined()
Example no. 22
def main():
    net_name = 'Pubmed'
    dataset = Planetoid(root='/tmp/' + net_name, name=net_name)
    data = dataset[0]
    edge_index = data.edge_index
    adj_list = edge_index.numpy().T
    G = nx.Graph()
    G.add_edges_from(adj_list)
    draw_degree_distribution(G)
Example no. 23
def exp(exp_name, seed, style, shared):
    torch.manual_seed(seed)
    dataset = 'Cora'
    path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '..',
                        'data', dataset)
    dataset = Planetoid(path, dataset, transform=T.NormalizeFeatures())
    data = dataset[0]
    fold = 0
    accuracies = []
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    with open('{}.log'.format(exp_name), 'w') as flog:
        for tr_mask, vl_mask, ts_mask in gen_folds(data.x.shape[0], FOLDS,
                                                   FOLDS_SEED):
            fold += 1
            print("FOLD:", fold)
            flog.write("fold #{}\n".format(fold))

            data.train_mask = tr_mask
            data.val_mask = vl_mask
            data.test_mask = ts_mask

            print('Train: {}'.format(torch.sum(data.train_mask)))
            print('Validation: {}'.format(torch.sum(data.val_mask)))
            print('Test: {}'.format(torch.sum(data.test_mask)))

            data = data.to(device)
            #model = GINNet(dataset).to(device)
            model = GIN(dataset, 2, 64, seed).to(device)
            optimizer = torch.optim.Adam(model.parameters(),
                                         lr=0.001,
                                         weight_decay=0.0001)
            best_acc = 0
            count = 0
            for epoch in range(1, EPOCH):
                train(model, data, optimizer)
                train_accs = validate(model, data)
                log = 'Epoch: {:03d}, Train: {:.4f}, Validation: {:.4f}'
                print(log.format(epoch, *train_accs))
                log += '\n'
                flog.write(log.format(epoch, *train_accs))
                if train_accs[1] > best_acc:
                    best_acc = train_accs[1]
                    torch.save(model.state_dict(), "{}.dth".format(exp_name))
                    print("Saving model at iteration {}".format(epoch))
                    count = 0
                else:
                    count += 1
                if count == 200:
                    break
            model.load_state_dict(torch.load("{}.dth".format(exp_name)))
            accs = test(model, data)
            print('Test Accuracy: {}'.format(accs[1]))
            flog.write('Test Accuracy: {}\n'.format(accs[1]))
            accuracies.append(accs[1])
        flog.write("----------\n")
        flog.write("Avg Test Accuracy: {}\tVariance: {}\n".format(
            np.mean(accuracies), np.var(accuracies)))
Example no. 24
def main():
    args = arg_parse()
    path = Path("../data")
    if args.dataset == 'cora':
        dataset = Planetoid(root=path / 'Cora',
                            name='Cora',
                            split='random',
                            num_train_per_class=77)
        task = 'node'
    elif args.dataset == 'citeseer':
        dataset = Planetoid(root=path / 'CiteSeer',
                            name='CiteSeer',
                            split='random',
                            num_train_per_class=111)
        task = 'node'
    else:
        raise RuntimeError('Unknown dataset')
    # print(dataset.data)
    # print(dataset.num_classes)
    return train(dataset, task, args)
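This snippet (like Example no. 16) assumes an arg_parse helper; a minimal sketch using argparse (an assumption, likely with fewer options than the original):

import argparse

def arg_parse():
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset', type=str, default='cora',
                        help='dataset to train on (cora or citeseer)')
    return parser.parse_args()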
Example no. 25
def main():
    args = arg_parse()

    pyg_dataset = Planetoid('./cora', 'Cora', transform=T.TargetIndegree())
    
    # the input that we assume users have
    edge_train_mode = args.mode
    print('edge train mode: {}'.format(edge_train_mode))

    graphs = GraphDataset.pyg_to_graphs(pyg_dataset, tensor_backend=True)
    if args.multigraph:
        graphs = [copy.deepcopy(graphs[0]) for _ in range(10)]

    dataset = GraphDataset(graphs, 
                           task='link_pred', 
                           edge_message_ratio=args.edge_message_ratio, 
                           edge_train_mode=edge_train_mode)
    print('Initial dataset: {}'.format(dataset))

    # split dataset
    datasets = {}
    datasets['train'], datasets['val'], datasets['test'] = dataset.split(
            transductive=not args.multigraph, split_ratio=[0.85, 0.05, 0.1])

    print('after split')
    print('Train message-passing graph: {} nodes; {} edges.'.format(
            datasets['train'][0].num_nodes,
            datasets['train'][0].num_edges))
    print('Val message-passing graph: {} nodes; {} edges.'.format(
            datasets['val'][0].num_nodes,
            datasets['val'][0].num_edges))
    print('Test message-passing graph: {} nodes; {} edges.'.format(
            datasets['test'][0].num_nodes,
            datasets['test'][0].num_edges))


    # node feature dimension
    input_dim = datasets['train'].num_node_features
    # link prediction needs 2 classes (0, 1)
    num_classes = datasets['train'].num_edge_labels

    model = Net(input_dim, num_classes, args).to(args.device)
    #optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=5e-3)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=args.epochs)
    follow_batch = [] # e.g., follow_batch = ['edge_index']

    dataloaders = {split: DataLoader(
            ds, collate_fn=Batch.collate(follow_batch), 
            batch_size=args.batch_size, shuffle=(split=='train'))
            for split, ds in datasets.items()}
    print('Graphs after split: ')
    for key, dataloader in dataloaders.items():
        for batch in dataloader:
            print(key, ': ', batch)

    train(model, dataloaders, optimizer, args, scheduler=scheduler)
Example no. 26
def test_heterogeneous_neighbor_loader_on_cora(directed):
    root = osp.join('/', 'tmp', str(random.randrange(sys.maxsize)))
    dataset = Planetoid(root, 'Cora')
    data = dataset[0]
    data.edge_weight = torch.rand(data.num_edges)

    hetero_data = HeteroData()
    hetero_data['paper'].x = data.x
    hetero_data['paper'].n_id = torch.arange(data.num_nodes)
    hetero_data['paper', 'paper'].edge_index = data.edge_index
    hetero_data['paper', 'paper'].edge_weight = data.edge_weight

    split_idx = torch.arange(5, 8)

    loader = NeighborLoader(hetero_data,
                            num_neighbors=[-1, -1],
                            batch_size=split_idx.numel(),
                            input_nodes=('paper', split_idx),
                            directed=directed)
    assert len(loader) == 1

    hetero_batch = next(iter(loader))
    batch_size = hetero_batch['paper'].batch_size

    if not directed:
        n_id, _, _, e_mask = k_hop_subgraph(split_idx,
                                            num_hops=2,
                                            edge_index=data.edge_index,
                                            num_nodes=data.num_nodes)

        n_id = n_id.sort()[0]
        assert n_id.tolist() == hetero_batch['paper'].n_id.sort()[0].tolist()
        assert hetero_batch['paper', 'paper'].num_edges == int(e_mask.sum())

    class GNN(torch.nn.Module):
        def __init__(self, in_channels, hidden_channels, out_channels):
            super().__init__()
            self.conv1 = GraphConv(in_channels, hidden_channels)
            self.conv2 = GraphConv(hidden_channels, out_channels)

        def forward(self, x, edge_index, edge_weight):
            x = self.conv1(x, edge_index, edge_weight).relu()
            x = self.conv2(x, edge_index, edge_weight).relu()
            return x

    model = GNN(dataset.num_features, 16, dataset.num_classes)
    hetero_model = to_hetero(model, hetero_data.metadata())

    out1 = model(data.x, data.edge_index, data.edge_weight)[split_idx]
    out2 = hetero_model(hetero_batch.x_dict, hetero_batch.edge_index_dict,
                        hetero_batch.edge_weight_dict)['paper'][:batch_size]
    assert torch.allclose(out1, out2, atol=1e-6)

    try:
        shutil.rmtree(root)
    except PermissionError:
        pass
Example no. 27
    def __init__(self):
        dataset = "CiteSeer"
        path = osp.join(osp.dirname(osp.realpath(__file__)), "../..", "data",
                        dataset)
        if not osp.exists(path):
            Planetoid(path, dataset, transform=T.TargetIndegree())
        super(CiteSeerDataset, self).__init__(path,
                                              dataset,
                                              transform=T.TargetIndegree())
Example no. 28
def test_cora():
    root = osp.join('/', 'tmp', str(random.randrange(sys.maxsize)))
    dataset = Planetoid(root, 'Cora')
    data = dataset[0]

    class Net(torch.nn.Module):
        def __init__(self):
            super(Net, self).__init__()
            self.conv1 = SAGEConv(dataset.num_features, 16)
            self.conv2 = SAGEConv(16, 16)
            self.conv3 = SAGEConv(16, dataset.num_classes)

        def forward_data_flow(self, x, data_flow):
            block = data_flow[0]
            x = F.relu(self.conv1(x, block.edge_index, size=block.size))
            block = data_flow[1]
            x = F.relu(self.conv2(x, block.edge_index, size=block.size))
            block = data_flow[2]
            x = self.conv3(x, block.edge_index, size=block.size)
            return x

        def forward(self, x, edge_index):
            x = F.relu(self.conv1(x, edge_index))
            x = F.relu(self.conv2(x, edge_index))
            return self.conv3(x, edge_index)

    model = Net()

    out_all = model(data.x, data.edge_index)

    loader = NeighborSampler(data,
                             size=1.0,
                             num_hops=3,
                             batch_size=64,
                             shuffle=False,
                             drop_last=False,
                             bipartite=True,
                             add_self_loops=True)

    for data_flow in loader(data.train_mask):
        out = model.forward_data_flow(data.x[data_flow[0].n_id], data_flow)
        assert torch.allclose(out_all[data_flow.n_id], out)

    loader = NeighborSampler(data,
                             size=1.0,
                             num_hops=3,
                             batch_size=64,
                             shuffle=False,
                             drop_last=False,
                             bipartite=False)

    for subdata in loader(data.train_mask):
        out = model(data.x[subdata.n_id], subdata.edge_index)[subdata.sub_b_id]
        assert torch.allclose(out_all[subdata.b_id], out)

    shutil.rmtree(root)
Example no. 29
def get_planetoid_dataset(name, normalize_features=False, transform=None, split="public"):
    path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', name)
    if split == 'complete':
        dataset = Planetoid(path, name)
        dataset[0].train_mask.fill_(False)
        dataset[0].train_mask[:dataset[0].num_nodes - 1000] = True
        dataset[0].val_mask.fill_(False)
        dataset[0].val_mask[dataset[0].num_nodes - 1000:dataset[0].num_nodes - 500] = True
        dataset[0].test_mask.fill_(False)
        dataset[0].test_mask[dataset[0].num_nodes - 500:] = True
    else:
        dataset = Planetoid(path, name, split=split)
    if transform is not None and normalize_features:
        dataset.transform = T.Compose([T.NormalizeFeatures(), transform])
    elif normalize_features:
        dataset.transform = T.NormalizeFeatures()
    elif transform is not None:
        dataset.transform = transform
    return dataset
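Illustrative usage (the call below is an example, not from the original source):

dataset = get_planetoid_dataset('Cora', normalize_features=True)
print(dataset[0])  # e.g. Data(x=[2708, 1433], edge_index=[2, 10556], ...) for Cora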
Example no. 30
def load_dataset(dataset_folder, dataset_name):
    """
    导入数据集,并处理为Data格式
    :param dataset_folder: 数据集存储路径
    :param dataset_name: 数据集的名字("Cora", "CiteSeer", "PubMed")
    :return: dataset
    """
    path = os.path.join(os.path.dirname(dataset_folder), dataset_name)
    dataset = Planetoid(path, dataset_name, T.NormalizeFeatures())
    return dataset
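Illustrative usage (the folder path is an assumption):

dataset = load_dataset('./data/', 'Cora')
print(dataset[0])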