Example #1
def load_data(
    dataset="Cora",
    supervised=True,
):
    '''
    Supports both semi-supervised and fully supervised splits.
    :param dataset: dataset name (e.g. "Cora", "CS", "Computers")
    :param supervised: if True, replace the public split with a dense custom split
    :return: the PyG data object with train/val/test masks
    '''
    path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', dataset)
    if dataset in ["CS", "Physics"]:
        dataset = Coauthor(path, dataset, T.NormalizeFeatures())
    elif dataset in ["Computers", "Photo"]:
        dataset = Amazon(path, dataset, T.NormalizeFeatures())
    elif dataset in ["Cora", "Citeseer", "Pubmed"]:
        dataset = Planetoid(path, dataset, T.NormalizeFeatures())
    data = dataset[0]
    if supervised:
        data.train_mask = torch.zeros(data.num_nodes, dtype=torch.bool)
        data.train_mask[:-1000] = 1
        data.val_mask = torch.zeros(data.num_nodes, dtype=torch.bool)
        data.val_mask[-1000:-500] = 1
        data.test_mask = torch.zeros(data.num_nodes, dtype=torch.bool)
        data.test_mask[-500:] = 1
    data.num_classes = data.y.max().item() + 1
    return data
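A minimal usage sketch for the loader above (hedged: it assumes the torch / torch_geometric imports that load_data relies on are in scope, and that the function returns the data object with its masks set):

data = load_data(dataset="Cora", supervised=True)
print(data.num_classes)
# the supervised split trains on all but the last 1,000 nodes,
# keeping 500 for validation and 500 for testing
print(int(data.train_mask.sum()), int(data.val_mask.sum()), int(data.test_mask.sum()))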
Example #2
def load_data(dataset="Cora", supervised=False, full_data=True):
    '''
    Supports both semi-supervised and fully supervised splits.
    :param dataset: dataset name (e.g. "Cora", "CS", "Computers")
    :param supervised: if True, replace the public split with a custom split
    :param full_data: if True, train on all but the last 1,000 nodes; otherwise on the first 1,000
    :return: the PyG data object
    '''
    path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', dataset)
    if dataset in ["CS", "Physics"]:
        dataset = Coauthor(path, dataset, T.NormalizeFeatures())
    elif dataset in ["Computers", "Photo"]:
        dataset = Amazon(path, dataset, T.NormalizeFeatures())
    elif dataset in ["Cora", "Citeseer", "Pubmed"]:
        dataset = Planetoid(path, dataset, T.NormalizeFeatures())
    data = dataset[0]
    if supervised:
        if full_data:
            data.train_mask = torch.zeros(data.num_nodes, dtype=torch.bool)
            data.train_mask[:-1000] = 1
            data.val_mask = torch.zeros(data.num_nodes, dtype=torch.bool)
            data.val_mask[data.num_nodes - 1000:data.num_nodes - 500] = 1
            data.test_mask = torch.zeros(data.num_nodes, dtype=torch.bool)
            data.test_mask[data.num_nodes - 500:] = 1
        else:
            data.train_mask = torch.zeros(data.num_nodes, dtype=torch.bool)
            data.train_mask[:1000] = 1
            data.val_mask = torch.zeros(data.num_nodes, dtype=torch.bool)
            data.val_mask[data.num_nodes - 1000:data.num_nodes - 500] = 1
            data.test_mask = torch.zeros(data.num_nodes, dtype=torch.bool)
            data.test_mask[data.num_nodes - 500:] = 1
    print('loaded data: ', '\n', data)
    return data
Example #3
def load_data(dataset_name="Cora", seed=10, n_splits=5):
    # Path in which the data will be stored
    path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data',
                    dataset_name)
    if dataset_name in ["CS", "Physics"]:
        dataset = Coauthor(path, dataset_name, T.NormalizeFeatures())
    elif dataset_name in ["Computers", "Photo"]:
        dataset = Amazon(path, dataset_name, T.NormalizeFeatures())
    elif dataset_name in ["Cora", "Citeseer", "Pubmed"]:
        dataset = Planetoid(path,
                            dataset_name,
                            split='public',
                            transform=T.NormalizeFeatures())
    elif dataset_name in ["Arxiv", "Papers", "Products"]:
        dataset = PygNodePropPredDataset(name=ogb_data_name_conv[dataset_name],
                                         root=path,
                                         transform=T.NormalizeFeatures())
    elif dataset_name == "MAG":
        dataset = PygNodePropPredDataset(name=ogb_data_name_conv[dataset_name],
                                         root=path)
    else:
        raise Exception(f"[!] Dataset not found: {dataset_name}")
    if dataset_name in obg_datasets:
        data = split_ogb_data(dataset, dataset_name)
    else:
        data = dataset[0]  # pyg graph object
        data = split_data(data, seed, n_splits)
        data.num_classes = dataset.num_classes
    return data
Example #4
File: dataset.py Project: CreaterLL/GCA
def get_dataset(path, name):
    assert name in ['Cora', 'CiteSeer', 'PubMed', 'DBLP', 'Karate', 'WikiCS', 'Coauthor-CS', 'Coauthor-Phy',
                    'Amazon-Computers', 'Amazon-Photo', 'ogbn-arxiv', 'ogbg-code']
    name = 'dblp' if name == 'DBLP' else name
    root_path = osp.expanduser('~/datasets')

    if name == 'Coauthor-CS':
        return Coauthor(root=path, name='cs', transform=T.NormalizeFeatures())

    if name == 'Coauthor-Phy':
        return Coauthor(root=path, name='physics', transform=T.NormalizeFeatures())

    if name == 'WikiCS':
        return WikiCS(root=path, transform=T.NormalizeFeatures())

    if name == 'Amazon-Computers':
        return Amazon(root=path, name='computers', transform=T.NormalizeFeatures())

    if name == 'Amazon-Photo':
        return Amazon(root=path, name='photo', transform=T.NormalizeFeatures())

    if name.startswith('ogbn'):
        return PygNodePropPredDataset(root=osp.join(root_path, 'OGB'), name=name, transform=T.NormalizeFeatures())

    return (CitationFull if name == 'dblp' else Planetoid)(osp.join(root_path, 'Citation'), name, transform=T.NormalizeFeatures())
Example #5
def DataLoader(name):
    # Supported names: generated 'cSBM_data_*' graphs plus 'Cora', 'Citeseer',
    # 'PubMed', 'Computers', 'Photo', 'chameleon', 'film', 'squirrel',
    # 'Texas' and 'Cornell'.
    if 'cSBM_data' in name:
        path = '../data/'
        # return early so cSBM names do not fall through to the name checks below
        return dataset_ContextualSBM(path, name=name)

    name = name.lower()
    if name in ['cora', 'citeseer', 'pubmed']:
        root_path = '../'
        path = osp.join(root_path, 'data', name)
        dataset = Planetoid(path, name, transform=T.NormalizeFeatures())
    elif name in ['computers', 'photo']:
        root_path = '../'
        path = osp.join(root_path, 'data', name)
        dataset = Amazon(path, name, T.NormalizeFeatures())
    elif name in ['chameleon', 'film', 'squirrel']:
        dataset = dataset_heterophily(root='../data/',
                                      name=name,
                                      transform=T.NormalizeFeatures())
    elif name in ['texas', 'cornell']:
        dataset = WebKB(root='../data/',
                        name=name,
                        transform=T.NormalizeFeatures())
    else:
        raise ValueError(f'dataset {name} not supported in dataloader')

    return dataset
Example #6
def load_data(task, seed, val_size, test_size):
    """Load dataset
    Task: Graph Classification - benchmark datasets. Consisting of various graphs we want to classify.

    Returns:
    - train_dataset: PyG dataset
    - val_dataset: PyG dataset
    - test_dataset: PyG dataset
    - test_idx: indices of the original dataset used for testing - to recover original data.
    """
    path = "../data/TUDataset"

    if task == 'mutag':
        dataset = 'Mutagenicity'
        dataset = TUDataset(path,
                            dataset,
                            transform=T.NormalizeFeatures(),
                            cleaned=True)
    elif task == 'enzymes':
        dataset = 'ENZYMES'
        dataset = TUDataset(path,
                            dataset,
                            transform=T.NormalizeFeatures(),
                            cleaned=True)
    elif task == 'proteins':
        dataset = 'PROTEINS'
        dataset = TUDataset(path,
                            dataset,
                            transform=T.NormalizeFeatures(),
                            cleaned=True)
    else:
        raise NameError(f"task {task} not allowed")

    print(f'Dataset: {dataset}:')
    print('====================')
    print(f'Number of graphs: {len(dataset)}')
    print(f'Number of features: {dataset.num_features}')
    print(f'Number of classes: {dataset.num_classes}')

    indices = [i for i in range(len(dataset))]
    train_idx, test_idx = split(indices,
                                random_state=seed,
                                test_size=test_size)
    train_dataset = dataset[train_idx]
    test_dataset = dataset[test_idx]

    indices = [i for i in range(len(train_dataset))]
    train_idx, val_idx = split(indices,
                               random_state=seed,
                               test_size=val_size / (1 - test_size))
    val_dataset = train_dataset[val_idx]
    train_dataset = train_dataset[train_idx]
    print(f'Number of training graphs: {len(train_dataset)}')
    print(f'Number of val graphs: {len(val_dataset)}')
    print(f'Number of test graphs: {len(test_dataset)}')

    return train_dataset, val_dataset, test_dataset, test_idx
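A minimal sketch of feeding the returned splits into mini-batch loaders (hedged: it assumes torch_geometric is installed and that the split helper used above is available; the batch size is arbitrary, and in PyG 2.x the loader lives in torch_geometric.loader while older releases expose it from torch_geometric.data):

from torch_geometric.loader import DataLoader

train_dataset, val_dataset, test_dataset, test_idx = load_data(
    task='mutag', seed=0, val_size=0.1, test_size=0.1)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
for batch in train_loader:
    # each mini-batch is one disconnected "super-graph" carrying a batch vector
    print(batch.num_graphs, batch.x.shape)
    break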
Example #7
File: trainer.py Project: vermouthdky/GREF
def load_ppi_data():
    path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "..",
                        "data", "PPI")
    train_dataset = PPI(path, split="train", transform=T.NormalizeFeatures())
    val_dataset = PPI(path, split="val", transform=T.NormalizeFeatures())
    test_dataset = PPI(path, split="test", transform=T.NormalizeFeatures())
    train_loader = DataLoader(train_dataset, batch_size=1, shuffle=False)
    val_loader = DataLoader(val_dataset, batch_size=1, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)
    return [train_loader, val_loader, test_loader]
Example #8
def get_directed_dataset(name, directed, normalize_features=False, transform=None):
    path = osp.join(osp.dirname(osp.realpath(__file__)), "..", "directed", name)
    dataset = DirectedCitation(path, name, directed)

    if transform is not None and normalize_features:
        dataset.transform = T.Compose([T.NormalizeFeatures(), transform])
    elif normalize_features:
        dataset.transform = T.NormalizeFeatures()
    elif transform is not None:
        dataset.transform = transform

    return dataset
Example #9
def get_planetoid_dataset(name, normalize_features=False, transform=None):
    path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', name)
    dataset = Planetoid(path, name)

    if transform is not None and normalize_features:
        dataset.transform = T.Compose([T.NormalizeFeatures(), transform])
    elif normalize_features:
        dataset.transform = T.NormalizeFeatures()
    elif transform is not None:
        dataset.transform = transform

    return dataset
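A minimal usage sketch for the helper above (hedged: assumes torch_geometric is installed and get_planetoid_dataset is importable from this module; ToSparseTensor is used purely for illustration):

import torch_geometric.transforms as T

# normalized features only
dataset = get_planetoid_dataset('Cora', normalize_features=True)

# normalized features composed with an extra transform
dataset = get_planetoid_dataset('Cora', normalize_features=True,
                                transform=T.ToSparseTensor())
data = dataset[0]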
Example #10
 def __init__(self, name):
     path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data',
                     name)
     self.path = path
     self.train_dataset = PPI(self.path,
                              split='train',
                              transform=T.NormalizeFeatures())
     self.test_dataset = PPI(self.path,
                             split='test',
                             transform=T.NormalizeFeatures())
     self.num_features = self.train_dataset.num_features
     self.reconstruction_loss = None
Example #11
def get_dataset(dataset_name):
    """
    Retrieves the dataset corresponding to the given name.
    """
    print("Getting dataset...")
    path = join('dataset', dataset_name)
    if dataset_name == 'reddit':
        dataset = Reddit(path)
    elif dataset_name == 'ppi':
        dataset = PPI(path)
    elif dataset_name == 'github':
        dataset = GitHub(path)
        data = dataset.data
        idx_train, idx_test = train_test_split(list(range(data.x.shape[0])),
                                               test_size=0.4,
                                               random_state=42)
        idx_val, idx_test = train_test_split(idx_test,
                                             test_size=0.5,
                                             random_state=42)
        data.train_mask = torch.tensor(idx_train)
        data.val_mask = torch.tensor(idx_val)
        data.test_mask = torch.tensor(idx_test)
        dataset.data = data
    elif dataset_name in ['amazon_comp', 'amazon_photo']:
        if dataset_name == 'amazon_comp':
            dataset = Amazon(path, "Computers", T.NormalizeFeatures())
        else:
            dataset = Amazon(path, "Photo", T.NormalizeFeatures())
        data = dataset.data
        idx_train, idx_test = train_test_split(list(range(data.x.shape[0])),
                                               test_size=0.4,
                                               random_state=42)
        idx_val, idx_test = train_test_split(idx_test,
                                             test_size=0.5,
                                             random_state=42)
        data.train_mask = torch.tensor(idx_train)
        data.val_mask = torch.tensor(idx_val)
        data.test_mask = torch.tensor(idx_test)
        dataset.data = data
    elif dataset_name in ["Cora", "CiteSeer", "PubMed"]:
        dataset = Planetoid(path,
                            name=dataset_name,
                            split="full",
                            transform=T.NormalizeFeatures())
    else:
        raise NotImplementedError

    print("Dataset ready!")
    return dataset
Example #12
def main():
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    dataset = Planetoid(root='data',
                        name='Cora',
                        transform=T.NormalizeFeatures())
    data = dataset[0]
    ground_truth_edge_index = data.edge_index.to(device)
    data.train_mask = data.val_mask = data.test_mask = data.y = None
    data = train_test_split_edges(data)
    data = data.to(device)

    model = Net(dataset.num_features, 64).to(device)
    optimizer = torch.optim.Adam(params=model.parameters(), lr=0.01)

    best_val_auc = test_auc = 0
    for epoch in range(1, 101):
        loss = train(data, model, optimizer)
        val_auc, tmp_test_auc = test(data, model)
        if val_auc > best_val_auc:
            best_val_auc = val_auc
            test_auc = tmp_test_auc
        print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}, Val: {val_auc:.4f}, '
              f'Test: {test_auc:.4f}')

    z = model.encode(data.x, data.train_pos_edge_index)
    final_edge_index = model.decode_all(z)
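In recent PyTorch Geometric releases train_test_split_edges is deprecated in favor of the RandomLinkSplit transform; a rough sketch of an equivalent edge split (not a drop-in replacement for the training loop above, and the val/test fractions are illustrative) looks like:

import torch_geometric.transforms as T
from torch_geometric.datasets import Planetoid

dataset = Planetoid(root='data', name='Cora', transform=T.NormalizeFeatures())
transform = T.RandomLinkSplit(num_val=0.05, num_test=0.1, is_undirected=True,
                              add_negative_train_samples=False)
# returns three Data objects, each carrying edge_label / edge_label_index
train_data, val_data, test_data = transform(dataset[0])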
Example #13
def load_facebook_dataset(dataset_name: FacebookDataset,
                          allow_features=True) -> Data:
    path = osp.join(DATASETS_DIR, dataset_name.value[:12])
    facebook_idx_map = {
        "0": 0,
        "107": 1,
        "1684": 2,
        "1912": 3,
        "3437": 4,
        "348": 5,
        "3980": 6,
        "414": 7,
        "686": 8,
        "698": 9,
    }
    data = SNAPDataset(path, dataset_name.value[:12], T.NormalizeFeatures())

    data = data[facebook_idx_map[dataset_name.value[12:]]]

    if not allow_features:
        data.x = torch.eye(data.x.size(0))

    data.num_communities = data.circle_batch.max() + 1
    communities = np.zeros((data.num_communities, data.x.size(0)))
    communities[data.circle_batch, data.circle] = 1
    data.communities = communities
    data.communities_cnl_format = matrix_to_cnl_format(communities,
                                                       data.num_communities)

    return data
Example #14
    def __init__(self, data, data_path, task, **kwargs):
        path = data_path + data

        if data in ['CiteSeer', 'Cora', 'PubMed']:
            """ Citation Networks: CiteSeer, Cora, and PubMed
            Description:
                these datasets contain sparse bag-of-words feature vectors for each document and a list of citation 
                links between documents. The citation links are treated as undirected edges
            """
            assert task == "node_class", "%s is a dataset for node classification" % data
            self.dataset = Citations(path, data, T.NormalizeFeatures())
            self.data = self.dataset[0]
            # these citation networks are small enough to fit into GPU memory, but they still
            # have enough nodes for training, validation, and testing.

        elif data in [
                'COLLAB', 'IMDB-BINARY', 'IMDB-MULTI', 'REDDIT-BINARY',
                'REDDIT-MULTI5K', 'PROTEINS', 'MUTAG', 'PTC', 'NCI1'
        ]:
            """ Benchmark Data Sets for Graph Kernels
            Description:
                these datasets are the benchmarks for graph classification, the detailed information is available 
                at <https://ls11-www.cs.tu-dortmund.de/staff/morris/graphkerneldatasets>
            """
            assert task == "graph_class", "%s is a dataset for graph classification" % data
            self.dataset = graph_kernel_dataset(data, path)
            self.train_ids, self.test_ids, self.val_ids = k_fold(
                self.dataset, kwargs['fold'])
            self.batch_size = kwargs['batch_size']
            # these datasets are much smaller. For instance, MUTAG only has 188 different graphs. So unlike the above
            # node classification, here we need cross validation.
        else:
            raise NameError('unknown dataset')
Example #15
def load_dataset(dataset):
    path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', dataset)

    if dataset in ['cora', 'citeseer', 'pubmed']:
        dataset = Planetoid(path, dataset, transform=T.NormalizeFeatures())
        num_features = dataset.num_features
        num_classes = dataset.num_classes
        data = dataset[0]
        data.adj = torch.zeros((data.x.size(0), data.x.size(0)))
        col, row = data.edge_index
        data.adj[col, row] = 1
        return data, num_features, num_classes
    elif dataset == 'reddit':
        dataset = Reddit(path)
    elif dataset == 'corafull':
        dataset = CoraFull(path)
    num_features = dataset.num_features
    num_classes = dataset.num_classes
    data = dataset[0]

    data.train_mask, data.val_mask, data.test_mask = generate_split(
        data, num_classes)
    data.adj = torch.zeros((data.x.size(0), data.x.size(0)))
    col, row = data.edge_index
    data.adj[col, row] = 1
    return data, num_features, num_classes
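The dense adjacency construction above can also be done with a built-in utility (a sketch, assuming a reasonably recent torch_geometric and a data object with edge_index and x; note to_dense_adj returns a batched tensor of shape [1, N, N]):

from torch_geometric.utils import to_dense_adj

adj = to_dense_adj(data.edge_index, max_num_nodes=data.x.size(0))[0]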
Example #16
def get_amazon_dataset(name):
    path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'datasets',
                    'node_datasets', name)
    dataset = Amazon(path, name, transform=T.NormalizeFeatures())

    num_per_class = 20
    train_index = []
    val_index = []
    test_index = []
    for i in range(dataset.num_classes):
        index = (dataset[0].y.long() == i).nonzero().view(-1)
        index = index[torch.randperm(index.size(0))]
        if len(index) > num_per_class + 30:
            train_index.append(index[:num_per_class])
            val_index.append(index[num_per_class:num_per_class + 30])
            test_index.append(index[num_per_class + 30:])
        else:
            continue
    train_index = torch.cat(train_index)
    val_index = torch.cat(val_index)
    test_index = torch.cat(test_index)

    train_mask = index_to_mask(train_index, size=dataset[0].num_nodes)
    val_mask = index_to_mask(val_index, size=dataset[0].num_nodes)
    test_mask = index_to_mask(test_index, size=dataset[0].num_nodes)

    dataset.train_mask = train_mask
    dataset.val_mask = val_mask
    dataset.test_mask = test_mask

    return dataset
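index_to_mask is not defined in the snippet above; newer PyTorch Geometric versions ship it as torch_geometric.utils.index_to_mask, and a minimal stand-alone version consistent with how it is called here is:

import torch

def index_to_mask(index, size):
    # boolean mask with True at the given node indices
    mask = torch.zeros(size, dtype=torch.bool)
    mask[index] = True
    return mask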
Example #17
def load_data(args):
    dataset = args.input
    path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', dataset)

    if dataset in ['cora', 'citeseer', 'pubmed']:
        dataset = Planetoid(path, dataset, transform=T.NormalizeFeatures())
        num_features = dataset.num_features
        num_classes = dataset.num_classes
        data = dataset[0]
        return data, num_features, num_classes
    elif dataset == 'corafull':
        dataset = CoraFull(path)
    elif dataset in ['cs', 'physics']:
        dataset = Coauthor(path, name=dataset)
    elif dataset in ['computers', 'photo']:
        dataset = Amazon(path, name=dataset)
    elif dataset == 'reddit':
        dataset = Reddit(path)
        num_features = dataset.num_features
        num_classes = dataset.num_classes
        data = dataset[0]
        return data, num_features, num_classes
    num_features = dataset.num_features
    num_classes = dataset.num_classes
    data = dataset[0]

    data.train_mask, data.val_mask, data.test_mask = generate_split(
        data, num_classes)

    return data, num_features, num_classes
Example #18
File: pyg.py Project: xs-li/cogdl
 def __init__(self):
     dataset = "Cora"
     path = osp.join(osp.dirname(osp.realpath(__file__)), "../..", "data",
                     dataset)
     if not osp.exists(path):
         Planetoid(path, dataset, T.NormalizeFeatures())
         Planetoid(path, dataset, T.TargetIndegree())
     super(CoraDataset, self).__init__(path, dataset, T.TargetIndegree())
Example #19
 def __init__(self, name):
     path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data',
                     name)
     self.dataset = Planetoid(path, "Cora", T.NormalizeFeatures())
     data = self.dataset[0]
     data.train_mask = data.val_mask = data.test_mask = data.y = None
     self.num_features = self.dataset.num_features
     self.reconstruction_loss = None
Example #20
def get_data(dataset_name, dataset_dir):
    full_names = {'cora': 'Cora', 'citeseer': 'CiteSeer', 'pubmed': 'PubMed'}
    dataset_name = full_names[dataset_name]
    dataset_path = path.join(dataset_dir, dataset_name)
    dataset = Planetoid(dataset_path,
                        dataset_name,
                        transform=T.NormalizeFeatures())
    return dataset
Example #21
def get_planetoid_dataset(name, normalize_features=False, transform=None):
    usrhome = os.path.expanduser('~')
    datadir = os.path.join(usrhome, '.pyg')
    if not os.path.exists(datadir):
        print('Creating data dir: ' + datadir)
        os.makedirs(datadir)
    path = osp.join(datadir, name)
    dataset = Planetoid(path, name)

    if transform is not None and normalize_features:
        dataset.transform = T.Compose([T.NormalizeFeatures(), transform])
    elif normalize_features:
        dataset.transform = T.NormalizeFeatures()
    elif transform is not None:
        dataset.transform = transform

    return dataset
Example #22
def loaddatas(d_loader, d_name):
    if d_loader == 'Planetoid':
        dataset = getattr(torch_geometric.datasets,
                          d_loader)('../data/' + d_name, d_name,
                                    T.NormalizeFeatures())
    else:
        dataset = getattr(torch_geometric.datasets,
                          d_loader)('../data/' + d_name, d_name)
    return dataset
Example #23
def exp(exp_name, seed, style, shared):
    torch.manual_seed(seed)
    dataset = 'Cora'
    path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '..',
                        'data', dataset)
    dataset = Planetoid(path, dataset, T.NormalizeFeatures())
    data = dataset[0]
    fold = 0
    accuracies = []
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    with open('{}.log'.format(exp_name), 'w') as flog:
        for tr_mask, vl_mask, ts_mask in gen_folds(data.x.shape[0], FOLDS,
                                                   FOLDS_SEED):
            fold += 1
            print("FOLD:", fold)
            flog.write("fold #{}\n".format(fold))

            data.train_mask = tr_mask
            data.val_mask = vl_mask
            data.test_mask = ts_mask

            print('Train: {}'.format(torch.sum(data.train_mask)))
            print('Validation: {}'.format(torch.sum(data.val_mask)))
            print('Test: {}'.format(torch.sum(data.test_mask)))

            data = data.to(device)
            #model = GINNet(dataset).to(device)
            model = GIN(dataset, 2, 64, seed).to(device)
            optimizer = torch.optim.Adam(model.parameters(),
                                         lr=0.001,
                                         weight_decay=0.0001)
            best_acc = 0
            count = 0
            for epoch in range(1, EPOCH):
                train(model, data, optimizer)
                train_accs = validate(model, data)
                log = 'Epoch: {:03d}, Train: {:.4f}, Validation: {:.4f}'
                print(log.format(epoch, *train_accs))
                log += '\n'
                flog.write(log.format(epoch, *train_accs))
                if train_accs[1] > best_acc:
                    best_acc = train_accs[1]
                    torch.save(model.state_dict(), "{}.dth".format(exp_name))
                    print("Saving model at iteration {}".format(epoch))
                    count = 0
                else:
                    count += 1
                if count == 200:
                    break
            model.load_state_dict(torch.load("{}.dth".format(exp_name)))
            accs = test(model, data)
            print('Test Accuracy: {}'.format(accs[1]))
            flog.write('Test Accuracy: {}\n'.format(accs[1]))
            accuracies.append(accs[1])
        flog.write("----------\n")
        flog.write("Avg Test Accuracy: {}\tVariance: {}\n".format(
            np.mean(accuracies), np.var(accuracies)))
Example #24
File: trainer.py Project: vermouthdky/GREF
def load_data(dataset="Cora"):
    path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "..",
                        "data", dataset)
    if dataset in ["Cora", "Citeseer", "Pubmed"]:
        data = Planetoid(path,
                         dataset,
                         split="public",
                         transform=T.NormalizeFeatures())[0]
        num_nodes = data.x.size(0)
        edge_index, _ = remove_self_loops(data.edge_index)
        edge_index = add_self_loops(edge_index, num_nodes=num_nodes)
        if isinstance(edge_index, tuple):
            data.edge_index = edge_index[0]  # !!! 2*N; this may have changed in newer versions
        else:
            data.edge_index = edge_index
        return data
    elif dataset in ["CoauthorCS"]:
        data = Coauthor(path, "cs", transform=T.NormalizeFeatures())[0]
        num_nodes = data.x.size(0)
        edge_index, _ = remove_self_loops(data.edge_index)
        edge_index = add_self_loops(edge_index, num_nodes=num_nodes)
        if isinstance(edge_index, tuple):
            data.edge_index = edge_index[0]
        else:
            data.edge_index = edge_index

        # divide the nodes into training, validation and test sets
        train_mask = torch.zeros((num_nodes, ), dtype=torch.bool)
        val_mask = torch.zeros((num_nodes, ), dtype=torch.bool)
        test_mask = torch.zeros((num_nodes, ), dtype=torch.bool)
        train_num = 40
        val_num = 150
        for i in range(15):  # number of labels
            index = (data.y == i).nonzero()[:, 0]
            perm = torch.randperm(index.size(0))
            train_mask[index[perm[:train_num]]] = 1
            val_mask[index[perm[train_num:(train_num + val_num)]]] = 1
            test_mask[index[perm[(train_num + val_num):]]] = 1
        data.train_mask = train_mask
        data.val_mask = val_mask
        data.test_mask = test_mask
        return data
    else:
        raise Exception(f"the dataset of {dataset} has not been implemented")
Example #25
def get_planetoid_dataset(name, normalize_features=False, transform=None, split="public"):
    path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', name)
    if split == 'complete':
        dataset = Planetoid(path, name)
        dataset[0].train_mask.fill_(False)
        dataset[0].train_mask[:dataset[0].num_nodes - 1000] = 1
        dataset[0].val_mask.fill_(False)
        dataset[0].val_mask[dataset[0].num_nodes - 1000:dataset[0].num_nodes - 500] = 1
        dataset[0].test_mask.fill_(False)
        dataset[0].test_mask[dataset[0].num_nodes - 500:] = 1
    else:
        dataset = Planetoid(path, name, split=split)
    if transform is not None and normalize_features:
        dataset.transform = T.Compose([T.NormalizeFeatures(), transform])
    elif normalize_features:
        dataset.transform = T.NormalizeFeatures()
    elif transform is not None:
        dataset.transform = transform
    return dataset
Example #26
def get_data(model_name, dataset_dir):
    dataset_path = path.join(dataset_dir, 'Cora')
    if model_name == 'spline':
        transform = T.TargetIndegree()
    elif model_name == 'dna':
        transform = None
    else:
        transform = T.NormalizeFeatures()
    dataset = Planetoid(dataset_path, 'Cora', transform=transform)
    return dataset
Example #27
File: utils.py Project: Sanchez2020/Demo
def load_dataset(dataset_folder, dataset_name):
    """
    导入数据集,并处理为Data格式
    :param dataset_folder: 数据集存储路径
    :param dataset_name: 数据集的名字("Cora", "CiteSeer", "PubMed")
    :return: dataset
    """
    path = os.path.join(os.path.dirname(dataset_folder), dataset_name)
    dataset = Planetoid(path, dataset_name, T.NormalizeFeatures())
    return dataset
Example #28
def get_dataset_properties(params):
    dataset_name = params["dataset"]
    split = params["split"]
    path = os.path.join(os.getcwd(), "dataset", dataset_name)
    os.makedirs(path, exist_ok=True)
    torch.manual_seed(params["seed"])
    dataset = Planetoid(path, dataset_name, split=split, transform=T.NormalizeFeatures())
    data = dataset[0]
    n_features = dataset.num_features
    n_classes = dataset.num_classes
    return data, n_features, n_classes
Example #29
def get_data(dataset_name, model_name, dataset_dir):
    full_names = {'cora': 'Cora', 'citeseer': 'CiteSeer', 'pubmed': 'PubMed'}
    dataset_name = full_names[dataset_name]
    dataset_path = path.join(dataset_dir, dataset_name)
    if model_name == 'spline':
        transform = T.TargetIndegree()
    elif model_name == 'dna':
        transform = None
    else:
        transform = T.NormalizeFeatures()
    dataset = Planetoid(dataset_path, dataset_name, transform=transform)
    return dataset
Example #30
 def __init__(self,
              root='/home/galkampel/tmp',
              dataset_name="PubMed",
              split_type='public'):
     names = {"Cora", "CiteSeer", "PubMed"}
     self.dataset_name = dataset_name
     self.split_type = split_type
     path = os.path.join(root, dataset_name)
     self.dataset = Planetoid(path,
                              dataset_name,
                              split=split_type,
                              transform=T.NormalizeFeatures())