Code Example #1
File: 1-mutag.py Project: zbn123/k-gnn
                                                           patience=5,
                                                           min_lr=0.00001)

    test_mask = torch.zeros(len(dataset), dtype=torch.uint8)
    n = len(dataset) // 10
    test_mask[i * n:(i + 1) * n] = 1
    test_dataset = dataset[test_mask]
    train_dataset = dataset[1 - test_mask]

    n = len(train_dataset) // 10
    val_mask = torch.zeros(len(train_dataset), dtype=torch.uint8)
    val_mask[i * n:(i + 1) * n] = 1
    val_dataset = train_dataset[val_mask]
    train_dataset = train_dataset[1 - val_mask]

    val_loader = DataLoader(val_dataset, batch_size=BATCH)
    test_loader = DataLoader(test_dataset, batch_size=BATCH)
    train_loader = DataLoader(train_dataset, batch_size=BATCH, shuffle=True)

    print('---------------- Split {} ----------------'.format(i))

    best_val_loss, test_acc = 100, 0
    for epoch in range(1, 101):
        lr = scheduler.optimizer.param_groups[0]['lr']
        train_loss = train(epoch, train_loader, optimizer)
        val_loss = val(val_loader)
        scheduler.step(val_loss)
        if best_val_loss >= val_loss:
            test_acc = test(test_loader)
            best_val_loss = val_loss
        print('Epoch: {:03d}, LR: {:7f}, Train Loss: {:.7f}, '
              'Val Loss: {:.7f}, Test Acc: {:.7f}'.format(
                  epoch, lr, train_loss, val_loss, test_acc))
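Note: the uint8 masks and the 1 - mask complement above follow an older PyTorch idiom; recent PyTorch releases warn on uint8 indexing and do not allow subtraction on bool tensors. A minimal sketch of the same fold split using boolean masks (assuming dataset and the fold index i from the surrounding loop):

    test_mask = torch.zeros(len(dataset), dtype=torch.bool)
    n = len(dataset) // 10
    test_mask[i * n:(i + 1) * n] = True
    test_dataset = dataset[test_mask]
    train_dataset = dataset[~test_mask]  # logical complement instead of 1 - mask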
Code Example #2
File: main_gnn.py Project: edwardelson/ogb
def main():
    # Training settings
    parser = argparse.ArgumentParser(
        description='GNN baselines on PCQM4M with PyTorch Geometric')
    parser.add_argument('--device',
                        type=int,
                        default=0,
                        help='which gpu to use if any (default: 0)')
    parser.add_argument(
        '--gnn',
        type=str,
        default='gin-virtual',
        help='GNN type: gin, gin-virtual, gcn, gcn-virtual, gin-virtual-bnn, '
        'or gin-virtual-bnn-lastLayer (default: gin-virtual)')
    parser.add_argument(
        '--graph_pooling',
        type=str,
        default='sum',
        help='graph pooling strategy mean or sum (default: sum)')
    parser.add_argument('--drop_ratio',
                        type=float,
                        default=0,
                        help='dropout ratio (default: 0)')
    parser.add_argument(
        '--num_layers',
        type=int,
        default=5,
        help='number of GNN message passing layers (default: 5)')
    parser.add_argument(
        '--emb_dim',
        type=int,
        default=600,
        help='dimensionality of hidden units in GNNs (default: 600)')
    parser.add_argument('--train_subset', action='store_true')
    parser.add_argument('--batch_size',
                        type=int,
                        default=256,
                        help='input batch size for training (default: 256)')
    parser.add_argument('--epochs',
                        type=int,
                        default=100,
                        help='number of epochs to train (default: 100)')
    parser.add_argument('--num_workers',
                        type=int,
                        default=0,
                        help='number of workers (default: 0)')
    parser.add_argument('--log_dir',
                        type=str,
                        default="",
                        help='tensorboard log directory')
    parser.add_argument('--use_triplet_loss', action='store_true')
    parser.add_argument('--checkpoint_dir',
                        type=str,
                        default='',
                        help='directory to save checkpoint')
    parser.add_argument('--save_test_dir',
                        type=str,
                        default='',
                        help='directory to save test submission file')
    args = parser.parse_args()

    print(args)

    np.random.seed(42)
    torch.manual_seed(42)
    torch.cuda.manual_seed(42)
    random.seed(42)

    device = torch.device(
        "cuda:" +
        str(args.device)) if torch.cuda.is_available() else torch.device("cpu")

    ### automatic dataloading and splitting
    dataset = PygPCQM4MDataset(root='dataset/')

    split_idx = dataset.get_idx_split()

    ### automatic evaluator. takes dataset name as input
    evaluator = PCQM4MEvaluator()

    #     if args.use_triplet_loss:
    #         if args.train_subset:
    #             subset_ratio = 0.1
    #             subset_idx = torch.randperm(len(split_idx["train"]))[:int(subset_ratio*len(split_idx["train"]))]
    #             anchor_loader = DataLoader(dataset[split_idx["train"][subset_idx]], batch_size=args.batch_size, shuffle=True, num_workers = args.num_workers)
    #             positive_loader = DataLoader(dataset[split_idx["train"][subset_idx]], batch_size=args.batch_size, shuffle=True, num_workers = args.num_workers)
    #             negative_loader = DataLoader(dataset[split_idx["train"][subset_idx]], batch_size=args.batch_size, shuffle=True, num_workers = args.num_workers)
    #         else:
    #             anchor_loader = DataLoader(dataset[split_idx["train"]], batch_size=args.batch_size, shuffle=True, num_workers = args.num_workers)
    #             positive_loader = DataLoader(dataset[split_idx["train"]], batch_size=args.batch_size, shuffle=True, num_workers = args.num_workers)
    #             negative_loader = DataLoader(dataset[split_idx["train"]], batch_size=args.batch_size, shuffle=True, num_workers = args.num_workers)
    #     elif args.train_subset:
    if args.train_subset:
        subset_ratio = 0.1
        subset_idx = torch.randperm(len(
            split_idx["train"]))[:int(subset_ratio * len(split_idx["train"]))]
        train_loader = DataLoader(dataset[split_idx["train"][subset_idx]],
                                  batch_size=args.batch_size,
                                  shuffle=True,
                                  num_workers=args.num_workers)
    else:
        train_loader = DataLoader(dataset[split_idx["train"]],
                                  batch_size=args.batch_size,
                                  shuffle=True,
                                  num_workers=args.num_workers)

    valid_loader = DataLoader(dataset[split_idx["valid"]],
                              batch_size=args.batch_size,
                              shuffle=False,
                              num_workers=args.num_workers)

    if args.save_test_dir != '':
        test_loader = DataLoader(dataset[split_idx["test"]],
                                 batch_size=args.batch_size,
                                 shuffle=False,
                                 num_workers=args.num_workers)

    if args.checkpoint_dir != '':
        os.makedirs(args.checkpoint_dir, exist_ok=True)

    shared_params = {
        'num_layers': args.num_layers,
        'emb_dim': args.emb_dim,
        'drop_ratio': args.drop_ratio,
        'graph_pooling': args.graph_pooling
    }

    if args.gnn == 'gin':
        model = GNN(gnn_type='gin', virtual_node=False,
                    **shared_params).to(device)
    elif args.gnn == 'gin-virtual':
        model = GNN(gnn_type='gin', virtual_node=True,
                    **shared_params).to(device)
    elif args.gnn == 'gcn':
        model = GNN(gnn_type='gcn', virtual_node=False,
                    **shared_params).to(device)
    elif args.gnn == 'gcn-virtual':
        model = GNN(gnn_type='gcn', virtual_node=True,
                    **shared_params).to(device)
    elif args.gnn == 'gin-virtual-bnn':
        model = BayesianGNN(gnn_type='gin',
                            virtual_node=True,
                            last_layer_only=False,
                            **shared_params).to(device)
    elif args.gnn == 'gin-virtual-bnn-lastLayer':
        model = BayesianGNN(gnn_type='gin',
                            virtual_node=True,
                            last_layer_only=True,
                            **shared_params).to(device)
    else:
        raise ValueError('Invalid GNN type')

    num_params = sum(p.numel() for p in model.parameters())
    print(f'#Params: {num_params}')

    optimizer = optim.Adam(model.parameters(), lr=0.001)

    if args.log_dir != '':
        writer = SummaryWriter(log_dir=args.log_dir)

    best_valid_mae = 1000

    if args.train_subset:
        scheduler = StepLR(optimizer, step_size=300, gamma=0.25)
        args.epochs = 1000
    else:
        scheduler = StepLR(optimizer, step_size=30, gamma=0.25)

    # start epoch (default = 1, unless resuming training)
    firstEpoch = 1
    # check if checkpoint exist -> load model
    checkpointFile = os.path.join(args.checkpoint_dir, 'checkpoint.pt')
    if os.path.exists(checkpointFile):
        # load checkpoint file
        checkpointData = torch.load(checkpointFile)
        firstEpoch = checkpointData["epoch"]
        model.load_state_dict(checkpointData["model_state_dict"])
        optimizer.load_state_dict(checkpointData["optimizer_state_dict"])
        scheduler.load_state_dict(checkpointData["scheduler_state_dict"])
        best_valid_mae = checkpointData["best_val_mae"]
        num_params = checkpointData["num_params"]
        print(
            "Loaded existing weights from {}. Continuing from epoch: {} with best valid MAE: {}"
            .format(checkpointFile, firstEpoch, best_valid_mae))

    if args.use_triplet_loss:
        model.gnn_node.register_forward_hook(get_activation('gnn_node'))

    for epoch in range(firstEpoch, args.epochs + 1):
        print("=====Epoch {}".format(epoch))
        print('Training...')
        if args.use_triplet_loss:
            train_mae = triplet_loss_train(model, device, train_loader,
                                           dataset, optimizer, args.gnn, args)
        else:
            train_mae = train(model, device, train_loader, optimizer, args.gnn)

        print('Evaluating...')
        valid_mae = eval(model, device, valid_loader, evaluator)

        print({'Train': train_mae, 'Validation': valid_mae})

        if args.log_dir != '':
            writer.add_scalar('valid/mae', valid_mae, epoch)
            writer.add_scalar('train/mae', train_mae, epoch)

        if valid_mae < best_valid_mae:
            best_valid_mae = valid_mae
            if args.checkpoint_dir != '':
                print('Saving checkpoint...')
                checkpoint = {
                    'epoch': epoch,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'scheduler_state_dict': scheduler.state_dict(),
                    'best_val_mae': best_valid_mae,
                    'num_params': num_params
                }
                torch.save(checkpoint,
                           os.path.join(args.checkpoint_dir, 'checkpoint.pt'))

            if args.save_test_dir != '':
                print('Predicting on test data...')
                y_pred = test(model, device, test_loader)
                print('Saving test submission file...')
                evaluator.save_test_submission({'y_pred': y_pred},
                                               args.save_test_dir)

        scheduler.step()

        print(f'Best validation MAE so far: {best_valid_mae}')

    if args.log_dir != '':
        writer.close()
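The get_activation helper passed to register_forward_hook above is not shown in this excerpt. A minimal sketch of the usual forward-hook pattern it presumably follows (the project's actual helper may differ):

# Hypothetical helper: stores a module's output under a name for later use.
activation = {}

def get_activation(name):
    def hook(module, inputs, output):
        activation[name] = output.detach()
    return hook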
Code Example #3
import os.path as osp

import torch
import time
import torch.nn.functional as F
from torch.nn import Sequential, Linear, ReLU
from torch_geometric.datasets import TUDataset
from torch_geometric.data import DataLoader
from torch_geometric.nn import GINConv, global_add_pool

path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', 'NCI1')
dataset = TUDataset(path, name='NCI1').shuffle()
test_dataset = dataset[:len(dataset) // 10]
train_dataset = dataset[len(dataset) // 10:]
test_loader = DataLoader(test_dataset, batch_size=256)
train_loader = DataLoader(train_dataset, batch_size=256)


class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()

        num_features = dataset.num_features
        dim = 32

        nn1 = Sequential(Linear(num_features, dim), ReLU(), Linear(dim, dim))
        self.conv1 = GINConv(nn1)
        self.bn1 = torch.nn.BatchNorm1d(dim)

        nn2 = Sequential(Linear(dim, dim), ReLU(), Linear(dim, dim))
        self.conv2 = GINConv(nn2)
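The Net class above is cut off before its readout and forward pass. As a rough usage sketch, a training step over the loaders defined above could look like the following once the model is completed (the Adam settings and the NLL loss are assumptions, not part of the original file):

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = Net().to(device)  # assumes the class gains a forward() that ends in log_softmax
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)


def train_one_epoch():
    model.train()
    total_loss = 0
    for data in train_loader:
        data = data.to(device)
        optimizer.zero_grad()
        loss = F.nll_loss(model(data), data.y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item() * data.num_graphs
    return total_loss / len(train_dataset)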
Code Example #4
File: dataset.py Project: wang91zhe/GFTSR
        return label

    def if_same_col(self, si, ti, tbpos):
        ss, se = tbpos[si][2], tbpos[si][3]
        ts, te = tbpos[ti][2], tbpos[ti][3]
        if (ss >= ts and se <= te):
            return 2
        if (ts >= ss and te <= se):
            return 2
        return 0

    def if_same_cell(self):
        pass


if __name__ == "__main__":

    root_path = '/home/jiuxiao/NLP/table/data/SciTSR/train'

    ds = ScitsrDataset(root_path)
    print(len(ds))
    # ds.check_all()
    # ds.get(0)
    #print(ds.get_html(76))
    test_loader = DataLoader(ds, batch_size=5)
    for data in test_loader:
        print(data, data.num_graphs)
        x = scatter_mean(data.x, data.batch, dim=0)
        print(data.edge_index)
        print(x.size())
        print("ratio:", data.y.sum().float() / data.y.size()[0])
Code Example #5
File: pyg_datasets.py Project: thomasly/slgnn
def train_loader(self):
    return DataLoader(self._train_data,
                      shuffle=self._shuffle,
                      batch_size=self._batch_size)
Code Example #6
print('- weight decay: ',weight_decay)
print('- hidden size: ',nhid)
#print('- graph convolution: ','GCNConv')
print('- number of graph convolutional layers: {}x{}'.format(2, num_layers))
#print('- graph pooling: ','HaarPooling')
print('- number of pooling layers: ',num_layers)
print('- number of fully connected layers: ',num_layers)
print('- maximum number of epochs: ',epochs)
print('- patience for earlystopping: ',early_stopping)
print('Datasets')
#print('- name: ',dataname)
print('- number of training data:',num_training)
print('- number of validation data:',num_val)
print('- number of test data:',num_test)

train_loader = DataLoader(training_set, batch_size=batch_size, shuffle = False)
val_loader = DataLoader(validation_set, batch_size=batch_size, shuffle = False) # add validation for a possible early stopping
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle = False)

#%%
##Define Model Class
class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.nhid = nhid
        self.num_layers = num_layers
        self.conv1 = GCNConv(num_features, self.nhid)
        self.conv2 = GCNConv(self.nhid, self.nhid)
        self.conv3 = GCNConv(self.nhid, self.nhid)
        self.conv4 = GCNConv(self.nhid, self.nhid)
Code Example #7
File: mainTPR.py Project: Jan21/MoNet
def val_dataloader(self):
    return DataLoader(self.test_dataset, batch_size=64)
Code Example #8
print('Feature matrix: ', example.x)
print('Edge features: ', example.edge_attr)
print('Label: ', example.y)
print()

# preprocess graph data
graph_data = normalizeFeatures(graph_data)

# create train, val test split
num_training = int(len(graph_data) * 0.6)
num_val = int(len(graph_data) * 0.1)
num_test = len(graph_data) - (num_training + num_val)
training_set, validation_set, test_set = random_split(graph_data, [num_training, num_val, num_test])

# create dataloader objects
train_loader = DataLoader(training_set, batch_size=args.batch_size, shuffle=True)
val_loader = DataLoader(validation_set, batch_size=args.batch_size, shuffle=False)
test_loader = DataLoader(test_set, batch_size=args.batch_size, shuffle=False)


# Create the model 
class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()

        self.num_features = args.num_features
        self.nhid = args.nhid

        self.conv1 = GCNConv(self.num_features, self.nhid * 2)
        self.conv2 = GCNConv(self.nhid * 2, self.nhid * 2)
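If the 60/10/30 split above needs to be reproducible across runs, random_split (assuming it is torch.utils.data.random_split, available since torch 1.6 with this signature) also accepts a seeded generator; a small variant of the call above:

generator = torch.Generator().manual_seed(42)
training_set, validation_set, test_set = random_split(
    graph_data, [num_training, num_val, num_test], generator=generator)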
Code Example #9
def cross_validation_with_val_set(dataset,
                                  model,
                                  folds,
                                  epochs,
                                  batch_size,
                                  lr,
                                  lr_decay_factor,
                                  lr_decay_step_size,
                                  weight_decay,
                                  logger=None):

    val_losses, accs, durations = [], [], []
    for fold, (train_idx, test_idx,
               val_idx) in enumerate(zip(*k_fold(dataset, folds))):

        train_dataset = dataset[train_idx]
        test_dataset = dataset[test_idx]
        val_dataset = dataset[val_idx]

        if 'adj' in train_dataset[0]:
            train_loader = DenseLoader(train_dataset, batch_size, shuffle=True)
            val_loader = DenseLoader(val_dataset, batch_size, shuffle=False)
            test_loader = DenseLoader(test_dataset, batch_size, shuffle=False)
        else:
            train_loader = DataLoader(train_dataset, batch_size, shuffle=True)
            val_loader = DataLoader(val_dataset, batch_size, shuffle=False)
            test_loader = DataLoader(test_dataset, batch_size, shuffle=False)

        model.to(device).reset_parameters()
        optimizer = Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

        if torch.cuda.is_available():
            torch.cuda.synchronize()

        t_start = time.perf_counter()

        for epoch in range(1, epochs + 1):
            train_loss = train(model, optimizer, train_loader)
            val_losses.append(eval_loss(model, val_loader))
            accs.append(eval_acc(model, test_loader))
            eval_info = {
                'fold': fold,
                'epoch': epoch,
                'train_loss': train_loss,
                'val_loss': val_losses[-1],
                'test_acc': accs[-1],
            }

            if logger is not None:
                logger(eval_info)

            if epoch % lr_decay_step_size == 0:
                for param_group in optimizer.param_groups:
                    param_group['lr'] = lr_decay_factor * param_group['lr']

        if torch.cuda.is_available():
            torch.cuda.synchronize()

        t_end = time.perf_counter()
        durations.append(t_end - t_start)

    loss, acc, duration = tensor(val_losses), tensor(accs), tensor(durations)
    loss, acc = loss.view(folds, epochs), acc.view(folds, epochs)
    loss, argmin = loss.min(dim=1)
    acc = acc[torch.arange(folds, dtype=torch.long), argmin]

    print('Val Loss: {:.4f}, Test Accuracy: {:.3f} ± {:.3f}, Duration: {:.3f}'.
          format(loss.mean().item(),
                 acc.mean().item(),
                 acc.std().item(),
                 duration.mean().item()))

    return loss.mean().item(), acc.mean().item(), acc.std().item()
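The k_fold helper unpacked by zip(*k_fold(dataset, folds)) above is not part of this excerpt. A sketch of the stratified split it is commonly implemented with (assuming graph labels are available as dataset.data.y; the source project's helper may differ):

from sklearn.model_selection import StratifiedKFold


def k_fold(dataset, folds):
    skf = StratifiedKFold(folds, shuffle=True, random_state=12345)

    test_indices, train_indices = [], []
    for _, idx in skf.split(torch.zeros(len(dataset)), dataset.data.y):
        test_indices.append(torch.from_numpy(idx).long())

    # Reuse the previous fold's test indices as the current fold's validation set.
    val_indices = [test_indices[i - 1] for i in range(folds)]

    for i in range(folds):
        train_mask = torch.ones(len(dataset), dtype=torch.bool)
        train_mask[test_indices[i]] = False
        train_mask[val_indices[i]] = False
        train_indices.append(train_mask.nonzero(as_tuple=False).view(-1))

    return train_indices, test_indices, val_indices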
Code Example #10
File: main.py Project: ssbahade/gnn_toy_example
    def atexit_tasks(model):

        # -----------------------------------------------
        # ---------------- EVALUATION ROUTINE -----------
        # -----------------------------------------------

        # save the tensorboardx summary files
        summary_dir_exit = os.path.join(
            config.run_abs_path, config.summary_dir)
        summary_compressed = summary_dir_exit + '.tar.gz'
        # remove old tar file
        if os.path.isfile(summary_compressed):
            os.remove(summary_compressed)

        with tarfile.open(summary_compressed, mode='w:gz') as archive:
            archive.add(summary_dir_exit, arcname='summary', recursive=True)
        _run.add_artifact(filename=summary_compressed, name='summary.tar.gz')

        model.eval()
        model.current_writer = None

        # train loss
        final_loss_train = 0.0
        final_metric_train = 0.0
        final_nr_nodes_train = 0
        for data_ft in data_loader_train:
            data_ft = data_ft.to(device)
            out_ft = model(data_ft)
            final_loss_train += model.loss(out_ft,
                                           data_ft.y).item() * data_ft.num_nodes
            final_metric_train += model.out_to_metric(
                out_ft, data_ft.y) * data_ft.num_nodes
            final_nr_nodes_train += data_ft.num_nodes
        final_loss_train /= final_nr_nodes_train
        final_metric_train /= final_nr_nodes_train

        _run.log_scalar(
            'loss_train_final',
            final_loss_train,
            config.training_epochs)
        _run.log_scalar(
            'accuracy_train_final',
            final_metric_train,
            config.training_epochs)

        # test loss
        data_loader_test = DataLoader(
            test_dataset, batch_size=config.batch_size_eval, shuffle=False)
        test_loss = 0.0
        test_metric = 0.0
        nr_nodes_test = 0
        test_predictions = []
        test_targets = []

        for data_fe in data_loader_test:
            data_fe = data_fe.to(device)
            out_fe = model(data_fe)
            test_loss += model.loss(out_fe,
                                    data_fe.y).item() * data_fe.num_nodes
            test_metric += model.out_to_metric(out_fe,
                                               data_fe.y) * data_fe.num_nodes
            nr_nodes_test += data_fe.num_nodes
            pred = model.out_to_predictions(out_fe)
            test_predictions.extend(model.predictions_to_list(pred))
            test_targets.extend(data_fe.y.tolist())
        test_loss /= nr_nodes_test
        test_metric /= nr_nodes_test

        _run.log_scalar('loss_test', test_loss, config.training_epochs)
        _run.log_scalar('accuracy_test', test_metric, config.training_epochs)

        # final print routine
        print('')
        dataset.print_summary()
        print('Total number of parameters: {}'.format(total_params))
        print('Mean train loss ({0} samples): {1:.3f}'.format(
            train_dataset.__len__(),
            final_loss_train))
        print('Mean accuracy on train set: {0:.3f}'.format(
            final_metric_train))
        print('Mean test loss ({0} samples): {1:.3f}'.format(
            test_dataset.__len__(),
            test_loss))
        print('Mean accuracy on test set: {0:.3f}'.format(
            test_metric))
        print('')

        # plot targets vs predictions. default is a confusion matrix
        model.plot_targets_vs_predictions(
            targets=test_targets, predictions=test_predictions)
        _run.add_artifact(
            filename=os.path.join(
                config.run_abs_path,
                config.confusion_matrix_path),
            name=config.confusion_matrix_path)

        # if Regression, plot targets vs. continuous outputs
        # if isinstance(model.model_type, RegressionProblem):
        #     test_outputs = []
        #     for data in data_loader_test:
        #         data = data.to(device)
        #         out = torch.squeeze(model(data)).tolist()
        #         test_outputs.extend(out)
        #     model.model_type.plot_targets_vs_outputs(
        #         targets=test_targets, outputs=test_outputs)

        # plot errors by location
        # plotter = ResultPlotting(config=config)
        # plotter.plot_errors_by_location(
        # data=test_dataset, predictions=test_predictions,
        # targets=test_targets)

        # plot the graphs in the test dataset for visual inspection
        if config.plot_graphs_testset:
            if config.plot_graphs_testset < 0 or config.plot_graphs_testset > test_dataset.__len__():
                plot_limit = test_dataset.__len__()
            else:
                plot_limit = config.plot_graphs_testset

            for i in range(plot_limit):
                g = test_dataset[i]
                g.to(device)
                out_p = model(g)
                g.plot_predictions(
                    config=config,
                    pred=model.predictions_to_list(
                        model.out_to_predictions(out_p)),
                    graph_nr=i,
                    run=_run,
                    acc=model.out_to_metric(
                        out_p,
                        g.y),
                    logger=_log)

        return '\n{0}\ntrain acc: {1:.3f}\ntest acc: {2:.3f}'.format(
            _run.meta_info['options']['--comment'], final_metric_train, test_metric)
Code Example #11
File: main.py Project: ssbahade/gnn_toy_example
def main(_config, _run, _log):
    # Check for a comment, if none is given raise error
    if _run.meta_info['options']['--comment'] is None:
        raise ValueError('You need to specify a comment with -c, --comment')

    config = argparse.Namespace(**_config)
    _log.info('Logging to {}'.format(config.run_abs_path))

    # -----------------------------------------------
    # ---------------- CREATE SETUP -----------------
    # -----------------------------------------------

    # make necessary directory structure
    if not os.path.isdir(config.run_abs_path):
        os.makedirs(config.run_abs_path)

    # clear old stuff from the run dir, if it's not a restart
    if not config.load_model:
        summary_dir = os.path.join(config.run_abs_path, config.summary_dir)
        if os.path.isdir(summary_dir):
            shutil.rmtree(summary_dir)
        model_dir = os.path.join(config.run_abs_path, config.model_dir)
        if os.path.isdir(model_dir):
            shutil.rmtree(model_dir)

        # make dir structure in temp dir
        os.makedirs(summary_dir)
        os.makedirs(model_dir)

    # Pass the path of tensorboardX summaries to sacred
    if config.write_summary:
        _run.info["tensorflow"] = dict()
        _run.info["tensorflow"]["logdirs"] = [os.path.join(
            config.run_abs_path, config.summary_dir)]

    # set up the summary writer for tensorboardX
    train_writer = SummaryWriter(os.path.join(
        config.run_abs_path, 'summary', 'training'))
    val_writer = SummaryWriter(os.path.join(
        config.run_abs_path, 'summary', 'validation'))

    # create and load dataset
    dataset = globals()[config.dataset_type](
        root=config.dataset_abs_path, config=config)
    dataset.update_config(config)
    assert dataset[0].edge_attr.size(1) == config.pseudo_dimensionality

    if config.standardize_targets:
        config.targets_mean, config.targets_std = dataset.targets_mean_std()

    # TODO if model is loaded, use the same train val test split.
    # shuffle can return the permutation of the dataset, which can then be used to permute the same way
    # dataset, perm = dataset.shuffle(return_perm=True)
    # when loading a model:
    # dataset = dataset.__indexing__(permutation)

    # split into train and test
    split_train_idx = int(
        config.samples * (1 - config.test_split - config.validation_split))
    split_validation_idx = int(config.samples * (1 - config.test_split))

    train_dataset = dataset[:split_train_idx]
    validation_dataset = dataset[split_train_idx:split_validation_idx]
    test_dataset = dataset[split_validation_idx:]

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    data_loader_train = DataLoader(
        train_dataset, batch_size=config.batch_size_train, shuffle=True)
    data_loader_validation = DataLoader(
        validation_dataset, batch_size=config.batch_size_eval, shuffle=False)

    if not config.load_model:
        model = globals()[config.model](
            config=config,
            train_writer=train_writer,
            val_writer=val_writer,
            model_type=config.model_type
        )
        model = model.to(device)
    else:
        _log.info('Loading model {} ...'.format(config.load_model))
        # TODO allow to load previous models
        # find latest state of model
        load_model_dir = os.path.join(
            config.root_dir, config.run_abs_path, config.model_dir)
        checkpoint_versions = [name for name in os.listdir(
            load_model_dir) if name.endswith('.tar')]
        if 'final.tar' in checkpoint_versions:
            checkpoint_to_load = 'final.tar'
        else:
            # sorted() keeps the list (list.sort() returns None, which would break the indexing below)
            checkpoint_versions = sorted(
                x for x in checkpoint_versions if x.startswith('epoch'))
            checkpoint_to_load = checkpoint_versions[-1]

        _log.info('Loading checkpoint {} ...'.format(
            os.path.join(load_model_dir, checkpoint_to_load)))
        checkpoint = torch.load(os.path.join(
            load_model_dir, checkpoint_to_load))

        # restore the checkpoint
        model = globals()[config.model](
            config=config,
            train_writer=train_writer,
            val_writer=val_writer,
            epoch=checkpoint['epoch'],
            train_batch_iteration=checkpoint['train_batch_iteration'],
            val_batch_iteration=checkpoint['val_batch_iteration'],
            model_type=config.model_type
        )
        # model.to(device) has to be executed before loading the state
        # dicts
        model.to(device)
        model.load_state_dict(checkpoint['model_state_dict'])
        model.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

    total_params = sum(p.numel()
                       for p in model.parameters() if p.requires_grad)
    _run.log_scalar('nr_params', total_params, config.training_epochs)

    # save config to file and store in DB
    config_filepath = os.path.join(config.run_abs_path, 'config.json')
    with open(config_filepath, 'w') as f:
        json.dump(vars(config), f)
    _run.add_artifact(filename=config_filepath)

    def atexit_tasks(model):

        # -----------------------------------------------
        # ---------------- EVALUATION ROUTINE -----------
        # -----------------------------------------------

        # save the tensorboardx summary files
        summary_dir_exit = os.path.join(
            config.run_abs_path, config.summary_dir)
        summary_compressed = summary_dir_exit + '.tar.gz'
        # remove old tar file
        if os.path.isfile(summary_compressed):
            os.remove(summary_compressed)

        with tarfile.open(summary_compressed, mode='w:gz') as archive:
            archive.add(summary_dir_exit, arcname='summary', recursive=True)
        _run.add_artifact(filename=summary_compressed, name='summary.tar.gz')

        model.eval()
        model.current_writer = None

        # train loss
        final_loss_train = 0.0
        final_metric_train = 0.0
        final_nr_nodes_train = 0
        for data_ft in data_loader_train:
            data_ft = data_ft.to(device)
            out_ft = model(data_ft)
            final_loss_train += model.loss(out_ft,
                                           data_ft.y).item() * data_ft.num_nodes
            final_metric_train += model.out_to_metric(
                out_ft, data_ft.y) * data_ft.num_nodes
            final_nr_nodes_train += data_ft.num_nodes
        final_loss_train /= final_nr_nodes_train
        final_metric_train /= final_nr_nodes_train

        _run.log_scalar(
            'loss_train_final',
            final_loss_train,
            config.training_epochs)
        _run.log_scalar(
            'accuracy_train_final',
            final_metric_train,
            config.training_epochs)

        # test loss
        data_loader_test = DataLoader(
            test_dataset, batch_size=config.batch_size_eval, shuffle=False)
        test_loss = 0.0
        test_metric = 0.0
        nr_nodes_test = 0
        test_predictions = []
        test_targets = []

        for data_fe in data_loader_test:
            data_fe = data_fe.to(device)
            out_fe = model(data_fe)
            test_loss += model.loss(out_fe,
                                    data_fe.y).item() * data_fe.num_nodes
            test_metric += model.out_to_metric(out_fe,
                                               data_fe.y) * data_fe.num_nodes
            nr_nodes_test += data_fe.num_nodes
            pred = model.out_to_predictions(out_fe)
            test_predictions.extend(model.predictions_to_list(pred))
            test_targets.extend(data_fe.y.tolist())
        test_loss /= nr_nodes_test
        test_metric /= nr_nodes_test

        _run.log_scalar('loss_test', test_loss, config.training_epochs)
        _run.log_scalar('accuracy_test', test_metric, config.training_epochs)

        # final print routine
        print('')
        dataset.print_summary()
        print('Total number of parameters: {}'.format(total_params))
        print('Mean train loss ({0} samples): {1:.3f}'.format(
            train_dataset.__len__(),
            final_loss_train))
        print('Mean accuracy on train set: {0:.3f}'.format(
            final_metric_train))
        print('Mean test loss ({0} samples): {1:.3f}'.format(
            test_dataset.__len__(),
            test_loss))
        print('Mean accuracy on test set: {0:.3f}'.format(
            test_metric))
        print('')

        # plot targets vs predictions. default is a confusion matrix
        model.plot_targets_vs_predictions(
            targets=test_targets, predictions=test_predictions)
        _run.add_artifact(
            filename=os.path.join(
                config.run_abs_path,
                config.confusion_matrix_path),
            name=config.confusion_matrix_path)

        # if Regression, plot targets vs. continuous outputs
        # if isinstance(model.model_type, RegressionProblem):
        #     test_outputs = []
        #     for data in data_loader_test:
        #         data = data.to(device)
        #         out = torch.squeeze(model(data)).tolist()
        #         test_outputs.extend(out)
        #     model.model_type.plot_targets_vs_outputs(
        #         targets=test_targets, outputs=test_outputs)

        # plot errors by location
        # plotter = ResultPlotting(config=config)
        # plotter.plot_errors_by_location(
        # data=test_dataset, predictions=test_predictions,
        # targets=test_targets)

        # plot the graphs in the test dataset for visual inspection
        if config.plot_graphs_testset:
            if config.plot_graphs_testset < 0 or config.plot_graphs_testset > test_dataset.__len__():
                plot_limit = test_dataset.__len__()
            else:
                plot_limit = config.plot_graphs_testset

            for i in range(plot_limit):
                g = test_dataset[i]
                g.to(device)
                out_p = model(g)
                g.plot_predictions(
                    config=config,
                    pred=model.predictions_to_list(
                        model.out_to_predictions(out_p)),
                    graph_nr=i,
                    run=_run,
                    acc=model.out_to_metric(
                        out_p,
                        g.y),
                    logger=_log)

        return '\n{0}\ntrain acc: {1:.3f}\ntest acc: {2:.3f}'.format(
            _run.meta_info['options']['--comment'], final_metric_train, test_metric)

    atexit.register(atexit_tasks, model=model)

    # -----------------------------------------------
    # ---------------- TRAINING LOOP ----------------
    # -----------------------------------------------

    for epoch in range(model.epoch, config.training_epochs):
        # put model in training mode (e.g. use dropout)
        model.train()
        epoch_loss = 0.0
        epoch_metric_train = 0.0
        nr_nodes_train = 0
        _log.info('epoch {} ...'.format(epoch))
        for batch_i, data in enumerate(data_loader_train):
            data = data.to(device)
            # call the forward method
            out = model(data)

            loss = model.loss(out, data.y)
            model.print_current_loss(epoch, batch_i, _log)
            epoch_loss += loss.item() * data.num_nodes
            epoch_metric_train += model.out_to_metric(
                out, data.y) * data.num_nodes
            nr_nodes_train += data.num_nodes

            # clear the gradient variables of the model
            model.optimizer.zero_grad()

            loss.backward()

            # Gradient clipping
            if config.clip_grad:
                if config.clip_method == 'value':
                    torch.nn.utils.clip_grad_value_(
                        parameters=model.parameters(),
                        clip_value=config.clip_value
                    )
                else:
                    torch.nn.utils.clip_grad_norm_(
                        parameters=model.parameters(),
                        max_norm=config.clip_value,
                        norm_type=float(config.clip_method)
                    )

            model.optimizer.step()
            model.train_batch_iteration += 1

        epoch_loss /= nr_nodes_train
        epoch_metric_train /= nr_nodes_train

        if config.write_summary:
            train_writer.add_scalar('_per_epoch/loss', epoch_loss, epoch)
            train_writer.add_scalar(
                '_per_epoch/metric', epoch_metric_train, epoch)
        _run.log_scalar('loss_train', epoch_loss, epoch)
        _run.log_scalar('accuracy_train', epoch_metric_train, epoch)

        # validation
        model.eval()
        validation_loss = 0.0
        epoch_metric_val = 0.0
        nr_nodes_val = 0
        for batch_i, data in enumerate(data_loader_validation):
            data = data.to(device)
            out = model(data)
            loss = model.loss(out, data.y)
            model.print_current_loss(
                epoch, 'validation {}'.format(batch_i), _log)
            validation_loss += loss.item() * data.num_nodes
            epoch_metric_val += model.out_to_metric(
                out, data.y) * data.num_nodes
            nr_nodes_val += data.num_nodes
            model.val_batch_iteration += 1

        # The numbering of train and val does not correspond 1-to-1!
        # Here we skip some numbers for maintaining loose correspondence
        model.val_batch_iteration = model.train_batch_iteration

        validation_loss /= nr_nodes_val
        epoch_metric_val /= nr_nodes_val

        if config.write_summary:
            val_writer.add_scalar('_per_epoch/loss', validation_loss, epoch)
            val_writer.add_scalar('_per_epoch/metric', epoch_metric_val, epoch)

        _run.log_scalar('loss_val', validation_loss, epoch)
        _run.log_scalar('accuracy_val', epoch_metric_val, epoch)

        model.epoch += 1

        # save intermediate models
        if model.epoch % config.checkpoint_interval == 0:
            model.save('epoch_{}'.format(model.epoch))

    # save the final model
    final_model_name = 'final'
    model.save(final_model_name)
    _run.add_artifact(
        filename=os.path.join(
            config.run_abs_path,
            config.model_dir,
            final_model_name + '.tar'),
        name=final_model_name)

    ###########################

    return atexit_tasks(model=model)
Code Example #12
    model.load_state_dict(state['state_dict'])
    optimizer.load_state_dict(state['optimizer'])
    print('model loaded from %s' % checkpoint_path)


#%%
if __name__ == "__main__":
    np.random.seed(SEED)
    torch.manual_seed(SEED)
    # hyper parameters

    train_data = GraphDataset(TRAIN_DIR).shuffle()
    val_data = GraphDataset(VAL_DIR)
    if small_dataset:
        train_loader = DataLoader(train_data[:1000],
                                  batch_size=batch_size,
                                  shuffle=True)
        val_loader = DataLoader(val_data[:200], batch_size=batch_size)
    else:
        train_loader = DataLoader(train_data,
                                  batch_size=batch_size,
                                  shuffle=True)
        val_loader = DataLoader(val_data, batch_size=batch_size)

    model = HGNN(in_channels, out_channels).to(device)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    scheduler = optim.lr_scheduler.StepLR(optimizer,
                                          step_size=decay_lr_every,
                                          gamma=decay_lr_factor)
    if checkpoint_dir:
        load_checkpoint(checkpoint_dir, model, optimizer)
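Only the tail of load_checkpoint appears at the top of this excerpt; a plausible full definition consistent with those three lines (a sketch, not the original source) would be:

def load_checkpoint(checkpoint_path, model, optimizer):
    state = torch.load(checkpoint_path)
    model.load_state_dict(state['state_dict'])
    optimizer.load_state_dict(state['optimizer'])
    print('model loaded from %s' % checkpoint_path)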
Code Example #13
File: vis_cvae.py Project: jleesdev/pytorch_coma
    num_nodes = [len(M[i].v) for i in range(len(M))]

    print('Loading Dataset')
    if args.data_dir:
        data_dir = args.data_dir
    else:
        data_dir = config['data_dir']

    normalize_transform = Normalize()
    dataset = ComaDataset(data_dir,
                          dtype='test',
                          split=args.split,
                          split_term=args.split_term,
                          pre_transform=normalize_transform)
    data_loader = DataLoader(dataset,
                             batch_size=1,
                             shuffle=True,
                             num_workers=1)

    print('Loading model')
    coma = Coma(dataset, config, D_t, U_t, A_t, num_nodes)

    checkpoint_file = config['checkpoint_file']
    print(checkpoint_file)
    if checkpoint_file:
        checkpoint = torch.load(checkpoint_file,
                                map_location=torch.device('cpu'))
        coma.load_state_dict(checkpoint['state_dict'])
    coma.to(device)

    meshviewer = MeshViewers(shape=(3, cols))
    for row in meshviewer:
Code Example #14
P2 = [0.01]
H = torch.from_numpy(error_generate.generate_PCM(2 * L * L - 2,
                                                 L)).t()  #64, 30
h_prep = error_generate.H_Prep(H.t())
H_prep = torch.from_numpy(h_prep.get_H_Prep())
BATCH_SIZE = 128
lr = 3e-4
Nc = 10
run1 = 40960
run2 = 8192
dataset1 = error_generate.gen_syn(P1, L, H, run1)
dataset2 = error_generate.gen_syn(P2, L, H, run2)
train_dataset = CustomDataset(H, dataset1)
test_dataset = CustomDataset(H, dataset2)
rows, cols = H.size(0), H.size(1)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)
logical, stab = h_prep.get_logical(H_prep)
logical, stab = logical.cuda(), stab.cuda()


def init_weights(m):
    if type(m) == torch.nn.Linear:
        #        torch.nn.init.xavier_uniform_(m.weight)
        #        torch.nn.init.constant_(m.weight, val)
        #        torch.nn.init.uniform_(m.weight, a=0.0884, b=0.1)
        torch.nn.init.uniform_(m.weight, a=0.1, b=1)
        m.bias.data.fill_(1e-3)


def init_weights_one(m):
Code Example #15
import os.path as osp

import torch
import torch.nn.functional as F
from torch_geometric.datasets import PPI
from torch_geometric.data import DataLoader
from torch_geometric.nn import GATConv
from sklearn import metrics

path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', 'PPI')
train_dataset = PPI(path, split='train')
val_dataset = PPI(path, split='val')
test_dataset = PPI(path, split='test')
train_loader = DataLoader(train_dataset, batch_size=1, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=2, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=2, shuffle=False)
dataset_name = 'PPI'


class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = GATConv(train_dataset.num_features, 256, heads=4)
        self.lin1 = torch.nn.Linear(train_dataset.num_features, 4 * 256)
        self.conv2 = GATConv(4 * 256, 256, heads=4)
        self.lin2 = torch.nn.Linear(4 * 256, 4 * 256)
        self.conv3 = GATConv(4 * 256,
                             train_dataset.num_classes,
                             heads=6,
                             concat=False)
        self.lin3 = torch.nn.Linear(4 * 256, train_dataset.num_classes)
Code Example #16
    path_graph_input = os.path.join(path_data_root, 'input_graph')
    dataset_atac_graph = ATACGraphDataset(path_graph_input)
    torch.manual_seed(12345)
    dataset = dataset_atac_graph.shuffle()
    train_dataset = dataset[:1700]
    test_dataset = dataset[1700:]

    device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")
    model = GCN(input_channels=dataset.num_node_features,
                output_channels=dataset.num_classes,
                hidden_channels=8,
                num_nodes=dataset_atac_graph[0].num_nodes).to(device)
    criterion = torch.nn.CrossEntropyLoss()

    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

    # train model
    time_start = time()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.0005)
    for epoch in range(1, 50):
        train(train_loader)
        train_acc = test(train_loader)
        test_acc = test(test_loader)
        print(f'Epoch: {epoch:03d}, '
              f'Train Acc: {train_acc:.4f}, Test Acc: {test_acc:.4f}')
        if test_acc > 0.97:
            break
    time_end = time()
    print(time_end - time_start)
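The train and test helpers called in the loop above are not included in this excerpt. A sketch of what they typically look like for this setup (hypothetical; the forward call may need to be adjusted to the GCN's actual signature, and model, device, optimizer, and criterion are taken from the code above):

def train(loader):
    model.train()
    for data in loader:
        data = data.to(device)
        optimizer.zero_grad()
        out = model(data)  # adjust to the model's actual forward signature
        loss = criterion(out, data.y)
        loss.backward()
        optimizer.step()


def test(loader):
    model.eval()
    correct = 0
    for data in loader:
        data = data.to(device)
        pred = model(data).argmax(dim=1)
        correct += int((pred == data.y).sum())
    return correct / len(loader.dataset)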
Code Example #17
        super(PyGToyDataset, self).__init__(save_root, transform, pre_transform)
        self.data, self.slices = torch.load(self.processed_paths[0])

    @property
    def raw_file_names(self):
        return ["origin_dataset"]

    @property
    def processed_file_names(self):
        return ["toy_dataset.pt"]

    def download(self):
        pass

    def process(self):
        # 100 samples, each sample is a graph with 32 nodes and 42 edges, each node has a feature dimension of 3.
        data_list = [toy_dataset(num_nodes=32, num_node_features=3, num_edges=42) for _ in range(100)]
        data_save, data_slices = self.collate(data_list)
        torch.save((data_save, data_slices), self.processed_paths[0])


if __name__ == '__main__':
    # toy_sample = toy_dataset(num_nodes=32, num_node_features=3, num_edges=42)
    # print(toy_sample)
    toy_data = PyGToyDataset(save_root="toy")  # 100 samples, each sample is a graph
    # print(toy_data[0])
    data_loader = DataLoader(toy_data, batch_size=5, shuffle=True)

    for batch in data_loader:
        print(batch)
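The toy_dataset factory called in process() above is not included in this excerpt. A minimal sketch of a random-graph generator matching its call signature (hypothetical; the project's own helper may sample nodes, edges, and labels differently):

from torch_geometric.data import Data


def toy_dataset(num_nodes, num_node_features, num_edges):
    x = torch.randn(num_nodes, num_node_features)
    edge_index = torch.randint(0, num_nodes, (2, num_edges), dtype=torch.long)
    y = torch.randint(0, 2, (1,))  # arbitrary binary graph label
    return Data(x=x, edge_index=edge_index, y=y)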
Code Example #18
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)
    earlystop = Earlystop(window_size=50)
    train_dataset = []
    test_dataset = []
    for j in range(10):
        if j == fold:
            test_dataset = folded_data[j]
        else:
            train_dataset.extend(folded_data[j])
    valid_idx = int(len(train_dataset) * 0.1)
    valid_dataset = train_dataset[:valid_idx]
    train_dataset = train_dataset[valid_idx:]

    test_loader = DataLoader(test_dataset, batch_size=args.batch_size)
    valid_loader = DataLoader(valid_dataset, batch_size=args.batch_size)
    train_loader = DataLoader(train_dataset, batch_size=args.batch_size)

    fold_best_acc = 0.
    fold_val_loss = 100.
    fold_val_acc = 0.
    patience = 0

    for epoch in range(1, 100001):
        t = time.time()
        loss = train(train_loader)
        val_acc, val_loss = test(valid_loader)
        train_acc, _ = test(train_loader)

        if fold_val_loss >= val_loss:
Code Example #19
File: mainTPR.py Project: Jan21/MoNet
def train_dataloader(self):
    return DataLoader(self.train_dataset, batch_size=64, shuffle=True)
Code Example #20
def modelSelection(model_list,k, train_dataset, balanced=True, force_numclasses=None, unbalanced_split=False ):    

    global device 
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    #print(dir(train_dataset))
    # not working
    if force_numclasses is not None:
        train_dataset.num_classes = force_numclasses

    kfolds = kFolding2(train_dataset,k, balanced, unbalanced_split)


    for modeldict in model_list:

        train_loss_history = []
        val_history = {'loss':[], 'accuracy':[], 'microF1':[],'macroF1':[]}
        modeldict['cv_val_loss']=0.0
        modeldict['cv_val_accuracy']=0.0
        modeldict['cv_val_microF1'] =0.0
        modeldict['cv_val_macroF1'] =0.0

        epochs = modeldict['epochs']
        modelclass = modeldict['model']
        kwargs = modeldict['kwargs']

        try:
            model = modelclass(**kwargs)
            model = model.to(device)
            modeldict['model_instance'] = model
            
            lr = modeldict['learning_rate']
            wd = modeldict['weight_decay']
            bs = modeldict['batch_size']

            optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=wd)

            for kfold in kfolds:

                train = train_dataset[kfold[0]]
                val = train_dataset[kfold[1]]
                loader = DataLoader(train, batch_size=bs, shuffle=True)
                loader_val = DataLoader(val, batch_size=bs, shuffle=True)
                for epoch in range(epochs):
                    train_model(model, loader, optimizer, train_loss_history)
                    val_loss_model(model, loader_val, optimizer, val_history)

                # save results
                modeldict['train_loss_history']=train_loss_history
                modeldict['val_loss_history']=val_history['loss']
                modeldict['val_accuracy_history']=val_history['accuracy']
                modeldict['val_loss']=val_history['loss'][-1]
                modeldict['accuracy']=val_history['accuracy'][-1]
                modeldict['microF1']=val_history['microF1'][-1]
                modeldict['macroF1']=val_history['macroF1'][-1]

                modeldict['cv_val_loss']+=modeldict['val_loss']
                modeldict['cv_val_accuracy']+=modeldict['accuracy']
                modeldict['cv_val_microF1']+=modeldict['microF1']
                modeldict['cv_val_macroF1']+=modeldict['macroF1']
        
            modeldict['cv_val_loss']=modeldict['cv_val_loss']/len(kfolds)
            modeldict['cv_val_accuracy']=modeldict['cv_val_accuracy']/len(kfolds)
            modeldict['cv_val_microF1']=modeldict['cv_val_microF1']/len(kfolds)
            modeldict['cv_val_macroF1']=modeldict['cv_val_macroF1']/len(kfolds)

        except:
            print("Problem training model "+modeldict['model'].__name__)
            traceback.print_exc()

        # report model results
        reportTrainedModel(modeldict)
        

        
    # select best model
    modelsdict = selectBestModel(model_list)
    
    # save model to disk + save file path    
    # or save model in the dict.. (could take too much memory)
    saveModels(modelsdict)
    
    return modelsdict
Code Example #21
import os.path as osp

import torch
import torch.nn.functional as F
from torch_geometric.datasets import MNISTSuperpixels
import torch_geometric.transforms as T
from torch_geometric.data import DataLoader
from torch_geometric.nn import SplineConv, voxel_grid, max_pool, max_pool_x

path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', 'MNIST')
transform = T.Cartesian(cat=False)
train_dataset = MNISTSuperpixels(path, True, transform=transform)
test_dataset = MNISTSuperpixels(path, False, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64)
d = train_dataset


class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = SplineConv(d.num_features, 32, dim=2, kernel_size=5)
        self.conv2 = SplineConv(32, 64, dim=2, kernel_size=5)
        self.conv3 = SplineConv(64, 64, dim=2, kernel_size=5)
        self.fc1 = torch.nn.Linear(4 * 64, 128)
        self.fc2 = torch.nn.Linear(128, d.num_classes)

    def forward(self, data):
        data.x = F.elu(self.conv1(data.x, data.edge_index, data.edge_attr))
        cluster = voxel_grid(data.pos, data.batch, size=5, start=0, end=28)
        data.edge_attr = None
Code Example #22
File: main.py Project: vikasverma1077/InfoGraph-1
    # Normalize targets to mean = 0 and std = 1.
    mean = dataset.data.y[:, target].mean().item()
    std = dataset.data.y[:, target].std().item()
    dataset.data.y[:, target] = (dataset.data.y[:, target] - mean) / std

    # print(type(dataset[0]))
    # print(type(dataset.data.x)) #tensor
    # print(type(dataset.data.y)) #tensor

    # Split datasets.
    test_dataset = dataset[:10000]
    val_dataset = dataset[10000:20000]
    train_dataset = dataset[20000:20000 + args.train_num]

    test_loader = DataLoader(test_dataset, batch_size=batch_size)
    val_loader = DataLoader(val_dataset, batch_size=batch_size)
    train_loader = DataLoader(train_dataset,
                              batch_size=batch_size,
                              shuffle=True)

    if use_unsup_loss:
        unsup_train_dataset = dataset[20000:]
        unsup_train_loader = DataLoader(unsup_train_dataset,
                                        batch_size=batch_size,
                                        shuffle=True)

        print(len(train_dataset), len(val_dataset), len(test_dataset),
              len(unsup_train_dataset))
    else:
        print(len(train_dataset), len(val_dataset), len(test_dataset))
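Because the targets are standardized in place above, predicted values have to be rescaled before errors are reported in the original units. A small evaluation sketch (assuming a trained model that outputs one value per graph, a device, and the mean/std computed above):

error = 0
for data in val_loader:
    data = data.to(device)
    pred = model(data)
    # Multiply by std to undo the standardization; the mean offset cancels in the difference.
    error += ((pred - data.y[:, target]) * std).abs().sum().item()
val_mae = error / len(val_dataset)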
Code Example #23
File: Train_GAE.py Project: gyhandy/Graph_AE
from utils.CustomDataSet import SelectGraph, SceneGraphs
from classification.Graph_AE import Net
from classification.Classifier import MLP
import utils.Display_Plot as dp
import torch
from torch_geometric.data import DataLoader  # needed for the loaders below

device = torch.device('cuda:1')

num_epoch = 100
batch_size = 200
comp_model = Net.get_instance().to(device)
cfy_model = MLP.get_instance().to(device)

SelectGraph.data_name = 'Shana10k'
data_set_Shana = SelectGraph('data/' + SelectGraph.data_name)
train_set = DataLoader(data_set_Shana[:5000], 500, shuffle=True)

SelectGraph.data_name = 'Shana7000'
data_set_Shana = SelectGraph('data/' + SelectGraph.data_name)
train_set2 = DataLoader(data_set_Shana[5000:6000], 1000, shuffle=False)
test_set = DataLoader(data_set_Shana[6000:7000], 1000, shuffle=False)

m_name = "GAE_TRANSFER.ckpt"
data_list1, group1, group2 = comp_model.train_model(train_set, train_set2,
                                                    test_set, num_epoch,
                                                    m_name)
data_list2 = cfy_model.train_model(train_set2, test_set, int(num_epoch),
                                   group1, group2)

title = "GAE TRANSFER"
labels = ['MSE Loss', 'Num Nodes', 'Total Loss', title]
Code Example #24
import os.path as osp

import torch
import torch.nn.functional as F
from torch.nn import Sequential as Seq, Dropout, Linear as Lin
from torch_geometric.datasets import ModelNet
import torch_geometric.transforms as T
from torch_geometric.data import DataLoader
from torch_geometric.nn import DynamicEdgeConv, global_max_pool

from pointnet2_classification import MLP

path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data/ModelNet10')
pre_transform, transform = T.NormalizeScale(), T.SamplePoints(1024)
train_dataset = ModelNet(path, '10', True, transform, pre_transform)
test_dataset = ModelNet(path, '10', False, transform, pre_transform)
train_loader = DataLoader(train_dataset,
                          batch_size=32,
                          shuffle=True,
                          num_workers=6)
test_loader = DataLoader(test_dataset,
                         batch_size=32,
                         shuffle=False,
                         num_workers=6)


class Net(torch.nn.Module):
    def __init__(self, out_channels, k=20, aggr='max'):
        super().__init__()

        self.conv1 = DynamicEdgeConv(MLP([2 * 3, 64, 64, 64]), k, aggr)
        self.conv2 = DynamicEdgeConv(MLP([2 * 64, 128]), k, aggr)
        self.lin1 = MLP([128 + 64, 1024])
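The class above is truncated in the middle of __init__; as a sketch, the forward pass of such a DGCNN-style network usually continues with the layers already defined (a final classification head producing out_channels logits, not shown here, would typically follow the pooling step):

    def forward(self, data):
        pos, batch = data.pos, data.batch
        x1 = self.conv1(pos, batch)
        x2 = self.conv2(x1, batch)
        out = self.lin1(torch.cat([x1, x2], dim=1))
        out = global_max_pool(out, batch)
        return out  # a classifier head would map this to out_channels logits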
Code Example #25
File: pyg_datasets.py Project: thomasly/slgnn
def val_loader(self):
    return DataLoader(self._val_data,
                      shuffle=self._shuffle,
                      batch_size=self._batch_size)
Code Example #26
def train():
    # get the parameters
    args = get_args()

    # decide the device
    device = torch.device(
        'cuda:1' if torch.cuda.is_available() and args.cuda else 'cpu')

    # load dataset
    train_dataset = PPI(root='/home/amax/xsx/data/gnn_datas/PPI',
                        split='train')
    val_dataset = PPI(root='/home/amax/xsx/data/gnn_datas/PPI', split='val')
    test_dataset = PPI(root='/home/amax/xsx/data/gnn_datas/PPI', split='test')
    train_loader = DataLoader(train_dataset, batch_size=1, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=2, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=2, shuffle=False)
    # data = dataset[0].to(device)

    # create the model and optimizer
    model = AGNN_PPI(train_dataset.num_features, args.hidden_dim,
                     train_dataset.num_classes, args.dropout).to(device)
    optimizer = Adam(model.parameters(), lr=args.lr)
    criterion = torch.nn.BCEWithLogitsLoss()

    # the information which needs to be recorded
    start_time = time.time()
    bad_counter = 0
    best_valid_f1 = 0.0
    best_epoch = 0
    least_loss = float("inf")
    best_model = None

    # begin training
    for epoch in range(args.epochs):
        # the steps of training
        model.train()
        total_loss = 0.0
        for data in train_loader:
            data.batch = None
            data = data.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, data.y)
            total_loss += loss.item()
            loss.backward()
            optimizer.step()
        avg_loss = total_loss / len(train_loader.dataset)
        f1 = validate(model, val_loader, device)
        # print('Epoch: {:04d}'.format(epoch + 1), 'f1: {:.4f}'.format(f1), 'loss: {:.4f}'.format(avg_loss))

        if avg_loss < least_loss:
            least_loss = avg_loss
            best_epoch = epoch + 1
            best_valid_f1 = f1
            best_model = copy.deepcopy(model)
            bad_counter = 0
        else:
            bad_counter += 1

        if bad_counter >= args.patience:
            break

    print("Optimization Finished!")
    used_time = time.time() - start_time
    print("Total epochs: {:2d}".format(best_epoch + 100))
    print("Best epochs: {:2d}".format(best_epoch))
    # print("Time for each epoch: {:.2f}s".format(used_time / (best_epoch + args.patience)))
    print("Best epoch's validate f1: {:.2f}".format(best_valid_f1 * 100))
    # test the best trained model
    test(best_model, test_loader, device)
    print("Total time elapsed: {:.2f}s".format(used_time))
Code Example #27
0
def main():
    # Training settings
    parser = argparse.ArgumentParser(
        description=
        'PyTorch implementation of pre-training of graph neural networks')
    parser.add_argument('--device',
                        type=int,
                        default=0,
                        help='which gpu to use if any (default: 0)')
    parser.add_argument('--batch_size',
                        type=int,
                        default=32,
                        help='input batch size for training (default: 32)')
    parser.add_argument('--epochs',
                        type=int,
                        default=100,
                        help='number of epochs to train (default: 100)')
    parser.add_argument('--lr',
                        type=float,
                        default=0.001,
                        help='learning rate (default: 0.001)')
    parser.add_argument('--decay',
                        type=float,
                        default=0,
                        help='weight decay (default: 0)')
    parser.add_argument(
        '--num_layer',
        type=int,
        default=5,
        help='number of GNN message passing layers (default: 5).')
    parser.add_argument('--emb_dim',
                        type=int,
                        default=300,
                        help='embedding dimensions (default: 300)')
    parser.add_argument('--dropout_ratio',
                        type=float,
                        default=0.2,
                        help='dropout ratio (default: 0.2)')
    parser.add_argument(
        '--graph_pooling',
        type=str,
        default="mean",
        help='graph level pooling (sum, mean, max, set2set, attention)')
    parser.add_argument(
        '--JK',
        type=str,
        default="last",
        help=
        'how the node features across layers are combined. last, sum, max or concat'
    )
    parser.add_argument(
        '--dataset',
        type=str,
        default='chembl_filtered',
        help='root directory of dataset. For now, only classification.')
    parser.add_argument('--gnn_type', type=str, default="gin")
    parser.add_argument('--input_model_file',
                        type=str,
                        default='',
                        help='filename to read the model (if there is any)')
    parser.add_argument('--output_model_file',
                        type=str,
                        default='',
                        help='filename to output the pre-trained model')
    parser.add_argument('--num_workers',
                        type=int,
                        default=8,
                        help='number of workers for dataset loading')
    args = parser.parse_args()

    torch.manual_seed(0)
    np.random.seed(0)
    device = torch.device(
        "cuda:" +
        str(args.device)) if torch.cuda.is_available() else torch.device("cpu")
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(0)

    #Bunch of classification tasks
    if args.dataset == "chembl_filtered":
        num_tasks = 1310
    else:
        raise ValueError("Invalid dataset name.")

    #set up dataset
    dataset = MoleculeDataset("dataset/" + args.dataset, dataset=args.dataset)

    loader = DataLoader(dataset,
                        batch_size=args.batch_size,
                        shuffle=True,
                        num_workers=args.num_workers)

    #set up model
    model = GNN_graphpred(args.num_layer,
                          args.emb_dim,
                          num_tasks,
                          JK=args.JK,
                          drop_ratio=args.dropout_ratio,
                          graph_pooling=args.graph_pooling,
                          gnn_type=args.gnn_type)
    if not args.input_model_file == "":
        model.from_pretrained(args.input_model_file + ".pth")

    model.to(device)

    #set up optimizer
    optimizer = optim.Adam(model.parameters(),
                           lr=args.lr,
                           weight_decay=args.decay)
    print(optimizer)

    for epoch in range(1, args.epochs + 1):
        print("====epoch " + str(epoch))

        train(args, model, device, loader, optimizer)

    if not args.output_model_file == "":
        torch.save(model.gnn.state_dict(), args.output_model_file + ".pth")
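

# NOTE: the train() helper called above is not part of this snippet. Below is a
# hedged sketch of one supervised pre-training epoch with masked multi-task
# BCE; the model call signature and the {-1, 0, +1} label encoding (0 = missing
# label) are assumptions and may differ from the original project.
def train(args, model, device, loader, optimizer):
    criterion = torch.nn.BCEWithLogitsLoss(reduction='none')
    model.train()
    for batch in loader:
        batch = batch.to(device)
        pred = model(batch.x, batch.edge_index, batch.edge_attr, batch.batch)
        y = batch.y.view(pred.shape).to(torch.float32)
        is_valid = y ** 2 > 0                      # 0 marks a missing label
        loss_mat = criterion(pred, (y + 1) / 2)    # map {-1, +1} -> {0, 1}
        loss_mat = torch.where(is_valid, loss_mat, torch.zeros_like(loss_mat))
        optimizer.zero_grad()
        loss = loss_mat.sum() / is_valid.sum()
        loss.backward()
        optimizer.step()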
Code Example #28
0
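    # (fragment) tail of a checkpoint-saving helper: it bundles the epoch
    # number, the model weights (the model_cpu object, presumably a CPU copy of
    # the state dict), and the optimizer/scheduler state into one dict and
    # writes it to disk below.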
    state = {
        'epoch': epoch,
        'state_dict': model_cpu,
        'optimizer_state_dict': optimizer.state_dict(),
        'scheduler_state_dict': scheduler.state_dict()
    }
    filename = '{}/{}_{}.pth'.format(save_path, name_pre, name_post)
    torch.save(state, filename)


if __name__ == '__main__':
    opt = OptInit().initialize()
    opt.printer.info('===> Creating dataloader ...')
    test_dataset = GeoData.PPI(opt.data_dir, split='test')
    test_loader = DataLoader(test_dataset,
                             batch_size=opt.batch_size,
                             shuffle=True)
    opt.n_classes = test_loader.dataset.num_classes

    opt.printer.info('===> Loading the network ...')
    model = DeepGCN(opt).to(opt.device)
    if opt.multi_gpus:
        model = DataParallel(DeepGCN(opt)).to(opt.device)
    opt.printer.info('===> loading pre-trained ...')
    model, opt.best_value, opt.epoch = load_pretrained_models(
        model, opt.pretrained_model, opt.phase)

    if opt.phase == 'train':
        train_dataset = GeoData.PPI(opt.data_dir, 'train')
        if opt.multi_gpus:
            train_loader = DataListLoader(train_dataset,
Code Example #29
0
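# NOTE: the imports and dataset construction below are not part of the original
# snippet; they are added so the fragment can run on its own. The 540/60 split
# and the graph statistics printed below suggest the ENZYMES TUDataset, but
# that choice is an assumption.
import torch
from torch_scatter import scatter_mean
from torch_geometric.data import DataLoader
from torch_geometric.datasets import TUDataset

dataset = TUDataset(root='data/ENZYMES', name='ENZYMES')

# print number of classes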
print(dataset.num_classes)

# print number of node features
print(dataset.num_node_features)

# Access the graph in the dataset
data = dataset[0]

# There are 168/2 = 84 undirected edges
# There are 37 nodes with 3 features
# There is one graph-level target (y has shape [1])
print(data)

# split dataset to train_dataset and test_dataset
train_dataset = dataset[:540]
test_dataset = dataset[540:]

# random permutation
dataset = dataset.shuffle()

# set dataloader
loader = DataLoader(dataset, batch_size=32, shuffle=True)

for data in loader:
    print(data)
    print(data.num_graphs)
    print(data.num_node_features)

    # Averages data.x values with the same batch index
    x = scatter_mean(data.x, data.batch, dim=0)
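    # -> x has shape [num_graphs_in_batch, num_node_features]:
    #    one averaged feature vector per graph in the mini-batch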
Code Example #30
0
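# NOTE: this fragment assumes earlier (unshown) definitions of the custom
# HGCalShowers dataset class, the hyperp hyper-parameter dict, and imports
# such as torch, DataLoader, and torch_geometric.nn.BatchNorm.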
showers = HGCalShowers(root='/beegfs/desy/user/korcariw/hgcal_model/',
                       raw_files=['ntupleTree_0.root'],
                       out_file='photonShowers50_100GeV_0.pt',
                       include_labels=True,
                       load_on_gpu=True)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f'Model will be loaded on {device}..')
print('Splitting the data.. ')
train_dataset = showers[:180000]
test_dataset = showers[180000:190000]
val_dataset = showers[190000:]

train_loader = DataLoader(train_dataset,
                          batch_size=hyperp['batch_size'],
                          shuffle=True)
test_loader = DataLoader(test_dataset, shuffle=False)
val_loader = DataLoader(val_dataset,
                        batch_size=hyperp['batch_size'],
                        shuffle=False)

print("Done.")


class GCN(torch.nn.Module):
    def __init__(self, hidden_channels):
        super(GCN, self).__init__()
        torch.manual_seed(126755)

        self.bnorm = BatchNorm(4)