Example #1
File: test.py Project: WenjinW/PGL
def infer(config, output_path):
    model = getattr(M, config.model_type)(config)

    log.info("infer model from %s" % config.infer_from)
    model.set_state_dict(paddle.load(config.infer_from))

    log.info("loading data")
    ds = getattr(DS, config.dataset_type)(config)

    split_idx = ds.get_idx_split()
    test_ds = DS.Subset(ds, split_idx['test'], mode='test')
    log.info("Test exapmles: %s" % len(test_ds))

    test_loader = Dataloader(test_ds,
                             batch_size=config.valid_batch_size,
                             shuffle=False,
                             num_workers=1,
                             collate_fn=DS.CollateFn(config))

    ### automatic evaluator. takes dataset name as input
    evaluator = PCQM4MEvaluator()

    # ---------------- test ----------------------- #
    log.info("testing ...")
    pred_dict = evaluate(model, test_loader)

    test_output_path = os.path.join(config.output_dir, config.task_name)
    make_dir(test_output_path)
    test_output_file = os.path.join(test_output_path, "test_pred.npz")

    log.info("saving test result to %s" % test_output_file)
    np.savez_compressed(test_output_file,
                        pred_dict['y_pred'].astype(np.float32))
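The evaluate() helper called above is defined elsewhere in the project and not shown here. A minimal sketch of what it plausibly does, assuming the test loader yields the same (batch_dict, labels, others) tuples as the training loader in Example 5 and that PGL graph objects expose a .tensor() method (both assumptions, not the project's actual code):

import numpy as np
import paddle

def evaluate(model, loader):
    # Hypothetical inference loop; mirrors the feed-dict construction
    # used in Example 5 below.
    model.eval()
    preds = []
    with paddle.no_grad():
        for batch_dict, labels, others in loader:
            feed_dict = {
                key: (value.tensor() if "graph" in key
                      else paddle.to_tensor(value))
                for key, value in batch_dict.items()
            }
            pred = paddle.reshape(model(feed_dict), shape=[-1])
            preds.append(pred.numpy())
    return {"y_pred": np.concatenate(preds)}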
Example #2
def main_mlp():
    # Training settings
    parser = argparse.ArgumentParser(
        description='GNN baselines on ogbgmol* data with PyTorch Geometric')
    parser.add_argument('--device',
                        type=int,
                        default=0,
                        help='which gpu to use if any (default: 0)')
    parser.add_argument('--num_mlp_layers',
                        type=int,
                        default=6,
                        help='number of mlp layers (default: 6)')
    parser.add_argument('--drop_ratio',
                        type=float,
                        default=0.2,
                        help='dropout ratio (default: 0.2)')
    parser.add_argument('--batch_size',
                        type=int,
                        default=256,
                        help='input batch size for training (default: 256)')
    parser.add_argument('--emb_dim',
                        type=int,
                        default=1600,
                        help='embedding dimensionality (default: 1600)')
    parser.add_argument('--train_subset', action='store_true')
    parser.add_argument('--epochs',
                        type=int,
                        default=100,
                        help='number of epochs to train (default: 100)')
    parser.add_argument('--num_workers',
                        type=int,
                        default=0,
                        help='number of workers (default: 0)')
    parser.add_argument('--radius',
                        type=int,
                        default=2,
                        help='radius (default: 2)')
    parser.add_argument('--log_dir',
                        type=str,
                        default="",
                        help='tensorboard log directory')
    parser.add_argument('--checkpoint_dir',
                        type=str,
                        default='',
                        help='directory to save checkpoint')
    parser.add_argument('--save_test_dir',
                        type=str,
                        default='',
                        help='directory to save test submission file')
    args = parser.parse_args()

    print(args)

    np.random.seed(42)
    torch.manual_seed(42)
    torch.cuda.manual_seed(42)
    random.seed(42)

    device = torch.device(
        "cuda:" +
        str(args.device)) if torch.cuda.is_available() else torch.device("cpu")

    dataset = PCQM4MDataset(root='dataset/', only_smiles=True)
    fp_processed_file = preprocess_fp(dataset, args.radius)

    data_dict = torch.load(fp_processed_file)
    X, Y = data_dict['X'], data_dict['Y']

    split_idx = dataset.get_idx_split()
    ### automatic evaluator. takes dataset name as input
    evaluator = PCQM4MEvaluator()

    if args.train_subset:
        print('train subset')
        subset_ratio = 0.1
        subset_idx = torch.randperm(len(
            split_idx["train"]))[:int(subset_ratio * len(split_idx["train"]))]
        train_dataset = TensorDataset(X[split_idx['train'][subset_idx]],
                                      Y[split_idx['train'][subset_idx]])

    else:
        train_dataset = TensorDataset(X[split_idx['train']],
                                      Y[split_idx['train']])

    valid_dataset = TensorDataset(X[split_idx['valid']], Y[split_idx['valid']])
    test_dataset = TensorDataset(X[split_idx['test-dev']],
                                 Y[split_idx['test-dev']])

    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              shuffle=True,
                              num_workers=args.num_workers)
    valid_loader = DataLoader(valid_dataset,
                              batch_size=args.batch_size,
                              shuffle=False,
                              num_workers=args.num_workers)

    if args.save_test_dir != '':
        test_loader = DataLoader(test_dataset,
                                 batch_size=args.batch_size,
                                 shuffle=False,
                                 num_workers=args.num_workers)

    if args.checkpoint_dir != '':
        os.makedirs(args.checkpoint_dir, exist_ok=True)

    model = MLP(num_mlp_layers=args.num_mlp_layers,
                emb_dim=args.emb_dim,
                drop_ratio=args.drop_ratio).to(device)

    num_params = sum(p.numel() for p in model.parameters())
    print(f'#Params: {num_params}')

    optimizer = optim.Adam(model.parameters(), lr=0.001)

    if args.log_dir != '':
        writer = SummaryWriter(log_dir=args.log_dir)

    best_valid_mae = 1000

    if args.train_subset:
        scheduler = StepLR(optimizer, step_size=300, gamma=0.25)
        args.epochs = 1000
    else:
        scheduler = StepLR(optimizer, step_size=30, gamma=0.25)

    for epoch in range(1, args.epochs + 1):
        print("=====Epoch {}".format(epoch))
        print('Training...')
        train_mae = train(model, device, train_loader, optimizer)

        print('Evaluating...')
        valid_mae = eval(model, device, valid_loader, evaluator)

        print({'Train': train_mae, 'Validation': valid_mae})

        if args.log_dir != '':
            writer.add_scalar('valid/mae', valid_mae, epoch)
            writer.add_scalar('train/mae', train_mae, epoch)

        if valid_mae < best_valid_mae:
            best_valid_mae = valid_mae
            if args.checkpoint_dir != '':
                print('Saving checkpoint...')
                checkpoint = {
                    'epoch': epoch,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'scheduler_state_dict': scheduler.state_dict(),
                    'best_val_mae': best_valid_mae,
                    'num_params': num_params
                }
                torch.save(checkpoint,
                           osp.join(args.checkpoint_dir, 'checkpoint.pt'))

            if args.save_test_dir != '':
                print('Predicting on test data...')
                y_pred = test(model, device, test_loader)
                print('Saving test submission file...')
                evaluator.save_test_submission({'y_pred': y_pred},
                                               args.save_test_dir,
                                               mode='test-dev')

        scheduler.step()

        print(f'Best validation MAE so far: {best_valid_mae}')

    if args.log_dir != '':
        writer.close()
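The train(), eval(), and test() helpers driving this loop are not shown. A minimal sketch under two assumptions (the objective is an L1 loss, matching the PCQM4M MAE metric, and the fingerprint features need casting to float32); the project's actual helpers may differ:

import torch

reg_criterion = torch.nn.L1Loss()  # assumption: train directly on an MAE objective

def train(model, device, loader, optimizer):
    model.train()
    loss_accum = 0
    for step, (x, y) in enumerate(loader):
        x, y = x.to(device).float(), y.to(device)
        pred = model(x).view(-1)
        optimizer.zero_grad()
        loss = reg_criterion(pred, y)
        loss.backward()
        optimizer.step()
        loss_accum += loss.detach().item()
    return loss_accum / (step + 1)

@torch.no_grad()
def eval(model, device, loader, evaluator):
    model.eval()
    y_true, y_pred = [], []
    for x, y in loader:
        y_true.append(y)
        y_pred.append(model(x.to(device).float()).view(-1).cpu())
    input_dict = {"y_true": torch.cat(y_true), "y_pred": torch.cat(y_pred)}
    return evaluator.eval(input_dict)["mae"]

@torch.no_grad()
def test(model, device, loader):
    model.eval()
    return torch.cat([model(x.to(device).float()).view(-1).cpu()
                      for x, _ in loader])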
Example #3
def main():
    # Training settings
    parser = argparse.ArgumentParser(
        description='GNN baselines on pcqm4m with DGL')
    parser.add_argument('--seed',
                        type=int,
                        default=42,
                        help='random seed to use (default: 42)')
    parser.add_argument('--device',
                        type=int,
                        default=0,
                        help='which gpu to use if any (default: 0)')
    parser.add_argument(
        '--gnn',
        type=str,
        default='gin-virtual',
        help='GNN to use, which can be from '
        '[gin, gin-virtual, gcn, gcn-virtual] (default: gin-virtual)')
    parser.add_argument(
        '--graph_pooling',
        type=str,
        default='sum',
        help='graph pooling strategy mean or sum (default: sum)')
    parser.add_argument('--drop_ratio',
                        type=float,
                        default=0,
                        help='dropout ratio (default: 0)')
    parser.add_argument(
        '--num_layers',
        type=int,
        default=5,
        help='number of GNN message passing layers (default: 5)')
    parser.add_argument(
        '--emb_dim',
        type=int,
        default=600,
        help='dimensionality of hidden units in GNNs (default: 600)')
    parser.add_argument('--batch_size',
                        type=int,
                        default=256,
                        help='input batch size for training (default: 256)')
    parser.add_argument('--num_workers',
                        type=int,
                        default=0,
                        help='number of workers (default: 0)')
    parser.add_argument('--checkpoint_dir',
                        type=str,
                        default='',
                        help='directory to save checkpoint')
    parser.add_argument('--save_test_dir',
                        type=str,
                        default='',
                        help='directory to save test submission file')
    args = parser.parse_args()

    print(args)

    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    random.seed(args.seed)

    if torch.cuda.is_available():
        torch.cuda.manual_seed(args.seed)
        device = torch.device("cuda:" + str(args.device))
    else:
        device = torch.device("cpu")

    ### automatic data loading and splitting
    ### Read in the raw SMILES strings
    smiles_dataset = PCQM4MDataset(root='dataset/', only_smiles=True)
    split_idx = smiles_dataset.get_idx_split()

    test_smiles_dataset = [smiles_dataset[i] for i in split_idx['test']]
    onthefly_dataset = OnTheFlyPCQMDataset(test_smiles_dataset)
    test_loader = DataLoader(onthefly_dataset,
                             batch_size=args.batch_size,
                             shuffle=False,
                             num_workers=args.num_workers,
                             collate_fn=collate_dgl)

    ### automatic evaluator.
    evaluator = PCQM4MEvaluator()

    shared_params = {
        'num_layers': args.num_layers,
        'emb_dim': args.emb_dim,
        'drop_ratio': args.drop_ratio,
        'graph_pooling': args.graph_pooling
    }

    if args.gnn == 'gin':
        model = GNN(gnn_type='gin', virtual_node=False,
                    **shared_params).to(device)
    elif args.gnn == 'gin-virtual':
        model = GNN(gnn_type='gin', virtual_node=True,
                    **shared_params).to(device)
    elif args.gnn == 'gcn':
        model = GNN(gnn_type='gcn', virtual_node=False,
                    **shared_params).to(device)
    elif args.gnn == 'gcn-virtual':
        model = GNN(gnn_type='gcn', virtual_node=True,
                    **shared_params).to(device)
    else:
        raise ValueError('Invalid GNN type')

    num_params = sum(p.numel() for p in model.parameters())
    print(f'#Params: {num_params}')

    checkpoint_path = os.path.join(args.checkpoint_dir, 'checkpoint.pt')
    if not os.path.exists(checkpoint_path):
        raise RuntimeError(f'Checkpoint file not found at {checkpoint_path}')

    ## reading in checkpoint
    checkpoint = torch.load(checkpoint_path)
    model.load_state_dict(checkpoint['model_state_dict'])

    print('Predicting on test data...')
    y_pred = test(model, device, test_loader)
    print('Saving test submission file...')
    evaluator.save_test_submission({'y_pred': y_pred}, args.save_test_dir)
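The test() helper here is again external. A hypothetical sketch, assuming the usual OGB DGL-baseline conventions: collate_dgl returns a (batched_graph, labels) pair and the model is called as model(graph, node_feat, edge_feat); the real helper may differ:

import torch

@torch.no_grad()
def test(model, device, loader):
    model.eval()
    y_pred = []
    for bg, labels in loader:
        bg = bg.to(device)
        x = bg.ndata.pop("feat")          # assumed node-feature key
        edge_attr = bg.edata.pop("feat")  # assumed edge-feature key
        y_pred.append(model(bg, x, edge_attr).view(-1).cpu())
    return torch.cat(y_pred)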
Example #4
def main():
    # Training settings
    parser = argparse.ArgumentParser(
        description='GNN baselines on pcqm4m with DGL')
    parser.add_argument('--seed',
                        type=int,
                        default=42,
                        help='random seed to use (default: 42)')
    parser.add_argument('--device',
                        type=int,
                        default=0,
                        help='which gpu to use if any (default: 0)')
    parser.add_argument(
        '--gnn',
        type=str,
        default='gin-virtual',
        help='GNN to use, which can be from '
        '[gin, gin-virtual, gcn, gcn-virtual] (default: gin-virtual)')
    parser.add_argument(
        '--graph_pooling',
        type=str,
        default='sum',
        help='graph pooling strategy mean or sum (default: sum)')
    parser.add_argument('--drop_ratio',
                        type=float,
                        default=0,
                        help='dropout ratio (default: 0)')
    parser.add_argument(
        '--num_layers',
        type=int,
        default=5,
        help='number of GNN message passing layers (default: 5)')
    parser.add_argument(
        '--emb_dim',
        type=int,
        default=600,
        help='dimensionality of hidden units in GNNs (default: 600)')
    parser.add_argument('--train_subset',
                        action='store_true',
                        help='use 10% of the training set for training')
    parser.add_argument('--batch_size',
                        type=int,
                        default=256,
                        help='input batch size for training (default: 256)')
    parser.add_argument('--epochs',
                        type=int,
                        default=100,
                        help='number of epochs to train (default: 100)')
    parser.add_argument('--num_workers',
                        type=int,
                        default=0,
                        help='number of workers (default: 0)')
    parser.add_argument('--log_dir',
                        type=str,
                        default="",
                        help='tensorboard log directory. If not specified, '
                        'tensorboard will not be used.')
    parser.add_argument('--checkpoint_dir',
                        type=str,
                        default='',
                        help='directory to save checkpoint')
    parser.add_argument('--save_test_dir',
                        type=str,
                        default='',
                        help='directory to save test submission file')
    args = parser.parse_args()

    print(args)

    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    random.seed(args.seed)

    if torch.cuda.is_available():
        torch.cuda.manual_seed(args.seed)
        device = torch.device("cuda:" + str(args.device))
    else:
        device = torch.device("cpu")

    ### automatic dataloading and splitting
    dataset = SampleDglPCQM4MDataset(root='dataset/')

    # split_idx['train'], split_idx['valid'], split_idx['test']
    # separately gives a 1D int64 tensor
    split_idx = dataset.get_idx_split()
    split_idx["train"] = split_idx["train"].type(torch.LongTensor)
    split_idx["test"] = split_idx["test"].type(torch.LongTensor)
    split_idx["valid"] = split_idx["valid"].type(torch.LongTensor)

    ### automatic evaluator.
    evaluator = PCQM4MEvaluator()

    if args.train_subset:
        subset_ratio = 0.1
        subset_idx = torch.randperm(len(
            split_idx["train"]))[:int(subset_ratio * len(split_idx["train"]))]
        train_loader = DataLoader(dataset[split_idx["train"][subset_idx]],
                                  batch_size=args.batch_size,
                                  shuffle=True,
                                  num_workers=args.num_workers,
                                  collate_fn=collate_dgl)
    else:
        train_loader = DataLoader(dataset[split_idx["train"]],
                                  batch_size=args.batch_size,
                                  shuffle=True,
                                  num_workers=args.num_workers,
                                  collate_fn=collate_dgl)

    valid_loader = DataLoader(dataset[split_idx["valid"]],
                              batch_size=args.batch_size,
                              shuffle=False,
                              num_workers=args.num_workers,
                              collate_fn=collate_dgl)

    if args.save_test_dir != '':
        test_loader = DataLoader(dataset[split_idx["test"]],
                                 batch_size=args.batch_size,
                                 shuffle=False,
                                 num_workers=args.num_workers,
                                 collate_fn=collate_dgl)

    if args.checkpoint_dir != '':
        os.makedirs(args.checkpoint_dir, exist_ok=True)

    shared_params = {
        'num_layers': args.num_layers,
        'emb_dim': args.emb_dim,
        'drop_ratio': args.drop_ratio,
        'graph_pooling': args.graph_pooling
    }

    if args.gnn == 'gin':
        model = GNN(gnn_type='gin', virtual_node=False,
                    **shared_params).to(device)
    elif args.gnn == 'gin-virtual':
        model = GNN(gnn_type='gin', virtual_node=True,
                    **shared_params).to(device)
    elif args.gnn == 'gcn':
        model = GNN(gnn_type='gcn', virtual_node=False,
                    **shared_params).to(device)
    elif args.gnn == 'gcn-virtual':
        model = GNN(gnn_type='gcn', virtual_node=True,
                    **shared_params).to(device)
    elif args.gnn == 'gin-virtual-diffpool':
        model = DiffPoolGNN(gnn_type='gin', virtual_node=True,
                            **shared_params).to(device)
    elif args.gnn == 'gin-virtual-bayes-diffpool':
        model = BayesDiffPoolGNN(gnn_type='gin',
                                 virtual_node=True,
                                 **shared_params).to(device)
    else:
        raise ValueError('Invalid GNN type')

    num_params = sum(p.numel() for p in model.parameters())
    print(f'#Params: {num_params}')

    optimizer = optim.Adam(model.parameters(), lr=0.001)

    if args.log_dir != '':
        writer = SummaryWriter(log_dir=args.log_dir)

    best_valid_mae = 1000

    if args.train_subset:
        scheduler = StepLR(optimizer, step_size=300, gamma=0.25)
        args.epochs = 1000
    else:
        scheduler = StepLR(optimizer, step_size=30, gamma=0.25)
    """ load from latest checkpoint """
    # start epoch (default = 1, unless resuming training)
    firstEpoch = 1
    # check if checkpoint exist -> load model
    checkpointFile = os.path.join(args.checkpoint_dir, 'checkpoint.pt')
    if os.path.exists(checkpointFile):
        # load checkpoint file
        checkpointData = torch.load(checkpointFile)
        firstEpoch = checkpointData["epoch"]
        model.load_state_dict(checkpointData["model_state_dict"])
        optimizer.load_state_dict(checkpointData["optimizer_state_dict"])
        scheduler.load_state_dict(checkpointData["scheduler_state_dict"])
        best_valid_mae = checkpointData["best_val_mae"]
        num_params = checkpointData["num_params"]
        print(
            "Loaded existing weights from {}. Continuing from epoch: {} with best valid MAE: {}"
            .format(checkpointFile, firstEpoch, best_valid_mae))

    for epoch in range(firstEpoch, args.epochs + 1):
        print("=====Epoch {}".format(epoch))
        print('Training...')
        train_mae = train(model, device, train_loader, optimizer, args.gnn)

        print('Evaluating...')
        valid_mae = eval(model, device, valid_loader, evaluator)

        print({'Train': train_mae, 'Validation': valid_mae})

        if args.log_dir != '':
            writer.add_scalar('valid/mae', valid_mae, epoch)
            writer.add_scalar('train/mae', train_mae, epoch)

        if valid_mae < best_valid_mae:
            best_valid_mae = valid_mae
            if args.checkpoint_dir != '':
                print('Saving checkpoint...')
                checkpoint = {
                    'epoch': epoch,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'scheduler_state_dict': scheduler.state_dict(),
                    'best_val_mae': best_valid_mae,
                    'num_params': num_params
                }
                torch.save(checkpoint,
                           os.path.join(args.checkpoint_dir, 'checkpoint.pt'))

            if args.save_test_dir != '':
                print('Predicting on test data...')
                y_pred = test(model, device, test_loader)
                print('Saving test submission file...')
                evaluator.save_test_submission({'y_pred': y_pred},
                                               args.save_test_dir)

        scheduler.step()

        print(f'Best validation MAE so far: {best_valid_mae}')

    if args.log_dir != '':
        writer.close()
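collate_dgl is imported from the project; a plausible minimal version (an assumption, not the project's actual code) simply batches the graphs and stacks the scalar labels:

import dgl
import torch

def collate_dgl(samples):
    # samples is a list of (graph, label) pairs from the dataset
    graphs, labels = map(list, zip(*samples))
    return dgl.batch(graphs), torch.tensor(labels, dtype=torch.float32)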
Example #5
File: main.py Project: WenjinW/PGL
def train_and_eval(model, config, train_loader, valid_loaders, test_loader,
                   optimizer, scheduler):
    evaluator = PCQM4MEvaluator()
    if dist.get_rank() == 0:
        writer = SummaryWriter(config.log_dir)

    best_valid = 1000
    global_step = 0
    header = "%s\n" % config.task_name
    msg_list = []
    epoch_step = len(train_loader)
    topk_best = []
    topk_num = 5
    for i in range(topk_num):
        msg_list.append("")
        topk_best.append([0, 1000])
    for epoch in range(1, config.epochs + 1):
        model.train()
        #  if dist.get_rank() == 0:
        #  bn_summary(writer, model, epoch)
        loss_accum = 0
        train_mae = 1000
        for step, (batch_dict, labels, others) in enumerate(train_loader):
            feed_dict = {}
            for key, value in batch_dict.items():
                if "graph" in key:
                    feed_dict[key] = value.tensor()
                else:
                    feed_dict[key] = paddle.to_tensor(value)
            labels = paddle.to_tensor(labels)

            pred = paddle.reshape(model(feed_dict), shape=[-1])
            loss = reg_criterion(pred, labels)
            loss.backward()
            optimizer.step()
            optimizer.clear_grad()

            loss_accum += loss.numpy()

            if global_step % config.log_step == 0:
                log.info("Epoch: %s | Step: %s/%s | Train loss: %.6f" \
                        % (epoch, step, epoch_step, loss_accum / (step+1)) )
            global_step += 1

        train_mae = loss_accum / (step + 1)
        dist.barrier()

        if dist.get_rank() == 0 and config.to_valid_step < epoch:
            valid_dict = evaluate(model, valid_loaders['valid'], config)
            valid_mae = evaluator.eval(valid_dict)["mae"]

            writer.add_scalar('train/mae', train_mae, epoch)
            writer.add_scalar('valid/mae', valid_mae, epoch)

            if config.split_mode is not None:
                left_dict = evaluate(model, valid_loaders['left'], config,
                                     'left_valid')
                left_valid_mae = evaluator.eval(left_dict)["mae"]
                writer.add_scalar('valid/left', left_valid_mae, epoch)

            if valid_mae < topk_best[topk_num - 1][1]:
                best_valid = valid_mae

                output_dir = os.path.join(config.output_dir, "%03d" % epoch)
                make_dir(output_dir)
                save_pred_result(output_dir, 'crossvalid', valid_dict)
                if config.split_mode is not None:
                    save_pred_result(output_dir, 'leftvalid', left_dict)

                # if valid is best, save test result
                test_dict = evaluate(model, test_loader, config, mode="test")
                save_pred_result(output_dir, 'test', test_dict)

                save_dir = os.path.join(config.save_dir, "%03d" % epoch)
                make_dir(save_dir)
                ckpt_file = os.path.join(save_dir, "checkpoint.pdparams")
                log.info("saving model checkpoints in %s" % ckpt_file)
                paddle.save(model.state_dict(), ckpt_file)
                #  optim_file = os.path.join(config.save_dir, "optimizer.pdparams")
                #  log.info("saving optimizer checkpoints in %s" % optim_file)
                #  paddle.save(optimizer.state_dict(), optim_file)

                # calculate top n
                for i in range(topk_num):
                    if valid_mae < topk_best[i][1]:
                        topk_best.insert(i, [epoch, valid_mae])
                        k_idx = i
                        break
                to_rm = topk_best[-1]

                tmp_output_dir = os.path.join(config.output_dir,
                                              "%03d" % to_rm[0])
                tmp_save_dir = os.path.join(config.save_dir, "%03d" % to_rm[0])
                try:
                    shutil.rmtree(tmp_output_dir)
                    shutil.rmtree(tmp_save_dir)
                except OSError:
                    pass

                topk_best = topk_best[:-1]
                with open(os.path.join(config.output_dir, "ckpt_info"),
                          'w') as f:
                    for item in topk_best:
                        f.write("%s\n" % item)

            v_lr = 0.0 if config.lr_mode == "Reduce" else scheduler.get_lr()
            info = "Epoch: %s | lr: %s | Train: %.6f | Valid: %.6f | Best Valid: %.6f" \
                    % (epoch, v_lr, train_mae, valid_mae, topk_best[0][1])
            log.info(info)

            writer.add_scalar('valid/best', topk_best[0][1], epoch)

        if isinstance(scheduler, float):
            pass
        elif config.lr_mode == "Reduce":
            if dist.get_rank() == 0:
                valid_mae = paddle.to_tensor(valid_mae, dtype="float32")
            else:
                valid_mae = paddle.to_tensor(0.0, dtype="float32")
            paddle.distributed.broadcast(valid_mae, 0)
            scheduler.step(valid_mae)
        else:
            scheduler.step()
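reg_criterion is not defined in this snippet. Since PCQM4M is scored by mean absolute error, it is most likely an L1 loss; a one-line sketch under that assumption:

import paddle.nn.functional as F

def reg_criterion(pred, labels):
    # assumption: L1/MAE objective, matching the leaderboard metric
    return F.l1_loss(pred, labels)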
Example #6
def main():
    # Training settings
    parser = argparse.ArgumentParser(
        description='GNN baselines on pcqm4m with PyTorch Geometric')
    parser.add_argument('--device',
                        type=int,
                        default=0,
                        help='which gpu to use if any (default: 0)')
    parser.add_argument(
        '--gnn',
        type=str,
        default='gin-virtual',
        help='GNN to use: gin, gin-virtual, gcn, or gcn-virtual '
        '(default: gin-virtual)')
    parser.add_argument(
        '--graph_pooling',
        type=str,
        default='sum',
        help='graph pooling strategy mean or sum (default: sum)')
    parser.add_argument('--drop_ratio',
                        type=float,
                        default=0,
                        help='dropout ratio (default: 0)')
    parser.add_argument(
        '--num_layers',
        type=int,
        default=5,
        help='number of GNN message passing layers (default: 5)')
    parser.add_argument(
        '--emb_dim',
        type=int,
        default=600,
        help='dimensionality of hidden units in GNNs (default: 600)')
    parser.add_argument('--train_subset', action='store_true')
    parser.add_argument('--batch_size',
                        type=int,
                        default=256,
                        help='input batch size for training (default: 256)')
    parser.add_argument('--epochs',
                        type=int,
                        default=100,
                        help='number of epochs to train (default: 100)')
    parser.add_argument('--num_workers',
                        type=int,
                        default=0,
                        help='number of workers (default: 0)')
    parser.add_argument('--log_dir',
                        type=str,
                        default="",
                        help='tensorboard log directory')
    parser.add_argument('--checkpoint_dir',
                        type=str,
                        default='',
                        help='directory to save checkpoint')
    parser.add_argument('--save_test_dir',
                        type=str,
                        default='',
                        help='directory to save test submission file')
    args = parser.parse_args()

    print(args)

    np.random.seed(42)
    torch.manual_seed(42)
    torch.cuda.manual_seed(42)
    random.seed(42)

    device = torch.device(
        "cuda:" +
        str(args.device)) if torch.cuda.is_available() else torch.device("cpu")

    ### automatic dataloading and splitting
    dataset = PygPCQM4MDataset(root='dataset/')

    split_idx = dataset.get_idx_split()

    ### automatic evaluator. takes dataset name as input
    evaluator = PCQM4MEvaluator()

    if args.train_subset:
        subset_ratio = 0.1
        subset_idx = torch.randperm(len(
            split_idx["train"]))[:int(subset_ratio * len(split_idx["train"]))]
        train_loader = DataLoader(dataset[split_idx["train"][subset_idx]],
                                  batch_size=args.batch_size,
                                  shuffle=True,
                                  num_workers=args.num_workers)
    else:
        train_loader = DataLoader(dataset[split_idx["train"]],
                                  batch_size=args.batch_size,
                                  shuffle=True,
                                  num_workers=args.num_workers)

    valid_loader = DataLoader(dataset[split_idx["valid"]],
                              batch_size=args.batch_size,
                              shuffle=False,
                              num_workers=args.num_workers)

    if args.save_test_dir != '':
        test_loader = DataLoader(dataset[split_idx["test-dev"]],
                                 batch_size=args.batch_size,
                                 shuffle=False,
                                 num_workers=args.num_workers)

    if args.checkpoint_dir != '':
        os.makedirs(args.checkpoint_dir, exist_ok=True)

    shared_params = {
        'num_layers': args.num_layers,
        'emb_dim': args.emb_dim,
        'drop_ratio': args.drop_ratio,
        'graph_pooling': args.graph_pooling
    }

    if args.gnn == 'gin':
        model = GNN(gnn_type='gin', virtual_node=False,
                    **shared_params).to(device)
    elif args.gnn == 'gin-virtual':
        model = GNN(gnn_type='gin', virtual_node=True,
                    **shared_params).to(device)
    elif args.gnn == 'gcn':
        model = GNN(gnn_type='gcn', virtual_node=False,
                    **shared_params).to(device)
    elif args.gnn == 'gcn-virtual':
        model = GNN(gnn_type='gcn', virtual_node=True,
                    **shared_params).to(device)
    else:
        raise ValueError('Invalid GNN type')

    num_params = sum(p.numel() for p in model.parameters())
    print(f'#Params: {num_params}')

    optimizer = optim.Adam(model.parameters(), lr=0.001)

    if args.log_dir != '':
        writer = SummaryWriter(log_dir=args.log_dir)

    best_valid_mae = 1000

    if args.train_subset:
        scheduler = StepLR(optimizer, step_size=300, gamma=0.25)
        args.epochs = 1000
    else:
        scheduler = StepLR(optimizer, step_size=30, gamma=0.25)

    for epoch in range(1, args.epochs + 1):
        print("=====Epoch {}".format(epoch))
        print('Training...')
        train_mae = train(model, device, train_loader, optimizer)

        print('Evaluating...')
        valid_mae = eval(model, device, valid_loader, evaluator)

        print({'Train': train_mae, 'Validation': valid_mae})

        if args.log_dir != '':
            writer.add_scalar('valid/mae', valid_mae, epoch)
            writer.add_scalar('train/mae', train_mae, epoch)

        if valid_mae < best_valid_mae:
            best_valid_mae = valid_mae
            if args.checkpoint_dir != '':
                print('Saving checkpoint...')
                checkpoint = {
                    'epoch': epoch,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'scheduler_state_dict': scheduler.state_dict(),
                    'best_val_mae': best_valid_mae,
                    'num_params': num_params
                }
                torch.save(checkpoint,
                           os.path.join(args.checkpoint_dir, 'checkpoint.pt'))

            if args.save_test_dir != '':
                print('Predicting on test data...')
                y_pred = test(model, device, test_loader)
                print('Saving test submission file...')
                evaluator.save_test_submission({'y_pred': y_pred},
                                               args.save_test_dir,
                                               mode='test-dev')

        scheduler.step()

        print(f'Best validation MAE so far: {best_valid_mae}')

    if args.log_dir != '':
        writer.close()
Example #7
def main_mlp():
    # Training settings
    parser = argparse.ArgumentParser(
        description="GNN baselines on ogbgmol* data with Pytorch Geometrics")
    parser.add_argument("--device",
                        type=int,
                        default=0,
                        help="which gpu to use if any (default: 0)")
    parser.add_argument(
        "--num_mlp_layers",
        type=int,
        default=6,
        help="number of mlp layers (default: 6)",
    )
    parser.add_argument("--drop_ratio",
                        type=float,
                        default=0.2,
                        help="dropout ratio (default: 0.2)")
    parser.add_argument(
        "--batch_size",
        type=int,
        default=256,
        help="input batch size for training (default: 256)",
    )
    parser.add_argument(
        "--emb_dim",
        type=int,
        default=1600,
        help="embedding dimensionality (default: 1600)",
    )
    parser.add_argument("--train_subset", action="store_true")
    parser.add_argument(
        "--epochs",
        type=int,
        default=100,
        help="number of epochs to train (default: 100)",
    )
    parser.add_argument("--num_workers",
                        type=int,
                        default=0,
                        help="number of workers (default: 0)")
    parser.add_argument("--radius",
                        type=int,
                        default=2,
                        help="radius (default: 2)")
    parser.add_argument("--log_dir",
                        type=str,
                        default="",
                        help="tensorboard log directory")
    parser.add_argument("--checkpoint_dir",
                        type=str,
                        default="",
                        help="directory to save checkpoint")
    parser.add_argument(
        "--save_test_dir",
        type=str,
        default="",
        help="directory to save test submission file",
    )
    args = parser.parse_args()

    print(args)

    np.random.seed(42)
    torch.manual_seed(42)
    torch.cuda.manual_seed(42)
    random.seed(42)

    device = (torch.device("cuda:" + str(args.device))
              if torch.cuda.is_available() else torch.device("cpu"))

    dataset = PCQM4MDataset(root="dataset/", only_smiles=True)
    fp_processed_file = preprocess_fp(dataset, args.radius)

    data_dict = torch.load(fp_processed_file)
    X, Y = data_dict["X"], data_dict["Y"]

    split_idx = dataset.get_idx_split()
    ### automatic evaluator. takes dataset name as input
    evaluator = PCQM4MEvaluator()

    if args.train_subset:
        print("train subset")
        subset_ratio = 0.1
        subset_idx = torch.randperm(len(
            split_idx["train"]))[:int(subset_ratio * len(split_idx["train"]))]
        train_dataset = TensorDataset(X[split_idx["train"][subset_idx]],
                                      Y[split_idx["train"][subset_idx]])

    else:
        train_dataset = TensorDataset(X[split_idx["train"]],
                                      Y[split_idx["train"]])

    valid_dataset = TensorDataset(X[split_idx["valid"]], Y[split_idx["valid"]])
    test_dataset = TensorDataset(X[split_idx["test"]], Y[split_idx["test"]])

    train_loader = DataLoader(
        train_dataset,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.num_workers,
    )
    valid_loader = DataLoader(
        valid_dataset,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.num_workers,
    )

    if args.save_test_dir is not "":
        test_loader = DataLoader(
            test_dataset,
            batch_size=args.batch_size,
            shuffle=False,
            num_workers=args.num_workers,
        )

    if args.checkpoint_dir != "":
        os.makedirs(args.checkpoint_dir, exist_ok=True)

    model = MLP(
        num_mlp_layers=args.num_mlp_layers,
        emb_dim=args.emb_dim,
        drop_ratio=args.drop_ratio,
    ).to(device)

    num_params = sum(p.numel() for p in model.parameters())
    print(f"#Params: {num_params}")

    optimizer = optim.Adam(model.parameters(), lr=0.001)

    if args.log_dir != "":
        writer = SummaryWriter(log_dir=args.log_dir)

    best_valid_mae = 1000

    if args.train_subset:
        scheduler = StepLR(optimizer, step_size=300, gamma=0.25)
        args.epochs = 1000
    else:
        scheduler = StepLR(optimizer, step_size=30, gamma=0.25)

    for epoch in range(1, args.epochs + 1):
        print("=====Epoch {}".format(epoch))
        print("Training...")
        train_mae = train(model, device, train_loader, optimizer)

        print("Evaluating...")
        valid_mae = eval(model, device, valid_loader, evaluator)

        print({"Train": train_mae, "Validation": valid_mae})

        if args.log_dir != "":
            writer.add_scalar("valid/mae", valid_mae, epoch)
            writer.add_scalar("train/mae", train_mae, epoch)

        if valid_mae < best_valid_mae:
            best_valid_mae = valid_mae
            if args.checkpoint_dir != "":
                print("Saving checkpoint...")
                checkpoint = {
                    "epoch": epoch,
                    "model_state_dict": model.state_dict(),
                    "optimizer_state_dict": optimizer.state_dict(),
                    "scheduler_state_dict": scheduler.state_dict(),
                    "best_val_mae": best_valid_mae,
                    "num_params": num_params,
                }
                torch.save(checkpoint,
                           osp.join(args.checkpoint_dir, "checkpoint.pt"))

            if args.save_test_dir != "":
                print("Predicting on test data...")
                y_pred = test(model, device, test_loader)
                print("Saving test submission file...")
                evaluator.save_test_submission({"y_pred": y_pred},
                                               args.save_test_dir)

        scheduler.step()

        print(f"Best validation MAE so far: {best_valid_mae}")

    if args.log_dir != "":
        writer.close()
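For reference, the PCQM4MEvaluator used throughout these examples has a small surface: eval() takes a dict with 1-D y_true and y_pred arrays (numpy or torch) and returns the MAE, while save_test_submission() writes the compressed submission file into a directory. A short usage sketch (the extra mode='test-dev' argument seen in some examples belongs to later versions of the evaluator):

from ogb.lsc import PCQM4MEvaluator
import torch

evaluator = PCQM4MEvaluator()
result = evaluator.eval({"y_true": torch.zeros(4), "y_pred": torch.ones(4)})
print(result["mae"])  # 1.0

# Writes the submission .npz into the given directory:
# evaluator.save_test_submission({"y_pred": y_pred}, "submission_dir")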
Example #8
File: main.py Project: WenjinW/PGL
def pretrain_train_and_eval(model, config, train_loader, valid_loaders,
                            test_loader, optimizer, scheduler):
    evaluator = PCQM4MEvaluator()
    if dist.get_rank() == 0:
        writer = SummaryWriter(config.log_dir)

    best_valid = 1000
    global_step = 0
    header = "%s\n" % config.task_name
    msg_list = []
    epoch_step = len(train_loader)
    topk_best = []
    topk_num = 8
    for i in range(topk_num):
        msg_list.append("")
        topk_best.append([0, 1000])
    # Pretrain
    for epoch in range(1, config.pretrain_epoch + 1):
        model.train()
        if dist.get_rank() == 0:
            bn_summary(writer, model, epoch)
        loss_accum = 0
        loss_dict = {}
        for step, (batch_dict, labels, others) in enumerate(train_loader):
            feed_dict = data2tensor(batch_dict)
            labels = paddle.to_tensor(labels)
            pretrain_losses = model(feed_dict, return_graph=False)
            total_loss = 0
            for name in pretrain_losses:
                if name not in config.pretrain_tasks:
                    continue
                if name not in loss_dict:
                    loss_dict[name] = []
                c_loss = pretrain_losses[name]
                loss_dict[name].append(c_loss.numpy())
                total_loss += c_loss

            total_loss.backward()
            optimizer.step()
            optimizer.clear_grad()

            loss_accum += total_loss.numpy()

            if step % 100 == 0:
                log.info("Epoch: %s | Step: %s/%s Pretrain loss: %.6f" \
                        % (epoch, step+1,epoch_step, loss_accum / (step+1)) )
        for name in loss_dict:
            print('pretrain loss', epoch, name, np.mean(loss_dict[name]))
    # Train
    for epoch in range(1, config.epochs + 1):
        model.train()
        alphalist = ([config.aux_alpha] * 10 +
                     [config.aux_alpha / 2] * 10 + [0] * 200)
        if dist.get_rank() == 0:
            bn_summary(writer, model, epoch)
        loss_accum = 0
        for step, (batch_dict, labels, others) in enumerate(train_loader):
            feed_dict = data2tensor(batch_dict)
            labels = paddle.to_tensor(labels)
            out, pretrain_losses = model(feed_dict, return_graph=True)
            pred = paddle.reshape(out, shape=[-1])
            homo_loss = reg_criterion(pred, labels)
            alpha = alphalist[epoch - 1]
            pretrain_loss = alpha * reduce(lambda x, y: x + y,
                                           pretrain_losses.values())
            loss = homo_loss + pretrain_loss
            loss.backward()
            optimizer.step()
            optimizer.clear_grad()

            loss_accum += loss.numpy()

            if global_step % config.log_step == 0:
                log.info("Epoch: %s | Step: %s/%s | Train loss: %.6f" \
                        % (epoch, step, epoch_step, loss_accum / (step+1)) )
            global_step += 1
        train_mae = loss_accum / (step + 1)
        print("finished training for this epoch")
        if dist.get_rank() == 0 and config.to_valid_step < epoch:
            valid_dict = evaluate(model, valid_loaders['valid'], config)
            valid_mae = evaluator.eval(valid_dict)["mae"]
            writer.add_scalar('train/mae', train_mae, epoch)
            writer.add_scalar('valid/mae', valid_mae, epoch)

            if config.split_mode is not None:
                left_dict = evaluate(model, valid_loaders['left'], config,
                                     'left_valid')
                left_valid_mae = evaluator.eval(left_dict)["mae"]
                writer.add_scalar('valid/left', left_valid_mae, epoch)

                #  valid_mae = (4.5 * valid_mae + left_valid_mae) / 5.5

            if valid_mae < topk_best[topk_num - 1][1]:
                best_valid = valid_mae

                output_dir = os.path.join(config.output_dir, "%03d" % epoch)
                make_dir(output_dir)
                save_pred_result(output_dir, 'valid', valid_dict)
                save_pred_result(output_dir, 'left_valid', left_dict)

                # if valid is best, save test result
                test_dict = evaluate(model, test_loader, config, mode="test")
                save_pred_result(output_dir, 'test', test_dict)

                save_dir = os.path.join(config.save_dir, "%03d" % epoch)
                make_dir(save_dir)
                ckpt_file = os.path.join(save_dir, "checkpoint.pdparams")
                log.info("saving model checkpoints in %s" % ckpt_file)
                paddle.save(model.state_dict(), ckpt_file)
                #  optim_file = os.path.join(config.save_dir, "optimizer.pdparams")
                #  log.info("saving optimizer checkpoints in %s" % optim_file)
                #  paddle.save(optimizer.state_dict(), optim_file)

                # calculate top n
                for i in range(topk_num):
                    if valid_mae < topk_best[i][1]:
                        topk_best.insert(i, [epoch, valid_mae])
                        k_idx = i
                        break
                to_rm = topk_best[-1]

                tmp_output_dir = os.path.join(config.output_dir,
                                              "%03d" % to_rm[0])
                tmp_save_dir = os.path.join(config.save_dir, "%03d" % to_rm[0])
                try:
                    shutil.rmtree(tmp_output_dir)
                    shutil.rmtree(tmp_save_dir)
                except OSError:
                    pass

                topk_best = topk_best[:-1]
                with open(os.path.join(config.output_dir, "ckpt_info"),
                          'w') as f:
                    for item in topk_best:
                        f.write("%s\n" % item)

                if not config.debug:
                    v_lr = 0.0 if config.lr_mode == "Reduce" else scheduler.get_lr()
                    info = "Epoch: %s | lr: %s | Train: %.6f | Valid: %.6f | Best Valid: %.6f" \
                            % (epoch, v_lr, train_mae, valid_mae, topk_best[0][1])
                    msg_list.insert(k_idx, info)
                    msg_list = msg_list[:-1]
                    to_robot_msg = header + "\n".join(msg_list)
                    os.system("echo '%s' | sh to_robot.sh >/dev/null 2>&1 " %
                              to_robot_msg)

            v_lr = 0.0 if config.lr_mode == "Reduce" else scheduler.get_lr()
            info = "Epoch: %s | lr: %s | Train: %.6f | Valid: %.6f | Best Valid: %.6f" \
                    % (epoch, v_lr, train_mae, valid_mae, topk_best[0][1])
            log.info(info)

            writer.add_scalar('valid/best', topk_best[0][1], epoch)

        if isinstance(scheduler, float):
            pass
        elif config.lr_mode == "Reduce":
            if dist.get_rank() == 0:
                valid_mae = paddle.to_tensor(valid_mae, dtype="float32")
            else:
                valid_mae = paddle.to_tensor(0.0, dtype="float32")
            paddle.distributed.broadcast(valid_mae, 0)
            scheduler.step(valid_mae)
        else:
            scheduler.step()
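data2tensor is imported from the project; judging from the equivalent inline conversion in Example 5, it plausibly looks like this (a sketch, not the project's actual code):

import paddle

def data2tensor(batch_dict):
    feed_dict = {}
    for key, value in batch_dict.items():
        if "graph" in key:
            feed_dict[key] = value.tensor()  # tensorize the PGL graph
        else:
            feed_dict[key] = paddle.to_tensor(value)
    return feed_dict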
Example #9
def main(args):
    prepartion(args)
    nn_params = {
        'num_layers': args.num_layers,
        'emb_dim': args.emb_dim,
        'drop_ratio': args.drop_ratio,
        'graph_pooling': args.graph_pooling
    }

    # automatic dataloading and splitting
    dataset = MyPCQM4MDataset(root=args.dataset_root)
    split_idx = dataset.get_idx_split()
    train_data = dataset[split_idx['train']]
    valid_data = dataset[split_idx['valid']]
    test_data = dataset[split_idx['test']]
    train_loader = DataLoader(train_data,
                              batch_size=args.batch_size,
                              shuffle=True,
                              num_workers=args.num_workers)
    valid_loader = DataLoader(valid_data,
                              batch_size=args.batch_size,
                              shuffle=False,
                              num_workers=args.num_workers)
    test_loader = DataLoader(test_data,
                             batch_size=args.batch_size,
                             shuffle=False,
                             num_workers=args.num_workers)

    # automatic evaluator. takes dataset name as input
    evaluator = PCQM4MEvaluator()
    criterion_fn = torch.nn.MSELoss()

    device = args.device

    model = GINGraphPooling(**nn_params).to(device)

    num_params = sum(p.numel() for p in model.parameters())
    print(f'#Params: {num_params}', file=args.output_file, flush=True)
    print(model, file=args.output_file, flush=True)

    optimizer = optim.Adam(model.parameters(),
                           lr=0.001,
                           weight_decay=args.weight_decay)
    scheduler = StepLR(optimizer, step_size=30, gamma=0.25)

    writer = SummaryWriter(log_dir=args.save_dir)
    not_improved = 0
    best_valid_mae = 9999
    for epoch in range(1, args.epochs + 1):
        print("=====Epoch {}".format(epoch), file=args.output_file, flush=True)
        print('Training...', file=args.output_file, flush=True)
        train_mae = train(model, device, train_loader, optimizer, criterion_fn)

        print('Evaluating...', file=args.output_file, flush=True)
        valid_mae = eval(model, device, valid_loader, evaluator)

        print({
            'Train': train_mae,
            'Validation': valid_mae
        },
              file=args.output_file,
              flush=True)

        writer.add_scalar('valid/mae', valid_mae, epoch)
        writer.add_scalar('train/mae', train_mae, epoch)

        if valid_mae < best_valid_mae:
            best_valid_mae = valid_mae
            if args.save_test:
                print('Saving checkpoint...',
                      file=args.output_file,
                      flush=True)
                checkpoint = {
                    'epoch': epoch,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'scheduler_state_dict': scheduler.state_dict(),
                    'best_val_mae': best_valid_mae,
                    'num_params': num_params
                }
                torch.save(checkpoint,
                           os.path.join(args.save_dir, 'checkpoint.pt'))
                print('Predicting on test data...',
                      file=args.output_file,
                      flush=True)
                y_pred = test(model, device, test_loader)
                print('Saving test submission file...',
                      file=args.output_file,
                      flush=True)
                evaluator.save_test_submission({'y_pred': y_pred},
                                               args.save_dir)

            not_improved = 0
        else:
            not_improved += 1
            if not_improved == args.early_stop:
                print(f"Have not improved for {not_improved} epoches.",
                      file=args.output_file,
                      flush=True)
                break

        scheduler.step()
        print(f'Best validation MAE so far: {best_valid_mae}',
              file=args.output_file,
              flush=True)

    writer.close()
    args.output_file.close()
Example #10
def main():
    # Training settings
    parser = argparse.ArgumentParser(
        description='GNN baselines on pcqm4m with PGL')
    parser.add_argument('--use_cuda', action='store_true')
    parser.add_argument('--device',
                        type=int,
                        default=0,
                        help='which gpu to use if any (default: 0)')
    parser.add_argument(
        '--gnn',
        type=str,
        default='gin-virtual',
        help='GNN to use: gin, gin-virtual, gcn, or gcn-virtual '
        '(default: gin-virtual)')
    parser.add_argument(
        '--graph_pooling',
        type=str,
        default='sum',
        help='graph pooling strategy mean or sum (default: sum)')
    parser.add_argument('--drop_ratio',
                        type=float,
                        default=0,
                        help='dropout ratio (default: 0)')
    parser.add_argument(
        '--num_layers',
        type=int,
        default=5,
        help='number of GNN message passing layers (default: 5)')
    parser.add_argument(
        '--emb_dim',
        type=int,
        default=600,
        help='dimensionality of hidden units in GNNs (default: 600)')
    parser.add_argument('--train_subset', action='store_true')
    parser.add_argument('--batch_size',
                        type=int,
                        default=256,
                        help='input batch size for training (default: 256)')
    parser.add_argument('--epochs',
                        type=int,
                        default=100,
                        help='number of epochs to train (default: 100)')
    parser.add_argument('--num_workers',
                        type=int,
                        default=1,
                        help='number of workers (default: 1)')
    parser.add_argument('--log_dir',
                        type=str,
                        default="",
                        help='tensorboard log directory')
    parser.add_argument('--checkpoint_dir',
                        type=str,
                        default='',
                        help='directory to save checkpoint')
    parser.add_argument('--save_test_dir',
                        type=str,
                        default='',
                        help='directory to save test submission file')
    args = parser.parse_args()

    print(args)

    random.seed(42)
    np.random.seed(42)
    paddle.seed(42)

    if not args.use_cuda:
        paddle.set_device("cpu")

    ### automatic dataloading and splitting
    class Config():
        def __init__(self):
            self.base_data_path = "./dataset"

    config = Config()
    ds = MolDataset(config)
    split_idx = ds.get_idx_split()
    test_ds = Subset(ds, split_idx['test'])

    print("Test exapmles: ", len(test_ds))

    ### automatic evaluator. takes dataset name as input
    evaluator = PCQM4MEvaluator()

    test_loader = Dataloader(test_ds,
                             batch_size=args.batch_size,
                             shuffle=False,
                             num_workers=args.num_workers,
                             collate_fn=CollateFn())

    shared_params = {
        'num_layers': args.num_layers,
        'emb_dim': args.emb_dim,
        'drop_ratio': args.drop_ratio,
        'graph_pooling': args.graph_pooling
    }

    if args.gnn == 'gin':
        model = GNN(gnn_type='gin', virtual_node=False, **shared_params)
    elif args.gnn == 'gin-virtual':
        model = GNN(gnn_type='gin', virtual_node=True, **shared_params)
    elif args.gnn == 'gcn':
        model = GNN(gnn_type='gcn', virtual_node=False, **shared_params)
    elif args.gnn == 'gcn-virtual':
        model = GNN(gnn_type='gcn', virtual_node=True, **shared_params)
    else:
        raise ValueError('Invalid GNN type')

    num_params = sum(p.numel() for p in model.parameters())
    print(f'#Params: {num_params}')

    checkpoint_path = os.path.join(args.checkpoint_dir, 'checkpoint.pdparams')
    if not os.path.exists(checkpoint_path):
        raise RuntimeError(f'Checkpoint file not found at {checkpoint_path}')

    model.set_state_dict(paddle.load(checkpoint_path))

    print('Predicting on test data...')
    y_pred = test(model, test_loader)
    print('Saving test submission file...')
    evaluator.save_test_submission({'y_pred': y_pred}, args.save_test_dir)
Example #11
def main():
    # Training settings
    parser = argparse.ArgumentParser(
        description="GNN baselines on pcqm4m with Pytorch Geometrics")
    parser.add_argument("--device",
                        type=int,
                        default=0,
                        help="which gpu to use if any (default: 0)")
    parser.add_argument(
        "--gnn",
        type=str,
        default="gin-virtual",
        help="GNN to use: gin, gin-virtual, gcn, or gcn-virtual "
        "(default: gin-virtual)",
    )
    parser.add_argument(
        "--graph_pooling",
        type=str,
        default="sum",
        help="graph pooling strategy mean or sum (default: sum)",
    )
    parser.add_argument("--drop_ratio",
                        type=float,
                        default=0,
                        help="dropout ratio (default: 0)")
    parser.add_argument(
        "--num_layers",
        type=int,
        default=5,
        help="number of GNN message passing layers (default: 5)",
    )
    parser.add_argument(
        "--emb_dim",
        type=int,
        default=600,
        help="dimensionality of hidden units in GNNs (default: 600)",
    )
    parser.add_argument("--train_subset", action="store_true")
    parser.add_argument(
        "--batch_size",
        type=int,
        default=256,
        help="input batch size for training (default: 256)",
    )
    parser.add_argument(
        "--epochs",
        type=int,
        default=100,
        help="number of epochs to train (default: 100)",
    )
    parser.add_argument("--num_workers",
                        type=int,
                        default=0,
                        help="number of workers (default: 0)")
    parser.add_argument("--log_dir",
                        type=str,
                        default="",
                        help="tensorboard log directory")
    parser.add_argument("--checkpoint_dir",
                        type=str,
                        default="",
                        help="directory to save checkpoint")
    parser.add_argument(
        "--save_test_dir",
        type=str,
        default="",
        help="directory to save test submission file",
    )
    args = parser.parse_args()

    print(args)

    np.random.seed(42)
    torch.manual_seed(42)
    torch.cuda.manual_seed(42)
    random.seed(42)

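    # Use the requested GPU when CUDA is available; otherwise run on CPU.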
    device = (torch.device("cuda:" + str(args.device))
              if torch.cuda.is_available() else torch.device("cpu"))

    ### automatic dataloading and splitting
    dataset = PygPCQM4MDataset(root="dataset/")

    split_idx = dataset.get_idx_split()

    ### automatic evaluator. takes dataset name as input
    evaluator = PCQM4MEvaluator()

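    # With --train_subset, train on a random 10% sample of the training split
    # (paired with the longer 1000-epoch schedule configured below).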
    if args.train_subset:
        subset_ratio = 0.1
        subset_idx = torch.randperm(len(
            split_idx["train"]))[:int(subset_ratio * len(split_idx["train"]))]
        train_loader = DataLoader(
            dataset[split_idx["train"][subset_idx]],
            batch_size=args.batch_size,
            shuffle=True,
            num_workers=args.num_workers,
        )
    else:
        train_loader = DataLoader(
            dataset[split_idx["train"]],
            batch_size=args.batch_size,
            shuffle=True,
            num_workers=args.num_workers,
        )

    valid_loader = DataLoader(
        dataset[split_idx["valid"]],
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.num_workers,
    )

    if args.save_test_dir != "":
        test_loader = DataLoader(
            dataset[split_idx["test"]],
            batch_size=args.batch_size,
            shuffle=False,
            num_workers=args.num_workers,
        )

    if args.checkpoint_dir != "":
        os.makedirs(args.checkpoint_dir, exist_ok=True)

    shared_params = {
        "num_layers": args.num_layers,
        "emb_dim": args.emb_dim,
        "drop_ratio": args.drop_ratio,
        "graph_pooling": args.graph_pooling,
    }

    if args.gnn == "gin":
        model = GNN(gnn_type="gin", virtual_node=False,
                    **shared_params).to(device)
    elif args.gnn == "gin-virtual":
        model = GNN(gnn_type="gin", virtual_node=True,
                    **shared_params).to(device)
    elif args.gnn == "gcn":
        model = GNN(gnn_type="gcn", virtual_node=False,
                    **shared_params).to(device)
    elif args.gnn == "gcn-virtual":
        model = GNN(gnn_type="gcn", virtual_node=True,
                    **shared_params).to(device)
    else:
        raise ValueError("Invalid GNN type")

    num_params = sum(p.numel() for p in model.parameters())
    print(f"#Params: {num_params}")

    optimizer = optim.Adam(model.parameters(), lr=0.001)

    if args.log_dir != "":
        writer = SummaryWriter(log_dir=args.log_dir)

    best_valid_mae = 1000

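    # Decay the learning rate to a quarter every 300 epochs on the subset
    # schedule, or every 30 epochs on the full 100-epoch run.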
    if args.train_subset:
        scheduler = StepLR(optimizer, step_size=300, gamma=0.25)
        args.epochs = 1000
    else:
        scheduler = StepLR(optimizer, step_size=30, gamma=0.25)

    for epoch in range(1, args.epochs + 1):
        print("=====Epoch {}".format(epoch))
        print("Training...")
        train_mae = train(model, device, train_loader, optimizer)

        print("Evaluating...")
        valid_mae = eval(model, device, valid_loader, evaluator)

        print({"Train": train_mae, "Validation": valid_mae})

        if args.log_dir != "":
            writer.add_scalar("valid/mae", valid_mae, epoch)
            writer.add_scalar("train/mae", train_mae, epoch)

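        # Checkpoint and refresh test predictions only when validation MAE improves.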
        if valid_mae < best_valid_mae:
            best_valid_mae = valid_mae
            if args.checkpoint_dir != "":
                print("Saving checkpoint...")
                checkpoint = {
                    "epoch": epoch,
                    "model_state_dict": model.state_dict(),
                    "optimizer_state_dict": optimizer.state_dict(),
                    "scheduler_state_dict": scheduler.state_dict(),
                    "best_val_mae": best_valid_mae,
                    "num_params": num_params,
                }
                torch.save(checkpoint,
                           os.path.join(args.checkpoint_dir, "checkpoint.pt"))

            if args.save_test_dir != "":
                print("Predicting on test data...")
                y_pred = test(model, device, test_loader)
                print("Saving test submission file...")
                evaluator.save_test_submission({"y_pred": y_pred},
                                               args.save_test_dir)

        scheduler.step()

        print(f"Best validation MAE so far: {best_valid_mae}")

    if args.log_dir != "":
        writer.close()
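The checkpoint saved above bundles model, optimizer, and scheduler state. A
minimal resume sketch, assuming model, optimizer, and scheduler were
constructed exactly as in this example before loading:

# Restore training state from the checkpoint dict written above.
checkpoint = torch.load(os.path.join(args.checkpoint_dir, "checkpoint.pt"))
model.load_state_dict(checkpoint["model_state_dict"])
optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
scheduler.load_state_dict(checkpoint["scheduler_state_dict"])
start_epoch = checkpoint["epoch"] + 1
best_valid_mae = checkpoint["best_val_mae"]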
Example No. 12
def main():
    # Training settings
    parser = argparse.ArgumentParser(
        description='GNN baselines on pcqm4m with PGL')
    parser.add_argument('--use_cuda', action='store_true')
    parser.add_argument('--device',
                        type=int,
                        default=0,
                        help='which gpu to use if any (default: 0)')
    parser.add_argument(
        '--gnn',
        type=str,
        default='gin-virtual',
        help='GNN variant: gin, gin-virtual, gcn, '
        'or gcn-virtual (default: gin-virtual)')
    parser.add_argument(
        '--graph_pooling',
        type=str,
        default='sum',
        help='graph pooling strategy: mean or sum (default: sum)')
    parser.add_argument('--drop_ratio',
                        type=float,
                        default=0,
                        help='dropout ratio (default: 0)')
    parser.add_argument(
        '--num_layers',
        type=int,
        default=5,
        help='number of GNN message passing layers (default: 5)')
    parser.add_argument(
        '--emb_dim',
        type=int,
        default=600,
        help='dimensionality of hidden units in GNNs (default: 600)')
    parser.add_argument('--train_subset', action='store_true')
    parser.add_argument('--batch_size',
                        type=int,
                        default=256,
                        help='input batch size for training (default: 256)')
    parser.add_argument('--epochs',
                        type=int,
                        default=100,
                        help='number of epochs to train (default: 100)')
    parser.add_argument('--num_workers',
                        type=int,
                        default=1,
                        help='number of workers (default: 1)')
    parser.add_argument('--log_dir',
                        type=str,
                        default="",
                        help='tensorboard log directory')
    parser.add_argument('--checkpoint_dir',
                        type=str,
                        default='',
                        help='directory to save checkpoint')
    parser.add_argument('--save_test_dir',
                        type=str,
                        default='',
                        help='directory to save test submission file')
    args = parser.parse_args()

    print(args)

    random.seed(42)
    np.random.seed(42)
    paddle.seed(42)

    if not args.use_cuda:
        paddle.set_device("cpu")

    ### automatic dataloading and splitting
    class Config():
        def __init__(self):
            self.base_data_path = "./dataset"

    config = Config()
    ds = MolDataset(config)

    split_idx = ds.get_idx_split()
    train_ds = Subset(ds, split_idx['train'])
    valid_ds = Subset(ds, split_idx['valid'])
    test_ds = Subset(ds, split_idx['test'])

    print("Train exapmles: ", len(train_ds))
    print("Valid exapmles: ", len(valid_ds))
    print("Test exapmles: ", len(test_ds))

    ### automatic evaluator. takes dataset name as input
    evaluator = PCQM4MEvaluator()

    train_loader = Dataloader(train_ds,
                              batch_size=args.batch_size,
                              shuffle=True,
                              num_workers=args.num_workers,
                              collate_fn=CollateFn())

    valid_loader = Dataloader(valid_ds,
                              batch_size=args.batch_size,
                              shuffle=False,
                              num_workers=args.num_workers,
                              collate_fn=CollateFn())

    if args.save_test_dir != '':
        test_loader = Dataloader(test_ds,
                                 batch_size=args.batch_size,
                                 shuffle=False,
                                 num_workers=args.num_workers,
                                 collate_fn=CollateFn())

    if args.checkpoint_dir != '':
        os.makedirs(args.checkpoint_dir, exist_ok=True)

    shared_params = {
        'num_layers': args.num_layers,
        'emb_dim': args.emb_dim,
        'drop_ratio': args.drop_ratio,
        'graph_pooling': args.graph_pooling
    }

    if args.gnn == 'gin':
        model = GNN(gnn_type='gin', virtual_node=False, **shared_params)
    elif args.gnn == 'gin-virtual':
        model = GNN(gnn_type='gin', virtual_node=True, **shared_params)
    elif args.gnn == 'gcn':
        model = GNN(gnn_type='gcn', virtual_node=False, **shared_params)
    elif args.gnn == 'gcn-virtual':
        model = GNN(gnn_type='gcn', virtual_node=True, **shared_params)
    else:
        raise ValueError('Invalid GNN type')

    num_params = sum(p.numel() for p in model.parameters())
    print(f'#Params: {num_params}')

    if args.log_dir != '':
        writer = SummaryWriter(log_dir=args.log_dir)

    best_valid_mae = 1000

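    # In Paddle the LR schedule is attached by passing the StepDecay object
    # to the optimizer as its learning rate.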
    scheduler = paddle.optimizer.lr.StepDecay(learning_rate=0.001,
                                              step_size=300,
                                              gamma=0.25)

    optimizer = paddle.optimizer.Adam(learning_rate=scheduler,
                                      parameters=model.parameters())

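    # Accumulate a plain-text log of per-epoch metrics; printed after every epoch.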
    msg = "ogbg_lsc_paddle_baseline\n"
    for epoch in range(1, args.epochs + 1):
        print("=====Epoch {}".format(epoch))
        print('Training...')
        train_mae = train(model, train_loader, optimizer)

        print('Evaluating...')
        valid_mae = eval(model, valid_loader, evaluator)

        print({'Train': train_mae, 'Validation': valid_mae})

        if args.log_dir != '':
            writer.add_scalar('valid/mae', valid_mae, epoch)
            writer.add_scalar('train/mae', train_mae, epoch)

        if valid_mae < best_valid_mae:
            best_valid_mae = valid_mae
            if args.checkpoint_dir != '':
                print('Saving checkpoint...')
                paddle.save(
                    model.state_dict(),
                    os.path.join(args.checkpoint_dir, 'checkpoint.pdparams'))

            if args.save_test_dir != '':
                print('Predicting on test data...')
                y_pred = test(model, test_loader)
                print('Saving test submission file...')
                evaluator.save_test_submission({'y_pred': y_pred},
                                               args.save_test_dir)

        scheduler.step()

        print(f'Best validation MAE so far: {best_valid_mae}')

        msg += "Epoch: %d | Train: %.6f | Valid: %.6f | Best Valid: %.6f\n" \
               % (epoch, train_mae, valid_mae, best_valid_mae)
        print(msg)

    if args.log_dir != '':
        writer.close()
Example No. 13
def infer(config):
    model = getattr(M, config.model_type)(config)

    log.info("infer model from %s" % config.infer_from)
    model.set_state_dict(paddle.load(config.infer_from))

    log.info("loading data")
    ds = getattr(DS, config.dataset_type)(config)

    split_idx = ds.get_idx_split()
    train_ds = DS.Subset(ds, split_idx['train'], mode='train')
    valid_ds = DS.Subset(ds, split_idx['valid'], mode='valid')
    test_ds = DS.Subset(ds, split_idx['test'], mode='test')

    log.info("Train exapmles: %s" % len(train_ds))
    log.info("Valid exapmles: %s" % len(valid_ds))
    log.info("Test exapmles: %s" % len(test_ds))

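    # drop_last=True keeps training batches a fixed size; the valid/test
    # loaders keep every example so predictions line up with split_idx.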
    train_loader = Dataloader(train_ds,
                              batch_size=config.batch_size,
                              shuffle=False,
                              num_workers=config.num_workers,
                              collate_fn=DS.CollateFn(config),
                              drop_last=True)

    valid_loader = Dataloader(valid_ds,
                              batch_size=config.valid_batch_size,
                              shuffle=False,
                              num_workers=1,
                              collate_fn=DS.CollateFn(config))

    test_loader = Dataloader(test_ds,
                             batch_size=config.valid_batch_size,
                             shuffle=False,
                             num_workers=1,
                             collate_fn=DS.CollateFn(config))

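    # Derive the task name from the parent directory of the checkpoint path,
    # falling back to a default when the path is too shallow.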
    try:
        task_name = config.infer_from.split("/")[-2]
    except IndexError:
        task_name = "ogb_kdd"
    log.info("task_name: %s" % task_name)

    ### automatic evaluator. takes dataset name as input
    evaluator = PCQM4MEvaluator()

    # ---------------- valid ----------------------- #
    #  log.info("validating ...")
    #  pred_dict = evaluate(model, valid_loader)
    #
    #  log.info("valid MAE: %s" % evaluator.eval(pred_dict)["mae"])
    #  valid_output_path = os.path.join(config.output_dir, task_name)
    #  make_dir(valid_output_path)
    #  valid_output_file = os.path.join(valid_output_path, "valid_mae.txt")
    #
    #  log.info("saving valid result to %s" % valid_output_file)
    #  with open(valid_output_file, 'w') as f:
    #      for y_pred, idx in zip(pred_dict['y_pred'], split_idx['valid']):
    #          smiles, label = ds.raw_dataset[idx]
    #          f.write("%s\t%s\t%s\n" % (y_pred, label, smiles))
    #
    # ---------------- test ----------------------- #

    log.info("testing ...")
    pred_dict = evaluate(model, test_loader)

    test_output_path = os.path.join(config.output_dir, task_name)
    make_dir(test_output_path)
    test_output_file = os.path.join(test_output_path, "test_mae.txt")

    log.info("saving test result to %s" % test_output_file)
    with open(test_output_file, 'w') as f:
        for y_pred, idx in zip(pred_dict['y_pred'], split_idx['test']):
            smiles, label = ds.raw_dataset[idx]
            f.write("%s\t%s\n" % (y_pred, smiles))

    log.info("saving submition format to %s" % test_output_path)
    evaluator.save_test_submission({'y_pred': pred_dict['y_pred']},
                                   test_output_path)