def infer(config, output_path):
    """Run inference with a trained Paddle model on the test split and save
    the predictions as a compressed ``.npz`` file.

    Loads weights from ``config.infer_from``, feeds the test subset of the
    configured dataset through the model, and writes ``test_pred.npz`` under
    ``config.output_dir/config.task_name``.

    Args:
        config: experiment config providing ``model_type``, ``infer_from``,
            ``dataset_type``, ``valid_batch_size``, ``output_dir`` and
            ``task_name``.
        output_path: unused; kept for interface compatibility with callers.
    """
    model = getattr(M, config.model_type)(config)
    log.info("infer model from %s" % config.infer_from)
    model.set_state_dict(paddle.load(config.infer_from))

    log.info("loading data")
    ds = getattr(DS, config.dataset_type)(config)
    split_idx = ds.get_idx_split()
    test_ds = DS.Subset(ds, split_idx['test'], mode='test')
    # BUG FIX: log message typo "exapmles" -> "examples".
    log.info("Test examples: %s" % len(test_ds))

    test_loader = Dataloader(test_ds,
                             batch_size=config.valid_batch_size,
                             shuffle=False,
                             num_workers=1,
                             collate_fn=DS.CollateFn(config))

    ### automatic evaluator. takes dataset name as input
    # NOTE(review): the evaluator is constructed but never used in this
    # function — confirm whether it can be removed.
    evaluator = PCQM4MEvaluator()

    # ---------------- test ----------------------- #
    log.info("testing ...")
    pred_dict = evaluate(model, test_loader)

    test_output_path = os.path.join(config.output_dir, config.task_name)
    make_dir(test_output_path)
    test_output_file = os.path.join(test_output_path, "test_pred.npz")
    log.info("saving test result to %s" % test_output_file)
    np.savez_compressed(test_output_file,
                        pred_dict['y_pred'].astype(np.float32))
def main_mlp():
    """Train an MLP over precomputed fingerprint features on PCQM4M.

    Optionally logs MAE to TensorBoard, checkpoints the best model, and
    writes a test-dev submission each time validation MAE improves.  All
    settings come from the command line.
    """
    # Training settings
    parser = argparse.ArgumentParser(
        description='GNN baselines on ogbgmol* data with Pytorch Geometrics')
    parser.add_argument('--device', type=int, default=0,
                        help='which gpu to use if any (default: 0)')
    parser.add_argument('--num_mlp_layers', type=int, default=6,
                        help='number of mlp layers (default: 6)')
    parser.add_argument('--drop_ratio', type=float, default=0.2,
                        help='dropout ratio (default: 0.2)')
    parser.add_argument('--batch_size', type=int, default=256,
                        help='input batch size for training (default: 256)')
    parser.add_argument('--emb_dim', type=int, default=1600,
                        help='embedding dimensionality (default: 1600)')
    parser.add_argument('--train_subset', action='store_true')
    parser.add_argument('--epochs', type=int, default=100,
                        help='number of epochs to train (default: 100)')
    parser.add_argument('--num_workers', type=int, default=0,
                        help='number of workers (default: 0)')
    parser.add_argument('--radius', type=int, default=2,
                        help='radius (default: 2)')
    parser.add_argument('--log_dir', type=str, default="",
                        help='tensorboard log directory')
    parser.add_argument('--checkpoint_dir', type=str, default='',
                        help='directory to save checkpoint')
    parser.add_argument('--save_test_dir', type=str, default='',
                        help='directory to save test submission file')
    args = parser.parse_args()
    print(args)

    # Fixed seeds for reproducibility.
    np.random.seed(42)
    torch.manual_seed(42)
    torch.cuda.manual_seed(42)
    random.seed(42)

    device = torch.device(
        "cuda:" +
        str(args.device)) if torch.cuda.is_available() else torch.device("cpu")

    dataset = PCQM4MDataset(root='dataset/', only_smiles=True)
    fp_processed_file = preprocess_fp(dataset, args.radius)

    # Cached fingerprint tensors: X = features, Y = regression labels.
    data_dict = torch.load(fp_processed_file)
    X, Y = data_dict['X'], data_dict['Y']

    split_idx = dataset.get_idx_split()
    ### automatic evaluator. takes dataset name as input
    evaluator = PCQM4MEvaluator()

    if args.train_subset:
        print('train subset')
        # Train on a random 10% subset (used for schedule-tuning runs).
        subset_ratio = 0.1
        subset_idx = torch.randperm(len(
            split_idx["train"]))[:int(subset_ratio * len(split_idx["train"]))]
        train_dataset = TensorDataset(X[split_idx['train'][subset_idx]],
                                      Y[split_idx['train'][subset_idx]])
    else:
        train_dataset = TensorDataset(X[split_idx['train']],
                                      Y[split_idx['train']])

    valid_dataset = TensorDataset(X[split_idx['valid']], Y[split_idx['valid']])
    # BUG FIX: X was indexed with split_idx['test-dev'] while Y used
    # split_idx['test'] — features and labels must come from the same split.
    # 'test-dev' is used for both, matching the mode='test-dev' submission
    # written below.
    test_dataset = TensorDataset(X[split_idx['test-dev']],
                                 Y[split_idx['test-dev']])

    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              shuffle=True,
                              num_workers=args.num_workers)
    valid_loader = DataLoader(valid_dataset,
                              batch_size=args.batch_size,
                              shuffle=False,
                              num_workers=args.num_workers)
    if args.save_test_dir != '':
        test_loader = DataLoader(test_dataset,
                                 batch_size=args.batch_size,
                                 shuffle=False,
                                 num_workers=args.num_workers)

    if args.checkpoint_dir != '':
        os.makedirs(args.checkpoint_dir, exist_ok=True)

    model = MLP(num_mlp_layers=args.num_mlp_layers,
                emb_dim=args.emb_dim,
                drop_ratio=args.drop_ratio).to(device)

    num_params = sum(p.numel() for p in model.parameters())
    print(f'#Params: {num_params}')

    optimizer = optim.Adam(model.parameters(), lr=0.001)

    if args.log_dir != '':
        writer = SummaryWriter(log_dir=args.log_dir)

    best_valid_mae = 1000

    if args.train_subset:
        # Subset runs train much longer with a slower LR decay.
        scheduler = StepLR(optimizer, step_size=300, gamma=0.25)
        args.epochs = 1000
    else:
        scheduler = StepLR(optimizer, step_size=30, gamma=0.25)

    for epoch in range(1, args.epochs + 1):
        print("=====Epoch {}".format(epoch))
        print('Training...')
        train_mae = train(model, device, train_loader, optimizer)

        print('Evaluating...')
        valid_mae = eval(model, device, valid_loader, evaluator)

        print({'Train': train_mae, 'Validation': valid_mae})

        if args.log_dir != '':
            writer.add_scalar('valid/mae', valid_mae, epoch)
            writer.add_scalar('train/mae', train_mae, epoch)

        # Checkpoint / predict only on validation improvement.
        if valid_mae < best_valid_mae:
            best_valid_mae = valid_mae
            if args.checkpoint_dir != '':
                print('Saving checkpoint...')
                checkpoint = {
                    'epoch': epoch,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'scheduler_state_dict': scheduler.state_dict(),
                    'best_val_mae': best_valid_mae,
                    'num_params': num_params
                }
                torch.save(checkpoint,
                           osp.join(args.checkpoint_dir, 'checkpoint.pt'))

            if args.save_test_dir != '':
                print('Predicting on test data...')
                y_pred = test(model, device, test_loader)
                print('Saving test submission file...')
                evaluator.save_test_submission({'y_pred': y_pred},
                                               args.save_test_dir,
                                               mode='test-dev')

        scheduler.step()

        print(f'Best validation MAE so far: {best_valid_mae}')

    if args.log_dir != '':
        writer.close()
def main():
    """Inference entry point: restore a trained DGL GNN and write a PCQM4M
    test submission.

    Reads the raw test-split SMILES strings, converts them to graphs on the
    fly, loads weights from ``<checkpoint_dir>/checkpoint.pt``, predicts on
    the test set and saves the submission to ``args.save_test_dir``.
    """
    # Training settings
    parser = argparse.ArgumentParser(
        description='GNN baselines on pcqm4m with DGL')
    parser.add_argument('--seed', type=int, default=42,
                        help='random seed to use (default: 42)')
    parser.add_argument('--device', type=int, default=0,
                        help='which gpu to use if any (default: 0)')
    parser.add_argument(
        '--gnn', type=str, default='gin-virtual',
        help='GNN to use, which can be from '
        '[gin, gin-virtual, gcn, gcn-virtual] (default: gin-virtual)')
    parser.add_argument(
        '--graph_pooling', type=str, default='sum',
        help='graph pooling strategy mean or sum (default: sum)')
    parser.add_argument('--drop_ratio', type=float, default=0,
                        help='dropout ratio (default: 0)')
    parser.add_argument(
        '--num_layers', type=int, default=5,
        help='number of GNN message passing layers (default: 5)')
    parser.add_argument(
        '--emb_dim', type=int, default=600,
        help='dimensionality of hidden units in GNNs (default: 600)')
    parser.add_argument('--batch_size', type=int, default=256,
                        help='input batch size for training (default: 256)')
    parser.add_argument('--num_workers', type=int, default=0,
                        help='number of workers (default: 0)')
    parser.add_argument('--checkpoint_dir', type=str, default='',
                        help='directory to save checkpoint')
    parser.add_argument('--save_test_dir', type=str, default='',
                        help='directory to save test submission file')
    args = parser.parse_args()
    print(args)

    # Seed every RNG in play; CUDA seeding only when a GPU is present.
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    random.seed(args.seed)

    if torch.cuda.is_available():
        torch.cuda.manual_seed(args.seed)
        device = torch.device("cuda:" + str(args.device))
    else:
        device = torch.device("cpu")

    ### automatic data loading and splitting
    ### Read in the raw SMILES strings
    smiles_dataset = PCQM4MDataset(root='dataset/', only_smiles=True)
    split_idx = smiles_dataset.get_idx_split()

    # Only the test split is needed; graphs are built lazily from SMILES.
    test_smiles_dataset = [smiles_dataset[i] for i in split_idx['test']]
    onthefly_dataset = OnTheFlyPCQMDataset(test_smiles_dataset)
    test_loader = DataLoader(onthefly_dataset,
                             batch_size=args.batch_size,
                             shuffle=False,
                             num_workers=args.num_workers,
                             collate_fn=collate_dgl)

    ### automatic evaluator.
    evaluator = PCQM4MEvaluator()

    shared_params = {
        'num_layers': args.num_layers,
        'emb_dim': args.emb_dim,
        'drop_ratio': args.drop_ratio,
        'graph_pooling': args.graph_pooling
    }

    # Architecture flags must match the checkpoint being restored.
    if args.gnn == 'gin':
        model = GNN(gnn_type='gin', virtual_node=False,
                    **shared_params).to(device)
    elif args.gnn == 'gin-virtual':
        model = GNN(gnn_type='gin', virtual_node=True,
                    **shared_params).to(device)
    elif args.gnn == 'gcn':
        model = GNN(gnn_type='gcn', virtual_node=False,
                    **shared_params).to(device)
    elif args.gnn == 'gcn-virtual':
        model = GNN(gnn_type='gcn', virtual_node=True,
                    **shared_params).to(device)
    else:
        raise ValueError('Invalid GNN type')

    num_params = sum(p.numel() for p in model.parameters())
    print(f'#Params: {num_params}')

    # Fail fast if there is nothing to restore — this entry point only
    # does inference.
    checkpoint_path = os.path.join(args.checkpoint_dir, 'checkpoint.pt')
    if not os.path.exists(checkpoint_path):
        raise RuntimeError(f'Checkpoint file not found at {checkpoint_path}')

    ## reading in checkpoint
    checkpoint = torch.load(checkpoint_path)
    model.load_state_dict(checkpoint['model_state_dict'])

    print('Predicting on test data...')
    y_pred = test(model, device, test_loader)
    print('Saving test submission file...')
    evaluator.save_test_submission({'y_pred': y_pred}, args.save_test_dir)
def main():
    """Train a DGL GNN baseline on PCQM4M, resuming from an existing
    checkpoint when one is found.

    Optionally logs MAE to TensorBoard, checkpoints the best model, and
    writes a test submission each time validation MAE improves.  All
    settings come from the command line.
    """
    # Training settings
    parser = argparse.ArgumentParser(
        description='GNN baselines on pcqm4m with DGL')
    parser.add_argument('--seed', type=int, default=42,
                        help='random seed to use (default: 42)')
    parser.add_argument('--device', type=int, default=0,
                        help='which gpu to use if any (default: 0)')
    parser.add_argument(
        '--gnn', type=str, default='gin-virtual',
        help='GNN to use, which can be from '
        '[gin, gin-virtual, gcn, gcn-virtual] (default: gin-virtual)')
    parser.add_argument(
        '--graph_pooling', type=str, default='sum',
        help='graph pooling strategy mean or sum (default: sum)')
    parser.add_argument('--drop_ratio', type=float, default=0,
                        help='dropout ratio (default: 0)')
    parser.add_argument(
        '--num_layers', type=int, default=5,
        help='number of GNN message passing layers (default: 5)')
    parser.add_argument(
        '--emb_dim', type=int, default=600,
        help='dimensionality of hidden units in GNNs (default: 600)')
    # BUG FIX: a bare '%' in argparse help text is %-formatted when --help is
    # rendered and raises ValueError; it must be escaped as '%%'.
    parser.add_argument('--train_subset', action='store_true',
                        help='use 10%% of the training set for training')
    parser.add_argument('--batch_size', type=int, default=256,
                        help='input batch size for training (default: 256)')
    parser.add_argument('--epochs', type=int, default=100,
                        help='number of epochs to train (default: 100)')
    parser.add_argument('--num_workers', type=int, default=0,
                        help='number of workers (default: 0)')
    parser.add_argument('--log_dir', type=str, default="",
                        help='tensorboard log directory. If not specified, '
                        'tensorboard will not be used.')
    parser.add_argument('--checkpoint_dir', type=str, default='',
                        help='directory to save checkpoint')
    parser.add_argument('--save_test_dir', type=str, default='',
                        help='directory to save test submission file')
    args = parser.parse_args()
    print(args)

    # Seed every RNG in play; CUDA seeding only when a GPU is present.
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    random.seed(args.seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(args.seed)
        device = torch.device("cuda:" + str(args.device))
    else:
        device = torch.device("cpu")

    ### automatic dataloading and splitting
    dataset = SampleDglPCQM4MDataset(root='dataset/')

    # split_idx['train'], split_idx['valid'], split_idx['test']
    # separately gives a 1D int64 tensor
    split_idx = dataset.get_idx_split()
    split_idx["train"] = split_idx["train"].type(torch.LongTensor)
    split_idx["test"] = split_idx["test"].type(torch.LongTensor)
    split_idx["valid"] = split_idx["valid"].type(torch.LongTensor)

    ### automatic evaluator.
    evaluator = PCQM4MEvaluator()

    if args.train_subset:
        # Train on a random 10% subset (used for schedule-tuning runs).
        subset_ratio = 0.1
        subset_idx = torch.randperm(len(
            split_idx["train"]))[:int(subset_ratio * len(split_idx["train"]))]
        train_loader = DataLoader(dataset[split_idx["train"][subset_idx]],
                                  batch_size=args.batch_size,
                                  shuffle=True,
                                  num_workers=args.num_workers,
                                  collate_fn=collate_dgl)
    else:
        train_loader = DataLoader(dataset[split_idx["train"]],
                                  batch_size=args.batch_size,
                                  shuffle=True,
                                  num_workers=args.num_workers,
                                  collate_fn=collate_dgl)

    valid_loader = DataLoader(dataset[split_idx["valid"]],
                              batch_size=args.batch_size,
                              shuffle=False,
                              num_workers=args.num_workers,
                              collate_fn=collate_dgl)

    # BUG FIX: every `args.x is not ''` below was an identity comparison
    # against a string literal (SyntaxWarning on CPython >= 3.8 and
    # interning-dependent); replaced with `!= ''` throughout.
    if args.save_test_dir != '':
        test_loader = DataLoader(dataset[split_idx["test"]],
                                 batch_size=args.batch_size,
                                 shuffle=False,
                                 num_workers=args.num_workers,
                                 collate_fn=collate_dgl)

    if args.checkpoint_dir != '':
        os.makedirs(args.checkpoint_dir, exist_ok=True)

    shared_params = {
        'num_layers': args.num_layers,
        'emb_dim': args.emb_dim,
        'drop_ratio': args.drop_ratio,
        'graph_pooling': args.graph_pooling
    }

    if args.gnn == 'gin':
        model = GNN(gnn_type='gin', virtual_node=False,
                    **shared_params).to(device)
    elif args.gnn == 'gin-virtual':
        model = GNN(gnn_type='gin', virtual_node=True,
                    **shared_params).to(device)
    elif args.gnn == 'gcn':
        model = GNN(gnn_type='gcn', virtual_node=False,
                    **shared_params).to(device)
    elif args.gnn == 'gcn-virtual':
        model = GNN(gnn_type='gcn', virtual_node=True,
                    **shared_params).to(device)
    elif args.gnn == 'gin-virtual-diffpool':
        model = DiffPoolGNN(gnn_type='gin', virtual_node=True,
                            **shared_params).to(device)
    elif args.gnn == 'gin-virtual-bayes-diffpool':
        model = BayesDiffPoolGNN(gnn_type='gin', virtual_node=True,
                                 **shared_params).to(device)
    else:
        raise ValueError('Invalid GNN type')

    num_params = sum(p.numel() for p in model.parameters())
    print(f'#Params: {num_params}')

    optimizer = optim.Adam(model.parameters(), lr=0.001)

    if args.log_dir != '':
        writer = SummaryWriter(log_dir=args.log_dir)

    best_valid_mae = 1000

    if args.train_subset:
        # Subset runs train much longer with a slower LR decay.
        scheduler = StepLR(optimizer, step_size=300, gamma=0.25)
        args.epochs = 1000
    else:
        scheduler = StepLR(optimizer, step_size=30, gamma=0.25)

    # --- load from latest checkpoint (resume support) ---
    # start epoch (default = 1, unless resuming training)
    firstEpoch = 1
    checkpointFile = os.path.join(args.checkpoint_dir, 'checkpoint.pt')
    if os.path.exists(checkpointFile):
        checkpointData = torch.load(checkpointFile)
        # NOTE(review): 'epoch' is the epoch at which the checkpoint was
        # written, so that epoch is re-run on resume — confirm intended.
        firstEpoch = checkpointData["epoch"]
        model.load_state_dict(checkpointData["model_state_dict"])
        optimizer.load_state_dict(checkpointData["optimizer_state_dict"])
        scheduler.load_state_dict(checkpointData["scheduler_state_dict"])
        best_valid_mae = checkpointData["best_val_mae"]
        num_params = checkpointData["num_params"]
        print(
            "Loaded existing weights from {}. Continuing from epoch: {} with best valid MAE: {}"
            .format(checkpointFile, firstEpoch, best_valid_mae))

    for epoch in range(firstEpoch, args.epochs + 1):
        print("=====Epoch {}".format(epoch))
        print('Training...')
        train_mae = train(model, device, train_loader, optimizer, args.gnn)

        print('Evaluating...')
        valid_mae = eval(model, device, valid_loader, evaluator)

        print({'Train': train_mae, 'Validation': valid_mae})

        if args.log_dir != '':
            writer.add_scalar('valid/mae', valid_mae, epoch)
            writer.add_scalar('train/mae', train_mae, epoch)

        # Checkpoint / predict only on validation improvement.
        if valid_mae < best_valid_mae:
            best_valid_mae = valid_mae
            if args.checkpoint_dir != '':
                print('Saving checkpoint...')
                checkpoint = {
                    'epoch': epoch,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'scheduler_state_dict': scheduler.state_dict(),
                    'best_val_mae': best_valid_mae,
                    'num_params': num_params
                }
                torch.save(checkpoint,
                           os.path.join(args.checkpoint_dir, 'checkpoint.pt'))

            if args.save_test_dir != '':
                print('Predicting on test data...')
                y_pred = test(model, device, test_loader)
                print('Saving test submission file...')
                evaluator.save_test_submission({'y_pred': y_pred},
                                               args.save_test_dir)

        scheduler.step()

        print(f'Best validation MAE so far: {best_valid_mae}')

    if args.log_dir != '':
        writer.close()
def train_and_eval(model, config, train_loader, valid_loaders, test_loader,
                   optimizer, scheduler):
    """Distributed (PaddlePaddle) train/eval loop for PCQM4M regression.

    Trains for ``config.epochs`` epochs.  Once ``epoch > config.to_valid_step``,
    rank 0 evaluates, logs MAEs to tensorboard, and keeps only the top-5
    epochs by validation MAE on disk (prediction dumps under
    ``config.output_dir`` and checkpoints under ``config.save_dir``); worse
    entries are evicted as better ones arrive.

    Args:
        model: paddle model; called with a feed dict, returns predictions.
        config: experiment config (epochs, log_step, to_valid_step,
            split_mode, lr_mode, task_name, log_dir, output_dir, save_dir).
        train_loader: yields (batch_dict, labels, others) tuples.
        valid_loaders: dict of eval loaders; key 'valid' always, key 'left'
            used only when ``config.split_mode`` is not None.
        test_loader: loader for the test split.
        optimizer: paddle optimizer.
        scheduler: LR scheduler; a plain float disables stepping; when
            ``config.lr_mode == "Reduce"`` it is stepped with the broadcast
            validation MAE.
    """
    evaluator = PCQM4MEvaluator()
    # Only rank 0 writes tensorboard summaries, evaluates and saves.
    if dist.get_rank() == 0:
        writer = SummaryWriter(config.log_dir)

    best_valid = 1000
    global_step = 0
    # NOTE(review): header and msg_list are initialised but never used in
    # this function — confirm whether they can be removed.
    header = "%s\n" % config.task_name
    msg_list = []
    epoch_step = len(train_loader)

    # topk_best holds [epoch, valid_mae] pairs, best first; seeded with
    # sentinel entries so the first few real epochs always qualify.
    topk_best = []
    topk_num = 5
    for i in range(topk_num):
        msg_list.append("")
        topk_best.append([0, 1000])

    for epoch in range(1, config.epochs + 1):
        model.train()
        # if dist.get_rank() == 0:
        #     bn_summary(writer, model, epoch)
        loss_accum = 0
        train_mae = 1000
        for step, (batch_dict, labels, others) in enumerate(train_loader):
            # Graph entries expose .tensor(); everything else is converted
            # to a paddle tensor directly.
            feed_dict = {}
            for key, value in batch_dict.items():
                if "graph" in key:
                    feed_dict[key] = value.tensor()
                else:
                    feed_dict[key] = paddle.to_tensor(value)
            labels = paddle.to_tensor(labels)

            pred = paddle.reshape(model(feed_dict), shape=[-1, ])
            loss = reg_criterion(pred, labels)
            loss.backward()
            optimizer.step()
            optimizer.clear_grad()

            loss_accum += loss.numpy()
            if global_step % config.log_step == 0:
                log.info("Epoch: %s | Step: %s/%s | Train loss: %.6f" \
                    % (epoch, step, epoch_step, loss_accum / (step+1)))
            global_step += 1

        # Running mean of the training loss stands in for the train MAE.
        train_mae = loss_accum / (step + 1)

        dist.barrier()

        if dist.get_rank() == 0 and config.to_valid_step < epoch:
            valid_dict = evaluate(model, valid_loaders['valid'], config)
            valid_mae = evaluator.eval(valid_dict)["mae"]
            writer.add_scalar('train/mae', train_mae, epoch)
            writer.add_scalar('valid/mae', valid_mae, epoch)

            if config.split_mode is not None:
                # Extra held-out ("left") validation split.
                left_dict = evaluate(model, valid_loaders['left'], config,
                                     'left_valid')
                left_valid_mae = evaluator.eval(left_dict)["mae"]
                writer.add_scalar('valid/left', left_valid_mae, epoch)

            # Entering the current top-k triggers dumping predictions and
            # a checkpoint, then evicting the now-worst entry from disk.
            if valid_mae < topk_best[topk_num - 1][1]:
                best_valid = valid_mae
                output_dir = os.path.join(config.output_dir, "%03d" % epoch)
                make_dir(output_dir)
                save_pred_result(output_dir, 'crossvalid', valid_dict)
                if config.split_mode is not None:
                    save_pred_result(output_dir, 'leftvalid', left_dict)

                # if valid is best, save test result
                test_dict = evaluate(model, test_loader, config, mode="test")
                save_pred_result(output_dir, 'test', test_dict)

                save_dir = os.path.join(config.save_dir, "%03d" % epoch)
                make_dir(save_dir)
                ckpt_file = os.path.join(save_dir, "checkpoint.pdparams")
                log.info("saving model checkpoints in %s" % ckpt_file)
                paddle.save(model.state_dict(), ckpt_file)
                # optim_file = os.path.join(config.save_dir, "optimizer.pdparams")
                # log.info("saving optimizer checkpoints in %s" % optim_file)
                # paddle.save(optimizer.state_dict(), optim_file)

                # calculate top n: insert at the first rank this MAE beats.
                for i in range(topk_num):
                    if valid_mae < topk_best[i][1]:
                        topk_best.insert(i, [epoch, valid_mae])
                        k_idx = i
                        break
                # Evict the now-worst epoch's artifacts (best effort).
                to_rm = topk_best[-1]
                tmp_output_dir = os.path.join(config.output_dir,
                                              "%03d" % to_rm[0])
                tmp_save_dir = os.path.join(config.save_dir, "%03d" % to_rm[0])
                try:
                    shutil.rmtree(tmp_output_dir)
                    shutil.rmtree(tmp_save_dir)
                except OSError:
                    pass
                topk_best = topk_best[:-1]

                # Persist the current top-k ranking for later inspection.
                with open(os.path.join(config.output_dir, "ckpt_info"),
                          'w') as f:
                    for item in topk_best:
                        f.write("%s\n" % item)

            v_lr = 0.0 if config.lr_mode == "Reduce" else scheduler.get_lr()
            info = "Epoch: %s | lr: %s | Train: %.6f | Valid: %.6f | Best Valid: %.6f" \
                % (epoch, v_lr, train_mae, valid_mae, topk_best[0][1])
            log.info(info)
            writer.add_scalar('valid/best', topk_best[0][1], epoch)

        if isinstance(scheduler, float):
            pass
        elif config.lr_mode == "Reduce":
            # Broadcast rank 0's metric so every rank steps identically.
            # NOTE(review): valid_mae only exists on rank 0 once
            # epoch > config.to_valid_step; before that this branch would
            # raise NameError — confirm the lr_mode/to_valid_step combination.
            if dist.get_rank() == 0:
                valid_mae = paddle.to_tensor(valid_mae, dtype="float32")
            else:
                valid_mae = paddle.to_tensor(0.0, dtype="float32")
            paddle.distributed.broadcast(valid_mae, 0)
            scheduler.step(valid_mae)
        else:
            scheduler.step()
def main():
    """Entry point: train a PyG GNN baseline on PCQM4M from CLI settings."""
    # Training settings
    parser = argparse.ArgumentParser(
        description='GNN baselines on pcqm4m with Pytorch Geometrics')
    parser.add_argument('--device', type=int, default=0,
                        help='which gpu to use if any (default: 0)')
    parser.add_argument(
        '--gnn', type=str, default='gin-virtual',
        help=
        'GNN gin, gin-virtual, or gcn, or gcn-virtual (default: gin-virtual)')
    parser.add_argument(
        '--graph_pooling', type=str, default='sum',
        help='graph pooling strategy mean or sum (default: sum)')
    parser.add_argument('--drop_ratio', type=float, default=0,
                        help='dropout ratio (default: 0)')
    parser.add_argument(
        '--num_layers', type=int, default=5,
        help='number of GNN message passing layers (default: 5)')
    parser.add_argument(
        '--emb_dim', type=int, default=600,
        help='dimensionality of hidden units in GNNs (default: 600)')
    parser.add_argument('--train_subset', action='store_true')
    parser.add_argument('--batch_size', type=int, default=256,
                        help='input batch size for training (default: 256)')
    parser.add_argument('--epochs', type=int, default=100,
                        help='number of epochs to train (default: 100)')
    parser.add_argument('--num_workers', type=int, default=0,
                        help='number of workers (default: 0)')
    parser.add_argument('--log_dir', type=str, default="",
                        help='tensorboard log directory')
    parser.add_argument('--checkpoint_dir', type=str, default='',
                        help='directory to save checkpoint')
    parser.add_argument('--save_test_dir', type=str, default='',
                        help='directory to save test submission file')
    args = parser.parse_args()
    print(args)

    # Deterministic runs: seed every RNG the pipeline touches.
    np.random.seed(42)
    torch.manual_seed(42)
    torch.cuda.manual_seed(42)
    random.seed(42)

    if torch.cuda.is_available():
        device = torch.device("cuda:" + str(args.device))
    else:
        device = torch.device("cpu")

    ### automatic dataloading and splitting
    dataset = PygPCQM4MDataset(root='dataset/')
    split_idx = dataset.get_idx_split()

    ### automatic evaluator. takes dataset name as input
    evaluator = PCQM4MEvaluator()

    # Choose training indices first (optionally a random 10% subset),
    # then build the loaders.
    train_idx = split_idx["train"]
    if args.train_subset:
        subset_ratio = 0.1
        keep = torch.randperm(
            len(train_idx))[:int(subset_ratio * len(train_idx))]
        train_idx = train_idx[keep]

    train_loader = DataLoader(dataset[train_idx],
                              batch_size=args.batch_size,
                              shuffle=True,
                              num_workers=args.num_workers)
    valid_loader = DataLoader(dataset[split_idx["valid"]],
                              batch_size=args.batch_size,
                              shuffle=False,
                              num_workers=args.num_workers)
    if args.save_test_dir != '':
        test_loader = DataLoader(dataset[split_idx["test-dev"]],
                                 batch_size=args.batch_size,
                                 shuffle=False,
                                 num_workers=args.num_workers)

    if args.checkpoint_dir != '':
        os.makedirs(args.checkpoint_dir, exist_ok=True)

    gnn_kwargs = {
        'num_layers': args.num_layers,
        'emb_dim': args.emb_dim,
        'drop_ratio': args.drop_ratio,
        'graph_pooling': args.graph_pooling
    }

    # Map the CLI name onto (convolution type, virtual-node flag).
    variants = {
        'gin': ('gin', False),
        'gin-virtual': ('gin', True),
        'gcn': ('gcn', False),
        'gcn-virtual': ('gcn', True),
    }
    if args.gnn not in variants:
        raise ValueError('Invalid GNN type')
    conv_type, with_virtual_node = variants[args.gnn]
    model = GNN(gnn_type=conv_type, virtual_node=with_virtual_node,
                **gnn_kwargs).to(device)

    num_params = sum(p.numel() for p in model.parameters())
    print(f'#Params: {num_params}')

    optimizer = optim.Adam(model.parameters(), lr=0.001)

    if args.log_dir != '':
        tb_writer = SummaryWriter(log_dir=args.log_dir)

    best_valid_mae = 1000

    # Subset runs use a much longer schedule with slower decay.
    if args.train_subset:
        scheduler = StepLR(optimizer, step_size=300, gamma=0.25)
        args.epochs = 1000
    else:
        scheduler = StepLR(optimizer, step_size=30, gamma=0.25)

    for epoch in range(1, args.epochs + 1):
        print("=====Epoch {}".format(epoch))
        print('Training...')
        train_mae = train(model, device, train_loader, optimizer)

        print('Evaluating...')
        valid_mae = eval(model, device, valid_loader, evaluator)

        print({'Train': train_mae, 'Validation': valid_mae})

        if args.log_dir != '':
            tb_writer.add_scalar('valid/mae', valid_mae, epoch)
            tb_writer.add_scalar('train/mae', train_mae, epoch)

        # Only an improvement in validation MAE triggers saving.
        if valid_mae < best_valid_mae:
            best_valid_mae = valid_mae
            if args.checkpoint_dir != '':
                print('Saving checkpoint...')
                checkpoint = {
                    'epoch': epoch,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'scheduler_state_dict': scheduler.state_dict(),
                    'best_val_mae': best_valid_mae,
                    'num_params': num_params
                }
                torch.save(checkpoint,
                           os.path.join(args.checkpoint_dir, 'checkpoint.pt'))

            if args.save_test_dir != '':
                print('Predicting on test data...')
                y_pred = test(model, device, test_loader)
                print('Saving test submission file...')
                evaluator.save_test_submission({'y_pred': y_pred},
                                               args.save_test_dir,
                                               mode='test-dev')

        scheduler.step()

        print(f'Best validation MAE so far: {best_valid_mae}')

    if args.log_dir != '':
        tb_writer.close()
def main_mlp():
    """Train an MLP over precomputed fingerprint features on PCQM4M.

    Optionally logs MAE to TensorBoard, checkpoints the best model, and
    writes a test submission file.  All settings come from the command line.
    """
    # Training settings
    parser = argparse.ArgumentParser(
        description="GNN baselines on ogbgmol* data with Pytorch Geometrics")
    parser.add_argument("--device", type=int, default=0,
                        help="which gpu to use if any (default: 0)")
    parser.add_argument(
        "--num_mlp_layers", type=int, default=6,
        help="number of mlp layers (default: 6)",
    )
    parser.add_argument("--drop_ratio", type=float, default=0.2,
                        help="dropout ratio (default: 0.2)")
    parser.add_argument(
        "--batch_size", type=int, default=256,
        help="input batch size for training (default: 256)",
    )
    parser.add_argument(
        "--emb_dim", type=int, default=1600,
        help="embedding dimensionality (default: 1600)",
    )
    parser.add_argument("--train_subset", action="store_true")
    parser.add_argument(
        "--epochs", type=int, default=100,
        help="number of epochs to train (default: 100)",
    )
    parser.add_argument("--num_workers", type=int, default=0,
                        help="number of workers (default: 0)")
    parser.add_argument("--radius", type=int, default=2,
                        help="radius (default: 2)")
    parser.add_argument("--log_dir", type=str, default="",
                        help="tensorboard log directory")
    parser.add_argument("--checkpoint_dir", type=str, default="",
                        help="directory to save checkpoint")
    parser.add_argument(
        "--save_test_dir", type=str, default="",
        help="directory to save test submission file",
    )
    args = parser.parse_args()
    print(args)

    # Fixed seeds for reproducibility.
    np.random.seed(42)
    torch.manual_seed(42)
    torch.cuda.manual_seed(42)
    random.seed(42)

    device = (torch.device("cuda:" + str(args.device))
              if torch.cuda.is_available() else torch.device("cpu"))

    dataset = PCQM4MDataset(root="dataset/", only_smiles=True)
    fp_processed_file = preprocess_fp(dataset, args.radius)

    # Cached fingerprint tensors: X = features, Y = regression labels.
    data_dict = torch.load(fp_processed_file)
    X, Y = data_dict["X"], data_dict["Y"]

    split_idx = dataset.get_idx_split()
    ### automatic evaluator. takes dataset name as input
    evaluator = PCQM4MEvaluator()

    if args.train_subset:
        print("train subset")
        # Train on a random 10% subset (used for schedule-tuning runs).
        subset_ratio = 0.1
        subset_idx = torch.randperm(len(
            split_idx["train"]))[:int(subset_ratio * len(split_idx["train"]))]
        train_dataset = TensorDataset(X[split_idx["train"][subset_idx]],
                                      Y[split_idx["train"][subset_idx]])
    else:
        train_dataset = TensorDataset(X[split_idx["train"]],
                                      Y[split_idx["train"]])

    valid_dataset = TensorDataset(X[split_idx["valid"]], Y[split_idx["valid"]])
    test_dataset = TensorDataset(X[split_idx["test"]], Y[split_idx["test"]])

    train_loader = DataLoader(
        train_dataset,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.num_workers,
    )
    valid_loader = DataLoader(
        valid_dataset,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.num_workers,
    )
    # BUG FIX: every `args.x is not ""` below was an identity comparison
    # against a string literal (SyntaxWarning on CPython >= 3.8 and
    # interning-dependent); replaced with `!= ""` throughout.
    if args.save_test_dir != "":
        test_loader = DataLoader(
            test_dataset,
            batch_size=args.batch_size,
            shuffle=False,
            num_workers=args.num_workers,
        )

    if args.checkpoint_dir != "":
        os.makedirs(args.checkpoint_dir, exist_ok=True)

    model = MLP(
        num_mlp_layers=args.num_mlp_layers,
        emb_dim=args.emb_dim,
        drop_ratio=args.drop_ratio,
    ).to(device)

    num_params = sum(p.numel() for p in model.parameters())
    print(f"#Params: {num_params}")

    optimizer = optim.Adam(model.parameters(), lr=0.001)

    if args.log_dir != "":
        writer = SummaryWriter(log_dir=args.log_dir)

    best_valid_mae = 1000

    if args.train_subset:
        # Subset runs train much longer with a slower LR decay.
        scheduler = StepLR(optimizer, step_size=300, gamma=0.25)
        args.epochs = 1000
    else:
        scheduler = StepLR(optimizer, step_size=30, gamma=0.25)

    for epoch in range(1, args.epochs + 1):
        print("=====Epoch {}".format(epoch))
        print("Training...")
        train_mae = train(model, device, train_loader, optimizer)

        print("Evaluating...")
        valid_mae = eval(model, device, valid_loader, evaluator)

        print({"Train": train_mae, "Validation": valid_mae})

        if args.log_dir != "":
            writer.add_scalar("valid/mae", valid_mae, epoch)
            writer.add_scalar("train/mae", train_mae, epoch)

        # Checkpoint / predict only on validation improvement.
        if valid_mae < best_valid_mae:
            best_valid_mae = valid_mae
            if args.checkpoint_dir != "":
                print("Saving checkpoint...")
                checkpoint = {
                    "epoch": epoch,
                    "model_state_dict": model.state_dict(),
                    "optimizer_state_dict": optimizer.state_dict(),
                    "scheduler_state_dict": scheduler.state_dict(),
                    "best_val_mae": best_valid_mae,
                    "num_params": num_params,
                }
                torch.save(checkpoint,
                           osp.join(args.checkpoint_dir, "checkpoint.pt"))

            if args.save_test_dir != "":
                print("Predicting on test data...")
                y_pred = test(model, device, test_loader)
                print("Saving test submission file...")
                evaluator.save_test_submission({"y_pred": y_pred},
                                               args.save_test_dir)

        scheduler.step()

        print(f"Best validation MAE so far: {best_valid_mae}")

    if args.log_dir != "":
        writer.close()
def pretrain_train_and_eval(model, config, train_loader, valid_loaders,
                            test_loader, optimizer, scheduler):
    """Two-phase loop: self-supervised pretraining, then supervised training.

    Phase 1 runs ``config.pretrain_epoch`` epochs optimizing only the model's
    auxiliary pretraining losses (filtered by ``config.pretrain_tasks``).
    Phase 2 trains the regression objective plus an auxiliary-loss term whose
    weight follows the hard-coded ``alphalist`` schedule; rank 0 evaluates,
    keeps the top-8 epochs by validation MAE on disk, and (unless
    ``config.debug``) pushes progress messages to the ``to_robot.sh``
    notification script.

    Args: same contract as ``train_and_eval`` — paddle model, experiment
    config, (batch_dict, labels, others) train loader, dict of eval loaders,
    test loader, paddle optimizer, and a scheduler (or float to disable).
    """
    evaluator = PCQM4MEvaluator()
    # Only rank 0 writes tensorboard summaries, evaluates and saves.
    if dist.get_rank() == 0:
        writer = SummaryWriter(config.log_dir)
    best_valid = 1000
    global_step = 0
    # header/msg_list feed the robot notification message built below.
    header = "%s\n" % config.task_name
    msg_list = []
    epoch_step = len(train_loader)
    # topk_best holds [epoch, valid_mae] pairs, best first; seeded with
    # sentinel entries so the first few real epochs always qualify.
    topk_best = []
    topk_num = 8
    for i in range(topk_num):
        msg_list.append("")
        topk_best.append([0, 1000])

    # Pretrain
    for epoch in range(1, config.pretrain_epoch + 1):
        model.train()
        if dist.get_rank() == 0:
            bn_summary(writer, model, epoch)
        loss_accum = 0
        loss_dict = {}
        for step, (batch_dict, labels, others) in enumerate(train_loader):
            feed_dict = data2tensor(batch_dict)
            labels = paddle.to_tensor(labels)
            # return_graph=False -> model returns only the dict of
            # self-supervised task losses.
            pretrain_losses = model(feed_dict, return_graph=False)
            total_loss = 0
            for name in pretrain_losses:
                # Only optimize the tasks selected in the config.
                if name not in config.pretrain_tasks:
                    continue
                if not name in loss_dict:
                    loss_dict[name] = []
                c_loss = pretrain_losses[name]
                loss_dict[name].append(c_loss.numpy())
                total_loss += c_loss
            total_loss.backward()
            optimizer.step()
            optimizer.clear_grad()
            loss_accum += total_loss.numpy()
            if step % 100 == 0:
                log.info("Epoch: %s | Step: %s/%s Pretrain loss: %.6f" \
                    % (epoch, step+1, epoch_step, loss_accum / (step+1)))
        # Per-task mean pretraining loss for this epoch.
        for name in loss_dict:
            print('pretrain loss', epoch, name, np.mean(loss_dict[name]))

    # Train
    for epoch in range(1, config.epochs + 1):
        model.train()
        # Auxiliary-loss weight schedule: full alpha for 10 epochs, half
        # for the next 10, then zero.
        alphalist = [config.aux_alpha] * 10 + [config.aux_alpha / 2
                                               ] * 10 + [0] * 200
        if dist.get_rank() == 0:
            bn_summary(writer, model, epoch)
        loss_accum = 0
        for step, (batch_dict, labels, others) in enumerate(train_loader):
            feed_dict = data2tensor(batch_dict)
            labels = paddle.to_tensor(labels)
            # return_graph=True -> (graph-level output, pretrain loss dict).
            out, pretrain_losses = model(feed_dict, return_graph=True)
            pred = paddle.reshape(out, shape=[-1, ])
            homo_loss = reg_criterion(pred, labels)
            alpha = alphalist[epoch - 1]
            # Weighted sum of all auxiliary losses added to the main loss.
            pretrain_loss = alpha * reduce(lambda x, y: x + y,
                                           pretrain_losses.values())
            loss = homo_loss + pretrain_loss
            loss.backward()
            optimizer.step()
            optimizer.clear_grad()
            loss_accum += loss.numpy()
            if global_step % config.log_step == 0:
                log.info("Epoch: %s | Step: %s/%s | Train loss: %.6f" \
                    % (epoch, step, epoch_step, loss_accum / (step+1)))
            global_step += 1
        # Running mean of the training loss stands in for the train MAE.
        train_mae = loss_accum / (step + 1)
        print("out the training")

        if dist.get_rank() == 0 and config.to_valid_step < epoch:
            valid_dict = evaluate(model, valid_loaders['valid'], config)
            valid_mae = evaluator.eval(valid_dict)["mae"]
            writer.add_scalar('train/mae', train_mae, epoch)
            writer.add_scalar('valid/mae', valid_mae, epoch)
            if config.split_mode is not None:
                # Extra held-out ("left") validation split.
                left_dict = evaluate(model, valid_loaders['left'], config,
                                     'left_valid')
                left_valid_mae = evaluator.eval(left_dict)["mae"]
                writer.add_scalar('valid/left', left_valid_mae, epoch)
                # valid_mae = (4.5 * valid_mae + left_valid_mae) / 5.5
            # Entering the current top-k triggers dumping predictions and
            # a checkpoint, then evicting the now-worst entry from disk.
            if valid_mae < topk_best[topk_num - 1][1]:
                best_valid = valid_mae
                output_dir = os.path.join(config.output_dir, "%03d" % epoch)
                make_dir(output_dir)
                save_pred_result(output_dir, 'valid', valid_dict)
                # NOTE(review): left_dict only exists when config.split_mode
                # is not None; this unguarded call would raise NameError
                # otherwise — confirm split_mode is always set for these runs.
                save_pred_result(output_dir, 'left_valid', left_dict)
                # if valid is best, save test result
                test_dict = evaluate(model, test_loader, config, mode="test")
                save_pred_result(output_dir, 'test', test_dict)
                save_dir = os.path.join(config.save_dir, "%03d" % epoch)
                make_dir(save_dir)
                ckpt_file = os.path.join(save_dir, "checkpoint.pdparams")
                log.info("saving model checkpoints in %s" % ckpt_file)
                paddle.save(model.state_dict(), ckpt_file)
                # optim_file = os.path.join(config.save_dir, "optimizer.pdparams")
                # log.info("saving optimizer checkpoints in %s" % optim_file)
                # paddle.save(optimizer.state_dict(), optim_file)
                # calculate top n: insert at the first rank this MAE beats.
                for i in range(topk_num):
                    if valid_mae < topk_best[i][1]:
                        topk_best.insert(i, [epoch, valid_mae])
                        k_idx = i
                        break
                # Evict the now-worst epoch's artifacts (best effort).
                to_rm = topk_best[-1]
                tmp_output_dir = os.path.join(config.output_dir,
                                              "%03d" % to_rm[0])
                tmp_save_dir = os.path.join(config.save_dir, "%03d" % to_rm[0])
                try:
                    shutil.rmtree(tmp_output_dir)
                    shutil.rmtree(tmp_save_dir)
                except OSError:
                    pass
                topk_best = topk_best[:-1]
                # Persist the current top-k ranking for later inspection.
                with open(os.path.join(config.output_dir, "ckpt_info"),
                          'w') as f:
                    for item in topk_best:
                        f.write("%s\n" % item)
                if not config.debug:
                    # Push the ranked progress message to the notification
                    # script (fire-and-forget, output discarded).
                    v_lr = 0.0 if config.lr_mode == "Reduce" else scheduler.get_lr(
                    )
                    info = "Epoch: %s | lr: %s | Train: %.6f | Valid: %.6f | Best Valid: %.6f" \
                        % (epoch, v_lr, train_mae, valid_mae, topk_best[0][1])
                    msg_list.insert(k_idx, info)
                    msg_list = msg_list[:-1]
                    to_robot_msg = header + "\n".join(msg_list)
                    os.system("echo '%s' | sh to_robot.sh >/dev/null 2>&1 " %
                              to_robot_msg)
            v_lr = 0.0 if config.lr_mode == "Reduce" else scheduler.get_lr()
            info = "Epoch: %s | lr: %s | Train: %.6f | Valid: %.6f | Best Valid: %.6f" \
                % (epoch, v_lr, train_mae, valid_mae, topk_best[0][1])
            log.info(info)
            writer.add_scalar('valid/best', topk_best[0][1], epoch)
        if isinstance(scheduler, float):
            pass
        elif config.lr_mode == "Reduce":
            # Broadcast rank 0's metric so every rank steps identically.
            if dist.get_rank() == 0:
                valid_mae = paddle.to_tensor(valid_mae, dtype="float32")
            else:
                valid_mae = paddle.to_tensor(0.0, dtype="float32")
            paddle.distributed.broadcast(valid_mae, 0)
            scheduler.step(valid_mae)
        else:
            scheduler.step()
def main(args):
    """Train a GINGraphPooling model on PCQM4M and keep the best checkpoint.

    Args:
        args: parsed argparse namespace. Must provide ``num_layers``,
            ``emb_dim``, ``drop_ratio``, ``graph_pooling``, ``dataset_root``,
            ``batch_size``, ``num_workers``, ``device``, ``weight_decay``,
            ``save_dir``, ``epochs``, ``save_test``, ``early_stop`` and an
            open file handle ``output_file`` that all progress is printed to.

    Side effects: writes TensorBoard scalars to ``args.save_dir``; when
    ``args.save_test`` is set, saves ``checkpoint.pt`` and a test submission
    file each time the validation MAE improves. Closes ``args.output_file``
    on exit.
    """
    prepartion(args)  # NOTE(review): helper name is misspelled upstream; defined elsewhere in this file
    nn_params = {
        'num_layers': args.num_layers,
        'emb_dim': args.emb_dim,
        'drop_ratio': args.drop_ratio,
        'graph_pooling': args.graph_pooling
    }

    # automatic dataloading and splitting
    dataset = MyPCQM4MDataset(root=args.dataset_root)
    split_idx = dataset.get_idx_split()
    train_data = dataset[split_idx['train']]
    valid_data = dataset[split_idx['valid']]
    test_data = dataset[split_idx['test']]
    train_loader = DataLoader(train_data,
                              batch_size=args.batch_size,
                              shuffle=True,
                              num_workers=args.num_workers)
    valid_loader = DataLoader(valid_data,
                              batch_size=args.batch_size,
                              shuffle=False,
                              num_workers=args.num_workers)
    test_loader = DataLoader(test_data,
                             batch_size=args.batch_size,
                             shuffle=False,
                             num_workers=args.num_workers)

    # automatic evaluator. takes dataset name as input
    evaluator = PCQM4MEvaluator()
    # Regression on the HOMO-LUMO gap target; trained with MSE, reported as MAE.
    criterion_fn = torch.nn.MSELoss()
    device = args.device
    model = GINGraphPooling(**nn_params).to(device)
    num_params = sum(p.numel() for p in model.parameters())
    print(f'#Params: {num_params}', file=args.output_file, flush=True)
    print(model, file=args.output_file, flush=True)
    optimizer = optim.Adam(model.parameters(),
                           lr=0.001,
                           weight_decay=args.weight_decay)
    scheduler = StepLR(optimizer, step_size=30, gamma=0.25)
    writer = SummaryWriter(log_dir=args.save_dir)
    not_improved = 0        # epochs since last validation improvement
    best_valid_mae = 9999   # sentinel: any real MAE is smaller
    for epoch in range(1, args.epochs + 1):
        print("=====Epoch {}".format(epoch), file=args.output_file, flush=True)
        print('Training...', file=args.output_file, flush=True)
        train_mae = train(model, device, train_loader, optimizer, criterion_fn)
        print('Evaluating...', file=args.output_file, flush=True)
        # NOTE: `eval` here is a training-script helper defined elsewhere in
        # this file, not the builtin.
        valid_mae = eval(model, device, valid_loader, evaluator)
        print({
            'Train': train_mae,
            'Validation': valid_mae
        }, file=args.output_file, flush=True)
        writer.add_scalar('valid/mae', valid_mae, epoch)
        writer.add_scalar('train/mae', train_mae, epoch)
        if valid_mae < best_valid_mae:
            best_valid_mae = valid_mae
            if args.save_test:
                # Checkpoint + test predictions only on improvement, so the
                # saved submission always matches the best validation score.
                print('Saving checkpoint...', file=args.output_file, flush=True)
                checkpoint = {
                    'epoch': epoch,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'scheduler_state_dict': scheduler.state_dict(),
                    'best_val_mae': best_valid_mae,
                    'num_params': num_params
                }
                torch.save(checkpoint, os.path.join(args.save_dir, 'checkpoint.pt'))
                print('Predicting on test data...', file=args.output_file, flush=True)
                y_pred = test(model, device, test_loader)
                print('Saving test submission file...', file=args.output_file, flush=True)
                evaluator.save_test_submission({'y_pred': y_pred}, args.save_dir)
            not_improved = 0
        else:
            not_improved += 1
            if not_improved == args.early_stop:
                print(f"Have not improved for {not_improved} epoches.",
                      file=args.output_file, flush=True)
                break
        scheduler.step()
        print(f'Best validation MAE so far: {best_valid_mae}',
              file=args.output_file, flush=True)
    writer.close()
    args.output_file.close()
def main():
    """Restore a trained PGL GNN checkpoint and write a PCQM4M test submission.

    Command-line driven: builds the test-split loader, loads weights from
    ``<checkpoint_dir>/checkpoint.pdparams`` and saves the predictions to
    ``--save_test_dir`` in the official submission format.
    """
    # Training settings
    parser = argparse.ArgumentParser(
        description='GNN baselines on pcqm4m with PGL')
    parser.add_argument('--use_cuda', action='store_true')
    parser.add_argument('--device',
                        type=int,
                        default=0,
                        help='which gpu to use if any (default: 0)')
    parser.add_argument(
        '--gnn',
        type=str,
        default='gin-virtual',
        help=
        'GNN gin, gin-virtual, or gcn, or gcn-virtual (default: gin-virtual)')
    parser.add_argument(
        '--graph_pooling',
        type=str,
        default='sum',
        help='graph pooling strategy mean or sum (default: sum)')
    parser.add_argument('--drop_ratio',
                        type=float,
                        default=0,
                        help='dropout ratio (default: 0)')
    parser.add_argument(
        '--num_layers',
        type=int,
        default=5,
        help='number of GNN message passing layers (default: 5)')
    parser.add_argument(
        '--emb_dim',
        type=int,
        default=600,
        help='dimensionality of hidden units in GNNs (default: 600)')
    parser.add_argument('--train_subset', action='store_true')
    parser.add_argument('--batch_size',
                        type=int,
                        default=256,
                        help='input batch size for training (default: 256)')
    parser.add_argument('--epochs',
                        type=int,
                        default=100,
                        help='number of epochs to train (default: 100)')
    parser.add_argument('--num_workers',
                        type=int,
                        default=1,
                        help='number of workers (default: 1)')
    parser.add_argument('--log_dir',
                        type=str,
                        default="",
                        help='tensorboard log directory')
    parser.add_argument('--checkpoint_dir',
                        type=str,
                        default='',
                        help='directory to save checkpoint')
    parser.add_argument('--save_test_dir',
                        type=str,
                        default='',
                        help='directory to save test submission file')
    args = parser.parse_args()
    print(args)

    # Fix all RNG seeds so inference batching is reproducible.
    random.seed(42)
    np.random.seed(42)
    paddle.seed(42)

    if not args.use_cuda:
        paddle.set_device("cpu")

    ### automatic dataloading and splitting
    # Minimal config shim: MolDataset only needs `base_data_path` here.
    class Config():
        def __init__(self):
            self.base_data_path = "./dataset"

    config = Config()
    ds = MolDataset(config)
    split_idx = ds.get_idx_split()
    test_ds = Subset(ds, split_idx['test'])
    print("Test exapmles: ", len(test_ds))

    ### automatic evaluator. takes dataset name as input
    evaluator = PCQM4MEvaluator()

    test_loader = Dataloader(test_ds,
                             batch_size=args.batch_size,
                             shuffle=False,
                             num_workers=args.num_workers,
                             collate_fn=CollateFn())

    shared_params = {
        'num_layers': args.num_layers,
        'emb_dim': args.emb_dim,
        'drop_ratio': args.drop_ratio,
        'graph_pooling': args.graph_pooling
    }
    # Model must be constructed with the same hyper-parameters the
    # checkpoint was trained with.
    if args.gnn == 'gin':
        model = GNN(gnn_type='gin', virtual_node=False, **shared_params)
    elif args.gnn == 'gin-virtual':
        model = GNN(gnn_type='gin', virtual_node=True, **shared_params)
    elif args.gnn == 'gcn':
        model = GNN(gnn_type='gcn', virtual_node=False, **shared_params)
    elif args.gnn == 'gcn-virtual':
        model = GNN(gnn_type='gcn', virtual_node=True, **shared_params)
    else:
        raise ValueError('Invalid GNN type')

    num_params = sum(p.numel() for p in model.parameters())
    print(f'#Params: {num_params}')

    checkpoint_path = os.path.join(args.checkpoint_dir, 'checkpoint.pdparams')
    if not os.path.exists(checkpoint_path):
        raise RuntimeError(f'Checkpoint file not found at {checkpoint_path}')
    model.set_state_dict(paddle.load(checkpoint_path))

    print('Predicting on test data...')
    y_pred = test(model, test_loader)
    print('Saving test submission file...')
    evaluator.save_test_submission({'y_pred': y_pred}, args.save_test_dir)
def main():
    """Train a GNN baseline on PCQM4M (PyTorch Geometric version).

    Parses command-line options, builds the train/valid(/test) loaders,
    trains with Adam + StepLR, and — whenever the validation MAE improves —
    optionally saves a checkpoint to ``--checkpoint_dir`` and a test
    submission file to ``--save_test_dir``.
    """
    # Training settings
    parser = argparse.ArgumentParser(
        description="GNN baselines on pcqm4m with Pytorch Geometrics")
    parser.add_argument("--device",
                        type=int,
                        default=0,
                        help="which gpu to use if any (default: 0)")
    parser.add_argument(
        "--gnn",
        type=str,
        default="gin-virtual",
        help=
        "GNN gin, gin-virtual, or gcn, or gcn-virtual (default: gin-virtual)",
    )
    parser.add_argument(
        "--graph_pooling",
        type=str,
        default="sum",
        help="graph pooling strategy mean or sum (default: sum)",
    )
    parser.add_argument("--drop_ratio",
                        type=float,
                        default=0,
                        help="dropout ratio (default: 0)")
    parser.add_argument(
        "--num_layers",
        type=int,
        default=5,
        help="number of GNN message passing layers (default: 5)",
    )
    parser.add_argument(
        "--emb_dim",
        type=int,
        default=600,
        help="dimensionality of hidden units in GNNs (default: 600)",
    )
    parser.add_argument("--train_subset", action="store_true")
    parser.add_argument(
        "--batch_size",
        type=int,
        default=256,
        help="input batch size for training (default: 256)",
    )
    parser.add_argument(
        "--epochs",
        type=int,
        default=100,
        help="number of epochs to train (default: 100)",
    )
    parser.add_argument("--num_workers",
                        type=int,
                        default=0,
                        help="number of workers (default: 0)")
    parser.add_argument("--log_dir",
                        type=str,
                        default="",
                        help="tensorboard log directory")
    parser.add_argument("--checkpoint_dir",
                        type=str,
                        default="",
                        help="directory to save checkpoint")
    parser.add_argument(
        "--save_test_dir",
        type=str,
        default="",
        help="directory to save test submission file",
    )
    args = parser.parse_args()
    print(args)

    # Fix all RNG seeds for reproducibility.
    np.random.seed(42)
    torch.manual_seed(42)
    torch.cuda.manual_seed(42)
    random.seed(42)

    device = (torch.device("cuda:" + str(args.device))
              if torch.cuda.is_available() else torch.device("cpu"))

    ### automatic dataloading and splitting
    dataset = PygPCQM4MDataset(root="dataset/")
    split_idx = dataset.get_idx_split()

    ### automatic evaluator. takes dataset name as input
    evaluator = PCQM4MEvaluator()

    if args.train_subset:
        # Train on a random 10% subset (paired with a 10x longer schedule below).
        subset_ratio = 0.1
        subset_idx = torch.randperm(len(
            split_idx["train"]))[:int(subset_ratio * len(split_idx["train"]))]
        train_loader = DataLoader(
            dataset[split_idx["train"][subset_idx]],
            batch_size=args.batch_size,
            shuffle=True,
            num_workers=args.num_workers,
        )
    else:
        train_loader = DataLoader(
            dataset[split_idx["train"]],
            batch_size=args.batch_size,
            shuffle=True,
            num_workers=args.num_workers,
        )
    valid_loader = DataLoader(
        dataset[split_idx["valid"]],
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.num_workers,
    )
    # BUGFIX: the original used `args.save_test_dir is not ""` (and likewise
    # for checkpoint_dir/log_dir below) — identity comparison with a string
    # literal raises a SyntaxWarning on Python >= 3.8 and only happens to
    # work through CPython string interning. Equality is the correct test.
    if args.save_test_dir != "":
        test_loader = DataLoader(
            dataset[split_idx["test"]],
            batch_size=args.batch_size,
            shuffle=False,
            num_workers=args.num_workers,
        )
    if args.checkpoint_dir != "":
        os.makedirs(args.checkpoint_dir, exist_ok=True)

    shared_params = {
        "num_layers": args.num_layers,
        "emb_dim": args.emb_dim,
        "drop_ratio": args.drop_ratio,
        "graph_pooling": args.graph_pooling,
    }
    if args.gnn == "gin":
        model = GNN(gnn_type="gin", virtual_node=False, **shared_params).to(device)
    elif args.gnn == "gin-virtual":
        model = GNN(gnn_type="gin", virtual_node=True, **shared_params).to(device)
    elif args.gnn == "gcn":
        model = GNN(gnn_type="gcn", virtual_node=False, **shared_params).to(device)
    elif args.gnn == "gcn-virtual":
        model = GNN(gnn_type="gcn", virtual_node=True, **shared_params).to(device)
    else:
        raise ValueError("Invalid GNN type")

    num_params = sum(p.numel() for p in model.parameters())
    print(f"#Params: {num_params}")

    optimizer = optim.Adam(model.parameters(), lr=0.001)

    if args.log_dir != "":
        writer = SummaryWriter(log_dir=args.log_dir)

    best_valid_mae = 1000  # sentinel: any real MAE is smaller
    if args.train_subset:
        # Subset training: decay 10x less often and train for 1000 epochs.
        scheduler = StepLR(optimizer, step_size=300, gamma=0.25)
        args.epochs = 1000
    else:
        scheduler = StepLR(optimizer, step_size=30, gamma=0.25)

    for epoch in range(1, args.epochs + 1):
        print("=====Epoch {}".format(epoch))
        print("Training...")
        train_mae = train(model, device, train_loader, optimizer)
        print("Evaluating...")
        # NOTE: `eval` is a training-script helper defined elsewhere in this
        # file, not the builtin.
        valid_mae = eval(model, device, valid_loader, evaluator)
        print({"Train": train_mae, "Validation": valid_mae})
        if args.log_dir != "":
            writer.add_scalar("valid/mae", valid_mae, epoch)
            writer.add_scalar("train/mae", train_mae, epoch)
        if valid_mae < best_valid_mae:
            best_valid_mae = valid_mae
            if args.checkpoint_dir != "":
                print("Saving checkpoint...")
                checkpoint = {
                    "epoch": epoch,
                    "model_state_dict": model.state_dict(),
                    "optimizer_state_dict": optimizer.state_dict(),
                    "scheduler_state_dict": scheduler.state_dict(),
                    "best_val_mae": best_valid_mae,
                    "num_params": num_params,
                }
                torch.save(checkpoint,
                           os.path.join(args.checkpoint_dir, "checkpoint.pt"))
            if args.save_test_dir != "":
                # Re-predict on test only when validation improved, so the
                # saved submission always matches the best checkpoint.
                print("Predicting on test data...")
                y_pred = test(model, device, test_loader)
                print("Saving test submission file...")
                evaluator.save_test_submission({"y_pred": y_pred},
                                               args.save_test_dir)
        scheduler.step()
        print(f"Best validation MAE so far: {best_valid_mae}")
    if args.log_dir != "":
        writer.close()
def main():
    """Train a GNN baseline on PCQM4M with PGL/PaddlePaddle.

    Parses command-line options, builds train/valid(/test) loaders, trains
    with Adam + StepDecay, and — whenever the validation MAE improves —
    optionally saves a checkpoint to ``--checkpoint_dir`` and a test
    submission file to ``--save_test_dir``. Also accumulates a per-epoch
    progress message in ``msg`` (best-effort).
    """
    # Training settings
    parser = argparse.ArgumentParser(
        description='GNN baselines on pcqm4m with PGL')
    parser.add_argument('--use_cuda', action='store_true')
    parser.add_argument('--device',
                        type=int,
                        default=0,
                        help='which gpu to use if any (default: 0)')
    parser.add_argument(
        '--gnn',
        type=str,
        default='gin-virtual',
        help=
        'GNN gin, gin-virtual, or gcn, or gcn-virtual (default: gin-virtual)')
    parser.add_argument(
        '--graph_pooling',
        type=str,
        default='sum',
        help='graph pooling strategy mean or sum (default: sum)')
    parser.add_argument('--drop_ratio',
                        type=float,
                        default=0,
                        help='dropout ratio (default: 0)')
    parser.add_argument(
        '--num_layers',
        type=int,
        default=5,
        help='number of GNN message passing layers (default: 5)')
    parser.add_argument(
        '--emb_dim',
        type=int,
        default=600,
        help='dimensionality of hidden units in GNNs (default: 600)')
    parser.add_argument('--train_subset', action='store_true')
    parser.add_argument('--batch_size',
                        type=int,
                        default=256,
                        help='input batch size for training (default: 256)')
    parser.add_argument('--epochs',
                        type=int,
                        default=100,
                        help='number of epochs to train (default: 100)')
    parser.add_argument('--num_workers',
                        type=int,
                        default=1,
                        help='number of workers (default: 1)')
    parser.add_argument('--log_dir',
                        type=str,
                        default="",
                        help='tensorboard log directory')
    parser.add_argument('--checkpoint_dir',
                        type=str,
                        default='',
                        help='directory to save checkpoint')
    parser.add_argument('--save_test_dir',
                        type=str,
                        default='',
                        help='directory to save test submission file')
    args = parser.parse_args()
    print(args)

    # Fix all RNG seeds for reproducibility.
    random.seed(42)
    np.random.seed(42)
    paddle.seed(42)

    if not args.use_cuda:
        paddle.set_device("cpu")

    ### automatic dataloading and splitting
    # Minimal config shim: MolDataset only needs `base_data_path` here.
    class Config():
        def __init__(self):
            self.base_data_path = "./dataset"

    config = Config()
    ds = MolDataset(config)
    split_idx = ds.get_idx_split()
    train_ds = Subset(ds, split_idx['train'])
    valid_ds = Subset(ds, split_idx['valid'])
    test_ds = Subset(ds, split_idx['test'])
    print("Train exapmles: ", len(train_ds))
    print("Valid exapmles: ", len(valid_ds))
    print("Test exapmles: ", len(test_ds))

    ### automatic evaluator. takes dataset name as input
    evaluator = PCQM4MEvaluator()

    train_loader = Dataloader(train_ds,
                              batch_size=args.batch_size,
                              shuffle=True,
                              num_workers=args.num_workers,
                              collate_fn=CollateFn())
    valid_loader = Dataloader(valid_ds,
                              batch_size=args.batch_size,
                              shuffle=False,
                              num_workers=args.num_workers,
                              collate_fn=CollateFn())
    # BUGFIX: the original used `args.save_test_dir is not ''` (and likewise
    # for checkpoint_dir/log_dir below) — identity comparison with a string
    # literal raises a SyntaxWarning on Python >= 3.8 and only happens to
    # work through CPython string interning. Equality is the correct test.
    if args.save_test_dir != '':
        test_loader = Dataloader(test_ds,
                                 batch_size=args.batch_size,
                                 shuffle=False,
                                 num_workers=args.num_workers,
                                 collate_fn=CollateFn())
    if args.checkpoint_dir != '':
        os.makedirs(args.checkpoint_dir, exist_ok=True)

    shared_params = {
        'num_layers': args.num_layers,
        'emb_dim': args.emb_dim,
        'drop_ratio': args.drop_ratio,
        'graph_pooling': args.graph_pooling
    }
    if args.gnn == 'gin':
        model = GNN(gnn_type='gin', virtual_node=False, **shared_params)
    elif args.gnn == 'gin-virtual':
        model = GNN(gnn_type='gin', virtual_node=True, **shared_params)
    elif args.gnn == 'gcn':
        model = GNN(gnn_type='gcn', virtual_node=False, **shared_params)
    elif args.gnn == 'gcn-virtual':
        model = GNN(gnn_type='gcn', virtual_node=True, **shared_params)
    else:
        raise ValueError('Invalid GNN type')

    num_params = sum(p.numel() for p in model.parameters())
    print(f'#Params: {num_params}')

    if args.log_dir != '':
        writer = SummaryWriter(log_dir=args.log_dir)

    best_valid_mae = 1000  # sentinel: any real MAE is smaller
    scheduler = paddle.optimizer.lr.StepDecay(learning_rate=0.001,
                                              step_size=300,
                                              gamma=0.25)
    optimizer = paddle.optimizer.Adam(learning_rate=scheduler,
                                      parameters=model.parameters())

    msg = "ogbg_lsc_paddle_baseline\n"
    for epoch in range(1, args.epochs + 1):
        print("=====Epoch {}".format(epoch))
        print('Training...')
        train_mae = train(model, train_loader, optimizer)
        print('Evaluating...')
        # NOTE: `eval` is a training-script helper defined elsewhere in this
        # file, not the builtin.
        valid_mae = eval(model, valid_loader, evaluator)
        print({'Train': train_mae, 'Validation': valid_mae})
        if args.log_dir != '':
            writer.add_scalar('valid/mae', valid_mae, epoch)
            writer.add_scalar('train/mae', train_mae, epoch)
        if valid_mae < best_valid_mae:
            best_valid_mae = valid_mae
            if args.checkpoint_dir != '':
                print('Saving checkpoint...')
                paddle.save(
                    model.state_dict(),
                    os.path.join(args.checkpoint_dir, 'checkpoint.pdparams'))
            if args.save_test_dir != '':
                # Re-predict on test only when validation improved, so the
                # saved submission always matches the best checkpoint.
                print('Predicting on test data...')
                y_pred = test(model, test_loader)
                print('Saving test submission file...')
                evaluator.save_test_submission({'y_pred': y_pred},
                                               args.save_test_dir)
        scheduler.step()
        print(f'Best validation MAE so far: {best_valid_mae}')
        try:
            msg += "Epoch: %d | Train: %.6f | Valid: %.6f | Best Valid: %.6f\n" \
                % (epoch, train_mae, valid_mae, best_valid_mae)
            print(msg)
        except Exception:
            # BUGFIX: was a bare `except:`, which also swallows
            # KeyboardInterrupt/SystemExit. Keep the best-effort logging but
            # only ignore ordinary errors.
            continue
    if args.log_dir != '':
        writer.close()
def infer(config):
    """Run inference with a restored model and write PCQM4M test outputs.

    Loads the model class named by ``config.model_type`` with weights from
    ``config.infer_from``, predicts on the test split, writes a per-molecule
    ``test_mae.txt`` (prediction + SMILES) and the official submission file
    under ``<config.output_dir>/<task_name>/``.

    Args:
        config: experiment config providing ``model_type``, ``dataset_type``,
            ``infer_from`` (checkpoint path), ``batch_size``,
            ``valid_batch_size``, ``num_workers`` and ``output_dir``.
    """
    model = getattr(M, config.model_type)(config)
    log.info("infer model from %s" % config.infer_from)
    model.set_state_dict(paddle.load(config.infer_from))

    log.info("loading data")
    ds = getattr(DS, config.dataset_type)(config)
    split_idx = ds.get_idx_split()
    train_ds = DS.Subset(ds, split_idx['train'], mode='train')
    valid_ds = DS.Subset(ds, split_idx['valid'], mode='valid')
    test_ds = DS.Subset(ds, split_idx['test'], mode='test')
    log.info("Train exapmles: %s" % len(train_ds))
    log.info("Valid exapmles: %s" % len(valid_ds))
    log.info("Test exapmles: %s" % len(test_ds))

    # NOTE(review): train_loader and valid_loader are constructed but unused
    # below (the valid pass is commented out); kept for parity with the
    # original behavior.
    train_loader = Dataloader(train_ds,
                              batch_size=config.batch_size,
                              shuffle=False,
                              num_workers=config.num_workers,
                              collate_fn=DS.CollateFn(config),
                              drop_last=True)
    valid_loader = Dataloader(valid_ds,
                              batch_size=config.valid_batch_size,
                              shuffle=False,
                              num_workers=1,
                              collate_fn=DS.CollateFn(config))
    test_loader = Dataloader(test_ds,
                             batch_size=config.valid_batch_size,
                             shuffle=False,
                             num_workers=1,
                             collate_fn=DS.CollateFn(config))

    # Derive the task name from the checkpoint path, i.e.
    # ".../<task_name>/checkpoint.pdparams" -> "<task_name>".
    try:
        task_name = config.infer_from.split("/")[-2]
    except (AttributeError, IndexError):
        # BUGFIX: was a bare `except:` (also catches KeyboardInterrupt /
        # SystemExit). Only the expected failures — infer_from not a string,
        # or a path with no parent component — fall back to the default.
        task_name = "ogb_kdd"
    log.info("task_name: %s" % task_name)

    ### automatic evaluator. takes dataset name as input
    evaluator = PCQM4MEvaluator()

    # ---------------- valid ----------------------- #
    # log.info("validating ...")
    # pred_dict = evaluate(model, valid_loader)
    # # log.info("valid MAE: %s" % evaluator.eval(pred_dict)["mae"])
    # valid_output_path = os.path.join(config.output_dir, task_name)
    # make_dir(valid_output_path)
    # valid_output_file = os.path.join(valid_output_path, "valid_mae.txt")
    # # log.info("saving valid result to %s" % valid_output_file)
    # with open(valid_output_file, 'w') as f:
    #     for y_pred, idx in zip(pred_dict['y_pred'], split_idx['valid']):
    #         smiles, label = ds.raw_dataset[idx]
    #         f.write("%s\t%s\t%s\n" % (y_pred, label, smiles))
    #
    # ---------------- test ----------------------- #
    log.info("testing ...")
    pred_dict = evaluate(model, test_loader)
    test_output_path = os.path.join(config.output_dir, task_name)
    make_dir(test_output_path)
    test_output_file = os.path.join(test_output_path, "test_mae.txt")
    log.info("saving test result to %s" % test_output_file)
    with open(test_output_file, 'w') as f:
        # Test labels are unavailable; only prediction + SMILES are written.
        for y_pred, idx in zip(pred_dict['y_pred'], split_idx['test']):
            smiles, label = ds.raw_dataset[idx]
            f.write("%s\t%s\n" % (y_pred, smiles))

    log.info("saving submition format to %s" % test_output_path)
    evaluator.save_test_submission({'y_pred': pred_dict['y_pred']},
                                   test_output_path)