def main(args, exp_config, train_set, val_set, test_set):
    """Train (or load a pre-trained) model and evaluate it on val/test sets.

    Results (val/test scores) are printed and appended to
    ``args['result_path'] + '/eval.txt'``; the best checkpoint is written to
    ``args['result_path'] + '/model.pth'``.

    Parameters
    ----------
    args : dict
        Runtime settings: featurizers, device, dataset/model names, metric,
        'pretrain' flag, 'num_epochs', 'num_workers', 'result_path'.
    exp_config : dict
        Model/optimizer hyperparameters; mutated in place with input feature
        sizes, 'n_tasks' and 'model'.
    train_set, val_set, test_set
        Dataset splits compatible with ``collate_molgraphs``.
    """
    # Featurizer-derived input sizes are only needed when we build the model
    # ourselves; pre-trained models come with their own configuration.
    if args['featurizer_type'] != 'pre_train':
        exp_config['in_node_feats'] = args['node_featurizer'].feat_size()
        if args['edge_featurizer'] is not None:
            exp_config['in_edge_feats'] = args['edge_featurizer'].feat_size()
    exp_config.update({
        'n_tasks': args['n_tasks'],
        'model': args['model']
    })
    # Only the training loader shuffles; val/test keep dataset order.
    train_loader = DataLoader(dataset=train_set,
                              batch_size=exp_config['batch_size'],
                              shuffle=True,
                              collate_fn=collate_molgraphs,
                              num_workers=args['num_workers'])
    val_loader = DataLoader(dataset=val_set,
                            batch_size=exp_config['batch_size'],
                            collate_fn=collate_molgraphs,
                            num_workers=args['num_workers'])
    test_loader = DataLoader(dataset=test_set,
                             batch_size=exp_config['batch_size'],
                             collate_fn=collate_molgraphs,
                             num_workers=args['num_workers'])
    if args['pretrain']:
        # Pre-trained weights: skip training entirely by zeroing the epoch
        # count, and pick the checkpoint name matching the featurizer setup.
        args['num_epochs'] = 0
        if args['featurizer_type'] == 'pre_train':
            model = load_pretrained('{}_{}'.format(
                args['model'], args['dataset'])).to(args['device'])
        else:
            model = load_pretrained('{}_{}_{}'.format(
                args['model'], args['featurizer_type'],
                args['dataset'])).to(args['device'])
    else:
        model = load_model(exp_config).to(args['device'])
    # reduction='none': per-element losses, presumably masked/averaged inside
    # run_a_train_epoch — confirm against that helper.
    loss_criterion = nn.SmoothL1Loss(reduction='none')
    optimizer = Adam(model.parameters(), lr=exp_config['lr'],
                     weight_decay=exp_config['weight_decay'])
    stopper = EarlyStopping(patience=exp_config['patience'],
                            filename=args['result_path'] + '/model.pth',
                            metric=args['metric'])
    for epoch in range(args['num_epochs']):
        # Train
        run_a_train_epoch(args, epoch, model, train_loader, loss_criterion,
                          optimizer)
        # Validation and early stop
        val_score = run_an_eval_epoch(args, model, val_loader)
        early_stop = stopper.step(val_score, model)
        print('epoch {:d}/{:d}, validation {} {:.4f}, best validation {} {:.4f}'.format(
            epoch + 1, args['num_epochs'], args['metric'],
            val_score, args['metric'], stopper.best_score))
        if early_stop:
            break
    # Restore the best checkpoint before final evaluation; a pre-trained model
    # never trained, so there is no checkpoint to restore.
    if not args['pretrain']:
        stopper.load_checkpoint(model)
    val_score = run_an_eval_epoch(args, model, val_loader)
    test_score = run_an_eval_epoch(args, model, test_loader)
    print('val {} {:.4f}'.format(args['metric'], val_score))
    print('test {} {:.4f}'.format(args['metric'], test_score))
    with open(args['result_path'] + '/eval.txt', 'w') as f:
        if not args['pretrain']:
            f.write('Best val {}: {}\n'.format(args['metric'], stopper.best_score))
        f.write('Val {}: {}\n'.format(args['metric'], val_score))
        f.write('Test {}: {}\n'.format(args['metric'], test_score))
def main(args, exp_config, train_set, val_set, test_set):
    """Run one training trial with the given configuration.

    Used for hyperparameter search: each call trains a fresh model, saves
    its checkpoint, evaluation summary and configuration under a new trial
    directory, and reports the best validation score back to the caller.

    Parameters
    ----------
    args : dict
        Runtime settings; ``init_trial_path`` adds 'trial_path'.
    exp_config : dict
        Hyperparameters; mutated in place with model name, input feature
        size and task count, then dumped to 'configure.json'.
    train_set, val_set, test_set
        Dataset splits compatible with ``collate_molgraphs``.

    Returns
    -------
    tuple
        ``(args['trial_path'], stopper.best_score)`` — trial directory and
        best validation score achieved.
    """
    # Record settings
    exp_config.update({
        'model': args['model'],
        'in_feats': args['node_featurizer'].feat_size(),
        'n_tasks': args['n_tasks']
    })
    # Set up directory for saving results
    args = init_trial_path(args)
    # Only the training loader shuffles.
    train_loader = DataLoader(dataset=train_set,
                              batch_size=exp_config['batch_size'],
                              shuffle=True,
                              collate_fn=collate_molgraphs)
    val_loader = DataLoader(dataset=val_set,
                            batch_size=exp_config['batch_size'],
                            collate_fn=collate_molgraphs)
    test_loader = DataLoader(dataset=test_set,
                             batch_size=exp_config['batch_size'],
                             collate_fn=collate_molgraphs)
    model = load_model(exp_config).to(args['device'])
    # reduction='none': per-element losses, presumably aggregated inside
    # run_a_train_epoch — confirm against that helper.
    loss_criterion = nn.SmoothL1Loss(reduction='none')
    optimizer = Adam(model.parameters(), lr=exp_config['lr'],
                     weight_decay=exp_config['weight_decay'])
    stopper = EarlyStopping(patience=exp_config['patience'],
                            filename=args['trial_path'] + '/model.pth')
    for epoch in range(args['num_epochs']):
        # Train
        run_a_train_epoch(args, epoch, model, train_loader, loss_criterion,
                          optimizer)
        # Validation and early stop
        val_score = run_an_eval_epoch(args, model, val_loader)
        early_stop = stopper.step(val_score, model)
        print(
            'epoch {:d}/{:d}, validation {} {:.4f}, best validation {} {:.4f}'.
            format(epoch + 1, args['num_epochs'], args['metric'], val_score,
                   args['metric'], stopper.best_score))
        if early_stop:
            break
    # Evaluate the best (not the last) checkpoint on the test set.
    stopper.load_checkpoint(model)
    test_score = run_an_eval_epoch(args, model, test_loader)
    print('test {} {:.4f}'.format(args['metric'], test_score))
    with open(args['trial_path'] + '/eval.txt', 'w') as f:
        f.write('Best val {}: {}\n'.format(args['metric'], stopper.best_score))
        f.write('Test {}: {}\n'.format(args['metric'], test_score))
    # Persist the full configuration so the trial can be reproduced.
    with open(args['trial_path'] + '/configure.json', 'w') as f:
        json.dump(exp_config, f, indent=2)
    return args['trial_path'], stopper.best_score
def main(args):
    """Train and evaluate a regression model on PubChem BioAssay aromaticity.

    Builds the dataset, splits it, trains (unless a pre-trained model is
    requested, in which case training is skipped) and prints the test score.

    Parameters
    ----------
    args : dict
        Runtime settings: featurization callables, split fractions, model
        and optimization hyperparameters, 'pre_trained' flag, 'metric_name'.
    """
    # Fix: the original used "cuda: 0" (with a space), which is not a valid
    # torch device string and raises RuntimeError on CUDA machines.
    args['device'] = torch.device(
        "cuda:0") if torch.cuda.is_available() else torch.device("cpu")
    set_random_seed(args['random_seed'])
    dataset = PubChemBioAssayAromaticity(
        smiles_to_graph=args['smiles_to_graph'],
        node_featurizer=args.get('node_featurizer', None),
        edge_featurizer=args.get('edge_featurizer', None))
    train_set, val_set, test_set = RandomSplitter.train_val_test_split(
        dataset, frac_train=args['frac_train'], frac_val=args['frac_val'],
        frac_test=args['frac_test'], random_state=args['random_seed'])
    # Only the training loader shuffles; val/test keep dataset order.
    train_loader = DataLoader(dataset=train_set,
                              batch_size=args['batch_size'],
                              shuffle=True,
                              collate_fn=collate_molgraphs)
    val_loader = DataLoader(dataset=val_set,
                            batch_size=args['batch_size'],
                            collate_fn=collate_molgraphs)
    test_loader = DataLoader(dataset=test_set,
                             batch_size=args['batch_size'],
                             collate_fn=collate_molgraphs)
    if args['pre_trained']:
        # Skip training entirely when loading published weights.
        args['num_epochs'] = 0
        model = load_pretrained(args['exp'])
    else:
        model = load_model(args)
    # reduction='none': per-element losses, presumably masked/averaged inside
    # run_a_train_epoch — confirm against that helper.
    loss_fn = nn.MSELoss(reduction='none')
    optimizer = torch.optim.Adam(model.parameters(), lr=args['lr'],
                                 weight_decay=args['weight_decay'])
    stopper = EarlyStopping(mode=args['mode'], patience=args['patience'])
    model.to(args['device'])
    for epoch in range(args['num_epochs']):
        # Train
        run_a_train_epoch(args, epoch, model, train_loader, loss_fn, optimizer)
        # Validation and early stop
        val_score = run_an_eval_epoch(args, model, val_loader)
        early_stop = stopper.step(val_score, model)
        print(
            'epoch {:d}/{:d}, validation {} {:.4f}, best validation {} {:.4f}'.
            format(epoch + 1, args['num_epochs'], args['metric_name'],
                   val_score, args['metric_name'], stopper.best_score))
        if early_stop:
            break
    # Restore the best checkpoint before testing; pre-trained models never
    # trained, so there is nothing to restore.
    if not args['pre_trained']:
        stopper.load_checkpoint(model)
    test_score = run_an_eval_epoch(args, model, test_loader)
    print('test {} {:.4f}'.format(args['metric_name'], test_score))
def main(args, node_featurizer, edge_featurizer, train_set, val_set, test_set):
    """Train a regression model with label standardization and log evaluation.

    Stores the training-label mean/std in ``args`` (presumably consumed by
    the train/eval helpers to normalize targets — confirm against
    ``run_a_train_epoch``), trains with early stopping on validation r2, then
    logs final train/val/test performance via ``log_model_evaluation``.

    Parameters
    ----------
    args : dict
        Runtime settings; mutated in place with 'train_mean'/'train_std'.
    node_featurizer, edge_featurizer
        Featurizers forwarded to ``load_model``.
    train_set, val_set, test_set
        Dataset splits compatible with ``collate``.
    """
    # Record starting time
    t0 = time.time()
    # Label statistics from the training split only (no test leakage).
    train_mean, train_std = get_label_mean_and_std(train_set)
    train_mean, train_std = train_mean.to(args['device']), train_std.to(
        args['device'])
    args['train_mean'], args['train_std'] = train_mean, train_std
    # Only the training loader shuffles.
    train_loader = DataLoader(dataset=train_set,
                              batch_size=args['batch_size'],
                              shuffle=True,
                              collate_fn=collate)
    val_loader = DataLoader(dataset=val_set,
                            batch_size=args['batch_size'],
                            shuffle=False,
                            collate_fn=collate)
    test_loader = DataLoader(dataset=test_set,
                             batch_size=args['batch_size'],
                             shuffle=False,
                             collate_fn=collate)
    model = load_model(args, node_featurizer, edge_featurizer).to(args['device'])
    # reduction='none': per-element losses, presumably aggregated in the
    # training helper — confirm against run_a_train_epoch.
    criterion = nn.SmoothL1Loss(reduction='none')
    optimizer = Adam(model.parameters(), lr=args['lr'],
                     weight_decay=args['weight_decay'])
    stopper = EarlyStopping(patience=args['patience'],
                            filename=args['result_path'] + '/model.pth')
    for epoch in range(args['num_epochs']):
        # run_a_train_epoch returns per-batch (or per-task) arrays; report means.
        loss, train_r2, train_mae = run_a_train_epoch(args, model, train_loader,
                                                      criterion, optimizer)
        print('Epoch {:d}/{:d} | training | averaged loss {:.4f} | '
              'averaged r2 {:.4f} | averaged mae {:.4f}'.format(
                  epoch + 1, args['num_epochs'], float(np.mean(loss)),
                  float(np.mean(train_r2)), float(np.mean(train_mae))))
        # Validation and early stop — early stopping tracks mean validation r2.
        val_r2, val_mae = run_an_eval_epoch(args, model, val_loader)
        early_stop = stopper.step(float(np.mean(val_r2)), model)
        print(
            'Epoch {:d}/{:d} | validation | current r2 {:.4f} | best r2 {:.4f} | mae {:.4f}'
            .format(epoch + 1, args['num_epochs'], float(np.mean(val_r2)),
                    stopper.best_score, float(np.mean(val_mae))))
        if early_stop:
            break
    print('It took {:.4f}s to complete the task'.format(time.time() - t0))
    # Evaluate the best (not the last) checkpoint.
    stopper.load_checkpoint(model)
    log_model_evaluation(args, model, train_loader, val_loader, test_loader)
def main(args):
    """Train and evaluate a multi-task classification model.

    Loads a classification dataset, trains with a class-imbalance-weighted
    BCE loss and validation-based early stopping (skipped entirely when a
    pre-trained model is requested), then prints the test score.

    Parameters
    ----------
    args : dict
        Runtime settings; mutated in place with 'device', 'n_tasks' and
        (when pre-trained) 'num_epochs' = 0.
    """
    args['device'] = torch.device(
        "cuda") if torch.cuda.is_available() else torch.device("cpu")
    set_random_seed(args['random_seed'])
    # Interchangeable with other datasets
    dataset, train_set, val_set, test_set = load_dataset_for_classification(
        args)
    # Only the training loader shuffles.
    train_loader = DataLoader(train_set, batch_size=args['batch_size'],
                              collate_fn=collate_molgraphs, shuffle=True)
    val_loader = DataLoader(val_set, batch_size=args['batch_size'],
                            collate_fn=collate_molgraphs)
    test_loader = DataLoader(test_set, batch_size=args['batch_size'],
                             collate_fn=collate_molgraphs)
    if args['pre_trained']:
        # Published weights: skip training entirely.
        args['num_epochs'] = 0
        model = load_pretrained(args['exp'])
    else:
        args['n_tasks'] = dataset.n_tasks
        model = load_model(args)
    # Per-task positive-class weights computed from the training indices only,
    # so the loss compensates for label imbalance without test leakage.
    # reduction='none': per-element losses, presumably masked for missing
    # labels inside run_a_train_epoch — confirm against that helper.
    loss_criterion = BCEWithLogitsLoss(pos_weight=dataset.task_pos_weights(
        torch.tensor(train_set.indices)).to(args['device']),
                                       reduction='none')
    optimizer = Adam(model.parameters(), lr=args['lr'])
    stopper = EarlyStopping(patience=args['patience'])
    model.to(args['device'])
    for epoch in range(args['num_epochs']):
        # Train
        run_a_train_epoch(args, epoch, model, train_loader, loss_criterion,
                          optimizer)
        # Validation and early stop
        val_score = run_an_eval_epoch(args, model, val_loader)
        early_stop = stopper.step(val_score, model)
        print(
            'epoch {:d}/{:d}, validation {} {:.4f}, best validation {} {:.4f}'.
            format(epoch + 1, args['num_epochs'], args['metric_name'],
                   val_score, args['metric_name'], stopper.best_score))
        if early_stop:
            break
    # Restore the best checkpoint before testing; a pre-trained model never
    # trained, so there is no checkpoint to restore.
    if not args['pre_trained']:
        stopper.load_checkpoint(model)
    test_score = run_an_eval_epoch(args, model, test_loader)
    print('test {} {:.4f}'.format(args['metric_name'], test_score))
def main(args):
    """Train and evaluate a regression model.

    Loads a regression dataset (the test split may be absent), trains with
    MSE loss and validation-based early stopping (skipped entirely when a
    pre-trained model is requested), then prints the test score if a test
    split exists.

    Parameters
    ----------
    args : dict
        Runtime settings; mutated in place with 'device' and (when
        pre-trained) 'num_epochs' = 0.
    """
    args['device'] = torch.device(
        "cuda") if torch.cuda.is_available() else torch.device("cpu")
    set_random_seed(args['random_seed'])
    train_set, val_set, test_set = load_dataset_for_regression(args)
    train_loader = DataLoader(dataset=train_set,
                              batch_size=args['batch_size'],
                              shuffle=True,
                              collate_fn=collate_molgraphs)
    # Fix: the validation loader previously used shuffle=True. Shuffling an
    # evaluation split serves no purpose and makes the per-epoch evaluation
    # order nondeterministic; keep dataset order like the test loader does.
    val_loader = DataLoader(dataset=val_set,
                            batch_size=args['batch_size'],
                            collate_fn=collate_molgraphs)
    # Some datasets ship without a held-out test split.
    if test_set is not None:
        test_loader = DataLoader(dataset=test_set,
                                 batch_size=args['batch_size'],
                                 collate_fn=collate_molgraphs)
    if args['pre_trained']:
        # Published weights: skip training entirely.
        args['num_epochs'] = 0
        model = load_pretrained(args['exp'])
    else:
        model = load_model(args)
    # reduction='none': per-element losses, presumably aggregated inside
    # run_a_train_epoch — confirm against that helper.
    loss_fn = nn.MSELoss(reduction='none')
    optimizer = torch.optim.Adam(model.parameters(), lr=args['lr'],
                                 weight_decay=args['weight_decay'])
    # mode='lower': smaller validation scores are better (a loss-like metric).
    stopper = EarlyStopping(mode='lower', patience=args['patience'])
    model.to(args['device'])
    for epoch in range(args['num_epochs']):
        # Train
        run_a_train_epoch(args, epoch, model, train_loader, loss_fn, optimizer)
        # Validation and early stop
        val_score = run_an_eval_epoch(args, model, val_loader)
        early_stop = stopper.step(val_score, model)
        print(
            'epoch {:d}/{:d}, validation {} {:.4f}, best validation {} {:.4f}'.
            format(epoch + 1, args['num_epochs'], args['metric_name'],
                   val_score, args['metric_name'], stopper.best_score))
        if early_stop:
            break
    if test_set is not None:
        # Restore the best checkpoint before testing; a pre-trained model
        # never trained, so there is nothing to restore.
        if not args['pre_trained']:
            stopper.load_checkpoint(model)
        test_score = run_an_eval_epoch(args, model, test_loader)
        print('test {} {:.4f}'.format(args['metric_name'], test_score))
def test_early_stopping_high_metric():
    """EarlyStopping driven by a 'higher is better' metric name: checkpoints
    are written on improvement and step() signals a stop once patience runs
    out without improvement."""
    for metric in ['r2', 'roc_auc_score']:
        tracked = nn.Linear(2, 3)
        stopper = EarlyStopping(mode=None, patience=1, filename='test.pkl',
                                metric=metric)

        # The very first step always checkpoints the model.
        stopper.step(1., tracked)
        tracked.weight.data = tracked.weight.data + 1
        restored = nn.Linear(2, 3)
        stopper.load_checkpoint(restored)
        # The checkpoint holds the pre-bump weights, so they must differ now.
        assert not torch.allclose(tracked.weight, restored.weight)

        # A better score (2. > 1.) overwrites the checkpoint.
        tracked.weight.data = tracked.weight.data + 1
        stopper.step(2., tracked)
        stopper.load_checkpoint(restored)
        assert torch.allclose(tracked.weight, restored.weight)

        # A worse score (0.5) exhausts patience=1 and must not checkpoint.
        tracked.weight.data = tracked.weight.data + 1
        assert stopper.step(0.5, tracked)
        stopper.load_checkpoint(restored)
        assert not torch.allclose(tracked.weight, restored.weight)

        remove_file('test.pkl')
def test_early_stopping_low():
    """EarlyStopping in 'lower' mode: smaller scores count as improvement,
    checkpoints are written on improvement, and step() signals a stop once
    patience runs out."""
    tracked = nn.Linear(2, 3)
    stopper = EarlyStopping(mode='lower', patience=1, filename='test.pkl')

    # The very first step always checkpoints the model.
    stopper.step(1., tracked)
    tracked.weight.data = tracked.weight.data + 1
    restored = nn.Linear(2, 3)
    stopper.load_checkpoint(restored)
    # The checkpoint holds the pre-bump weights, so they must differ now.
    assert not torch.allclose(tracked.weight, restored.weight)

    # A lower score (0.5 < 1.) is an improvement and overwrites the checkpoint.
    tracked.weight.data = tracked.weight.data + 1
    stopper.step(0.5, tracked)
    stopper.load_checkpoint(restored)
    assert torch.allclose(tracked.weight, restored.weight)

    # A higher score (2) exhausts patience=1 and must not checkpoint.
    tracked.weight.data = tracked.weight.data + 1
    assert stopper.step(2, tracked)
    stopper.load_checkpoint(restored)
    assert not torch.allclose(tracked.weight, restored.weight)

    remove_file('test.pkl')
def main(args):
    """Sequentially train a model on a series of tasks (lifelong learning).

    For each task, trains with the continual-learning strategy selected by
    ``args['method']`` (a ``LifeModel.<method>_model.NET`` wrapper), early
    stops on validation score, then records test scores for all tasks in a
    score matrix. Finally prints:

    - AP (average performance): mean of the last row of the score matrix,
      i.e. performance on every task after training on all of them.
    - AF (average forgetting / backward transfer): mean drop between a
      task's score right after it was learned and its score at the end.

    Parameters
    ----------
    args : dict
        Runtime settings; mutated in place with 'n_tasks' (when not
        pre-trained) and (when pre-trained) 'num_epochs' = 0. When
        'pre_trained' is set, 'n_tasks' must already be present — confirm
        against the caller.
    """
    torch.cuda.set_device(args['gpu'])
    set_random_seed(args['random_seed'])
    dataset, train_set, val_set, test_set = load_dataset_for_classification(
        args)  # 6264, 783, 784
    train_loader = DataLoader(train_set, batch_size=args['batch_size'],
                              collate_fn=collate_molgraphs, shuffle=True)
    val_loader = DataLoader(val_set, batch_size=args['batch_size'],
                            collate_fn=collate_molgraphs)
    test_loader = DataLoader(test_set, batch_size=args['batch_size'],
                             collate_fn=collate_molgraphs)
    if args['pre_trained']:
        # Published weights: skip training entirely.
        args['num_epochs'] = 0
        model = load_pretrained(args['exp'])
    else:
        args['n_tasks'] = dataset.n_tasks
        # 'twp' uses a custom model wrapper with its own loader.
        if args['method'] == 'twp':
            model = load_mymodel(args)
            print(model)
        else:
            model = load_model(args)
        for name, parameters in model.named_parameters():
            print(name, ':', parameters.size())
    # Resolve the continual-learning strategy module dynamically by name.
    method = args['method']
    life_model = importlib.import_module(f'LifeModel.{method}_model')
    life_model_ins = life_model.NET(model, args)
    # Full-dataset loader some strategies need (e.g. for replay/statistics).
    data_loader = DataLoader(train_set, batch_size=len(train_set),
                             collate_fn=collate_molgraphs, shuffle=True)
    life_model_ins.data_loader = data_loader
    # Per-task positive-class weights compensate for label imbalance;
    # reduction='none' leaves aggregation to the strategy's observe().
    loss_criterion = BCEWithLogitsLoss(
        pos_weight=dataset.task_pos_weights.cuda(), reduction='none')
    model.cuda()
    # score_matrix[i][j]: test score on task j after training up to task i.
    score_matrix = np.zeros([args['n_tasks'], args['n_tasks']])
    prev_model = None
    # Fix: the task loop was hard-coded to range(12); use args['n_tasks'] so
    # the code works for any dataset (score_matrix is already sized by it).
    # Also dropped the unused local `score_mean`.
    for task_i in range(args['n_tasks']):
        print('\n********' + str(task_i))
        # Fresh early stopping per task.
        stopper = EarlyStopping(patience=args['patience'])
        for epoch in range(args['num_epochs']):
            # Train — 'lwf' (learning without forgetting) additionally
            # distills from the model state after the previous task.
            if args['method'] == 'lwf':
                life_model_ins.observe(train_loader, loss_criterion, task_i,
                                       args, prev_model)
            else:
                life_model_ins.observe(train_loader, loss_criterion, task_i,
                                       args)
            # Validation and early stop
            val_score = run_an_eval_epoch(args, model, val_loader, task_i)
            early_stop = stopper.step(val_score, model)
            if early_stop:
                print(epoch)
                break
        if not args['pre_trained']:
            stopper.load_checkpoint(model)
        # Evaluate on all tasks after finishing this one.
        score_matrix[task_i] = run_eval_epoch(args, model, test_loader)
        # Snapshot for distillation-based methods in the next task.
        prev_model = copy.deepcopy(life_model_ins).cuda()
    print('AP: ', round(np.mean(score_matrix[-1, :]), 4))
    # Backward transfer: final score on task t minus score right after
    # learning task t; negative values indicate forgetting.
    backward = []
    for t in range(args['n_tasks'] - 1):
        b = score_matrix[args['n_tasks'] - 1][t] - score_matrix[t][t]
        backward.append(round(b, 4))
    mean_backward = round(np.mean(backward), 4)
    print('AF: ', mean_backward)