def test_statistics(self, qm9_dataset, split_path, args):
    """Check get_statistics: fresh computation, cached reuse, and bad-path failure."""
    # Start from a clean slate so no pre-existing split file is picked up.
    if os.path.exists(split_path):
        os.remove(split_path)
    os.makedirs(os.path.dirname(split_path), exist_ok=True)

    train, val, test = spk.data.train_test_split(qm9_dataset, 10, 5, split_path)
    loader = spk.data.AtomsLoader(train, batch_size=5)

    # Case 1: statistics are not yet in the split file -> computed from data.
    mean, stddev = get_statistics(
        split_path=split_path,
        train_loader=loader,
        args=args,
        atomref=None,
        per_atom=False,
    )
    energies = [batch["energy_U0"] for batch in loader]
    assert_almost_equal(torch.cat(energies).mean(), mean["energy_U0"], 2)

    # Case 2: statistics were stored on the first call -> returned value
    # must match what is persisted in the split file.
    saved_mean = np.load(split_path)["mean"]
    mean, stddev = get_statistics(
        split_path=split_path,
        train_loader=loader,
        args=args,
        atomref=None,
        per_atom=False,
    )
    assert_almost_equal(saved_mean, mean["energy_U0"])

    # Case 3: a nonexistent split file must raise.
    with pytest.raises(Exception):
        get_statistics(
            split_path="I/do/not/exist.npz",
            train_loader=loader,
            args=args,
            atomref=None,
            per_atom=False,
        )
def main(args):
    """Train a model according to ``args``; any other mode is rejected.

    Builds the dataset, data loaders, and metrics from the stored run
    arguments, then (for ``args.mode == "train"``) computes per-property
    statistics, constructs the model, and runs the trainer.

    Raises:
        NotImplementedError: if ``args.mode`` is anything other than "train".
    """
    # setup
    train_args = setup_run(args)
    device = torch.device("cuda" if args.cuda else "cpu")

    # get dataset
    environment_provider = get_environment_provider(train_args, device=device)
    dataset = get_dataset(train_args, environment_provider=environment_provider)

    # get dataloaders; the train/val/test split indices are persisted next to
    # the model so later runs reuse the same split
    split_path = os.path.join(args.modelpath, "split.npz")
    train_loader, val_loader, test_loader = get_loaders(
        args, dataset=dataset, split_path=split_path, logging=logging)

    # define metrics
    metrics = get_metrics(train_args)

    # train or evaluate
    if args.mode == "train":
        # get statistics (mean/stddev of the target property over train set)
        atomref = dataset.get_atomref(args.property)
        mean, stddev = get_statistics(
            args=args,
            split_path=split_path,
            train_loader=train_loader,
            atomref=atomref,
            divide_by_atoms=get_divide_by_atoms(args),
            logging=logging,
        )

        # build model
        model = get_model(args, train_loader, mean, stddev, atomref,
                          logging=logging)

        # build trainer
        logging.info("training...")
        trainer = get_trainer(args, model, train_loader, val_loader, metrics)

        # run training
        trainer.train(device, n_epochs=args.n_epochs)
        logging.info("...training done!")
    else:
        # BUG FIX: the original did `raise ("...")`, which raises a plain
        # string — a TypeError in Python 3 (exceptions must derive from
        # BaseException). Raise a proper exception with the same message.
        raise NotImplementedError("Use the original SchnetPack script instead.")
def main(args):
    """Train or predict with an OMDB band-gap model.

    ``args.mode == "train"``: builds the 9000/1000/rest split, computes
    target statistics, trains, then reports test-set MAE (eV and kcal/mol)
    and plots results.
    ``args.mode == "pred"``: loads the stored best model and writes
    per-sample predictions for the test split to ``./predictions.csv``.
    """
    # building model and dataset
    device = torch.device("cuda" if args.cuda else "cpu")
    environment_provider = spk.environment.AseEnvironmentProvider(cutoff=5.0)
    omdb = './omdb'
    if args.mode == "train":
        if not os.path.exists(os.path.join(args.model_path)):
            os.makedirs(args.model_path)
        # None -> a random seed is drawn; runs are not reproducible by default
        spk.utils.spk_utils.set_random_seed(None)
        if not os.path.exists('omdb'):
            os.makedirs(omdb)
        omdData = OrganicMaterialsDatabase(
            args.datapath,
            download=False,
            load_only=[args.property],
            environment_provider=environment_provider)
        # FIX: split_path was hard-coded to an absolute user-specific
        # directory ('/home/s3754715/...'), which broke portability and
        # disagreed with args.model_path, where 'best_model' is stored.
        # Store the split next to the model instead.
        split_path = os.path.join(args.model_path, "split.npz")
        train, val, test = spk.train_test_split(data=omdData,
                                                num_train=9000,
                                                num_val=1000,
                                                split_file=split_path)
        print('-----------')
        print(len(train))
        print(len(val))
        print(len(test))
        print('-------------')
        train_loader = spk.AtomsLoader(train,
                                       batch_size=16,
                                       sampler=RandomSampler(train),
                                       num_workers=4)
        val_loader = spk.AtomsLoader(val, batch_size=16, num_workers=2)
        test_loader = spk.AtomsLoader(test, batch_size=16, num_workers=2)
        # per-property statistics (mean/stddev) for output normalization
        atomref = omdData.get_atomref(args.property)
        mean, stddev = get_statistics(
            args=args,
            split_path=split_path,
            train_loader=train_loader,
            atomref=atomref,
            divide_by_atoms=get_divide_by_atoms(args),
            logging=logging)
        model_train = model(args, omdData, atomref, mean, stddev)
        trainer = train_model(args, model_train, train_loader, val_loader)
        print('started training')
        trainer.train(device=device, n_epochs=args.n_epochs)
        print('training finished')

        # evaluate the best checkpoint on the held-out test split
        sch_model = torch.load(os.path.join(args.model_path, 'best_model'))
        err = 0
        sch_model.eval()
        for count, batch in enumerate(test_loader):
            # move batch to GPU, if necessary
            batch = {k: v.to(device) for k, v in batch.items()}
            # apply model
            pred = sch_model(batch)
            # summed absolute error for this batch
            tmp = torch.sum(
                torch.abs(pred[args.property] - batch[args.property]))
            tmp = tmp.detach().cpu().numpy()  # detach from graph & convert to numpy
            err += tmp
            print(tmp)
            # log progress
            percent = '{:3.2f}'.format(count / len(test_loader) * 100)
            print('Progress:', percent + '%' + ' ' * (5 - len(percent)),
                  end="\r")
        err /= len(test)
        print('Test MAE', np.round(err, 3), 'eV =',
              np.round(err / (kcal / mol), 3), 'kcal/mol')
        # plot results
        plot_results(args)
    elif args.mode == "pred":
        print('predictionsss')
        sch_model = torch.load(os.path.join(args.model_path, 'best_model'),
                               map_location=torch.device(device))
        omdData = OrganicMaterialsDatabase(
            args.datapath,
            download=True,
            load_only=[args.property],
            environment_provider=environment_provider)
        # reuse the split stored next to the model so "test" matches training
        split_path = os.path.join(args.model_path, "split.npz")
        train, val, test = spk.train_test_split(data=omdData,
                                                num_train=9000,
                                                num_val=1000,
                                                split_file=split_path)
        print(len(test))
        test_loader = spk.AtomsLoader(test, batch_size=32)
        prediction_list = []
        actual_value_list = []
        print('Started generating predictions')
        for count, batch in enumerate(test_loader):
            # move batch to GPU, if necessary
            print('before batch')
            batch = {k: v.to(device) for k, v in batch.items()}
            print('after batch')
            # apply model
            pred = sch_model(batch)
            prediction_list.extend(
                pred['band_gap'].detach().cpu().numpy().flatten().tolist())
            actual_value_list.extend(
                batch['band_gap'].detach().cpu().numpy().flatten().tolist())
            # log progress
            percent = '{:3.2f}'.format(count / len(test_loader) * 100)
            print('Progress:', percent + '%' + ' ' * (5 - len(percent)),
                  end="\r")
        # TODO(review): hard-coded absolute path to the COD-id metadata —
        # should be passed in (e.g. derived from args.datapath) instead.
        cod_arr = np.genfromtxt(
            os.path.join(
                '/home/s3754715/gnn_molecule/schnetpack/dataset/OMDB-GAP1_v1.1',
                'CODids.csv'))
        # rows 10000+ correspond to the test split (9000 train + 1000 val)
        cod_list = cod_arr[10000:].tolist()
        results_df = pd.DataFrame({
            'cod': cod_list,
            'prediction': prediction_list,
            'actual': actual_value_list
        })
        results_df.to_csv('./predictions.csv')
def main(args):
    """Entry point: train a new model or evaluate a stored one.

    ``args.mode == "train"``: compute statistics, build the model, and run
    the trainer. ``args.mode == "eval"``: load ``best_model`` from the model
    directory and evaluate on all splits. Any other mode raises ScriptError.
    """
    # setup
    train_args = setup_run(args)
    device = torch.device("cuda" if args.cuda else "cpu")

    # get dataset
    environment_provider = get_environment_provider(train_args, device=device)
    dataset = get_dataset(train_args, environment_provider=environment_provider)

    # get dataloaders; split indices are persisted in the model directory so
    # train and eval runs see the same train/val/test partition
    split_path = os.path.join(args.modelpath, "split.npz")
    train_loader, val_loader, test_loader = get_loaders(
        args, dataset=dataset, split_path=split_path, logging=logging)

    # define metrics
    metrics = get_metrics(train_args)

    # train or evaluate
    if args.mode == "train":
        # get statistics (target mean/stddev over the training split)
        atomref = dataset.get_atomref(args.property)
        mean, stddev = get_statistics(
            args=args,
            split_path=split_path,
            train_loader=train_loader,
            atomref=atomref,
            divide_by_atoms=get_divide_by_atoms(args),
            logging=logging,
        )

        # build model
        model = get_model(args, train_loader, mean, stddev, atomref,
                          logging=logging)

        # build trainer
        logging.info("training...")
        trainer = get_trainer(args, model, train_loader, val_loader, metrics)

        # run training
        trainer.train(device, n_epochs=args.n_epochs)
        logging.info("...training done!")
    elif args.mode == "eval":
        # remove old evaluation files; refuse to clobber without --overwrite
        evaluation_fp = os.path.join(args.modelpath, "evaluation.txt")
        if os.path.exists(evaluation_fp):
            if args.overwrite:
                os.remove(evaluation_fp)
            else:
                raise ScriptError(
                    "The evaluation file does already exist at {}! Add overwrite flag"
                    " to remove.".format(evaluation_fp))

        # load model
        logging.info("loading trained model...")
        model = torch.load(os.path.join(args.modelpath, "best_model"))

        # run evaluation
        logging.info("evaluating...")
        if spk.utils.get_derivative(train_args) is None:
            # no derivative property requested -> autograd not needed,
            # so disable it to save memory/time
            with torch.no_grad():
                evaluate(
                    args,
                    model,
                    train_loader,
                    val_loader,
                    test_loader,
                    device,
                    metrics=metrics,
                )
        else:
            # derivative properties (e.g. forces) need gradients enabled
            evaluate(
                args,
                model,
                train_loader,
                val_loader,
                test_loader,
                device,
                metrics=metrics,
            )
        logging.info("... evaluation done!")
    else:
        raise ScriptError("Unknown mode: {}".format(args.mode))
def main(args):
    """Entry point: train a new model or evaluate a stored one.

    Variant that pulls per-property settings (``divide_by_atoms``,
    ``pooling_mode``) from a project-level ``settings`` module instead of the
    stored run arguments. Any mode other than "train"/"eval" raises
    ScriptError.
    """
    # setup
    # train_args = setup_run(args)
    logging.info("CUDA is used: " + str(args.cuda))
    if args.cuda:
        logging.info("CUDA is available: " + str(torch.cuda.is_available()))
    device = torch.device("cuda" if args.cuda else "cpu")

    # get dataset
    dataset = get_dataset(args)

    # get dataloaders; split indices are persisted in the model directory so
    # train and eval runs see the same train/val/test partition
    split_path = os.path.join(args.modelpath, "split.npz")
    train_loader, val_loader, test_loader = get_loaders(
        args, dataset=dataset, split_path=split_path, logging=logging)

    # define metrics
    metrics = get_metrics(args)

    # train or evaluate
    if args.mode == "train":
        # get statistics (target mean/stddev over the training split);
        # per-property normalization behavior comes from settings
        atomref = dataset.get_atomref(args.property)
        divide_by_atoms = settings.divide_by_atoms[args.property]
        mean, stddev = get_statistics(
            args=args,
            split_path=split_path,
            train_loader=train_loader,
            atomref=atomref,
            divide_by_atoms=divide_by_atoms,
            logging=logging,
        )
        # per-property output pooling (e.g. sum vs. average over atoms)
        aggregation_mode = settings.pooling_mode[args.property]

        # build model
        model = get_model(args, train_loader, mean, stddev, atomref,
                          aggregation_mode, logging=logging)

        # build trainer
        logging.info("training...")
        trainer = get_trainer(args, model, train_loader, val_loader, metrics)

        # run training
        trainer.train(device, n_epochs=args.n_epochs)
        logging.info("...training done!")
    elif args.mode == "eval":
        # remove old evaluation files; refuse to clobber without --overwrite
        evaluation_fp = os.path.join(args.modelpath, "evaluation.txt")
        if os.path.exists(evaluation_fp):
            if args.overwrite:
                os.remove(evaluation_fp)
            else:
                raise ScriptError(
                    "The evaluation file does already exist at {}! Add overwrite flag"
                    " to remove.".format(evaluation_fp))

        # load model
        logging.info("loading trained model...")
        model = torch.load(os.path.join(args.modelpath, "best_model"))

        # run evaluation
        logging.info("evaluating...")
        if args.dataset != "md17":
            # md17 needs force gradients; everything else can skip autograd
            with torch.no_grad():
                evaluate(
                    args,
                    model,
                    train_loader,
                    val_loader,
                    test_loader,
                    device,
                    metrics=metrics,
                )
        else:
            evaluate(
                args,
                model,
                train_loader,
                val_loader,
                test_loader,
                device,
                metrics=metrics,
            )
        logging.info("... evaluation done!")
    else:
        raise ScriptError("Unknown mode: {}".format(args.mode))