def main(args):
    """Train a SchNet model according to the parsed command-line arguments.

    Args:
        args: parsed namespace; must provide ``cuda``, ``modelpath``,
            ``mode``, ``property`` and ``n_epochs``.

    Raises:
        NotImplementedError: if ``args.mode`` is anything other than "train".
    """
    # setup
    train_args = setup_run(args)
    device = torch.device("cuda" if args.cuda else "cpu")

    # get dataset
    environment_provider = get_environment_provider(train_args, device=device)
    dataset = get_dataset(train_args, environment_provider=environment_provider)

    # get dataloaders
    split_path = os.path.join(args.modelpath, "split.npz")
    train_loader, val_loader, test_loader = get_loaders(
        args, dataset=dataset, split_path=split_path, logging=logging)

    # define metrics
    metrics = get_metrics(train_args)

    # train or evaluate
    if args.mode == "train":
        # get statistics (atomic reference energies + normalization constants)
        atomref = dataset.get_atomref(args.property)
        mean, stddev = get_statistics(
            args=args,
            split_path=split_path,
            train_loader=train_loader,
            atomref=atomref,
            divide_by_atoms=get_divide_by_atoms(args),
            logging=logging,
        )

        # build model
        model = get_model(args, train_loader, mean, stddev, atomref,
                          logging=logging)

        # build trainer
        logging.info("training...")
        trainer = get_trainer(args, model, train_loader, val_loader, metrics)

        # run training
        trainer.train(device, n_epochs=args.n_epochs)
        logging.info("...training done!")
    else:
        # BUG FIX: the original did `raise ("...")`, which raises
        # TypeError ("exceptions must derive from BaseException") and
        # hides the intended message. Raise a real exception instead.
        raise NotImplementedError("Use the original SchnetPack script instead.")
def main(args):
    """Train a SchNet model on the OMDB dataset, or generate predictions.

    Args:
        args: parsed namespace; must provide ``cuda``, ``mode`` ("train" or
            "pred"), ``model_path``, ``datapath``, ``property`` and
            ``n_epochs``.

    Side effects:
        "train": trains a model under ``args.model_path`` (split.npz,
        best_model), prints the test MAE and plots results.
        "pred": loads ``best_model`` and writes ./predictions.csv.

    Raises:
        ValueError: if ``args.mode`` is neither "train" nor "pred".
    """
    # building model and dataset
    device = torch.device("cuda" if args.cuda else "cpu")
    # neighbor lists are built with a 5 Angstrom cutoff
    environment_provider = spk.environment.AseEnvironmentProvider(cutoff=5.0)
    omdb = './omdb'
    if args.mode == "train":
        # FIX: dropped the no-op single-argument os.path.join
        if not os.path.exists(args.model_path):
            os.makedirs(args.model_path)
        spk.utils.spk_utils.set_random_seed(None)  # None -> no fixed seed
        # FIX: test the same path we create (was the literal 'omdb')
        if not os.path.exists(omdb):
            os.makedirs(omdb)
        omdData = OrganicMaterialsDatabase(
            args.datapath,
            download=False,
            load_only=[args.property],
            environment_provider=environment_provider)
        # FIX: use the split file of the current model directory instead of a
        # hard-coded absolute path from a previous run (machine-specific
        # debugging leftover that breaks on any other host).
        split_path = os.path.join(args.model_path, "split.npz")
        train, val, test = spk.train_test_split(data=omdData,
                                                num_train=9000,
                                                num_val=1000,
                                                split_file=split_path)
        print('-----------')
        print(len(train))
        print(len(val))
        print(len(test))
        print('-------------')
        train_loader = spk.AtomsLoader(train,
                                       batch_size=16,
                                       sampler=RandomSampler(train),
                                       num_workers=4)
        val_loader = spk.AtomsLoader(val, batch_size=16, num_workers=2)
        test_loader = spk.AtomsLoader(test, batch_size=16, num_workers=2)
        atomref = omdData.get_atomref(args.property)
        mean, stddev = get_statistics(
            args=args,
            split_path=split_path,
            train_loader=train_loader,
            atomref=atomref,
            divide_by_atoms=get_divide_by_atoms(args),
            logging=logging)
        model_train = model(args, omdData, atomref, mean, stddev)
        trainer = train_model(args, model_train, train_loader, val_loader)
        print('started training')
        trainer.train(device=device, n_epochs=args.n_epochs)
        print('training finished')

        # evaluate the best checkpoint on the held-out test set
        sch_model = torch.load(os.path.join(args.model_path, 'best_model'))
        err = 0
        sch_model.eval()
        for count, batch in enumerate(test_loader):
            # move batch to GPU, if necessary
            batch = {k: v.to(device) for k, v in batch.items()}
            # apply model
            pred = sch_model(batch)
            # accumulate the summed absolute error over the whole test set
            tmp = torch.sum(
                torch.abs(pred[args.property] - batch[args.property]))
            tmp = tmp.detach().cpu().numpy(
            )  # detach from graph & convert to numpy
            err += tmp
            print(tmp)
            # log progress
            percent = '{:3.2f}'.format(count / len(test_loader) * 100)
            print('Progress:', percent + '%' + ' ' * (5 - len(percent)),
                  end="\r")
        err /= len(test)
        print('Test MAE', np.round(err, 3), 'eV =',
              np.round(err / (kcal / mol), 3), 'kcal/mol')
        # plot results
        plot_results(args)
    elif args.mode == "pred":
        print('predictionsss')
        sch_model = torch.load(os.path.join(args.model_path, 'best_model'),
                               map_location=torch.device(device))
        omdData = OrganicMaterialsDatabase(
            args.datapath,
            download=True,
            load_only=[args.property],
            environment_provider=environment_provider)
        # reuse the stored split so "test" matches the training run
        split_path = os.path.join(args.model_path, "split.npz")
        train, val, test = spk.train_test_split(data=omdData,
                                                num_train=9000,
                                                num_val=1000,
                                                split_file=split_path)
        print(len(test))
        test_loader = spk.AtomsLoader(test, batch_size=32)
        prediction_list = []
        actual_value_list = []
        print('Started generating predictions')
        for count, batch in enumerate(test_loader):
            # move batch to GPU, if necessary
            print('before batch')
            batch = {k: v.to(device) for k, v in batch.items()}
            print('after batch')
            # apply model
            pred = sch_model(batch)
            prediction_list.extend(
                pred['band_gap'].detach().cpu().numpy().flatten().tolist())
            actual_value_list.extend(
                batch['band_gap'].detach().cpu().numpy().flatten().tolist())
            # log progress
            percent = '{:3.2f}'.format(count / len(test_loader) * 100)
            print('Progress:', percent + '%' + ' ' * (5 - len(percent)),
                  end="\r")
        # NOTE(review): machine-specific hard-coded path, and it assumes rows
        # 10000+ of CODids.csv line up with the stored test split — confirm
        # before relying on the 'cod' column of predictions.csv.
        cod_arr = np.genfromtxt(
            os.path.join(
                '/home/s3754715/gnn_molecule/schnetpack/dataset/OMDB-GAP1_v1.1',
                'CODids.csv'))
        cod_list = cod_arr[10000:].tolist()
        results_df = pd.DataFrame({
            'cod': cod_list,
            'prediction': prediction_list,
            'actual': actual_value_list
        })
        results_df.to_csv('./predictions.csv')
    else:
        # FIX: unknown modes previously fell through silently
        raise ValueError("Unknown mode: {}".format(args.mode))
def main(args):
    """Entry point: train a model or evaluate a stored one, per ``args.mode``.

    Args:
        args: parsed namespace; must provide ``cuda``, ``modelpath``,
            ``mode`` ("train" or "eval"), ``property``, ``overwrite`` and
            ``n_epochs``.

    Raises:
        ScriptError: on an existing evaluation file without ``--overwrite``,
            or on an unknown ``args.mode``.
    """
    # common setup shared by both modes
    train_args = setup_run(args)
    device = torch.device("cuda" if args.cuda else "cpu")

    environment_provider = get_environment_provider(train_args, device=device)
    dataset = get_dataset(train_args, environment_provider=environment_provider)

    # data loaders, built from the split file stored with the model
    split_path = os.path.join(args.modelpath, "split.npz")
    train_loader, val_loader, test_loader = get_loaders(
        args, dataset=dataset, split_path=split_path, logging=logging)

    metrics = get_metrics(train_args)

    if args.mode == "train":
        # normalization statistics + atomic reference energies
        atomref = dataset.get_atomref(args.property)
        mean, stddev = get_statistics(
            args=args,
            split_path=split_path,
            train_loader=train_loader,
            atomref=atomref,
            divide_by_atoms=get_divide_by_atoms(args),
            logging=logging,
        )

        net = get_model(args, train_loader, mean, stddev, atomref,
                        logging=logging)

        logging.info("training...")
        trainer = get_trainer(args, net, train_loader, val_loader, metrics)
        trainer.train(device, n_epochs=args.n_epochs)
        logging.info("...training done!")

    elif args.mode == "eval":
        # refuse to clobber an existing evaluation file unless --overwrite
        evaluation_fp = os.path.join(args.modelpath, "evaluation.txt")
        if os.path.exists(evaluation_fp):
            if not args.overwrite:
                raise ScriptError(
                    "The evaluation file does already exist at {}! Add overwrite flag"
                    " to remove.".format(evaluation_fp))
            os.remove(evaluation_fp)

        logging.info("loading trained model...")
        best_model = torch.load(os.path.join(args.modelpath, "best_model"))

        logging.info("evaluating...")
        if spk.utils.get_derivative(train_args) is not None:
            # derivative properties need the autograd graph
            evaluate(
                args,
                best_model,
                train_loader,
                val_loader,
                test_loader,
                device,
                metrics=metrics,
            )
        else:
            with torch.no_grad():
                evaluate(
                    args,
                    best_model,
                    train_loader,
                    val_loader,
                    test_loader,
                    device,
                    metrics=metrics,
                )
        logging.info("... evaluation done!")

    else:
        raise ScriptError("Unknown mode: {}".format(args.mode))