def createDataloader(self, traindb='Data/dataset_10_12_train_combined.db',
                     benchdb='Data/dataset_10_12_test.db',
                     traindata='../../Data/combined1618/',
                     benchdata='../../Data/test/',
                     indexpath='../../Data/INDEX_refined_data.2016.2018',
                     properties=['KD'], threshold=10, cutoff=8, numVal=150,
                     featureset=False, trainBatchsize=8, valBatchsize=1,
                     benchBatchsize=1, natoms=None, props=False, ntrain=4444,
                     ntest=290, splitfile=None, noProtons=False):
    """Build train/validation/benchmark AtomsLoaders, creating the databases on first use.

    A database that already contains entries (len > 0) is reused as-is; an
    empty one is populated either from a feature file (``featureset=True``)
    or from raw data + index file.

    Returns:
        (train_loader, val_loader, bench_loader) tuple of AtomsLoaders.

    NOTE: ``properties`` keeps its original mutable list default for
    backward compatibility; it is never mutated here.
    """
    # Append-only call log; `with` guarantees the handle is closed even on
    # error (fix: original used bare open()/close()).
    with open("log.txt", "a") as f:
        f.writelines(str(datetime.datetime.now()) + ' call of createLoader' + '\n')

    def _ensure_db(dbpath, datapath, length):
        # Open (or create) an AtomsData database and fill it only if empty.
        db = schnetpack.data.AtomsData(
            dbpath,
            available_properties=properties,
            environment_provider=schnetpack.environment.TorchEnvironmentProvider(
                cutoff, torch.device('cpu')))
        if len(db) == 0:
            if featureset:
                PreprocessingSchnet.createDatabaseFromFeatureset(
                    db, threshold=threshold, featureFile=datapath,
                    length=length, noProtons=noProtons)
            else:
                PreprocessingSchnet.createDatabase(
                    db, threshold=threshold, data_path=datapath,
                    index_path=indexpath)
        return db

    train = _ensure_db(traindb, traindata, ntrain)
    bench = _ensure_db(benchdb, benchdata, ntest)

    # Split the training DB; `test` from the split is intentionally unused —
    # the separate benchmark DB plays that role.
    train, val, test = schnetpack.train_test_split(
        data=train, num_val=numVal, num_train=len(train) - numVal,
        split_file=splitfile, log='log.txt')
    print(len(train), len(bench), len(val))

    # Only the training loader shuffles.
    train_loader = schnetpack.AtomsLoader(train, batch_size=trainBatchsize,
                                          shuffle=True, natoms=natoms, props=props)
    val_loader = schnetpack.AtomsLoader(val, batch_size=valBatchsize,
                                        shuffle=False, natoms=natoms, props=props)
    bench_loader = schnetpack.AtomsLoader(bench, batch_size=benchBatchsize,
                                          shuffle=False, natoms=natoms, props=props)
    return train_loader, val_loader, bench_loader
def get_data(args, properties): split_file = os.path.join(args.model_dir, "split.npz") if not args.split_file else args.split_file dataset = QM9(args.db, load_only=properties) train, val, test = spk.train_test_split( dataset, num_train=args.ntr, num_val=args.nva, split_file=split_file ) assert len(train) == args.ntr assert len(val) == args.nva train_loader = spk.AtomsLoader(train, batch_size=args.bs, shuffle=True, num_workers=args.num_workers) val_loader = spk.AtomsLoader(val, batch_size=args.bs, num_workers=args.num_workers) test_loader = spk.AtomsLoader(test, batch_size=args.bs, num_workers=args.num_workers) return dataset, split_file, train_loader, val_loader, test_loader
def schnet_predict_F(self, indices): m = self.curr_model test = self.dataset.create_subset(indices) ind0 = indices[0] import schnetpack as spk test_loader = spk.AtomsLoader(test, batch_size=100) preds = [] import torch if torch.cuda.is_available(): device = "cuda" else: device = "cpu" for count, batch in enumerate(test_loader): print(f"{count}/{len(test_loader)}", end="\r") batch = {k: v.to(device) for k, v in batch.items()} preds.append(m(batch)["forces"].detach().cpu().numpy()) F = np.concatenate(preds) return F.reshape(len(F), -1)
def predict_E_MD17(self, indices): m = self.MD17_model test = self.dataset.create_subset(indices) import schnetpack as spk test_loader = spk.AtomsLoader(test, batch_size=1000) preds = [] for count, batch in enumerate(test_loader): #TBA currently only supports single batch preds.append(m(batch)['energy'].detach().cpu().numpy()) return np.concatenate(preds)
def test_set_stds(): dataset = QM9('qm9.db') _, _, test = spk.train_test_split( dataset, num_train=109000, num_val=1000, split_file='pst.npz' ) test_loader = spk.AtomsLoader(test, batch_size=len(test)) for item in test_loader: break outs = {tn: item[tn].std() for tn in TARGET_NAMES} return outs
def gnn_pred(cif_file): # device = torch.device("cuda" if args.cuda else "cpu") device = "cpu" sch_model = torch.load(os.path.join("./schnetpack/model", 'best_model'), map_location=torch.device(device)) test_dataset = AtomsData('./cod_predict.db') test_loader = spk.AtomsLoader(test_dataset, batch_size=32) prediction_list = [] for count, batch in enumerate(test_loader): # move batch to GPU, if necessary print('before batch') batch = {k: v.to(device) for k, v in batch.items()} print('after batch') # apply model pred = sch_model(batch) prediction_list.extend( pred['band_gap'].detach().cpu().numpy().flatten().tolist()) return prediction_list[0]
def schnet_predict_E(self, indices): m = self.curr_model test = self.dataset.create_subset(indices) import schnetpack as spk test_loader = spk.AtomsLoader(test, batch_size=100) preds = [] import torch if torch.cuda.is_available(): device = 'cuda' else: device = 'cpu' for count, batch in enumerate(test_loader): print(f'{count}/{len(test_loader)}', end='\r') batch = {k: v.to(device) for k, v in batch.items()} preds.append(m(batch)['energy'].detach().cpu().numpy()) return np.concatenate(preds)
logging.basicConfig(level=os.environ.get("LOGLEVEL", "INFO")) # basic settings model_dir = "qm9_model" os.makedirs(model_dir) properties = [QM9.U0] # data preparation logging.info("get dataset") dataset = QM9("data/qm9.db", properties=[QM9.U0]) train, val, test = spk.train_test_split( dataset, 1000, 100, os.path.join(model_dir, "split.npz") ) train_loader = spk.AtomsLoader(train, batch_size=64) val_loader = spk.AtomsLoader(val, batch_size=64) # statistics atomrefs = dataset.get_atomrefs(properties) means, stddevs = train_loader.get_statistics( properties, per_atom=True, atomrefs=atomrefs ) # model build logging.info("build model") representation = spk.SchNet(n_interactions=6) output_modules = [ spk.Atomwise( property=QM9.U0, mean=means[QM9.U0],
length=4444) bench = schnetpack.data.AtomsData('Data/dataset_10_12_test_features.db', available_properties=['KD', 'props'], environment_provider=schnetpack.environment.TorchEnvironmentProvider(10., torch.device( 'cpu'))) if len(bench) == 0: PreprocessingSchnet.createDatabaseFromFeatureset(bench, featureFile='Data/Schnet/testSchnetKDeep', threshold=12, length=290) train, val, test = schnetpack.train_test_split(data=train, num_val=150, num_train=len(train) - 150) print(len(train), len(bench), len(val)) # Create Dataloader for Training train_loader = schnetpack.AtomsLoader(train, batch_size=8, shuffle=True, natoms=None, props=False) val_loader = schnetpack.AtomsLoader(val, batch_size=1, shuffle=False, natoms=None, props=False) # Call ShiftedSigmoid for Activation -> You can use, but you do not have to act = ShiftedSigmoid() # Create Model and Optimizer -> Please Note, that here a modified Fork from the original schnetpack is used # This allows a Noise on the Positions, also the reducing Featurevektor in interaction-layer is part of the modification model = schnetpack.representation.SchNet(use_noise=False, noise_mean=0.0, noise_std=0.1, chargeEmbedding = True, ownFeatures = False, nFeatures = 8, finalFeature = None, max_z=200, n_atom_basis=20, n_filters=[32, 24, 16, 8, 4], n_gaussians=25, normalize_filter=False, coupled_interactions=False, trainable_gaussians=False, n_interactions=5, cutoff=2.5, cutoff_network=schnetpack.nn.cutoff.CosineCutoff) # Modification of schnetpack allows an activation-function on the output of the output-network d = schnetpack.atomistic.Atomwise(n_in=20, aggregation_mode='avg',
def main(args):
    """Train or evaluate a SchNet model on the Organic Materials Database (OMDB).

    args.mode == "train": build loaders, train, then report test MAE.
    args.mode == "pred":  load the stored best model and dump test-set
    predictions to ./predictions.csv.
    """
    #building model and dataset
    device = torch.device("cuda" if args.cuda else "cpu")
    environment_provider = spk.environment.AseEnvironmentProvider(cutoff=5.0)
    omdb = './omdb'
    if args.mode == "train":
        if not os.path.exists(os.path.join(args.model_path)):
            os.makedirs(args.model_path)
        # Seed is None -> a random seed is drawn each run (non-reproducible).
        spk.utils.spk_utils.set_random_seed(None)
        if not os.path.exists('omdb'):
            os.makedirs(omdb)
        omdData = OrganicMaterialsDatabase(
            args.datapath,
            download=False,
            load_only=[args.property],
            environment_provider=environment_provider)
        # split_path = os.path.join(args.model_path, "split.npz")
        # NOTE(review): the split file is hard-coded to a specific past run's
        # directory instead of args.model_path — confirm this is intentional
        # (reusing a fixed split) and not a leftover.
        split_path = os.path.join(
            '/home/s3754715/gnn_molecule/schnetpack/model_2020-06-23-18-44-59',
            "split.npz")
        train, val, test = spk.train_test_split(data=omdData,
                                                num_train=9000,
                                                num_val=1000,
                                                split_file=split_path)
        print('-----------')
        print(len(train))
        print(len(val))
        print(len(test))
        print('-------------')
        train_loader = spk.AtomsLoader(train,
                                       batch_size=16,
                                       sampler=RandomSampler(train),
                                       num_workers=4
                                       #pin_memory=True
                                       )
        val_loader = spk.AtomsLoader(val, batch_size=16, num_workers=2)
        test_loader = spk.AtomsLoader(test, batch_size=16, num_workers=2)
        # Target statistics (mean/stddev), cached per split by get_statistics.
        atomref = omdData.get_atomref(args.property)
        mean, stddev = get_statistics(
            args=args,
            split_path=split_path,
            train_loader=train_loader,
            atomref=atomref,
            divide_by_atoms=get_divide_by_atoms(args),
            logging=logging)
        # means, stddevs = train_loader.get_statistics(
        #     args.property, get_divide_by_atoms(args),atomref
        # )
        model_train = model(args, omdData, atomref, mean, stddev)
        trainer = train_model(args, model_train, train_loader, val_loader)
        print('started training')
        trainer.train(device=device, n_epochs=args.n_epochs)
        print('training finished')
        # Evaluate the checkpointed best model (not the last state) on test.
        sch_model = torch.load(os.path.join(args.model_path, 'best_model'))
        err = 0
        sch_model.eval()
        for count, batch in enumerate(test_loader):
            # move batch to GPU, if necessary
            batch = {k: v.to(device) for k, v in batch.items()}
            # apply model
            pred = sch_model(batch)
            # calculate absolute error (summed over the batch)
            tmp = torch.sum(
                torch.abs(pred[args.property] - batch[args.property]))
            tmp = tmp.detach().cpu().numpy(
            )  # detach from graph & convert to numpy
            err += tmp
            print(tmp)
            # log progress
            percent = '{:3.2f}'.format(count / len(test_loader) * 100)
            print('Progress:',
                  percent + '%' + ' ' * (5 - len(percent)),
                  end="\r")
        # Mean absolute error per test sample.
        err /= len(test)
        print('Test MAE', np.round(err, 3), 'eV =',
              np.round(err / (kcal / mol), 3), 'kcal/mol')
        #plot results
        plot_results(args)
    elif args.mode == "pred":
        print('predictionsss')
        sch_model = torch.load(os.path.join(args.model_path, 'best_model'),
                               map_location=torch.device(device))
        #reading test data
        # test_dataset = AtomsData('./cod_predict.db')
        # test_loader = spk.AtomsLoader(test_dataset, batch_size=32)
        #reading stored cod list
        #cod_list = np.load('./cod_id_list_old.npy')
        # NOTE(review): download=True here but False in the train branch —
        # confirm the asymmetry is intended.
        omdData = OrganicMaterialsDatabase(
            args.datapath,
            download=True,
            load_only=[args.property],
            environment_provider=environment_provider)
        split_path = os.path.join(args.model_path, "split.npz")
        train, val, test = spk.train_test_split(data=omdData,
                                                num_train=9000,
                                                num_val=1000,
                                                split_file=split_path)
        print(len(test))
        test_loader = spk.AtomsLoader(
            test,
            batch_size=32,
            #num_workers=2
        )
        mean_abs_err = 0
        prediction_list = []
        actual_value_list = []
        print('Started generating predictions')
        for count, batch in enumerate(test_loader):
            # move batch to GPU, if necessary
            print('before batch')
            batch = {k: v.to(device) for k, v in batch.items()}
            print('after batch')
            # apply model
            pred = sch_model(batch)
            prediction_list.extend(
                pred['band_gap'].detach().cpu().numpy().flatten().tolist())
            actual_value_list.extend(
                batch['band_gap'].detach().cpu().numpy().flatten().tolist())
            # log progress
            percent = '{:3.2f}'.format(count / len(test_loader) * 100)
            print('Progress:',
                  percent + '%' + ' ' * (5 - len(percent)),
                  end="\r")
        # NOTE(review): assumes rows 10000+ of CODids.csv line up with the
        # test split ordering — verify against how the split was generated.
        cod_arr = np.genfromtxt(
            os.path.join(
                '/home/s3754715/gnn_molecule/schnetpack/dataset/OMDB-GAP1_v1.1',
                'CODids.csv'))
        cod_list = cod_arr[10000:].tolist()
        results_df = pd.DataFrame({
            'cod': cod_list,
            'prediction': prediction_list,
            'actual': actual_value_list
        })
        results_df.to_csv('./predictions.csv')
def run(split_path, dataset_path, n_train=None, n_val=None, n_epochs=1000):
    """Train a SchNet energy+forces model on an MD17 dataset.

    Cleans the "Info" storage directory, fits for ``n_epochs`` epochs, and
    renames the resulting best model to Info/model_new.

    NOTE(review): ``n_train`` and ``n_val`` are accepted but never used —
    the split comes entirely from ``split_path``; confirm whether they
    should be forwarded to train_test_split.
    """
    storage_dir = "Info"
    if not os.path.exists(storage_dir):
        os.makedirs(storage_dir)
    # Remove artifacts of any previous run so training starts clean.
    if os.path.exists(os.path.join(storage_dir, "checkpoints")):
        shutil.rmtree(os.path.join(storage_dir, "checkpoints"))
    if os.path.exists(os.path.join(storage_dir, "log.csv")):
        os.remove(os.path.join(storage_dir, "log.csv"))
    if os.path.exists(os.path.join(storage_dir, "best_model")):
        os.remove(os.path.join(storage_dir, "best_model"))

    data = MD17(dataset_path)
    atoms, properties = data.get_properties(0)

    # Split sizes are taken from the existing split file.
    train, val, test = spk.train_test_split(
        data=data,
        split_file=split_path,
    )
    train_loader = spk.AtomsLoader(train, batch_size=100, shuffle=True)
    val_loader = spk.AtomsLoader(val, batch_size=100)

    # Per-atom energy statistics used to standardize the model output.
    means, stddevs = train_loader.get_statistics(
        spk.datasets.MD17.energy, divide_by_atoms=True
    )
    with open("out.txt", "w+") as file:
        file.write("IN MD17_train")
    print('Mean atomization energy / atom: {:12.4f} [kcal/mol]'.format(means[MD17.energy][0]))
    print('Std. dev. atomization energy / atom: {:12.4f} [kcal/mol]'.format(stddevs[MD17.energy][0]))

    n_features = 64
    schnet = spk.representation.SchNet(
        n_atom_basis=n_features,
        n_filters=n_features,
        n_gaussians=25,
        n_interactions=6,
        cutoff=5.,
        cutoff_network=spk.nn.cutoff.CosineCutoff
    )
    # Energy head; forces are obtained as the negative energy gradient.
    energy_model = spk.atomistic.Atomwise(
        n_in=n_features,
        property=MD17.energy,
        mean=means[MD17.energy],
        stddev=stddevs[MD17.energy],
        derivative=MD17.forces,
        negative_dr=True
    )
    model = spk.AtomisticModel(representation=schnet, output_modules=energy_model)

    # tradeoff between energy loss and force loss
    rho_tradeoff = 0.1

    optimizer = Adam(model.parameters(), lr=1e-3)

    # loss function
    def loss(batch, result):
        # compute the mean squared error on the energies
        diff_energy = batch[MD17.energy] - result[MD17.energy]
        err_sq_energy = torch.mean(diff_energy ** 2)
        # compute the mean squared error on the forces
        diff_forces = batch[MD17.forces] - result[MD17.forces]
        err_sq_forces = torch.mean(diff_forces ** 2)
        # build the combined loss function (weighted by rho_tradeoff)
        err_sq = rho_tradeoff * err_sq_energy + (1 - rho_tradeoff) * err_sq_forces
        return err_sq

    # set up metrics
    metrics = [
        spk.metrics.MeanAbsoluteError(MD17.energy),
        spk.metrics.MeanAbsoluteError(MD17.forces)
    ]

    # construct hooks: CSV logging + LR decay that stops at min_lr
    hooks = [
        trn.CSVHook(log_path=storage_dir, metrics=metrics),
        trn.ReduceLROnPlateauHook(
            optimizer,
            patience=150, factor=0.8, min_lr=1e-6,
            stop_after_min=True
        )
    ]

    trainer = trn.Trainer(
        model_path=storage_dir,
        model=model,
        hooks=hooks,
        loss_fn=loss,
        optimizer=optimizer,
        train_loader=train_loader,
        validation_loader=val_loader,
    )

    # check if a GPU is available and use a CPU otherwise
    if torch.cuda.is_available():
        device = "cuda"
    else:
        device = "cpu"

    # determine number of epochs and train
    trainer.train(
        device=device,
        n_epochs=n_epochs
    )
    # Keep the best checkpoint under a stable name for downstream use.
    os.rename(os.path.join(storage_dir, "best_model"),
              os.path.join(storage_dir, "model_new"))
def schnet_train_default(self, train_indices, model_path, old_model_path, schnet_args):
    """Train a default SchNet energy+forces model on a subset of self.dataset.

    Args:
        train_indices: either an int (number of random training points) or an
            explicit array of dataset indices to train on.
        model_path: directory created at the end to hold the trained model
            and its split file.
        old_model_path: accepted for interface compatibility; not used here.
        schnet_args: dict of hyperparameters; missing keys fall back to the
            defaults read below.

    Side effects: creates a working dir under self.temp_dir, writes split.npz,
    trains, then moves the best model + split into model_path.
    """
    import schnetpack as spk
    import schnetpack.train as trn
    import torch

    n_val = schnet_args.get("n_val", 100)

    # LOADING train, val, test
    # Common storage prep (was duplicated in both branches).
    if isinstance(train_indices, int):  # fix: was `type(train_indices) == int`
        n_train = train_indices
    else:
        n_train = len(train_indices)
    storage = os.path.join(self.temp_dir, f"schnet_{n_train}")
    if not os.path.exists(storage):
        os.mkdir(storage)
    split_path = os.path.join(storage, "split.npz")

    if isinstance(train_indices, int):
        # Random split of the requested sizes; indices persisted to split_path.
        train, val, test = spk.train_test_split(data=self.dataset,
                                                num_train=n_train,
                                                num_val=n_val,
                                                split_file=split_path)
    else:
        # Explicit training indices: draw a random validation set from the
        # remainder, everything else becomes test; then reload via split file.
        all_ind = np.arange(len(self.dataset))
        # train
        train_ind = train_indices
        all_ind = np.delete(all_ind, train_ind)
        # val
        val_ind_ind = np.random.choice(np.arange(len(all_ind)), n_val, replace=False)
        val_ind = all_ind[val_ind_ind]
        all_ind = np.delete(all_ind, val_ind_ind)
        split_dict = {
            "train_idx": train_ind,
            "val_idx": val_ind,
            "test_idx": all_ind,
        }
        np.savez_compressed(split_path, **split_dict)
        train, val, test = spk.train_test_split(data=self.dataset,
                                                split_file=split_path)

    print_ongoing_process(f"Preparing SchNet training, {len(train)} points", True)

    # Hyperparameters with defaults.
    batch_size = schnet_args.get("batch_size", 10)
    n_features = schnet_args.get("n_features", 64)
    n_gaussians = schnet_args.get("n_gaussians", 25)
    n_interactions = schnet_args.get("n_interactions", 6)
    cutoff = schnet_args.get("cutoff", 5.0)
    learning_rate = schnet_args.get("learning_rate", 1e-3)
    rho_tradeoff = schnet_args.get("rho_tradeoff", 0.1)
    patience = schnet_args.get("patience", 5)
    n_epochs = schnet_args.get("n_epochs", 100)

    # PRINTING INFO
    i = {}
    i["batch_size"], i["n_features"] = batch_size, n_features
    i["n_gaussians"], i["n_interactions"] = n_gaussians, n_interactions
    i["cutoff"], i["learning_rate"] = cutoff, learning_rate
    i["rho_tradeoff"], i["patience"] = rho_tradeoff, patience
    i["n_epochs"], i["n_val"] = n_epochs, n_val
    print_table("Parameters", None, None, i, width=20)
    print()

    train_loader = spk.AtomsLoader(train, shuffle=True, batch_size=batch_size)
    val_loader = spk.AtomsLoader(val, batch_size=batch_size)

    # STATISTICS + PRINTS
    means, stddevs = train_loader.get_statistics("energy", divide_by_atoms=True)
    print_info(
        "Mean atomization energy / atom: {:12.4f} [kcal/mol]".format(
            means["energy"][0]))
    print_info(
        "Std. dev. atomization energy / atom: {:12.4f} [kcal/mol]".format(
            stddevs["energy"][0]))

    # LOADING MODEL
    print_ongoing_process("Loading representation and model")
    schnet = spk.representation.SchNet(
        n_atom_basis=n_features,
        n_filters=n_features,
        n_gaussians=n_gaussians,
        n_interactions=n_interactions,
        cutoff=cutoff,
        cutoff_network=spk.nn.cutoff.CosineCutoff,
    )
    # Energy head; forces come from the negative energy gradient.
    energy_model = spk.atomistic.Atomwise(
        n_in=n_features,
        property="energy",
        mean=means["energy"],
        stddev=stddevs["energy"],
        derivative="forces",
        negative_dr=True,
    )
    model = spk.AtomisticModel(representation=schnet, output_modules=energy_model)
    print_ongoing_process("Loading representation and model", True)

    # OPTIMIZER AND LOSS
    print_ongoing_process("Defining loss function and optimizer")
    from torch.optim import Adam
    optimizer = Adam(model.parameters(), lr=learning_rate)

    def loss(batch, result):
        # compute the mean squared error on the energies
        diff_energy = batch["energy"] - result["energy"]
        err_sq_energy = torch.mean(diff_energy**2)
        # compute the mean squared error on the forces
        diff_forces = batch["forces"] - result["forces"]
        err_sq_forces = torch.mean(diff_forces**2)
        # combined loss, weighted by rho_tradeoff
        return rho_tradeoff * err_sq_energy + (1 - rho_tradeoff) * err_sq_forces

    print_ongoing_process("Defining loss function and optimizer", True)

    # METRICS AND HOOKS
    print_ongoing_process("Setting up metrics and hooks")
    metrics = [
        spk.metrics.MeanAbsoluteError("energy"),
        spk.metrics.MeanAbsoluteError("forces"),
    ]
    hooks = [
        trn.CSVHook(log_path=storage, metrics=metrics),
        # Fix: use the configured `patience` (was hard-coded to 5, silently
        # ignoring schnet_args["patience"]).
        trn.ReduceLROnPlateauHook(optimizer,
                                  patience=patience,
                                  factor=0.8,
                                  min_lr=1e-6,
                                  stop_after_min=True),
    ]
    print_ongoing_process("Setting up metrics and hooks", True)

    print_ongoing_process("Setting up trainer")
    trainer = trn.Trainer(
        model_path=storage,
        model=model,
        hooks=hooks,
        loss_fn=loss,
        optimizer=optimizer,
        train_loader=train_loader,
        validation_loader=val_loader,
    )
    print_ongoing_process("Setting up trainer", True)

    if torch.cuda.is_available():
        device = "cuda"
        print_info(f"Cuda cores found, training on GPU")
    else:
        device = "cpu"
        print_info(f"No cuda cores found, training on CPU")

    # Fix: "ecpochs" typo in the status message.
    print_ongoing_process(f"Training {n_epochs} epochs, out in {storage}")
    trainer.train(device=device, n_epochs=n_epochs)
    print_ongoing_process(f"Training {n_epochs} epochs, out in {storage}", True)

    # Publish the best model and its split under model_path.
    os.mkdir(model_path)
    os.rename(os.path.join(storage, "best_model"), os.path.join(model_path, "model"))
    shutil.copy(split_path, os.path.join(model_path, "split.npz"))
from schnetpack.train.metrics import MeanAbsoluteError from schnetpack.train import build_mse_loss logging.basicConfig(level=os.environ.get("LOGLEVEL", "INFO")) # basic settings model_dir = "qm9_model" os.makedirs(model_dir) properties = [QM9.U0] # data preparation logging.info("get dataset") dataset = QM9("data/qm9.db", load_only=[QM9.U0]) train, val, test = spk.train_test_split(dataset, 1000, 100, os.path.join(model_dir, "split.npz")) train_loader = spk.AtomsLoader(train, batch_size=64, shuffle=True) val_loader = spk.AtomsLoader(val, batch_size=64) # statistics atomrefs = dataset.get_atomrefs(properties) means, stddevs = train_loader.get_statistics(properties, get_atomwise_statistics=True, single_atom_ref=atomrefs) # model build logging.info("build model") representation = spk.SchNet(n_interactions=6) output_modules = [ spk.Atomwise( property=QM9.U0, mean=means[QM9.U0],
def main(args):
    """Run the stored OMDB model over part of the test split and build a graph.

    Relies on module-level globals defined elsewhere in this file:
    formula_dict, atom_id_input_arr, cod, neighbour_list, outputExtract,
    constGraph.
    """
    print('predictionsss')
    device = torch.device("cuda" if args.cuda else "cpu")
    environment_provider = spk.environment.AseEnvironmentProvider(cutoff=5.0)
    sch_model = torch.load(os.path.join(args.model_path, 'best_model'),
                           map_location=torch.device(device))
    # sch_model.representation.embedding.register_forward_hook(inputExtract)
    # Capture activations of a specific inner output layer during forward().
    sch_model.output_modules[0].out_net[1].out_net[1].register_forward_hook(
        outputExtract)
    # for name, module in sch_model.named_modules():
    #     print(name)
    #reading test data
    # test_dataset = AtomsData('./cod_predict.db')
    # test_loader = spk.AtomsLoader(test_dataset, batch_size=32)
    #reading stored cod list
    #cod_list = np.load('./cod_id_list_old.npy')
    omdData = OrganicMaterialsDatabase(
        args.datapath,
        download=False,
        load_only=[args.property],
        environment_provider=environment_provider)
    split_path = os.path.join(args.model_path, "split.npz")
    train, val, test = spk.train_test_split(data=omdData,
                                            num_train=9000,
                                            num_val=1000,
                                            split_file=split_path)

    #constructing chemical formula and COD array
    # NOTE(review): maps chemical formula -> dataset row; duplicate formulas
    # overwrite earlier entries, so lookups return the LAST matching row.
    for rec_num in range(0, len(omdData)):
        chem_formula = omdData.get_atoms(idx=rec_num).get_chemical_formula()
        formula_dict[chem_formula] = rec_num
    cod_array = np.genfromtxt(os.path.join(
        '/home/s3754715/gnn_molecule/schnetpack/dataset/OMDB-GAP1_v1.1',
        'CODids.csv'),
                              delimiter=',',
                              dtype=(int))

    #to fetch the COD using chemical formula from dictionary at random ids to construct data to print in graph
    # NOTE(review): loop variable `id` shadows the builtin — candidate rename.
    for id in [0, 10, 20, 50]:
        atom_id_input_arr.append(test.get_atoms(idx=id).get_chemical_symbols())
        chem_formula = test.get_atoms(idx=id).get_chemical_formula()
        cod.append(cod_array[formula_dict[chem_formula]])
        print(test[id]['_neighbors'].numpy().shape)
        neighbour_list.append(test[id]['_neighbors'].numpy().tolist())

    # batch_size=1 so each iteration handles exactly one structure.
    test_loader = spk.AtomsLoader(
        test,
        batch_size=1,
        #num_workers=2
    )
    mean_abs_err = 0
    prediction_list = []
    actual_value_list = []
    print('Started generating predictions')
    #to stop pred after reaching max rec_ct and start constructing graph
    rec_id = 0
    for count, batch in enumerate(test_loader):
        # Only the first 51 records are predicted; then the graph is built.
        rec_id += 1
        if (rec_id > 51):
            break
        # move batch to GPU, if necessary
        batch = {k: v.to(device) for k, v in batch.items()}
        # apply model
        pred = sch_model(batch)
        prediction_list.extend(
            pred['band_gap'].detach().cpu().numpy().flatten().tolist())
        actual_value_list.extend(
            batch['band_gap'].detach().cpu().numpy().flatten().tolist())
        # log progress
        percent = '{:3.2f}'.format(count / len(test_loader) * 100)
        print('Progress:',
              percent + '%' + ' ' * (5 - len(percent)),
              end="\r")
    constGraph()
# %% # import matplotlib.pyplot as plt # from ase.visualize import view # # view(atoms, viewer='x3d') # plt.show() # %% train, val, test = spk.train_test_split( data=cspbbr3_data, num_train=100 - 000, num_val=100 - 000, split_file=os.path.join(forcetut, "split.npz"), ) train_loader = spk.AtomsLoader(train, batch_size=batch_size, shuffle=True) val_loader = spk.AtomsLoader(val, batch_size=batch_size) # print(next(iter(train_loader))) # %% means, stddevs = train_loader.get_statistics( 'energy', # divide_by_atoms=True ) print('Mean atomization energy / atom: {:12.4f} [eV]'.format( means['energy'][0])) print('Std. dev. atomization energy / atom: {:12.4f} [eV]'.format( stddevs['energy'][0])) # %% n_features = 8