def __init__(self, training_data, model, label, save_logs=True):
    """ASE-calculator wrapper around a trained skorch model.

    Creates the ``results/`` directory tree, stores the dataset-derived
    scalings and fingerprint metadata needed at prediction time, and logs
    the full training configuration.

    Parameters
    ----------
    training_data : dataset object
        Supplies scalings, symmetry functions (Gs), fingerprint ranges,
        descriptor and (optionally) delta-model data.
    model : trained skorch estimator
        Its hyperparameters are logged below.
    label : str
        Basename for the saved model (.pt) and the log file.
    save_logs : bool
        Whether log files should be retained (stored on the instance).
    """
    Calculator.__init__(self)
    os.makedirs("results/", exist_ok=True)
    os.makedirs("results/trained_models", exist_ok=True)
    os.makedirs("results/logs", exist_ok=True)
    # Fix: `save_logs` was accepted but silently dropped; store it for
    # parity with the other calculator classes in this module.
    self.save_logs = save_logs
    self.model = model
    self.testlabel = label
    self.label = "".join(["results/trained_models/", label, ".pt"])
    self.scalings = training_data.scalings
    self.target_ref_per_atom = self.scalings[0]
    self.delta_ref_per_atom = self.scalings[1]
    self.delta = training_data.delta
    self.Gs = training_data.Gs
    self.fprange = training_data.fprange
    self.descriptor = training_data.base_descriptor
    self.cores = training_data.cores
    self.training_data = training_data
    if self.delta:
        self.params = self.training_data.delta_data[3]
        self.delta_model = self.training_data.delta_data[4]
    # TODO make utility logging function
    self.log = Logger("results/logs/{}.txt".format(label))
    if not self.delta:
        # NOTE(review): header appears to be stamped only in the
        # non-delta path (the delta pipeline logs its own) — confirm
        # against the delta-fitting class.
        self.log(time.asctime())
        self.log("-" * 50)
    self.log("Filename: {}".format(label))
    self.log("Dataset size: {}".format(len(self.training_data)))
    self.log("Target scaling: {}".format(self.scalings))
    self.log("Symmetry function parameters:")
    for i in self.Gs.keys():
        self.log(" {}: {}".format(i, self.Gs[i]))
    self.log("Device: {:s}".format(self.model.device))
    self.log("Model: {}".format(model.module))
    self.log(
        "Architecture:\n Input Layer - {}\n # of Hidden Layers - {}\n Nodes/Layer - {}"
        .format(
            self.model.module.architecture[0],
            self.model.module.architecture[1] - 1,
            self.model.module.architecture[2],
        ))
    self.log("Loss Function: {}".format(self.model.criterion))
    self.log("Force coefficient: {}".format(
        self.model.criterion__force_coefficient))
    self.log("Optimizer: {}".format(self.model.optimizer))
    self.log("Learning Rate: {}".format(self.model.lr))
    self.log("Batch Size: {}".format(self.model.batch_size))
    self.log("Epochs: {}".format(self.model.max_epochs))
    self.log("Shuffle: {}".format(self.model.iterator_train__shuffle))
    if self.model.train_split != 0:
        self.log(
            "Train Split (k-fold if int, fraction if float): {}\n".format(
                self.model.train_split.cv))
    else:
        self.log(
            "Train Split (k-fold if int, fraction if float): {}\n".format(
                self.model.train_split))
def __init__(self, images, params, cutoff, filename, combo='mean'):
    """Prepare a pair-potential fit over *images*.

    Hashes the images, builds neighborlists with the given ``cutoff``,
    and logs the starting parameters.

    Parameters
    ----------
    images : list of atoms objects to fit against.
    params : initial per-element parameters for the fit.
    cutoff : float, neighborlist cutoff radius.
    filename : str, basename for the log file.
    combo : str, how per-pair parameters are combined (default 'mean').
    """
    # Fix: os.makedirs(..., exist_ok=True) creates the whole tree in one
    # call and avoids the TOCTOU race of the old exists()+mkdir pattern.
    os.makedirs("results/logs", exist_ok=True)
    self.filename = filename
    self.data = images
    self.params = params
    self.combo = combo
    self.cutoff = cutoff
    self.hashed_images = hash_images(images)
    self.hashed_keys = list(self.hashed_images.keys())
    calc = NeighborlistCalculator(cutoff=cutoff)
    self.neighborlist = Data(filename="amp-data-neighborlists",
                             calculator=calc)
    self.neighborlist.calculate_items(self.hashed_images)
    log = Logger("results/logs/{}.txt".format(filename))
    self.logresults(log, self.params)
def md_run(images, count, calc, filename, dir, temp, cons_t=False):
    """Generates test or training data with a simple MD simulation.

    Parameters
    ----------
    images : list of ase.Atoms; ``images[0]`` seeds the simulation.
    count : int, number of outer MD segments (20 steps each) to run.
    calc : ASE calculator attached to the slab.
    filename : str, basename for the trajectory and the log file.
    dir : str, output directory for the trajectory. (Shadows the `dir`
        builtin, but renaming would break keyword callers.)
    temp : float, thermostat temperature (multiplied by units.kB).
    cons_t : bool, run constant-temperature Langevin dynamics if True,
        otherwise NVE VelocityVerlet.
    """
    log = Logger("results/logs/" + filename + ".txt")
    # Fix: the path was previously built by bare concatenation, which
    # produced e.g. "datafile.traj" when `dir` lacked a trailing
    # separator; os.path.join handles both forms correctly.
    traj = ase.io.Trajectory(os.path.join(dir, filename + ".traj"), "w")
    slab = images[0].copy()
    slab.set_calculator(calc)
    # Trigger an initial force evaluation so the first frame is complete.
    slab.get_forces()
    traj.write(slab)
    if cons_t is True:
        dyn = Langevin(slab, 1.0 * units.fs, temp * units.kB, 0.002)
    else:
        dyn = VelocityVerlet(slab, dt=1.0 * units.fs)
    time_start = time.time()
    for step in range(count):
        dyn.run(20)
        traj.write(slab)
    time_elapsed = time.time() - time_start
    log("MD Simulation Dynamics: %s" % dyn)
    log("MD Simulation Time: %s \n" % time_elapsed)
def __init__(self, model, training_data=None, label=None):
    """ASE-calculator wrapper around a trained model.

    Parameters
    ----------
    model : trained model object carrying training data, scalings and
        (optionally) fitted LJ data.
    training_data : unused; kept for signature compatibility with
        existing callers.
    label : unused; the label is always taken from ``model.label``.
        NOTE(review): honoring a caller-supplied label may be the
        original intent — confirm before changing.
    """
    Calculator.__init__(self)
    # Fix: os.mkdir raised FileNotFoundError when "results" itself did
    # not exist yet; makedirs creates the full tree and tolerates reruns.
    os.makedirs("results/trained_models", exist_ok=True)
    self.save_logs = model.save_logs
    label = model.label
    self.log = Logger("results/logs/" + label + ".txt")
    self.log("Filename: %s" % label)
    self.model = model
    self.label = "".join(["results/trained_models/", label, ".pt"])
    self.fp_scaling = self.model.training_data.fprange
    self.target_sd = self.model.scalings[0]
    self.target_mean = self.model.scalings[1]
    self.lj = self.model.training_data.lj
    self.Gs = self.model.training_data.Gs
    self.training_elements = self.model.training_data.elements
    self.log("Symmetry function parameters: %s" % self.Gs)
    if self.lj:
        self.fitted_params = self.model.lj_data[3]
        self.params_dict = self.model.lj_data[4]
        self.lj_model = self.model.lj_data[5]
def __init__(
    self,
    datafile,
    device="cpu",
    structure=None,
    val_frac=0,
    descriptor=Gaussian,
    Gs=None,
    cores=1,
    force_coefficient=0,
    criterion=CustomLoss,
    optimizer=optim.LBFGS,
    loader_params=None,
    resample=None,
    scheduler=None,
    lr=1,
    criteria=None,
    lj_data=None,
    fine_tune=None,
    label="amptorch",
    save_logs=True,
):
    """Configure the trainer: hyperparameters, dataset and logging.

    Parameters
    ----------
    datafile : path to (or list of) training images.
    device : torch device string, default "cpu".
    structure : [n_layers, n_nodes] network architecture
        (default [3, 5]).
    val_frac : validation fraction (0 disables validation).
    descriptor : fingerprint descriptor class, default Gaussian.
    Gs : dict of symmetry-function parameters.
    cores : int, parallel workers for fingerprinting.
    force_coefficient : weight of the force term; > 0 enables force
        training.
    criterion : loss-function class.
    optimizer : torch optimizer class, default LBFGS.
    loader_params : DataLoader kwargs (default: full-batch, no shuffle).
    resample, scheduler, lr, lj_data, fine_tune, label, save_logs :
        stored as-is for use by the training loop.
    criteria : convergence dict with "energy", "force", "epochs",
        "early_stop" keys.
    """
    # Fix: the mutable defaults ([3, 5] and the two dicts) were shared
    # across every instance; use None sentinels and build fresh copies.
    if structure is None:
        structure = [3, 5]
    if loader_params is None:
        loader_params = {"batch_size": None, "shuffle": False,
                         "num_workers": 0}
    if criteria is None:
        criteria = {"energy": 0, "force": 0, "epochs": 1e10,
                    "early_stop": False}
    # exist_ok avoids the racy exists()+makedirs pattern.
    os.makedirs("results/logs/epochs", exist_ok=True)
    self.save_logs = save_logs
    self.label = label
    self.log = Logger("results/logs/" + label + ".txt")
    self.filename = datafile
    self.device = device
    self.structure = structure
    self.val_frac = val_frac
    self.loader_params = loader_params
    self.resample = resample
    self.descriptor = descriptor
    self.force_coefficient = force_coefficient
    self.criterion = criterion
    self.optimizer = optimizer
    self.scheduler = scheduler
    self.lr = lr
    self.convergence = criteria
    self.lj_data = lj_data
    self.fine_tune = fine_tune
    self.Gs = Gs
    # Force training is enabled purely by a positive force coefficient.
    self.forcetraining = False
    if force_coefficient > 0:
        self.forcetraining = True
    self.training_data = AtomsDataset(
        self.filename,
        descriptor=self.descriptor,
        Gs=Gs,
        cores=cores,
        forcetraining=self.forcetraining,
        lj_data=self.lj_data,
        label=label,
    )
    self.scalings = self.training_data.scalings
    self.sd_scaling = self.scalings[0]
    self.mean_scaling = self.scalings[1]
    if not lj_data:
        # NOTE(review): header appears to be stamped only when no LJ
        # data was supplied (the LJ fit logs its own) — confirm.
        self.log(time.asctime())
        self.log("-" * 50)
    self.log("LJ Data: %s" % (True if lj_data is not None else None))
    self.log("Force Training: %s - %s" % (self.forcetraining,
                                          force_coefficient))
def train_model(self):
    """Training loop.

    Runs epochs until convergence (energy/force RMSE thresholds), a
    stagnation-based early stop, a NaN loss, or the epoch limit. Tracks
    per-epoch RMSEs for plotting, keeps the best model weights, restores
    them at the end, and returns ``(model, best_force_rmse)`` (validation
    values when a validation loader is present).

    Fixes relative to the previous revision:
    * the non-validation stagnation test was missing ``abs()`` on the
      energy term (the validation branch already had it);
    * validation energy convergence compared ``best_val_force_loss``
      against the energy criterion instead of ``best_val_energy_loss``;
    * ``previous_energy_rmse`` was never updated in the validation force
      branch, so the combined stagnation check could never fire;
    * ``best_model_wts`` could be unbound at ``load_state_dict`` if a NaN
      early stop fired before any improvement.
    """
    forcetraining = False
    if self.criterion.alpha > 0:
        forcetraining = True
    best_train_force_loss = 1e8
    best_train_energy_loss = 1e8
    # dummy variables to track each epochs rmse
    previous_force_rmse = 1e8
    previous_energy_rmse = 1e8
    log = Logger("results/logs/" + self.label + ".txt")
    log_epoch = Logger("results/logs/epochs/" + self.label + "-calc.txt")
    plot_energy_loss = {"train": [], "val": []}
    if forcetraining:
        plot_force_loss = {"train": [], "val": []}
    if isinstance(self.atoms_dataloader, dict):
        validation = True
        best_val_force_loss = 1e8
        best_val_energy_loss = 1e8
        if forcetraining:
            make_val_force_header(log_epoch)
        else:
            make_val_energy_header(log_epoch)
    else:
        validation = False
        if forcetraining:
            make_force_header(log_epoch)
        else:
            make_energy_header(log_epoch)
    since = time.time()
    print("Training Initiated!")
    self.epochs -= 1
    early_stop = False
    epoch = 0
    convergence = False
    # Fix: seed with the initial weights so load_state_dict below cannot
    # see an unbound name when a NaN early-stop fires on the first epoch.
    best_model_wts = copy.deepcopy(self.model.state_dict())
    while not convergence:
        if validation:
            for phase in ["train", "val"]:
                if phase == "train":
                    self.model.train()
                else:
                    self.model.eval()
                energy_mse = 0.0
                force_mse = "N/A"
                if forcetraining:
                    force_mse = 0.0
                for data_sample in self.atoms_dataloader[phase]:
                    x = to_tensor(data_sample[0], self.device)
                    y = to_tensor(data_sample[1], self.device)

                    def closure():
                        self.optimizer.zero_grad()
                        pred = self.model(x)
                        loss = self.criterion(pred, y)
                        loss.backward()
                        return loss

                    mse_loss = nn.MSELoss(reduction="sum")
                    energy_target = y[0]
                    num_of_atoms = y[1]
                    energy_pred, force_pred = self.model(x)
                    # Undo target scaling so RMSEs are in physical units,
                    # normalized per atom.
                    raw_preds = (energy_pred * self.sd_scaling) + self.mean_scaling
                    raw_targets = (energy_target * self.sd_scaling) + self.mean_scaling
                    raw_preds_per_atom = torch.div(raw_preds, num_of_atoms)
                    target_per_atom = torch.div(raw_targets, num_of_atoms)
                    energy_loss = mse_loss(raw_preds_per_atom, target_per_atom)
                    energy_mse += torch.tensor(energy_loss.item())
                    if forcetraining:
                        force_target = y[2]
                        force_pred = force_pred * self.sd_scaling
                        force_target = force_target * self.sd_scaling
                        # Expand per-image atom counts to one row per atom,
                        # then sqrt-normalize the per-atom force errors.
                        num_atoms_force = torch.cat(
                            [idx.repeat(int(idx)) for idx in num_of_atoms]
                        )
                        num_atoms_force = torch.sqrt(
                            num_atoms_force.reshape(len(num_atoms_force), 1)
                        )
                        force_pred_per_atom = torch.div(force_pred, num_atoms_force)
                        force_targets_per_atom = torch.div(
                            force_target, num_atoms_force
                        )
                        force_loss = mse_loss(
                            force_pred_per_atom, force_targets_per_atom
                        )
                        # mean over the 3 force components
                        force_loss /= 3
                        force_mse += torch.tensor(force_loss.item())
                    if phase == "train":
                        loss = self.optimizer.step(closure)
                if self.scheduler:
                    self.scheduler.step()
                now = time.asctime()
                energy_mse /= self.dataset_size[phase]
                energy_rmse = torch.sqrt(energy_mse)
                if torch.isnan(energy_rmse):
                    early_stop = True
                plot_energy_loss[phase].append(energy_rmse)
                print("%s energy loss: %f" % (phase, energy_rmse))
                if forcetraining:
                    force_mse /= self.dataset_size[phase]
                    force_rmse = torch.sqrt(force_mse)
                    if torch.isnan(force_rmse):
                        early_stop = True
                    plot_force_loss[phase].append(force_rmse)
                    print("%s force loss: %f" % (phase, force_rmse))
                    if phase == "train":
                        log_force_results(
                            log_epoch, epoch, now, loss, energy_rmse,
                            force_rmse, phase,
                        )
                    else:
                        log_force_results(
                            log_epoch, epoch, now, "", energy_rmse,
                            force_rmse, phase,
                        )
                    if phase == "train":
                        # early stop when training force error stagnates
                        if (
                            abs(force_rmse - previous_force_rmse) <= 1e-5
                            and abs(energy_rmse - previous_energy_rmse) <= 1e-5
                        ):
                            early_stop = self.early_stop
                        previous_force_rmse = force_rmse
                        # Fix: the energy baseline was never refreshed, so
                        # the stagnation test above could never trigger.
                        previous_energy_rmse = energy_rmse
                    elif phase == "val":
                        if force_rmse < best_val_force_loss:
                            best_val_energy_loss = energy_rmse
                            best_val_force_loss = force_rmse
                            best_model_wts = copy.deepcopy(self.model.state_dict())
                        # Fix: energy convergence previously compared the
                        # FORCE loss against the energy criterion.
                        energy_convergence = (
                            best_val_energy_loss <= self.convergence_criteria["energy"]
                        )
                        force_convergence = (
                            best_val_force_loss <= self.convergence_criteria["force"]
                        )
                        convergence = (
                            (energy_convergence and force_convergence)
                            or (epoch >= self.epochs)
                            or early_stop
                        )
                else:
                    if phase == "train":
                        log_energy_results(
                            log_epoch, epoch, now, loss, energy_rmse, phase
                        )
                    else:
                        log_energy_results(
                            log_epoch, epoch, now, "", energy_rmse, phase
                        )
                    if phase == "train":
                        # early stop when training energy error stagnates
                        if abs(energy_rmse - previous_energy_rmse) <= 1e-5:
                            early_stop = self.early_stop
                        previous_energy_rmse = energy_rmse
                    elif phase == "val":
                        if energy_rmse < best_val_energy_loss:
                            best_val_energy_loss = energy_rmse
                            best_model_wts = copy.deepcopy(self.model.state_dict())
                        convergence = (
                            (
                                best_val_energy_loss
                                <= self.convergence_criteria["energy"]
                            )
                            or early_stop
                            or (epoch >= self.epochs)
                        )
            print()
        else:
            phase = "train"
            self.model.train()
            energy_mse = 0.0
            force_mse = "N/A"
            if forcetraining:
                force_mse = 0.0
            for data_sample in self.atoms_dataloader:
                x = to_tensor(data_sample[0], self.device)
                y = to_tensor(data_sample[1], self.device)

                def closure():
                    self.optimizer.zero_grad()
                    pred = self.model(x)
                    loss = self.criterion(pred, y)
                    loss.backward()
                    return loss

                mse_loss = nn.MSELoss(reduction="sum")
                energy_target = y[0]
                num_of_atoms = y[1]
                energy_pred, force_pred = self.model(x)
                raw_preds = (energy_pred * self.sd_scaling) + self.mean_scaling
                raw_targets = (energy_target * self.sd_scaling) + self.mean_scaling
                raw_preds_per_atom = torch.div(raw_preds, num_of_atoms)
                target_per_atom = torch.div(raw_targets, num_of_atoms)
                energy_loss = mse_loss(raw_preds_per_atom, target_per_atom)
                energy_mse += torch.tensor(energy_loss.item())
                if forcetraining:
                    force_target = y[2]
                    force_pred = force_pred * self.sd_scaling
                    force_target = force_target * self.sd_scaling
                    num_atoms_force = torch.cat(
                        [idx.repeat(int(idx)) for idx in num_of_atoms]
                    )
                    num_atoms_force = torch.sqrt(num_atoms_force).reshape(
                        len(num_atoms_force), 1
                    )
                    force_pred_per_atom = torch.div(force_pred, num_atoms_force)
                    force_targets_per_atom = torch.div(
                        force_target, num_atoms_force
                    )
                    force_loss = mse_loss(
                        force_pred_per_atom, force_targets_per_atom
                    )
                    # mean over image
                    force_loss /= 3
                    force_mse += torch.tensor(force_loss.item())
                loss = self.optimizer.step(closure)
            if self.scheduler:
                self.scheduler.step()
            now = time.asctime()
            energy_mse /= self.dataset_size
            energy_rmse = torch.sqrt(energy_mse)
            if torch.isnan(energy_rmse):
                early_stop = True
            plot_energy_loss[phase].append(energy_rmse)
            print("energy loss: %f" % energy_rmse)
            if forcetraining:
                force_mse /= self.dataset_size
                force_rmse = torch.sqrt(force_mse)
                if torch.isnan(force_rmse):
                    early_stop = True
                plot_force_loss[phase].append(force_rmse)
                print("force loss: %f\n" % force_rmse)
                log_force_results(
                    log_epoch, epoch, now, loss, energy_rmse, force_rmse, phase
                )
                # terminates when error stagnates
                # Fix: abs() was missing on the energy term, so a large
                # energy IMPROVEMENT (negative delta) could trip the stop.
                if (
                    abs(force_rmse - previous_force_rmse) <= 1e-5
                    and abs(energy_rmse - previous_energy_rmse) <= 1e-5
                ):
                    early_stop = self.early_stop
                if force_rmse < best_train_force_loss:
                    best_train_energy_loss = energy_rmse
                    best_train_force_loss = force_rmse
                    best_model_wts = copy.deepcopy(self.model.state_dict())
                previous_force_rmse = force_rmse
                previous_energy_rmse = energy_rmse
                energy_convergence = (
                    best_train_energy_loss <= self.convergence_criteria["energy"]
                )
                force_convergence = (
                    best_train_force_loss <= self.convergence_criteria["force"]
                )
                convergence = (
                    (energy_convergence and force_convergence)
                    or early_stop
                    or (epoch >= self.epochs)
                )
            else:
                log_energy_results(log_epoch, epoch, now, loss, energy_rmse, phase)
                # terminates when error stagnates
                if abs(energy_rmse - previous_energy_rmse) <= 1e-5:
                    early_stop = self.early_stop
                if energy_rmse < best_train_energy_loss:
                    best_train_energy_loss = energy_rmse
                    best_model_wts = copy.deepcopy(self.model.state_dict())
                previous_energy_rmse = energy_rmse
                convergence = (
                    (best_train_energy_loss <= self.convergence_criteria["energy"])
                    or early_stop
                    or (epoch >= self.epochs)
                )
        epoch += 1
        log_epoch("")
    time_elapsed = time.time() - since
    print("Training complete in {} steps".format(epoch))
    print(
        "Training complete in {:.0f}m {:.0f}s".format(
            time_elapsed // 60, time_elapsed % 60
        )
    )
    log("Training complete in {} steps".format(epoch))
    if validation:
        log("Best validation energy loss: {:4f}".format(best_val_energy_loss))
        if forcetraining:
            log("Best validation force loss: {:4f}".format(best_val_force_loss))
    else:
        log("Best training energy loss: {:4f}".format(best_train_energy_loss))
        if forcetraining:
            log("Best training force loss: {:4f}".format(best_train_force_loss))
    log("")
    if not os.path.exists("results/plots/training"):
        os.makedirs("results/plots/training")
    plt.title("RMSE vs. Epoch")
    plt.xlabel("Epoch #")
    plt.ylabel("RMSE")
    plot_epoch_x = list(range(1, epoch + 1))
    plt.plot(plot_epoch_x, plot_energy_loss["train"], label="energy train")
    if validation:
        plt.plot(plot_epoch_x, plot_energy_loss["val"], label="energy val")
    if forcetraining:
        plt.plot(plot_epoch_x, plot_force_loss["train"], label="force train")
        if validation:
            plt.plot(plot_epoch_x, plot_force_loss["val"], label="force val")
    plt.legend()
    plt.savefig("results/plots/training/" + self.label + ".pdf")
    # Restore the best-seen weights before returning.
    self.model.load_state_dict(best_model_wts)
    sigopt_value = best_train_force_loss
    if validation:
        sigopt_value = best_val_force_loss
    return self.model, sigopt_value