def minimize(self, method: str, **kwargs):
    """
    Run a scipy-based minimization of the loss and store the optimum.

    The keyword arguments are first passed through ``self._adjust_kwargs``,
    the optimization itself is delegated to ``self._scipy_optimize``, and the
    ``x`` attribute of the returned result is handed to
    ``self.calculator.update_model_params``.

    Args:
        method: minimization methods as specified at:
            https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.minimize.html
            https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.least_squares.html
        kwargs: extra keyword arguments that can be used by the scipy optimizer

    Returns:
        The result object returned by ``self._scipy_optimize``.
    """
    adjusted = self._adjust_kwargs(method, **kwargs)

    log_entry(logger, f"Start minimization using method: {method}.", level="info")
    result = self._scipy_optimize(method, **adjusted)
    log_entry(logger, f"Finish minimization using method: {method}.", level="info")

    # push the final optimized parameters back into the calculator
    self.calculator.update_model_params(result.x)

    return result
def update_model_params(self, params: Sequence[float]):
    """
    Update optimizing parameters (a sequence used by the optimizer) to the kim
    model.

    The parameters are first applied through the parent class, then every
    optimizing parameter is copied into ``self.kim_model`` and the KIM model is
    refreshed. When debug logging is enabled, the KIM model parameters are
    dumped to the log.

    Args:
        params: Optimizing parameter values, as used by the optimizer.
    """
    # update from opt params to model params
    # TODO, in super().update_model_params(), we have parameter relation set,
    #  these parameters need to be updated here as well. However, in general
    #  we do not know how parameters are modified in parameter_relation,
    #  and it seems the only hope is to keep a copy of parameters and do some
    #  comparison to check which are modified and then set them.
    super().update_model_params(params)

    # update from model params to kim params
    n = self.get_num_opt_params()
    for i in range(n):
        _, value, p_idx, c_idx = self.get_opt_param_name_value_and_indices(i)
        self.kim_model.set_parameter(p_idx, c_idx, value)

    # refresh model
    self.kim_model.clear_then_refresh()

    # `isEnabledFor` (instead of an exact `== DEBUG` comparison) also covers
    # loggers configured at a level more verbose than DEBUG.
    if logger.isEnabledFor(logging.DEBUG):
        kim_params = self.get_kim_model_params()
        s = "".join(
            f"\nname: {name}\n" + str(p.as_dict()) for name, p in kim_params.items()
        )
        log_entry(logger, s, level="debug")
def _adjust_kwargs(self, method, **kwargs):
    """
    Validate optimizer keyword arguments and inject parameter bounds.

    The method must be listed in either ``self.scipy_least_squares_methods`` or
    ``self.scipy_minimize_methods``. Arguments listed in the corresponding
    ``*_not_supported_args`` attribute must not be present in ``kwargs``. If
    the calculator reports bounds on the optimizing parameters, they are stored
    under ``kwargs["bounds"]``: as a ``(lower, upper)`` pair of lists with
    ``None`` replaced by infinities for least-squares methods, and unchanged
    for the other methods.

    Returns:
        The (possibly extended) keyword-argument dictionary.

    Raises:
        LossError: if the method is not supported, if a reserved argument is
            given, or if bounds are set for a method that cannot handle them.
    """
    if method in self.scipy_least_squares_methods:
        least_squares = True
        reserved_args = self.scipy_least_squares_methods_not_supported_args
        bounds_capable = ["trf", "dogbox"]
    elif method in self.scipy_minimize_methods:
        least_squares = False
        reserved_args = self.scipy_minimize_methods_not_supported_args
        bounds_capable = ["L-BFGS-B", "TNC", "SLSQP"]
    else:
        msg = 'minimization method "{}" not supported.'.format(method)
        log_entry(logger, msg, level="error")
        raise LossError(msg)

    # check support status
    for name in reserved_args:
        if name in kwargs:
            msg = (
                'Argument "{}" should not be set via the "minimize" method. '
                "It it set internally.".format(name)
            )
            log_entry(logger, msg, level="error")
            raise LossError(msg)

    # adjust bounds
    if self.calculator.has_opt_params_bounds():
        if method not in bounds_capable:
            msg = 'Method "{}" cannot handle bounds.'.format(method)
            log_entry(logger, msg, level="error")
            raise LossError(msg)

        bounds = self.calculator.get_opt_params_bounds()
        if least_squares:
            # least-squares methods get (lower, upper) lists with no `None`
            lower = [-np.inf if b[0] is None else b[0] for b in bounds]
            upper = [np.inf if b[1] is None else b[1] for b in bounds]
            bounds = (lower, upper)
        kwargs["bounds"] = bounds

    return kwargs
def write_kim_model(self, path: Path = None):
    """
    Write out a KIM model that can be used directly with the kim-api.

    This function typically write two files to `path`: (1) CMakeLists.txt, and
    (2) a parameter file like A.model_params. `path` will be created if it does
    not exist.

    Args:
        path: Path to the a directory to store the model. If `None`, it is set
            to `./MODEL_NAME_kliff_trained`, where `MODEL_NAME` is the
            `model_name` that provided at the initialization of this class.

    Raises:
        KIMModelError: if the KIM model does not provide the
            `WriteParameterizedModel` routine. Failures of the underlying KIM
            calls are reported through `check_error`.

    Note:
        This only works for parameterized KIMModel models that support the
        writing of parameters.
    """
    present, _required, error = self.kim_model.is_routine_present(
        kimpy.model_routine_name.WriteParameterizedModel
    )
    # label the failure with the routine that was actually called
    check_error(error, "kim_model.is_routine_present")
    if not present:
        raise KIMModelError("This KIM model does not support writing parameters.")

    if path is None:
        model_name = self.model_name + "_kliff_trained"
        path = Path.cwd().joinpath(model_name)
    else:
        path = Path(path).expanduser().resolve()
        model_name = path.name

    if not path.exists():
        # exist_ok guards against the directory being created by someone else
        # between the existence check and this call
        os.makedirs(path, exist_ok=True)

    # the KIM call below is given plain strings
    path = str(path)
    model_name = str(model_name)

    error = self.kim_model.write_parameterized_model(path, model_name)
    check_error(error, "kim_model.write_parameterized_model")

    log_entry(logger, f"KLIFF trained model write to `{path}`", level="info")
def _read(path: Path, file_format: str = "xyz"):
    """
    Load atomic configurations from a file or from a directory tree.

    If ``path`` is a directory, it is walked recursively and every file whose
    name ends with the extension registered for ``file_format`` in
    ``SUPPORTED_FORMAT`` is read, in sorted order. Otherwise ``path`` itself is
    read as a single file.

    Args:
        path: File or directory to read from.
        file_format: Key into ``SUPPORTED_FORMAT``.

    Returns:
        A list with one ``Configuration`` per file read.

    Raises:
        DatasetError: if ``file_format`` is not a key of ``SUPPORTED_FORMAT``,
            or if no configuration was read.
    """
    try:
        extension = SUPPORTED_FORMAT[file_format]
    except KeyError:
        raise DatasetError(
            f"Expect data file_format to be one of {list(SUPPORTED_FORMAT.keys())}, "
            f"got: {file_format}."
        )

    path = to_path(path)

    if path.is_dir():
        parent = path
        all_files = sorted(
            to_path(root).joinpath(name)
            for root, _dirs, names in os.walk(parent)
            for name in names
            if name.endswith(extension)
        )
    else:
        parent = path.parent
        all_files = [path]

    configs = [Configuration.from_file(f, file_format) for f in all_files]

    if not configs:
        raise DatasetError(
            f"No dataset file with file format `{file_format}` found at {parent}."
        )

    log_entry(logger, f"{len(configs)} configurations read from {path}", level="info")

    return configs
def _check_compute_flag(calculator, residual_data):
    """
    Warn about properties that are computed although their weight is ~0.

    For each of energy, forces and stress, a warning is logged when the
    calculator's ``use_<property>`` flag is truthy while the corresponding
    ``<property>_weight`` entry of ``residual_data`` is smaller than 1e-12.
    """
    # read all weights up front so a missing key fails before anything is logged
    weights = {
        "energy": residual_data["energy_weight"],
        "forces": residual_data["forces_weight"],
        "stress": residual_data["stress_weight"],
    }

    template = (
        '"{0}_weight" set to "{1}". Seems you do not want to use {0} in the fitting. '
        'You can set "use_{0}" in "calculator.create()" to "False" to speed up the '
        "fitting."
    )

    for prop, weight in weights.items():
        if getattr(calculator, "use_" + prop) and weight < 1e-12:
            log_entry(logger, template.format(prop, weight), level="warning")
def report_error(msg):
    """
    Log ``msg`` at error level and then raise it as a ``KIMModelError``.

    Raises:
        KIMModelError: always, carrying ``msg``.
    """
    log_entry(logger, msg, level="error")
    failure = KIMModelError(msg)
    raise failure
def check_error(error, msg):
    """
    Raise if a KIM call reported a failure.

    Args:
        error: Error indicator of the call; ``0`` and ``None`` mean success.
        msg: Name of the call, embedded in the error message.

    Raises:
        KIMModelError: if ``error`` is neither ``0`` nor ``None``.
    """
    failed = error != 0 and error is not None
    if not failed:
        return

    report = f"Calling `{msg}` failed.\nSee `kim.log` for more information."
    log_entry(logger, report, level="error")
    raise KIMModelError(report)
def run(self, normalize=True, sort=None, path=None, verbose=1):
    """
    Run the RMSE analyzer.

    Parameters
    ----------
    normalize: bool
        Whether to normalize the energy (forces) by the number of atoms in a
        configuration.

    sort: str (optional)
        Sort per configuration information according to `energy` or `forces`.
        If `None`, no sort. This works only when per configuration information
        is requested, i.e. ``verbose > 0``.

    path: str (optional)
        Path to write out the results. If `None`, write to stdout, otherwise,
        write to the file specified by `path`. The file is closed before this
        method returns. Note, if ``verbose==2``, the difference of energy and
        forces is additionally written out by ``_compute_single_config``, which
        receives the prefix `analysis_energy_forces_RMSE-difference`.

    verbose: int (optional)
        Verbose level of the output info. Available values are: 0, 1, 2.
        If ``verbose=0``, only output the energy and forces RMSEs for the
        dataset. If ``verbose==1``, output the norms of the energy and forces
        for each configuration additionally. If ``verbose==2``, output the
        difference of the energy and forces for each atom, and the information
        is written to extended XYZ files.
    """
    msg = "Start analyzing energy and forces RMSE."
    log_entry(logger, msg, level="info")

    cas = self.calculator.get_compute_arguments()

    all_enorm = []
    all_fnorm = []
    all_identifier = []

    # common path of dataset
    paths = [_get_config(ca).path for ca in cas]
    common = _get_common_path(paths)

    prefix = "analysis_energy_forces_RMSE-difference"
    for i, ca in enumerate(cas):
        if i % 100 == 0:
            msg = "Processing configuration {}.".format(i)
            log_entry(logger, msg, level="info")

        enorm, fnorm = self._compute_single_config(
            ca, normalize, verbose, common, prefix
        )
        all_enorm.append(enorm)
        all_fnorm.append(fnorm)
        all_identifier.append(_get_config(ca).identifier)

    all_enorm = np.asarray(all_enorm)
    all_fnorm = np.asarray(all_fnorm)
    all_identifier = np.asarray(all_identifier)

    # sorting is only applied when the sort key was actually computed
    order = None
    if sort == "energy":
        if self.compute_energy:
            order = all_enorm.argsort()
    elif sort == "forces":
        if self.compute_forces:
            order = all_fnorm.argsort()
    if order is not None:
        all_enorm = all_enorm[order]
        all_fnorm = all_fnorm[order]
        all_identifier = all_identifier[order]

    fout = open(path, "w") if path is not None else sys.stdout
    try:
        # header
        print("#" * 80, file=fout)
        print("#", file=fout)
        print("# Root-mean-square errors for energy and forces", file=fout)
        print("#", file=fout)
        msg = (
            'Values reported is per atom quantify if "normalize=True". For example, '
            '"eV/atom" for energy and "(eV/Angstrom)/atom" if "eV" is the units for '
            'energy and "Angstrom" is the units for forces.'
        )
        print(split_string(msg, length=80, starter="#"), file=fout)
        print("#", file=fout)
        print(
            "# See (TODO insert url of doc) for the meaning of the reported values.",
            file=fout,
        )
        print("#" * 80 + "\n", file=fout)

        # norms of each config
        if verbose >= 1:
            print("#" * 80, file=fout)
            print("Per configuration quantify\n", file=fout)
            print("# config", end=" " * 4, file=fout)
            if self.compute_energy:
                print("energy difference norm", end=" " * 4, file=fout)
            if self.compute_forces:
                print("forces difference norm", end=" " * 4, file=fout)
            print("config identifier", file=fout)

            for i, (enorm, fnorm, identifier) in enumerate(
                zip(all_enorm, all_fnorm, all_identifier)
            ):
                print("{:<10d}".format(i), end=" " * 4, file=fout)
                if self.compute_energy:
                    print("{:.10e}".format(enorm), end=" " * 10, file=fout)
                if self.compute_forces:
                    print("{:.10e}".format(fnorm), end=" " * 10, file=fout)
                print(identifier, file=fout)
            print("\n", file=fout)

        # RMSE of all configs
        print("#" * 80, file=fout)
        print("RMSE for the dataset (all configurations).", file=fout)
        if self.compute_energy:
            e_rmse = np.linalg.norm(all_enorm) / len(all_enorm) ** 0.5
            print("{:.10e} # energy RMSE".format(e_rmse), file=fout)
        if self.compute_forces:
            f_rmse = np.linalg.norm(all_fnorm) / len(all_fnorm) ** 0.5
            print("{:.10e} # forces RMSE".format(f_rmse), file=fout)
        print("\n", file=fout)

        # difference of each atom
        if verbose >= 2:
            print("#" * 80, file=fout)
            # NOTE(review): the directory name in this message differs from the
            # prefix passed to `_compute_single_config` above -- confirm which
            # one is actually used on disk.
            msg = (
                "The differences of energy and forces are written to the directory "
                '"energy_forces_RMSE-difference" in extended XYZ format.'
            )
            print(split_string(msg, length=80, starter="#"), file=fout)
            print("\n", file=fout)
    finally:
        # only close what was opened here; never close stdout
        if path is not None:
            fout.close()

    msg = "Finish analyzing energy and forces RMSE."
    log_entry(logger, msg, level="info")
def minimize(
    self,
    method: str,
    batch_size: int = 100,
    num_epochs: int = 1000,
    start_epoch: int = 0,
    **kwargs,
):
    """
    Minimize the loss.

    Args:
        method: PyTorch optimization methods, and available ones are:
            [`Adadelta`, `Adagrad`, `Adam`, `SparseAdam`, `Adamax`, `ASGD`, `LBFGS`,
            `RMSprop`, `Rprop`, `SGD`]
            See also: https://pytorch.org/docs/stable/optim.html
        batch_size: Number of configurations used in in each minimization step.
        num_epochs: Number of epochs to carry out the minimization.
        start_epoch: The starting epoch number. This is typically 0, but if
            continuing a training, it is useful to set this to the last epoch
            number of the previous training.
        kwargs: Extra keyword arguments that can be used by the PyTorch optimizer.

    Raises:
        LossError: if ``method`` is not in ``self.torch_minimize_methods``, or
            if setting up the optimizer raises a ``TypeError`` (e.g. because of
            an unsupported keyword argument).
    """
    if method not in self.torch_minimize_methods:
        msg = 'Minimization method "{}" not supported.'.format(method)
        log_entry(logger, msg, level="error")
        raise LossError(msg)

    self.method = method
    self.batch_size = batch_size
    self.num_epochs = num_epochs
    self.start_epoch = start_epoch

    # data loader
    loader = self.calculator.get_compute_arguments(batch_size)

    # model save metadata
    save_prefix = self.calculator.model.save_prefix
    save_start = self.calculator.model.save_start
    save_frequency = self.calculator.model.save_frequency

    if save_prefix is None or save_start is None or save_frequency is None:
        logger.info(
            "Model saving meta data not set by user. Now set it to "
            '"prefix=./kliff_saved_model", "start=1", and "frequency=10".'
        )
        save_prefix = os.path.join(os.getcwd(), "kliff_saved_model")
        save_start = 1
        save_frequency = 10
        self.calculator.model.set_save_metadata(
            save_prefix, save_start, save_frequency
        )

    msg = "Start minimization using optimization method: {}.".format(self.method)
    log_entry(logger, msg, level="info")

    # optimizing
    try:
        self.optimizer = getattr(torch.optim, method)(
            self.calculator.model.parameters(), **kwargs
        )
        if self.optimizer_state_path is not None:
            self._load_optimizer_stat(self.optimizer_state_path)
    except TypeError as e:
        text = str(e)
        print(text)
        # Extract the offending argument name when the message has the usual
        # "... argument 'name'" form. Any other TypeError is reported with its
        # own text instead of failing on the parsing itself.
        marker = "argument '"
        pos = text.find(marker)
        if pos >= 0:
            err_arg = text[pos + len(marker):].strip("'")
            msg = 'Argument "{}" not supported by optimizer "{}".'.format(
                err_arg, method
            )
        else:
            msg = 'Failed to set up optimizer "{}": {}'.format(method, text)
        log_entry(logger, msg, level="error")
        raise LossError(msg) from e

    epoch = 0
    for epoch in range(self.start_epoch, self.start_epoch + self.num_epochs):

        # get the loss without any optimization if continue a training
        if self.start_epoch != 0 and epoch == self.start_epoch:
            epoch_loss = self._get_loss_epoch(loader)
            print("Epoch = {:<6d} loss = {:.10e}".format(epoch, epoch_loss))

        else:
            epoch_loss = 0
            for batch in loader:

                def closure():
                    self.optimizer.zero_grad()
                    loss = self._get_loss_batch(batch)
                    loss.backward()
                    return loss

                loss = self.optimizer.step(closure)
                # float() such that do not accumulate history, more memory friendly
                epoch_loss += float(loss)

            print("Epoch = {:<6d} loss = {:.10e}".format(epoch, epoch_loss))
            if epoch >= save_start and (epoch - save_start) % save_frequency == 0:
                path = os.path.join(save_prefix, "model_epoch{}.pkl".format(epoch))
                self.calculator.model.save(path)

    # print loss from final parameter and save last epoch
    epoch += 1
    epoch_loss = self._get_loss_epoch(loader)
    print("Epoch = {:<6d} loss = {:.10e}".format(epoch, epoch_loss))
    path = os.path.join(save_prefix, "model_epoch{}.pkl".format(epoch))
    self.calculator.model.save(path)

    msg = "Finish minimization using optimization method: {}.".format(self.method)
    log_entry(logger, msg, level="info")
def _scipy_optimize(self, method, **kwargs):
    """
    Minimize the loss use scipy.optimize.least_squares or scipy.optimize.minimize
    methods. A user should not call this function, but should call the
    ``minimize`` method.

    When the MPI world size is larger than 1, rank 0 drives the optimizer while
    the other ranks enter the MPI objective function until rank 0 sends them a
    break flag; the result is then broadcast from rank 0. Otherwise the
    optimizer is called directly with the non-MPI objective function.

    Args:
        method: Name of the optimization method; expected to be listed in
            ``self.scipy_least_squares_methods`` or
            ``self.scipy_minimize_methods``.
        kwargs: Keyword arguments forwarded to the optimizer.

    Returns:
        The result returned by the optimizer.
    """
    size = parallel.get_MPI_world_size()

    if size > 1:
        comm = MPI.COMM_WORLD
        rank = comm.Get_rank()

        msg = "Running in MPI mode with {} processes.".format(size)
        log_entry(logger, msg, level="info", print_end="\n\n")

        if self.nprocs > 1:
            msg = (
                'Argument "nprocs = {}" provided at initialization is ignored. When '
                "running in MPI mode, the number of processes provided along with "
                'the "mpiexec" (or "mpirun") command is used.'.format(self.nprocs)
            )
            log_entry(logger, msg, level="warning")

        x = self.calculator.get_opt_params()
        if method in self.scipy_least_squares_methods:
            # geodesic LM
            if method == "geodesiclm":
                if not geodesicLM_avail:
                    # NOTE(review): the name passed here looks misspelled; left
                    # unchanged because how the helper uses it is not visible.
                    report_import_error("geodesciLM")
                # `geodesiclm` is a local name in this function (it is also
                # imported in the non-MPI branch), so it must be bound here
                # before use; relying on a module-level name would raise
                # UnboundLocalError.
                from geodesicLM import geodesiclm

                minimize_fn = geodesiclm
            else:
                minimize_fn = scipy.optimize.least_squares
            func = self._get_residual_MPI

        elif method in self.scipy_minimize_methods:
            minimize_fn = scipy.optimize.minimize
            func = self._get_loss_MPI

        if rank == 0:
            result = minimize_fn(func, x, method=method, **kwargs)
            # notify other process to break func
            break_flag = True
            for i in range(1, size):
                comm.send(break_flag, dest=i, tag=i)
        else:
            func(x)
            result = None

        result = comm.bcast(result, root=0)

        return result

    else:
        # 1. running MPI with 1 process
        # 2. running without MPI at all
        # both cases are regarded as running without MPI

        if self.nprocs == 1:
            msg = "Running in serial mode."
            log_entry(logger, msg, level="info", print_end="\n\n")
        else:
            msg = "Running in multiprocessing mode with {} processes.".format(
                self.nprocs
            )
            log_entry(logger, msg, level="info", print_end="\n\n")

            # Maybe one thinks he is using MPI because nprocs is used
            if mpi4py_avail:
                msg = (
                    '"mpi4py" detected. If you try to run in MPI mode, you should '
                    'execute your code via "mpiexec" (or "mpirun"). If not, ignore '
                    "this message."
                )
                log_entry(logger, msg, level="warning")

        x = self.calculator.get_opt_params()
        if method in self.scipy_least_squares_methods:
            if method == "geodesiclm":
                from geodesicLM import geodesiclm

                minimize_fn = geodesiclm
            else:
                minimize_fn = scipy.optimize.least_squares
            func = self._get_residual
        elif method in self.scipy_minimize_methods:
            minimize_fn = scipy.optimize.minimize
            func = self._get_loss

        result = minimize_fn(func, x, method=method, **kwargs)
        return result