예제 #1
0
파일: loss.py 프로젝트: miaoliu/kliff
    def minimize(self, method: str, **kwargs):
        """
        Minimize the loss with a scipy optimizer.

        The keyword arguments are first checked and adjusted by
        ``self._adjust_kwargs``, the optimization itself is delegated to
        ``self._scipy_optimize``, and the optimized parameters (``result.x``) are then
        handed to ``self.calculator.update_model_params``.

        Args:
            method: minimization methods as specified at:
                https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.minimize.html
                https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.least_squares.html

            kwargs: extra keyword arguments that can be used by the scipy optimizer

        Returns:
            The result object returned by ``self._scipy_optimize``.
        """
        optimizer_kwargs = self._adjust_kwargs(method, **kwargs)

        log_entry(
            logger, f"Start minimization using method: {method}.", level="info")
        outcome = self._scipy_optimize(method, **optimizer_kwargs)
        log_entry(
            logger, f"Finish minimization using method: {method}.", level="info")

        # make the calculator use the final optimized parameters
        self.calculator.update_model_params(outcome.x)

        return outcome
예제 #2
0
    def update_model_params(self, params: Sequence[float]):
        """
        Push optimizing parameters (the sequence used by the optimizer) to the kim model.

        The parent class is updated first; afterwards every optimizing parameter is
        set on ``self.kim_model`` and the model is refreshed. When the logger's
        effective level is exactly ``DEBUG``, all kim model parameters are dumped to
        the log.

        Args:
            params: values of the optimizing parameters.
        """
        # opt params -> model params
        # TODO, in super().update_model_params(), we have parameter relation set,
        #   these parameters need to be updated here as well. However, in general
        #   we do not know how parameters are modified in parameter_relation,
        #   and it seems the only hope is to keep a copy of parameters and do some
        #   comparison to check which are modified and then set them.
        super().update_model_params(params)

        # model params -> kim params
        for opt_idx in range(self.get_num_opt_params()):
            info = self.get_opt_param_name_value_and_indices(opt_idx)
            _, value, p_idx, c_idx = info
            self.kim_model.set_parameter(p_idx, c_idx, value)

        # the kim model needs a refresh to pick up the new values
        self.kim_model.clear_then_refresh()

        if logger.getEffectiveLevel() != logging.DEBUG:
            return

        kim_params = self.get_kim_model_params()
        dump = "".join(f"\nname: {name}\n" + str(p.as_dict())
                       for name, p in kim_params.items())
        log_entry(logger, dump, level="debug")
예제 #3
0
파일: loss.py 프로젝트: miaoliu/kliff
    def _adjust_kwargs(self, method, **kwargs):
        """
        Check kwargs and adjust them as necessary.

        Arguments that are set internally must not be provided by the user. If the
        calculator reports bounds on the optimizing parameters, they are stored in the
        returned kwargs under the ``bounds`` key: as a ``(lower, upper)`` pair of lists
        (with ``None`` replaced by ``-inf``/``inf``) for the least-squares methods, and
        unchanged for the ``scipy.optimize.minimize`` methods.

        Args:
            method: name of the minimization method.
            kwargs: keyword arguments provided by the user for the optimizer.

        Returns:
            The (possibly updated) keyword arguments.

        Raises:
            LossError: if ``method`` is not supported, if an internally set argument
                is present in ``kwargs``, or if bounds are set but ``method`` cannot
                handle them.
        """

        def logged_error(msg):
            # log the message and hand back the exception for the caller to raise
            log_entry(logger, msg, level="error")
            return LossError(msg)

        if method in self.scipy_least_squares_methods:
            is_least_squares = True
            reserved_args = self.scipy_least_squares_methods_not_supported_args
            bounded_methods = ("trf", "dogbox")
        elif method in self.scipy_minimize_methods:
            is_least_squares = False
            reserved_args = self.scipy_minimize_methods_not_supported_args
            bounded_methods = ("L-BFGS-B", "TNC", "SLSQP")
        else:
            raise logged_error(
                'minimization method "{}" not supported.'.format(method))

        # check support status
        for name in reserved_args:
            if name in kwargs:
                raise logged_error(
                    'Argument "{}" should not be set via the "minimize" method. '
                    "It is set internally.".format(name))

        # adjust bounds
        if self.calculator.has_opt_params_bounds():
            if method not in bounded_methods:
                raise logged_error(
                    'Method "{}" cannot handle bounds.'.format(method))

            bounds = self.calculator.get_opt_params_bounds()
            if is_least_squares:
                # least_squares takes (lower, upper) and does not accept `None`
                lb = [b[0] if b[0] is not None else -np.inf for b in bounds]
                ub = [b[1] if b[1] is not None else np.inf for b in bounds]
                bounds = (lb, ub)
            kwargs["bounds"] = bounds

        return kwargs
예제 #4
0
    def write_kim_model(self, path: Path = None):
        """
        Write out a KIM model that can be used directly with the kim-api.

        This function typically write two files to `path`: (1) CMakeLists.txt, and (2)
        a parameter file like A.model_params. `path` will be created if it does not exist.

        Args:
            path: Path to the a directory to store the model. If `None`, it is set to
                `./MODEL_NAME_kliff_trained`, where `MODEL_NAME` is the `model_name` that
                provided at the initialization of this class.

        Raises:
            KIMModelError: if the model does not provide the
                ``WriteParameterizedModel`` routine. Errors reported by the kim-api
                calls are handled by ``check_error``.

        Note:
            This only works for parameterized KIMModel models that support the writing of
            parameters.
        """
        present, _required, error = self.kim_model.is_routine_present(
            kimpy.model_routine_name.WriteParameterizedModel)
        check_error(error, "kim_model.is_routine_present")
        if not present:
            raise KIMModelError(
                "This KIM model does not support writing parameters.")

        if path is None:
            model_name = self.model_name + "_kliff_trained"
            path = Path.cwd().joinpath(model_name)
        else:
            path = Path(path).expanduser().resolve()
            # the model is named after the directory it is written to
            model_name = path.name

        # `exist_ok` avoids the race between checking for and creating the directory
        path.mkdir(parents=True, exist_ok=True)

        # the kim-api call below is given plain strings
        path = str(path)
        model_name = str(model_name)

        error = self.kim_model.write_parameterized_model(path, model_name)
        check_error(error, "kim_model.write_parameterized_model")

        log_entry(logger,
                  f"KLIFF trained model write to `{path}`",
                  level="info")
예제 #5
0
    def _read(path: Path, file_format: str = "xyz"):
        """
        Load the atomic configurations stored at ``path``.

        If ``path`` is a directory, it is walked recursively and every file whose name
        ends with the extension registered for ``file_format`` is read, in sorted
        order. Otherwise ``path`` itself is read as a single file.

        Args:
            path: a data file, or a directory containing data files.
            file_format: key into ``SUPPORTED_FORMAT`` selecting the file format.

        Returns:
            A list of configurations, one per file read.

        Raises:
            DatasetError: if ``file_format`` is not a key of ``SUPPORTED_FORMAT``, or
                if no configuration was read.
        """
        try:
            extension = SUPPORTED_FORMAT[file_format]
        except KeyError:
            supported = list(SUPPORTED_FORMAT.keys())
            raise DatasetError(
                f"Expect data file_format to be one of {supported}, "
                f"got: {file_format}.")

        path = to_path(path)

        if not path.is_dir():
            # a single file
            parent = path.parent
            all_files = [path]
        else:
            # every matching file below the directory
            parent = path
            all_files = sorted(
                to_path(root).joinpath(name)
                for root, _, names in os.walk(parent)
                for name in names
                if name.endswith(extension)
            )

        configs = [
            Configuration.from_file(data_file, file_format)
            for data_file in all_files
        ]

        if not configs:
            raise DatasetError(
                f"No dataset file with file format `{file_format}` found at {parent}."
            )

        log_entry(logger,
                  f"{len(configs)} configurations read from {path}",
                  level="info")

        return configs
예제 #6
0
파일: loss.py 프로젝트: miaoliu/kliff
def _check_compute_flag(calculator, residual_data):
    """
    Warn when a property is computed by the calculator although its weight in the
    residual data is (close to) 0.

    Args:
        calculator: object exposing the ``use_energy``, ``use_forces`` and
            ``use_stress`` flags.
        residual_data: mapping with the keys ``energy_weight``, ``forces_weight``
            and ``stress_weight``.
    """
    # read all weights up front, so that a missing key fails before anything is logged
    weights = {
        "energy": residual_data["energy_weight"],
        "forces": residual_data["forces_weight"],
        "stress": residual_data["stress_weight"],
    }
    template = (
        '"{0}_weight" set to "{1}". Seems you do not want to use {0} in the fitting. '
        'You can set "use_{0}" in "calculator.create()" to "False" to speed up the '
        "fitting.")

    for prop, weight in weights.items():
        if getattr(calculator, "use_" + prop) and weight < 1e-12:
            log_entry(logger, template.format(prop, weight), level="warning")
예제 #7
0
def report_error(msg: str):
    """
    Log ``msg`` via ``log_entry`` with level ``"error"`` and raise it as an exception.

    Args:
        msg: error message, used both for the log entry and for the exception.

    Raises:
        KIMModelError: always, carrying ``msg``.
    """
    log_entry(logger, msg, level="error")
    raise KIMModelError(msg)
예제 #8
0
def check_error(error, msg):
    """
    Raise if ``error`` signals a failure.

    An ``error`` of ``0`` or ``None`` is treated as success and nothing happens.

    Args:
        error: error code to check.
        msg: name of the call that produced ``error``; it is embedded in the log
            entry and in the exception message.

    Raises:
        KIMModelError: if ``error`` is neither ``0`` nor ``None``.
    """
    if error is None or error == 0:
        return

    failure = f"Calling `{msg}` failed.\nSee `kim.log` for more information."
    log_entry(logger, failure, level="error")
    raise KIMModelError(failure)
예제 #9
0
    def run(self, normalize=True, sort=None, path=None, verbose=1):
        """
        Run the RMSE analyzer.

        Parameters
        ----------
        normalize: bool
            Whether to normalize the energy (forces) by the number of atoms in a
            configuration.

        sort: str (optional)
            Sort per configuration information according to `energy` or `forces`.
            If `None`, no sort. This works only when per configuration information is
            requested, i.e. ``verbose > 0``.

        path: str (optional)
            Path to write out the results. If `None`, write to stdout, otherwise, write to
            the file specified by `path`. The file is closed before this method
            returns, also when writing fails.
            Note, if ``verbose==2``, the difference of energy and forces is additionally
            written to extended XYZ files; the prefix
            `analysis_energy_forces_RMSE-difference` is passed to
            ``_compute_single_config`` for that purpose.

        verbose: int (optional)
            Verbose level of the output info. Available values are: 0, 1, 2.
            If ``verbose=0``, only output the energy and forces RMSEs for the dataset.
            If ``verbose==1``, output the norms of the energy and forces for each
            configuration additionally.
            If ``verbose==2``, output the difference of the energy and forces for each
            atom, and the information is written to extended XYZ files.
        """

        msg = "Start analyzing energy and forces RMSE."
        log_entry(logger, msg, level="info")

        cas = self.calculator.get_compute_arguments()

        all_enorm = []
        all_fnorm = []
        all_identifier = []

        # common path of dataset
        paths = [_get_config(ca).path for ca in cas]
        common = _get_common_path(paths)

        # the same for every configuration, so set it once
        prefix = "analysis_energy_forces_RMSE-difference"

        for i, ca in enumerate(cas):
            if i % 100 == 0:
                msg = "Processing configuration {}.".format(i)
                log_entry(logger, msg, level="info")

            enorm, fnorm = self._compute_single_config(ca, normalize, verbose,
                                                       common, prefix)
            all_enorm.append(enorm)
            all_fnorm.append(fnorm)
            all_identifier.append(_get_config(ca).identifier)
        all_enorm = np.asarray(all_enorm)
        all_fnorm = np.asarray(all_fnorm)
        all_identifier = np.asarray(all_identifier)

        # sort only by a quantity that is actually computed
        order = None
        if sort == "energy" and self.compute_energy:
            order = all_enorm.argsort()
        elif sort == "forces" and self.compute_forces:
            order = all_fnorm.argsort()
        if order is not None:
            all_enorm = all_enorm[order]
            all_fnorm = all_fnorm[order]
            all_identifier = all_identifier[order]

        fout = sys.stdout if path is None else open(path, "w")
        try:
            # header
            print("#" * 80, file=fout)
            print("#", file=fout)
            print("# Root-mean-square errors for energy and forces", file=fout)
            print("#", file=fout)
            msg = (
                'Values reported is per atom quantify if "normalize=True". For example, '
                '"eV/atom" for energy and "(eV/Angstrom)/atom" if "eV" is the units for '
                'energy and "Angstrom" is the units for forces.')
            print(split_string(msg, length=80, starter="#"), file=fout)
            print("#", file=fout)
            print(
                "# See (TODO insert url of doc) for the meaning of the reported values.",
                file=fout,
            )
            print("#" * 80 + "\n", file=fout)

            # norms of each config
            if verbose >= 1:
                print("#" * 80, file=fout)
                print("Per configuration quantify\n", file=fout)
                print("# config", end=" " * 4, file=fout)
                if self.compute_energy:
                    print("energy difference norm", end=" " * 4, file=fout)
                if self.compute_forces:
                    print("forces difference norm", end=" " * 4, file=fout)
                print("config identifier", file=fout)

                for i, (enorm, fnorm, identifier) in enumerate(
                        zip(all_enorm, all_fnorm, all_identifier)):
                    print("{:<10d}".format(i), end=" " * 4, file=fout)
                    if self.compute_energy:
                        print("{:.10e}".format(enorm), end=" " * 10, file=fout)
                    if self.compute_forces:
                        print("{:.10e}".format(fnorm), end=" " * 10, file=fout)
                    print(identifier, file=fout)
                print("\n", file=fout)

            # RMSE of all configs
            print("#" * 80, file=fout)
            print("RMSE for the dataset (all configurations).", file=fout)
            if self.compute_energy:
                e_rmse = np.linalg.norm(all_enorm) / len(all_enorm)**0.5
                print("{:.10e}    # energy RMSE".format(e_rmse), file=fout)
            if self.compute_forces:
                f_rmse = np.linalg.norm(all_fnorm) / len(all_fnorm)**0.5
                print("{:.10e}    # forces RMSE".format(f_rmse), file=fout)
            print("\n", file=fout)

            # difference of each atom
            if verbose >= 2:
                print("#" * 80, file=fout)
                # NOTE(review): the directory name in this message differs from
                # `prefix` above -- confirm which one `_compute_single_config` uses.
                msg = (
                    "The differences of energy and forces are written to the directory "
                    '"energy_forces_RMSE-difference" in extended XYZ format.')
                print(split_string(msg, length=80, starter="#"), file=fout)
                print("\n", file=fout)
        finally:
            # close only what was opened here; stdout must stay open
            if fout is not sys.stdout:
                fout.close()

        msg = "Finish analyzing energy and forces RMSE."
        log_entry(logger, msg, level="info")
예제 #10
0
파일: loss.py 프로젝트: miaoliu/kliff
    def minimize(
        self,
        method: str,
        batch_size: int = 100,
        num_epochs: int = 1000,
        start_epoch: int = 0,
        **kwargs,
    ):
        """
        Minimize the loss.

        After every epoch the accumulated loss is printed, and the model is saved
        according to the save metadata of the model (defaults are set if the user did
        not provide them). Once all epochs are done, the loss of the final parameters
        is printed and the model is saved once more.

        Args:
            method: PyTorch optimization methods, and available ones are:
                [`Adadelta`, `Adagrad`, `Adam`, `SparseAdam`, `Adamax`, `ASGD`, `LBFGS`,
                `RMSprop`, `Rprop`, `SGD`]
                See also: https://pytorch.org/docs/stable/optim.html
            batch_size: Number of configurations used in in each minimization step.
            num_epochs: Number of epochs to carry out the minimization.
            start_epoch: The starting epoch number. This is typically 0, but if
                continuing a training, it is useful to set this to the last epoch number
                of the previous training.
            kwargs: Extra keyword arguments that can be used by the PyTorch optimizer.

        Raises:
            LossError: if ``method`` is not supported, or if creating the optimizer
                (or loading its state) fails with a ``TypeError``, e.g. because of an
                unsupported keyword argument.
        """
        if method not in self.torch_minimize_methods:
            msg = 'Minimization method "{}" not supported.'.format(method)
            log_entry(logger, msg, level="error")
            raise LossError(msg)

        self.method = method
        self.batch_size = batch_size
        self.num_epochs = num_epochs
        self.start_epoch = start_epoch

        # data loader
        loader = self.calculator.get_compute_arguments(batch_size)

        # model save metadata
        save_prefix = self.calculator.model.save_prefix
        save_start = self.calculator.model.save_start
        save_frequency = self.calculator.model.save_frequency
        if save_prefix is None or save_start is None or save_frequency is None:
            logger.info(
                "Model saving meta data not set by user. Now set it to "
                '"prefix=./kliff_saved_model", "start=1", and "frequency=10".')
            save_prefix = os.path.join(os.getcwd(), "kliff_saved_model")
            save_start = 1
            save_frequency = 10
            self.calculator.model.set_save_metadata(save_prefix, save_start,
                                                    save_frequency)

        msg = "Start minimization using optimization method: {}.".format(
            self.method)
        log_entry(logger, msg, level="info")

        # optimizing
        try:
            self.optimizer = getattr(torch.optim, method)(
                self.calculator.model.parameters(), **kwargs)
            if self.optimizer_state_path is not None:
                self._load_optimizer_stat(self.optimizer_state_path)

        except TypeError as e:
            err_text = str(e)
            print(err_text)
            marker = "argument '"
            pos = err_text.find(marker)
            if pos == -1:
                # not of the "unexpected keyword argument 'x'" form; report it as is
                # instead of failing with an unrelated ValueError
                msg = 'Failed to set up optimizer "{}": {}'.format(
                    method, err_text)
            else:
                err_arg = err_text[pos + len(marker):].strip("'")
                msg = 'Argument "{}" not supported by optimizer "{}".'.format(
                    err_arg, method)
            log_entry(logger, msg, level="error")
            raise LossError(msg) from e

        # start one below, such that the final report is labelled `start_epoch`
        # when the loop body never runs (e.g. `num_epochs=0`)
        epoch = self.start_epoch - 1
        for epoch in range(self.start_epoch,
                           self.start_epoch + self.num_epochs):

            # get the loss without any optimization if continue a training
            if self.start_epoch != 0 and epoch == self.start_epoch:
                epoch_loss = self._get_loss_epoch(loader)
                print("Epoch = {:<6d}  loss = {:.10e}".format(
                    epoch, epoch_loss))

            else:
                epoch_loss = 0
                for ib, batch in enumerate(loader):

                    # the optimizer may evaluate the loss several times per step
                    def closure():
                        self.optimizer.zero_grad()
                        loss = self._get_loss_batch(batch)
                        loss.backward()
                        return loss

                    loss = self.optimizer.step(closure)
                    # float() such that do not accumulate history, more memory friendly
                    epoch_loss += float(loss)

                print("Epoch = {:<6d}  loss = {:.10e}".format(
                    epoch, epoch_loss))
                if epoch >= save_start and (epoch -
                                            save_start) % save_frequency == 0:
                    path = os.path.join(save_prefix,
                                        "model_epoch{}.pkl".format(epoch))
                    self.calculator.model.save(path)

        # print loss from final parameter and save last epoch
        epoch += 1
        epoch_loss = self._get_loss_epoch(loader)
        print("Epoch = {:<6d}  loss = {:.10e}".format(epoch, epoch_loss))
        path = os.path.join(save_prefix, "model_epoch{}.pkl".format(epoch))
        self.calculator.model.save(path)

        msg = "Finish minimization using optimization method: {}.".format(
            self.method)
        log_entry(logger, msg, level="info")
예제 #11
0
파일: loss.py 프로젝트: miaoliu/kliff
    def _scipy_optimize(self, method, **kwargs):
        """
        Minimize the loss use scipy.optimize.least_squares or scipy.optimize.minimize
        methods. A user should not call this function, but should call the ``minimize``
        method.

        When more than one MPI process is reported, rank 0 drives the optimizer, the
        other ranks enter the objective function until rank 0 sends them the break
        flag, and the result is broadcast from rank 0. Otherwise the optimizer is
        run directly in this process.

        Args:
            method: name of the minimization method.
            kwargs: keyword arguments forwarded to the optimizer.

        Returns:
            The result object of the optimizer.

        Raises:
            LossError: if ``method`` is neither a least-squares nor a minimize method.
        """

        size = parallel.get_MPI_world_size()

        if size > 1:
            comm = MPI.COMM_WORLD
            rank = comm.Get_rank()

            msg = "Running in MPI mode with {} processes.".format(size)
            log_entry(logger, msg, level="info", print_end="\n\n")

            if self.nprocs > 1:
                msg = (
                    'Argument "nprocs = {}" provided at initialization is ignored. When '
                    "running in MPI mode, the number of processes provided along with "
                    'the "mpiexec" (or "mpirun") command is used.'.format(
                        self.nprocs))
                log_entry(logger, msg, level="warning")

            x = self.calculator.get_opt_params()
            if method in self.scipy_least_squares_methods:
                # geodesic LM
                if method == "geodesiclm":
                    if not geodesicLM_avail:
                        report_import_error("geodesicLM")
                    else:
                        minimize_fn = geodesiclm
                else:
                    minimize_fn = scipy.optimize.least_squares
                func = self._get_residual_MPI

            elif method in self.scipy_minimize_methods:
                minimize_fn = scipy.optimize.minimize
                func = self._get_loss_MPI

            else:
                # fail explicitly instead of with an UnboundLocalError below
                msg = 'minimization method "{}" not supported.'.format(method)
                log_entry(logger, msg, level="error")
                raise LossError(msg)

            if rank == 0:
                result = minimize_fn(func, x, method=method, **kwargs)
                # notify other process to break func
                break_flag = True
                for i in range(1, size):
                    comm.send(break_flag, dest=i, tag=i)
            else:
                func(x)
                result = None

            result = comm.bcast(result, root=0)

            return result

        else:
            # 1. running MPI with 1 process
            # 2. running without MPI at all
            # both cases are regarded as running without MPI

            if self.nprocs == 1:
                msg = "Running in serial mode."
                log_entry(logger, msg, level="info", print_end="\n\n")
            else:
                msg = "Running in multiprocessing mode with {} processes.".format(
                    self.nprocs)
                log_entry(logger, msg, level="info", print_end="\n\n")

                # Maybe one thinks he is using MPI because nprocs is used
                if mpi4py_avail:
                    msg = (
                        '"mpi4py" detected. If you try to run in MPI mode, you should '
                        'execute your code via "mpiexec" (or "mpirun"). If not, ignore '
                        "this message.")
                    log_entry(logger, msg, level="warning")

            x = self.calculator.get_opt_params()
            if method in self.scipy_least_squares_methods:
                if method == "geodesiclm":
                    from geodesicLM import geodesiclm

                    minimize_fn = geodesiclm
                else:
                    minimize_fn = scipy.optimize.least_squares
                func = self._get_residual
            elif method in self.scipy_minimize_methods:
                minimize_fn = scipy.optimize.minimize
                func = self._get_loss
            else:
                # fail explicitly instead of with an UnboundLocalError below
                msg = 'minimization method "{}" not supported.'.format(method)
                log_entry(logger, msg, level="error")
                raise LossError(msg)

            result = minimize_fn(func, x, method=method, **kwargs)
            return result