Example #1
    def to_file(self, filename: Path, file_format: str = "xyz"):
        """
        Write the configuration to file.

        Args:
            filename: Path to the file that stores the configuration.
            file_format: Format of the file that stores the configuration (e.g. `xyz`).
        """
        filename = to_path(filename)
        if file_format == "xyz":
            write_extxyz(
                filename,
                self.cell,
                self.species,
                self.coords,
                self.PBC,
                self._energy,
                self._forces,
                self._stress,
            )
        else:
            raise ConfigurationError(
                f"Expect data file_format to be one of {list(SUPPORTED_FORMAT.keys())}, "
                f"got: {file_format}."
            )
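For reference, a minimal sketch of the data this method serializes, using the same attribute names as above. All values are illustrative, and the 6-component stress layout is an assumption.

import numpy as np

# Illustrative 2-atom configuration (shapes matter, values do not).
cell = np.eye(3) * 5.43                  # (3, 3) lattice vectors
species = ["Si", "Si"]                   # length-N chemical symbols
coords = np.array([[0.00, 0.00, 0.00],
                   [1.36, 1.36, 1.36]])  # (N, 3) Cartesian coordinates
PBC = [True, True, True]                 # periodic boundary conditions
energy = -10.8                           # optional scalar
forces = np.zeros((2, 3))                # optional (N, 3) array
stress = [0.0] * 6                       # optional stress vector (6 components assumed)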
Example #2
    @classmethod
    def from_file(cls, filename: Path, file_format: str = "xyz"):
        """
        Read configuration from file.

        Args:
            filename: Path to the file that stores the configuration.
            file_format: Format of the file that stores the configuration (e.g. `xyz`).
        """

        if file_format == "xyz":
            cell, species, coords, PBC, energy, forces, stress = read_extxyz(filename)
        else:
            raise ConfigurationError(
                f"Expect data file_format to be one of {list(SUPPORTED_FORMAT.keys())}, "
                f"got: {file_format}.")

        cell = np.asarray(cell)
        species = [str(i) for i in species]
        coords = np.asarray(coords)
        PBC = [bool(i) for i in PBC]
        energy = float(energy) if energy is not None else None
        forces = np.asarray(forces) if forces is not None else None
        stress = [float(i) for i in stress] if stress is not None else None

        self = cls(cell, species, coords, PBC, energy, forces, stress)
        self._path = to_path(filename)

        return self
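A standalone sketch of the optional-field coercion used above: properties missing from the file stay `None`, everything else becomes a concrete type. The helper name and sample inputs are made up for illustration.

import numpy as np

def coerce(energy, forces, stress):
    # Properties absent from the file remain None; present ones are converted.
    energy = float(energy) if energy is not None else None
    forces = np.asarray(forces) if forces is not None else None
    stress = [float(s) for s in stress] if stress is not None else None
    return energy, forces, stress

print(coerce(None, None, None))                      # (None, None, None)
print(coerce("-10.8", [[0.0, 0.0, 0.0]], range(6)))  # parsed energy, forces array, stress list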
Example #3
    def load(self, filename: Path, mode: str = "train"):
        """
        Load a saved model.

        Args:
            filename: Path where the model is stored, e.g. kliff_model.pkl
            mode: Purpose of the loaded model. Should be either `train` or `eval`.
        """
        filename = to_path(filename)
        state_dict = torch.load(str(filename))

        # load model state dict
        self.load_state_dict(state_dict["model_state_dict"])

        if mode == "train":
            self.train()
        elif mode == "eval":
            self.eval()
        else:
            raise ModelTorchError(f"Unrecognized mode `{mode}`.")

        # load descriptor state dict
        self.descriptor.load_state_dict(state_dict["descriptor_state_dict"])

        logger.info(f"Model loaded from `{filename}`")
Example #4
    def save(self, filename: Path):
        """
        Save a model to disk.

        Args:
            filename: Path to store the model.
        """
        filename = to_path(filename)
        create_directory(filename)
        torch.save(self.state_dict(), str(filename))

        # save descriptor mean and stdev
        fname = filename.parent.joinpath("mean_and_stdev.pkl")
        self.descriptor.dump_mean_stdev(fname)
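A sketch of the two artifacts this `save` writes: the model weights and a sibling file next to them. The path is illustrative, and the `mkdir` call stands in for `create_directory`, whose exact behavior is assumed here.

import torch
from pathlib import Path

model = torch.nn.Linear(4, 2)
filename = Path("saved_model") / "model.pkl"
filename.parent.mkdir(parents=True, exist_ok=True)      # assumed equivalent of create_directory

torch.save(model.state_dict(), str(filename))           # model weights
print(filename.parent.joinpath("mean_and_stdev.pkl"))   # sibling path for the descriptor stats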
Example #5
    @staticmethod
    def _read(path: Path, file_format: str = "xyz"):
        """
        Read atomic configurations from a file or from a directory of files.

        Args:
            path: Path to a configuration file, or to a directory containing
                configuration files.
            file_format: Format of the configuration file(s) (e.g. `xyz`).
        """
        try:
            extension = SUPPORTED_FORMAT[file_format]
        except KeyError:
            raise DatasetError(
                f"Expect data file_format to be one of {list(SUPPORTED_FORMAT.keys())}, "
                f"got: {file_format}.")

        path = to_path(path)

        if path.is_dir():
            parent = path
            all_files = []
            for root, dirs, files in os.walk(parent):
                for f in files:
                    if f.endswith(extension):
                        all_files.append(to_path(root).joinpath(f))
            all_files = sorted(all_files)
        else:
            parent = path.parent
            all_files = [path]

        configs = [Configuration.from_file(f, file_format) for f in all_files]

        if len(configs) <= 0:
            raise DatasetError(
                f"No dataset file with file format `{file_format}` found at {parent}."
            )

        log_entry(logger,
                  f"{len(configs)} configurations read from {path}",
                  level="info")

        return configs
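A standalone sketch of the directory walk in `_read`: collect every file with a given extension under a root, sorted so the resulting order is deterministic. The helper name is made up.

import os
from pathlib import Path

def collect(root: Path, extension: str = ".xyz"):
    found = []
    for dirpath, _, filenames in os.walk(root):
        for f in filenames:
            if f.endswith(extension):
                found.append(Path(dirpath).joinpath(f))
    return sorted(found)

print(collect(Path(".")))  # e.g. [] if no .xyz files live under the current directory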
Example #6
    def save(self, filename: Path):
        """
        Save a model to disk.

        Args:
            filename: Path to store the model.
        """
        state_dict = {
            "model_state_dict": self.state_dict(),
            "descriptor_state_dict": self.descriptor.state_dict(),
        }

        filename = to_path(filename)
        create_directory(filename)

        torch.save(state_dict, str(filename))
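A sketch of the combined checkpoint layout written here: a plain dict with one entry per component, serialized into a single file. The filename is illustrative, and an empty dict stands in for the descriptor's state.

import torch

model = torch.nn.Linear(4, 2)
state_dict = {
    "model_state_dict": model.state_dict(),
    "descriptor_state_dict": {},  # placeholder for the descriptor's state
}
torch.save(state_dict, "checkpoint_demo.pkl")
print(torch.load("checkpoint_demo.pkl").keys())  # the two top-level entries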
Example #7
    def load(self, filename: Path, mode: str = "train"):
        """
        Load a model from disk into memory.

        Args:
            filename: Path where the model is stored.
            mode: Purpose of the loaded model. Should be either `train` or `eval`.
        """
        filename = to_path(filename)
        self.load_state_dict(torch.load(str(filename)))

        if mode == "train":
            self.train()
        elif mode == "eval":
            self.eval()
        else:
            raise ModelTorchError(f"Unrecognized mode `{mode}` in model.load().")

        # load descriptor mean and stdev
        fname = filename.parent.joinpath("mean_and_stdev.pkl")
        self.descriptor.load_mean_stdev(fname)
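A roundtrip sketch for this `load`: restore weights written with `torch.save`, then switch to eval mode. The module and filename are stand-ins.

import torch

saved = torch.nn.Linear(4, 2)
torch.save(saved.state_dict(), "linear_demo.pkl")

restored = torch.nn.Linear(4, 2)
restored.load_state_dict(torch.load("linear_demo.pkl"))
restored.eval()
print(torch.equal(saved.weight, restored.weight))  # True: weights round-trip exactly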
Example #8
    def create(
        self,
        configs: List[Configuration],
        use_energy: bool = True,
        use_forces: bool = True,
        use_stress: bool = False,
        fingerprints_filename: Union[Path, str] = "fingerprints.pkl",
        fingerprints_mean_stdev_filename: Optional[Union[Path, str]] = None,
        reuse: bool = False,
        use_welford_method: bool = False,
        nprocs: int = 1,
    ):
        """
        Process configs to generate fingerprints.

        Args:
            configs: Atomic configurations.
            use_energy: Whether to require the calculator to compute energy.
            use_forces: Whether to require the calculator to compute forces.
            use_stress: Whether to require the calculator to compute stress.
            fingerprints_filename: Path to save the generated fingerprints.
                If `reuse=True`, the fingerprints will not be regenerated; the ones
                stored in this file will be used directly.
            fingerprints_mean_stdev_filename: Path to save the mean and standard
                deviation of the fingerprints. If `reuse=True`, new mean and stdev
                will not be computed; the ones stored in this file will be used
                directly. If the descriptor does not require `normalize`, this is
                ignored.
            reuse: Whether to reuse provided fingerprints.
            use_welford_method: Whether to compute mean and standard deviation using the
                Welford method, which is memory efficient. See
                https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
            nprocs: Number of processes used to generate the fingerprints. If `1`, run
                in serial mode, otherwise `nprocs` processes will be forked via
                multiprocessing to do the work.
        """

        # wrap a single configuration into a list before storing
        if isinstance(configs, Configuration):
            configs = [configs]

        self.configs = configs
        self.use_energy = use_energy
        self.use_forces = use_forces
        self.use_stress = use_stress

        # reuse existing file
        if reuse:
            path = to_path(fingerprints_filename)
            if not path.exists():
                raise CalculatorTorchError(
                    f"You specified `reuse=True` to reuse the fingerprints stored in "
                    f"`{path}` This file does not exists."
                )
            logger.info(f"Reuse fingerprints `{path}`")

            if self.model.descriptor.normalize:
                if fingerprints_mean_stdev_filename is None:
                    path = None
                else:
                    path = to_path(fingerprints_mean_stdev_filename)
                if (path is None) or (not path.exists()):
                    raise CalculatorTorchError(
                        f"You specified `reuse=True` to reuse the fingerprints. The mean "
                        f"and stdev file of the fingerprints `{path}` does not exists."
                    )
                logger.info(f"Reuse fingerprints mean and stdev `{path}`")

        # generate fingerprints and pickle it
        else:
            self.fingerprints_path = self.model.descriptor.generate_fingerprints(
                configs,
                use_forces,
                use_stress,
                fingerprints_filename,
                fingerprints_mean_stdev_filename,
                use_welford_method,
                nprocs,
            )
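The docstring above points to Welford's online algorithm for the mean and standard deviation; the standalone sketch below (scalar samples for brevity) shows why it is memory efficient: the statistics are updated one sample at a time, so the full fingerprint set never has to be held in memory.

import numpy as np

def welford(samples):
    # Running count, mean, and sum of squared deviations (M2).
    count, mean, m2 = 0, 0.0, 0.0
    for x in samples:
        count += 1
        delta = x - mean
        mean += delta / count
        m2 += delta * (x - mean)
    return mean, np.sqrt(m2 / count)  # population standard deviation

data = [1.0, 2.0, 3.0, 4.0]
print(welford(data))                # (2.5, ~1.118)
print(np.mean(data), np.std(data))  # the two-pass result agrees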
Example #9
    def _dump_fingerprints(
        self,
        configs,
        fname,
        all_zeta,
        all_dzetadr_forces,
        all_dzetadr_stress,
        fit_forces,
        fit_stress,
    ):
        """
        Dump fingerprints to a pickle file.
        """

        logger.info(f"Pickling fingerprints to `{fname}`")

        create_directory(fname, is_directory=False)

        # remove any existing file, since we open it in append mode below
        fname = to_path(fname)
        if fname.exists():
            fname.unlink()

        with open(fname, "ab") as f:
            for i, conf in enumerate(configs):
                if i % 100 == 0:
                    logger.info(f"Processing configuration: {i}.")

                if all_zeta is None:
                    zeta, dzetadr_f, dzetadr_s = self.transform(
                        conf, fit_forces, fit_stress
                    )
                else:
                    zeta = all_zeta[i]
                    dzetadr_f = all_dzetadr_forces[i]
                    dzetadr_s = all_dzetadr_stress[i]

                # centering and normalization
                if self.normalize:
                    zeta = (zeta - self.mean) / self.stdev
                    if fit_forces or fit_stress:
                        stdev_3d = np.atleast_3d(self.stdev)
                    if fit_forces:
                        dzetadr_f = dzetadr_f / stdev_3d
                    if fit_stress:
                        dzetadr_s = dzetadr_s / stdev_3d

                # pickling data
                zeta = np.asarray(zeta, self.dtype)
                energy = np.asarray(conf.energy, self.dtype)
                if fit_forces:
                    dzetadr_f = np.asarray(dzetadr_f, self.dtype)
                    forces = np.asarray(conf.forces, self.dtype)
                if fit_stress:
                    dzetadr_s = np.asarray(dzetadr_s, self.dtype)
                    stress = np.asarray(conf.stress, self.dtype)
                    volume = np.asarray(conf.get_volume(), self.dtype)

                example = {"configuration": conf, "zeta": zeta, "energy": energy}
                if fit_forces:
                    example["dzetadr_forces"] = dzetadr_f
                    example["forces"] = forces
                if fit_stress:
                    example["dzetadr_stress"] = dzetadr_s
                    example["stress"] = stress
                    example["volume"] = volume

                pickle.dump(example, f)

        logger.info(f"Pickle {len(configs)} configurations finished.")