def to_file(self, filename: Path, file_format: str = "xyz"):
    """
    Write the configuration to file.

    Args:
        filename: Path to the file that stores the configuration.
        file_format: Format of the file that stores the configuration (e.g. `xyz`).

    Raises:
        ConfigurationError: If `file_format` is not a supported format.
    """
    target = to_path(filename)

    # Only the extended-xyz writer is implemented here; anything else is an error.
    if file_format != "xyz":
        raise ConfigurationError(
            f"Expect data file_format to be one of {list(SUPPORTED_FORMAT.keys())}, "
            f"got: {file_format}."
        )

    write_extxyz(
        target,
        self.cell,
        self.species,
        self.coords,
        self.PBC,
        self._energy,
        self._forces,
        self._stress,
    )
def from_file(cls, filename: Path, file_format: str = "xyz"):
    """
    Read configuration from file.

    Args:
        filename: Path to the file that stores the configuration.
        file_format: Format of the file that stores the configuration (e.g. `xyz`).

    Raises:
        ConfigurationError: If `file_format` is not a supported format.
    """
    if file_format != "xyz":
        raise ConfigurationError(
            f"Expect data file_format to be one of {list(SUPPORTED_FORMAT.keys())}, "
            f"got: {file_format}."
        )

    cell, species, coords, PBC, energy, forces, stress = read_extxyz(filename)

    # Coerce the raw parser output into the canonical types the constructor
    # expects; energy/forces/stress are optional and stay None when absent.
    instance = cls(
        np.asarray(cell),
        [str(s) for s in species],
        np.asarray(coords),
        [bool(p) for p in PBC],
        None if energy is None else float(energy),
        None if forces is None else np.asarray(forces),
        None if stress is None else [float(s) for s in stress],
    )
    instance._path = to_path(filename)

    return instance
def load(self, filename: Path, mode: str = "train"):
    """
    Load a saved model.

    Args:
        filename: Path where the model is stored, e.g. kliff_model.pkl
        mode: Purpose of the loaded model. Should be either `train` or `eval`.

    Raises:
        ModelTorchError: If `mode` is neither `train` nor `eval`.
    """
    filename = to_path(filename)
    state_dict = torch.load(str(filename))

    # load model state dict
    self.load_state_dict(state_dict["model_state_dict"])

    if mode == "train":
        self.train()
    elif mode == "eval":
        self.eval()
    else:
        raise ModelTorchError(f"Unrecognized mode `{mode}`.")

    # load descriptor state dict
    self.descriptor.load_state_dict(state_dict["descriptor_state_dict"])

    # Bug fix: the log message previously contained no interpolation, so the
    # loaded path was never reported; include the actual filename.
    logger.info(f"Model loaded from `{filename}`")
def save(self, filename: Path):
    """
    Save a model to disk.

    Args:
        filename: Path to store the model.
    """
    path = to_path(filename)
    create_directory(path)
    torch.save(self.state_dict(), str(path))

    # save descriptor mean and stdev
    mean_stdev_file = path.parent.joinpath("mean_and_stdev.pkl")
    self.descriptor.dump_mean_stdev(mean_stdev_file)
def _read(path: Path, file_format: str = "xyz"):
    """
    Read atomic configurations from path.

    If `path` is a directory, every file beneath it (recursively) whose name
    ends with the extension registered for `file_format` is read; otherwise
    `path` itself is read as a single configuration file.

    Raises:
        DatasetError: If `file_format` is unsupported or no file is found.
    """
    try:
        extension = SUPPORTED_FORMAT[file_format]
    except KeyError:
        raise DatasetError(
            f"Expect data file_format to be one of {list(SUPPORTED_FORMAT.keys())}, "
            f"got: {file_format}."
        )

    path = to_path(path)

    if path.is_dir():
        parent = path
        # Collect matching files from the whole tree, in deterministic order.
        all_files = sorted(
            to_path(root).joinpath(name)
            for root, _, names in os.walk(parent)
            for name in names
            if name.endswith(extension)
        )
    else:
        parent = path.parent
        all_files = [path]

    configs = [Configuration.from_file(f, file_format) for f in all_files]

    if not configs:
        raise DatasetError(
            f"No dataset file with file format `{file_format}` found at {parent}."
        )

    log_entry(logger, f"{len(configs)} configurations read from {path}", level="info")

    return configs
def save(self, filename: Path):
    """
    Save a model to disk.

    Args:
        filename: Path to store the model.
    """
    # Bundle the network weights together with the descriptor state so a
    # single file round-trips the whole model.
    checkpoint = {
        "model_state_dict": self.state_dict(),
        "descriptor_state_dict": self.descriptor.state_dict(),
    }

    path = to_path(filename)
    create_directory(path)
    torch.save(checkpoint, str(path))
def load(self, filename: Path, mode: str = "train"):
    """
    Load a model on disk into memory.

    Args:
        filename: Path where the model is stored.
        mode: Purpose of the loaded model. Should be either `train` or `eval`.

    Raises:
        ModelTorchError: If `mode` is neither `train` nor `eval`.
    """
    path = to_path(filename)
    self.load_state_dict(torch.load(str(path)))

    if mode == "train":
        self.train()
    elif mode == "eval":
        self.eval()
    else:
        raise ModelTorchError('Unrecognized mode "{}" in model.load().'.format(mode))

    # load descriptor mean and stdev
    self.descriptor.load_mean_stdev(path.parent.joinpath("mean_and_stdev.pkl"))
def create(
    self,
    configs: List[Configuration],
    use_energy: bool = True,
    use_forces: bool = True,
    use_stress: bool = False,
    fingerprints_filename: Union[Path, str] = "fingerprints.pkl",
    fingerprints_mean_stdev_filename: Optional[Union[Path, str]] = None,
    reuse: bool = False,
    use_welford_method: bool = False,
    nprocs: int = 1,
):
    """
    Process configs to generate fingerprints.

    Args:
        configs: atomic configurations
        use_energy: Whether to require the calculator to compute energy.
        use_forces: Whether to require the calculator to compute forces.
        use_stress: Whether to require the calculator to compute stress.
        fingerprints_filename: Path to save the generated fingerprints.
            If `reuse=True`, Will not generate the fingerprints, but directly use the
            one provided via this file.
        fingerprints_mean_stdev_filename: Path to save the mean and standard deviation
            of the fingerprints. If `reuse=True`, Will not generate new fingerprints
            mean and stdev, but directly use the one provided via this file.
            If `normalize` is not required by a descriptor, this is ignored.
        reuse: Whether to reuse provided fingerprints.
        use_welford_method: Whether to compute mean and standard deviation using the
            Welford method, which is memory efficient. See
            https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
        nprocs: Number of processes used to generate the fingerprints. If `1`, run
            in serial mode, otherwise `nprocs` processes will be forked via
            multiprocessing to do the work.

    Raises:
        CalculatorTorchError: If `reuse=True` but the fingerprints file (or, when
            the descriptor normalizes, the mean/stdev file) does not exist.
    """
    # Bug fix: normalize a single Configuration to a list BEFORE storing it;
    # previously `self.configs` could be left as a bare Configuration while
    # only the local variable was wrapped.
    if isinstance(configs, Configuration):
        configs = [configs]

    self.configs = configs
    self.use_energy = use_energy
    self.use_forces = use_forces
    self.use_stress = use_stress

    # reuse existing file
    if reuse:
        path = to_path(fingerprints_filename)
        if not path.exists():
            raise CalculatorTorchError(
                f"You specified `reuse=True` to reuse the fingerprints stored in "
                f"`{path}`. This file does not exist."
            )
        logger.info(f"Reuse fingerprints `{path}`")

        # A normalizing descriptor also needs the stored mean/stdev file.
        if self.model.descriptor.normalize:
            if fingerprints_mean_stdev_filename is None:
                path = None
            else:
                path = to_path(fingerprints_mean_stdev_filename)

            if (path is None) or (not path.exists()):
                raise CalculatorTorchError(
                    f"You specified `reuse=True` to reuse the fingerprints. The mean "
                    f"and stdev file of the fingerprints `{path}` does not exist."
                )
            logger.info(f"Reuse fingerprints mean and stdev `{path}`")

    # generate fingerprints and pickle it
    else:
        self.fingerprints_path = self.model.descriptor.generate_fingerprints(
            configs,
            use_forces,
            use_stress,
            fingerprints_filename,
            fingerprints_mean_stdev_filename,
            use_welford_method,
            nprocs,
        )
def _dump_fingerprints(
    self,
    configs,
    fname,
    all_zeta,
    all_dzetadr_forces,
    all_dzetadr_stress,
    fit_forces,
    fit_stress,
):
    """
    Dump fingerprints to a pickle file.

    One dict per configuration is appended to `fname`, each containing at
    least `configuration`, `zeta`, and `energy`; force/stress entries are
    added when `fit_forces`/`fit_stress` are set.

    Args:
        configs: Atomic configurations to process.
        fname: Path of the pickle file to write.
        all_zeta: Precomputed fingerprints for all configs, or None to
            compute them here via `self.transform`.
        all_dzetadr_forces: Precomputed fingerprint derivatives w.r.t.
            atomic coordinates (used when `fit_forces`); ignored when
            `all_zeta` is None.
        all_dzetadr_stress: Precomputed fingerprint derivatives for stress
            (used when `fit_stress`); ignored when `all_zeta` is None.
        fit_forces: Whether to include force data in each pickled record.
        fit_stress: Whether to include stress data in each pickled record.
    """
    logger.info(f"Pickling fingerprints to `{fname}`")

    create_directory(fname, is_directory=False)

    # remove it, because we use append mode for the file below
    fname = to_path(fname)
    if fname.exists():
        fname.unlink()

    with open(fname, "ab") as f:
        for i, conf in enumerate(configs):
            # progress log every 100 configurations
            if i % 100 == 0:
                logger.info(f"Processing configuration: {i}.")

            if all_zeta is None:
                # compute fingerprints (and derivatives) on the fly
                zeta, dzetadr_f, dzetadr_s = self.transform(
                    conf, fit_forces, fit_stress
                )
            else:
                zeta = all_zeta[i]
                dzetadr_f = all_dzetadr_forces[i]
                dzetadr_s = all_dzetadr_stress[i]

            # centering and normalization
            if self.normalize:
                zeta = (zeta - self.mean) / self.stdev

                # derivatives scale by 1/stdev only (centering has no effect
                # on derivatives); broadcast stdev over the extra axis
                if fit_forces or fit_stress:
                    stdev_3d = np.atleast_3d(self.stdev)
                if fit_forces:
                    dzetadr_f = dzetadr_f / stdev_3d
                if fit_stress:
                    dzetadr_s = dzetadr_s / stdev_3d

            # pickling data
            # cast everything to the calculator dtype before serializing
            zeta = np.asarray(zeta, self.dtype)
            energy = np.asarray(conf.energy, self.dtype)
            if fit_forces:
                dzetadr_f = np.asarray(dzetadr_f, self.dtype)
                forces = np.asarray(conf.forces, self.dtype)
            if fit_stress:
                dzetadr_s = np.asarray(dzetadr_s, self.dtype)
                stress = np.asarray(conf.stress, self.dtype)
                volume = np.asarray(conf.get_volume(), self.dtype)

            example = {"configuration": conf, "zeta": zeta, "energy": energy}
            if fit_forces:
                example["dzetadr_forces"] = dzetadr_f
                example["forces"] = forces
            if fit_stress:
                example["dzetadr_stress"] = dzetadr_s
                example["stress"] = stress
                example["volume"] = volume

            # append this record to the open file; readers unpickle in a loop
            pickle.dump(example, f)

    logger.info(f"Pickle {len(configs)} configurations finished.")