def _select_template(self):
    """Creates the select command template.

    The command is built from a fixed `mlp select-add` invocation followed by
    one `--key=value` option for each entry in `self.select_args`. Options that
    would rename the mvs or selected-configuration files are skipped with a
    warning. The command's output is redirected to `training_select.txt`.

    Returns:
        str: the shell command to execute.
    """
    # mlp select-add pot.mtp train.cfg new.cfg diff.cfg:
    #   actively selects configurations from new.cfg and save those
    #   that need to be added to train.cfg to diff.cfg
    #   Options:
    #   --init-threshold=<num>: set the initial threshold to num, default=1e-5
    #   --select-threshold=<num>: set the select threshold to num, default=1.1
    #   --swap-threshold=<num>: set the swap threshold to num, default=1.0000001
    #   --energy-weight=<num>: set the weight for energy equation, default=1
    #   --force-weight=<num>: set the weight for force equations, default=0
    #   --stress-weight=<num>: set the weight for stress equations, default=0
    #   --nbh-weight=<num>: set the weight for site energy equations, default=0
    #   --mvs-filename=<filename>: name of mvs file
    #   --selected-filename=<filename>: file with selected configurations
    #   --selection-limit=<num>: swap limit for multiple selection,
    #       default=0 (disabled)
    #   --weighting=<string>: way of weighting the functional for better
    #       fitting of properties. Default=vibrations. Others=molecules,
    #       structures.
    template = "mlp select-add pot.mtp train.cfg candidate.cfg new_training.cfg"
    for k, v in self.select_args.items():
        if k in ["mvs-filename", "selected-filename"]:
            # The option name has to be substituted into the message; without
            # the `format` call the user would see a literal "{0}".
            msg.warn("Changing the {0} file name is not enabled.".format(k))
            continue
        template = template + " --{0}={1}".format(k, v)

    return template + " > training_select.txt"
def __init__(self, atoms=None, root=None, parent=None, incar=None,
             kpoints=None, execution=None, nsteps=None, samplerate=100,
             strains=None, tstart=None, tend=None, supercell=None, name="md"):
    """Initializes the dynamics group and updates its INCAR and KPOINTS
    settings.

    Args:
        atoms: forwarded to the parent constructor.
        root (str): folder in which a sub-folder called `name` is used as the
          root handed to the parent constructor.
        parent: forwarded to the parent constructor.
        incar (dict): forwarded to the parent constructor; an empty `dict` is
          used when `None`.
        kpoints (dict): forwarded to the parent constructor; an empty `dict`
          is used when `None`.
        execution (dict): forwarded to the parent constructor; an empty `dict`
          is used when `None`.
        nsteps: stored on the instance as `nsteps`.
        samplerate: stored on the instance as `samplerate`.
        strains (list): stored on the instance; `[0]` is used when `None`.
        tstart: stored on the instance as `tstart`.
        tend: stored on the instance as `tend`.
        supercell: stored on the instance, but not otherwise used yet; a
          warning is issued when it is specified.
        name (str): name of the group; also the name of its sub-folder.
    """
    # Fresh dictionaries per call: a `{}` default in the signature is created
    # once and would be shared (and possibly mutated) across all instances.
    incar = {} if incar is None else incar
    kpoints = {} if kpoints is None else kpoints
    execution = {} if execution is None else execution

    self.name = name
    msg.warn("The DM group is only configured for VASP at this time.")
    super(DynamicsGroup, self).__init__(atoms, incar, kpoints, execution,
                                        path.join(root, self.name),
                                        parent, "D", nconfigs=None)
    self.samplerate = samplerate
    self.nsteps = nsteps
    self.strains = [0] if strains is None else strains
    self.tstart = tstart
    self.tend = tend
    self.supercell = supercell

    if supercell is not None:
        msg.warn("Not Implemnted: At this time specifying a supercell is not "
                 "yet implemented in `matdb` but will be available in "
                 "latter versions. Using seed configuration instead.")
    # Whether or not a supercell was requested, the seed is a copy of the
    # atoms object that is available on the instance after the parent
    # constructor has run.
    self.seed = self.atoms.copy()

    self._update_incar()
    self._update_kpoints()
def _relax_template(self):
    """Builds the shell command used to run `mlp relax`.

    The command is optionally prefixed by an `mpirun` launcher (depending on
    `self.use_mpi` and `self.run_as_root`), uses fixed file names for the
    inputs, outputs and log, appends the options in `self.relax_args` and
    redirects the output to `training_relax.txt`.

    Returns:
        str: the shell command to execute.
    """
    # mlp relax settings-file [options]:
    #   settings file should contain settings for relaxation and for mlip
    #   regime.
    #   Options can be given in any order. Options include:
    #   --pressure=<num>: external pressure (in GPa)
    #   --iteration_limit=<num>: maximum number of iterations
    #   --min-dist=<num>: terminate relaxation if atoms come closer than <num>
    #   --force-tolerance=<num>: relaxes until forces are less than
    #       <num>(eV/Angstr.)
    #       Zero <num> disables atom relaxation (fixes atom fractional
    #       coordinates)
    #   --stress-tolerance=<num>: relaxes until stresses are smaller than
    #       <num>(GPa)
    #       Zero <num> disables lattice relaxation
    #   --max-step=<num>: Maximal allowed displacement of atoms and lattice
    #       vectors (in Angstroms)
    #   --min-step=<num>: Minimal displacement of atoms and lattice vectors
    #       (Angstr.)
    #       If all actual displacements are smaller then the relaxation stops.
    #   --bfgs-wolfe_c1
    #   --bfgs-wolfe_c2
    #   --cfg-filename=<str>: Read initial configurations from <str>
    #   --save-relaxed=<str>: Save the relaxed configurations to <str>
    #   --save-unrelaxed=<str>: If relaxation failed, save the configuration
    #       to <str>
    #   --log=<str>: Write relaxation log to <str>
    if self.use_mpi:
        if self.run_as_root:
            launcher = "mpirun --allow-run-as-root -n {0} "
        else:
            launcher = "mpirun -n {0} "
    else:
        launcher = ""

    relax = ("mlp relax relax.ini "
             "--cfg-filename=to-relax.cfg "
             "--save-relaxed={0} --log=relax_{1} "
             "--save-unrelaxed={2}".format("relaxed.cfg", "log.txt",
                                           "unrelaxed.cfg"))
    pieces = [launcher.format(self.ncores), relax]

    # File names are fixed by this template, so options that would change
    # them are rejected; the two bfgs options are emitted without a value.
    fixed_files = ("log", "save-unrelaxed", "save-relaxed", "cfg-filename")
    bare_flags = ("bfgs-wolfe_c1", "bfgs-wolfe_c2")
    for option, value in self.relax_args.items():
        if option in fixed_files:
            msg.warn(
                "Changing the {0} file name is not supported.".format(option))
        elif option in bare_flags:
            pieces.append(" --{0}".format(option))
        else:
            pieces.append(" --{0}={1}".format(option, value))

    pieces.append(" > training_relax.txt")
    return "".join(pieces)
def __init__(self, name, repeater, root, controller, steps, **kwargs):
    """Sets up a named, ordered collection of training steps.

    Args:
        name (str): name of this collection; also the name of the sub-folder
          of `root` that is created for it if it does not exist yet.
        repeater: object whose `name` attribute prefixes the fully-qualified
          name of this collection.
        root (str): folder in which the sub-folder for this collection lives.
        controller: object that is indexed by name to resolve steps that are
          given as strings; it is also handed to every step that is created.
        steps (list): each entry is either a string (a reference to an
          existing instance held by `controller`) or a `dict` with a `type`
          key of the form `module.ClassName`, resolved within
          `matdb.fitting`, plus the keyword arguments for that class.
        kwargs (dict): default keyword arguments for the steps; they are only
          used for keys that a step specification does not set itself.
    """
    from collections import OrderedDict
    from importlib import import_module

    self.name = name
    self.fqn = "{}.{}".format(repeater.name, name)
    self.root = path.join(root, name)
    self.repeater = repeater
    self.controller = controller

    if not path.isdir(self.root):
        from os import mkdir
        mkdir(self.root)

    # The specifications describing the kinds of training steps to setup,
    # kept exactly as they were passed in.
    self._settings = steps
    self.steps = OrderedDict()

    for spec in steps:
        if isinstance(spec, six.string_types):
            # A string refers to an existing instance that was defined
            # previously; look it up instead of creating a new one.
            existing = self.controller[spec]
            self.steps[existing.name] = existing
            continue

        modname, clsname = spec["type"].split('.')
        module = import_module("matdb.fitting.{}".format(modname))
        if not hasattr(module, clsname): # pragma: no cover
            # This trainer type is not implemented; skip its initialization.
            msg.warn("Cannot find trainer of type {}.".format(spec["type"]))
            continue
        trainer_cls = getattr(module, clsname)

        # Work on a copy so that the caller's specification is not altered.
        settings = spec.copy()
        settings.pop("type")
        settings["root"] = self.root
        settings["parent"] = self
        settings["controller"] = self.controller

        # Defaults passed in from the parent only fill in what is missing.
        for key, default in kwargs.items():
            settings.setdefault(key, default)

        step = trainer_cls(**settings)
        self.steps[step.name] = step
def symlink(target, source):
    """Creates a symbolic link at `target` that points to `source`.

    An existing file or symbolic link at `target` is removed first. If
    `target` is an existing directory, nothing is deleted or created and a
    warning is issued instead.

    Args:
        target (str): path at which the link is created.
        source (str): path that the link points to.
    """
    replaceable = path.isfile(target) or path.islink(target)
    if not replaceable and path.isdir(target):
        # Directories are never removed automatically.
        msg.warn(
            "Cannot auto-delete directory '{}' for symlinking.".format(target))
        return

    if replaceable:
        remove(target)

    os_symlink(source, target)
def _calc_quick(atoms, supercell=(1, 1, 1), delta=0.01):
    """Calculates the Hessian for a given atoms object just like :func:`calc`,
    *but*, it uses symmetry to speed up the calculation. Depending on the
    calculator being used, it is possible that the symmetrized result may be
    different from the full result with all displacements, done manually by
    :func:`calc`.

    .. note:: A file called `phonons.log` is written to the current working
      directory; it captures the standard output of the optimization.

    Args:
        atoms (matdb.atoms.Atoms): atomic structure of the *primitive*.
        supercell (list): or `tuple` or :class:`numpy.ndarray` specifying the
          integer supercell matrix.
        delta (float): displacement in Angstroms of each atom when computing
          the phonons.

    Returns:
        numpy.ndarray: Hessian matrix that has dimension `(natoms*3, natoms*3)`,
        where `natoms` is the number of atoms in the *supercell*.

    Raises:
        AssertionError: if `atoms` has no calculator attached.
    """
    #We need to make sure we are at the zero of the potential before we
    #displace the atoms; a copy is optimized so that `atoms` is not altered.
    ratoms = atoms.copy()
    try:
        with open("phonons.log", 'w') as f:
            with redirect_stdout(f):
                print(ratoms.get_forces())
                minim = FIRE(ratoms)
                minim.run(fmax=1e-4, steps=100)
    except Exception:
        #The potential is unstable probably. Issue a warning. We catch
        #`Exception` rather than everything so that a keyboard interrupt or
        #a request to exit is not mistaken for a failed optimization.
        msg.warn(
            "Couldn't optimize the atoms object. Potential may be unstable.")

    primitive = matdb_to_phonopy(ratoms)
    phonon = Phonopy(primitive, conform_supercell(supercell))
    phonon.generate_displacements(distance=delta)
    supercells = phonon.get_supercells_with_displacements()
    pot = atoms.get_calculator()
    assert pot is not None

    forces = []
    for scell in supercells:
        matoms = phonopy_to_matdb(scell)
        #Call a manual reset of the calculator so that we explicitly
        #recalculate the forces for the current atoms object.
        pot.reset()
        matoms.set_calculator(pot)
        forces.append(matoms.get_forces())

    phonon.set_forces(forces)
    phonon.produce_force_constants()
    return unroll_fc(phonon._force_constants)
def add(self, key, value):
    """Appends `key` to the set, unless it is already a member, in which case
    a warning is issued and nothing changes.

    Args:
        key (tuple): Anything that could be added to the set.
        value (tuple): The actual values that the suffixes correspond to.
    """
    if key in self.map:
        msg.warn(
            "The key {} already exists in the set, ignoring addition.".
            format(key))
        return

    # Entries are `[key, previous, next]` lists; `self.end` is the entry whose
    # `previous` slot (index 1) refers to the most recently added entry.
    sentinel = self.end
    last = sentinel[1]
    entry = [key, last, sentinel]
    last[2] = entry
    sentinel[1] = entry
    self.map[key] = entry
    self.values[key] = value
def _train_template(self):
    """Builds the shell command used to run `mlp train`.

    The command is optionally launched through `mpirun` (depending on
    `self.use_mpi` and `self.run_as_root`), gets one `--key=value` option for
    each supported entry in `self.train_args` and has its output redirected to
    `training.txt`.

    Returns:
        str: the shell command to execute.
    """
    # mlp train potential.mtp train_set.cfg [options]:
    #   trains potential.mtp on the training set from train_set.cfg
    #   Options include:
    #   --energy-weight=<double>: weight of energies in the fitting. Default=1
    #   --force-weight=<double>: weight of forces in the fitting. Default=0.01
    #   --stress-weight=<double>: weight of stresses in the fitting.
    #       Default=0.001
    #   --scale-by-force=<double>: Default=0. If >0 then configurations near
    #       equilibrium (with roughly force < <double>) get more weight.
    #   --valid-cfgs=<string>: filename with configuration to validate
    #   --max-iter=<int>: maximal number of iterations. Default=1000
    #   --curr-pot-name=<string>: filename for potential on current iteration.
    #   --trained-pot-name=<string>: filename for trained potential.
    #       Default=Trained.mtp_
    #   --bfgs-conv-tol=<double>: stopping criterion for optimization.
    #       Default=1e-8
    #   --weighting=<string>: how to weight configuration with different sizes
    #       relative to each other. Default=vibrations. Other=molecules,
    #       structures.
    #   --init-params=<string>: how to initialize parameters if a potential
    #       was not pre-fitted. Default is random. Other is same - this is
    #       when interaction of all species is the same (more accurate fit,
    #       but longer optimization)
    #   --skip-preinit: skip the 75 iterations done when params are not given
    if not self.use_mpi:
        command = "mlp train pot.mtp train.cfg"
    elif self.run_as_root:
        command = ("mpirun --allow-run-as-root -n {} mlp train pot.mtp "
                   "train.cfg".format(self.ncores))
    else:
        command = ("mpirun -n {} mlp train pot.mtp "
                   "train.cfg".format(self.ncores))

    options = []
    for option, value in self.train_args.items():
        if option in ("curr-pot-name", "trained-pot-name"):
            msg.warn("Renaming of the potential file is not enabled.")
        elif option == "valid-cfgs":
            msg.warn("Validating configurations is not enabled.")
        else:
            options.append(" --{0}={1}".format(option, value))

    return command + "".join(options) + " > training.txt"
def _best_bands(self):
    """Returns the name of the band collection that has the smallest
    *converged* phonon bands. This is accomplished by assuming that the largest
    supercell is the "correct" answer, and comparing the total DOS. If the
    comparative error is within `tolerance`, then it is acceptable. The smallest
    acceptable supercell's key is returned.

    The error is the sum of the absolute differences between the second
    columns of the two DOS files, and it must be strictly smaller than
    `self.tolerance`. If no smaller supercell is acceptable, a warning is
    issued and the key of the largest supercell is returned.

    Returns:
        str: the key in the group's sequence that has the smallest acceptable
        supercell size.

    Raises:
        AssertionError: if a DOS file does not have the same shape as the one
          for the largest supercell.
    """
    #Find the cell size and DOS for each calculation in the sequence.
    sizes = {
        k: np.linalg.det(np.reshape(np.array(d.supercell), (3, 3)))
        for k, d in self.sequence.items()
    }
    dos = {k: np.loadtxt(d.dos_file) for k, d in self.sequence.items()}

    #Find the calculation with the largest cell size and grab its DOS.
    maxkey = max(sizes, key=sizes.get)
    maxdos = dos[maxkey]

    ok = {}
    for k in sizes:
        if k == maxkey:
            continue

        assert dos[k].shape == maxdos.shape
        diff = np.sum(np.abs(dos[k][:, 1] - maxdos[:, 1]))
        if diff < self.tolerance:
            ok[k] = sizes[k]

    #Now, choose the supercell with the smallest cell size, if one
    #exists. Otherwise warn the user that either the tolerance was too low, or
    #that the calculation may not be converged.
    if ok:
        return min(ok, key=ok.get)

    #Nothing passed `diff < tolerance`, so the tolerance is too *low* (too
    #strict), not too high.
    msg.warn(
        "Hessian calculation may not be converged. Your tolerance "
        "may be too low. Returning the largest supercell by default.")
    return maxkey
def cleanup(self):
    """Extracts the calibration information from the configurations to
    determine the maximum allowable amplitude to maintain linear force regime.

    For each configuration whose `output.xyz` can be read, the amplitude and
    the mean absolute force are written as a line in `self.outfile`.
    Configurations that cannot be read are counted and reported in a warning.

    Returns:
        bool: True if the amplitude calibration is ready, which requires that
        forces were extracted for more than three configurations. False is
        also returned when the parent cleanup fails or when the XYZ
        configurations cannot be extracted.
    """
    if not super(Calibration, self).cleanup():
        msg.warn("cannot cleanup calibration; not all configs ready.")
        return False

    success = self.xyz()
    if not success:
        msg.warn("could not extract the calibration XYZ configurations.")
        return False
    else:
        imsg = "Extracted calibration configs from {0:d} folders."
        msg.okay(imsg.format(len(self.configs)))

    #Read in the XYZ file and extract the forces on each atom in each
    #configuration.
    from matdb.atoms import AtomsList
    forces = {}
    failed = 0
    for cid, folder in self.configs.items():
        #Find the mean, *absolute* force in each of the directions. There
        #will only be one atom in the atoms list. If the calculation didn't
        #finish, then we exclude it. This happens for some of the
        #calibration runs if the atoms are too close together.
        try:
            al = AtomsList(path.join(folder, "output.xyz"))
            forces[cid] = np.mean(np.abs(np.array(al[0].dft_force)), axis=1)
        except Exception:
            #Skipping unreadable configurations is deliberate, but a keyboard
            #interrupt or a request to exit should still get through, so we
            #don't catch everything.
            failed += 1

    if failed > 0:
        msg.warn(
            "couldn't extract forces for {0:d} configs.".format(failed))

    if len(forces) > 0:
        fmt = "{0:.7f}  {1:.7f}  {2:.7f}  {3:.7f}\n"
        with open(self.outfile, 'w') as f:
            for cid in forces:
                A, F = self.amplitudes[cid], forces[cid]
                f.write(fmt.format(A, *F))
    else:
        msg.warn("no forces available to write {}.".format(self.outfile))

    return len(forces) > 3
def ready(self):
    """Checks whether this database has finished calculating.

    The check looks for `output.xyz` in the root folder and compares the
    number of configurations estimated from its line count against the number
    of requested configurations. A warning is issued if they differ.

    Returns:
        bool: True if the file exists and the estimated number of
        configurations equals `len(self.configs)`; False otherwise.
    """
    xyzfile = path.join(self.root, "output.xyz")
    if not path.isfile(xyzfile):
        return False

    from matdb.utility import linecount
    #Every configuration takes one line per atom in the supercell, +2 for
    #the parameter line and the number of atoms.
    #NOTE: this doesn't work as advertised (it's off by a factor); it still
    #needs to be debugged.
    multiplier = np.linalg.det(np.array(self.base.supercell).reshape(3, 3))
    lpconfig = self.base.atoms.n * multiplier + 2
    nlines = linecount(xyzfile)
    nconfigs = nlines / lpconfig

    complete = nconfigs == len(self.configs)
    if not complete:
        wmsg = ("Number of structures in `output.xyz` does not match "
                "number of requested configs. Found {0} configs in"
                " {1} lines.")
        msg.warn(wmsg.format(nconfigs, nlines))
    return complete
def _calc_grade_template(self):
    """Builds the shell command used to run `mlp calc-grade`.

    One `--key=value` option is appended for each entry in `self.grade_args`,
    except for the option that would rename the mvs state file, which is
    skipped with a warning. The command's output is redirected to
    `training_calc_grade.txt`.

    Returns:
        str: the shell command to execute.
    """
    # mlp calc-grade pot.mtp train.cfg in.cfg out.cfg:
    #   actively selects from train.cfg, generates state.mvs file from
    #   train.cfg, and calculates maxvol grades of configurations located in
    #   in.cfg and writes them to out.cfg
    #   Options:
    #   --init-threshold=<num>: set the initial threshold to 1+num,
    #       default=1e-5
    #   --select-threshold=<num>: set the select threshold to num, default=1.1
    #   --swap-threshold=<num>: set the swap threshold to num,
    #       default=1.0000001
    #   --energy-weight=<num>: set the weight for energy equation, default=1
    #   --force-weight=<num>: set the weight for force equations, default=0
    #   --stress-weight=<num>: set the weight for stress equations, default=0
    #   --nbh-weight=<num>: set the weight for site energy equations,
    #       default=0
    #   --mvs-filename =<filename>: name of mvs file
    pieces = ["mlp calc-grade pot.mtp train.cfg train.cfg temp1.cfg"]
    for option, value in self.grade_args.items():
        if option == "mvs-filename":
            msg.warn("Renaming the mvs state file is not enabled.")
        else:
            pieces.append(" --{0}={1}".format(option, value))

    pieces.append(" > training_calc_grade.txt")
    return "".join(pieces)
def calc(primitive, cachedir=None, supercell=(1, 1, 1), delta=0.01, quick=True):
    """Calculates the Hessian for a given atoms object (which *must* have an
    attached calculator).

    .. note:: We choose to use the Hessian as the fundamental quantity in
      vibrational analysis in `matdb`.

    .. note:: `atoms` will be relaxed before calculating the Hessian.

    .. note:: When `quick` is False, log files (`phonons.log`, `vib.log` and
      `vibsummary.log`) are written to the cache directory. If `cachedir` is
      specified, a hash of the atoms and calculator is stored there as
      `atomspot.hash`; when it does not match on a later call, the cached
      files are deleted and recalculated.

    Args:
        primitive (matdb.atoms.Atoms): atomic structure of the *primitive*.
        cachedir (str): path to the directory where phonon calculations are
          cached. If not specified, a temporary directory will be used.
        supercell (tuple): number of times to duplicate the cell when
          forming the supercell.
        delta (float): displacement in Angstroms of each atom when computing the
          phonons.
        quick (bool): when True, use symmetry to speed up the Hessian
          calculation. See :func:`_calc_quick`.

    Returns:
        numpy.ndarray: Hessian matrix that has dimension `(natoms*3, natoms*3)`,
        where `natoms` is the number of atoms in the *supercell*.
    """
    if quick:
        return _calc_quick(primitive, supercell, delta)

    atoms = primitive.make_supercell(supercell)
    atoms.set_calculator(primitive.get_calculator())

    from ase.vibrations import Vibrations

    #The phonon calculator caches the displacements and force sets for each
    #atomic displacement using pickle. This generates three files for each
    #atomic degree of freedom (one for each cartesian direction). We want to
    #save these in a special directory.
    #NOTE(review): a temporary cache directory is never removed afterwards.
    tempcache = False
    if cachedir is None:
        cachedir = mkdtemp()
        tempcache = True
    else:
        cachedir = path.abspath(path.expanduser(cachedir))
    if not path.isdir(cachedir):
        mkdir(cachedir)

    result = None
    precon = Exp(A=3)
    aphash = None

    #Calculate a hash of the calculator and atoms object that we are calculating
    #for. If the potential doesn't have a `to_dict` method, then we ignore the
    #hashing.
    if not tempcache and hasattr(atoms, "to_dict") and hasattr(
            atoms._calc, "to_dict"):
        atoms_pot = {"atoms": atoms.to_dict(), "pot": atoms._calc.to_dict()}
        #This UUID will probably be different, even if the positions and species
        #are identical.
        del atoms_pot["atoms"]["uuid"]
        hash_str = convert_dict_to_str(atoms_pot)
        #The hash functions only accept bytes on python 3, so text has to be
        #encoded first.
        if not isinstance(hash_str, bytes):
            hash_str = hash_str.encode("utf-8")
        aphash = str(sha1(hash_str).hexdigest())

    if not tempcache:
        #Check whether we should clobber the cache or not.
        extras = ["vibsummary.log", "vib.log", "phonons.log"]

        with chdir(cachedir):
            hash_match = False
            if path.isfile("atomspot.hash"):
                with open("atomspot.hash") as f:
                    xhash = f.read()
                hash_match = xhash == aphash

            #`hascache` records whether we actually deleted anything, so that
            #the warning is only issued when existing files were clobbered.
            hascache = False
            if not hash_match:
                for vibfile in glob("vib.*.pckl"):
                    remove(vibfile)
                    hascache = True

                for xfile in extras:
                    if path.isfile(xfile):
                        remove(xfile)
                        hascache = True

            if hascache:
                msg.warn(
                    "Using hard-coded cache directory. We were unable to "
                    "verify that the atoms-potential combination matches "
                    "the one for which existing cache files exist. So, we "
                    "clobbered the existing files to get the science "
                    "right. You can fix this by using `matdb.atoms.Atoms` "
                    "and `matdb.calculators.*Calculator` objects.")

    with chdir(cachedir):
        #Relax the cell before we calculate the Hessian; this gets the forces
        #close to zero before we make harmonic approximation.
        try:
            #The log has to be opened for writing since the standard output is
            #redirected to it; opened read-only, the relaxation would always
            #fail and be reported as an unstable potential.
            with open("phonons.log", 'w') as f:
                with redirect_stdout(f):
                    minim = PreconLBFGS(atoms, precon=precon, use_armijo=True)
                    minim.run(fmax=1e-5)
        except Exception:
            #The potential is unstable probably. Issue a warning. We don't
            #catch everything so that a keyboard interrupt still gets through.
            msg.warn(
                "Couldn't optimize the atoms object. Potential may be unstable."
            )

        vib = Vibrations(atoms, delta=delta)
        with open("vib.log", 'a') as f:
            with redirect_stdout(f):
                vib.run()

        vib.summary(log="vibsummary.log")
        result = vib.H

    #Cache the hash of the atoms object and potential that we were using so
    #that we can check next time whether we should clobber the cache or not.
    if aphash is not None and not tempcache:
        with open(path.join(cachedir, "atomspot.hash"), 'w') as f:
            f.write(aphash)

    return result
def __init__(self, name="prototype", structures=None, ran_seed=None,
             permutations=None, **dbargs):
    """Initializes the prototypes group and collects the paths to the
    prototype files that were requested.

    Args:
        name (str): name of the group.
        structures (dict): keys are the system sizes (`unary`, `binary` or
          `ternary`, in any case). Each value is one of: a `list` of strings
          that are matched against the file names of the prototypes; the
          string `all`; or an `int` number of randomly chosen prototypes.
          When `None`, no prototypes are selected.
        ran_seed: seed for the random number generator; not set when `None`.
        permutations (dict): keyed by system size; the *number* of entries for
          a size is used as the multiplier when counting configurations.
          Sizes that are not listed use 3 (unary) or 6 (binary and ternary).
        dbargs (dict): keyword arguments for the parent constructor. It must
          include `root`; a `Prototypes` sub-folder is created and used as the
          root unless the root already contains `Prototypes`.
    """
    self.name = name
    self.seeded = False
    dbargs["prefix"] = "P"
    dbargs["cls"] = Prototypes
    if "Prototypes" not in dbargs['root']:
        from os import mkdir
        new_root = path.join(dbargs['root'], "Prototypes")
        if not path.isdir(new_root):
            mkdir(new_root)
        dbargs['root'] = new_root
    super(Prototypes, self).__init__(**dbargs)

    self.in_structures = structures
    self.ran_seed = ran_seed
    self.permutations = permutations
    self.species = self.database.parent.species

    #Make sure that we override the global calculator default values with
    #those settings that we know are needed for good calculations.
    #NOTE(review): the parent constructor has already been called at this
    #point, so this update of `dbargs` looks like it has no effect. Confirm.
    calcargs = self.database.calculator.copy()
    if "calculator" in dbargs:
        if dbargs["calculator"] is not None and "name" in dbargs[
                "calculator"]:
            calcargs.update(dbargs["calculator"])
            dbargs["calculator"] = calcargs

    # The prototypes are saved into the file prototypes.tar.gz, if
    # this is the first time prototypes has been run we need to unpack it.
    template_root = path.join(_get_reporoot(), "matdb", "templates")
    if not path.isdir(path.join(template_root, "uniqueUnaries")):
        import tarfile
        with chdir(template_root):
            # The context manager closes the archive even if the extraction
            # fails part of the way through.
            with tarfile.open("prototypes.tar.gz", "r:gz") as tar:
                tar.extractall()

    if self.ran_seed is not None:
        import random
        random.seed(self.ran_seed)

    self.puuids = None
    self._load_puuids()
    self.nconfigs = 0

    # Parse the structures to make a list of paths to the source files for
    # each of the system sizes.
    self.structures = {}
    folders = {"unary": "uniqueUnaries",
               "binary": "uniqueBinaries",
               "ternary": "uniqueTernaries"}
    # Multiplier for each prototype when no permutations are specified.
    multipliers = {"unary": 3, "binary": 6, "ternary": 6}

    # `structures` defaults to `None`, which has nothing to iterate over.
    requested = {} if structures is None else structures
    for k, v in requested.items():
        size = k.lower()
        if size not in folders: # pragma: no cover
            msg.warn(
                "Must specify the system size, i.e., unary, binary, or "
                "ternary. {} not recognized".format(k))
            continue
        cand_path = path.join(template_root, folders[size])

        if isinstance(v, list):
            self.structures[size] = []
            for prot in v:
                files = glob("{0}/*{1}*".format(cand_path, prot))
                if len(files) < 1: # pragma: no cover
                    msg.warn(
                        "No prototypes of size {0} matched the string "
                        "{1}".format(k, prot))
                else:
                    self.structures[size].extend(files)
        elif isinstance(v, str) and v == "all":
            self.structures[size] = glob("{0}/*".format(cand_path))
        elif isinstance(v, int):
            from random import shuffle
            files = glob("{0}/*".format(cand_path))
            shuffle(files)
            self.structures[size] = files[:v]
        else: #pragma: no cover
            msg.err(
                "Couldn't parse {0} structures for {1} case. Must be either "
                "a list of file names, 'all', or an int.".format(v, k))
            # Nothing was stored for this size, so there is nothing to count;
            # carrying on would fail looking up the missing entry.
            continue

        nfiles = len(self.structures[size])
        if self.permutations is not None and size in self.permutations:
            self.nconfigs += nfiles * len(self.permutations[size])
        else:
            self.nconfigs += nfiles * multipliers[size]
def _create_dbfull(self, folder, pattern, energy, force, virial, config_type):
    """Builds the single, combined database file from the database files in
    `folder`.

    Files that do not conform to the expected parameter and property names
    are first rewritten (with the parameters renamed) into `self.root` as
    `.h5` files; the combined atoms list is then written to `self._dbfull`.

    Args:
        folder (str): path to the folder that has the database files.
        pattern (str): glob pattern, relative to `folder`, for the files.
        energy: passed to `_atoms_conform` for each database file.
        force: passed to `_atoms_conform`; also the name of the property that
          is renamed to `ref_force` if needed.
        virial: passed to `_atoms_conform` for each database file.
        config_type: when not `None`, this value is set for the `config_type`
          parameter of the atoms in files that need conforming.
    """
    from matdb.utility import chdir, dbcat
    from glob import glob
    from tqdm import tqdm
    from os import path

    #NB! There is a subtle bug here: if you try and open a matdb.atoms.Atoms
    #within the context manager of `chdir`, something messes up with the
    #memory sharing in fortran and it dies. This has to be separate.
    with chdir(folder):
        self.dbfiles = glob(pattern)

    rewrites = []
    for fname in self.dbfiles:
        #Look at the first configuration in the atoms list to determine if it
        #matches the energy, force, virial and config type parameter names.
        dbpath = path.join(folder, fname)
        params, doforce = _atoms_conform(dbpath, energy, force, virial)
        if not (len(params) > 0 or doforce):
            continue

        msg.std("Conforming database file {}.".format(dbpath))
        atoms_list = AtomsList(dbpath)
        outpath = path.join(self.root, fname.replace(".xyz", ".h5"))
        for entry in tqdm(atoms_list):
            for target, source in params.items():
                if target == "config_type" and config_type is not None:
                    entry.params[target] = config_type
                    continue

                entry.add_param(target, entry.params[source])
                del entry.params[source]
                if source in entry.info: #pragma: no cover
                    #If things were done correctly by the atoms object this
                    #should never be used. It exists mainly as a safeguard.
                    msg.warn("The atoms object didn't properly "
                             "update the parameters of the legacy "
                             "atoms object.")
                    del entry.info[source]

            if doforce:
                entry.add_property("ref_force", entry.properties[force])
                del entry.properties[force]

        atoms_list.write(outpath)

        #Mark this db as non-conforming so that we know that a new version
        #of it was created.
        rewrites.append(fname)
        dbcat([dbpath], outpath, docat=False, renames=params,
              doforce=doforce)

    # We want a single file to hold all of the data for all the atoms in the
    # database.
    all_atoms = AtomsList()
    for fname in self.dbfiles:
        if fname in rewrites:
            source_path = path.join(self.root, fname.replace(".xyz", ".h5"))
        else:
            source_path = path.join(folder, fname)
        all_atoms.extend(AtomsList(source_path))

    all_atoms.write(self._dbfull)

    #Finally, create the config file.
    with chdir(folder):
        dbcat(self.dbfiles, self._dbfull, config_type=self.config_type,
              docat=False)