def _get_distortion(self):
    """Build one distorted copy of ``self.atoms`` per scaling matrix.

    For every matrix produced by ``self._get_scaling_matrix()`` the seed
    atoms are copied, the cell is multiplied by the matrix, the copy is
    optionally rattled (when ``self.rattle`` is non-zero) and finally the
    atomic positions are multiplied by the same matrix.

    Returns:
        AtomsList: the distorted copies, in the order of the scaling
        matrices.
    """
    if self.cov_diag is not None:
        scaling_matrix = self._get_scaling_matrix()
    # NOTE(review): when `cov_diag` is None no scaling matrix is bound and
    # the loop below raises NameError -- confirm that callers always set it.

    distorted = AtomsList()
    for transform in scaling_matrix:
        cell_copy = self.atoms.copy()
        cell_copy.set_cell(np.matmul(cell_copy.get_cell(), transform))

        if self.rattle != 0.0:
            cell_copy.rattle(stdev=self.rattle)

        # The positions receive the same transformation as the lattice
        # vectors; this happens after the (optional) rattle.
        cell_copy.positions = np.matmul(cell_copy.get_positions(), transform)
        distorted.append(cell_copy)

    return distorted
def rset(self):
    """Collect the atoms objects (with Hessian attached) for this group.

    Returns:
        list: list of :class:`~matdb.atoms.Atoms`. For a group without
        sub-sequences the single atoms object of this group is returned
        with ``self.H`` stored under the ``"H"`` key of its info
        dictionary. Otherwise the result is delegated to the
        sub-sequences (see the comments in the body).
    """
    if len(self.sequence) == 0:
        # Bottom of the stack: attach the Hessian to the atoms object it
        # belongs to and return it as a one-element list.
        self.atoms.info["H"] = self.H
        leaf = AtomsList()
        leaf.append(self.atoms)
        return leaf

    if isinstance(self.parent, Hessian):
        # The parent is itself a Hessian group, so the sub-sequences are
        # alternatives; return only the one `_best_bands` selects.
        return self.sequence[self._best_bands()].rset

    # Otherwise concatenate the result sets of every sub-sequence.
    merged = AtomsList()
    for child in self.sequence.values():
        merged.extend(child.rset)
    return merged
def run(args):
    """Print the license banner and export the matching configurations.

    Args:
        args (dict): script arguments. The keys read here are ``"dbspec"``
            (passed to :class:`Controller`), ``"p"`` (iterable of search
            patterns), ``"format"``, ``"o"`` (output path) and
            ``"overwrite"``. If `None`, only the banner is printed.

    Returns:
        None
    """
    print("matdb  Copyright (C) 2019  HALL LABS")
    print("This program comes with ABSOLUTELY NO WARRANTY.")
    print(
        "This is free software, and you are welcome to redistribute it under "
        "certain conditions.")
    if args is None:
        return

    # No matter what other options the user has chosen, we will have to
    # create a database controller for the specification they have given us.
    cdb = Controller(args["dbspec"])

    # Gather every configuration of every entry that matches any pattern.
    configs = AtomsList()
    for pattern in args["p"]:
        for entry in cdb.find(pattern):
            for iatoms in entry.iconfigs:
                configs.append(iatoms)

    if args["format"] == "xyz":
        from matdb.conversion import to_xyz
        target = path.abspath(path.expanduser(args["o"]))
        to_xyz(configs, target, args["overwrite"])
def cfg_to_atomslist(cfgfile, config_type=None, species=None):
    """Parse a CFG file into an :class:`matdb.atoms.AtomsList`.

    Each ``BEGIN_CFG`` ... ``END_CFG`` section is read into a dictionary
    keyed by section label and then converted with ``_cfgd_to_atoms``.

    Args:
        cfgfile (str): path to the file to convert.
        config_type (str): name of the config_type to assign to each
            configuration. It is not used by the code in this function.
        species (list): list of element names corresponding to the integer
            species in the CFG dictionary; forwarded to ``_cfgd_to_atoms``.

    Returns:
        matdb.atoms.AtomsList: one atoms object per section of the file.
    """
    from matdb.atoms import AtomsList

    sections = []
    current = None
    with open(cfgfile) as handle:
        for line in handle:
            stripped = line.strip()
            if stripped == '':
                continue

            if "BEGIN_CFG" in line:
                current = {"features": {}}
            elif isinstance(current, dict) and "END_CFG" not in line:
                if not _rxcfg.match(stripped):
                    # Data row that belongs to the most recent label.
                    # NOTE(review): `eval` executes every token as Python
                    # code, so only trusted files may be read here; consider
                    # `ast.literal_eval` if the tokens are plain numbers.
                    row = list(map(eval, stripped.split()))
                    current[label]["vals"].append(row)
                elif ':' in line:
                    # "Label: col1 col2 ..." header with column names.
                    tokens = stripped.split()
                    label = tokens[0].rstrip(':')
                    current[label] = {"cols": tokens[1:], "vals": []}
                else:
                    # Bare header line; the whole line is the label.
                    label = stripped
                    current[label] = {"vals": []}
                    if "Feature" in label:
                        # "Feature <name> <values...>" lines are stored
                        # under "features" instead of as their own section.
                        pieces = label.split()
                        current["features"][pieces[1]] = pieces[2:]
                        del current[label]
            elif "END_CFG" in line:
                if current is not None:
                    sections.append(current)
                    current = None

    result = AtomsList()
    for section in sections:
        result.append(_cfgd_to_atoms(section, species))
    return result
def rset(self):
    """Returns a :class:`~matdb.atoms.AtomsList` holding one
    :class:`~matdb.atoms.Atoms` object per entry of
    ``self.fitting_configs``.
    """
    from matdb.atoms import Atoms, AtomsList

    loaded = AtomsList()
    # Each entry of `fitting_configs` is handed to the `Atoms` constructor.
    for atoms in map(Atoms, self.fitting_configs):
        loaded.append(atoms)
    return loaded
def rset(self):
    """Returns a :class:`~matdb.atoms.AtomsList` with one atoms object
    for each entry in ``self.fitting_configs``.
    """
    # This group is at the bottom of the stack, so its own configurations
    # are the result set.
    configs = AtomsList()
    for config_path in self.fitting_configs:
        atoms = Atoms(config_path)
        configs.append(atoms)
    return configs
def _get_substitution(self):
    """Create one copy of the seed atoms per symbol assignment.

    The numpy random generator is seeded with ``self.ran_seed`` before
    ``self._set_stoichiometry()`` is asked for the symbol assignments; each
    assignment is applied to a fresh copy of ``self.atoms``.

    Returns:
        AtomsList: the substituted copies, one per assignment.
    """
    # Seed first so that the assignments are reproducible.
    np.random.seed(self.ran_seed)
    assignments = self._set_stoichiometry()

    substituted = AtomsList()
    for symbols in assignments:
        variant = self.atoms.copy()
        variant.set_chemical_symbols(symbols)
        substituted.append(variant)
    return substituted
def fitting_configs(self):
    """Returns a :class:`matdb.atoms.AtomsList` for all configs in this
    group, including those of any sub-sequences.
    """
    gathered = AtomsList()
    if len(self.sequence) > 0:
        # Parent group: concatenate whatever each sub-sequence reports.
        for child in self.sequence.values():
            gathered.extend(child.fitting_configs)
    else:
        # Leaf group: use the atoms objects stored on this group.
        for atoms in self.config_atoms.values():
            gathered.append(atoms)
    return gathered
def _get_vacancies(self):
    '''Create copies of the seed atoms with atoms removed (vacancies).

    The number of vacancies per cell is ``int(num_atoms *
    self.vac_per_atom)``. The numpy random generator is seeded with
    ``self.ran_seed`` before the indices are selected. When
    ``choose(num_atoms, num_vac)`` exceeds 1000 the indices come from
    ``self._get_random_choice``; otherwise from ``self._get_combinations``.

    Returns:
        tuple: ``(vacancies, select_atoms)`` where `vacancies` is an
        AtomsList with one copy of ``self.atoms`` per selection (the
        selected indices deleted) and `select_atoms` is the collection of
        index selections that was used.
    '''
    num_atoms = len(self.atoms.get_positions())
    num_vac = int(num_atoms * self.vac_per_atom)

    # Set the random seed for reproduction.
    np.random.seed(self.ran_seed)

    # Pick the selection strategy from the number of possible combinations.
    if choose(num_atoms, num_vac) > 1000:
        selector = self._get_random_choice
    else:
        selector = self._get_combinations
    select_atoms = selector([], num_atoms, num_vac)

    vacancies = AtomsList()
    for removal in select_atoms:
        reduced = self.atoms.copy()
        del reduced[removal]
        vacancies.append(reduced)
    return vacancies, select_atoms
def rset(self):
    """Returns the configs in the latest result set; a
    :class:`matdb.atoms.AtomsList` for a group without sub-sequences.
    """
    if len(self.sequence) > 0:
        # NOTE(review): this branch returns a plain list (not an AtomsList)
        # and calls `rset()` on each sub-sequence -- confirm both match how
        # `rset` is declared and consumed elsewhere.
        return [config
                for child in self.sequence.values()
                for config in child.rset()]

    # Bottom of the stack: load the pre-computation atoms object stored in
    # each of this group's folders.
    loaded = AtomsList()
    for folder in self.atoms_paths():
        loaded.append(Atoms(path.join(folder, 'pre_comp_atoms.h5')))
    return loaded
def _filter_dbs(self, seqname, dbfiles):
    """Filters each of the database files specified so that they conform to
    any specified filters.

    A database whose entries all pass the filters is returned unchanged; a
    database that loses entries is rewritten to ``__<dbname>.h5`` under
    ``self.root`` and that new path is returned in its place.

    Args:
        seqname (str): name of the sequence that the database files are
            from.
        dbfiles (list): list of `str` paths to database files to filter.

    Returns:
        list: list of `str` paths to include in the database from this
        sequence. This is `dbfiles` itself when no filters apply to
        `seqname`.
    """
    if len(self.dbfilter) == 0 or seqname not in self._dbfilters:
        return dbfiles

    # The filters values have a function and a list of the actual values
    # used in the formula replacement. Extract the parameters; we can't
    # serialize the actual functions.
    filters = self._dbfilters[seqname].items()
    params = {k: v[1] for k, v in filters}

    filtered = []
    for dbfile in dbfiles:
        dbname = path.basename(path.dirname(dbfile))
        filtdb = path.join(self.root, "__{}.h5".format(dbname))
        if path.isfile(filtdb):
            # NOTE(review): an already existing filtered file is skipped
            # and is *not* added to the returned list -- confirm that this
            # is intended.
            continue

        al = AtomsList(dbfile)
        nl = AtomsList()
        for a in al:
            # The 0 index here gets the function out; see comment above
            # about the filters dictionary. An entry is kept only when no
            # filter function returns a true value for it.
            if not any(opf[0](getattr(a, attr)) for attr, opf in filters):
                nl.append(a)

        if len(nl) != len(al):
            nl.write(filtdb)
            dN, N = (len(al) - len(nl), len(nl))
            dbcat([dbfile], filtdb, filters=params, dN=dN, N=N)
            filtered.append(filtdb)
        else:
            # Nothing was removed, so the original file is used as-is.
            # (This previously referenced the undefined name `nfile`.)
            filtered.append(dbfile)

    return filtered
def rset(self):
    """Returns the reusable set to the next database group.

    Returns:
        list: list of :class:`~matdb.atoms.Atoms`
    """
    if len(self.sequence) > 0:
        # Parent group: pool the result sets of all sub-sequences and wrap
        # them in a single AtomsList.
        pooled = []
        for group in self.sequence.values():
            pooled.extend(group.rset)
        return AtomsList(pooled)

    # Bottom of the stack: each fitting config is joined with "atoms.h5"
    # and loaded as an atoms object.
    leaf = AtomsList()
    for config_dir in self.fitting_configs:
        leaf.append(Atoms(path.join(config_dir, "atoms.h5")))
    return leaf
def _hessian_configs(self):
    """Returns a :class:`~matdb.atoms.AtomsList` for all configs in this
    group. This list includes a single *duplicated* configuration for each
    of the eigenvalue/eigenvector combinations of the Hessian matrix.

    The first entry is the first configuration from ``self.iconfigs``; every
    further entry is a stripped copy of it carrying one eigenvector (as a
    property) and its eigenvalue (as a parameter) of ``self.H``.

    .. note:: This assumes that the group has actual displacements and is
      not a parent group in the recursive structure.

    Returns:
        AtomsList: the base configuration followed by one configuration per
        retained eigenvalue.

    Raises:
        AssertionError: if the base configuration does not have a negative
            energy, an ``(n, 3)`` force array and a ``(3, 3)`` virial.
    """
    configs = AtomsList()
    # Start with a configuration that has energy, force and virial
    # information from the computation. We just grab the first of the
    # configurations from this sequence or its children.
    atBase = next(self.iconfigs)
    atcalc = atBase.get_calculator()
    # Make sure that energy, force and virial information was found.
    assert getattr(atBase, atcalc.energy_name) < 0
    assert getattr(atBase, atcalc.force_name).shape == (atBase.n, 3)
    assert getattr(atBase, atcalc.virial_name).shape == (3, 3)
    configs.append(atBase)

    # Now, make a copy of the base atoms object and strip the energy,
    # virial and force entries from it. We will add the eigenvalue and
    # eigenvectors *individually* because they each need different scaling
    # for sigma in the GAP fit.
    atEmpty = atBase.copy()
    for k in list(atBase.params.keys()):
        if "energy" in k or "virial" in k:
            atEmpty.rm_param(k)
    for k in list(atBase.properties.keys()):
        if "force" in k:
            atEmpty.rm_property(k)

    hname = "{}_hessian1".format(self.calc.key)
    evals, evecs = np.linalg.eigh(self.H)
    # Integer division: the value is used as an array dimension below and
    # must be an int (true division yields a float on Python 3).
    natoms = len(evals) // 3
    l0 = np.max(evals) - np.min(evals)
    sigma0 = 0.01
    lscaling = 0.005
    # NB: the eigenvectors are the *columns* of `evecs`, so transpose the
    # matrix before pairing it with the eigenvalues.
    for eigval, eigvec in zip(evals, evecs.T):
        # The eigenvalues should all be positive. There may be some really
        # small ones that are essentially zero, but slightly negative.
        if np.abs(eigval) < 1e-5 or eigval < 0:
            continue

        # Add this eigenvector to its own configuration.
        atc = atEmpty.copy()
        Hi = np.reshape(eigvec, (natoms, 3))
        atc.properties[hname] = Hi
        # Same thing for the eigenvalue.
        atc.add_param(hname, eigval)
        # Coefficient for an eigenvalue-dependent sigma. It is currently
        # unused: the scaled variant below is disabled and the constant
        # `sigma0` is stored instead.
        c = (lscaling * l0 - sigma0) / l0**2
        #atc.add_param("{}_hessian_csigma".format(self.calc.key), sigma0 + c*l**2)
        atc.add_param("{}_hessian_csigma".format(self.calc.key), sigma0)
        atc.add_param("n_{}_hessian".format(self.calc.key), 1)
        configs.append(atc)

    return configs