Ejemplo n.º 1
0
 def _get_distortion(self):
     """Build one distorted copy of the seed configuration per scaling matrix.

     For every matrix returned by `self._get_scaling_matrix()`, the seed
     atoms are copied, the cell of the copy is multiplied by the matrix,
     the copy is rattled (only when `self.rattle` is non-zero) and finally
     its positions are multiplied by the same matrix.

     Returns:
         AtomsList: the distorted copies of `self.atoms`, one for each
         scaling matrix.

     Raises:
         ValueError: if `self.cov_diag` is `None`; the scaling matrices are
           only generated when it is set.
     """
     if self.cov_diag is None:
         #Without this guard the loop below would reference a name that was
         #never bound and fail with an opaque UnboundLocalError.
         raise ValueError("cov_diag must be set in order to generate the "
                          "scaling matrices for the distortion.")

     scaling_matrices = self._get_scaling_matrix()
     atom_seed = AtomsList()
     for matrix in scaling_matrices:
         local_atoms = self.atoms.copy()
         local_atoms.set_cell(np.matmul(local_atoms.get_cell(), matrix))
         if self.rattle != 0.0:
             local_atoms.rattle(stdev=self.rattle)
         #Also distort the positions of the atoms just like the lattice
         #vectors.
         local_atoms.positions = np.matmul(local_atoms.get_positions(),
                                           matrix)
         atom_seed.append(local_atoms)
     return atom_seed
Ejemplo n.º 2
0
    def rset(self):
        """Returns the atoms objects that carry the Hessian matrix for this
        group, or for the sub-sequences below it.

        Returns:
            list: list of :class:`~matdb.atoms.Atoms`; at the bottom of the
            stack the atoms object has the `H` matrix stored in its info
            dictionary.
        """
        if len(self.sequence) > 0:
            if isinstance(self.parent, Hessian):
                #The parent is itself a Hessian group: only the sub-sequence
                #picked by `_best_bands` contributes to the result.
                chosen = self._best_bands()
                return self.sequence[chosen].rset

            #Otherwise gather the result sets of every sub-sequence.
            gathered = AtomsList()
            for child in self.sequence.values():
                gathered.extend(child.rset)
            return gathered

        #No sub-sequences; attach the Hessian matrix to the atoms object of
        #this group and hand it back in a list of its own.
        self.atoms.info["H"] = self.H
        single = AtomsList()
        single.append(self.atoms)
        return single
Ejemplo n.º 3
0
def run(args):
    """Finds the configurations matching the requested patterns and converts
    them to the requested output format.

    Args:
        args (dict): script arguments. When `None`, only the copyright notice
          is printed. The keys read here are `dbspec` (handed to the
          :class:`Controller`), `p` (iterable of patterns to search for),
          `format`, `o` (output path) and `overwrite`.
    """
    print("matdb  Copyright (C) 2019  HALL LABS")
    print("This program comes with ABSOLUTELY NO WARRANTY.")
    print(
        "This is free software, and you are welcome to redistribute it under "
        "certain conditions.")
    if args is None:
        return

    #No matter what other options the user has chosen, we will have to create a
    #database controller for the specification they have given us.
    cdb = Controller(args["dbspec"])

    #Gather the configurations of every entry that matches any of the
    #patterns.
    configs = AtomsList()
    for pattern in args["p"]:
        for entry in cdb.find(pattern):
            for iatoms in entry.iconfigs:
                configs.append(iatoms)

    #Only the xyz format is handled here; any other value is ignored.
    if args["format"] == "xyz":
        from matdb.conversion import to_xyz
        target = path.abspath(path.expanduser(args["o"]))
        to_xyz(configs, target, args["overwrite"])
Ejemplo n.º 4
0
def cfg_to_atomslist(cfgfile, config_type=None, species=None):
    """Reads every configuration in a CFG format file into an AtomsList.

    Args:
        cfgfile (str): path to the file to convert.
        config_type (str): name of the config_type to assign to each
          configuration. NOTE(review): not referenced in this function body.
        species (list): list of element names corresponding to the integer
          species in the CFG dictionary; passed on to `_cfgd_to_atoms`.

    Returns:
        matdb.atoms.AtomsList : one entry for each `BEGIN_CFG`/`END_CFG`
        block found in the file.
    """
    from matdb.atoms import AtomsList

    parsed_cfgs = []
    current = None
    with open(cfgfile) as handle:
        for raw_line in handle:
            text = raw_line.strip()
            if text == '':
                continue

            if "BEGIN_CFG" in raw_line:
                #Start collecting a fresh configuration dictionary.
                current = {"features": {}}
                continue

            if "END_CFG" in raw_line:
                #Close the configuration that is open (if any).
                if current is not None:
                    parsed_cfgs.append(current)
                current = None
                continue

            if not isinstance(current, dict):
                #Anything outside of a BEGIN/END pair is ignored.
                continue

            if not _rxcfg.match(text):
                #A row of values belonging to the most recent section header.
                #NOTE(review): `eval` executes the file contents as python
                #expressions; only use this on trusted CFG files.
                row = list(map(eval, text.split()))
                current[section]["vals"].append(row)
                continue

            #Section header; with a colon it also names the value columns.
            if ':' in raw_line:
                header = text.split()
                section = header[0].rstrip(':')
                current[section] = {"cols": header[1:], "vals": []}
            else:
                section = text
                current[section] = {"vals": []}

            if "Feature" in section:
                #Features are stored by name under the "features" key
                #instead of as a section of their own.
                pieces = section.split()
                current["features"][pieces[1]] = pieces[2:]
                del current[section]

    result = AtomsList()
    for cfgd in parsed_cfgs:
        result.append(_cfgd_to_atoms(cfgd, species))

    return result
Ejemplo n.º 5
0
    def rset(self):
        """Builds a :class:`~matdb.atoms.AtomsList` with one
        :class:`~matdb.atoms.Atoms` object for each entry in
        `self.fitting_configs`.
        """
        from matdb.atoms import Atoms, AtomsList

        loaded = AtomsList()
        add_config = loaded.append
        for location in self.fitting_configs:
            add_config(Atoms(location))
        return loaded
Ejemplo n.º 6
0
    def rset(self):
        """Builds a :class:`~matdb.atoms.AtomsList` with one
        :class:`~matdb.atoms.Atoms` object for each entry in
        `self.fitting_configs`.
        """
        #This group does not delegate to any sub-sequences; its own
        #configurations are the result set.
        loaded = AtomsList()
        add_config = loaded.append
        for location in self.fitting_configs:
            add_config(Atoms(location))
        return loaded
Ejemplo n.º 7
0
    def _get_substitution(self):
        """Creates one copy of the seed atoms for each set of chemical symbols
        returned by `_set_stoichiometry`.

        Returns:
            AtomsList: copies of `self.atoms` whose chemical symbols have been
            replaced, in the order the symbol sets were returned.
        """
        #Seed the generator before asking for the stoichiometries so that the
        #results can be reproduced.
        np.random.seed(self.ran_seed)
        symbol_sets = self._set_stoichiometry()

        substituted = AtomsList()
        for symbols in symbol_sets:
            candidate = self.atoms.copy()
            candidate.set_chemical_symbols(symbols)
            substituted.append(candidate)
        return substituted
Ejemplo n.º 8
0
    def fitting_configs(self):
        """Collects the configs of this group into a
        :class:`matdb.atoms.AtomsList`; when the group has sub-sequences,
        their `fitting_configs` are combined instead.
        """
        collected = AtomsList()
        if len(self.sequence) != 0:
            #Delegate to each of the sub-sequences.
            for child in self.sequence.values():
                collected.extend(child.fitting_configs)
            return collected

        for atoms in self.config_atoms.values():
            collected.append(atoms)
        return collected
Ejemplo n.º 9
0
    def _get_vacancies(self):
        """Creates copies of the seed configuration with atoms removed.

        The number of atoms removed from each copy is
        `int(num_atoms * self.vac_per_atom)`, where `num_atoms` is the number
        of positions in `self.atoms`. The indices to remove are produced by
        `_get_random_choice` when `choose(num_atoms, num_vac)` exceeds 1000
        and by `_get_combinations` otherwise.

        Returns:
            tuple: `(atom_seed, select_atoms)`; `atom_seed` is an AtomsList
            with one copy of `self.atoms` for each entry of `select_atoms`
            (that entry having been deleted from the copy), and
            `select_atoms` is whatever the index selection helper returned.
        """
        natoms = int(len(self.atoms.get_positions()))
        nvacancies = int(natoms * self.vac_per_atom)

        #Seed the generator so that the selection can be reproduced.
        np.random.seed(self.ran_seed)
        if choose(natoms, nvacancies) > 1000:
            picker = self._get_random_choice
        else:
            picker = self._get_combinations
        #Either helper is handed an empty list as its first argument.
        removals = picker([], natoms, nvacancies)

        vacancies = AtomsList()
        for indices in removals:
            cell = self.atoms.copy()
            del cell[indices]
            vacancies.append(cell)
        return vacancies, removals
Ejemplo n.º 10
0
 def rset(self):
     """Returns a :class:`matdb.atoms.AtomsList`, one for each config in the
     latest result set.

     When the group has no sub-sequences, one :class:`Atoms` object is read
     from the `pre_comp_atoms.h5` file in each of the folders returned by
     `self.atoms_paths()`. Otherwise the result sets of all the sub-sequences
     are combined.

     Returns:
         AtomsList: the atoms objects of the result set. Both branches
         return the same type.
     """
     result = AtomsList()
     if len(self.sequence) == 0:
         # Return the configurations from this group; it is at the
         # bottom of the stack
         for epath in self.atoms_paths():
             result.append(Atoms(path.join(epath, 'pre_comp_atoms.h5')))
     else:
         # Accumulate into the AtomsList instead of a plain list so that
         # the documented return type holds for parent groups as well.
         for e in self.sequence.values():
             result.extend(e.rset())
     return result
Ejemplo n.º 11
0
    def _filter_dbs(self, seqname, dbfiles):
        """Filters each of the database files specified so that they conform to
        any specified filters.

        For each database file, the atoms that trigger none of the filters of
        the sequence are kept. If any atoms were removed, the kept atoms are
        written to `__<dbname>.h5` in `self.root` and that path is used;
        otherwise the original path is used unchanged.

        Args:
            seqname (str): name of the sequence that the database files are
              from.
            dbfiles (list): list of `str` paths to database files to filter.

        Returns:
            list: list of `str` paths to include in the database from this
            sequence. When no filters apply to `seqname`, this is `dbfiles`
            itself.
        """
        if len(self.dbfilter) > 0 and seqname in self._dbfilters:
            filtered = []
            #The filters values have a function and a list of the actual values
            #used in the formula replacement. Extract the parameters; we can't
            #serialize the actual functions.
            filters = self._dbfilters[seqname].items()
            params = {k: v[1] for k, v in filters}

            for dbfile in dbfiles:
                dbname = path.basename(path.dirname(dbfile))
                filtdb = path.join(self.root, "__{}.h5".format(dbname))
                if path.isfile(filtdb):
                    #NOTE(review): a filtered file that already exists is
                    #skipped and *not* added to the returned list; confirm
                    #that this is intended.
                    continue

                al = AtomsList(dbfile)
                nl = AtomsList()
                for a in al:
                    #The 0 index here gets the function out; see comment above
                    #about the filters dictionary.
                    if not any(opf[0](getattr(a, attr))
                               for attr, opf in filters):
                        nl.append(a)

                if len(nl) != len(al):
                    nl.write(filtdb)
                    dN, N = (len(al) - len(nl), len(nl))
                    dbcat([dbfile], filtdb, filters=params, dN=dN, N=N)
                    filtered.append(filtdb)
                else:
                    #Nothing was filtered out; keep the original file. (This
                    #used to reference an undefined name `nfile`.)
                    filtered.append(dbfile)
        else:
            filtered = dbfiles

        return filtered
Ejemplo n.º 12
0
    def rset(self):
        """Returns the reusable set to the next database group.

        Returns:
            list: list of :class:`~matdb.atoms.Atoms`; read from the
            `atoms.h5` file of each entry in `self.fitting_configs` when the
            group has no sub-sequences, otherwise combined from the `rset` of
            every sub-sequence.
        """
        if len(self.sequence) != 0:
            #Combine whatever each of the sub-sequences exposes.
            gathered = []
            for child in self.sequence.values():
                gathered.extend(child.rset)
            return AtomsList(gathered)

        #No sub-sequences; load the atoms objects of this group.
        reusable = AtomsList()
        for folder in self.fitting_configs:
            reusable.append(Atoms(path.join(folder, "atoms.h5")))
        return reusable
Ejemplo n.º 13
0
    def _hessian_configs(self):
        """Returns a :class:`~matdb.atoms.AtomsList` for all configs in this
        group. This list includes a single *duplicated* configuration for each
        of the eigenvalue/eigenvector combinations of the Hessian matrix.

        The first entry is the base configuration (first item of
        `self.iconfigs`) with its energy, force and virial information. Each
        following entry is a copy of the base configuration, stripped of that
        information, which carries one eigenvector (as a property) and its
        eigenvalue (as a parameter) of `self.H`. Eigenvalues that are negative
        or smaller than `1e-5` in magnitude are skipped.

        .. note:: This assumes that the group has actual displacements and is
          not a parent group in the recursive structure.

        Returns:
            AtomsList: the base configuration followed by one configuration
            for each eigenvalue/eigenvector pair that was kept.
        """
        configs = AtomsList()

        #Start with a configuration that has energy, force and virial
        #information. We just grab the first of the configurations from this
        #sequence or its children.
        atBase = next(self.iconfigs)
        atcalc = atBase.get_calculator()

        #Make sure that energy, force and virial information was found.
        assert getattr(atBase, atcalc.energy_name) < 0
        assert getattr(atBase, atcalc.force_name).shape == (atBase.n, 3)
        assert getattr(atBase, atcalc.virial_name).shape == (3, 3)
        configs.append(atBase)

        #Now, make a copy of the base atoms object; this object only has the
        #lattice and positions. We will add the eigenvalue and eigenvectors
        #*individually* because they each need different scaling for sigma in
        #the GAP fit.
        atEmpty = atBase.copy()
        for k in list(atBase.params.keys()):
            if "energy" in k or "virial" in k:
                atEmpty.rm_param(k)
        for k in list(atBase.properties.keys()):
            if "force" in k:
                atEmpty.rm_property(k)

        #The names do not change between eigenvectors; build them once.
        hname = "{}_hessian1".format(self.calc.key)
        sigma_name = "{}_hessian_csigma".format(self.calc.key)
        count_name = "n_{}_hessian".format(self.calc.key)

        evals, evecs = np.linalg.eigh(self.H)
        #Integer division; the reshape below requires an integer dimension
        #and true division would produce a float.
        natoms = len(evals) // 3
        #NOTE: an eigenvalue dependent sigma, `sigma0 + c*l**2` with
        #`c = (0.005 * l0 - sigma0) / l0**2` and
        #`l0 = np.max(evals) - np.min(evals)`, is currently disabled; every
        #configuration gets `sigma0`.
        sigma0 = 0.01

        #NB: the eigenvectors are the *columns* of `evecs`; transpose so that
        #each eigenvalue is paired with its own eigenvector.
        for l, v in zip(evals, evecs.T):
            #The eigenvalues should all be positive. There may be some really
            #small ones that are essentially zero, but slightly negative.
            if np.abs(l) < 1e-5 or l < 0:
                continue

            #Add this eigenvector to its own configuration.
            atc = atEmpty.copy()
            atc.properties[hname] = np.reshape(v, (natoms, 3))

            #Same thing for the eigenvalue.
            atc.add_param(hname, l)

            atc.add_param(sigma_name, sigma0)
            atc.add_param(count_name, 1)
            configs.append(atc)

        return configs