예제 #1
0
    def _select_template(self):
        """Creates the select command template.

        The base `mlp select-add` command is extended with one
        `--<key>=<value>` option for every entry in `self.select_args`, and
        its output is redirected to `training_select.txt`. Options that would
        rename the mvs or selected-configuration files are skipped with a
        warning.

        Returns:
            str: the shell command for the active-selection step.
        """
        # mlp select-add pot.mtp train.cfg new.cfg diff.cfg:
        # actively selects configurations from new.cfg and save those
        # that need to be added to train.cfg to diff.cfg
        #   Options:
        #   --init-threshold=<num>: set the initial threshold to num, default=1e-5
        #   --select-threshold=<num>: set the select threshold to num, default=1.1
        #   --swap-threshold=<num>: set the swap threshold to num, default=1.0000001
        #   --energy-weight=<num>: set the weight for energy equation, default=1
        #   --force-weight=<num>: set the weight for force equations, default=0
        #   --stress-weight=<num>: set the weight for stress equations, default=0
        #   --nbh-weight=<num>: set the weight for site energy equations, default=0
        #   --mvs-filename=<filename>: name of mvs file
        #   --selected-filename=<filename>: file with selected configurations
        #   --selection-limit=<num>: swap limit for multiple selection, default=0 (disabled)
        #   --weighting=<string>: way of weighting the functional for better fitting of
        # properties. Default=vibrations. Others=molecules, structures.

        template = "mlp select-add pot.mtp train.cfg candidate.cfg new_training.cfg"

        for k, v in self.select_args.items():
            if k in ["mvs-filename", "selected-filename"]:
                # The option name must be interpolated; otherwise the user
                # sees a literal "{0}" in the warning.
                msg.warn(
                    "Changing the {0} file name is not enabled.".format(k))
                continue
            template = template + " --{0}={1}".format(k, v)

        return template + " > training_select.txt"
예제 #2
0
파일: md.py 프로젝트: HallLabs/tracy_matdb
    def __init__(self, atoms=None, root=None, parent=None, incar=None,
                 kpoints=None, execution=None, nsteps=None,
                 samplerate=100, strains=None, tstart=None, tend=None,
                 supercell=None, name="md"):
        """Sets up a molecular dynamics group rooted at `<root>/<name>`.

        Args:
            atoms: seed configuration, forwarded to the parent initializer.
            root (str): parent directory; the group lives in `root/name`.
            parent: owning object, forwarded to the parent initializer.
            incar (dict): calculator settings; a new empty dict when omitted.
            kpoints (dict): k-point settings; a new empty dict when omitted.
            execution (dict): execution settings; a new empty dict when
              omitted.
            nsteps: stored on the instance as `nsteps`.
            samplerate (int): stored on the instance as `samplerate`.
            strains (list): stored as `strains`; defaults to `[0]`.
            tstart: stored on the instance as `tstart`.
            tend: stored on the instance as `tend`.
            supercell: stored, but not yet supported; a warning is issued and
              the seed configuration is used instead.
            name (str): name of the group and of its sub-folder.
        """
        self.name = name
        msg.warn("The DM group is only configured for VASP at this time.")

        # Build fresh dictionaries per call: a `{}` default in the signature
        # is a single object shared (and possibly mutated) across every
        # instance that relies on the default.
        incar = {} if incar is None else incar
        kpoints = {} if kpoints is None else kpoints
        execution = {} if execution is None else execution

        super(DynamicsGroup, self).__init__(atoms, incar, kpoints, execution,
                                            path.join(root, self.name),
                                            parent, "D", nconfigs=None)
        self.samplerate = samplerate
        self.nsteps = nsteps
        self.strains = [0] if strains is None else strains
        self.tstart = tstart
        self.tend = tend
        self.supercell = supercell

        if supercell is not None:
            msg.warn("Not Implemnted: At this time specifying a supercell is not "
                    "yet implemented in `matdb` but will be available in "
                    "latter versions. Using seed configuration instead.")
        # With or without a requested supercell, the seed is a copy of the
        # atoms (`self.atoms` is presumably set by the parent initializer).
        self.seed = self.atoms.copy()

        self._update_incar()
        self._update_kpoints()
예제 #3
0
    def _relax_template(self):
        """Builds the shell command used for the `mlp relax` step.

        The command is optionally launched through `mpirun` (with
        `--allow-run-as-root` when `self.run_as_root` is set), always uses the
        fixed input/output file names, receives the extra options from
        `self.relax_args` and has its output redirected to
        `training_relax.txt`.

        Returns:
            str: the complete relax command.
        """
        # Reference for `mlp relax settings-file [options]`; the settings
        # file holds the relaxation and mlip settings and options may come in
        # any order:
        #   --pressure=<num>: external pressure (in GPa)
        #   --iteration_limit=<num>: maximum number of iterations
        #   --min-dist=<num>: terminate relaxation if atoms come closer than <num>
        #   --force-tolerance=<num>: relaxes until forces are less than
        #       <num> (eV/Angstr.); zero disables atom relaxation
        #   --stress-tolerance=<num>: relaxes until stresses are smaller than
        #       <num> (GPa); zero disables lattice relaxation
        #   --max-step=<num>: maximal allowed displacement (Angstroms)
        #   --min-step=<num>: minimal displacement (Angstr.); relaxation
        #       stops once all displacements are smaller
        #   --bfgs-wolfe_c1
        #   --bfgs-wolfe_c2
        #   --cfg-filename=<str>: read initial configurations from <str>
        #   --save-relaxed=<str>: save the relaxed configurations to <str>
        #   --save-unrelaxed=<str>: save failed configurations to <str>
        #   --log=<str>: write relaxation log to <str>
        relax_cmd = ("mlp relax relax.ini "
                     "--cfg-filename=to-relax.cfg "
                     "--save-relaxed={1} --log=relax_{2} "
                     "--save-unrelaxed={3}")

        if not self.use_mpi:
            launcher = ""
        elif self.run_as_root:
            launcher = "mpirun --allow-run-as-root -n {0} "
        else:
            launcher = "mpirun -n {0} "

        pieces = [(launcher + relax_cmd).format(
            self.ncores, "relaxed.cfg", "log.txt", "unrelaxed.cfg")]

        fixed_names = ("log", "save-unrelaxed", "save-relaxed", "cfg-filename")
        bare_flags = ("bfgs-wolfe_c1", "bfgs-wolfe_c2")
        for option, value in self.relax_args.items():
            if option in fixed_names:
                # File names are hard-wired above; ignore user overrides.
                msg.warn(
                    "Changing the {0} file name is not supported.".format(
                        option))
            elif option in bare_flags:
                pieces.append(" --{0}".format(option))
            else:
                pieces.append(" --{0}={1}".format(option, value))

        pieces.append(" > training_relax.txt")
        return "".join(pieces)
예제 #4
0
    def __init__(self, name, repeater, root, controller, steps, **kwargs):
        """Creates the folder for this sequence and instantiates its steps.

        Args:
            name (str): name of the sequence; also its sub-folder in `root`.
            repeater: owning object; its `name` prefixes `self.fqn`.
            root (str): directory in which the sequence folder is created.
            controller: object indexed by name to resolve string references
              to previously defined instances.
            steps (list): step specifications. Each entry is either a string
              (looked up in `controller`) or a dict whose `type` entry is
              `"<module>.<class>"` inside `matdb.fitting`; the remaining
              entries become constructor keyword arguments.
            kwargs: default keyword arguments handed to every constructed
              step unless the step specification already defines them.
        """
        from collections import OrderedDict
        from importlib import import_module
        from os import mkdir

        self.name = name
        self.fqn = "{}.{}".format(repeater.name, name)
        self.root = path.join(root, name)
        self.repeater = repeater
        self.controller = controller

        if not path.isdir(self.root):
            mkdir(self.root)

        # Raw specifications describing the kinds of training steps to set up.
        self._settings = steps

        self.steps = OrderedDict()
        for spec in steps:
            if isinstance(spec, six.string_types):
                # A plain string refers to an instance that was defined
                # previously; reuse it instead of building a new one.
                existing = self.controller[spec]
                self.steps[existing.name] = existing
                continue

            modname, clsname = spec["type"].split('.')
            module = import_module("matdb.fitting.{}".format(modname))
            if hasattr(module, clsname):
                trainer_cls = getattr(module, clsname)
            else:  # pragma: no cover
                # This trainer type is not implemented yet; skip it.
                msg.warn("Cannot find trainer of type {}.".format(
                    spec["type"]))
                continue

            # Work on a copy so the caller's specification is left untouched,
            # then inject the arguments every step needs.
            options = spec.copy()
            options.pop("type")
            options["root"] = self.root
            options["parent"] = self
            options["controller"] = self.controller

            # Parent-level defaults only fill in what the spec left out.
            for key, default in kwargs.items():
                options.setdefault(key, default)

            step = trainer_cls(**options)
            self.steps[step.name] = step
예제 #5
0
def symlink(target, source):
    """Makes `target` a symbolic link that points at `source`.

    An existing file or link at `target` is removed first. If `target` is a
    real directory, nothing is changed and a warning is issued instead.

    Args:
        target (str): path of the link to create.
        source (str): path that the link should point to.
    """
    is_link = path.islink(target)
    if path.isdir(target) and not is_link:
        # Never delete a real directory on the user's behalf.
        msg.warn(
            "Cannot auto-delete directory '{}' for symlinking.".format(target))
        return

    if is_link or path.isfile(target):
        remove(target)

    os_symlink(source, target)
예제 #6
0
def _calc_quick(atoms, supercell=(1, 1, 1), delta=0.01):
    """Calculates the Hessian for a given atoms object just like :func:`calc`,
    *but*, it uses symmetry to speed up the calculation. Depending on the
    calculator being used, it is possible that the symmetrized result may be
    different from the full result with all displacements, done manually by
    :func:`calc`.

    A copy of `atoms` is relaxed first (output goes to `phonons.log` in the
    current directory); if that fails a warning is issued and the calculation
    continues with the copy as it is.

    Args:
        atoms (matdb.atoms.Atoms): atomic structure of the *primitive*.
        supercell (list): or `tuple` or :class:`numpy.ndarray` specifying the
          integer supercell matrix.
        delta (float): displacement in Angstroms of each atom when computing the
          phonons.

    Returns:
        numpy.ndarray: Hessian matrix that has dimension `(natoms*3, natoms*3)`,
        where `natoms` is the number of atoms in the *supercell*.
    """
    #We need to make sure we are at the zero of the potential before
    #generating the displacements, so relax a copy of the atoms first.
    ratoms = atoms.copy()
    try:
        with open("phonons.log", 'w') as f:
            with redirect_stdout(f):
                print(ratoms.get_forces())
                minim = FIRE(ratoms)
                minim.run(fmax=1e-4, steps=100)
    except Exception:
        #The potential is unstable probably. Issue a warning. Only ordinary
        #errors are treated this way; KeyboardInterrupt/SystemExit must still
        #be able to stop the run.
        msg.warn(
            "Couldn't optimize the atoms object. Potential may be unstable.")

    primitive = matdb_to_phonopy(ratoms)
    phonon = Phonopy(primitive, conform_supercell(supercell))
    phonon.generate_displacements(distance=delta)
    supercells = phonon.get_supercells_with_displacements()
    #The calculator is taken from the original object, not from the copy.
    pot = atoms.get_calculator()
    assert pot is not None

    forces = []
    for scell in supercells:
        matoms = phonopy_to_matdb(scell)
        #Call a manual reset of the calculator so that we explicitly recalculate
        #the forces for the current atoms object.
        pot.reset()
        matoms.set_calculator(pot)
        forces.append(matoms.get_forces())

    phonon.set_forces(forces)
    phonon.produce_force_constants()
    return unroll_fc(phonon._force_constants)
예제 #7
0
    def add(self, key, value):
        """Appends `key` to the ordered set unless it is already present.

        Args:
            key (tuple): Anything that could be added to the set.
            value (tuple): The actual values that the suffixes correspond to.
        """
        if key in self.map:
            msg.warn(
                "The key {} already exists in the set, ignoring addition.".
                format(key))
            return

        # `self.end` is the sentinel of a circular doubly-linked list whose
        # nodes are `[key, previous, next]`; slot 1 of the sentinel is the
        # current last node.
        sentinel = self.end
        last = sentinel[1]
        node = [key, last, sentinel]
        last[2] = node
        sentinel[1] = node
        self.map[key] = node
        self.values[key] = value
예제 #8
0
    def _train_template(self):
        """Builds the shell command used for the `mlp train` step.

        The command is optionally launched through `mpirun`, receives the
        options from `self.train_args` as `--<key>=<value>` and has its output
        redirected to `training.txt`. Options that rename the potential file
        or request validation are skipped with a warning.

        Returns:
            str: the complete train command.
        """
        # Reference for `mlp train potential.mtp train_set.cfg [options]`,
        # which trains potential.mtp on the training set from train_set.cfg:
        #   --energy-weight=<double>: weight of energies in the fitting. Default=1
        #   --force-weight=<double>: weight of forces in the fitting. Default=0.01
        #   --stress-weight=<double>: weight of stresses in the fitting. Default=0.001
        #   --scale-by-force=<double>: Default=0. If >0 then configurations
        #       near equilibrium (with roughly force < <double>) get more weight.
        #   --valid-cfgs=<string>: filename with configuration to validate
        #   --max-iter=<int>: maximal number of iterations. Default=1000
        #   --curr-pot-name=<string>: filename for potential on current iteration.
        #   --trained-pot-name=<string>: filename for trained potential.
        #       Default=Trained.mtp_
        #   --bfgs-conv-tol=<double>: stopping criterion for optimization.
        #       Default=1e-8
        #   --weighting=<string>: how to weight configurations with different
        #       sizes relative to each other. Default=vibrations.
        #       Other=molecules, structures.
        #   --init-params=<string>: how to initialize parameters if a potential
        #       was not pre-fitted. Default is random. Other is same - this is
        #       when interaction of all species is the same (more accurate
        #       fit, but longer optimization)
        #   --skip-preinit: skip the 75 iterations done when params are not given
        if not self.use_mpi:
            command = "mlp train pot.mtp train.cfg"
        elif self.run_as_root:
            command = ("mpirun --allow-run-as-root -n {} mlp train pot.mtp "
                       "train.cfg".format(self.ncores))
        else:
            command = ("mpirun -n {} mlp train pot.mtp "
                       "train.cfg".format(self.ncores))

        pieces = [command]
        for option, value in self.train_args.items():
            if option in ("curr-pot-name", "trained-pot-name"):
                msg.warn("Renaming of the potential file is not enabled.")
            elif option == "valid-cfgs":
                msg.warn("Validating configurations is not enabled.")
            else:
                pieces.append(" --{0}={1}".format(option, value))

        pieces.append(" > training.txt")
        return "".join(pieces)
예제 #9
0
    def _best_bands(self):
        """Returns the name of the band collection that has the smallest *converged*
        phonon bands. This is accomplished by assuming that the largest supercell is
        the "correct" answer, and comparing the total DOS. If the comparative error
        is within `tolerance`, then it is acceptable. The smallest acceptable
        supercell's key is returned.

        The error is the sum of absolute differences between the second DOS
        column of a calculation and that of the largest supercell. When no
        calculation is acceptable, a warning is issued and the key of the
        largest supercell is returned.

        Returns:
            str: the key in the group's sequence that has the smallest acceptable
            supercell size.
        """
        #Find the cell size and DOS for each calculation in the sequence.
        sizes = {
            k: np.linalg.det(np.reshape(np.array(d.supercell), (3, 3)))
            for k, d in self.sequence.items()
        }
        dos = {k: np.loadtxt(d.dos_file) for k, d in self.sequence.items()}

        #Find the calculation with the largest cell size and grab its DOS.
        maxkey = max(sizes.items(), key=itemgetter(1))[0]
        maxdos = dos[maxkey]

        ok = {}
        for k in sizes:
            if k == maxkey:
                continue
            assert dos[k].shape == maxdos.shape
            diff = np.sum(np.abs(dos[k][:, 1] - maxdos[:, 1]))
            if diff < self.tolerance:
                ok[k] = sizes[k]

        #Now, choose the supercell with the smallest cell size, if one
        #exists. Otherwise warn the user that either the tolerance was too low, or
        #that the calculation may not be converged.
        if len(ok) > 0:
            minkey = min(ok.items(), key=itemgetter(1))[0]
        else:
            #Nothing satisfied `diff < tolerance`, so the tolerance is too
            #strict (too low), not too high.
            msg.warn(
                "Hessian calculation may not be converged. Your tolerance "
                "may be too low. Returning the largest supercell by default.")
            minkey = maxkey

        return minkey
예제 #10
0
    def cleanup(self):
        """Extracts the calibration information from the configurations to
        determine the maximum allowable amplitude to maintain linear force
        regime.

        For every configuration folder the mean absolute force is read from
        its `output.xyz`; the amplitude and force values are then written, one
        line per configuration, to `self.outfile`. Configurations whose forces
        cannot be read are counted and reported in a warning.

        Returns:
           bool: True if the amplitude calibration is ready, i.e. the parent
           cleanup and XYZ extraction succeeded and forces were extracted for
           more than three configurations.
        """
        if not super(Calibration, self).cleanup():
            msg.warn("cannot cleanup calibration; not all configs ready.")
            return False

        success = self.xyz()
        if not success:
            msg.warn("could not extract the calibration XYZ configurations.")
            return False

        imsg = "Extracted calibration configs from {0:d} folders."
        msg.okay(imsg.format(len(self.configs)))

        #Read in the XYZ file and extract the forces on each atom in each
        #configuration.
        from matdb.atoms import AtomsList
        forces = {}
        failed = 0
        for cid, folder in self.configs.items():
            #Find the mean, *absolute* force in each of the directions. There
            #will only be one atom in the atoms list. If the calculation didn't
            #finish, then we exclude it. This happens for some of the
            #calibration runs if the atoms are too close together.
            try:
                al = AtomsList(path.join(folder, "output.xyz"))
                forces[cid] = np.mean(np.abs(np.array(al[0].dft_force)),
                                      axis=1)
            except Exception:
                #Best effort: skip unreadable configs, but do not swallow
                #KeyboardInterrupt/SystemExit like a bare `except` would.
                failed += 1

        if failed > 0:
            msg.warn(
                "couldn't extract forces for {0:d} configs.".format(failed))

        if len(forces) > 0:
            fmt = "{0:.7f}  {1:.7f}  {2:.7f}  {3:.7f}\n"
            with open(self.outfile, 'w') as f:
                for cid in forces:
                    A, F = self.amplitudes[cid], forces[cid]
                    f.write(fmt.format(A, *F))
        else:
            msg.warn("no forces available to write {}.".format(self.outfile))

        return len(forces) > 3
예제 #11
0
 def ready(self):
     """Determines if this database is finished calculating by testing the
     existence of the xyz database file in the root folder.

     When `output.xyz` exists, the number of configurations it holds is
     estimated from its line count and compared with the number of requested
     configurations; a warning is issued when they differ.

     Returns:
         bool: True when `output.xyz` exists and the estimated number of
         configurations equals `len(self.configs)`.
     """
     target = path.join(self.root, "output.xyz")
     result = False
     if path.isfile(target):
         from matdb.utility import linecount
         #The supercell multiplicity is the determinant of the integer
         #supercell matrix. `np.linalg.det` returns a float that is often
         #slightly off (e.g. 7.999999999999998 for a 2x2x2 cell), which made
         #the exact comparison below fail; round it to the nearest integer.
         multiplicity = int(round(abs(np.linalg.det(
             np.array(self.base.supercell).reshape(3, 3)))))
         #We add +2 for the parameter line and the number of atoms.
         #NOTE(review): the original author reported that this count is "off
         #by a factor"; verify against real output after the refactor.
         lpconfig = self.base.atoms.n * multiplicity + 2
         nlines = linecount(target)
         #Force true division so that a partial configuration never rounds
         #down to a matching integer.
         nconfigs = nlines / float(lpconfig)
         result = nconfigs == len(self.configs)
         if not result:
             wmsg = ("Number of structures in `output.xyz` does not match "
                     "number of requested configs. Found {0} configs in"
                     " {1} lines.")
             msg.warn(wmsg.format(nconfigs, nlines))
     return result
예제 #12
0
    def _calc_grade_template(self):
        """Builds the shell command used for the `mlp calc-grade` step.

        Every entry of `self.grade_args` is appended as `--<key>=<value>`,
        except `mvs-filename`, which is skipped with a warning. The command
        output is redirected to `training_calc_grade.txt`.

        Returns:
            str: the complete calc-grade command.
        """
        # Reference for `mlp calc-grade pot.mtp train.cfg in.cfg out.cfg`:
        # actively selects from train.cfg, generates state.mvs file from
        # train.cfg, calculates maxvol grades of configurations located in
        # in.cfg and writes them to out.cfg
        #   Options:
        #   --init-threshold=<num>: set the initial threshold to 1+num, default=1e-5
        #   --select-threshold=<num>: set the select threshold to num, default=1.1
        #   --swap-threshold=<num>: set the swap threshold to num, default=1.0000001
        #   --energy-weight=<num>: set the weight for energy equation, default=1
        #   --force-weight=<num>: set the weight for force equations, default=0
        #   --stress-weight=<num>: set the weight for stress equations, default=0
        #   --nbh-weight=<num>: set the weight for site energy equations, default=0
        #   --mvs-filename =<filename>: name of mvs file
        pieces = ["mlp calc-grade pot.mtp train.cfg train.cfg temp1.cfg"]
        for option, value in self.grade_args.items():
            if option != "mvs-filename":
                pieces.append(" --{0}={1}".format(option, value))
            else:
                msg.warn("Renaming the mvs state file is not enabled.")

        pieces.append(" > training_calc_grade.txt")
        return "".join(pieces)
예제 #13
0
def calc(primitive,
         cachedir=None,
         supercell=(1, 1, 1),
         delta=0.01,
         quick=True):
    """Calculates the Hessian for a given atoms object (which *must* have an
    attached calculator).

    .. note:: We choose to use the Hessian as the fundamental quantity in
      vibrational analysis in `matdb`.

    .. note:: `atoms` will be relaxed before calculating the Hessian.

    Args:
        primitive (matdb.atoms.Atoms): atomic structure of the *primitive*.
        cachedir (str): path to the directory where phonon calculations are
          cached. If not specified, a temporary directory will be used.
        supercell (tuple): number of times to duplicate the cell when
          forming the supercell.
        delta (float): displacement in Angstroms of each atom when computing the
          phonons.
        quick (bool): when True, use symmetry to speed up the Hessian
          calculation. See :func:`_calc_quick`.

    Returns:
        numpy.ndarray: Hessian matrix that has dimension `(natoms*3, natoms*3)`,
        where `natoms` is the number of atoms in the *supercell*.
    """
    if quick:
        return _calc_quick(primitive, supercell, delta)

    atoms = primitive.make_supercell(supercell)
    atoms.set_calculator(primitive.get_calculator())

    from ase.vibrations import Vibrations

    #The phonon calculator caches the displacements and force sets for each
    #atomic displacement using pickle. This generates three files for each
    #atomic degree of freedom (one for each cartesian direction). We want to
    #save these in a special directory.
    tempcache = False
    if cachedir is None:
        cachedir = mkdtemp()
        tempcache = True
    else:
        cachedir = path.abspath(path.expanduser(cachedir))
    if not path.isdir(cachedir):
        mkdir(cachedir)

    result = None
    precon = Exp(A=3)
    aphash = None

    #Calculate a hash of the calculator and atoms object that we are calculating
    #for. If the potential doesn't have a `to_dict` method, then we ignore the
    #hashing.
    if not tempcache and hasattr(atoms, "to_dict") and hasattr(
            atoms._calc, "to_dict"):
        atoms_pot = {"atoms": atoms.to_dict(), "pot": atoms._calc.to_dict()}
        #This UUID will probably be different, even if the positions and species
        #are identical. It may also be absent, which must not be an error.
        atoms_pot["atoms"].pop("uuid", None)
        hash_str = convert_dict_to_str(atoms_pot)
        #hashlib only accepts bytes; encode text before hashing.
        if not isinstance(hash_str, bytes):
            hash_str = hash_str.encode("utf-8")
        aphash = str(sha1(hash_str).hexdigest())

    if not tempcache:
        #Check whether we should clobber the cache or not.
        extras = ["vibsummary.log", "vib.log", "phonons.log"]

        with chdir(cachedir):
            hash_match = False
            if path.isfile("atomspot.hash"):
                with open("atomspot.hash") as f:
                    xhash = f.read()
                hash_match = xhash == aphash

            #`hascache` records whether any stale cache file was removed.
            hascache = False
            if not hash_match:
                for vibfile in glob("vib.*.pckl"):
                    remove(vibfile)
                    hascache = True

                for xfile in extras:
                    if path.isfile(xfile):
                        remove(xfile)
                        hascache = True

            if hascache:
                msg.warn(
                    "Using hard-coded cache directory. We were unable to "
                    "verify that the atoms-potential combination matches "
                    "the one for which existing cache files exist. So, we "
                    "clobbered the existing files to get the science "
                    "right. You can fix this by using `matdb.atoms.Atoms` "
                    "and `matdb.calculators.*Calculator` objects.")

    with chdir(cachedir):
        #Relax the cell before we calculate the Hessian; this gets the forces
        #close to zero before we make harmonic approximation.
        try:
            #The log must be opened for writing: with the default read mode
            #the open (or the first write) fails and the relaxation is
            #silently skipped.
            with open("phonons.log", 'w') as f:
                with redirect_stdout(f):
                    minim = PreconLBFGS(atoms, precon=precon, use_armijo=True)
                    minim.run(fmax=1e-5)
        except Exception:
            #The potential is unstable probably. Issue a warning, but let
            #KeyboardInterrupt/SystemExit propagate.
            msg.warn(
                "Couldn't optimize the atoms object. Potential may be unstable."
            )

        vib = Vibrations(atoms, delta=delta)
        with open("vib.log", 'a') as f:
            with redirect_stdout(f):
                vib.run()

        vib.summary(log="vibsummary.log")
        result = vib.H

        #Cache the hash of the atoms object and potential that we were using so
        #that we can check next time whether we should clobber the cache or not.
        if aphash is not None and not tempcache:
            with open(path.join(cachedir, "atomspot.hash"), 'w') as f:
                f.write(aphash)

    return result
예제 #14
0
    def __init__(self,
                 name="prototype",
                 structures=None,
                 ran_seed=None,
                 permutations=None,
                 **dbargs):
        """Sets up a prototypes group and resolves the requested structures.

        Args:
            name (str): name of the group.
            structures (dict): keys are the system sizes `unary`, `binary` or
              `ternary` (case-insensitive); values are either a list of
              strings to match against prototype file names, the string
              `"all"`, or an `int` number of randomly chosen prototypes.
              `None` selects no structures.
            ran_seed: seed for the `random` module; not seeded when `None`.
            permutations (dict): optional, keyed by system size. Where a size
              is present, the length of its entry is the number of
              configurations counted per structure.
            dbargs (dict): arguments forwarded to the parent initializer;
              must contain `root`.
        """
        self.name = name
        self.seeded = False
        dbargs["prefix"] = "P"
        dbargs["cls"] = Prototypes
        if "Prototypes" not in dbargs['root']:
            from os import mkdir
            new_root = path.join(dbargs['root'], "Prototypes")
            if not path.isdir(new_root):
                mkdir(new_root)
            dbargs['root'] = new_root
        super(Prototypes, self).__init__(**dbargs)

        self.in_structures = structures
        self.ran_seed = ran_seed
        self.permutations = permutations
        self.species = self.database.parent.species

        #Make sure that we override the global calculator default values with
        #those settings that we know are needed for good phonon calculations.
        #NOTE(review): `dbargs` was already consumed by the parent initializer
        #above, so this update has no visible effect in this method. Confirm
        #whether it was meant to happen earlier.
        calcargs = self.database.calculator.copy()
        if "calculator" in dbargs:
            if dbargs["calculator"] is not None and "name" in dbargs[
                    "calculator"]:
                calcargs.update(dbargs["calculator"])
                dbargs["calculator"] = calcargs

        # The prototypes are saved into the file prototypes.tar.gz, if
        # this is the first time prototypes has been run we need to unpack it.
        template_root = path.join(_get_reporoot(), "matdb", "templates")
        if not path.isdir(path.join(template_root, "uniqueUnaries")):
            import tarfile
            with chdir(template_root):
                # The context manager closes the archive even if extraction
                # fails part-way.
                with tarfile.open("prototypes.tar.gz", "r:gz") as tar:
                    tar.extractall()

        # Seed the random generator (used for the `int` structure selection
        # below) when a seed was supplied.
        if self.ran_seed is not None:
            import random
            random.seed(self.ran_seed)

        self.puuids = None
        self._load_puuids()
        self.nconfigs = 0

        # Template sub-folder for each supported system size.
        folders = {
            "unary": "uniqueUnaries",
            "binary": "uniqueBinaries",
            "ternary": "uniqueTernaries",
        }

        self.structures = {}
        # `structures` defaults to None; treat that as "nothing requested"
        # instead of failing on `None.items()`.
        for k, v in (structures or {}).items():
            size = k.lower()
            if size not in folders:  # pragma: no cover
                msg.warn(
                    "Must specify the system size, i.e., unary, binary, or "
                    "ternary. {} not recognized".format(k))
                continue
            cand_path = path.join(template_root, folders[size])

            if isinstance(v, list):
                self.structures[size] = []
                for prot in v:
                    files = glob("{0}/*{1}*".format(cand_path, prot))
                    if len(files) < 1:  # pragma: no cover
                        msg.warn(
                            "No prototypes of size {0} matched the string "
                            "{1}".format(k, prot))
                    else:
                        self.structures[size].extend(files)
            elif isinstance(v, str) and v == "all":
                self.structures[size] = glob("{0}/*".format(cand_path))
            elif isinstance(v, int):
                from random import shuffle
                files = glob("{0}/*".format(cand_path))
                shuffle(files)
                self.structures[size] = files[:v]
            else:  #pragma: no cover
                msg.err(
                    "Couldn't parse {0} structures for {1} case. Must be either "
                    "a list of file names, 'all', or an int.".format(v, k))
                # Nothing was stored for this size, so the counting below
                # would fail with a KeyError; skip it.
                continue

            if self.permutations is not None and size in self.permutations:
                per_structure = len(self.permutations[size])
            elif size == "unary":
                per_structure = 3
            else:
                # binary or ternary
                per_structure = 6
            self.nconfigs += len(self.structures[size]) * per_structure
예제 #15
0
    def _create_dbfull(self, folder, pattern, energy, force, virial, config_type):
        """Creates the full combined database.

        Database files matching `pattern` inside `folder` are checked with
        `_atoms_conform`. Files that need parameter renames (or a force
        rename) are rewritten into `self.root` with an `.h5` extension; the
        others are read as they are. All configurations are then collected
        into a single list that is written to `self._dbfull`.

        Args:
            folder (str): directory that is searched for database files.
            pattern (str): glob pattern, evaluated relative to `folder`.
            energy: passed to `_atoms_conform` for the conformance check.
            force: passed to `_atoms_conform`; also the name of the property
              that is moved to `ref_force` when a force rename is needed.
            virial: passed to `_atoms_conform` for the conformance check.
            config_type: when not `None`, the value assigned to the
              `config_type` parameter of every rewritten configuration.
        """
        from matdb.utility import chdir, dbcat
        from glob import glob
        from tqdm import tqdm
        from os import path

        #NB! There is a subtle bug here: if you try and open a matdb.atoms.Atoms
        #within the context manager of `chdir`, something messes up with the
        #memory sharing in fortran and it dies. This has to be separate.
        with chdir(folder):
            self.dbfiles = glob(pattern)
        #Names of the database files that had to be rewritten.
        rewrites = []

        for dbfile in self.dbfiles:
            #Look at the first configuration in the atoms list to
            #determine if it matches the energy, force, virial and
            #config type parameter names.
            dbpath = path.join(folder, dbfile)
            params, doforce = _atoms_conform(dbpath, energy, force, virial)
            if len(params) > 0 or doforce:
                msg.std("Conforming database file {}.".format(dbpath))
                #NOTE(review): `AtomsList` is not imported in this method; it
                #is presumably available at module level.
                al = AtomsList(dbpath)
                outpath = path.join(self.root, dbfile.replace(".xyz",".h5"))
                for ai in tqdm(al):
                    #`params` maps each target parameter name to the name it
                    #currently has in the file.
                    for target, source in params.items():
                        if (target == "config_type" and
                            config_type is not None):
                            ai.params[target] = config_type
                        else:
                            ai.add_param(target,ai.params[source])
                            del ai.params[source]
                            if source in ai.info: #pragma: no cover
                                #If things were done correctly by the atoms
                                #object this should never be used; it exists
                                #mainly as a safeguard.
                                msg.warn("The atoms object didn't properly "
                                         "update the parameters of the legacy "
                                         "atoms object.")
                                del ai.info[source]

                    if doforce:
                        ai.add_property("ref_force",ai.properties[force])
                        del ai.properties[force]

                al.write(outpath)

                #Mark this db as non-conforming so that we know a new version
                #of it was created.
                rewrites.append(dbfile)

                dbcat([dbpath], outpath, docat=False, renames=params,
                      doforce=doforce)

        # We want a single file to hold all of the data for all the atoms in the database.
        all_atoms = AtomsList()
        for dbfile in self.dbfiles:
            if dbfile in rewrites:
                #Use the conformed copy that was written to `self.root`.
                infile = dbfile.replace(".xyz",".h5")
                all_atoms.extend(AtomsList(path.join(self.root, infile)))
            else:
                dbpath = path.join(folder, dbfile)
                all_atoms.extend(AtomsList(dbpath))

        all_atoms.write(self._dbfull)

        #Finally, create the config file.
        #NOTE(review): this uses `self.config_type`, not the `config_type`
        #argument; confirm that is intended.
        from matdb.utility import dbcat
        with chdir(folder):
            dbcat(self.dbfiles, self._dbfull, config_type=self.config_type, docat=False)