Example #1
    def configs(self, kind, asatoms=True):
        """Loads a list of configurations of the specified kind.

        Args:
            kind (str): possible values are ['train', 'holdout', 'super'].
            asatoms (bool): when True, return a :class:`~matdb.atoms.AtomsList`
              object; otherwise, just compile the file to disk without loading it.

        Returns:
            matdb.atoms.AtomsList: Atoms list for the specified configuration class.
        """
        fmap = {
            "train": lambda seq, splt: seq.train_file(splt),
            "holdout": lambda seq, splt: seq.holdout_file(splt),
            "super": lambda seq, splt: seq.super_file(splt)
        }
        smap = {
            t: getattr(self, "_{}file".format(t))
            for t in ["train", "holdout", "super"]
        }
        cfile = smap[kind]

        if not path.isfile(cfile):
            cfiles = []
            for seq in self.dbs:
                #We need to split to get training data. If the split has already
                #been done as part of a different training run, then it won't be
                #done a second time.
                msg.info("Compiling database {} for {}.".format(
                    seq.name, self.fqn))
                seq.split()
                if seq.name in self.cust_splits:
                    splt = self.cust_splits[seq.name]
                else:
                    splt = self.split

                #We grab a list of all the files that match the particular split
                #pattern. Then we apply any filters to individual atoms objects
                #within each of the databases.
                if splt == '*':
                    nfiles = []
                    for dbsplit in seq.splits:
                        nfiles.extend([f(seq, dbsplit) for f in fmap.values()])
                else:
                    nfiles = [fmap[kind](seq, splt)]

                filtered = self._filter_dbs(seq.name, nfiles)
                cfiles.extend(filtered)

            #If this is the training file, we need to append any extras; these
            #are files that have additional trainer-specific configs to include.
            if kind == "train":
                cfiles.extend(self.extras())

            #First, save the configurations to a single file.
            dbcat(cfiles, cfile)

        if asatoms:
            return AtomsList(cfile)
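
The dispatch dictionaries above (fmap/smap) replace an if/elif chain over kind, and the path.isfile guard makes the compilation a one-time cache. A minimal, self-contained sketch of the same compile-once pattern, using toy dict-based sources rather than matdb's database sequences:

from pathlib import Path

def compile_configs(kind, sources, out_dir):
    """Concatenate the files for `kind` into a single cached target."""
    #Map each configuration kind to the accessor that yields its file.
    fmap = {
        "train": lambda src: src["train_file"],
        "holdout": lambda src: src["holdout_file"],
        "super": lambda src: src["super_file"],
    }
    target = Path(out_dir) / "{}.xyz".format(kind)
    if not target.is_file():
        #Only compile when the cached file is missing, as in configs() above.
        parts = [Path(fmap[kind](src)).read_text() for src in sources]
        target.write_text("".join(parts))
    return target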
Example #2
    def _make_train_cfg(self, iteration):
        """Creates the 'train.cfg' file needed to train the potential from the
        databeses used.
        Args:
            iteration (int): the number of iterations of MTP has been 
                through.
        """
        from matdb.database.legacy import LegacyDatabase
        if iteration == 1:
            for db in self.dbs:
                if not isinstance(db, LegacyDatabase):
                    for step in db.steps.values():
                        pbar = tqdm(total=len(step.rset))
                        for atm in step.rset:
                            self._create_train_cfg(
                                atm, path.join(self.root, "train.cfg"))
                            pbar.update(1)
                else:  # pragma: no cover (Don't use LegacyDatabase for M1)
                    pbar = tqdm(total=len(db.rset))
                    for atm in db.rset:
                        self._create_train_cfg(
                            atm, path.join(self.root, "train.cfg"))
                        pbar.update(1)

        else:
            if self.active.last_iteration is None or len(
                    self.active.last_iteration) < 1:
                if path.isfile(self.active.iter_file):
                    self.active._load_last_iter()
                else:
                    raise IOError("File {0} containing most recently added "
                                  "structures is missing.".format(
                                      self.active.iter_file))

            msg.info("Extracting from {0} folders".format(
                len(self.active.last_iteration)))
            self.active.extract()
            pbar = tqdm(total=len(self.active.last_iteration))
            ccb = 0  #counts configurations actually appended
            if self.active.last_config_atoms is not None:
                for atm in self.active.last_config_atoms.values():
                    if not atm.calc.can_extract(atm.calc.folder):
                        msg.std(
                            "Folder {} can not be extracted.".format(
                                atm.calc.folder), 2)
                        continue
                    self._create_train_cfg(atm,
                                           path.join(self.root, "train.cfg"))
                    ccb += 1
                    pbar.update(1)
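
The second branch above guards every extraction behind can_extract before appending to train.cfg. A self-contained sketch of that guarded, progress-reported loop; the writer callable and the folder-to-atoms mapping are stand-ins, not matdb's API:

from tqdm import tqdm

def append_extractable(atoms_by_folder, can_extract, writer):
    """Append every extractable configuration; skip unreadable folders."""
    added = 0
    with tqdm(total=len(atoms_by_folder)) as pbar:
        for folder, atm in atoms_by_folder.items():
            if not can_extract(folder):
                #Mirrors the atm.calc.can_extract check in the snippet above.
                continue
            writer(atm)
            added += 1
            pbar.update(1)
    return added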
Example #3
    def status(self, printed=True):
        """Returns or prints the current status of the MTP training.
        Args:
            printed (bool): when True, print the status to screen; otherwise,
              return a dictionary with the relevant quantities.
        """

        # Our interest is in knowing which MTP model is the latest (if any) and
        # whether the job script has been created for the next one in the
        # sequence.
        last_iter = self.active.last_iteration
        result = {
            "trained": self.ready(),
            "file": self.mtp_file,
            "jobfile": path.isfile(self._jobfile),
            "mtp step": self.iter_status,
            "last addition": len(last_iter) if last_iter is not None else 0
        }

        if printed:
            fqn = "{0}.{1}".format(self.parent.name, self.name)
            msg.info("{0} => Model ready: {1}".format(fqn, result["trained"]))
            x = "exists" if result["jobfile"] else "does not exist"
            msg.info("{0} => Next jobfile '{1}' {2}".format(
                fqn, self._jobfile, x))
            msg.info("{0} => Current MTP step {1} iteration {2}.".format(
                fqn, self.iter_status, self.iter_count))
            msg.info("{0} => {1} configurations added "
                     "to the training set.".format(fqn,
                                                   result["last addition"]))
            msg.blank()
        else:
            return result
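
Note the design choice: status returns None when printed=True, so callers that need the numbers must pass printed=False. A short usage sketch, assuming a configured trainer instance named mtp (a hypothetical handle, not defined in the snippet):

#Print a human-readable summary:
mtp.status()

#Or collect the quantities programmatically:
report = mtp.status(printed=False)
if not report["trained"]:
    print("{} configs added last iteration".format(report["last addition"]))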
Example #4
    def execute(self, dryrun=False):
        """Submits the job script for the currently configured potential training.

        Args:
            dryrun (bool): when True, simulate the submission without actually
              submitting.

        Returns:
            bool: True if the submission generated a job id (considered
            successful).
        """
        if self.ready():
            msg.info("Trainer {} is already done; "
                     "skipping execute step.".format(self.root), 2)
            return

        if not path.isfile(self._jobfile):
            return False

        if not path.isfile(self._trainfile):
            msg.std("train.h5 missing in {}; can't execute.".format(self.root))
            return False

        # We must have what we need to execute. Compile the command and submit.

        shell_command = self.controller.db.shell_command
        # We support 'bash' and 'sbatch' shell commands; if it's neither one
        # of them, default to 'bash'.
        if shell_command not in ['bash', 'sbatch']:
            shell_command = 'bash'
        cargs = [shell_command, self._jobfile]

        if dryrun:
            msg.okay("Executed {} in {}".format(' '.join(cargs), self.root))
            return True
        else:
            xres = execute(cargs, self.root)

        # supercompute will return "Submitted"
        if len(xres["output"]) > 0 and "Submitted" in xres["output"][0]:
            msg.okay("{}: {}".format(self.root, xres["output"][0].strip()))
            return True
        # local computer
        elif len(xres["error"]) == 0:
            return True
        else:
            return False
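
The launcher whitelist and the two success tests (an sbatch "Submitted" line versus an empty stderr for a local bash run) generalize beyond matdb. A minimal, self-contained sketch of the same submit logic using subprocess directly:

import subprocess

def submit(jobfile, shell_command="bash", dryrun=False):
    #Whitelist the launcher; anything unrecognized falls back to 'bash'.
    if shell_command not in ("bash", "sbatch"):
        shell_command = "bash"
    cargs = [shell_command, jobfile]
    if dryrun:
        print("Would execute: {}".format(" ".join(cargs)))
        return True
    proc = subprocess.run(cargs, capture_output=True, text=True)
    if "Submitted" in proc.stdout:
        #sbatch reports success with "Submitted batch job <id>".
        return True
    #A plain bash run is judged by an empty stderr.
    return proc.stderr == ""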
Example #5
    def __init__(self,
                 atoms=None,
                 root=None,
                 parent=None,
                 incar={},
                 kpoints={},
                 execution={},
                 phonons={},
                 nconfigs=100,
                 calibrate=True,
                 amplitude=None,
                 sampling="uniform",
                 name="modulations",
                 dynmat="dynmatrix",
                 calibrator="calib",
                 config_type="ph"):
        self.name = name
        super(Modulation, self).__init__(atoms, incar, kpoints, execution,
                                         path.join(root, self.name), parent,
                                         "M", nconfigs, config_type)
        self.sampling = sampling
        self.calibrate = calibrate

        #Setup a calibrator if automatic calibration was selected.
        if calibrate and amplitude is None:
            self.calibrator = Calibration(atoms,
                                          root,
                                          parent,
                                          kpoints,
                                          incar,
                                          phonons,
                                          execution,
                                          calibrate,
                                          name=calibrator,
                                          dynmat=dynmat)
            self.parent.steps[calibrator] = self.calibrator
            self.amplitude = self.calibrator.infer_amplitude()
            calibrated = "*calibrated* "
        else:
            self.calibrator = None
            self.amplitude = amplitude
            calibrated = ""

        imsg = "Using {} as modulation {}amplitude for {}."
        msg.info(imsg.format(self.amplitude, calibrated, self.parent.name), 2)

        self.base = self.parent.steps[dynmat]
        self.phonons = phonons
        update_phonons(self.phonons, self.base)
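
The amplitude logic above prefers an explicit value and only builds a Calibration step when none was given. The fallback distills to a few lines, with infer standing in for Calibration.infer_amplitude:

def resolve_amplitude(amplitude, calibrate, infer):
    """Return (amplitude, label): an explicit value wins; otherwise calibrate."""
    if calibrate and amplitude is None:
        return infer(), "*calibrated* "
    return amplitude, ""

#e.g. resolve_amplitude(None, True, lambda: 0.05) -> (0.05, '*calibrated* ')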
Example #6
    def can_execute(self, folder):
        """Returns True if the specified folder is ready to execute QE
        in.
        """
        if not path.isdir(folder):
            return False

        sizeok = lambda x: stat(x).st_size > 25
        required = ["espresso.pwi"]

        present = {}
        for rfile in required:
            target = path.join(folder, rfile)
            present[rfile] = path.isfile(target) and sizeok(target)

        if not all(present.values()):
            for f, ok in present.items():
                if not ok:
                    msg.info("{} not present for Quantum Espresso "
                             "execution.".format(f), 2)
        return all(present.values())
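
The same presence-and-size check works for any calculator's required inputs. A self-contained variant parameterized over the file list; the 25-byte floor mirrors the sizeok lambda above:

from os import path, stat

def inputs_ready(folder, required, min_size=25):
    """True when every required file exists in folder and exceeds min_size."""
    if not path.isdir(folder):
        return False
    missing = [f for f in required
               if not (path.isfile(path.join(folder, f))
                       and stat(path.join(folder, f)).st_size > min_size)]
    for f in missing:
        print("{} not present or too small in {}.".format(f, folder))
    return not missing

#e.g. inputs_ready("calc_dir", ["espresso.pwi"])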
Example #7
def _generic_find(controller, heading, patterns):
    """Performs a generic find operation on the specified controller and formats
    the output in color.

    Args:
        controller: an instance of :class:`matdb.database.Controller` or
          :class:`matdb.fitting.Controller`. The specified controller's `find`
          method is used for the lookup.
        heading (str): title to print before the table of discovered values.
        patterns (list): list of `str` patterns to search for.
    """
    msg.info(heading)
    msg.info("--------------------------")
    msg.blank()
    for pattern in patterns:
        for entry in controller.find(pattern):
            if hasattr(entry, "uuid"):
                eid = entry.uuid
            elif hasattr(entry, "fqn"):
                eid = entry.fqn
            else:
                eid = entry.name
            text = "{} | {} ".format(eid, entry.root)
            msg.arb(text, [msg.cenum["cwarn"], msg.cenum["cstds"]], '|')
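
The uuid/fqn/name fallback is a common identifier-resolution pattern; it collapses into a single loop over attribute names:

def entry_id(entry):
    """Pick the most specific identifier the entry carries."""
    for attr in ("uuid", "fqn", "name"):
        if hasattr(entry, attr):
            return getattr(entry, attr)
    return repr(entry)  #last-resort fallback, not in the original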
Example #8
def run(args):
    """Runs the matdb setup and cleanup to produce database files.
    """
    print("matdb  Copyright (C) 2019  HALL LABS")
    print("This program comes with ABSOLUTELY NO WARRANTY.")
    print(
        "This is free software, and you are welcome to redistribute it under "
        "certain conditions.")
    if args is None:
        return

    import numpy as np
    from matdb import msg

    #No matter what other options the user has chosen, we will have to create a
    #database controller for the specification they have given us.
    from matdb.database import Controller
    cdb = Controller(args["dbspec"])
    if args["xyz"]:
        cdb.split(cdb.trainers.split, recalc=args["recalc"])
    if args["t"]:
        cdb.trainers.jobfiles()
    if args["x"]:
        cdb.trainers.execute(args["tfilter"], args["sfilter"], args["dryrun"])
    if args["v"]:
        vdict = cdb.trainers.validate(args["data"], args["tfilter"],
                                      args["sfilter"], args["energy"],
                                      args["force"], args["virial"])
        if "e_ref" in vdict:
            e_err = np.std(vdict["e_ref"] - vdict["e_pot"])
            msg.info("Energy RMS: {0:.4f}".format(e_err))
        if "f_ref" in vdict:
            f_err = np.std(vdict["f_ref"].flatten() - vdict["f_pot"].flatten())
            msg.info("Force RMS: {0:.4f}".format(f_err))
        if "v_ref" in vdict:
            v_err = np.std(vdict["v_ref"].flatten() - vdict["v_pot"].flatten())
            msg.info("Virial RMS: {0:.4f}".format(v_err))

    if args["status"]:
        cdb.trainers.status()
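
Each RMS report above is the standard deviation of the residuals between reference and predicted values. A minimal sketch of that computation; note that np.std subtracts the residual mean, so it equals a plain root-mean-square only when the residuals are centered on zero:

import numpy as np

def rms_error(ref, pot):
    """Spread of the residuals, matching the energy/force/virial reports."""
    return float(np.std(np.asarray(ref).flatten() - np.asarray(pot).flatten()))

#e.g. rms_error([1.0, 2.0, 3.0], [1.1, 1.9, 3.2])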