def configs(self, kind, asatoms=True):
    """Loads a list of configurations of the specified kind.

    Args:
        kind (str): possible values are ['train', 'holdout', 'super'].
        asatoms (bool): when True, return a :class:`~matdb.atoms.AtomsList`
          object; otherwise, just compile the file.

    Returns:
        matdb.atoms.AtomsList: Atoms list for the specified configuration
        class.
    """
    fmap = {
        "train": lambda seq, splt: seq.train_file(splt),
        "holdout": lambda seq, splt: seq.holdout_file(splt),
        "super": lambda seq, splt: seq.super_file(splt)
    }
    smap = {t: getattr(self, "_{}file".format(t))
            for t in ["train", "holdout", "super"]}
    cfile = smap[kind]

    if not path.isfile(cfile):
        cfiles = []
        for seq in self.dbs:
            #We need to split to get training data. If the split has already
            #been done as part of a different training run, then it won't be
            #done a second time.
            msg.info("Compiling database {} for {}.".format(seq.name, self.fqn))
            seq.split()

            if seq.name in self.cust_splits:
                splt = self.cust_splits[seq.name]
            else:
                splt = self.split

            #We grab a list of all the files that match the particular split
            #pattern. Then we apply any filters to individual atoms objects
            #within each of the databases.
            if splt == '*':
                nfiles = []
                for dbsplit in seq.splits:
                    nfiles.extend([f(seq, dbsplit) for f in fmap.values()])
            else:
                nfiles = [fmap[kind](seq, splt)]

            filtered = self._filter_dbs(seq.name, nfiles)
            cfiles.extend(filtered)

        #If this is the training file, we need to append any extras; these
        #are files that have additional trainer-specific configs to include.
        if kind == "train":
            cfiles.extend(self.extras())

        #Finally, concatenate the configurations into a single file.
        dbcat(cfiles, cfile)

    if asatoms:
        return AtomsList(cfile)
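#A usage sketch for `configs` (hypothetical; assumes `trainer` is a fit
#instance obtained from a matdb fitting controller -- the name is
#illustrative only):
#
#    al = trainer.configs("train")               #AtomsList of training configs
#    trainer.configs("holdout", asatoms=False)   #only compile the holdout file
#
#The compiled file is cached on disk: subsequent calls with the same `kind`
#reuse it rather than re-running the split.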
def _make_train_cfg(self, iteration):
    """Creates the 'train.cfg' file needed to train the potential from the
    databases used.

    Args:
        iteration (int): the number of iterations MTP has been through.
    """
    from matdb.database.legacy import LegacyDatabase
    if iteration == 1:
        for db in self.dbs:
            if not isinstance(db, LegacyDatabase):
                for step in db.steps.values():
                    pbar = tqdm(total=len(step.rset))
                    for atm in step.rset:
                        self._create_train_cfg(
                            atm, path.join(self.root, "train.cfg"))
                        pbar.update(1)
            else: # pragma: no cover (Don't use LegacyDatabase for M1)
                pbar = tqdm(total=len(db.rset))
                for atm in db.rset:
                    self._create_train_cfg(
                        atm, path.join(self.root, "train.cfg"))
                    pbar.update(1)
    else:
        if self.active.last_iteration is None or len(
                self.active.last_iteration) < 1:
            if path.isfile(self.active.iter_file):
                self.active._load_last_iter()
            else:
                raise IOError("File {0} containing most recently added "
                              "structures is missing.".format(
                                  self.active.iter_file))

        msg.info("Extracting from {0} folders".format(
            len(self.active.last_iteration)))
        self.active.extract()

        pbar = tqdm(total=len(self.active.last_iteration))
        ccb = 1
        if self.active.last_config_atoms is not None:
            for atm in self.active.last_config_atoms.values():
                if not atm.calc.can_extract(atm.calc.folder):
                    msg.std("Folder {} cannot be extracted.".format(
                        atm.calc.folder), 2)
                    continue
                self._create_train_cfg(atm, path.join(self.root, "train.cfg"))
                ccb += 1
                pbar.update(1)
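#Sketch of how `_make_train_cfg` fits into the training flow (hypothetical;
#`mtp` stands for a trainer instance with an active-learning database):
#
#    mtp._make_train_cfg(1)   #first iteration: pull from every database rset
#    mtp._make_train_cfg(2)   #later iterations: pull only the configurations
#                             #added by the last active-learning step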
def status(self, printed=True):
    """Returns or prints the current status of the MTP training.

    Args:
        printed (bool): when True, print the status to screen; otherwise,
          return a dictionary with the relevant quantities.
    """
    # Our interest is in knowing which MTP model is the latest (if any) and
    # whether the job script has been created for the next one in the
    # sequence.
    last_iter = self.active.last_iteration
    result = {
        "trained": self.ready(),
        "file": self.mtp_file,
        "jobfile": path.isfile(self._jobfile),
        "mtp step": self.iter_status,
        "last addition": len(last_iter) if last_iter is not None else 0
    }

    if printed:
        fqn = "{0}.{1}".format(self.parent.name, self.name)
        msg.info("{0} => Model ready: {1}".format(fqn, result["trained"]))
        x = "exists" if result["jobfile"] else "does not exist"
        msg.info("{0} => Next jobfile '{1}' {2}".format(
            fqn, self._jobfile, x))
        msg.info("{0} => Current MTP step {1} iteration {2}.".format(
            fqn, self.iter_status, self.iter_count))
        msg.info("{0} => {1} configurations added "
                 "to the training set.".format(fqn, result["last addition"]))
        msg.blank()
    else:
        return result
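#Example of consuming `status` programmatically (hypothetical trainer
#instance `mtp`):
#
#    stat = mtp.status(printed=False)
#    if not stat["trained"] and stat["jobfile"]:
#        mtp.execute()        #the next training job is ready to submit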
def execute(self, dryrun=False):
    """Submits the job script for the currently configured potential
    training.

    Args:
        dryrun (bool): when True, simulate the submission without actually
          submitting.

    Returns:
        bool: True if the submission generated a job id (considered
        successful).
    """
    if self.ready():
        msg.info("Trainer {} is already done; "
                 "skipping execute step.".format(self.root), 2)
        return

    if not path.isfile(self._jobfile):
        return False

    if not path.isfile(self._trainfile):
        msg.std("train.h5 missing in {}; can't execute.".format(self.root))
        return False

    # We must have what we need to execute. Compile the command and submit.
    shell_command = self.controller.db.shell_command
    # We support 'bash' and 'sbatch' shell commands; if it's neither of
    # them, default to 'bash'.
    if shell_command not in ['bash', 'sbatch']:
        shell_command = 'bash'
    cargs = [shell_command, self._jobfile]

    if dryrun:
        msg.okay("Executed {} in {}".format(' '.join(cargs), self.root))
        return True
    else:
        xres = execute(cargs, self.root)

        # A supercomputer scheduler (sbatch) reports "Submitted".
        if len(xres["output"]) > 0 and "Submitted" in xres["output"][0]:
            msg.okay("{}: {}".format(self.root, xres["output"][0].strip()))
            return True
        # Local execution via bash: success if nothing was written to stderr.
        elif len(xres["error"]) == 0:
            return True
        else:
            return False
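#A dry-run sketch (hypothetical trainer `mtp`); nothing is submitted, but
#the command that would run is reported:
#
#    if mtp.execute(dryrun=True):
#        mtp.execute()        #real submission via 'bash' or 'sbatch'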
def __init__(self, atoms=None, root=None, parent=None, phonons=None,
             nconfigs=100, calibrate=True, amplitude=None,
             sampling="uniform", name="modulations", dynmat="dynmatrix",
             calibrator="calib", config_type="ph", incar=None, kpoints=None,
             execution=None):
    """Initializes the modulation database step.

    The `incar`, `kpoints` and `execution` settings are forwarded to the
    base class and, when automatic calibration runs, to the Calibration
    step.
    """
    self.name = name
    super(Modulation, self).__init__(atoms, incar, kpoints, execution,
                                     path.join(root, self.name), parent,
                                     "M", nconfigs, config_type)
    self.sampling = sampling
    self.calibrate = calibrate

    #Setup a calibrator if automatic calibration was selected.
    if calibrate and amplitude is None:
        self.calibrator = Calibration(atoms, root, parent, kpoints, incar,
                                      phonons, execution, calibrate,
                                      name=calibrator, dynmat=dynmat)
        self.parent.steps[calibrator] = self.calibrator
        self.amplitude = self.calibrator.infer_amplitude()
        calibrated = "*calibrated* "
    else:
        self.calibrator = None
        self.amplitude = amplitude
        calibrated = ""

    imsg = "Using {0} as {1}modulation amplitude for {2}."
    msg.info(imsg.format(self.amplitude, calibrated, self.parent.name), 2)

    self.base = self.parent.steps[dynmat]
    #Use an empty dict instead of a mutable default argument: the dict is
    #mutated by update_phonons below and would otherwise be shared across
    #instances.
    self.phonons = phonons if phonons is not None else {}
    update_phonons(self.phonons, self.base)
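#Hypothetical construction sketch; the keyword values below are
#illustrative only:
#
#    mod = Modulation(atoms=atoms, root="/tmp/db", parent=controller,
#                     nconfigs=50, amplitude=0.15)
#
#Passing `amplitude` explicitly bypasses the Calibration step; leaving it
#as None (with calibrate=True) infers the amplitude automatically.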
def can_execute(self, folder):
    """Returns True if the specified folder is ready to execute QE in.

    Args:
        folder (str): path to the folder to check for execution readiness.
    """
    if not path.isdir(folder):
        return False

    sizeok = lambda x: stat(x).st_size > 25
    required = ["espresso.pwi"]

    present = {}
    for rfile in required:
        target = path.join(folder, rfile)
        present[rfile] = path.isfile(target) and sizeok(target)

    if not all(present.values()):
        for f, ok in present.items():
            if not ok:
                msg.info("{} not present for Quantum Espresso "
                         "execution.".format(f), 2)

    return all(present.values())
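#Folder-readiness sketch (hypothetical path); a folder qualifies only if
#'espresso.pwi' exists and is larger than 25 bytes:
#
#    if qe.can_execute("/tmp/db/seq/S1.1"):
#        ...  #safe to launch the Quantum Espresso job for that folder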
def _generic_find(controller, heading, patterns):
    """Performs a generic find operation on the specified controller and
    formats the output in color.

    Args:
        controller: an instance of :class:`matdb.database.Controller` or
          :class:`matdb.fitting.Controller`. The specified controller's
          `find` method is used for the lookup.
        heading (str): title to print before the table of discovered values.
        patterns (list): list of `str` patterns to search for.
    """
    msg.info(heading)
    msg.info("--------------------------")
    msg.blank()
    for pattern in patterns:
        for entry in controller.find(pattern):
            if hasattr(entry, "uuid"):
                eid = entry.uuid
            elif hasattr(entry, "fqn"):
                eid = entry.fqn
            else:
                eid = entry.name

            text = "{} | {} ".format(eid, entry.root)
            msg.arb(text, [msg.cenum["cwarn"], msg.cenum["cstds"]], '|')
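#Example invocation (hypothetical controller `cdb` and patterns); the
#patterns use whatever syntax the controller's `find` method accepts:
#
#    _generic_find(cdb, "Databases", ["Mn*", "Co*"])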
def run(args):
    """Runs the matdb setup and cleanup to produce database files.

    Args:
        args (dict): parsed command-line arguments controlling which steps
          to run.
    """
    print("matdb Copyright (C) 2019 HALL LABS")
    print("This program comes with ABSOLUTELY NO WARRANTY.")
    print("This is free software, and you are welcome to redistribute it "
          "under certain conditions.")
    if args is None:
        return

    import numpy as np
    from matdb import msg

    #No matter what other options the user has chosen, we will have to
    #create a database controller for the specification they have given us.
    from matdb.database import Controller
    cdb = Controller(args["dbspec"])

    if args["xyz"]:
        cdb.split(cdb.trainers.split, recalc=args["recalc"])
    if args["t"]:
        cdb.trainers.jobfiles()
    if args["x"]:
        cdb.trainers.execute(args["tfilter"], args["sfilter"],
                             args["dryrun"])
    if args["v"]:
        vdict = cdb.trainers.validate(args["data"], args["tfilter"],
                                      args["sfilter"], args["energy"],
                                      args["force"], args["virial"])
        #np.std of the residuals is reported as "RMS" here; the two agree
        #only when the errors are zero-mean.
        if "e_ref" in vdict:
            e_err = np.std(vdict["e_ref"] - vdict["e_pot"])
            msg.info("Energy RMS: {0:.4f}".format(e_err))
        if "f_ref" in vdict:
            f_err = np.std(vdict["f_ref"].flatten() -
                           vdict["f_pot"].flatten())
            msg.info("Force RMS: {0:.4f}".format(f_err))
        if "v_ref" in vdict:
            v_err = np.std(vdict["v_ref"].flatten() -
                           vdict["v_pot"].flatten())
            msg.info("Virial RMS: {0:.4f}".format(v_err))

    if args["status"]:
        cdb.trainers.status()
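#Minimal driver sketch (hypothetical argument values; the keys mirror the
#lookups in `run` above):
#
#    run({"dbspec": "matdb.yml", "xyz": True, "recalc": False,
#         "t": False, "x": False, "v": False, "status": True})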