def main(config=None):
    """Report which queue-adapter / executable-template options are undefined.

    For every step returned by ``get_steps`` the options reported by
    ``qadapter_options`` (for the step's ``[HPC] queue_type``) and
    ``exec_options`` (for the step's ``[Job] exec_type``) are collected and
    compared with the option names defined in the ``HPC`` and ``Job``
    sections of the step configuration. The result is printed to stdout.

    :param config: argument forwarded to ``Config``; when ``None`` the
        ``Config`` is built without arguments.
    """
    config = Config(config) if config is not None else Config()

    opts_defined = {}
    opts_found = set()
    for step in get_steps(config):
        step_config = config.for_step(step, parse_functions=False)
        opts_found.update(
            qadapter_options(step_config["HPC"]["queue_type"], verbose=False)
        )
        opts_found.update(
            exec_options(step_config["Job"]["exec_type"], verbose=False)
        )
        for name, section in step_config.items():
            opts_defined.setdefault(name, set()).update(section.keys())

    # Use .get(): when no step was iterated ``opts_defined`` is empty and a
    # plain lookup of "HPC"/"Job" would raise KeyError.
    opts_undefined = (
        opts_found
        - opts_defined.get("HPC", set())
        - opts_defined.get("Job", set())
    )
    # "cmdline" is the only option treated as optional
    opts_optional = opts_undefined & {"cmdline"}
    opts_needed = opts_undefined - opts_optional

    print("All options found in queue adapter and executable template:")
    print("", *opts_found, sep=" ")
    if opts_needed:
        print("Necessary options undefined in configuration file(s):")
        print("", *opts_needed, sep=" ")
    if opts_optional:
        print("Optional options undefined in configuration file(s)")
        print("", *opts_optional, sep=" ")
    if not opts_undefined:
        print("No options left undefined in configuration file")
def test_parse_changes(self):
    """Check the parsed ``_changes`` of two configs against JSON references.

    ``simple_subs.ini`` covers substitutions with a single point of
    connection, ``ring_subs.ini`` covers ring-fusing substitutions (multiple
    connection points). Each reference file lives next to the input, with
    ``test_files`` replaced by ``ref_files`` and ``.ini`` replaced by
    ``_changes.json``.
    """
    for ini_name in ("simple_subs.ini", "ring_subs.ini"):
        config = Config(
            os.path.join(prefix, "test_files", ini_name),
            quiet=True,
            skip_user_default=True,
        )
        # round-trip through JSON so both sides use JSON-compatible types
        test = json.loads(json.dumps(config._changes))
        ref_name = config.infile.replace(".ini", "_changes.json")
        ref_name = ref_name.replace("test_files", "ref_files")
        # To regenerate the reference file:
        #     with open(ref_name, "w") as f:
        #         json.dump(config._changes, f, indent=2)
        with open(ref_name, "r") as f:
            ref = json.load(f)
        # report the file that is actually being compared (the ring case
        # used to report the name of the simple case)
        self.assertDictEqual(ref, test, msg=config.infile)
def test_call_on_suffix(self):
    """Load ``structure_suffix.ini`` and print every (structure, kind) pair.

    There are no assertions; the test only exercises ``get_template`` and
    the unpacking of its items.
    """
    ini_path = os.path.join(prefix, "test_files", "structure_suffix.ini")
    config = Config(ini_path, quiet=True, skip_user_default=True)
    for entry in config.get_template():
        structure, kind = entry
        print(structure, kind)
def test_for_loop(self):
    """Check that each structure's name suffix equals its 3-1-2-6 dihedral.

    The dihedral is converted to degrees, rounded to 6 decimals and shifted
    by 360 when negative before being compared with the number after the
    last ``.`` in the structure name.
    """
    ini_path = os.path.join(prefix, "test_files", "for_loop.ini")
    for_loop = Config(ini_path, quiet=True, skip_user_default=True)
    for structure, _kind in for_loop.get_template():
        degrees = np.rad2deg(structure.dihedral("3", "1", "2", "6"))
        dihedral = round(degrees, 6)
        if dihedral < 0:
            dihedral += 360
        expected = float(structure.name.split(".")[-1])
        self.assertEqual(expected, dihedral)
def test_spec(self):
    """Compare the job spec of every change with ``ref_files/job_spec.json``.

    Keys listed in ``spec_skip`` are removed from both the reference and the
    generated spec before comparison.
    """
    self.maxDiff = None
    ini_path = os.path.join(prefix, "test_files", "substitution.ini")
    ref_path = os.path.join(prefix, "ref_files", "job_spec.json")
    config = Config(ini_path, quiet=True)
    for change in config._changes:
        geom = Geometry.from_string(config["Geometry"]["structure"])
        change_config = config.for_change(change, structure=geom)
        job = Job(geom, change_config, testing=True)
        # NOTE(review): the result of this first call is never used; it is
        # kept in case get_spec() has side effects - confirm and remove.
        job.get_spec()
        # the reference is re-read for each change because keys are deleted
        # from it below
        with open(ref_path) as f:
            ref = json.load(f)
        # JSON round-trip so both sides use JSON-compatible types
        test = json.loads(json.dumps(job.get_spec()))
        for key in spec_skip:
            del ref[key]
            del test[key]
        self.assertDictEqual(ref, test)
def test_xtb_cmdline(self):
    """Check the xtb command-line options produced for steps 1 and 2.

    Uses ``xtb_config.ini`` with the ``OH_Cl`` change applied to the first
    template structure.
    """
    ini_path = os.path.join(prefix, "test_files", "xtb_config.ini")
    config = Config(ini_path, skip_user_default=True)
    config = config.for_change("OH_Cl")
    structure = config.get_template()[0][0]
    structure = config.make_changes(structure)

    expected_by_step = (
        (
            1,
            {
                "--opt": None,
                "--uhf": 1,
                "--etemp": "400",
                "--alpb": "acetone",
                "--restart": None,
                "-P": "2",
            },
        ),
        (
            2,
            {
                "--optts": None,
                "--uhf": 1,
                "--etemp": "400",
                "--gbsa": "acetone",
            },
        ),
    )
    for step, ref in expected_by_step:
        step_config = config.for_step(step)
        theory = step_config.get_theory(structure)
        test = theory.get_xtb_cmdline(step_config)
        self.assertDictEqual(test, ref)
def main(args):
    """Print selected configuration entries and the structures to be run.

    Only entries of ``config.as_dict()`` whose key starts with
    ``Substitution`` or ``Mapping`` and does not end with ``/<name>`` for a
    name in the ``DEFAULT`` section are printed. Afterwards one table row is
    printed per item of ``config._changes``.

    :param args: object providing the ``config`` attribute passed to
        ``Config``.
    """
    config = Config(args.config)
    default_suffixes = tuple("/" + k for k in config["DEFAULT"])
    shown_prefixes = ("Substitution", "Mapping")

    print("Configuration:")
    for key, val in config.as_dict().items():
        if key.endswith(default_suffixes) or not key.startswith(shown_prefixes):
            continue
        # indent continuation lines of multi-line values with a tab
        print(" ", key, ": ", val.replace("\n", "\n\t"), sep="")

    row = " {:20s} {:20s} {:s}"
    print()
    print("Structures to run:")
    print(row.format("Name", "Type", "Change"))
    for name, change in config._changes.items():
        # an empty name is shown as "original"
        print(row.format(name or "original", str(change[1]), str(change[0])))
def __init__(self, args, job_dict=None):
    """Load (or rebuild) the results table, post-process it and dispatch.

    :param args: namespace whose ``config``, ``thermo``, ``unit``, ``cache``,
        ``reload``, ``temp`` and ``command`` attributes are read here.
    :param job_dict: jobs passed on to ``load_jobs`` and
        ``_parse_calc_attr``; may be ``None`` only if the cache file can be
        read.
    :raises OSError: if the cache cannot be read and ``job_dict`` is None.
    :raises SystemExit: (code 1) if the requested thermo column is missing.
    """
    self.config = Config(args.config, quiet=True)
    self.config.parse_functions()
    self._calc_attr = []
    self.args = args

    # the three free-energy flavours all share the "free_energy" column
    self.thermo = self.args.thermo.lower()
    if self.thermo in ["rrho", "qrrho", "qharm"]:
        self.thermo = "free_energy"
    self.thermo_unit = "{} ({})".format(self.args.thermo, self.args.unit)

    self.data = pd.DataFrame()
    try:
        self.data = pd.read_pickle(self.args.cache)
    except (FileNotFoundError, EOFError, OSError):
        # without jobs the table cannot be rebuilt, so give up here
        if job_dict is None:
            raise OSError(
                "There is something wrong with the results cache file. "
                "Please use the --reload option to fix this"
            )
    if self.args.reload or self.data.empty:
        self.load_jobs(job_dict)
        pd.to_pickle(self.data, self.args.cache)

    # a command-line temperature takes precedence over [Results] temperature
    if not self.args.temp and self.config.get(
        "Results", "temperature", fallback=""
    ):
        self.args.temp = self.config.getfloat("Results", "temperature")
    if self.args.temp:
        self.data["temperature"] = self.args.temp

    if self.thermo not in self.data:
        self.LOG.error(
            "`%s` not in data. Please adjust your `--thermo` selection, "
            "ensure necessary computations were run (e.g. frequency "
            "computation for free_energy), or ensure the correct step "
            "numbers are loaded (if using the `load` keyword in [Results] "
            "section). Then use `AaronJr results --reload` to update the "
            "data cache.",
            self.thermo,
        )
        # exit() is injected by the site module and is not guaranteed to
        # exist; raising SystemExit is the equivalent, always-available form
        raise SystemExit(1)
    self._parse_calc_attr(job_dict)
    self.apply_corrections()

    if self.args.command == "results":
        self.print_results(self.args)
    if self.args.command == "plot":
        Plot(self, self.args)
def main(args):
    """Build the fireworks for every template and pass them to the library.

    ``get_template(config)`` yields either a single ``Geometry`` or an
    iterable of ``(template, kind)`` pairs; ``one_job`` is called with
    ``submit=False`` for each of them and the collected result is handed to
    ``move_to_library``.

    :param args: namespace providing ``config`` and ``show`` (used as the
        ``quiet`` flag of ``Config``); also forwarded to ``one_job`` and
        ``move_to_library``.
    """
    config = Config(args.config, quiet=args.show)
    config.parse_functions()

    # evaluate the template once and reuse it (it used to be rebuilt for
    # the type check and again for the loop)
    templates = get_template(config)
    if isinstance(templates, Geometry):
        all_fws = one_job(args, templates, config, submit=False)
    else:
        all_fws = set()
        for template, kind in templates:
            all_fws = all_fws.union(
                one_job(
                    args,
                    template,
                    config,
                    job_type=kind,
                    submit=False,
                )
            )
    move_to_library(args, all_fws)
def test_substitution(self):
    """Validate the job structure built from ``substitution.ini``.

    The structure of the ``Job`` (atom substitution and ring fusion) is
    compared with ``ref_files/substitution_with_rings.xyz`` through
    ``validate(..., sort=True)``.
    """
    self.maxDiff = None
    # substitute atom and fuse ring
    ini_path = os.path.join(prefix, "test_files", "substitution.ini")
    config = Config(ini_path, quiet=True)
    start = Geometry.from_string(config["Geometry"]["structure"])
    built = Job(start, config, testing=True).structure
    ref_path = os.path.join(prefix, "ref_files", "substitution_with_rings.xyz")
    expected = Geometry(ref_path)
    self.assertTrue(validate(built, expected, sort=True))
def main(args):
    """Collect the workflows of all templates and run the requested command.

    ``config.get_template()`` yields either a single ``Geometry`` or an
    iterable of ``(template, kind)`` pairs. The fireworks returned by
    ``get_fws`` are mapped to their workflows through ``LAUNCHPAD`` and then
    passed to ``resources`` or ``results`` depending on ``args.command``.

    :param args: namespace providing ``config`` and ``command``; also
        forwarded to ``get_fws``.
    :raises FileNotFoundError: propagated unchanged from ``get_template``.
    """
    config = Config(args.config)
    config.parse_functions()

    # evaluate the template once; the former try/except only re-raised the
    # same FileNotFoundError and the loop rebuilt the template a second time
    templates = config.get_template()
    if isinstance(templates, Geometry):
        all_fws = get_fws(args, templates, config)
    else:
        all_fws = set()
        for template, kind in templates:
            all_fws = all_fws.union(
                get_fws(
                    args,
                    template,
                    config,
                    job_type=kind,
                )
            )

    workflows = set()
    for fw in all_fws:
        workflows.add(LAUNCHPAD.get_wf_by_fw_id(fw.fw_id))

    if args.command == "resources":
        resources(workflows)
    if args.command == "results":
        results(workflows)
def test_init(self):
    """Build every config in ``TestConfig.config_list`` and compare it with
    its ``*_init.json`` reference.

    The freshly built ``Config`` replaces the second element of the
    corresponding ``config_list`` entry.
    """
    for i, (config_name, _previous) in enumerate(TestConfig.config_list):
        config = Config(
            os.path.join(prefix, "test_files", config_name),
            quiet=True,
            skip_user_default=True,
        )
        TestConfig.config_list[i] = config_name, config
        as_dict = config.as_dict(skip=self.USER_SPECIFIC)
        ref_name = os.path.join(
            prefix, "ref_files", config_name.replace(".ini", "_init.json")
        )
        # To regenerate the reference file:
        #     with open(ref_name, "w") as f:
        #         json.dump(as_dict, f, indent=2)
        # need this to make sure python->json stuff is consistent,
        # eg: json doesn't distinguish between tuples and lists
        test = json.loads(json.dumps(as_dict))
        self.LOG.debug(json.dumps(test, indent=2))
        with open(ref_name, "r") as f:
            ref = json.load(f)
        self.maxDiff = None
        self.assertDictEqual(ref, test, msg=config_name)
class Results:
    """Load job outputs into a pandas table, post-process and report them.

    The table (``self.data``) is read from / written to the pickle file
    ``args.cache``. Rows are indexed by firework id and hold the attributes
    of the job outputs plus ``name``, ``change``, ``template``,
    ``selectivity`` and ``conformer``.
    """

    # job_patt groups: full match, name, template, change
    job_patt = re.compile(r"((&?[\w\.-]+)(?:{([\w\.-]+)})?(?::([\w\.-]+))?)")
    # self.LOG.error(...) is called below, so LOG must be replaced with a
    # logger before use (presumably by the caller - verify)
    LOG = None
    LEVEL = "info"

    def __init__(self, args, job_dict=None):
        """Load (or rebuild) the results table, post-process it and dispatch.

        :param args: namespace whose ``config``, ``thermo``, ``unit``,
            ``cache``, ``reload``, ``temp`` and ``command`` attributes are
            read here.
        :param job_dict: mapping of name -> list of jobs, passed on to
            ``load_jobs`` and ``_parse_calc_attr``; may be ``None`` only if
            the cache file can be read.
        :raises OSError: if the cache cannot be read and ``job_dict`` is None.
        :raises SystemExit: (code 1) if the requested thermo column is
            missing.
        """
        self.config = Config(args.config, quiet=True)
        self.config.parse_functions()
        self._calc_attr = []
        self.args = args

        # the three free-energy flavours all share the "free_energy" column
        self.thermo = self.args.thermo.lower()
        if self.thermo in ["rrho", "qrrho", "qharm"]:
            self.thermo = "free_energy"
        self.thermo_unit = "{} ({})".format(self.args.thermo, self.args.unit)

        self.data = pd.DataFrame()
        try:
            self.data = pd.read_pickle(self.args.cache)
        except (FileNotFoundError, EOFError, OSError):
            # without jobs the table cannot be rebuilt, so give up here
            if job_dict is None:
                raise OSError(
                    "There is something wrong with the results cache file. "
                    "Please use the --reload option to fix this"
                )
        if self.args.reload or self.data.empty:
            self.load_jobs(job_dict)
            pd.to_pickle(self.data, self.args.cache)

        # a command-line temperature takes precedence over the config value
        if not self.args.temp and self.config.get(
            "Results", "temperature", fallback=""
        ):
            self.args.temp = self.config.getfloat("Results", "temperature")
        if self.args.temp:
            self.data["temperature"] = self.args.temp

        if self.thermo not in self.data:
            self.LOG.error(
                "`%s` not in data. Please adjust your `--thermo` selection, "
                "ensure necessary computations were run (e.g. frequency "
                "computation for free_energy), or ensure the correct step "
                "numbers are loaded (if using the `load` keyword in "
                "[Results] section). Then use `AaronJr results --reload` to "
                "update the data cache.",
                self.thermo,
            )
            # exit() is injected by the site module and is not guaranteed
            # to exist; SystemExit is the always-available equivalent
            raise SystemExit(1)
        self._parse_calc_attr(job_dict)
        self.apply_corrections()

        if self.args.command == "results":
            self.print_results(self.args)
        if self.args.command == "plot":
            Plot(self, self.args)

    def _parse_calc_attr(self, job_dict):
        """Evaluate the ``[Results] calc`` expressions for every job.

        Each non-empty line of ``calc`` has the form ``name: expr`` or
        ``name = expr``. In ``expr`` every ``<step>.<attr or method call>``
        is rewritten to access the output object of that step; the value is
        stored in column ``name`` of the job's row (NaN if a firework or
        output of a referenced step is unavailable).

        :param job_dict: mapping of name -> list of jobs.
        """
        calc = self.config.get("Results", "calc", fallback="")
        if not calc:
            return
        calc = dict(
            [x.strip() for x in re.split(r"[:=]", line.strip(), maxsplit=1)]
            for line in calc.split("\n")
            if line.strip()
        )
        # groups: full_match, step, attr/method[, arguments]
        func_patt = re.compile(r"((\d+\.?\d*)\.(\w+(?:\((.*?)\))?))")
        for name, job_list in job_dict.items():
            for job in job_list:
                step_list = self._get_job_step_list(job)
                # find index for self.data row: the firework id of the first
                # usable step (the job object itself is the fallback key)
                fw_id = job
                for step in step_list:
                    if "conformer" in job.config.for_step(step).get(
                        "Job", "type"
                    ):
                        continue
                    fw = job.set_fw(step=step)
                    if fw is None:
                        continue
                    fw_id = int(fw.fw_id)
                    break
                # parse function and eval
                for attr, func in calc.items():
                    self._calc_attr.append(attr)
                    parsed_func = func
                    step_out = {}
                    val = np.nan
                    for match in func_patt.findall(func):
                        step = match[1]
                        old = match[0]
                        new = re.sub(
                            r"^{}\.".format(step),
                            "step_out['{}'].".format(step),
                            old,
                        )
                        parsed_func = parsed_func.replace(old, new, 1)
                        fw = job.set_fw(step=step)
                        if fw is None:
                            break
                        output = job.get_output()
                        if output is None:
                            break
                        step_out[step] = output
                    else:
                        # only reached when every referenced step has output
                        # NOTE(security): eval() executes an expression taken
                        # from the configuration file; use trusted configs
                        # only.
                        val = eval(parsed_func, {"step_out": step_out})
                    self.data.loc[fw_id, attr] = val

    def _get_job_step_list(self, job):
        """Return the steps to inspect for ``job``.

        Uses ``[Results] load`` (this config first, then the job's config);
        if it is unset, the job's own ``step_list`` is returned in reverse
        order.
        """
        step_list = self.config.get(
            "Results",
            "load",
            fallback=job.config.get("Results", "load", fallback=""),
        )
        if step_list:
            # NOTE(review): this iterates over the *characters* of the
            # config string, so only single-digit steps separated by
            # whitespace parse; a split on the intended delimiter is
            # probably missing - confirm the expected format.
            step_list = [float(s.strip()) for s in step_list if s.strip()]
        else:
            step_list = reversed(job.step_list)
        return step_list

    def load_jobs(self, job_dict):
        """Rebuild ``self.data`` from the outputs of the given jobs.

        One row is created per job that has at least one usable step. For
        each attribute the value of the first step (in step-list order) that
        provides a truthy value is kept. The table is indexed by ``fw_id``.

        :param job_dict: mapping of name -> list of jobs.
        :raises SystemExit: (code 1) if no row has a firework id.
        """
        data = []
        for name, job_list in job_dict.items():
            for job in job_list:
                step_list = self._get_job_step_list(job)
                # jobname is split as [change/[selectivity/]]template
                change, template = os.path.split(job.jobname)
                if job.conformer:
                    template = template.replace(
                        "_{}".format(job.conformer), "", 1
                    )
                change, selectivity = os.path.split(change)
                if not change:
                    change = selectivity
                    selectivity = None
                if selectivity is None:
                    # fall back to the first configured selectivity label
                    # contained in the template name
                    for s in self.config.get(
                        "Results", "selectivity", fallback=""
                    ).split(","):
                        s = s.strip()
                        if s and s in template:
                            selectivity = s
                            break
                data_row = {
                    "name": name,
                    "change": change,
                    "template": template,
                    "selectivity": selectivity,
                    "conformer": job.conformer,
                }
                for step in step_list:
                    if "conformer" in job.config.for_step(step).get(
                        "Job", "type"
                    ):
                        continue
                    fw = job.set_fw(step=step)
                    if fw is None:
                        continue
                    output = job.get_output(load_geom=True)
                    if output is None:
                        continue
                    if "fw_id" not in data_row:
                        data_row["fw_id"] = int(fw.fw_id)
                        data_row["step"] = step
                    for attr in output.__dict__:
                        if attr == "other" and output.other:
                            # flatten output.other into "other.<key>" columns
                            for key, val in output.other.items():
                                key = "other.{}".format(key)
                                if key not in data_row:
                                    data_row[key] = val
                        if attr in ["opts", "conformers", "archive", "other"]:
                            continue
                        val = getattr(output, attr)
                        if not val:
                            continue
                        if attr not in data_row:
                            data_row[attr] = val
                if "fw_id" in data_row and data_row["fw_id"]:
                    data.append(pd.Series(data_row))
        self.data = pd.DataFrame(data)
        for column in [
            "charge",
            "conformer",
            "fw_id",
            "multiplicity",
            "opt_steps",
        ]:
            try:
                self.data[column] = self.data[column].astype(
                    int, errors="ignore"
                )
            except KeyError:
                pass
        try:
            self.data.set_index("fw_id", inplace=True)
        except KeyError:
            self.LOG.error(
                "No fireworks found for these jobs. "
                "Run `AaronJr update` and try again"
            )
            raise SystemExit(1)

    def apply_corrections(self):
        """Apply the thermodynamic correction requested with ``args.thermo``.

        Nothing is done if the thermo column is computed through
        ``[Results] calc``. Rows for which the correction raises
        ``TypeError`` or ``AttributeError`` are left unchanged.
        """
        if self.thermo in self._calc_attr:
            return
        for index, row in self.data.iterrows():
            output = row2output(row)
            try:
                corr = None
                if self.thermo in ["energy", "enthalpy"]:
                    _, dH, _ = output.therm_corr(temperature=self.args.temp)
                    self.data.loc[index, "energy"] = (
                        output.energy + output.ZPVE
                    )
                    self.data.loc[index, "enthalpy"] = output.enthalpy + dH
                elif self.args.thermo.lower() in ["free_energy", "rrho"]:
                    corr = output.calc_G_corr(
                        v0=0, temperature=self.args.temp, method="RRHO"
                    )
                elif self.args.thermo.upper() in ["QRRHO"]:
                    corr = output.calc_G_corr(
                        v0=self.args.w0,
                        temperature=self.args.temp,
                        method="QRRHO",
                    )
                elif self.args.thermo.upper() in ["QHARM"]:
                    corr = output.calc_G_corr(
                        v0=self.args.w0,
                        temperature=self.args.temp,
                        method="QHARM",
                    )
                if corr:
                    self.data.loc[index, "free_energy"] = output.energy + corr
            except (TypeError, AttributeError):
                # best effort: rows lacking the required data are skipped
                pass

    def print_results(self, args):
        """Print the results table(s) to stdout.

        With ``args.script`` set, each geometry is piped (xyz format) to the
        shell command and the command's stdout is printed as a column.
        Otherwise the relative thermo values are printed per group, followed
        by Boltzmann-averaged summaries unless ``--absolute`` is used or
        ``[Results] boltzmann`` is false.

        :param args: namespace providing ``script``, ``change`` and ``unit``.
        :raises RuntimeError: if the script writes to stderr.
        """
        thermo_unit = self.thermo_unit
        cols = [
            "name",
            "change",
            "selectivity",
            "template",
            "conformer",
        ]
        data = self.data.copy()
        data.sort_values(by=cols, inplace=True)

        if args.script:
            geoms = data["geometry"]
            data = data[cols].copy()
            # Series.items(): iteritems() was removed in pandas 2.0
            for index, geom in geoms.items():
                # NOTE(security): args.script is run through the shell
                with subprocess.Popen(
                    args.script,
                    stdin=subprocess.PIPE,
                    stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE,
                    shell=True,
                ) as proc:
                    out, err = proc.communicate(
                        geom.write(style="xyz", outfile=False).encode()
                    )
                if err:
                    raise RuntimeError(err.decode())
                data.loc[index, args.script] = out.decode().strip()
            # NOTE(review): these conditions are true for any non-empty
            # column, so both columns are always dropped; "== 1" may have
            # been intended - confirm.
            if len(data["selectivity"].unique()):
                del data["selectivity"]
            if len(data["conformer"].unique()):
                del data["conformer"]
            with pd.option_context(
                "display.max_rows", None, "display.max_columns", None
            ):
                print(data)
            return

        data = self.get_relative(data)
        for d in data:
            if args.change:
                # "None"/"none" on the command line select the empty change
                args_change = set(args.change)
                for alias in ("None", "none"):
                    if alias in args_change:
                        args_change.discard(alias)
                        args_change.add("")
                if set(d["change"].unique()) - args_change:
                    continue
            # hide columns that carry no information for this group
            tmp_cols = cols.copy()
            for optional in ("conformer", "selectivity"):
                if (
                    optional not in d.columns
                    or len(d.groupby(optional)) == 1
                ):
                    tmp_cols.remove(optional)
            # copy so the unit conversion does not write into a slice
            d = d[tmp_cols + [self.thermo]].copy()
            if args.unit == "kcal/mol":
                d[self.thermo] = d[self.thermo] * UNIT.HART_TO_KCAL
            d.rename(columns={self.thermo: thermo_unit}, inplace=True)
            with pd.option_context(
                "display.max_rows", None, "display.max_columns", None
            ):
                print(d)
            print()

        # skip boltzmann summary if [Results] boltzmann = False
        # or if --absolute flag used
        if self.args.absolute or not self.config.getboolean(
            "Results", "boltzmann", fallback=True
        ):
            return

        # No unit conversion is applied to the averaged values below.
        header = True
        for d in self.boltzmann_average(data, cols[:-1], self.thermo):
            d = d[cols[:-1] + [self.thermo]]
            # print the title once, like the selectivity summary below (the
            # flag test used to be inverted, so the title never appeared)
            if header:
                print()
                print("Boltzmann averaged over conformers for template")
                header = False
            with pd.option_context(
                "display.max_rows", None, "display.max_columns", None
            ):
                print(d)
            print()

        header = True
        for d in self.boltzmann_average(data, cols[:-2], self.thermo):
            temperature = d["temperature"].tolist()[0]
            # Boltzmann weight of every row, summed per selectivity label
            # and normalised to percent. The labels are passed as an array
            # because a Series cannot be grouped by a column name.
            weights = (
                d[self.thermo]
                .map(lambda x: np.exp(-x / (PHYSICAL.R * temperature)))
                .astype(float)
            )
            p = weights.groupby(d["selectivity"].to_numpy()).sum()
            p = 100 * p / p.sum()
            d = d[cols[:-2] + [self.thermo]]
            if header:
                print()
                print("Boltzmann averaged over selectivity")
                header = False
            with pd.option_context(
                "display.max_rows", None, "display.max_columns", None
            ):
                print(d)
            print(", ".join(["{:0.1f}% {}".format(p[i], i) for i in p.index]))
            print()

    def get_relative(self, data, change=None):
        """Return the table split into groups with relative thermo values.

        Rows without a thermo value are dropped. If the config has a
        ``Results`` section the work is delegated to ``parse_functions``;
        otherwise values are made relative to the minimum of each
        (name, change) group unless ``args.absolute`` is set.

        :param data: results table.
        :param change: if given, return only the first non-empty frame
            restricted to this change (the full list if none matches).
        :returns: list of DataFrames, or a single DataFrame when ``change``
            matched.
        """
        data = data.dropna(subset=[self.thermo])
        if "Results" in self.config:
            data = self.parse_functions(
                data, self.config, self.thermo, absolute=self.args.absolute
            )
        elif not self.args.absolute:
            relative = []
            for _, group in data.groupby(["name", "change"]):
                group = group.copy()  # do not write into the groupby slice
                group[self.thermo] -= group[self.thermo].min()
                relative.append(group)
            data = relative
        else:
            data = [data]
        if change is not None:
            for d in data:
                d = d[d["change"] == change]
                if d.empty:
                    continue
                return d
        return data

    @staticmethod
    def boltzmann_average(data, cols, thermo):
        """Boltzmann-average ``thermo`` within groups of each frame.

        :param data: iterable of DataFrames.
        :param cols: list of columns to group by (``temperature`` is added).
        :param thermo: name of the column to average.
        :returns: list with one DataFrame per input frame that could be
            grouped; each row is the first row of a group with ``thermo``
            replaced by the group average. Frames lacking a grouping column
            are skipped.
        """
        boltzmann_avg = []
        for d in data:
            new_d = []
            try:
                data_grouped = d.groupby(cols + ["temperature"])
            except KeyError:
                continue
            for _, group in data_grouped:
                try:
                    temperature = group["temperature"].tolist()[0]
                except KeyError:
                    continue
                avg = utils.boltzmann_average(
                    group[thermo].to_numpy(),
                    group[thermo].to_numpy(),
                    temperature,
                    absolute=False,
                )
                # copy: the representative row must not alias the group
                tmp = group.iloc[0].copy()
                tmp[thermo] = avg
                new_d.append(tmp)
            if new_d:
                boltzmann_avg.append(pd.concat(new_d, axis=1).T)
        return boltzmann_avg

    @staticmethod
    def parse_functions(data, config, thermo, absolute=False):
        """Apply the ``[Results]`` expressions and split the table in groups.

        Keys of ``config["Results"]`` handled here:

        * ``&name``: expression over job selections (see ``job_patt``); the
          result is appended as row(s) named ``name``.
        * ``relative``: expression giving the reference value(s) subtracted
          from every change group (ignored when ``absolute`` is true).
        * ``drop``: job selections whose rows are removed.

        :param data: results table with ``name``, ``template``, ``change``
            and ``thermo`` columns.
        :param config: mapping with a ``"Results"`` section.
        :param thermo: name of the thermo column.
        :param absolute: if true, values are not made relative.
        :returns: list of DataFrames.
        :raises NotImplementedError: if the templates of a computed row
            cannot be matched to one of the first two selections.
        :raises RuntimeError: if ``relative`` does not give exactly one
            reference value for a change.
        """
        data = data.copy()
        relative = None
        drop = None
        for key, val in config["Results"].items():
            if key == "drop":
                drop = val
            if not key.startswith("&") and key.lower() != "relative":
                continue
            subst = []
            orig_val = val
            for match in Results.job_patt.findall(val):
                if match[0] == "-":
                    # a lone minus sign is an operator, not a selection
                    continue
                tmp = data.copy()
                if match[1]:
                    tmp = tmp[tmp["name"] == match[1]]
                if match[2]:
                    tmp = tmp[tmp["template"] == match[2]]
                if match[3].lower() == "none":
                    tmp = tmp[tmp["change"] == ""]
                elif match[3]:
                    tmp = tmp[tmp["change"] == match[3]]
                tmp.set_index("change", inplace=True)
                if tmp.shape[0] == 1:
                    subst.append(tmp.iloc[0])
                else:
                    subst.append(tmp)
                val = val.replace(
                    match[0],
                    "subst[{}][thermo]".format(len(subst) - 1),
                    1,
                )
            # NOTE(security): eval() executes an expression taken from the
            # configuration file; use trusted configs only.
            val = eval(val, {"subst": subst, "thermo": thermo})
            if not isinstance(val, pd.Series):
                val = pd.Series(
                    {
                        thermo: val,
                        "name": key.lstrip("&"),
                        "change": "",
                        "template": "",
                    },
                    name=-1,
                )
            else:
                val = pd.DataFrame(val)
                val["name"] = key.lstrip("&")
                # take the templates from the first of the first two
                # selections whose size matches the result
                for candidate in subst[:2]:
                    if len(candidate["template"]) == len(val):
                        val["template"] = candidate["template"]
                        break
                else:
                    # this shouldn't happen, but if it does...
                    raise NotImplementedError(
                        "Size mismatch in {}".format(orig_val)
                    )
                val.reset_index(inplace=True)
                val.index = pd.Index([-1] * len(val.index))
                val.dropna(inplace=True)
            if key == "relative":
                relative = val
            else:
                # DataFrame.append was removed in pandas 2.0; a Series is
                # appended as a single row
                if isinstance(val, pd.Series):
                    new_rows = val.to_frame().T.infer_objects()
                else:
                    new_rows = val
                data = pd.concat([data, new_rows])
                data.drop_duplicates(
                    subset=["name", "template", "change"],
                    keep="last",
                    inplace=True,
                )

        if drop:
            for _, name, template, change in Results.job_patt.findall(drop):
                # keep a row unless it matches every given criterion; the
                # comparisons need parentheses because "|" binds tighter
                # than "!="
                selection = data["name"] != name
                if template:
                    selection = selection | (data["template"] != template)
                if change and change.lower() != "none":
                    selection = selection | (data["change"] != change)
                elif change.lower() == "none":
                    selection = selection | (data["change"] != "")
                data = data[selection]

        if not absolute and relative is not None:
            # a scalar result is held as a Series; use a one-row table so
            # the per-change lookup below works for every shape
            if isinstance(relative, pd.Series):
                relative = relative.to_frame().T
            tmp = []
            for change, group in data.groupby("change"):
                rel = relative[relative["change"] == change]
                rel = rel[thermo].to_list()
                if len(rel) < 1:
                    # no reference for this change: use the unchanged one
                    rel = relative[relative["change"] == ""][thermo].to_list()
                if len(rel) != 1:
                    raise RuntimeError(
                        "[Results] relative setting is ambiguious"
                    )
                group = group.copy()  # do not write into the groupby slice
                group[thermo] = group[thermo] - rel[0]
                tmp.append(group)
            data = tmp if tmp else [data]
        elif not absolute:
            relative = []
            for _, group in data.groupby(["name", "change"]):
                group = group.copy()
                group[thermo] -= group[thermo].min()
                relative.append(group)
            data = relative
        else:
            data = [group for _, group in data.groupby(["change"])]
        return data
#!/usr/bin/env python3
import argparse
from warnings import warn

from AaronTools.config import Config
from AaronTools.job_control import SubmitProcess
from AaronTools.utils.utils import glob_files

# Defaults for the submit options are read from the [Job] section of the
# AaronTools configuration.
config = Config(quiet=True)

# "processors" is looked up first; "procs" (or 4) is its fallback
_procs_fallback = config.getint("Job", "procs", fallback=4)
default_proc = config.getint("Job", "processors", fallback=_procs_fallback)
default_mem = config.getint("Job", "memory", fallback=8)
default_walltime = config.getint("Job", "walltime", fallback=12)
default_template = config.get("Job", "template", fallback=None)

submit_parser = argparse.ArgumentParser(
    formatter_class=argparse.RawTextHelpFormatter,
    description="submit a QM computation to the queue",
)

# one or more input files, given positionally
submit_parser.add_argument(
    "infile",
    nargs="+",
    type=str,
    metavar="input file",
    help="a Psi4, ORCA, or Gaussian input file",
)
import re import ssl from AaronTools.config import Config from AaronTools.fileIO import FileReader from AaronTools.geometry import CACTUS_HOST from AaronTools.theory import Theory, OptimizationJob from chimerax.core.commands import run from chimerax.ui import HtmlToolInstance from SEQCROW.jobs import SQMJob from SEQCROW.managers.filereader_manager import apply_seqcrow_preset from SEQCROW.residue_collection import ResidueCollection DEFAULT_CONFIG = Config(quiet=True) #TODO: turn this into a setting on the menu instead of # AaronTools config if not DEFAULT_CONFIG["DEFAULT"].getboolean("local_only"): import urllib.parse from urllib.request import urlopen """ I was hoping for a python 2D builder, but all the ones I found were based on RDKit the RDKit that is available on pypi doesn't work with the Qt that comes with ChimeraX it relies on SVG, which our Qt doesn't have a module for I think it also wasn't available for windows (or maybe the editor that uses rdkit isn't available for windows) so I tried to make my own, but that wasn't going well lines were jagged, you can't draw double bonds yet, hydrogens don't move like they need to