def get_snap_site_features(d):
    """
    Build the SNAP feature vector for the absorbing site described by ``d``.

    The atomic cluster around the absorbing atom is written to a LAMMPS data
    file, LAMMPS is run from a template input, and the values dumped for the
    first trajectory entry are appended to the feature list.

    Args:
        d (dict): reads ``d["structure"]`` (decoded with
            ``Structure.from_dict``), ``d["absorbing_atom"]`` (index into
            that structure) and ``d["cluster"]`` (decoded with
            ``Molecule.from_dict``).  If decoding the cluster raises
            ``TypeError`` the cluster is rebuilt with
            ``Atoms(structure, absorbing_atom, 10.0)``.

    Returns:
        list: the atomic number of the absorbing species followed by the
        entries ``t[5:]`` of the first trajectory row (logged as the
        bispectrum coefficients).

    Raises:
        subprocess.CalledProcessError: if ``rm`` or ``./lmp_serial`` exits
            with a non-zero status (assumes ``sbp`` is ``subprocess``).
        AssertionError: if the first trajectory entry is not at the origin.
    """
    feature = []
    data_filename = "lammps_snap.data"
    input_template = "lammps_snap_template.in"
    input_filename = "lammps_snap.in"
    dump_filename = "dump.sna"
    log_filename = "log.lammps"

    # Start from a clean working directory so stale outputs are never parsed.
    for stale_file in (input_filename, data_filename, dump_filename,
                       log_filename):
        sbp.check_call(["rm", "-f", stale_file])

    structure = Structure.from_dict(d["structure"])
    feature.append(structure[d["absorbing_atom"]].specie.number)
    try:
        mol = Molecule.from_dict(d["cluster"])
    except TypeError:
        # No usable stored cluster: rebuild it around the absorbing atom.
        # NOTE(review): 10.0 is presumably the cluster radius -- confirm.
        atoms = Atoms(structure, d["absorbing_atom"], 10.0)
        mol = atoms.cluster
    logger.info(mol.formula)

    lmp_data = LammpsData.from_structure(
        mol, [[0, 25], [0, 25], [0, 25]], translate=False)
    lmp_data.write_file(data_filename)

    # One cutoff/weight entry per element, ordered by atomic mass.
    el_sorted = sorted(mol.composition, key=lambda x: x.atomic_mass)
    cutoff = "".join(
        " {}".format(float(e.atomic_radius)) for e in el_sorted)
    weight = " {}".format(1.0) * len(el_sorted)
    settings = {
        'data_file': data_filename,
        'rcutfac': 1.4,
        'rfac0': 0.993630,
        'twojmax': 6.0,
        'cutoff': cutoff,
        'weight': weight,
        'dump_file': dump_filename
    }
    lmp_in = LammpsInput.from_file(input_template, settings)
    lmp_in.write_file(input_filename)

    logger.info("Running LAMMPS ... ")
    try:
        # check_call raises on a non-zero exit status, so a failed run can
        # only be observed (and logged) through the exception.
        sbp.check_call(["./lmp_serial", "-in", input_filename])
    except sbp.CalledProcessError:
        logger.error("lammps run failed")
        raise

    logger.info("Processing LAMMPS outputs ... ")
    lmp_run = LammpsRun(data_filename, dump_filename, log_filename)
    t = list(lmp_run.trajectory[0])
    # Explicit check instead of ``assert`` so it still runs under ``-O``.
    # ``not (<=)`` keeps a NaN norm classified as a failure.
    if not (np.linalg.norm(t[2:5]) <= 1e-6):
        logger.info("xyz: {}".format(t[2:5]))
        logger.error("assertion failed: first one not at origin")
        raise AssertionError("first one not at origin")
    logger.info("# bispectrum coeffs: {}".format(len(t[5:])))
    feature.extend(t[5:])
    return feature
def _get_population_charges(d, generator_label):
    """
    Return the atomic charges of the vacuum SCF calculation in task doc ``d``.

    NBO charges are preferred; Hirshfeld charges are the fallback.

    Raises:
        ValueError: if ``d["calculations"]`` has no ``"scf"`` entry or that
            entry carries neither ``"nbo"`` nor ``"hirshfeld"`` charges.
    """
    calculations = d["calculations"]
    pop_method = None
    if "scf" in calculations:
        available = calculations["scf"]["charges"]
        if "nbo" in available:
            pop_method = "nbo"
        elif "hirshfeld" in available:
            pop_method = "hirshfeld"
    if pop_method is None:
        raise ValueError(
            "An vacuum single point caculation is require to use "
            "{} generator".format(generator_label))
    return calculations["scf"]["charges"][pop_method]


def _load_basis_set_generator(spec_value):
    """Decode a generator stored as a dict; pass other values through."""
    if isinstance(spec_value, dict):
        return AtomicChargeMixedBasisSetGenerator.from_dict(spec_value)
    return spec_value


def get_basic_update_specs(fw_spec, d):
    """
    Assemble the spec updates that a finished task passes to its children.

    Args:
        fw_spec (dict): spec of the current Firework.  ``"inchi_root"`` is
            required; ``"mixed_basis"``, ``"mixed_aux_basis"``,
            ``"_mixed_basis_set_generator"`` and
            ``"_mixed_aux_basis_set_generator"`` are optional.
        d (dict): task document.  ``"molecule_final"``, ``"snl_final"`` and
            ``"snlgroup_id_final"`` are required; ``"calculations"`` is only
            read when a basis set generator is configured.

    Returns:
        dict: always holds ``mol``, ``egsnl``, ``snlgroup_id`` and
        ``inchi_root``; ``mixed_basis`` / ``mixed_aux_basis`` are added when
        available.

    Raises:
        ValueError: if a generator is configured but the task document has
            no NBO or Hirshfeld charges from an ``"scf"`` calculation.
    """
    update_specs = {
        'mol': d["molecule_final"],
        'egsnl': d["snl_final"],
        'snlgroup_id': d["snlgroup_id_final"],
        'inchi_root': fw_spec["inchi_root"]
    }
    mixed_basis = None
    mixed_aux_basis = None
    if "mixed_basis" in fw_spec:
        mixed_basis = fw_spec["mixed_basis"]
    if "mixed_aux_basis" in fw_spec:
        mixed_aux_basis = fw_spec["mixed_aux_basis"]

    needs_mol = ("_mixed_basis_set_generator" in fw_spec
                 or "_mixed_aux_basis_set_generator" in fw_spec)
    if needs_mol:
        # Decode the molecule once for both generators; previously the
        # auxiliary branch relied on the primary branch having defined it.
        if isinstance(d["molecule_final"], dict):
            mol = Molecule.from_dict(d["molecule_final"])
        else:
            mol = d["molecule_final"]

    if "_mixed_basis_set_generator" in fw_spec:
        charges = _get_population_charges(d, "mixed basis set")
        bs_generator = _load_basis_set_generator(
            fw_spec["_mixed_basis_set_generator"])
        mixed_basis = bs_generator.get_basis(mol, charges)

    if "_mixed_aux_basis_set_generator" in fw_spec:
        charges = _get_population_charges(d, "mixed auxiliary basis set")
        # Decoded the same way as the primary generator (from_dict for a
        # stored dict) rather than passing the dict to the constructor.
        aux_bs_generator = _load_basis_set_generator(
            fw_spec["_mixed_aux_basis_set_generator"])
        mixed_aux_basis = aux_bs_generator.get_basis(mol, charges)

    # Kept from the original: "mixed_basis" is written (possibly as None)
    # whenever either basis exists.
    if mixed_basis or mixed_aux_basis:
        update_specs["mixed_basis"] = mixed_basis
    if mixed_aux_basis:
        # Fixed: this used to store mixed_basis under the auxiliary key.
        update_specs["mixed_aux_basis"] = mixed_aux_basis
    return update_specs
def from_dict(cls, d):
    """
    Rebuild a GaussianInput from its dictionary representation.

    Args:
        d (dict): must contain ``"molecule"`` (decoded with
            ``Molecule.from_dict``) plus the keys ``"functional"``,
            ``"basis_set"``, ``"route_parameters"``, ``"title"``,
            ``"charge"``, ``"spin_multiplicity"``, ``"input_parameters"``
            and ``"link0_parameters"``, which are forwarded unchanged.

    Returns:
        GaussianInput: always a ``GaussianInput``; ``cls`` is not used.
    """
    molecule = Molecule.from_dict(d["molecule"])
    # Keys whose values map one-to-one onto constructor keyword arguments.
    passthrough_keys = (
        "functional",
        "basis_set",
        "route_parameters",
        "title",
        "charge",
        "spin_multiplicity",
        "input_parameters",
        "link0_parameters",
    )
    keyword_args = {key: d[key] for key in passthrough_keys}
    return GaussianInput(mol=molecule, **keyword_args)
def update_tags(fw_spec, d, task_id):
    """
    Copy bookkeeping tags from the Firework spec into task document ``d``.

    Nothing happens when ``fw_spec`` is falsy.  Otherwise ``d`` gains the
    task type, run/user tags, solvent settings, initial SNL and InChI data.
    For task types containing "geometry optimization" or "molecule dynamics"
    a new SNL is built from ``d["molecule_final"]``, stored through
    ``EGSNLMongoAdapter`` and recorded as the final SNL; for every other
    task type the final SNL is the one from the spec.

    Args:
        fw_spec (dict): Firework spec to read from.
        d (dict): task document, modified in place.
        task_id: identifier written into the new SNL history entry.
    """
    if not fw_spec:
        return

    def spec_snl_as_dict():
        # The spec may carry the SNL as a plain dict or as an object.
        snl = fw_spec['egsnl']
        return snl if isinstance(snl, dict) else snl.as_dict()

    d['task_type'] = fw_spec['task_type']
    if '_fizzled_parents' in fw_spec and 'prev_task_type' not in fw_spec:
        d['task_type'] = fw_spec['_fizzled_parents'][0]['task_type']
    else:
        d['task_type'] = fw_spec['prev_task_type']
    d['run_tags'] = fw_spec['run_tags']
    d['implicit_solvent'] = fw_spec['implicit_solvent']
    d['user_tags'] = fw_spec["user_tags"]
    d['snl_initial'] = spec_snl_as_dict()
    d['snlgroup_id_initial'] = fw_spec['snlgroup_id']
    d['inchi_root'] = fw_spec['inchi_root']
    d['inchi_initial'] = fw_spec['inchi']

    task_type = d['task_type']
    geometry_may_change = ("geometry optimization" in task_type
                           or "molecule dynamics" in task_type)
    if geometry_may_change:
        final_molecule = Molecule.from_dict(d["molecule_final"])
        previous_snl = EGStructureNL.from_dict(d['snl_initial'])
        history = previous_snl.history
        history.append({
            'name': 'Electrolyte Genome Project structure optimization',
            'url': 'http://www.materialsproject.org',
            'description': {
                'task_type': d['task_type'],
                'task_id': task_id,
                'when': datetime.datetime.utcnow()
            }
        })
        updated_snl = EGStructureNL(
            final_molecule, previous_snl.authors, previous_snl.projects,
            previous_snl.references, previous_snl.remarks,
            previous_snl.data, history)
        # Register the new SNL in the SNL database.
        adapter = EGSNLMongoAdapter.auto_load()
        stored_snl, stored_group_id = adapter.add_snl(
            updated_snl, snlgroup_guess=d['snlgroup_id_initial'])
        d['snl_final'] = stored_snl.as_dict()
        d['snlgroup_id_final'] = stored_group_id
    else:
        d['snl_final'] = spec_snl_as_dict()
        d['snlgroup_id_final'] = fw_spec['snlgroup_id']
    d['snlgroup_changed'] = (
        d['snlgroup_id_initial'] != d['snlgroup_id_final'])
def setUpClass(cls):
    """
    Prepare the fixtures shared by the tests of this class.

    ``cls.mols`` holds the three module-level molecules ``tfn``, ``n1c`` and
    ``pc``.  ``cls.mols_in_box`` holds the content of ``packmol_data.json``
    with its ``"mols"`` entries decoded into ``Molecule`` objects.
    """
    cls.mols = [tfn, n1c, pc]

    with open("packmol_data.json") as handle:
        box_data = json.load(handle)
    box_data["mols"] = [
        Molecule.from_dict(entry) for entry in box_data["mols"]
    ]

    # A runner is populated from the loaded data, as in the original
    # fixture; it is not stored on the class.
    runner = PackmolRunner([], [])
    runner.mols = box_data["mols"]
    runner.param_list = box_data["param_list"]

    cls.mols_in_box = box_data
    super(TestLmpInput, cls).setUpClass()
def from_dict(d):
    """
    Rebuild an MPStructureNL from its dictionary representation.

    Args:
        d (dict): serialized SNL.  ``d["about"]`` supplies the metadata
            (``"authors"`` is required; ``"projects"``, ``"references"``,
            ``"remarks"``, ``"history"`` and ``"created_at"`` are optional).
            ``d`` is decoded as a ``Structure`` when it has a ``"lattice"``
            key and as a ``Molecule`` otherwise.

    Returns:
        MPStructureNL: the reconstructed object.
    """
    about = d["about"]
    decoder = MontyDecoder()

    if "created_at" in about:
        created_at = decoder.process_decoded(about["created_at"])
    else:
        created_at = None

    # Underscore-prefixed "about" entries make up the free-form data block.
    private_entries = {
        key: value for key, value in about.items() if key.startswith("_")
    }
    data = decoder.process_decoded(private_entries)

    if "lattice" in d:
        structure = Structure.from_dict(d)
    else:
        structure = Molecule.from_dict(d)

    return MPStructureNL(
        structure,
        about["authors"],
        projects=about.get("projects"),
        references=about.get("references", ""),
        remarks=about.get("remarks"),
        data=data,
        history=about.get("history"),
        created_at=created_at,
    )
def from_dict(cls, d):
    """
    Rebuild an MPStructureNL from its dictionary representation.

    Args:
        d (dict): serialized SNL.  ``d["about"]`` supplies the metadata
            (``"authors"`` is required; ``"projects"``, ``"references"``,
            ``"remarks"``, ``"history"`` and ``"created_at"`` are optional).
            ``d`` is decoded as a ``Structure`` when it has a ``"lattice"``
            key and as a ``Molecule`` otherwise.

    Returns:
        MPStructureNL: always an ``MPStructureNL``; ``cls`` is not used.
    """
    about = d["about"]
    decoder = MontyDecoder()

    created_at = None
    if "created_at" in about:
        created_at = decoder.process_decoded(about["created_at"])

    # Underscore-prefixed "about" entries make up the free-form data block.
    data = decoder.process_decoded(
        {name: payload for name, payload in about.items()
         if name.startswith("_")})

    decode_structure = (
        Structure.from_dict if "lattice" in d else Molecule.from_dict)
    structure = decode_structure(d)

    return MPStructureNL(
        structure,
        about["authors"],
        projects=about.get("projects"),
        references=about.get("references", ""),
        remarks=about.get("remarks"),
        data=data,
        history=about.get("history"),
        created_at=created_at,
    )
def perturb_molecule(cls, d, reversed_direction=False):
    """
    Displace the final molecule of ``d`` along its first vibrational mode.

    The mode ``d['calculations']['freq']['frequencies'][0]["vib_mode"]`` is
    rescaled so that its largest per-atom displacement equals
    ``cls.molecule_perturb_scale`` and is then added to (or, when
    ``reversed_direction`` is true, subtracted from) the site coordinates.

    Args:
        d (dict): task document with ``"molecule_final"`` and the frequency
            calculation.
        reversed_direction (bool): displace against the mode direction.

    Returns:
        Molecule: new molecule with the same species, charge and spin
        multiplicity and the displaced coordinates.
    """
    old_mol = Molecule.from_dict(d['molecule_final'])
    vib_mode = d['calculations']['freq']['frequencies'][0]["vib_mode"]

    # Length of each per-atom displacement vector of the mode.
    displacement_lengths = []
    for atom_vector in vib_mode:
        squared = [component ** 2 for component in atom_vector]
        displacement_lengths.append(math.sqrt(sum(squared)))
    scale = cls.molecule_perturb_scale / max(displacement_lengths)

    sign = -1.0 if reversed_direction else 1.0

    new_coords = []
    for site, atom_vector in zip(old_mol.sites, vib_mode):
        scaled_vector = [component * scale for component in atom_vector]
        new_coords.append(
            [coord + shift * sign
             for coord, shift in zip(site.coords, scaled_vector)])

    species = [site.specie.symbol for site in old_mol.sites]
    return Molecule(species, new_coords,
                    charge=old_mol.charge,
                    spin_multiplicity=old_mol.spin_multiplicity)
def img_freq_action(self, fw_spec, d, t_id, qcout_path):
    """
    Build the FWAction that tries to remove an imaginary frequency.

    The molecule is perturbed along the imaginary mode and a new
    optimisation + frequency workflow is spawned as a detour.  Successive
    attempts walk through the strategies ``dir_dis_opt``, ``den_dis_opt``
    and ``alt_den_dis_opt`` (the latter two pass an explicit grid).

    Args:
        fw_spec (dict): spec of the current Firework.
        d (dict): task document of the finished calculation.
        t_id: task id stored in the returned action.
        qcout_path: path handed to ``get_defuse_causing_qchem_fwid`` when
            the children have to be defused.

    Returns:
        FWAction: a detour with the new workflow, or an action with
        ``defuse_children=True`` when all strategies are exhausted or both
        perturbation directions change the structure.

    Raises:
        Exception: if the selected strategy name is not one of the three
            known ones.
    """
    if "img_freq_eli" in d['user_tags']:
        img_freq_eli = copy.deepcopy(d['user_tags']["img_freq_eli"])
        img_freq_eli['current_method_id'] += 1
    else:
        img_freq_eli = {
            "methods": ["dir_dis_opt", "den_dis_opt", "alt_den_dis_opt"],
            "current_method_id": 0
        }
    update_specs = get_basic_update_specs(fw_spec, d)

    if img_freq_eli['current_method_id'] >= len(img_freq_eli['methods']):
        # Every strategy has been tried already: give up.
        logging.error("Failed to eliminate imaginary frequency")
        offending_fwid = get_defuse_causing_qchem_fwid(qcout_path)
        return FWAction(
            stored_data={'task_id': t_id},
            defuse_children=True,
            update_spec=dict(
                {
                    'defuse_reason': "imaginary frequency "
                                     "elimination failed",
                    'offending_fwid': offending_fwid
                }, **update_specs))

    new_mol = self.perturb_molecule(d)
    old_mol = Molecule.from_dict(d['molecule_final'])
    structure_changed = self._check_structure_change(
        old_mol, new_mol, fw_spec)
    if structure_changed:
        # Fixed: the reversed perturbation used to be computed and thrown
        # away, so the second check re-tested the very same molecule.
        new_mol = self.perturb_molecule(d, reversed_direction=True)
        structure_changed = self._check_structure_change(
            old_mol, new_mol, fw_spec)
    if structure_changed:
        offending_fwid = get_defuse_causing_qchem_fwid(qcout_path)
        return FWAction(
            stored_data={'task_id': t_id},
            defuse_children=True,
            update_spec=dict(
                {
                    'perturbed_mol': new_mol.as_dict(),
                    'defuse_reason': "structural change in imaginary "
                                     "frequency elimination",
                    'offending_fwid': offending_fwid
                }, **update_specs))

    molname = d['user_tags']['molname']
    mission = d['user_tags']['mission']
    additional_user_tags = {"img_freq_eli": img_freq_eli}
    if "initial_charge" in fw_spec["user_tags"]:
        additional_user_tags["initial_charge"] = fw_spec["user_tags"][
            "initial_charge"]
    priority = fw_spec['_priority']

    old_snl = EGStructureNL.from_dict(d['snl_initial'])
    history = old_snl.history
    history.append({
        'name': 'Electrolyte Genome Project eliminate imaginary '
                'frequency by perturb molecular geometry',
        'url': 'http://www.materialsproject.org',
        'description': {
            'task_type': d['task_type'],
            'task_id': d['task_id'],
            'max_displacement': self.molecule_perturb_scale
        },
        'when': datetime.datetime.utcnow()
    })
    new_snl = EGStructureNL(new_mol, old_snl.authors, old_snl.projects,
                            old_snl.references, old_snl.remarks,
                            old_snl.data, history)

    # Register the perturbed structure in the SNL database.
    sma = EGSNLMongoAdapter.auto_load()
    egsnl, snlgroup_id = sma.add_snl(
        new_snl, snlgroup_guess=d['snlgroup_id_initial'])
    # NOTE(review): the group id returned by add_snl is not used; the spec
    # keeps fw_spec['snlgroup_id'] -- confirm this is intended.
    update_specs = {
        'mol': new_mol.as_dict(),
        'egsnl': egsnl.as_dict(),
        'snlgroup_id': fw_spec['snlgroup_id'],
        'inchi_root': fw_spec['inchi_root']
    }

    eli_strategy = img_freq_eli["methods"][
        img_freq_eli["current_method_id"]]
    charge = new_mol.charge
    spin_multiplicity = new_mol.spin_multiplicity
    qm_method = fw_spec["qm_method"]

    perturb_only_msg = ("Eliminate Imaginary Frequency By Perturbing the "
                        "Structure of Molecule")
    denser_grid_msg = ("Eliminate Imaginary Frequency By Perturbing the "
                       "Structure of Molecule, and increase the grid "
                       "density")
    # strategy name -> (grid passed to the spawned workflow, log message)
    strategies = {
        "dir_dis_opt": (None, perturb_only_msg),
        "den_dis_opt": ((128, 302), denser_grid_msg),
        "alt_den_dis_opt": ((90, 590), denser_grid_msg),
    }
    if eli_strategy not in strategies:
        raise Exception("Unknown imaginary frequency fixing method")
    grid, message = strategies[eli_strategy]
    logging.info(message)
    wf = self.spawn_opt_freq_wf(new_mol, molname, mission,
                                additional_user_tags, priority,
                                update_specs, charge, spin_multiplicity,
                                grid=grid, qm_method=qm_method)
    return FWAction(stored_data={'task_id': t_id},
                    detours=wf,
                    update_spec=update_specs)
def get_task_doc(cls, path, fw_spec=None):
    """
    Parse a QChem output file into a sanitized task document.

    Args:
        path: path of the QChem output file (resolved through ``zpath``).
        fw_spec (dict, optional): Firework spec; when given, its ``"inchi"``
            is compared with the final InChI to set ``"inchi_changed"``.

    Returns:
        The ``jsanitize``-d document.  ``"state"`` is ``"rejected"`` when
        the structure changed, ``"error"`` when any stored job reports an
        error, and ``"successful"`` otherwise.
    """
    logger.info("Getting task doc for file:{}".format(path))
    jobs = QcOutput(zpath(path)).data
    first_job = jobs[0]
    initial_mol = first_job["molecules"][0]
    mol = first_job["molecules"][-1]
    if first_job["jobtype"] == "freq":
        # For a leading frequency job the final molecule is a copy of the
        # initial one.
        mol = Molecule.from_dict(initial_mol.as_dict())

    pbmol = BabelMolAdaptor(mol).pybel_mol
    xyz = XYZ(mol)
    smiles = pbmol.write(str("smi")).split()[0]
    can = pbmol.write(str("can")).split()[0]
    inchi_final = pbmol.write(str("inchi")).strip()
    svg = cls.modify_svg(cls.xyz2svg(xyz))
    comp = mol.composition
    charge = mol.charge
    spin_mult = mol.spin_multiplicity
    sch_symbol = PointGroupAnalyzer(mol).sch_symbol

    # Index the jobs by kind and note whether any of them moves the atoms.
    calculations = {}
    stationary_type = None
    has_structure_changing_job = False
    for job in jobs:
        jobtype = job["jobtype"]
        if jobtype == "opt":
            calculations["geom_opt"] = job
            has_structure_changing_job = True
        elif jobtype == "freq":
            calculations["freq"] = job
            has_structure_changing_job = True
            if job["has_error"]:
                stationary_type = "unknown"
            elif job['frequencies'][0]["frequency"] < -0.00:
                # A negative lowest frequency marks a non-minimum.
                stationary_type = "non-minimum"
            else:
                stationary_type = "minimum"
        elif jobtype == "sp":
            solvent_method = job["solvent_method"]
            if solvent_method == "NA":
                scf_key = "scf"
            else:
                scf_key = "scf" + "_" + solvent_method
            calculations[scf_key] = job
        elif jobtype == "aimd":
            # NOTE(review): the key is spelled "amid" in the original;
            # kept unchanged because stored documents may depend on it.
            calculations["amid"] = job
            has_structure_changing_job = True

    abs_path = os.path.abspath(path)
    doc = {
        "path": abs_path,
        "folder": os.path.basename(os.path.dirname(abs_path)),
        "calculations": calculations,
        "molecule_initial": initial_mol.as_dict(),
        "molecule_final": mol.as_dict(),
        "pointgroup": sch_symbol,
        "pretty_formula": comp.reduced_formula,
        "reduced_cell_formula_abc": comp.alphabetical_formula,
        "formula": comp.formula,
        "charge": charge,
        "spin_multiplicity": spin_mult,
        "composition": comp.as_dict(),
        "elements": list(comp.as_dict().keys()),
        "nelements": len(comp),
        "smiles": smiles,
        "can": can,
        "inchi_final": inchi_final,
        "svg": svg,
        "xyz": str(xyz),
        "names": get_nih_names(smiles)
    }
    if stationary_type:
        doc['stationary_type'] = stationary_type
    if fw_spec:
        doc['inchi_changed'] = bool(fw_spec['inchi'] != doc['inchi_final'])

    if has_structure_changing_job:
        doc['structure_changed'] = cls._check_structure_change(
            initial_mol, mol, path)
    else:
        doc['structure_changed'] = False

    if doc['structure_changed']:
        doc['state'] = 'rejected'
        doc['reject_reason'] = 'structural change'
    if "state" not in doc:
        for job in calculations.values():
            if job['has_error']:
                doc['state'] = "error"
                doc.setdefault("errors", []).extend(job["errors"])
    if "state" not in doc:
        doc["state"] = "successful"
    return jsanitize(doc)
def main():
    """
    Command-line entry point: re-register finished QChem job directories.

    Every sub-directory of ``--directory`` that holds an ``FW.json`` (or
    ``FW.json.gz``) yields two linked Fireworks: one that adds the molecule
    to the SNL database and one ``FakeRunQChemTask`` that re-parses the
    stored job.  The Fireworks are submitted to the LaunchPad in workflows
    of ``--batch_size`` directory pairs each.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description="Run A QChem Job for a QChem Input File")
    parser.add_argument(
        "-d", "--directory", dest="directory", type=str, required=True,
        help="the directory contains all the QChem jobs to be pretended "
             "to run again")
    parser.add_argument("-p", "--priority", dest="priority", type=int,
                        default=100, help="the FireWorks priority")
    parser.add_argument("-b", "--batch_size", dest="batch_size", type=int,
                        default=100,
                        help="the number of FireWorks in a Workflow")
    options = parser.parse_args()

    fw_priority = options.priority
    batch_size = options.batch_size
    lp = LaunchPad.auto_load()

    src_dir = os.path.abspath(options.directory)
    src_dir_sub_dirs = glob.glob(os.path.join(src_dir, "*"))
    num_dirs = len(src_dir_sub_dirs)

    current_fwid = 1
    links_dict = dict()
    fws_all = []
    num_fw_in_current_batch = 0
    batch_num = 1
    for i, sd in enumerate(src_dir_sub_dirs):
        if not os.path.isdir(sd):
            continue
        fw_json_filename = os.path.join(sd, "FW.json")
        if not (os.path.exists(fw_json_filename)
                or os.path.exists(fw_json_filename + ".gz")):
            continue
        with zopen(zpath(fw_json_filename), 'rt') as f:
            fw_dict = json.load(f)
        spec = fw_dict['spec']
        molname = spec['user_tags']['molname']
        print("{percent:4.2%} completed, processing directory {d:s}, "
              "molecule name {molname:s},"
              " mission {mission:s}".format(
                  percent=i / float(num_dirs), d=sd, molname=molname,
                  mission=spec['user_tags']['mission']))

        egsnl_tasks = [AddEGSNLTask()]
        # Fixed: the membership test used to look at fw_dict itself while
        # the value was read from fw_dict['spec'].
        if 'mol' in spec:
            mol = Molecule.from_dict(spec['mol'])
        else:
            mol = Molecule.from_dict(spec['qcinp']['jobs'][0]['molecule'])
        snl = StructureNL(mol, "Xiaohui Qu <*****@*****.**>",
                          "Electrolyte Genome")
        egsnl_task_spec = {
            'task_type': 'Add to SNL database',
            'snl': snl.as_dict(),
            '_category': 'Parse Previous QChem Job',
            '_priority': fw_priority
        }
        snl_fw_id = current_fwid
        current_fwid += 1
        fws_all.append(
            Firework(
                egsnl_tasks,
                egsnl_task_spec,
                name=get_slug(
                    molname +
                    ' -- Add to SNL database For fake QChem Task'),
                fw_id=snl_fw_id))

        fake_qchem_tasks = [FakeRunQChemTask()]
        src_qchem_dir = sd
        fake_qchem_spec = {
            '_priority': fw_priority * 2,
            'src_qchem_dir': src_qchem_dir,
            '_category': 'Parse Previous QChem Job',
            'run_tags': spec['run_tags'],
            'implicit_solvent': spec['implicit_solvent'],
            'task_type': spec['task_type'],
            'charge': spec['charge'],
            'spin_multiplicity': spec['spin_multiplicity'],
            'num_atoms': spec['num_atoms'],
            'user_tags': spec['user_tags'],
            'mol': mol.as_dict(),
            'inchi': spec['inchi'],
            '_dupefinder': spec['_dupefinder'],
            'qcinp': spec['qcinp'],
            'qm_method': spec['qm_method'],
            'inchi_root': spec['inchi_root']
        }
        for k in ['mixed_basis', 'mixed_aux_basis']:
            if k in spec:
                fake_qchem_spec[k] = spec[k]
        fake_qchem_fw_id = current_fwid
        current_fwid += 1
        fws_all.append(
            Firework(fake_qchem_tasks,
                     fake_qchem_spec,
                     name='Fake' + fw_dict['name'],
                     fw_id=fake_qchem_fw_id))
        links_dict[snl_fw_id] = fake_qchem_fw_id

        num_fw_in_current_batch += 1
        # Fixed: honour --batch_size instead of the hard-coded 100.
        if num_fw_in_current_batch >= batch_size:
            wf = Workflow(fws_all, links_dict,
                          "Read Previous QChem Jobs Id-{}".format(batch_num))
            lp.add_wf(wf)
            batch_num += 1
            links_dict = dict()
            fws_all = []
            num_fw_in_current_batch = 0

    # Submit whatever is left over from the last, partially filled batch.
    if num_fw_in_current_batch > 0:
        wf = Workflow(fws_all, links_dict, "Read Previous QChem Jobs")
        lp.add_wf(wf)
import tensorflow as tf # disable warnings and enhance performance tf.compat.v1.disable_eager_execution() import json with open('qm9_sample.json', 'r') as f: data = json.load(f) from pymatgen import Molecule qm9_ids = list(data.keys()) print('qm9 len is: ', len(qm9_ids)) molecules = [Molecule.from_dict(data[i]['molecule']) for i in qm9_ids] # this gives a list of pymatgen Molecule structures = molecules targets = [data[i]['property']['U0'] for i in qm9_ids] # We are training U0 herea train_structures = structures[:80] test_structures = structures[80:] train_targets = targets[:80] test_targets = targets[80:] from megnet.models import MEGNetModel from megnet.data.graph import GaussianDistance from megnet.data.crystal import CrystalGraph from megnet.utils.preprocessing import StandardScaler import numpy as np