def test_assimilate_opt_with_hidden_changes_from_handler(self):
    """
    Assimilate the "1746_complete" run as a frequency-flattener job and pin
    the fields of the resulting task document.

    The closing assertion requires the graph built from ``doc["orig"]`` to be
    non-isomorphic to the one built from ``doc["input"]["initial_molecule"]``.
    """
    drone = QChemDrone(
        additional_fields={"special_run_type": "frequency_flattener"})
    run_dir = os.path.join(module_dir, "..", "test_files", "1746_complete")
    doc = drone.assimilate(path=run_dir,
                           input_file="mol.qin",
                           output_file="mol.qout",
                           multirun=False)

    self.assertEqual(doc["input"]["job_type"], "opt")
    self.assertEqual(doc["output"]["job_type"], "freq")
    self.assertEqual(doc["output"]["final_energy"], -303.835532370106)

    # Top-level fields of the task document, checked in a fixed order.
    expected_top_level = (
        ("smiles", "O1C(=CC1=O)[CH]"),
        ("state", "successful"),
        ("num_frequencies_flattened", 0),
        ("walltime", 631.54),
        ("cputime", 7471.17),
        ("formula_pretty", "HC2O"),
        ("formula_anonymous", "ABC2"),
        ("chemsys", "C-H-O"),
        ("pointgroup", "C1"),
    )
    for field, expected in expected_top_level:
        self.assertEqual(doc[field], expected)

    last_calc_input = doc["calcs_reversed"][-1]["input"]
    self.assertEqual(doc["orig"]["rem"], last_calc_input["rem"])

    def graph_of(mol_dict):
        return MoleculeGraph.with_local_env_strategy(
            Molecule.from_dict(mol_dict),
            OpenBabelNN(),
            reorder=False,
            extend_structure=False)

    orig_molgraph = graph_of(doc["orig"]["molecule"])
    initial_molgraph = graph_of(doc["input"]["initial_molecule"])
    self.assertEqual(orig_molgraph.isomorphic_to(initial_molgraph), False)
def test_assimilate_unstable_opt(self):
    """
    Assimilate the "2620_complete" run (four opt/freq rounds) and pin the
    fields of the resulting task document, which is expected to be flagged
    "unsuccessful" with an "unstable" final energy.
    """
    run_names = [
        "opt_0", "freq_0", "opt_1", "freq_1",
        "opt_2", "freq_2", "opt_3", "freq_3",
    ]
    drone = QChemDrone(
        runs=run_names,
        additional_fields={"special_run_type": "frequency_flattener"})
    run_dir = os.path.join(module_dir, "..", "test_files", "2620_complete")
    doc = drone.assimilate(path=run_dir,
                           input_file="mol.qin",
                           output_file="mol.qout",
                           multirun=False)

    self.assertEqual(doc["input"]["job_type"], "opt")
    self.assertEqual(doc["output"]["job_type"], "opt")
    self.assertEqual(doc["output"]["final_energy"], "unstable")

    # Top-level fields of the task document, checked in a fixed order.
    expected_top_level = (
        ("smiles", "[S](=O)[N]S[C]"),
        ("state", "unsuccessful"),
        ("walltime", None),
        ("cputime", None),
        ("formula_pretty", "CS2NO"),
        ("formula_anonymous", "ABCD2"),
        ("chemsys", "C-N-O-S"),
        ("pointgroup", "C1"),
    )
    for field, expected in expected_top_level:
        self.assertEqual(doc[field], expected)

    # The "orig" section must mirror the input of the earliest calculation.
    last_calc_input = doc["calcs_reversed"][-1]["input"]
    self.assertEqual(doc["orig"]["rem"], last_calc_input["rem"])
    self.assertEqual(doc["orig"]["molecule"], last_calc_input["molecule"])

    def graph_of(mol_dict):
        return MoleculeGraph.with_local_env_strategy(
            Molecule.from_dict(mol_dict), OpenBabelNN())

    orig_molgraph = graph_of(doc["orig"]["molecule"])
    initial_molgraph = graph_of(doc["input"]["initial_molecule"])
    self.assertEqual(orig_molgraph.isomorphic_to(initial_molgraph), True)
def test_assimilate_unstable_opt(self):
    """
    Assimilate the "2620_complete" run (four opt/freq rounds) and pin the
    fields of the resulting task document, which is expected to be flagged
    "unsuccessful" with an "unstable" final energy and no flattened
    frequencies.
    """
    run_names = [
        "opt_0", "freq_0", "opt_1", "freq_1",
        "opt_2", "freq_2", "opt_3", "freq_3",
    ]
    drone = QChemDrone(
        runs=run_names,
        additional_fields={"special_run_type": "frequency_flattener"})
    run_dir = os.path.join(module_dir, "..", "test_files", "2620_complete")
    doc = drone.assimilate(path=run_dir,
                           input_file="mol.qin",
                           output_file="mol.qout",
                           multirun=False)

    self.assertEqual(doc["input"]["job_type"], "opt")
    self.assertEqual(doc["output"]["job_type"], "opt")
    self.assertEqual(doc["output"]["final_energy"], "unstable")

    # Top-level fields of the task document, checked in a fixed order.
    expected_top_level = (
        ("smiles", "[S](=O)[N]S[C]"),
        ("state", "unsuccessful"),
        ("num_frequencies_flattened", 0),
        ("walltime", None),
        ("cputime", None),
        ("formula_pretty", "CS2NO"),
        ("formula_anonymous", "ABCD2"),
        ("chemsys", "C-N-O-S"),
        ("pointgroup", "C1"),
    )
    for field, expected in expected_top_level:
        self.assertEqual(doc[field], expected)

    # The "orig" section must mirror the input of the earliest calculation.
    last_calc_input = doc["calcs_reversed"][-1]["input"]
    self.assertEqual(doc["orig"]["rem"], last_calc_input["rem"])
    self.assertEqual(doc["orig"]["molecule"], last_calc_input["molecule"])

    def graph_of(mol_dict):
        return MoleculeGraph.with_local_env_strategy(
            Molecule.from_dict(mol_dict),
            OpenBabelNN(),
            reorder=False,
            extend_structure=False)

    orig_molgraph = graph_of(doc["orig"]["molecule"])
    initial_molgraph = graph_of(doc["input"]["initial_molecule"])
    self.assertEqual(orig_molgraph.isomorphic_to(initial_molgraph), True)
def test_assimilate_opt_with_hidden_changes_from_handler(self):
    """
    Check the task document built from the "1746_complete" run.

    Besides the scalar fields, the test requires that the molecule graph of
    the "orig" section is NOT isomorphic to the graph of the input's initial
    molecule.
    """
    drone = QChemDrone(
        additional_fields={"special_run_type": "frequency_flattener"})
    calc_dir = os.path.join(module_dir, "..", "test_files", "1746_complete")
    doc = drone.assimilate(path=calc_dir,
                           input_file="mol.qin",
                           output_file="mol.qout",
                           multirun=False)

    self.assertEqual(doc["input"]["job_type"], "opt")
    self.assertEqual(doc["output"]["job_type"], "freq")
    self.assertEqual(doc["output"]["final_energy"], -303.835532370106)

    # (field, expected value) pairs, asserted in this order.
    scalar_checks = [
        ("smiles", "O1C(=CC1=O)[CH]"),
        ("state", "successful"),
        ("num_frequencies_flattened", 0),
        ("walltime", 631.54),
        ("cputime", 7471.17),
        ("formula_pretty", "HC2O"),
        ("formula_anonymous", "ABC2"),
        ("chemsys", "C-H-O"),
        ("pointgroup", "C1"),
    ]
    for name, value in scalar_checks:
        self.assertEqual(doc[name], value)

    self.assertEqual(doc["orig"]["rem"],
                     doc["calcs_reversed"][-1]["input"]["rem"])

    graph_kwargs = {"reorder": False, "extend_structure": False}
    orig_molgraph = MoleculeGraph.with_local_env_strategy(
        Molecule.from_dict(doc["orig"]["molecule"]),
        OpenBabelNN(),
        **graph_kwargs)
    initial_molgraph = MoleculeGraph.with_local_env_strategy(
        Molecule.from_dict(doc["input"]["initial_molecule"]),
        OpenBabelNN(),
        **graph_kwargs)
    self.assertEqual(orig_molgraph.isomorphic_to(initial_molgraph), False)
def from_dict(cls, d):
    """
    Rebuild an NwInput from its dict representation.

    Reads the keys "mol", "tasks", "directives", "geometry_options",
    "symmetry_options" and "memory_options" from ``d``.
    """
    molecule = Molecule.from_dict(d["mol"])
    tasks = [NwTask.from_dict(task_dict) for task_dict in d["tasks"]]
    # Directives are turned back into tuples, one per stored sequence.
    directives = [tuple(entry) for entry in d["directives"]]
    return NwInput(
        molecule,
        tasks=tasks,
        directives=directives,
        geometry_options=d["geometry_options"],
        symmetry_options=d["symmetry_options"],
        memory_options=d["memory_options"],
    )
def from_dict(cls, d):
    """
    Rebuild a FiestaInput from its dict representation.

    The COHSEX, GW and BSE/TDDFT options are read from keys named after the
    constructor keywords ("COHSEX_options", "GW_options",
    "BSE_TDDFT_options"). The previous implementation read them from
    "geometry_options", "symmetry_options" and "memory_options", which are
    NWChem input keys and look like a copy-paste slip; those names are still
    accepted as a fallback so existing dicts keep loading.

    NOTE(review): presumably ``as_dict`` emits the constructor-keyword names;
    confirm against the serialiser.

    Args:
        d (dict): Dict representation.

    Returns:
        FiestaInput

    Raises:
        KeyError: if neither the preferred nor the legacy key is present.
    """

    def _option(preferred_key, legacy_key):
        # Prefer the Fiesta-specific key; fall back to the legacy name.
        return d[preferred_key] if preferred_key in d else d[legacy_key]

    return FiestaInput(
        Molecule.from_dict(d["mol"]),
        correlation_grid=d["correlation_grid"],
        Exc_DFT_option=d["Exc_DFT_option"],
        COHSEX_options=_option("COHSEX_options", "geometry_options"),
        GW_options=_option("GW_options", "symmetry_options"),
        BSE_TDDFT_options=_option("BSE_TDDFT_options", "memory_options"),
    )
def from_dict(cls, d):
    """
    Rebuild the object from a dict holding a structure (or molecule) plus an
    "about" section.

    Args:
        d (dict): Dict representation

    Returns:
        Class
    """
    about = d["about"]
    decoder = MontyDecoder()
    created_at = decoder.process_decoded(about.get("created_at"))

    # Only underscore-prefixed "about" keys are carried over as extra data.
    underscored = {
        key: value for key, value in about.items() if key.startswith("_")
    }
    data = decoder.process_decoded(underscored)

    # A "lattice" key marks a periodic Structure; anything else is a Molecule.
    if "lattice" in d:
        structure = Structure.from_dict(d)
    else:
        structure = Molecule.from_dict(d)

    return cls(
        structure,
        about["authors"],
        projects=about.get("projects"),
        references=about.get("references", ""),
        remarks=about.get("remarks"),
        data=data,
        history=about.get("history"),
        created_at=created_at,
    )
def _in_database(self, molecule):
    """
    Report whether an equivalent molecule already exists among
    self.all_relevant_docs (documents pre-filtered on relevant formulae).

    A document matches when its "formula_pretty" equals the molecule's
    reduced formula, its input initial molecule has an isomorphic graph, and
    charge and spin multiplicity agree. With no documents at all, the
    molecule is assumed to be absent.
    """
    if len(self.all_relevant_docs) == 0:
        return False

    candidate_graph = MoleculeGraph.with_local_env_strategy(
        molecule, OpenBabelNN(), reorder=False, extend_structure=False)

    for doc in self.all_relevant_docs:
        if molecule.composition.reduced_formula == doc["formula_pretty"]:
            stored_mol = Molecule.from_dict(doc["input"]["initial_molecule"])
            stored_graph = MoleculeGraph.with_local_env_strategy(
                stored_mol,
                OpenBabelNN(),
                reorder=False,
                extend_structure=False)
            # Graph check first, then charge, then spin multiplicity.
            if not candidate_graph.isomorphic_to(stored_graph):
                continue
            if not molecule.charge == stored_graph.molecule.charge:
                continue
            if (molecule.spin_multiplicity ==
                    stored_graph.molecule.spin_multiplicity):
                return True

    # No equivalent entry was found.
    return False
def from_molecule_document(
    cls,
    mol_doc: Dict,
    correction: float = 0.0,
    parameters: Optional[Dict] = None,
    attribute=None,
):
    """
    Build a MoleculeEntry from a molecule document.

    Args:
        mol_doc: Nested dictionary describing the molecule. The keys
            "molecule", "energy_Ha", "enthalpy_kcal/mol", "entropy_cal/molK"
            and "task_id" are required; "mol_graph" is optional.
        correction: Correction forwarded to the entry. Defaults to 0.0.
        parameters: Optional dict of parameters associated with the
            molecule. Defaults to None.
        attribute: Optional attribute of the entry, forwarded unchanged.

    Raises:
        MoleculeEntryError: if a required key is missing from ``mol_doc``.
    """
    try:
        raw_molecule = mol_doc["molecule"]
        # Accept either a ready-made Molecule or its dict form.
        if isinstance(raw_molecule, Molecule):
            molecule = raw_molecule
        else:
            molecule = Molecule.from_dict(raw_molecule)
        energy = mol_doc["energy_Ha"]
        enthalpy = mol_doc["enthalpy_kcal/mol"]
        entropy = mol_doc["entropy_cal/molK"]
        entry_id = mol_doc["task_id"]
    except KeyError as e:
        raise MoleculeEntryError(
            "Unable to construct molecule entry from molecule document; "
            f"missing attribute {e} in `mol_doc`.")

    if "mol_graph" not in mol_doc:
        # No stored graph: derive one, then pass it through
        # metal_edge_extender.
        # NOTE(review): the extender is applied only on this branch, as the
        # collapsed source suggests; confirm against the original layout.
        mol_graph = MoleculeGraph.with_local_env_strategy(
            molecule, OpenBabelNN())
        mol_graph = metal_edge_extender(mol_graph)
    else:
        stored_graph = mol_doc["mol_graph"]
        if isinstance(stored_graph, MoleculeGraph):
            mol_graph = stored_graph
        else:
            mol_graph = MoleculeGraph.from_dict(stored_graph)

    return cls(
        molecule=molecule,
        energy=energy,
        correction=correction,
        enthalpy=enthalpy,
        entropy=entropy,
        parameters=parameters,
        entry_id=entry_id,
        attribute=attribute,
        mol_graph=mol_graph,
    )
def filter_fragment_entries(self, fragment_entries):
    """
    Populate self.filtered_entries with the unique fragment entries.

    Each entry is checked for PCM consistency with self.molecule_entry,
    gets "initial_molgraph", "final_molgraph" and "structure_change" keys
    added in place, and is then de-duplicated against the entries kept so
    far. Of two equivalent entries the one with the lower "final_energy" is
    kept.

    Raises:
        RuntimeError: if the principal entry has a PCM dielectric and a
            fragment entry lacks one or has a different one.
    """
    self.filtered_entries = []
    for entry in fragment_entries:
        # PCM dielectric must agree with the principal molecule, if it has one.
        if "pcm_dielectric" in self.molecule_entry:
            if "pcm_dielectric" not in entry:
                raise RuntimeError(
                    "Principle molecule has a PCM dielectric of "
                    + str(self.molecule_entry["pcm_dielectric"])
                    + " but a fragment entry has no PCM dielectric! Please only pass fragment entries with PCM details consistent with the principle entry. Exiting...")
            elif entry["pcm_dielectric"] != self.molecule_entry["pcm_dielectric"]:
                raise RuntimeError(
                    "Principle molecule has a PCM dielectric of "
                    + str(self.molecule_entry["pcm_dielectric"])
                    + " but a fragment entry has a different PCM dielectric! Please only pass fragment entries with PCM details consistent with the principle entry. Exiting...")

        # Molecule graphs before and after optimization.
        entry["initial_molgraph"] = MoleculeGraph.with_local_env_strategy(
            Molecule.from_dict(entry["initial_molecule"]),
            OpenBabelNN(),
            reorder=False,
            extend_structure=False)
        entry["final_molgraph"] = MoleculeGraph.with_local_env_strategy(
            Molecule.from_dict(entry["final_molecule"]),
            OpenBabelNN(),
            reorder=False,
            extend_structure=False)

        # Label whatever structural change the optimization produced.
        if entry["initial_molgraph"].isomorphic_to(entry["final_molgraph"]):
            entry["structure_change"] = "no_change"
        else:
            before = entry["initial_molgraph"].graph
            after = entry["final_molgraph"].graph
            if nx.is_connected(before.to_undirected()) and not nx.is_connected(after.to_undirected()):
                entry["structure_change"] = "unconnected_fragments"
            elif after.number_of_edges() < before.number_of_edges():
                entry["structure_change"] = "fewer_bonds"
            elif after.number_of_edges() > before.number_of_edges():
                entry["structure_change"] = "more_bonds"
            else:
                entry["structure_change"] = "bond_change"

        # Uniqueness check: the loop breaks on the first equivalent entry;
        # the else-clause (no break) means the entry is new.
        for index, kept in enumerate(self.filtered_entries):
            if kept["formula_pretty"] != entry["formula_pretty"]:
                continue
            equivalent = (
                kept["initial_molgraph"].isomorphic_to(entry["initial_molgraph"])
                and kept["final_molgraph"].isomorphic_to(entry["final_molgraph"])
                and kept["initial_molecule"]["charge"] == entry["initial_molecule"]["charge"]
            )
            if not equivalent:
                continue
            # Keep whichever of the two has the lower energy. Spin is not
            # compared, so this effectively picks between e.g. singlet and
            # triplet entries with the same structural details.
            if entry["final_energy"] < kept["final_energy"]:
                self.filtered_entries[index] = entry
            break
        else:
            self.filtered_entries.append(entry)
def test_to_from_dict(self):
    """Round-trip a Molecule carrying site properties through its dict form."""
    magmoms = [0.5, -0.5, 1, 2, 3]
    propertied_mol = Molecule(["C", "H", "H", "H", "H"],
                              self.coords,
                              site_properties={'magmom': magmoms})

    as_dict = propertied_mol.to_dict
    first_site = as_dict['sites'][0]
    self.assertEqual(first_site['properties']['magmom'], 0.5)

    restored = Molecule.from_dict(as_dict)
    self.assertEqual(propertied_mol, restored)
    self.assertEqual(restored[0].magmom, 0.5)
    self.assertEqual(restored.formula, "H4 C1")
def test_to_from_dict(self):
    """
    Serialise a Molecule with a "magmom" site property, rebuild it with
    Molecule.from_dict and check the copy equals the source.
    """
    species = ["C", "H", "H", "H", "H"]
    site_props = {'magmom': [0.5, -0.5, 1, 2, 3]}
    propertied_mol = Molecule(species, self.coords,
                              site_properties=site_props)

    d = propertied_mol.to_dict
    self.assertEqual(d['sites'][0]['properties']['magmom'], 0.5)

    rebuilt = Molecule.from_dict(d)
    self.assertEqual(propertied_mol, rebuilt)
    self.assertEqual(rebuilt[0].magmom, 0.5)
    self.assertEqual(rebuilt.formula, "H4 C1")
def from_dict(cls, d):
    """
    Rebuild a FiestaInput from its dict representation.

    The COHSEX, GW and BSE/TDDFT options are read from keys named after the
    constructor keywords ("COHSEX_options", "GW_options",
    "BSE_TDDFT_options"). The previous implementation read them from
    "geometry_options", "symmetry_options" and "memory_options", which are
    NWChem input keys and look like a copy-paste slip; those names are still
    accepted as a fallback so existing dicts keep loading.

    NOTE(review): presumably ``as_dict`` emits the constructor-keyword names;
    confirm against the serialiser.

    :param d: Dict representation
    :return: FiestaInput
    :raises KeyError: if neither the preferred nor the legacy key is present
    """

    def _option(preferred_key, legacy_key):
        # Prefer the Fiesta-specific key; fall back to the legacy name.
        return d[preferred_key] if preferred_key in d else d[legacy_key]

    return FiestaInput(
        Molecule.from_dict(d["mol"]),
        correlation_grid=d["correlation_grid"],
        Exc_DFT_option=d["Exc_DFT_option"],
        COHSEX_options=_option("COHSEX_options", "geometry_options"),
        GW_options=_option("GW_options", "symmetry_options"),
        BSE_TDDFT_options=_option("BSE_TDDFT_options", "memory_options"),
    )
def _get_qcinp_from_fw_spec(fw_spec):
    """
    Return the QcInput stored under fw_spec["qcinp"], decoding it from a dict
    when necessary.

    When the spec also carries a "mol" entry, every job whose ``mol``
    attribute is a Molecule gets it replaced by a deep copy of that molecule.
    """
    qcinp = fw_spec["qcinp"]
    if isinstance(qcinp, dict):
        qcinp = QcInput.from_dict(qcinp)

    if 'mol' not in fw_spec:
        return qcinp

    override = fw_spec["mol"]
    if isinstance(override, dict):
        override = Molecule.from_dict(override)

    for job in qcinp.jobs:
        # Jobs whose mol is not a Molecule instance are left untouched.
        if isinstance(job.mol, Molecule):
            job.mol = copy.deepcopy(override)
    return qcinp
def from_dict(cls, d):
    """
    Rebuild a MopTask from a dict with "molecule", "keywords" and "title"
    entries.

    The semi-empirical method and the job text are whichever keyword names
    intersect cls.available_sqm_methods and cls.available_sqm_tasktext; all
    keywords other than those two and "CHARGE" become optional parameters.
    """
    keywords = d["keywords"]
    mol = Molecule.from_dict(d["molecule"])
    charge = keywords["CHARGE"]

    all_keys = set(keywords.keys())
    sqm_method = (all_keys & cls.available_sqm_methods).pop()
    jobtext = (all_keys & cls.available_sqm_tasktext).pop()
    jobtype = cls.jobtext2type[jobtext]

    # The title is a sequence: join it unless its second element is empty.
    if len(d["title"][1]) > 0:
        title = ' '.join(d["title"])
    else:
        title = d["title"][0]

    consumed = {"CHARGE", sqm_method, jobtext}
    optional_params = {key: keywords[key] for key in list(all_keys - consumed)}

    return MopTask(mol, charge, jobtype, title, sqm_method, optional_params)
def test_to_from_dict(self):
    """
    Round-trip through the dict form: IMolecule.from_dict must yield an
    IMolecule, and a charged Molecule with site properties must survive the
    trip unchanged.
    """
    immutable_copy = IMolecule.from_dict(self.mol.to_dict)
    self.assertEqual(type(immutable_copy), IMolecule)

    propertied_mol = Molecule(
        ["C", "H", "H", "H", "H"],
        self.coords,
        charge=1,
        site_properties={"magmom": [0.5, -0.5, 1, 2, 3]},
    )
    serialised = propertied_mol.to_dict
    self.assertEqual(serialised["sites"][0]["properties"]["magmom"], 0.5)

    restored = Molecule.from_dict(serialised)
    self.assertEqual(propertied_mol, restored)
    self.assertEqual(restored[0].magmom, 0.5)
    self.assertEqual(restored.formula, "H4 C1")
    self.assertEqual(restored.charge, 1)
def from_dict(cls, d):
    """
    Rebuild an NwInput from its dict representation.

    Args:
        d (dict): Dict representation

    Returns:
        NwInput
    """
    mol = Molecule.from_dict(d["mol"])
    task_list = [NwTask.from_dict(task_dict) for task_dict in d["tasks"]]
    # Stored directives are converted back to tuples.
    directive_list = [tuple(directive) for directive in d["directives"]]
    geometry_options = d["geometry_options"]
    symmetry_options = d["symmetry_options"]
    memory_options = d["memory_options"]
    return NwInput(mol,
                   tasks=task_list,
                   directives=directive_list,
                   geometry_options=geometry_options,
                   symmetry_options=symmetry_options,
                   memory_options=memory_options)
def from_dict(cls, d):
    """
    Rebuild the object from a dict holding a structure (or molecule) plus an
    "about" section with authors, projects, references, remarks and history.
    """
    about = d["about"]
    decoder = MontyDecoder()
    created_at = decoder.process_decoded(about.get("created_at"))

    # Underscore-prefixed "about" keys are the free-form extra data.
    extras = {name: value for name, value in about.items()
              if name.startswith("_")}
    data = decoder.process_decoded(extras)

    # Periodic structures carry a "lattice" key; molecules do not.
    if "lattice" in d:
        structure = Structure.from_dict(d)
    else:
        structure = Molecule.from_dict(d)

    return cls(structure,
               about["authors"],
               projects=about.get("projects"),
               references=about.get("references", ""),
               remarks=about.get("remarks"),
               data=data,
               history=about.get("history"),
               created_at=created_at)
def from_dict(cls, d):
    """
    Rebuild the object from a dict holding a structure (or molecule) plus an
    "about" section, decoding stored values with PMGJSONDecoder.
    """
    about = d["about"]
    decoder = PMGJSONDecoder()
    created_at = decoder.process_decoded(about.get("created_at"))

    # Underscore-prefixed "about" keys are the free-form extra data.
    extras = {name: value for name, value in about.items()
              if name.startswith("_")}
    data = decoder.process_decoded(extras)

    # Periodic structures carry a "lattice" key; molecules do not.
    if "lattice" in d:
        structure = Structure.from_dict(d)
    else:
        structure = Molecule.from_dict(d)

    return cls(structure,
               about["authors"],
               projects=about.get("projects"),
               references=about.get("references", ""),
               remarks=about.get("remarks"),
               data=data,
               history=about.get("history"),
               created_at=created_at)
def test_to_from_dict(self):
    """
    Round-trip through as_dict()/from_dict: IMolecule.from_dict must yield an
    IMolecule, and a charged Molecule with site properties must survive the
    trip unchanged.
    """
    immutable_copy = IMolecule.from_dict(self.mol.as_dict())
    self.assertEqual(type(immutable_copy), IMolecule)

    propertied_mol = Molecule(
        ["C", "H", "H", "H", "H"],
        self.coords,
        charge=1,
        site_properties={'magmom': [0.5, -0.5, 1, 2, 3]})
    serialised = propertied_mol.as_dict()
    self.assertEqual(serialised['sites'][0]['properties']['magmom'], 0.5)

    restored = Molecule.from_dict(serialised)
    self.assertEqual(propertied_mol, restored)
    self.assertEqual(restored[0].magmom, 0.5)
    self.assertEqual(restored.formula, "H4 C1")
    self.assertEqual(restored.charge, 1)
def get_single_point_workflow(self,
                              path,
                              mol_id,
                              name_pre="solubility_calc",
                              qchem_cmd="qchem -slurm",
                              max_cores=32,
                              qchem_input_params=None):
    """
    Build a one-Firework workflow running a single-point calculation on a
    molecule looked up in the database.

    The geometry is the entry's "optimized_molecule" output when that key is
    present, otherwise its "initial_molecule" output.

    :param path: (sub)path, joined onto self.base_dir together with mol_id,
        giving the directory the Firework runs in
    :param mol_id: str representing the unique molecule identifier
    :param name_pre: str used as the Firework name
    :param qchem_cmd: str indicating how the Q-Chem code should be called.
        Default is "qchem -slurm", for a SLURM-based system.
    :param max_cores: int specifying how many cores the workflow should be
        split over. Default is 32.
    :param qchem_input_params: dict listing all parameters differing from
        default values.
    :return: Workflow
    :raises RuntimeError: if self.db is None
    """
    base_path = join(self.base_dir, path, mol_id)

    if self.db is None:
        raise RuntimeError("Cannot search for molecule geometry without"
                           " valid database connection. Try again later.")

    entry = self.molecules.find_one({"mol_id": mol_id})
    output = entry["output"]
    geometry = output.get('optimized_molecule',
                          output.get('initial_molecule'))
    mol = Molecule.from_dict(geometry)

    single_point = SinglePointFW(molecule=mol,
                                 name=name_pre,
                                 qchem_cmd=qchem_cmd,
                                 multimode="openmp",
                                 max_cores=max_cores,
                                 qchem_input_params=qchem_input_params,
                                 directory=base_path)
    return Workflow([single_point])
def test_compare_with_dict(self):
    """
    Compare the dict form of a parsed "opt_converged_000_000.xyz" string file
    with the stored reference "optimized_string.json".
    """
    self.maxDiff = None
    reference = loadfn(os.path.join(gsm_dir, "optimized_string.json"))
    parser = GSMOptimizedStringParser(
        os.path.join(gsm_dir, "opt_converged_000_000.xyz"))
    compare = parser.as_dict()

    self.assertEqual(compare["text"], reference["text"])
    self.assertSequenceEqual(compare["lines"], reference["lines"])

    parsed_data = compare["data"]
    expected_data = reference["data"]
    for key in parsed_data:
        if key != "molecules":
            # Fall back to a sequence comparison when plain equality raises
            # ValueError.
            try:
                self.assertEqual(parsed_data[key], expected_data[key])
            except ValueError:
                self.assertSequenceEqual(parsed_data[key],
                                         expected_data[key])
            continue

        # Parsed molecules are dicts; rebuild each before comparing.
        for index, mol_dict in enumerate(parsed_data["molecules"]):
            self.assertEqual(Molecule.from_dict(mol_dict),
                             expected_data["molecules"][index])
def get_redo_workflow(self, qchem_input_params, sp_params, max_iterations=3):
    """
    Identifies molecules which need to be re-run (for now, based only on
    presence of negative frequencies) and then performs a frequency
    flattening workflow on those molecules.

    For each such molecule the geometry of its first frequency calculation
    (in "calcs_reversed" order) is displaced along the first frequency mode
    vector, trying progressively smaller scales until the perturbed molecule
    still compares equal to the original under MoleculeStructureComparator.

    This is a hack. In the future, a frequency flattening workflow should be
    used from the beginning.

    :param qchem_input_params: dict
    :param sp_params: For OptFreqSPFW, single-point calculations can be
        treated differently from Opt and Freq. In this case, another dict
        for sp must be used.
    :param max_iterations: Maximum number of iterations for frequency
        flattening. Default is 3.
    :return: Workflow, or None when no molecule needs to be re-run
    :raises RuntimeError: if self.db is None, or if a molecule with negative
        frequencies has no frequency calculation to take the mode from
    :raises Exception: if no perturbation scale preserves the bonding
        structure
    """
    if self.db is None:
        # The two literals used to be joined without a separating space.
        raise RuntimeError("Cannot access database to determine what "
                           "molecules need to be re-calculated.")

    # Perturbation scales are scanned from the maximum towards the minimum.
    min_molecule_perturb_scale = 0.1
    max_molecule_perturb_scale = 0.3
    scale_grid = 10
    perturb_scale_grid = (max_molecule_perturb_scale -
                          min_molecule_perturb_scale) / scale_grid

    fws = []
    collection = self.db.db["molecules"]

    for mol in collection.find({}):
        frequencies = mol["output"]["frequencies"]
        if not any(x < 0 for x in frequencies):
            continue

        msc = MoleculeStructureComparator()

        # Take mode vector and geometry from the first frequency calc found.
        old_molecule = None
        for calc in mol["calcs_reversed"]:
            if calc["task"]["type"] in ["freq", "frequency"
                                        ] and old_molecule is None:
                negative_freq_vecs = calc.get("frequency_mode_vectors")[0]
                old_coords = calc.get("initial_geometry")
                old_molecule = Molecule.from_dict(
                    calc.get("initial_molecule"))

        if old_molecule is None:
            # Previously this surfaced as an UnboundLocalError further down.
            raise RuntimeError(
                "No frequency calculation found for molecule {}; cannot "
                "perturb its geometry.".format(mol.get("mol_id")))

        structure_successfully_perturbed = False
        for molecule_perturb_scale in np.arange(max_molecule_perturb_scale,
                                                min_molecule_perturb_scale,
                                                -perturb_scale_grid):
            new_coords = perturb_coordinates(
                old_coords=old_coords,
                negative_freq_vecs=negative_freq_vecs,
                molecule_perturb_scale=molecule_perturb_scale,
                reversed_direction=False)
            new_molecule = Molecule(
                species=old_molecule.species,
                coords=new_coords,
                charge=old_molecule.charge,
                spin_multiplicity=old_molecule.spin_multiplicity)
            # Accept the first scale for which the comparator still reports
            # the two molecules as equal.
            if msc.are_equal(old_molecule, new_molecule):
                structure_successfully_perturbed = True
                break

        if not structure_successfully_perturbed:
            raise Exception(
                "Unable to perturb coordinates to remove negative frequency without changing the bonding structure"
            )

        mol_id = mol["mol_id"]
        dir_name = mol["dir_name"].split("/")[-1]
        if dir_name not in listdir(self.base_dir):
            os.mkdir(join(self.base_dir, dir_name))

        run_dir = join(self.base_dir, dir_name)
        fws.append(
            OptFreqSPFW(molecule=new_molecule,
                        name="Flattening: {}/{}".format(mol_id, dir_name),
                        qchem_cmd="qchem -slurm",
                        input_file=join(run_dir, mol_id + ".in"),
                        output_file=join(run_dir, mol_id + ".out"),
                        qclog_file=join(run_dir, mol_id + ".qclog"),
                        max_cores=32,
                        max_iterations=max_iterations,
                        qchem_input_params=qchem_input_params,
                        sp_params=sp_params,
                        db_file=self.db_file))

    if len(fws) == 0:
        return None
    return Workflow(fws)
def load_dict(cls, dicts):
    """
    Build an instance from the dict representation of a molecule.
    """
    return cls(Molecule.from_dict(dicts))
def optimize_failed_ts(
        db: CatDB,
        lp: LaunchPad,
        query: Optional[Dict] = None,
        num_run: Optional[int] = None,
        allowed_calc_types: Optional[frozenset] = frozenset(["relax_ts", "qst"]),
        allow_failed_calcs: Optional[bool] = False,
        with_critic: Optional[bool] = False,
        qchem_cmd: Optional[str] = ">>qchem_cmd<<",
        max_cores: Optional[Union[str, int]] = ">>max_cores<<",
        multimode: Optional[str] = ">>multimode<<",
        qchem_input_params: Optional[Dict] = None,
        db_file: Optional[str] = ">>db_file<<",
        tags: Optional[Dict] = None
):
    """
    Launch transition-state re-optimizations for incomplete entries.

    Entries with ``completed == False`` that have not yet been flagged
    ``run_atomate`` are fetched from ``db``. For each qualifying calculation
    of each entry a workflow is added to ``lp``, and the entry is then
    flagged ``run_atomate`` with a fresh ``updated_on`` timestamp.

    Args:
        db: Database wrapper; ``db.database[db.data_collection]`` is queried
            and updated.
        lp: LaunchPad receiving the workflows via ``add_wf``.
        query: Extra query constraints. The dict is copied, so the caller's
            object is no longer modified.
        num_run: Maximum number of entries to process (None for all).
        allowed_calc_types: A calculation qualifies only if its job name
            contains one of these substrings.
        allow_failed_calcs: Whether calculations with ``success == False``
            may be used.
        with_critic: If True, build the workflow with
            ``get_wf_FFTSopt_and_critic``; otherwise use a single
            ``FrequencyFlatteningTransitionStateFW``.
        qchem_cmd: Passed through to the Firework.
        max_cores: Passed through to the Firework.
        multimode: Passed through to the Firework.
        qchem_input_params: Passed through to the workflow / Firework.
        db_file: Passed through to the workflow / Firework.
        tags: If given, applied to every workflow with ``add_tags``.
    """
    # Work on a copy: the original assigned into the caller's dict.
    failed_query = {} if query is None else dict(query)
    failed_query["completed"] = False
    failed_query["run_atomate"] = {"$ne": True}

    collection = db.database[db.data_collection]
    possible_entries = list(collection.find(
        failed_query, {"_id": 0, "rxnid": 1, "calcs": 1}
    ))
    if num_run is not None:
        possible_entries = possible_entries[:num_run]

    for entry in possible_entries:
        rxnid = entry["rxnid"]

        for calc in entry["calcs"]:
            if not calc["success"] and not allow_failed_calcs:
                continue

            name = calc["job_name"]
            if not any(e in name for e in allowed_calc_types):
                continue

            if "qst" in name:
                # Skip "qst" calcs without a first frequency, or whose first
                # frequency is positive. A missing, None or empty frequency
                # list is treated as "no frequency" instead of raising.
                frequencies = calc["output"].get("frequencies") or [None]
                first_frequency = frequencies[0]
                if first_frequency is None or first_frequency > 0:
                    continue

            mol_dict = calc["output"].get("molecule")
            if mol_dict is None:
                continue
            molecule = Molecule.from_dict(mol_dict)

            calc_status = "success" if calc["success"] else "failed"
            wf_name = "failed_rxn_{}:{}_{}".format(
                rxnid, calc_status, name)

            if with_critic:
                wf = get_wf_FFTSopt_and_critic(
                    molecule, wf_name,
                    qchem_input_params=qchem_input_params,
                    db_file=db_file)
            else:
                fw = FrequencyFlatteningTransitionStateFW(
                    molecule=molecule,
                    name=wf_name,
                    qchem_cmd=qchem_cmd,
                    multimode=multimode,
                    max_cores=max_cores,
                    qchem_input_params=qchem_input_params,
                    linked=True,
                    freq_before_opt=True,
                    db_file=db_file
                )
                wf = Workflow([fw], name=wf_name)

            if tags is not None:
                wf = add_tags(wf, tags)
            lp.add_wf(wf)

        # NOTE(review): the entry is flagged once per entry, after all of its
        # calcs were examined; the collapsed source does not show the
        # nesting level of this update, so confirm.
        time_now = datetime.datetime.now(datetime.timezone.utc)
        collection.update_one({"rxnid": rxnid},
                              {"$set": {"run_atomate": True,
                                        "updated_on": time_now}})
def from_dataset_entry(
    cls,
    doc: Dict,
    use_thermo: str = "raw",
    parameters: Optional[Dict] = None,
    attribute=None,
):
    """
    Build a MoleculeEntry from a document of the LIBE (Lithium-Ion Battery
    Electrolyte) or MADEIRA (MAgnesium Dataset of Electrolyte and Interphase
    ReAgents) datasets.

    Args:
        doc: Dictionary representing an entry from LIBE or MADEIRA.
        use_thermo: Which thermochemistry to use (case-insensitive): "raw"
            for raw, uncorrected thermo data; "rrho_shifted" for a slightly
            modified Rigid-Rotor Harmonic Oscillator approximation (Ribiero
            et al., J. Phys. Chem. B 2011, 115, 14556-14562); or "qrrho" for
            Grimme's Quasi-Rigid Rotor Harmonic Oscillator (Grimme, Chem.
            Eur. J. 2012, 18, 9955-9964). When the requested corrected
            block is None in the document, the raw values are used.
        parameters: An optional dict of parameters associated with the
            molecule. Defaults to None.
        attribute: Optional attribute of the entry, forwarded unchanged. An
            attribute can be anything but must be MSONable.

    Raises:
        ValueError: if ``use_thermo`` is not one of the allowed values.
        MoleculeEntryError: if a required key is missing from ``doc``.
    """
    thermo = use_thermo.lower()
    if thermo not in ["raw", "rrho_shifted", "qrrho"]:
        raise ValueError(
            "Only allowed values for use_thermo are 'raw', 'rrho_shifted', "
            "and 'qrrho'!")

    # Document key of the corrected thermo block for each non-raw option.
    corrected_keys = {"rrho_shifted": "shifted_rrho_eV",
                      "qrrho": "quasi_rrho_eV"}

    try:
        raw_molecule = doc["molecule"]
        if isinstance(raw_molecule, Molecule):
            molecule = raw_molecule
        else:
            molecule = Molecule.from_dict(raw_molecule)  # type: ignore

        corrected = None
        if thermo in corrected_keys:
            corrected = doc["thermo"][corrected_keys[thermo]]

        if corrected is not None:
            # NOTE(review): the factors presumably convert eV to Hartree,
            # kcal/mol and cal/mol respectively; confirm.
            energy = corrected["electronic_energy"] * 0.0367493
            enthalpy = corrected["total_enthalpy"] * 23.061
            entropy = corrected["total_entropy"] * 23061
        else:
            raw = doc["thermo"]["raw"]
            energy = raw["electronic_energy_Ha"]
            enthalpy = raw["total_enthalpy_kcal/mol"]
            entropy = raw["total_entropy_cal/molK"]

        entry_id = doc["molecule_id"]

        raw_graph = doc["molecule_graph"]
        if isinstance(raw_graph, MoleculeGraph):
            mol_graph = raw_graph
        else:
            mol_graph = MoleculeGraph.from_dict(raw_graph)
    except KeyError as e:
        raise MoleculeEntryError(
            "Unable to construct molecule entry from molecule document; "
            f"missing attribute {e} in `doc`.")

    return cls(
        molecule=molecule,
        energy=energy,
        enthalpy=enthalpy,
        entropy=entropy,
        parameters=parameters,
        entry_id=entry_id,
        attribute=attribute,
        mol_graph=mol_graph,
    )
def from_atomate_tasks(cls, reactants, products, transition_state):
    """
    Constructor using task documents (MSON-dicts) from atomate workflows.

    Note: This constructor is NOT FLEXIBLE. Every task doc must have a
    "task_id" and an "output" field with the subfields "optimized_molecule",
    "final_energy", "enthalpy" and "entropy". (The error messages name the
    first subfield "optimized_geometry"; the key actually read is
    "optimized_molecule".)

    Args:
        reactants (list): list of dicts representing task docs for reactant
            molecules
        products (list): list of dicts representing task docs for product
            molecules
        transition_state (dict): dict representing task doc for transition
            state molecule

    Returns:
        ReactionRateCalculator

    Raises:
        ValueError: if a KeyError occurs while building an entry from a
            task doc.
    """

    def _entry_from_task(task_doc, role):
        # Turn one task doc into a MoleculeEntry; ``role`` only prefixes
        # the error message.
        try:
            output = task_doc["output"]
            return MoleculeEntry(
                Molecule.from_dict(output["optimized_molecule"]),
                output["final_energy"],
                enthalpy=output["enthalpy"],
                entropy=output["entropy"],
                entry_id=task_doc["task_id"])
        except KeyError:
            raise ValueError(
                role + " task doc does not follow schema! Docs must contain"
                " an output field with subfields optimized_geometry, final_energy,"
                " enthalpy, and entropy.")

    # Construct entries with molecular properties
    rcts = [_entry_from_task(rct_doc, "Reactant") for rct_doc in reactants]
    pros = [_entry_from_task(pro_doc, "Product") for pro_doc in products]
    transition = _entry_from_task(transition_state, "Transition state")

    # Calculate stoichiometry
    rct_mols = [rct.mol_graph.molecule for rct in rcts]
    pro_mols = [pro.mol_graph.molecule for pro in pros]
    reaction = cls.generate_reaction(rct_mols, pro_mols)

    return cls(rcts, pros, transition, reaction=reaction)
def get_molecule_data(self, mol_id):
    """
    Compile all useful molecular data for analysis, including molecule size
    (number of atoms), molecular weight, enthalpy, entropy, and functional
    groups.

    The data are gathered from the "calcs_reversed" list of the database
    entry whose "mol_id" matches: frequency calcs supply enthalpy and
    entropy, "sp" calcs supply the energy, and optimization calcs supply the
    molecule plus everything derived from it (atom count, molecular weight,
    TPSA, functional groups, double/triple bond counts, species counts).

    NOTE: This function automatically converts energy, enthalpy, and entropy
    into SI units (J/mol and J/mol*K)

    :param mol_id: Unique ID associated with the molecule.
    :return: dict of relevant molecule data.
    :raises RuntimeError: if self.db is None.
    """

    mol_data = {"mol_id": mol_id}

    if self.db is None:
        raise RuntimeError("Cannot query database; connection is invalid."
                           " Try to connect again.")

    collection = self.db.db["molecules"]

    mol_entry = collection.find_one({"mol_id": mol_id})

    for calc in mol_entry["calcs_reversed"]:
        if calc["task"]["name"] in ["freq", "frequency"]:
            # NOTE(review): factors presumably convert kcal/mol -> J/mol and
            # cal/(mol*K) -> J/(mol*K); confirm the stored units.
            mol_data["enthalpy"] = calc["enthalpy"] * 4.184 * 1000
            mol_data["entropy"] = calc["entropy"] * 4.184
        if calc["task"]["name"] == "sp":
            # NOTE(review): presumably Hartree -> kcal/mol -> J/mol; confirm.
            mol_data["energy"] = calc[
                "final_energy_sp"] * 627.509 * 4.184 * 1000
        if calc["task"]["name"] in ["opt", "optimization"]:
            # NOTE(review): the whitespace-collapsed source does not show
            # whether the analysis below belongs to this branch or follows
            # the loop; it is kept inside the branch here. Confirm against
            # the original layout.
            mol_dict = calc["molecule_from_optimized_geometry"]
            mol_data["molecule"] = Molecule.from_dict(mol_dict)

            # Molecular weight and TPSA are read from the pybel molecule.
            adaptor = BabelMolAdaptor(mol_data["molecule"])
            pbmol = adaptor.pybel_mol

            mol_data["number_atoms"] = len(mol_data["molecule"])
            mol_data["molecular_weight"] = pbmol.molwt
            mol_data["tpsa"] = pbmol.calcdesc()["TPSA"]

            extractor = FunctionalGroupExtractor(mol_data["molecule"])
            molgraph = extractor.molgraph
            func_grps = extractor.get_all_functional_groups()

            mol_data["functional_groups"] = extractor.categorize_functional_groups(
                func_grps)

            # Count double and triple bonds from the "weight" edge attribute,
            # counting each (u, v) pair at most once.
            weights = nx.get_edge_attributes(molgraph.graph, "weight")

            bonds_checked = set()
            double_bonds = 0
            triple_bonds = 0
            for bond, weight in weights.items():
                # Remove index from multidigraph
                bond = (bond[0], bond[1])
                if int(weight) == 2 and bond not in bonds_checked:
                    double_bonds += 1
                elif int(weight) == 3 and bond not in bonds_checked:
                    triple_bonds += 1
                bonds_checked.add(bond)

            mol_data["double_bonds"] = double_bonds
            mol_data["triple_bonds"] = triple_bonds

            # Number of sites per species, keyed by the species' string form.
            species = [str(s.specie)
                       for s in mol_data["molecule"].sites]
            mol_data["species"] = dict(Counter(species))

    return mol_data
def test_to_from_dict(self):
    """Molecule.from_dict applied to as_dict() output must give a Molecule."""
    serialised = self.mol.as_dict()
    restored = Molecule.from_dict(serialised)
    self.assertEqual(type(restored), Molecule)
def __init__(self, molecule_entry, fragment_entries, allow_additional_charge_separation=False, multibreak=False):
    """
    Standard constructor for bond dissociation energies.

    Every bond of the principal molecule is broken in turn and handed to
    fragment_and_process together with the (filtered) fragment entries.
    Optionally, pairs of ring bonds are broken as well.

    The entries passed by the user must have the following keys:
    formula_pretty, initial_molecule, final_molecule. If the molecule entry
    has a pcm_dielectric key, every fragment entry must have one too.

    Args:
        molecule_entry (dict): Entry for the principal molecule. Should have
            the keys mentioned above.
        fragment_entries (list of dicts): List of fragment entries. Each
            should have the keys mentioned above.
        allow_additional_charge_separation (bool): If True, consider larger
            than normal charge separation among fragments. Defaults to
            False. See the construction of self.expected_charges below.
        multibreak (bool): If True, additionally attempt to break pairs of
            ring bonds. Defaults to False.

    Raises:
        RuntimeError: if a required key is missing from the molecule entry
            or from any filtered fragment entry.
    """
    self.molecule_entry = molecule_entry
    self.filter_fragment_entries(fragment_entries)
    print("{} filtered entries".format(len(self.filtered_entries)))

    self.bond_dissociation_energies = []
    self.done_frag_pairs = []
    self.done_RO_frags = []
    self.ring_bonds = []

    # Validate the keys that the rest of the analysis relies on.
    required_keys = ["formula_pretty", "initial_molecule", "final_molecule"]
    if "pcm_dielectric" in self.molecule_entry:
        required_keys.append("pcm_dielectric")
    for key in required_keys:
        if key not in self.molecule_entry:
            raise RuntimeError(key + " must be present in molecule entry! Exiting...")
        for frag_entry in self.filtered_entries:
            if key not in frag_entry:
                raise RuntimeError(key + " must be present in all fragment entries! Exiting...")

    # Define expected charges: a window around the parent charge that is
    # skewed toward zero, and widened by one on each side when additional
    # charge separation is allowed.
    charge = molecule_entry["final_molecule"]["charge"]
    if charge == 0:
        if allow_additional_charge_separation:
            self.expected_charges = [-2, -1, 0, 1, 2]
        else:
            self.expected_charges = [-1, 0, 1]
    else:
        if charge < 0:
            offsets = (-1, 0, 1, 2) if allow_additional_charge_separation else (0, 1)
        else:
            offsets = (-2, -1, 0, 1) if allow_additional_charge_separation else (-1, 0)
        self.expected_charges = [charge + offset for offset in offsets]

    # Build principal molecule graph
    principal_mol = Molecule.from_dict(molecule_entry["final_molecule"])
    self.mol_graph = MoleculeGraph.with_local_env_strategy(
        principal_mol,
        OpenBabelNN(),
        reorder=False,
        extend_structure=False)

    # Loop through bonds, aka graph edges, and fragment and process:
    for edge in self.mol_graph.graph.edges:
        self.fragment_and_process([(edge[0], edge[1])])

    # If multibreak, loop through pairs of ring bonds.
    if multibreak:
        print("Breaking pairs of ring bonds. WARNING: Structure changes much more likely, meaning dissociation values are less reliable! This is a bad idea!")
        ring_bonds = self.ring_bonds
        # Every unordered pair (first, later) of ring bonds, in index order.
        self.bond_pairs = [
            [first, second]
            for ii, first in enumerate(ring_bonds)
            for second in ring_bonds[ii + 1:]
        ]
        for pair in self.bond_pairs:
            self.fragment_and_process(pair)
def get_molecule_workflow(self, path, mol_id, name_pre="molecule_opt_freq", qchem_cmd="qchem -slurm", max_cores=32, qchem_input_params=None, modify_mol=True, max_iterations=3, max_perturb_scale=0.3):
    """
    Generates a Fireworks Workflow to optimize a molecular geometry and
    perform a vibrational analysis (frequency calculation) in Q-Chem.

    Molecule files are searched for in join(self.base_dir, path, mol_id);
    a file qualifies when its name starts with mol_id and ends with ".mol".
    If several files qualify, one Firework is generated per file, each
    running in its own "<filename>_<index>" subdirectory. If exactly one
    qualifies, a previously stored geometry from self.molecules is preferred
    over the file.

    :param path: Specified (sub)path, relative to self.base_dir, containing
        the mol_id directory.
    :param mol_id: str representing the unique molecule identifier
    :param name_pre: str indicating the prefix which should be used for all
        Firework names
    :param qchem_cmd: str indicating how the Q-Chem code should be called.
        Default is "qchem -slurm", for a SLURM-based system.
    :param max_cores: int specifying how many cores the workflow should be
        split over. Default is 32.
    :param qchem_input_params: dict listing all parameters differing from
        default values.
    :param modify_mol: If True (default), read molecule files with the
        utility get_molecule; otherwise read them with Molecule.from_file.
    :param max_iterations (int): Number of perturbation -> optimization ->
        frequency iterations to perform. Defaults to 3.
    :param max_perturb_scale (float): The maximum scaled perturbation that
        can be applied to the molecule. Defaults to 0.3.
    :return: Workflow
    :raises RuntimeError: if no valid molecule file is found.
    """

    base_path = join(self.base_dir, path, mol_id)

    files = [
        f for f in listdir(base_path)
        if isfile(join(base_path, f)) and f.startswith(mol_id)
        and f.endswith(".mol")
    ]

    if len(files) == 0:
        raise RuntimeError("No valid files found.")

    def load_molecule(mol_file):
        # Honor modify_mol wherever a molecule is read from disk.
        if modify_mol:
            return get_molecule(mol_file)
        return Molecule.from_file(mol_file)

    def build_firework(mol, directory):
        # Single construction site so that both branches forward the same
        # user-supplied settings.
        return FrequencyFlatteningOptimizeFW(
            molecule=mol,
            name=name_pre + "_{}".format(mol_id),
            qchem_cmd=qchem_cmd,
            qchem_input_params=qchem_input_params,
            multimode="openmp",
            max_cores=max_cores,
            directory=directory,
            max_iterations=max_iterations,
            max_molecule_perturb_scale=max_perturb_scale,
            db_file=self.db_file)

    fws = []

    if len(files) > 1:
        print("Multiple valid molecule files found.")
        print("Generating workflows for all valid files found.")
        for i, file in enumerate(files):
            mol = load_molecule(join(base_path, file))

            filename = file.split(".")[0]
            dir_name = join(base_path, "{}_{}".format(filename, i))
            try:
                mkdir(dir_name)
            except FileExistsError:
                print("Subdirectory {} already exists".format(dir_name))

            # Run in the subdirectory just prepared, so that calculations
            # for different files do not share one directory.
            fws.append(build_firework(mol, dir_name))
    else:
        file = files[0]
        # Prefer a geometry already stored for this molecule, if any.
        entry = self.molecules.find_one({"mol_id": mol_id})
        if entry is None:
            mol = load_molecule(join(base_path, file))
        else:
            geometry = entry["output"].get(
                'optimized_molecule',
                entry["output"].get('initial_molecule'))
            mol = Molecule.from_dict(geometry)

        fws.append(build_firework(mol, base_path))

    return Workflow(fws)
def optimize_successful_ts(
        db: CatDB,
        lp: LaunchPad,
        query: Optional[Dict] = None,
        num_run: Optional[int] = None,
        with_critic: Optional[bool] = False,
        qchem_cmd: Optional[str] = ">>qchem_cmd<<",
        max_cores: Optional[Union[str, int]] = ">>max_cores<<",
        multimode: Optional[str] = ">>multimode<<",
        qchem_input_params: Optional[Dict] = None,
        db_file: Optional[str] = ">>db_file<<",
        tags: Optional[Dict] = None
):
    """
    Queue transition-state optimizations for completed entries that have
    not yet been submitted.

    Entries are selected from db.database[db.data_collection] with
    "completed" True and "run_atomate" not True. For every path molecule
    whose name contains "transition_state", one workflow is added to the
    LaunchPad, and the entry is then flagged with "run_atomate": True.

    Args:
        db (CatDB): Database wrapper exposing `database` and
            `data_collection`.
        lp (LaunchPad): LaunchPad to which workflows are added.
        query (dict): Extra query criteria. The "completed" and
            "run_atomate" criteria are always enforced on top of it. The
            dict passed in is not modified.
        num_run (int): If given, at most this many entries are processed.
        with_critic (bool): If True, build workflows with
            get_wf_FFTSopt_and_critic; otherwise use a single
            FrequencyFlatteningTransitionStateFW.
        qchem_cmd (str): Forwarded to the Firework (non-critic path only).
        max_cores (str or int): Forwarded to the Firework (non-critic path
            only).
        multimode (str): Forwarded to the Firework (non-critic path only).
        qchem_input_params (dict): Forwarded to the workflow/Firework.
        db_file (str): Forwarded to the workflow/Firework.
        tags (dict): If given, applied to each workflow via add_tags.

    Returns:
        None
    """
    # Work on a copy so the caller's query dict is left untouched.
    success_query = {} if query is None else dict(query)
    success_query["completed"] = True
    success_query["run_atomate"] = {"$ne": True}

    collection = db.database[db.data_collection]
    possible_entries = list(collection.find(
        success_query,
        {"_id": 0, "rxnid": 1, "output": 1}
    ))

    if num_run is not None and num_run < len(possible_entries):
        possible_entries = possible_entries[:num_run]

    for entry in possible_entries:
        rxnid = entry["rxnid"]
        # Names and path molecules are matched up by position.
        entry_names = list(
            entry["output"]["optimized_structure_energies"].keys())
        ts_structures = [
            Molecule.from_dict(e)
            for i, e in enumerate(entry["output"]["path_molecules"])
            if "transition_state" in entry_names[i]
        ]

        for i, ts in enumerate(ts_structures):
            name = "rxn_{}:ts_{}".format(rxnid, i + 1)
            if with_critic:
                wf = get_wf_FFTSopt_and_critic(
                    ts, name,
                    qchem_input_params=qchem_input_params,
                    db_file=db_file)
            else:
                fw = FrequencyFlatteningTransitionStateFW(
                    molecule=ts,
                    name=name,
                    qchem_cmd=qchem_cmd,
                    multimode=multimode,
                    max_cores=max_cores,
                    qchem_input_params=qchem_input_params,
                    linked=True,
                    freq_before_opt=True,
                    db_file=db_file
                )
                wf = Workflow([fw], name=name)

            if tags is not None:
                wf = add_tags(wf, tags)
            lp.add_wf(wf)

        # Flag the entry so it is not picked up again by a later call.
        time_now = datetime.datetime.now(datetime.timezone.utc)
        collection.update_one(
            {"rxnid": rxnid},
            {"$set": {"run_atomate": True, "updated_on": time_now}})
def test_to_from_dict(self):
    """Check that the fixture molecule survives a dict round trip.

    Serializes with as_dict() (rather than the legacy `to_dict` attribute)
    and verifies that Molecule.from_dict rebuilds a Molecule.
    """
    d = self.mol.as_dict()
    mol2 = Molecule.from_dict(d)
    self.assertEqual(type(mol2), Molecule)
def get_reaction_workflow(self, rxn_id, mol_dir=None, name_pre="reaction_opt_freq", qchem_cmd="qchem -slurm", max_cores=32, qchem_input_params=None, max_iterations=3, max_perturb_scale=0.3):
    """
    Generates a Fireworks Workflow to perform geometry optimizations and
    vibrational analyses on all of the molecules involved in a chemical
    reaction.

    Each molecule gets its own subdirectory (named by its ID) under the base
    path; missing subdirectories are created. A geometry stored in
    self.molecules is preferred; otherwise the first *.mol file found in the
    molecule's subdirectory is used.

    :param rxn_id: str representing unique reaction identifier.
    :param mol_dir: str indicating a subdirectory (from self.base_dir) where
        molecule calculations should be stored. Default is None, indicating
        that all calculations should be done within self.base_dir.
    :param name_pre: str indicating the prefix which should be used for all
        Firework names
    :param qchem_cmd: str indicating how the Q-Chem code should be called.
        Default is "qchem -slurm", for a SLURM-based system.
    :param max_cores: int specifying how many cores the workflow should be
        split over. Default is 32.
    :param qchem_input_params: dict listing all parameters differing from
        default values.
    :param max_iterations (int): Number of perturbation -> optimization ->
        frequency iterations to perform. Defaults to 3.
    :param max_perturb_scale (float): The maximum scaled perturbation that
        can be applied to the molecule. Defaults to 0.3.
    :return: Workflow
    :raises RuntimeError: if the reaction is not in the database, or if a
        molecule is found neither in the database nor on the file system.
    """

    fws = []

    if mol_dir is not None:
        base_path = join(self.base_dir, mol_dir)
    else:
        base_path = self.base_dir
    # Resolve once up front: the loop below changes the working directory,
    # which would otherwise break a relative base path on later iterations.
    base_path = os.path.abspath(base_path)

    mol_dirs = {
        d for d in listdir(base_path)
        if isdir(join(base_path, d)) and "atomate" not in d
    }

    rxn = self.reactions.find_one({"rxn_id": rxn_id})
    if rxn is None:
        raise RuntimeError(
            "No reaction with id {} found in database.".format(rxn_id))

    mol_ids = [str(i) for i in rxn["pro_ids"] + rxn["rct_ids"]]

    start_dir = os.getcwd()
    try:
        for mol_id in mol_ids:
            mol_path = join(base_path, mol_id)

            if mol_id not in mol_dirs:
                os.mkdir(mol_path)
                # Remember it, so an ID that appears more than once in the
                # reaction does not trigger a second mkdir.
                mol_dirs.add(mol_id)

            # NOTE(review): kept from the original in case helpers called
            # below rely on the working directory -- confirm whether this
            # chdir is needed at all.
            os.chdir(mol_path)

            # Search for molecule in previous calculations
            result = self.molecules.find_one({"mol_id": mol_id})
            if result is None:
                mol_files = [
                    f for f in listdir(mol_path)
                    if isfile(join(mol_path, f)) and f.endswith(".mol")
                ]

                if len(mol_files) == 0:
                    raise RuntimeError(
                        "Molecule not found in database or file"
                        " system.")
                elif len(mol_files) > 1:
                    print("More than one valid *.mol file available.")
                    print("Selecting one for analysis.")

                mol = get_molecule(join(mol_path, mol_files[0]))
            else:
                entry = result["output"].get(
                    'optimized_molecule',
                    result["output"].get('initial_molecule'))
                mol = Molecule.from_dict(entry)

            fw = FrequencyFlatteningOptimizeFW(
                molecule=mol,
                name=name_pre + "_{}".format(mol_id),
                qchem_cmd=qchem_cmd,
                qchem_input_params=qchem_input_params,
                multimode="openmp",
                max_cores=max_cores,
                directory=mol_path,
                max_iterations=max_iterations,
                max_molecule_perturb_scale=max_perturb_scale,
                db_file=self.db_file)

            fws.append(fw)
    finally:
        # Do not leak the changed working directory to the caller, whether
        # the loop finished or raised.
        os.chdir(start_dir)

    return Workflow(fws)