def test_assimilate_unstable_opt(self):
    """Assimilate the ``2620_complete`` run and check the resulting task document."""
    # opt_0, freq_0, opt_1, freq_1, ... opt_3, freq_3
    run_names = [f"{kind}_{idx}" for idx in range(4) for kind in ("opt", "freq")]
    drone = QChemDrone(
        runs=run_names,
        additional_fields={"special_run_type": "frequency_flattener"},
    )
    calc_dir = os.path.join(module_dir, "..", "test_files", "2620_complete")
    doc = drone.assimilate(
        path=calc_dir,
        input_file="mol.qin",
        output_file="mol.qout",
        multirun=False,
    )

    self.assertEqual(doc["input"]["job_type"], "opt")
    self.assertEqual(doc["output"]["job_type"], "opt")
    self.assertEqual(doc["output"]["final_energy"], "unstable")

    # Top-level fields, checked in the same order as before.
    expected_fields = (
        ("smiles", "[S](=O)[N]S[C]"),
        ("state", "unsuccessful"),
        ("num_frequencies_flattened", 0),
        ("walltime", None),
        ("cputime", None),
        ("formula_pretty", "CS2NO"),
        ("formula_anonymous", "ABCD2"),
        ("chemsys", "C-N-O-S"),
        ("pointgroup", "C1"),
    )
    for field, expected in expected_fields:
        self.assertEqual(doc[field], expected)

    # "orig" must mirror the input of the last element of calcs_reversed.
    self.assertEqual(doc["orig"]["rem"], doc["calcs_reversed"][-1]["input"]["rem"])
    self.assertEqual(doc["orig"]["molecule"], doc["calcs_reversed"][-1]["input"]["molecule"])

    def to_graph(mol_dict):
        return MoleculeGraph.with_local_env_strategy(
            Molecule.from_dict(mol_dict),
            OpenBabelNN(),
            reorder=False,
            extend_structure=False,
        )

    orig_molgraph = to_graph(doc["orig"]["molecule"])
    initial_molgraph = to_graph(doc["input"]["initial_molecule"])
    self.assertEqual(orig_molgraph.isomorphic_to(initial_molgraph), True)
def test_assimilate_opt_with_hidden_changes_from_handler(self):
    """Assimilate ``1746_complete``; the original and initial molecules must not be isomorphic."""
    drone = QChemDrone(additional_fields={"special_run_type": "frequency_flattener"})
    calc_dir = os.path.join(module_dir, "..", "test_files", "1746_complete")
    doc = drone.assimilate(
        path=calc_dir,
        input_file="mol.qin",
        output_file="mol.qout",
        multirun=False,
    )

    self.assertEqual(doc["input"]["job_type"], "opt")
    self.assertEqual(doc["output"]["job_type"], "freq")
    self.assertEqual(doc["output"]["final_energy"], -303.835532370106)

    expected_fields = (
        ("smiles", "O1C(=CC1=O)[CH]"),
        ("state", "successful"),
        ("num_frequencies_flattened", 0),
        ("walltime", 631.54),
        ("cputime", 7471.17),
        ("formula_pretty", "HC2O"),
        ("formula_anonymous", "ABC2"),
        ("chemsys", "C-H-O"),
        ("pointgroup", "C1"),
    )
    for field, expected in expected_fields:
        self.assertEqual(doc[field], expected)

    self.assertEqual(doc["orig"]["rem"], doc["calcs_reversed"][-1]["input"]["rem"])

    graph_kwargs = {"reorder": False, "extend_structure": False}
    orig_molgraph = MoleculeGraph.with_local_env_strategy(
        Molecule.from_dict(doc["orig"]["molecule"]), OpenBabelNN(), **graph_kwargs
    )
    initial_molgraph = MoleculeGraph.with_local_env_strategy(
        Molecule.from_dict(doc["input"]["initial_molecule"]), OpenBabelNN(), **graph_kwargs
    )
    self.assertEqual(orig_molgraph.isomorphic_to(initial_molgraph), False)
def test_from_molecule_graph(self):
    """Build a BabelMolAdaptor from an edge-less MoleculeGraph and check both views."""
    adaptor = BabelMolAdaptor.from_molecule_graph(
        MoleculeGraph.with_empty_graph(self.mol)
    )
    # OpenBabel view first, then the pymatgen view, as in the original order.
    self.assertEqual(adaptor.openbabel_mol.NumAtoms(), 5)
    self.assertEqual(adaptor.pymatgen_mol.formula, "H4 C1")
def setUp(self):
    """Load the functional-group test molecule and build its graph and extractor."""
    warnings.simplefilter("ignore")

    mol_path = os.path.join(test_dir, "func_group_test.mol")
    self.file = mol_path
    self.mol = Molecule.from_file(mol_path)

    strategy = OpenBabelNN()
    self.strat = strategy
    self.mg = MoleculeGraph.with_local_env_strategy(self.mol, strategy)
    self.extractor = FunctionalGroupExtractor(self.mg)
def test_babel_PC_defaults(self):
    """Fragmenter built with default options on ``self.pc``."""
    fragmenter = Fragmenter(molecule=self.pc)

    # Default option values.
    self.assertEqual(fragmenter.open_rings, False)
    self.assertEqual(fragmenter.opt_steps, 10000)

    # The fragmenter's graph must equal an OpenBabelNN graph built the same way.
    expected_graph = MoleculeGraph.with_local_env_strategy(
        self.pc,
        OpenBabelNN(),
        reorder=False,
        extend_structure=False,
    )
    self.assertEqual(fragmenter.mol_graph, expected_graph)
    self.assertEqual(fragmenter.total_unique_fragments, 8)
def test_babel_PC_old_defaults(self):
    """Fragmenter on ``self.pc`` with ring opening switched on."""
    fragmenter = Fragmenter(molecule=self.pc, open_rings=True)

    self.assertEqual(fragmenter.open_rings, True)
    self.assertEqual(fragmenter.opt_steps, 10000)

    strategy = OpenBabelNN()
    expected_graph = MoleculeGraph.with_local_env_strategy(self.pc, strategy)
    self.assertEqual(fragmenter.mol_graph, expected_graph)
    self.assertEqual(fragmenter.total_unique_fragments, 13)
def test_babel_PC_defaults(self):
    """Fragmenter defaults on ``self.pc``; skipped when OpenBabel is unavailable."""
    pytest.importorskip("openbabel", reason="OpenBabel not installed")

    fragmenter = Fragmenter(molecule=self.pc)
    self.assertEqual(fragmenter.open_rings, False)
    self.assertEqual(fragmenter.opt_steps, 10000)

    reference_graph = MoleculeGraph.with_local_env_strategy(
        self.pc,
        OpenBabelNN(),
    )
    self.assertEqual(fragmenter.mol_graph, reference_graph)
    self.assertEqual(fragmenter.total_unique_fragments, 8)
def filter_fragment_entries(self, fragment_entries):
    """
    Populate ``self.filtered_entries`` with the unique entries of ``fragment_entries``.

    For every entry this:
      1. checks that its PCM dielectric agrees with ``self.molecule_entry``
         (only when the latter defines one), raising ``RuntimeError`` otherwise;
      2. builds ``initial_molgraph`` / ``final_molgraph`` and stores a
         ``structure_change`` label on the entry;
      3. keeps the entry unless an already kept entry has the same pretty
         formula, isomorphic initial and final graphs and the same charge,
         in which case only the lower-energy one of the two is kept.
    """
    self.filtered_entries = []
    for entry in fragment_entries:
        # Check and make sure that PCM dielectric is consistent with principle:
        if "pcm_dielectric" in self.molecule_entry:
            if "pcm_dielectric" not in entry:
                raise RuntimeError(
                    "Principle molecule has a PCM dielectric of "
                    f"{str(self.molecule_entry['pcm_dielectric'])}"
                    " but a fragment entry has no PCM dielectric! Please only pass fragment entries with PCM details consistent with the principle entry. Exiting..."
                )
            if entry["pcm_dielectric"] != self.molecule_entry["pcm_dielectric"]:
                raise RuntimeError(
                    "Principle molecule has a PCM dielectric of "
                    f"{str(self.molecule_entry['pcm_dielectric'])}"
                    " but a fragment entry has a different PCM dielectric! Please only pass fragment entries with PCM details consistent with the principle entry. Exiting..."
                )

        # Build initial and final molgraphs:
        for stage in ("initial", "final"):
            entry[stage + "_molgraph"] = MoleculeGraph.with_local_env_strategy(
                Molecule.from_dict(entry[stage + "_molecule"]),
                OpenBabelNN(),
                reorder=False,
                extend_structure=False,
            )

        # Classify any potential structural change that occurred during optimization:
        if entry["initial_molgraph"].isomorphic_to(entry["final_molgraph"]):
            change = "no_change"
        else:
            before = entry["initial_molgraph"].graph
            after = entry["final_molgraph"].graph
            if nx.is_connected(before.to_undirected()) and not nx.is_connected(after.to_undirected()):
                change = "unconnected_fragments"
            elif after.number_of_edges() < before.number_of_edges():
                change = "fewer_bonds"
            elif after.number_of_edges() > before.number_of_edges():
                change = "more_bonds"
            else:
                change = "bond_change"
        entry["structure_change"] = change

        # Check for uniqueness against everything kept so far.
        for position, kept in enumerate(self.filtered_entries):
            if kept["formula_pretty"] != entry["formula_pretty"]:
                continue
            same_structure = (
                kept["initial_molgraph"].isomorphic_to(entry["initial_molgraph"])
                and kept["final_molgraph"].isomorphic_to(entry["final_molgraph"])
                and kept["initial_molecule"]["charge"] == entry["initial_molecule"]["charge"]
            )
            if not same_structure:
                continue
            # Two similar entries: keep the one with the lower energy. Note that this
            # essentially chooses between singlet and triplet entries when both have
            # the same structural details.
            if entry["final_energy"] < kept["final_energy"]:
                self.filtered_entries[position] = entry
            break
        else:
            # No similar entry was found.
            self.filtered_entries += [entry]
def from_molecule_document(
    cls,
    mol_doc: Dict,
    correction: float = 0.0,
    parameters: Optional[Dict] = None,
    attribute=None,
):
    """
    Initialize a MoleculeEntry from a molecule document.

    Args:
        mol_doc: MongoDB molecule document (nested dictionary) that contains
            the molecule information. The keys ``molecule``, ``energy_Ha``,
            ``enthalpy_kcal/mol``, ``entropy_cal/molK`` and ``task_id`` are
            required; ``mol_graph`` is optional.
        correction: A correction to be applied to the energy. This is used
            to modify the energy for certain analyses. Defaults to 0.0.
        parameters: An optional dict of parameters associated with the
            molecule. Defaults to None.
        attribute: Optional attribute of the entry, e.g. a label used for
            further analysis and plotting. Can be anything MSONable.

    Raises:
        MoleculeEntryError: if a required key is missing from ``mol_doc``.
    """
    try:
        molecule = mol_doc["molecule"]
        if not isinstance(molecule, Molecule):
            molecule = Molecule.from_dict(molecule)  # type: ignore
        energy = mol_doc["energy_Ha"]
        enthalpy = mol_doc["enthalpy_kcal/mol"]
        entropy = mol_doc["entropy_cal/molK"]
        entry_id = mol_doc["task_id"]
    except KeyError as e:
        raise MoleculeEntryError(
            "Unable to construct molecule entry from molecule document; missing "
            f"attribute {e} in `mol_doc`.")

    # The stored graph may be a MoleculeGraph already or its dict form.
    mol_graph = None
    if "mol_graph" in mol_doc:
        mol_graph = mol_doc["mol_graph"]
        if not isinstance(mol_graph, MoleculeGraph):
            mol_graph = MoleculeGraph.from_dict(mol_graph)

    return cls(
        molecule=molecule,
        energy=energy,
        correction=correction,
        enthalpy=enthalpy,
        entropy=entropy,
        parameters=parameters,
        entry_id=entry_id,
        attribute=attribute,
        mol_graph=mol_graph,
    )
def test_babel_PC_defaults(self):
    """Fragmenter defaults on ``self.pc``, including fragments from ring openings."""
    fragmenter = Fragmenter(molecule=self.pc)

    self.assertEqual(fragmenter.open_rings, True)
    self.assertEqual(fragmenter.opt_steps, 10000)

    graph_options = {"reorder": False, "extend_structure": False}
    expected_graph = MoleculeGraph.with_local_env_strategy(
        self.pc, OpenBabelNN(), **graph_options
    )
    self.assertEqual(fragmenter.mol_graph, expected_graph)

    self.assertEqual(len(fragmenter.unique_fragments), 13)
    self.assertEqual(len(fragmenter.unique_fragments_from_ring_openings), 5)
def test_edges_given_PC_not_defaults(self):
    """Fragmenter on ``self.pc`` with explicit edges and non-default options."""
    fragmenter = Fragmenter(
        molecule=self.pc,
        edges=self.pc_edges,
        depth=2,
        open_rings=False,
        opt_steps=0,
    )
    self.assertEqual(fragmenter.open_rings, False)
    self.assertEqual(fragmenter.opt_steps, 0)

    # Same edges, expressed as the {(i, j): properties} mapping with_edges expects.
    edge_map = {}
    for edge in self.pc_edges:
        edge_map[(edge[0], edge[1])] = None
    expected_graph = MoleculeGraph.with_edges(self.pc, edges=edge_map)
    self.assertEqual(fragmenter.mol_graph, expected_graph)

    self.assertEqual(len(fragmenter.unique_fragments), 20)
    self.assertEqual(len(fragmenter.unique_fragments_from_ring_openings), 0)
def run_task(self, fw_spec):
    """
    Write a QChem input file for the molecule selected from the task and ``fw_spec``.

    Molecule selection:
      * If ``fw_spec["prev_calc_molecule"]`` is set and the task also has a
        ``molecule``, the two are compared as OpenBabelNN molecule graphs.
        When isomorphic, the molecule from ``fw_spec`` is used; otherwise a
        warning is printed and the task's own ``molecule`` is kept.
      * If only one of the two is present, that one is used.
      * If neither is present, ``KeyError`` is raised.

    The input is written to ``<write_to_dir>/<input_file>`` (defaults: ``""``
    and ``"mol.qin"``) using ``self["rem"]`` and the optional ``opt``,
    ``pcm`` and ``solvent`` sections.

    Args:
        fw_spec: spec mapping; only ``prev_calc_molecule`` is read here.

    Raises:
        KeyError: if no molecule is available from either source.
    """
    input_file = os.path.join(self.get("write_to_dir", ""),
                              self.get("input_file", "mol.qin"))

    prev_calc_mol = fw_spec.get("prev_calc_molecule")
    mol = self.get("molecule")

    if prev_calc_mol:
        if mol:
            # Both sources present: check if mol and prev_calc_mol are isomorphic.
            mol_graph = MoleculeGraph.with_local_env_strategy(
                mol, OpenBabelNN(), reorder=False, extend_structure=False)
            # Fixed: this previously referenced the undefined name
            # `prev_calc_molecule`, which raised NameError on this path.
            prev_mol_graph = MoleculeGraph.with_local_env_strategy(
                prev_calc_mol,
                OpenBabelNN(),
                reorder=False,
                extend_structure=False,
            )
            if mol_graph.isomorphic_to(prev_mol_graph):
                mol = prev_calc_mol
            else:
                print(
                    "WARNING: Molecule from spec is not isomorphic to passed molecule!"
                )
        else:
            mol = prev_calc_mol
    elif not mol:
        raise KeyError(
            "No molecule present, add as an optional param or check fw_spec"
        )

    # There has to be a statement for every optional QChem section; each
    # defaults to None when the variable is not passed.
    opt = self.get("opt", None)
    pcm = self.get("pcm", None)
    solvent = self.get("solvent", None)

    qcin = QCInput(molecule=mol, rem=self["rem"], opt=opt, pcm=pcm, solvent=solvent)
    qcin.write_file(input_file)
def test_get_disconnected(self):
    """get_disconnected_fragments splits a CH4 + He graph in two and leaves CH4 whole."""

    def ch4_coords():
        # Fresh coordinate lists on every call so the molecules share no state.
        return [
            [0.0000, 0.0000, 0.0000],
            [-0.3633, -0.5138, -0.8900],
            [1.0900, 0.0000, 0.0000],
            [-0.3633, 1.0277, 0.0000],
            [-0.3633, -0.5138, -0.8900],
        ]

    ch4_species = ["C", "H", "H", "H", "H"]
    he_position = [5.0000, 5.0000, 5.0000]

    disconnected = Molecule(ch4_species + ["He"], ch4_coords() + [list(he_position)])
    no_he = Molecule(list(ch4_species), ch4_coords())
    just_he = Molecule(["He"], [list(he_position)])

    # Carbon bonded to its four hydrogens; helium left without any edge.
    dis_mg = MoleculeGraph.with_empty_graph(disconnected)
    for hydrogen in range(1, 5):
        dis_mg.add_edge(0, hydrogen)

    fragments = dis_mg.get_disconnected_fragments()
    self.assertEqual(len(fragments), 2)
    self.assertEqual(fragments[0].molecule, no_he)
    self.assertEqual(fragments[1].molecule, just_he)

    con_mg = MoleculeGraph.with_empty_graph(no_he)
    for hydrogen in range(1, 5):
        con_mg.add_edge(0, hydrogen)

    fragments = con_mg.get_disconnected_fragments()
    self.assertEqual(len(fragments), 1)
def __init__(
    self,
    molecule: Molecule,
    energy: float,
    correction: float = 0.0,
    enthalpy: Optional[float] = None,
    entropy: Optional[float] = None,
    parameters: Optional[Dict] = None,
    entry_id: Optional[Any] = None,
    attribute=None,
    mol_doc: Optional[Dict] = None,
    mol_graph: Optional[MoleculeGraph] = None,
):
    """
    Store the energetics and resolve the molecule graph of the entry.

    When a non-empty ``mol_doc`` is given, its ``enthalpy_kcal/mol``,
    ``entropy_cal/molK`` and ``task_id`` values override the ``enthalpy``,
    ``entropy`` and ``entry_id`` arguments. Its ``mol_graph`` (a
    MoleculeGraph or its dict form) is used when present; when absent, the
    graph is built from ``molecule`` and the ``mol_graph`` argument is ignored.

    Without a ``mol_doc``, the ``mol_graph`` argument is used as given, and a
    graph is built from ``molecule`` only when it is None.

    Built graphs use OpenBabelNN followed by ``metal_edge_extender``.
    """
    self.uncorrected_energy = energy
    self.correction = correction
    self.enthalpy = enthalpy
    self.entropy = entropy
    self.parameters = parameters if parameters else {}
    self.entry_id = entry_id
    self.attribute = attribute
    self.mol_doc = mol_doc if mol_doc else {}
    self.mol_graph = mol_graph

    if self.mol_doc:
        self.enthalpy = self.mol_doc["enthalpy_kcal/mol"]
        self.entropy = self.mol_doc["entropy_cal/molK"]
        self.entry_id = self.mol_doc["task_id"]
        if "mol_graph" in self.mol_doc:
            stored_graph = self.mol_doc["mol_graph"]
            if not isinstance(stored_graph, MoleculeGraph):
                stored_graph = MoleculeGraph.from_dict(stored_graph)
            self.mol_graph = stored_graph
            return
    elif self.mol_graph is not None:
        return

    # No usable graph supplied: derive one from the molecule itself.
    self.mol_graph = metal_edge_extender(
        MoleculeGraph.with_local_env_strategy(molecule, OpenBabelNN())
    )
def read_molecules(self):
    """
    Read molecules from ``self.molecule_file`` and cache them on ``self._molecules``.

    * ``self.format == "graph"``: the file must be ``.json``, ``.yaml`` or
      ``.yml`` and hold a sequence of MoleculeGraph dicts. Each one becomes a
      ``MoleculeWrapper`` whose id is its position in the file. A charge file,
      if given, is ignored with a warning.
    * any other format: molecules are read as rdkit mols. Charges come from
      ``self.charge_file`` when given, otherwise every charge is 0.

    Returns:
        The list of wrapper molecules (also stored in ``self._molecules``).

    Raises:
        ValueError: for an unsupported file extension with format ``graph``.
        AssertionError: if the numbers of molecules and charges differ.
    """
    if self.format == "graph":
        if self.charge_file is not None:
            warnings.warn(
                f"charge file {self.charge_file} ignored for format `graph`"
            )

        file_type = self.molecule_file.suffix
        if file_type == ".json":
            with open(self.molecule_file, "r") as f:
                mol_graph_dicts = json.load(f)
        elif file_type in [".yaml", ".yml"]:
            mol_graph_dicts = yaml_load(self.molecule_file)
        else:
            supported = [".json", ".yaml", ".yml"]
            raise ValueError(
                f"File extension of {self.molecule_file} not supported; "
                f"supported are: {supported}.")

        mol_graphs = [MoleculeGraph.from_dict(d) for d in mol_graph_dicts]
        molecules = [
            MoleculeWrapper(g, id=str(i)) for i, g in enumerate(mol_graphs)
        ]

    else:
        # read rdkit mols
        rdkit_mols = read_rdkit_mols_from_file(self.molecule_file, self.format)

        # read charge file
        if self.charge_file is None:
            charges = [0] * len(rdkit_mols)
        else:
            charges = read_charge(self.charge_file)
            # Fixed: a stray "f" inside the f-string used to render the count
            # as e.g. "f3" instead of "3".
            msg = (
                f"expect the number of molecules given in {self.molecule_file} "
                f"and the number of charges given in {self.charge_file} to be "
                f"the same, but got {len(rdkit_mols)} and {len(charges)}. "
            )
            # Kept as an assert so callers still see AssertionError.
            assert len(rdkit_mols) == len(charges), msg

        # convert rdkit mols to wrapper molecules; mols that failed to parse
        # (None) get no identifier
        identifiers = [
            m.GetProp("_Name") + f"_index-{i}" if m is not None else None
            for i, m in enumerate(rdkit_mols)
        ]
        molecules = rdkit_mols_to_wrapper_mols(rdkit_mols,
                                               identifiers,
                                               charges,
                                               nprocs=self.nprocs)

    self._molecules = molecules

    return molecules
def setUp(self):
    """Load the functional-group test molecule; the graph keeps the file's atom order."""
    warnings.simplefilter("ignore")

    self.file = os.path.join(test_dir, "func_group_test.mol")
    self.mol = Molecule.from_file(self.file)
    self.strat = OpenBabelNN()

    graph = MoleculeGraph.with_local_env_strategy(
        self.mol,
        self.strat,
        reorder=False,
        extend_structure=False,
    )
    self.mg = graph
    self.extractor = FunctionalGroupExtractor(graph)
def convert_to_critic_mol_graph(self):
    """
    Replace ``self.mol_graph`` with a graph built from ``self.critic["bonding"]``.

    Each bonding record lists 1-based ``atom_ids``; they are shifted to
    0-based indices and used as edges on ``self.pymatgen_mol``.

    Raises:
        UnsuccessfulEntryError: if the bonding data (or a record's
            ``atom_ids``) is missing.
    """
    bonds = {}
    try:
        for record in self.critic["bonding"].values():
            zero_based = tuple(int(atom_id) - 1 for atom_id in record["atom_ids"])
            bonds[zero_based] = None
    except KeyError as e:
        print(self.__class__.__name__, e, "critic bonding", self.id)
        raise UnsuccessfulEntryError

    self.mol_graph = MoleculeGraph.with_edges(self.pymatgen_mol, bonds)
def test_build_unique_relevant_molecules_with_triplets(self):
    """Unique relevant molecules (triplets enabled) must match the stored reference files."""

    def check_case(molecule, raw_edges, charges, expected_count, ref_name, **task_kwargs):
        ft = FragmentMolecule(molecule=molecule, edges=raw_edges, depth=0, **task_kwargs)
        ft.mol = ft.get("molecule")
        ft.depth = ft.get("depth")
        ft.charges = charges
        ft.do_triplets = True

        edge_map = {(e[0], e[1]): None for e in raw_edges}
        mol_graph = MoleculeGraph.with_edges(molecule, edge_map)
        ft.unique_fragments = mol_graph.build_unique_fragments()
        ft._build_unique_relevant_molecules()

        self.assertEqual(len(ft.unique_molecules), expected_count)
        # To regenerate a reference: dumpfn(ft.unique_molecules, <same path as below>)
        ref_mols = loadfn(os.path.join(module_dir, ref_name))
        self.assertEqual(ft.unique_molecules, ref_mols)

    check_case(self.pc, self.pc_edges, [-1, 0, 1], 1323,
               "pc_mols_with_trips.json", opt_steps=1000)
    check_case(self.pos_pc, self.pc_edges, [-1, 0, 1, 2], 1770,
               "pos_pc_mols_with_trips.json", opt_steps=1000)
    check_case(self.pc_frag1, self.pc_frag1_edges, [-1, 0, 1], 54,
               "pc_frag1_mols_with_trips.json")
def test_assimilate_unstable_opt(self):
    """Assimilate the ``2620_complete`` run and check the resulting task document."""
    runs = []
    for step in range(4):
        runs.extend([f"opt_{step}", f"freq_{step}"])

    drone = QChemDrone(
        runs=runs,
        additional_fields={"special_run_type": "frequency_flattener"},
    )
    doc = drone.assimilate(
        path=os.path.join(module_dir, "..", "test_files", "2620_complete"),
        input_file="mol.qin",
        output_file="mol.qout",
        multirun=False,
    )

    # (section, key, expected); section None means a top-level key.
    expectations = [
        ("input", "job_type", "opt"),
        ("output", "job_type", "opt"),
        ("output", "final_energy", "unstable"),
        (None, "smiles", "[S](=O)[N]S[C]"),
        (None, "state", "unsuccessful"),
        (None, "walltime", None),
        (None, "cputime", None),
        (None, "formula_pretty", "CS2NO"),
        (None, "formula_anonymous", "ABCD2"),
        (None, "chemsys", "C-N-O-S"),
        (None, "pointgroup", "C1"),
    ]
    for section, key, expected in expectations:
        actual = doc[key] if section is None else doc[section][key]
        self.assertEqual(actual, expected)

    self.assertEqual(doc["orig"]["rem"], doc["calcs_reversed"][-1]["input"]["rem"])
    self.assertEqual(doc["orig"]["molecule"], doc["calcs_reversed"][-1]["input"]["molecule"])

    orig_molgraph = MoleculeGraph.with_local_env_strategy(
        Molecule.from_dict(doc["orig"]["molecule"]),
        OpenBabelNN(),
    )
    initial_molgraph = MoleculeGraph.with_local_env_strategy(
        Molecule.from_dict(doc["input"]["initial_molecule"]),
        OpenBabelNN(),
    )
    self.assertEqual(orig_molgraph.isomorphic_to(initial_molgraph), True)
def find_mmtypes(molgraph: MoleculeGraph, uff_lib: pandas.DataFrame,
                 uff_symbs: List[str]) -> List[str]:
    """
    Pick UFF atom types for the sites of a molecule graph.

    For site ``i`` the candidates are the rows of ``uff_lib`` whose ``symbol``
    starts with ``uff_symbs[i]``. The first candidate is always written to the
    site's ``ufftype`` property, then refined:

    * exactly one candidate: nothing more to do;
    * exactly one candidate with matching ``coordination``: it replaces the
      ``ufftype`` property;
    * otherwise the candidate whose tabulated ``angle`` is closest to the
      angle formed with the two nearest neighbours (180 degrees when the site
      has fewer than two neighbours) is appended to the returned list.

    Parameters
    ----------
    molgraph : MoleculeGraph
        Graph whose connectivity and site coordinates are inspected. The
        ``ufftype`` site properties of its molecule are modified in place.
    uff_lib : pandas.DataFrame
        Table with at least ``symbol``, ``coordination`` and ``angle`` columns.
        It is not modified.
    uff_symbs : List[str]
        Symbol prefix for each site, in site order.

    Returns
    -------
    List[str]
        Symbols chosen through the angle criterion only.
        NOTE(review): sites resolved by the two shortcut paths add no entry,
        so this list can be shorter than ``uff_symbs`` - confirm that callers
        expect this.
    """
    mmtypes = []
    for i, symb in enumerate(uff_symbs):
        conn = molgraph.get_connected_sites(i)
        ncoord = len(conn)
        atom_compat = uff_lib[uff_lib.symbol.str.startswith(symb)]
        molgraph.molecule[i].properties["ufftype"] = atom_compat.symbol.values[0]
        if len(atom_compat) == 1:
            continue

        if ncoord >= 2:
            c0 = molgraph.molecule.sites[i].coords
            # get the two closest sites
            s1, s2 = sorted(conn, key=lambda k: k.dist)[:2]
            c01 = s1.site.coords - c0
            c02 = s2.site.coords - c0
            cosine_angle = numpy.dot(c01, c02) / (
                numpy.linalg.norm(c01) * numpy.linalg.norm(c02))
            # Round-off can push the cosine marginally outside [-1, 1], which
            # would make arccos return NaN.
            cosine_angle = numpy.clip(cosine_angle, -1.0, 1.0)
            angle = numpy.degrees(numpy.arccos(cosine_angle))
        else:
            angle = 180.0

        coordinations_compat = atom_compat[atom_compat.coordination == ncoord]
        if len(coordinations_compat) == 1:
            molgraph.molecule[i].properties["ufftype"] = (
                coordinations_compat.symbol.values[0])
            continue
        if len(coordinations_compat) == 0:
            # problem with the coordinations. use angles
            coordinations_compat = atom_compat

        # Rank by the *absolute* deviation: a signed difference would favour
        # the smallest tabulated angle rather than the closest one. `assign`
        # works on a copy, so no column is written into a slice of `uff_lib`.
        ranked = coordinations_compat.assign(
            angle_diff=(coordinations_compat.angle - angle).abs()
        ).sort_values(by="angle_diff", kind="mergesort")
        best_angle = ranked.iloc[0]
        # TODO: if < 10% diff in angle error, use radii error
        mmtypes.append(best_angle.symbol)
    return mmtypes
def open_ring(mol_graph, bond, opt_steps):
    """
    Open a ring by deleting one bond and relaxing the result with OpenBabel.

    The graph is converted to an OpenBabel molecule, the bond given by
    ``bond[0]`` is removed, a local optimization of ``opt_steps`` steps is run,
    and the relaxed structure is turned back into a MoleculeGraph with
    OpenBabelNN connectivity.

    Args:
        mol_graph: MoleculeGraph containing the ring.
        bond: sequence whose first item holds the two atom indices of the
            bond to remove.
        opt_steps: number of local optimization steps.

    Returns:
        MoleculeGraph of the optimized, ring-opened structure.
    """
    adaptor = BabelMolAdaptor.from_molecule_graph(mol_graph)

    # remove_bond is called with the graph's atom indices shifted up by one.
    first_idx = bond[0][0] + 1
    second_idx = bond[0][1] + 1
    adaptor.remove_bond(first_idx, second_idx)

    adaptor.localopt(steps=opt_steps)
    relaxed = adaptor.pymatgen_mol
    return MoleculeGraph.with_local_env_strategy(
        relaxed,
        OpenBabelNN(),
        reorder=False,
        extend_structure=False,
    )
def test_edges_given_PC_not_defaults(self):
    """Fragmenter on ``self.pc`` with explicit edges, depth 2 and no ring opening."""
    options = {"depth": 2, "open_rings": False, "opt_steps": 0}
    fragmenter = Fragmenter(molecule=self.pc, edges=self.pc_edges, **options)

    self.assertEqual(fragmenter.open_rings, False)
    self.assertEqual(fragmenter.opt_steps, 0)

    edge_map = dict.fromkeys((e[0], e[1]) for e in self.pc_edges)
    expected_graph = MoleculeGraph.with_edges(self.pc, edges=edge_map)
    self.assertEqual(fragmenter.mol_graph, expected_graph)
    self.assertEqual(fragmenter.total_unique_fragments, 20)
def open_ring(mol_graph, bond, opt_steps):
    """
    Open a ring by deleting one bond and relaxing the result with the UFF force field.

    The graph is converted to an OpenBabel molecule, the bond given by
    ``bond[0]`` is removed, a local ``uff`` optimization of ``opt_steps`` steps
    is run, and the relaxed structure is turned back into a MoleculeGraph with
    OpenBabelNN connectivity.

    Args:
        mol_graph: MoleculeGraph containing the ring.
        bond: sequence whose first item holds the two atom indices of the
            bond to remove.
        opt_steps: number of local optimization steps.

    Returns:
        MoleculeGraph of the optimized, ring-opened structure.
    """
    ring_bond = bond[0]
    adaptor = BabelMolAdaptor.from_molecule_graph(mol_graph)
    # remove_bond is called with the graph's atom indices shifted up by one.
    adaptor.remove_bond(ring_bond[0] + 1, ring_bond[1] + 1)
    adaptor.localopt(steps=opt_steps, forcefield='uff')
    return MoleculeGraph.with_local_env_strategy(adaptor.pymatgen_mol, OpenBabelNN())
def zero_d_graph_to_molecule_graph(bonded_structure, graph):
    """
    Convert a zero-dimensional networkx Graph into a MoleculeGraph.

    Walks the component breadth-first from its first node, in the same spirit
    as calculate_dimensionality_of_site(), collecting one site per atom.

    Args:
        bonded_structure (StructureGraph): A structure with bonds, represented
            as a pymatgen structure graph. For example, generated using the
            CrystalNN.get_bonded_structure() method.
        graph (nx.Graph): A networkx `Graph` object for the component of
            interest.

    Returns:
        (MoleculeGraph): A MoleculeGraph object of the component.

    Raises:
        ValueError: if a site index is reached a second time, i.e. the
            component is not 0D.
    """
    import networkx as nx

    visited = []  # (site index, image) pairs, in visiting order
    collected_sites = []

    first = list(graph.nodes())[0]
    pending = [(first, (0, 0, 0), bonded_structure.structure[first])]
    while pending:
        index, image, site = pending.pop(0)

        if any(index == seen[0] for seen in visited):
            raise ValueError("Graph component is not 0D")

        visited.append((index, image))
        collected_sites.append(site)

        for neighbour in bonded_structure.get_connected_sites(index, jimage=image):
            if (neighbour.index, neighbour.jimage) in visited:
                continue
            candidate = (neighbour.index, neighbour.jimage, neighbour.site)
            if candidate in pending:
                continue
            pending.append(candidate)

    # sort the list of indices and the graph by index to make consistent
    order = np.argsort([seen[0] for seen in visited])
    ordered_sites = np.array(collected_sites, dtype=object)[order]
    relabelled = nx.convert_node_labels_to_integers(graph, ordering="sorted")

    species = [s.specie for s in ordered_sites]
    coords = [s.coords for s in ordered_sites]
    mol = Molecule(species, coords)
    return MoleculeGraph.with_edges(mol, nx.Graph(relabelled).edges())
def _check_for_structure_changes(self):
    """
    Label how the bonding changed between the initial and the last geometry.

    Stores one of ``no_change``, ``unconnected_fragments``, ``fewer_bonds``,
    ``more_bonds`` or ``bond_change`` under ``self.data["structure_change"]``.
    """

    def build_graph(molecule):
        return MoleculeGraph.with_local_env_strategy(
            molecule,
            OpenBabelNN(),
            reorder=False,
            extend_structure=False,
        )

    initial_mol_graph = build_graph(self.data["initial_molecule"])
    last_mol_graph = build_graph(self.data["molecule_from_last_geometry"])

    if initial_mol_graph.isomorphic_to(last_mol_graph):
        self.data["structure_change"] = "no_change"
        return

    initial_graph = initial_mol_graph.graph
    last_graph = last_mol_graph.graph
    if nx.is_connected(initial_graph.to_undirected()) and not nx.is_connected(last_graph.to_undirected()):
        label = "unconnected_fragments"
    elif last_graph.number_of_edges() < initial_graph.number_of_edges():
        label = "fewer_bonds"
    elif last_graph.number_of_edges() > initial_graph.number_of_edges():
        label = "more_bonds"
    else:
        # Same number of bonds, but a different connectivity.
        label = "bond_change"
    self.data["structure_change"] = label
def setUpClass(cls) -> None:
    """
    Load the LiEC entries and locate the EC entries with charge 0, -1 and +1.

    Nothing is set up when ``ob`` is falsy.
    """
    if not ob:
        return

    cls.LiEC_reextended_entries = []
    raw_entries = loadfn(os.path.join(test_dir, "LiEC_reextended_entries.json"))
    for raw in raw_entries:
        output = raw["output"]
        if "optimized_molecule" in output:
            mol = output["optimized_molecule"]
        else:
            mol = output["initial_molecule"]
        mol_entry = MoleculeEntry(
            molecule=mol,
            energy=float(output["final_energy"]),
            enthalpy=float(output["enthalpy"]),
            entropy=float(output["entropy"]),
            entry_id=raw["task_id"],
        )
        # Lithium entries are kept only when their charge is +1.
        if mol_entry.formula != "Li1" or mol_entry.charge == 1:
            cls.LiEC_reextended_entries.append(mol_entry)

    ec_graph = MoleculeGraph.with_local_env_strategy(
        Molecule.from_file(os.path.join(test_dir, "EC.xyz")),
        OpenBabelNN(),
    )
    cls.EC_mg = metal_edge_extender(ec_graph)

    cls.EC_0_entry = None
    cls.EC_minus_entry = None
    cls.EC_1_entry = None

    for entry in cls.LiEC_reextended_entries:
        matches_ec = (
            entry.formula == "C3 H4 O3"
            and entry.charge in (0, -1, 1)
            and entry.num_bonds == 10
            and cls.EC_mg.isomorphic_to(entry.mol_graph)
        )
        if matches_ec:
            if entry.charge == 0:
                cls.EC_0_entry = entry
            elif entry.charge == -1:
                cls.EC_minus_entry = entry
            else:
                cls.EC_1_entry = entry

        # Stop as soon as all three charge states have been found.
        found = (cls.EC_0_entry, cls.EC_minus_entry, cls.EC_1_entry)
        if all(item is not None for item in found):
            break
def test_build_new_FWs(self):
    """With no relevant docs present, every unique molecule yields a new firework."""
    ft = FragmentMolecule(
        molecule=self.pc_frag1,
        edges=self.pc_frag1_edges,
        depth=0,
    )
    ft.mol = ft.get("molecule")
    ft.depth = ft.get("depth")
    ft.charges = [-1, 0, 1]
    ft.do_triplets = False
    ft.qchem_input_params = {}

    edge_map = {}
    for edge in self.pc_frag1_edges:
        edge_map[(edge[0], edge[1])] = None
    mol_graph = MoleculeGraph.with_edges(self.pc_frag1, edge_map)

    ft.unique_fragments = mol_graph.build_unique_fragments()
    ft._build_unique_relevant_molecules()
    ft.all_relevant_docs = []

    self.assertEqual(len(ft._build_new_FWs()), 36)
def zero_d_graph_to_molecule_graph(bonded_structure, graph):
    """
    Converts a zero-dimensional networkx Graph object into a MoleculeGraph.

    Implements a similar breadth-first search to that in
    calculate_dimensionality_of_site().

    Args:
        bonded_structure (StructureGraph): A structure with bonds, represented
            as a pymatgen structure graph. For example, generated using the
            CrystalNN.get_bonded_structure() method.
        graph (nx.Graph): A networkx `Graph` object for the component of
            interest.

    Returns:
        (MoleculeGraph): A MoleculeGraph object of the component.

    Raises:
        ValueError: If a site index is dequeued a second time, which is taken
            to mean the component is not 0D.
    """
    import networkx as nx

    # (site index, image) pairs already visited, and the matching site objects
    # in the same visiting order.
    seen_indices = []
    sites = []

    # Start the search from the first node of the component, in the (0, 0, 0) image.
    start_index = list(graph.nodes())[0]
    queue = [(start_index, (0, 0, 0), bonded_structure.structure[start_index])]
    while len(queue) > 0:
        comp_i, image_i, site_i = queue.pop(0)

        # Only the site index is compared here, not the image.
        if comp_i in [x[0] for x in seen_indices]:
            raise ValueError("Graph component is not 0D")

        seen_indices.append((comp_i, image_i))
        sites.append(site_i)

        for site_j in bonded_structure.get_connected_sites(
                comp_i, jimage=image_i):
            # Enqueue a neighbour only if that exact (index, image) has not
            # been visited and is not already waiting in the queue.
            if ((site_j.index, site_j.jimage) not in seen_indices and
                    (site_j.index, site_j.jimage, site_j.site) not in queue):
                queue.append((site_j.index, site_j.jimage, site_j.site))

    # sort the list of indices and the graph by index to make consistent
    indices_ordering = np.argsort([x[0] for x in seen_indices])
    sorted_sites = np.array(sites, dtype=object)[indices_ordering]
    sorted_graph = nx.convert_node_labels_to_integers(graph, ordering="sorted")
    mol = Molecule([s.specie for s in sorted_sites],
                   [s.coords for s in sorted_sites])
    # nx.Graph(...) produces an undirected copy before its edges are handed over.
    mol_graph = MoleculeGraph.with_edges(mol, nx.Graph(sorted_graph).edges())

    return mol_graph
def build_MoleculeGraph(molecule, edges=None):
    """
    Build an undirected MoleculeGraph for ``molecule`` from a list of edges.

    Args:
        molecule: molecule to build the graph for.
        edges: iterable of edges, each indexable as ``(from_index, to_index)``.
            When None, the edges are obtained from ``edges_from_babel(molecule)``.

    Returns:
        MoleculeGraph whose ``graph`` is undirected and whose nodes carry
        ``specie`` (element symbol) and ``coords`` attributes.
    """
    # Identity check: `edges == None` is elementwise (and ambiguous in a
    # boolean context) when the edges are passed as an array.
    if edges is None:
        edges = edges_from_babel(molecule)

    mol_graph = MoleculeGraph.with_empty_graph(molecule)
    for edge in edges:
        mol_graph.add_edge(edge[0], edge[1])
    mol_graph.graph = mol_graph.graph.to_undirected()

    # Re-attach the node attributes on the undirected graph.
    species = {}
    coords = {}
    for node in mol_graph.graph:
        site = mol_graph.molecule[node]
        species[node] = site.specie.symbol
        coords[node] = site.coords
    nx.set_node_attributes(mol_graph.graph, species, "specie")
    nx.set_node_attributes(mol_graph.graph, coords, "coords")
    return mol_graph
def test_in_database_through_build_new_FWs(self):
    """Molecules already covered by the docs in ``doc.json`` must not get new fireworks."""
    ft = FragmentMolecule(molecule=self.pc_frag1, edges=self.pc_frag1_edges, depth=0)
    ft.mol = ft.get("molecule")
    ft.depth = ft.get("depth")
    ft.charges = [-1, 0, 1]
    ft.do_triplets = False
    ft.qchem_input_params = {}

    edge_map = dict.fromkeys((e[0], e[1]) for e in self.pc_frag1_edges)
    fragment_graph = MoleculeGraph.with_edges(self.pc_frag1, edge_map)
    ft.unique_fragments = fragment_graph.build_unique_fragments()
    ft._build_unique_relevant_molecules()

    # The stored docs hold molecule objects; the task is given their dict form.
    known_docs = loadfn(os.path.join(module_dir, "doc.json"))
    for known in known_docs:
        calc_input = known["input"]
        calc_input["initial_molecule"] = calc_input["initial_molecule"].as_dict()
    ft.all_relevant_docs = known_docs

    self.assertEqual(len(ft._build_new_FWs()), 29)
def mol_to_mol_graph(molecule: Union[Molecule, MoleculeGraph]):
    """
    Return a MoleculeGraph for ``molecule``.

    Args:
        molecule (Molecule or MoleculeGraph): input to convert. A
            MoleculeGraph is returned unchanged.

    Returns:
        mol_graph: the given MoleculeGraph, or a new one built with
        OpenBabelNN connectivity and passed through ``metal_edge_extender``.
    """
    if not isinstance(molecule, MoleculeGraph):
        babel_graph = MoleculeGraph.with_local_env_strategy(molecule, OpenBabelNN())
        return metal_edge_extender(babel_graph)
    return molecule
def test_substitute(self):
    """substitute_group must give the same result for every way of passing the group."""
    molecule = FunctionalGroups["methyl"]
    molgraph = MoleculeGraph.with_edges(
        molecule,
        {(0, neighbour): {"weight": 1} for neighbour in (1, 2, 3)},
    )

    # Ensure that strings and molecules lead to equivalent substitutions
    eth_mol = copy.deepcopy(self.ethylene)
    eth_str = copy.deepcopy(self.ethylene)
    eth_mol.substitute_group(5, molecule, MinimumDistanceNN)
    eth_str.substitute_group(5, "methyl", MinimumDistanceNN)
    self.assertEqual(eth_mol, eth_str)

    # Check that MoleculeGraph input is handled properly
    graph_dict = {(0, neighbour): {"weight": 1.0} for neighbour in (1, 2, 3)}
    eth_mg = copy.deepcopy(self.ethylene)
    eth_graph = copy.deepcopy(self.ethylene)
    eth_graph.substitute_group(5, molecule, MinimumDistanceNN, graph_dict=graph_dict)
    eth_mg.substitute_group(5, molgraph, MinimumDistanceNN)

    new_edge = eth_graph.graph.get_edge_data(5, 6)[0]
    self.assertEqual(new_edge["weight"], 1.0)
    self.assertEqual(eth_mg, eth_graph)
def test_metal_edge_extender(self):
    """metal_edge_extender must add one edge to the 11-bond LiEC graph."""
    bond_pairs = [
        (0, 2), (0, 1), (1, 3), (1, 4), (2, 7), (2, 5),
        (2, 8), (3, 6), (4, 5), (5, 9), (5, 10),
    ]
    mol_graph = MoleculeGraph.with_edges(
        molecule=self.LiEC,
        edges={pair: None for pair in bond_pairs},
    )
    self.assertEqual(len(mol_graph.graph.edges), 11)

    extended_mol_graph = metal_edge_extender(mol_graph)
    # Assert on the graph that was returned. The old check inspected the input
    # graph, which only holds if the extender mutates its argument, and it left
    # `extended_mol_graph` unused.
    self.assertEqual(len(extended_mol_graph.graph.edges), 12)
def rdkit_mol_to_wrapper_mol(m, charge=None, free_energy=None, identifier=None):
    """
    Convert an rdkit molecule to a :class:`MoleculeWrapper` molecule.

    A molecule graph is built from the atoms, conformer coordinates and bonds
    of the rdkit mol, and the rdkit mol itself is attached to the wrapper as
    ``rdkit_mol``.

    Args:
        m (Chem.Mol): rdkit molecule
        charge (int): charge of the molecule. If None, the formal charge of the
            rdkit mol is used; otherwise the given value takes precedence.
        free_energy (float): free energy of the molecule
        identifier (str): (unique) identifier of the molecule. If None, the
            ``_Name`` property of the rdkit mol is used.

    Returns:
        MoleculeWrapper instance
    """
    species = [atom.GetSymbol() for atom in m.GetAtoms()]

    # NOTE: m.GetConformer().GetPositions() results in a segfault on linux, so
    # the positions are read atom by atom instead.
    conformer = m.GetConformer()
    coords = [list(conformer.GetAtomPosition(idx)) for idx in range(m.GetNumAtoms())]

    # Each bond is keyed by its sorted pair of atom indices.
    bonds = {
        tuple(sorted([bond.GetBeginAtomIdx(), bond.GetEndAtomIdx()])): None
        for bond in m.GetBonds()
    }

    if charge is None:
        charge = Chem.GetFormalCharge(m)

    pymatgen_mol = pymatgen.Molecule(species, coords, charge)
    mol_graph = MoleculeGraph.with_edges(pymatgen_mol, bonds)

    if identifier is None:
        identifier = m.GetProp("_Name")

    wrapper = MoleculeWrapper(mol_graph, free_energy, identifier)
    wrapper.rdkit_mol = m
    return wrapper
def test_in_database_through_build_new_FWs(self):
    """Check the number of new fireworks built when documents are supplied.

    A FragmentMolecule task is configured by hand, its unique fragments are
    taken from the pc_frag1 molecule graph, and the documents loaded from
    ``doc.json`` are supplied as ``all_relevant_docs``. 29 new fireworks are
    expected.
    """
    ft = FragmentMolecule(
        molecule=self.pc_frag1, edges=self.pc_frag1_edges, depth=0
    )
    # Set the attributes the private builders read.
    ft.mol = ft.get("molecule")
    ft.depth = ft.get("depth")
    ft.charges = [-1, 0, 1]
    ft.do_triplets = False
    ft.qchem_input_params = {}

    edge_dict = dict.fromkeys((e[0], e[1]) for e in self.pc_frag1_edges)
    mol_graph = MoleculeGraph.with_edges(self.pc_frag1, edge_dict)
    ft.unique_fragments = mol_graph.build_unique_fragments()
    ft._build_unique_relevant_molecules()

    docs = loadfn(os.path.join(module_dir, "doc.json"))
    for doc in docs:
        # Replace the loaded molecule object with its dict representation.
        doc_input = doc["input"]
        doc_input["initial_molecule"] = doc_input["initial_molecule"].as_dict()
    ft.all_relevant_docs = docs

    self.assertEqual(len(ft._build_new_FWs()), 29)
def create_LiEC_mol_graph():
    """Return a MoleculeGraph for the LiEC molecule with a fixed set of bonds.

    The molecule comes from ``create_LiEC_pymatgen_mol``; each bond is
    registered without edge properties (value ``None``).
    """
    bonded_pairs = (
        (0, 2),
        (0, 1),
        (2, 5),
        (2, 8),
        (4, 1),
        (5, 4),
        (5, 10),
        (7, 2),
        (9, 5),
        (3, 6),
        (3, 1),
    )
    return MoleculeGraph.with_edges(
        create_LiEC_pymatgen_mol(), dict.fromkeys(bonded_pairs)
    )
def test_verify_with_graphs(self):
    """Check graph-based validation of GSMIsomerInput coordinates.

    With ``use_graph=True`` each of the invalid coordinate specifications
    below must raise ValueError, while a valid specification must yield a
    molecule graph equal to the OpenBabelNN graph of ethane.
    """
    ethane = Molecule.from_file(os.path.join(test_dir, "ethane.mol"))

    invalid_specifications = (
        {"bonds_formed": [(0, 1)]},  # bad bond formed
        {"bonds_broken": [(1, 2)]},  # bad bond broken
        {"angles": [(0, 1, 2)]},  # bad angle
        {"torsions": [(0, 1, 2, 3)]},  # bad torsion
        {"out_of_planes": [(0, 1, 2, 3)]},  # bad out of plane bend
    )
    for bad_kwargs in invalid_specifications:
        with self.assertRaises(ValueError):
            GSMIsomerInput(molecule=ethane, use_graph=True, **bad_kwargs)

    # Test good
    valid_input = GSMIsomerInput(
        molecule=ethane,
        bonds_formed=[(0, 7)],
        angles=[(1, 0, 4)],
        torsions=[(2, 0, 1, 6)],
        use_graph=True,
    )
    expected_graph = MoleculeGraph.with_local_env_strategy(ethane, OpenBabelNN())
    self.assertEqual(expected_graph, valid_input.molecule_graph)
def test_build_unique_fragments(self):
    """Check the fragments produced by build_unique_fragments for the pc molecule.

    295 fragments are expected. Every fragment must be non-isomorphic to all
    others, must have matching Molecule sites and graph nodes (count, specie
    and coordinates), and must be connected.
    """
    edge_dict = dict.fromkeys((e[0], e[1]) for e in self.pc_edges)
    mol_graph = MoleculeGraph.with_edges(self.pc, edge_dict)

    fragments_by_key = mol_graph.build_unique_fragments()
    unique_fragments = [
        fragment
        for key in fragments_by_key
        for fragment in fragments_by_key[key]
    ]
    self.assertEqual(len(unique_fragments), 295)

    nm = iso.categorical_node_match("specie", "ERROR")
    for position, fragment in enumerate(unique_fragments):
        # Test that each fragment is unique
        for later_fragment in unique_fragments[position + 1:]:
            self.assertFalse(
                nx.is_isomorphic(
                    fragment.graph, later_fragment.graph, node_match=nm
                )
            )

        # Test that each fragment correctly maps between Molecule and graph
        self.assertEqual(len(fragment.molecule), len(fragment.graph.nodes))
        species = nx.get_node_attributes(fragment.graph, "specie")
        coords = nx.get_node_attributes(fragment.graph, "coords")
        for site_index, site in enumerate(fragment.molecule):
            self.assertEqual(str(species[site_index]), str(site.specie))
            for axis in range(3):
                self.assertEqual(coords[site_index][axis], site.coords[axis])

        # Test that each fragment is connected
        self.assertTrue(nx.is_connected(fragment.graph.to_undirected()))
def test_construction(self):
    """Check MoleculeGraph construction from edges and from a local-env strategy.

    Graphs built with ``with_edges`` for pc_frag1 and pc are compared with the
    stored reference graphs. The edge-built pc graph must be isomorphic to
    the OpenBabelNN one, and VoronoiNN must be rejected with ValueError.
    """

    def assert_matches_reference(built, reference_file):
        # Compare a freshly built graph with the serialized reference:
        # overall equality, adjacency, and per-node specie and coordinates.
        reference = loadfn(os.path.join(module_dir, reference_file))
        self.assertEqual(built, reference)
        self.assertEqual(built.graph.adj, reference.graph.adj)
        for node in built.graph.nodes:
            built_node = built.graph.nodes[node]
            reference_node = reference.graph.nodes[node]
            self.assertEqual(built_node["specie"], reference_node["specie"])
            for axis in range(3):
                self.assertEqual(
                    built_node["coords"][axis], reference_node["coords"][axis]
                )

    # The reference files were presumably produced with
    # dumpfn(mol_graph.as_dict(), os.path.join(module_dir, "<name>.json")).
    edges_frag = {
        (e[0], e[1]): {"weight": 1.0} for e in self.pc_frag1_edges
    }
    assert_matches_reference(
        MoleculeGraph.with_edges(self.pc_frag1, edges_frag), "pc_frag1_mg.json"
    )

    edges_pc = {(e[0], e[1]): {"weight": 1.0} for e in self.pc_edges}
    assert_matches_reference(
        MoleculeGraph.with_edges(self.pc, edges_pc), "pc_mg.json"
    )

    graph_from_edges = MoleculeGraph.with_edges(self.pc, edges=edges_pc)
    graph_from_strategy = MoleculeGraph.with_local_env_strategy(
        self.pc, OpenBabelNN()
    )
    self.assertTrue(graph_from_edges.isomorphic_to(graph_from_strategy))

    # Check inappropriate strategy
    with self.assertRaises(ValueError):
        MoleculeGraph.with_local_env_strategy(self.pc, VoronoiNN())
def test_isomorphic(self):
    """Check MoleculeGraph.isomorphic_to on relabelled, copied and different graphs.

    Three cases are covered: an ethylene graph whose two carbons have swapped
    indices, a deep copy of the ethylene fixture, and butadiene compared with
    ethylene (which must not be isomorphic).
    """
    ethylene = Molecule.from_file(
        os.path.join(
            PymatgenTest.TEST_FILES_DIR,
            "graphs/ethylene.xyz",
        )
    )
    # switch carbons
    ethylene[0], ethylene[1] = ethylene[1], ethylene[0]

    eth_switched = MoleculeGraph.with_edges(
        ethylene,
        {
            (0, 1): {"weight": 2},
            (1, 2): {"weight": 1},
            (1, 3): {"weight": 1},
            (0, 4): {"weight": 1},
            (0, 5): {"weight": 1},
        },
    )
    # Previously this graph was overwritten by the deep copy below before it
    # was ever used, so the relabelled case was never exercised.
    # NOTE(review): assumes the self.ethylene fixture describes the same
    # bonding (C=C plus two hydrogens per carbon) - confirm against setUp.
    self.assertTrue(self.ethylene.isomorphic_to(eth_switched))

    # If they are equal, they must also be isomorphic
    eth_copy = copy.deepcopy(self.ethylene)
    self.assertTrue(self.ethylene.isomorphic_to(eth_copy))

    self.assertFalse(self.butadiene.isomorphic_to(self.ethylene))
def opt_with_frequency_flattener(cls,
                                 qchem_command,
                                 multimode="openmp",
                                 input_file="mol.qin",
                                 output_file="mol.qout",
                                 qclog_file="mol.qclog",
                                 max_iterations=10,
                                 max_molecule_perturb_scale=0.3,
                                 check_connectivity=True,
                                 **QCJob_kwargs):
    """
    Optimize a structure and calculate vibrational frequencies to check if the
    structure is a true minimum. If a frequency is negative, iteratively
    perturb the geometry, optimize, and recalculate frequencies until all are
    positive, i.e. a true minimum has been found.

    This is a generator: each iteration yields an optimization job and then,
    unless the optimization broke the molecule apart, a frequency job. The
    output of each job is read back from ``output_file`` plus the job suffix
    once the generator is resumed.

    Args:
        qchem_command (str): Command to run QChem.
        multimode (str): Parallelization scheme, either openmp or mpi.
        input_file (str): Name of the QChem input file.
        output_file (str): Name of the QChem output file.
        qclog_file (str): Name of the QChem log file, passed through to QCJob.
        max_iterations (int): Number of perturbation -> optimization -> frequency
            iterations to perform. Defaults to 10.
        max_molecule_perturb_scale (float): The maximum scaled perturbation that
            can be applied to the molecule. Defaults to 0.3.
        check_connectivity (bool): Whether to check differences in connectivity
            introduced by structural perturbation. If False, the largest
            perturbation is accepted without any check. Defaults to True.
        **QCJob_kwargs: Passthrough kwargs to QCJob. See
            :class:`custodian.qchem.jobs.QCJob`.

    Yields:
        QCJob: the next optimization or frequency job to run.

    Raises:
        AssertionError: if the input file is missing, if a frequency job
            reports errors, or if the direction-reversal bookkeeping reaches
            an inconsistent state.
        Exception: if reversing the perturbation direction does not change the
            number of negative frequencies, or if no perturbation preserves
            the connectivity.
    """
    min_molecule_perturb_scale = 0.1
    scale_grid = 10
    perturb_scale_grid = (
        max_molecule_perturb_scale - min_molecule_perturb_scale
    ) / scale_grid

    if not os.path.exists(input_file):
        raise AssertionError('Input file must be present!')

    # The optimization and frequency jobs share every rem setting except job_type.
    orig_opt_input = QCInput.from_file(input_file)
    orig_opt_rem = copy.deepcopy(orig_opt_input.rem)
    orig_freq_rem = copy.deepcopy(orig_opt_input.rem)
    orig_freq_rem["job_type"] = "freq"

    first = True
    reversed_direction = False
    num_neg_freqs = []

    for ii in range(max_iterations):
        yield (QCJob(
            qchem_command=qchem_command,
            multimode=multimode,
            input_file=input_file,
            output_file=output_file,
            qclog_file=qclog_file,
            suffix=".opt_" + str(ii),
            backup=first,
            **QCJob_kwargs))
        first = False

        opt_outdata = QCOutput(output_file + ".opt_" + str(ii)).data
        if (opt_outdata["structure_change"] == "unconnected_fragments"
                and not opt_outdata["completion"]):
            print("Unstable molecule broke into unconnected fragments which failed to optimize! Exiting...")
            break

        # Run a frequency calculation on the optimized geometry.
        freq_QCInput = QCInput(
            molecule=opt_outdata.get("molecule_from_optimized_geometry"),
            rem=orig_freq_rem,
            opt=orig_opt_input.opt,
            pcm=orig_opt_input.pcm,
            solvent=orig_opt_input.solvent)
        freq_QCInput.write_file(input_file)
        yield (QCJob(
            qchem_command=qchem_command,
            multimode=multimode,
            input_file=input_file,
            output_file=output_file,
            qclog_file=qclog_file,
            suffix=".freq_" + str(ii),
            backup=first,
            **QCJob_kwargs))

        outdata = QCOutput(output_file + ".freq_" + str(ii)).data
        errors = outdata.get("errors")
        if len(errors) != 0:
            raise AssertionError('No errors should be encountered while flattening frequencies!')

        frequencies = outdata.get('frequencies')
        # Only the first frequency is inspected; presumably the parser lists
        # them in ascending order - TODO confirm.
        if frequencies[0] > 0.0:
            print("All frequencies positive!")
            break

        num_neg_freqs.append(sum(1 for freq in frequencies if freq < 0))
        if len(num_neg_freqs) > 1:
            if num_neg_freqs[-1] == num_neg_freqs[-2]:
                # The last perturbation did not remove any negative frequency.
                if reversed_direction:
                    if len(num_neg_freqs) < 3:
                        raise AssertionError("ERROR: This should only be possible after at least three frequency flattening iterations! Exiting...")
                    raise Exception("ERROR: Reversing the perturbation direction still could not flatten any frequencies. Exiting...")
                # First failure: try perturbing in the opposite direction.
                reversed_direction = True
            else:
                # Progress was made, so go back to the default direction.
                reversed_direction = False

        # Perturb along the mode vectors of the first frequency.
        negative_freq_vecs = outdata.get("frequency_mode_vectors")[0]
        structure_successfully_perturbed = False

        # The reference graph does not depend on the perturbation scale, so
        # it is built once rather than on every pass through the loop below.
        old_molgraph = None
        if check_connectivity:
            old_molgraph = MoleculeGraph.with_local_env_strategy(
                outdata.get("initial_molecule"),
                OpenBabelNN(),
                reorder=False,
                extend_structure=False)

        # Try progressively smaller perturbations, starting from the maximum.
        for molecule_perturb_scale in np.arange(
                max_molecule_perturb_scale, min_molecule_perturb_scale,
                -perturb_scale_grid):
            new_coords = perturb_coordinates(
                old_coords=outdata.get("initial_geometry"),
                negative_freq_vecs=negative_freq_vecs,
                molecule_perturb_scale=molecule_perturb_scale,
                reversed_direction=reversed_direction)
            new_molecule = Molecule(
                species=outdata.get('species'),
                coords=new_coords,
                charge=outdata.get('charge'),
                spin_multiplicity=outdata.get('multiplicity'))

            if not check_connectivity:
                # Without a connectivity check there is nothing to reject, so
                # the first (largest) perturbation is accepted. Previously the
                # success flag was never set in this case and the method
                # always raised below.
                structure_successfully_perturbed = True
                break

            new_molgraph = MoleculeGraph.with_local_env_strategy(
                new_molecule,
                OpenBabelNN(),
                reorder=False,
                extend_structure=False)
            if old_molgraph.isomorphic_to(new_molgraph):
                structure_successfully_perturbed = True
                break

        if not structure_successfully_perturbed:
            raise Exception(
                "ERROR: Unable to perturb coordinates to remove negative frequency without changing the connectivity! Exiting..."
            )

        # Write the perturbed geometry as the input of the next optimization.
        new_opt_QCInput = QCInput(
            molecule=new_molecule,
            rem=orig_opt_rem,
            opt=orig_opt_input.opt,
            pcm=orig_opt_input.pcm,
            solvent=orig_opt_input.solvent)
        new_opt_QCInput.write_file(input_file)
def __init__(self, molecule, edges=None, depth=1, open_rings=True, opt_steps=10000):
    """
    Standard constructor for molecule fragmentation

    Args:
        molecule (Molecule): The molecule to fragment
        edges (list): List of index pairs that define graph edges, aka molecule bonds. If not
            set, edges will be determined with OpenBabel.
        depth (int): The number of levels of iterative fragmentation to perform, where each
            level will include fragments obtained by breaking one bond of a fragment one level
            up. Defaults to 1. However, if set to 0, instead all possible fragments are
            generated using an alternative, non-iterative scheme.
        open_rings (bool): Whether or not to open any rings encountered during fragmentation.
            Defaults to True. If true, any bond that fails to yield disconnected graphs when
            broken is instead removed and the entire structure is optimized with OpenBabel in
            order to obtain a good initial guess for an opened geometry that can then be put
            back into QChem to be optimized without the ring just reforming.
        opt_steps (int): Number of optimization steps when opening rings. Defaults to 10000.
    """
    self.open_rings = open_rings
    self.opt_steps = opt_steps

    if edges is not None:
        # Explicit bonds: each index pair becomes an edge without properties.
        edge_dict = {(e[0], e[1]): None for e in edges}
        self.mol_graph = MoleculeGraph.with_edges(molecule, edge_dict)
    else:
        self.mol_graph = MoleculeGraph.with_local_env_strategy(
            molecule, OpenBabelNN(), reorder=False, extend_structure=False
        )

    self.unique_fragments = []
    self.unique_fragments_from_ring_openings = []

    if depth == 0:
        # Non-iterative scheme: find all unique fragments besides those that
        # involve ring opening, then optionally open every ring present in
        # them to capture the fragments that require ring opening.
        self.unique_fragments = self.mol_graph.build_unique_fragments()
        if self.open_rings:
            self._open_all_rings()
        return

    # Iterative scheme: level 0 fragments the principal molecule graph; each
    # later level fragments everything found on the level before it.
    self.fragments_by_level = {}
    for level in range(depth):
        if level == 0:
            parents = [self.mol_graph]
        else:
            parents = self.fragments_by_level[str(level - 1)]
            if len(parents) == 0:
                # Nothing left to fragment, so exit the loop.
                break
        self.fragments_by_level[str(level)] = self._fragment_one_level(parents)
def __init__(self, molecule, optimize=False):
    """
    Instantiation method for FunctionalGroupExtractor.

    :param molecule: Either a filename, a pymatgen.core.structure.Molecule
        object, or a pymatgen.analysis.graphs.MoleculeGraph object.
    :param optimize: Default False. If True, then the input molecule will be
        modified, adding Hydrogens, performing a simple conformer search,
        etc.
    :raises ValueError: if ``molecule`` is a filename that cannot be read, or
        is not a str, Molecule or MoleculeGraph.
    """

    def _openbabel_optimized(adaptor):
        # Add hydrogens, build 3D coordinates and run a local optimization on
        # the wrapped molecule, then hand back the pymatgen Molecule.
        adaptor.add_hydrogen()
        adaptor.make3d()
        adaptor.localopt()
        return adaptor.pymatgen_mol

    self.molgraph = None

    if isinstance(molecule, str):
        try:
            if optimize:
                # OBMolecule does not contain pymatgen Molecule information,
                # so the file is read through a BabelMolAdaptor.
                self.molecule = _openbabel_optimized(
                    BabelMolAdaptor.from_file(molecule, file_format="mol")
                )
            else:
                self.molecule = Molecule.from_file(molecule)
        except OSError as err:
            raise ValueError("Input must be a valid molecule file, a "
                             "Molecule object, or a MoleculeGraph object.") from err

    elif isinstance(molecule, Molecule):
        if optimize:
            self.molecule = _openbabel_optimized(BabelMolAdaptor(molecule))
        else:
            self.molecule = molecule

    elif isinstance(molecule, MoleculeGraph):
        if optimize:
            self.molecule = _openbabel_optimized(BabelMolAdaptor(molecule.molecule))
        else:
            # The supplied graph is only reused when the molecule is left
            # untouched; after optimization a new graph is built below.
            self.molecule = molecule.molecule
            self.molgraph = molecule

    else:
        # The two literals are concatenated, so the first needs a trailing
        # space (the message used to read "must bestr, ...").
        raise ValueError("Input to FunctionalGroupExtractor must be "
                         "str, Molecule, or MoleculeGraph.")

    if self.molgraph is None:
        self.molgraph = MoleculeGraph.with_local_env_strategy(
            self.molecule, OpenBabelNN(), reorder=False, extend_structure=False
        )

    # Assign a specie and coordinates to each node in the graph,
    # corresponding to the Site in the Molecule object
    self.molgraph.set_node_attributes()

    self.species = nx.get_node_attributes(self.molgraph.graph, "specie")