def test_build_unique_relevant_molecules_with_triplets(self):
    # Enumerates unique molecules with triplets enabled for three inputs
    # (self.pc, self.pos_pc and self.pc_frag1) and checks the number of
    # molecules produced for each.
    #
    # Each scenario: molecule, bond list, extra FragmentMolecule kwargs,
    # charges, expected molecule count, reference file name.
    scenarios = (
        (self.pc, self.pc_edges, {"opt_steps": 1000},
         [-1, 0, 1], 1323, "pc_mols_with_trips.json"),
        (self.pos_pc, self.pc_edges, {"opt_steps": 1000},
         [-1, 0, 1, 2], 1770, "pos_pc_mols_with_trips.json"),
        (self.pc_frag1, self.pc_frag1_edges, {},
         [-1, 0, 1], 54, "pc_frag1_mols_with_trips.json"),
    )
    for molecule, bond_pairs, extra_kwargs, charges, expected_total, ref_name in scenarios:
        task = FragmentMolecule(molecule=molecule, edges=bond_pairs, depth=0, **extra_kwargs)
        task.mol = task.get("molecule")
        task.depth = task.get("depth")
        task.charges = charges
        task.do_triplets = True

        bond_dict = {(pair[0], pair[1]): None for pair in bond_pairs}
        fragments_by_key = MoleculeGraph.with_edges(molecule, bond_dict).build_unique_fragments()
        # The task is given a flat list, so flatten the per-key fragment lists.
        task.unique_fragments = [
            fragment
            for key in fragments_by_key
            for fragment in fragments_by_key[key]
        ]
        task._build_unique_relevant_molecules()
        self.assertEqual(len(task.unique_molecules), expected_total)

        # The reference can be regenerated with
        #   dumpfn(task.unique_molecules, os.path.join(module_dir, ref_name))
        # It is only loaded here; the equality check against it is disabled:
        #   self.assertEqual(task.unique_molecules, ref_mols)
        ref_mols = loadfn(os.path.join(module_dir, ref_name))
def test_build_unique_relevant_molecules(self):
    # Enumerates unique molecules without triplets for self.pc, self.pos_pc
    # and self.pc_frag1, checking both the count and the stored reference.

    def enumerate_molecules(molecule, bond_pairs, charges, **task_kwargs):
        # Build the task, feed it the unique fragments and return its molecules.
        task = FragmentMolecule(molecule=molecule, edges=bond_pairs, depth=0, **task_kwargs)
        task.mol = task.get("molecule")
        task.depth = task.get("depth")
        task.charges = charges
        task.do_triplets = False
        bond_dict = {(pair[0], pair[1]): None for pair in bond_pairs}
        task.unique_fragments = MoleculeGraph.with_edges(molecule, bond_dict).build_unique_fragments()
        task._build_unique_relevant_molecules()
        return task.unique_molecules

    def reference(name):
        # Regenerate with dumpfn(<molecules>, os.path.join(module_dir, name)).
        return loadfn(os.path.join(module_dir, name))

    pc_molecules = enumerate_molecules(self.pc, self.pc_edges, [-1, 0, 1], opt_steps=1000)
    self.assertEqual(len(pc_molecules), 295 * 3)
    self.assertEqual(pc_molecules, reference("pc_mols.json"))

    pos_pc_molecules = enumerate_molecules(self.pos_pc, self.pc_edges, [-1, 0, 1, 2], opt_steps=1000)
    self.assertEqual(len(pos_pc_molecules), 295 * 4)
    self.assertEqual(pos_pc_molecules, reference("pos_pc_mols.json"))

    frag1_molecules = enumerate_molecules(self.pc_frag1, self.pc_frag1_edges, [-1, 0, 1])
    self.assertEqual(len(frag1_molecules), 12 * 3)
    self.assertEqual(frag1_molecules, reference("pc_frag1_mols.json"))
def test_substitute(self):
    # Checks that substitute_group gives the same result whether the
    # functional group is passed as a Molecule, a name, or a MoleculeGraph.
    methyl = FunctionalGroups["methyl"]
    methyl_graph = MoleculeGraph.with_edges(
        methyl,
        {(0, neighbor): {"weight": 1} for neighbor in (1, 2, 3)},
    )

    # Ensure that strings and molecules lead to equivalent substitutions
    from_molecule = copy.deepcopy(self.ethylene)
    from_name = copy.deepcopy(self.ethylene)
    from_molecule.substitute_group(5, methyl, MinimumDistanceNN)
    from_name.substitute_group(5, "methyl", MinimumDistanceNN)
    self.assertEqual(from_molecule, from_name)

    # Check that MoleculeGraph input is handled properly
    weighted_bonds = {(0, neighbor): {"weight": 1.0} for neighbor in (1, 2, 3)}
    from_molgraph = copy.deepcopy(self.ethylene)
    from_graph_dict = copy.deepcopy(self.ethylene)
    from_graph_dict.substitute_group(5, methyl, MinimumDistanceNN, graph_dict=weighted_bonds)
    from_molgraph.substitute_group(5, methyl_graph, MinimumDistanceNN)
    new_bond = from_graph_dict.graph.get_edge_data(5, 6)[0]
    self.assertEqual(new_bond["weight"], 1.0)
    self.assertEqual(from_molgraph, from_graph_dict)
def test_isomorphic(self):
    # Checks isomorphic_to on three cases: a relabelled ethylene (carbons
    # swapped), an exact copy of ethylene, and a different molecule.
    ethylene = Molecule.from_file(
        os.path.join(PymatgenTest.TEST_FILES_DIR, "graphs/ethylene.xyz")
    )
    # switch carbons
    ethylene[0], ethylene[1] = ethylene[1], ethylene[0]

    eth_swapped = MoleculeGraph.with_edges(
        ethylene,
        {
            (0, 1): {"weight": 2},
            (1, 2): {"weight": 1},
            (1, 3): {"weight": 1},
            (0, 4): {"weight": 1},
            (0, 5): {"weight": 1},
        },
    )
    # Previously this graph was overwritten by the deep copy below before it
    # was ever used, so the relabelled case was never exercised.
    self.assertTrue(self.ethylene.isomorphic_to(eth_swapped))

    # If they are equal, they must also be isomorphic
    eth_copy = copy.deepcopy(self.ethylene)
    self.assertTrue(self.ethylene.isomorphic_to(eth_copy))

    self.assertFalse(self.butadiene.isomorphic_to(self.ethylene))
def test_in_database_through_build_new_FWs(self):
    # Builds the unique molecules for self.pc_frag1, supplies documents loaded
    # from doc.json as the already-known entries, and checks how many new
    # fireworks are still built.
    task = FragmentMolecule(molecule=self.pc_frag1, edges=self.pc_frag1_edges, depth=0)
    task.mol = task.get("molecule")
    task.depth = task.get("depth")
    task.charges = [-1, 0, 1]
    task.do_triplets = False
    task.linked = False
    task.qchem_input_params = {}

    bond_dict = {(pair[0], pair[1]): None for pair in self.pc_frag1_edges}
    fragments_by_key = MoleculeGraph.with_edges(self.pc_frag1, bond_dict).build_unique_fragments()
    # The task is given a flat list, so flatten the per-key fragment lists.
    task.unique_fragments = [
        fragment
        for key in fragments_by_key
        for fragment in fragments_by_key[key]
    ]
    task._build_unique_relevant_molecules()

    known_docs = loadfn(os.path.join(module_dir, "doc.json"))
    for entry in known_docs:
        # Replace each loaded molecule object by its dict form.
        entry_input = entry["input"]
        entry_input["initial_molecule"] = entry_input["initial_molecule"].as_dict()
    task.all_relevant_docs = known_docs

    fireworks = task._build_new_FWs()
    self.assertEqual(len(fireworks), 29)
def schrodinger_struct_to_mol_graph(structure: Structure):
    """
    Convert a Structure object from Schrodinger to a pymatgen MoleculeGraph
    object.

    Args:
        structure (schrodinger.structure.Structure object): Structure to be
            converted

    Returns:
        mg: pymatgen.analysis.graphs.MoleculeGraph object
    """
    formal_charge = structure.formal_charge

    # Atoms are collected molecule by molecule, in iteration order.
    # NOTE(review): bond endpoints below use ``atom.index - 1``; this assumes
    # the molecule-by-molecule atom order matches the structure-wide atom
    # indices — confirm for multi-molecule structures.
    atom_records = [
        (atom.element, atom.xyz)
        for molecule in structure.molecule
        for atom in molecule.atom
    ]
    elements = [element for element, _ in atom_records]
    positions = [xyz for _, xyz in atom_records]

    # Shift the bond endpoint indices down by one for the edge keys.
    edges = {
        (bond.atom1.index - 1, bond.atom2.index - 1): None
        for bond in structure.bond
    }

    mol = Molecule(elements, positions)
    mol.set_charge_and_spin(charge=formal_charge)
    return MoleculeGraph.with_edges(mol, edges)
def create_wrapper_mol_from_atoms_and_bonds(species, coords, bonds, charge=0, free_energy=None, identifier=None):
    """
    Create a :class:`MoleculeWrapper` from atoms and bonds.

    Args:
        species (list of str): atom species str
        coords (2D array): positions of atoms
        bonds (list of tuple): each tuple is a bond (atom indices)
        charge (int): charge of the molecule
        free_energy (float): free energy of the molecule
        identifier (str): (unique) identifier of the molecule

    Returns:
        MoleculeWrapper instance
    """
    molecule = pymatgen.Molecule(species, coords, charge)
    # Sort each bond's indices so (i, j) and (j, i) map to the same edge key.
    edge_dict = dict.fromkeys(tuple(sorted(bond)) for bond in bonds)
    graph = MoleculeGraph.with_edges(molecule, edge_dict)
    return MoleculeWrapper(graph, free_energy, identifier)
def test_construction(self):
    # Builds MoleculeGraphs from explicit edges for self.pc_frag1 and self.pc,
    # compares each with its stored reference, then checks that the edge-built
    # graph is isomorphic to one from OpenBabelNN and that VoronoiNN is
    # rejected with a ValueError.

    def build_and_compare(molecule, bond_pairs, reference_name):
        # Build the graph, compare it to the reference, return the edge dict.
        weighted_edges = {(pair[0], pair[1]): {"weight": 1.0} for pair in bond_pairs}
        built = MoleculeGraph.with_edges(molecule, weighted_edges)
        # The reference can be regenerated with
        #   dumpfn(built.as_dict(), os.path.join(module_dir, reference_name))
        expected = loadfn(os.path.join(module_dir, reference_name))
        self.assertEqual(built, expected)
        self.assertEqual(built.graph.adj, expected.graph.adj)
        for node in built.graph.nodes:
            built_attrs = built.graph.nodes[node]
            expected_attrs = expected.graph.nodes[node]
            self.assertEqual(built_attrs["specie"], expected_attrs["specie"])
            for axis in range(3):
                self.assertEqual(
                    built_attrs["coords"][axis],
                    expected_attrs["coords"][axis],
                )
        return weighted_edges

    build_and_compare(self.pc_frag1, self.pc_frag1_edges, "pc_frag1_mg.json")
    edges_pc = build_and_compare(self.pc, self.pc_edges, "pc_mg.json")

    graph_from_edges = MoleculeGraph.with_edges(self.pc, edges=edges_pc)
    graph_from_strategy = MoleculeGraph.with_local_env_strategy(self.pc, OpenBabelNN())
    self.assertTrue(graph_from_edges.isomorphic_to(graph_from_strategy))

    # Check inappropriate strategy
    with self.assertRaises(ValueError):
        MoleculeGraph.with_local_env_strategy(self.pc, VoronoiNN())
def test_edges_given_PC_not_defaults(self):
    # Constructs a Fragmenter for self.pc with explicit edges and non-default
    # options, then checks the stored options, the molecule graph and the
    # fragment counts.
    fragmenter = Fragmenter(
        molecule=self.pc,
        edges=self.pc_edges,
        depth=2,
        open_rings=False,
        opt_steps=0,
    )
    for attribute, expected in (("open_rings", False), ("opt_steps", 0)):
        self.assertEqual(getattr(fragmenter, attribute), expected)

    bond_dict = {(pair[0], pair[1]): None for pair in self.pc_edges}
    expected_graph = MoleculeGraph.with_edges(self.pc, edges=bond_dict)
    self.assertEqual(fragmenter.mol_graph, expected_graph)

    self.assertEqual(len(fragmenter.unique_fragments), 20)
    self.assertEqual(len(fragmenter.unique_fragments_from_ring_openings), 0)
def convert_to_critic_mol_graph(self):
    """
    Build ``self.mol_graph`` from the bonds listed in ``self.critic["bonding"]``.

    Each bonding entry's ``"atom_ids"`` are converted to ints and shifted down
    by one to form an edge key.

    Raises:
        UnsuccessfulEntryError: if ``"bonding"`` or an entry's ``"atom_ids"``
            key is missing.
    """
    try:
        bonds = {
            tuple(int(atom_id) - 1 for atom_id in bond_info["atom_ids"]): None
            for bond_info in self.critic["bonding"].values()
        }
    except KeyError as e:
        print(self.__class__.__name__, e, "critic bonding", self.id)
        raise UnsuccessfulEntryError

    self.mol_graph = MoleculeGraph.with_edges(self.pymatgen_mol, bonds)
def test_build_unique_relevant_molecules_with_triplets(self):
    # Enumerates unique molecules with triplets enabled for self.pc,
    # self.pos_pc and self.pc_frag1, checking both the count and the stored
    # reference for each.
    #
    # Each scenario: molecule, bond list, extra FragmentMolecule kwargs,
    # charges, expected molecule count, reference file name.
    scenarios = (
        (self.pc, self.pc_edges, {"opt_steps": 1000},
         [-1, 0, 1], 1323, "pc_mols_with_trips.json"),
        (self.pos_pc, self.pc_edges, {"opt_steps": 1000},
         [-1, 0, 1, 2], 1770, "pos_pc_mols_with_trips.json"),
        (self.pc_frag1, self.pc_frag1_edges, {},
         [-1, 0, 1], 54, "pc_frag1_mols_with_trips.json"),
    )
    for molecule, bond_pairs, extra_kwargs, charges, expected_total, ref_name in scenarios:
        task = FragmentMolecule(molecule=molecule, edges=bond_pairs, depth=0, **extra_kwargs)
        task.mol = task.get("molecule")
        task.depth = task.get("depth")
        task.charges = charges
        task.do_triplets = True

        bond_dict = {(pair[0], pair[1]): None for pair in bond_pairs}
        task.unique_fragments = MoleculeGraph.with_edges(molecule, bond_dict).build_unique_fragments()
        task._build_unique_relevant_molecules()
        self.assertEqual(len(task.unique_molecules), expected_total)

        # The reference can be regenerated with
        #   dumpfn(task.unique_molecules, os.path.join(module_dir, ref_name))
        expected_molecules = loadfn(os.path.join(module_dir, ref_name))
        self.assertEqual(task.unique_molecules, expected_molecules)
def test_edges_given_PC_not_defaults(self):
    # Constructs a Fragmenter for self.pc with explicit edges and non-default
    # options, then checks the stored options, the molecule graph and the
    # total number of unique fragments.
    fragmenter = Fragmenter(
        molecule=self.pc,
        edges=self.pc_edges,
        depth=2,
        open_rings=False,
        opt_steps=0,
    )
    for attribute, expected in (("open_rings", False), ("opt_steps", 0)):
        self.assertEqual(getattr(fragmenter, attribute), expected)

    bond_dict = {(pair[0], pair[1]): None for pair in self.pc_edges}
    expected_graph = MoleculeGraph.with_edges(self.pc, edges=bond_dict)
    self.assertEqual(fragmenter.mol_graph, expected_graph)

    self.assertEqual(fragmenter.total_unique_fragments, 20)
def zero_d_graph_to_molecule_graph(bonded_structure, graph):
    """
    Converts a zero-dimensional networkx Graph object into a MoleculeGraph.

    Implements a similar breadth-first search to that in
    calculate_dimensionality_of_site().

    Args:
        bonded_structure (StructureGraph): A structure with bonds, represented
            as a pymatgen structure graph. For example, generated using the
            CrystalNN.get_bonded_structure() method.
        graph (nx.Graph): A networkx `Graph` object for the component of
            interest.

    Returns:
        (MoleculeGraph): A MoleculeGraph object of the component.

    Raises:
        ValueError: if a site index is dequeued a second time, i.e. the
            component is not 0D.
    """
    import networkx as nx

    visited = []  # (site index, image) pairs, in visiting order
    visited_sites = []  # the site objects, parallel to ``visited``

    first_index = list(graph.nodes())[0]
    pending = [(first_index, (0, 0, 0), bonded_structure.structure[first_index])]
    while pending:
        index, image, site = pending.pop(0)

        # The same site index showing up again means the component is not 0D.
        if any(entry[0] == index for entry in visited):
            raise ValueError("Graph component is not 0D")

        visited.append((index, image))
        visited_sites.append(site)

        for neighbor in bonded_structure.get_connected_sites(index, jimage=image):
            if (neighbor.index, neighbor.jimage) in visited:
                continue
            candidate = (neighbor.index, neighbor.jimage, neighbor.site)
            if candidate not in pending:
                pending.append(candidate)

    # sort the list of indices and the graph by index to make consistent
    order = np.argsort([entry[0] for entry in visited])
    ordered_sites = np.array(visited_sites, dtype=object)[order]
    relabelled = nx.convert_node_labels_to_integers(graph, ordering="sorted")

    species = [s.specie for s in ordered_sites]
    coords = [s.coords for s in ordered_sites]
    mol = Molecule(species, coords)
    return MoleculeGraph.with_edges(mol, nx.Graph(relabelled).edges())
def test_build_new_FWs(self):
    # With no previously known documents, checks how many fireworks are built
    # for the unique molecules of self.pc_frag1.
    task = FragmentMolecule(molecule=self.pc_frag1, edges=self.pc_frag1_edges, depth=0)
    task.mol = task.get("molecule")
    task.depth = task.get("depth")
    task.charges = [-1, 0, 1]
    task.do_triplets = False
    task.qchem_input_params = {}

    bond_dict = {(pair[0], pair[1]): None for pair in self.pc_frag1_edges}
    task.unique_fragments = MoleculeGraph.with_edges(self.pc_frag1, bond_dict).build_unique_fragments()
    task._build_unique_relevant_molecules()

    task.all_relevant_docs = []
    fireworks = task._build_new_FWs()
    self.assertEqual(len(fireworks), 36)
def zero_d_graph_to_molecule_graph(bonded_structure, graph):
    """
    Converts a zero-dimensional networkx Graph object into a MoleculeGraph.

    Implements a similar breadth-first search to that in
    calculate_dimensionality_of_site().

    Args:
        bonded_structure (StructureGraph): A structure with bonds, represented
            as a pymatgen structure graph. For example, generated using the
            CrystalNN.get_bonded_structure() method.
        graph (nx.Graph): A networkx `Graph` object for the component of
            interest.

    Returns:
        (MoleculeGraph): A MoleculeGraph object of the component.

    Raises:
        ValueError: if a site index is dequeued a second time, i.e. the
            component is not 0D.
    """
    import networkx as nx

    visited = []  # (site index, image) pairs, in visiting order
    visited_sites = []  # the site objects, parallel to ``visited``

    first_index = list(graph.nodes())[0]
    pending = [(first_index, (0, 0, 0), bonded_structure.structure[first_index])]
    while pending:
        index, image, site = pending.pop(0)

        # The same site index showing up again means the component is not 0D.
        if any(entry[0] == index for entry in visited):
            raise ValueError("Graph component is not 0D")

        visited.append((index, image))
        visited_sites.append(site)

        for neighbor in bonded_structure.get_connected_sites(index, jimage=image):
            if (neighbor.index, neighbor.jimage) in visited:
                continue
            candidate = (neighbor.index, neighbor.jimage, neighbor.site)
            if candidate not in pending:
                pending.append(candidate)

    # sort the list of indices and the graph by index to make consistent
    order = np.argsort([entry[0] for entry in visited])
    ordered_sites = np.array(visited_sites, dtype=object)[order]
    relabelled = nx.convert_node_labels_to_integers(graph, ordering="sorted")

    species = [s.specie for s in ordered_sites]
    coords = [s.coords for s in ordered_sites]
    mol = Molecule(species, coords)
    return MoleculeGraph.with_edges(mol, nx.Graph(relabelled).edges())
def test_in_database_through_build_new_FWs(self):
    # Builds the unique molecules for self.pc_frag1, supplies documents loaded
    # from doc.json as the already-known entries, and checks how many new
    # fireworks are still built.
    task = FragmentMolecule(molecule=self.pc_frag1, edges=self.pc_frag1_edges, depth=0)
    task.mol = task.get("molecule")
    task.depth = task.get("depth")
    task.charges = [-1, 0, 1]
    task.do_triplets = False
    task.qchem_input_params = {}

    bond_dict = {(pair[0], pair[1]): None for pair in self.pc_frag1_edges}
    task.unique_fragments = MoleculeGraph.with_edges(self.pc_frag1, bond_dict).build_unique_fragments()
    task._build_unique_relevant_molecules()

    known_docs = loadfn(os.path.join(module_dir, "doc.json"))
    for entry in known_docs:
        # Replace each loaded molecule object by its dict form.
        entry_input = entry["input"]
        entry_input["initial_molecule"] = entry_input["initial_molecule"].as_dict()
    task.all_relevant_docs = known_docs

    fireworks = task._build_new_FWs()
    self.assertEqual(len(fireworks), 29)
def test_metal_edge_extender(self):
    # Builds the self.LiEC graph with 11 explicit edges and checks that
    # metal_edge_extender brings the edge count to 12.
    mol_graph = MoleculeGraph.with_edges(
        molecule=self.LiEC,
        edges={
            (0, 2): None,
            (0, 1): None,
            (1, 3): None,
            (1, 4): None,
            (2, 7): None,
            (2, 5): None,
            (2, 8): None,
            (3, 6): None,
            (4, 5): None,
            (5, 9): None,
            (5, 10): None,
        },
    )
    self.assertEqual(len(mol_graph.graph.edges), 11)

    extended_mol_graph = metal_edge_extender(mol_graph)
    # Original check on the input graph, kept unchanged.
    self.assertEqual(len(mol_graph.graph.edges), 12)
    # The returned graph was previously bound but never checked.
    self.assertEqual(len(extended_mol_graph.graph.edges), 12)
def rdkit_mol_to_wrapper_mol(m, charge=None, free_energy=None, identifier=None):
    """
    Convert an rdkit molecule to a :class:`MoleculeWrapper` molecule.

    This constructs a molecule graph from the rdkit mol and assigns the rdkit
    mol to the molecule wrapper.

    Args:
        m (Chem.Mol): rdkit molecule
        charge (int): charge of the molecule. If None, inferred from the rdkit
            mol; otherwise, the provided charge will override the inferred.
        free_energy (float): free energy of the molecule
        identifier (str): (unique) identifier of the molecule. If None, the
            rdkit mol's ``_Name`` property is used.

    Returns:
        MoleculeWrapper instance
    """
    species = [atom.GetSymbol() for atom in m.GetAtoms()]

    # coords = m.GetConformer().GetPositions()
    # NOTE, the above way to get coords results in segfault on linux, so we
    # read the positions atom by atom instead.
    conformer = m.GetConformer()
    coords = [list(conformer.GetAtomPosition(i)) for i in range(m.GetNumAtoms())]

    # Sort each bond's indices so both orientations map to the same edge key.
    bonds = {
        tuple(sorted([bond.GetBeginAtomIdx(), bond.GetEndAtomIdx()])): None
        for bond in m.GetBonds()
    }

    if charge is None:
        charge = Chem.GetFormalCharge(m)

    pymatgen_mol = pymatgen.Molecule(species, coords, charge)
    mol_graph = MoleculeGraph.with_edges(pymatgen_mol, bonds)

    if identifier is None:
        identifier = m.GetProp("_Name")

    wrapper = MoleculeWrapper(mol_graph, free_energy, identifier)
    wrapper.rdkit_mol = m
    return wrapper
def create_LiEC_mol_graph():
    """
    Build a MoleculeGraph from a fixed list of eleven bonds and the molecule
    returned by ``create_LiEC_pymatgen_mol()``.

    Returns:
        MoleculeGraph whose edges are the listed atom-index pairs.
    """
    bond_pairs = (
        (0, 2),
        (0, 1),
        (2, 5),
        (2, 8),
        (4, 1),
        (5, 4),
        (5, 10),
        (7, 2),
        (9, 5),
        (3, 6),
        (3, 1),
    )
    # with_edges takes a dict keyed by index pair; no edge properties are set.
    edge_dict = dict.fromkeys(bond_pairs)
    molecule = create_LiEC_pymatgen_mol()
    return MoleculeGraph.with_edges(molecule, edge_dict)
def test_build_unique_fragments(self):
    # Builds all unique fragments of self.pc and checks that there are 295,
    # that no two are isomorphic, that each fragment's graph nodes agree with
    # its molecule, and that each fragment is connected.
    bond_dict = {(pair[0], pair[1]): None for pair in self.pc_edges}
    fragments_by_key = MoleculeGraph.with_edges(self.pc, bond_dict).build_unique_fragments()
    fragments = [
        fragment
        for key in fragments_by_key
        for fragment in fragments_by_key[key]
    ]
    self.assertEqual(len(fragments), 295)

    species_match = iso.categorical_node_match("specie", "ERROR")
    for position, fragment in enumerate(fragments):
        # Test that each fragment is unique
        for later_fragment in fragments[position + 1:]:
            self.assertFalse(
                nx.is_isomorphic(
                    fragment.graph,
                    later_fragment.graph,
                    node_match=species_match,
                )
            )

        # Test that each fragment correctly maps between Molecule and graph
        self.assertEqual(len(fragment.molecule), len(fragment.graph.nodes))
        node_species = nx.get_node_attributes(fragment.graph, "specie")
        node_coords = nx.get_node_attributes(fragment.graph, "coords")
        for site_index, site in enumerate(fragment.molecule):
            self.assertEqual(str(node_species[site_index]), str(site.specie))
            for axis in range(3):
                self.assertEqual(node_coords[site_index][axis], site.coords[axis])

        # Test that each fragment is connected
        self.assertTrue(nx.is_connected(fragment.graph.to_undirected()))
def __init__(self, molecule, edges=None, depth=1, open_rings=False, use_metal_edge_extender=False,
             opt_steps=10000, prev_unique_frag_dict=None, assume_previous_thoroughness=True):
    """
    Standard constructor for molecule fragmentation

    Args:
        molecule (Molecule): The molecule to fragment.
        edges (list): List of index pairs that define graph edges, aka molecule bonds. If not set,
            edges will be determined with OpenBabel. Defaults to None.
        depth (int): The number of levels of iterative fragmentation to perform, where each level
            will include fragments obtained by breaking one bond of a fragment one level up.
            Defaults to 1. However, if set to 0, instead all possible fragments are generated
            using an alternative, non-iterative scheme.
        open_rings (bool): Whether or not to open any rings encountered during fragmentation.
            Defaults to False. If true, any bond that fails to yield disconnected graphs when
            broken is instead removed and the entire structure is optimized with OpenBabel in
            order to obtain a good initial guess for an opened geometry that can then be put
            back into QChem to be optimized without the ring just reforming.
        use_metal_edge_extender (bool): Whether or not to attempt to add additional edges from
            O, N, F, or Cl to any Li or Mg atoms present that OpenBabel may have missed. Defaults
            to False. Most important for ionic bonding. Note that additional metal edges may yield
            new "rings" (e.g. -C-O-Li-O- in LiEC) that will not play nicely with ring opening.
        opt_steps (int): Number of optimization steps when opening rings. Defaults to 10000.
        prev_unique_frag_dict (dict): A dictionary of previously identified unique fragments.
            Defaults to None. Typically only used when trying to find the set of unique fragments
            that come from multiple molecules.
        assume_previous_thoroughness (bool): Whether or not to assume that a molecule / fragment
            provided in prev_unique_frag_dict has all of its unique subfragments also provided in
            prev_unique_frag_dict. Defaults to True. This is an essential optimization when trying
            to find the set of unique fragments that come from multiple molecules if all of those
            molecules are being fully iteratively fragmented. However, if you're passing a
            prev_unique_frag_dict which includes a molecule and its fragments that were generated
            at insufficient depth to find all possible subfragments to a fragmentation calculation
            of a different molecule that you aim to find all possible subfragments of and which has
            common subfragments with the previous molecule, this optimization will cause you to
            miss some unique subfragments.
    """
    self.assume_previous_thoroughness = assume_previous_thoroughness
    self.open_rings = open_rings
    self.opt_steps = opt_steps

    if edges is None:
        self.mol_graph = MoleculeGraph.with_local_env_strategy(molecule, OpenBabelNN())
    else:
        edges = {(e[0], e[1]): None for e in edges}
        self.mol_graph = MoleculeGraph.with_edges(molecule, edges)

    if ("Li" in molecule.composition or "Mg" in molecule.composition) and use_metal_edge_extender:
        self.mol_graph = metal_edge_extender(self.mol_graph)

    self.prev_unique_frag_dict = prev_unique_frag_dict or {}
    self.new_unique_frag_dict = {}  # new fragments from the given molecule not contained in prev_unique_frag_dict
    self.all_unique_frag_dict = {}  # all fragments from just the given molecule
    self.unique_frag_dict = {}  # all fragments from both the given molecule and prev_unique_frag_dict

    if depth == 0:
        # Non-iterative, find all possible fragments:
        # Find all unique fragments besides those involving ring opening
        self.all_unique_frag_dict = self.mol_graph.build_unique_fragments()

        # Then, if self.open_rings is True, open all rings present in the unique fragments
        # in order to capture all unique fragments that require ring opening.
        if self.open_rings:
            self._open_all_rings()
    else:
        # Iterative fragment generation:
        self.fragments_by_level = {}

        for level in range(depth):
            if level == 0:
                # First level: fragment the principle molecule graph itself, keyed by
                # "<alphabetical formula> E<number of edges>".
                root_key = (
                    str(self.mol_graph.molecule.composition.alphabetical_formula)
                    + " E"
                    + str(len(self.mol_graph.graph.edges()))
                )
                self.fragments_by_level["0"] = self._fragment_one_level({root_key: [self.mol_graph]})
                continue

            previous_level = self.fragments_by_level[str(level - 1)]
            num_frags_prev_level = sum(len(previous_level[key]) for key in previous_level)
            if num_frags_prev_level == 0:
                # Nothing left to fragment, so exit the loop:
                break
            # Otherwise perform one level of fragmentation on all fragments present in
            # the previous level:
            self.fragments_by_level[str(level)] = self._fragment_one_level(previous_level)

    # Work out which fragments of this molecule are new relative to prev_unique_frag_dict.
    if not self.prev_unique_frag_dict:
        self.new_unique_frag_dict = copy.deepcopy(self.all_unique_frag_dict)
    else:
        for frag_key in self.all_unique_frag_dict:
            if frag_key not in self.prev_unique_frag_dict:
                self.new_unique_frag_dict[frag_key] = copy.deepcopy(self.all_unique_frag_dict[frag_key])
                continue
            previous_frags = self.prev_unique_frag_dict[frag_key]
            for fragment in self.all_unique_frag_dict[frag_key]:
                # any() stops at the first isomorphic match instead of testing
                # every remaining previous fragment after one has been found.
                if any(fragment.isomorphic_to(prev_frag) for prev_frag in previous_frags):
                    continue
                self.new_unique_frag_dict.setdefault(frag_key, []).append(fragment)

    self.new_unique_fragments = sum(len(frags) for frags in self.new_unique_frag_dict.values())

    # Combine the previous and the new fragments into the overall dictionary.
    if not self.prev_unique_frag_dict:
        self.unique_frag_dict = self.new_unique_frag_dict
        self.total_unique_fragments = self.new_unique_fragments
    else:
        self.unique_frag_dict = copy.deepcopy(self.prev_unique_frag_dict)
        for frag_key, new_frags in self.new_unique_frag_dict.items():
            if frag_key in self.unique_frag_dict:
                self.unique_frag_dict[frag_key].extend(new_frags)
            else:
                self.unique_frag_dict[frag_key] = copy.deepcopy(new_frags)

        self.total_unique_fragments = sum(len(frags) for frags in self.unique_frag_dict.values())
def __init__(self, molecule, edges=None, depth=1, open_rings=True, opt_steps=10000):
    """
    Standard constructor for molecule fragmentation

    Args:
        molecule (Molecule): The molecule to fragment
        edges (list): List of index pairs that define graph edges, aka molecule bonds. If not
            set, edges will be determined with OpenBabel.
        depth (int): The number of levels of iterative fragmentation to perform, where each
            level will include fragments obtained by breaking one bond of a fragment one level
            up. Defaults to 1. However, if set to 0, instead all possible fragments are
            generated using an alternative, non-iterative scheme.
        open_rings (bool): Whether or not to open any rings encountered during fragmentation.
            Defaults to True. If true, any bond that fails to yield disconnected graphs when
            broken is instead removed and the entire structure is optimized with OpenBabel in
            order to obtain a good initial guess for an opened geometry that can then be put
            back into QChem to be optimized without the ring just reforming.
        opt_steps (int): Number of optimization steps when opening rings. Defaults to 10000.
    """
    self.open_rings = open_rings
    self.opt_steps = opt_steps

    if edges is None:
        self.mol_graph = MoleculeGraph.with_local_env_strategy(
            molecule, OpenBabelNN(), reorder=False, extend_structure=False
        )
    else:
        edges = {(e[0], e[1]): None for e in edges}
        self.mol_graph = MoleculeGraph.with_edges(molecule, edges)

    self.unique_fragments = []
    self.unique_fragments_from_ring_openings = []

    if depth == 0:
        # Non-iterative, find all possible fragments:
        # Find all unique fragments besides those involving ring opening
        self.unique_fragments = self.mol_graph.build_unique_fragments()

        # Then, if self.open_rings is True, open all rings present in self.unique_fragments
        # in order to capture all unique fragments that require ring opening.
        if self.open_rings:
            self._open_all_rings()
    else:
        # Iterative fragment generation:
        self.fragments_by_level = {}

        for level in range(depth):
            if level == 0:
                # First level: fragment the principle molecule graph itself.
                parents = [self.mol_graph]
            else:
                parents = self.fragments_by_level[str(level - 1)]
                if len(parents) == 0:
                    # Nothing left to fragment, so exit the loop:
                    break
            # Perform one level of fragmentation on everything from the previous level.
            self.fragments_by_level[str(level)] = self._fragment_one_level(parents)
def __init__(self, molecule, edges=None, depth=1, open_rings=True, opt_steps=10000):
    """
    Standard constructor for molecule fragmentation

    Args:
        molecule (Molecule): The molecule to fragment
        edges (list): List of index pairs that define graph edges, aka molecule bonds. If not
            set, edges will be determined with OpenBabel.
        depth (int): The number of levels of iterative fragmentation to perform, where each
            level will include fragments obtained by breaking one bond of a fragment one level
            up. Defaults to 1. However, if set to 0, instead all possible fragments are
            generated using an alternative, non-iterative scheme.
        open_rings (bool): Whether or not to open any rings encountered during fragmentation.
            Defaults to True. If true, any bond that fails to yield disconnected graphs when
            broken is instead removed and the entire structure is optimized with OpenBabel in
            order to obtain a good initial guess for an opened geometry that can then be put
            back into QChem to be optimized without the ring just reforming.
        opt_steps (int): Number of optimization steps when opening rings. Defaults to 10000.
    """
    self.open_rings = open_rings
    self.opt_steps = opt_steps

    if edges is None:
        self.mol_graph = MoleculeGraph.with_local_env_strategy(
            molecule, OpenBabelNN(), reorder=False, extend_structure=False
        )
    else:
        edges = {(e[0], e[1]): None for e in edges}
        self.mol_graph = MoleculeGraph.with_edges(molecule, edges)

    self.unique_fragments = []
    self.unique_fragments_from_ring_openings = []

    if depth == 0:
        # Non-iterative, find all possible fragments:
        # Find all unique fragments besides those involving ring opening
        self.unique_fragments = self.mol_graph.build_unique_fragments()

        # Then, if self.open_rings is True, open all rings present in self.unique_fragments
        # in order to capture all unique fragments that require ring opening.
        if self.open_rings:
            self._open_all_rings()
    else:
        # Iterative fragment generation:
        self.fragments_by_level = {}

        for level in range(depth):
            if level == 0:
                # First level: fragment the principle molecule graph itself.
                parents = [self.mol_graph]
            else:
                parents = self.fragments_by_level[str(level - 1)]
                if len(parents) == 0:
                    # Nothing left to fragment, so exit the loop:
                    break
            # Perform one level of fragmentation on everything from the previous level.
            self.fragments_by_level[str(level)] = self._fragment_one_level(parents)