def get_next_murcko_fragments(murcko_scaffold, break_fused_rings=True):
    """
    Fragment a scaffold into its next set of Murcko fragments.

    The fragmenter assumes that a Murcko scaffold is supplied.
    If not the behaviour may be undesirable.

    Parameters
    ----------
    murcko_scaffold : rdkit.Chem.rdchem.Mol
        A Murcko scaffold to fragment.
    break_fused_rings : bool, optional
        If True dissect fused rings. The default is True.

    Returns
    -------
    parents : list
        A list of parent scaffolds (next hierarchy [num_rings - 1]).

    Notes
    -----
    The ``rdlogger`` level is set to 4 for the duration of the call and
    is always set back to 3 on exit, including when fragmentation raises.

    """
    # Quieten the logger while fragmenting (presumably 4 == CRITICAL and
    # 3 == ERROR in RDKit's RDLogger -- confirm against the module import).
    rdlogger.setLevel(4)
    try:
        if break_fused_rings:
            fragmenter = MurckoRingFragmenter()
        else:
            fragmenter = MurckoRingSystemFragmenter()

        # De-duplicate the generated fragments before extracting molecules.
        unique_parents = set(fragmenter.fragment(Scaffold(murcko_scaffold)))
        return [f.mol for f in unique_parents]
    finally:
        # Reset the level even on failure so later callers are not left
        # with a silenced logger.
        rdlogger.setLevel(3)
def get_next_murcko_fragments(murcko_scaffold, break_fused_rings=True):
    """Fragment a scaffold into its next set of murcko fragments.

    The fragmenter assumes that a murcko scaffold is supplied.

    Parameters
    ----------
    murcko_scaffold : Chem.Mol
        An rdkit Mol containing a murcko scaffold.
    break_fused_rings : bool
        If True dissect fused rings (default: True).

    Returns
    -------
    parents : list
        A list of parent scaffolds (next hierarchy [num_rings - 1]).

    Notes
    -----
    The ``rdlogger`` level is set to 4 while fragmenting and is set back
    to 3 on exit, whether or not an exception was raised.
    """
    rdlogger.setLevel(4)
    try:
        if break_fused_rings:
            fragmenter = MurckoRingFragmenter()
        else:
            fragmenter = MurckoRingSystemFragmenter()

        # The set removes duplicate fragments returned by the fragmenter.
        fragments = set(fragmenter.fragment(Scaffold(murcko_scaffold)))
        parents = [f.mol for f in fragments]
    finally:
        # Always reset the log level; without this an exception above
        # would leave the logger at level 4 for the rest of the process.
        rdlogger.setLevel(3)
    return parents
def get_all_murcko_fragments(mol, break_fused_rings=True):
    """
    Get all possible murcko fragments from a molecule through
    recursive removal of peripheral rings.

    Parameters
    ----------
    mol : rdkit.Chem.rdchem.Mol
        The molecule to fragment. It is first passed through
        ``get_murcko_scaffold`` and stereochemistry is removed from the
        result before fragmentation.
    break_fused_rings : bool, optional
        If True dissect fused rings. The default is True.

    Returns
    -------
    list
        A list of Murcko fragments for the input molecule. The scaffold
        of the input molecule itself is included.

    Notes
    -----
    The ``rdlogger`` level is set to 4 while fragmenting and is always
    set back to 3 afterwards, including when fragmentation raises.

    Examples
    --------
    Generating Murcko fragments:

    >>> from rdkit import Chem
    >>> smiles = 'Cc1[nH]cnc1Cn1cccc(-c2ccccc2O)c1=O'
    >>> molecule = Chem.MolFromSmiles(smiles)
    >>> frags = get_all_murcko_fragments(molecule)

    """
    rdlogger.setLevel(4)
    try:
        if break_fused_rings:
            fragmenter = MurckoRingFragmenter()
        else:
            fragmenter = MurckoRingSystemFragmenter()

        mol = get_murcko_scaffold(mol)
        rdmolops.RemoveStereochemistry(mol)
        scaffold = Scaffold(mol)
        # Seed the result with the starting scaffold; the set also acts as
        # the "already expanded" record for the recursion below.
        parents = {scaffold}

        def recursive_generation(child):
            for parent in fragmenter.fragment(child):
                if parent in parents:
                    # Already discovered via another path; do not re-expand.
                    continue
                parents.add(parent)
                recursive_generation(parent)

        recursive_generation(scaffold)
    finally:
        # Reset the level even if fragmentation failed so the logger is
        # not left silenced for subsequent callers.
        rdlogger.setLevel(3)

    return [f.mol for f in parents]
def get_scaffold_frags(frag, hash_func=None):
    """Collect the unique fragments of a disconnected structure.

    This function is primarily used during molecular fragmentation.

    Parameters
    ----------
    frag : rdkit.Chem.rdchem.Mol
        An rdkit Mol containing disconnected structures.
    hash_func : callable, optional
        Scaffold hash function, the default is the canonical smiles.

    Returns
    -------
    set
        A set of scaffoldgraph.core.Scaffold objects which are unique
        fragments obtained from the disconnected structures in the
        input molecule.

    Notes
    -----
    The function also performs a `partial sanitization` of the input.
    If the molecule fails to sanitize the function will return an empty
    set. This case can occur when an aromatic ring system is dissected
    in a way in which the resultant system is no longer aromatic.

    """
    try:
        partial_sanitization(frag)
    except ValueError as err:
        # Dissecting an aromatic ring system may lead to an undefined
        # state where the resultant system is no longer aromatic. No
        # attempt is made to prevent this; it is only logged for
        # reference. This may be desirable for a scaffold tree and is
        # believed to match the behaviour of SNG.
        logger.debug(err)
        return set()

    # Positional flags are presumably asMols=True, sanitizeFrags=False
    # (RDKit GetMolFrags) -- the input was already partially sanitized.
    unique_frags = set()
    for piece in GetMolFrags(frag, True, False):
        unique_frags.add(Scaffold(piece, hash_func))
    return unique_frags
def get_scaffold_frags(frag):
    """Get fragments from a disconnected structure.

    Used by fragmentation methods.

    Parameters
    ----------
    frag : rdkit Mol
        A molecule that may contain several disconnected structures.

    Returns
    -------
    set
        Unique ``Scaffold`` objects, one per distinct fragment. An empty
        set is returned when ``partial_sanitization`` raises ValueError.
    """
    try:
        partial_sanitization(frag)
    except ValueError as error:
        # Breaking up an aromatic ring system can leave a result that is
        # no longer aromatic, which is an undefined state. This is not
        # prevented, only logged for reference. It may be desirable for a
        # scaffold tree and is believed to be equivalent to SNG.
        logger.debug(error)
        return set()
    else:
        pieces = GetMolFrags(frag, True, False)
        return set(map(Scaffold, pieces))
def get_all_murcko_fragments(mol, break_fused_rings=True):
    """Get all possible murcko fragments from a molecule through
    recursive removal of peripheral rings.

    Parameters
    ----------
    mol : rdkit Mol
        The molecule to be processed. It is passed through
        ``get_murcko_scaffold`` and stripped of stereochemistry before
        fragmentation.
    break_fused_rings : bool
        If True dissect fused rings (default: True).

    Returns
    -------
    list
        A list of rdkit Mols representing all possible murcko fragments,
        including the scaffold of the input molecule itself.

    Notes
    -----
    The ``rdlogger`` level is set to 4 during fragmentation and is set
    back to 3 on exit, whether or not an exception was raised.
    """
    rdlogger.setLevel(4)
    try:
        if break_fused_rings:
            fragmenter = MurckoRingFragmenter()
        else:
            fragmenter = MurckoRingSystemFragmenter()

        mol = get_murcko_scaffold(mol)
        rdmolops.RemoveStereochemistry(mol)
        scaffold = Scaffold(mol)
        # The set both accumulates results and prevents re-expanding a
        # fragment that was reached through a different removal order.
        parents = {scaffold}

        def recursive_generation(child):
            for parent in fragmenter.fragment(child):
                if parent in parents:
                    continue
                parents.add(parent)
                recursive_generation(parent)

        recursive_generation(scaffold)
    finally:
        # Without this, an exception above would leave the logger at
        # level 4 for the remainder of the process.
        rdlogger.setLevel(3)

    return [f.mol for f in parents]
def _contract_rings(mol):
    """Private: Return a molecule with rings contracted.

    Create a new molecule, replacing each ring with one atom. Atoms are
    connected if the rings they represent are connected by a bond not in
    any ring. If rings share a common bond, the bond added is double. If
    rings share a common atom (i.e. spiro rings) the rings are connected
    with a single bond. Used for generating ring connectivity scaffolds.

    Parameters
    ----------
    mol : rdkit.Chem.rdchem.Mol
        Scaffold template for ring contraction.

    Returns
    -------
    rcs : rdkit.Chem.rdchem.Mol
        New molecule with contracted rings.

    Notes
    -----
    Dummy atoms are used instead of carbon atoms, so that valence
    constraints are not violated. In ring connectivity scaffolds the
    valence of a vertex is occasionally > 4.

    See Also
    --------
    get_ring_connectivity_scaffold

    """
    # Use a Scaffold object for ring + ring system information.
    scf, rcs = Scaffold(mol), RWMol()

    # Add a dummy atom for each ring to the empty RWMol.
    # NOTE(review): the bonds added below use ring indices as atom indices
    # in `rcs`, which presumably relies on ring index i matching the i-th
    # atom added here -- confirm against Scaffold's ring numbering.
    for _ in range(scf.rings.count):
        rcs.AddAtom(Atom(0))

    # Create bonds between atoms (ring vertices).
    # `weak_connections` collects linker-mediated ring pairs across all
    # ring systems; `visited` holds atom indices already handled so the
    # linker walk never re-enters an atom.
    weak_connections, visited = set(), set()
    for system in scf.ring_systems:
        bc = set(system.get_bond_connectivity())
        ac = set(system.get_atom_connectivity())
        # Mark every atom of this ring system as visited up front so the
        # linker walk below only moves outwards from the system.
        visited.update(system.aix)

        # link strongly connected rings (DOUBLE)
        # i.e. connections present in both the atom- and bond-connectivity.
        for connection in ac.intersection(bc):
            rcs.AddBond(*connection, BondType.DOUBLE)

        # link weakly connected rings (SINGLE)
        # i.e. connections present in the atom-connectivity only.
        for connection in ac.difference(bc):
            rcs.AddBond(*connection, BondType.SINGLE)

        # link rings connected by a linker (SINGLE)
        for rix, ring in zip(system.rix, system):
            # Walk outwards from the ring's attachment points.
            # `to_visit` is used as a set-like work list (pop / add).
            to_visit = ring.get_attachment_points()
            while to_visit:
                aix = to_visit.pop()
                for nbr in scf.atoms[aix].GetNeighbors():
                    idx = nbr.GetIdx()
                    if idx in visited:
                        continue
                    elif nbr.IsInRing():
                        # Reached an unvisited ring atom: record a connection
                        # between the current ring and every ring containing
                        # that atom. The pair is sorted so (a, b) and (b, a)
                        # collapse to one entry in the set.
                        visited.add(idx)
                        for nix in _get_rings_with_atom(scf, idx):
                            c = tuple(sorted((nix, rix)))
                            weak_connections.add(c)
                        continue
                    else:
                        # Non-ring atom: keep walking through it.
                        to_visit.add(idx)
                        visited.add(idx)

    # Add remaining weak ring connections (SINGLE)
    # Done after the loop so each de-duplicated pair is bonded only once.
    for connection in weak_connections:
        rcs.AddBond(*connection, BondType.SINGLE)

    return rcs.GetMol()