def split_mol_by_residues(mol):
    """Break a molecule apart into one fragment per residue.

    The molecule is first divided with ``SplitMolByPDBResidues``; each
    connected component of every piece is then inspected. Components whose
    atoms map to more than one ``ResidueId`` are cut at the bonds that
    ``is_peptide_bond`` accepts, the others are kept unchanged.

    Parameters
    ----------
    mol : rdkit.Chem.rdchem.Mol
        The molecule to fragment

    Returns
    -------
    residues : list
        A list of :class:`rdkit.Chem.rdchem.Mol`

    Notes
    -----
    Code adapted from Maciek Wójcikowski on the RDKit discussion list
    """
    fragments = []
    for pdb_residue in SplitMolByPDBResidues(mol).values():
        components = GetMolFrags(pdb_residue, asMols=True, sanitizeFrags=False)
        for component in components:
            # Map each atom index to the residue identifier of that atom.
            atom_to_resid = {
                atom.GetIdx(): ResidueId.from_atom(atom)
                for atom in component.GetAtoms()
            }

            # A single residue identifier means there is nothing to split.
            if len(set(atom_to_resid.values())) <= 1:
                fragments.append(component)
                continue

            # Several residues in one component: cut the peptide bonds.
            peptide_bond_ids = [
                bond.GetIdx()
                for bond in component.GetBonds()
                if is_peptide_bond(bond, atom_to_resid)
            ]
            cut_mol = FragmentOnBonds(
                component, peptide_bond_ids, addDummies=False
            )
            fragments.extend(
                GetMolFrags(cut_mol, asMols=True, sanitizeFrags=False)
            )
    return fragments
def keep_biggest(cls, mol_in):
    """Strip small fragments from compound.

    Returns a new compound where only the "biggest" fragment is conserved
    according to (i) the number of non-Hs atoms and if there is tie then
    according to (ii) the molecular weight.

    When ``mol_in`` has at most one connected component it is returned
    unchanged.

    :param mol_in: RDKit Mol
    :return mol_out: new RDKit Mol having only one connected component
    """

    def count_non_hs_atom(mol):
        # Number of atoms whose atomic number is not 1 (i.e. not hydrogen).
        return sum(1 for atm in mol.GetAtoms() if atm.GetAtomicNum() != 1)

    # Remove "other" molecules
    molfrag = GetMolFrags(mol_in, asMols=True, sanitizeFrags=False)
    mol_out = mol_in
    if len(molfrag) > 1:
        accepted_nbr_atm = 0  # heavy-atom count of the best fragment so far
        # Molecular weight of the best fragment so far. This must exist
        # before the loop: a first fragment with zero heavy atoms ties with
        # the initial count and falls through to the weight comparison.
        accepted_mass = 0
        for f in molfrag:
            nbr_atm = count_non_hs_atom(f)
            if nbr_atm < accepted_nbr_atm:
                continue
            # Only weigh fragments that are bigger or tied on atom count.
            mass = MolWt(f)
            if nbr_atm > accepted_nbr_atm or mass > accepted_mass:
                accepted_nbr_atm = nbr_atm
                accepted_mass = mass
                mol_out = f  # keep only the biggest fragment
        cls._copy_properties(mol_in, mol_out)  # save the name and stuff
    return mol_out
def testB1BondGenMol(self):
    # Run a seeded run_kmc simulation starting from five monomers, then check
    # that (a) the stderr captured around generate_mol is empty and (b) the
    # number of fragments RDKit finds in the generated mol block equals the
    # sum of the CHAIN_LEN values reported by analyze_adj_matrix.
    starting_types = [S, S, S, G, S]
    monomers = [
        Monomer(kind, index) for index, kind in enumerate(starting_types)
    ]

    events = create_initial_events(monomers, DEF_RXN_RATES)
    events.append(Event(GROW, [], rate=1e4))
    state = create_initial_state(events, monomers)

    kmc_result = run_kmc(
        DEF_RXN_RATES,
        state,
        events,
        n_max=12,
        t_max=2,
        random_seed=55,
        sg_ratio=1.0,
    )
    nodes = kmc_result[MONO_LIST]
    adj = kmc_result[ADJ_MATRIX]

    with capture_stderr(generate_mol, adj, nodes) as captured:
        self.assertFalse(captured)

    mol_block = generate_mol(adj, nodes)
    rdkit_fragments = GetMolFrags(MolFromMolBlock(mol_block))

    chain_lengths = analyze_adj_matrix(adj)[CHAIN_LEN]
    # Make sure there are the same number of separate fragments calculated
    # by RDKIT as we get from just separating the alternate B1
    expected_fragment_count = np.sum(list(chain_lengths.values()))
    self.assertEqual(expected_fragment_count, len(rdkit_fragments))
def get_scaffold_frags(frag, hash_func=None):
    """Collect the unique fragments of a disconnected structure.

    This function is primarily used during molecular fragmentation.

    Parameters
    ----------
    frag : rdkit.Chem.rdchem.Mol
        An rdkit Mol containing disconnected structures.
    hash_func : callable, optional
        Scaffold hash function, the default is the canonical smiles.

    Returns
    -------
    set
        A set of scaffoldgraph.core.Scaffold objects which are unique
        fragments obtained from the disconnected structures in the
        input molecule.

    Notes
    -----
    The input is passed through ``partial_sanitization`` first. If that
    raises ``ValueError`` the error is logged at debug level and an empty
    set is returned. This case can occur when an aromatic ring system is
    dissected in a way in which the resultant system is no longer aromatic.

    """
    try:
        partial_sanitization(frag)
    except ValueError as err:
        # Dissecting an aromatic ring system may leave an undefined state
        # in which the result is no longer aromatic. No attempt is made to
        # prevent this; it is only logged for reference. This may be
        # desirable for a scaffold tree and is believed to be equivalent
        # to the behaviour of SNG.
        logger.debug(err)
        return set()

    scaffolds = set()
    for piece in GetMolFrags(frag, asMols=True, sanitizeFrags=False):
        scaffolds.add(Scaffold(piece, hash_func))
    return scaffolds
def get_scaffold_frags(frag, hash_func=None):
    """Get fragments from a disconnected structure.

    Used by fragmentation methods.

    Parameters
    ----------
    frag : rdkit.Chem.rdchem.Mol
        An rdkit Mol containing disconnected structures.
    hash_func : callable, optional
        Scaffold hash function. When omitted (``None``) each ``Scaffold``
        is built from the fragment alone, exactly as before this parameter
        existed.
        NOTE(review): assumes ``Scaffold`` accepts the hash function as its
        second positional argument -- confirm against its definition.

    Returns
    -------
    set
        A set of ``Scaffold`` objects, one per unique fragment. An empty
        set is returned when ``partial_sanitization`` raises ``ValueError``.

    """
    try:
        partial_sanitization(frag)
    except ValueError as e:
        # This error is caught as dissecting an aromatic ring system,
        # may lead to an undefined state where the resultant system
        # is no longer aromatic. We make no attempt to prevent this
        # but log it for reference.
        # This behaviour may be desirable for a scaffold tree and is
        # equivalent to the behavior of SNG (I believe...)
        logger.debug(e)
        return set()

    pieces = GetMolFrags(frag, True, False)
    if hash_func is None:
        # Keep the original one-argument construction for existing callers.
        return {Scaffold(f) for f in pieces}
    return {Scaffold(f, hash_func) for f in pieces}
def keep_largest_fragment(mol):
    """Pick the fragment with the most atoms from a disconnected molecule.

    Size is measured purely by atom count. When several fragments share
    the highest count, the first one returned by ``GetMolFrags`` is kept.

    Parameters
    ----------
    mol : rdkit.Chem.rdchem.Mol
        rdkit molecule containing disconnected fragments.

    Returns
    -------
    mol : rdkit.Chem.rdchem.Mol
        The fragment with the most atoms, or the input molecule itself
        when it has at most one fragment.

    """
    fragments = GetMolFrags(mol, asMols=True)
    if len(fragments) > 1:
        largest = fragments[0]
        largest_size = largest.GetNumAtoms()
        for candidate in fragments[1:]:
            candidate_size = candidate.GetNumAtoms()
            # Strict comparison so the earliest fragment wins a tie.
            if candidate_size > largest_size:
                largest, largest_size = candidate, candidate_size
        return largest
    return mol