def _build_fragment_from_complex(complex, residue, ligand, ligand_core, result=0, substructure=True, simmetry=False):
    """Strip the core atoms from ``ligand`` and return the remaining fragment.

    Parameters
    ----------
    complex : str
        Path of the PDB file, read with ``Chem.MolFromPDBFile``.
    residue : str
        Key used to select the ligand from ``SplitMolByPDBResidues``.
    ligand : RDKit molecule object
        Full ligand (core + fragment).
    ligand_core : RDKit molecule object
        Core that is removed from ``ligand``.
    result : int
        Forwarded to ``_search_core_fragment_linker``.
    substructure : bool
        If true the core is removed with ``DeleteSubstructs``; otherwise the
        atoms listed by ``_search_core_fragment_linker`` are removed one by one.
    simmetry : bool
        Forwarded to ``_search_core_fragment_linker``.

    Returns
    -------
    tuple
        ``(fragment, old_atoms, hydrogen_core, atom_core)`` where ``fragment``
        is the core-less molecule after ``Chem.AddHs``, ``old_atoms`` are the
        fragment atom indices before hydrogens were added, ``hydrogen_core`` is
        the first hydrogen bonded to the linking core atom and ``atom_core`` is
        the linking core atom itself.

    Raises
    ------
    ce.SameMolecule
        If ``_search_core_fragment_linker`` raises ``TypeError``.
    IndexError
        If the linking core atom has no hydrogen neighbour in the complex.
    """
    from rdkit import Chem
    import rdkit.Chem.rdmolops as rd
    import rdkit.Chem.rdchem as rc

    # Retrieve atom core linking fragment
    try:
        atom_core_idx, atoms_core, _ = _search_core_fragment_linker(
            ligand, ligand_core, result, simmetry)
    except TypeError:
        raise ce.SameMolecule(
            "Core and ligand are the exact same molecule. Check your inputs")
    atom_core = at.Atom(ligand_core, atom_core_idx)
    mol = Chem.MolFromPDBFile(complex, removeHs=False)

    # Retrieve hydrogen core attach to linking atom
    # NOTE(review): atom_core_idx is used to index the residue extracted from
    # the complex, so both numberings are assumed to agree -- confirm.
    original = rd.SplitMolByPDBResidues(mol)[residue]
    linking_atom = original.GetAtomWithIdx(atom_core_idx)
    hydrogen_core_idx = [
        atom.GetIdx() for atom in linking_atom.GetNeighbors()
        if atom.GetAtomicNum() == 1
    ][0]
    hydrogen_core = at.Atom(original, hydrogen_core_idx)

    # Delete core for full ligand with substructure and if it fails manually
    if substructure:
        new_mol = rc.EditableMol(rd.DeleteSubstructs(ligand, ligand_core))
    else:
        new_mol = rc.EditableMol(ligand)
        # EditableMol renumbers atoms after every removal, so delete from the
        # highest index downwards to keep the remaining indices valid.
        for core_idx in sorted(atoms_core, reverse=True):
            new_mol.RemoveAtom(core_idx)

    # Drop atoms left without any bond once the core is gone. Iterating a
    # snapshot from the highest index down keeps the indices being removed valid.
    snapshot = new_mol.GetMol()
    for atom in reversed(snapshot.GetAtoms()):
        if len(atom.GetNeighbors()) == 0:
            new_mol.RemoveAtom(atom.GetIdx())

    # Add missing hydrogen to the fragment (with coordinates)
    fragment = new_mol.GetMol()
    old_atoms = [atom.GetIdx() for atom in fragment.GetAtoms()]
    fragment = Chem.AddHs(fragment, False, True)
    return fragment, old_atoms, hydrogen_core, atom_core
def _build_fragment_from_complex(complex, residue, ligand, ligand_core, result=0, substructure=True, simmetry=False):
    """Strip the core atoms from ``ligand`` and map ligand atoms to the fragment.

    Parameters
    ----------
    complex : str
        Path of the PDB file, read with ``Chem.MolFromPDBFile``.
    residue : str
        Key used to select the ligand from ``SplitMolByPDBResidues``.
    ligand : RDKit molecule object
        Full ligand (core + fragment).
    ligand_core : RDKit molecule object
        Core that is removed from ``ligand``.
    result : int
        Forwarded to ``_search_core_fragment_linker``.
    substructure : bool
        If true the core is removed with ``DeleteSubstructs``; otherwise the
        atoms listed by ``_search_core_fragment_linker`` are removed one by one.
    simmetry : bool
        Forwarded to ``_search_core_fragment_linker``.

    Returns
    -------
    tuple
        ``(fragment, old_atoms, hydrogen_core, atom_core, atom_fragment,
        mapping, correct)``. ``old_atoms`` are the heavy-atom indices of the
        fragment, ``mapping`` maps heavy-atom indices of ``ligand`` (core
        excluded) to fragment indices, and ``correct`` is the value returned
        by ``_check_fragment``.

    Raises
    ------
    ce.SameMolecule
        If ``_search_core_fragment_linker`` raises ``TypeError``.
    IndexError
        If the linking core atom has no hydrogen neighbour in the complex.
    AssertionError
        If ligand and fragment disagree on the number of heavy atoms.

    Notes
    -----
    Writes the hydrogenated fragment to ``int0.pdb`` in the working directory
    and prints diagnostic information to stdout.
    """
    from rdkit import Chem
    import rdkit.Chem.rdmolops as rd
    import rdkit.Chem.rdchem as rc

    # Retrieve atom core linking fragment
    try:
        atom_core_idx, atoms_core, atom_fragment = _search_core_fragment_linker(
            ligand, ligand_core, result, simmetry)
    except TypeError:
        raise ce.SameMolecule("Core and ligand are the exact same molecule. Check your inputs.")
    print("ATOM OF FRAGMENT ATTACHED TO CORE:", atom_fragment)
    print("ATOM OF CORE ATTACHED TO FRAGMENT:", atom_core_idx)
    atom_core = at.Atom(ligand_core, atom_core_idx)
    mol = Chem.MolFromPDBFile(complex, removeHs=False)

    # Retrieve hydrogen core attach to linking atom
    # NOTE(review): atom_core_idx is used to index the residue extracted from
    # the complex, so both numberings are assumed to agree -- confirm.
    original = rd.SplitMolByPDBResidues(mol)[residue]
    linking_atom = original.GetAtomWithIdx(atom_core_idx)
    hydrogen_core_idx = [
        atom.GetIdx() for atom in linking_atom.GetNeighbors()
        if atom.GetAtomicNum() == 1
    ][0]
    hydrogen_core = at.Atom(original, hydrogen_core_idx)

    # Delete core for full ligand with substructure and if it fails manually
    if substructure:
        new_mol = rc.EditableMol(rd.DeleteSubstructs(ligand, ligand_core))
    else:
        new_mol = rc.EditableMol(ligand)
        # EditableMol renumbers atoms after every removal; a substructure
        # match is not guaranteed to be ascending, so sort before deleting
        # from the highest index downwards.
        for core_idx in sorted(atoms_core, reverse=True):
            new_mol.RemoveAtom(core_idx)

    # Drop hydrogens left without any bond once the core is gone.
    snapshot = new_mol.GetMol()
    for atom in reversed(snapshot.GetAtoms()):
        if len(atom.GetNeighbors()) == 0 and atom.GetAtomicNum() == 1:
            new_mol.RemoveAtom(atom.GetIdx())

    fragment = new_mol.GetMol()
    if not substructure:
        print("FRAGMENT ATOMS", [atom.GetIdx() for atom in fragment.GetAtoms()])

    # Heavy atoms of the fragment and of the ligand (core excluded), paired by
    # their relative order.
    core_atoms = set(atoms_core)
    old_atoms = [atom.GetIdx() for atom in fragment.GetAtoms() if atom.GetAtomicNum() != 1]
    new_atoms = [
        atom.GetIdx() for atom in ligand.GetAtoms()
        if atom.GetIdx() not in core_atoms and atom.GetAtomicNum() != 1
    ]
    print("old_atoms", old_atoms, "new_atoms", new_atoms)
    mapping = dict(zip(new_atoms, old_atoms))
    atom_fragment_mapped = mapping[atom_fragment]

    # Add missing hydrogen to the fragment (with coordinates)
    fragment = Chem.AddHs(fragment, False, True)
    correct = _check_fragment(fragment, ligand, mapping, atom_fragment, atom_fragment_mapped, ligand_core)
    Chem.MolToPDBFile(fragment, "int0.pdb")
    assert len(old_atoms) == len(new_atoms)
    return fragment, old_atoms, hydrogen_core, atom_core, atom_fragment, mapping, correct
def to_rdkit_Mol(mol):
    """Build an RDKit molecule from ``mol.atoms`` and ``mol.bondmap``.

    Every entry of ``mol.atoms`` is turned into an RDKit atom through
    ``Element(entry).Z``. Every entry of ``mol.bondmap`` is read as
    ``(begin index, end index, key into rdchem.BondType.values)``.

    Returns the assembled ``rdchem.Mol``.
    """
    editable = rdchem.EditableMol(rdchem.Mol())

    # Atoms first, so the bond indices below refer to existing atoms.
    for atom_spec in mol.atoms:
        atomic_number = Element(atom_spec).Z
        editable.AddAtom(rdchem.Atom(atomic_number))

    for entry in mol.bondmap:
        begin_idx = entry[0]
        end_idx = entry[1]
        bond_type = rdchem.BondType.values[entry[2]]
        editable.AddBond(begin_idx, end_idx, bond_type)

    return editable.GetMol()
def attach_capping(mol1, mol2):
    """Bond a copy of the capping group to every N terminal of mol1

    Arguments:
        mol1 {rdKit mol object} -- molecule whose N terminals are capped
        mol2 {rdKit mol object} -- chosen N-capping, bonded through its C terminal

    Returns:
        rdKit mol object -- mol1 updated (connected with mol2, one or more)
    """
    n_term_smarts = ('[N:2]', '[NH2:2]', '[NH:2]')
    c_term_smarts = ('[C:1]', '[CH:1]')

    # tag the atoms of mol1: none is a C terminal, some are N terminals
    n_term_total = 0
    for atom in mol1.GetAtoms():
        atom.SetProp('Cterm', 'False')
        if atom.GetSmarts() in n_term_smarts:
            n_term_total += 1
            atom.SetProp('Nterm', 'True')
        else:
            atom.SetProp('Nterm', 'False')

    # tag the atoms of mol2: none is an N terminal, one should be a C terminal
    for atom in mol2.GetAtoms():
        atom.SetProp('Nterm', 'False')
        is_c_term = atom.GetSmarts() in c_term_smarts
        atom.SetProp('Cterm', 'True' if is_c_term else 'False')

    # one capping group per N terminal found in the original mol1
    for _ in range(n_term_total):
        combined = rdmolops.CombineMols(mol1, mol2)

        # indices of the atoms still waiting to react
        open_n = []
        open_c = []
        for atom in combined.GetAtoms():
            if atom.GetProp('Nterm') == 'True':
                open_n.append(atom.GetIdx())
            if atom.GetProp('Cterm') == 'True':
                open_c.append(atom.GetIdx())
        n_idx = open_n[0]
        c_idx = open_c[0]

        # creates the amide bond
        editable = rdchem.EditableMol(combined)
        editable.AddBond(n_idx, c_idx, order=Chem.rdchem.BondType.SINGLE)
        capped = editable.GetMol()

        # the reacted atoms lose their tags and their atom-map labels
        n_atom = capped.GetAtomWithIdx(n_idx)
        c_atom = capped.GetAtomWithIdx(c_idx)
        n_atom.SetProp('Nterm', 'False')
        c_atom.SetProp('Cterm', 'False')
        n_atom.SetAtomMapNum(0)
        c_atom.SetAtomMapNum(0)

        # the capped molecule becomes the 'core' for the next round
        mol1 = capped

    return mol1
def _bridge_sulfur_pairs(mol, sulfur_idxs, tag):
    """Bond randomly chosen pairs of the given sulfur atoms until fewer than two remain

    Arguments:
        mol {rdKit mol object} -- molecule holding the sulfur atoms
        sulfur_idxs {list of int} -- indices of the still unbridged sulfurs (consumed in place)
        tag {str} -- atom property reset to 'False' on every bridged atom

    Returns:
        rdKit mol object -- molecule with the new S-S bonds
    """
    while len(sulfur_idxs) >= 2:
        edmol = rdchem.EditableMol(mol)
        # pick two distinct positions at random
        pos = list(range(len(sulfur_idxs)))
        x = np.random.choice(pos, 1)[0]
        pos.remove(x)
        y = np.random.choice(pos, 1)[0]
        a = sulfur_idxs[x]
        b = sulfur_idxs[y]
        edmol.AddBond(a, b, order=Chem.rdchem.BondType.SINGLE)
        mol = edmol.GetMol()
        # removes tags and labels from the atoms which reacted
        for idx in (a, b):
            reacted = mol.GetAtomWithIdx(idx)
            reacted.SetProp(tag, 'False')
            reacted.SetAtomMapNum(0)
        sulfur_idxs.remove(a)
        sulfur_idxs.remove(b)
    return mol


def cyclize(mol, cy):
    """Form the S-S bridges of the molecule and, if requested, close it head to tail

    Arguments:
        mol {rdKit mol object} -- molecule to be cyclized
        cy {int} -- 1=yes, 0=no head-to-tail cyclization

    Returns:
        rdKit mol object -- the (possibly) cyclized molecule
    """
    n_term_smarts = ('[N:2]', '[NH2:2]', '[NH:2]')
    c_term_smarts = ('[C:1]', '[CH:1]')
    sulfur_smarts = (('Sact1', '[S:1]'), ('Sact2', '[S:2]'), ('Sact3', '[S:3]'))

    Nterm = []
    Cterm = []
    active_sulfurs = {'Sact1': [], 'Sact2': [], 'Sact3': []}

    # tags N terminals, C terminals and the three classes of active Cys
    # sulfurs, saving their positions on the way
    for atom in mol.GetAtoms():
        smarts = atom.GetSmarts()
        idx = atom.GetIdx()

        if smarts in n_term_smarts:
            atom.SetProp('Nterm', 'True')
            Nterm.append(idx)
        else:
            atom.SetProp('Nterm', 'False')

        if smarts in c_term_smarts:
            atom.SetProp('Cterm', 'True')
            Cterm.append(idx)
        else:
            atom.SetProp('Cterm', 'False')

        for tag, pattern in sulfur_smarts:
            if smarts == pattern:
                atom.SetProp(tag, 'True')
                active_sulfurs[tag].append(idx)
            else:
                atom.SetProp(tag, 'False')

    # creates the S-S bonds class by class; adding bonds does not renumber
    # atoms, so the positions saved above stay valid
    for tag, _ in sulfur_smarts:
        mol = _bridge_sulfur_pairs(mol, active_sulfurs[tag], tag)

    if cy == 1:
        n_idx = Nterm[0]
        c_idx = Cterm[0]
        removed_idx = c_idx + 1

        edmol = rdchem.EditableMol(mol)
        # creates the amide bond
        edmol.AddBond(n_idx, c_idx, order=Chem.rdchem.BondType.SINGLE)
        # NOTE(review): the atom stored right after the C terminal is
        # presumably its leaving group -- confirm against the building blocks.
        edmol.RemoveAtom(removed_idx)
        mol = edmol.GetMol()

        # RemoveAtom shifts every index above the removed atom down by one,
        # so the N terminal has to be looked up at its new position
        if n_idx > removed_idx:
            n_idx -= 1

        # removes tags and labels from the atoms which reacted
        mol.GetAtomWithIdx(n_idx).SetProp('Nterm', 'False')
        mol.GetAtomWithIdx(c_idx).SetProp('Cterm', 'False')
        mol.GetAtomWithIdx(n_idx).SetAtomMapNum(0)
        mol.GetAtomWithIdx(c_idx).SetAtomMapNum(0)

    return mol
def connect_mol(mol1, mol2):
    """Bond the C terminal of a copy of mol2 to every N terminal of mol1

    Arguments:
        mol1 {rdKit mol object} -- first molecule to be connected
        mol2 {rdKit mol object} -- second molecule to be connected

    Returns:
        rdKit mol object -- mol1 updated (connected with mol2, one or more)
    """
    n_term_smarts = ('[N:2]', '[NH2:2]', '[NH:2]')
    c_term_smarts = ('[C:1]', '[CH:1]')

    # used internally to recognize a methylated aa:
    metbond = False
    # can be set with exclude or allow methylation,
    # it refers to the possibility of having methylation in the entire GA:
    methyl = False
    # NOTE(review): both flags are hard-coded to False here, so the
    # methylation branch below never runs as written.

    count = 0
    # detects all the N terminals in mol1
    for atom in mol1.GetAtoms():
        atom.SetProp('Cterm', 'False')
        atom.SetProp('methyl', 'False')
        if atom.GetSmarts() in n_term_smarts:
            count += 1
            atom.SetProp('Nterm', 'True')
        else:
            atom.SetProp('Nterm', 'False')

    # detects all the C terminals in mol2 (it should be one)
    for atom in mol2.GetAtoms():
        atom.SetProp('Nterm', 'False')
        atom.SetProp('methyl', 'False')
        if atom.GetSmarts() in c_term_smarts:
            atom.SetProp('Cterm', 'True')
        else:
            atom.SetProp('Cterm', 'False')

    # mol2 is added to all the N terminals of mol1
    for _ in range(count):
        combo = rdmolops.CombineMols(mol1, mol2)
        Nterm = []
        Cterm = []

        # saves in two different lists the index of the atoms which have to be connected
        for atom in combo.GetAtoms():
            if atom.GetProp('Nterm') == 'True':
                Nterm.append(atom.GetIdx())
            if atom.GetProp('Cterm') == 'True':
                Cterm.append(atom.GetIdx())

        # creates the amide bond
        edcombo = rdchem.EditableMol(combo)
        edcombo.AddBond(Nterm[0], Cterm[0], order=Chem.rdchem.BondType.SINGLE)
        # mol2 is appended after mol1 by CombineMols, so removing the atom
        # that follows the C terminal leaves the N terminal index untouched
        edcombo.RemoveAtom(Cterm[0] + 1)
        clippedMol = edcombo.GetMol()

        # removes tags and labels from c term atoms which reacted
        clippedMol.GetAtomWithIdx(Cterm[0]).SetProp('Cterm', 'False')
        clippedMol.GetAtomWithIdx(Cterm[0]).SetAtomMapNum(0)

        # methylates amide bond
        if metbond and methyl:
            Nterm = []
            Met = []
            # kept under its own name so the 'methyl' flag is not overwritten
            # by a molecule object
            methyl_group = rdmolfiles.MolFromSmiles('[C:4]')
            for atom in methyl_group.GetAtoms():
                atom.SetProp('methyl', 'True')
                atom.SetProp('Nterm', 'False')
                atom.SetProp('Cterm', 'False')
            metcombo = rdmolops.CombineMols(clippedMol, methyl_group)
            for atom in metcombo.GetAtoms():
                if atom.GetProp('Nterm') == 'True':
                    Nterm.append(atom.GetIdx())
                if atom.GetProp('methyl') == 'True':
                    Met.append(atom.GetIdx())
            metedcombo = rdchem.EditableMol(metcombo)
            metedcombo.AddBond(Nterm[0], Met[0], order=Chem.rdchem.BondType.SINGLE)
            clippedMol = metedcombo.GetMol()
            clippedMol.GetAtomWithIdx(Met[0]).SetProp('methyl', 'False')
            clippedMol.GetAtomWithIdx(Met[0]).SetAtomMapNum(0)

        # removes tags and labels from the atoms which reacted
        clippedMol.GetAtomWithIdx(Nterm[0]).SetProp('Nterm', 'False')
        clippedMol.GetAtomWithIdx(Nterm[0]).SetAtomMapNum(0)

        # updates the 'core' molecule
        mol1 = clippedMol
        metbond = False

    return mol1
def _build_fragment_from_complex(complex, residue, ligand, ligand_core, result=0, substructure=True, symmetry=False, frag_core_atom=None):
    """
    Remove the core from the grown ligand and map ligand atoms to the fragment.

    Parameters
    ----------
    complex : str
        Path of PDB file with protein-ligand complex.
    residue : str
        Residue name.
    ligand : RDKit molecule object
        Ligand to grow during the simulation.
    ligand_core : RDKit molecule object
        Common structure of each grown ligand.
    result : int
        Index to extract core atoms from the substructure search.
    substructure : bool
        Delete core for full ligand with substructure.
    symmetry : bool
        Check the symmetry of the ligand.
    frag_core_atom : optional
        Forwarded unchanged to ``_search_core_fragment_linker``.

    Returns
    -------
    fragment : RDKit molecule object
        Grown ligand with deleted core atoms, after adding hydrogens.
    old_atoms : list
        Idx for each heavy atom of grown ligand without core.
    hydrogen_core : pele_platform.Frag.atoms.Atom
        Hydrogen core attached to linking atom.
    atom_core : pele_platform.Frag.atoms.Atom
        Atom object of the core atom attached to the fragment.
    atom_fragment : int
        Atom of the fragment attached to the core.
    mapping : dict
        Maps heavy-atom Idx of the ligand (core excluded) to Idx in the fragment.
    correct : bool
        Value returned by ``_check_fragment`` (fragment size, chirality, missing hydrogens).
    fragment_atoms_wo_hydrogen : list
        Idx of every fragment atom before hydrogens were added.

    Raises
    ------
    ce.SameMolecule
        If the core and ligand are the same molecule.
    IndexError
        If the linking core atom has no hydrogen neighbour in the complex.
    AssertionError
        If ligand and fragment disagree on the number of heavy atoms.

    Notes
    -----
    Writes the hydrogenated fragment to ``int0.pdb`` in the working directory
    and prints diagnostic information to stdout.
    """

    # Retrieve atom core linking fragment
    try:
        atom_core_idx, atoms_core, atom_fragment = _search_core_fragment_linker(
            ligand, ligand_core, result, symmetry, frag_core_atom)
    except TypeError:
        raise ce.SameMolecule(
            "Core and ligand are the exact same molecule. Check your inputs.")
    print("ATOM OF FRAGMENT ATTACHED TO CORE:", atom_fragment)
    print("ATOM OF CORE ATTACHED TO FRAGMENT:", atom_core_idx)
    atom_core = at.Atom(ligand_core, atom_core_idx)
    mol = Chem.MolFromPDBFile(complex, removeHs=False)

    # Retrieve hydrogen core attach to linking atom
    # NOTE(review): atom_core_idx is used to index the residue extracted from
    # the complex, so both numberings are assumed to agree -- confirm.
    original = rd.SplitMolByPDBResidues(mol)[residue]
    linking_atom = original.GetAtomWithIdx(atom_core_idx)
    hydrogen_core_idx = [
        atom.GetIdx() for atom in linking_atom.GetNeighbors()
        if atom.GetAtomicNum() == 1
    ][0]
    hydrogen_core = at.Atom(original, hydrogen_core_idx)

    # Delete core for full ligand with substructure and if it fails manually
    if substructure:
        new_mol = rc.EditableMol(rd.DeleteSubstructs(ligand, ligand_core))
    else:
        new_mol = rc.EditableMol(ligand)
        # EditableMol renumbers atoms after every removal; a substructure
        # match is not guaranteed to be ascending, so sort before deleting
        # from the highest index downwards.
        for core_idx in sorted(atoms_core, reverse=True):
            new_mol.RemoveAtom(core_idx)

    # Drop hydrogens left without any bond once the core is gone.
    snapshot = new_mol.GetMol()
    for atom in reversed(snapshot.GetAtoms()):
        if len(atom.GetNeighbors()) == 0 and atom.GetAtomicNum() == 1:
            new_mol.RemoveAtom(atom.GetIdx())

    fragment = new_mol.GetMol()
    if not substructure:
        print("FRAGMENT ATOMS", [atom.GetIdx() for atom in fragment.GetAtoms()])

    # Heavy atoms of the fragment and of the ligand (core excluded), paired by
    # their relative order.
    core_atoms = set(atoms_core)
    old_atoms = [
        atom.GetIdx() for atom in fragment.GetAtoms()
        if atom.GetAtomicNum() != 1
    ]
    new_atoms = [
        atom.GetIdx() for atom in ligand.GetAtoms()
        if atom.GetIdx() not in core_atoms and atom.GetAtomicNum() != 1
    ]
    print("old_atoms", old_atoms, "new_atoms", new_atoms)
    mapping = dict(zip(new_atoms, old_atoms))
    atom_fragment_mapped = mapping[atom_fragment]

    # Retrieve all atom idxs before adding hydrogen to fragment_atom
    fragment_atoms_wo_hydrogen = [
        atom.GetIdx() for atom in fragment.GetAtoms()
    ]

    # Add missing hydrogen to the fragment (with coordinates)
    fragment = Chem.AddHs(fragment, False, True)
    correct = _check_fragment(fragment, ligand, mapping, atom_fragment,
                              atom_fragment_mapped, ligand_core)
    Chem.MolToPDBFile(fragment, "int0.pdb")
    assert len(old_atoms) == len(new_atoms)
    return (fragment, old_atoms, hydrogen_core, atom_core, atom_fragment,
            mapping, correct, fragment_atoms_wo_hydrogen)
def FragmentMol(query_mol, query_mcs_indices):
    """Return a copy of ``query_mol`` without the bonds that join core and side-chains.

    A bond is removed when exactly one of its two atoms is listed in
    ``query_mcs_indices``; ``query_mol`` itself is left untouched.
    """
    answer = rdchem.EditableMol(query_mol)
    for bond in query_mol.GetBonds():
        begin_atom_idx = bond.GetBeginAtom().GetIdx()
        end_atom_idx = bond.GetEndAtom().GetIdx()
        # if the bond is between mcs core and a side-chain - remove it
        begin_in_core = bool(begin_atom_idx in query_mcs_indices)
        end_in_core = bool(end_atom_idx in query_mcs_indices)
        if begin_in_core != end_in_core:
            answer.RemoveBond(begin_atom_idx, end_atom_idx)
    return answer.GetMol()


def MapIndexQ2T(query_index, query_mcs_indices, template_mcs_indices):
    """Translate a query atom index into the matching template atom index.

    Looks up the position of ``query_index`` in ``query_mcs_indices`` and
    returns the entry of ``template_mcs_indices`` at that same position, or
    -1 when ``query_index`` is not part of ``query_mcs_indices``.
    """
    for position, mcs_index in enumerate(query_mcs_indices):
        if mcs_index == query_index:
            return template_mcs_indices[position]
    return -1


class Sidechain:
    """Atoms of one side-chain plus the data needed to rebuild it on a template.

    NOTE(review): this class was an unfinished sketch that did not parse.
    It has been made syntactically valid; the steps the sketch only described
    in comments are still marked as TODO.
    """

    def __init__(self):
        # path of length 3 starting in atom where sidechain connects that goes
        # through 2 other atoms that are part of a skeleton. These are stored
        # not as indices in context of query mol but instead as order in
        # query_mcs_indices; it is assumed this order is identical in
        # template_mcs_indices and thus is used for accessing atom indices
        # directly in template_mol
        self.anchor = (-1, -1, -1)
        # index (in context of whole molecule) of the skeleton atom the
        # sidechain connects to; filled by Anchor()
        self.skeleton_contact = None
        # indices of atoms (in context of whole molecule) that belong to this sidechain
        self.members = list()
        # indices of members (in context of sidechain) listed in ascending
        # order of distance from skeleton contact; ties are undefined but irrelevant
        self.build_order = list()
        # list of triplets - indices (in context of whole molecule) of path to skeleton contact
        self.zpath = list()
        # list of triplets - distance/angle/dihedral to atoms from zpath
        self.zvals = list()
        # distance of shortest path to skeleton contact - used for filling build_order
        self.dists_to_anchor = list()
        self.atom_type = list()

    def AddAtom(self, query_mol, index):
        """Add an atom to the sidechain (not implemented in the original sketch).

        Intended behaviour per the original notes: add the atom index to
        members, the first three steps of a path to anchor for z-matrix like
        reconstruction, zvals for distance+angle+dihedral and the total
        distance to anchor.
        """
        raise NotImplementedError("Sidechain.AddAtom is not implemented yet")

    def Anchor(self, query_mol, skeleton_contact, query_mcs_indices):
        """Fill anchor values by providing index of a skeleton contact.

        Returns 0 when a path of three skeleton atoms starting at
        ``skeleton_contact`` was found and stored, 1 otherwise.
        """
        # NOTE(review): the sketch read self.skeleton_contact in OrderUp but
        # never assigned it; storing it here is an assumption -- confirm.
        self.skeleton_contact = skeleton_contact

        # position of every skeleton atom inside query_mcs_indices
        position_of = {}
        for position, mcs_index in enumerate(query_mcs_indices):
            position_of.setdefault(mcs_index, position)

        for mcs_idx_it in query_mcs_indices:
            if mcs_idx_it == skeleton_contact:
                # a path from an atom to itself cannot have three atoms
                continue
            path = Chem.rdmolops.GetShortestPath(query_mol, skeleton_contact, mcs_idx_it)
            if len(path) == 3 and path[1] in position_of and path[2] in position_of:
                # skeleton_contact, way_to_anchor, anchor
                self.anchor = tuple(position_of.get(step, -1) for step in path)
                return 0
        return 1

    def OrderUp(self, query_mol):
        """Fill build_order from the path length of every member to the skeleton contact."""
        for it in self.members:
            # path will include starting and ending index
            path = Chem.rdmolops.GetShortestPath(query_mol, it, self.skeleton_contact)
            self.dists_to_anchor.append(len(path))  # remember the length

            # we pay special attention if the path is not long enough to build
            # the atom from zvals of this sidechain only:
            #   len(path) == 2: atom is adjacent to skeleton contact; dihedral
            #       and angle from template is used, only distance is taken
            #       from query
            #   len(path) == 3: atom is 2 steps away from skeleton contact;
            #       only dihedral from template is used, angle and distance
            #       are taken from query (should be ok if angles are not changed)
            #   len(path) > 3: all zvals are taken from the sidechain
            # TODO: none of these cases was implemented in the sketch.

            # NOTE(review): the sketch stored path[-3:], while its notes talk
            # about the first steps after the atom (path[1], path[2],
            # path[3]) -- confirm which triplet is wanted. It is stored for
            # every member so zpath stays aligned with dists_to_anchor.
            self.zpath.append(path[-3:])
            # TODO: measure distance it - path[1] and use it as zval[0]
            # TODO: measure angle it - path[1] - path[2] and use it as zval[1]
            # TODO: measure dihedral it - path[1] - path[2] - path[3] and use it as zval[2]

        # now time to order
        self.build_order = np.argsort(self.dists_to_anchor)