def check_and_assign_bond_orders_dynamic(mol1, mol2):
    """Transfer bond orders between two molecules when only one has them.

    If exactly one of ``mol1``/``mol2`` has bond orders (as reported by
    ``has_bond_orders``), they are copied onto the other molecule with
    ``Chem.AssignBondOrdersFromTemplate``. If both or neither have bond
    orders, nothing is transferred.

    :param mol1: first molecule
    :param mol2: second molecule
    :return: tuple ``(mol1, mol2)``; the molecule that received bond orders
        is the new object returned by the template assignment
    :raises Exception: if either molecule is missing (falsy)
    :raises BondAssignmentError: if a ``ValueError`` is raised while
        checking or transferring bond orders
    """
    # Validate up front so a missing molecule yields the intended message
    # instead of whatever the helpers raise when handed None.
    if not mol1:
        raise Exception('Mol1 does not exist!')
    if not mol2:
        raise Exception('Mol2 does not exist!')

    try:
        # Evaluate each predicate once instead of once per branch.
        mol1_has_orders = has_bond_orders(mol1)
        mol2_has_orders = has_bond_orders(mol2)
        if mol1_has_orders and not mol2_has_orders:
            # mol1 has, but mol2 does not - transfer from mol1 -> mol2
            mol2 = Chem.AssignBondOrdersFromTemplate(mol1, mol2)
        elif mol2_has_orders and not mol1_has_orders:
            # mol2 has, but mol1 does not - transfer from mol2 -> mol1
            mol1 = Chem.AssignBondOrdersFromTemplate(mol2, mol1)
        # Both or neither have bond orders - nothing to do.
    except ValueError as err:
        raise BondAssignmentError(
            'Failed to assign bond orders: {!s}'.format(err))

    # Same post-condition as before: both results must still exist.
    if not mol1:
        raise Exception('Mol1 does not exist!')
    if not mol2:
        raise Exception('Mol2 does not exist!')
    return mol1, mol2
def get_rmsd(smiles, pdb_path, ref_pdb_path):
    """Compute all-atom and largest-ring RMSD of a structure against a reference.

    Both PDB files are read with RDKit and given bond orders from ``smiles``.
    The atoms of the ``pdb_path`` molecule are renumbered according to its
    first substructure match with the reference. Both molecules are then
    written to temporary PDB files and re-loaded with mdtraj, which does the
    RMSD calculation against frame 0 of the reference.

    :param smiles: SMILES used as bond-order template for both structures
    :param pdb_path: PDB file of the structure to compare
    :param ref_pdb_path: PDB file of the reference structure
    :return: tuple ``(rmsd, best, ring_rmsd, best_ring, ref)`` where ``rmsd``
        and ``ring_rmsd`` are the ``md.rmsd`` results (all atoms / largest
        ring atoms), ``best`` and ``best_ring`` are the frames of the
        ring-superposed trajectory with the lowest respective value, and
        ``ref`` is the reference trajectory
    :raises IndexError: if the structure has no substructure match with the
        reference
    :raises AssertionError: if the largest-ring indices of the reference are
        not all contained in the largest ring of the renumbered structure
    """
    import os  # local import: only needed to build the temporary file names

    smiles_mol = Chem.MolFromSmiles(smiles)
    ref_mol = Chem.MolFromPDBFile(ref_pdb_path)
    mol = Chem.MolFromPDBFile(pdb_path)

    ref_mol = AllChem.AssignBondOrdersFromTemplate(smiles_mol, ref_mol)
    mol = AllChem.AssignBondOrdersFromTemplate(smiles_mol, mol)

    # Reorder the atoms of ``mol`` using its first match with the reference.
    order = list(mol.GetSubstructMatches(ref_mol)[0])
    mol = Chem.RenumberAtoms(mol, order)

    indices = cpeptools.get_largest_ring(ref_mol)
    assert len(set(indices) - set(cpeptools.get_largest_ring(mol))) == 0, "ring atom indices do not agree"

    # TemporaryDirectory replaces mkdtemp + mktemp: no race-prone name
    # guessing, and the directory is removed once the files are loaded.
    with tempfile.TemporaryDirectory() as tmp_dir:
        ref_pdb_filename = os.path.join(tmp_dir, "ref.pdb")
        pdb_filename = os.path.join(tmp_dir, "compare.pdb")
        Chem.MolToPDBFile(ref_mol, ref_pdb_filename)
        Chem.MolToPDBFile(mol, pdb_filename)
        ref = md.load(ref_pdb_filename)
        compare = md.load(pdb_filename)

    rmsd = md.rmsd(compare, ref, 0)
    ring_rmsd = md.rmsd(compare, ref, 0, atom_indices=indices)
    compare = compare.superpose(ref, 0, atom_indices=indices)
    return rmsd, compare[np.argmin(rmsd)], ring_rmsd, compare[np.argmin(ring_rmsd)], ref
def testTorsionFingerprints(self):
    """Check torsion lists, weights, angles and TFD values for 1DWD / 1PPC."""
    # X-ray structure taken from the paper (JCIM, 52, 1499, 2012): 1DWD
    template = Chem.MolFromSmiles(
        'NC(=[NH2+])c1ccc(C[C@@H](NC(=O)CNS(=O)(=O)c2ccc3ccccc3c2)C(=O)N2CCCCC2)cc1')
    dwd_path = os.path.join(RDConfig.RDCodeDir, 'Chem', 'test_data', '1DWD_ligand.pdb')
    ligand = Chem.MolFromPDBFile(dwd_path)
    ligand = AllChem.AssignBondOrdersFromTemplate(template, ligand)

    # --- torsion lists ---
    chain_tors, ring_tors = TorsionFingerprints.CalculateTorsionLists(ligand)
    self.assertEqual(len(chain_tors), 11)
    self.assertEqual(len(ring_tors), 4)
    self.assertAlmostEqual(chain_tors[-1][1], 180.0, 4)

    chain_tors, ring_tors = TorsionFingerprints.CalculateTorsionLists(ligand, maxDev='spec')
    self.assertAlmostEqual(chain_tors[-1][1], 90.0, 4)

    self.assertRaises(ValueError, TorsionFingerprints.CalculateTorsionLists, ligand,
                      maxDev='test')

    chain_tors, ring_tors = TorsionFingerprints.CalculateTorsionLists(ligand, symmRadius=0)
    self.assertEqual(len(chain_tors[0][0]), 2)

    # --- weights ---
    torsion_weights = TorsionFingerprints.CalculateTorsionWeights(ligand)
    self.assertAlmostEqual(torsion_weights[4], 1.0)
    self.assertEqual(len(torsion_weights), len(chain_tors + ring_tors))

    torsion_weights = TorsionFingerprints.CalculateTorsionWeights(ligand, 15, 14)
    self.assertAlmostEqual(torsion_weights[3], 1.0)

    self.assertRaises(ValueError, TorsionFingerprints.CalculateTorsionWeights, ligand, 15, 3)

    # --- torsion angles ---
    chain_tors, ring_tors = TorsionFingerprints.CalculateTorsionLists(ligand)
    angles_dwd = TorsionFingerprints.CalculateTorsionAngles(ligand, chain_tors, ring_tors)
    self.assertEqual(len(torsion_weights), len(angles_dwd))
    self.assertAlmostEqual(angles_dwd[2][0][0], 232.5346, 4)

    # --- torsion fingerprint deviation ---
    deviation = TorsionFingerprints.CalculateTFD(angles_dwd, angles_dwd)
    self.assertAlmostEqual(deviation, 0.0)

    ppc_path = os.path.join(RDConfig.RDCodeDir, 'Chem', 'test_data', '1PPC_ligand.pdb')
    other = Chem.MolFromPDBFile(ppc_path)
    other = AllChem.AssignBondOrdersFromTemplate(template, other)
    angles_ppc = TorsionFingerprints.CalculateTorsionAngles(other, chain_tors, ring_tors)

    torsion_weights = TorsionFingerprints.CalculateTorsionWeights(ligand)
    deviation = TorsionFingerprints.CalculateTFD(angles_dwd, angles_ppc,
                                                 weights=torsion_weights)
    self.assertAlmostEqual(deviation, 0.0691, 4)

    deviation = TorsionFingerprints.CalculateTFD(angles_dwd, angles_ppc)
    self.assertAlmostEqual(deviation, 0.1115, 4)

    # --- wrapper functions ---
    deviation = TorsionFingerprints.GetTFDBetweenMolecules(ligand, other)
    self.assertAlmostEqual(deviation, 0.0691, 4)

    # Attach the 1PPC conformer twice so the ligand carries three conformers.
    ligand.AddConformer(other.GetConformer(), assignId=True)
    ligand.AddConformer(other.GetConformer(), assignId=True)
    deviations = TorsionFingerprints.GetTFDBetweenConformers(ligand, confIds1=[0],
                                                             confIds2=[1, 2])
    self.assertEqual(len(deviations), 2)
    self.assertAlmostEqual(deviations[0], 0.0691, 4)

    tfd_matrix = TorsionFingerprints.GetTFDMatrix(ligand)
    self.assertEqual(len(tfd_matrix), 3)
def testTorsionFingerprintsAtomReordering(self):
    """TFD between the 1DWD ligand and its atom-reordered copy must be 0.0."""
    # X-ray structure taken from the paper (JCIM, 52, 1499, 2012): 1DWD
    template = Chem.MolFromSmiles('NC(=[NH2+])c1ccc(C[C@@H](NC(=O)CNS(=O)(=O)c2ccc3ccccc3c2)C(=O)N2CCCCC2)cc1')

    original_path = os.path.join(RDConfig.RDCodeDir, 'Chem', 'test_data', '1DWD_ligand.pdb')
    original = AllChem.AssignBondOrdersFromTemplate(template, Chem.MolFromPDBFile(original_path))

    reordered_path = os.path.join(RDConfig.RDCodeDir, 'Chem', 'test_data', '1DWD_ligand_reordered.pdb')
    reordered = AllChem.AssignBondOrdersFromTemplate(template, Chem.MolFromPDBFile(reordered_path))

    deviation = TorsionFingerprints.GetTFDBetweenMolecules(original, reordered)
    self.assertEqual(deviation, 0.0)
def main():
    """Load the dataset, attach 3D conformers and evaluate fingerprint models.

    Reads ``forSnigdha.csv``, builds RDKit molecules from the ``SMILES``
    column and, for every row, tries to take coordinates from a PDB file
    whose name starts with the row's ``Inchi-Key`` (``*_S1_solv.pdb`` first,
    then ``*_T1_solv.pdb``). If neither file can be used, the last error is
    printed and a conformer is embedded with ``AllChem.EmbedMolecule``.
    Fingerprints are then built with ``make_fingerprints`` and scored against
    the ``H**O`` and ``LUMO`` columns with ``test_fingerprints``.
    """
    data = pd.read_csv('forSnigdha.csv')

    # Add some new columns
    data['Mol'] = data['SMILES'].apply(Chem.MolFromSmiles)
    data['Conformer'] = data['Mol'].apply(Chem.rdmolops.RemoveHs)

    # Adding conformer
    path = '../FileConversion/PDBFiles/'
    for i in data.index:
        mol = data.at[i, 'Conformer']
        last_error = None
        for pattern in ('*_S1_solv.pdb', '*_T1_solv.pdb'):
            try:
                filenames = glob.glob(path + data.at[i, 'Inchi-Key'] + pattern)
                # .at writes into the frame itself; the previous chained
                # data['Conformer'][i] = ... may write to a temporary copy.
                data.at[i, 'Conformer'] = AllChem.AssignBondOrdersFromTemplate(
                    mol, Chem.MolFromPDBFile(filenames[0]))
                break
            except Exception as e:
                # Best effort: a missing file (IndexError) or a template
                # mismatch just moves on to the next candidate file.
                last_error = e
        else:
            # No PDB file could be used - fall back to an embedded conformer.
            print('error' + str(last_error))
            AllChem.EmbedMolecule(data.at[i, 'Conformer'])

    fp_list = make_fingerprints(data)

    # Checking for H**O
    print("H**o predictions")
    y = data['H**O'].values
    y = y * 27.211  # NOTE(review): presumably Hartree -> eV; confirm the units
    test_fingerprints(fp_list, Ridge(alpha=1e-9), y, verbose=True)

    # Checking for LUMO values
    print("Lumo predictions")
    y = data['LUMO'].values
    y = y * 27.211
    test_fingerprints(fp_list, Ridge(alpha=1e-9), y, verbose=True)
def assign_bond_orders_from_template_smiles(refsmile, rawsmile, return_mol=False):
    """Apply the bond orders of a template SMILES/SMARTS to another SMILES.

    :param refsmile: template string (with aromaticity etc.); parsed as
        SMILES when ``is_valid_smiles`` accepts it, otherwise as SMARTS
    :param rawsmile: SMILES of the molecule that receives the bond orders
    :param return_mol: if true, return the molecule instead of a SMILES
    :return: isomeric SMILES of the new molecule (or the molecule itself when
        ``return_mol`` is true); ``None`` if the template cannot be parsed,
        the raw molecule is missing, hydrogen removal fails or the template
        does not fit
    """
    # Create rawmol
    rawmol = check_smile_readable(rawsmile)

    # Create refmol
    if is_valid_smiles(refsmile):
        refmol = Chem.MolFromSmiles(refsmile)
    else:
        refmol = Chem.MolFromSmarts(refsmile)

    # Check for existence
    if not refmol:
        return None
    # NOTE(review): assumes check_smile_readable may hand back None for an
    # unreadable SMILES - confirm against its implementation.
    if rawmol is None:
        return None

    # Remove hydrogens from the reference molecule. RemoveHs returns a new
    # molecule, so the result must be kept (it used to be discarded, which
    # made this step a no-op).
    try:
        refmol = Chem.RemoveHs(refmol, implicitOnly=True)
    except ValueError:
        return None

    # Generate molecule with transferred bond orders
    try:
        newmol = Chem.AssignBondOrdersFromTemplate(refmol, rawmol)
    except ValueError:
        return None

    if return_mol:
        return newmol
    # Convert back to SMILES
    return Chem.MolToSmiles(newmol, isomericSmiles=True)
def create_mol_file(self, directory, file_base, mol_obj, smiles_file=None):
    """
    Write a .mol file for an individual ligand.

    If a SMILES file is given, its first line is used as a template to fix
    the bond orders of ``mol_obj`` before writing. If that fails for any
    reason, the failure is printed and ``mol_obj`` is written unchanged.

    :param directory: The directory where the mol file should be saved.
    :param file_base: The name of the mol file (without the ``.mol`` suffix)
    :param mol_obj: The RDKit Mol file object
    :param smiles_file: The filepath of a text file that contains the smiles
        string of the mol file (if exists).
    :return: the return value of ``MolToMolFile``
    """
    out_file = os.path.join(directory, str(file_base + ".mol"))

    if not mol_obj:
        print(f'WARNING: mol object is empty: {file_base}')

    if smiles_file:
        # Bound before the try so the failure report below can always print
        # it, even when reading the file is what failed.
        smiles = None
        try:
            with open(smiles_file, 'r') as handle:
                smiles = handle.readlines()[0].rstrip()
            template = AllChem.MolFromSmiles(smiles)
            new_mol = AllChem.AssignBondOrdersFromTemplate(template, mol_obj)
            return Chem.rdmolfiles.MolToMolFile(new_mol, out_file)
        except Exception as e:
            # Deliberate best effort: fall back to the uncorrected molecule.
            print(e)
            print('failed to fit template ' + smiles_file)
            print(f'template smiles: {smiles}')
            return Chem.rdmolfiles.MolToMolFile(mol_obj, out_file)
    else:
        print(f'Warning: No smiles file: {file_base}')

    # creating mol file
    return Chem.rdmolfiles.MolToMolFile(mol_obj, out_file)
def pdbqt2molblock(pdbqt_block, smi, mol_id):
    """Convert the first MODEL of a PDBQT block into a MOL block.

    Each line of the model is cut to its first 66 characters and parsed as
    PDB; bond orders are then assigned from ``smi``.

    :param pdbqt_block: PDBQT text containing at least one ``MODEL`` section
    :param smi: SMILES used as the bond-order template
    :param mol_id: value stored in the ``_Name`` property of the molecule
    :return: MOL block string, or ``None`` if the PDB text cannot be parsed
        or the template step fails (a message is written to stderr in the
        latter case)
    """
    first_model = pdbqt_block.split('MODEL')[1]
    pdb_text = '\n'.join([line[:66] for line in first_model.split('\n')])
    mol = Chem.MolFromPDBBlock(pdb_text, removeHs=False, sanitize=False)
    if not mol:
        return None

    try:
        template_mol = Chem.MolFromSmiles(smi)
        # Explicit hydrogens are added on non-carbon atoms only; hydrogens on
        # carbon atoms (chiral hydrogens) are left out to match the pdbqt
        # mol, e.g. [NH3+][C@H](C)C(=O)[O-]
        non_carbon_idx = [
            atom.GetIdx() for atom in template_mol.GetAtoms()
            if atom.GetAtomicNum() != 6
        ]
        template_mol = Chem.AddHs(template_mol, explicitOnly=True,
                                  onlyOnAtoms=non_carbon_idx)
        mol = AllChem.AssignBondOrdersFromTemplate(template_mol, mol)
        Chem.SanitizeMol(mol)
        Chem.AssignStereochemistry(mol, cleanIt=True, force=True,
                                   flagPossibleStereoCenters=True)
        mol.SetProp('_Name', mol_id)
        return Chem.MolToMolBlock(mol)
    except Exception:
        sys.stderr.write(
            f'Could not assign bond orders while parsing PDB: {mol_id}\n')
        return None
def pdb2sdf_via_templates(smifile="labute_set/labute.smi",
                          pdbdir="labute_set/pdb_rarey/"):
    """
    Convert PDB files without hydrogens/bond orders to SDF using template SMILES.

    Iterates over the molecules in ``smifile``. For each one, the PDB file
    ``<name>.pdb`` in ``pdbdir`` is used, or, if it does not exist, the last
    listed file whose name contains the first five characters of the
    molecule name. Bond orders are taken from the SMILES entry and the result
    is written to ``<smifile stem>_viaRDKIT.sdf``.

    Prints a summary and then terminates the interpreter with exit status 1.

    :param smifile: SMILES file with molecule names
    :param pdbdir: directory with PDB files named after the molecules
    """
    refs = Chem.SmilesMolSupplier(smifile, delimiter='\t ', titleLine=False,
                                  sanitize=True)
    sdfile = smifile.replace('.smi', '_viaRDKIT.sdf')
    w = Chem.SDWriter(sdfile)

    pdb_missed = 0
    assignment_failed = 0
    converted = 0
    try:
        for ref in refs:
            if ref is None:
                # Unparseable SMILES entry: there is nothing to use as a
                # template (this used to crash on ref.GetProp).
                logging.warning("Could not parse an entry of %s" % smifile)
                assignment_failed += 1
                continue

            cname = ref.GetProp("_Name")
            pdbfile = pdbdir + cname + '.pdb'
            if not os.path.isfile(pdbfile):
                similar = False
                for fname in os.listdir(pdbdir):
                    if cname[:5] in fname:
                        logging.info("Similar PDB found: %s & %s" %
                                     (fname, pdbfile))
                        pdbfile = pdbdir + fname
                        similar = True
                if similar:
                    logging.warning("PDB OK: %s" % pdbfile)
                else:
                    # Count a miss once per molecule; the old per-match
                    # decrement under-counted when several files matched.
                    pdb_missed += 1
                    logging.warning("PDB file not found: %s" % pdbfile)
                    continue

            mol = Chem.MolFromPDBFile(pdbfile)
            if mol is None:
                assignment_failed += 1
                continue
            try:
                mol = Chem.AssignBondOrdersFromTemplate(ref, mol)
            except ValueError:
                assignment_failed += 1
                continue

            mol = centerMol(mol)
            mol.SetProp("_Name", cname)
            Chem.SanitizeMol(mol)
            w.write(mol)
            converted += 1
    finally:
        # Flush and close the SD file even if a molecule blows up.
        w.close()

    print(
        "\nConversion finished: %d molecules, %d pdb missed, %d assignments failed, %d converted."
        % (len(refs), pdb_missed, assignment_failed, converted))
    # NOTE(review): exit status 1 after a successful run is kept as-is for
    # existing callers; confirm whether 0 was intended.
    sys.exit(1)
def show_docked(df):
    """Show every docked pose of ``df`` in a PyMol viewer.

    For each row, the docked mol2 block is passed through
    ``mol2_string_IO_san``, parsed as a PDB block, given bond orders from the
    row's SMILES and sent to the viewer under the row's name.

    :param df: table with the columns ``mol2_blocks_docked``, ``smiles`` and
        ``names``
    :raises ImportError: if OEChem is not licensed
    """
    oechem = import_("openeye.oechem")
    if not oechem.OEChemIsLicensed():
        raise(ImportError("Need License for OEChem!"))

    docked_blocks = list(df['mol2_blocks_docked'])
    template_smiles = list(df['smiles'])
    mol_names = list(df['names'])

    viewer = PyMol.MolViewer()
    viewer.DeleteAll()

    for block, smi, molid in zip(docked_blocks, template_smiles, mol_names):
        pdb_text = mol2_string_IO_san(block)
        mol = Chem.MolFromPDBBlock(pdb_text)
        template = Chem.MolFromSmiles(smi)
        new_mol = AllChem.AssignBondOrdersFromTemplate(template, mol)
        print(type(new_mol))
        print(molid)
        mol.SetProp('_Name', molid)
        # Round-trip through the binary form to hand the viewer a plain copy.
        probe = Chem.Mol(new_mol.ToBinary())
        viewer.ShowMol(probe, name=molid, showOnly=False)
def _fix_minimised(self) -> Chem.Mol:
    """
    Return the ligand of the pose with bond orders taken from ``self.params.mol``.

    PDBs are terrible for bond order etc. and Rosetta adds these based on
    atom types, so the ligand is corrected against a template: the params
    molecule with its ``*`` atoms deleted.

    :return: the ligand with template bond orders
    """
    self.journal.debug(f'{self.long_name} - making ligand only')
    pose_ligand = self.igor.mol_from_pose()
    dummy_query = Chem.MolFromSmiles('*')
    dummy_free_template = AllChem.DeleteSubstructs(self.params.mol, dummy_query)
    return AllChem.AssignBondOrdersFromTemplate(dummy_free_template, pose_ligand)
def test_confgen():
    """Conformer generation with eccentricity succeeds for cyclosporin A."""
    CsA_smiles = "CC[C@H]1C(=O)N(CC(=O)N([C@H](C(=O)N[C@H](C(=O)N([C@H](C(=O)N[C@H](C(=O)N[C@@H](C(=O)N([C@H](C(=O)N([C@H](C(=O)N([C@H](C(=O)N([C@H](C(=O)N1)[C@@H]([C@H](C)C/C=C/C)O)C)C(C)C)C)CC(C)C)C)CC(C)C)C)C)C)CC(C)C)C)C(C)C)CC(C)C)C)C"
    template = Chem.MolFromSmiles(CsA_smiles, sanitize=True)
    from_pdb = Chem.MolFromPDBFile("cpeptools/tests/data/CsA.pdb",
                                   removeHs=False, sanitize=True)
    with_bond_orders = AllChem.AssignBondOrdersFromTemplate(template, from_pdb)
    assert generate_conformers_with_eccentricity(with_bond_orders, 1, 0)
def dock(self) -> Chem.Mol:
    """
    Dock with ``igor.dock()``, save the holo PDB and return the docked ligand.

    The ligand extracted from the docked pose gets its bond orders from
    ``self.params.mol`` (with ``*`` atoms deleted) and is written as a mol
    file next to the PDB.

    :return: the docked ligand with template bond orders
    """
    pose = self.igor.dock()
    self.docked_pose = pose
    pose.dump_pdb(f'{self.work_path}/{self.long_name}/{self.long_name}.holo_docked.pdb')

    raw_ligand = self.igor.mol_from_pose(pose)
    dummy_query = Chem.MolFromSmiles('*')
    template = AllChem.DeleteSubstructs(self.params.mol, dummy_query)
    lig_chem = AllChem.AssignBondOrdersFromTemplate(template, raw_ligand)

    Chem.MolToMolFile(lig_chem, f'{self.work_path}/{self.long_name}/{self.long_name}.docked.mol')
    return lig_chem
def lig_sdf_from_pdb(lig_string, pdb_file, sdf_out, smiles=None):
    """Write the ligand lines of a PDB file as a single-molecule SD file.

    :param lig_string: substring that marks the ligand's lines in the PDB
        file (every line containing it is kept)
    :param pdb_file: path of the PDB file to read
    :param sdf_out: path of the SD file to write
    :param smiles: optional SMILES used as bond-order template; without it,
        hydrogens are added to the raw molecule instead
    """
    # Context manager so the input file is closed again.
    with open(pdb_file, 'r') as handle:
        pdb_ligs = ''.join(line for line in handle if lig_string in line)
    mol = Chem.rdmolfiles.MolFromPDBBlock(pdb_ligs, sanitize=False)

    if smiles:
        ref = Chem.MolFromSmiles(smiles)
        m = AllChem.AssignBondOrdersFromTemplate(ref, mol)
    else:
        m = Chem.AddHs(mol)

    # NOTE(review): the source formatting was lost; sanitisation and
    # hydrogen removal are assumed to apply to both branches - confirm.
    # Sanitise with every operation except aromaticity perception.
    Chem.SanitizeMol(m, sanitizeOps=Chem.SANITIZE_ALL ^ Chem.SANITIZE_SETAROMATICITY)
    m = Chem.RemoveHs(m)

    writer = Chem.rdmolfiles.SDWriter(sdf_out)
    try:
        writer.write(m)
    finally:
        # Close explicitly so the record is flushed to disk; the writer used
        # to be left open.
        writer.close()
def read_pdbqt(fname, smi, sanitize=True, removeHs=False):
    """
    Read all MODEL entries of a PDBQT file as separate molecules.

    If the file contains no ``MODEL`` the whole file is treated as a single
    structure. Entries that cannot be parsed are reported on stderr and
    skipped.

    :param fname: pdbqt file
    :param smi: SMILES of the molecule in the pdbqt file, used to assign
        bond orders
    :param sanitize: passed to ``Chem.MolFromPDBBlock``
    :param removeHs: passed to ``Chem.MolFromPDBBlock``
    :return: list of molecules
    """

    def parse_block(text):
        # Only the first 66 characters of every line are handed to the
        # PDB parser.
        trimmed = '\n'.join([line[:66] for line in text.split('\n')])
        return Chem.MolFromPDBBlock(trimmed, sanitize=sanitize,
                                    removeHs=removeHs)

    refmol = Chem.MolFromSmiles(smi)
    with open(fname) as handle:
        content = handle.read()

    # Pair every candidate block with the message to emit if it is unreadable.
    if 'MODEL' in content:
        candidates = [
            (block, f'The pose #{j+1} cannot be read from {fname}\n')
            for j, block in enumerate(content.split('MODEL ')[1:])
        ]
    else:
        candidates = [(content, f'Structure from {fname} cannot be read\n')]

    mols = []
    for text, failure_message in candidates:
        parsed = parse_block(text)
        if parsed is None:
            sys.stderr.write(failure_message)
            continue
        mols.append(AllChem.AssignBondOrdersFromTemplate(refmol, parsed))
    return mols
def get_gobbi_similarity(correct_ligand, mol_to_fix, type_fp='normal', use_features=False):
    """Print and return the Tanimoto similarity of two Gobbi 2D pharmacophore fingerprints.

    Both molecules get their bond orders from a template SMILES that is
    hard-coded in this function. The fingerprints are generated with the
    ``Gobbi_Pharm2D`` factory, using each molecule's 3D distance matrix.

    :param correct_ligand: first molecule
    :param mol_to_fix: second molecule
    :param type_fp: unused
    :param use_features: unused
    :return: the Tanimoto similarity (previously only printed, so callers
        had no access to the value)
    """
    ref = Chem.MolFromSmiles(
        'C1=CC(=C(C=C1C2=C(C(=O)C3=C(C=C(C=C3O2)O)O)O)O)O')
    mol1 = AllChem.AssignBondOrdersFromTemplate(ref, correct_ligand)
    mol2 = AllChem.AssignBondOrdersFromTemplate(ref, mol_to_fix)

    factory = Gobbi_Pharm2D.factory
    fp1 = Generate.Gen2DFingerprint(mol1, factory,
                                    dMat=Chem.Get3DDistanceMatrix(mol1))
    fp2 = Generate.Gen2DFingerprint(mol2, factory,
                                    dMat=Chem.Get3DDistanceMatrix(mol2))

    # Tanimoto similarity
    tani = DataStructs.TanimotoSimilarity(fp1, fp2)
    print('GOBBI similarity is ------> ', tani)
    return tani
def get_target_from_pdb(self) -> Chem.Mol:
    """
    Build the target ligand from the template's ligand-only PDB block.

    Bond orders come from ``self.ref_smiles``. The molecule is only returned
    here; NOTE(review): presumably the caller stores it as ``self.target``.

    :return: a ``Chem.Mol``
    """
    # Bond-order reference
    reference = Chem.MolFromSmiles(self.ref_smiles)
    # Conformation as found in the PDB (hydrogens are handled at the PDB step)
    from_pdb = Chem.MolFromPDBBlock(self.template.only_ligand)
    return AllChem.AssignBondOrdersFromTemplate(reference, from_pdb)
def dock(self) -> Chem.Mol:
    """
    Run ``igor.dock()``, save the docked complex and return the docked ligand.

    The ligand is extracted from the docked pose, gets its bond orders from
    ``self.params.mol`` (with ``*`` atoms deleted), is named ``docked`` and
    is written as a mol file next to the PDB.

    :return: the docked ligand with template bond orders
    """
    pose = self.igor.dock()
    self.docked_pose = pose
    pose.dump_pdb(f'{self.work_path}/{self.long_name}/{self.long_name}.holo_docked.pdb')

    raw_ligand = self.igor.mol_from_pose(pose)
    dummy_query = Chem.MolFromSmiles('*')
    template = AllChem.DeleteSubstructs(self.params.mol, dummy_query)
    lig_chem = AllChem.AssignBondOrdersFromTemplate(template, raw_ligand)
    lig_chem.SetProp('_Name', 'docked')

    Chem.MolToMolFile(lig_chem, f'{self.work_path}/{self.long_name}/{self.long_name}.docked.mol')
    return lig_chem
def check_and_assign_bond_orders_static(mol1, mol2):
    """Transfer bond orders from ``mol1`` to ``mol2``.

    :param mol1: molecule used as the bond-order template
    :param mol2: molecule that receives the bond orders
    :return: tuple ``(mol1, mol2)`` where ``mol2`` is the new molecule
        returned by the template assignment
    :raises Exception: if either molecule is missing (falsy)
    :raises BondAssignmentError: if the template assignment raises
        ``ValueError``
    """
    # Validate before the template call so a missing molecule yields the
    # intended message instead of whatever the call raises for None.
    if not mol1:
        raise Exception('Mol1 does not exist!')
    if not mol2:
        raise Exception('Mol2 does not exist!')

    try:
        # transfer from mol1 -> mol2
        mol2 = Chem.AssignBondOrdersFromTemplate(mol1, mol2)
    except ValueError as err:
        raise BondAssignmentError(
            'Failed to assign bond orders: {!s}'.format(err))

    # Same post-condition as before: the result must still exist.
    if not mol2:
        raise Exception('Mol2 does not exist!')
    return mol1, mol2
def get_largest_ring_indices(traj, smiles=None):
    """Return ``get_largest_ring`` of the first frame of a trajectory.

    The first frame is written to a temporary PDB file and read back with
    RDKit (hydrogens kept).

    :param traj: trajectory; ``traj[0].save(path)`` must write a PDB file
    :param smiles: optional SMILES used as bond-order template for the
        molecule read from the PDB file
    :return: the result of ``get_largest_ring`` for that molecule
    """
    import os  # local import: only needed to build the temporary file name

    # TemporaryDirectory replaces mkdtemp + mktemp: no race-prone name
    # guessing, and the directory is removed once the molecule is loaded.
    with tempfile.TemporaryDirectory() as tmp_dir:
        pdb_filename = os.path.join(tmp_dir, "frame.pdb")
        traj[0].save(pdb_filename)
        mol = Chem.MolFromPDBFile(pdb_filename, removeHs=False)

    if smiles is not None:
        ref = Chem.MolFromSmiles(smiles, sanitize=True)
        mol = AllChem.AssignBondOrdersFromTemplate(ref, mol)

    # some structures might be non-sensical and get NaN
    indices = get_largest_ring(mol)
    return indices
def assign_temp(block, smiles, model):
    """Helper function to create an RDMol from PDB information.

    :param block: PDB block of the molecule
    :param smiles: SMILES used as bond-order template
    :param model: unused
    :return: the molecule with template bond orders, or ``None`` if the
        SMILES is not recognised or the assignment fails (a message is
        printed in either case)
    """
    tmp = Chem.MolFromPDBBlock(block)
    template = Chem.MolFromSmiles(smiles)
    if not template:
        # Single pre-formatted argument: prints the same text on Python 2
        # and Python 3 (the old print statement was Python 2 only).
        print("%s  not recognised" % (smiles,))
        return None
    try:
        mol = AllChem.AssignBondOrdersFromTemplate(template, tmp)
    except ValueError:
        print("DOESN'T FIT %s" % (smiles,))
        mol = None
    except Exception:
        # Still best effort, but no longer swallows KeyboardInterrupt or
        # SystemExit like the bare ``except:`` did.
        print("UNSPECIFED ERRROR")
        mol = None
    return mol
def split_protein_ligand(complex_pdb, protein_pdb, ligand_pdb, ligand_sdf=None, ligand_smiles=None, ligand_resname=None):
    """Split a complex PDB into protein and ligand files using PyMOL.

    Hydrogens are removed from the complex. The ligand is selected by
    residue name if given, otherwise as everything that is not polymer; the
    protein is the polymer selection.

    If ``ligand_sdf`` is given, the ligand is also written as SDF: with RDKit
    and bond orders from ``ligand_smiles`` when a SMILES is available, and
    with the ``obabel`` command line tool otherwise or when the RDKit route
    fails.

    :param complex_pdb: path of the complex PDB file to read
    :param protein_pdb: path of the protein PDB file to write
    :param ligand_pdb: path of the ligand PDB file to write
    :param ligand_sdf: optional path of the ligand SDF file to write
    :param ligand_smiles: optional SMILES used as bond-order template
    :param ligand_resname: optional residue name of the ligand
    """
    import subprocess

    import pymol
    from pymol import cmd as pymol_cmd

    def convert_with_obabel():
        # Argument list instead of a shell string: paths containing spaces
        # or shell metacharacters are passed through verbatim. A failed
        # conversion is not raised, matching the previous os.system call.
        try:
            subprocess.run(['obabel', str(ligand_pdb), '-O', str(ligand_sdf)],
                           check=False)
        except OSError as e:
            print(f"obabel could not be run: {e}")

    # load complex in pymol session
    pymol_cmd.load(complex_pdb, 'complex')
    pymol_cmd.remove('hydrogens')

    # extract ligand
    if ligand_resname is not None:
        pymol_cmd.extract('ligand', f'resn {ligand_resname}')
    else:
        pymol_cmd.extract('ligand', 'not polymer')

    # extract protein
    pymol_cmd.extract('receptor', 'polymer')

    # save protein and ligand, then delete the session
    pymol_cmd.save(protein_pdb, 'receptor')
    pymol_cmd.save(ligand_pdb, 'ligand')
    pymol_cmd.delete('all')

    # pdb to sdf
    if ligand_sdf is None:
        return

    if ligand_smiles:
        try:
            m_smiles = Chem.RemoveHs(Chem.MolFromSmiles(ligand_smiles))
            m_pdb = Chem.RemoveHs(Chem.MolFromPDBFile(ligand_pdb))
            m = AllChem.AssignBondOrdersFromTemplate(m_smiles, m_pdb)
            w = Chem.SDWriter(ligand_sdf)
            try:
                w.write(m)
            finally:
                w.close()
        except Exception as e:
            # Best effort: fall back to Open Babel when the template fails.
            print(f"{ligand_sdf} failed: {e}")
            convert_with_obabel()
    else:
        convert_with_obabel()
    return
def get_rdmol_from_traj(traj, smiles=None, only_first_frame=True):
    """Convert a trajectory into an RDKit molecule via a temporary PDB file.

    :param traj: trajectory; ``traj.save(path)`` must write a PDB file
    :param smiles: optional SMILES used as bond-order template. When given,
        the PDB file is read with ``removeHs=True``; otherwise hydrogens are
        kept.
    :param only_first_frame: bool, keep only the first frame of the
        trajectory (default) or all frames
    :return: the RDKit molecule read from the PDB file
    """
    import os  # local import: only needed to build the temporary file name

    # TemporaryDirectory replaces mkdtemp + mktemp: no race-prone name
    # guessing, and the directory is removed once the molecule is loaded.
    with tempfile.TemporaryDirectory() as tmp_dir:
        pdb_filename = os.path.join(tmp_dir, "traj.pdb")
        if only_first_frame:
            traj[0].save(pdb_filename)
        else:
            traj.save(pdb_filename)

        if smiles is not None:
            mol = Chem.MolFromPDBFile(pdb_filename, removeHs=True)
        else:
            mol = Chem.MolFromPDBFile(pdb_filename, removeHs=False)

    if smiles is not None:
        ref = Chem.MolFromSmiles(smiles, sanitize=True)
        # TODO currently have problem with hydrogens
        mol = AllChem.AssignBondOrdersFromTemplate(ref, mol)
    return mol
def assign_connectivity_from_template(self, molecule):
    """
    Apply the bond orders of a molecule's connectivity template to its
    RDKit representation.

    The result is stored back in ``molecule._rdkit_molecule``.

    Parameters
    ----------
    molecule : an peleffy.topology.Molecule
        The peleffy's Molecule object

    Raises
    ------
    ValueError
        If the molecule has no connectivity template
    """
    from rdkit.Chem import AllChem

    template = molecule.connectivity_template
    if template is None:
        raise ValueError('A connectivity template must be previously '
                         + 'assigned to the molecule')

    molecule._rdkit_molecule = AllChem.AssignBondOrdersFromTemplate(
        template, molecule.rdkit_molecule)
def process_ligand(ligand, res_name):
    """
    Add bond orders to a pdb ligand.

    The atoms with residue name ``res_name`` are selected, written to an
    in-memory PDB stream and read back as an RDKit molecule. The SMILES
    returned by pypdb for ``res_name`` serves as the bond-order template.

    :param ligand: ligand as generated by prody
    :param res_name: residue name of ligand to extract
    :return: molecule with bond orders assigned
    """
    selection = ligand.select(f"resname {res_name}")

    # Template from the chemical description of the residue
    description = pypdb.describe_chemical(f"{res_name}")
    template_smiles = description["describeHet"]["ligandInfo"]["ligand"]["smiles"]
    template = AllChem.MolFromSmiles(template_smiles)

    # Selection -> PDB text -> RDKit molecule
    stream = StringIO()
    writePDBStream(stream, selection)
    from_pdb = AllChem.MolFromPDBBlock(stream.getvalue())

    return AllChem.AssignBondOrdersFromTemplate(template, from_pdb)
def extract_mol(cls, name: str, filepath: str, smiles: Optional[str] = None, ligand_resn: str = 'LIG') -> Chem.Mol:
    """
    Extract the ligand with residue name ``ligand_resn`` from a PDB file.

    If ``find_attachment`` reports a covalent attachment, the attached atom
    is added to the ligand as a dummy atom (atomic number 0, PDB name
    ``CONN``) bonded to the first ligand atom whose PDB name equals that of
    the attachee. If a SMILES is given it is used as the bond-order
    template; a ``ValueError`` in that step is logged, not raised.

    :param name: name of ligand (stored as ``_Name``)
    :param filepath: PDB file
    :param smiles: SMILES used for bond-order correction
    :param ligand_resn: PDB residue name of the ligand
    :return: the ligand
    """
    holo = Chem.MolFromPDBFile(filepath, proximityBonding=False, removeHs=False)
    mol = Chem.SplitMolByPDBResidues(holo, whiteList=[ligand_resn])[ligand_resn]
    attachment, attachee = cls.find_attachment(holo, ligand_resn)
    if attachment is not None:
        # covalent
        mol = Chem.SplitMolByPDBResidues(holo, whiteList=[ligand_resn])[ligand_resn]
        mod = Chem.RWMol(mol)
        attachment.SetAtomicNum(0)  # dummy atom.
        attachment.GetPDBResidueInfo().SetName('CONN')
        pos = holo.GetConformer().GetAtomPosition(attachment.GetIdx())
        ni = mod.AddAtom(attachment)
        mod.GetConformer().SetAtomPosition(ni, pos)
        attachee_name = attachee.GetPDBResidueInfo().GetName()
        for atom in mod.GetAtoms():
            if atom.GetPDBResidueInfo().GetName() == attachee_name:
                ai = atom.GetIdx()
                mod.AddBond(ai, ni, Chem.BondType.SINGLE)
                break
        mol = mod.GetMol()
    if smiles is not None:
        if '*' in Chem.MolToSmiles(mol) and '*' not in smiles:
            new_smiles = cls.make_covalent(smiles)
            if new_smiles:
                cls.journal.info(f'{name} is covalent but a non covalent SMILES was passed, which was converted')
                smiles = new_smiles
            else:
                # Keep the original SMILES rather than overwriting it with
                # an empty result, which would crash in MolFromSmiles.
                cls.journal.warning(f'{name} is covalent but a non covalent SMILES was passed, which failed to convert')
        try:
            template = Chem.MolFromSmiles(smiles)
            mol = AllChem.AssignBondOrdersFromTemplate(template, mol)
        except ValueError as error:
            cls.journal.warning(f'{name} failed at bonding ({type(error)}: {error}).')
    mol.SetProp('_Name', name)
    return mol
def evaluate_labute(
        smifile="labute_set/labute.smi",
        pdbdir='/home/loschen/calc/ml_bond_parser/naomi_rarey/ci300358c_si_001/pdb/',
        iterative=False,
        skipH=True,
        showImages=True):
    """
    Evaluate the bond-order classifier on the labute set.

    For every molecule in ``smifile`` a PDB file is looked up, bond orders
    are assigned from the SMILES template and ``generate_predictions`` is run
    on the result. Per-molecule hits and a summary with the overall accuracy
    are printed.

    NOTE(review): the block nesting below was reconstructed from a source
    whose indentation was lost - confirm against the original file.

    :param smifile: SMILES file with molecule names
    :param pdbdir: overwritten inside the function with
        ``"labute_set/pdb_rarey/"``, so the argument has no effect
    :param iterative: if true, ``clf_iter.p`` is loaded as well and passed to
        ``generate_predictions``
    :param skipH: passed to ``generate_predictions``
    :param showImages: overwritten inside the function with ``False``, so the
        argument has no effect
    :return: None
    """
    # Both arguments are overridden here (see docstring).
    showImages = False
    pdbdir = "labute_set/pdb_rarey/"
    refs = Chem.SmilesMolSupplier(smifile, delimiter='\t ', titleLine=False, sanitize=True)
    labute_set = []
    missing = 0
    hits = 0  # never read again in this function
    # Pass 1: find a PDB file for every reference molecule.
    for ref in refs:
        similar = False
        same = False
        cname = ref.GetProp("_Name")
        #print("Name: %8s"%(cname))
        pdbfile = pdbdir + cname + '.pdb'
        if not os.path.isfile(pdbfile):
            # No exact file name: use the last listed pdb file whose name
            # contains the first 8 characters of the molecule name.
            files = os.listdir(pdbdir)
            similar = False
            for file in files:
                if cname[:8] in file and file.endswith('pdb'):
                    pdbfile = pdbdir + file
                    similar = True
        else:
            same = True
        if same or similar:
            print("%-10s: PDB found: %s" % (cname, pdbfile))
            labute_set.append([cname, ref, Chem.MolToSmiles(ref), pdbfile])
        else:
            print("%-10s: missing!!" % (cname))
            missing += 1
    print("Evaluation run with option: noH(%r)" % (skipH))
    print("Loading classifier...")
    # NOTE(review): the pickle files are opened without being closed, and
    # unpickling is only safe for trusted files.
    clf = pickle.load(open('clf.p', "rb"))
    if iterative:
        clf_iter = pickle.load(open('clf_iter.p', "rb"))
    else:
        clf_iter = None
    res_dict = {}
    assignment_failed = 0
    # Pass 2: assign bond orders from the template and run the prediction.
    for i, (cname, ref, smiles, pdbfile) in enumerate(labute_set):
        res_dict[cname] = 0
        #create SD file via template
        mol = Chem.MolFromPDBFile(pdbfile, sanitize=False, removeHs=True)
        if mol is None or ref is None:
            assignment_failed += 1
            continue
        try:
            mol = Chem.AssignBondOrdersFromTemplate(ref, mol)
        except ValueError:
            assignment_failed += 1
            continue
        if mol is None:
            print("%d %s - Could not create mol from PDB!" % (i, cname))
            continue
        res, sdf_pred = generate_predictions(mol, skipH=skipH, iterative=iterative, forceAromatics=False, maxiter=1, verbose=False, clf=clf, clf_iter=clf_iter, isEval=True)
        #show 2D pictures of mol & res_sdf
        # Not executed as long as showImages is forced to False above.
        if showImages:
            mol_pred = Chem.MolFromMolBlock(sdf_pred, sanitize=True)
            if mol_pred is None:
                print("WARNING: Could not sanitize predicted mol!")
                continue
            Chem.Compute2DCoords(mol)
            mol_pred = Chem.AddHs(mol_pred)
            mol_pred = Chem.RemoveHs(mol_pred)
            Chem.Compute2DCoords(mol_pred)
            ms = [mol, mol_pred]
            res_str = '[FALSE]'
            if res:
                res_str = '[OK]'
            img = Draw.MolsToGridImage(ms, molsPerRow=2, subImgSize=(400, 400), legends=[cname, 'mol_pred' + res_str])
            img.save('images/' + cname + '_' + str(i) + '.png')
            img.show()
            # Waits for the user before continuing (Python 2 builtin).
            raw_input()
        if res is None:
            print("WARNING: %d %s - Could not predict from mol!" % (i, cname))
            continue
        #if i % 50 == 0:
        #logging.info("%d %r\n" % (i, res))
        if res:
            res_dict[cname] += 1
    failures = 0
    corrects = 0
    # NOTE(review): dict.iteritems() exists on Python 2 only.
    for key, value in res_dict.iteritems():
        print("%-12s HITS: %2d" % (key, value))
        if value == 0:
            failures += 1
        else:
            corrects += 1
    print("\n%4d total " % (len(refs)))
    print("%4d found " % (len(labute_set)))
    print("%4d missed " % (missing))
    print("%4d assignment failed " % (assignment_failed))
    print("%4d unique " % (len(res_dict)))
    nall = len(labute_set)
    # NOTE(review): raises ZeroDivisionError when no PDB file was found.
    acc = corrects / float(nall)
    print("\nTOTAL: %5d OK: %5d WRONG: %5d Accuray: %6.3f\n" % (nall, corrects, failures, acc))
def pdb2sdf_via_templates2(
        noLabute=True,
        smidir='/home/loschen/calc/ml_bond_parser/naomi_rarey/ci300358c_si_001/smiles/',
        pdbdir='/home/loschen/calc/ml_bond_parser/naomi_rarey/ci300358c_si_001/pdb/',
        inverse=True):
    """
    Convert PDB files without hydrogens/bond orders to SDF using template SMILES,
    starting from the PDB files.

    For every ``.pdb`` file in ``pdbdir`` the SMILES file of the same name in
    ``smidir`` supplies the bond-order templates. The output path and the
    path of the labute SMILES file are hard-coded.

    NOTE(review): the block nesting below was reconstructed from a source
    whose indentation was lost - confirm against the original file. The code
    uses Python 2 syntax (print statements, ``raw_input``).

    :param noLabute: if true, molecules whose name (or its first 8
        characters) occurs in the labute set are skipped, not written
    :param smidir: directory with one ``.smi`` file per PDB file
    :param pdbdir: directory with the PDB files
    :param inverse: not used in this function
    :return: None
    """
    from os import listdir
    from os.path import join, isfile
    sdfile = './naomi_rarey/ci300358c_si_001/naomi_viaRDKIT_nolabute.sdf'
    w = Chem.SDWriter(sdfile)
    pdbfiles = [
        f for f in listdir(pdbdir)
        if isfile(join(pdbdir, f)) and f.endswith('.pdb')
    ]
    # NOTE(review): this handle is never closed, and the name ``f`` is
    # reused as the loop variable further down.
    f = open("/home/loschen/calc/ml_bond_parser/labute_set/labute.smi", "r")
    labute_names = []
    # The second whitespace-separated field of every line is taken as name.
    for x in f:
        smi = x.split()
        if len(smi) > 1:
            labute_names.append(smi[1].strip())
    labute_short = [x[:8] for x in labute_names]
    print("Labute names: %d" % (len(labute_names)))
    # Waits for the user before the conversion starts.
    raw_input()
    smi_missed = 0
    assignment_failed = 0
    converted = 0
    skipped = 0
    for f in pdbfiles:
        cname = f.replace('.pdb', '')
        smifile = smidir + cname + '.smi'
        if not os.path.isfile(smifile):
            smi_missed += 1
        else:
            print smifile
            refmoles = Chem.SmilesMolSupplier(smifile, delimiter='\t ', titleLine=False, sanitize=True)
            mol = Chem.MolFromPDBFile(pdbdir + f)
            if mol is None:
                continue
            # Try the templates in turn and stop at the first one that fits.
            for ref in refmoles:
                if ref is None:
                    continue
                try:
                    mol = Chem.AssignBondOrdersFromTemplate(ref, mol)
                    break
                except ValueError:
                    assignment_failed += 1
                    continue
            # NOTE(review): if no template fits, the molecule read from the
            # PDB file is still processed and possibly written below.
            mol = centerMol(mol)
            mol.SetProp("_Name", cname)
            Chem.SanitizeMol(mol)
            if cname in labute_names and noLabute:
                print cname
                skipped += 1
            elif cname[:8] in labute_short and noLabute:
                print cname, labute_short[labute_short.index(cname[:8])]
                skipped += 1
            else:
                w.write(mol)
                converted += 1
    w.close()
    print(
        "\nConversion finished: %d molecules, %d smi missed, %d assignments failed, %d converted %d skipped."
        % (len(pdbfiles), smi_missed, assignment_failed, converted, skipped))
def extract_mol(cls, name: str, filepath: str, smiles: Optional[str] = None, ligand_resn: str = 'LIG', removeHs: bool = False, throw_on_error : bool = False) -> Chem.Mol:
    """
    Extract the ligand with residue name ``ligand_resn`` from the PDB file
    ``filepath`` and, if a SMILES is given, correct its bond orders with it.

    A covalent bond to another residue is kept as a ``*``/R dummy atom. If
    the ligand is covalent but the SMILES has no ``*``, the SMILES is
    converted with ``make_covalent`` (if that succeeds) so that the
    bond-order correction can work.

    :param name: name of ligand
    :type name: str
    :param filepath: PDB file
    :type filepath: str
    :param smiles: SMILES
    :type smiles: str
    :param ligand_resn: 3-letter PDB name of the ligand residue
    :type ligand_resn: str
    :param removeHs: passed to ``Chem.MolFromPDBFile`` for the first read
    :type removeHs: bool
    :param throw_on_error: if an error occurs in the template step, raise it
        instead of logging a warning
    :type throw_on_error: bool
    :return: rdkit Chem object
    :rtype: Chem.Mol
    """
    holo = Chem.MolFromPDBFile(filepath, proximityBonding=False, removeHs=removeHs)
    if holo is None:
        # Second attempt without sanitization.
        cls.journal.warning(f'PDB {filepath} is problematic. Skipping sanitization.')
        holo = Chem.MolFromPDBFile(filepath, proximityBonding=False, removeHs=True, sanitize=False)

    mol = Chem.SplitMolByPDBResidues(holo, whiteList=[ligand_resn])[ligand_resn]
    attachment, attachee = cls.find_attachment(holo, ligand_resn)

    if attachment is not None:
        # Covalent ligand: append the attached atom as a dummy atom.
        editable = Chem.RWMol(
            Chem.SplitMolByPDBResidues(holo, whiteList=[ligand_resn])[ligand_resn])
        attachment.SetAtomicNum(0)  # dummy atom.
        attachment.GetPDBResidueInfo().SetName('CONN')
        dummy_position = holo.GetConformer().GetAtomPosition(attachment.GetIdx())
        dummy_index = editable.AddAtom(attachment)
        editable.GetConformer().SetAtomPosition(dummy_index, dummy_position)

        # Bond the dummy to the first ligand atom carrying the attachee's name.
        anchor_name = attachee.GetPDBResidueInfo().GetName()
        for candidate in editable.GetAtoms():
            if candidate.GetPDBResidueInfo().GetName() != anchor_name:
                continue
            editable.AddBond(candidate.GetIdx(), dummy_index, Chem.BondType.SINGLE)
            break
        mol = editable.GetMol()

    if smiles is not None:
        ligand_is_covalent = '*' in Chem.MolToSmiles(mol)
        if ligand_is_covalent and '*' not in smiles:
            new_smiles = cls.make_covalent(smiles)
            if not new_smiles:
                cls.journal.warning(f'{name} is covalent but a non covalent SMILES was passed, which failed to convert')
            else:
                cls.journal.info(f'{name} is covalent but a non covalent SMILES was passed, which was converted')
                smiles = new_smiles

        try:
            template = Chem.MolFromSmiles(smiles)
            mol = AllChem.AssignBondOrdersFromTemplate(template, mol)
        except ValueError as error:
            if throw_on_error:
                raise error
            cls.journal.warning(f'{name} failed at template-guided bond order correction - ({type(error)}: {error}).')

    mol.SetProp('_Name', name)
    return mol
def fix_pdb(self, pdbfile):
    """Read a PDB file (hydrogens kept) and assign bond orders from ``self.dethio_mol``.

    :param pdbfile: path of the PDB file
    :return: the molecule with template bond orders
    """
    from_pdb = Chem.MolFromPDBFile(pdbfile, removeHs=False)
    return AllChem.AssignBondOrdersFromTemplate(self.dethio_mol, from_pdb)