def check_mol(mol):
    """
    Match ``mol`` against the module-level pharmacophore and embed each match.

    Relies on the module-level names ``feat_fact`` (feature factory) and
    ``pcophore`` (pharmacophore). The candidate features, the list of full
    pharmacophore matches and the number of embeddings are printed as the
    function progresses.

    Parameters
    ----------
    mol
        Molecule handed to ``EmbedLib.MatchPharmacophoreToMol`` and
        ``rdDistGeom.GetMoleculeBoundsMatrix``.

    Returns
    -------
    list
        The embedded molecules after UFF optimization and transformation.
        Empty when the molecule does not match the pharmacophore.
    """
    # Bound up front so the no-match path returns an empty list instead of
    # failing on an unbound local.
    res = []
    match, mList = EmbedLib.MatchPharmacophoreToMol(mol, feat_fact, pcophore)
    if not match:
        return res

    # Report the atom ids and family of every candidate feature.
    for candidates in mList:
        for candidate in candidates:
            print("%s %s" % (candidate.GetAtomIds(), candidate.GetFamily()))

    bounds = rdDistGeom.GetMoleculeBoundsMatrix(mol)
    pList = EmbedLib.GetAllPharmacophoreMatches(mList, bounds, pcophore)
    print(pList)
    print(len(pList))

    # Reduce every match to the atom ids of its features; this is the form
    # passed to EmbedPharmacophore below.
    phMatches = [[matched.GetAtomIds() for matched in pMatch] for pMatch in pList]

    for phMatch in phMatches:
        bm, embeds, nFail = EmbedLib.EmbedPharmacophore(
            mol, phMatch, pcophore, count=20, silent=1)
        print("-----> embeds num: %d" % len(embeds))
        for embed in embeds:
            AllChem.UFFOptimizeMolecule(embed)
            # NOTE(review): the arguments here are the two bounds matrices,
            # exactly as in the original call. That looks unintended for an
            # alignment of point sets -- confirm which reference and probe
            # coordinates should be aligned.
            align_data = rdAlignment.GetAlignmentTransform(bm, bounds)
            AllChem.TransformMol(embed, align_data[1])
            res.append(embed)
    return res
def _align_molecules(self, molecules: List[Chem.Mol]) -> None:
    """ Align a list of molecules to a given pharmacophore.

        Each processed molecule is appended to ``self.molecules`` as a
        ``MolScore(score, id, mol)`` named tuple. Molecules that cannot be
        matched, or for which no alignment is produced, are stored with a
        score of 0.0 together with the input molecule. Otherwise the score is
        the inverse of the smallest SSD and the stored molecule is the
        corresponding embedding. Molecules whose embedding raises are skipped
        and nothing is stored for them.

        Parameters
        ----------
        molecules : list of rdkit.Chem.Mol
            List of molecules to align.
    """
    self.n_molecules += len(molecules)

    rdkit_pharmacophore, radii = self.pharmacophore.to_rdkit()
    apply_radii_to_bounds(radii, rdkit_pharmacophore)

    fdef = os.path.join(RDConfig.RDDataDir, 'BaseFeatures.fdef')
    featFactory = ChemicalFeatures.BuildFeatureFactory(fdef)

    MolScore = namedtuple("MolScore", ["score", "id", "mol"])

    def unscored(molecule):
        # Entry recorded for a molecule that could not be matched or aligned.
        return MolScore(0.0, molecule.GetProp("_Name"), molecule)

    for mol in tqdm(molecules):
        bounds_matrix = rdDistGeom.GetMoleculeBoundsMatrix(mol)
        # Check whether the molecule's features can match the pharmacophore.
        can_match, all_matches = EmbedLib.MatchPharmacophoreToMol(
            mol, featFactory, rdkit_pharmacophore)
        if not can_match:
            self.molecules.append(unscored(mol))
            continue

        # Match the molecule to the pharmacophore without aligning it.
        failed, _, matched_mols, _ = EmbedLib.MatchPharmacophore(
            all_matches, bounds_matrix, rdkit_pharmacophore, useDownsampling=True)
        if failed:
            self.molecules.append(unscored(mol))
            continue

        atom_match = [list(x.GetAtomIds()) for x in matched_mols]
        try:
            mol_H = Chem.AddHs(mol)
            _, embeddings, _ = EmbedLib.EmbedPharmacophore(
                mol_H, atom_match, rdkit_pharmacophore, count=10)
        except Exception:
            # Best effort: a molecule that cannot be embedded is skipped.
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            continue

        SSDs = transform_embeddings(rdkit_pharmacophore, embeddings, atom_match)
        if len(SSDs) == 0:
            self.molecules.append(unscored(mol))
            continue

        best_fit_index = min(enumerate(SSDs), key=itemgetter(1))[0]
        best_ssd = SSDs[best_fit_index]
        # An SSD of exactly zero would make the inverse blow up; treat it as
        # the best possible score instead of raising ZeroDivisionError.
        score = 1 / best_ssd if best_ssd else float("inf")
        self.molecules.append(
            MolScore(score, mol.GetProp("_Name"), embeddings[best_fit_index]))
def test3Embed(self):
    """
    Match and embed the molecules stored in 'cdk2-syn-clip100.pkl.gz'.

    Every record is unpickled, matched against ``self.pcophore`` and counted
    as a hit when ``EmbedLib.MatchPharmacophore`` does not report a failure.
    For the molecules listed in ``testResults`` the statistics returned by
    ``EmbedLib.EmbedOne`` are compared with the stored reference values.
    The test expects 100 records and 50 hits.
    """
    testResults = {
        'mol_197': (218.80, 35.75, 110.33, 11.58, 109.66, 11.09, 90.35, 2.95, 0.00),
        'mol_223': (259.19, 6.27, 134.13, 1.12, 134.06, 1.12, 85.74, 0.61, 0.00),
        'mol_269': (204.51, 7.89, 103.89, 1.20, 102.66, 1.20, 88.07, 1.21, 6.00),
    }
    nDone = 0
    nHits = 0
    # The context manager closes the gzip handle even when an assertion fails.
    with gzip.open(os.path.join(self.dataDir, 'cdk2-syn-clip100.pkl.gz'), 'rb') as inF:
        while 1:
            try:
                name, molPkl, _ = cPickle.load(inF, encoding='latin1')
                if PY3:
                    molPkl = bytes(molPkl, encoding='latin1')
            except Exception:
                # Any load failure ends the loop; the nDone assertion below
                # detects a stream that ended early.
                break

            nDone += 1

            mol = Chem.Mol(molPkl)
            nboundsMat = rdDistGeom.GetMoleculeBoundsMatrix(mol)
            DG.DoTriangleSmoothing(nboundsMat)
            matched, matches = EmbedLib.MatchPharmacophoreToMol(
                mol, self.featFactory, self.pcophore)
            if not matched:
                continue

            failed, _, match, stats = EmbedLib.MatchPharmacophore(
                matches, nboundsMat, self.pcophore, useDownsampling=1)
            if failed:
                continue

            nHits += 1
            if name in testResults:
                stats = EmbedLib.EmbedOne(mol, name, match, self.pcophore, count=10,
                                          silent=1, randomSeed=23)
                tgt = testResults[name]
                self.assertEqual(len(tgt), len(stats))
                print(name)
                print(','.join(['%.2f' % x for x in stats]))
                # Index 1 is not compared; every other value is checked with
                # a tolerance of 5.0.
                self.assertTrue(feq(tgt[0], stats[0], 5.0), (tgt[0], stats[0]))
                for i in range(2, len(tgt)):
                    self.assertTrue(feq(tgt[i], stats[i], 5.0), (tgt[i], stats[i]))

    self.assertEqual(nDone, 100)
    self.assertEqual(nHits, 50)
def test4Search(self):
    """
    Search 'cdk2-syn-clip100.pkl.gz' with a three-feature pharmacophore.

    The pharmacophore is built from an Acceptor, a Donor and an Aromatic
    feature with explicit lower, upper and 2D upper bounds. Each record is
    unpickled, its bounds matrix is recomputed and smoothed, and the molecule
    is matched with downsampling and 2D limits enabled. The test expects 100
    records, 93 feature-level matches and 67 hits.
    """
    featFactory = ChemicalFeatures.BuildFeatureFactory(
        os.path.join(self.dataDir, 'BaseFeatures.fdef'))

    activeFeats = [
        ChemicalFeatures.FreeChemicalFeature('Acceptor', Geometry.Point3D(0.0, 0.0, 0.0)),
        ChemicalFeatures.FreeChemicalFeature('Donor', Geometry.Point3D(0.0, 0.0, 0.0)),
        ChemicalFeatures.FreeChemicalFeature('Aromatic', Geometry.Point3D(0.0, 0.0, 0.0)),
    ]
    pcophore = Pharmacophore.Pharmacophore(activeFeats)
    pcophore.setLowerBound(0, 1, 2.251)
    pcophore.setUpperBound(0, 1, 2.451)
    pcophore.setUpperBound2D(0, 1, 3)

    pcophore.setLowerBound(0, 2, 4.970)
    pcophore.setUpperBound(0, 2, 5.170)
    pcophore.setUpperBound2D(0, 2, 6)

    pcophore.setLowerBound(1, 2, 2.681)
    pcophore.setUpperBound(1, 2, 2.881)
    pcophore.setUpperBound2D(1, 2, 6)

    nDone = 0
    nMatches = 0
    nHits = 0
    # The context manager closes the gzip handle even when an assertion fails.
    with gzip.open(os.path.join(self.dataDir, 'cdk2-syn-clip100.pkl.gz'), 'rb') as inF:
        while 1:
            try:
                # Only the pickled molecule is used; the stored bounds matrix
                # is replaced by a freshly computed one below.
                _, molPkl, _ = cPickle.load(inF, encoding='latin1')
                if PY3:
                    molPkl = bytes(molPkl, encoding='latin1')
            except Exception:
                # Any load failure ends the loop; the nDone assertion below
                # detects a stream that ended early.
                break

            nDone += 1

            mol = Chem.Mol(molPkl)
            boundsMat = rdDistGeom.GetMoleculeBoundsMatrix(mol)
            DG.DoTriangleSmoothing(boundsMat)

            canMatch, matches = EmbedLib.MatchPharmacophoreToMol(mol, featFactory, pcophore)
            if not canMatch:
                continue

            nMatches += 1
            failed, _, _, _ = EmbedLib.MatchPharmacophore(
                matches, boundsMat, pcophore, useDownsampling=True,
                use2DLimits=True, mol=mol)
            if not failed:
                nHits += 1

    self.assertEqual(nDone, 100)
    self.assertEqual(nMatches, 93)
    self.assertEqual(nHits, 67)
def _matchMol(self, tpl, pcophore, featFactory, downSample):
    """
    Report whether the pickled molecule in ``tpl`` matches ``pcophore``.

    ``tpl`` is unpacked as ``(name, pickled molecule, bounds matrix)``.
    ``downSample`` is forwarded as ``useDownsampling``.

    Returns 1 when ``EmbedLib.MatchPharmacophore`` does not flag a failure,
    and 0 when it does or when the features cannot match at all.
    """
    _, pickled, bounds = tpl
    molecule = Chem.Mol(pickled)
    can_match, candidates = EmbedLib.MatchPharmacophoreToMol(molecule, featFactory, pcophore)
    if not can_match:
        return 0

    outcome = EmbedLib.MatchPharmacophore(candidates, bounds, pcophore,
                                          useDownsampling=downSample)
    # The first element of the result is the failure flag.
    return 0 if outcome[0] else 1
def optimize(self, rdmol, boundsMatrix=None, atomMatch=None):
    """
    Optimize the conformers of ``rdmol`` and find the lowest-energy one.

    When either ``boundsMatrix`` or ``atomMatch`` is missing, each conformer
    is optimized with UFF and its UFF energy is used. When both are given,
    ``EmbedLib.OptimizeMol`` is called once per conformer and the second
    element of its result is used as the energy.

    :param rdmol: molecule whose conformers are iterated
    :param boundsMatrix: bounds matrix forwarded to ``EmbedLib.OptimizeMol``
    :param atomMatch: atom matches forwarded to ``EmbedLib.OptimizeMol``
    :return: ``(rdmol, minEid)`` where ``minEid`` is the id of the conformer
        with the lowest energy (0 when there are no conformers)
    """
    unconstrained = boundsMatrix is None or atomMatch is None
    best_id = 0
    # Sentinel far above any energy expected from the optimizers.
    best_energy = 9.999999e99

    for conformer in rdmol.GetConformers():
        if unconstrained:
            conf_id = conformer.GetId()
            AllChem.UFFOptimizeMolecule(rdmol, confId=conf_id)
            force_field = AllChem.UFFGetMoleculeForceField(rdmol, confId=conf_id)
            current = force_field.CalcEnergy()
        else:
            # NOTE(review): no conformer id is passed here, so the loop does
            # not select which conformer this call works on -- confirm.
            _, current = EmbedLib.OptimizeMol(rdmol, boundsMatrix,
                                              atomMatches=atomMatch,
                                              forceConstant=100000.0)

        if current < best_energy:
            best_id = conformer.GetId()
            best_energy = current

    return rdmol, best_id
def check_mol(mol):
    """
    Match ``mol`` against the module-level pharmacophore, embed and align it.

    Relies on the module-level names ``feat_fact`` (feature factory),
    ``pcophore`` (pharmacophore), ``ref_mat`` (reference points for the
    alignment) and ``GetFeatsPerAtoms``. All existing conformers of ``mol``
    are removed first. Progress, the alignment result and "no hits" are
    printed.

    Parameters
    ----------
    mol
        Molecule to screen; its conformers are removed in place.

    Returns
    -------
    list
        The embedded molecules after UFF optimization and alignment. Empty
        when the molecule does not match the pharmacophore.
    """
    res = []
    mol.RemoveAllConformers()
    match, mList = EmbedLib.MatchPharmacophoreToMol(mol, feat_fact, pcophore)
    if not match:
        print("no hits")
        return res

    # Report the atom ids and family of every candidate feature.
    for candidates in mList:
        for candidate in candidates:
            print("%s %s" % (candidate.GetAtomIds(), candidate.GetFamily()))

    bounds = rdDistGeom.GetMoleculeBoundsMatrix(mol)
    pList = EmbedLib.GetAllPharmacophoreMatches(mList, bounds, pcophore)

    # Reduce every match to the atom ids of its features; this is the form
    # passed to EmbedPharmacophore below.
    phMatches = [[matched.GetAtomIds() for matched in pMatch] for pMatch in pList]

    for phMatch in phMatches:
        bm, embeds, nFail = EmbedLib.EmbedPharmacophore(
            mol, phMatch, pcophore, count=5, silent=1)
        print("-----> embeds num: %d" % len(embeds))
        for embed in embeds:
            AllChem.UFFOptimizeMolecule(embed)
            # Look up, for each matched atom-id group, the feature of the
            # embedded molecule and collect its position as a probe point.
            feats = feat_fact.GetFeaturesForMol(embed)
            feats_dict = GetFeatsPerAtoms(feats)
            match_feats = [feats_dict[atomid] for atomid in phMatch]
            pro_mat = [list(feat.GetPos()) for feat in match_feats]
            align_data = rdAlignment.GetAlignmentTransform(
                ref_mat, pro_mat, maxIterations=200)
            AllChem.TransformMol(embed, align_data[1])
            print(align_data[0])
            print(align_data[1])
            res.append(embed)
    return res
def testIssue268(self):
    """
    Regression test built on the 'Issue268' data files.

    Two molecules are matched against the pickled pharmacophore. Every call
    to ``EmbedLib.MatchPharmacophore`` (with and without 2D limits) must
    return a match of length 4, both before and after triangle smoothing of
    the bounds matrices.
    """
    featFactory = ChemicalFeatures.BuildFeatureFactory(
        os.path.join(self.dataDir, 'Issue268.fdef'))
    m1 = Chem.MolFromMolFile(os.path.join(self.dataDir, 'Issue268_Mol1.mol'))
    m2 = Chem.MolFromMolFile(os.path.join(self.dataDir, 'Issue268_Mol2.mol'))

    # The pickle is read as text so that '\r\n' line endings can be
    # normalised before it is handed to the unpickler as bytes.
    with open(os.path.join(self.dataDir, 'Issue268_Pcop.pkl'), 'r') as inTF:
        buf = inTF.read().replace('\r\n', '\n').encode('utf-8')
    with io.BytesIO(buf) as inF:
        pcop = cPickle.load(inF, encoding='latin1')

    _, mList1 = EmbedLib.MatchFeatsToMol(m1, featFactory, pcop.getFeatures())
    _, mList2 = EmbedLib.MatchFeatsToMol(m2, featFactory, pcop.getFeatures())
    b1 = rdDistGeom.GetMoleculeBoundsMatrix(m1)
    b2 = rdDistGeom.GetMoleculeBoundsMatrix(m2)

    def check_matches():
        # Same four checks, in the same order, before and after smoothing.
        self.assertEqual(len(EmbedLib.MatchPharmacophore(mList1, b1, pcop)[2]), 4)
        self.assertEqual(len(EmbedLib.MatchPharmacophore(mList2, b2, pcop)[2]), 4)
        self.assertEqual(
            len(EmbedLib.MatchPharmacophore(mList1, b1, pcop, mol=m1, use2DLimits=True)[2]), 4)
        self.assertEqual(
            len(EmbedLib.MatchPharmacophore(mList2, b2, pcop, mol=m2, use2DLimits=True)[2]), 4)

    check_matches()

    from rdkit import DistanceGeometry as DG
    self.assertTrue(DG.DoTriangleSmoothing(b1))
    self.assertTrue(DG.DoTriangleSmoothing(b2))

    check_matches()
def check_mol(mols, FeatFact, pcophore):
    """
    Filter ``mols`` down to those whose features can match ``pcophore``.

    For every matching molecule the atom ids of each candidate feature are
    printed; the number of matching molecules is printed at the end.

    Parameters
    ----------
    mols
        Iterable of molecules to screen.
    FeatFact
        Feature factory forwarded to ``EmbedLib.MatchPharmacophoreToMol``.
        (The original carried a "define FeatFact To Do" note; the caller must
        supply it.)
    pcophore
        Pharmacophore to match against.

    Returns
    -------
    list
        The molecules for which ``MatchPharmacophoreToMol`` reported a match,
        in input order.
    """
    matched_mols = []
    for mol in mols:
        match, mList = EmbedLib.MatchPharmacophoreToMol(mol, FeatFact, pcophore)
        if not match:
            continue
        matched_mols.append(mol)
        for candidates in mList:
            for candidate in candidates:
                print(candidate.GetAtomIds())
    print(len(matched_mols))
    return matched_mols
def rd_embed(self):
    """
    Embed ``self.rdkit_molecule`` and optimize the resulting conformers.

    Without a bounds matrix (``self.bm``) or an atom match
    (``self.atom_match``), multiple conformers are embedded directly.
    Otherwise the molecule is embedded against the bounds matrix, retrying
    when an attempt raises.

    Returns ``(self.rdkit_molecule, minEid)`` as produced by
    ``self.optimize_rdkit_molecule()``, or ``(None, None)`` when every
    constrained embedding attempt failed.
    """
    max_attempts = 10000

    if self.bm is None or self.atom_match is None:
        AllChem.EmbedMultipleConfs(self.rdkit_molecule, max_attempts, randomSeed=1)
        self.rdkit_molecule, minEid = self.optimize_rdkit_molecule()
        return self.rdkit_molecule, minEid

    # Embed according to the bounds matrix, tolerating failed attempts.
    self.rdkit_molecule.RemoveAllConformers()
    embedded = False
    for attempt in range(1, max_attempts + 1):
        try:
            EmbedLib.EmbedMol(self.rdkit_molecule, self.bm, atomMatch=self.atom_match)
        except ValueError:
            logging.info(
                "RDKit failed to embed on attempt {0} of {1}".format(
                    attempt, max_attempts))
        except RuntimeError:
            logging.info("RDKit failed to embed.")
        else:
            embedded = True
            break

    if not embedded:
        logging.error("RDKit failed all attempts to embed")
        return None, None

    # Per the original note, the embedded conformers all carry id 0;
    # renumber them so each one can be addressed individually.
    for new_id, conformer in enumerate(self.rdkit_molecule.GetConformers()):
        conformer.SetId(new_id)

    self.rdkit_molecule, minEid = self.optimize_rdkit_molecule()
    return self.rdkit_molecule, minEid
def _align_molecule(mol, pharmacophore, matches, featFactory, sort=False):
    """ Align a molecule to a given pharmacophore.

        Uses the rdkit alignment algorithm. The function returns nothing; a
        successful alignment is reported by adding a ``Match`` to `matches`.

        Parameters
        ----------
        mol : rdkit.Chem.Mol
            Molecule to align.

        pharmacophore : rdkit.Chem.Pharm3D.Pharmacophore
            An rdkit pharmacophore.

        matches : list
            If the molecule is matched to the pharmacophore, a
            ``Match(SSD, name, embedded molecule)`` is added to this list.

        featFactory : rdkit.Chem.rdMolChemicalFeatures.MolChemicalFeatureFactory
            The feature factory.

        sort : bool, default=False
            Whether to insert the match so that the list stays sorted.
    """
    bounds_matrix = rdDistGeom.GetMoleculeBoundsMatrix(mol)
    # Check whether the molecule's features can match the pharmacophore.
    # all_matches is a list of tuples, each holding chemical features.
    can_match, all_matches = EmbedLib.MatchPharmacophoreToMol(mol, featFactory, pharmacophore)
    if not can_match:
        return

    # Match the molecule to the pharmacophore without aligning it.
    failed, _, matched_mols, _ = EmbedLib.MatchPharmacophore(
        all_matches, bounds_matrix, pharmacophore, useDownsampling=True)
    if failed:
        return

    atom_match = [list(x.GetAtomIds()) for x in matched_mols]
    try:
        mol_H = Chem.AddHs(mol)
        # Embed the molecule onto the pharmacophore; embeddings is a list of
        # molecules with a single conformer each.
        _, embeddings, _ = EmbedLib.EmbedPharmacophore(
            mol_H, atom_match, pharmacophore, count=10)
    except Exception:
        # Best effort: a molecule that cannot be embedded is ignored.
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        return

    # Align the embeddings to the pharmacophore.
    SSDs = transform_embeddings(pharmacophore, embeddings, atom_match)
    if len(SSDs) == 0:
        return

    best_fit_index = min(enumerate(SSDs), key=itemgetter(1))[0]

    try:
        mol_id = mol.GetProp("_Name")
    except KeyError:
        # The molecule carries no "_Name" property.
        mol_id = None

    matched_mol = Match(SSDs[best_fit_index], mol_id, embeddings[best_fit_index])
    if not sort:
        matches.append(matched_mol)
        return

    try:
        bisect.insort(matches, matched_mol)
    except TypeError:
        # A repeated molecule ties on the leading fields and the comparison
        # falls through to the molecules, which cannot be ordered; the
        # duplicate is dropped.
        return