def test5EmbedFail(self):
    """Check EmbedBoundsMatrix's failure behaviour on a fixed 3x3 matrix.

    With default arguments the call must raise ValueError; with
    ``randomizeOnFailure=1`` the same call must complete without raising.
    """
    # np.float was only an alias of the builtin float and has been removed
    # from NumPy; np.float64 yields the same dtype.
    arr = np.array([[0, 1.0, 5.0], [1.0, 0, 1.0], [3.0, 1.0, 0]], np.float64)
    self.assertRaises(ValueError, lambda: DG.EmbedBoundsMatrix(arr))
    # DG.EmbedBoundsMatrix(arr,randomizeOnFailure=0,randomSeed=1)
    DG.EmbedBoundsMatrix(arr, randomizeOnFailure=1)
def test5EmbedFail(self):
    """Check EmbedBoundsMatrix's failure behaviour on a fixed 3x3 matrix.

    With default arguments the call must raise ValueError; with
    ``randomizeOnFailure=1`` the same call must complete without raising.
    """
    arr = Numeric.array([[0, 1.0, 5.0], [1.0, 0, 1.0], [3.0, 1.0, 0]], Numeric.Float)
    # assertRaises is the supported spelling; the failUnlessRaises alias is
    # deprecated and absent from current unittest releases.
    self.assertRaises(ValueError, lambda: DG.EmbedBoundsMatrix(arr))
    # DG.EmbedBoundsMatrix(arr,randomizeOnFailure=0,randomSeed=1)
    DG.EmbedBoundsMatrix(arr, randomizeOnFailure=1)
def test4Embed(self):
    """Smooth and embed a 3-point bounds matrix, then check two squared distances.

    The squared lengths of coords[0]-coords[1] and coords[1]-coords[2] are
    compared against 1.0 with the file's ``feq`` helper (third argument
    0.001, presumably a tolerance).
    """
    # np.float was only an alias of the builtin float and has been removed
    # from NumPy; np.float64 yields the same dtype.
    arr = np.array([[0, 1.0, 5.0], [1.0, 0, 1.0], [0.0, 1.0, 0]], np.float64)
    self.assertTrue(DG.DoTriangleSmoothing(arr))
    coords = DG.EmbedBoundsMatrix(arr, randomSeed=100)
    v1 = coords[0] - coords[1]
    v2 = coords[1] - coords[2]
    d1 = np.dot(v1, v1)
    self.assertTrue(feq(d1, 1.0, 0.001))
    d2 = np.dot(v2, v2)
    self.assertTrue(feq(d2, 1.0, 0.001))
def test4Embed(self):
    """Smooth and embed a 3-point bounds matrix, then check two squared distances.

    The squared lengths of coords[0]-coords[1] and coords[1]-coords[2] are
    compared against 1.0 with the file's ``feq`` helper (third argument
    0.001, presumably a tolerance).
    """
    arr = Numeric.array([[0, 1.0, 5.0], [1.0, 0, 1.0], [0.0, 1.0, 0]], Numeric.Float)
    # assertTrue replaces the failUnless alias, which is deprecated and
    # absent from current unittest releases.
    self.assertTrue(DG.DoTriangleSmoothing(arr))
    coords = DG.EmbedBoundsMatrix(arr, randomSeed=100)
    v1 = coords[0] - coords[1]
    v2 = coords[1] - coords[2]
    d1 = Numeric.dot(v1, v1)
    self.assertTrue(feq(d1, 1.0, 0.001))
    d2 = Numeric.dot(v2, v2)
    self.assertTrue(feq(d2, 1.0, 0.001))
def testProvidingBoundsMatrix(self):
    """Embed "C1CCC1C" from a hand-edited bounds matrix.

    The matrix entries linking atom 3 with atoms 0, 2 and 4 are overwritten
    with 1.21 / 1.20 before smoothing; after embedding, each of those three
    distances must be within 0.05 of 1.2.
    """
    hub = 3
    partners = (0, 2, 4)

    m1 = Chem.MolFromSmiles("C1CCC1C")
    bm1 = rdDistGeom.GetMoleculeBoundsMatrix(m1)
    for other in partners:
        bm1[other, hub] = 1.21
        bm1[hub, other] = 1.20
    DG.DoTriangleSmoothing(bm1)

    ps = rdDistGeom.EmbedParameters()
    ps.useRandomCoords = True
    ps.SetBoundsMat(bm1)
    ps.randomSeed = 0xf00d
    self.assertEqual(rdDistGeom.EmbedMolecule(m1, ps), 0)

    conf = m1.GetConformer()
    for other in partners:
        separation = (conf.GetAtomPosition(hub) - conf.GetAtomPosition(other)).Length()
        self.assertAlmostEqual(separation, 1.2, delta=0.05)
def testScaleBoundsMatForce(self):
    """
    For pentane, fix a target value for the 1-5 distance and embed the
    molecule twice per seed, once with a weak and once with a strong
    boundsMatForceScaling. The conformer produced with the stronger scaling
    must end up with a 1-5 distance closer to the target than the one
    produced with the weaker scaling.
    """
    target = 4

    def _make_params(seed, scaling, bounds):
        # Same attribute order as the plain, inline setup would use.
        params = rdDistGeom.EmbedParameters()
        params.randomSeed = seed
        params.useBasicKnowledge = True
        params.useRandomCoords = False
        params.boundsMatForceScaling = scaling
        params.SetBoundsMat(bounds)
        return params

    def _deviation(mol):
        # Absolute difference between the atom 0 - atom 4 distance and target.
        conf = mol.GetConformer()
        return abs((conf.GetAtomPosition(4) - conf.GetAtomPosition(0)).Length() - target)

    for seed in range(5):
        weak = Chem.MolFromSmiles("CCCCC")
        bounds = rdDistGeom.GetMoleculeBoundsMatrix(weak)
        bounds[0, 4] = target
        bounds[4, 0] = target
        DG.DoTriangleSmoothing(bounds)
        self.assertEqual(rdDistGeom.EmbedMolecule(weak, _make_params(seed, 0.1, bounds)), 0)

        strong = Chem.MolFromSmiles("CCCCC")
        self.assertEqual(rdDistGeom.EmbedMolecule(strong, _make_params(seed, 10, bounds)), 0)

        self.assertTrue(_deviation(strong) < _deviation(weak))
def AddExcludedVolumes(bm, excludedVolumes, smoothIt=True):
    """ Adds a set of excluded volumes to the bounds matrix
    and returns the new matrix

    excludedVolumes is a list of ExcludedVolume objects; each one gets its
    ``index`` attribute set to the row/column it occupies in the result.

    If smoothIt is true, DG.DoTriangleSmoothing is applied to the new matrix
    before it is returned.

    An IndexError raised while writing a feature index is logged and
    re-raised.

    >>> boundsMat = numpy.array([[0.0, 2.0, 2.0],[1.0, 0.0, 2.0],[1.0, 1.0, 0.0]])
    >>> ev1 = ExcludedVolume.ExcludedVolume(([(0, ), 0.5, 1.0], ), exclusionDist=1.5)
    >>> bm = AddExcludedVolumes(boundsMat, (ev1, ))

    the results matrix is one bigger:

    >>> bm.shape == (4, 4)
    True

    and the original bounds mat is not altered:

    >>> boundsMat.shape == (3, 3)
    True

    >>> print(', '.join([f'{x:.3f}' for x in bm[-1]]))
    0.500, 1.500, 1.500, 0.000
    >>> print(', '.join([f'{x:.3f}' for x in bm[:,-1]]))
    1.000, 3.000, 3.000, 0.000

    """
    oDim = bm.shape[0]
    dim = oDim + len(excludedVolumes)
    res = numpy.zeros((dim, dim), dtype=numpy.float64)
    res[:oDim, :oDim] = bm
    for i, vol in enumerate(excludedVolumes):
        bmIdx = oDim + i
        vol.index = bmIdx

        # set values to all the atoms (and earlier excluded volumes):
        res[bmIdx, :bmIdx] = vol.exclusionDist
        res[:bmIdx, bmIdx] = 1000.0

        # set values to our defining features:
        for indices, minV, maxV in vol.featInfo:
            for index in indices:
                try:
                    res[bmIdx, index] = minV
                    res[index, bmIdx] = maxV
                except IndexError:
                    logger.error(f'BAD INDEX: res[{bmIdx},{index}], shape is {str(res.shape)}')
                    # bare raise keeps the original message and traceback
                    raise

        # set values to other (later) excluded volumes. One slice assignment
        # covers everything the old per-j loop wrote over and over; an empty
        # slice (last volume) is a no-op, as the empty loop was.
        res[bmIdx, bmIdx + 1:dim] = 0.0
        res[bmIdx + 1:dim, bmIdx] = 1000.0

    if smoothIt:
        DG.DoTriangleSmoothing(res)
    return res
def test3SmoothPass(self):
    """Triangle smoothing must succeed on this matrix and update it in place.

    After smoothing, entry [0, 2] is expected to be 2.2 while the other
    checked entries keep their input values.
    """
    # np.float was only an alias of the builtin float and has been removed
    # from NumPy; np.float64 yields the same dtype.
    arr = np.array([[0, 1.1, 5.0], [0.9, 0, 1.1], [0.0, 0.9, 0]], np.float64)
    self.assertTrue(DG.DoTriangleSmoothing(arr))
    self.assertTrue(feq(arr[0, 2], 2.2))
    self.assertTrue(feq(arr[2, 0], 0.0))
    self.assertTrue(feq(arr[0, 1], 1.1))
    self.assertTrue(feq(arr[1, 0], 0.9))
    self.assertTrue(feq(arr[1, 2], 1.1))
def get_rdkit_mol(self, rmg_molecule=None, reaction_family="H_Abstraction", distance_data=None):
    """
    Build an RDKit molecule for ``rmg_molecule`` and, when ``distance_data``
    is given, embed it against an edited bounds matrix.

    :param rmg_molecule: molecule to convert; falls back to
        ``self.rmg_molecule`` when falsy.
    :param reaction_family: forwarded to ``self.get_labels``.
    :param distance_data: when truthy, a bounds matrix is built, edited (if
        there are labels), triangle-smoothed and used for embedding.
    :return: ``(rdkit_molecule, bm)``; ``bm`` is None when no
        ``distance_data`` was supplied.

    Side effect: when exactly three labels are found, a copy of the molecule
    with one extra single bond is stored on ``self._pseudo_geometry``.
    """
    if not rmg_molecule:
        rmg_molecule = self.rmg_molecule
    rdkit_molecule = Chem.RWMol(Conformer().get_rdkit_mol(rmg_molecule=rmg_molecule))

    labels, atom_match = self.get_labels(rmg_molecule, reaction_family)

    # Sanity check: both representations must list the same elements in the
    # same order, since labels are used as indices into either one.
    for i, atom in enumerate(rmg_molecule.atoms):
        assert atom.number == rdkit_molecule.GetAtoms()[i].GetAtomicNum()

    if len(labels) == 3:
        rd_copy = rdkit_molecule.__copy__()
        lbl1, lbl2, lbl3 = labels

        if not rd_copy.GetBondBetweenAtoms(lbl1, lbl2):
            rd_copy.AddBond(lbl1, lbl2, order=rdkit.Chem.rdchem.BondType.SINGLE)
        elif not rd_copy.GetBondBetweenAtoms(lbl2, lbl3):
            # Only add the second bond when it is actually missing; adding a
            # bond that already exists is an error in RDKit.
            rd_copy.AddBond(lbl2, lbl3, order=rdkit.Chem.rdchem.BondType.SINGLE)

        self._pseudo_geometry = rd_copy

    logging.info("Initially embedded molecule")

    bm = None
    if distance_data:
        logging.info("Getting bounds matrix")
        bm = self.get_bounds_matrix(rdkit_molecule=rdkit_molecule)

        if len(labels) > 0:
            logging.info("Editing bounds matrix")
            bm = self.edit_matrix(rmg_molecule, bm, labels, distance_data)

        logging.info("Performing triangle smoothing on bounds matrix.")
        DistanceGeometry.DoTriangleSmoothing(bm)

        logging.info("Now attempting to embed using edited bounds matrix.")
        rdkit_molecule = self.rd_embed(rdkit_molecule, 10000, bm=bm, match=atom_match)[0]

    return rdkit_molecule, bm
def EmbedMol(mol, bm, atomMatch=None, weight=2.0, randomSeed=-1, excludedVolumes=None):
    """ Embeds a molecule from a bounds matrix and attaches the resulting
    coordinates to the molecule as a conformer with id 0.

    When atomMatch is supplied, one extra weight entry is generated for every
    pair of positions in it and handed to the embedding routine (used in the
    optimization phase to ensure that the resulting geometry really does
    satisfy the pharmacophore).
    NOTE(review): the weight tuples use positions within atomMatch, not the
    atom indices stored in it -- confirm this is what the embedder expects.

    When excludedVolumes is supplied it should be a sequence of
    ExcludedVolume objects; each one is weighted against every atom and gets
    its ``pos`` attribute set from the embedded coordinates.

    >>> m = Chem.MolFromSmiles('c1ccccc1C')
    >>> bounds = MolDG.GetMoleculeBoundsMatrix(m)
    >>> bounds.shape == (7, 7)
    True
    >>> m.GetNumConformers()
    0
    >>> EmbedMol(m,bounds,randomSeed=23)
    >>> m.GetNumConformers()
    1

    """
    numAtoms = mol.GetNumAtoms()

    weights = []
    if atomMatch:
        numFeats = len(atomMatch)
        weights.extend((first, second, weight)
                       for first in range(numFeats)
                       for second in range(first + 1, numFeats))
    if excludedVolumes:
        for exVol in excludedVolumes:
            volIdx = exVol.index
            # excluded volumes affect every other atom:
            weights.extend((atomIdx, volIdx, weight) for atomIdx in range(numAtoms))

    coords = DG.EmbedBoundsMatrix(bm, weights=weights, numZeroFail=1, randomSeed=randomSeed)

    conf = Chem.Conformer(numAtoms)
    conf.SetId(0)
    for atomIdx in range(numAtoms):
        conf.SetAtomPosition(atomIdx, list(coords[atomIdx]))

    if excludedVolumes:
        for exVol in excludedVolumes:
            exVol.pos = numpy.array(coords[exVol.index])

    mol.AddConformer(conf)
def test4Search(self):
    """Screen the pickled molecule set against a three-feature pharmacophore.

    Counts how many records were read, how many molecules can match the
    pharmacophore's features, and how many of those pass
    EmbedLib.MatchPharmacophore with downsampling and 2D limits; the three
    counts are compared against fixed expected values.
    """
    featFactory = ChemicalFeatures.BuildFeatureFactory(
        os.path.join(self.dataDir, 'BaseFeatures.fdef'))

    activeFeats = [
        ChemicalFeatures.FreeChemicalFeature('Acceptor', Geometry.Point3D(0.0, 0.0, 0.0)),
        ChemicalFeatures.FreeChemicalFeature('Donor', Geometry.Point3D(0.0, 0.0, 0.0)),
        ChemicalFeatures.FreeChemicalFeature('Aromatic', Geometry.Point3D(0.0, 0.0, 0.0)),
    ]
    pcophore = Pharmacophore.Pharmacophore(activeFeats)
    pcophore.setLowerBound(0, 1, 2.251)
    pcophore.setUpperBound(0, 1, 2.451)
    pcophore.setUpperBound2D(0, 1, 3)

    pcophore.setLowerBound(0, 2, 4.970)
    pcophore.setUpperBound(0, 2, 5.170)
    pcophore.setUpperBound2D(0, 2, 6)

    pcophore.setLowerBound(1, 2, 2.681)
    pcophore.setUpperBound(1, 2, 2.881)
    pcophore.setUpperBound2D(1, 2, 6)

    nDone = 0
    nMatches = 0
    nHits = 0

    # The context manager closes the gzip handle even when an assertion or
    # an RDKit call fails part-way through the loop.
    with gzip.open(os.path.join(self.dataDir, 'cdk2-syn-clip100.pkl.gz'), 'rb') as inF:
        while True:
            try:
                # The stored bounds matrix is ignored; a fresh one is computed below.
                name, molPkl, boundsMat = cPickle.load(inF, encoding='latin1')
                if PY3:
                    molPkl = bytes(molPkl, encoding='latin1')
            except Exception:
                # End of the pickle stream (or an unreadable record) ends the
                # loop; the nDone assertion below catches a short read. Unlike
                # a bare except, this lets KeyboardInterrupt/SystemExit through.
                break

            nDone += 1

            mol = Chem.Mol(molPkl)
            boundsMat = rdDistGeom.GetMoleculeBoundsMatrix(mol)
            DG.DoTriangleSmoothing(boundsMat)

            canMatch, matches = EmbedLib.MatchPharmacophoreToMol(mol, featFactory, pcophore)
            if canMatch:
                nMatches += 1
                r = EmbedLib.MatchPharmacophore(matches, boundsMat, pcophore,
                                                useDownsampling=True, use2DLimits=True,
                                                mol=mol)
                failed, bm, match, details = r
                if not failed:
                    nHits += 1

    self.assertEqual(nDone, 100)
    self.assertEqual(nMatches, 93)
    # print 'nhits:',nHits
    self.assertEqual(nHits, 67)
def test3Embed(self):
    """Match every pickled molecule to self.pcophore and spot-check embedding stats.

    For the three molecules named in ``testResults`` the statistics returned
    by EmbedLib.EmbedOne are compared against stored reference values;
    overall record and hit counts are checked at the end.
    """
    testResults = {
        'mol_197': (218.80, 35.75, 110.33, 11.58, 109.66, 11.09, 90.35, 2.95, 0.00),
        'mol_223': (259.19, 6.27, 134.13, 1.12, 134.06, 1.12, 85.74, 0.61, 0.00),
        'mol_269': (204.51, 7.89, 103.89, 1.20, 102.66, 1.20, 88.07, 1.21, 6.00),
    }
    nDone = 0
    nHits = 0

    # The context manager closes the gzip handle even when an assertion
    # inside the loop fails.
    with gzip.open(os.path.join(self.dataDir, 'cdk2-syn-clip100.pkl.gz'), 'rb') as inF:
        while True:
            try:
                name, molPkl, _ = cPickle.load(inF, encoding='latin1')
                if PY3:
                    molPkl = bytes(molPkl, encoding='latin1')
            except Exception:
                # End of the pickle stream ends the loop; the nDone assertion
                # below catches a short read.
                break

            nDone += 1

            mol = Chem.Mol(molPkl)
            nboundsMat = rdDistGeom.GetMoleculeBoundsMatrix(mol)
            DG.DoTriangleSmoothing(nboundsMat)
            matched, matches = EmbedLib.MatchPharmacophoreToMol(mol, self.featFactory,
                                                                self.pcophore)
            if matched:
                failed, _, match, stats = EmbedLib.MatchPharmacophore(
                    matches, nboundsMat, self.pcophore, useDownsampling=1)
                if not failed:
                    nHits += 1

                    if name in testResults:
                        stats = EmbedLib.EmbedOne(mol, name, match, self.pcophore, count=10,
                                                  silent=1, randomSeed=23)
                        tgt = testResults[name]
                        self.assertEqual(len(tgt), len(stats))
                        print(name)
                        print(','.join(['%.2f' % x for x in stats]))
                        # we'll use different tolerances for the different values
                        # (index 1 is deliberately not compared):
                        self.assertTrue(feq(tgt[0], stats[0], 5.0), (tgt[0], stats[0]))
                        for i in range(2, len(tgt)):
                            self.assertTrue(feq(tgt[i], stats[i], 5.0), (tgt[i], stats[i]))

    self.assertEqual(nDone, 100)
    # print 'nHits:',nHits
    self.assertEqual(nHits, 50)
def test3SmoothPass(self):
    """Triangle smoothing must succeed on this matrix and update it in place.

    After smoothing, entry [0, 2] is expected to be 2.2 while the other
    checked entries keep their input values.
    """
    arr = Numeric.array([[0, 1.1, 5.0], [0.9, 0, 1.1], [0.0, 0.9, 0]], Numeric.Float)
    # assertTrue replaces the failUnless alias, which is deprecated and
    # absent from current unittest releases.
    self.assertTrue(DG.DoTriangleSmoothing(arr))
    self.assertTrue(feq(arr[0, 2], 2.2))
    self.assertTrue(feq(arr[2, 0], 0.0))
    self.assertTrue(feq(arr[0, 1], 1.1))
    self.assertTrue(feq(arr[1, 0], 0.9))
    self.assertTrue(feq(arr[1, 2], 1.1))
def GetAllPharmacophoreMatches(matches, bounds, pcophore, useDownsampling=0, progressCallback=None,
                               use2DLimits=False, mol=None, verbose=False):
    """ Collects every feature combination from CombiEnum(matches) whose
    pharmacophore-updated bounds matrix can still be triangle smoothed.

    - bounds: the bounds matrix; it is copied for each candidate and the
      copy is updated with the pharmacophore bounds.
    - useDownsampling: if set, the updated matrix is reduced to the matched
      atom indices (via DownsampleBoundsMatrix) before smoothing.
    - progressCallback: if provided, called with the running count of
      candidates that got as far as the smoothing step.
    - use2DLimits / mol: when both are set, candidates must also pass
      Check2DBounds.
    - verbose: prints diagnostics, including the bounds matrix at each stage.

    returns: the list of accepted matches
    """

    def _dump(title, mat):
        # One header line followed by the matrix, one formatted row per line.
        print(title)
        for row in mat:
            print(' ', ' '.join(['% 4.2f' % x for x in row]))

    res = []
    nDone = 0
    for match in CombiEnum(matches):
        atomMatch = ChemicalFeatures.GetAtomMatch(match)

        pass2D = True
        if atomMatch and use2DLimits and mol:
            pass2D = Check2DBounds(atomMatch, mol, pcophore)
            if verbose:
                print('..', atomMatch)
                print(' ..Pass2d:', pass2D)

        if not (atomMatch and pass2D
                and CoarseScreenPharmacophore(atomMatch, bounds, pcophore, verbose=verbose)):
            continue

        if verbose:
            print(' ..CoarseScreen: Pass')

        bm = bounds.copy()
        if verbose:
            _dump('pre update:', bm)
        bm = UpdatePharmacophoreBounds(bm, atomMatch, pcophore)
        sz = bm.shape[0]
        if verbose:
            _dump('pre downsample:', bm)

        if useDownsampling:
            indices = []
            for entry in atomMatch:
                indices.extend(list(entry))
            bm = DownsampleBoundsMatrix(bm, indices)
        if verbose:
            _dump('post downsample:', bm)

        if DG.DoTriangleSmoothing(bm):
            res.append(match)
        elif verbose:
            print('cannot smooth')

        nDone += 1
        if progressCallback:
            progressCallback(nDone)
    return res
def test3Embed(self):
    """Match every pickled molecule to self.pcophore and spot-check embedding stats.

    For the three molecules named in ``testResults`` the statistics returned
    by EmbedLib.EmbedOne are compared against stored reference values;
    overall record and hit counts are checked at the end.
    """
    testResults = {
        'mol_197': (181.30, 30.21, 92.03, 8.73, 91.60, 8.33, 74.68, 1.35, 0.00),
        'mol_223': (211.07, 4.22, 114.14, 1.57, 114.08, 1.58, 68.22, 0.48, 0.00),
        'mol_269': (162.28, 2.03, 74.50, 1.00, 73.45, 0.96, 60.18, 0.91, 6.00),
    }
    nDone = 0
    nHits = 0

    # The context manager closes the gzip handle even when an assertion
    # inside the loop fails.
    with gzip.open(os.path.join(self.dataDir, 'cdk2-syn-clip100.pkl.gz'), 'rb') as inF:
        while True:
            try:
                # The stored bounds matrix is ignored; a fresh one is computed below.
                name, molPkl, boundsMat = cPickle.load(inF)
            except Exception:
                # End of the pickle stream ends the loop; the nDone assertion
                # below catches a short read. Unlike a bare except, this lets
                # KeyboardInterrupt/SystemExit through.
                break

            nDone += 1

            mol = Chem.Mol(molPkl)
            nboundsMat = rdDistGeom.GetMoleculeBoundsMatrix(mol)
            DG.DoTriangleSmoothing(nboundsMat)
            matched, matches = EmbedLib.MatchPharmacophoreToMol(mol, self.featFactory,
                                                                self.pcophore)
            if matched:
                failed, bm, match, stats = EmbedLib.MatchPharmacophore(
                    matches, nboundsMat, self.pcophore, useDownsampling=1)
                if not failed:
                    nHits += 1

                    # `in` works on every Python version; dict.has_key does not.
                    if name in testResults:
                        stats = EmbedLib.EmbedOne(mol, name, match, self.pcophore, count=10,
                                                  silent=1, randomSeed=23)
                        tgt = testResults[name]
                        self.assertEqual(len(tgt), len(stats))
                        # Single-argument print(...) is valid both as a
                        # statement and as a function call.
                        print(name)
                        print(','.join(['%.2f' % x for x in stats]))
                        # we'll use different tolerances for the different values
                        # (index 1 is deliberately not compared):
                        self.assertTrue(feq(tgt[0], stats[0], 5.0), (tgt[0], stats[0]))
                        for i in range(2, len(tgt)):
                            self.assertTrue(feq(tgt[i], stats[i], 5.0), (tgt[i], stats[i]))

    self.assertEqual(nDone, 100)
    # print 'nHits:',nHits
    self.assertEqual(nHits, 50)
def testIssue268(self):
    """Regression test: both Issue268 molecules must give a 4-entry match.

    The pharmacophore match is checked with and without 2D limits, first on
    the raw bounds matrices and then again after triangle smoothing them.
    """
    from rdkit import RDLogger
    # RDLogger.EnableLog('rdApp.debug')
    dataPath = lambda fName: os.path.join(self.dataDir, fName)

    featFactory = ChemicalFeatures.BuildFeatureFactory(dataPath('Issue268.fdef'))
    m1 = Chem.MolFromMolFile(dataPath('Issue268_Mol1.mol'))
    m2 = Chem.MolFromMolFile(dataPath('Issue268_Mol2.mol'))
    with open(dataPath('Issue268_Pcop.pkl'), 'rb') as inF:
        pcop = cPickle.load(inF, encoding='latin1')
    # pcop._boundsMat=numpy.array(pcop._boundsMat)
    # pcop._boundsMat2D=numpy.array(pcop._boundsMat2D)
    # cPickle.dump(pcop,file(os.path.join(self.dataDir,
    #                                     'Issue268_Pcop.new.pkl'),'wb+'))
    _, mList1 = EmbedLib.MatchFeatsToMol(m1, featFactory, pcop.getFeatures())
    _, mList2 = EmbedLib.MatchFeatsToMol(m2, featFactory, pcop.getFeatures())
    b1 = rdDistGeom.GetMoleculeBoundsMatrix(m1)
    b2 = rdDistGeom.GetMoleculeBoundsMatrix(m2)

    def _expectFourEach():
        # Element [2] of MatchPharmacophore's result is the match itself.
        self.assertEqual(len(EmbedLib.MatchPharmacophore(mList1, b1, pcop)[2]), 4)
        self.assertEqual(len(EmbedLib.MatchPharmacophore(mList2, b2, pcop)[2]), 4)
        self.assertEqual(
            len(EmbedLib.MatchPharmacophore(mList1, b1, pcop, mol=m1, use2DLimits=True)[2]), 4)
        self.assertEqual(
            len(EmbedLib.MatchPharmacophore(mList2, b2, pcop, mol=m2, use2DLimits=True)[2]), 4)

    _expectFourEach()

    from rdkit import DistanceGeometry as DG
    self.assertTrue(DG.DoTriangleSmoothing(b1))
    self.assertTrue(DG.DoTriangleSmoothing(b2))

    _expectFourEach()
def MatchPharmacophore(matches, bounds, pcophore, useDownsampling=False, use2DLimits=False,
                       mol=None, excludedVolumes=None, useDirs=False):
    """ Returns the first candidate from ConstrainedEnum whose updated bounds
    matrix can be triangle smoothed.

    if use2DLimits is set, the molecule must also be provided and topological
    distances will also be used to filter out matches

    returns a 4-tuple:
      - on success: (0, smoothed bounds matrix, match,
                     (matrix size before downsampling, final matrix size))
      - on failure: (1, None, None, None)
    """
    candidates = ConstrainedEnum(matches, mol, pcophore, bounds, use2DLimits=use2DLimits)
    for match, atomMatch in candidates:
        bm = UpdatePharmacophoreBounds(bounds.copy(), atomMatch, pcophore, useDirs=useDirs,
                                       mol=mol)

        if excludedVolumes:
            # Rebuild each excluded volume so that its feature entries point
            # at the atoms of this particular match.
            localEvs = []
            for templateVol in excludedVolumes:
                remapped = []
                for pos, atoms in enumerate(atomMatch):
                    entry = list(templateVol.featInfo[pos])
                    entry[0] = atoms
                    remapped.append(entry)
                localEvs.append(
                    ExcludedVolume.ExcludedVolume(remapped, templateVol.index,
                                                  templateVol.exclusionDist))
            bm = AddExcludedVolumes(bm, localEvs, smoothIt=False)

        sz = bm.shape[0]
        if useDownsampling:
            indices = [atomIdx for atoms in atomMatch for atomIdx in atoms]
            if excludedVolumes:
                indices.extend(localVol.index for localVol in localEvs)
            bm = DownsampleBoundsMatrix(bm, indices)

        if DG.DoTriangleSmoothing(bm):
            return 0, bm, match, (sz, bm.shape[0])

    return 1, None, None, None
def embed_bounds_matrix(mol: Mol, bounds_matrix: np.ndarray, seed: int = 42) -> int:
    """Embed ``mol`` using ``bounds_matrix`` as the distance bounds.

    The matrix is triangle-smoothed in place before it is handed to the
    embedder.

    :param mol: molecule that receives the new conformer.
    :param bounds_matrix: distance bounds matrix; modified by the smoothing.
    :param seed: random seed passed to the embedder.
    :return: the value returned by ``rdDistGeom.EmbedMolecule``.
    """
    DistanceGeometry.DoTriangleSmoothing(bounds_matrix)

    params = rdDistGeom.EmbedParameters()
    settings = (
        ("numThreads", 0),  # max number of threads supported by the system will be used
        ("useRandomCoords", True),  # recommended for larger molecules
        ("clearConfs", False),
        ("randomSeed", seed),
    )
    for attr, value in settings:
        setattr(params, attr, value)
    params.SetBoundsMat(bounds_matrix)

    return rdDistGeom.EmbedMolecule(mol, params)
def get_rdkit_mol(self):
    """
    Build the RDKit molecule for ``self.rmg_molecule`` and, when
    ``self.distance_data`` is set, embed it against an edited bounds matrix.

    Side effects: sets ``self.rdkit_molecule`` and ``self.bm`` (None when
    there is no distance data); when exactly three labels are present, a copy
    of the molecule with at most one extra single bond is stored on
    ``self._pseudo_geometry``.

    Returns only ``self.rdkit_molecule``; the bounds matrix is available as
    ``self.bm``.
    """
    self.rdkit_molecule = Conformer(rmg_molecule=self.rmg_molecule).get_rdkit_mol()
    self.get_labels()

    # Both representations must list the same elements in the same order.
    for position, rmg_atom in enumerate(self.rmg_molecule.atoms):
        rd_atom = self.rdkit_molecule.GetAtoms()[position]
        assert rmg_atom.number == rd_atom.GetAtomicNum()

    if len(self.labels) == 3:
        single = rdkit.Chem.rdchem.BondType.SINGLE
        pseudo = Chem.RWMol(self.rdkit_molecule.__copy__())
        first, middle, last = self.labels
        # Add whichever of the two bonds is missing, trying first-middle
        # before middle-last; never both.
        if not pseudo.GetBondBetweenAtoms(first, middle):
            pseudo.AddBond(first, middle, order=single)
        elif not pseudo.GetBondBetweenAtoms(middle, last):
            pseudo.AddBond(middle, last, order=single)
        self._pseudo_geometry = pseudo

    logging.info("Initially embedded molecule")

    self.bm = None
    if not self.distance_data:
        return self.rdkit_molecule

    logging.info("Getting bounds matrix")
    self.bm = self.get_bounds_matrix()

    if len(self.labels) > 0:
        logging.info("Editing bounds matrix")
        self.bm = self.edit_matrix()

    logging.info("Performing triangle smoothing on bounds matrix.")
    DistanceGeometry.DoTriangleSmoothing(self.bm)

    logging.info("Now attempting to embed using edited bounds matrix.")
    self.rd_embed()
    return self.rdkit_molecule
def test6EmbedConstraints(self):
    """Embed tightly constrained 3- and 4-point matrices and check unit distances.

    For each matrix the squared lengths of coords[0]-coords[1] and
    coords[1]-coords[2] are compared against 1.0 with ``feq``. A third case
    after the early ``return`` is intentionally disabled (see the comment
    there).
    """
    # assertTrue replaces the failUnless alias throughout; the alias is
    # deprecated and absent from current unittest releases.
    arr = Numeric.array([[0.0, 1.0, 1.0],
                         [1.0, 0.0, 1.0],
                         [0.99, 1.0, 0.0]], Numeric.Float)
    self.assertTrue(DG.DoTriangleSmoothing(arr))
    coords = DG.EmbedBoundsMatrix(arr, randomSeed=100)
    v1 = coords[0] - coords[1]
    v2 = coords[1] - coords[2]
    d1 = Numeric.dot(v1, v1)
    self.assertTrue(feq(d1, 1.0, 2e-3))
    d2 = Numeric.dot(v2, v2)
    self.assertTrue(feq(d2, 1.0, 2e-3))

    arr = Numeric.array([[0.0, 1.0, 1.0, 1.01],
                         [1.0, 0.0, 1.0, 1.0],
                         [1.0, 1.0, 0.0, 1.0],
                         [0.99, 1.0, 1.0, 0.0],
                         ], Numeric.Float)
    self.assertTrue(DG.DoTriangleSmoothing(arr))
    coords = DG.EmbedBoundsMatrix(arr)
    v1 = coords[0] - coords[1]
    v2 = coords[1] - coords[2]
    d1 = Numeric.dot(v1, v1)
    self.assertTrue(feq(d1, 1.0, 1e-3))
    d2 = Numeric.dot(v2, v2)
    self.assertTrue(feq(d2, 1.0, 1e-3))

    return
    # this test is currently (rev:4769) passing on windows and
    # failing on linux.  It's kind of dependent on fp precision, so
    # it's probably ok to ditch it.
    arr = Numeric.array([[0.0, 1.0, 1.0, 1.0],
                         [1.0, 0.0, 1.0, 1.0],
                         [1.0, 1.0, 0.0, 1.0],
                         [1.0, 1.0, 1.0, 0.0],
                         ], Numeric.Float)
    self.assertTrue(DG.DoTriangleSmoothing(arr))
    coords = DG.EmbedBoundsMatrix(arr, randomSeed=100)
    v1 = coords[0] - coords[1]
    v2 = coords[1] - coords[2]
    d1 = Numeric.dot(v1, v1)
    self.assertTrue(feq(d1, 1.0, 1e-3))
    d2 = Numeric.dot(v2, v2)
    self.assertTrue(feq(d2, 1.0, 1e-3))
def testIssue268(self):
    """Regression test: both Issue268 molecules must give a 4-entry match.

    The pharmacophore match is checked with and without 2D limits, first on
    the raw bounds matrices and then again after triangle smoothing them.
    """
    featFactory = ChemicalFeatures.BuildFeatureFactory(
        os.path.join(self.dataDir, 'Issue268.fdef'))
    m1 = Chem.MolFromMolFile(os.path.join(self.dataDir, 'Issue268_Mol1.mol'))
    m2 = Chem.MolFromMolFile(os.path.join(self.dataDir, 'Issue268_Mol2.mol'))

    # The pickle is read as text so that Windows line endings can be
    # normalised before unpickling. The with-block closes the file; the
    # former explicit close() inside it was redundant.
    with open(os.path.join(self.dataDir, 'Issue268_Pcop.pkl'), 'r') as inTF:
        buf = inTF.read().replace('\r\n', '\n').encode('utf-8')
    with io.BytesIO(buf) as inF:
        pcop = pickle.load(inF, encoding='latin1')
    # pcop._boundsMat=numpy.array(pcop._boundsMat)
    # pcop._boundsMat2D=numpy.array(pcop._boundsMat2D)
    # pickle.dump(pcop,file(os.path.join(self.dataDir,
    #                                    'Issue268_Pcop.new.pkl'),'wb+'))
    _, mList1 = EmbedLib.MatchFeatsToMol(m1, featFactory, pcop.getFeatures())
    _, mList2 = EmbedLib.MatchFeatsToMol(m2, featFactory, pcop.getFeatures())
    b1 = rdDistGeom.GetMoleculeBoundsMatrix(m1)
    b2 = rdDistGeom.GetMoleculeBoundsMatrix(m2)

    # Element [2] of MatchPharmacophore's result is the match itself.
    self.assertEqual(len(EmbedLib.MatchPharmacophore(mList1, b1, pcop)[2]), 4)
    self.assertEqual(len(EmbedLib.MatchPharmacophore(mList2, b2, pcop)[2]), 4)
    self.assertEqual(
        len(EmbedLib.MatchPharmacophore(mList1, b1, pcop, mol=m1, use2DLimits=True)[2]), 4)
    self.assertEqual(
        len(EmbedLib.MatchPharmacophore(mList2, b2, pcop, mol=m2, use2DLimits=True)[2]), 4)

    self.assertTrue(DG.DoTriangleSmoothing(b1))
    self.assertTrue(DG.DoTriangleSmoothing(b2))

    # Same four checks again, now on the smoothed matrices.
    self.assertEqual(len(EmbedLib.MatchPharmacophore(mList1, b1, pcop)[2]), 4)
    self.assertEqual(len(EmbedLib.MatchPharmacophore(mList2, b2, pcop)[2]), 4)
    self.assertEqual(
        len(EmbedLib.MatchPharmacophore(mList1, b1, pcop, mol=m1, use2DLimits=True)[2]), 4)
    self.assertEqual(
        len(EmbedLib.MatchPharmacophore(mList2, b2, pcop, mol=m2, use2DLimits=True)[2]), 4)
def test2SmoothFail(self):
    """DoTriangleSmoothing must report failure (a falsy result) for this matrix."""
    arr = Numeric.array([[0, 1.0, 5.0], [1.0, 0, 1.0], [3.0, 1.0, 0]], Numeric.Float)
    # assertFalse replaces the failIf alias, which is deprecated and absent
    # from current unittest releases.
    self.assertFalse(DG.DoTriangleSmoothing(arr))
bm[p, q] = 0 bm[q, p] = 1000 #print("original:") #for i in range(len(bm)): # for j in range(i+1, len(bm)): # print("({}, {}) {} < x < {}".format(i, j, bm_org[j, i], bm_org[i, j])) for i in range(len(bm)): for j in range(i + 1, len(bm)): #print("({}, {}) {} < x < {}".format(i, j, bm[j, i], bm[i, j])) if bm[i, j] < bm[j, i]: print( " ***** Assertion failed !! (Before smoothing) *****" ) assert (bm[i, j] >= bm[j, i]) if DG.DoTriangleSmoothing(bm) == False: print("Smoothing failed") for i in range(len(bm)): for j in range(i + 1, len(bm)): #print("({}, {}) {} < x < {}".format(i, j, bm[j, i], bm[i, j])) if bm[i, j] < bm[j, i]: print( " ***** Assertion failed !! (After smoothing) *****") assert (bm[i, j] >= bm[j, i]) ps = rdDistGeom.EmbedParameters() ps.useRandomCoords = True ps.SetBoundsMat(bm) ps.randomSeed = 0xf00d try: rdDistGeom.EmbedMolecule(mol, ps)
def EmbedPharmacophore(mol, atomMatch, pcophore, randomSeed=-1, count=10, smoothFirst=True,
                       silent=False, bounds=None, excludedVolumes=None, targetNumber=-1,
                       useDirs=False):
  """ Generates one or more embeddings for a molecule that satisfy a pharmacophore

  atomMatch is a sequence of sequences containing atom indices
  for each of the pharmacophore's features.

    - randomSeed: base seed; attempt i uses i * 10 + randomSeed, or
       i * 10 + 1 when randomSeed is not positive
    - count: is the maximum number of attempts to make at generating an embedding
    - smoothFirst: toggles triangle smoothing of the molecular bounds matrix
    - silent: if set, failed embedding attempts are not logged
    - bounds: if provided, should be the molecular bounds matrix. If this isn't
       provided, the matrix will be generated.
    - excludedVolumes: if provided, added to the bounds matrix and passed on
       to EmbedMol
    - targetNumber: if this number is positive, it provides a maximum number
       of embeddings to generate (i.e. we'll have count attempts to generate
       targetNumber embeddings).
    - useDirs: passed through to UpdatePharmacophoreBounds

  returns: a 3 tuple:
    1) the molecular bounds matrix adjusted for the pharmacophore
    2) a list of embeddings (molecules with a single conformer)
    3) the number of failed attempts at embedding

  raises: ValueError if the adjusted bounds matrix cannot be smoothed

    >>> m = Chem.MolFromSmiles('OCCN')
    >>> feats = [ChemicalFeatures.FreeChemicalFeature('HBondAcceptor', 'HAcceptor1', Geometry.Point3D(0.0, 0.0, 0.0)),
    ...          ChemicalFeatures.FreeChemicalFeature('HBondDonor', 'HDonor1', Geometry.Point3D(2.65, 0.0, 0.0)),
    ...   ]
    >>> pcophore=Pharmacophore.Pharmacophore(feats)
    >>> pcophore.setLowerBound(0,1, 2.5)
    >>> pcophore.setUpperBound(0,1, 3.5)
    >>> atomMatch = ((0,),(3,))

    >>> bm,embeds,nFail = EmbedPharmacophore(m,atomMatch,pcophore,randomSeed=23,silent=1)
    >>> len(embeds)
    10
    >>> nFail
    0

    Set up a case that can't succeed:

    >>> pcophore=Pharmacophore.Pharmacophore(feats)
    >>> pcophore.setLowerBound(0,1, 2.0)
    >>> pcophore.setUpperBound(0,1, 2.1)
    >>> atomMatch = ((0,),(3,))

    >>> bm,embeds,nFail = EmbedPharmacophore(m,atomMatch,pcophore,randomSeed=23,silent=1)
    >>> len(embeds)
    0
    >>> nFail
    10

  """
  # module-level dict used to accumulate time spent in successful embeds
  global _times
  # cache the chiral centers on the molecule so they are only found once
  if not hasattr(mol, '_chiralCenters'):
    mol._chiralCenters = Chem.FindMolChiralCenters(mol)

  if bounds is None:
    bounds = MolDG.GetMoleculeBoundsMatrix(mol)
  if smoothFirst:
    # applied to `bounds` itself, i.e. before the working copy is taken
    DG.DoTriangleSmoothing(bounds)

  # work on a copy so the pharmacophore edits are not written into `bounds`
  bm = bounds.copy()
  #print '------------'
  #print 'initial'
  #for row in bm:
  #  print ' ',' '.join(['% 4.2f'%x for x in row])
  #print '------------'

  bm = UpdatePharmacophoreBounds(bm, atomMatch, pcophore, useDirs=useDirs, mol=mol)
  if excludedVolumes:
    # smoothing is deferred to the explicit call just below
    bm = AddExcludedVolumes(bm, excludedVolumes, smoothIt=False)

  if not DG.DoTriangleSmoothing(bm):
    raise ValueError("could not smooth bounds matrix")

  #print '------------'
  #print 'post replace and smooth'
  #for row in bm:
  #  print ' ',' '.join(['% 4.2f'%x for x in row])
  #print '------------'

  # a non-positive targetNumber means "keep everything count attempts produce"
  if targetNumber <= 0:
    targetNumber = count
  nFailed = 0
  res = []
  for i in range(count):
    # NOTE(review): for a numpy array this slice is a view, not a copy --
    # confirm whether EmbedMol is expected to leave the matrix untouched.
    tmpM = bm[:, :]
    # each attempt embeds into its own copy of the molecule
    m2 = Chem.Mol(mol)
    t1 = time.time()
    try:
      # derive a distinct seed for every attempt
      if randomSeed <= 0:
        seed = i * 10 + 1
      else:
        seed = i * 10 + randomSeed
      EmbedMol(m2, tmpM, atomMatch, randomSeed=seed, excludedVolumes=excludedVolumes)
    except ValueError:
      # a ValueError from EmbedMol is counted as a failed attempt
      if not silent:
        logger.info('Embed failed')
      nFailed += 1
    else:
      t2 = time.time()
      _times['embed'] = _times.get('embed', 0) + t2 - t1
      # reject the embedding if any R/S center has a chiral volume whose sign
      # does not fit its label (R with vol >= 0, or S with vol <= 0)
      keepIt = True
      for idx, stereo in mol._chiralCenters:
        if stereo in ('R', 'S'):
          vol = ComputeChiralVolume(m2, idx)
          if (stereo=='R' and vol>=0) or \
             (stereo=='S' and vol<=0):
            keepIt = False
            break
      if keepIt:
        res.append(m2)
      else:
        logger.debug('Removed embedding due to chiral constraints.')
      # stop early once enough embeddings have been collected
      if len(res) == targetNumber:
        break
  return bm, res, nFailed
def test2SmoothFail(self):
    """DoTriangleSmoothing must report failure (a falsy result) for this matrix."""
    # np.float was only an alias of the builtin float and has been removed
    # from NumPy; np.float64 yields the same dtype.
    arr = np.array([[0, 1.0, 5.0], [1.0, 0, 1.0], [3.0, 1.0, 0]], np.float64)
    self.assertFalse(DG.DoTriangleSmoothing(arr))
processed.extend(match) for i, p in enumerate(match): for j, q in enumerate(match): if i == j: continue elif p < q: bm[p, q] = distMat[i, j] + 0.01 else: bm[p, q] = distMat[i, j] - 0.01 def ub(i, j): if i < j: return bm[i, j] else: return bm[j, i] def lb(i, j): if i < j: return bm[j, i] else: return bm[i, j] # assure bounds are nonerroneous for i in range(len(bm)): for j in range(len(bm)): assert lb(i, j) <= ub(i, j) assert lb(i, j) >= 0.0 DG.DoTriangleSmoothing(bm) for i in range(len(bm)): for j in range(len(bm)): assert lb(i, j) <= ub(i, j) assert lb(i, j) >= 0.0
def bound_matrix_from_ellipse(mol,
                              angle,
                              eccentricity=0.99,
                              bond_scale_factor=1.0,
                              update_scheme=modify_bound_matrix):
    """
    Modifies the bounds matrix entries of those atoms lying on the largest ring of the input
    molecule. The modifications are based on distance constraints derived from fitting these
    atoms onto the circumference of an ellipse. This effectively reduces the allowed
    conformation space spanned by the cyclic molecule as defined by the bounds matrix.
    For details see the method section of https://pubs.acs.org/doi/10.1021/acs.jcim.0c00025

    Parameters
    -----------
    mol : rdkit.Chem.Mol
    angle :
        The angle which the first ring atom makes with the horizontal. This param stipulates
        at which orientation the ring is squashed.
    eccentricity : float
        How squashed the ellipse is that the ring atoms are fitted onto. The default is 0.99.
        It is recommended to use a very high value (eccentricity ranges from 0 to 1 for an
        ellipse). This value is forwarded to the ellipse radius calculation.
    bond_scale_factor : float
        The circumference of the ellipse that undergoes the fitting is equal to the sum of all
        ring bond lengths. This param controls whether the circumference is scaled. For smaller
        ring systems, it can be good to scale up this param. The default is 1.0, i.e. no scaling.
    update_scheme : function
        Rule used to modify the initial bound matrix in accordance with the ellipse fit. The
        default function reduces both the lower and upper bound whenever the ellipse fit gives
        smaller bounds.

    Return
    -------
    bmat : numpy.matrix
        The updated, triangle-smoothed bound matrix, which can be subsequently fed into RDKit's
        conformer generation workflow.
    """
    bmat = AllChem.GetMoleculeBoundsMatrix(mol, useMacrocycle14config=True)

    ring_indices = get_largest_ring(mol)
    bond_length_list = get_ring_bond_length_list(bmat,
                                                 ring_indices,
                                                 scale_factor=bond_scale_factor)
    perimeter = sum(bond_length_list)

    # Radius of the circle with the same perimeter, used as the starting
    # guess for both ellipse radii.
    circle_radius = perimeter / 2 / np.pi
    # Pass the caller's eccentricity through; it used to be hard-coded to
    # 0.99 here, which silently ignored the parameter.
    a, b = calculate_ellipse_radii(guess=(circle_radius, circle_radius),
                                   eccentricity=eccentricity,
                                   perimeter=perimeter)  # angstroms

    coord_2d = get_points_on_ellipse(a,
                                     b,
                                     len(ring_indices),
                                     bond_length_list,
                                     startAngle=angle,
                                     verbose=False)
    # Pairwise distances between the fitted ring positions.
    bound_mat = cdist(coord_2d, coord_2d)

    bmat = update_scheme(bmat,
                         new_matrix=bound_mat,
                         modify="upper",
                         indices=ring_indices,
                         verbose=False)
    DistanceGeometry.DoTriangleSmoothing(bmat)
    return bmat