def compareOrder(smi1, smi2, tol=1.0e-5):
    """Compare the bounds matrices generated from two SMILES strings.

    Each SMILES is parsed and a bounds matrix is generated for it. The
    substructure match of the second molecule onto the first supplies the
    atom mapping that ``compareMatrices`` uses, together with ``tol``.

    Returns whatever ``compareMatrices`` returns.
    """
    first = Chem.MolFromSmiles(smi1)
    second = Chem.MolFromSmiles(smi2)
    bounds_first = rdDistGeom.GetMoleculeBoundsMatrix(first)
    bounds_second = rdDistGeom.GetMoleculeBoundsMatrix(second)
    # Atom indices in `first` matched by the atoms of `second`.
    atom_map = first.GetSubstructMatch(second)
    return compareMatrices(bounds_first, bounds_second, atom_map, tol)
def testGithub1763(self):
    """The default bounds matrix must have a smaller [0, 4] entry than the
    one generated with ``doTriangleSmoothing=False`` (pentane)."""
    pentane = Chem.MolFromSmiles('CCCCC')
    default_bounds = rdDistGeom.GetMoleculeBoundsMatrix(pentane)
    raw_bounds = rdDistGeom.GetMoleculeBoundsMatrix(pentane, doTriangleSmoothing=False)
    for matrix in (default_bounds, raw_bounds):
        print(matrix)
    self.assertTrue(default_bounds[0, 4] < raw_bounds[0, 4])
def testProvidingBoundsMatrix(self):
    """Embed with a caller-supplied bounds matrix and verify that the
    distances from atom 3 to atoms 0, 2 and 4 end up close to 1.2."""
    mol = Chem.MolFromSmiles("C1CCC1C")
    bounds = rdDistGeom.GetMoleculeBoundsMatrix(mol)

    # Overwrite the entries that link atom 3 to atoms 0, 2 and 4.
    # NOTE(review): for the 3/4 pair the 1.21 and 1.20 values sit in the
    # opposite triangles compared with the other two pairs - confirm intended.
    overrides = (
        ((0, 3), 1.21), ((3, 0), 1.20),
        ((2, 3), 1.21), ((3, 2), 1.20),
        ((4, 3), 1.21), ((3, 4), 1.20),
    )
    for cell, value in overrides:
        bounds[cell] = value
    DG.DoTriangleSmoothing(bounds)

    params = rdDistGeom.EmbedParameters()
    params.useRandomCoords = True
    params.SetBoundsMat(bounds)
    params.randomSeed = 0xf00d
    self.assertEqual(rdDistGeom.EmbedMolecule(mol, params), 0)

    conformer = mol.GetConformer()
    for neighbour in (0, 2, 4):
        separation = (conformer.GetAtomPosition(3) - conformer.GetAtomPosition(neighbour)).Length()
        self.assertAlmostEqual(separation, 1.2, delta=0.05)
def testScaleBoundsMatForce(self):
    """
    For pentane, impose a target value on the 1-5 distance and embed twice
    with different weights on the atom-pair distance restraints. The
    conformer produced with the stronger weight must always end up with a
    1-5 distance closer to the target than the one produced with the weaker
    weight.
    """
    target = 4

    def make_params(seed, scaling, bounds):
        # Identical settings for both embeddings apart from the force scaling.
        params = rdDistGeom.EmbedParameters()
        params.randomSeed = seed
        params.useBasicKnowledge = True
        params.useRandomCoords = False
        params.boundsMatForceScaling = scaling
        params.SetBoundsMat(bounds)
        return params

    def deviation_from_target(mol):
        conformer = mol.GetConformer()
        end_to_end = (conformer.GetAtomPosition(4) - conformer.GetAtomPosition(0)).Length()
        return abs(end_to_end - target)

    for seed in range(5):
        weakly_restrained = Chem.MolFromSmiles("CCCCC")
        bounds = rdDistGeom.GetMoleculeBoundsMatrix(weakly_restrained)
        bounds[0, 4] = target
        bounds[4, 0] = target
        DG.DoTriangleSmoothing(bounds)
        weak_params = make_params(seed, 0.1, bounds)
        self.assertEqual(rdDistGeom.EmbedMolecule(weakly_restrained, weak_params), 0)

        strongly_restrained = Chem.MolFromSmiles("CCCCC")
        strong_params = make_params(seed, 10, bounds)
        self.assertEqual(rdDistGeom.EmbedMolecule(strongly_restrained, strong_params), 0)

        self.assertTrue(
            deviation_from_target(strongly_restrained) < deviation_from_target(weakly_restrained))
def check_mol(mol):
    """Embed ``mol`` onto the module-level pharmacophore and collect the results.

    Relies on the module-level names ``feat_fact`` (feature factory) and
    ``pcophore`` (pharmacophore).

    Every pharmacophore match found for the molecule is embedded (up to 20
    attempts each); each embedding is UFF-optimised, transformed, and appended
    to the result.

    Returns a list of embedded molecules; the list is empty when the molecule
    cannot match the pharmacophore.
    """
    # Local import keeps this block self-contained.
    from rdkit.Numerics import rdAlignment

    # Bound up front so the "no match" path returns a list as well.
    res = []
    match, mList = EmbedLib.MatchPharmacophoreToMol(mol, feat_fact, pcophore)
    if not match:
        return res

    for feature_matches in mList:
        for feature in feature_matches:
            print("%s %s" % (feature.GetAtomIds(), feature.GetFamily()))

    bounds = rdDistGeom.GetMoleculeBoundsMatrix(mol)
    pList = EmbedLib.GetAllPharmacophoreMatches(mList, bounds, pcophore)
    print(pList)
    print(len(pList))

    # One list of atom-id tuples per pharmacophore match.
    phMatches = [[feature.GetAtomIds() for feature in hit] for hit in pList]

    for phMatch in phMatches:
        bm, embeds, _ = EmbedLib.EmbedPharmacophore(mol, phMatch, pcophore, count=20, silent=1)
        print("-----> embeds num: %d" % len(embeds))
        for embed in embeds:
            AllChem.UFFOptimizeMolecule(embed)
            # NOTE(review): GetAlignmentTransform expects reference and probe
            # 3D point sets, but `bm` and `bounds` are bounds matrices - the
            # arguments are kept as written; confirm what should be aligned.
            align_data = rdAlignment.GetAlignmentTransform(bm, bounds)
            AllChem.TransformMol(embed, align_data[1])
            res.append(embed)
    return res
def _align_molecules(self, molecules: List[Chem.Mol]) -> None:
    """ Align a list of molecules to a given pharmacophore.

        Each molecule is appended to ``self.molecules`` as a
        ``MolScore(score, id, mol)`` tuple. Molecules that cannot be matched
        (or yield no alignment) are stored with a score of 0.0 and the
        original molecule; successfully aligned molecules are stored with
        ``1 / SSD`` of the best embedding and that embedding. Molecules whose
        embedding raises are skipped entirely.

        Parameters
        ----------
        molecules : list of rdkit.Chem.Mol
            List of molecules to align. Each one must carry a "_Name" property.
    """
    self.n_molecules += len(molecules)

    rdkit_pharmacophore, radii = self.pharmacophore.to_rdkit()
    apply_radii_to_bounds(radii, rdkit_pharmacophore)

    fdef = os.path.join(RDConfig.RDDataDir, 'BaseFeatures.fdef')
    featFactory = ChemicalFeatures.BuildFeatureFactory(fdef)

    MolScore = namedtuple("MolScore", ["score", "id", "mol"])

    def record_unmatched(molecule):
        # Shared bookkeeping for every "no usable alignment" outcome.
        self.molecules.append(MolScore(0.0, molecule.GetProp("_Name"), molecule))

    for mol in tqdm(molecules):
        bounds_matrix = rdDistGeom.GetMoleculeBoundsMatrix(mol)
        can_match, all_matches = EmbedLib.MatchPharmacophoreToMol(
            mol, featFactory, rdkit_pharmacophore)
        if not can_match:
            record_unmatched(mol)
            continue

        failed, _, matched_mols, _ = EmbedLib.MatchPharmacophore(
            all_matches, bounds_matrix, rdkit_pharmacophore, useDownsampling=True)
        if failed:
            record_unmatched(mol)
            continue

        atom_match = [list(x.GetAtomIds()) for x in matched_mols]
        try:
            mol_H = Chem.AddHs(mol)
            _, embeddings, _ = EmbedLib.EmbedPharmacophore(
                mol_H, atom_match, rdkit_pharmacophore, count=10)
        except Exception:
            # Best effort: a molecule that cannot be embedded is skipped.
            # `Exception` (not a bare except) lets KeyboardInterrupt and
            # SystemExit stop the loop.
            continue

        SSDs = transform_embeddings(rdkit_pharmacophore, embeddings, atom_match)
        if len(SSDs) == 0:
            record_unmatched(mol)
            continue

        best_fit_index = min(enumerate(SSDs), key=itemgetter(1))[0]
        # NOTE(review): raises ZeroDivisionError if the best SSD is exactly 0.
        score = 1 / SSDs[best_fit_index]
        self.molecules.append(
            MolScore(score, mol.GetProp("_Name"), embeddings[best_fit_index]))
def test2Utils(self):
    """Check that the two bounds-matrix entries for the bond in 'CC' are
    positive, ordered, and bracket 1.510."""
    ethane = Chem.MolFromSmiles('CC')
    bounds = rdDistGeom.GetMoleculeBoundsMatrix(ethane)
    smaller = bounds[1, 0]
    larger = bounds[0, 1]
    self.assertTrue(smaller > 0)
    self.assertTrue(larger > 0)
    self.assertTrue(larger >= smaller)
    self.assertTrue(smaller < 1.510)
    self.assertTrue(larger > 1.510)
def test4Search(self):
    """Screen the pickled CDK2 molecules against a three-feature pharmacophore.

    Counts how many molecules were read, how many can match the pharmacophore
    features, and how many pass ``MatchPharmacophore`` with 2D limits, then
    compares the three counts with the expected values.
    """
    featFactory = ChemicalFeatures.BuildFeatureFactory(
        os.path.join(self.dataDir, 'BaseFeatures.fdef'))

    activeFeats = [
        ChemicalFeatures.FreeChemicalFeature(family, Geometry.Point3D(0.0, 0.0, 0.0))
        for family in ('Acceptor', 'Donor', 'Aromatic')
    ]
    pcophore = Pharmacophore.Pharmacophore(activeFeats)
    pcophore.setLowerBound(0, 1, 2.251)
    pcophore.setUpperBound(0, 1, 2.451)
    pcophore.setUpperBound2D(0, 1, 3)

    pcophore.setLowerBound(0, 2, 4.970)
    pcophore.setUpperBound(0, 2, 5.170)
    pcophore.setUpperBound2D(0, 2, 6)

    pcophore.setLowerBound(1, 2, 2.681)
    pcophore.setUpperBound(1, 2, 2.881)
    pcophore.setUpperBound2D(1, 2, 6)

    nDone = 0
    nMatches = 0
    nHits = 0
    # The context manager guarantees the archive is closed even if an
    # assertion or RDKit call fails part-way through.
    with gzip.open(os.path.join(self.dataDir, 'cdk2-syn-clip100.pkl.gz'), 'rb') as inF:
        while True:
            try:
                # The pickled bounds matrix is ignored; it is recomputed below.
                name, molPkl, _ = cPickle.load(inF, encoding='latin1')
                if PY3:
                    molPkl = bytes(molPkl, encoding='latin1')
            except Exception:
                # Any load failure (normally the end of the stream) ends the
                # loop; the nDone assertion below catches an early stop.
                break

            nDone += 1
            mol = Chem.Mol(molPkl)
            boundsMat = rdDistGeom.GetMoleculeBoundsMatrix(mol)
            DG.DoTriangleSmoothing(boundsMat)

            canMatch, matches = EmbedLib.MatchPharmacophoreToMol(mol, featFactory, pcophore)
            if not canMatch:
                continue
            nMatches += 1
            failed, _, _, _ = EmbedLib.MatchPharmacophore(
                matches, boundsMat, pcophore,
                useDownsampling=True, use2DLimits=True, mol=mol)
            if not failed:
                nHits += 1

    self.assertEqual(nDone, 100)
    self.assertEqual(nMatches, 93)
    self.assertEqual(nHits, 67)
def test3Embed(self):
    """Match the pickled CDK2 molecules against ``self.pcophore`` and, for
    three reference molecules, compare the ``EmbedOne`` statistics with
    stored values (tolerance 5.0; index 1 is not compared)."""
    testResults = {
        'mol_197': (218.80, 35.75, 110.33, 11.58, 109.66, 11.09, 90.35, 2.95, 0.00),
        'mol_223': (259.19, 6.27, 134.13, 1.12, 134.06, 1.12, 85.74, 0.61, 0.00),
        'mol_269': (204.51, 7.89, 103.89, 1.20, 102.66, 1.20, 88.07, 1.21, 6.00),
    }
    nDone = 0
    nHits = 0
    # `with` closes the archive even when one of the assertions below fails.
    with gzip.open(os.path.join(self.dataDir, 'cdk2-syn-clip100.pkl.gz'), 'rb') as inF:
        while True:
            try:
                name, molPkl, _ = cPickle.load(inF, encoding='latin1')
                if PY3:
                    molPkl = bytes(molPkl, encoding='latin1')
            except Exception:
                # Normally the end of the stream; nDone is asserted below.
                break

            nDone += 1
            mol = Chem.Mol(molPkl)
            nboundsMat = rdDistGeom.GetMoleculeBoundsMatrix(mol)
            DG.DoTriangleSmoothing(nboundsMat)
            matched, matches = EmbedLib.MatchPharmacophoreToMol(
                mol, self.featFactory, self.pcophore)
            if not matched:
                continue
            failed, _, match, _ = EmbedLib.MatchPharmacophore(
                matches, nboundsMat, self.pcophore, useDownsampling=1)
            if failed:
                continue

            nHits += 1
            if name in testResults:
                stats = EmbedLib.EmbedOne(mol, name, match, self.pcophore,
                                          count=10, silent=1, randomSeed=23)
                tgt = testResults[name]
                self.assertEqual(len(tgt), len(stats))
                print(name)
                print(','.join(['%.2f' % x for x in stats]))
                # Index 0 and indices 2.. are compared; index 1 is skipped.
                self.assertTrue(feq(tgt[0], stats[0], 5.0), (tgt[0], stats[0]))
                for i in range(2, len(tgt)):
                    self.assertTrue(feq(tgt[i], stats[i], 5.0), (tgt[i], stats[i]))

    self.assertEqual(nDone, 100)
    self.assertEqual(nHits, 50)
def test3Embed(self):
    """Match the pickled CDK2 molecules against ``self.pcophore`` and, for
    three reference molecules, compare the ``EmbedOne`` statistics with
    stored values (tolerance 5.0; index 1 is not compared).

    Written so that it runs unchanged on Python 2.7 and Python 3: no
    ``print`` statements, ``dict.has_key`` or ``failUnless*`` aliases.
    """
    testResults = {
        'mol_197': (181.30, 30.21, 92.03, 8.73, 91.60, 8.33, 74.68, 1.35, 0.00),
        'mol_223': (211.07, 4.22, 114.14, 1.57, 114.08, 1.58, 68.22, 0.48, 0.00),
        'mol_269': (162.28, 2.03, 74.50, 1.00, 73.45, 0.96, 60.18, 0.91, 6.00),
    }
    nDone = 0
    nHits = 0
    # `with` closes the archive even when one of the assertions below fails.
    with gzip.open(os.path.join(self.dataDir, 'cdk2-syn-clip100.pkl.gz'), 'rb') as inF:
        while True:
            try:
                # The pickled bounds matrix is ignored; it is recomputed below.
                name, molPkl, _ = cPickle.load(inF)
            except Exception:
                # Normally the end of the stream; nDone is asserted below.
                break

            nDone += 1
            mol = Chem.Mol(molPkl)
            nboundsMat = rdDistGeom.GetMoleculeBoundsMatrix(mol)
            DG.DoTriangleSmoothing(nboundsMat)
            matched, matches = EmbedLib.MatchPharmacophoreToMol(
                mol, self.featFactory, self.pcophore)
            if not matched:
                continue
            failed, _, match, _ = EmbedLib.MatchPharmacophore(
                matches, nboundsMat, self.pcophore, useDownsampling=1)
            if failed:
                continue

            nHits += 1
            if name in testResults:
                stats = EmbedLib.EmbedOne(mol, name, match, self.pcophore,
                                          count=10, silent=1, randomSeed=23)
                tgt = testResults[name]
                self.assertEqual(len(tgt), len(stats))
                print(name)
                print(','.join(['%.2f' % x for x in stats]))
                # Index 0 and indices 2.. are compared; index 1 is skipped.
                self.assertTrue(feq(tgt[0], stats[0], 5.0), (tgt[0], stats[0]))
                for i in range(2, len(tgt)):
                    self.assertTrue(feq(tgt[i], stats[i], 5.0), (tgt[i], stats[i]))

    self.assertEqual(nDone, 100)
    self.assertEqual(nHits, 50)
def testIssue268(self):
    """Both Issue268 molecules must give a 4-feature pharmacophore match,
    with and without 2D limits, before and after triangle smoothing of
    their bounds matrices."""
    from rdkit import RDLogger
    # RDLogger.EnableLog('rdApp.debug')

    def data_file(filename):
        return os.path.join(self.dataDir, filename)

    featFactory = ChemicalFeatures.BuildFeatureFactory(data_file('Issue268.fdef'))
    m1 = Chem.MolFromMolFile(data_file('Issue268_Mol1.mol'))
    m2 = Chem.MolFromMolFile(data_file('Issue268_Mol2.mol'))
    with open(data_file('Issue268_Pcop.pkl'), 'rb') as inF:
        pcop = cPickle.load(inF, encoding='latin1')

    _, mList1 = EmbedLib.MatchFeatsToMol(m1, featFactory, pcop.getFeatures())
    _, mList2 = EmbedLib.MatchFeatsToMol(m2, featFactory, pcop.getFeatures())
    b1 = rdDistGeom.GetMoleculeBoundsMatrix(m1)
    b2 = rdDistGeom.GetMoleculeBoundsMatrix(m2)

    cases = ((mList1, b1, m1), (mList2, b2, m2))

    def assert_four_feature_matches():
        # First the plain 3D match for both molecules, then the 2D-limited one.
        for feature_lists, bounds, _mol in cases:
            found = EmbedLib.MatchPharmacophore(feature_lists, bounds, pcop)[2]
            self.assertEqual(len(found), 4)
        for feature_lists, bounds, mol in cases:
            found = EmbedLib.MatchPharmacophore(feature_lists, bounds, pcop,
                                                mol=mol, use2DLimits=True)[2]
            self.assertEqual(len(found), 4)

    assert_four_feature_matches()

    from rdkit import DistanceGeometry as DG
    self.assertTrue(DG.DoTriangleSmoothing(b1))
    self.assertTrue(DG.DoTriangleSmoothing(b2))

    assert_four_feature_matches()
def get_3DDistanceMatrix(trainFoldPath):
    """Read SMILES from a fold file and compute a 3D distance matrix for each.

    The first whitespace-separated token of every line in ``trainFoldPath``
    is treated as a SMILES string. For each one the bounds-matrix size is
    printed, the molecule is embedded with a fixed seed, and its 3D distance
    matrix is converted to a float tensor.

    NOTE(review): nothing is returned and the per-molecule tensor is
    discarded - confirm whether results were meant to be collected.
    """
    with open(trainFoldPath, 'r') as handle:
        records = handle.read().strip().split('\n')

    trainDataSet = []
    for record in records:
        trainDataSet.append(record.strip().split()[0])

    smilesDataset = []
    for smile in trainDataSet:
        mol = Chem.MolFromSmiles(smile)
        bm = molDG.GetMoleculeBoundsMatrix(mol)
        print(len(bm))
        # Hydrogens are not added before embedding (an AddHs call was left
        # disabled in the original code).
        # Compute 3D coordinates with the distance-geometry algorithm.
        AllChem.EmbedMolecule(mol, randomSeed=1)
        dm = AllChem.Get3DDistanceMatrix(mol)
        dm_tensor = torch.FloatTensor(list(dm))
def check_mol(mol):
    """Embed ``mol`` onto the module-level pharmacophore and align the results.

    Relies on the module-level names ``feat_fact`` (feature factory),
    ``pcophore`` (pharmacophore), ``ref_mat`` (reference points passed to the
    alignment) and ``GetFeatsPerAtoms``.

    All existing conformers of ``mol`` are removed first. Every pharmacophore
    match is embedded (up to 5 attempts each); each embedding is
    UFF-optimised, aligned onto ``ref_mat`` through the positions of its
    matched features, and appended to the result.

    Returns a list of embedded, aligned molecules; the list is empty (and
    "no hits" is printed) when the molecule cannot match the pharmacophore.

    Output uses single-argument ``print(...)`` calls so the text is identical
    on Python 2 and Python 3.
    """
    res = []
    mol.RemoveAllConformers()
    match, mList = EmbedLib.MatchPharmacophoreToMol(mol, feat_fact, pcophore)
    if not match:
        print("no hits")
        return res

    for feature_matches in mList:
        for feature in feature_matches:
            print("%s %s" % (feature.GetAtomIds(), feature.GetFamily()))

    bounds = rdDistGeom.GetMoleculeBoundsMatrix(mol)
    pList = EmbedLib.GetAllPharmacophoreMatches(mList, bounds, pcophore)

    # One list of atom-id tuples per pharmacophore match.
    phMatches = [[feature.GetAtomIds() for feature in hit] for hit in pList]

    for phMatch in phMatches:
        _, embeds, _ = EmbedLib.EmbedPharmacophore(mol, phMatch, pcophore, count=5, silent=1)
        print("-----> embeds num: %d" % len(embeds))
        for embed in embeds:
            AllChem.UFFOptimizeMolecule(embed)
            feats = feat_fact.GetFeaturesForMol(embed)
            feats_dict = GetFeatsPerAtoms(feats)
            # Positions of the features that realise this match, in match order.
            match_feats = [feats_dict[atomid] for atomid in phMatch]
            pro_mat = [list(feat.GetPos()) for feat in match_feats]
            align_data = rdAlignment.GetAlignmentTransform(ref_mat, pro_mat, maxIterations=200)
            AllChem.TransformMol(embed, align_data[1])
            print(align_data[0])
            print(align_data[1])
            res.append(embed)
    return res
def get_bounds_matrix(self, rmg_molecule=None, rdkit_molecule=None):
    """
    A method to obtain the bounds matrix

    Parameters:
    - rmg_molecule: molecule to convert; falls back to ``self.rmg_molecule``
      when falsy.
    - rdkit_molecule: molecule handed to RDKit; when falsy it is built with
      ``self.get_rdkit_mol(rmg_molecule=...)``.

    Returns the bounds matrix, or ``None`` when either fallback fails.
    """
    if not rmg_molecule:
        try:
            rmg_molecule = self.rmg_molecule
        except Exception:
            # `Exception` (not a bare except) so KeyboardInterrupt and
            # SystemExit are not silently converted into a None result.
            return None

    if not rdkit_molecule:
        try:
            rdkit_molecule = self.get_rdkit_mol(rmg_molecule=rmg_molecule)
        except Exception:
            return None

    return rdDistGeom.GetMoleculeBoundsMatrix(rdkit_molecule)
def get_bounds_matrix(self, rmg_molecule=None, rdkit_molecule=None):
    """
    A method to obtain the bounds matrix

    Parameters:
    - rmg_molecule: molecule to convert; falls back to ``self.rmg_molecule``
      when falsy.
    - rdkit_molecule: molecule handed to RDKit; when falsy it is built with
      ``self.get_rdkit_mol(rmg_molecule=...)``.

    Returns the bounds matrix, or ``None`` when either fallback fails.
    Logs an info message before and after the RDKit call.
    """
    if not rmg_molecule:
        try:
            rmg_molecule = self.rmg_molecule
        except Exception:
            # `Exception` instead of `BaseException` so KeyboardInterrupt and
            # SystemExit are not silently converted into a None result.
            return None

    if not rdkit_molecule:
        try:
            rdkit_molecule = self.get_rdkit_mol(rmg_molecule=rmg_molecule)
        except Exception:
            return None

    logging.info("before")
    bm = rdDistGeom.GetMoleculeBoundsMatrix(rdkit_molecule)
    logging.info("Got bounds matrix")
    return bm
def _align_molecule(mol, pharmacophore, matches, featFactory, sort=False):
    """ Align a molecule to a given pharmacophore.

        Uses rdkit alignment algorithm. Nothing is appended when the molecule
        cannot be matched, cannot be embedded, or yields no alignment.

        Parameters
        ----------
        mol : rdkit.Chem.Mol
            Molecule to align.

        pharmacophore : rdkit.Chem.Pharm3D.Pharmacophore
            An rdkit pharmacophore

        matches : list
            If a molecule is matched to the pharmacophore it will be appended
            to this list (as a ``Match`` of SSD, molecule name and embedding).

        featFactory : rdkit.Chem.rdMolChemicalFeatures.MolChemicalFeatureFactory
            The feature factory.

        sort : bool, default=False
            Whether to keep the list of matches sorted while inserting.
    """
    bounds_matrix = rdDistGeom.GetMoleculeBoundsMatrix(mol)

    # Check if the molecule features can match with the pharmacophore.
    # all_matches is a list of tuples where each tuple contains the chemical features.
    can_match, all_matches = EmbedLib.MatchPharmacophoreToMol(mol, featFactory, pharmacophore)
    if not can_match:
        return

    # Match the molecule to the pharmacophore without aligning it.
    failed, _, matched_mols, _ = EmbedLib.MatchPharmacophore(
        all_matches, bounds_matrix, pharmacophore, useDownsampling=True)
    if failed:
        return

    atom_match = [list(x.GetAtomIds()) for x in matched_mols]
    try:
        mol_H = Chem.AddHs(mol)
        # Embed molecule onto the pharmacophore.
        # embeddings is a list of molecules with a single conformer.
        _, embeddings, _ = EmbedLib.EmbedPharmacophore(mol_H, atom_match, pharmacophore, count=10)
    except Exception:
        # Best effort: an embedding failure drops the molecule. `Exception`
        # (not a bare except) lets KeyboardInterrupt/SystemExit propagate.
        return

    # Align embeddings to the pharmacophore.
    SSDs = transform_embeddings(pharmacophore, embeddings, atom_match)
    if len(SSDs) == 0:
        return
    best_fit_index = min(enumerate(SSDs), key=itemgetter(1))[0]

    try:
        mol_id = mol.GetProp("_Name")
    except KeyError:
        # The molecule has no "_Name" property.
        mol_id = None

    matched_mol = Match(SSDs[best_fit_index], mol_id, embeddings[best_fit_index])
    if not sort:
        matches.append(matched_mol)
        return

    # Append to list in ordered manner.
    try:
        bisect.insort(matches, matched_mol)
    except TypeError:
        # Case when a molecule is repeated: the comparison falls through to
        # the molecules themselves, which bisect cannot order.
        return
def get_bounds_matrix(self):
    """
    Compute the bounds matrix of ``self.rdkit_molecule``, store it on
    ``self.bm`` and return it.
    """
    bounds = rdDistGeom.GetMoleculeBoundsMatrix(self.rdkit_molecule)
    self.bm = bounds
    return self.bm
def EmbedPharmacophore(mol, atomMatch, pcophore, randomSeed=-1, count=10, smoothFirst=True,
                       silent=False, bounds=None, excludedVolumes=None, targetNumber=-1,
                       useDirs=False):
    """ Generates one or more embeddings for a molecule that satisfy a pharmacophore

    atomMatch is a sequence of sequences containing atom indices
    for each of the pharmacophore's features.

      - randomSeed: base seed; attempt i uses i*10+1 when this is <= 0 and
           i*10+randomSeed otherwise.
      - count: is the maximum number of attempts to make at generating an embedding
      - smoothFirst: toggles triangle smoothing of the molecular bounds matrix
      - silent: if true, failed embedding attempts are not logged
      - bounds: if provided, should be the molecular bounds matrix. If this isn't
           provided, the matrix will be generated. When smoothFirst is set,
           DoTriangleSmoothing is called on this object itself, before it is copied.
      - excludedVolumes: if provided, passed to AddExcludedVolumes and to EmbedMol
      - targetNumber: if this number is positive, it provides a maximum number
           of embeddings to generate (i.e. we'll have count attempts to generate
           targetNumber embeddings).
      - useDirs: passed through to UpdatePharmacophoreBounds

    returns: a 3 tuple:
      1) the molecular bounds matrix adjusted for the pharmacophore
      2) a list of embeddings (molecules with a single conformer)
      3) the number of failed attempts at embedding

    raises: ValueError if the adjusted bounds matrix cannot be smoothed

    side effects: caches the chiral centers on mol._chiralCenters and adds the
    time spent embedding to the module-level _times['embed'].

    >>> m = Chem.MolFromSmiles('OCCN')
    >>> feats = [ChemicalFeatures.FreeChemicalFeature('HBondAcceptor', 'HAcceptor1', Geometry.Point3D(0.0, 0.0, 0.0)),
    ...          ChemicalFeatures.FreeChemicalFeature('HBondDonor', 'HDonor1', Geometry.Point3D(2.65, 0.0, 0.0)),
    ...   ]
    >>> pcophore=Pharmacophore.Pharmacophore(feats)
    >>> pcophore.setLowerBound(0,1, 2.5)
    >>> pcophore.setUpperBound(0,1, 3.5)
    >>> atomMatch = ((0,),(3,))

    >>> bm,embeds,nFail = EmbedPharmacophore(m,atomMatch,pcophore,randomSeed=23,silent=1)
    >>> len(embeds)
    10
    >>> nFail
    0

    Set up a case that can't succeed:
    >>> pcophore=Pharmacophore.Pharmacophore(feats)
    >>> pcophore.setLowerBound(0,1, 2.0)
    >>> pcophore.setUpperBound(0,1, 2.1)
    >>> atomMatch = ((0,),(3,))

    >>> bm,embeds,nFail = EmbedPharmacophore(m,atomMatch,pcophore,randomSeed=23,silent=1)
    >>> len(embeds)
    0
    >>> nFail
    10

    """
    global _times
    # Cache the chiral centers on the molecule so repeated calls reuse them.
    if not hasattr(mol, '_chiralCenters'):
        mol._chiralCenters = Chem.FindMolChiralCenters(mol)

    if bounds is None:
        bounds = MolDG.GetMoleculeBoundsMatrix(mol)
    if smoothFirst:
        # Smooths the (possibly caller-supplied) matrix before it is copied.
        DG.DoTriangleSmoothing(bounds)

    # Work on a copy from here on; this copy is what gets returned.
    bm = bounds.copy()

    bm = UpdatePharmacophoreBounds(bm, atomMatch, pcophore, useDirs=useDirs, mol=mol)
    if excludedVolumes:
        bm = AddExcludedVolumes(bm, excludedVolumes, smoothIt=False)

    if not DG.DoTriangleSmoothing(bm):
        raise ValueError("could not smooth bounds matrix")

    # A non-positive targetNumber means "keep everything the attempts produce".
    if targetNumber <= 0:
        targetNumber = count
    nFailed = 0
    res = []
    for i in range(count):
        # NOTE(review): if bm is a numpy array this slice is a view, not a
        # copy - confirm EmbedMol does not modify the matrix it is given.
        tmpM = bm[:, :]
        m2 = Chem.Mol(mol)
        t1 = time.time()
        try:
            # Each attempt gets its own seed derived from the attempt index.
            if randomSeed <= 0:
                seed = i * 10 + 1
            else:
                seed = i * 10 + randomSeed
            EmbedMol(m2, tmpM, atomMatch, randomSeed=seed, excludedVolumes=excludedVolumes)
        except ValueError:
            # Only ValueError counts as a failed attempt; anything else propagates.
            if not silent:
                logger.info('Embed failed')
            nFailed += 1
        else:
            t2 = time.time()
            _times['embed'] = _times.get('embed', 0) + t2 - t1
            # Discard embeddings whose chiral volume has the rejected sign for
            # a labelled center; these are neither returned nor counted as failures.
            keepIt = True
            for idx, stereo in mol._chiralCenters:
                if stereo in ('R', 'S'):
                    vol = ComputeChiralVolume(m2, idx)
                    if (stereo=='R' and vol>=0) or \
                       (stereo=='S' and vol<=0):
                        keepIt = False
                        break
            if keepIt:
                res.append(m2)
            else:
                logger.debug('Removed embedding due to chiral constraints.')
            # Stop early once enough embeddings have been collected.
            if len(res) == targetNumber:
                break
    return bm, res, nFailed
def testIssue268(self):
    """Both Issue268 molecules must give a 4-feature pharmacophore match,
    with and without 2D limits, before and after triangle smoothing of
    their bounds matrices."""

    def data_file(filename):
        return os.path.join(self.dataDir, filename)

    featFactory = ChemicalFeatures.BuildFeatureFactory(data_file('Issue268.fdef'))
    m1 = Chem.MolFromMolFile(data_file('Issue268_Mol1.mol'))
    m2 = Chem.MolFromMolFile(data_file('Issue268_Mol2.mol'))

    # The pickle is read as text so that '\r\n' line endings can be
    # normalised before unpickling; the context manager closes the file.
    with open(data_file('Issue268_Pcop.pkl'), 'r') as inTF:
        raw_text = inTF.read()
    buf = raw_text.replace('\r\n', '\n').encode('utf-8')
    with io.BytesIO(buf) as inF:
        pcop = pickle.load(inF, encoding='latin1')

    _, mList1 = EmbedLib.MatchFeatsToMol(m1, featFactory, pcop.getFeatures())
    _, mList2 = EmbedLib.MatchFeatsToMol(m2, featFactory, pcop.getFeatures())
    b1 = rdDistGeom.GetMoleculeBoundsMatrix(m1)
    b2 = rdDistGeom.GetMoleculeBoundsMatrix(m2)

    cases = ((mList1, b1, m1), (mList2, b2, m2))

    def assert_four_feature_matches():
        # First the plain 3D match for both molecules, then the 2D-limited one.
        for feature_lists, bounds, _mol in cases:
            found = EmbedLib.MatchPharmacophore(feature_lists, bounds, pcop)[2]
            self.assertEqual(len(found), 4)
        for feature_lists, bounds, mol in cases:
            found = EmbedLib.MatchPharmacophore(feature_lists, bounds, pcop,
                                                mol=mol, use2DLimits=True)[2]
            self.assertEqual(len(found), 4)

    assert_four_feature_matches()

    self.assertTrue(DG.DoTriangleSmoothing(b1))
    self.assertTrue(DG.DoTriangleSmoothing(b2))

    assert_four_feature_matches()
np.set_printoptions(precision=4)
import pickle
import rdkit.DistanceGeometry as DG
import rdkit
from rdkit import Chem
from rdkit.Chem import rdDistGeom, ChemicalForceFields, rdMolAlign

print(rdkit.__version__)

# Load the fragment database from the working directory.
with open('fragments.pickle', mode='rb') as f:
    db = pickle.load(f)

# Input molecule and its bounds matrix; an untouched copy is kept in bm_org.
smiles = "s1c(c(c(c1c1cc(ccc1)NC1CCCCC1)Br)OCC(=O)[O-])C(=O)[O-]"
mol = Chem.MolFromSmiles(smiles)
bm = rdDistGeom.GetMoleculeBoundsMatrix(mol)
bm_org = copy.deepcopy(bm)

# Cut input molecule by rotatable bonds
RotatableBond = Chem.MolFromSmarts('[!$(*#*)&!D1]-&!@[!$(*#*)&!D1]')
rwmol = Chem.RWMol(mol)
for begin, end in mol.GetSubstructMatches(RotatableBond):
    rwmol.RemoveBond(begin, end)
    beginAtom = rwmol.GetAtomWithIdx(begin)
    endAtom = rwmol.GetAtomWithIdx(end)
    # Aromatic non-carbon atoms that lost a bond get one explicit hydrogen
    # and have implicit hydrogens switched off.
    for atom in (beginAtom, endAtom):
        if atom.GetAtomicNum() == 6 or not atom.GetIsAromatic():
            continue
        atom.SetNumExplicitHs(1)
        atom.SetNoImplicit(True)