def test1_Issue163(self):
    # Regression test for Issue163: load the two pickled "humanoral"
    # composites, run AnalyzeComposite.ProcessIt on them and check the
    # sorted result rows.
    composites = []
    for fileName in ('humanoral.1.pkl', 'humanoral.2.pkl'):
        fullName = os.path.join(self.baseDir, fileName)
        try:
            with open(fullName, 'rb') as inF:
                loaded = pickle.load(inF)
        except Exception:
            # a load failure is reported by the assertion just below
            loaded = None
        self.assertTrue(loaded)
        composites.append(loaded)

    try:
        res = sorted(AnalyzeComposite.ProcessIt(composites, verbose=-1))
    except Exception:
        import traceback
        traceback.print_exc()
        succeeded = False
    else:
        succeeded = True
    self.assertTrue(succeeded)

    # first two and last entries of the sorted results
    self.assertEqual(res[0][0], 'BALABANJ')
    self.assertEqual(res[1][0], 'BERTZCT')
    self.assertEqual(res[-1][0], 'VSA_ESTATE9')
    # every result row has five fields
    for row in res:
        self.assertEqual(len(row), 5)
def test1_Issue163(self):
    # Regression test for Issue163: load the two pickled "humanoral"
    # composites, run AnalyzeComposite.ProcessIt on them and check the
    # first two and the last result rows.
    name1 = os.path.join(self.baseDir, 'humanoral.1.pkl')
    try:
        with open(name1, 'rb') as pklF:
            c1 = pickle.load(pklF)
    except Exception:  # pragma: nocover
        c1 = None
    self.assertTrue(c1)

    name2 = os.path.join(self.baseDir, 'humanoral.2.pkl')
    try:
        with open(name2, 'rb') as pklF:
            c2 = pickle.load(pklF)
    except Exception:  # pragma: nocover
        c2 = None
    self.assertTrue(c2)

    try:
        res = AnalyzeComposite.ProcessIt([c1, c2], verbose=-1)
    except Exception:  # pragma: nocover
        import traceback
        traceback.print_exc()
        ok = 0
    else:
        ok = 1
    self.assertTrue(ok)

    # assertEqual (rather than assertTrue(a == b)) reports both values when
    # the comparison fails
    self.assertEqual(res[0][0], 'BALABANJ')
    self.assertEqual(res[1][0], 'BERTZCT')
    self.assertEqual(res[-1][0], 'FR_ALLYLIC_OXID')
    for entry in res:
        self.assertEqual(len(entry), 5)
def testPickle(self):
    # Round-trips a FreeChemicalFeature through pickle, then checks that the
    # two stored reference pickles (feat.pkl and featv2.pkl) still load.
    ffeat = ChemicalFeatures.FreeChemicalFeature("HBondDonor", "HBondDonor1",
                                                 geom.Point3D(1.0, 2.0, 3.0), 123)
    pkl = cPickle.dumps(ffeat)
    ffeat2 = cPickle.loads(pkl, encoding='bytes')
    self.assertEqual(ffeat2.GetId(), ffeat.GetId())
    self.assertEqual(ffeat2.GetFamily(), ffeat.GetFamily())
    self.assertEqual(ffeat2.GetType(), ffeat.GetType())
    self.assertTrue(ptFeq(ffeat2.GetPos(), ffeat.GetPos()))

    # Check that the old pickled versions have not been broken.
    # The file is opened in a with-block so the handle is always closed.
    with open(os.path.join(RDConfig.RDBaseDir,
                           'Code/ChemicalFeatures/Wrap/testData/feat.pkl'), 'rb') as inF:
        ffeat2 = cPickle.load(inF, encoding='bytes')
    # this version (1.0) does not have an id in the byte stream
    self.assertEqual(ffeat2.GetFamily(), ffeat.GetFamily())
    self.assertEqual(ffeat2.GetType(), ffeat.GetType())
    self.assertTrue(ptFeq(ffeat2.GetPos(), ffeat.GetPos()))

    # Test the new version also has the id and works as expected

    # uncomment the following to generate (overwrite) new version of pickled
    # data file
    #cPickle.dump(ffeat,file(os.path.join(RDConfig.RDBaseDir, 'Code/ChemicalFeatures/Wrap/testData/featv2.pkl'),'wb+'))
    with open(os.path.join(RDConfig.RDBaseDir,
                           'Code/ChemicalFeatures/Wrap/testData/featv2.pkl'), 'rb') as inF:
        ffeat2 = cPickle.load(inF, encoding='bytes')
    self.assertEqual(ffeat2.GetId(), ffeat.GetId())
    self.assertEqual(ffeat2.GetFamily(), ffeat.GetFamily())
    self.assertEqual(ffeat2.GetType(), ffeat.GetType())
    self.assertTrue(ptFeq(ffeat2.GetPos(), ffeat.GetPos()))
def test6Bug29_2(self):
    """ a more extensive test of the cmp stuff using pickled trees"""
    import os
    dataDir = os.path.join(RDConfig.RDCodeDir, 'ML', 'DecTree', 'test_data')
    trees = []
    for fileName in ('CmpTree1.pkl', 'CmpTree2.pkl'):
        with open(os.path.join(dataDir, fileName), 'rb') as treeFile:
            trees.append(cPickle.load(treeFile))
    t1, t2 = trees
    # NOTE(review): the assertion passes when cmp() returns a truthy value;
    # confirm that matches the intent of the 'equality failed' message.
    assert cmp(t1, t2), 'equality failed'
def testBuild(self):
    """ tests building and screening a packager """
    calcPath = os.path.join(self.dataDir, 'Jan9_build3_calc.dsc')
    modelPath = os.path.join(self.dataDir, 'Jan9_build3_model.pkl')
    # load the descriptor calculator first, then the model
    with open(calcPath, 'rb') as calcF:
        calc = cPickle.load(calcF)
    with open(modelPath, 'rb') as modelF:
        model = cPickle.load(modelF)
    package = Packager.ModelPackage(descCalc=calc, model=model)
    self._verify(package, self.testD)
def testBuild(self):
    # tests building and screening a packager
    calcPath = os.path.join(self.dataDir, 'Jan9_build3_calc.dsc')
    # The calculator file is read as text and CRLF line endings are
    # normalised to LF before the bytes are handed to the unpickler.
    with open(calcPath, 'r') as calcTF:
        calcText = calcTF.read()
    calcBytes = calcText.replace('\r\n', '\n').encode('utf-8')
    calc = cPickle.load(BytesIO(calcBytes))

    modelPath = os.path.join(self.dataDir, 'Jan9_build3_model.pkl')
    with open(modelPath, 'rb') as modelF:
        model = cPickle.load(modelF)

    package = Packager.ModelPackage(descCalc=calc, model=model)
    self._verify(package, self.testD)
def test6Bug29_2(self):
    """ a more extensive test of the cmp stuff using pickled trees"""
    import os
    dataDir = os.path.join(RDConfig.RDCodeDir, 'ML', 'DecTree', 'test_data')
    trees = []
    for fileName in ('CmpTree1.pkl', 'CmpTree2.pkl'):
        # read as text, normalise CRLF to LF, then unpickle from bytes
        with open(os.path.join(dataDir, fileName), 'r') as textFile:
            text = textFile.read()
        payload = text.replace('\r\n', '\n').encode('utf-8')
        with io.BytesIO(payload) as treeFile:
            trees.append(cPickle.load(treeFile))
    t1, t2 = trees
    # NOTE(review): the assertion passes when cmp() returns a truthy value;
    # confirm that matches the intent of the 'equality failed' message.
    assert cmp(t1, t2), 'equality failed'
def testPerm1(self):
    """ tests the descriptor remapping stuff in a packager """
    from rdkit.Chem import Descriptors
    # read as text, normalise CRLF to LF, then unpickle from bytes
    with open(os.path.join(self.dataDir, 'Jan9_build3_pkg.pkl'), 'r') as pkgTF:
        buf = pkgTF.read().replace('\r\n', '\n').encode('utf-8')
    with io.BytesIO(buf) as pkgF:
        pkg = cPickle.load(pkgF)
    calc = pkg.GetCalculator()
    names = calc.GetDescriptorNames()
    ref = {}
    DataUtils.InitRandomNumbers((23, 42))
    for smi, pred, conf in self.testD:
        # reference values: each descriptor evaluated on a fresh molecule;
        # names missing from Descriptors fall back to a constant function
        for desc in names:
            fn = getattr(Descriptors, desc, lambda x: 777)
            m = Chem.MolFromSmiles(smi)
            ref[desc] = fn(m)

        # evaluate the descriptors in five shuffled orders on one molecule
        # per order and compare with the reference values
        for i in range(5):
            perm = list(names)
            # the `random=` argument of random.shuffle was removed in
            # Python 3.11; the check below does not depend on which
            # permutation is drawn
            random.shuffle(perm)

            m = Chem.MolFromSmiles(smi)
            for desc in perm:
                fn = getattr(Descriptors, desc, lambda x: 777)
                val = fn(m)
                assert feq(val, ref[desc], 1e-4), '%s: %s(%s): %f!=%f' % (str(perm), smi, desc,
                                                                          val, ref[desc])
def _loadPackage(self):
    """Return the object unpickled from 'Jan9_build3_pkg.pkl' in self.dataDir.

    The file is read in text mode and CRLF line endings are replaced by LF
    before the UTF-8 encoded bytes are passed to the unpickler.
    """
    pkgPath = os.path.join(self.dataDir, 'Jan9_build3_pkg.pkl')
    with open(pkgPath, 'r') as pkgTF:
        text = pkgTF.read()
    stream = BytesIO(text.replace('\r\n', '\n').encode('utf-8'))
    return cPickle.load(stream)
def test4(self):
    """ include thresholding """
    opts = self.details
    opts.tableName = 'ferro_quant'
    opts.threshold = 0.80
    opts.doHoldout = 0
    opts.doTraining = 0

    # read as text, normalise CRLF to LF, then unpickle from bytes
    with open(os.path.join(self.baseDir, 'ferromag_quant_10.pkl'), 'r') as pklTF:
        text = pklTF.read()
    payload = text.replace('\r\n', '\n').encode('utf-8')
    with io.BytesIO(payload) as pklF:
        compos = pickle.load(pklF)
    tgt = 5
    self.assertEqual(len(compos), tgt)

    screened = ScreenComposite.ScreenFromDetails(compos, opts)
    nGood, misCount, nSkipped, avgGood, avgBad, avgSkip, tbl = screened
    self.assertEqual(nGood, 91)
    self.assertEqual(misCount, 1)
    self.assertEqual(nSkipped, 3)
    self.assertAlmostEqual(avgGood, 0.9956, 4)
    self.assertAlmostEqual(avgBad, 1.000, 4)
    self.assertAlmostEqual(avgSkip, 0.6000, 4)
    # expected entries of the returned table, checked in the original order
    for cell, expected in (((0, 0), 54), ((1, 1), 37), ((0, 1), 1), ((1, 0), 0)):
        self.assertEqual(tbl[cell], expected)
def testSaveState(self):
    # Loads a pickled descriptor calculator and checks its descriptor names,
    # versions and computed values against the fixture attributes.
    statePath = os.path.join(RDConfig.RDCodeDir, 'ML/Descriptors/test_data', 'molcalc.dsc')
    with open(statePath, 'rb') as stateFile:
        calc = cPickle.load(stateFile)
    expectedNames = tuple(self.descs)
    expectedVersions = tuple(self.vers)
    self.assertEqual(calc.GetDescriptorNames(), expectedNames)
    self.assertEqual(calc.GetDescriptorVersions(), expectedVersions)
    self._testVals(calc, self.testD)
def test6(self):
    """ multiple models """
    self.details.tableName = 'ferro_noquant'
    # read as text, normalise CRLF to LF, then unpickle from bytes
    with open(os.path.join(self.baseDir, 'ferromag_auto_10_3.pkl'), 'r') as pklTF:
        text = pklTF.read()
    payload = text.replace('\r\n', '\n').encode('utf-8')
    with io.BytesIO(payload) as pklF:
        compos = pickle.load(pklF)
    tgt = 10
    self.assertEqual(len(compos), tgt)

    # the same composite is passed twice
    composites = [compos, compos]
    tpl = ScreenComposite.ScreenFromDetails(composites, self.details)
    nGood, misCount, nSkipped, avgGood, avgBad, avgSkip, tbl = tpl

    # expectations for the first entry of each returned sequence
    self.assertEqual(nGood[0], 95)
    self.assertEqual(misCount[0], 8)
    self.assertEqual(nSkipped[0], 0)
    self.assertAlmostEqual(avgGood[0], .9684, 4)
    self.assertAlmostEqual(avgBad[0], .8375, 4)
    # the second entry of each sequence is expected to be zero
    for series in (nGood, misCount, nSkipped, avgGood, avgBad):
        self.assertEqual(series[1], 0)
    for cell, expected in (((0, 0), 50), ((1, 1), 45), ((0, 1), 5), ((1, 0), 3)):
        self.assertEqual(tbl[cell], expected)
def test11(self):
    """ filtering with segmentation """
    self.details.tableName = 'ferro_noquant'
    # read as text, normalise CRLF to LF, then unpickle from bytes
    with open(os.path.join(self.baseDir, 'ferromag_filt_10_3.pkl'), 'r') as pklTF:
        text = pklTF.read()
    payload = text.replace('\r\n', '\n').encode('utf-8')
    with io.BytesIO(payload) as pklF:
        compos = pickle.load(pklF)
    tgt = 10
    self.assertEqual(len(compos), tgt)

    opts = self.details
    opts.doHoldout = 1
    opts.filterVal = 1
    opts.filterFrac = .33
    screened = ScreenComposite.ScreenFromDetails(compos, opts)
    nGood, misCount, nSkipped, avgGood, avgBad, avgSkip, tbl = screened

    self.assertEqual(nGood, 37)
    self.assertEqual(misCount, 6)
    self.assertEqual(nSkipped, 0)
    self.assertAlmostEqual(avgGood, .95946, 4)
    self.assertAlmostEqual(avgBad, .85, 4)
    for cell, expected in (((0, 0), 14), ((1, 1), 23), ((0, 1), 1), ((1, 0), 5)):
        self.assertEqual(tbl[cell], expected)
def test3_include_training(self):
    # include training data only
    opts = self.details
    opts.tableName = 'ferro_quant'
    opts.doHoldout = 0
    opts.doTraining = 1

    # read as text, normalise CRLF to LF, then unpickle from bytes
    with open(os.path.join(self.baseDir, 'ferromag_quant_10.pkl'), 'r') as pklTF:
        text = pklTF.read()
    payload = text.replace('\r\n', '\n').encode('utf-8')
    with io.BytesIO(payload) as pklF:
        compos = pickle.load(pklF)
    tgt = 5
    self.assertEqual(len(compos), tgt, 'bad composite loaded: %d != %d' % (len(compos), tgt))

    screened = ScreenComposite.ScreenFromDetails(compos, opts)
    nGood, misCount, nSkipped, avgGood, avgBad, avgSkip, tbl = screened
    self.assertEqual(nGood, 65)
    self.assertEqual(misCount, 1)
    self.assertEqual(nSkipped, 0)
    self.assertAlmostEqual(avgGood, .98307, 4)
    self.assertAlmostEqual(avgBad, 0.600, 4)
    self.assertAlmostEqual(avgSkip, 0, 4)
    # the first table check passes the table itself as the failure message
    self.assertEqual(tbl[0, 0], 38, tbl)
    for cell, expected in (((1, 1), 27), ((0, 1), 1), ((1, 0), 0)):
        self.assertEqual(tbl[cell], expected)
def test1PPDataset(self):
    # Compares Gasteiger charges computed for each SMILES in the CSV file
    # with the per-atom values stored in the pickled reference dictionary.
    dataDir = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'PartialCharges', 'Wrap',
                           'test_data')
    fileN = os.path.join(dataDir, 'PP_descrs_regress.2.csv')
    with open(fileN, 'r') as infil:
        lines = infil.readlines()

    infile = os.path.join(dataDir, 'PP_combi_charges.pkl')
    # read as text, normalise CRLF to LF, then unpickle from bytes
    with open(infile, 'r') as cchtFile:
        text = cchtFile.read()
    payload = text.replace('\r\n', '\n').encode('utf-8')
    with io.BytesIO(payload) as cchFile:
        combiCharges = pickle.load(cchFile)

    for lin in lines:
        # lines whose first character is '#' are skipped
        if lin[0] == '#':
            continue
        smi = lin.strip().split(',')[0]
        rdmol = Chem.MolFromSmiles(smi)
        rdPartialCharges.ComputeGasteigerCharges(rdmol)
        refCharges = combiCharges[smi]

        failed = False
        for ai in range(rdmol.GetNumAtoms()):
            rdch = float(rdmol.GetAtomWithIdx(ai).GetProp('_GasteigerCharge'))
            if not feq(rdch, refCharges[ai], 1.e-2):
                failed = True
                print(smi, ai, rdch, refCharges[ai])
        if failed:
            rdmol.Debug()
        self.assertFalse(failed)
def test3Pickle2(self):
    """

    """
    # build a sparse vector with three non-zero entries
    size = 1 << 21
    v1 = ds.IntSparseIntVect(size)
    self.assertRaises(IndexError, lambda: v1[size + 1])
    v1[0] = 1
    v1[2] = 2
    v1[1 << 12] = 3
    self.assertTrue(v1 == v1)

    # in-memory pickle round trip
    v2 = cPickle.loads(cPickle.dumps(v1))
    self.assertTrue(v2 == v1)

    # rebuild from the binary representation
    v3 = ds.IntSparseIntVect(v2.ToBinary())
    self.assertTrue(v2 == v3)
    self.assertTrue(v1 == v3)

    #cPickle.dump(v1,file('isiv.pkl','wb+'))
    refPath = os.path.join(RDConfig.RDBaseDir, 'Code/DataStructs/Wrap/testData/isiv.pkl')
    # read as text, normalise CRLF to LF, then unpickle from bytes
    with open(refPath, 'r') as tf:
        text = tf.read()
    payload = text.replace('\r\n', '\n').encode('utf-8')
    with io.BytesIO(payload) as f:
        v3 = cPickle.load(f)
    self.assertTrue(v3 == v1)
def testTreeGrow(self):
    " testing tree-based composite "
    refPath = RDConfig.RDCodeDir + "/ML/Composite/test_data/composite_base.pkl"
    # read as text, normalise CRLF to LF, then unpickle from bytes
    with open(refPath, "r") as pklTF:
        text = pklTF.read()
    payload = text.replace("\r\n", "\n").encode("utf-8")
    with io.BytesIO(payload) as pklF:
        self.refCompos = cPickle.load(pklF)

    grown = Composite.Composite()
    grown._varNames = self.varNames
    grown.SetQuantBounds(self.qBounds, self.nPoss)
    from rdkit.ML.DecTree import CrossValidate
    grown.Grow(self.examples, self.attrs, [], buildDriver=CrossValidate.CrossValidationDriver,
               pruner=None, nTries=100, silent=1)
    grown.AverageErrors()
    grown.SortModels()

    # with open(RDConfig.RDCodeDir+'/ML/Composite/test_data/composite_base.pkl','wb') as pklF:
    #   cPickle.dump(composite,pklF)

    self.treeComposite = grown
    self.assertEqual(len(grown), len(self.refCompos))
    # only the third element of each entry is compared with the reference
    for idx in range(len(grown)):
        t1, c1, e1 = grown[idx]
        t2, c2, e2 = self.refCompos[idx]
        self.assertEqual(e1, e2)
def testSaveState(self):
    # Loads a pickled descriptor calculator and checks names, versions,
    # computed values, ShowDescriptors output, summaries and functions.
    statePath = os.path.join(RDConfig.RDCodeDir, 'ML/Descriptors/test_data', 'molcalc.dsc')
    # read as text, normalise CRLF to LF, then unpickle from bytes
    with open(statePath, 'r') as inTF:
        text = inTF.read()
    stream = BytesIO(text.replace('\r\n', '\n').encode('utf-8'))
    calc = cPickle.load(stream)

    self.assertEqual(calc.GetDescriptorNames(), tuple(self.descs))
    self.assertEqual(calc.GetDescriptorVersions(), tuple(self.vers))
    self._testVals(calc, self.testD)

    # every descriptor name must appear in what ShowDescriptors prints
    captured = StringIO()
    with redirect_stdout(captured):
        calc.ShowDescriptors()
    shown = captured.getvalue()
    for name in calc.GetDescriptorNames():
        self.assertIn(name, shown)

    self.assertIn('Wildman-Crippen LogP value', calc.GetDescriptorSummaries())
    self.assertIn('N/A', calc.GetDescriptorSummaries())

    funcs = calc.GetDescriptorFuncs()
    self.assertEqual(len(funcs), len(self.descs))
    for func in funcs:
        self.assertTrue(callable(func))
def test3Pickle2(self):
    """

    """
    # build a sparse vector with three non-zero entries
    size = 1 << 21
    v1 = ds.IntSparseIntVect(size)
    self.assertRaises(IndexError, lambda: v1[size + 1])
    v1[0] = 1
    v1[2] = 2
    v1[1 << 12] = 3
    self.assertTrue(v1 == v1)

    # in-memory pickle round trip
    v2 = cPickle.loads(cPickle.dumps(v1))
    self.assertTrue(v2 == v1)

    # rebuild from the binary representation
    v3 = ds.IntSparseIntVect(v2.ToBinary())
    self.assertTrue(v2 == v3)
    self.assertTrue(v1 == v3)

    #cPickle.dump(v1,file('isiv.pkl','wb+'))
    refPath = os.path.join(RDConfig.RDBaseDir, 'Code/DataStructs/Wrap/testData/isiv.pkl')
    with open(refPath, 'rb') as refFile:
        v3 = cPickle.load(refFile)
    self.assertTrue(v3 == v1)
def testLoad2(self):
    """ tests loading and screening a packager 2 """
    pkgPath = os.path.join(self.dataDir, 'Jan9_build3_pkg.pkl')
    # read as text, normalise CRLF to LF, then unpickle from bytes
    with open(pkgPath, 'r') as pkgTF:
        text = pkgTF.read()
    payload = text.replace('\r\n', '\n').encode('utf-8')
    with io.BytesIO(payload) as pkgF:
        package = cPickle.load(pkgF)
    self._verify2(package, self.testD)
def test3CatFilePickle(self):
    # Loads the pickled catalog and checks its entry count, fingerprint
    # length and bits.
    catPath = os.path.join(RDConfig.RDCodeDir, 'Chem', 'test_data', 'simple_catalog.pkl')
    with open(catPath, 'rb') as catFile:
        cat = cPickle.load(catFile, encoding='bytes')
    assert cat.GetNumEntries() == 21
    assert cat.GetFPLength() == 21
    self._testBits(cat)
def test4(self):
    # Builds a signature tree (maxDepth=3) from the pickled cdk2 examples and
    # checks the labels of the root and of its first two children.
    from rdkit.six.moves import cPickle
    dataPath = os.path.join(RDConfig.RDCodeDir, 'ML', 'DecTree', 'test_data', 'cdk2-few.pkl.gz')
    # with-block so the gzip handle is closed even if unpickling fails
    with gzip.open(dataPath, 'rb') as gz:
        examples = cPickle.load(gz, encoding='Latin1')
    t = BuildSigTree(examples, 2, maxDepth=3)
    self.assertEqual(t.GetLabel(), 2181)
    self.assertEqual(t.GetChildren()[0].GetLabel(), 2861)
    self.assertEqual(t.GetChildren()[1].GetLabel(), 8182)
def readFragmentScores(name='fpscores'):
    """Load '<name>.pkl.gz' and store a key -> score mapping in `_fscores`.

    The pickled object is iterated as a sequence of records; in each record
    the first element is converted with float() and used as the value for
    every remaining element of that record.

    Arguments:
      - name: path of the data file without the '.pkl.gz' suffix

    Returns nothing; the result is written to the module-level `_fscores`.

    NOTE: unpickling can execute arbitrary code; only load trusted files.
    """
    import gzip
    global _fscores
    # with-block so the gzip handle is closed once the data has been read
    with gzip.open('%s.pkl.gz' % name) as scoreFile:
        records = cPickle.load(scoreFile)
    # the global is assigned only after the conversion has succeeded
    _fscores = {key: float(record[0]) for record in records for key in record[1:]}
def testMultiTree(self):
    # testing multivalued tree growth
    self._setupMultiTree()
    # read as text, normalise CRLF to LF, then unpickle from bytes
    with open(self.multiTreeName, 'r') as inTFile:
        text = inTFile.read()
    payload = text.replace('\r\n', '\n').encode('utf-8')
    with io.BytesIO(payload) as inFile:
        refTree = cPickle.load(inFile)
    assert self.t1 == refTree, 'Incorrect tree generated.'
def testPyBasicTree(self):
    # testing basic tree growth (python entropy code)
    self._setupPyBasicTree()
    # read as text, normalise CRLF to LF, then unpickle from bytes
    with open(self.basicTreeName, 'r') as inTFile:
        text = inTFile.read()
    payload = text.replace('\r\n', '\n').encode('utf-8')
    with io.BytesIO(payload) as inFile:
        refTree = cPickle.load(inFile)
    assert self.t1 == refTree, 'Incorrect tree generated.'
def test4UnusedVars(self):
    " testing unused variables "
    self._setupTree1a()
    with open(self.qTree1Name, 'rb') as inFile:
        refTree = cPickle.load(inFile)
    assert self.t1 == refTree, 'Incorrect tree generated.'
    # the last element of each example is the expected classification
    for idx, example in enumerate(self.examples1):
        assert self.t1.ClassifyExample(example) == example[-1], \
          'examples1[%d] misclassified' % idx
def test2Tree(self):
    " testing tree2 "
    self._setupTree2()
    # read as text, normalise CRLF to LF, then unpickle from bytes
    with open(self.qTree2Name, 'r') as inTFile:
        text = inTFile.read()
    payload = text.replace('\r\n', '\n').encode('utf-8')
    with io.BytesIO(payload) as inFile:
        refTree = cPickle.load(inFile)
    assert self.t2 == refTree, 'Incorrect tree generated.'
def test4Search(self):
    # Screens the pickled molecules against a three-feature pharmacophore
    # and checks how many were read, matched and hit.
    featFactory = ChemicalFeatures.BuildFeatureFactory(
      os.path.join(self.dataDir, 'BaseFeatures.fdef'))

    activeFeats = [
      ChemicalFeatures.FreeChemicalFeature('Acceptor', Geometry.Point3D(0.0, 0.0, 0.0)),
      ChemicalFeatures.FreeChemicalFeature('Donor', Geometry.Point3D(0.0, 0.0, 0.0)),
      ChemicalFeatures.FreeChemicalFeature('Aromatic', Geometry.Point3D(0.0, 0.0, 0.0))
    ]
    pcophore = Pharmacophore.Pharmacophore(activeFeats)
    # (i, j, lower bound, upper bound, 2D upper bound) per feature pair
    for i, j, lower, upper, upper2D in ((0, 1, 2.251, 2.451, 3),
                                        (0, 2, 4.970, 5.170, 6),
                                        (1, 2, 2.681, 2.881, 6)):
        pcophore.setLowerBound(i, j, lower)
        pcophore.setUpperBound(i, j, upper)
        pcophore.setUpperBound2D(i, j, upper2D)

    nDone = 0
    nMatches = 0
    nHits = 0
    # with-block so the gzip handle is closed when the loop ends or fails
    with gzip.open(os.path.join(self.dataDir, 'cdk2-syn-clip100.pkl.gz'), 'rb') as inF:
        while 1:
            try:
                name, molPkl, boundsMat = cPickle.load(inF, encoding='latin1')
                if PY3:
                    molPkl = bytes(molPkl, encoding='latin1')
            except Exception:
                # any load failure (including end of file) ends the loop; a
                # short read is caught by the nDone assertion below
                break

            nDone += 1

            mol = Chem.Mol(molPkl)
            # the stored bounds matrix is replaced by a freshly computed one
            boundsMat = rdDistGeom.GetMoleculeBoundsMatrix(mol)
            DG.DoTriangleSmoothing(boundsMat)

            canMatch, matches = EmbedLib.MatchPharmacophoreToMol(mol, featFactory, pcophore)
            if canMatch:
                nMatches += 1
                r = EmbedLib.MatchPharmacophore(matches, boundsMat, pcophore,
                                                useDownsampling=True, use2DLimits=True,
                                                mol=mol)
                failed, bm, match, details = r
                if not failed:
                    nHits += 1

    self.assertEqual(nDone, 100)
    self.assertEqual(nMatches, 93)
    #print 'nhits:',nHits
    self.assertEqual(nHits, 67)
def testTorsionsRegression(self):
    # Compares the topological-torsion fingerprint of every molecule in
    # self.mols with the pickled reference at the same index.
    refPath = os.path.join(self.testDataPath, 'mols1000.tts.pkl.gz')
    # with-block so the gzip handle is closed after loading
    with gzip.open(refPath, 'rb') as inF:
        torsions = cPickle.load(inF, encoding='bytes')
    for i, m in enumerate(self.mols):
        tt = Torsions.GetTopologicalTorsionFingerprintAsIntVect(m)
        if tt != torsions[i]:  # pragma: nocover
            debugFingerprint(m, tt, torsions[i])
        self.assertEqual(tt, torsions[i])
        # must also differ from the previous reference (index -1 wraps to
        # the last reference for the first molecule)
        self.assertNotEqual(tt, torsions[i - 1])
def testPairsRegression(self):
    # Compares the atom-pair fingerprint of every molecule in self.mols with
    # the pickled reference at the same index.
    refPath = os.path.join(self.testDataPath, 'mols1000.aps.pkl.gz')
    # with-block so the gzip handle is closed after loading
    with gzip.open(refPath, 'rb') as inF:
        atomPairs = cPickle.load(inF, encoding='bytes')
    for i, m in enumerate(self.mols):
        ap = Pairs.GetAtomPairFingerprint(m)
        if ap != atomPairs[i]:  # pragma: nocover
            debugFingerprint(m, ap, atomPairs[i])
        self.assertEqual(ap, atomPairs[i])
        # must also differ from the previous reference (index -1 wraps to
        # the last reference for the first molecule)
        self.assertNotEqual(ap, atomPairs[i - 1])
def GetFingerprints(details):
    """ returns an iterable sequence of fingerprints

    each fingerprint will have a _fieldsFromDb member whose first entry is
    the id.

    The source is chosen from `details`:
      - dbName and tableName set: fingerprints come from the database
      - otherwise, inFileName set: (ID, fingerprint) pairs are unpickled
        from that file until loading fails, and returned as a list
      - otherwise: None is returned

    If the database connection cannot be created, the error is reported and
    None is returned. If the input file cannot be opened, the error is
    reported and an empty list is returned.

    NOTE: unpickling can execute arbitrary code; only read trusted files.
    """
    if details.dbName and details.tableName:
        conn = None
        try:
            conn = DbConnect(details.dbName, details.tableName)
            if hasattr(details, 'dbUser'):
                conn.user = details.dbUser
            if hasattr(details, 'dbPassword'):
                conn.password = details.dbPassword
        except Exception:
            import traceback
            FingerprintMols.error('Error: Problems establishing connection to database: %s|%s\n' %
                                  (details.dbName, details.tableName))
            traceback.print_exc()
        if conn is None:
            # no connection object exists, so there is nothing to query
            # (previously this path failed with a NameError on `conn`)
            return None
        cmd = _ConstructSQL(details, extraFields=details.fpColName)
        curs = conn.GetCursor()
        # curs.execute(cmd)
        # print 'CURSOR:',curs,curs.closed
        if _dataSeq:
            suppl = _dataSeq(curs, cmd, depickle=not details.noPickle,
                             klass=DataStructs.ExplicitBitVect)
            _dataSeq._conn = conn
        else:
            # NOTE(review): `data` is not defined anywhere in this function;
            # this branch raises NameError unless a module-level `data`
            # exists. Left unchanged because the intended value is unclear.
            suppl = DbFpSupplier.ForwardDbFpSupplier(data, fpColName=details.fpColName)
    elif details.inFileName:
        suppl = []
        try:
            # pickle data must be read in binary mode
            inF = open(details.inFileName, 'rb')
        except IOError:
            import traceback
            FingerprintMols.error('Error: Problems reading from file %s\n' % (details.inFileName))
            traceback.print_exc()
            return suppl
        with inF:
            while True:
                try:
                    ID, fp = cPickle.load(inF)
                except Exception:
                    # end of file or an unreadable record ends the sequence
                    break
                fp._fieldsFromDb = [ID]
                suppl.append(fp)
    else:
        suppl = None

    return suppl
def test3Embed(self):
    # Matches the pickled molecules against self.pcophore, embeds the three
    # molecules listed in testResults and checks their statistics and the
    # overall hit count.
    testResults = {
      'mol_197': (218.80, 35.75, 110.33, 11.58, 109.66, 11.09, 90.35, 2.95, 0.00),
      'mol_223': (259.19, 6.27, 134.13, 1.12, 134.06, 1.12, 85.74, 0.61, 0.00),
      'mol_269': (204.51, 7.89, 103.89, 1.20, 102.66, 1.20, 88.07, 1.21, 6.00),
    }
    nDone = 0
    nHits = 0
    # with-block so the gzip handle is closed when the loop ends or fails
    with gzip.open(os.path.join(self.dataDir, 'cdk2-syn-clip100.pkl.gz'), 'rb') as inF:
        while 1:
            try:
                name, molPkl, boundsMat = cPickle.load(inF, encoding='latin1')
                if PY3:
                    molPkl = bytes(molPkl, encoding='latin1')
            except Exception:
                # was a bare `except:`, which also caught KeyboardInterrupt
                # and SystemExit; any load failure still ends the loop
                break

            nDone += 1

            mol = Chem.Mol(molPkl)
            nboundsMat = rdDistGeom.GetMoleculeBoundsMatrix(mol)
            DG.DoTriangleSmoothing(nboundsMat)
            matched, matches = EmbedLib.MatchPharmacophoreToMol(mol, self.featFactory,
                                                                self.pcophore)
            if matched:
                failed, bm, match, stats = EmbedLib.MatchPharmacophore(
                  matches, nboundsMat, self.pcophore, useDownsampling=1)
                if not failed:
                    nHits += 1

                    if name in testResults:
                        stats = EmbedLib.EmbedOne(mol, name, match, self.pcophore, count=10,
                                                  silent=1, randomSeed=23)
                        tgt = testResults[name]
                        self.assertEqual(len(tgt), len(stats))
                        print(name)
                        print(','.join(['%.2f' % x for x in stats]))
                        # index 1 is not compared; every other value is
                        # checked with feq() using 5.0 as the third argument
                        self.assertTrue(feq(tgt[0], stats[0], 5.0), (tgt[0], stats[0]))
                        for i in range(2, len(tgt)):
                            self.assertTrue(feq(tgt[i], stats[i], 5.0), (tgt[i], stats[i]))

    self.assertEqual(nDone, 100)
    #print 'nHits:',nHits
    self.assertEqual(nHits, 50)
def test3CatFilePickle(self):
    # Loads the pickled catalog and checks its entry count, fingerprint
    # length and bits.
    catPath = os.path.join(RDConfig.RDCodeDir, 'Chem', 'test_data', 'simple_catalog.pkl')
    # read as text, normalise CRLF to LF, then unpickle from bytes
    with open(catPath, 'r') as pklTFile:
        text = pklTFile.read()
    payload = text.replace('\r\n', '\n').encode('utf-8')
    with io.BytesIO(payload) as pklFile:
        cat = cPickle.load(pklFile, encoding='bytes')
    assert cat.GetNumEntries() == 21
    assert cat.GetFPLength() == 21
    self._testBits(cat)
def test4Search(self):
    # Screens the pickled molecules against a three-feature pharmacophore
    # and checks how many were read, matched and hit.
    featFactory = ChemicalFeatures.BuildFeatureFactory(
      os.path.join(self.dataDir, 'BaseFeatures.fdef'))

    activeFeats = [
      ChemicalFeatures.FreeChemicalFeature('Acceptor', Geometry.Point3D(0.0, 0.0, 0.0)),
      ChemicalFeatures.FreeChemicalFeature('Donor', Geometry.Point3D(0.0, 0.0, 0.0)),
      ChemicalFeatures.FreeChemicalFeature('Aromatic', Geometry.Point3D(0.0, 0.0, 0.0))
    ]
    pcophore = Pharmacophore.Pharmacophore(activeFeats)
    # (i, j, lower bound, upper bound, 2D upper bound) per feature pair
    for i, j, lower, upper, upper2D in ((0, 1, 2.251, 2.451, 3),
                                        (0, 2, 4.970, 5.170, 6),
                                        (1, 2, 2.681, 2.881, 6)):
        pcophore.setLowerBound(i, j, lower)
        pcophore.setUpperBound(i, j, upper)
        pcophore.setUpperBound2D(i, j, upper2D)

    nDone = 0
    nMatches = 0
    nHits = 0
    # with-block so the gzip handle is closed when the loop ends or fails
    with gzip.open(os.path.join(self.dataDir, 'cdk2-syn-clip100.pkl.gz'), 'rb') as inF:
        while 1:
            try:
                name, molPkl, boundsMat = cPickle.load(inF, encoding='latin1')
                if PY3:
                    molPkl = bytes(molPkl, encoding='latin1')
            except Exception:
                # any load failure (including end of file) ends the loop; a
                # short read is caught by the nDone assertion below
                break

            nDone += 1

            mol = Chem.Mol(molPkl)
            # the stored bounds matrix is replaced by a freshly computed one
            boundsMat = rdDistGeom.GetMoleculeBoundsMatrix(mol)
            DG.DoTriangleSmoothing(boundsMat)

            canMatch, matches = EmbedLib.MatchPharmacophoreToMol(mol, featFactory, pcophore)
            if canMatch:
                nMatches += 1
                r = EmbedLib.MatchPharmacophore(matches, boundsMat, pcophore,
                                                useDownsampling=True, use2DLimits=True,
                                                mol=mol)
                failed, bm, match, details = r
                if not failed:
                    nHits += 1

    self.assertEqual(nDone, 100)
    self.assertEqual(nMatches, 93)
    #print 'nhits:',nHits
    self.assertEqual(nHits, 67)
def testSaveState(self):
    # Loads a pickled descriptor calculator and checks its descriptor names,
    # versions and computed values against the fixture attributes.
    statePath = os.path.join(RDConfig.RDCodeDir, 'ML/Descriptors/test_data', 'molcalc.dsc')
    # read as text, normalise CRLF to LF, then unpickle from bytes
    with open(statePath, 'r') as inTF:
        text = inTF.read()
    payload = text.replace('\r\n', '\n').encode('utf-8')
    with io.BytesIO(payload) as inF:
        calc = cPickle.load(inF)
    self.assertEqual(calc.GetDescriptorNames(), tuple(self.descs))
    self.assertEqual(calc.GetDescriptorVersions(), tuple(self.vers))
    self._testVals(calc, self.testD)
def setUp(self):
    """Prepare the 'problematic' molecule supplier and its reference data.

    self.dataset['problematic'] is a ForwardSDMolSupplier over the gzipped
    SD file; self.dataset_inchi['problematic'] is the object loaded from the
    matching '.inchi' file. Warning logging is disabled at the end.
    """
    self.dataset = {}
    self.dataset_inchi = {}
    # The gzip handle is deliberately left open: it is handed to the
    # supplier, which holds on to it.
    sdStream = gzip.open(
      os.path.join(RDConfig.RDCodeDir, 'Chem/test_data', 'pubchem-hard-set.sdf.gz'), 'r')
    self.dataset['problematic'] = ForwardSDMolSupplier(sdStream, sanitize=False, removeHs=False)
    inchiPath = os.path.join(RDConfig.RDCodeDir, 'Chem/test_data', 'pubchem-hard-set.inchi')
    with open(inchiPath, 'rb') as inchiFile:
        self.dataset_inchi['problematic'] = load(inchiFile)
    # disable logging
    DisableLog('rdApp.warning')
def readFragmentScores(name='fpscores'):
    """Load '<name>.pkl.gz' and store a key -> score mapping in `_fscores`.

    The pickled object is iterated as a sequence of records; in each record
    the first element is converted with float() and used as the value for
    every remaining element of that record.

    Arguments:
      - name: path of the data file without the '.pkl.gz' suffix

    Returns nothing; the result is written to the module-level `_fscores`.

    NOTE: unpickling can execute arbitrary code; only load trusted files.
    """
    import gzip
    global _fscores
    fname = '{}.pkl.gz'.format(name)
    # download(_get_dgl_url(os.path.join('dataset', fname)), path=fname)
    # with-block so the gzip handle is closed once the data has been read
    with gzip.open(fname) as scoreFile:
        records = cPickle.load(scoreFile)
    # the global is assigned only after the conversion has succeeded
    _fscores = {key: float(record[0]) for record in records for key in record[1:]}
def GetComposites(details):
    """Return a list of composites unpickled from the source in `details`.

    - If details.persistTblName and details.inNote are both set, the MODEL
      field of every row of that table whose note equals details.inNote is
      unpickled (database details.dbName).
    - Otherwise, if details.composFileName is set, the single object
      pickled in that file is returned in a one-element list.
    - Otherwise the list is empty.

    NOTE: unpickling can execute arbitrary code; only use trusted sources.
    """
    res = []
    if details.persistTblName and details.inNote:
        conn = DbConnect(details.dbName, details.persistTblName)
        # NOTE(review): the where clause is built by string interpolation, so
        # a note containing a single quote breaks (or alters) the query.
        # Switch to a parameterised query if conn.GetData supports one.
        mdls = conn.GetData(fields='MODEL', where="where note='%s'" % (details.inNote))
        for row in mdls:
            rawD = row[0]
            res.append(cPickle.loads(str(rawD)))
    elif details.composFileName:
        # with-block so the file handle is closed after loading
        with open(details.composFileName, 'rb') as composFile:
            res.append(cPickle.load(composFile))
    return res
def testGeneralPickle(self):
    " testing DataSet pickling"
    self.setUpGeneralLoad()
    pklPath = RDConfig.RDCodeDir + '/ML/Data/test_data/testgeneral.dat.pkl'
    DataUtils.WritePickledData(pklPath, self.d)
    # the file holds four consecutive pickles, read back in this order
    with open(pklPath, 'rb') as f:
        vNames, qBounds, ptNames, examples = [cPickle.load(f) for _ in range(4)]
    d = MLData.MLDataSet(examples, varNames=vNames, qBounds=qBounds, ptNames=ptNames)

    # accessors whose full return values must agree
    for getter, message in (('GetNPts', 'nPts wrong'),
                            ('GetNVars', 'nVars wrong'),
                            ('GetNResults', 'nResults wrong'),
                            ('GetVarNames', 'varNames wrong'),
                            ('GetPtNames', 'ptNames wrong'),
                            ('GetNPossibleVals', 'nPossible Wrong'),
                            ('GetQuantBounds', 'quantBounds Wrong'),
                            ('GetResults', 'GetResults wrong')):
        assert getattr(self.d, getter)() == getattr(d, getter)(), message
    # accessors that are only spot-checked at a single index
    assert self.d.GetAllData()[1] == d.GetAllData()[1], 'GetAllData wrong'
    assert self.d.GetInputData()[3] == d.GetInputData()[3], 'GetInputData wrong'
    assert self.d.GetNamedData()[2] == d.GetNamedData()[2], 'GetNamedData wrong'
def test4UnusedVars(self):
    " testing unused variables "
    self._setupTree1a()
    # read as text, normalise CRLF to LF, then unpickle from bytes
    with open(self.qTree1Name, 'r') as inTFile:
        text = inTFile.read()
    payload = text.replace('\r\n', '\n').encode('utf-8')
    with io.BytesIO(payload) as inFile:
        refTree = cPickle.load(inFile)
    assert self.t1 == refTree, 'Incorrect tree generated.'
    # the last element of each example is the expected classification
    for idx, example in enumerate(self.examples1):
        assert self.t1.ClassifyExample(example) == example[-1], \
          'examples1[%d] misclassified' % idx
def readFragmentScores(name='fpscores'):
    """Load '<name>.pkl.gz' and store a key -> score mapping in `_fscores`.

    The pickled object is iterated as a sequence of records; in each record
    the first element is converted with float() and used as the value for
    every remaining element of that record.

    Arguments:
      - name: path of the data file without the '.pkl.gz' suffix; the
        default 'fpscores' is resolved relative to this module's directory

    Returns nothing; the result is written to the module-level `_fscores`.

    NOTE: unpickling can execute arbitrary code; only load trusted files.
    """
    import gzip
    global _fscores
    # generate the full path filename:
    if name == "fpscores":
        name = op.join(op.dirname(__file__), name)
    # with-block so the gzip handle is closed once the data has been read
    with gzip.open('%s.pkl.gz' % name) as scoreFile:
        records = cPickle.load(scoreFile)
    # the global is assigned only after the conversion has succeeded
    _fscores = {key: float(record[0]) for record in records for key in record[1:]}
def test1Tree(self):
    # testing tree1
    self._setupTree1()
    # read as text, normalise CRLF to LF, then unpickle from bytes
    with open(self.qTree1Name, 'r') as inTFile:
        text = inTFile.read()
    payload = text.replace('\r\n', '\n').encode('utf-8')
    with io.BytesIO(payload) as inFile:
        refTree = cPickle.load(inFile)
    assert self.t1 == refTree, 'Incorrect tree generated. '

    treeText = str(self.t1)
    self.assertIn('Var: 2 []', treeText)
    self.assertEqual(self.t1.GetQuantBounds(), [])
def testPickle(self):
    # Round-trips a FreeChemicalFeature through pickle, then checks that the
    # two stored reference pickles (feat.pkl and featv2.pkl) still load.
    ffeat = ChemicalFeatures.FreeChemicalFeature("HBondDonor", "HBondDonor1",
                                                 geom.Point3D(1.0, 2.0, 3.0), 123)
    ffeat2 = cPickle.loads(cPickle.dumps(ffeat), encoding='bytes')
    self.assertTrue(ffeat2.GetId() == ffeat.GetId())
    self.assertTrue(ffeat2.GetFamily() == ffeat.GetFamily())
    self.assertTrue(ffeat2.GetType() == ffeat.GetType())
    self.assertTrue(ptFeq(ffeat2.GetPos(), ffeat.GetPos()))

    # Check that the old pickled versions have not been broken
    oldPath = os.path.join(RDConfig.RDBaseDir, 'Code/ChemicalFeatures/Wrap/testData/feat.pkl')
    # read as text, normalise CRLF to LF, then unpickle from bytes
    with open(oldPath, 'r') as inTF:
        text = inTF.read()
    stream = io.BytesIO(text.replace('\r\n', '\n').encode('utf-8'))
    ffeat2 = cPickle.load(stream, encoding='bytes')
    # this version (1.0) does not have an id in the byte stream
    self.assertTrue(ffeat2.GetFamily() == ffeat.GetFamily())
    self.assertTrue(ffeat2.GetType() == ffeat.GetType())
    self.assertTrue(ptFeq(ffeat2.GetPos(), ffeat.GetPos()))

    # Test the new version also has the id and works as expected

    # uncomment the following to generate (overrwrite) new version of pickled
    # data file
    #cPickle.dump(ffeat,file(os.path.join(RDConfig.RDBaseDir, 'Code/ChemicalFeatures/Wrap/testData/featv2.pkl'),'wb+'))
    newPath = os.path.join(RDConfig.RDBaseDir, 'Code/ChemicalFeatures/Wrap/testData/featv2.pkl')
    with open(newPath, 'r') as inTF:
        text = inTF.read()
    stream = io.BytesIO(text.replace('\r\n', '\n').encode('utf-8'))
    ffeat2 = cPickle.load(stream, encoding='bytes')
    self.assertTrue(ffeat2.GetId() == ffeat.GetId())
    self.assertTrue(ffeat2.GetFamily() == ffeat.GetFamily())
    self.assertTrue(ffeat2.GetType() == ffeat.GetType())
    self.assertTrue(ptFeq(ffeat2.GetPos(), ffeat.GetPos()))
def test4Issue237(self):
    # Regression test for Issue237: rank bits with a BIASENTROPY ranker
    # built from the pickled inputs and check the top-ranked bit.
    pklPath = os.path.join(RDConfig.RDBaseDir, 'Code', 'ML', 'InfoTheory', 'Wrap', 'testData',
                           'Issue237.pkl')
    with open(pklPath, 'rb') as inF:
        loaded = cPickle.load(inF, encoding='bytes')
    examples, avail, bias, nB, nPoss = loaded

    ranker = rdit.InfoBitRanker(nB, nPoss, rdit.InfoType.BIASENTROPY)
    ranker.SetMaskBits(avail)
    for example in examples:
        ranker.AccumulateVotes(example[1], example[-1])
    # this dumps core on linux if the bug isn't fixed:
    top = ranker.GetTopN(1)
    self.assertTrue(int(top[0][0]) == 12)
def readFragmentScores(name='fpscores'):
    """Load 'saved/s.pkl' and store a key -> score mapping in `_fscores`.

    The pickled object is iterated as a sequence of records; in each record
    the first element is converted with float() and used as the value for
    every remaining element of that record.

    Returns nothing; the result is written to the module-level `_fscores`.
    """
    import gzip
    global _fscores
    # generate the full path filename:
    # NOTE(review): the resolved `name` is never used below; the data always
    # comes from the relative path 'saved/s.pkl'. Confirm this is intended.
    if name == "fpscores":
        name = op.join(op.dirname(__file__), name)
    with open('saved/s.pkl', 'rb') as pickle_file:
        _fscores = cPickle.load(pickle_file)
    scoreByKey = {}
    for record in _fscores:
        scoreByKey.update((key, float(record[0])) for key in record[1:])
    _fscores = scoreByKey
def ReadFragScores(name='fpscores'):
    """Load '<name>.pkl.gz' and store a key -> score mapping in `_fscores`.

    A progress message is written to standard output first. The pickled
    object is iterated as a sequence of records; in each record the first
    element is converted with float() and used as the value for every
    remaining element of that record.

    Arguments:
      - name: path of the data file without the '.pkl.gz' suffix; the
        default 'fpscores' is resolved relative to this module's directory

    Returns nothing; the result is written to the module-level `_fscores`.

    NOTE: unpickling can execute arbitrary code; only load trusted files.
    """
    import gzip
    import sys
    global _fscores
    # sys.stdout.write replaces the Python 2 print statement (a syntax
    # error on Python 3); like the original it emits no newline
    sys.stdout.write("reading SAS fragment scores... ")
    #generate the full path filename
    if name == "fpscores":
        name = os.path.join(os.path.dirname(__file__), name)
    # with-block so the gzip handle is closed once the data has been read
    with gzip.open('%s.pkl.gz' % name) as scoreFile:
        records = cPickle.load(scoreFile)
    # the global is assigned only after the conversion has succeeded
    _fscores = {key: float(record[0]) for record in records for key in record[1:]}
def testPerm2(self):
    """ tests the descriptor remapping stuff in a packager """
    with open(os.path.join(self.dataDir, 'Jan9_build3_pkg.pkl'), 'rb') as pkgF:
        pkg = cPickle.load(pkgF)
    calc = pkg.GetCalculator()
    names = calc.GetDescriptorNames()
    DataUtils.InitRandomNumbers((23, 42))
    perm = list(names)
    # the `random=` argument of random.shuffle was removed in Python 3.11
    # NOTE(review): the drawn permutation may differ from the one the old
    # call produced; this assumes _verify does not depend on a particular
    # descriptor order - confirm.
    random.shuffle(perm)
    # install the shuffled descriptor order on the calculator and
    # re-initialise the package before verifying
    calc.simpleList = perm
    calc.descriptorNames = perm
    pkg.Init()
    self._verify(pkg, self.testD)
def setUp(self):
    """Load the pickled examples and set the fixture attributes.

    Sets self.examples, self.varNames, self.qBounds, self.nPoss and
    self.attrs, then calls DataUtils.InitRandomNumbers((23, 43)).
    """
    #print '\n%s: '%self.shortDescription(),
    examplePath = RDConfig.RDCodeDir + '/ML/Composite/test_data/ferro.pkl'
    with open(examplePath, 'rb') as exampleFile:
        self.examples = cPickle.load(exampleFile)
    self.varNames = ['composition', 'max_atomic', 'has3d', 'has4d', 'has5d', 'elconc', 'atvol',
                     'isferro']
    # one entry per variable name
    self.qBounds = [[], [1.89, 3.53], [], [], [], [0.55, 0.73], [11.81, 14.52], []]
    self.nPoss = [0, 3, 2, 2, 2, 3, 3, 2]
    # indices of all variables except the first and the last
    lastAttr = len(self.varNames) - 1
    self.attrs = range(1, lastAttr)
    from rdkit.ML.Data import DataUtils
    DataUtils.InitRandomNumbers((23, 43))
def _writeDetailFile(self, inF, outF):
    """Re-compute atom contributions for each record of `inF` into `outF`.

    (smiles, contribs) pairs are unpickled from `inF` until EOFError. For
    every SMILES that parses, the contributions returned by
    Crippen._GetAtomContribs for the hydrogen-added molecule are pickled to
    `outF` together with the SMILES; otherwise a message is printed.
    """
    while True:
        try:
            smi, refContribs = cPickle.load(inF)
        except EOFError:
            return
        mol = Chem.MolFromSmiles(smi)
        if not mol:
            print('Problems with SMILES:', smi)
            continue
        mol = Chem.AddHs(mol, 1)
        # the SMILES of the hydrogen-added molecule is generated but unused
        smi2 = Chem.MolToSmiles(mol)
        newContribs = Crippen._GetAtomContribs(mol)
        cPickle.dump((smi, newContribs), outF)
def testPkl(self):
    # Test pickling: write a vector with three bits set to 'foo.pkl', read
    # it back, delete the file and compare the on-bits.
    original = self.klass(10)
    for bit in (1, 2, 3):
        original[bit] = 1
    pklName = 'foo.pkl'
    with open(pklName, 'wb+') as outF:
        cPickle.dump(original, outF)
    with open(pklName, 'rb') as inF:
        restored = cPickle.load(inF)
    # the file is removed before the comparison
    os.unlink(pklName)
    assert tuple(original.GetOnBits()) == tuple(restored.GetOnBits()), 'pkl failed'
def _doDetailFile(self, inF, nFailsAllowed=1):
    """Compare atom contributions with the references pickled in `inF`.

    (smiles, refContribs) pairs are unpickled from `inF` until EOFError.
    For each SMILES that parses, the first element of every contribution
    from Crippen._GetAtomContribs (after adding hydrogens) is compared, in
    sorted order, with the first element of every reference contribution.
    The first mismatch per molecule is printed and counted.

    Arguments:
      - inF: file-like object holding the pickled reference records
      - nFailsAllowed: the number of mismatching molecules must be strictly
        below this value

    Fails the test when the number of mismatching molecules reaches
    `nFailsAllowed`.
    """
    nFails = 0
    while True:
        try:
            smi, refContribs = cPickle.load(inF)
        except EOFError:
            break

        refContribs = [x[0] for x in refContribs]
        refOrder = numpy.argsort(refContribs)
        try:
            mol = Chem.MolFromSmiles(smi)
        except Exception:
            # was a bare `except:`, which also caught KeyboardInterrupt and
            # SystemExit; parse errors are still reported and tolerated
            import traceback
            traceback.print_exc()
            mol = None
        if not mol:
            print('Problems with SMILES:', smi)
            continue

        mol = Chem.AddHs(mol, 1)
        smi2 = Chem.MolToSmiles(mol)
        contribs = [x[0] for x in Crippen._GetAtomContribs(mol)]

        #
        #  we're comparing to the old results using the oelib code.
        #  Since we have some disagreements with them as to what is
        #  aromatic and what isn't, we may have different numbers of
        #  Hs.  For the sake of comparison, just pop those off our
        #  new results.
        #
        del contribs[len(refContribs):]
        order = numpy.argsort(contribs)

        for i in range(len(refContribs)):
            refL = refContribs[refOrder[i]]
            l = contribs[order[i]]
            if not feq(refL, l):
                print('%s (%s): %d %6.5f != %6.5f' % (smi, smi2, order[i], refL, l))
                Crippen._GetAtomContribs(mol, force=1)
                print('-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*')
                nFails += 1
                # only the first mismatch per molecule is counted
                break
    self.assertTrue(nFails < nFailsAllowed)
def testPerm2(self):
    """ tests the descriptor remapping stuff in a packager

    The package pickle is read as text so that Windows line endings can be
    normalised before unpickling.  The calculator's descriptor names are
    then shuffled, assigned back to the calculator, and the re-initialised
    package is checked with ``self._verify``.
    """
    with open(os.path.join(self.dataDir, 'Jan9_build3_pkg.pkl'), 'r') as pkgTF:
        buf = pkgTF.read().replace('\r\n', '\n').encode('utf-8')
    with io.BytesIO(buf) as pkgF:
        pkg = cPickle.load(pkgF)

    calc = pkg.GetCalculator()
    names = calc.GetDescriptorNames()

    DataUtils.InitRandomNumbers((23, 42))
    perm = list(names)
    # The optional ``random`` argument of random.shuffle was deprecated in
    # Python 3.9 and removed in 3.11; the default generator is used instead.
    random.shuffle(perm)

    calc.simpleList = perm
    calc.descriptorNames = perm
    pkg.Init()
    self._verify(pkg, self.testD)
def testTreeGrow(self):
    """Grow a tree-based composite and compare it with the stored reference.

    The reference composite is unpickled from ``composite_base.pkl`` (read
    as text so CRLF line endings can be normalised).  A new composite is
    grown from the fixture data, both sort orders of ``SortModels`` are
    checked, and the per-model errors are compared with the reference.
    """
    refPath = RDConfig.RDCodeDir + '/ML/Composite/test_data/composite_base.pkl'
    with open(refPath, 'r') as pklTF:
        text = pklTF.read()
    self.refCompos = cPickle.loads(text.replace('\r\n', '\n').encode('utf-8'))

    composite = Composite.Composite()
    composite._varNames = self.varNames
    composite.SetQuantBounds(self.qBounds, self.nPoss)

    from rdkit.ML.DecTree import CrossValidate
    composite.Grow(self.examples, self.attrs, [],
                   buildDriver=CrossValidate.CrossValidationDriver, pruner=None,
                   nTries=100, silent=1)
    composite.AverageErrors()

    # sorted on counts: counts ascending, errors not
    composite.SortModels(sortOnError=False)
    self.assertEqual(composite.countList, sorted(composite.countList))
    self.assertNotEqual(composite.errList, sorted(composite.errList))

    # default sort: errors ascending, counts not
    composite.SortModels()
    self.assertNotEqual(composite.countList, sorted(composite.countList))
    self.assertEqual(composite.errList, sorted(composite.errList))

    # To regenerate the reference data:
    # with open(RDConfig.RDCodeDir+'/ML/Composite/test_data/composite_base.pkl','wb') as pklF:
    #   cPickle.dump(composite,pklF)

    self.treeComposite = composite
    self.assertEqual(len(composite), len(self.refCompos))
    for idx in range(len(composite)):
        _, _, err = composite[idx]
        _, _, refErr = self.refCompos[idx]
        # Only the errors are compared: we used to check the trees and
        # counts for equality too, but since there are redundant errors,
        # that's non-trivial.
        self.assertEqual(err, refErr)

    text = str(composite)
    for token in ('Composite', 'Model', 'error'):
        self.assertIn(token, text)
def setUp(self):
    """Prepare the fixture shared by the tests.

    ``ferro.pkl`` is read in text mode so that CRLF line endings can be
    normalised to LF before the bytes are unpickled into ``self.examples``.
    The variable names, ``qBounds``, ``nPoss`` and the list of attribute
    indices are stored on ``self``, and
    ``DataUtils.InitRandomNumbers((23, 43))`` is called last -- presumably
    to make the random-number dependent tests repeatable.
    """
    examplePath = RDConfig.RDCodeDir + '/ML/Composite/test_data/ferro.pkl'
    with open(examplePath, 'r') as pklTF:
        text = pklTF.read()
    payload = text.replace('\r\n', '\n').encode('utf-8')
    self.examples = cPickle.loads(payload)

    self.varNames = [
        'composition',
        'max_atomic',
        'has3d',
        'has4d',
        'has5d',
        'elconc',
        'atvol',
        'isferro',
    ]
    # One entry per variable name, in the same order as varNames.
    self.qBounds = [[], [1.89, 3.53], [], [], [], [0.55, 0.73], [11.81, 14.52], []]
    self.nPoss = [0, 3, 2, 2, 2, 3, 3, 2]

    # Every index except the first and the last variable, as a real list.
    nVars = len(self.varNames)
    self.attrs = list(range(1, nVars - 1))

    from rdkit.ML.Data import DataUtils
    DataUtils.InitRandomNumbers((23, 43))
def test2(self):
    """ depth limit

    Builds a composite from the ``ferro_quant`` table with
    ``limitDepth = 3`` and compares it with the reference composite stored
    in ``ferromag_quant_10_3.pkl``.
    """
    self.details.tableName = 'ferro_quant'

    refPath = os.path.join(self.baseDir, 'ferromag_quant_10_3.pkl')
    with open(refPath, 'r') as pklTF:
        text = pklTF.read()
    # normalise CRLF line endings before unpickling
    refCompos = pickle.loads(text.replace('\r\n', '\n').encode('utf-8'))

    # first make sure the data are intact
    self._init(refCompos)

    self.details.limitDepth = 3
    built = BuildComposite.RunIt(self.details, saveIt=0)
    self.compare(built, refCompos)
def test7(self):
    """ Test composite of naive bayes

    Builds a composite from the ``ferro_noquant`` table with trees disabled
    and naive Bayes enabled, then compares it with the reference composite
    stored in ``ferromag_NaiveBayes.pkl``.
    """
    self.details.tableName = 'ferro_noquant'

    refPath = os.path.join(self.baseDir, 'ferromag_NaiveBayes.pkl')
    with open(refPath, 'r') as pklTFile:
        text = pklTFile.read()
    # normalise CRLF line endings before unpickling
    refCompos = pickle.loads(text.replace('\r\n', '\n').encode('utf-8'))

    self._init(refCompos, copyBounds=1)

    details = self.details
    details.useTrees = 0
    details.useNaiveBayes = 1
    details.mEstimateVal = 20.0
    details.qBounds = [0] + [2] * 6 + [0]

    built = BuildComposite.RunIt(details, saveIt=0)
    self.compare(built, refCompos)
def test2SearchDownsample(self):
    """Run the pharmacophore match over the 100 pickled records.

    Records are unpickled one at a time from ``cdk2-syn-clip100.pkl.gz``.
    Each is passed to ``self._matchMol`` together with ``self.pcophore``
    and ``self.featFactory``; the test expects 100 records in total and 47
    matches.

    The gzip stream is opened in a ``with`` block so the handle is closed
    even if matching raises.
    """
    nDone = 0
    nHits = 0
    dataPath = os.path.join(self.dataDir, 'cdk2-syn-clip100.pkl.gz')
    with gzip.open(dataPath, 'rb') as inF:
        while True:
            try:
                tpl = cPickle.load(inF, encoding='latin1')
                if PY3:
                    # the second field was decoded as latin1 text by the
                    # unpickler; turn it back into bytes
                    tpl = tpl[0], tpl[1].encode('latin1'), tpl[2]
            except Exception:
                # Any failure to read a record is treated as end of data;
                # the nDone check below catches a premature stop.
                break
            if self._matchMol(tpl, self.pcophore, self.featFactory, 1):
                nHits += 1
            nDone += 1

    self.assertEqual(nDone, 100)
    self.assertEqual(nHits, 47)