def test1_Issue163(self):
    # Issue163 regression: two pickled composites are loaded from
    # self.baseDir, analysed together with AnalyzeComposite.ProcessIt, and the
    # sorted result is checked at its first, second and last rows.
    composites = []
    for fileName in ('humanoral.1.pkl', 'humanoral.2.pkl'):
      pklPath = os.path.join(self.baseDir, fileName)
      try:
        with open(pklPath, 'rb') as pklF:
          compos = pickle.load(pklF)
      except Exception:
        # a load failure is reported by the assertion just below
        compos = None
      self.assertTrue(compos)
      composites.append(compos)

    ok = 1
    try:
      res = sorted(AnalyzeComposite.ProcessIt(composites, verbose=-1))
    except Exception:
      import traceback
      traceback.print_exc()
      ok = 0
    self.assertTrue(ok)

    # (row index, expected first field of that row)
    for rowIdx, descName in ((0, 'BALABANJ'), (1, 'BERTZCT'), (-1, 'VSA_ESTATE9')):
      self.assertEqual(res[rowIdx][0], descName)
    # every row is expected to carry five fields
    for entry in res:
      self.assertEqual(len(entry), 5)
Esempio n. 2
0
  def test1_Issue163(self):
    # Issue163 regression: two pickled composites are loaded from
    # self.baseDir, analysed together with AnalyzeComposite.ProcessIt, and the
    # (unsorted) result is checked at its first, second and last rows.
    composites = []
    for fileName in ('humanoral.1.pkl', 'humanoral.2.pkl'):
      pklPath = os.path.join(self.baseDir, fileName)
      try:
        with open(pklPath, 'rb') as pklF:
          compos = pickle.load(pklF)
      except Exception:  # pragma: nocover
        compos = None
      self.assertTrue(compos)
      composites.append(compos)

    ok = 1
    try:
      res = AnalyzeComposite.ProcessIt(composites, verbose=-1)
    except Exception:  # pragma: nocover
      import traceback
      traceback.print_exc()
      ok = 0
    self.assertTrue(ok)

    # (row index, expected first field of that row)
    for rowIdx, descName in ((0, 'BALABANJ'), (1, 'BERTZCT'), (-1, 'FR_ALLYLIC_OXID')):
      self.assertTrue(res[rowIdx][0] == descName)
    # every row is expected to carry five fields
    for entry in res:
      self.assertTrue(len(entry) == 5)
Esempio n. 3
0
    def testPickle(self):
      # Round-trips a FreeChemicalFeature through pickle, then checks that the
      # two reference pickles stored with the test data still load and agree
      # with the freshly built feature.
      ffeat = ChemicalFeatures.FreeChemicalFeature("HBondDonor", "HBondDonor1", geom.Point3D(1.0, 2.0, 3.0),123)
      pkl = cPickle.dumps(ffeat)
      ffeat2 = cPickle.loads(pkl, encoding='bytes')
      self.assertTrue(ffeat2.GetId()==ffeat.GetId())
      self.assertTrue(ffeat2.GetFamily()==ffeat.GetFamily())
      self.assertTrue(ffeat2.GetType()==ffeat.GetType())
      self.assertTrue(ptFeq(ffeat2.GetPos(),ffeat.GetPos()))

      # Check that the old pickled versions have not been broken.
      # The context manager releases the handle even if unpickling fails.
      with open(os.path.join(RDConfig.RDBaseDir,
                             'Code/ChemicalFeatures/Wrap/testData/feat.pkl'),'rb') as inF:
        ffeat2 = cPickle.load(inF, encoding='bytes')
      # this version (1.0) does not have an id in the byte stream
      self.assertTrue(ffeat2.GetFamily()==ffeat.GetFamily())
      self.assertTrue(ffeat2.GetType()==ffeat.GetType())
      self.assertTrue(ptFeq(ffeat2.GetPos(),ffeat.GetPos()))

      # Test the new version also has the id and works as expected

      # uncomment the following to generate (overwrite) new version of pickled
      # data file
      #cPickle.dump(ffeat,file(os.path.join(RDConfig.RDBaseDir, 'Code/ChemicalFeatures/Wrap/testData/featv2.pkl'),'wb+'))
      with open(os.path.join(RDConfig.RDBaseDir,
                             'Code/ChemicalFeatures/Wrap/testData/featv2.pkl'),'rb') as inF:
        ffeat2 = cPickle.load(inF, encoding='bytes')
      self.assertTrue(ffeat2.GetId()==ffeat.GetId())
      self.assertTrue(ffeat2.GetFamily()==ffeat.GetFamily())
      self.assertTrue(ffeat2.GetType()==ffeat.GetType())
      self.assertTrue(ptFeq(ffeat2.GetPos(),ffeat.GetPos()))
Esempio n. 4
0
 def test6Bug29_2(self):
   """ a more extensive test of the cmp stuff using pickled trees"""
   import os
   dataDir = os.path.join(RDConfig.RDCodeDir,'ML','DecTree','test_data')
   trees = []
   for fileName in ('CmpTree1.pkl', 'CmpTree2.pkl'):
     with open(os.path.join(dataDir, fileName),'rb') as treeFile:
       trees.append(cPickle.load(treeFile))
   t1, t2 = trees
   # NOTE(review): the assertion passes when cmp() is truthy; if cmp follows the
   # Python 2 builtin contract (0 for equal) that means the trees differ, which
   # does not match the 'equality failed' message -- confirm the intent.
   assert cmp(t1,t2),'equality failed'
Esempio n. 5
0
 def testBuild(self):
   """ tests building and screening a packager """
   # both pickles are read in binary mode from self.dataDir
   calcPath = os.path.join(self.dataDir,'Jan9_build3_calc.dsc')
   with open(calcPath,'rb') as calcF:
     calc = cPickle.load(calcF)
   modelPath = os.path.join(self.dataDir,'Jan9_build3_model.pkl')
   with open(modelPath,'rb') as modelF:
     model = cPickle.load(modelF)
   # wrap calculator and model in a package and hand it to the shared checker
   pkg = Packager.ModelPackage(model=model,descCalc=calc)
   self._verify(pkg,self.testD)
Esempio n. 6
0
 def testBuild(self):
   # """ tests building and screening a packager """
   # The calculator pickle is read as text so that any '\r\n' sequences can be
   # collapsed to '\n' before the bytes are handed to the unpickler.
   calcPath = os.path.join(self.dataDir, 'Jan9_build3_calc.dsc')
   with open(calcPath, 'r') as calcTF:
     calcText = calcTF.read()
   buf = calcText.replace('\r\n', '\n').encode('utf-8')
   calc = cPickle.load(BytesIO(buf))
   # the model pickle is read directly in binary mode
   modelPath = os.path.join(self.dataDir, 'Jan9_build3_model.pkl')
   with open(modelPath, 'rb') as modelF:
     model = cPickle.load(modelF)
   pkg = Packager.ModelPackage(model=model, descCalc=calc)
   self._verify(pkg, self.testD)
Esempio n. 7
0
 def test6Bug29_2(self):
   """ a more extensive test of the cmp stuff using pickled trees"""
   import os
   trees = []
   for fileName in ('CmpTree1.pkl', 'CmpTree2.pkl'):
     treePath = os.path.join(RDConfig.RDCodeDir,'ML','DecTree','test_data',fileName)
     # read as text, collapse '\r\n' to '\n', then unpickle from the bytes
     with open(treePath,'r') as textFile:
       treeText = textFile.read()
     buf = treeText.replace('\r\n', '\n').encode('utf-8')
     with io.BytesIO(buf) as byteFile:
       trees.append(cPickle.load(byteFile))
   t1, t2 = trees
   # NOTE(review): the assertion passes when cmp() is truthy; if cmp follows the
   # Python 2 builtin contract (0 for equal) that means the trees differ, which
   # does not match the 'equality failed' message -- confirm the intent.
   assert cmp(t1,t2),'equality failed'
Esempio n. 8
0
  def testPerm1(self):
    """ tests the descriptor remapping stuff in a packager """
    from rdkit.Chem import Descriptors
    # read as text, collapse '\r\n' to '\n', then unpickle from the bytes
    with open(os.path.join(self.dataDir,'Jan9_build3_pkg.pkl'),'r') as pkgTF:
      buf = pkgTF.read().replace('\r\n', '\n').encode('utf-8')
    with io.BytesIO(buf) as pkgF:
      pkg = cPickle.load(pkgF)
    calc = pkg.GetCalculator()
    names = calc.GetDescriptorNames()
    ref = {}
    DataUtils.InitRandomNumbers((23,42))
    for smi,pred,conf in self.testD:
      # reference values, computed in the calculator's own descriptor order;
      # names missing from Descriptors fall back to a constant 777
      for desc in names:
        fn = getattr(Descriptors,desc,lambda x:777)
        m = Chem.MolFromSmiles(smi)
        ref[desc] = fn(m)

      # recompute in five shuffled orders; the values must not depend on order
      for i in range(5):
        perm = list(names)
        # random.shuffle() lost its optional `random` argument in Python 3.11;
        # the default generator is the same source the old call passed in.
        random.shuffle(perm)

        m = Chem.MolFromSmiles(smi)
        for desc in perm:
          fn = getattr(Descriptors,desc,lambda x:777)
          val = fn(m)
          assert feq(val,ref[desc],1e-4),'%s: %s(%s): %f!=%f'%(str(perm),
                                                               smi,
                                                               desc,
                                                               val,
                                                               ref[desc])
Esempio n. 9
0
 def _loadPackage(self):
   # Returns the object unpickled from Jan9_build3_pkg.pkl in self.dataDir.
   # The file is read as text so '\r\n' can be collapsed to '\n' first.
   pkgPath = os.path.join(self.dataDir, 'Jan9_build3_pkg.pkl')
   with open(pkgPath, 'r') as pkgTF:
     pkgText = pkgTF.read()
   payload = pkgText.replace('\r\n', '\n').encode('utf-8')
   return cPickle.load(BytesIO(payload))
Esempio n. 10
0
  def test4(self):
    """ include thresholding """
    self.details.tableName = 'ferro_quant'
    self.details.threshold = 0.80
    self.details.doHoldout=0
    self.details.doTraining=0

    # read as text, collapse '\r\n' to '\n', then unpickle from the bytes
    pklPath = os.path.join(self.baseDir,'ferromag_quant_10.pkl')
    with open(pklPath,'r') as pklTF:
      pklText = pklTF.read()
    buf = pklText.replace('\r\n', '\n').encode('utf-8')
    with io.BytesIO(buf) as pklF:
      compos = pickle.load(pklF)
    self.assertEqual(len(compos),5)

    screened = ScreenComposite.ScreenFromDetails(compos,self.details)
    nGood,misCount,nSkipped,avgGood,avgBad,avgSkip,tbl = screened
    self.assertEqual(nGood,91)
    self.assertEqual(misCount,1)
    self.assertEqual(nSkipped,3)
    self.assertAlmostEqual(avgGood,0.9956,4)
    self.assertAlmostEqual(avgBad,1.000,4)
    self.assertAlmostEqual(avgSkip,0.6000,4)
    # expected counts per cell of the returned table
    for cell,count in (((0,0),54), ((1,1),37), ((0,1),1), ((1,0),0)):
      self.assertEqual(tbl[cell] , count)
Esempio n. 11
0
 def testSaveState(self):
   # Loads the pickled calculator from molcalc.dsc and checks its descriptor
   # names, versions and computed values against the fixture data.
   dscPath = os.path.join(RDConfig.RDCodeDir,'ML/Descriptors/test_data','molcalc.dsc')
   with open(dscPath,'rb') as inF:
     calc = cPickle.load(inF)
   names = calc.GetDescriptorNames()
   self.assertEqual(names,tuple(self.descs))
   versions = calc.GetDescriptorVersions()
   self.assertEqual(versions,tuple(self.vers))
   self._testVals(calc,self.testD)
Esempio n. 12
0
 def test6(self):
   """ multiple models """
   self.details.tableName = 'ferro_noquant'
   # read as text, collapse '\r\n' to '\n', then unpickle from the bytes
   pklPath = os.path.join(self.baseDir,'ferromag_auto_10_3.pkl')
   with open(pklPath,'r') as pklTF:
     pklText = pklTF.read()
   buf = pklText.replace('\r\n', '\n').encode('utf-8')
   with io.BytesIO(buf) as pklF:
     compos = pickle.load(pklF)
   self.assertEqual(len(compos),10)

   # the same composite is passed twice
   screened = ScreenComposite.ScreenFromDetails([compos,compos],self.details)
   nGood,misCount,nSkipped,avgGood,avgBad,avgSkip,tbl = screened

   # values reported at index 0
   self.assertEqual(nGood[0],95)
   self.assertEqual(misCount[0],8)
   self.assertEqual(nSkipped[0],0)
   self.assertAlmostEqual(avgGood[0],.9684,4)
   self.assertAlmostEqual(avgBad[0],.8375,4)
   # values reported at index 1 are all expected to be zero
   for stat in (nGood,misCount,nSkipped,avgGood,avgBad):
     self.assertEqual(stat[1],0)
   # expected counts per cell of the returned table
   for cell,count in (((0,0),50), ((1,1),45), ((0,1),5), ((1,0),3)):
     self.assertEqual(tbl[cell],count)
Esempio n. 13
0
  def test11(self):
    """ filtering with segmentation """
    self.details.tableName = 'ferro_noquant'
    # read as text, collapse '\r\n' to '\n', then unpickle from the bytes
    pklPath = os.path.join(self.baseDir,'ferromag_filt_10_3.pkl')
    with open(pklPath,'r') as pklTF:
      pklText = pklTF.read()
    buf = pklText.replace('\r\n', '\n').encode('utf-8')
    with io.BytesIO(buf) as pklF:
      compos = pickle.load(pklF)
    self.assertEqual(len(compos),10)

    # screening options are set only after the composite has loaded
    self.details.doHoldout=1
    self.details.filterVal=1
    self.details.filterFrac=.33

    screened = ScreenComposite.ScreenFromDetails(compos,self.details)
    nGood,misCount,nSkipped,avgGood,avgBad,avgSkip,tbl = screened

    self.assertEqual(nGood,37)
    self.assertEqual(misCount,6)
    self.assertEqual(nSkipped,0)
    self.assertAlmostEqual(avgGood,.95946,4)
    self.assertAlmostEqual(avgBad,.85,4)
    # expected counts per cell of the returned table
    for cell,count in (((0,0),14), ((1,1),23), ((0,1),1), ((1,0),5)):
      self.assertEqual(tbl[cell] , count)
Esempio n. 14
0
  def test3_include_training(self):
    # """ include training data only """
    self.details.tableName = 'ferro_quant'
    self.details.doHoldout = 0
    self.details.doTraining = 1

    # read as text, collapse '\r\n' to '\n', then unpickle from the bytes
    pklPath = os.path.join(self.baseDir, 'ferromag_quant_10.pkl')
    with open(pklPath, 'r') as pklTF:
      pklText = pklTF.read()
    buf = pklText.replace('\r\n', '\n').encode('utf-8')
    with io.BytesIO(buf) as pklF:
      compos = pickle.load(pklF)
    tgt = 5
    self.assertEqual(len(compos), tgt, 'bad composite loaded: %d != %d' % (len(compos), tgt))

    screened = ScreenComposite.ScreenFromDetails(compos, self.details)
    nGood, misCount, nSkipped, avgGood, avgBad, avgSkip, tbl = screened
    self.assertEqual(nGood, 65)
    self.assertEqual(misCount, 1)
    self.assertEqual(nSkipped, 0)
    self.assertAlmostEqual(avgGood, .98307, 4)
    self.assertAlmostEqual(avgBad, 0.600, 4)
    self.assertAlmostEqual(avgSkip, 0, 4)
    # the first cell check reports the whole table if it fails
    self.assertEqual(tbl[0, 0], 38, tbl)
    for cell, count in (((1, 1), 27), ((0, 1), 1), ((1, 0), 0)):
      self.assertEqual(tbl[cell], count)
Esempio n. 15
0
  def test1PPDataset(self):
    # For every SMILES in the regression CSV, computes Gasteiger charges and
    # compares each atom's '_GasteigerCharge' with the pickled reference value.
    csvName = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'PartialCharges', 'Wrap',
                           'test_data', 'PP_descrs_regress.2.csv')
    with open(csvName, 'r') as csvFile:
      lines = csvFile.readlines()

    # reference charges: read as text, collapse '\r\n' to '\n', then unpickle
    pklName = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'PartialCharges', 'Wrap',
                           'test_data', 'PP_combi_charges.pkl')
    with open(pklName, 'r') as cchtFile:
      pklText = cchtFile.read()
    buf = pklText.replace('\r\n', '\n').encode('utf-8')
    with io.BytesIO(buf) as cchFile:
      combiCharges = pickle.load(cchFile)

    for lin in lines:
      # lines starting with '#' are skipped
      if lin[0] == '#':
        continue
      # the SMILES is the first comma-separated field
      smi = lin.strip().split(',')[0]
      rdmol = Chem.MolFromSmiles(smi)
      rdPartialCharges.ComputeGasteigerCharges(rdmol)

      failed = False
      for ai in range(rdmol.GetNumAtoms()):
        rdch = float(rdmol.GetAtomWithIdx(ai).GetProp('_GasteigerCharge'))
        refch = combiCharges[smi][ai]
        if not feq(rdch, refch, 1.e-2):
          # keep going so that every mismatching atom gets printed
          failed = True
          print(smi, ai, rdch, refch)
      if failed:
        rdmol.Debug()
      self.assertFalse(failed)
Esempio n. 16
0
  def test3Pickle2(self):
    # Pickle / binary round-trips of a large IntSparseIntVect, followed by a
    # comparison with the reference pickle stored in the test data.
    size = 1<<21
    v1 = ds.IntSparseIntVect(size)
    self.assertRaises(IndexError,lambda:v1[size+1])
    for idx,val in ((0,1), (2,2), (1<<12,3)):
      v1[idx] = val
    self.assertTrue(v1==v1)

    # round-trip through pickle
    pickled = cPickle.dumps(v1)
    v2 = cPickle.loads(pickled)
    self.assertTrue(v2==v1)

    # round-trip through the binary representation
    v3 = ds.IntSparseIntVect(v2.ToBinary())
    self.assertTrue(v2==v3)
    self.assertTrue(v1==v3)

    # the reference file was produced with:
    #cPickle.dump(v1,file('isiv.pkl','wb+'))
    refPath = os.path.join(RDConfig.RDBaseDir,
                           'Code/DataStructs/Wrap/testData/isiv.pkl')
    # read as text, collapse '\r\n' to '\n', then unpickle from the bytes
    with open(refPath, 'r') as tf:
      refText = tf.read()
    buf = refText.replace('\r\n', '\n').encode('utf-8')
    with io.BytesIO(buf) as f:
      v3 = cPickle.load(f)
      self.assertTrue(v3==v1)
Esempio n. 17
0
    def testTreeGrow(self):
        " testing tree-based composite "
        # reference composite: read as text, collapse '\r\n', then unpickle
        with open(RDConfig.RDCodeDir + "/ML/Composite/test_data/composite_base.pkl", "r") as pklTF:
            buf = pklTF.read().replace("\r\n", "\n").encode("utf-8")
        with io.BytesIO(buf) as pklF:
            self.refCompos = cPickle.load(pklF)

        composite = Composite.Composite()
        composite._varNames = self.varNames
        composite.SetQuantBounds(self.qBounds, self.nPoss)
        from rdkit.ML.DecTree import CrossValidate

        driver = CrossValidate.CrossValidationDriver
        pruner = None
        composite.Grow(self.examples, self.attrs, [], buildDriver=driver, pruner=pruner, nTries=100, silent=1)
        composite.AverageErrors()
        composite.SortModels()

        # with open(RDConfig.RDCodeDir+'/ML/Composite/test_data/composite_base.pkl','wb') as pklF:
        #  cPickle.dump(composite,pklF)

        self.treeComposite = composite
        self.assertEqual(len(composite), len(self.refCompos))
        # range() works on both Python 2 and 3; xrange is Python 2 only
        for i in range(len(composite)):
            # each entry unpacks into three values; only the third is compared
            _, _, e1 = composite[i]
            _, _, e2 = self.refCompos[i]
            self.assertEqual(e1, e2)
Esempio n. 18
0
  def testSaveState(self):
    # Loads the pickled calculator from molcalc.dsc, checks names / versions /
    # values, then exercises the descriptor reporting helpers.
    dscPath = os.path.join(RDConfig.RDCodeDir, 'ML/Descriptors/test_data', 'molcalc.dsc')
    # read as text, collapse '\r\n' to '\n', then unpickle from the bytes
    with open(dscPath, 'r') as inTF:
      dscText = inTF.read()
    buf = dscText.replace('\r\n', '\n').encode('utf-8')
    calc = cPickle.load(BytesIO(buf))
    self.assertEqual(calc.GetDescriptorNames(), tuple(self.descs))
    self.assertEqual(calc.GetDescriptorVersions(), tuple(self.vers))
    self._testVals(calc, self.testD)

    # everything ShowDescriptors() prints must mention each descriptor name
    captured = StringIO()
    with redirect_stdout(captured):
      calc.ShowDescriptors()
    shown = captured.getvalue()
    for name in calc.GetDescriptorNames():
      self.assertIn(name, shown)

    self.assertIn('Wildman-Crippen LogP value', calc.GetDescriptorSummaries())
    self.assertIn('N/A', calc.GetDescriptorSummaries())

    # one callable per configured descriptor
    funcs = calc.GetDescriptorFuncs()
    self.assertEqual(len(funcs), len(self.descs))
    for func in funcs:
      self.assertTrue(callable(func))
Esempio n. 19
0
  def test3Pickle2(self):
    # Pickle / binary round-trips of a large IntSparseIntVect, followed by a
    # comparison with the reference pickle stored in the test data.
    size = 1<<21
    v1 = ds.IntSparseIntVect(size)
    self.assertRaises(IndexError,lambda:v1[size+1])
    for idx,val in ((0,1), (2,2), (1<<12,3)):
      v1[idx] = val
    self.assertTrue(v1==v1)

    # round-trip through pickle
    pickled = cPickle.dumps(v1)
    v2 = cPickle.loads(pickled)
    self.assertTrue(v2==v1)

    # round-trip through the binary representation
    v3 = ds.IntSparseIntVect(v2.ToBinary())
    self.assertTrue(v2==v3)
    self.assertTrue(v1==v3)

    # the reference file was produced with:
    #cPickle.dump(v1,file('isiv.pkl','wb+'))
    refPath = os.path.join(RDConfig.RDBaseDir,
                           'Code/DataStructs/Wrap/testData/isiv.pkl')
    with open(refPath, 'rb') as f:
      v3 = cPickle.load(f)
      self.assertTrue(v3==v1)
Esempio n. 20
0
 def testLoad2(self):
   """ tests loading and screening a packager 2 """
   with open(os.path.join(self.dataDir,'Jan9_build3_pkg.pkl'),'r') as pkgTF:
     buf = pkgTF.read().replace('\r\n', '\n').encode('utf-8')
     pkgTF.close()
   with io.BytesIO(buf) as pkgF:
     pkg = cPickle.load(pkgF)
   self._verify2(pkg,self.testD)
Esempio n. 21
0
 def test3CatFilePickle(self):
   # Loads the pickled catalog from the test data and checks its entry count,
   # fingerprint length and bits.
   catPath = os.path.join(RDConfig.RDCodeDir,'Chem',
                          'test_data','simple_catalog.pkl')
   with open(catPath,'rb') as pklFile:
     cat = cPickle.load(pklFile, encoding='bytes')
   nEntries = cat.GetNumEntries()
   assert nEntries==21
   fpLength = cat.GetFPLength()
   assert fpLength==21
   self._testBits(cat)
Esempio n. 22
0
 def test4(self):
   # Builds a signature tree from the gzipped, pickled examples and checks
   # the labels of the root and of its first two children.
   from rdkit.six.moves import cPickle
   gzPath = os.path.join(RDConfig.RDCodeDir, 'ML', 'DecTree', 'test_data', 'cdk2-few.pkl.gz')
   # the context manager closes the archive even if unpickling fails
   with gzip.open(gzPath, 'rb') as gz:
     examples = cPickle.load(gz, encoding='Latin1')
   t = BuildSigTree(examples, 2, maxDepth=3)
   self.assertEqual(t.GetLabel(), 2181)
   self.assertEqual(t.GetChildren()[0].GetLabel(), 2861)
   self.assertEqual(t.GetChildren()[1].GetLabel(), 8182)
Esempio n. 23
0
def readFragmentScores(name='fpscores'):
    """Load ``<name>.pkl.gz`` and store a key -> score dict in ``_fscores``.

    The gzipped pickle is iterated row by row; for each row, ``row[0]`` is
    converted with ``float()`` and stored under every remaining item of the
    row. The resulting dict replaces the module-global ``_fscores``; nothing
    is returned.

    NOTE: unpickling can execute arbitrary code, so only load score files
    that come from a trusted source.
    """
    import gzip
    global _fscores
    # context manager so the archive handle is closed instead of leaked
    with gzip.open('%s.pkl.gz'%name) as scoreFile:
        _fscores = cPickle.load(scoreFile)
    outDict = {}
    for row in _fscores:
        for key in row[1:]:
            outDict[key] = float(row[0])
    _fscores = outDict
Esempio n. 24
0
 def testMultiTree(self):
   # " testing multivalued tree growth "
   self._setupMultiTree()
   with open(self.multiTreeName, 'r') as inTFile:
     buf = inTFile.read().replace('\r\n', '\n').encode('utf-8')
     inTFile.close()
   with io.BytesIO(buf) as inFile:
     t2 = cPickle.load(inFile)
   assert self.t1 == t2, 'Incorrect tree generated.'
Esempio n. 25
0
 def testPyBasicTree(self):
   # " testing basic tree growth (python entropy code) "
   self._setupPyBasicTree()
   with open(self.basicTreeName, 'r') as inTFile:
     buf = inTFile.read().replace('\r\n', '\n').encode('utf-8')
     inTFile.close()
   with io.BytesIO(buf) as inFile:
     t2 = cPickle.load(inFile)
   assert self.t1 == t2, 'Incorrect tree generated.'
Esempio n. 26
0
 def test4UnusedVars(self):
   " testing unused variables "
   self._setupTree1a()
   with open(self.qTree1Name,'rb') as inFile:
     t2 = cPickle.load(inFile)
   assert self.t1 == t2, 'Incorrect tree generated.'
   for i in xrange(len(self.examples1)):
     assert self.t1.ClassifyExample(self.examples1[i])==self.examples1[i][-1],\
            'examples1[%d] misclassified'%i
Esempio n. 27
0
 def test2Tree(self):
   " testing tree2 "
   self._setupTree2()
   with open(self.qTree2Name, 'r') as inTFile:
     buf = inTFile.read().replace('\r\n', '\n').encode('utf-8')
     inTFile.close()
   with io.BytesIO(buf) as inFile:
     t2 = cPickle.load(inFile)
   assert self.t2 == t2, 'Incorrect tree generated.'
Esempio n. 28
0
  def test4Search(self):
    # Screens the pickled molecules in cdk2-syn-clip100.pkl.gz against a
    # three-feature pharmacophore and checks how many were read, how many can
    # match the features, and how many pass the distance-bounds match.
    featFactory = ChemicalFeatures.BuildFeatureFactory(os.path.join(self.dataDir,
                                                        'BaseFeatures.fdef'))

    activeFeats = [ChemicalFeatures.FreeChemicalFeature('Acceptor',
                                            Geometry.Point3D(0.0, 0.0, 0.0)),
                   ChemicalFeatures.FreeChemicalFeature('Donor',
                                            Geometry.Point3D(0.0, 0.0, 0.0)),
                   ChemicalFeatures.FreeChemicalFeature('Aromatic',
                                            Geometry.Point3D(0.0, 0.0, 0.0))]
    pcophore= Pharmacophore.Pharmacophore(activeFeats)
    pcophore.setLowerBound(0,1,2.251)
    pcophore.setUpperBound(0,1,2.451)
    pcophore.setUpperBound2D(0,1,3)

    pcophore.setLowerBound(0,2,4.970)
    pcophore.setUpperBound(0,2,5.170)
    pcophore.setUpperBound2D(0,2,6)

    pcophore.setLowerBound(1,2,2.681)
    pcophore.setUpperBound(1,2,2.881)
    pcophore.setUpperBound2D(1,2,6)

    nDone = 0
    nMatches = 0
    nHits = 0

    # the context manager closes the archive once the scan is over
    with gzip.open(os.path.join(self.dataDir,'cdk2-syn-clip100.pkl.gz'),'rb') as inF:
      while 1:
        try:
          name,molPkl,boundsMat = cPickle.load(inF, encoding='latin1')
          if PY3:
            molPkl = bytes(molPkl, encoding='latin1')
        except Exception:
          # any failure to read a record (including end of stream) stops the
          # scan; a short read is caught by the nDone assertion below
          break

        nDone += 1

        mol = Chem.Mol(molPkl)
        # the stored bounds matrix is replaced by a freshly computed one
        boundsMat = rdDistGeom.GetMoleculeBoundsMatrix(mol)
        DG.DoTriangleSmoothing(boundsMat)

        canMatch,matches = EmbedLib.MatchPharmacophoreToMol(mol,featFactory,
                                                            pcophore)
        if canMatch:
          nMatches+=1
          r = EmbedLib.MatchPharmacophore(matches,boundsMat,pcophore,
                                          useDownsampling=True,use2DLimits=True,
                                          mol=mol)
          failed,bm,match,details = r
          if not failed:
            nHits+=1

    self.assertEqual(nDone,100)
    self.assertEqual(nMatches,93)
    #print 'nhits:',nHits
    self.assertEqual(nHits,67)
Esempio n. 29
0
 def testTorsionsRegression(self):
   # Compares the topological-torsion fingerprint of every molecule in
   # self.mols with the pickled reference at the same index.
   # The context manager closes the archive as soon as it has been unpickled.
   with gzip.open(os.path.join(self.testDataPath, 'mols1000.tts.pkl.gz'), 'rb') as inF:
     torsions = cPickle.load(inF, encoding='bytes')
   for i, m in enumerate(self.mols):
     tt = Torsions.GetTopologicalTorsionFingerprintAsIntVect(m)
     if tt != torsions[i]:  # pragma: nocover
       debugFingerprint(m, tt, torsions[i])
     self.assertEqual(tt, torsions[i])
     # the neighbouring reference entry must differ (i - 1 wraps to the last
     # entry when i is 0)
     self.assertNotEqual(tt, torsions[i - 1])
Esempio n. 30
0
 def testPairsRegression(self):
   # Compares the atom-pair fingerprint of every molecule in self.mols with
   # the pickled reference at the same index.
   # The context manager closes the archive as soon as it has been unpickled.
   with gzip.open(os.path.join(self.testDataPath, 'mols1000.aps.pkl.gz'), 'rb') as inF:
     atomPairs = cPickle.load(inF, encoding='bytes')
   for i, m in enumerate(self.mols):
     ap = Pairs.GetAtomPairFingerprint(m)
     if ap != atomPairs[i]:  # pragma: nocover
       debugFingerprint(m, ap, atomPairs[i])
     self.assertEqual(ap, atomPairs[i])
     # the neighbouring reference entry must differ (i - 1 wraps to the last
     # entry when i is 0)
     self.assertNotEqual(ap, atomPairs[i - 1])
Esempio n. 31
0
def GetFingerprints(details):
    """ returns an iterable sequence of fingerprints
  each fingerprint will have a _fieldsFromDb member whose first entry is
  the id.

  The source is chosen from `details`:
    - dbName and tableName both set: a supplier built on a database cursor
    - otherwise inFileName set: a list read from a file of consecutive
      pickled (ID, fingerprint) pairs; an unreadable file is logged and
      yields an empty list
    - neither: None

  If the database connection cannot be set up, the error is logged and the
  original exception is re-raised.

  NOTE: unpickling can execute arbitrary code, so only read fingerprint
  files from a trusted source.

  """
    if details.dbName and details.tableName:
        try:
            conn = DbConnect(details.dbName, details.tableName)
            if hasattr(details, 'dbUser'):
                conn.user = details.dbUser
            if hasattr(details, 'dbPassword'):
                conn.password = details.dbPassword
        except Exception:
            import traceback
            FingerprintMols.error(
                'Error: Problems establishing connection to database: %s|%s\n'
                % (details.dbName, details.tableName))
            traceback.print_exc()
            # Nothing below can work without a connection; re-raise the real
            # error instead of failing later on an unbound `conn`.
            raise
        cmd = _ConstructSQL(details, extraFields=details.fpColName)
        curs = conn.GetCursor()
        # curs.execute(cmd)
        # print 'CURSOR:',curs,curs.closed
        if _dataSeq:
            suppl = _dataSeq(curs,
                             cmd,
                             depickle=not details.noPickle,
                             klass=DataStructs.ExplicitBitVect)
            # the connection is stored on _dataSeq itself
            _dataSeq._conn = conn
        else:
            # NOTE(review): `data` is not defined in this function; unless a
            # module-level `data` exists this branch raises NameError --
            # confirm what should be passed here.
            suppl = DbFpSupplier.ForwardDbFpSupplier(
                data, fpColName=details.fpColName)
    elif details.inFileName:
        suppl = []
        try:
            # pickle streams must be read in binary mode
            inF = open(details.inFileName, 'rb')
        except IOError:
            import traceback
            FingerprintMols.error('Error: Problems reading from file %s\n' %
                                  (details.inFileName))
            traceback.print_exc()
            # best effort, as before: an unreadable file gives no fingerprints
            return suppl

        with inF:
            while True:
                try:
                    ID, fp = cPickle.load(inF)
                except Exception:
                    # end of stream, or a record that cannot be read, ends
                    # the scan; whatever was loaded so far is kept
                    break
                fp._fieldsFromDb = [ID]
                suppl.append(fp)
    else:
        suppl = None

    return suppl
Esempio n. 32
0
    def test3Embed(self):
        # Matches the pickled molecules in cdk2-syn-clip100.pkl.gz against
        # self.pcophore; for the molecules listed in testResults the embedding
        # statistics are additionally compared with stored values.
        testResults = {
            'mol_197':
            (218.80, 35.75, 110.33, 11.58, 109.66, 11.09, 90.35, 2.95, 0.00),
            'mol_223':
            (259.19, 6.27, 134.13, 1.12, 134.06, 1.12, 85.74, 0.61, 0.00),
            'mol_269':
            (204.51, 7.89, 103.89, 1.20, 102.66, 1.20, 88.07, 1.21, 6.00),
        }
        nDone = 0
        nHits = 0
        # the context manager closes the archive once the scan is over
        with gzip.open(os.path.join(self.dataDir, 'cdk2-syn-clip100.pkl.gz'),
                       'rb') as inF:
            while 1:
                try:
                    name, molPkl, boundsMat = cPickle.load(inF, encoding='latin1')
                    if PY3:
                        molPkl = bytes(molPkl, encoding='latin1')
                except Exception:
                    # was a bare `except:`, which also swallowed
                    # KeyboardInterrupt/SystemExit; any read failure
                    # (including end of stream) still stops the scan
                    break

                nDone += 1

                mol = Chem.Mol(molPkl)
                # a fresh bounds matrix is computed; the stored one is unused
                nboundsMat = rdDistGeom.GetMoleculeBoundsMatrix(mol)
                DG.DoTriangleSmoothing(nboundsMat)
                matched, matches = EmbedLib.MatchPharmacophoreToMol(
                    mol, self.featFactory, self.pcophore)
                if matched:
                    failed, bm, match, stats = EmbedLib.MatchPharmacophore(
                        matches, nboundsMat, self.pcophore, useDownsampling=1)
                    if not failed:
                        nHits += 1

                        if name in testResults:
                            stats = EmbedLib.EmbedOne(mol,
                                                      name,
                                                      match,
                                                      self.pcophore,
                                                      count=10,
                                                      silent=1,
                                                      randomSeed=23)
                            tgt = testResults[name]
                            self.assertEqual(len(tgt), len(stats))
                            print(name)
                            print(','.join(['%.2f' % x for x in stats]))
                            # we'll use different tolerances for the different values:
                            self.assertTrue(feq(tgt[0], stats[0], 5.0),
                                            (tgt[0], stats[0]))
                            # index 1 is deliberately not compared
                            for i in range(2, len(tgt)):
                                self.assertTrue(feq(tgt[i], stats[i], 5.0),
                                                (tgt[i], stats[i]))

        self.assertEqual(nDone, 100)
        #print 'nHits:',nHits
        self.assertEqual(nHits, 50)
Esempio n. 33
0
 def test4(self):
     # Builds a signature tree from the gzipped, pickled examples and checks
     # the labels of the root and of its first two children.
     from rdkit.six.moves import cPickle
     gzPath = os.path.join(RDConfig.RDCodeDir, 'ML', 'DecTree', 'test_data',
                           'cdk2-few.pkl.gz')
     # the context manager closes the archive even if unpickling fails
     with gzip.open(gzPath, 'rb') as gz:
         examples = cPickle.load(gz, encoding='Latin1')
     t = BuildSigTree(examples, 2, maxDepth=3)
     self.assertEqual(t.GetLabel(), 2181)
     self.assertEqual(t.GetChildren()[0].GetLabel(), 2861)
     self.assertEqual(t.GetChildren()[1].GetLabel(), 8182)
Esempio n. 34
0
 def testPairsRegression(self):
     # Compares the atom-pair fingerprint of every molecule in self.mols with
     # the pickled reference at the same index.
     # The context manager closes the archive once it has been unpickled.
     with gzip.open(os.path.join(self.testDataPath, 'mols1000.aps.pkl.gz'),
                    'rb') as inF:
         atomPairs = cPickle.load(inF, encoding='bytes')
     for i, m in enumerate(self.mols):
         ap = Pairs.GetAtomPairFingerprint(m)
         if ap != atomPairs[i]:  # pragma: nocover
             debugFingerprint(m, ap, atomPairs[i])
         self.assertEqual(ap, atomPairs[i])
         # the neighbouring reference entry must differ (i - 1 wraps to the
         # last entry when i is 0)
         self.assertNotEqual(ap, atomPairs[i - 1])
Esempio n. 35
0
 def testTorsionsRegression(self):
     # Compares the topological-torsion fingerprint of every molecule in
     # self.mols with the pickled reference at the same index.
     # The context manager closes the archive once it has been unpickled.
     with gzip.open(os.path.join(self.testDataPath, 'mols1000.tts.pkl.gz'),
                    'rb') as inF:
         torsions = cPickle.load(inF, encoding='bytes')
     for i, m in enumerate(self.mols):
         tt = Torsions.GetTopologicalTorsionFingerprintAsIntVect(m)
         if tt != torsions[i]:  # pragma: nocover
             debugFingerprint(m, tt, torsions[i])
         self.assertEqual(tt, torsions[i])
         # the neighbouring reference entry must differ (i - 1 wraps to the
         # last entry when i is 0)
         self.assertNotEqual(tt, torsions[i - 1])
Esempio n. 36
0
 def test3CatFilePickle(self):
     # Loads the pickled catalog from the test data and checks its entry
     # count, fingerprint length and bits.
     catPath = os.path.join(RDConfig.RDCodeDir, 'Chem', 'test_data',
                            'simple_catalog.pkl')
     # read as text, collapse '\r\n' to '\n', then unpickle from the bytes
     with open(catPath, 'r') as pklTFile:
         catText = pklTFile.read()
     buf = catText.replace('\r\n', '\n').encode('utf-8')
     with io.BytesIO(buf) as pklFile:
         cat = cPickle.load(pklFile, encoding='bytes')
     nEntries = cat.GetNumEntries()
     assert nEntries == 21
     fpLength = cat.GetFPLength()
     assert fpLength == 21
     self._testBits(cat)
Esempio n. 37
0
  def test4Search(self):
    # Screens the pickled molecules in cdk2-syn-clip100.pkl.gz against a
    # three-feature pharmacophore and checks how many were read, how many can
    # match the features, and how many pass the distance-bounds match.
    featFactory = ChemicalFeatures.BuildFeatureFactory(
      os.path.join(self.dataDir, 'BaseFeatures.fdef'))

    activeFeats = [ChemicalFeatures.FreeChemicalFeature('Acceptor',
                                                        Geometry.Point3D(0.0, 0.0, 0.0)),
                   ChemicalFeatures.FreeChemicalFeature('Donor', Geometry.Point3D(0.0, 0.0, 0.0)),
                   ChemicalFeatures.FreeChemicalFeature('Aromatic',
                                                        Geometry.Point3D(0.0, 0.0, 0.0))]
    pcophore = Pharmacophore.Pharmacophore(activeFeats)
    pcophore.setLowerBound(0, 1, 2.251)
    pcophore.setUpperBound(0, 1, 2.451)
    pcophore.setUpperBound2D(0, 1, 3)

    pcophore.setLowerBound(0, 2, 4.970)
    pcophore.setUpperBound(0, 2, 5.170)
    pcophore.setUpperBound2D(0, 2, 6)

    pcophore.setLowerBound(1, 2, 2.681)
    pcophore.setUpperBound(1, 2, 2.881)
    pcophore.setUpperBound2D(1, 2, 6)

    nDone = 0
    nMatches = 0
    nHits = 0

    # the context manager closes the archive once the scan is over
    with gzip.open(os.path.join(self.dataDir, 'cdk2-syn-clip100.pkl.gz'), 'rb') as inF:
      while 1:
        try:
          name, molPkl, boundsMat = cPickle.load(inF, encoding='latin1')
          if PY3:
            molPkl = bytes(molPkl, encoding='latin1')
        except Exception:
          # any failure to read a record (including end of stream) stops the
          # scan; a short read is caught by the nDone assertion below
          break

        nDone += 1

        mol = Chem.Mol(molPkl)
        # the stored bounds matrix is replaced by a freshly computed one
        boundsMat = rdDistGeom.GetMoleculeBoundsMatrix(mol)
        DG.DoTriangleSmoothing(boundsMat)

        canMatch, matches = EmbedLib.MatchPharmacophoreToMol(mol, featFactory, pcophore)
        if canMatch:
          nMatches += 1
          r = EmbedLib.MatchPharmacophore(matches, boundsMat, pcophore, useDownsampling=True,
                                          use2DLimits=True, mol=mol)
          failed, bm, match, details = r
          if not failed:
            nHits += 1

    self.assertEqual(nDone, 100)
    self.assertEqual(nMatches, 93)
    #print 'nhits:',nHits
    self.assertEqual(nHits, 67)
 def testSaveState(self):
     # Loads the pickled calculator from molcalc.dsc and checks its descriptor
     # names, versions and computed values against the fixture data.
     dscPath = os.path.join(RDConfig.RDCodeDir, 'ML/Descriptors/test_data',
                            'molcalc.dsc')
     # read as text, collapse '\r\n' to '\n', then unpickle from the bytes
     with open(dscPath, 'r') as inTF:
         dscText = inTF.read()
     buf = dscText.replace('\r\n', '\n').encode('utf-8')
     with io.BytesIO(buf) as inF:
         calc = cPickle.load(inF)
     names = calc.GetDescriptorNames()
     self.assertEqual(names, tuple(self.descs))
     versions = calc.GetDescriptorVersions()
     self.assertEqual(versions, tuple(self.vers))
     self._testVals(calc, self.testD)
Esempio n. 39
0
 def setUp(self):
     # Prepares the 'problematic' molecule supplier and its reference data,
     # then silences rdApp warnings.
     self.dataset = {}
     self.dataset_inchi = {}
     sdfPath = os.path.join(RDConfig.RDCodeDir, 'Chem/test_data',
                            'pubchem-hard-set.sdf.gz')
     # NOTE(review): this gzip handle is handed to the supplier and left open;
     # presumably the supplier reads from it lazily -- confirm before closing.
     inf = gzip.open(sdfPath,'r')
     supplier = ForwardSDMolSupplier(inf,sanitize=False,removeHs=False)
     self.dataset['problematic'] = supplier
     inchiPath = os.path.join(RDConfig.RDCodeDir, 'Chem/test_data',
                              'pubchem-hard-set.inchi')
     with open(inchiPath,'rb') as inF:
         self.dataset_inchi['problematic'] = load(inF)
     # disable logging
     DisableLog('rdApp.warning')
Esempio n. 40
0
def readFragmentScores(name='fpscores'):
    """Load ``<name>.pkl.gz`` and store a key -> score dict in ``_fscores``.

    The gzipped pickle is iterated row by row; for each row, ``row[0]`` is
    converted with ``float()`` and stored under every remaining item of the
    row. The resulting dict replaces the module-global ``_fscores``; nothing
    is returned.

    NOTE: unpickling can execute arbitrary code, so only load score files
    that come from a trusted source.
    """
    import gzip
    global _fscores
    fname = '{}.pkl.gz'.format(name)
    # download(_get_dgl_url(os.path.join('dataset', fname)), path=fname)
    # context manager so the archive handle is closed instead of leaked
    with gzip.open(fname) as scoreFile:
        _fscores = cPickle.load(scoreFile)
    outDict = {}
    for row in _fscores:
        for key in row[1:]:
            outDict[key] = float(row[0])
    _fscores = outDict
Esempio n. 41
0
def GetComposites(details):
  """Return the list of unpickled composite models selected by *details*.

  Two sources are supported, checked in this order:

  * if both ``details.persistTblName`` and ``details.inNote`` are set, every
    row returned for that note from the table ``details.persistTblName`` in
    database ``details.dbName`` is unpickled from its first column;
  * otherwise, if ``details.composFileName`` is set, the single pickle stored
    in that file is loaded.

  Args:
    details: object providing the attributes ``persistTblName``, ``inNote``,
      ``dbName`` and ``composFileName``.

  Returns:
    A list of the loaded objects; empty when neither source is configured.
  """
  res = []
  if details.persistTblName and details.inNote:
    conn = DbConnect(details.dbName, details.persistTblName)
    # NOTE(review): the where clause is built by string interpolation; if
    # inNote can ever come from untrusted input this is open to SQL injection.
    mdls = conn.GetData(fields='MODEL', where="where note='%s'" % (details.inNote))
    for row in mdls:
      rawD = row[0]
      res.append(cPickle.loads(str(rawD)))
  elif details.composFileName:
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    # The `with` block closes the file instead of leaking the handle.
    with open(details.composFileName, 'rb') as composFile:
      res.append(cPickle.load(composFile))
  return res
Esempio n. 42
0
 def testGeneralPickle(self):
   """ testing DataSet pickling

   Writes self.d with DataUtils.WritePickledData, reads the four pickled
   objects back from the same file, rebuilds an MLDataSet from them and
   compares it with self.d through a series of getters.
   """
   self.setUpGeneralLoad()
   pklName = RDConfig.RDCodeDir+'/ML/Data/test_data/testgeneral.dat.pkl'
   DataUtils.WritePickledData(pklName,self.d)
   with open(pklName,'rb') as pklF:
     # the file holds four consecutive pickles, read in this order
     vNames, qBounds, ptNames, examples = [cPickle.load(pklF) for _ in range(4)]
   reloaded = MLData.MLDataSet(examples,varNames=vNames,qBounds=qBounds,ptNames=ptNames)
   # getters whose complete return values must agree
   wholeChecks = (
     ('GetNPts', 'nPts wrong'),
     ('GetNVars', 'nVars wrong'),
     ('GetNResults', 'nResults wrong'),
     ('GetVarNames', 'varNames wrong'),
     ('GetPtNames', 'ptNames wrong'),
     ('GetNPossibleVals', 'nPossible Wrong'),
     ('GetQuantBounds', 'quantBounds Wrong'),
     ('GetResults', 'GetResults wrong'),
   )
   for getter, msg in wholeChecks:
     assert getattr(self.d, getter)()==getattr(reloaded, getter)(),msg
   # getters for which only a single entry is compared
   entryChecks = (
     ('GetAllData', 1, 'GetAllData wrong'),
     ('GetInputData', 3, 'GetInputData wrong'),
     ('GetNamedData', 2, 'GetNamedData wrong'),
   )
   for getter, idx, msg in entryChecks:
     assert getattr(self.d, getter)()[idx]==getattr(reloaded, getter)()[idx],msg
Esempio n. 43
0
 def test4UnusedVars(self):
     """ testing unused variables

     Builds the tree via self._setupTree1a(), compares it with the tree
     unpickled from self.qTree1Name and checks that self.t1 classifies every
     entry of self.examples1 as that entry's last element.
     """
     self._setupTree1a()
     # Read as text so CRLF line endings can be normalised before unpickling;
     # the `with` block already closes the file.
     with open(self.qTree1Name, 'r') as inTFile:
         buf = inTFile.read().replace('\r\n', '\n').encode('utf-8')
     with io.BytesIO(buf) as inFile:
         t2 = cPickle.load(inFile)
     assert self.t1 == t2, 'Incorrect tree generated.'
     # enumerate() replaces the xrange() index loop, which only exists on
     # Python 2.
     for i, example in enumerate(self.examples1):
         assert self.t1.ClassifyExample(example) == example[-1], \
                'examples1[%d] misclassified' % i
Esempio n. 44
0
def readFragmentScores(name='fpscores'):
    """Load fragment scores from ``<name>.pkl.gz`` into the global ``_fscores``.

    When *name* is the default ``"fpscores"`` the file is looked up in the
    directory containing this module; any other value is used as given.  The
    gzipped pickle is read as an iterable of records whose first item is the
    score and whose remaining items are keys that receive that score.  After
    the call ``_fscores`` is a dict mapping each key to its score as a
    ``float``.

    Args:
        name: Path of the score file without the ``.pkl.gz`` suffix.

    Returns:
        None.  The lookup table is stored in the module-level ``_fscores``.
    """
    import gzip
    global _fscores
    # generate the full path filename:
    if name == "fpscores":
        name = op.join(op.dirname(__file__), name)
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    # The `with` block makes sure the gzip handle is closed after loading.
    with gzip.open('%s.pkl.gz' % name) as scoreFile:
        records = cPickle.load(scoreFile)
    outDict = {}
    for record in records:
        for j in range(1, len(record)):
            outDict[record[j]] = float(record[0])
    _fscores = outDict
Esempio n. 45
0
    def test1Tree(self):
        # " testing tree1 "
        # Builds the tree via self._setupTree1(), compares it with the tree
        # unpickled from self.qTree1Name, then checks its string form and
        # quantization bounds.
        self._setupTree1()
        # Read as text so CRLF line endings can be normalised before
        # unpickling; the `with` block already closes the file.
        with open(self.qTree1Name, 'r') as inTFile:
            buf = inTFile.read().replace('\r\n', '\n').encode('utf-8')
        with io.BytesIO(buf) as inFile:
            t2 = cPickle.load(inFile)
        assert self.t1 == t2, 'Incorrect tree generated. '

        self.assertIn('Var: 2 []', str(self.t1))
        self.assertEqual(self.t1.GetQuantBounds(), [])
Esempio n. 46
0
    def testPickle(self):
        # Round-trips a FreeChemicalFeature through pickle and also checks
        # that the two stored pickle files (feat.pkl and featv2.pkl) still
        # load and agree with a freshly built feature.
        ffeat = ChemicalFeatures.FreeChemicalFeature(
            "HBondDonor", "HBondDonor1", geom.Point3D(1.0, 2.0, 3.0), 123)

        def loadStored(fileName):
            # Unpickle one of the stored feature files.  The file is read as
            # text so CRLF line endings can be normalised first, and the
            # `with` blocks release the handles even if reading fails.
            path = os.path.join(
                RDConfig.RDBaseDir,
                'Code/ChemicalFeatures/Wrap/testData/' + fileName)
            with open(path, 'r') as inTF:
                buf = inTF.read().replace('\r\n', '\n').encode('utf-8')
            with io.BytesIO(buf) as inF:
                return cPickle.load(inF, encoding='bytes')

        def checkCommon(other):
            # Family, type and position are compared for every version.
            self.assertEqual(other.GetFamily(), ffeat.GetFamily())
            self.assertEqual(other.GetType(), ffeat.GetType())
            self.assertTrue(ptFeq(other.GetPos(), ffeat.GetPos()))

        pkl = cPickle.dumps(ffeat)
        ffeat2 = cPickle.loads(pkl, encoding='bytes')
        self.assertEqual(ffeat2.GetId(), ffeat.GetId())
        checkCommon(ffeat2)

        # Check that the old pickled versions have not been broken
        ffeat2 = loadStored('feat.pkl')
        # this version (1.0) does not have an id in the byte stream
        checkCommon(ffeat2)

        # Test the new version also has the id and works as expected

        # uncomment the following to generate (overwrite) new version of pickled
        # data file
        #cPickle.dump(ffeat,file(os.path.join(RDConfig.RDBaseDir, 'Code/ChemicalFeatures/Wrap/testData/featv2.pkl'),'wb+'))
        ffeat2 = loadStored('featv2.pkl')
        self.assertEqual(ffeat2.GetId(), ffeat.GetId())
        checkCommon(ffeat2)
Esempio n. 47
0
 def test4Issue237(self):
     # Replays the data stored in Issue237.pkl through a BIASENTROPY
     # InfoBitRanker and checks that the top-ranked entry starts with 12.
     pklPath = os.path.join(RDConfig.RDBaseDir, 'Code', 'ML', 'InfoTheory',
                            'Wrap', 'testData', 'Issue237.pkl')
     with open(pklPath, 'rb') as pklFile:
         payload = cPickle.load(pklFile, encoding='bytes')
     # the pickle holds a 5-tuple; the third item is not used by this test
     examples, maskBits, _bias, nBits, nPossible = payload
     ranker = rdit.InfoBitRanker(nBits, nPossible, rdit.InfoType.BIASENTROPY)
     ranker.SetMaskBits(maskBits)
     for example in examples:
         ranker.AccumulateVotes(example[1], example[-1])
     # this dumps core on linux if the bug isn't fixed:
     top = ranker.GetTopN(1)
     self.assertTrue(int(top[0][0]) == 12)
Esempio n. 48
0
def readFragmentScores(name='fpscores'):
    """Rebuild the module-level ``_fscores`` lookup table.

    When *name* is the default ``"fpscores"``, the pickle at the fixed
    relative path ``saved/s.pkl`` is loaded into ``_fscores`` first.  For any
    other *name* nothing is loaded and the current value of ``_fscores`` is
    converted as it stands.

    ``_fscores`` is then iterated as records whose first item is the score
    and whose remaining items are keys that receive that score (as a
    ``float``); the resulting dict replaces ``_fscores``.

    NOTE(review): the full path computed from *name* is never used and the
    pickle location is hard-coded - confirm this is intended.

    Returns:
        None.
    """
    import gzip
    global _fscores
    if name == "fpscores":
        # generate the full path filename:
        name = op.join(op.dirname(__file__), name)
        with open('saved/s.pkl', 'rb') as source:
            _fscores = cPickle.load(source)

    table = {}
    for entry in _fscores:
        # entry[0] is the score, entry[1:] are the keys sharing it
        table.update((entry[k], float(entry[0])) for k in range(1, len(entry)))
    _fscores = table
Esempio n. 49
0
def ReadFragScores(name='fpscores'):
    """Load SAS fragment scores from ``<name>.pkl.gz`` into the global ``_fscores``.

    A progress message is written to standard output first.  When *name* is
    the default ``"fpscores"`` the file is looked up in the directory
    containing this module; any other value is used as given.  The gzipped
    pickle is read as an iterable of records whose first item is the score
    and whose remaining items are keys that receive that score.  After the
    call ``_fscores`` is a dict mapping each key to its score as a ``float``.

    Args:
        name: Path of the score file without the ``.pkl.gz`` suffix.

    Returns:
        None.  The lookup table is stored in the module-level ``_fscores``.
    """
    import gzip
    import sys
    global _fscores
    # Progress message without a trailing newline.  sys.stdout.write is used
    # because the Python 2 print statement is a syntax error on Python 3.
    sys.stdout.write("reading SAS fragment scores... ")
    #generate the full path filename
    if name == "fpscores":
        name = os.path.join(os.path.dirname(__file__), name)

    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    # The `with` block makes sure the gzip handle is closed after loading.
    with gzip.open('%s.pkl.gz' % name) as scoreFile:
        records = cPickle.load(scoreFile)
    outDict = {}
    for record in records:
        for j in range(1, len(record)):
            outDict[record[j]] = float(record[0])
    _fscores = outDict
Esempio n. 50
0
 def testPerm2(self):
     """ tests the descriptor remapping stuff in a packager

     Loads the package pickled in 'Jan9_build3_pkg.pkl', replaces its
     calculator's descriptor lists with a shuffled copy, re-initialises the
     package and checks it against self.testD with self._verify.
     """
     with open(os.path.join(self.dataDir, 'Jan9_build3_pkg.pkl'),
               'rb') as pkgF:
         pkg = cPickle.load(pkgF)
     calc = pkg.GetCalculator()
     names = calc.GetDescriptorNames()
     DataUtils.InitRandomNumbers((23, 42))
     perm = list(names)
     try:
         random.shuffle(perm, random=random.random)
     except TypeError:
         # Python 3.11 removed shuffle()'s `random` argument; the call is
         # rejected before anything is shuffled, so retry without it.
         random.shuffle(perm)
     calc.simpleList = perm
     calc.descriptorNames = perm
     pkg.Init()
     self._verify(pkg, self.testD)
Esempio n. 51
0
 def setUp(self):
     # Loads the pickled examples from 'ferro.pkl' and defines the variable
     # names, quantization bounds, possible-value counts and attribute
     # indices stored on this test case, then calls
     # DataUtils.InitRandomNumbers.
     with open(RDConfig.RDCodeDir + '/ML/Composite/test_data/ferro.pkl',
               'rb') as pklF:
         self.examples = cPickle.load(pklF)
     self.varNames = [
         'composition', 'max_atomic', 'has3d', 'has4d', 'has5d', 'elconc',
         'atvol', 'isferro'
     ]
     self.qBounds = [[], [1.89, 3.53], [], [], [], [0.55, 0.73],
                     [11.81, 14.52], []]
     self.nPoss = [0, 3, 2, 2, 2, 3, 3, 2]
     # Indices 1 .. len(varNames) - 2.  Stored as a real list so the value is
     # the same type on Python 2 and 3 (a bare range() is lazy on Python 3).
     self.attrs = list(range(1, len(self.varNames) - 1))
     from rdkit.ML.Data import DataUtils
     DataUtils.InitRandomNumbers((23, 43))
Esempio n. 52
0
 def _writeDetailFile(self, inF, outF):
     # Copies records from inF to outF until inF is exhausted.  Each input
     # pickle is a (smiles, contribs) pair; for every SMILES that parses, a
     # pickle of (smiles, contribs from Crippen._GetAtomContribs) is written
     # to outF.  SMILES that fail to parse are reported with print().
     while True:
         try:
             smi, _oldContribs = cPickle.load(inF)
         except EOFError:
             # no more pickles in the input stream
             return
         mol = Chem.MolFromSmiles(smi)
         if not mol:
             print('Problems with SMILES:', smi)
             continue
         mol = Chem.AddHs(mol, 1)
         # result is not used; the call is kept so the sequence of
         # operations on mol is unchanged
         _hSmiles = Chem.MolToSmiles(mol)
         newContribs = Crippen._GetAtomContribs(mol)
         cPickle.dump((smi, newContribs), outF)
Esempio n. 53
0
 def testPkl(self):
     # Test pickling: an instance of self.klass with bits 1-3 set is dumped
     # to a scratch file, loaded back, and the on-bits of both are compared.
     v1 = self.klass(10)
     v1[1] = 1
     v1[2] = 1
     v1[3] = 1
     pklName = 'foo.pkl'
     try:
         # `with` closes each handle even when dump/load raises
         with open(pklName, 'wb+') as outF:
             cPickle.dump(v1, outF)
         with open(pklName, 'rb') as inF:
             v2 = cPickle.load(inF)
     finally:
         # never leave the scratch file behind, even on failure
         if os.path.exists(pklName):
             os.unlink(pklName)
     assert tuple(v1.GetOnBits()) == tuple(v2.GetOnBits()), 'pkl failed'
Esempio n. 54
0
    def _doDetailFile(self, inF, nFailsAllowed=1):
        """Compare stored atom contributions with freshly computed ones.

        Reads (smiles, reference contributions) pickles from *inF* until the
        stream is exhausted.  For each SMILES that parses, the sorted first
        components of Crippen._GetAtomContribs are compared with the sorted
        first components of the reference values using feq; the first
        mismatch for a molecule is printed and counted as one failure.

        Args:
            inF: open binary file-like object holding consecutive pickles.
            nFailsAllowed: the number of failing molecules must be strictly
                less than this value.
        """
        nFails = 0
        while True:
            try:
                smi, refContribs = cPickle.load(inF)
            except EOFError:
                break
            refContribs = [x[0] for x in refContribs]
            refOrder = numpy.argsort(refContribs)
            try:
                mol = Chem.MolFromSmiles(smi)
            except Exception:
                # report the parsing problem and treat the molecule as
                # unparsable; KeyboardInterrupt/SystemExit now propagate
                import traceback
                traceback.print_exc()
                mol = None
            if not mol:
                print('Problems with SMILES:', smi)
                continue
            mol = Chem.AddHs(mol, 1)
            smi2 = Chem.MolToSmiles(mol)
            contribs = [x[0] for x in Crippen._GetAtomContribs(mol)]
            #
            #  we're comparing to the old results using the oelib code.
            #  Since we have some disagreements with them as to what is
            #  aromatic and what isn't, we may have different numbers of
            #  Hs. For the sake of comparison, just pop those off our
            #  new results.
            #
            del contribs[len(refContribs):]
            order = numpy.argsort(contribs)

            for i in range(len(refContribs)):
                refL = refContribs[refOrder[i]]
                val = contribs[order[i]]
                if not feq(refL, val):
                    print('%s (%s): %d %6.5f != %6.5f' %
                          (smi, smi2, order[i], refL, val))
                    Crippen._GetAtomContribs(mol, force=1)
                    print('-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*')
                    nFails += 1
                    # only the first mismatch per molecule is counted
                    break
        self.assertLess(nFails, nFailsAllowed)
Esempio n. 55
0
 def testPerm2(self):
   """ tests the descriptor remapping stuff in a packager

   Loads the package pickled in 'Jan9_build3_pkg.pkl', replaces its
   calculator's descriptor lists with a shuffled copy, re-initialises the
   package and checks it against self.testD with self._verify.
   """
   # Read as text so CRLF line endings can be normalised before unpickling;
   # the `with` block already closes the file.
   with open(os.path.join(self.dataDir,'Jan9_build3_pkg.pkl'),'r') as pkgTF:
     buf = pkgTF.read().replace('\r\n', '\n').encode('utf-8')
   with io.BytesIO(buf) as pkgF:
     pkg = cPickle.load(pkgF)
   calc = pkg.GetCalculator()
   names = calc.GetDescriptorNames()
   DataUtils.InitRandomNumbers((23,42))
   perm = list(names)
   try:
     random.shuffle(perm,random=random.random)
   except TypeError:
     # Python 3.11 removed shuffle()'s `random` argument; the call is
     # rejected before anything is shuffled, so retry without it.
     random.shuffle(perm)
   calc.simpleList = perm
   calc.descriptorNames = perm
   pkg.Init()
   self._verify(pkg,self.testD)
Esempio n. 56
0
    def testTreeGrow(self):
        # testing tree-based composite
        # Grows a composite from self.examples, checks the ordering produced
        # by SortModels() in both modes, and compares the per-model errors
        # with the reference composite stored in composite_base.pkl.
        # Read as text so CRLF line endings can be normalised before
        # unpickling; the `with` block already closes the file.
        with open(
                RDConfig.RDCodeDir +
                '/ML/Composite/test_data/composite_base.pkl', 'r') as pklTF:
            buf = pklTF.read().replace('\r\n', '\n').encode('utf-8')
        with io.BytesIO(buf) as pklF:
            self.refCompos = cPickle.load(pklF)

        composite = Composite.Composite()
        composite._varNames = self.varNames
        composite.SetQuantBounds(self.qBounds, self.nPoss)
        from rdkit.ML.DecTree import CrossValidate
        driver = CrossValidate.CrossValidationDriver
        pruner = None
        composite.Grow(self.examples,
                       self.attrs, [],
                       buildDriver=driver,
                       pruner=pruner,
                       nTries=100,
                       silent=1)
        composite.AverageErrors()
        # with sortOnError=False the counts end up sorted, the errors do not
        composite.SortModels(sortOnError=False)
        self.assertEqual(composite.countList, sorted(composite.countList))
        self.assertNotEqual(composite.errList, sorted(composite.errList))
        # the default sort leaves the errors sorted instead
        composite.SortModels()
        self.assertNotEqual(composite.countList, sorted(composite.countList))
        self.assertEqual(composite.errList, sorted(composite.errList))

        # with open(RDConfig.RDCodeDir+'/ML/Composite/test_data/composite_base.pkl','wb') as pklF:
        #   cPickle.dump(composite,pklF)

        self.treeComposite = composite
        self.assertEqual(len(composite), len(self.refCompos))
        for i in range(len(composite)):
            # each entry unpacks to three items; only the last (error) is
            # compared
            _, _, e1 = composite[i]
            _, _, e2 = self.refCompos[i]
            self.assertEqual(e1, e2)
            # we used to check for equality here, but since there are redundant errors,
            #  that's non-trivial.
            # assert t1 == t2, 'tree mismatch'
            # assert c1 == c2, 'count mismatch'
        s = str(composite)
        self.assertIn('Composite', s)
        self.assertIn('Model', s)
        self.assertIn('error', s)
Esempio n. 57
0
 def setUp(self):
     # Loads the pickled examples from 'ferro.pkl' and defines the variable
     # names, quantization bounds, possible-value counts and attribute
     # indices stored on this test case, then calls
     # DataUtils.InitRandomNumbers.
     # The pickle is read as text so CRLF line endings can be normalised
     # before unpickling; the `with` block already closes the file.
     with open(RDConfig.RDCodeDir + '/ML/Composite/test_data/ferro.pkl',
               'r') as pklTF:
         buf = pklTF.read().replace('\r\n', '\n').encode('utf-8')
     with io.BytesIO(buf) as pklF:
         self.examples = cPickle.load(pklF)
     self.varNames = [
         'composition', 'max_atomic', 'has3d', 'has4d', 'has5d', 'elconc',
         'atvol', 'isferro'
     ]
     self.qBounds = [[], [1.89, 3.53], [], [], [], [0.55, 0.73],
                     [11.81, 14.52], []]
     self.nPoss = [0, 3, 2, 2, 2, 3, 3, 2]
     # indices 1 .. len(varNames) - 2
     self.attrs = list(range(1, len(self.varNames) - 1))
     from rdkit.ML.Data import DataUtils
     DataUtils.InitRandomNumbers((23, 43))
Esempio n. 58
0
    def test2(self):
        """ depth limit

        Builds a composite from the 'ferro_quant' table with limitDepth set
        to 3 and compares it with the reference composite unpickled from
        'ferromag_quant_10_3.pkl'.
        """
        self.details.tableName = 'ferro_quant'
        refComposName = 'ferromag_quant_10_3.pkl'

        # Read as text so CRLF line endings can be normalised before
        # unpickling; the `with` block already closes the file.
        with open(os.path.join(self.baseDir, refComposName), 'r') as pklTF:
            buf = pklTF.read().replace('\r\n', '\n').encode('utf-8')
        with io.BytesIO(buf) as pklF:
            refCompos = pickle.load(pklF)

        # first make sure the data are intact
        self._init(refCompos)
        self.details.limitDepth = 3
        compos = BuildComposite.RunIt(self.details, saveIt=0)

        self.compare(compos, refCompos)
Esempio n. 59
0
    def test7(self):
        """ Test composite of naive bayes

        Builds a composite from the 'ferro_noquant' table with trees switched
        off and naive Bayes switched on, and compares it with the reference
        composite unpickled from 'ferromag_NaiveBayes.pkl'.
        """
        self.details.tableName = 'ferro_noquant'
        refComposName = 'ferromag_NaiveBayes.pkl'
        # Read as text so CRLF line endings can be normalised before
        # unpickling; the `with` block already closes the file.
        with open(os.path.join(self.baseDir, refComposName), 'r') as pklTFile:
            buf = pklTFile.read().replace('\r\n', '\n').encode('utf-8')
        with io.BytesIO(buf) as pklFile:
            refCompos = pickle.load(pklFile)
        self._init(refCompos, copyBounds=1)
        self.details.useTrees = 0
        self.details.useNaiveBayes = 1
        self.details.mEstimateVal = 20.0
        # eight entries: 0, six 2s, 0
        self.details.qBounds = [0] + [2] * 6 + [0]
        compos = BuildComposite.RunIt(self.details, saveIt=0)

        self.compare(compos, refCompos)
Esempio n. 60
0
 def test2SearchDownsample(self):
     # Reads the pickled records in 'cdk2-syn-clip100.pkl.gz' one by one,
     # runs self._matchMol on each against self.pcophore, and checks the
     # number of records read and the number of hits.
     nDone = 0
     nHits = 0
     # the `with` block closes the gzip handle once the scan is finished
     with gzip.open(os.path.join(self.dataDir, 'cdk2-syn-clip100.pkl.gz'),
                    'rb') as inF:
         while True:
             try:
                 tpl = cPickle.load(inF, encoding='latin1')
                 if PY3:
                     # re-encode the middle item to bytes on Python 3
                     tpl = tpl[0], tpl[1].encode('latin1'), tpl[2]
             except Exception:
                 # Any failure to read another record ends the scan; the
                 # count assertions below catch a premature stop.
                 break
             if self._matchMol(tpl, self.pcophore, self.featFactory, 1):
                 nHits += 1
             nDone += 1
     self.assertEqual(nDone, 100)
     # print 'nHits:',nHits
     self.assertEqual(nHits, 47)