Ejemplo n.º 1
0
    def setUp(self):
        """Prepare the test-data location and the default run details.

        Records the test-data directory and the test database name on the
        instance, then builds a details object from
        ``BuildComposite.SetDefaults()`` and points it at the test database
        using the default user and password from ``RDConfig``.
        """
        self.baseDir = os.path.join(RDConfig.RDCodeDir, 'ML', 'test_data')
        self.dbName = RDConfig.RDTestDatabase

        # Configure the details object locally, then publish it on the instance.
        runDetails = BuildComposite.SetDefaults()
        runDetails.dbName = self.dbName
        runDetails.dbUser = RDConfig.defaultDBUser
        runDetails.dbPassword = RDConfig.defaultDBPassword
        self.details = runDetails
Ejemplo n.º 2
0
  def test1(self):
    """ basics: build from 'ferro_quant' and compare with the pickled reference """
    self.details.tableName = 'ferro_quant'
    refComposName = 'ferromag_quant_10.pkl'

    # Use a context manager so the reference file is closed as soon as the
    # composite has been unpickled (the bare open() call leaked the handle).
    with open(os.path.join(self.baseDir, refComposName), 'rb') as pklF:
      refCompos = pickle.load(pklF)

    # first make sure the data are intact
    self._init(refCompos)
    compos = BuildComposite.RunIt(self.details, saveIt=0)

    self.compare(compos, refCompos)
Ejemplo n.º 3
0
 def test7(self):
   """ Test composite of naive bayes"""
   self.details.tableName = 'ferro_noquant'
   refComposName = 'ferromag_NaiveBayes.pkl'
   # Open the reference pickle in a context manager so the handle is always
   # closed; the original left the file object open for the rest of the test.
   with open(os.path.join(self.baseDir, refComposName), 'rb') as pklFile:
     refCompos = pickle.load(pklFile)
   self._init(refCompos, copyBounds=1)
   # Switch the run from trees to naive Bayes models.
   self.details.useTrees = 0
   self.details.useNaiveBayes = 1
   self.details.mEstimateVal = 20.0
   self.details.qBounds = [0] + [2] * 6 + [0]
   compos = BuildComposite.RunIt(self.details, saveIt=0)
   self.compare(compos, refCompos)
Ejemplo n.º 4
0
  def test6(self):
    """ auto bounds with a real valued activity"""
    self.details.tableName = 'ferro_noquant_realact'
    refComposName = 'ferromag_auto_10_3.pkl'

    # Use a context manager so the reference file is closed as soon as the
    # composite has been unpickled (the bare open() call leaked the handle).
    with open(os.path.join(self.baseDir, refComposName), 'rb') as pklF:
      refCompos = pickle.load(pklF)

    # first make sure the data are intact
    self._init(refCompos, copyBounds=1)
    self.details.limitDepth = 3
    self.details.nModels = 10
    self.details.activityBounds = [0.5]
    compos = BuildComposite.RunIt(self.details, saveIt=0)
    self.compare(compos, refCompos)
Ejemplo n.º 5
0
  def test4(self):
    """ more trees """
    self.details.tableName = 'ferro_quant'
    refComposName = 'ferromag_quant_50_3.pkl'

    # Use a context manager so the reference file is closed as soon as the
    # composite has been unpickled (the bare open() call leaked the handle).
    with open(os.path.join(self.baseDir, refComposName), 'rb') as pklF:
      refCompos = pickle.load(pklF)

    # first make sure the data are intact
    self._init(refCompos)
    self.details.limitDepth = 3
    self.details.nModels = 50
    compos = BuildComposite.RunIt(self.details, saveIt=0)

    self.compare(compos, refCompos)
Ejemplo n.º 6
0
  def test3(self):
    """ depth limit + less greedy """
    self.details.tableName = 'ferro_quant'
    refComposName = 'ferromag_quant_10_3_lessgreedy.pkl'

    # Use a context manager so the reference file is closed as soon as the
    # composite has been unpickled (the bare open() call leaked the handle).
    with open(os.path.join(self.baseDir, refComposName), 'rb') as pklF:
      refCompos = pickle.load(pklF)

    # first make sure the data are intact
    self._init(refCompos)
    self.details.limitDepth = 3
    self.details.lessGreedy = 1
    compos = BuildComposite.RunIt(self.details, saveIt=0)

    self.compare(compos, refCompos)
Ejemplo n.º 7
0
    def test7(self):
        """ Test composite of naive bayes"""
        self.details.tableName = 'ferro_noquant'
        refComposName = 'ferromag_NaiveBayes.pkl'
        # The reference pickle is read as text so that '\r\n' line endings can
        # be collapsed to '\n' before the bytes are handed to pickle.  The
        # ``with`` block closes the file, so no explicit close() is needed.
        with open(os.path.join(self.baseDir, refComposName), 'r') as pklTFile:
            buf = pklTFile.read().replace('\r\n', '\n').encode('utf-8')
        with io.BytesIO(buf) as pklFile:
            refCompos = pickle.load(pklFile)
        self._init(refCompos, copyBounds=1)
        # Switch the run from trees to naive Bayes models.
        self.details.useTrees = 0
        self.details.useNaiveBayes = 1
        self.details.mEstimateVal = 20.0
        self.details.qBounds = [0] + [2] * 6 + [0]
        compos = BuildComposite.RunIt(self.details, saveIt=0)

        self.compare(compos, refCompos)
Ejemplo n.º 8
0
    def test2(self):
        """ depth limit """
        self.details.tableName = 'ferro_quant'
        refComposName = 'ferromag_quant_10_3.pkl'

        # The reference pickle is read as text so that '\r\n' line endings can
        # be collapsed to '\n' before the bytes are handed to pickle.  The
        # ``with`` block closes the file, so no explicit close() is needed.
        with open(os.path.join(self.baseDir, refComposName), 'r') as pklTF:
            buf = pklTF.read().replace('\r\n', '\n').encode('utf-8')
        with io.BytesIO(buf) as pklF:
            refCompos = pickle.load(pklF)

        # first make sure the data are intact
        self._init(refCompos)
        self.details.limitDepth = 3
        compos = BuildComposite.RunIt(self.details, saveIt=0)

        self.compare(compos, refCompos)
Ejemplo n.º 9
0
    def test6(self):
        """ auto bounds with a real valued activity"""
        self.details.tableName = 'ferro_noquant_realact'
        refComposName = 'ferromag_auto_10_3.pkl'

        # The reference pickle is read as text so that '\r\n' line endings can
        # be collapsed to '\n' before the bytes are handed to pickle.  The
        # ``with`` block closes the file, so no explicit close() is needed.
        with open(os.path.join(self.baseDir, refComposName), 'r') as pklTF:
            buf = pklTF.read().replace('\r\n', '\n').encode('utf-8')
        with io.BytesIO(buf) as pklF:
            refCompos = pickle.load(pklF)

        # first make sure the data are intact
        self._init(refCompos, copyBounds=1)
        self.details.limitDepth = 3
        self.details.nModels = 10
        self.details.activityBounds = [0.5]
        compos = BuildComposite.RunIt(self.details, saveIt=0)

        self.compare(compos, refCompos)
Ejemplo n.º 10
0
    def test1_basics(self):
        # basics: build a composite from 'ferro_quant' with the default
        # settings and compare it with the pickled reference composite.
        # (Kept as a comment rather than a docstring, as in the original.)
        self.details.tableName = 'ferro_quant'
        refComposName = 'ferromag_quant_10.pkl'

        # The reference pickle is read as text so that '\r\n' line endings can
        # be collapsed to '\n' before the bytes are handed to pickle.  The
        # ``with`` block closes the file, so no explicit close() is needed.
        with open(os.path.join(self.baseDir, refComposName), 'r') as pklTF:
            buf = pklTF.read().replace('\r\n', '\n').encode('utf-8')
        with io.BytesIO(buf) as pklF:
            refCompos = pickle.load(pklF)

        # first make sure the data are intact
        self._init(refCompos)
        compos = BuildComposite.RunIt(self.details, saveIt=0)

        # Disabled snippet that overwrites the reference pickle with the
        # freshly built composite and reloads it:
        # pickle.dump(compos,open(os.path.join(self.baseDir,refComposName), 'wb'))
        # with open(os.path.join(self.baseDir,refComposName), 'rb') as pklF:
        #   refCompos = pickle.load(pklF)

        self.compare(compos, refCompos)
Ejemplo n.º 11
0
def GrowIt(details,composite,progressCallback=None,
           saveIt=1,setDescNames=0,data=None):
  """ does the actual work of growing a composite model

    **Arguments**

      - details:  a _CompositeRun.CompositeRun_ object containing details
        (options, parameters, etc.) about the run

      - composite: the composite model to grow

      - progressCallback: (optional) a function which is called with a single
        argument (the number of models built so far) after each model is built.
        It is only forwarded to the composite's _Grow()_ method when
        _details.useTrees_ is set.

      - saveIt: (optional) accepted for interface compatibility.  This function
        never references it, so nothing is pickled or written here.

      - setDescNames: (optional) if nonzero and _details.useTrees_ is set, the
        composite's _SetInputOrder()_ method is called with the results of the
        data set's _GetVarNames()_ method.

      - data: (optional) the data set to be used.  If this is not provided, the
        data set described in details will be used.

    **Returns**

      the enlarged composite model

    **Side effects**

      - _details.rundate_ is always set; _details.outName_ and
        _details.tableName_ may be updated when the data set is loaded here.

      - _composite._varNames_ is set from the data set's variable names.

      - summary statistics (_overall_error_ etc.) are written to the
        module-level __runDetails_ object, not to _details_.

  """
  details.rundate = time.asctime()

  if data is None:
    # no data set supplied: load it from the source described in details
    fName = details.tableName.strip()
    if details.outName == '':
      details.outName = fName + '.pkl'
    if details.dbName == '':
      data = DataUtils.BuildQuantDataSet(fName)
    elif details.qBounds != []:
      details.tableName = fName
      data = details.GetDataSet()
    else:
      data = DataUtils.DBToQuantData(details.dbName,fName,quantName=details.qTableName,
                                     user=details.dbUser,password=details.dbPassword)

  # seed the random number generator from the composite before any
  # randomization of the activities takes place
  seed = composite._randomSeed
  DataUtils.InitRandomNumbers(seed)
  if details.shuffleActivities == 1:
    DataUtils.RandomizeActivities(data,shuffle=1,runDetails=details)
  elif details.randomActivities == 1:
    DataUtils.RandomizeActivities(data,shuffle=0,runDetails=details)

  namedExamples = data.GetNamedData()
  trainExamples = namedExamples
  nExamples = len(trainExamples)
  message('Training with %d examples'%(nExamples))
  message('\t%d descriptors'%(len(trainExamples[0])-2))
  nVars = data.GetNVars()
  nPossibleVals = composite.nPossibleVals
  # materialize the attribute indices: on Python 3 a bare range() is not a
  # list, so hand the model builders a real (mutable, sliceable) list
  attrs = list(range(1,nVars+1))

  if details.useTrees:
    from rdkit.ML.DecTree import CrossValidate,PruneTree
    if details.qBounds != []:
      from rdkit.ML.DecTree import BuildQuantTree
      builder = BuildQuantTree.QuantTreeBoot
    else:
      from rdkit.ML.DecTree import ID3
      builder = ID3.ID3Boot
    driver = CrossValidate.CrossValidationDriver
    pruner = PruneTree.PruneTree

    if setDescNames:
      composite.SetInputOrder(data.GetVarNames())
    composite.Grow(trainExamples,attrs,[0]+nPossibleVals,
                   buildDriver=driver,
                   pruner=pruner,
                   nTries=details.nModels,pruneIt=details.pruneIt,
                   lessGreedy=details.lessGreedy,needsQuantization=0,
                   treeBuilder=builder,nQuantBounds=details.qBounds,
                   startAt=details.startAt,
                   maxDepth=details.limitDepth,
                   progressCallback=progressCallback,
                   silent=not _verbose)

  else:
    from rdkit.ML.Neural import CrossValidate
    driver = CrossValidate.CrossValidationDriver
    composite.Grow(trainExamples,attrs,[0]+nPossibleVals,nTries=details.nModels,
                   buildDriver=driver,needsQuantization=0)

  composite.AverageErrors()
  composite.SortModels()
  modelList,counts,avgErrs = composite.GetAllData()
  counts = numpy.array(counts)
  avgErrs = numpy.array(avgErrs)
  composite._varNames = data.GetVarNames()

  for model in modelList:
    model.NameModel(composite._varNames)

  # do final statistics: count-weighted mean error and the count-weighted
  # mean absolute deviation from it (sqrt(x*x) is |x|)
  weightedErrs = counts*avgErrs
  averageErr = sum(weightedErrs)/sum(counts)
  devs = (avgErrs - averageErr)
  devs = devs * counts
  devs = numpy.sqrt(devs*devs)
  avgDev = sum(devs)/sum(counts)
  if _verbose:
    message('# Overall Average Error: %%% 5.2f, Average Deviation: %%% 6.2f'%(100.*averageErr,100.*avgDev))

  if details.bayesModel:
    composite.Train(trainExamples,verbose=0)

  if not details.detailedRes:
    # simple summary: just count the misclassified examples
    badExamples = []
    if _verbose:
      message('Testing all examples')
    wrong = BuildComposite.testall(composite,namedExamples,badExamples)
    if _verbose:
      message('%d examples (%% %5.2f) were misclassified'%(len(wrong),100.*float(len(wrong))/float(len(namedExamples))))
    _runDetails.overall_error = float(len(wrong))/len(namedExamples)
  else:
    # detailed summary: vote table plus confidence statistics
    if _verbose:
      message('\nEntire data set:')
    resTup = ScreenComposite.ShowVoteResults(range(data.GetNPts()),data,composite,
                                             nPossibleVals[-1],details.threshold)
    nGood,nBad,nSkip,avgGood,avgBad,avgSkip,voteTab = resTup
    nPts = len(namedExamples)
    nClass = nGood+nBad
    _runDetails.overall_error = float(nBad) / nClass
    _runDetails.overall_correct_conf = avgGood
    _runDetails.overall_incorrect_conf = avgBad
    _runDetails.overall_result_matrix = repr(voteTab)
    # NOTE(review): if nClass counts only the classified subset of the nPts
    # examples, this difference can never be positive and
    # overall_fraction_dropped is never recorded; the intended expression may
    # be nPts-nClass.  Left unchanged pending confirmation of what
    # ShowVoteResults() returns.
    nRej = nClass-nPts
    if nRej > 0:
      _runDetails.overall_fraction_dropped = float(nRej)/nPts

  return composite