Ejemplo n.º 1
0
 def testQuantLoad(self):
   " testing QuantDataSet load"
   ok = 1
   try:
     self.d = DataUtils.BuildQuantDataSet(RDConfig.RDCodeDir+'/ML/Data/test_data/test.qdat')
   except:
     ok = 0
   assert ok,'BuildQuantDataSet failed'
Ejemplo n.º 2
0
def RunIt(details, progressCallback=None, saveIt=1, setDescNames=0):
    """ does the actual work of building a composite model

    **Arguments**

      - details:  a _CompositeRun.CompositeRun_ object containing details
        (options, parameters, etc.) about the run

      - progressCallback: (optional) a function which is called with a single
        argument (the number of models built so far) after each model is built.

      - saveIt: (optional) if this is nonzero, the resulting model will be pickled
        and dumped to the filename specified in _details.outName_

      - setDescNames: (optional) if nonzero, the composite's _SetInputOrder()_ method
        will be called using the results of the data set's _GetVarNames()_ method;
        it is assumed that the details object has a _descNames attribute which
        is passed to the composites _SetDescriptorNames()_ method.  Otherwise
        (the default), _SetDescriptorNames()_ gets the results of _GetVarNames()_.

    **Returns**

      the composite model constructed


  """
    details.rundate = time.asctime()

    fName = details.tableName.strip()
    if details.outName == '':
        details.outName = fName + '.pkl'
    if not details.dbName:
        if details.qBounds != []:
            data = DataUtils.TextFileToData(fName)
        else:
            data = DataUtils.BuildQuantDataSet(fName)
    elif details.useSigTrees or details.useSigBayes:
        details.tableName = fName
        data = details.GetDataSet(pickleCol=0,
                                  pickleClass=DataStructs.ExplicitBitVect)
    elif details.qBounds != [] or not details.useTrees:
        details.tableName = fName
        data = details.GetDataSet()
    else:
        data = DataUtils.DBToQuantData(
            details.dbName,  # Function no longer defined
            fName,
            quantName=details.qTableName,
            user=details.dbUser,
            password=details.dbPassword)

    composite = RunOnData(details,
                          data,
                          progressCallback=progressCallback,
                          saveIt=saveIt,
                          setDescNames=setDescNames)
    return composite
Ejemplo n.º 3
0
 def setUpQuantLoad(self):
   self.d = DataUtils.BuildQuantDataSet(RDConfig.RDCodeDir+'/ML/Data/test_data/test.qdat')
Ejemplo n.º 4
0
def GrowIt(details,composite,progressCallback=None,
           saveIt=1,setDescNames=0,data=None):
  """ does the actual work of building a composite model

    **Arguments**

      - details:  a _CompositeRun.CompositeRun_ object containing details
        (options, parameters, etc.) about the run

      - composite: the composite model to grow
      
      - progressCallback: (optional) a function which is called with a single
        argument (the number of models built so far) after each model is built.

      - saveIt: (optional) if this is nonzero, the resulting model will be pickled
        and dumped to the filename specified in _details.outName_

      - setDescNames: (optional) if nonzero, the composite's _SetInputOrder()_ method
        will be called using the results of the data set's _GetVarNames()_ method;
        it is assumed that the details object has a _descNames attribute which
        is passed to the composites _SetDescriptorNames()_ method.  Otherwise
        (the default), _SetDescriptorNames()_ gets the results of _GetVarNames()_.

      - data: (optional) the data set to be used.  If this is not provided, the
        data set described in details will be used.
        
    **Returns**

      the enlarged composite model


  """
  details.rundate = time.asctime()

  if data is None:
    fName = details.tableName.strip()
    if details.outName == '':
      details.outName = fName + '.pkl'
    if details.dbName == '':
      data = DataUtils.BuildQuantDataSet(fName)
    elif details.qBounds != []:
      details.tableName = fName
      data = details.GetDataSet()
    else:
      data = DataUtils.DBToQuantData(details.dbName,fName,quantName=details.qTableName,
                                     user=details.dbUser,password=details.dbPassword)

  nExamples = data.GetNPts()
  seed = composite._randomSeed
  DataUtils.InitRandomNumbers(seed)
  testExamples = [] 
  if details.shuffleActivities == 1:
    DataUtils.RandomizeActivities(data,shuffle=1,runDetails=details)
  elif details.randomActivities == 1:
    DataUtils.RandomizeActivities(data,shuffle=0,runDetails=details)

  namedExamples = data.GetNamedData()
  trainExamples = namedExamples
  nExamples = len(trainExamples)
  message('Training with %d examples'%(nExamples))
  message('\t%d descriptors'%(len(trainExamples[0])-2))
  nVars = data.GetNVars()
  nPossibleVals = composite.nPossibleVals
  attrs = range(1,nVars+1)

  if details.useTrees:
    from rdkit.ML.DecTree import CrossValidate,PruneTree
    if details.qBounds != []:
      from rdkit.ML.DecTree import BuildQuantTree
      builder = BuildQuantTree.QuantTreeBoot
    else:
      from rdkit.ML.DecTree import ID3
      builder = ID3.ID3Boot
    driver = CrossValidate.CrossValidationDriver
    pruner = PruneTree.PruneTree

    if setDescNames:
      composite.SetInputOrder(data.GetVarNames())
    composite.Grow(trainExamples,attrs,[0]+nPossibleVals,
                   buildDriver=driver,
                   pruner=pruner,
                   nTries=details.nModels,pruneIt=details.pruneIt,
                   lessGreedy=details.lessGreedy,needsQuantization=0,
                   treeBuilder=builder,nQuantBounds=details.qBounds,
                   startAt=details.startAt,
                   maxDepth=details.limitDepth,
                   progressCallback=progressCallback,
                   silent=not _verbose)


  else:
    from rdkit.ML.Neural import CrossValidate
    driver = CrossValidate.CrossValidationDriver
    composite.Grow(trainExamples,attrs,[0]+nPossibleVals,nTries=details.nModels,
                   buildDriver=driver,needsQuantization=0)
    
  composite.AverageErrors()
  composite.SortModels()
  modelList,counts,avgErrs = composite.GetAllData()
  counts = numpy.array(counts)
  avgErrs = numpy.array(avgErrs)
  composite._varNames = data.GetVarNames()

  for i in range(len(modelList)):
    modelList[i].NameModel(composite._varNames)

  # do final statistics
  weightedErrs = counts*avgErrs
  averageErr = sum(weightedErrs)/sum(counts)
  devs = (avgErrs - averageErr)
  devs = devs * counts
  devs = numpy.sqrt(devs*devs)
  avgDev = sum(devs)/sum(counts)
  if _verbose:
    message('# Overall Average Error: %%% 5.2f, Average Deviation: %%% 6.2f'%(100.*averageErr,100.*avgDev))
  
  if details.bayesModel:
    composite.Train(trainExamples,verbose=0)

  badExamples = []
  if not details.detailedRes:
    if _verbose:
      message('Testing all examples')
    wrong = BuildComposite.testall(composite,namedExamples,badExamples)
    if _verbose:
      message('%d examples (%% %5.2f) were misclassified'%(len(wrong),100.*float(len(wrong))/float(len(namedExamples))))
    _runDetails.overall_error = float(len(wrong))/len(namedExamples)

  if details.detailedRes:
    if _verbose:
      message('\nEntire data set:')
    resTup = ScreenComposite.ShowVoteResults(range(data.GetNPts()),data,composite,
                                             nPossibleVals[-1],details.threshold)
    nGood,nBad,nSkip,avgGood,avgBad,avgSkip,voteTab = resTup
    nPts = len(namedExamples)
    nClass = nGood+nBad
    _runDetails.overall_error = float(nBad) / nClass
    _runDetails.overall_correct_conf = avgGood
    _runDetails.overall_incorrect_conf = avgBad
    _runDetails.overall_result_matrix = repr(voteTab)
    nRej = nClass-nPts
    if nRej > 0:
      _runDetails.overall_fraction_dropped = float(nRej)/nPts
      
  return composite
Ejemplo n.º 5
0
            outFileName = val
        elif arg == '-x':
            xScale = int(val)
        elif arg == '-y':
            yScale = int(val)
        elif arg == '-d':
            dbName = val
        elif arg == '-h':
            Usage()
        else:
            Usage()
    modelFile = open(extra[0], 'rb')
    model = cPickle.load(modelFile)

    fName = extra[1]
    if dbName == '':
        data = DataUtils.BuildQuantDataSet(fName)
    else:
        data = DataUtils.DBToQuantData(dbName,
                                       fName)  # Function no longer defined

    dataSet = data.GetNamedData()

    img = VoteAndBuildImage(model,
                            dataSet,
                            badOnly=badOnly,
                            sortTrueVals=sortTrueVals,
                            xScale=xScale,
                            yScale=yScale)
    img.save(outFileName)
Ejemplo n.º 6
0
 def testQuantLoad(self):
     # " testing QuantDataSet load"
     self.d = DataUtils.BuildQuantDataSet(RDConfig.RDCodeDir +
                                          '/ML/Data/test_data/test.qdat')