Example 1
0
 def test5(self):
     """ indicesToUse """
     # FilterData called with indicesToUse covering all points must give
     # exactly the same keep/reject sets as a plain FilterData call.
     cases = [
         (.5, 4, 2),
         (.7, 3, 3),
         (.75, 3, 3),
         (.333, 6, 0),
         (.25, 4, 2),
     ]
     nPts = len(self.d1)
     for frac, expectKeep, expectRej in cases:
         DataUtils.InitRandomNumbers((23, 42))
         keep, rej = DataUtils.FilterData(self.d1, 1, frac,
                                          indicesToUse=range(nPts))
         assert len(keep) == expectKeep, 'bad nKeep (%d != %d)' % (len(keep),
                                                                   expectKeep)
         assert len(rej) == expectRej, 'bad nRej (%d != %d)' % (len(rej),
                                                                expectRej)
         # re-seed and rerun without indicesToUse; results must match exactly
         DataUtils.InitRandomNumbers((23, 42))
         tgtKeep, tgtRej = DataUtils.FilterData(self.d1, 1, frac)
         assert keep == tgtKeep, '%.2f: %s!=%s' % (frac, str(keep),
                                                   str(tgtKeep))
         assert rej == tgtRej, '%.2f: %s!=%s' % (frac, str(rej),
                                                 str(tgtRej))
Example 2
0
 def test4_indicesOnly_indicesToUse(self):
     """ indicesOnly with indicesToUse """
     # With indicesOnly=1 FilterData returns index lists; mapping them back
     # through self.d1 must reproduce the plain FilterData example lists.
     cases = [
         (.5, 4, 2),
         (.7, 3, 3),
         (.75, 3, 3),
         (.333, 6, 0),
         (.25, 4, 2),
     ]
     nPts = len(self.d1)
     for frac, expectKeep, expectRej in cases:
         DataUtils.InitRandomNumbers((23, 42))
         keepIdx, rejIdx = DataUtils.FilterData(self.d1, 1, frac,
                                                indicesToUse=range(nPts),
                                                indicesOnly=1)
         assert len(keepIdx) == expectKeep, 'bad nKeep (%d != %d)' % (
             len(keepIdx), expectKeep)
         assert len(rejIdx) == expectRej, 'bad nRej (%d != %d)' % (
             len(rejIdx), expectRej)
         # resolve the indices into actual examples
         keep = [self.d1[idx] for idx in keepIdx]
         rej = [self.d1[idx] for idx in rejIdx]
         # re-seed and compare against the non-index variant
         DataUtils.InitRandomNumbers((23, 42))
         tgtKeep, tgtRej = DataUtils.FilterData(self.d1, 1, frac)
         assert keep == tgtKeep, '%.2f: %s!=%s' % (frac, str(keep),
                                                   str(tgtKeep))
         assert rej == tgtRej, '%.2f: %s!=%s' % (frac, str(rej),
                                                 str(tgtRej))
Example 3
0
def _balanced_parallel_build_trees(n_trees, forest, X, y, sample_weight,
                                   sample_mask, X_argsorted, seed, verbose):
    """Private function used to build a batch of trees within a job.

    Balanced variant of sklearn's parallel tree builder: when bootstrapping,
    a class-balanced subset of the samples is first drawn with
    DataUtils.FilterData (frac=0.5 of val=1 in column 1 of the (index, y)
    pairs) and the bootstrap resampling happens within that subset.

    Returns the list of fitted trees.
    """
    # Fix: dropped the unused "import random" — all randomness flows through
    # the numpy RandomState produced by check_random_state.
    from sklearn.utils import check_random_state
    from sklearn.utils.fixes import bincount
    MAX_INT = numpy.iinfo(numpy.int32).max
    random_state = check_random_state(seed)

    trees = []
    for i in xrange(n_trees):
        if verbose > 1:
            print("building tree %d of %d" % (i + 1, n_trees))
        # derive an independent per-tree seed from the job-level RNG
        seed = random_state.randint(MAX_INT)

        tree = forest._make_estimator(append=False)
        tree.set_params(compute_importances=forest.compute_importances)
        tree.set_params(random_state=check_random_state(seed))

        if forest.bootstrap:
            n_samples = X.shape[0]
            if sample_weight is None:
                curr_sample_weight = numpy.ones((n_samples, ),
                                                dtype=numpy.float64)
            else:
                curr_sample_weight = sample_weight.copy()

            # draw a balanced subset of (index, label) pairs (indices only);
            # NOTE(review): indicesToUse=0 is passed positionally-false here —
            # presumably meaning "use all points"; confirm against FilterData
            ty = list(enumerate(y))
            indices = DataUtils.FilterData(ty,
                                           val=1,
                                           frac=0.5,
                                           col=1,
                                           indicesToUse=0,
                                           indicesOnly=1)[0]
            # bootstrap: sample with replacement inside the balanced subset
            indices2 = random_state.randint(0, len(indices), len(indices))
            indices = [indices[j] for j in indices2]
            sample_counts = bincount(indices, minlength=n_samples)

            # weight each sample by its draw count; mask out samples never drawn
            curr_sample_weight *= sample_counts
            curr_sample_mask = sample_mask.copy()
            curr_sample_mask[sample_counts == 0] = False

            tree.fit(X,
                     y,
                     sample_weight=curr_sample_weight,
                     sample_mask=curr_sample_mask,
                     X_argsorted=X_argsorted,
                     check_input=False)
            tree.indices = curr_sample_mask
        else:
            tree.fit(X,
                     y,
                     sample_weight=sample_weight,
                     sample_mask=sample_mask,
                     X_argsorted=X_argsorted,
                     check_input=False)
        trees.append(tree)
    return trees
Example 4
0
 def test1(self):
     """ basics """
     # each probe is (fraction, expected #kept, expected #rejected)
     for frac, expectKeep, expectRej in ((.5, 4, 2), (.7, 3, 3), (.75, 3, 3),
                                         (.333, 6, 0), (.25, 4, 2)):
         keep, rej = DataUtils.FilterData(self.d1, 1, frac)
         assert len(keep) == expectKeep, 'bad nKeep (%d != %d)' % (len(keep),
                                                                   expectKeep)
         assert len(rej) == expectRej, 'bad nRej (%d != %d)' % (len(rej),
                                                                expectRej)
Example 5
0
            seed = model._randomSeed
        except AttributeError:
            pass
        else:
            DataUtils.InitRandomNumbers(seed)
        if details.shuffleActivities:
            DataUtils.RandomizeActivities(tmpD, shuffle=1)
        if hasattr(model, '_splitFrac') and (details.doHoldout
                                             or details.doTraining):
            trainIdx, testIdx = SplitData.SplitIndices(tmpD.GetNPts(),
                                                       model._splitFrac,
                                                       silent=1)
            if details.filterFrac != 0.0:
                trainFilt, temp = DataUtils.FilterData(tmpD,
                                                       details.filterVal,
                                                       details.filterFrac,
                                                       -1,
                                                       indicesToUse=trainIdx,
                                                       indicesOnly=1)
                testIdx += temp
                trainIdx = trainFilt
            if details.doTraining:
                testIdx, trainIdx = trainIdx, testIdx
        else:
            testIdx = range(tmpD.GetNPts())

        message('screening %d examples' % (len(testIdx)))
        nTrueActives, screenRes = ScreenModel(
            model,
            descs,
            tmpD,
            picking=details.activeTgt,
Example 6
0
def RunOnData(details, data, progressCallback=None, saveIt=1, setDescNames=0):
    """Build, train and screen a composite model from *data*.

    **Arguments**

      - details: a RunDetails-like parameter holder (seeds, split/filter
        fractions, model-type flags, output names, ...).  Mutated: the
        trained model is stored in details.model, and results may be
        persisted via details.Store.

      - data: the data set; must support GetNPts, GetNamedData, GetNVars,
        GetNPossibleVals and GetVarNames.

      - progressCallback: (optional) callable forwarded to composite.Grow.

      - saveIt: if nonzero, the composite is pickled to details.outName.

      - setDescNames: if nonzero, descriptor names come from
        details._descNames instead of data.GetVarNames().

    **Returns** the trained composite.

    NOTE(review): this code is Python 2 era (cPickle, list-returning
    dict.keys() with .sort(), range used as a list) — confirm interpreter
    before porting.
    """
    nExamples = data.GetNPts()
    # seed the shared RNG: fixed seed when lockRandom is set, random otherwise
    if details.lockRandom:
        seed = details.randomSeed
    else:
        import random
        seed = (random.randint(0, 1e6), random.randint(0, 1e6))
    DataUtils.InitRandomNumbers(seed)
    testExamples = []
    # optional activity scrambling (y-randomization) for validation runs
    if details.shuffleActivities == 1:
        DataUtils.RandomizeActivities(data, shuffle=1, runDetails=details)
    elif details.randomActivities == 1:
        DataUtils.RandomizeActivities(data, shuffle=0, runDetails=details)

    namedExamples = data.GetNamedData()
    # split into training / hold-out sets when requested
    if details.splitRun == 1:
        trainIdx, testIdx = SplitData.SplitIndices(len(namedExamples),
                                                   details.splitFrac,
                                                   silent=not _verbose)

        trainExamples = [namedExamples[x] for x in trainIdx]
        testExamples = [namedExamples[x] for x in testIdx]
    else:
        testExamples = []
        testIdx = []
        trainIdx = range(len(namedExamples))
        trainExamples = namedExamples

    if details.filterFrac != 0.0:
        # if we're doing quantization on the fly, we need to handle that here:
        if hasattr(details, 'activityBounds') and details.activityBounds:
            tExamples = []
            bounds = details.activityBounds
            for pt in trainExamples:
                pt = pt[:]  # shallow copy so the original example is untouched
                act = pt[-1]
                placed = 0
                bound = 0
                # find the first bound the activity value falls below
                while not placed and bound < len(bounds):
                    if act < bounds[bound]:
                        pt[-1] = bound
                        placed = 1
                    else:
                        bound += 1
                if not placed:
                    # above every bound: highest quantized class
                    pt[-1] = bound
                tExamples.append(pt)
        else:
            bounds = None
            tExamples = trainExamples
        # filter the training set; points filtered out are moved to the
        # hold-out set rather than discarded
        trainIdx, temp = DataUtils.FilterData(tExamples,
                                              details.filterVal,
                                              details.filterFrac,
                                              -1,
                                              indicesOnly=1)
        tmp = [trainExamples[x] for x in trainIdx]
        testExamples += [trainExamples[x] for x in temp]
        trainExamples = tmp

        # report class counts after filtering
        counts = DataUtils.CountResults(trainExamples, bounds=bounds)
        ks = counts.keys()  # py2: a sortable list
        ks.sort()
        message('Result Counts in training set:')
        for k in ks:
            message(str((k, counts[k])))
        counts = DataUtils.CountResults(testExamples, bounds=bounds)
        ks = counts.keys()
        ks.sort()
        message('Result Counts in test set:')
        for k in ks:
            message(str((k, counts[k])))
    nExamples = len(trainExamples)
    message('Training with %d examples' % (nExamples))

    # descriptor columns to train on; drop those flagged with -1 possible vals
    nVars = data.GetNVars()
    attrs = range(1, nVars + 1)
    nPossibleVals = data.GetNPossibleVals()
    for i in range(1, len(nPossibleVals)):
        if nPossibleVals[i - 1] == -1:
            attrs.remove(i)

    # optionally dump the train/test split for later reuse
    if details.pickleDataFileName != '':
        pickleDataFile = open(details.pickleDataFileName, 'wb+')
        cPickle.dump(trainExamples, pickleDataFile)
        cPickle.dump(testExamples, pickleDataFile)
        pickleDataFile.close()

    if details.bayesModel:
        composite = BayesComposite.BayesComposite()
    else:
        composite = Composite.Composite()

    # record the run parameters on the model for reproducibility
    composite._randomSeed = seed
    composite._splitFrac = details.splitFrac
    composite._shuffleActivities = details.shuffleActivities
    composite._randomizeActivities = details.randomActivities

    if hasattr(details, 'filterFrac'):
        composite._filterFrac = details.filterFrac
    if hasattr(details, 'filterVal'):
        composite._filterVal = details.filterVal

    composite.SetModelFilterData(details.modelFilterFrac,
                                 details.modelFilterVal)

    composite.SetActivityQuantBounds(details.activityBounds)
    nPossibleVals = data.GetNPossibleVals()
    if details.activityBounds:
        nPossibleVals[-1] = len(details.activityBounds) + 1

    if setDescNames:
        composite.SetInputOrder(data.GetVarNames())
        composite.SetDescriptorNames(details._descNames)
    else:
        composite.SetDescriptorNames(data.GetVarNames())
    composite.SetActivityQuantBounds(details.activityBounds)
    if details.nModels == 1:
        # a single model gets all the data: no internal hold-out
        details.internalHoldoutFrac = 0.0

    # ---- dispatch on model type: trees / sig-trees / KNN / bayes / neural ----
    if details.useTrees:
        from rdkit.ML.DecTree import CrossValidate, PruneTree
        if details.qBounds != []:
            from rdkit.ML.DecTree import BuildQuantTree
            builder = BuildQuantTree.QuantTreeBoot
        else:
            from rdkit.ML.DecTree import ID3
            builder = ID3.ID3Boot
        driver = CrossValidate.CrossValidationDriver
        pruner = PruneTree.PruneTree

        composite.SetQuantBounds(details.qBounds)
        nPossibleVals = data.GetNPossibleVals()
        if details.activityBounds:
            nPossibleVals[-1] = len(details.activityBounds) + 1
        composite.Grow(trainExamples,
                       attrs,
                       nPossibleVals=[0] + nPossibleVals,
                       buildDriver=driver,
                       pruner=pruner,
                       nTries=details.nModels,
                       pruneIt=details.pruneIt,
                       lessGreedy=details.lessGreedy,
                       needsQuantization=0,
                       treeBuilder=builder,
                       nQuantBounds=details.qBounds,
                       startAt=details.startAt,
                       maxDepth=details.limitDepth,
                       progressCallback=progressCallback,
                       holdOutFrac=details.internalHoldoutFrac,
                       replacementSelection=details.replacementSelection,
                       recycleVars=details.recycleVars,
                       randomDescriptors=details.randomDescriptors,
                       silent=not _verbose)

    elif details.useSigTrees:
        from rdkit.ML.DecTree import CrossValidate
        from rdkit.ML.DecTree import BuildSigTree
        builder = BuildSigTree.SigTreeBuilder
        driver = CrossValidate.CrossValidationDriver
        nPossibleVals = data.GetNPossibleVals()
        if details.activityBounds:
            nPossibleVals[-1] = len(details.activityBounds) + 1
        if hasattr(details, 'sigTreeBiasList'):
            biasList = details.sigTreeBiasList
        else:
            biasList = None
        if hasattr(details, 'useCMIM'):
            useCMIM = details.useCMIM
        else:
            useCMIM = 0
        if hasattr(details, 'allowCollections'):
            allowCollections = details.allowCollections
        else:
            allowCollections = False
        composite.Grow(trainExamples,
                       attrs,
                       nPossibleVals=[0] + nPossibleVals,
                       buildDriver=driver,
                       nTries=details.nModels,
                       needsQuantization=0,
                       treeBuilder=builder,
                       maxDepth=details.limitDepth,
                       progressCallback=progressCallback,
                       holdOutFrac=details.internalHoldoutFrac,
                       replacementSelection=details.replacementSelection,
                       recycleVars=details.recycleVars,
                       randomDescriptors=details.randomDescriptors,
                       biasList=biasList,
                       useCMIM=useCMIM,
                       allowCollection=allowCollections,
                       silent=not _verbose)

    elif details.useKNN:
        from rdkit.ML.KNN import CrossValidate
        from rdkit.ML.KNN import DistFunctions

        driver = CrossValidate.CrossValidationDriver
        dfunc = ''
        if (details.knnDistFunc == "Euclidean"):
            dfunc = DistFunctions.EuclideanDist
        elif (details.knnDistFunc == "Tanimoto"):
            dfunc = DistFunctions.TanimotoDist
        else:
            assert 0, "Bad KNN distance metric value"

        composite.Grow(trainExamples,
                       attrs,
                       nPossibleVals=[0] + nPossibleVals,
                       buildDriver=driver,
                       nTries=details.nModels,
                       needsQuantization=0,
                       numNeigh=details.knnNeighs,
                       holdOutFrac=details.internalHoldoutFrac,
                       distFunc=dfunc)

    elif details.useNaiveBayes or details.useSigBayes:
        from rdkit.ML.NaiveBayes import CrossValidate
        driver = CrossValidate.CrossValidationDriver
        if not (hasattr(details, 'useSigBayes') and details.useSigBayes):
            composite.Grow(trainExamples,
                           attrs,
                           nPossibleVals=[0] + nPossibleVals,
                           buildDriver=driver,
                           nTries=details.nModels,
                           needsQuantization=0,
                           nQuantBounds=details.qBounds,
                           holdOutFrac=details.internalHoldoutFrac,
                           replacementSelection=details.replacementSelection,
                           mEstimateVal=details.mEstimateVal,
                           silent=not _verbose)
        else:
            if hasattr(details, 'useCMIM'):
                useCMIM = details.useCMIM
            else:
                useCMIM = 0

            composite.Grow(trainExamples,
                           attrs,
                           nPossibleVals=[0] + nPossibleVals,
                           buildDriver=driver,
                           nTries=details.nModels,
                           needsQuantization=0,
                           nQuantBounds=details.qBounds,
                           mEstimateVal=details.mEstimateVal,
                           useSigs=True,
                           useCMIM=useCMIM,
                           holdOutFrac=details.internalHoldoutFrac,
                           replacementSelection=details.replacementSelection,
                           silent=not _verbose)


##   elif details.useSVM:
##     from rdkit.ML.SVM import CrossValidate
##     driver = CrossValidate.CrossValidationDriver
##     composite.Grow(trainExamples, attrs, nPossibleVals=[0]+nPossibleVals,
##                    buildDriver=driver, nTries=details.nModels,
##                    needsQuantization=0,
##                    cost=details.svmCost,gamma=details.svmGamma,
##                    weights=details.svmWeights,degree=details.svmDegree,
##                    type=details.svmType,kernelType=details.svmKernel,
##                    coef0=details.svmCoeff,eps=details.svmEps,nu=details.svmNu,
##                    cache_size=details.svmCache,shrinking=details.svmShrink,
##                    dataType=details.svmDataType,
##                    holdOutFrac=details.internalHoldoutFrac,
##                    replacementSelection=details.replacementSelection,
##                    silent=not _verbose)

    else:
        # default: neural-network composite
        from rdkit.ML.Neural import CrossValidate
        driver = CrossValidate.CrossValidationDriver
        composite.Grow(trainExamples,
                       attrs, [0] + nPossibleVals,
                       nTries=details.nModels,
                       buildDriver=driver,
                       needsQuantization=0)

    composite.AverageErrors()
    composite.SortModels()
    modelList, counts, avgErrs = composite.GetAllData()
    counts = numpy.array(counts)
    avgErrs = numpy.array(avgErrs)
    composite._varNames = data.GetVarNames()

    for i in range(len(modelList)):
        modelList[i].NameModel(composite._varNames)

    # do final statistics
    weightedErrs = counts * avgErrs
    averageErr = sum(weightedErrs) / sum(counts)
    devs = (avgErrs - averageErr)
    devs = devs * counts
    devs = numpy.sqrt(devs * devs)
    avgDev = sum(devs) / sum(counts)
    message('# Overall Average Error: %%% 5.2f, Average Deviation: %%% 6.2f' %
            (100. * averageErr, 100. * avgDev))

    if details.bayesModel:
        composite.Train(trainExamples, verbose=0)

    # blow out the saved examples and then save the composite:
    composite.ClearModelExamples()
    if saveIt:
        composite.Pickle(details.outName)
    details.model = DbModule.binaryHolder(cPickle.dumps(composite))

    # ---- screening / reporting ----
    badExamples = []
    if not details.detailedRes and (not hasattr(details, 'noScreen')
                                    or not details.noScreen):
        if details.splitRun:
            message('Testing all hold-out examples')
            wrong = testall(composite, testExamples, badExamples)
            message('%d examples (%% %5.2f) were misclassified' %
                    (len(wrong),
                     100. * float(len(wrong)) / float(len(testExamples))))
            _runDetails.holdout_error = float(len(wrong)) / len(testExamples)
        else:
            message('Testing all examples')
            wrong = testall(composite, namedExamples, badExamples)
            message('%d examples (%% %5.2f) were misclassified' %
                    (len(wrong),
                     100. * float(len(wrong)) / float(len(namedExamples))))
            _runDetails.overall_error = float(len(wrong)) / len(namedExamples)

    if details.detailedRes:
        message('\nEntire data set:')
        resTup = ScreenComposite.ShowVoteResults(range(data.GetNPts()), data,
                                                 composite, nPossibleVals[-1],
                                                 details.threshold)
        nGood, nBad, nSkip, avgGood, avgBad, avgSkip, voteTab = resTup
        nPts = len(namedExamples)
        nClass = nGood + nBad
        _runDetails.overall_error = float(nBad) / nClass
        _runDetails.overall_correct_conf = avgGood
        _runDetails.overall_incorrect_conf = avgBad
        _runDetails.overall_result_matrix = repr(voteTab)
        nRej = nClass - nPts
        if nRej > 0:
            _runDetails.overall_fraction_dropped = float(nRej) / nPts

        if details.splitRun:
            message('\nHold-out data:')
            resTup = ScreenComposite.ShowVoteResults(range(len(testExamples)),
                                                     testExamples, composite,
                                                     nPossibleVals[-1],
                                                     details.threshold)
            nGood, nBad, nSkip, avgGood, avgBad, avgSkip, voteTab = resTup
            nPts = len(testExamples)
            nClass = nGood + nBad
            _runDetails.holdout_error = float(nBad) / nClass
            _runDetails.holdout_correct_conf = avgGood
            _runDetails.holdout_incorrect_conf = avgBad
            _runDetails.holdout_result_matrix = repr(voteTab)
            nRej = nClass - nPts
            if nRej > 0:
                _runDetails.holdout_fraction_dropped = float(nRej) / nPts

    # optional persistence of run results to a database table
    if details.persistTblName and details.dbName:
        message('Updating results table %s:%s' %
                (details.dbName, details.persistTblName))
        details.Store(db=details.dbName, table=details.persistTblName)

    # optional dump of misclassified examples
    if details.badName != '':
        badFile = open(details.badName, 'w+')
        for i in range(len(badExamples)):
            ex = badExamples[i]
            vote = wrong[i]
            outStr = '%s\t%s\n' % (ex, vote)
            badFile.write(outStr)
        badFile.close()

    composite.ClearModelExamples()
    return composite
Example 7
0
def BalanceComposite(details,composite,data1=None,data2=None):
  """ balances the composite using the parameters provided in details

   **Arguments**

     - details a _CompositeRun.RunDetails_ object

     - composite: the composite model to be balanced

     - data1: (optional) if provided, this should be the
       data set used to construct the original models

     - data2: (optional) if provided, this should be the
       data set used to construct the new individual models

   **Returns** a list with one balanced composite per weight in
   details.balWeight, or the original composite unchanged when balancing
   is disabled / the data can't be loaded.

   NOTE: details fields (tableName, dbName, splitFrac, randomSeed) are
   temporarily modified while loading; tableName/dbName are restored.
  """
  # nothing to do if balancing is off or asks for more models than exist
  if not details.balCnt or details.balCnt > len(composite):
    return composite
  message("Balancing Composite")

  #
  # start by getting data set 1: which is the data set used to build the
  #  original models
  #
  if data1 is None:
    message("\tReading First Data Set")
    fName = details.balTable.strip()
    # temporarily point details at the balance table/db, then restore
    tmp = details.tableName
    details.tableName = fName
    dbName = details.dbName
    details.dbName = details.balDb
    data1 = details.GetDataSet()
    details.tableName = tmp
    details.dbName = dbName
  if data1 is None:
    return composite
  # replay the original run's split/seed so the same training set is rebuilt
  details.splitFrac = composite._splitFrac
  details.randomSeed = composite._randomSeed
  DataUtils.InitRandomNumbers(details.randomSeed)
  if details.shuffleActivities == 1:
    DataUtils.RandomizeActivities(data1,shuffle=1,runDetails=details)
  elif details.randomActivities == 1:
    DataUtils.RandomizeActivities(data1,shuffle=0,runDetails=details)
  namedExamples = data1.GetNamedData()
  if details.balDoHoldout or details.balDoTrain:
    trainIdx,testIdx = SplitData.SplitIndices(len(namedExamples),details.splitFrac,
                                              silent=1)
    trainExamples = [namedExamples[x] for x in trainIdx]
    testExamples = [namedExamples[x] for x in testIdx]
    if details.filterFrac != 0.0:
      # filtered-out training points are appended to the hold-out set
      trainIdx,temp = DataUtils.FilterData(trainExamples,details.filterVal,
                                           details.filterFrac,-1,
                                           indicesOnly=1)
      tmp = [trainExamples[x] for x in trainIdx]
      testExamples += [trainExamples[x] for x in temp]
      trainExamples = tmp
    if details.balDoHoldout:
      # balance against the hold-out set instead of the training set
      testExamples,trainExamples = trainExamples,testExamples
  else:
    trainExamples = namedExamples
  dataSet1 = trainExamples
  cols1 = [x.upper() for x in data1.GetVarNames()]
  data1 = None  # release the data set

  #
  # now grab data set 2: the data used to build the new individual models
  #
  if data2 is None:
    message("\tReading Second Data Set")
    data2 = details.GetDataSet()
  if data2 is None:
    return composite
  # same seed/shuffle replay for the second data set
  details.splitFrac = composite._splitFrac
  details.randomSeed = composite._randomSeed
  DataUtils.InitRandomNumbers(details.randomSeed)
  if details.shuffleActivities == 1:
    DataUtils.RandomizeActivities(data2,shuffle=1,runDetails=details)
  elif details.randomActivities == 1:
    DataUtils.RandomizeActivities(data2,shuffle=0,runDetails=details)
  dataSet2 = data2.GetNamedData()
  cols2 = [x.upper() for x in data2.GetVarNames()]
  data2 = None

  # and balance it:
  res = []
  weights = details.balWeight
  # accept a scalar weight or a sequence of weights (py2 types module)
  if type(weights) not in (types.TupleType,types.ListType):
    weights = (weights,)
  for weight in weights:
    message("\tBalancing with Weight: %.4f"%(weight))
    res.append(AdjustComposite.BalanceComposite(composite,dataSet1,dataSet2,
                                                weight,
                                                details.balCnt,
                                                names1=cols1,names2=cols2))
  return res
Example 8
0
  def Grow(self,examples,attrs,nPossibleVals,buildDriver,pruner=None,
           nTries=10,pruneIt=0,
           needsQuantization=1,progressCallback=None,
           **buildArgs):
    """ Grows the composite

      **Arguments**

       - examples: a list of examples to be used in training

       - attrs: a list of the variables to be used in training

       - nPossibleVals: this is used to provide a list of the number
          of possible values for each variable.  It is used if the
          local quantBounds have not been set (for example for when you
          are working with data which is already quantized).

       - buildDriver: the function to call to build the new models

       - pruner: a function used to "prune" (reduce the complexity of)
          the resulting model.

       - nTries: the number of new models to add

       - pruneIt: toggles whether or not pruning is done

       - needsQuantization: used to indicate whether or not this type of model
          requires quantized data

       - progressCallback: (optional) callable invoked with the cycle index
          after each model is added

       - **buildArgs: all other keyword args are passed to _buildDriver_.
          Note: 'silent' and 'calcTotalError' are overridden below.

      **Note**

        - new models are *added* to the existing ones

    """
    # force the driver itself to be quiet and report total error; keep the
    # caller's 'silent' preference for our own per-cycle printing
    silent = buildArgs.get('silent',0)
    buildArgs['silent']=1
    buildArgs['calcTotalError']=1

    # remap inputs to the composite's expected column order, if one is set
    if self._mapOrder is not None:
      examples = map(self._RemapInput,examples)
    # quantize activities on the fly when activity bounds are configured
    if self.GetActivityQuantBounds():
      for i in range(len(examples)):
        examples[i] = self.QuantizeActivity(examples[i])
        nPossibleVals[-1]=len(self.GetActivityQuantBounds())+1
    if self.nPossibleVals is None:
      self.nPossibleVals = nPossibleVals[:]
    # quantize descriptors if this model type needs quantized data
    if needsQuantization:
      trainExamples = [None]*len(examples)
      nPossibleVals = self.nPossibleVals
      for i in range(len(examples)):
        trainExamples[i] = self.QuantizeExample(examples[i],self.quantBounds)
    else:
      trainExamples = examples

    for i in range(nTries):
      trainSet = None

      # per-model filtering: each model may train on a filtered subset
      if (hasattr(self, '_modelFilterFrac')) and (self._modelFilterFrac != 0) :
        trainIdx, temp = DataUtils.FilterData(trainExamples, self._modelFilterVal,
                                              self._modelFilterFrac,-1, indicesOnly=1)
        trainSet = [trainExamples[x] for x in trainIdx]

      else:
        trainSet = trainExamples

      #print("Training model %i with %i out of %i examples"%(i, len(trainSet), len(trainExamples)))
      model,frac = buildDriver(*(trainSet,attrs,nPossibleVals), **buildArgs)
      if pruneIt:
        model,frac2 = pruner(model,model.GetTrainingExamples(),
                            model.GetTestExamples(),
                            minimizeTestErrorOnly=0)
        frac = frac2
      # when filtering was applied, translate the model's training indices
      # back into indices of the full (unfiltered) training set
      if hasattr(self, '_modelFilterFrac') and self._modelFilterFrac!=0 and \
         hasattr(model,'_trainIndices'):
        # correct the model's training indices:
        trainIndices = [trainIdx[x] for x in model._trainIndices]
        model._trainIndices = trainIndices

      self.AddModel(model,frac,needsQuantization)
      # progress line roughly every 10% of the cycles (always when nTries<10)
      if not silent and (nTries < 10 or i % (nTries/10) == 0):
        print('Cycle: % 4d'%(i))
      if progressCallback is not None:
        progressCallback(i)