Ejemplo n.º 1
0
 def bagAndTrain(self, tree, dataMatrix, classVectors):
   """Train ``tree`` on a bagged subset of rows and report which rows were used.

   The number of rows requested from ``helpers.bag`` is
   ``helpers.HyperParams.FRAC_PER_BAG`` times the row count of
   ``dataMatrix``, truncated to an integer.

   Args:
     tree: Object exposing ``train(data, classes)``.
     dataMatrix: Row-indexable table supporting ``.shape`` and ``.iloc``.
     classVectors: Per-row targets supporting ``.iloc``; selected with the
       same positions as ``dataMatrix``.

   Returns:
     A boolean NumPy array with one entry per row of ``dataMatrix``; an
     entry is ``True`` when that row position was among those returned by
     ``helpers.bag``.
   """
   rowCount = dataMatrix.shape[0]
   sampleSize = int(helpers.HyperParams.FRAC_PER_BAG * rowCount)
   chosen = helpers.bag(np.arange(rowCount), sampleSize)
   tree.train(dataMatrix.iloc[chosen], classVectors.iloc[chosen])

   # Turn the list of chosen positions into a per-row membership mask.
   inBag = np.zeros(rowCount, dtype=bool)
   inBag[chosen] = True
   return inBag
Ejemplo n.º 2
0
  def trainBoosted(self, dataMatrix, classVectors):
    """Train every tree in ``self.trees`` in sequence with a reweighted sample.

    A mislabel set is built once via ``self.getMislabelSet`` and a
    distribution over its entries is initialised from an all-ones vector
    passed through ``helpers.toProbDistribution``. For each tree, entries
    are drawn with ``helpers.bag`` under the current distribution, the tree
    is trained on the rows named by column 0 of the drawn entries, and the
    pseudo-loss from ``self.calculatePseudoLoss`` yields
    ``beta = loss / (1 - loss)``. Each beta is appended to
    ``self.boostedBeta`` and the distribution is replaced by the result of
    ``self.updateDistribution``.

    Args:
      dataMatrix: Row-indexable table supporting ``.shape`` and ``.iloc``.
      classVectors: Per-row targets supporting ``.iloc``.

    Returns:
      None. Results are recorded on the trees and in ``self.boostedBeta``.
    """
    allRows = np.arange(dataMatrix.shape[0])
    mislabels = self.getMislabelSet(allRows, classVectors)

    pairCount = mislabels.shape[0]
    distribution = helpers.toProbDistribution(np.ones(pairCount))
    numToBag = int(pairCount)

    for tree in self.trees:
        # A fresh position array is built per round, as in the original,
        # in case helpers.bag modifies its first argument.
        drawn = helpers.bag(np.arange(mislabels.shape[0]),
                            numToBag, distribution)
        # Column 0 of each drawn entry is used as the training row position.
        trainRows = mislabels[drawn][:, 0]
        tree.train(dataMatrix.iloc[trainRows], classVectors.iloc[trainRows])

        loss = self.calculatePseudoLoss(tree, dataMatrix,
                                        mislabels, distribution)
        # NOTE(review): this ratio is undefined when loss == 1; confirm
        # calculatePseudoLoss cannot return that value.
        beta = loss / (1 - loss)
        self.boostedBeta.append(beta)
        distribution = self.updateDistribution(tree, dataMatrix, mislabels,
                                               distribution, beta)