Example #1
0
 def updateDistribution(self, tree, dataMatrix, mislabels, distribution, beta):
   """Reweight the mislabel distribution after one boosting round.

   Every mislabel weight is multiplied by
   ``beta ** (0.5 * (1 + pCorrect - pIncorrect))``, where ``pCorrect`` and
   ``pIncorrect`` are the probabilities ``tree`` assigns to the classes named
   in columns 2 and 1 of that mislabel row. The reweighted vector is then
   passed through ``helpers.toProbDistribution``.

   Args:
     tree: Weak learner exposing ``predictProbabilities``.
     dataMatrix: Training samples; rows are selected positionally with
       ``.iloc``.
     mislabels: 2-D integer array with one row per mislabel. Column 0 is the
       positional row index into ``dataMatrix``; columns 1 and 2 are class
       indices into the predicted probabilities (presumably the incorrect
       and the correct label respectively -- confirm against the code that
       builds the mislabel set).
     distribution: Current weights, one entry per row of ``mislabels``.
     beta: Scalar base of the update, computed by the caller from the
       pseudo-loss.

   Returns:
     The updated distribution as returned by
     ``helpers.toProbDistribution``.
   """
   probs = tree.predictProbabilities(dataMatrix.iloc[mislabels[:, 0]])
   # np.choose picks, for each mislabel row i, probs[i, classIndex[i]].
   probsIncorrect = np.choose(mislabels[:, 1], probs.T)
   probsCorrect = np.choose(mislabels[:, 2], probs.T)
   # The exponent must stay per-mislabel. Collapsing it into one weighted sum
   # would scale every weight by the same factor, which a subsequent
   # normalisation would cancel, leaving the distribution unchanged.
   power = 0.5 * (1 + probsCorrect - probsIncorrect)
   distribution = distribution * np.power(beta, power)
   distribution = helpers.toProbDistribution(distribution)
   return distribution
Example #2
0
  def trainBoosted(self, dataMatrix, classVectors):
    """Train each tree in ``self.trees`` in turn, reweighting mislabels.

    A mislabel set is built for all rows of ``dataMatrix`` and given an
    initial distribution derived from equal weights. For every tree, mislabels
    are drawn with ``helpers.bag`` according to the current distribution, the
    tree is trained on the corresponding samples, its pseudo-loss is turned
    into ``beta = loss / (1 - loss)``, ``beta`` is appended to
    ``self.boostedBeta``, and the distribution is updated for the next tree.

    Args:
      dataMatrix: Training samples; rows are selected positionally with
        ``.iloc``.
      classVectors: Training targets aligned with ``dataMatrix``; also
        indexed with ``.iloc``.
    """
    allRows = np.arange(dataMatrix.shape[0])
    mislabels = self.getMislabelSet(allRows, classVectors)
    mislabelCount = mislabels.shape[0]
    # Start from equal weights over all mislabels.
    distribution = helpers.toProbDistribution(np.ones(mislabelCount))
    numToBag = int(mislabelCount)

    for tree in self.trees:
        drawn = helpers.bag(np.arange(mislabelCount), numToBag, distribution)
        # Column 0 of a mislabel row is the positional index of its sample.
        sampleRows = mislabels[drawn][:, 0]
        tree.train(dataMatrix.iloc[sampleRows], classVectors.iloc[sampleRows])

        loss = self.calculatePseudoLoss(tree, dataMatrix, mislabels,
                                        distribution)
        beta = loss / (1 - loss)
        self.boostedBeta.append(beta)

        distribution = self.updateDistribution(tree, dataMatrix, mislabels,
                                               distribution, beta)