Beispiel #1
0
    def train(features, steps=10):
        """Train an AdaBoost2 ensemble of weighted decision stumps.

        :param features: samples; features[0][0] is a dict whose keys name the
            available features (assumed from the .keys() call — confirm with caller).
        :param steps: maximum number of boosting iterations.
        :return: AdaBoost2 built from the per-iteration best stumps and their
            voting weights.
        """
        classifiers = []
        # Uniform initial sample weights.
        weights = [1 for n in features]
        errors = []
        featureList = features[0][0].keys()

        for i in range(0, steps):
            print("iteration: {0}".format(i))
            minError = 99999999
            # BUG FIX: build a fresh dict each iteration. Previously one shared
            # dict was created before the loop and appended every round, so every
            # entry in `classifiers` aliased the same object and ended up holding
            # only the final iteration's stump/matches/alpha.
            bestStump = {}
            for f in featureList:
                ds, matches = DecisionStump.train(features, f, weights)
                error = ds.getErrorRate()
                if error < minError:
                    minError = error
                    bestStump["stump"] = ds
                    bestStump["matches"] = matches
            # Normalize the raw error to a per-sample rate.
            minError = minError / len(features)
            # Classifier vote weight; max(..., 1e-16) guards against log-by-zero.
            alpha = float(0.5 * log((1.0 - minError) / max(minError, 1e-16)))
            bestStump["alpha"] = alpha
            classifiers.append(bestStump)
            AdaBoost2.setWeights(weights, bestStump["matches"], alpha)
            errors.append(minError)
            print("error={0}".format(minError))
            if minError == 0:
                break

        # Hoist the loop-invariant total; weight each round inversely to its
        # share of the total error.
        totalError = sum(errors)
        votingWeights = [1 - e / totalError for e in errors]
        return AdaBoost2({'featureList': featureList, 'classifiers': classifiers, 'votingWeights': votingWeights})
Beispiel #2
0
 def train(features, steps=10):
     """Run several randomized stump-training rounds and keep the best one.

     :param features: samples; features[0][0] supplies the feature-name keys.
     :param steps: number of restart rounds to attempt.
     :return: AdaBoost4 wrapping the lowest-total-error round's stumps.
     """
     bestStumps = []
     minError = 99999999
     featureList = features[0][0].keys()

     for round_idx in range(steps):
         # Each round restarts from uniform (weight 5) sample weights.
         weights = [5 for _ in features]
         round_stumps = []
         round_errors = []
         # Visit features in a fresh random order every round.
         shuffle(featureList)
         for feature_name in featureList:
             stump, matched = DecisionStump.train(features, feature_name, weights, 15)
             AdaBoost.setWeights(weights, matched)
             round_errors.append(stump.getErrorRate())
             round_stumps.append(stump)

         round_total = sum(round_errors)
         print("error=", round_total)
         if round_total < minError:
             # Best round so far: keep a copy of its stumps and derive
             # voting weights relative to the round's worst stump.
             minError = round_total
             bestStumps = list(round_stumps)
             worst = max(round_errors)
             votingWeights = [1 - e / worst for e in round_errors]

     print("minError=", minError)
     return AdaBoost4({'featureList': featureList, 'classifiers': bestStumps, 'votingWeights': votingWeights})
    def get_hypothesis(self, sampled_train_data):
        """
        Train one stump per configured name and return the most informative.

        :param pandas.DataFrame sampled_train_data: sample to fit each stump on
        :return: self.best_stump
        :type: DecisionStump
        """
        self.stumps = []
        self.max_gain = -float("inf")  # start below any real gain
        self.best_stump = None  # type: DecisionStump

        # First pass: fit a candidate stump for every configured name.
        for stump_name in self.stump_names:
            candidate = DecisionStump(stump_name)
            candidate.train(sampled_train_data)
            self.stumps.append(candidate)

        # Second pass: keep the candidate with the highest information gain
        # (first one wins on ties).
        for candidate in self.stumps:
            gain = candidate.get_info_gain()
            if gain > self.max_gain:
                self.max_gain = gain
                self.best_stump = candidate

        return self.best_stump
Beispiel #4
0
 def train(features, steps=10):
     """Train one decision stump per feature for a cascade stage.

     :param features: samples; features[0][0] supplies the feature-name keys.
     :param steps: forwarded to DecisionStump.train as its iteration budget.
     :return: AdaBoostAtCascade over all trained stumps and their voting weights.
     """
     featureList = features[0][0].keys()
     # Uniform initial sample weights (weight 10 each).
     weights = [10 for _ in features]
     classifiers = []
     errors = []

     for feature_name in featureList:
         stump, matched = DecisionStump.train(features, feature_name, weights, steps)
         AdaBoostAtCascade.setWeights(weights, matched)
         errors.append(stump.getErrorRate())
         classifiers.append(stump)

     # A stump's vote shrinks toward 0 as its error approaches the worst one.
     worst = max(errors)
     votingWeights = [1 - e / worst for e in errors]
     return AdaBoostAtCascade({'featureList': featureList, 'classifiers': classifiers, 'votingWeights': votingWeights})