예제 #1
0
def ada_boosting(X, y, L):
    """Train a boosted ensemble of decision stumps (AdaBoost-style).

    Runs up to ``L`` boosting rounds. Each round fits a stump with
    ``decisionStump.train`` on the current example weights, measures the
    stump's weighted training error, derives its vote weight ``alpha`` and
    re-weights the examples: misclassified examples are scaled by
    ``exp(alpha)``, correctly classified ones by ``exp(-alpha)``.

    Args:
        X: Sequence of training examples, passed one at a time to
            ``decisionStump.predict``.
        y: Sequence of true labels, aligned with ``X``.
        L: Maximum number of boosting rounds.

    Returns:
        A list of dicts with keys ``"stump"``, ``"bestFeature"`` and
        ``"alpha"``, in training order. It holds fewer than ``L`` entries
        when a round reaches zero weighted error; that round's stump is
        not added and training stops.
    """
    # Start from uniform example weights.
    D = [1.0 / len(X)] * len(X)

    ensemble = []
    # A dedicated name for the round index: the per-example loops below
    # must not clobber it, and it is what the early-exit message reports.
    for iteration in range(L):
        bestFeature, stump = decisionStump.train(X, y, D)

        # Evaluate the stump once per example and reuse the outcome for
        # both the error estimate and the weight update.
        missed = [
            decisionStump.predict(example, stump, bestFeature) != true_label
            for example, true_label in zip(X, y)
        ]

        # Weighted error; the weights are not renormalized between rounds,
        # so divide by their current total.
        errorSum = sum(weight for weight, miss in zip(D, missed) if miss)
        mean_error = errorSum / sum(D)
        if mean_error == 0:
            # sys.stderr.write behaves the same on Python 2 and Python 3.
            sys.stderr.write(
                "achieved perfect training, exiting at iteration: %s\n"
                % iteration
            )
            break

        # NOTE(review): a weighted error of exactly 1 makes the log argument
        # 0; how that surfaces depends on which `log` the file imports.
        alpha = .5 * log((1 - mean_error) / mean_error)

        # Update weights: boost the misclassified, shrink the rest.
        grow = exp(alpha)
        shrink = exp(-1 * alpha)
        for index, miss in enumerate(missed):
            D[index] *= grow if miss else shrink

        ensemble.append({"stump": stump, "bestFeature": bestFeature, "alpha": alpha})
    return ensemble
예제 #2
0
def create_bag(x, y):
    """Train a decision stump on a random subset of the (x, y) pairs.

    Pairs each example in ``x`` with its label in ``y``, draws
    ``num_samples`` of those pairs with ``sample`` (``num_samples`` is read
    from the enclosing scope), converts the drawn examples and labels to
    NumPy arrays and passes them to ``decisionStump.train``.

    Args:
        x: Sequence of examples.
        y: Sequence of labels aligned with ``x``.

    Returns:
        Whatever ``decisionStump.train`` returns for the sampled data.
    """
    # Materialize the pairs: on Python 3 zip() is an iterator, and sample()
    # needs a real sequence. On Python 2 this is a harmless copy.
    pairs = list(zip(x, y))
    sample_x, sample_y = zip(*sample(pairs, num_samples))
    sample_x, sample_y = np.array(sample_x), np.array(sample_y)
    return decisionStump.train(sample_x, sample_y)