def ada_boosting(X, y, L):
    """Train an AdaBoost ensemble of decision stumps.

    Each round fits a stump with ``decisionStump.train`` on the current
    example weights, measures its weighted training error, derives the
    stump's vote ``alpha = 0.5 * log((1 - err) / err)``, and then
    re-weights the examples so that misclassified ones count more in the
    next round.

    Args:
        X: Sequence of training examples (must support ``len`` and
            iteration).
        y: Sequence of true labels, aligned with ``X``.
        L: Maximum number of boosting rounds.

    Returns:
        A list with one dict per trained stump, in training order, with
        keys ``"stump"``, ``"bestFeature"`` and ``"alpha"``.

    Raises:
        ZeroDivisionError: If ``X`` is empty (the initial uniform weight
            is ``1.0 / len(X)``).
    """
    # The weighted error is clamped into [min_error, 1 - min_error] before
    # computing alpha so that a perfect (err == 0) or perfectly wrong
    # (err == 1) stump yields a large finite vote instead of a division by
    # zero or a math domain error.
    min_error = 1e-10

    # Uniform initial weights; updated in place every round.
    D = [1.0 / len(X)] * len(X)
    ensemble = []  # entries: {"stump": ..., "bestFeature": ..., "alpha": ...}

    for round_idx in range(L):
        bestFeature, stump = decisionStump.train(X, y, D)

        # Evaluate the stump once per example and reuse the result for both
        # the error computation and the weight update.
        misclassified = [
            decisionStump.predict(example, stump, bestFeature) != true_label
            for example, true_label in zip(X, y)
        ]

        error_sum = sum(
            weight for weight, wrong in zip(D, misclassified) if wrong
        )
        # D is never renormalised, so divide by its current total.
        mean_error = error_sum / sum(D)

        clamped_error = min(max(mean_error, min_error), 1.0 - min_error)
        alpha = 0.5 * log((1.0 - clamped_error) / clamped_error)
        ensemble.append(
            {"stump": stump, "bestFeature": bestFeature, "alpha": alpha}
        )

        if mean_error == 0:
            # The stump fits the training set perfectly: keep it (it was
            # appended above) and stop, since further re-weighting cannot
            # change anything.
            sys.stderr.write(
                "achieved perfect training, exiting at iteration: %d\n"
                % round_idx
            )
            break

        # Shrink the weight of correctly classified examples and grow the
        # weight of misclassified ones.
        grow = exp(alpha)
        shrink = exp(-alpha)
        for idx, wrong in enumerate(misclassified):
            D[idx] *= grow if wrong else shrink

    return ensemble
def create_bag(x, y):
    """Train a decision stump on a random subsample of ``(x, y)``.

    ``num_samples`` example/label pairs are drawn with ``sample`` and the
    stump is trained on them with ``decisionStump.train`` (called here
    without an explicit weight vector).

    NOTE(review): ``sample`` and ``num_samples`` are defined outside this
    block. If ``sample`` is ``random.sample`` the draw is without
    replacement and requires ``num_samples <= len(x)`` -- confirm.

    Args:
        x: Sequence of training examples.
        y: Sequence of labels, aligned with ``x``.

    Returns:
        Whatever ``decisionStump.train`` returns for the sampled data.
    """
    # Materialise the pairs first: on Python 3 ``zip`` returns an iterator,
    # which a sequence-based sampler cannot index into.
    pairs = list(zip(x, y))
    sample_x, sample_y = zip(*sample(pairs, num_samples))
    return decisionStump.train(np.array(sample_x), np.array(sample_y))