def main():
    """Fit a boosted ensemble on the training file and print its accuracy.

    Reads ``(train, test, L)`` from ``get_args()``, loads both data files via
    ``decisionStump.load_data``, converts features and labels to NumPy arrays,
    trains with ``ada_boosting`` and finally prints the training accuracy
    followed by the test accuracy, separated by a single space.

    ``L`` is forwarded unchanged to ``ada_boosting`` (presumably the number of
    boosting rounds -- confirm against that function).
    """
    train, test, L = get_args()

    # Training split: load, then convert to arrays.
    raw_features, raw_labels = decisionStump.load_data(train)
    X_train = np.array(raw_features)
    y_train = np.array(raw_labels)

    # Test split: same treatment.
    raw_features, raw_labels = decisionStump.load_data(test)
    X_test = np.array(raw_features)
    y_test = np.array(raw_labels)

    model = ada_boosting(X_train, y_train, L)

    # compute_accuracy yields three values; only the first one is reported.
    acc_t, _unused_a, _unused_b = compute_accuracy(X_train, y_train, model)
    acc_te, _unused_a, _unused_b = compute_accuracy(X_test, y_test, model)

    # Single parenthesised argument: same text as ``print acc_t, acc_te``.
    print("%s %s" % (acc_t, acc_te))
def main():
    """Sweep over ensemble sizes and print averaged train/test accuracy.

    Reads ``(train_file, test_file, T)`` from ``get_args()`` and loads both
    data sets with ``decisionStump.load_data``.  For every ensemble size in
    ``range(5, T, 5)`` (so ``T`` itself is excluded) it builds that many bags
    with ``create_bag``, evaluates the bags 10000 times on the training and
    test data, and prints one CSV line: ``size,mean_train_acc,mean_test_acc``.
    """
    train_file, test_file, T = get_args()
    x_train, y_train = decisionStump.load_data(train_file)
    x_test, y_test = decisionStump.load_data(test_file)

    for ensemble_size in range(5, T, 5):
        # Each entry is the (bestFeature, stump) pair returned by create_bag.
        bags = []
        for _draw in range(ensemble_size):
            best_feature, stump = create_bag(x_train, y_train)
            bags.append((best_feature, stump))

        train_scores = []
        test_scores = []
        # Repeated evaluation of the same bags; the scores are averaged below.
        # Train and test calls stay interleaved, exactly as before.
        for _trial in range(10000):
            score, _n_correct, _n_wrong = compute_accuracy(
                x_train, y_train, bags)
            train_scores.append(score)
            score, _n_correct, _n_wrong = compute_accuracy(
                x_test, y_test, bags)
            test_scores.append(score)

        # NOTE(review): scipy.mean appears to be a deprecated alias of
        # numpy.mean in newer SciPy releases -- confirm the installed version.
        fields = [
            str(ensemble_size),
            str(scipy.mean(train_scores)),
            str(scipy.mean(test_scores)),
        ]
        print(",".join(fields))
""" correct = 0 total = len(y) for i, example in enumerate(x): decision = classifier.predict(example) if decision == y[i]: correct += 1 accuracy = correct / float(total) return accuracy, correct, float(total) - correct data = [[1], [2]] y = [1,0] X_train, y_train = decisionStump.load_data(sys.argv[1]) X_test, y_test = decisionStump.load_data(sys.argv[2]) classifier = tree.DecisionTreeClassifier(max_depth=1) stump = tree.DecisionTreeClassifier(max_depth=1) trained = classifier.fit(X_train, y_train) print "Decision stump" print compute_accuracy(X_train, y_train, classifier) print compute_accuracy(X_test, y_test, classifier) print "Bagged results" bags = ensemble.BaggingClassifier(base_estimator=stump, n_estimators=10, max_samples=40) bag_trained = bags.fit(X_train, y_train) print compute_accuracy(X_train, y_train, bag_trained) print compute_accuracy(X_test, y_test, bag_trained)