def adaboost_avg_run(max_classes, avg_num_of_run, training_set, testing_set):
    testing_error_list = []
    all_error_list = []

    # Because datasets sometimes place the class attribute at the end, the
    # beginning, or the middle, we separate the attribute vector from the
    # class label. This is also the convention scikit-learn follows.
    # train_x: the attribute vectors; train_y: the class labels
    (train_x, train_y) = split_attribute_and_label(training_set)
    (test_x, test_y) = split_attribute_and_label(testing_set)

    # Size of the training subset handed to each weak learner:
    # 20% of the training data.
    train_subset_num = int(len(train_y) * 0.2)

    # Try ensembles of 1, 3, 5, ... up to max_classes weak learners.
    for cl in range(1, max_classes + 1, 2):
        train_error = []
        testing_error = []
        scikit_error = []

        # Average each ensemble size over avg_num_of_run independent runs.
        for i in range(avg_num_of_run):
            ada_obj = AdaBoost(cl, train_subset_num, THRESHOLD, ETA,
                               UPPER_BOUND, ETA_WEIGHTS, False)
            ada_obj.fit(train_x, train_y)

            # Training error of our AdaBoost implementation.
            hypothesis_list = ada_obj.predict(train_x)
            mistakes = ada_obj.xor_tuples(train_y, hypothesis_list)
            error_rate_train = classifier_error_rate(mistakes)

            # Test error on the held-out set.
            hypothesis_list = ada_obj.predict(test_x)
            mistakes = ada_obj.xor_tuples(test_y, hypothesis_list)
            error_rate_test = classifier_error_rate(mistakes)

            train_error.append(error_rate_train)
            testing_error.append(error_rate_test)

            # Reference run: scikit-learn's AdaBoost (SAMME) boosting a
            # Perceptron base estimator with matching hyperparameters.
            pada = perceptron.Perceptron(max_iter=UPPER_BOUND, verbose=0,
                                         random_state=None,
                                         fit_intercept=True, eta0=ETA)
            bdt = AdaBoostClassifier(pada, algorithm="SAMME",
                                     n_estimators=cl)
            bdt.fit(train_x, train_y)
            result_list = bdt.predict(test_x)
            scikit_error.append(calculate_error(test_y, result_list))

        errors = ErrorWrapper(cl,
                              sum(train_error) / len(train_error),
                              sum(testing_error) / len(testing_error),
                              sum(scikit_error) / len(scikit_error))
        all_error_list.append(errors)

        print("Train avg for %s %s" % (cl, errors.train_error))
        print("Testing avg for %s %s" % (cl, errors.test_error))
        testing_error_list.append(
            (sum(testing_error) / len(testing_error)) * 100)
        print("Scikit adaboost avg for %s %s" % (cl, errors.scikit_error))

    # return testing_error_list
    return all_error_list
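
# --- Usage sketch --------------------------------------------------------
# A minimal sketch of how this function might be called, assuming the rest
# of this module defines AdaBoost, split_attribute_and_label,
# classifier_error_rate, calculate_error, ErrorWrapper, and the THRESHOLD,
# ETA, UPPER_BOUND, and ETA_WEIGHTS constants, along with the scikit-learn
# imports (AdaBoostClassifier from sklearn.ensemble, perceptron from
# sklearn.linear_model). `load_csv_dataset` is a hypothetical helper, not
# part of this module.
#
#   training_set, testing_set = load_csv_dataset("data/train.csv",
#                                                "data/test.csv")
#   # Evaluates ensembles of 1, 3, 5, 7, and 9 weak learners, averaging
#   # each configuration over 5 independent runs.
#   results = adaboost_avg_run(9, 5, training_set, testing_set)
#   for e in results:
#       print(e.train_error, e.test_error, e.scikit_error)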