def adaboost_avg_run_new(max_classes, avg_num_of_run, training_set, testing_set):
    """Average our AdaBoost's and scikit-learn's AdaBoost error rates over runs.

    Runs our ``AdaBoost`` implementation ``avg_num_of_run`` times, recording
    training/testing error after each boosting round, then runs scikit-learn's
    ``AdaBoostClassifier`` (perceptron base learner) the same number of times
    for each classifier count ``1..max_classes`` and averages its test error.

    Args:
        max_classes: maximum number of weak classifiers (boosting rounds).
        avg_num_of_run: how many independent runs to average over (>= 1).
        training_set: raw training records (attributes + class label mixed).
        testing_set: raw testing records (attributes + class label mixed).

    Returns:
        list[ErrorWrapper]: one entry per classifier count ``1..max_classes``
        holding the averaged train, test, and scikit test error rates.
    """
    def _avg(values):
        # Arithmetic mean; assumes a non-empty list (avg_num_of_run >= 1).
        return sum(values) / len(values)

    all_error_list = []

    # Because datasets sometimes place the class attribute at the end or even
    # at the beginning or the middle, separate the attribute vector from the
    # class label. This mirrors scikit-learn's (X, y) convention.
    # train_x: attribute vectors; train_y: class labels.
    train_x, train_y = split_attribute_and_label(training_set)
    test_x, test_y = split_attribute_and_label(testing_set)

    # Each weak learner trains on a 20% subset of the training data.
    train_subset_num = int(len(train_y) * 0.2)

    # Map: number of classifiers -> list of per-run error rates.
    our_ada_training_errors = {k: [] for k in range(1, max_classes + 1)}
    our_ada_testing_errors = {k: [] for k in range(1, max_classes + 1)}

    # Run our AdaBoost avg_num_of_run times, collecting the per-round
    # (i.e. per-classifier-count) train/test errors from each run.
    for _ in range(avg_num_of_run):
        ada_obj = AdaBoost(max_classes, train_subset_num, THRESHOLD, ETA,
                           UPPER_BOUND, ETA_WEIGHTS, False)
        ada_obj.fit_with_errors(train_x, train_y, test_x, test_y)
        for j in range(max_classes):
            our_ada_training_errors[j + 1].append(ada_obj.training_error[j])
            our_ada_testing_errors[j + 1].append(ada_obj.testing_error[j])

    for cl in range(1, max_classes + 1):
        # Benchmark: scikit-learn AdaBoost (SAMME) with a perceptron base
        # learner, averaged over the same number of runs.
        scikit_error = []
        for _ in range(avg_num_of_run):
            pada = perceptron.Perceptron(max_iter=UPPER_BOUND, verbose=0,
                                         random_state=None, fit_intercept=True,
                                         eta0=ETA)
            bdt = AdaBoostClassifier(pada, algorithm="SAMME", n_estimators=cl)
            bdt.fit(train_x, train_y)
            result_list = bdt.predict(test_x)
            scikit_error.append(calculate_error(test_y, result_list))

        errors = ErrorWrapper(cl,
                              _avg(our_ada_training_errors[cl]),
                              _avg(our_ada_testing_errors[cl]),
                              _avg(scikit_error))
        all_error_list.append(errors)

        print("Train avg for %s %s" % (cl, errors.train_error))
        print("Testing avg for %s %s" % (cl, errors.test_error))
        print("Scikit adaboost avg for %s %s" % (cl, errors.scikit_error))

    return all_error_list