import data_sort

from sklearn.ensemble import AdaBoostClassifier

boost = AdaBoostClassifier(algorithm="SAMME", n_estimators=100)

number_of_features = 15
training_data = data_sort.makeSet("../datasets/adult_data_big.txt", number_of_features)
test_data = data_sort.makeSet("../datasets/adult_data_test.txt", number_of_features)

X_train, y_train = data_sort.split(training_data)
X_test, y_test = data_sort.split(test_data)

boost.fit(X_train, y_train)
print(boost.score(X_test, y_test))
# adaptive_boosting.py modified for (somewhat) more efficient testing
import data_sort
from dt_thread import TreeThread
from random import randint
import math
import time

number_of_workers_list = [200, 400]

number_of_features = 7
training_data = data_sort.makeSet("datasets/car_split_train.txt", number_of_features)
training_data, labels = data_sort.binaryfy(training_data)
print(len(training_data))
test_data = data_sort.makeSet("datasets/car_split_test.txt", number_of_features)
test_data, labels = data_sort.binaryfy(test_data)


# Loss function only indirectly used
# def loss_function(test_data):
#     loss = 0
#     for n in range(0, len(test_data)):
#         m = 0
#         for k in range(0, len(threads)):
#             m += alpha[k] * threads[k].binary_query(test_data[n])
#         m = m * test_data[n][-1]
#         loss += math.exp(-m)
#     return loss


def calculate_weights(weights, error_rate, alpha, data_set, predictions):
    temp_weights = weights.copy()
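    # NOTE: the original file is truncated at this point. The body below is a
    # minimal sketch of the standard AdaBoost re-weighting step, assuming that
    # labels and predictions are encoded as +1/-1 (as binaryfy/binary_query
    # suggest) and that alpha was already derived from error_rate. Misclassified
    # samples are up-weighted, correctly classified ones down-weighted, and the
    # weights are renormalised to sum to 1.
    for n in range(0, len(temp_weights)):
        if predictions[n] != data_set[n][-1]:
            temp_weights[n] = temp_weights[n] * math.exp(alpha)
        else:
            temp_weights[n] = temp_weights[n] * math.exp(-alpha)
    total = sum(temp_weights)
    return [w / total for w in temp_weights]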
import data_sort

training_data = data_sort.makeSet("adult_data_big.txt", 15)

label1 = 0
label2 = 0
file = open("adult_data_unbiased.txt", "w+")

# Pair up samples with opposite labels (the class sits at feature index 14)
# so the output file contains an equal number of examples from each class.
for a in training_data:
    if label1 != 0:
        if a[14] != label1[14]:
            label2 = a
            file.write(str(label1) + "\n")
            file.write(str(label2) + "\n")
            label1 = 0
            label2 = 0
    else:
        label1 = a
file.close()
import time
from random import randint

import data_sort

startTime = time.time()

# Toy fruit data (superseded by the real dataset loaded below)
training_data = [['Green', 3, 'Apple'],
                 ['Red', 3, 'Apple'],
                 ['Red', 1, 'Grape'],
                 ['Red', 2, 'Grape'],
                 ['Yellow', 3, 'Lemon'],
                 ['Yellow', 3, 'Apple'],
                 ['Red', 1, 'Grape'],
                 ['Red', 2, 'Grape'],
                 ['Green', 1, 'Grape'],
                 ['Yellow', 2, 'Lemon']]

test_data = [['Green', 1, 'Grape'],
             ['Yellow', 2, 'Lemon'],
             ['Green', 2, 'Apple'],
             ['Green', 4, 'Apple']]

number_of_workers = 10
data_for_workers = []

number_of_features = 15
training_data = data_sort.makeSet("datasets/adult_data.txt", number_of_features)

weights = []
for i in range(0, len(training_data)):
    weights.append(1)


# TODO: Take the weights into account; done in adaptive_boosting.py
def extract_training_data(dataSet):
    # Bootstrap-style sampling: draw 60 % of the set's size with replacement.
    data = []
    for n in range(0, int(round(0.6 * len(dataSet)))):
        data.append(dataSet[randint(0, len(dataSet) - 1)])
    return data


# Create threads
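# NOTE: the original file stops at the comment above. The lines below are a
# minimal sketch of how the workers might be spawned, under the hypothetical
# assumption that TreeThread (from dt_thread, as imported in
# adaptive_boosting.py) takes a training subset and exposes start()/join()
# like threading.Thread; its real constructor signature is not shown in this
# repo snapshot.
# from dt_thread import TreeThread
#
# threads = []
# for i in range(0, number_of_workers):
#     data_for_workers.append(extract_training_data(training_data))
#     threads.append(TreeThread(data_for_workers[i]))
# for thread in threads:
#     thread.start()
# for thread in threads:
#     thread.join()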