Ejemplo n.º 1
0
import data_sort

from sklearn.ensemble import AdaBoostClassifier

# AdaBoost ensemble configured with the "SAMME" algorithm and 100 estimators.
boost = AdaBoostClassifier(n_estimators=100, algorithm="SAMME")

# Both data files are parsed with the same feature count.
number_of_features = 15
training_data = data_sort.makeSet(
    "../datasets/adult_data_big.txt",
    number_of_features,
)
test_data = data_sort.makeSet(
    "../datasets/adult_data_test.txt",
    number_of_features,
)

# data_sort.split returns a pair that is unpacked into inputs (X) and
# targets (y) for each set.
X_train, y_train = data_sort.split(training_data)
X_test, y_test = data_sort.split(test_data)

# Train on the training set, then report the score on the held-out test set.
boost.fit(X_train, y_train)
test_score = boost.score(X_test, y_test)
print(test_score)
Ejemplo n.º 2
0
# adaptive_boosting.py modified for (somewhat) more efficient testing

import data_sort
from dt_thread import TreeThread

from random import randint
import math
import time

# Worker counts to run the test with.
number_of_workers_list = [200, 400]

# Both car data files are parsed with the same feature count.
number_of_features = 7

# Load the training split, pass it through binaryfy, and report its size.
training_data = data_sort.makeSet(
    "datasets/car_split_train.txt",
    number_of_features,
)
training_data, labels = data_sort.binaryfy(training_data)
print(len(training_data))

# Load and convert the test split the same way.
# NOTE(review): `labels` is rebound here, so the value returned for the
# training split is discarded -- confirm this is intended.
test_data = data_sort.makeSet(
    "datasets/car_split_test.txt",
    number_of_features,
)
test_data, labels = data_sort.binaryfy(test_data)

# Loss function only indirectly used
# def loss_function(test_data):
#     loss = 0
#     for n in range(0, len(test_data)):
#         m = 0
#         for k in range(0, len(threads)):        
#             m += alpha[k]*threads[k].binary_query(test_data[n])
#         m = m*test_data[n][-1]
#         loss += exp(-m)
#     return loss

def calculate_weights(weights, error_rate, alpha, data_set,  predictions):
    temp_weights = weights.copy()
Ejemplo n.º 3
0
import data_sort

training_data = data_sort.makeSet("adult_data_big.txt", 15)

# Write rows to the output file in pairs whose value at index 14 differs
# (presumably the class label -- verify against data_sort.makeSet).
#
# `label1` holds the pending row waiting for a partner; None means no row is
# pending.  Rows that share the pending row's index-14 value are skipped.
label1 = None
label2 = None

# The context manager guarantees the file is closed even if a row is
# malformed and indexing raises part-way through the loop.
with open("adult_data_unbiased.txt", "w+") as file:
    for a in training_data:
        if label1 is None:
            # Nothing pending: remember this row and look for its partner.
            label1 = a
        elif a[14] != label1[14]:
            # Found a row with a different index-14 value: emit the pair
            # and start over.
            label2 = a
            file.write(str(label1) + "\n")
            file.write(str(label2) + "\n")
            label1 = None
            label2 = None
Ejemplo n.º 4
0
# Wall-clock reference taken at the start of the script.
startTime = time.time()

# Small hand-written fruit data set.  NOTE: `training_data` is rebound to the
# contents of the adult data file further down, so this literal is replaced
# before the end of this section.
training_data = [
    ['Green', 3, 'Apple'],
    ['Red', 3, 'Apple'],
    ['Red', 1, 'Grape'],
    ['Red', 2, 'Grape'],
    ['Yellow', 3, 'Lemon'],
    ['Yellow', 3, 'Apple'],
    ['Red', 1, 'Grape'],
    ['Red', 2, 'Grape'],
    ['Green', 1, 'Grape'],
    ['Yellow', 2, 'Lemon'],
]

test_data = [
    ['Green', 1, 'Grape'],
    ['Yellow', 2, 'Lemon'],
    ['Green', 2, 'Apple'],
    ['Green', 4, 'Apple'],
]

number_of_workers = 10
data_for_workers = []
number_of_features = 15
training_data = data_sort.makeSet(
    "datasets/adult_data.txt", number_of_features
)

# One weight per training row, every weight starting at 1.
weights = [1] * len(training_data)


# TODO: Take weights into account; this is done in adaptive_boosting.py
def extract_training_data(dataSet):
    """Return a random sample of rows drawn from ``dataSet``.

    The sample holds ``round(0.6 * len(dataSet))`` rows.  Each row is picked
    independently by a uniformly random index, so the same row may appear
    more than once.  An empty ``dataSet`` yields an empty list.
    """
    sample_size = int(round(0.6 * len(dataSet)))
    last_index = len(dataSet) - 1
    return [dataSet[randint(0, last_index)] for _ in range(sample_size)]


# Create threads