Exemple #1
0
import pandas as pd
from adaboost import AdaBoost
import numpy as np

# Read heart.csv, treating the markers 'no info' and '.' as missing values.
# Only the first 500 rows are kept: per the original author's note, discrete
# attributes mean on the order of 2^k candidate subsets have to be checked,
# so the script works on a portion of the data.
raw_frame = pd.read_csv(
    'heart.csv',
    delimiter=',',
    na_values=['no info', '.'],
)
data = raw_frame.iloc[:500]

# Random (unseeded) row-wise split: one uniform draw in [0, 1) per row; rows
# whose draw is below 0.9 go to training (about 90%), the rest go to testing.
row_count = len(data)
draws = np.random.rand(row_count)
msk = draws < 0.9
train_data = data.loc[msk]
test_data = data.loc[~msk]

print("start the training")

# Build the booster from the training rows only. The original comments say a
# second positional argument sets the maximum number of trees (default 10),
# e.g. AdaBoost(train_data, 8) -- NOTE(review): confirm against adaboost.py.
adabo = AdaBoost(train_data)

# Train with the library defaults. The original comments say the default
# maximum tree depth is 1 and that it can be overridden through an optional
# parameter -- NOTE(review): confirm against adaboost.py.
adabo.trainadaboost()

# Print every tree that training produced.
adabo.printtrees()

# A single row can be scored with adabo.predict_sample(test_data.iloc[0]);
# here the held-out rows are passed to the output-file writer and then to
# the metrics routine.
adabo.make_output_file(test_data)
adabo.calculate_metrics(test_data)