def iterative_reduced_error_pruning(training_set_size, validation_set_range, data):
    """
    Run reduced error pruning on id3 trees grown from increasingly large
    training prefixes of ``data``.

    For every size ``k`` in ``range(1, training_set_size)`` (so the largest
    size used is ``training_set_size - 1``) a brand-new tree is built from the
    first ``k`` rows, its validation error is measured, and the tree is handed
    to ``_mark_nodes`` together with that error (presumably the in-place
    pruning step -- confirm against ``_mark_nodes``).  Afterwards the errors
    on the training rows, the validation rows, and the test rows are recorded.

    Args:
        training_set_size: exclusive upper bound on the training prefix size.
        validation_set_range: row positions used for validation.
        data: table of examples; sliced with ``.iloc``.

    Returns:
        Three lists ``(train_errors, validation_errors, test_errors)`` with
        one entry per training size, in increasing order of size.

    Note:
        The test rows are fixed to positions ``80 .. len(data) - 1``.
    """
    train_curve = []
    validation_curve = []
    test_curve = []

    for n_rows in range(1, training_set_size):
        tree = id3(data.iloc[:n_rows])

        # Baseline validation error of the freshly built tree; _mark_nodes
        # receives it alongside the tree (passed as both of its first two
        # arguments).
        baseline_error = _calc_error(validation_set_range, tree, data)
        _mark_nodes(tree, tree, data, baseline_error, validation_set_range)

        # Errors are measured after _mark_nodes has run, in the order
        # train -> validation -> test.
        train_error = _calc_error(range(n_rows), tree, data)
        train_curve.append(train_error)

        validation_error = _calc_error(validation_set_range, tree, data)
        validation_curve.append(validation_error)

        test_error = _calc_error(range(80, len(data)), tree, data)
        test_curve.append(test_error)

        # Progress report every fifth iteration.
        if not n_rows % 5:
            print("Iterations left: ", training_set_size - n_rows)

    return train_curve, validation_curve, test_curve
def tennis():
    """
    Build an id3 tree from every row of tennis.csv, print it, then print one
    line per row showing whether the tree labels that row correctly and which
    value the tree produces for it.
    """
    examples = read_csv('tennis.csv')
    tree = id3(examples)
    print(tree)

    # The tree is checked against the same rows it was built from.
    row_count = len(examples)
    for position in range(row_count):
        is_right = _is_correct_label(tree, examples, position)
        predicted = tree.traverse(examples.iloc[position])
        print("Correct?: ", is_right, " Value: ", predicted)
def zoo():
    """
    Build, display, and test an id3 tree on zoo.csv.

    The ``animal_name`` column is removed before training.  The tree is built
    from the first 80 rows and then evaluated on every remaining row: for each
    of those rows one line is printed showing whether the tree's label is
    correct and the value the tree produces.
    """
    # `axis` must be given by keyword: passing it positionally
    # (`drop('animal_name', 1)`) was deprecated in pandas 1.3 and raises
    # TypeError from pandas 2.0 onwards.
    data = read_csv('zoo.csv').drop('animal_name', axis=1)

    # Rows [0, train_size) train the tree; rows [train_size, len(data)) test it.
    train_size = 80
    decision_tree = id3(data.iloc[:train_size])
    print(decision_tree)

    for idx in range(train_size, len(data)):
        print("Correct?: ", _is_correct_label(decision_tree, data, idx),
              " Value: ", decision_tree.traverse(data.iloc[idx]))
def zoo_iterative_id3(training_set_size):
    """
    Build and test id3 trees on zoo.csv for a range of training set sizes.

    For every size ``k`` in ``range(1, training_set_size)`` (the upper bound
    is exclusive, so the largest size used is ``training_set_size - 1``) a new
    tree is naively built from the first ``k`` rows.  Its error is then
    measured on those same ``k`` training rows and on the fixed test rows at
    positions ``80 .. len(data) - 1``.

    Args:
        training_set_size: exclusive upper bound on the number of training
            rows.

    Returns:
        Two lists ``(train_error_points, test_error_points)`` with one entry
        per training size, in increasing order of size.
    """
    train_error_points, test_error_points = [], []

    # `axis` must be given by keyword: passing it positionally
    # (`drop('animal_name', 1)`) was deprecated in pandas 1.3 and raises
    # TypeError from pandas 2.0 onwards.
    data = read_csv('zoo.csv').drop('animal_name', axis=1)

    for cur_training_set_size in range(1, training_set_size):
        decision_tree = id3(data.iloc[:cur_training_set_size])
        train_error_points.append(
            _calc_error(range(cur_training_set_size), decision_tree, data))
        test_error_points.append(
            _calc_error(range(80, len(data)), decision_tree, data))

        # Progress report every fifth iteration.
        if cur_training_set_size % 5 == 0:
            print("Iterations left: ",
                  training_set_size - cur_training_set_size)

    return train_error_points, test_error_points
from ID3 import id3
from InfoGain import Gain
from BuildFromTraining import loadFromFile

# Per-attribute information gain.  Only the disabled step further down would
# populate it, so it stays empty in the current script.
gain = {}

# --- Step 1: load the training examples ------------------------------------
print("Cargando ejemplos de entrenamiento...")
S = loadFromFile("training_examples.txt")

# Attribute names are whatever iterating over the first example yields.
Attributes = [name for name in S[0]]

# --- Step 2 (disabled): information gain -----------------------------------
# The original script carried a commented-out step here that printed a
# progress message and stored Gain(S, A) in gain[A] for every attribute A
# except the last one.

# --- Step 3: report the attributes and run id3 ------------------------------
print("Atributos: {}".format(Attributes))

# The last attribute is withheld from id3 (presumably it is the target
# label -- confirm against loadFromFile / id3).
candidate_attributes = Attributes[:-1]
id3(S, "", candidate_attributes)
# Number of trees built for the ensemble in the experiment below.
N = 5

# Switches that enable/disable the individual experiment sections.
# (`three` is not referenced in this part of the script.)
one = True
two = True
three = True

if one:
    print("=====================================")
    print("========== Experiment 3.2.1 =========")
    print("=====================================")
    print("")

    # Build N id3 trees.  Each tree gets its own ensemble_data(...) result and
    # the first 8 attributes of a freshly shuffled attribute list.
    trees = []
    for i in range(N):
        e = ensemble_data(hand_train_data, hand_train_label)
        # shuffle reorders hand_train_attr in place, so the [:8] slice below
        # differs from tree to tree.
        random.shuffle(hand_train_attr)
        t = id3(e['d'], e['l'], hand_train_attr[:8])
        trees.append(t)

    # Derive a data set from the trees and the training data, then train the
    # SVM on it.
    # presumably get_data maps every example to the trees' outputs -- TODO confirm
    data_set = get_data(trees, hand_train_data)
    # presumably 2 and 0.001 are SVM hyper-parameters -- TODO confirm against run_svm
    W_b = run_svm(hand_train_label, data_set, epochs, 2, 0.001)

    # NOTE(review): the SVM is trained on `data_set`, but both evaluations
    # below are given the raw hand_train_data / hand_test_data rather than a
    # get_data(...) result -- confirm this is intended.
    evaluation = evaluate_svm(hand_train_label, hand_train_data, W_b['W'], W_b['b'])
    print("=========== Training Data ===========")
    print_eval(evaluation)

    evaluation = evaluate_svm(hand_test_label, hand_test_data, W_b['W'], W_b['b'])
    print("============= Test Data =============")
    print_eval(evaluation)

if two:
    # Banner line for the second section.
    print("=====================================")
'''
Created on Sep 23, 2016

@author: Leland Stenquist

Small driver script: scans a labelled data set, draws an ensemble sample from
it, builds an id3 tree on that sample, prints the tree, and prints the result
of testing the tree against the full scanned data.
'''
from ID3 import scan, id3, print_tree, test_id3
from ID3_Helper import ensemble_data

# Input files for the run.  A disabled alternative in the original script used
# "res/madelon/madelon_train.labels" and "res/madelon/madelon_train.data" with
# 25 as the third scan argument.
labels_path = "res/test/train.labels"
data_path = "res/test/train.data"

# Third argument to scan and fourth argument to id3; their meaning is defined
# by the ID3 module (the latter is presumably a depth limit -- confirm).
scan_size = 50
tree_limit = 8

# The scan result is indexed with 'd', 'l' and 'a' (presumably data, labels
# and attributes -- confirm against ID3.scan).
scanned = scan(labels_path, data_path, scan_size)

# The tree is built from the ensemble sample, not from the full scan.
sample = ensemble_data(scanned['d'], scanned['l'])
tree = id3(sample['d'], sample['l'], scanned['a'], tree_limit)
print_tree(tree)

# Testing uses the complete scanned data and labels.
outcome = test_id3(tree, scanned['d'], scanned['l'], scanned['a'])
print(outcome)