예제 #1
0
def iterative_reduced_error_pruning(training_set_size, validation_set_range,
                                    data, test_set_start=80):
    """
    Measure id3 error after reduced error pruning for growing training sets.

    For every size ``k`` in ``range(1, training_set_size)`` (the upper bound
    is exclusive, so ``training_set_size`` itself is never trained on) a
    fresh tree is naively rebuilt from the first ``k`` rows of ``data``,
    handed to ``_mark_nodes`` together with its pre-pruning validation
    error, and then scored on the training, validation and test rows.

    Args:
        training_set_size: Exclusive upper bound on the number of leading
            rows of ``data`` used for training.
        validation_set_range: Iterable of row indices used for validation.
        data: Table supporting ``.iloc`` slicing and ``len()``.
        test_set_start: Index of the first row of the test set; rows from
            here to the end of ``data`` are used for testing. Defaults to
            80, the split that was previously hard-coded.

    Returns:
        A tuple ``(train_errors, validation_errors, test_errors)`` of three
        lists, each with one entry per training-set size tried.
    """
    train_error_points, validation_error_points, test_error_points = [], [], []
    for cur_training_set_size in range(1, training_set_size):
        decision_tree = id3(data.iloc[:cur_training_set_size])
        # Baseline validation error of the unpruned tree, passed to the
        # pruning step below.
        validation_error = _calc_error(validation_set_range, decision_tree,
                                       data)
        # NOTE(review): presumably prunes ``decision_tree`` in place, since
        # the same object is scored afterwards -- confirm in _mark_nodes.
        _mark_nodes(decision_tree, decision_tree, data, validation_error,
                    validation_set_range)
        train_error_points.append(
            _calc_error(range(cur_training_set_size), decision_tree, data))
        validation_error_points.append(
            _calc_error(validation_set_range, decision_tree, data))
        test_error_points.append(
            _calc_error(range(test_set_start, len(data)), decision_tree,
                        data))
        # Lightweight progress report every fifth iteration.
        if cur_training_set_size % 5 == 0:
            print("Iterations left: ",
                  training_set_size - cur_training_set_size)
    return train_error_points, validation_error_points, test_error_points
예제 #2
0
def tennis():
    """Train an id3 tree on tennis.csv, print it, then check every row.

    Each row of the same data the tree was built from is printed with the
    result of ``_is_correct_label`` and the value the tree's ``traverse``
    returns for that row.
    """
    samples = read_csv('tennis.csv')
    tree = id3(samples)
    print(tree)
    for row_number in range(len(samples)):
        # Evaluate the label check first, then the traversal, before printing.
        label_matches = _is_correct_label(tree, samples, row_number)
        predicted = tree.traverse(samples.iloc[row_number])
        print("Correct?: ", label_matches, " Value: ", predicted)
예제 #3
0
def zoo():
    """Build an id3 tree from zoo.csv, print it, and check held-out rows.

    The ``animal_name`` column is dropped before training. The first 80
    rows train the tree; every remaining row is printed with the result of
    ``_is_correct_label`` and the value the tree's ``traverse`` returns.
    """
    # ``axis`` must be given by keyword: the positional form
    # ``drop('animal_name', 1)`` was deprecated in pandas 1.3 and raises
    # TypeError from pandas 2.0 onwards.
    data = read_csv('zoo.csv').drop('animal_name', axis=1)
    train_size = 80
    decision_tree = id3(data.iloc[:train_size])
    print(decision_tree)
    for idx in range(train_size, len(data)):
        print("Correct?: ", _is_correct_label(decision_tree, data, idx),
              " Value: ", decision_tree.traverse(data.iloc[idx]))
예제 #4
0
def zoo_iterative_id3(training_set_size, test_set_start=80):
    """
    Measure id3 training and testing error on zoo.csv for growing training sets.

    For every size ``k`` in ``range(1, training_set_size)`` (the upper bound
    is exclusive, so ``training_set_size`` itself is never trained on) a
    fresh tree is naively rebuilt from the first ``k`` rows and scored on
    those rows and on the test rows.

    Args:
        training_set_size: Exclusive upper bound on the number of leading
            rows used for training.
        test_set_start: Index of the first row of the test set; rows from
            here to the end of the data are used for testing. Defaults to
            80, the split that was previously hard-coded.

    Returns:
        A tuple ``(train_errors, test_errors)`` of two lists, each with one
        entry per training-set size tried.
    """
    train_error_points, test_error_points = [], []
    # ``axis`` must be given by keyword: the positional form
    # ``drop('animal_name', 1)`` was deprecated in pandas 1.3 and raises
    # TypeError from pandas 2.0 onwards.
    data = read_csv('zoo.csv').drop('animal_name', axis=1)
    for cur_training_set_size in range(1, training_set_size):
        decision_tree = id3(data.iloc[:cur_training_set_size])
        train_error_points.append(
            _calc_error(range(cur_training_set_size), decision_tree, data))
        test_error_points.append(
            _calc_error(range(test_set_start, len(data)), decision_tree,
                        data))
        # Lightweight progress report every fifth iteration.
        if cur_training_set_size % 5 == 0:
            print("Iterations left: ",
                  training_set_size - cur_training_set_size)
    return train_error_points, test_error_points
예제 #5
0
from ID3 import id3
from InfoGain import Gain
from BuildFromTraining import loadFromFile

# Only filled by the disabled information-gain loop further down; left empty
# while that loop stays commented out.
gain = {}

##################################################
# Load the training examples from disk.
print("Cargando ejemplos de entrenamiento...")
S = loadFromFile("training_examples.txt")
# Attribute names are taken from the first example.
# NOTE(review): presumably each example is a dict, so this lists its keys --
# confirm against loadFromFile.
Attributes = list(S[0])
##################################################
###
##################################################
# Disabled: per-attribute information gain over every attribute except the
# last one.
#print("Calculando ganancia de informacion...")

#for A in Attributes[:-1]:
#  gain[A] = Gain(S, A)
##################################################
print("Atributos: {}".format(Attributes))
# Run id3 on all attributes but the last.
# NOTE(review): the last attribute is presumably the target label, and the
# meaning of the empty-string second argument is not visible here -- confirm
# against id3's signature.
id3(S, "", Attributes[:-1])
예제 #6
0
# Number of decision trees built for the ensemble in experiment 3.2.1.
N = 5

# Switches selecting which experiments run.
one = True
two = True
three = True

if one:
    print("=====================================")
    print("========== Experiment 3.2.1 =========")
    print("=====================================")
    print("")
    # Build N trees, each from a fresh ensemble_data() draw of the training
    # set and the first 8 attributes of a freshly shuffled attribute list.
    trees = []
    for i in range(N):
        e = ensemble_data(hand_train_data, hand_train_label)
        # Shuffles hand_train_attr in place, so the order carries over to
        # later iterations.
        random.shuffle(hand_train_attr)
        t = id3(e['d'], e['l'], hand_train_attr[:8])
        trees.append(t)

    # Derive the SVM input from the trees and the training data.
    # NOTE(review): presumably one feature per tree prediction -- confirm
    # against get_data.
    data_set = get_data(trees, hand_train_data)
    W_b = run_svm(hand_train_label, data_set, epochs, 2, 0.001)
    # NOTE(review): the SVM is trained on `data_set` but evaluated here and
    # below on the raw `hand_train_data` / `hand_test_data`; confirm that
    # evaluate_svm expects untransformed data rather than get_data() output.
    evaluation = evaluate_svm(hand_train_label, hand_train_data, W_b['W'],
                              W_b['b'])
    print("=========== Training Data ===========")
    print_eval(evaluation)
    evaluation = evaluate_svm(hand_test_label, hand_test_data, W_b['W'],
                              W_b['b'])
    print("============= Test Data =============")
    print_eval(evaluation)

# Second experiment; only the first banner line is visible in this excerpt.
if two:
    print("=====================================")
예제 #7
0
'''
Created on Sep 23, 2016

@author: Leland Stenquist
'''
from ID3 import scan, id3, print_tree, test_id3
from ID3_Helper import ensemble_data

# Load the labels and data files of the small test dataset.
# NOTE(review): the meaning of the third argument (50 here, 25 for madelon)
# is not visible in this file -- confirm against scan.
r = scan("res/test/train.labels","res/test/train.data",50)
# Alternative input: the madelon training set.
#r = scan("res/madelon/madelon_train.labels","res/madelon/madelon_train.data",25)
# r['d'], r['l'] and r['a'] are passed on as data, labels and attributes
# (presumed from how they are used below -- confirm against scan).
e = ensemble_data(r['d'],r['l'])
# NOTE(review): the trailing 8 is presumably a depth or attribute limit --
# confirm against id3's signature.
t = id3(e['d'],e['l'],r['a'],8)
print_tree(t)
# The tree is tested on the same data that scan() loaded, not on a separate
# held-out set.
test = test_id3(t,r['d'],r['l'],r['a'])
print(test)