def check_ID3():
    """Run three smoke tests against ID3 and print one pass/fail line each.

    Case 1 builds a tree with a depth limit of 0 and checks that the root
    carries label 1.  Cases 2 and 3 build trees with a depth limit of 5 and
    per-attribute split budgets of 1 and 5 respectively, then compare the
    per-row classification correctness against a recorded pattern.  A summary
    line is printed at the end.
    """
    def build_fixture(split_budget):
        # Each case receives freshly built inputs so that nothing is shared
        # between successive ID3 calls.
        metadata = [{'name': "winner", 'is_nominal': True},
                    {'name': "opprundifferential", 'is_nominal': False}]
        rows = [[1, 0.27], [0, 0.42], [0, 0.86], [0, 0.68], [0, 0.04],
                [1, 0.01], [1, 0.33], [1, 0.42], [1, 0.42], [0, 0.51],
                [1, 0.4]]
        return rows, metadata, [split_budget, split_budget]

    fails = 0

    # Case 1: depth 0, only the root label is inspected.
    rows, metadata, budget = build_fixture(5)
    n = ID3(rows, metadata, budget, 0)
    if n and n.label == 1:
        print("Passed 1")
    else:
        print("Failed 1")
        fails += 1

    # Cases 2 and 3: depth 5, compare which rows are classified correctly.
    classification_cases = (
        (1, "Passed 2", "Failed 2",
         [True, False, True, True, False, True, True, True, True, True, True]),
        (5, "Passed 3", "Failed 3",
         [True, False, True, True, True, True, True, True, True, True, True]),
    )
    for split_budget, ok_msg, bad_msg, expected in classification_cases:
        rows, metadata, budget = build_fixture(split_budget)
        n = ID3(rows, metadata, budget, 5)
        if n and [n.classify(row) == row[0] for row in rows] == expected:
            print(ok_msg)
        else:
            print(bad_msg)
            fails += 1

    if fails > 0:
        print("not all tests passed, please see ID3.")
    else:
        print("all tests passed.")
def decision_tree_driver(train, validate=False, predict=False, prune=False,
                         limit_splits_on_numerical=False, limit_depth=False,
                         print_tree=False, print_dnf=False,
                         learning_curve=False):
    """Train an ID3 tree from a CSV file and run the optional follow-up stages.

    Args:
        train: path handed to ``parse`` to load the training rows and the
            attribute metadata.
        validate: path of a validation set, or False to skip validation.
        predict: argument forwarded to ``create_predictions``, or False to
            skip prediction.
        prune: path of a pruning set, or False to skip pruning.
        limit_splits_on_numerical: split budget replicated once per
            attribute; False means unlimited (``float("inf")``).
        limit_depth: depth limit passed to ``ID3``; False means unlimited.
        print_tree: when truthy, write ``tree.print_tree()`` to
            ./output/tree.txt.
        print_dnf: when truthy, write ``tree.print_dnf_tree()`` to
            ./output/DNF.txt.
        learning_curve: accepted for interface compatibility only; the
            learning-curve stage is disabled in this variant.

    Returns:
        None.  All results are printed or written to files under ./output/.
    """
    train_set, attribute_metadata = parse(train, False)

    # `!= False` is kept on purpose: it also treats 0 as "not supplied",
    # exactly as the original checks did.
    if limit_splits_on_numerical != False:
        numerical_splits_count = [limit_splits_on_numerical] * len(attribute_metadata)
    else:
        numerical_splits_count = [float("inf")] * len(attribute_metadata)

    if limit_depth != False:
        depth = limit_depth
    else:
        depth = float("inf")

    print("###\n# Training Tree\n###")

    # call the ID3 classification algorithm with the appropriate options
    tree = ID3(train_set, attribute_metadata, numerical_splits_count, depth)
    print('\n')

    # call reduced error pruning using the pruning set
    if prune != False:
        print('###\n# Pruning\n###')
        pruning_set, _ = parse(prune, False)
        n = Node()
        reduced_error_pruning(tree, train_set, pruning_set, 0, n)
        print('')

    # print tree visually
    if print_tree:
        print('###\n# Decision Tree\n###')
        # `with` guarantees the handle is closed even if rendering raises.
        with open('./output/tree.txt', 'w+') as cursor:
            cursor.write(tree.print_tree())
        print('Decision Tree written to /output/tree')
        print('')

    # print tree in disjunctive normalized form
    if print_dnf:
        print('###\n# Decision Tree as DNF\n###')
        with open('./output/DNF.txt', 'w+') as cursor:
            cursor.write(tree.print_dnf_tree())
        print('Decision Tree written to /output/DNF')
        print('')

    # test tree accuracy on validation set
    if validate != False:
        print('###\n# Validating\n###')
        validate_set, _ = parse(validate, False)
        accuracy = validation_accuracy(tree, validate_set)
        print("Accuracy on validation set: " + str(accuracy))
        print('')

    # generate predictions on the test set
    if predict != False:
        print('###\n# Generating Predictions on Test Set\n###')
        create_predictions(tree, predict)
        print('')

    # generate a learning curve using the validation set
    # NOTE(review): this stage was disabled by wrapping it in a string literal
    # that started with `if learning_curve and validate:` and was never
    # terminated, which would have swallowed all code after it.  The dead stub
    # is kept only as this comment.
def decision_tree_driver(train, validate=False, predict=False, prune=False,
                         limit_splits_on_numerical=False, limit_depth=False,
                         print_tree=False, print_dnf=False,
                         learning_curve=False):
    """Train an ID3 tree, prune it, and print accuracy before and after.

    This variant always runs reduced-error pruning and always reports
    accuracy on the pruning set; the tree/DNF file output, prediction and
    learning-curve stages are disabled here.

    Args:
        train: path handed to ``parse`` to load the training rows and the
            attribute metadata.  (Previously ignored in favour of a
            hard-coded absolute path.)
        validate: used as the pruning/evaluation set path when ``prune`` is
            not supplied.
        predict: unused in this variant.
        prune: path of the pruning/evaluation set.  When neither ``prune``
            nor ``validate`` is given, the historical hard-coded path is used
            so that pruning still runs unconditionally as before.
        limit_splits_on_numerical: split budget replicated once per
            attribute; False means unlimited (``float("inf")``).
        limit_depth: depth limit passed to ``ID3``; False means unlimited.
        print_tree: unused in this variant.
        print_dnf: unused in this variant.
        learning_curve: unused in this variant.

    Returns:
        None.  Results are printed.
    """
    # Last-resort fallback only; callers should pass `prune` or `validate`.
    legacy_pruning_path = ('D:/2016 Spring/349 Machine Learning/Problem Set 2/'
                           'PS2.code/data/test_bvalidate.csv')
    if prune != False:
        pruning_path = prune
    elif validate != False:
        pruning_path = validate
    else:
        pruning_path = legacy_pruning_path

    train_set, attribute_metadata = parse(train, False)
    train_set = handle_missing_value(train_set, attribute_metadata)

    # `!= False` is kept on purpose: it also treats 0 as "not supplied".
    if limit_splits_on_numerical != False:
        numerical_splits_count = [limit_splits_on_numerical] * len(attribute_metadata)
    else:
        numerical_splits_count = [float("inf")] * len(attribute_metadata)

    if limit_depth != False:
        depth = limit_depth
    else:
        depth = float("inf")

    print("###\n# Training Tree\n###")

    # call the ID3 classification algorithm with the appropriate options
    tree = ID3(train_set, attribute_metadata, numerical_splits_count, depth)
    print('\n')

    # call reduced error pruning using the pruning set (unconditional here)
    print('###\n# Pruning\n###')
    pruning_set, _ = parse(pruning_path, False)
    pruning_set = handle_missing_value(pruning_set, attribute_metadata)
    accuracy = validation_accuracy(tree, pruning_set)
    print(tree)
    print("Accuracy on validation set of original tree: " + str(accuracy))
    # Only the second element of the returned pair is used below.
    _, newtree = reduced_error_pruning(tree, train_set, pruning_set)
    print('')

    # test accuracy of the pruned tree on the same set
    print('###\n# Validating\n###')
    accuracy2 = validation_accuracy(newtree, pruning_set)
    print(newtree)
    print("Accuracy on validation set of new tree: " + str(accuracy2))
    print('')
def decision_tree_driver(train,
                         validate=False,
                         predict=False,
                         new=False,
                         prune=False,
                         limit_splits_on_numerical=False,
                         limit_depth=False,
                         print_tree=False,
                         print_dnf=False,
                         learning_curve=False):
    """Train an ID3 tree and run validation, pruning, output and curve stages.

    Validation accuracy is reported twice: once right after training and once
    after the (optional) pruning stage.

    Args:
        train: path handed to ``parse`` to load the training rows and the
            attribute metadata.
        validate: path of a validation set, or False to skip validation.
        predict: unused; the prediction stage is disabled in this variant.
        new: unused; it belonged to the disabled prediction stage.
        prune: path of a pruning set, or False to skip pruning.
        limit_splits_on_numerical: split budget replicated once per
            attribute; False means unlimited (``float("inf")``).
        limit_depth: depth limit passed to ``ID3``; False means unlimited.
        print_tree: when truthy, write ``tree.print_tree()`` to
            ./output/tree.txt.
        print_dnf: when truthy, write ``tree.print_dnf_tree()`` to
            ./output/DNF.txt.
        learning_curve: mapping with 'upper_bound' and 'increment' keys, or a
            falsy value to skip the learning curve.  The curve is only
            generated when ``validate`` is also supplied.

    Returns:
        None.  Results are printed or written to files under ./output/.
    """
    train_set, attribute_metadata = parse(train, False)

    # `!= False` is kept on purpose: it also treats 0 as "not supplied".
    if limit_splits_on_numerical != False:
        numerical_splits_count = [limit_splits_on_numerical] * len(attribute_metadata)
    else:
        numerical_splits_count = [float("inf")] * len(attribute_metadata)

    if limit_depth != False:
        depth = limit_depth
    else:
        depth = float("inf")

    # Snapshot taken before training; the learning curve below receives this
    # copy rather than the list handed to ID3.
    # NOTE(review): presumably ID3 consumes the budget in place - confirm.
    origin_splits_count = copy.deepcopy(numerical_splits_count)

    print("###\n# Training Tree\n###")

    # call the ID3 classification algorithm with the appropriate options
    tree = ID3(train_set, attribute_metadata, numerical_splits_count, depth)
    print('finish')

    # accuracy of the freshly trained tree
    if validate != False:
        print('###\n# Validating\n###')
        validate_set, _ = parse(validate, False)
        accuracy = validation_accuracy(tree, validate_set, attribute_metadata)
        print("Accuracy on validation set: " + str(accuracy))
        print('')

    # call reduced error pruning using the pruning set
    if prune != False:
        print('###\n# Pruning\n###')
        pruning_set, _ = parse(prune, False)
        temptree = copy.deepcopy(tree)
        # The pruning routine takes each tree twice; both names of a pair
        # refer to the same object, as in the original call.
        temp_origintree = temptree
        origintree = tree
        reduced_error_pruning(temptree, temp_origintree, tree, origintree,
                              train_set, pruning_set, attribute_metadata)
        print('')

    # print tree visually
    if print_tree:
        print('###\n# Decision Tree\n###')
        # `with` guarantees the handle is closed even if rendering raises.
        with open('./output/tree.txt', 'w+') as cursor:
            cursor.write(tree.print_tree())
        print('Decision Tree written to /output/tree')
        print('')

    # print tree in disjunctive normalized form
    if print_dnf:
        print('###\n# Decision Tree as DNF\n###')
        with open('./output/DNF.txt', 'w+') as cursor:
            cursor.write(tree.print_dnf_tree())
        print('Decision Tree written to /output/DNF')
        print('')

    # test tree accuracy on validation set (again, after pruning)
    if validate != False:
        print('###\n# Validating\n###')
        validate_set, _ = parse(validate, False)
        accuracy = validation_accuracy(tree, validate_set, attribute_metadata)
        print("Accuracy on validation set: " + str(accuracy))
        print('')

    # The prediction stage (create_predictions(tree, predict, new)) is
    # disabled in this variant.

    # generate a learning curve using the validation set
    if learning_curve and validate:
        print('###\n# Generating Learning Curve\n###')
        iterations = 3  # number of times to test each size
        get_graph(train_set, attribute_metadata, validate_set,
                  origin_splits_count, depth, iterations, 0,
                  learning_curve['upper_bound'],
                  learning_curve['increment'])
        print('')
def decision_tree_driver(train, validate=False, predict=False, prune=False,
                         limit_splits_on_numerical=False, limit_depth=False,
                         print_tree=False, print_dnf=False,
                         learning_curve=False):
    """Train an ID3 tree and run pruning, output, validation and curve stages.

    Node counts are printed before pruning and, when pruning runs, after it.

    Args:
        train: path handed to ``parse`` to load the training rows and the
            attribute metadata.
        validate: path of a validation set, or False to skip validation.
        predict: argument forwarded to ``create_predictions``, or False to
            skip prediction.
        prune: path of a pruning set, or False to skip pruning.
        limit_splits_on_numerical: split budget replicated once per
            attribute; False means unlimited (``float("inf")``).
        limit_depth: depth limit passed to ``ID3``; False means unlimited.
        print_tree: when truthy, write ``tree.print_tree()`` to
            ./output/tree.txt.
        print_dnf: when truthy, print ``tree.print_dnf_tree()`` and store the
            same text in ./output/DNF.txt.
        learning_curve: truthy to print learning-curve data; only honoured
            when ``validate`` is also supplied.

    Returns:
        None.  Results are printed or written to files under ./output/.
    """
    train_set, attribute_metadata = parse(train, False)

    # `!= False` is kept on purpose: it also treats 0 as "not supplied".
    if limit_splits_on_numerical != False:
        numerical_splits_count = [limit_splits_on_numerical] * len(attribute_metadata)
    else:
        numerical_splits_count = [float("inf")] * len(attribute_metadata)

    if limit_depth != False:
        depth = limit_depth
    else:
        depth = float("inf")

    print("###\n# Training Tree\n###")

    # call the ID3 classification algorithm with the appropriate options
    tree = ID3(train_set, attribute_metadata, numerical_splits_count, depth)
    print('\n')

    print("Nodes before pruning: " + str(tree.num_nodes()))

    # call reduced error pruning using the pruning set
    if prune != False:
        print('###\n# Pruning\n###')
        pruning_set, _ = parse(prune, False)
        reduced_error_pruning(tree, train_set, pruning_set)
        print('')
        print("Nodes after pruning: " + str(tree.num_nodes()))

    # print tree visually
    if print_tree:
        print('###\n# Decision Tree\n###')
        # `with` guarantees the handle is closed even if rendering raises.
        with open('./output/tree.txt', 'w+') as cursor:
            cursor.write(tree.print_tree())
        print('Decision Tree written to /output/tree')
        print('')

    # print tree in disjunctive normalized form
    if print_dnf:
        print('###\n# Decision Tree as DNF\n###')
        # The original opened (and thereby truncated) this file but never
        # wrote to or closed it, although the message below claims the tree
        # was written.  Whatever gets printed is now also stored in the file.
        with open('./output/DNF.txt', 'w+') as cursor:
            dnf_text = tree.print_dnf_tree()
            print(dnf_text)
            # Guard against an implementation that prints and returns None.
            if dnf_text is not None:
                cursor.write(str(dnf_text))
        print('Decision Tree written to /output/DNF')
        print('')

    # test tree accuracy on training and validation sets
    if validate != False:
        print('###\n# Validating\n###')
        # The training file is parsed again here rather than reusing the rows
        # that went through training/pruning, as in the original.
        train_set, _ = parse(train, False)
        accuracy = validation_accuracy(tree, train_set)
        print("Accuracy on training set: " + str(accuracy))
        validate_set, _ = parse(validate, False)
        accuracy = validation_accuracy(tree, validate_set)
        print("Accuracy on validation set: " + str(accuracy))
        print('')

    # generate predictions on the test set
    if predict != False:
        print('###\n# Generating Predictions on Test Set\n###')
        create_predictions(tree, predict)
        print('')

    # generate a learning curve using the validation set
    if learning_curve and validate:
        print('###\n# Generating Learning Curve\n###')
        iterations = 2  # number of times to test each size
        # NOTE(review): numerical_splits_count is the same list that was
        # handed to ID3 above - confirm ID3 does not modify it in place.
        print(get_graph_data(train_set, attribute_metadata, validate_set,
                             numerical_splits_count, iterations,
                             [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]))
        # The plotting call via get_graph(...) is disabled in this variant.
        print('')
# Script: train an ID3 tree on the training CSV, print it in DNF form, prune
# it against the validation CSV, print the pruned tree, and start reporting
# validation accuracy.  Output is grouped under 'Question N' headings.
from modules.predictions import *
from modules.pickled import *
from modules.parse import *
from modules.node import *
import matplotlib.pyplot as plt

# Import training and validation datasets
TRAINING_SET_PATH = 'data/btrain.csv'
VALIDATION_SET_PATH = 'data/bvalidate.csv'
TEST_SET_PATH = 'data/btest.csv'  # not referenced in the visible part of the script
# parse() returns a (rows, attribute metadata) pair for each file.
# NOTE(review): parse, ID3 and reduced_error_pruning are not defined here;
# presumably they arrive through the star imports above - confirm.
data, attr = parse(TRAINING_SET_PATH, True)
validate_data, validate_attr = parse(VALIDATION_SET_PATH, True)

# Train initial tree and print
# Current best values for accuracy are below
# (a list of fourteen 3s as the third argument, and 5 as the fourth)
tree = ID3(data, attr, 14*[3], 5)
print 'Question 2'
print 'DNF form of initial tree trained on ' + TRAINING_SET_PATH + ':'
# The return value is discarded, so print_dnf_tree() is relied on to emit the
# DNF text itself.
tree.print_dnf_tree()
print '\r\n'

# Prune initial tree and print new tree
# The result of reduced_error_pruning is used as the pruned tree object.
pruned_tree = reduced_error_pruning(tree,data,validate_data)
print 'Question 5'
print 'DNF form of reduced-error pruned tree:'
pruned_tree.print_dnf_tree()
print '\r\n'

# Calculate validation accuracy of initial tree and print
print 'Question 7'
print 'Initial tree validation accuracy:'
def decision_tree_driver(train, validate=False, predict=False, prune=False,
                         limit_splits_on_numerical=False, limit_depth=False,
                         print_tree=False, print_dnf=False,
                         learning_curve=False):
    """Train an ID3 tree and report on it both before and after pruning.

    The tree dump, DNF dump and accuracy report are produced once right after
    training and once more after the (optional) pruning stage; the second
    round runs even when no pruning set was supplied, as in the original.

    Args:
        train: path handed to ``parse`` to load the training rows and the
            attribute metadata.
        validate: path of a validation set, or False to skip the accuracy
            reports.
        predict: argument forwarded to ``create_predictions``, or False to
            skip prediction; the rows it returns are written to
            ./output/predictions.csv.
        prune: path of a pruning set, or False to skip pruning.
        limit_splits_on_numerical: split budget replicated once per
            attribute; False means unlimited (``float("inf")``).
        limit_depth: depth limit passed to ``ID3``; False means unlimited.
        print_tree: when truthy, write ``tree.print_tree()`` to
            ./output/tree.txt and ./output/prune_tree.txt.
        print_dnf: when truthy, write ``tree.print_dnf_tree()`` to
            ./output/DNF.txt and ./output/prune_DNF.txt.
        learning_curve: mapping with 'upper_bound' and 'increment' keys, or a
            falsy value to skip the learning curve.  The curve is only
            generated when ``validate`` is also supplied.

    Returns:
        None.  Results are printed or written to files under ./output/.
    """
    train_set, attribute_metadata = parse(train, False)

    # `!= False` is kept on purpose: it also treats 0 as "not supplied".
    if limit_splits_on_numerical != False:
        numerical_splits_count = [limit_splits_on_numerical] * len(attribute_metadata)
    else:
        numerical_splits_count = [float("inf")] * len(attribute_metadata)

    if limit_depth != False:
        depth = limit_depth
    else:
        depth = float("inf")

    print("###\n# Training Tree\n###")

    # call the ID3 classification algorithm with the appropriate options
    tree = ID3(train_set, attribute_metadata, numerical_splits_count, depth)
    print('\n')

    def report(tree_stem, dnf_stem):
        # Dump the current tree (visual and DNF form) and print its accuracy.
        # Called once before and once after pruning with different file stems.
        if print_tree:
            print('###\n# Decision Tree\n###')
            # `with` guarantees the handle is closed even if rendering raises.
            with open('./output/' + tree_stem + '.txt', 'w+') as cursor:
                cursor.write(tree.print_tree())
            print('Decision Tree written to /output/' + tree_stem)
            print('')

        if print_dnf:
            print('###\n# Decision Tree as DNF\n###')
            with open('./output/' + dnf_stem + '.txt', 'w+') as cursor:
                cursor.write(tree.print_dnf_tree())
            print('Decision Tree written to /output/' + dnf_stem)
            print('')

        if validate != False:
            print('###\n# Validating\n###')
            # The validation file is parsed afresh for every report, and the
            # validation accuracy is computed before the training accuracy,
            # matching the original order of calls.
            validate_set, _ = parse(validate, False)
            accuracy = validation_accuracy(tree, validate_set)
            print("Accuracy on training set: " +
                  str(validation_accuracy(tree, train_set)))
            print("Accuracy on validation set: " + str(accuracy))
            print('')

    # state of the tree right after training
    report('tree', 'DNF')

    # call reduced error pruning using the pruning set
    if prune != False:
        print('###\n# Pruning\n###')
        pruning_set, _ = parse(prune, False)
        reduced_error_pruning(tree, pruning_set)
        print('')

    # state of the tree after the pruning stage
    report('prune_tree', 'prune_DNF')

    # generate predictions on the test set
    if predict != False:
        print('###\n# Generating Predictions on Test Set\n###')
        with open('./output/predictions.csv', 'w+') as cursor:
            writer = csv.writer(cursor)
            # Header names after the first carry a leading space; they are
            # reproduced exactly as the original wrote them.
            fieldnames = ['winner', ' winpercent', ' oppwinpercent',
                          ' weather', ' temperature', ' numinjured',
                          ' oppnuminjured', ' startingpitcher',
                          ' oppstartingpitcher', ' dayssincegame',
                          ' oppdayssincegame', ' homeaway',
                          ' rundifferential', ' opprundifferential']
            writer.writerow(fieldnames)
            writer.writerows(create_predictions(tree, predict))
        print('')

    # generate a learning curve using the validation set
    if learning_curve and validate:
        print('###\n# Generating Learning Curve\n###')
        validate_set, _ = parse(validate, False)
        iterations = 5  # number of times to test each size
        # NOTE(review): numerical_splits_count is the same list that was
        # handed to ID3 above - confirm ID3 does not modify it in place.
        get_graph(train_set, attribute_metadata, validate_set,
                  numerical_splits_count, depth, iterations, 0,
                  learning_curve['upper_bound'],
                  learning_curve['increment'])
        print('')