# Example no. 1
# 0
def decision_spambase_set_no_libs():
    """Solution for HW1 problem 1: regression decision tree on Spambase.

    Loads the Spambase data through ``utils``, splits it into test and
    training sets, grows a tree on the training set with ``branch_node``,
    then re-uses the same root node to evaluate the test set with
    ``test_node``. All results (tree dump, leaf summaries, training error,
    confusion-matrix counts, accuracy and MSE) are printed to stdout.

    Takes no arguments and returns ``None``.

    Every ``print`` call is given exactly one argument so the output is
    identical under Python 2 (where the parentheses are just grouping) and
    Python 3 (where ``print`` is a function).
    """
    print('Homework 1 problem 1 - No Libraries - Regression Decision tree')
    print('Spambase Dataset')
    spam_data = utils.load_and_normalize_spam_data()
    test, train = utils.split_test_and_train(spam_data)
    print(str(len(train)) + " # in training set <--> # in test " + str(len(test)))

    # --- Training -------------------------------------------------------
    # The root starts with an all-ones vector, one entry per training row.
    node = mytree.Node(np.ones(len(train)))
    # NOTE(review): the literal 5 is presumably a depth/size limit for the
    # tree -- confirm against branch_node's signature.
    branch_node(node, train, 5, 'is_spam')
    node.show_children_tree(follow=False)

    model = mytree.Tree(node)
    model.print_leaves()
    print('Trained model error is : ' + str(model.error()))

    # --- Testing --------------------------------------------------------
    # The trained root is re-used: its presence vector is reset to all ones
    # sized for the test set before test_node is run on the test rows.
    # NOTE(review): `model` above wraps this same node object, so it likely
    # observes these changes too -- confirm if `model` is used afterwards.
    node.presence = np.ones(len(test))
    test_node(node, test, 'is_spam')
    test_tree = mytree.Tree(node)
    prediction = test_tree.predict_obj()
    test_tree.print_leaves_test()
    print('predict sum: ' + str(sum(prediction)))
    print('MSE:' + str(test_tree.error_test()))

    # `.values` replaces the removed pandas `as_matrix()`; both yield the
    # column as a NumPy array.
    actual = test['is_spam'].values
    tp, tn, fp, fn = mystats.get_performance_stats(actual, prediction)
    print('TP: {}\tFP: {}\nTN: {}\tFN: {}'.format(tp, fp, tn, fn))
    print('Accuracy: ' + str(mystats.compute_accuracy(tp, tn, fp, fn)))
    print('MSE: ' + str(mystats.compute_MSE_arrays(prediction, test['is_spam'])))