コード例 #1
0
def decision_spambase_set_no_libs():
    """
    Solution for HW1 prob 1: decision tree on the Spambase dataset.

    Loads the spam data through ``utils``, splits it into test and train
    sets, grows a tree on the training rows with ``branch_node``, then
    re-runs the same root node over the test rows with ``test_node`` and
    prints the errors and the TP/TN/FP/FN statistics.

    Nothing is returned; every result is written to stdout.  All
    ``print`` calls take a single argument so the output is identical
    under Python 2 and Python 3.
    """
    label = 'is_spam'

    print('Homework 1 problem 1 - No Libraries - Regression Decision tree')
    print('Spambase Dataset')
    spam_data = utils.load_and_normalize_spam_data()
    test, train = utils.split_test_and_train(spam_data)
    print(str(len(train)) + " # in training set <--> # in test " + str(len(test)))

    # The root is built from a vector of ones, one per training row
    # (presumably the row-presence mask that is reset via node.presence
    # below -- confirm against mytree.Node).
    node = mytree.Node(np.ones(len(train)))
    # NOTE(review): the literal 5 is presumably a depth limit -- confirm
    # against branch_node.
    branch_node(node, train, 5, label)
    node.show_children_tree(follow=False)

    model = mytree.Tree(node)
    model.print_leaves()
    print('Trained model error is : ' + str(model.error()))

    # Reuse the trained root for the test rows: reset its presence vector
    # to the test-set length before pushing the test data through it.
    node.presence = np.ones(len(test))
    test_node(node, test, label)
    test_tree = mytree.Tree(node)
    prediction = test_tree.predict_obj()
    test_tree.print_leaves_test()
    print('predict sum: ' + str(sum(prediction)))
    print('MSE:' + str(test_tree.error_test()))

    # ``.values`` replaces ``Series.as_matrix()``, which was deprecated in
    # pandas 0.23 and removed in 1.0; both yield the underlying ndarray.
    truth = test[label].values
    tp, tn, fp, fn = mystats.get_performance_stats(truth, prediction)
    print('TP: {}\tFP: {}\nTN: {}\tFN: {}'.format(tp, fp, tn, fn))
    print('Accuracy: ' + str(mystats.compute_accuracy(tp, tn, fp, fn)))
    print('MSE: ' + str(mystats.compute_MSE_arrays(prediction, test[label])))
コード例 #2
0
def regression_line_spam_no_libs():
    """
    Solution for HW1 prob 2: linear regression on the Spambase dataset.

    Loads and splits the spam data, fits the training features against
    the ``is_spam`` column with ``mystats.linear_regression_points`` and
    prints a per-column MSE dictionary followed by the average MSE and
    average RMSE over those columns.

    Nothing is returned; every result is written to stdout.  All
    ``print`` calls take a single argument so the output is identical
    under Python 2 and Python 3.
    """
    print('Homework 1 problem 2 - No Libraries - Regression Line')
    print('Spam Dataset')
    spam_data = utils.load_and_normalize_spam_data()
    test, train = utils.split_test_and_train(spam_data)

    # Every column except the last one is used as a feature.
    columns = train.columns[:-1]
    target = train['is_spam']
    Y_fit = mystats.linear_regression_points(train[columns], target)

    col_MSE = {}
    for i, col in enumerate(columns):
        # Each column's entry is combined with the last entry of Y_fit
        # (presumably the intercept term -- confirm against
        # mystats.linear_regression_points).
        col_fit = Y_fit[i] + Y_fit[-1]
        col_MSE[col] = mystats.compute_MSE_arrays(col_fit, target)
    print(col_MSE)

    # Materialise the values once: on Python 3 dict.values() is a view
    # that np.sqrt cannot consume directly.
    mse_values = list(col_MSE.values())
    RMSE = np.sqrt(mse_values)
    average_MSE = utils.average(mse_values)
    average_RMSE = utils.average(RMSE)
    print('Average MSE: ' + str(average_MSE))
    print('Average RMSE: ' + str(average_RMSE))
コード例 #3
0
def test_regression_line_housing_no_libs():
    """
    Testing 2 variable solution for HW1 prob 2.

    Fits the first feature column of the housing training set against
    ``MEDV`` with ``mystats.linear_regression_points``, prints the fit
    next to the training targets, plots the fit through
    ``myplot.fit_v_point`` and prints the MSE for that single column.

    Nothing is returned; every result is written to stdout or plotted.
    All ``print`` calls take a single argument so the output is identical
    under Python 2 and Python 3.
    """
    print('Testing linear regression with 2 columns')
    test, train = utils.load_and_normalize_housing_set()
    print(str(len(train)) + " # in training set <--> # in test " + str(len(test)))

    # Every column except the last one is a feature; only the first is used.
    columns = train.columns[:-1]
    first_col = columns[0]
    target = train['MEDV']
    Y_fit = mystats.linear_regression_points(train[first_col], target)

    print('Y_fit')
    print(Y_fit)
    # NOTE(review): this pairs entry k of Y_fit with target label k; it
    # relies on the training frame being indexed 0..len(Y_fit)-1 -- confirm.
    for k in range(0, len(Y_fit)):
        print(str(Y_fit[k]) + ' -- ' + str(target[k]))
    print(train[first_col])

    # First entry combined with the last entry of Y_fit (presumably slope
    # contribution plus intercept -- confirm against mystats).  Computed
    # once and shared by the plot and the MSE below.
    col_fit = Y_fit[0] + Y_fit[-1]
    myplot.fit_v_point([train[first_col], target, list(col_fit)])

    col_MSE = {}
    print(first_col)
    # NOTE(review): the result is keyed by the literal 'CRIM' rather than
    # by columns[0]; kept as-is to preserve the printed output.
    col = 'CRIM'
    col_MSE[col] = mystats.compute_MSE_arrays(col_fit, target)
    print(col_MSE)
コード例 #4
0
def decision_housing_set_no_libs():
    """
    Solution for HW1 prob 1: regression decision tree on the Housing dataset.

    Loads the housing test/train split through ``utils``, grows a
    regression tree on the training rows with ``branch_node``, prints the
    training error, then re-runs the same root node over the test rows
    with ``test_node`` and prints the test predictions, MSE and RMSE.

    Nothing is returned; every result is written to stdout.  All
    ``print`` calls take a single argument so the output is identical
    under Python 2 and Python 3.
    """
    label = 'MEDV'

    print('Homework 1 problem 1 - No Libraries - Regression Decision tree')
    print('Housing Dataset')
    test, train = utils.load_and_normalize_housing_set()

    print(str(len(train)) + " # in training set <--> # in test " + str(len(test)))

    # The root is built from a vector of ones, one per training row
    # (presumably the row-presence mask that is reset via node.presence
    # below -- confirm against mytree.Node).
    node = mytree.Node(np.ones(len(train)))
    # NOTE(review): the literal 2 is presumably a depth limit -- confirm
    # against branch_node.
    branch_node(node, train, 2, label, regression=True)
    node.show_children_tree(follow=False)

    model = mytree.Tree(node)
    model.print_leaves()
    model.print_tree(train)
    print('Trained model error is : ' + str(model.error()))
    train_prediction = model.predict_obj()
    print('Training MSE is: ' + str(mystats.compute_MSE_arrays(train_prediction, train[label])))

    # NOTE: an unconditional sys.exit() used to sit here (debugging
    # leftover), which made the whole test-set evaluation below
    # unreachable.  It has been removed so the function evaluates the
    # test set like its spambase counterpart does.

    # Reuse the trained root for the test rows: reset its presence vector
    # to the test-set length before pushing the test data through it.
    node.presence = np.ones(len(test))
    test_node(node, test, label, regression=True)
    test_tree = mytree.Tree(node)
    prediction = test_tree.predict_obj()
    print('predict sum: ' + str(sum(prediction)))
    test_tree.print_leaves_test()
    print('ERROR: ' + str(test_tree.error_test()))
    print(prediction)
    print('train')
    print(train[label])
    print('test')
    print(test[label])
    MSE = mystats.compute_MSE_arrays(prediction, test[label])
    print('MSE: ' + str(MSE))
    print('RMSE: ' + str(np.sqrt(MSE)))

    test_tree.print_tree(test, long=False)