コード例 #1
0
def test_regression_line_housing_no_libs():
    """
    Debug run of the HW1 prob 2 regression using a single feature column.

    Fits ``mystats.linear_regression_points`` on the first training column
    against 'MEDV', prints the fit next to the training targets, plots the
    result with ``myplot.fit_v_point`` and prints the MSE stored under 'CRIM'.
    """
    print('Testing linear regression with 2 columns')
    test, train = utils.load_and_normalize_housing_set()
    print(str(len(train)) + " # in training set <--> # in test " + str(len(test)))

    # Every column except the last one (presumably the last is the 'MEDV'
    # target -- TODO confirm against the loader).
    columns = train.columns[:-1]
    first_feature = train[columns[0]]
    target = train['MEDV']

    Y_fit = mystats.linear_regression_points(first_feature, target)
    print('Y_fit')
    print(Y_fit)
    for idx in range(len(Y_fit)):
        print(str(Y_fit[idx]) + ' -- ' + str(target[idx]))
    print(first_feature)

    # First plus last fit component, as the original used for both the plot
    # and the MSE.
    myplot.fit_v_point([first_feature, target, list(Y_fit[0] + Y_fit[-1])])

    print(columns[0])
    # The result is stored under the hard-coded 'CRIM' key, independent of
    # what columns[0] actually is.
    col_MSE = {'CRIM': mystats.compute_MSE_arrays(Y_fit[0] + Y_fit[-1], target)}
    print(col_MSE)
コード例 #2
0
def testHW2():  # Success
    """Run ``hw2.linear_gd`` on the full housing train/test split with target 'MEDV'."""
    test, train = utils.load_and_normalize_housing_set()
    training_frame = pd.DataFrame(train)
    testing_frame = pd.DataFrame(test)
    # Show the first rows of the training frame before fitting.
    print(training_frame.head(10))
    outcome = hw2.linear_gd(training_frame, testing_frame, 'MEDV')
    print(outcome)
コード例 #3
0
def regression_line_housing_no_libs():
    """
    Solution for HW1 prob 2: regression line on the housing set, no libraries.

    Fits ``mystats.linear_regression_points`` on all columns but the last
    against 'MEDV', sums the fit components element-wise, then prints a
    per-column MSE dict together with the average MSE and average RMSE.
    """
    print('Homework 1 problem 2 - No Libraries - Regression Line')
    print('Housing Dataset')
    test, train = utils.load_and_normalize_housing_set()
    print(str(len(train)) + " # in training set <--> # in test " + str(len(test)))

    columns = train.columns[:-1]
    Y_fit = mystats.linear_regression_points(train[columns], train['MEDV'])
    print('Y_fit')
    print(Y_fit)

    # Element-wise sum over every component of Y_fit; the accumulator is
    # sized from the first component.
    row_sums = np.zeros(len(Y_fit[0]))
    for component in Y_fit:
        for pos in range(len(component)):
            row_sums[pos] += component[pos]

    print(row_sums)

    # NOTE(review): row_sums is indexed here by the *column* position and a
    # single scalar is handed to compute_MSE; the original carried the
    # alternative "Y_fit[i] + Y_fit[-1]" in a comment. Confirm the intent.
    col_MSE = dict(
        (name, mystats.compute_MSE(row_sums[pos], train['MEDV']))
        for pos, name in enumerate(columns)
    )
    print(col_MSE)

    RMSE = np.sqrt(col_MSE.values())
    average_MSE = utils.average(col_MSE.values())
    average_RMSE = utils.average(RMSE)
    print('Average MSE: ' + str(average_MSE))
    print('Average RMSE: ' + str(average_RMSE))
コード例 #4
0
def testHW2_subset(): # Success
    """
    Run ``hw2.linear_gd`` on two 10-row subsets restricted to CRIM/TAX/MEDV.

    NOTE(review): both the "test" and the "train" subset are drawn from the
    training split; the loaded test split is not used.
    """
    test, train = utils.load_and_normalize_housing_set()
    df_full = pd.DataFrame(train)

    subset_columns = ('CRIM', 'TAX', 'B', 'MEDV')
    model_columns = ('CRIM', 'TAX', 'MEDV')

    # A fresh list is passed to each call, as in the original.
    df_test = utils.train_subset(df_full, list(subset_columns), n=10)
    df_train = utils.train_subset(df_full, list(subset_columns), n=10)

    # Rebuild frames from the selected series; 'B' is dropped at this point.
    dfX_test = pd.DataFrame([df_test[name] for name in model_columns]).transpose()
    dfX_train = pd.DataFrame([df_train[name] for name in model_columns]).transpose()

    print(hw2.linear_gd(dfX_train, dfX_test, 'MEDV'))
コード例 #5
0
def testHW2_allcols():  # Previously marked "Fail"
    """
    Run ``hw2.linear_gd`` on 10-row subsets that keep every housing column.

    The original built the column list *without* 'MEDV' and then asked
    ``hw2.linear_gd`` to use 'MEDV' as the target. Every other caller of
    ``utils.train_subset`` in this file passes 'MEDV' as the last requested
    column, so the target is now kept (last) in the list.

    NOTE(review): this assumes ``utils.train_subset`` restricts the frame to
    the requested columns -- confirm against its implementation.
    NOTE(review): both subsets are drawn from the training split; the loaded
    test split is unused.
    """
    test, train = utils.load_and_normalize_housing_set()
    df_full = pd.DataFrame(train)

    # All feature columns followed by the target, matching the
    # [..., 'MEDV'] convention used by the sibling tests.
    cols = [col for col in df_full.columns if col != 'MEDV']
    cols.append('MEDV')

    df_test = utils.train_subset(df_full, list(cols), n=10)
    df_train = utils.train_subset(df_full, list(cols), n=10)
    print(hw2.linear_gd(df_train, df_test, 'MEDV'))
コード例 #6
0
def testScale():
    """
    Plot random weights against 'MEDV' together with their rescaled version.

    One ``random.random()`` value is drawn per row of a 10-row subset; the
    values are passed to ``utils.scale`` with the min and max of 'TAX'.
    """
    test, train = utils.load_and_normalize_housing_set()
    df_full = pd.DataFrame(train)
    df = utils.train_subset(df_full, ['CRIM', 'TAX', 'B', 'MEDV'], n=10)

    tax = df['TAX']
    w = [random.random() for _ in range(len(tax))]
    scaled = utils.scale(w, min(tax), max(tax))
    plot.fit_v_point([w, df['MEDV'], scaled])
コード例 #7
0
def regression_housing_set():
    """
    Solution for HW1 prob 1: regression decision tree on the housing set.

    Trains with ``train_regression_tree``, predicts on the test split with
    ``test_regression_tree`` and prints the error reported by
    ``mystats.calculate_chisq_error`` against the test 'MEDV' column.
    """
    print('Homework 1 problem 1 - Regression Decision tree')
    print('Housing Dataset')
    test, train = utils.load_and_normalize_housing_set()
    fitted_tree = train_regression_tree(train)
    predictions = test_regression_tree(fitted_tree, test)
    chisq = mystats.calculate_chisq_error(predictions, test['MEDV'])
    print('Error: ' + str(chisq))
コード例 #8
0
def testGradientByColumn():
    """
    Run ``testGradient_by_columns`` on progressively longer column prefixes.

    For each stop index from 2 up to (number of columns - 2) the columns at
    positions 1..stop-1 are collected, skipping the blacklisted ones, and
    'MEDV' is appended. The list is printed and the function waits for Enter
    before each run.
    """
    test, train = utils.load_and_normalize_housing_set()
    blacklist = ['NOX', 'RM']
    df_full = pd.DataFrame(train)
    for stop in range(2, len(df_full.columns) - 1):
        # Column 0 is deliberately never included (the slice starts at 1).
        cols = [name for name in df_full.columns[1:stop] if name not in blacklist]
        cols.append('MEDV')
        print(cols)
        raw_input()
        testGradient_by_columns(df_full, cols)
コード例 #9
0
def q7():
    """
    Fit ``gradb.GradientBoostRegressor`` on the housing data and print its MSE.

    Uses 100 estimators of depth-1 ``DecisionTreeRegressor`` learners with a
    learning rate of 0.1, prints the model stats, the first ten true and
    predicted test values, and the test MSE from ``hw4.compute_mse``.
    """
    h_test, h_train = utils.load_and_normalize_housing_set()
    test_data = hw3.pandas_to_data(h_test)
    train_data = hw3.pandas_to_data(h_train)
    y, X = hw4.split_truth_from_data(train_data)
    y_test, X_test = hw4.split_truth_from_data(test_data)

    gb = gradb.GradientBoostRegressor(
        learning_rate=.1,
        n_estimators=100,
        max_depth=1,
        learner=lambda: DecisionTreeRegressor(max_depth=1),
    )
    gb.fit(X, y)
    gb.print_stats()

    yhat = gb.predict(X_test)
    print(y_test[:10])
    print(yhat[:10])
    mse = hw4.compute_mse(y_test, yhat)
    print('MSE: {}'.format(mse))
コード例 #10
0
def do2A():
    """
    HW 2A
    Train linear regression using gradient descent on the housing data.

    Runs ``gradient_descent`` for 50 iterations with a learning parameter of
    0.25 and target 'MEDV', then prints the returned error matrix and theta.

    The original fitted on the held-out test split and never used the
    training split it loaded; the model is now fitted on the training split.

    NOTE(review): the HW description also mentions the spambase set, but
    only the housing set is handled here.
    """
    print('HW2 A. Gradient descent with housing and spam data sets')
    num_iters = 50
    learning_param = 0.25
    # The test split is loaded but intentionally not used for fitting.
    housingData_test, housingData_train = utils.load_and_normalize_housing_set()
    theta, error_matrix = gradient_descent(
        housingData_train, 'MEDV', num_iters, learning_param)
    print('Errors for housing set')
    print(error_matrix)
    print('theta for housing set')
    print(theta)
コード例 #11
0
def q_1():
    """
    Run the model comparison on the spam data and print the results.

    The housing split is loaded but all housing runs are currently disabled,
    so ``h_results`` is passed to ``print_results_1`` as an empty list.
    """
    h_test, h_train = utils.load_and_normalize_housing_set()
    h_results = []
    # Disabled housing runs, kept with the figures the author recorded:
    # h_results.append(dec_or_reg_tree(h_train, h_test, 'MEDV')) # MSE - 568 test- 448
    # h_results.append(linear_reg_errors(h_train, h_test, 'MEDV')) # MSE - 27 test -14
    # h_results.append(linear_reg_errors(h_train, h_test, 'MEDV', True)) # 24176 - 68289
    # h_results.append(linear_gd(h_train, h_test, 'MEDV')) # works but MSE too low? .0022 - .0013
    # h_results.append(logistic_gd(h_train, h_test, 'MEDV'))  # 1.46e_13 - 1.17e+13

    spam = utils.load_and_normalize_spam_data()
    s_test, s_train = utils.split_test_and_train(spam)
    # Evaluated in order; trailing notes are the author's recorded outcomes.
    s_results = [
        dec_or_reg_tree(s_train, s_test, "is_spam"),  # works .845 - .86
        linear_reg_errors(s_train, s_test, "is_spam"),  # works .8609 - .903
        linear_reg_errors(s_train, s_test, "is_spam", True),  # works .8416 - .8543
        k_folds_linear_gd(s_train, s_test, "is_spam"),  # does not work .6114 - .6114
        logistic_gd(s_train, s_test, "is_spam"),  # returns perfect... 1- 1
    ]
    print_results_1(s_results, h_results)
コード例 #12
0
def testGradient():  # Great success with subset
    """
    Fit ``gd.gradient`` on a 100-row CRIM/TAX subset and print the results.

    Prints the feature frame, the fitted weights, and the dot product of the
    data (after ``gd.add_col(..., 1)``) with the fit.
    """
    test, train = utils.load_and_normalize_housing_set()
    df_full = pd.DataFrame(train)
    subset_size = 100
    df = utils.train_subset(df_full, ['CRIM', 'TAX', 'B', 'MEDV'], n=subset_size)

    # Feature frame holding only the two predictor columns.
    dfX = pd.DataFrame([df[name] for name in ('CRIM', 'TAX')]).transpose()
    print(len(dfX))
    print(dfX)

    targets = df['MEDV'].head(subset_size)
    fit = gd.gradient(dfX, targets, .5, max_iterations=300)

    print('read v fit')
    print(len(dfX))
    print(df['MEDV'].head(10))
    print(fit)
    data = gd.add_col(gd.pandas_to_data(dfX), 1)
    print(np.dot(data, fit))
コード例 #13
0
def decision_housing_set_no_libs():
    """
    Solution for HW1 prob 1: regression decision tree without libraries.

    Grows a tree on the housing training split with ``branch_node``, prints
    the tree, its leaves, the model error and the training MSE, and then
    terminates the process with ``sys.exit()``.

    NOTE(review): everything after ``sys.exit()`` (the evaluation on the
    test split) is unreachable as written; it is kept unchanged so it can be
    re-enabled by removing the exit call.
    """
    print('Homework 1 problem 1 - No Libraries - Regression Decision tree')
    print('Housing Dataset')
    test, train = utils.load_and_normalize_housing_set()

    print(str(len(train)) + " # in training set <--> # in test " + str(len(test)))
    root = mytree.Node(np.ones(len(train)))
    branch_node(root, train, 2, 'MEDV', regression=True)
    root.show_children_tree(follow=False)

    model = mytree.Tree(root)
    model.print_leaves()
    model.print_tree(train)
    print('Trained model error is : ' + str(model.error()))
    train_prediction = model.predict_obj()
    train_mse = mystats.compute_MSE_arrays(train_prediction, train['MEDV'])
    print('Training MSE is: ' + str(train_mse))
    sys.exit()

    # ---- unreachable from here on (see the docstring) ----
    root.presence = np.ones(len(test))
    test_node(root, test, 'MEDV', regression=True)
    test_tree = mytree.Tree(root)
    prediction = test_tree.predict_obj()
    print('predict sum: ' + str(sum(prediction)))
    test_tree.print_leaves_test()
    print('ERROR: ' + str(test_tree.error_test()))
    print(prediction)
    print('train')
    print(train['MEDV'])
    print('test')
    print(test['MEDV'])
    MSE = mystats.compute_MSE_arrays(prediction, test['MEDV'])
    print('MSE: ' + str(MSE))
    print('RMSE: ' + str(np.sqrt(MSE)))

    test_tree.print_tree(test, long=False)
コード例 #14
0
def testLinRidge():
    """Print the result of ``hw2.linear_reg`` on the housing training split ('MEDV')."""
    h_test, h_train = utils.load_and_normalize_housing_set()
    # Both trailing flags are passed as False; their meaning is defined by
    # hw2.linear_reg (not visible here).
    outcome = hw2.linear_reg(h_train, 'MEDV', False, False)
    print(outcome)
コード例 #15
0
def do2B():
    """Run ``logistic_regression`` on the housing train/test split with target 'MEDV'."""
    # The loader returns the test split first, then the training split.
    testing, training = utils.load_and_normalize_housing_set()
    logistic_regression(training, testing, 'MEDV')