# Example #1
# 0
def test_regression_line_housing_no_libs():
    """
    Testing 2 variable solution for HW1 prob 2

    Loads the housing data via ``utils.load_and_normalize_housing_set()``,
    fits ``mystats.linear_regression_points`` on the first feature column of
    the training frame against ``MEDV``, prints the fit next to the observed
    targets, plots it with ``myplot.fit_v_point`` and finally prints the MSE
    returned by ``mystats.compute_MSE_arrays``.

    Takes no arguments and returns nothing; all results go to stdout and to
    the plot call.
    """
    print('Testing linear regression with 2 columns')
    # The loader is unpacked as (test, train), in that order.
    test, train = utils.load_and_normalize_housing_set()
    print(str(len(train)) + " # in training set <--> # in test "
          + str(len(test)))

    # Every column except the last one is treated as a candidate feature;
    # only the first of them is used below.
    columns = train.columns[:-1]
    feature = train[columns[0]]
    target = train['MEDV']

    Y_fit = mystats.linear_regression_points(feature, target)
    print('Y_fit')
    print(Y_fit)
    for i, fitted in enumerate(Y_fit):
        print(str(fitted) + ' -- ' + str(target[i]))
    print(feature)

    # The first and last entries of Y_fit are summed to form the fitted
    # values used for both the plot and the MSE.
    # NOTE(review): presumably these are the slope term and the intercept
    # term -- confirm against mystats.linear_regression_points.
    col_fit = Y_fit[0] + Y_fit[-1]
    myplot.fit_v_point([feature, target, list(col_fit)])

    print(columns[0])
    # NOTE(review): the result key is hard-coded to 'CRIM' while the fit uses
    # columns[0]; confirm that these always name the same column.
    col = 'CRIM'
    col_MSE = {col: mystats.compute_MSE_arrays(col_fit, target)}
    print(col_MSE)
def testScale():
    """Plot random weights and their scaled counterparts against MEDV.

    Takes a 10-row subset (``n=10``) of the training data restricted to the
    CRIM, TAX, B and MEDV columns, draws one ``random.random()`` value per
    TAX entry, passes those values to ``utils.scale`` together with the
    minimum and maximum of TAX, and hands the raw values, the MEDV column
    and the scaled values to ``plot.fit_v_point``.

    Takes no arguments and returns nothing.
    """
    # The loader is unpacked as (test, train); only the training part is used.
    _test, train = utils.load_and_normalize_housing_set()
    subset = utils.train_subset(
        pd.DataFrame(train), ['CRIM', 'TAX', 'B', 'MEDV'], n=10)

    tax = subset['TAX']
    weights = [random.random() for _ in range(len(tax))]

    # NOTE(review): presumably utils.scale maps the weights into the
    # [min(TAX), max(TAX)] range -- confirm against its definition.
    rescaled = utils.scale(weights, min(tax), max(tax))

    # NOTE(review): the sibling test calls `myplot.fit_v_point`; confirm that
    # `plot` is the intended module alias here.
    plot.fit_v_point([weights, subset['MEDV'], rescaled])