def test_regression_line_housing_no_libs():
    """Fit a single-feature regression on the housing data and report its MSE.

    Testing 2 variable solution for HW1 prob 2.

    Loads the normalized housing train/test split, fits the first feature
    column against 'MEDV' with ``mystats.linear_regression_points``, prints
    the intermediate values, plots the fit with ``myplot.fit_v_point`` and
    finally prints the mean squared error of the fit against 'MEDV'.

    This is a print/plot driver: it takes no arguments, returns nothing and
    makes no assertions.
    """
    # Single-argument parenthesised prints behave the same on Python 2 and 3.
    print('Testing linear regression with 2 columns')
    test, train = utils.load_and_normalize_housing_set()
    print(str(len(train)) + " # in training set <--> # in test " + str(len(test)))

    # Every column except the last one is treated as a feature.
    columns = train.columns[:-1]
    feature = train[columns[0]]
    target = train['MEDV']

    Y_fit = mystats.linear_regression_points(feature, target)
    print('Y_fit')
    print(Y_fit)
    for i, fit_part in enumerate(Y_fit):
        print(str(fit_part) + ' -- ' + str(target[i]))
    print(feature)

    # NOTE(review): the first and last entries of Y_fit are summed to form the
    # fitted values -- presumably a per-feature term plus an intercept term;
    # confirm against mystats.linear_regression_points.
    col_fit = Y_fit[0] + Y_fit[-1]
    myplot.fit_v_point([feature, target, list(col_fit)])

    print(columns[0])
    # NOTE(review): the result is keyed by the literal 'CRIM' while the fit
    # uses columns[0]; confirm these always name the same column.
    col_MSE = {'CRIM': mystats.compute_MSE_arrays(col_fit, target)}
    print(col_MSE)
def testScale():
    """Plot random values, and their rescaled counterparts, against 'MEDV'.

    Loads the normalized housing training set, takes the subset of the
    'CRIM', 'TAX', 'B' and 'MEDV' columns requested with ``n=10`` from
    ``utils.train_subset``, draws one ``random.random()`` value per entry of
    that subset's 'TAX' column, rescales those values with ``utils.scale``
    using the minimum and maximum of 'TAX', and hands the raw values, the
    'MEDV' column and the rescaled values to ``plot.fit_v_point``.

    Takes no arguments, returns nothing and makes no assertions.
    """
    _test, train = utils.load_and_normalize_housing_set()
    sample = utils.train_subset(
        pd.DataFrame(train), ['CRIM', 'TAX', 'B', 'MEDV'], n=10)

    tax = sample['TAX']
    weights = [random.random() for _unused in range(len(tax))]
    rescaled = utils.scale(weights, min(tax), max(tax))

    # NOTE(review): this uses `plot` whereas the regression test in this file
    # uses `myplot`; confirm which module name is actually imported.
    plot.fit_v_point([weights, sample['MEDV'], rescaled])