def testHW2_subset():
    """Print what hw2.linear_gd returns for a CRIM/TAX/MEDV housing subset.

    Author's status note: Success.
    """
    # Only the second value returned by the loader is used in this check.
    _, train = utils.load_and_normalize_housing_set()
    housing = pd.DataFrame(train)

    requested = ('CRIM', 'TAX', 'B', 'MEDV')
    # Both subsets are taken from the same frame, "test" first and then
    # "train", each with n=10 (presumably a row count -- confirm against
    # utils.train_subset).
    test_part = utils.train_subset(housing, list(requested), n=10)
    train_part = utils.train_subset(housing, list(requested), n=10)

    # 'B' is requested above but is not carried into the frames handed to
    # linear_gd; only these three columns are.
    kept = ('CRIM', 'TAX', 'MEDV')
    x_test = pd.DataFrame([test_part[name] for name in kept]).transpose()
    x_train = pd.DataFrame([train_part[name] for name in kept]).transpose()

    # Single-argument print(...) emits the same text under Python 2's
    # print statement.
    print(hw2.linear_gd(x_train, x_test, 'MEDV'))
def testHW2_allcols():
    """Print what hw2.linear_gd returns when every non-'MEDV' column is requested.

    Author's status note: Fail.
    """
    # Only the second value returned by the loader is used in this check.
    _, train = utils.load_and_normalize_housing_set()
    housing = pd.DataFrame(train)

    # NOTE(review): 'MEDV' is filtered out of the requested columns, yet it is
    # the column name passed to linear_gd below -- confirm whether
    # utils.train_subset keeps it; this may relate to the "Fail" note.
    feature_cols = [name for name in housing.columns if name != 'MEDV']

    # Same frame for both subsets, "test" drawn first, then "train".
    test_part = utils.train_subset(housing, feature_cols, n=10)
    train_part = utils.train_subset(housing, feature_cols, n=10)

    print(hw2.linear_gd(train_part, test_part, 'MEDV'))
def testPdToDict():
    """Print a small spambase subset, then its hw3.pandas_to_data conversion."""
    spam = hw3.load_and_normalize_spambase()
    # Restrict the subset request to the first three columns of the frame.
    leading_cols = spam.columns[:3]
    sample = utils.train_subset(spam, leading_cols, 5)

    print(sample)
    converted = hw3.pandas_to_data(sample)
    print(converted)
def testTransposeArray():
    """Print converted spambase data before and after hw3.transpose_array."""
    spam = hw3.load_and_normalize_spambase()
    # Restrict the subset request to the first three columns of the frame.
    leading_cols = spam.columns[:3]
    sample = utils.train_subset(spam, leading_cols, 5)

    as_data = hw3.pandas_to_data(sample)
    print(as_data)

    flipped = hw3.transpose_array(as_data)
    print(flipped)
def testScale():
    """Plot random values, the MEDV column, and the values after utils.scale.

    One random.random() draw is made per entry of the subset's 'TAX' column;
    the draws are passed to utils.scale together with that column's min and
    max, and the three series are handed to plot.fit_v_point.
    """
    # Only the second value returned by the loader is used in this check.
    _, train = utils.load_and_normalize_housing_set()
    housing = pd.DataFrame(train)
    subset = utils.train_subset(housing, ['CRIM', 'TAX', 'B', 'MEDV'], n=10)

    tax = subset['TAX']
    draws = [random.random() for _ in range(len(tax))]
    rescaled = utils.scale(draws, min(tax), max(tax))

    plot.fit_v_point([draws, subset['MEDV'], rescaled])
def testGradient_by_columns(df, cols):
    """Fit gd.gradient on the given columns of ``df`` and print diagnostics.

    Author's status note: fail.

    Args:
        df: frame passed to utils.train_subset; its length is used as ``n``.
        cols: columns requested from utils.train_subset. The result is indexed
            with 'MEDV' below, so that column has to be present in it.
    """
    subset = utils.train_subset(df, cols, n=len(df))

    row_count = len(subset)
    print(row_count)
    print(subset)

    # The 'MEDV' column is used as the target while the whole subset (which
    # also holds 'MEDV') is passed as the first argument.
    target = subset['MEDV'].head(row_count)
    fit = gd.gradient(subset, target, 1e-05, max_iterations=5000)

    print('read v fit')
    print(len(subset))
    print(subset['MEDV'].head(10))
    print(fit)
    print(np.dot(subset, fit))
def testGradient():
    """Fit gd.gradient on CRIM and TAX against MEDV and print diagnostics.

    Author's status note: Great success with subset.
    """
    # Only the second value returned by the loader is used in this check.
    _, train = utils.load_and_normalize_housing_set()
    housing = pd.DataFrame(train)

    subset_size = 100
    sample = utils.train_subset(
        housing, ['CRIM', 'TAX', 'B', 'MEDV'], n=subset_size)

    # Only CRIM and TAX are used as inputs to the fit.
    features = pd.DataFrame([sample['CRIM'], sample['TAX']]).transpose()
    print(len(features))
    print(features)

    target = sample['MEDV'].head(subset_size)
    # Third positional argument is 0.5 (presumably the step size -- confirm
    # against gd.gradient).
    fit = gd.gradient(features, target, 0.5, max_iterations=300)

    print('read v fit')
    print(len(features))
    print(sample['MEDV'].head(10))
    print(fit)

    # gd.add_col is applied to the converted features before the dot product
    # with the fit (presumably adding a bias column -- confirm in gd).
    augmented = gd.add_col(gd.pandas_to_data(features), 1)
    print(np.dot(augmented, fit))