'sqft_lot', 'floors', 'waterfront', 'view', 'condition', 'grade', 'sqft_above', 'sqft_basement', 'yr_built', 'yr_renovated', 'lat', 'long', 'sqft_living15', 'sqft_lot15'] features_train, output_train = get_numpy_data(train, feature_list, 'price') features_test, output_test = get_numpy_data(test, feature_list, 'price') features_valid, output_valid = get_numpy_data(validation, feature_list, 'price') features_train, norms = normalize_features(features_train) # normalize training set features (columns) features_test = features_test / norms # normalize test set by training set norms features_valid = features_valid / norms # normalize validation set by training set norms print features_test[0] print features_train[9] dist1=np.sqrt(np.sum((features_test[0]-features_test[9])**2)) #dist1==0.058352853644336386 distances=[] smallest_dist=1000000
# Bare attribute expressions: they only display a value when evaluated
# interactively; run as a script they are evaluated and discarded.
fit2.intercept_
fit2.coef_[0][2]  # -71461.308292759204

# Model 2 residual sum of squares.
rss2 = res_sum_squares(
    sales_train[model_2_features], sales_train["price"], reg2
)  # training data
rss2b = res_sum_squares(
    sales_test[model_2_features], sales_test["price"], reg2
)  # test data

# Model 3 coefficients and sum of squares residual
fit3.coef_
fit3.intercept_
rss3 = res_sum_squares(
    sales_train[model_3_features], sales_train["price"], reg3
)  # training data
rss3b = res_sum_squares(
    sales_test[model_3_features], sales_test["price"], reg3
)  # test data

# Model A: a single feature, fitted with gradient descent.
simple_features = ["sqft_living"]
my_output = "price"
simple_feature_matrix, output = get_numpy_data(
    sales_train, simple_features, my_output
)

# Gradient-descent settings and the starting weight vector (two entries).
step_size = 7e-12
tolerance = 2.5e7
initial_weights = np.array([-47000.0, 1.0])

simple_weights = regression_gradient_descent(
    simple_feature_matrix, output, initial_weights, step_size, tolerance
)  # 281.91

test_matrix1, test_output1 = get_numpy_data(
    sales_test, simple_features, my_output
)

# predicting the sale price of the first house in the dataset using model A
# (dot product of the fitted weights with [1, sqft_living]; result is not
# stored).
np.dot(
    simple_weights, np.array([1, sales_test["sqft_living"][0]])
)  # 356134.44325500238

# Residual sum of squares for model A
# NOTE(review): this call receives initial_weights, step_size and tolerance
# rather than the fitted simple_weights -- confirm that res_sum_squares2 is
# meant to take those arguments.
rss = res_sum_squares2(
    test_matrix1, test_output1, initial_weights, step_size, tolerance
)  # 275395691278133.28
'sqft_lot': int, 'view': int } sales = pd.read_csv('kc_house_data_small.csv', dtype=dtype_dict) train = pd.read_csv('kc_house_data_small_train.csv', dtype=dtype_dict) validation = pd.read_csv('kc_house_data_validation.csv', dtype=dtype_dict) test = pd.read_csv('kc_house_data_small_test.csv', dtype=dtype_dict) feature_list = [ 'bedrooms', 'bathrooms', 'sqft_living', 'sqft_lot', 'floors', 'waterfront', 'view', 'condition', 'grade', 'sqft_above', 'sqft_basement', 'yr_built', 'yr_renovated', 'lat', 'long', 'sqft_living15', 'sqft_lot15' ] features_train, output_train = get_numpy_data(train, feature_list, 'price') features_test, output_test = get_numpy_data(test, feature_list, 'price') features_valid, output_valid = get_numpy_data(validation, feature_list, 'price') features_train, norms = normalize_features( features_train) # normalize training set features (columns) features_test = features_test / norms # normalize test set by training set norms features_valid = features_valid / norms # normalize validation set by training set norms print features_test[0] print features_train[9] dist1 = np.sqrt(np.sum( (features_test[0] - features_test[9])**2)) #dist1==0.058352853644336386
reg2) # training data rss2b = res_sum_squares(sales_test[model_2_features], sales_test['price'], reg2) # test data #Model 3 coefficients and sum of squares residual fit3.coef_ fit3.intercept_ rss3 = res_sum_squares(sales_train[model_3_features], sales_train['price'], reg3) # training data rss3b = res_sum_squares(sales_test[model_3_features], sales_test['price'], reg3) # test data #Model A simple_features = ['sqft_living'] my_output = 'price' (simple_feature_matrix, output) = get_numpy_data(sales_train, simple_features, my_output) initial_weights = np.array([-47000., 1.]) step_size = 7e-12 tolerance = 2.5e7 simple_weights = regression_gradient_descent(simple_feature_matrix, output, initial_weights, step_size, tolerance) #281.91 (test_matrix1, test_output1) = get_numpy_data(sales_test, simple_features, my_output) #predicting the sale price of the first house in the dataset using model A np.dot(simple_weights, np.array([1, sales_test['sqft_living'][0]])) #356134.44325500238