예제 #1
0
                'sqft_lot',  
                'floors',
                'waterfront',  
                'view',  
                'condition',  
                'grade',  
                'sqft_above',  
                'sqft_basement',
                'yr_built',  
                'yr_renovated',  
                'lat',  
                'long',  
                'sqft_living15',  
                'sqft_lot15']
                
# Build a (feature matrix, output) pair for each split, using the columns in
# feature_list as inputs and 'price' as the output.
(features_train, output_train) = get_numpy_data(train, feature_list, "price")
(features_test, output_test) = get_numpy_data(test, feature_list, "price")
(features_valid, output_valid) = get_numpy_data(
    validation, feature_list, "price"
)

# Normalize the training features (columns) and keep the norms that were used.
(features_train, norms) = normalize_features(features_train)
# The test and validation sets are divided by the *training* norms so that all
# three splits share the same scale.
features_test = features_test / norms
features_valid = features_valid / norms

# Show the two rows being compared: the first test row (the query) and the
# tenth training row. Single-argument print(...) behaves the same on Python 2
# and Python 3, whereas the bare print statement is a syntax error on Python 3.
print(features_test[0])
print(features_train[9])

# Euclidean distance between the first test row and the tenth training row.
# The second operand is features_train[9] so that it matches the row printed
# above (it used to be features_test[9]).
# NOTE(review): the value recorded earlier for this line, 0.058352853644336386,
# was produced by the test-vs-test expression; re-run to record the new value.
dist1 = np.sqrt(np.sum((features_test[0] - features_train[9]) ** 2))


# Starting values; presumably accumulators for a distance scan that follows
# (the scan itself is not visible here -- confirm against the code below).
smallest_dist = 1000000  # large placeholder value
distances = []
# Model 2: look at the fitted intercept and one coefficient. As bare
# expressions these only display a value in an interactive session.
fit2.intercept_
fit2.coef_[0][2]  # -71461.308292759204

# Residual sum of squares for model 2 on each split.
rss2 = res_sum_squares(
    sales_train[model_2_features], sales_train['price'], reg2
)  # training data
rss2b = res_sum_squares(
    sales_test[model_2_features], sales_test['price'], reg2
)  # test data


# Model 3: coefficients, intercept and residual sum of squares.
# The two bare attribute expressions only display a value in an interactive
# session.
fit3.coef_
fit3.intercept_
rss3 = res_sum_squares(
    sales_train[model_3_features], sales_train['price'], reg3
)  # training data
rss3b = res_sum_squares(
    sales_test[model_3_features], sales_test['price'], reg3
)  # test data

# Model A: price as a function of sqft_living, fitted by gradient descent.
my_output = 'price'
simple_features = ['sqft_living']
simple_feature_matrix, output = get_numpy_data(
    sales_train, simple_features, my_output
)

# Gradient-descent settings: step size, stopping tolerance and the two
# starting weights.
step_size = 7e-12
tolerance = 2.5e7
initial_weights = np.array([-47000., 1.])

simple_weights = regression_gradient_descent(
    simple_feature_matrix, output, initial_weights, step_size, tolerance
)
# Recorded result: 281.91 (presumably the sqft_living weight -- confirm)

# Feature matrix and output for the test split, using the same feature and
# output names as the training run.
test_matrix1, test_output1 = get_numpy_data(
    sales_test, simple_features, my_output
)

# Model A's predicted sale price for the house at label 0 of
# sales_test['sqft_living']; the leading 1 pairs with the first weight.
# As a bare expression this only displays a value in an interactive session.
np.dot(
    simple_weights, np.array([1, sales_test['sqft_living'][0]])
)  # 356134.44325500238

# Residual sum of squares for model A on the test split.
# NOTE(review): res_sum_squares2 is handed initial_weights plus the
# gradient-descent settings rather than simple_weights -- presumably it runs
# the fit itself; confirm that is intended for test data.
rss = res_sum_squares2(
    test_matrix1, test_output1, initial_weights, step_size, tolerance
)  # 275395691278133.28
예제 #3
0
    'sqft_lot': int,
    'view': int
}

# Load the four CSV files, forcing column types through dtype_dict.
# Going by the file names these are the full small data set and its
# train / validation / test splits.
sales = pd.read_csv("kc_house_data_small.csv", dtype=dtype_dict)
train = pd.read_csv("kc_house_data_small_train.csv", dtype=dtype_dict)
validation = pd.read_csv("kc_house_data_validation.csv", dtype=dtype_dict)
test = pd.read_csv("kc_house_data_small_test.csv", dtype=dtype_dict)

# Names of the input columns handed to get_numpy_data, in matrix column order.
feature_list = [
    "bedrooms",
    "bathrooms",
    "sqft_living",
    "sqft_lot",
    "floors",
    "waterfront",
    "view",
    "condition",
    "grade",
    "sqft_above",
    "sqft_basement",
    "yr_built",
    "yr_renovated",
    "lat",
    "long",
    "sqft_living15",
    "sqft_lot15",
]

# Build a (feature matrix, output) pair for each split, using the columns in
# feature_list as inputs and 'price' as the output.
(features_train, output_train) = get_numpy_data(train, feature_list, "price")
(features_test, output_test) = get_numpy_data(test, feature_list, "price")
(features_valid, output_valid) = get_numpy_data(
    validation, feature_list, "price"
)

# Normalize the training features (columns) and keep the norms that were used.
(features_train, norms) = normalize_features(features_train)
# The test and validation sets are divided by the *training* norms so that all
# three splits share the same scale.
features_test = features_test / norms
features_valid = features_valid / norms

# Show the two rows being compared: the first test row (the query) and the
# tenth training row. Single-argument print(...) behaves the same on Python 2
# and Python 3, whereas the bare print statement is a syntax error on Python 3.
print(features_test[0])
print(features_train[9])

# Euclidean distance between the first test row and the tenth training row.
# The second operand is features_train[9] so that it matches the row printed
# above (it used to be features_test[9]).
# NOTE(review): the value recorded earlier for this line, 0.058352853644336386,
# was produced by the test-vs-test expression; re-run to record the new value.
dist1 = np.sqrt(np.sum((features_test[0] - features_train[9]) ** 2))
예제 #4
0
                       reg2)  # training data
# Model 2 residual sum of squares on the test split.
rss2b = res_sum_squares(
    sales_test[model_2_features], sales_test["price"], reg2
)  # test data

# Model 3: coefficients, intercept and residual sum of squares.
# The two bare attribute expressions only display a value in an interactive
# session.
fit3.coef_
fit3.intercept_
rss3 = res_sum_squares(
    sales_train[model_3_features], sales_train["price"], reg3
)  # training data
rss3b = res_sum_squares(
    sales_test[model_3_features], sales_test["price"], reg3
)  # test data

# Model A: price as a function of sqft_living, fitted by gradient descent.
my_output = "price"
simple_features = ["sqft_living"]
simple_feature_matrix, output = get_numpy_data(
    sales_train, simple_features, my_output
)

# Gradient-descent settings: step size, stopping tolerance and the two
# starting weights.
step_size = 7e-12
tolerance = 2.5e7
initial_weights = np.array([-47000.0, 1.0])

simple_weights = regression_gradient_descent(
    simple_feature_matrix, output, initial_weights, step_size, tolerance
)
# Recorded result: 281.91 (presumably the sqft_living weight -- confirm)

# Feature matrix and output for the test split, using the same feature and
# output names as the training run.
test_matrix1, test_output1 = get_numpy_data(
    sales_test, simple_features, my_output
)

# Model A's predicted sale price for the house at label 0 of
# sales_test['sqft_living']; the leading 1 pairs with the first weight.
# As a bare expression this only displays a value in an interactive session.
np.dot(
    simple_weights, np.array([1, sales_test["sqft_living"][0]])
)  # 356134.44325500238