def test_feature_derivative_ridge_002(self):
        """Derivative of the constant (intercept) column must equal 2*sum(errors)."""
        features, target = get_numpy_data(sales, ['sqft_living'], 'price')
        weights = np.array([1., 10.])
        preds = predict_output(features, weights)
        residuals = preds - target  # prediction errors

        # feature_is_constant=True selects the intercept-column derivative.
        derivative = feature_derivative_ridge(residuals, features[:, 0], weights[0], 1, True)
        expected = 2. * np.sum(residuals)
        self.assertEqual(expected, derivative)
    def test_ridge_regression_gradient_descent_004(self):
        """High-L2 two-feature fit must reproduce the known reference weights."""
        feats = ['sqft_living', 'sqft_living15']  # sqft_living15: avg sqft of the 15 nearest neighbors
        target_col = 'price'
        train, test = sales.random_split(.8, seed=0)

        X_train, y_train = get_numpy_data(train, feats, target_col)
        X_test, y_test = get_numpy_data(test, feats, target_col)

        # Same hyper-parameters as the reference run.
        weights = ridge_regression_gradient_descent(X_train,
                                                    y_train,
                                                    np.array([0.0, 0.0, 0.0]),
                                                    1e-12,
                                                    1e11,
                                                    1000)
        expected = np.array([6.7429658, 91.48927361, 78.43658768])
        # String comparison mirrors numpy's printed rounding.
        self.assertEqual(str(expected), str(weights))
    def test_ridge_regression_gradient_descent_003(self):
        """Zero-penalty two-feature fit must reproduce the known reference weights."""
        feats = ['sqft_living', 'sqft_living15']  # sqft_living15: avg sqft of the 15 nearest neighbors
        target_col = 'price'
        train, test = sales.random_split(.8, seed=0)

        X_train, y_train = get_numpy_data(train, feats, target_col)
        X_test, y_test = get_numpy_data(test, feats, target_col)

        # l2_penalty = 0 reduces ridge to plain least-squares gradient descent.
        weights = ridge_regression_gradient_descent(X_train,
                                                    y_train,
                                                    np.array([0.0, 0.0, 0.0]),
                                                    1e-12,
                                                    0.,
                                                    1000)
        expected = np.array([-0.35743482, 243.0541689, 22.41481594])
        # String comparison mirrors numpy's printed rounding.
        self.assertEqual(str(expected), str(weights))
    def test_ridge_regression_gradient_descent_002(self):
        """High-L2 single-feature fit must reproduce the known reference weights."""
        feats = ['sqft_living']
        target_col = 'price'
        train, test = sales.random_split(.8, seed=0)

        X_train, y_train = get_numpy_data(train, feats, target_col)
        X_test, y_test = get_numpy_data(test, feats, target_col)

        # Same hyper-parameters as the reference run.
        weights = ridge_regression_gradient_descent(X_train,
                                                    y_train,
                                                    np.array([0., 0.]),
                                                    1e-12,
                                                    1e11,
                                                    1000)
        expected = np.array([9.76730383, 124.57217565])
        # String comparison mirrors numpy's printed rounding.
        self.assertEqual(str(expected), str(weights))
    def test_ridge_regression_gradient_descent_001(self):
        """Zero-penalty single-feature fit must reproduce the known reference weights."""
        feats = ['sqft_living']
        target_col = 'price'
        train, test = sales.random_split(.8, seed=0)

        X_train, y_train = get_numpy_data(train, feats, target_col)
        X_test, y_test = get_numpy_data(test, feats, target_col)

        # l2_penalty = 0 reduces ridge to plain least-squares gradient descent.
        weights = ridge_regression_gradient_descent(X_train,
                                                    y_train,
                                                    np.array([0., 0.]),
                                                    1e-12,
                                                    0.,
                                                    1000)
        expected = np.array([-1.63113501e-01, 2.63024369e+02])
        # String comparison mirrors numpy's printed rounding.
        self.assertEqual(str(expected), str(weights))
import graphlab as gl
import numpy as np
from regression import get_numpy_data
from regression import predict_output
from regression import regression_gradient_descent

# Load the King County house-sales data (GraphLab SFrame on disk).
sales = gl.SFrame('kc_house_data.gl/')

# train and test

#Q1 & Q2
# Fit a simple (intercept + sqft_living) model on an 80/20 split with plain
# gradient descent, then score the held-out test rows with the learned weights.
train_data,test_data = sales.random_split(.8,seed=0)
simple_features = ['sqft_living']
my_output= 'price'
(simple_feature_matrix, output) = get_numpy_data(train_data, simple_features, my_output)
initial_weights = np.array([-47000., 1.])
step_size = 7e-12
tolerance = 2.5e7  # stop when the gradient magnitude falls below this

weights1 = regression_gradient_descent(simple_feature_matrix, output, initial_weights, step_size, tolerance)

# Rebuild the feature matrix from the *test* rows (note: this rebinds
# `output` to the test targets) and predict with the training weights.
(feature_matrix1, output) = get_numpy_data(test_data, simple_features, my_output)
predict1 = predict_output(feature_matrix1,weights1)

#Q1
print "Q1: What is the value of the weight for sqft_living", weights1[1]

#Q2
print "Q2: What is the predicted price for the 1st house in the Test data set for model 1", predict1[0]
# Exemple #7
# 0
                'sqft_living',
                'sqft_lot',
                'floors',
                'waterfront',
                'view',
                'condition',
                'grade',
                'sqft_above',
                'sqft_basement',
                'yr_built',
                'yr_renovated',
                'lat',
                'long',
                'sqft_living15',
                'sqft_lot15']
features_train, output_train = get_numpy_data(train, feature_list, 'price')
features_test, output_test = get_numpy_data(test, feature_list, 'price')
features_valid, output_valid = get_numpy_data(validation, feature_list, 'price')

features_train, norms = normalize_features(features_train) # normalize training set features (columns)
features_test = features_test / norms # normalize test set by training set norms
features_valid = features_valid / norms # normalize validation set by training set norms

query = features_test[0]
print "1st row of teat features: ", query
comp = features_train[9]
print "10th row of training features: ", comp

distance = np.sqrt(np.sum((query-comp)**2))
print "*** QUIZ QUESTION ***"
print "Euclidean distance between the query house and the 10th house of the training set: ", distance
# Exemple #8
# 0
# so we'll convert them to int, before using it below
sales['floors'] = sales['floors'].astype(int) 

# ---------------------------------------
# Normalize features
# ---------------------------------------
print("*** Normalize features")

# ---------------------------------------
# Implementing Coordinate Descent with normalized features
# ---------------------------------------
print("*** Implementing Coordinate Descent with normalized features")

simple_features = ['sqft_living', 'bedrooms']
my_output = 'price'
(simple_feature_matrix, output) = get_numpy_data(sales, simple_features, my_output)
simple_feature_matrix, norms = normalize_features(simple_feature_matrix)

weights = np.array([1., 4., 1.])
weights

prediction = predict_output(simple_feature_matrix, weights)
prediction

w = weights
# need to normalize output here?
ro = {}
for i in range(0,len(w)):
    feature_i = simple_feature_matrix[:,i]
    tmp = feature_i * (output - prediction + w[i]*feature_i)
    print tmp
# Exemple #9
# 0
# Imports and data load for the gradient-descent examples.
# BUG FIX: `sys.path.append("..")` was executed *before* `import sys`,
# which raises NameError at runtime; the import now precedes the use.
import sys

sys.path.append("..")

import graphlab

sales = graphlab.SFrame('kc_house_data.gl/')

import numpy as np  # note this allows us to refer to numpy as np instead

import unittest
from regression import get_numpy_data
from regression import predict_output
from regression import feature_derivative
from regression import regression_gradient_descent

# Build the (constant + sqft_living) feature matrix and the price vector.
(example_features, example_output) = get_numpy_data(sales, ['sqft_living'],
                                                    'price')
# the [] around 'sqft_living' makes it a list

print "example_features"
print len(example_features)
print example_features[0:3, :]
# this accesses the first row of the data the ':' indicates 'all columns'
print "example_output"
print len(example_output)
print example_output[0]
# and the corresponding output

# Predicting output given regression weights
print("*** Predicting output given regression weights")

my_weights = np.array([1., 1.])  # the example weights
# NOTE(review): `features` and `norms` are not defined anywhere above in this
# snippet -- they presumably came from a normalize_features() example dropped
# when this file was assembled; these two prints will raise NameError. Verify.
print features
print "Should print: \n[[ 0.6  0.6  0.6]\n  [ 0.8  0.8  0.8]]"# should print
# [[ 0.6  0.6  0.6]
#  [ 0.8  0.8  0.8]]
print norms
print "Should print: \n[5.  10.  15.]"
# should print
# [5.  10.  15.]

print "==== Implementing Coordinate Descent with normalized features ===="

print "=== Effect of L1 penalty ==="

simple_features = ['sqft_living', 'bedrooms']
my_output = 'price'
(simple_feature_matrix, output) = get_numpy_data(sales, simple_features, my_output)
simple_feature_matrix, norms = normalize_features(simple_feature_matrix)

weights = np.array([1., 4., 1.])

prediction = predict_output(simple_feature_matrix, weights)

# ro[i] = sum_j feature_i[j] * (output[j] - prediction[j] + weights[i]*feature_i[j])
ro = {}
for i in range(len(weights)):
    feature_i = simple_feature_matrix[:,i]
    tmp = feature_i * (output - prediction + weights[i]*feature_i)
    ro[i] = tmp.sum()
    print "ro[", i, "] is:", ro[i]
print "***** Quiz question *****"
# Only ro[1] and ro[2] matter here: ro[0] is the unregularized intercept term.
print "Range 1 of L1 is [", 2*ro[2], ", ", 2*ro[1], ")."
print "Range 2 of L1 is lambda <", 2*ro[2]
# NOTE(review): this chunk duplicates the previous snippet (reformatted).
# `features` and `norms` are not defined at this point in the snippet --
# these two prints will raise NameError; verify against the original source.
print features
print "Should print: \n[[ 0.6  0.6  0.6]\n  [ 0.8  0.8  0.8]]"  # should print
# [[ 0.6  0.6  0.6]
#  [ 0.8  0.8  0.8]]
print norms
print "Should print: \n[5.  10.  15.]"
# should print
# [5.  10.  15.]

print "==== Implementing Coordinate Descent with normalized features ===="

print "=== Effect of L1 penalty ==="

simple_features = ['sqft_living', 'bedrooms']
my_output = 'price'
(simple_feature_matrix, output) = get_numpy_data(sales, simple_features,
                                                 my_output)
simple_feature_matrix, norms = normalize_features(simple_feature_matrix)

weights = np.array([1., 4., 1.])

prediction = predict_output(simple_feature_matrix, weights)

# ro[i] = sum_j feature_i[j] * (output[j] - prediction[j] + weights[i]*feature_i[j])
ro = {}
for i in range(len(weights)):
    feature_i = simple_feature_matrix[:, i]
    tmp = feature_i * (output - prediction + weights[i] * feature_i)
    ro[i] = tmp.sum()
    print "ro[", i, "] is:", ro[i]
print "***** Quiz question *****"
# Only ro[1] and ro[2] matter here: ro[0] is the unregularized intercept term.
print "Range 1 of L1 is [", 2 * ro[2], ", ", 2 * ro[1], ")."
print "Range 2 of L1 is lambda <", 2 * ro[2]
# Imports and data load for the ridge-regression demo.
import graphlab as gl
import numpy as np
import matplotlib as mpl

# BUG FIX: a matplotlib backend must be selected *before* pyplot is first
# imported; the original called mpl.use('TkAgg') after `import
# matplotlib.pyplot`, so the call had no effect.
mpl.use('TkAgg')

import matplotlib.pyplot as plt

from regression import get_numpy_data
from regression import predict_output
from regression import feature_derivative_ridge
from regression import ridge_regression_gradient_descent
from regression import get_simple_residuals

sales = gl.SFrame('kc_house_data.gl/')

# Sanity-check feature_derivative_ridge against hand-computed derivatives.
(example_features, example_output) = get_numpy_data(sales, ['sqft_living'], 'price')
my_weights = np.array([1., 10.])
test_predictions = predict_output(example_features, my_weights)
errors = test_predictions - example_output # prediction errors

# next two lines should print the same values
# Non-constant feature (column 1): expected value is 2*sum(errors*feature)
# plus the penalty term 2*l2*w = 2*1*10 = 20.
print feature_derivative_ridge(errors, example_features[:,1], my_weights[1], 1, False)
print np.sum(errors*example_features[:,1])*2+20.
print ''
# -5.65541667824e+13
# -5.65541667824e+13

# next two lines should print the same values
# Constant feature (column 0): expected value is plain 2*sum(errors) --
# no penalty term appears in the reference expression.
print feature_derivative_ridge(errors, example_features[:,0], my_weights[0], 1, True)
print np.sum(errors)*2.
# -22446749336.0
# -22446749336.0
# Exemple #13
# 0
from regression import multiple_predict_knn
from regression import get_simple_residuals
import matplotlib.pyplot as plt

sales = gl.SFrame('kc_house_data_small.gl/')
(train_and_validation,
 test) = sales.random_split(.8, seed=1)  # initial train/test split
(train, validation) = train_and_validation.random_split(
    .8, seed=1)  # split training set into training and validation sets

feature_list = [
    'bedrooms', 'bathrooms', 'sqft_living', 'sqft_lot', 'floors', 'waterfront',
    'view', 'condition', 'grade', 'sqft_above', 'sqft_basement', 'yr_built',
    'yr_renovated', 'lat', 'long', 'sqft_living15', 'sqft_lot15'
]
features_train, output_train = get_numpy_data(train, feature_list, 'price')
features_test, output_test = get_numpy_data(test, feature_list, 'price')
features_valid, output_valid = get_numpy_data(validation, feature_list,
                                              'price')

features_train, norms = normalize_features(
    features_train)  # normalize training set features (columns)
features_test = features_test / norms  # normalize test set by training set norms
features_valid = features_valid / norms  # normalize validation set by training set norms

query = features_test[0]
print "1st row of teat features: ", query
comp = features_train[9]
print "10th row of training features: ", comp

distance = np.sqrt(np.sum((query - comp)**2))