Ejemplo n.º 1
0
def start():
    """Run the multivariate linear regression demo on the house price data.

    Reads ``datasets/house_prices.csv``, normalizes the ``Size`` and
    ``Bedrooms`` columns, fits ``Price`` with gradient descent, hands the
    cost history to ``linear.plot_cost`` and prints the learned parameters,
    the final cost and one sample prediction.
    """
    # ****** Multivariate Linear Regression ******
    # Load the dataset and separate the features from the target column
    houses = pd.read_csv('datasets/house_prices.csv', header=0)
    features = houses[['Size', 'Bedrooms']]
    prices = houses[['Price']]

    # The features differ by many orders of magnitude, so normalize them;
    # mu and sigma are kept because the sample prediction below needs them
    scaled, mu, sigma = linear.normalize(features)

    # Prepend the intercept (bias) column, then switch to plain arrays
    scaled.insert(0, 'Intercept', 1)
    design = scaled.values
    targets = prices.values
    theta = np.zeros((design.shape[1], 1))

    initial_cost = linear.cost(design, theta, targets)
    print('Cost with theta [0; 0; 0]: {0}'.format(initial_cost))

    # Gradient descent hyperparameters
    alpha = 0.01
    iterations = 500

    # Run gradient descent to minimize the error
    new_theta, j_vals = linear.gradient_descent(
        design, theta, targets, alpha, iterations)
    linear.plot_cost(j_vals)

    # Report the three fitted parameters and the cost they achieve
    fitted = [new_theta[row][0] for row in range(3)]
    print('\nNew theta: [{0}; {1}; {2}]'.format(*fitted))
    print('Final cost: ', linear.cost(design, new_theta, targets))

    sample_house = np.array([[3000, 3]])
    print('\nA house of 3000sqft and 3 bedrooms costs around ',
          predict_house_price(sample_house, mu, sigma, new_theta))
 def linear_analysis(self):
     """Pick a regularization strength and save two diagnostic figures.

     Fits one model per candidate lambda on the training set, keeps the
     lambda with the lowest cross-validation cost, and saves a
     lambda-vs-cost figure. It then refits on the first 1..m training
     samples with that lambda and saves a cost-vs-sample-count figure.
     Both figures are written under ``self.data_directory``. Finally the
     parameters fitted on the full training set are stored in
     ``self.theta``.
     """
     # Candidate regularization strengths to evaluate
     l_candidate = [0, 0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30,
                    100, 300, 1000, 3000]
     training_cost = []
     cv_cost = []
     for lam in l_candidate:
         fit = lr.linear_regression_np(self.train_X, self.train_Y, lam)
         training_cost.append(lr.cost(fit.x, self.train_X, self.train_Y))
         cv_cost.append(lr.cost(fit.x, self.cv_X, self.cv_Y))

     # Keep the lambda with the lowest cross-validation cost.
     # NOTE: the original author remarked that sel_index could simply be
     # set to 0 for now; that remark was left unfinished in the source.
     sel_index = np.argmin(cv_cost)
     set_l = l_candidate[sel_index]
     best_train = training_cost[sel_index]
     best_cv = cv_cost[sel_index]

     # Plot both costs against lambda to inspect the result
     plt.figure(figsize=(12.7, 7.8))
     plt.plot(l_candidate, training_cost, label='training')
     plt.plot(l_candidate, cv_cost, label='cross validation')
     plt.xscale('log')
     plt.ylim([0, 0.1])
     plt.legend(loc=2)
     plt.xlabel('lambda')
     plt.ylabel('cost')
     plt.suptitle(
         'lambda: %f, train: %f, cv: %f' % (set_l, best_train, best_cv),
         fontsize=12,
         fontweight='bold',
     )
     plt.savefig(os.path.join(self.data_directory, 'lambda'))
     plt.close('all')

     logging.info('lambda figure saved successfully !!!')

     # Train on the first i samples for i = 1..m to see how the costs change
     # as the training set grows (used to judge bias versus variance)
     m = self.train_X.shape[0]
     sample_counts = np.arange(1, m + 1)
     training_cost = []
     cv_cost = []
     for i in range(1, m + 1):
         subset_X = self.train_X[:i, :]
         subset_Y = self.train_Y[:i]
         fit = lr.linear_regression_np(subset_X, subset_Y, l=set_l)
         training_cost.append(
             lr.regularized_cost(fit.x, subset_X, subset_Y, l=set_l))
         cv_cost.append(
             lr.regularized_cost(fit.x, self.cv_X, self.cv_Y, l=set_l))

     # Plot both costs against the number of training samples
     plt.figure(figsize=(12.7, 7.8))
     plt.plot(sample_counts, training_cost, label='training cost')
     plt.plot(sample_counts, cv_cost, label='cv cost')
     plt.legend(loc=2)
     plt.yscale('log')
     plt.ylim([1e-8, 1e2])
     plt.xlabel('sample_num')
     plt.ylabel('cost')
     plt.suptitle('cost vs sample_num', fontsize=12, fontweight='bold')
     plt.savefig(os.path.join(self.data_directory, 'sample_num'))
     plt.close('all')

     logging.info('sample_num figure saved successfully !!!')

     # Final model: full training set with the selected lambda
     final_fit = lr.linear_regression_np(self.train_X, self.train_Y, set_l)
     self.theta = final_fit.x
Ejemplo n.º 3
0
def start():
    """Run the single-feature linear regression demo on the food truck data.

    Shows a scatter plot of the data, prints the cost for two fixed
    parameter vectors, fits the parameters with gradient descent, plots the
    cost history and the fitted line, and then offers an interactive
    prediction loop on standard input.
    """
    x_label = 'Population in 10,000s'
    y_label = 'Profits in $ 10,000s'

    # Changing plots' style
    style.use('ggplot')

    # Load the dataset and take a first look at it
    truck_data = pd.read_csv('datasets/food_truck.csv', delimiter=',',
                             header=0)
    population = truck_data[['Population']]
    profits = truck_data[['Profits']]

    plt.scatter(population['Population'], profits['Profits'],
                color='red', marker='.')
    plt.title('Change in Profits in relation to Population')
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.show()

    # Prepend the intercept (bias) column, then switch to plain arrays
    population.insert(0, 'Intercept', 1)
    x = population.values
    y = profits.values
    theta = np.zeros((x.shape[1], 1))

    # Cost for the all-zero parameters and for one hand-picked vector
    zero_cost = linear.cost(x, theta, y)
    print('Cost with theta [0; 0]: {0}'.format(zero_cost))
    probe_theta = np.array([[-1], [2]])
    probe_cost = linear.cost(x, probe_theta, y)
    print('Cost with theta [-1; 2]: {0}'.format(probe_cost))

    # Setting hyperparameters before running gradient descent
    alpha = 0.01
    iterations = 1500

    # Run gradient descent to minimize the error
    new_theta, j_vals = linear.gradient_descent(x, theta, y, alpha,
                                                iterations)
    print('\nNew theta: [{0}; {1}]\n'.format(new_theta[0][0],
                                             new_theta[1][0]))

    # Plotting cost history
    linear.plot_cost(j_vals)

    # Plotting the line of best fit on top of the raw data
    print('Displaying line of best fit...')
    feature_column = x[:, 1]
    plt.scatter(feature_column, y[:, 0], color='red', marker='.')
    plt.plot(feature_column, np.dot(x, new_theta))
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.show()

    # Keep predicting for as long as the user answers 'y'; only the wording
    # of the question changes after the first round
    question = 'Do you want to make a prediction? (y/n)'
    while input(question) == 'y':
        predict_profit(int(input('Enter population: ')), new_theta)
        question = 'Do you want to make another prediction? (y/n)'
    def test_cost_0_errror(self):
        """Cost is zero when the hypothesis reproduces every target."""
        # Each row gives 0*1 + 1*2 + 1*3 = 5, which equals both targets
        features = np.matrix([[1, 2, 3]] * 2)
        targets = np.matrix([[5]] * 2)
        weights = np.matrix([[0], [1], [1]])
        no_regularisation = 0

        actual_cost = gd.cost(features, targets, weights, no_regularisation)
        self.assertEqual(actual_cost, 0)
    def test_cost_some(self):
        """Cost is 1 when exactly one of two predictions is off by 2."""
        features = np.matrix([[1, 2, 3]] * 2)
        targets = np.matrix([[3], [5]])
        weights = np.matrix([[0], [1], [1]])
        no_regularisation = 0

        # Both rows predict 5, so the errors are 2 and 0:
        #   sum of error^2 = 4
        #   m = 2
        #   0.25 * 4 = 1
        # (the 0.25 is presumably 1 / (2 * m) -- confirm against gd.cost)
        expected_cost = 1

        actual_cost = gd.cost(features, targets, weights, no_regularisation)
        self.assertEqual(actual_cost, expected_cost)
    def test_cost_vs_octave(self):
        """Compare cost results with those given by Octave to 10 decimals.

        Parameters with long decimal expansions are used to seek out
        rounding differences with numpy, and several amounts of
        regularisation are tried to ensure that regularisation works.
        """
        sample = np.loadtxt('./test/fixtures/data/sample.data')

        # Column 0 is used as the target, column 1 as the single feature
        y = np.matrix(sample[:, 0]).T
        X = gd.add_y_intercept(np.matrix(sample[:, 1]).T)

        # (regularisation amount, expected cost) pairs for theta = [1; 1]
        theta = np.matrix('1; 1')
        octave_costs = (
            (0, 303.9515255536),
            (.5, 303.9723588869),
            (1, 303.9931922203),
            (1.5, 304.0140255536),
        )
        for reg, expected in octave_costs:
            self.assertEqual(round(gd.cost(X, y, theta, reg), 10), expected)

        # Same check with long-decimal parameters
        theta = np.matrix('0.123456789;1.987654321')
        self.assertEqual(round(gd.cost(X, y, theta, 1.5), 10),
                         1327.2183625598)
Ejemplo n.º 7
0
Predict mpg for test set generated from auto_mpg.
Params are learned by ./train_mpg.py, which should be run first.
"""

# Load in learned params from file; the context manager closes the archive
# once the three arrays have been read out of it
with np.load('params.npz') as data:
    l = data['l']
    p = data['p']
    theta = data['theta']

# Load in test set
[X_test, y_test, Data] = data_from_file('./data/test.data', p)

# Make predictions
# NOTE(review): `*` is only a matrix product if X_test/theta are np.matrix
# objects -- confirm against data_from_file and the saved params
predictions = X_test * theta
test_cost = lr.cost(X_test, y_test, theta, l)

# Absolute prediction error per row, rounded to 2 decimals for display
abs_diff = np.abs(y_test - predictions).round(2)

# Load in car names to be mapped to predictions. The file is opened in a
# `with` block so the handle is closed as soon as the names are read
# (it was previously left open for the rest of the script).
with open("../cars.names") as fileName:
    names = [line.rstrip() for line in fileName]

# Show off results!!
# Single-argument print(...) calls behave the same on Python 2 and Python 3.
print('Car, Predicted, Actual, difference')

rounded = predictions.round(2)

for i in range(predictions.shape[0]):
    print("{}, {}, {}, {}".format(names[i], rounded[i, 0], y_test[i, 0],
                                  abs_diff[i, 0]))
Ejemplo n.º 8
0
# Report the training run whose results are visualised below
print('No. of iterations = ', iterations)
print('Theta = ', theta)

input('Press Enter to draw Hypothesis:')

# Plot the hypothesis on 500 evenly spaced inputs that extend one unit
# beyond the observed range of X on either side
lower_bound = np.amin(X) - 1
upper_bound = np.amax(X) + 1
x_test = np.linspace(lower_bound, upper_bound, num=500)
y_test = rg.pred(x_test.reshape((-1, 1)), theta)
dp.plot_hypothesis(x_test, y_test)

input('Press Enter to draw 3D Mesh:')
dp.plot_close()

# Grid of (theta0, theta1) pairs on which J(theta) is evaluated
theta0 = np.arange(-1, 3, 0.05)
theta1 = np.arange(-1, 3, 0.05)
Theta0, Theta1 = np.meshgrid(theta0, theta1)

# Design matrix: a leading column of ones followed by the columns of X
Xp = np.ones((m, n + 1))
Xp[:, 1:] = X

# Fill J cell by cell, row-major: the theta1 index is the outer (row) index
# and the theta0 index is the inner (column) index
J = np.zeros(Theta0.shape)
for i, j in np.ndindex(len(theta1), len(theta0)):
    grid_theta = np.array([Theta0[i][j], Theta1[i][j]])
    J[i][j] = rg.cost(Xp, y, grid_theta)

# Draw Animation for gradient descent
dp.animate_contour_and_mesh(Theta0, Theta1, J, theta_history, cost_history)

input('Press Enter to close:')
dp.plot_close()
Ejemplo n.º 9
0
    # Single-argument print(...) calls behave the same on Python 2 and 3.
    print('p = {}'.format(p))

    # Try every candidate lambda for the current p: train on the training
    # set, score on the cross-validation set, and remember the best combo.
    for l in lambdas:
        print('---')
        print("Lambda = {}".format(l))
        [X, y, _] = data_from_file('./data/train.data', p)

        # Start gradient descent from an all-zero parameter column
        theta = np.matrix(np.zeros([X.shape[1], 1]))
        [theta, cost_history] = lr.gradient_descent(X, y, theta, alpha, l,
                                                    iterations)

        # X and y are rebound to the cross-validation set from here on
        [X, y, _] = data_from_file('./data/cv.data', p)

        predictions = X * theta

        cv_cost = lr.cost(X, y, theta, l)

        # Identity check against None: `== None` would be evaluated
        # element-wise if best_cost were ever a numpy array/matrix.
        if best_cost is None or cv_cost < best_cost:
            best_cost = cv_cost
            best_p = p
            best_l = l
            best_theta = theta

        # Record the cost under the current p for the plot after the loops
        costings[p].append(cv_cost)

        print("CV cost = {}".format(cv_cost))

# Plot the cross-validation costs collected per p against the lambdas.
# NOTE(review): `plt` exposes poly_lambda_error here, so it is presumably a
# project plotting helper rather than matplotlib.pyplot -- confirm.
plt.poly_lambda_error(lambdas, costings)

# Carry forward the p and lambda recorded with the lowest CV cost
p = best_p
l = best_l