def testLinreg():
    """Fit a LinearRegression via ``fit_stochastic`` and report its RMSE.

    The data comes from ``util.loadLinRegData()``; only the train and test
    splits and ``y_max`` are used here.  Each RMSE is multiplied by
    ``y_max[0]`` (presumably to undo target scaling -- confirm against
    ``util.loadLinRegData``).  The scaled test RMSE is printed.

    Returns:
        Tuple ``(train_linreg_rmse, test_linreg_rmse)`` of the scaled RMSEs.
    """
    (train_x, train_y, _dev_x, _dev_y,
     test_x, test_y, _x_max, y_max) = util.loadLinRegData()

    model = LinearRegression()
    model.fit_stochastic(
        train_x,
        train_y,
        eta=.01,
        eps=10**-12,
        max_iters=10**8,
    )

    def scaled_rmse(features, targets):
        # Predict first, then score, then rescale -- same order as before.
        predictions = model.predict(features)
        return util.findRMSE(predictions, targets) * y_max[0]

    train_linreg_rmse = scaled_rmse(train_x, train_y)
    test_linreg_rmse = scaled_rmse(test_x, test_y)

    print('Linreg RMSE: \t', test_linreg_rmse)
    return train_linreg_rmse, test_linreg_rmse
# Load the comma-separated data set.  The context manager guarantees the
# handle is closed once parsing is done (it was previously left open).
with open(filePath, 'r') as data_file:
    allData = np.loadtxt(data_file, delimiter=',')

# Every column but the last is a feature; the last column is the target.
X = np.matrix(allData[:, :-1])
y = np.matrix((allData[:, -1])).T

n, d = X.shape

# Standardize each feature column to zero mean and unit standard deviation.
# NOTE(review): a constant column has std == 0 and would divide by zero here.
mean = X.mean(axis=0)
std = X.std(axis=0)
X = (X - mean) / std

# Add a column of ones for the bias term
X = np.c_[np.ones((n, 1)), X]

# initialize the model
init_theta = np.matrix(np.random.randn((d + 1))).T
n_iter = 2000
alpha = 0.01

# Instantiate objects
lr_model = LinearRegression(init_theta=init_theta, alpha=alpha, n_iter=n_iter)
lr_model.fit(X, y)

# Compute the closed form solution (normal equation) in one line of code
thetaClosedForm = linalg.inv((X.T * X)) * X.T * y

# Single-argument print works as a statement on Python 2 and as a function
# on Python 3; the two spaces reproduce the output of the original
# `print "thetaClosedForm: ", thetaClosedForm` statement exactly.
print("thetaClosedForm:  %s" % (thetaClosedForm,))
# Contiguous three-way split: train, first validation fold, second validation
# fold.  Slice end indices are already exclusive, so no "- 1" is needed; the
# former `valid_indice_one - 1` / `valid_indice_two - 1` bounds silently
# dropped one sample at each boundary.
Xtrain = X[train_indice:valid_indice_one]
ytrain = y[train_indice:valid_indice_one]
Xvalidate_one = X[valid_indice_one:valid_indice_two]
yvalidate_one = y[valid_indice_one:valid_indice_two]
Xvalidate_two = X[valid_indice_two:]
yvalidate_two = y[valid_indice_two:]

num_of_polynomials = 5
best_poly = 1
# Start from +inf so the first candidate always becomes the incumbent,
# whatever the scale of the errors.
best_err = float("inf")

# NOTE(review): range(1, num_of_polynomials) tries degrees 1..4 only --
# confirm whether degree 5 was meant to be included.
for poly in range(1, num_of_polynomials):
    print("Polynomial degree: %.3f" % poly)
    model = LinearRegression(poly)
    model.fit(Xtrain, ytrain)

    y_hat_train = model.predict(Xtrain)
    y_hat_validate_one = model.predict(Xvalidate_one)
    y_hat_validate_two = model.predict(Xvalidate_two)

    # Mean squared error on each split.
    tr_err = np.mean((y_hat_train - ytrain)**2)
    va_err_one = np.mean((y_hat_validate_one - yvalidate_one)**2)
    va_err_two = np.mean((y_hat_validate_two - yvalidate_two)**2)

    print("Train error: %.3f" % tr_err)
    print("Val1 error: %.3f" % va_err_one)
    print("Val2 error: %.3f \n" % va_err_two)

    # Model selection criterion: average of the two validation errors.
    sum_err = (va_err_one + va_err_two) / 2
    if sum_err < best_err:
        best_err = sum_err
        # Remember which degree produced the best error, not just the error.
        best_poly = poly
# The last column of the loaded data is the regression target.
y = np.matrix((allData[:, -1])).T

n, d = X.shape

# Add a column of ones for the bias term
X = np.c_[np.ones((n, 1)), X]

# initialize the model
# note that we really should be initializing this to be near zero, but
# starting it near [10,10] works better to visualize gradient descent for
# this particular problem
init_theta = np.matrix(np.ones((d + 1, 1))) * 10
n_iter = 1500
alpha = 0.01

# Instantiate objects
lr_model = LinearRegression(init_theta=init_theta, alpha=alpha, n_iter=n_iter)
plotData1D(X[:, 1], y)
lr_model.fit(X, y)
plotRegLine1D(lr_model, X, y)

# Visualize the objective function convex shape
theta1_vals = np.linspace(-10, 10, 100)
theta2_vals = np.linspace(-10, 10, 100)
visualizeObjective(lr_model, theta1_vals, theta2_vals, X, y)

# Compute the closed form solution in one line of code: the normal equation
# theta = pinv(X^T X) X^T y.  `.dot` is used so the expression is a matrix
# product for both np.matrix and plain ndarray inputs, and the pseudo-inverse
# tolerates a singular X^T X.
theta_closed_form = np.linalg.pinv(X.T.dot(X)).dot(X.T).dot(y)

# Single-argument print works on Python 2 and 3; the two spaces match the
# spacing the original `print "theta_closed_form: ", value` statement emitted.
print("theta_closed_form:  %s" % (theta_closed_form,))
# Hyper-parameter grid for the stochastic fit.
eta_vals = [10**-4, 10**-3, 10**-2]
max_iters = [10**8, 10**7, 10**6]

# One row per (eps, eta, max_iter) combination:
#   [eps, eta, max_iter, train RMSE * y_max[0], dev RMSE * y_max[0]]
results = []
for eps_val in eps_vals:
    for eta_val in eta_vals:
        for max_iter in max_iters:
            print('Fitting regression with eta', eta_val, 'eps', eps_val, 'max iter', max_iter)

            cur_result = [eps_val, eta_val, max_iter]

            LR = LinearRegression()
            LR.fit_stochastic(
                train_x,
                train_y,
                eta=eta_val,
                eps=eps_val,
                max_iters=max_iter,
            )

            # Score the fitted model on the training split, then the dev split.
            for features, targets in ((train_x, train_y), (dev_x, dev_y)):
                preds = LR.predict(features)
                rmse = util.findRMSE(preds, targets)
                cur_result.append(rmse * y_max[0])

            results.append(cur_result)
def setUp(self):
    """Create a fresh ``LinearRegression`` and store it on ``self.linreg``.

    NOTE(review): presumably the unittest per-test fixture hook; the
    enclosing class is not visible from here -- confirm.
    """
    self.linreg = LinearRegression()
class LinearRegressionTest(unittest.TestCase):
    """Tests for LinearRegression's dot product, cost and gradient descent.

    Integer results are compared exactly with ``assertEqual`` (the
    ``assertEquals`` alias is deprecated and removed in Python 3.12).
    Floating-point results are compared with ``assertAlmostEqual`` so the
    tests do not hinge on bit-exact rounding.
    """

    def setUp(self):
        """Give every test its own LinearRegression instance."""
        self.linreg = LinearRegression()

    def _assert_all_almost_equal(self, actual, expected):
        """Assert two float sequences have equal length and near-equal items."""
        self.assertEqual(len(actual), len(expected))
        for got, want in zip(actual, expected):
            self.assertAlmostEqual(got, want)

    def _assert_descent_result(self, result, expected_costs, expected_theta):
        """Check a ``(cost_history, theta)`` pair from run_gradient_descent."""
        costs, theta = result
        self._assert_all_almost_equal(costs, expected_costs)
        self._assert_all_almost_equal(theta, expected_theta)

    def test_dot_product_of_two_vectors_should_get_dot_product_result(self):
        a = [1, 2, 3]
        b = [4, 5, 6]

        result = self.linreg.dot_product(a, b)

        expected = 32
        self.assertEqual(result, expected)

    def test_dot_product_should_get_negative_if_vectors_have_different_length(
        self
    ):
        a = [1, 2, 3]
        b = [4, 5, 6, 7]

        result = self.linreg.dot_product(a, b)

        # -1 is the value this test expects for mismatched lengths.
        expected = -1
        self.assertEqual(result, expected)

    def test_compute_cost_for_single_example_should_return_cost(self):
        X = [[1, 2]]
        y = [2]
        theta = [0.1, 0.2]

        result = self.linreg.compute_cost(X, y, theta)

        expected = 1.125
        self.assertAlmostEqual(result, expected)

    def test_compute_cost_for_entire_examples_should_return_cost(self):
        X = [[1, 2], [3, 4]]
        y = [2, 2.5]
        theta = [0.1, 0.2]

        result = self.linreg.compute_cost(X, y, theta)

        expected = 2.105
        self.assertAlmostEqual(result, expected)

    def test_run_gradient_descent_with_one_iteration_should_change_theta(self):
        X = [[1, 2], [3, 4]]
        y = [2, 2.5]
        theta = [0, 0]
        number_of_iterations = 1

        result = self.linreg.run_gradient_descent(
            X, y, theta, number_of_iterations
        )

        self._assert_descent_result(
            result,
            [2.6572999999999993],
            [0.07, 0.16],
        )

    def test_run_gradient_descent_with_ten_iteration_should_change_theta(self):
        X = [[1, 2], [3, 4]]
        y = [2, 2.5]
        theta = [0, 0]
        number_of_iterations = 10

        result = self.linreg.run_gradient_descent(
            X, y, theta, number_of_iterations
        )

        self._assert_descent_result(
            result,
            [
                2.6572999999999993,
                1.4340467700000001,
                0.8330420285729998,
                0.5415905512402678,
                0.4029957986666608,
                0.339058852804416,
                0.31098737943231103,
                0.2997051253564717,
                0.2959527141041505,
                0.2953245352067722,
            ],
            [0.21850056185143937, 0.5469413946894416],
        )
for i in range(my): img[:,i] = img[:,i]/np.max(img[:,i]) xs = [] ys = [] for x in counts: for y in counts[x]: xs.append(x) ys.append(y) xs = np.array(xs) ys = np.array(ys) xs = xs.reshape((len(xs), 1)) ys = ys.reshape((len(ys), 1)) print ys.shape, xs.shape x = LinearRegression() y = x.fit(xs,ys)#,weight) fig = pplot.figure() ax = fig.add_subplot(111) ax.imshow(img, interpolation='nearest',cmap=Reds,vmin=0,vmax=1,origin='lower') ax.set_title("Distribution of degree per avalanche size") ax.set_xlabel("Avalanche size s") ax.set_ylabel("Degree of first defaulting node") pplot.show()
# and the third column is the price of the house, which we want to predict.
file_name = 'dataset/ex1data2.txt'
with open(file_name, 'r') as f:
    # Parse from the handle opened above; passing the path again made
    # np.loadtxt open the file a second time and left `f` unused.
    house_data = np.loadtxt(f, delimiter=',')
num_sample = house_data.shape[0]  # number of all the samples

# The first two columns are the features, the third is the target,
# reshaped to a column vector.
X = house_data[:, :2]
y = house_data[:, 2].reshape((-1, 1))

print('X shape: ', X.shape)
print('y shape: ', y.shape)
print('First 10 examples from the dataset')
print(house_data[0:10, :])

# Normalize each feature column to zero mean and unit standard deviation
X = (X - np.mean(X, axis=0)) / np.std(X, axis=0)

# Add bias dimension (a trailing column of ones)
X = np.hstack((X, np.ones((num_sample, 1))))

lr_bgd = LinearRegression()
tic = time.time()
# NOTE(review): the variable names and the timing message say BGD, but the
# call passes method='sgd' -- confirm which optimiser is intended.
losses_bgd = lr_bgd.train(
    X,
    y,
    method='sgd',
    learning_rate=1e-2,
    num_iters=1000,
    verbose=True,
)
toc = time.time()
print('Traning time for BGD with vectorized version is %f \n' % (toc - tic))
print(lr_bgd.W)