def start():
    # ****** Multivariate Linear Regression ******

    # Prepare dataset
    data = pd.read_csv('datasets/house_prices.csv', header=0)
    x = data[['Size', 'Bedrooms']]
    y = data[['Price']]

    # Normalize x, since the features differ by orders of magnitude
    x_norm, mu, sigma = linear.normalize(x)

    # Add intercept (bias) term
    x_norm.insert(0, 'Intercept', 1)
    x_norm = x_norm.values
    y = y.values
    theta = np.zeros((x_norm.shape[1], 1))
    print('Cost with theta [0; 0; 0]: {0}'.format(linear.cost(x_norm, theta, y)))

    alpha = 0.01
    iterations = 500

    # Run gradient descent to minimize the error
    new_theta, j_vals = linear.gradient_descent(x_norm, theta, y, alpha, iterations)
    linear.plot_cost(j_vals)

    # The cost is now far lower than with theta = [0; 0; 0]
    print('\nNew theta: [{0}; {1}; {2}]'.format(new_theta[0][0], new_theta[1][0], new_theta[2][0]))
    print('Final cost: ', linear.cost(x_norm, new_theta, y))
    print('\nA house of 3000sqft and 3 bedrooms costs around ',
          predict_house_price(np.array([[3000, 3]]), mu, sigma, new_theta))
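
# `predict_house_price` is defined elsewhere in this project. A minimal sketch of
# what it is assumed to do, based on how it is called above: scale the raw
# features with the training-set mu and sigma, prepend the intercept term, and
# apply the learned theta. The name, shapes, and exact arithmetic are assumptions.
import numpy as np

def predict_house_price(features, mu, sigma, theta):
    # features: shape (1, 2) -> [[size_sqft, bedrooms]]
    features_norm = (features - np.asarray(mu)) / np.asarray(sigma)
    # Prepend the intercept (bias) column, matching the training design matrix
    features_norm = np.insert(features_norm, 0, 1, axis=1)
    # Hypothesis: h(x) = x . theta
    return np.dot(features_norm, theta).item()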
def part4():
    '''
    Display the θs obtained by training on the full training dataset,
    and by training on a set that contains only two images of each actor.
    :return: void
    '''
    # Get training data, validation data and testing data from part2
    im_data_training, im_data_validation, im_data_testing = part2()

    # Using the full training set of Baldwin and Carell
    # 5000 iterations
    theta_full, iters_full = part3(1e-5, 1e-6, 5000)
    # 50000 iterations
    # theta_full, iters_full = part3(1e-5, 1e-6, 50000)

    # Get required actors' image validation data
    x_valid, y_valid = dataExtraction.prepare_training_data_label_by_actor_order(
        im_data_validation, [3, 5], 10)

    # Add constant values for each image data in x_valid
    x_valid = np.concatenate(
        (x_valid, np.ones([x_valid.shape[0], 1])), axis=1) / 255

    # Apply hypothesis function
    y_hypothesis = lr.hypothesis(theta_full, x_valid)

    # Compute accuracy
    accuracy = accuracy_compute.accuracy(y_valid, y_hypothesis)
    print(accuracy)

    # Using only two images each of Baldwin and Carell
    y_train = np.array([1, 1, 0, 0])
    x_train = im_data_training[3][10]
    x_train = np.vstack((x_train, im_data_training[3][11]))
    x_train = np.vstack((x_train, im_data_training[5][20]))
    x_train = np.vstack((x_train, im_data_training[5][21]))

    # Add constant values for each image in x_train
    x_train = np.concatenate(
        (x_train, np.ones([x_train.shape[0], 1])), axis=1) / 255

    # Theta initialization (1024 pixels plus a constant theta)
    theta0 = np.ones(1025) * 0.01
    theta_2, costs, iters_2 = lr.gradient_descent(
        lr.quadratic_cost_function, lr.derivative_quadratic_cost_function,
        x_train, y_train, theta0, 1e-5, 1e-6, 50)

    # Show the learned thetas as 32x32 images
    new_theta_full = np.reshape(theta_full[:1024], (32, 32))
    imshow(new_theta_full, cmap="RdBu", interpolation="spline16")
    show()
    new_theta_2 = np.reshape(theta_2[:1024], (32, 32))
    imshow(new_theta_2, cmap="RdBu", interpolation="spline16")
    show()
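
# `accuracy_compute.accuracy` is defined in a separate module. A plausible
# minimal sketch, assuming binary labels in {0, 1} and a linear hypothesis whose
# output is thresholded at 0.5 -- the actual implementation may differ.
import numpy as np

def accuracy(y_true, y_hypothesis):
    # Round predictions to the nearest class and compare with the labels
    predictions = (np.asarray(y_hypothesis) >= 0.5).astype(int)
    return np.mean(predictions == np.asarray(y_true))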
def start():
    # Changing plots' style
    style.use('ggplot')

    # Get and visualize dataset
    data = pd.read_csv('datasets/food_truck.csv', delimiter=',', header=0)
    x = data[['Population']]
    y = data[['Profits']]
    plt.scatter(x['Population'], y['Profits'], color='red', marker='.')
    plt.title('Change in Profits in relation to Population')
    plt.xlabel('Population in 10,000s')
    plt.ylabel('Profits in $ 10,000s')
    plt.show()

    # Add intercept (bias) column to x and check initial cost with all params = 0
    x.insert(0, 'Intercept', 1)
    x = x.values
    y = y.values
    theta = np.zeros((x.shape[1], 1))
    print('Cost with theta [0; 0]: {0}'.format(linear.cost(x, theta, y)))
    print('Cost with theta [-1; 2]: {0}'.format(
        linear.cost(x, np.array([[-1], [2]]), y)))

    # Setting hyperparameters before running gradient descent
    alpha = 0.01
    iterations = 1500

    # Run gradient descent to minimize the error
    new_theta, j_vals = linear.gradient_descent(x, theta, y, alpha, iterations)
    print('\nNew theta: [{0}; {1}]\n'.format(new_theta[0][0], new_theta[1][0]))

    # Plotting cost history
    linear.plot_cost(j_vals)

    # Plotting line of best fit
    print('Displaying line of best fit...')
    plt.scatter(x[:, 1], y[:, 0], color='red', marker='.')
    plt.plot(x[:, 1], np.dot(x, new_theta))
    plt.xlabel('Population in 10,000s')
    plt.ylabel('Profits in $ 10,000s')
    plt.show()

    # Making predictions
    user_choice = input('Do you want to make a prediction? (y/n)')
    while user_choice == 'y':
        predict_profit(int(input('Enter population: ')), new_theta)
        user_choice = input('Do you want to make another prediction? (y/n)')
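
# `predict_profit` is defined elsewhere in this module. A minimal sketch of what
# it is assumed to do, given how it is called above: prepend the intercept term,
# apply theta, and report the profit (both quantities are in units of 10,000).
# The name, input units, and print format are assumptions.
import numpy as np

def predict_profit(population, theta):
    # population is assumed to be given in units of 10,000 people
    prediction = np.dot(np.array([[1, population]]), theta).item()
    print('Predicted profit: $ {0:.2f}'.format(prediction * 10000))
    return prediction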
def part5(alpha, EPS, max_iters):
    '''
    Plot the performance of the gender classifiers on the validation set
    vs. the size of the training set.
    :param alpha: learning rate
    :param EPS: epsilon (convergence threshold)
    :param max_iters: maximum number of iterations
    :return: void
    '''
    accuracy_list = []
    no_of_images = [10, 20, 30, 40, 50, 60, 70]
    for n in no_of_images:
        # Get training data, validation data and testing data from part2
        im_data_training, im_data_validation, im_data_testing = part2()

        # Get required actors' image training data
        # Male as 0, female as 1
        x_train, y_train = dataExtraction.prepare_training_data_label_by_gender(
            im_data_training, [0, 1, 2, 3, 4, 5], [1, 1, 1, 0, 0, 0], n)

        # Add constant values for each image data in x_train
        x_train = np.concatenate(
            (x_train, np.ones([x_train.shape[0], 1])), axis=1) / 255

        # Theta initialization (1024 plus a constant theta)
        theta0 = np.ones(1025) * 0.01

        # Train classifier
        theta, costs, iters = lr.gradient_descent(
            lr.quadratic_cost_function, lr.derivative_quadratic_cost_function,
            x_train, y_train, theta0, alpha, EPS, max_iters)

        # Get required actors' image validation data
        # Male as 0, female as 1
        x_valid, y_valid = dataExtraction.prepare_training_data_label_by_gender(
            im_data_validation, [0, 1, 2, 3, 4, 5], [1, 1, 1, 0, 0, 0], 10)

        # Add constant values for each image data in x_valid
        x_valid = np.concatenate(
            (x_valid, np.ones([x_valid.shape[0], 1])), axis=1) / 255

        # Apply hypothesis function
        y_hypothesis = lr.hypothesis(theta, x_valid)

        # Compute accuracy
        accuracy = accuracy_compute.accuracy(y_valid, y_hypothesis)
        accuracy_list.append(accuracy)

    figure(1)
    plot(no_of_images, accuracy_list)
    xlabel('number of training images for each actor')
    ylabel('classifier accuracy')
    show()
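
# `lr.gradient_descent` lives in a separate module. A minimal sketch, assuming it
# takes a cost function, its derivative, the data, an initial theta, a learning
# rate, a convergence threshold EPS, and an iteration cap, and returns the final
# theta, the cost history, and the number of iterations run. The argument order
# of the cost/derivative callbacks and the exact stopping rule are assumptions.
import numpy as np

def gradient_descent(cost_fn, dcost_fn, x, y, theta0, alpha, EPS, max_iters):
    theta = theta0.copy()
    costs = []
    prev_theta = theta - 10 * EPS  # force at least one iteration
    iters = 0
    while np.linalg.norm(theta - prev_theta) > EPS and iters < max_iters:
        prev_theta = theta.copy()
        theta = theta - alpha * dcost_fn(x, y, theta)   # one descent step
        costs.append(cost_fn(x, y, theta))
        iters += 1
    return theta, costs, iters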
def part7(alpha, EPS, max_iters):
    '''
    Train a 6-way face classifier (one output per actor) and report its
    accuracy on the training and validation sets.
    :param alpha: learning rate
    :param EPS: epsilon (convergence threshold)
    :param max_iters: maximum number of iterations
    :return: theta
    '''
    # Get training data, validation data and testing data from part2
    im_data_training, im_data_validation, im_data_testing = part2()

    # Get required x_train and prepare labels for training data
    x_train, y_train = dataExtraction.prepare_training_data_label_by_actor_order_2(
        im_data_training, [0, 1, 2, 3, 4, 5], 70)

    # Add constant values for each image data in x_train
    x_train = np.concatenate(
        (x_train, np.ones([x_train.shape[0], 1])), axis=1) / 255

    # Theta initialization (1024 plus a constant theta), one column per actor
    theta0 = np.ones((1025, 6)) * 0.01

    # Train classifiers
    theta, costs, iters = lr.gradient_descent(
        lr.new_quadratic_cost_function,
        lr.new_derivative_quadratic_cost_function,
        x_train, y_train, theta0, alpha, EPS, max_iters)

    # Performance on training set
    y_hypothesis = lr.hypothesis(theta, x_train)
    accuracy = accuracy_compute.accuracy_2(y_train, y_hypothesis)
    print(accuracy)

    # Performance on validation set
    x_valid, y_valid = dataExtraction.prepare_training_data_label_by_actor_order_2(
        im_data_validation, [0, 1, 2, 3, 4, 5], 10)
    x_valid = np.concatenate(
        (x_valid, np.ones([x_valid.shape[0], 1])), axis=1) / 255
    y_hypothesis = lr.hypothesis(theta, x_valid)
    accuracy = accuracy_compute.accuracy_2(y_valid, y_hypothesis)
    print(accuracy)

    return theta
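
# `accuracy_compute.accuracy_2` handles the multi-output case. A plausible
# minimal sketch, assuming one-hot label rows and one hypothesis value per
# actor: the predicted actor is the column with the largest output.
# The actual implementation may differ.
import numpy as np

def accuracy_2(y_true, y_hypothesis):
    predicted = np.argmax(np.asarray(y_hypothesis), axis=1)
    actual = np.argmax(np.asarray(y_true), axis=1)
    return np.mean(predicted == actual)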
def part3(alpha, EPS, max_iters):
    '''
    Baldwin vs. Carell classification
    Build a classifier to distinguish pictures of Alec Baldwin from
    pictures of Steve Carell.
    :param alpha: learning rate
    :param EPS: epsilon (convergence threshold)
    :param max_iters: maximum number of iterations
    :return: theta, iters
    '''
    # Get training data, validation data and testing data from part2
    im_data_training, im_data_validation, im_data_testing = part2()

    # Split out training data and labels of Baldwin and Carell
    x_train, y_train = dataExtraction.prepare_training_data_label_by_actor_order(
        im_data_training, [3, 5], 70)

    # Add constant values for each image in x_train
    x_train = np.concatenate(
        (x_train, np.ones([x_train.shape[0], 1])), axis=1) / 255

    # Theta initialization (1024 plus a constant theta)
    theta0 = np.ones(1025) * 0.01
    # theta0 = np.ones(1025) * 0.5

    # Train classifier
    theta, costs, iters = lr.gradient_descent(
        lr.quadratic_cost_function, lr.derivative_quadratic_cost_function,
        x_train, y_train, theta0, alpha, EPS, max_iters)

    return theta, iters
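
# `lr.quadratic_cost_function` and `lr.derivative_quadratic_cost_function` are
# defined in the lr module. A minimal sketch of the standard squared-error cost
# for a linear hypothesis h(x) = x . theta and its gradient; the argument order
# and any normalization constant are assumptions.
import numpy as np

def quadratic_cost_function(x, y, theta):
    # J(theta) = sum over images of (x . theta - y)^2
    residual = np.dot(x, theta) - y
    return np.sum(residual ** 2)

def derivative_quadratic_cost_function(x, y, theta):
    # dJ/dtheta = 2 * x^T (x . theta - y)
    residual = np.dot(x, theta) - y
    return 2 * np.dot(x.T, residual)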
def main():
    # Load sample data
    data = multivariasi.load_data_single()
    X_, y = data[:, 0], data[:, 1]
    X = np.ones([y.size, 2])
    X[:, 1] = X_

    # Compute theta
    m, dim = X.shape
    theta = np.zeros([dim, 1])
    alpha, max_iter = 0.01, 300
    theta = linear_regression.gradient_descent(theta, X, y, alpha, max_iter)
    print(theta)

    # Plot sample data and the predicted line
    plt.subplot(2, 1, 1)
    plt.scatter(data[:, 0], data[:, 1], color='r', marker='x')
    xx = np.linspace(-10, 10)
    yy = theta[0] + theta[1] * xx
    plt.plot(xx, yy, 'k-')

    # Contour plot of the cost surface
    theta0_vals = np.linspace(-10, 10, 100)
    theta1_vals = np.linspace(-1, 4, 100)

    # Initialize J_vals to a matrix of 0's
    J_vals = np.zeros(shape=(theta0_vals.size, theta1_vals.size))

    # Fill out J_vals
    for t1, element in enumerate(theta0_vals):
        for t2, element2 in enumerate(theta1_vals):
            thetaT = np.zeros(shape=(2, 1))
            thetaT[0][0] = element
            thetaT[1][0] = element2
            J_vals[t1, t2] = linear_regression.compute_cost(thetaT, X, y)

    # Contour plot
    J_vals = J_vals.T
    # Plot J_vals as 40 contours spaced logarithmically between 0.01 and 1000
    plt.subplot(2, 1, 2)
    plt.contour(theta0_vals, theta1_vals, J_vals, np.logspace(-2, 3, 40))
    plt.xlabel('theta_0')
    plt.ylabel('theta_1')
    plt.scatter(theta[0][0], theta[1][0])

    # 3D surface and scatter plot
    theta0_vals, theta1_vals = np.meshgrid(theta0_vals, theta1_vals)
    fig = plt.figure()
    ax = fig.add_subplot(projection='3d')
    ax.plot_surface(theta0_vals, theta1_vals, J_vals, cmap=cm.coolwarm,
                    rstride=3, cstride=3, antialiased=True)
    ax.view_init(elev=60, azim=50)
    ax.dist = 8
    x_sct, y_sct = theta[0][0], theta[1][0]
    thetaT_sct = np.zeros(shape=(2, 1))
    thetaT_sct[0][0] = theta[0][0]
    thetaT_sct[1][0] = theta[1][0]
    z_sct = linear_regression.compute_cost(thetaT_sct, X, y)
    ax.scatter(x_sct, y_sct, z_sct)
    plt.show()
# Generate sample data
x, y = mock_data(m)
# Note: the next line overrides the mock data with the externally prepared series
x, y = x_graph, y_graph

# Scale both variables to [0, 1], keeping the unscaled copies in x_ and y_
x_, y_ = x, y
x = lr.min_max_scaler(x)
y = lr.min_max_scaler(y)

# theta is assumed to have been initialized above (e.g. to zeros)
thetas = []
costs = []
for i in range(10):
    # Cost before this descent step
    print(lr.cost_function(lr.hyp_func(x, theta), y))
    # One gradient-descent update with learning rate 0.001
    theta = lr.gradient_descent(x, y, theta, 0.001)
    plt.scatter(x, y)
    plt.scatter(x, lr.hyp_func(x, theta))
    print(theta)
    thetas.append(theta)
    costs.append(lr.cost_function(lr.hyp_func(x, theta), y))

# Keep the histories as arrays for later inspection/plotting
costss = np.array(costs)
thetass = np.array(thetas)
# plt.plot(costss)

# Final fit: plot the scaled data against the hypothesis
y_h = lr.hyp_func(x, theta)
plt.scatter(x, y)
plt.scatter(x, y_h)
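
# `lr.min_max_scaler` is defined in the lr module. A minimal sketch of standard
# min-max scaling to [0, 1]; the exact signature and handling of constant
# inputs are assumptions.
import numpy as np

def min_max_scaler(values):
    values = np.asarray(values, dtype=float)
    return (values - values.min()) / (values.max() - values.min())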
costings = {}
for p in polys:
    costings[p] = []
    print('----------')
    print('p = {}'.format(p))
    for l in lambdas:
        print('---')
        print('Lambda = {}'.format(l))

        # Fit on the training set for this polynomial degree and lambda
        [X, y, _] = data_from_file('./data/train.data', p)
        theta = np.matrix(np.zeros([X.shape[1], 1]))
        [theta, cost_history] = lr.gradient_descent(X, y, theta, alpha, l, iterations)

        # Evaluate on the cross-validation set
        [X, y, _] = data_from_file('./data/cv.data', p)
        predictions = X * theta
        cv_cost = lr.cost(X, y, theta, l)

        # Keep the best (p, lambda) combination seen so far
        if best_cost is None or cv_cost < best_cost:
            best_cost = cv_cost
            best_p = p
            best_l = l
            best_theta = theta

        costings[p].append(cv_cost)
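
# `lr.cost` is defined in the lr module. A minimal sketch of a regularized
# squared-error cost consistent with the np.matrix shapes used above; the exact
# normalization and whether the bias term is excluded from the penalty are
# assumptions.
import numpy as np

def cost(X, y, theta, l):
    m = X.shape[0]
    residual = X * theta - y                             # np.matrix product, shape (m, 1)
    unregularized = residual.T * residual / (2 * m)
    penalty = l * (theta[1:].T * theta[1:]) / (2 * m)    # skip the bias term
    return float(unregularized + penalty)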
# compute and display initial cost :: ans = 32.07
import linear_regression as lr
J = lr.compute_cost(X1, y, theta)
print("With theta = [0 ; 0] ... Cost computed = {:7.3f}".format(J))

# further testing of the cost function :: ans = 54.24
J = lr.compute_cost(X1, y, [[-1.0], [2.0]])
print("With theta = [-1 ; 2] ... Cost computed = {:7.3f}".format(J))

# Some gradient descent settings
iterations = 1500
alpha = 0.01

# run gradient descent :: ans = [ [-3.6303], [1.1664] ]
theta, J_history = lr.gradient_descent(X1, y, theta, alpha, iterations)
print("Calculated theta = \n", theta)

# predict values for population sizes of 35,000 and 70,000
p1 = np.dot([[1, 3.5]], theta)
p2 = np.dot([[1, 7.0]], theta)
print("For population of 35k, profit = {}".format(p1.item() * 10000))
print("For population of 70k, profit = {}".format(p2.item() * 10000))

# overlay the hypothesis on the data
from matplotlib import pyplot
#pyplot.scatter(X, y, c='b', s=7)
#pyplot.xlabel('Population (x 10k)')
#pyplot.ylabel('Profit (x $10k)')
#pyplot.plot(X, np.dot(X1, theta), 'r-')
#pyplot.show()
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from linear_regression import gradient_descent
from linear_regression import compute_cost

path = r'D:\Study\Coding\Machine Learning WuEnda\homework\ex1\ex1data2.txt'
data = pd.read_csv(path, header=None, names=['Size', 'Bedroom_nums', 'Price'])
# print(data.head())

# Feature normalization: subtract the column means (data.mean()) and divide by
# the column standard deviations (data.std())
data = (data - data.mean()) / data.std()
data.insert(0, 'Ones', 1)
print(data.head())

# Initialize x and y
cols = data.shape[1]  # data.shape is a (rows, columns) tuple
x = data.iloc[:, :cols - 1]
y = data.iloc[:, cols - 1:cols]

# Convert to matrices
x = np.mat(x.values)
y = np.mat(y.values)
theta = np.mat(np.array([0, 0, 0]))

alpha = 0.01
iters = 1500

g2, cost = gradient_descent(x, y, theta, alpha, iters)
print(g2)
cf_val_err = mse(y_val, cf_weights, X_val)

print('Closed form no text features:')
print(' Train err:', cf_train_err)
print(' Val err: ', cf_val_err)
print(' CF Time: ', stop_cf - start_cf)

### gradient descent comparison
# hyperparam settings
wo = np.ones((4, 1))  # initialize weights to 1
b = 1000000
n = 200
epsilon = 10**-7

start_gd = timeit.default_timer()
gd_weights = lr.gradient_descent(X_train, y_train, wo, b, n, epsilon)
stop_gd = timeit.default_timer()

gd_train_err = mse(y_train, gd_weights, X_train)
gd_val_err = mse(y_val, gd_weights, X_val)

print('\nGradient descent no text features:')
print(' Train err:', gd_train_err)
print(' Val err:', gd_val_err)
print(' GD Time: ', stop_gd - start_gd)

### part 2: top 60 words
X_train = load.make_matrix_60(train)[0]
X_val = load.make_matrix_60(val)[0]
X_test = load.make_matrix_60(test)[0]
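
# `mse` is a helper defined elsewhere in this script. A minimal sketch consistent
# with how it is called above (labels first, then weights, then the design
# matrix); the exact normalization is an assumption.
import numpy as np

def mse(y, weights, X):
    residual = X @ weights - y
    return float(np.mean(np.square(residual)))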
def main():
    # load sample data
    data = multivariate_normal.load_data_single()
    X_, y = data[:, 0], data[:, 1]
    X = np.ones([y.size, 2])
    X[:, 1] = X_

    # compute theta
    m, dim = X.shape
    theta = np.zeros([dim, 1])
    alpha, max_iter = 0.01, 300
    theta = linear_regression.gradient_descent(theta, X, y, alpha, max_iter)
    print(theta)

    # plot sample data and predicted line
    plt.subplot(2, 1, 1)
    plt.scatter(data[:, 0], data[:, 1], color='r', marker='x')
    xx = np.linspace(-10, 10)
    yy = theta[0] + theta[1] * xx
    plt.plot(xx, yy, 'k-')

    # plot contour
    theta0_vals = np.linspace(-10, 10, 100)
    theta1_vals = np.linspace(-1, 4, 100)

    # initialize J_vals to a matrix of 0's
    J_vals = np.zeros(shape=(theta0_vals.size, theta1_vals.size))

    # Fill out J_vals
    for t1, element in enumerate(theta0_vals):
        for t2, element2 in enumerate(theta1_vals):
            thetaT = np.zeros(shape=(2, 1))
            thetaT[0][0] = element
            thetaT[1][0] = element2
            J_vals[t1, t2] = linear_regression.compute_cost(thetaT, X, y)

    # Contour plot
    J_vals = J_vals.T
    # Plot J_vals as 40 contours spaced logarithmically between 0.01 and 1000
    plt.subplot(2, 1, 2)
    plt.contour(theta0_vals, theta1_vals, J_vals, np.logspace(-2, 3, 40))
    plt.xlabel('theta_0')
    plt.ylabel('theta_1')
    plt.scatter(theta[0][0], theta[1][0])

    # 3D surface and scatter plot
    theta0_vals, theta1_vals = np.meshgrid(theta0_vals, theta1_vals)
    fig = plt.figure()
    ax = fig.add_subplot(projection='3d')
    ax.plot_surface(theta0_vals, theta1_vals, J_vals, cmap=cm.coolwarm,
                    rstride=3, cstride=3, antialiased=True)
    ax.view_init(elev=60, azim=50)
    ax.dist = 8
    x_sct, y_sct = theta[0][0], theta[1][0]
    thetaT_sct = np.zeros(shape=(2, 1))
    thetaT_sct[0][0] = theta[0][0]
    thetaT_sct[1][0] = theta[1][0]
    z_sct = linear_regression.compute_cost(thetaT_sct, X, y)
    ax.scatter(x_sct, y_sct, z_sct)
    plt.show()
import extract_data as extract

data_filepath = input('Enter file path for data set > ')
param_filepath = input('Enter file path with prediction parameters > ')

m, y, X = extract.datafile_values(data_filepath)
parameters = extract.paramfile_values(param_filepath)
theta = np.zeros((m, 1))
X_norm, param_norm = lr.normalize(X, parameters, m)

# Normal equation
print('\nAttempting normal equation...')
if m < 100000:
    theta = lr.normal_equation(X, y)
else:
    print('Number of parameters too large')
print('Prediction via normal equation: ')
print(str(float(np.dot(theta.T, parameters.T))) + '\n')

# Linear regression (gradient descent)
print('Attempting linear regression...')
print('Suggested alpha: 0.01; Suggested iterations: 400')
alpha = float(input('Enter alpha value > '))
num_iter = int(input('Enter number of iterations > '))
theta, J_history = lr.gradient_descent(X_norm, y, m, alpha, num_iter)
print('Prediction via linear regression: ')
print(str(float(np.dot(theta.T, param_norm.T))) + '\n')

lr.plot_descent(range(num_iter), J_history)
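
# `lr.normal_equation` is defined in the lr module. A minimal sketch of the
# closed-form least-squares solution theta = (X^T X)^(-1) X^T y; using
# np.linalg.pinv for numerical robustness is a choice of this sketch, not
# necessarily what the module does.
import numpy as np

def normal_equation(X, y):
    return np.linalg.pinv(X.T @ X) @ X.T @ y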