import numpy as np
import matplotlib.pyplot as plt

# NOTE: the amf/lirf module names below are placeholders (assumed, not from
# the original source); point them at the local helper modules that define
# extended_matrix, feature_normalization, normal_equation, gradient_descent,
# cost_function, predict and predict_gradient.
import assignment_matrix_functions as amf  # placeholder name
import linear_regression_functions as lirf  # placeholder name


def compare_M(X, y, k=1):
    # k is the number of features to select; it must be at least 1
    assert k > 0, "k must be a positive integer"
    # Xremain holds the columns that have not been selected yet
    Xremain = X
    # M collects each forward-selected model Mi
    M = []
    # orig_indices records which column of X each step added, in selection order
    orig_indices = []
    # Xbest accumulates the selected columns
    Xbest = None
    for i in range(k):
        # Get the index of the best remaining column and its cost
        ind, best_cost = compare(Xremain, y)
        # Extract that column as an (n, 1) matrix
        Xnext = Xremain[:, ind].reshape(-1, 1)
        # Append it to the matrix of selected columns
        Xbest = Xnext if Xbest is None else np.c_[Xbest, Xnext]
        # Extend the selected matrix with a ones column; this is Mi
        Xe = amf.extended_matrix(Xbest)
        # Fit Mi with the normal equation
        betas = lirf.normal_equation(Xe, y)
        # Compute the cost of Mi
        normal_eq_cost = lirf.cost_function(Xe, betas, y)
        # Recover the index of the extracted column in the original X
        # (match the whole column, not just the first equal element)
        orig_index = np.where((X == Xnext).all(axis=0))[0][0]
        orig_indices.append(orig_index)
        # Store Mi together with its fit as metadata for later use
        M.append({"model": Xbest, "betas": betas, "cost": normal_eq_cost})
        # Remove the extracted column from Xremain
        Xremain = np.c_[Xremain[:, :ind], Xremain[:, ind + 1:]]
    return (orig_indices, M)
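
# compare() is called above but defined elsewhere in this module. Based on
# how compare_M uses it, it is assumed to try each remaining column as a
# one-feature model and return the index of the cheapest column plus its
# cost. A minimal sketch under that assumption (hypothetical, not the
# original implementation), using the cost() helper defined below:
def _compare_sketch(X, y):
    costs = [cost(X, y, j) for j in range(X.shape[1])]
    best = int(np.argmin(costs))
    return best, costs[best]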
def cost(X, y, j):
    # 1 - Extract column j as an (n, 1) matrix
    Xreduced = X[:, j].reshape(-1, 1)
    # 2 - Extend Xreduced with a ones column
    Xe = amf.extended_matrix(Xreduced)
    # 3 - Get betas using the normal equation
    betas = lirf.normal_equation(Xe, y)
    # 4 - Return the cost of this one-feature model
    normal_eq_cost = lirf.cost_function(Xe, betas, y)
    return normal_eq_cost
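
# For reference: lirf.cost_function is assumed to implement the least-squares
# cost J(beta) = (1/n) * (Xe @ beta - y).T @ (Xe @ beta - y) quoted in
# exerciseA_1 below. A standalone sketch (hypothetical, not the lirf code):
def _cost_function_sketch(Xe, betas, y):
    residuals = Xe @ betas - y               # (n,) prediction errors
    return (residuals @ residuals) / len(y)  # mean of the squared errors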
def exerciseA_1_gradient():
    print("\nExercise A.1 Gradient")
    # Load data
    X, y = load_data()

    ### Gradient
    # Step 1 - Normalize and extend X
    Xe_n = amf.extended_matrix(amf.feature_normalization(X))
    # Step 2 - Calculate betas using gradient descent
    betas = lirf.gradient_descent(Xe_n, y, alpha=.001, n=1000)
    # Step 3 - Calculate the cost for each intermediate beta
    J_gradient = [lirf.cost_function(Xe_n, beta, y) for beta in betas]
    # Step 4 - Plot the cost over iterations
    fig, ax1 = plt.subplots()
    fig.suptitle('Ex A.1 Gradient Descent, alpha = .001', fontsize=14)
    ax1.set(xlabel="Number of iterations = " + str(len(betas)),
            ylabel="Cost J, min = " + str(round(J_gradient[-1], 3)))
    ax1.plot(np.arange(0, len(betas)), J_gradient)
    plt.xlim(0, len(betas))
    plt.show()
    # Step 5 - Predict the height for a girl whose parents are 65 and 70
    # 5a) Place the parental heights in a matrix
    heights_to_predict = np.array([[65, 70]])
    # 5b) Make the prediction
    y_parents_grad = lirf.predict_gradient(X, heights_to_predict, betas)
    print("==> The predicted height for a girl with parents (65, 70) is:\n",
          round(y_parents_grad[0], 2))
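
# For reference: lirf.gradient_descent is assumed to run batch gradient
# descent on J(beta) = (1/n)(Xe beta - y)^T (Xe beta - y), whose gradient is
# (2/n) Xe^T (Xe beta - y), and to return beta after every iteration so the
# cost curve above can be plotted. A hypothetical sketch, not the lirf code:
def _gradient_descent_sketch(Xe, y, alpha, n):
    beta = np.zeros(Xe.shape[1])
    history = []
    for _ in range(n):
        # One batch update step along the negative gradient of J
        beta = beta - alpha * (2 / len(y)) * (Xe.T @ (Xe @ beta - y))
        history.append(beta)
    return np.array(history)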
def exercise1_1():
    print("\nExercise 1 - Normal Equation")
    # Step 1 - Load data
    csv_data = np.loadtxt("./A2_datasets_2020/GPUBenchmark.csv", delimiter=',')
    X = csv_data[:, :-1]
    y = csv_data[:, -1]
    # Step 2 - Normalize data
    Xn = amf.feature_normalization(X)
    # Step 3 - Plot each normalized feature against the benchmark speed
    fig, ax = plt.subplots(2, 3)
    fig.suptitle('Ex 1.1, Multivariate Data Sets', fontsize=14)
    fig.tight_layout(pad=1.0, rect=[0, 0.03, 1, 0.95])
    # CSV column names; the last column, BenchmarkSpeed, is the target y
    titles = [
        "CudaCores", "BaseClock", "BoostClock", "MemorySpeed",
        "MemoryConfig", "MemoryBandwidth", "BenchmarkSpeed"
    ]
    # Iterate over the six feature columns of Xn via its transpose
    for ind, xi in enumerate(Xn.T):
        ax.flat[ind].scatter(xi, y)
        ax.flat[ind].set_title(titles[ind])
    plt.show()
    # Step 4 - Get the extended matrix
    Xe = amf.extended_matrix(X)
    # Step 5 - Get betas using the normal equation
    betas = lirf.normal_equation(Xe, y)
    # Step 6 - Create the prediction matrix
    pred = np.array([[2432, 1607, 1683, 8, 8, 256]])
    # Step 7 - Make the prediction
    y_pred = lirf.predict(amf.extended_matrix(pred), betas)[0]
    print("Predicted benchmark:", y_pred, "\tActual benchmark: 114")
    # Step 8 - What is the cost J(beta) when using the beta computed by
    # the normal equation above?
    normal_eq_cost = lirf.cost_function(Xe, betas, y)
    print("Cost:", normal_eq_cost)

    print("\nExercise 1 - Gradient Descent")
    # Step 1 - Normalize and extend X
    Xe_n = amf.extended_matrix(amf.feature_normalization(X))
    # Step 2 - Calculate betas using gradient descent
    alpha, n = .01, 1000
    betas = lirf.gradient_descent(Xe_n, y, alpha, n)
    # Step 3 - Calculate the cost for each intermediate beta
    J_gradient = [lirf.cost_function(Xe_n, beta, y) for beta in betas]
    grad_cost = J_gradient[-1]
    print("alpha =", alpha, " n =", n)
    print("Cost:", grad_cost)
    print("Gradient cost within",
          str(round(100 * abs(grad_cost - normal_eq_cost) / normal_eq_cost, 5))
          + "% of normal cost -> This is less than 1%!")
    # Step 4 - Predict the benchmark with the gradient-descent betas
    y_pred_grad = lirf.predict_gradient(
        X, np.array([[2432, 1607, 1683, 8, 8, 256]]), betas)
    print("Predicted benchmark:", y_pred_grad[0])
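
# For reference: lirf.normal_equation is assumed to solve the closed-form
# least-squares system beta = (Xe^T Xe)^(-1) Xe^T y. A hypothetical sketch;
# np.linalg.solve is used instead of an explicit inverse for stability:
def _normal_equation_sketch(Xe, y):
    return np.linalg.solve(Xe.T @ Xe, Xe.T @ y)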
def exerciseA_1():
    print("\nExercise A.1")
    # Load data; keep X, y at module level for the other exercises
    global X, y
    X, y = load_data()
    # A1.1 - Plot data
    fig, (ax1, ax2) = plt.subplots(1, 2, sharey=True, sharex=True)
    fig.suptitle('Ex A.1, Girl Height in inches', fontsize=14)
    ax1.set(xlabel="Mom Height", ylabel="Girl Height")
    ax2.set(xlabel="Dad Height")
    ax1.scatter(X[:, 0], y, c='#e82d8f', marker='1')
    ax2.scatter(X[:, 1], y, c='#40925a', marker='2')
    plt.show()
    # A1.2 - Compute the extended matrix
    Xe_parents = amf.extended_matrix(X)
    print("Extended Matrix of Parents' Heights\n", Xe_parents, "\n")
    # A1.3 - Compute the normal equation and make a prediction
    Beta_normal_parents = lirf.normal_equation(Xe_parents, y)
    y_parents_normal_eq = lirf.predict(
        amf.extended_matrix(np.array([[65, 70]])), Beta_normal_parents)
    print("==> Prediction of girl height with parental heights of 65, 70\n",
          y_parents_normal_eq[0], "\n")
    # A1.4 - Apply feature normalization and plot the dataset; the heights
    # should now be centered around 0 with a standard deviation of 1.
    X_feature_normalized_heights = amf.feature_normalization(X)
    fig, (ax1, ax2) = plt.subplots(1, 2, sharey=True, sharex=True)
    fig.suptitle('Ex A.1, Girl Height in inches', fontsize=14)
    ax1.set(xlabel="Mom Height Normalized", ylabel="Girl Height")
    ax2.set(xlabel="Dad Height Normalized")
    ax1.scatter(X_feature_normalized_heights[:, 0], y, c='#e82d8f', marker='1')
    ax2.scatter(X_feature_normalized_heights[:, 1], y, c='#40925a', marker='2')
    plt.show()
    # A1.5 - Compute the extended matrix Xe and apply the normal equation to
    # the normalized version of (65, 70). The prediction should still be
    # 65.42 inches.
    Xe_feature_normalized_heights = amf.extended_matrix(
        X_feature_normalized_heights)
    Beta_normal_parents_normalized = lirf.normal_equation(
        Xe_feature_normalized_heights, y)
    heights_to_predict = np.array([[65, 70]])
    # Normalize the new point with the training set's mean and standard
    # deviation (assuming feature_normalization uses (x - mean) / std);
    # recomputing the statistics with the new point appended would skew them.
    mu, sigma = X.mean(axis=0), X.std(axis=0)
    Normalized_heights_to_pred = (heights_to_predict - mu) / sigma
    y_parents_pred = lirf.predict(
        amf.extended_matrix(Normalized_heights_to_pred),
        Beta_normal_parents_normalized)
    print("==> Prediction of girl height with normalized parental heights"
          " of 65, 70\n", y_parents_pred[0], "\n")
    # A1.6 - Implement the cost function
    # J(beta) = (1/n) * (Xe @ beta - y).T @ (Xe @ beta - y)
    # as a function of the parameters Xe, y, beta. The cost for the beta from
    # the normal equation should be 4.068.
    cost_function_normalized = lirf.cost_function(
        Xe_feature_normalized_heights, Beta_normal_parents_normalized, y)
    print("==> Cost Function (normalized)\n", cost_function_normalized, "\n")
    cost_function = lirf.cost_function(Xe_parents, Beta_normal_parents, y)
    print("==> Cost Function (not normalized)\n", cost_function, "\n")
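
# For reference: the amf helpers used throughout are assumed to behave as
# sketched below (hypothetical implementations, not the amf module itself).
def _feature_normalization_sketch(X):
    # Center each column and scale it to unit (population) standard deviation
    return (X - X.mean(axis=0)) / X.std(axis=0)


def _extended_matrix_sketch(X):
    # Prepend a column of ones so beta[0] acts as the intercept term
    return np.c_[np.ones(X.shape[0]), X]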