def compute_cost_function_part_6a(result_weights):
    """Save a contour plot of the training cost around two chosen weights.

    The entries ``result_weights[500][4]`` (w1) and ``result_weights[500][5]``
    (w2) are perturbed by every pair of offsets taken from
    ``np.arange(-2.0, 2.0, 0.2)``.  For each pair the training cost returned
    by ``cost_f`` is recorded, and the resulting surface is drawn with
    ``plt.contour`` and saved as ``part_6a_contour_plot``.  The plot axes show
    the offsets added to the original w1 / w2 values.

    ``result_weights`` is modified in place while the grid is evaluated, but
    the two perturbed entries are restored before the function returns (also
    when ``cost_f`` raises).

    :param result_weights: weights indexable as ``result_weights[500][4]``
        and accepted by ``cost_f``.
    """
    weights = result_weights
    offsets = np.arange(-2.0, 2.0, 0.2).tolist()

    x_train, y_train = get_data.split_digits_training_set(M)
    b = np.ones((10, 1))

    # With meshgrid's default indexing, X[row, col] is the w1 offset and
    # Y[row, col] is the w2 offset of the grid point stored at Z[row, col].
    X, Y = np.meshgrid(offsets, offsets)
    Z_train = np.zeros(X.shape)

    w1_orig = weights[500][4]
    w2_orig = weights[500][5]
    try:
        for row in range(X.shape[0]):
            for col in range(X.shape[1]):
                # Read the offsets from X / Y so that Z_train[row, col] lines
                # up with the coordinates plt.contour associates with it.
                weights[500][4] = w1_orig + X[row, col]
                weights[500][5] = w2_orig + Y[row, col]
                Z_train[row, col] = cost_f(weights, x_train, b, y_train.T)
    finally:
        # Do not leave the caller's weights at the last grid point.
        weights[500][4] = w1_orig
        weights[500][5] = w2_orig

    plt.figure()
    plt.contour(X, Y, Z_train)
    plt.xlabel("w1 values")
    plt.ylabel("w2 values")
    plt.title("Contour plot of x1 and x2")
    plt.legend(loc="best")
    plt.savefig("part_6a_contour_plot")
def plot_learning_curve(weights, part_number):
    """Plot per-sample training and validation cost for each weight snapshot.

    ``weights`` is a mapping whose keys are used as the x axis (visited in
    sorted order) and whose values are passed to ``cost_f``.  For every key
    the cost on the training split and on the validation split is divided by
    the number of columns of the corresponding input matrix.  Both curves are
    drawn on a new figure, which is saved as
    ``part_<part_number>_validation_training_plot``.

    :param weights: mapping from x-axis value to a weights object accepted by
        ``cost_f``.
    :param part_number: value embedded (via ``str``) in the output file name.
    """
    x_train, y_train = get_data.split_digits_training_set(M)
    x_valid, y_valid = get_data.split_digits_training_set(M, valid=True)
    b = np.ones((10, 1))

    iterations = sorted(weights.keys())
    train_costs = []
    valid_costs = []
    for iteration in iterations:
        snapshot = weights[iteration]
        train_cost = cost_f(snapshot, x_train, b, y_train.T) / x_train.shape[1]
        valid_cost = cost_f(snapshot, x_valid, b, y_valid.T) / x_valid.shape[1]
        train_costs.append(train_cost)
        valid_costs.append(valid_cost)

    plt.figure()
    plt.plot(iterations, train_costs, 'r-', label="Training Set")
    plt.plot(iterations, valid_costs, label="Validation Set")
    plt.xlabel("Number of Iterations")
    plt.ylabel("Cost")
    plt.title("Average Cost vs. Number of Iterations")
    plt.legend(loc="best")

    output_name = "part_" + str(part_number) + "_validation_training_plot"
    plt.savefig(output_name)
def plot_trajectory(gd_data, mo_data,
                    filename="part6_contour_plot_with_trajectory",
                    plt_gd=True, plt_mo=True):
    """Save a cost contour plot with optimisation trajectories overlaid.

    The contour shows the per-sample training cost (``cost_f`` divided by the
    number of columns of the training inputs) as the two weights
    ``weights[500][4]`` (w1) and ``weights[500][5]`` (w2) of ``gd_data[0]``
    are moved by offsets from ``np.arange(-2.0, 2.0, 0.2)``.  The contour is
    drawn in absolute w1 / w2 coordinates so that it shares a coordinate
    system with the trajectories, which are plotted from the recorded weight
    values themselves.

    ``gd_data[0]`` is modified in place while the grid is evaluated; its two
    perturbed entries are restored before the function returns.

    :param gd_data: sequence where ``gd_data[0]`` is the weights object the
        surface is centred on and ``gd_data[1]`` maps keys to recorded
        weights; the keys, in sorted order, define the trajectory.
    :param mo_data: sequence where ``mo_data[1]`` maps the same keys as
        ``gd_data[1]`` to recorded weights.  Only accessed when ``plt_mo``
        is true.
    :param filename: name passed to ``plt.savefig``.
    :param plt_gd: draw the ``gd_data`` trajectory (label "No Momentum").
    :param plt_mo: draw the ``mo_data`` trajectory (label "Momentum").
    """
    weights = gd_data[0]
    indices = sorted(gd_data[1])

    # Collect the trajectories before the weights are perturbed below, and
    # only for the curves that will actually be drawn.
    gd_w1, gd_w2, mo_w1, mo_w2 = [], [], [], []
    if plt_gd:
        gd_w1 = [gd_data[1][k][500][4] for k in indices]
        gd_w2 = [gd_data[1][k][500][5] for k in indices]
    if plt_mo:
        mo_w1 = [mo_data[1][k][500][4] for k in indices]
        mo_w2 = [mo_data[1][k][500][5] for k in indices]

    offsets = np.arange(-2.0, 2.0, 0.2).tolist()
    x_train, y_train = get_data.split_digits_training_set(M)
    b = np.ones((10, 1))
    num_train = x_train.shape[1]

    w1_orig = weights[500][4]
    w2_orig = weights[500][5]

    # Shift the offset grid by the original weights: W1[row, col] and
    # W2[row, col] are the absolute w1 / w2 values of the grid point whose
    # cost is stored at Z_train[row, col].
    offset_x, offset_y = np.meshgrid(offsets, offsets)
    W1 = offset_x + w1_orig
    W2 = offset_y + w2_orig
    Z_train = np.zeros(W1.shape)

    try:
        for row in range(W1.shape[0]):
            for col in range(W1.shape[1]):
                weights[500][4] = W1[row, col]
                weights[500][5] = W2[row, col]
                Z_train[row, col] = cost_f(weights, x_train, b, y_train.T) / num_train
    finally:
        # Do not leave the caller's weights at the last grid point.
        weights[500][4] = w1_orig
        weights[500][5] = w2_orig

    plt.figure()
    contour_set = plt.contour(W1, W2, Z_train)
    plt.xlabel("w1 values")
    plt.ylabel("w2 values")
    plt.title("Contour plot of x1 and x2")
    plt.clabel(contour_set, inline=1, fontsize=10)
    if plt_gd:
        plt.plot(gd_w1, gd_w2, 'yo-', label="No Momentum")
    if plt_mo:
        plt.plot(mo_w1, mo_w2, 'go-', label="Momentum")
    plt.legend(loc="best")
    plt.savefig(filename)
def get_performance(trained_weights):
    """Print and return the classification accuracy of ``trained_weights``.

    The data comes from ``get_data.split_digits_training_set(M, valid=True)``.
    A sample counts as correct when the arg-max over axis 0 of the network
    output equals the arg-max over axis 1 of the label matrix ``y``.  The
    number of samples is taken from ``x.shape[1]``.

    Besides the accuracy, the shapes of the output array, the label matrix
    and the softmax array are printed as diagnostics.

    :param trained_weights: weights passed to ``compute_output``.
    :return: fraction of correctly classified samples, as a float.
    """
    x, y = get_data.split_digits_training_set(M, valid=True)
    b = np.ones((10, 1))

    output_array = compute_output(trained_weights, x, b)
    # Single-string print calls emit the same text under Python 2 and 3.
    print("Shape of the output array %s" % (output_array.shape,))

    softmax_array = softmax(output_array)
    print("%s %s" % (y.shape, softmax_array.shape))

    num_samples = x.shape[1]
    predicted = np.argmax(output_array, 0)[:num_samples]
    expected = np.argmax(y, 1)[:num_samples]
    correct = int(np.count_nonzero(predicted == expected))

    accuracy = float(correct) / num_samples
    print(accuracy)
    return accuracy
def train_neural_net_momentum_two_weights(initial_weights, w1, w2,
                                          initial_weights_coefficient=1e-3,
                                          alpha=1e-6, max_iter=100):
    """Run ``grad_descent_momentum_two_weights`` on the training split.

    The training inputs and labels are loaded with
    ``get_data.split_digits_training_set(M)``; the labels are transposed
    before being handed to the optimiser, and the bias is a ``(10, 1)``
    array of ones.

    :param initial_weights: starting weights forwarded to the optimiser.
    :param w1: forwarded unchanged to the optimiser.
    :param w2: forwarded unchanged to the optimiser.
    :param initial_weights_coefficient: accepted for interface compatibility;
        not used by this function.
    :param alpha: forwarded to the optimiser.
    :param max_iter: forwarded to the optimiser.
    :return: whatever ``grad_descent_momentum_two_weights`` returns.
    """
    training_inputs, training_labels = get_data.split_digits_training_set(M)
    bias = np.ones((10, 1))
    return grad_descent_momentum_two_weights(
        cost_f, df, training_inputs, training_labels.T,
        initial_weights, alpha, bias, w1, w2, max_iter)
def train_neural_net_momentum(initial_weights_coefficient=1e-3, alpha=1e-6, max_iter=10000):
    """Run ``grad_descent_momentum`` on the training split from constant weights.

    The training inputs and labels are loaded with
    ``get_data.split_digits_training_set(M)``.  The initial weight matrix has
    one row per row of the input matrix and 10 columns, with every entry
    equal to ``initial_weights_coefficient``.  The bias is a ``(10, 1)``
    array of ones.

    :param initial_weights_coefficient: value used for every initial weight.
    :param alpha: forwarded to ``grad_descent_momentum``.
    :param max_iter: forwarded to ``grad_descent_momentum``.
    :return: whatever ``grad_descent_momentum`` returns.
    """
    x_matrix, y_ground_truths = get_data.split_digits_training_set(M)
    b = np.ones((10, 1))

    # One vectorized allocation instead of stacking identical rows one by one.
    initial_weights = initial_weights_coefficient * np.ones((x_matrix.shape[0], 10))

    return grad_descent_momentum(cost_f, df, x_matrix, y_ground_truths.T,
                                 initial_weights, alpha, b, max_iter)