def model(X_train, Y_train, layers_dims, learning_rate, num_iter, lambd, print_cost):
    with tf.device('/device:GPU:0'):
        tf.reset_default_graph()  # to be able to rerun the model without overwriting tf variables
        (n_x, m) = X_train.shape  # number of features and number of training examples
        n_y = Y_train.shape[0]  # number of classes
        n_hidden_layers = len(layers_dims)  # number of hidden layers
        costs = []  # keep track of the cost

        ### Create placeholders ###
        X, Y = create_placeholders(n_x, n_y)

        ### Initialize parameters ###
        parameters = init_params(layers_dims)

        ### Forward propagation - build the forward propagation in the tensorflow graph ###
        ZL = forward_propagation(X, parameters)

        ### Cost - add the cost function to the tensorflow graph ###
        cost_function = compute_cost(ZL, Y, parameters, n_hidden_layers, lambd, m)

        ### Backpropagation - define the tensorflow optimizer ###
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost_function)
        # optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(cost_function)

        ### Initialize all the variables ###
        init = tf.global_variables_initializer()

        ### Start the session to compute the tensorflow graph ###
        with tf.Session() as sess:
            # Run the initialization
            sess.run(init)

            # Training loop
            for i in range(num_iter):
                # Run the session to execute the optimizer and the cost
                _, cost_value = sess.run([optimizer, cost_function],
                                         feed_dict={X: X_train, Y: Y_train})

                # Record the cost every 1000 iterations
                if print_cost and i % 1000 == 0:
                    # print("Cost after iteration %i: %f" % (i, cost_value))
                    costs.append(cost_value)

            # Save the parameters in a variable
            parameters = sess.run(parameters)

    return parameters, costs
def model(X, Y, layers_dims, learning_rate=0.01, initialization='random', init_const=0.01,
          num_of_iterations=10000, print_cost=True, print_cost_after=1000, seed=None):
    L = len(layers_dims) - 1  # number of layers

    # Initialize parameters
    parameters = initialize_parameters(layers_dims, initialization, init_const, seed)

    # Gradient descent
    for i in range(num_of_iterations):
        # Forward propagation
        AL, caches = forward_propagation(X, parameters, L)

        # Compute cost
        cost = compute_cost(AL, Y)

        # Backward propagation
        grads = backward_propagation(AL, Y, caches)

        # Update parameters
        parameters = update_parameters(parameters, grads, learning_rate, L)

        # Print cost after the given number of iterations
        if print_cost and i % print_cost_after == 0:
            print("Cost after iteration %i: %f" % (i, cost))

    return parameters
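# The loop above follows the standard forward -> cost -> backward -> update pattern.
# A minimal, self-contained NumPy sketch of the same pattern (illustrative only: a
# single-layer logistic regression, not the L-layer helpers used above):
import numpy as np

def tiny_gradient_descent(X, Y, learning_rate=0.1, num_iterations=1000):
    n_x, m = X.shape
    W = np.zeros((1, n_x))
    b = 0.0
    for _ in range(num_iterations):
        A = 1.0 / (1.0 + np.exp(-(W @ X + b)))                     # forward propagation
        cost = -np.mean(Y * np.log(A) + (1 - Y) * np.log(1 - A))   # cross-entropy cost
        dZ = A - Y                                                 # backward propagation
        dW = (dZ @ X.T) / m
        db = np.sum(dZ) / m
        W -= learning_rate * dW                                    # parameter update
        b -= learning_rate * db
    return W, b, cost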
def nn_model(X, Y, n_h, num_iterations=1500, print_cost=False):
    np.random.seed(3)
    n_x = layer_sizes(X, Y)[0]
    n_y = layer_sizes(X, Y)[2]

    parameters = initialize_parameters(n_x, n_h, n_y)
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]

    for i in range(0, num_iterations):
        A2, cache = forward_propagation(X, parameters)
        cost = compute_cost(A2, Y, parameters)
        grads = backward_propagation(parameters, cache, X, Y)
        parameters = update_parameters(parameters, grads)

        if print_cost and i % 100 == 0:
            print("Cost after iteration %i: %f" % (i, cost))
            plt.scatter(i + 1, cost)

    # Label and save the cost curve once training finishes
    plt.title('cost curve')
    plt.xlabel('iteration')
    plt.ylabel('cost')
    plt.savefig('cost curve.jpg')
    return parameters
def valuate(mnist):
    input_data_x = tf.placeholder(tf.float32, [None, forward_propagation.INPUT_NODE],
                                  name="input_data_x")
    input_data_y = tf.placeholder(tf.float32, [None, forward_propagation.OUTPUT_NODE],
                                  name="input_data_y")
    y = forward_propagation.forward_propagation(input_data_x, False, None)

    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(input_data_y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # Restore the shadow (moving-average) values of the trained variables
    variable_averages = tf.train.ExponentialMovingAverage(mnist_train.MOVING_AVERAGE_DECAY)
    variables_to_restore = variable_averages.variables_to_restore()
    saver = tf.train.Saver(variables_to_restore)

    validate_feed = {
        input_data_x: mnist.validation.images,
        input_data_y: mnist.validation.labels
    }
    print(validate_feed)
    print("*")
    for variables in tf.global_variables():
        print(variables)

    with tf.Session() as sess:
        ckpt = tf.train.get_checkpoint_state(mnist_train.MODEL_SAVE_PATH)
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
            # global_step = ckpt.model_checkpoint_path.split("/")[-1].split("-")[-1]
            accuracy_prediction = sess.run(accuracy, feed_dict=validate_feed)
            print("accuracy on validation data is %g" % accuracy_prediction)
        else:
            print("No checkpoint file found")
            return
def model_using_sgd(X, Y, layers_dims, learning_rate=0.01, initialization='random', _lambda=0,
                    keep_prob=1, init_const=0.01, num_of_iterations=10000, print_cost=True,
                    print_cost_after=1000, seed=None):
    L = len(layers_dims) - 1  # number of layers
    m = X.shape[1]  # number of training examples

    # Initialize parameters
    parameters = initialize_parameters(layers_dims, initialization, init_const, seed)

    # Stochastic gradient descent: update on one training example at a time
    for i in range(num_of_iterations):
        for j in range(m):
            # Forward propagation
            if keep_prob == 1:
                AL, caches = forward_propagation(X[:, j], parameters, L)
            elif keep_prob < 1:
                AL, caches = forward_propagation_with_dropout(X[:, j], parameters, L, keep_prob)

            # Compute cost
            if _lambda == 0:
                cost = compute_cost(AL, Y[:, j])
            else:
                cost = compute_cost_with_regularization(AL, Y[:, j], parameters, _lambda, L)

            # Backward propagation
            if _lambda == 0 and keep_prob == 1:
                grads = backward_propagation(AL, Y[:, j], caches)
            elif _lambda != 0:
                grads = backward_propagation_with_regularization(AL, Y[:, j], caches, _lambda)
            elif keep_prob < 1:
                grads = backward_propagation_with_dropout(AL, Y[:, j], caches, keep_prob)

            # Update parameters
            parameters = update_parameters_using_gd(parameters, grads, learning_rate, L)

        # Print cost after the given number of iterations
        if print_cost and i % print_cost_after == 0:
            print("Cost after iteration %i: %f" % (i, cost))
            # Gradient checking
            gradient_checking(parameters, grads, X, Y, layers_dims, _lambda=_lambda)

    return parameters
def train():
    # step 1------define placeholders for the input data
    input_data_x = tf.placeholder(tf.float32, [
        None, forward_propagation.IMAGE_SIZE, forward_propagation.IMAGE_SIZE,
        forward_propagation.INPUT_CHANNELS
    ], name="input_data_x")
    input_data_y = tf.placeholder(tf.float32, [None, forward_propagation.OUTPUT_NODE],
                                  name="input_data_y")

    # step 2------define the network structure
    # see forward_propagation.py

    # step 3------calculate forward propagation
    (y, weight1, weight2, weight3) = forward_propagation.forward_propagation(input_data_x)

    # step 4------define the loss
    # cross_entropy = tf.reduce_mean(-tf.reduce_sum(input_data_y * tf.log(tf.clip_by_value(y, 1e-8, tf.reduce_max(y))), reduction_indices=[1]))
    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=y, labels=input_data_y))
    # use L2 regularization
    l2_w1 = LAMDA * tf.nn.l2_loss(weight1)
    l2_w2 = LAMDA * tf.nn.l2_loss(weight2)
    l2_w3 = LAMDA * tf.nn.l2_loss(weight3)
    loss = cross_entropy + l2_w1 + l2_w2 + l2_w3

    # step 5------train
    train_step = tf.train.GradientDescentOptimizer(0.05).minimize(loss)

    # step 6------define a Saver object to save the model
    saver = tf.train.Saver()

    # step 7------execution
    TRAINING_STEP = 20000
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        (data_train, labels_train) = load_cifar_dataset.get_train_data("cifar_dataset")
        for steps in range(1, TRAINING_STEP):
            start = steps * forward_propagation.BATCH_SIZE % 50000
            _, loss_value = sess.run(
                [train_step, loss],
                feed_dict={
                    input_data_x: data_train[start:start + forward_propagation.BATCH_SIZE],
                    input_data_y: labels_train[start:start + forward_propagation.BATCH_SIZE]
                })
            if steps % 1000 == 1:
                print("After %d steps, loss on training batch is %g" % (steps, loss_value))
                saver.save(sess, os.path.join(MODEL_SAVE_PATH, MODEL_NAME))
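# The loss above is cross-entropy plus an L2 penalty on each weight matrix
# (tf.nn.l2_loss(w) computes sum(w**2) / 2). A minimal NumPy sketch of the same
# combination; the LAMDA value here is illustrative, not taken from the original code:
import numpy as np

LAMDA = 0.004  # illustrative regularization strength

def l2_regularized_loss(cross_entropy, weights):
    """cross_entropy: scalar data loss; weights: list of weight arrays."""
    l2_penalty = sum(LAMDA * 0.5 * np.sum(w ** 2) for w in weights)
    return cross_entropy + l2_penalty

# Example usage: l2_regularized_loss(0.83, [np.ones((3, 3)), np.ones((3, 1))])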
def setUp(self):
    self.layer_shapes = {0: 3, 1: 4, 2: 1}
    self.num_layers = len(self.layer_shapes)
    self.labels = np.array([[2, 4]])
    self.input = np.array(([1, 1], [2, 2], [3, 3]))
    self.num_train_examples = 2
    self.params = ip.initialize_weights_and_biases(self.layer_shapes)
    self.cache = fp.forward_propagation(self.params, self.labels, self.input)
    self.params.update(self.cache)
    self.da_loss = lf.mean_squared_error_dx(self.labels, self.params['A' + str(2)])
def nn_model(X, Y, n_h, num_iterations=10000, learning_rate=0.01, print_cost=False):
    """
    Parameters
    ----------
    X : dataset of shape (2, number of examples)
    Y : labels of shape (1, number of examples)
    n_h : size of the hidden layer
    num_iterations : number of iterations in the gradient descent loop
    learning_rate : step size used in the parameter update
    print_cost : if True, print the cost every 1000 iterations

    Returns
    -------
    parameters : parameters learnt by the model. They can then be used to predict.
    """
    np.random.seed(3)
    n_x = network_structure(X, Y, n_h)[0]
    n_h = network_structure(X, Y, n_h)[1]
    n_y = network_structure(X, Y, n_h)[2]

    # Initialize parameters
    parameters = initialize_parameters(n_x, n_h, n_y)
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]

    # Loop (gradient descent)
    for i in range(0, num_iterations):
        # Forward propagation. Inputs: "X, parameters". Outputs: "A2, cache".
        A2, cache = forward_propagation(X, parameters)

        # Cost function. Inputs: "A2, Y". Outputs: "cost".
        cost = compute_cost(A2, Y)

        # Backpropagation. Inputs: "parameters, cache, X, Y". Outputs: "grads".
        grads = backward_propagation(parameters, cache, X, Y)

        # Gradient descent parameter update. Inputs: "parameters, grads". Outputs: "parameters".
        parameters = update_parameters(parameters, grads, learning_rate=learning_rate)

        # Print the cost every 1000 iterations
        if print_cost and i % 1000 == 0:
            print("Cost after iteration %i: %f" % (i, cost))

    return parameters
def nn_model(X, Y, n_h, num_iterations=10000, print_cost=False):
    """
    Arguments:
    X -- dataset of shape (2, number of examples)
    Y -- labels of shape (1, number of examples)
    n_h -- size of the hidden layer
    num_iterations -- number of iterations in the gradient descent loop
    print_cost -- if True, print the cost every 1000 iterations

    Returns:
    parameters -- parameters learnt by the model. They can then be used to predict.
    """
    np.random.seed(3)
    n_x = layer_sizes(X, Y)[0]
    n_y = layer_sizes(X, Y)[2]

    # Initialize parameters, then retrieve W1, b1, W2, b2.
    # Inputs: "n_x, n_h, n_y". Outputs: "W1, b1, W2, b2, parameters".
    parameters = initialize_parameters(n_x, n_h, n_y)
    W1 = parameters['W1']
    b1 = parameters['b1']
    W2 = parameters['W2']
    b2 = parameters['b2']

    # Loop (gradient descent)
    for i in range(0, num_iterations):
        # Forward propagation. Inputs: "X, parameters". Outputs: "A2, cache".
        A2, cache = forward_propagation(X, parameters)

        # Cost function. Inputs: "A2, Y, parameters". Outputs: "cost".
        cost = compute_cost(A2, Y, parameters)

        # Backpropagation. Inputs: "parameters, cache, X, Y". Outputs: "grads".
        grads = backward_propagation(parameters, cache, X, Y)

        # Gradient descent parameter update. Inputs: "parameters, grads". Outputs: "parameters".
        parameters = update_parameters(parameters, grads)

        # Print the cost every 1000 iterations
        if print_cost and i % 1000 == 0:
            print("Cost after iteration %i: %f" % (i, cost))

    return parameters
def evaluate_model(self, x_test, y_test, parameters, print_results=True): """ Evaluate accuracy, F1-score, precision and recall of trained model on given inputs/outputs. :param x_test: np.array of shape (num_features, num_examples) of inputs :param y_test: np.array of shape (num_classifiers, num_examples) of true labels :param parameters: dictionary containing parameters Wl, bl :param print_results: if True, print metrics. :return: metrics: dictionary of model metrics (Accuracy, F1-Score, Precision, and Recall) predictions: array of predicted outputs """ metrics = {} (n_features, n_examples) = x_test.shape n_classifiers = y_test.shape[0] with tf.Session(): # Create placeholders for x, y x = tf.placeholder(tf.float32, shape=(n_features, None)) y = tf.placeholder(tf.float32, shape=(n_classifiers, None)) ZL = forward_propagation(x, parameters, self.activation_functions, drop_rate=0.0, training=False) prediction = tf.one_hot(tf.argmax(ZL), n_classifiers) correct_prediction = tf.equal(tf.argmax(ZL), tf.argmax(y)) accuracy = tf.reduce_mean(tf.cast(correct_prediction, 'float')) t_p = tf.count_nonzero(tf.argmax(ZL) * tf.argmax(y), dtype=tf.float32) # true positive f_p = tf.count_nonzero(tf.argmax(ZL) * (tf.argmax(y) - 1), dtype=tf.float32) # false positive f_n = tf.count_nonzero((tf.argmax(ZL) - 1) * tf.argmax(y), dtype=tf.float32) # false negative precision = tf.divide(t_p, tf.add(t_p, f_p)) recall = tf.divide(t_p, tf.add(t_p, f_n)) f1_score = tf.divide(2 * tf.multiply(precision, recall), tf.add(precision, recall)) metrics['Accuracy'] = accuracy.eval({x: x_test, y: y_test}) metrics['F1-Score:'] = f1_score.eval({x: x_test, y: y_test}) metrics['Precision:'] = precision.eval({x: x_test, y: y_test}) metrics['Recall:'] = recall.eval({x: x_test, y: y_test}) if print_results: for met_label, met in sorted(metrics.items()): print(met_label, met) predictions = prediction.eval({x: x_test}) return metrics, predictions
def predict(parameters, X):
    """
    Using the learned parameters, predicts a class for each example in X

    Arguments:
    parameters -- python dictionary containing your parameters
    X -- input data of size (n_x, m)

    Returns
    predictions -- vector of predictions of our model (red: 0 / blue: 1)
    """
    from forward_propagation import forward_propagation

    # Computes probabilities using forward propagation, and classifies to 0/1
    # using 0.5 as the threshold.
    A2, cache = forward_propagation(X, parameters)
    predictions = (A2 > 0.5)

    return predictions
def evaluate_model(self, x_test, y_test, parameters, print_results=True):
    """
    Evaluate prediction error (MSE) of the trained model on given inputs/outputs.

    :param x_test: np.array of shape (num_features, num_examples) of inputs
    :param y_test: np.array of shape (num_classifiers, num_examples) of true labels
    :param parameters: dictionary containing parameters Wl, bl
    :param print_results: if True, print metrics
    :return:
        metrics: dictionary of model metrics (MSE, R^2)
        predictions: array of predicted outputs
    """
    metrics = {}
    (n_features, n_examples) = x_test.shape
    n_classifiers = y_test.shape[0]

    with tf.Session():
        # Create placeholders for x, y
        x = tf.placeholder(tf.float32, shape=(n_features, None))
        y = tf.placeholder(tf.float32, shape=(n_classifiers, None))

        ZL = forward_propagation(x, parameters, self.activation_functions,
                                 drop_rate=0.0, training=False)
        prediction = ZL

        mse = tf.reduce_mean(tf.squared_difference(ZL, y))  # evaluate model on mean squared error
        tss = tf.reduce_mean(tf.squared_difference(y, tf.reduce_mean(y)))  # total squared sum
        rsq = tf.subtract(1.0, tf.divide(mse, tss))  # R^2

        metrics['MSE'] = mse.eval({x: x_test, y: y_test})
        metrics['R^2'] = rsq.eval({x: x_test, y: y_test})

        if print_results:
            for met_label, met in sorted(metrics.items()):
                print(met_label, met)

        predictions = prediction.eval({x: x_test}).T

    return metrics, predictions
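# R^2 above is computed as 1 - MSE/TSS, i.e. the fraction of the label variance the
# model explains. A self-contained NumPy check of the same formula (illustrative data):
import numpy as np

y_true = np.array([3.0, -0.5, 2.0, 7.0])
y_pred = np.array([2.5, 0.0, 2.0, 8.0])

mse = np.mean((y_pred - y_true) ** 2)          # mean squared error
tss = np.mean((y_true - y_true.mean()) ** 2)   # total squared sum (per example)
r_squared = 1.0 - mse / tss
print(mse, r_squared)  # 0.375, ~0.949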
def predict(parameters, X):
    """
    Using the learned parameters, predicts a class for each example in X

    Arguments:
    parameters -- python dictionary containing your parameters
    X -- input data of size (n_x, m)

    Returns
    predictions -- vector of predictions of our model (red: 0 / blue: 1)
    """
    # Computes probabilities using forward propagation, and classifies to 0/1
    # using 0.5 as the threshold.
    A2, cache = forward_propagation(X, parameters)
    predictions = np.round(A2)

    return predictions
def predict(parameters, X):
    """
    Using the learned parameters, predicts a class for each example in X

    Arguments:
    parameters -- python dictionary containing your parameters
    X -- input data of size (n_x, m)

    Returns
    predictions -- vector of predictions of our model (red: 0 / blue: 1)
    """
    # Computes probabilities using forward propagation, and classifies to 0/1
    # using 0.5 as the threshold.
    A2, cache = forward_propagation(X, parameters)
    predictions = np.zeros((1, X.shape[1]))
    for i in range(A2.shape[1]):
        predictions[0, i] = 1 if A2[0, i] > 0.5 else 0

    return predictions
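# The element-wise loop above can be replaced by a single vectorized comparison,
# the idiomatic NumPy form of the same strict 0.5 threshold (a sketch using
# illustrative probabilities, not the model's actual output):
import numpy as np

A2 = np.array([[0.1, 0.7, 0.5, 0.93]])   # stand-in for forward-propagation output
predictions = (A2 > 0.5).astype(float)   # [[0., 1., 0., 1.]]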
def valuate():
    # step 1------define placeholders for the input data
    input_data_x = tf.placeholder(tf.float32, [
        None, forward_propagation.IMAGE_SIZE, forward_propagation.IMAGE_SIZE,
        forward_propagation.INPUT_CHANNELS
    ], name="input_data_x")
    input_data_y = tf.placeholder(tf.float32, [None, forward_propagation.OUTPUT_NODE],
                                  name="input_data_y")

    # step 2------use the network structure
    # see forward_propagation.py

    # step 3------calculate forward propagation
    (y, _1, _2, _3) = forward_propagation.forward_propagation(input_data_x)

    # step 4------prediction accuracy
    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(input_data_y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # step 5------define a Saver object to load the model
    saver = tf.train.Saver()

    # step 6------execution
    with tf.Session() as sess:
        (data_test, labels_test) = load_cifar_dataset.get_test_data()
        validate_feed = {input_data_x: data_test, input_data_y: labels_test}

        # find the model checkpoint
        ckpt = tf.train.get_checkpoint_state(cifar_train.MODEL_SAVE_PATH)
        if ckpt and ckpt.model_checkpoint_path:
            # load the model
            saver.restore(sess, ckpt.model_checkpoint_path)
            accuracy_prediction = sess.run(accuracy, feed_dict=validate_feed)
            print("accuracy on validation data is %g" % accuracy_prediction)
        else:
            print("No checkpoint file found")
            return
def nadam(train_x, train_y, val_x, val_y, d, hl, ol, ac, lf,
          epochs=100, eta=0.1, init_strategy="xavier", batch_size=1):
    print("Function Invoked: nadam")

    # Initialize params
    W, b = init_methods.random_init(d, hl, ol) if init_strategy == "random" \
        else init_methods.xavier_init(d, hl, ol)

    n_hl = len(hl)
    t, beta1, beta2, epsilon, count = 0, 0.9, 0.999, 1e-8, 0
    v_W, v_b = [np.array([])] * (n_hl + 2), [np.array([])] * (n_hl + 2)
    m_W, m_b = [np.array([])] * (n_hl + 2), [np.array([])] * (n_hl + 2)

    while t < epochs:
        gW, gb = [], []
        W_look_ahead = [np.array([])] * (n_hl + 2)
        b_look_ahead = [np.array([])] * (n_hl + 2)

        for index, (x, y) in enumerate(zip(train_x, train_y)):
            if index % batch_size == 0:
                if t == 0 and index == 0:
                    W_look_ahead = np.copy(W)
                    b_look_ahead = np.copy(b)
                else:
                    # Compute the Nesterov "look-ahead" parameters from the
                    # bias-corrected first and second moments
                    for _index, (_b, _m_b, _v_b) in enumerate(zip(b, m_b, v_b)):
                        _m_b_hat = (beta1 * _m_b) / (1 - np.power(beta1, count + 1))
                        _v_b_hat = (beta2 * _v_b) / (1 - np.power(beta2, count + 1))
                        b_look_ahead[_index] = _b - (eta / np.sqrt(_v_b_hat + epsilon)) * _m_b_hat
                    for _index, (_W, _m_W, _v_W) in enumerate(zip(W, m_W, v_W)):
                        _m_W_hat = (beta1 * _m_W) / (1 - np.power(beta1, count + 1))
                        _v_W_hat = (beta2 * _v_W) / (1 - np.power(beta2, count + 1))
                        W_look_ahead[_index] = _W - (eta / np.sqrt(_v_W_hat + epsilon)) * _m_W_hat

            # Forward propagation
            h, a = forward_propagation.forward_propagation(W_look_ahead, b_look_ahead, x, n_hl, ac)

            # Prediction (y hat)
            _y = h[n_hl + 1]

            # Backward propagation
            _gW, _gb = back_propagation.back_propagation(W_look_ahead, h, x, y, _y, n_hl, ac, lf)

            # Accumulate gradients over the mini-batch
            if index % batch_size == 0:
                gW = _gW
                gb = _gb
            else:
                gW = np.add(gW, _gW)
                gb = np.add(gb, _gb)

            if (index + 1) % batch_size == 0:
                count += 1
                update_nadam_Wb(t, index, count, beta1, beta2, epsilon, eta, n_hl,
                                batch_size, m_W, m_b, v_W, v_b, gW, gb, W, b, ac)
                gW, gb = [], []
                W_look_ahead = [np.array([])] * (n_hl + 2)
                b_look_ahead = [np.array([])] * (n_hl + 2)

        # Handle a final partial batch
        if len(train_x) % batch_size != 0:
            count += 1
            index = batch_size - 1 if len(train_x) < batch_size else -1
            update_nadam_Wb(t, index, count, beta1, beta2, epsilon, eta, n_hl,
                            batch_size, m_W, m_b, v_W, v_b, gW, gb, W, b, ac)

        # Logging to WandB
        if lf == "cross_entropy":
            val_acc, val_loss = accuracy_loss.get_accuracy_and_loss(W, b, val_x, val_y, n_hl, ac, lf)
            train_acc, train_loss = accuracy_loss.get_accuracy_and_loss(W, b, train_x, train_y, n_hl, ac, lf)
            wandb.log({
                "val_accuracy": val_acc,
                "accuracy": train_acc,
                "val_loss": val_loss,
                "loss": train_loss
            })

        t += 1

    return W, b
def gradient_checking(parameters, gradients, X, Y, layers_dims, _lambda=0, keep_prob=1, epsilon=1e-7):
    # Set up variables
    parameters_values = dictionary_to_vector(parameters)
    grad = gradients_to_vector(gradients, len(layers_dims))
    num_of_parameters = parameters_values.shape[0]
    J_plus = np.zeros((num_of_parameters, 1))
    J_minus = np.zeros((num_of_parameters, 1))
    grad_approx = np.zeros((num_of_parameters, 1))
    num_of_layers = len(layers_dims) - 1

    # Compute grad_approx
    for i in range(num_of_parameters):
        # Compute J_plus[i]
        theta_plus = np.copy(parameters_values)
        theta_plus[i][0] = theta_plus[i][0] + epsilon
        if keep_prob == 1:
            AL, _ = forward_propagation(X, vector_to_dictionary(theta_plus, layers_dims), num_of_layers)
        elif keep_prob < 1:
            AL, _ = forward_propagation_with_dropout(X, vector_to_dictionary(theta_plus, layers_dims),
                                                     num_of_layers, keep_prob)
        if _lambda == 0:
            J_plus[i] = compute_cost(AL, Y)
        else:
            # The regularized cost must be evaluated at the perturbed parameters
            J_plus[i] = compute_cost_with_regularization(AL, Y, vector_to_dictionary(theta_plus, layers_dims),
                                                         _lambda, num_of_layers)

        # Compute J_minus[i]
        theta_minus = np.copy(parameters_values)
        theta_minus[i][0] = theta_minus[i][0] - epsilon
        if keep_prob == 1:
            AL, _ = forward_propagation(X, vector_to_dictionary(theta_minus, layers_dims), num_of_layers)
        elif keep_prob < 1:
            AL, _ = forward_propagation_with_dropout(X, vector_to_dictionary(theta_minus, layers_dims),
                                                     num_of_layers, keep_prob)
        if _lambda == 0:
            J_minus[i] = compute_cost(AL, Y)
        else:
            J_minus[i] = compute_cost_with_regularization(AL, Y, vector_to_dictionary(theta_minus, layers_dims),
                                                          _lambda, num_of_layers)

        # Compute grad_approx[i] with the centered difference
        grad_approx[i] = np.divide(J_plus[i] - J_minus[i], 2 * epsilon)

    # Compare grad_approx to the backward-propagation gradients via the relative difference
    numerator = np.linalg.norm(grad - grad_approx)
    denominator = np.linalg.norm(grad) + np.linalg.norm(grad_approx)
    difference = np.divide(numerator, denominator)

    if difference > 2e-7:
        print("\033[93m" + "There is a mistake in the backward propagation! difference = "
              + str(difference) + "\033[0m")
    else:
        print("\033[92m" + "Your backward propagation works perfectly fine! difference = "
              + str(difference) + "\033[0m")

    return difference
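# The check above uses the centered difference (J(theta + eps) - J(theta - eps)) / (2 * eps)
# and the relative error ||grad - approx|| / (||grad|| + ||approx||). A self-contained
# sketch of the same idea on a toy cost J(theta) = theta_0^2 + 3*theta_1 (illustrative only):
import numpy as np

def J(theta):
    return theta[0] ** 2 + 3 * theta[1]

def dJ(theta):
    return np.array([2 * theta[0], 3.0])  # analytic gradient

theta = np.array([1.5, -2.0])
eps = 1e-7
approx = np.zeros_like(theta)
for i in range(theta.size):
    plus, minus = theta.copy(), theta.copy()
    plus[i] += eps
    minus[i] -= eps
    approx[i] = (J(plus) - J(minus)) / (2 * eps)

diff = np.linalg.norm(dJ(theta) - approx) / (np.linalg.norm(dJ(theta)) + np.linalg.norm(approx))
print(diff)  # tiny (~1e-10), far below the 2e-7 threshold used above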
def test_forward_propagation(self):
    cache = forward_propagation(self.params, self.labels, self.input)
    print(cache)
    for l in range(1, self.num_layers):
        self.assertEqual(cache['Z' + str(l)].shape,
                         (self.layer_shapes[l], self.num_train_examples))
        self.assertEqual(cache['A' + str(l)].shape,
                         (self.layer_shapes[l], self.num_train_examples))
def train_model(self, x_train, y_train, layer_dims=None, print_cost=True):
    """
    Train an L-layer neural network for regression.

    :param x_train: np.array of shape (num_features, num_examples) of training inputs
    :param y_train: np.array of shape (num_classifiers, num_examples) of training true labels
    :param layer_dims: list containing the dimensions of each layer in the network
    :param print_cost: if True, print and plot the cost
    :return:
        parameters: dictionary containing parameters Wl, bl
        metrics: dictionary of model metrics (mse)
        predictions: array of predicted outputs
    """
    (n_features, n_examples) = x_train.shape
    n_classifiers = y_train.shape[0]
    costs = []  # cost over each iteration

    if layer_dims is None:  # option to specify different layer_dims
        layer_dims = self.layer_dims

    x, y, parameters = self.initialize_parameters(n_features, n_classifiers, layer_dims)

    # Initialize optimizer
    ZL = forward_propagation(x, parameters, self.activation_functions, self.drop_rate, training=True)
    cost = self.compute_cost(ZL, y, parameters)
    optimizer = tf.train.AdamOptimizer(learning_rate=self.alpha,
                                       beta1=self.adam_beta1,
                                       beta2=self.adam_beta2,
                                       epsilon=self.adam_epsilon).minimize(cost)
    init = tf.global_variables_initializer()  # initialize graph global variables

    # Start session to compute tensorflow graph
    with tf.Session() as sess:
        # Run initialization
        sess.run(init)

        for epoch in range(self.num_epochs):
            epoch_cost = 0.0
            num_mini_batches = int(n_examples / self.mini_batch_size)
            mini_batches = self.randomize_mini_batches(x_train, y_train)

            for tmp_mini_batch in mini_batches:
                (tmp_X, tmp_Y) = tmp_mini_batch
                _, mini_batch_cost = sess.run([optimizer, cost],
                                              feed_dict={x: tmp_X, y: tmp_Y})
                epoch_cost += mini_batch_cost / num_mini_batches

            # Print and record the cost every 100 epochs
            if print_cost and epoch % 100 == 0:
                print('Cost after epoch %i: %f' % (epoch, epoch_cost))
                costs.append(epoch_cost)

        # Plot cost curve
        if print_cost:
            plt.plot(np.squeeze(costs))
            plt.ylabel('cost')
            plt.xlabel('epochs (per 100)')
            plt.show()

        parameters = sess.run(parameters)

        # Evaluate model on training set and print results
        print('Training set Model Performance ------------------')
        metrics, predictions = self.evaluate_model(x_train, y_train, parameters, print_results=True)

    return parameters, metrics, predictions
def train_network(nn, epochs):
    """Trains the neural network for the given number of epochs."""
    for _ in range(epochs):
        cache = fp.forward_propagation(nn)
        bp.backward_propagation(nn, cache)
def model(X_train, Y_train, X_test, Y_test, learning_rate=0.009, num_epochs=100,
          minibatch_size=64, print_cost=True, operation='save', predict=None):
    """
    Implements a three-layer ConvNet in Tensorflow:
    CONV2D -> RELU -> MAXPOOL -> CONV2D -> RELU -> MAXPOOL -> FLATTEN -> FULLYCONNECTED

    Arguments:
    X_train -- training set, of shape (None, 64, 64, 3)
    Y_train -- training labels, of shape (None, n_y = 6)
    X_test -- test set, of shape (None, 64, 64, 3)
    Y_test -- test labels, of shape (None, n_y = 6)
    learning_rate -- learning rate of the optimization
    num_epochs -- number of epochs of the optimization loop
    minibatch_size -- size of a minibatch
    print_cost -- True to print the cost every 5 epochs
    operation -- 'save' to train and save the model, 'restore' to load it and predict
    predict -- inputs to classify when operation == 'restore'

    Returns:
    train_accuracy -- real number, accuracy on the train set (X_train)
    test_accuracy -- real number, testing accuracy on the test set (X_test)
    parameters -- parameters learnt by the model. They can then be used to predict.
    """
    ops.reset_default_graph()  # to be able to rerun the model without overwriting tf variables
    tf.set_random_seed(1)      # to keep results consistent (tensorflow seed)
    seed = 3                   # to keep results consistent (numpy seed)
    (m, n_H0, n_W0, n_C0) = X_train.shape
    n_y = Y_train.shape[1]
    costs = []  # to keep track of the cost

    X, Y = create_placeholders(n_H0, n_W0, n_C0, n_y)
    parameters = initialize_parameters()
    Z3 = forward_propagation(X, parameters)
    cost = compute_cost(Z3, Y)
    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)
    init = tf.global_variables_initializer()
    saver = tf.train.Saver()

    with tf.Session() as sess:
        if operation == 'save':
            sess.run(init)
            for epoch in range(num_epochs):
                minibatch_cost = 0.
                num_minibatches = int(m / minibatch_size)  # number of minibatches of size minibatch_size in the train set
                seed = seed + 1
                minibatches = random_mini_batches(X_train, Y_train, minibatch_size, seed)

                for minibatch in minibatches:
                    (minibatch_X, minibatch_Y) = minibatch
                    _, temp_cost = sess.run([optimizer, cost],
                                            feed_dict={X: minibatch_X, Y: minibatch_Y})
                    minibatch_cost += temp_cost / num_minibatches

                # Print the cost every 5 epochs, record it every epoch
                if print_cost and epoch % 5 == 0:
                    print("Cost after epoch %i: %f" % (epoch, minibatch_cost))
                if print_cost:
                    costs.append(minibatch_cost)

            save_path = saver.save(sess, "model.ckpt")
            print("Model saved in path: %s" % save_path)

            predict_op = tf.argmax(Z3, 1)
            correct_prediction = tf.equal(predict_op, tf.argmax(Y, 1))
            accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
            print(accuracy)
            train_accuracy = accuracy.eval({X: X_train, Y: Y_train})
            test_accuracy = accuracy.eval({X: X_test, Y: Y_test})
            print("Train Accuracy:", train_accuracy)
            print("Test Accuracy:", test_accuracy)
        elif operation == 'restore':
            saver.restore(sess, "model.ckpt")
            predict_op = tf.argmax(Z3, 1)
            result = predict_op.eval({X: predict})
            print(result)
def adam(train_x, train_y, val_x, val_y, d, hl, ol, ac, lf,
         epochs=100, eta=0.1, init_strategy="xavier", batch_size=1):
    print("Function Invoked: adam")

    # Initialize params
    W, b = init_methods.random_init(d, hl, ol) if init_strategy == "random" \
        else init_methods.xavier_init(d, hl, ol)

    n_hl = len(hl)
    t, beta1, beta2, epsilon, count = 0, 0.9, 0.999, 1e-8, 0
    v_W, v_b = [np.array([])] * (n_hl + 2), [np.array([])] * (n_hl + 2)
    m_W, m_b = [np.array([])] * (n_hl + 2), [np.array([])] * (n_hl + 2)

    while t < epochs:
        gW, gb = [], []
        for index, (x, y) in enumerate(zip(train_x, train_y)):
            # Forward propagation
            h, a = forward_propagation.forward_propagation(W, b, x, n_hl, ac)

            # Prediction (y hat)
            _y = h[n_hl + 1]

            # Backward propagation
            _gW, _gb = back_propagation.back_propagation(W, h, x, y, _y, n_hl, ac, lf)

            # Accumulate gradients over the mini-batch
            if index % batch_size == 0:
                gW = _gW
                gb = _gb
            else:
                gW = np.add(gW, _gW)
                gb = np.add(gb, _gb)

            if (index + 1) % batch_size == 0:
                count += 1
                update_adam_Wb(t, index, count, beta1, beta2, epsilon, eta, n_hl,
                               batch_size, m_W, m_b, v_W, v_b, gW, gb, W, b, ac)
                gW, gb = [], []

        # Handle a final partial batch
        if len(train_x) % batch_size != 0:
            count += 1
            index = batch_size - 1 if len(train_x) < batch_size else -1
            update_adam_Wb(t, index, count, beta1, beta2, epsilon, eta, n_hl,
                           batch_size, m_W, m_b, v_W, v_b, gW, gb, W, b, ac)

        # Logging to WandB
        if lf == "cross_entropy":
            val_acc, val_loss = accuracy_loss.get_accuracy_and_loss(W, b, val_x, val_y, n_hl, ac, lf)
            train_acc, train_loss = accuracy_loss.get_accuracy_and_loss(W, b, train_x, train_y, n_hl, ac, lf)
            wandb.log({
                "val_accuracy": val_acc,
                "accuracy": train_acc,
                "val_loss": val_loss,
                "loss": train_loss
            })

        t += 1

    return W, b
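# update_adam_Wb above applies the standard Adam rule: exponential moving averages of
# the gradient (m) and squared gradient (v), bias correction, then a scaled step.
# A minimal self-contained sketch of one Adam step on a single parameter array
# (illustrative only; not the helper used above):
import numpy as np

def adam_step(w, grad, m, v, count, eta=0.1, beta1=0.9, beta2=0.999, epsilon=1e-8):
    m = beta1 * m + (1 - beta1) * grad             # first-moment moving average
    v = beta2 * v + (1 - beta2) * np.square(grad)  # second-moment moving average
    m_hat = m / (1 - beta1 ** count)               # bias correction
    v_hat = v / (1 - beta2 ** count)
    w = w - eta * m_hat / (np.sqrt(v_hat) + epsilon)
    return w, m, v

w, m, v = np.ones(3), np.zeros(3), np.zeros(3)
w, m, v = adam_step(w, grad=np.array([0.5, -0.2, 0.1]), m=m, v=v, count=1)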
def predict(parameters, X):
    A2, cache = forward_propagation(X, parameters)
    predictions = np.around(A2)
    return predictions
print("=============== 4.2 - Initialize the model's parameters ====================")
n_x, n_h, n_y = initialize_parameters_test_case()
parameters = initialize_parameters(n_x, n_h, n_y)
print("W1 = " + str(parameters["W1"]))
print("b1 = " + str(parameters["b1"]))
print("W2 = " + str(parameters["W2"]))
print("b2 = " + str(parameters["b2"]))

print('=============== 4.3 - The Loop ====================')
# forward_propagation
X_assess, parameters = forward_propagation_test_case()
A2, cache = forward_propagation(X_assess, parameters)
print(np.mean(cache['Z1']), np.mean(cache['A1']), np.mean(cache['Z2']), np.mean(cache['A2']))

# compute_cost
A2, Y_assess, parameters = compute_cost_test_case()
print("cost = " + str(compute_cost(A2, Y_assess, parameters)))

# backward_propagation
parameters, cache, X_assess, Y_assess = backward_propagation_test_case()
grads = backward_propagation(parameters, cache, X_assess, Y_assess)
print("dW1 = " + str(grads["dW1"]))
print("db1 = " + str(grads["db1"]))
print("dW2 = " + str(grads["dW2"]))
print("db2 = " + str(grads["db2"]))
def vgd(train_x, train_y, val_x, val_y, d, hl, ol, ac, lf,
        epochs=100, eta=0.1, init_strategy="xavier", alpha=0):
    print("Function Invoked: vgd")

    # Initialize params
    W, b = init_methods.random_init(d, hl, ol) if init_strategy == "random" \
        else init_methods.xavier_init(d, hl, ol)

    t, n_hl = 0, len(hl)

    while t < epochs:
        gW, gb = [], []
        for index, (x, y) in enumerate(zip(train_x, train_y)):
            # Forward propagation
            h, a = forward_propagation.forward_propagation(W, b, x, n_hl, ac)

            # Prediction (y hat)
            _y = h[n_hl + 1]

            # Backward propagation
            _gW, _gb = back_propagation.back_propagation(W, h, x, y, _y, n_hl, ac, lf)

            if index == 0:
                gW = _gW
                gb = _gb
            else:
                gW = list(np.add(gW, _gW))
                gb = list(np.add(gb, _gb))

        # Update bias
        for index, (_b, _gb) in enumerate(zip(b, gb)):
            b[index] = _b - eta * np.array(_gb)

        # Update weights (with L2 weight-decay strength alpha)
        for index, (_W, _gW) in enumerate(zip(W, gW)):
            W[index] = _W - eta * (np.array(_gW) + alpha * _W)

        # Logging to WandB
        if lf == "cross_entropy":
            val_acc, val_loss = accuracy_loss.get_accuracy_and_loss(W, b, val_x, val_y, n_hl, ac, lf)
            train_acc, train_loss = accuracy_loss.get_accuracy_and_loss(W, b, train_x, train_y, n_hl, ac, lf)
            wandb.log({
                "val_accuracy": val_acc,
                "accuracy": train_acc,
                "val_loss": val_loss,
                "loss": train_loss
            })

        t += 1

    return W, b
def rmsprop(train_x, train_y, val_x, val_y, d, hl, ol, ac, lf, epochs, eta, init_strategy, batch_size):
    # Initialize parameters
    if init_strategy == "random":
        W, b = init_methods.random_init2(d, hl, ol)
    else:
        W, b = init_methods.xavier_init(d, hl, ol)
    hist_W, hist_b = init_methods.random_init2(d, hl, ol)
    grad_W, grad_b = init_methods.random_init2(d, hl, ol)
    epsilon, beta1 = 1e-8, 0.95

    iteration = 0
    while iteration < epochs:
        num_points_seen = 0
        for loc, (x, y_true) in enumerate(zip(train_x, train_y)):
            num_points_seen += 1

            # Forward propagation
            h, a = forward_propagation.forward_propagation(W, b, x, len(hl), ac)

            # Prediction (y hat): the last element (np array) of the h list
            y_pred = h[len(hl) + 1]

            # Backward propagation
            grad_W_element, grad_b_element = back_propagation.back_propagation(
                W, h, x, y_true, y_pred, len(hl), ac, lf)

            # Accumulate gradients over the mini-batch
            if loc == 0 or num_points_seen == 1:
                for i in range(len(grad_W)):
                    grad_W[i] = grad_W_element[i]
                    grad_b[i] = grad_b_element[i]
            else:
                for i in range(len(grad_W)):
                    grad_W[i] += grad_W_element[i]
                    grad_b[i] += grad_b_element[i]

            if num_points_seen == batch_size or loc == len(train_x) - 1:
                num_points_seen = 0
                if iteration == 0:
                    for i in range(1, len(W)):
                        hist_W[i] = (1 - beta1) * np.square(grad_W[i])
                        hist_b[i] = (1 - beta1) * np.square(grad_b[i])
                        W[i] = W[i] - (eta / np.sqrt(hist_W[i] + epsilon)) * grad_W[i]
                        b[i] = b[i] - (eta / np.sqrt(hist_b[i] + epsilon)) * grad_b[i]
                else:
                    for i in range(1, len(W)):
                        hist_W[i] = beta1 * hist_W[i] + (1 - beta1) * np.square(grad_W[i])
                        hist_b[i] = beta1 * hist_b[i] + (1 - beta1) * np.square(grad_b[i])
                        W[i] = W[i] - (eta / np.sqrt(hist_W[i] + epsilon)) * grad_W[i]
                        b[i] = b[i] - (eta / np.sqrt(hist_b[i] + epsilon)) * grad_b[i]
                grad_W, grad_b = init_methods.random_init2(d, hl, ol)

        if lf == "cross_entropy":
            train_acc, train_loss = accuracy_loss.get_accuracy_and_loss(W, b, train_x, train_y, len(hl), ac, lf)
            val_acc, val_loss = accuracy_loss.get_accuracy_and_loss(W, b, val_x, val_y, len(hl), ac, lf)
            wandb.log({
                "val_accuracy": val_acc,
                "accuracy": train_acc,
                "val_loss": val_loss,
                "loss": train_loss
            })
            # print("\n\niteration number ", iteration, " Training Accuracy: ", train_acc, " Training Loss: ", train_loss)
            # print("\n\niteration number ", iteration, " validation Accuracy: ", val_acc, " validation Loss: ", val_loss)

        iteration += 1

    return W, b
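# hist_W above is the exponentially decaying average of squared gradients that RMSProp
# divides by. A minimal self-contained sketch of one RMSProp step on a single parameter
# array (illustrative only; not the update used above):
import numpy as np

def rmsprop_step(w, grad, hist, eta=0.001, beta=0.95, epsilon=1e-8):
    hist = beta * hist + (1 - beta) * np.square(grad)  # running average of grad^2
    w = w - (eta / np.sqrt(hist + epsilon)) * grad     # per-coordinate scaled step
    return w, hist

w, hist = np.ones(3), np.zeros(3)
w, hist = rmsprop_step(w, grad=np.array([0.5, -0.2, 0.1]), hist=hist)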
def compute_cost(Z3, Y):
    """
    Arguments:
    Z3 -- output of forward propagation (output of the last LINEAR unit), of shape (6, number of examples)
    Y -- "true" labels vector placeholder, same shape as Z3

    Returns:
    cost - Tensor of the cost function
    """
    entropy = tf.nn.softmax_cross_entropy_with_logits(logits=Z3, labels=Y)
    cost = tf.reduce_mean(entropy)
    return cost


if __name__ == '__main__':
    tf.reset_default_graph()
    with tf.Session() as sess:
        np.random.seed(1)
        X, Y = create_placeholders(64, 64, 3, 6)
        parameters = initialize_parameters()
        Z3 = forward_propagation(X, parameters)
        cost = compute_cost(Z3, Y)
        init = tf.global_variables_initializer()
        sess.run(init)
        a = sess.run(cost, {
            X: np.random.randn(4, 64, 64, 3),
            Y: np.random.randn(4, 6)
        })
        print("cost = " + str(a))
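# softmax_cross_entropy_with_logits above fuses the softmax and the cross-entropy
# for numerical stability. A self-contained NumPy sketch of the same quantity for
# one batch of logits (illustrative data):
import numpy as np

logits = np.array([[2.0, 1.0, 0.1],
                   [0.5, 2.5, 0.3]])   # shape (batch, classes)
labels = np.array([[1.0, 0.0, 0.0],
                   [0.0, 1.0, 0.0]])   # one-hot "true" labels

shifted = logits - logits.max(axis=1, keepdims=True)  # stabilize the exponentials
log_softmax = shifted - np.log(np.exp(shifted).sum(axis=1, keepdims=True))
entropy = -(labels * log_softmax).sum(axis=1)          # per-example cross-entropy
cost = entropy.mean()
print(cost)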
def train(mnist):
    # step 1.2------define placeholders for the input data
    input_data_x = tf.placeholder(tf.float32, [None, forward_propagation.INPUT_NODE],
                                  name="input_data_x")
    input_data_y = tf.placeholder(tf.float32, [None, forward_propagation.OUTPUT_NODE],
                                  name="input_data_y")

    # step 3------calculate forward propagation
    REGULARIZATION_RATE = 0.0001
    regularizer = tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE)
    y = forward_propagation.forward_propagation(input_data_x, True, regularizer)

    # step 4------define the loss
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=tf.argmax(input_data_y, 1), logits=y)
    cross_entropy_mean = tf.reduce_mean(cross_entropy)
    loss = cross_entropy_mean + tf.add_n(tf.get_collection("losses"))

    # step 5.1------optimization & training (moving average)
    global_steps = tf.Variable(0, trainable=False)
    variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_steps)
    moving_operation = variable_averages.apply(tf.trainable_variables())

    # step 5.2------optimization & training (learning rate)
    LEARNING_RATE_BASE = 0.1
    LEARNING_RATE_DECAY = 0.99
    BATCH_SIZE = 100
    learning_rate = tf.train.exponential_decay(LEARNING_RATE_BASE, global_steps,
                                               mnist.train.num_examples / BATCH_SIZE,
                                               LEARNING_RATE_DECAY)

    # step 5.3------train
    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(
        loss, global_step=global_steps)

    # step 6------define the dependency
    with tf.control_dependencies([train_step, moving_operation]):
        train_operation = tf.no_op(name="train")

    for variables in tf.global_variables():
        print(variables)

    # step 7------define a Saver object to save the model
    saver = tf.train.Saver(max_to_keep=0)

    # step 8------execution
    TRAINING_STEP = 5000
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for steps in range(1, TRAINING_STEP):
            input_data, output_data = mnist.train.next_batch(BATCH_SIZE)
            _, loss_value = sess.run([train_operation, loss],
                                     feed_dict={
                                         input_data_x: input_data,
                                         input_data_y: output_data
                                     })
            if steps % 250 == 1:
                print("After %d steps, loss on training batch is %g" % (steps, loss_value))
                saver.save(sess, os.path.join(MODEL_SAVE_PATH, MODEL_NAME))
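# tf.train.exponential_decay above computes
#   lr = LEARNING_RATE_BASE * LEARNING_RATE_DECAY ** (global_step / decay_steps),
# where decay_steps = num_examples / BATCH_SIZE (staircase behaviour is off by default,
# so the exponent is fractional). A self-contained NumPy sketch of the schedule:
import numpy as np

def exponential_decay(base_lr, global_step, decay_steps, decay_rate, staircase=False):
    exponent = global_step / decay_steps
    if staircase:
        exponent = np.floor(exponent)  # decay in discrete steps, once per "epoch"
    return base_lr * decay_rate ** exponent

# e.g. with base 0.1, decay 0.99, 550 decay steps (illustrative values):
for step in (0, 550, 5000):
    print(step, exponential_decay(0.1, step, 550, 0.99))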
def mgd(train_x, train_y, val_x, val_y, d, hl, ol, ac, lf, epochs, eta, init_strategy, batch_size, alpha=0):
    # Initialize parameters
    if init_strategy == "random":
        W, b = init_methods.random_init2(d, hl, ol)
    else:
        W, b = init_methods.xavier_init(d, hl, ol)
    gamma = 0.9
    grad_W, grad_b = init_methods.random_init2(d, hl, ol)
    prev_W, prev_b = init_methods.random_init2(d, hl, ol)

    iteration = 0
    while iteration < epochs:
        num_points_seen = 0
        for loc, (x, y_true) in enumerate(zip(train_x, train_y)):
            num_points_seen += 1

            # Forward propagation
            h, a = forward_propagation.forward_propagation(W, b, x, len(hl), ac)

            # Prediction (y hat): the last element (np array) of the h list
            y_pred = h[len(hl) + 1]

            # Backward propagation
            grad_W_element, grad_b_element = back_propagation.back_propagation(
                W, h, x, y_true, y_pred, len(hl), ac, lf)

            # Accumulate gradients over the mini-batch
            if loc == 0 or num_points_seen == 1:
                for i in range(len(grad_W)):
                    grad_W[i] = grad_W_element[i]
                    grad_b[i] = grad_b_element[i]
            else:
                for i in range(len(grad_W)):
                    grad_W[i] += grad_W_element[i]
                    grad_b[i] += grad_b_element[i]

            if num_points_seen == batch_size or loc == len(train_x) - 1:
                num_points_seen = 0
                # Update prev_W, prev_b, W and b
                if iteration == 0:
                    for i in range(1, len(W)):
                        W[i] = W[i] - eta * grad_W[i] - eta * alpha * W[i]
                        b[i] = b[i] - eta * grad_b[i]
                        prev_W[i] = eta * grad_W[i] + eta * alpha * W[i]
                        prev_b[i] = eta * grad_b[i]
                else:
                    for i in range(1, len(W)):
                        prev_W[i] = np.multiply(gamma, prev_W[i]) + eta * grad_W[i] + eta * alpha * W[i]
                        prev_b[i] = np.multiply(gamma, prev_b[i]) + eta * grad_b[i]
                        W[i] = W[i] - prev_W[i]
                        b[i] = b[i] - prev_b[i]
                grad_W, grad_b = init_methods.random_init2(d, hl, ol)

        if lf == "cross_entropy":
            train_acc, train_loss = accuracy_loss.get_accuracy_and_loss(W, b, train_x, train_y, len(hl), ac, lf)
            val_acc, val_loss = accuracy_loss.get_accuracy_and_loss(W, b, val_x, val_y, len(hl), ac, lf)
            wandb.log({
                "val_accuracy": val_acc,
                "accuracy": train_acc,
                "val_loss": val_loss,
                "loss": train_loss
            })
            # print("\n\niteration number ", iteration, " Training Accuracy: ", train_acc, " Training Loss: ", train_loss)
            # print("\n\niteration number ", iteration, " validation Accuracy: ", val_acc, " validation Loss: ", val_loss)

        iteration += 1

    return W, b
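# The mgd update above keeps a velocity term: v = gamma * v + eta * grad, then w -= v.
# A minimal self-contained sketch of classical momentum on one parameter array
# (illustrative only; the weight-decay term alpha is omitted):
import numpy as np

def momentum_step(w, grad, velocity, eta=0.1, gamma=0.9):
    velocity = gamma * velocity + eta * grad  # accumulate an exponentially weighted step
    w = w - velocity
    return w, velocity

w, v = np.ones(3), np.zeros(3)
for grad in (np.array([0.5, -0.2, 0.1]),) * 3:
    w, v = momentum_step(w, grad, v)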
def model(X_train, Y_train, X_test, Y_test, learning_rate=0.009,
          num_epochs=100, minibatch_size=64, print_cost=True):
    """
    Implements a three-layer ConvNet in Tensorflow:
    CONV2D -> RELU -> MAXPOOL -> CONV2D -> RELU -> MAXPOOL -> FLATTEN -> FULLYCONNECTED

    Arguments:
    X_train -- training set, of shape (None, 64, 64, 3)
    Y_train -- training labels, of shape (None, n_y = 6)
    X_test -- test set, of shape (None, 64, 64, 3)
    Y_test -- test labels, of shape (None, n_y = 6)
    learning_rate -- learning rate of the optimization
    num_epochs -- number of epochs of the optimization loop
    minibatch_size -- size of a minibatch
    print_cost -- True to print the cost every 5 epochs

    Returns:
    train_accuracy -- real number, accuracy on the train set (X_train)
    test_accuracy -- real number, testing accuracy on the test set (X_test)
    parameters -- parameters learnt by the model. They can then be used to predict.
    """
    ops.reset_default_graph()  # to be able to rerun the model without overwriting tf variables
    tf.set_random_seed(1)      # to keep results consistent (tensorflow seed)
    seed = 3                   # to keep results consistent (numpy seed)
    (m, n_H0, n_W0, n_C0) = X_train.shape
    n_y = Y_train.shape[1]
    costs = []  # to keep track of the cost

    # Create placeholders of the correct shape
    X, Y = create_placeholders(n_H0, n_W0, n_C0, n_y)

    # Initialize parameters
    parameters = initialize_parameters()

    # Forward propagation: build the forward propagation in the tensorflow graph
    Z3 = forward_propagation(X, parameters)

    # Cost function: add the cost function to the tensorflow graph
    cost = compute_cost(Z3, Y)

    # Backpropagation: define the tensorflow optimizer. Use an AdamOptimizer that minimizes the cost.
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

    # Initialize all the variables globally
    init = tf.global_variables_initializer()

    # Start the session to compute the tensorflow graph
    with tf.Session() as sess:
        # Run the initialization
        sess.run(init)

        # Do the training loop
        for epoch in range(num_epochs):
            minibatch_cost = 0.
            num_minibatches = int(m / minibatch_size)  # number of minibatches of size minibatch_size in the train set
            seed = seed + 1
            minibatches = random_mini_batches(X_train, Y_train, minibatch_size, seed)

            for minibatch in minibatches:
                # Select a minibatch
                (minibatch_X, minibatch_Y) = minibatch
                # IMPORTANT: the line that runs the graph on a minibatch.
                # Run the session to execute the optimizer and the cost;
                # the feed_dict should contain a minibatch for (X, Y).
                _, temp_cost = sess.run(fetches=[optimizer, cost],
                                        feed_dict={X: minibatch_X, Y: minibatch_Y})
                minibatch_cost += temp_cost / num_minibatches

            # Print the cost every 5 epochs, record it every epoch
            if print_cost and epoch % 5 == 0:
                print("Cost after epoch %i: %f" % (epoch, minibatch_cost))
            if print_cost:
                costs.append(minibatch_cost)

        # Plot the cost
        plt.plot(np.squeeze(costs))
        plt.ylabel('cost')
        plt.xlabel('epochs')
        plt.title("Learning rate =" + str(learning_rate))
        plt.show()

        # Calculate the correct predictions
        predict_op = tf.argmax(Z3, 1)
        correct_prediction = tf.equal(predict_op, tf.argmax(Y, 1))

        # Calculate accuracy on the train and test sets
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
        print(accuracy)
        train_accuracy = accuracy.eval({X: X_train, Y: Y_train})
        test_accuracy = accuracy.eval({X: X_test, Y: Y_test})
        print("Train Accuracy:", train_accuracy)
        print("Test Accuracy:", test_accuracy)

        return train_accuracy, test_accuracy, parameters
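# random_mini_batches above shuffles the training set with a seed and slices it into
# minibatch_size chunks. A self-contained NumPy sketch of that shuffling/partitioning
# (illustrative; the real helper also handles the channel-last image layout):
import numpy as np

def random_mini_batches_np(X, Y, minibatch_size, seed):
    m = X.shape[0]
    rng = np.random.RandomState(seed)
    permutation = rng.permutation(m)
    X_shuffled, Y_shuffled = X[permutation], Y[permutation]
    return [(X_shuffled[k:k + minibatch_size], Y_shuffled[k:k + minibatch_size])
            for k in range(0, m, minibatch_size)]

batches = random_mini_batches_np(np.zeros((10, 4)), np.zeros((10, 2)), minibatch_size=4, seed=3)
print([xb.shape[0] for xb, yb in batches])  # [4, 4, 2]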