Example #1
    def model(X_train, Y_train, layers_dims, learning_rate, num_iter, lambd,
              print_cost):

        with tf.device('/device:GPU:0'):

            tf.reset_default_graph(
            )  # to be able to rerun the model without overwriting tf variables
            (
                n_x, m
            ) = X_train.shape  # Number of features and number of training examples
            n_y = Y_train.shape[0]  # Number of classes
            n_hidden_layers = len(layers_dims)  # Number of hidden layers
            costs = []  # Keep track of the cost

            ### Create Placeholders ###
            X, Y = create_placeholders(n_x, n_y)

            ### Initialize Parameters ###
            parameters = init_params(layers_dims)

            ### Forward propagation - Build the forward propagation in the tensorflow graph ###
            ZL = forward_propagation(X, parameters)

            ### Cost - Add cost function to tensorflow graph ###
            cost_function = compute_cost(ZL, Y, parameters, n_hidden_layers,
                                         lambd, m)

            ### Backpropagation - Define the tensorflow optimizer ###
            optimizer = tf.train.AdamOptimizer(
                learning_rate=learning_rate).minimize(cost_function)
            #optimizer = tf.train.GradientDescentOptimizer(learning_rate = learning_rate).minimize(cost_function)

            ### Initialize all the variables ###
            init = tf.global_variables_initializer()

            ### Start the session to compute the tensorflow graph ###
            with tf.Session() as sess:

                # Run the initialization
                sess.run(init)

                # Do the training loop #
                for i in range(num_iter):

                    # Run the session to execute the optimizer and the cost
                    _, cost_value = sess.run([optimizer, cost_function],
                                             feed_dict={
                                                 X: X_train,
                                                 Y: Y_train
                                             })

                    # Record the cost every 1000 iterations
                    #if print_cost and i % 1000 == 0:
                    #    print("Cost after iteration %i: %f" % (i, cost_value))
                    if print_cost and i % 1000 == 0:
                        costs.append(cost_value)
                # Save the parameters in a variable
                parameters = sess.run(parameters)

        return parameters, costs
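The helpers create_placeholders and init_params used by Example #1 are defined elsewhere in the source project. A minimal TF 1.x sketch of what they might look like for this column-major layout; the names, shapes, and the assumption that layers_dims lists every layer size (input through output) are guesses, not the original code:

import tensorflow as tf  # TF 1.x API, as in the example above


def create_placeholders(n_x, n_y):
    # One example per column, so the batch dimension is the second axis.
    X = tf.placeholder(tf.float32, shape=[n_x, None], name="X")
    Y = tf.placeholder(tf.float32, shape=[n_y, None], name="Y")
    return X, Y


def init_params(layers_dims):
    # One Xavier-initialized weight matrix and zero bias vector per layer.
    parameters = {}
    for l in range(1, len(layers_dims)):
        parameters["W" + str(l)] = tf.get_variable(
            "W" + str(l), [layers_dims[l], layers_dims[l - 1]],
            initializer=tf.contrib.layers.xavier_initializer())
        parameters["b" + str(l)] = tf.get_variable(
            "b" + str(l), [layers_dims[l], 1],
            initializer=tf.zeros_initializer())
    return parameters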
Example #2
def model(X,
          Y,
          layers_dims,
          learning_rate=0.01,
          initialization='random',
          init_const=0.01,
          num_of_iterations=10000,
          print_cost=True,
          print_cost_after=1000,
          seed=None):
    L = len(layers_dims) - 1  # number of layers

    # Initialize parameters
    parameters = initialize_parameters(layers_dims, initialization, init_const,
                                       seed)

    # Gradient Descent
    for i in range(num_of_iterations):
        # Forward propagation
        AL, caches = forward_propagation(X, parameters, L)

        # Compute cost
        cost = compute_cost(AL, Y)

        # Backward propagation
        grads = backward_propagation(AL, Y, caches)

        # Updating parameters
        parameters = update_parameters(parameters, grads, learning_rate, L)

    # Print the cost after the given number of iterations
        if print_cost and i % print_cost_after == 0:
            print("Cost after iteration %i: %f" % (i, cost))

    return parameters
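Example #2 (and Example #5 below) delegate the update step to update_parameters / update_parameters_using_gd. A minimal NumPy sketch of the plain gradient-descent step such a helper presumably performs; the "Wl"/"dWl" key convention is an assumption borrowed from the other examples on this page:

import numpy as np


def update_parameters(parameters, grads, learning_rate, L):
    # theta = theta - alpha * d_theta, applied to every W_l and b_l.
    for l in range(1, L + 1):
        parameters["W" + str(l)] -= learning_rate * grads["dW" + str(l)]
        parameters["b" + str(l)] -= learning_rate * grads["db" + str(l)]
    return parameters


# Quick shape check with dummy values
params = {"W1": np.ones((4, 3)), "b1": np.zeros((4, 1))}
grads = {"dW1": np.ones((4, 3)), "db1": np.ones((4, 1))}
params = update_parameters(params, grads, learning_rate=0.01, L=1)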
Example #3
def nn_model(X, Y, n_h, num_iterations=1500, print_cost=False):

    np.random.seed(3)
    n_x = layer_sizes(X, Y)[0]
    n_y = layer_sizes(X, Y)[2]

    parameters = initialize_parameters(n_x, n_h, n_y)
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]

    for i in range(0, num_iterations):

        A2, cache = forward_propagation(X, parameters)

        cost = compute_cost(A2, Y, parameters)

        grads = backward_propagation(parameters, cache, X, Y)

        parameters = update_parameters(parameters, grads)

        if print_cost and i % 100 == 0:
            print("Cost after iteration %i: %f" % (i, cost))
        plt.scatter(i + 1, cost)

    # Label and save the cost curve once after the loop
    plt.title('cost curve')
    plt.xlabel('iteration')
    plt.ylabel('cost')
    plt.savefig('cost curve.jpg')
    return parameters
Example #4
def valuate(mnist):
    input_data_x = tf.placeholder(tf.float32,
                                  [None, forward_propagation.INPUT_NODE],
                                  name="input_data_x")
    input_data_y = tf.placeholder(tf.float32,
                                  [None, forward_propagation.OUTPUT_NODE],
                                  name="input_data_y")

    y = forward_propagation.forward_propagation(input_data_x, False, None)
    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(input_data_y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    variable_averages = tf.train.ExponentialMovingAverage(
        mnist_train.MOVING_AVERAGE_DECAY)
    variables_to_restore = variable_averages.variables_to_restore()
    saver = tf.train.Saver(variables_to_restore)
    validate_feed = {
        input_data_x: mnist.validation.images,
        input_data_y: mnist.validation.labels
    }
    print(validate_feed)
    print("*")
    for variables in tf.global_variables():  # tf.all_variables() is deprecated
        print(variables)

    with tf.Session() as sess:
        ckpt = tf.train.get_checkpoint_state(mnist_train.MODEL_SAVE_PATH)
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
            #global_step = ckpt.model_checkpoint_path.split("/")[-1].split("-")[-1]
            accuracy_prediction = sess.run(accuracy, feed_dict=validate_feed)
            print("accuracy on validation data is %g" % (accuracy_prediction))
        else:
            print("No checkpoint file found")
            return
Example #5
def model_using_sgd(X,
                    Y,
                    layers_dims,
                    learning_rate=0.01,
                    initialization='random',
                    _lambda=0,
                    keep_prob=1,
                    init_const=0.01,
                    num_of_iterations=10000,
                    print_cost=True,
                    print_cost_after=1000,
                    seed=None):
    L = len(layers_dims) - 1  # number of layers
    m = X.shape[1]  # number of training examples

    # Initialize parameters
    parameters = initialize_parameters(layers_dims, initialization, init_const,
                                       seed)

    # Gradient Descent
    for i in range(num_of_iterations):
        for j in range(m):
            # Forward propagation
            if keep_prob == 1:
                AL, caches = forward_propagation(X[:, j], parameters, L)
            elif keep_prob < 1:
                AL, caches = forward_propagation_with_dropout(
                    X[:, j], parameters, L, keep_prob)

            # Compute cost
            if _lambda == 0:
                cost = compute_cost(AL, Y[:, j])
            else:
                cost = compute_cost_with_regularization(
                    AL, Y[:, j], parameters, _lambda, L)

            # Backward propagation
            if _lambda == 0 and keep_prob == 1:
                grads = backward_propagation(AL, Y[:, j], caches)
            elif _lambda != 0:
                grads = backward_propagation_with_regularization(
                    AL, Y[:, j], caches, _lambda)
            elif keep_prob < 1:
                grads = backward_propagation_with_dropout(
                    AL, Y[:, j], caches, keep_prob)

            # Updating parameters
            parameters = update_parameters_using_gd(parameters, grads,
                                                    learning_rate, L)

        # Print the cost after the given number of iterations
        if print_cost and i % print_cost_after == 0:
            print("Cost after iteration %i: %f" % (i, cost))

    # Gradient checking
    gradient_checking(parameters, grads, X, Y, layers_dims, _lambda=_lambda)

    return parameters
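One thing to watch in Example #5: NumPy column indexing with X[:, j] returns a 1-D array, so the per-example slice only works if the forward/backward helpers tolerate that shape; X[:, j:j+1] would keep a column vector. A quick, standalone illustration of the difference:

import numpy as np

X = np.arange(12).reshape(3, 4)  # 3 features, 4 training examples

col_1d = X[:, 1]    # shape (3,)   -- drops the column dimension
col_2d = X[:, 1:2]  # shape (3, 1) -- keeps a column vector

print(col_1d.shape, col_2d.shape)  # prints: (3,) (3, 1)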
Example #6
def train():
    #step 1------define placeholder of input data
    input_data_x = tf.placeholder(tf.float32, [
        None, forward_propagation.IMAGE_SIZE, forward_propagation.IMAGE_SIZE,
        forward_propagation.INPUT_CHANNELS
    ],
                                  name="input_data_x")
    input_data_y = tf.placeholder(tf.float32,
                                  [None, forward_propagation.OUTPUT_NODE],
                                  name="input_data_y")

    #step 2------define network structure
    #goto forward_propagation.py

    #step 3------calculate forward propagation
    (y, weight1, weight2,
     weight3) = forward_propagation.forward_propagation(input_data_x)

    #step 4------define loss
    #cross_entropy = tf.reduce_mean( -tf.reduce_sum(input_data_y * tf.log(tf.clip_by_value(y, 1e-8, tf.reduce_max(y))), reduction_indices = [1]))
    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=y, labels=input_data_y))
    #use l2_regularization
    l2_w1 = LAMDA * tf.nn.l2_loss(weight1)
    l2_w2 = LAMDA * tf.nn.l2_loss(weight2)
    l2_w3 = LAMDA * tf.nn.l2_loss(weight3)
    loss = cross_entropy + l2_w1 + l2_w2 + l2_w3

    #step 5------train
    train_step = tf.train.GradientDescentOptimizer(0.05).minimize(loss)

    #step 6------define an object to save the model
    saver = tf.train.Saver()

    #step 8------execution
    TRAINING_STEP = 20000
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        (data_train,
         labels_train) = load_cifar_dataset.get_train_data("cifar_dataset")
        for steps in range(1, TRAINING_STEP):
            start = steps * forward_propagation.BATCH_SIZE % 50000
            _, loss_value = sess.run(
                [train_step, loss],
                feed_dict={
                    input_data_x:
                    data_train[start:start + forward_propagation.BATCH_SIZE],
                    input_data_y:
                    labels_train[start:start + forward_propagation.BATCH_SIZE]
                })
            if steps % 1000 == 1:
                print("After %d steps, loss on training batch is %g" %
                      (steps, loss_value))
        saver.save(sess, os.path.join(MODEL_SAVE_PATH, MODEL_NAME))
    def setUp(self):
        self.layer_shapes = {0: 3, 1: 4, 2: 1}
        self.num_layers = len(self.layer_shapes)
        self.labels = np.array([[2, 4]])
        self.input = np.array(([1, 1], [2, 2], [3, 3]))
        self.num_train_examples = 2
        self.params = ip.initialize_weights_and_biases(self.layer_shapes)
        self.cache = fp.forward_propagation(self.params, self.labels,
                                            self.input)
        self.params.update(self.cache)
        self.da_loss = lf.mean_squared_error_dx(self.labels,
                                                self.params['A' + str(2)])
def nn_model(X,
             Y,
             n_h,
             num_iterations=10000,
             learning_rate=0.01,
             print_cost=False):
    """
    

    Parameters
    ----------
    X : dataset of shape (2, number of examples)
    Y : labels of shape (1, number of examples)
    n_h : size of the hidden layer
    num_iterations : Number of iterations in gradient descent loop
    learning_rate : learning rate used in the gradient descent update
    print_cost : if True, print the cost every 1000 iterations

    Returns
    -------
    parameters : parameters learnt by the model. They can then be used to predict.

    """

    np.random.seed(3)
    # Determine the layer sizes with a single call to network_structure
    sizes = network_structure(X, Y, n_h)
    n_x, n_h, n_y = sizes[0], sizes[1], sizes[2]

    # Initialize parameters
    parameters = initialize_parameters(n_x, n_h, n_y)
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]

    # Loop (gradient descent)
    for i in range(0, num_iterations):
        # Forward propagation. Inputs: "X, parameters". Outputs: "A2, cache".
        A2, cache = forward_propagation(X, parameters)
        # Cost function. Inputs: "A2, Y, parameters". Outputs: "cost".
        cost = compute_cost(A2, Y)
        # Backpropagation. Inputs: "parameters, cache, X, Y". Outputs: "grads".
        grads = backward_propagation(parameters, cache, X, Y)
        # Gradient descent parameter update. Inputs: "parameters, grads". Outputs: "parameters".
        parameters = update_parameters(parameters, grads, learning_rate=learning_rate)

        # Print the cost every 1000 iterations
        if print_cost and i % 1000 == 0:
            print("Cost after iteration %i: %f" % (i, cost))

    return parameters
Example #9
def nn_model(X, Y, n_h, num_iterations=10000, print_cost=False):
    """
    Arguments:
    X -- dataset of shape (2, number of examples)
    Y -- labels of shape (1, number of examples)
    n_h -- size of the hidden layer
    num_iterations -- Number of iterations in gradient descent loop
    print_cost -- if True, print the cost every 1000 iterations
    
    Returns:
    parameters -- parameters learnt by the model. They can then be used to predict.
    """

    np.random.seed(3)
    n_x = layer_sizes(X, Y)[0]
    n_y = layer_sizes(X, Y)[2]

    # Initialize parameters, then retrieve W1, b1, W2, b2. Inputs: "n_x, n_h, n_y". Outputs = "W1, b1, W2, b2, parameters".
    ### START CODE HERE ### (≈ 5 lines of code)
    parameters = initialize_parameters(n_x, n_h, n_y)
    W1 = parameters['W1']
    b1 = parameters['b1']
    W2 = parameters['W2']
    b2 = parameters['b2']
    ### END CODE HERE ###

    # Loop (gradient descent)

    for i in range(0, num_iterations):

        ### START CODE HERE ### (≈ 4 lines of code)
        # Forward propagation. Inputs: "X, parameters". Outputs: "A2, cache".
        A2, cache = forward_propagation(X, parameters)

        # Cost function. Inputs: "A2, Y, parameters". Outputs: "cost".
        cost = compute_cost(A2, Y, parameters)

        # Backpropagation. Inputs: "parameters, cache, X, Y". Outputs: "grads".
        grads = backward_propagation(parameters, cache, X, Y)

        # Gradient descent parameter update. Inputs: "parameters, grads". Outputs: "parameters".
        parameters = update_parameters(parameters, grads)

        ### END CODE HERE ###

        # Print the cost every 1000 iterations
        if print_cost and i % 1000 == 0:
            print("Cost after iteration %i: %f" % (i, cost))

    return parameters
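The two-layer nn_model variants on this page (Examples #3 and #9, and the nn_model above Example #9) all lean on the same helpers. A minimal NumPy sketch of the forward pass and cost they appear to assume; the tanh hidden layer and sigmoid output are an assumption:

import numpy as np


def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))


def forward_propagation(X, parameters):
    # Hidden layer with tanh, output layer with sigmoid.
    W1, b1 = parameters["W1"], parameters["b1"]
    W2, b2 = parameters["W2"], parameters["b2"]
    Z1 = np.dot(W1, X) + b1
    A1 = np.tanh(Z1)
    Z2 = np.dot(W2, A1) + b2
    A2 = sigmoid(Z2)
    cache = {"Z1": Z1, "A1": A1, "Z2": Z2, "A2": A2}
    return A2, cache


def compute_cost(A2, Y, parameters=None):
    # Binary cross-entropy averaged over the m examples; `parameters` is
    # accepted only to match the call signature used in the examples.
    m = Y.shape[1]
    logprobs = Y * np.log(A2) + (1 - Y) * np.log(1 - A2)
    return float(-np.sum(logprobs) / m)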
    def evaluate_model(self, x_test, y_test, parameters, print_results=True):
        """
        Evaluate accuracy, F1-score, precision and recall of trained model on given inputs/outputs.
        :param x_test: np.array of shape (num_features, num_examples) of inputs
        :param y_test: np.array of shape (num_classifiers, num_examples) of true labels
        :param parameters: dictionary containing parameters Wl, bl
        :param print_results: if True, print metrics.
        :return: metrics: dictionary of model metrics (Accuracy, F1-Score, Precision, and Recall)
                 predictions: array of predicted outputs
        """
        metrics = {}

        (n_features, n_examples) = x_test.shape
        n_classifiers = y_test.shape[0]

        with tf.Session():
            # Create placeholders for x, y
            x = tf.placeholder(tf.float32, shape=(n_features, None))
            y = tf.placeholder(tf.float32, shape=(n_classifiers, None))

            ZL = forward_propagation(x, parameters, self.activation_functions, drop_rate=0.0, training=False)

            prediction = tf.one_hot(tf.argmax(ZL), n_classifiers)

            correct_prediction = tf.equal(tf.argmax(ZL), tf.argmax(y))
            accuracy = tf.reduce_mean(tf.cast(correct_prediction, 'float'))

            t_p = tf.count_nonzero(tf.argmax(ZL) * tf.argmax(y), dtype=tf.float32)              # true positive
            f_p = tf.count_nonzero(tf.argmax(ZL) * (tf.argmax(y) - 1), dtype=tf.float32)        # false positive
            f_n = tf.count_nonzero((tf.argmax(ZL) - 1) * tf.argmax(y), dtype=tf.float32)        # false negative

            precision = tf.divide(t_p, tf.add(t_p, f_p))
            recall = tf.divide(t_p, tf.add(t_p, f_n))
            f1_score = tf.divide(2 * tf.multiply(precision, recall), tf.add(precision, recall))

            metrics['Accuracy'] = accuracy.eval({x: x_test, y: y_test})
            metrics['F1-Score'] = f1_score.eval({x: x_test, y: y_test})
            metrics['Precision'] = precision.eval({x: x_test, y: y_test})
            metrics['Recall'] = recall.eval({x: x_test, y: y_test})

            if print_results:
                for met_label, met in sorted(metrics.items()):
                    print(met_label, met)

            predictions = prediction.eval({x: x_test})

            return metrics, predictions
Example #11
def predict(parameters, X):
    """
    Using the learned parameters, predicts a class for each example in X

    Arguments:
    parameters -- python dictionary containing your parameters
    X -- input data of size (n_x, m)

    Returns
    predictions -- vector of predictions of our model (red: 0 / blue: 1)
    """
    from forward_propagation import forward_propagation

    # Computes probabilities using forward propagation, and classifies to 0/1 using 0.5 as the threshold.
    A2, cache = forward_propagation(X, parameters)
    predictions = (A2 > 0.5)

    return predictions
Example #12
    def evaluate_model(self, x_test, y_test, parameters, print_results=True):
        """
        Evaluate prediction error (mse) of trained model on given inputs/outputs.
        :param x_test: np.array of shape (num_features, num_examples) of inputs
        :param y_test: np.array of shape (num_classifiers, num_examples) of true labels
        :param parameters: dictionary containing parameters Wl, bl
        :param print_results: if True, print metrics.
        :return: metrics: dictionary of model metrics (mse)
                 predictions: array of predicted outputs
        """
        metrics = {}

        (n_features, n_examples) = x_test.shape
        n_classifiers = y_test.shape[0]

        with tf.Session():
            # Create placeholders for x, y
            x = tf.placeholder(tf.float32, shape=(n_features, None))
            y = tf.placeholder(tf.float32, shape=(n_classifiers, None))

            ZL = forward_propagation(x,
                                     parameters,
                                     self.activation_functions,
                                     drop_rate=0.0,
                                     training=False)

            prediction = ZL

            mse = tf.reduce_mean(tf.squared_difference(
                ZL, y))  # evaluate model on mean squared error
            tss = tf.reduce_mean(tf.squared_difference(
                y, tf.reduce_mean(y)))  # total squared sum
            rsq = tf.subtract(1.0, tf.divide(mse, tss))  # R^2

            metrics['MSE:'] = mse.eval({x: x_test, y: y_test})
            metrics['R^2:'] = rsq.eval({x: x_test, y: y_test})

            if print_results:
                for met_label, met in sorted(metrics.items()):
                    print(met_label, met)

            predictions = prediction.eval({x: x_test}).T

            return metrics, predictions
Example #13
def predict(parameters, X):
    """
    Using the learned parameters, predicts a class for each example in X
    
    Arguments:
    parameters -- python dictionary containing your parameters 
    X -- input data of size (n_x, m)
    
    Returns
    predictions -- vector of predictions of our model (red: 0 / blue: 1)
    """

    # Computes probabilities using forward propagation, and classifies to 0/1 using 0.5 as the threshold.
    ### START CODE HERE ### (≈ 2 lines of code)
    A2, cache = forward_propagation(X, parameters)
    predictions = np.round(A2)
    ### END CODE HERE ###

    return predictions
Example #14
def predict(parameters, X):
    """
    Using the learned parameters, predicts a class for each example in X
    
    Arguments:
    parameters -- python dictionary containing your parameters 
    X -- input data of size (n_x, m)
    
    Returns
    predictions -- vector of predictions of our model (red: 0 / blue: 1)
    """

    # Computes probabilities using forward propagation, and classifies to 0/1 using 0.5 as the threshold.
    A2, cache = forward_propagation(X, parameters)
    predictions = np.zeros((1, X.shape[1]))
    for i in range(A2.shape[1]):
        predictions[0, i] = 1 if A2[0, i] > 0.5 else 0

    return predictions
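The element-wise loop in Example #14 makes the same 0/1 decisions as the vectorized comparison used in Example #11 (up to dtype); a quick standalone check:

import numpy as np

A2 = np.random.rand(1, 10)  # stand-in output probabilities

loop_pred = np.zeros((1, A2.shape[1]))
for i in range(A2.shape[1]):
    loop_pred[0, i] = 1 if A2[0, i] > 0.5 else 0

vec_pred = (A2 > 0.5).astype(float)  # one-line equivalent

assert np.array_equal(loop_pred, vec_pred)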
Example #15
def valuate():

    #step 1------define placeholder of input data
    input_data_x = tf.placeholder(tf.float32, [
        None, forward_propagation.IMAGE_SIZE, forward_propagation.IMAGE_SIZE,
        forward_propagation.INPUT_CHANNELS
    ],
                                  name="input_data_x")
    input_data_y = tf.placeholder(tf.float32,
                                  [None, forward_propagation.OUTPUT_NODE],
                                  name="input_data_y")

    #step 2------use network structure
    #goto forward_propagation.py

    #step 3-----calculate forward propagation
    (y, _1, _2, _3) = forward_propagation.forward_propagation(input_data_x)

    #step 4------predict accuracy
    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(input_data_y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    #step 5------define an object to load the model
    saver = tf.train.Saver()

    #step 6------execution
    with tf.Session() as sess:
        (data_test, labels_test) = load_cifar_dataset.get_test_data()
        validate_feed = {input_data_x: data_test, input_data_y: labels_test}

        #find model
        ckpt = tf.train.get_checkpoint_state(cifar_train.MODEL_SAVE_PATH)
        if ckpt and ckpt.model_checkpoint_path:
            #load model
            saver.restore(sess, ckpt.model_checkpoint_path)
            accuracy_prediction = sess.run(accuracy, feed_dict=validate_feed)
            print("accuracy on validation data is %g" % (accuracy_prediction))
        else:
            print("No checkpoint file found")
            return
Example #16
def nadam(train_x,
          train_y,
          val_x,
          val_y,
          d,
          hl,
          ol,
          ac,
          lf,
          epochs=100,
          eta=0.1,
          init_strategy="xavier",
          batch_size=1):

    print("Function Invoked: nadam")

    # Initialize params
    W, b = init_methods.random_init(
        d, hl, ol) if init_strategy == "random" else init_methods.xavier_init(
            d, hl, ol)

    n_hl = len(hl)

    t, beta1, beta2, epsilon, count = 0, 0.9, 0.999, 1e-8, 0
    v_W, v_b, m_W, m_b = [np.array([])] * (n_hl + 2), [np.array([])] * (
        n_hl + 2), [np.array([])] * (n_hl + 2), [np.array([])] * (n_hl + 2)

    while t < epochs:

        gW, gb, W_look_ahead, b_look_ahead = [], [], [np.array(
            [])] * (n_hl + 2), [np.array([])] * (n_hl + 2)

        for index, (x, y) in enumerate(zip(train_x, train_y)):

            if index % batch_size == 0:

                if t == 0 and index == 0:
                    W_look_ahead = np.copy(W)
                    b_look_ahead = np.copy(b)

                else:
                    for _index, (_b, _m_b, _v_b) in enumerate(zip(b, m_b,
                                                                  v_b)):
                        _m_b_hat = (beta1 *
                                    _m_b) / (1 - np.power(beta1, count + 1))
                        _v_b_hat = (beta2 *
                                    _v_b) / (1 - np.power(beta2, count + 1))
                        b_look_ahead[_index] = _b - (
                            eta / np.sqrt(_v_b_hat + epsilon)) * _m_b_hat

                    for _index, (_W, _m_W, _v_W) in enumerate(zip(W, m_W,
                                                                  v_W)):
                        _m_W_hat = (beta1 *
                                    _m_W) / (1 - np.power(beta1, count + 1))
                        _v_W_hat = (beta2 *
                                    _v_W) / (1 - np.power(beta2, count + 1))
                        W_look_ahead[_index] = _W - (
                            eta / np.sqrt(_v_W_hat + epsilon)) * _m_W_hat

            # Forward propagation
            h, a = forward_propagation.forward_propagation(
                W_look_ahead, b_look_ahead, x, n_hl, ac)

            # Prediction (y hat)
            _y = h[n_hl + 1]

            # Backward propagation
            _gW, _gb = back_propagation.back_propagation(
                W_look_ahead, h, x, y, _y, n_hl, ac, lf)

            if index % batch_size == 0:
                gW = _gW
                gb = _gb
            else:
                gW = np.add(gW, _gW)
                gb = np.add(gb, _gb)

            if (index + 1) % batch_size == 0:
                count += 1
                update_nadam_Wb(t, index, count, beta1, beta2, epsilon, eta,
                                n_hl, batch_size, m_W, m_b, v_W, v_b, gW, gb,
                                W, b, ac)
                gW, gb, W_look_ahead, b_look_ahead = [], [], [np.array(
                    [])] * (n_hl + 2), [np.array([])] * (n_hl + 2)

        if len(train_x) % batch_size != 0:
            count += 1
            index = batch_size - 1 if len(train_x) < batch_size else -1
            update_nadam_Wb(t, index, count, beta1, beta2, epsilon, eta, n_hl,
                            batch_size, m_W, m_b, v_W, v_b, gW, gb, W, b, ac)

        # Logging to WandB
        if lf == "cross_entropy":
            val_acc, val_loss = accuracy_loss.get_accuracy_and_loss(
                W, b, val_x, val_y, n_hl, ac, lf)
            train_acc, train_loss = accuracy_loss.get_accuracy_and_loss(
                W, b, train_x, train_y, n_hl, ac, lf)
            wandb.log({
                "val_accuracy": val_acc,
                "accuracy": train_acc,
                "val_loss": val_loss,
                "loss": train_loss
            })

        t += 1

    return W, b
def gradient_checking(parameters,
                      gradients,
                      X,
                      Y,
                      layers_dims,
                      _lambda=0,
                      keep_prob=1,
                      epsilon=1e-7):
    # Set-up variables
    parameters_values = dictionary_to_vector(parameters)
    grad = gradients_to_vector(gradients, len(layers_dims))
    num_of_parameters = parameters_values.shape[0]
    J_plus = np.zeros((num_of_parameters, 1))
    J_minus = np.zeros((num_of_parameters, 1))
    grad_approx = np.zeros((num_of_parameters, 1))
    num_of_layers = len(layers_dims) - 1

    # Compute grad_approx
    for i in range(num_of_parameters):
        # Compute J_plus[i]
        theta_plus = np.copy(parameters_values)
        theta_plus[i][0] = theta_plus[i][0] + epsilon
        if keep_prob == 1:
            AL, _ = forward_propagation(
                X, vector_to_dictionary(theta_plus, layers_dims),
                num_of_layers)
        elif keep_prob < 1:
            AL, _ = forward_propagation_with_dropout(
                X, vector_to_dictionary(theta_plus, layers_dims),
                num_of_layers, keep_prob)
        if _lambda == 0:
            J_plus[i] = compute_cost(AL, Y)
        else:
            # Use the perturbed parameters so the regularization term reflects theta_plus
            J_plus[i] = compute_cost_with_regularization(
                AL, Y, vector_to_dictionary(theta_plus, layers_dims), _lambda,
                num_of_layers)

        # Compute J_minus[i]
        theta_minus = np.copy(parameters_values)
        theta_minus[i][0] = theta_minus[i][0] - epsilon
        if keep_prob == 1:
            AL, _ = forward_propagation(
                X, vector_to_dictionary(theta_minus, layers_dims),
                num_of_layers)
        elif keep_prob < 1:
            AL, _ = forward_propagation_with_dropout(
                X, vector_to_dictionary(theta_minus, layers_dims),
                num_of_layers, keep_prob)
        if _lambda == 0:
            J_minus[i] = compute_cost(AL, Y)
        else:
            # Use the perturbed parameters so the regularization term reflects theta_minus
            J_minus[i] = compute_cost_with_regularization(
                AL, Y, vector_to_dictionary(theta_minus, layers_dims), _lambda,
                num_of_layers)

        # Compute grad_approx[i]
        grad_approx[i] = np.divide(J_plus[i] - J_minus[i], 2 * epsilon)

    # Compare gradapprox to backward propagation gradients by computing difference
    numerator = np.linalg.norm(grad - grad_approx)
    denominator = np.linalg.norm(grad) + np.linalg.norm(grad_approx)
    difference = np.divide(numerator, denominator)

    if difference > 2e-7:
        print("\033[93m" +
              "There is a mistake in the backward propagation! difference = " +
              str(difference) + "\033[0m")
    else:
        print("\033[92m" +
              "Your backward propagation works perfectly fine! difference = " +
              str(difference) + "\033[0m")

    return difference
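gradient_checking above relies on dictionary_to_vector, gradients_to_vector and vector_to_dictionary utilities that are not shown. A minimal sketch of the flatten/unflatten pair, assuming the usual "Wl"/"bl" key convention and that layers_dims lists every layer size:

import numpy as np


def dictionary_to_vector(parameters):
    # Stack every W_l and b_l (in layer order) into a single column vector.
    L = len(parameters) // 2
    chunks = []
    for l in range(1, L + 1):
        chunks.append(parameters["W" + str(l)].reshape(-1, 1))
        chunks.append(parameters["b" + str(l)].reshape(-1, 1))
    return np.concatenate(chunks, axis=0)


def vector_to_dictionary(theta, layers_dims):
    # Invert dictionary_to_vector, using the layer sizes to recover shapes.
    parameters = {}
    start = 0
    for l in range(1, len(layers_dims)):
        w_size = layers_dims[l] * layers_dims[l - 1]
        parameters["W" + str(l)] = theta[start:start + w_size].reshape(
            layers_dims[l], layers_dims[l - 1])
        start += w_size
        parameters["b" + str(l)] = theta[start:start + layers_dims[l]].reshape(
            layers_dims[l], 1)
        start += layers_dims[l]
    return parameters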
    def test_forward_propagation(self):
        cache = forward_propagation(self.params, self.labels, self.input)
        print(cache)
        for l in range(1, self.num_layers):
            self.assertEqual(cache['Z' + str(l)].shape, (self.layer_shapes[l], self.num_train_examples))
            self.assertEqual(cache['A' + str(l)].shape, (self.layer_shapes[l], self.num_train_examples))
Example #19
    def train_model(self, x_train, y_train, layer_dims=None, print_cost=True):
        """
        Train a L-layer neural network for regression.
        :param x_train: np.array of shape (num_features, num_examples) of training inputs
        :param y_train: np.array of shape (num_classifiers, num_examples) of training true labels
        :param layer_dims: list containing dimensions of each layer in network
        :param print_cost: if True, print and plot the cost
        :return: parameters: dictionary containing parameters Wl, bl
                 metrics: dictionary of model metrics (mse)
                 predictions: array of predicted outputs
        """

        (n_features, n_examples) = x_train.shape
        n_classifiers = y_train.shape[0]

        costs = []  # cost over each iteration

        if layer_dims is None:  # option to specify different layer_dims
            layer_dims = self.layer_dims

        x, y, parameters = self.initialize_parameters(n_features,
                                                      n_classifiers,
                                                      layer_dims)

        # Initialize optimizer
        ZL = forward_propagation(x,
                                 parameters,
                                 self.activation_functions,
                                 self.drop_rate,
                                 training=True)
        cost = self.compute_cost(ZL, y, parameters)

        optimizer = tf.train.AdamOptimizer(
            learning_rate=self.alpha,
            beta1=self.adam_beta1,
            beta2=self.adam_beta2,
            epsilon=self.adam_epsilon).minimize(cost)

        init = tf.global_variables_initializer(
        )  # Initialize graph global variables

        # Start session to compute tensorflow graph
        with tf.Session() as sess:

            # Run initialization
            sess.run(init)

            for epoch in range(self.num_epochs):
                epoch_cost = 0.0
                num_mini_batches = int(n_examples / self.mini_batch_size)
                mini_batches = self.randomize_mini_batches(x_train, y_train)

                for tmp_mini_batch in mini_batches:

                    (tmp_X, tmp_Y) = tmp_mini_batch

                    _, mini_batch_cost = sess.run([optimizer, cost],
                                                  feed_dict={
                                                      x: tmp_X,
                                                      y: tmp_Y
                                                  })

                    epoch_cost += mini_batch_cost / num_mini_batches

                # Print and record the cost every 100 epochs
                if print_cost and epoch % 100 == 0:
                    print('Cost after epoch %i: %f' % (epoch, epoch_cost))
                    costs.append(epoch_cost)

            # Plot cost curve
            if print_cost:
                plt.plot(np.squeeze(costs))
                plt.ylabel('cost')
                plt.xlabel('epochs (per 100)')
                plt.show()

            parameters = sess.run(parameters)

            # Evaluate model on training set and print results
            print('Training set Model Performance ------------------')
            metrics, predictions = self.evaluate_model(x_train,
                                                       y_train,
                                                       parameters,
                                                       print_results=True)

            return parameters, metrics, predictions
Example #20
def train_network(nn, epochs):
    """ Trains the neural network for epochs """
    for _ in range(epochs):  # avoid shadowing the built-in iter()
        cache = fp.forward_propagation(nn)
        bp.backward_propagation(nn, cache)
Example #21
def model(X_train,
          Y_train,
          X_test,
          Y_test,
          learning_rate=0.009,
          num_epochs=100,
          minibatch_size=64,
          print_cost=True,
          operation='save',
          predict=None):
    """
    Implements a three-layer ConvNet in Tensorflow:
    CONV2D -> RELU -> MAXPOOL -> CONV2D -> RELU -> MAXPOOL -> FLATTEN -> FULLYCONNECTED

    Arguments:
    X_train -- training set, of shape (None, 64, 64, 3)
    Y_train -- training labels, of shape (None, n_y = 6)
    X_test -- test set, of shape (None, 64, 64, 3)
    Y_test -- test labels, of shape (None, n_y = 6)
    learning_rate -- learning rate of the optimization
    num_epochs -- number of epochs of the optimization loop
    minibatch_size -- size of a minibatch
    print_cost -- True to print the cost every 100 epochs

    Returns:
    train_accuracy -- real number, accuracy on the train set (X_train)
    test_accuracy -- real number, testing accuracy on the test set (X_test)
    parameters -- parameters learnt by the model. They can then be used to predict.
    """

    ops.reset_default_graph(
    )  # to be able to rerun the model without overwriting tf variables
    tf.set_random_seed(1)  # to keep results consistent (tensorflow seed)
    seed = 3  # to keep results consistent (numpy seed)
    (m, n_H0, n_W0, n_C0) = X_train.shape
    n_y = Y_train.shape[1]
    costs = []  # To keep track of the cost

    X, Y = create_placeholders(n_H0, n_W0, n_C0, n_y)

    parameters = initialize_parameters()

    Z3 = forward_propagation(X, parameters)

    cost = compute_cost(Z3, Y)

    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)

    init = tf.global_variables_initializer()

    saver = tf.train.Saver()

    with tf.Session() as sess:

        if operation == 'save':
            sess.run(init)
            for epoch in range(num_epochs):

                minibatch_cost = 0.
                num_minibatches = int(
                    m / minibatch_size
                )  # number of minibatches of size minibatch_size in the train set
                seed = seed + 1
                minibatches = random_mini_batches(X_train, Y_train,
                                                  minibatch_size, seed)

                for minibatch in minibatches:
                    (minibatch_X, minibatch_Y) = minibatch

                    _, temp_cost = sess.run([optimizer, cost],
                                            feed_dict={
                                                X: minibatch_X,
                                                Y: minibatch_Y
                                            })

                    minibatch_cost += temp_cost / num_minibatches

                if print_cost and epoch % 5 == 0:
                    print("Cost after epoch %i: %f" % (epoch, minibatch_cost))
                if print_cost:  # record the cost every epoch
                    costs.append(minibatch_cost)

            save_path = saver.save(sess, "model.ckpt")
            print("Model saved in path: %s" % save_path)

            predict_op = tf.argmax(Z3, 1)
            correct_prediction = tf.equal(predict_op, tf.argmax(Y, 1))

            accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
            print(accuracy)
            train_accuracy = accuracy.eval({X: X_train, Y: Y_train})
            test_accuracy = accuracy.eval({X: X_test, Y: Y_test})
            print("Train Accuracy:", train_accuracy)
            print("Test Accuracy:", test_accuracy)

        elif operation == 'restore':
            saver.restore(sess, "model.ckpt")
            predict_op = tf.argmax(Z3, 1)
            result = predict_op.eval({X: predict})
            print(result)
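Example #21 (and the ConvNet model at the end of this page) call random_mini_batches to shuffle and partition the training set each epoch. A sketch of what that helper typically does for row-major (m, H, W, C) data; the signature is taken from the call site above, the implementation is an assumption:

import math

import numpy as np


def random_mini_batches(X, Y, mini_batch_size=64, seed=0):
    # Shuffle examples along the first axis, then slice fixed-size batches;
    # the last batch holds the remainder if m is not a multiple of the size.
    np.random.seed(seed)
    m = X.shape[0]
    permutation = list(np.random.permutation(m))
    shuffled_X = X[permutation]
    shuffled_Y = Y[permutation]

    mini_batches = []
    num_complete = math.floor(m / mini_batch_size)
    for k in range(num_complete):
        mini_batches.append(
            (shuffled_X[k * mini_batch_size:(k + 1) * mini_batch_size],
             shuffled_Y[k * mini_batch_size:(k + 1) * mini_batch_size]))
    if m % mini_batch_size != 0:
        mini_batches.append((shuffled_X[num_complete * mini_batch_size:],
                             shuffled_Y[num_complete * mini_batch_size:]))
    return mini_batches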
Example #22
def adam(train_x,
         train_y,
         val_x,
         val_y,
         d,
         hl,
         ol,
         ac,
         lf,
         epochs=100,
         eta=0.1,
         init_strategy="xavier",
         batch_size=1):

    print("Function Invoked: adam")

    # Initialize params
    W, b = init_methods.random_init(
        d, hl, ol) if init_strategy == "random" else init_methods.xavier_init(
            d, hl, ol)

    n_hl = len(hl)

    t, beta1, beta2, epsilon, count = 0, 0.9, 0.999, 1e-8, 0
    v_W, v_b, m_W, m_b = [np.array([])] * (n_hl + 2), [np.array([])] * (
        n_hl + 2), [np.array([])] * (n_hl + 2), [np.array([])] * (n_hl + 2)

    while t < epochs:

        gW, gb = [], []

        for index, (x, y) in enumerate(zip(train_x, train_y)):

            # Forward propagation
            h, a = forward_propagation.forward_propagation(W, b, x, n_hl, ac)

            # Prediction (y hat)
            _y = h[n_hl + 1]

            # Backward propagation
            _gW, _gb = back_propagation.back_propagation(
                W, h, x, y, _y, n_hl, ac, lf)

            if index % batch_size == 0:
                gW = _gW
                gb = _gb
            else:
                gW = np.add(gW, _gW)
                gb = np.add(gb, _gb)

            if (index + 1) % batch_size == 0:
                count += 1
                update_adam_Wb(t, index, count, beta1, beta2, epsilon, eta,
                               n_hl, batch_size, m_W, m_b, v_W, v_b, gW, gb, W,
                               b, ac)
                gW, gb = [], []

        if len(train_x) % batch_size != 0:
            count += 1
            index = batch_size - 1 if len(train_x) < batch_size else -1
            update_adam_Wb(t, index, count, beta1, beta2, epsilon, eta, n_hl,
                           batch_size, m_W, m_b, v_W, v_b, gW, gb, W, b, ac)

        # Logging to WandB
        if lf == "cross_entropy":
            val_acc, val_loss = accuracy_loss.get_accuracy_and_loss(
                W, b, val_x, val_y, n_hl, ac, lf)
            train_acc, train_loss = accuracy_loss.get_accuracy_and_loss(
                W, b, train_x, train_y, n_hl, ac, lf)
            wandb.log({
                "val_accuracy": val_acc,
                "accuracy": train_acc,
                "val_loss": val_loss,
                "loss": train_loss
            })

        t += 1

    return W, b
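Examples #16 and #22 push the actual parameter update into update_nadam_Wb / update_adam_Wb, which are not shown. For reference, a standalone sketch of the textbook Adam update for a single parameter array (a generic formulation, not the helper used above):

import numpy as np


def adam_step(theta, grad, m, v, t, eta=0.001, beta1=0.9, beta2=0.999,
              epsilon=1e-8):
    # Biased first/second moment estimates, bias correction, scaled step.
    # t counts updates starting at 1.
    m = beta1 * m + (1 - beta1) * grad
    v = beta2 * v + (1 - beta2) * np.square(grad)
    m_hat = m / (1 - beta1 ** t)
    v_hat = v / (1 - beta2 ** t)
    theta = theta - eta * m_hat / (np.sqrt(v_hat) + epsilon)
    return theta, m, v


# One toy step
theta, m, v = adam_step(np.ones(3), np.array([0.1, -0.2, 0.3]),
                        np.zeros(3), np.zeros(3), t=1)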
Example #23
def predict(parameters, X):

    A2, cache = forward_propagation(X, parameters)
    predictions = np.around(A2)

    return predictions
print(
    '=============== 4.2 - Initialize the model\'s parameters ===================='
)
n_x, n_h, n_y = initialize_parameters_test_case()

parameters = initialize_parameters(n_x, n_h, n_y)
print("W1 = " + str(parameters["W1"]))
print("b1 = " + str(parameters["b1"]))
print("W2 = " + str(parameters["W2"]))
print("b2 = " + str(parameters["b2"]))

print('=============== 4.3 - The Loop ====================')
# forward_propagation
X_assess, parameters = forward_propagation_test_case()
A2, cache = forward_propagation(X_assess, parameters)
print(np.mean(cache['Z1']), np.mean(cache['A1']), np.mean(cache['Z2']),
      np.mean(cache['A2']))

# compute_cost
A2, Y_assess, parameters = compute_cost_test_case()
print("cost = " + str(compute_cost(A2, Y_assess, parameters)))

# backward_propagation
parameters, cache, X_assess, Y_assess = backward_propagation_test_case()
grads = backward_propagation(parameters, cache, X_assess, Y_assess)
print("dW1 = " + str(grads["dW1"]))
print("db1 = " + str(grads["db1"]))
print("dW2 = " + str(grads["dW2"]))
print("db2 = " + str(grads["db2"]))
Example #25
def vgd(train_x,
        train_y,
        val_x,
        val_y,
        d,
        hl,
        ol,
        ac,
        lf,
        epochs=100,
        eta=0.1,
        init_strategy="xavier",
        alpha=0):

    print("Function Invoked: vgd")

    # Initialize params
    W, b = init_methods.random_init(
        d, hl, ol) if init_strategy == "random" else init_methods.xavier_init(
            d, hl, ol)

    t, n_hl = 0, len(hl)

    while t < epochs:

        gW, gb = [], []

        for index, (x, y) in enumerate(zip(train_x, train_y)):

            # Forward propagation
            h, a = forward_propagation.forward_propagation(W, b, x, n_hl, ac)

            # Prediction (y hat)
            _y = h[n_hl + 1]

            # Backward propagation
            _gW, _gb = back_propagation.back_propagation(
                W, h, x, y, _y, n_hl, ac, lf)

            if index == 0:
                gW = _gW
                gb = _gb
            else:
                gW = list(np.add(gW, _gW))
                gb = list(np.add(gb, _gb))

        # Update bias
        for index, (_b, _gb) in enumerate(zip(b, gb)):
            b[index] = _b - eta * np.array(_gb)

        # Update weights
        for index, (_W, _gW) in enumerate(zip(W, gW)):
            W[index] = _W - eta * (np.array(_gW) + alpha * _W)

        # Logging to WandB (disabled)
        # if lf == "cross_entropy":
        #     val_acc, val_loss = accuracy_loss.get_accuracy_and_loss(W, b, val_x, val_y, n_hl, ac, lf)
        #     train_acc, train_loss = accuracy_loss.get_accuracy_and_loss(W, b, train_x, train_y, n_hl, ac, lf)
        #     wandb.log( { "val_accuracy": val_acc, "accuracy": train_acc, "val_loss": val_loss, "loss": train_loss } )

        t += 1

    return W, b
Example #26
def rmsprop(train_x, train_y, val_x, val_y, d, hl, ol, ac, lf, epochs, eta,
            init_strategy, batch_size):

    # Initialize parameters
    if init_strategy == "random":
        W, b = init_methods.random_init2(d, hl, ol)
    else:
        W, b = init_methods.xavier_init(d, hl, ol)

    hist_W, hist_b = init_methods.random_init2(d, hl, ol)
    grad_W, grad_b = init_methods.random_init2(d, hl, ol)

    epsilon, beta1 = 1e-8, 0.95
    iteration = 0

    while iteration < epochs:
        num_points_seen = 0

        for loc, (x, y_true) in enumerate(zip(train_x, train_y)):
            num_points_seen += 1
            # Forward propagation
            h, a = forward_propagation.forward_propagation(
                W, b, x, len(hl), ac)

            # Prediction (y hat): the last element (np.array) of the h list
            y_pred = h[len(hl) + 1]

            # Backward propagation
            grad_W_element, grad_b_element = back_propagation.back_propagation(
                W, h, x, y_true, y_pred, len(hl), ac, lf)

            if loc == 0 or num_points_seen == 1:
                for i in range(len(grad_W)):
                    grad_W[i] = grad_W_element[i]
                    grad_b[i] = grad_b_element[i]
            else:
                for i in range(len(grad_W)):
                    grad_W[i] += grad_W_element[i]
                    grad_b[i] += grad_b_element[i]

            if num_points_seen == batch_size or loc == len(train_x) - 1:
                num_points_seen = 0

                if iteration == 0:
                    for i in range(1, len(W)):
                        hist_W[i] = (1 - beta1) * np.square(grad_W[i])
                        hist_b[i] = (1 - beta1) * np.square(grad_b[i])
                        W[i] = W[i] - (
                            eta / np.sqrt(hist_W[i] + epsilon)) * grad_W[i]
                        b[i] = b[i] - (
                            eta / np.sqrt(hist_b[i] + epsilon)) * grad_b[i]
                else:
                    for i in range(1, len(W)):
                        hist_W[i] = beta1 * hist_W[i] + (
                            1 - beta1) * np.square(grad_W[i])
                        hist_b[i] = beta1 * hist_b[i] + (
                            1 - beta1) * np.square(grad_b[i])
                        W[i] = W[i] - (
                            eta / np.sqrt(hist_W[i] + epsilon)) * grad_W[i]
                        b[i] = b[i] - (
                            eta / np.sqrt(hist_b[i] + epsilon)) * grad_b[i]

                grad_W, grad_b = init_methods.random_init2(d, hl, ol)

        if lf == "cross_entropy":
            train_acc, train_loss = accuracy_loss.get_accuracy_and_loss(
                W, b, train_x, train_y, len(hl), ac, lf)
            val_acc, val_loss = accuracy_loss.get_accuracy_and_loss(
                W, b, val_x, val_y, len(hl), ac, lf)
            wandb.log({
                "val_accuracy": val_acc,
                "accuracy": train_acc,
                "val_loss": val_loss,
                "loss": train_loss
            })

        # print("\n\niteration number ",iteration," Training  Accuracy: ", train_acc, " Training Loss: ", train_loss)
        # print("\n\niteration number ",iteration," validation  Accuracy: ", val_acc, " validation Loss: ", val_loss)

        iteration += 1
    return W, b
Example #27
def compute_cost(Z3, Y):
    """
    Computes the cost.

    Arguments:
    Z3 -- output of forward propagation (output of the last LINEAR unit), of shape (6, number of examples)
    Y -- "true" labels vector placeholder, same shape as Z3

    Returns:
    cost - Tensor of the cost function
    """

    entropy = tf.nn.softmax_cross_entropy_with_logits(logits=Z3, labels=Y)
    cost = tf.reduce_mean(entropy)

    return cost


if __name__ == '__main__':
    tf.reset_default_graph()

    with tf.Session() as sess:
        np.random.seed(1)
        X, Y = create_placeholders(64, 64, 3, 6)
        parameters = initialize_parameters()
        Z3 = forward_propagation(X, parameters)
        cost = compute_cost(Z3, Y)
        init = tf.global_variables_initializer()
        sess.run(init)
        a = sess.run(cost, {
            X: np.random.randn(4, 64, 64, 3),
            Y: np.random.randn(4, 6)
        })
        print("cost = " + str(a))
Example #28
def train(mnist):
    #step 1.2------define placeholder of input data
    input_data_x = tf.placeholder(tf.float32,
                                  [None, forward_propagation.INPUT_NODE],
                                  name="input_data_x")
    input_data_y = tf.placeholder(tf.float32,
                                  [None, forward_propagation.OUTPUT_NODE],
                                  name="input_data_y")

    #step 3------calculate forward propagation
    REGULARIZATION_RATE = 0.0001
    regularizer = tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE)
    y = forward_propagation.forward_propagation(input_data_x, True,
                                                regularizer)

    #step 4------define loss
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=tf.argmax(input_data_y, 1), logits=y)
    cross_entropy_mean = tf.reduce_mean(cross_entropy)
    loss = cross_entropy_mean + tf.add_n(tf.get_collection("losses"))

    #step 5.1------optimization & train(moving average)
    global_steps = tf.Variable(0, trainable=False)
    variable_averages = tf.train.ExponentialMovingAverage(
        MOVING_AVERAGE_DECAY, global_steps)
    moving_operation = variable_averages.apply(tf.trainable_variables())

    #step 5.2------optimization & train(learning_rate)
    LEARNING_RATE_BASE = 0.1
    LEARNING_RATE_DECAY = 0.99
    BATCH_SIZE = 100
    learning_rate = tf.train.exponential_decay(
        LEARNING_RATE_BASE, global_steps,
        mnist.train.num_examples / BATCH_SIZE, LEARNING_RATE_DECAY)

    #step 5.3------train
    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(
        loss, global_step=global_steps)

    #step 6------define dependency
    with tf.control_dependencies([train_step, moving_operation]):
        train_operation = tf.no_op(name="train")

    for variables in tf.global_variables():
        print(variables)

    #step 7------define an object to save the model
    saver = tf.train.Saver(max_to_keep=0)

    #step 8------execution
    TRAINING_STEP = 5000
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for steps in range(1, TRAINING_STEP):

            input_data, output_data = mnist.train.next_batch(BATCH_SIZE)

            _, loss_value = sess.run([train_operation, loss],
                                     feed_dict={
                                         input_data_x: input_data,
                                         input_data_y: output_data
                                     })
            if steps % 250 == 1:
                print("After %d steps, loss on training batch is %g" %
                      (steps, loss_value))
        saver.save(sess, os.path.join(MODEL_SAVE_PATH, MODEL_NAME))
Example #29
def mgd(train_x,
        train_y,
        val_x,
        val_y,
        d,
        hl,
        ol,
        ac,
        lf,
        epochs,
        eta,
        init_strategy,
        batch_size,
        alpha=0):

    # Initialize parameters
    if init_strategy == "random":
        W, b = init_methods.random_init2(d, hl, ol)
    else:
        W, b = init_methods.xavier_init(d, hl, ol)

    gamma = 0.9
    grad_W, grad_b = init_methods.random_init2(d, hl, ol)
    prev_W, prev_b = init_methods.random_init2(d, hl, ol)
    iteration = 0

    while iteration < epochs:
        num_points_seen = 0
        for loc, (x, y_true) in enumerate(zip(train_x, train_y)):
            num_points_seen += 1

            #Forward Propagation
            h, a = forward_propagation.forward_propagation(
                W, b, x, len(hl), ac)

            # Prediction (y hat): the last element (np.array) of the h list
            y_pred = h[len(hl) + 1]

            # Backward Propagation
            grad_W_element, grad_b_element = back_propagation.back_propagation(
                W, h, x, y_true, y_pred, len(hl), ac, lf)

            if loc == 0 or num_points_seen == 1:
                for i in range(len(grad_W)):
                    grad_W[i] = grad_W_element[i]
                    grad_b[i] = grad_b_element[i]
            else:
                for i in range(len(grad_W)):
                    grad_W[i] += grad_W_element[i]
                    grad_b[i] += grad_b_element[i]

            if num_points_seen == batch_size or loc == len(train_x) - 1:
                num_points_seen = 0
                # Updating of prev_W,prev_b, W and b
                if iteration == 0:
                    for i in range(1, len(W)):
                        W[i] = W[i] - eta * grad_W[i] - eta * alpha * W[i]
                        b[i] = b[i] - eta * grad_b[i]
                        prev_W[i] = eta * grad_W[i] + eta * alpha * W[i]
                        prev_b[i] = eta * grad_b[i]
                else:
                    for i in range(1, len(W)):
                        prev_W[i] = np.multiply(
                            gamma,
                            prev_W[i]) + eta * grad_W[i] + eta * alpha * W[i]
                        prev_b[i] = np.multiply(gamma,
                                                prev_b[i]) + eta * grad_b[i]

                        W[i] = W[i] - prev_W[i]
                        b[i] = b[i] - prev_b[i]

                grad_W, grad_b = init_methods.random_init2(d, hl, ol)

        if lf == "cross_entropy":
            train_acc, train_loss = accuracy_loss.get_accuracy_and_loss(
                W, b, train_x, train_y, len(hl), ac, lf)
            val_acc, val_loss = accuracy_loss.get_accuracy_and_loss(
                W, b, val_x, val_y, len(hl), ac, lf)
            wandb.log({
                "val_accuracy": val_acc,
                "accuracy": train_acc,
                "val_loss": val_loss,
                "loss": train_loss
            })

        # print("\n\niteration number ",iteration," Training  Accuracy: ", train_acc, " Training Loss: ", train_loss)
        # print("\n\niteration number ",iteration," validation  Accuracy: ", val_acc, " validation Loss: ", val_loss)

        iteration += 1
    return W, b
def model(X_train, Y_train, X_test, Y_test, learning_rate = 0.009,
          num_epochs = 100, minibatch_size = 64, print_cost = True):
    """
    Implements a three-layer ConvNet in Tensorflow:
    CONV2D -> RELU -> MAXPOOL -> CONV2D -> RELU -> MAXPOOL -> FLATTEN -> FULLYCONNECTED
    
    Arguments:
    X_train -- training set, of shape (None, 64, 64, 3)
    Y_train -- training labels, of shape (None, n_y = 6)
    X_test -- test set, of shape (None, 64, 64, 3)
    Y_test -- test labels, of shape (None, n_y = 6)
    learning_rate -- learning rate of the optimization
    num_epochs -- number of epochs of the optimization loop
    minibatch_size -- size of a minibatch
    print_cost -- True to print the cost every 100 epochs
    
    Returns:
    train_accuracy -- real number, accuracy on the train set (X_train)
    test_accuracy -- real number, testing accuracy on the test set (X_test)
    parameters -- parameters learnt by the model. They can then be used to predict.
    """
    
    ops.reset_default_graph()                         # to be able to rerun the model without overwriting tf variables
    tf.set_random_seed(1)                             # to keep results consistent (tensorflow seed)
    seed = 3                                          # to keep results consistent (numpy seed)
    (m, n_H0, n_W0, n_C0) = X_train.shape             
    n_y = Y_train.shape[1]                            
    costs = []                                        # To keep track of the cost
    
    # Create Placeholders of the correct shape
    X, Y = create_placeholders(n_H0, n_W0, n_C0, n_y)

    # Initialize parameters
    parameters = initialize_parameters()
    
    # Forward propagation: Build the forward propagation in the tensorflow graph
    Z3 = forward_propagation(X, parameters)
    
    # Cost function: Add cost function to tensorflow graph
    cost = compute_cost(Z3, Y)
    
    # Backpropagation: Define the tensorflow optimizer. Use an AdamOptimizer that minimizes the cost.
    optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(cost)
    
    # Initialize all the variables globally
    init = tf.global_variables_initializer()
     
    # Start the session to compute the tensorflow graph
    with tf.Session() as sess:
        
        # Run the initialization
        sess.run(init)
        
        # Do the training loop
        for epoch in range(num_epochs):

            minibatch_cost = 0.
            num_minibatches = int(m / minibatch_size) # number of minibatches of size minibatch_size in the train set
            seed = seed + 1
            minibatches = random_mini_batches(X_train, Y_train, minibatch_size, seed)

            for minibatch in minibatches:

                # Select a minibatch
                (minibatch_X, minibatch_Y) = minibatch
                """
                # IMPORTANT: The line that runs the graph on a minibatch.
                # Run the session to execute the optimizer and the cost.
                # The feedict should contain a minibatch for (X,Y).
                """
                _ , temp_cost = sess.run(fetches=[optimizer,cost],
                                        feed_dict={X: minibatch_X, Y: minibatch_Y})
                
                minibatch_cost += temp_cost / num_minibatches
                

            # Print the cost every 5 epochs and record it every epoch
            if print_cost and epoch % 5 == 0:
                print("Cost after epoch %i: %f" % (epoch, minibatch_cost))
            if print_cost:
                costs.append(minibatch_cost)
        
        
        # plot the cost
        plt.plot(np.squeeze(costs))
        plt.ylabel('cost')
        plt.xlabel('epochs')
        plt.title("Learning rate =" + str(learning_rate))
        plt.show()

        # Calculate the correct predictions
        predict_op = tf.argmax(Z3, 1)
        correct_prediction = tf.equal(predict_op, tf.argmax(Y, 1))
        
        # Calculate accuracy on the test set
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
        print(accuracy)
        train_accuracy = accuracy.eval({X: X_train, Y: Y_train})
        test_accuracy = accuracy.eval({X: X_test, Y: Y_test})
        print("Train Accuracy:", train_accuracy)
        print("Test Accuracy:", test_accuracy)
                
        return train_accuracy, test_accuracy, parameters
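Both ConvNet versions of model (Example #21 and the one above) depend on create_placeholders for NHWC image batches. A minimal TF 1.x sketch under that assumption; the names match the call sites, the implementation is a guess:

import tensorflow as tf  # TF 1.x API


def create_placeholders(n_H0, n_W0, n_C0, n_y):
    # Leave the batch dimension as None so one graph serves any batch size.
    X = tf.placeholder(tf.float32, shape=[None, n_H0, n_W0, n_C0], name="X")
    Y = tf.placeholder(tf.float32, shape=[None, n_y], name="Y")
    return X, Y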