Example #1
def model_using_sgd(X,
                    Y,
                    layers_dims,
                    learning_rate=0.01,
                    initialization='random',
                    _lambda=0,
                    keep_prob=1,
                    init_const=0.01,
                    num_of_iterations=10000,
                    print_cost=True,
                    print_cost_after=1000,
                    seed=None):
    L = len(layers_dims) - 1  # number of layers
    m = X.shape[1]  # number of training examples

    # Initialize parameters
    parameters = initialize_parameters(layers_dims, initialization, init_const,
                                       seed)

    # Gradient Descent
    for i in range(num_of_iterations):
        for j in range(m):
            # Use column slices so each example keeps its 2-D (n, 1) shape
            x_j = X[:, j:j + 1]
            y_j = Y[:, j:j + 1]

            # Forward propagation
            if keep_prob == 1:
                AL, caches = forward_propagation(x_j, parameters, L)
            elif keep_prob < 1:
                AL, caches = forward_propagation_with_dropout(
                    x_j, parameters, L, keep_prob)

            # Compute cost
            if _lambda == 0:
                cost = compute_cost(AL, y_j)
            else:
                cost = compute_cost_with_regularization(
                    AL, y_j, parameters, _lambda, L)

            # Backward propagation
            if _lambda == 0 and keep_prob == 1:
                grads = backward_propagation(AL, y_j, caches)
            elif _lambda != 0:
                grads = backward_propagation_with_regularization(
                    AL, y_j, caches, _lambda)
            elif keep_prob < 1:
                grads = backward_propagation_with_dropout(
                    AL, y_j, caches, keep_prob)

            # Updating parameters
            parameters = update_parameters_using_gd(parameters, grads,
                                                    learning_rate, L)

        # Print the cost after the given number of iterations
        if print_cost and i % print_cost_after == 0:
            print("Cost after iteration %i: %f" % (i, cost))

    # Gradient checking
    gradient_checking(parameters, grads, X, Y, layers_dims, _lambda=_lambda)

    return parameters
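The update step delegated to `update_parameters_using_gd` is not shown above. For reference, here is a minimal self-contained sketch of the plain gradient-descent update it presumably performs; the function name and the key layout (`"W1"`/`"b1"` ... and `"dW1"`/`"db1"` ...) are assumptions for illustration, not the helper itself:

def update_parameters_using_gd_sketch(parameters, grads, learning_rate, L):
    # W[l] := W[l] - alpha * dW[l]   and   b[l] := b[l] - alpha * db[l]
    for l in range(1, L + 1):
        parameters["W" + str(l)] -= learning_rate * grads["dW" + str(l)]
        parameters["b" + str(l)] -= learning_rate * grads["db" + str(l)]
    return parameters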
Example #2
def optimize_with_adam(hyperparameters, X, Y, parameters, print_cost):

    (
        layer_dims,
        learning_rate,
        num_epochs,
        mini_batch_size,
        lambd,
        beta1,
        beta2,
        epsilon,
    ) = hyperparameters

    L = len(layer_dims) - 1
    m = X.shape[1]
    costs = []
    v, s = initialize_adam(L, parameters)
    t = 0  # Adam update counter; the bias correction needs t >= 1

    for i in range(num_epochs):

        seed = int(time())  # fresh seed each epoch so the minibatches are reshuffled
        minibatches = random_mini_batches(X, Y, mini_batch_size, seed)
        cost_total = 0

        for minibatch in minibatches:

            (minibatch_X, minibatch_Y) = minibatch
            # Forward propagation
            AL, caches = L_model_forward(L, minibatch_X, parameters)
            # Compute cost
            cost_total += compute_cost_with_regularization(
                layer_dims, AL, minibatch_Y, parameters, lambd)

            # Backward propagation
            grads = L_model_backward_with_regularization(
                AL, minibatch_Y, caches, lambd)

            # Update parameters
            t += 1  # Adam counter: one step per minibatch update, starting at 1
            parameters, v, s = update_parameters_with_adam(
                parameters,
                grads,
                v,
                s,
                t,
                learning_rate,
            )

        cost_avg = cost_total / (int(m / mini_batch_size) +
                                 int(m % mini_batch_size != 0))
        if i % 100 == 0 and print_cost:
            print(f"Cost after Epoch {i} : {cost_avg}")
            costs.append(cost_avg)

    return parameters, grads, costs
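For context, the bias-corrected Adam step that `update_parameters_with_adam` is expected to apply to each parameter array can be sketched in a self-contained way as follows; the function name and default hyperparameters here are illustrative assumptions, not the helper used above:

import numpy as np


def adam_step_sketch(theta, dtheta, v, s, t,
                     learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-8):
    # Exponentially weighted first and second moment estimates
    v = beta1 * v + (1 - beta1) * dtheta
    s = beta2 * s + (1 - beta2) * np.square(dtheta)
    # Bias correction; t is the 1-based count of updates performed so far
    v_corrected = v / (1 - beta1 ** t)
    s_corrected = s / (1 - beta2 ** t)
    # Update the parameter array
    theta = theta - learning_rate * v_corrected / (np.sqrt(s_corrected) + epsilon)
    return theta, v, s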
Example #3
def model_with_regularization(X_train,
                              Y_train,
                              X_test,
                              Y_test,
                              layers_dims,
                              learning_rate=0.0075,
                              num_iterations=3000,
                              lambd=0.7,
                              print_cost=False):
    costs = []  # keep track of cost
    # Parameters initialization.
    parameters = initialize_parameters_random(layers_dims)

    # Loop (gradient descent)
    for i in range(0, num_iterations):
        a3, cache = forward_propagation(X_train, parameters)
        # Compute cost.
        cost = compute_cost_with_regularization(a3, Y_train, parameters, lambd)

        grads = backward_propagation_with_regularization(
            X_train, Y_train, cache, lambd)
        # Update parameters.
        parameters = update_parameters(parameters, grads, learning_rate)
        # Print and record the cost every 100 iterations
        if print_cost and i % 100 == 0:
            print("Cost after iteration %i: %f" % (i, cost))
            costs.append(cost)

    Y_prediction_train = predict(X_train, Y_train, parameters)
    Y_prediction_test = predict(X_test, Y_test, parameters)
    d = {
        "Y_prediction_test": Y_prediction_test,
        "Y_prediction_train": Y_prediction_train,
        "parameters": parameters,
        "learning_rate": learning_rate,
        "num_iterations": num_iterations,
    }
    return d
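# For reference: the L2-regularized cost used above adds a weight-decay term to
# the cross-entropy. A minimal self-contained sketch (assuming sigmoid outputs
# AL and parameters stored as "W1" ... "WL"; this is an illustration, not the
# compute_cost_with_regularization helper itself):
import numpy as np


def cross_entropy_cost_with_l2_sketch(AL, Y, parameters, lambd, L):
    m = Y.shape[1]  # number of examples (stored column-wise)
    cross_entropy = -np.sum(Y * np.log(AL) + (1 - Y) * np.log(1 - AL)) / m
    l2_penalty = (lambd / (2 * m)) * sum(
        np.sum(np.square(parameters["W" + str(l)])) for l in range(1, L + 1))
    return cross_entropy + l2_penalty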
def gradient_checking(parameters,
                      gradients,
                      X,
                      Y,
                      layers_dims,
                      _lambda=0,
                      keep_prob=1,
                      epsilon=1e-7):
    # Set-up variables
    parameters_values = dictionary_to_vector(parameters)
    grad = gradients_to_vector(gradients, len(layers_dims))
    num_of_parameters = parameters_values.shape[0]
    J_plus = np.zeros((num_of_parameters, 1))
    J_minus = np.zeros((num_of_parameters, 1))
    grad_approx = np.zeros((num_of_parameters, 1))
    num_of_layers = len(layers_dims) - 1

    # Compute grad_approx
    for i in range(num_of_parameters):
        # Compute J_plus[i]
        theta_plus = np.copy(parameters_values)
        theta_plus[i][0] = theta_plus[i][0] + epsilon
        params_plus = vector_to_dictionary(theta_plus, layers_dims)
        if keep_prob == 1:
            AL, _ = forward_propagation(X, params_plus, num_of_layers)
        elif keep_prob < 1:
            AL, _ = forward_propagation_with_dropout(X, params_plus,
                                                     num_of_layers, keep_prob)
        if _lambda == 0:
            J_plus[i] = compute_cost(AL, Y)
        else:
            # Use the perturbed parameters so the L2 term also moves with epsilon
            J_plus[i] = compute_cost_with_regularization(
                AL, Y, params_plus, _lambda, num_of_layers)

        # Compute J_minus[i]
        theta_minus = np.copy(parameters_values)
        theta_minus[i][0] = theta_minus[i][0] - epsilon
        params_minus = vector_to_dictionary(theta_minus, layers_dims)
        if keep_prob == 1:
            AL, _ = forward_propagation(X, params_minus, num_of_layers)
        elif keep_prob < 1:
            AL, _ = forward_propagation_with_dropout(X, params_minus,
                                                     num_of_layers, keep_prob)
        if _lambda == 0:
            J_minus[i] = compute_cost(AL, Y)
        else:
            # Use the perturbed parameters here as well
            J_minus[i] = compute_cost_with_regularization(
                AL, Y, params_minus, _lambda, num_of_layers)

        # Compute grad_approx[i]
        grad_approx[i] = np.divide(J_plus[i] - J_minus[i], 2 * epsilon)

    # Compare grad_approx to the backward-propagation gradients by computing the relative difference
    numerator = np.linalg.norm(grad - grad_approx)
    denominator = np.linalg.norm(grad) + np.linalg.norm(grad_approx)
    difference = np.divide(numerator, denominator)

    if difference > 2e-7:
        print("\033[93m" +
              "There is a mistake in the backward propagation! difference = " +
              str(difference) + "\033[0m")
    else:
        print("\033[92m" +
              "Your backward propagation works perfectly fine! difference = " +
              str(difference) + "\033[0m")

    return difference
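# The check above relies on the centered-difference approximation
#     grad_approx[i] = (J(theta + eps * e_i) - J(theta - eps * e_i)) / (2 * eps)
# compared to the analytic gradient through a relative L2 distance. A tiny
# standalone illustration on J(theta) = sum(theta ** 2) (hypothetical example,
# independent of the model code above):
import numpy as np


def tiny_gradient_check_sketch(epsilon=1e-7):
    def J(theta):
        return np.sum(theta ** 2)

    theta = np.array([1.0, -2.0, 3.0])
    grad = 2 * theta  # analytic gradient of J
    grad_approx = np.zeros_like(theta)
    for i in range(theta.size):
        plus, minus = theta.copy(), theta.copy()
        plus[i] += epsilon
        minus[i] -= epsilon
        grad_approx[i] = (J(plus) - J(minus)) / (2 * epsilon)
    # Relative difference; values around 1e-7 or smaller indicate a good match
    return np.linalg.norm(grad - grad_approx) / (
        np.linalg.norm(grad) + np.linalg.norm(grad_approx))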
def model(X,
          Y,
          layers_dims,
          learning_rate=0.01,
          optimizer='adam',
          beta=0.9,
          beta1=0.9,
          beta2=0.999,
          epsilon=1e-8,
          mini_batch_size=64,
          initialization='random',
          _lambda=0,
          keep_prob=1,
          init_const=0.01,
          num_of_iterations=10000,
          print_cost=True,
          print_cost_after=1000):
    L = len(layers_dims) - 1  # number of layers
    costs = []  # to keep track of total cost
    seed = 10  # fixed seed so the minibatch shuffling is reproducible
    t = 0  # initializing the counter required for Adam update
    m = X.shape[1]  # number of training examples

    # Initialize parameters
    parameters = initialize_parameters(layers_dims, initialization, init_const,
                                       seed)

    # Initialize the optimizer
    if optimizer == 'gd':
        pass  # no initialization required for gradient descent
    elif optimizer == 'momentum':
        v = initialize_velocity(parameters, L)
    elif optimizer == 'adam':
        v, s = initialize_adam(parameters, L)

    # Optimization loop
    for i in range(num_of_iterations):
        # Define the random minibatches. Increment the seed so the dataset is reshuffled differently after each epoch
        seed = seed + 1
        mini_batches = random_mini_batches(X, Y, mini_batch_size, seed)
        cost_total = 0

        for mini_batch in mini_batches:
            # Select a minibatch
            (minibatch_X, minibatch_Y) = mini_batch

            # Forward propagation
            if keep_prob == 1:
                AL, caches = forward_propagation(minibatch_X, parameters, L)
            elif keep_prob < 1:
                AL, caches = forward_propagation_with_dropout(
                    minibatch_X, parameters, L, keep_prob)

            # Compute cost and add to the total cost
            if _lambda == 0:
                cost_total += compute_cost(AL, minibatch_Y)
            else:
                cost_total += compute_cost_with_regularization(
                    AL, minibatch_Y, parameters, _lambda, L)

            # Backward propagation
            if _lambda == 0 and keep_prob == 1:
                grads = backward_propagation(AL, minibatch_Y, caches)
            elif _lambda != 0:
                grads = backward_propagation_with_regularization(
                    AL, minibatch_Y, caches, _lambda)
            elif keep_prob < 1:
                grads = backward_propagation_with_dropout(
                    AL, minibatch_Y, caches, keep_prob)

            # Update parameters
            if optimizer == 'gd':
                parameters = update_parameters_using_gd(
                    parameters, grads, learning_rate, L)
            elif optimizer == 'momentum':
                parameters, v = update_parameters_using_momentum(
                    parameters, grads, v, beta, learning_rate, L)
            elif optimizer == 'adam':
                t += 1  # adam counter
                parameters, v, s = update_parameters_using_adam(
                    parameters, grads, v, s, t, learning_rate, L, beta1, beta2,
                    epsilon)

        # Average cost over the epoch
        cost_avg = cost_total / m

        # Print the cost every given number of epochs
        if print_cost and i % print_cost_after == 0:
            print("Cost after epoch %i: %f" % (i, cost_avg))
        if print_cost and i % 100 == 0:
            costs.append(cost_avg)

    # Gradient checking
    gradient_checking(parameters,
                      grads,
                      X,
                      Y,
                      layers_dims,
                      _lambda=_lambda)

    return parameters
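The mini-batch generator `random_mini_batches` is called above but not defined here. A minimal self-contained sketch consistent with those call sites (shuffle the columns, then slice into batches of `mini_batch_size`; the exact signature and name are assumptions) might look like:

import numpy as np


def random_mini_batches_sketch(X, Y, mini_batch_size=64, seed=0):
    np.random.seed(seed)
    m = X.shape[1]  # number of examples (stored column-wise)
    permutation = np.random.permutation(m)
    shuffled_X, shuffled_Y = X[:, permutation], Y[:, permutation]
    # Partition the shuffled data; the last batch may be smaller than mini_batch_size
    return [(shuffled_X[:, k:k + mini_batch_size],
             shuffled_Y[:, k:k + mini_batch_size])
            for k in range(0, m, mini_batch_size)]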