def model_using_sgd(X, Y, layers_dims, learning_rate=0.01, initialization='random',
                    _lambda=0, keep_prob=1, init_const=0.01, num_of_iterations=10000,
                    print_cost=True, print_cost_after=1000, seed=None):
    L = len(layers_dims) - 1    # number of layers
    m = X.shape[1]              # number of training examples

    # Initialize parameters
    parameters = initialize_parameters(layers_dims, initialization, init_const, seed)

    # Stochastic gradient descent: update parameters after every single example
    for i in range(num_of_iterations):
        for j in range(m):
            # Select a single training example (slice keeps the 2-D column shape)
            x_j = X[:, j:j + 1]
            y_j = Y[:, j:j + 1]

            # Forward propagation
            if keep_prob == 1:
                AL, caches = forward_propagation(x_j, parameters, L)
            elif keep_prob < 1:
                AL, caches = forward_propagation_with_dropout(
                    x_j, parameters, L, keep_prob)

            # Compute cost
            if _lambda == 0:
                cost = compute_cost(AL, y_j)
            else:
                cost = compute_cost_with_regularization(
                    AL, y_j, parameters, _lambda, L)

            # Backward propagation
            if _lambda == 0 and keep_prob == 1:
                grads = backward_propagation(AL, y_j, caches)
            elif _lambda != 0:
                grads = backward_propagation_with_regularization(
                    AL, y_j, caches, _lambda)
            elif keep_prob < 1:
                grads = backward_propagation_with_dropout(
                    AL, y_j, caches, keep_prob)

            # Update parameters
            parameters = update_parameters_using_gd(parameters, grads, learning_rate, L)

        # Print the cost after the given number of iterations
        if print_cost and i % print_cost_after == 0:
            print("Cost after iteration %i: %f" % (i, cost))

    # Gradient checking
    gradient_checking(parameters, grads, X, Y, layers_dims, _lambda=_lambda)

    return parameters
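# Hedged usage sketch: how model_using_sgd might be called on a tiny synthetic
# binary-classification problem. The data, layer sizes, and hyperparameter values
# below are hypothetical; the helper functions it relies on (initialize_parameters,
# forward_propagation, update_parameters_using_gd, ...) are assumed to be defined
# elsewhere in this project.
import numpy as np

np.random.seed(1)
X_demo = np.random.randn(2, 300)                                  # 2 features, 300 examples
Y_demo = (X_demo[0, :] * X_demo[1, :] > 0).astype(int).reshape(1, -1)  # toy labels
layers_dims_demo = [2, 5, 3, 1]                                   # 2-5-3-1 network

parameters_demo = model_using_sgd(X_demo, Y_demo, layers_dims_demo,
                                  learning_rate=0.01,
                                  num_of_iterations=1000,
                                  print_cost=True, print_cost_after=100)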
def model_with_regularization(X_train, Y_train, X_test, Y_test, layers_dims,
                              learning_rate=0.0075, num_iterations=3000, lambd=0.7,
                              print_cost=False):
    costs = []  # keep track of the cost

    # Parameters initialization
    parameters = initialize_parameters_random(layers_dims)

    # Loop (gradient descent)
    for i in range(num_iterations):
        # Forward propagation
        a3, cache = forward_propagation(X_train, parameters)

        # Compute cost with L2 regularization
        cost = compute_cost_with_regularization(a3, Y_train, parameters, lambd)

        # Backward propagation
        grads = backward_propagation_with_regularization(
            X_train, Y_train, cache, lambd)

        # Update parameters
        parameters = update_parameters(parameters, grads, learning_rate)

        # Print and record the cost every 100 iterations
        if print_cost and i % 100 == 0:
            print("Cost after iteration %i: %f" % (i, cost))
            costs.append(cost)

    # Predictions on the train and test sets
    Y_prediction_train = predict(X_train, Y_train, parameters)
    Y_prediction_test = predict(X_test, Y_test, parameters)

    d = {
        "costs": costs,
        "Y_prediction_test": Y_prediction_test,
        "Y_prediction_train": Y_prediction_train,
        "parameters": parameters,
        "learning_rate": learning_rate,
        "num_iterations": num_iterations,
    }

    return d
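# Minimal sketch of what compute_cost_with_regularization is assumed to do, matching
# the 4-argument interface used in model_with_regularization above (the other models
# pass the number of layers L explicitly). The name below carries a _sketch suffix to
# mark it as hypothetical; compute_cost is assumed to return the unregularized
# cross-entropy cost, and every weight matrix is assumed to be stored under a key
# starting with "W" in the parameters dictionary.
import numpy as np

def compute_cost_with_regularization_sketch(AL, Y, parameters, lambd):
    m = Y.shape[1]
    cross_entropy_cost = compute_cost(AL, Y)  # unregularized part of the cost
    # L2 penalty: sum of squared entries over every weight matrix W1, W2, ...
    l2_penalty = sum(np.sum(np.square(W)) for key, W in parameters.items()
                     if key.startswith("W"))
    return cross_entropy_cost + (lambd / (2 * m)) * l2_penalty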
def model(X, Y, layers_dims, learning_rate=0.01, optimizer='adam', beta=0.9,
          beta1=0.9, beta2=0.999, epsilon=1e-8, mini_batch_size=64,
          initialization='random', _lambda=0, keep_prob=1, init_const=0.01,
          num_of_iterations=10000, print_cost=True, print_cost_after=1000):
    L = len(layers_dims) - 1  # number of layers
    costs = []                # to keep track of the total cost
    seed = 10                 # fixed seed so the "random" minibatches are reproducible
    t = 0                     # counter required for the Adam update
    m = X.shape[1]            # number of training examples

    # Initialize parameters
    parameters = initialize_parameters(layers_dims, initialization, init_const, seed)

    # Initialize the optimizer
    if optimizer == 'gd':
        pass  # no initialization required for gradient descent
    elif optimizer == 'momentum':
        v = initialize_velocity(parameters, L)
    elif optimizer == 'adam':
        v, s = initialize_adam(parameters, L)

    # Optimization loop
    for i in range(num_of_iterations):
        # Define the random minibatches; increment the seed to reshuffle the dataset after each epoch
        seed = seed + 1
        mini_batches = random_mini_batches(X, Y, mini_batch_size, seed)
        cost_total = 0

        for mini_batch in mini_batches:
            # Select a minibatch
            (minibatch_X, minibatch_Y) = mini_batch

            # Forward propagation
            if keep_prob == 1:
                AL, caches = forward_propagation(minibatch_X, parameters, L)
            elif keep_prob < 1:
                AL, caches = forward_propagation_with_dropout(
                    minibatch_X, parameters, L, keep_prob)

            # Compute cost and add it to the total cost
            if _lambda == 0:
                cost_total += compute_cost(AL, minibatch_Y)
            else:
                cost_total += compute_cost_with_regularization(
                    AL, minibatch_Y, parameters, _lambda, L)

            # Backward propagation
            if _lambda == 0 and keep_prob == 1:
                grads = backward_propagation(AL, minibatch_Y, caches)
            elif _lambda != 0:
                grads = backward_propagation_with_regularization(
                    AL, minibatch_Y, caches, _lambda)
            elif keep_prob < 1:
                grads = backward_propagation_with_dropout(
                    AL, minibatch_Y, caches, keep_prob)

            # Update parameters
            if optimizer == 'gd':
                parameters = update_parameters_using_gd(
                    parameters, grads, learning_rate, L)
            elif optimizer == 'momentum':
                parameters, v = update_parameters_using_momentum(
                    parameters, grads, v, beta, learning_rate, L)
            elif optimizer == 'adam':
                t += 1  # Adam counter
                parameters, v, s = update_parameters_using_adam(
                    parameters, grads, v, s, t, learning_rate, L, beta1, beta2, epsilon)

        cost_avg = cost_total / m

        # Print the cost after the given number of epochs
        if print_cost and i % print_cost_after == 0:
            print("Cost after epoch %i: %f" % (i, cost_avg))
        if print_cost and i % 100 == 0:
            costs.append(cost_avg)

    # Gradient checking
    gradient_checking(parameters, grads, X, Y, layers_dims, _lambda=_lambda)

    return parameters
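# Minimal sketch of the Adam update assumed by model above, with the same call
# signature (parameters, grads, v, s, t, learning_rate, L, beta1, beta2, epsilon).
# The _sketch suffix marks it as hypothetical: it assumes grads stores gradients
# under "dW1", "db1", ..., that v and s (from initialize_adam) use the same keys,
# and that parameters uses "W1", "b1", ... It applies the standard bias-corrected
# Adam rule; the project's own helper may differ in detail.
import numpy as np

def update_parameters_using_adam_sketch(parameters, grads, v, s, t, learning_rate,
                                        L, beta1, beta2, epsilon):
    for l in range(1, L + 1):
        for p, g in (("W", "dW"), ("b", "db")):
            # Moving averages of the gradients and of the squared gradients
            v[g + str(l)] = beta1 * v[g + str(l)] + (1 - beta1) * grads[g + str(l)]
            s[g + str(l)] = beta2 * s[g + str(l)] + (1 - beta2) * np.square(grads[g + str(l)])
            # Bias-corrected first and second moment estimates
            v_corrected = v[g + str(l)] / (1 - beta1 ** t)
            s_corrected = s[g + str(l)] / (1 - beta2 ** t)
            # Parameter update
            parameters[p + str(l)] -= learning_rate * v_corrected / (np.sqrt(s_corrected) + epsilon)
    return parameters, v, s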