Example #1
def gradient_descent(X, y, theta, alpha, num_iters):
    #GRADIENTDESCENT Performs gradient descent to learn theta
    #   theta = GRADIENTDESCENT(X, y, theta, alpha, num_iters) updates theta by
    #   taking num_iters gradient steps with learning rate alpha

    # Initialize some useful values
    m = len(y)  # number of training examples
    #J_history = np.zeros(num_iters)
    J_history = [compute_cost(X, y, theta)]

    for _ in range(num_iters):
        # ====================== YOUR CODE HERE ======================
        # Instructions: Perform a single gradient step on the parameter vector
        #               theta.
        #
        # Hint: While debugging, it can be useful to print out the values
        #       of the cost function (computeCost) and gradient here.
        #
        # ============================================================
        delta = np.dot(X, theta).transpose() - y
        delta = np.multiply(delta.transpose(), X).transpose()
        delta = np.dot(delta, np.ones((m, 1)))  # sum
        theta = theta - alpha * delta / m

        # Save the cost J in every iteration
        J_history.append(compute_cost(X, y, theta))
    return theta, J_history
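Note: every snippet on this page calls a compute_cost helper that the excerpts do not include. As a point of reference only, a minimal sketch of the mean-squared-error cost assumed by the linear-regression examples could look like the following (the exact signature in each example's repository may differ):

import numpy as np

def compute_cost(X, y, theta):
    # J(theta) = 1 / (2m) * sum((X @ theta - y) ** 2), assuming X carries a leading column of ones
    m = len(y)
    residuals = X.dot(theta) - y
    return (residuals ** 2).sum() / (2 * m)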
Example #2
    def model(X_train, Y_train, layers_dims, learning_rate, num_iter, lambd,
              print_cost):

        with tf.device('/device:GPU:0'):

            tf.reset_default_graph()  # to be able to rerun the model without overwriting tf variables
            (n_x, m) = X_train.shape  # number of features, number of training examples
            n_y = Y_train.shape[0]  # Number of classes
            n_hidden_layers = len(layers_dims)  # Number of hidden layers
            costs = []  # Keep track of the cost

            ### Create Placeholders ###
            X, Y = create_placeholders(n_x, n_y)

            ### Initialize Parameters ###
            parameters = init_params(layers_dims)

            ### Forward propagation - Build the forward propagation in the tensorflow graph ###
            ZL = forward_propagation(X, parameters)

            ### Cost - Add cost function to tensorflow graph ###
            cost_function = compute_cost(ZL, Y, parameters, n_hidden_layers,
                                         lambd, m)

            ### Backpropagation - Define the tensorflow optimizer ###
            optimizer = tf.train.AdamOptimizer(
                learning_rate=learning_rate).minimize(cost_function)
            #optimizer = tf.train.GradientDescentOptimizer(learning_rate = learning_rate).minimize(cost_function)

            ### Initialize all the variables ###
            init = tf.global_variables_initializer()

            ### Start the session to compute the tensorflow graph ###
            with tf.Session() as sess:

                # Run the initialization
                sess.run(init)

                # Training loop
                for i in range(num_iter):

                    # Run the session to execute the optimizer and the cost
                    _, cost_value = sess.run([optimizer, cost_function],
                                             feed_dict={
                                                 X: X_train,
                                                 Y: Y_train
                                             })

                    # Record the cost every 1000 iterations
                    #if print_cost == True and i % 1000 == 0:
                    #    print ("Cost after iteration %i: %f" % (i, cost_value))
                    if print_cost and i % 1000 == 0:
                        costs.append(cost_value)
                # Save the parameters in a variable
                parameters = sess.run(parameters)

        return parameters, costs
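The create_placeholders and init_params helpers called above are not shown. For orientation, a minimal create_placeholders sketch using the TF 1.x API, assuming column-major data (X_train of shape (n_x, m)), might be:

import tensorflow as tf  # TF 1.x API

def create_placeholders(n_x, n_y):
    # None as the second dimension lets the number of examples vary at run time
    X = tf.placeholder(tf.float32, shape=[n_x, None], name='X')
    Y = tf.placeholder(tf.float32, shape=[n_y, None], name='Y')
    return X, Y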
Example #3
def gradient_descent(X, y, theta, alpha, num_iters):
    """
    Performs gradient descent to learn theta.

    Parameters
    ----------
    X : ndarray, shape (n_samples, n_features)
        Training data, where n_samples is the number of samples and n_features is the number of features.
    y : ndarray, shape (n_samples,)
        Labels.
    theta : ndarray, shape (n_features,)
        Initial linear regression parameter.
    alpha : float
        Learning rate.
    num_iters: int
        Number of iterations.

    Returns
    -------
    theta : ndarray, shape (n_features,)
        Linear regression parameter.
    J_history: ndarray, shape (num_iters,)
        Cost history.
    """
    m = len(y)
    J_history = np.zeros(num_iters)

    for i in range(num_iters):
        theta -= alpha / m * ((X.dot(theta) - y).T.dot(X))
        J_history[i] = compute_cost(X, y, theta)

    return theta, J_history
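A short usage sketch for the function above on a toy single-feature dataset (the data here is made up, and it assumes a compute_cost helper such as the one sketched after Example #1 is in scope):

import numpy as np

rng = np.random.default_rng(0)
x = rng.uniform(0, 10, size=50)
y = 2 * x + 1 + rng.normal(scale=0.5, size=50)   # y is roughly 2x + 1

X = np.c_[np.ones_like(x), x]                    # add the intercept column
theta, J_history = gradient_descent(X, y, np.zeros(2), alpha=0.02, num_iters=2000)
print(theta)           # should end up close to [1, 2]
print(J_history[-1])   # final cost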
Example #4
def L_layer_model(X, Y, layers_dims, learning_rate=0.0075, num_iterations=2400, print_cost=False):  # lr was 0.009
    """
    Implements an L-layer neural network: [LINEAR->RELU]*(L-1)->LINEAR->SIGMOID.

    Arguments:
    X -- data, numpy array of shape (number of examples, num_px * num_px * 3)
    Y -- true "label" vector (containing 0 if cat, 1 if non-cat), of shape (1, number of examples)
    layers_dims -- list containing the input size and each layer size, of length (number of layers + 1).
    learning_rate -- learning rate of the gradient descent update rule
    num_iterations -- number of iterations of the optimization loop
    print_cost -- if True, it prints the cost every 100 steps

    Returns:
    parameters -- parameters learnt by the model. They can then be used to predict.
    """

    np.random.seed(1)
    costs = []  # keep track of cost

    # Parameters initialization.
    ### START CODE HERE ###
    parameters = initialize_parameters_deep(layers_dims)
    ### END CODE HERE ###
    # Loop (gradient descent)
    for i in range(0, num_iterations):

        # Forward propagation: [LINEAR -> RELU]*(L-1) -> LINEAR -> SIGMOID.
        ### START CODE HERE ### (≈ 1 line of code)
        AL, caches = L_model_forward(X=X, parameters=parameters)
        ### END CODE HERE ###

        # Compute cost.
        ### START CODE HERE ### (≈ 1 line of code)
        cost = compute_cost(AL, Y)
        ### END CODE HERE ###

        # Backward propagation.
        ### START CODE HERE ### (≈ 1 line of code)
        grads = L_model_backward(AL, Y, caches)
        ### END CODE HERE ###

        # Update parameters.
        ### START CODE HERE ### (≈ 1 line of code)
        parameters = update_parameters(parameters, grads, learning_rate)
        ### END CODE HERE ###

        # Print and record the cost every 100 iterations
        if print_cost and i % 100 == 0:
            print("Cost after iteration %i: %f" % (i, cost))
            costs.append(cost)

    # plot the cost
    plt.plot(np.squeeze(costs))
    plt.ylabel('cost')
    plt.xlabel('iterations (per hundreds)')
    plt.title("Learning rate =" + str(learning_rate))
    plt.show()

    return parameters
Example #5
def train_model(x, y, ax):
    # gradient descent settings
    (_, n) = x.shape
    iters = 1500
    alpha = 0.01
    theta = np.zeros(n)

    # compute and display initial cost
    print('Testing the cost function ...\n')
    j = compute_cost.compute_cost(x, y, theta)

    print('  With theta = [0.0, 0.0]')
    print('  Cost computed = %0.2f' % j)
    print('  Expected cost value (approx) 32.07\n')

    # run gradient descent
    print('Running Gradient Descent ...\n')
    (theta,
     j_history) = gradient_descent.gradient_descent(x, y, theta, alpha, iters)

    print('  Theta found by gradient descent:')
    print('  ', theta)
    print('  Expected theta values (approx):')
    print('  [-3.6303, 1.1664]\n')

    return (alpha, theta, j_history)
Example #6
def gradient_descent(X: np.ndarray, y: np.ndarray, theta: np.ndarray, alpha: float, num_iters: int):
    m = len(y)
    J_history = np.zeros((num_iters, 1))
    for i in range(num_iters):
        theta = theta - (alpha / m) * X.T.dot((X.dot(theta) - y))
        J_history[i] = compute_cost(X, y, theta)
    return [theta, J_history]
Example #7
def gradient_descent_multi(x, y, theta, alpha, n):
    j_history = np.zeros((n, 1))
    am = alpha / len(y)
    for i in range(n):
        theta -= am * np.dot(np.transpose(x), np.dot(x, theta) - y)
        j_history[i] = compute_cost(x, y, theta)
    return theta, j_history
Example #8
def model(X,
          Y,
          layers_dims,
          learning_rate=0.01,
          initialization='random',
          init_const=0.01,
          num_of_iterations=10000,
          print_cost=True,
          print_cost_after=1000,
          seed=None):
    L = len(layers_dims) - 1  # number of layers

    # Initialize parameters
    parameters = initialize_parameters(layers_dims, initialization, init_const,
                                       seed)

    # Gradient Descent
    for i in range(num_of_iterations):
        # Forward propagation
        AL, caches = forward_propagation(X, parameters, L)

        # Compute cost
        cost = compute_cost(AL, Y)

        # Backward propagation
        grads = backward_propagation(AL, Y, caches)

        # Updating parameters
        parameters = update_parameters(parameters, grads, learning_rate, L)

        # Print the cost after the given number of iterations
        if print_cost and i % print_cost_after == 0:
            print("Cost after iteration %i: %f" % (i, cost))

    return parameters
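The forward_propagation, backward_propagation, and update_parameters helpers above live elsewhere in that repository. As a rough sketch of what a plain gradient-descent update_parameters typically looks like, assuming parameters and grads are dicts keyed 'W1'..'WL', 'b1'..'bL' and 'dW1'..'dWL', 'db1'..'dbL':

def update_parameters(parameters, grads, learning_rate, num_of_layers):
    # one gradient-descent step per layer: W := W - lr * dW, b := b - lr * db
    for l in range(1, num_of_layers + 1):
        parameters[f'W{l}'] = parameters[f'W{l}'] - learning_rate * grads[f'dW{l}']
        parameters[f'b{l}'] = parameters[f'b{l}'] - learning_rate * grads[f'db{l}']
    return parameters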
Example #9
def nn_model(X, Y, n_h, num_iterations=1500, print_cost=False):

    np.random.seed(3)
    n_x = layer_sizes(X, Y)[0]
    n_y = layer_sizes(X, Y)[2]

    parameters = initialize_parameters(n_x, n_h, n_y)
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]

    for i in range(0, num_iterations):

        A2, cache = forward_propagation(X, parameters)

        cost = compute_cost(A2, Y, parameters)

        grads = backward_propagation(parameters, cache, X, Y)

        parameters = update_parameters(parameters, grads)

        if print_cost and i % 100 == 0:
            print("Cost after iteration %i: %f" % (i, cost))
        plt.scatter(i + 1, cost)
    plt.title('cost curve')
    plt.xlabel('iterations')
    plt.ylabel('cost')
    plt.savefig('cost curve.jpg')
    return parameters
Example #10
def gradient_descent(x, y, theta, alpha, num_iters):
    m = len(y)
    j_hist = zeros((num_iters, 1))
    for i in range(num_iters):
        error = x.dot(theta) - y  # residuals, not predictions
        theta -= (alpha / m) * x.T.dot(error)
        j_hist[i] = compute_cost(x, y, theta)
    return [theta, j_hist]
Example #11
def model_using_sgd(X,
                    Y,
                    layers_dims,
                    learning_rate=0.01,
                    initialization='random',
                    _lambda=0,
                    keep_prob=1,
                    init_const=0.01,
                    num_of_iterations=10000,
                    print_cost=True,
                    print_cost_after=1000,
                    seed=None):
    L = len(layers_dims) - 1  # number of layers
    m = X.shape[1]  # number of training examples

    # Initialize parameters
    parameters = initialize_parameters(layers_dims, initialization, init_const,
                                       seed)

    # Gradient Descent
    for i in range(num_of_iterations):
        for j in range(m):
            # Forward propagation
            if keep_prob == 1:
                AL, caches = forward_propagation(X[:, j], parameters, L)
            elif keep_prob < 1:
                AL, caches = forward_propagation_with_dropout(
                    X[:, j], parameters, L, keep_prob)

            # Compute cost
            if _lambda == 0:
                cost = compute_cost(AL, Y[:, j])
            else:
                cost = compute_cost_with_regularization(
                    AL, Y[:, j], parameters, _lambda, L)

            # Backward propagation
            if _lambda == 0 and keep_prob == 1:
                grads = backward_propagation(AL, Y[:, j], caches)
            elif _lambda != 0:
                grads = backward_propagation_with_regularization(
                    AL, Y[:, j], caches, _lambda)
            elif keep_prob < 1:
                grads = backward_propagation_with_dropout(
                    AL, Y[:, j], caches, keep_prob)

            # Updating parameters
            parameters = update_parameters_using_gd(parameters, grads,
                                                    learning_rate, L)

        # Print the cost after the given number of iterations
        if print_cost and i % print_cost_after == 0:
            print("Cost after iteration %i: %f" % (i, cost))

    # Gradient checking
    gradient_checking(parameters, grads, X, Y, layers_dims, _lambda=_lambda)

    return parameters
Example #12
def gradient_descent(X, y, theta, alpha, iterations):
    m = len(y)
    J_history = np.zeros(iterations)

    for i in range(iterations):
        theta -= ((X.dot(theta) - y).T.dot(X)) * alpha / m
        J_history[i] = compute_cost.compute_cost(X, y, theta)

    return theta, J_history
Example #13
def gradient_descent(x, y, learning_step, number_of_iterations):
    #  initial theta vector set to zero
    theta = np.zeros((x.shape[1], 1))
    # initial cost function history vector set to zero
    j_history = np.zeros((number_of_iterations, 1))
    am = learning_step / len(y)
    for i in range(number_of_iterations):
        theta -= am * np.dot(np.transpose(x), np.dot(x, theta) - y)
        j_history[i] = compute_cost(x, y, theta)
    return theta, j_history
Example #14
def gradient_descent(x, y, size, theta, alpha, iterations):
    """
        Performs gradient descent to optimize the 'theta' parameters. Updates theta for the
        given number of 'iterations', with learning rate 'alpha'.

        Parameters
        ----------
        x : array_like
            Shape (m, n+1), where m is the number of examples, and n is the number of features
            including the vector of ones for the zeroth parameter.

        y : array_like
            Shape (m,). The value of the function at each of the m points.

        size : int
            Number of total training points.

        theta : array_like
            Shape (n+1, 1). Starting parameters of the regression function.

        alpha : float
            The learning rate.

        iterations : int
            The number of iterations for gradient descent.

        Returns
        -------
        theta : array_like
            Shape (n+1, 1). The optimized linear regression parameters.

        cost_history : list
            A list of the values of the cost function after each iteration.
    """

    cost_history = []
    converge = False
    for i in range(iterations):
        temp_cost = compute_cost(x, y, size, theta)
        try:
            if cost_history[-1] - temp_cost <= 0.0001:
                converge = True
        except IndexError:
            pass
        cost_history.append(temp_cost)

        delta = (1 / size) * ((np.dot(theta.T, x)) - y) * x
        delta2 = delta.sum(axis=1, keepdims=True)
        theta = (theta - (alpha * delta2))
    if converge:
        print("The function converged, use less iterations.")
    print(f"The new optimized parameters are: \n{theta}\n")
    return theta, cost_history
Example #15
def nn_model(X,
             Y,
             n_h,
             num_iterations=10000,
             learning_rate=0.01,
             print_cost=False):
    """
    

    Parameters
    ----------
    X : dataset of shape (2, number of examples)
    Y : labels of shape (1, number of examples)
    n_h : size of the hidden layer
    num_iterations : Number of iterations in gradient descent loop
    print_cost : if True, print the cost every 1000 iterations

    Returns
    -------
    parameters : parameters learnt by the model. They can then be used to predict.

    """

    np.random.seed(3)
    n_x, n_h, n_y = network_structure(X, Y, n_h)

    # Initialize parameters
    parameters = initialize_parameters(n_x, n_h, n_y)
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]

    # Loop (gradient descent)
    for i in range(0, num_iterations):
        # Forward propagation. Inputs: "X, parameters". Outputs: "A2, cache".
        A2, cache = forward_propagation(X, parameters)
        # Cost function. Inputs: "A2, Y, parameters". Outputs: "cost".
        cost = compute_cost(A2, Y)
        # Backpropagation. Inputs: "parameters, cache, X, Y". Outputs: "grads".
        grads = backward_propagation(parameters, cache, X, Y)
        # Gradient descent parameter update. Inputs: "parameters, grads". Outputs: "parameters".
        parameters = update_parameters(parameters, grads, learning_rate=learning_rate)

        # Print the cost every 1000 iterations
        if print_cost and i % 1000 == 0:
            print("Cost after iteration %i: %f" % (i, cost))

    return parameters
Example #16
def plot_j_history(x, y, theta, ax3, ax4):
    print('Visualizing J(theta_0, theta_1) ...\n')

    # grid over which we will calculate j_vals
    theta0_vals = np.linspace(-10, 10, 100)
    theta1_vals = np.linspace(-1, 4, 100)

    # calculate j_vals
    j_vals = np.zeros([len(theta0_vals), len(theta1_vals)])
    for i in range(len(theta0_vals)):
        for j in range(len(theta1_vals)):
            t = [theta0_vals[i], theta1_vals[j]]
            j_vals[i, j] = compute_cost.compute_cost(x, y, t)

    # make x, y and z data objects
    axis_z = np.transpose(j_vals)
    axis_x, axis_y = np.meshgrid(theta0_vals, theta1_vals)

    # plot a new 3d surface figure
    surf = ax3.plot_surface(axis_x,
                            axis_y,
                            axis_z,
                            rstride=1,
                            cstride=1,
                            cmap=cm.coolwarm,
                            linewidth=0,
                            antialiased=False)
    ax3.get_figure().colorbar(surf, shrink=0.5, aspect=10)
    ax3.set_title('Surface')

    ax3.set_xlabel('$\\theta_0$')
    ax3.set_ylabel('$\\theta_1$')

    ax3.set_xticks(range(-10, 11, 5))
    ax3.set_yticks(range(-1, 5, 1))

    plt.show()

    # plot the corresponding contour figure
    cs = ax4.contour(axis_x, axis_y, np.log10(axis_z))
    ax4.plot(theta[0], theta[1], color='r', marker='x', linewidth=0.5)
    ax4.set_title('Contour, showing minimum')

    ax4.set_xlabel('$\\theta_0$')
    ax4.set_ylabel('$\\theta_1$')

    ax4.set_xticks(range(-10, 11, 2))
    ax4.set_yticks(np.linspace(-1, 4, 11))

    plt.show()
Example #17
def nn_model(X, Y, n_h, num_iterations=10000, print_cost=False):
    """
    Arguments:
    X -- dataset of shape (2, number of examples)
    Y -- labels of shape (1, number of examples)
    n_h -- size of the hidden layer
    num_iterations -- Number of iterations in gradient descent loop
    print_cost -- if True, print the cost every 1000 iterations
    
    Returns:
    parameters -- parameters learnt by the model. They can then be used to predict.
    """

    np.random.seed(3)
    n_x = layer_sizes(X, Y)[0]
    n_y = layer_sizes(X, Y)[2]

    # Initialize parameters, then retrieve W1, b1, W2, b2. Inputs: "n_x, n_h, n_y". Outputs = "W1, b1, W2, b2, parameters".
    ### START CODE HERE ### (≈ 5 lines of code)
    parameters = initialize_parameters(n_x, n_h, n_y)
    W1 = parameters['W1']
    b1 = parameters['b1']
    W2 = parameters['W2']
    b2 = parameters['b2']
    ### END CODE HERE ###

    # Loop (gradient descent)

    for i in range(0, num_iterations):

        ### START CODE HERE ### (≈ 4 lines of code)
        # Forward propagation. Inputs: "X, parameters". Outputs: "A2, cache".
        A2, cache = forward_propagation(X, parameters)

        # Cost function. Inputs: "A2, Y, parameters". Outputs: "cost".
        cost = compute_cost(A2, Y, parameters)

        # Backpropagation. Inputs: "parameters, cache, X, Y". Outputs: "grads".
        grads = backward_propagation(parameters, cache, X, Y)

        # Gradient descent parameter update. Inputs: "parameters, grads". Outputs: "parameters".
        parameters = update_parameters(parameters, grads)

        ### END CODE HERE ###

        # Print the cost every 1000 iterations
        if print_cost and i % 1000 == 0:
            print("Cost after iteration %i: %f" % (i, cost))

    return parameters
Example #18
def compute_cost_with_regularization(AL, Y, parameters, _lambda,
                                     num_of_layers):
    m = Y.shape[1]  # number of examples

    # Compute sum of squares of parameters
    W = 0
    for i in range(1, num_of_layers + 1):
        W += np.sum(np.square(parameters[f'W{i}']))

    # L2 regularization cost term
    L2_regularization_cost = (1 / m) * (_lambda / 2) * W

    # Cross entropy cost
    cross_entropy_cost = compute_cost(AL, Y)

    cost = cross_entropy_cost + L2_regularization_cost
    return cost
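For reference, the plain cross-entropy compute_cost that the regularized version above wraps is usually something like the following sketch, assuming AL and Y have shape (1, m) and a sigmoid output layer (the repository's own implementation may differ):

import numpy as np

def compute_cost(AL, Y):
    # binary cross-entropy: -1/m * sum(y * log(a) + (1 - y) * log(1 - a))
    m = Y.shape[1]
    cost = -np.sum(Y * np.log(AL) + (1 - Y) * np.log(1 - AL)) / m
    return np.squeeze(cost)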
Example #19
def plot_figures(x, y, theta):
    # Make data.
    theta0_values = np.linspace(-10, 10, 100)
    theta1_values = np.linspace(-1, 4, 100)
    j_values = np.zeros((len(theta0_values), len(theta1_values)))

    # Fill out J_values
    for i in range(len(theta0_values)):
        for j in range(len(theta1_values)):
            t = np.transpose(np.matrix([theta0_values[i], theta1_values[j]]))
            j_values[i, j] = compute_cost(x, y, t)

    x_plot, y_plot = np.meshgrid(theta0_values, theta1_values, indexing='ij')

    plt.figure()
    cs = plt.contour(x_plot, y_plot, j_values, np.logspace(-2, 3, 20))
    plt.plot(theta[0], theta[1], 'rx')
    plt.xlabel(r'$\theta0$')
    plt.ylabel(r'$\theta1$')
    plt.clabel(cs, inline=1, fontsize=8)
    plt.title('Contour plot for cost function J()\n')
    plt.show()

    fig = plt.figure(figsize=plt.figaspect(0.5))

    ax = fig.add_subplot(121, projection='3d')
    ax.plot_surface(x_plot, y_plot, j_values, cmap='bwr')
    ax.set_xlabel(r'$\theta0$')
    ax.set_ylabel(r'$\theta1$')
    ax.set_zlabel('Cost function')

    # Customize the z axis.
    # ax.set_zlim(0, 700)
    # ax.zaxis.set_major_locator(LinearLocator(10))
    # ax.zaxis.set_major_formatter(FormatStrFormatter('%3.0f'))

    # Add a color bar which maps values to colors.
    #fig.colorbar(surf, shrink=0.5, aspect=5)

    plt.show()
    return
Example #20
def gradient_descent(x, y, theta, alpha, iters):
    temp_theta = theta.copy()  # separate buffer so theta values are updated simultaneously
    j_history = np.zeros(iters)

    # ====================== YOUR CODE HERE ======================

    (m, n) = x.shape
    for iter in range(iters):
        for j in range(n):
            sum_j = 0
            for i in range(m):
                h = np.dot(x.iloc[i], theta)
                sum_j = sum_j + (h - y.iloc[i]) * x.iloc[i,j]

            temp_theta[j] = theta[j] - alpha * sum_j / m

        # update theta values simultaneously
        theta = temp_theta.copy()
        j_history[iter] = compute_cost.compute_cost(x, y, theta)

    # ============================================================

    return (theta, j_history)
Example #21
parameters = initialize_parameters(n_x, n_h, n_y)
print("W1 = " + str(parameters["W1"]))
print("b1 = " + str(parameters["b1"]))
print("W2 = " + str(parameters["W2"]))
print("b2 = " + str(parameters["b2"]))

print('=============== 4.3 - The Loop ====================')
# forward_propagation
X_assess, parameters = forward_propagation_test_case()
A2, cache = forward_propagation(X_assess, parameters)
print(np.mean(cache['Z1']), np.mean(cache['A1']), np.mean(cache['Z2']),
      np.mean(cache['A2']))

# compute_cost
A2, Y_assess, parameters = compute_cost_test_case()
print("cost = " + str(compute_cost(A2, Y_assess, parameters)))

# backward_propagation
parameters, cache, X_assess, Y_assess = backward_propagation_test_case()
grads = backward_propagation(parameters, cache, X_assess, Y_assess)
print("dW1 = " + str(grads["dW1"]))
print("db1 = " + str(grads["db1"]))
print("dW2 = " + str(grads["dW2"]))
print("db2 = " + str(grads["db2"]))

# update_parameters
parameters, grads = update_parameters_test_case()
parameters = update_parameters(parameters, grads)
print("W1 = " + str(parameters["W1"]))
print("b1 = " + str(parameters["b1"]))
print("W2 = " + str(parameters["W2"]))
Example #22
X = data[:, 0].reshape(-1, 1)
y = data[:, 1].reshape(-1, 1)

m = len(y)

plot_data(X, y, 'x')

X = np.c_[np.ones((m, 1)), data[:, 0]]
theta = np.zeros((2, 1))

iterations = 1500
alpha = 0.01

print('\nTesting the cost function ...\n')

J = compute_cost(X, y, theta)

print('With theta = [0 ; 0]\nCost computed = %f\n' % J)
print('Expected cost value (approx) 32.07\n')

J = compute_cost(X, y, np.mat('-1 ; 2'))

print('With theta = [-1 ; 2]\nCost computed = %f\n' % J)
print('Expected cost value (approx) 54.24\n')

[theta, J_history] = gradient_descent(X, y, theta, alpha, iterations)

print('Theta found by gradient descent:\n')
print('%s\n' % theta)
print('Expected theta values (approx)\n')
print(' -3.6303\n  1.1664\n\n')
Example #23
    # Set train parameters.
    # lambdav = 0.00001
    lambdav = 0
    # alpha = 0.0000001
    # iterations = 1000000
    alpha = 0.1
    iterations = 1200

    # print "Solving normal equation."
    theta = solve_normal_equation(music_train.X, music_train.y, lambdav)

    print "Solving using gradient descent."
    # theta = gradient_descent(music_train.X, music_train.y, None, alpha, lambdav, iterations)
    #theta, J_history = gradient_descent_with_J_history(music_train.X, music_train.y, None, alpha, lambdav, iterations)
    #plot_history(J_history)

    print "Computing cost."
    print compute_cost(music_train.X, music_train.y, theta, lambdav)
    print compute_cost(music_validation.X, music_validation.y, theta, lambdav)
    print compute_cost(music_test.X, music_test.y, theta, lambdav)

    for delta_year in range(10):
        print(delta_year)

        print("Computing train accuracy.")
        print(compute_accuracy(music_train.X, music_train.y, theta, delta_year))
        print(compute_accuracy(music_validation.X, music_validation.y, theta,
                               delta_year))
        print(compute_accuracy(music_test.X, music_test.y, theta, delta_year))
Example #24
plt.show()

# =================== Part 3: Gradient descent ===================
print('Running Gradient Descent...')
# Add a column of ones to x
X = np.hstack((np.ones((m, 1)), X.reshape(m, 1)))

# Initialize fitting parameters
theta = np.zeros(2)

# Some gradient descent settings
iterations = 1500
alpha = 0.01

# Compute and display initial cost
cost = compute_cost(X, y, theta)
print(cost)

# Run gradient descent
theta, _ = gradient_descent(X, y, theta, alpha, iterations)

# Print theta to screen
print "Theta found by gradient descent:", theta

plt.figure()
plot_data(X[:, 1], y)
plt.plot(X[:, 1], X.dot(theta), label='Linear Regression')
plt.legend(loc='upper left', numpoints=1)
plt.show()

# Predict values for population sizes of 35,000 and 70,000
Example #25
                                                       W,
                                                       b,
                                                       activation="relu")
print("With ReLU: A = " + str(A))

#L-Layer Model
X, parameters = L_model_forward_test_case()
AL, caches = L_model_forward(X, parameters)
print("AL = " + str(AL))
print("Length of caches list = " + str(len(caches)))

#Cost function

Y, AL = compute_cost_test_case()

print("cost = " + str(compute_cost(AL, Y)))

#Linear backward
# Set up some test inputs
dZ, linear_cache = linear_backward_test_case()

dA_prev, dW, db = linear_backward(dZ, linear_cache)
print("dA_prev = " + str(dA_prev))
print("dW = " + str(dW))
print("db = " + str(db))

#Linear-Activation backward
AL, linear_activation_cache = linear_activation_backward_test_case()

dA_prev, dW, db = linear_activation_backward(AL,
                                             linear_activation_cache,
Example #26
    # Set train parameters.
    lambdav = 0.0000000001
    n = len(music_train.X[0])
    
    print "Solving normal equation."
    
    # Get thetas to reduce data.
    theta = solve_normal_equation(music_train.X, music_train.y, lambdav)
    ordered_theta = np.argsort(np.abs(theta).reshape(len(theta)))
    ordered_theta = ordered_theta[::-1]
    
    # Initialize costs.
    J_history_train = np.zeros(n)
    J_history_validation = np.zeros(n)
    
    for iteration in range(n):
        theta = solve_normal_equation(music_train.X[:, ordered_theta[:(n - iteration)]], music_train.y, lambdav)
        J_history_train[iteration] = compute_cost(music_train.X[:, ordered_theta[:(n - iteration)]], music_train.y, theta, 0)
        J_history_validation[iteration] = compute_cost(music_validation.X[:, ordered_theta[:(n - iteration)]], music_validation.y, theta, 0)
        
        print "Theta size: " + str(n - iteration)
        print "J_train: %f" % J_history_train[iteration]
        print "J_validation: %f" % J_history_validation[iteration]
        print "Accuracy: %f" % compute_accuracy(music_test.X[:, ordered_theta[:(n - iteration)]], music_test.y, theta, 9)

        ordered_theta = np.argsort(np.abs(theta).reshape(len(theta)))
        ordered_theta = ordered_theta[::-1]
    
    plot_history_train_validation(J_history_train, J_history_validation)
    plot_history(J_history_train - J_history_validation)
Example #27
X, mu, sigma = feature_normalize(X_Original)

plt.show()

X = add_x0(X)
m = X.shape[0]
n = X.shape[1]
learning_rate = .3
theta = np.zeros((n, 1))
max_iter = 800

his = np.zeros((max_iter, 1))

for i in range(max_iter):

    cost = compute_cost(X, y, theta)
    grad = gradient_descent(X, y, theta, learning_rate, m)
    theta = theta - grad

    his[i, :] = cost

    if i % 100 == 99:
        print("iterate number: " + str(i + 1) + " -- cost: " + str(cost))

plt.plot(his, label='cost')

plt.ylabel('cost')
plt.xlabel('step')
plt.title("logistic regression'")

plt.legend(loc='upper center', shadow=True)
Example #28
X_dat = np.hstack([ones, X_dat])

X = np.linspace(-5, 1, 30)
Y = np.linspace(-1, 2, 30)

X, Y = np.meshgrid(X,Y)

Z = []
Z_flat = []

for i in range(0, len(X)):
  Z.append([])
  Z_flat.append([])
  for j in range(0, len(Y)):
    Z[i].append(compute_cost(X_dat, y_dat, np.matrix( [X[i][j], Y[i][j]] ).T ))
    Z_flat[i].append(4.48339)

fig = plt.figure()
ax = fig.add_subplot(1, 1, 1, projection='3d')

ax.plot_wireframe(X, Y, Z)
ax.plot_surface(X, Y, Z_flat)

ax.scatter(0, 0, compute_cost(X_dat, y_dat, np.matrix( [0, 0] ).T), c='g')
ax.scatter(-3.89530051, 1.19298539, 4.48339, c='r')

ax.set_xlabel("Theta_0")
ax.set_ylabel("Theta_1")
ax.set_zlabel("Cost")
Example #29
def model(X_train,
          Y_train,
          X_test,
          Y_test,
          learning_rate=0.009,
          num_epochs=100,
          minibatch_size=64,
          print_cost=True,
          operation='save',
          predict=None):
    """
    Implements a three-layer ConvNet in Tensorflow:
    CONV2D -> RELU -> MAXPOOL -> CONV2D -> RELU -> MAXPOOL -> FLATTEN -> FULLYCONNECTED

    Arguments:
    X_train -- training set, of shape (None, 64, 64, 3)
    Y_train -- training labels, of shape (None, n_y = 6)
    X_test -- test set, of shape (None, 64, 64, 3)
    Y_test -- test labels, of shape (None, n_y = 6)
    learning_rate -- learning rate of the optimization
    num_epochs -- number of epochs of the optimization loop
    minibatch_size -- size of a minibatch
    print_cost -- True to print the cost every 5 epochs

    Returns:
    train_accuracy -- real number, accuracy on the train set (X_train)
    test_accuracy -- real number, testing accuracy on the test set (X_test)
    parameters -- parameters learnt by the model. They can then be used to predict.
    """

    ops.reset_default_graph()  # to be able to rerun the model without overwriting tf variables
    tf.set_random_seed(1)  # to keep results consistent (tensorflow seed)
    seed = 3  # to keep results consistent (numpy seed)
    (m, n_H0, n_W0, n_C0) = X_train.shape
    n_y = Y_train.shape[1]
    costs = []  # To keep track of the cost

    X, Y = create_placeholders(n_H0, n_W0, n_C0, n_y)

    parameters = initialize_parameters()

    Z3 = forward_propagation(X, parameters)

    cost = compute_cost(Z3, Y)

    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)

    init = tf.global_variables_initializer()

    saver = tf.train.Saver()

    with tf.Session() as sess:

        if operation == 'save':
            sess.run(init)
            for epoch in range(num_epochs):

                minibatch_cost = 0.
                # number of minibatches of size minibatch_size in the train set
                num_minibatches = int(m / minibatch_size)
                seed = seed + 1
                minibatches = random_mini_batches(X_train, Y_train,
                                                  minibatch_size, seed)

                for minibatch in minibatches:
                    (minibatch_X, minibatch_Y) = minibatch

                    _, temp_cost = sess.run([optimizer, cost],
                                            feed_dict={
                                                X: minibatch_X,
                                                Y: minibatch_Y
                                            })

                    minibatch_cost += temp_cost / num_minibatches

                if print_cost and epoch % 5 == 0:
                    print("Cost after epoch %i: %f" % (epoch, minibatch_cost))
                if print_cost:
                    costs.append(minibatch_cost)

            save_path = saver.save(sess, "model.ckpt")
            print("Model saved in path: %s" % save_path)

            predict_op = tf.argmax(Z3, 1)
            correct_prediction = tf.equal(predict_op, tf.argmax(Y, 1))

            accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
            print(accuracy)
            train_accuracy = accuracy.eval({X: X_train, Y: Y_train})
            test_accuracy = accuracy.eval({X: X_test, Y: Y_test})
            print("Train Accuracy:", train_accuracy)
            print("Test Accuracy:", test_accuracy)

        elif operation == 'restore':
            saver.restore(sess, "model.ckpt")
            predict_op = tf.argmax(Z3, 1)
            result = predict_op.eval({X: predict})
            print(result)
Example #30
# Linear Regression: Company Profit per City Population

# Load File and Set Initial Parameters

filename = 'city_profit.txt'
x, y = load_data(filename)
size = y.size
theta = np.array([[0.0], [0.0]])
alpha = 0.01
iterations = 1500
population = 175000

# Cost Function

cost = compute_cost(x, y, size, theta=theta)
print(f"With given theta: \n\tCost computed = {cost}\n")

# Gradient Descent

new_theta, cost_history = gradient_descent(x,
                                           y,
                                           size,
                                           theta=theta,
                                           alpha=alpha,
                                           iterations=iterations)

# Plot Data and Regression Line

plot_data(x, y, new_theta)
Example #31
ax1.plot(X, y, 'rx')

plt.xlabel('Population of City in 10,000s')
plt.ylabel('Profit in $10,000s')

print " =================== Part 3: Gradient descent ==================="
print 'Running Gradient Descent ...'
X = np.array([np.ones(m), X]).transpose()  # Add a column of ones to x
theta = np.zeros((2, 1))  # initialize fitting parameters

# Some gradient descent settings
iterations = 1500
alpha = 0.01

# compute and display initial cost
print('Initial cost is', compute_cost(X, y, theta))
# run gradient descent
theta, J_history = gradient_descent(X, y, theta, alpha, iterations)

# print theta to screen
print('Theta found by gradient descent:\n', theta)
print('J_history=', J_history)

# Plot the linear fit
ax1.plot(X[:, 1], np.dot(X, theta), 'k-')

# Predict the profit for a population of 100,000 (population units are 10,000s)
predict1 = np.dot([1, 10], theta)
print('For population = 100,000, we predict a profit of', predict1 * 10000)

print('============= Part 4: Visualizing J(theta_0, theta_1) =============')