Example #1
def nnCostFunction(nn_params, input_layer_size, hidden_layer_size, num_labels,
                   X, y, lambda_nn):
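    """Compute the regularized cost J and the unrolled gradient for a one-hidden-layer network."""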
    import numpy as np
    from sigmoid_function import sigmoid_function
    from sigmoid_gradient import sigmoid_gradient
    Theta1 = np.reshape(
        nn_params[0:hidden_layer_size * (input_layer_size + 1)],
        [hidden_layer_size, input_layer_size + 1])
    Theta2 = np.reshape(nn_params[hidden_layer_size * (input_layer_size + 1):],
                        [num_labels, hidden_layer_size + 1])
    m = len(X[:, 0])
    J = 0
    Theta1_grad = np.zeros(Theta1.size)
    Theta2_grad = np.zeros(Theta2.size)
    X = np.concatenate((np.ones([len(X[:, 0]), 1]), X), axis=1)
    a1 = X
    z2 = np.dot(a1, Theta1.T)
    a2 = sigmoid_function(z2)
    a2 = np.concatenate((np.ones([len(X[:, 0]), 1]), a2), axis=1)
    a3 = sigmoid_function(np.dot(a2, Theta2.T))
    #num_labels_eye=np.eye(num_labels)
    #ry=num_labels_eye[y,:]
    ry = np.zeros([m, num_labels])
    for i in range(m):  # one row per training example (not a hard-coded 5000)
        ry[i, y[i] - 1] = 1
    cost = ry * np.log(a3) + (1 - ry) * np.log(1 - a3)
    J = -np.sum(cost) / m
    reg = np.sum(Theta1[:, 1:]**2) + np.sum(Theta2[:, 1:]**2)
    J = J + lambda_nn * 1.0 / (2 * m) * reg

    # Backpropagation algorithm
    delta3 = a3 - ry
    temp = np.dot(delta3, Theta2)
    delta2 = temp[:, 1:] * sigmoid_gradient(z2)

    Delta1 = np.dot(delta2.T, a1)
    Delta2 = np.dot(delta3.T, a2)

    Theta1_grad = Delta1 / m + lambda_nn * np.concatenate(
        (np.zeros([hidden_layer_size, 1]), Theta1[:, 1:]), axis=1) / m
    Theta2_grad = Delta2 / m + lambda_nn * np.concatenate(
        (np.zeros([num_labels, 1]), Theta2[:, 1:]), axis=1) / m

    Theta1_grad = np.reshape(Theta1_grad, [Theta1_grad.size, 1])
    Theta2_grad = np.reshape(Theta2_grad, [Theta2_grad.size, 1])
    grad = np.concatenate((Theta1_grad, Theta2_grad), axis=0)
    return J, grad
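
# The helper modules imported at the top of nnCostFunction are not shown in
# this example. Below is a minimal sketch of what sigmoid_function.py and
# sigmoid_gradient.py typically contain (an assumption, not the project's
# actual files):
import numpy as np

def sigmoid_function(z):
    # element-wise logistic function; works on scalars and arrays
    return 1.0 / (1.0 + np.exp(-z))

def sigmoid_gradient(z):
    # derivative of the logistic function: g(z) * (1 - g(z))
    g = sigmoid_function(z)
    return g * (1 - g)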
def gradients(gradient_parameters):
    """Gradient"""
    theta = gradient_parameters['theta']
    input_layer_size = gradient_parameters['input_layer_size']
    hidden_layer_size = gradient_parameters['hidden_layer_size']
    number_of_labels = gradient_parameters['number_of_labels']
    x_values = gradient_parameters['x_values']
    y_values = gradient_parameters['y_values']
    lambda_value = gradient_parameters['lambda_value']

    theta_1_params = theta[0:(hidden_layer_size * (input_layer_size + 1))]
    theta_2_params = theta[(hidden_layer_size * (input_layer_size + 1)):]

    theta_1 = theta_1_params.reshape(hidden_layer_size, input_layer_size + 1)
    theta_2 = theta_2_params.reshape(number_of_labels, (hidden_layer_size + 1))

    input_examples_size = x_values.shape[0]

    hidden_layer_input = numpy.c_[numpy.ones(input_examples_size),
                                  x_values].dot(theta_1.T)
    hidden_layer_output = sigmoid(hidden_layer_input)

    output_layer_input = numpy.c_[numpy.ones(hidden_layer_output.shape[0]),
                                  hidden_layer_output].dot(theta_2.T)
    output = sigmoid(output_layer_input)

    errors = output - y_values
    backpropagated_errors = errors.dot(
        theta_2[:, 1:]) * sigmoid_gradient(hidden_layer_input)

    delta_1 = backpropagated_errors.T.dot(
        numpy.c_[numpy.ones(input_examples_size), x_values])
    delta_2 = errors.T.dot(numpy.c_[numpy.ones(hidden_layer_output.shape[0]),
                                    hidden_layer_output])

    # zero the bias columns so the regularization terms below leave the bias weights unpenalized
    theta_1[:, 0] = 0
    theta_2[:, 0] = 0

    theta_1_gradient = ((1.0 / input_examples_size) * delta_1) + (
        (lambda_value / input_examples_size) * theta_1)
    theta_2_gradient = ((1.0 / input_examples_size) * delta_2) + (
        (lambda_value / input_examples_size) * theta_2)

    gradient = numpy.append(theta_1_gradient.flatten(),
                            theta_2_gradient.flatten())
    return gradient
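
# A hedged usage sketch for gradients() (the calling code is not part of this
# example; the shapes and values below are made up). It packs the unrolled
# parameters and hyper-parameters into the expected dict and reads back the
# flattened gradient. Assumes numpy, sigmoid and sigmoid_gradient are
# importable at module level, as the function body expects.
import numpy
x_demo = numpy.random.rand(5, 3)                       # 5 examples, 3 features
y_demo = numpy.eye(4)[numpy.random.randint(0, 4, 5)]   # one-hot labels, 4 classes
theta_demo = numpy.random.rand(4 * (3 + 1) + 4 * (4 + 1)) * 0.12
grad_demo = gradients({'theta': theta_demo,
                       'input_layer_size': 3,
                       'hidden_layer_size': 4,
                       'number_of_labels': 4,
                       'x_values': x_demo,
                       'y_values': y_demo,
                       'lambda_value': 1.0})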
Example #5
# compute cost (feedforward) with lambda_nn=0
print "Feedforward Using Neural Network..."
lambda_nn=0
J,grad=nnCostFunction(nn_params,input_layer_size,hidden_layer_size,num_labels,data,labels,lambda_nn)
print "Cost at parameters (loaded from ex4weight): (%s)"%(J)

# Compute the cost with lambda_nn=1
print "FeedForwad Using Neural Network...(with lambda=1)"
lambda_nn=1
J,grad=nnCostFunction(nn_params,input_layer_size,hidden_layer_size,num_labels,data,labels,lambda_nn)
print "Cost at parameters (loaded from ex4weight): (%s)"%(J)

# Sigmoid Gradient
print "Evaluating sigmoid gradient..."
test=np.array([1,-0.5,0,0.5,1])
g=sigmoid_gradient(test)
print "sigmoid gradient with :[1,-0.5,0,0.5,1] (%s)"%(g)

# Initializing Parameters
print "Initializing Neural Network Parameters..."
from randInitializeWeights import randInitializeWeights
initial_Theta1=randInitializeWeights(input_layer_size,hidden_layer_size)
initial_Theta2=randInitializeWeights(hidden_layer_size,num_labels)
initial_Theta1=np.reshape(initial_Theta1,[initial_Theta1.size,1])
initial_Theta2=np.reshape(initial_Theta2,[initial_Theta2.size,1])
initial_nn_params=np.concatenate((initial_Theta1,initial_Theta2))

def costFunction(p):
    J,gradient=nnCostFunction(p,input_layer_size,hidden_layer_size,num_labels,data,labels,lambda_nn)
    print "training"
    print J
    return J,gradient
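
# The training call itself is cut off in this example. A hedged sketch of how
# the cost function above could be fed to an optimizer (assuming SciPy is
# available; the project's actual training code is not shown):
from scipy.optimize import minimize

def flat_cost(p):
    # nnCostFunction returns grad as an (n, 1) column; flatten it for the optimizer
    J, gradient = nnCostFunction(p, input_layer_size, hidden_layer_size,
                                 num_labels, data, labels, lambda_nn)
    return J, gradient.flatten()

result = minimize(flat_cost, initial_nn_params.flatten(), jac=True,
                  method='CG', options={'maxiter': 50})
nn_params_trained = result.x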
Example #6
print 'Feedforward Using Neural Network...'
l = 0.0
j, _ = nn_cost_function(params_trained, input_layer_size, hidden_layer_size, num_labels, X, y, l)
print 'Cost at parameters (loaded from ex4weights):', j, '(this value should be about 0.287629)'


# =============== Part 4: Implement Regularization ===============
print 'Checking Cost Function (w/ Regularization)...'
l = 1.0
j, _ = nn_cost_function(params_trained, input_layer_size, hidden_layer_size, num_labels, X, y, l)
print 'Cost at parameters (loaded from ex4weights):', j, '(this value should be about 0.383770)'


# ================ Part 5: Sigmoid Gradient  ================
print 'Evaluating sigmoid gradient...'
g = sigmoid_gradient(np.array([-1, -0.5, 0, 0.5, 1]))
print 'Sigmoid gradient evaluated at [-1 -0.5 0 0.5 1]:'
print g


# ================ Part 6: Initializing Parameters ================
print 'Initializing Neural Network Parameters...'
initial_theta_1 = rand_initialize_weights(input_layer_size, hidden_layer_size)
initial_theta_2 = rand_initialize_weights(hidden_layer_size, num_labels)
initial_nn_params = np.hstack((initial_theta_1.ravel(), initial_theta_2.ravel()))
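
# rand_initialize_weights is not shown in this snippet; a minimal sketch of
# the usual symmetry-breaking initialization it would perform (an assumption
# about its body, not the project's actual code):
def rand_initialize_weights_sketch(l_in, l_out):
    epsilon_init = 0.12
    # weights drawn uniformly from [-epsilon_init, epsilon_init], including a bias column
    return np.random.rand(l_out, 1 + l_in) * 2 * epsilon_init - epsilon_init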


# =============== Part 7: Implement Backpropagation ===============
print 'Checking Backpropagation...'
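
# The gradient-checking code is cut off here. A minimal numerical-gradient
# sketch (an assumption, not the exercise's own checking routine) that the
# analytic gradient from nn_cost_function can be compared against:
def numerical_gradient(cost_only, theta, eps=1e-4):
    # central differences, one parameter at a time
    grad = np.zeros_like(theta)
    for i in range(theta.size):
        step = np.zeros_like(theta)
        step[i] = eps
        grad[i] = (cost_only(theta + step) - cost_only(theta - step)) / (2 * eps)
    return grad

# e.g. numerical_gradient(
#     lambda t: nn_cost_function(t, input_layer_size, hidden_layer_size,
#                                num_labels, X, y, l)[0],
#     initial_nn_params)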

Example #7
lambda_nn = 0
J, grad = nnCostFunction(nn_params, input_layer_size, hidden_layer_size,
                         num_labels, data, labels, lambda_nn)
print "Cost at parameters (loaded from ex4weight): (%s)" % (J)

# Compute the cost with lambda_nn=1
print "FeedForwad Using Neural Network...(with lambda=1)"
lambda_nn = 1
J, grad = nnCostFunction(nn_params, input_layer_size, hidden_layer_size,
                         num_labels, data, labels, lambda_nn)
print "Cost at parameters (loaded from ex4weight): (%s)" % (J)

# Sigmoid Gradient
print "Evaluating sigmoid gradient..."
test = np.array([1, -0.5, 0, 0.5, 1])
g = sigmoid_gradient(test)
print "sigmoid gradient with :[1,-0.5,0,0.5,1] (%s)" % (g)

# Initializing Parameters
print "Initializing Neural Network Parameters..."
from randInitializeWeights import randInitializeWeights
initial_Theta1 = randInitializeWeights(input_layer_size, hidden_layer_size)
initial_Theta2 = randInitializeWeights(hidden_layer_size, num_labels)
initial_Theta1 = np.reshape(initial_Theta1, [initial_Theta1.size, 1])
initial_Theta2 = np.reshape(initial_Theta2, [initial_Theta2.size, 1])
initial_nn_params = np.concatenate((initial_Theta1, initial_Theta2))


def costFunction(p):
    J, gradient = nnCostFunction(p, input_layer_size, hidden_layer_size,
                                 num_labels, data, labels, lambda_nn)
    return J, gradient
Example #8
def nn_cost_function(nn_params, input_layer_size, hidden_layer_size,
                     num_labels, X, Y, lamda):
    #Implements the neural network cost function for a two layer neural network

    #Reshaping nn_params back into the parameters params_1 and params_2
    params_1 = nn_params[0:hidden_layer_size * (input_layer_size + 1)]
    params_1 = (np.reshape(params_1,
                           (input_layer_size + 1, hidden_layer_size))).T
    params_2 = nn_params[hidden_layer_size * (input_layer_size + 1):]
    params_2 = (np.reshape(params_2, (hidden_layer_size + 1, -1))).T

    m = len(X)

    J = 0

    def sigmoid(x):
        #returns the value of the sigmoid function evaluated at z (can be scalar/vector/matrix)
        return 1 / (1 + np.exp(-x))

    sigmoid = np.vectorize(sigmoid)

    #Adding bias unit in the input layer
    X = np.insert(X, 0, 1, axis=1)
    z2 = X @ params_1.T
    a2 = sigmoid(z2)

    #Adding bias unit in the hidden layer
    a2 = np.insert(a2, 0, 1, axis=1)
    h = a2 @ params_2.T
    h = sigmoid(h)

    J = np.sum(np.square(h - Y)) / (2 * m)

    #Calculating Jreg
    params_1_reg = np.square(params_1[:, 1::])
    params_2_reg = np.square(params_2[:, 1::])
    Jreg = (np.sum(params_1_reg) + np.sum(params_2_reg)) * (lamda / (2 * m))
    J = J + Jreg

    params_1_grad = np.zeros(params_1.shape)
    params_2_grad = np.zeros(params_2.shape)

    delta_a1 = np.zeros(params_1.shape)
    delta_a2 = np.zeros(params_2.shape)

    error_h = h - Y
    error_a2 = np.multiply((error_h @ params_2),
                           np.insert(sigmoid_gradient(z2), 0, 1, axis=1))

    delta_a2 = delta_a2 + error_h.T @ a2
    delta_a1 = delta_a1 + (error_a2[:, 1::]).T @ X

    params_1_grad = delta_a1 / m
    params_2_grad = delta_a2 / m

    params_1_grad[:, 1::] += (lamda / m) * params_1[:, 1::]
    params_2_grad[:, 1::] += (lamda / m) * params_2[:, 1::]

    #Unroll gradients
    grad = np.concatenate(
        ((params_1_grad.T).ravel(), (params_2_grad.T).ravel()), axis=None)

    print(J)

    return J, grad
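
# A hedged usage sketch (the calling code is not part of this example; the
# sizes and weights below are made up, and sigmoid_gradient is assumed to be
# importable). Note that the reshape/transpose above expects MATLAB-style,
# column-major unrolling of the weight matrices, i.e. Theta.T.ravel():
import numpy as np
in_sz, hid_sz, n_lab, m_demo = 3, 4, 2, 6
Theta1_demo = np.random.rand(hid_sz, in_sz + 1) * 0.12
Theta2_demo = np.random.rand(n_lab, hid_sz + 1) * 0.12
nn_params_demo = np.concatenate((Theta1_demo.T.ravel(), Theta2_demo.T.ravel()))
X_demo = np.random.rand(m_demo, in_sz)
Y_demo = np.eye(n_lab)[np.random.randint(0, n_lab, m_demo)]  # one-hot targets
J_demo, grad_demo = nn_cost_function(nn_params_demo, in_sz, hid_sz, n_lab,
                                     X_demo, Y_demo, 1.0)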
Example #9
# J_data = []
for i in range(NUM_ITERATIONS):
    # forward propagation

    J, h, new_a2_sets, z2 = cost_function.cost_function(
        new_training_sets, Theta1, Theta2, number_features_sets, y_training_sets)
    # J_data.append(J)
    print(J)
    if(J < COST_THRESHOLD):
        break
    # back propagation

    delta3 = h - y_training_sets.T
    Theta2_grad = (delta3@new_a2_sets.T)/number_features_sets
    Fake_theta2 = Theta2[:, 1:]
    delta2 = (Fake_theta2.T@delta3)*sigmoid_gradient.sigmoid_gradient(z2)
    Theta1_grad = (delta2@training_sets)/number_features_sets

    # gradient descent

    Theta1 = Theta1 - LEARNING_RATE*Theta1_grad
    Theta2 = Theta2 - LEARNING_RATE*Theta2_grad
    # J_datas.append(J_data)
np.savetxt("Theta1.csv", Theta1, delimiter=",")
np.savetxt("Theta2.csv", Theta2, delimiter=",")
# for i in range(len(J_datas)):
#     plt.plot(J_datas[i])
#     plt.xlabel("Number of iterations")
#     plt.ylabel(f"Cost function for learning rate = {LEARNING_RATES[i]}")
#     plt.show()
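
# A hedged follow-up sketch (not part of the original script): reload the saved
# weights and reuse the project's cost_function helper for one forward pass;
# the row index of the largest output unit is the predicted class for each
# example (the loop above treats columns as examples).
# Theta1_loaded = np.loadtxt("Theta1.csv", delimiter=",")
# Theta2_loaded = np.loadtxt("Theta2.csv", delimiter=",")
# _, h_pred, _, _ = cost_function.cost_function(
#     new_training_sets, Theta1_loaded, Theta2_loaded,
#     number_features_sets, y_training_sets)
# predictions = np.argmax(h_pred, axis=0)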