Example 1 (cf_nn: regularized neural-network cost function)
import numpy as np

from common_functions import add_zero_feature, sigmoid


def cf_nn(nn_params, input_layer_size, hidden_layer_size, num_labels, X, Y,
          lambda_coef):
    # scipy.optimize passes the parameters as a flat ndarray; coercing to
    # np.matrix keeps the 2-D indexing and `*`-as-matrix-product below valid.
    nn_params = np.matrix(nn_params)

    # Unroll the parameter vector into the two weight matrices
    # (each carries a bias column, hence the +1).
    Theta1 = nn_params[0, :hidden_layer_size * (input_layer_size + 1)].reshape(
        (hidden_layer_size, (input_layer_size + 1)))
    Theta2 = nn_params[0, hidden_layer_size * (input_layer_size + 1):].reshape(
        (num_labels, (hidden_layer_size + 1)))

    m = Y.shape[1]
    Y = np.asarray(Y)  # accept Y as either np.matrix or plain ndarray

    # Forward propagation.
    A_1 = X
    Z_2 = Theta1 * A_1.T
    A_2 = sigmoid(Z_2)
    A_2 = add_zero_feature(A_2, axis=0)
    Z_3 = Theta2 * A_2
    A_3 = sigmoid(Z_3)
    H = A_3.A

    # Unregularized cross-entropy cost, summed over classes and samples.
    J = np.sum(-Y * np.log(H) - (1 - Y) * np.log(1 - H)) / m

    # L2 penalty over the weights, excluding the bias columns.
    reg_J = 0.0
    reg_J += np.sum(np.power(Theta1, 2)[:, 1:])
    reg_J += np.sum(np.power(Theta2, 2)[:, 1:])

    J += reg_J * (float(lambda_coef) / (2 * m))

    return J
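A minimal call sketch, assuming the Coursera exercise's data and weight files are available and that add_zero_feature prepends the bias column, as in Example 5 below (file names and layer sizes are taken from Examples 4 and 5; the quoted reference costs are the exercise's published values, not computed here):

import scipy.io as sio
import numpy as np

from common_functions import add_zero_feature

data = sio.loadmat('ex3data1.mat')
X = add_zero_feature(data['X'])  # adds the bias column: 400 -> 401 features
y = data['y']                    # labels in 1..10
Y = (np.arange(10)[:, np.newaxis] == (y.T - 1)).astype(float)  # one-hot, (10, m)

w = sio.loadmat('ex3weights.mat')
nn_params = np.concatenate((w['Theta1'].ravel(), w['Theta2'].ravel()))
# The exercise quotes roughly 0.2876 for lambda = 0 and 0.3838 for lambda = 1.
print(cf_nn(nn_params, 400, 25, 10, X, Y, lambda_coef=1.0))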
Example 2 (gf_nn: backpropagation gradient)
import numpy as np

from common_functions import add_zero_feature, sigmoid


def gf_nn(nn_params, input_layer_size, hidden_layer_size, num_labels, X, Y,
          lambda_coef):
    # Same coercion as in cf_nn: scipy.optimize passes a flat ndarray, while
    # the code below relies on np.matrix semantics. sigmoid_gradient is
    # defined alongside these helpers (see Example 3).
    nn_params = np.matrix(nn_params)

    # Unroll the parameter vector into the two weight matrices.
    Theta1 = nn_params[0, :hidden_layer_size * (input_layer_size + 1)].reshape(
        (hidden_layer_size, (input_layer_size + 1)))
    Theta2 = nn_params[0, hidden_layer_size * (input_layer_size + 1):].reshape(
        (num_labels, (hidden_layer_size + 1)))

    m = Y.shape[1]

    # Forward propagation, identical to cf_nn.
    A_1 = X
    Z_2 = Theta1 * A_1.T
    A_2 = sigmoid(Z_2)
    A_2 = add_zero_feature(A_2, axis=0)
    Z_3 = Theta2 * A_2
    A_3 = sigmoid(Z_3)

    # Backpropagation: output-layer error, then the hidden-layer error
    # (the bias row is dropped before applying the sigmoid gradient).
    DELTA_3 = A_3 - Y
    DELTA_2 = np.multiply((Theta2.T * DELTA_3)[1:, :], sigmoid_gradient(Z_2))
    Theta1_grad = (DELTA_2 * A_1) / m
    Theta2_grad = (DELTA_3 * A_2.T) / m

    # Regularize everything except the bias columns.
    lambda_coef = float(lambda_coef)
    Theta1_grad[:, 1:] += (lambda_coef / m) * Theta1[:, 1:]
    Theta2_grad[:, 1:] += (lambda_coef / m) * Theta2[:, 1:]

    # Unroll the gradient back into a flat vector for the optimizer.
    return np.concatenate((Theta1_grad.A1, Theta2_grad.A1))
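A finite-difference check is the usual way to validate a backpropagation implementation; the sketch below compares gf_nn against numerical gradients of cf_nn. It assumes nn_params, X, Y and the layer sizes are already set up as in Example 4; the helper name numerical_gradient is illustrative, not part of the project. The original exercise expects a relative difference below about 1e-9 on a small debug network; on the full network this coordinate-wise loop is slow, so prefer small layer sizes when checking:

def numerical_gradient(cost, params, eps=1e-4):
    # Central finite differences, one coordinate at a time.
    grad = np.zeros_like(params, dtype=float)
    for i in range(params.size):
        step = np.zeros_like(params, dtype=float)
        step[i] = eps
        grad[i] = (cost(params + step) - cost(params - step)) / (2 * eps)
    return grad

args = (input_layer_size, hidden_layer_size, num_labels, X, Y, 0.0)
numeric = numerical_gradient(lambda p: cf_nn(p, *args), nn_params)
analytic = gf_nn(nn_params, *args)
print(np.max(np.abs(analytic - numeric)))  # should be vanishingly small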
Example 3 (sigmoid_gradient)
def sigmoid_gradient(z):
    # Elementwise derivative of the sigmoid: g'(z) = g(z) * (1 - g(z)).
    return np.multiply(sigmoid(z), 1 - sigmoid(z))
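A quick spot check, using the fact that the sigmoid's derivative peaks at z = 0 with value exactly 0.25:

import numpy as np

from common_functions import sigmoid

print(sigmoid_gradient(np.array([-1.0, 0.0, 1.0])))
# approximately [0.1966, 0.25, 0.1966]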
Example 4 (training script: cost check, L-BFGS-B training, accuracy)
    # Excerpt from the training script's __main__ block; X, y, Theta1 and
    # Theta2 are assumed to be loaded beforehand as in Example 5, and
    # scipy.optimize.minimize and the project's rand_initialize_weights
    # are assumed to be imported.
    nn_params = np.concatenate((Theta1.ravel(), Theta2.ravel()))

    input_layer_size = 400
    hidden_layer_size = 25
    num_labels = 10
    m = len(y)

    # One-hot encode the labels: column j of Y encodes sample j, (10 x m).
    Y = (np.arange(num_labels)[:, np.newaxis] == (y.T - 1)).astype(float)

    # Cost at the pretrained weights, with and without regularization.
    for lambda_coef in (0, 1):
        print('Cost function = {}, lambda = {}'.format(
            cf_nn(nn_params, input_layer_size, hidden_layer_size, num_labels,
                  X, Y, lambda_coef), lambda_coef))

    # Random initialization breaks symmetry between hidden units.
    initial_Theta1 = rand_initialize_weights(input_layer_size, hidden_layer_size)
    initial_Theta2 = rand_initialize_weights(hidden_layer_size, num_labels)

    initial_nn_params = np.concatenate((initial_Theta1.ravel(),
                                        initial_Theta2.ravel()))

    # Train with L-BFGS-B, using gf_nn as the analytic gradient.
    # lambda_coef is 1 here, left over from the loop above.
    res = minimize(cf_nn, initial_nn_params, method='L-BFGS-B', jac=gf_nn,
                   options={'disp': True, 'maxiter': 100},
                   args=(input_layer_size, hidden_layer_size, num_labels,
                         X, Y, lambda_coef)).x

    # Roll the optimized vector back into weight matrices.
    Theta1 = res[:hidden_layer_size * (input_layer_size + 1)].reshape(
        (hidden_layer_size, (input_layer_size + 1)))
    Theta2 = res[hidden_layer_size * (input_layer_size + 1):].reshape(
        (num_labels, (hidden_layer_size + 1)))

    # Forward pass over the training set with the learned weights.
    h1 = sigmoid(np.dot(X, Theta1.T))
    h2 = sigmoid(np.dot(add_zero_feature(h1), Theta2.T))
    y_pred = np.argmax(h2, axis=1) + 1

    print('Training Set Accuracy: {}'.format(np.mean(y_pred == y.ravel()) * 100))
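The one-hot construction of Y above is terse; a toy run (labels invented for illustration) makes the broadcasting explicit:

import numpy as np

y_toy = np.array([[1], [3], [2]])  # three samples, labels in 1..3
Y_toy = (np.arange(3)[:, np.newaxis] == (y_toy.T - 1)).astype(float)
print(Y_toy)
# [[1. 0. 0.]
#  [0. 0. 1.]
#  [0. 1. 0.]]
# Column j is the one-hot encoding of sample j, so Y.shape[1] == m.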
Example 5 (forward propagation with pretrained weights)
import scipy.io as sio
import numpy as np

from common_functions import add_zero_feature, sigmoid

if __name__ == '__main__':

    # Load the digit images (400 features per sample) and their labels.
    data = sio.loadmat('ex3data1.mat')
    y = data['y']
    X = data['X']

    X = add_zero_feature(X)

    # Pretrained weights: Theta1 (25 x 401), Theta2 (10 x 26).
    data = sio.loadmat('ex3weights.mat')
    Theta1 = data['Theta1']
    Theta2 = data['Theta2']

    # Forward propagation, inserting the bias row between the layers.
    p = sigmoid(np.dot(Theta1, X.T))
    p = add_zero_feature(p, axis=0)
    p = sigmoid(np.dot(Theta2, p))

    # Most probable class per column; +1 restores the 1-based labels.
    y_pred = np.argmax(p, axis=0) + 1

    print('Training Set Accuracy: {}'.format(np.mean(y_pred == y.flatten()) * 100))
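For reference, the shape flow of this forward pass, assuming the standard exercise dimensions used in Example 4 (m samples of 400 pixels, 25 hidden units, 10 classes); in the original exercise the digit 0 is stored as label 10, which is why argmax + 1 lines up with y:

# X            : (m, 401)   after add_zero_feature
# Theta1 @ X.T : (25, 401) x (401, m) -> (25, m), then sigmoid
# bias row     : add_zero_feature(..., axis=0) -> (26, m)
# Theta2 @ p   : (10, 26) x (26, m) -> (10, m), then sigmoid
# argmax(axis=0) + 1 -> predicted label in 1..10 for each of the m columns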