Example #1
import numpy as np
import scipy.optimize as opt

# Network dimensions used throughout this example. init_params, X and y (and
# the helpers nn_cost_fun, rand_initialize_weights, compute_numerical_grad)
# are assumed to be defined earlier in the original script.
input_layer_size, hidden_layer_size, num_labels = 400, 25, 10

print nn_cost_fun(init_params, 400, 25, 10, X, y, 0)[0]

# """
# Initializing Parameters Rand
rand_theta1 = rand_initialize_weights(400, 25)
rand_theta2 = rand_initialize_weights(25, 10)
rand_params = np.r_[rand_theta1.ravel(), rand_theta2.ravel()]
# print nn_cost_fun(rand_params, 400, 25, 10, X, y, 0)[0]

# ================= compute_numerical_gradient... =================
# If your backpropagation implementation is correct, then the relative
# difference between the analytic and the numerical gradient will be small
# (less than 1e-9).
sample = np.random.choice(X.shape[0], 10)
XX = X[sample]
yy = y[sample]
nn_param_grad = nn_cost_fun(rand_params, 400, 25, 10, XX, yy, 0)[1]
number_param_grad = compute_numerical_grad(rand_params, 400, 25, 10, XX, yy, 0)
diff = np.abs(number_param_grad - nn_param_grad) / (np.abs(number_param_grad) +
                                                    np.abs(nn_param_grad))
print 'number_param_grad diff is: ', diff[0:100]
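
# For reference, numerical gradients of this kind are usually obtained with
# central differences. A minimal sketch of what compute_numerical_grad is
# presumably doing (numerical_grad_sketch is a hypothetical helper, not part
# of the original code; it assumes cost_fun returns a (cost, grad) pair):
def numerical_grad_sketch(cost_fun, params, *args):
    eps = 1e-4                        # size of the perturbation
    grad = np.zeros_like(params)
    for i in range(params.size):
        step = np.zeros_like(params)
        step[i] = eps
        # central difference: (J(theta + eps*e_i) - J(theta - eps*e_i)) / (2*eps)
        grad[i] = (cost_fun(params + step, *args)[0] -
                   cost_fun(params - step, *args)[0]) / (2 * eps)
    return grad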

print 'Training Neural Network...'
lam = 1  # regularization parameter (lambda)
result = opt.minimize(fun=nn_cost_fun,
                      x0=rand_params,
                      args=(input_layer_size, hidden_layer_size, num_labels, X,
                            y, lam),
                      method='TNC',
                      jac=True,  # nn_cost_fun returns (cost, gradient)
                      options={'maxiter': 150})
params_trained = result.x
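
# The optimizer returns one flat parameter vector; to use the trained network
# it has to be unrolled back into the two weight matrices. A minimal sketch,
# assuming rand_initialize_weights(L_in, L_out) above returns an
# (L_out, L_in + 1) matrix with a bias column (this shape convention is an
# assumption, not confirmed by the snippet):
split = hidden_layer_size * (input_layer_size + 1)
theta1_trained = params_trained[:split].reshape(hidden_layer_size,
                                                input_layer_size + 1)
theta2_trained = params_trained[split:].reshape(num_labels,
                                                hidden_layer_size + 1)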
Example #2
# The diff printed at the end is the relative difference between the gradient
# from backpropagation (BP) and the numerically computed gradient,
#     diff = ||g_bp - g_num|| / ||g_bp + g_num||.
# It should be very small; otherwise the gradient computation of the
# autoencoder is not implemented correctly.

##==========================================================


import numpy as np
import sparse_autoencoder as sp
import compute_numerical_grad as co


data = np.random.rand(64, 100)
visible_size = 64
hidden_size = [16, 8, 4]
#hidden_size = [25]
theta = sp.initial_parameter(hidden_size, visible_size)

bp_cost = sp.compute_cost(theta, data, visible_size, hidden_size)
print bp_cost

bp_grad = sp.compute_grad(theta, data, visible_size, hidden_size)
print bp_grad

num_grad = co.compute_numerical_grad(sp.compute_cost, theta, data,
                                     visible_size, hidden_size)

diff = np.linalg.norm(bp_grad - num_grad) / np.linalg.norm(num_grad + bp_grad)

print str(diff) + " should be less than 1e-9! Is it?"
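
# Optionally, the criterion in the message above can be made an explicit
# pass/fail check (1e-9 is the threshold quoted in this script, a common rule
# of thumb rather than a hard constant):
if diff < 1e-9:
    print "Gradient check PASSED"
else:
    print "Gradient check FAILED"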
Example #3
import numpy as np
import sparse_autoencoder as sp
import compute_numerical_grad as co

# Randomly generate theta and data
visible_size = 32
hidden_size = [16, 8, 4]
data = np.random.rand(32, 100)

layer_ind = range(1, len(hidden_size) + 1)
layer_size = [visible_size] + hidden_size

# Gradient-check every layer of the stacked autoencoder, one layer at a time.
for ind in layer_ind:

    theta = sp.initial_parameter(layer_size[ind], layer_size[ind - 1])

    bp_grad = sp.compute_grad(theta, data, layer_size[ind - 1],
                              layer_size[ind])

    num_grad = co.compute_numerical_grad(sp.compute_cost, theta, data,
                                         layer_size[ind - 1], layer_size[ind])

    diff = np.linalg.norm(bp_grad - num_grad) /\
        np.linalg.norm(num_grad + bp_grad)

    print str(diff) + " should be less than 1e-9! Is it?"

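# Feed the data forward through this layer's weights so that the next layer
# is gradient-checked on the input it would actually receive.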
    W = theta[:layer_size[ind]*layer_size[ind-1]].\
        reshape(layer_size[ind], layer_size[ind-1])

    data = np.dot(W, data)
Example #4
import numpy as np
import sparse_autoencoder as sp
import compute_numerical_grad as co
from time import clock
from dpark import DparkContext  # dpark (a Python clone of Spark), presumably used to distribute the computation

# data, layer_ind and layer_size are assumed to be set up as in Example #3.
dpark_ctx = DparkContext()

printdiff = []

start = clock()
# Gradient-check every layer, as in Example #3, but with the dpark-backed implementation.
for ind in layer_ind:

    theta = sp.initial_parameter(layer_size[ind], layer_size[ind - 1])

    bp_grad = sp.compute_grad(theta, data, layer_size[ind-1], layer_size[ind],
                              0.0001, 0.01, 3, dpark_ctx)

    num_grad = co.compute_numerical_grad(sp.compute_cost, theta, data,
                                         layer_size[ind-1], layer_size[ind],
                                         0.0001, 0.01, 3, dpark_ctx)

    diff = np.linalg.norm(bp_grad - num_grad) /\
        np.linalg.norm(num_grad + bp_grad)

    printdiff.append(diff)

    W = theta[:layer_size[ind]*layer_size[ind-1]].\
        reshape(layer_size[ind], layer_size[ind-1])

    data = np.dot(W, data)
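
# Report how long the layer-wise gradient check took (clock() was started
# before the loop above).
print 'Layer-wise gradient check took %.2f seconds' % (clock() - start)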

for ind in layer_ind:
    print str(printdiff[ind-1]) + " should be less than 1e-9! Is it?"
Example #5
import numpy as np
import sparse_autoencoder as sp
import compute_numerical_grad as co

# Randomly generate theta and data
visible_size = 32
hidden_size = [16, 8, 4]
data = np.random.rand(32, 100)

layer_ind = range(1, len(hidden_size) + 1)
layer_size = [visible_size] + hidden_size

# Gradient-check every layer of the stacked autoencoder, one layer at a time.
for ind in layer_ind:

    theta = sp.initial_parameter(layer_size[ind], layer_size[ind - 1])

    bp_grad = sp.compute_grad(theta, data, layer_size[ind-1], layer_size[ind])

    num_grad = co.compute_numerical_grad(sp.compute_cost, theta, data,
                                         layer_size[ind-1], layer_size[ind])

    diff = np.linalg.norm(bp_grad - num_grad) /\
        np.linalg.norm(num_grad + bp_grad)

    print str(diff) + " should be less than 1e-9! Is it?"

    W = theta[:layer_size[ind]*layer_size[ind-1]].\
        reshape(layer_size[ind], layer_size[ind-1])

    data = np.dot(W, data)
Example #6
# Gradient checking is highly recommended to verify the implementation precisely.

# The diff printed at the end is the relative difference between the gradient
# from backpropagation (BP) and the numerically computed gradient,
#     diff = ||g_bp - g_num|| / ||g_bp + g_num||.
# It should be very small; otherwise the gradient computation of the
# autoencoder is not implemented correctly.

##==========================================================

import numpy as np
import sparse_autoencoder as sp
import compute_numerical_grad as co

data = np.random.rand(64, 100)
visible_size = 64
hidden_size = [16, 8, 4]
#hidden_size = [25]
theta = sp.initial_parameter(hidden_size, visible_size)

bp_cost = sp.compute_cost(theta, data, visible_size, hidden_size)
print bp_cost

bp_grad = sp.compute_grad(theta, data, visible_size, hidden_size)
print bp_grad

num_grad = co.compute_numerical_grad(sp.compute_cost, theta, data,
                                     visible_size, hidden_size)

diff = np.linalg.norm(bp_grad - num_grad) / np.linalg.norm(num_grad + bp_grad)

print str(diff) + " should be less than 1e-9! Is it?"