def checkNNGradients(lambd):

    input_layer_size = 3
    hidden_layer_size = 5
    num_labels = 3
    m = 5
    layers = [3, 5, 3]

    # At this point we generate some 'random' test data
    Theta = [] 
    Theta.append(debugInitializeWeights(hidden_layer_size, input_layer_size))
    Theta.append(debugInitializeWeights(num_labels, hidden_layer_size))

    X = debugInitializeWeights(m, input_layer_size - 1)
    y = remainder(arange(m)+1, num_labels)
    
    # Unroll parameters
    nn_params = unroll_params(Theta)

    # Compute Numerical Gradient
    numgrad = computeNumericalGradient(nn_params,layers, X, y, num_labels, lambd)

    # Compute Analytical Gradient (BackPropagation)
    truegrad = backwards(nn_params, layers, X, y, num_labels, lambd)

    
    print(concatenate(([numgrad], [truegrad]), axis=0).transpose())
    print("The above two columns must be very similar.\n(Left-Numerical Gradient, Right-Analytical Gradient (BackPropagation))\n")

    diff = linalg.norm(numgrad - truegrad) / linalg.norm(numgrad + truegrad)
    print("\nNote: If the implementation of the backpropagation is correct, the relative difference must be quite small (less than 1e-09).")
    print("Relative difference: " + str(diff) + "\n")
def checkNumericalGradient():
  """This code can be used to check your numerical gradient implementation in
  computeNumericalGradient.m
  It analytically evaluates the gradient of a very simple function called
  simpleQuadraticFunction (see below) and compares the result with your
  numerical solution. Your numerical gradient implementation is incorrect if
  your numerical solution deviates too much from the analytical solution.
  """
  
  # Evaluate the function and gradient at x = [4; 10]; (Here, x is a 2d vector.)
  x = np.array([4, 10])
  _, grad = simpleQuadraticFunction(x)

  # Use your code to numerically compute the gradient of simpleQuadraticFunction at x.
  numgrad = computeNumericalGradient(simpleQuadraticFunction, x)

  # Visually examine the two gradient computations.  The two columns
  # you get should be very similar. 
  print(np.stack((numgrad, grad)).T)
  print('The above two columns you get should be very similar.')
  print('Left-Your Numerical Gradient, Right-Analytical Gradient.\n')

  # Evaluate the norm of the difference between two solutions.  
  # If you have a correct implementation, and assuming you used \epsilon = 0.0001 
  # in computeNumericalGradient.m, then diff below should be 2.1452e-12 
  diff = norm(numgrad - grad) / norm(numgrad + grad)
  print(diff)
  print('Norm of the difference between numerical and analytical gradient (should be < 1e-9)\n');
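# For reference, a minimal central-difference sketch of computeNumericalGradient and a
# plausible simpleQuadraticFunction, consistent with the comments above (EPSILON = 1e-4,
# expected diff around 2.1452e-12). Treat both as assumptions: the signatures of these
# helpers vary between the examples collected here (some pass a callable, others pass the
# raw network arguments).
import numpy as np

def simpleQuadraticFunction(x):
    """f(x) = x1^2 + 3*x1*x2 and its analytical gradient (assumed definition)."""
    value = x[0] ** 2 + 3.0 * x[0] * x[1]
    grad = np.array([2.0 * x[0] + 3.0 * x[1], 3.0 * x[0]])
    return value, grad

def computeNumericalGradient(J, theta, eps=1e-4):
    """Central differences: numgrad[i] ~ (J(theta + eps*e_i) - J(theta - eps*e_i)) / (2*eps).
    J is assumed to return either the scalar cost or a (cost, grad) tuple."""
    theta = np.asarray(theta, dtype=float)
    numgrad = np.zeros_like(theta)
    perturb = np.zeros_like(theta)
    it = np.nditer(theta, flags=['multi_index'])
    while not it.finished:
        idx = it.multi_index
        perturb[idx] = eps
        lo, hi = J(theta - perturb), J(theta + perturb)
        if isinstance(lo, tuple):
            lo, hi = lo[0], hi[0]
        numgrad[idx] = (np.asarray(hi).item() - np.asarray(lo).item()) / (2.0 * eps)
        perturb[idx] = 0.0
        it.iternext()
    return numgrad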
def checkCostFunction(lamda=0):
    # Create small problem
    X_t = np.random.rand(4, 3)
    Theta_t = np.random.rand(5, 3)

    # Zap out most entries
    Y = X_t.dot(Theta_t.T)
    Y[np.where(np.random.random_sample(Y.shape) > 0.5)] = 0
    R = np.zeros(Y.shape)
    R[np.where(Y != 0)] = 1

    # Run Gradient Checking
    X = np.random.random_sample(X_t.shape)
    Theta = np.random.random_sample(Theta_t.shape)
    num_users = Y.shape[1]
    num_movies = Y.shape[0]
    num_features = Theta_t.shape[1]

    # params = np.hstack((X.T.flatten(), Theta.T.flatten()))
    costFunc = lambda X, Theta: cofiCostFunc(X, Theta, Y, R, lamda)
    costFunc_w = lambda X, Theta: costFunc(X, Theta)[0]
    numgrad = computeNumericalGradient(costFunc_w, X, Theta)

    cost, grad = cofiCostFunc(X, Theta, Y, R, lamda)

    print(grad)
    print(numgrad)
def checkNumericalGradient():
    # This code can be used to check your numerical gradient implementation
    # in computeNumericalGradient.m
    # It analytically evaluates the gradient of a very simple function called
    # simpleQuadraticFunction (see below) and compares the result with your
    # numerical
    # solution. Your numerical gradient implementation is incorrect if
    # your numerical solution deviates too much from the analytical solution.

    # Evaluate the function and gradient at x = [4; 10]; (Here, x is a 2d
    # vector.)
    x = np.array([4, 10]).reshape((-1, 1))
    [value, grad] = simpleQuadraticFunction(x)

    # Use your code to numerically compute the gradient of
    # simpleQuadraticFunction at x.
    # (We pass the function object simpleQuadraticFunction itself as the argument.)
    numgrad = computeNumericalGradient(simpleQuadraticFunction, x)

    # Visually examine the two gradient computations.  The two columns
    # you get should be very similar.
    print(np.column_stack((numgrad, grad)))
    print('The above two columns you get should be very similar.\n'
          '(Left-Your Numerical Gradient, Right-Analytical Gradient)\n\n')

    # Evaluate the norm of the difference between two solutions.
    # If you have a correct implementation, and assuming you used EPSILON = 0.0001
    # in computeNumericalGradient.m, then diff below should be 2.1452e-12
    diff = np.linalg.norm(numgrad - grad)/np.linalg.norm(numgrad + grad)
    print(diff)
    print('Norm of the difference between numerical and analytical gradient (should be < 1e-9)\n')
Example #5
def checkNNGradients(Lambda=0):
    """Creates a small neural network to check the
    backpropagation gradients, it will output the analytical gradients
    produced by your backprop code and the numerical gradients (computed
    using computeNumericalGradient). These two gradient computations should
    result in very similar values.
    """

    input_layer_size = 4
    hidden_layer_size = 5
    num_labels = 3
    m = 10

    # We generate some 'random' test data
    Theta1 = debugInitializeWeights(hidden_layer_size, input_layer_size)
    Theta2 = debugInitializeWeights(num_labels, hidden_layer_size)

    # Reusing debugInitializeWeights to generate X
    X = debugInitializeWeights(m, input_layer_size - 1)
    y = np.mod(range(1, m + 1), num_labels)

    # Unroll parameters
    nn_params = np.hstack((Theta1.T.ravel(), Theta2.T.ravel()))

    # Short hand for cost function
    costFunc = lambda p: nnCostFunction(p, input_layer_size, hidden_layer_size,
                                        num_labels, X, y, Lambda)
    #costFunc = nnCostFunction(nn_params, input_layer_size, hidden_layer_size, num_labels, X, y, Lambda)

    numgrad = computeNumericalGradient(costFunc, nn_params)
    grad = costFunc(nn_params)[1]

    print(numgrad)
    print(grad)

    # Visually examine the two gradient computations.  The two columns
    # you get should be very similar.
    print(np.column_stack((numgrad, grad)))

    print('The above two columns you get should be very similar.\n' \
             '(Left-Your Numerical Gradient, Right-Analytical Gradient)\n\n')

    # Evaluate the norm of the difference between two solutions.
    # If you have a correct implementation, and assuming you used EPSILON = 0.0001
    # in computeNumericalGradient.m, then diff below should be less than 1e-9
    diff = np.linalg.norm(numgrad - grad) / np.linalg.norm(numgrad + grad)

    print('If your backpropagation implementation is correct, then\n ' \
          'the relative difference will be small (less than 1e-9). \n' \
          '\nRelative Difference: %g\n' % diff)

    return (nn_params, grad)
def checkNNGradients(lambda_reg=0):
    #CHECKNNGRADIENTS Creates a small neural network to check the
    #backpropagation gradients
    #   CHECKNNGRADIENTS(lambda_reg) Creates a small neural network to check the
    #   backpropagation gradients, it will output the analytical gradients
    #   produced by your backprop code and the numerical gradients (computed
    #   using computeNumericalGradient). These two gradient computations should
    #   result in very similar values.
    #

    input_layer_size = 3
    hidden_layer_size = 5
    num_labels = 3
    m = 5

    # We generate some 'random' test data
    Theta1 = diw.debugInitializeWeights(hidden_layer_size, input_layer_size)
    Theta2 = diw.debugInitializeWeights(num_labels, hidden_layer_size)
    # Reusing debugInitializeWeights to generate X
    X  = diw.debugInitializeWeights(m, input_layer_size - 1)
    y  = 1 + np.mod(range(m), num_labels).T

    # Unroll parameters
    nn_params = np.concatenate((Theta1.reshape(Theta1.size, order='F'), Theta2.reshape(Theta2.size, order='F')))

    # Short hand for cost function
    def costFunc(p):
        return nncf.nnCostFunction(p, input_layer_size, hidden_layer_size, \
                   num_labels, X, y, lambda_reg)

    _, grad = costFunc(nn_params)
    numgrad = cng.computeNumericalGradient(costFunc, nn_params)

    # Visually examine the two gradient computations.  The two columns
    # you get should be very similar. 
    # code from http://stackoverflow.com/a/27663954/583834
    fmt = '{:<25}{}'
    print(fmt.format('Numerical Gradient', 'Analytical Gradient'))
    for numerical, analytical in zip(numgrad, grad):
        print(fmt.format(numerical, analytical))

    print('The above two columns you get should be very similar.\n' \
             '(Left Col.: Your Numerical Gradient, Right Col.: Analytical Gradient)')

    # Evaluate the norm of the difference between two solutions.  
    # If you have a correct implementation, and assuming you used EPSILON = 0.0001 
    # in computeNumericalGradient.m, then diff below should be less than 1e-9
    diff = Decimal(np.linalg.norm(numgrad-grad))/Decimal(np.linalg.norm(numgrad+grad))

    print('If your backpropagation implementation is correct, then \n' \
             'the relative difference will be small (less than 1e-9). \n' \
             '\nRelative Difference: {:.10E}'.format(diff))
Example #7
def checkNNGradients(NNlambda = 0.0):
    input_layer_size = 3
    hidden1_layer_size = 5
    hidden2_layer_size = 4
    num_labels = 3
    m = 5

    #We generate some 'random' test data
    Theta1 = debugInitializeWeights(input_layer_size, hidden1_layer_size)
    Theta2 = debugInitializeWeights(hidden1_layer_size, hidden2_layer_size)
    Theta3 = debugInitializeWeights(hidden2_layer_size, num_labels)

    # Reusing debugInitializeWeights to generate X
    X  = debugInitializeWeights(m, input_layer_size - 1)
    y  = (1.0 + transpose(mod(range(0, m), num_labels))).reshape(-1, 1)

    # Unroll parameters
    nn_params = concatenate((Theta1.flatten(), Theta2.flatten(), Theta3.flatten()))

    miniBatchSize = 1000.0
    theta = nn_params
    counter = 0
    numberOfIterations = range(int(ceil(X.shape[0] / miniBatchSize)))
    for i in numberOfIterations:
        values2Train = list(range(counter, counter + int(miniBatchSize)))
        counter = max(values2Train) + 1

        while X.shape[0] <= max(values2Train):
            values2Train.remove(values2Train[-1])

        arguments = (input_layer_size, hidden1_layer_size, hidden2_layer_size, num_labels, X[values2Train, :], y[values2Train, :], NNlambda)
        theta = optimize.fmin_l_bfgs_b(nnCostFunction, x0 = theta, fprime =  nnGradFunction, args = arguments, maxiter = 20, disp = True, iprint = 0 )
        #theta = optimize.fmin_cg(nnCostFunction, x0 = nnThetas, fprime = nnGradFunction, args = arguments, maxiter = 3, disp = True, retall= True )
        theta = array(theta[0])

    cost = nnCostFunction(theta, input_layer_size, hidden1_layer_size, hidden2_layer_size, num_labels, X, y, NNlambda)
    grad = nnGradFunction(theta, input_layer_size, hidden1_layer_size, hidden2_layer_size, num_labels, X, y, NNlambda)

    numgrad = computeNumericalGradient(theta, input_layer_size, hidden1_layer_size, hidden2_layer_size, num_labels, X, y, NNlambda)

    # Visually examine the two gradient computations.  The two columns you get should be very similar.
    print(hstack((numgrad, grad)))
    print('The above two columns you get should be very similar')

    # Evaluate the norm of the difference between two solutions.
    # If you have a correct implementation, and assuming you used EPSILON = 0.0001
    # in computeNumericalGradient.m, then diff below should be less than 1e-9
    diff = linalg.norm(numgrad-grad)/linalg.norm(numgrad+grad)

    print('If your backpropagation implementation is correct, then the relative difference will be small (less than 1e-9) relative Difference')
    print(diff)
    return(diff)
Example #8
def run_training(FLAGS, images, labels):
  # For debugging purposes, you may wish to reduce the size of the input data
  # in order to speed up gradient checking. 
  # Here, we create synthetic dataset using random data for testing
  
  if FLAGS.debug:
    inputSize = 8
    images = randn(8, 100)
    labels = randint(0, 10, 100, dtype = np.uint8)
  else:
    inputSize = FLAGS.visibleSize

  numClasses = 5
  decay = FLAGS.decay
  
  # Randomly initialise theta
  theta = 0.005 * randn(numClasses * inputSize)
  
  # Implement softmaxCost in softmax.py.   
  cost, grad = softmaxCost(theta, numClasses, inputSize, decay, images, labels)
  
  #  As with any learning algorithm, you should always check that your
  #  gradients are correct before learning the parameters.
  if FLAGS.debug:
    # First, lets make sure your numerical gradient computation is correct for a
    # simple function.  After you have implemented computeNumericalGradient.py,
    # run the following: 
    #checkNumericalGradient()
  
    numGrad = computeNumericalGradient(lambda x: softmaxCost(x, numClasses, inputSize, decay, images, labels),
                                       theta)
  
    # Use this to visually compare the gradients side by side.
    print(np.stack((numGrad, grad)).T)
  
    # Compare numerically computed gradients with those computed analytically.
    diff = norm(numGrad - grad) / norm(numGrad + grad)
    print(diff)
    sys.exit(1)
    # The difference should be small. 
    # In our implementation, these values are usually less than 1e-7.
                                      
  #  Once you have verified that your gradients are correct, 
  #  you can start training your softmax regression code using L-BFGS.
  theta, _, _ = fmin_l_bfgs_b(softmaxCost, theta,
                              args = (numClasses, inputSize, decay, images, labels),
                              maxiter = 400, disp = 1)

  # Fold parameters into a matrix format.
  theta = np.reshape(theta, (numClasses, inputSize));

  return theta
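# softmaxCost is imported from the project's softmax module; the sketch below shows one
# way it could look, assuming images has shape (inputSize, m), labels holds integers in
# [0, numClasses), theta is the flattened (numClasses, inputSize) weight matrix and
# 'decay' is the L2 weight-decay coefficient. It is an illustration, not the project's
# actual implementation.
import numpy as np

def softmaxCost(theta, numClasses, inputSize, decay, images, labels):
    W = theta.reshape(numClasses, inputSize)
    m = images.shape[1]

    # Class scores, shifted for numerical stability, then normalised to probabilities
    scores = W.dot(images)                          # (numClasses, m)
    scores -= scores.max(axis=0, keepdims=True)
    probs = np.exp(scores)
    probs /= probs.sum(axis=0, keepdims=True)

    # Negative log-likelihood of the true classes plus the weight-decay penalty
    cost = -np.mean(np.log(probs[labels, np.arange(m)])) + 0.5 * decay * np.sum(W ** 2)

    # Gradient: (probabilities - one-hot targets) times the inputs, averaged over examples
    ground_truth = np.zeros_like(probs)
    ground_truth[labels, np.arange(m)] = 1.0
    grad = (probs - ground_truth).dot(images.T) / m + decay * W

    return cost, grad.ravel()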
Example #9
def checkCostFunction(lambda_var=0):
    #CHECKCOSTFUNCTION Creates a collaborative filtering problem
    #to check your cost function and gradients
    #   CHECKCOSTFUNCTION(lambda_var) Creates a collaborative filtering problem
    #   to check your cost function and gradients, it will output the
    #   analytical gradients produced by your code and the numerical gradients
    #   (computed using computeNumericalGradient). These two gradient
    #   computations should result in very similar values.

    # Set lambda_var
    # if not lambda_var or not 'lambda_var' in locals():
    #     lambda_var = 0

    ## Create small problem
    X_t = np.random.rand(4, 3)
    Theta_t = np.random.rand(5, 3)

    # Zap out most entries
    Y = np.dot(X_t, Theta_t.T)
    Y[np.random.rand(Y.shape[0], Y.shape[1]) > 0.5] = 0
    R = np.zeros(Y.shape)
    R[Y != 0] = 1

    ## Run Gradient Checking
    X = np.random.randn(X_t.shape[0], X_t.shape[1])
    Theta = np.random.randn(Theta_t.shape[0], Theta_t.shape[1])
    num_users = Y.shape[1]
    num_movies = Y.shape[0]
    num_features = Theta_t.shape[1]

    params = np.concatenate(
        (X.reshape(X.size, order='F'), Theta.reshape(Theta.size, order='F')))

    # Short hand for cost function
    def costFunc(p):
        return cofiCostFunc(p, Y, R, num_users, num_movies, num_features,
                            lambda_var)

    numgrad = computeNumericalGradient(costFunc, params)

    cost, grad = cofiCostFunc(params, Y, R, num_users, num_movies,
                              num_features, lambda_var)

    print(np.column_stack((numgrad, grad)))
    print('The above two columns you get should be very similar.\n' \
             '(Left-Your Numerical Gradient, Right-Analytical Gradient)')

    diff = np.linalg.norm(numgrad - grad) / np.linalg.norm(numgrad + grad)
    print('If your cost function implementation is correct, then \n' \
             'the relative difference will be small (less than 1e-9). ' \
             '\nRelative Difference: {:e}'.format(diff))
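# cofiCostFunc itself is not reproduced in this collection. A minimal sketch of the
# regularised collaborative-filtering cost and gradient is given below, assuming params
# packs X (num_movies x num_features) followed by Theta (num_users x num_features) in
# Fortran order, matching the example above; other examples here flatten in C order, so
# the reshape calls would change accordingly.
import numpy as np

def cofiCostFunc(params, Y, R, num_users, num_movies, num_features, lambda_var):
    # Unpack the parameter vector
    X = params[:num_movies * num_features].reshape(num_movies, num_features, order='F')
    Theta = params[num_movies * num_features:].reshape(num_users, num_features, order='F')

    # Only entries that were actually rated (R == 1) contribute to the error
    err = (X.dot(Theta.T) - Y) * R

    cost = 0.5 * np.sum(err ** 2) \
        + 0.5 * lambda_var * (np.sum(Theta ** 2) + np.sum(X ** 2))

    X_grad = err.dot(Theta) + lambda_var * X
    Theta_grad = err.T.dot(X) + lambda_var * Theta

    grad = np.concatenate((X_grad.reshape(X_grad.size, order='F'),
                           Theta_grad.reshape(Theta_grad.size, order='F')))
    return cost, grad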
Example #10
def checkNNGradients(_lambda=0):
#CHECKNNGRADIENTS Creates a small neural network to check the
#backpropagation gradients
#   CHECKNNGRADIENTS(lambda) Creates a small neural network to check the
#   backpropagation gradients, it will output the analytical gradients
#   produced by your backprop code and the numerical gradients (computed
#   using computeNumericalGradient). These two gradient computations should
#   result in very similar values.
#

    input_layer_size = 3
    hidden_layer_size = 5
    num_labels = 3
    m = 5

    # We generate some 'random' test data
    Theta1 = debugInitializeWeights(hidden_layer_size, input_layer_size)
    Theta2 = debugInitializeWeights(num_labels, hidden_layer_size)

    # Reusing debugInitializeWeights to generate X
    X  = debugInitializeWeights(m, input_layer_size - 1)
    y  = np.mod(np.arange(0,m), num_labels).reshape(-1,1)


    # Unroll parameters
    nn_params = np.hstack((Theta1.flatten(), Theta2.flatten()))

    # Short hand for cost function
    costFunc = functools.partial(nnCostFunction, input_layer_size = input_layer_size, hidden_layer_size = hidden_layer_size, num_labels = num_labels, X = X, y = y, _lambda = _lambda)
    #cost, grad = nnCostFunction(p, input_layer_size, hidden_layer_size, num_labels, X, y, _lambda)
    #(nn_params, input_layer_size, hidden_layer_size, num_labels, X, y, _lambda)
    cost, grad  = costFunc(nn_params)
    numgrad = computeNumericalGradient(costFunc, nn_params)


    # Visually examine the two gradient computations.  The two columns
    # you get should be very similar.

    print(np.vstack((numgrad, grad)).T)
    print('The above two columns you get should be very similar.\n (Left-Your Numerical Gradient, Right-Analytical Gradient)\n\n')

    # Evaluate the norm of the difference between two solutions.
    # If you have a correct implementation, and assuming you used EPSILON = 0.0001
    # in computeNumericalGradient.m, then diff below should be less than 1e-9
    diff = np.linalg.norm(numgrad-grad)/np.linalg.norm(numgrad+grad)

    print('If your backpropagation implementation is correct, then \nthe relative difference will be small (less than 1e-9). \nRelative Difference:', diff)
def checkCostFunction(Lambda=0):
    """Creates a collaborative filering problem
    to check your cost function and gradients, it will output the
    analytical gradients produced by your code and the numerical gradients
    (computed using computeNumericalGradient). These two gradient
    computations should result in very similar values.
    """

    ## Create small problem
    X_t = np.random.rand(4, 3)
    Theta_t = np.random.rand(5, 3)

    # Zap out most entries
    Y = X_t.dot(Theta_t.T)
    Y[np.random.random_sample(Y.shape) > 0.5] = 0
    R = np.zeros(Y.shape)
    R[Y != 0] = 1

    ## Run Gradient Checking
    X = np.random.random_sample(X_t.shape)
    Theta = np.random.random_sample(Theta_t.shape)
    num_users = Y.shape[1]
    num_movies = Y.shape[0]
    num_features = Theta_t.shape[1]

    # Unroll parameters
    params = np.hstack((X.T.flatten(), Theta.T.flatten()))

    costFunc = lambda t: cofiCostFunc(t, Y, R, num_users, num_movies,
                                      num_features, Lambda)

    def costFunc_w(t):
        Jgrad = costFunc(t)
        return Jgrad

    numgrad = computeNumericalGradient(costFunc_w, params)

    cost, grad = cofiCostFunc(params, Y, R, num_users, num_movies,
                              num_features, Lambda)

    print(np.column_stack((numgrad, grad)))

    print('The above two columns you get should be very similar.\n'
          '(Left-Your Numerical Gradient, Right-Analytical Gradient)\n\n')

    diff = np.linalg.norm(numgrad - grad) / np.linalg.norm(numgrad + grad)

    print('If your cost function implementation is correct, then\n '
          'the relative difference will be small (less than 1e-9). \n'
          '\nRelative Difference: %g\n' % diff)
def checkCostFunction(lambda_var=0):
    #CHECKCOSTFUNCTION Creates a collaborative filtering problem
    #to check your cost function and gradients
    #   CHECKCOSTFUNCTION(lambda_var) Creates a collaborative filtering problem
    #   to check your cost function and gradients, it will output the 
    #   analytical gradients produced by your code and the numerical gradients 
    #   (computed using computeNumericalGradient). These two gradient 
    #   computations should result in very similar values.

    # Set lambda_var
    # if not lambda_var or not 'lambda_var' in locals():
    #     lambda_var = 0

    ## Create small problem
    X_t = np.random.rand(4, 3)
    Theta_t = np.random.rand(5, 3)

    # Zap out most entries
    Y = np.dot(X_t, Theta_t.T)
    Y[np.random.rand(Y.shape[0], Y.shape[1]) > 0.5] = 0
    R = np.zeros(Y.shape)
    R[Y != 0] = 1

    ## Run Gradient Checking
    X = np.random.randn(X_t.shape[0], X_t.shape[1])
    Theta = np.random.randn(Theta_t.shape[0], Theta_t.shape[1])
    num_users = Y.shape[1]
    num_movies = Y.shape[0]
    num_features = Theta_t.shape[1]

    params = np.concatenate((X.reshape(X.size, order='F'), Theta.reshape(Theta.size, order='F')))

    # Short hand for cost function
    def costFunc(p):
        return ccf.cofiCostFunc(p, Y, R, num_users, num_movies, num_features, lambda_var)

    numgrad = cng.computeNumericalGradient(costFunc, params)

    cost, grad = ccf.cofiCostFunc(params, Y, R, num_users, num_movies, num_features, lambda_var)


    print(np.column_stack((numgrad, grad)))
    print('The above two columns you get should be very similar.\n' \
             '(Left-Your Numerical Gradient, Right-Analytical Gradient)')

    diff = np.linalg.norm(numgrad-grad)/np.linalg.norm(numgrad+grad)
    print('If your cost function implementation is correct, then \n' \
             'the relative difference will be small (less than 1e-9). ' \
             '\nRelative Difference: {:e}'.format(diff))
def checkNNGradients(reg_lambda=0):
    """ Creates a small neural network to check the backpropagation gradients
        CHECKNNGRADIENTS(reg_lambda) Creates a small neural network to check the
        backpropagation gradients, it will output the analytical gradients
        produced by your backprop code and the numerical gradients (computed
        using computeNumericalGradient). These two gradient computations should
        result in very similar values."""

    input_layer_size = 3
    hidden_layer_size = 5
    num_labels = 3
    m = 5

    # We generate some 'random' test data
    Theta1 = debugInitializeWeights(hidden_layer_size, input_layer_size)
    Theta2 = debugInitializeWeights(num_labels, hidden_layer_size)

    # Reusing debugInitializeWeights to generate X
    X = debugInitializeWeights(m, input_layer_size - 1)
    y = np.mod(np.arange(m), num_labels).T.reshape(m, 1)

    # Unroll parameters
    nn_params = np.r_[Theta1.ravel(), Theta2.ravel()]

    # Short hand for cost function
    costFunc = lambda params: nnCostFunction(
        params, input_layer_size, hidden_layer_size, num_labels, X, y,
        reg_lambda)

    cost, grad = costFunc(nn_params)

    numgrad = computeNumericalGradient(costFunc, nn_params)

    # Visually examine the two gradient computations.  The two columns
    # you get should be very similar.
    print(numgrad, grad)

    print('The above two columns you get should be very similar.\n',
          '(Left-Your Numerical Gradient, Right-Analytical Gradient)\n\n')

    # Evaluate the norm of the difference between two solutions.
    # If you have a correct implementation, and assuming you used EPSILON = 0.0001
    # in computeNumericalGradient.py, then diff below should be less than 1e-9
    diff = np.linalg.norm(numgrad - grad) / np.linalg.norm(numgrad + grad)

    print('If your backpropagation implementation is correct, then \n',
          'the relative difference will be small (less than 1e-9). \n',
          '\nRelative Difference: \n', diff)
def checkNNGradients(lambda_=0):
    """
        Creates a small neural network to check the
        backpropagation gradients, it will output the analytical gradients
        produced by your backprop code and the numerical gradients (computed
        using computeNumericalGradient). These two gradient computations should
        result in very similar values.
    """

    input_layer_size = 3
    hidden_layer_size = 5
    num_labels = 3
    m = 5

    # We generate some 'random' test data
    Theta1 = debugInitializeWeights(hidden_layer_size, input_layer_size)
    Theta2 = debugInitializeWeights(num_labels, hidden_layer_size)

    # Reusing debugInitializeWeights to generate X
    X = debugInitializeWeights(m, input_layer_size - 1)
    y = np.arange(1, m + 1) % num_labels

    # Unroll parameters
    nn_params = np.r_[Theta1.flatten(order='F'), Theta2.T.flatten(order='F')]

    # Short hand for cost function
    def costFunction(p):
        return nnCostFunction(p, input_layer_size, hidden_layer_size,
                              num_labels, X, y, lambda_)

    numgrad = computeNumericalGradient(costFunction, nn_params)
    _, grad = costFunction(nn_params)

    # Visually examine the two gradient computations.  The two columns
    # you get should be very similar.
    print(np.c_[numgrad, grad])

    print('The above two columns you get should be very similar.\n'
          '(Left-Your Numerical Gradient, Right-Analytical Gradient)\n\n')

    # Evaluate the norm of the difference between two solutions.
    # If you have a correct implementation, and assuming you used EPSILON 1e-4
    # in computeNumericalGradient.m, then diff below should be less than 1e-9
    diff = np.linalg.norm(numgrad - grad) / np.linalg.norm(numgrad + grad)

    print('If your backpropagation implementation is correct, then\n '
          'the relative difference will be small (less than 1e-9). \n'
          '\nRelative Difference: %g\n' % diff)
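# A minimal usage sketch (assumed driver code, not part of the original listing): the
# exercises typically run the check once without regularisation and once with a non-zero
# lambda to verify the regularised gradients as well.
if __name__ == '__main__':
    checkNNGradients()              # unregularised gradients
    checkNNGradients(lambda_=3.0)   # regularised gradients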
def checkNNGradients(Lambda = 0):

    """Creates a small neural network to check the
    backpropagation gradients, it will output the analytical gradients
    produced by your backprop code and the numerical gradients (computed
    using computeNumericalGradient). These two gradient computations should
    result in very similar values.
    """

    input_layer_size = 3
    hidden_layer_size = 5
    num_labels = 3
    m = 5

    # We generate some 'random' test data
    Theta1 = debugInitializeWeights(hidden_layer_size, input_layer_size)
    Theta2 = debugInitializeWeights(num_labels, hidden_layer_size)

    # Reusing debugInitializeWeights to generate X
    X  = debugInitializeWeights(m, input_layer_size - 1)
    y  = np.mod(range(1, m+1), num_labels)

    # Unroll parameters
    nn_params = np.hstack((Theta1.T.ravel(), Theta2.T.ravel()))

    # Short hand for cost function

    costFunc = lambda p: nnCostFunction(p, input_layer_size, hidden_layer_size, num_labels, X, y, Lambda)

    numgrad = computeNumericalGradient(costFunc, nn_params)
    grad = costFunc(nn_params)[1]
    
    # Visually examine the two gradient computations.  The two columns
    # you get should be very similar.
    print(np.column_stack((numgrad, grad)))
    print('The above two columns you get should be very similar.\n'
          '(Left-Your Numerical Gradient, Right-Analytical Gradient)\n\n')

    # Evaluate the norm of the difference between two solutions.
    # If you have a correct implementation, and assuming you used EPSILON = 0.0001
    # in computeNumericalGradient.m, then diff below should be less than 1e-9
    diff = np.linalg.norm(numgrad-grad)/np.linalg.norm(numgrad+grad)

    print('If your backpropagation implementation is correct, then\n '
          'the relative difference will be small (less than 1e-9). \n'
          '\nRelative Difference: %g\n' % diff)
def checkCostFunction(reg_lambda=0):
    """ Creates a collaborative filtering problem
        to check your cost function and gradients
        checkCostFunction(lambda) Creates a collaborative filtering problem
        to check your cost function and gradients, it will output the
        analytical gradients produced by your code and the numerical gradients
        (computed using computeNumericalGradient). These two gradient
        computations should result in very similar values."""

    # Create small problem
    X_t = np.random.rand(4, 3)
    Theta_t = np.random.rand(5, 3)

    # Zap out most entries
    Y = X_t.dot(Theta_t.T)
    rand_data = np.random.randn(*Y.shape)
    Y[np.where(rand_data > 0.5)] = 0
    R = np.zeros(Y.shape)
    R[np.where(Y != 0)] = 1

    # Run Gradient Checking
    X = np.random.randn(*X_t.shape)
    Theta = np.random.randn(*Theta_t.shape)
    num_movies, num_users = Y.shape
    num_features = Theta_t.shape[1]

    # build params
    params = np.r_[X.flatten(), Theta.flatten()].reshape(-1, 1)

    costFunc = lambda t: cofiCostFunc(t, Y, R, num_users, num_movies, num_features, reg_lambda)

    numgrad = computeNumericalGradient(costFunc, params)

    cost, grad = costFunc(params)

    # make sure both grad have the same shape
    grad = grad.reshape(numgrad.shape)
    print(np.c_[numgrad.ravel(), grad.ravel()])
    print('The above two columns you get should be very similar. '
          '(Left-Your Numerical Gradient, Right-Analytical Gradient)\n\n')

    diff = np.linalg.norm(numgrad - grad) / np.linalg.norm(numgrad + grad)
    print('If your cost function implementation is correct, then \n the relative difference '
          'will be small (less than 1e-9). '
          '\n \nRelative Difference: \n', diff)
def checkNNGradients(lmbda=0):
	#CHECKNNGRADIENTS Creates a small neural network to check the
	#backpropagation gradients
	#   CHECKNNGRADIENTS(lmbda) Creates a small neural network to check the
	#   backpropagation gradients, it will output the analytical gradients
	#   produced by your backprop code and the numerical gradients (computed
	#   using computeNumericalGradient). These two gradient computations should
	#   result in very similar values.
	#

	input_layer_size = 3
	hidden_layer_size = 5
	num_labels = 3
	m = 5

	# We generate some 'random' test data
	Theta1 = debugInitializeWeights(hidden_layer_size, input_layer_size)
	Theta2 = debugInitializeWeights(num_labels, hidden_layer_size)
	# Reusing debugInitializeWeights to generate X
	X  = debugInitializeWeights(m, input_layer_size - 1)
	y  = (np.reshape(np.mod(range(0,m), num_labels), (1,m))).flatten()

	# Unroll parameters
	nn_params = np.concatenate((Theta1.ravel(), Theta2.ravel()), axis=0)

	# Short hand for cost function
	costFunc = lambda p : nnCostFunction(p, input_layer_size, hidden_layer_size, num_labels, X, y, lmbda)

	cost, grad = costFunc(nn_params)
	numgrad = computeNumericalGradient(costFunc, nn_params)

	# Visually examine the two gradient computations.  The two columns
	# you get should be very similar.
	print(np.concatenate((np.reshape(numgrad, (1, numgrad.size)).T, np.reshape(grad, (1, grad.size)).T), axis=1))
	print('The above two columns you get should be very similar.\n(Left-Your Numerical Gradient, Right-Analytical Gradient)')

	# Evaluate the norm of the difference between two solutions.  
	# If you have a correct implementation, and assuming you used EPSILON = 0.0001 
	# in computeNumericalGradient.m, then diff below should be less than 1e-9
	diff = np.linalg.norm(numgrad-grad) / np.linalg.norm(numgrad+grad)

	print('If your backpropagation implementation is correct, then \nthe relative difference will be small (less than 1e-9). \nRelative Difference: {}\n'.format(diff))
def checkCostFunction(*xlambda):
    # unwrap the optional regularisation parameter (a tuple when passed via *args)
    xlambda = xlambda[0] if xlambda else 0

    # Create small problem
    X_t = np.random.rand(4, 3)
    Theta_t = np.random.rand(5, 3)

    # Zap out most entries
    Y = np.dot(X_t, Theta_t.T)
    Y[np.where(np.random.rand(Y.shape[0], Y.shape[1]) > 0.5)] = 0
    R = np.zeros(np.shape(Y))
    R[np.where(Y != 0)] = 1

    # Run Gradient Checking
    X = np.random.randn(X_t.shape[0], X_t.shape[1])
    Theta = np.random.randn(Theta_t.shape[0], Theta_t.shape[1])
    num_users = Y.shape[1]
    num_movies = Y.shape[0]
    num_features = Theta_t.shape[1]

    # cost function
    def cost_func(p):
        return cCF.cofiCostFunc(p, Y, R, num_users, num_movies, num_features,
                                xlambda)

    nn_params = np.r_[(X.ravel().reshape(num_movies * num_features, 1),
                       Theta.ravel().reshape(num_users * num_features, 1))]
    numgrad = cNG.computeNumericalGradient(cost_func, nn_params)

    cost, grad = cCF.cofiCostFunc(nn_params, Y, R, num_users, num_movies,
                                  num_features, xlambda)

    # Visually examine the two gradient computations.  The two columns you get should be very similar.
    print(np.c_[numgrad, grad])
    print(
        'The above two columns you get should be very similar.\n(Left: Numerical Gradient\tRight: Analytical Gradient)'
    )

    diff = np.linalg.norm(numgrad - grad) / np.linalg.norm(numgrad + grad)
    print('''If your cost function implementation is correct, then
         the relative difference will be small (less than 1e-9).
         Relative Difference: %.16f''' % diff)
def checkNNGradients(_lambda=None):
    
    if _lambda is None:
        _lambda = 0

    input_layer_size = 3
    hidden_layer_size = 5
    num_labels = 3
    m = 5

    # We generate some 'random' test data
    Theta1 = debugInitializeWeights(hidden_layer_size, input_layer_size)
    Theta2 = debugInitializeWeights(num_labels, hidden_layer_size)
    # Reusing debugInitializeWeights to generate X
    X = debugInitializeWeights(m, input_layer_size - 1)
    y = (1 + np.arange(m)) % num_labels
    y = y.reshape(-1,1)

    # Unroll parameters
    nn_params = np.append(Theta1.flatten(),Theta2.flatten())

    # Short hand for cost function
    costFunc = lambda p: nnCostFunction(p, input_layer_size, hidden_layer_size, \
                               num_labels, X, y, _lambda)

    cost, grad = costFunc(nn_params)
    numgrad = computeNumericalGradient(costFunc, nn_params)

    # Visually examine the two gradient computations.  The two columns
    # you get should be very similar. 
    print(grad)
    print(numgrad)
    print('The above two columns you get should be very similar.\n \
            (Left-Your Numerical Gradient, Right-Analytical Gradient)\n\n')

    # Evaluate the norm of the difference between two solutions.  
    # If you have a correct implementation, and assuming you used EPSILON = 0.0001 
    # in computeNumericalGradient.m, then diff below should be less than 1e-9
    diff = np.linalg.norm(numgrad-grad)/np.linalg.norm(numgrad+grad)

    print('If your backpropagation implementation is correct, then \n \
          the relative difference will be small (less than 1e-9). \n \
          \nRelative Difference: %g\n'%diff)
def checkCostFunction(_lambda=None):

    if _lambda is None:
        _lambda = 0

    ## Create small problem
    X_t = np.random.rand(4, 3)
    Theta_t = np.random.rand(5, 3)

    # Zap out most entries
    Y = np.dot(X_t, Theta_t.T)
    Y[np.where(np.random.rand(Y.shape[0], Y.shape[1]) > 0.5)] = 0
    R = np.zeros(Y.shape)
    R[np.where(Y != 0)] = 1
    R = R.astype(int)

    ## Run Gradient Checking
    X = np.random.randn(X_t.shape[0], X_t.shape[1])
    Theta = np.random.randn(Theta_t.shape[0], Theta_t.shape[1])
    num_users = Y.shape[1]
    num_movies = Y.shape[0]
    num_features = Theta_t.shape[1]

    func = lambda t: cofiCostFunc(t, Y, R, num_users, num_movies, num_features,
                                  _lambda)
    numgrad = computeNumericalGradient(func,
                                       np.append(X.flatten(), Theta.flatten()))

    cost, grad = cofiCostFunc(
        np.append(X.flatten(), Theta.flatten()), \
        Y, R, num_users, num_movies, num_features, _lambda
    )

    print(numgrad)
    print(grad)
    print("The above two columns you get should be very similar.\n \
        (Left-Your Numerical Gradient, Right-Analytical Gradient)\n\n")

    diff = np.linalg.norm(numgrad - grad) / np.linalg.norm(numgrad + grad)
    print('If your cost function implementation is correct, then \n \
        the relative difference will be small (less than 1e-9). \n \
        \nRelative Difference: %g\n' % diff)
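# A minimal usage sketch (assumed driver code, not part of the original listing): verify
# the unregularised collaborative-filtering gradients first, then repeat with lambda = 1.5.
if __name__ == '__main__':
    checkCostFunction()
    checkCostFunction(1.5)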
Example #21
def checkNNGradients(lambda_value=0):
    input_layer_size = 3
    hidden_layer_size = 5
    num_labels = 3
    m = 5

    # We generate some 'random' test data
    Theta1 = debugInitializeWeights(hidden_layer_size, input_layer_size)
    Theta2 = debugInitializeWeights(num_labels, hidden_layer_size)
    # Reusing debugInitializeWeights to generate X
    X = debugInitializeWeights(m, input_layer_size - 1)
    y = 1 + np.transpose(np.mod(range(1, m + 1), num_labels))
    #    y=np.expand_dims(y,axis=1)

    # Unroll parameters
    Theta1_1d = np.reshape(Theta1, Theta1.size, order='F')
    Theta2_1d = np.reshape(Theta2, Theta2.size, order='F')

    nn_params = np.hstack((Theta1_1d, Theta2_1d))

    # Short hand for cost function
    costFunc = lambda p: nnCostFunction(p, input_layer_size, hidden_layer_size,
                                        num_labels, X, y, lambda_value)

    cost, grad = costFunc(nn_params)
    numgrad = computeNumericalGradient(costFunc,
                                       np.expand_dims(nn_params, axis=1))

    # Visually examine the two gradient computations.  The two columns
    # you get should be very similar.
    print(numgrad, grad)
    print(
        'The above two columns you get should be very similar.\n (Left-Numerical Gradient, Right-(Your) Analytical Gradient)\n\n'
    )

    # Evaluate the norm of the difference between two solutions.
    # If you have a correct implementation, and assuming you used EPSILON = 0.0001
    # in computeNumericalGradient.m, then diff below should be less than 1e-9
    diff = np.linalg.norm(numgrad - grad) / np.linalg.norm(numgrad + grad)
    print(
        'If your backpropagation implementation is correct, then \n the relative difference will be small (less than 1e-9). \n \nRelative Difference: ',
        diff)
Example #22
def checkCostFunction(_lambda=0):
    X_t = np.random.rand(4, 3)
    Theta_t = np.random.rand(5, 3)

    # Zap out most entries
    Y = np.dot(X_t, Theta_t.T)
    Y[np.random.rand(Y.shape[0], Y.shape[1]) > 0.5] = 0
    R = np.zeros(Y.shape)
    R[Y != 0] = 1

    # Run Gradient Checking
    X = np.random.randn(X_t.shape[0], X_t.shape[1])
    Theta = np.random.randn(Theta_t.shape[0], Theta_t.shape[1])
    num_users = Y.shape[1]
    num_movies = Y.shape[0]
    num_features = Theta_t.shape[1]

    params = np.concatenate(
        (X.reshape(X.size, order='F'), Theta.reshape(Theta.size, order='F')))

    def costFunc(t):
        return cofiCostFunc(t, Y, R, num_users, num_movies, num_features,
                            _lambda, True)

    _, grad = costFunc(params)
    numgrad = computeNumericalGradient(costFunc, params)

    print('Numerical Gradient', 'Analytical Gradient')
    for numerical, analytical in zip(numgrad, grad):
        print(numerical, analytical)

    print('The above two columns you get should be very similar.\n' \
             '(Left Col.: Your Numerical Gradient, Right Col.: Analytical Gradient)')

    diff = Decimal(np.linalg.norm(numgrad - grad)) / Decimal(
        np.linalg.norm(numgrad + grad))
    print('If your backpropagation implementation is correct, then \n' \
             'the relative difference will be small (less than 1e-9). \n' \
             '\nRelative Difference: {:.10E}'.format(diff))
Example #23
def checkNNGradients(lmbda=0):
    input_layer_size = 3
    hidden_layer_size = 5
    num_labels = 3
    m = 5

    # We generate some 'random' test data
    Theta1 = debugInitializeWeights(hidden_layer_size, input_layer_size)
    Theta2 = debugInitializeWeights(num_labels, hidden_layer_size)
    # Reusing debugInitializeWeights to generate X
    X = debugInitializeWeights(m, input_layer_size - 1)
    y = np.mod(range(m), num_labels)

    # Unroll parameters
    nn_params = np.hstack((Theta1.flatten(), Theta2.flatten()))

    # Short hand for cost function
    costFunc = lambda nn_params: nnCostFunction(
        nn_params, input_layer_size, hidden_layer_size, num_labels, X, y, lmbda
    )

    cost, grad = costFunc(nn_params)
    numgrad = computeNumericalGradient(costFunc, nn_params)

    # Visually examine the two gradient computations.  The two columns
    # you get should be very similar.
    print(np.vstack((numgrad, grad)).T)
    print('The above two columns you get should be very similar.')
    print('(Left-Your Numerical Gradient, Right-Analytical Gradient)')

    # Evaluate the norm of the difference between two solutions.
    # If you have a correct implementation, and assuming you used EPSILON = 0.0001
    # in computeNumericalGradient.m, then diff below should be less than 1e-9
    diff = np.linalg.norm(numgrad - grad) / np.linalg.norm(numgrad + grad)

    print('If your backpropagation implementation is correct, then')
    print('the relative difference will be small (less than 1e-9).')
    print('Relative Difference: %g' % (diff))
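# nnCostFunction is imported from elsewhere; the sketch below is one possible
# implementation for a single hidden layer with sigmoid activations and a cross-entropy
# cost. It assumes C-order flattening (matching np.hstack((Theta1.flatten(),
# Theta2.flatten())) above) and integer labels in [0, num_labels); it is an illustration,
# not the original code.
import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def nnCostFunction(p, input_layer_size, hidden_layer_size, num_labels, X, y, lmbda):
    # Recover the two weight matrices from the unrolled parameter vector
    n1 = hidden_layer_size * (input_layer_size + 1)
    Theta1 = p[:n1].reshape(hidden_layer_size, input_layer_size + 1)
    Theta2 = p[n1:].reshape(num_labels, hidden_layer_size + 1)
    m = X.shape[0]

    # One-hot encode the labels
    Y = np.eye(num_labels)[np.asarray(y).ravel().astype(int)]

    # Forward propagation
    a1 = np.hstack((np.ones((m, 1)), X))
    z2 = a1.dot(Theta1.T)
    a2 = np.hstack((np.ones((m, 1)), sigmoid(z2)))
    h = sigmoid(a2.dot(Theta2.T))

    # Regularised cross-entropy cost (bias columns excluded from the penalty)
    J = -np.sum(Y * np.log(h) + (1 - Y) * np.log(1 - h)) / m
    J += lmbda / (2.0 * m) * (np.sum(Theta1[:, 1:] ** 2) + np.sum(Theta2[:, 1:] ** 2))

    # Backpropagation
    d3 = h - Y
    d2 = d3.dot(Theta2[:, 1:]) * sigmoid(z2) * (1.0 - sigmoid(z2))
    Theta1_grad = d2.T.dot(a1) / m
    Theta2_grad = d3.T.dot(a2) / m
    Theta1_grad[:, 1:] += lmbda / m * Theta1[:, 1:]
    Theta2_grad[:, 1:] += lmbda / m * Theta2[:, 1:]

    grad = np.hstack((Theta1_grad.flatten(), Theta2_grad.flatten()))
    return J, grad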
Example #24
def checkCostFunction(lambda_value=0):
    #CHECKCOSTFUNCTION Creates a collaborative filtering problem
    #to check your cost function and gradients
    #   CHECKCOSTFUNCTION(lambda) Creates a collaborative filtering problem
    #   to check your cost function and gradients, it will output the 
    #   analytical gradients produced by your code and the numerical gradients 
    #   (computed using computeNumericalGradient). These two gradient 
    #   computations should result in very similar values.

    ## Create small problem
    X_t = np.random.rand(4, 3)
    Theta_t = np.random.rand(5, 3)

    # Zap out most entries
    Y = np.dot(X_t, Theta_t.T)
    Y[np.random.rand(*Y.shape) > 0.5] = 0
    R = np.zeros(Y.shape)
    R[Y != 0] = 1

    ## Run Gradient Checking
    X = np.random.randn(*X_t.shape)
    Theta = np.random.randn(*Theta_t.shape)
    num_movies, num_users = Y.shape
    num_features = Theta_t.shape[1]

    numgrad = computeNumericalGradient(
        lambda x: cofiCostFunc(x, Y, R, num_users, num_movies, num_features, lambda_value), np.concatenate([X.ravel(), Theta.ravel()]))

    cost, grad = cofiCostFunc(np.concatenate([X.ravel(), Theta.ravel()]), Y, R, num_users, num_movies, num_features, lambda_value)

    print(np.stack([numgrad, grad], axis=1))
    print('The above two columns you get should be very similar.\n(Left-Your Numerical Gradient, Right-Analytical Gradient)\n')

    diff = np.linalg.norm(numgrad - grad) / np.linalg.norm(numgrad + grad)
    print('If your cost function implementation is correct, then \nthe relative difference will be small (less than 1e-9).\nRelative Difference: %g' % diff)

Example #25
def checkNNGradients(lambd):

    input_layer_size = 3
    hidden_layer_size = 5
    num_labels = 3
    m = 5
    layers = [3, 5, 3]

    # At this point we generate some 'random' test data
    Theta = []
    Theta.append(debugInitializeWeights(hidden_layer_size, input_layer_size))
    Theta.append(debugInitializeWeights(num_labels, hidden_layer_size))

    X = debugInitializeWeights(m, input_layer_size - 1)
    y = np.remainder(np.arange(m) + 1, num_labels)

    # Unroll parameters
    nn_params = unroll_params(Theta)

    # Compute Numerical Gradient
    numgrad = computeNumericalGradient(nn_params, layers, X, y, num_labels,
                                       lambd)

    # Compute Analytical Gradient (BackPropagation)
    truegrad = backwards(nn_params, layers, X, y, num_labels, lambd)

    print(np.concatenate(([numgrad], [truegrad]), axis=0).transpose())
    print(
        "The above two columns must be very similar.\n(Left-Numerical Gradient, Right-Analytical Gradient (BackPropagation))\n"
    )

    diff = np.linalg.norm(numgrad - truegrad) / np.linalg.norm(numgrad +
                                                               truegrad)
    print(
        "\nNote: If the implementation of the backpropagation is correct, the relative difference must be quite small (less than 1e-09)."
    )
    print("Relative difference: " + str(diff) + "\n")
Example #26
def checkNNGradients(lmbda=0):
    input_layer_size = 3
    hidden_layer_size = 5
    num_labels = 3
    m = 5

    # We generate some 'random' test data
    Theta1 = debugInitializeWeights(hidden_layer_size, input_layer_size)
    Theta2 = debugInitializeWeights(num_labels, hidden_layer_size)
    # Reusing debugInitializeWeights to generate X
    X = debugInitializeWeights(m, input_layer_size - 1)
    y = np.mod(range(m), num_labels)

    # Unroll parameters
    nn_params = np.hstack((Theta1.flatten(), Theta2.flatten()))

    # Short hand for cost function
    costFunc = lambda nn_params: nnCostFunction(nn_params, input_layer_size, hidden_layer_size, num_labels, X, y, lmbda)

    cost, grad = costFunc(nn_params)
    numgrad = computeNumericalGradient(costFunc, nn_params)

    # Visually examine the two gradient computations.  The two columns
    # you get should be very similar.
    print(np.vstack((numgrad, grad)).T)
    print("The above two columns you get should be very similar.")
    print("(Left-Your Numerical Gradient, Right-Analytical Gradient)")

    # Evaluate the norm of the difference between two solutions.
    # If you have a correct implementation, and assuming you used EPSILON = 0.0001
    # in computeNumericalGradient.m, then diff below should be less than 1e-9
    diff = np.linalg.norm(numgrad - grad) / np.linalg.norm(numgrad + grad)

    print("If your backpropagation implementation is correct, then")
    print("the relative difference will be small (less than 1e-9).")
    print("Relative Difference: %g" % (diff))
def checkNNGradients(lamda=0):
    input_layer_size = 3
    hidden_layer_size = 3
    num_labels = 3
    m = 5

    # We generate some 'random' test data
    Theta1 = debugInitializeWeights(hidden_layer_size, input_layer_size)
    Theta2 = debugInitializeWeights(num_labels, hidden_layer_size)
    X = debugInitializeWeights(m, input_layer_size - 1)
    y = 1 + np.mod(range(1, m + 1), num_labels).reshape((m, 1))

    # Unroll parameters
    nn_params = np.hstack((Theta1.T.ravel(), Theta2.T.ravel()))

    # Short hand for cost function
    costFunc = lambda p: nnCostFunction(
        p, input_layer_size, hidden_layer_size, num_labels, X, y, lamda)

    cost, grad = costFunc(nn_params)
    numgrad = computeNumericalGradient(costFunc, nn_params)

    # Visually examine the two gradient computations.  The two columns
    # you get should be very similar.
    print(np.vstack((numgrad, grad)).T)
    print('The above two columns you get should be very similar.\n' +
          '(Left-Your Numerical Gradient, Right-Analytical Gradient)')

    # Evaluate the norm of the difference between two solutions.
    # If you have a correct implementation, and assuming you used EPSILON = 0.0001
    # in computeNumericalGradient.py, then diff below should be less than 1e-9
    diff = np.linalg.norm(numgrad - grad) / np.linalg.norm(numgrad + grad)

    print('If your backpropagation implementation is correct, then \n' +
          'the relative difference will be small (less than 1e-9). \n' +
          'Relative Difference: %g\n' % diff)
cost, grad = softmaxCost(theta, numClasses, inputSize, decay, images, labels)

##======================================================================
## STEP 3: Gradient checking
#
#  As with any learning algorithm, you should always check that your
#  gradients are correct before learning the parameters.
#

if FLAGS.debug:
  # First, lets make sure your numerical gradient computation is correct for a
  # simple function.  After you have implemented computeNumericalGradient.py,
  # run the following: 
  checkNumericalGradient()

  numGrad = computeNumericalGradient(lambda x: softmaxCost(x, numClasses, inputSize, decay, images, labels),
                                     theta)

  # Use this to visually compare the gradients side by side.
  print(np.stack((numGrad, grad)).T)

  # Compare numerically computed gradients with those computed analytically.
  diff = norm(numGrad - grad) / norm(numGrad + grad)
  print(diff)
  sys.exit(1)
  # The difference should be small. 
  # In our implementation, these values are usually less than 1e-7.

  # When your gradients are correct, congratulations!
                                    
##======================================================================
## STEP 4: Learning parameters
Example #29
# Hint: If you are debugging your code, performing gradient checking on smaller
# models and smaller training sets (e.g., using only 10 training examples and
# 1-2  hidden units) may speed things up.

# First, lets make sure your numerical gradient computation is correct for a
# simple function.  After you have implemented computeNumericalGradient.m,
# run the following:

from checkNumericalGradient import checkNumericalGradient
checkNumericalGradient()

# Now we can use it to check your cost function and derivative calculations
# for the sparse autoencoder.
from computeNumericalGradient import computeNumericalGradient
numgrad = computeNumericalGradient(lambda x:sparseAutoencoderCost(x, visibleSize=visibleSize,
                                                  hiddenSize=hiddenSize, lam=lambdaval,
                                                  sparsityParam=sparsityParam, beta=beta,
                                                  data=patches)[0], theta)

# Use this to visually compare the gradients side by side
print(np.column_stack((numgrad, grad)))

# Compare numerically computed gradients with the ones obtained from backpropagation
diff = np.linalg.norm(numgrad-grad)*1.0/np.linalg.norm(numgrad+grad)
print('the difference in the gradients is: ', diff)
# print diff # Should be small. In our implementation, these values are
            # usually less than 1e-9.


            # When you got this working, Congratulations!!!

##======================================================================
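# sparseAutoencoderCost is imported from the project; the sketch below follows the steps
# described in the comments above (squared-error reconstruction, weight decay, KL sparsity
# penalty). The parameter layout (W1, W2, b1, b2) and the column-per-example data shape
# are assumptions, consistent with how W1 is unpacked elsewhere in this listing.
import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def sparseAutoencoderCost(theta, visibleSize, hiddenSize, lam, sparsityParam, beta, data):
    # Unpack W1 (hidden x visible), W2 (visible x hidden), b1, b2 from the flat vector
    hv = hiddenSize * visibleSize
    W1 = theta[:hv].reshape(hiddenSize, visibleSize)
    W2 = theta[hv:2 * hv].reshape(visibleSize, hiddenSize)
    b1 = theta[2 * hv:2 * hv + hiddenSize]
    b2 = theta[2 * hv + hiddenSize:]
    m = data.shape[1]

    # Forward pass (examples are columns of data)
    a2 = sigmoid(W1.dot(data) + b1[:, None])   # hidden activations
    a3 = sigmoid(W2.dot(a2) + b2[:, None])     # reconstruction

    # Squared error + weight decay + KL sparsity penalty
    rho = sparsityParam
    rho_hat = a2.mean(axis=1)
    kl = np.sum(rho * np.log(rho / rho_hat) + (1 - rho) * np.log((1 - rho) / (1 - rho_hat)))
    cost = 0.5 / m * np.sum((a3 - data) ** 2) \
        + 0.5 * lam * (np.sum(W1 ** 2) + np.sum(W2 ** 2)) \
        + beta * kl

    # Backpropagation, including the sparsity term in the hidden-layer delta
    d3 = (a3 - data) * a3 * (1.0 - a3)
    sparsity_delta = (-rho / rho_hat + (1 - rho) / (1 - rho_hat))[:, None]
    d2 = (W2.T.dot(d3) + beta * sparsity_delta) * a2 * (1.0 - a2)

    W1grad = d2.dot(data.T) / m + lam * W1
    W2grad = d3.dot(a2.T) / m + lam * W2
    b1grad = d2.mean(axis=1)
    b2grad = d3.mean(axis=1)

    grad = np.concatenate((W1grad.ravel(), W2grad.ravel(), b1grad, b2grad))
    return cost, grad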
Example #30
W = 0.01 * randn(numClasses, inputSize)
b = np.zeros((numClasses, 1))

##======================================================================
## STEP 2: Gradient checking
#
#  As with any learning algorithm, you should always check that your
#  gradients are correct before learning the parameters.
#

if FLAGS.debug:
    decay = 0.001
    cost, dW, db = softmaxCost(W, b, numClasses, inputSize, decay, instances,
                               labels)
    W_numGrad = computeNumericalGradient(
        lambda x: softmaxCost(x, b, numClasses, inputSize, decay, instances,
                              labels), W)

    # Use this to visually compare the gradients side by side.
    print(np.stack((W_numGrad.ravel(), dW.ravel())).T)

    # Compare numerically computed gradients with those computed analytically.
    diff = norm(W_numGrad - dW) / norm(W_numGrad + dW)
    print(diff)
    sys.exit(0)
    # The difference should be small.
    # In our implementation, these values are usually less than 1e-7.

##======================================================================
## STEP 3: Learning parameters
#
Example #31
def softmax_scipy():

    FLAGS = parse_args()

    # Initialize values
    inputSize = 28 * 28  # Size of input vector (MNIST images are 28x28)
    numClasses = 10  # Number of classes (MNIST images fall into 10 classes)
    decay = 1e-4  # Weight decay parameter

    # Load training data
    images = np.load(FLAGS.input_data_dir + 'train-images.npy')
    labels = np.load(FLAGS.input_data_dir + 'train-labels.npy')
    print("\n\n For MNIST train data")
    print("images.shape = {}".format(images.shape))  # (784, 55000)
    print("labels.shape = {}".format(labels.shape))  # (55000,)
    print("\n\n")

    # -------------------------------------------------------
    # Create data for debugging
    if FLAGS.debug:
        inputSize = 8
        np.random.seed(100)
        images = randn(8, 100)
        labels = randint(0, 10, 100, dtype=np.uint8)

    # Randomly initialise theta (theta is 1d array)
    np.random.seed(100)
    theta_init = 0.005 * randn(numClasses * inputSize)

    # Get cost and grad
    cost, grad = softmaxCost(theta_init, numClasses, inputSize, decay, images,
                             labels)

    # ---------------- debug: Gradient Checking Start ------------------------
    if FLAGS.debug:
        checkNumericalGradient()

        numGrad = computeNumericalGradient(
            lambda x: softmaxCost(x, numClasses, inputSize, decay, images,
                                  labels), theta_init)

        # Use this to visually compare the gradients side by side.
        print(np.stack((numGrad, grad)).T)

        # Compare numerically computed gradients with those computed analytically.
        diff = norm(numGrad - grad) / norm(numGrad + grad)
        print(diff)
        sys.exit(1)
    # ---------------- debug: Gradient Checking End ------------------------
    max_iters = 2000
    learning_rate = 0.1
    batchsize = 100
    batches = shuffle_and_split(images, labels, batchsize)
    # print("batches[0].shape = {}".format(batches[0].shape)) # (100, 785)

    # Fit the parameter theta
    theta, cost_lst = minibatch_grad_desc(theta_init, max_iters, batches,
                                          numClasses, inputSize, decay,
                                          learning_rate, batchsize)

    # Test the data
    images = np.load(FLAGS.input_data_dir + 'test-images.npy')
    labels = np.load(FLAGS.input_data_dir + 'test-labels.npy')
    print("\n\n For MNIST test data")
    print("images.shape = {}".format(images.shape))  # (784, 10000)
    print("labels.shape = {}".format(labels.shape))  # (10000,)
    print("\n\n")

    # Get prediction for test data
    theta = np.reshape(theta, (numClasses, inputSize))
    pred = softmaxPredict(theta, images)
    acc = np.mean(labels == pred)
    print('Accuracy: %0.3f%%.' % (acc * 100))  # 92.630%. (for eta = 10)
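# softmaxPredict is defined elsewhere in the project; a minimal sketch under the same
# assumptions as the softmaxCost sketch above (theta of shape (numClasses, inputSize),
# images with one example per column): predict the highest-scoring class per column.
import numpy as np

def softmaxPredict(theta, images):
    return np.argmax(theta.dot(images), axis=0)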
def run_training(FLAGS, patches):
  ##======================================================================
  ## STEP 1: Here we provide the relevant parameters values that will
  #  allow your sparse autoencoder to get good filters; you do not need to 
  #  change the parameters below.
  
  visibleSize = FLAGS.visibleSize  # number of input units 
  hiddenSize = FLAGS.hiddenSize    # number of hidden units 
  sparsityParam = FLAGS.rho        # desired average activation \rho of the hidden units.
  decay = FLAGS.decay              # weight decay parameter       
  beta = FLAGS.beta                # weight of sparsity penalty term
  
  #  Obtain random parameters theta
  theta = initializeParameters(hiddenSize, visibleSize)
  
  ##======================================================================
  ## STEP 2: Implement sparseAutoencoderCost
  #
  #  You can implement all of the components (squared error cost, weight decay term,
  #  sparsity penalty) in the cost function at once, but it may be easier to do 
  #  it step-by-step and run gradient checking (see STEP 3) after each step.  We 
  #  suggest implementing the sparseAutoencoderCost function using the following steps:
  #
  #  (a) Implement forward propagation in your neural network, and implement the 
  #      squared error term of the cost function.  Implement backpropagation to 
  #      compute the derivatives.   Then (using lambda=beta=0), run Gradient Checking 
  #      to verify that the calculations corresponding to the squared error cost 
  #      term are correct.
  #
  #  (b) Add in the weight decay term (in both the cost function and the derivative
  #      calculations), then re-run Gradient Checking to verify correctness. 
  #
  #  (c) Add in the sparsity penalty term, then re-run Gradient Checking to 
  #      verify correctness.
  #
  #  Feel free to change the training settings when debugging your
  #  code.  (For example, reducing the training set size or 
  #  number of hidden units may make your code run faster; and setting beta 
  #  and/or lambda to zero may be helpful for debugging.)  However, in your 
  #  final submission of the visualized weights, please use parameters we 
  #  gave in Step 0 above.
  
  cost, grad = sparseAutoencoderCost(theta, visibleSize, hiddenSize, decay,
                                     sparsityParam, beta, patches)
  
  ##======================================================================
  ## STEP 3: Gradient Checking
  #
  # Hint: If you are debugging your code, performing gradient checking on smaller models 
  # and smaller training sets (e.g., using only 10 training examples and 1-2 hidden 
  # units) may speed things up.
  
  
  if FLAGS.debug:
    # Now we can use it to check your cost function and derivative calculations
    # for the sparse autoencoder.
    cost, grad = sparseAutoencoderCost(theta, visibleSize, hiddenSize, decay, \
                                       sparsityParam, beta, patches)
    numGrad = computeNumericalGradient(lambda x: sparseAutoencoderCost(x, visibleSize, hiddenSize, decay, sparsityParam, beta, patches), theta)
  
    # Use this to visually compare the gradients side by side
    print(np.stack((numGrad, grad)).T)
  
    # Compare numerically computed gradients with the ones obtained from backpropagation
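    # (The relative difference normalizes by the gradients' magnitude:
    #  ||numGrad - grad|| / ||numGrad + grad||.)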
    diff = norm(numGrad - grad) / norm(numGrad + grad)
    print(diff) # Should be small. In our implementation, these values are
                # usually less than 1e-9.
    sys.exit(1) # Once this check passes, congratulations! (A central-difference
                # sketch of computeNumericalGradient is given after this function.)
    
  
  ##======================================================================
  ## STEP 4: After verifying that your implementation of
  #  sparseAutoencoderCost is correct, you can start training your sparse
  #  autoencoder with L-BFGS (here, scipy's fmin_l_bfgs_b).
  
  #  Randomly initialize the parameters.
  theta = initializeParameters(hiddenSize, visibleSize)
  
  #  Use L-BFGS to minimize the function.
  theta, _, _ = fmin_l_bfgs_b(sparseAutoencoderCost, theta,
                              args = (visibleSize, hiddenSize, decay, sparsityParam, beta, patches),
                              maxiter = 400, disp = 1)
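  # Note: fmin_l_bfgs_b is called with its default approx_grad=False, so it
  # expects sparseAutoencoderCost to return both the cost and the gradient,
  # which it does.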

  # save the learned parameters to external file
  pickle.dump(theta, open(FLAGS.log_dir + '/' + FLAGS.params_file, 'wb'))
  
  ##======================================================================
  ## STEP 5: Visualization 
  
  # Fold W1 parameters into a matrix format.
  W1 = np.reshape(theta[:hiddenSize * visibleSize], (hiddenSize, visibleSize))
  
  # Save the visualization to a file.
  displayNetwork(W1.T, file_name = 'weights_digits.jpg')

  return theta
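
##======================================================================
## Hedged sketch (assumption, not the course-provided implementation): a
## central-difference computeNumericalGradient matching the (J, theta) call
## pattern used in this file, where theta is a flat 1-D vector and J(theta)
## returns either the cost alone or a (cost, grad) tuple.
def computeNumericalGradient_sketch(J, theta, epsilon=1e-4):
    theta = np.asarray(theta, dtype=float)
    numgrad = np.zeros_like(theta)
    perturb = np.zeros_like(theta)
    for i in range(theta.size):
        perturb[i] = epsilon
        out_minus = J(theta - perturb)
        out_plus = J(theta + perturb)
        # Accept either a bare cost or a (cost, grad) return value.
        cost_minus = out_minus[0] if isinstance(out_minus, tuple) else out_minus
        cost_plus = out_plus[0] if isinstance(out_plus, tuple) else out_plus
        numgrad[i] = (cost_plus - cost_minus) / (2.0 * epsilon)
        perturb[i] = 0.0
    return numgrad
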
#  Obtain random parameters theta
theta = initializeParameters(hiddenSize, visibleSize)

##======================================================================
## Gradient Checking
#
# Hint: If you are debugging your code, performing gradient checking on smaller models 
# and smaller training sets (e.g., using only 10 training examples and 1-2 hidden 
# units) may speed things up.


if FLAGS.debug:
  # Check your cost function and derivative calculations for the sparse autoencoder.
  cost, grad = sparseAutoencoderCost(theta, visibleSize, hiddenSize, decay, \
                                     sparsityParam, beta, patches)
  numGrad = computeNumericalGradient(lambda x: sparseAutoencoderCost(x, visibleSize, hiddenSize, decay, sparsityParam, beta, patches), theta)

  # Use this to visually compare the gradients side by side
  print(np.stack((numGrad, grad)).T)

  # Compare numerically computed gradients with the ones obtained from backpropagation
  diff = norm(numGrad - grad) / norm(numGrad + grad)
  print(diff) # Should be small. In our implementation, these values are
              # usually less than 1e-9.
  sys.exit(1) # Once this check passes, congratulations!
  

##======================================================================
## After verifying that your implementation of sparseAutoencoderCost is 
# correct, you can start training your sparse autoencoder with L-BFGS.
theta = initializeParameters(hiddenSize, visibleSize)

##======================================================================
## Gradient Checking
#
# Hint: If you are debugging your code, performing gradient checking on smaller models
# and smaller training sets (e.g., using only 10 training examples and 1-2 hidden
# units) may speed things up.

if FLAGS.debug:
    # Now we can use it to check your cost function and derivative calculations
    # for the sparse autoencoder.
    cost, grad = sparseAutoencoderCost(theta, visibleSize, hiddenSize, decay,
                                       sparsityParam, beta, patches)
    numGrad = computeNumericalGradient(
        lambda x: sparseAutoencoderCost(x, visibleSize, hiddenSize, decay,
                                        sparsityParam, beta, patches), theta)

    # Use this to visually compare the gradients side by side
    print(np.stack((numGrad, grad)).T)

    # Compare numerically computed gradients with the ones obtained from backpropagation
    diff = norm(numGrad - grad) / norm(numGrad + grad)
    print(diff)  # Should be small. In our implementation, these values are
                 # usually less than 1e-9.
    sys.exit(1)  # Once this check passes, congratulations!

##======================================================================
## After verifying that your implementation of sparseAutoencoderCost is
# correct, you can start training your sparse autoencoder with L-BFGS.
##======================================================================
## Example 35
# Randomly initialize parameters
params = nn1Layer.initParams(n_x, n_h, n_y)

##======================================================================
## STEP 2: Gradient checking
#
#  As with any learning algorithm, you should always check that your
#  gradients are correct before learning the parameters.
#

if FLAGS.debug:
    a3, cache = nn1Layer.forward(X, params)
    dParams = nn1Layer.backward(X, y, params, cache, decay)

    dNumParams = computeNumericalGradient(
        lambda p: nn1Layer.cost(X, y, p, decay), params)

    rdParams = nn1Layer.ravelGrads(dParams)
    rdnParams = nn1Layer.ravelGrads(dNumParams)

    # Use this to visually compare the gradients side by side.
    print(rdnParams.shape)
    print(rdParams.shape)
    print(np.stack((rdnParams, rdParams)).T)

    # Compare numerically computed gradients with those computed analytically.
    diff = norm(rdnParams - rdParams) / norm(rdnParams + rdParams)
    # The difference should be small.
    # In our implementation, these values are usually less than 1e-7.
    print(diff)
    sys.exit(0)
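
##======================================================================
## Hedged sketch (hypothetical, since nn1Layer is not shown in this file):
## ravelGrads presumably flattens the per-parameter gradient arrays into one
## 1-D vector so the analytical and numerical gradients can be compared
## elementwise. Assuming the gradients come as a dict of arrays:
def ravelGrads_sketch(grads):
    # Sort keys so both gradient dicts are flattened in the same order.
    return np.concatenate([np.asarray(grads[k]).ravel() for k in sorted(grads)])
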
cost, grad = softmaxCost(theta, numClasses, inputSize, decay, images, labels)

##======================================================================
## STEP 3: Gradient checking
#
#  As with any learning algorithm, you should always check that your
#  gradients are correct before learning the parameters.
#

if FLAGS.debug:
  # First, let's make sure your numerical gradient computation is correct for a
  # simple function.  After you have implemented computeNumericalGradient.py,
  # run the following:
  checkNumericalGradient()
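
  # Hedged sketch (assumption: the course's own simple test function may differ):
  # a two-variable function with a known analytical gradient, in the
  # (value, grad) convention that checkNumericalGradient compares against.
  def simpleQuadratic_sketch(x):
    value = x[0] ** 2 + 3.0 * x[0] * x[1]
    grad = np.array([2.0 * x[0] + 3.0 * x[1], 3.0 * x[0]])
    return value, grad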
  
  numGrad = computeNumericalGradient(lambda x: softmaxCost(x, numClasses, inputSize, decay, images, labels),
                                     theta)

  # Use this to visually compare the gradients side by side.
  print(np.stack((numGrad, grad)).T)

  # Compare numerically computed gradients with those computed analytically.
  diff = norm(numGrad - grad) / norm(numGrad + grad)
  print(diff)
  # The difference should be small.
  # In our implementation, these values are usually less than 1e-7.
  # When your gradients are correct, congratulations!
  sys.exit(1)
                                    
##======================================================================
## STEP 4: Learning parameters
##======================================================================
## Example 37
cost, grad = sparseAutoencoderCost(theta, visibleSize, hiddenSize, _lambda,
                                   sparsityParam, beta, patches.T)

##======================================================================
## STEP 3: Gradient Checking

# Hint: If you are debugging your code, performing gradient checking on smaller models
# and smaller training sets (e.g., using only 10 training examples and 1-2 hidden
# units) may speed things up.

# Short hand for cost function
costFunc = lambda p: sparseAutoencoderCost(
    p, visibleSize, hiddenSize, _lambda, sparsityParam, beta, patches.T[0:10])
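# Note (assumption about the data layout): patches.T[0:10] keeps only the first
# 10 rows of patches.T; check that this really selects 10 training examples
# rather than 10 input dimensions for your orientation of patches.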

cost, grad = costFunc(theta)
numgrad = computeNumericalGradient(costFunc, theta)

# Visually examine the two gradient computations.  The two columns
# you get should be very similar.
print(grad)
print(numgrad)
print('The above two columns you get should be very similar.\n \
(Left-Your Numerical Gradient, Right-Analytical Gradient)\n\n')

# Evaluate the norm of the difference between two solutions.
# If you have a correct implementation, and assuming you used EPSILON = 0.0001
# in computeNumericalGradient.py, then diff below should be less than 1e-9.
diff = np.linalg.norm(numgrad - grad) / np.linalg.norm(numgrad + grad)

print('If your backpropagation implementation is correct, then \n \
        the relative difference will be small (less than 1e-9). \n \