# Check correctness of the stacked autoencoder cost function using gradient check
inputSize = 4
hiddenSize = 5
lambdaParam = 0.01
data = random.normal(size=(5, inputSize))
labels = array([0, 1, 0, 1, 0])
numClasses = 2

# Build a small two-layer stack with random weights and zero biases
stack = [Layer(1), Layer(2)]
stack[0].W = 0.1 * random.normal(size=(3, inputSize))
stack[0].b = zeros(3)
stack[1].W = 0.1 * random.normal(size=(hiddenSize, 3))
stack[1].b = zeros(hiddenSize)
softmaxTheta = 0.005 * random.normal(size=hiddenSize * numClasses)

# Flatten the stack and prepend the softmax parameters
(stackParams, netConfig) = stack2params(stack)
stackedAETheta = concatenate([softmaxTheta, stackParams])

def stackedAutoencoderCostCallback(x):
    return cost(x, inputSize, hiddenSize, numClasses, netConfig, lambdaParam, data, labels)

(cost_value, grad) = stackedAutoencoderCostCallback(stackedAETheta)

numgrad = computeNumericalGradient(stackedAutoencoderCostCallback, stackedAETheta)

# Compare numerically computed gradients with those computed analytically
diff = linalg.norm(numgrad - grad) / linalg.norm(numgrad + grad)
print('%s' % diff)
print('Norm of the difference between numerical and analytical gradient (should be < 1e-9)\n\n')
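# The gradient checks in this section all rely on computeNumericalGradient.
# Below is a minimal sketch of how such a helper is typically written
# (central differences with a small epsilon); this is an assumption about
# its implementation, not necessarily the exact code used in this repo.
import numpy as np

def compute_numerical_gradient_sketch(J, theta, epsilon=1e-4):
    """Approximate dJ/dtheta by perturbing one coordinate at a time."""
    numgrad = np.zeros_like(theta)
    for i in range(theta.size):
        e = np.zeros_like(theta)
        e[i] = epsilon
        cost_plus, _ = J(theta + e)    # J returns (cost, grad); only the cost is used here
        cost_minus, _ = J(theta - e)
        numgrad[i] = (cost_plus - cost_minus) / (2.0 * epsilon)
    return numgrad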
    # (tail of the softmax prediction function: class scores -> probabilities -> argmax)
    h_data = exp(thetaParam.dot(data.T))
    h_data = h_data / sum(h_data, 0)
    return argmax(h_data, axis=0)


if __name__ == "__main__":
    """ Check correctness of implementation of softmax cost function using gradient check """
    numClasses = 10     # Number of classes (MNIST images fall into 10 classes)
    lambdaParam = 1e-4  # Weight decay parameter
    inputSize = 8
    inputData = random.normal(size=(100, inputSize))
    labels = random.randint(10, size=100)

    def softmaxCostCallback(x):
        return cost(x, numClasses, inputSize, lambdaParam, inputData, labels)

    # Randomly initialise theta
    thetaParam = 0.005 * random.normal(size=numClasses * inputSize)

    (cost_value, grad) = softmaxCostCallback(thetaParam)

    numGrad = computeNumericalGradient(softmaxCostCallback, thetaParam)

    # Compare numerically computed gradients with those computed analytically
    diff = linalg.norm(numGrad - grad) / linalg.norm(numGrad + grad)
    print('%s' % diff)
    print('Norm of the difference between numerical and analytical gradient (should be < 1e-7)\n\n')
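# The check above exercises a softmax cost that returns both the cost value and
# the gradient. Below is a minimal sketch of the standard softmax-with-weight-decay
# formulation; it is an assumption for illustration, and the repo's cost() may differ.
import numpy as np

def softmax_cost_sketch(theta, numClasses, inputSize, lambdaParam, data, labels):
    # theta is flattened with shape (numClasses * inputSize,); data has shape (m, inputSize)
    theta = theta.reshape(numClasses, inputSize)
    m = data.shape[0]
    scores = theta.dot(data.T)                         # (numClasses, m)
    expScores = np.exp(scores - scores.max(axis=0))    # subtract max per column for stability
    probs = expScores / expScores.sum(axis=0)
    groundTruth = np.zeros((numClasses, m))
    groundTruth[labels, np.arange(m)] = 1.0            # one-hot encoding of the labels
    cost_value = (-np.sum(groundTruth * np.log(probs)) / m
                  + 0.5 * lambdaParam * np.sum(theta ** 2))
    grad = -(groundTruth - probs).dot(data) / m + lambdaParam * theta
    return cost_value, grad.ravel()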
    # (end of the preceding parameter-initialisation function)
    return theta


if __name__ == "__main__":
    """ Check correctness of implementation of sparse autoencoder cost function using gradient check """
    patchSize = 8
    visibleSize = patchSize * patchSize  # number of input units
    hiddenSize = 25                      # number of hidden units
    sparsityParam = 0.01                 # desired average activation of the hidden units
    lambdaParam = 0.0001                 # weight decay parameter
    betaParam = 3                        # weight of sparsity penalty term

    patches = getPatches(numPatches=10, patchSize=patchSize)

    # Obtain random parameters theta
    thetaParam = initializeParameters(hiddenSize, visibleSize)

    def sparseAutoencoderCostCallback(x):
        return cost(x, visibleSize, hiddenSize, lambdaParam, sparsityParam, betaParam, patches)

    (cost_value, grad) = sparseAutoencoderCostCallback(thetaParam)

    numgrad = computeNumericalGradient(sparseAutoencoderCostCallback, thetaParam)

    # Compare numerically computed gradients with those computed analytically
    diff = linalg.norm(numgrad - grad) / linalg.norm(numgrad + grad)
    print('%s' % diff)
    print('Norm of the difference between numerical and analytical gradient (should be < 1e-9)\n\n')
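# initializeParameters is called above but not shown here. A common scheme for a
# sparse autoencoder draws W1 and W2 uniformly in [-r, r] with r chosen from the
# fan-in/fan-out, zeroes the biases, and flattens everything into one vector.
# This sketch is an assumption about the implementation, not the repo's code.
import numpy as np

def initialize_parameters_sketch(hiddenSize, visibleSize):
    r = np.sqrt(6) / np.sqrt(hiddenSize + visibleSize + 1)
    W1 = np.random.uniform(-r, r, (hiddenSize, visibleSize))
    W2 = np.random.uniform(-r, r, (visibleSize, hiddenSize))
    b1 = np.zeros(hiddenSize)
    b2 = np.zeros(visibleSize)
    # Flatten into a single parameter vector, as the cost function expects
    return np.concatenate([W1.ravel(), W2.ravel(), b1, b2])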