def main(testing=False):
    # STEP 0: Parameters
    patchsize = 8
    num_patches = testing and 10 or 10000
    visible_size = patchsize * patchsize
    hidden_size = testing and 3 or 25
    target_activation = 0.01  # desired average activation of the hidden units
    lamb = 0.0001             # weight decay parameter
    beta = 3                  # weight of the sparsity penalty term

    # STEP 1: Get data
    patches = sampleIMAGES(patchsize, num_patches)
    util.display_network(patches[:, np.random.randint(0, num_patches, 200)])
    theta = autoencoder.initialize_parameters(hidden_size, visible_size)

    # STEP 2: Implement sparseAutoencoderLoss
    def sal(theta):
        return autoencoder.sparse_autoencoder_loss(theta, visible_size, hidden_size,
                                                   lamb, target_activation, beta, patches)

    loss, grad = sal(theta)

    # STEP 3: Gradient Checking
    if testing:
        numgrad = util.compute_numerical_gradient(lambda x: sal(x)[0], theta)

        # Eyeball the gradients
        print np.hstack([numgrad, grad])

        diff = linalg.norm(numgrad - grad) / linalg.norm(numgrad + grad)
        print "Normed difference: %f" % diff

    # STEP 4: Run sparse_autoencoder_loss with L-BFGS

    # Initialize random theta
    theta = autoencoder.initialize_parameters(hidden_size, visible_size)

    print "Starting..."
    x, f, d = scipy.optimize.fmin_l_bfgs_b(sal, theta, maxfun=400, iprint=25, m=20)
    print "Done"
    print x
    print f
    print d

    W1, W2, b1, b2 = autoencoder.unflatten(x, visible_size, hidden_size)
    print "W1.shape=%s" % (W1.shape,)
    util.display_network(W1.T)
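# STEP 3 above relies on `util.compute_numerical_gradient` to sanity-check the
# analytic gradient. As an illustrative sketch only (not necessarily the
# implementation in `util`; the helper name and the `eps` default below are
# assumptions), a centered finite-difference check could look like this:
def _numerical_gradient_sketch(J, theta, eps=1e-4):
    # Perturb each coordinate by +/- eps and record the centered slope
    # (J(theta + eps*e_i) - J(theta - eps*e_i)) / (2 * eps).
    numgrad = np.zeros_like(theta)
    perturb = np.zeros_like(theta)
    for i in range(theta.size):
        perturb[i] = eps
        numgrad[i] = (J(theta + perturb) - J(theta - perturb)) / (2.0 * eps)
        perturb[i] = 0.0
    return numgrad
# With a correct analytic gradient, the normed difference printed in STEP 3 is
# typically tiny (on the order of 1e-9).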
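# The softmax exercise below calls `softmax.cost()`. For reference, a hedged
# sketch of a weight-decayed softmax cost and gradient is given here; it is an
# illustration under assumed conventions (0-based integer `labels`, `theta`
# stored row-per-class, `data` laid out as input_size x num_examples), not the
# tutorial's implementation.
def _softmax_cost_sketch(theta, num_classes, input_size, lamb, data, labels):
    m = data.shape[1]                                # number of examples
    theta = theta.reshape(num_classes, input_size)   # one weight row per class
    scores = theta.dot(data)                         # num_classes x m class scores
    scores -= scores.max(axis=0)                     # stabilize the exponentials
    probs = np.exp(scores) / np.exp(scores).sum(axis=0)
    ground_truth = np.zeros((num_classes, m))
    ground_truth[labels, np.arange(m)] = 1.0         # one-hot encode the labels
    cost = -np.sum(ground_truth * np.log(probs)) / m + 0.5 * lamb * np.sum(theta ** 2)
    grad = -(ground_truth - probs).dot(data.T) / m + lamb * theta
    return cost, grad.ravel()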
# Implement softmaxCost in [softmax.cost()](softmax.html#section-1).
cost, grad = softmax.cost(theta, num_classes, input_size, lamb, input_data, labels)

# === Step 3: Gradient checking ===
#
# As with any learning algorithm, always check that the gradients are
# correct before learning the parameters.

# Cost function
def cost_func(x):
    return softmax.cost(x, num_classes, input_size, lamb, input_data, labels)[0]

# For testing…
if False:
    num_grad = util.compute_numerical_gradient(cost_func, theta)
    num_grad = num_grad.ravel('F')

    # Visually compare the gradients side by side
    print np.vstack([grad, num_grad]).T

    # Compare numerically computed gradients with those computed analytically
    diff = linalg.norm(num_grad - grad) / linalg.norm(num_grad + grad)
    print diff

# === Step 4: Learning parameters ===
#
# Once the gradients are correct, we start training using
# [softmax.train()](softmax.html#section-5).
softmax_model = softmax.train(input_size, num_classes, lamb,