import numpy as np
from numpy import linalg

import scipy.optimize

import autoencoder
import util

# `sampleIMAGES` is assumed to be defined or imported elsewhere alongside
# this module.


def main(testing=False):
    # STEP 0: Parameters
    patchsize = 8
    num_patches = 10 if testing else 10000
    visible_size = patchsize * patchsize
    hidden_size = 3 if testing else 25
    target_activation = 0.01
    lamb = 0.0001
    beta = 3

    # STEP 1: Get data
    patches = sampleIMAGES(patchsize, num_patches)
    util.display_network(patches[:, np.random.randint(0, num_patches, 200)])
    theta = autoencoder.initialize_parameters(hidden_size, visible_size)

    # STEP 2: Implement sparseAutoencoderLoss
    def sal(theta):
        return autoencoder.sparse_autoencoder_loss(
            theta, visible_size, hidden_size, lamb, target_activation,
            beta, patches)

    loss, grad = sal(theta)

    # STEP 3: Gradient Checking
    if testing:
        numgrad = util.compute_numerical_gradient(lambda x: sal(x)[0], theta)

        # Eyeball the gradients
        print np.hstack([numgrad, grad])

        diff = linalg.norm(numgrad - grad) / linalg.norm(numgrad + grad)
        print "Normed difference: %f" % diff

    # STEP 4: Run sparse_autoencoder_loss with L-BFGS
    # Initialize a fresh random theta
    theta = autoencoder.initialize_parameters(hidden_size, visible_size)
    print "Starting..."
    x, f, d = scipy.optimize.fmin_l_bfgs_b(sal, theta, maxfun=400,
                                           iprint=25, m=20)
    print "Done"
    print x
    print f
    print d

    W1, W2, b1, b2 = autoencoder.unflatten(x, visible_size, hidden_size)
    print "W1.shape=%s" % (W1.shape,)
    util.display_network(W1.T)
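# A minimal sketch of the loss that `autoencoder.sparse_autoencoder_loss` is
# assumed to compute, following the usual UFLDL conventions: examples stored
# as columns, sigmoid activations, squared-error reconstruction, weight
# decay, and a KL-divergence sparsity penalty. The real `autoencoder` module
# may differ in its parameter layout and return values.
def sparse_autoencoder_loss_sketch(theta, visible_size, hidden_size,
                                   lamb, rho, beta, data):
    W1, W2, b1, b2 = autoencoder.unflatten(theta, visible_size, hidden_size)
    m = data.shape[1]
    sigmoid = lambda z: 1.0 / (1.0 + np.exp(-z))

    # Forward pass
    a2 = sigmoid(np.dot(W1, data) + b1[:, np.newaxis])  # hidden activations
    a3 = sigmoid(np.dot(W2, a2) + b2[:, np.newaxis])    # reconstruction

    # Average activation of each hidden unit over the batch
    rho_hat = a2.mean(axis=1)

    # Reconstruction error + weight decay + sparsity penalty
    loss = (0.5 / m * np.sum((a3 - data) ** 2)
            + lamb / 2.0 * (np.sum(W1 ** 2) + np.sum(W2 ** 2))
            + beta * np.sum(rho * np.log(rho / rho_hat)
                            + (1 - rho) * np.log((1 - rho) / (1 - rho_hat))))

    # Backpropagation, including the sparsity term in the hidden deltas
    delta3 = (a3 - data) * a3 * (1 - a3)
    sparsity_grad = beta * (-rho / rho_hat + (1 - rho) / (1 - rho_hat))
    delta2 = (np.dot(W2.T, delta3)
              + sparsity_grad[:, np.newaxis]) * a2 * (1 - a2)

    W1grad = np.dot(delta2, data.T) / m + lamb * W1
    W2grad = np.dot(delta3, a2.T) / m + lamb * W2
    b1grad = delta2.mean(axis=1)
    b2grad = delta3.mean(axis=1)

    # Pack gradients in the assumed (W1, W2, b1, b2) flattening order
    grad = np.concatenate([W1grad.ravel(), W2grad.ravel(), b1grad, b2grad])
    return loss, grad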
import scipy.optimize

import autoencoder
import mnist
import util


def main(testing=True):
    images = mnist.load_images('../data/train-images-idx3-ubyte')  # 784 x 60000
    labels = mnist.load_labels('../data/train-labels-idx1-ubyte')  # 60000 x 1
    util.display_network(images[:, 0:100])  # Show the first 100 images

    visible_size = 28 * 28
    hidden_size = 196
    sparsity_param = 0.1
    lamb = 3e-3
    beta = 3
    patches = images[:, 0:10000]

    theta = autoencoder.initialize_parameters(hidden_size, visible_size)

    def sal(theta):
        return autoencoder.sparse_autoencoder_loss(
            theta, visible_size, hidden_size, lamb, sparsity_param,
            beta, patches)

    x, f, d = scipy.optimize.fmin_l_bfgs_b(sal, theta, maxfun=400,
                                           iprint=1, m=20)

    W1, W2, b1, b2 = autoencoder.unflatten(x, visible_size, hidden_size)
    util.display_network(W1.T)
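import numpy as np

# A sketch of a plausible `initialize_parameters`, assuming the common scheme
# of sampling weights uniformly from [-r, r] with r = sqrt(6 / (fan_in +
# fan_out + 1)), zero biases, and packing everything into one flat vector in
# the order (W1, W2, b1, b2). The real `autoencoder.initialize_parameters`
# may use a different scheme or layout.
def initialize_parameters_sketch(hidden_size, visible_size):
    r = np.sqrt(6.0 / (hidden_size + visible_size + 1))
    W1 = np.random.uniform(-r, r, (hidden_size, visible_size))
    W2 = np.random.uniform(-r, r, (visible_size, hidden_size))
    b1 = np.zeros(hidden_size)
    b2 = np.zeros(visible_size)
    return np.concatenate([W1.ravel(), W2.ravel(), b1, b2])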
testData = labeledData[:, numOfTrain:]
testLabels = labels[numOfTrain:]

# Output some statistics
print '# examples in unlabeled set: %d' % unlabeledData.shape[1]
print '# examples in supervised training set: %d' % trainData.shape[1]
print '# examples in supervised testing set: %d' % testData.shape[1]

# Randomly initialize the parameters
theta = autoencoder.initializeParameters(hiddenSize, inputSize)

fn = lambda theta: autoencoder.sparseAutoencoderCost(
    theta, inputSize, hiddenSize, lamb, sparsityParam, beta, unlabeledData)
optTheta, cost, d = scipy.optimize.fmin_l_bfgs_b(fn, theta, maxfun=maxfun,
                                                 iprint=1, m=20)

W1, W2, b1, b2 = autoencoder.unflatten(optTheta, inputSize, hiddenSize)
np.savez("result.npz", W1=W1)
# util.display_network(W1.T)

# trainFeatures = autoencoder.feedforwardAutoencoder(optTheta, hiddenSize,
#                                                    inputSize, trainData)
# testFeatures = autoencoder.feedforwardAutoencoder(optTheta, hiddenSize,
#                                                   inputSize, testData)

# lamb = 1e-4
# numOfClasses = len(set(trainLabels))
# softmaxModel = softmax.train(hiddenSize, numOfClasses, lamb, trainFeatures,
#                              trainLabels, maxfun=100)
# pred = softmax.predict(softmaxModel, testFeatures)
# acc = (testLabels == pred).mean()
# print 'Accuracy: %0.3f' % (acc * 100)
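# The commented-out feature extraction above relies on
# `autoencoder.feedforwardAutoencoder`. A minimal sketch of what it is
# assumed to do: run the data through the trained encoder half only, i.e.
# compute the hidden-layer activations sigmoid(W1 x + b1). The real helper
# may differ.
def feedforwardAutoencoderSketch(theta, hiddenSize, visibleSize, data):
    W1, W2, b1, b2 = autoencoder.unflatten(theta, visibleSize, hiddenSize)
    return 1.0 / (1.0 + np.exp(-(np.dot(W1, data) + b1[:, np.newaxis])))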
# Train the first layer sparse autoencoder. This layer has a hidden
# size of `hidden_size_l1`.
#
# sae1_opt_theta, loss = minFunc( @(p) sparseAutoencoderLoss(p, ...
#     inputSize, hiddenSizeL1, ...
#     lambda, sparsityParam, ...
#     beta, trainData), ...
#     sae1Theta, options);
fn = lambda theta: autoencoder.sparse_autoencoder_loss(
    theta, input_size, hidden_size_l1, lamb, sparsity_param, beta, train_data)
sae1_opt_theta, loss, d = scipy.optimize.fmin_l_bfgs_b(fn, sae1_theta,
                                                       maxfun=maxfun, iprint=1)

if DISPLAY:
    W1, W2, b1, b2 = autoencoder.unflatten(sae1_opt_theta, input_size,
                                           hidden_size_l1)
    util.display_network(W1.T)

# === Step 3: Train the second sparse autoencoder ===
#
# Train the second sparse autoencoder on the first autoencoder features.
sae1_features = autoencoder.feedforward_autoencoder(sae1_opt_theta,
                                                    hidden_size_l1,
                                                    input_size, train_data)

# Randomly initialize the parameters
sae2_theta = autoencoder.initialize_parameters(hidden_size_l2, hidden_size_l1)

fn = lambda theta: autoencoder.sparse_autoencoder_loss(
    theta, hidden_size_l1, hidden_size_l2, lamb, sparsity_param, beta,
    sae1_features)
sae2_opt_theta, loss, d = scipy.optimize.fmin_l_bfgs_b(fn, sae2_theta,
                                                       maxfun=maxfun, iprint=1)
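# Both training stages above unpack theta with `autoencoder.unflatten`. A
# sketch of the assumed inverse of the (W1, W2, b1, b2) flattening used at
# initialization; the real module's layout may differ.
def unflatten_sketch(theta, visible_size, hidden_size):
    hv = hidden_size * visible_size
    W1 = theta[0:hv].reshape(hidden_size, visible_size)
    W2 = theta[hv:2 * hv].reshape(visible_size, hidden_size)
    b1 = theta[2 * hv:2 * hv + hidden_size]
    b2 = theta[2 * hv + hidden_size:]
    return W1, W2, b1, b2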
# This trains the sparse autoencoder on the unlabeled training images.

# Randomly initialize the parameters
theta = autoencoder.initialize_parameters(hidden_size, input_size)

# The single-parameter function to minimize
fn = lambda theta: autoencoder.sparse_autoencoder_loss(
    theta, input_size, hidden_size, lamb, sparsity_param, beta, unlabeled_data)

# Find `opt_theta` by running the sparse autoencoder on unlabeled
# training images.
opt_theta, loss, d = scipy.optimize.fmin_l_bfgs_b(fn, theta, maxfun=maxfun,
                                                  iprint=1, m=20)

# Visualize weights
W1, W2, b1, b2 = autoencoder.unflatten(opt_theta, input_size, hidden_size)
util.display_network(W1.T)

# === Step 3: Extract Features from the Supervised Dataset ===
train_features = autoencoder.feedforward_autoencoder(
    opt_theta, hidden_size, input_size, train_data)
test_features = autoencoder.feedforward_autoencoder(
    opt_theta, hidden_size, input_size, test_data)

# === Step 4: Train the softmax classifier ===
lamb = 1e-4
num_classes = len(set(train_labels))
softmax_model = softmax.train(hidden_size, num_classes, lamb,
                              train_features, train_labels, maxfun=100)

# === Step 5: Testing ===
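# Step 5 is left empty above. A plausible completion, mirroring the
# commented-out testing code in the camelCase variant of this script:
# classify the held-out features and report accuracy. `softmax.predict` is
# assumed to return one label per column of `test_features`.
pred = softmax.predict(softmax_model, test_features)
acc = (test_labels == pred).mean()
print 'Accuracy: %0.3f' % (acc * 100)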
sparsityParam = 0.1
lamb = 3e-3
beta = 3

trainData = mnist.load_images('../data/train-images-idx3-ubyte')
trainLabels = mnist.load_labels('../data/train-labels-idx1-ubyte')

# Train the first sparse autoencoder.

# Randomly initialize the parameters
sae1Theta = autoencoder.initializeParameters(hiddenSizeL1, inputSize)

fn = lambda theta: autoencoder.sparseAutoencoderCost(
    theta, inputSize, hiddenSizeL1, lamb, sparsityParam, beta, trainData)
sae1OptTheta, loss, d = scipy.optimize.fmin_l_bfgs_b(fn, sae1Theta,
                                                     maxfun=maxfun, iprint=1)

if DISPLAY:
    W1, W2, b1, b2 = autoencoder.unflatten(sae1OptTheta, inputSize,
                                           hiddenSizeL1)
    util.display_network(W1.T)

sae1Features = autoencoder.feedforwardAutoencoder(sae1OptTheta, hiddenSizeL1,
                                                  inputSize, trainData)

# Train the second sparse autoencoder.

# Randomly initialize the parameters
sae2Theta = autoencoder.initializeParameters(hiddenSizeL2, hiddenSizeL1)

fn = lambda theta: autoencoder.sparseAutoencoderCost(
    theta, hiddenSizeL1, hiddenSizeL2, lamb, sparsityParam, beta, sae1Features)
sae2OptTheta, loss, d = scipy.optimize.fmin_l_bfgs_b(fn, sae2Theta,
                                                     maxfun=maxfun, iprint=1)

if DISPLAY:
    W11, W21, b11, b21 = autoencoder.unflatten(sae1OptTheta, inputSize,
                                               hiddenSizeL1)
    W12, W22, b12, b22 = autoencoder.unflatten(sae2OptTheta, hiddenSizeL1,
                                               hiddenSizeL2)
    # Figure out how to display a 2-level network; see the sketch below.
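# One common way to visualize the second layer, ignoring the sigmoid
# nonlinearity: treat each level-2 feature as a linear combination of level-1
# features and display the composed weights W12 * W11 back in input space.
# This is an approximation for illustration, not the exercise's prescribed
# answer.
if DISPLAY:
    util.display_network(W12.dot(W11).T)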