def raw_interpolation(one, two, count):
    """Linearly interpolate between two patches in raw pixel space."""
    images = np.empty([784, count])
    for ii in range(count):
        current_patch = ((patches[:, one] * ii) + (patches[:, two] * (count - ii))) / count
        images[:, ii] = current_patch
    display_network.display_network('interpolations/raw_interpolation' + str(one) + '_' + str(two) + '.png', images)
    print("Figure written to 'raw_interpolation%d_%d.png'" % (one, two))
def deep_interpolation(one, two, count, W1, W2, b1, b2):
    """Interpolate between two patches in the autoencoder's hidden representation."""
    deep_image1 = np.dot(W1, patches[:, one])
    deep_image2 = np.dot(W1, patches[:, two])
    images = np.empty([784, count])
    for ii in range(count):
        deep_interpolation_img = ((deep_image1 * ii) + deep_image2 * (count - ii)) / count
        images[:, ii] = np.dot(W2, deep_interpolation_img)
    display_network.display_network('interpolations/deep_interpolation' + str(one) + '_' + str(two) + '.png', images)
    print("Figure written to 'deep_interpolation%d_%d.png'" % (one, two))
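A minimal usage sketch for the two helpers above, assuming `patches` holds 784-dimensional image columns and that `W1`, `W2`, `b1`, `b2` come from an already-trained autoencoder (these names are assumptions, not part of the snippet):

# Hypothetical driver: interpolate between patches 0 and 1 in 10 steps,
# once in raw pixel space and once through the learned hidden representation.
n_steps = 10
raw_interpolation(0, 1, n_steps)
deep_interpolation(0, 1, n_steps, W1, W2, b1, b2)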
def complex_train_test():
    np.random.seed(0)
    do_images = True

    train_images = load_MNIST.load_MNIST_images('data/train-images-idx3-ubyte')
    dsize = 10000
    patches = train_images[:, :dsize]
    fs = [dsize, 28 * 28, 196, 28 * 28]

    cost, train_op = cost_and_grad(fs=fs, X0=patches, lambda_=3e-3, rho=0.1, beta=3, lr=0.1)
    sess = tf.get_default_session()

    u.reset_time()
    old_cost = sess.run(cost)
    old_i = 0
    frame_count = 0
    costs = []
    for i in range(2000):
        cost0, _ = sess.run([cost, train_op])
        costs.append(cost0)
        if i % 100 == 0:
            print(cost0)

        # filters are transposed in visualization
        if ((old_cost - cost0) / old_cost > 0.05 or i - old_i > 50) and do_images:
            Wf_ = sess.run("Wf_var/read:0")
            W1_ = u.unflatten_np(Wf_, fs[1:])[0]
            display_network.display_network(W1_.T, filename="pics/weights-%03d.png" % (frame_count,))
            frame_count += 1
            old_cost = cost0
            old_i = i
        u.record_time()

    # u.dump(costs, "costs_adam.csv")
    u.dump(costs, "costs_adam_bn1.csv")
    u.summarize_time()
""" Step 0a: Load data Here we provide the code to load natural image data into x. x will be a 144 * 10000 matrix, where the kth column x(:, k) corresponds to the raw image data from the kth 12x12 image patch sampled. You do not need to change the code below. """ # Return patches from sample images x = sample_images_raw('data/IMAGES_RAW.mat') # n is the number of dimensions and m is the number of patches n, m = x.shape random_sel = np.random.randint(0, m, 200) image_x = display_network(x[:, random_sel]) fig = plt.figure() plt.imshow(image_x, cmap=plt.cm.gray) plt.title('Raw patch images') """ Step 0b: Zero-mean the data (by row) """ x -= np.mean(x, axis=1).reshape(-1, 1) """ Step 1a: Implement PCA to obtain x_rot Implement PCA to obtain x_rot, the matrix in which the data is expressed with respect to the eigenbasis of sigma, which is the matrix U.
import numpy as np
from scipy.optimize import minimize

from initial_params import initial_params
from sparse_autoencoder_cost import sparse_autoencoder_cost, sigmoid, der_sigmoid
from load_mnist import generate_patch, load_data
from display_network import display_network

visible_size = 28 * 28
hidden_size = 196
sparsity_param = 0.1
lamda = 0.003
beta = 3

images = np.transpose(load_data())[:, 0:10000]
patches = generate_patch()

theta = initial_params(visible_size, hidden_size)
J = lambda th: sparse_autoencoder_cost(
    visible_size, hidden_size, th,
    lambda x: sigmoid(x), lambda x: der_sigmoid(x),
    lamda, beta, sparsity_param, images
)

options_ = {"maxiter": 800, "disp": True}
result = minimize(J, theta, method="L-BFGS-B", jac=True, options=options_)
opt_theta = result.x
print(result)

W1 = opt_theta[0:hidden_size * visible_size].reshape(hidden_size, visible_size).transpose()
display_network(W1)
if slope_ratio < alpha and abs(target_delta) > 1e-6 and adaptive_step:
    print("%.2f %.2f %.2f" % (cost0, cost1, slope_ratio))
    print("Slope optimality %.2f, shrinking learning rate to %.2f" % (slope_ratio, lr0 * beta,))
    sess.run(lr_set, feed_dict={lr_p: lr0 * beta})
else:
    # see if our learning rate got too conservative, and increase it
    # 99 was ideal for gradient
    # if i>0 and i%50 == 0 and slope_ratio>0.99:
    if i > 0 and i % 50 == 0 and slope_ratio > 0.90 and adaptive_step:
        print("%.2f %.2f %.2f" % (cost0, cost1, slope_ratio))
        print("Growing learning rate to %.2f" % (lr0 * growth_rate))
        sess.run(lr_set, feed_dict={lr_p: lr0 * growth_rate})

if do_images and i > 0 and i % 100 == 0:
    Wf_ = sess.run("Wf_var/read:0")
    W1_ = u.unflatten_np(Wf_, fs[1:])[0]
    display_network.display_network(W1_.T, filename="pics/weights-%03d.png" % (frame_count,))
    frame_count += 1

old_cost = cost0
old_i = i
u.record_time()

u.dump(costs, "new%d.csv" % (whitening_mode,))
u.summarize_time()
train_labels = load_MNIST.load_MNIST_labels('data/train-labels-idx1-ubyte')

# implement MNIST sparse autoencoder
visibleSize = 28 * 28
hiddenSize = 196
sparsityParam = 0.1
lambda_ = 3e-3
beta = 3
patches = train_images[:, :10000]

opttheta = scipy.io.loadmat("opttheta.mat")["opttheta"].flatten()
W1 = opttheta[:hiddenSize * visibleSize].reshape((hiddenSize, visibleSize), order='F')

# also need to transpose individual columns
display_network.display_network(transposeImageCols(W1.transpose()), "sample2.jpg")

patchesTransposed = transposeImageCols(patches)
# J1 = lambda x: sparse_autoencoder.sparse_autoencoder_cost_matlab(x, visibleSize, hiddenSize, lambda_, sparsityParam, beta, patchesTransposed)
# cost, grad = J1(opttheta)
# print("My cost1 %.3f" % (cost))
# J2 = lambda x: sparse_autoencoder.sparse_autoencoder_cost(x, visibleSize, hiddenSize, lambda_, sparsityParam, beta, patchesTransposed)
# cost, grad = J2(opttheta)
# print("My cost2 %.3f" % (cost))
cost3 = sparse_autoencoder.sparse_autoencoder_cost_tf(opttheta, visibleSize, hiddenSize, lambda_, sparsityParam, beta, patchesTransposed)
print("My cost4 %.3f" % (cost3))
import functools

import numpy as np

import pca
import sample_images
import display_network

if __name__ == '__main__':
    num_samples = 10000
    m = sample_images.load_matlab_images('IMAGES_RAW.mat')
    patches = sample_images.sample(m, num_samples, size=(12, 12), norm=None)
    display_network.display_network('raw-patches.png', patches)

    # ensure that patches have zero mean
    mean = np.mean(patches, axis=0)
    patches -= mean
    assert np.allclose(np.mean(patches, axis=0), np.zeros(patches.shape[1]))

    U, s, x_rot = pca.pca(patches)
    covar = pca.covariance(x_rot)
    display_network.array_to_file('covariance.png', covar)

    # percentage of variance: cumulative sum of the eigenvalues
    pov = np.array(functools.reduce(
        lambda t, l: t + [t[-1] + l] if len(t) > 0 else [l],
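The reduce above builds a running sum of the eigenvalues to get the fraction of variance retained; the same quantity can be sketched with np.cumsum, assuming `s` holds the eigenvalues in decreasing order as returned by `pca.pca`:

# cumulative fraction of variance explained by the first k components
pov = np.cumsum(s) / np.sum(s)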
sae_cost = partial(sparse_autoencoder.cost,
                   visible_size=visible_size,
                   hidden_size=hidden_size,
                   weight_decay=weight_decay,
                   beta=beta,
                   sparsity_param=sparsity_param,
                   data=patches)

# Train!
trained, cost, d = scipy.optimize.lbfgsb.fmin_l_bfgs_b(sae_cost, theta,
                                                        maxfun=max_iter, m=1000,
                                                        factr=1.0, pgtol=1e-100,
                                                        iprint=1)

'''
# numerical approximation (way too slow!)
trained, cost, d = scipy.optimize.lbfgsb.fmin_l_bfgs_b(
    lambda x: sae_cost(x)[0], theta,
    approx_grad=True, maxfun=max_iter, m=1000, iprint=1)
'''

# Save the trained weights
W1, W2, b1, b2 = sparse_autoencoder.unflatten_params(trained, hidden_size, visible_size)
display_network.display_network('weights.png', W1.T)
# debug (set to True in Ex 3)
debug = True

# ======================================================================
# Exercise 1: Load MNIST
# In this exercise, you will load the MNIST dataset
# First download the dataset from the following website:
# Training Images: http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
# Training Labels: http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz

# Loading Sample Images
# Loading 10K images from MNIST database
images = load_MNIST.load_MNIST_images('train-images.idx3-ubyte')
patches = images[:, 0:10000]
patches = patches[:, 1:200]
display_network.display_network(patches[:, 0:100])

# Now you will use the display_network function to display different subsets of the MNIST dataset
# The display is saved in the directory under the name weights.
# Display 10, 50 and 100 datasets
### YOUR CODE HERE ###
# display 10
display_network.display_network(patches[:, 0:10], 'weights10.png')
# display 50
display_network.display_network(patches[:, 0:50], 'weights50.png')
# display 100
display_network.display_network(patches[:, 0:100], 'weights100.png')

# Obtain random parameters theta
# You need to implement utils_hw.initialize in utils_hw.py
def test_mnist():
    '''Test the eXclusive autoencoder using MNIST.'''
    # %%
    # load MNIST as before
    mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
    mean_img = np.mean(mnist.train.images, axis=0)

    ae = eXclusiveAutoencoder(
        input_dimensions=784,
        layers=[
            {
                'n_channels': 144,
                'reconstructive_regularizer': 1.0,
                'weight_decay': 1.0,
                'sparse_regularizer': 1.0,
                'sparsity_level': 0.05,
                'exclusive_regularizer': 1.0,
                'exclusive_type': 'logcosh',
                'exclusive_logcosh_scale': 10.0,
                'corrupt_prob': 1.0,
                'tied_weight': True,
                'encode': 'sigmoid',
                'decode': 'linear',
                'pathways': [
                    # range(0, 144),
                    range(0, 96),
                    range(48, 144),
                ],
            },
        ],
        init_encoder_weight=None,
        init_decoder_weight=None,
        init_encoder_bias=None,
        init_decoder_bias=None,
    )

    # %%
    learning_rate = 0.01
    n_reload_per_epochs = 10
    n_display_per_epochs = 10000
    batch_size = 2000
    n_epochs = 100000

    optimizer_list = []
    for layer_i in range(1):
        optimizer_list.append(AMSGrad(learning_rate).minimize(
            ae['layerwise_cost'][layer_i]['total'],
            var_list=[
                ae['encoder_weight'][layer_i],
                ae['encoder_bias'][layer_i],
                # ae['decoder_weights'][layer_i],
                # ae['decoder_biases'][layer_i],
            ]))
    # optimizer_full = tf.train.AdamOptimizer(learning_rate).minimize(ae['cost']['total'])
    optimizer_list.append(AMSGrad(learning_rate).minimize(ae['cost']['total']))

    # %%
    # We create a session to use the graph
    sess = tf.Session()
    writer = tf.summary.FileWriter('logs', sess.graph)
    sess.run(tf.global_variables_initializer())

    # %%
    # Fit all training data
    for optimizer_i, optimizer in enumerate(optimizer_list):
        for epoch_i in range(n_epochs):
            if epoch_i % n_reload_per_epochs == 0:
                batch_xs, batch_ys = mnist.train.next_batch(batch_size)

                # mean-subtracted images of a single digit class; the commented-out
                # rows are the remaining one-hot label columns
                batch_x0 = np.array([img - mean_img for img in batch_xs[np.where(np.any(np.array([
                    batch_ys[:, 0],
                    # batch_ys[:, 1], batch_ys[:, 2], batch_ys[:, 3], batch_ys[:, 4],
                    # batch_ys[:, 5], batch_ys[:, 6], batch_ys[:, 7], batch_ys[:, 8], batch_ys[:, 9],
                ]) == 1, axis=0))]])
                batch_x1 = np.array([img - mean_img for img in batch_xs[np.where(np.any(np.array([
                    # batch_ys[:, 0],
                    batch_ys[:, 1],
                    # batch_ys[:, 2], batch_ys[:, 3], batch_ys[:, 4], batch_ys[:, 5],
                    # batch_ys[:, 6], batch_ys[:, 7], batch_ys[:, 8], batch_ys[:, 9],
                ]) == 1, axis=0))]])
                batch_x2 = np.array([img - mean_img for img in batch_xs[np.where(np.any(np.array([
                    # batch_ys[:, 0], batch_ys[:, 1],
                    batch_ys[:, 2],
                    # batch_ys[:, 3], batch_ys[:, 4], batch_ys[:, 5], batch_ys[:, 6],
                    # batch_ys[:, 7], batch_ys[:, 8], batch_ys[:, 9],
                ]) == 1, axis=0))]])

                # blend pairs (and optionally the triple) of digit classes
                min_batch_size_x01 = np.min((batch_x0.shape[0], batch_x1.shape[0]))
                batch_x01 = 0.5 * (batch_x0[:min_batch_size_x01] + batch_x1[:min_batch_size_x01])
                min_batch_size_x012 = np.min((batch_x0.shape[0], batch_x1.shape[0], batch_x2.shape[0]))
                batch_x012 = 0.333 * (batch_x0[:min_batch_size_x012] + batch_x1[:min_batch_size_x012] + batch_x2[:min_batch_size_x012])
                min_batch_size_x12 = np.min((batch_x1.shape[0], batch_x2.shape[0]))
                batch_x12 = 0.5 * (batch_x1[:min_batch_size_x12] + batch_x2[:min_batch_size_x12])

                train = []
                # train.append(batch_x012)
                train.append(batch_x01)
                train.append(batch_x12)
                # (commented-out alternatives fed raw digit selections instead:
                #  digits 0/1 for the first pathway and digits 1/2 for the second)

            feed = {ae['training_x'][0]: train[0], ae['training_x'][1]: train[1]}
            sess.run(optimizer, feed_dict=feed)

            if (epoch_i + 1) % n_display_per_epochs == 0:
                if optimizer is not optimizer_list[-1]:
                    cost_total = sess.run(ae['layerwise_cost'][optimizer_i]['total'], feed_dict=feed)
                    cost_reconstruction_error = sess.run(ae['layerwise_cost'][optimizer_i]['reconstruction_error'], feed_dict=feed)
                    cost_sparsity = sess.run(ae['layerwise_cost'][optimizer_i]['sparsity'], feed_dict=feed)
                    cost_exclusivity = sess.run(ae['layerwise_cost'][optimizer_i]['exclusivity'], feed_dict=feed)
                    cost_weight_decay = sess.run(ae['layerwise_cost'][optimizer_i]['weight_decay'], feed_dict=feed)
                    print('layer:', optimizer_i + 1, ', epoch:', epoch_i + 1,
                          ', total cost:', cost_total,
                          ', recon error:', cost_reconstruction_error,
                          ', sparsity:', cost_sparsity,
                          ', weight decay:', cost_weight_decay,
                          ', exclusivity: ', cost_exclusivity)
                else:
                    cost_total = sess.run(ae['cost']['total'], feed_dict=feed)
                    cost_reconstruction_error = sess.run(ae['cost']['reconstruction_error'], feed_dict=feed)
                    cost_sparsity = sess.run(ae['cost']['sparsity'], feed_dict=feed)
                    cost_exclusivity = sess.run(ae['cost']['exclusivity'], feed_dict=feed)
                    cost_weight_decay = sess.run(ae['cost']['weight_decay'], feed_dict=feed)
                    print('layer: full,', 'epoch:', epoch_i + 1,
                          ', total cost:', cost_total,
                          ', recon error:', cost_reconstruction_error,
                          ', sparsity:', cost_sparsity,
                          ', weight decay:', cost_weight_decay,
                          ', exclusivity: ', cost_exclusivity)

                n_examples = 5120
                test_xs, test_ys = mnist.test.next_batch(n_examples)
                test_xs = np.array([img - mean_img for img in test_xs[np.where(np.any(np.array([
                    test_ys[:, 0], test_ys[:, 1], test_ys[:, 2],
                    # test_ys[:, 3], test_ys[:, 4], test_ys[:, 5], test_ys[:, 6],
                    # test_ys[:, 7], test_ys[:, 8], test_ys[:, 9],
                ]) == 1, axis=0))][:144]])

                if optimizer is not optimizer_list[-1]:
                    recon = sess.run(ae['layerwise_y'][layer_i], feed_dict={ae['x']: test_xs})
                else:
                    recon = sess.run(ae['y'], feed_dict={ae['x']: test_xs})

                weights = sess.run(ae['encoder_weight'][0])
                # weights = np.transpose(weights, axes=(3, 0, 1, 2))

                # display_network(batch_x012[:144].transpose(), filename='mnist_batch_01.png')
                display_network(batch_x01[:144].transpose(), filename='mnist_batch_01.png')
                display_network(batch_x12[:144].transpose(), filename='mnist_batch_12.png')
                display_network(test_xs.transpose(), filename='mnist_test.png')
                display_network(recon.reshape((144, 784)).transpose(), filename='mnist_results.png')
                display_network(weights, filename='mnist_weights.png')

    writer.close()
    return ae
    a function that accepts one label argument (e.g. only get digits 5-9),
    a boolean (True if use only train/validation sets, False for test set),
    and a num_samples integer to choose only the first N samples.

    Only reads mnist data from a special pickled mnist file.

    Returns array of images with shape (784, num_images).
    """
    training, valid, testing = read_mnist_file(filename)
    if train:
        t = np.array([e for e, l in izip(training[0], training[1]) if test(l)])
        v = np.array([e for e, l in izip(valid[0], valid[1]) if test(l)])
        images = np.vstack([t, v]).T
        tl = np.array([l for e, l in izip(training[0], training[1]) if test(l)])
        vl = np.array([l for e, l in izip(valid[0], valid[1]) if test(l)])
        labels = np.hstack([tl, vl])
    else:
        t = testing
        images = np.array([e for e, l in izip(t[0], t[1]) if test(l)]).T
        labels = np.array([l for e, l in izip(t[0], t[1]) if test(l)])

    assert images.shape[1] == len(labels)
    assert images.shape[0] == 784

    patches = images[:, :num_samples]
    labels = labels[:num_samples]
    assert patches.shape[0] == 784
    return patches, labels


if __name__ == '__main__':
    train, valid, test = read_mnist_file('../data/mnist.pkl.gz')
    display_network.display_network('mnist.png', train[0].T[:, :100])
# number of input units
visible_size = 28 * 28
# number of hidden units
hidden_size = 25
# desired average activation of the hidden units.
# (This was denoted by the Greek alphabet rho, which looks like a lower-case "p",
# in the lecture notes).
# weight decay parameter
lambda_ = 0.0001

# debug (set to True in Ex 3)
debug = False

##======================================================================
## Ex 1: Load MNIST
## In this example, you will load the mnist dataset
## First download the dataset from the following website:
## Training Images: http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
## Training Labels: http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz

# Loading Sample Images
# Loading 10K images from MNIST database
images = load_MNIST.load_MNIST_images('../../data/mnist/train-images-idx3-ubyte')
patches = images[:, 0:10000]
patches = patches[:, 1:200]
display_network.display_network(patches[:, 1:100])
import random

import numpy as np

import display_network
import sample_images

##================================================================
## Step 0a: Load data
# Here we provide the code to load natural image data into x.
# x will be a 144 * 10000 matrix, where the kth column x(:, k) corresponds to
# the raw image data from the kth 12x12 image patch sampled.
# You do not need to change the code below.
patches = sample_images.sample_images_raw()
num_samples = patches.shape[1]
random_sel = random.sample(range(num_samples), 400)
display_network.display_network(patches[:, random_sel], 'raw_pca.png')

##================================================================
## Step 0b: Zero-mean the data (by row)
# You can make use of the mean and repmat/bsxfun functions.
# patches = patches - patches.mean(axis=0)
patch_mean = patches.mean(axis=1)
patches = patches - np.tile(patch_mean, (patches.shape[1], 1)).transpose()

##================================================================
## Step 1a: Implement PCA to obtain xRot
# Implement PCA to obtain xRot, the matrix in which the data is expressed
# with respect to the eigenbasis of sigma, which is the matrix U.
sigma = patches.dot(patches.transpose()) / patches.shape[1]
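One possible way to finish Step 1a, plus the usual follow-on whitening, starting from the `sigma` computed above (a sketch only; `epsilon` is an assumed smoothing constant, not taken from this code):

U, S, _ = np.linalg.svd(sigma)                  # eigenbasis of the covariance sigma
x_rot = U.T.dot(patches)                        # Step 1a: rotate into the eigenbasis
epsilon = 0.1                                   # assumed regularization constant
x_pca_white = np.diag(1.0 / np.sqrt(S + epsilon)).dot(x_rot)  # PCA whitening
x_zca_white = U.dot(x_pca_white)                # ZCA whitening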
import numpy as np
import scipy.optimize
import matplotlib.pyplot as plt

from sparse_autoencoder import sparse_autoencoder_cost, initialize_parameters
from display_network import display_network
from load_MNIST import load_MNIST_images

# Loading 10K images from MNIST database
images = load_MNIST_images('data/mnist/train-images-idx3-ubyte')
patches = images[:, :10000]
n_patches = patches.shape[1]  # Number of patches

# Randomly sample 200 patches and save as an image file
image = display_network(patches[:, [np.random.randint(n_patches) for i in range(200)]])
plt.figure()
plt.imsave('sparse_autoencoder_minist_patches.png', image, cmap=plt.cm.gray)
plt.imshow(image, cmap=plt.cm.gray)

visible_size = patches.shape[0]  # Number of input units
hidden_size = 196                # Number of hidden units
weight_decay_param = 3e-3        # Weight decay parameter, which is the lambda in lecture notes
beta = 3                         # Weight of sparsity penalty term
sparsity_param = 0.1             # Desired average activation of the hidden units.

# Randomly initialize the fitting parameters
theta = initialize_parameters(hidden_size, visible_size)
input_size = 28 * 28   # Size of input vector (MNIST images are 28x28)
num_classes = 10       # Number of classes (MNIST images fall into 10 classes)
weight_decay = 1e-4    # Weight decay parameter
max_iter = 400

theta = 0.005 * np.random.randn(num_classes * input_size)

trained = softmax_train(input_size, num_classes, weight_decay, data, labels, max_iter)
np.save('softmax.model', trained)
display_network.display_network('softmax.png', trained.T)

# test on the test data
test_data, test_labels = sample_images.get_mnist_data('../data/mnist.pkl.gz',
                                                      train=False, num_examples=1e10)
predicted_labels = softmax_predict(trained, test_data)
assert len(predicted_labels) == len(test_labels)
print(np.mean(predicted_labels == test_labels))
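softmax_predict is not shown in this excerpt; a minimal sketch of what such a predictor typically does, assuming `theta` reshapes to a (num_classes, input_size) matrix and `data` holds one example per column (both assumptions about the surrounding code):

import numpy as np

def softmax_predict(theta, data):
    # softmax is monotonic, so the argmax of the raw class scores is enough
    W = np.asarray(theta).reshape(-1, data.shape[0])
    return np.argmax(W.dot(data), axis=0)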
patches, _ = sample_images.get_mnist_data('../data/mnist.pkl.gz',
                                          lambda l: l >= 5,
                                          train=True,
                                          num_samples=num_samples)

# set up L-BFGS args
theta = sparse_autoencoder.initialize_params(hidden_size, visible_size)
sae_cost = partial(sparse_autoencoder.cost,
                   visible_size=visible_size,
                   hidden_size=hidden_size,
                   weight_decay=weight_decay,
                   beta=beta,
                   sparsity_param=sparsity_param,
                   data=patches)

# Train!
trained, cost, d = scipy.optimize.lbfgsb.fmin_l_bfgs_b(sae_cost, theta,
                                                        maxfun=max_iter, m=100,
                                                        factr=10.0, pgtol=1e-8,
                                                        iprint=1)

# Save the trained weights
W1, W2, b1, b2 = sparse_autoencoder.unflatten_params(trained, hidden_size, visible_size)
display_network.display_network('mnist-features.png', W1.T)
np.save('mnist.5to9.model', trained)
# Set training options
options_ = {'maxiter': 1000, 'disp': True}

# Minimize cost function J by modifying parameters θ using the L-BFGS-B optimization algorithm
result = scipy.optimize.minimize(J, θ, method='L-BFGS-B', jac=True, options=options_)

# Get optimized parameter vector
opt_θ = result.x
print(result)

############################################################################################
# =============================== Visualization ========================================== #
############################################################################################

# Slice fan-in weight vector of the hidden layer from optimized parameter vector
W1 = opt_θ[0:hidden_size * input_size].reshape(hidden_size, input_size).transpose()

# Reshape the fan-in weight vector to obtain features learned by each hidden layer neuron
display_network.display_network(W1, filename='weights-' + str(hidden_size) + '-' + str(ρ) + '-' + str(λ) + '-' + str(β) + '.png')

############################################################################################
def test_bc():
    '''Test the convolutional autoencoder using hematoxylin image patches.'''
    # %%
    # load the hematoxylin patch data
    n_channels = 144
    filter_size = 28
    stride_size = 7
    input_size = 128

    n_epochs = 10000
    n_display = 100
    n_reload = 100

    patch_per_image = 10
    patch_per_display = 16

    test_xs, _ = read_hematoxylin_data_sets(input_size, patch_per_image)
    mean_img = np.mean(test_xs, axis=0)
    # mean_img = np.zeros((128, 128))

    ae = eXclusiveConvolutionalAutoencoder(
        input_shape=[None, input_size, input_size, 1],
        layers=[
            {
                'n_channels': n_channels,
                'reconstructive_regularizer': 1.0,
                'weight_decay': 1.0,
                'sparse_regularizer': 1.0,
                'sparsity_level': 0.05,
                'exclusive_regularizer': 1.0,
                'tied_weight': True,
                'filter_size': filter_size,
                'stride_size': stride_size,
                'corrupt_prob': 0.5,
                'padding_type': 'SAME',
                'encode': 'sigmoid',
                'decode': 'linear',
                'pathways': [
                    range(0, 128),
                    range(0, 256),
                ],
            },
        ],
        init_encoder_weights=None,
        init_decoder_weights=None,
        init_encoder_biases=None,
        init_decoder_biases=None,
    )

    # %%
    learning_rate = 0.01

    optimizer_list = []
    for layer_i in range(1):
        optimizer_list.append(tf.train.AdamOptimizer(learning_rate).minimize(
            ae['layerwise_cost'][layer_i]['total'],
            var_list=[
                ae['encoder_weights'][layer_i],
                ae['encoder_biases'][layer_i],
                # ae['decoder_weights'][layer_i],
                # ae['decoder_biases'][layer_i],
            ]))
    optimizer_full = tf.train.AdamOptimizer(learning_rate).minimize(ae['cost']['total'])

    # %%
    # We create a session to use the graph
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())

    # %%
    # Fit all training data, one layer at a time
    for layer_i in range(1):
        for epoch_i in range(n_epochs):
            if epoch_i % n_reload == 0:
                batch_xs, batch_ys = read_hematoxylin_data_sets(input_size, patch_per_image)
                train = []
                # patches whose label is class 0
                train.append(np.array([img.reshape((input_size, input_size, 1)) - mean_img.reshape((input_size, input_size, 1))
                                       for img in batch_xs[np.where(np.any(np.array([
                                           batch_ys[:, 0],
                                           # batch_ys[:, 1],
                                       ]) == 1, axis=0))]]))
                # patches whose label is class 1
                train.append(np.array([img.reshape((input_size, input_size, 1)) - mean_img.reshape((input_size, input_size, 1))
                                       for img in batch_xs[np.where(np.any(np.array([
                                           # batch_ys[:, 0],
                                           batch_ys[:, 1],
                                       ]) == 1, axis=0))]]))

            feed = {ae['training_x'][0]: train[0], ae['training_x'][1]: train[1]}
            sess.run(optimizer_list[layer_i], feed_dict=feed)

            if (epoch_i + 1) % n_display == 0:
                cost_total = sess.run(ae['layerwise_cost'][layer_i]['total'], feed_dict=feed)
                cost_reconstruction_error = sess.run(ae['layerwise_cost'][layer_i]['reconstruction_error'], feed_dict=feed)
                cost_sparsity = sess.run(ae['layerwise_cost'][layer_i]['sparsity'], feed_dict=feed)
                cost_exclusivity = sess.run(ae['layerwise_cost'][layer_i]['exclusivity'], feed_dict=feed)
                cost_weight_decay = sess.run(ae['layerwise_cost'][layer_i]['weight_decay'], feed_dict=feed)
                print('layer:{}, epoch:{:d}, cost:{:f}, recon:{:f}, sparsity:{:f}, weight:{:f}, exclusivity:{:f}'.format(
                    layer_i + 1, epoch_i + 1, cost_total, cost_reconstruction_error,
                    cost_sparsity, cost_weight_decay, cost_exclusivity))

                test_xs, test_ys = read_hematoxylin_data_sets(input_size, patch_per_image)
                test_xs = np.array([img.reshape((input_size, input_size, 1)) - mean_img.reshape((input_size, input_size, 1))
                                    for img in test_xs[np.where(np.any(np.array([
                                        test_ys[:, 0],
                                        test_ys[:, 1],
                                    ]) == 1, axis=0))][np.random.randint(0, test_xs.shape[0], patch_per_display)]])

                recon = sess.run(ae['layerwise_y'][layer_i], feed_dict={ae['x']: test_xs})
                weights = sess.run(ae['encoder_weights'][0])
                bias = sess.run(ae['encoder_biases'][0])

                data = {'weights': weights, 'bias': bias}
                with open('bc.pickle', 'wb') as fp:
                    pickle.dump(data, fp, protocol=pickle.HIGHEST_PROTOCOL)

                display_network(weights.transpose((3, 0, 1, 2)).reshape((n_channels, filter_size * filter_size)).transpose(), filename='bc_weights.png')
                display_network(test_xs.reshape((patch_per_display, input_size * input_size)).transpose(), filename='bc_test.png')
                display_network(recon.reshape((patch_per_display, input_size * input_size)).transpose(), filename='bc_results.png')

    # fine-tune the full model with optimizer_full
    for epoch_i in range(n_epochs):
        if epoch_i % n_reload == 0:
            batch_xs, batch_ys = read_hematoxylin_data_sets(input_size, patch_per_image)
            train = []
            train.append(np.array([img.reshape((input_size, input_size, 1)) - mean_img.reshape((input_size, input_size, 1))
                                   for img in batch_xs[np.where(np.any(np.array([
                                       batch_ys[:, 0],
                                       # batch_ys[:, 1],
                                   ]) == 1, axis=0))]]))
            train.append(np.array([img.reshape((input_size, input_size, 1)) - mean_img.reshape((input_size, input_size, 1))
                                   for img in batch_xs[np.where(np.any(np.array([
                                       # batch_ys[:, 0],
                                       batch_ys[:, 1],
                                   ]) == 1, axis=0))]]))

        feed = {ae['training_x'][0]: train[0], ae['training_x'][1]: train[1]}
        sess.run(optimizer_full, feed_dict=feed)

        if (epoch_i + 1) % n_display == 0:
            cost_total = sess.run(ae['cost']['total'], feed_dict=feed)
            cost_reconstruction_error = sess.run(ae['cost']['reconstruction_error'], feed_dict=feed)
            cost_sparsity = sess.run(ae['cost']['sparsity'], feed_dict=feed)
            cost_exclusivity = sess.run(ae['cost']['exclusivity'], feed_dict=feed)
            cost_weight_decay = sess.run(ae['cost']['weight_decay'], feed_dict=feed)
            print('layer:{}, epoch:{:d}, cost:{:f}, recon:{:f}, sparsity:{:f}, weight:{:f}, exclusivity:{:f}'.format(
                'F', epoch_i + 1, cost_total, cost_reconstruction_error,
                cost_sparsity, cost_weight_decay, cost_exclusivity))

            test_xs, test_ys = read_hematoxylin_data_sets(input_size, patch_per_image)
            test_xs = np.array([img.reshape((input_size, input_size, 1)) - mean_img.reshape((input_size, input_size, 1))
                                for img in test_xs[np.where(np.any(np.array([
                                    test_ys[:, 0],
                                    test_ys[:, 1],
                                ]) == 1, axis=0))][np.random.randint(0, test_xs.shape[0], patch_per_display)]])

            recon = sess.run(ae['y'], feed_dict={ae['x']: test_xs})
            weights = sess.run(ae['encoder_weights'][0])
            bias = sess.run(ae['encoder_biases'][0])

            data = {'weights': weights, 'bias': bias}
            with open('bc.pickle', 'wb') as fp:
                pickle.dump(data, fp, protocol=pickle.HIGHEST_PROTOCOL)

            display_network(weights.transpose((3, 0, 1, 2)).reshape((n_channels, filter_size * filter_size)).transpose(), filename='bc_weights.png')
            display_network(test_xs.reshape((patch_per_display, input_size * input_size)).transpose(), filename='bc_test.png')
            display_network(recon.reshape((patch_per_display, input_size * input_size)).transpose(), filename='bc_results.png')
mndata.train_img_fname = 'train-images.idx3-ubyte'
mndata.train_lbl_fname = 'train-labels.idx1-ubyte'
mndata.load_testing()
X = mndata.test_images
X0 = np.asarray(X)[:1000, :] / 256.0
X = X0

# K-MEANS CLUSTERING & PREDICTION
K = 10
kmeans = KMeans(n_clusters=K).fit(X)
pred_label = kmeans.predict(X)
print(type(kmeans.cluster_centers_.T))
print(kmeans.cluster_centers_.T.shape)

# PLOT CLUSTER CENTERS
A = display_network(kmeans.cluster_centers_.T, K, 1)
f1 = plt.imshow(A, interpolation='nearest', cmap="jet")
f1.axes.get_xaxis().set_visible(False)
f1.axes.get_yaxis().set_visible(False)
plt.show()

# SAVE PLOT
# a colormap and a normalization instance
cmap = plt.cm.jet
norm = plt.Normalize(vmin=A.min(), vmax=A.max())
# map the normalized data to colors
# image is now RGBA (512x512x4)
image = cmap(norm(A))
scipy.misc.imsave('aa.png', image)  # pip install pillow

# PLOT NEIGHBORS
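Note that scipy.misc.imsave was removed in SciPy 1.2; on newer installations the same RGBA array can be written with Matplotlib instead (a drop-in sketch for the call above):

import matplotlib.pyplot as plt

plt.imsave('aa.png', image)  # writes the RGBA array produced by cmap(norm(A))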
J = lambda theta: sparse_autoencoder_cost(theta, input_size, hidden_size_L1,
                                          lambda_, sparsity_param, beta, train_data)

options = {'maxiter': maxiter, 'disp': True}
results = scipy.optimize.minimize(J, sae1_theta, method='L-BFGS-B', jac=True, options=options)
sae1_opt_theta = results['x']
print("Show the results of the optimization as follows.\n")
print(results)

# Visualize weights
visualize_weights = False
if visualize_weights:
    W1 = sae1_opt_theta[0:hidden_size_L1 * input_size].reshape((hidden_size_L1, input_size))
    image = display_network(W1.T)
    plt.figure()
    plt.imshow(image, cmap=plt.cm.gray)
    plt.show()

"""
STEP 2: Train the second sparse autoencoder

This trains the second sparse autoencoder on the first autoencoder's features.
If you've correctly implemented sparse_autoencoder_cost, you don't need to
change anything here.
"""
sae1_features = feedforward_autoencoder(sae1_opt_theta, hidden_size_L1, input_size, train_data)
                   hidden_size=hidden_size,
                   weight_decay=weight_decay,
                   beta=beta,
                   sparsity_param=sparsity_param,
                   data=patches)

# Train!
trained, cost, d = scipy.optimize.lbfgsb.fmin_l_bfgs_b(sae_cost, theta,
                                                        maxfun=max_iter, m=1000,
                                                        factr=1.0, pgtol=1e-100,
                                                        iprint=1)

'''
# numerical approximation (way too slow!)
trained, cost, d = scipy.optimize.lbfgsb.fmin_l_bfgs_b(
    lambda x: sae_cost(x)[0], theta,
    approx_grad=True, maxfun=max_iter, m=1000, iprint=1)
'''

# Save the trained weights
W1, W2, b1, b2 = sparse_autoencoder.unflatten_params(trained, hidden_size, visible_size)
display_network.display_network('weights.png', W1.T)
# debug (set to True in Ex 3)
debug = False

# ======================================================================
# Exercise 1: Load MNIST
# In this exercise, you will load the MNIST dataset
# First download the dataset from the following website:
# Training Images: http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
# Training Labels: http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz

# Loading Sample Images
# Loading 10K images from MNIST database
images = load_MNIST.load_MNIST_images('train-images.idx3-ubyte')
patches = images[:, 0:10000]
patches = patches[:, 1:200]
display_network.display_network(patches[:, 0:100])

# Now you will use the display_network function to display different subsets of the MNIST dataset
# The display is saved in the directory under the name weights.
# Display 10, 50 and 100 datasets
### YOUR CODE HERE ###
# display 10
display_network.display_network(patches[:, 0:10], 'weights10.png')
# display 50
display_network.display_network(patches[:, 0:50], 'weights50.png')
# display 100
display_network.display_network(patches[:, 0:100], 'weights100.png')

# Obtain random parameters theta
# You need to implement utils_hw.initialize in utils_hw.py
print(num_grad, grad)

# Compare numerically computed gradients with the ones obtained from backpropagation
diff = np.linalg.norm(num_grad - grad) / np.linalg.norm(num_grad + grad)
print(diff)
print("Norm of the difference between numerical and analytical gradients (should be < 1e-9)\n\n")

##======================================================================
## STEP 4: After verifying that your implementation of
# sparseAutoencoderCost is correct, you can start training your sparse
# autoencoder with minFunc (L-BFGS).

# Randomly initialize the parameters
theta = sparse_autoencoder.initialize(hidden_size, visible_size)

J = lambda x: sparse_autoencoder.sparse_autoencoder_cost(x, visible_size, hidden_size,
                                                         lambda_, sparsity_param, beta, patches)
options_ = {'maxiter': 400, 'disp': True}
result = scipy.optimize.minimize(J, theta, method='L-BFGS-B', jac=True, options=options_)
opt_theta = result.x
print(result)

##======================================================================
## STEP 5: Visualization
W1 = opt_theta[0:hidden_size * visible_size].reshape(hidden_size, visible_size).transpose()
display_network.display_network(W1)
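num_grad above typically comes from a central-difference approximation; a minimal sketch of such a helper (the name, eps, and the assumption that the cost function returns the scalar cost first are all illustrative):

import numpy as np

def numerical_gradient(cost_fn, theta, eps=1e-4):
    # perturb one parameter at a time and take central differences
    grad = np.zeros_like(theta)
    for i in range(theta.size):
        e = np.zeros_like(theta)
        e[i] = eps
        grad[i] = (cost_fn(theta + e)[0] - cost_fn(theta - e)[0]) / (2 * eps)
    return grad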
num_samples = 10000
patches, _ = sample_images.get_mnist_data('../data/mnist.pkl.gz',
                                          train=True, num_examples=num_samples)

# set up L-BFGS args
theta = sparse_autoencoder.initialize_params(hidden_size, visible_size)
sae_cost = partial(sparse_autoencoder.cost,
                   visible_size=visible_size,
                   hidden_size=hidden_size,
                   weight_decay=weight_decay,
                   beta=beta,
                   sparsity_param=sparsity_param,
                   data=patches)

# Train!
trained, cost, d = scipy.optimize.lbfgsb.fmin_l_bfgs_b(sae_cost, theta,
                                                        maxfun=max_iter, m=100,
                                                        factr=1.0, pgtol=1e-100,
                                                        iprint=1)

# Save the trained weights
W1, W2, b1, b2 = sparse_autoencoder.unflatten_params(trained, hidden_size, visible_size)
display_network.display_network('mnist-weights.png', W1.T)
def test_mnist():
    '''Test the convolutional autoencoder using MNIST.'''
    # %%
    # load MNIST as before
    mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
    mean_img = np.mean(mnist.train.images, axis=0)

    learning_rate = 0.001
    batch_size = 1000
    n_epochs = 10000
    n_filter_size = 14
    n_reload_per_epochs = 100
    n_display_per_epochs = 1000

    input_shape = [None, 28, 28, 1]
    xae_layers = [
        {
            'n_channels': 144,
            'reconstructive_regularizer': 1.0,
            'weight_decay': 1.0,
            'sparse_regularizer': 1.0,
            'sparsity_level': 0.05,
            'exclusive_regularizer': 1.0,
            'tied_weight': True,
            'conv_size': n_filter_size,
            'conv_stride': 2,
            'conv_padding': 'VALID',
            'pool_size': 8,
            'pool_stride': 1,
            'pool_padding': 'VALID',
            'corrupt_prob': 1.0,
            'encode': 'lrelu',
            'decode': 'linear',
            'pathways': [
                range(0, 96),
                range(48, 144),
            ],
        },
        # a second, currently disabled layer:
        # {
        #     'n_channels': 256, 'reconstructive_regularizer': 1.0, 'weight_decay': 1.0,
        #     'sparse_regularizer': 1.0, 'sparsity_level': 0.05, 'exclusive_regularizer': 1.0,
        #     'tied_weight': True, 'conv_size': 9, 'conv_stride': 1, 'conv_padding': 'VALID',
        #     # 'pool_size': 9, 'pool_stride': 1, 'pool_padding': 'VALID',
        #     'corrupt_prob': 1.0, 'encode': 'sigmoid', 'decode': 'linear',
        #     'pathways': [range(0, 96), range(48, 144)],
        # },
    ]

    ae = eXclusiveConvolutionalAutoencoder(
        input_shape=input_shape,
        layers=xae_layers,
        init_encoder_weight=None,
        init_decoder_weight=None,
        init_encoder_bias=None,
        init_decoder_bias=None,
    )

    # %%
    optimizer_list = []
    for layer_i in range(len(xae_layers)):
        optimizer_list.append(
            tf.train.AdamOptimizer(learning_rate).minimize(
                ae['layerwise_cost'][layer_i]['total'],
                var_list=[
                    ae['encoder_weight'][layer_i],
                    ae['encoder_bias'][layer_i],
                    # ae['decoder_weight'][layer_i],
                    # ae['decoder_bias'][layer_i],
                ]))
    optimizer_list.append(
        tf.train.AdamOptimizer(learning_rate).minimize(ae['cost']['total']))

    # %%
    # We create a session to use the graph
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())

    # %%
    # Fit all training data
    for layer_i, optimizer in enumerate(optimizer_list):
        for epoch_i in range(n_epochs):
            if epoch_i % n_reload_per_epochs == 0:
                batch_xs, batch_ys = mnist.train.next_batch(batch_size)
                train = []
                # digits 0 and 1 feed the first pathway
                train.append(np.array([
                    img.reshape((28, 28, 1)) - mean_img.reshape((28, 28, 1))
                    for img in batch_xs[np.where(np.any(np.array([
                        batch_ys[:, 0],
                        batch_ys[:, 1],
                        # batch_ys[:, 2], batch_ys[:, 3], batch_ys[:, 4], batch_ys[:, 5],
                        # batch_ys[:, 6], batch_ys[:, 7], batch_ys[:, 8], batch_ys[:, 9],
                    ]) == 1, axis=0))]
                ]))
                # digits 1 and 2 feed the second pathway
                train.append(np.array([
                    img.reshape((28, 28, 1)) - mean_img.reshape((28, 28, 1))
                    for img in batch_xs[np.where(np.any(np.array([
                        # batch_ys[:, 0],
                        batch_ys[:, 1],
                        batch_ys[:, 2],
                        # batch_ys[:, 3], batch_ys[:, 4], batch_ys[:, 5], batch_ys[:, 6],
                        # batch_ys[:, 7], batch_ys[:, 8], batch_ys[:, 9],
                    ]) == 1, axis=0))]
                ]))

            # sess.run(optimizer, feed_dict={ae['training_x'][0]: train[0], })
            sess.run(optimizer, feed_dict={
                ae['training_x'][0]: train[0],
                ae['training_x'][1]: train[1],
            })

            if (epoch_i + 1) % n_display_per_epochs == 0:
                data_dict = {
                    ae['training_x'][0]: train[0],
                    ae['training_x'][1]: train[1],
                }
                if optimizer is optimizer_list[-1]:
                    cost_total = sess.run(ae['cost']['total'], feed_dict=data_dict)
                    cost_reconstruction_error = sess.run(ae['cost']['reconstruction_error'], feed_dict=data_dict)
                    cost_sparsity = sess.run(ae['cost']['sparsity'], feed_dict=data_dict)
                    cost_exclusivity = sess.run(ae['cost']['exclusivity'], feed_dict=data_dict)
                    cost_weight_decay = sess.run(ae['cost']['weight_decay'], feed_dict=data_dict)
                else:
                    cost_total = sess.run(ae['layerwise_cost'][layer_i]['total'], feed_dict=data_dict)
                    cost_reconstruction_error = sess.run(ae['layerwise_cost'][layer_i]['reconstruction_error'], feed_dict=data_dict)
                    cost_sparsity = sess.run(ae['layerwise_cost'][layer_i]['sparsity'], feed_dict=data_dict)
                    cost_exclusivity = sess.run(ae['layerwise_cost'][layer_i]['exclusivity'], feed_dict=data_dict)
                    cost_weight_decay = sess.run(ae['layerwise_cost'][layer_i]['weight_decay'], feed_dict=data_dict)

                print('layer:{}, epoch:{:5d}, cost:{:.6f}, error: {:.6f}, sparsity: {:.6f}, exclusivity: {:.6f}, weight decay: {:.6f}'
                      .format(optimizer is optimizer_list[-1] and 'A' or layer_i + 1,
                              epoch_i + 1, cost_total, cost_reconstruction_error,
                              cost_sparsity, cost_exclusivity, cost_weight_decay))

                n_examples = 5120
                test_xs, test_ys = mnist.test.next_batch(n_examples)
                test_xs = np.array([
                    img.reshape((28, 28, 1)) - mean_img.reshape((28, 28, 1))
                    for img in test_xs[np.where(np.any(np.array([
                        test_ys[:, 0],
                        test_ys[:, 1],
                        test_ys[:, 2],
                        # test_ys[:, 3], test_ys[:, 4], test_ys[:, 5], test_ys[:, 6],
                        # test_ys[:, 7], test_ys[:, 8], test_ys[:, 9],
                    ]) == 1, axis=0))][:256]
                ])

                if optimizer is optimizer_list[-1]:
                    recon = sess.run(ae['y'], feed_dict={ae['x']: test_xs})
                else:
                    recon = sess.run(ae['layerwise_y'][layer_i], feed_dict={ae['x']: test_xs})

                weights = sess.run(ae['encoder_weight'][0])
                weights = weights.transpose((3, 0, 1, 2)).reshape((144, n_filter_size * n_filter_size)).transpose()

                display_network(weights, filename='mnist_weight.png')
                display_network(test_xs.reshape((256, 784)).transpose(), filename='mnist_test.png')
                display_network(recon.reshape((256, 784)).transpose(), filename='mnist_results.png')
import functools

import numpy as np

import pca
import sample_images
import display_network

if __name__ == '__main__':
    num_samples = 10000
    m = sample_images.load_matlab_images('IMAGES_RAW.mat')
    patches = sample_images.sample(m, num_samples, size=(12, 12), norm=None)
    display_network.display_network('raw-patches.png', patches)

    # ensure that patches have zero mean
    mean = np.mean(patches, axis=0)
    patches -= mean
    assert np.allclose(np.mean(patches, axis=0), np.zeros(patches.shape[1]))

    U, s, x_rot = pca.pca(patches)
    covar = pca.covariance(x_rot)
    display_network.array_to_file('covariance.png', covar)

    # percentage of variance: cumulative sum of the eigenvalues
    pov = np.array(
        functools.reduce(lambda t, l: t + [t[-1] + l]
def train():
    # STEP 0: Here we provide the relevant parameter values that will
    # allow your sparse autoencoder to get good filters; you do not need to
    # change the parameters below.
    patch_size = 8
    num_patches = 10000
    visible_size = patch_size ** 2  # number of input units
    hidden_size = 25                # number of hidden units
    sparsity_param = 0.01           # desired average activation of the hidden units.
    # (This was denoted by the Greek alphabet rho, which looks like a lower-case "p",
    # in the lecture notes).
    decay_lambda = 0.0001           # weight decay parameter
    beta = 3                        # weight of sparsity penalty term

    # STEP 1: Implement sampleIMAGES
    # After implementing sampleIMAGES, the display_network command should
    # display a random sample of 200 patches from the dataset
    patches = sample_images(patch_size, num_patches)
    # list = [randint(0, patches.shape[0]-1) for i in xrange(64)]
    # display_network(patches[list, :], 8)

    # Obtain random parameters theta
    # theta = initialize_parameters(visible_size, hidden_size)

    # STEP 2: Implement sparseAutoencoderCost
    #
    # You can implement all of the components (squared error cost, weight decay term,
    # sparsity penalty) in the cost function at once, but it may be easier to do
    # it step-by-step and run gradient checking (see STEP 3) after each step. We
    # suggest implementing the sparseAutoencoderCost function using the following steps:
    #
    # (a) Implement forward propagation in your neural network, and implement the
    #     squared error term of the cost function. Implement backpropagation to
    #     compute the derivatives. Then (using lambda=beta=0), run Gradient Checking
    #     to verify that the calculations corresponding to the squared error cost
    #     term are correct.
    #
    # (b) Add in the weight decay term (in both the cost function and the derivative
    #     calculations), then re-run Gradient Checking to verify correctness.
    #
    # (c) Add in the sparsity penalty term, then re-run Gradient Checking to
    #     verify correctness.
    #
    # Feel free to change the training settings when debugging your
    # code. (For example, reducing the training set size or
    # number of hidden units may make your code run faster; and setting beta
    # and/or lambda to zero may be helpful for debugging.) However, in your
    # final submission of the visualized weights, please use the parameters we
    # gave in Step 0 above.
    # cost, grad = sparse_autoencoder_cost_and_grad(theta, visible_size, hidden_size,
    #                                               decay_lambda, sparsity_param, beta, patches)

    # STEP 3: Gradient Checking
    #
    # Hint: If you are debugging your code, performing gradient checking on smaller models
    # and smaller training sets (e.g., using only 10 training examples and 1-2 hidden
    # units) may speed things up.

    # First, lets make sure your numerical gradient computation is correct for a
    # simple function. After you have implemented compute_numerical_gradient,
    # run the following:
    # check_numerical_gradient()

    # Now we can use it to check your cost function and derivative calculations
    # for the sparse autoencoder.
    # func = lambda x: sparse_autoencoder_cost(x, visible_size, hidden_size,
    #                                          decay_lambda, sparsity_param, beta, patches)
    # numgrad = compute_numerical_gradient(func, theta)

    # Use this to visually compare the gradients side by side
    # print numgrad, grad

    # Compare numerically computed gradients with the ones obtained from backpropagation
    # diff = np.linalg.norm(numgrad-grad)/np.linalg.norm(numgrad+grad)
    # Should be small. In our implementation, these values are usually less than 1e-9.
    # print diff

    # STEP 4: After verifying that your implementation of
    # sparse_autoencoder_cost is correct, you can start training your sparse
    # autoencoder with minFunc (L-BFGS).

    # Randomly initialize the parameters.
    # Use the minimize interface, and set jac=True, so it can accept cost and grad together
    theta = initialize_parameters(visible_size, hidden_size)
    func_args = (visible_size, hidden_size, decay_lambda, sparsity_param, beta, patches)
    res = minimize(sparse_autoencoder_cost_and_grad, x0=theta, args=func_args,
                   method='L-BFGS-B', jac=True, options={'maxiter': 400, 'disp': True})

    # STEP 5: Visualization
    w1 = res.x[0:hidden_size * visible_size].reshape((visible_size, hidden_size))
    display_network(w1.T, 5, save_figure_path='../data/sparse_autoencoder.png')
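The sparsity penalty mentioned in step (c) is the KL divergence between the target activation and the average hidden activation; a minimal sketch of that term (the names below are illustrative, not the exercise's reference code):

import numpy as np

def sparsity_penalty(rho_hat, sparsity_param, beta):
    # KL divergence between the desired activation rho and the observed
    # average activation rho_hat of each hidden unit, summed over units
    rho = sparsity_param
    kl = rho * np.log(rho / rho_hat) + (1 - rho) * np.log((1 - rho) / (1 - rho_hat))
    return beta * np.sum(kl)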
visibleSize = 8 * 8      # number of input units
hiddenSize = 25          # number of hidden units
sparsityParam = 0.01     # desired average activation of the hidden units.
                         # (This was denoted by the Greek alphabet rho, which looks like a lower-case "p",
                         # in the lecture notes).
lambdaval = 0.0001       # weight decay parameter
beta = 3                 # weight of sparsity penalty term

##======================================================================
## STEP 1: Implement sampleIMAGES
#
# After implementing sampleIMAGES, the display_network command should
# display a random sample of 200 patches from the dataset
patches = sampleIMAGES()
display_network(patches[:, randi(size(patches, 2), 200, 1).squeeze()])
plt.show()
pdb.set_trace()

# Obtain random parameters theta
theta = initializeParameters(hiddenSize, visibleSize)

##======================================================================
## STEP 2: Implement sparseAutoencoderCost
#
# You can implement all of the components (squared error cost, weight decay term,
# sparsity penalty) in the cost function at once, but it may be easier to do
# it step-by-step and run gradient checking (see STEP 3) after each step. We
# suggest implementing the sparseAutoencoderCost function using the following steps:
#
# (a) Implement forward propagation in your neural network, and implement the
    a boolean (True if use only train/validation sets, False for test set),
    and a num_samples integer to choose only the first N samples.

    Only reads mnist data from a special pickled mnist file.

    Returns array of images with shape (784, num_images).
    """
    training, valid, testing = read_mnist_file(filename)
    if train:
        t = np.array([e for e, l in izip(training[0], training[1]) if test(l)])
        v = np.array([e for e, l in izip(valid[0], valid[1]) if test(l)])
        images = np.vstack([t, v]).T
        tl = np.array([l for e, l in izip(training[0], training[1]) if test(l)])
        vl = np.array([l for e, l in izip(valid[0], valid[1]) if test(l)])
        labels = np.hstack([tl, vl])
    else:
        t = testing
        images = np.array([e for e, l in izip(t[0], t[1]) if test(l)]).T
        labels = np.array([l for e, l in izip(t[0], t[1]) if test(l)])

    assert images.shape[1] == len(labels)
    assert images.shape[0] == 784

    patches = images[:, :num_samples]
    labels = labels[:num_samples]
    assert patches.shape[0] == 784
    return patches, labels


if __name__ == '__main__':
    train, valid, test = read_mnist_file('data/mnist.pkl.gz')
    display_network.display_network('mnist.png', train[0].T[:, :100])
def __init__(self, scenario):
    self.readScenario(scenario)
    self.mLinkInsertMaxSpeedCapacity()  # add max speed and capacity
    C = self.mLink2WeightedAdjacency(self.mLink, fieldNo=6)  # capacity
    U = self.mLink2WeightedAdjacency(self.mLink, fieldNo=5)  # max speed
    L = self.mLink2WeightedAdjacency(self.mLink, fieldNo=3)  # link distance
    S = ifn.capacity2stochastic(C)  # Markov stochastic matrix

    # first try at arbitrary kappa=100
    pi = ifn.steadyStateMC(S, kappa=100)  # node values
    F = ifn.idealFlow(S, pi)              # ideal flow
    G = self.HadamardDivision(F, C)       # congestion
    maxCongestion = np.max(G)

    if self.calibrationBasis == "flow":
        # calibrate with new kappa to reach totalFlow
        kappa = self.totalFlow
    else:  # calibrationBasis == "congestion"
        # calibrate with new kappa to reach max congestion level
        kappa = 100 * float(self.maxAllowableCongestion) / maxCongestion  # total flow

    # compute ideal flow and congestion
    pi = ifn.steadyStateMC(S, kappa)  # node values
    F = ifn.idealFlow(S, pi)          # scaled ideal flow
    G = self.HadamardDivision(F, C)   # congestion
    maxCongestion = np.max(G)

    # compute link performances
    self.mLink = self.addFlow2mLink(self.mLink, F)  # fieldNo=7 flow
    self.mLink = self.addFlow2mLink(self.mLink, G)  # fieldNo=8 congestion level
    self.mLink = self.computeLinkPerformance(self.mLink, self.travelTimeModel, self.cloudNode)  # fieldNo=9 to 11

    # save output mLink
    mR, mC = self.mLink.shape
    fmt = "%d,%d,%d,%0.3f,%d,%0.3f,%d,%0.3f,%0.3f,%0.3f,%0.3f,%0.3f"
    header = "LinkNo,Node1,Node2,Dist,Lanes,MaxSpeed,Capacity,Flow,Congestion,Speed,TravelTime,Delay"
    mLink2 = self.mLink.T
    with open(self.folder + self.scenarioName + '.csv', 'w') as fh:
        for j in range(mC):
            col = mLink2[j, :]
            if j == 0:
                np.savetxt(fh, col.reshape(1, -1), fmt=fmt, header=header, delimiter=',')
            else:
                np.savetxt(fh, col.reshape(1, -1), fmt=fmt, delimiter=',')

    # network performance
    avgSpeed = np.nanmean(self.mLink[9, :])
    avgTravelTime = np.nanmean(self.mLink[10, :])
    avgDelay = np.nanmean(self.mLink[11, :])
    avgDist = np.nanmean(self.mLink[3, :])

    # save network performance
    with open(self.folder + self.scenarioName + '.net', 'w') as fh:
        fh.write("totalFlow=" + str(kappa) + "\n")              # in pcu/hour
        fh.write("maxCongestion=" + str(maxCongestion) + "\n")
        fh.write("avgSpeed=" + str(avgSpeed) + "\n")            # in km/hour
        fh.write("avgTravelTime=" + str(avgTravelTime) + "\n")  # in hour
        fh.write("avgDelay=" + str(avgDelay) + "\n")            # in hour
        fh.write("avgDist=" + str(avgDist) + "\n")              # in meter

    # report
    print(self.scenarioName)
    print("Network performance:")
    print("\tTotal Flow = ", round(kappa, 2), " pcu/hour")
    print("\tMax Congestion = ", round(maxCongestion, 4))
    print("\tAvg Link Speed =", round(avgSpeed, 4), " km/hour")
    print("\tAvg Link Travel Time = ", round(1000 * 60 * avgTravelTime / avgDist, 4), " min/km")
    print("\tAvg Link Delay = ", round(1000 * 3600 * avgDelay / avgDist, 4), " seconds/km")
    print("Basis:")
    print("\tAvg Link Distance = ", round(avgDist, 4), " m/link")
    print("\tAvg Link Travel Time = ", round(3600 * avgTravelTime, 4), " seconds/link")
    print("\tAvg Link Delay = ", round(3600 * avgDelay, 4), " seconds/link")

    arrThreshold = [0.8, 0.9, 1]
    plt = dn.display_network(self.mLink.T, 8, self.mNode.T, arrThreshold, 9)  # display congestion
    plt.show()
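HadamardDivision is not shown in this excerpt; since the congestion level is the element-wise ratio of flow to capacity, a minimal standalone sketch could look like the following (treating zero-capacity links as zero congestion is an assumption about the intended behavior):

import numpy as np

def hadamard_division(F, C):
    # element-wise flow / capacity; links with zero capacity report zero congestion
    with np.errstate(divide='ignore', invalid='ignore'):
        return np.where(C > 0, F / C, 0.0)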
    a function that accepts one label argument (e.g. only get digits 5-9),
    a boolean (True if use only train/validation sets, False for test set),
    and a num_samples integer to choose only the first N samples.

    Only reads mnist data from a special pickled mnist file.

    Returns array of images with shape (784, num_images).
    """
    training, valid, testing = read_mnist_file(filename)
    if train:
        t = np.array([e for e, l in izip(training[0], training[1]) if test(l)])
        v = np.array([e for e, l in izip(valid[0], valid[1]) if test(l)])
        images = np.vstack([t, v]).T
        tl = np.array([l for e, l in izip(training[0], training[1]) if test(l)])
        vl = np.array([l for e, l in izip(valid[0], valid[1]) if test(l)])
        labels = np.hstack([tl, vl])
    else:
        t = testing
        images = np.array([e for e, l in izip(t[0], t[1]) if test(l)]).T
        labels = np.array([l for e, l in izip(t[0], t[1]) if test(l)])

    assert images.shape[1] == len(labels)
    assert images.shape[0] == 784

    patches = images[:, :num_samples]
    labels = labels[:num_samples]
    assert patches.shape[0] == 784
    return patches, labels


if __name__ == "__main__":
    train, valid, test = read_mnist_file("data/mnist.pkl.gz")
    display_network.display_network("mnist.png", train[0].T[:, :100])