def main():
    """Train a single-layer sparse filtering network with TensorFlow.

    Reads the ``'X'`` entry of ``data/patches.mat`` (resolved relative to this
    file), subtracts the mean taken along axis 0, transposes and casts it to
    float32, then minimizes the summed output of ``SparseFilter.feed_forward``
    with plain gradient descent. The per-epoch cost is printed and plotted,
    and the learned weights are drawn at the end.
    """
    # define global parameters
    filename = 'patches.mat'
    n_filters = 100
    learn_rate = 0.001
    iterations = [200]

    # load in data and preprocess
    print("loading data...")
    base_path = os.path.dirname(__file__)
    file_path = os.path.join(base_path, "data", filename)
    data = loadmat(file_path)['X']
    data -= data.mean(axis=0)
    data = np.float32(data.T)

    # construct the network
    print("building model...")
    weights = tf.Variable(tf.random_uniform([n_filters, data.shape[1]]))
    model = SparseFilter(weights, data)

    # define loss, optimizer, and train function
    loss = tf.reduce_sum(model.feed_forward())
    optimizer = tf.train.GradientDescentOptimizer(learn_rate)
    train = optimizer.minimize(loss)

    # initialize all the variables
    init = tf.initialize_all_variables()

    cost_running = []

    # run the session; the context manager releases it even if training fails
    with tf.Session() as sess:
        sess.run(init)

        # train the sparse filtering network
        print("training network...")
        t = time.time()

        # iterate over training epochs
        for epoch in range(iterations[0]):
            sess.run(train)
            # the cost is evaluated in a separate run, i.e. after the update
            current_cost = sess.run(loss)
            cost_running.append(current_cost)
            print("Cost at epoch %i: %0.4f" % (epoch, current_cost))

        # calculate and display elapsed training time
        elapsed = time.time() - t
        print('Elapsed training time: %f' % elapsed)

        # pull the trained weights out of the graph before the session closes
        weights_final = sess.run(weights)

    # plot the cost function over time
    c = {'layer0': cost_running}
    visualize.plotCost(c)

    # visualize the receptive fields of the first layer
    print(weights_final.shape)
    visualize.drawplots(weights_final.T, color='gray', convolution='n',
                        pad=0, examples=None, channels=1)
print("Layer %i cost at epoch %i and batch %i: %f" % (l + 1, epoch, batch_index, c)) # add layer cost and weights to the dictionaries cost['layer' + str(l)] = cost_layer weights['layer' + str(l)] = w # save model to dictionary models[model_type[0]] = model # calculate and display elapsed training time elapsed = time.time() - t print('Elapsed training time: %f' % elapsed) # plot the cost function over time visualize.plotCost(cost) # visualize the receptive fields of the first layer visualize.drawplots(weights['layer0'].T, color='gray', convolution=convolution, pad=0, examples=None, channels=channels) # get activations of first layer and save in dictionary f_hat, _, _, _, _, _ = outputs[0](data) f_hats[model_type[0]] = f_hat # project activations of both networks up using local connections group_matrix = connections.gMatToroidal(n_filters,
import numpy as np
from utilities.visualize import plotCost, drawplots
from scipy.io import loadmat

#################### MAIN SCRIPT #########################

# read the 'X' entry of <script dir>/data/patches.mat
filename = 'patches.mat'
basepath = os.path.dirname(__file__)
filepath = os.path.join(basepath, "data", filename)
data = loadmat(filepath)['X']

# subtract the mean taken along axis 0, then cast to float32
data -= data.mean(axis=0)
data = np.float32(data)

# build a one-layer 'SF' network and compile its training functions
wDims = [[100, 256]]
model = sparse_filtering.network(
    model_type='SF',
    wDims=wDims,
    p=None,
    gMat=None,
    gSize=None,
    step=None,
    lr=0.01,
)
train = model.training_functions(data)

# call the first training function maxIter times, recording every cost
maxIter = 100
cost_master = []
for i in range(maxIter):
    cost, w = train[0]()
    cost_master.append(cost)
    print("The cost at iteration %i: %f" % (i, cost))

# show the cost curve and the weights returned by the last iteration
plotCost(cost_master)
drawplots(w.T)
def _upload_and_remove(key, path):
    """Upload local file *path* to S3 under the same key name, then delete it."""
    key.key = path
    key.set_contents_from_filename(path)
    os.remove(path)


def main():
    """Train a sparse filtering network configured from the command line.

    Steps, in order: parse the options, load and pre-process ``data/<filename>``,
    build ``sf.Network``, train every layer (mini-batch gradient descent or
    L-BFGS), save the pickled model, the weights and a log file into a
    time-stamped sub-folder of ``./saved`` (optionally uploading each file to
    S3 and deleting the local copy), optionally save per-batch activations
    and show figures, and optionally evaluate a linear SVM on every layer.

    Raises:
        AssertionError: if the number of examples is not a multiple of the
            batch size, if the number of models and iteration counts differ,
            or if a grouped first model is requested without ``--group`` and
            ``--step``.
    """
    # parse options from the command line
    parser = argparse.ArgumentParser(
        prog='PROG',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=textwrap.dedent('''\
        -------------------------------------------------------------------------------------------------------------
        This is a deep neural network architecture for training sparse filters. Example uses:
            $ python test.py
            $ python test.py -m GroupSF -v 1 -g 3 -s 1
            $ python test.py -m ConvolutionalSF -d 16 1 8 8 -v 1 -w y -c y -f CIFAR_data.mat -i 100
            $ python test.py -m ConvolutionalSF ConvolutionalSF -d 16 1 6 6 16 16 4 4 -w y -c y -f CIFAR_data.mat
                -i 100 150 -t y -v 1
        In the convolutional case, the extra "1" is added automatically for broadcasting.
        -------------------------------------------------------------------------------------------------------------
        '''))
    parser.add_argument("-m", "--model", default=['SparseFilter'], nargs='+',
                        help="the model type")
    parser.add_argument("-c", "--convolution", default="n",
                        help="convolution, yes or no")
    parser.add_argument("-f", "--filename", default="patches.mat",
                        help="the data filename")
    parser.add_argument(
        "-d", "--dimensions", type=int, nargs='+', default=[100, 256],
        help="the dimensions of the model: [neurons, input size] or [neurons, length, width]")
    parser.add_argument("-p", "--pool", type=int, nargs='+', default=None,
                        help="pooling dimensions")
    parser.add_argument("-g", "--group", type=int, default=None,
                        help="group size")
    parser.add_argument("-s", "--step", type=int, default=None,
                        help="step size")
    parser.add_argument("-l", "--learn_rate", type=float, default=.001,
                        help="learning rate")
    parser.add_argument("-i", "--iterations", type=int, nargs='+', default=[100],
                        help="number of iterations")
    parser.add_argument("-v", "--verbosity", type=int, default=0,
                        help="verbosity: 0 no plot; 1 plots")
    parser.add_argument("-o", "--opt", default="GD",
                        help="optimization method: GD or L-BFGS")
    parser.add_argument("-w", "--whitening", default='n',
                        help="whitening: 'y' or 'n'")
    parser.add_argument("-t", "--test", default='n',
                        help="test classification performance: 'y' or 'n'")
    parser.add_argument("-a", "--channels", type=int, default=1,
                        help="number of channels in data")
    parser.add_argument("-e", "--examples", type=int, default=None,
                        help="number of training examples")
    parser.add_argument("-b", "--batch_size", type=int, default=1000,
                        help="number of examples in [mini]batch")
    parser.add_argument("-z", "--aws", default='n',
                        help="run on aws: 'y' or 'n'")
    args = parser.parse_args()
    args.dimensions = parse_dims(args)
    args.iterations = parse_iter(args)

    # =================================== Load in the data ===================================

    # load in data
    print("loading data...")
    base_path = os.path.dirname(__file__)
    file_path = os.path.join(base_path, "data", args.filename)
    data = loadmat(file_path)['X']

    # reshape and preprocess data
    print("pre-processing data ...")
    video = None
    if args.filename == 'patches_video.mat':
        # keep the un-flattened array for the video visualization further down
        video = data
        data = data.reshape(data.shape[0] * data.shape[1], data.shape[2]).T

    if args.convolution == 'n':
        if args.whitening == 'y':
            data -= data.mean(axis=0)
            data = whiten(data)
        elif args.whitening == 'n' and args.channels == 1:
            data -= data.mean(axis=0)
        data = np.float32(data.T)

    elif args.convolution == 'y':
        if args.filename == 'kyotoData.mat':
            side = int(np.sqrt(data.shape[1]))
            data = np.float32(data.reshape(-1, 1, side, side))
            data = scaling.LCNinput(data, kernel_shape=9)

        elif args.filename == 'CIFAR_data.mat':
            side = int(np.sqrt(data.shape[1]))
            data = np.float32(data.reshape(-1, 1, side, side))
            data = scaling.LCNinput(data, kernel_shape=5)
            data = data[0:args.examples, :, :, :]

        elif args.filename == 'STL_10.mat' or args.filename == 'Lenna.mat':
            side = int(np.sqrt(data.shape[1] // 3))
            data = np.float32(data.reshape(-1, 3, side, side))
            data = data[0:args.examples, :, :, :]
            args.channels = data.shape[1]
            # run scaling.LCNinput on every channel separately
            for channel in range(args.channels):
                single = data[:, channel, :, :].reshape(
                    (data.shape[0], 1, data.shape[2], data.shape[3]))
                data[:, channel, :, :] = np.reshape(
                    scaling.LCNinput(single, kernel_shape=9),
                    (data.shape[0], data.shape[2], data.shape[3]))

    # assert that batch size is valid and get number of batches
    n_batches, rem = divmod(data.shape[0], args.batch_size)
    assert rem == 0, "number of examples must be a multiple of the batch size"

    # other assertions
    assert len(args.model) == len(args.iterations), \
        "one iteration count is required per model layer"
    if args.model[0] == 'GroupSF' or args.model[0] == 'GroupConvolutionalSF':
        assert args.group is not None, "grouped models require --group"
        assert args.step is not None, "grouped models require --step"

    # ============================= Build and train the network =============================

    # construct the network
    print("building model...")
    model = sf.Network(
        model_type=args.model,
        weight_dims=args.dimensions,
        p=args.pool,
        group_size=args.group,
        step=args.step,
        lr=args.learn_rate,
        opt=args.opt,
        c=args.convolution,
        test=args.test,
        batch_size=args.batch_size,
    )  # TODO: custom learning rates for each layer

    # compile the training, output, and test functions for the network
    print("compiling theano functions...")
    train, outputs, test = model.training_functions(data)

    # train the sparse filtering network
    print("training network...")
    t = time.time()
    cost = {}
    weights = {}
    for l in range(model.n_layers):

        cost_layer = []
        w = None

        # iterate over training epochs
        if args.opt == 'GD':
            for epoch in range(args.iterations[l]):
                # go though [mini]batches
                for batch_index in range(n_batches):
                    c, w = train[l](index=batch_index)
                    cost_layer.append(c)
                    print("Layer %i cost at epoch %i and batch %i: %f" %
                          (l + 1, epoch, batch_index, c))

        elif args.opt == 'L-BFGS':
            w = minimize(train[l], model.layers[l].w.eval().flatten(),
                         method='L-BFGS-B', jac=True,
                         options={'maxiter': args.iterations[l], 'disp': True})

            # reshape the flat solution with the dimensions of *this* layer
            # (the first layer's dimensions were previously used for all)
            layer_dims = args.dimensions[l]
            if args.convolution == 'n':
                w = w.x.reshape(layer_dims[0], layer_dims[1])
            elif args.convolution == 'y':
                w = w.x.reshape(layer_dims[0], layer_dims[1],
                                layer_dims[2], layer_dims[3])

        # add layer cost and weights to the dictionaries
        cost['layer' + str(l)] = cost_layer
        weights['layer' + str(l)] = w

    # calculate and display elapsed training time
    elapsed = time.time() - t
    print('Elapsed training time: %f' % elapsed)

    # connect to S3 first when requested; `k` stays None otherwise
    k = None
    if args.aws == 'y':
        import boto
        from boto.s3.key import Key
        s3 = boto.connect_s3()
        my_bucket = 'dlacombejr.bucket'
        bucket = s3.get_bucket(my_bucket)
        k = Key(bucket)

    # create sub-folder for saved model; makedirs also creates ./saved itself
    directory_format = "./saved/%4d-%02d-%02d_%02dh%02dm%02ds"
    directory_name = directory_format % time.localtime()[0:6]
    os.makedirs(directory_name)

    # save the model for later use; pickle needs a binary file handle
    full_path = directory_name + '/model.pkl'
    with open(full_path, 'wb') as model_file:
        pickle.dump(model, model_file, pickle.HIGHEST_PROTOCOL)
    if args.aws == 'y':
        _upload_and_remove(k, full_path)

    # save weights separately
    weights_path = directory_name + '/weights.mat'
    savemat(weights_path, weights)
    if args.aws == 'y':
        _upload_and_remove(k, weights_path)

    # create log file (text mode, matching the append further down)
    log_path = directory_name + "/log.txt"
    with open(log_path, "w") as log_file:
        for m in range(len(args.model)):
            log_file.write(
                "Model layer %d: \n model:%s \n dimensions:%4s \n iterations:%3d \n" %
                (m, args.model[m], args.dimensions[m], args.iterations[m]))
        # args.model is a list, so test its first entry like the assertion above
        if args.model[0] == 'GroupSF' or args.model[0] == 'GroupConvolutionalSF':
            log_file.write(" Groups: %d \n Step: %d" % (args.group, args.step))
        ex = data.shape[0]
        if args.examples is not None:
            ex = args.examples
        log_file.write(" Data-set: %s \n Examples: %6d \n Whitened: %s" %
                       (args.filename, ex, args.whitening))
        log_file.write('\nElapsed training time: %f' % elapsed)
    if args.aws == 'y':
        _upload_and_remove(k, log_path)

    # =============================== Verbosity Options =====================================

    # get variables and saves
    if args.verbosity >= 1:

        # only the outputs of the last layer are computed and saved
        last_layer = model.n_layers - 1

        for batch in range(n_batches):

            # the dictionaries are rebuilt for every batch, so after the loop
            # they only hold the entries of the final batch
            activations_norm = {}
            activations_raw = {}
            activations_shuffled = {}
            reconstruction = {}
            error_recon = {}
            pooled = {}

            begin = batch * args.batch_size
            end = begin + args.batch_size
            f_hat, rec, err, f_hat_shuffled, f, p = outputs[last_layer](data[begin:end])

            tag = 'layer' + str(last_layer) + '_batch' + str(batch)
            activations_norm[tag] = f_hat
            activations_raw[tag] = f
            activations_shuffled[tag] = f_hat_shuffled
            # NOTE(review): `err` goes into `reconstruction` and `rec` into
            # `error_recon`; kept as in the original, confirm it is intended
            reconstruction[tag] = err
            error_recon[tag] = rec
            pooled[tag] = p

            # save normalized and raw activations of this batch separately
            norm_path = directory_name + '/activations_norm_' + tag + '.mat'
            raw_path = directory_name + '/activation_raw_' + tag + '.mat'
            savemat(norm_path, activations_norm)
            savemat(raw_path, activations_raw)
            if args.aws == 'y':
                _upload_and_remove(k, norm_path)
                _upload_and_remove(k, raw_path)

    # display figures
    # NOTE(review): everything below reads keys such as 'layer0' from the
    # activation dictionaries, but the loop above only stores
    # 'layer<N>_batch<M>' keys, so these lookups raise KeyError as written.
    if args.verbosity == 2:

        # if GD, plot the cost function over time
        if args.opt == 'GD':
            visualize.plotCost(cost)

        # visualize the receptive fields of the first layer
        visualize.drawplots(weights['layer0'].T, color='gray',
                            convolution=args.convolution, pad=0,
                            examples=None, channels=args.channels)

        # visualize the distribution of lifetime and population sparseness
        for l in range(len(args.dimensions)):
            layer = 'layer' + str(l)
            if args.convolution == 'n':
                visualize.dispSparseHist(activations_norm[layer], l)
            elif args.convolution == 'y':
                shuffled = activations_shuffled[layer]
                visualize.dispSparseHist(
                    shuffled.reshape(
                        args.dimensions[l][0],
                        data.shape[0] * shuffled.shape[2] * shuffled.shape[3]),
                    layer=l)

        # visualize the distribution of activity across the "cortical sheet" and reconstruction
        if args.filename == 'patches_video.mat':
            f_hat = activations_norm['layer0'].T.reshape(
                video.shape[0], video.shape[1], args.dimensions[0][0])
            visualize.videoCortex(f_hat[0:100, :, :], 'y', args.convolution, 1)
        else:
            visualize.drawplots(activations_norm['layer0'], color='gray',
                                convolution=args.convolution, pad=1, examples=100)

        # additional visualizations for convolutional network
        if args.convolution == 'y':

            dim = activations_raw['layer0'].shape[2]
            # floor division keeps the value usable as an array shape
            half_dim = dim // 2

            # visualize an example of a convolved image
            visualize.visualize_convolved_image(activations_raw['layer0'], dim=dim)

            # visualize max-pooled activations and LCN output
            first_pooled = pooled['layer0'][0, :, :, :].reshape(
                1,
                pooled['layer0'].shape[1],
                pooled['layer0'].shape[2],
                pooled['layer0'].shape[3])
            visualize.visualize_convolved_image(first_pooled, dim=half_dim)

            # visualize an example of a LCNed convolved image after max pooling
            temp = pooled['layer0']
            for i in range(temp.shape[1]):
                temp[0, i, :, :] = scaling.LCNinput(
                    temp[0, i, :, :].reshape((1, 1, half_dim, half_dim)),
                    kernel_shape=5)
            visualize.visualize_convolved_image(temp, dim=half_dim)

    # ================================ Test the Model =======================================

    # test the model if evaluating classification performance
    if args.test == 'y':

        from sklearn import svm
        from sklearn.metrics import confusion_matrix

        train_labels = loadmat(file_path)['y']

        # read the test file once and take both entries from it
        file_path = os.path.join(base_path, "data", "CIFAR_test.mat")
        test_set = loadmat(file_path)
        test_data = test_set['X']
        test_labels = test_set['y']

        # reshape and normalize the data
        if args.convolution == 'y':
            side = int(np.sqrt(test_data.shape[1]))
            test_data = np.float32(test_data.reshape(-1, 1, side, side))
            test_data = scaling.LCNinput(test_data, kernel_shape=5)
            test_data = test_data[0:args.examples, :, :, :]

        # get SVM test results for pixels to last layer
        train_input = None
        for layer in range(model.n_layers + 1):

            # pixel inputs
            if layer == 0:
                test_input = test_data.reshape(
                    test_data.shape[0],
                    test_data.shape[1] * test_data.shape[2] * test_data.shape[3])
                train_input = data.reshape(
                    data.shape[0],
                    data.shape[1] * data.shape[2] * data.shape[3])

            # hidden layers
            elif layer > 0:
                # get the output of the current layer in the model given the
                # training / test data and then reshape
                # TODO: use raw output as training and testing data?
                test_input = test[layer - 1](test_data[0:args.batch_size])
                test_input = test_input[0].reshape(
                    test_input[0].shape[0],
                    test_input[0].shape[1] * test_input[0].shape[2] * test_input[0].shape[3])

                # NOTE(review): requires verbosity >= 1 and a 'layerN' key
                # that the saving loop above never stores
                train_input = activations_norm['layer' + str(layer - 1)]
                train_input = train_input.reshape(
                    train_input.shape[0],
                    train_input.shape[1] * train_input.shape[2] * train_input.shape[3])

            # train linear support vector machine
            clf = svm.SVC(kernel="linear").fit(
                train_input, np.ravel(train_labels[0:args.examples]))

            # get predictions from SVM and calculate accuracy
            predictions = clf.predict(test_input)
            accuracy = clf.score(test_input, test_labels[0:args.examples])

            # display results and log them
            print("Accuracy of the classifier at layer %1d: %0.4f" % (layer, accuracy))
            cm = confusion_matrix(test_labels[0:args.examples], predictions)
            with open(directory_name + "/log.txt", "a") as log_file:
                log_file.write("\nAccuracy of the classifier at layer %1d: %0.4f" %
                               (layer, accuracy))

    # visualize the confusion matrix (of the last evaluated layer)
    if args.test == 'y' and args.verbosity == 2:

        import pylab as pl

        pl.imshow(cm, interpolation='nearest')
        pl.title('Confusion Matrix for Network')
        pl.colorbar()
        pl.ylabel('True Label')
        pl.xlabel('Predicted Label')
        pl.show()
def main():
    """Relate saved activations to class labels, then sparse-filter the result.

    Steps, in order:

    1. Load ``concatenated_activations.mat`` from one folder below ``saved``
       and the ``'y'`` labels from ``data/CIFAR_data.mat``.
    2. Fit one ``Ridge`` regression per neuron (binarized labels as inputs,
       the neuron's activations as target) and save the coefficients.
    3. Train an ``sf.Network`` sparse filter on the coefficient matrix, save
       its weights re-ordered by summed absolute activation, and plot them.
    4. Average the Pearson correlation of the neurons' coefficient vectors
       per pairwise distance and plot it next to hard-coded reference values.
    """
    # get the folders in "saved" and select one of them
    base_path = os.path.dirname(__file__)
    folder_path = os.path.join(base_path, "saved")
    folders = os.listdir(folder_path)
    # NOTE(review): hard-coded index; os.listdir gives no ordering guarantee,
    # so this is not necessarily the most recent run (confirm the intent)
    folder = folders[3]

    # load in activation data; the HDF5 handle is closed once the copy is made
    print("loading in the data...")
    file_path = os.path.join(folder_path, folder, "concatenated_activations.mat")
    with h5py.File(file_path, 'r') as activation_file:
        data = np.array(activation_file['master'])
    data = data.T
    print(data.shape)

    # TODO: scale and normalize data

    # load in data labels
    file_path = os.path.join(base_path, "data", "CIFAR_data.mat")
    train_labels = loadmat(file_path)['y']

    # augment training_labels to account for extra examples in image-space
    y_labels = numpy.matlib.repmat(train_labels, 1, data.shape[2]).reshape(
        (data.shape[0] * data.shape[2], 1))

    # convert labels to binary vector
    lb = LabelBinarizer()
    lb.fit(train_labels)
    y_labels = lb.transform(y_labels)

    # perform neuron-wise regularized linear regression to obtain coefficients
    print("performing neuron-wise regularized linear regression...")
    neurons = data.shape[1]
    classes = 10
    coefficients = np.zeros((neurons, classes))
    for neuron in range(neurons):
        print(neuron)
        x = data[:, neuron, :].reshape(data.shape[0] * data.shape[2], 1)
        clf = Ridge(alpha=1.0)
        clf.fit(y_labels, x)
        coefficients[neuron, :] = clf.coef_

    # save the coefficients
    coefficient_path = os.path.join(folder_path, folder, "coefficients.mat")
    savemat(coefficient_path, {'coefficients': coefficients})

    # visualize histogram of coefficients
    pl.hist(np.abs(coefficients.flatten()), bins=30)
    pl.title('Frequency Distribution of Coefficient Values')
    pl.xlabel('Coefficient Value')
    pl.ylabel('Frequency')
    pl.show()

    # todo: find the N sparse filters from the data
    model_type = ['SparseFilter']
    n_filters = 10
    input_dim = coefficients.shape[1]
    dimensions = ([n_filters, input_dim],)  # number of filters equals number of classes
    pool = None
    group = None
    step = None
    learn_rate = .001
    opt = 'GD'
    convolution = 'n'
    test_flag = 'n'
    batch_size = 1000
    random = 'n'
    iterations = 1000
    channels = 1
    # floor division keeps the count an integer; always run at least one batch
    n_batches = max(1, coefficients.shape[0] // batch_size)

    # construct the network
    print("building model...")
    model = sf.Network(
        model_type=model_type,
        weight_dims=dimensions,
        p=pool,
        group_size=group,
        step=step,
        lr=learn_rate,
        opt=opt,
        c=convolution,
        test=test_flag,
        batch_size=batch_size,
        random=random,
        weights=None,
    )

    # compile the training, output, and test functions for the network
    print("compiling theano functions...")
    train, outputs, test = model.training_functions(np.float32(coefficients))

    # train the sparse filtering network
    print("training network...")
    t = time.time()
    cost = {}
    weights = {}
    layer = None
    for l in range(model.n_layers):

        layer = 'layer' + str(l)
        cost_layer = []
        w = None

        # iterate over training epochs
        for epoch in range(iterations):
            # go though [mini]batches
            for batch_index in range(n_batches):
                c, w = train[l](index=batch_index)
                cost_layer.append(c)
                print("Layer %i cost at epoch %i and batch %i: %f" %
                      (l + 1, epoch, batch_index, c))

        # add layer cost and weights to the dictionaries
        cost[layer] = cost_layer
        weights[layer] = w

    # calculate and display elapsed training time
    elapsed = time.time() - t
    print('Elapsed training time: %f' % elapsed)

    # order the components based on their activations (proxy for amount of variance explained)
    activations, _, _, _, _, _ = outputs[0](np.float32(coefficients))
    activations_summed = np.sum(np.abs(activations), axis=1)
    index = np.argsort(activations_summed)
    weights[layer] = weights[layer][index]

    # save the components (each column represents a component with each element the value for
    # each object category)
    components_path = os.path.join(folder_path, folder, 'weights.mat')
    savemat(components_path, weights)

    # plot the cost function over time
    visualize.plotCost(cost)

    # visualize the components with respect to the object categories
    pl.imshow(weights[layer], interpolation='nearest')
    pl.title('Sparse Filtering Components')
    pl.xlabel('Weights')
    pl.ylabel('Filters')
    pl.xticks(np.arange(1, 10, 10))
    pl.yticks(np.arange(1, 10, 10))
    pl.show()

    # project the components back onto the cortical sheet (i.e., the dot product between each
    # neuron's model coefficients and each component)
    projections = activations
    visualize.drawplots(projections.T, color='gray', convolution=convolution,
                        pad=0, examples=None, channels=channels)

    # todo: compare the similarity of adjacent neurons of different distances and visualize
    distance_measure = 'cityblock'
    sheet_side = np.sqrt(neurons)
    # cdist returns a 1x1 array; reduce it to a plain int so it can size arrays
    max_distance = int(np.ceil(
        cdist(np.atleast_2d([0, 0]),
              np.atleast_2d([sheet_side, sheet_side]),
              distance_measure)[0, 0]))
    continuity_data = np.zeros((1, max_distance))
    distances = distMat(neurons, d=neurons * 100, kind=distance_measure)
    pl.imshow(distances)
    pl.show()

    # accumulate the correlation sum and the pair count for every distance
    divisor = np.zeros((1, max_distance))
    for i in range(neurons):
        for j in range(neurons):
            correlation = pearsonr(coefficients[i, :].T, coefficients[j, :].T)
            # the distance doubles as a bin index, so it must be an integer
            d = int(distances[i, j])
            print("%s %s" % (d, correlation))
            continuity_data[0, d] += correlation[0]
            divisor[0, d] += 1

    # bins no pair fell into are 0 / 0 = NaN and are dropped here
    correlation_averages = continuity_data / divisor
    correlation_averages = correlation_averages[~np.isnan(correlation_averages)]

    # todo: allow computation of std
    temp_std = np.linspace(.2, .1, len(correlation_averages))

    print(temp_std)
    print(correlation_averages)

    hypothetical_averages = [1., 0.7, 0.5, 0.4, 0.28, 0.21, 0.15, 0.09, 0.07, 0.05]
    hypothetical_stds = np.linspace(.07, .1, len(correlation_averages) - 1)

    fig = visualize.plot_mean_std(correlation_averages[0:10], temp_std[0:10],
                                  hypothetical_averages, hypothetical_stds)
    fig.show()
os.remove(directory_name + '/weights.mat') # save the cost functions savemat(directory_name + '/cost.mat', {'cost': cost_layer}) if aws == 'y': k.key = directory_name + '/cost.mat' k.set_contents_from_filename(directory_name + '/cost.mat') os.remove(directory_name + '/cost.mat') # save the target savemat(directory_name + '/target.mat', {'target': target}) if aws == 'y': k.key = directory_name + '/target.mat' k.set_contents_from_filename(directory_name + '/target.mat') os.remove(directory_name + '/target.mat') # save the inhibition savemat(directory_name + '/inhibition.mat', {'inhibition': a_out}) if aws == 'y': k.key = directory_name + '/inhibition.mat' k.set_contents_from_filename(directory_name + '/inhibition.mat') os.remove(directory_name + '/inhibition.mat') # plot the cost c = {'layer0': cost_layer} visualize.plotCost(c) # visualize the receptive fields of the first layer visualize.drawplots(weight.T, color='gray', convolution='n', pad=0, examples=None, channels=1)
def _extract_pooled_features(layer_fn, inputs, batch_size, n_filters,
                             quad_pool):
    """Push ``inputs`` through ``layer_fn`` in mini-batches and pool the result.

    :param layer_fn: compiled test function of one network layer; it is
        called with a slice of ``inputs`` and the first entry of what it
        returns is used
    :param inputs: array whose first axis indexes the examples
    :param batch_size: number of examples passed to ``layer_fn`` per call
    :param n_filters: number of feature maps produced by the layer
    :param quad_pool: callable reducing a batch of layer outputs to an array
        that fits a ``(batch_size, n_filters, 2, 2)`` slot
    :return: array of shape ``(inputs.shape[0], n_filters * 2 * 2)``

    Only complete batches are processed; rows belonging to a trailing partial
    batch keep their initial value of zero.
    """
    features = np.zeros((inputs.shape[0], n_filters, 2, 2))

    # floor division keeps the batch count an integer on every interpreter
    n_batches = inputs.shape[0] // batch_size
    for batch in xrange(n_batches):
        print("for batch %d" % batch)
        batch_start = batch * batch_size
        batch_end = batch_start + batch_size
        layer_output = layer_fn(inputs[batch_start:batch_end])[0]
        features[batch_start:batch_end] = quad_pool(layer_output)

    # flatten (filters, 2, 2) into one feature vector per example
    return features.reshape(features.shape[0], n_filters * 2 * 2)


def main():
    """Train a sparse filter on image patches and score it with a linear SVM.

    All parameters are hard-coded at the top of the function. The steps are:

    1. read the ``patches`` dataset from ``data/<filename>`` (HDF5) and
       standardize and whiten each channel block separately;
    2. build an ``sf.Network`` and train every layer on randomly drawn
       mini-batches, recording the cost after each update;
    3. write the pickled model, the weights, a text log and (for
       ``verbosity >= 1``) the per-batch activations of the last layer to a
       time-stamped sub-folder of ``./saved``;
    4. for ``verbosity == 2`` plot the cost curve and the receptive fields;
    5. if ``test_model == 'y'``, rebuild the network as a convolutional one
       initialised with the learned weights, extract quadrant-pooled features
       for the STL-10 train/test sets and fit one linear SVM per pre-defined
       fold; accuracies are printed, appended to the log and saved to
       ``accuracy.mat``.
    """
    # define global parameters
    model_type = ['SparseFilter']
    convolution = 'n'
    filename = 'unlabeled_10000.mat'
    channels = 3
    patch_size = 14
    n_filters = 1600
    dimensions = ([n_filters, patch_size * patch_size * channels], )
    pool = None
    group = None
    step = None
    learn_rate = 0.0001
    iterations = [100]
    verbosity = 2
    opt = 'GD'
    whitening = 'y'
    test_model = 'y'
    examples = None
    batch_size = 1000  # todo: figure out why large batches produce nan cost

    # load in data; the context manager closes the HDF5 file once the patches
    # have been copied into memory
    print("loading data...")
    base_path = os.path.dirname(__file__)
    file_path = os.path.join(base_path, "data", filename)
    with h5py.File(file_path, 'r') as patch_file:
        data = np.array(patch_file['patches'])
    data = data.T

    # preprocess the data and convert to float; NOTE: data may have already
    # been normalized using LCN (check data read)
    # NOTE(review): the in-place -= and /= below assume the stored patches
    # are floating point -- confirm against the data file
    channel_length = patch_size ** 2
    for channel in xrange(channels):
        start = channel * channel_length
        end = start + channel_length
        data[start:end] -= data[start:end].mean(axis=0)
        data[start:end] /= data[start:end].std(axis=0) + 1e-8
        data[start:end] = whiten(data[start:end].T).T
    data = np.float32(data.T)

    # determine number of complete batches
    n_batches = data.shape[0] // batch_size

    # construct the network
    print("building model...")
    model = sf.Network(model_type=model_type, weight_dims=dimensions, p=pool,
                       group_size=group, step=step, lr=learn_rate, opt=opt,
                       c=convolution, test=test_model, batch_size=batch_size,
                       random='y', weights=None)

    # compile the training, output, and test functions for the network
    print("compiling theano functions...")
    train, outputs, test = model.training_functions(data)

    # train the sparse filtering network
    print("training network...")
    t = time.time()
    cost = {}
    weights = {}
    for l in xrange(model.n_layers):
        cost_layer = []
        w = None

        # iterate over training epochs
        for epoch in xrange(iterations[0]):

            # go though [mini]batches
            for batch_index in xrange(n_batches):

                # create index for random [mini]batch
                index = np.int32(
                    np.random.randint(data.shape[0], size=batch_size))
                c, w = train[l](index=index)
                cost_layer.append(c)
                print("Layer %i cost at epoch %i and batch %i: %f" %
                      (l + 1, epoch, batch_index, c))

        # add layer cost and weights to the dictionaries
        cost['layer' + str(l)] = cost_layer
        weights['layer' + str(l)] = w

    # calculate and display elapsed training time
    elapsed = time.time() - t
    print('Elapsed training time: %f' % elapsed)

    # create sub-folder for saved model; makedirs also creates ./saved itself
    # when it is missing
    directory_format = "./saved/%4d-%02d-%02d_%02dh%02dm%02ds"
    directory_name = directory_format % time.localtime()[0:6]
    os.makedirs(directory_name)

    # save the model for later use; HIGHEST_PROTOCOL is a binary protocol, so
    # the file has to be opened in binary mode
    full_path = directory_name + '/model.pkl'
    with open(full_path, 'wb') as model_file:
        pickle.dump(model, model_file, pickle.HIGHEST_PROTOCOL)

    # save weights separately
    savemat(directory_name + '/weights.mat', weights)

    # create log file (text mode, matching the append further below)
    ex = data.shape[0] if examples is None else examples
    with open(directory_name + "/log.txt", "w") as log_file:
        for m in range(len(model_type)):
            log_file.write(
                "Model layer %d: \n model:%s \n dimensions:%4s \n iterations:%3d \n"
                % (m, model_type[m], dimensions[m], iterations[m]))

            # compare the layer's type name, not the network object
            if model_type[m] in ('GroupSF', 'GroupConvolutionalSF'):
                # %s prints integers exactly like %d but tolerates None
                log_file.write(" Groups: %s \n Step: %s" % (group, step))
        log_file.write(" Data-set: %s \n Examples: %6d \n Whitened: %s" %
                       (filename, ex, whitening))
        log_file.write('\nElapsed training time: %f' % elapsed)

    # get variables and save
    last_layer = model.n_layers - 1
    if verbosity >= 1:
        for batch in xrange(n_batches):
            begin = batch * batch_size
            end = begin + batch_size

            # only the normalized and raw activations are written out; the
            # other four outputs are not used here
            f_hat, _, _, _, f, _ = outputs[last_layer](data[begin:end])
            batch_title = 'layer' + str(last_layer) + '_batch' + str(batch)

            # save activations separately, one file per batch
            savemat(
                directory_name + '/activations_norm_' + batch_title + '.mat',
                {batch_title: f_hat})
            savemat(
                directory_name + '/activation_raw_' + batch_title + '.mat',
                {batch_title: f})

    # display figures
    if verbosity == 2:

        # if GD, plot the cost function over time
        if opt == 'GD':
            visualize.plotCost(cost)

        # visualize the receptive fields of the first layer
        visualize.drawplots(weights['layer0'].T, color='gray',
                            convolution=convolution, pad=0, examples=None,
                            channels=channels)

        # NOTE: the sparseness histograms, activation maps, reconstruction
        # and convolved-image visualizations are disabled in this variant.

    ''' ================================ Test the Model ======================================= '''

    # test the model if evaluating classification performance
    if test_model == 'y':
        print('testing...')
        from sklearn import svm
        from sklearn.metrics import confusion_matrix

        # set some new local parameters
        train_data_file = "STL_10_lcn_train.mat"
        train_labels_file = "train.mat"
        test_data_file = "STL_10_lcn_test.mat"
        test_labels_file = "test.mat"
        model_type = ["ConvolutionalSF"]
        dimensions = ([1, n_filters, patch_size, patch_size], )
        convolution = 'y'
        batch_size = 100

        # load in STL-10 training data (all pre-normalized using LCN)
        print("loading in training and test data...")
        file_path = os.path.join(base_path, "data", train_data_file)
        train_data = loadmat(file_path)['X']

        # labels and pre-defined fold indices live in the same file
        file_path = os.path.join(base_path, "data", train_labels_file)
        train_mat = loadmat(file_path)
        train_labels = train_mat['y']
        fold_indices = train_mat['fold_indices']
        fold_indices -= np.ones(fold_indices.shape)  # make zero-index

        # load in STL-10 test data (all pre-normalized using LCN)
        file_path = os.path.join(base_path, "data", test_data_file)
        test_data = loadmat(file_path)['X']
        file_path = os.path.join(base_path, "data", test_labels_file)
        test_labels = loadmat(file_path)['y']

        # initialize convolutional network with learned parameters from above
        old_weights = model.layers[0].w.eval().reshape(
            (-1, channels, patch_size, patch_size))
        old_weights = theano.shared(
            old_weights.astype(dtype=theano.config.floatX))
        test_network = sf.Network(model_type=model_type,
                                  weight_dims=dimensions, p=pool,
                                  group_size=group, step=step, lr=learn_rate,
                                  opt=opt, c=convolution, test=test_model,
                                  batch_size=batch_size, random='y',
                                  weights=old_weights)

        # compile the training, output, and test functions for the network
        print("compiling theano functions...")
        _, _, test = test_network.training_functions(train_data)

        # train and test a SVM classifier for each layer (including pixels as
        # baseline)
        accuracy = {}
        train_input = None
        test_input = None
        cm = None
        # range(test_network.n_layers + 1) would include the pixel baseline;
        # skipping pixels for now
        for layer in range(1, 2):

            # create dictionary for layer
            accuracy['layer' + str(layer)] = {}

            # create quadrant pooling function based on size of output from
            # layer
            quadrant_size = test[layer - 1](test_data[0]).shape[3]
            quad_pool = quadrant_pooling(quadrant_size)

            # loop over pre-defined folds
            n_folds = fold_indices.shape[1]
            for fold in xrange(n_folds):

                # get fold data; subtracting np.ones above leaves floats, so
                # convert back to integers before using them as indices
                fold_index = np.asarray(fold_indices[0][fold], dtype=np.int64)
                train_data_fold = np.squeeze(train_data[fold_index])
                train_labels_fold = np.squeeze(train_labels[fold_index])

                # pixel inputs
                if layer == 0:
                    if fold == 0:  # only get test data once
                        test_input = test_data.reshape(
                            test_data.shape[0],
                            test_data.shape[1] * test_data.shape[2] *
                            test_data.shape[3])
                    train_input = train_data_fold.reshape(
                        train_data_fold.shape[0],
                        train_data_fold.shape[1] * train_data_fold.shape[2] *
                        train_data_fold.shape[3])

                # hidden layers
                elif layer > 0:

                    # get the quadrant-pooled output of the current layer for
                    # the training / test data
                    # TODO: use raw output as training and testing data?
                    if fold == 0:  # only get test data once
                        print("getting test data...")
                        test_input = _extract_pooled_features(
                            test[layer - 1], test_data, batch_size, n_filters,
                            quad_pool)

                    print("getting training data...")
                    train_input = _extract_pooled_features(
                        test[layer - 1], train_data_fold, batch_size,
                        n_filters, quad_pool)

                # normalize every example (row) to zero mean and unit
                # variance across its features
                # NOTE(review): the original comment said "for each
                # dimension", which would be axis=0 -- confirm which is meant
                if fold == 0:  # only normalize test data once
                    test_input -= test_input.mean(axis=1)[:, np.newaxis]
                    test_input /= np.std(test_input, axis=1)[:, np.newaxis]
                train_input -= train_input.mean(axis=1)[:, np.newaxis]
                train_input /= np.std(train_input, axis=1)[:, np.newaxis]

                # train linear support vector machine
                print("Training linear SVM...")
                clf = svm.SVC(kernel="linear").fit(
                    train_input, np.ravel(train_labels_fold[0:examples]))

                # get predictions from SVM and calculate accuracy
                print("Making predictions...")
                predictions = clf.predict(test_input)
                fold_accuracy = clf.score(test_input, test_labels[0:examples])
                accuracy['layer' + str(layer)]['fold' + str(fold)] = \
                    fold_accuracy

                # display results and log them
                print(
                    "Accuracy of the classifier for fold %d at layer %1d: %0.4f"
                    % (fold, layer, fold_accuracy))

                # kept for the confusion-matrix plot, which is currently
                # disabled in this variant
                cm = confusion_matrix(test_labels[0:examples], predictions)

                with open(directory_name + "/log.txt", "a") as log_file:
                    log_file.write(
                        "\nAccuracy of the classifier for fold %d at layer %1d: %0.4f"
                        % (fold, layer, fold_accuracy))

        # save the test results
        savemat('accuracy', accuracy)
def _upload_and_remove(key, path):
    """Upload the local file ``path`` to S3 under the same name, then delete it.

    :param key: boto ``Key`` bound to the destination bucket
    :param path: local file path, also used as the key name
    """
    key.key = path
    key.set_contents_from_filename(path)
    os.remove(path)


def main():
    """Command-line entry point: train, save, visualise and test a network.

    The options are parsed with ``argparse`` (see ``--help``). The function

    1. loads ``data/<filename>`` and pre-processes it depending on the
       ``--convolution`` / ``--whitening`` flags and on the file name;
    2. builds an ``sf.Network`` and trains each layer with mini-batch
       gradient descent (``GD``) or SciPy's L-BFGS-B (``L-BFGS``);
    3. saves the model, weights, costs and a log to a time-stamped
       sub-folder of ``./saved``; with ``--aws y`` every saved file is
       uploaded to S3 and removed locally;
    4. for ``--verbosity >= 1`` saves the normalized activations of the last
       layer per batch, and for ``--verbosity 2`` shows diagnostic plots;
    5. for ``--test y`` trains a linear SVM on pixels and on each layer's
       output and reports the accuracy on ``CIFAR_test.mat``.

    The plots and the SVM test read ``activations_norm['layer0']``, which is
    only assigned when ``--verbosity >= 1`` and ``--aws n``.
    """
    # parse options from the command line
    parser = argparse.ArgumentParser(
        prog='PROG',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=textwrap.dedent('''\
            -------------------------------------------------------------------------------------------------------------
            This is a deep neural network architecture for training sparse filters.
            Example uses:
                $ python test.py
                $ python test.py -m GroupSF -v 1 -g 3 -s 1
                $ python test.py -m ConvolutionalSF -d 16 1 8 8 -v 1 -w y -c y -f CIFAR_data.mat -i 100
                $ python test.py -m ConvolutionalSF ConvolutionalSF -d 16 1 6 6 16 16 4 4 -w y -c y -f CIFAR_data.mat
                    -i 100 150 -t y -v 1
            -------------------------------------------------------------------------------------------------------------
            ''')
    )
    parser.add_argument("-m", "--model", default=['SparseFilter'], nargs='+',
                        help="the model type")
    parser.add_argument("-c", "--convolution", default="n",
                        help="convolution, yes or no")
    parser.add_argument("-f", "--filename", default="patches.mat",
                        help="the data filename")
    parser.add_argument("-d", "--dimensions", type=int, nargs='+',
                        default=([100, 256]),
                        help="the dimensions of the model: [neurons, input size] or [neurons, length, width]")
    parser.add_argument("-p", "--pool", type=int, nargs='+', default=None,
                        help="pooling dimensions")
    parser.add_argument("-g", "--group", type=int, default=None,
                        help="group size")
    parser.add_argument("-s", "--step", type=int, default=None,
                        help="step size")
    parser.add_argument("-l", "--learn_rate", type=float, default=.001,
                        help="learning rate")
    parser.add_argument("-i", "--iterations", type=int, nargs='+',
                        default=[100], help="number of iterations")
    parser.add_argument("-v", "--verbosity", type=int, default=0,
                        help="verbosity: 0 no plot; 1 plots")
    parser.add_argument("-o", "--opt", default="GD",
                        help="optimization method: GD or L-BFGS")
    parser.add_argument("-w", "--whitening", default='n',
                        help="whitening: 'y' or 'n'")
    parser.add_argument("-t", "--test", default='n',
                        help="test classification performance: 'y' or 'n'")
    parser.add_argument("-a", "--channels", type=int, default=1,
                        help="number of channels in data")
    parser.add_argument("-e", "--examples", type=int, default=None,
                        help="number of training examples")
    parser.add_argument("-b", "--batch_size", type=int, default=1000,
                        help="number of examples in [mini]batch")
    parser.add_argument("-z", "--aws", default='n',
                        help="run on aws: 'y' or 'n'")
    parser.add_argument("-r", "--random", default='n',
                        help="type of batches: random = 'y'")
    args = parser.parse_args()
    args.dimensions = parse_dims(args)
    args.iterations = parse_iter(args)

    ''' =================================== Load in the data =================================== '''

    # load in data
    print("loading data...")
    base_path = os.path.dirname(__file__)
    file_path = os.path.join(base_path, "data", args.filename)
    data = loadmat(file_path)['X']

    # reshape and preprocess data
    print("pre-processing data ...")
    video = None
    if args.filename == 'patches_video.mat':
        # keep the original array; its first two axes are used again when
        # the activations are reshaped for the video plot
        video = data
        data = data.reshape(data.shape[0] * data.shape[1], data.shape[2]).T

    if args.convolution == 'n':
        if args.whitening == 'y':
            data -= data.mean(axis=0)
            data = whiten(data.T).T
        elif args.whitening == 'n' and args.channels == 1:
            data -= data.mean(axis=0)
        data = np.float32(data.T)

    elif args.convolution == 'y':
        if args.filename == 'kyotoData.mat':
            side = int(np.sqrt(data.shape[1]))
            data = np.float32(data.reshape(-1, 1, side, side))
            data = scaling.LCNinput(data, kernel_shape=9)

        elif args.filename == 'CIFAR_data.mat':
            side = int(np.sqrt(data.shape[1]))
            data = np.float32(data.reshape(-1, 1, side, side))
            data = scaling.LCNinput(data, kernel_shape=5)
            data = data[0:args.examples, :, :, :]

        elif args.filename == 'STL_10.mat' or args.filename == 'Lenna.mat':
            side = int(np.sqrt(data.shape[1] // 3))
            data = np.float32(data.reshape(-1, 3, side, side))
            data = data[0:args.examples, :, :, :]
            args.channels = data.shape[1]

            # apply local contrast normalization to each channel separately
            for channel in range(args.channels):
                single_channel = data[:, channel, :, :].reshape(
                    (data.shape[0], 1, data.shape[2], data.shape[3]))
                data[:, channel, :, :] = np.reshape(
                    scaling.LCNinput(single_channel, kernel_shape=9),
                    (data.shape[0], data.shape[2], data.shape[3]))

    # assert that batch size is valid and get number of batches
    n_batches, rem = divmod(data.shape[0], args.batch_size)
    assert rem == 0, "number of examples must be a multiple of batch_size"

    # other assertions
    assert len(args.model) == len(args.iterations), \
        "one iteration count is required per model layer"
    if args.model[0] in ('GroupSF', 'GroupConvolutionalSF'):
        assert args.group is not None, "group size (-g) is required"
        assert args.step is not None, "step size (-s) is required"

    # assert that the number of neurons in each layer is a perfect square
    for layer in xrange(len(args.dimensions)):
        n_neurons_root = np.sqrt(args.dimensions[layer][0])
        assert n_neurons_root % np.floor(n_neurons_root) == 0, \
            "number of neurons per layer must be a perfect square"

    ''' ============================= Build and train the network ============================= '''

    # construct the network
    print("building model...")
    model = sf.Network(
        model_type=args.model, weight_dims=args.dimensions, p=args.pool,
        group_size=args.group, step=args.step, lr=args.learn_rate,
        opt=args.opt, c=args.convolution, test=args.test,
        batch_size=args.batch_size, random=args.random, weights=None
    )  # TODO: custom learning rates for each layer

    # compile the training, output, and test functions for the network
    print("compiling theano functions...")
    train, outputs, test = model.training_functions(data)

    # train the sparse filtering network
    print("training network...")
    t = time.time()
    cost = {}
    weights = {}
    for l in xrange(model.n_layers):
        cost_layer = []
        w = None

        # iterate over training epochs
        if args.opt == 'GD':
            for epoch in xrange(args.iterations[l]):

                # go though [mini]batches
                for batch_index in xrange(n_batches):
                    c, w = train[l](index=batch_index)
                    cost_layer.append(c)
                    print("Layer %i cost at epoch %i and batch %i: %f" %
                          (l + 1, epoch, batch_index, c))

        elif args.opt == 'L-BFGS':
            w = minimize(train[l], model.layers[l].w.eval().flatten(),
                         method='L-BFGS-B', jac=True,
                         options={'maxiter': args.iterations[l],
                                  'disp': True})

            # restore the shape of *this* layer's weights (the flat solution
            # of layer l does not generally fit the first layer's shape)
            layer_dims = args.dimensions[l]
            if args.convolution == 'n':
                w = w.x.reshape(layer_dims[0], layer_dims[1])
            elif args.convolution == 'y':
                w = w.x.reshape(layer_dims[0], layer_dims[1],
                                layer_dims[2], layer_dims[3])

        # add layer cost and weights to the dictionaries
        cost['layer' + str(l)] = cost_layer
        weights['layer' + str(l)] = w

    # calculate and display elapsed training time
    elapsed = time.time() - t
    print('Elapsed training time: %f' % elapsed)

    # connect to S3 when running on AWS
    k = None
    if args.aws == 'y':
        import boto
        from boto.s3.key import Key
        s3 = boto.connect_s3()
        my_bucket = 'dlacombejr.bucket'
        bucket = s3.get_bucket(my_bucket)
        k = Key(bucket)

    # create sub-folder for saved model (needed locally in both modes, since
    # files are written to disk before they are uploaded)
    directory_format = "./saved/%4d-%02d-%02d_%02dh%02dm%02ds"
    directory_name = directory_format % time.localtime()[0:6]
    os.makedirs(directory_name)

    # save the model for later use; HIGHEST_PROTOCOL is a binary protocol, so
    # the file has to be opened in binary mode
    full_path = directory_name + '/model.pkl'
    with open(full_path, 'wb') as model_file:
        pickle.dump(model, model_file, pickle.HIGHEST_PROTOCOL)
    if args.aws == 'y':
        _upload_and_remove(k, full_path)

    # save weights separately
    weights_path = directory_name + '/weights.mat'
    savemat(weights_path, weights)
    if args.aws == 'y':
        _upload_and_remove(k, weights_path)

    # save the cost functions
    cost_path = directory_name + '/cost.mat'
    savemat(cost_path, cost)
    if args.aws == 'y':
        _upload_and_remove(k, cost_path)

    # create log file
    # todo: create log file by looping through args
    log_path = directory_name + "/log.txt"
    ex = data.shape[0] if args.examples is None else args.examples
    with open(log_path, "w") as log_file:
        for m in range(len(args.model)):
            log_file.write(
                "Model layer %d: \n model:%s \n dimensions:%4s \n iterations:%3d \n"
                % (m, args.model[m], args.dimensions[m], args.iterations[m])
            )

            # args.model is a list, so test the type name of this layer
            if args.model[m] in ('GroupSF', 'GroupConvolutionalSF'):
                # %s prints integers exactly like %d but tolerates None
                log_file.write(
                    " Groups: %s \n Step: %s" % (args.group, args.step)
                )
        log_file.write(
            " Data-set: %s \n Examples: %6d \n Whitened: %s"
            % (args.filename, ex, args.whitening)
        )
        log_file.write('\nElapsed training time: %f' % elapsed)
    if args.aws == 'y':
        # NOTE(review): the test section appends to log.txt after this
        # upload, so on AWS the accuracies only end up in a new local file
        _upload_and_remove(k, log_path)

    ''' =============================== Verbosity Options ===================================== '''

    # get variables and saves
    last_layer = model.n_layers - 1
    if args.verbosity >= 1:

        for batch in xrange(n_batches):
            begin = batch * args.batch_size
            end = begin + args.batch_size

            # only the normalized activations are saved per batch
            f_hat, _, _, _, _, _ = outputs[last_layer](data[begin:end])

            # define [mini]batch title and file name
            batch_title = 'layer' + str(last_layer) + '_batch' + '%03d' % batch
            norm_file_name = (directory_name + '/activations_norm_' +
                              batch_title + '.mat')

            # save activations separately, one file per batch
            savemat(norm_file_name, {batch_title: f_hat})
            if args.aws == 'y':
                _upload_and_remove(k, norm_file_name)

        # output helper file for concatenating activations
        helper = {'batches': n_batches, 'output_size': f_hat.shape}
        helper_file_name = directory_name + '/helper.mat'
        savemat(helper_file_name, helper)
        if args.aws == 'y':
            _upload_and_remove(k, helper_file_name)

        # get data if not on AWS: run the whole data set through the last
        # layer and keep every output the plots and the SVM test read
        # NOTE(review): everything is stored under 'layer0' although it comes
        # from the last layer, so multi-layer models will miss 'layer1', ...
        if args.aws == 'n':
            f_hat, _, _, f_hat_shuffled, f, p = outputs[last_layer](data)
            activations_norm = {"layer0": f_hat}
            activations_raw = {"layer0": f}
            activations_shuffled = {"layer0": f_hat_shuffled}
            pooled = {"layer0": p}

    # display figures
    if args.verbosity == 2:

        # if GD, plot the cost function over time
        if args.opt == 'GD':
            visualize.plotCost(cost)

        # visualize the receptive fields of the first layer
        visualize.drawplots(weights['layer0'].T, color='gray',
                            convolution=args.convolution, pad=0,
                            examples=None, channels=args.channels)

        # visualize the distribution of lifetime and population sparseness
        for layer_index in xrange(len(args.dimensions)):
            layer = 'layer' + str(layer_index)
            if args.convolution == 'n':
                visualize.dispSparseHist(activations_norm[layer], layer_index)
            elif args.convolution == 'y':
                shuffled = activations_shuffled[layer]
                visualize.dispSparseHist(
                    shuffled.reshape(
                        args.dimensions[layer_index][0],
                        data.shape[0] * shuffled.shape[2] * shuffled.shape[3]),
                    layer=layer_index)

        # visualize the distribution of activity across the "cortical sheet"
        # and reconstruction
        if args.filename == 'patches_video.mat':
            f_hat = activations_norm['layer0'].T.reshape(
                video.shape[0], video.shape[1], args.dimensions[0][0])
            visualize.videoCortex(f_hat[0:100, :, :], 'y', args.convolution, 1)
        else:
            visualize.drawplots(activations_norm['layer0'], color='gray',
                                convolution=args.convolution, pad=1,
                                examples=100)

        # NOTE: the reconstruction visualizations are currently disabled.

        # additional visualizations for convolutional network
        if args.convolution == 'y':

            dim = activations_raw['layer0'].shape[2]
            half_dim = dim // 2  # shapes must be integers

            # visualize an example of a convolved image
            visualize.visualize_convolved_image(activations_raw['layer0'],
                                                dim=dim)

            # visualize max-pooled activations and LCN output
            visualize.visualize_convolved_image(
                pooled['layer0'][0, :, :, :].reshape(
                    1, pooled['layer0'].shape[1], pooled['layer0'].shape[2],
                    pooled['layer0'].shape[3]),
                dim=half_dim)

            # visualize an example of a LCNed convolved image after max
            # pooling (the first example is normalized in place)
            temp = pooled['layer0']
            for i in range(temp.shape[1]):
                temp[0, i, :, :] = scaling.LCNinput(
                    temp[0, i, :, :].reshape((1, 1, half_dim, half_dim)),
                    kernel_shape=5)
            visualize.visualize_convolved_image(temp, dim=half_dim)

    ''' ================================ Test the Model ======================================= '''

    # test the model if evaluating classification performance
    if args.test == 'y':

        from sklearn import svm
        from sklearn.metrics import confusion_matrix

        # file_path still points at the training file here
        train_labels = loadmat(file_path)['y']

        file_path = os.path.join(base_path, "data", "CIFAR_test.mat")
        test_mat = loadmat(file_path)
        test_data = test_mat['X']
        test_labels = test_mat['y']

        # reshape and normalize the data
        if args.convolution == 'y':
            side = int(np.sqrt(test_data.shape[1]))
            test_data = np.float32(test_data.reshape(-1, 1, side, side))
            test_data = scaling.LCNinput(test_data, kernel_shape=5)
            test_data = test_data[0:args.examples, :, :, :]

        # get SVM test results for pixels to last layer
        train_input = None
        for layer in range(model.n_layers + 1):

            # pixel inputs
            if layer == 0:
                test_input = test_data.reshape(
                    test_data.shape[0],
                    test_data.shape[1] * test_data.shape[2] *
                    test_data.shape[3])
                train_input = data.reshape(
                    data.shape[0],
                    data.shape[1] * data.shape[2] * data.shape[3])

            # hidden layers
            elif layer > 0:

                # get the output of the current layer in the model given the
                # training / test data and then reshape
                # TODO: use raw output as training and testing data?
                test_input = test[layer - 1](test_data[0:args.batch_size])
                test_input = test_input[0].reshape(
                    test_input[0].shape[0],
                    test_input[0].shape[1] * test_input[0].shape[2] *
                    test_input[0].shape[3])

                train_input = activations_norm['layer' + str(layer - 1)]
                train_input = train_input.reshape(
                    train_input.shape[0],
                    train_input.shape[1] * train_input.shape[2] *
                    train_input.shape[3])

            # train linear support vector machine
            clf = svm.SVC(kernel="linear").fit(
                train_input, np.ravel(train_labels[0:args.examples]))

            # get predictions from SVM and calculate accuracy
            predictions = clf.predict(test_input)
            accuracy = clf.score(test_input, test_labels[0:args.examples])

            # display results and log them
            print("Accuracy of the classifier at layer %1d: %0.4f" %
                  (layer, accuracy))

            cm = confusion_matrix(test_labels[0:args.examples], predictions)

            with open(directory_name + "/log.txt", "a") as log_file:
                log_file.write(
                    "\nAccuracy of the classifier at layer %1d: %0.4f"
                    % (layer, accuracy)
                )

    # visualize the confusion matrix (of the last layer that was tested)
    if args.test == 'y' and args.verbosity == 2:

        import pylab as pl

        pl.imshow(cm, interpolation='nearest')
        pl.title('Confusion Matrix for Network')
        pl.colorbar()
        pl.ylabel('True Label')
        pl.xlabel('Predicted Label')
        pl.show()
def _pooled_layer_features(layer_fn, inputs, pool_fn, n_filters, batch_size):
    """Run `inputs` through `layer_fn` batch by batch and pool every batch.

    Args:
        layer_fn: callable applied to each slice of `inputs`; element 0 of its
            return value is passed to `pool_fn`.
        inputs: array whose first axis indexes examples.
        pool_fn: callable whose result is stored as a
            (batch, n_filters, 2, 2) block of the output buffer.
        n_filters: number of feature maps per example.
        batch_size: number of examples handed to `layer_fn` per call.

    Returns:
        Array of shape (inputs.shape[0], n_filters * 4).  Only full batches
        are processed, so rows of a trailing partial batch stay zero.
    """
    features = np.zeros((inputs.shape[0], n_filters, 2, 2))

    # floor division keeps the batch count an integer regardless of the
    # division semantics in effect
    n_batches = inputs.shape[0] // batch_size
    for batch in xrange(n_batches):
        print("for batch %d" % batch)
        batch_start = batch * batch_size
        batch_end = batch_start + batch_size
        activations = layer_fn(inputs[batch_start:batch_end])[0]
        features[batch_start:batch_end] = pool_fn(activations)

    return features.reshape(features.shape[0], n_filters * 2 * 2)


def main():
    """Train a sparse filtering network on patches and evaluate it with an SVM.

    The function
      1. loads the patch data set and normalizes / whitens it per channel,
      2. builds and trains an `sf.Network` with random mini-batches,
      3. saves the model, weights, a log file and per-batch activations in a
         time-stamped sub-folder of ./saved,
      4. optionally plots the cost and the first-layer receptive fields, and
      5. if testing is enabled, builds a convolutional network from the
         learned weights and scores a linear SVM on each pre-defined fold of
         the labelled data, appending the accuracies to the log file and
         saving them with `savemat`.

    All parameters are hard-coded at the top of the function.
    """

    # define global parameters
    model_type = ["SparseFilter"]
    convolution = "n"
    filename = "unlabeled_10000.mat"
    channels = 3
    patch_size = 14
    n_filters = 1600
    dimensions = ([n_filters, patch_size * patch_size * channels],)
    pool = None
    group = None
    step = None
    learn_rate = 0.0001
    iterations = [100]
    verbosity = 2
    opt = "GD"
    whitening = "y"
    test_model = "y"
    examples = None
    batch_size = 1000  # todo: figure out why large batches produce nan cost

    # load in data
    print("loading data...")
    base_path = os.path.dirname(__file__)
    file_path = os.path.join(base_path, "data", filename)
    # NOTE(review): the .mat file is read through h5py, so it is presumably
    # stored in an HDF5-based format -- confirm against the data set
    data = h5py.File(file_path, "r")["patches"]
    data = np.array(data)
    data = data.T

    # preprocess the data and convert to float; NOTE: data may have already
    # been normalized using LCN (check data read)
    # assumes that, after the transpose, rows are pixel dimensions grouped by
    # channel in blocks of patch_size ** 2 -- TODO confirm
    channel_length = patch_size ** 2
    for channel in xrange(channels):
        start = channel * channel_length
        end = start + channel_length
        data[start:end] -= data[start:end].mean(axis=0)
        data[start:end] /= data[start:end].std(axis=0) + 1e-8  # avoid /0
        data[start:end] = whiten(data[start:end].T).T
    data = np.float32(data.T)

    # determine number of (full) batches
    n_batches = data.shape[0] // batch_size

    # construct the network
    print("building model...")
    model = sf.Network(
        model_type=model_type,
        weight_dims=dimensions,
        p=pool,
        group_size=group,
        step=step,
        lr=learn_rate,
        opt=opt,
        c=convolution,
        test=test_model,
        batch_size=batch_size,
        random="y",
        weights=None,
    )

    # compile the training, output, and test functions for the network
    print("compiling theano functions...")
    train, outputs, test = model.training_functions(data)

    # train the sparse filtering network
    print("training network...")
    t = time.time()
    cost = {}
    weights = {}
    for l in xrange(model.n_layers):

        cost_layer = []
        w = None

        # iterate over training epochs
        for epoch in xrange(iterations[0]):

            # go through [mini]batches
            for batch_index in xrange(n_batches):

                # create index for random [mini]batch
                index = np.int32(
                    np.random.randint(data.shape[0], size=batch_size)
                )

                c, w = train[l](index=index)
                cost_layer.append(c)
                print("Layer %i cost at epoch %i and batch %i: %f" %
                      (l + 1, epoch, batch_index, c))

        # add layer cost and weights to the dictionaries
        cost["layer" + str(l)] = cost_layer
        weights["layer" + str(l)] = w

    # calculate and display elapsed training time
    elapsed = time.time() - t
    print("Elapsed training time: %f" % elapsed)

    # create sub-folder for saved model; makedirs also creates ./saved itself
    # when it does not exist yet
    directory_format = "./saved/%4d-%02d-%02d_%02dh%02dm%02ds"
    directory_name = directory_format % time.localtime()[0:6]
    os.makedirs(directory_name)

    # save the model for later use; HIGHEST_PROTOCOL is a binary protocol, so
    # the file has to be opened in binary mode
    full_path = directory_name + "/model.pkl"
    with open(full_path, "wb") as model_file:
        pickle.dump(model, model_file, pickle.HIGHEST_PROTOCOL)

    # save weights separately
    savemat(directory_name + "/weights.mat", weights)

    # create log file
    ex = data.shape[0] if examples is None else examples
    with open(directory_name + "/log.txt", "wb") as log_file:
        for m in range(len(model_type)):
            log_file.write(
                "Model layer %d: \n model:%s \n dimensions:%4s \n iterations:%3d \n"
                % (m, model_type[m], dimensions[m], iterations[m])
            )
            # group settings only apply to the grouped model types
            if model_type[m] in ("GroupSF", "GroupConvolutionalSF"):
                log_file.write(" Groups: %d \n Step: %d" % (group, step))
        log_file.write(
            " Data-set: %s \n Examples: %6d \n Whitened: %s"
            % (filename, ex, whitening)
        )
        log_file.write("\nElapsed training time: %f" % elapsed)

    # get the activations of the last layer and save them batch by batch
    if verbosity >= 1:
        last_layer = model.n_layers - 1
        for batch in xrange(n_batches):
            begin = batch * batch_size
            end = begin + batch_size

            # only the normalized (first) and raw (fifth) outputs are saved
            f_hat, _, _, _, f, _ = outputs[last_layer](data[begin:end])

            key = "layer" + str(last_layer) + "_batch" + str(batch)
            savemat(
                directory_name + "/activations_norm_" + key + ".mat",
                {key: f_hat},
            )
            savemat(
                directory_name + "/activation_raw_" + key + ".mat",
                {key: f},
            )

    # display figures
    if verbosity == 2:

        # if GD, plot the cost function over time
        if opt == "GD":
            visualize.plotCost(cost)

        # visualize the receptive fields of the first layer
        visualize.drawplots(
            weights["layer0"].T,
            color="gray",
            convolution=convolution,
            pad=0,
            examples=None,
            channels=channels,
        )

    # ================================ Test the Model ========================

    # test the model if evaluating classification performance
    if test_model == "y":

        print("testing...")
        from sklearn import svm
        from sklearn.metrics import accuracy_score

        # set some new local parameters
        train_data_file = "STL_10_lcn_train.mat"
        train_labels_file = "train.mat"
        test_data_file = "STL_10_lcn_test.mat"
        test_labels_file = "test.mat"
        test_model_type = ["ConvolutionalSF"]
        test_dimensions = ([1, n_filters, patch_size, patch_size],)
        test_convolution = "y"
        test_batch_size = 100

        # load in STL-10 training data (all pre-normalized using LCN)
        print("loading in training and test data...")
        file_path = os.path.join(base_path, "data", train_data_file)
        train_data = loadmat(file_path)["X"]

        # the training labels and the pre-defined fold indices live in the
        # same file, so it is read only once
        file_path = os.path.join(base_path, "data", train_labels_file)
        train_mat = loadmat(file_path)
        train_labels = train_mat["y"]

        # subtracting the integer 1 (instead of a float array) keeps the
        # indices integer-valued so they remain usable for array indexing
        fold_indices = train_mat["fold_indices"] - 1  # make zero-index

        # load in STL-10 test data (all pre-normalized using LCN)
        file_path = os.path.join(base_path, "data", test_data_file)
        test_data = loadmat(file_path)["X"]
        file_path = os.path.join(base_path, "data", test_labels_file)
        test_labels = loadmat(file_path)["y"]
        test_targets = np.ravel(test_labels[0:examples])

        # initialize convolutional network with learned parameters from above
        old_weights = model.layers[0].w.eval().reshape(
            (-1, channels, patch_size, patch_size)
        )
        old_weights = theano.shared(
            old_weights.astype(dtype=theano.config.floatX)
        )
        test_network = sf.Network(
            model_type=test_model_type,
            weight_dims=test_dimensions,
            p=pool,
            group_size=group,
            step=step,
            lr=learn_rate,
            opt=opt,
            c=test_convolution,
            test="y",
            batch_size=test_batch_size,
            random="y",
            weights=old_weights,
        )

        # compile the training, output, and test functions for the network
        print("compiling theano functions...")
        _, _, test = test_network.training_functions(train_data)

        # train and test a SVM classifier for each layer (pixels would be
        # layer 0; skipped for now, so only layer 1 is evaluated)
        accuracy = {}
        train_input = None
        test_input = None
        for layer in range(1, 2):
            layer_key = "layer" + str(layer)

            # create dictionary for layer
            accuracy[layer_key] = {}

            # create quadrant pooling function based on size of output from
            # layer
            # NOTE(review): here the function is called on a single example
            # and its result is used directly, while the batched calls below
            # index the result with [0] first -- confirm this call is valid
            quadrant_size = test[layer - 1](test_data[0]).shape[3]
            quad_pool = quadrant_pooling(quadrant_size)

            # loop over pre-defined folds
            n_folds = fold_indices.shape[1]
            for fold in xrange(n_folds):
                fold_key = "fold" + str(fold)

                # get fold data
                train_data_fold = np.squeeze(
                    train_data[fold_indices[0][fold]]
                )
                train_labels_fold = np.squeeze(
                    train_labels[fold_indices[0][fold]]
                )

                # pixel inputs
                if layer == 0:
                    if fold == 0:  # only get test data once
                        test_input = test_data.reshape(
                            test_data.shape[0],
                            test_data.shape[1] * test_data.shape[2] *
                            test_data.shape[3],
                        )
                    train_input = train_data_fold.reshape(
                        train_data_fold.shape[0],
                        train_data_fold.shape[1] * train_data_fold.shape[2] *
                        train_data_fold.shape[3],
                    )

                # hidden layers
                elif layer > 0:
                    # get the quadrant-pooled output of the current layer for
                    # the training / test data
                    # TODO: use raw output as training and testing data?
                    if fold == 0:  # only get test data once
                        print("getting test data...")
                        test_input = _pooled_layer_features(
                            test[layer - 1], test_data, quad_pool,
                            n_filters, test_batch_size,
                        )

                    print("getting training data...")
                    train_input = _pooled_layer_features(
                        test[layer - 1], train_data_fold, quad_pool,
                        n_filters, test_batch_size,
                    )

                # standardize every example (row) to zero mean and unit
                # variance across its features
                # NOTE(review): a row that is entirely constant (e.g. one left
                # at zero by a partial batch) has zero std and divides by zero
                if fold == 0:  # only normalize test data once
                    test_input -= test_input.mean(axis=1)[:, np.newaxis]
                    test_input /= np.std(test_input, axis=1)[:, np.newaxis]
                train_input -= train_input.mean(axis=1)[:, np.newaxis]
                train_input /= np.std(train_input, axis=1)[:, np.newaxis]

                # train linear support vector machine
                print("Training linear SVM...")
                clf = svm.SVC(kernel="linear").fit(
                    train_input, np.ravel(train_labels_fold[0:examples])
                )

                # get predictions from SVM and calculate accuracy from them
                # (avoids classifying the test set a second time)
                print("Making predictions...")
                predictions = clf.predict(test_input)
                fold_accuracy = accuracy_score(test_targets, predictions)
                accuracy[layer_key][fold_key] = fold_accuracy

                # display results and log them
                print("Accuracy of the classifier for fold %d at layer %1d: %0.4f"
                      % (fold, layer, fold_accuracy))
                with open(directory_name + "/log.txt", "a") as log_file:
                    log_file.write(
                        "\nAccuracy of the classifier for fold %d at layer %1d: %0.4f"
                        % (fold, layer, fold_accuracy)
                    )

        # save the test results
        savemat("accuracy", accuracy)