def main(): # define global parameters model_type = [ 'ConvolutionalSF', 'ConvolutionalSF', 'ConvolutionalSF', 'ConvolutionalSF', # 'ConvolutionalSF' ] convolution = 'y' filename = "unlabeled_10000.mat" # train # unlabeled # STL_10_lcn_unlabeled.mat.h5 channels = 3 patch_size = 14 n_filters = [ 100, 400, 1600, 6400 # 800, # 1600 ] # # [100, 400, 1600, 6400, 25600] # 1600 # increasing neurons x4 maintains dimensionality dimensions = ( [n_filters[0], channels, 11, 11], [n_filters[1], n_filters[0], 4, 4], [n_filters[2], n_filters[1], 3, 3], [n_filters[3], n_filters[2], 2, 2], # [n_filters[4], n_filters[3], 3, 3] ) # ([n_filters, patch_size * patch_size * channels],) # ([100, 256],) pool = None group = None step = None learn_rate = 0.001 # 0.0001 iterations = [ 3, 3, 2, 2 # 1, # 1 ] # [5, 5, 5] # [50] # [100] verbosity = 0 opt = 'GD' whitening = 'y' test_model = 'y' examples = None batch_size = 100 # 360 # 8000 lcn_kernel = [ 5, 5, 3, 3 ] # these may have to be odd values so that there is a middle aws = 'y' # # # load in data # print "loading data..." base_path = os.path.dirname(__file__) file_path = os.path.join(base_path, "data", filename) data = None if filename == 'train.mat' or filename == 'unlabeled_10000.mat': data = loadmat(file_path)['X'] elif filename == 'unlabeled.mat' or filename == 'STL_10_lcn_unlabeled.mat.h5': data = h5py.File(file_path, 'r')['X'] data = np.array(data) data = data.T # preprocess the data and convert to float; NOTE: data may have already been normalized using LCN (check data read) print "pre-processing data..." data = np.float32(data.reshape(-1, 3, int(np.sqrt(data.shape[1] / 3)), int(np.sqrt(data.shape[1] / 3)))) data = data[0:examples, :, :, :] print data.shape if filename == 'unlabeled.mat' or filename == 'unlabeled_10000.mat' or filename == 'train.mat': for channel in range(channels): data[:, channel, :, :] = np.reshape(scaling.LCNinput(data[:, channel, :, :]. reshape((data.shape[0], 1, data.shape[2], data.shape[3])), kernel_shape=9), ( data.shape[0], data.shape[2], data.shape[3]) ) # # # determine number of batches # n_batches, rem = divmod(data.shape[0], batch_size) # # # construct the network # print "building model..." # model = sf.Network( # model_type=model_type, # weight_dims=dimensions, # p=pool, # group_size=group, # step=step, # lr=learn_rate, # opt=opt, # c=convolution, # test=test_model, # batch_size=batch_size, # random='y', # weights=None, # lcn_kernel=lcn_kernel # ) # # # compile the training, output, and test functions for the network # print "compiling theano functions..." # train, outputs, test = model.training_functions(data) # # # train the sparse filtering network # print "training network..." 
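# The per-channel preprocessing above relies on scaling.LCNinput, which is not shown in this file.
# The helper below is a minimal, hypothetical local-contrast-normalization stand-in (Gaussian local
# mean subtraction followed by local-std division); the real LCNinput may use a different kernel
# shape and edge handling, so treat this only as a sketch of the idea.
import numpy as np
from scipy.ndimage import gaussian_filter

def lcn_sketch(images, sigma=3.0, eps=1e-4):
    """Hypothetical LCN over a [examples, 1, rows, cols] float32 array."""
    out = np.empty_like(images)
    for i in range(images.shape[0]):
        img = images[i, 0]
        local_mean = gaussian_filter(img, sigma)          # local brightness estimate
        centered = img - local_mean                       # subtract local mean
        local_std = np.sqrt(gaussian_filter(centered ** 2, sigma))
        out[i, 0] = centered / np.maximum(local_std, eps)  # divide by local contrast
    return out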
# start_time = time.time() # cost = {} # weights = {} # for l in xrange(model.n_layers): # # cost_layer = [] # w = None # # # iterate over training epochs # for epoch in xrange(iterations[l]): # # # go though [mini]batches # for batch_index in xrange(n_batches): # # # create index for random [mini]batch # index = np.int32(np.random.randint(data.shape[0], size=batch_size)) # # c, w = train[l](index=index) # cost_layer.append(c) # print("Layer %i cost at epoch %i and batch %i: %f" % (l + 1, epoch, batch_index, c)) # # # add layer cost and weights to the dictionaries # cost['layer' + str(l)] = cost_layer # weights['layer' + str(l)] = w # # # calculate and display elapsed training time # elapsed = time.time() - start_time # print('Elapsed training time: %f' % elapsed) # # create sub-folder for saved model directory_name = None if aws == 'n': directory_format = "./saved/%4d-%02d-%02d_%02dh%02dm%02ds" directory_name = directory_format % time.localtime()[0:6] os.mkdir(directory_name) elif aws == 'y': import boto from boto.s3.key import Key s3 = boto.connect_s3() my_bucket = 'dlacombejr.bucket' bucket = s3.get_bucket(my_bucket) k = Key(bucket) # directory_format = "./saved/%4d-%02d-%02d_%02dh%02dm%02ds" # directory_name = directory_format % time.localtime()[0:6] directory_name = "./saved/2016-01-25_19h17m41s" # os.mkdir(directory_name) # # # save the model for later use # full_path = directory_name + '/model.pkl' # pickle.dump(model, open(full_path, 'w'), pickle.HIGHEST_PROTOCOL) # if aws == 'y': # k.key = full_path # k.set_contents_from_filename(full_path) # os.remove(full_path) # # # save weights separately # savemat(directory_name + '/weights.mat', weights) # if aws == 'y': # k.key = directory_name + '/weights.mat' # k.set_contents_from_filename(directory_name + '/weights.mat') # os.remove(directory_name + '/weights.mat') # # # save the cost functions # savemat(directory_name + '/cost.mat', cost) # if aws == 'y': # k.key = directory_name + '/cost.mat' # k.set_contents_from_filename(directory_name + '/cost.mat') # os.remove(directory_name + '/cost.mat') # # # create log file # log_file = open(directory_name + "/log.txt", "wb") # todo: create log file by looping through args # for m in range(len(model_type)): # log_file.write( # "Model layer %d: \n model:%s \n dimensions:%4s \n iterations:%3d \n" % (m, # model_type[m], # dimensions[m], # iterations[m]) # ) # if model == 'GroupSF' or model == 'GroupConvolutionalSF': # log_file.write( # " Groups: %d \n Step: %d" % (group, step) # ) # ex = data.shape[0] # if examples is not None: # ex = examples # # log_file.write( # " Data-set: %s \n Examples: %6d \n Whitened: %s" % (filename, ex, whitening) # ) # log_file.write('\nElapsed training time: %f' % elapsed) # log_file.close() # if aws == 'y': # k.key = directory_name + "/log.txt" # k.set_contents_from_filename(directory_name + "/log.txt") # os.remove(directory_name + "/log.txt") ''' ================================ Test the Model ======================================= ''' # todo: train a model and save it; then load in the model and test it so that grid search can be performed # load in the model if aws == 'y': k.key = directory_name + '/model.pkl' # model = k.read(k.key) model = pickle.loads(k.get_contents_as_string()) # open(model, 'rb') # compile the training, output, and test functions for the network print "compiling theano functions..." train, outputs, test = model.training_functions(data) # test the model if evaluating classification performance if test_model == 'y': print 'testing...' 
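# Hedged sketch of the boto 2 S3 round-trip used in this script (connect_s3, get_bucket, Key,
# set_contents_from_filename / get_contents_as_string). The bucket name is the one hard-coded
# above; credentials are assumed to come from the environment or ~/.boto.
import pickle

import boto
from boto.s3.key import Key

def s3_put_file(bucket_name, path):
    k = Key(boto.connect_s3().get_bucket(bucket_name))
    k.key = path
    k.set_contents_from_filename(path)

def s3_load_pickle(bucket_name, path):
    k = Key(boto.connect_s3().get_bucket(bucket_name))
    k.key = path
    return pickle.loads(k.get_contents_as_string())

# example (assumes the bucket and key above exist):
# model = s3_load_pickle('dlacombejr.bucket', directory_name + '/model.pkl')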
from sklearn import svm # set some new local parameters train_data_file = "STL_10_lcn_train.mat" # "train.mat" train_labels_file = "train.mat" test_data_file = "STL_10_lcn_test.mat" # "test.mat" test_labels_file = "test.mat" batch_size = 100 # todo: read in lcn data # load in STL-10 training data (all pre-normalized using LCN) print "loading in training and test data..." file_path = os.path.join(base_path, "data", train_data_file) train_data = loadmat(file_path)['X'] file_path = os.path.join(base_path, "data", train_labels_file) train_labels = loadmat(file_path)['y'] # load in STL-10 test data (all pre-normalized using LCN) file_path = os.path.join(base_path, "data", test_data_file) test_data = loadmat(file_path)['X'] file_path = os.path.join(base_path, "data", test_labels_file) test_labels = loadmat(file_path)['y'] # # preproces training and test data # print "preprocessing training and test data..." # print train_data.shape # train_data = np.float32(train_data.reshape(-1, # 3, # int(np.sqrt(train_data.shape[1] / 3)), # int(np.sqrt(train_data.shape[1] / 3))) # ) # print train_data.shape # for channel in range(channels): # train_data[:, channel, :, :] = np.reshape(scaling.LCNinput(train_data[:, channel, :, :]. # reshape((train_data.shape[0], 1, # train_data.shape[2], # train_data.shape[3])), # kernel_shape=9), ( # train_data.shape[0], # train_data.shape[2], # train_data.shape[3])) # # test_data = np.float32(test_data.reshape(-1, # 3, # int(np.sqrt(test_data.shape[1] / 3)), # int(np.sqrt(test_data.shape[1] / 3))) # ) # for channel in range(channels): # test_data[:, channel, :, :] = np.reshape(scaling.LCNinput(test_data[:, channel, :, :]. # reshape((test_data.shape[0], 1, # test_data.shape[2], # test_data.shape[3])), # kernel_shape=9), ( # test_data.shape[0], # test_data.shape[2], # test_data.shape[3])) # read in the pre-defined fold indices file_path = os.path.join(base_path, "data", "train.mat") fold_indices = loadmat(file_path)['fold_indices'] fold_indices -= np.ones(fold_indices.shape) # make zero-index # train and test a SVM classifier for each layer (including pixels as baseline) accuracy = {} train_input = None test_input = None cm = None c_parameters = [0.02, 0.005, 0.002, 0.001] for layer in range(1, model.n_layers + 1): # range(test_model.n_layers + 1): # skipping pixels for now # create dictionary for layer and list for calculations accuracy['layer' + str(layer)] = {} accuracy_list = [] # create quadrant pooling function based on size of output from layer quadrant_size = test[layer - 1](test_data[0, :, :, :].reshape((1, 3, 96, 96)))[0].shape[3] / 2 print quadrant_size quad_pool = quadrant_pooling(quadrant_size) # loop over pre-defined folds n_folds = fold_indices.shape[1] for fold in xrange(n_folds): # get fold data fold_index = fold_indices[0][fold].astype('int') train_data_fold = np.squeeze(train_data[fold_index]) train_labels_fold = np.squeeze(train_labels[fold_index]) # pixel inputs if layer == 0: if fold == 0: # only get test data once test_input = test_data.reshape(test_data.shape[0], test_data.shape[1] * test_data.shape[2] * test_data.shape[3]) train_input = train_data_fold.reshape(train_data_fold.shape[0], train_data_fold.shape[1] * train_data_fold.shape[2] * train_data_fold.shape[3]) # hidden layers elif layer > 0: # get the output of the current layer in the model given the training / test data and then reshape # TODO: use raw output as training and testing data? if fold == 0: # only get test data once print "getting test data..." 
test_input = np.zeros((test_data.shape[0], n_filters[layer - 1], 2, 2)) n_batches = test_data.shape[0] / batch_size for batch in xrange(n_batches): print "for batch %d" % batch batch_start = batch * batch_size batch_end = batch_start + batch_size temp = test[layer - 1](test_data[batch_start:batch_end]) temp = temp[0] test_input[batch_start:batch_end] = quad_pool(temp)[0] test_input = test_input.reshape(test_input.shape[0], test_input.shape[1] * test_input.shape[2] * test_input.shape[3]) print "getting training data..." train_input = np.zeros((train_data_fold.shape[0], n_filters[layer - 1], 2, 2)) n_batches = train_data_fold.shape[0] / batch_size for batch in xrange(n_batches): print "for batch %d" % batch batch_start = batch * batch_size batch_end = batch_start + batch_size temp = test[layer - 1](train_data_fold[batch_start:batch_end]) temp = temp[0] train_input[batch_start:batch_end] = quad_pool(temp)[0] train_input = train_input.reshape(train_input.shape[0], train_input.shape[1] * train_input.shape[2] * train_input.shape[3]) # normalize the inputs for each dimension (zero-mean and unit-variance) if fold == 0: # only normalize test data once test_input -= test_input.mean(axis=1)[:, np.newaxis] test_input /= np.std(test_input, axis=1)[:, np.newaxis] train_input -= train_input.mean(axis=1)[:, np.newaxis] train_input /= np.std(train_input, axis=1)[:, np.newaxis] # train linear support vector machine print("Training linear SVM...") clf = svm.SVC(C=c_parameters[layer - 1], kernel="linear").fit(train_input, np.ravel(train_labels_fold[0:examples])) # get predictions from SVM and calculate accuracy print("Making predictions...") accuracy['layer' + str(layer)]['fold' + str(fold)] = clf.score(test_input, test_labels[0:examples]) accuracy_list.append(accuracy['layer' + str(layer)]['fold' + str(fold)]) training_accuracy = clf.score(train_input, np.ravel(train_labels_fold[0:examples])) # display results and log them print("Accuracy of the classifier for fold %d at layer %1d: %0.4f" % (fold, layer, accuracy['layer' + str(layer)]['fold' + str(fold)])) print "classification performance on training set: %0.4f" % training_accuracy log_file = open(directory_name + "/log_test.txt", "a") log_file.write( "\nAccuracy of the classifier for fold %d at layer %1d: %0.4f" % (fold, layer, accuracy['layer' + str(layer)]['fold' + str(fold)]) ) log_file.close() # calculate and print out average accuracy and std avg = np.mean(accuracy_list) std = np.std(accuracy_list) print "The overall accuracy of layer %d: %0.4f +/- (%0.4f)" % (layer, float(avg), float(std)) log_file = open(directory_name + "/log_test.txt", "a") log_file.write( "\nAccuracy of the classifier for fold %d at layer %1d: %0.4f" % (fold, layer, accuracy['layer' + str(layer)]['fold' + str(fold)]) ) log_file.close() # save for aws if aws == 'y': k.key = directory_name + "/log_test.txt" k.set_contents_from_filename(directory_name + "/log_test.txt") # save the test results savemat('accuracy', accuracy)
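# Minimal sketch of the per-fold evaluation pattern above: standardize each example (zero mean,
# unit variance across its own feature vector) and fit/score a linear SVM. Synthetic arrays stand
# in for the pooled layer activations; sizes and labels are illustrative only.
import numpy as np
from sklearn import svm

def standardize_rows(x, eps=1e-8):
    x = x - x.mean(axis=1)[:, np.newaxis]
    return x / (x.std(axis=1)[:, np.newaxis] + eps)

rng = np.random.RandomState(0)
train_x = standardize_rows(rng.randn(100, 400))
train_y = rng.randint(0, 10, 100)
test_x = standardize_rows(rng.randn(50, 400))
test_y = rng.randint(0, 10, 50)

clf = svm.SVC(C=0.02, kernel='linear').fit(train_x, train_y)
print('held-out accuracy: %0.4f' % clf.score(test_x, test_y))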
def main():

    # parse options from the command line
    parser = argparse.ArgumentParser(
        prog='PROG',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=textwrap.dedent('''\
        -------------------------------------------------------------------------------------------------------------
        This is a deep neural network architecture for training sparse filters. Example uses:

            $ python test.py
            $ python test.py -m GroupSF -v 1 -g 3 -s 1
            $ python test.py -m ConvolutionalSF -d 16 1 8 8 -v 1 -w y -c y -f CIFAR_data.mat -i 100
            $ python test.py -m ConvolutionalSF ConvolutionalSF -d 16 1 6 6 16 16 4 4 -w y -c y -f CIFAR_data.mat -i 100 150 -t y -v 1

        In the convolutional case, the extra "1" is added automatically for broadcasting.
        -------------------------------------------------------------------------------------------------------------
        '''))
    parser.add_argument("-m", "--model", default=['SparseFilter'], nargs='+', help="the model type")
    parser.add_argument("-c", "--convolution", default="n", help="convolution, yes or no")
    parser.add_argument("-f", "--filename", default="patches.mat", help="the data filename")
    parser.add_argument("-d", "--dimensions", type=int, nargs='+', default=[100, 256],
                        help="the dimensions of the model: [neurons, input size] or [neurons, length, width]")
    parser.add_argument("-p", "--pool", type=int, nargs='+', default=None, help="pooling dimensions")
    parser.add_argument("-g", "--group", type=int, default=None, help="group size")
    parser.add_argument("-s", "--step", type=int, default=None, help="step size")
    parser.add_argument("-l", "--learn_rate", type=float, default=.001, help="learning rate")
    parser.add_argument("-i", "--iterations", type=int, nargs='+', default=[100], help="number of iterations")
    parser.add_argument("-v", "--verbosity", type=int, default=0, help="verbosity: 0 no plot; 1 plots")
    parser.add_argument("-o", "--opt", default="GD", help="optimization method: GD or L-BFGS")
    parser.add_argument("-w", "--whitening", default='n', help="whitening: 'y' or 'n'")
    parser.add_argument("-t", "--test", default='n', help="test classification performance: 'y' or 'n'")
    parser.add_argument("-a", "--channels", type=int, default=1, help="number of channels in data")
    parser.add_argument("-e", "--examples", type=int, default=None, help="number of training examples")
    parser.add_argument("-b", "--batch_size", type=int, default=1000, help="number of examples in [mini]batch")
    parser.add_argument("-z", "--aws", default='n', help="run on aws: 'y' or 'n'")
    args = parser.parse_args()
    args.dimensions = parse_dims(args)
    args.iterations = parse_iter(args)

    ''' =================================== Load in the data =================================== '''

    # load in data
    print "loading data..."
    base_path = os.path.dirname(__file__)
    file_path = os.path.join(base_path, "data", args.filename)
    data = loadmat(file_path)['X']

    # reshape and preprocess data
    print "pre-processing data ..."
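# parse_dims and parse_iter are called above but not defined in this file. The versions below are
# hypothetical reconstructions consistent with the -d / -i examples in the help text (four integers
# per convolutional layer, two per fully-connected layer; a single -i value reused for every layer);
# they are not the actual project helpers.
def parse_dims(args):
    """Hypothetical: group the flat -d list into per-layer weight dimensions."""
    ints_per_layer = 4 if args.convolution == 'y' else 2
    flat = list(args.dimensions)
    assert len(flat) == ints_per_layer * len(args.model)
    return tuple(flat[i:i + ints_per_layer] for i in range(0, len(flat), ints_per_layer))

def parse_iter(args):
    """Hypothetical: repeat a single -i value so every layer has an iteration count."""
    if len(args.iterations) == 1:
        return args.iterations * len(args.model)
    return args.iterations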
    video = None
    if args.filename == 'patches_video.mat':
        video = data
        data = data.reshape(data.shape[0] * data.shape[1], data.shape[2]).T

    if args.convolution == 'n':
        if args.whitening == 'y':
            data -= data.mean(axis=0)
            data = whiten(data)
        elif args.whitening == 'n' and args.channels == 1:
            data -= data.mean(axis=0)
        # elif args.whitening == 'n' and args.channels == 3:
        #     data = np.float32(data)
        data = np.float32(data.T)

    elif args.convolution == 'y':
        if args.filename == 'kyotoData.mat':
            data = np.float32(data.reshape(-1, 1, int(np.sqrt(data.shape[1])), int(np.sqrt(data.shape[1]))))
            data = scaling.LCNinput(data, kernel_shape=9)
        elif args.filename == 'CIFAR_data.mat':
            data = np.float32(data.reshape(-1, 1, int(np.sqrt(data.shape[1])), int(np.sqrt(data.shape[1]))))
            data = scaling.LCNinput(data, kernel_shape=5)
            data = data[0:args.examples, :, :, :]
        elif args.filename == 'STL_10.mat' or args.filename == 'Lenna.mat':
            data = np.float32(data.reshape(-1, 3, int(np.sqrt(data.shape[1] / 3)), int(np.sqrt(data.shape[1] / 3))))
            data = data[0:args.examples, :, :, :]
            args.channels = data.shape[1]
            for channel in range(args.channels):
                data[:, channel, :, :] = np.reshape(
                    scaling.LCNinput(data[:, channel, :, :].reshape(
                        (data.shape[0], 1, data.shape[2], data.shape[3])), kernel_shape=9),
                    (data.shape[0], data.shape[2], data.shape[3]))

    # assert that batch size is valid and get number of batches
    n_batches, rem = divmod(data.shape[0], args.batch_size)
    assert rem == 0

    # other assertions
    assert len(args.model) == len(args.iterations)
    if args.model[0] == 'GroupSF' or args.model[0] == 'GroupConvolutionalSF':
        assert args.group is not None
        assert args.step is not None

    ''' ============================= Build and train the network ============================= '''

    # construct the network
    print "building model..."
    model = sf.Network(
        model_type=args.model,
        weight_dims=args.dimensions,
        p=args.pool,
        group_size=args.group,
        step=args.step,
        lr=args.learn_rate,
        opt=args.opt,
        c=args.convolution,
        test=args.test,
        batch_size=args.batch_size
    )  # TODO: custom learning rates for each layer

    # compile the training, output, and test functions for the network
    print "compiling theano functions..."
    train, outputs, test = model.training_functions(data)

    # train the sparse filtering network
    print "training network..."
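# The training loop below switches between plain gradient-descent steps and scipy's L-BFGS-B with
# jac=True, where the objective returns a (cost, gradient) pair. This is a self-contained sketch of
# that calling convention on a toy quadratic; train[l] in the real code plays the role of objective.
import numpy as np
from scipy.optimize import minimize

def objective(w):
    # toy quadratic standing in for train[l]; returns (cost, gradient) because jac=True
    cost = 0.5 * np.sum((w - 3.0) ** 2)
    grad = w - 3.0
    return cost, grad

result = minimize(objective, np.zeros(10), method='L-BFGS-B', jac=True,
                  options={'maxiter': 100, 'disp': False})
w_opt = result.x  # the trained weights are read from result.x, as in the loop below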
    t = time.time()
    cost = {}
    weights = {}
    for l in xrange(model.n_layers):

        cost_layer = []
        w = None

        # iterate over training epochs
        if args.opt == 'GD':
            for epoch in xrange(args.iterations[l]):

                # go through [mini]batches
                for batch_index in xrange(n_batches):
                    c, w = train[l](index=batch_index)
                    cost_layer.append(c)
                    print("Layer %i cost at epoch %i and batch %i: %f" % (l + 1, epoch, batch_index, c))

        elif args.opt == 'L-BFGS':
            w = minimize(train[l], model.layers[l].w.eval().flatten(),
                         method='L-BFGS-B', jac=True,
                         options={'maxiter': args.iterations[l], 'disp': True})

            if args.convolution == 'n':
                w = w.x.reshape(args.dimensions[0][0], args.dimensions[0][1])
            elif args.convolution == 'y':
                w = w.x.reshape(args.dimensions[0][0], args.dimensions[0][1],
                                args.dimensions[0][2], args.dimensions[0][3])

        # add layer cost and weights to the dictionaries
        cost['layer' + str(l)] = cost_layer
        weights['layer' + str(l)] = w

    # calculate and display elapsed training time
    elapsed = time.time() - t
    print('Elapsed training time: %f' % elapsed)

    # create sub-folder for saved model
    if args.aws == 'n':
        directory_format = "./saved/%4d-%02d-%02d_%02dh%02dm%02ds"
        directory_name = directory_format % time.localtime()[0:6]
        os.mkdir(directory_name)
    elif args.aws == 'y':
        import boto
        from boto.s3.key import Key
        s3 = boto.connect_s3()
        my_bucket = 'dlacombejr.bucket'
        bucket = s3.get_bucket(my_bucket)
        k = Key(bucket)
        directory_format = "./saved/%4d-%02d-%02d_%02dh%02dm%02ds"
        directory_name = directory_format % time.localtime()[0:6]
        os.mkdir(directory_name)

    # save the model for later use
    full_path = directory_name + '/model.pkl'
    pickle.dump(model, open(full_path, 'w'), pickle.HIGHEST_PROTOCOL)
    if args.aws == 'y':
        k.key = full_path
        k.set_contents_from_filename(full_path)
        os.remove(full_path)

    # save weights separately
    savemat(directory_name + '/weights.mat', weights)
    if args.aws == 'y':
        k.key = directory_name + '/weights.mat'
        k.set_contents_from_filename(directory_name + '/weights.mat')
        os.remove(directory_name + '/weights.mat')

    # create log file
    log_file = open(directory_name + "/log.txt", "wb")
    for m in range(len(args.model)):
        log_file.write("Model layer %d: \n model:%s \n dimensions:%4s \n iterations:%3d \n" %
                       (m, args.model[m], args.dimensions[m], args.iterations[m]))
    if args.model[0] == 'GroupSF' or args.model[0] == 'GroupConvolutionalSF':
        log_file.write(" Groups: %d \n Step: %d" % (args.group, args.step))
    ex = data.shape[0]
    if args.examples is not None:
        ex = args.examples
    log_file.write(" Data-set: %s \n Examples: %6d \n Whitened: %s" % (args.filename, ex, args.whitening))
    log_file.write('\nElapsed training time: %f' % elapsed)
    log_file.close()
    if args.aws == 'y':
        k.key = directory_name + "/log.txt"
        k.set_contents_from_filename(directory_name + "/log.txt")
        os.remove(directory_name + "/log.txt")

    ''' =============================== Verbosity Options ===================================== '''

    # get variables and saves
    if args.verbosity >= 1:

        # # get variables of interest
        # activations_norm = {}
        # activations_raw = {}
        # activations_shuffled = {}
        # reconstruction = {}
        # error_recon = {}
        # pooled = {}
        # for l in xrange(len(args.dimensions)):
        #     activations_norm['layer' + str(l)] = {}
        #     activations_raw['layer' + str(l)] = {}
        #     activations_shuffled['layer' + str(l)] = {}
        #     reconstruction['layer' + str(l)] = {}
        #     error_recon['layer' + str(l)] = {}
        #     pooled['layer' + str(l)] = {}

        for batch in xrange(n_batches):

            # get variables of interest
            activations_norm = {}
            activations_raw = {}
            activations_shuffled = {}
            reconstruction = {}
            error_recon = {}
            pooled =
{} # f_hat, rec, err, f_hat_shuffled, f, p = outputs[l]() begin = batch * args.batch_size end = begin + args.batch_size f_hat, rec, err, f_hat_shuffled, f, p = outputs[model.n_layers - 1](data[begin:end]) # activations_norm['layer' + str(l)]['batch' + str(batch)] = f_hat # activations_raw['layer' + str(l)]['batch' + str(batch)] = f # activations_shuffled['layer' + str(l)]['batch' + str(batch)] = f_hat_shuffled # reconstruction['layer' + str(l)]['batch' + str(batch)] = err # error_recon['layer' + str(l)]['batch' + str(batch)] = rec # pooled['layer' + str(l)]['batch' + str(batch)] = p activations_norm['layer' + str(l) + '_batch' + str(batch)] = f_hat activations_raw['layer' + str(l) + '_batch' + str(batch)] = f activations_shuffled['layer' + str(l) + '_batch' + str(batch)] = f_hat_shuffled reconstruction['layer' + str(l) + '_batch' + str(batch)] = err error_recon['layer' + str(l) + '_batch' + str(batch)] = rec pooled['layer' + str(l) + '_batch' + str(batch)] = p # save model as well as weights and activations separately savemat( directory_name + '/activations_norm_' + 'layer' + str(l) + '_batch' + str(batch) + '.mat', activations_norm) savemat( directory_name + '/activation_raw_' + 'layer' + str(l) + '_batch' + str(batch) + '.mat', activations_raw) if args.aws == 'y': k.key = directory_name + '/activations_norm_' + 'layer' + str(l) + '_batch' + \ str(batch) + '.mat' k.set_contents_from_filename(directory_name + '/activations_norm_' + 'layer' + str(l) + '_batch' + str(batch) + '.mat') os.remove(directory_name + '/activations_norm_' + 'layer' + str(l) + '_batch' + str(batch) + '.mat') k.key = directory_name + '/activation_raw_' + 'layer' + str(l) + '_batch' + \ str(batch) + '.mat' k.set_contents_from_filename(directory_name + '/activation_raw_' + 'layer' + str(l) + '_batch' + str(batch) + '.mat') os.remove(directory_name + '/activation_raw_' + 'layer' + str(l) + '_batch' + str(batch) + '.mat') # savemat(directory_name + '/weights.mat', weights) # if args.aws == 'y': # k.key = directory_name + '/weights.mat' # k.set_contents_from_filename(directory_name + '/weights.mat') # os.remove(directory_name + '/weights.mat') # # f_hat, rec, err, f_hat_shuffled, f, p = outputs[l]() # f_hat, rec, err, f_hat_shuffled, f, p = outputs[l](data[0:args.batch_size]) # # activations_norm['layer' + str(l)] = f_hat # activations_raw['layer' + str(l)] = f # activations_shuffled['layer' + str(l)] = f_hat_shuffled # reconstruction['layer' + str(l)] = err # error_recon['layer' + str(l)] = rec # pooled['layer' + str(l)] = p # # # save model as well as weights and activations separately # savemat(directory_name + '/weights.mat', weights) # savemat(directory_name + '/activations_norm.mat', activations_norm) # savemat(directory_name + '/activation_raw.mat', activations_raw) # display figures if args.verbosity == 2: # if GD, plot the cost function over time if args.opt == 'GD': visualize.plotCost(cost) # visualize the receptive fields of the first layer visualize.drawplots(weights['layer0'].T, color='gray', convolution=args.convolution, pad=0, examples=None, channels=args.channels) # visualize the distribution of lifetime and population sparseness for l in xrange(len(args.dimensions)): layer = 'layer' + str(l) if args.convolution == 'n': visualize.dispSparseHist(activations_norm[layer], l) elif args.convolution == 'y': visualize.dispSparseHist(activations_shuffled[layer].reshape( args.dimensions[l][0], data.shape[0] * activations_shuffled[layer].shape[2] * activations_shuffled[layer].shape[3]), layer=l) # visualize the 
distribution of activity across the "cortical sheet" and reconstruction if args.filename == 'patches_video.mat': f_hat = activations_norm['layer0'].T.reshape( video.shape[0], video.shape[1], args.dimensions[0][0]) visualize.videoCortex(f_hat[0:100, :, :], 'y', args.convolution, 1) else: visualize.drawplots(activations_norm['layer0'], color='gray', convolution=args.convolution, pad=1, examples=100) # # visualize reconstruction capabilities # if args.convolution == 'n': # visualize.drawReconstruction(data[:, 0:100], error_recon['layer0'][:, 0:100], 'y', args.convolution, 1) # elif args.convolution == 'y': # visualize.convolutional_reconstruction(data[0, :, :, :], activations_raw['layer0'], weights['layer0'], # color='gray', convolution=args.convolution) # print('Reconstructed error: %e' % reconstruction['layer0']) # additional visualizations for convolutional network if args.convolution == 'y': dim = activations_raw['layer0'].shape[2] # visualize an example of a convolved image visualize.visualize_convolved_image(activations_raw['layer0'], dim=dim) # print activations_raw['layer0'] # visualize max-pooled activations and LCN output visualize.visualize_convolved_image( pooled['layer0'][0, :, :, :].reshape( 1, pooled['layer0'].shape[1], pooled['layer0'].shape[2], pooled['layer0'].shape[3]), dim=dim / 2) # visualize an example of a LCNed convolved image after max pooling # temp = activations_raw['layer0'] #[0, :, :, :] temp = pooled['layer0'] #[0, :, :, :] # print temp.shape for i in range(temp.shape[1]): temp[0, i, :, :] = scaling.LCNinput(temp[0, i, :, :].reshape( (1, 1, dim / 2, dim / 2)), kernel_shape=5) # temp = scaling.LCNinput(temp, kernel_shape=5) visualize.visualize_convolved_image(temp, dim=dim / 2) # print temp ''' ================================ Test the Model ======================================= ''' # test the model if evaluating classification performance if args.test == 'y': from sklearn import svm from sklearn.metrics import confusion_matrix train_labels = loadmat(file_path)['y'] file_path = os.path.join(base_path, "data", "CIFAR_test.mat") test_data = loadmat(file_path)['X'] test_labels = loadmat(file_path)['y'] # reshape and normalize the data if args.convolution == 'y': test_data = np.float32( test_data.reshape(-1, 1, int(np.sqrt(test_data.shape[1])), int(np.sqrt(test_data.shape[1])))) test_data = scaling.LCNinput(test_data, kernel_shape=5) test_data = test_data[0:args.examples, :, :, :] # get SVM test results for pixels to last layer train_input = None for layer in range(model.n_layers + 1): # pixel inputs if layer == 0: test_input = test_data.reshape( test_data.shape[0], test_data.shape[1] * test_data.shape[2] * test_data.shape[3]) train_input = data.reshape( data.shape[0], data.shape[1] * data.shape[2] * data.shape[3]) # hidden layers elif layer > 0: # get the output of the current layer in the model given the training / test data and then reshape # TODO: use raw output as training and testing data? 
                test_input = test[layer - 1](test_data[0:args.batch_size])
                test_input = test_input[0].reshape(
                    test_input[0].shape[0],
                    test_input[0].shape[1] * test_input[0].shape[2] * test_input[0].shape[3])

                train_input = activations_norm['layer' + str(layer - 1)]
                train_input = train_input.reshape(
                    train_input.shape[0],
                    train_input.shape[1] * train_input.shape[2] * train_input.shape[3])

            # train linear support vector machine
            clf = svm.SVC(kernel="linear").fit(train_input, np.ravel(train_labels[0:args.examples]))

            # get predictions from SVM and calculate accuracy
            predictions = clf.predict(test_input)
            accuracy = clf.score(test_input, test_labels[0:args.examples])

            # display results and log them
            print("Accuracy of the classifier at layer %1d: %0.4f" % (layer, accuracy))
            cm = confusion_matrix(test_labels[0:args.examples], predictions)
            log_file = open(directory_name + "/log.txt", "a")
            log_file.write("\nAccuracy of the classifier at layer %1d: %0.4f" % (layer, accuracy))
            log_file.close()

    # visualize the confusion matrix
    if args.test == 'y' and args.verbosity == 2:
        import pylab as pl
        pl.imshow(cm, interpolation='nearest')
        pl.title('Confusion Matrix for Network')
        pl.colorbar()
        pl.ylabel('True Label')
        pl.xlabel('Predicted Label')
        pl.show()
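# The TODO earlier in this file mentions running a grid search once the model is saved. This is a
# hedged sketch of tuning the linear-SVM C value with scikit-learn's GridSearchCV (the module path
# is sklearn.model_selection in current releases; older installs expose sklearn.grid_search). The
# arrays are synthetic stand-ins for flattened layer activations.
import numpy as np
from sklearn import svm
from sklearn.model_selection import GridSearchCV

rng = np.random.RandomState(0)
features = rng.randn(200, 100)
labels = rng.randint(0, 10, 200)

search = GridSearchCV(svm.SVC(kernel='linear'),
                      param_grid={'C': [0.001, 0.002, 0.005, 0.02]},
                      cv=3)
search.fit(features, labels)
print('best C: %s' % search.best_params_['C'])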
def main(): # define global parameters model_type = ['GroupSF'] convolution = 'n' filename = "unlabeled_10000.mat" input_examples = 10000 channels = 3 n_filters = [10000] n_hidden_previous_layer = 5000 dimensions = ([n_filters[0], n_hidden_previous_layer], ) pool = None group = 3 step = 1 learn_rate = 0.001 # 0.0001 iterations = [10] opt = 'GD' whitening = 'y' test_model = 'y' examples = None batch_size = 1000 lcn_kernel = [ 5, 5, 3, 3, 3 ] # these may have to be odd values so that there is a middle aws = 'y' # load in data print "loading data..." base_path = os.path.dirname(__file__) file_path = os.path.join(base_path, "data", filename) data = None if filename == 'train.mat' or filename == 'unlabeled_10000.mat': data = loadmat(file_path)['X'] elif filename == 'unlabeled.mat' or filename == 'STL_10_lcn_unlabeled.mat.h5': data = h5py.File(file_path, 'r')['X'] data = np.array(data) data = data.T # preprocess the data and convert to float print "pre-processing data..." data = np.float32( data.reshape(-1, 3, int(np.sqrt(data.shape[1] / 3)), int(np.sqrt(data.shape[1] / 3)))) data = data[0:examples, :, :, :] print data.shape if filename == 'unlabeled.mat' or filename == 'unlabeled_10000.mat' or filename == 'train.mat': for channel in range(channels): data[:, channel, :, :] = np.reshape( scaling.LCNinput(data[:, channel, :, :].reshape( (data.shape[0], 1, data.shape[2], data.shape[3])), kernel_shape=9), (data.shape[0], data.shape[2], data.shape[3])) # create sub-folder for saved model directory_name = None if aws == 'n': directory_format = "./saved/%4d-%02d-%02d_%02dh%02dm%02ds" directory_name = directory_format % time.localtime()[0:6] os.mkdir(directory_name) elif aws == 'y': import boto from boto.s3.key import Key s3 = boto.connect_s3() my_bucket = 'dlacombejr.bucket' bucket = s3.get_bucket(my_bucket) k = Key(bucket) directory_format = "./saved/%4d-%02d-%02d_%02dh%02dm%02ds" directory_name = directory_format % time.localtime()[0:6] os.mkdir(directory_name) # load in the front-end of the model and obtain output frontend_model_directory_name = "./saved/2016-01-26_18h54m23s" if aws == 'y': k.key = frontend_model_directory_name + '/model.pkl' model = pickle.loads(k.get_contents_as_string()) # compile the training, output, and test functions for the network print "compiling theano functions..." train, outputs, test = model.training_functions(data) # get output of frontend model to treat as input to DCTSF print "getting output of frontend model..." batch_size_out_data = 50 train_input = np.zeros((input_examples, n_hidden_previous_layer, 1, 1)) n_batches = input_examples / batch_size_out_data for batch in xrange(n_batches): print "for batch %d" % batch batch_start = batch * batch_size_out_data batch_end = batch_start + batch_size_out_data temp = test[model.n_layers - 1](data[batch_start:batch_end]) train_input[batch_start:batch_end] = np.sum(temp[0], axis=(2, 3), keepdims=True) train_input = train_input.reshape( train_input.shape[0], train_input.shape[1] * train_input.shape[2] * train_input.shape[3]) # normalize the output of the frontend model train_input -= train_input.mean(axis=1)[:, np.newaxis] train_input /= np.std(train_input, axis=1)[:, np.newaxis] # make the data float32 train_input = np.float32(train_input) # determine number of batches n_batches, rem = divmod(data.shape[0], batch_size) # construct the network print "building model..." 
model = sf.Network(model_type=model_type, weight_dims=dimensions, p=pool, group_size=group, step=step, lr=learn_rate, opt=opt, c=convolution, test=test_model, batch_size=batch_size, random='y', weights=None, lcn_kernel=lcn_kernel) # compile the training, output, and test functions for the network print "compiling theano functions..." train, outputs, test = model.training_functions(train_input) # train the sparse filtering network print "training network..." start_time = time.time() cost = {} weights = {} for l in xrange(model.n_layers): cost_layer = [] w = None # iterate over training epochs for epoch in xrange(iterations[l]): # go though [mini]batches for batch_index in xrange(n_batches): # create index for random [mini]batch index = np.int32( np.random.randint(data.shape[0], size=batch_size)) c, w = train[l](index=index) cost_layer.append(c) print("Layer %i cost at epoch %i and batch %i: %f" % (l + 1, epoch, batch_index, c)) # add layer cost and weights to the dictionaries cost['layer' + str(l)] = cost_layer weights['layer' + str(l)] = w # calculate and display elapsed training time elapsed = time.time() - start_time print('Elapsed training time: %f' % elapsed) # save the model for later use full_path = directory_name + '/model.pkl' pickle.dump(model, open(full_path, 'w'), pickle.HIGHEST_PROTOCOL) if aws == 'y': k.key = full_path k.set_contents_from_filename(full_path) os.remove(full_path) # save weights separately savemat(directory_name + '/weights.mat', weights) if aws == 'y': k.key = directory_name + '/weights.mat' k.set_contents_from_filename(directory_name + '/weights.mat') os.remove(directory_name + '/weights.mat') # save the cost functions savemat(directory_name + '/cost.mat', cost) if aws == 'y': k.key = directory_name + '/cost.mat' k.set_contents_from_filename(directory_name + '/cost.mat') os.remove(directory_name + '/cost.mat') # create log file log_file = open(directory_name + "/log.txt", "wb") # todo: create log file by looping through args for m in range(len(model_type)): log_file.write( "Model layer %d: \n model:%s \n dimensions:%4s \n iterations:%3d \n" % (m, model_type[m], dimensions[m], iterations[m])) if model == 'GroupSF' or model == 'GroupConvolutionalSF': log_file.write(" Groups: %d \n Step: %d" % (group, step)) ex = data.shape[0] if examples is not None: ex = examples log_file.write(" Data-set: %s \n Examples: %6d \n Whitened: %s" % (filename, ex, whitening)) log_file.write('\nElapsed training time: %f' % elapsed) log_file.close() if aws == 'y': k.key = directory_name + "/log.txt" k.set_contents_from_filename(directory_name + "/log.txt") os.remove(directory_name + "/log.txt") # get output activations for analyses for batch in xrange(n_batches): # get variables of interest activations_norm = {} activations_raw = {} activations_shuffled = {} reconstruction = {} error_recon = {} pooled = {} # f_hat, rec, err, f_hat_shuffled, f, p = outputs[l]() begin = batch * batch_size end = begin + batch_size f_hat, rec, err, f_hat_shuffled, f, p = outputs[model.n_layers - 1]( train_input[begin:end]) # define [mini]batch title batch_title = 'layer' + '_end' + '_batch' + '%03d' % batch # define norm and raw file names norm_file_name = directory_name + '/activations_norm_' + batch_title + '.mat' activations_norm[batch_title] = f_hat activations_raw[batch_title] = f activations_shuffled[batch_title] = f_hat_shuffled reconstruction[batch_title] = err error_recon[batch_title] = rec pooled[batch_title] = p # save model as well as weights and activations separately savemat(norm_file_name, 
                activations_norm)
        if aws == 'y':
            k.key = norm_file_name
            k.set_contents_from_filename(norm_file_name)
            os.remove(norm_file_name)

    # output helper file for concatenating activations
    helper = {'batches': n_batches, 'output_size': f_hat.shape}
    helper_file_name = directory_name + '/helper.mat'
    savemat(helper_file_name, helper)
    if aws == 'y':
        k.key = helper_file_name
        k.set_contents_from_filename(helper_file_name)
        os.remove(helper_file_name)
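# Hedged sketch of how the helper.mat written above could be used downstream to stitch the
# per-batch activation files back into one array. File names follow the batch_title pattern above,
# and the variable name inside each .mat file is assumed to be that same batch title.
import numpy as np
from scipy.io import loadmat

helper = loadmat('helper.mat')
n_batches = int(helper['batches'].ravel()[0])
blocks = []
for batch in range(n_batches):
    batch_title = 'layer' + '_end' + '_batch' + '%03d' % batch
    contents = loadmat('activations_norm_' + batch_title + '.mat')
    blocks.append(contents[batch_title])
all_activations = np.concatenate(blocks, axis=0)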
print "loading data..."
base_path = os.path.dirname(__file__)
file_path = os.path.join(base_path, "data", filename)
data = loadmat(file_path)['X']

# preprocess the data and convert to float; NOTE: data may have already been normalized using LCN (check data read)
print "pre-processing data..."
if filename == 'training.mat':
    data = np.float32(data.reshape(-1, 3, int(np.sqrt(data.shape[1] / 3)), int(np.sqrt(data.shape[1] / 3))))
    data = data[:, :, :, :]
    channels = data.shape[1]
    if filename == 'unlabeled.mat' or filename == 'unlabeled_10000.mat' or filename == 'train.mat':
        for channel in range(channels):
            data[:, channel, :, :] = np.reshape(
                scaling.LCNinput(data[:, channel, :, :].reshape(
                    (data.shape[0], 1, data.shape[2], data.shape[3])), kernel_shape=9),
                (data.shape[0], data.shape[2], data.shape[3]))
elif filename == 'patches.mat':
    data -= data.mean(axis=0)
    data = np.float32(data.T)
    channels = 1

# set some more parameters
aws = 'y'
convolutional = 'n'
neurons = 625
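# The scripts in this repo switch between scipy.io.loadmat and h5py depending on the file. This is
# a short hedged sketch of that pattern: MATLAB files saved with -v7.3 are HDF5 containers, so they
# need h5py and come back transposed relative to loadmat, as handled above.
import numpy as np
import h5py
from scipy.io import loadmat

def load_X(file_path, v73=False):
    """Load the 'X' variable from a .mat file; -v7.3 files are HDF5 and arrive transposed."""
    if not v73:
        return loadmat(file_path)['X']
    with h5py.File(file_path, 'r') as f:
        return np.array(f['X']).T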
def main(): # define global parameters model_type = [ 'ConvolutionalSF', 'ConvolutionalSF', 'ConvolutionalSF', 'ConvolutionalSF', 'ConvolutionalSF' ] convolution = 'y' filename = "unlabeled_10000.mat" # train # unlabeled # STL_10_lcn_unlabeled.mat.h5 channels = 3 patch_size = 14 n_filters = [ 100, 400, 1600, 3000, 5000, ] # [100, 400, 1600, 6400, 25600] # 1600 # increasing neurons x4 maintains dimensionality dimensions = ( [n_filters[0], channels, 11, 11], [n_filters[1], n_filters[0], 4, 4], # 6 [n_filters[2], n_filters[1], 3, 3], # 4 [n_filters[3], n_filters[2], 2, 2], # 3 [n_filters[4], n_filters[3], 3, 3] # 2 ) # ([n_filters, patch_size * patch_size * channels],) # ([100, 256],) pool = None group = None step = None learn_rate = 0.0001 # 0.001 iterations = [ 1, 1, 1, 1, 1, # 1 ] # [5, 5, 5] # [50] # [100] verbosity = 0 opt = 'GD' whitening = 'y' test_model = 'y' examples = None batch_size = 10 # 100 # 360 # 8000 lcn_kernel = [ 5, 5, 3, 3, 3 ] # these may have to be odd values so that there is a middle aws = 'y' # load in data print "loading data..." base_path = os.path.dirname(__file__) file_path = os.path.join(base_path, "data", filename) data = None if filename == 'train.mat' or filename == 'unlabeled_10000.mat': data = loadmat(file_path)['X'] elif filename == 'unlabeled.mat' or filename == 'STL_10_lcn_unlabeled.mat.h5': data = h5py.File(file_path, 'r')['X'] data = np.array(data) data = data.T # preprocess the data and convert to float; NOTE: data may have already been normalized using LCN (check data read) print "pre-processing data..." data = np.float32( data.reshape(-1, 3, int(np.sqrt(data.shape[1] / 3)), int(np.sqrt(data.shape[1] / 3)))) data = data[0:examples, :, :, :] print data.shape if filename == 'unlabeled.mat' or filename == 'unlabeled_10000.mat' or filename == 'train.mat': for channel in range(channels): data[:, channel, :, :] = np.reshape( scaling.LCNinput(data[:, channel, :, :].reshape( (data.shape[0], 1, data.shape[2], data.shape[3])), kernel_shape=9), (data.shape[0], data.shape[2], data.shape[3])) # determine number of batches n_batches, rem = divmod(data.shape[0], batch_size) # construct the network print "building model..." model = sf.Network(model_type=model_type, weight_dims=dimensions, p=pool, group_size=group, step=step, lr=learn_rate, opt=opt, c=convolution, test=test_model, batch_size=batch_size, random='y', weights=None, lcn_kernel=lcn_kernel) # compile the training, output, and test functions for the network print "compiling theano functions..." train, outputs, test = model.training_functions(data) # train the sparse filtering network print "training network..." 
start_time = time.time() cost = {} weights = {} for l in xrange(model.n_layers): cost_layer = [] w = None # iterate over training epochs for epoch in xrange(iterations[l]): # go though [mini]batches for batch_index in xrange(n_batches): # create index for random [mini]batch index = np.int32( np.random.randint(data.shape[0], size=batch_size)) c, w = train[l](index=index) cost_layer.append(c) print("Layer %i cost at epoch %i and batch %i: %f" % (l + 1, epoch, batch_index, c)) # add layer cost and weights to the dictionaries cost['layer' + str(l)] = cost_layer weights['layer' + str(l)] = w # calculate and display elapsed training time elapsed = time.time() - start_time print('Elapsed training time: %f' % elapsed) # create sub-folder for saved model directory_name = None if aws == 'n': directory_format = "./saved/%4d-%02d-%02d_%02dh%02dm%02ds" directory_name = directory_format % time.localtime()[0:6] os.mkdir(directory_name) elif aws == 'y': import boto from boto.s3.key import Key s3 = boto.connect_s3() my_bucket = 'dlacombejr.bucket' bucket = s3.get_bucket(my_bucket) k = Key(bucket) directory_format = "./saved/%4d-%02d-%02d_%02dh%02dm%02ds" directory_name = directory_format % time.localtime()[0:6] os.mkdir(directory_name) # save the model for later use full_path = directory_name + '/model.pkl' pickle.dump(model, open(full_path, 'w'), pickle.HIGHEST_PROTOCOL) if aws == 'y': k.key = full_path k.set_contents_from_filename(full_path) os.remove(full_path) # save weights separately savemat(directory_name + '/weights.mat', weights) if aws == 'y': k.key = directory_name + '/weights.mat' k.set_contents_from_filename(directory_name + '/weights.mat') os.remove(directory_name + '/weights.mat') # save the cost functions savemat(directory_name + '/cost.mat', cost) if aws == 'y': k.key = directory_name + '/cost.mat' k.set_contents_from_filename(directory_name + '/cost.mat') os.remove(directory_name + '/cost.mat') # create log file log_file = open(directory_name + "/log.txt", "wb") # todo: create log file by looping through args for m in range(len(model_type)): log_file.write( "Model layer %d: \n model:%s \n dimensions:%4s \n iterations:%3d \n" % (m, model_type[m], dimensions[m], iterations[m])) if model == 'GroupSF' or model == 'GroupConvolutionalSF': log_file.write(" Groups: %d \n Step: %d" % (group, step)) ex = data.shape[0] if examples is not None: ex = examples log_file.write(" Data-set: %s \n Examples: %6d \n Whitened: %s" % (filename, ex, whitening)) log_file.write('\nElapsed training time: %f' % elapsed) log_file.close() if aws == 'y': k.key = directory_name + "/log.txt" k.set_contents_from_filename(directory_name + "/log.txt") os.remove(directory_name + "/log.txt")
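# The training loop above draws each minibatch with np.random.randint, which samples indices with
# replacement. A hedged alternative, sketched below, draws a fresh permutation so that every example
# is visited exactly once per epoch; whether that matters here depends on the dataset size.
import numpy as np

def permutation_batches(n_examples, batch_size, rng=np.random):
    """Yield disjoint minibatch index arrays covering every example once per epoch."""
    order = rng.permutation(n_examples)
    for start in range(0, n_examples - batch_size + 1, batch_size):
        yield np.int32(order[start:start + batch_size])

# usage inside the epoch loop (hypothetical):
#     for index in permutation_batches(data.shape[0], batch_size):
#         c, w = train[l](index=index)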
def main(): """ This script builds a deep convolutional sparse filtering network that has a final output [examples, maps, 1, 1], such that the entire image is viewed. The outputs of the final layer are concatenated together and serve as input to a new fully-connected network that uses the original sparse filtering object. The outputs of this fully connected layer are then used as input to a supervised classifier to evaluate the degree to which object categories are represented using fully unsupervised-learning. The standard sparse filtering algorithm can be replaced with the topographic version to evaluate semantic organization of high-level feature detectors. """ # define global parameters model_type = [ 'ConvolutionalSF', 'ConvolutionalSF', 'ConvolutionalSF', 'ConvolutionalSF', # 'ConvolutionalSF' ] convolution = 'y' filename = "train.mat" # unlabeled channels = 3 patch_size = 14 n_filters = [ 100, 200, 400, 800, # 1600 ] # # [100, 400, 1600, 6400, 25600] # 1600 # increasing neurons x4 maintains dimensionality dimensions = ( [n_filters[0], channels, 11, 11], [n_filters[1], n_filters[0], 4, 4], [n_filters[2], n_filters[1], 3, 3], [n_filters[3], n_filters[2], 2, 2], # [n_filters[4], n_filters[3], 3, 3] ) # ([n_filters, patch_size * patch_size * channels],) # ([100, 256],) pool = None group = None step = None learn_rate = 0.001 # 0.0001 iterations = [ 1, 1, 1, 1, # 1 ] # [5, 5, 5] # [50] # [100] verbosity = 0 opt = 'GD' whitening = 'y' test_model = 'y' examples = None batch_size = 100 # 360 # 8000 lcn_kernel = [5, 4, 3, 2] aws = 'y' # load in data print "loading data..." base_path = os.path.dirname(__file__) file_path = os.path.join(base_path, "data", filename) data = loadmat(file_path)['X'] # data = h5py.File(file_path, 'r')['X'] # data = np.array(data) # data = data.T # preprocess the data and convert to float; NOTE: data may have already been normalized using LCN (check data read) print "pre-processing data..." data = np.float32(data.reshape(-1, 3, int(np.sqrt(data.shape[1] / 3)), int(np.sqrt(data.shape[1] / 3)))) data = data[0:examples, :, :, :] for channel in range(channels): data[:, channel, :, :] = np.reshape(scaling.LCNinput(data[:, channel, :, :]. reshape((data.shape[0], 1, data.shape[2], data.shape[3])), kernel_shape=9), ( data.shape[0], data.shape[2], data.shape[3])) # determine number of batches n_batches, rem = divmod(data.shape[0], batch_size) # construct the network print "building model..." model = sf.Network( model_type=model_type, weight_dims=dimensions, p=pool, group_size=group, step=step, lr=learn_rate, opt=opt, c=convolution, test=test_model, batch_size=batch_size, random='y', weights=None, lcn_kernel=lcn_kernel ) # compile the training, output, and test functions for the network print "compiling theano functions..." train, outputs, test = model.training_functions(data) # train the sparse filtering network print "training network..." 
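# The docstring at the top of this main() describes the shape flow: the final convolutional layer
# sees the whole image and emits [examples, maps, 1, 1], those outputs are concatenated per example,
# and the result feeds a fully-connected sparse-filtering layer. The numpy mock below only
# illustrates that shape flow; the sizes are illustrative and a plain linear map stands in for the
# fully-connected sparse-filtering layer.
import numpy as np

examples, maps = 100, 800                           # illustrative sizes only
conv_out = np.random.randn(examples, maps, 1, 1)    # final conv layer: whole image in view
fc_input = conv_out.reshape(examples, maps)         # concatenate the maps for each example
fc_weights = np.random.randn(1089, maps)            # fully-connected layer: [units, maps]
fc_activation = fc_weights.dot(fc_input.T)          # [units, examples] feature matrix
print(fc_activation.shape)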
start_time = time.time() cost = {} weights = {} for l in xrange(model.n_layers): cost_layer = [] w = None # iterate over training epochs for epoch in xrange(iterations[l]): # go though [mini]batches for batch_index in xrange(n_batches): # create index for random [mini]batch index = np.int32(np.random.randint(data.shape[0], size=batch_size)) c, w = train[l](index=index) cost_layer.append(c) print("Layer %i cost at epoch %i and batch %i: %f" % (l + 1, epoch, batch_index, c)) # add layer cost and weights to the dictionaries cost['layer' + str(l)] = cost_layer weights['layer' + str(l)] = w # calculate and display elapsed training time elapsed = time.time() - start_time print('Elapsed training time: %f' % elapsed) # create sub-folder for saved model directory_name = None if aws == 'n': directory_format = "./saved/%4d-%02d-%02d_%02dh%02dm%02ds" directory_name = directory_format % time.localtime()[0:6] os.mkdir(directory_name) elif aws == 'y': import boto from boto.s3.key import Key s3 = boto.connect_s3() my_bucket = 'dlacombejr.bucket' bucket = s3.get_bucket(my_bucket) k = Key(bucket) directory_format = "./saved/%4d-%02d-%02d_%02dh%02dm%02ds" directory_name = directory_format % time.localtime()[0:6] os.mkdir(directory_name) # save the model for later use full_path = directory_name + '/model.pkl' pickle.dump(model, open(full_path, 'w'), pickle.HIGHEST_PROTOCOL) if aws == 'y': k.key = full_path k.set_contents_from_filename(full_path) os.remove(full_path) # save weights separately savemat(directory_name + '/weights.mat', weights) if aws == 'y': k.key = directory_name + '/weights.mat' k.set_contents_from_filename(directory_name + '/weights.mat') os.remove(directory_name + '/weights.mat') # save the cost functions savemat(directory_name + '/cost.mat', cost) if aws == 'y': k.key = directory_name + '/cost.mat' k.set_contents_from_filename(directory_name + '/cost.mat') os.remove(directory_name + '/cost.mat') # create log file log_file = open(directory_name + "/log.txt", "wb") # todo: create log file by looping through args for m in range(len(model_type)): log_file.write( "Model layer %d: \n model:%s \n dimensions:%4s \n iterations:%3d \n" % (m, model_type[m], dimensions[m], iterations[m]) ) if model == 'GroupSF' or model == 'GroupConvolutionalSF': log_file.write( " Groups: %d \n Step: %d" % (group, step) ) ex = data.shape[0] if examples is not None: ex = examples log_file.write( " Data-set: %s \n Examples: %6d \n Whitened: %s" % (filename, ex, whitening) ) log_file.write('\nElapsed training time: %f' % elapsed) log_file.close() if aws == 'y': k.key = directory_name + "/log.txt" k.set_contents_from_filename(directory_name + "/log.txt") os.remove(directory_name + "/log.txt") # collect the activations from the last layer to train fully connected layer activations_concatenated = [] for batch in xrange(n_batches): begin = batch * batch_size end = begin + batch_size f_hat, _, _, _, _, _ = outputs[model.n_layers - 1](data[begin:end]) activations_concatenated.append( f_hat.reshape( batch_size, f_hat.shape[1] * f_hat.shape[2] * f_hat.shape[3] ) ) # normalize the input final_input = np.asarray(activations_concatenated) final_input -= final_input.mean(axis=1) # train a regular sparse filtering network on top of final layer print "building model..." 
model = sf.Network( model_type=['SparseFilter'], weight_dims=([1089, final_input.shape[1]],), # 33x33 is odd perfect square p=pool, group_size=group, step=step, lr=learn_rate, opt=opt, c='n', test=test_model, batch_size=batch_size, random='y', weights=None ) # compile the training, output, and test functions for the network print "compiling theano functions..." train, outputs, test = model.training_functions(final_input) # train the sparse filtering network print "training network..." start_time = time.time() cost = {} weights = {} for l in xrange(model.n_layers): cost_layer = [] w = None # iterate over training epochs for epoch in xrange(iterations[l]): # go though [mini]batches for batch_index in xrange(n_batches): # create index for random [mini]batch index = np.int32(np.random.randint(data.shape[0], size=batch_size)) c, w = train[l](index=index) cost_layer.append(c) print("Layer %i cost at epoch %i and batch %i: %f" % (l + 1, epoch, batch_index, c)) # add layer cost and weights to the dictionaries cost['layer' + str(l)] = cost_layer weights['layer' + str(l)] = w # calculate and display elapsed training time elapsed = time.time() - start_time print('Elapsed training time: %f' % elapsed) # save the model for later use full_path = directory_name + '/model2.pkl' pickle.dump(model, open(full_path, 'w'), pickle.HIGHEST_PROTOCOL) if aws == 'y': k.key = full_path k.set_contents_from_filename(full_path) os.remove(full_path) ''' ================================ Test the Model ======================================= ''' # test the model if evaluating classification performance if test_model == 'y': print 'testing...' from sklearn import svm # set some new local parameters train_data_file = "STL_10_lcn_train.mat" # "train.mat" train_labels_file = "train.mat" test_data_file = "STL_10_lcn_test.mat" # "test.mat" test_labels_file = "test.mat" batch_size = 100 # todo: read in lcn data # load in STL-10 training data (all pre-normalized using LCN) print "loading in training and test data..." file_path = os.path.join(base_path, "data", train_data_file) train_data = loadmat(file_path)['X'] file_path = os.path.join(base_path, "data", train_labels_file) train_labels = loadmat(file_path)['y'] # load in STL-10 test data (all pre-normalized using LCN) file_path = os.path.join(base_path, "data", test_data_file) test_data = loadmat(file_path)['X'] file_path = os.path.join(base_path, "data", test_labels_file) test_labels = loadmat(file_path)['y'] # # preproces training and test data # print "preprocessing training and test data..." # print train_data.shape # train_data = np.float32(train_data.reshape(-1, # 3, # int(np.sqrt(train_data.shape[1] / 3)), # int(np.sqrt(train_data.shape[1] / 3))) # ) # print train_data.shape # for channel in range(channels): # train_data[:, channel, :, :] = np.reshape(scaling.LCNinput(train_data[:, channel, :, :]. # reshape((train_data.shape[0], 1, # train_data.shape[2], # train_data.shape[3])), # kernel_shape=9), ( # train_data.shape[0], # train_data.shape[2], # train_data.shape[3])) # # test_data = np.float32(test_data.reshape(-1, # 3, # int(np.sqrt(test_data.shape[1] / 3)), # int(np.sqrt(test_data.shape[1] / 3))) # ) # for channel in range(channels): # test_data[:, channel, :, :] = np.reshape(scaling.LCNinput(test_data[:, channel, :, :]. 
# reshape((test_data.shape[0], 1, # test_data.shape[2], # test_data.shape[3])), # kernel_shape=9), ( # test_data.shape[0], # test_data.shape[2], # test_data.shape[3])) # read in the pre-defined fold indices file_path = os.path.join(base_path, "data", "train.mat") fold_indices = loadmat(file_path)['fold_indices'] fold_indices -= np.ones(fold_indices.shape) # make zero-index # train and test a SVM classifier for each layer (including pixels as baseline) accuracy = {} accuracy_list = [] train_input = None test_input = None cm = None for layer in range(1, 4): # range(test_model.n_layers + 1): # skipping pixels for now # create dictionary for layer accuracy['layer' + str(layer)] = {} # create quadrant pooling function based on size of output from layer quadrant_size = test[layer - 1](test_data[0, :, :, :].reshape((1, 3, 96, 96)))[0].shape[3] / 2 quad_pool = quadrant_pooling(quadrant_size) # loop over pre-defined folds n_folds = fold_indices.shape[1] for fold in xrange(n_folds): # get fold data fold_index = fold_indices[0][fold].astype('int') train_data_fold = np.squeeze(train_data[fold_index]) train_labels_fold = np.squeeze(train_labels[fold_index]) # pixel inputs if layer == 0: if fold == 0: # only get test data once test_input = test_data.reshape(test_data.shape[0], test_data.shape[1] * test_data.shape[2] * test_data.shape[3]) train_input = train_data_fold.reshape(train_data_fold.shape[0], train_data_fold.shape[1] * train_data_fold.shape[2] * train_data_fold.shape[3]) # hidden layers elif layer > 0: # get the output of the current layer in the model given the training / test data and then reshape # TODO: use raw output as training and testing data? if fold == 0: # only get test data once print "getting test data..." test_input = np.zeros((test_data.shape[0], n_filters[layer - 1], 2, 2)) n_batches = test_data.shape[0] / batch_size for batch in xrange(n_batches): print "for batch %d" % batch batch_start = batch * batch_size batch_end = batch_start + batch_size temp = test[layer - 1](test_data[batch_start:batch_end]) temp = temp[0] test_input[batch_start:batch_end] = quad_pool(temp)[0] test_input = test_input.reshape(test_input.shape[0], test_input.shape[1] * test_input.shape[2] * test_input.shape[3]) print "getting training data..." 
train_input = np.zeros((train_data_fold.shape[0], n_filters[layer - 1], 2, 2)) n_batches = train_data_fold.shape[0] / batch_size for batch in xrange(n_batches): print "for batch %d" % batch batch_start = batch * batch_size batch_end = batch_start + batch_size temp = test[layer - 1](train_data_fold[batch_start:batch_end]) temp = temp[0] train_input[batch_start:batch_end] = quad_pool(temp)[0] train_input = train_input.reshape(train_input.shape[0], train_input.shape[1] * train_input.shape[2] * train_input.shape[3]) # normalize the inputs for each dimension (zero-mean and unit-variance) if fold == 0: # only normalize test data once test_input -= test_input.mean(axis=1)[:, np.newaxis] test_input /= np.std(test_input, axis=1)[:, np.newaxis] train_input -= train_input.mean(axis=1)[:, np.newaxis] train_input /= np.std(train_input, axis=1)[:, np.newaxis] # train linear support vector machine print("Training linear SVM...") clf = svm.SVC(kernel="linear").fit(train_input, np.ravel(train_labels_fold[0:examples])) # get predictions from SVM and calculate accuracy print("Making predictions...") accuracy['layer' + str(layer)]['fold' + str(fold)] = clf.score(test_input, test_labels[0:examples]) accuracy_list.append(accuracy['layer' + str(layer)]['fold' + str(fold)]) # display results and log them print("Accuracy of the classifier for fold %d at layer %1d: %0.4f" % (fold, layer, accuracy['layer' + str(layer)]['fold' + str(fold)])) log_file = open(directory_name + "/log_test.txt", "a") log_file.write( "\nAccuracy of the classifier for fold %d at layer %1d: %0.4f" % (fold, layer, accuracy['layer' + str(layer)]['fold' + str(fold)]) ) log_file.close() # calculate and print out average accuracy and std avg = np.mean(accuracy_list) std = np.std(accuracy_list) print "The overall accuracy of layer %d: %0.4f +/- (%0.4f)" % (layer, float(avg), float(std)) # save for aws if aws == 'y': k.key = directory_name + "/log_test.txt" k.set_contents_from_filename(directory_name + "/log_test.txt") # save the test results savemat('accuracy', accuracy)
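# quadrant_pooling is used above but not defined in this file. The sketch below is a hypothetical
# 2x2 quadrant (spatial sum) pooling that maps [examples, maps, h, w] feature maps to
# [examples, maps, 2, 2], matching the shapes the test loops above allocate; the real helper may
# pool differently (e.g. averaging).
import numpy as np

def quadrant_pool_sketch(feature_maps):
    n, c, h, w = feature_maps.shape
    half_h, half_w = h // 2, w // 2
    pooled = np.zeros((n, c, 2, 2), dtype=feature_maps.dtype)
    for i in range(2):
        for j in range(2):
            block = feature_maps[:, :, i * half_h:(i + 1) * half_h,
                                 j * half_w:(j + 1) * half_w]
            pooled[:, :, i, j] = block.sum(axis=(2, 3))  # sum over each spatial quadrant
    return pooled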