# Imports required by the training drivers below (Python 2 / Theano).
import os
import timeit
import cPickle

import numpy as np
import PIL.Image as Image
import theano
import theano.sandbox.cuda as cuda
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams

# Project-local classes and helpers; the module names below are assumptions
# inferred from this file's usage, so adjust them to the actual layout.
from models import (sdA, dbn, MLP, Softmax, cnn, convPoolLayer,
                    FullyConnectedLayer)
from training import supervised_tuning
from utils import tile_raster_images


def apply_sdA_sgd(training_data, validation_data, test_data, nChannels,
                  dimHiddenLayers, nLabels, miniBatchSize, sgd_opts,
                  dropout_rates, corruptionLevels, results_dir, momentum=None,
                  monitoring_to_file=False):
    if not os.path.isdir(results_dir):
        os.makedirs(results_dir)
    os.chdir(results_dir)
    dimIn = training_data[0].get_value(borrow=True).shape[1]
    # Integer division: any incomplete final minibatch is dropped.
    nBatchTrain = training_data[0].get_value(
        borrow=True).shape[0] // miniBatchSize
    nBatchVal = validation_data[0].get_value(
        borrow=True).shape[0] // miniBatchSize
    nBatchTest = test_data[0].get_value(borrow=True).shape[0] // miniBatchSize
    np_rng = np.random.RandomState(0)
    theano_rng = RandomStreams(123)
    clf = sdA(np_rng,
              dimIn=dimIn,
              dimHiddenLayers=dimHiddenLayers,
              dimOut=nLabels,
              dropout_rates=dropout_rates)
    # compile pre-training functions
    pretrain_fns = clf.get_pretrain_fns(training_data[0], miniBatchSize,
                                        sgd_opts['alpha_pre'])
    # compile fine-tuning functions
    train_model, validate_model, test_model = clf.get_finetune_functions(
        training_data, validation_data, test_data, miniBatchSize, momentum)

    # do the actual pre-training
    print('starting pre-training of sdA...')
    start_time = timeit.default_timer()
    for layerIdx in xrange(clf.nHiddenLayers):
        for epoch in xrange(sgd_opts['epochs_pre']):
            monitorCost = 0.0
            for miniBatchIndex in xrange(nBatchTrain):
                iterNum = epoch * nBatchTrain + miniBatchIndex
                # aggregate minibatch costs
                monitorCost += pretrain_fns[layerIdx](
                    miniBatchIndex, corruptionLevels[layerIdx])
                if (iterNum + 1) % 1000 == 0:
                    # average over the minibatches seen so far this epoch
                    # (monitorCost is reset each epoch, so divide by the
                    # within-epoch count, not the global iteration count)
                    print('pre-training cost per minibatch for epoch {0}, '
                          'minibatch {1}/{2}, layer {3}, is : {4:.5f}'.format(
                              epoch + 1, miniBatchIndex + 1, nBatchTrain,
                              layerIdx + 1,
                              monitorCost / (miniBatchIndex + 1)))
    end_time = timeit.default_timer()
    run_time_pre = end_time - start_time
    print('pre-training time taken: {}'.format(run_time_pre))
    pretrain_params = [param.get_value(borrow=False) for param in clf.params]

    # do the actual fine-tuning
    print('starting fine-tuning of sdA...')
    [best_params, test_error, run_time, best_iter,
     temp_monitoring_filename] = supervised_tuning(
         clf.params, train_model, validate_model, test_model, nBatchTrain,
         nBatchVal, nBatchTest, sgd_opts, monitoring_to_file)
    finetune_params = best_params

    # If device was set to gpu, check it was actually used.
    if theano.config.device == 'gpu':
        if np.any([isinstance(x.op, cuda.GpuElemwise)
                   for x in train_model.maker.fgraph.toposort()]):
            print('used the gpu during training')
        else:
            print('used the cpu during training')

    # save results
    trainingParams = {
        'sgd_opts': sgd_opts,
        'miniBatchSize': miniBatchSize,
        'dimIn': dimIn,
        'dimHiddenLayers': dimHiddenLayers,
        'dimOut': nLabels,
        'dropout_rates': dropout_rates,
        'corruptionLevels': corruptionLevels,
        'momentum': momentum
    }
    results = [
        'test_error: {:.2%}'.format(test_error),
        'run_time: {:.2f}s'.format(run_time),
        'best_iter: {}'.format(best_iter)
    ]
    with open('readme.txt', 'a') as f:
        f.write('\n {0}\n {1}\n'.format(trainingParams, results))
    if monitoring_to_file:
        monitoring_filename = 'sdA_{0:.2%}_{1}_monitoring.txt'.format(
            test_error, dimHiddenLayers)
        # rename temp file created during training
        os.rename(temp_monitoring_filename, monitoring_filename)
        with open('readme.txt', 'a') as f:
            f.write('{0}\n'.format(monitoring_filename))
    while True:
        answer = raw_input('plot pre-train filters (y/n)? ')
        if answer == 'y':
            # saving pre-training filters
            if nChannels > 1:
                dimImage = int(np.sqrt(dimIn // nChannels))  # assume square image
                # plot first 10 filters across each channel
                tile_shape = (10, nChannels)
                # first nChannels rows relate to first filter
                # (adjacent rows are separate channels)
                X = pretrain_params[0].T.reshape(-1, dimImage * dimImage)
            else:
                dimImage = int(np.sqrt(dimIn))  # assume square image
                tile_shape = (10, 10)  # or just plot first 100 filters
                X = pretrain_params[0].T
            imageFilename = 'sdA_filters_{0:.2%}_{1}.png'.format(
                test_error, dimHiddenLayers)
            image = Image.fromarray(
                tile_raster_images(X=X,
                                   img_shape=(dimImage, dimImage),
                                   tile_shape=tile_shape,
                                   tile_spacing=(1, 1)))
            image.save(imageFilename)
            with open('readme.txt', 'a') as f:
                f.write('{0}\n'.format(imageFilename))
            break
        elif answer == 'n':
            break
        else:
            print('invalid input, try again')
    while True:
        answer = raw_input('save model (y/n)? ')
        if answer == 'y':
            modelFilename = 'sdA_{0:.2%}_{1}.pkl'.format(
                test_error, dimHiddenLayers)
            best_model = {
                'params': finetune_params,
                'results': results,
                'trainingParams': trainingParams
            }
            with open(modelFilename, 'wb') as f:
                cPickle.dump(best_model, f, -1)
            with open('readme.txt', 'a') as f:
                f.write('{0}\n'.format(modelFilename))
            break
        elif answer == 'n':
            break
        else:
            print('invalid input, try again')
    os.chdir('../')
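
# The helper and demo below are illustrative additions, not part of the
# original pipeline: layer sizes and learning rates are placeholders, labels
# are assumed to be wanted as a shared int32 vector, and sgd_opts must also
# carry whatever fine-tuning keys supervised_tuning (defined elsewhere)
# expects. Note the drivers prompt interactively once training finishes.
def _shared_xy(n, dim, n_labels):
    # Pack random features/labels as Theano shared variables in the (X, y)
    # layout the drivers in this module expect.
    rng = np.random.RandomState(0)
    X = theano.shared(rng.rand(n, dim).astype(theano.config.floatX),
                      borrow=True)
    y = theano.shared(rng.randint(0, n_labels, n).astype(np.int32),
                      borrow=True)
    return (X, y)


def _demo_sdA():
    # Smoke test on random 28x28 "images" with two small hidden layers.
    train, val, test = [_shared_xy(n, 28 * 28, 10) for n in (1000, 200, 200)]
    sgd_opts = {'epochs_pre': 1, 'alpha_pre': 0.001,
                'epochs': 1, 'alpha': 0.1}  # fine-tuning keys are assumptions
    apply_sdA_sgd(train, val, test, nChannels=1, dimHiddenLayers=[100, 100],
                  nLabels=10, miniBatchSize=20, sgd_opts=sgd_opts,
                  dropout_rates=[0.0, 0.0, 0.0],  # length assumed by sdA
                  corruptionLevels=[0.1, 0.2], results_dir='results_sdA_demo')
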
def apply_mlp_sgd(training_data, validation_data, test_data, dimHiddenLayers,
                  nLabels, miniBatchSize, sgd_opts, lmbda, dropout_rates,
                  activations, results_dir, momentum=None,
                  monitoring_to_file=False):
    if not os.path.isdir(results_dir):
        os.makedirs(results_dir)
    os.chdir(results_dir)
    trainShape = training_data[0].get_value(borrow=True).shape
    valShape = validation_data[0].get_value(borrow=True).shape
    testShape = test_data[0].get_value(borrow=True).shape
    nBatchTrain = trainShape[0] // miniBatchSize
    nBatchVal = valShape[0] // miniBatchSize
    nBatchTest = testShape[0] // miniBatchSize
    rng = np.random.RandomState(0)
    clf = MLP(rng, trainShape[1], dimHiddenLayers, nLabels, dropout_rates,
              activations)
    train_model, validate_model, test_model = clf.get_training_functions(
        training_data, validation_data, test_data, miniBatchSize, lmbda,
        momentum)

    # Do the actual training.
    print('starting training...')
    [best_params, test_error, run_time, best_iter,
     temp_monitoring_filename] = supervised_tuning(
         clf.params, train_model, validate_model, test_model, nBatchTrain,
         nBatchVal, nBatchTest, sgd_opts, monitoring_to_file)

    # If device was set to gpu, check it was actually used.
    if theano.config.device == 'gpu':
        if np.any([isinstance(x.op, cuda.GpuElemwise)
                   for x in train_model.maker.fgraph.toposort()]):
            print('used the gpu during training')
        else:
            print('used the cpu during training')

    # Save results.
    trainingParams = {
        'sgd_opts': sgd_opts,
        'lmbda': lmbda,
        'miniBatchSize': miniBatchSize,
        'dimIn': trainShape[1],
        'dimHiddenLayers': dimHiddenLayers,
        'dimOut': nLabels,
        'dropout_rates': dropout_rates,
        'momentum': momentum,
        'activations': activations
    }
    results = [
        'test_error: {:.2%}'.format(test_error),
        'run_time: {:.2f}s'.format(run_time),
        'best_iter: {}'.format(best_iter)
    ]
    with open('readme.txt', 'a') as f:
        f.write('\n {0}\n {1}\n'.format(trainingParams, results))
    if monitoring_to_file:
        monitoring_filename = 'MLP_{0:.2%}_{1}_monitoring.txt'.format(
            test_error, dimHiddenLayers)
        # rename temp file created during training
        os.rename(temp_monitoring_filename, monitoring_filename)
        with open('readme.txt', 'a') as f:
            f.write('{0}\n'.format(monitoring_filename))
    while True:
        answer = raw_input('plot filters (y/n)? ')
        if answer == 'y':
            filters = best_params[0].T
            imageFilename = 'MLP_filters_{0:.2%}_{1}.png'.format(
                test_error, dimHiddenLayers)
            dimImage = int(np.sqrt(trainShape[1]))  # assume square image
            image = Image.fromarray(
                tile_raster_images(X=filters,
                                   img_shape=(dimImage, dimImage),
                                   tile_shape=(10, 10),  # plot first 100
                                   tile_spacing=(1, 1)))
            image.save(imageFilename)
            with open('readme.txt', 'a') as f:
                f.write('{0}\n'.format(imageFilename))
            break
        elif answer == 'n':
            break
        else:
            print('invalid input, try again')
    while True:
        answer = raw_input('save model (y/n)? ')
        if answer == 'y':
            modelFilename = 'MLP_{0:.2%}_{1}.pkl'.format(
                test_error, dimHiddenLayers)
            best_model = {
                'params': best_params,
                'results': results,
                'trainingParams': trainingParams
            }
            with open(modelFilename, 'wb') as f:
                cPickle.dump(best_model, f, -1)
            with open('readme.txt', 'a') as f:
                f.write('{0}\n'.format(modelFilename))
            break
        elif answer == 'n':
            break
        else:
            print('invalid input, try again')
    os.chdir('../')
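
# Every driver in this module unpacks supervised_tuning's return value as
# [best_params, test_error, run_time, best_iter, temp_monitoring_filename].
# supervised_tuning itself is defined elsewhere; the skeleton below only
# documents the contract assumed here (argument order, return order, a temp
# monitoring file when monitoring_to_file=True). Its early-stopping rule and
# the train_model(index) signature are assumptions, not the project's code.
def _supervised_tuning_contract(params, train_model, validate_model,
                                test_model, nBatchTrain, nBatchVal,
                                nBatchTest, sgd_opts,
                                monitoring_to_file=False):
    start_time = timeit.default_timer()
    best_params = [p.get_value(borrow=False) for p in params]
    best_val_error = np.inf
    test_error = np.inf
    best_iter = 0
    temp_filename = 'temp_monitoring.txt' if monitoring_to_file else None
    for epoch in xrange(sgd_opts['epochs']):
        for miniBatchIndex in xrange(nBatchTrain):
            train_model(miniBatchIndex)
        val_error = np.mean([validate_model(i) for i in xrange(nBatchVal)])
        if monitoring_to_file:
            with open(temp_filename, 'a') as f:
                f.write('epoch {0}: {1}\n'.format(epoch + 1, val_error))
        if val_error < best_val_error:
            # Keep the parameters with the best validation score and record
            # the test error at that point.
            best_val_error = val_error
            best_iter = (epoch + 1) * nBatchTrain
            test_error = np.mean([test_model(i) for i in xrange(nBatchTest)])
            best_params = [p.get_value(borrow=False) for p in params]
    run_time = timeit.default_timer() - start_time
    return [best_params, test_error, run_time, best_iter, temp_filename]
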
def apply_dbn_sgd(training_data, validation_data, test_data, nChannels,
                  dimHiddenLayers, nLabels, miniBatchSize, sgd_opts,
                  persistent_bool, k, results_dir):
    if not os.path.isdir(results_dir):
        os.makedirs(results_dir)
    os.chdir(results_dir)
    dimIn = training_data[0].get_value(borrow=True).shape[1]
    nBatchTrain = training_data[0].get_value(
        borrow=True).shape[0] // miniBatchSize
    nBatchVal = validation_data[0].get_value(
        borrow=True).shape[0] // miniBatchSize
    nBatchTest = test_data[0].get_value(borrow=True).shape[0] // miniBatchSize
    np_rng = np.random.RandomState(0)
    theano_rng = RandomStreams(123)
    clf = dbn(np_rng,
              dimIn=dimIn,
              dimHiddenLayers=dimHiddenLayers,
              dimOut=nLabels)
    # get pre-training functions
    pretrain_fns = clf.get_pretrain_fns(training_data[0], miniBatchSize,
                                        sgd_opts['alpha_pre'],
                                        persistent_bool, k)
    # get fine-tuning functions
    train_model, validate_model, test_model = clf.get_finetune_functions(
        training_data, validation_data, test_data, miniBatchSize)

    # do the actual pre-training
    print('starting pre-training of deep belief...')
    start_time = timeit.default_timer()
    for layerIdx in xrange(clf.nHiddenLayers):
        for epoch in xrange(sgd_opts['epochs_pre']):
            monitorCost = 0.0
            for miniBatchIndex in xrange(nBatchTrain):
                iterNum = epoch * nBatchTrain + miniBatchIndex
                # aggregate minibatch costs
                monitorCost += pretrain_fns[layerIdx](miniBatchIndex)
                if (iterNum + 1) % 1000 == 0:
                    # average over the minibatches seen so far this epoch
                    print('pre-training cost per minibatch for epoch {0}, '
                          'minibatch {1}/{2}, layer {3}, is : {4:.5f}'.format(
                              epoch + 1, miniBatchIndex + 1, nBatchTrain,
                              layerIdx + 1,
                              monitorCost / (miniBatchIndex + 1)))
    end_time = timeit.default_timer()
    run_time_pre = end_time - start_time
    print('pre-training time taken: {}'.format(run_time_pre))
    pretrain_params = [param.get_value(borrow=False) for param in clf.params]

    # do the actual fine-tuning
    print('starting fine-tuning of deep belief...')
    [finetune_params, test_error, run_time, best_iter, _] = supervised_tuning(
        clf.params, train_model, validate_model, test_model, nBatchTrain,
        nBatchVal, nBatchTest, sgd_opts)

    # save results
    trainingParams = {
        'sgd_opts': sgd_opts,
        'miniBatchSize': miniBatchSize,
        'dimHiddenLayers': dimHiddenLayers,
        'persistent_bool': persistent_bool,
        'k': k
    }
    results = [
        'test_error: {:.2%}'.format(test_error),
        'run_time: {:.2f}s'.format(run_time),
        'best_iter: {}'.format(best_iter)
    ]
    with open('readme.txt', 'a') as f:
        f.write('\n {0}\n {1}\n'.format(trainingParams, results))
    while True:
        answer = raw_input('plot pre-train filters (y/n)? ')
        if answer == 'y':
            # saving pre-training filters
            if nChannels > 1:
                dimImage = int(np.sqrt(dimIn // nChannels))  # assume square image
                # plot first 10 filters across each channel
                tile_shape = (10, nChannels)
                # first nChannels rows relate to first filter
                # (adjacent rows are separate channels)
                X = pretrain_params[0].T.reshape(-1, dimImage * dimImage)
            else:
                dimImage = int(np.sqrt(dimIn))  # assume square image
                tile_shape = (10, 10)  # or just plot first 100 filters
                X = pretrain_params[0].T
            imageFilename = ('dbn_filters_{0}_{1:.2%}_persistence={2}_k={3}'
                             '.png').format(dimHiddenLayers, test_error,
                                            persistent_bool, k)
            image = Image.fromarray(
                tile_raster_images(X=X,
                                   img_shape=(dimImage, dimImage),
                                   tile_shape=tile_shape,
                                   tile_spacing=(1, 1)))
            image.save(imageFilename)
            with open('readme.txt', 'a') as f:
                f.write('{0}\n'.format(imageFilename))
            break
        elif answer == 'n':
            break
        else:
            print('invalid input, try again')
    while True:
        answer = raw_input('save model (y/n)? ')
        if answer == 'y':
            modelFilename = 'dbn_{0:.2%}_{1}_persistence={2}_k={3}.pkl'.format(
                test_error, dimHiddenLayers, persistent_bool, k)
            best_model = {
                'dimIn': dimIn,
                'params': finetune_params,
                'results': results,
                'trainingParams': trainingParams
            }
            with open(modelFilename, 'wb') as f:
                cPickle.dump(best_model, f, -1)
            with open('readme.txt', 'a') as f:
                f.write('{0}\n'.format(modelFilename))
            break
        elif answer == 'n':
            break
        else:
            print('invalid input, try again')
    os.chdir('../')
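
# Worked example of the multi-channel reshape used when plotting first-layer
# filters here and in apply_sdA_sgd. It assumes each column of the weight
# matrix stacks a filter's channel planes channel-major, as the comment in
# the plotting code implies; the toy shapes are for illustration only.
def _demo_filter_reshape():
    nChannels, dimImage, nHidden = 3, 2, 5
    # One column per hidden unit; 12 rows = 3 channels x (2 x 2) pixels.
    W = np.arange(nChannels * dimImage * dimImage * nHidden,
                  dtype=np.float32).reshape(nChannels * dimImage * dimImage,
                                            nHidden)
    X = W.T.reshape(-1, dimImage * dimImage)
    assert X.shape == (nHidden * nChannels, dimImage * dimImage)
    # Rows 0-2 are the three channel planes of hidden unit 0's filter,
    # rows 3-5 those of hidden unit 1, and so on: exactly the row order
    # tile_raster_images needs for tile_shape=(10, nChannels).
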
def apply_softmax_sgd(training_data, validation_data, test_data, nLabels,
                      miniBatchSize, sgd_opts, lmbda, results_dir,
                      monitoring_to_file=False):
    if not os.path.isdir(results_dir):
        os.makedirs(results_dir)
    os.chdir(results_dir)
    X = T.matrix('X')
    y = T.ivector('y')
    X_train, y_train = training_data
    X_val, y_val = validation_data
    X_test, y_test = test_data
    nBatchTrain = X_train.get_value(borrow=True).shape[0] // miniBatchSize
    nBatchVal = X_val.get_value(borrow=True).shape[0] // miniBatchSize
    nBatchTest = X_test.get_value(borrow=True).shape[0] // miniBatchSize
    rng = np.random.RandomState(0)
    dimIn = X_train.get_value(borrow=True).shape[1]
    clf = Softmax(rng=rng, input=X, dimIn=dimIn, dimOut=nLabels)
    train_model, validate_model, test_model = clf.get_training_functions(
        X, y, training_data, validation_data, test_data, miniBatchSize, lmbda)

    # Do the actual training. Pass the monitoring flag through so the temp
    # monitoring file exists when it is renamed below.
    print('starting training...')
    [best_params, test_error, run_time, best_iter,
     temp_monitoring_filename] = supervised_tuning(
         clf.params, train_model, validate_model, test_model, nBatchTrain,
         nBatchVal, nBatchTest, sgd_opts, monitoring_to_file)

    # If device was set to gpu, check it was actually used.
    if theano.config.device == 'gpu':
        if np.any([isinstance(x.op, cuda.GpuElemwise)
                   for x in train_model.maker.fgraph.toposort()]):
            print('used the gpu during training')
        else:
            print('used the cpu during training')

    # Save results.
    trainingParams = {
        'sgd_opts': sgd_opts,
        'miniBatchSize': miniBatchSize,
        'lmbda': lmbda
    }
    results = [
        'test_error: {:.2%}'.format(test_error),
        'run_time: {:.2f}s'.format(run_time),
        'best_iter: {}'.format(best_iter)
    ]
    with open('readme.txt', 'a') as f:
        f.write('\n {0}\n {1}\n'.format(trainingParams, results))
    if monitoring_to_file:
        monitoring_filename = 'softmax_{0:.2%}_monitoring.txt'.format(
            test_error)
        # rename temp file created during training
        os.rename(temp_monitoring_filename, monitoring_filename)
        with open('readme.txt', 'a') as f:
            f.write('{0}\n'.format(monitoring_filename))
    while True:
        answer = raw_input('save model (y/n)? ')
        if answer == 'y':
            modelFilename = 'softmax_{0:.2%}.pkl'.format(test_error)
            best_model = {
                'dimIn': dimIn,
                'params': best_params,
                'results': results,
                'trainingParams': trainingParams
            }
            with open(modelFilename, 'wb') as f:
                cPickle.dump(best_model, f, -1)
            with open('readme.txt', 'a') as f:
                f.write('{0}\n'.format(modelFilename))
            break
        elif answer == 'n':
            break
        else:
            print('invalid input, try again')
    os.chdir('../')
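
# Minimal usage sketch for the softmax driver, reusing the hypothetical
# _shared_xy helper defined after apply_sdA_sgd above. The sgd_opts keys are
# assumptions dictated by supervised_tuning, which lives outside this module.
def _demo_softmax():
    train, val, test = [_shared_xy(n, 28 * 28, 10) for n in (1000, 200, 200)]
    apply_softmax_sgd(train, val, test, nLabels=10, miniBatchSize=20,
                      sgd_opts={'epochs': 1, 'alpha': 0.1},  # assumed keys
                      lmbda=0.0, results_dir='results_softmax_demo')
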
def apply_LeNet_sgd(training_data, validation_data, test_data, numFilters,
                    nLabels, dimHiddenSig, filterDim, poolShape,
                    miniBatchSize, sgd_opts, lmbda, dropout_rates,
                    activations, results_dir, momentum=None, mc_samples=None,
                    monitoring_to_file=False):
    """
    X component of data sets passed in should be of shape
    [batch size, number of input feature maps, image height, image width].
    """
    if not os.path.isdir(results_dir):
        os.makedirs(results_dir)
    os.chdir(results_dir)
    X = T.tensor4(name='X')
    y = T.ivector('y')
    trainShape = training_data[0].get_value(borrow=True).shape
    valShape = validation_data[0].get_value(borrow=True).shape
    testShape = test_data[0].get_value(borrow=True).shape
    nBatchTrain = trainShape[0] // miniBatchSize
    nBatchVal = valShape[0] // miniBatchSize
    nBatchTest = testShape[0] // miniBatchSize
    rng = np.random.RandomState(0)
    nChannels = trainShape[1]
    imRows = trainShape[2]
    imCols = trainShape[3]

    # Construct layers.
    layer0 = convPoolLayer(
        rng,
        input=X,
        #input_dropout=X,
        imageShape=(miniBatchSize, nChannels, imRows, imCols),
        filterShape=(numFilters[0], nChannels, filterDim, filterDim),
        poolShape=poolShape,
        p_dropout=dropout_rates[0],
        activation=activations[0])
    imRows1 = (imRows - filterDim + 1) // poolShape[0]
    imCols1 = (imCols - filterDim + 1) // poolShape[1]
    layer1 = convPoolLayer(
        rng,
        input=layer0.output,
        #input_dropout=layer0.output_dropout,
        imageShape=(miniBatchSize, numFilters[0], imRows1, imCols1),
        filterShape=(numFilters[1], numFilters[0], filterDim, filterDim),
        poolShape=poolShape,
        p_dropout=dropout_rates[1],
        activation=activations[1])
    imRows2 = (imRows1 - filterDim + 1) // poolShape[0]
    imCols2 = (imCols1 - filterDim + 1) // poolShape[1]
    # One row of input is the flattened units of all feature maps
    # corresponding to a particular image.
    layer2 = FullyConnectedLayer(
        rng,
        input=layer1.output.flatten(2),
        #input_dropout=layer1.output_dropout.flatten(2),
        dimIn=numFilters[1] * imRows2 * imCols2,
        dimOut=dimHiddenSig,
        p_dropout=dropout_rates[2],
        activation=activations[2])
    layer3 = FullyConnectedLayer(
        rng,
        input=layer2.output,
        #input_dropout=layer2.output_dropout,
        dimIn=layer2.dimOut,
        dimOut=dimHiddenSig,
        p_dropout=dropout_rates[3],
        activation=activations[3])
    layer4 = Softmax(
        rng,
        input=layer3.output,
        #input_dropout=layer3.output_dropout,
        dimIn=layer3.dimOut,
        dimOut=nLabels,
        p_dropout=dropout_rates[4])
    # Initialise LeNet.
    layers = [layer0, layer1, layer2, layer3, layer4]
    clf = cnn(rng, layers, mc_samples)
    train_model, validate_model, test_model = clf.get_training_functions(
        X, y, training_data, validation_data, test_data, miniBatchSize,
        lmbda, momentum)

    # Do the actual training.
    print('starting training...')
    [best_params, test_error, run_time, best_iter,
     temp_monitoring_filename] = supervised_tuning(
         clf.params, train_model, validate_model, test_model, nBatchTrain,
         nBatchVal, nBatchTest, sgd_opts, monitoring_to_file)

    # If device was set to gpu, check it was actually used.
    if theano.config.device == 'gpu':
        if np.any([isinstance(x.op, cuda.GpuElemwise)
                   for x in train_model.maker.fgraph.toposort()]):
            print('used the gpu during training')
        else:
            print('used the cpu during training')

    # Save results.
    trainingParams = {
        'sgd_opts': sgd_opts,
        'lmbda': lmbda,
        'numFilters': numFilters,
        'dimHiddenSig': dimHiddenSig,
        'miniBatchSize': miniBatchSize,
        'filterDim': filterDim,
        'poolShape': poolShape,
        'dropout_rates': dropout_rates,
        'momentum': momentum,
        'mc_samples': mc_samples,
        'activations': activations
    }
    results = [
        'test_error: {:.2%}'.format(test_error),
        'run_time: {:.2f}s'.format(run_time),
        'best_iter: {}'.format(best_iter)
    ]
    with open('readme.txt', 'a') as f:
        f.write('\n {0}\n {1}\n'.format(trainingParams, results))
    if monitoring_to_file:
        monitoring_filename = 'LeNet_{0:.2%}_{1}_monitoring.txt'.format(
            test_error, numFilters)
        # rename temp file created during training
        os.rename(temp_monitoring_filename, monitoring_filename)
        with open('readme.txt', 'a') as f:
            f.write('{0}\n'.format(monitoring_filename))
    while True:
        answer = raw_input('plot filters (y/n)? ')
        if answer == 'y':
            # First nChannels rows relate to first filter etc.
            filters = best_params[0].reshape((-1, filterDim * filterDim))
            imageFilename = 'LeNet_filters_{0:.2%}_{1}.png'.format(
                test_error, numFilters)
            image = Image.fromarray(
                tile_raster_images(X=filters,
                                   img_shape=(filterDim, filterDim),
                                   tile_shape=(numFilters[0], nChannels),
                                   tile_spacing=(1, 1)))
            image.save(imageFilename)
            with open('readme.txt', 'a') as f:
                f.write('{0}\n'.format(imageFilename))
            break
        elif answer == 'n':
            break
        else:
            print('invalid input, try again')
    while True:
        answer = raw_input('save model (y/n)? ')
        if answer == 'y':
            modelFilename = 'LeNet_{0:.2%}_{1}.pkl'.format(
                test_error, numFilters)
            best_model = {
                'imageShape': [nChannels, imRows, imCols],
                'params': best_params,
                'results': results,
                'trainingParams': trainingParams
            }
            with open(modelFilename, 'wb') as f:
                cPickle.dump(best_model, f, -1)
            with open('readme.txt', 'a') as f:
                f.write('{0}\n'.format(modelFilename))
            break
        elif answer == 'n':
            break
        else:
            print('invalid input, try again')
    os.chdir('../')
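
# Usage sketch for the LeNet driver, which (per its docstring) takes 4D image
# tensors rather than flat vectors. All values below are placeholders, and
# the sgd_opts keys and activation choices are assumptions.
def _demo_LeNet():
    rng = np.random.RandomState(0)

    def shared_images(n, channels, rows, cols, n_labels):
        # Random image batches in [batch, channels, rows, cols] layout.
        X = theano.shared(
            rng.rand(n, channels, rows, cols).astype(theano.config.floatX),
            borrow=True)
        y = theano.shared(rng.randint(0, n_labels, n).astype(np.int32),
                          borrow=True)
        return (X, y)

    train, val, test = [shared_images(n, 1, 28, 28, 10)
                        for n in (1000, 200, 200)]
    # With 28x28 inputs, filterDim=5 and 2x2 pooling: 28 -> 12 -> 4 maps.
    apply_LeNet_sgd(train, val, test, numFilters=[20, 50], nLabels=10,
                    dimHiddenSig=500, filterDim=5, poolShape=(2, 2),
                    miniBatchSize=20,
                    sgd_opts={'epochs': 1, 'alpha': 0.1},  # assumed keys
                    lmbda=0.0, dropout_rates=[0.0] * 5,
                    activations=[T.tanh] * 4,
                    results_dir='results_LeNet_demo')
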