def predict():
    """
    An example of how to load a trained model and use it
    to predict labels.
    """

    # load the saved model
    classifier = cPickle.load(open('best_model.pkl', 'rb'))

    # compile a predictor function
    predict_model = theano.function(
        inputs=[classifier.input],
        outputs=classifier.y_pred)

    # We can test it on some examples from the test set
    dataset = 'mnist.pkl.gz'
    datasets = load_data(dataset)
    test_set_x, test_set_y = datasets[2]
    test_set_x = test_set_x.get_value()

    predicted_values = predict_model(test_set_x[:10])
    print ("Predicted values for the first 10 examples in test set:")
    print predicted_values
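
# Illustrative companion to predict() (a sketch, not part of the original
# script): it compares predicted labels against the true ones. It assumes the
# same module-level imports predict() relies on (cPickle, theano, load_data)
# and that load_data() returns Theano shared/symbolic variables as in the
# tutorial code; the function name is hypothetical.
def predict_and_compare(n=10):
    # reload the pickled classifier and rebuild the prediction function
    classifier = cPickle.load(open('best_model.pkl', 'rb'))
    predict_model = theano.function(inputs=[classifier.input],
                                    outputs=classifier.y_pred)
    # third split of the dataset is the test set
    test_set_x, test_set_y = load_data('mnist.pkl.gz')[2]
    print 'predicted labels:', predict_model(test_set_x.get_value()[:n])
    print 'true labels     :', test_set_y.eval()[:n]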
def test_GRBM_DBN(finetune_lr=0.1, pretraining_epochs=[225, 75],
                  pretrain_lr=[0.002, 0.02], k=1, weight_decay=0.0002,
                  momentum=0.9, datasets=None, batch_size=128,
                  hidden_layers_sizes=[1024, 1024, 1024], n_ins=784,
                  n_outs=10, filename="../data/DBN.pickle", load=True,
                  save=True, verbose=False, pretraining_start=0,
                  pretraining_stop=-1, finetune=True, saveToDir=None,
                  loadModelFromFile=None):

    folder_name = 'finetune_lr=%g' % finetune_lr + \
        ' pretraining_epochs=%d-%d' % (pretraining_epochs[0], pretraining_epochs[1]) + \
        ' pretrain_lr=%g-%g' % (pretrain_lr[0], pretrain_lr[1]) + \
        ' k=%d' % k + \
        ' weight_decay=%g' % weight_decay + \
        ' momentum=%g' % momentum + \
        ' batch_size=%d' % batch_size + \
        ' hidden_layers_sizes=%d' % (hidden_layers_sizes[0])

    if not os.path.exists(folder_name):
        os.makedirs(folder_name)

    if datasets is None:
        from load_data_MNIST import load_data
        datasets = load_data()

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size

    # numpy random generator
    numpy_rng = numpy.random.RandomState()

    # prepare save directory
    if saveToDir is not None:
        if saveToDir[-1] != '/':
            saveToDir += '/'
        if os.path.exists(saveToDir):
            timeStr = datetime.datetime.fromtimestamp(time.time()).strftime('%Y_%m_%d_%H_%M_%S')
            saveToDir = saveToDir[:-1] + '_' + timeStr + "/"
        os.makedirs(saveToDir)

    logger = Logger(saveToDir, verbose)

    # save run parameters
    logger.logParameter('Start time', datetime.datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'))
    logger.logParameter('\nNet parameters', '')
    logger.logParameter('pretraining epochs', ', '.join(str(x) for x in pretraining_epochs))
    logger.logParameter('pretraining learning rate', ', '.join(str(x) for x in pretrain_lr))
    logger.logParameter('finetuning learning rate', finetune_lr)
    logger.logParameter('weight decay', weight_decay)
    logger.logParameter('momentum', momentum)
    logger.logParameter('CD-k', k)
    logger.logParameter('inputs count', n_ins)
    logger.logParameter('outputs count', n_outs)
    logger.logParameter('hidden layers sizes', ', '.join(str(x) for x in hidden_layers_sizes))
    logger.logParameter('batch size', batch_size)

    loaded = False
    if loadModelFromFile is not None:
        logger.logParameter('loading model', loadModelFromFile)
        logger.log('... trying to load the model from ' + loadModelFromFile)
        if os.path.isfile(loadModelFromFile):
            dbn = GRBM_DBN.load(loadModelFromFile)
            dbn.update_finetune_cost(weight_decay=weight_decay)
            loaded = True
            logger.log('... model loaded')
        else:
            logger.log("... couldn't find the model file")

    if not loaded:
        logger.log('... building the model')
        # construct the Deep Belief Network
        dbn = GRBM_DBN(numpy_rng=numpy_rng, n_ins=n_ins,
                       hidden_layers_sizes=hidden_layers_sizes,
                       n_outs=n_outs)

        #########################
        # PRETRAINING THE MODEL #
        #########################
        logger.log('... getting the pretraining functions')
        pretraining_fns = dbn.pretraining_functions(train_set_x=train_set_x,
                                                    batch_size=batch_size,
                                                    k=k)

        logger.log('... pre-training the model')
        start_time = time.clock()

        # Pre-train layer-wise
        if pretraining_stop == -1:
            pretraining_stop = dbn.n_layers
        for i in xrange(pretraining_start, pretraining_stop):
            start_time_temp = time.clock()
            if i == 0:
                pretrain_lr_new = pretrain_lr[0]
                pretraining_epochs_new = pretraining_epochs[0]
            else:
                pretrain_lr_new = pretrain_lr[1]
                pretraining_epochs_new = pretraining_epochs[1]

            # go through pretraining epochs
            for epoch in xrange(pretraining_epochs_new):
                if verbose:
                    # weights
                    image = Image.fromarray(
                        tile_raster_images(
                            X=dbn.rbm_layers[i].W.get_value(borrow=True).T,
                            img_shape=(28, 28),
                            tile_shape=(10, 10),
                            tile_spacing=(1, 1)
                        )
                    )
                    image.save(folder_name + '/filters_at_layer_%i_epoch_%i.png' % (i, epoch))

                    # probabilities
                    X = valid_set_x[:20].eval()
                    hMean = sigmoid(numpy.dot(X, dbn.rbm_layers[i].W.get_value(borrow=True)) +
                                    dbn.rbm_layers[i].hbias.get_value(borrow=True))
                    image = Image.fromarray(hMean * 256)
                    image.save(folder_name + '/probabilities_at_layer_%i_epoch_%i.gif' % (i, epoch))

                # go through the training set
                c = []
                for batch_index in xrange(n_train_batches):
                    c.append(pretraining_fns[i](index=batch_index,
                                                lr=pretrain_lr_new))
                end_time_temp = time.clock()
                logger.log('Pre-training layer %i, epoch %d, cost %f ' % (i + 1, epoch + 1, numpy.mean(c)) +
                           ' ran for %d sec' % (end_time_temp - start_time_temp))

        end_time = time.clock()
        logger.log('The pretraining code for file ' + os.path.split(__file__)[1] +
                   ' ran for %.2fm' % ((end_time - start_time) / 60.))

        if saveToDir:
            logger.log('... saving the model')
            dbn.save(saveToDir + 'pretrained_model')

    ########################
    # FINETUNING THE MODEL #
    ########################
    if finetune:
        # get the training, validation and testing functions for the model
        logger.log('... getting the finetuning functions')
        train_fn, validate_model, test_model = dbn.build_finetune_functions(
            datasets=datasets, batch_size=batch_size, momentum=momentum)

        logger.log('... finetuning the model')

        best_params = None
        best_validation_loss = numpy.inf
        last_validation_loss = numpy.inf
        test_score = 0.
        start_time = time.clock()

        current_lr = finetune_lr
        done_looping = False
        epoch = 0

        while not done_looping:
            epoch = epoch + 1
            for minibatch_index in xrange(n_train_batches):
                minibatch_avg_cost = train_fn(minibatch_index, current_lr)
                iter = (epoch - 1) * n_train_batches + minibatch_index

            import warnings
            warnings.filterwarnings("ignore")

            validation_losses = validate_model()
            this_validation_loss = numpy.mean(validation_losses)
            logger.log('epoch %i, validation error %f %%' %
                       (epoch, this_validation_loss * 100))

            if this_validation_loss < best_validation_loss:
                best_validation_loss = this_validation_loss
            if this_validation_loss > last_validation_loss:
                current_lr /= 2.
                logger.log('learning rate halved to %f' % current_lr)
            last_validation_loss = this_validation_loss

            if current_lr < 0.001:
                done_looping = True

        test_losses = test_model()
        test_score = numpy.mean(test_losses)

        end_time = time.clock()
        logger.log('Optimization complete with best validation score of %f %% '
                   'with test performance %f %%' %
                   (best_validation_loss * 100., test_score * 100.))
        logger.log('The fine tuning code for file ' + os.path.split(__file__)[1] +
                   ' ran for %.2fm' % ((end_time - start_time) / 60.))

        if saveToDir:
            logger.log('... saving the final model')
            dbn.save(saveToDir + 'final_model')

        return (best_validation_loss * 100., test_score * 100.)

    return (0., 0.)
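
# The finetuning loop above uses a simple adaptive schedule: the learning rate
# is halved whenever the validation error increases from one epoch to the
# next, and training stops once the rate drops below 1e-3. A minimal
# standalone sketch of that schedule (illustrative only, not part of the
# original file; the function name is hypothetical):
def lr_schedule(validation_errors, initial_lr=0.1, min_lr=0.001):
    """Return the learning rate used at each epoch for a given error trace."""
    lr, last_err, used = initial_lr, float('inf'), []
    for err in validation_errors:
        used.append(lr)          # this epoch trained with the current rate
        if err > last_err:
            lr /= 2.             # validation error went up: halve the rate
        last_err = err
        if lr < min_lr:
            break                # rate too small: stop training
    return used

# e.g. lr_schedule([0.12, 0.10, 0.11, 0.09, 0.095]) -> [0.1, 0.1, 0.1, 0.05, 0.05]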
def test_GRBM_DBN(finetune_lr=0.1, pretraining_epochs=[225, 75],
                  pretrain_lr=[0.002, 0.02], k=1, weight_decay=0.0002,
                  momentum=0.9, datasets=None, batch_size=128,
                  hidden_layers_sizes=[1024, 1024, 1024], n_ins=784,
                  n_outs=10, filename="../data/DBN.pickle", load=True,
                  save=True, verbose=False, pretraining_start=0,
                  pretraining_stop=-1, finetune=True):

    if datasets is None:
        from load_data_MNIST import load_data
        datasets = load_data()

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size

    # numpy random generator
    numpy_rng = numpy.random.RandomState()

    loaded = False
    if load:
        print '... trying to load the model'
        if os.path.isfile(filename):
            dbn = GRBM_DBN.load(filename)
            dbn.update_finetune_cost(weight_decay=weight_decay)
            loaded = True
            print '... model loaded'
        else:
            print "... couldn't find the model file"

    if not loaded:
        print '... building the model'
        # construct the Deep Belief Network
        dbn = GRBM_DBN(numpy_rng=numpy_rng, n_ins=n_ins,
                       hidden_layers_sizes=hidden_layers_sizes,
                       n_outs=n_outs)

        #########################
        # PRETRAINING THE MODEL #
        #########################
        print '... getting the pretraining functions'
        pretraining_fns = dbn.pretraining_functions(train_set_x=train_set_x,
                                                    batch_size=batch_size,
                                                    k=k)

        print '... pre-training the model'
        start_time = time.clock()

        # Pre-train layer-wise
        if pretraining_stop == -1:
            pretraining_stop = dbn.n_layers
        for i in xrange(pretraining_start, pretraining_stop):
            start_time_temp = time.clock()
            if i == 0:
                pretrain_lr_new = pretrain_lr[0]
                pretraining_epochs_new = pretraining_epochs[0]
            else:
                pretrain_lr_new = pretrain_lr[1]
                pretraining_epochs_new = pretraining_epochs[1]

            # go through pretraining epochs
            for epoch in xrange(pretraining_epochs_new):
                if verbose:
                    # weights
                    image = Image.fromarray(
                        tile_raster_images(
                            X=dbn.rbm_layers[i].W.get_value(borrow=True).T,
                            img_shape=(28, 28),
                            tile_shape=(10, 10),
                            tile_spacing=(1, 1)
                        )
                    )
                    image.save('filters_at_layer_%i_epoch_%i.png' % (i, epoch))

                    # probabilities
                    X = valid_set_x[:20].eval()
                    hMean = sigmoid(numpy.dot(X, dbn.rbm_layers[i].W.get_value(borrow=True)) +
                                    dbn.rbm_layers[i].hbias.get_value(borrow=True))
                    image = Image.fromarray(hMean * 256)
                    image.save('probabilities_at_layer_%i_epoch_%i.gif' % (i, epoch))

                # go through the training set
                c = []
                for batch_index in xrange(n_train_batches):
                    c.append(pretraining_fns[i](index=batch_index,
                                                lr=pretrain_lr_new))
                end_time_temp = time.clock()
                print 'Pre-training layer %i, epoch %d, cost %f ' % (i + 1, epoch + 1, numpy.mean(c)) + \
                      ' ran for %d sec' % (end_time_temp - start_time_temp)

        end_time = time.clock()
        print >> sys.stderr, ('The pretraining code for file ' +
                              os.path.split(__file__)[1] +
                              ' ran for %.2fm' % ((end_time - start_time) / 60.))

        if save:
            print '... saving the model'
            dbn.save(filename)

    ########################
    # FINETUNING THE MODEL #
    ########################
    if finetune:
        # get the training, validation and testing functions for the model
        print '... getting the finetuning functions'
        train_fn, validate_model, test_model = dbn.build_finetune_functions(
            datasets=datasets, batch_size=batch_size, momentum=momentum)

        print '... finetuning the model'

        best_params = None
        best_validation_loss = numpy.inf
        last_validation_loss = numpy.inf
        test_score = 0.
        start_time = time.clock()

        current_lr = finetune_lr
        done_looping = False
        epoch = 0

        while not done_looping:
            epoch = epoch + 1
            for minibatch_index in xrange(n_train_batches):
                minibatch_avg_cost = train_fn(minibatch_index, current_lr)
                iter = (epoch - 1) * n_train_batches + minibatch_index

            import warnings
            warnings.filterwarnings("ignore")

            validation_losses = validate_model()
            this_validation_loss = numpy.mean(validation_losses)
            print('epoch %i, validation error %f %%' %
                  (epoch, this_validation_loss * 100.))

            if this_validation_loss < best_validation_loss:
                best_validation_loss = this_validation_loss
            if this_validation_loss > last_validation_loss:
                current_lr /= 2.
                print(' learning rate halved to %f' % current_lr)
            last_validation_loss = this_validation_loss

            if current_lr < 0.001:
                done_looping = True

        test_losses = test_model()
        test_score = numpy.mean(test_losses)

        end_time = time.clock()
        print(('Optimization complete with best validation score of %f %%, '
               'with test performance %f %%') %
              (best_validation_loss * 100., test_score * 100.))
        print >> sys.stderr, ('The fine tuning code for file ' +
                              os.path.split(__file__)[1] +
                              ' ran for %.2fm' % ((end_time - start_time) / 60.))

        if save:
            ts = time.time()
            st = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d_%H-%M-%S')
            print '... saving the final model'
            dbn.save(re.sub(r'\.pickle$', '', filename) + '_' + st + '.final.pickle')

        return (best_validation_loss * 100., test_score * 100.)

    return (0., 0.)
# -*- coding: utf-8 -*-
from GRBM_DBN import test_GRBM_DBN
from GRBM_DBN import GRBM_DBN
from load_data_MNIST import load_data
from load_data_MNIST import load_raw_data

datasets = load_data()

#
# TRAIN THE NETWORK
#
val_score, test_score = test_GRBM_DBN(finetune_lr=0.1,
                                      pretraining_epochs=[1, 1],
                                      pretrain_lr=[0.002, 0.02], k=1,
                                      weight_decay=0.0002, momentum=0.9,
                                      batch_size=128, datasets=datasets,
                                      hidden_layers_sizes=[784, 784],
                                      finetune=False,
                                      saveToDir='../results/MNIST/',
                                      loadModelFromFile='',
                                      verbose=True)

#
# USE THE TRAINED NETWORK
#
dbn = GRBM_DBN.load('../results/MNIST/pretrained_model')
train_set, valid_set, test_set = load_raw_data()

# classification of the first 13 patterns
print dbn.classify(train_set[0][:13])
# actual classes of the first 13 patterns
print train_set[1][:13]
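
# Optional follow-up (a sketch, not in the original script): under the same
# assumptions as above -- dbn.classify() returns predicted label indices and
# load_raw_data() yields (images, labels) numpy pairs -- the agreement on the
# first N training patterns can be checked directly:
import numpy

N = 100
predicted = numpy.asarray(dbn.classify(train_set[0][:N]))
actual = numpy.asarray(train_set[1][:N])
print 'accuracy on first %d training patterns: %.2f%%' % (
    N, 100.0 * numpy.mean(predicted == actual))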
def test_rbm(learning_rate=0.1, training_epochs=15,
             dataset='mnist.pkl.gz', batch_size=20,
             n_chains=20, n_samples=10, output_folder='rbm_plots',
             n_hidden=500):
    """
    Demonstrate how to train an RBM and afterwards sample from it using
    Theano. This is demonstrated on MNIST.

    :param learning_rate: learning rate used for training the RBM
    :param training_epochs: number of epochs used for training
    :param dataset: path to the pickled dataset
    :param batch_size: size of a batch used to train the RBM
    :param n_chains: number of parallel Gibbs chains to be used for sampling
    :param n_samples: number of samples to plot for each chain
    """
    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()    # index to a [mini]batch
    x = T.matrix('x')      # the data is presented as rasterized images

    rng = numpy.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 30))

    # initialize storage for the persistent chain (state = hidden
    # layer of chain)
    persistent_chain = theano.shared(numpy.zeros((batch_size, n_hidden),
                                                 dtype=theano.config.floatX),
                                     borrow=True)

    # construct the RBM class
    rbm = RBM(input=x, n_visible=28 * 28,
              n_hidden=n_hidden, numpy_rng=rng, theano_rng=theano_rng)

    # get the cost and the gradient corresponding to one step of CD-15
    cost, updates = rbm.get_cost_updates(lr=learning_rate,
                                         persistent=persistent_chain, k=15)

    #################################
    #     Training the RBM          #
    #################################
    if not os.path.isdir(output_folder):
        os.makedirs(output_folder)
    os.chdir(output_folder)

    # it is ok for a theano function to have no output
    # the purpose of train_rbm is solely to update the RBM parameters
    train_rbm = theano.function([index], cost,
                                updates=updates,
                                givens={x: train_set_x[index * batch_size:
                                                       (index + 1) * batch_size]},
                                name='train_rbm')

    plotting_time = 0.
    start_time = time.clock()

    # go through training epochs
    for epoch in xrange(training_epochs):

        # go through the training set
        mean_cost = []
        for batch_index in xrange(n_train_batches):
            mean_cost += [train_rbm(batch_index)]

        print 'Training epoch %d, cost is ' % epoch, numpy.mean(mean_cost)

        # Plot filters after each training epoch
        plotting_start = time.clock()
        # Construct image from the weight matrix
        image = PIL.Image.fromarray(tile_raster_images(
            X=rbm.W.get_value(borrow=True).T,
            img_shape=(28, 28), tile_shape=(10, 10),
            tile_spacing=(1, 1)))
        image.save('filters_at_epoch_%i.png' % epoch)
        plotting_stop = time.clock()
        plotting_time += (plotting_stop - plotting_start)

    end_time = time.clock()

    pretraining_time = (end_time - start_time) - plotting_time

    print ('Training took %f minutes' % (pretraining_time / 60.))

    #################################
    #     Sampling from the RBM     #
    #################################
    # find out the number of test samples
    number_of_test_samples = test_set_x.get_value(borrow=True).shape[0]

    # pick random test examples, with which to initialize the persistent chain
    test_idx = rng.randint(number_of_test_samples - n_chains)
    persistent_vis_chain = theano.shared(numpy.asarray(
        test_set_x.get_value(borrow=True)[test_idx:test_idx + n_chains],
        dtype=theano.config.floatX))

    plot_every = 1000
    # define one step of Gibbs sampling (mf = mean-field); define a
    # function that does `plot_every` steps before returning the
    # sample for plotting
    [presig_hids, hid_mfs, hid_samples, presig_vis,
     vis_mfs, vis_samples], updates = \
        theano.scan(rbm.gibbs_vhv,
                    outputs_info=[None, None, None, None,
                                  None, persistent_vis_chain],
                    n_steps=plot_every)

    # add to updates the shared variable that takes care of our persistent
    # chain
    updates.update({persistent_vis_chain: vis_samples[-1]})
    # construct the function that implements our persistent chain.
    # we generate the "mean field" activations for plotting and the actual
    # samples for reinitializing the state of our persistent chain
    sample_fn = theano.function([], [vis_mfs[-1], vis_samples[-1]],
                                updates=updates,
                                name='sample_fn')

    # create a space to store the image for plotting (we need to leave
    # room for the tile_spacing as well)
    image_data = numpy.zeros((29 * n_samples + 1, 29 * n_chains - 1),
                             dtype='uint8')
    for idx in xrange(n_samples):
        # generate `plot_every` intermediate samples that we discard,
        # because successive samples in the chain are too correlated
        vis_mf, vis_sample = sample_fn()
        print ' ... plotting sample ', idx
        image_data[29 * idx:29 * idx + 28, :] = tile_raster_images(
            X=vis_mf,
            img_shape=(28, 28),
            tile_shape=(1, n_chains),
            tile_spacing=(1, 1))

    # construct image
    image = PIL.Image.fromarray(image_data)
    image.save('samples.png')
    os.chdir('../')
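
# Usage sketch (illustrative, not part of the original file): a short run that
# trains the persistent-CD RBM for a few epochs and writes the filter and
# sample images into 'rbm_plots/'. The epoch/chain counts below are chosen
# only to keep the run cheap; the defaults above reproduce the tutorial
# settings.
if __name__ == '__main__':
    test_rbm(training_epochs=3, n_hidden=100, n_chains=10, n_samples=5)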
from GRBM_DBN import test_GRBM_DBN
from load_data_MNIST import load_data

LAYER_SIZES = [256, 512, 1024]
N_LAYERS = [2, 3, 4]
ITERATIONS = 5

datasets = load_data()

for _ in range(ITERATIONS):
    for n_layers in N_LAYERS:
        for layer_size in LAYER_SIZES:
            # test_GRBM_DBN returns (validation score, test score), in percent
            val_score, test_score = test_GRBM_DBN(
                finetune_lr=0.1, pretraining_epochs=[225, 75],
                pretrain_lr=[0.002, 0.02], k=1, weight_decay=0.0002,
                momentum=0.9, batch_size=128, datasets=datasets,
                hidden_layers_sizes=n_layers * [layer_size],
                load=False,
                filename=('../data/MNIST_%d_%d.pickle' % (layer_size, n_layers)))

            log = '../data/MNIST.log'
            with open(log, 'a') as f:
                f.write('layer_size=%d, n_layers=%d, test_score=%f%%, val_score=%f%%\n'
                        % (layer_size, n_layers, test_score, val_score))
def sgd_optimization_mnist(learning_rate=0.13, n_epochs=1000,
                           dataset='mnist.pkl.gz',
                           batch_size=600):
    """
    Demonstrate stochastic gradient descent optimization of a log-linear
    model

    This is demonstrated on MNIST.

    :type learning_rate: float
    :param learning_rate: learning rate used (factor for the stochastic
                          gradient)

    :type n_epochs: int
    :param n_epochs: maximal number of epochs to run the optimizer

    :type dataset: string
    :param dataset: the path of the MNIST dataset file from
                 http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz
    """
    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size

    ######################
    # BUILD ACTUAL MODEL #
    ######################
    print '... building the model'

    # allocate symbolic variables for the data
    index = T.lscalar()  # index to a [mini]batch

    # generate symbolic variables for input (x and y represent a
    # minibatch)
    x = T.matrix('x')   # data, presented as rasterized images
    y = T.ivector('y')  # labels, presented as 1D vector of [int] labels

    # construct the logistic regression class
    # Each MNIST image has size 28*28
    classifier = LogisticRegression(input=x, n_in=28 * 28, n_out=10)

    # the cost we minimize during training is the negative log likelihood of
    # the model in symbolic format
    cost = classifier.negative_log_likelihood(y)

    # compiling a Theano function that computes the mistakes that are made by
    # the model on a minibatch
    test_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        inputs=[index],
        outputs=classifier.errors(y),
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            y: valid_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    # compute the gradient of cost with respect to theta = (W,b)
    g_W = T.grad(cost=cost, wrt=classifier.W)
    g_b = T.grad(cost=cost, wrt=classifier.b)

    # start-snippet-3
    # specify how to update the parameters of the model as a list of
    # (variable, update expression) pairs.
    updates = [(classifier.W, classifier.W - learning_rate * g_W),
               (classifier.b, classifier.b - learning_rate * g_b)]

    # compiling a Theano function `train_model` that returns the cost, but at
    # the same time updates the parameters of the model based on the rules
    # defined in `updates`
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )
    # end-snippet-3

    ###############
    # TRAIN MODEL #
    ###############
    print '... training the model'
    # early-stopping parameters
    patience = 5000  # look at this many examples regardless
    patience_increase = 2  # wait this much longer when a new best is found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    validation_frequency = min(n_train_batches, patience / 2)
                                   # go through this many minibatches before
                                   # checking the network on the validation
                                   # set; in this case we check every epoch

    best_validation_loss = numpy.inf
    test_score = 0.
    start_time = timeit.default_timer()

    done_looping = False
    epoch = 0
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):

            minibatch_avg_cost = train_model(minibatch_index)
            # iteration number
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                # compute zero-one loss on validation set
                validation_losses = [validate_model(i)
                                     for i in xrange(n_valid_batches)]
                this_validation_loss = numpy.mean(validation_losses)

                print(
                    'epoch %i, minibatch %i/%i, validation error %f %%' %
                    (
                        epoch,
                        minibatch_index + 1,
                        n_train_batches,
                        this_validation_loss * 100.
                    )
                )

                # if we got the best validation score until now
                if this_validation_loss < best_validation_loss:
                    # improve patience if loss improvement is good enough
                    if this_validation_loss < best_validation_loss * \
                            improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    best_validation_loss = this_validation_loss

                    # test it on the test set
                    test_losses = [test_model(i)
                                   for i in xrange(n_test_batches)]
                    test_score = numpy.mean(test_losses)

                    print(
                        (
                            '     epoch %i, minibatch %i/%i, test error of'
                            ' best model %f %%'
                        ) %
                        (
                            epoch,
                            minibatch_index + 1,
                            n_train_batches,
                            test_score * 100.
                        )
                    )

                    # save the best model
                    with open('best_model.pkl', 'wb') as f:
                        cPickle.dump(classifier, f)

            if patience <= iter:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print(
        (
            'Optimization complete with best validation score of %f %%,'
            ' with test performance %f %%'
        )
        % (best_validation_loss * 100., test_score * 100.)
    )
    print 'The code ran for %d epochs, with %f epochs/sec' % (
        epoch, 1. * epoch / (end_time - start_time))
    print >> sys.stderr, ('The code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.1fs' % (end_time - start_time))
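
# Usage sketch (illustrative, not part of the original file): train the
# logistic regression model, which pickles the best classifier to
# 'best_model.pkl', then reuse it through predict(). This assumes predict(),
# shown at the top of this section, lives in the same module.
if __name__ == '__main__':
    sgd_optimization_mnist(learning_rate=0.13, n_epochs=1000,
                           dataset='mnist.pkl.gz', batch_size=600)
    predict()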