def test_rbm(learning_rate=0.1, training_epochs=15, dataset='mnist.pkl.gz', batch_size=20, n_chains=20, n_samples=10, output_folder='rbm_plots', n_hidden=500): """ Demonstrate how to train and afterwards sample from it using Theano. This is demonstrated on MNIST. :param learning_rate: learning rate used for training the RBM :param training_epochs: number of epochs used for training :param dataset: path the the pickled dataset :param batch_size: size of a batch used to train the RBM :param n_chains: number of parallel Gibbs chains to be used for sampling :param n_samples: number of samples to plot for each chain """ datasets = load_dataset(dataset) train_set_x, train_set_y = datasets[0] test_set_x, test_set_y = datasets[2] # compute number of minibatches for training, validation and testing n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size # allocate symbolic variables for the data index = T.lscalar() # index to a [mini]batch x = T.matrix('x') # the data is presented as rasterized images rng = numpy.random.RandomState(123) theano_rng = RandomStreams(rng.randint(2**30)) # initialize storage for the persistent chain (state = hidden # layer of chain) persistent_chain = theano.shared(numpy.zeros((batch_size, n_hidden), dtype=theano.config.floatX), borrow=True) print '...... building model ......' # construct the RBM class rbm = RBM(input=x, n_visible=28 * 28, n_hidden=n_hidden, numpy_rng=rng, theano_rng=theano_rng) # get the cost and the gradient corresponding to one step of CD-15 cost, updates = rbm.get_cost_updates(lr=learning_rate, persistent=persistent_chain, k=15) ################################# # Training the RBM # ################################# if not os.path.isdir(output_folder): os.makedirs(output_folder) os.chdir(output_folder) # start-snippet-5 # it is ok for a theano function to have no output # the purpose of train_rbm is solely to update the RBM parameters train_rbm = theano.function( [index], cost, updates=updates, givens={x: train_set_x[index * batch_size:(index + 1) * batch_size]}, name='train_rbm') print '...... training ......' plotting_time = 0. start_time = timeit.default_timer() # go through training epochs for epoch in range(training_epochs): # go through the training set mean_cost = [] for batch_index in range(n_train_batches): mean_cost += [train_rbm(batch_index)] print('Training epoch %d, cost is ' % epoch, numpy.mean(mean_cost)) # Plot filters after each training epoch plotting_start = timeit.default_timer() # Construct image from the weight matrix image = Image.fromarray( tile_raster_images(X=rbm.W.get_value(borrow=True).T, img_shape=(28, 28), tile_shape=(10, 10), tile_spacing=(1, 1))) image.save('filters_at_epoch_%i.png' % epoch) plotting_stop = timeit.default_timer() plotting_time += (plotting_stop - plotting_start) end_time = timeit.default_timer() pretraining_time = (end_time - start_time) - plotting_time print('Training took %f minutes' % (pretraining_time / 60.)) # end-snippet-5 start-snippet-6 ################################# # Sampling from the RBM # ################################# # find out the number of test samples number_of_test_samples = test_set_x.get_value(borrow=True).shape[0] # pick random test examples, with which to initialize the persistent chain test_idx = rng.randint(number_of_test_samples - n_chains) persistent_vis_chain = theano.shared( numpy.asarray(test_set_x.get_value(borrow=True)[test_idx:test_idx + n_chains], dtype=theano.config.floatX)) # end-snippet-6 start-snippet-7 plot_every = 1000 # define one step of Gibbs sampling (mf = mean-field) define a # function that does `plot_every` steps before returning the # sample for plotting ([presig_hids, hid_mfs, hid_samples, presig_vis, vis_mfs, vis_samples], updates) = theano.scan( rbm.gibbs_vhv, outputs_info=[None, None, None, None, None, persistent_vis_chain], n_steps=plot_every, name="gibbs_vhv") # add to updates the shared variable that takes care of our persistent # chain :. updates.update({persistent_vis_chain: vis_samples[-1]}) # construct the function that implements our persistent chain. # we generate the "mean field" activations for plotting and the actual # samples for reinitializing the state of our persistent chain sample_fn = theano.function([], [vis_mfs[-1], vis_samples[-1]], updates=updates, name='sample_fn') # create a space to store the image for plotting ( we need to leave # room for the tile_spacing as well) image_data = numpy.zeros((29 * n_samples + 1, 29 * n_chains - 1), dtype='uint8') for idx in range(n_samples): # generate `plot_every` intermediate samples that we discard, # because successive samples in the chain are too correlated vis_mf, vis_sample = sample_fn() print(' ... plotting sample %d' % idx) image_data[29 * idx:29 * idx + 28, :] = tile_raster_images( X=vis_mf, img_shape=(28, 28), tile_shape=(1, n_chains), tile_spacing=(1, 1)) # construct image image = Image.fromarray(image_data) image.save('samples.png') # end-snippet-7 os.chdir('../')
def test_sdA(finetune_lr=0.1, pretraining_epochs=15, pretrain_lr=0.001, training_epochs=1000, dataset='mnist.pkl.gz', batch_size=1): datasets = load_dataset(dataset) train_set_x, train_set_y = datasets[0] valid_set_x, valid_set_y = datasets[1] test_set_x, test_set_y = datasets[2] n_train_batches = train_set_x.get_value(borrow=True).shape[0] n_train_batches //= batch_size numpy_rng = numpy.random.RandomState(89677) print '...... building model ......' """sdA def __init__(self, numpy_rng,theano_rng=None,n_ins=784,hidden_layer_sizes=[500,500], n_out=10,corruption_levels=[0.1,0.1]): """ sda = sdA(numpy_rng=numpy_rng, n_ins=28 * 28, hidden_layer_sizes=[1000, 1000, 1000], corruption_levels=[0.1, 0.1, 0.1], n_out=10) #============================================================================== #============首先进行无监督学习,即根据给定的隐藏层节点个数构建自编码网络================ #============================================================================== print('...... getting the pretraining functions ......') pretraining_fns = sda.pretraining_functions(train_set_x=train_set_x, batch_size=batch_size) print('...... pre-training the model......') start_time = timeit.default_timer() corruption_levels = [.1, .2, .3] for i in range(sda.n_layers): for epoch in range(pretraining_epochs): c = [] for batch_index in range(n_train_batches): c.append(pretraining_fns[i](index=batch_index, corruption=corruption_levels[i], lr=pretrain_lr)) print('Pre-training layer %i, epoch %d, cost %f' % (i, epoch, numpy.mean(c))) end_time = timeit.default_timer() print >> sys.stderr, ('The pretraining code for file ' + os.path.split(__file__)[1] + ' ran for %.1fs' % (end_time - start_time)) #============================================================================== #========进行监督学习,根据创建好的自编码网络,结合MLP和Softmax回归训练网络参数========== #============================================================================== print('...... getting the finetuning functions .......') train_fn, validate_model, test_model = sda.build_finetune_functions( datasets=datasets, batch_size=batch_size, learning_rate=finetune_lr) print('...... finetunning the model.......') patience = 10 * n_train_batches # look as this many examples regardless patience_increase = 2. improvement_threshold = 0.995 validation_frequency = min(n_train_batches, patience // 2) best_validation_loss = numpy.inf test_score = 0. start_time = timeit.default_timer() done_looping = False epoch = 0 while (epoch < training_epochs) and (not done_looping): epoch = epoch + 1 for minibatch_index in range(n_train_batches): minibatch_avg_cost = train_fn(minibatch_index) iter = (epoch - 1) * n_train_batches + minibatch_index if (iter + 1) % validation_frequency == 0: validation_losses = validate_model() this_validation_loss = numpy.mean(validation_losses) print('epoch %i, minibatch %i/%i, validation error %f %%' % (epoch, minibatch_index + 1, n_train_batches, this_validation_loss * 100.)) if this_validation_loss < best_validation_loss: if (this_validation_loss < best_validation_loss * improvement_threshold): patience = max(patience, iter * patience_increase) best_validation_loss = this_validation_loss best_iter = iter test_losses = test_model() test_score = numpy.mean(test_losses) print((' epoch %i, minibatch %i/%i, test error of ' 'best model %f %%') % (epoch, minibatch_index + 1, n_train_batches, test_score * 100.)) if patience <= iter: done_looping = True break end_time = timeit.default_timer() #============================================================================== #======================All Done================================================ print(('Optimization complete with best validation score of %f %%, ' 'on iteration %i, ' 'with test performance %f %%') % (best_validation_loss * 100., best_iter + 1, test_score * 100.)) print >> sys.stderr, ('The training code for file ' + os.path.split(__file__)[1] + ' ran for %.1fs' % (end_time - start_time))
def test_DA(learning_rate=0.1, training_epochs=15, dataset='mnist.pkl.gz', batch_size=20, output_folder='DA_plots'): datasets = load_dataset(dataset) train_set_x, train_set_y = datasets[0] n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size index = T.lscalar() x = T.matrix('x') if not os.path.isdir(output_folder): os.makedirs(output_folder) os.chdir(output_folder) #============================================================================== #================================Autoencoders ================================= print '...... building corruption = 0.0 model ......' rng = numpy.random.RandomState(123) theano_rng = RandomStreams(rng.randint(2**30)) da = dA(numpy_rng=rng, theano_rng=theano_rng, input=x, n_hidden=500, n_visible=28 * 28) cost, updates = da.get_cost_update(corruption_level=0., learning_rate=learning_rate) train_da = theano.function( inputs=[index], outputs=cost, updates=updates, givens={x: train_set_x[index * batch_size:(index + 1) * batch_size]}) print '...... training corruption = 0.0 ......' start_time = timeit.default_timer() for epoch in range(training_epochs): c = [] for batch_index in range(n_train_batches): c.append(train_da(batch_index)) print 'Training epoch %d,cost = ' % epoch, numpy.mean(c) end_time = timeit.default_timer() training_time = (end_time - start_time) print "###################" print da.W.shape print >> sys.stderr, ('The 0% corruption code for file ' + os.path.split(__file__)[1] + ' ran for %.1fs' % (training_time)) image = Image.fromarray( tile_raster_images(X=da.W.get_value(borrow=True).T, img_shape=(28, 28), tile_shape=(20, 20), tile_spacing=(1, 1))) image.save('filters_corruption_0.png') #============================================================================== #========================Denoising Autoencoder================================= print '...... building corruption = 0.3 model ......' rng = numpy.random.RandomState(123) theano_rng = RandomStreams(rng.randint(2**30)) da = dA(numpy_rng=rng, theano_rng=theano_rng, input=x, n_visible=28 * 28, n_hidden=500) cost, updates = da.get_cost_update(corruption_level=0.3, learning_rate=learning_rate) train_da = theano.function( inputs=[index], outputs=cost, updates=updates, givens={x: train_set_x[index * batch_size:(index + 1) * batch_size]}) print '...... training corruption = 0.0 ......' start_time = timeit.default_timer() for epoch in range(training_epochs): c = [] for batch_index in range(n_train_batches): c.append(train_da(batch_index)) print('Training epoch %d, cost ' % epoch, numpy.mean(c)) end_time = timeit.default_timer() training_time = (end_time - start_time) print >> sys.stderr, ('The 30% corruption code for file ' + os.path.split(__file__)[1] + ' ran for %.1fs' % (training_time)) image = Image.fromarray( tile_raster_images(X=da.W.get_value(borrow=True).T, img_shape=(28, 28), tile_shape=(20, 20), tile_spacing=(1, 1))) image.save('filters_corruption_30.png') os.chdir('../')
def test_DBN(finetune_lr=0.1, pretraining_epochs=100, pretrain_lr=0.01, k=1, training_epochs=1000, dataset='mnist.pkl.gz', batch_size=10): datasets = load_dataset(dataset) train_set_x, train_set_y = datasets[0] valid_set_x, valid_set_y = datasets[1] test_set_x, test_set_y = datasets[2] n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size numpy_rng = numpy.random.RandomState(123) print('...... building the model ......') # construct the Deep Belief Network dbn = DBN(numpy_rng=numpy_rng, n_ins=28 * 28, hidden_layers_sizes=[1000, 1000, 1000], n_outs=10) #======================Pretraining the model ================================== print('...... getting the pretraining functions ......') pretraining_fns = dbn.pretraining_functions(train_set_x=train_set_x, batch_size=batch_size, k=k) print('...... pre-training the model ......') start_time = timeit.default_timer() for i in range(dbn.n_layers): for epoch in range(pretraining_epochs): c = [] for batch_index in range(n_train_batches): c.append(pretraining_fns[i](index=batch_index, lr=pretrain_lr)) print('Pre-training layer {0}, epoch {1}, cost {2}'.format( i, epoch, numpy.mean(c))) end_time = timeit.default_timer() print('The pretraining code for file {0} ran for {1}m'.format( os.path.split(__file__)[1], ((end_time - start_time) / 60.))) #======================FineTuning the model =================================== print('...... getting the finetuning functions ......') train_fn, validate_model, test_model = dbn.build_finetune_functions( datasets=datasets, batch_size=batch_size, learning_rate=finetune_lr) print('...... finetuning the model ......') patience = 4 * n_train_batches patience_increase = 2. improvement_threshold = 0.995 validation_frequency = min(n_train_batches, patience / 2) best_validation_loss = numpy.inf test_score = 0. start_time = timeit.default_timer() done_looping = False epoch = 0 while (epoch < training_epochs) and (not done_looping): epoch = epoch + 1 for minibatch_index in range(n_train_batches): train_fn(minibatch_index) iter = (epoch - 1) * n_train_batches + minibatch_index if (iter + 1) % validation_frequency == 0: validation_losses = validate_model() this_validation_loss = numpy.mean(validation_losses) print('epoch %i, minibatch %i/%i, validation error %f %%' % (epoch, minibatch_index + 1, n_train_batches, this_validation_loss * 100.)) if this_validation_loss < best_validation_loss: if (this_validation_loss < best_validation_loss * improvement_threshold): patience = max(patience, iter * patience_increase) best_validation_loss = this_validation_loss best_iter = iter test_losses = test_model() test_score = numpy.mean(test_losses) print((' epoch %i, minibatch %i/%i, test error of ' 'best model %f %%') % (epoch, minibatch_index + 1, n_train_batches, test_score * 100.)) if patience <= iter: done_looping = True break end_time = timeit.default_timer() print(('Optimization complete with best validation score of %f %%, ' 'obtained at iteration %i, ' 'with test performance %f %%') % (best_validation_loss * 100., best_iter + 1, test_score * 100.)) print('The fine tuning code for file {0} ran for {1}m'.format( os.path.split(__file__)[1], ((end_time - start_time) / 60.)))
def evaluate_lenet5(learning_rate=0.1, n_epochs=200, dataset='mnist.pkl.gz', nkerns=[20, 50], batch_size=500): rng = numpy.random.RandomState(23455) datasets = load_dataset(dataset) train_set_x, train_set_y = datasets[0] valid_set_x, valid_set_y = datasets[1] test_set_x, test_set_y = datasets[2] # compute number of minibatches for training, validation and testing n_train_batches = train_set_x.get_value(borrow=True).shape[0] n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] n_test_batches = test_set_x.get_value(borrow=True).shape[0] n_train_batches //= batch_size n_valid_batches //= batch_size n_test_batches //= batch_size index = T.lscalar() x = T.matrix('x') y = T.ivector('y') print '...... building the model ......' layer0_input = x.reshape((batch_size, 1, 28, 28)) layer0 = LeNetConvPoolLayer(rng=rng, input=layer0_input, image_shape=(batch_size, 1, 28, 28), filter_shape=(nkerns[0], 1, 5, 5), poolsize=(2, 2)) layer1 = LeNetConvPoolLayer(rng, input=layer0.output, image_shape=(batch_size, nkerns[0], 12, 12), filter_shape=(nkerns[1], nkerns[0], 5, 5), poolsize=(2, 2)) layer2_input = layer1.output.flatten(2) layer2 = HiddenLayer(rng=rng, input=layer2_input, n_in=nkerns[1] * 4 * 4, n_out=500, activation=T.tanh) layer3 = LogisticRegression(input=layer2.output, n_in=500, n_out=10) cost = layer3.negative_log_likelihood(y) test_model = theano.function( [index], layer3.error(y), givens={ x: test_set_x[index * batch_size:(index + 1) * batch_size], y: test_set_y[index * batch_size:(index + 1) * batch_size] }) validate_model = theano.function( [index], layer3.error(y), givens={ x: valid_set_x[index * batch_size:(index + 1) * batch_size], y: valid_set_y[index * batch_size:(index + 1) * batch_size] }) params = layer3.params + layer2.params + layer1.params + layer0.params grads = T.grad(cost, params) updates = [(param_i, param_i - learning_rate * grad_i) for param_i, grad_i in zip(params, grads)] train_model = theano.function( [index], cost, updates=updates, givens={ x: train_set_x[index * batch_size:(index + 1) * batch_size], y: train_set_y[index * batch_size:(index + 1) * batch_size] }) print '...... training ......' patience = 10000 patience_increase = 2 improvement_threshold = 0.995 validation_frequency = min(n_train_batches, patience // 2) best_validation_loss = numpy.inf best_iter = 0 test_score = 0. start_time = timeit.default_timer() epoch = 0 done_looping = False while (epoch < n_epochs) and (not done_looping): epoch = epoch + 1 for minibatch_index in range(n_train_batches): iter = (epoch - 1) * n_train_batches + minibatch_index if iter % 100 == 0: print('training @ iter = ', iter) cost_ij = train_model(minibatch_index) if (iter + 1) % validation_frequency == 0: validation_losses = [ validate_model(i) for i in range(n_valid_batches) ] this_validation_loss = numpy.mean(validation_losses) print('epoch %i, minibatch %i/%i, validation error %f %%' % (epoch, minibatch_index + 1, n_train_batches, this_validation_loss * 100.)) if this_validation_loss < best_validation_loss: if this_validation_loss < best_validation_loss * \ improvement_threshold: patience = max(patience, iter * patience_increase) best_validation_loss = this_validation_loss best_iter = iter test_losses = [ test_model(i) for i in range(n_test_batches) ] test_score = numpy.mean(test_losses) print((' epoch %i, minibatch %i/%i, test error of ' 'best model %f %%') % (epoch, minibatch_index + 1, n_train_batches, test_score * 100.)) if patience <= iter: done_looping = True break end_time = timeit.default_timer() print('Optimization complete.') print( 'Best validation score of %f %% obtained at iteration %i, ' 'with test performance %f %%' % (best_validation_loss * 100., best_iter + 1, test_score * 100.)) print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] + ' ran for %.1fs' % ((end_time - start_time)))
def test_mlp(learning_rate=0.01, L1_reg=0.00, L2_reg=0.0001, n_epochs=10, dataset='mnist.pkl.gz', batch_size=20, n_hidden=500): datasets = lr.load_dataset(dataset) train_set_x, train_set_y = datasets[0] valid_set_x, valid_set_y = datasets[1] test_set_x, test_set_y = datasets[2] n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size print '...... building model ......' index = T.iscalar() x = T.matrix("x") y = T.ivector("y") rng = np.random.RandomState(1234) classifier = MLP(rng=rng, input=x, n_in=28 * 28, n_hidden=n_hidden, n_out=10) cost = (classifier.negative_log_likelihood(y) + L1_reg * classifier.L1 + L2_reg * classifier.L2_sqr) test_model = theano.function( inputs=[index], outputs=classifier.errors(y), givens={ x: test_set_x[index * batch_size:(index + 1) * batch_size], y: test_set_y[index * batch_size:(index + 1) * batch_size] }) validate_model = theano.function( inputs=[index], outputs=classifier.errors(y), givens={ x: valid_set_x[index * batch_size:(index + 1) * batch_size], y: valid_set_y[index * batch_size:(index + 1) * batch_size] }) gparams = [T.grad(cost, param) for param in classifier.params] #zip函数接受任意多个(包括0个和1个)序列作为参数,返回一个tuple列表 updates = [(param, param - learning_rate * gparam) for param, gparam in zip(classifier.params, gparams)] train_model = theano.function( inputs=[index], outputs=cost, updates=updates, givens={ x: train_set_x[index * batch_size:(index + 1) * batch_size], y: train_set_y[index * batch_size:(index + 1) * batch_size] }) patience = 10000 patience_increase = 2 improvement_threshold = 0.995 validation_frequency = min(n_train_batches, patience // 2) best_validation_loss = np.inf best_iter = 0 test_score = 0. start_time = timeit.default_timer() epoch = 0 done_looping = False print '...... training model ......' while (epoch < n_epochs) and (not done_looping): epoch = epoch + 1 for minibatch_index in range(n_train_batches): minibatch_avg_cost = train_model(minibatch_index) iter = (epoch - 1) * n_train_batches + minibatch_index if (iter + 1) % validation_frequency == 0: validation_losses = [ validate_model(i) for i in range(n_valid_batches) ] this_validation_loss = np.mean(validation_losses) print('epoch %i, minibatch %i/%i, validation error %f %%' % (epoch, minibatch_index + 1, n_train_batches, this_validation_loss * 100.)) if this_validation_loss < best_validation_loss: if (this_validation_loss < best_validation_loss * improvement_threshold): patience = max(patience, iter * patience_increase) best_validation_loss = this_validation_loss best_iter = iter test_losses = [ test_model(i) for i in range(n_test_batches) ] test_score = np.mean(test_losses) print((' epoch %i, minibatch %i/%i, test error of ' 'best model %f %%') % (epoch, minibatch_index + 1, n_train_batches, test_score * 100.)) if patience <= iter: done_looping = True break end_time = timeit.default_timer() print(('Optimization complete. Best validation score of %f %% ' 'obtained at iteration %i, with test performance %f %%') % (best_validation_loss * 100., best_iter + 1, test_score * 100.)) print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] + ' ran for %.1fs' % ((end_time - start_time)))