def test_DimentionalReduction(dataset='mnist.pkl.gz', pretraining_epochs=50,
                              pretrain_lr=0.01, batch_size=5):
    datasets = load_data(dataset)
    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    numpy_rng = numpy.random.RandomState(89677)
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_train_batches /= batch_size

    print '... building the model'
    # construct the stacked denoising autoencoder class
    sda = SdA(numpy_rng=numpy_rng, n_ins=28 * 28,
              hidden_layers_sizes=[300, 50, 2], n_outs=2)

    print '... getting the pretraining functions'
    pretraining_fns = sda.pretraining_functions(train_set_x=train_set_x,
                                                batch_size=batch_size)

    corruption_levels = [0., 0., 0.]
    for i in xrange(sda.n_layers):
        # go through pretraining epochs
        for epoch in xrange(pretraining_epochs):
            # go through the training set
            c = []
            for batch_index in xrange(n_train_batches):
                c.append(pretraining_fns[i](index=batch_index,
                                            corruption=corruption_levels[i],
                                            lr=pretrain_lr))
            print 'Pre-training layer %i, epoch %d, cost ' % (i, epoch),
            print numpy.mean(c)

    # propagate the training data through the stacked encoders to obtain
    # the 2-D code of the deepest layer
    target = train_set_x.get_value()
    for dA_layer in sda.dA_layers:
        hidden_values_function = dA_layer.get_hidden_values2(sda.x)
        result_function = theano.function(inputs=[sda.x],
                                          outputs=hidden_values_function)
        target = result_function(target)
    print target

    # scatter-plot the first 2000 codes, coloured by class label
    colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k', 'w']
    n = 0
    for x, y in zip(target, train_set_y.eval()):
        if y < len(colors):
            plt.scatter(x[0], x[1], c=colors[y])
        n += 1
        if n > 2000:
            break
    plt.show()
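# The visualisation step above compiles one Theano function per dA layer and
# pushes the data through them in turn. A minimal numpy-only sketch of the
# same forward pass, assuming the trained encoder weights have already been
# pulled out as plain arrays (the `W`/`b` attribute names on the dA layers are
# assumptions, not necessarily the SdA API):
import numpy
import matplotlib.pyplot as plt

def sigmoid(z):
    return 1.0 / (1.0 + numpy.exp(-z))

def encode(x, layer_params):
    """Propagate inputs through a stack of (W, b) sigmoid encoder layers."""
    h = x
    for W, b in layer_params:
        h = sigmoid(numpy.dot(h, W) + b)
    return h

# Usage sketch, once the SdA above has been pretrained:
# layer_params = [(dA.W.get_value(), dA.b.get_value()) for dA in sda.dA_layers]
# codes = encode(train_set_x.get_value(), layer_params)        # shape (N, 2)
# labels = train_set_y.eval()[:2000]
# plt.scatter(codes[:2000, 0], codes[:2000, 1], c=labels, s=5)
# plt.show()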
def new(cls, n_ins, hidden_layers_sizes, n_outs, output_folder=None):
    numpy_rng = numpy.random.RandomState(89677)
    sda = SdA(numpy_rng=numpy_rng, n_ins=n_ins,
              hidden_layers_sizes=hidden_layers_sizes, n_outs=n_outs)
    return cls(sda, output_folder)
def run_sda(datasets=None, batch_size=100, window_size=7, n_principle=3, pretraining_epochs=2000, pretrain_lr=0.02, training_epochs=10000, finetune_lr=0.008, hidden_layers_sizes=[310, 100], corruption_levels = [0., 0.]): """ This function maps spatial PCs to a deep representation. Parameters: datasets: A list containing 3 tuples. Each tuple have 2 entries, which are theano.shared variables. They stands for train, valid, test data. batch_size: Batch size. pretraining_epochs: Pretraining epoches. pretrain_lr: Pretraining learning rate. training_epochs: Fine-tuning epoches. finetune_lr: Fine-tuning learning rate. hidden_layers_sizes:A list containing integers. Each intger specifies a size of a hidden layer. corruption_levels: A list containing floats in the inteval [0, 1]. Each number specifies the corruption level of its corresponding hidden layer. Return: spatial_rep: 2-D numpy.array. Deep representation for each spatial sample. test_score: Accuracy this representations yield on the trained SdA. """ print 'finetuning learning rate=', finetune_lr print 'pretraining learning rate=', pretrain_lr print 'pretraining epoches=', pretraining_epochs print 'fine tuning epoches=', training_epochs print 'batch size=', batch_size print 'hidden layers sizes=', hidden_layers_sizes print 'corruption levels=', corruption_levels # compute number of minibatches for training, validation and testing n_train_batches = datasets[0][0].get_value(borrow=True).shape[0] n_train_batches /= batch_size # numpy random generator numpy_rng = numpy.random.RandomState(89677) print '... building the model' # construct the stacked denoising autoencoder class sda = SdA(numpy_rng=numpy_rng, n_ins=datasets[0][0].get_value(borrow=True).shape[1], hidden_layers_sizes=hidden_layers_sizes, n_outs=gnd_img.max()) ################################################################################ # PRETRAINING THE MODEL # ######################### print '... getting the pretraining functions' pretraining_fns = sda.pretraining_functions(train_set_x=datasets[0][0], batch_size=batch_size) print '... pre-training the model' start_time = time.clock() ## Pre-train layer-wise for i in xrange(sda.n_layers): # go through pretraining epochs for epoch in xrange(pretraining_epochs): # go through the training set c = [] for batch_index in xrange(n_train_batches): c.append(pretraining_fns[i](index=batch_index, corruption=corruption_levels[i], lr=pretrain_lr)) if epoch % 100 == 0: print 'Pre-training layer %i, epoch %d, cost ' % (i, epoch), print numpy.mean(c) end_time = time.clock() print >> sys.stderr, ('The pretraining code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.)) ################################################################################ # FINETUNING THE MODEL # ######################## # get the training, validation and testing function for the model print '... getting the finetuning functions' train_fn, validate_model, test_model = sda.build_finetune_functions( datasets=datasets, batch_size=batch_size, learning_rate=finetune_lr) print '... finetunning the model' # early-stopping parameters patience = 100 * n_train_batches # look as this many examples regardless patience_increase = 2. 
# wait this much longer when a new best is # found improvement_threshold = 0.995 # a relative improvement of this much is # considered significant validation_frequency = min(10 * n_train_batches, patience / 2) # go through this many # minibatche before checking the network # on the validation set; in this case we # check every epoch best_params = None best_validation_loss = numpy.inf test_score = 0. start_time = time.clock() done_looping = False epoch = 0 while (epoch < training_epochs) and (not done_looping): epoch = epoch + 1 for minibatch_index in xrange(n_train_batches): minibatch_avg_cost = train_fn(minibatch_index) iter = (epoch - 1) * n_train_batches + minibatch_index if (iter + 1) % validation_frequency == 0: validation_losses = validate_model() this_validation_loss = numpy.mean(validation_losses) print('epoch %i, minibatch %i/%i, validation error %f %%' % (epoch, minibatch_index + 1, n_train_batches, this_validation_loss * 100.)) # if we got the best validation score until now if this_validation_loss < best_validation_loss: # improve patience if loss improvement is good enough if (this_validation_loss < best_validation_loss * improvement_threshold): patience = max(patience, iter * patience_increase) # save best validation score and iteration number best_validation_loss = this_validation_loss best_iter = iter # test it on the test set test_losses = test_model() test_score = numpy.mean(test_losses) print((' epoch %i, minibatch %i/%i, test error of ' 'best model %f %%') % (epoch, minibatch_index + 1, n_train_batches, test_score * 100.)) end_time = time.clock() print(('Optimization complete with best validation score of %f %%,' 'with test performance %f %%') % (best_validation_loss * 100., test_score * 100.)) print >> sys.stdout, ('The training code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.)) # keep the following line consistent with line 227, function "prepare_data" filename = 'pavia_l1sda_pt%d_ft%d_lrp%.4f_f%.4f_bs%d_pca%d_ws%d' % \ (pretraining_epochs, training_epochs, pretrain_lr, finetune_lr, batch_size, n_principle, window_size) print '... saving parameters' sda.save_params(filename + '_params.pkl') print '... classifying test set with learnt model:' pred_func = theano.function(inputs=[sda.x], outputs=sda.logLayer.y_pred) pred_test = pred_func(datasets[2][0].get_value(borrow=True)) true_test = datasets[2][1].get_value(borrow=True) true_valid = datasets[1][1].get_value(borrow=True) true_train = datasets[0][1].get_value(borrow=True) result_analysis(pred_test, true_train, true_valid, true_test) print '... classifying the whole image with learnt model:' print '...... extracting data' data_spectral, data_spatial, _, _ = \ T_pca_constructor(hsi_img=img, gnd_img=gnd_img, n_principle=n_principle, window_size=window_size, flag='unsupervised', merge=True) start_time = time.clock() print '...... begin ' y = pred_func(data_spectral) + 1 print '...... done ' end_time = time.clock() print 'finished, running time:%fs' % (end_time - start_time) y_rgb = cmap[y, :] margin = (window_size / 2) * 2 # floor it to a multiple of 2 y_image = y_rgb.reshape(width - margin, height - margin, 3) scipy.misc.imsave(filename + 'wholeimg.png' , y_image) print 'Saving classification results' sio.savemat(filename + 'wholeimg.mat', {'y': y.reshape(width - margin, height - margin)}) ############################################################################ print '... performing Student\'s t-test' best_c = 10000. best_g = 10. 
svm_classifier = svm.SVC(C=best_c, gamma=best_g, kernel='rbf') svm_classifier.fit(datasets[0][0].get_value(), datasets[0][1].get_value()) data = [numpy.vstack((datasets[1][0].get_value(), datasets[2][0].get_value())), numpy.hstack((datasets[1][1].get_value(), datasets[2][1].get_value()))] numpy_rng = numpy.random.RandomState(89677) num_test = 100 print 'Total number of tests: %d' % num_test k_sae = [] k_svm = [] for i in xrange(num_test): [_, _], [_, _], [test_x, test_y], _ = \ train_valid_test(data, ratio=[0, 1, 1], batch_size=1, random_state=numpy_rng.random_integers(1e10)) test_y = test_y + 1 # fix the label scale problem pred_y = pred_func(test_x) cm = confusion_matrix(test_y, pred_y) pr_a = cm.trace()*1.0 / test_y.size pr_e = ((cm.sum(axis=0)*1.0/test_y.size) * \ (cm.sum(axis=1)*1.0/test_y.size)).sum() k_sae.append( (pr_a - pr_e) / (1 - pr_e) ) pred_y = svm_classifier.predict(test_x) cm = confusion_matrix(test_y, pred_y) pr_a = cm.trace()*1.0 / test_y.size pr_e = ((cm.sum(axis=0)*1.0/test_y.size) * \ (cm.sum(axis=1)*1.0/test_y.size)).sum() k_svm.append( (pr_a - pr_e) / (1 - pr_e) ) std_k_sae = numpy.std(k_sae) std_k_svm = numpy.std(k_svm) mean_k_sae = numpy.mean(k_sae) mean_k_svm = numpy.mean(k_svm) left = ( (mean_k_sae - mean_k_svm) * numpy.sqrt(num_test*2-2)) \ / ( numpy.sqrt(2./num_test) * num_test * (std_k_sae**2 + std_k_svm**2) ) rv = t(num_test*2.0 - 2) right = rv.ppf(0.95) print '\tstd\t\tmean' print 'k_sae\t%f\t%f' % (std_k_sae, mean_k_sae) print 'k_svm\t%f\t%f' % (std_k_svm, mean_k_svm) if left > right: print 'left = %f, right = %f, test PASSED.' % (left, right) else: print 'left = %f, right = %f, test FAILED.' % (left, right) return test_score
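# The significance test above compares Cohen's kappa for the SdA and the SVM
# over repeated random test splits. A self-contained sketch of the two pieces
# of arithmetic involved -- kappa from a confusion matrix, and the textbook
# pooled two-sample t statistic (which is normalised slightly differently from
# the hand-written expression in the code above):
import numpy
from scipy.stats import t

def cohens_kappa(cm):
    """Cohen's kappa from an integer confusion matrix."""
    n = float(cm.sum())
    pr_a = cm.trace() / n                                    # observed agreement
    pr_e = (cm.sum(axis=0) * cm.sum(axis=1)).sum() / n ** 2  # chance agreement
    return (pr_a - pr_e) / (1.0 - pr_e)

def two_sample_t(k_a, k_b, alpha=0.05):
    """Return (t statistic, one-sided critical value) for equal-sized samples."""
    k_a = numpy.asarray(k_a, dtype=float)
    k_b = numpy.asarray(k_b, dtype=float)
    n = k_a.size
    s_pooled = numpy.sqrt((k_a.var(ddof=1) + k_b.var(ddof=1)) / 2.0)
    t_stat = (k_a.mean() - k_b.mean()) / (s_pooled * numpy.sqrt(2.0 / n))
    return t_stat, t(2 * n - 2).ppf(1.0 - alpha)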
def run_sda(datasets=None, batch_size=100, window_size=7, n_principle=4, pretraining_epochs=2000, pretrain_lr=0.02, training_epochs=10000, finetune_lr=0.008, hidden_layers_sizes=[310, 100], corruption_levels=[0., 0.]): """ This function maps spatial PCs to a deep representation. Parameters: datasets: A list containing 3 tuples. Each tuple have 2 entries, which are theano.shared variables. They stands for train, valid, test data. batch_size: Batch size. pretraining_epochs: Pretraining epoches. pretrain_lr: Pretraining learning rate. training_epochs: Fine-tuning epoches. finetune_lr: Fine-tuning learning rate. hidden_layers_sizes:A list containing integers. Each intger specifies a size of a hidden layer. corruption_levels: A list containing floats in the inteval [0, 1]. Each number specifies the corruption level of its corresponding hidden layer. Return: spatial_rep: 2-D numpy.array. Deep representation for each spatial sample. test_score: Accuracy this representations yield on the trained SdA. """ print 'finetuning learning rate=', finetune_lr print 'pretraining learning rate=', pretrain_lr print 'pretraining epoches=', pretraining_epochs print 'fine tuning epoches=', training_epochs print 'batch size=', batch_size print 'hidden layers sizes=', hidden_layers_sizes print 'corruption levels=', corruption_levels # compute number of minibatches for training, validation and testing n_train_batches = datasets[0][0].get_value(borrow=True).shape[0] n_train_batches /= batch_size # numpy random generator numpy_rng = numpy.random.RandomState(89677) print '... building the model' # construct the stacked denoising autoencoder class sda = SdA(numpy_rng=numpy_rng, n_ins=datasets[0][0].get_value(borrow=True).shape[1], hidden_layers_sizes=hidden_layers_sizes, n_outs=gnd_img.max()) ################################################################################ # PRETRAINING THE MODEL # ######################### print '... getting the pretraining functions' pretraining_fns = sda.pretraining_functions(train_set_x=datasets[0][0], batch_size=batch_size) print '... pre-training the model' start_time = time.clock() ## Pre-train layer-wise for i in xrange(sda.n_layers): # go through pretraining epochs for epoch in xrange(pretraining_epochs): # go through the training set c = [] for batch_index in xrange(n_train_batches): c.append(pretraining_fns[i](index=batch_index, corruption=corruption_levels[i], lr=pretrain_lr)) if epoch % 100 == 0: print 'Pre-training layer %i, epoch %d, cost ' % (i, epoch), print numpy.mean(c) end_time = time.clock() print >> sys.stderr, ('The pretraining code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.)) ################################################################################ # FINETUNING THE MODEL # ######################## # get the training, validation and testing function for the model print '... getting the finetuning functions' train_fn, validate_model, test_model = sda.build_finetune_functions( datasets=datasets, batch_size=batch_size, learning_rate=finetune_lr) print '... finetunning the model' # early-stopping parameters patience = 100 * n_train_batches # look as this many examples regardless patience_increase = 2. 
# wait this much longer when a new best is # found improvement_threshold = 0.995 # a relative improvement of this much is # considered significant validation_frequency = min(10 * n_train_batches, patience / 2) # go through this many # minibatche before checking the network # on the validation set; in this case we # check every epoch best_params = None best_validation_loss = numpy.inf test_score = 0. start_time = time.clock() done_looping = False epoch = 0 while (epoch < training_epochs) and (not done_looping): epoch = epoch + 1 for minibatch_index in xrange(n_train_batches): minibatch_avg_cost = train_fn(minibatch_index) iter = (epoch - 1) * n_train_batches + minibatch_index if (iter + 1) % validation_frequency == 0: validation_losses = validate_model() this_validation_loss = numpy.mean(validation_losses) print('epoch %i, minibatch %i/%i, validation error %f %%' % (epoch, minibatch_index + 1, n_train_batches, this_validation_loss * 100.)) # if we got the best validation score until now if this_validation_loss < best_validation_loss: # improve patience if loss improvement is good enough if (this_validation_loss < best_validation_loss * improvement_threshold): patience = max(patience, iter * patience_increase) # save best validation score and iteration number best_validation_loss = this_validation_loss best_iter = iter # test it on the test set test_losses = test_model() test_score = numpy.mean(test_losses) print((' epoch %i, minibatch %i/%i, test error of ' 'best model %f %%') % (epoch, minibatch_index + 1, n_train_batches, test_score * 100.)) end_time = time.clock() print(('Optimization complete with best validation score of %f %%,' 'with test performance %f %%') % (best_validation_loss * 100., test_score * 100.)) print >> sys.stdout, ('The training code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.)) # keep the following line consistent with line 227, function "prepare_data" filename = 'ksc_l1sda_pt%d_ft%d_lrp%.4f_f%.4f_bs%d_pca%d_ws%d' % \ (pretraining_epochs, training_epochs, pretrain_lr, finetune_lr, batch_size, n_principle, window_size) print '... classifying test set with learnt model:' pred_func = theano.function(inputs=[sda.x], outputs=sda.logLayer.y_pred) pred_test = pred_func(datasets[2][0].get_value(borrow=True)) true_test = datasets[2][1].get_value(borrow=True) true_valid = datasets[1][1].get_value(borrow=True) true_train = datasets[0][1].get_value(borrow=True) result_analysis(pred_test, true_train, true_valid, true_test) print '... classifying the whole image with learnt model:' print '...... extracting data' data_spectral, data_spatial, _, _ = \ T_pca_constructor(hsi_img=img, gnd_img=gnd_img, n_principle=n_principle, window_size=window_size, flag='unsupervised', merge=True) start_time = time.clock() print '...... begin ' y = pred_func(data_spectral) + 1 print '...... done ' end_time = time.clock() print 'finished, running time:%fs' % (end_time - start_time) y_rgb = cmap[y, :] margin = (window_size / 2) * 2 # floor it to a multiple of 2 y_image = y_rgb.reshape(width - margin, height - margin, 3) scipy.misc.imsave(filename + 'wholeimg.png', y_image) print 'Saving classification results' sio.savemat(filename + 'wholeimg.mat', {'y': y.reshape(width - margin, height - margin)}) ############################################################################ print '... performing Student\'s t-test' best_c = 10000. best_g = 10. 
svm_classifier = svm.SVC(C=best_c, gamma=best_g, kernel='rbf') svm_classifier.fit(datasets[0][0].get_value(), datasets[0][1].get_value()) data = [ numpy.vstack((datasets[1][0].get_value(), datasets[2][0].get_value())), numpy.hstack((datasets[1][1].get_value(), datasets[2][1].get_value())) ] numpy_rng = numpy.random.RandomState(89677) num_test = 100 print 'Total number of tests: %d' % num_test k_sae = [] k_svm = [] for i in xrange(num_test): [_, _], [_, _], [test_x, test_y], _ = \ train_valid_test(data, ratio=[0, 1, 1], batch_size=1, random_state=numpy_rng.random_integers(1e10)) test_y = test_y + 1 # fix the label scale problem pred_y = pred_func(test_x) cm = confusion_matrix(test_y, pred_y) pr_a = cm.trace() * 1.0 / test_y.size pr_e = ((cm.sum(axis=0)*1.0/test_y.size) * \ (cm.sum(axis=1)*1.0/test_y.size)).sum() k_sae.append((pr_a - pr_e) / (1 - pr_e)) pred_y = svm_classifier.predict(test_x) cm = confusion_matrix(test_y, pred_y) pr_a = cm.trace() * 1.0 / test_y.size pr_e = ((cm.sum(axis=0)*1.0/test_y.size) * \ (cm.sum(axis=1)*1.0/test_y.size)).sum() k_svm.append((pr_a - pr_e) / (1 - pr_e)) std_k_sae = numpy.std(k_sae) std_k_svm = numpy.std(k_svm) mean_k_sae = numpy.mean(k_sae) mean_k_svm = numpy.mean(k_svm) left = ( (mean_k_sae - mean_k_svm) * numpy.sqrt(num_test*2-2)) \ / ( numpy.sqrt(2./num_test) * num_test * (std_k_sae**2 + std_k_svm**2) ) rv = t(num_test * 2.0 - 2) right = rv.ppf(0.95) print '\tstd\t\tmean' print 'k_sae\t%f\t%f' % (std_k_sae, mean_k_sae) print 'k_svm\t%f\t%f' % (std_k_svm, mean_k_svm) if left > right: print 'left = %f, right = %f, test PASSED.' % (left, right) else: print 'left = %f, right = %f, test FAILED.' % (left, right) return test_score
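# The whole-image step in both run_sda variants maps predicted labels to
# colours through a lookup table and reshapes them into the cropped image. A
# small sketch of just that colour-mapping logic (cmap, width, height and
# window_size stand in for the module-level values the scripts rely on):
import numpy

def labels_to_rgb(y, cmap, width, height, window_size):
    """Turn a flat vector of class labels into an RGB image."""
    margin = (window_size // 2) * 2       # pixels lost to the spatial window
    y_rgb = cmap[y, :]                    # one RGB triple per classified pixel
    return y_rgb.reshape(width - margin, height - margin, 3)

# Toy usage with a colour table for background plus three classes:
# cmap = numpy.array([[0, 0, 0], [255, 0, 0], [0, 255, 0], [0, 0, 255]])
# rgb = labels_to_rgb(y, cmap, width=100, height=80, window_size=7)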
print 'pretraining epoches=', pretraining_epochs print 'fine tuning epoches=', training_epochs print 'batch size=', batch_size print 'hidden layers sizes=', hidden_layers_sizes print 'corruption levels=', corruption_levels # compute number of minibatches for training, validation and testing n_train_batches = datasets[0][0].get_value(borrow=True).shape[0] n_train_batches /= batch_size # numpy random generator numpy_rng = numpy.random.RandomState(89677) print '... building the model' # construct the stacked denoising autoencoder class sda = SdA(numpy_rng=numpy_rng, n_ins=bands, hidden_layers_sizes=hidden_layers_sizes, n_outs=gnd_img.max()) ######################### # PRETRAINING THE MODEL # ######################### print '... getting the pretraining functions' pretraining_fns = sda.pretraining_functions(train_set_x=datasets[0][0], batch_size=batch_size) print '... pre-training the model' start_time = time.clock() ## Pre-train layer-wise for i in xrange(sda.n_layers): # go through pretraining epochs for epoch in xrange(pretraining_epochs):
print 'pretraining epoches=', pretraining_epochs print 'fine tuning epoches=', training_epochs print 'batch size=', batch_size print 'hidden layers sizes=', hidden_layers_sizes print 'corruption levels=', corruption_levels # compute number of minibatches for training, validation and testing n_train_batches = datasets[0][0].get_value(borrow=True).shape[0] n_train_batches /= batch_size # numpy random generator numpy_rng = numpy.random.RandomState(89677) print '... building the model' # construct the stacked denoising autoencoder class sda = SdA(numpy_rng=numpy_rng, n_ins=datasets[0][0].get_value(borrow=True).shape[1], hidden_layers_sizes=hidden_layers_sizes, n_outs=gnd_img.max()) ################################################################################ # PRETRAINING THE MODEL # ######################### print '... getting the pretraining functions' pretraining_fns = sda.pretraining_functions(train_set_x=datasets[0][0], batch_size=batch_size) print '... pre-training the model' start_time = time.clock() ## Pre-train layer-wise for i in xrange(sda.n_layers): # go through pretraining epochs
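# Both fragments above compute the batch count with `n_train_batches /=
# batch_size`, which is integer division under Python 2 but produces a float
# under Python 3 and then breaks the range/xrange calls. A version-safe form
# uses floor division explicitly:
n_samples = 1096      # e.g. datasets[0][0].get_value(borrow=True).shape[0]
batch_size = 100
n_train_batches = n_samples // batch_size   # 10 on both Python 2 and Python 3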
train_set_x = theano.shared(numpy.asarray(numpy.vstack(cute_data), dtype=theano.config.floatX), borrow=True) # compute number of minibatches for training, validation and testing n_train_batches = train_set_x.get_value(borrow=True).shape[0] n_train_batches //= batch_size # numpy random generator # start-snippet-3 numpy_rng = numpy.random.RandomState(89677) print('... building the model') # construct the stacked denoising autoencoder class sda = SdA( numpy_rng=numpy_rng, n_ins=data.shape[1], hidden_layers_sizes=[1000, 500, 20, data.shape[1]], n_outs=10 ) # end-snippet-3 start-snippet-4 pretraining_fns = sda.pretraining_functions(train_set_x=train_set_x, batch_size=batch_size) print('... pre-training the model') start_time = timeit.default_timer() ## Pre-train layer-wise corruption_levels = [.1, .1, .1, .1] for i in range(sda.n_layers): # go through pretraining epochs for epoch in range(pretraining_epochs): # go through the training set c = []
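# The non-zero corruption_levels above are what make the autoencoders
# "denoising": each layer zeroes a random fraction of its input before trying
# to reconstruct it. The SdA class does this inside the Theano graph with a
# binomial RandomStreams mask; an illustrative numpy equivalent:
import numpy

def corrupt(x, corruption_level, rng=None):
    """Apply masking noise: zero out `corruption_level` of the entries of x."""
    if rng is None:
        rng = numpy.random.RandomState(89677)
    mask = rng.binomial(n=1, p=1.0 - corruption_level, size=x.shape)
    return x * mask

# x_tilde = corrupt(x, corruption_level=0.1)   # roughly 10% of entries zeroed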
def main(): # setup output directory d = datetime.datetime.today() output_folder = "out/{}-{}-{}_{}:{}:{}".format(d.year, d.month, d.day, d.hour, d.minute, d.second) if not os.path.isdir(output_folder): os.makedirs(output_folder) os.chdir(output_folder) # load dataset datasets = load_data() train_set_x, train_set_y = util.shared_dataset(datasets[0]) valid_set_x, valid_set_y = util.shared_dataset(datasets[1]) test_set_x, test_set_y = util.shared_dataset(datasets[2]) train_set = (train_set_x, train_set_y) valid_set = (valid_set_x, valid_set_y) test_set = (test_set_x, test_set_y) n_input = train_set_x.get_value(borrow=True).shape[1] n_output = train_set_y.get_value(borrow=True).shape[1] # compute number of minibatches for training, validation and testing n_train_batches = train_set_x.get_value(borrow=True).shape[0] n_train_batches /= batch_size # numpy random generator # start-snippet-3 numpy_rng = numpy.random.RandomState(89677) print '... building the model' # construct the stacked denoising autoencoder class sda = SdA( numpy_rng=numpy_rng, n_ins=n_input, hidden_layers_sizes=[1000, 1000, 1000], n_outs=n_output ) predict_fn = sda.build_predict_function() ######################### # PRETRAINING THE MODEL # ######################### print '... getting the pretraining functions' pretraining_fns = sda.pretraining_functions(train_set_x=train_set_x, batch_size=batch_size) print '... pre-training the model' start_time = time.clock() ## Pre-train layer-wise corruption_levels = [.1, .2, .3] for i in xrange(sda.n_layers): # go through pretraining epochs for epoch in xrange(pretraining_epochs): # go through the training set c = [] for batch_index in xrange(n_train_batches): c.append(pretraining_fns[i](index=batch_index, corruption=corruption_levels[i], lr=pretrain_lr)) print("Pre-training layer {}, epoch {}, cost ".format(i, epoch)), print("{}".format(numpy.mean(c))) end_time = time.clock() print >> sys.stderr, ('The pretraining code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.)) ######################## # FINETUNING THE MODEL # ######################## # get the training, validation and testing function for the model print '... getting the finetuning functions' train_fn, validate_model, test_model = sda.build_finetune_functions( datasets=(train_set, valid_set, test_set), batch_size=batch_size, learning_rate=finetune_lr ) print '... finetunning the model' # early-stopping parameters patience = 10 * n_train_batches # look as this many examples regardless patience_increase = 2. # wait this much longer when a new best is # found improvement_threshold = 0.995 # a relative improvement of this much is # considered significant validation_frequency = min(n_train_batches, patience / 2) # go through this many # minibatche before checking the network # on the validation set; in this case we # check every epoch best_validation_loss = numpy.inf test_score = 0. 
start_time = time.clock() done_looping = False epoch = 0 while (epoch < training_epochs) and (not done_looping): epoch = epoch + 1 for minibatch_index in xrange(n_train_batches): minibatch_avg_cost = train_fn(minibatch_index) iter = (epoch - 1) * n_train_batches + minibatch_index if (iter + 1) % validation_frequency == 0: validation_losses = validate_model() this_validation_loss = numpy.mean(validation_losses) print('epoch %i, minibatch %i/%i, validation error %f %%' % (epoch, minibatch_index + 1, n_train_batches, this_validation_loss * 100.)) # if we got the best validation score until now if this_validation_loss < best_validation_loss: #improve patience if loss improvement is good enough if ( this_validation_loss < best_validation_loss * improvement_threshold ): patience = max(patience, iter * patience_increase) # save best validation score and iteration number best_validation_loss = this_validation_loss best_iter = iter # test it on the test set test_losses = test_model() test_score = numpy.mean(test_losses) print((' epoch %i, minibatch %i/%i, test error of ' 'best model %f %%') % (epoch, minibatch_index + 1, n_train_batches, test_score * 100.)) if patience <= iter: done_looping = True break end_time = time.clock() print( ( 'Optimization complete with best validation score of %f %%, ' 'on iteration %i, ' 'with test performance %f %%' ) % (best_validation_loss * 100., best_iter + 1, test_score * 100.) ) print >> sys.stderr, ('The training code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.)) ########### # PREDICT # ########### y_pred = predict_fn(test_set_x.get_value(borrow=True)) mae, mre = util.calculate_error_indexes(test_set_y, y_pred) print("-*-*RESULT*-*-") print("mae={}".format(mae)) print("mre={}".format(mre)) # plot for i in xrange(n_output): filename = "{}.png".format(str(i)) plot.savefig(filename, test_set_x, y_pred, indexes=[i])
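# The final report prints MAE and MRE computed by util.calculate_error_indexes,
# whose definition is not shown in this snippet. A plausible numpy version of
# those two error indexes (an assumption about what the helper computes, with
# eps guarding against division by zero):
import numpy

def mae_mre(y_true, y_pred, eps=1e-8):
    """Mean absolute error and mean relative error over all predicted outputs."""
    y_true = numpy.asarray(y_true, dtype=float)
    y_pred = numpy.asarray(y_pred, dtype=float)
    abs_err = numpy.abs(y_pred - y_true)
    return abs_err.mean(), (abs_err / (numpy.abs(y_true) + eps)).mean()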
def build_model(trainval_set, options):
    if options['retrain'] == 0:
        if options['verbose'] > 4:
            print >> sys.stderr, ('... building the model')
        # construct the stacked denoising autoencoder class
        train_set_x, train_set_y = trainval_set
        n_train_batches = train_set_x.get_value(borrow=True).shape[0]
        n_train_batches /= options['batchsize']

        sda = SdA(numpy_rng=options['numpy_rng'],
                  theano_rng=options['theano_rng'],
                  n_ins=options['ndim'],
                  hidden_layers_sizes=options['hlayers'],
                  n_outs=options['nclasses'],
                  n_outs_b=options['nclasses'],
                  tau=None)

        if options['verbose'] > 4:
            print >> sys.stderr, ('... getting the pretraining functions')
        pretraining_fns = sda.pretraining_functions(
            train_set_x=train_set_x,
            batch_size=options['batchsize'],
            tau=None)
    else:
        # Restore the fine-tuned values: copy the parameters of the reusable
        # model, then load them into its secondary parameter set (params_b).
        sda_reuse_pt_model = []
        for para_copy in options['sda_reuse_model'].params:
            sda_reuse_pt_model.append(para_copy.get_value())

        sda = options['sda_reuse_model']
        for ids in range(len(sda.params)):
            # set the value
            sda.params_b[ids].set_value(sda_reuse_pt_model[ids])

        n_outs = sda.params_b[-2].get_value().shape[0]
        if options['nclasses_source'] != options['nclasses']:
            print >> sys.stderr, ("Dropping logistic layer...")
            sda.change_lastlayer(n_outs, options['nclasses'])

        # Reuse layer-wise fine-tuning: no pretraining functions are needed.
        pretraining_fns = None
    return (sda, pretraining_fns)
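# When the source and target tasks have different label counts, build_model
# calls sda.change_lastlayer to replace the logistic output layer while keeping
# the pretrained hidden layers. A numpy-level sketch of that idea (parameter
# layout and zero initialisation are assumptions, not the SdA implementation):
import numpy

def reinit_output_layer(params, n_hidden, n_new_classes):
    """Keep all hidden-layer parameters, replace the final (W, b) softmax pair."""
    new_W = numpy.zeros((n_hidden, n_new_classes), dtype='float32')
    new_b = numpy.zeros(n_new_classes, dtype='float32')
    return params[:-2] + [new_W, new_b]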
def pretrain_SdA(pretraining_epochs=50, pretrain_lr=0.001, batch_size=100): """ Pretrain an SdA model for the given number of training epochs. The model is either initialized from scratch, or is reconstructed from a previously pickled model. :type pretraining_epochs: int :param pretraining_epochs: number of epoch to do pretraining :type pretrain_lr: float :param pretrain_lr: learning rate to be used during pre-training :type batch_size: int :param batch_size: train in mini-batches of this size """ current_dir = os.getcwd() os.chdir(options.dir) today = datetime.today() day = str(today.date()) hour = str(today.time()) output_filename = "stacked_denoising_autoencoder_" + options.arch + "." + day + "." + hour output_file = open(output_filename,'w') os.chdir(current_dir) print >> output_file, "Run on " + str(datetime.now()) # Get the training data sample from the input file data_set_file = openFile(str(options.inputfile), mode = 'r') datafiles = extract_unlabeled_chunkrange(data_set_file, num_files = 25, offset = options.offset) train_set_x = load_data_unlabeled(datafiles) data_set_file.close() # compute number of minibatches for training, validation and testing n_train_batches, n_features = train_set_x.get_value(borrow=True).shape n_train_batches /= batch_size # numpy random generator numpy_rng = numpy.random.RandomState(89677) # Check if we can restore from a previously trained model, # otherwise construct a new SdA if options.restorefile is not None: print >> output_file, 'Unpickling the model from %s ...' % (options.restorefile) current_dir = os.getcwd() os.chdir(options.dir) f = file(options.restorefile, 'rb') sda_model = cPickle.load(f) f.close() os.chdir(current_dir) else: print '... building the model' arch_list_str = options.arch.split("-") arch_list = [int(item) for item in arch_list_str] corruption_list = [options.corruption for i in arch_list] sda_model = SdA(numpy_rng=numpy_rng, n_ins=n_features, hidden_layers_sizes=arch_list, corruption_levels = corruption_list, n_outs=-1) ######################### # PRETRAINING THE MODEL # ######################### print '... getting the pretraining functions' pretraining_fns = sda_model.pretraining_functions(train_set_x=train_set_x, batch_size=batch_size) print '... pre-training the model' start_time = time.clock() ## Pre-train layer-wise corruption_levels = sda_model.corruption_levels learning_rates = [pretrain_lr * 10. for i in arch_list] learning_rates[0] = pretrain_lr for i in xrange(sda_model.n_layers): for epoch in xrange(pretraining_epochs): # go through the training set c = [] for batch_index in xrange(n_train_batches): c.append(pretraining_fns[i](index=batch_index, corruption=corruption_levels[i], lr=learning_rates[i], momentum=options.momentum, weight_decay=options.weight_decay)) print >> output_file, 'Pre-training layer %i, epoch %d, cost ' % (i, epoch), print >> output_file, numpy.mean(c) if options.savefile is not None: print >> output_file, 'Pickling the model...' current_dir = os.getcwd() os.chdir(options.dir) f = file(options.savefile, 'wb') cPickle.dump(sda_model, f, protocol=cPickle.HIGHEST_PROTOCOL) f.close() os.chdir(current_dir) end_time = time.clock() print >> output_file, ('The pretraining code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.)) output_file.close()
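# The restore/save branches above round-trip the whole SdA object through
# cPickle with the highest protocol. A compact helper pair that does the same
# and also works under Python 3, where cPickle became pickle:
try:
    import cPickle as pickle
except ImportError:
    import pickle

def save_model(model, path):
    with open(path, 'wb') as f:
        pickle.dump(model, f, protocol=pickle.HIGHEST_PROTOCOL)

def load_model(path):
    with open(path, 'rb') as f:
        return pickle.load(f)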
def test_mom_wd(filename, num_epochs=10, momentum=0., weight_decay=0., pretrain_lr=0.001, batch_size=10): """ Pretrain an SdA model using momentum, weight-decay, or both for the given number of training epochs. The model is initialized from scratch. :type filename: string :param filename: the prefix for the name of the file capturing the output of this test :type num_epochs: int :param num_epochs: number of epoch to do pretraining :type momentum: float :param momentum: momentum rate for updating parameters when pre-training :type weight_decay: float :param weight_decay: multiplicative factor for degrading the size of updates to weights effectively penalizing larger weights. :type pretrain_lr: float :param pretrain_lr: learning rate to be used during pre-training :type batch_size: int :param batch_size: train in mini-batches of this size """ current_dir = os.getcwd() os.chdir(options.dir) today = datetime.today() day = str(today.date()) hour = str(today.time()) output_filename = filename + "_sda_pretrain." + day + "." + hour output_file = open(output_filename,'w') os.chdir(current_dir) print >> output_file, "Run on " + str(datetime.now()) # Get the training data sample from the input file data_set_file = openFile(str(options.inputfile), mode = 'r') datafiles = extract_unlabeled_chunkrange(data_set_file, num_files = 10) train_set_x = load_data_unlabeled(datafiles) data_set_file.close() # compute number of minibatches for training, validation and testing n_train_batches, n_features = train_set_x.get_value(borrow=True).shape n_train_batches /= batch_size # numpy random generator numpy_rng = numpy.random.RandomState(89677) print '... building the model' sda_model = SdA(numpy_rng=numpy_rng, n_ins=n_features, hidden_layers_sizes=[700, 700, 300, 50], corruption_levels = [0.2,0.2,0.2,0.2], n_outs=3, dA_losses=['squared','xent','xent','xent']) ######################### # PRETRAINING THE MODEL # ######################### print '... getting the pretraining functions' pretraining_fns = sda_model.pretraining_functions(train_set_x=train_set_x, batch_size=batch_size) print '... pre-training the model' start_time = time.clock() ## Pre-train layer-wise corruption_levels = [float(options.corruption), float(options.corruption), float(options.corruption), float(options.corruption)] for i in xrange(sda_model.n_layers): for epoch in xrange(num_epochs): # go through the training set c = [] for batch_index in xrange(n_train_batches): c.append(pretraining_fns[i](index=batch_index, corruption=corruption_levels[i], lr=pretrain_lr, momentum=momentum, weight_decay=weight_decay)) print >> output_file, 'Pre-training layer %i, epoch %d, cost ' % (i, epoch), print >> output_file, numpy.mean(c) end_time = time.clock() print >> output_file, ('Pretraining time for file ' + os.path.split(__file__)[1] + ' was %.2fm to go through %i epochs' % (((end_time - start_time) / 60.), (num_epochs / 2))) output_file.close()
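# The momentum and weight_decay arguments passed to the pretraining functions
# above correspond to the usual SGD-with-momentum update plus L2 shrinkage.
# The real update is compiled into the Theano graph; this numpy sketch just
# shows the arithmetic for a single parameter array:
import numpy

def sgd_momentum_step(w, grad, velocity, lr, momentum=0.0, weight_decay=0.0):
    """One minibatch update; returns the new weights and the new velocity."""
    velocity = momentum * velocity - lr * (grad + weight_decay * w)
    return w + velocity, velocity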
w_test *= 1. / split_level
w_train *= 1. / (1. - split_level)

###################### Build Model ########################
# compute number of minibatches for training, validation and testing
n_train_batches = X.shape[0]
n_train_batches /= batch_size

# np random generator
np_rng = np.random.RandomState(89677)
print '... building the model'
# construct the stacked denoising autoencoder class
sda = SdA(numpy_rng=np_rng, n_ins=39,
          hidden_layers_sizes=hidden_layers_sizes, n_outs=2)

if load_pretrain:
    f = open(pre_name, "rb")
    sda.load(f)
    f.close()
else:
    #########################
    # PRETRAINING THE MODEL #
    #########################
    print '... getting the pretraining functions'
    pretraining_fns = sda.pretraining_functions(train_set_x=theano.shared(X),
                                                batch_size=batch_size)
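# The load_pretrain branch above restores earlier pretraining results through
# sda.load so the layer-wise phase can be skipped on later runs. If only the
# raw parameter arrays need to be kept, the same round trip can be done with a
# numpy archive (an alternative sketch, not the format sda.load expects;
# `params` is a list of Theano shared variables as in the SdA class):
import numpy

def save_weights(params, path):
    numpy.savez(path, *[p.get_value() for p in params])

def load_weights(params, path):
    archive = numpy.load(path)
    for i, p in enumerate(params):
        p.set_value(archive['arr_%d' % i])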
def run_classification(pretrain_lr=0.001, # SdA and DBN learning_rate=0.01, L1_reg=0.001, L2_reg=0.0001, pretraining_epochs=3, # SdA and DBN n_epochs=5, batch_size=64, display_step=1000, dataset='mnist.pkl.gz', n_in=28*28, # mnist image shape input_shape=(-1,1,28,28), # CNN and LeNet5, this is MNIST dimensions n_out=10, # number of MNIST classes n_hidden=1000, # (1-layer) MLP hidden_layers_sizes=[500,500,500], CNN_filter_size=20, # CNN LeNet5_filter_sizes=[50,20], # LeNet5 corruption_levels=[0.1,0.2,0.3], # SdA k=1, # DBN # model_name can be the name of a model to create, # or file path to load a saved file model_name='LogisticRegression', best_model_file_path='best_model.pkl' ): """ Demonstrate stochastic gradient descent optimization for a multilayer perceptron This is demonstrated on MNIST. :type learning_rate: float :param learning_rate: learning rate used (factor for the stochastic gradient :type L1_reg: float :param L1_reg: L1-norm's weight when added to the cost (see regularization) :type L2_reg: float :param L2_reg: L2-norm's weight when added to the cost (see regularization) :type n_epochs: int :param n_epochs: maximal number of epochs to run the optimizer :type dataset: string :param dataset: the path of the MNIST dataset file from http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz """ ###################### # Instance Variables # ###################### # instance variables to be used in some of the models below numpy_rng = np.random.RandomState(1234) ############# # Load Data # ############# datasets = load_data(dataset) train_set_x, train_set_y = datasets[0] val_set_x, val_set_y = datasets[1] test_set_x, test_set_y = datasets[2] ################################### # Calculate number of Minibatches # ################################### n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size n_val_batches = val_set_x.get_value(borrow=True).shape[0] // batch_size n_test_batches = test_set_x.get_value(borrow=True).shape[0] // batch_size ############################################ # allocate symbolic variables for the data # ############################################ index = T.lscalar() # index to a [mini]batch # generate symbolic variables for input (x and y represent a minibatch) x = T.matrix('x') # data, presented as rasterized images y = T.ivector('y') # labels, presented as 1D vector of [int] labels ############### # BUILD MODEL # ############### print('... 
building the model') model=None if model_name == 'LogisticRegression': model = LogisticRegression( input=x, n_in=n_in, n_out=n_out ) elif model_name == 'MLP': model = MLP( numpy_rng=numpy_rng, input=x, n_in=n_in, n_hidden=n_hidden, n_out=n_out ) elif model_name == 'DeepMLP': model = DeepMLP( numpy_rng=numpy_rng, input=x, n_in=n_in, hidden_layers_sizes=hidden_layers_sizes, n_out=n_out ) elif model_name == 'CNN': model = CNN( numpy_rng=numpy_rng, input=x, input_shape=input_shape, filter_sizes=[CNN_filter_size], n_out=n_out, batch_size=batch_size ) elif model_name == 'LeNet5': model = LeNet5( numpy_rng=numpy_rng, input=x, input_shape=input_shape, filter_sizes=LeNet5_filter_sizes, n_out=n_out, batch_size=batch_size ) elif model_name == 'SdA': model = SdA( numpy_rng=numpy_rng, input=x, n_in=n_in, hidden_layers_sizes=hidden_layers_sizes, n_out=n_out ) elif model_name == 'DBN': model = DBN( numpy_rng=numpy_rng, input=x, n_in=n_in, hidden_layers_sizes=hidden_layers_sizes, n_out=n_out ) # Assume the model_name is a path elif model_name != None: try: model = pickle.load(open(model_name)) except: raise "Error! Model file path not valid." else: raise "Error! No model selected." ######################################### # PRETRAINING THE MODEL (SdA, DBN Only) # ######################################### if (model_name == 'SdA') or (model_name == 'DBN'): print('... starting pretraining') ######################### # PreTraining Functions # ######################### print('... getting the pretraining functions') if model_name == 'SdA': pretraining_fns = model.pretraining_functions( x=x, # I had to move x here, instead of in the model, or there was an error. train_set_x=train_set_x, batch_size=batch_size) elif model_name == 'DBN': pretraining_fns = model.pretraining_functions( x=x, # I had to move x here, instead of in the model, or there was an error. train_set_x=train_set_x, batch_size=batch_size, k=k) ################## # PRETRAIN MODEL # ################## print('... 
pre-training the model') start_time = timeit.default_timer() if model_name == 'SdA': corruption_levels = [.1, .2, .3] ## Pre-train layer-wise for i in range(model.n_layers): # go through pretraining epochs for epoch in range(pretraining_epochs): # go through the training set cost = [] for batch_index in range(n_train_batches): if model_name == 'SdA': cost.append( pretraining_fns[i](index=batch_index, corruption=corruption_levels[i], lr=pretrain_lr) ) elif model_name == 'DBN': cost.append( pretraining_fns[i](index=batch_index, lr=pretrain_lr) ) print('Pre-training layer %i, epoch %d, cost %f' % (i, epoch+1, np.mean(cost, dtype='float64')) ) end_time = timeit.default_timer() print(('The pretraining code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.)), file=sys.stderr) print('...End of pre-training') ##################### # Training Function # ##################### cost, updates = model.get_cost_updates( y=y, L1_reg = L1_reg, L2_reg = L2_reg, learning_rate=learning_rate ) # compiling a Theano function `train_model` that returns the cost, but # in the same time updates the parameter of the model based on the rules # defined in `updates` train_model = theano.function( inputs=[index], outputs=model.get_latest_cost(), updates=updates, givens={ x: train_set_x[index * batch_size: (index + 1) * batch_size], y: train_set_y[index * batch_size: (index + 1) * batch_size] }, name='train' ) ################################## # Validation & Testing Functions # ################################## # compiling a Theano function that computes the mistakes that are made # by the model on a minibatch validate_model = theano.function( inputs=[index], outputs=[model.errors(y), model.get_loss(), model.get_L1(), model.get_L2_sqr()], givens={ x: val_set_x[index * batch_size:(index + 1) * batch_size], y: val_set_y[index * batch_size:(index + 1) * batch_size] }, name='validate' ) test_model = theano.function( inputs=[index], outputs=model.errors(y), givens={ x: test_set_x[index * batch_size:(index + 1) * batch_size], y: test_set_y[index * batch_size:(index + 1) * batch_size] }, name='test' ) ############### # TRAIN MODEL # ############### print('... training') # early-stopping parameters patience = 10 * n_train_batches # look as this many examples regardless patience_increase = 2. # wait this much longer when a new best is # found improvement_threshold = 0.995 # a relative improvement of this much is # considered significant validation_frequency = min(n_train_batches, patience // 2) # go through this many # minibatche before checking the network # on the validation set; in this case we # check every epoch best_validation_loss = np.inf best_iter = 0 test_score = 0. 
start_time = timeit.default_timer() epoch = 0 done_looping = False minibatch_training_costs = [] # go through training epochs while (epoch < n_epochs) and (not done_looping): epoch = epoch + 1 for minibatch_index in range(n_train_batches): ################# # Training Step # ################# latest_minibatch_training_cost = train_model(minibatch_index) minibatch_training_costs.append(latest_minibatch_training_cost) iter = (epoch - 1) * n_train_batches + minibatch_index if iter % display_step == 0: print('training @ iter = ', iter) if (iter + 1) % validation_frequency == 0: ################# # Training Loss # ################# this_training_loss = np.mean(minibatch_training_costs, dtype='float64') print('latest average training loss: %f' % (this_training_loss)) minibatch_training_costs = [] ################### # Validation Loss # ################### validation_losses = [validate_model(i)[0] for i in range(n_val_batches)] this_validation_loss = np.mean(validation_losses, dtype='float64') print('epoch %i, minibatch %i/%i, validation error %f %%' % (epoch, minibatch_index + 1, n_train_batches, this_validation_loss * 100.) ) ######################## # Validation Sublosses # ######################## # Latest sublosses for our models include: unregularized loss, L1_norm, L2_norm unregularized_losses = [validate_model(i)[1] for i in range(n_val_batches)] this_unregularized_loss = np.mean(unregularized_losses, dtype='float64') L1_losses = [validate_model(i)[2] for i in range(n_val_batches)] this_L1_loss = np.mean(L1_losses, dtype='float64') L2_sqr_losses = [validate_model(i)[3] for i in range(n_val_batches)] this_L2_sqr_loss = np.mean(L2_sqr_losses, dtype='float64') print('latest total validation loss: %f' % (this_unregularized_loss + this_L1_loss + this_L2_sqr_loss) ) print('latest unregularized loss: %f' % (this_unregularized_loss) ) print('latest L1_norm: %f' % (this_L1_loss) ) print('latest L2_norm: %f' % (this_L2_sqr_loss) ) ################### # Save Best Model # ################### # if we got the best validation score until now if this_validation_loss < best_validation_loss: #improve patience if loss improvement is good enough if this_validation_loss < (best_validation_loss * improvement_threshold): patience = max(patience, iter * patience_increase) ################### # Test Best Model # ################### test_losses = [test_model(i) for i in range(n_test_batches)] test_score = np.mean(test_losses, dtype='float64') print((' epoch %i, minibatch %i/%i, test error of best model %f %%') % (epoch, minibatch_index + 1, n_train_batches, test_score * 100.) ) ################### # Sav Best Model # ################### with open(best_model_file_path, 'wb') as f: pickle.dump(model, f) if patience <= iter: done_looping = True break end_time = timeit.default_timer() print(('Optimization complete. Best validation score of %f %% ' 'obtained at iteration %i, with test performance %f %%') % (best_validation_loss * 100., best_iter + 1, test_score * 100.)) print('The code run for %d epochs, with %f epochs/sec' % ( epoch, 1. * epoch / (end_time - start_time))) print(('The code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.)), file=sys.stderr)
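# The fine-tuning loop above is the standard early-stopping-with-patience
# schedule. Stripped of the compiled Theano functions, its control flow is just
# the skeleton below (validate() is a stand-in for the real validation call):
def early_stopping_skeleton(validate, n_epochs, n_train_batches, patience,
                            patience_increase=2.0, improvement_threshold=0.995):
    best_loss = float('inf')
    validation_frequency = min(n_train_batches, patience // 2)
    for epoch in range(1, n_epochs + 1):
        for minibatch_index in range(n_train_batches):
            it = (epoch - 1) * n_train_batches + minibatch_index
            if (it + 1) % validation_frequency == 0:
                loss = validate()
                if loss < best_loss * improvement_threshold:
                    # significant improvement: allow training to run longer
                    patience = max(patience, it * patience_increase)
                if loss < best_loss:
                    best_loss = loss
            if patience <= it:
                return best_loss          # patience exhausted: stop early
    return best_loss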
def pretrain(shared_args, private_args): """ Pretrain an SdA model for the given number of training epochs. The model is either initialized from scratch, or is reconstructed from a previously pickled model. :type shared_args: dict :param shared_args: dict containing all the arguments common to both models. :type private_args: dict :param private_args: dict containing all the arguments specific to each model spawned off this first process. """ # Import sandbox.cuda to bind the specified GPU to this subprocess # then import the remaining theano and model modules. import theano.sandbox.cuda theano.sandbox.cuda.use(private_args['gpu']) import theano import theano.tensor as T from theano.tensor.shared_randomstreams import RandomStreams from SdA import SdA shared_args_dict = shared_args[0] current_dir = os.getcwd() os.chdir(shared_args_dict['dir']) today = datetime.today() day = str(today.date()) hour = str(today.time()) arch_list = get_arch_list(private_args) corruption_list = [shared_args_dict['corruption'] for i in arch_list] layer_types = parse_layer_type(shared_args_dict['layertype'], len(arch_list)) output_filename = "hybrid_pretraining_sda_" + "_".join(elem for elem in layer_types) + private_args['arch'] + "." + day + "." + hour output_file = open(output_filename,'w') os.chdir(current_dir) print >> output_file, "Run on " + str(datetime.now()) # Get the training data sample from the input file data_set_file = openFile(str(shared_args_dict['input']), mode = 'r') datafiles = extract_unlabeled_chunkrange(data_set_file, num_files = 30, offset = shared_args_dict['offset']) if datafiles is None: print("No data was returned, exiting.") data_set_file.close() output_file.close() return train_set_x = load_data_unlabeled(datafiles) # DEBUG: get validation set too validation_datafiles = extract_unlabeled_chunkrange(data_set_file, num_files = 5, offset = shared_args_dict['offset'] + 30) valid_set_x = load_data_unlabeled(validation_datafiles) data_set_file.close() # compute number of minibatches for training, validation and testing n_train_batches, n_features = train_set_x.get_value(borrow=True).shape n_train_batches /= shared_args_dict['batch_size'] # numpy random generator numpy_rng = numpy.random.RandomState(89677) # Set the initial value of the learning rate learning_rate = theano.shared(numpy.asarray(shared_args_dict['pretrain_lr'], dtype=theano.config.floatX)) # Check if we can restore from a previously trained model, # otherwise construct a new SdA if private_args.has_key('restore'): print >> output_file, 'Unpickling the model from %s ...' % (private_args['restore']) current_dir = os.getcwd() os.chdir(shared_args_dict['dir']) f = file(private_args['restore'], 'rb') sda_model = cPickle.load(f) f.close() os.chdir(current_dir) else: print '... building the model' sda_model = SdA(numpy_rng=numpy_rng, n_ins=n_features, hidden_layers_sizes=arch_list, corruption_levels = corruption_list, layer_types=layer_types, loss=shared_args_dict['loss'], n_outs=-1, sparse_init=shared_args_dict['sparse_init'], opt_method=shared_args_dict['opt_method']) ######################### # PRETRAINING THE MODEL # ######################### print '... getting the pretraining functions' pretraining_fns = sda_model.pretraining_functions(train_set_x=train_set_x, batch_size=shared_args_dict['batch_size'], learning_rate=learning_rate, method='cm') print '... 
    hybrid_pretraining_fns = sda_model.build_finetune_limited_reconstruction(train_set_x=train_set_x,
                                                                             batch_size=shared_args_dict['batch_size'],
                                                                             learning_rate=learning_rate,
                                                                             method='cm')

    # DEBUG: get full finetuning theano function
    # get the training, validation function for the model
    datasets = (train_set_x, valid_set_x)
    print '... getting the finetuning functions'
    finetune_train_fn, validate_model = sda_model.build_finetune_full_reconstruction(
        datasets=datasets,
        batch_size=shared_args_dict['batch_size'],
        learning_rate=learning_rate,
        method='cm')

    # DEBUG: should only have n_layers - 2 hybrid pretraining functions
    assert len(hybrid_pretraining_fns) == sda_model.n_layers - 2

    print '... writing meta-data to output file'
    metadict = {'n_train_batches': n_train_batches}
    metadict = dict(metadict.items() + shared_args_dict.items())
    write_metadata(output_file, metadict)

    print '... pre-training the model'
    start_time = time.clock()

    # Get corruption levels from the SdA.
    corruption_levels = sda_model.corruption_levels

    # Function to decrease the learning rate
    decay_learning_rate = theano.function(inputs=[], outputs=learning_rate,
                                          updates={learning_rate: learning_rate * shared_args_dict['lr_decay']})

    # Function to reset the learning rate
    lr_val = T.scalar('original_lr')
    reset_learning_rate = theano.function(inputs=[lr_val], outputs=learning_rate,
                                          updates={learning_rate: lr_val})

    # Set up functions for max norm regularization
    apply_max_norm_regularization = sda_model.max_norm_regularization()

    for i in xrange(sda_model.n_layers):
        for epoch in xrange(shared_args_dict['pretraining_epochs']):
            # go through the training set
            c = []
            for batch_index in xrange(n_train_batches):
                c.append(pretraining_fns[i](index=batch_index,
                                            corruption=corruption_levels[i],
                                            momentum=shared_args_dict['momentum']))
            print >> output_file, 'Pre-training layer %i, epoch %d, cost ' % (i, epoch),
            print >> output_file, numpy.mean(c)
            print >> output_file, learning_rate.get_value(borrow=True)
            decay_learning_rate()
            apply_max_norm_regularization(norm_limit=shared_args_dict['maxnorm'])

        # Do hybrid pretraining only on the middle layer(s)
        if i > 0 and i < sda_model.n_layers - 1:
            for h_epoch in xrange(20):
                hybrid_c = []
                for batch_index in xrange(n_train_batches):
                    hybrid_c.append(hybrid_pretraining_fns[i - 1](index=batch_index,
                                                                  momentum=shared_args_dict['momentum']))
                print >> output_file, "Hybrid pre-training on layers %i and below, epoch %d, cost" % (i, h_epoch),
                print >> output_file, numpy.mean(hybrid_c)

        # Reset the learning rate
        reset_learning_rate(numpy.asarray(shared_args_dict['pretrain_lr'], dtype=numpy.float32))

    if private_args.has_key('save'):
        print >> output_file, 'Pickling the model...'
        current_dir = os.getcwd()
        os.chdir(shared_args_dict['dir'])
        f = file(private_args['save'], 'wb')
        cPickle.dump(sda_model, f, protocol=cPickle.HIGHEST_PROTOCOL)
        f.close()
        os.chdir(current_dir)

    print '... finetuning with final layer'
    best_validation_loss = numpy.inf
    for f_epoch in xrange(20):
        for minibatch_index in xrange(n_train_batches):
            minibatch_avg_cost = finetune_train_fn(minibatch_index, shared_args_dict['momentum'])
            # DEBUG: monitor the training error
            print >> output_file, ('Fine-tuning epoch %i, minibatch %i/%i, training error %f ' %
                                   (f_epoch, minibatch_index + 1, n_train_batches, minibatch_avg_cost))

            # apply max-norm regularization
            apply_max_norm_regularization(shared_args_dict['maxnorm'])

        # validate every epoch
        validation_losses = validate_model()
        this_validation_loss = numpy.mean(validation_losses)

        # save best model that achieved this best loss
        if this_validation_loss < best_validation_loss:
            best_validation_loss = this_validation_loss
            print >> output_file, 'Pickling the model...'
            current_dir = os.getcwd()
            os.chdir(shared_args_dict['dir'])
            f = file(private_args['save'], 'wb')
            cPickle.dump(sda_model, f, protocol=cPickle.HIGHEST_PROTOCOL)
            f.close()
            os.chdir(current_dir)

        print >> output_file, ('epoch %i, minibatch %i/%i, validation error %f ' %
                               (f_epoch, minibatch_index + 1, n_train_batches, this_validation_loss))

    end_time = time.clock()
    print >> output_file, ('The hybrid training code for file ' + os.path.split(__file__)[1] +
                           ' ran for %.2fm' % ((end_time - start_time) / 60.))
    output_file.close()
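# `pretrain` binds a GPU with theano.sandbox.cuda.use() before importing the rest of
# Theano, which only works when each model runs in its own subprocess. A hedged sketch
# of how two such workers might be launched with multiprocessing; the argument
# dictionaries below are illustrative assumptions, not the script's actual option parsing:

import multiprocessing as mp

if __name__ == '__main__':
    # Hypothetical driver: the real script builds these dicts from its CLI options.
    manager = mp.Manager()
    shared_args = manager.list([{'dir': '.', 'input': 'data.h5', 'offset': 0,
                                 'corruption': 0.25, 'layertype': 'ReLU',
                                 'batch_size': 100, 'pretrain_lr': 0.001,
                                 'lr_decay': 0.98, 'momentum': 0.9,
                                 'maxnorm': 3.0, 'loss': 'squared',
                                 'sparse_init': False, 'opt_method': 'cm',
                                 'pretraining_epochs': 50}])

    # one private dict per worker: which GPU to bind, which architecture, where to save
    p_args = {'gpu': 'gpu0', 'arch': '800-800-50', 'save': 'sda_a.pkl'}
    q_args = {'gpu': 'gpu1', 'arch': '900-900-50', 'save': 'sda_b.pkl'}

    p = mp.Process(target=pretrain, args=(shared_args, p_args))
    q = mp.Process(target=pretrain, args=(shared_args, q_args))
    p.start()
    q.start()
    p.join()
    q.join()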
def test_SdA(sample_size=60, finetune_lr=0.01, pretraining_epochs=20,
             pretrain_lr=0.01, training_epochs=100, batch_size=30,
             corruption_levels=[0.2], hidden_layers_sizes=[2000],
             img_size=(1020, 1020), img_size_test=(600, 1020)):

    process = Process()
    img_input, img_labels = process.read_in_images(["train-input"], ["train-labels"])
    img_input = process.normalize(img_input)
    #img_input = process.apply_clahe(img_input)
    #img_input = process.local_normalization(img_input)
    img_input = img_input[:, :img_size[0], :img_size[1]]

    train_set = img_input
    valid_set = img_input[:1]
    test_set = img_input[:1, :img_size_test[0], :img_size_test[1]]

    train_set_x, train_set_y = process.manipulate(train_set), train_set
    valid_set_x, valid_set_y = process.manipulate(valid_set), valid_set
    test_set_x, test_set_y = process.manipulate(test_set), test_set

    train_set_x, table = process.generate_set(train_set_x, sample_size=sample_size,
                                              stride=sample_size, img_size=img_size)
    valid_set_x, table = process.generate_set(valid_set_x, sample_size=sample_size,
                                              stride=sample_size, img_size=img_size)
    test_set_x, table = process.generate_set(test_set_x, sample_size=sample_size,
                                             stride=sample_size, img_size=img_size_test)
    train_set_y, table = process.generate_set(train_set_y, sample_size=sample_size,
                                              stride=sample_size, img_size=img_size)
    valid_set_y, table = process.generate_set(valid_set_y, sample_size=sample_size,
                                              stride=sample_size, img_size=img_size)
    test_set_y, table = process.generate_set(test_set_y, sample_size=sample_size,
                                             stride=sample_size, img_size=img_size_test)

    train_set_x, train_set_y = train_set_x.astype(np.float32), train_set_y.astype(np.float32)
    valid_set_x, valid_set_y = valid_set_x.astype(np.float32), valid_set_y.astype(np.float32)
    test_set_x, test_set_y = test_set_x.astype(np.float32), test_set_y.astype(np.float32)

    train_set_x, train_set_y = theano.shared(train_set_x, borrow=True), theano.shared(train_set_y, borrow=True)
    valid_set_x, valid_set_y = theano.shared(valid_set_x, borrow=True), theano.shared(valid_set_y, borrow=True)
    test_set_x, test_set_y = theano.shared(test_set_x, borrow=True), theano.shared(test_set_y, borrow=True)

    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_train_batches /= batch_size

    np_rng = np.random.RandomState()

    print '... building the model'
    sda = SdA(
        numpy_rng=np_rng,
        n_ins=sample_size**2,
        n_outs=sample_size**2,
        hidden_layers_sizes=hidden_layers_sizes
    )

    print '... Initializing pretraining functions'
    pretraining_fns, output_fn = sda.pretraining_functions(train_set_x=train_set_x,
                                                           batch_size=batch_size)

    print '... Layer-wise training of model'
    start_time = time.clock()
    for i in xrange(sda.n_layers):
        for epoch in xrange(pretraining_epochs):
            c = []
            for batch_index in xrange(n_train_batches):
                c.append(pretraining_fns[i](index=batch_index,
                                            corruption=corruption_levels[i],
                                            lr=pretrain_lr))
            print 'Layer %i, epoch %d, cost ' % (i, epoch),
            print np.mean(c)
    end_time = time.clock()
    print >> sys.stderr, ('Layer-wise training ran for %.2fm' % ((end_time - start_time) / 60.))

    ########################
    # FINETUNING THE MODEL #
    ########################

    datasets = [(train_set_x, train_set_y), (valid_set_x, valid_set_y), (test_set_x, test_set_y)]

    # get the training, validation and testing function for the model
    print '... getting the finetuning functions'
    train_fn, validate_model, test_model, output_fn = sda.build_finetune_functions(
        datasets=datasets,
        batch_size=batch_size,
        learning_rate=finetune_lr
    )

    print '... finetuning of model'
    for n in xrange(training_epochs):
        costs = []
        for i in xrange(n_train_batches):
            costs.append(train_fn(i))
        cost = np.mean(costs)
        #val_cost = validate_model()
        print "Epoch:", n, "Cost:", cost  #, "Validation cost:", val_cost
        print "Test cost:", test_model()

    # run the trained model over the training patches and stitch the output back into images
    out = np.zeros((0, sample_size**2))
    for batch_index in xrange(train_set_x.get_value().shape[0]):
        out = np.vstack((out, output_fn(batch_index)))
    img_output = process.post_process(out, table, sample_size, img_shape=img_size_test)

    plt.figure()
    plt.imshow(test_set[0], cmap=plt.cm.gray)
    plt.figure()
    plt.imshow(img_output[0], cmap=plt.cm.gray)

    # repeat on the xz-stacked volume, normalizing each slice to zero mean and unit variance
    xz = process.xz_stack(img_input)
    for m in xrange(xz.shape[0]):
        for n in xrange(xz.shape[1]):
            xz[m, n] = (xz[m, n] - xz[m, n].mean()) / xz[m, n].std()

    xz_train, table = process.generate_set(xz, sample_size=sample_size,
                                           stride=sample_size, img_size=img_size_test)
    xz_train = xz_train.astype(np.float32)
    test_set_x.set_value(xz_train)

    out = np.zeros((0, sample_size**2))
    for batch_index in xrange(train_set_x.get_value().shape[0]):
        out = np.vstack((out, output_fn(batch_index)))
    img_output = process.post_process(out, table, sample_size, img_shape=img_size_test)

    plt.figure()
    plt.imshow(xz[0], cmap=plt.cm.gray)
    plt.figure()
    plt.imshow(img_output[0], cmap=plt.cm.gray)
    plt.show()
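# test_SdA tiles each image into sample_size x sample_size patches via process.generate_set
# and stitches the reconstructions back together with process.post_process. A rough NumPy
# sketch of that tiling/reassembly idea for non-overlapping patches (the helper names below
# are assumptions for illustration, not the Process class's actual API):

import numpy as np

def extract_patches(img, patch, stride):
    """Cut a 2-D image into flattened patches; here stride == patch (no overlap)."""
    rows = (img.shape[0] - patch) // stride + 1
    cols = (img.shape[1] - patch) // stride + 1
    out = np.empty((rows * cols, patch * patch), dtype=img.dtype)
    table = []
    k = 0
    for r in range(rows):
        for c in range(cols):
            out[k] = img[r * stride:r * stride + patch,
                         c * stride:c * stride + patch].ravel()
            table.append((r * stride, c * stride))
            k += 1
    return out, table

def reassemble(patches, table, patch, img_shape):
    """Inverse of extract_patches for non-overlapping tiles."""
    img = np.zeros(img_shape, dtype=patches.dtype)
    for vec, (r, c) in zip(patches, table):
        img[r:r + patch, c:c + patch] = vec.reshape(patch, patch)
    return img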
def test_restrict_norm_SdA(num_epochs=10, pretrain_lr=0.00001, lr_decay=0.98, batch_size=20):
    """ Pretrain an SdA model for the given number of training epochs, applying norm
    restrictions on the W matrices.  Try ReLU units, since their weights seem to blow up
    on this data set.

    :type num_epochs: int
    :param num_epochs: number of epochs to do pretraining

    :type pretrain_lr: float
    :param pretrain_lr: learning rate to be used during pre-training

    :type batch_size: int
    :param batch_size: train in mini-batches of this size
    """

    layer_types = ['ReLU', 'ReLU']
    current_dir = os.getcwd()
    os.chdir(options.dir)
    today = datetime.today()
    day = str(today.date())
    hour = str(today.time())
    output_filename = "test_max_norm_sda_." + '_'.join([elem for elem in layer_types]) + day + "." + hour
    output_file = open(output_filename, 'w')
    os.chdir(current_dir)
    print >> output_file, "Run on " + str(datetime.now())

    # Get the training data sample from the input file
    data_set_file = openFile(str(options.inputfile), mode='r')
    datafiles = extract_unlabeled_chunkrange(data_set_file, num_files=10)
    train_set_x = load_data_unlabeled(datafiles, features=(5, 20))
    data_set_file.close()

    # compute number of minibatches for training, validation and testing
    n_train_batches, n_features = train_set_x.get_value(borrow=True).shape
    n_train_batches /= batch_size

    # numpy random generator
    numpy_rng = numpy.random.RandomState(89677)
    print '... building the model'

    # Set the initial value of the learning rate
    learning_rate = theano.shared(numpy.asarray(pretrain_lr,
                                                dtype=theano.config.floatX))

    # Function to decrease the learning rate
    decay_learning_rate = theano.function(inputs=[], outputs=learning_rate,
                                          updates={learning_rate: learning_rate * lr_decay})

    sda_model = SdA(numpy_rng=numpy_rng, n_ins=n_features,
                    hidden_layers_sizes=[5, 5],
                    corruption_levels=[0.25, 0.25],
                    layer_types=layer_types)

    #########################
    # PRETRAINING THE MODEL #
    #########################
    print '... getting the pretraining functions'
    pretraining_fns = sda_model.pretraining_functions(train_set_x=train_set_x,
                                                      batch_size=batch_size,
                                                      learning_rate=learning_rate)

    #print '... dumping pretraining functions to output file pre pickling'
    #print >> output_file, 'Pretraining functions, pre pickling'
    #for i in xrange(sda.n_layers):
        #theano.printing.debugprint(pretraining_fns[i], file=output_file, print_type=True)

    print '... getting the max-norm regularization functions'
    max_norm_regularization_fns = sda_model.max_norm_regularization()

    print '... pre-training the model'
    start_time = time.clock()

    ## Pre-train layer-wise
    corruption_levels = [float(options.corruption), float(options.corruption)]
    for i in xrange(sda_model.n_layers):
        for epoch in xrange(num_epochs):
            # go through the training set
            c = []
            for batch_index in xrange(n_train_batches):
                c.append(pretraining_fns[i](index=batch_index,
                                            corruption=corruption_levels[i]))
            # regularize weights here
            scale = max_norm_regularization_fns[i](norm_limit=options.norm_limit)
            print >> output_file, 'Pre-training layer %i, epoch %d, cost ' % (i, epoch),
            print >> output_file, numpy.mean(c)
            print >> output_file, 'Learning rate '
            print >> output_file, learning_rate.get_value(borrow=True)
            print >> output_file, 'Scale ', scale
            decay_learning_rate()

    end_time = time.clock()
    print >> output_file, ('Pretraining time for file ' + os.path.split(__file__)[1] +
                           ' was %.2fm to go through %i epochs' %
                           (((end_time - start_time) / 60.), (num_epochs / 2)))

    # Pickle the SdA
    print >> output_file, 'Pickling the model...'
    f = file(options.savefile, 'wb')
    cPickle.dump(sda_model, f, protocol=cPickle.HIGHEST_PROTOCOL)
    f.close()

    # Unpickle the SdA
    print >> output_file, 'Unpickling the model...'
    f = file(options.savefile, 'rb')
    pickled_sda = cPickle.load(f)
    f.close()

    # Test that the W-matrices and biases for the dA layers in sda are all close to the
    # W-matrices and biases freshly unpickled
    for i in xrange(pickled_sda.n_layers):
        pickled_dA_params = pickled_sda.dA_layers[i].get_params()
        fresh_dA_params = sda_model.dA_layers[i].get_params()

        if not numpy.allclose(pickled_dA_params[0].get_value(), fresh_dA_params[0].get_value()):
            print >> output_file, ("numpy says that Ws in layer %i are not close" % (i))
            print >> output_file, "Norm for pickled dA " + pickled_dA_params[0].name + ": "
            print >> output_file, norm(pickled_dA_params[0].get_value())
            print >> output_file, "Values for pickled dA " + pickled_dA_params[0].name + ": "
            print >> output_file, numpy.array_repr(pickled_dA_params[0].get_value())
            print >> output_file, "Norm for fresh dA " + fresh_dA_params[0].name + ": "
            print >> output_file, norm(fresh_dA_params[0].get_value())
            print >> output_file, "Values for fresh dA " + fresh_dA_params[0].name + ": "
            print >> output_file, numpy.array_repr(fresh_dA_params[0].get_value())

        if not numpy.allclose(pickled_dA_params[1].get_value(), fresh_dA_params[1].get_value()):
            print >> output_file, ("numpy says that the biases in layer %i are not close" % (i))
            print >> output_file, "Norm for pickled dA " + pickled_dA_params[1].name + ": "
            print >> output_file, norm(pickled_dA_params[1].get_value())
            print >> output_file, "Values for pickled dA " + pickled_dA_params[1].name + ": "
            print >> output_file, numpy.array_repr(pickled_dA_params[1].get_value())
            print >> output_file, "Norm for fresh dA " + fresh_dA_params[1].name + ": "
            print >> output_file, norm(fresh_dA_params[1].get_value())
            print >> output_file, "Values for fresh dA " + fresh_dA_params[1].name + ": "
            print >> output_file, numpy.array_repr(fresh_dA_params[1].get_value())

    output_file.close()
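# Both pretraining routines above rely on the SdA's max_norm_regularization() functions,
# whose implementation is not shown in this file. As a rough illustration only, max-norm
# weight clipping is commonly implemented by rescaling the columns of W whose L2 norm
# exceeds the limit; the function below is a hypothetical NumPy sketch, not the SdA method:

import numpy as np

def apply_max_norm(W, norm_limit=3.0):
    """Rescale any column of W whose L2 norm exceeds norm_limit; return (W, largest norm)."""
    norms = np.sqrt((W ** 2).sum(axis=0))
    scale = np.minimum(1.0, norm_limit / np.maximum(norms, 1e-8))
    return W * scale, norms.max()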