# Module-level imports used by pretrain() below. The project-specific helpers
# (get_arch_list, parse_layer_type, extract_unlabeled_chunkrange,
# load_data_unlabeled, write_metadata) are assumed to be imported elsewhere
# in the original module.
import os
import time
import cPickle
from datetime import datetime

import numpy
from tables import openFile


def pretrain(shared_args, private_args):
    """ Pretrain an SdA model for the given number of training epochs.

    The model is either initialized from scratch, or is reconstructed
    from a previously pickled model.

    :type shared_args: dict
    :param shared_args: dict containing all the arguments common to both models.

    :type private_args: dict
    :param private_args: dict containing all the arguments specific to each
                         model spawned off this first process.
    """

    # Import sandbox.cuda to bind the specified GPU to this subprocess,
    # then import the remaining theano and model modules.
    import theano.sandbox.cuda
    theano.sandbox.cuda.use(private_args['gpu'])

    import theano
    import theano.tensor as T
    from theano.tensor.shared_randomstreams import RandomStreams

    from SdA import SdA

    shared_args_dict = shared_args[0]

    current_dir = os.getcwd()

    os.chdir(shared_args_dict['dir'])
    today = datetime.today()
    day = str(today.date())
    hour = str(today.time())
    arch_list = get_arch_list(private_args)
    corruption_list = [shared_args_dict['corruption'] for i in arch_list]
    layer_types = parse_layer_type(shared_args_dict['layertype'], len(arch_list))

    output_filename = ("hybrid_pretraining_sda_" +
                       "_".join(elem for elem in layer_types) +
                       private_args['arch'] + "." + day + "." + hour)
    output_file = open(output_filename, 'w')
    os.chdir(current_dir)
    print >> output_file, "Run on " + str(datetime.now())

    # Get the training data sample from the input file
    data_set_file = openFile(str(shared_args_dict['input']), mode='r')
    datafiles = extract_unlabeled_chunkrange(data_set_file, num_files=30,
                                             offset=shared_args_dict['offset'])
    if datafiles is None:
        print("No data was returned, exiting.")
        data_set_file.close()
        output_file.close()
        return

    train_set_x = load_data_unlabeled(datafiles)

    # DEBUG: get validation set too
    validation_datafiles = extract_unlabeled_chunkrange(data_set_file, num_files=5,
                                                        offset=shared_args_dict['offset'] + 30)
    valid_set_x = load_data_unlabeled(validation_datafiles)
    data_set_file.close()

    # compute the number of minibatches for training
    n_train_batches, n_features = train_set_x.get_value(borrow=True).shape
    n_train_batches /= shared_args_dict['batch_size']

    # numpy random generator
    numpy_rng = numpy.random.RandomState(89677)

    # Set the initial value of the learning rate
    learning_rate = theano.shared(numpy.asarray(shared_args_dict['pretrain_lr'],
                                                dtype=theano.config.floatX))

    # Check if we can restore from a previously trained model,
    # otherwise construct a new SdA
    if private_args.has_key('restore'):
        print >> output_file, 'Unpickling the model from %s ...' % (private_args['restore'])
        current_dir = os.getcwd()
        os.chdir(shared_args_dict['dir'])
        f = file(private_args['restore'], 'rb')
        sda_model = cPickle.load(f)
        f.close()
        os.chdir(current_dir)
    else:
        print '... building the model'
        sda_model = SdA(numpy_rng=numpy_rng,
                        n_ins=n_features,
                        hidden_layers_sizes=arch_list,
                        corruption_levels=corruption_list,
                        layer_types=layer_types,
                        loss=shared_args_dict['loss'],
                        n_outs=-1,
                        sparse_init=shared_args_dict['sparse_init'],
                        opt_method=shared_args_dict['opt_method'])

    #########################
    # PRETRAINING THE MODEL #
    #########################

    print '... getting the pretraining functions'
    pretraining_fns = sda_model.pretraining_functions(train_set_x=train_set_x,
                                                      batch_size=shared_args_dict['batch_size'],
                                                      learning_rate=learning_rate,
                                                      method='cm')
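    # A minimal sketch, not part of the original training path, of the
    # classical momentum update that method='cm' above presumably selects
    # inside SdA.pretraining_functions; names here are illustrative only:
    #   velocity <- momentum * velocity - learning_rate * gradient
    #   param    <- param + velocity
    def _classical_momentum_updates(params, grads, lr, momentum_coeff):
        """Illustrative helper: build a Theano update list for classical momentum."""
        updates = []
        for param, grad in zip(params, grads):
            # One velocity buffer per parameter, initialized to zeros.
            velocity = theano.shared(numpy.zeros_like(param.get_value(borrow=True)))
            v_new = momentum_coeff * velocity - lr * grad
            updates.append((velocity, v_new))
            updates.append((param, param + v_new))
        return updates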
    print '... getting the hybrid training functions'
    hybrid_pretraining_fns = sda_model.build_finetune_limited_reconstruction(train_set_x=train_set_x,
                                                                             batch_size=shared_args_dict['batch_size'],
                                                                             learning_rate=learning_rate,
                                                                             method='cm')

    # DEBUG: get full finetuning theano function
    # get the training, validation function for the model
    datasets = (train_set_x, valid_set_x)

    print '... getting the finetuning functions'
    finetune_train_fn, validate_model = sda_model.build_finetune_full_reconstruction(
        datasets=datasets,
        batch_size=shared_args_dict['batch_size'],
        learning_rate=learning_rate,
        method='cm')

    # DEBUG: should only have n_layers - 2 hybrid pretraining functions
    assert len(hybrid_pretraining_fns) == sda_model.n_layers - 2

    print '... writing meta-data to output file'
    metadict = {'n_train_batches': n_train_batches}
    metadict = dict(metadict.items() + shared_args_dict.items())
    write_metadata(output_file, metadict)

    print '... pre-training the model'
    start_time = time.clock()

    # Get corruption levels from the SdA.
    corruption_levels = sda_model.corruption_levels

    # Function to decrease the learning rate
    decay_learning_rate = theano.function(inputs=[], outputs=learning_rate,
                                          updates={learning_rate: learning_rate * shared_args_dict['lr_decay']})

    # Function to reset the learning rate
    lr_val = T.scalar('original_lr')
    reset_learning_rate = theano.function(inputs=[lr_val], outputs=learning_rate,
                                          updates={learning_rate: lr_val})

    # Set up functions for max norm regularization
    apply_max_norm_regularization = sda_model.max_norm_regularization()

    for i in xrange(sda_model.n_layers):
        for epoch in xrange(shared_args_dict['pretraining_epochs']):
            # go through the training set
            c = []
            for batch_index in xrange(n_train_batches):
                c.append(pretraining_fns[i](index=batch_index,
                                            corruption=corruption_levels[i],
                                            momentum=shared_args_dict['momentum']))

            print >> output_file, 'Pre-training layer %i, epoch %d, cost ' % (i, epoch),
            print >> output_file, numpy.mean(c)
            print >> output_file, learning_rate.get_value(borrow=True)
            decay_learning_rate()
            apply_max_norm_regularization(norm_limit=shared_args_dict['maxnorm'])

        # Do hybrid pretraining only on the middle layer(s)
        if i > 0 and i < sda_model.n_layers - 1:
            for h_epoch in xrange(20):
                hybrid_c = []
                for batch_index in xrange(n_train_batches):
                    hybrid_c.append(hybrid_pretraining_fns[i - 1](index=batch_index,
                                                                  momentum=shared_args_dict['momentum']))
                print >> output_file, "Hybrid pre-training on layers %i and below, epoch %d, cost" % (i, h_epoch),
                print >> output_file, numpy.mean(hybrid_c)

        # Reset the learning rate before pre-training the next layer
        reset_learning_rate(numpy.asarray(shared_args_dict['pretrain_lr'],
                                          dtype=numpy.float32))

    if private_args.has_key('save'):
        print >> output_file, 'Pickling the model...'
        current_dir = os.getcwd()
        os.chdir(shared_args_dict['dir'])
        f = file(private_args['save'], 'wb')
        cPickle.dump(sda_model, f, protocol=cPickle.HIGHEST_PROTOCOL)
        f.close()
        os.chdir(current_dir)
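    # For reference, a minimal numpy sketch of what max-norm regularization
    # does; this is an assumption about sda_model.max_norm_regularization()
    # above (rescale any weight column whose L2 norm exceeds the limit back
    # onto the norm ball), not the model's actual implementation.
    def _max_norm_rescale(W, norm_limit):
        """Illustrative helper: clip the L2 norm of each column of W to norm_limit."""
        norms = numpy.sqrt((W ** 2).sum(axis=0))  # per-column L2 norms
        scale = numpy.minimum(1.0, norm_limit / numpy.maximum(norms, 1e-8))
        return W * scale                          # scale broadcasts over rows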
    print '... finetuning with final layer'
    best_validation_loss = numpy.inf
    for f_epoch in xrange(20):
        for minibatch_index in xrange(n_train_batches):
            minibatch_avg_cost = finetune_train_fn(minibatch_index,
                                                   shared_args_dict['momentum'])

            # DEBUG: monitor the training error
            print >> output_file, ('Fine-tuning epoch %i, minibatch %i/%i, training error %f ' %
                                   (f_epoch, minibatch_index + 1, n_train_batches,
                                    minibatch_avg_cost))

        # apply max-norm regularization
        apply_max_norm_regularization(shared_args_dict['maxnorm'])

        # validate every epoch
        validation_losses = validate_model()
        this_validation_loss = numpy.mean(validation_losses)

        # save the model that achieved the best validation loss so far
        if this_validation_loss < best_validation_loss:
            best_validation_loss = this_validation_loss
            if private_args.has_key('save'):
                print >> output_file, 'Pickling the model...'
                current_dir = os.getcwd()
                os.chdir(shared_args_dict['dir'])
                f = file(private_args['save'], 'wb')
                cPickle.dump(sda_model, f, protocol=cPickle.HIGHEST_PROTOCOL)
                f.close()
                os.chdir(current_dir)

        print >> output_file, ('epoch %i, minibatch %i/%i, validation error %f ' %
                               (f_epoch, minibatch_index + 1, n_train_batches,
                                this_validation_loss))

    end_time = time.clock()
    print >> output_file, ('The hybrid training code for file ' +
                           os.path.split(__file__)[1] +
                           ' ran for %.2fm' % ((end_time - start_time) / 60.))
    output_file.close()
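# A hypothetical launch sketch, not part of the original file: the docstring
# says each model is spawned off a first process with its own GPU, which
# suggests a driver along these lines. Every value below is a placeholder
# guess; the real scripts presumably build these dicts from parsed arguments.
if __name__ == '__main__':
    from multiprocessing import Process, Manager

    manager = Manager()
    # One shared collection of common options, wrapped in a list because
    # pretrain() reads shared_args[0].
    shared_args = manager.list([{
        'dir': '.', 'input': 'data.h5', 'offset': 0,
        'corruption': 0.2, 'layertype': 'gaussian', 'batch_size': 100,
        'pretrain_lr': 0.01, 'lr_decay': 0.98, 'momentum': 0.9,
        'maxnorm': 3.0, 'loss': 'squared', 'sparse_init': False,
        'opt_method': 'cm', 'pretraining_epochs': 50,
    }])

    # One subprocess per GPU, each with its own private arguments.
    processes = [Process(target=pretrain,
                         args=(shared_args,
                               {'gpu': gpu, 'arch': arch, 'save': save}))
                 for gpu, arch, save in [('gpu0', '800_400', 'sda_A.pkl'),
                                         ('gpu1', '900_500', 'sda_B.pkl')]]
    for p in processes:
        p.start()
    for p in processes:
        p.join()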