def pretraining(self, train_x=None, train_y=None):
    log('> ... getting the pre-training functions')
    if train_x is None:
        # stream input from file; not handled in this variant
        pass
    else:
        # numpy matrices as input: append the targets to the features so that each
        # dA layer reconstructs x and y jointly
        multi_classes = self.cfg.n_outs >= 3   # with 3+ classes, one-hot encode the targets
        train_y = train_y.astype(dtype=theano.config.floatX)
        train_y_T = train_y[numpy.newaxis].T   # targets as a column vector
        if not multi_classes:
            train_xy = numpy.hstack((train_x, train_y_T))
            shared_train_xy = shared_dataset_X(train_xy)
        else:
            enc = OneHotEncoder(n_values=self.cfg.n_outs, dtype=theano.config.floatX, sparse=False)
            encode_train_y = enc.fit_transform(train_y_T)
            shared_train_xy = shared_dataset_X(numpy.hstack((train_x, encode_train_y)))

        start_layer_index = 0; start_epoch_index = 0
        log('> ... pre-training the model')
        # layer by layer; for each layer, go through the epochs
        for i in range(start_layer_index, self.cfg.ptr_layer_number):
            pretraining_fn = self.pretraining_function(self.dA_layers[i],
                                                       train_set_x=shared_train_xy,
                                                       batch_size=self.cfg.batch_size)
            for epoch in range(start_epoch_index, self.cfg.epochs):
                # go through the training set
                c = []
                # while (not self.cfg.train_sets.is_finish()):
                #     self.cfg.train_sets.load_next_partition(self.cfg.train_xy)
                iteration_per_epoch = max(1, train_x.shape[0] / self.cfg.batch_size)  # at least one mini-batch
                for batch_index in xrange(iteration_per_epoch):  # loop over mini-batches
                    c.append(pretraining_fn(index=batch_index,
                                            corruption=self.cfg.corruption_levels[i],
                                            lr=self.cfg.learning_rates[i],
                                            momentum=self.cfg.momentum))
                # self.cfg.train_sets.initialize_read()
                log('> layer %i, epoch %d, reconstruction cost %f' % (i, epoch, numpy.mean(c)))
            # propagate the data through the trained layer to build the input of the next one
            hidden_values = self.dA_layers[i].transform(shared_train_xy.get_value())
            if 'firstlayer_xy' in self.cfg.settings and self.cfg.settings['firstlayer_xy'] == 1:
                # only the first layer sees the targets
                shared_train_xy = shared_dataset_X(hidden_values)
            else:
                # re-append the targets for every layer
                if not multi_classes:
                    train_xy = numpy.hstack((hidden_values, train_y_T))
                    shared_train_xy = shared_dataset_X(train_xy)
                else:
                    shared_train_xy = shared_dataset_X(numpy.hstack((hidden_values, encode_train_y)))
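# `shared_dataset_X` is used throughout this section but not defined in it. The sketch
# below is an assumption of what it presumably does (the usual Theano idiom of wrapping a
# numpy matrix in a floatX shared variable so that compiled functions can slice mini-batches
# from it); it is not the repository's actual helper, hence the distinct name.
import numpy
import theano

def shared_dataset_X_sketch(data_x, borrow=True):
    # store the matrix in a shared variable, cast to the configured float precision
    return theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX), borrow=borrow)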
def pretraining(self, train_x=None):
    log('> ... getting the pre-training functions')
    if train_x is not None:  # using a numpy matrix as input
        # shrink the batch size if there are fewer samples than cfg.batch_size
        batch_size = self.cfg.batch_size if train_x.shape[0] > self.cfg.batch_size else train_x.shape[0]
        pretraining_fns = self.pretraining_functions(train_set_x=shared_dataset_X(train_x),
                                                     batch_size=batch_size)
        # resume training
        start_layer_index = 0; start_epoch_index = 0
        log('> ... pre-training the model')
        # layer by layer; for each layer, go through the epochs
        for i in range(start_layer_index, self.cfg.ptr_layer_number):
            for epoch in range(start_epoch_index, self.cfg.epochs):
                # go through the training set
                c = []
                for batch_index in xrange(train_x.shape[0] / batch_size):  # loop over mini-batches
                    c.append(pretraining_fns[i](index=batch_index,
                                                corruption=self.cfg.corruption_levels[i],
                                                lr=self.cfg.learning_rates[i],
                                                momentum=self.cfg.momentum))
                log('> layer %i, epoch %d, reconstruction cost %f' % (i, epoch, numpy.mean(c)))
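# A minimal sketch (an assumption, not the repository's actual implementation) of what one
# entry of `pretraining_functions` presumably looks like: a compiled Theano function that
# accepts a mini-batch index plus corruption / learning-rate / momentum values, exactly the
# keyword arguments used in the calls above, and performs one update step for a single dA
# layer. The dA interface (dA.x, dA.get_cost_updates taking a momentum term) is assumed.
import theano
import theano.tensor as T

def make_layer_pretraining_fn(dA, train_set_x, batch_size):
    index = T.lscalar('index')              # mini-batch index
    corruption = T.scalar('corruption')     # corruption level for this layer
    lr = T.scalar('lr')                     # learning rate for this layer
    momentum = T.scalar('momentum')
    # assumed dA API: returns the reconstruction cost and the SGD(+momentum) updates
    cost, updates = dA.get_cost_updates(corruption, lr, momentum)
    return theano.function(
        inputs=[index,
                theano.In(corruption, value=0.2),
                theano.In(lr, value=0.1),
                theano.In(momentum, value=0.5)],
        outputs=cost,
        updates=updates,
        givens={dA.x: train_set_x[index * batch_size: (index + 1) * batch_size]})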
def pretraining_with_estop(self, X_train_minmax, settings):
    batch_size = settings['batch_size']
    corruption_levels = settings['corruption_levels']
    pretrain_lr = settings['pretrain_lr']
    momentum = settings['momentum']
    pretraining_epochs = settings['pretraining_epochs']
    n_visible = X_train_minmax.shape[1]

    # shuffle the examples, then split them half/half into training and validation sets
    from sklearn.utils import shuffle
    X_train_minmax = shuffle(X_train_minmax, random_state=0)
    train_set_x = shared_dataset_X(X_train_minmax[:X_train_minmax.shape[0] / 2, :], borrow=True)
    valid_set_x = shared_dataset_X(X_train_minmax[X_train_minmax.shape[0] / 2:, :], borrow=True)

    # compute the number of minibatches for training
    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    if n_train_batches <= batch_size:
        batch_size = n_train_batches
    n_train_batches /= batch_size

    # numpy random generator
    numpy_rng = numpy.random.RandomState(66)

    validation_funcs = self.get_cost_functions(valid_set_x)
    pretraining_fns = self.pretraining_functions(train_set_x, batch_size)

    # early-stopping parameters
    # patience = 1000 * n_train_batches
    patience_increase = 2.           # wait this much longer when a new best is found
    improvement_threshold = 0.995    # a relative improvement of this much is considered significant
    validation_frequency = max(1, min(n_train_batches * pretraining_epochs / 200, 100))
                                     # go through this many minibatches before checking the
                                     # network on the validation set (at least 1, to avoid a
                                     # zero modulus on very small datasets)

    print '... pre-training the model'
    start_time = time.clock()
    # pre-train layer-wise
    for i in xrange(self.n_layers):
        best_params = None
        best_validation_loss = numpy.inf
        test_score = 0.
        done_looping = False
        epoch = 0
        best_iter = 0
        patience = 5000              # look at this many iterations regardless
        # go through the pretraining epochs
        while epoch < pretraining_epochs and (not done_looping):
            # go through the training set
            c = []
            for minibatch_index in xrange(n_train_batches):
                c.append(pretraining_fns[i](index=minibatch_index,
                                            corruption=corruption_levels[i],
                                            lr=pretrain_lr,
                                            momentum=momentum))
                iter = epoch * n_train_batches + minibatch_index + 1
                if (iter + 1) % validation_frequency == 0:
                    this_validation_fn = validation_funcs[i]
                    this_validation_loss = this_validation_fn()
                    print('epoch %i, minibatch %i/%i, validation cost %f ' %
                          (epoch, minibatch_index + 1, n_train_batches, this_validation_loss))
                    # if we got the best validation score until now
                    if this_validation_loss < best_validation_loss:
                        # improve patience if the loss improvement is good enough
                        if this_validation_loss < best_validation_loss * improvement_threshold:
                            patience = max(patience, iter * patience_increase)
                        # save the best validation score and iteration number
                        best_validation_loss = this_validation_loss
                        best_iter = iter
                if patience <= iter:
                    done_looping = True
                    break
            print 'Pre-training layer %i, epoch %d, cost ' % (i, epoch),
            print numpy.mean(c)
            epoch += 1
    end_time = time.clock()
    print >> sys.stderr, ('The pretraining code ran for %.2fm' % ((end_time - start_time) / 60.))
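# Hypothetical usage sketch for pretraining_with_estop. Only the settings keys are taken
# from the function body above; the values are illustrative, and `model` / `X_train_minmax`
# stand in for an instance of whatever class defines these methods and its min-max scaled
# training matrix.
example_settings = {
    'batch_size': 100,
    'corruption_levels': [0.1, 0.2, 0.3],   # one corruption level per dA layer
    'pretrain_lr': 0.01,
    'momentum': 0.5,
    'pretraining_epochs': 15,
}
# model.pretraining_with_estop(X_train_minmax, example_settings)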