import time

import numpy as np

import NNl  # project-local helpers (nice_time, etc.)


def train(model, data, functions, params):
    """Generic routine to perform training on the GPU using Theano-compiled
    functions and common parameters.

    This will run through a specified number of 'epochs', each consisting of
    a full pass through the training data. The epochs are broken into batches
    as normal for Stochastic Gradient Descent.

    functions: A dictionary containing all of the necessary functions for
        training. It will at least have 'momentum', 'update', and 'train_E'
        functions. 'momentum' updates the delta for each parameter, 'update'
        applies the current delta, and 'train_E' gets the current training
        cost. For supervised training, 'val_E' will usually be included so
        you can keep track of your progress on the validation set.

    params: Necessary training params: 'LR' (learning rate), 'rho'
        (momentum coefficient), 't_batches' (training batches per data
        chunk), 'n_epochs', 'verb' (verbosity flag), 'v_batches'
        (validation batches), 'LRsched' (epoch -> LR schedule), 'logfile',
        and 'error' (object on which the best error is tracked).
    """
    LR = params['LR']

    # total number of training batches across all data chunks
    Nb = 0
    for chunk_i in range(len(data.b_samples)):
        Nb += params['t_batches'][chunk_i]

    print "Training {} epochs at LR = {} rho = {}".format(
            params['n_epochs'], LR, params['rho'])
    print "Using schedule:", sorted(params['LRsched'].items())

    # reference augmentation for checking error (centered, no flip)
    T_aug = model.ref_aug

    # Main training loop
    start_time = time.clock()
    for epoch in range(params['n_epochs']):
        ct = 0
        for chunk_i in range(len(data.b_samples)):
            # load the current data chunk onto the GPU
            data.T[0].set_value(data.raw[chunk_i])
            data.T[1].set_value(
                    np.asarray(data.labels[chunk_i], dtype=data.ltype))
            for batch_i in range(params['t_batches'][chunk_i]):
                # accumulate the momentum-smoothed delta, then apply it
                functions['momentum'](batch_i, LR, model.gen_aug())
                functions['update']()
                # print a progress dot roughly five times per epoch
                # (max(..., 1) guards against division by zero when Nb < 5)
                if params['verb'] and \
                        (ct + batch_i + 1) % max(Nb // 5, 1) == 0:
                    print '.',
            ct += params['t_batches'][chunk_i]

        # check the weight distribution
        model.param_status(epoch, output=open("wlog", 'a'))

        # compute error on the training set (the last chunk is still on
        # the GPU from the loop above)
        c_train_error = [functions['train_E'](i, T_aug)
                         for i in xrange(params['t_batches'][-1])]
        err_train = np.mean(c_train_error)

        # apply the learning-rate schedule
        if epoch in params['LRsched']:
            LR = params['LRsched'][epoch]

        # compute error on the validation set, if available
        if 'val_E' in functions:
            c_val_error = [functions['val_E'](i, T_aug)
                           for i in xrange(params['v_batches'])]
            err_val = np.mean(c_val_error)
        else:
            err_val = err_train

        # if we achieved a new best validation score,
        # save the model and the best validation score
        if err_val < params['error'].best_error:
            if params['verb']:
                print 'S',
            params['error'].best_error = err_val
            model.save_model()
        elif params['verb']:
            print ' ',

        curr_time = NNl.nice_time(time.clock() - start_time)
        if 'val_E' in functions:
            if params['verb']:
                print("{} | epoch {: >4}, LR={:.4f}, train: {:.5f}, "
                      "val: {:.5f}".format(curr_time, epoch, LR,
                                           err_train, err_val))
            else:
                print '.',
            params['logfile'].write(
                    "{} {: >4} {:.6f} {:.8f} {:.8f}\n".format(
                        curr_time, epoch, LR, err_train, err_val))
        else:
            if params['verb']:
                print("{} | epoch {: >4}, LR={:.5f}, train: {:.6f}".format(
                        curr_time, epoch, LR, err_train))
            params['logfile'].write("{} {: >4} {:.6f} {:.8f}\n".format(
                    curr_time, epoch, LR, err_train))
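# The sketch below shows how train() might be wired up. It is a minimal,
# hypothetical example: only the 'functions'/'params' keys and the
# 'best_error' attribute are fixed by the routine above; the stand-in
# lambdas, the ErrorTracker class, and the specific values are assumptions
# standing in for real compiled Theano functions and project objects.

class ErrorTracker(object):
    """Holds the best validation error seen so far; train() reads and
    writes its 'best_error' attribute."""
    def __init__(self):
        self.best_error = float('inf')

# Stand-ins for compiled Theano functions (hypothetical signatures match
# how train() calls them):
functions = {
    'momentum': lambda batch_i, LR, aug: None,  # update per-param deltas
    'update': lambda: None,                     # apply the current deltas
    'train_E': lambda batch_i, aug: 0.0,        # training cost for a batch
    'val_E': lambda batch_i, aug: 0.0,          # optional validation cost
}

params = {
    'LR': 0.01,                  # initial learning rate
    'rho': 0.9,                  # momentum coefficient
    'n_epochs': 100,
    't_batches': [50, 50],       # training batches per data chunk
    'v_batches': 10,             # validation batches
    'LRsched': {60: 0.001},      # drop LR to 0.001 at epoch 60
    'verb': True,
    'logfile': open('train.log', 'w'),
    'error': ErrorTracker(),
}

# 'model' and 'data' would be the project's model and GPU-backed dataset
# objects; with those in hand, training is a single call:
# train(model, data, functions, params)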