def load_data(self):
    """Create the data handlers and evaluators named by ``self.filenames``.

    ``self.filenames`` must unpack into three entries: the training
    embeddings file, the test embeddings file and the labelled-instances
    file.  The method fills in ``self.train_data``, ``self.train_eval``,
    ``self.test_eval`` and ``self.instance_eval``.
    """
    train_path, test_path, labels_path = self.filenames

    # Training side: the handler is kept on the instance and also wrapped
    # by the group evaluator.
    train_handler = DataHandler(train_path, max_size=200000)
    self.train_data = train_handler
    self.train_eval = GroupEvaluator(data=train_handler)

    # Test side: the handler is only reachable through its evaluator.
    test_handler = DataHandler(test_path, max_size=200000)
    self.test_eval = GroupEvaluator(data=test_handler)

    # Instance-level evaluator fed with the labelled instances file.
    self.instance_eval = InstanceEvaluator()
    self.instance_eval.load_labeled_instances(labels_path)
#skip = int(100/fps) flen = [] for line in open(data_dir + '01_test_framenum.txt'): # test flen.append(line.strip()) maxlen = int(flen[tbidx]) # to get the alphas for the whole tbidx-th video print 'Video length:', maxlen print '-----' #print 'Skip set at', skip print 'Booting up the data handler' data_pb = TestTestProto(batch_size, maxlen, maxlen, dataset, data_dir, fps) # or TestTrainProto or TestValidProto dh = DataHandler(data_pb) dataset_size = dh.GetDatasetSize() num_batches = dataset_size / batch_size print 'Data handler ready' print '-----' params = src.actrec.init_params(options) params = src.actrec.load_params(model, params) tparams = src.actrec.init_tparams(params) trng, use_noise, inps, alphas, cost, opt_outs, preds = src.actrec.build_model( tparams, options) f_alpha = theano.function(inps, alphas, name='f_alpha', on_unused_input='ignore')
def train( dim_out=500, # hidden layer dim for outputs ctx_dim=1024, # context vector dimensionality dim=1024, # the number of LSTM units n_actions=3101, # number of actions to predict n_layers_att=1, n_layers_out=1, n_layers_init=1, ctx2out=False, patience=50, max_epochs=5000, dispFreq=100, decay_c=0., alpha_c=0., temperature_inverse=1.0, lrate=0.001, selector=False, maxlen=5, # maximum length of the video optimizer='sgd', batch_size=16, valid_batch_size=16, saveto='model.npz', validFreq=1000, saveFreq=1000, # save the parameters after every saveFreq updates dataset='flickr8k', # dummy dataset, replace with video ones dictionary=None, # word dictionary use_dropout=False, reload_=False, training_stride=1, testing_stride=8, last_n=16, fps=30): # Model options model_options = locals().copy() #model_options = validate_options(model_options) # reload options if reload_ and os.path.exists(saveto): print "Reloading options" with open('%s.pkl' % saveto, 'rb') as f: model_options = pkl.load(f) print '-----' print 'Booting up all data handlers' data_pb = TrainProto(batch_size, maxlen, training_stride, dataset, fps) dh = DataHandler(data_pb) dataset_size = dh.GetDatasetSize() num_train_batches = dataset_size / batch_size if dataset_size % batch_size != 0: num_train_batches += 1 valid = True # not None test = True # not None data_test_train_pb = TestTrainProto(valid_batch_size, maxlen, testing_stride, dataset, fps) dh_test_train = DataHandler(data_test_train_pb) test_train_dataset_size = dh_test_train.GetDatasetSize() num_test_train_batches = test_train_dataset_size / valid_batch_size if test_train_dataset_size % valid_batch_size != 0: num_test_train_batches += 1 data_test_valid_pb = TestValidProto(valid_batch_size, maxlen, testing_stride, dataset, fps) dh_test_valid = DataHandler(data_test_valid_pb) test_valid_dataset_size = dh_test_valid.GetDatasetSize() num_test_valid_batches = test_valid_dataset_size / valid_batch_size if test_valid_dataset_size % valid_batch_size != 0: 
num_test_valid_batches += 1 data_test_test_pb = TestTestProto(valid_batch_size, maxlen, testing_stride, dataset, fps) dh_test_test = DataHandler(data_test_test_pb) test_test_dataset_size = dh_test_test.GetDatasetSize() num_test_test_batches = test_test_dataset_size / valid_batch_size if test_test_dataset_size % valid_batch_size != 0: num_test_test_batches += 1 print 'Data handlers ready' print '-----' print 'Building model' params = init_params(model_options) # reload parameters if reload_ and os.path.exists(saveto): print "Reloading model" params = load_params(saveto, params) tparams = init_tparams(params) trng, use_noise, \ inps,\ cost, \ opts_out, preds, i_gate = \ build_model(tparams, model_options) ''' get_i_gate = theano.function(inps[0:2], i_gate, profile=False, on_unused_input='ignore') print 'build get_i_gate felished' x, vid, n_ex = dh_test_train.GetBatch(data_test_train_pb) mask = numpy.ones((maxlen, batch_size)).astype('float32') if n_ex != batch_size: mask[:,n_ex:] = numpy.zeros((maxlen, batch_size-n_ex)).astype('float32') i_gate_np = get_i_gate(x,mask) print len(i_gate_np) print len(i_gate_np[0]) print len(i_gate_np[0][0]) print i_gate_np[0][0][0].shape weig = numpy.zeros((7,7,30,batch_size)) for i in xrange(7): for j in xrange(7): for k in xrange(30): weig[i,j,k,:] = numpy.mean(i_gate_np[k][j][i],axis=1) dic = {'weig':weig, 'vid':vid} sio.savemat('weig.mat', {'dic':dic}) train_err = 0 valid_err = 0 test_err = 0 ''' # before any regularizer f_log_probs = theano.function(inps, -cost, profile=False) f_preds = theano.function(inps, preds, profile=False, on_unused_input='ignore') cost = cost.mean() if decay_c > 0.: decay_c = theano.shared(numpy.float32(decay_c), name='decay_c') weight_decay = 0. 
for kk, vv in tparams.iteritems(): weight_decay += (vv**2).sum() weight_decay *= decay_c cost += weight_decay cost += 0.0001 * i_gate.sum() #if alpha_c > 0.: # alpha_c = theano.shared(numpy.float32(alpha_c), name='alpha_c') # alpha_reg = alpha_c * ((1.-alphas.sum(0))**2).sum(0).mean() # cost += alpha_reg # gradient computation grads = tensor.grad(cost, wrt=itemlist(tparams)) lr = tensor.scalar(name='lr') f_grad_shared, f_update = eval(optimizer)(lr, tparams, grads, inps, cost) print 'Optimization' history_errs = [] # reload history if reload_ and os.path.exists(saveto): history_errs = numpy.load(saveto)['history_errs'].tolist() best_p = None bad_count = 0 uidx = 0 try: for epochidx in xrange(max_epochs): # If the input sequences are of variable length get mask from the data loader instead of setting them all to one mask = numpy.ones((maxlen, batch_size)).astype('float32') print 'Epoch ', epochidx n_examples_seen = 0 estop = False if epochidx > 0: dh.Reset() for tbidx in xrange(num_train_batches): n_examples_seen += batch_size uidx += 1 use_noise.set_value(1.) pd_start = time.time() x, y, n_ex = dh.GetBatch(data_pb) if n_ex != batch_size: mask[:, n_ex:] = numpy.zeros( (maxlen, batch_size - n_ex)).astype('float32') pd_duration = time.time() - pd_start if x == None: print 'Minibatch with zero sample under length ', maxlen continue ud_start = time.time() cost = f_grad_shared(x, mask, y) if uidx == 1: print 'Original Cost ', cost / x.shape[3] f_update(lrate) ud_duration = time.time() - ud_start if n_ex != batch_size: mask[:, n_ex:] = numpy.ones( (maxlen, batch_size - n_ex)).astype('float32') if numpy.isnan(cost): print 'NaN detected in cost' return 1., 1., 1. if numpy.isinf(cost): print 'INF detected in cost' return 1., 1., 1. 
if numpy.mod(uidx, dispFreq) == 0: print 'Epoch ', epochidx, 'Update ', uidx, 'Cost ', cost / x.shape[ 3], 'PD ', pd_duration, 'UD ', ud_duration if numpy.mod(uidx, saveFreq) == 0: print 'Saving...', if best_p != None: params = copy.copy(best_p) else: params = unzip(tparams) numpy.savez(saveto, history_errs=history_errs, **params) pkl.dump(model_options, open('%s.pkl' % saveto, 'wb')) print 'Done' if numpy.mod(uidx, validFreq) == 0: use_noise.set_value(0.) train_err = 0 valid_err = 0 test_err = 0 print 'Computing predictions (This will take a while. Set the verbose flag if you want to see the progress)' #train_err = pred_acc(saveto, valid_batch_size, f_preds, maxlen, data_test_train_pb, dh_test_train, test_train_dataset_size, num_test_train_batches, last_n, test=False) if valid is not None: valid_err = pred_acc(saveto, valid_batch_size, f_preds, maxlen, data_test_valid_pb, dh_test_valid, test_valid_dataset_size, num_test_valid_batches, last_n, test=True) #if test is not None: # test_err = pred_acc(saveto, valid_batch_size, f_preds, maxlen, data_test_test_pb, dh_test_test, test_test_dataset_size, num_test_test_batches, last_n, test=True) history_errs.append([valid_err, test_err]) if epochidx == 0 or valid_err >= numpy.array( history_errs)[:, 0].max(): best_p = unzip( tparams) # p for min valid err / max valid acc print 'Accuracy: Train', train_err, 'Valid', valid_err, 'Test', test_err if n_ex == batch_size: print 'Seen %d training examples' % (n_examples_seen) else: print 'Seen %d training examples' % (n_examples_seen - batch_size + n_ex) use_noise.set_value(0.) train_err = 0 valid_err = 0 test_err = 0 print 'Computing predictions (This will take a while. 
Set the verbose flag if you want to see the progress)' #train_err = pred_acc(saveto, valid_batch_size, f_preds, maxlen, data_test_train_pb, dh_test_train, test_train_dataset_size, num_test_train_batches, last_n, test=False) if valid is not None: valid_err = pred_acc(saveto, valid_batch_size, f_preds, maxlen, data_test_valid_pb, dh_test_valid, test_valid_dataset_size, num_test_valid_batches, last_n, test=True) if test is not None: test_err = pred_acc(saveto, valid_batch_size, f_preds, maxlen, data_test_test_pb, dh_test_test, test_test_dataset_size, num_test_test_batches, last_n, test=True) history_errs.append([valid_err, test_err]) if epochidx == 0 or valid_err >= numpy.array( history_errs)[:, 0].max(): best_p = unzip(tparams) # p for min valid err / max valid acc print 'Accuracy: Train', train_err, 'Valid', valid_err, 'Test', test_err finally: #except KeyboardInterrupt: if best_p is not None: zipp(best_p, tparams) use_noise.set_value(0.) train_err = 0 valid_err = 0 test_err = 0 print 'Computing predictions (This will take a while. Set the verbose flag if you want to see the progress)' #train_err = pred_acc(saveto, valid_batch_size, f_preds, maxlen, data_test_train_pb, dh_test_train, test_train_dataset_size, num_test_train_batches, last_n, test=False) if valid is not None: valid_err = pred_acc(saveto, valid_batch_size, f_preds, maxlen, data_test_valid_pb, dh_test_valid, test_valid_dataset_size, num_test_valid_batches, last_n, test=True) if test is not None: test_err = pred_acc(saveto, valid_batch_size, f_preds, maxlen, data_test_test_pb, dh_test_test, test_test_dataset_size, num_test_test_batches, last_n, test=True) print 'Accuracy: Train', train_err, 'Valid', valid_err, 'Test', test_err params = copy.copy(best_p) numpy.savez(saveto, zipped_params=best_p, train_err=train_err, valid_err=valid_err, test_err=test_err, history_errs=history_errs, **params) print model_options return train_err, valid_err, test_err
class GICF(object):
    """Defines the model described in the paper.

    Creates filenames for each dataset and loads the data, then optimizes
    the cost function through the ``train`` method.
    """

    def __init__(self, dataset='movies'):
        """Initialize variables, resolve the filenames for ``dataset``,
        load the data and set the default parameters."""
        self.dataset = dataset
        self.filenames = self.get_filenames(dataset)
        self.test_groups = []
        self.test_instances = []
        self.train_data = None
        self.train_eval = None
        self.test_eval = None
        self.instance_eval = None
        self.group_acc = []
        self.instance_auc = []
        self.instance_acc = []
        self.train_acc = []
        self.total_iterations = 0
        self.load_data()  # takes a few seconds
        self.set_parameters()  # set default parameters. Can be changed later
        self.embeddings_dimension = self.train_data.get_embeddings_dimension()
        self._print_titles = '#iter\tACC Train\tAUC Train\tACC Test\tAUC Test\t\t|\tACC Sent\tAUC Sent\t\tPRC Sent'

    def get_filenames(self, dataset='movies'):
        """Return the (train, test, instance-labels) file paths of a dataset.

        Known datasets are 'movies', 'yelp' and 'amazon'.  For any other
        name a message is printed and ``None`` is returned.
        """
        if dataset == 'movies':
            dir_name = 'data/movies/'
        elif dataset == 'yelp':
            dir_name = 'data/yelp/'
        elif dataset == 'amazon':
            dir_name = 'data/amazon/cells/'
        else:
            print('Wrong dataset Name.')
            return
        embeddings_file_train = dir_name + 'train.emb'  # emb\t emb\t score\n
        embeddings_file_test = dir_name + 'test.emb'
        instance_labels = dir_name + 'test_sentences.emb'
        return embeddings_file_train, embeddings_file_test, instance_labels

    def load_data(self):
        """Makes the data handlers and evaluators from ``self.filenames``."""
        emb_train, emb_test, instance_labels = self.filenames
        self.train_data = DataHandler(emb_train, max_size=200000)
        self.train_eval = GroupEvaluator(data=self.train_data)
        self.test_eval = GroupEvaluator(
            data=DataHandler(emb_test, max_size=200000))
        self.instance_eval = InstanceEvaluator()
        self.instance_eval.load_labeled_instances(instance_labels)

    @property
    def _param_str(self):
        """A string with the parameters of the experiment"""
        return str(self.epochs) + 'x' + str(self.batch_size) + '_' + str(
            self.lr) + '_' + str(
                self.alpha_balance) + self.similarity_fn + str(
                    self.sim_variance)

    def set_parameters(self,
                       batch_size=500,
                       alpha_balance=0.04,
                       lr=0.1,
                       momentum_value=0.7,
                       similarity_fn='rbf',
                       sim_variance=0.7071,
                       epochs=3):
        """Set the parameters for the run/experiment.

        Also derives ``run_name``, ``dir_name`` and ``output_name`` from
        them and forwards ``batch_size`` to the training data handler.
        """
        self.alpha_balance = alpha_balance
        self.momentum_value = momentum_value
        self.similarity_fn = similarity_fn
        self.sim_variance = sim_variance
        self.epochs = epochs
        self.batch_size = batch_size
        self.lr = lr * self.batch_size  # learning rate is a function of batch size
        self.run_name = self._param_str
        self.dir_name = self.similarity_fn + '_' + str(
            self.batch_size) + '_' + str(self.epochs)
        self.output_name = './training_output/' + self.dataset + '/' + self.dir_name + '_'
        self.train_data.set_batch_size(batch_size)

    def train(self):
        """Run the optimization loop over ``self.epochs`` epochs.

        For every mini-batch the similarity and group derivatives are
        computed, progress is printed, and the theta with the best accuracy
        returned by ``_print_progress`` is saved.  At the end the last theta
        is saved and detailed stats for the best theta are printed.

        Returns:
            ``(self.train_acc, self.group_acc, self.instance_acc,
            self.instance_auc)``.

        NOTE(review): reconstructed from whitespace-collapsed source, both
        theta-update statements read as commented out, so ``theta`` keeps
        its initial all-zero value and ``theta_der`` is unused.  Confirm
        against the original file before relying on training results.
        The loop also reads and appends to ``Jilu``, a name that is not
        defined in this class and must exist (non-empty) at module level.
        """
        print('Optimizing for ', self._param_str)
        self.total_iterations = 0
        accs = []
        #theta = np.random.random(self.embeddings_dimension)
        theta = np.zeros(self.embeddings_dimension)
        #theta=np.loadtxt('training_output/movies/rbf_100_300_300x100_10.0_0.04rbf0.7071_last_theta', delimiter=',')
        print(theta)
        best_theta = theta
        best_acc = 0
        terminate = False
        for epoch in range(self.epochs):
            self.train_data.rewind_dataset(True)  # reset and shuffle data
            if terminate:
                break
            print('-------epoch ', epoch, '-----------')
            print(self._print_titles)
            X, gs, gl = self.train_data.get_next_batch()
            while X is not None:  # for each mini-batch
                # do gd step
                W_ij = similarity.get_sim_matrix(X, self.similarity_fn,
                                                 self.sim_variance)
                # calculate y_hat and derivative
                Y_ij = af.calculate_y(X, theta)
                Y_der_ij = af.calculate_y_der(Y_ij, X)
                # calculate cost
                similarity_cost = af.similarity_derivative(
                    Y_ij, Y_der_ij, W_ij) / (X.shape[0]**2)
                group_cost = self.alpha_balance * af.group_derivative(
                    Y_ij, Y_der_ij, gs, gl) / float(len(gs))
                #if self.total_iterations %8==0:
                theta_der = similarity_cost + group_cost
                #else:
                #theta_der = similarity_cost
                #print(theta_der)
                # new theta
                # theta = self.momentum_value * theta - self.lr / (epoch + 1) * theta_der
                #theta = theta - (1 - self.momentum_value) * self.lr / (epoch + 1) * theta_der#(1 - self.momentum_value) *
                self.total_iterations += 1
                # print progress
                #if self.total_iterations % 50 == 0:
                acc = self._print_progress(theta)
                accs.append(acc)
                # Jilu records the running best accuracy seen so far.
                if Jilu[-1] < acc:
                    Jilu.append(acc)
                else:
                    Jilu.append(Jilu[-1])
                if acc > best_acc:  # save best theta, based on training set
                    best_acc = acc
                    best_theta = theta
                    io.save_theta(theta,
                                  self.output_name + self._param_str,
                                  best=True)
                #if self._terminate_conditions(theta, accs):
                #if self.total_iterations == 100:
                #    terminate = True
                #    break
                X, gs, gl = self.train_data.get_next_batch()
        io.save_theta(theta, self.output_name + self._param_str + '_last')
        print('\n\n\n\t\t\t---BEST THETA VALUE (in training group)---')
        self._print_progress(best_theta, print_details=True)
        return self.train_acc, self.group_acc, self.instance_acc, self.instance_auc

    def _terminate_conditions(self, theta, accs):
        """Return True when the optimization should stop early.

        Stops when the first component of ``theta`` is NaN, or when more
        than 1500 iterations have run and the variance of the most recent
        accuracies (at most the last 50) is below 0.00005.
        """
        if np.isnan(theta[0]):
            return True
        recent = np.array(accs)
        if len(recent) > 50:
            # Keep the LAST 50 values; the previous slice `[:-50]` dropped
            # them and measured the variance of the oldest values instead.
            recent = recent[-50:]
        var = np.var(recent)
        if self.total_iterations > 1500 and var < 0.00005:
            return True
        return False

    def _print_progress(self, theta, print_details=False):
        """Evaluate ``theta`` on train groups, test groups and instances,
        print one progress line each, and record the scores.

        Returns the test-group accuracy, which ``train`` uses to pick the
        best theta.
        """
        # iterations train accuracy, train AUC, test accuracy, test AUC | instance accuracy, instance auc, instance PRC
        print('%6d\t' % self.total_iterations, )
        acc_train, auc_train = self.train_eval.evaluate_groups(
            theta, print_details)
        self.train_acc.append([acc_train])
        print(
            round(100 * acc_train, 2),
            ' \t\t(',
            round(100 * auc_train, 2),
            ')\t',
            'acc_train',
        )
        acc, auc = self.test_eval.evaluate_groups(theta, print_details)
        self.group_acc.append(acc)
        accback = acc  # keep the group accuracy; `acc` is reused below
        print(
            round(100 * acc, 2),
            ' \t\t(',
            round(100 * auc, 2),
            ')\t',
            'evagroup',
        )
        print('\t|\t', )
        acc, auc = self.instance_eval.evaluate_instances(theta)
        auprc = self.instance_eval.evaluate_instances(theta, prc=True)
        self.instance_acc.append(acc)
        self.instance_auc.append(auc)
        XX = len(Jilu)
        XX = range(XX)  # increasing sequence starting at 0, used as x-axis data
        if print_details:
            plt.plot(XX, Jilu)  # pass only x and y to draw the simplest plot
            plt.show()
            print(Jilu)
        print(
            round(100 * acc, 2),
            '\t\t(',
            round(100 * auc, 2),
            ')\t',
            ' \t(',
            round(100 * auprc, 2),
            ')\t',
            'evainstance',
        )
        return accback  #acc_train # based on this we decide best theta
def train( dim_out=100, # hidden layer dim for outputs ctx_dim=512, # context vector dimensionality dim=1000, # the number of LSTM units n_actions=3, # number of actions to predict n_layers_att=1, n_layers_out=1, n_layers_init=1, ctx2out=False, patience=10, max_epochs=5000, dispFreq=100, decay_c=0., alpha_c=0., temperature_inverse=1.0, lrate=0.01, selector=False, maxlen=30, # maximum length of the video optimizer='adam', batch_size=16, valid_batch_size=16, saveto='model.npz', validFreq=1000, saveFreq=1000, # save the parameters after every saveFreq updates dataset='flickr8k', # dummy dataset, replace with video ones dictionary=None, # word dictionary use_dropout=False, reload_=False, training_stride=1, testing_stride=8, last_n=16, fps=100, data_dir='/home/pmorerio/datasets/IIT_IFM/'): # Model options model_options = locals().copy() #model_options = validate_options(model_options) # reload options if reload_ and os.path.exists(saveto): print "Reloading options" with open('%s.pkl' % saveto, 'rb') as f: model_options = pkl.load(f) print '-----' print 'Booting up all data handlers' print 'Training set for actual training (randomized)' data_pb = TrainProto(batch_size, maxlen, training_stride, dataset, data_dir, fps) dh = DataHandler(data_pb) dataset_size = dh.GetDatasetSize() num_train_batches = dataset_size / batch_size if dataset_size % batch_size != 0: num_train_batches += 1 print num_train_batches, ' batches' valid = None # not None test = True # not None print 'Training set for training accuracy' # the training set is loaded twice: for actual training and for computing training error data_test_train_pb = TestTrainProto(valid_batch_size, maxlen, testing_stride, dataset, data_dir, fps) dh_test_train = DataHandler(data_test_train_pb) test_train_dataset_size = dh_test_train.GetDatasetSize() num_test_train_batches = test_train_dataset_size / valid_batch_size if test_train_dataset_size % valid_batch_size != 0: num_test_train_batches += 1 print num_test_train_batches, ' 
batches' if valid == True: print 'Validation set for validation accuracy' data_test_valid_pb = TestValidProto(valid_batch_size, maxlen, testing_stride, dataset, data_dir, fps) dh_test_valid = DataHandler(data_test_valid_pb) test_valid_dataset_size = dh_test_valid.GetDatasetSize() num_test_valid_batches = test_valid_dataset_size / valid_batch_size if test_valid_dataset_size % valid_batch_size != 0: num_test_valid_batches += 1 print num_test_valid_batches, ' batches' print 'Test set for test accuracy' data_test_test_pb = TestTestProto(valid_batch_size, maxlen, testing_stride, dataset, data_dir, fps) dh_test_test = DataHandler(data_test_test_pb) test_test_dataset_size = dh_test_test.GetDatasetSize() num_test_test_batches = test_test_dataset_size / valid_batch_size if test_test_dataset_size % valid_batch_size != 0: num_test_test_batches += 1 print num_test_test_batches, ' batches' print 'Data handlers ready' print '-----' print 'Building model' params = init_params(model_options) # actual parameter initialization # reload parameters if reload_ and os.path.exists(saveto): print "Reloading model" params = load_params(saveto, params) # simply initializes Theano shared variable according to param # numpy arrays -> theano shared variables tparams = init_tparams(params) # In order, we get: # 1) trng - theano random number generator # 2) use_noise - flag that turns on dropout # 3) inps - inputs for f_grad_shared # 4) alphas - the attention weigths # 4) cost - log likelihood for each sentence # 5) opts_out - optional outputs (e.g selector) # 6) preds - the computed labels trng, use_noise, \ inps, alphas, \ cost, \ opts_out, preds = \ build_model(tparams, model_options) # builds the whole computation graph # before any regularizer f_log_probs = theano.function(inps, -cost, profile=False) f_preds = theano.function(inps, preds, profile=False, on_unused_input='ignore') cost = cost.mean() # add L2 regularization costs if decay_c > 0.: decay_c = theano.shared(numpy.float32(decay_c), 
name='decay_c') weight_decay = 0. for kk, vv in tparams.iteritems(): weight_decay += (vv**2).sum() weight_decay *= decay_c cost += weight_decay # add attention penalty to the cost #if alpha_c > 0.: #alpha_c = theano.shared(numpy.float32(alpha_c), name='alpha_c') #alpha_reg = alpha_c * ((1.-alphas.sum(0))**2).sum(0).mean() #cost += alpha_reg # add ATTENTION FOCUS to the cost if alpha_c > 0.: alpha_c = theano.shared(numpy.float32(alpha_c), name='alpha_c') alpha_reg = -alpha_c * ( alphas * tensor.log(alphas + 1e-8)).sum(0).sum(0).mean() cost += alpha_reg # Backpropagation # gradient computation grads = tensor.grad(cost, wrt=itemlist(tparams)) # f_grad_shared computes the cost and updates adaptive learning rate variables # f_update updates the weights of the model lr = tensor.scalar(name='lr') f_grad_shared, f_update = eval(optimizer)(lr, tparams, grads, inps, cost) print 'Optimization' history_acc = [] # reload history if reload_ and os.path.exists(saveto): history_acc = numpy.load(saveto)['history_acc'].tolist() best_p = None bad_count = 0 uidx = 0 train_acc = 0 valid_acc = 0 test_acc = 0 for epochidx in xrange(max_epochs): # If the input sequences are of variable length get mask from the data loader instead of setting them all to one mask = numpy.ones((maxlen, batch_size)).astype('float32') print 'Epoch ', epochidx n_examples_seen = 0 estop = False # not used #if epochidx > 0: dh.Reset() # training data is shuffled at each epoch in Reset() udtime = 0 pdtime = 0 for tbidx in xrange(num_train_batches): n_examples_seen += batch_size uidx += 1 use_noise.set_value(1.) pd_start = time.time() x, y, n_ex = dh.GetBatch( data_pb ) # looks really slow. this is maybe why also predictions are slow (must get batches for all train/test/valid) if n_ex != batch_size: mask[:, n_ex:] = numpy.zeros( (maxlen, batch_size - n_ex)).astype('float32') pdtime += time.time() - pd_start # pd stands for prepare data? #if x == None: # this gives a Warning. 
Replaced with -> if x is None: if x is None: print 'Minibatch with zero sample under length ', maxlen continue ud_start = time.time() cost = f_grad_shared(x, mask, y) f_update(lrate) udtime += time.time() - ud_start # ud stands for use data? if n_ex != batch_size: mask[:, n_ex:] = numpy.ones( (maxlen, batch_size - n_ex)).astype('float32') if numpy.isnan(cost): print 'NaN detected in cost' return 1., 1., 1. if numpy.isinf(cost): print 'INF detected in cost' return 1., 1., 1. if numpy.mod(uidx, dispFreq) == 0: print 'Epoch ', epochidx, ' Update', uidx, ' Cost', cost, ' PD', pdtime / float( dispFreq), ' UD', udtime / float(dispFreq) pdtime = 0 udtime = 0 if numpy.mod(uidx, saveFreq) == 0: print 'Saving...', if best_p != None: params = copy.copy(best_p) else: params = unzip(tparams) numpy.savez(saveto, history_acc=history_acc, **params) pkl.dump(model_options, open('%s.pkl' % saveto, 'wb')) print 'Done' if numpy.mod(uidx, validFreq) == 0: use_noise.set_value(0.) train_acc = 0 valid_acc = 0 test_acc = 0 print 'Computing predictions (This will take a while. 
Set the verbose flag if you want to see the progress)' train_acc = pred_acc(saveto, valid_batch_size, f_preds, maxlen, data_test_train_pb, dh_test_train, test_train_dataset_size, num_test_train_batches, last_n, test=False) if valid is not None: valid_acc = pred_acc(saveto, valid_batch_size, f_preds, maxlen, data_test_valid_pb, dh_test_valid, test_valid_dataset_size, num_test_valid_batches, last_n, test=True) if test is not None: test_acc = pred_acc(saveto, valid_batch_size, f_preds, maxlen, data_test_test_pb, dh_test_test, test_test_dataset_size, num_test_test_batches, last_n, test=True) history_acc.append([train_acc, valid_acc, test_acc]) if uidx == 0 or valid_acc >= numpy.array(history_acc)[:, 1].max(): best_p = unzip( tparams) # p for min valid err / max valid acc print 'Accuracy: Train', train_acc, 'Valid', valid_acc, 'Test', test_acc #here ends the cycle over the batches if n_ex == batch_size: print 'Seen %d training examples' % (n_examples_seen) else: print 'Seen %d training examples' % (n_examples_seen - batch_size + n_ex) use_noise.set_value(0.) train_acc = 0 valid_acc = 0 test_acc = 0 print 'Computing predictions (This will take a while. 
Set the verbose flag if you want to see the progress)' train_acc = pred_acc(saveto, valid_batch_size, f_preds, maxlen, data_test_train_pb, dh_test_train, test_train_dataset_size, num_test_train_batches, last_n, test=False) if valid is not None: valid_acc = pred_acc(saveto, valid_batch_size, f_preds, maxlen, data_test_valid_pb, dh_test_valid, test_valid_dataset_size, num_test_valid_batches, last_n, test=True) if test is not None: test_acc = pred_acc(saveto, valid_batch_size, f_preds, maxlen, data_test_test_pb, dh_test_test, test_test_dataset_size, num_test_test_batches, last_n, test=True) history_acc.append([train_acc, valid_acc, test_acc]) if epochidx == 0 or valid_acc >= numpy.array(history_acc)[:, 1].max(): best_p = unzip(tparams) # p for min valid err / max valid acc print 'Accuracy: Train', train_acc, 'Valid', valid_acc, 'Test', test_acc # here ends the cycle over the epochs # use the best parameters for final checkpoint (if they exist) if best_p is not None: zipp(best_p, tparams) # if best param were found with validation, calculate accuracy with them if valid is not None: use_noise.set_value(0.) train_acc = 0 valid_acc = 0 test_acc = 0 print 'Computing predictions (This will take a while. 
Set the verbose flag if you want to see the progress)' train_acc = pred_acc(saveto, valid_batch_size, f_preds, maxlen, data_test_train_pb, dh_test_train, test_train_dataset_size, num_test_train_batches, last_n, test=False) valid_acc = pred_acc(saveto, valid_batch_size, f_preds, maxlen, data_test_valid_pb, dh_test_valid, test_valid_dataset_size, num_test_valid_batches, last_n, test=True) if test is not None: test_acc = pred_acc(saveto, valid_batch_size, f_preds, maxlen, data_test_test_pb, dh_test_test, test_test_dataset_size, num_test_test_batches, last_n, test=True) print 'Accuracy: Train', train_acc, 'Valid', valid_acc, 'Test', test_acc params = copy.copy(best_p) numpy.savez(saveto, zipped_params=best_p, train_acc=train_acc, valid_acc=valid_acc, test_acc=test_acc, history_acc=history_acc, **params) print model_options return train_acc, valid_acc, test_acc