def train_mlp(feature_dimension, context, hidden_size, weight_path, file_name1, file_name2, file_name3,
              L1_reg=0.0, L2_reg=0.0000,
              path_name='/exports/work/inf_hcrc_cstr_udialogue/siva/data/'):

    #voc_list = Vocabulary(path_name + 'train_modified1')
    #voc_list.vocab_create()
    #vocab = voc_list.vocab
    #vocab_size = voc_list.vocab_size
    #short_list = voc_list.short_list
    #short_list_size = voc_list.short_list_size
    voc_list = Vocabularyhash('/exports/work/inf_hcrc_cstr_udialogue/siva/data_normalization/vocab/wlist5c.nvp')
    voc_list.hash_create()
    vocab = voc_list.voc_hash
    vocab_size = voc_list.vocab_size

    #dataprovider_train = DataProvider(path_name + 'train', vocab, vocab_size, short_list)
    #dataprovider_valid = DataProvider(path_name + 'valid', vocab, vocab_size, short_list)
    #dataprovider_test = DataProvider(path_name + 'test', vocab, vocab_size, short_list)
    dataprovider_train = DataProvider(path_name + 'train_modified1_20m', vocab, vocab_size)
    dataprovider_valid = DataProvider(path_name + 'valid_modified1', vocab, vocab_size)
    dataprovider_test = DataProvider(path_name + 'test_modified1', vocab, vocab_size)

    print '..building the model'

    # symbolic variables for input, target vector and batch index
    index = T.lscalar('index')
    x1 = T.fvector('x1')
    x2 = T.fvector('x2')
    y = T.ivector('y')
    learning_rate = T.fscalar('learning_rate')

    # theano shared variables for train, valid and test
    train_set_x1 = theano.shared(numpy.empty((1), dtype='float32'), allow_downcast=True)
    train_set_x2 = theano.shared(numpy.empty((1), dtype='float32'), allow_downcast=True)
    train_set_y = theano.shared(numpy.empty((1), dtype='int32'), allow_downcast=True)

    valid_set_x1 = theano.shared(numpy.empty((1), dtype='float32'), allow_downcast=True)
    valid_set_x2 = theano.shared(numpy.empty((1), dtype='float32'), allow_downcast=True)
    valid_set_y = theano.shared(numpy.empty((1), dtype='int32'), allow_downcast=True)

    test_set_x1 = theano.shared(numpy.empty((1), dtype='float32'), allow_downcast=True)
    test_set_x2 = theano.shared(numpy.empty((1), dtype='float32'), allow_downcast=True)
    test_set_y = theano.shared(numpy.empty((1), dtype='int32'), allow_downcast=True)

    rng = numpy.random.RandomState()
    classifier = MLP(rng=rng, input1=x1, input2=x2, n_in=vocab_size,
                     fea_dim=int(feature_dimension), context_size=int(context),
                     n_hidden=int(hidden_size), n_out=vocab_size)
    cost = classifier.negative_log_likelihood(y) + L1_reg * classifier.L1 + L2_reg * classifier.L2_sqr

    # constructor for the learning rate scheduler
    learnrate_schedular = LearningRateNewBob(start_rate=0.005, scale_by=.5, max_epochs=9999,
                                             min_derror_ramp_start=.01, min_derror_stop=.01,
                                             init_error=100.)
    frame_error = classifier.errors(y)
    log_likelihood = classifier.sum(y)
    likelihood = classifier.likelihood(y)

    # test_model
    test_model = theano.function(inputs=[], outputs=[log_likelihood, likelihood],
                                 givens={x1: test_set_x1, x2: test_set_x2, y: test_set_y})
    # validation_model
    validate_model = theano.function(inputs=[], outputs=[frame_error, log_likelihood],
                                     givens={x1: valid_set_x1, x2: valid_set_x2, y: valid_set_y})

    gradient_param = []
    # calculates the gradient of cost with respect to parameters
    for param in classifier.params:
        gradient_param.append(T.cast(T.grad(cost, param), 'float32'))

    updates = []
    # updates the parameters
    for param, gradient in zip(classifier.params, gradient_param):
        updates.append((param, param - learning_rate * gradient))

    # training_model
    train_model = theano.function(inputs=[learning_rate], outputs=[cost], updates=updates,
                                  givens={x1: train_set_x1, x2: train_set_x2, y: train_set_y})

    print '.....training'
    best_valid_loss = numpy.inf
    start_time = time.time()

    while learnrate_schedular.get_rate() != 0:

        print 'learning_rate:', learnrate_schedular.get_rate()
        print 'epoch_number:', learnrate_schedular.epoch

        frames_showed, progress = 0, 0
        start_epoch_time = time.time()
        dataprovider_train.reset()

        for feats_lab_tuple in dataprovider_train:

            features, labels = feats_lab_tuple

            if labels is None or features is None:
                continue

            frames_showed += features.shape[0]

            for temp, i in zip(features, xrange(len(labels))):
                # one-hot encoding of the two context words
                temp_features1 = numpy.zeros(vocab_size, dtype='float32')
                temp_features2 = numpy.zeros(vocab_size, dtype='float32')
                temp_features1[temp[0]] = 1
                temp_features2[temp[1]] = 1
                train_set_x1.set_value(numpy.asarray(temp_features1, dtype='float32'), borrow=True)
                train_set_x2.set_value(numpy.asarray(temp_features2, dtype='float32'), borrow=True)
                train_set_y.set_value(numpy.asarray([labels[i]], dtype='int32'), borrow=True)
                out = train_model(numpy.array(learnrate_schedular.get_rate(), dtype='float32'))

            progress += 1
            if progress % 10000 == 0:
                end_time_progress = time.time()
                print 'PROGRESS: Processed %i bunches (%i frames), TIME: %f in seconds'\
                      %(progress, frames_showed, (end_time_progress - start_epoch_time))

            train_set_x1.set_value(numpy.empty((1), dtype='float32'))
            train_set_x2.set_value(numpy.empty((1), dtype='float32'))
            train_set_y.set_value(numpy.empty((1), dtype='int32'))

        end_time_progress = time.time()
        print 'PROGRESS: Processed %i bunches (%i frames), TIME: %f in seconds'\
              %(progress, frames_showed, (end_time_progress - start_epoch_time))

        classifier_name = 'MLP' + str(learnrate_schedular.epoch)
        save_mlp(classifier, weight_path + file_name1, classifier_name)
        save_learningrate(learnrate_schedular.get_rate(), weight_path + file_name3, classifier_name)

        print 'Validating...'
        valid_losses = []
        log_likelihood = []
        valid_frames_showed, progress = 0, 0
        start_valid_time = time.time()  # it is also stop of training time
        dataprovider_valid.reset()

        for feats_lab_tuple in dataprovider_valid:

            features, labels = feats_lab_tuple

            if labels is None or features is None:
                continue

            valid_frames_showed += features.shape[0]

            for temp, i in zip(features, xrange(len(labels))):
                temp_features1 = numpy.zeros(vocab_size, dtype='float32')
                temp_features2 = numpy.zeros(vocab_size, dtype='float32')
                temp_features1[temp[0]] = 1
                temp_features2[temp[1]] = 1
                valid_set_x1.set_value(numpy.asarray(temp_features1, dtype='float32'), borrow=True)
                valid_set_x2.set_value(numpy.asarray(temp_features2, dtype='float32'), borrow=True)
                valid_set_y.set_value(numpy.asarray([labels[i]], dtype='int32'), borrow=True)
                out = validate_model()
                error_rate = out[0]
                likelihoods = out[1]
                valid_losses.append(error_rate)
                log_likelihood.append(likelihoods)

            valid_set_x1.set_value(numpy.empty((1), 'float32'))
            valid_set_x2.set_value(numpy.empty((1), 'float32'))
            valid_set_y.set_value(numpy.empty((1), 'int32'))

            progress += 1
            if progress % 1000 == 0:
                end_time_valid_progress = time.time()
                print 'PROGRESS: Processed %i bunches (%i frames), TIME: %f in seconds'\
                      %(progress, valid_frames_showed, end_time_valid_progress - start_valid_time)

        end_time_valid_progress = time.time()
        print 'PROGRESS: Processed %i bunches (%i frames), TIME: %f in seconds'\
              %(progress, valid_frames_showed, end_time_valid_progress - start_valid_time)

        this_validation_loss = numpy.mean(valid_losses)
        entropy = (-numpy.sum(log_likelihood) / valid_frames_showed)
        print this_validation_loss, entropy, numpy.sum(log_likelihood)

        if entropy < best_valid_loss:
            learning_rate = learnrate_schedular.get_next_rate(entropy)
            best_valid_loss = entropy
        else:
            learnrate_schedular.rate = 0.0

    end_time = time.time()
    print 'The fine tuning ran for %.2fm' % ((end_time - start_time) / 60.)

    print 'Testing...'

    log_likelihood = []
    likelihoods = []
    test_frames_showed, progress = 0, 0
    start_test_time = time.time()  # it is also stop of training time
    dataprovider_test.reset()

    for feats_lab_tuple in dataprovider_test:

        features, labels = feats_lab_tuple

        if labels is None or features is None:
            continue

        test_frames_showed += features.shape[0]

        for temp, i in zip(features, xrange(len(labels))):
            temp_features1 = numpy.zeros(vocab_size, dtype='float32')
            temp_features2 = numpy.zeros(vocab_size, dtype='float32')
            temp_features1[temp[0]] = 1
            temp_features2[temp[1]] = 1
            test_set_x1.set_value(numpy.asarray(temp_features1, dtype='float32'), borrow=True)
            test_set_x2.set_value(numpy.asarray(temp_features2, dtype='float32'), borrow=True)
            test_set_y.set_value(numpy.asarray([labels[i]], dtype='int32'), borrow=True)
            out = test_model()
            log_likelihood.append(out[0])
            likelihoods.append(out[1])

        progress += 1
        if progress % 1000 == 0:
            end_time_test_progress = time.time()
            print 'PROGRESS: Processed %i bunches (%i frames), TIME: %f in seconds'\
                  %(progress, test_frames_showed, end_time_test_progress - start_test_time)

    end_time_test_progress = time.time()
    print 'PROGRESS: Processed %i bunches (%i frames), TIME: %f in seconds'\
          %(progress, test_frames_showed, end_time_test_progress - start_test_time)

    save_posteriors(log_likelihood, likelihoods, weight_path + file_name2)

    print numpy.sum(log_likelihood)
    likelihood_sum = (-numpy.sum(log_likelihood) / test_frames_showed)
    print 'entropy:', likelihood_sum
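
# Illustration: the inner training/validation/test loops above build full
# one-hot context vectors of length vocab_size by hand before every
# single-example update. A minimal sketch of that encoding as a standalone
# helper (for clarity only; `one_hot` is a hypothetical name and is not
# called by the training functions in this file):
def one_hot(word_index, vocab_size):
    # a float32 vector with a single 1.0 at the word's vocabulary index,
    # i.e. temp_features1 above is one_hot(temp[0], vocab_size)
    vec = numpy.zeros(vocab_size, dtype='float32')
    vec[word_index] = 1.0
    return vec
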
def train_mlprnn(weight_path=sys.argv[1], file_name1=sys.argv[2], L1_reg=0.0, L2_reg=0.0000,
                 path_name='/exports/work/inf_hcrc_cstr_udialogue/siva/data/'):

    voc_list = Vocabulary(path_name + 'train')
    voc_list.vocab_create()
    vocab = voc_list.vocab
    vocab_size = voc_list.vocab_size

    dataprovider_train = DataProvider(path_name + 'train', vocab, vocab_size)
    dataprovider_valid = DataProvider(path_name + 'valid', vocab, vocab_size)
    dataprovider_test = DataProvider(path_name + 'test', vocab, vocab_size)

    print '..building the model'

    # symbolic variables for input, target vector and batch index
    index = T.lscalar('index')
    x1 = T.fvector('x1')
    x2 = T.fvector('x2')
    x3 = T.fvector('x3')
    ht1 = T.fvector('ht1')
    y = T.ivector('y')
    learning_rate = T.fscalar('learning_rate')

    # theano shared variables for train, valid and test
    train_set_x1 = theano.shared(numpy.empty((1), dtype='float32'), allow_downcast=True)
    train_set_x2 = theano.shared(numpy.empty((1), dtype='float32'), allow_downcast=True)
    train_set_x3 = theano.shared(numpy.empty((1), dtype='float32'), allow_downcast=True)
    train_set_y = theano.shared(numpy.empty((1), dtype='int32'), allow_downcast=True)

    valid_set_x1 = theano.shared(numpy.empty((1), dtype='float32'), allow_downcast=True)
    valid_set_x2 = theano.shared(numpy.empty((1), dtype='float32'), allow_downcast=True)
    valid_set_x3 = theano.shared(numpy.empty((1), dtype='float32'), allow_downcast=True)
    valid_set_y = theano.shared(numpy.empty((1), dtype='int32'), allow_downcast=True)

    test_set_x1 = theano.shared(numpy.empty((1), dtype='float32'), allow_downcast=True)
    test_set_x2 = theano.shared(numpy.empty((1), dtype='float32'), allow_downcast=True)
    test_set_x3 = theano.shared(numpy.empty((1), dtype='float32'), allow_downcast=True)
    test_set_y = theano.shared(numpy.empty((1), dtype='int32'), allow_downcast=True)

    rng = numpy.random.RandomState()
    classifier = MLP_RNN(rng=rng, input1=x1, input2=x2, input3=x3, initial_hidden=ht1,
                         n_in=vocab_size, fea_dim=int(sys.argv[3]), context_size=2,
                         n_hidden=int(sys.argv[4]), n_out=vocab_size)
    hidden_state = theano.shared(numpy.empty((int(sys.argv[4]), ), dtype='float32'))
    cost = classifier.cost(y)

    # constructor for the learning rate scheduler
    learnrate_schedular = LearningRateNewBob(start_rate=0.05, scale_by=.5, max_epochs=9999,
                                             min_derror_ramp_start=.01, min_derror_stop=.01,
                                             init_error=100.)
    log_likelihood = classifier.sum(y)
    likelihood = classifier.likelihood(y)

    # test_model
    test_model = theano.function(inputs=[], outputs=[log_likelihood, likelihood],
                                 givens={x1: test_set_x1, x2: test_set_x2, x3: test_set_x3,
                                         ht1: hidden_state, y: test_set_y})
    # validation_model
    validate_model = theano.function(inputs=[], outputs=[log_likelihood],
                                     givens={x1: valid_set_x1, x2: valid_set_x2, x3: valid_set_x3,
                                             ht1: hidden_state, y: valid_set_y})

    gradient_param = []
    # calculates the gradient of cost with respect to parameters
    for param in classifier.params:
        gradient_param.append(T.cast(T.grad(cost, param), 'float32'))

    updates = []
    # updates the parameters
    for param, gradient in zip(classifier.params, gradient_param):
        updates.append((param, param - learning_rate * gradient))

    # training_model: also returns the RNN hidden activations so they can be
    # fed back in as the initial hidden state of the next step
    train_model = theano.function(inputs=[learning_rate],
                                  outputs=[cost, classifier.RNNhiddenlayer.output],
                                  updates=updates,
                                  givens={x1: train_set_x1, x2: train_set_x2, x3: train_set_x3,
                                          ht1: hidden_state, y: train_set_y})

    # initialise the feed-forward layers with pretrained MLP weights
    f = h5py.File(weight_path + file_name1, "r")
    for i in xrange(0, classifier.no_of_layers, 2):
        path_modified = '/' + 'MLP' + str(2) + '/layer' + str(i / 2)
        classifier.MLPparams[i].set_value(numpy.asarray(f[path_modified + "/W"].value, dtype='float32'), borrow=True)
        if i != 4:  # the layer stored at index 4 has no bias to load
            classifier.MLPparams[i + 1].set_value(numpy.asarray(f[path_modified + "/b"].value, dtype='float32'), borrow=True)
    f.close()

    print '.....training'
    best_valid_loss = numpy.inf
    start_time = time.time()

    while learnrate_schedular.get_rate() != 0:

        print 'learning_rate:', learnrate_schedular.get_rate()
        print 'epoch_number:', learnrate_schedular.epoch

        frames_showed, progress = 0, 0
        start_epoch_time = time.time()
        dataprovider_train.reset()

        for feats_lab_tuple in dataprovider_train:

            features, labels = feats_lab_tuple

            if labels is None or features is None:
                continue

            frames_showed += features.shape[0]

            for temp, i in zip(features, xrange(len(labels))):
                temp_features1 = numpy.zeros(vocab_size, dtype='float32')
                temp_features2 = numpy.zeros(vocab_size, dtype='float32')
                temp_features3 = numpy.zeros(vocab_size, dtype='float32')
                temp_features1[temp[0]] = 1
                temp_features2[temp[1]] = 1
                temp_features3[temp[1]] = 1
                train_set_x1.set_value(numpy.asarray(temp_features1, dtype='float32'), borrow=True)
                train_set_x2.set_value(numpy.asarray(temp_features2, dtype='float32'), borrow=True)
                # the original passed temp_features2 here, leaving temp_features3
                # unused; x3 should receive temp_features3 as in validation/test
                train_set_x3.set_value(numpy.asarray(temp_features3, dtype='float32'), borrow=True)
                train_set_y.set_value(numpy.asarray([labels[i]], dtype='int32'), borrow=True)
                out = train_model(numpy.array(learnrate_schedular.get_rate(), dtype='float32'))
                # carry the hidden activations over to the next step
                hidden_state.set_value(numpy.asarray(out[1], dtype='float32'), borrow=True)

            progress += 1
            if progress % 10000 == 0:
                end_time_progress = time.time()
                print 'PROGRESS: Processed %i bunches (%i frames), TIME: %f in seconds'\
                      %(progress, frames_showed, (end_time_progress - start_epoch_time))

            train_set_x1.set_value(numpy.empty((1), dtype='float32'))
            train_set_x2.set_value(numpy.empty((1), dtype='float32'))
            train_set_x3.set_value(numpy.empty((1), dtype='float32'))
            train_set_y.set_value(numpy.empty((1), dtype='int32'))

        end_time_progress = time.time()
        print 'PROGRESS: Processed %i bunches (%i frames), TIME: %f in seconds'\
              %(progress, frames_showed, (end_time_progress - start_epoch_time))

        print 'Validating...'
        valid_losses = []
        log_likelihood = []
        valid_frames_showed, progress = 0, 0
        start_valid_time = time.time()  # it is also stop of training time
        dataprovider_valid.reset()

        for feats_lab_tuple in dataprovider_valid:

            features, labels = feats_lab_tuple

            if labels is None or features is None:
                continue

            valid_frames_showed += features.shape[0]

            for temp, i in zip(features, xrange(len(labels))):
                temp_features1 = numpy.zeros(vocab_size, dtype='float32')
                temp_features2 = numpy.zeros(vocab_size, dtype='float32')
                temp_features3 = numpy.zeros(vocab_size, dtype='float32')
                temp_features1[temp[0]] = 1
                temp_features2[temp[1]] = 1
                temp_features3[temp[1]] = 1
                valid_set_x1.set_value(numpy.asarray(temp_features1, dtype='float32'), borrow=True)
                valid_set_x2.set_value(numpy.asarray(temp_features2, dtype='float32'), borrow=True)
                valid_set_x3.set_value(numpy.asarray(temp_features3, dtype='float32'), borrow=True)
                valid_set_y.set_value(numpy.asarray([labels[i]], dtype='int32'), borrow=True)
                out = validate_model()
                #error_rate = out[0]
                likelihoods = out[0]
                #valid_losses.append(error_rate)
                log_likelihood.append(likelihoods)

            valid_set_x1.set_value(numpy.empty((1), 'float32'))
            valid_set_y.set_value(numpy.empty((1), 'int32'))

            progress += 1
            if progress % 1000 == 0:
                end_time_valid_progress = time.time()
                print 'PROGRESS: Processed %i bunches (%i frames), TIME: %f in seconds'\
                      %(progress, valid_frames_showed, end_time_valid_progress - start_valid_time)

        end_time_valid_progress = time.time()
        print 'PROGRESS: Processed %i bunches (%i frames), TIME: %f in seconds'\
              %(progress, valid_frames_showed, end_time_valid_progress - start_valid_time)

        #this_validation_loss = numpy.mean(valid_losses)
        entropy = (-numpy.sum(log_likelihood) / valid_frames_showed)
        print entropy, numpy.sum(log_likelihood)

        if entropy < best_valid_loss:
            learning_rate = learnrate_schedular.get_next_rate(entropy)
            best_valid_loss = entropy
        else:
            learnrate_schedular.rate = 0.0

    end_time = time.time()
    print 'The fine tuning ran for %.2fm' % ((end_time - start_time) / 60.)

    print 'Testing...'

    log_likelihood = []
    likelihoods = []
    test_frames_showed, progress = 0, 0
    start_test_time = time.time()  # it is also stop of training time
    dataprovider_test.reset()

    for feats_lab_tuple in dataprovider_test:

        features, labels = feats_lab_tuple

        if labels is None or features is None:
            continue

        test_frames_showed += features.shape[0]

        for temp, i in zip(features, xrange(len(labels))):
            temp_features1 = numpy.zeros(vocab_size, dtype='float32')
            temp_features2 = numpy.zeros(vocab_size, dtype='float32')
            temp_features3 = numpy.zeros(vocab_size, dtype='float32')
            temp_features1[temp[0]] = 1
            temp_features2[temp[1]] = 1
            temp_features3[temp[1]] = 1
            test_set_x1.set_value(numpy.asarray(temp_features1, dtype='float32'), borrow=True)
            test_set_x2.set_value(numpy.asarray(temp_features2, dtype='float32'), borrow=True)
            test_set_x3.set_value(numpy.asarray(temp_features3, dtype='float32'), borrow=True)
            test_set_y.set_value(numpy.asarray([labels[i]], dtype='int32'), borrow=True)
            out = test_model()
            log_likelihood.append(out[0])
            likelihoods.append(out[1])

        progress += 1
        if progress % 1000 == 0:
            end_time_test_progress = time.time()
            print 'PROGRESS: Processed %i bunches (%i frames), TIME: %f in seconds'\
                  %(progress, test_frames_showed, end_time_test_progress - start_test_time)

    end_time_test_progress = time.time()
    print 'PROGRESS: Processed %i bunches (%i frames), TIME: %f in seconds'\
          %(progress, test_frames_showed, end_time_test_progress - start_test_time)

    print numpy.sum(log_likelihood)
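
# Note on the recurrence in train_mlprnn above: the compiled Theano graph is
# purely feed-forward; the recurrence is realised in Python by copying the
# hidden activations returned by train_model back into the `hidden_state`
# shared variable before the next word, so no gradient flows across time
# steps (truncated backpropagation with a window of one). A minimal numpy
# sketch of the same pattern (hypothetical `rnn_step` stands in for the
# compiled train_model):
def rnn_step(x, h_prev):
    # stand-in for one compiled training step: returns (cost, new hidden state)
    return 0.0, numpy.tanh(x + h_prev)

def run_sequence_sketch(sequence, n_hidden=4):
    h = numpy.zeros(n_hidden, dtype='float32')  # plays the role of hidden_state
    for x in sequence:
        cost, h = rnn_step(x, h)  # h is fed back, but treated as a constant
    return h
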
def train_mlp(L1_reg=0.0, L2_reg=0.0000, num_batches_per_bunch=512, batch_size=1,
              num_bunches_queue=5, offset=0,
              path_name='/afs/inf.ed.ac.uk/user/s12/s1264845/scratch/s1264845/data/'):

    voc_list = Vocabulary(path_name + 'train')
    voc_list.vocab_create()
    vocab = voc_list.vocab
    vocab_size = voc_list.vocab_size

    voc_list_valid = Vocabulary(path_name + 'valid')
    voc_list_valid.vocab_create()
    count = voc_list_valid.count

    voc_list_test = Vocabulary(path_name + 'test')
    voc_list_test.vocab_create()
    no_test_tokens = voc_list_test.count
    print 'The number of sentences in the test set:', no_test_tokens
    #print 'number of words in valid data:', count

    dataprovider_train = DataProvider(path_name + 'train', vocab, vocab_size)
    dataprovider_valid = DataProvider(path_name + 'valid', vocab, vocab_size)
    dataprovider_test = DataProvider(path_name + 'test', vocab, vocab_size)

    #learn_list = [0.1, 0.1, 0.1, 0.75, 0.5, 0.25, 0.125, 0.0625, 0]
    exp_name = 'fine_tuning.hdf5'
    posterior_path = 'log_likelihoods'

    print '..building the model'

    # symbolic variables for input, target vector and batch index
    index = T.lscalar('index')
    x = T.fmatrix('x')
    y = T.ivector('y')
    learning_rate = T.fscalar('learning_rate')

    # theano shared variables for train, valid and test
    train_set_x = theano.shared(numpy.empty((1, 1), dtype='float32'), allow_downcast=True)
    train_set_y = theano.shared(numpy.empty((1), dtype='int32'), allow_downcast=True)

    valid_set_x = theano.shared(numpy.empty((1, 1), dtype='float32'), allow_downcast=True)
    valid_set_y = theano.shared(numpy.empty((1), dtype='int32'), allow_downcast=True)

    test_set_x = theano.shared(numpy.empty((1, 1), dtype='float32'), allow_downcast=True)
    test_set_y = theano.shared(numpy.empty((1), dtype='int32'), allow_downcast=True)

    rng = numpy.random.RandomState(1234)
    classifier = MLP(rng=rng, input=x, n_in=vocab_size, n_hidden1=30, n_hidden2=60, n_out=vocab_size)
    #classifier = MLP(rng = rng, input = x, n_in = vocab_size, n_hidden = 60, n_out = vocab_size)
    cost = classifier.negative_log_likelihood(y) + L1_reg * classifier.L1 + L2_reg * classifier.L2_sqr

    # constructor for the learning rate scheduler
    learnrate_schedular = LearningRateNewBob(start_rate=0.001, scale_by=.5, max_epochs=9999,
                                             min_derror_ramp_start=.1, min_derror_stop=.1,
                                             init_error=100.)
    #learnrate_schedular = LearningRateList(learn_list)
    frame_error = classifier.errors(y)
    likelihood = classifier.sum(y)

    # test model
    test_model = theano.function(inputs=[index], outputs=likelihood,
                                 givens={x: test_set_x[index * batch_size:(index + 1) * batch_size],
                                         y: test_set_y[index * batch_size:(index + 1) * batch_size]})
    # validation_model
    validate_model = theano.function(inputs=[index], outputs=[frame_error, likelihood],
                                     givens={x: valid_set_x[index * batch_size:(index + 1) * batch_size],
                                             y: valid_set_y[index * batch_size:(index + 1) * batch_size]})

    gradient_param = []
    # calculates the gradient of cost with respect to parameters
    for param in classifier.params:
        gradient_param.append(T.cast(T.grad(cost, param), 'float32'))

    updates = []
    for param, gradient in zip(classifier.params, gradient_param):
        updates.append((param, param - learning_rate * gradient))

    # training_model
    train_model = theano.function(inputs=[index, theano.Param(learning_rate, default=0.01)],
                                  outputs=cost, updates=updates,
                                  givens={x: train_set_x[index * batch_size:(index + 1) * batch_size],
                                          y: train_set_y[index * batch_size:(index + 1) * batch_size]})

    #theano.printing.pydotprint(train_model, outfile = "pics/train.png", var_with_name_simple = True)

    print '.....training'
    best_valid_loss = numpy.inf
    epoch = 1
    start_time = time.time()

    while learnrate_schedular.get_rate() != 0:

        print 'learning_rate:', learnrate_schedular.get_rate()
        print 'epoch_number:', learnrate_schedular.epoch

        frames_showed, progress = 0, 0
        start_epoch_time = time.time()

        tqueue = TNetsCacheSimple.make_queue()
        cache = TNetsCacheSimple(tqueue, shuffle_frames=True, offset=0,
                                 batch_size=batch_size, num_batches_per_bunch=num_batches_per_bunch)
        cache.data_provider = dataprovider_train
        cache.start()

        train_cost = []
        while True:

            feats_lab_tuple = TNetsCacheSimple.get_elem_from_queue(tqueue)
            if isinstance(feats_lab_tuple, TNetsCacheLastElem):
                break

            features, labels = feats_lab_tuple

            train_set_x.set_value(features, borrow=True)
            train_set_y.set_value(numpy.asarray(labels.flatten(), dtype='int32'), borrow=True)

            frames_showed += features.shape[0]
            train_batches = features.shape[0] / batch_size
            # if part of the utterance is left over (smaller than batch_size),
            # take it into account with one extra batch at the end
            if features.shape[0] % batch_size != 0 or features.shape[0] < batch_size:
                train_batches += 1

            for i in xrange(train_batches):
                #train_cost.append(train_model(i, learnrate_schedular.get_rate()))
                train_model(i, learnrate_schedular.get_rate())

            progress += 1
            if progress % 10 == 0:
                end_time_progress = time.time()
                print 'PROGRESS: Processed %i bunches (%i frames), TIME: %f in seconds'\
                      %(progress, frames_showed, (end_time_progress - start_epoch_time))

        end_time_progress = time.time()
        print 'PROGRESS: Processed %i bunches (%i frames), TIME: %f in seconds'\
              %(progress, frames_showed, (end_time_progress - start_epoch_time))

        train_set_x.set_value(numpy.empty((1, 1), dtype='float32'))
        train_set_y.set_value(numpy.empty((1), dtype='int32'))

        classifier_name = 'MLP' + str(learnrate_schedular.epoch)
        save_mlp(classifier, GlobalCfg.get_working_dir() + exp_name, classifier_name)

        print 'Validating...'
        valid_losses = []
        log_likelihood = []
        valid_frames_showed, progress = 0, 0
        start_valid_time = time.time()  # it is also stop of training time

        tqueue = TNetsCacheSimple.make_queue()
        cache = TNetsCacheSimple(tqueue, offset=0, num_batches_per_bunch=16)
        cache.data_provider = dataprovider_valid
        cache.start()

        while True:

            feats_lab_tuple = TNetsCacheSimple.get_elem_from_queue(tqueue)
            if isinstance(feats_lab_tuple, TNetsCacheLastElem):
                break

            features, labels = feats_lab_tuple
            valid_frames_showed += features.shape[0]

            valid_set_x.set_value(features, borrow=True)
            valid_set_y.set_value(numpy.asarray(labels.flatten(), 'int32'), borrow=True)

            valid_batches = features.shape[0] / batch_size
            # if part of the utterance is left over (smaller than batch_size),
            # take it into account with one extra batch at the end
            if features.shape[0] % batch_size != 0 or features.shape[0] < batch_size:
                valid_batches += 1

            for i in xrange(valid_batches):
                out = validate_model(i)
                error_rate = out[0]
                likelihoods = out[1]
                valid_losses.append(error_rate)
                log_likelihood.append(likelihoods)
                #save_posteriors(likelihoods, GlobalCfg.get_working_dir() + posterior_path, str(ex_num), str(learnrate_schedular.epoch))

            progress += 1
            if progress % 10 == 0:
                end_time_valid_progress = time.time()
                print 'PROGRESS: Processed %i bunches (%i frames), TIME: %f in seconds'\
                      %(progress, valid_frames_showed, end_time_valid_progress - start_valid_time)

        end_time_valid_progress = time.time()
        print 'PROGRESS: Processed %i bunches (%i frames), TIME: %f in seconds'\
              %(progress, valid_frames_showed, end_time_valid_progress - start_valid_time)

        valid_set_x.set_value(numpy.empty((1, 1), 'float32'))
        valid_set_y.set_value(numpy.empty((1), 'int32'))

        end_epoch_time = time.time()
        print 'time taken for this epoch in seconds: %f' % (end_epoch_time - start_epoch_time)

        this_validation_loss = numpy.mean(valid_losses)
        loglikelihood_sum = numpy.sum(log_likelihood)
        #ppl = math.exp(- loglikelihood_sum / count)
        #print 'ppl:', ppl
        print 'error_rate:', this_validation_loss
        print 'valid log likelihood:', loglikelihood_sum

        if this_validation_loss < best_valid_loss:
            learning_rate = learnrate_schedular.get_next_rate(this_validation_loss * 100.)
            best_valid_loss = this_validation_loss
            #best_epoch = learnrate_schedular.epoch - 1
        else:
            learnrate_schedular.rate = 0.0

    end_time = time.time()
    print 'The fine tuning ran for %.2fm' % ((end_time - start_time) / 60.)

    print 'Testing...'
    log_likelihood_test = []
    test_frames_showed, progress = 0, 0
    start_test_time = time.time()  # it is also stop of training time

    tqueue = TNetsCacheSimple.make_queue()
    cache = TNetsCacheSimple(tqueue, offset=0, num_batches_per_bunch=16)
    cache.data_provider = dataprovider_test
    cache.start()

    while True:

        feats_lab_tuple = TNetsCacheSimple.get_elem_from_queue(tqueue)
        if isinstance(feats_lab_tuple, TNetsCacheLastElem):
            break

        features, labels = feats_lab_tuple
        test_frames_showed += features.shape[0]

        test_set_x.set_value(features, borrow=True)
        test_set_y.set_value(numpy.asarray(labels.flatten(), 'int32'), borrow=True)

        test_batches = features.shape[0] / batch_size
        # if part of the utterance is left over (smaller than batch_size),
        # take it into account with one extra batch at the end
        if features.shape[0] % batch_size != 0 or features.shape[0] < batch_size:
            test_batches += 1

        for i in xrange(test_batches):
            log_likelihood_test.append(test_model(i))

        progress += 1
        if progress % 10 == 0:
            end_time_test_progress = time.time()
            print 'PROGRESS: Processed %i bunches (%i frames), TIME: %f in seconds'\
                  %(progress, test_frames_showed, end_time_test_progress - start_test_time)

    end_time_test_progress = time.time()
    print 'PROGRESS: Processed %i bunches (%i frames), TIME: %f in seconds'\
          %(progress, test_frames_showed, end_time_test_progress - start_test_time)

    test_set_x.set_value(numpy.empty((1, 1), 'float32'))
    test_set_y.set_value(numpy.empty((1), 'int32'))

    likelihood_sum = numpy.sum(log_likelihood_test)
    print 'likelihood_sum', likelihood_sum
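
# The update lists built in the training functions above implement plain
# (non-momentum) stochastic gradient descent: each parameter moves one step
# against its gradient. The same rule in bare numpy, for one hypothetical
# parameter (a sketch for clarity, not part of the model code):
def sgd_update_sketch(param, gradient, lr=0.001):
    # param and gradient are numpy arrays of the same shape;
    # returns the parameter after one update step
    return param - lr * gradient
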
def train_mlpclasses(path_name='/exports/work/inf_hcrc_cstr_udialogue/siva/data/',
                     n_hidden=int(sys.argv[1]), n_classes=int(sys.argv[2])):

    voc_list = Vocabulary(path_name + 'train', n_classes)
    voc_list.vocab_create()
    voc_list.class_label()
    vocab = voc_list.vocab
    vocab_size = voc_list.vocab_size
    classes = voc_list.classes

    dataprovider_train = DataProvider(path_name + 'train', vocab, vocab_size, classes)
    dataprovider_valid = DataProvider(path_name + 'valid', vocab, vocab_size, classes)
    dataprovider_test = DataProvider(path_name + 'test', vocab, vocab_size, classes)

    print '..building the model'

    # symbolic variables for input, target vector and batch index
    index = T.lscalar('index')
    x1 = T.fvector('x1')
    x2 = T.fvector('x2')
    y_class = T.ivector('y_class')
    y_word = T.ivector('y_word')
    learning_rate = T.fscalar('learning_rate')

    # theano shared variables for train, valid and test
    train_set_x1 = theano.shared(numpy.empty((1), dtype='float32'), allow_downcast=True)
    train_set_x2 = theano.shared(numpy.empty((1), dtype='float32'), allow_downcast=True)
    train_set_y_class = theano.shared(numpy.empty((1), dtype='int32'), allow_downcast=True)
    train_set_y_word = theano.shared(numpy.empty((1), dtype='int32'), allow_downcast=True)

    valid_set_x1 = theano.shared(numpy.empty((1), dtype='float32'), allow_downcast=True)
    valid_set_x2 = theano.shared(numpy.empty((1), dtype='float32'), allow_downcast=True)
    valid_set_y_class = theano.shared(numpy.empty((1), dtype='int32'), allow_downcast=True)
    valid_set_y_word = theano.shared(numpy.empty((1), dtype='int32'), allow_downcast=True)

    test_set_x1 = theano.shared(numpy.empty((1), dtype='float32'), allow_downcast=True)
    test_set_x2 = theano.shared(numpy.empty((1), dtype='float32'), allow_downcast=True)
    test_set_x3 = theano.shared(numpy.empty((1), dtype='float32'), allow_downcast=True)
    test_set_y_class = theano.shared(numpy.empty((1), dtype='int32'), allow_downcast=True)
    test_set_y_word = theano.shared(numpy.empty((1), dtype='int32'), allow_downcast=True)

    rng = numpy.random.RandomState()
    classifier = MLPClasses(rng=rng, input1=x1, input2=x2, n_in=vocab_size, fea_dim=50,
                            context_size=2, n_hidden=n_hidden, classes=classes)
    classcost = classifier.Classcost(y_class)
    wordcost = classifier.Wordcost(y_word)

    # constructor for the learning rate scheduler
    learnrate_schedular = LearningRateNewBob(start_rate=float(sys.argv[3]), scale_by=.5, max_epochs=9999,
                                             min_derror_ramp_start=.01, min_derror_stop=.01,
                                             init_error=100.)
    class_log_likelihood = classifier.Classsum(y_class)
    word_log_likelihood = classifier.Wordsum(y_word)

    # test_model
    test_model = theano.function(inputs=[], outputs=[class_log_likelihood, word_log_likelihood],
                                 givens={x1: test_set_x1, x2: test_set_x2,
                                         y_class: test_set_y_class, y_word: test_set_y_word})
    # validation_model
    validate_model = theano.function(inputs=[], outputs=[class_log_likelihood, word_log_likelihood],
                                     givens={x1: valid_set_x1, x2: valid_set_x2,
                                             y_class: valid_set_y_class, y_word: valid_set_y_word})

    gradient_wordparam = []
    gradient_classparam = []
    gradient_param = []
    # calculates the gradient of each cost with respect to its parameters; the
    # first two (shared) parameters get separate class and word gradients,
    # which are summed below
    for param, i in zip(classifier.Classparams, xrange(len(classifier.Classparams))):
        if i <= 1:
            gradient_param.append(T.grad(classcost, param))
        else:
            gradient_classparam.append(T.grad(classcost, param))
    for param, i in zip(classifier.Wordparams, xrange(len(classifier.Wordparams))):
        if i <= 1:
            gradient_param.append(T.grad(wordcost, param))
        else:
            gradient_wordparam.append(T.grad(wordcost, param))
    for i in xrange(len(gradient_wordparam)):
        gradient_param.append(gradient_classparam[i] + gradient_wordparam[i])

    updates = []
    # updates the parameters
    for param, gradient in zip(classifier.params, gradient_param):
        updates.append((param, param - learning_rate * gradient))

    # training_model
    train_model = theano.function(inputs=[learning_rate],
                                  outputs=[classcost, wordcost,
                                           classifier.WordoutputLayer.W, classifier.WordoutputLayer.b,
                                           class_log_likelihood, word_log_likelihood],
                                  updates=updates,
                                  givens={x1: train_set_x1, x2: train_set_x2,
                                          y_class: train_set_y_class, y_word: train_set_y_word})

    # one output weight matrix and bias per word class
    w_dict, b_dict = {}, {}
    for i in xrange(n_classes):
        W_values = numpy.asarray(rng.uniform(low=-numpy.sqrt(6. / (n_hidden + len(classes[i]))),
                                             high=numpy.sqrt(6. / (n_hidden + len(classes[i]))),
                                             size=(n_hidden, len(classes[i]))),
                                 dtype='float32')
        w_dict[i] = W_values
        b_values = numpy.zeros((len(classes[i]), ), dtype='float32')
        b_dict[i] = b_values

    print '.....training'
    best_valid_loss = numpy.inf
    start_time = time.time()

    while learnrate_schedular.get_rate() != 0:

        print 'learning_rate:', learnrate_schedular.get_rate()
        print 'epoch_number:', learnrate_schedular.epoch

        train_loglikelihood = []
        frames_showed, progress = 0, 0
        start_epoch_time = time.time()
        dataprovider_train.reset()

        for feats_lab_tuple in dataprovider_train:

            features, labels = feats_lab_tuple

            if labels is None or features is None:
                continue

            frames_showed += features.shape[0]

            for temp, i in zip(features, xrange(len(labels))):
                temp_features1 = numpy.zeros(vocab_size, dtype='float32')
                temp_features2 = numpy.zeros(vocab_size, dtype='float32')
                temp_features1[temp[0]] = 1
                temp_features2[temp[1]] = 1
                train_set_x1.set_value(numpy.asarray(temp_features1, dtype='float32'), borrow=True)
                # the original passed temp_features1 here, leaving temp_features2
                # unused; x2 should receive temp_features2 as in validation/test
                train_set_x2.set_value(numpy.asarray(temp_features2, dtype='float32'), borrow=True)
                train_set_y_class.set_value(numpy.asarray([labels[i][1]], dtype='int32'), borrow=True)
                train_set_y_word.set_value(numpy.asarray([labels[i][0]], dtype='int32'), borrow=True)
                # swap in the output weights of the target word's class
                classifier.WordoutputLayer.W.set_value(numpy.asarray(w_dict[labels[i][1]], dtype='float32'), borrow=True)
                classifier.WordoutputLayer.b.set_value(numpy.asarray(b_dict[labels[i][1]], dtype='float32'), borrow=True)
                out = train_model(numpy.asarray(learnrate_schedular.get_rate(), dtype='float32'))
                # store the updated class-specific weights back
                w_dict[labels[i][1]], b_dict[labels[i][1]] = out[2], out[3]
                train_loglikelihood.append(out[4] + out[5])
                #print out[4] + out[5]

            progress += 1
            if progress % 10000 == 0:
                end_time_progress = time.time()
                print 'PROGRESS: Processed %i bunches (%i frames), TIME: %f in seconds'\
                      %(progress, frames_showed, (end_time_progress - start_epoch_time))

            train_set_x1.set_value(numpy.empty((1, ), dtype='float32'))
            train_set_x2.set_value(numpy.empty((1, ), dtype='float32'))
            train_set_y_class.set_value(numpy.empty((1), dtype='int32'))
            train_set_y_word.set_value(numpy.empty((1), dtype='int32'))

        end_time_progress = time.time()
        print 'PROGRESS: Processed %i bunches (%i frames), TIME: %f in seconds'\
              %(progress, frames_showed, (end_time_progress - start_epoch_time))

        print numpy.sum(train_loglikelihood)
        print 'Validating...'
        log_likelihood = []
        valid_frames_showed, progress = 0, 0
        start_valid_time = time.time()  # it is also stop of training time
        dataprovider_valid.reset()

        for feats_lab_tuple in dataprovider_valid:

            features, labels = feats_lab_tuple

            if labels is None or features is None:
                continue

            valid_frames_showed += features.shape[0]

            for temp, i in zip(features, xrange(len(labels))):
                temp_features1 = numpy.zeros(vocab_size, dtype='float32')
                temp_features2 = numpy.zeros(vocab_size, dtype='float32')
                temp_features1[temp[0]] = 1
                temp_features2[temp[1]] = 1
                valid_set_x1.set_value(numpy.asarray(temp_features1, dtype='float32'), borrow=True)
                valid_set_x2.set_value(numpy.asarray(temp_features2, dtype='float32'), borrow=True)
                valid_set_y_class.set_value(numpy.asarray([labels[i][1]], dtype='int32'), borrow=True)
                valid_set_y_word.set_value(numpy.asarray([labels[i][0]], dtype='int32'), borrow=True)
                classifier.WordoutputLayer.W.set_value(numpy.asarray(w_dict[labels[i][1]], dtype='float32'), borrow=True)
                classifier.WordoutputLayer.b.set_value(numpy.asarray(b_dict[labels[i][1]], dtype='float32'), borrow=True)
                out = validate_model()
                log_likelihood.append(sum(out))

            valid_set_x1.set_value(numpy.empty((1), 'float32'))
            valid_set_x2.set_value(numpy.empty((1), 'float32'))
            valid_set_y_class.set_value(numpy.empty((1), 'int32'))
            valid_set_y_word.set_value(numpy.empty((1), 'int32'))

            progress += 1
            if progress % 1000 == 0:
                end_time_valid_progress = time.time()
                print 'PROGRESS: Processed %i bunches (%i frames), TIME: %f in seconds'\
                      %(progress, valid_frames_showed, end_time_valid_progress - start_valid_time)

        end_time_valid_progress = time.time()
        print 'PROGRESS: Processed %i bunches (%i frames), TIME: %f in seconds'\
              %(progress, valid_frames_showed, end_time_valid_progress - start_valid_time)

        entropy = (-numpy.sum(log_likelihood) / valid_frames_showed)
        print entropy, numpy.sum(log_likelihood)

        if entropy < best_valid_loss:
            learning_rate = learnrate_schedular.get_next_rate(entropy)
            best_valid_loss = entropy
        else:
            learnrate_schedular.rate = 0.0

    end_time = time.time()
    print 'The fine tuning ran for %.2fm' % ((end_time - start_time) / 60.)

    print 'Testing...'
    log_likelihood = []
    likelihoods = []
    test_frames_showed, progress = 0, 0
    start_test_time = time.time()  # it is also stop of training time
    dataprovider_test.reset()

    for feats_lab_tuple in dataprovider_test:

        features, labels = feats_lab_tuple

        if labels is None or features is None:
            continue

        test_frames_showed += features.shape[0]

        for temp, i in zip(features, xrange(len(labels))):
            temp_features1 = numpy.zeros(vocab_size, dtype='float32')
            temp_features2 = numpy.zeros(vocab_size, dtype='float32')
            temp_features1[temp[0]] = 1
            temp_features2[temp[1]] = 1
            test_set_x1.set_value(numpy.asarray(temp_features1, dtype='float32'), borrow=True)
            test_set_x2.set_value(numpy.asarray(temp_features2, dtype='float32'), borrow=True)
            test_set_y_class.set_value(numpy.asarray([labels[i][1]], dtype='int32'), borrow=True)
            test_set_y_word.set_value(numpy.asarray([labels[i][0]], dtype='int32'), borrow=True)
            classifier.WordoutputLayer.W.set_value(numpy.asarray(w_dict[labels[i][1]], dtype='float32'), borrow=True)
            classifier.WordoutputLayer.b.set_value(numpy.asarray(b_dict[labels[i][1]], dtype='float32'), borrow=True)
            out = test_model()
            log_likelihood.append(sum(out))

        progress += 1
        if progress % 1000 == 0:
            end_time_test_progress = time.time()
            print 'PROGRESS: Processed %i bunches (%i frames), TIME: %f in seconds'\
                  %(progress, test_frames_showed, end_time_test_progress - start_test_time)

    end_time_test_progress = time.time()
    print 'PROGRESS: Processed %i bunches (%i frames), TIME: %f in seconds'\
          %(progress, test_frames_showed, end_time_test_progress - start_test_time)

    #save_posteriors(log_likelihood, likelihoods, weight_path + file_name2)
    print numpy.sum(log_likelihood)
    likelihood_sum = (-numpy.sum(log_likelihood) / test_frames_showed)
    print 'entropy:', likelihood_sum
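
# The class-based model above factorises the word probability as
# P(w | h) = P(class(w) | h) * P(w | class(w), h), which is why the
# validation and test loops score each example with sum(out): the class
# log-likelihood and the within-class word log-likelihood add up to the
# word's total log-probability. A numeric illustration with made-up values
# (a sketch only, not part of the model code):
def class_factored_logprob_sketch():
    log_p_class = numpy.log(0.25)          # stand-in for log P(class(w) | h)
    log_p_word_in_class = numpy.log(0.1)   # stand-in for log P(w | class(w), h)
    return log_p_class + log_p_word_in_class  # = log P(w | h)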
def train_mlpclasses(path_name = '/exports/work/inf_hcrc_cstr_udialogue/siva/data/', n_hidden = int(sys.argv[1]), n_classes = int(sys.argv[2])): voc_list = Vocabulary(path_name + 'train', n_classes) voc_list.vocab_create() voc_list.class_label() vocab = voc_list.vocab vocab_size = voc_list.vocab_size classes = voc_list.classes dataprovider_train = DataProvider(path_name + 'train', vocab, vocab_size, classes) dataprovider_valid = DataProvider(path_name + 'valid', vocab, vocab_size, classes) dataprovider_test = DataProvider(path_name + 'test', vocab, vocab_size, classes) print '..building the model' #symbolic variables for input, target vector and batch index index = T.lscalar('index') x1 = T.fvector('x1') x2 = T.fvector('x2') y_class = T.ivector('y_class') y_word = T.ivector('y_word') learning_rate = T.fscalar('learning_rate') #theano shared variables for train, valid and test train_set_x1 = theano.shared(numpy.empty((1), dtype='float32'), allow_downcast = True) train_set_x2 = theano.shared(numpy.empty((1), dtype='float32'), allow_downcast = True) train_set_y_class = theano.shared(numpy.empty((1), dtype = 'int32'), allow_downcast = True) train_set_y_word = theano.shared(numpy.empty((1), dtype = 'int32'), allow_downcast = True) valid_set_x1 = theano.shared(numpy.empty((1), dtype='float32'), allow_downcast = True) valid_set_x2 = theano.shared(numpy.empty((1), dtype='float32'), allow_downcast = True) valid_set_y_class = theano.shared(numpy.empty((1), dtype = 'int32'), allow_downcast = True) valid_set_y_word = theano.shared(numpy.empty((1), dtype = 'int32'), allow_downcast = True) test_set_x1 = theano.shared(numpy.empty((1), dtype='float32'), allow_downcast = True) test_set_x2 = theano.shared(numpy.empty((1), dtype='float32'), allow_downcast = True) test_set_x3 = theano.shared(numpy.empty((1), dtype='float32'), allow_downcast = True) test_set_y_class = theano.shared(numpy.empty((1), dtype = 'int32'), allow_downcast = True) test_set_y_word = theano.shared(numpy.empty((1), dtype = 'int32'), allow_downcast = True) rng = numpy.random.RandomState() classifier = MLPClasses(rng = rng, input1 = x1, input2 = x2, n_in = vocab_size, fea_dim = 50, context_size = 2, n_hidden = n_hidden, classes = classes) classcost = classifier.Classcost(y_class) wordcost = classifier.Wordcost(y_word) #constructor for learning rate class learnrate_schedular = LearningRateNewBob(start_rate = float(sys.argv[3]), scale_by = .5, max_epochs = 9999,\ min_derror_ramp_start = .01, min_derror_stop = .01, init_error = 100.) 
    class_log_likelihood = classifier.Classsum(y_class)
    word_log_likelihood = classifier.Wordsum(y_word)

    #test_model
    test_model = theano.function(inputs = [], outputs = [class_log_likelihood, word_log_likelihood], \
                    givens = {x1: test_set_x1, x2: test_set_x2, y_class: test_set_y_class, y_word: test_set_y_word})
    #validation_model
    validate_model = theano.function(inputs = [], outputs = [class_log_likelihood, word_log_likelihood], \
                    givens = {x1: valid_set_x1, x2: valid_set_x2, y_class: valid_set_y_class, y_word: valid_set_y_word})

    gradient_wordparam = []
    gradient_classparam = []
    gradient_param = []
    #gradients of the two costs with respect to the parameters; the first two
    #entries of Classparams/Wordparams are the shared layers, the rest belong
    #to the class and word output layers respectively
    for param, i in zip(classifier.Classparams, xrange(len(classifier.Classparams))):
        if i <= 1:
            gradient_param.append(T.grad(classcost, param))
        else:
            gradient_classparam.append(T.grad(classcost, param))
    for param, i in zip(classifier.Wordparams, xrange(len(classifier.Wordparams))):
        if i <= 1:
            gradient_param.append(T.grad(wordcost, param))
        else:
            gradient_wordparam.append(T.grad(wordcost, param))
    #output-layer parameters receive the sum of the class and word gradients
    for i in xrange(len(gradient_wordparam)):
        gradient_param.append(gradient_classparam[i] + gradient_wordparam[i])

    updates = []
    #plain SGD updates of the parameters
    for param, gradient in zip(classifier.params, gradient_param):
        updates.append((param, param - learning_rate * gradient))

    #training_model; it also returns the word output layer weights so they can
    #be written back into the per-class dictionaries after each update
    train_model = theano.function(inputs = [learning_rate],
                    outputs = [classcost, wordcost,
                               classifier.WordoutputLayer.W, classifier.WordoutputLayer.b,
                               class_log_likelihood, word_log_likelihood],
                    updates = updates,
                    givens = {x1: train_set_x1, x2: train_set_x2, y_class: train_set_y_class, y_word: train_set_y_word})

    #one output weight matrix and bias vector per word class
    w_dict, b_dict = {}, {}
    for i in xrange(n_classes):
        W_values = numpy.asarray(rng.uniform(
                        low = -numpy.sqrt(6./(n_hidden + len(classes[i]))),
                        high = numpy.sqrt(6./(n_hidden + len(classes[i]))),
                        size = (n_hidden, len(classes[i]))), dtype = 'float32')
        w_dict[i] = W_values
        b_values = numpy.zeros((len(classes[i]), ), dtype = 'float32')
        b_dict[i] = b_values

    print '.....training'
    best_valid_loss = numpy.inf
    start_time = time.time()
    while(learnrate_schedular.get_rate() != 0):
        print 'learning_rate:', learnrate_schedular.get_rate()
        print 'epoch_number:', learnrate_schedular.epoch

        train_loglikelihood = []
        frames_showed, progress = 0, 0
        start_epoch_time = time.time()
        dataprovider_train.reset()
        for feats_lab_tuple in dataprovider_train:
            features, labels = feats_lab_tuple
            if labels is None or features is None:
                continue
            frames_showed += features.shape[0]
            for temp, i in zip(features, xrange(len(labels))):
                #one-hot encoding of the two context words
                temp_features1 = numpy.zeros(vocab_size, dtype = 'float32')
                temp_features2 = numpy.zeros(vocab_size, dtype = 'float32')
                temp_features1[temp[0]] = 1
                temp_features2[temp[1]] = 1
                train_set_x1.set_value(numpy.asarray(temp_features1, dtype = 'float32'), borrow = True)
                train_set_x2.set_value(numpy.asarray(temp_features2, dtype = 'float32'), borrow = True)
                train_set_y_class.set_value(numpy.asarray([labels[i][1]], dtype = 'int32'), borrow = True)
                train_set_y_word.set_value(numpy.asarray([labels[i][0]], dtype = 'int32'), borrow = True)
                #swap in the output weights of the target word's class
                classifier.WordoutputLayer.W.set_value(numpy.asarray(w_dict[labels[i][1]], dtype = 'float32'), borrow = True)
                classifier.WordoutputLayer.b.set_value(numpy.asarray(b_dict[labels[i][1]], dtype = 'float32'), borrow = True)
                out = train_model(numpy.asarray(learnrate_schedular.get_rate(), dtype = 'float32'))
                #write the updated weights back into the class dictionaries
                w_dict[labels[i][1]], b_dict[labels[i][1]] = out[2], out[3]
                train_loglikelihood.append(out[4] + out[5])
                #print out[4] + out[5]
            progress += 1
            if progress%10000==0:
                end_time_progress = time.time()
                print 'PROGRESS: Processed %i bunches (%i frames), TIME: %f in seconds'\
                      %(progress, frames_showed,(end_time_progress-start_epoch_time))
            train_set_x1.set_value(numpy.empty((1, ), dtype = 'float32'))
            train_set_x2.set_value(numpy.empty((1, ), dtype = 'float32'))
            train_set_y_class.set_value(numpy.empty((1), dtype = 'int32'))
            train_set_y_word.set_value(numpy.empty((1), dtype = 'int32'))
        end_time_progress = time.time()
        print 'PROGRESS: Processed %i bunches (%i frames), TIME: %f in seconds'\
              %(progress, frames_showed,(end_time_progress-start_epoch_time))
        print numpy.sum(train_loglikelihood)

        print 'Validating...'
        log_likelihood = []
        valid_frames_showed, progress = 0, 0
        start_valid_time = time.time() # it is also stop of training time
        dataprovider_valid.reset()
        for feats_lab_tuple in dataprovider_valid:
            features, labels = feats_lab_tuple
            if labels is None or features is None:
                continue
            valid_frames_showed += features.shape[0]
            for temp, i in zip(features, xrange(len(labels))):
                temp_features1 = numpy.zeros(vocab_size, dtype = 'float32')
                temp_features2 = numpy.zeros(vocab_size, dtype = 'float32')
                temp_features1[temp[0]] = 1
                temp_features2[temp[1]] = 1
                valid_set_x1.set_value(numpy.asarray(temp_features1, dtype = 'float32'), borrow = True)
                valid_set_x2.set_value(numpy.asarray(temp_features2, dtype = 'float32'), borrow = True)
                valid_set_y_class.set_value(numpy.asarray([labels[i][1]], dtype = 'int32'), borrow = True)
                valid_set_y_word.set_value(numpy.asarray([labels[i][0]], dtype = 'int32'), borrow = True)
                classifier.WordoutputLayer.W.set_value(numpy.asarray(w_dict[labels[i][1]], dtype = 'float32'), borrow = True)
                classifier.WordoutputLayer.b.set_value(numpy.asarray(b_dict[labels[i][1]], dtype = 'float32'), borrow = True)
                out = validate_model()
                log_likelihood.append(sum(out))
            valid_set_x1.set_value(numpy.empty((1), 'float32'))
            valid_set_x2.set_value(numpy.empty((1), 'float32'))
            valid_set_y_class.set_value(numpy.empty((1), 'int32'))
            valid_set_y_word.set_value(numpy.empty((1), 'int32'))
            progress += 1
            if progress%1000==0:
                end_time_valid_progress = time.time()
                print 'PROGRESS: Processed %i bunches (%i frames), TIME: %f in seconds'\
                      %(progress, valid_frames_showed, end_time_valid_progress - start_valid_time)
        end_time_valid_progress = time.time()
        print 'PROGRESS: Processed %i bunches (%i frames), TIME: %f in seconds'\
              %(progress, valid_frames_showed, end_time_valid_progress - start_valid_time)

        entropy = (-numpy.sum(log_likelihood)/valid_frames_showed)
        print entropy, numpy.sum(log_likelihood)
        if entropy < best_valid_loss:
            learning_rate = learnrate_schedular.get_next_rate(entropy)
            best_valid_loss = entropy
        else:
            learnrate_schedular.rate = 0.0
    end_time = time.time()
    print 'The fine tuning ran for %.2fm' %((end_time-start_time)/60.)

    print 'Testing...'
    log_likelihood = []
    likelihoods = []
    test_frames_showed, progress = 0, 0
    start_test_time = time.time() # it is also stop of training time
    dataprovider_test.reset()
    for feats_lab_tuple in dataprovider_test:
        features, labels = feats_lab_tuple
        if labels is None or features is None:
            continue
        test_frames_showed += features.shape[0]
        for temp, i in zip(features, xrange(len(labels))):
            temp_features1 = numpy.zeros(vocab_size, dtype = 'float32')
            temp_features2 = numpy.zeros(vocab_size, dtype = 'float32')
            temp_features1[temp[0]] = 1
            temp_features2[temp[1]] = 1
            test_set_x1.set_value(numpy.asarray(temp_features1, dtype = 'float32'), borrow = True)
            test_set_x2.set_value(numpy.asarray(temp_features2, dtype = 'float32'), borrow = True)
            test_set_y_class.set_value(numpy.asarray([labels[i][1]], dtype = 'int32'), borrow = True)
            test_set_y_word.set_value(numpy.asarray([labels[i][0]], dtype = 'int32'), borrow = True)
            classifier.WordoutputLayer.W.set_value(numpy.asarray(w_dict[labels[i][1]], dtype = 'float32'), borrow = True)
            classifier.WordoutputLayer.b.set_value(numpy.asarray(b_dict[labels[i][1]], dtype = 'float32'), borrow = True)
            out = test_model()
            log_likelihood.append(sum(out))
        progress += 1
        if progress%1000==0:
            end_time_test_progress = time.time()
            print 'PROGRESS: Processed %i bunches (%i frames), TIME: %f in seconds'\
                  %(progress, test_frames_showed, end_time_test_progress - start_test_time)
    end_time_test_progress = time.time()
    print 'PROGRESS: Processed %i bunches (%i frames), TIME: %f in seconds'\
          %(progress, test_frames_showed, end_time_test_progress - start_test_time)
    #save_posteriors(log_likelihood, likelihoods, weight_path+file_name2)
    print numpy.sum(log_likelihood)
    likelihood_sum = (-numpy.sum(log_likelihood)/test_frames_showed)
    print 'entropy:', likelihood_sum
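
# --------------------------------------------------------------------------
# Sketch (not part of the training code above): the per-class weight swapping
# in the function above implements a class-factorised softmax, where the
# probability of a word factors as P(w | h) = P(class(w) | h) * P(w | class(w), h).
# The helper below is a minimal numpy illustration of that factorisation; the
# function name, the 'class_W'/'class_b' arguments, and the assumed shapes are
# hypothetical, though w_dict/b_dict mirror the per-class dictionaries used above.
import numpy

def class_factored_log_prob(hidden, class_W, class_b, w_dict, b_dict,
                            word_class, word_index_in_class):
    def log_softmax(z):
        z = z - z.max()                       # numerically stable softmax
        return z - numpy.log(numpy.exp(z).sum())
    # log P(class | hidden): softmax over all classes
    class_scores = numpy.dot(hidden, class_W) + class_b
    log_p_class = log_softmax(class_scores)[word_class]
    # log P(word | class, hidden): softmax over the words of that class only,
    # which is why only one small W/b pair has to live on the output layer
    word_scores = numpy.dot(hidden, w_dict[word_class]) + b_dict[word_class]
    log_p_word = log_softmax(word_scores)[word_index_in_class]
    return log_p_class + log_p_word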
def train_mlprnn(weight_path = sys.argv[1], file_name1 = sys.argv[2], L1_reg = 0.0, L2_reg = 0.0000,
                 path_name = '/exports/work/inf_hcrc_cstr_udialogue/siva/data/'):

    voc_list = Vocabulary(path_name + 'train')
    voc_list.vocab_create()
    vocab = voc_list.vocab
    vocab_size = voc_list.vocab_size

    dataprovider_train = DataProvider(path_name + 'train', vocab, vocab_size)
    dataprovider_valid = DataProvider(path_name + 'valid', vocab, vocab_size)
    dataprovider_test = DataProvider(path_name + 'test', vocab, vocab_size)

    print '..building the model'

    #symbolic variables for input, target vector and batch index
    index = T.lscalar('index')
    x1 = T.fvector('x1')
    x2 = T.fvector('x2')
    x3 = T.fvector('x3')
    ht1 = T.fvector('ht1')
    y = T.ivector('y')
    learning_rate = T.fscalar('learning_rate')

    #theano shared variables for train, valid and test
    train_set_x1 = theano.shared(numpy.empty((1), dtype='float32'), allow_downcast = True)
    train_set_x2 = theano.shared(numpy.empty((1), dtype='float32'), allow_downcast = True)
    train_set_x3 = theano.shared(numpy.empty((1), dtype='float32'), allow_downcast = True)
    train_set_y = theano.shared(numpy.empty((1), dtype = 'int32'), allow_downcast = True)

    valid_set_x1 = theano.shared(numpy.empty((1), dtype='float32'), allow_downcast = True)
    valid_set_x2 = theano.shared(numpy.empty((1), dtype='float32'), allow_downcast = True)
    valid_set_x3 = theano.shared(numpy.empty((1), dtype='float32'), allow_downcast = True)
    valid_set_y = theano.shared(numpy.empty((1), dtype = 'int32'), allow_downcast = True)

    test_set_x1 = theano.shared(numpy.empty((1), dtype='float32'), allow_downcast = True)
    test_set_x2 = theano.shared(numpy.empty((1), dtype='float32'), allow_downcast = True)
    test_set_x3 = theano.shared(numpy.empty((1), dtype='float32'), allow_downcast = True)
    test_set_y = theano.shared(numpy.empty((1), dtype = 'int32'), allow_downcast = True)

    rng = numpy.random.RandomState()

    classifier = MLP_RNN(rng = rng, input1 = x1, input2 = x2, input3 = x3, initial_hidden = ht1,
                         n_in = vocab_size, fea_dim = int(sys.argv[3]), context_size = 2,
                         n_hidden = int(sys.argv[4]), n_out = vocab_size)
    #recurrent state carried between words; initialised to zeros rather than
    #uninitialised memory
    hidden_state = theano.shared(numpy.zeros((int(sys.argv[4]), ), dtype = 'float32'))
    cost = classifier.cost(y)

    #constructor for learning rate class
    learnrate_schedular = LearningRateNewBob(start_rate = 0.05, scale_by=.5, max_epochs=9999,\
                    min_derror_ramp_start=.01, min_derror_stop=.01, init_error=100.)

    log_likelihood = classifier.sum(y)
    likelihood = classifier.likelihood(y)

    #test_model
    test_model = theano.function(inputs = [], outputs = [log_likelihood, likelihood], \
                    givens = {x1: test_set_x1, x2: test_set_x2, x3: test_set_x3, ht1: hidden_state, y: test_set_y})
    #validation_model
    validate_model = theano.function(inputs = [], outputs = [log_likelihood], \
                    givens = {x1: valid_set_x1, x2: valid_set_x2, x3: valid_set_x3, ht1: hidden_state, y: valid_set_y})

    gradient_param = []
    #calculates the gradient of cost with respect to parameters
    for param in classifier.params:
        gradient_param.append(T.cast(T.grad(cost, param), 'float32'))

    updates = []
    #updates the parameters
    for param, gradient in zip(classifier.params, gradient_param):
        updates.append((param, param - learning_rate * gradient))

    #training_model; the second output is the new recurrent hidden state
    train_model = theano.function(inputs = [learning_rate], outputs = [cost, classifier.RNNhiddenlayer.output],
                    updates = updates, \
                    givens = {x1: train_set_x1, x2: train_set_x2, x3: train_set_x3, ht1: hidden_state, y: train_set_y})

    #initialise the feed-forward layers from the weights saved under group
    #'MLP2' of the HDF5 file; the final layer (i == 4) has no stored bias
    f = h5py.File(weight_path+file_name1, "r")
    for i in xrange(0, classifier.no_of_layers, 2):
        path_modified = '/' + 'MLP' + str(2) + '/layer' + str(i/2)
        if i == 4:
            classifier.MLPparams[i].set_value(numpy.asarray(f[path_modified + "/W"].value, dtype = 'float32'), borrow = True)
        else:
            classifier.MLPparams[i].set_value(numpy.asarray(f[path_modified + "/W"].value, dtype = 'float32'), borrow = True)
            classifier.MLPparams[i + 1].set_value(numpy.asarray(f[path_modified + "/b"].value, dtype = 'float32'), borrow = True)
    f.close()

    print '.....training'
    best_valid_loss = numpy.inf
    start_time = time.time()
    while(learnrate_schedular.get_rate() != 0):
        print 'learning_rate:', learnrate_schedular.get_rate()
        print 'epoch_number:', learnrate_schedular.epoch

        frames_showed, progress = 0, 0
        start_epoch_time = time.time()
        dataprovider_train.reset()
        for feats_lab_tuple in dataprovider_train:
            features, labels = feats_lab_tuple
            if labels is None or features is None:
                continue
            frames_showed += features.shape[0]
            for temp, i in zip(features, xrange(len(labels))):
                #one-hot encoding: x1/x2 feed the MLP part, x3 feeds the
                #recurrent part
                temp_features1 = numpy.zeros(vocab_size, dtype = 'float32')
                temp_features2 = numpy.zeros(vocab_size, dtype = 'float32')
                temp_features3 = numpy.zeros(vocab_size, dtype = 'float32')
                temp_features1[temp[0]] = 1
                temp_features2[temp[1]] = 1
                temp_features3[temp[1]] = 1
                train_set_x1.set_value(numpy.asarray(temp_features1, dtype = 'float32'), borrow = True)
                train_set_x2.set_value(numpy.asarray(temp_features2, dtype = 'float32'), borrow = True)
                train_set_x3.set_value(numpy.asarray(temp_features3, dtype = 'float32'), borrow = True)
                train_set_y.set_value(numpy.asarray([labels[i]], dtype = 'int32'), borrow = True)
                out = train_model(numpy.array(learnrate_schedular.get_rate(), dtype = 'float32'))
                #carry the recurrent state over to the next word
                hidden_state.set_value(numpy.asarray(out[1], dtype = 'float32'), borrow = True)
            progress += 1
            if progress%10000==0:
                end_time_progress = time.time()
                print 'PROGRESS: Processed %i bunches (%i frames), TIME: %f in seconds'\
                      %(progress, frames_showed,(end_time_progress-start_epoch_time))
            train_set_x1.set_value(numpy.empty((1), dtype = 'float32'))
            train_set_x2.set_value(numpy.empty((1), dtype = 'float32'))
            train_set_x3.set_value(numpy.empty((1), dtype = 'float32'))
            train_set_y.set_value(numpy.empty((1), dtype = 'int32'))
        end_time_progress = time.time()
        print 'PROGRESS: Processed %i bunches (%i frames), TIME: %f in seconds'\
              %(progress, frames_showed,(end_time_progress-start_epoch_time))

        print 'Validating...'
        log_likelihood = []
        valid_frames_showed, progress = 0, 0
        start_valid_time = time.time() # it is also stop of training time
        dataprovider_valid.reset()
        for feats_lab_tuple in dataprovider_valid:
            features, labels = feats_lab_tuple
            if labels is None or features is None:
                continue
            valid_frames_showed += features.shape[0]
            for temp, i in zip(features, xrange(len(labels))):
                temp_features1 = numpy.zeros(vocab_size, dtype = 'float32')
                temp_features2 = numpy.zeros(vocab_size, dtype = 'float32')
                temp_features3 = numpy.zeros(vocab_size, dtype = 'float32')
                temp_features1[temp[0]] = 1
                temp_features2[temp[1]] = 1
                temp_features3[temp[1]] = 1
                valid_set_x1.set_value(numpy.asarray(temp_features1, dtype = 'float32'), borrow = True)
                valid_set_x2.set_value(numpy.asarray(temp_features2, dtype = 'float32'), borrow = True)
                valid_set_x3.set_value(numpy.asarray(temp_features3, dtype = 'float32'), borrow = True)
                valid_set_y.set_value(numpy.asarray([labels[i]], dtype = 'int32'), borrow = True)
                out = validate_model()
                log_likelihood.append(out[0])
            valid_set_x1.set_value(numpy.empty((1), 'float32'))
            valid_set_x2.set_value(numpy.empty((1), 'float32'))
            valid_set_x3.set_value(numpy.empty((1), 'float32'))
            valid_set_y.set_value(numpy.empty((1), 'int32'))
            progress += 1
            if progress%1000==0:
                end_time_valid_progress = time.time()
                print 'PROGRESS: Processed %i bunches (%i frames), TIME: %f in seconds'\
                      %(progress, valid_frames_showed, end_time_valid_progress - start_valid_time)
        end_time_valid_progress = time.time()
        print 'PROGRESS: Processed %i bunches (%i frames), TIME: %f in seconds'\
              %(progress, valid_frames_showed, end_time_valid_progress - start_valid_time)

        entropy = (-numpy.sum(log_likelihood)/valid_frames_showed)
        print entropy, numpy.sum(log_likelihood)
        if entropy < best_valid_loss:
            learning_rate = learnrate_schedular.get_next_rate(entropy)
            best_valid_loss = entropy
        else:
            learnrate_schedular.rate = 0.0
    end_time = time.time()
    print 'The fine tuning ran for %.2fm' %((end_time-start_time)/60.)

    print 'Testing...'
    log_likelihood = []
    likelihoods = []
    test_frames_showed, progress = 0, 0
    start_test_time = time.time() # it is also stop of training time
    dataprovider_test.reset()
    for feats_lab_tuple in dataprovider_test:
        features, labels = feats_lab_tuple
        if labels is None or features is None:
            continue
        test_frames_showed += features.shape[0]
        for temp, i in zip(features, xrange(len(labels))):
            temp_features1 = numpy.zeros(vocab_size, dtype = 'float32')
            temp_features2 = numpy.zeros(vocab_size, dtype = 'float32')
            temp_features3 = numpy.zeros(vocab_size, dtype = 'float32')
            temp_features1[temp[0]] = 1
            temp_features2[temp[1]] = 1
            temp_features3[temp[1]] = 1
            test_set_x1.set_value(numpy.asarray(temp_features1, dtype = 'float32'), borrow = True)
            test_set_x2.set_value(numpy.asarray(temp_features2, dtype = 'float32'), borrow = True)
            test_set_x3.set_value(numpy.asarray(temp_features3, dtype = 'float32'), borrow = True)
            test_set_y.set_value(numpy.asarray([labels[i]], dtype = 'int32'), borrow = True)
            out = test_model()
            log_likelihood.append(out[0])
            likelihoods.append(out[1])
        progress += 1
        if progress%1000==0:
            end_time_test_progress = time.time()
            print 'PROGRESS: Processed %i bunches (%i frames), TIME: %f in seconds'\
                  %(progress, test_frames_showed, end_time_test_progress - start_test_time)
    end_time_test_progress = time.time()
    print 'PROGRESS: Processed %i bunches (%i frames), TIME: %f in seconds'\
          %(progress, test_frames_showed, end_time_test_progress - start_test_time)
    print numpy.sum(log_likelihood)
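
# --------------------------------------------------------------------------
# Sketch (not part of the training code above): train_mlprnn threads the
# recurrent state by hand -- each call to train_model returns the new hidden
# activation (out[1]) and writes it back into the 'hidden_state' shared
# variable before the next word is processed.  The loop below shows the same
# state-threading pattern in plain numpy; the function name, the Wx/Wh weight
# matrices, and the sigmoid nonlinearity are illustrative assumptions (the
# real recurrence lives inside MLP_RNN).
def rnn_state_threading_demo(one_hot_inputs, Wx, Wh, n_hidden):
    import numpy
    h = numpy.zeros(n_hidden, dtype='float32')   # same role as hidden_state
    outputs = []
    for x in one_hot_inputs:
        # h_t = sigmoid(x_t . Wx + h_{t-1} . Wh); h is fed back, like out[1]
        h = 1.0 / (1.0 + numpy.exp(-(numpy.dot(x, Wx) + numpy.dot(h, Wh))))
        outputs.append(h)
    return outputs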
def train_mlp(feature_dimension, context, hidden_size, weight_path, file_name1, file_name2, file_name3,
              L1_reg=0.0, L2_reg=0.0000,
              path_name='/exports/work/inf_hcrc_cstr_udialogue/siva/data/'):

    #voc_list = Vocabulary(path_name + 'train_modified1')
    #voc_list.vocab_create()
    #vocab = voc_list.vocab
    #vocab_size = voc_list.vocab_size
    #short_list = voc_list.short_list
    #short_list_size = voc_list.short_list_size
    #path = '/exports/work/inf_hcrc_cstr_udialogue/siva/data_normalization/vocab/wlist5c.nvp'
    voc_list = Vocabularyhash('/exports/work/inf_hcrc_cstr_udialogue/siva/data_normalization/vocab/wlist5c.nvp')
    voc_list.hash_create()
    vocab = voc_list.voc_hash
    vocab_size = voc_list.vocab_size

    #dataprovider_train = DataProvider(path_name + 'train', vocab, vocab_size, short_list)
    #dataprovider_valid = DataProvider(path_name + 'valid', vocab, vocab_size, short_list)
    #dataprovider_test = DataProvider(path_name + 'test', vocab, vocab_size, short_list)
    dataprovider_train = DataProvider(path_name + 'train_modified1_20m', vocab, vocab_size)
    dataprovider_valid = DataProvider(path_name + 'valid_modified1', vocab, vocab_size)
    dataprovider_test = DataProvider(path_name + 'test_modified1', vocab, vocab_size)

    print '..building the model'

    #symbolic variables for input, target vector and batch index
    index = T.lscalar('index')
    x1 = T.fvector('x1')
    x2 = T.fvector('x2')
    y = T.ivector('y')
    learning_rate = T.fscalar('learning_rate')

    #theano shared variables for train, valid and test
    train_set_x1 = theano.shared(numpy.empty((1), dtype='float32'), allow_downcast=True)
    train_set_x2 = theano.shared(numpy.empty((1), dtype='float32'), allow_downcast=True)
    train_set_y = theano.shared(numpy.empty((1), dtype='int32'), allow_downcast=True)

    valid_set_x1 = theano.shared(numpy.empty((1), dtype='float32'), allow_downcast=True)
    valid_set_x2 = theano.shared(numpy.empty((1), dtype='float32'), allow_downcast=True)
    valid_set_y = theano.shared(numpy.empty((1), dtype='int32'), allow_downcast=True)

    test_set_x1 = theano.shared(numpy.empty((1), dtype='float32'), allow_downcast=True)
    test_set_x2 = theano.shared(numpy.empty((1), dtype='float32'), allow_downcast=True)
    test_set_y = theano.shared(numpy.empty((1), dtype='int32'), allow_downcast=True)

    rng = numpy.random.RandomState()

    classifier = MLP(rng=rng, input1=x1, input2=x2, n_in=vocab_size,
                     fea_dim=int(feature_dimension), context_size=int(context),
                     n_hidden=int(hidden_size), n_out=vocab_size)
    cost = classifier.negative_log_likelihood(y) + L1_reg * classifier.L1 + L2_reg * classifier.L2_sqr

    #constructor for learning rate class
    learnrate_schedular = LearningRateNewBob(start_rate=0.005, scale_by=.5, max_epochs=9999,
                                             min_derror_ramp_start=.01, min_derror_stop=.01, init_error=100.)

    frame_error = classifier.errors(y)
    log_likelihood = classifier.sum(y)
    likelihood = classifier.likelihood(y)

    #test_model
    test_model = theano.function(inputs=[], outputs=[log_likelihood, likelihood],
                                 givens={x1: test_set_x1, x2: test_set_x2, y: test_set_y})
    #validation_model
    validate_model = theano.function(inputs=[], outputs=[frame_error, log_likelihood],
                                     givens={x1: valid_set_x1, x2: valid_set_x2, y: valid_set_y})

    gradient_param = []
    #calculates the gradient of cost with respect to parameters
    for param in classifier.params:
        gradient_param.append(T.cast(T.grad(cost, param), 'float32'))

    updates = []
    #updates the parameters
    for param, gradient in zip(classifier.params, gradient_param):
        updates.append((param, param - learning_rate * gradient))

    #training_model
    train_model = theano.function(inputs=[learning_rate], outputs=[cost], updates=updates,
                                  givens={x1: train_set_x1, x2: train_set_x2, y: train_set_y})

    print '.....training'
    best_valid_loss = numpy.inf
    start_time = time.time()
    while (learnrate_schedular.get_rate() != 0):
        print 'learning_rate:', learnrate_schedular.get_rate()
        print 'epoch_number:', learnrate_schedular.epoch

        frames_showed, progress = 0, 0
        start_epoch_time = time.time()
        dataprovider_train.reset()
        for feats_lab_tuple in dataprovider_train:
            features, labels = feats_lab_tuple
            if labels is None or features is None:
                continue
            frames_showed += features.shape[0]
            for temp, i in zip(features, xrange(len(labels))):
                temp_features1 = numpy.zeros(vocab_size, dtype='float32')
                temp_features2 = numpy.zeros(vocab_size, dtype='float32')
                temp_features1[temp[0]] = 1
                temp_features2[temp[1]] = 1
                train_set_x1.set_value(numpy.asarray(temp_features1, dtype='float32'), borrow=True)
                train_set_x2.set_value(numpy.asarray(temp_features2, dtype='float32'), borrow=True)
                train_set_y.set_value(numpy.asarray([labels[i]], dtype='int32'), borrow=True)
                out = train_model(numpy.array(learnrate_schedular.get_rate(), dtype='float32'))
            progress += 1
            if progress % 10000 == 0:
                end_time_progress = time.time()
                print 'PROGRESS: Processed %i bunches (%i frames), TIME: %f in seconds'\
                      %(progress, frames_showed,(end_time_progress-start_epoch_time))
            train_set_x1.set_value(numpy.empty((1), dtype='float32'))
            train_set_x2.set_value(numpy.empty((1), dtype='float32'))
            train_set_y.set_value(numpy.empty((1), dtype='int32'))
        end_time_progress = time.time()
        print 'PROGRESS: Processed %i bunches (%i frames), TIME: %f in seconds'\
              %(progress, frames_showed,(end_time_progress-start_epoch_time))

        classifier_name = 'MLP' + str(learnrate_schedular.epoch)
        save_mlp(classifier, weight_path + file_name1, classifier_name)
        save_learningrate(learnrate_schedular.get_rate(), weight_path + file_name3, classifier_name)

        print 'Validating...'
        valid_losses = []
        log_likelihood = []
        valid_frames_showed, progress = 0, 0
        start_valid_time = time.time() # it is also stop of training time
        dataprovider_valid.reset()
        for feats_lab_tuple in dataprovider_valid:
            features, labels = feats_lab_tuple
            if labels is None or features is None:
                continue
            valid_frames_showed += features.shape[0]
            for temp, i in zip(features, xrange(len(labels))):
                temp_features1 = numpy.zeros(vocab_size, dtype='float32')
                temp_features2 = numpy.zeros(vocab_size, dtype='float32')
                temp_features1[temp[0]] = 1
                temp_features2[temp[1]] = 1
                valid_set_x1.set_value(numpy.asarray(temp_features1, dtype='float32'), borrow=True)
                valid_set_x2.set_value(numpy.asarray(temp_features2, dtype='float32'), borrow=True)
                valid_set_y.set_value(numpy.asarray([labels[i]], dtype='int32'), borrow=True)
                out = validate_model()
                error_rate = out[0]
                likelihoods = out[1]
                valid_losses.append(error_rate)
                log_likelihood.append(likelihoods)
            valid_set_x1.set_value(numpy.empty((1), 'float32'))
            valid_set_x2.set_value(numpy.empty((1), 'float32'))
            valid_set_y.set_value(numpy.empty((1), 'int32'))
            progress += 1
            if progress % 1000 == 0:
                end_time_valid_progress = time.time()
                print 'PROGRESS: Processed %i bunches (%i frames), TIME: %f in seconds'\
                      %(progress, valid_frames_showed, end_time_valid_progress - start_valid_time)
        end_time_valid_progress = time.time()
        print 'PROGRESS: Processed %i bunches (%i frames), TIME: %f in seconds'\
              %(progress, valid_frames_showed, end_time_valid_progress - start_valid_time)

        this_validation_loss = numpy.mean(valid_losses)
        entropy = (-numpy.sum(log_likelihood) / valid_frames_showed)
        print this_validation_loss, entropy, numpy.sum(log_likelihood)
        if entropy < best_valid_loss:
            learning_rate = learnrate_schedular.get_next_rate(entropy)
            best_valid_loss = entropy
        else:
            learnrate_schedular.rate = 0.0
    end_time = time.time()
    print 'The fine tuning ran for %.2fm' % ((end_time - start_time) / 60.)

    print 'Testing...'
    log_likelihood = []
    likelihoods = []
    test_frames_showed, progress = 0, 0
    start_test_time = time.time() # it is also stop of training time
    dataprovider_test.reset()
    for feats_lab_tuple in dataprovider_test:
        features, labels = feats_lab_tuple
        if labels is None or features is None:
            continue
        test_frames_showed += features.shape[0]
        for temp, i in zip(features, xrange(len(labels))):
            temp_features1 = numpy.zeros(vocab_size, dtype='float32')
            temp_features2 = numpy.zeros(vocab_size, dtype='float32')
            temp_features1[temp[0]] = 1
            temp_features2[temp[1]] = 1
            test_set_x1.set_value(numpy.asarray(temp_features1, dtype='float32'), borrow=True)
            test_set_x2.set_value(numpy.asarray(temp_features2, dtype='float32'), borrow=True)
            test_set_y.set_value(numpy.asarray([labels[i]], dtype='int32'), borrow=True)
            out = test_model()
            log_likelihood.append(out[0])
            likelihoods.append(out[1])
        progress += 1
        if progress % 1000 == 0:
            end_time_test_progress = time.time()
            print 'PROGRESS: Processed %i bunches (%i frames), TIME: %f in seconds'\
                  %(progress, test_frames_showed, end_time_test_progress - start_test_time)
    end_time_test_progress = time.time()
    print 'PROGRESS: Processed %i bunches (%i frames), TIME: %f in seconds'\
          %(progress, test_frames_showed, end_time_test_progress - start_test_time)

    save_posteriors(log_likelihood, likelihoods, weight_path + file_name2)
    print numpy.sum(log_likelihood)
    likelihood_sum = (-numpy.sum(log_likelihood) / test_frames_showed)
    print 'entropy:', likelihood_sum
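
# --------------------------------------------------------------------------
# Sketch (not part of the training code above): every function in this file
# drives training with LearningRateNewBob, which keeps the rate fixed while
# validation entropy improves, then halves it, and finally returns 0 to stop
# the while loop.  The stand-in class below illustrates that control flow with
# assumed semantics; the real class has more state (ramping thresholds,
# max_epochs, init_error) and this sketch is not a drop-in replacement.
class NewBobSketch(object):
    def __init__(self, start_rate=0.005, scale_by=0.5, min_improvement=0.01):
        self.rate = start_rate
        self.scale_by = scale_by
        self.min_improvement = min_improvement
        self.prev_error = float('inf')
        self.ramping = False
        self.epoch = 1

    def get_rate(self):
        return self.rate

    def get_next_rate(self, error):
        improvement = self.prev_error - error
        if self.ramping:
            if improvement < self.min_improvement:
                self.rate = 0.0                  # stop training
            else:
                self.rate *= self.scale_by       # keep halving while ramping
        elif improvement < self.min_improvement:
            self.ramping = True                  # improvement stalled: start halving
            self.rate *= self.scale_by
        self.prev_error = error
        self.epoch += 1
        return self.rate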
def train_rnn(num_batches_per_bunch = 512, batch_size = 1, num_bunches_queue = 5, offset = 0,
              path_name = '/exports/work/inf_hcrc_cstr_udialogue/siva/data/'):

    voc_list = Vocabulary(path_name + 'train')
    voc_list.vocab_create()
    vocab = voc_list.vocab
    vocab_size = voc_list.vocab_size

    dataprovider_train = DataProvider(path_name + 'train', vocab, vocab_size)
    dataprovider_valid = DataProvider(path_name + 'valid', vocab, vocab_size)
    dataprovider_test = DataProvider(path_name + 'test', vocab, vocab_size)

    print '..building the model'

    #symbolic variables for input, target vector and batch index
    index = T.lscalar('index')
    x = T.fvector('x')
    h0 = T.fvector('h0')
    y = T.ivector('y')
    learning_rate = T.fscalar('learning_rate')

    #theano shared variables for train, valid and test
    train_set_x1 = theano.shared(numpy.empty((1,), dtype='float32'), allow_downcast = True)
    train_set_y = theano.shared(numpy.empty((1), dtype = 'int32'), allow_downcast = True)

    valid_set_x1 = theano.shared(numpy.empty((1,), dtype='float32'), allow_downcast = True)
    valid_set_y = theano.shared(numpy.empty((1), dtype = 'int32'), allow_downcast = True)

    test_set_x1 = theano.shared(numpy.empty((1,), dtype='float32'), allow_downcast = True)
    test_set_y = theano.shared(numpy.empty((1), dtype = 'int32'), allow_downcast = True)

    rng = numpy.random.RandomState()

    #'intial_hidden' follows the spelling of the RNN class's constructor argument
    classifier = RNN(rng = rng, input = x, intial_hidden = h0, n_in = vocab_size,
                     n_hidden = int(sys.argv[1]), n_out = vocab_size)
    cost = classifier.negative_log_likelihood(y)

    #recurrent state carried between words
    ht1_values = numpy.ones((int(sys.argv[1]), ), dtype = 'float32')
    ht1 = theano.shared(value = ht1_values, name = 'hidden_state')

    #constructor for learning rate class
    learnrate_schedular = LearningRateNewBob(start_rate = float(sys.argv[2]), scale_by=.5, max_epochs=9999,\
                    min_derror_ramp_start=.01, min_derror_stop=.01, init_error=100.)

    log_likelihood = classifier.sum(y)
    likelihood = classifier.likelihood(y)

    #test_model
    test_model = theano.function(inputs = [], outputs = [log_likelihood, likelihood], \
                    givens = {x: test_set_x1, y: test_set_y, h0: ht1})
    #validation_model
    validate_model = theano.function(inputs = [], outputs = [log_likelihood], \
                    givens = {x: valid_set_x1, y: valid_set_y, h0: ht1})

    gradient_param = []
    #calculates the gradient of cost with respect to parameters
    for param in classifier.params:
        gradient_param.append(T.cast(T.grad(cost, param), 'float32'))

    updates = []
    #SGD updates with a small weight-decay term
    for param, gradient in zip(classifier.params, gradient_param):
        updates.append((param, T.cast(param - learning_rate * gradient - 0.000001 * param, dtype = 'float32')))

    #training_model; the second output is the new recurrent hidden state
    train_model = theano.function(inputs = [learning_rate], outputs = [cost, classifier.inputlayer.output],
                    updates = updates, \
                    givens = {x: train_set_x1, y: train_set_y, h0: ht1})

    print '.....training'
    best_valid_loss = numpy.inf
    start_time = time.time()
    while(learnrate_schedular.get_rate() != 0):
        print 'learning_rate:', learnrate_schedular.get_rate()
        print 'epoch_number:', learnrate_schedular.epoch

        frames_showed, progress = 0, 0
        start_epoch_time = time.time()
        dataprovider_train.reset()
        for feats_lab_tuple in dataprovider_train:
            features, labels = feats_lab_tuple
            if labels is None or features is None:
                continue
            frames_showed += features.shape[0]
            for temp, i in zip(features, xrange(len(labels))):
                temp_features1 = numpy.zeros(vocab_size, dtype = 'float32')
                temp_features1[temp[0]] = 1
                train_set_x1.set_value(numpy.asarray(temp_features1, dtype = 'float32'), borrow = True)
                train_set_y.set_value(numpy.asarray([labels[i]], dtype = 'int32'), borrow = True)
                out = train_model(numpy.asarray(learnrate_schedular.get_rate(), dtype = 'float32'))
                #carry the recurrent state over to the next word
                ht1.set_value(numpy.asarray(out[1], dtype = 'float32'), borrow = True)
            progress += 1
            if progress%10000==0:
                end_time_progress = time.time()
                print 'PROGRESS: Processed %i bunches (%i frames), TIME: %f in seconds'\
                      %(progress, frames_showed,(end_time_progress-start_epoch_time))
            train_set_x1.set_value(numpy.empty((1, ), dtype = 'float32'))
            train_set_y.set_value(numpy.empty((1), dtype = 'int32'))
        end_time_progress = time.time()
        print 'PROGRESS: Processed %i bunches (%i frames), TIME: %f in seconds'\
              %(progress, frames_showed,(end_time_progress-start_epoch_time))
        #classifier_name = 'MLP' + str(learnrate_schedular.epoch)
        #save_mlp(classifier, path+exp_name1, classifier_name)

        print 'Validating...'
        log_likelihood = []
        valid_frames_showed, progress = 0, 0
        start_valid_time = time.time() # it is also stop of training time
        dataprovider_valid.reset()
        for feats_lab_tuple in dataprovider_valid:
            features, labels = feats_lab_tuple
            if labels is None or features is None:
                continue
            valid_frames_showed += features.shape[0]
            for temp, i in zip(features, xrange(len(labels))):
                temp_features1 = numpy.zeros(vocab_size, dtype = 'float32')
                temp_features1[temp[0]] = 1
                valid_set_x1.set_value(numpy.asarray(temp_features1, dtype = 'float32'), borrow = True)
                valid_set_y.set_value(numpy.asarray([labels[i]], dtype = 'int32'), borrow = True)
                log_likelihood.append(validate_model())
            valid_set_x1.set_value(numpy.empty((1), 'float32'))
            valid_set_y.set_value(numpy.empty((1), 'int32'))
            progress += 1
            if progress%1000==0:
                end_time_valid_progress = time.time()
                print 'PROGRESS: Processed %i bunches (%i frames), TIME: %f in seconds'\
                      %(progress, valid_frames_showed, end_time_valid_progress - start_valid_time)
        end_time_valid_progress = time.time()
        print 'PROGRESS: Processed %i bunches (%i frames), TIME: %f in seconds'\
              %(progress, valid_frames_showed, end_time_valid_progress - start_valid_time)

        entropy = (-numpy.sum(log_likelihood)/valid_frames_showed)
        print entropy, numpy.sum(log_likelihood)
        if entropy < best_valid_loss:
            learning_rate = learnrate_schedular.get_next_rate(entropy)
            best_valid_loss = entropy
        else:
            learnrate_schedular.rate = 0.0
    end_time = time.time()
    print 'The fine tuning ran for %.2fm' %((end_time-start_time)/60.)

    print 'Testing...'
    log_likelihood = []
    likelihoods = []
    test_frames_showed, progress = 0, 0
    start_test_time = time.time() # it is also stop of training time
    dataprovider_test.reset()
    for feats_lab_tuple in dataprovider_test:
        features, labels = feats_lab_tuple
        if labels is None or features is None:
            continue
        test_frames_showed += features.shape[0]
        for temp, i in zip(features, xrange(len(labels))):
            temp_features1 = numpy.zeros(vocab_size, dtype = 'float32')
            temp_features1[temp[0]] = 1
            test_set_x1.set_value(numpy.asarray(temp_features1, dtype = 'float32'), borrow = True)
            test_set_y.set_value(numpy.asarray([labels[i]], dtype = 'int32'), borrow = True)
            out = test_model()
            log_likelihood.append(out[0])
            likelihoods.append(out[1])
        progress += 1
        if progress%1000==0:
            end_time_test_progress = time.time()
            print 'PROGRESS: Processed %i bunches (%i frames), TIME: %f in seconds'\
                  %(progress, test_frames_showed, end_time_test_progress - start_test_time)
    end_time_test_progress = time.time()
    print 'PROGRESS: Processed %i bunches (%i frames), TIME: %f in seconds'\
          %(progress, test_frames_showed, end_time_test_progress - start_test_time)
    #save_posteriors(log_likelihood, likelihoods, weight_path+file_name2)
    print numpy.sum(log_likelihood)
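
# --------------------------------------------------------------------------
# Sketch (not part of the training code above): all the scripts in this file
# report '-sum(log_likelihood) / frames' as 'entropy'.  If the model returns
# natural-log likelihoods, that quantity is the average negative
# log-likelihood in nats per word, and perplexity follows by exponentiating.
# The helper name below is hypothetical; it just makes the arithmetic explicit.
def entropy_and_perplexity(log_likelihoods, n_frames):
    import numpy
    entropy = -numpy.sum(log_likelihoods) / n_frames   # nats per word
    perplexity = numpy.exp(entropy)                    # exp(entropy)
    return entropy, perplexity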