def __init__(self, seq_length):
    ## Define hyperparameters
    self.learning_rate = FIXED_PARAMETERS["learning_rate"]
    self.display_epoch_freq = 1
    self.display_step_freq = 50
    self.embedding_dim = FIXED_PARAMETERS["word_embedding_dim"]
    self.dim = FIXED_PARAMETERS["hidden_embedding_dim"]
    self.batch_size = FIXED_PARAMETERS["batch_size"]
    self.emb_train = FIXED_PARAMETERS["emb_train"]
    self.keep_rate = FIXED_PARAMETERS["keep_rate"]
    self.sequence_length = FIXED_PARAMETERS["seq_length"]
    self.alpha = FIXED_PARAMETERS["alpha"]

    logger.Log("Building model from %s.py" % (model))
    self.model = MyModel(seq_length=self.sequence_length,
                         emb_dim=self.embedding_dim,
                         hidden_dim=self.dim,
                         embeddings=loaded_embeddings,
                         emb_train=self.emb_train)

    # Perform gradient descent with Adam
    self.optimizer = tf.train.AdamOptimizer(self.learning_rate,
                                            beta1=0.9,
                                            beta2=0.999).minimize(self.model.total_cost)

    # Boolean stating that training has not been completed
    self.completed = False

    # tf things: initialize variables and create placeholder for session
    logger.Log("Initializing variables")
    self.init = tf.global_variables_initializer()
    self.sess = None
    self.saver = tf.train.Saver()
    logger.Log("sess=%s" % self.sess)
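# For reference: a minimal, hypothetical stub of the interface this trainer
# assumes MyModel exposes (the real models live in ocnli.mnli_code.models and
# are loaded dynamically below). The class name and the constant logits here
# are illustrative assumptions; train() only touches the four placeholders
# and total_cost.
class _ModelInterfaceSketch(object):
    def __init__(self, seq_length, emb_dim, hidden_dim, embeddings, emb_train):
        self.premise_x = tf.placeholder(tf.int32, [None, seq_length], name="premise")
        self.hypothesis_x = tf.placeholder(tf.int32, [None, seq_length], name="hypothesis")
        self.y = tf.placeholder(tf.int32, [None], name="label")
        self.keep_rate_ph = tf.placeholder(tf.float32, [], name="keep_rate")
        # A real model would encode both sentences and project to 3 classes;
        # this stub wires a constant so total_cost exists and the graph builds.
        self.logits = tf.zeros([tf.shape(self.y)[0], 3])
        self.total_cost = tf.reduce_sum(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=self.y, logits=self.logits))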
def restore_model(self):
    self.sess = tf.Session()
    self.sess.run(self.init)
    #self.saver.restore(self.sess, path)
    restored_model = FIXED_PARAMETERS["continue_training"] + ".ckpt" \
        if ".ckpt" not in FIXED_PARAMETERS["continue_training"] \
        else FIXED_PARAMETERS["continue_training"]
    #restored_model = self.saver.restore(self.sess, (restored_model + "_best"))
    #self.saver.restore(self.sess, (restored_model))

    test_out = evaluate_classifier(self.classify, test_snli,
                                   FIXED_PARAMETERS["batch_size"])[0]
    logger.Log("Model restored=%s" % (restored_model + "_best"))
    logger.Log("Acc on SNLI test-set: %s" % (test_out))

    test_out, total = evaluate_full(self.classify_out, test_snli,
                                    FIXED_PARAMETERS["batch_size"],
                                    name='test', ofile=None)
    best_dev, dtotal = evaluate_full(self.classify_out, dev_snli,
                                     FIXED_PARAMETERS["batch_size"],  ## not needed
                                     name='dev', ofile=None)
    logger.Log("Best dev score: %s (tested on %d examples)" % (best_dev, dtotal))
    logger.Log("Acc on SNLI test-set: %s (tested on %d examples)" % (test_out, total))
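# For reference: evaluate_classifier and evaluate_full come from
# ocnli.mnli_code.util.evaluate (star-imported at the bottom of this file).
# A minimal sketch of the contract restore_model() relies on, assuming the
# MultiNLI-baseline convention that classifier(examples) returns
# (cost, predicted_labels); this function is illustrative, not the real one:
def _evaluate_classifier_sketch(classifier, eval_set, batch_size):
    correct = 0
    total_cost = 0.0
    n = (len(eval_set) // batch_size) * batch_size  # drop the ragged tail
    if n == 0:
        return 0.0, 0.0
    for start in range(0, n, batch_size):
        batch = eval_set[start:start + batch_size]
        cost, hypotheses = classifier(batch)
        total_cost += cost
        correct += sum(int(h == ex['label']) for h, ex in zip(hypotheses, batch))
    return correct / float(n), total_cost / float(n)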
def restore(self, best=True):
    # Prefer the checkpoint with the best dev accuracy; otherwise use the
    # most recent one.
    if best:
        path = os.path.join(FIXED_PARAMETERS["ckpt_path"], modname) + ".ckpt_best"
    else:
        path = os.path.join(FIXED_PARAMETERS["ckpt_path"], modname) + ".ckpt"
    try:
        self.sess = tf.Session()
        self.sess.run(self.init)
        self.saver.restore(self.sess, path)
        logger.Log("Model restored from file: %s" % path)
    except ValueError:
        logger.Log('WARNING: No best checkpoint found, using last one..')
        if best:
            self.restore(best=False)
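# Typical usage of restore() from driver code (hypothetical -- the entry
# point and the classifier's class name are not part of this excerpt):
#
#   classifier = modelClassifier(FIXED_PARAMETERS["seq_length"])
#   classifier.restore(best=True)   # falls back to the last checkpoint if
#                                   # no "_best" checkpoint exists
#   acc = evaluate_classifier(classifier.classify, test_snli,
#                             FIXED_PARAMETERS["batch_size"])[0]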
def train(self, train_mnli, train_snli, dev_mat, dev_mismat, dev_snli):
    self.sess = tf.Session()
    self.sess.run(self.init)

    self.step = 1
    self.epoch = 0
    self.best_dev_snli = 0.
    self.best_strain_acc = 0.
    self.last_train_acc = [.001, .001, .001, .001, .001]
    self.best_step = 0

    # Restore most recent checkpoint if it exists.
    # Also restore values for best dev-set accuracy and best training-set accuracy.
    ckpt_file = os.path.join(FIXED_PARAMETERS["ckpt_path"], modname) + ".ckpt"

    ### Restore a model specified via FIXED_PARAMETERS["continue_training"].
    restored_model = FIXED_PARAMETERS["continue_training"] + ".ckpt" \
        if ".ckpt" not in FIXED_PARAMETERS["continue_training"] \
        else FIXED_PARAMETERS["continue_training"]
    logger.Log("restored_model=%s" % restored_model)

    orig_test_score = 0.0
    if os.path.isfile(restored_model + ".meta"):
        if os.path.isfile(restored_model + "_best.meta"):
            self.saver.restore(self.sess, (restored_model + "_best"))
            #dev_acc_snli, dev_cost_snli = evaluate_classifier(self.classify, dev_snli, self.batch_size)
            test_out = evaluate_classifier(self.classify, test_snli,
                                           FIXED_PARAMETERS["batch_size"])[0]
            logger.Log("Model restored=%s" % (restored_model + "_best"))
            orig_test_score = test_out
            logger.Log("Acc on SNLI test-set: %f" % orig_test_score)

    training_data = train_snli

    if FIXED_PARAMETERS["no_train"]:
        return 0.0

    ### Training cycle
    logger.Log("Training...")

    while True:
        random.shuffle(training_data)
        avg_cost = 0.
        total_batch = int(len(training_data) / self.batch_size)

        # Loop over all batches in epoch
        for i in range(total_batch):
            # Assemble a minibatch of the next B examples
            minibatch_premise_vectors, minibatch_hypothesis_vectors, \
                minibatch_labels, minibatch_genres = self.get_minibatch(
                    training_data, self.batch_size * i, self.batch_size * (i + 1))

            # Run the optimizer to take a gradient step, and also fetch the
            # value of the cost function for logging
            feed_dict = {
                self.model.premise_x: minibatch_premise_vectors,
                self.model.hypothesis_x: minibatch_hypothesis_vectors,
                self.model.y: minibatch_labels,
                self.model.keep_rate_ph: self.keep_rate
            }
            _, c = self.sess.run([self.optimizer, self.model.total_cost], feed_dict)

            # Since a single epoch can take ages for larger models (ESIM),
            # we'll print accuracy every 50 steps
            if self.step % self.display_step_freq == 0:
                #dev_acc_mat, dev_cost_mat = evaluate_classifier(self.classify, dev_mat, self.batch_size)
                #dev_acc_mismat, dev_cost_mismat = evaluate_classifier(self.classify, dev_mismat, self.batch_size)
                dev_acc_snli, dev_cost_snli = evaluate_classifier(
                    self.classify, dev_snli, self.batch_size)
                strain_acc, strain_cost = evaluate_classifier(
                    self.classify, train_snli[0:5000], self.batch_size)
                logger.Log(
                    "Step: %i\t Dev-matched acc: %f\t Dev-mismatched acc: %f\t Dev-SNLI acc: %f\t SNLI train acc: %f"
                    % (self.step, 0.0, 0.0, dev_acc_snli, strain_acc))
                logger.Log(
                    "Step: %i\t Dev-matched cost: %f\t Dev-mismatched cost: %f\t Dev-SNLI cost: %f\t SNLI train cost: %f"
                    % (self.step, 0.0, 0.0, dev_cost_snli, strain_cost))

            if self.step % 500 == 0:
                self.saver.save(self.sess, ckpt_file)
                # Relative improvement (in percent) of the current dev accuracy
                # over the best seen so far; checkpoint as "_best" if it
                # improves by more than 0.04%.
                best_test = 100 * (1 - self.best_dev_snli / dev_acc_snli)
                if best_test > 0.04:
                    self.saver.save(self.sess, ckpt_file + "_best")
                    self.best_dev_snli = dev_acc_snli
                    self.best_strain_acc = strain_acc
                    self.best_step = self.step
                    logger.Log("Checkpointing with new best SNLI-dev accuracy: %f"
                               % (self.best_dev_snli))

            self.step += 1

            # Compute average loss
            avg_cost += c / (total_batch * self.batch_size)

        # Display some statistics about the epoch
        if self.epoch % self.display_epoch_freq == 0:
            logger.Log("Epoch: %i\t Avg. Cost: %f" % (self.epoch + 1, avg_cost))

        self.epoch += 1
        self.last_train_acc[(self.epoch % 5) - 1] = strain_acc

        #break (debugging)
        #if self.epoch >= 3:
        #    break

        # Early stopping: stop when the training accuracy over the last five
        # epochs has plateaued, or when no new best checkpoint has been saved
        # in the last 30,000 steps.
        progress = 1000 * (sum(self.last_train_acc) / (5 * min(self.last_train_acc)) - 1)
        if (progress < 0.1) or (self.step > self.best_step + 30000):
            logger.Log("Best snli-dev accuracy: %s" % str(self.best_dev_snli))
            #logger.Log("MultiNLI Train accuracy: %s" % (self.best_strain_acc))
            self.completed = True
            break

    ## return best dev performance
    return self.best_dev_snli, orig_test_score
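# The excerpt does not include get_minibatch(), which train() calls above. A
# minimal sketch, assuming the MultiNLI-baseline convention that each example
# is a dict of padded token-index sequences plus an integer label; the field
# names below are assumptions, not confirmed by this file:
def get_minibatch(self, dataset, start_index, end_index):
    import numpy as np  # local import keeps this sketch self-contained
    indices = range(start_index, end_index)
    premise_vectors = np.vstack(
        [dataset[i]['sentence1_binary_parse_index_sequence'] for i in indices])
    hypothesis_vectors = np.vstack(
        [dataset[i]['sentence2_binary_parse_index_sequence'] for i in indices])
    labels = [dataset[i]['label'] for i in indices]
    genres = [dataset[i]['genre'] for i in indices]
    return premise_vectors, hypothesis_vectors, labels, genres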
import os
import random
import importlib

import tensorflow as tf

# NOTE: the two imports below are assumptions mirroring the data_processing /
# evaluate paths used in this package; this file references params and logger,
# but the excerpt does not show where they come from.
import ocnli.mnli_code.util.parameters as params
import ocnli.mnli_code.util.logger as logger

from ocnli.mnli_code.util.data_processing import *
from ocnli.mnli_code.util.evaluate import *

FIXED_PARAMETERS = params.load_parameters()
modname = FIXED_PARAMETERS["model_name"]
logpath = os.path.join(FIXED_PARAMETERS["log_path"], modname) + ".log"
logger = logger.Logger(logpath)

model = FIXED_PARAMETERS["model_type"]
module = importlib.import_module(".".join(['ocnli.mnli_code.models', model]))
MyModel = getattr(module, 'MyModel')

# Log parameter settings at each launch of the training script.
# This helps ensure nothing goes awry when reloading a model and that we
# consistently use the same hyperparameter settings.
logger.Log("FIXED_PARAMETERS\n %s" % FIXED_PARAMETERS)

######################### LOAD DATA #############################
logger.Log("Loading data")
training_snli = load_nli_data(FIXED_PARAMETERS["training_snli"], snli=True,
                              partial_input=FIXED_PARAMETERS["partial_input"])
dev_snli = load_nli_data(FIXED_PARAMETERS["dev_snli"], snli=True,
                         partial_input=FIXED_PARAMETERS["partial_input"])
try:
    test_snli = load_nli_data(FIXED_PARAMETERS["test_snli"], snli=True,
                              partial_input=FIXED_PARAMETERS["partial_input"])