Example #1
    def __init__(self, seq_length):
        ## Define hyperparameters
        self.learning_rate = FIXED_PARAMETERS["learning_rate"]
        self.display_epoch_freq = 1
        self.display_step_freq = 50
        self.embedding_dim = FIXED_PARAMETERS["word_embedding_dim"]
        self.dim = FIXED_PARAMETERS["hidden_embedding_dim"]
        self.batch_size = FIXED_PARAMETERS["batch_size"]
        self.emb_train = FIXED_PARAMETERS["emb_train"]
        self.keep_rate = FIXED_PARAMETERS["keep_rate"]
        self.sequence_length = FIXED_PARAMETERS["seq_length"]
        self.alpha = FIXED_PARAMETERS["alpha"]

        logger.Log("Building model from %s.py" % (model))
        self.model = MyModel(seq_length=self.sequence_length,
                             emb_dim=self.embedding_dim,
                             hidden_dim=self.dim,
                             embeddings=loaded_embeddings,
                             emb_train=self.emb_train)

        # Perform gradient descent with Adam
        self.optimizer = tf.train.AdamOptimizer(self.learning_rate,
                                                beta1=0.9,
                                                beta2=0.999).minimize(
                                                    self.model.total_cost)

        # Flag indicating that training has not yet completed.
        self.completed = False

        # tf things: initialize variables and create placeholder for session
        logger.Log("Initializing variables")
        self.init = tf.global_variables_initializer()
        self.sess = None
        self.saver = tf.train.Saver()
        logger.Log("sess=%s" % self.sess)
Example #2
    def restore_model(self):
        self.sess = tf.Session()
        self.sess.run(self.init)

        # Normalize the checkpoint name: append ".ckpt" unless already present.
        restored_model = FIXED_PARAMETERS["continue_training"]
        if ".ckpt" not in restored_model:
            restored_model += ".ckpt"

        # Restore the best checkpoint written during training.
        self.saver.restore(self.sess, restored_model + "_best")

        test_out = evaluate_classifier(self.classify, test_snli,
                                       FIXED_PARAMETERS["batch_size"])[0]
        logger.Log("Model restored=%s" % restored_model + "_best")
        logger.Log("Acc on SNLI test-set: %s" % (test_out))

        test_out, total = evaluate_full(self.classify_out,
                                        test_snli,
                                        FIXED_PARAMETERS["batch_size"],
                                        name='test',
                                        ofile=None)

        best_dev, dtotal = evaluate_full(self.classify_out,
                                         dev_snli,
                                         FIXED_PARAMETERS["batch_size"],
                                         name='dev',
                                         ofile=None)

        logger.Log("Best dev score: %s (tested on %d examples)" %
                   (best_dev, dtotal))
        logger.Log("Acc on SNLI test-set: %s (tested on %d examples)" %
                   (test_out, total))
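
The restore call above relies on TF1's checkpoint convention: Saver.save() writes a family of files under one prefix, and Saver.restore() takes that same prefix, after which no variable initialization is needed. A minimal save/restore roundtrip, with a hypothetical /tmp path:

import tensorflow as tf

v = tf.Variable(3.0, name="v")
saver = tf.train.Saver()

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    saver.save(sess, "/tmp/demo.ckpt")     # writes demo.ckpt.{meta,index,data-*}

with tf.Session() as sess:
    saver.restore(sess, "/tmp/demo.ckpt")  # restores values; no init needed
    print(sess.run(v))                     # 3.0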
Example #3
    def restore(self, best=True):
        if best:
            path = os.path.join(FIXED_PARAMETERS["ckpt_path"],
                                modname) + ".ckpt_best"
        else:
            path = os.path.join(FIXED_PARAMETERS["ckpt_path"],
                                modname) + ".ckpt"

        try:
            self.sess = tf.Session()
            self.sess.run(self.init)
            self.saver.restore(self.sess, path)
            logger.Log("Model restored from file: %s" % path)
        except ValueError:
            logger.Log('WARNING: No best checkpoint found, using last one..')
            if best:
                # Fall back to the most recent (non-best) checkpoint,
                # as the warning message promises.
                self.restore(best=False)
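
Because a TF1 checkpoint is really a family of files under one prefix, a cheap existence test can stand in for the try/except; the train() example below uses exactly this ".meta" check. A hypothetical helper sketching it:

import os

def checkpoint_exists(prefix):
    # A saved prefix "foo.ckpt" produces "foo.ckpt.meta" (plus index/data
    # files), so testing for the .meta file tells us whether it was written.
    return os.path.isfile(prefix + ".meta")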
Example #4
    def train(self, train_mnli, train_snli, dev_mat, dev_mismat, dev_snli):
        self.sess = tf.Session()
        self.sess.run(self.init)

        self.step = 1
        self.epoch = 0
        self.best_dev_snli = 0.
        self.best_strain_acc = 0.
        self.last_train_acc = [.001, .001, .001, .001, .001]
        self.best_step = 0

        # Restore most recent checkpoint if it exists.
        # Also restore values for best dev-set accuracy and best training-set accuracy.
        ckpt_file = os.path.join(FIXED_PARAMETERS["ckpt_path"],
                                 modname) + ".ckpt"

        # Optionally restore a previously trained model named by
        # "continue_training"; append ".ckpt" to the name if it is missing.
        restored_model = FIXED_PARAMETERS["continue_training"]
        if ".ckpt" not in restored_model:
            restored_model += ".ckpt"
        logger.Log("restored_model=%s" % restored_model)
        orig_test_score = 0.0

        if os.path.isfile(restored_model + ".meta"):
            if os.path.isfile(restored_model + "_best.meta"):
                self.saver.restore(self.sess, (restored_model + "_best"))
                #dev_acc_snli, dev_cost_snli = evaluate_classifier(self.classify, dev_snli, self.batch_size)
                test_out = evaluate_classifier(
                    self.classify, test_snli,
                    FIXED_PARAMETERS["batch_size"])[0]
                logger.Log("Model restored=%s" % restored_model + "_best")
                logger.Log("Acc on SNLI test-set: %s" % (test_out))
                orig_test_score = test_out
                logger.Log("Acc on SNLI test-set: %f" % ORIG_TEST_SCORE)

        training_data = train_snli

        # Training cycle
        if FIXED_PARAMETERS["no_train"]:
            # Keep the return shape consistent with the end of this method.
            return 0.0, orig_test_score
        logger.Log("Training...")

        while True:
            random.shuffle(training_data)
            avg_cost = 0.
            total_batch = int(len(training_data) / self.batch_size)

            # Loop over all batches in epoch
            for i in range(total_batch):
                # Assemble a minibatch of the next B examples
                minibatch_premise_vectors, minibatch_hypothesis_vectors, minibatch_labels, minibatch_genres = self.get_minibatch(
                    training_data, self.batch_size * i,
                    self.batch_size * (i + 1))

                # Run the optimizer to take a gradient step, and also fetch the value of the
                # cost function for logging
                feed_dict = {
                    self.model.premise_x: minibatch_premise_vectors,
                    self.model.hypothesis_x: minibatch_hypothesis_vectors,
                    self.model.y: minibatch_labels,
                    self.model.keep_rate_ph: self.keep_rate
                }
                _, c = self.sess.run([self.optimizer, self.model.total_cost],
                                     feed_dict)

                # Since a single epoch can take ages for larger models (ESIM),
                # print accuracy every 50 steps.
                if self.step % self.display_step_freq == 0:

                    #dev_acc_mat, dev_cost_mat = evaluate_classifier(self.classify, dev_mat, self.batch_size)
                    #dev_acc_mismat, dev_cost_mismat = evaluate_classifier(self.classify, dev_mismat, self.batch_size)
                    dev_acc_snli, dev_cost_snli = evaluate_classifier(
                        self.classify, dev_snli, self.batch_size)
                    strain_acc, strain_cost = evaluate_classifier(
                        self.classify, train_snli[0:5000], self.batch_size)

                    logger.Log(
                        "Step: %i\t Dev-matched acc: %f\t Dev-mismatched acc: %f\t Dev-SNLI acc: %f\t SNLI train acc: %f"
                        % (self.step, 0.0, 0.0, dev_acc_snli, strain_acc))
                    logger.Log(
                        "Step: %i\t Dev-matched cost: %f\t Dev-mismatched cost: %f\t Dev-SNLI cost: %f\t SNLI train cost: %f"
                        % (self.step, 0.0, 0.0, dev_cost_snli, strain_cost))

                if self.step % 500 == 0:
                    self.saver.save(self.sess, ckpt_file)
                    # Relative improvement of dev accuracy over the previous
                    # best, in percent; checkpoint as "best" if it clears 0.04%.
                    best_test = 100 * (1 - self.best_dev_snli / dev_acc_snli)
                    if best_test > 0.04:
                        self.saver.save(self.sess, ckpt_file + "_best")
                        self.best_dev_snli = dev_acc_snli
                        self.best_strain_acc = strain_acc
                        self.best_step = self.step
                        logger.Log(
                            "Checkpointing with new best SNLI-dev accuracy: %f"
                            % (self.best_dev_snli))

                self.step += 1

                # Compute average loss
                avg_cost += c / (total_batch * self.batch_size)

            # Display some statistics about the epoch
            if self.epoch % self.display_epoch_freq == 0:
                logger.Log("Epoch: %i\t Avg. Cost: %f" %
                           (self.epoch + 1, avg_cost))

            self.epoch += 1
            self.last_train_acc[(self.epoch % 5) - 1] = strain_acc

            # Uncomment to stop early when debugging:
            # if self.epoch >= 3:
            #     break

            # Early stopping
            progress = 1000 * (sum(self.last_train_acc) /
                               (5 * min(self.last_train_acc)) - 1)

            if (progress < 0.1) or (self.step > self.best_step + 30000):
                logger.Log("Best snli-dev accuracy: %s" %
                           str(self.best_dev_snli))
                #logger.Log("MultiNLI Train accuracy: %s" %(self.best_strain_acc))
                self.completed = True
                break

        ## Return the best dev performance and the original test score.
        return self.best_dev_snli, orig_test_score
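
The early-stopping criterion above treats training accuracy as plateaued when the mean of the last five measurements is within 0.01% of their minimum (progress < 0.1 after the x1000 scaling), or when 30,000 steps pass without a new best dev score. A standalone sketch of the progress statistic (hypothetical helper name):

def training_progress(recent_accs):
    # Scaled relative spread of recent training accuracies; near zero when
    # accuracy has plateaued (mean close to min).
    return 1000 * (sum(recent_accs) / (len(recent_accs) * min(recent_accs)) - 1)

print(training_progress([0.80, 0.80, 0.80, 0.80, 0.80]))  # 0.0 -> stop
print(training_progress([0.60, 0.65, 0.70, 0.75, 0.80]))  # ~166.7 -> continue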
Example #5
# Imports assumed by these snippets (module paths inferred from the wildcard
# imports below; "parameters" as the params module is an assumption):
import os
import importlib
import random
import tensorflow as tf
from ocnli.mnli_code.util import logger
import ocnli.mnli_code.util.parameters as params

from ocnli.mnli_code.util.data_processing import *
from ocnli.mnli_code.util.evaluate import *

FIXED_PARAMETERS = params.load_parameters()
modname = FIXED_PARAMETERS["model_name"]
logpath = os.path.join(FIXED_PARAMETERS["log_path"], modname) + ".log"
logger = logger.Logger(logpath)

model = FIXED_PARAMETERS["model_type"]

module = importlib.import_module(".".join(['ocnli.mnli_code.models', model]))
MyModel = getattr(module, 'MyModel')

# Log parameter settings at each launch of the training script. This helps
# ensure nothing goes awry when reloading a model and that we consistently
# use the same hyperparameter settings.
logger.Log("FIXED_PARAMETERS\n %s" % FIXED_PARAMETERS)

######################### LOAD DATA #############################

logger.Log("Loading data")
training_snli = load_nli_data(FIXED_PARAMETERS["training_snli"],
                              snli=True,
                              partial_input=FIXED_PARAMETERS["partial_input"])
dev_snli = load_nli_data(FIXED_PARAMETERS["dev_snli"],
                         snli=True,
                         partial_input=FIXED_PARAMETERS["partial_input"])

try:
    test_snli = load_nli_data(FIXED_PARAMETERS["test_snli"],
                              snli=True,
                              partial_input=FIXED_PARAMETERS["partial_input"])
except KeyError:
    # The original snippet is truncated after the "try" body; this minimal
    # handler (an assumption) keeps the block runnable when no test set is
    # configured.
    test_snli = None