Beispiel #1
0
def compute_perplexity(model, sess, name):
    """Run `model` over its evaluation input and return the perplexity.

    ``model.eval(sess)`` is called repeatedly until it raises
    ``tf.errors.OutOfRangeError``. Each call yields
    ``(loss, predict_count, batch_size)``; the loss is weighted by the batch
    size, summed, and finally divided by the summed predict count before
    being handed to ``misc.safe_exp``.

    Args:
      model: object exposing ``eval(sess)``.
      sess: tensorflow session passed through to ``model.eval``.
      name: label that only appears in the final log line.
    Returns:
      ``misc.safe_exp(total_loss / total_predict_count)``.
    Raises:
      ZeroDivisionError: if no batch was produced at all, because the
        predict count is then still zero.
    """
    loss_sum = 0
    predict_sum = 0
    began_at = time.time()
    num_steps = 0

    while True:
        try:
            batch_loss, batch_predicts, batch_size = model.eval(sess)
        except tf.errors.OutOfRangeError:
            # The input is exhausted: stop accumulating.
            break

        loss_sum += batch_loss * batch_size
        predict_sum += batch_predicts
        num_steps += 1

        if num_steps % 500 != 0:
            continue

        # Periodic progress report with the running average.
        running_loss = loss_sum / predict_sum
        running_ppl = misc.safe_exp(running_loss)
        log.print_out("    ## After %d steps, loss %.2f - ppl %.3f" %
                      (num_steps, running_loss, running_ppl))

    perplexity = misc.safe_exp(loss_sum / predict_sum)
    log.print_time("  eval %s: perplexity %.2f" % (name, perplexity),
                   began_at)
    return perplexity
Beispiel #2
0
def decode_and_evaluate(name,
                        model,
                        sess,
                        out_file,
                        ref_file,
                        metrics,
                        beam_width,
                        num_translations_per_input=1,
                        decode=True):
    """Decode every batch produced by `model` into `out_file`.

    When `decode` is true, ``model.decode(sess)`` is called until it raises
    ``tf.errors.OutOfRangeError``. For each input sentence one line is
    written to `out_file`, holding the first
    ``max(min(num_translations_per_input, beam_width), 1)`` candidates joined
    by tabs.

    Metric scoring is currently disabled: `name`, `ref_file` and `metrics`
    are accepted but not used, and the returned dict is always empty.

    Args:
      name: unused.
      model: object exposing ``decode(sess)``, which returns a pair whose
        first element is the decoded output array.
      sess: tensorflow session passed through to ``model.decode``.
      out_file: path of the file the decoded text is written to.
      ref_file: unused.
      metrics: unused.
      beam_width: upper bound for the candidates per input; when it is 0 the
        decoded output gets an extra leading axis before indexing.
      num_translations_per_input: requested number of candidates per input.
      decode: when false nothing is decoded or written.
    Returns:
      An empty dict.
    """
    if decode:
        log.print_out("  decoding to output %s." % out_file)

        began_at = time.time()
        sentence_total = 0
        make_utf8_writer = codecs.getwriter("utf-8")
        with make_utf8_writer(tf.gfile.GFile(out_file, mode="wb")) as trans_f:
            # An empty write makes sure the file exists even if nothing is
            # decoded.
            trans_f.write("")

            # At most `beam_width` candidates per input, but never fewer
            # than one.
            per_input = max(min(num_translations_per_input, beam_width), 1)

            step = 0
            while True:
                step += 1
                try:
                    if step % 1000 == 0:
                        log.print_out(
                            "    decoding step {}, num sentences {}".format(
                                step, sentence_total))

                    ncm_outputs, _ = model.decode(sess)
                    if beam_width == 0:
                        # Add a leading axis so the indexing below is the
                        # same with and without beam search.
                        ncm_outputs = np.expand_dims(ncm_outputs, 0)

                    batch_size = ncm_outputs.shape[1]
                    sentence_total += batch_size

                    for sent_id in range(batch_size):
                        candidates = []
                        for beam_id in range(per_input):
                            candidates.append(
                                get_translation(ncm_outputs[beam_id], sent_id))
                        line = b"\t".join(candidates).decode("utf-8")
                        trans_f.write(line + "\n")
                except tf.errors.OutOfRangeError:
                    # No more batches: report and leave the loop.
                    log.print_time(
                        "  done, num sentences %d, num translations per input %d" %
                        (sentence_total, per_input), began_at)
                    break

    # NOTE: scoring `out_file` against `ref_file` for each entry in
    # `metrics` is switched off, so there are no scores to report.
    return {}
Beispiel #3
0
    def _decode_and_evaluate(self,
                             model,
                             infer_sess,
                             iterator_feed_dict,
                             num_responses_per_input=1,
                             label="test"):
        """Decode all inference batches into ``<model_dir>/output_<label>``.

        The model iterator is (re)initialised with `iterator_feed_dict`, then
        ``model.decode(infer_sess)`` is called until it raises
        ``tf.errors.OutOfRangeError``. Every input yields one output line with
        its responses joined by tabs. Despite the name, no scoring happens
        here and nothing is returned.

        Args:
          model: object exposing ``iterator.initializer`` and
            ``decode(sess)``.
          infer_sess: tensorflow session used for initialisation and decoding.
          iterator_feed_dict: feed dict for the iterator initializer.
          num_responses_per_input: requested responses per input; clamped to
            at most the effective beam width and at least 1.
          label: suffix of the output file name.
        """
        began_at = time.time()
        sentence_total = 0

        out_file = os.path.join(self.config.model_dir, "output_%s" % label)

        # The beam width only counts when beam search is actually considered.
        if self._consider_beam():
            effective_beam_width = self.config.beam_width
        else:
            effective_beam_width = 0

        num_responses_per_input = max(
            min(num_responses_per_input, effective_beam_width), 1)

        infer_sess.run(model.iterator.initializer,
                       feed_dict=iterator_feed_dict)

        make_utf8_writer = codecs.getwriter("utf-8")
        with make_utf8_writer(tf.gfile.GFile(out_file, mode="wb")) as trans_f:
            # An empty write makes sure the file exists even if nothing is
            # decoded.
            trans_f.write("")

            while True:
                try:
                    ncm_outputs, _ = model.decode(infer_sess)
                    uses_beam = (self.config.beam_width != 0
                                 and self._consider_beam())
                    if not uses_beam:
                        # Add a leading axis so the indexing below is the
                        # same with and without beam search.
                        ncm_outputs = np.expand_dims(ncm_outputs, 0)

                    batch_size = ncm_outputs.shape[1]
                    sentence_total += batch_size

                    for sent_id in range(batch_size):
                        responses = []
                        for beam_id in range(num_responses_per_input):
                            responses.append(
                                ncm_utils.get_translation(
                                    ncm_outputs[beam_id], sent_id))
                        line = b"\t".join(responses).decode("utf-8")
                        trans_f.write(line + "\n")
                except tf.errors.OutOfRangeError:
                    # No more batches to decode.
                    break

        log.print_time(
            "  Done, num sentences %d, num translations per input %d" %
            (sentence_total, num_responses_per_input), began_at)
Beispiel #4
0
    def train(self, target_session="", scope=None):
        """Train until the epoch budget or the early-stopping patience runs out.

        Separate train, eval and infer models are created, each with its own
        session. If ``config.is_pretrain_enabled()`` a pretraining pass runs
        first. The loop then executes training steps and:

        * every ``steps_per_stats`` global steps checks the statistics and
          aborts on overflow;
        * every ``20 * steps_per_stats`` global steps saves a checkpoint, runs
          sample decoding and computes the dev perplexity. Patience is reset
          when the dev perplexity beats ``config.best_dev_ppl`` and reduced by
          one when it exceeds ``degrade_threshold * best_dev_ppl``;
        * at the end of each pass over the training data runs sample
          decoding, bumps ``config.epoch`` and restarts the iterator.

        A final checkpoint is always saved; a full evaluation follows when
        ``config.enable_final_eval`` is set. The summary writer, the three
        sessions and the log file are closed before returning.

        Args:
          target_session: passed as ``target`` to every ``tf.Session``.
          scope: passed to the model-creation helpers.
        """
        assert self.config.num_turns >= 2
        if self.config.is_pretrain_enabled():
            assert self.config.num_pretrain_turns >= 2
            assert self.config.num_turns >= self.config.num_pretrain_turns

        out_dir = self.config.model_dir

        steps_per_stats = self.config.steps_per_stats
        steps_per_eval = 20 * steps_per_stats

        _helper = self._get_model_helper()

        self._pre_model_creation()

        train_model = _helper.create_train_model(self.config, scope)
        eval_model = _helper.create_eval_model(self.config, scope)
        infer_model = _helper.create_infer_model(self.config, scope)

        self._post_model_creation(train_model, eval_model, infer_model)

        # Preload data for sample decoding.
        dev_file = self.config.dev_data
        eval_data = self._load_data(dev_file, include_target=True)

        summary_name = "train_log"

        # Log and output files
        log_file = os.path.join(out_dir, "log_%d" % time.time())
        log_f = tf.gfile.GFile(log_file, mode="a")
        log.print_out("# log_file=%s" % log_file, log_f)

        self.config.save()
        log.print_out("# Configs saved")

        # NOTE: never updated below, so the final log line always reports a
        # step-time of 0.00.
        avg_step_time = 0.0

        # TensorFlow model
        config_proto = model_helper.get_config_proto(self.config.log_device)

        train_sess = tf.Session(
            target=target_session, config=config_proto, graph=train_model.graph)
        eval_sess = tf.Session(
            target=target_session, config=config_proto, graph=eval_model.graph)
        infer_sess = tf.Session(
            target=target_session, config=config_proto, graph=infer_model.graph)

        # Pretraining
        num_pretrain_steps = 0
        if self.config.is_pretrain_enabled():
            num_pretrain_steps = self.config.num_pretrain_steps

            pretrain_model = _helper.create_pretrain_model(self.config, scope)

            with tf.Session(
                    target=target_session, config=config_proto, graph=pretrain_model.graph) as pretrain_sess:
                self.pretrain(pretrain_sess, pretrain_model, log_f)

        with train_model.graph.as_default():
            loaded_train_model, global_step = model_helper.create_or_load_model(
                train_model.model, self.config.model_dir, train_sess, "train")

        # Summary writer
        summary_writer = tf.summary.FileWriter(
            os.path.join(out_dir, summary_name), train_model.graph)

        last_stats_step = global_step
        last_eval_step = global_step
        patience = self.config.patience

        stats = self.init_stats()
        speed, train_ppl = 0.0, 0.0
        start_train_time = time.time()

        log.print_out(
            "# Start step %d, epoch %d, lr %g, %s" %
            (global_step, self.config.epoch, loaded_train_model.learning_rate.eval(session=train_sess),
             time.ctime()),
            log_f)

        # Initialize all of the iterators. When resuming inside an epoch the
        # elements consumed by the already-counted steps are skipped.
        skip_count = self.config.batch_size * self.config.epoch_step
        log.print_out("# Init train iterator for %d steps, skipping %d elements" %
                      (self.config.num_train_steps, skip_count))

        train_sess.run(
            train_model.iterator.initializer,
            feed_dict={train_model.skip_count_placeholder: skip_count})

        while self.config.epoch < self.config.num_train_epochs and patience > 0:

            ### Run a step ###
            start_time = time.time()
            try:
                step_result = loaded_train_model.train(train_sess)
                self.config.epoch_step += 1
            except tf.errors.OutOfRangeError:
                # Finished going through the training dataset.  Go to next epoch.
                sw = Stopwatch()
                self.run_sample_decode(infer_model, infer_sess,
                                       self.config.model_dir, summary_writer, eval_data)
                # NOTE: the per-epoch full evaluation (enable_epoch_evals) is
                # currently switched off; only sample decoding runs here.
                log.print_out(
                    "## Done epoch %d in %d steps. step %d @ eval time: %ds" %
                    (self.config.epoch, self.config.epoch_step, global_step, sw.elapsed()))

                self.config.epoch += 1
                self.config.epoch_step = 0
                self.config.save()

                # Restart the iterator from the beginning of the data.
                train_sess.run(
                    train_model.iterator.initializer,
                    feed_dict={train_model.skip_count_placeholder: 0})
                continue

            # Write step summary and accumulate statistics
            global_step = self.update_stats(stats, summary_writer, start_time, step_result)

            # Once in a while, we print statistics.
            if global_step - last_stats_step >= steps_per_stats:
                last_stats_step = global_step
                train_ppl, speed, is_overflow = self.check_stats(stats, global_step, steps_per_stats, log_f)
                if is_overflow:
                    break

                # Reset statistics
                stats = self.init_stats()

            if global_step - last_eval_step >= steps_per_eval:
                last_eval_step = global_step

                log.print_out("# Save eval, global step %d" % global_step)
                log.add_summary(summary_writer, global_step, "train_ppl", train_ppl)

                # Save checkpoint
                loaded_train_model.saver.save(train_sess,
                                              self.config.checkpoint_file,
                                              global_step=global_step)

                # Evaluate on dev
                self.run_sample_decode(infer_model, infer_sess, out_dir, summary_writer, eval_data)
                dev_ppl, _ = self.run_internal_eval(eval_model, eval_sess, out_dir, summary_writer,
                                                    use_test_set=False)
                if dev_ppl < self.config.best_dev_ppl:
                    # New best: remember it and restore the full patience.
                    self.config.best_dev_ppl = dev_ppl
                    patience = self.config.patience
                    log.print_out('    ** Best model thus far, ep {}|{} dev_ppl {:.3f}'.format(
                        self.config.epoch,
                        self.config.epoch_step,
                        dev_ppl))
                elif dev_ppl > self.config.degrade_threshold * self.config.best_dev_ppl:
                    # Clearly worse than the best so far: use up patience.
                    patience -= 1
                    log.print_out(
                        '    worsened, ep {}|{} patience {} best_dev_ppl {:.3f}'.format(
                            self.config.epoch,
                            self.config.epoch_step,
                            patience,
                            self.config.best_dev_ppl))

                # Save config parameters
                self.config.save()

        # Done training
        loaded_train_model.saver.save(
            train_sess,
            self.config.checkpoint_file,
            global_step=global_step)

        if self.config.enable_final_eval:
            dev_ppl, test_ppl = self.run_full_eval(infer_model, eval_model,
                                                   infer_sess, eval_sess,
                                                   out_dir,
                                                   fs.file_name(self.config.test_data) + '_final',
                                                   summary_writer)

            log.print_out(
                "# Final, step %d ep %d/%d lr %g "
                "step-time %.2f wps %.2fK train_ppl %.2f, dev_ppl %.2f, test_ppl %.2f, %s" %
                (global_step, self.config.epoch, self.config.epoch_step,
                 loaded_train_model.learning_rate.eval(session=train_sess),
                 avg_step_time, speed, train_ppl, dev_ppl, test_ppl, time.ctime()),
                log_f)
        else:
            log.print_out(
                "# Final, step %d ep %d/%d lr %g "
                "step-time %.2f wps %.2fK train_ppl %.2f best_dev_ppl %.2f, %s" %
                (global_step, self.config.epoch, self.config.epoch_step,
                 loaded_train_model.learning_rate.eval(session=train_sess),
                 avg_step_time, speed, train_ppl, self.config.best_dev_ppl, time.ctime()),
                log_f)

        log.print_time("# Done training!", start_train_time)

        summary_writer.close()

        eval_sess.close()
        infer_sess.close()
        train_sess.close()

        # The log file was opened by hand above, so it must be closed by hand
        # as well; otherwise the handle leaks.
        log_f.close()
Beispiel #5
0
    def test(self):
        """Decode the test data with the latest checkpoint, then log its perplexity.

        First an inference model is restored from the newest checkpoint in
        ``config.get_infer_model_dir()`` and the test data is decoded through
        ``_decode_and_evaluate``. Afterwards an eval model is restored from
        the same checkpoint and ``model_helper.compute_perplexity`` is run on
        the test data. Nothing is returned.

        Raises:
          AssertionError: if ``config.n_responses`` is below 1, exceeds a
            positive ``config.beam_width``, or differs from 1 while the beam
            width is not positive.
        """
        began_at = time.time()

        assert self.config.n_responses >= 1

        if self.config.beam_width > 0:
            assert self.config.n_responses <= self.config.beam_width
        else:
            assert self.config.n_responses == 1

        self._pre_model_creation()

        infer_model = self._get_model_helper().create_infer_model(self.config)

        latest_ckpt = tf.train.latest_checkpoint(self.config.get_infer_model_dir())

        # --- Stage 1: decoding -------------------------------------------
        infer_config_proto = model_helper.get_config_proto(self.config.log_device)
        with tf.Session(config=infer_config_proto,
                        graph=infer_model.graph) as infer_sess:
            loaded_infer_model = model_helper.load_model(
                infer_model.model, latest_ckpt, infer_sess, "infer")

            log.print_out("# Start decoding")
            decoding_settings = (
                ("  beam width: {}", self.config.beam_width),
                ("  length penalty: {}", self.config.length_penalty_weight),
                ("  sampling temperature: {}", self.config.sampling_temperature),
                ("  num responses per test instance: {}", self.config.n_responses),
            )
            for template, value in decoding_settings:
                log.print_out(template.format(value))

            feed_dict = {
                infer_model.src_placeholder: self._load_data(self.config.test_data),
                infer_model.batch_size_placeholder: self.config.infer_batch_size,
            }

            # The output label records the decoding settings in effect:
            # the temperature when sampling, otherwise beam width and
            # length penalty.
            base_name = fs.file_name(self.config.test_data)
            if self.config.sampling_temperature > 0:
                label = "%s_t%.1f" % (base_name, self.config.sampling_temperature)
            else:
                label = "%s_bw%d_lp%.1f" % (
                    base_name, self.config.beam_width, self.config.length_penalty_weight)

            self._decode_and_evaluate(loaded_infer_model, infer_sess, feed_dict,
                                      label=label,
                                      num_responses_per_input=self.config.n_responses)
        log.print_time("# Decoding done", began_at)

        # --- Stage 2: perplexity -----------------------------------------
        eval_model = self._get_model_helper().create_eval_model(self.config)
        eval_config_proto = model_helper.get_config_proto(self.config.log_device)
        with tf.Session(config=eval_config_proto,
                        graph=eval_model.graph) as eval_sess:
            loaded_eval_model = model_helper.load_model(
                eval_model.model, latest_ckpt, eval_sess, "eval")

            log.print_out("# Compute Perplexity")

            eval_feed = {
                eval_model.eval_file_placeholder: self.config.test_data
            }

            eval_sess.run(eval_model.iterator.initializer, feed_dict=eval_feed)
            model_helper.compute_perplexity(loaded_eval_model, eval_sess, "test")

        log.print_time("# Test finished", began_at)
Beispiel #6
0
    def test(self):
        """Decode the test data with the latest checkpoint, then log its perplexity.

        A topic-aware inference model is restored from the newest checkpoint
        in ``config.get_infer_model_dir()`` and the test data is decoded into
        ``<model_dir>/output_<label>`` through
        ``eval_metric.decode_and_evaluate`` (called without a reference
        file). Afterwards an eval model is restored from the same checkpoint
        and ``model_helper.compute_perplexity`` is run on the test data.
        Nothing is returned.

        Raises:
          AssertionError: if ``config.n_responses`` is below 1, exceeds a
            positive ``config.beam_width``, or differs from 1 while the beam
            width is not positive.
        """
        began_at = time.time()

        assert self.config.n_responses >= 1

        if self.config.beam_width > 0:
            assert self.config.n_responses <= self.config.beam_width
        else:
            assert self.config.n_responses == 1

        self._pre_model_creation()

        infer_model = taware_helper.create_infer_model(taware_model.TopicAwareSeq2SeqModel, self.config)

        config_proto = models.model_helper.get_config_proto(self.config.log_device)

        ckpt = tf.train.latest_checkpoint(self.config.get_infer_model_dir())

        # --- Stage 1: decoding -------------------------------------------
        with tf.Session(graph=infer_model.graph, config=config_proto) as infer_sess:
            loaded_infer_model = model_helper.load_model(
                infer_model.model, ckpt, infer_sess, "infer")

            log.print_out("# Start decoding")
            decoding_settings = (
                ("  beam width: {}", self.config.beam_width),
                ("  length penalty: {}", self.config.length_penalty_weight),
                ("  sampling temperature: {}", self.config.sampling_temperature),
                ("  num responses per test instance: {}", self.config.n_responses),
            )
            for template, value in decoding_settings:
                log.print_out(template.format(value))

            infer_feed = {
                infer_model.src_placeholder: self._load_data(self.config.test_data),
                infer_model.batch_size_placeholder: self.config.infer_batch_size,
            }

            infer_sess.run(infer_model.iterator.initializer, feed_dict=infer_feed)

            # The output label records the decoding settings in effect:
            # the temperature when sampling, otherwise beam width and
            # length penalty.
            base_name = fs.file_name(self.config.test_data)
            if self.config.sampling_temperature > 0:
                label = "%s_t%.1f" % (base_name, self.config.sampling_temperature)
            else:
                label = "%s_bw%d_lp%.1f" % (
                    base_name, self.config.beam_width, self.config.length_penalty_weight)

            out_file = os.path.join(self.config.model_dir, "output_{}".format(label))

            eval_metric.decode_and_evaluate(
                "test",
                loaded_infer_model,
                infer_sess,
                out_file,
                ref_file=None,
                metrics=self.config.metrics,
                beam_width=self.config.beam_width,
                num_translations_per_input=self.config.n_responses)
        log.print_time("# Decoding done", began_at)

        # --- Stage 2: perplexity -----------------------------------------
        eval_model = taware_helper.create_eval_model(taware_model.TopicAwareSeq2SeqModel, self.config)
        eval_config_proto = models.model_helper.get_config_proto(self.config.log_device)
        with tf.Session(config=eval_config_proto,
                        graph=eval_model.graph) as eval_sess:
            loaded_eval_model = model_helper.load_model(
                eval_model.model, ckpt, eval_sess, "eval")

            log.print_out("# Compute Perplexity")

            eval_feed = {
                eval_model.eval_file_placeholder: self.config.test_data
            }

            eval_sess.run(eval_model.iterator.initializer, feed_dict=eval_feed)

            model_helper.compute_perplexity(loaded_eval_model, eval_sess, "test")

        log.print_time("# Test finished", began_at)
Beispiel #7
0
    def train(self, target_session="", scope=None):
        """Train until the epoch budget or the early-stopping patience runs out.

        Separate topic-aware train, eval and infer models are created, each
        with its own session. The loop then executes training steps and:

        * every ``steps_per_stats`` global steps checks the statistics and
          aborts on overflow;
        * every ``20 * steps_per_stats`` global steps saves a checkpoint, runs
          sample decoding and computes the dev perplexity. Patience is reset
          when the dev perplexity beats ``config.best_dev_ppl`` and reduced by
          one when it exceeds ``degrade_threshold * best_dev_ppl``;
        * at the end of each pass over the training data runs sample
          decoding, bumps ``config.epoch`` and restarts the iterator.

        A final checkpoint is always saved. The summary writer, the three
        sessions and the log file are closed before returning.

        Args:
          target_session: passed as ``target`` to every ``tf.Session``.
          scope: passed to the model-creation helpers.

        Returns:
          ``(dev_scores, test_scores, dev_ppl, test_ppl, global_step)``. The
          final full evaluation is disabled, so the first four entries are
          always ``None``.
        """
        out_dir = self.config.model_dir
        model_dir = out_dir

        num_train_steps = self.config.num_train_steps
        steps_per_stats = self.config.steps_per_stats
        steps_per_eval = 20 * steps_per_stats
        # NOTE: the separate external-evaluation schedule is disabled.

        self._pre_model_creation()

        train_model = taware_helper.create_train_model(taware_model.TopicAwareSeq2SeqModel, self.config, scope)
        eval_model = taware_helper.create_eval_model(taware_model.TopicAwareSeq2SeqModel, self.config, scope)
        infer_model = taware_helper.create_infer_model(taware_model.TopicAwareSeq2SeqModel, self.config, scope)

        # Preload data for sample decoding.
        dev_file = self.config.dev_data
        eval_data = self._load_data(dev_file, include_target=True)

        summary_name = "train_log"

        # Log and output files
        log_file = os.path.join(out_dir, "log_%d" % time.time())
        log_f = tf.gfile.GFile(log_file, mode="a")
        log.print_out("# log_file=%s" % log_file, log_f)

        # NOTE: never updated below, so the final log line always reports a
        # step-time of 0.00.
        avg_step_time = 0.0

        # TensorFlow model
        config_proto = models.model_helper.get_config_proto(self.config.log_device)

        train_sess = tf.Session(
            target=target_session, config=config_proto, graph=train_model.graph)
        eval_sess = tf.Session(
            target=target_session, config=config_proto, graph=eval_model.graph)
        infer_sess = tf.Session(
            target=target_session, config=config_proto, graph=infer_model.graph)

        with train_model.graph.as_default():
            loaded_train_model, global_step = model_helper.create_or_load_model(
                train_model.model, model_dir, train_sess, "train")

        # Summary writer
        summary_writer = tf.summary.FileWriter(
            os.path.join(out_dir, summary_name), train_model.graph)

        # NOTE: the initial full evaluation is disabled.

        last_stats_step = global_step
        last_eval_step = global_step
        patience = self.config.patience

        # This is the training loop.
        stats = self.init_stats()
        speed, train_ppl = 0.0, 0.0
        start_train_time = time.time()

        log.print_out(
            "# Start step %d, epoch %d, lr %g, %s" %
            (global_step, self.config.epoch, loaded_train_model.learning_rate.eval(session=train_sess),
             time.ctime()),
            log_f)

        self.config.save()
        log.print_out("# Configs saved")

        # Initialize all of the iterators. When resuming inside an epoch the
        # elements consumed by the already-counted steps are skipped.
        skip_count = self.config.batch_size * self.config.epoch_step
        log.print_out("# Init train iterator for %d steps, skipping %d elements" %
                      (self.config.num_train_steps, skip_count))

        train_sess.run(
            train_model.iterator.initializer,
            feed_dict={train_model.skip_count_placeholder: skip_count})

        while self.config.epoch < self.config.num_train_epochs and patience > 0:
            ### Run a step ###
            start_time = time.time()
            try:
                step_result = loaded_train_model.train(train_sess)
                self.config.epoch_step += 1
            except tf.errors.OutOfRangeError:
                # Finished going through the training dataset.  Go to next epoch.
                sw = Stopwatch()
                log.print_out(
                    "# Finished an epoch, step %d. Perform external evaluation" %
                    global_step)
                self.run_sample_decode(infer_model, infer_sess,
                                       model_dir, summary_writer, eval_data)

                log.print_out(
                    "## Done epoch %d in %d steps. step %d @ eval time: %ds" %
                    (self.config.epoch, self.config.epoch_step, global_step, sw.elapsed()))

                self.config.epoch += 1
                self.config.epoch_step = 0
                self.config.save()

                # Restart the iterator from the beginning of the data.
                train_sess.run(
                    train_model.iterator.initializer,
                    feed_dict={train_model.skip_count_placeholder: 0})
                continue

            # Write step summary and accumulate statistics
            global_step = self.update_stats(stats, summary_writer, start_time, step_result)

            # Once in a while, we print statistics.
            if global_step - last_stats_step >= steps_per_stats:
                last_stats_step = global_step
                train_ppl, speed, is_overflow = self.check_stats(stats, global_step, steps_per_stats, log_f)
                if is_overflow:
                    break

                # Reset statistics
                stats = self.init_stats()

            if global_step - last_eval_step >= steps_per_eval:
                last_eval_step = global_step

                log.print_out("# Save eval, global step %d" % global_step)
                log.add_summary(summary_writer, global_step, "train_ppl", train_ppl)

                # Save checkpoint
                loaded_train_model.saver.save(
                    train_sess,
                    self.config.checkpoint_file,
                    global_step=global_step)

                # Evaluate on dev
                self.run_sample_decode(infer_model, infer_sess, model_dir, summary_writer, eval_data)
                dev_ppl, _ = self.run_internal_eval(eval_model, eval_sess, model_dir, summary_writer, use_test_set=False)

                if dev_ppl < self.config.best_dev_ppl:
                    # New best: remember it and restore the full patience.
                    self.config.best_dev_ppl = dev_ppl
                    patience = self.config.patience
                    log.print_out('    ** Best model thus far, ep {}|{} dev_ppl {:.3f}'.format(
                        self.config.epoch,
                        self.config.epoch_step,
                        dev_ppl))
                elif dev_ppl > self.config.degrade_threshold * self.config.best_dev_ppl:
                    # Clearly worse than the best so far: use up patience.
                    patience -= 1
                    # Report the remaining patience (the local counter), not
                    # the configured maximum, which never changes.
                    log.print_out(
                        '    worsened, ep {}|{} patience {} best_dev_ppl {:.3f}'.format(
                            self.config.epoch,
                            self.config.epoch_step,
                            patience,
                            self.config.best_dev_ppl))

                # Save config parameters
                self.config.save()

        # Done training
        loaded_train_model.saver.save(
            train_sess,
            self.config.checkpoint_file,
            global_step=global_step)

        # NOTE: the final full evaluation is disabled, so there are no scores
        # or perplexities to report and the summary stays empty.
        dev_scores, test_scores, dev_ppl, test_ppl = None, None, None, None
        result_summary = ""

        log.print_out(
            "# Final, step %d lr %g "
            "step-time %.2f wps %.2fK ppl %.2f, %s, %s" %
            (global_step, loaded_train_model.learning_rate.eval(session=train_sess),
             avg_step_time, speed, train_ppl, result_summary, time.ctime()),
            log_f)
        log.print_time("# Done training!", start_train_time)

        summary_writer.close()

        # Release the sessions and the log file; they are local to this
        # method and would otherwise stay open after it returns.
        eval_sess.close()
        infer_sess.close()
        train_sess.close()
        log_f.close()

        # NOTE: re-evaluating the saved best models per metric is disabled.

        return (dev_scores, test_scores, dev_ppl, test_ppl, global_step)