Example #1
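This helper picks one random sentence from the dev data, reinitializes the inference iterator on that single example, and decodes it. With beam search enabled (beam_width > 0) only the top beam is kept; the tab-separated source turns, the reference response, and the generated reply are then logged.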
    def _sample_decode(self, model, global_step, sess,
                       iterator_src_placeholder,
                       iterator_batch_size_placeholder, eval_data,
                       summary_writer):
        """Pick a sentence and decode."""
        decode_id = random.randint(0, len(eval_data) - 1)
        log.print_out("  # {}".format(decode_id))

        iterator_feed_dict = {
            iterator_src_placeholder: [eval_data[decode_id]],
            iterator_batch_size_placeholder: 1,
        }
        sess.run(model.iterator.initializer, feed_dict=iterator_feed_dict)

        ncm_outputs, attention_summary = model.decode(sess)

        if self.config.beam_width > 0:
            # get the top translation.
            ncm_outputs = ncm_outputs[0]

        translation = ncm_utils.get_translation(ncm_outputs, sent_id=0)
        log.print_out("    sources:")

        utterances = eval_data[decode_id].split("\t")
        sources, target = utterances[:-1], utterances[-1]

        for t, src in enumerate(sources):
            log.print_out("      @{} {}".format(t + 1, src))
        log.print_out("    resp: {}".format(target))
        log.print_out(b"    generated: " + translation)

        # Summary
        if attention_summary is not None:
            summary_writer.add_summary(attention_summary, global_step)
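Example #2

This module-level helper decodes an entire test set into out_file, writing up to num_translations_per_input tab-separated responses per input line. The metric loop is commented out in this version, so it currently returns an empty score dict. A usage sketch follows the function.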
def decode_and_evaluate(name,
                        model,
                        sess,
                        out_file,
                        ref_file,
                        metrics,
                        beam_width,
                        num_translations_per_input=1,
                        decode=True):
    """Decode a test set and compute a score according to the evaluation task."""
    # Decode
    if decode:
        log.print_out("  decoding to output '{}'".format(out_file))

        start_time = time.time()
        num_sentences = 0
        with codecs.getwriter("utf-8")(
                tf.gfile.GFile(out_file, mode="wb")) as trans_f:
            trans_f.write("")  # Write empty string to ensure file is created.

            # Clamp to at most beam_width and at least 1 (greedy decoding,
            # i.e. beam_width == 0, yields a single translation per input).
            num_translations_per_input = max(
                min(num_translations_per_input, beam_width), 1)

            i = 0
            while True:
                i += 1
                try:
                    if i % 1000 == 0:
                        log.print_out(
                            "    decoding step {}, num sentences {}".format(
                                i, num_sentences))

                    ncm_outputs, _ = model.decode(sess)
                    if beam_width == 0:
                        ncm_outputs = np.expand_dims(ncm_outputs, 0)

                    batch_size = ncm_outputs.shape[1]
                    num_sentences += batch_size

                    for sent_id in range(batch_size):
                        translations = [get_translation(ncm_outputs[beam_id], sent_id)
                                        for beam_id in range(num_translations_per_input)]
                        trans_f.write(b"\t".join(translations).decode("utf-8") + "\n")
                except tf.errors.OutOfRangeError:
                    log.print_time(
                        "  Done, num sentences {}, num translations per input {}".format(
                            num_sentences, num_translations_per_input), start_time)
                    break

    # Evaluation
    evaluation_scores = {}
    # if ref_file and tf.gfile.Exists(out_file):
    #     for metric in metrics:
    #         score = evaluate(ref_file, out_file, metric)
    #         evaluation_scores[metric] = score
    #         log.print_out("  %s %s: %.1f" % (metric, name, score))

    return evaluation_scores
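A minimal sketch of driving decode_and_evaluate from an inference session, assuming the model's dataset iterator has already been initialized on the test data; loaded_infer_model, infer_sess, config, and the file paths below are assumptions about the surrounding code, not part of the snippet:

    scores = decode_and_evaluate(
        name="test",
        model=loaded_infer_model,   # assumed: a restored inference model
        sess=infer_sess,            # assumed: session on the inference graph
        out_file="out_dir/output_test.txt",  # hypothetical output path
        ref_file="data/test_refs.txt",       # hypothetical reference path
        metrics=["bleu"],
        beam_width=config.beam_width,
        num_translations_per_input=config.n_responses)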
Example #3
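This end-to-end test driver restores the latest checkpoint into an inference graph and decodes the test set under a label that records the decoding settings; it then loads the same checkpoint into a separate eval graph to report test-set perplexity.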
    def test(self):
        start_test_time = time.time()

        assert self.config.n_responses >= 1

        if self.config.beam_width > 0:
            assert self.config.n_responses <= self.config.beam_width
        else:
            assert self.config.n_responses == 1

        self._pre_model_creation()

        infer_model = vanilla_helper.create_infer_model(self.config)

        config_proto = model_helper.get_config_proto(self.config.log_device)

        ckpt = tf.train.latest_checkpoint(self.config.get_infer_model_dir())
        with tf.Session(graph=infer_model.graph,
                        config=config_proto) as infer_sess:
            loaded_infer_model = model_helper.load_model(
                infer_model.model, ckpt, infer_sess, "infer")

            log.print_out("# Start decoding")
            log.print_out("  beam width: {}".format(self.config.beam_width))
            log.print_out("  length penalty: {}".format(
                self.config.length_penalty_weight))
            log.print_out("  sampling temperature: {}".format(
                self.config.sampling_temperature))
            log.print_out("  num responses per tests instance: {}".format(
                self.config.n_responses))

            feed_dict = {
                infer_model.src_placeholder:
                self._load_data(self.config.test_data),
                infer_model.batch_size_placeholder:
                self.config.infer_batch_size,
            }

            infer_sess.run(infer_model.iterator.initializer,
                           feed_dict=feed_dict)

            if self.config.sampling_temperature > 0:
                label = "%s_t%.1f" % (fs.file_name(self.config.test_data),
                                      self.config.sampling_temperature)
            else:
                label = "%s_bw%d_lp%.1f" % (fs.file_name(self.config.test_data),
                                            self.config.beam_width,
                                            self.config.length_penalty_weight)
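            # e.g. "test_t0.8" when sampling, or "test_bw5_lp1.0" with beam
            # search (assuming fs.file_name returns the file's base name).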

            self._decode_and_evaluate(
                loaded_infer_model,
                infer_sess,
                feed_dict,
                label=label,
                num_responses_per_input=self.config.n_responses)
        log.print_time("# Decoding done", start_test_time)

        eval_model = vanilla_helper.create_eval_model(self.config)
        with tf.Session(config=model_helper.get_config_proto(
                self.config.log_device),
                        graph=eval_model.graph) as eval_sess:
            loaded_eval_model = model_helper.load_model(
                eval_model.model, ckpt, eval_sess, "eval")

            log.print_out("# Compute Perplexity")

            feed_dict = {
                eval_model.eval_file_placeholder: self.config.test_data
            }

            eval_sess.run(eval_model.iterator.initializer, feed_dict=feed_dict)

            model_helper.compute_perplexity(loaded_eval_model, eval_sess,
                                            "test")

        log.print_time("# Test finished", start_test_time)
Example #4
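The training loop follows the common TF 1.x NMT pattern of three separate graphs, each with its own session (train, eval, infer). It restores or creates the model, then alternates between training steps, periodic statistics, checkpointing, sample decoding, and dev-set perplexity evaluation, using a patience counter on dev perplexity for early stopping.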
    def train(self, target_session="", scope=None):
        out_dir = self.config.model_dir
        model_dir = out_dir

        steps_per_stats = self.config.steps_per_stats
        steps_per_eval = self.config.steps_per_eval

        self._pre_model_creation()

        train_model = vanilla_helper.create_train_model(self.config, scope)
        eval_model = vanilla_helper.create_eval_model(self.config, scope)
        infer_model = vanilla_helper.create_infer_model(self.config, scope)

        # Preload data for sample decoding.
        eval_data = self._load_data(self.config.dev_data)
        self.config.dev_size = math.ceil(
            len(eval_data) / self.config.batch_size)

        summary_name = "train_log"

        # Log and output files
        log_file = os.path.join(out_dir, "log_%d" % time.time())
        log_f = tf.gfile.GFile(log_file, mode="a")
        log.print_out("# log_file=%s" % log_file, log_f)

        avg_step_time = 0.0

        # TensorFlow model
        config_proto = model_helper.get_config_proto(self.config.log_device)

        train_sess = tf.Session(target=target_session,
                                config=config_proto,
                                graph=train_model.graph)
        eval_sess = tf.Session(target=target_session,
                               config=config_proto,
                               graph=eval_model.graph)
        infer_sess = tf.Session(target=target_session,
                                config=config_proto,
                                graph=infer_model.graph)

        with train_model.graph.as_default():
            loaded_train_model, global_step = model_helper.create_or_load_model(
                train_model.model, model_dir, train_sess, "train")

        # Summary writer
        summary_writer = tf.summary.FileWriter(
            os.path.join(out_dir, summary_name), train_model.graph)

        # First evaluation
        # self.run_full_eval(
        #    model_dir, infer_model, infer_sess,
        #    eval_model, eval_sess, summary_writer, eval_data)

        last_stats_step = global_step
        last_eval_step = global_step
        # last_external_eval_step = global_step
        patience = self.config.patience

        # This is the training loop.
        stats = self.init_stats()
        speed, train_ppl = 0.0, 0.0
        start_train_time = time.time()

        self.config.save()

        # Initialize all of the iterators
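        # (skip_count fast-forwards the input pipeline past examples already
        # consumed in the current partial epoch when a run is resumed.)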
        skip_count = self.config.batch_size * self.config.epoch_step
        lr = loaded_train_model.learning_rate.eval(session=train_sess)
        log.print_out(
            "# Starting step {}/{} (skipping {} elements), epoch {}/{}, lr {:f}, {}"
            .format(global_step, self.config.num_train_steps, skip_count,
                    self.config.epoch, self.config.num_train_epochs, lr,
                    time.ctime()), log_f)

        train_sess.run(
            train_model.iterator.initializer,
            feed_dict={train_model.skip_count_placeholder: skip_count})

        pbar = trange(self.config.num_train_steps, initial=global_step)
        pbar.set_postfix(lr=lr,
                         wps='0K',
                         ppl='inf',
                         gN='inf',
                         best_dev_ppl=self.config.best_dev_ppl)
        pbar.set_description("Ep {}/{}".format(self.config.epoch,
                                               self.config.num_train_epochs))

        while self.config.epoch < self.config.num_train_epochs and patience > 0:
            ### Run a step ###
            start_time = time.time()
            try:
                step_result = loaded_train_model.train(train_sess)
                self.config.epoch_step += 1
            except tf.errors.OutOfRangeError:
                # Finished going through the training dataset.  Go to next epoch.
                sw = Stopwatch()
                self.run_sample_decode(infer_model, infer_sess, model_dir,
                                       summary_writer, eval_data)

                log.print_out(
                    "## Done epoch {} in {} steps. step {} @ eval time: {}s".
                    format(self.config.epoch, self.config.epoch_step,
                           global_step, sw.elapsed()))

                self.config.epoch += 1
                self.config.epoch_step = 0
                self.config.save()
                pbar.set_description("Ep {}/{}".format(
                    self.config.epoch, self.config.num_train_epochs))

                # dev_scores, test_scores, _ = self.run_external_eval(infer_model, infer_sess, model_dir, summary_writer)
                train_sess.run(
                    train_model.iterator.initializer,
                    feed_dict={train_model.skip_count_placeholder: 0})
                continue

            # Write step summary and accumulate statistics
            global_step = self.update_stats(stats, summary_writer, start_time,
                                            step_result)

            # Once in a while, we print statistics.
            if global_step - last_stats_step >= steps_per_stats:
                train_ppl, speed, is_overflow = self.check_stats(
                    stats, global_step, steps_per_stats, log_f, pbar)
                pbar.update(global_step - last_stats_step)
                last_stats_step = global_step

                if is_overflow:
                    break

                # Reset statistics
                stats = self.init_stats()

            if global_step - last_eval_step >= steps_per_eval:
                last_eval_step = global_step

                log.print_out(
                    "# Save eval, global step {}".format(global_step))
                log.add_summary(summary_writer, global_step, "train_ppl",
                                train_ppl)

                # Save checkpoint
                loaded_train_model.saver.save(train_sess,
                                              os.path.join(
                                                  out_dir, "vanilla.ckpt"),
                                              global_step=global_step)

                # Evaluate on dev
                self.run_sample_decode(infer_model, infer_sess, model_dir,
                                       summary_writer, eval_data)
                dev_ppl = self.run_internal_eval(eval_model, eval_sess,
                                                 model_dir, summary_writer)

                if dev_ppl < self.config.best_dev_ppl:
                    self.config.best_dev_ppl = dev_ppl
                    patience = self.config.patience
                    log.print_out(
                        '    **** Best model so far @Ep {} @step {} (global {}) dev_ppl {:.3f}'
                        .format(self.config.epoch, self.config.epoch_step,
                                global_step, dev_ppl))
                elif dev_ppl > self.config.degrade_threshold * self.config.best_dev_ppl:
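                    # Patience only decreases when dev_ppl is worse than
                    # degrade_threshold * best_dev_ppl; smaller regressions
                    # are tolerated.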
                    patience -= 1
                    log.print_out(
                        '    PPL got worse @Ep {} @step {} (global {}) patience {} '
                        'dev_ppl {:.3f} best_dev_ppl {:.3f}'.format(
                            self.config.epoch, self.config.epoch_step,
                            global_step, patience, dev_ppl,
                            self.config.best_dev_ppl))

                # Save config parameters
                self.config.save()

        pbar.close()
        # Done training
        loaded_train_model.saver.save(train_sess,
                                      os.path.join(out_dir, "vanilla.ckpt"),
                                      global_step=global_step)

        dev_scores, test_scores, dev_ppl, test_ppl = None, None, None, None

        log.print_out(
            "# Final, step {} ep {}/{} lr {:f} "
            "step-time {:.2f} wps {:.2f}K train_ppl {:.3f} best_dev_ppl {:.3f}, {}"
            .format(global_step, self.config.epoch, self.config.epoch_step,
                    loaded_train_model.learning_rate.eval(session=train_sess),
                    avg_step_time, speed, train_ppl, self.config.best_dev_ppl,
                    time.ctime()), log_f)
        log.print_time("# Done training!", start_train_time)

        if self.config.eval_best_model:
            log.print_out("Evaluating the best model begins...")
            test_ppl = self.run_infer_best_model(
                infer_model, eval_model, infer_sess, eval_sess,
                self.config.best_dev_ppl_dir,
                fs.file_name(self.config.test_data) + '_final', summary_writer)

            log.print_out(
                "# test_ppl {:.3f} w. beam_width: {} | length_penalty: {:.1f} | sampling_temperature: {:.1f}"
                .format(test_ppl, self.config.beam_width,
                        self.config.length_penalty_weight,
                        self.config.sampling_temperature), log_f)

        summary_writer.close()

        return (dev_scores, test_scores, dev_ppl, test_ppl, global_step)
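Example #5

The constructor below builds the computation graph for one mode (train, eval, or infer): it initializes embeddings, places the utterance encoder, context layer, and decoder across the available GPUs, optionally enables scheduled sampling during training, and wires up the loss, gradient clipping, optimizer update, and summaries. A hedged instantiation sketch follows the code.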
    def __init__(self,
                 mode,
                 num_turns,
                 iterator,
                 params,
                 rev_vocab_table=None,
                 scope=None,
                 log_trainables=True):

        log.print_out("# creating %s graph ..." % mode)
        self.dtype = tf.float32

        self.mode = mode
        self.num_turns = num_turns - 1

        self.device_manager = DeviceManager()
        self.round_robin = RoundRobin(self.device_manager)
        self.num_gpus = min(params.num_gpus,
                            self.device_manager.num_available_gpus())
        log.print_out("# number of gpus %d" % self.num_gpus)

        self.iterator = iterator

        with tf.variable_scope(scope or 'thred_graph', dtype=self.dtype):
            self.init_embeddings(params.vocab_file,
                                 params.vocab_pkl,
                                 scope=scope)

            encoder_keep_prob, decoder_keep_prob = self.get_keep_probs(
                mode, params)
            if mode == tf.contrib.learn.ModeKeys.TRAIN:
                context_keep_prob = 1.0 - params.context_dropout_rate
            else:
                context_keep_prob = 1.0

            with tf.variable_scope(scope or "build_network"):
                with tf.variable_scope(
                        "decoder/output_projection") as output_scope:
                    if params.boost_topic_gen_prob:
                        self.output_layer = taware_layer.JointDenseLayer(
                            params.vocab_size,
                            params.topic_vocab_size,
                            scope=output_scope,
                            name="output_projection")
                    else:
                        self.output_layer = layers_core.Dense(
                            params.vocab_size,
                            use_bias=False,
                            name="output_projection")

            self.batch_size = tf.size(self.iterator.source_sequence_lengths[0])

            # Two devices assigned round-robin; devices[0] hosts the context
            # layer and devices[1] the decoder (see the calls below).
            devices = self.round_robin.assign(2, base=self.num_gpus - 1)
            encoder_results, encoder_state = self.__build_encoder(
                params, encoder_keep_prob)
            context_outputs, context_state = self.__build_context(
                params, encoder_results, encoder_state, context_keep_prob,
                devices[0])

            self.global_step = tf.Variable(0, trainable=False)
            self.use_scheduled_sampling = False
            if mode == tf.contrib.learn.ModeKeys.TRAIN:
                self.sampling_probability = tf.constant(
                    params.scheduled_sampling_prob)
                self.sampling_probability = self._get_sampling_probability(
                    params, self.global_step, self.sampling_probability)
                self.use_scheduled_sampling = params.scheduled_sampling_prob > 0
            elif mode == tf.contrib.learn.ModeKeys.EVAL:
                self.sampling_probability = tf.constant(0.0)

            logits, sample_ids, _ = self.__build_decoder(
                params, context_outputs, context_state, decoder_keep_prob,
                devices[1])

            if mode != tf.contrib.learn.ModeKeys.INFER:
                with tf.device(self.device_manager.tail_gpu()):
                    loss = self.__compute_loss(logits)
            else:
                loss, losses = None, None

            if mode == tf.contrib.learn.ModeKeys.TRAIN:
                self.train_loss = loss
                self.word_count = sum(
                    [tf.reduce_sum(self.iterator.source_sequence_lengths[t]) for t in range(self.num_turns)]) + \
                                  tf.reduce_sum(self.iterator.target_sequence_length)
            elif mode == tf.contrib.learn.ModeKeys.EVAL:
                self.eval_loss = loss
            elif mode == tf.contrib.learn.ModeKeys.INFER:
                self.sample_words = rev_vocab_table.lookup(
                    tf.to_int64(sample_ids))

            if mode != tf.contrib.learn.ModeKeys.INFER:
                ## Count the number of predicted words for computing ppl.
                self.predict_count = tf.reduce_sum(
                    self.iterator.target_sequence_length)

            trainables = tf.trainable_variables()

            if mode == tf.contrib.learn.ModeKeys.TRAIN:
                self.learning_rate = tf.constant(params.learning_rate)
                # decay
                self.learning_rate = self._get_learning_rate_decay(
                    params, self.global_step, self.learning_rate)

                # Optimizer
                if params.optimizer.lower() == "sgd":
                    opt = tf.train.GradientDescentOptimizer(self.learning_rate)
                    tf.summary.scalar("lr", self.learning_rate)
                elif params.optimizer.lower() == "adam":
                    opt = tf.train.AdamOptimizer(self.learning_rate)
                    tf.summary.scalar("lr", self.learning_rate)
                else:
                    raise ValueError('Unknown optimizer: ' + params.optimizer)

                # Gradients
                gradients = tf.gradients(self.train_loss,
                                         trainables,
                                         colocate_gradients_with_ops=True)

                clipped_grads, grad_norm = tf.clip_by_global_norm(
                    gradients, params.max_gradient_norm)
                grad_norm_summary = [tf.summary.scalar("grad_norm", grad_norm)]
                grad_norm_summary.append(
                    tf.summary.scalar("clipped_gradient",
                                      tf.global_norm(clipped_grads)))

                self.grad_norm = grad_norm

                self.update = opt.apply_gradients(zip(clipped_grads,
                                                      trainables),
                                                  global_step=self.global_step)

                # Summary
                self.train_summary = tf.summary.merge([
                    tf.summary.scalar("lr", self.learning_rate),
                    tf.summary.scalar("train_loss", self.train_loss),
                ] + grad_norm_summary)

            if mode == tf.contrib.learn.ModeKeys.INFER:
                self.infer_logits, self.sample_id = logits, sample_ids
                self.infer_summary = tf.no_op()

            # Saver
            self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=2)

            # Print trainable variables
            if log_trainables:
                log.print_out("# Trainable variables")
                for trainable in trainables:
                    log.print_out("  %s, %s, %s" %
                                  (trainable.name, str(trainable.get_shape()),
                                   trainable.op.device))
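A minimal sketch of instantiating this graph for inference; the class name ThredModel, the iterator, and the hparams object are assumptions about the surrounding project, not shown in the snippet:

    rev_vocab_table = tf.contrib.lookup.index_to_string_table_from_file(
        hparams.vocab_file, default_value="<unk>")  # assumed vocab lookup
    model = ThredModel(  # hypothetical class name; only __init__ is shown
        mode=tf.contrib.learn.ModeKeys.INFER,
        num_turns=3,              # e.g. two context turns plus the response
        iterator=infer_iterator,  # assumed batched inference iterator
        params=hparams,
        rev_vocab_table=rev_vocab_table)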
Example #6
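A batched variant of the sample decoder in Example #1: it draws sample indices with NumPy (so it generalizes to size > 1), and its data format carries a topic as the last tab-separated field, logged alongside the source turns and the reference response.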
    def _sample_decode(self, model, global_step, sess, src_placeholder,
                       batch_size_placeholder, eval_data, summary_writer):
        """Pick a sentence and decode."""
        # np.random.randint excludes `high`, so pass len(eval_data) to keep
        # the last sentence sampleable.
        decode_ids = np.random.randint(low=0, high=len(eval_data), size=1)

        sample_data = []
        for decode_id in decode_ids:
            sample_data.append(eval_data[decode_id])

        iterator_feed_dict = {
            src_placeholder: sample_data,
            batch_size_placeholder: len(decode_ids),
        }

        sess.run(model.iterator.initializer, feed_dict=iterator_feed_dict)
        ncm_outputs, infer_summary = model.decode(sess)

        for i, decode_id in enumerate(decode_ids):
            log.print_out("  # {}".format(decode_id))

            output = ncm_outputs[i]

            if self.config.beam_width > 0 and self._consider_beam():
                # get the top translation.
                output = output[0]

            translation = ncm_utils.get_translation(output, sent_id=0)
            delimited_sample = eval_data[decode_id].split("\t")
            utterances, topic = delimited_sample[:-1], delimited_sample[-1]
            sources, target = utterances[:-1], utterances[-1]

            log.print_out("    sources:")
            for t, src in enumerate(sources):
                log.print_out("      @{} {}".format(t + 1, src))
            log.print_out("    topic: {}".format(topic))
            log.print_out("    resp: {}".format(target))
            log.print_out(b"    generated: " + translation)

        # Summary
        if infer_summary is not None:
            summary_writer.add_summary(infer_summary, global_step)