Code Example #1
File: nmt_predict.py Project: chengpohi/foobar
def eval(flags, default_hparams, target_session=""):
    """Run main."""
    # Job
    jobid = flags.jobid
    num_workers = flags.num_workers
    utils.print_out("# Job id %d" % jobid)

    # Random
    random_seed = flags.random_seed
    if random_seed is not None and random_seed > 0:
        utils.print_out("# Set random seed to %d" % random_seed)
        random.seed(random_seed + jobid)
        np.random.seed(random_seed + jobid)

    ## Decode
    out_dir = flags.out_dir
    if not tf.gfile.Exists(out_dir):
        raise IOError("%s path not exist." % out_dir)

    # Load hparams.
    hparams = create_or_load_hparams(out_dir, default_hparams,
                                     flags.hparams_path, False)

    # Inference indices
    hparams.inference_indices = None

    # Get the latest checkpoint from the output directory.
    ckpt = tf.train.latest_checkpoint(out_dir)
    if not ckpt:
        raise IOError("No checkpoint found in %s." % out_dir)
    inference.predicate(ckpt, hparams, num_workers, jobid)
Code Example #2
def print_step_info(prefix, global_step, info, result_summary, log_f):
    """Print all info at the current global step."""
    utils.print_out(
        "%sstep %d lr %g step-time %.2fs wps %.2fK ppl %.2f gN %.2f %s, %s" %
        (prefix, global_step, info["learning_rate"], info["avg_step_time"],
         info["speed"], info["train_ppl"], info["avg_grad_norm"],
         result_summary, time.ctime()), log_f)
Code Example #3
File: vocab_utils.py Project: chengpohi/foobar
def check_vocab(vocab_file,
                out_dir,
                check_special_token=True,
                sos=None,
                eos=None,
                unk=None):
    """Check if vocab_file doesn't exist, create from corpus_file."""
    if tf.gfile.Exists(vocab_file):
        utils.print_out("# Vocab file %s exists" % vocab_file)
        vocab, vocab_size = load_vocab(vocab_file)
        if check_special_token:
            # Verify if the vocab starts with unk, sos, eos
            # If not, prepend those tokens & generate a new vocab file
            if not unk: unk = UNK
            if not sos: sos = SOS
            if not eos: eos = EOS
            assert len(vocab) >= 3
            if vocab[0] != unk or vocab[1] != sos or vocab[2] != eos:
                utils.print_out("The first 3 vocab words [%s, %s, %s]"
                                " are not [%s, %s, %s]" %
                                (vocab[0], vocab[1], vocab[2], unk, sos, eos))
                vocab = [unk, sos, eos] + vocab
                vocab_size += 3
                new_vocab_file = os.path.join(out_dir,
                                              os.path.basename(vocab_file))
                with codecs.getwriter("utf-8")(tf.gfile.GFile(
                        new_vocab_file, "wb")) as f:
                    for word in vocab:
                        f.write("%s\n" % word)
                vocab_file = new_vocab_file
    else:
        raise ValueError("vocab_file '%s' does not exist." % vocab_file)

    vocab_size = len(vocab)
    return vocab_size, vocab_file
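
The special-token check above only inspects the first three entries and prepends the full triple if they do not match. A minimal standalone sketch of that prepend logic on a plain Python list, assuming the usual values "<unk>", "<s>", "</s>" for UNK/SOS/EOS:

UNK, SOS, EOS = "<unk>", "<s>", "</s>"

def ensure_special_tokens(vocab, unk=UNK, sos=SOS, eos=EOS):
    """Prepend unk/sos/eos if the vocab does not already start with them."""
    assert len(vocab) >= 3
    if vocab[:3] != [unk, sos, eos]:
        vocab = [unk, sos, eos] + vocab
    return vocab

print(ensure_special_tokens(["hello", "world", "!"]))
# ['<unk>', '<s>', '</s>', 'hello', 'world', '!']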
Code Example #4
def _cell_list(unit_type,
               num_units,
               num_layers,
               num_residual_layers,
               forget_bias,
               dropout,
               mode,
               num_gpus,
               base_gpu=0,
               single_cell_fn=None,
               residual_fn=None):
    """Create a list of RNN cells."""
    if not single_cell_fn:
        single_cell_fn = _single_cell

    # Multi-GPU
    cell_list = []
    for i in range(num_layers):
        utils.print_out("  cell %d" % i, new_line=False)
        single_cell = single_cell_fn(
            unit_type=unit_type,
            num_units=num_units,
            forget_bias=forget_bias,
            dropout=dropout,
            mode=mode,
            residual_connection=(i >= num_layers - num_residual_layers),
            device_str=get_device_str(i + base_gpu, num_gpus),
            residual_fn=residual_fn)
        utils.print_out("")
        cell_list.append(single_cell)

    return cell_list
Code Example #5
def translate_and_return(hparams,
                         infer_model,
                         input_data,
                         loaded_infer_model,
                         sess):
    # Encode Data
    sess.run(
        infer_model.iterator.initializer,
        feed_dict={
            infer_model.src_placeholder: input_data,
            infer_model.batch_size_placeholder: hparams.infer_batch_size
        })
    # Decode
    utils.print_out("# Start decoding")
    res = nmt_utils.decode_and_return(
        "infer",
        loaded_infer_model,
        sess,
        None,
        ref_file=None,
        metrics=hparams.metrics,
        subword_option=hparams.subword_option,
        beam_width=hparams.beam_width,
        tgt_eos=hparams.eos,
        num_translations_per_input=hparams.num_translations_per_input)
    return res
Code Example #6
File: model.py Project: chengpohi/foobar
    def _get_learning_rate_decay(self, hparams):
        """Get learning rate decay."""
        if hparams.decay_scheme in ["luong5", "luong10", "luong234"]:
            decay_factor = 0.5
            if hparams.decay_scheme == "luong5":
                start_decay_step = int(hparams.num_train_steps / 2)
                decay_times = 5
            elif hparams.decay_scheme == "luong10":
                start_decay_step = int(hparams.num_train_steps / 2)
                decay_times = 10
            elif hparams.decay_scheme == "luong234":
                start_decay_step = int(hparams.num_train_steps * 2 / 3)
                decay_times = 4
            remain_steps = hparams.num_train_steps - start_decay_step
            decay_steps = int(remain_steps / decay_times)
        elif not hparams.decay_scheme:  # no decay
            start_decay_step = hparams.num_train_steps
            decay_steps = 0
            decay_factor = 1.0
        elif hparams.decay_scheme:
            raise ValueError("Unknown decay scheme %s" % hparams.decay_scheme)
        utils.print_out("  decay_scheme=%s, start_decay_step=%d, decay_steps %d, "
                        "decay_factor %g" % (hparams.decay_scheme,
                                             start_decay_step,
                                             decay_steps,
                                             decay_factor))

        return tf.cond(
            self.global_step < start_decay_step,
            lambda: self.learning_rate,
            lambda: tf.train.exponential_decay(
                self.learning_rate,
                (self.global_step - start_decay_step),
                decay_steps, decay_factor, staircase=True),
            name="learning_rate_decay_cond")
Code Example #7
def load_model(model, ckpt, session, name):
    start_time = time.time()
    model.saver.restore(session, ckpt)
    session.run(tf.tables_initializer())
    utils.print_out("  loaded %s model parameters from %s, time %.2fs" %
                    (name, ckpt, time.time() - start_time))
    return model
Code Example #8
def run_main(flags,
             default_hparams,
             train_fn,
             inference_fn,
             target_session=""):
    """Run main."""
    # Job
    jobid = flags.jobid
    num_workers = flags.num_workers
    utils.print_out("# Job id %d" % jobid)

    # Random
    random_seed = flags.random_seed
    if random_seed is not None and random_seed > 0:
        utils.print_out("# Set random seed to %d" % random_seed)
        random.seed(random_seed + jobid)
        np.random.seed(random_seed + jobid)

    ## Train / Decode
    out_dir = flags.out_dir
    if not tf.gfile.Exists(out_dir):
        tf.gfile.MakeDirs(out_dir)

    # Load hparams.
    hparams = create_or_load_hparams(out_dir,
                                     default_hparams,
                                     flags.hparams_path,
                                     save_hparams=(jobid == 0))
    # Train
    train_fn(hparams, target_session=target_session)
Code Example #9
def ensure_compatible_hparams(hparams, default_hparams, hparams_path=""):
    """Make sure the loaded hparams is compatible with new changes."""
    default_hparams = utils.maybe_parse_standard_hparams(
        default_hparams, hparams_path)

    # Set num encoder/decoder layers (for old checkpoints)
    if hasattr(hparams, "num_layers"):
        if not hasattr(hparams, "num_encoder_layers"):
            hparams.add_hparam("num_encoder_layers", hparams.num_layers)
        if not hasattr(hparams, "num_decoder_layers"):
            hparams.add_hparam("num_decoder_layers", hparams.num_layers)

    # For compatibility: if there are new fields in default_hparams,
    #   add them to the current hparams.
    default_config = default_hparams.values()
    config = hparams.values()
    for key in default_config:
        if key not in config:
            hparams.add_hparam(key, default_config[key])

    # Update all hparams' keys if override_loaded_hparams=True
    if getattr(default_hparams, "override_loaded_hparams", None):
        overwritten_keys = default_config.keys()
    else:
        # For inference
        overwritten_keys = INFERENCE_KEYS

    for key in overwritten_keys:
        if getattr(hparams, key) != default_config[key]:
            utils.print_out(
                "# Updating hparams.%s: %s -> %s" %
                (key, str(getattr(hparams, key)), str(default_config[key])))
            setattr(hparams, key, default_config[key])
    return hparams
Code Example #10
File: model.py Project: chengpohi/foobar
    def _build_encoder(self, hparams):
        """Build an encoder."""
        num_layers = self.num_encoder_layers
        num_residual_layers = self.num_encoder_residual_layers
        iterator = self.iterator

        source = iterator.source
        if self.time_major:
            source = tf.transpose(source)

        with tf.variable_scope("encoder") as scope:
            dtype = scope.dtype
            # Look up embedding, emb_inp: [max_time, batch_size, num_units]
            encoder_emb_inp = tf.nn.embedding_lookup(
                self.embedding_encoder, source)

            # Encoder_outputs: [max_time, batch_size, num_units]
            if hparams.encoder_type == "uni":
                utils.print_out("  num_layers = %d, num_residual_layers=%d" %
                                (num_layers, num_residual_layers))
                cell = self._build_encoder_cell(
                    hparams, num_layers, num_residual_layers)

                encoder_outputs, encoder_state = tf.nn.dynamic_rnn(
                    cell,
                    encoder_emb_inp,
                    dtype=dtype,
                    sequence_length=iterator.source_sequence_length,
                    time_major=self.time_major,
                    swap_memory=True)
            elif hparams.encoder_type == "bi":
                num_bi_layers = int(num_layers / 2)
                num_bi_residual_layers = int(num_residual_layers / 2)
                utils.print_out("  num_bi_layers = %d, num_bi_residual_layers=%d" %
                                (num_bi_layers, num_bi_residual_layers))

                encoder_outputs, bi_encoder_state = (
                    self._build_bidirectional_rnn(
                        inputs=encoder_emb_inp,
                        sequence_length=iterator.source_sequence_length,
                        dtype=dtype,
                        hparams=hparams,
                        num_bi_layers=num_bi_layers,
                        num_bi_residual_layers=num_bi_residual_layers))

                if num_bi_layers == 1:
                    encoder_state = bi_encoder_state
                else:
                    # alternatively concat forward and backward states
                    encoder_state = []
                    for layer_id in range(num_bi_layers):
                        encoder_state.append(bi_encoder_state[0][layer_id])  # forward
                        encoder_state.append(bi_encoder_state[1][layer_id])  # backward
                    encoder_state = tuple(encoder_state)
            else:
                raise ValueError("Unknown encoder_type %s" % hparams.encoder_type)
        return encoder_outputs, encoder_state
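
The alternating concatenation of forward and backward states in the "bi" branch is easiest to see on plain lists; a toy sketch with string placeholders standing in for per-layer states:

def interleave_bi_states(bi_encoder_state, num_bi_layers):
    """Alternate forward and backward layer states, as in _build_encoder."""
    if num_bi_layers == 1:
        return bi_encoder_state
    encoder_state = []
    for layer_id in range(num_bi_layers):
        encoder_state.append(bi_encoder_state[0][layer_id])  # forward
        encoder_state.append(bi_encoder_state[1][layer_id])  # backward
    return tuple(encoder_state)

fw, bw = ["fw0", "fw1"], ["bw0", "bw1"]
print(interleave_bi_states((fw, bw), 2))  # ('fw0', 'bw0', 'fw1', 'bw1')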
Code Example #11
def _external_eval(model,
                   global_step,
                   sess,
                   hparams,
                   iterator,
                   iterator_feed_dict,
                   tgt_file,
                   label,
                   summary_writer,
                   save_on_best,
                   avg_ckpts=False):
    """External evaluation such as BLEU and ROUGE scores."""
    out_dir = hparams.out_dir
    decode = global_step > 0

    if avg_ckpts:
        label = "avg_" + label

    if decode:
        utils.print_out("# External evaluation, global step %d" % global_step)

    sess.run(iterator.initializer, feed_dict=iterator_feed_dict)

    output = os.path.join(out_dir, "output_%s" % label)
    scores = nmt_utils.decode_and_evaluate(
        label,
        model,
        sess,
        output,
        ref_file=tgt_file,
        metrics=hparams.metrics,
        subword_option=hparams.subword_option,
        beam_width=hparams.beam_width,
        tgt_eos=hparams.eos,
        decode=decode)
    # Save on best metrics
    if decode:
        for metric in hparams.metrics:
            if avg_ckpts:
                best_metric_label = "avg_best_" + metric
            else:
                best_metric_label = "best_" + metric

            utils.add_summary(summary_writer, global_step,
                              "%s_%s" % (label, metric), scores[metric])
            # metric: larger is better
            if save_on_best and scores[metric] > getattr(
                    hparams, best_metric_label):
                setattr(hparams, best_metric_label, scores[metric])
                model.saver.save(sess,
                                 os.path.join(
                                     getattr(hparams,
                                             best_metric_label + "_dir"),
                                     "translate.ckpt"),
                                 global_step=model.global_step)
        utils.save_hparams(out_dir, hparams)
    return scores
Code Example #12
File: model.py Project: chengpohi/foobar
    def _get_infer_maximum_iterations(self, hparams, source_sequence_length):
        """Maximum decoding steps at inference time."""
        if hparams.tgt_max_len_infer:
            maximum_iterations = hparams.tgt_max_len_infer
            utils.print_out("  decoding maximum_iterations %d" % maximum_iterations)
        else:
            # TODO(thangluong): add decoding_length_factor flag
            decoding_length_factor = 2.0
            max_encoder_length = tf.reduce_max(source_sequence_length)
            maximum_iterations = tf.to_int32(tf.round(
                tf.to_float(max_encoder_length) * decoding_length_factor))
        return maximum_iterations
Code Example #13
    def build_graph(self, hparams, scope=None):
        utils.print_out("# creating %s graph ..." % self.mode)
        dtype = tf.float32

        with tf.variable_scope(scope or "dynamic_seq2seq", dtype=dtype):

            z_sample, Z = self.infer_z(hparams)

            with tf.variable_scope("generative_model", dtype=dtype):

                # P(x_1^m) language model
                lm_logits = self._build_language_model(hparams,
                                                       z_sample=z_sample)

                # P(y_1^n|x_1^m) encoder
                encoder_outputs, encoder_state = self._build_encoder(
                    hparams, z_sample=z_sample)

                # P(y_1^n|x_1^m) decoder
                tm_logits, sample_id, final_context_state = self._build_decoder(
                    encoder_outputs, encoder_state, hparams, z_sample=z_sample)

                # Loss
                if self.mode != tf.contrib.learn.ModeKeys.INFER:
                    with tf.device(
                            model_helper.get_device_str(
                                self.num_encoder_layers - 1, self.num_gpus)):

                        loss, components = self._compute_loss(
                            tm_logits,
                            lm_logits,
                            Z,
                            Z_source_target=(
                                hparams.z_inference_from == "source_target"),
                            r_train_mode=hparams.r_train_mode)
                else:
                    loss = None

        # Save for summaries.
        if self.mode == tf.contrib.learn.ModeKeys.TRAIN:
            self._tm_loss = components[0]
            self._lm_loss = components[1]
            self._KL_Z = components[2]
            self._entropy = components[3]
            self._Z_networks_loss = components[4]
            self._elbo = -loss

            self._lm_accuracy = self._compute_accuracy(
                lm_logits,
                tf.argmax(self.source_output, axis=-1, output_type=tf.int32),
                self.source_sequence_length)

        return tm_logits, loss, final_context_state, sample_id
Code Example #14
File: utils.py Project: Roxot/AEVNMT
def language_model(embeddings, sequence_length, hparams, mode, single_cell_fn,
    time_major, batch_size, z_sample=None):

  with tf.variable_scope("language_model") as scope:
    # Use decoder cell options.
    cell = model_helper.create_rnn_cell(
        unit_type=hparams.unit_type,
        num_units=hparams.num_units,
        num_layers=hparams.num_lm_layers,
        num_residual_layers=hparams.num_decoder_residual_layers,
        forget_bias=hparams.forget_bias,
        dropout=hparams.dropout,
        num_gpus=hparams.num_gpus,
        mode=mode,
        single_cell_fn=single_cell_fn)

    # Use a zero initial state or tanh(Wz) if provided (VAEJointModel).
    if z_sample is not None:
      utils.print_out("  initializing generative LM with tanh(Wz)")
      init_state_val = tf.tanh(tf.layers.dense(z_sample, hparams.num_units))
      init_state = make_initial_state(init_state_val, hparams.unit_type)
    else:
      utils.print_out("  initializing generative LM with zeros.")
      init_state = cell.zero_state(batch_size, scope.dtype)

    # Apply word dropout if set.
    if hparams.word_dropout > 0 and \
        (mode == tf.contrib.learn.ModeKeys.TRAIN):

      # Drop random words.
      noise_shape = [tf.shape(embeddings)[0],
          tf.shape(embeddings)[1], 1]
      embeddings = tf.nn.dropout(embeddings,
          (1.0 - hparams.word_dropout), noise_shape=noise_shape)

    # Run the RNN language model.
    helper = tf.contrib.seq2seq.TrainingHelper(
        embeddings,
        sequence_length,
        time_major=time_major)
    decoder = tf.contrib.seq2seq.BasicDecoder(
        cell,
        helper,
        initial_state=init_state)
    lm_outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(
        decoder,
        output_time_major=time_major,
        impute_finished=True,
        scope=scope)

    return lm_outputs
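
The noise_shape of [batch, time, 1] makes the word dropout zero out entire word vectors rather than individual embedding dimensions. A small NumPy-only sketch of the same effect (shapes and seed chosen arbitrarily for illustration):

import numpy as np

rng = np.random.default_rng(0)
batch, time_steps, units = 2, 5, 4
embeddings = rng.normal(size=(batch, time_steps, units))

word_dropout = 0.3
# One keep/drop decision per (batch, time) position, broadcast over units.
keep = rng.random((batch, time_steps, 1)) >= word_dropout
dropped = embeddings * keep / (1.0 - word_dropout)  # inverted dropout scaling

print(dropped[0])  # dropped positions are all-zero word vectors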
Code Example #15
def _sample_decode(model, global_step, sess, hparams, iterator, src_data,
                   tgt_data, iterator_src_placeholder,
                   iterator_batch_size_placeholder, summary_writer):
    """Pick a sentence and decode."""
    decode_id = random.randint(0, len(src_data) - 1)
    utils.print_out("  # %d" % decode_id)

    iterator_feed_dict = {
        iterator_src_placeholder: [src_data[decode_id]],
        iterator_batch_size_placeholder: 1,
    }
    sess.run(iterator.initializer, feed_dict=iterator_feed_dict)

    nmt_outputs, attention_summary = model.decode(sess)

    if hparams.beam_width > 0:
        # get the top translation.
        nmt_outputs = nmt_outputs[0]

    translation = nmt_utils.get_translation(
        nmt_outputs,
        sent_id=0,
        tgt_eos=hparams.eos,
        subword_option=hparams.subword_option)
    utils.print_out("    src: %s" % src_data[decode_id])
    utils.print_out("    ref: %s" % tgt_data[decode_id])
    utils.print_out(b"    nmt: " + translation)

    # Summary
    if attention_summary is not None:
        summary_writer.add_summary(attention_summary, global_step)
Code Example #16
def single_worker_inference(infer_model,
                            ckpt,
                            inference_input_file,
                            inference_output_file,
                            hparams):
    """Inference with a single worker."""
    output_infer = inference_output_file

    # Read data (replaced by interactive input on each loop iteration below)
    infer_data = load_data(inference_input_file, hparams)

    with tf.Session(
            graph=infer_model.graph, config=utils.get_config_proto()) as sess:
        while True:
            # infer_data = ["Lúc đấy tôi nghĩ chuyện này sẽ khó khăn gian khổ đây ."]
            var = input("Input Vi Src: ")
            infer_data = [var]
            loaded_infer_model = model_helper.load_model(
                infer_model.model, ckpt, sess, "infer")
            sess.run(
                infer_model.iterator.initializer,
                feed_dict={
                    infer_model.src_placeholder: infer_data,
                    infer_model.batch_size_placeholder: hparams.infer_batch_size
                })
            # Decode
            utils.print_out("# Start decoding")
            if hparams.inference_indices:
                _decode_inference_indices(
                    loaded_infer_model,
                    sess,
                    output_infer=output_infer,
                    output_infer_summary_prefix=output_infer,
                    inference_indices=hparams.inference_indices,
                    tgt_eos=hparams.eos,
                    subword_option=hparams.subword_option)
            else:
                nmt_utils.decode_and_evaluate(
                    "infer",
                    loaded_infer_model,
                    sess,
                    output_infer,
                    ref_file=None,
                    metrics=hparams.metrics,
                    subword_option=hparams.subword_option,
                    beam_width=hparams.beam_width,
                    tgt_eos=hparams.eos,
                    num_translations_per_input=hparams.num_translations_per_input)
Code Example #17
def create_or_load_model(model, model_dir, session, name):
    """Create translation model and initialize or load parameters in session."""
    latest_ckpt = tf.train.latest_checkpoint(model_dir)
    if latest_ckpt:
        model = load_model(model, latest_ckpt, session, name)
    else:
        start_time = time.time()
        session.run(tf.global_variables_initializer())
        session.run(tf.tables_initializer())
        utils.print_out(
            "  created %s model with fresh parameters, time %.2fs" %
            (name, time.time() - start_time))

    global_step = model.global_step.eval(session=session)
    return model, global_step
Code Example #18
def translate(ckpt,
              infer_data,
              inference_output_file,
              hparams,
              num_workers=1,
              jobid=0,
              scope=None):
    """Inference with a single worker."""
    output_infer = inference_output_file

    """Perform translation."""
    if hparams.inference_indices:
        assert num_workers == 1

    if not hparams.attention:
        model_creator = nmt_model.Model
    elif hparams.attention_architecture == "standard":
        model_creator = attention_model.AttentionModel
    elif hparams.attention_architecture in ["gnmt", "gnmt_v2"]:
        model_creator = gnmt_model.GNMTModel
    else:
        raise ValueError("Unknown model architecture")
    infer_model = model_helper.create_infer_model(model_creator, hparams, scope)

    with tf.Session(
            graph=infer_model.graph, config=utils.get_config_proto()) as sess:
        loaded_infer_model = model_helper.load_model(
            infer_model.model, ckpt, sess, "infer")
        # Encode Data
        sess.run(
            infer_model.iterator.initializer,
            feed_dict={
                infer_model.src_placeholder: infer_data,
                infer_model.batch_size_placeholder: hparams.infer_batch_size
            })
        # Decode
        utils.print_out("# Start decoding")
        return nmt_utils.decode_and_return(
            "infer",
            loaded_infer_model,
            sess,
            output_infer,
            ref_file=None,
            metrics=hparams.metrics,
            subword_option=hparams.subword_option,
            beam_width=hparams.beam_width,
            tgt_eos=hparams.eos,
            num_translations_per_input=hparams.num_translations_per_input)
Code Example #19
def process_stats(stats, info, global_step, steps_per_stats, log_f):
    """Update info and check for overflow."""
    # Update info
    info["avg_step_time"] = stats["step_time"] / steps_per_stats
    info["avg_grad_norm"] = stats["grad_norm"] / steps_per_stats
    info["train_ppl"] = utils.safe_exp(stats["loss"] / stats["predict_count"])
    info["speed"] = stats["total_count"] / (1000 * stats["step_time"])

    # Check for overflow
    is_overflow = False
    train_ppl = info["train_ppl"]
    if math.isnan(train_ppl) or math.isinf(train_ppl) or train_ppl > 1e20:
        utils.print_out("  step %d overflow, stop early" % global_step, log_f)
        is_overflow = True

    return is_overflow
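
The overflow guard matters because train_ppl = exp(loss / predict_count) leaves float range quickly; a standalone sketch of the same check, where safe_exp is a hypothetical stand-in for utils.safe_exp:

import math

def safe_exp(value):
    """exp(value), returning inf on overflow instead of raising."""
    try:
        return math.exp(value)
    except OverflowError:
        return float("inf")

def is_overflow(train_ppl):
    return math.isnan(train_ppl) or math.isinf(train_ppl) or train_ppl > 1e20

print(is_overflow(safe_exp(10.0)))    # False: ppl ~ 22026
print(is_overflow(safe_exp(1000.0)))  # True: exp overflows to inf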
Code Example #20
    def setup(self, flags):
        # Model output directory
        out_dir = flags.out_dir
        if out_dir and not tf.gfile.Exists(out_dir):
            tf.gfile.MakeDirs(out_dir)

        # Load hparams.
        default_hparams = create_hparams(flags)
        loaded_hparams = False
        if flags.ckpt:  # Try to load hparams from the same directory as ckpt
            ckpt_dir = os.path.dirname(flags.ckpt)
            ckpt_hparams_file = os.path.join(ckpt_dir, "hparams")
            if tf.gfile.Exists(ckpt_hparams_file) or flags.hparams_path:
                # Note: for some reason this will create an empty "best_bleu" directory and copy vocab files
                hparams = create_or_load_hparams(ckpt_dir,
                                                 default_hparams,
                                                 flags.hparams_path,
                                                 save_hparams=False)
                loaded_hparams = True

        assert loaded_hparams

        # GPU device
        config_proto = utils.get_config_proto(
            allow_soft_placement=True,
            num_intra_threads=hparams.num_intra_threads,
            num_inter_threads=hparams.num_inter_threads)
        utils.print_out("# Devices visible to TensorFlow: %s" %
                        repr(tf.Session(config=config_proto).list_devices()))

        # Inference indices (inference_indices is broken, but without setting it to None we'll crash)
        hparams.inference_indices = None

        # Create the graph
        model_creator = get_model_creator(hparams)
        infer_model = model_helper.create_infer_model(model_creator,
                                                      hparams,
                                                      scope=None)
        sess, loaded_infer_model = start_sess_and_load_model(
            infer_model, flags.ckpt, hparams)

        # Parameters needed by TF GNMT
        self.hparams = hparams

        self.infer_model = infer_model
        self.sess = sess
        self.loaded_infer_model = loaded_infer_model
Code Example #21
File: cvaejoint.py Project: Roxot/AEVNMT
    def build_graph(self, hparams, scope=None):
        utils.print_out("# creating %s graph ..." % self.mode)
        dtype = tf.float32

        with tf.variable_scope(scope or "dynamic_seq2seq", dtype=dtype):

            z_sample, Z = self.infer_z(hparams)
            if hparams.z_inference_from == "source_target":
                raise NotImplementedError(
                    "source_target option not yet implemented for cvae")

            with tf.variable_scope("generative_model", dtype=dtype):

                # P(x_1^m) language model
                gauss_observations = self._build_language_model(
                    hparams, z_sample=z_sample)

                # P(y_1^n|x_1^m) encoder
                encoder_outputs, encoder_state = self._build_encoder(
                    hparams, z_sample=z_sample)

                # P(y_1^n|x_1^m) decoder
                tm_logits, sample_id, final_context_state = self._build_decoder(
                    encoder_outputs, encoder_state, hparams, z_sample=z_sample)

                # Loss
                if self.mode != tf.contrib.learn.ModeKeys.INFER:
                    with tf.device(
                            model_helper.get_device_str(
                                self.num_encoder_layers - 1, self.num_gpus)):
                        loss, components = self._compute_loss(
                            tm_logits, gauss_observations, Z)
                else:
                    loss = None

        # Save for summaries.
        if self.mode == tf.contrib.learn.ModeKeys.TRAIN:
            self._tm_loss = components[0]
            self._lm_loss = components[1]
            self._KL_Z = components[2]
            self._entropy = components[3]
            self._elbo = -loss

        return tm_logits, loss, final_context_state, sample_id
Code Example #22
File: nmt_utils.py Project: chengpohi/foobar
def decode_and_return(name,
                      model,
                      sess,
                      trans_file,
                      ref_file,
                      metrics,
                      subword_option,
                      beam_width,
                      tgt_eos,
                      num_translations_per_input=1):
    """Decode a test set and compute a score according to the evaluation task."""
    # Decode
    if trans_file:
        utils.print_out("  decoding to output %s." % trans_file)

    start_time = time.time()
    num_sentences = 0
    num_translations_per_input = max(
        min(num_translations_per_input, beam_width), 1)
    res = []
    while True:
        try:
            nmt_outputs, _ = model.decode(sess)
            if beam_width == 0:
                nmt_outputs = np.expand_dims(nmt_outputs, 0)

            batch_size = nmt_outputs.shape[1]
            num_sentences += batch_size

            for sent_id in range(batch_size):
                for beam_id in range(num_translations_per_input):
                    translation = get_translation(
                        nmt_outputs[beam_id],
                        sent_id,
                        tgt_eos=tgt_eos,
                        subword_option=subword_option)
                    res.append(translation)
        except tf.errors.OutOfRangeError:
            utils.print_time(
                "  done, num sentences %d, num translations per input %d" %
                (num_sentences, num_translations_per_input), start_time)
            break
    return res
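
The expand_dims call exists because greedy decoding (beam_width == 0) returns outputs shaped [batch, time], while beam search returns [beam_width, batch, time]; the loop then always indexes beams on axis 0. A NumPy sketch of that normalization (shapes are illustrative):

import numpy as np

def normalize_beams(nmt_outputs, beam_width):
    """Give greedy outputs a leading beam axis so both cases index alike."""
    if beam_width == 0:
        nmt_outputs = np.expand_dims(nmt_outputs, 0)  # [1, batch, time]
    return nmt_outputs

greedy = np.zeros((3, 7))     # [batch=3, time=7]
beamed = np.zeros((4, 3, 7))  # [beam=4, batch=3, time=7]
print(normalize_beams(greedy, 0).shape)  # (1, 3, 7)
print(normalize_beams(beamed, 4).shape)  # (4, 3, 7)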
Code Example #23
    def infer_z(self, hparams):

        # Infer z from the embeddings
        if hparams.z_inference_from == "source_only":
            utils.print_out(" Inferring z from source only")
            Z_x = self._infer_z_from_embeddings(hparams, use_target=False)

            # Either use a sample or the mean.
            if self.mode != tf.contrib.learn.ModeKeys.INFER:
                z_sample = Z_x.sample()
            else:
                z_sample = Z_x.mean()

            return z_sample, Z_x
        elif hparams.z_inference_from == "source_target":
            utils.print_out(" Inferring z from both source and target")
            Z_xy = self._infer_z_from_embeddings(
                hparams, scope_name="z_inference_model_xy", use_target=True)

            if hparams.r_train_mode == "l2":
                deterministic_Z_x = True
            else:
                deterministic_Z_x = False

            Z_x = self._infer_z_from_embeddings(
                hparams,
                scope_name="z_inference_model_x",
                use_target=False,
                deterministic=deterministic_Z_x)

            # Either use a sample or the mean.
            if self.mode == tf.contrib.learn.ModeKeys.TRAIN:
                z_sample = Z_xy.sample()
            else:
                if deterministic_Z_x:
                    z_sample = Z_x
                else:
                    z_sample = Z_x.mean()

            return z_sample, (Z_x, Z_xy)
        else:
            raise ValueError("Unknown z inference from option:"
                             " %s" % hparams.z_inference_from)
Code Example #24
File: model.py Project: chengpohi/foobar
    def build_graph(self, hparams, scope=None):
        """Subclass must implement this method.

        Creates a sequence-to-sequence model with dynamic RNN decoder API.
        Args:
          hparams: Hyperparameter configurations.
          scope: VariableScope for the created subgraph; default "dynamic_seq2seq".

        Returns:
          A tuple of the form (logits, loss, final_context_state, sample_id),
          where:
            logits: float32 Tensor [batch_size x num_decoder_symbols].
            loss: the total loss / batch_size.
            final_context_state: the final state of the decoder RNN.
            sample_id: the sampled output ids from the decoder.

        Raises:
          ValueError: if encoder_type differs from mono and bi, or
            attention_option is not (luong | scaled_luong |
            bahdanau | normed_bahdanau).
        """
        utils.print_out("# creating %s graph ..." % self.mode)
        dtype = tf.float32

        with tf.variable_scope(scope or "dynamic_seq2seq", dtype=dtype):
            # Encoder
            encoder_outputs, encoder_state = self._build_encoder(hparams)

            ## Decoder
            logits, sample_id, final_context_state = self._build_decoder(
                encoder_outputs, encoder_state, hparams)

            ## Loss
            if self.mode != tf.contrib.learn.ModeKeys.INFER:
                with tf.device(model_helper.get_device_str(self.num_encoder_layers - 1,
                                                           self.num_gpus)):
                    loss = self._compute_loss(logits)
            else:
                loss = None

            return logits, loss, final_context_state, sample_id
Code Example #25
def _decode_inference_indices(model, sess, output_infer,
                              output_infer_summary_prefix,
                              inference_indices,
                              tgt_eos,
                              subword_option):
    """Decoding only a specific set of sentences."""
    utils.print_out("  decoding to output %s , num sents %d." %
                    (output_infer, len(inference_indices)))
    start_time = time.time()
    with codecs.getwriter("utf-8")(
            tf.gfile.GFile(output_infer, mode="wb")) as trans_f:
        trans_f.write("")  # Write empty string to ensure file is created.
        for decode_id in inference_indices:
            nmt_outputs, infer_summary = model.decode(sess)

            # get text translation
            assert nmt_outputs.shape[0] == 1
            translation = nmt_utils.get_translation(
                nmt_outputs,
                sent_id=0,
                tgt_eos=tgt_eos,
                subword_option=subword_option)

            if infer_summary is not None:  # Attention models
                image_file = output_infer_summary_prefix + str(decode_id) + ".png"
                utils.print_out("  save attention image to %s*" % image_file)
                image_summ = tf.Summary()
                image_summ.ParseFromString(infer_summary)
                with tf.gfile.GFile(image_file, mode="w") as img_f:
                    img_f.write(image_summ.value[0].image.encoded_image_string)

            trans_f.write("%s\n" % translation)
            utils.print_out(translation + b"\n")
    utils.print_time("  done", start_time)
Code Example #26
def _create_pretrained_emb_from_txt(vocab_file,
                                    embed_file,
                                    num_trainable_tokens=3,
                                    dtype=tf.float32,
                                    scope=None):
    """Load pretrain embeding from embed_file, and return an embedding matrix.

    Args:
      embed_file: Path to a Glove formated embedding txt file.
      num_trainable_tokens: Make the first n tokens in the vocab file as trainable
        variables. Default is 3, which is "<unk>", "<s>" and "</s>".
    """
    vocab, _ = vocab_utils.load_vocab(vocab_file)
    trainable_tokens = vocab[:num_trainable_tokens]

    utils.print_out("# Using pretrained embedding: %s." % embed_file)
    utils.print_out("  with trainable tokens: ")

    emb_dict, emb_size = vocab_utils.load_embed_txt(embed_file)
    for token in trainable_tokens:
        utils.print_out("    %s" % token)
        if token not in emb_dict:
            emb_dict[token] = [0.0] * emb_size

    emb_mat = np.array([emb_dict[token] for token in vocab],
                       dtype=dtype.as_numpy_dtype())
    emb_mat = tf.constant(emb_mat)
    emb_mat_const = tf.slice(emb_mat, [num_trainable_tokens, 0], [-1, -1])
    with tf.variable_scope(scope or "pretrain_embeddings",
                           dtype=dtype) as scope:
        with tf.device(_get_embed_device(num_trainable_tokens)):
            emb_mat_var = tf.get_variable("emb_mat_var",
                                          [num_trainable_tokens, emb_size])
    return tf.concat([emb_mat_var, emb_mat_const], 0)
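
The slice/concat pair partitions the matrix so that only the first num_trainable_tokens rows become a variable while the rest stay constant. A NumPy-only sketch of that partition (toy sizes, no TensorFlow):

import numpy as np

num_trainable_tokens = 3
emb_mat = np.arange(24, dtype=np.float32).reshape(6, 4)  # [vocab=6, emb=4]

trainable_rows = emb_mat[:num_trainable_tokens]  # would become a tf.Variable
frozen_rows = emb_mat[num_trainable_tokens:]     # stays a tf.constant

rebuilt = np.concatenate([trainable_rows, frozen_rows], axis=0)
assert (rebuilt == emb_mat).all()
print(trainable_rows.shape, frozen_rows.shape)  # (3, 4) (3, 4)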
Code Example #27
File: nmt_input.py Project: chengpohi/foobar
def ensure_compatible_hparams(hparams, default_hparams, hparams_path):
    """Make sure the loaded hparams is compatible with new changes."""
    default_hparams = utils.maybe_parse_standard_hparams(
        default_hparams, hparams_path)

    # For compatibility: if there are new fields in default_hparams,
    #   add them to the current hparams.
    default_config = default_hparams.values()
    config = hparams.values()
    for key in default_config:
        if key not in config:
            hparams.add_hparam(key, default_config[key])

    # Update all hparams' keys if override_loaded_hparams=True
    if default_hparams.override_loaded_hparams:
        for key in default_config:
            if getattr(hparams, key) != default_config[key]:
                utils.print_out("# Updating hparams.%s: %s -> %s" %
                                (key, str(getattr(
                                    hparams, key)), str(default_config[key])))
                setattr(hparams, key, default_config[key])
    return hparams
Code Example #28
def before_train(loaded_train_model, train_model, train_sess, global_step,
                 hparams, log_f):
    """Misc tasks to do before training."""
    stats = init_stats()
    info = {
        "train_ppl": 0.0,
        "speed": 0.0,
        "avg_step_time": 0.0,
        "avg_grad_norm": 0.0,
        "learning_rate":
        loaded_train_model.learning_rate.eval(session=train_sess)
    }
    start_train_time = time.time()
    utils.print_out(
        "# Start step %d, lr %g, %s" %
        (global_step, info["learning_rate"], time.ctime()), log_f)

    # Initialize all of the iterators
    skip_count = hparams.batch_size * hparams.epoch_step
    utils.print_out("# Init train iterator, skipping %d elements" % skip_count)
    train_sess.run(train_model.iterator.initializer,
                   feed_dict={train_model.skip_count_placeholder: skip_count})

    return stats, info, start_train_time
Code Example #29
File: model.py Project: chengpohi/foobar
    def _get_learning_rate_warmup(self, hparams):
        """Get learning rate warmup."""
        warmup_steps = hparams.warmup_steps
        warmup_scheme = hparams.warmup_scheme
        utils.print_out("  learning_rate=%g, warmup_steps=%d, warmup_scheme=%s" %
                        (hparams.learning_rate, warmup_steps, warmup_scheme))

        # Apply inverse decay if global steps less than warmup steps.
        # Inspired by https://arxiv.org/pdf/1706.03762.pdf (Section 5.3)
        # When step < warmup_steps,
        #   learning_rate *= warmup_factor ** (warmup_steps - step)
        if warmup_scheme == "t2t":
            # 0.01^(1/warmup_steps): start with a learning rate 100 times smaller.
            warmup_factor = tf.exp(tf.log(0.01) / warmup_steps)
            inv_decay = warmup_factor ** (
                tf.to_float(warmup_steps - self.global_step))
        else:
            raise ValueError("Unknown warmup scheme %s" % warmup_scheme)

        return tf.cond(
            self.global_step < hparams.warmup_steps,
            lambda: inv_decay * self.learning_rate,
            lambda: self.learning_rate,
            name="learning_rate_warump_cond")
Code Example #30
    def _compute_loss(self,
                      tm_logits,
                      lm_logits,
                      Z,
                      Z_source_target=False,
                      r_train_mode="KLq"):

        # The cross-entropy under a reparameterizable sample of the latent variable(s).
        tm_loss = self._compute_categorical_loss(tm_logits, self.target_output,
                                                 self.target_sequence_length)

        # The cross-entropy for the language model, also under a sample of the
        # latent variable(s). Not mathematically correct if we use the relaxation.
        lm_loss = self._compute_dense_categorical_loss(
            lm_logits, self.source_output, self.source_sequence_length)

        # We use a heuristic as an unjustified approximation for monolingual
        # batches.
        max_source_time = self.get_max_time(lm_logits)
        source_weights = tf.sequence_mask(self.source_sequence_length,
                                          max_source_time,
                                          dtype=lm_logits.dtype)
        entropy = tf.cond(self.mono_batch,
                          true_fn=lambda: self._compute_categorical_entropy(
                              self.source, source_weights),
                          false_fn=lambda: tf.constant(0.))

        # We compute an analytical KL between the Gaussian variational approximation
        # and its Gaussian prior.
        if Z_source_target:
            Z_x, Z_xy = Z
            if self.mode != tf.contrib.learn.ModeKeys.TRAIN:
                Z_networks_loss = tf.constant(0.)
            else:
                if r_train_mode == "l2":
                    utils.print_out("Using l2 train mode for r.")
                    Z_networks_loss = tf.nn.l2_loss(Z_x - Z_xy.mean())
                elif r_train_mode == "KLq":
                    utils.print_out("Using KLq train mode for r.")
                    Z_networks_loss = Z_xy.kl_divergence(Z_x)
                    Z_networks_loss = tf.reduce_mean(Z_networks_loss)
                elif r_train_mode == "KLr":
                    utils.print_out("Using KLr train mode for r.")
                    Z_networks_loss = Z_x.kl_divergence(Z_xy)
                    Z_networks_loss = tf.reduce_mean(Z_networks_loss)
                elif r_train_mode == "JS":
                    utils.print_out("Using JS train mode for r.")
                    Z_networks_loss = Z_xy.kl_divergence(
                        Z_x) + Z_x.kl_divergence(Z_xy)
                    Z_networks_loss = tf.reduce_mean(Z_networks_loss)
                else:
                    raise ValueError("Unknown value for r_train_mode: %s" %
                                     r_train_mode)
                Z_networks_loss *= self.complexity_factor

            standard_normal = tf.contrib.distributions.MultivariateNormalDiag(
                tf.zeros_like(Z_xy.mean()), tf.ones_like(Z_xy.stddev()))
            KL_Z = Z_xy.kl_divergence(standard_normal)
        else:
            standard_normal = tf.contrib.distributions.MultivariateNormalDiag(
                tf.zeros_like(Z.mean()), tf.ones_like(Z.stddev()))
            KL_Z = Z.kl_divergence(standard_normal)
            Z_networks_loss = tf.constant(0.)

        KL_Z = tf.reduce_mean(KL_Z)
        self.KL = KL_Z

        return tm_loss + lm_loss + self.complexity_factor * KL_Z - entropy + Z_networks_loss, \
            (tm_loss, lm_loss, KL_Z, entropy, Z_networks_loss)