Example #1
    def build_graph_dist_strategy(self, features, labels, mode, params):
        """Model function."""
        del labels, params
        misc_utils.print_out("Running dist_strategy mode_fn")

        hparams = self.hparams

        # Create a GNMT model for training.
        # assert (hparams.encoder_type == "gnmt" or
        #        hparams.attention_architecture in ["gnmt", "gnmt_v2"])
        with mixed_precision_scope():
            model = gnmt_model.GNMTModel(hparams, mode=mode, features=features)
            if mode == tf.contrib.learn.ModeKeys.INFER:
                sample_ids = model.sample_id
                reverse_target_vocab_table = lookup_ops.index_to_string_table_from_file(
                    hparams.tgt_vocab_file, default_value=vocab_utils.UNK)
                sample_words = reverse_target_vocab_table.lookup(
                    tf.to_int64(sample_ids))
                # Make sure outputs are of shape [batch_size, time], or
                # [beam_width, batch_size, time] when using beam search.
                if hparams.time_major:
                    sample_words = tf.transpose(sample_words)
                elif sample_words.shape.ndims == 3:
                    # beam search output in [batch_size, time, beam_width] shape.
                    sample_words = tf.transpose(sample_words, [2, 0, 1])
                predictions = {"predictions": sample_words}
                # return loss, vars, grads, predictions, train_op, scaffold
                return None, None, None, predictions, None, None
            elif mode == tf.contrib.learn.ModeKeys.TRAIN:
                loss = model.train_loss
                train_op = model.update
                return loss, model.params, model.grads, None, train_op, None
            else:
                raise ValueError("Unknown mode in model_fn: %s" % mode)
Example #2
def run_main(flags, default_hparams, eval_fn, target_session=""):
    """Run main."""
    # Job
    jobid = flags.jobid
    num_workers = flags.num_workers
    utils.print_out("# Job id %d" % jobid)

    # Random
    random_seed = flags.random_seed
    if random_seed is not None and random_seed > 0:
        utils.print_out("# Set random seed to %d" % random_seed)
        random.seed(random_seed + jobid)
        np.random.seed(random_seed + jobid)

    ## Train / Decode
    out_dir = flags.out_dir
    if not tf.gfile.Exists(out_dir): tf.gfile.MakeDirs(out_dir)

    # Load hparams.
    hparams = create_or_load_hparams(out_dir,
                                     default_hparams,
                                     flags.hparams_path,
                                     save_hparams=(jobid == 0))

    # Evaluate
    eval_fn(hparams, target_session=target_session)
Example #3
def load_model(model, ckpt, session, name):
    start_time = time.time()
    model.saver.restore(session, ckpt)
    session.run(tf.tables_initializer())
    utils.print_out("  loaded %s model parameters from %s, time %.2fs" %
                    (name, ckpt, time.time() - start_time))
    return model
def _cell_list(num_units,
               num_layers,
               num_residual_layers,
               forget_bias,
               dropout,
               mode,
               single_cell_fn=None,
               residual_fn=None,
               global_step=None,
               fast_reverse=False,
               seq_len=None):
    """Create a list of RNN cells."""
    if not single_cell_fn:
        single_cell_fn = _single_cell

    # Multi-GPU
    cell_list = []
    for i in range(num_layers):
        utils.print_out("  cell %d" % i, new_line=False)
        single_cell = single_cell_fn(
            num_units=num_units,
            forget_bias=forget_bias,
            dropout=dropout,
            mode=mode,
            residual_connection=(i >= num_layers - num_residual_layers),
            residual_fn=residual_fn,
            global_step=global_step,
            fast_reverse=fast_reverse,
            seq_len=seq_len)
        utils.print_out("")
        cell_list.append(single_cell)

    return cell_list
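
A minimal usage sketch for the two helpers above (the hyperparameter values, and the assumption that _single_cell and its CellWrapper dependency are importable from the same module, are illustrative, not from the original):

import tensorflow as tf

# Build one cell per layer; the last two layers get residual connections.
cells = _cell_list(num_units=512,
                   num_layers=4,
                   num_residual_layers=2,
                   forget_bias=1.0,
                   dropout=0.2,
                   mode=tf.contrib.learn.ModeKeys.TRAIN)
# Stack the per-layer cells into a single multi-layer RNN cell (TF 1.x API).
encoder_cell = tf.contrib.rnn.MultiRNNCell(cells)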
Example #5
    def _get_learning_rate_decay(self, hparams):
        """Get learning rate decay."""
        if hparams.decay_scheme in ["luong5", "luong10", "luong234"]:
            decay_factor = 0.5
            if hparams.decay_scheme == "luong5":
                start_decay_step = int(hparams.num_train_steps / 2)
                decay_times = 5
            elif hparams.decay_scheme == "luong10":
                start_decay_step = int(hparams.num_train_steps / 2)
                decay_times = 10
            elif hparams.decay_scheme == "luong234":
                start_decay_step = int(hparams.num_train_steps * 2 / 3)
                decay_times = 4
            remain_steps = hparams.num_train_steps - start_decay_step
            decay_steps = int(remain_steps / decay_times)
        elif not hparams.decay_scheme:  # no decay
            start_decay_step = hparams.num_train_steps
            decay_steps = 0
            decay_factor = 1.0
        elif hparams.decay_scheme:
            raise ValueError("Unknown decay scheme %s" % hparams.decay_scheme)
        utils.print_out(
            "  decay_scheme=%s, start_decay_step=%d, decay_steps %d, "
            "decay_factor %g" % (hparams.decay_scheme, start_decay_step,
                                 decay_steps, decay_factor))

        return tf.cond(
            self.global_step < start_decay_step,
            lambda: self.learning_rate,
            lambda: tf.train.exponential_decay(
                self.learning_rate,
                self.global_step - start_decay_step,
                decay_steps,
                decay_factor,
                staircase=True),
            name="learning_rate_decay_cond")
def check_vocab(vocab_file, out_dir, check_special_token=True, sos=None,
                eos=None, unk=None):
  """Check if vocab_file doesn't exist, create from corpus_file."""
  if tf.gfile.Exists(vocab_file):
    utils.print_out("# Vocab file %s exists" % vocab_file)
    vocab, vocab_size = load_vocab(vocab_file)
    if check_special_token:
      # Verify if the vocab starts with unk, sos, eos
      # If not, prepend those tokens & generate a new vocab file
      if not unk: unk = UNK
      if not sos: sos = SOS
      if not eos: eos = EOS
      assert len(vocab) >= 3
      if vocab[0] != unk or vocab[1] != sos or vocab[2] != eos:
        utils.print_out("The first 3 vocab words [%s, %s, %s]"
                        " are not [%s, %s, %s]" %
                        (vocab[0], vocab[1], vocab[2], unk, sos, eos))
        vocab = [unk, sos, eos] + vocab
        vocab_size += 3
        new_vocab_file = os.path.join(out_dir, os.path.basename(vocab_file))
        with codecs.getwriter("utf-8")(
            tf.gfile.GFile(new_vocab_file, "wb")) as f:
          for word in vocab:
            f.write("%s\n" % word)
        vocab_file = new_vocab_file
  else:
    raise ValueError("vocab_file '%s' does not exist." % vocab_file)

  vocab_size = len(vocab)
  return vocab_size, vocab_file
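
A usage sketch for check_vocab; the file paths are placeholders, not from the original:

vocab_size, vocab_file = check_vocab("/tmp/data/vocab.de",
                                     out_dir="/tmp/out_dir",
                                     check_special_token=True,
                                     sos="<s>", eos="</s>", unk="<unk>")
utils.print_out("# Final vocab file %s with %d words" % (vocab_file, vocab_size))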
Example #7
    def _get_learning_rate_warmup(self, hparams):
        """Get learning rate warmup."""
        warmup_steps = hparams.warmup_steps
        warmup_scheme = hparams.warmup_scheme
        utils.print_out(
            "  learning_rate=%g, warmup_steps=%d, warmup_scheme=%s" %
            (hparams.learning_rate, warmup_steps, warmup_scheme))
        if not warmup_scheme:
            return self.learning_rate

        # Apply inverse decay if global steps less than warmup steps.
        # Inspired by https://arxiv.org/pdf/1706.03762.pdf (Section 5.3)
        # When step < warmup_steps,
        #   learning_rate *= warmup_factor ** (warmup_steps - step)
        if warmup_scheme == "t2t":
            # 0.01^(1/warmup_steps): we start with an lr that is 100 times smaller.
            warmup_factor = tf.exp(tf.log(0.01) / warmup_steps)
            inv_decay = warmup_factor**(tf.to_float(warmup_steps -
                                                    self.global_step))
        else:
            raise ValueError("Unknown warmup scheme %s" % warmup_scheme)

        return tf.cond(self.global_step < hparams.warmup_steps,
                       lambda: inv_decay * self.learning_rate,
                       lambda: self.learning_rate,
                       name="learning_rate_warump_cond")
Example #8
def decode_and_evaluate(name,
                        model,
                        sess,
                        trans_file,
                        ref_file,
                        metrics,
                        subword_option,
                        beam_width,
                        tgt_eos,
                        num_translations_per_input=1,
                        decode=True):
    """Decode a test set and compute a score according to the evaluation task."""
    # Decode
    if decode:
        utils.print_out("  decoding to output %s." % trans_file)

        start_time = time.time()
        num_sentences = 0
        with codecs.getwriter("utf-8")(
                tf.gfile.GFile(trans_file, mode="wb")) as trans_f:
            trans_f.write("")  # Write empty string to ensure file is created.

            num_translations_per_input = max(
                min(num_translations_per_input, beam_width), 1)
            while True:
                try:
                    nmt_outputs, _ = model.decode(sess)
                    if beam_width == 0:
                        nmt_outputs = np.expand_dims(nmt_outputs, 0)

                    batch_size = nmt_outputs.shape[1]
                    num_sentences += batch_size

                    for sent_id in range(batch_size):
                        for beam_id in range(num_translations_per_input):
                            translation = get_translation(
                                nmt_outputs[beam_id],
                                sent_id,
                                tgt_eos=tgt_eos,
                                subword_option=subword_option)
                            trans_f.write((translation + b"\n").decode("utf-8"))
                except tf.errors.OutOfRangeError:
                    utils.print_time(
                        "  done, num sentences %d, num translations per input %d" %
                        (num_sentences, num_translations_per_input), start_time)
                    break

    # Evaluation
    evaluation_scores = {}
    if ref_file and tf.gfile.Exists(trans_file):
        for metric in metrics:
            score = evaluation_utils.evaluate(
                ref_file,
                trans_file,
                metric,
                subword_option=subword_option)
            evaluation_scores[metric] = score
            utils.print_out("  %s %s: %.1f" % (metric, name, score))

    return evaluation_scores
Example #9
def extend_hparams(hparams):
    """Add new arguments to hparams."""
    # Sanity checks
    if hparams.subword_option and hparams.subword_option not in ["spm", "bpe"]:
        raise ValueError("subword option must be either spm or bpe")
    if hparams.infer_mode == "beam_search" and hparams.beam_width <= 0:
        raise ValueError(
            "beam_width must be greater than 0 when using the beam_search "
            "decoder.")

    # Different number of encoder / decoder layers
    assert hparams.num_encoder_layers == hparams.num_decoder_layers

    # The first unidirectional layer (after the bi-directional layer) in
    # the GNMT encoder can't have a residual connection because its input
    # is the concatenation of the fw_cell and bw_cell outputs.
    num_encoder_residual_layers = hparams.num_encoder_layers - 2
    num_decoder_residual_layers = num_encoder_residual_layers
    _add_argument(hparams, "num_encoder_residual_layers",
                  num_encoder_residual_layers)
    _add_argument(hparams, "num_decoder_residual_layers",
                  num_decoder_residual_layers)

    ## Vocab
    # Get vocab file names first
    if hparams.vocab_prefix:
        src_vocab_file = hparams.vocab_prefix + "." + hparams.src
        tgt_vocab_file = hparams.vocab_prefix + "." + hparams.tgt
    else:
        raise ValueError("hparams.vocab_prefix must be provided.")

    # Source vocab
    src_vocab_size, src_vocab_file = vocab_utils.check_vocab(
        src_vocab_file,
        hparams.out_dir,
        check_special_token=hparams.check_special_token,
        sos=hparams.sos,
        eos=hparams.eos,
        unk=vocab_utils.UNK)

    # Target vocab
    utils.print_out("  using source vocab for target")
    tgt_vocab_file = src_vocab_file
    tgt_vocab_size = src_vocab_size
    _add_argument(hparams, "src_vocab_size", src_vocab_size)
    _add_argument(hparams, "tgt_vocab_size", tgt_vocab_size)
    _add_argument(hparams, "src_vocab_file", src_vocab_file)
    _add_argument(hparams, "tgt_vocab_file", tgt_vocab_file)

    # Num embedding partitions
    _add_argument(hparams, "num_enc_emb_partitions",
                  hparams.num_embeddings_partitions)
    _add_argument(hparams, "num_dec_emb_partitions",
                  hparams.num_embeddings_partitions)

    # Pretrained Embeddings
    _add_argument(hparams, "src_embed_file", "")
    _add_argument(hparams, "tgt_embed_file", "")

    return hparams
Example #10
    def _build_graph(self, hparams):
        dropout = hparams.dropout if self.mode == tf.contrib.learn.ModeKeys.TRAIN else 0.0  # Disable dropout outside training.

        with tf.variable_scope("decoder_cell") as scope:
            # Cell Type
            if hparams.unit_type == "lstm":
                utils.print_out("  LSTM, forget_bias=%g" % hparams.forget_bias, new_line=False)
                cell = tf.contrib.rnn.BasicLSTMCell(
                    hparams.num_units,
                    forget_bias=hparams.forget_bias)
            elif hparams.unit_type == "gru":
                utils.print_out("  GRU", new_line=False)
                cell = tf.contrib.rnn.GRUCell(hparams.num_units)
            else:
                raise ValueError("Required decoder cell not supported!")

            # Wrap the decoder cell with dropout
            if dropout > 0.0:
                cell = tf.contrib.rnn.DropoutWrapper(
                    cell=cell, input_keep_prob=(1.0 - dropout))

            # Add a residual connection to the decoder cell
            if hparams.residual:
                cell = tf.contrib.rnn.ResidualWrapper(cell)

            # Device Wrapper
            # if hparams.encoder_device:
            #     cell = tf.contrib.rnn.DeviceWrapper(cell, hparams.encoder_device)
            # self.decoder_scope = scope
            return cell
Example #11
  def _build_model(self, hparams):
    """Builds a sequence-to-sequence model.

    Args:
      hparams: Hyperparameter configurations.

    Returns:
      For inference, a tuple of the form
      (logits, decoder_cell_outputs, predicted_ids),
      where:
        logits: logits output of the decoder.
        decoder_cell_outputs: the output of the decoder.
        predicted_ids: predicted ids from beam search.
      For training, returns the final loss.

    Raises:
      ValueError: if encoder_type differs from mono and bi, or
        attention_option is not (luong | scaled_luong |
        bahdanau | normed_bahdanau).
    """
    # Encoder
    if hparams.language_model:  # no encoder for language modeling
      utils.print_out("  language modeling: no encoder")
      self.encoder_outputs = None
      encoder_state = None
    else:
      self.encoder_outputs, encoder_state = self._build_encoder(hparams)

    ## Decoder
    return self._build_decoder(self.encoder_outputs, encoder_state, hparams)
Example #12
    def _get_learning_rate_decay(self, hparams):
        """Get learning rate decay."""
        if hparams.learning_rate_decay_scheme in ["luong", "luong10"]:
            start_factor = 2
            start_decay_step = int(hparams.num_train_steps / start_factor)
            decay_factor = 0.5

            # decay 5 times
            if hparams.learning_rate_decay_scheme == "luong":
                decay_steps = int(hparams.num_train_steps / (5 * start_factor))
            # decay 10 times
            elif hparams.learning_rate_decay_scheme == "luong10":
                decay_steps = int(hparams.num_train_steps /
                                  (10 * start_factor))
        else:
            start_decay_step = hparams.start_decay_step
            decay_steps = hparams.decay_steps
            decay_factor = hparams.decay_factor
        utils.print_out(
            "  decay_scheme=%s, start_decay_step=%d, decay_steps %d, "
            "decay_factor %g" %
            (hparams.learning_rate_decay_scheme, start_decay_step,
             decay_steps, decay_factor))

        return tf.cond(
            self.global_step < start_decay_step,
            lambda: self.learning_rate,
            lambda: tf.train.exponential_decay(
                self.learning_rate,
                self.global_step - start_decay_step,
                decay_steps,
                decay_factor,
                staircase=True),
            name="learning_rate_decay_cond")
Example #13
def _cell_list(unit_type,
               num_units,
               num_layers,
               num_residual_layers,
               forget_bias,
               dropout,
               mode,
               num_gpus,
               base_gpu=0,
               verbose=True):
    """Create a list of RNN cells."""
    # Multi-GPU
    cell_list = []
    for i in range(num_layers):
        if verbose:
            utils.print_out("  cell %d" % i, new_line=False)
        dropout = dropout if mode == tf.contrib.learn.ModeKeys.TRAIN else 0.0  # Disable dropout outside training.
        single_cell = _single_cell(
            unit_type=unit_type,
            num_units=num_units,
            forget_bias=forget_bias,
            dropout=dropout,
            # Apply residual wrapper to last layers.
            residual_connection=(i >= num_layers - num_residual_layers),
            # Parallelize computation over GPUs.
            device_str=get_device_str(i + base_gpu, num_gpus),
            verbose=verbose)  # Whether to print to stdout.
        if verbose:
            utils.print_out("")  # create new line
        cell_list.append(single_cell)

    return cell_list
Example #14
    def _compute_tower_grads(self,
                             tower_loss,
                             tower_params,
                             learning_rate,
                             use_fp16=False,
                             loss_scale=None,
                             colocate_gradients_with_ops=True):
        """docstring."""
        if use_fp16:
            assert loss_scale
            scaled_loss = tf.multiply(tower_loss,
                                      tf.convert_to_tensor(
                                          loss_scale, dtype=tower_loss.dtype),
                                      name="scaling_loss")
        else:
            scaled_loss = tower_loss

        opt = self.get_optimizer(self.hparams, learning_rate)
        grads_and_vars = opt.compute_gradients(
            scaled_loss,
            tower_params,
            colocate_gradients_with_ops=self.hparams.colocate_gradients_with_ops)
        grads = [x for (x, _) in grads_and_vars]
        assert grads
        for g in grads:
            assert g.dtype == tf.float32, "grad.dtype isn't fp32: %s" % g.name
        # Warn about any parameters that received no gradient.
        for var, grad in zip(tower_params, grads):
            if grad is None:
                misc_utils.print_out("%s gradient is None!" % var.name)

        if use_fp16:
            # Downscale grads to undo the loss scaling.
            grads = [grad * tf.reciprocal(loss_scale) for grad in grads]
        return tower_params, grads, opt
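
The use_fp16 path above is standard static loss scaling: the loss is multiplied by loss_scale before differentiation so that small fp16 gradients do not underflow, and each gradient is multiplied by 1/loss_scale afterwards, which recovers the unscaled gradient exactly. A minimal numeric sketch with illustrative values:

scale = 128.0
true_grad = 3.0e-6
scaled_grad = scale * true_grad          # gradient of the scaled loss
recovered = scaled_grad * (1.0 / scale)  # undo the scaling
assert abs(recovered - true_grad) < 1e-12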
Example #15
def ensure_compatible_hparams(hparams, default_hparams, hparams_path=""):
    """Make sure the loaded hparams is compatible with new changes."""
    default_hparams = utils.maybe_parse_standard_hparams(
        default_hparams, hparams_path)

    # Set num encoder/decoder layers (for old checkpoints)
    if hasattr(hparams, "num_layers"):
        if not hasattr(hparams, "num_encoder_layers"):
            hparams.add_hparam("num_encoder_layers", hparams.num_layers)
        if not hasattr(hparams, "num_decoder_layers"):
            hparams.add_hparam("num_decoder_layers", hparams.num_layers)

    # For compatibility, if there are new fields in default_hparams,
    #   we add them to the current hparams.
    default_config = default_hparams.values()
    config = hparams.values()
    for key in default_config:
        if key not in config:
            hparams.add_hparam(key, default_config[key])

    # Update all hparams' keys if override_loaded_hparams=True
    if getattr(default_hparams, "override_loaded_hparams", None):
        overwritten_keys = default_config.keys()
    else:
        # For inference
        overwritten_keys = INFERENCE_KEYS

    for key in overwritten_keys:
        if getattr(hparams, key) != default_config[key]:
            utils.print_out("# Updating hparams.%s: %s -> %s" %
                            (key, str(getattr(hparams, key)),
                             str(default_config[key])))
            setattr(hparams, key, default_config[key])
    return hparams
Example #16
def _create_pretrained_emb_from_txt(vocab_file,
                                    embed_file,
                                    num_trainable_tokens=3,
                                    dtype=tf.float32,
                                    scope=None):
    """Load pretrain embeding from embed_file, and return an embedding matrix.

    Args:
      embed_file: Path to a Glove formated embedding txt file.
      num_trainable_tokens: Make the first n tokens in the vocab file as
      trainable
        variables. Default is 3, which is "<unk>", "<s>" and "</s>".
    """
    vocab, _ = vocab_utils.load_vocab(vocab_file)

    utils.print_out("# Using pretrained embedding: %s." % embed_file)
    utils.print_out("  with trainable tokens: ")

    emb_dict, emb_size = vocab_utils.load_embed_txt(embed_file)
    # Add all words present in the vocab but missing from the pretrained
    # embedding, initialized to zero vectors.
    for token in vocab:
        if token not in emb_dict:
            emb_dict[token] = [0.0] * emb_size

    emb_mat = np.array([emb_dict[token] for token in vocab],
                       dtype=dtype.as_numpy_dtype())
    emb_mat = tf.constant(emb_mat)
    emb_mat_const = tf.slice(emb_mat, [num_trainable_tokens, 0], [-1, -1])
    with tf.variable_scope(scope or "pretrain_embeddings",
                           dtype=dtype) as scope:
        with tf.device(_get_embed_device(num_trainable_tokens)):
            emb_mat_var = tf.get_variable("emb_mat_var",
                                          [num_trainable_tokens, emb_size])
    return tf.concat([emb_mat_var, emb_mat_const], 0)
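
A usage sketch for the helper above; the file paths are placeholders and a GloVe-style text embedding file is assumed, as in the docstring:

embedding_matrix = _create_pretrained_emb_from_txt(
    vocab_file="/tmp/data/vocab.en",
    embed_file="/tmp/data/glove.840B.300d.txt")
# embedding_matrix has shape [vocab_size, embed_size]; only the first
# num_trainable_tokens rows come from a trainable variable.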
def extend_hparams(hparams):
    """Extend training hparams."""

    # Set num_residual_layers
    if hparams.residual and hparams.num_layers > 1:
        num_residual_layers = hparams.num_layers - 1
    else:
        num_residual_layers = 0
    hparams.add_hparam("num_residual_layers", num_residual_layers)
    print("hparams.vocab_file", hparams.vocab_file)
    hparams.add_hparam("vocab_size",
                       vocab_utils.get_vocab_size(hparams.vocab_file))
    hparams.add_hparam("t1", vocab_utils.start_of_turn1)
    hparams.add_hparam("t2", vocab_utils.start_of_turn2)
    hparams.add_hparam("eod", vocab_utils.end_of_dialogue)
    hparams.add_hparam("unk", vocab_utils.UNK)

    # Check out_dir
    if not tf.gfile.Exists(hparams.out_dir):
        utils.print_out("# Creating output directory %s ..." % hparams.out_dir)
        tf.gfile.MakeDirs(hparams.out_dir)
    # Evaluation
    for metric in hparams.metrics:
        hparams.add_hparam("best_" + metric, 0)  # larger is better
        best_metric_dir = os.path.join(hparams.out_dir, "best_" + metric)
        hparams.add_hparam("best_" + metric + "_dir", best_metric_dir)
        tf.gfile.MakeDirs(best_metric_dir)

    # path
    if not hparams.inference_output_file:
        # If not set, default to inference_out.txt under out_dir.
        hparams.inference_output_file = os.path.join(hparams.out_dir,
                                                     "inference_out.txt")
    return hparams
Example #18
def before_train(train_model, train_sess, global_step, hparams, log_f,
                 tensor_or_op_name_to_replica_names):
    """Misc tasks to do before training."""
    stats = train.init_stats()
    lr_name = train_model.model.learning_rate.name
    assert len(tensor_or_op_name_to_replica_names[lr_name]) == 1
    lr = train_sess.run(tensor_or_op_name_to_replica_names[lr_name][0])
    info = {
        "train_ppl": 0.0,
        "speed": 0.0,
        "avg_step_time": 0.0,
        "avg_grad_norm": 0.0,
        "learning_rate": lr
    }
    start_train_time = time.time()
    utils.print_out(
        "# Start step %d, lr %g, %s" %
        (global_step, info["learning_rate"], time.ctime()), log_f)

    # Initialize all of the iterators
    skip_count = hparams.batch_size * hparams.epoch_step
    utils.print_out("# Init train iterator, skipping %d elements" % skip_count)
    skip_count_name = train_model.skip_count_placeholder.name
    feed_dict = {}
    num_skip_counts = len(tensor_or_op_name_to_replica_names[skip_count_name])
    for i in range(num_skip_counts):
        feed_dict[tensor_or_op_name_to_replica_names[skip_count_name][i]] = 0
    initializers = []
    init_name = train_model.iterator.initializer.name
    num_initializers = len(tensor_or_op_name_to_replica_names[init_name])
    for i in range(num_initializers):
        initializers.append(tensor_or_op_name_to_replica_names[init_name][i])
    train_sess.run(initializers, feed_dict=feed_dict)
    return stats, info, start_train_time
Example #19
def print_variables_in_ckpt(ckpt_path):
    """Print a list of variables in a checkpoint together with their shapes."""
    utils.print_out("# Variables in ckpt %s" % ckpt_path)
    reader = tf.train.NewCheckpointReader(ckpt_path)
    variable_map = reader.get_variable_to_shape_map()
    for key in sorted(variable_map.keys()):
        utils.print_out("  %s: %s" % (key, variable_map[key]))
Example #20
def tokenize(hparams, file, tokenized_file):
    utils.print_out("tokenizing {} -> {}".format(file, tokenized_file))
    with open(file, 'rb') as input_file:
        with open(tokenized_file, 'wb') as output_file:
            subprocess.run([hparams.tokenizer_file, '-l', hparams.src],
                           stdin=input_file,
                           stdout=output_file)
def _single_cell(num_units,
                 forget_bias,
                 dropout,
                 mode,
                 residual_connection=False,
                 residual_fn=None,
                 global_step=None,
                 fast_reverse=False,
                 seq_len=None):
    """Create an instance of a single RNN cell."""
    # dropout (= 1 - keep_prob) is set to 0 during eval and infer
    dropout = dropout if mode == tf.contrib.learn.ModeKeys.TRAIN else 0.0

    # Cell Type
    utils.print_out("  LSTM, forget_bias=%g" % forget_bias, new_line=False)
    single_cell = tf.contrib.rnn.BasicLSTMCell(num_units,
                                               forget_bias=forget_bias)

    # Dropout (= 1 - keep_prob)
    enabled = (mode == tf.contrib.learn.ModeKeys.TRAIN or
               dropout > 0.0 or fast_reverse)
    single_cell = CellWrapper(cell=single_cell,
                              input_keep_prob=(1.0 - dropout),
                              global_step=global_step,
                              seq_len=seq_len,
                              enabled=enabled)

    # Residual
    if residual_connection:
        single_cell = tf.contrib.rnn.ResidualWrapper(single_cell,
                                                     residual_fn=residual_fn)
        utils.print_out("  %s" % type(single_cell).__name__, new_line=False)

    return single_cell
def before_train(train_model, train_sess, global_step, hparams, log_f,
                 num_replicas_per_worker):
    """Misc tasks to do before training."""
    stats = train.init_stats()
    lr = train_sess.run(train_model.model.learning_rate)[0]
    info = {
        "train_ppl": 0.0,
        "speed": 0.0,
        "avg_step_time": 0.0,
        "avg_grad_norm": 0.0,
        "learning_rate": lr
    }
    start_train_time = time.time()
    utils.print_out(
        "# Start step %d, lr %g, %s" %
        (global_step, info["learning_rate"], time.ctime()), log_f)

    # Initialize all of the iterators
    skip_count = hparams.batch_size * hparams.epoch_step
    utils.print_out("# Init train iterator, skipping %d elements" % skip_count)
    skip_count_placeholder = train_model.skip_count_placeholder
    feed_dict = {
        skip_count_placeholder: [0 for _ in range(num_replicas_per_worker)]
    }
    init = train_model.iterator.initializer
    train_sess.run(init, feed_dict=feed_dict)
    return stats, info, start_train_time
Example #23
def before_train(loaded_train_model, train_model, train_sess, global_step,
                 hparams, log_f):
    """Misc tasks to do before training."""
    stats = init_stats()
    info = {
        "train_ppl": 0.0,
        "speed": 0.0,
        "avg_step_time": 0.0,
        "avg_grad_norm": 0.0,
        "avg_sequence_count": 0.0,
        "learning_rate":
        loaded_train_model.learning_rate.eval(session=train_sess)
    }
    start_train_time = time.time()
    utils.print_out(
        "# Start step %d, lr %g, %s" %
        (global_step, info["learning_rate"], time.ctime()), log_f)

    # Initialize all of the iterators
    skip_count = hparams.batch_size * hparams.epoch_step
    utils.print_out("# Init train iterator, skipping %d elements" % skip_count)
    train_sess.run(train_model.iterator.initializer,
                   feed_dict={train_model.skip_count_placeholder: skip_count})

    return stats, info, start_train_time
Example #24
def _cell_list(unit_type,
               num_units,
               num_layers,
               num_residual_layers,
               forget_bias,
               dropout,
               mode,
               dtype=None,
               single_cell_fn=None,
               residual_fn=None,
               use_block_lstm=False):
    """Create a list of RNN cells."""
    if not single_cell_fn:
        single_cell_fn = _single_cell

    # Multi-GPU
    cell_list = []
    for i in range(num_layers):
        utils.print_out("  cell %d" % i, new_line=False)
        single_cell = single_cell_fn(
            unit_type=unit_type,
            num_units=num_units,
            forget_bias=forget_bias,
            dropout=dropout,
            mode=mode,
            dtype=dtype,
            residual_connection=(i >= num_layers - num_residual_layers),
            residual_fn=residual_fn,
            use_block_lstm=use_block_lstm)
        utils.print_out("")
        cell_list.append(single_cell)

    return cell_list
Example #25
def print_step_info(prefix, global_step, info, result_summary, log_f):
    """Print all info at the current global step."""
    utils.print_out(
        "%sstep %d lr %g step-time %.2fs wps %.2fK ppl %.2f gN %.2f %s, %s" %
        (prefix, global_step, info["learning_rate"], info["avg_step_time"],
         info["speed"], info["train_ppl"], info["avg_grad_norm"],
         result_summary, time.ctime()), log_f)
def _build_encoder(model, encoder_emb_inp, hparams):
    """Build an seq2seq encoder."""
    num_layers = hparams.num_layers
    num_residual_layers = hparams.num_residual_layers

    iterator = model.iterator

    with tf.variable_scope("encoder") as scope:
        dtype = scope.dtype

        # encoder_outputs: [batch_size, max_time, num_units] (time_major=False)
        utils.print_out("  num_layers = %d, num_residual_layers=%d" %
                        (num_layers, num_residual_layers))
        cell = _build_encoder_cell(model,
                                   hparams,
                                   num_layers,
                                   num_residual_layers,
                                   base_gpu=model.global_gpu_num,
                                   all_layer_outputs=True)
        model.global_gpu_num += num_layers

        encoder_outputs, encoder_state = tf.nn.dynamic_rnn(
            cell,
            encoder_emb_inp,
            dtype=dtype,
            sequence_length=iterator.dialogue_len,
            time_major=False,
            swap_memory=True)
    return encoder_outputs, encoder_state
Example #27
def _cell_list(unit_type,
               num_units,
               num_layers,
               num_residual_layers,
               forget_bias,
               dropout,
               mode,
               num_gpus,
               base_gpu=0,
               single_cell_fn=None,
               residual_fn=None):
    """Create a list of RNN cells."""
    if not single_cell_fn:
        single_cell_fn = _single_cell

    # Multi-GPU
    cell_list = []
    for i in range(num_layers):
        utils.print_out("  cell %d" % i, new_line=False)
        single_cell = single_cell_fn(
            unit_type=unit_type,
            num_units=num_units,
            forget_bias=forget_bias,
            dropout=dropout,
            mode=mode,
            residual_connection=(i >= num_layers - num_residual_layers),
            device_str=get_device_str(i + base_gpu, num_gpus),
            residual_fn=residual_fn)
        utils.print_out("")
        cell_list.append(single_cell)

    return cell_list
Example #28
def run_main(flags, default_hparams, train_fn, inference_fn):
    """Run main."""
    # Random
    random_seed = flags.random_seed
    if random_seed is not None and random_seed > 0:
        utils.print_out("# Set random seed to %d" % random_seed)
        random.seed(random_seed)
        np.random.seed(random_seed)

    ## Train / Decode
    out_dir = flags.out_dir
    if not tf.gfile.Exists(out_dir): tf.gfile.MakeDirs(out_dir)

    hparams = create_or_load_hparams(out_dir, default_hparams)

    if flags.inference_input_file:
        # Inference
        ckpt = flags.ckpt
        if not ckpt:
            ckpt = tf.train.latest_checkpoint(out_dir)
        inference_fn(ckpt, flags.inference_input_file,
                     flags.inference_output_file, hparams)
    else:
        # Train
        train_fn(hparams)
Example #29
def ensure_compatible_hparams(hparams, default_hparams, hparams_path):
    """Make sure the loaded hparams is compatible with new changes."""
    default_hparams = utils.maybe_parse_standard_hparams(
        default_hparams, hparams_path)

    # For compatibility, if there are new fields in default_hparams,
    #   we add them to the current hparams.
    default_config = default_hparams.values()
    config = hparams.values()
    for key in default_config:
        if key not in config:
            hparams.add_hparam(key, default_config[key])

    # Make sure that the loaded model has the latest values for the keys below.
    updated_keys = [
        "out_dir", "num_gpus", "test_prefix", "beam_width",
        "length_penalty_weight", "num_train_steps"
    ]
    for key in updated_keys:
        if key in default_config and getattr(hparams,
                                             key) != default_config[key]:
            utils.print_out(
                "# Updating hparams.%s: %s -> %s" %
                (key, str(getattr(hparams, key)), str(default_config[key])))
            setattr(hparams, key, default_config[key])
    return hparams
Example #30
def single_worker_inference(infer_model,
                            ckpt,
                            inference_input_file,
                            inference_output_file,
                            hparams):
    """Inference with a single worker."""
    output_infer = inference_output_file

    # Read data
    infer_data = load_data(inference_input_file, hparams)

    with tf.Session(config=utils.get_config_proto(), graph=infer_model.graph) as sess:
        loaded_infer_model = model_helper.load_model(infer_model.model, ckpt, sess, "infer")
        sess.run(infer_model.iterator.initializer,
                 feed_dict={
                     infer_model.src_placeholder: infer_data,
                     infer_model.batch_size_placeholder: hparams.infer_batch_size
                 })
        # Decode
        utils.print_out("# Start decoding")
        _decode_and_evaluate("infer",
                             loaded_infer_model,
                             sess,
                             output_infer,
                             ref_file=None,
                             subword_option=None,
                             beam_width=hparams.beam_width,
                             tgt_eos=hparams.eos,
                             num_translations_per_input=hparams.num_translations_per_input)