Code Example #1
File: model_helper.py  Project: ylwctyt/MQNLP
def load_model(model, ckpt, session, name):
    start_time = time.time()
    model.saver.restore(session, ckpt)
    session.run(tf.tables_initializer())
    utils.print_out("  loaded %s model parameters from %s, time %.2fs" %
                    (name, ckpt, time.time() - start_time))
    return model
Code Example #2
    def _get_learning_rate_decay(self, hparams):
        """Get learning rate decay."""
        if hparams.decay_scheme in ["luong5", "luong10", "luong234"]:
            decay_factor = 0.5
            if hparams.decay_scheme == "luong5":
                start_decay_step = int(hparams.num_train_steps / 2)
                decay_times = 5
            elif hparams.decay_scheme == "luong10":
                start_decay_step = int(hparams.num_train_steps / 2)
                decay_times = 10
            elif hparams.decay_scheme == "luong234":
                start_decay_step = int(hparams.num_train_steps * 2 / 3)
                decay_times = 4
            remain_steps = hparams.num_train_steps - start_decay_step
            decay_steps = int(remain_steps / decay_times)
        elif not hparams.decay_scheme:  # no decay
            start_decay_step = hparams.num_train_steps
            decay_steps = 0
            decay_factor = 1.0
        elif hparams.decay_scheme:
            raise ValueError("Unknown decay scheme %s" % hparams.decay_scheme)
        utils.print_out(
            "  decay_scheme=%s, start_decay_step=%d, decay_steps %d, "
            "decay_factor %g" % (hparams.decay_scheme, start_decay_step,
                                 decay_steps, decay_factor))

        return tf.cond(self.global_step < start_decay_step,
                       lambda: self.learning_rate,
                       lambda: tf.train.exponential_decay(self.learning_rate, (
                           self.global_step - start_decay_step),
                                                          decay_steps,
                                                          decay_factor,
                                                          staircase=True),
                       name="learning_rate_decay_cond")
Code Example #3
def decode_and_evaluate(name,
                        model,
                        sess,
                        trans_file,
                        ref_file,
                        metrics,
                        subword_option,
                        beam_width,
                        tgt_eos,
                        num_translations_per_input=1,
                        decode=True):
  """Decode a test set and compute a score according to the evaluation task."""
  # Decode
  if decode:
    utils.print_out("  decoding to output %s." % trans_file)

    start_time = time.time()
    num_sentences = 0
    with codecs.getwriter("utf-8")(
        tf.gfile.GFile(trans_file, mode="wb")) as trans_f:
      trans_f.write("")  # Write empty string to ensure file is created.

      num_translations_per_input = max(
          min(num_translations_per_input, beam_width), 1)
      while True:
        try:
          nmt_outputs, _ = model.decode(sess)
          if beam_width == 0:
            nmt_outputs = np.expand_dims(nmt_outputs, 0)

          batch_size = nmt_outputs.shape[1]
          num_sentences += batch_size

          for sent_id in range(batch_size):
            for beam_id in range(num_translations_per_input):
              translation = get_translation(
                  nmt_outputs[beam_id],
                  sent_id,
                  tgt_eos=tgt_eos,
                  subword_option=subword_option)
              trans_f.write((translation + b"\n").decode("utf-8"))
        except tf.errors.OutOfRangeError:
          utils.print_time(
              "  done, num sentences %d, num translations per input %d" %
              (num_sentences, num_translations_per_input), start_time)
          break

  # Evaluation
  evaluation_scores = {}
  if ref_file and tf.gfile.Exists(trans_file):
    for metric in metrics:
      score = evaluation_utils.evaluate(
          ref_file,
          trans_file,
          metric,
          subword_option=subword_option)
      evaluation_scores[metric] = score
      utils.print_out("  %s %s: %.1f" % (metric, name, score))

  return evaluation_scores
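
The clamping of num_translations_per_input above guarantees that between 1 and beam_width hypotheses are written per source sentence (exactly one when beam_width is 0, since the outputs are then expanded to a single-entry beam dimension). A minimal standalone check of that rule, with hypothetical values:

# Standalone sketch of the clamping rule used above.
def clamp_translations(num_translations_per_input, beam_width):
    return max(min(num_translations_per_input, beam_width), 1)

assert clamp_translations(5, 10) == 5   # fits within the beam
assert clamp_translations(5, 3) == 3    # capped at beam width
assert clamp_translations(0, 3) == 1    # always write at least one translation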
Code Example #4
File: model_helper.py  Project: ylwctyt/MQNLP
def _cell_list(unit_type,
               num_units,
               num_layers,
               num_residual_layers,
               forget_bias,
               dropout,
               mode,
               num_gpus,
               base_gpu=0,
               single_cell_fn=None,
               residual_fn=None):
    """Create a list of RNN cells."""
    if not single_cell_fn:
        single_cell_fn = _single_cell

    # Multi-GPU
    cell_list = []
    for i in range(num_layers):
        utils.print_out("  cell %d" % i, new_line=False)
        single_cell = single_cell_fn(
            unit_type=unit_type,
            num_units=num_units,
            forget_bias=forget_bias,
            dropout=dropout,
            mode=mode,
            residual_connection=(i >= num_layers - num_residual_layers),
            device_str=get_device_str(i + base_gpu, num_gpus),
            residual_fn=residual_fn)
        utils.print_out("")
        cell_list.append(single_cell)

    return cell_list
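
The residual_connection flag above turns residual wrapping on only for the last num_residual_layers cells in the stack; a small standalone illustration with hypothetical layer counts:

# Which cells get a residual connection, for a hypothetical 4-layer stack
num_layers = 4
num_residual_layers = 2
for i in range(num_layers):
    residual = i >= num_layers - num_residual_layers
    print("cell %d: residual_connection=%s" % (i, residual))
# cell 0: False, cell 1: False, cell 2: True, cell 3: True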
Code Example #5
    def _get_infer_maximum_iterations(self, hparams, source_sequence_length):
        """Maximum decoding steps at inference time."""
        if hparams.tgt_max_len_infer:
            maximum_iterations = hparams.tgt_max_len_infer
            utils.print_out("  decoding maximum_iterations %d" %
                            maximum_iterations)
        else:
            # TODO(thangluong): add decoding_length_factor flag
            decoding_length_factor = 2.0
            max_encoder_length = tf.reduce_max(source_sequence_length)
            maximum_iterations = tf.to_int32(
                tf.round(
                    tf.to_float(max_encoder_length) * decoding_length_factor))
        return maximum_iterations
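
When tgt_max_len_infer is not set, the decoding budget is twice the longest source sentence in the batch; a plain-Python equivalent of the TF ops above, with hypothetical lengths:

# Hypothetical batch of source lengths; mirrors the else-branch above without TensorFlow.
source_sequence_length = [13, 25, 7]
decoding_length_factor = 2.0
max_encoder_length = max(source_sequence_length)                               # 25
maximum_iterations = int(round(max_encoder_length * decoding_length_factor))   # 50
print(maximum_iterations)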
Code Example #6
File: model_helper.py  Project: ylwctyt/MQNLP
def create_or_load_model(model, model_dir, session, name):
    """Create translation model and initialize or load parameters in session."""
    latest_ckpt = tf.train.latest_checkpoint(model_dir)
    if latest_ckpt:
        model = load_model(model, latest_ckpt, session, name)
    else:
        start_time = time.time()
        session.run(tf.global_variables_initializer())
        session.run(tf.tables_initializer())
        utils.print_out(
            "  created %s model with fresh parameters, time %.2fs" %
            (name, time.time() - start_time))

    global_step = model.global_step.eval(session=session)
    return model, global_step
Code Example #7
    def build_graph(self, hparams, scope=None):
        """Subclass must implement this method.

        Creates a sequence-to-sequence model with dynamic RNN decoder API.
        Args:
          hparams: Hyperparameter configurations.
          scope: VariableScope for the created subgraph; default "dynamic_seq2seq".

        Returns:
          A tuple of the form (logits, loss, final_context_state, sample_id),
          where:
            logits: float32 Tensor [batch_size x num_decoder_symbols].
            loss: the total loss / batch_size.
            final_context_state: The final state of decoder RNN.
            sample_id: the ids of the decoded sequence.

        Raises:
          ValueError: if encoder_type differs from mono and bi, or
            attention_option is not (luong | scaled_luong |
            bahdanau | normed_bahdanau).
        """
        utils.print_out("# creating %s graph ..." % self.mode)
        dtype = tf.float32

        with tf.variable_scope(scope or "dynamic_seq2seq", dtype=dtype):
            # Encoder
            encoder_outputs, encoder_state = self._build_encoder(hparams)

            ## Decoder
            logits, sample_id, final_context_state = self._build_decoder(
                encoder_outputs, encoder_state, hparams)

            ## Loss: skip the loss computation at inference time to speed it up
            if self.mode != tf.contrib.learn.ModeKeys.INFER:
                with tf.device(
                        model_helper.get_device_str(
                            self.num_encoder_layers - 1, self.num_gpus)):
                    loss = self._compute_loss(logits)
            else:
                loss = None

            return logits, loss, final_context_state, sample_id
Code Example #8
File: model_helper.py  Project: ylwctyt/MQNLP
def _create_pretrained_emb_from_txt(vocab_file,
                                    embed_file,
                                    num_trainable_tokens=3,
                                    dtype=tf.float32,
                                    scope=None):
    """Load pretrain embeding from embed_file, and return an embedding matrix.

  Args:
    embed_file: Path to a GloVe-formatted embedding txt file.
    num_trainable_tokens: Make the first n tokens in the vocab file as trainable
      variables. Default is 3, which is "<unk>", "<s>" and "</s>".
  """
    vocab, _ = vocab_utils.load_vocab(vocab_file)
    trainable_tokens = vocab[:num_trainable_tokens]

    utils.print_out("# Using pretrained embedding: %s." % embed_file)
    utils.print_out("  with trainable tokens: ")

    emb_dict, emb_size = vocab_utils.load_embed_txt(embed_file)
    for token in trainable_tokens:
        utils.print_out("    %s" % token)
        if token not in emb_dict:
            emb_dict[token] = [0.0] * emb_size

    emb_mat = np.array([emb_dict[token] for token in vocab],
                       dtype=dtype.as_numpy_dtype())
    emb_mat = tf.constant(emb_mat)
    emb_mat_const = tf.slice(emb_mat, [num_trainable_tokens, 0], [-1, -1])
    with tf.variable_scope(scope or "pretrain_embeddings",
                           dtype=dtype) as scope:
        with tf.device(_get_embed_device(num_trainable_tokens)):
            emb_mat_var = tf.get_variable("emb_mat_var",
                                          [num_trainable_tokens, emb_size])
    return tf.concat([emb_mat_var, emb_mat_const], 0)
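
The function above keeps only the first num_trainable_tokens rows of the embedding matrix trainable (they are re-created as the variable emb_mat_var) and freezes the remaining rows as a constant slice; a minimal numpy sketch of that split with toy values, not project data:

import numpy as np

# Toy 5-word vocab with embedding size 4; the first 3 rows stand for <unk>, <s>, </s>.
num_trainable_tokens = 3
emb_mat = np.arange(20, dtype=np.float32).reshape(5, 4)

emb_trainable = emb_mat[:num_trainable_tokens]   # re-created as a trainable variable (emb_mat_var)
emb_frozen = emb_mat[num_trainable_tokens:]      # kept frozen (tf.slice of a constant)
full = np.concatenate([emb_trainable, emb_frozen], axis=0)  # same order as the tf.concat above
assert full.shape == emb_mat.shape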
Code Example #9
    def _get_learning_rate_warmup(self, hparams):
        """Get learning rate warmup."""
        warmup_steps = hparams.warmup_steps
        warmup_scheme = hparams.warmup_scheme
        utils.print_out(
            "  learning_rate=%g, warmup_steps=%d, warmup_scheme=%s" %
            (hparams.learning_rate, warmup_steps, warmup_scheme))

        # Apply inverse decay if global steps less than warmup steps.
        # Inspired by https://arxiv.org/pdf/1706.03762.pdf (Section 5.3)
        # When step < warmup_steps,
        #   learning_rate *= warmup_factor ** (warmup_steps - step)
        if warmup_scheme == "t2t":
            # 0.01^(1/warmup_steps): we start with a lr, 100 times smaller
            warmup_factor = tf.exp(tf.log(0.01) / warmup_steps)
            inv_decay = warmup_factor**(tf.to_float(warmup_steps -
                                                    self.global_step))
        else:
            raise ValueError("Unknown warmup scheme %s" % warmup_scheme)

        return tf.cond(self.global_step < hparams.warmup_steps,
                       lambda: inv_decay * self.learning_rate,
                       lambda: self.learning_rate,
                       name="learning_rate_warump_cond")
Code Example #10
    def _build_encoder(self, hparams):
        """Build an encoder."""
        num_layers = self.num_encoder_layers
        num_residual_layers = self.num_encoder_residual_layers
        iterator = self.iterator

        source = iterator.source
        if self.time_major:
            source = tf.transpose(source)

        with tf.variable_scope("encoder") as scope:
            dtype = scope.dtype
            # Look up embedding, emb_inp: [max_time, batch_size, num_units]
            encoder_emb_inp = tf.nn.embedding_lookup(self.embedding_encoder,
                                                     source)

            # Encoder_outputs: [max_time, batch_size, num_units]
            if hparams.encoder_type == "uni":
                utils.print_out("  num_layers = %d, num_residual_layers=%d" %
                                (num_layers, num_residual_layers))
                cell = self._build_encoder_cell(hparams, num_layers,
                                                num_residual_layers)

                encoder_outputs, encoder_state = tf.nn.dynamic_rnn(
                    cell,
                    encoder_emb_inp,
                    dtype=dtype,
                    sequence_length=iterator.source_sequence_length,
                    time_major=self.time_major,
                    swap_memory=True)
            elif hparams.encoder_type == "bi":
                num_bi_layers = int(num_layers / 2)
                num_bi_residual_layers = int(num_residual_layers / 2)
                utils.print_out(
                    "  num_bi_layers = %d, num_bi_residual_layers=%d" %
                    (num_bi_layers, num_bi_residual_layers))

                encoder_outputs, bi_encoder_state = (
                    self._build_bidirectional_rnn(
                        inputs=encoder_emb_inp,
                        sequence_length=iterator.source_sequence_length,
                        dtype=dtype,
                        hparams=hparams,
                        num_bi_layers=num_bi_layers,
                        num_bi_residual_layers=num_bi_residual_layers))

                if num_bi_layers == 1:
                    encoder_state = bi_encoder_state
                else:
                    # alternately concat forward and backward states
                    encoder_state = []
                    for layer_id in range(num_bi_layers):
                        encoder_state.append(
                            bi_encoder_state[0][layer_id])  # forward
                        encoder_state.append(
                            bi_encoder_state[1][layer_id])  # backward
                    encoder_state = tuple(encoder_state)
            else:
                raise ValueError("Unknown encoder_type %s" %
                                 hparams.encoder_type)
        return encoder_outputs, encoder_state
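
For num_bi_layers > 1, the loop above interleaves the forward and backward states layer by layer before handing them to the decoder; a minimal sketch with placeholder state objects (the real ones are LSTMStateTuples):

# Placeholder strings stand in for per-layer RNN states.
bi_encoder_state = (("fw0", "fw1"),   # forward states, one per bi layer
                    ("bw0", "bw1"))   # backward states, one per bi layer
num_bi_layers = 2

encoder_state = []
for layer_id in range(num_bi_layers):
    encoder_state.append(bi_encoder_state[0][layer_id])  # forward
    encoder_state.append(bi_encoder_state[1][layer_id])  # backward
print(tuple(encoder_state))   # ('fw0', 'bw0', 'fw1', 'bw1')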
Code Example #11
    def __init__(self,
                 hparams,
                 mode,
                 iterator,
                 source_vocab_table,
                 target_vocab_table,
                 reverse_target_vocab_table=None,
                 scope=None,
                 extra_args=None):
        """Create the model.

        Args:
          hparams: Hyperparameter configurations.
          mode: TRAIN | EVAL | INFER
          iterator: Dataset Iterator that feeds data.
          source_vocab_table: Lookup table mapping source words to ids.
          target_vocab_table: Lookup table mapping target words to ids.
          reverse_target_vocab_table: Lookup table mapping ids to target words. Only
            required in INFER mode. Defaults to None.
          scope: scope of the model.
          extra_args: model_helper.ExtraArgs, for passing customizable functions.

        """
        assert isinstance(iterator, iterator_utils.BatchedInput)
        self.iterator = iterator
        self.mode = mode
        self.src_vocab_table = source_vocab_table
        self.tgt_vocab_table = target_vocab_table
        # Source vocabulary size
        self.src_vocab_size = hparams.src_vocab_size
        # Target vocabulary size
        self.tgt_vocab_size = hparams.tgt_vocab_size
        self.num_gpus = hparams.num_gpus
        self.time_major = hparams.time_major

        # extra_args: to make it flexible for adding external customizable code
        self.single_cell_fn = None
        if extra_args:  # custom single RNN cell function
            self.single_cell_fn = extra_args.single_cell_fn

        # Set num layers of encoder layer and decoder layer
        self.num_encoder_layers = hparams.num_encoder_layers
        self.num_decoder_layers = hparams.num_decoder_layers
        assert self.num_encoder_layers
        assert self.num_decoder_layers

        # Set number of residual layers
        if hasattr(hparams,
                   "num_residual_layers"):  # compatible with common_test_utils
            self.num_encoder_residual_layers = hparams.num_residual_layers
            self.num_decoder_residual_layers = hparams.num_residual_layers
        else:
            self.num_encoder_residual_layers = hparams.num_encoder_residual_layers
            self.num_decoder_residual_layers = hparams.num_decoder_residual_layers

        # Initializer: random seed, initial weights, etc.
        initializer = model_helper.get_initializer(hparams.init_op,
                                                   hparams.random_seed,
                                                   hparams.init_weight)
        tf.get_variable_scope().set_initializer(initializer)

        # Embeddings: load pretrained word vectors or add an embedding layer to the graph
        self.init_embeddings(hparams, scope)
        # batch_size
        self.batch_size = tf.size(self.iterator.source_sequence_length)

        # Projection: fully connected layer projecting decoder outputs to the target vocab size
        with tf.variable_scope(scope or "build_network"):
            with tf.variable_scope("decoder/output_projection"):
                self.output_layer = layers_core.Dense(hparams.tgt_vocab_size,
                                                      use_bias=False,
                                                      name="output_projection")

        ## Train graph: build the seq2seq model and return the decoder results
        res = self.build_graph(
            hparams, scope=scope)  # (logits, loss, final_context_state)

        # Training mode
        if self.mode == tf.contrib.learn.ModeKeys.TRAIN:
            self.train_loss = res[1]
            self.word_count = tf.reduce_sum(  # total number of words
                self.iterator.source_sequence_length) + tf.reduce_sum(
                    self.iterator.target_sequence_length)
        elif self.mode == tf.contrib.learn.ModeKeys.EVAL:
            self.eval_loss = res[1]
        elif self.mode == tf.contrib.learn.ModeKeys.INFER:
            self.infer_logits, _, self.final_context_state, self.sample_id = res
            # Convert the decoder output ids to strings
            self.sample_words = reverse_target_vocab_table.lookup(
                tf.to_int64(self.sample_id))

        if self.mode != tf.contrib.learn.ModeKeys.INFER:
            ## Count the number of predicted words for compute ppl.
            self.predict_count = tf.reduce_sum(
                self.iterator.target_sequence_length)

        self.global_step = tf.Variable(0, trainable=False)
        params = tf.trainable_variables()

        # Gradients and SGD update operation for training the model.
        # Arrange for the embedding vars to appear at the beginning.
        if self.mode == tf.contrib.learn.ModeKeys.TRAIN:
            self.learning_rate = tf.constant(hparams.learning_rate)
            # warm-up
            self.learning_rate = self._get_learning_rate_warmup(hparams)
            # decay
            self.learning_rate = self._get_learning_rate_decay(hparams)

            # Optimizer
            if hparams.optimizer == "sgd":
                opt = tf.train.GradientDescentOptimizer(self.learning_rate)
                tf.summary.scalar("lr", self.learning_rate)
            elif hparams.optimizer == "adam":
                opt = tf.train.AdamOptimizer(self.learning_rate)

            # Gradients
            gradients = tf.gradients(
                self.train_loss,
                params,
                colocate_gradients_with_ops=hparams.colocate_gradients_with_ops)
            # Gradient clipping
            clipped_grads, grad_norm_summary, grad_norm = model_helper.gradient_clip(
                gradients, max_gradient_norm=hparams.max_gradient_norm)
            self.grad_norm = grad_norm

            self.update = opt.apply_gradients(zip(clipped_grads, params),
                                              global_step=self.global_step)

            # Summary
            self.train_summary = tf.summary.merge([
                tf.summary.scalar("lr", self.learning_rate),
                tf.summary.scalar("train_loss", self.train_loss),
            ] + grad_norm_summary)

        if self.mode == tf.contrib.learn.ModeKeys.INFER:
            self.infer_summary = self._get_infer_summary(hparams)

        # Saver
        self.saver = tf.train.Saver(tf.global_variables(),
                                    max_to_keep=hparams.num_keep_ckpts)

        # Print trainable variables
        utils.print_out("# Trainable variables")
        for param in params:
            utils.print_out(
                "  %s, %s, %s" %
                (param.name, str(param.get_shape()), param.op.device))
Code Example #12
File: model_helper.py  Project: ylwctyt/MQNLP
def avg_checkpoints(model_dir, num_last_checkpoints, global_step,
                    global_step_name):
    """Average the last N checkpoints in the model_dir."""
    checkpoint_state = tf.train.get_checkpoint_state(model_dir)
    if not checkpoint_state:
        utils.print_out("# No checkpoint file found in directory: %s" %
                        model_dir)
        return None

    # Checkpoints are ordered from oldest to newest.
    checkpoints = (
        checkpoint_state.all_model_checkpoint_paths[-num_last_checkpoints:])

    if len(checkpoints) < num_last_checkpoints:
        utils.print_out(
            "# Skipping averaging checkpoints because not enough checkpoints is "
            "avaliable.")
        return None

    avg_model_dir = os.path.join(model_dir, "avg_checkpoints")
    if not tf.gfile.Exists(avg_model_dir):
        utils.print_out(
            "# Creating new directory %s for saving averaged checkpoints." %
            avg_model_dir)
        tf.gfile.MakeDirs(avg_model_dir)

    utils.print_out("# Reading and averaging variables in checkpoints:")
    var_list = tf.contrib.framework.list_variables(checkpoints[0])
    var_values, var_dtypes = {}, {}
    for (name, shape) in var_list:
        if name != global_step_name:
            var_values[name] = np.zeros(shape)

    for checkpoint in checkpoints:
        utils.print_out("    %s" % checkpoint)
        reader = tf.contrib.framework.load_checkpoint(checkpoint)
        for name in var_values:
            tensor = reader.get_tensor(name)
            var_dtypes[name] = tensor.dtype
            var_values[name] += tensor

    for name in var_values:
        var_values[name] /= len(checkpoints)

    # Build a graph with the same variables as in the checkpoints, and save the
    # averaged variables into avg_model_dir.
    with tf.Graph().as_default():
        tf_vars = [
            tf.get_variable(v,
                            shape=var_values[v].shape,
                            dtype=var_dtypes[v]) for v in var_values
        ]

        placeholders = [
            tf.placeholder(v.dtype, shape=v.shape) for v in tf_vars
        ]
        assign_ops = [tf.assign(v, p) for (v, p) in zip(tf_vars, placeholders)]
        global_step_var = tf.Variable(global_step,
                                      name=global_step_name,
                                      trainable=False)
        saver = tf.train.Saver(tf.global_variables())

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            for p, assign_op, (name, value) in zip(placeholders, assign_ops,
                                                   six.iteritems(var_values)):
                sess.run(assign_op, {p: value})

            # Use the built saver to save the averaged checkpoint. Only keep 1
            # checkpoint and the best checkpoint will be moved to avg_best_metric_dir.
            saver.save(sess, os.path.join(avg_model_dir, "translate.ckpt"))

    return avg_model_dir
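
The core of avg_checkpoints is a running sum over the selected checkpoints followed by a division; a minimal numpy sketch with made-up tensors for one variable:

import numpy as np

# Hypothetical values of a single variable as read from three checkpoints.
checkpoint_tensors = [np.array([1.0, 2.0]),
                      np.array([3.0, 4.0]),
                      np.array([5.0, 6.0])]

avg = np.zeros_like(checkpoint_tensors[0])
for tensor in checkpoint_tensors:
    avg += tensor
avg /= len(checkpoint_tensors)
print(avg)   # [3. 4.]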
Code Example #13
File: model_helper.py  Project: ylwctyt/MQNLP
def _single_cell(unit_type,
                 num_units,
                 forget_bias,
                 dropout,
                 mode,
                 residual_connection=False,
                 device_str=None,
                 residual_fn=None):
    """Create an instance of a single RNN cell."""
    # dropout (= 1 - keep_prob) is set to 0 during eval and infer
    dropout = dropout if mode == tf.contrib.learn.ModeKeys.TRAIN else 0.0

    # Cell Type
    if unit_type == "lstm":
        utils.print_out("  LSTM, forget_bias=%g" % forget_bias, new_line=False)
        single_cell = tf.contrib.rnn.BasicLSTMCell(num_units,
                                                   forget_bias=forget_bias)
    elif unit_type == "gru":
        utils.print_out("  GRU", new_line=False)
        single_cell = tf.contrib.rnn.GRUCell(num_units)
    elif unit_type == "layer_norm_lstm":
        utils.print_out("  Layer Normalized LSTM, forget_bias=%g" %
                        forget_bias,
                        new_line=False)
        single_cell = tf.contrib.rnn.LayerNormBasicLSTMCell(
            num_units, forget_bias=forget_bias, layer_norm=True)
    elif unit_type == "nas":
        utils.print_out("  NASCell", new_line=False)
        single_cell = tf.contrib.rnn.NASCell(num_units)
    else:
        raise ValueError("Unknown unit type %s!" % unit_type)

    # Dropout (= 1 - keep_prob)
    if dropout > 0.0:
        single_cell = tf.contrib.rnn.DropoutWrapper(cell=single_cell,
                                                    input_keep_prob=(1.0 -
                                                                     dropout))
        utils.print_out("  %s, dropout=%g " %
                        (type(single_cell).__name__, dropout),
                        new_line=False)

    # Residual
    if residual_connection:
        single_cell = tf.contrib.rnn.ResidualWrapper(single_cell,
                                                     residual_fn=residual_fn)
        utils.print_out("  %s" % type(single_cell).__name__, new_line=False)

    # Device Wrapper
    if device_str:
        single_cell = tf.contrib.rnn.DeviceWrapper(single_cell, device_str)
        utils.print_out("  %s, device=%s" %
                        (type(single_cell).__name__, device_str),
                        new_line=False)

    return single_cell
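
When several of the conditions above apply, the wrappers nest around the base cell in a fixed order: dropout first, then the residual connection, then device placement. A minimal usage sketch, assuming TensorFlow 1.x with tf.contrib as in the rest of this project; the unit size and device string are hypothetical:

import tensorflow as tf  # TensorFlow 1.x

cell = tf.contrib.rnn.BasicLSTMCell(128, forget_bias=1.0)
cell = tf.contrib.rnn.DropoutWrapper(cell=cell, input_keep_prob=0.8)   # dropout = 0.2
cell = tf.contrib.rnn.ResidualWrapper(cell)                            # residual connection
cell = tf.contrib.rnn.DeviceWrapper(cell, "/gpu:0")                    # pin the cell to a device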
Code Example #14
File: model_helper.py  Project: ylwctyt/MQNLP
def create_emb_for_encoder_and_decoder(share_vocab,
                                       src_vocab_size,
                                       tgt_vocab_size,
                                       src_embed_size,
                                       tgt_embed_size,
                                       dtype=tf.float32,
                                       num_partitions=0,
                                       src_vocab_file=None,
                                       tgt_vocab_file=None,
                                       src_embed_file=None,
                                       tgt_embed_file=None,
                                       scope=None):
    """Create embedding matrix for both encoder and decoder.

  Args:
    share_vocab: A boolean. Whether to share embedding matrix for both
      encoder and decoder.
    src_vocab_size: An integer. The source vocab size.
    tgt_vocab_size: An integer. The target vocab size.
    src_embed_size: An integer. The embedding dimension for the encoder's
      embedding.
    tgt_embed_size: An integer. The embedding dimension for the decoder's
      embedding.
    dtype: dtype of the embedding matrix. Default to float32.
    num_partitions: number of partitions used for the embedding vars.
    scope: VariableScope for the created subgraph. Default to "embedding".

  Returns:
    embedding_encoder: Encoder's embedding matrix.
    embedding_decoder: Decoder's embedding matrix.

  Raises:
    ValueError: if share_vocab is set but source and target vocab sizes differ.
  """

    if num_partitions <= 1:
        partitioner = None
    else:
        # Note: num_partitions > 1 is required for distributed training because
        # embedding_lookup tries to colocate a single-partition embedding variable
        # with the lookup ops, which may cause embedding variables to be placed on
        # worker jobs.
        partitioner = tf.fixed_size_partitioner(num_partitions)

    if (src_embed_file or tgt_embed_file) and partitioner:
        raise ValueError(
            "Can't set num_partitions > 1 when using pretrained embedding")

    with tf.variable_scope(scope or "embeddings",
                           dtype=dtype,
                           partitioner=partitioner) as scope:
        # Share embedding
        if share_vocab:
            if src_vocab_size != tgt_vocab_size:
                raise ValueError(
                    "Share embedding but different src/tgt vocab sizes"
                    " %d vs. %d" % (src_vocab_size, tgt_vocab_size))
            assert src_embed_size == tgt_embed_size
            utils.print_out("# Use the same embedding for source and target")
            vocab_file = src_vocab_file or tgt_vocab_file
            embed_file = src_embed_file or tgt_embed_file

            embedding_encoder = _create_or_load_embed("embedding_share",
                                                      vocab_file, embed_file,
                                                      src_vocab_size,
                                                      src_embed_size, dtype)
            embedding_decoder = embedding_encoder
        else:
            with tf.variable_scope("encoder", partitioner=partitioner):
                embedding_encoder = _create_or_load_embed(
                    "embedding_encoder", src_vocab_file, src_embed_file,
                    src_vocab_size, src_embed_size, dtype)

            with tf.variable_scope("decoder", partitioner=partitioner):
                embedding_decoder = _create_or_load_embed(
                    "embedding_decoder", tgt_vocab_file, tgt_embed_file,
                    tgt_vocab_size, tgt_embed_size, dtype)

    return embedding_encoder, embedding_decoder
Code Example #15
File: gnmt_model.py  Project: ylwctyt/MQNLP
    def _build_encoder(self, hparams):
        """Build a GNMT encoder."""
        # Encoder RNN type: unidirectional LSTM or BiLSTM
        if hparams.encoder_type == "uni" or hparams.encoder_type == "bi":
            return super(GNMTModel, self)._build_encoder(hparams)

        if hparams.encoder_type != "gnmt":
            raise ValueError("Unknown encoder_type %s" % hparams.encoder_type)

        # Build GNMT encoder.
        # Number of bidirectional RNN layers
        num_bi_layers = 1
        # Remaining encoder layers are unidirectional
        num_uni_layers = self.num_encoder_layers - num_bi_layers
        utils.print_out("  num_bi_layers = %d" % num_bi_layers)
        utils.print_out("  num_uni_layers = %d" % num_uni_layers)

        iterator = self.iterator
        source = iterator.source
        if self.time_major:
            source = tf.transpose(source)

        with tf.variable_scope("encoder") as scope:
            dtype = scope.dtype

            # Look up embedding, emb_inp: [max_time, batch_size, num_units]
            #   when time_major = True
            # embedding look_up
            encoder_emb_inp = tf.nn.embedding_lookup(self.embedding_encoder,
                                                     source)

            # Execute _build_bidirectional_rnn from Model class
            # BiLSTM layer encodes the input
            bi_encoder_outputs, bi_encoder_state = self._build_bidirectional_rnn(
                inputs=encoder_emb_inp,
                sequence_length=iterator.source_sequence_length,
                dtype=dtype,
                hparams=hparams,
                num_bi_layers=num_bi_layers,
                num_bi_residual_layers=0,  # no residual connection
            )

            uni_cell = model_helper.create_rnn_cell(
                unit_type=hparams.unit_type,
                num_units=hparams.num_units,
                num_layers=num_uni_layers,
                num_residual_layers=self.num_encoder_residual_layers,
                forget_bias=hparams.forget_bias,
                dropout=hparams.dropout,
                num_gpus=self.num_gpus,
                base_gpu=1,
                mode=self.mode,
                single_cell_fn=self.single_cell_fn)

            # encoder_outputs: size [max_time, batch_size, num_units]
            #   when time_major = True
            # Unidirectional LSTM stack for the remaining encoder layers
            encoder_outputs, encoder_state = tf.nn.dynamic_rnn(
                uni_cell,
                bi_encoder_outputs,
                dtype=dtype,
                sequence_length=iterator.source_sequence_length,
                time_major=self.time_major)

            # Pass all encoder state to the decoder except the first bi-directional
            # layer's forward state (only its backward state is kept).
            encoder_state = (bi_encoder_state[1],) + (
                (encoder_state,) if num_uni_layers == 1 else encoder_state)

        return encoder_outputs, encoder_state
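
The final encoder_state tuple above prepends the backward state of the single bidirectional layer to the unidirectional states; a tiny sketch with placeholder objects (the real ones are LSTMStateTuples) showing the composition:

# Placeholder strings stand in for the real LSTM states.
bi_encoder_state = ("bi_fw", "bi_bw")            # forward / backward states of the bi layer
uni_encoder_state = ("uni0", "uni1", "uni2")     # hypothetical num_uni_layers = 3
num_uni_layers = 3

encoder_state = (bi_encoder_state[1],) + (
    (uni_encoder_state,) if num_uni_layers == 1 else uni_encoder_state)
print(encoder_state)   # ('bi_bw', 'uni0', 'uni1', 'uni2')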