def _build_embed_encoder_and_attention(self, x, q,
                                         context_lengths, question_lengths):
    """Run the shared embedding encoder and context-question attention.

    Args:
      x: context Tensor of shape [batch, max_context_length, dim].
      q: question Tensor of shape [batch, max_question_length, dim].
      context_lengths: length Tensor of shape [batch].
      question_lengths: length Tensor of shape [batch].

    Returns:
      x_final: attended context Tensor of shape
        [batch, max_context_length, dim].
    """
    keep_prob = self.config['input_keep_prob']

    # A single encoder instance is shared by context and question.
    shared_encoder = self.init_submodule(
        self.config['encoder_emb'], name='xq_encoder')

    def encode(seq, lengths):
      # Encode a sequence, then apply mode-aware dropout.
      encoded = shared_encoder(seq, lengths)['outputs']
      return squad_helper.dropout_wrapper(encoded, keep_prob, self.mode)

    x = encode(x, context_lengths)
    q = encode(q, question_lengths)

    # Context-question attention: xq attends question into context,
    # qx attends context into question (DCN-style bi-attention).
    with tf.variable_scope('attention'):
      xq, qx = squad_helper.bi_attention_memory_efficient_dcn(
          a=q,
          b=x,
          mask_a=question_lengths,
          mask_b=context_lengths)

    # Fuse the context with the attended representations.
    return tf.concat([x, xq, x * xq, x * qx], 2)
  def _build_embed(self, features, training):
    """Build embedding model and return document/question representations.

    Args:
      features: dict; input data.
      training: bool; whether we are under training mode.

    Returns:
      x: context Tensor of shape [batch, max_context_length, dim].
      q: question Tensor of shape [batch, max_question_length, dim].
    """
    config = self.config
    tf.logging.info(config)

    embedder = self.init_submodule(config['embedding'])
    # Invoke the embedder exactly once. A previous version called it twice,
    # tuple-unpacking and discarding the first result, which built a
    # duplicate set of embedding ops in the graph.
    emb = embedder(features)
    # Concatenate character- and word-level embeddings on the feature axis.
    x = tf.concat([emb['xc'], emb['xw']], 2)
    q = tf.concat([emb['qc'], emb['qw']], 2)
    x = squad_helper.dropout_wrapper(x, config['input_keep_prob'], self.mode)
    q = squad_helper.dropout_wrapper(q, config['input_keep_prob'], self.mode)
    # Highway layers share weights between context and question (reuse=True
    # on the question pass).
    x = squad_helper.highway_layer(
        x, num_layers=2, activation='relu', training=training,
        dropout_rate=1 - config['hw_keep_prob'])
    q = squad_helper.highway_layer(
        q, num_layers=2, activation='relu', training=training,
        dropout_rate=1 - config['hw_keep_prob'], reuse=True)

    return x, q
  def _build_model_encoder(self, x, context_lengths, gpu_id=0, num_gpus=1):
    """Build modeling encoder and return start/end logits.

    Args:
      x: input Tensor of shape [batch, max_length, dim].
      context_lengths: length Tensor of shape [batch].
      gpu_id: start GPU id.
      num_gpus: number of GPUs available.

    Returns:
      logits_start: Tensor of shape [batch, max_length].
      logits_end: Tensor of shape [batch, max_length].
      modeling_layers: a list of modeling layers, from bottom to top,
        each has shape [batch, max_length, dim].
    """
    keep_prob = self.config['output_keep_prob']

    # Apply the (shared) modeling encoder four times, splitting the stack
    # across two devices: layers 0-1 on gpu_id, layers 2-3 on gpu_id + 1.
    encoder_model = None
    modeling_layers = []
    h = x
    for layer_idx in range(4):
      device = misc_util.get_device_str(gpu_id + layer_idx // 2, num_gpus)
      with tf.device(device):
        if encoder_model is None:
          # Instantiated inside the first device context, as before.
          encoder_model = self.init_submodule(
              self.config['encoder_model'], name='encoder_model')
        h = encoder_model(h, context_lengths)['outputs']
        h = squad_helper.dropout_wrapper(h, keep_prob, self.mode)
      modeling_layers.append(h)

    _, x1, x2, x3 = modeling_layers

    # Both answer-pointer heads live on the second device.
    with tf.device(misc_util.get_device_str(gpu_id + 1, num_gpus)):
      logits_start = squad_helper.exp_mask(
          tf.squeeze(
              tf.layers.dense(tf.concat([x1, x2], 2), 1, name='logits1'), 2),
          context_lengths)
      logits_end = squad_helper.exp_mask(
          tf.squeeze(
              tf.layers.dense(tf.concat([x1, x3], 2), 1, name='logits2'), 2),
          context_lengths)

    return logits_start, logits_end, modeling_layers