import tensorflow as tf

# misc_util and squad_helper are project-local helper modules; minimal,
# assumption-labeled sketches of the functions used from them appear below.

  def _build(self, features):
    training = self.mode == tf.estimator.ModeKeys.TRAIN
    # The graph is pipelined across four GPUs; each stage below is pinned to
    # its own device via misc_util.get_device_str.
    base_gpu = 0
    num_gpus = 4
    context_lengths = features['context_num_words']

    # Deep Embedding
    gpu_id = base_gpu
    with tf.device(misc_util.get_device_str(gpu_id, num_gpus)):
      x, q = self._build_embed(features, training)

    # Embed Encoder & Attention
    gpu_id += 1
    with tf.device(misc_util.get_device_str(gpu_id, num_gpus)):
      x_final = self._build_embed_encoder_and_attention(
          x, q, context_lengths, features['question_num_words'])

    # Modeling encoder
    gpu_id += 1
    logits_start, logits_end, modeling_layers = self._build_model_encoder(
        x_final, context_lengths,
        gpu_id=gpu_id, num_gpus=num_gpus)

    # Predict no_answer_bias
    no_answer_bias = self._build_no_answer_bias(
        modeling_layers, context_lengths)

    # Predictions
    predictions = self._build_predictions(features, logits_start, logits_end,
                                          no_answer_bias)

    misc_util.print_vars(label='All variables')
    return predictions

  def _build_model_encoder(self, x, context_lengths, gpu_id=0, num_gpus=1):
    """Build modeling encoder and return start/end logits.

    Args:
      x: input Tensor of shape [batch, max_length, dim].
      context_lengths: length Tensor of shape [batch].
      gpu_id: start GPU id.
      num_gpus: number of GPUs available.

    Returns:
      logits_start: Tensor of shape [batch, max_length].
      logits_end: Tensor of shape [batch, max_length].
      modeling_layers: a list of modeling layers, from bottom to top,
        each has shape [batch, max_length, dim].
    """
    output_keep_prob = self.config['output_keep_prob']

    with tf.device(misc_util.get_device_str(gpu_id, num_gpus)):
      # A single encoder submodule is instantiated and reused for every
      # modeling pass, so all four layers below share weights.
      encoder_model = self.init_submodule(
          self.config['encoder_model'], name='encoder_model')
      x0 = encoder_model(x, context_lengths)['outputs']
      x0 = squad_helper.dropout_wrapper(x0, output_keep_prob, self.mode)
      x1 = encoder_model(x0, context_lengths)['outputs']
      x1 = squad_helper.dropout_wrapper(x1, output_keep_prob, self.mode)

    gpu_id += 1
    with tf.device(misc_util.get_device_str(gpu_id, num_gpus)):
      x2 = encoder_model(x1, context_lengths)['outputs']
      x2 = squad_helper.dropout_wrapper(x2, output_keep_prob, self.mode)
      x3 = encoder_model(x2, context_lengths)['outputs']
      x3 = squad_helper.dropout_wrapper(x3, output_keep_prob, self.mode)

      # Start logits are computed from [x1, x2] and end logits from [x1, x3];
      # positions beyond each context's length are masked out.
      logits_start = squad_helper.exp_mask(
          tf.squeeze(
              tf.layers.dense(tf.concat([x1, x2], 2), 1, name='logits1'), 2),
          context_lengths)
      logits_end = squad_helper.exp_mask(
          tf.squeeze(
              tf.layers.dense(tf.concat([x1, x3], 2), 1, name='logits2'), 2),
          context_lengths)
    modeling_layers = [x0, x1, x2, x3]

    return logits_start, logits_end, modeling_layers
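
# For reference: minimal sketches of the misc_util helpers used above. These
# are assumptions reconstructed from the call sites (get_device_str follows
# the usual round-robin device-placement convention), not the project's
# actual implementations.
def get_device_str(device_id, num_gpus):
  """Return a device string, round-robining over the available GPUs."""
  if num_gpus == 0:
    return '/cpu:0'
  return '/gpu:%d' % (device_id % num_gpus)


def print_vars(label='Variables'):
  """Log the name and shape of every trainable variable."""
  tf.logging.info('# %s', label)
  for var in tf.trainable_variables():
    tf.logging.info('  %s %s', var.name, var.shape)
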
def build_embedding_layer(features, mode, params, reuse=False):
    """Common embedding layer for feature and kernel functions.

  Args:
    features: A dictionary containing features, directly copied from `model_fn`.
    mode: Mode.
    params: Contains parameters, directly copied from `model_fn`.
    reuse: Reuse variables.

  Returns:
    `(x, q)` where `x` is embedded representation of context, and `q` is the
    embedded representation of the question.
  """
    with tf.variable_scope('embedding_layer', reuse=reuse):
        training = mode == tf.estimator.ModeKeys.TRAIN
        with tf.variable_scope('embedding'):
            if params.get('use_char', True):
                tf.logging.info('# Char embeddings')
                # self-trained character embedding
                char_emb_mat = tf.get_variable(
                    'char_emb_mat',
                    [params['char_vocab_size'], params['char_emb_size']])
                if training:
                    char_emb_mat = tf.nn.dropout(
                        char_emb_mat,
                        keep_prob=1.0 - params['char_embedding_dropout'],
                        noise_shape=[params['char_vocab_size'], 1])
                xc = tf.nn.embedding_lookup(
                    char_emb_mat,
                    features['indexed_context_chars'][:, 1:-1, :])
                qc = tf.nn.embedding_lookup(
                    char_emb_mat,
                    features['indexed_question_chars'][:, 1:-1, :])
                # Max-pool over the character dimension to get one vector
                # per word.
                xc = tf.reduce_max(xc, 2)
                qc = tf.reduce_max(qc, 2)
            else:
                xc, qc = None, None

            # GloVe embedding
            if params['use_glove']:
                _, xw, qw = squad_helper.glove_layer(features, mode, params)
            else:
                xw, qw = None, None

            # MT ELMo: contextual vectors from a pretrained translation model.
            x_mt, q_mt = None, None
            gpu_id = 1
            if params['mt_elmo']:
                tf.logging.info('# MT ELMO gpu_id %d/%d', gpu_id,
                                params['num_gpus'])
                with tf.device(
                        misc_util.get_device_str(gpu_id, params['num_gpus'])):
                    # Translation vectors
                    x_mt = squad_helper.embed_translation(
                        features['context_words'],
                        features['context_num_words'], params['mt_ckpt_path'],
                        params['include_mt_embeddings'])
                    q_mt = squad_helper.embed_translation(
                        features['question_words'],
                        features['question_num_words'], params['mt_ckpt_path'],
                        params['include_mt_embeddings'])

            # ELMo
            x_elmo, q_elmo = None, None
            if params['elmo']:
                gpu_id += 1
                tf.logging.info('# ELMO gpu_id %d/%d', gpu_id,
                                params['num_gpus'])
                with tf.device(
                        misc_util.get_device_str(gpu_id, params['num_gpus'])):
                    # ELMo vectors
                    if params['elmo_option'] == 'elmo':
                        x_elmo = squad_helper.embed_elmo_chars(
                            features['indexed_context_chars'], 128,
                            params['elmo_path'], training, params['num_gpus'],
                            params['base_gpu_elmo'])
                        q_elmo = squad_helper.embed_elmo_chars(
                            features['indexed_question_chars'], 128,
                            params['elmo_path'], training, params['num_gpus'],
                            params['base_gpu_elmo'])
                    else:
                        x_elmo = squad_helper.embed_elmo_sentences(
                            features['tokenized_context'], MAX_BATCH_SIZE,
                            params['elmo_path'], training,
                            params['elmo_option'])
                        q_elmo = squad_helper.embed_elmo_sentences(
                            features['tokenized_question'], MAX_BATCH_SIZE,
                            params['elmo_path'], training,
                            params['elmo_option'])

    tf.logging.info('# Done build_embedding_layer')

    return xc, qc, xw, qw, x_mt, q_mt, x_elmo, q_elmo
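
# For reference: minimal sketches of the squad_helper utilities relied on
# above. These are reconstructions from the call sites, not the actual
# implementations: dropout_wrapper applies dropout only in TRAIN mode, and
# exp_mask pushes logits at padded positions toward -inf so that a downstream
# softmax assigns them ~zero probability.
VERY_NEGATIVE_NUMBER = -1e30


def dropout_wrapper(inputs, keep_prob, mode):
    """Apply dropout only when training; identity otherwise."""
    if mode == tf.estimator.ModeKeys.TRAIN and keep_prob < 1.0:
        return tf.nn.dropout(inputs, keep_prob=keep_prob)
    return inputs


def exp_mask(logits, lengths):
    """Mask logits beyond each sequence's length with a very negative value."""
    # mask is 1.0 inside each sequence and 0.0 at padded positions.
    mask = tf.sequence_mask(lengths, tf.shape(logits)[1], dtype=tf.float32)
    return logits + (1.0 - mask) * VERY_NEGATIVE_NUMBER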