def _build(self, features):
  training = self.mode == tf.estimator.ModeKeys.TRAIN
  base_gpu = 0
  num_gpus = 4
  context_lengths = features['context_num_words']

  # Deep Embedding
  gpu_id = base_gpu
  with tf.device(misc_util.get_device_str(gpu_id, num_gpus)):
    x, q = self._build_embed(features, training)

  # Embed Encoder & Attention
  gpu_id += 1
  with tf.device(misc_util.get_device_str(gpu_id, num_gpus)):
    x_final = self._build_embed_encoder_and_attention(
        x, q, context_lengths, features['question_num_words'])

  # Modeling encoder
  gpu_id += 1
  logits_start, logits_end, modeling_layers = self._build_model_encoder(
      x_final, context_lengths, gpu_id=gpu_id, num_gpus=num_gpus)

  # Predict no_answer_bias
  no_answer_bias = self._build_no_answer_bias(modeling_layers, context_lengths)

  # Predictions
  predictions = self._build_predictions(
      features, logits_start, logits_end, no_answer_bias)

  misc_util.print_vars(label='All variables')
  return predictions
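
# Note on device placement: the strings passed to tf.device above come from
# misc_util.get_device_str, which is defined elsewhere in this repo. A minimal
# sketch of the assumed behavior (round-robin over num_gpus, falling back to
# CPU when no GPU is available):
#
#   def get_device_str(device_id, num_gpus):
#     if num_gpus == 0:
#       return '/cpu:0'
#     return '/gpu:%d' % (device_id % num_gpus)
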
def _build_model_encoder(self, x, context_lengths, gpu_id=0, num_gpus=1):
  """Build modeling encoder and return start/end logits.

  Args:
    x: input Tensor of shape [batch, max_length, dim].
    context_lengths: length Tensor of shape [batch].
    gpu_id: start GPU id.
    num_gpus: number of GPUs available.

  Returns:
    logits_start: Tensor of shape [batch, max_length].
    logits_end: Tensor of shape [batch, max_length].
    modeling_layers: a list of modeling layers, from bottom to top, each of
      shape [batch, max_length, dim].
  """
  output_keep_prob = self.config['output_keep_prob']
  with tf.device(misc_util.get_device_str(gpu_id, num_gpus)):
    encoder_model = self.init_submodule(
        self.config['encoder_model'], name='encoder_model')
    x0 = encoder_model(x, context_lengths)['outputs']
    x0 = squad_helper.dropout_wrapper(x0, output_keep_prob, self.mode)
    x1 = encoder_model(x0, context_lengths)['outputs']
    x1 = squad_helper.dropout_wrapper(x1, output_keep_prob, self.mode)

  gpu_id += 1
  with tf.device(misc_util.get_device_str(gpu_id, num_gpus)):
    x2 = encoder_model(x1, context_lengths)['outputs']
    x2 = squad_helper.dropout_wrapper(x2, output_keep_prob, self.mode)
    x3 = encoder_model(x2, context_lengths)['outputs']
    x3 = squad_helper.dropout_wrapper(x3, output_keep_prob, self.mode)

    logits_start = squad_helper.exp_mask(
        tf.squeeze(
            tf.layers.dense(tf.concat([x1, x2], 2), 1, name='logits1'), 2),
        context_lengths)
    logits_end = squad_helper.exp_mask(
        tf.squeeze(
            tf.layers.dense(tf.concat([x1, x3], 2), 1, name='logits2'), 2),
        context_lengths)

  modeling_layers = [x0, x1, x2, x3]
  return logits_start, logits_end, modeling_layers
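
# Note on the helpers used above: squad_helper.dropout_wrapper and
# squad_helper.exp_mask are defined elsewhere in this repo. Minimal sketches of
# the assumed behavior (dropout applied only in TRAIN mode; logits at padded
# positions pushed to a large negative value so they vanish under softmax):
#
#   def dropout_wrapper(x, keep_prob, mode):
#     if mode == tf.estimator.ModeKeys.TRAIN:
#       return tf.nn.dropout(x, keep_prob=keep_prob)
#     return x
#
#   def exp_mask(logits, lengths, mask_value=-1e30):
#     mask = tf.sequence_mask(
#         lengths, tf.shape(logits)[1], dtype=logits.dtype)
#     return logits * mask + (1.0 - mask) * mask_value
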
def build_embedding_layer(features, mode, params, reuse=False):
  """Common embedding layer for feature and kernel functions.

  Args:
    features: A dictionary containing features, directly copied from
      `model_fn`.
    mode: A `tf.estimator.ModeKeys` value.
    params: A dictionary of parameters, directly copied from `model_fn`.
    reuse: Reuse variables.

  Returns:
    A tuple `(xc, qc, xw, qw, x_mt, q_mt, x_elmo, q_elmo)` of the character,
    GloVe, translation (MT), and ELMo embeddings of the context (`x*`) and of
    the question (`q*`). Entries for disabled embedding types are None.
  """
  with tf.variable_scope('embedding_layer', reuse=reuse):
    training = mode == tf.estimator.ModeKeys.TRAIN
    with tf.variable_scope('embedding'):
      if params.get('use_char', True):
        tf.logging.info('# Char embeddings')
        # self-trained character embedding
        char_emb_mat = tf.get_variable(
            'char_emb_mat',
            [params['char_vocab_size'], params['char_emb_size']])
        if training:
          char_emb_mat = tf.nn.dropout(
              char_emb_mat,
              keep_prob=1.0 - params['char_embedding_dropout'],
              noise_shape=[params['char_vocab_size'], 1])
        xc = tf.nn.embedding_lookup(
            char_emb_mat, features['indexed_context_chars'][:, 1:-1, :])
        qc = tf.nn.embedding_lookup(
            char_emb_mat, features['indexed_question_chars'][:, 1:-1, :])
        xc = tf.reduce_max(xc, 2)
        qc = tf.reduce_max(qc, 2)
      else:
        xc, qc = None, None

      # glove embedding
      if params['use_glove']:
        _, xw, qw = squad_helper.glove_layer(features, mode, params)
      else:
        xw, qw = None, None

      # MT ELMO
      x_mt, q_mt = None, None
      gpu_id = 1
      if params['mt_elmo']:
        tf.logging.info('# MT ELMO gpu_id %d/%d', gpu_id, params['num_gpus'])
        with tf.device(
            misc_util.get_device_str(gpu_id, params['num_gpus'])):
          # Translation vectors
          x_mt = squad_helper.embed_translation(
              features['context_words'], features['context_num_words'],
              params['mt_ckpt_path'], params['include_mt_embeddings'])
          q_mt = squad_helper.embed_translation(
              features['question_words'], features['question_num_words'],
              params['mt_ckpt_path'], params['include_mt_embeddings'])

      # ELMO
      x_elmo, q_elmo = None, None
      if params['elmo']:
        gpu_id += 1
        tf.logging.info('# ELMO gpu_id %d/%d', gpu_id, params['num_gpus'])
        with tf.device(
            misc_util.get_device_str(gpu_id, params['num_gpus'])):
          # elmo vectors
          if params['elmo_option'] == 'elmo':
            x_elmo = squad_helper.embed_elmo_chars(
                features['indexed_context_chars'], 128, params['elmo_path'],
                training, params['num_gpus'], params['base_gpu_elmo'])
            q_elmo = squad_helper.embed_elmo_chars(
                features['indexed_question_chars'], 128, params['elmo_path'],
                training, params['num_gpus'], params['base_gpu_elmo'])
          else:
            x_elmo = squad_helper.embed_elmo_sentences(
                features['tokenized_context'], MAX_BATCH_SIZE,
                params['elmo_path'], training, params['elmo_option'])
            q_elmo = squad_helper.embed_elmo_sentences(
                features['tokenized_question'], MAX_BATCH_SIZE,
                params['elmo_path'], training, params['elmo_option'])

  tf.logging.info('# Done build_embedding_layer')
  return xc, qc, xw, qw, x_mt, q_mt, x_elmo, q_elmo
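
# Illustrative only (not part of the original module): a caller would typically
# drop the None entries returned above and concatenate the remaining context
# (or question) embeddings along the feature axis. The helper below is a
# hypothetical sketch of that pattern, not the repo's actual combiner.
def _concat_present_embeddings(embeddings):
  """Concatenates the non-None embeddings along the last axis (sketch)."""
  present = [e for e in embeddings if e is not None]
  return tf.concat(present, axis=-1)


# Hypothetical usage:
#   xc, qc, xw, qw, x_mt, q_mt, x_elmo, q_elmo = build_embedding_layer(
#       features, mode, params)
#   x = _concat_present_embeddings([xc, xw, x_mt, x_elmo])
#   q = _concat_present_embeddings([qc, qw, q_mt, q_elmo])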