def test_stacked_bilstm_compatibility(self):
  """Checks a checkpoint saved with `use_cudnn=True` loads with `use_cudnn=False`.

  The same stacked BiLSTM is built twice: once pinned to `/gpu:0` with
  `use_cudnn=True`, and once pinned to `/cpu:0` with `use_cudnn=False`. The
  first graph's variables are saved to a checkpoint, restored into the second
  graph, and the two outputs are compared on the non-padded prefix of every
  sequence.
  """
  import shutil  # Local import: only needed to clean up the temp directory.

  checkpoint_dir = tempfile.mkdtemp(prefix="checkpoint_dir")
  # `mkdtemp` never deletes what it creates; register the removal right away
  # so the directory is cleaned up even when an assertion below fails.
  self.addCleanup(shutil.rmtree, checkpoint_dir, ignore_errors=True)
  checkpoint_path = os.path.join(checkpoint_dir, "model.ckpt")

  hidden_size = 10
  num_layers = 3
  dropout_ratio = 0.0
  input_emb = np.random.uniform(size=[3, 5, 9]).astype(np.float32)
  input_len = [4, 5, 2]

  # Make sure we fail explicitly if the specified devices can't be used.
  config = tf.ConfigProto(allow_soft_placement=False,
                          log_device_placement=True)

  # Build on the GPU with `use_cudnn=True`, run it, and save the variables.
  with tf.Graph().as_default():
    with tf.device("/gpu:0"):
      output_emb = cudnn_layers.stacked_bilstm(
          input_emb=input_emb,
          input_len=input_len,
          hidden_size=hidden_size,
          num_layers=num_layers,
          dropout_ratio=dropout_ratio,
          mode=tf.estimator.ModeKeys.TRAIN,
          use_cudnn=True)
    saver = tf.train.Saver()
    with tf.Session(config=config) as sess:
      sess.run(tf.global_variables_initializer())
      gpu_output_emb = sess.run(output_emb)
      saver.save(sess, checkpoint_path)

  # Rebuild on the CPU with `use_cudnn=False` and restore the saved variables
  # instead of initializing fresh ones.
  with tf.Graph().as_default():
    with tf.device("/cpu:0"):
      output_emb = cudnn_layers.stacked_bilstm(
          input_emb=input_emb,
          input_len=input_len,
          hidden_size=hidden_size,
          num_layers=num_layers,
          dropout_ratio=dropout_ratio,
          mode=tf.estimator.ModeKeys.TRAIN,
          use_cudnn=False)
    saver = tf.train.Saver()
    with tf.Session(config=config) as sess:
      saver.restore(sess, checkpoint_path)
      cpu_output_emb = sess.run(output_emb)

  # Only the first `l` steps of each sequence are compared; positions past
  # the sequence length are excluded from the check.
  for c, g, l in zip(cpu_output_emb, gpu_output_emb, input_len):
    self.assertAllClose(c[:l], g[:l])
def test_stacked_bilstm(self):
  """Runs a 3-layer stacked BiLSTM in TRAIN mode and checks the output shape."""
  batch_size, max_len, input_dim = 3, 5, 8
  hidden_size = 10

  with tf.Graph().as_default():
    embeddings = tf.random_uniform([batch_size, max_len, input_dim])
    lengths = tf.constant([4, 5, 2])
    bilstm_output = cudnn_layers.stacked_bilstm(
        input_emb=embeddings,
        input_len=lengths,
        hidden_size=hidden_size,
        num_layers=3,
        dropout_ratio=0.2,
        mode=tf.estimator.ModeKeys.TRAIN)

    with tf.Session() as sess:
      sess.run(tf.global_variables_initializer())
      output_value = sess.run(bilstm_output)
      # The last dimension is expected to be twice the hidden size.
      self.assertAllEqual(output_value.shape,
                          [batch_size, max_len, hidden_size * 2])
def apply_lstm(x, seq_len):
  """Encode `x` with a single-layer bi-directional LSTM.

  Thin wrapper around `cudnn_layers.stacked_bilstm` that fixes the hidden size
  to `FLAGS.lstm_dim`, uses one layer, a dropout ratio of 0.0, TRAIN mode, and
  passes `use_cudnn=None`.

  Args:
    x: <tf.float32>[batch, seq_len, dim]
    seq_len: <tf.int32>[batch], sequence lengths of `x`. The previous
      docstring suggests None may also be accepted -- TODO confirm against
      `stacked_bilstm`.

  Returns:
    out, <tf.float32>[batch, seq_len, out_dim]
  """
  lstm_kwargs = dict(
      input_emb=x,
      input_len=seq_len,
      hidden_size=FLAGS.lstm_dim,
      num_layers=1,
      dropout_ratio=0.0,
      mode=tf_estimator.ModeKeys.TRAIN,
      use_cudnn=None,
  )
  return cudnn_layers.stacked_bilstm(**lstm_kwargs)
def score_endpoints(question_emb,
                    question_len,
                    context_emb,
                    context_len,
                    hidden_size,
                    num_layers,
                    dropout_ratio,
                    mode,
                    use_cudnn=None):
  """Score every context position as a span start and as a span end.

  The context embeddings are concatenated with a question-attended version of
  themselves, then context and question are each run through their own
  stacked BiLSTM. The question is pooled to a single vector, and two
  separately scoped bilinear scorers compare that vector against each
  context position. Positions beyond `context_len` receive a score of -inf.

  Args:
    question_emb: <float32> [batch_size, max_question_len, hidden_size]
    question_len: <int32> [batch_size]
    context_emb: <float32>[batch_size, max_context_len, hidden_size]
    context_len: <int32> [batch_size]
    hidden_size: Size of hidden layers.
    num_layers: Number of LSTM layers.
    dropout_ratio: The probability of dropping out hidden units.
    mode: Object of type tf.estimator.ModeKeys.
    use_cudnn: Specify the use of cudnn. `None` denotes automatic selection.

  Returns:
    start_scores: <float32> [batch_size, max_context_words]
    end_scores: <float32> [batch_size, max_context_words]
  """
  is_training = mode == tf_estimator.ModeKeys.TRAIN

  def _contextualize(scope_name, emb, emb_len):
    """Runs the stacked BiLSTM over `emb` inside its own variable scope."""
    with tf.variable_scope(scope_name):
      return cudnn_layers.stacked_bilstm(
          input_emb=emb,
          input_len=emb_len,
          hidden_size=hidden_size,
          num_layers=num_layers,
          dropout_ratio=dropout_ratio,
          mode=mode,
          use_cudnn=use_cudnn)

  def _train_dropout(tensor):
    """Applies dropout in TRAIN mode; returns `tensor` untouched otherwise."""
    if not is_training:
      return tensor
    return tf.nn.dropout(tensor, 1.0 - dropout_ratio)

  # [batch_size, max_question_len]
  question_mask = tf.sequence_mask(
      question_len, tensor_utils.shape(question_emb, 1), dtype=tf.float32)

  # [batch_size, max_context_len, hidden_size]
  attended_emb = _attend_to_question(
      context_emb=context_emb,
      question_emb=question_emb,
      question_mask=question_mask,
      hidden_size=hidden_size)

  # [batch_size, max_context_len, hidden_size * 2]
  fused_context_emb = tf.concat([context_emb, attended_emb], -1)

  # NOTE(review): the previous comments gave the last dimension of both
  # BiLSTM outputs as hidden_size; a bi-directional encoder presumably yields
  # hidden_size * 2 -- confirm against `stacked_bilstm`.
  contextualized_context_emb = _contextualize(
      "contextualize_context", fused_context_emb, context_len)
  contextualized_question_emb = _contextualize(
      "contextualize_question", question_emb, question_len)

  contextualized_context_emb = _train_dropout(contextualized_context_emb)
  contextualized_question_emb = _train_dropout(contextualized_question_emb)

  # One pooled vector per question (last dimension as noted above).
  pooled_question_emb = _train_dropout(
      _attention_pool(contextualized_question_emb, question_mask))

  # [batch_size, max_context_len]
  with tf.variable_scope("start_scores"):
    start_scores = _bilinear_score(contextualized_context_emb,
                                   pooled_question_emb)
  with tf.variable_scope("end_scores"):
    end_scores = _bilinear_score(contextualized_context_emb,
                                 pooled_question_emb)

  # log(1) = 0 leaves valid positions unchanged, while log(0) = -inf rules
  # out padded positions.
  context_log_mask = tf.log(
      tf.sequence_mask(
          context_len,
          tensor_utils.shape(fused_context_emb, 1),
          dtype=tf.float32))

  return start_scores + context_log_mask, end_scores + context_log_mask