Beispiel #1
0
    def test_stacked_bilstm_compatibility(self):
        """Check that a cuDNN (GPU) checkpoint restores into the CPU graph.

        The stacked BiLSTM is built on the GPU with `use_cudnn=True`, run once
        and saved to a checkpoint. The same layer is then rebuilt on the CPU
        with `use_cudnn=False`, restored from that checkpoint, and its output
        is compared against the GPU output on the first `input_len[i]` steps
        of each sequence (steps past the sequence length are not compared).
        """
        # Local import: only needed here, for temporary-directory cleanup.
        import shutil

        checkpoint_dir = tempfile.mkdtemp(prefix="checkpoint_dir")
        # mkdtemp never deletes what it creates; make sure the checkpoint
        # directory is removed even when the test fails part-way through.
        self.addCleanup(shutil.rmtree, checkpoint_dir, ignore_errors=True)
        checkpoint_path = os.path.join(checkpoint_dir, "model.ckpt")

        input_len = [4, 5, 2]
        # Arguments shared by both graphs, so that the GPU and CPU variants
        # differ only in device placement and in `use_cudnn`.
        bilstm_kwargs = dict(
            input_emb=np.random.uniform(size=[3, 5, 9]).astype(np.float32),
            input_len=input_len,
            hidden_size=10,
            num_layers=3,
            dropout_ratio=0.0,
            mode=tf.estimator.ModeKeys.TRAIN)

        # Make sure we fail explicitly if the specified devices can't be used.
        config = tf.ConfigProto(allow_soft_placement=False,
                                log_device_placement=True)

        # Build and run on the GPU with cuDNN, then save the variables.
        with tf.Graph().as_default():
            with tf.device("/gpu:0"):
                output_emb = cudnn_layers.stacked_bilstm(
                    use_cudnn=True, **bilstm_kwargs)
            saver = tf.train.Saver()
            with tf.Session(config=config) as sess:
                sess.run(tf.global_variables_initializer())
                gpu_output_emb = sess.run(output_emb)
                saver.save(sess, checkpoint_path)

        # Rebuild on the CPU without cuDNN. The variables are restored from
        # the checkpoint rather than initialized.
        with tf.Graph().as_default():
            with tf.device("/cpu:0"):
                output_emb = cudnn_layers.stacked_bilstm(
                    use_cudnn=False, **bilstm_kwargs)
            saver = tf.train.Saver()
            with tf.Session(config=config) as sess:
                saver.restore(sess, checkpoint_path)
                cpu_output_emb = sess.run(output_emb)

        # Compare only the first `length` steps of each sequence.
        for cpu_seq, gpu_seq, length in zip(cpu_output_emb, gpu_output_emb,
                                            input_len):
            self.assertAllClose(cpu_seq[:length], gpu_seq[:length])
Beispiel #2
0
 def test_stacked_bilstm(self):
     """Check the output shape of a 3-layer stacked BiLSTM in TRAIN mode."""
     batch_size, max_len, emb_dim = 3, 5, 8
     hidden_size = 10
     with tf.Graph().as_default():
         embeddings = tf.random_uniform([batch_size, max_len, emb_dim])
         lengths = tf.constant([4, 5, 2])
         encoded = cudnn_layers.stacked_bilstm(
             input_emb=embeddings,
             input_len=lengths,
             hidden_size=hidden_size,
             num_layers=3,
             dropout_ratio=0.2,
             mode=tf.estimator.ModeKeys.TRAIN)
         with tf.Session() as sess:
             sess.run(tf.global_variables_initializer())
             encoded_value = sess.run(encoded)
         # The last axis is expected to be twice the hidden size
         # (presumably one half per LSTM direction).
         self.assertAllEqual(encoded_value.shape,
                             [batch_size, max_len, hidden_size * 2])
Beispiel #3
0
def apply_lstm(x, seq_len):
    """Encode `x` with a single-layer bi-directional LSTM.

    The layer is always built with `mode=TRAIN`, a dropout ratio of 0.0 and
    `use_cudnn=None`; its hidden size is read from `FLAGS.lstm_dim`.

    Args:
      x: <tf.float32>[batch, seq_len, dim]
      seq_len: <tf.int32>[batch] or None, sequence lengths of `x`

    Returns:
      out, <tf.float32>[batch, seq_len, out_dim]
    """
    encoded = cudnn_layers.stacked_bilstm(
        input_emb=x,
        input_len=seq_len,
        hidden_size=FLAGS.lstm_dim,
        num_layers=1,
        dropout_ratio=0.0,
        mode=tf_estimator.ModeKeys.TRAIN,
        use_cudnn=None,
    )
    return encoded
def score_endpoints(question_emb,
                    question_len,
                    context_emb,
                    context_len,
                    hidden_size,
                    num_layers,
                    dropout_ratio,
                    mode,
                    use_cudnn=None):
    """Score every context position as a candidate start and end point.

    The context embeddings are concatenated with a question-attended view of
    themselves (`_attend_to_question`). Context and question are then each
    encoded by their own stacked BiLSTM, under the variable scopes
    "contextualize_context" and "contextualize_question". The encoded question
    is pooled with `_attention_pool` and scored against the encoded context
    with `_bilinear_score`, once under scope "start_scores" and once under
    "end_scores". In TRAIN mode, dropout is applied to both encoder outputs
    and to the pooled question. Finally the log of the context length mask is
    added, so positions at or beyond `context_len` receive log(0) = -inf.

    Args:
      question_emb: <float32> [batch_size, max_question_len, hidden_size]
      question_len: <int32> [batch_size]
      context_emb: <float32> [batch_size, max_context_len, hidden_size]
      context_len: <int32> [batch_size]
      hidden_size: Size of hidden layers.
      num_layers: Number of LSTM layers.
      dropout_ratio: The probability of dropping out hidden units.
      mode: Object of type tf.estimator.ModeKeys.
      use_cudnn: Specify the use of cudnn. `None` denotes automatic selection.

    Returns:
      start_scores: <float32> [batch_size, max_context_len]
      end_scores: <float32> [batch_size, max_context_len]
    """
    is_training = mode == tf_estimator.ModeKeys.TRAIN

    def _contextualize(scope_name, emb, lengths):
        """Run the stacked BiLSTM over `emb` inside its own variable scope."""
        with tf.variable_scope(scope_name):
            return cudnn_layers.stacked_bilstm(
                input_emb=emb,
                input_len=lengths,
                hidden_size=hidden_size,
                num_layers=num_layers,
                dropout_ratio=dropout_ratio,
                mode=mode,
                use_cudnn=use_cudnn)

    def _dropout_if_training(tensor):
        """Apply dropout in TRAIN mode; return `tensor` unchanged otherwise."""
        if not is_training:
            return tensor
        return tf.nn.dropout(tensor, 1.0 - dropout_ratio)

    def _score(scope_name, context_repr, question_repr):
        """Score each context position against the question in `scope_name`."""
        with tf.variable_scope(scope_name):
            return _bilinear_score(context_repr, question_repr)

    # [batch_size, max_question_len]; 1.0 on real tokens, 0.0 on padding.
    max_question_len = tensor_utils.shape(question_emb, 1)
    question_mask = tf.sequence_mask(
        question_len, max_question_len, dtype=tf.float32)

    # [batch_size, max_context_len, hidden_size]
    attended_emb = _attend_to_question(
        context_emb=context_emb,
        question_emb=question_emb,
        question_mask=question_mask,
        hidden_size=hidden_size)

    # [batch_size, max_context_len, hidden_size * 2]
    joint_context_emb = tf.concat([context_emb, attended_emb], axis=-1)

    # Encoder outputs: [batch_size, max_*_len, out_dim]. NOTE(review): out_dim
    # is presumably 2 * hidden_size for a BiLSTM -- confirm against
    # cudnn_layers.stacked_bilstm.
    context_repr = _contextualize(
        "contextualize_context", joint_context_emb, context_len)
    question_repr = _contextualize(
        "contextualize_question", question_emb, question_len)

    context_repr = _dropout_if_training(context_repr)
    question_repr = _dropout_if_training(question_repr)

    # One vector per question, pooled over the unmasked question positions.
    pooled_question = _attention_pool(question_repr, question_mask)
    pooled_question = _dropout_if_training(pooled_question)

    # [batch_size, max_context_len]
    raw_start_scores = _score("start_scores", context_repr, pooled_question)
    raw_end_scores = _score("end_scores", context_repr, pooled_question)

    # log(1) = 0 keeps the scores of real positions; log(0) = -inf pushes
    # padded positions out of contention.
    max_context_len = tensor_utils.shape(joint_context_emb, 1)
    context_log_mask = tf.log(
        tf.sequence_mask(context_len, max_context_len, dtype=tf.float32))

    start_scores = raw_start_scores + context_log_mask
    end_scores = raw_end_scores + context_log_mask
    return start_scores, end_scores