Example #1
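A unittest-style regression test for TensorFlow GitHub issue 18099 (apparently from a TensorFlow Addons seq2seq test suite): a scaled LuongAttention mechanism built with an explicit dtype, wrapped around a Keras LSTMCell via AttentionWrapper, should produce BasicDecoder outputs of that same dtype.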
    def testLuongScaledDType(self, dtype):
        # Test case for GitHub issue 18099
        encoder_outputs = self.encoder_outputs.astype(dtype)
        decoder_inputs = self.decoder_inputs.astype(dtype)
        attention_mechanism = wrapper.LuongAttention(
            units=self.units,
            memory=encoder_outputs,
            memory_sequence_length=self.encoder_sequence_length,
            scale=True,
            dtype=dtype,
        )
        cell = keras.layers.LSTMCell(self.units,
                                     recurrent_activation="sigmoid")
        cell = wrapper.AttentionWrapper(cell, attention_mechanism)

        sampler = sampler_py.TrainingSampler()
        my_decoder = basic_decoder.BasicDecoder(cell=cell, sampler=sampler)

        final_outputs, final_state, _ = my_decoder(
            decoder_inputs,
            initial_state=cell.get_initial_state(batch_size=self.batch,
                                                 dtype=dtype),
            sequence_length=self.decoder_sequence_length)
        self.assertIsInstance(final_outputs, basic_decoder.BasicDecoderOutput)
        self.assertEqual(final_outputs.rnn_output.dtype, dtype)
        self.assertIsInstance(final_state, wrapper.AttentionWrapperState)
Example #2
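A pytest-style variant of the same dtype regression test, drawing its tensors from a DummyData2 helper and passing dtype explicitly to the cell, the AttentionWrapper, and the BasicDecoder as well.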
def test_luong_scaled_dtype(dtype):
    dummy_data = DummyData2()
    # Test case for GitHub issue 18099
    encoder_outputs = dummy_data.encoder_outputs.astype(dtype)
    decoder_inputs = dummy_data.decoder_inputs.astype(dtype)
    attention_mechanism = wrapper.LuongAttention(
        units=dummy_data.units,
        memory=encoder_outputs,
        memory_sequence_length=dummy_data.encoder_sequence_length,
        scale=True,
        dtype=dtype,
    )
    cell = tf.keras.layers.LSTMCell(dummy_data.units,
                                    recurrent_activation="sigmoid",
                                    dtype=dtype)
    cell = wrapper.AttentionWrapper(cell, attention_mechanism, dtype=dtype)

    sampler = sampler_py.TrainingSampler()
    my_decoder = basic_decoder.BasicDecoder(cell=cell,
                                            sampler=sampler,
                                            dtype=dtype)

    final_outputs, final_state, _ = my_decoder(
        decoder_inputs,
        initial_state=cell.get_initial_state(batch_size=dummy_data.batch,
                                             dtype=dtype),
        sequence_length=dummy_data.decoder_sequence_length,
    )
    assert isinstance(final_outputs, basic_decoder.BasicDecoderOutput)
    assert final_outputs.rnn_output.dtype == dtype
    assert isinstance(final_state, wrapper.AttentionWrapperState)
Example #3
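The same dtype check applied to a normalized BahdanauAttention mechanism (normalize=True instead of scale=True).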
    def testBahdanauNormalizedDType(self, dtype):
        encoder_outputs = self.encoder_outputs.astype(dtype)
        decoder_inputs = self.decoder_inputs.astype(dtype)
        attention_mechanism = wrapper.BahdanauAttention(
            units=self.units,
            memory=encoder_outputs,
            memory_sequence_length=self.encoder_sequence_length,
            normalize=True,
            dtype=dtype,
        )
        cell = tf.keras.layers.LSTMCell(
            self.units, recurrent_activation="sigmoid", dtype=dtype
        )
        cell = wrapper.AttentionWrapper(cell, attention_mechanism, dtype=dtype)

        sampler = sampler_py.TrainingSampler()
        my_decoder = basic_decoder.BasicDecoder(cell=cell, sampler=sampler, dtype=dtype)

        final_outputs, final_state, _ = my_decoder(
            decoder_inputs,
            initial_state=cell.get_initial_state(batch_size=self.batch, dtype=dtype),
            sequence_length=self.decoder_sequence_length,
        )
        self.assertIsInstance(final_outputs, basic_decoder.BasicDecoderOutput)
        self.assertEqual(final_outputs.rnn_output.dtype, dtype)
        self.assertIsInstance(final_state, wrapper.AttentionWrapperState)
Example #4
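Checks that a BasicDecoder driven by a TrainingSampler matches a Keras RNN layer run over the same inputs: when use_sequence_length is set, the RNN is masked and the decoder imputes finished steps, and outputs are compared over the first max_out time steps along with the final states.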
    def _testDynamicDecodeRNNWithTrainingHelperMatchesDynamicRNN(
            self, use_sequence_length):
        sequence_length = [3, 4, 3, 1, 0]
        batch_size = 5
        max_time = 8
        input_depth = 7
        cell_depth = 10
        max_out = max(sequence_length)

        with self.cached_session(use_gpu=True):
            inputs = np.random.randn(batch_size, max_time,
                                     input_depth).astype(np.float32)
            inputs = tf.constant(inputs)

            cell = tf.keras.layers.LSTMCell(cell_depth)
            zero_state = cell.get_initial_state(batch_size=batch_size,
                                                dtype=tf.float32)
            sampler = sampler_py.TrainingSampler()
            my_decoder = basic_decoder.BasicDecoder(
                cell=cell,
                sampler=sampler,
                impute_finished=use_sequence_length)

            final_decoder_outputs, final_decoder_state, _ = my_decoder(  # pylint: disable=not-callable
                inputs,
                initial_state=zero_state,
                sequence_length=sequence_length)

            rnn = tf.keras.layers.RNN(cell,
                                      return_sequences=True,
                                      return_state=True)
            mask = (tf.sequence_mask(sequence_length, maxlen=max_time)
                    if use_sequence_length else None)
            outputs = rnn(inputs, mask=mask, initial_state=zero_state)
            final_rnn_outputs = outputs[0]
            final_rnn_state = outputs[1:]
            if use_sequence_length:
                final_rnn_outputs *= tf.cast(tf.expand_dims(mask, -1),
                                             final_rnn_outputs.dtype)

            self.evaluate(tf.compat.v1.global_variables_initializer())
            eval_result = self.evaluate({
                "final_decoder_outputs": final_decoder_outputs,
                "final_decoder_state": final_decoder_state,
                "final_rnn_outputs": final_rnn_outputs,
                "final_rnn_state": final_rnn_state
            })

            # Decoder only runs out to max_out; ensure values are identical
            # to dynamic_rnn, which also zeros out outputs and passes along
            # state.
            self.assertAllClose(
                eval_result["final_decoder_outputs"].rnn_output,
                eval_result["final_rnn_outputs"][:, 0:max_out, :])
            if use_sequence_length:
                self.assertAllClose(eval_result["final_decoder_state"],
                                    eval_result["final_rnn_state"])
Example #5
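A minimal smoke test: BasicDecoder must accept an AttentionWrapper cell that has not been built yet, together with a Dense output layer.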
def test_basic_decoder_with_attention_wrapper():
    units = 32
    vocab_size = 1000
    attention_mechanism = attention_wrapper.LuongAttention(units)
    cell = tf.keras.layers.LSTMCell(units)
    cell = attention_wrapper.AttentionWrapper(cell, attention_mechanism)
    output_layer = tf.keras.layers.Dense(vocab_size)
    sampler = sampler_py.TrainingSampler()
    # BasicDecoder should accept an uninitialized AttentionWrapper.
    basic_decoder.BasicDecoder(cell, sampler, output_layer=output_layer)
Example #6
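A pytest port of the decoder-versus-RNN comparison, running eagerly and asserting with np.testing instead of evaluating tensors through a session.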
def test_dynamic_decode_rnn_with_training_helper_matches_dynamic_rnn(
        use_sequence_length):
    sequence_length = [3, 4, 3, 1, 0]
    batch_size = 5
    max_time = 8
    input_depth = 7
    cell_depth = 10
    max_out = max(sequence_length)

    inputs = np.random.randn(batch_size, max_time,
                             input_depth).astype(np.float32)
    inputs = tf.constant(inputs)

    cell = tf.keras.layers.LSTMCell(cell_depth)
    zero_state = cell.get_initial_state(batch_size=batch_size,
                                        dtype=tf.float32)
    sampler = sampler_py.TrainingSampler()
    my_decoder = basic_decoder.BasicDecoder(
        cell=cell, sampler=sampler, impute_finished=use_sequence_length)

    (
        final_decoder_outputs,
        final_decoder_state,
        _,
    ) = my_decoder(inputs,
                   initial_state=zero_state,
                   sequence_length=sequence_length)

    rnn = tf.keras.layers.RNN(cell, return_sequences=True, return_state=True)
    mask = (tf.sequence_mask(sequence_length, maxlen=max_time)
            if use_sequence_length else None)
    outputs = rnn(inputs, mask=mask, initial_state=zero_state)
    final_rnn_outputs = outputs[0]
    final_rnn_state = outputs[1:]
    if use_sequence_length:
        final_rnn_outputs *= tf.cast(tf.expand_dims(mask, -1),
                                     final_rnn_outputs.dtype)

    eval_result = {
        "final_decoder_outputs": final_decoder_outputs,
        "final_decoder_state": final_decoder_state,
        "final_rnn_outputs": final_rnn_outputs,
        "final_rnn_state": final_rnn_state,
    }

    # Decoder only runs out to max_out; ensure values are identical
    # to dynamic_rnn, which also zeros out outputs and passes along
    # state.
    np.testing.assert_allclose(
        eval_result["final_decoder_outputs"].rnn_output,
        eval_result["final_rnn_outputs"][:, 0:max_out, :],
    )
    if use_sequence_length:
        np.testing.assert_allclose(eval_result["final_decoder_state"],
                                   eval_result["final_rnn_state"])
Example #7
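Decodes inside a tf.function whose batch size is derived dynamically from the sequence_length tensor, then checks the output shapes and the per-example decoded lengths under the time_major and maximum_iterations parameters.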
def test_dynamic_decode_rnn(time_major, maximum_iterations):

    sequence_length = [3, 4, 3, 1, 0]
    batch_size = 5
    max_time = 8
    input_depth = 7
    cell_depth = 10
    max_out = max(sequence_length)

    cell = tf.keras.layers.LSTMCell(cell_depth)
    sampler = sampler_py.TrainingSampler(time_major=time_major)
    my_decoder = basic_decoder.BasicDecoder(
        cell=cell,
        sampler=sampler,
        output_time_major=time_major,
        maximum_iterations=maximum_iterations,
    )

    @tf.function(
        input_signature=(
            tf.TensorSpec([None, None, input_depth], dtype=tf.float32),
            tf.TensorSpec([None], dtype=tf.int32),
        )
    )
    def _decode(inputs, sequence_length):
        batch_size_t = tf.shape(sequence_length)[0]
        initial_state = cell.get_initial_state(
            batch_size=batch_size_t, dtype=inputs.dtype
        )
        return my_decoder(
            inputs, initial_state=initial_state, sequence_length=sequence_length
        )

    inputs = tf.random.normal([batch_size, max_time, input_depth])
    if time_major:
        inputs = tf.transpose(inputs, perm=[1, 0, 2])
    final_outputs, _, final_sequence_length = _decode(inputs, sequence_length)

    def _t(shape):
        if time_major:
            return (shape[1], shape[0]) + shape[2:]
        return shape

    assert (batch_size,) == tuple(final_sequence_length.shape.as_list())
    # Mostly a smoke test
    time_steps = max_out
    expected_length = sequence_length
    if maximum_iterations is not None:
        time_steps = min(max_out, maximum_iterations)
        expected_length = [min(x, maximum_iterations) for x in expected_length]
    assert _t((batch_size, time_steps, cell_depth)) == final_outputs.rnn_output.shape
    assert _t((batch_size, time_steps)) == final_outputs.sample_id.shape
    np.testing.assert_array_equal(expected_length, final_sequence_length)
Example #8
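An eager variant of the same shape and length checks that builds time-major or batch-major inputs up front and skips the shape assertions when maximum_iterations == 0, since no decode step runs in that case.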
def test_dynamic_decode_rnn(time_major, maximum_iterations):

    sequence_length = [3, 4, 3, 1, 0]
    batch_size = 5
    max_time = 8
    input_depth = 7
    cell_depth = 10
    max_out = max(sequence_length)

    if time_major:
        inputs = np.random.randn(max_time, batch_size,
                                 input_depth).astype(np.float32)
    else:
        inputs = np.random.randn(batch_size, max_time,
                                 input_depth).astype(np.float32)
    input_t = tf.constant(inputs)
    cell = tf.keras.layers.LSTMCell(cell_depth)
    sampler = sampler_py.TrainingSampler(time_major=time_major)
    my_decoder = basic_decoder.BasicDecoder(
        cell=cell,
        sampler=sampler,
        output_time_major=time_major,
        maximum_iterations=maximum_iterations,
    )

    initial_state = cell.get_initial_state(batch_size=batch_size,
                                           dtype=tf.float32)
    (
        final_outputs,
        unused_final_state,
        final_sequence_length,
    ) = my_decoder(input_t,
                   initial_state=initial_state,
                   sequence_length=sequence_length)

    def _t(shape):
        if time_major:
            return (shape[1], shape[0]) + shape[2:]
        return shape

    assert (batch_size, ) == tuple(final_sequence_length.get_shape().as_list())
    # Mostly a smoke test
    time_steps = max_out
    expected_length = sequence_length
    if maximum_iterations is not None:
        time_steps = min(max_out, maximum_iterations)
        expected_length = [min(x, maximum_iterations) for x in expected_length]
    if maximum_iterations != 0:
        assert (_t((batch_size, time_steps,
                    cell_depth)) == final_outputs.rnn_output.shape)
        assert _t((batch_size, time_steps)) == final_outputs.sample_id.shape
    np.testing.assert_array_equal(expected_length, final_sequence_length)
Example #9
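A TF1-flavored version of the decoder-versus-RNN comparison, built on rnn_cell.LSTMCell, cell.zero_state, and rnn.dynamic_rnn.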
    def _testDynamicDecodeRNNWithTrainingHelperMatchesDynamicRNN(
            self, use_sequence_length):
        sequence_length = [3, 4, 3, 1, 0]
        batch_size = 5
        max_time = 8
        input_depth = 7
        cell_depth = 10
        max_out = max(sequence_length)

        with self.cached_session(use_gpu=True):
            inputs = np.random.randn(batch_size, max_time,
                                     input_depth).astype(np.float32)
            inputs = constant_op.constant(inputs)

            cell = rnn_cell.LSTMCell(cell_depth)
            zero_state = cell.zero_state(dtype=dtypes.float32,
                                         batch_size=batch_size)
            sampler = sampler_py.TrainingSampler()
            my_decoder = basic_decoder.BasicDecoder(
                cell=cell,
                sampler=sampler,
                impute_finished=use_sequence_length)

            final_decoder_outputs, final_decoder_state, _ = my_decoder(
                inputs,
                initial_state=zero_state,
                sequence_length=sequence_length)

            final_rnn_outputs, final_rnn_state = rnn.dynamic_rnn(
                cell,
                inputs,
                sequence_length=sequence_length
                if use_sequence_length else None,
                initial_state=zero_state)

            self.evaluate(variables.global_variables_initializer())
            eval_result = self.evaluate({
                "final_decoder_outputs": final_decoder_outputs,
                "final_decoder_state": final_decoder_state,
                "final_rnn_outputs": final_rnn_outputs,
                "final_rnn_state": final_rnn_state
            })

            # Decoder only runs out to max_out; ensure values are identical
            # to dynamic_rnn, which also zeros out outputs and passes along
            # state.
            self.assertAllClose(
                eval_result["final_decoder_outputs"].rnn_output,
                eval_result["final_rnn_outputs"][:, 0:max_out, :])
            if use_sequence_length:
                self.assertAllClose(eval_result["final_decoder_state"],
                                    eval_result["final_rnn_state"])
Example #10
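A parameterized helper that wraps an LSTMCell in an AttentionWrapper holding one or several attention mechanisms, decodes with a TrainingSampler, and compares output, state, and (optionally) stacked alignment-history shapes and values against precomputed summaries.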
    def _testWithMaybeMultiAttention(self,
                                     is_multi,
                                     create_attention_mechanisms,
                                     expected_final_output,
                                     expected_final_state,
                                     attention_mechanism_depths,
                                     alignment_history=False,
                                     expected_final_alignment_history=None,
                                     attention_layer_sizes=None,
                                     attention_layers=None,
                                     create_query_layer=False,
                                     create_memory_layer=True,
                                     create_attention_kwargs=None):
        # Allow is_multi to be True with a single mechanism so we can test
        # passing a single mechanism wrapped in a list.
        assert len(create_attention_mechanisms) == 1 or is_multi
        encoder_sequence_length = [3, 2, 3, 1, 1]
        decoder_sequence_length = [2, 0, 1, 2, 3]
        batch_size = 5
        encoder_max_time = 8
        decoder_max_time = 4
        input_depth = 7
        encoder_output_depth = 10
        cell_depth = 9
        create_attention_kwargs = create_attention_kwargs or {}

        if attention_layer_sizes is not None:
            # Compute sum of attention_layer_sizes. Use encoder_output_depth if
            # None.
            attention_depth = sum(
                attention_layer_size or encoder_output_depth
                for attention_layer_size in attention_layer_sizes)
        elif attention_layers is not None:
            # Compute sum of attention_layers output depth.
            attention_depth = sum(
                attention_layer.compute_output_shape(
                    [batch_size, cell_depth +
                     encoder_output_depth]).dims[-1].value
                for attention_layer in attention_layers)
        else:
            attention_depth = encoder_output_depth * len(
                create_attention_mechanisms)

        decoder_inputs = np.random.randn(batch_size, decoder_max_time,
                                         input_depth).astype(np.float32)
        encoder_outputs = np.random.randn(batch_size, encoder_max_time,
                                          encoder_output_depth).astype(
                                              np.float32)

        attention_mechanisms = []
        for creator, depth in zip(create_attention_mechanisms,
                                  attention_mechanism_depths):
            # Create a memory layer with deterministic initializer to avoid
            # randomness in the test between graph and eager.
            if create_query_layer:
                create_attention_kwargs["query_layer"] = keras.layers.Dense(
                    depth, kernel_initializer="ones", use_bias=False)
            if create_memory_layer:
                create_attention_kwargs["memory_layer"] = keras.layers.Dense(
                    depth, kernel_initializer="ones", use_bias=False)

            attention_mechanisms.append(
                creator(units=depth,
                        memory=encoder_outputs,
                        memory_sequence_length=encoder_sequence_length,
                        **create_attention_kwargs))

        with self.cached_session(use_gpu=True):
            attention_layer_size = attention_layer_sizes
            attention_layer = attention_layers
            if not is_multi:
                if attention_layer_size is not None:
                    attention_layer_size = attention_layer_size[0]
                if attention_layer is not None:
                    attention_layer = attention_layer[0]
            cell = keras.layers.LSTMCell(cell_depth,
                                         recurrent_activation="sigmoid",
                                         kernel_initializer="ones",
                                         recurrent_initializer="ones")
            cell = wrapper.AttentionWrapper(
                cell,
                attention_mechanisms if is_multi else attention_mechanisms[0],
                attention_layer_size=attention_layer_size,
                alignment_history=alignment_history,
                attention_layer=attention_layer)
            if cell._attention_layers is not None:
                for layer in cell._attention_layers:
                    layer.kernel_initializer = initializers.glorot_uniform(
                        seed=1337)

            sampler = sampler_py.TrainingSampler()
            my_decoder = basic_decoder.BasicDecoder(cell=cell, sampler=sampler)
            initial_state = cell.get_initial_state(dtype=tf.float32,
                                                   batch_size=batch_size)
            final_outputs, final_state, _ = my_decoder(
                decoder_inputs,
                initial_state=initial_state,
                sequence_length=decoder_sequence_length)

            self.assertIsInstance(final_outputs,
                                  basic_decoder.BasicDecoderOutput)
            self.assertIsInstance(final_state, wrapper.AttentionWrapperState)

            expected_time = (expected_final_state.time
                             if tf.executing_eagerly() else None)
            self.assertEqual(
                (batch_size, expected_time, attention_depth),
                tuple(final_outputs.rnn_output.get_shape().as_list()))
            self.assertEqual(
                (batch_size, expected_time),
                tuple(final_outputs.sample_id.get_shape().as_list()))

            self.assertEqual(
                (batch_size, attention_depth),
                tuple(final_state.attention.get_shape().as_list()))
            self.assertEqual(
                (batch_size, cell_depth),
                tuple(final_state.cell_state[0].get_shape().as_list()))
            self.assertEqual(
                (batch_size, cell_depth),
                tuple(final_state.cell_state[1].get_shape().as_list()))

            if alignment_history:
                if is_multi:
                    state_alignment_history = []
                    for history_array in final_state.alignment_history:
                        history = history_array.stack()
                        self.assertEqual(
                            (expected_time, batch_size, encoder_max_time),
                            tuple(history.get_shape().as_list()))
                        state_alignment_history.append(history)
                    state_alignment_history = tuple(state_alignment_history)
                else:
                    state_alignment_history = \
                        final_state.alignment_history.stack()
                    self.assertEqual(
                        (expected_time, batch_size, encoder_max_time),
                        tuple(state_alignment_history.get_shape().as_list()))
                tf.nest.assert_same_structure(
                    cell.state_size,
                    cell.get_initial_state(batch_size=batch_size,
                                           dtype=tf.float32))
                # Remove the history from final_state for purposes of the
                # remainder of the tests.
                final_state = final_state._replace(alignment_history=())  # pylint: disable=protected-access
            else:
                state_alignment_history = ()

            self.evaluate(tf.compat.v1.global_variables_initializer())
            eval_result = self.evaluate({
                "final_outputs":
                final_outputs,
                "final_state":
                final_state,
                "state_alignment_history":
                state_alignment_history,
            })

            final_output_info = tf.nest.map_structure(
                get_result_summary, eval_result["final_outputs"])
            final_state_info = tf.nest.map_structure(
                get_result_summary, eval_result["final_state"])
            print("final_output_info: ", final_output_info)
            print("final_state_info: ", final_state_info)

            tf.nest.map_structure(self.assertAllCloseOrEqual,
                                  expected_final_output, final_output_info)
            tf.nest.map_structure(self.assertAllCloseOrEqual,
                                  expected_final_state, final_state_info)
            # by default, the wrapper emits attention as output
            if alignment_history:
                final_alignment_history_info = tf.nest.map_structure(
                    get_result_summary, eval_result["state_alignment_history"])
                print("final_alignment_history_info: ",
                      final_alignment_history_info)
                tf.nest.map_structure(
                    self.assertAllCloseOrEqual,
                    # outputs are batch major but the stacked TensorArray is
                    # time major
                    expected_final_alignment_history,
                    final_alignment_history_info)
Example #11
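A single-mechanism pytest port of the helper above; it normalizes the attention_layer_size and attention_layer arguments into one-element lists before building the wrapper.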
def _test_with_attention(
    create_attention_mechanism,
    expected_final_output,
    expected_final_state,
    attention_mechanism_depth=3,
    alignment_history=False,
    expected_final_alignment_history=None,
    attention_layer_size=6,
    attention_layer=None,
    create_query_layer=False,
    create_memory_layer=True,
    create_attention_kwargs=None,
):
    attention_layer_sizes = ([attention_layer_size]
                             if attention_layer_size is not None else None)
    attention_layers = ([attention_layer]
                        if attention_layer is not None else None)
    create_attention_mechanisms = [create_attention_mechanism]
    attention_mechanism_depths = [attention_mechanism_depth]
    assert len(create_attention_mechanisms) == 1
    encoder_sequence_length = [3, 2, 3, 1, 1]
    decoder_sequence_length = [2, 0, 1, 2, 3]
    batch_size = 5
    encoder_max_time = 8
    decoder_max_time = 4
    input_depth = 7
    encoder_output_depth = 10
    cell_depth = 9
    create_attention_kwargs = create_attention_kwargs or {}

    if attention_layer_sizes is not None:
        # Compute sum of attention_layer_sizes. Use encoder_output_depth if
        # None.
        attention_depth = sum(
            attention_layer_size or encoder_output_depth
            for attention_layer_size in attention_layer_sizes)
    elif attention_layers is not None:
        # Compute sum of attention_layers output depth.
        attention_depth = sum(
            attention_layer.compute_output_shape(
                [batch_size, cell_depth + encoder_output_depth]).dims[-1].value
            for attention_layer in attention_layers)
    else:
        attention_depth = encoder_output_depth * len(
            create_attention_mechanisms)

    decoder_inputs = np.random.randn(batch_size, decoder_max_time,
                                     input_depth).astype(np.float32)
    encoder_outputs = np.random.randn(batch_size, encoder_max_time,
                                      encoder_output_depth).astype(np.float32)

    attention_mechanisms = []
    for creator, depth in zip(create_attention_mechanisms,
                              attention_mechanism_depths):
        # Create a memory layer with deterministic initializer to avoid
        # randomness in the test between graph and eager.
        if create_query_layer:
            create_attention_kwargs["query_layer"] = tf.keras.layers.Dense(
                depth, kernel_initializer="ones", use_bias=False)
        if create_memory_layer:
            create_attention_kwargs["memory_layer"] = tf.keras.layers.Dense(
                depth, kernel_initializer="ones", use_bias=False)

        attention_mechanisms.append(
            creator(
                units=depth,
                memory=encoder_outputs,
                memory_sequence_length=encoder_sequence_length,
                **create_attention_kwargs,
            ))

    attention_layer_size = attention_layer_sizes
    attention_layer = attention_layers
    if attention_layer_size is not None:
        attention_layer_size = attention_layer_size[0]
    if attention_layer is not None:
        attention_layer = attention_layer[0]
    cell = tf.keras.layers.LSTMCell(
        cell_depth,
        recurrent_activation="sigmoid",
        kernel_initializer="ones",
        recurrent_initializer="ones",
    )
    cell = wrapper.AttentionWrapper(
        cell,
        attention_mechanisms[0],
        attention_layer_size=attention_layer_size,
        alignment_history=alignment_history,
        attention_layer=attention_layer,
    )
    if cell._attention_layers is not None:
        for layer in cell._attention_layers:
            layer.kernel_initializer = tf.compat.v1.keras.initializers.glorot_uniform(
                seed=1337)

    sampler = sampler_py.TrainingSampler()
    my_decoder = basic_decoder.BasicDecoder(cell=cell, sampler=sampler)
    initial_state = cell.get_initial_state(dtype=tf.float32,
                                           batch_size=batch_size)
    final_outputs, final_state, _ = my_decoder(
        decoder_inputs,
        initial_state=initial_state,
        sequence_length=decoder_sequence_length,
    )

    assert isinstance(final_outputs, basic_decoder.BasicDecoderOutput)
    assert isinstance(final_state, wrapper.AttentionWrapperState)

    expected_time = max(decoder_sequence_length)
    assert (batch_size, expected_time, attention_depth) == tuple(
        final_outputs.rnn_output.get_shape().as_list())
    assert (batch_size, expected_time) == tuple(
        final_outputs.sample_id.get_shape().as_list())

    assert (batch_size, attention_depth) == tuple(
        final_state.attention.get_shape().as_list())
    assert (batch_size, cell_depth) == tuple(
        final_state.cell_state[0].get_shape().as_list())
    assert (batch_size, cell_depth) == tuple(
        final_state.cell_state[1].get_shape().as_list())

    if alignment_history:
        state_alignment_history = final_state.alignment_history.stack()
        assert (expected_time, batch_size, encoder_max_time) == tuple(
            state_alignment_history.get_shape().as_list())
        tf.nest.assert_same_structure(
            cell.state_size,
            cell.get_initial_state(batch_size=batch_size, dtype=tf.float32),
        )
        # Remove the history from final_state for purposes of the
        # remainder of the tests.
        final_state = final_state._replace(alignment_history=())  # pylint: disable=protected-access
    else:
        state_alignment_history = ()

    final_outputs = tf.nest.map_structure(np.array, final_outputs)
    final_state = tf.nest.map_structure(np.array, final_state)
    state_alignment_history = tf.nest.map_structure(np.array,
                                                    state_alignment_history)
    final_output_info = tf.nest.map_structure(get_result_summary,
                                              final_outputs)

    final_state_info = tf.nest.map_structure(get_result_summary, final_state)

    tf.nest.map_structure(assert_allclose_or_equal, expected_final_output,
                          final_output_info)
    tf.nest.map_structure(assert_allclose_or_equal, expected_final_state,
                          final_state_info)
    # by default, the wrapper emits attention as output
    if alignment_history:
        final_alignment_history_info = tf.nest.map_structure(
            get_result_summary, state_alignment_history)
        tf.nest.map_structure(
            assert_allclose_or_equal,
            # outputs are batch major but the stacked TensorArray is
            # time major
            expected_final_alignment_history,
            final_alignment_history_info,
        )
Example #12
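Initializes a BasicDecoder with an optional Dense output layer and runs a single decode step, checking output_size and output_dtype, the output and LSTM-state shapes, the finished flags after steps 0 and 1, and that sample_id equals the argmax of rnn_output.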
def test_step_with_training_helper_output_layer(use_output_layer):
    sequence_length = [3, 4, 3, 1, 0]
    batch_size = 5
    max_time = 8
    input_depth = 7
    cell_depth = 10
    output_layer_depth = 3

    inputs = np.random.randn(batch_size, max_time,
                             input_depth).astype(np.float32)
    input_t = tf.constant(inputs)
    cell = tf.keras.layers.LSTMCell(cell_depth)
    sampler = sampler_py.TrainingSampler(time_major=False)
    if use_output_layer:
        output_layer = tf.keras.layers.Dense(output_layer_depth,
                                             use_bias=False)
        expected_output_depth = output_layer_depth
    else:
        output_layer = None
        expected_output_depth = cell_depth
    initial_state = cell.get_initial_state(batch_size=batch_size,
                                           dtype=tf.float32)
    my_decoder = basic_decoder.BasicDecoder(cell=cell,
                                            sampler=sampler,
                                            output_layer=output_layer)

    (first_finished, first_inputs,
     first_state) = my_decoder.initialize(input_t,
                                          initial_state=initial_state,
                                          sequence_length=sequence_length)
    output_size = my_decoder.output_size
    output_dtype = my_decoder.output_dtype
    assert output_size == basic_decoder.BasicDecoderOutput(
        expected_output_depth, tf.TensorShape([]))

    assert basic_decoder.BasicDecoderOutput(tf.float32,
                                            tf.int32) == output_dtype

    (
        step_outputs,
        step_state,
        step_next_inputs,
        step_finished,
    ) = my_decoder.step(tf.constant(0), first_inputs, first_state)
    batch_size_t = my_decoder.batch_size

    assert len(first_state) == 2
    assert len(step_state) == 2
    assert type(step_outputs) is basic_decoder.BasicDecoderOutput
    assert (batch_size, expected_output_depth) == step_outputs[0].shape
    assert (batch_size, ) == step_outputs[1].shape
    assert (batch_size, cell_depth) == first_state[0].shape
    assert (batch_size, cell_depth) == first_state[1].shape
    assert (batch_size, cell_depth) == step_state[0].shape
    assert (batch_size, cell_depth) == step_state[1].shape

    if use_output_layer:
        # The output layer was accessed
        assert len(output_layer.variables) == 1

    eval_result = {
        "batch_size": batch_size_t,
        "first_finished": first_finished,
        "first_inputs": first_inputs,
        "first_state": first_state,
        "step_outputs": step_outputs,
        "step_state": step_state,
        "step_next_inputs": step_next_inputs,
        "step_finished": step_finished,
    }

    np.testing.assert_equal(
        np.asanyarray([False, False, False, False, True]),
        eval_result["first_finished"].numpy(),
    )
    np.testing.assert_equal(
        np.asanyarray([False, False, False, True, True]),
        eval_result["step_finished"].numpy(),
    )
    assert output_dtype.sample_id == eval_result[
        "step_outputs"].sample_id.dtype
    np.testing.assert_equal(
        np.argmax(eval_result["step_outputs"].rnn_output, -1),
        eval_result["step_outputs"].sample_id,
    )
Example #13
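The same single-step test, except that termination is driven either by a boolean mask, by a sequence_length vector, or by neither, depending on whether use_mask is True, False, or None.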
def test_step_with_training_helper_masked_input(use_mask):
    batch_size = 5
    max_time = 8
    sequence_length = ([max_time] * batch_size
                       if use_mask is None else [3, 4, 3, 1, 0])
    sequence_length = np.array(sequence_length, dtype=np.int32)
    mask = [[True] * l + [False] * (max_time - l) for l in sequence_length]
    input_depth = 7
    cell_depth = 10
    output_layer_depth = 3

    inputs = np.random.randn(batch_size, max_time,
                             input_depth).astype(np.float32)
    input_t = tf.constant(inputs)
    cell = tf.keras.layers.LSTMCell(cell_depth)
    sampler = sampler_py.TrainingSampler(time_major=False)
    output_layer = tf.keras.layers.Dense(output_layer_depth, use_bias=False)
    expected_output_depth = output_layer_depth
    initial_state = cell.get_initial_state(batch_size=batch_size,
                                           dtype=tf.float32)
    my_decoder = basic_decoder.BasicDecoder(cell=cell,
                                            sampler=sampler,
                                            output_layer=output_layer)

    if use_mask is None:
        (first_finished, first_inputs,
         first_state) = my_decoder.initialize(input_t,
                                              initial_state=initial_state)
    elif use_mask:
        (first_finished, first_inputs,
         first_state) = my_decoder.initialize(input_t,
                                              initial_state=initial_state,
                                              mask=mask)
    else:
        (first_finished, first_inputs, first_state) = my_decoder.initialize(
            input_t,
            initial_state=initial_state,
            sequence_length=sequence_length,
        )

    output_size = my_decoder.output_size
    output_dtype = my_decoder.output_dtype
    assert output_size == basic_decoder.BasicDecoderOutput(
        expected_output_depth, tf.TensorShape([]))

    assert basic_decoder.BasicDecoderOutput(tf.float32,
                                            tf.int32) == output_dtype

    (
        step_outputs,
        step_state,
        step_next_inputs,
        step_finished,
    ) = my_decoder.step(tf.constant(0), first_inputs, first_state)
    batch_size_t = my_decoder.batch_size

    assert len(first_state) == 2
    assert len(step_state) == 2
    assert type(step_outputs) is basic_decoder.BasicDecoderOutput
    assert (batch_size, expected_output_depth) == step_outputs[0].shape
    assert (batch_size, ) == step_outputs[1].shape
    assert (batch_size, cell_depth) == first_state[0].shape
    assert (batch_size, cell_depth) == first_state[1].shape
    assert (batch_size, cell_depth) == step_state[0].shape
    assert (batch_size, cell_depth) == step_state[1].shape

    assert len(output_layer.variables) == 1

    eval_result = {
        "batch_size": batch_size_t,
        "first_finished": first_finished,
        "first_inputs": first_inputs,
        "first_state": first_state,
        "step_outputs": step_outputs,
        "step_state": step_state,
        "step_next_inputs": step_next_inputs,
        "step_finished": step_finished,
    }

    np.testing.assert_equal(sequence_length == 0,
                            eval_result["first_finished"])
    np.testing.assert_equal((np.maximum(sequence_length - 1, 0) == 0),
                            eval_result["step_finished"])
    assert output_dtype.sample_id == eval_result[
        "step_outputs"].sample_id.dtype
    np.testing.assert_equal(
        np.argmax(eval_result["step_outputs"].rnn_output, -1),
        eval_result["step_outputs"].sample_id,
    )
Example #14
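The unittest counterpart of the single-step output-layer test, evaluating the intermediate tensors through self.evaluate inside a cached session.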
    def testStepWithTrainingHelperOutputLayer(self, use_output_layer):
        sequence_length = [3, 4, 3, 1, 0]
        batch_size = 5
        max_time = 8
        input_depth = 7
        cell_depth = 10
        output_layer_depth = 3

        with self.cached_session(use_gpu=True):
            inputs = np.random.randn(batch_size, max_time, input_depth).astype(
                np.float32
            )
            input_t = tf.constant(inputs)
            cell = tf.keras.layers.LSTMCell(cell_depth)
            sampler = sampler_py.TrainingSampler(time_major=False)
            if use_output_layer:
                output_layer = tf.keras.layers.Dense(output_layer_depth, use_bias=False)
                expected_output_depth = output_layer_depth
            else:
                output_layer = None
                expected_output_depth = cell_depth
            initial_state = cell.get_initial_state(
                batch_size=batch_size, dtype=tf.float32
            )
            my_decoder = basic_decoder.BasicDecoder(
                cell=cell, sampler=sampler, output_layer=output_layer
            )

            (first_finished, first_inputs, first_state) = my_decoder.initialize(
                input_t, initial_state=initial_state, sequence_length=sequence_length
            )
            output_size = my_decoder.output_size
            output_dtype = my_decoder.output_dtype
            self.assertEqual(
                basic_decoder.BasicDecoderOutput(
                    expected_output_depth, tf.TensorShape([])
                ),
                output_size,
            )
            self.assertEqual(
                basic_decoder.BasicDecoderOutput(tf.float32, tf.int32), output_dtype
            )

            (
                step_outputs,
                step_state,
                step_next_inputs,
                step_finished,
            ) = my_decoder.step(tf.constant(0), first_inputs, first_state)
            batch_size_t = my_decoder.batch_size

            self.assertLen(first_state, 2)
            self.assertLen(step_state, 2)
            self.assertIsInstance(step_outputs, basic_decoder.BasicDecoderOutput)
            self.assertEqual(
                (batch_size, expected_output_depth), step_outputs[0].get_shape()
            )
            self.assertEqual((batch_size,), step_outputs[1].get_shape())
            self.assertEqual((batch_size, cell_depth), first_state[0].get_shape())
            self.assertEqual((batch_size, cell_depth), first_state[1].get_shape())
            self.assertEqual((batch_size, cell_depth), step_state[0].get_shape())
            self.assertEqual((batch_size, cell_depth), step_state[1].get_shape())

            if use_output_layer:
                # The output layer was accessed
                self.assertEqual(len(output_layer.variables), 1)

            self.evaluate(tf.compat.v1.global_variables_initializer())
            eval_result = self.evaluate(
                {
                    "batch_size": batch_size_t,
                    "first_finished": first_finished,
                    "first_inputs": first_inputs,
                    "first_state": first_state,
                    "step_outputs": step_outputs,
                    "step_state": step_state,
                    "step_next_inputs": step_next_inputs,
                    "step_finished": step_finished,
                }
            )

            self.assertAllEqual(
                [False, False, False, False, True], eval_result["first_finished"]
            )
            self.assertAllEqual(
                [False, False, False, True, True], eval_result["step_finished"]
            )
            self.assertEqual(
                output_dtype.sample_id, eval_result["step_outputs"].sample_id.dtype
            )
            self.assertAllEqual(
                np.argmax(eval_result["step_outputs"].rnn_output, -1),
                eval_result["step_outputs"].sample_id,
            )
Example #15
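The unittest counterpart of the masked-input single-step test.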
    def testStepWithTrainingHelperMaskedInput(self, use_mask):
        batch_size = 5
        max_time = 8
        sequence_length = (
            [max_time] * batch_size if use_mask is None else [3, 4, 3, 1, 0]
        )
        sequence_length = np.array(sequence_length, dtype=np.int32)
        mask = [[True] * l + [False] * (max_time - l) for l in sequence_length]
        input_depth = 7
        cell_depth = 10
        output_layer_depth = 3

        with self.cached_session(use_gpu=True):
            inputs = np.random.randn(batch_size, max_time, input_depth).astype(
                np.float32
            )
            input_t = tf.constant(inputs)
            cell = tf.keras.layers.LSTMCell(cell_depth)
            sampler = sampler_py.TrainingSampler(time_major=False)
            output_layer = tf.keras.layers.Dense(output_layer_depth, use_bias=False)
            expected_output_depth = output_layer_depth
            initial_state = cell.get_initial_state(
                batch_size=batch_size, dtype=tf.float32
            )
            my_decoder = basic_decoder.BasicDecoder(
                cell=cell, sampler=sampler, output_layer=output_layer
            )

            if use_mask is None:
                (first_finished, first_inputs, first_state) = my_decoder.initialize(
                    input_t, initial_state=initial_state
                )
            elif use_mask:
                (first_finished, first_inputs, first_state) = my_decoder.initialize(
                    input_t, initial_state=initial_state, mask=mask
                )
            else:
                (first_finished, first_inputs, first_state) = my_decoder.initialize(
                    input_t,
                    initial_state=initial_state,
                    sequence_length=sequence_length,
                )

            output_size = my_decoder.output_size
            output_dtype = my_decoder.output_dtype
            self.assertEqual(
                basic_decoder.BasicDecoderOutput(
                    expected_output_depth, tf.TensorShape([])
                ),
                output_size,
            )
            self.assertEqual(
                basic_decoder.BasicDecoderOutput(tf.float32, tf.int32), output_dtype
            )

            (
                step_outputs,
                step_state,
                step_next_inputs,
                step_finished,
            ) = my_decoder.step(tf.constant(0), first_inputs, first_state)
            batch_size_t = my_decoder.batch_size

            self.assertLen(first_state, 2)
            self.assertLen(step_state, 2)
            assert isinstance(step_outputs, basic_decoder.BasicDecoderOutput)
            self.assertEqual(
                (batch_size, expected_output_depth), step_outputs[0].get_shape()
            )
            self.assertEqual((batch_size,), step_outputs[1].get_shape())
            self.assertEqual((batch_size, cell_depth), first_state[0].get_shape())
            self.assertEqual((batch_size, cell_depth), first_state[1].get_shape())
            self.assertEqual((batch_size, cell_depth), step_state[0].get_shape())
            self.assertEqual((batch_size, cell_depth), step_state[1].get_shape())

            self.assertLen(output_layer.variables, 1)

            eval_result = self.evaluate(
                {
                    "batch_size": batch_size_t,
                    "first_finished": first_finished,
                    "first_inputs": first_inputs,
                    "first_state": first_state,
                    "step_outputs": step_outputs,
                    "step_state": step_state,
                    "step_next_inputs": step_next_inputs,
                    "step_finished": step_finished,
                }
            )

            self.assertAllEqual(sequence_length == 0, eval_result["first_finished"])
            self.assertAllEqual(
                (np.maximum(sequence_length - 1, 0) == 0), eval_result["step_finished"]
            )
            self.assertEqual(
                output_dtype.sample_id, eval_result["step_outputs"].sample_id.dtype
            )
            self.assertAllEqual(
                np.argmax(eval_result["step_outputs"].rnn_output, -1),
                eval_result["step_outputs"].sample_id,
            )
Example #16
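A helper exercising dynamic decoding in both time-major and batch-major layouts: in graph mode it checks the partially unknown static shapes, and after evaluation it checks the concrete shapes, capped by maximum_iterations, together with the decoded lengths.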
    def _testDecodeRNN(self, time_major, maximum_iterations=None):

        sequence_length = [3, 4, 3, 1, 0]
        batch_size = 5
        max_time = 8
        input_depth = 7
        cell_depth = 10
        max_out = max(sequence_length)

        with self.cached_session(use_gpu=True):
            if time_major:
                inputs = np.random.randn(max_time, batch_size, input_depth).astype(
                    np.float32
                )
            else:
                inputs = np.random.randn(batch_size, max_time, input_depth).astype(
                    np.float32
                )
            input_t = tf.constant(inputs)
            cell = tf.keras.layers.LSTMCell(cell_depth)
            sampler = sampler_py.TrainingSampler(time_major=time_major)
            my_decoder = basic_decoder.BasicDecoder(
                cell=cell,
                sampler=sampler,
                output_time_major=time_major,
                maximum_iterations=maximum_iterations,
            )

            initial_state = cell.get_initial_state(
                batch_size=batch_size, dtype=tf.float32
            )
            (final_outputs, unused_final_state, final_sequence_length,) = my_decoder(
                input_t, initial_state=initial_state, sequence_length=sequence_length
            )

            def _t(shape):
                if time_major:
                    return (shape[1], shape[0]) + shape[2:]
                return shape

            if not tf.executing_eagerly():
                self.assertEqual(
                    (batch_size,), tuple(final_sequence_length.get_shape().as_list())
                )
                self.assertEqual(
                    _t((batch_size, None, cell_depth)),
                    tuple(final_outputs.rnn_output.get_shape().as_list()),
                )
                self.assertEqual(
                    _t((batch_size, None)),
                    tuple(final_outputs.sample_id.get_shape().as_list()),
                )

            self.evaluate(tf.compat.v1.global_variables_initializer())
            final_outputs = self.evaluate(final_outputs)
            final_sequence_length = self.evaluate(final_sequence_length)

            # Mostly a smoke test
            time_steps = max_out
            expected_length = sequence_length
            if maximum_iterations is not None:
                time_steps = min(max_out, maximum_iterations)
                expected_length = [min(x, maximum_iterations) for x in expected_length]
            if tf.executing_eagerly() and maximum_iterations != 0:
                self.assertEqual(
                    _t((batch_size, time_steps, cell_depth)),
                    final_outputs.rnn_output.shape,
                )
                self.assertEqual(
                    _t((batch_size, time_steps)), final_outputs.sample_id.shape
                )
            self.assertItemsEqual(expected_length, final_sequence_length)