예제 #1
0
def test_step_with_greedy_embedding_helper():
    """Check a single `BasicDecoder.step` driven by a `GreedyEmbeddingSampler`.

    The decoder is initialized with a random embedding matrix and random start
    tokens, stepped once, and the step's shapes, dtypes, sample ids, finished
    flags and next inputs are compared with values recomputed in NumPy.
    """
    batch_size = 5
    vocabulary_size = 7
    cell_depth = vocabulary_size  # cell's logits must match vocabulary size
    input_depth = 10
    start_tokens = np.random.randint(0, vocabulary_size, size=batch_size)
    end_token = 1

    embeddings = np.random.randn(vocabulary_size,
                                 input_depth).astype(np.float32)
    embeddings_t = tf.constant(embeddings)
    # Size the cell with cell_depth, the name every shape assertion below uses.
    cell = tf.keras.layers.LSTMCell(cell_depth)
    sampler = sampler_py.GreedyEmbeddingSampler()
    initial_state = cell.get_initial_state(batch_size=batch_size,
                                           dtype=tf.float32)
    my_decoder = basic_decoder.BasicDecoder(cell=cell, sampler=sampler)
    (first_finished, first_inputs, first_state) = my_decoder.initialize(
        embeddings_t,
        start_tokens=start_tokens,
        end_token=end_token,
        initial_state=initial_state,
    )

    # Static output metadata: one rnn_output of width cell_depth plus a scalar
    # int32 sample id per batch entry.
    output_size = my_decoder.output_size
    output_dtype = my_decoder.output_dtype
    assert (basic_decoder.BasicDecoderOutput(cell_depth, tf.TensorShape(
        [])) == output_size)
    assert basic_decoder.BasicDecoderOutput(tf.float32,
                                            tf.int32) == output_dtype

    (
        step_outputs,
        step_state,
        step_next_inputs,
        step_finished,
    ) = my_decoder.step(tf.constant(0), first_inputs, first_state)

    # The state is a pair both before and after the step, and every piece
    # keeps the (batch_size, cell_depth) shape.
    assert len(first_state) == 2
    assert len(step_state) == 2
    assert isinstance(step_outputs, basic_decoder.BasicDecoderOutput)
    assert (batch_size, cell_depth) == step_outputs[0].shape
    assert (batch_size, ) == step_outputs[1].shape
    assert (batch_size, cell_depth) == first_state[0].shape
    assert (batch_size, cell_depth) == first_state[1].shape
    assert (batch_size, cell_depth) == step_state[0].shape
    assert (batch_size, cell_depth) == step_state[1].shape

    # Recompute the expected result in NumPy: ids are the argmax of the cell
    # output, an entry is finished when its id equals end_token, and the next
    # inputs are the embedding rows of those ids.
    expected_sample_ids = np.argmax(step_outputs.rnn_output, -1)
    expected_step_finished = expected_sample_ids == end_token
    expected_step_next_inputs = embeddings[expected_sample_ids]

    # Nothing is finished right after initialize(); derive the expected array
    # from batch_size so it cannot drift from the constant above.
    np.testing.assert_equal(
        np.zeros(batch_size, dtype=bool),
        first_finished,
    )
    np.testing.assert_equal(expected_step_finished, step_finished)
    assert output_dtype.sample_id == step_outputs.sample_id.dtype
    np.testing.assert_equal(expected_sample_ids, step_outputs.sample_id)
    np.testing.assert_equal(expected_step_next_inputs, step_next_inputs)
예제 #2
0
    def testNotUseAttentionLayer(self):
        """Run the shared attention check for BahdanauAttention without an attention layer.

        Expectations are hard-coded (shape, dtype, mean) summaries that are
        handed to `_testWithAttention` along with `attention_layer_size=None`.
        """
        float32 = np.dtype("float32")
        int32 = np.dtype("int32")

        output_summary = basic_decoder.BasicDecoderOutput(
            rnn_output=ResultSummary(shape=(5, 3, 10), dtype=float32, mean=0.078317143),
            sample_id=ResultSummary(shape=(5, 3), dtype=int32, mean=4.2),
        )

        cell_state_summaries = [
            ResultSummary(shape=(5, 9), dtype=float32, mean=0.89382392),
            ResultSummary(shape=(5, 9), dtype=float32, mean=1.722382),
        ]
        state_summary = wrapper.AttentionWrapperState(
            cell_state=cell_state_summaries,
            attention=ResultSummary(shape=(5, 10), dtype=float32, mean=0.026356646),
            alignments=ResultSummary(shape=(5, 8), dtype=float32, mean=0.125),
            attention_state=ResultSummary(shape=(5, 8), dtype=float32, mean=0.125),
            alignment_history=(),
        )

        self._testWithAttention(
            wrapper.BahdanauAttention,
            output_summary,
            state_summary,
            attention_layer_size=None,
            create_query_layer=True,
            create_attention_kwargs={"kernel_initializer": "ones"},
        )
예제 #3
0
    def testLuongNotNormalized(self):
        """Run the shared attention check for `wrapper.LuongAttention`.

        Expected values are hard-coded (shape, dtype, mean) summaries; the
        helper is called with `attention_mechanism_depth=9`.
        """

        def float_summary(shape, mean):
            # Every floating-point expectation in this test is float32.
            return ResultSummary(shape=shape, dtype=np.dtype("float32"), mean=mean)

        want_output = basic_decoder.BasicDecoderOutput(
            rnn_output=float_summary((5, 3, 6), -0.06124732),
            sample_id=ResultSummary(
                shape=(5, 3), dtype=np.dtype("int32"), mean=2.73333333
            ),
        )
        want_state = wrapper.AttentionWrapperState(
            cell_state=[
                float_summary((5, 9), 0.52021580),
                float_summary((5, 9), 1.0964939),
            ],
            attention=float_summary((5, 6), -0.0318060),
            time=3,
            alignments=float_summary((5, 8), 0.125),
            attention_state=float_summary((5, 8), 0.125),
            alignment_history=(),
        )

        self._testWithAttention(
            wrapper.LuongAttention,
            want_output,
            want_state,
            attention_mechanism_depth=9,
        )
예제 #4
0
    def testBahdanauNormalized(self):
        """Run the shared attention check for BahdanauAttention with `normalize=True`.

        The mechanism kwargs also set `kernel_initializer` to "ones". All
        expectations are hard-coded (shape, dtype, mean) summaries.
        """
        float32 = np.dtype("float32")
        int32 = np.dtype("int32")

        # Extra keyword arguments forwarded to the shared helper.
        helper_options = {
            "create_query_layer": True,
            "create_attention_kwargs": {
                "kernel_initializer": "ones",
                "normalize": True,
            },
        }

        decoder_output = basic_decoder.BasicDecoderOutput(
            rnn_output=ResultSummary(shape=(5, 3, 6), dtype=float32, mean=-0.008089137),
            sample_id=ResultSummary(shape=(5, 3), dtype=int32, mean=2.8),
        )
        wrapper_state = wrapper.AttentionWrapperState(
            cell_state=[
                ResultSummary(shape=(5, 9), dtype=float32, mean=0.49166861),
                ResultSummary(shape=(5, 9), dtype=float32, mean=1.01068615),
            ],
            attention=ResultSummary(shape=(5, 6), dtype=float32, mean=0.042427111),
            alignments=ResultSummary(shape=(5, 8), dtype=float32, mean=0.125),
            attention_state=ResultSummary(shape=(5, 8), dtype=float32, mean=0.125),
            alignment_history=(),
        )

        self._testWithAttention(
            wrapper.BahdanauAttention,
            decoder_output,
            wrapper_state,
            **helper_options,
        )
예제 #5
0
    def testLuongNotNormalized(self):
        """Run the shared attention check for `wrapper.LuongAttention`.

        Builds the expected output and state summaries piece by piece and
        passes them to `_testWithAttention` with `attention_mechanism_depth=9`.
        """
        f32 = np.dtype("float32")
        i32 = np.dtype("int32")

        # Expected decoder output.
        rnn_output = ResultSummary(shape=(5, 3, 6), dtype=f32, mean=0.05481226)
        sample_id = ResultSummary(shape=(5, 3), dtype=i32, mean=3.13333333)
        final_output = basic_decoder.BasicDecoderOutput(
            rnn_output=rnn_output, sample_id=sample_id
        )

        # Expected wrapper state.
        cell_state = [
            ResultSummary(shape=(5, 9), dtype=f32, mean=0.38453412),
            ResultSummary(shape=(5, 9), dtype=f32, mean=0.5785929),
        ]
        attention = ResultSummary(shape=(5, 6), dtype=f32, mean=0.16311775)
        alignments = ResultSummary(shape=(5, 8), dtype=f32, mean=0.125)
        attention_state = ResultSummary(shape=(5, 8), dtype=f32, mean=0.125)
        final_state = wrapper.AttentionWrapperState(
            cell_state=cell_state,
            attention=attention,
            time=3,
            alignments=alignments,
            attention_state=attention_state,
            alignment_history=(),
        )

        self._testWithAttention(
            wrapper.LuongAttention,
            final_output,
            final_state,
            attention_mechanism_depth=9,
        )
예제 #6
0
def test_luong_not_normalized():
    """Run the shared attention check for `wrapper.LuongAttention`.

    Floating-point expectations take their dtype from the compute dtype of the
    global Keras mixed-precision policy; sample ids are expected as int32.
    """
    set_random_state_for_tf_and_np()
    compute_dtype = tf.keras.mixed_precision.experimental.global_policy().compute_dtype

    def policy_summary(shape, mean):
        # Summary whose dtype follows the active mixed-precision policy.
        return ResultSummary(shape=shape, dtype=compute_dtype, mean=mean)

    want_output = basic_decoder.BasicDecoderOutput(
        rnn_output=policy_summary((5, 3, 6), -0.06124732),
        sample_id=ResultSummary(shape=(5, 3), dtype=np.dtype("int32"), mean=2.73333333),
    )
    want_state = wrapper.AttentionWrapperState(
        cell_state=[
            policy_summary((5, 9), 0.52021580),
            policy_summary((5, 9), 1.0964939),
        ],
        attention=policy_summary((5, 6), -0.0318060),
        alignments=policy_summary((5, 8), 0.125),
        attention_state=policy_summary((5, 8), 0.125),
        alignment_history=(),
    )

    _test_with_attention(
        wrapper.LuongAttention,
        want_output,
        want_state,
        attention_mechanism_depth=9,
    )
예제 #7
0
def test_luong_scaled():
    """Run the shared attention check for `wrapper.LuongAttention` with `scale=True`.

    Expectations are hard-coded (shape, dtype, mean) summaries; the helper is
    called with `attention_mechanism_depth=9`.
    """
    set_random_state_for_tf_and_np()
    float32 = np.dtype("float32")
    int32 = np.dtype("int32")

    decoder_output = basic_decoder.BasicDecoderOutput(
        rnn_output=ResultSummary(shape=(5, 3, 6), dtype=float32, mean=-0.06124732),
        sample_id=ResultSummary(shape=(5, 3), dtype=int32, mean=2.73333333),
    )

    cell_state_summaries = [
        ResultSummary(shape=(5, 9), dtype=float32, mean=0.52021580),
        ResultSummary(shape=(5, 9), dtype=float32, mean=1.0964939),
    ]
    wrapper_state = wrapper.AttentionWrapperState(
        cell_state=cell_state_summaries,
        attention=ResultSummary(shape=(5, 6), dtype=float32, mean=-0.0318060),
        alignments=ResultSummary(shape=(5, 8), dtype=float32, mean=0.125),
        attention_state=ResultSummary(shape=(5, 8), dtype=float32, mean=0.125),
        alignment_history=(),
    )

    _test_with_attention(
        wrapper.LuongAttention,
        decoder_output,
        wrapper_state,
        attention_mechanism_depth=9,
        create_attention_kwargs={"scale": True},
    )
예제 #8
0
    def testBahdanauMonotonicNotNormalized(self):
        """Run the shared attention check for `wrapper.BahdanauMonotonicAttention`.

        The mechanism gets `kernel_initializer="ones"`. Alignment history is
        requested, so an expected summary for it is passed as well.
        """

        def float_summary(shape, mean):
            # Every floating-point expectation in this test is float32.
            return ResultSummary(shape=shape, dtype=np.dtype("float32"), mean=mean)

        want_output = basic_decoder.BasicDecoderOutput(
            rnn_output=float_summary((5, 3, 6), -0.009921653),
            sample_id=ResultSummary(
                shape=(5, 3), dtype=np.dtype("int32"), mean=3.13333333
            ),
        )
        want_state = wrapper.AttentionWrapperState(
            cell_state=[
                float_summary((5, 9), 0.44612807),
                float_summary((5, 9), 0.95786464),
            ],
            attention=float_summary((5, 6), 0.038682378),
            time=3,
            alignments=float_summary((5, 8), 0.09778417),
            attention_state=float_summary((5, 8), 0.09778417),
            alignment_history=(),
        )
        want_history = float_summary((3, 5, 8), 0.10261579603)

        self._testWithAttention(
            wrapper.BahdanauMonotonicAttention,
            want_output,
            want_state,
            alignment_history=True,
            expected_final_alignment_history=want_history,
            create_query_layer=True,
            create_attention_kwargs={"kernel_initializer": "ones"},
        )
예제 #9
0
    def testLuongMonotonicScaled(self):
        """Run the shared attention check for LuongMonotonicAttention with `scale=True`.

        Alignment history is requested, so the expected history summary is
        passed next to the expected output and state summaries.
        """
        float32 = np.dtype("float32")
        int32 = np.dtype("int32")

        decoder_output = basic_decoder.BasicDecoderOutput(
            rnn_output=ResultSummary(shape=(5, 3, 6), dtype=float32, mean=0.003664831),
            sample_id=ResultSummary(shape=(5, 3), dtype=int32, mean=3.06666666),
        )

        cell_state_summaries = [
            ResultSummary(shape=(5, 9), dtype=float32, mean=0.54318606),
            ResultSummary(shape=(5, 9), dtype=float32, mean=1.12592840),
        ]
        wrapper_state = wrapper.AttentionWrapperState(
            cell_state=cell_state_summaries,
            attention=ResultSummary(shape=(5, 6), dtype=float32, mean=0.059128221),
            time=3,
            alignments=ResultSummary(shape=(5, 8), dtype=float32, mean=0.05112994),
            attention_state=ResultSummary(shape=(5, 8), dtype=float32, mean=0.05112994),
            alignment_history=(),
        )
        history_summary = ResultSummary(
            shape=(3, 5, 8), dtype=float32, mean=0.06994973868
        )

        self._testWithAttention(
            wrapper.LuongMonotonicAttention,
            decoder_output,
            wrapper_state,
            attention_mechanism_depth=9,
            alignment_history=True,
            expected_final_alignment_history=history_summary,
            create_attention_kwargs={"scale": True},
        )
예제 #10
0
    def testBahdanauNotNormalized(self):
        """Run the shared attention check for `wrapper.BahdanauAttention`.

        The mechanism gets `kernel_initializer="ones"`. Alignment history is
        requested and compared against its own expected summary.
        """

        def summary(shape, mean, dtype=np.float32):
            # dtype defaults to float32; only the sample ids override it.
            return ResultSummary(shape=shape, dtype=np.dtype(dtype), mean=mean)

        want_output = basic_decoder.BasicDecoderOutput(
            rnn_output=summary((5, 3, 6), -0.003204414),
            sample_id=summary((5, 3), 3.2, dtype=np.int32),
        )
        want_state = wrapper.AttentionWrapperState(
            cell_state=[
                summary((5, 9), 0.40868404),
                summary((5, 9), 0.89017969),
            ],
            attention=summary((5, 6), 0.041453815),
            time=3,
            alignments=summary((5, 8), 0.125),
            attention_state=summary((5, 8), 0.125),
            alignment_history=(),
        )
        want_history = summary((3, 5, 8), 0.125)

        self._testWithAttention(
            wrapper.BahdanauAttention,
            want_output,
            want_state,
            alignment_history=True,
            create_query_layer=True,
            expected_final_alignment_history=want_history,
            create_attention_kwargs={"kernel_initializer": "ones"},
        )
예제 #11
0
    def testLuongMonotonicNotNormalized(self):
        """Shared attention check for `wrapper.LuongMonotonicAttention` (skipped).

        `skipTest` is called before anything else, pointing at the tracking
        issue. The expectations below it are kept in place.
        """
        # NOTE(review): assuming a unittest-style TestCase, skipTest raises and
        # nothing after this call executes -- confirm against the base class.
        self.skipTest("Resolve https://github.com/tensorflow/addons/issues/781")

        float32 = np.dtype("float32")
        int32 = np.dtype("int32")

        decoder_output = basic_decoder.BasicDecoderOutput(
            rnn_output=ResultSummary(shape=(5, 3, 6), dtype=float32, mean=0.003664831),
            sample_id=ResultSummary(shape=(5, 3), dtype=int32, mean=3.06666666),
        )

        cell_state_summaries = [
            ResultSummary(shape=(5, 9), dtype=float32, mean=0.54318606),
            ResultSummary(shape=(5, 9), dtype=float32, mean=1.12592840),
        ]
        wrapper_state = wrapper.AttentionWrapperState(
            cell_state=cell_state_summaries,
            attention=ResultSummary(shape=(5, 6), dtype=float32, mean=0.059128221),
            time=3,
            alignments=ResultSummary(shape=(5, 8), dtype=float32, mean=0.05112994),
            attention_state=ResultSummary(shape=(5, 8), dtype=float32, mean=0.05112994),
            alignment_history=(),
        )
        history_summary = ResultSummary(
            shape=(3, 5, 8), dtype=float32, mean=0.06994973868
        )

        self._testWithAttention(
            wrapper.LuongMonotonicAttention,
            decoder_output,
            wrapper_state,
            attention_mechanism_depth=9,
            alignment_history=True,
            expected_final_alignment_history=history_summary,
        )
예제 #12
0
    def testBahdanauMonotonicNormalized(self):
        """Run the shared attention check for BahdanauMonotonicAttention, normalized.

        The mechanism gets `kernel_initializer="ones"` and `normalize=True`.
        Alignment history is requested and has its own expected summary.
        """

        def float_summary(shape, mean):
            # Every floating-point expectation in this test is float32.
            return ResultSummary(shape=shape, dtype=np.dtype("float32"), mean=mean)

        mechanism_kwargs = {"kernel_initializer": "ones", "normalize": True}

        want_output = basic_decoder.BasicDecoderOutput(
            rnn_output=float_summary((5, 3, 6), 0.007140680),
            sample_id=ResultSummary(
                shape=(5, 3), dtype=np.dtype("int32"), mean=3.26666666
            ),
        )
        want_state = wrapper.AttentionWrapperState(
            cell_state=[
                float_summary((5, 9), 0.47012400),
                float_summary((5, 9), 1.0249618),
            ],
            attention=float_summary((5, 6), 0.068432882),
            time=3,
            alignments=float_summary((5, 8), 0.0615656),
            attention_state=float_summary((5, 8), 0.0615656),
            alignment_history=(),
        )
        want_history = float_summary((3, 5, 8), 0.07909643)

        self._testWithAttention(
            wrapper.BahdanauMonotonicAttention,
            want_output,
            want_state,
            alignment_history=True,
            expected_final_alignment_history=want_history,
            create_query_layer=True,
            create_attention_kwargs=mechanism_kwargs,
        )
예제 #13
0
def test_bahdanau_not_normalized():
    """Run the shared attention check for `wrapper.BahdanauAttention`.

    The mechanism gets `kernel_initializer="ones"`. Alignment history is
    requested, so its expected summary is passed along with output and state.
    """
    set_random_state_for_tf_and_np()
    float32 = np.dtype(np.float32)
    int32 = np.dtype(np.int32)

    decoder_output = basic_decoder.BasicDecoderOutput(
        rnn_output=ResultSummary(shape=(5, 3, 6), dtype=float32, mean=-0.003204414),
        sample_id=ResultSummary(shape=(5, 3), dtype=int32, mean=3.2),
    )

    cell_state_summaries = [
        ResultSummary(shape=(5, 9), dtype=float32, mean=0.40868404),
        ResultSummary(shape=(5, 9), dtype=float32, mean=0.89017969),
    ]
    wrapper_state = wrapper.AttentionWrapperState(
        cell_state=cell_state_summaries,
        attention=ResultSummary(shape=(5, 6), dtype=float32, mean=0.041453815),
        alignments=ResultSummary(shape=(5, 8), dtype=float32, mean=0.125),
        attention_state=ResultSummary(shape=(5, 8), dtype=float32, mean=0.125),
        alignment_history=(),
    )
    history_summary = ResultSummary(shape=(3, 5, 8), dtype=float32, mean=0.125)

    _test_with_attention(
        wrapper.BahdanauAttention,
        decoder_output,
        wrapper_state,
        alignment_history=True,
        create_query_layer=True,
        expected_final_alignment_history=history_summary,
        create_attention_kwargs={"kernel_initializer": "ones"},
    )
예제 #14
0
    def testBahdanauMonotonicNormalized(self):
        """Run the shared attention check for BahdanauMonotonicAttention, normalized.

        The mechanism gets `kernel_initializer="ones"` and `normalize=True`.
        Alignment history is requested and has its own expected summary.
        """
        float32 = np.dtype("float32")
        int32 = np.dtype("int32")

        decoder_output = basic_decoder.BasicDecoderOutput(
            rnn_output=ResultSummary(shape=(5, 3, 6), dtype=float32, mean=0.043294173),
            sample_id=ResultSummary(shape=(5, 3), dtype=int32, mean=3.53333333),
        )
        wrapper_state = wrapper.AttentionWrapperState(
            cell_state=[
                ResultSummary(shape=(5, 9), dtype=float32, mean=0.40034312),
                ResultSummary(shape=(5, 9), dtype=float32, mean=0.5925445),
            ],
            attention=ResultSummary(shape=(5, 6), dtype=float32, mean=0.096119694),
            time=3,
            alignments=ResultSummary(shape=(5, 8), dtype=float32, mean=0.1211452),
            attention_state=ResultSummary(shape=(5, 8), dtype=float32, mean=0.1211452),
            alignment_history=(),
        )

        # Extra keyword arguments forwarded to the shared helper.
        helper_options = {
            "alignment_history": True,
            "expected_final_alignment_history": ResultSummary(
                shape=(3, 5, 8), dtype=float32, mean=0.12258384
            ),
            "create_query_layer": True,
            "create_attention_kwargs": {
                "kernel_initializer": "ones",
                "normalize": True,
            },
        }

        self._testWithAttention(
            wrapper.BahdanauMonotonicAttention,
            decoder_output,
            wrapper_state,
            **helper_options,
        )
예제 #15
0
def test_luong_monotonic_not_normalized():
    """Run the shared attention check for `wrapper.LuongMonotonicAttention`.

    No extra mechanism kwargs are given. Alignment history is requested, so
    its expected summary is passed along with the output and state summaries.
    """
    set_random_state_for_tf_and_np()

    def float_summary(shape, mean):
        # Every floating-point expectation in this test is float32.
        return ResultSummary(shape=shape, dtype=np.dtype("float32"), mean=mean)

    want_output = basic_decoder.BasicDecoderOutput(
        rnn_output=float_summary((5, 3, 6), 0.003664831),
        sample_id=ResultSummary(shape=(5, 3), dtype=np.dtype("int32"), mean=3.06666666),
    )
    want_state = wrapper.AttentionWrapperState(
        cell_state=[
            float_summary((5, 9), 0.54318606),
            float_summary((5, 9), 1.12592840),
        ],
        attention=float_summary((5, 6), 0.059128221),
        alignments=float_summary((5, 8), 0.05112994),
        attention_state=float_summary((5, 8), 0.05112994),
        alignment_history=(),
    )
    want_history = float_summary((3, 5, 8), 0.06994973868)

    _test_with_attention(
        wrapper.LuongMonotonicAttention,
        want_output,
        want_state,
        attention_mechanism_depth=9,
        alignment_history=True,
        expected_final_alignment_history=want_history,
    )
예제 #16
0
    def testLuongMonotonicScaled(self):
        """Run the shared attention check for LuongMonotonicAttention with `scale=True`.

        Alignment history is requested, so the expected history summary is
        passed next to the expected output and state summaries.
        """
        f32 = np.dtype("float32")
        i32 = np.dtype("int32")

        # Expected decoder output.
        rnn_output = ResultSummary(shape=(5, 3, 6), dtype=f32, mean=0.027387079)
        sample_id = ResultSummary(shape=(5, 3), dtype=i32, mean=3.13333333)
        final_output = basic_decoder.BasicDecoderOutput(
            rnn_output=rnn_output, sample_id=sample_id
        )

        # Expected wrapper state.
        cell_state = [
            ResultSummary(shape=(5, 9), dtype=f32, mean=0.32660431),
            ResultSummary(shape=(5, 9), dtype=f32, mean=0.52464348),
        ]
        attention = ResultSummary(shape=(5, 6), dtype=f32, mean=0.089345723)
        alignments = ResultSummary(shape=(5, 8), dtype=f32, mean=0.11831035)
        attention_state = ResultSummary(shape=(5, 8), dtype=f32, mean=0.11831035)
        final_state = wrapper.AttentionWrapperState(
            cell_state=cell_state,
            attention=attention,
            time=3,
            alignments=alignments,
            attention_state=attention_state,
            alignment_history=(),
        )

        # Expected alignment history.
        final_history = ResultSummary(shape=(3, 5, 8), dtype=f32, mean=0.12194442004)

        self._testWithAttention(
            wrapper.LuongMonotonicAttention,
            final_output,
            final_state,
            attention_mechanism_depth=9,
            alignment_history=True,
            expected_final_alignment_history=final_history,
            create_attention_kwargs={"scale": True},
        )
예제 #17
0
    def testBahdanauMonotonicNotNormalized(self):
        """Run the shared attention check for `wrapper.BahdanauMonotonicAttention`.

        The mechanism gets `kernel_initializer="ones"`. Alignment history is
        requested, so an expected summary for it is passed as well.
        """

        def float_summary(shape, mean):
            # Every floating-point expectation in this test is float32.
            return ResultSummary(shape=shape, dtype=np.dtype("float32"), mean=mean)

        want_output = basic_decoder.BasicDecoderOutput(
            rnn_output=float_summary((5, 3, 6), 0.041342419),
            sample_id=ResultSummary(
                shape=(5, 3), dtype=np.dtype("int32"), mean=3.53333333
            ),
        )
        want_state = wrapper.AttentionWrapperState(
            cell_state=[
                float_summary((5, 9), 0.33866978),
                float_summary((5, 9), 0.46913195),
            ],
            attention=float_summary((5, 6), 0.092498459),
            time=3,
            alignments=float_summary((5, 8), 0.12079944),
            attention_state=float_summary((5, 8), 0.12079944),
            alignment_history=(),
        )
        want_history = float_summary((3, 5, 8), 0.121448785067)

        self._testWithAttention(
            wrapper.BahdanauMonotonicAttention,
            want_output,
            want_state,
            alignment_history=True,
            expected_final_alignment_history=want_history,
            create_query_layer=True,
            create_attention_kwargs={"kernel_initializer": "ones"},
        )
예제 #18
0
    def testBahdanauNormalized(self):
        """Run the shared attention check for BahdanauAttention with `normalize=True`.

        The mechanism kwargs also set `kernel_initializer` to "ones". All
        expectations are hard-coded (shape, dtype, mean) summaries.
        """
        float32 = np.dtype("float32")
        int32 = np.dtype("int32")

        decoder_output = basic_decoder.BasicDecoderOutput(
            rnn_output=ResultSummary(shape=(5, 3, 6), dtype=float32, mean=0.047594748),
            sample_id=ResultSummary(shape=(5, 3), dtype=int32, mean=3.6),
        )

        cell_state_summaries = [
            ResultSummary(shape=(5, 9), dtype=float32, mean=0.41311637),
            ResultSummary(shape=(5, 9), dtype=float32, mean=0.61683208),
        ]
        wrapper_state = wrapper.AttentionWrapperState(
            cell_state=cell_state_summaries,
            attention=ResultSummary(shape=(5, 6), dtype=float32, mean=0.090581432),
            time=3,
            alignments=ResultSummary(shape=(5, 8), dtype=float32, mean=0.125),
            attention_state=ResultSummary(shape=(5, 8), dtype=float32, mean=0.125),
            alignment_history=(),
        )

        self._testWithAttention(
            wrapper.BahdanauAttention,
            decoder_output,
            wrapper_state,
            create_query_layer=True,
            create_attention_kwargs={"kernel_initializer": "ones", "normalize": True},
        )
예제 #19
0
    def testNotUseAttentionLayer(self):
        """Run `_testWithAttention` for BahdanauAttention with `attention_layer_size=None`.

        The expected values are recorded summaries (shape, dtype, mean) of the
        final decoder output and the final attention wrapper state.
        """
        float32 = np.dtype("float32")
        attention_kwargs = {"kernel_initializer": "ones"}

        # Summary of the decoder output.
        output_summary = basic_decoder.BasicDecoderOutput(
            rnn_output=ResultSummary(
                shape=(5, 3, 10), dtype=float32, mean=0.072406612),
            sample_id=ResultSummary(
                shape=(5, 3), dtype=np.dtype("int32"), mean=3.86666666),
        )

        # Summary of the wrapper state; no alignment history is expected.
        cell_state_summaries = [
            ResultSummary(shape=(5, 9), dtype=float32, mean=0.61177742),
            ResultSummary(shape=(5, 9), dtype=float32, mean=1.032002),
        ]
        state_summary = wrapper.AttentionWrapperState(
            cell_state=cell_state_summaries,
            attention=ResultSummary(
                shape=(5, 10), dtype=float32, mean=0.011346335),
            time=3,
            alignments=ResultSummary(
                shape=(5, 8), dtype=float32, mean=0.125),
            attention_state=ResultSummary(
                shape=(5, 8), dtype=float32, mean=0.125),
            alignment_history=(),
        )

        self._testWithAttention(
            wrapper.BahdanauAttention,
            output_summary,
            state_summary,
            attention_layer_size=None,
            create_query_layer=True,
            create_attention_kwargs=attention_kwargs,
        )
예제 #20
0
    def testStepWithTrainingHelperOutputLayer(self, use_output_layer):
        """Run one BasicDecoder step with a TrainingSampler and check the results.

        Args:
            use_output_layer: when truthy, a bias-free Dense layer of depth 3
                is handed to the decoder as `output_layer` and the expected
                output depth is that layer's depth; otherwise no output layer
                is used and the expected output depth is the cell depth.
        """
        batch_size = 5
        max_time = 8
        input_depth = 7
        cell_depth = 10
        output_layer_depth = 3
        sequence_length = [3, 4, 3, 1, 0]

        with self.cached_session(use_gpu=True):
            inputs = np.random.randn(batch_size, max_time, input_depth)
            input_t = tf.constant(inputs.astype(np.float32))
            cell = tf.keras.layers.LSTMCell(cell_depth)
            sampler = sampler_py.TrainingSampler(time_major=False)
            output_layer = (
                tf.keras.layers.Dense(output_layer_depth, use_bias=False)
                if use_output_layer
                else None
            )
            expected_output_depth = (
                output_layer_depth if use_output_layer else cell_depth
            )
            initial_state = cell.get_initial_state(
                batch_size=batch_size, dtype=tf.float32
            )
            my_decoder = basic_decoder.BasicDecoder(
                cell=cell, sampler=sampler, output_layer=output_layer
            )

            first_finished, first_inputs, first_state = my_decoder.initialize(
                input_t, initial_state=initial_state, sequence_length=sequence_length
            )

            # Static output signature of the decoder.
            output_size = my_decoder.output_size
            output_dtype = my_decoder.output_dtype
            expected_size = basic_decoder.BasicDecoderOutput(
                expected_output_depth, tf.TensorShape([])
            )
            expected_dtype = basic_decoder.BasicDecoderOutput(tf.float32, tf.int32)
            self.assertEqual(expected_size, output_size)
            self.assertEqual(expected_dtype, output_dtype)

            step_result = my_decoder.step(tf.constant(0), first_inputs, first_state)
            step_outputs, step_state, step_next_inputs, step_finished = step_result
            batch_size_t = my_decoder.batch_size

            # Static shapes: both states hold two (batch, cell_depth) tensors.
            for state in (first_state, step_state):
                self.assertLen(state, 2)
                for state_tensor in state:
                    self.assertEqual(
                        (batch_size, cell_depth), state_tensor.get_shape()
                    )
            self.assertIsInstance(step_outputs, basic_decoder.BasicDecoderOutput)
            self.assertEqual(
                (batch_size, expected_output_depth), step_outputs[0].get_shape()
            )
            self.assertEqual((batch_size,), step_outputs[1].get_shape())

            if use_output_layer:
                # The output layer was accessed
                self.assertLen(output_layer.variables, 1)

            self.evaluate(tf.compat.v1.global_variables_initializer())
            to_evaluate = dict(
                batch_size=batch_size_t,
                first_finished=first_finished,
                first_inputs=first_inputs,
                first_state=first_state,
                step_outputs=step_outputs,
                step_state=step_state,
                step_next_inputs=step_next_inputs,
                step_finished=step_finished,
            )
            eval_result = self.evaluate(to_evaluate)
            evaluated_outputs = eval_result["step_outputs"]

            # Only the zero-length sequence is finished at initialization; the
            # length-one sequence is additionally finished after the step.
            self.assertAllEqual(
                [False, False, False, False, True], eval_result["first_finished"]
            )
            self.assertAllEqual(
                [False, False, False, True, True], eval_result["step_finished"]
            )
            self.assertEqual(output_dtype.sample_id, evaluated_outputs.sample_id.dtype)
            # The sample ids must be the argmax of the step's rnn output.
            self.assertAllEqual(
                np.argmax(evaluated_outputs.rnn_output, -1),
                evaluated_outputs.sample_id,
            )
예제 #21
0
    def testStepWithScheduledEmbeddingTrainingHelper(self):
        """Run one BasicDecoder step with a ScheduledEmbeddingTrainingSampler.

        Checks the decoder's static output signature, the static shapes of
        the states and step results, the finished flags before and after the
        step, and that the next inputs are either the embedding of the
        sampled id (where `sample_id > -1`) or the input at time index 1
        (where `sample_id == -1`).
        """
        sequence_length = [3, 4, 3, 1, 0]
        batch_size = 5
        max_time = 8
        input_depth = 7
        vocabulary_size = 10

        with self.cached_session(use_gpu=True):
            inputs = np.random.randn(batch_size, max_time, input_depth).astype(
                np.float32
            )
            input_t = tf.constant(inputs)
            embeddings = np.random.randn(vocabulary_size, input_depth).astype(
                np.float32
            )
            half = tf.constant(0.5)
            # The cell's output depth is the vocabulary size.
            cell = tf.keras.layers.LSTMCell(vocabulary_size)
            sampler = sampler_py.ScheduledEmbeddingTrainingSampler(
                sampling_probability=half, time_major=False
            )
            initial_state = cell.get_initial_state(
                batch_size=batch_size, dtype=tf.float32
            )
            my_decoder = basic_decoder.BasicDecoder(cell=cell, sampler=sampler)
            (first_finished, first_inputs, first_state) = my_decoder.initialize(
                input_t,
                sequence_length=sequence_length,
                embedding=embeddings,
                initial_state=initial_state,
            )
            output_size = my_decoder.output_size
            output_dtype = my_decoder.output_dtype
            self.assertEqual(
                basic_decoder.BasicDecoderOutput(vocabulary_size, tf.TensorShape([])),
                output_size,
            )
            self.assertEqual(
                basic_decoder.BasicDecoderOutput(tf.float32, tf.int32), output_dtype
            )

            (
                step_outputs,
                step_state,
                step_next_inputs,
                step_finished,
            ) = my_decoder.step(tf.constant(0), first_inputs, first_state)
            batch_size_t = my_decoder.batch_size

            self.assertLen(first_state, 2)
            self.assertLen(step_state, 2)
            # assertIsInstance reports the actual type on failure, unlike
            # assertTrue(isinstance(...)).
            self.assertIsInstance(step_outputs, basic_decoder.BasicDecoderOutput)
            self.assertEqual((batch_size, vocabulary_size), step_outputs[0].get_shape())
            self.assertEqual((batch_size,), step_outputs[1].get_shape())
            self.assertEqual((batch_size, vocabulary_size), first_state[0].get_shape())
            self.assertEqual((batch_size, vocabulary_size), first_state[1].get_shape())
            self.assertEqual((batch_size, vocabulary_size), step_state[0].get_shape())
            self.assertEqual((batch_size, vocabulary_size), step_state[1].get_shape())
            self.assertEqual((batch_size, input_depth), step_next_inputs.get_shape())

            self.evaluate(tf.compat.v1.global_variables_initializer())
            eval_result = self.evaluate(
                {
                    "batch_size": batch_size_t,
                    "first_finished": first_finished,
                    "first_inputs": first_inputs,
                    "first_state": first_state,
                    "step_outputs": step_outputs,
                    "step_state": step_state,
                    "step_next_inputs": step_next_inputs,
                    "step_finished": step_finished,
                }
            )

            # Only the zero-length sequence is finished at initialization; the
            # length-one sequence is additionally finished after the step.
            self.assertAllEqual(
                [False, False, False, False, True], eval_result["first_finished"]
            )
            self.assertAllEqual(
                [False, False, False, True, True], eval_result["step_finished"]
            )
            sample_ids = eval_result["step_outputs"].sample_id
            self.assertEqual(output_dtype.sample_id, sample_ids.dtype)
            # This test treats a sample id of -1 as "not sampled".
            batch_where_not_sampling = np.where(sample_ids == -1)
            batch_where_sampling = np.where(sample_ids > -1)
            self.assertAllClose(
                eval_result["step_next_inputs"][batch_where_sampling],
                embeddings[sample_ids[batch_where_sampling]],
            )
            # Indexing with the np.where tuple adds a leading axis of size one,
            # which the squeeze removes.
            self.assertAllClose(
                eval_result["step_next_inputs"][batch_where_not_sampling],
                np.squeeze(inputs[batch_where_not_sampling, 1], axis=0),
            )
예제 #22
0
def test_step_with_scheduled_output_training_helper(sampling_probability,
                                                    use_next_inputs_fn,
                                                    use_auxiliary_inputs):
    """Run one BasicDecoder step with a ScheduledOutputTrainingSampler.

    Args:
        sampling_probability: value wrapped in `tf.constant` and passed to the
            sampler as its sampling probability.
        use_next_inputs_fn: when truthy, the sampler receives a function that
            maps outputs to a one-hot encoding of their argmax; otherwise it
            receives `None`.
        use_auxiliary_inputs: when truthy, random auxiliary inputs of depth 4
            are passed to `initialize`; otherwise `None` is passed.
    """
    batch_size = 5
    max_time = 8
    input_depth = 7
    cell_depth = input_depth
    sequence_length = [3, 4, 3, 1, 0]

    auxiliary_inputs = None
    if use_auxiliary_inputs:
        auxiliary_input_depth = 4
        auxiliary_inputs = np.random.randn(
            batch_size, max_time, auxiliary_input_depth).astype(np.float32)

    inputs = np.random.randn(batch_size, max_time,
                             input_depth).astype(np.float32)
    input_t = tf.constant(inputs)
    cell = tf.keras.layers.LSTMCell(cell_depth)
    sampling_probability_t = tf.constant(sampling_probability)

    def argmax_one_hot(outputs):
        # Use deterministic function for test.
        return tf.one_hot(tf.argmax(outputs, axis=1),
                          cell_depth,
                          dtype=tf.float32)

    next_inputs_fn = argmax_one_hot if use_next_inputs_fn else None

    sampler = sampler_py.ScheduledOutputTrainingSampler(
        sampling_probability=sampling_probability_t,
        time_major=False,
        next_inputs_fn=next_inputs_fn,
    )
    initial_state = cell.get_initial_state(batch_size=batch_size,
                                           dtype=tf.float32)
    my_decoder = basic_decoder.BasicDecoder(cell=cell, sampler=sampler)

    first_finished, first_inputs, first_state = my_decoder.initialize(
        input_t,
        sequence_length=sequence_length,
        initial_state=initial_state,
        auxiliary_inputs=auxiliary_inputs,
    )

    # Static output signature of the decoder.
    output_size = my_decoder.output_size
    output_dtype = my_decoder.output_dtype
    expected_size = basic_decoder.BasicDecoderOutput(cell_depth,
                                                     tf.TensorShape([]))
    expected_dtype = basic_decoder.BasicDecoderOutput(tf.float32, tf.int32)
    assert expected_size == output_size
    assert expected_dtype == output_dtype

    step_result = my_decoder.step(tf.constant(0), first_inputs, first_state)
    step_outputs, step_state, step_next_inputs, step_finished = step_result

    if use_next_inputs_fn:
        output_after_next_inputs_fn = next_inputs_fn(step_outputs.rnn_output)

    batch_size_t = my_decoder.batch_size

    # Both states hold two (batch, cell_depth) tensors.
    for state in (first_state, step_state):
        assert len(state) == 2
        for state_tensor in state:
            assert (batch_size, cell_depth) == state_tensor.shape
    assert isinstance(step_outputs, basic_decoder.BasicDecoderOutput)
    assert (batch_size, cell_depth) == step_outputs[0].shape
    assert (batch_size, ) == step_outputs[1].shape

    eval_result = dict(
        batch_size=batch_size_t.numpy(),
        first_finished=first_finished.numpy(),
        first_inputs=first_inputs.numpy(),
        first_state=np.asanyarray(first_state),
        step_outputs=step_outputs,
        step_state=np.asanyarray(step_state),
        step_next_inputs=step_next_inputs.numpy(),
        step_finished=step_finished.numpy(),
    )
    if use_next_inputs_fn:
        eval_result["output_after_next_inputs_fn"] = (
            output_after_next_inputs_fn)

    # Only the zero-length sequence is finished at initialization; the
    # length-one sequence is additionally finished after the step.
    np.testing.assert_equal(
        np.asanyarray([False, False, False, False, True]),
        eval_result["first_finished"],
    )
    np.testing.assert_equal(
        np.asanyarray([False, False, False, True, True]),
        eval_result["step_finished"],
    )

    sample_ids = eval_result["step_outputs"].sample_id.numpy()
    assert output_dtype.sample_id == sample_ids.dtype
    # This test treats non-zero sample ids as sampled batch entries.
    batch_where_sampling = np.where(sample_ids)
    batch_where_not_sampling = np.where(np.logical_not(sample_ids))

    # Without auxiliary inputs, concatenate a zero-width block instead.
    if use_auxiliary_inputs:
        auxiliary_inputs_to_concat = auxiliary_inputs[:, 1]
    else:
        auxiliary_inputs_to_concat = np.empty((batch_size, 0),
                                              dtype=np.float32)

    # Sampled entries: next input is the (optionally transformed) rnn output.
    if use_next_inputs_fn:
        sampled_source = eval_result["output_after_next_inputs_fn"].numpy()
    else:
        sampled_source = eval_result["step_outputs"].rnn_output.numpy()
    expected_next_sampling_inputs = np.concatenate(
        (
            sampled_source[batch_where_sampling],
            auxiliary_inputs_to_concat[batch_where_sampling],
        ),
        axis=-1,
    )
    np.testing.assert_equal(
        eval_result["step_next_inputs"][batch_where_sampling],
        expected_next_sampling_inputs,
    )

    # Non-sampled entries: next input is the input at time index 1. Indexing
    # with the np.where tuple adds a leading axis, removed by the squeeze.
    expected_next_plain_inputs = np.concatenate(
        (
            np.squeeze(inputs[batch_where_not_sampling, 1], axis=0),
            auxiliary_inputs_to_concat[batch_where_not_sampling],
        ),
        axis=-1,
    )
    np.testing.assert_equal(
        eval_result["step_next_inputs"][batch_where_not_sampling],
        expected_next_plain_inputs,
    )
예제 #23
0
def test_step_with_inference_helper_multilabel():
    """Run one BasicDecoder step with an InferenceSampler producing boolean samples.

    Checks the decoder's static output signature, the static shapes of the
    states and step results, and that the finished flags and next inputs
    follow from the sampled ids via the configured `end_fn` and
    `next_inputs_fn`.
    """
    batch_size = 5
    vocabulary_size = 7
    cell_depth = vocabulary_size
    start_token = 0
    end_token = 6

    start_ids = np.full(batch_size, start_token, dtype=np.int32)
    start_inputs = tf.one_hot(start_ids, vocabulary_size)

    def draw_bernoulli(logits):
        # Sample independent bernoullis from the logits.
        return sampler_py.bernoulli_sample(logits=logits, dtype=tf.bool)

    def samples_to_inputs(samples):
        # The next inputs are the sampled labels cast to float32.
        return tf.cast(samples, tf.float32)

    def is_finished(sample_ids):
        # The end-token column of the sample is used as the finished flag.
        return sample_ids[:, end_token]

    cell = tf.keras.layers.LSTMCell(vocabulary_size)
    sampler = sampler_py.InferenceSampler(
        draw_bernoulli,
        sample_shape=[cell_depth],
        sample_dtype=tf.bool,
        end_fn=is_finished,
        next_inputs_fn=samples_to_inputs,
    )
    initial_state = cell.get_initial_state(batch_size=batch_size,
                                           dtype=tf.float32)
    my_decoder = basic_decoder.BasicDecoder(cell=cell, sampler=sampler)
    first_finished, first_inputs, first_state = my_decoder.initialize(
        start_inputs, initial_state=initial_state)

    # Static output signature of the decoder.
    output_size = my_decoder.output_size
    output_dtype = my_decoder.output_dtype
    expected_size = basic_decoder.BasicDecoderOutput(cell_depth, cell_depth)
    expected_dtype = basic_decoder.BasicDecoderOutput(tf.float32, tf.bool)
    assert expected_size == output_size
    assert expected_dtype == output_dtype

    step_result = my_decoder.step(tf.constant(0), first_inputs, first_state)
    step_outputs, step_state, step_next_inputs, step_finished = step_result
    batch_size_t = my_decoder.batch_size

    # Both states hold two (batch, cell_depth) tensors.
    for state in (first_state, step_state):
        assert len(state) == 2
        for state_tensor in state:
            assert (batch_size, cell_depth) == state_tensor.shape
    assert isinstance(step_outputs, basic_decoder.BasicDecoderOutput)
    assert (batch_size, cell_depth) == step_outputs[0].shape
    assert (batch_size, cell_depth) == step_outputs[1].shape

    eval_result = dict(
        batch_size=batch_size_t.numpy(),
        first_finished=first_finished.numpy(),
        first_inputs=first_inputs.numpy(),
        first_state=np.asanyarray(first_state),
        step_outputs=step_outputs,
        step_state=np.asanyarray(step_state),
        step_next_inputs=step_next_inputs.numpy(),
        step_finished=step_finished.numpy(),
    )

    sample_ids = eval_result["step_outputs"].sample_id.numpy()
    assert output_dtype.sample_id == sample_ids.dtype
    expected_step_finished = sample_ids[:, end_token]
    expected_step_next_inputs = sample_ids.astype(np.float32)
    np.testing.assert_equal(expected_step_finished,
                            eval_result["step_finished"])
    np.testing.assert_equal(expected_step_next_inputs,
                            eval_result["step_next_inputs"])
예제 #24
0
def test_step_with_sample_embedding_helper():
    """Run one BasicDecoder step with a SampleEmbeddingSampler.

    Checks the decoder's static output signature, the static shapes of the
    states and step results, and that the finished flags and next inputs
    follow from the sampled ids (equality with the end token, and embedding
    lookup, respectively).
    """
    batch_size = 5
    vocabulary_size = 7
    cell_depth = vocabulary_size  # cell's logits must match vocabulary size
    input_depth = 10
    end_token = 1

    # Seed NumPy; the start tokens and embeddings are drawn in this order.
    np.random.seed(0)
    start_tokens = np.random.randint(0, vocabulary_size, size=batch_size)
    embeddings = np.random.randn(vocabulary_size,
                                 input_depth).astype(np.float32)
    embeddings_t = tf.constant(embeddings)

    cell = tf.keras.layers.LSTMCell(vocabulary_size)
    sampler = sampler_py.SampleEmbeddingSampler(seed=0)
    initial_state = cell.get_initial_state(batch_size=batch_size,
                                           dtype=tf.float32)
    my_decoder = basic_decoder.BasicDecoder(cell=cell, sampler=sampler)
    first_finished, first_inputs, first_state = my_decoder.initialize(
        embeddings_t,
        start_tokens=start_tokens,
        end_token=end_token,
        initial_state=initial_state,
    )

    # Static output signature of the decoder.
    output_size = my_decoder.output_size
    output_dtype = my_decoder.output_dtype
    expected_size = basic_decoder.BasicDecoderOutput(cell_depth,
                                                     tf.TensorShape([]))
    expected_dtype = basic_decoder.BasicDecoderOutput(tf.float32, tf.int32)
    assert expected_size == output_size
    assert expected_dtype == output_dtype

    step_result = my_decoder.step(tf.constant(0), first_inputs, first_state)
    step_outputs, step_state, step_next_inputs, step_finished = step_result
    batch_size_t = my_decoder.batch_size

    # Both states hold two (batch, cell_depth) tensors.
    for state in (first_state, step_state):
        assert len(state) == 2
        for state_tensor in state:
            assert (batch_size, cell_depth) == state_tensor.shape
    assert isinstance(step_outputs, basic_decoder.BasicDecoderOutput)
    assert (batch_size, cell_depth) == step_outputs[0].shape
    assert (batch_size, ) == step_outputs[1].shape

    eval_result = dict(
        batch_size=batch_size_t,
        first_finished=first_finished,
        first_inputs=first_inputs,
        first_state=first_state,
        step_outputs=step_outputs,
        step_state=step_state,
        step_next_inputs=step_next_inputs,
        step_finished=step_finished,
    )

    sample_ids = eval_result["step_outputs"].sample_id
    assert output_dtype.sample_id == sample_ids.dtype
    expected_step_finished = sample_ids == end_token
    expected_step_next_inputs = embeddings[sample_ids, :]
    np.testing.assert_equal(np.asanyarray(expected_step_finished),
                            eval_result["step_finished"].numpy())
    np.testing.assert_equal(expected_step_next_inputs,
                            eval_result["step_next_inputs"])
예제 #25
0
def test_step_with_scheduled_embedding_training_helper():
    """Run one BasicDecoder step with a ScheduledEmbeddingTrainingSampler.

    Checks the decoder's static output signature, the static shapes of the
    states and step results, the finished flags before and after the step,
    and that the next inputs are either the embedding of the sampled id
    (where `sample_id > -1`) or the input at time index 1 (where
    `sample_id == -1`).
    """
    batch_size = 5
    max_time = 8
    input_depth = 7
    vocabulary_size = 10
    sequence_length = [3, 4, 3, 1, 0]

    inputs = np.random.randn(batch_size, max_time,
                             input_depth).astype(np.float32)
    input_t = tf.constant(inputs)
    embeddings = np.random.randn(vocabulary_size,
                                 input_depth).astype(np.float32)
    half = tf.constant(0.5)
    # The cell's output depth is the vocabulary size.
    cell = tf.keras.layers.LSTMCell(vocabulary_size)
    sampler = sampler_py.ScheduledEmbeddingTrainingSampler(
        sampling_probability=half, time_major=False)
    initial_state = cell.get_initial_state(batch_size=batch_size,
                                           dtype=tf.float32)
    my_decoder = basic_decoder.BasicDecoder(cell=cell, sampler=sampler)
    first_finished, first_inputs, first_state = my_decoder.initialize(
        input_t,
        sequence_length=sequence_length,
        embedding=embeddings,
        initial_state=initial_state,
    )

    # Static output signature of the decoder.
    output_size = my_decoder.output_size
    output_dtype = my_decoder.output_dtype
    expected_size = basic_decoder.BasicDecoderOutput(vocabulary_size,
                                                     tf.TensorShape([]))
    expected_dtype = basic_decoder.BasicDecoderOutput(tf.float32, tf.int32)
    assert expected_size == output_size
    assert expected_dtype == output_dtype

    step_result = my_decoder.step(tf.constant(0), first_inputs, first_state)
    step_outputs, step_state, step_next_inputs, step_finished = step_result
    batch_size_t = my_decoder.batch_size

    # Both states hold two (batch, vocabulary_size) tensors.
    for state in (first_state, step_state):
        assert len(state) == 2
        for state_tensor in state:
            assert (batch_size, vocabulary_size) == state_tensor.shape
    assert isinstance(step_outputs, basic_decoder.BasicDecoderOutput)
    assert (batch_size, vocabulary_size) == step_outputs[0].shape
    assert (batch_size, ) == step_outputs[1].shape
    assert (batch_size, input_depth) == step_next_inputs.shape

    eval_result = dict(
        batch_size=batch_size_t.numpy(),
        first_finished=first_finished.numpy(),
        first_inputs=first_inputs.numpy(),
        first_state=np.asanyarray(first_state),
        step_outputs=step_outputs,
        step_state=np.asanyarray(step_state),
        step_next_inputs=step_next_inputs.numpy(),
        step_finished=step_finished.numpy(),
    )

    # Only the zero-length sequence is finished at initialization; the
    # length-one sequence is additionally finished after the step.
    np.testing.assert_equal(
        np.asanyarray([False, False, False, False, True]),
        eval_result["first_finished"],
    )
    np.testing.assert_equal(
        np.asanyarray([False, False, False, True, True]),
        eval_result["step_finished"],
    )

    sample_ids = eval_result["step_outputs"].sample_id.numpy()
    assert output_dtype.sample_id == sample_ids.dtype
    # This test treats a sample id of -1 as "not sampled".
    batch_where_sampling = np.where(sample_ids > -1)
    batch_where_not_sampling = np.where(sample_ids == -1)
    actual_next_inputs = eval_result["step_next_inputs"]

    np.testing.assert_equal(
        actual_next_inputs[batch_where_sampling],
        embeddings[sample_ids[batch_where_sampling]],
    )
    # Indexing with the np.where tuple adds a leading axis of size one,
    # which the squeeze removes.
    np.testing.assert_equal(
        actual_next_inputs[batch_where_not_sampling],
        np.squeeze(inputs[batch_where_not_sampling, 1], axis=0),
    )
예제 #26
0
def test_step_with_training_helper_output_layer(use_output_layer):
    """Run one BasicDecoder step with a TrainingSampler and check the results.

    Args:
        use_output_layer: when truthy, a bias-free Dense layer of depth 3 is
            handed to the decoder as `output_layer` and the expected output
            depth is that layer's depth; otherwise no output layer is used
            and the expected output depth is the cell depth.
    """
    batch_size = 5
    max_time = 8
    input_depth = 7
    cell_depth = 10
    output_layer_depth = 3
    sequence_length = [3, 4, 3, 1, 0]

    inputs = np.random.randn(batch_size, max_time,
                             input_depth).astype(np.float32)
    input_t = tf.constant(inputs)
    cell = tf.keras.layers.LSTMCell(cell_depth)
    sampler = sampler_py.TrainingSampler(time_major=False)
    output_layer = (tf.keras.layers.Dense(output_layer_depth, use_bias=False)
                    if use_output_layer else None)
    expected_output_depth = (output_layer_depth
                             if use_output_layer else cell_depth)
    initial_state = cell.get_initial_state(batch_size=batch_size,
                                           dtype=tf.float32)
    my_decoder = basic_decoder.BasicDecoder(cell=cell,
                                            sampler=sampler,
                                            output_layer=output_layer)

    first_finished, first_inputs, first_state = my_decoder.initialize(
        input_t,
        initial_state=initial_state,
        sequence_length=sequence_length,
    )

    # Static output signature of the decoder.
    output_size = my_decoder.output_size
    output_dtype = my_decoder.output_dtype
    expected_size = basic_decoder.BasicDecoderOutput(expected_output_depth,
                                                     tf.TensorShape([]))
    expected_dtype = basic_decoder.BasicDecoderOutput(tf.float32, tf.int32)
    assert expected_size == output_size
    assert expected_dtype == output_dtype

    step_result = my_decoder.step(tf.constant(0), first_inputs, first_state)
    step_outputs, step_state, step_next_inputs, step_finished = step_result
    batch_size_t = my_decoder.batch_size

    # Both states hold two (batch, cell_depth) tensors.
    for state in (first_state, step_state):
        assert len(state) == 2
        for state_tensor in state:
            assert (batch_size, cell_depth) == state_tensor.shape
    # Exact type check (not isinstance), as in the original test.
    assert type(step_outputs) is basic_decoder.BasicDecoderOutput
    assert (batch_size, expected_output_depth) == step_outputs[0].shape
    assert (batch_size, ) == step_outputs[1].shape

    if use_output_layer:
        # The output layer was accessed
        assert len(output_layer.variables) == 1

    eval_result = dict(
        batch_size=batch_size_t,
        first_finished=first_finished,
        first_inputs=first_inputs,
        first_state=first_state,
        step_outputs=step_outputs,
        step_state=step_state,
        step_next_inputs=step_next_inputs,
        step_finished=step_finished,
    )
    evaluated_outputs = eval_result["step_outputs"]

    # Only the zero-length sequence is finished at initialization; the
    # length-one sequence is additionally finished after the step.
    np.testing.assert_equal(
        np.asanyarray([False, False, False, False, True]),
        eval_result["first_finished"].numpy(),
    )
    np.testing.assert_equal(
        np.asanyarray([False, False, False, True, True]),
        eval_result["step_finished"].numpy(),
    )
    assert output_dtype.sample_id == evaluated_outputs.sample_id.dtype
    # The sample ids must be the argmax of the step's rnn output.
    np.testing.assert_equal(
        np.argmax(evaluated_outputs.rnn_output, -1),
        evaluated_outputs.sample_id,
    )
예제 #27
0
def test_step_with_training_helper_masked_input(use_mask):
    """Run one `BasicDecoder` step with a `TrainingSampler` and a Dense head.

    `use_mask` selects how sequence lengths reach `initialize`:

    * `None`: neither a mask nor lengths are passed; every row counts as
      `max_time` long when the expected values are computed.
    * truthy: a boolean `mask` built from the per-row lengths is passed.
    * falsy (but not `None`): `sequence_length` is passed directly.
    """
    batch_size = 5
    max_time = 8
    input_depth = 7
    cell_depth = 10
    output_layer_depth = 3
    # The Dense output layer sits on top of the cell, so it sets the depth.
    expected_output_depth = output_layer_depth

    if use_mask is None:
        row_lengths = [max_time] * batch_size
    else:
        row_lengths = [3, 4, 3, 1, 0]
    sequence_length = np.array(row_lengths, dtype=np.int32)
    # One row per batch entry: `length` leading True values, then False.
    mask = [[True] * length + [False] * (max_time - length)
            for length in sequence_length]

    inputs = np.random.randn(batch_size, max_time, input_depth)
    inputs = inputs.astype(np.float32)
    input_t = tf.constant(inputs)
    cell = tf.keras.layers.LSTMCell(cell_depth)
    sampler = sampler_py.TrainingSampler(time_major=False)
    output_layer = tf.keras.layers.Dense(output_layer_depth, use_bias=False)
    initial_state = cell.get_initial_state(batch_size=batch_size,
                                           dtype=tf.float32)
    my_decoder = basic_decoder.BasicDecoder(cell=cell,
                                            sampler=sampler,
                                            output_layer=output_layer)

    # Only the length-related keyword differs between the three variants.
    init_kwargs = {"initial_state": initial_state}
    if use_mask is not None:
        if use_mask:
            init_kwargs["mask"] = mask
        else:
            init_kwargs["sequence_length"] = sequence_length
    first_finished, first_inputs, first_state = my_decoder.initialize(
        input_t, **init_kwargs)

    output_size = my_decoder.output_size
    output_dtype = my_decoder.output_dtype
    expected_size = basic_decoder.BasicDecoderOutput(expected_output_depth,
                                                     tf.TensorShape([]))
    assert expected_size == output_size
    expected_dtype = basic_decoder.BasicDecoderOutput(tf.float32, tf.int32)
    assert expected_dtype == output_dtype

    step_result = my_decoder.step(tf.constant(0), first_inputs, first_state)
    step_outputs, step_state, step_next_inputs, step_finished = step_result
    # Read for parity with the other decoder tests; its value is not asserted.
    batch_size_t = my_decoder.batch_size

    for state in (first_state, step_state):
        assert len(state) == 2
    assert type(step_outputs) is basic_decoder.BasicDecoderOutput
    assert (batch_size, expected_output_depth) == step_outputs[0].shape
    assert (batch_size, ) == step_outputs[1].shape
    for state in (first_state, step_state):
        for index in (0, 1):
            assert (batch_size, cell_depth) == state[index].shape

    assert len(output_layer.variables) == 1

    # Rows of length 0 start out finished; rows of length <= 1 are finished
    # after the first step.
    np.testing.assert_equal(sequence_length == 0, first_finished)
    np.testing.assert_equal((np.maximum(sequence_length - 1, 0) == 0),
                            step_finished)
    assert output_dtype.sample_id == step_outputs.sample_id.dtype
    np.testing.assert_equal(
        np.argmax(step_outputs.rnn_output, -1),
        step_outputs.sample_id,
    )
예제 #28
0
    def testStepWithInferenceHelperMultilabel(self):
        """Check a single `BasicDecoder` step using a multilabel `InferenceSampler`.

        The sampler draws an independent Bernoulli sample per logit, so every
        `sample_id` row is a boolean vector of length `cell_depth`. The test
        verifies the decoder's static output size/dtype and tensor shapes, then
        evaluates one step and checks that `step_finished` and
        `step_next_inputs` equal what `end_fn` and `next_inputs_fn` yield for
        the sampled ids.
        """
        batch_size = 5
        vocabulary_size = 7
        cell_depth = vocabulary_size
        start_token = 0
        end_token = 6

        start_inputs = tf.one_hot(
            np.ones(batch_size, dtype=np.int32) * start_token, vocabulary_size
        )

        # The sample function samples independent bernoullis from the logits.
        def sample_fn(x):
            return sampler_py.bernoulli_sample(logits=x, dtype=tf.bool)

        # The next inputs are the sampled boolean labels cast to float32.
        def next_inputs_fn(x):
            return tf.cast(x, tf.float32)

        # A row is finished when the label in the `end_token` column is set.
        def end_fn(sample_ids):
            return sample_ids[:, end_token]

        with self.cached_session(use_gpu=True):
            cell = tf.keras.layers.LSTMCell(vocabulary_size)
            sampler = sampler_py.InferenceSampler(
                sample_fn,
                sample_shape=[cell_depth],
                sample_dtype=tf.bool,
                end_fn=end_fn,
                next_inputs_fn=next_inputs_fn,
            )
            initial_state = cell.get_initial_state(
                batch_size=batch_size, dtype=tf.float32
            )
            my_decoder = basic_decoder.BasicDecoder(cell=cell, sampler=sampler)
            (first_finished, first_inputs, first_state) = my_decoder.initialize(
                start_inputs, initial_state=initial_state
            )
            output_size = my_decoder.output_size
            output_dtype = my_decoder.output_dtype
            self.assertEqual(
                basic_decoder.BasicDecoderOutput(cell_depth, cell_depth), output_size
            )
            self.assertEqual(
                basic_decoder.BasicDecoderOutput(tf.float32, tf.bool), output_dtype
            )

            (
                step_outputs,
                step_state,
                step_next_inputs,
                step_finished,
            ) = my_decoder.step(tf.constant(0), first_inputs, first_state)
            batch_size_t = my_decoder.batch_size

            # Use the TestCase assertion methods throughout: unlike bare
            # `assert`, they are not stripped under `python -O` and they
            # report both operands on failure.
            self.assertLen(first_state, 2)
            self.assertLen(step_state, 2)
            self.assertIsInstance(step_outputs, basic_decoder.BasicDecoderOutput)
            self.assertEqual((batch_size, cell_depth), step_outputs[0].get_shape())
            self.assertEqual((batch_size, cell_depth), step_outputs[1].get_shape())
            self.assertEqual((batch_size, cell_depth), first_state[0].get_shape())
            self.assertEqual((batch_size, cell_depth), first_state[1].get_shape())
            self.assertEqual((batch_size, cell_depth), step_state[0].get_shape())
            self.assertEqual((batch_size, cell_depth), step_state[1].get_shape())

            self.evaluate(tf.compat.v1.global_variables_initializer())
            eval_result = self.evaluate(
                {
                    "batch_size": batch_size_t,
                    "first_finished": first_finished,
                    "first_inputs": first_inputs,
                    "first_state": first_state,
                    "step_outputs": step_outputs,
                    "step_state": step_state,
                    "step_next_inputs": step_next_inputs,
                    "step_finished": step_finished,
                }
            )

            sample_ids = eval_result["step_outputs"].sample_id
            self.assertEqual(output_dtype.sample_id, sample_ids.dtype)
            # Recompute `end_fn` / `next_inputs_fn` in NumPy on the evaluated
            # samples and compare with what the decoder returned.
            expected_step_finished = sample_ids[:, end_token]
            expected_step_next_inputs = sample_ids.astype(np.float32)
            self.assertAllEqual(expected_step_finished, eval_result["step_finished"])
            self.assertAllEqual(
                expected_step_next_inputs, eval_result["step_next_inputs"]
            )
예제 #29
0
    def testStepWithInferenceHelperCategorical(self):
        """Exercise one decoder step with a categorical `InferenceSampler`.

        Each batch row samples a single int32 class id from the cell logits;
        the id is fed back as a one-hot vector and a row finishes when it
        samples `end_token`.
        """
        batch_size = 5
        vocabulary_size = 7
        cell_depth = vocabulary_size
        start_token = 0
        end_token = 6

        start_ids = np.ones(batch_size, dtype=np.int32) * start_token
        start_inputs = tf.one_hot(start_ids, vocabulary_size)

        def sample_fn(x):
            # Draw one class id per row from the logits.
            return sampler_py.categorical_sample(logits=x)

        def next_inputs_fn(x):
            # Feed the sampled ids back in as float32 one-hot vectors.
            return tf.one_hot(x, vocabulary_size, dtype=tf.float32)

        def end_fn(sample_ids):
            # A row is done once its sampled id equals `end_token`.
            return tf.equal(sample_ids, end_token)

        with self.cached_session(use_gpu=True):
            cell = tf.keras.layers.LSTMCell(vocabulary_size)
            sampler = sampler_py.InferenceSampler(
                sample_fn,
                sample_shape=(),
                sample_dtype=tf.int32,
                end_fn=end_fn,
                next_inputs_fn=next_inputs_fn,
            )
            initial_state = cell.get_initial_state(
                batch_size=batch_size, dtype=tf.float32
            )
            my_decoder = basic_decoder.BasicDecoder(cell=cell, sampler=sampler)
            first_finished, first_inputs, first_state = my_decoder.initialize(
                start_inputs, initial_state=initial_state
            )

            output_size = my_decoder.output_size
            output_dtype = my_decoder.output_dtype
            expected_size = basic_decoder.BasicDecoderOutput(
                cell_depth, tf.TensorShape([])
            )
            self.assertEqual(expected_size, output_size)
            expected_dtype = basic_decoder.BasicDecoderOutput(tf.float32, tf.int32)
            self.assertEqual(expected_dtype, output_dtype)

            step_result = my_decoder.step(tf.constant(0), first_inputs, first_state)
            step_outputs, step_state, step_next_inputs, step_finished = step_result
            batch_size_t = my_decoder.batch_size

            for state in (first_state, step_state):
                self.assertLen(state, 2)
            self.assertTrue(isinstance(step_outputs, basic_decoder.BasicDecoderOutput))
            self.assertEqual((batch_size, cell_depth), step_outputs[0].get_shape())
            self.assertEqual((batch_size,), step_outputs[1].get_shape())
            for state in (first_state, step_state):
                for index in (0, 1):
                    self.assertEqual(
                        (batch_size, cell_depth), state[index].get_shape()
                    )

            self.evaluate(tf.compat.v1.global_variables_initializer())
            fetches = {
                "batch_size": batch_size_t,
                "first_finished": first_finished,
                "first_inputs": first_inputs,
                "first_state": first_state,
                "step_outputs": step_outputs,
                "step_state": step_state,
                "step_next_inputs": step_next_inputs,
                "step_finished": step_finished,
            }
            eval_result = self.evaluate(fetches)

            sample_ids = eval_result["step_outputs"].sample_id
            self.assertEqual(output_dtype.sample_id, sample_ids.dtype)

            # Rebuild the one-hot next inputs in NumPy from the sampled ids.
            one_hot_rows = np.zeros((batch_size, vocabulary_size))
            one_hot_rows[np.arange(batch_size), sample_ids] = 1.0

            self.assertAllEqual(sample_ids == end_token, eval_result["step_finished"])
            self.assertAllEqual(one_hot_rows, eval_result["step_next_inputs"])
예제 #30
0
    def _testStepWithScheduledOutputTrainingHelper(
        self, sampling_probability, use_next_inputs_fn, use_auxiliary_inputs
    ):
        """Run one `BasicDecoder` step with a `ScheduledOutputTrainingSampler`.

        Shared helper: builds the decoder, performs a single step, and checks
        the finished flags and the next inputs for both the rows that sampled
        and the rows that did not.

        Args:
            sampling_probability: value wrapped in `tf.constant` and handed to
                the sampler as its `sampling_probability`.
            use_next_inputs_fn: when truthy, a deterministic argmax -> one-hot
                `next_inputs_fn` is given to the sampler; otherwise `None`.
            use_auxiliary_inputs: when truthy, random auxiliary inputs of
                depth 4 are passed to `initialize`; otherwise `None`.
        """
        sequence_length = [3, 4, 3, 1, 0]
        batch_size = 5
        max_time = 8
        input_depth = 7
        # NOTE(review): presumably equal to input_depth so that cell outputs
        # can stand in for the next inputs — confirm against the sampler.
        cell_depth = input_depth
        if use_auxiliary_inputs:
            auxiliary_input_depth = 4
            auxiliary_inputs = np.random.randn(
                batch_size, max_time, auxiliary_input_depth
            ).astype(np.float32)
        else:
            auxiliary_inputs = None

        with self.cached_session(use_gpu=True):
            inputs = np.random.randn(batch_size, max_time, input_depth).astype(
                np.float32
            )
            input_t = tf.constant(inputs)
            cell = tf.keras.layers.LSTMCell(cell_depth)
            sampling_probability = tf.constant(sampling_probability)

            if use_next_inputs_fn:

                def next_inputs_fn(outputs):
                    # Use deterministic function for test.
                    samples = tf.argmax(outputs, axis=1)
                    return tf.one_hot(samples, cell_depth, dtype=tf.float32)

            else:
                next_inputs_fn = None

            sampler = sampler_py.ScheduledOutputTrainingSampler(
                sampling_probability=sampling_probability,
                time_major=False,
                next_inputs_fn=next_inputs_fn,
            )
            initial_state = cell.get_initial_state(
                batch_size=batch_size, dtype=tf.float32
            )

            my_decoder = basic_decoder.BasicDecoder(cell=cell, sampler=sampler)

            (first_finished, first_inputs, first_state) = my_decoder.initialize(
                input_t,
                sequence_length=sequence_length,
                initial_state=initial_state,
                auxiliary_inputs=auxiliary_inputs,
            )
            output_size = my_decoder.output_size
            output_dtype = my_decoder.output_dtype
            self.assertEqual(
                basic_decoder.BasicDecoderOutput(cell_depth, tf.TensorShape([])),
                output_size,
            )
            self.assertEqual(
                basic_decoder.BasicDecoderOutput(tf.float32, tf.int32), output_dtype
            )

            (
                step_outputs,
                step_state,
                step_next_inputs,
                step_finished,
            ) = my_decoder.step(tf.constant(0), first_inputs, first_state)

            if use_next_inputs_fn:
                # Apply the same function to the step's rnn_output so the
                # expected next inputs can be fetched alongside the real ones.
                output_after_next_inputs_fn = next_inputs_fn(step_outputs.rnn_output)

            batch_size_t = my_decoder.batch_size

            self.assertLen(first_state, 2)
            self.assertLen(step_state, 2)
            self.assertTrue(isinstance(step_outputs, basic_decoder.BasicDecoderOutput))
            self.assertEqual((batch_size, cell_depth), step_outputs[0].get_shape())
            self.assertEqual((batch_size,), step_outputs[1].get_shape())
            self.assertEqual((batch_size, cell_depth), first_state[0].get_shape())
            self.assertEqual((batch_size, cell_depth), first_state[1].get_shape())
            self.assertEqual((batch_size, cell_depth), step_state[0].get_shape())
            self.assertEqual((batch_size, cell_depth), step_state[1].get_shape())

            self.evaluate(tf.compat.v1.global_variables_initializer())

            # Everything is fetched in one evaluate call so the sampled ids and
            # the next inputs come from the same run.
            fetches = {
                "batch_size": batch_size_t,
                "first_finished": first_finished,
                "first_inputs": first_inputs,
                "first_state": first_state,
                "step_outputs": step_outputs,
                "step_state": step_state,
                "step_next_inputs": step_next_inputs,
                "step_finished": step_finished,
            }
            if use_next_inputs_fn:
                fetches["output_after_next_inputs_fn"] = output_after_next_inputs_fn

            eval_result = self.evaluate(fetches)

            # With sequence_length [3, 4, 3, 1, 0]: only the length-0 row is
            # finished initially; after one step the length-1 row is too.
            self.assertAllEqual(
                [False, False, False, False, True], eval_result["first_finished"]
            )
            self.assertAllEqual(
                [False, False, False, True, True], eval_result["step_finished"]
            )

            sample_ids = eval_result["step_outputs"].sample_id
            self.assertEqual(output_dtype.sample_id, sample_ids.dtype)
            # Split the batch rows by sample id: zero -> "not sampling",
            # nonzero -> "sampling". np.where returns a tuple of index arrays.
            batch_where_not_sampling = np.where(np.logical_not(sample_ids))
            batch_where_sampling = np.where(sample_ids)

            # Auxiliary inputs at time index 1, or an empty (batch_size, 0)
            # array so the concatenations below add no columns.
            auxiliary_inputs_to_concat = (
                auxiliary_inputs[:, 1]
                if use_auxiliary_inputs
                else np.array([]).reshape(batch_size, 0).astype(np.float32)
            )

            # Sampling rows: next input is the (optionally transformed) cell
            # output, followed by the auxiliary inputs.
            expected_next_sampling_inputs = np.concatenate(
                (
                    eval_result["output_after_next_inputs_fn"][batch_where_sampling]
                    if use_next_inputs_fn
                    else eval_result["step_outputs"].rnn_output[batch_where_sampling],
                    auxiliary_inputs_to_concat[batch_where_sampling],
                ),
                axis=-1,
            )
            self.assertAllClose(
                eval_result["step_next_inputs"][batch_where_sampling],
                expected_next_sampling_inputs,
            )

            # Non-sampling rows: next input is the original input at time
            # index 1, followed by the auxiliary inputs. The np.where tuple is
            # nested inside the index tuple here, so the indexed result gains a
            # leading length-1 axis, which np.squeeze(axis=0) removes.
            self.assertAllClose(
                eval_result["step_next_inputs"][batch_where_not_sampling],
                np.concatenate(
                    (
                        np.squeeze(inputs[batch_where_not_sampling, 1], axis=0),
                        auxiliary_inputs_to_concat[batch_where_not_sampling],
                    ),
                    axis=-1,
                ),
            )