Example #1
    def __init__(self,
                 d_model,
                 dropout_rate,
                 initializer_scale=1.0,
                 norm_epsilon=1e-6):
        """Create an EncoderConvolutionalLayer.

    Args:
      d_model: a positive integer, the dimension of the model dim.
      dropout_rate: a float between 0 and 1.
      initializer_scale: a positive float, the scale for the initializers of the
        separable convolutional filters.
      norm_epsilon: a small positive float, the epsilon for the layer norm.
    """
        self._dropout_rate = dropout_rate
        self._norm_epsilon = norm_epsilon
        self._conv3x1 = transformer_layers.Conv1DLayer(
            filter_size=3, output_size=int(d_model / 2), activation="relu")
        self._sep_conv9x1 = transformer_layers.SeparableConv1DLayer(
            min_relative_pos=-4,
            max_relative_pos=4,
            output_size=int(d_model / 2),
            depthwise_filter_initializer_scale=initializer_scale,
            pointwise_filter_initializer_scale=initializer_scale)
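
The two branches each emit int(d_model / 2) channels, which suggests their outputs are meant to be recombined to the full model width. Below is a minimal NumPy sketch of that channel arithmetic; the concatenation is an assumption on my part, since the snippet above does not show the layer's call method.

import numpy as np

d_model = 8
batch, length = 2, 5

# Stand-ins for the two branch outputs; the real layers convolve over the
# length axis, but the channel arithmetic below is the same either way.
left = np.random.randn(batch, length, int(d_model / 2))   # 3x1 conv branch
right = np.random.randn(batch, length, int(d_model / 2))  # 9x1 sep conv branch

combined = np.concatenate([left, right], axis=-1)
assert combined.shape == (batch, length, d_model)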
Example #2
    def test_conv1d_call_same_input_output_dims(self):
        batch = 2
        d_model = 6
        length = 3
        inputs = np.random.randint(0, 10, size=[batch, length])
        inputs_mtf = self.converter.convert_np_array_to_mtf_tensor(
            inputs, dim_names=["batch", "length"])
        # Dummy context with necessary information for Conv1DLayer.call
        Context = collections.namedtuple(
            "Context", ["inputs", "activation_dtype", "mode"])
        context = Context(inputs=inputs_mtf,
                          activation_dtype=tf.float32,
                          mode="train")
        x = np.random.randn(batch, length, d_model)
        x_mtf = self.converter.convert_np_array_to_mtf_tensor(
            x, dtype=tf.float32, dim_names=["batch", "length", "d_model"])
        conv_layer = transformer_layers.Conv1DLayer(
            filter_size=3, output_size=d_model)
        output_mtf = conv_layer.call(context, x_mtf)
        self.assertAllEqual([batch, length, d_model],
                            output_mtf.shape.to_integer_list)
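
For reference, here is a plain NumPy sketch of the length-preserving ("same"-padded) filter_size=3 convolution this test exercises. It is independent of mesh_tensorflow and only illustrates why the asserted output shape equals the input shape.

import numpy as np

batch, length, d_model = 2, 3, 6
filter_size = 3
x = np.random.randn(batch, length, d_model)
f = np.random.randn(filter_size, d_model, d_model)  # output_size == d_model

# Pad one step on each side so the output keeps the input length.
pad = filter_size // 2
x_padded = np.pad(x, [(0, 0), (pad, pad), (0, 0)])

# Window l covers padded positions l..l+filter_size-1.
out = np.stack(
    [np.einsum("bkd,kdo->bo", x_padded[:, l:l + filter_size, :], f)
     for l in range(length)],
    axis=1)
assert out.shape == (batch, length, d_model)  # same assertion as the test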
Example #3
    def test_conv1d_call_incremental_mode(self):
        batch = 2
        d_model = 6
        length = 4
        filter_size = 3
        output_size = 2

        state = np.random.randn(batch, filter_size, d_model)
        context = get_dummy_decoder_context(self.converter,
                                            batch=batch,
                                            d_model=d_model,
                                            length=length,
                                            state=state)

        x = np.random.randn(batch, d_model)
        x_mtf = self.converter.convert_np_array_to_mtf_tensor(
            x, dtype=tf.float32, dim_names=["batch", "d_model"])

        conv_filter = np.random.randn(1, filter_size, d_model, output_size)

        def mock_initializer():
            # Replace the default initializer so the convolution uses the
            # fixed filter above, making the expected output computable.
            # pylint: disable=unused-argument
            def conv_init(shape, dtype, **unused_kwargs):
                return conv_filter

            return conv_init

        with mock.patch.object(tf, "glorot_uniform_initializer",
                               mock_initializer):
            conv_layer = transformer_layers.Conv1DLayer(
                filter_size=filter_size, output_size=output_size)
            output_mtf = conv_layer.call(context, x_mtf)
        actual = self.converter.convert_mtf_tensor_to_np_array(output_mtf)

        # [batch, 2, d_model], [batch, 1, d_model] -> [batch, 3, d_model]
        padded_x = np.concatenate([state[:, 1:, :], x[:, np.newaxis, :]],
                                  axis=1)
        # b: batch, h: fake height, l: length (or filter), d: d_model,
        # o: output_size
        expected = np.einsum("bld,hldo->bo", padded_x, conv_filter)
        self.assertAllClose(actual, expected)
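
The test above implies a sliding-window state update for incremental decoding: the oldest buffered step is dropped and the newest input appended before the filter is applied. Here is a standalone NumPy sketch of that update; the names are illustrative, not the library's API.

import numpy as np

batch, filter_size, d_model, output_size = 2, 3, 6, 2
state = np.random.randn(batch, filter_size, d_model)  # last filter_size steps
x = np.random.randn(batch, d_model)                   # current decode step
conv_filter = np.random.randn(1, filter_size, d_model, output_size)

# Slide the window: drop the oldest step, append the new one.
new_state = np.concatenate([state[:, 1:, :], x[:, np.newaxis, :]], axis=1)

# One decode step yields a single [batch, output_size] slice, exactly the
# quantity the test compares against the layer's output.
step_out = np.einsum("bld,hldo->bo", new_state, conv_filter)
assert step_out.shape == (batch, output_size)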