def __init__(self,
             d_model,
             dropout_rate,
             initializer_scale=1.0,
             norm_epsilon=1e-6):
  """Create an EncoderConvolutionalLayer.

  Args:
    d_model: a positive integer, the dimension of the model.
    dropout_rate: a float between 0 and 1.
    initializer_scale: a positive float, the scale for the initializers of
      the separable convolutional filters.
    norm_epsilon: a small positive float, the epsilon for the layer norm.
  """
  self._dropout_rate = dropout_rate
  self._norm_epsilon = norm_epsilon
  self._conv3x1 = transformer_layers.Conv1DLayer(
      filter_size=3, output_size=int(d_model / 2), activation="relu")
  self._sep_conv9x1 = transformer_layers.SeparableConv1DLayer(
      min_relative_pos=-4,
      max_relative_pos=4,
      output_size=int(d_model / 2),
      depthwise_filter_initializer_scale=initializer_scale,
      pointwise_filter_initializer_scale=initializer_scale)
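
# A minimal sketch of how these sublayers might be wired together in a `call`
# method: each branch emits d_model / 2 features, so concatenating them along
# the model dimension restores d_model. The composition below (concat, then
# layer norm, dropout elided) and the "d_model" dim name are assumptions for
# illustration, not the actual EncoderConvolutionalLayer implementation.
def _sketch_call(self, context, x, losses=None):
  left = self._conv3x1.call(context, x, losses)       # [..., d_model / 2]
  right = self._sep_conv9x1.call(context, x, losses)  # [..., d_model / 2]
  hidden = mtf.concat([left, right], concat_dim_name="d_model")
  return mtf.layers.layer_norm(
      hidden, dim=hidden.shape.dims[-1], epsilon=self._norm_epsilon)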
def test_conv1d_call_same_input_output_dims(self):
  batch = 2
  d_model = 6
  length = 3
  inputs = np.random.randint(0, 10, size=[batch, length])
  inputs_mtf = self.converter.convert_np_array_to_mtf_tensor(
      inputs, dim_names=["batch", "length"])

  # Dummy context with the necessary information for Conv1DLayer.call.
  Context = collections.namedtuple(
      "Context", ["inputs", "activation_dtype", "mode"])
  context = Context(
      inputs=inputs_mtf, activation_dtype=tf.float32, mode="train")

  x = np.random.randn(batch, length, d_model)
  x_mtf = self.converter.convert_np_array_to_mtf_tensor(
      x, dtype=tf.float32, dim_names=["batch", "length", "d_model"])
  conv_layer = transformer_layers.Conv1DLayer(
      filter_size=3, output_size=d_model)
  output_mtf = conv_layer.call(context, x_mtf)
  self.assertAllEqual([batch, length, d_model],
                      output_mtf.shape.to_integer_list)
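
# For reference: a NumPy sketch of the causal width-`filter_size` convolution
# whose output shape this test asserts. The causal left-padding is an
# assumption inferred from the incremental-mode test below, not read from
# transformer_layers, and the helper name is illustrative.
def _np_causal_conv1d(x, conv_filter):
  # x: [batch, length, d_model], conv_filter: [filter_size, d_model, output].
  filter_size = conv_filter.shape[0]
  # Pad on the left so position l sees inputs l - filter_size + 1 .. l.
  padded = np.pad(x, [(0, 0), (filter_size - 1, 0), (0, 0)])
  windows = np.stack(
      [padded[:, l:l + filter_size, :] for l in range(x.shape[1])], axis=1)
  # b: batch, l: position, f: filter tap, d: d_model, o: output_size.
  return np.einsum("blfd,fdo->blo", windows, conv_filter)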
def test_conv1d_call_incremental_mode(self):
  batch = 2
  d_model = 6
  length = 4
  filter_size = 3
  output_size = 2
  state = np.random.randn(batch, filter_size, d_model)
  context = get_dummy_decoder_context(
      self.converter,
      batch=batch,
      d_model=d_model,
      length=length,
      state=state)
  x = np.random.randn(batch, d_model)
  x_mtf = self.converter.convert_np_array_to_mtf_tensor(
      x, dtype=tf.float32, dim_names=["batch", "d_model"])

  conv_filter = np.random.randn(1, filter_size, d_model, output_size)

  def mock_initializer():
    # pylint: disable=unused-argument
    def conv_init(shape, dtype, **unused_kwargs):
      return conv_filter
    return conv_init

  with mock.patch.object(tf, "glorot_uniform_initializer", mock_initializer):
    conv_layer = transformer_layers.Conv1DLayer(
        filter_size=filter_size, output_size=output_size)
    output_mtf = conv_layer.call(context, x_mtf)
    actual = self.converter.convert_mtf_tensor_to_np_array(output_mtf)

  # [batch, 2, d_model], [batch, 1, d_model] -> [batch, 3, d_model]
  padded_x = np.concatenate([state[:, 1:, :], x[:, np.newaxis, :]], axis=1)
  # b: batch, h: fake height, l: length (or filter), d: d_model,
  # o: output_size
  expected = np.einsum("bld,hldo->bo", padded_x, conv_filter)
  self.assertAllClose(actual, expected)
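
# A hedged sketch of the recurrence the incremental test exercises: keep the
# last `filter_size` inputs as decoder state, shift the newest token in, and
# contract the window against the filter. The helper name is illustrative,
# not part of transformer_layers.
def _np_incremental_conv_step(state, x, conv_filter):
  # state: [batch, filter_size, d_model], x: [batch, d_model],
  # conv_filter: [1, filter_size, d_model, output_size].
  new_state = np.concatenate([state[:, 1:, :], x[:, np.newaxis, :]], axis=1)
  # b: batch, h: fake height, l: filter tap, d: d_model, o: output_size.
  out = np.einsum("bld,hldo->bo", new_state, conv_filter)
  return new_state, out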