def __init__(self,
             word_vocab_size,
             word_embed_size,
             type_vocab_size,
             output_embed_size,
             max_sequence_length=512,
             normalization_type='no_norm',
             initializer=None,
             dropout_rate=0.1,
             **kwargs):
  """Class initialization.

  Args:
    word_vocab_size: Number of words in the vocabulary.
    word_embed_size: Word embedding size.
    type_vocab_size: Number of word types.
    output_embed_size: Embedding size for the final embedding output.
    max_sequence_length: Maximum length of input sequence.
    normalization_type: String. The type of normalization_type, only
      `no_norm` and `layer_norm` are supported.
    initializer: The initializer to use for the embedding weights and
      linear projection weights. If None, defaults to
      `tf.keras.initializers.TruncatedNormal(stddev=0.02)`.
    dropout_rate: Dropout rate.
    **kwargs: keyword arguments.
  """
  super(MobileBertEmbedding, self).__init__(**kwargs)
  # NOTE(review): the previous signature default,
  # `initializer=tf.keras.initializers.TruncatedNormal(stddev=0.02)`, was a
  # mutable default argument — constructed once at function-definition time
  # and shared by every instance of this layer. Resolving a None sentinel
  # here preserves the effective default without the sharing.
  if initializer is None:
    initializer = tf.keras.initializers.TruncatedNormal(stddev=0.02)
  self.word_vocab_size = word_vocab_size
  self.word_embed_size = word_embed_size
  self.type_vocab_size = type_vocab_size
  self.output_embed_size = output_embed_size
  self.max_sequence_length = max_sequence_length
  self.normalization_type = normalization_type
  # Normalize whatever the caller passed (string identifier, config dict,
  # or initializer instance) into an initializer object.
  self.initializer = tf.keras.initializers.get(initializer)
  self.dropout_rate = dropout_rate

  self.word_embedding = on_device_embedding.OnDeviceEmbedding(
      self.word_vocab_size,
      self.word_embed_size,
      initializer=initializer,
      name='word_embedding')
  # Type embeddings are created directly at the output width, while word
  # embeddings are projected up to it below.
  self.type_embedding = on_device_embedding.OnDeviceEmbedding(
      self.type_vocab_size,
      self.output_embed_size,
      initializer=initializer,
      name='type_embedding')
  self.pos_embedding = position_embedding.PositionEmbedding(
      max_length=max_sequence_length,
      initializer=initializer,
      name='position_embedding')
  # Linear projection from word_embed_size to output_embed_size,
  # applied per position: 'abc,cd->abd'.
  self.word_embedding_proj = tf.keras.layers.experimental.EinsumDense(
      'abc,cd->abd',
      output_shape=[None, self.output_embed_size],
      kernel_initializer=initializer,
      bias_axes='d',
      name='embedding_projection')
  self.layer_norm = _get_norm_layer(normalization_type, 'embedding_norm')
  self.dropout_layer = tf.keras.layers.Dropout(
      self.dropout_rate,
      name='embedding_dropout')
def test_dynamic_layer_output_shape(self):
  """Dynamic slicing should propagate an unknown sequence dimension."""
  max_seq_len = 40
  layer = position_embedding.PositionEmbedding(
      use_dynamic_slicing=True, max_sequence_length=max_seq_len)

  # Build a symbolic 3-D input whose sequence dimension is unknown
  # (the batch dimension is implicit in tf.keras.Input).
  feature_width = 30
  inputs = tf.keras.Input(shape=(None, feature_width))
  outputs = layer(inputs)

  # With dynamic slicing, the output shape mirrors the input shape in
  # every dimension — including None where the input is unspecified.
  self.assertEqual([None, None, feature_width], outputs.shape.as_list())
def test_float16_dtype(self):
  """The layer should honor an explicitly requested float16 dtype."""
  test_layer = position_embedding.PositionEmbedding(dtype="float16")
  # Create a 3-dimensional input (the first dimension is implicit).
  sequence_length = 21
  width = 30
  input_tensor = tf.keras.Input(shape=(sequence_length, width))
  output_tensor = test_layer(input_tensor)

  # When using static positional embedding shapes, the output is expected
  # to be the same as the input shape in all dimensions save batch.
  expected_output_shape = [None, sequence_length, width]
  self.assertEqual(expected_output_shape, output_tensor.shape.as_list())
  # The layer was constructed with dtype="float16", so the output dtype
  # should be tf.float16 rather than the usual tf.float32 default.
  # (The previous comment claimed float32 while asserting float16.)
  self.assertEqual(tf.float16, output_tensor.dtype)
def test_dynamic_layer_slicing(self):
  """Inputs shorter than max_sequence_length should be down-sliced."""
  max_seq_len = 40
  layer = position_embedding.PositionEmbedding(
      use_dynamic_slicing=True, max_sequence_length=max_seq_len)

  # Build a symbolic 3-D input with an unknown sequence dimension (the
  # batch dimension is implicit) and wrap it in a Model so we can predict.
  feature_width = 30
  inputs = tf.keras.Input(shape=(None, feature_width))
  model = tf.keras.Model(inputs, layer(inputs))

  # Feed data shorter than max_sequence_length to trigger a down-slice.
  # Note: This test explicitly uses a batch size of 1. This is to get around
  # Keras' restriction on Model invocations: inputs are expected to have the
  # same batch cardinality as outputs. In practice, this layer should be used
  # inside a model, where it can be projected when added to another tensor.
  short_len = 17
  batch = np.ones((1, short_len, feature_width))
  predictions = model.predict(batch)
  self.assertAllEqual([1, short_len, feature_width], predictions.shape)