def __init__(self, config, padding_id=0, **kwargs):
    super(Transformer, self).__init__(**kwargs)
    # Set up the bias layers.
    self._padding_bias = bias.PaddingBias(padding_id, name="padding_bias")
    self._causal_bias = bias.CausalBias(name="causal_bias")
    # Set up the inner encoder and decoder and output layer.
    self._encoder = encoder.TransformerEncoder(
        vocab_size=config['vocab_size'],
        num_layers=config['num_layers'],
        d_model=config['d_model'],
        d_filter=config['d_filter'],
        num_heads=config['num_heads'],
        dropout_rate=config['dropout_rate'],
        ffn_activation=config['ffn_activation'],
        layer_norm_epsilon=config['layer_norm_epsilon'],
        name="encoder")
    self._decoder = decoder.TransformerDecoder(
        vocab_size=config['vocab_size'],
        num_layers=config['num_layers'],
        d_model=config['d_model'],
        d_filter=config['d_filter'],
        num_heads=config['num_heads'],
        dropout_rate=config['dropout_rate'],
        ffn_activation=config['ffn_activation'],
        layer_norm_epsilon=config['layer_norm_epsilon'],
        encoder_decoder=True,
        name="decoder")
    self._to_out = tf.keras.layers.Dense(config['output_vocab_size'])
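# A minimal usage sketch, assuming the __init__ above belongs to a `Transformer`
# Keras layer and that the config keys read there are the only required ones.
# The hyperparameter values below are illustrative placeholders, not taken from
# the repository.
example_config = {
    'vocab_size': 32000,
    'output_vocab_size': 32000,
    'num_layers': 6,
    'd_model': 512,
    'd_filter': 2048,
    'num_heads': 8,
    'dropout_rate': 0.1,
    'ffn_activation': 'relu',
    'layer_norm_epsilon': 1e-6,
}
model = Transformer(example_config, padding_id=0)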
def test_output_value(self):
    padding_id = -1
    x = np.array([[0, 0, -1]])
    expected_output = np.array([[[[0, 0, -1e9]]]])
    padding_bias = bias.PaddingBias(padding_id=padding_id)
    output = padding_bias(x)
    self.assertAllEqual(expected_output, output)
def test_output_shape(self):
    batch_size = 2
    length = 10
    padding_id = -1
    x = tf.ones([batch_size, length])
    padding_bias = bias.PaddingBias(padding_id=padding_id)
    output = padding_bias(x)
    self.assertShapeEqual(np.zeros((batch_size, 1, 1, length)), output)
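# Hedged sketch of how a (batch, 1, 1, length) padding bias like the one tested
# above is typically consumed: it is added to raw attention logits of shape
# (batch, num_heads, query_length, length) before the softmax, so padded key
# positions receive (near-)zero attention weight. The tensor names and shapes
# here are illustrative and not the repository's internals.
import tensorflow as tf

batch_size, num_heads, query_length, length = 2, 4, 5, 10
logits = tf.zeros([batch_size, num_heads, query_length, length])
# Bias: 0 for real tokens, -1e9 for the last 3 (padded) key positions.
padding_bias = tf.concat(
    [tf.zeros([batch_size, 1, 1, length - 3]),
     tf.fill([batch_size, 1, 1, 3], -1e9)], axis=-1)
# Broadcasting adds the same bias across all heads and query positions.
weights = tf.nn.softmax(logits + padding_bias, axis=-1)
print(weights[0, 0, 0])  # padded positions show up as ~0 attention weight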
def __init__(self, config, padding_id=0, **kwargs):
    super(TransformerEncoder, self).__init__(**kwargs)
    # Set up the bias layer.
    self._padding_bias = bias.PaddingBias(padding_id, name="padding_bias")
    # Set up the inner encoder.
    self._encoder = encoder.TransformerEncoder(
        vocab_size=config['vocab_size'],
        num_layers=config['num_layers'],
        d_model=config['d_model'],
        d_filter=config['d_filter'],
        num_heads=config['num_heads'],
        dropout_rate=config['dropout_rate'],
        ffn_activation=config['ffn_activation'],
        layer_norm_epsilon=config['layer_norm_epsilon'],
        name="encoder")