    def __init__(self, config, padding_id=0, **kwargs):
        super(Transformer, self).__init__(**kwargs)

        # Set up the bias layers.
        self._padding_bias = bias.PaddingBias(padding_id, name="padding_bias")
        self._causal_bias = bias.CausalBias(name="causal_bias")

        # Set up the inner encoder and decoder and output layer.
        self._encoder = encoder.TransformerEncoder(
            vocab_size=config['vocab_size'],
            num_layers=config['num_layers'],
            d_model=config['d_model'],
            d_filter=config['d_filter'],
            num_heads=config['num_heads'],
            dropout_rate=config['dropout_rate'],
            ffn_activation=config['ffn_activation'],
            layer_norm_epsilon=config['layer_norm_epsilon'],
            name="encoder")
        self._decoder = decoder.TransformerDecoder(
            vocab_size=config['vocab_size'],
            num_layers=config['num_layers'],
            d_model=config['d_model'],
            d_filter=config['d_filter'],
            num_heads=config['num_heads'],
            dropout_rate=config['dropout_rate'],
            ffn_activation=config['ffn_activation'],
            layer_norm_epsilon=config['layer_norm_epsilon'],
            encoder_decoder=True,
            name="decoder")
        self._to_out = tf.keras.layers.Dense(config['output_vocab_size'])
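For orientation, here is a minimal instantiation sketch. The hyperparameter values below are illustrative assumptions; only the config keys themselves come from the constructor above.

    # Minimal usage sketch (hypothetical values; assumes the Transformer
    # class above is importable).
    config = {
        'vocab_size': 32000,
        'output_vocab_size': 32000,
        'num_layers': 6,
        'd_model': 512,
        'd_filter': 2048,
        'num_heads': 8,
        'dropout_rate': 0.1,
        'ffn_activation': 'relu',
        'layer_norm_epsilon': 1e-6,
    }
    model = Transformer(config, padding_id=0)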
Example #2
    def test_output_value(self):
        padding_id = -1
        # The padded position (id -1) maps to a large negative bias; real
        # tokens map to 0. Output shape is [batch, 1, 1, length].
        x = np.array([[0, 0, -1]])
        expected_output = np.array([[[[0, 0, -1e9]]]])

        padding_bias = bias.PaddingBias(padding_id=padding_id)
        output = padding_bias(x)
        self.assertAllEqual(expected_output, output)
Example #3
    def test_output_shape(self):
        batch_size = 2
        length = 10
        padding_id = -1
        x = tf.ones([batch_size, length])

        padding_bias = bias.PaddingBias(padding_id=padding_id)
        output = padding_bias(x)
        # The bias broadcasts over attention heads and query positions.
        self.assertShapeEqual(np.zeros((batch_size, 1, 1, length)), output)
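The two tests above pin down the layer's contract: real tokens map to a bias of 0, positions equal to padding_id map to -1e9, and the result has shape [batch, 1, 1, length] so it can broadcast across attention heads and query positions. A minimal sketch of a layer meeting that contract follows; the actual bias.PaddingBias implementation is not shown here and may differ internally.

    import tensorflow as tf

    class PaddingBias(tf.keras.layers.Layer):
        """Maps token ids to an additive attention bias.

        Minimal sketch matching the tests above; the real bias.PaddingBias
        may be implemented differently.
        """

        def __init__(self, padding_id=0, **kwargs):
            super(PaddingBias, self).__init__(**kwargs)
            self._padding_id = padding_id

        def call(self, x):
            # 1.0 where x equals the padding id, 0.0 elsewhere.
            is_padding = tf.cast(tf.equal(x, self._padding_id), tf.float32)
            # Large negative bias at padded positions so softmax ignores
            # them; reshape to [batch, 1, 1, length] for broadcasting over
            # heads and query positions.
            attention_bias = is_padding * -1e9
            return attention_bias[:, tf.newaxis, tf.newaxis, :]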
    def __init__(self, config, padding_id=0, **kwargs):
        super(TransformerEncoder, self).__init__(**kwargs)

        # Set up the bias layer.
        self._padding_bias = bias.PaddingBias(padding_id, name="padding_bias")

        # Set up the inner encoder.
        self._encoder = encoder.TransformerEncoder(
            vocab_size=config['vocab_size'],
            num_layers=config['num_layers'],
            d_model=config['d_model'],
            d_filter=config['d_filter'],
            num_heads=config['num_heads'],
            dropout_rate=config['dropout_rate'],
            ffn_activation=config['ffn_activation'],
            layer_norm_epsilon=config['layer_norm_epsilon'],
            name="encoder")