    def __init__(self, params, train, name=None):
        """Initializes a decoder stack with three explicitly unrolled layers."""
        super(DecoderStack, self).__init__(name=name)

        # 0 -----------------------
        self_attention_layer_0 = attention_layer.SelfAttention(
            params["hidden_size"], params["num_heads"], params["attention_dropout"],
            train, n="Decoder self-attention 0", name="dec-selfatt-0")
        enc_dec_attention_layer_0 = attention_layer.Attention(
            params["hidden_size"], params["num_heads"], params["attention_dropout"],
            train, n="Decoder-encoder attention 0", name="dec-enc-0")
        feed_forward_network_0 = ffn_layer.FeedFowardNetwork(
            params["hidden_size"], params["filter_size"], params["relu_dropout"],
            train, params["allow_ffn_pad"], name="dec-ffn-0")
        self.self_attention_wrapper_0 = PrePostProcessingWrapper(
            self_attention_layer_0, params, train, name="dec-selfattwrap-0")
        self.enc_dec_attention_wrapper_0 = PrePostProcessingWrapper(
            enc_dec_attention_layer_0, params, train, name="dec-encwrap-0")
        self.feed_forward_wrapper_0 = PrePostProcessingWrapper(
            feed_forward_network_0, params, train, name="dec-ffnwrap-0")

        # 1 -----------------------
        self_attention_layer_1 = attention_layer.SelfAttention(
            params["hidden_size"], params["num_heads"], params["attention_dropout"],
            train, n="Decoder self-attention 1", name="dec-selfatt-1")
        enc_dec_attention_layer_1 = attention_layer.Attention(
            params["hidden_size"], params["num_heads"], params["attention_dropout"],
            train, n="Decoder-encoder attention 1", name="dec-enc-1")
        feed_forward_network_1 = ffn_layer.FeedFowardNetwork(
            params["hidden_size"], params["filter_size"], params["relu_dropout"],
            train, params["allow_ffn_pad"], name="dec-ffn-1")
        self.self_attention_wrapper_1 = PrePostProcessingWrapper(
            self_attention_layer_1, params, train, name="dec-selfattwrap-1")
        self.enc_dec_attention_wrapper_1 = PrePostProcessingWrapper(
            enc_dec_attention_layer_1, params, train, name="dec-encwrap-1")
        self.feed_forward_wrapper_1 = PrePostProcessingWrapper(
            feed_forward_network_1, params, train, name="dec-ffnwrap-1")

        # 2 -----------------------
        self_attention_layer_2 = attention_layer.SelfAttention(
            params["hidden_size"], params["num_heads"], params["attention_dropout"],
            train, n="Decoder self-attention 2", name="dec-selfatt-2")
        enc_dec_attention_layer_2 = attention_layer.Attention(
            params["hidden_size"], params["num_heads"], params["attention_dropout"],
            train, n="Decoder-encoder attention 2", name="dec-enc-2")
        feed_forward_network_2 = ffn_layer.FeedFowardNetwork(
            params["hidden_size"], params["filter_size"], params["relu_dropout"],
            train, params["allow_ffn_pad"], name="dec-ffn-2")
        self.self_attention_wrapper_2 = PrePostProcessingWrapper(
            self_attention_layer_2, params, train, name="dec-selfattwrap-2")
        self.enc_dec_attention_wrapper_2 = PrePostProcessingWrapper(
            enc_dec_attention_layer_2, params, train, name="dec-encwrap-2")
        self.feed_forward_wrapper_2 = PrePostProcessingWrapper(
            feed_forward_network_2, params, train, name="dec-ffnwrap-2")

        # Final layer normalization applied to the output of the last block
        self.output_normalization = layer_norm.LayerNormalization(params["hidden_size"], name="dec-norm")
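
A minimal sketch, not taken from the original source, of how the three unrolled wrapper triples above are typically chained in the decoder's forward pass. The method name `call`, the bias tensors (`decoder_self_attention_bias` masking future positions, `attention_bias` masking encoder padding), and the argument order follow the official Transformer convention and are assumptions here:

    def call(self, decoder_inputs, encoder_outputs,
             decoder_self_attention_bias, attention_bias):
        # Hedged sketch: argument names are assumptions, not part of the example above.
        x = decoder_inputs
        # Block 0: masked self-attention, attention over encoder outputs, then FFN.
        x = self.self_attention_wrapper_0(x, decoder_self_attention_bias)
        x = self.enc_dec_attention_wrapper_0(x, encoder_outputs, attention_bias)
        x = self.feed_forward_wrapper_0(x)
        # Block 1
        x = self.self_attention_wrapper_1(x, decoder_self_attention_bias)
        x = self.enc_dec_attention_wrapper_1(x, encoder_outputs, attention_bias)
        x = self.feed_forward_wrapper_1(x)
        # Block 2
        x = self.self_attention_wrapper_2(x, decoder_self_attention_bias)
        x = self.enc_dec_attention_wrapper_2(x, encoder_outputs, attention_bias)
        x = self.feed_forward_wrapper_2(x)
        # Normalize the output of the last block.
        return self.output_normalization(x)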
Example #2
    def build(self, input_shape):
        """Builds the decoder stack."""
        params = self.params
        self_attention_layer = attention_layer.SelfAttention(
            params["hidden_size"], params["num_heads"],
            params["attention_dropout"])
        enc_dec_attention_layer = attention_layer.Attention(
            params["hidden_size"], params["num_heads"],
            params["attention_dropout"])
        feed_forward_network = feed_forward_layer.FeedForwardNetwork(
            params["hidden_size"], params["filter_size"], params["relu_dropout"], 0.02)
        self.self_attention_layer = PrePostProcessingWrapper(self_attention_layer, params)
        self.enc_dec_attention_layer = PrePostProcessingWrapper(enc_dec_attention_layer, params)
        self.feed_forward_network = PrePostProcessingWrapper(feed_forward_network, params)
        self.output_normalization = LayerNormalization(params["hidden_size"])
        super(DecoderStack, self).build(input_shape)
Example #3
    def build(self, input_shape):
        """Builds the decoder stack."""
        params = self.params
        # One (self-attention, enc-dec attention, feed-forward) triple per decoder layer.
        # self.layers is expected to have been created (e.g. self.layers = []) in __init__.
        for _ in range(params['num_hidden_layers']):
            self_attention_layer = attention_layer.SelfAttention(
                params['hidden_size'], params['num_heads'], params['attention_dropout'])
            enc_dec_attention_layer = attention_layer.Attention(
                params['hidden_size'], params['num_heads'], params['attention_dropout'])
            feed_forward_network = ffn_layer.FeedForwardNetwork(
                params['hidden_size'], params['filter_size'], params['relu_dropout'])

            self.layers.append([
                PrePostProcessingWrapper(self_attention_layer, params),
                PrePostProcessingWrapper(enc_dec_attention_layer, params),
                PrePostProcessingWrapper(feed_forward_network, params)
            ])
        self.output_normalization = tf.keras.layers.LayerNormalization(
            epsilon=1e-6, dtype='float32')
        super(DecoderStack, self).build(input_shape)
Example #4
    def __init__(self, params, train):
        super(DecoderStack, self).__init__()
        self.layers = []
        for _ in range(params["num_hidden_layers"]):
            self_attention_layer = attention_layer.SelfAttention(
                params["hidden_size"], params["num_heads"],
                params["attention_dropout"], train)
            enc_dec_attention_layer = attention_layer.Attention(
                params["hidden_size"], params["num_heads"],
                params["attention_dropout"], train)
            feed_forward_network = ffn_layer.FeedFowardNetwork(
                params["hidden_size"], params["filter_size"],
                params["relu_dropout"], train, params["allow_ffn_pad"])

            self.layers.append([
                PrePostProcessingWrapper(self_attention_layer, params, train),
                PrePostProcessingWrapper(enc_dec_attention_layer, params,
                                         train),
                PrePostProcessingWrapper(feed_forward_network, params, train)
            ])

        self.output_normalization = LayerNormalization(params["hidden_size"])
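
For the loop-built variants in Examples #3 and #4, the self.layers list is typically consumed by iterating over the (self-attention, enc-dec attention, feed-forward) triples in order, in the spirit of the official Transformer's DecoderStack.call. The sketch below assumes that convention; the method name and bias-tensor arguments are illustrative, not part of the examples above:

    def call(self, decoder_inputs, encoder_outputs,
             decoder_self_attention_bias, attention_bias):
        # Hedged sketch: walks the per-layer triples appended in build/__init__.
        for self_attention_layer, enc_dec_attention_layer, ffn in self.layers:
            # Masked decoder self-attention over the target sequence.
            decoder_inputs = self_attention_layer(
                decoder_inputs, decoder_self_attention_bias)
            # Attention over the encoder outputs (padding masked via attention_bias).
            decoder_inputs = enc_dec_attention_layer(
                decoder_inputs, encoder_outputs, attention_bias)
            # Position-wise feed-forward network.
            decoder_inputs = ffn(decoder_inputs)
        return self.output_normalization(decoder_inputs)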