Example 1
    def build(self, input_shape):
        """Builds the encoder stack."""
        params = self.params
        self.projection_layer = ffn_layer.FeedForwardNetwork(
            params["enc_hidden_size"], params["enc_filter_size"],
            params["relu_dropout"])

        for _ in range(params["num_hidden_layers"]):
            # Create sublayers for each layer.
            self_attention_layer = attention_layer.SelfAttention(
                params["enc_hidden_size"], params["num_heads"],
                params["attention_dropout"])
            feed_forward_network = ffn_layer.FeedForwardNetwork(
                params["enc_hidden_size"], params["enc_filter_size"],
                params["relu_dropout"])

            self.layers.append([
                PrePostProcessingWrapper(self_attention_layer, params,
                                         params["enc_hidden_size"]),
                PrePostProcessingWrapper(feed_forward_network, params,
                                         params["enc_hidden_size"])
            ])

        # Create final layer normalization layer.
        self.output_normalization = LayerNormalization(
            params["enc_hidden_size"])
        super(EncoderStack, self).build(input_shape)
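
Both examples route every sublayer through PrePostProcessingWrapper, whose definition is not shown. The sketch below is a minimal, hedged reconstruction of the pre-norm pattern such a wrapper typically implements (layer normalization before the sublayer, then dropout and a residual add). It assumes params carries a "layer_postprocess_dropout" entry, substitutes the stock Keras LayerNormalization for the custom LayerNormalization used above, and includes an optional hidden_size argument only to mirror Example 1's three-argument call; treat the exact signature as an assumption.

import tensorflow as tf


class PrePostProcessingWrapper(tf.keras.layers.Layer):
    """Pre-norm wrapper: layer norm -> wrapped layer -> dropout -> residual."""

    def __init__(self, layer, params, hidden_size=None):
        super(PrePostProcessingWrapper, self).__init__()
        self.layer = layer
        self.params = params
        # hidden_size mirrors Example 1's three-argument usage; the stock
        # Keras normalization below does not need it (assumption).
        self.hidden_size = hidden_size
        self.postprocess_dropout = params["layer_postprocess_dropout"]

    def build(self, input_shape):
        # The examples use a custom LayerNormalization(hidden_size); the
        # built-in Keras layer stands in here to keep the sketch runnable.
        self.layer_norm = tf.keras.layers.LayerNormalization(axis=-1)
        super(PrePostProcessingWrapper, self).build(input_shape)

    def call(self, x, *args, **kwargs):
        training = kwargs.get("training", False)
        # Pre-processing: normalize the input before the wrapped sublayer.
        y = self.layer_norm(x)
        y = self.layer(y, *args, **kwargs)
        # Post-processing: dropout during training, then a residual add.
        if training:
            y = tf.nn.dropout(y, rate=self.postprocess_dropout)
        return x + y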
Example 2
    def build(self, input_shape):
        """Builds the decoder stack."""
        params = self.params
        for _ in range(params["num_hidden_layers"]):
            # Create sublayers for each layer.
            self_attention_layer = attention_layer.SelfAttention(
                params["hidden_size"], params["num_heads"],
                params["attention_dropout"])
            enc_dec_attention_layer = attention_layer.Attention(
                params["hidden_size"], params["num_heads"],
                params["attention_dropout"])
            feed_forward_network = ffn_layer.FeedForwardNetwork(
                params["hidden_size"], params["filter_size"],
                params["relu_dropout"])

            self.layers.append([
                PrePostProcessingWrapper(self_attention_layer, params),
                PrePostProcessingWrapper(enc_dec_attention_layer, params),
                PrePostProcessingWrapper(feed_forward_network, params)
            ])
        # Create final layer normalization layer.
        self.output_normalization = LayerNormalization(params["hidden_size"])
        super(DecoderStack, self).build(input_shape)
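
The build methods above only construct the sublayers; a separate call method (not shown) threads activations through them. The following is a sketch of how such a forward pass over self.layers could look for the decoder stack of Example 2, assuming the attention biases and encoder outputs are passed in as arguments and that each wrapped sublayer accepts a training keyword:

    def call(self, decoder_inputs, encoder_outputs,
             decoder_self_attention_bias, attention_bias, training):
        """Sketch of the decoder forward pass over the sublayers from build."""
        x = decoder_inputs
        for n, layer in enumerate(self.layers):
            self_attention_layer = layer[0]
            enc_dec_attention_layer = layer[1]
            feed_forward_network = layer[2]
            with tf.name_scope("layer_%d" % n):
                # Masked self-attention over the decoder inputs.
                x = self_attention_layer(
                    x, decoder_self_attention_bias, training=training)
                # Cross-attention from the decoder into the encoder outputs.
                x = enc_dec_attention_layer(
                    x, encoder_outputs, attention_bias, training=training)
                # Position-wise feed-forward network.
                x = feed_forward_network(x, training=training)
        # Final normalization created at the end of build.
        return self.output_normalization(x)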