Example 1
    def build(self, input_shape):
        """Builds the encoder stack."""
        params = self.params
        # Feed-forward network used as the encoder's projection layer.
        self.projection_layer = ffn_layer.FeedForwardNetwork(
            params["enc_hidden_size"], params["enc_filter_size"],
            params["relu_dropout"])

        for _ in range(params["num_hidden_layers"]):
            # Create sublayers for each layer.
            self_attention_layer = attention_layer.SelfAttention(
                params["enc_hidden_size"], params["num_heads"],
                params["attention_dropout"])
            feed_forward_network = ffn_layer.FeedForwardNetwork(
                params["enc_hidden_size"], params["enc_filter_size"],
                params["relu_dropout"])

            self.layers.append([
                PrePostProcessingWrapper(self_attention_layer, params,
                                         params["enc_hidden_size"]),
                PrePostProcessingWrapper(feed_forward_network, params,
                                         params["enc_hidden_size"])
            ])

        # Create final layer normalization layer.
        self.output_normalization = LayerNormalization(
            params["enc_hidden_size"])
        super(EncoderStack, self).build(input_shape)
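Both stacks wrap every sublayer in a PrePostProcessingWrapper, whose definition is not part of these examples. The sketch below is a minimal reconstruction, assuming the pre-norm residual pattern of the reference Transformer implementation (layer normalization before the sublayer, dropout after it, then a residual connection). The "layer_postprocess_dropout" key and the optional hidden_size argument are assumptions inferred from how the wrapper is called above, not confirmed by the source.

import tensorflow as tf

class PrePostProcessingWrapper(tf.keras.layers.Layer):
    """Pre-norm residual wrapper: a minimal sketch, not the original source.

    Assumed behavior, following the reference Transformer implementation:
    output = x + dropout(sublayer(layer_norm(x))).
    """

    def __init__(self, layer, params, hidden_size=None):
        super(PrePostProcessingWrapper, self).__init__()
        self.layer = layer
        # Example 1 passes params["enc_hidden_size"] explicitly; Example 2
        # omits the argument, so we assume a fallback to params["hidden_size"].
        self.hidden_size = hidden_size or params["hidden_size"]
        # Assumed hyperparameter name, taken from the reference implementation.
        self.postprocess_dropout = params["layer_postprocess_dropout"]

    def build(self, input_shape):
        # Same LayerNormalization class used throughout the examples.
        self.layer_norm = LayerNormalization(self.hidden_size)
        super(PrePostProcessingWrapper, self).build(input_shape)

    def call(self, x, *args, **kwargs):
        training = kwargs.get("training", False)
        y = self.layer_norm(x)              # pre-processing: layer norm
        y = self.layer(y, *args, **kwargs)  # wrapped sublayer
        if training:
            y = tf.nn.dropout(y, rate=self.postprocess_dropout)
        return x + y                        # post-processing: residual add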
Example 2
    def build(self, input_shape):
        """Builds the decoder stack."""
        params = self.params
        for _ in range(params["num_hidden_layers"]):
            # Create the three decoder sublayers: self-attention,
            # encoder-decoder attention, and feed-forward network.
            self_attention_layer = attention_layer.SelfAttention(
                params["hidden_size"], params["num_heads"],
                params["attention_dropout"])
            enc_dec_attention_layer = attention_layer.Attention(
                params["hidden_size"], params["num_heads"],
                params["attention_dropout"])
            feed_forward_network = ffn_layer.FeedForwardNetwork(
                params["hidden_size"], params["filter_size"],
                params["relu_dropout"])

            self.layers.append([
                PrePostProcessingWrapper(self_attention_layer, params),
                PrePostProcessingWrapper(enc_dec_attention_layer, params),
                PrePostProcessingWrapper(feed_forward_network, params)
            ])
        # Create final layer normalization layer.
        self.output_normalization = LayerNormalization(params["hidden_size"])
        super(DecoderStack, self).build(input_shape)
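Both build methods finish by creating an output LayerNormalization sized to the stack's hidden dimension. Its definition is also outside these examples; the sketch below assumes the standard layer-norm computation (normalize over the last axis, then apply a learned scale and bias), as in the reference Transformer implementation.

import tensorflow as tf

class LayerNormalization(tf.keras.layers.Layer):
    """Layer normalization over the last axis: a minimal sketch.

    Assumes the standard formulation with a learned scale and bias.
    """

    def __init__(self, hidden_size):
        super(LayerNormalization, self).__init__()
        self.hidden_size = hidden_size

    def build(self, input_shape):
        self.scale = self.add_weight(
            "layer_norm_scale", shape=[self.hidden_size],
            initializer=tf.ones_initializer())
        self.bias = self.add_weight(
            "layer_norm_bias", shape=[self.hidden_size],
            initializer=tf.zeros_initializer())
        super(LayerNormalization, self).build(input_shape)

    def call(self, x, epsilon=1e-6):
        # Normalize each position to zero mean and unit variance, then
        # apply the learned affine transform.
        mean = tf.reduce_mean(x, axis=[-1], keepdims=True)
        variance = tf.reduce_mean(tf.square(x - mean), axis=[-1], keepdims=True)
        norm_x = (x - mean) * tf.math.rsqrt(variance + epsilon)
        return norm_x * self.scale + self.bias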
Example 3
    def __init__(self, params, name=None):
        """Initialize layers to build Transformer model.

        Args:
          params: hyperparameter object defining layer sizes, dropout values, etc.
          name: name of the model.
        """
        super(Transformer, self).__init__(name=name)
        self.params = params
        # Embedding layer whose weights are shared with the output softmax.
        self.embedding_softmax_layer = embedding_layer.EmbeddingSharedWeights(
            params["vocab_size"], params["hidden_size"])
        self.encoder_px_stack = EncoderStack(params, "px_encoder")
        self.encoder_npx_stack = EncoderStack(params, "npx_encoder")
        self.property_lstm = ffn_layer.PropertyLSTMNetwork(
            params["property_inner_size"],
            params["property_size"],
            params["relu_dropout"],
        )
        self.property_transformation_dense = ffn_layer.FeedForwardNetwork(
            params["enc_hidden_size"], params["enc_filter_size"],
            params["relu_dropout"])
        self.decoder_stack = DecoderStack(params)
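Example 3's constructor only reads hyperparameters out of params, so collecting the keys referenced across all three examples gives a picture of the expected configuration. The dictionary below is a hypothetical illustration: the keys come from the code above (plus the dropout key assumed by the wrapper sketch after Example 1), while the values are placeholders, not values from the original project.

# Hypothetical configuration. Keys are taken from the examples above;
# values are illustrative placeholders.
params = {
    "vocab_size": 32000,
    "hidden_size": 512,                # decoder width
    "filter_size": 2048,               # decoder FFN inner size
    "enc_hidden_size": 512,            # encoder width
    "enc_filter_size": 2048,           # encoder FFN inner size
    "num_hidden_layers": 6,
    "num_heads": 8,
    "attention_dropout": 0.1,
    "relu_dropout": 0.1,
    "layer_postprocess_dropout": 0.1,  # assumed key; see the wrapper sketch
    "property_inner_size": 256,        # PropertyLSTMNetwork sizes
    "property_size": 64,
}

model = Transformer(params, name="transformer")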