    def __init__(self, params, train):
        super(DecoderStack, self).__init__()
        self.layers = []
        for _ in range(params["num_hidden_layers"]):
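            # Create sublayers for each layer, plus an extra projection layer.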
            self_attention_layer = attention_layer.SelfAttention(
                params["hidden_size"], params["num_heads"],
                params["attention_dropout"], train)
            feed_forward_network = ffn_layer.FeedFowardNetwork(  # NOTYPO
                params["hidden_size"], params["filter_size"],
                params["relu_dropout"], train, params["allow_ffn_pad"])

            # Extra MLP layer: projects the concatenated
            # [encoder activations, hidden state] back to the hidden size.
            proj_layer = tf.layers.Dense(
                params["hidden_size"],
                use_bias=True,
                name="proj_layer")

            self.layers.append([
                transformer.PrePostProcessingWrapper(
                    self_attention_layer, params, train),
                transformer.PrePostProcessingWrapper(
                    feed_forward_network, params, train),
                proj_layer,
            ])

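        # Create final layer normalization layer.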
        self.output_normalization = transformer.LayerNormalization(
            params["hidden_size"])
    def __init__(self, params, train):
        super(EncoderStack, self).__init__()
        self.layers = []
        for _ in range(params["num_hidden_layers"]):
            # Create sublayers for each layer.
            self_attention_layer = attention_layer.SelfAttention(
                params["hidden_size"], params["num_heads"],
                params["attention_dropout"], train)
            feed_forward_network = ffn_layer.FeedFowardNetwork(
                params["hidden_size"], params["filter_size"],
                params["relu_dropout"], train, params["allow_ffn_pad"])

            self.layers.append([
                PrePostProcessingWrapper(self_attention_layer, params, train),
                PrePostProcessingWrapper(feed_forward_network, params, train)])

        # Create final layer normalization layer.
        self.output_normalization = LayerNormalization(params["hidden_size"])
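
    # Sketch only (not from the original file): a minimal forward pass showing
    # how the sublayers built above are typically applied in the standard
    # Transformer EncoderStack. The argument names (encoder_inputs,
    # attention_bias, inputs_padding) are assumptions.
    def call(self, encoder_inputs, attention_bias, inputs_padding):
        for n, layer in enumerate(self.layers):
            self_attention_layer = layer[0]
            feed_forward_network = layer[1]
            with tf.variable_scope("layer_%d" % n):
                with tf.variable_scope("self_attention"):
                    encoder_inputs = self_attention_layer(
                        encoder_inputs, attention_bias)
                with tf.variable_scope("ffn"):
                    encoder_inputs = feed_forward_network(
                        encoder_inputs, inputs_padding)
        return self.output_normalization(encoder_inputs)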