def __init__(self, params):
  super(DecoderStack, self).__init__()
  self.params = params
  self.layers = []
  for _ in range(params.decoder_num_layers):
    # Create sublayers for each decoder layer: a LightConv layer standing
    # in for self-attention, encoder-decoder attention, and a
    # feed-forward network.
    self_attention_layer = LightConv(params, padding='VALID')
    enc_dec_attention_layer = MultiHeadAttentionLayer(
        params.num_heads, params.hidden_size, params.keep_prob)
    feed_forward_network = FeedForwardLayer(
        params.hidden_size, params.ff_size, params.keep_prob)
    self.layers.append([
        PrePostProcessingWrapper(self_attention_layer, params),
        PrePostProcessingWrapper(enc_dec_attention_layer, params),
        PrePostProcessingWrapper(feed_forward_network, params)
    ])
  # Create final layer normalization layer.
  self.output_normalization = LayerNormalization(params.hidden_size)
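# `PrePostProcessingWrapper`, `LightConv`, and the attention/feed-forward
# layers are defined elsewhere in the model. As a reference point, a
# minimal sketch of the wrapper is given below, assuming the common
# pre-norm Transformer pattern (layer norm -> sublayer -> dropout ->
# residual). Names and signatures here are illustrative only, not the
# repo's actual code.
import tensorflow as tf

class PrePostProcessingWrapper(tf.keras.layers.Layer):
  """Wraps a sublayer with layer norm, dropout, and a residual add."""

  def __init__(self, layer, params):
    super(PrePostProcessingWrapper, self).__init__()
    self.layer = layer
    self.keep_prob = params.keep_prob
    self.layer_norm = tf.keras.layers.LayerNormalization(epsilon=1e-6)

  def call(self, x, *args, training=False, **kwargs):
    # Pre-processing: normalize the sublayer input.
    y = self.layer_norm(x)
    y = self.layer(y, *args, **kwargs)
    # Post-processing: dropout during training, then residual connection.
    if training:
      y = tf.nn.dropout(y, rate=1.0 - self.keep_prob)
    return x + y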
def __init__(self, params):
  super(EncoderStack, self).__init__()
  self.params = params
  self.layers = []
  for _ in range(params.encoder_num_layers):
    # Create sublayers for each layer.
    self_attention_layer = SelfAttentionLayer(
        params.num_heads, params.hidden_size, params.keep_prob)
    feed_forward_network = FeedForwardLayer(
        params.hidden_size, params.ff_size, params.keep_prob)
    self.layers.append([
        PrePostProcessingWrapper(self_attention_layer, params),
        PrePostProcessingWrapper(feed_forward_network, params)
    ])
  # Create final layer normalization layer.
  self.output_normalization = LayerNormalization(params.hidden_size)
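# How the stack built above is consumed: a hedged sketch of the encoder's
# forward pass, assuming each `PrePostProcessingWrapper` takes the running
# activations plus its sublayer's extra arguments. Argument names
# (`attention_bias`, `training`) are illustrative, not the repo's API.
def call(self, encoder_inputs, attention_bias, training=False):
  for n, layer in enumerate(self.layers):
    self_attention_layer, feed_forward_network = layer
    with tf.name_scope('layer_%d' % n):
      # Self-attention over the running activations, then the
      # feed-forward network; each wrapper adds norm, dropout, and a
      # residual connection around its sublayer.
      encoder_inputs = self_attention_layer(
          encoder_inputs, attention_bias, training=training)
      encoder_inputs = feed_forward_network(
          encoder_inputs, training=training)
  # Normalize the final output of the stack.
  return self.output_normalization(encoder_inputs)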