Example #1
    def __init__(self, params, train, name=None):
        super(EncoderStack, self).__init__(name=name)
        # self.layers = []
        # for idx in range(params["num_hidden_layers"]):  # this parameter is 6
        
        # 0. -------------------------------
        # Initialize the self-attention layer
        self_attention_layer_0 = attention_layer.SelfAttention(params["hidden_size"], params["num_heads"], params["attention_dropout"], train, n="Encoder self-attention 0", name="enc-selfatt-0")
        # Initialize the feed-forward (fully connected) layer
        feed_forward_network_0 = ffn_layer.FeedFowardNetwork(params["hidden_size"], params["filter_size"], params["relu_dropout"], train, params["allow_ffn_pad"], name="enc-ffn-0")
        
        # PrePostProcessingWrapper wraps each sublayer with pre/post-processing.
        # Concretely: layer_norm -> sublayer op -> dropout -> residual connection
        # (see the sketch after this example)
        self.self_attention_wrapper_0 = PrePostProcessingWrapper(self_attention_layer_0, params, train, name="enc-selfattwrap-0")
        self.feed_forward_wrapper_0 = PrePostProcessingWrapper(feed_forward_network_0, params, train, name="enc-ffnwrap-0")

        # 1. -------------------------------
        self_attention_layer_1 = attention_layer.SelfAttention(params["hidden_size"], params["num_heads"], params["attention_dropout"], train, n="Encoder self-attention 1", name="enc-selfatt-1")
        feed_forward_network_1 = ffn_layer.FeedFowardNetwork(params["hidden_size"], params["filter_size"], params["relu_dropout"], train, params["allow_ffn_pad"], name="enc-ffn-1")
        self.self_attention_wrapper_1 = PrePostProcessingWrapper(self_attention_layer_1, params, train, name="enc-selfattwrap-1")
        self.feed_forward_wrapper_1 = PrePostProcessingWrapper(feed_forward_network_1, params, train, name="enc-ffnwrap-1")
        
        # 2. -------------------------------
        self_attention_layer_2 = attention_layer.SelfAttention(params["hidden_size"], params["num_heads"], params["attention_dropout"], train, n="Encoder self-attention 2", name="enc-selfatt-2")
        feed_forward_network_2 = ffn_layer.FeedFowardNetwork(params["hidden_size"], params["filter_size"], params["relu_dropout"], train, params["allow_ffn_pad"], name="enc-ffn-2")
        self.self_attention_wrapper_2 = PrePostProcessingWrapper(self_attention_layer_2, params, train, name="enc-selfattwrap-2")
        self.feed_forward_wrapper_2 = PrePostProcessingWrapper(feed_forward_network_2, params, train, name="enc-ffnwrap-2")
        
        # Layer-norm layer, applied to the final output of the encoder stack
        self.output_normalization = layer_norm.LayerNormalization(params["hidden_size"], name="enc-norm")
    def __init__(self, params, train, name=None):
        super(DecoderStack, self).__init__(name=name)

        # 0 -----------------------
        self_attention_layer_0 = attention_layer.SelfAttention(params["hidden_size"], params["num_heads"], params["attention_dropout"], train, n="Decoder self-attention 0", name="dec-selfatt-0")
        enc_dec_attention_layer_0 = attention_layer.Attention(params["hidden_size"], params["num_heads"], params["attention_dropout"], train, n="Decoder-encoder attention 0", name="dec-enc-0")
        feed_forward_network_0 = ffn_layer.FeedFowardNetwork(params["hidden_size"], params["filter_size"], params["relu_dropout"], train, params["allow_ffn_pad"], name="dec-ffn-0")
        self.self_attention_wrapper_0 = PrePostProcessingWrapper(self_attention_layer_0, params, train, name="dec-selfattwrap-0")
        self.enc_dec_attention_wrapper_0 = PrePostProcessingWrapper(enc_dec_attention_layer_0, params, train, name="dec-encwrap-0")
        self.feed_forward_wrapper_0 = PrePostProcessingWrapper(feed_forward_network_0, params, train, name="dec-ffnwrap-0")

        # 1 -----------------------
        self_attention_layer_1 = attention_layer.SelfAttention(params["hidden_size"], params["num_heads"], params["attention_dropout"], train, n="Decoder self-attention 1", name="dec-selfatt-1")
        enc_dec_attention_layer_1 = attention_layer.Attention(params["hidden_size"], params["num_heads"], params["attention_dropout"], train, n="Decoder-encoder attention 1", name="dec-enc-1")
        feed_forward_network_1 = ffn_layer.FeedFowardNetwork(params["hidden_size"], params["filter_size"], params["relu_dropout"], train, params["allow_ffn_pad"], name="dec-ffn-1")
        self.self_attention_wrapper_1 = PrePostProcessingWrapper(self_attention_layer_1, params, train, name="dec-selfattwrap-1")
        self.enc_dec_attention_wrapper_1 = PrePostProcessingWrapper(enc_dec_attention_layer_1, params, train, name="dec-encwrap-1")
        self.feed_forward_wrapper_1 = PrePostProcessingWrapper(feed_forward_network_1, params, train, name="dec-ffnwrap-1")

        # 2 -----------------------
        self_attention_layer_2 = attention_layer.SelfAttention(params["hidden_size"], params["num_heads"], params["attention_dropout"], train, n="Decoder self-attention 2", name="dec-selfatt-2")
        enc_dec_attention_layer_2 = attention_layer.Attention(params["hidden_size"], params["num_heads"], params["attention_dropout"], train, n="Decoder-encoder attention 2", name="dec-enc-2")
        feed_forward_network_2 = ffn_layer.FeedFowardNetwork(params["hidden_size"], params["filter_size"], params["relu_dropout"], train, params["allow_ffn_pad"], name="dec-ffn-2")
        self.self_attention_wrapper_2 = PrePostProcessingWrapper(self_attention_layer_2, params, train, name="dec-selfattwrap-2")
        self.enc_dec_attention_wrapper_2 = PrePostProcessingWrapper(enc_dec_attention_layer_2, params, train, name="dec-encwrap-2")
        self.feed_forward_wrapper_2 = PrePostProcessingWrapper(feed_forward_network_2, params, train, name="dec-ffnwrap-2")

        # Layer-norm layer, applied to the final output of the decoder stack
        self.output_normalization = layer_norm.LayerNormalization(params["hidden_size"], name="dec-norm")
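The comments in the snippet above describe PrePostProcessingWrapper as applying layer normalization before the wrapped sublayer, then dropout and a residual connection after it. Below is a minimal sketch of such a wrapper, not the project's actual implementation: the "layer_postprocess_dropout" hyperparameter name is an assumption, and tf.keras.layers.LayerNormalization stands in for the project's layer_norm.LayerNormalization.

import tensorflow as tf


class PrePostProcessingWrapper(object):
    """Sketch: layer_norm -> wrapped sublayer -> dropout -> residual connection."""

    def __init__(self, layer, params, train, name=None):
        self.layer = layer
        self.train = train
        self.name = name
        # Hyperparameter name is an assumption, not taken from the snippets above.
        self.postprocess_dropout = params["layer_postprocess_dropout"]
        # Pre-processing: one normalization instance per wrapped sublayer
        # (stand-in for the project's layer_norm.LayerNormalization).
        self.layer_norm = tf.keras.layers.LayerNormalization(epsilon=1e-6)

    def __call__(self, x, *args, **kwargs):
        # Pre-processing: normalize the sublayer input.
        y = self.layer_norm(x)
        # Run the wrapped sublayer (self-attention, enc-dec attention, or FFN).
        y = self.layer(y, *args, **kwargs)
        # Post-processing: dropout (training only) and residual connection.
        if self.train:
            y = tf.nn.dropout(y, rate=self.postprocess_dropout)  # TF 2.x signature
        return x + y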
Example #3
    def __init__(self, train):
        super(EncoderStack, self).__init__()
        self.layers = []
        for _ in range(hiddenlayers):
            # Create sublayers for each layer.
            self_attention_layer = attention_layer.SelfAttention(
                hiddensize, numhead, attentiondropout, train)
            feed_forward_network = ffn_layer.FeedFowardNetwork(
                hiddensize, filtersize, reludropout, train, True)

            self.layers.append([
                PrePostProcessingWrapper(self_attention_layer, train),
                PrePostProcessingWrapper(feed_forward_network, train)
            ])

        # Create final layer normalization layer.
        self.output_normalization = LayerNormalization(hiddensize)
Example #4
    def __init__(self, params, train):
        super(EncoderStack, self).__init__()
        self.layers = []
        for _ in range(params.num_hidden_layers):
            # Create sublayers for each layer.
            self_attention_layer = attention_layer.SelfAttention(
                params.hidden_size, params.num_heads, params.attention_dropout,
                train)
            feed_forward_network = ffn_layer.FeedFowardNetwork(
                params.hidden_size, params.filter_size, params.relu_dropout,
                train, params.allow_ffn_pad)
            self.layers.append([
                PrePostProcessingWrapper(self_attention_layer, params, train),
                PrePostProcessingWrapper(feed_forward_network, params, train)
            ])

        # Create final layer normalization layer.
        self.output_normalization = LayerNormalization(params.hidden_size)
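The constructor above only builds the stack; how the layers are consumed is not shown in these snippets. A rough sketch of the forward pass over self.layers follows; the method name call and the argument names encoder_inputs, attention_bias, and inputs_padding are assumptions inferred from the constructor arguments, not taken from the example.

    def call(self, encoder_inputs, attention_bias, inputs_padding):
        # Hypothetical forward pass over the stack built in __init__.
        for self_attention_wrapper, feed_forward_wrapper in self.layers:
            # Wrapped self-attention sublayer (layer_norm -> attention -> dropout -> residual).
            encoder_inputs = self_attention_wrapper(encoder_inputs, attention_bias)
            # Wrapped position-wise feed-forward sublayer.
            encoder_inputs = feed_forward_wrapper(encoder_inputs, inputs_padding)
        # Single layer normalization applied to the final stack output.
        return self.output_normalization(encoder_inputs)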
Example #5
    def __init__(self, params, train):
        super(DecoderStack, self).__init__()
        self.layers = []
        for _ in range(params["num_hidden_layers"]):
            self_attention_layer = attention_layer.SelfAttention(
                params["hidden_size"], params["num_heads"],
                params["attention_dropout"], train)
            enc_dec_attention_layer = attention_layer.Attention(
                params["hidden_size"], params["num_heads"],
                params["attention_dropout"], train)
            feed_forward_network = ffn_layer.FeedFowardNetwork(
                params["hidden_size"], params["filter_size"],
                params["relu_dropout"], train, params["allow_ffn_pad"])

            self.layers.append([
                PrePostProcessingWrapper(self_attention_layer, params, train),
                PrePostProcessingWrapper(enc_dec_attention_layer, params,
                                         train),
                PrePostProcessingWrapper(feed_forward_network, params, train)
            ])

        self.output_normalization = LayerNormalization(params["hidden_size"])
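Similarly, each decoder layer built above carries three wrapped sublayers. The sketch below is only illustrative: the argument names (decoder_inputs, encoder_outputs, decoder_self_attention_bias, attention_bias) are assumptions, and a real implementation may also thread padding information or a decoding cache through these calls.

    def call(self, decoder_inputs, encoder_outputs,
             decoder_self_attention_bias, attention_bias):
        # Hypothetical forward pass over the layers built in __init__.
        for (self_attention_wrapper, enc_dec_attention_wrapper,
             feed_forward_wrapper) in self.layers:
            # Masked self-attention over the decoder inputs.
            decoder_inputs = self_attention_wrapper(
                decoder_inputs, decoder_self_attention_bias)
            # Attention from decoder positions to the encoder outputs.
            decoder_inputs = enc_dec_attention_wrapper(
                decoder_inputs, encoder_outputs, attention_bias)
            # Wrapped position-wise feed-forward sublayer.
            decoder_inputs = feed_forward_wrapper(decoder_inputs)
        # Layer normalization of the final decoder output.
        return self.output_normalization(decoder_inputs)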