Example 1
class V_FFNBlock(object):
    """ Defines a feed-forward block made up of a pre-processing (layer-norm) layer, two stacked feed-forward
    layers, and a post-processing (dropout) layer, applied after the source embeddings. """
    def __init__(self, config, in_size, out_size, float_dtype, training):

        self.in_size = in_size
        self.middle_size = config.pre_source_ffn_middle_size  # defaults to 2048
        self.out_size = out_size
        self.float_dtype = float_dtype
        self.training = training

        # Build layers
        self.pre_ffn = ProcessingLayer(self.in_size,
                                       use_layer_norm=True,
                                       dropout_rate=0.,
                                       training=training,
                                       name='pre_ffn_sublayer')

        self.ffn1 = FeedForwardLayer(
            self.in_size,  # fully-connected (feed-forward) network
            self.middle_size,
            float_dtype,
            use_bias=True,
            activation=tf.nn.relu,
            use_layer_norm=False,
            dropout_rate=config.transformer_dropout_relu,
            training=training,
            name='vffn_sublayer1')

        self.ffn2 = FeedForwardLayer(
            self.middle_size,
            self.out_size,
            float_dtype,
            use_bias=True,
            activation=tf.nn.relu,
            use_layer_norm=False,
            dropout_rate=config.transformer_dropout_relu,
            training=training,
            name='vffn_sublayer2')

        # Apply dropout to the inputs (plus the residual connection, if one is passed to forward)
        self.post_ffn = ProcessingLayer(
            config.state_size,
            use_layer_norm=False,
            dropout_rate=config.transformer_dropout_residual,
            training=training,
            name='post_vffn_sublayer')

    # Fully-connected block added after the embedding layer
    def forward(self, inputs):
        ffn_inputs = self.pre_ffn.forward(inputs)
        ffn1_outputs = self.ffn1.forward(ffn_inputs)
        ffn_outputs = self.ffn2.forward(ffn1_outputs)
        block_out = self.post_ffn.forward(ffn_outputs)
        return block_out
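
For comparison, the pattern above (layer-norm pre-processing, two feed-forward layers, dropout post-processing) can be sketched with standard tf.keras layers. This is only an illustrative stand-in, not the project's ProcessingLayer/FeedForwardLayer classes; the class name SketchVFFNBlock, the default dropout rate, and the usage shapes are placeholders.

import tensorflow as tf

class SketchVFFNBlock(tf.keras.layers.Layer):
    """ Illustrative sketch: LayerNorm -> Dense(relu) -> Dense(relu) -> Dropout, mirroring V_FFNBlock.forward. """
    def __init__(self, middle_size, out_size, dropout_rate=0.1):
        super().__init__()
        self.pre_norm = tf.keras.layers.LayerNormalization()               # pre-processing
        self.ffn1 = tf.keras.layers.Dense(middle_size, activation='relu')  # expansion layer
        self.ffn2 = tf.keras.layers.Dense(out_size, activation='relu')     # projection layer
        self.post_dropout = tf.keras.layers.Dropout(dropout_rate)          # post-processing

    def call(self, inputs, training=False):
        x = self.pre_norm(inputs)
        x = self.ffn2(self.ffn1(x))
        return self.post_dropout(x, training=training)

# Example usage with placeholder shapes: [batch, time, features]
block = SketchVFFNBlock(middle_size=2048, out_size=512)
out = block(tf.random.normal([8, 20, 512]), training=True)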
Example 2
    def __init__(self,
                 config,
                 float_dtype,
                 self_attention,
                 training,
                 from_rnn=False,
                 tie_attention=False):
        # Set attributes
        self.self_attention = self_attention
        if not tie_attention:
            if self_attention:
                attn_name = 'self_attn'
            else:
                attn_name = 'cross_attn'
        else:
            attn_name = 'tied_attn'

        memory_size = config.state_size
        if from_rnn:
            memory_size *= 2

        if config.layer_normalization_type == 'layernorm':
            layernorm = LayerNormLayer
        elif config.layer_normalization_type == 'rmsnorm':
            layernorm = RMSNormLayer

        # Build layers
        self.pre_attn = ProcessingLayer(
            config.state_size,
            use_layer_norm=layernorm,
            dropout_rate=0.,
            training=training,
            name='pre_{:s}_sublayer'.format(attn_name))

        self.attn = MultiHeadAttentionLayer(
            memory_size,
            config.state_size,
            config.state_size,
            config.state_size,
            config.state_size,
            config.transformer_num_heads,
            float_dtype,
            dropout_attn=config.transformer_dropout_attn,
            drophead=config.transformer_drophead,
            training=training,
            name='{:s}_sublayer'.format(attn_name))

        self.post_attn = ProcessingLayer(
            config.state_size,
            use_layer_norm=False,
            dropout_rate=config.transformer_dropout_residual,
            training=training,
            name='post_{:s}_sublayer'.format(attn_name))
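
This variant selects the normalization class through config.layer_normalization_type ('layernorm' or 'rmsnorm') and hands it to the pre-processing layer. As a point of reference, a minimal RMSNorm layer could look like the sketch below; it is an assumed, generic implementation of the technique, not the project's RMSNormLayer.

import tensorflow as tf

class SketchRMSNorm(tf.keras.layers.Layer):
    """ RMSNorm sketch: rescale by the root mean square of the activations and a learned gain; no mean-centering, no bias. """
    def __init__(self, epsilon=1e-6):
        super().__init__()
        self.epsilon = epsilon

    def build(self, input_shape):
        self.gain = self.add_weight(name='gain', shape=(input_shape[-1],), initializer='ones')

    def call(self, inputs):
        mean_square = tf.reduce_mean(tf.square(inputs), axis=-1, keepdims=True)
        return inputs * tf.math.rsqrt(mean_square + self.epsilon) * self.gain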
Example 3
class FFNBlock(object):
    """ Defines a single feed-forward network block (referred to as 'sub-layer' in the transformer paper) comprising of
    a single feed-forward network preceded by a pre-processing layer and followed by a post-processing layer. """
    def __init__(self, config, ffn_dims, float_dtype, is_final, training):
        # Set attributes
        self.is_final = is_final

        if config.layer_normalization_type == 'layernorm':
            layernorm = LayerNormLayer
        elif config.layer_normalization_type == 'rmsnorm':
            layernorm = RMSNormLayer

        # Build layers
        self.pre_ffn = ProcessingLayer(config.state_size,
                                       use_layer_norm=layernorm,
                                       dropout_rate=0.,
                                       training=training,
                                       name='pre_ffn_sublayer')
        self.ffn = FeedForwardNetwork(
            ffn_dims,
            float_dtype,
            use_bias=True,
            activation=tf.nn.relu,
            use_layer_norm=False,
            dropout_rate=config.transformer_dropout_relu,
            training=training,
            name='ffn_sublayer')
        self.post_ffn = ProcessingLayer(
            config.state_size,
            use_layer_norm=False,
            dropout_rate=config.transformer_dropout_residual,
            training=training,
            name='post_ffn_sublayer')
        if is_final:
            self.pre_final = ProcessingLayer(config.state_size,
                                             use_layer_norm=layernorm,
                                             dropout_rate=0.,
                                             training=training,
                                             name='final_transform')

    def forward(self, inputs):
        """ Propagates input data through the block. """
        ffn_inputs = self.pre_ffn.forward(inputs)
        ffn_outputs = self.ffn.forward(ffn_inputs)
        block_out = self.post_ffn.forward(ffn_outputs, residual_inputs=inputs)
        if self.is_final:
            block_out = self.pre_final.forward(block_out)
        return block_out
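
The forward pass makes the sub-layer wiring explicit: the raw inputs skip the feed-forward network and are added back during post-processing, and the final block of a stack applies one extra normalization. A minimal tf.keras sketch of that behaviour follows; it assumes the usual ReLU hidden layer with a linear output projection and uses placeholder sizes, so it is not the actual FeedForwardNetwork/ProcessingLayer code.

import tensorflow as tf

class SketchFFNBlock(tf.keras.layers.Layer):
    """ Pre-norm residual FFN block with an optional final normalization (cf. FFNBlock.forward). """
    def __init__(self, state_size, hidden_size, dropout_rate=0.1, is_final=False):
        super().__init__()
        self.is_final = is_final
        self.pre_norm = tf.keras.layers.LayerNormalization()
        self.ffn_hidden = tf.keras.layers.Dense(hidden_size, activation='relu')
        self.ffn_out = tf.keras.layers.Dense(state_size)
        self.post_dropout = tf.keras.layers.Dropout(dropout_rate)
        if is_final:
            self.final_norm = tf.keras.layers.LayerNormalization()

    def call(self, inputs, training=False):
        x = self.pre_norm(inputs)                              # pre-processing
        x = self.ffn_out(self.ffn_hidden(x))                   # feed-forward network
        x = inputs + self.post_dropout(x, training=training)   # dropout + residual connection
        if self.is_final:
            x = self.final_norm(x)                             # extra norm on the last block
        return x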
Example 4
    def __init__(self, config, in_size, out_size, float_dtype, training):

        self.in_size = in_size
        self.middle_size = config.pre_source_ffn_middle_size  # defaults to 2048
        self.out_size = out_size
        self.float_dtype = float_dtype
        self.training = training

        # Build layers
        self.pre_ffn = ProcessingLayer(self.in_size,
                                       use_layer_norm=True,
                                       dropout_rate=0.,
                                       training=training,
                                       name='pre_ffn_sublayer')

        self.ffn1 = FeedForwardLayer(
            self.in_size,  # fully-connected (feed-forward) network
            self.middle_size,
            float_dtype,
            use_bias=True,
            activation=tf.nn.relu,
            use_layer_norm=False,
            dropout_rate=config.transformer_dropout_relu,
            training=training,
            name='vffn_sublayer1')

        self.ffn2 = FeedForwardLayer(
            self.middle_size,
            self.out_size,
            float_dtype,
            use_bias=True,
            activation=tf.nn.relu,
            use_layer_norm=False,
            dropout_rate=config.transformer_dropout_relu,
            training=training,
            name='vffn_sublayer2')

        # Apply dropout to the inputs (plus the residual connection, if one is passed to forward)
        self.post_ffn = ProcessingLayer(
            config.state_size,
            use_layer_norm=False,
            dropout_rate=config.transformer_dropout_residual,
            training=training,
            name='post_vffn_sublayer')
Example 5
    def __init__(self, config, ffn_dims, float_dtype, is_final, training):
        # Set attributes
        self.is_final = is_final

        # Build layers
        self.pre_ffn = ProcessingLayer(config.state_size,
                                       use_layer_norm=True,
                                       dropout_rate=0.,
                                       training=training,
                                       name='pre_ffn_sublayer')
        self.ffn = FeedForwardNetwork(
            ffn_dims,
            float_dtype,
            use_bias=True,
            activation=tf.nn.relu,
            use_layer_norm=False,
            dropout_rate=config.transformer_dropout_relu,
            training=training,
            name='ffn_sublayer')
        self.post_ffn = ProcessingLayer(
            config.state_size,
            use_layer_norm=False,
            dropout_rate=config.transformer_dropout_residual,
            training=training,
            name='post_ffn_sublayer')
        if is_final:
            self.pre_final = ProcessingLayer(config.state_size,
                                             use_layer_norm=True,
                                             dropout_rate=0.,
                                             training=training,
                                             name='final_transform')
Example 6
class AttentionBlock(object):
    """ Defines a single attention block (referred to as 'sub-layer' in the paper) comprising of a single multi-head
    attention layer preceded by a pre-processing layer and followed by a post-processing layer. """
    def __init__(self,
                 config,
                 float_dtype,
                 self_attention,
                 training,
                 from_rnn=False,
                 tie_attention=False):
        # Set attributes
        self.self_attention = self_attention
        if not tie_attention:
            if self_attention:
                attn_name = 'self_attn'
            else:
                attn_name = 'cross_attn'
        else:
            attn_name = 'tied_attn'

        memory_size = config.state_size
        if from_rnn:
            memory_size *= 2

        # Build layers
        self.pre_attn = ProcessingLayer(
            config.state_size,
            use_layer_norm=True,
            dropout_rate=0.,
            training=training,
            name='pre_{:s}_sublayer'.format(attn_name))

        self.attn = MultiHeadAttentionLayer(
            memory_size,
            config.state_size,
            config.state_size,
            config.state_size,
            config.state_size,
            config.transformer_num_heads,
            float_dtype,
            dropout_attn=config.transformer_dropout_attn,
            training=training,
            name='{:s}_sublayer'.format(attn_name))

        self.post_attn = ProcessingLayer(
            config.state_size,
            use_layer_norm=False,
            dropout_rate=config.transformer_dropout_residual,
            training=training,
            name='post_{:s}_sublayer'.format(attn_name))

    def forward(self, inputs, memory_context, attn_mask, layer_memories=None):
        """ Propagates input data through the block. """
        if not self.self_attention:
            assert (memory_context is not None), \
                'Encoder memories have to be provided for encoder-decoder attention computation.'
        attn_inputs = self.pre_attn.forward(inputs)
        attn_outputs, layer_memories = self.attn.forward(
            attn_inputs, memory_context, attn_mask, layer_memories)
        block_out = self.post_attn.forward(attn_outputs,
                                           residual_inputs=inputs)
        return block_out, layer_memories
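
The attention block follows the same pre-norm / residual pattern. The sketch below shows the equivalent wiring with tf.keras.layers.MultiHeadAttention; it is an illustrative stand-in for MultiHeadAttentionLayer, deliberately omits the layer_memories cache and the drophead option, and the head sizing (key_dim = state_size // num_heads) is an assumption.

import tensorflow as tf

class SketchAttentionBlock(tf.keras.layers.Layer):
    """ Pre-norm residual attention block; self-attention if no memory_context is given, cross-attention otherwise. """
    def __init__(self, state_size, num_heads, dropout_rate=0.1):
        super().__init__()
        self.pre_norm = tf.keras.layers.LayerNormalization()
        self.attn = tf.keras.layers.MultiHeadAttention(
            num_heads=num_heads, key_dim=state_size // num_heads)
        self.post_dropout = tf.keras.layers.Dropout(dropout_rate)

    def call(self, inputs, memory_context=None, attn_mask=None, training=False):
        x = self.pre_norm(inputs)                                   # pre-processing
        memory = x if memory_context is None else memory_context    # self- vs. cross-attention
        x = self.attn(query=x, value=memory, key=memory,
                      attention_mask=attn_mask, training=training)
        x = inputs + self.post_dropout(x, training=training)        # dropout + residual connection
        return x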