Example #1
    def __init__(self,
                 head_size,
                 num_heads,
                 dropout=0.0,
                 kernel_regularizer=L2,
                 bias_regularizer=L2,
                 name="mhsa_module",
                 **kwargs):
        super(MHSAModule, self).__init__(name=name, **kwargs)
        self.pc = PositionalEncoding(name=f"{name}_pe")
        self.ln = tf.keras.layers.LayerNormalization(
            name=f"{name}_ln",
            gamma_regularizer=kernel_regularizer,
            beta_regularizer=bias_regularizer)
        self.mha = tfa.layers.MultiHeadAttention(
            head_size=head_size,
            num_heads=num_heads,
            name=f"{name}_mhsa",
            kernel_regularizer=kernel_regularizer,
            bias_regularizer=bias_regularizer)
        self.do = tf.keras.layers.Dropout(dropout, name=f"{name}_dropout")
        self.res_add = tf.keras.layers.Add(name=f"{name}_add")
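The default kernel_regularizer=L2 refers to a symbol defined elsewhere in this snippet's project; presumably it is a Keras L2 weight-decay regularizer. A minimal sketch of supplying one explicitly (the 1e-6 factor is an assumption, not taken from the source):

import tensorflow as tf

# Assumption: a small L2 weight-decay regularizer stands in for the snippet's
# project-level L2 default.
l2_reg = tf.keras.regularizers.L2(1e-6)
# ...then pass kernel_regularizer=l2_reg, bias_regularizer=l2_reg when
# constructing the module.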
Example #2
    def __init__(self, vocab_size, embed_dim, dropout, att_type, att_heads,
                 att_mid_dim, att_mid_drop, bifeat_emb_act, bifeat_emb_drop,
                 ff_dropout, layer_num):
        super(Decoder, self).__init__()
        self.att_heads = att_heads
        self.layers = nn.ModuleList([])
        self.embed_dim = embed_dim
        # one DecoderLayer per level; the final layer is flagged via last_layer
        for i in range(layer_num):
            sublayer = DecoderLayer(embed_dim=embed_dim,
                                    dropout=dropout,
                                    att_type=att_type,
                                    att_heads=att_heads,
                                    att_mid_dim=att_mid_dim,
                                    att_mid_drop=att_mid_drop,
                                    bifeat_emb_act=bifeat_emb_act,
                                    bifeat_emb_drop=bifeat_emb_drop,
                                    ff_dropout=ff_dropout,
                                    last_layer=(i == layer_num - 1))
            self.layers.append(sublayer)

        # word-embedding pipeline: embedding dropout, token embedding scaled by
        # sqrt(embed_dim), and positional encoding
        self.dropout = nn.Dropout(cfg.MODEL.DROPOUT_WORD_EMBED)
        self.embed_tokens = nn.Embedding(vocab_size, embed_dim)
        self.embed_scale = math.sqrt(embed_dim)
        self.embed_positions = PositionalEncoding(
            embed_dim, cfg.MODEL.TRANSFORMER.PE_MAX_LEN)

        self.layer_norm_word = torch.nn.LayerNorm(embed_dim)
        self.generator = nn.Linear(embed_dim, vocab_size)

        # wbil1/wbil2: parallel Linear -> activation -> LayerNorm projections
        # used by the bilinear decode path
        self.wbil1 = nn.Sequential(nn.Linear(embed_dim, embed_dim),
                                   utils.activation(cfg.MODEL.BILINEAR.ACT),
                                   torch.nn.LayerNorm(embed_dim))
        self.wbil2 = nn.Sequential(nn.Linear(embed_dim, embed_dim),
                                   utils.activation(cfg.MODEL.BILINEAR.ACT),
                                   torch.nn.LayerNorm(embed_dim))
        self.wbi_drop = nn.Dropout(cfg.MODEL.BILINEAR.DECODE_DROPOUT)
        self.dropout_lm = nn.Dropout(cfg.MODEL.DROPOUT_LM)

        # project the (layer_num + 1) * embed_dim features back to embed_dim:
        # Linear to 2*embed_dim, GLU halves it, then LayerNorm
        self.proj_norm = nn.Sequential(
            nn.Linear(embed_dim * (layer_num + 1), 2 * embed_dim), nn.GLU(),
            torch.nn.LayerNorm(embed_dim))

        self.clear_buffer()
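The proj_norm head works because nn.GLU() halves its input's last dimension, so the Linear's 2 * embed_dim output comes back down to embed_dim before the LayerNorm. A minimal, self-contained sketch of that projection pattern (the sizes below are illustrative assumptions, not the repository's configuration):

import torch
import torch.nn as nn

embed_dim, layer_num = 512, 3  # illustrative sizes only

proj_norm = nn.Sequential(
    nn.Linear(embed_dim * (layer_num + 1), 2 * embed_dim),  # widen to 2*D
    nn.GLU(),        # gated linear unit: splits and gates the last dim, 2*D -> D
    nn.LayerNorm(embed_dim))

x = torch.randn(2, 20, embed_dim * (layer_num + 1))  # e.g. stacked layer states
print(proj_norm(x).shape)  # torch.Size([2, 20, 512])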
Example #3
class MHSAModule(tf.keras.layers.Layer):
    def __init__(self,
                 head_size,
                 num_heads,
                 dropout=0.0,
                 kernel_regularizer=L2,
                 bias_regularizer=L2,
                 name="mhsa_module",
                 **kwargs):
        super(MHSAModule, self).__init__(name=name, **kwargs)
        self.pc = PositionalEncoding(name=f"{name}_pe")
        self.ln = tf.keras.layers.LayerNormalization(
            name=f"{name}_ln",
            gamma_regularizer=kernel_regularizer,
            beta_regularizer=bias_regularizer)
        self.mha = tfa.layers.MultiHeadAttention(
            head_size=head_size,
            num_heads=num_heads,
            name=f"{name}_mhsa",
            kernel_regularizer=kernel_regularizer,
            bias_regularizer=bias_regularizer)
        self.do = tf.keras.layers.Dropout(dropout, name=f"{name}_dropout")
        self.res_add = tf.keras.layers.Add(name=f"{name}_add")

    def call(self, inputs, **kwargs):
        outputs = self.pc(inputs)
        outputs = self.ln(outputs)
        outputs = self.mha([outputs, outputs, outputs])
        outputs = self.do(outputs)
        outputs = self.res_add([inputs, outputs])
        return outputs

    def get_config(self):
        conf = super(MHSAModule, self).get_config()
        conf.update(self.pc.get_config())
        conf.update(self.ln.get_config())
        conf.update(self.mha.get_config())
        conf.update(self.do.get_config())
        conf.update(self.res_add.get_config())
        return conf
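The call method above is a pre-norm residual self-attention block: positional encoding, LayerNorm, multi-head self-attention, dropout, then a residual add with the original input. A minimal, self-contained sketch of the same pattern using only core tf.keras (note that tf.keras.layers.MultiHeadAttention takes key_dim/num_heads rather than tfa's head_size/num_heads, and the project-specific PositionalEncoding layer is omitted here):

import tensorflow as tf

class PreNormSelfAttention(tf.keras.layers.Layer):
    # Illustrative stand-in for MHSAModule, without positional encoding.
    def __init__(self, key_dim, num_heads, dropout=0.0, **kwargs):
        super().__init__(**kwargs)
        self.ln = tf.keras.layers.LayerNormalization()
        self.mha = tf.keras.layers.MultiHeadAttention(num_heads=num_heads,
                                                      key_dim=key_dim)
        self.do = tf.keras.layers.Dropout(dropout)
        self.res_add = tf.keras.layers.Add()

    def call(self, inputs, training=False):
        x = self.ln(inputs)               # pre-norm
        x = self.mha(x, x, x)             # self-attention (query = value = key)
        x = self.do(x, training=training)
        return self.res_add([inputs, x])  # residual connection

layer = PreNormSelfAttention(key_dim=36, num_heads=4, dropout=0.1)
out = layer(tf.random.normal([2, 50, 144]))  # (batch, time, features)
print(out.shape)  # (2, 50, 144)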
Example #4
    def _build_block(self):
        # positional encoding, then n_convs residual conv sublayers, one
        # residual self-attention sublayer, and a residual position-wise FFN
        self.layers.append(PositionalEncoding())
        for i in range(self.n_convs):
            self.layers.extend(self._build_residual(
                nested_layer='conv', name=self.name + "_conv{}".format(i)))
        self.layers.extend(self._build_residual(
            nested_layer='attn', name=self.name + '_selfAttn'))
        self.layers.extend(self._build_residual(
            nested_layer='pwffn', name=self.name + "_pwffn"))
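The _build_residual helper is not shown in this snippet, so the sketch below is purely hypothetical: it only illustrates the usual shape of such a wrapper (LayerNorm, the nested sublayer, and an additive skip connection); the names and structure are assumptions, not the original code.

import tensorflow as tf

# Hypothetical sketch: the snippet's _build_residual is not shown, so this is
# an assumed illustration of a residual sublayer wrapper, not the original.
class ResidualWrapper(tf.keras.layers.Layer):
    def __init__(self, nested_layer, **kwargs):
        super().__init__(**kwargs)
        self.ln = tf.keras.layers.LayerNormalization()
        self.nested = nested_layer

    def call(self, inputs):
        # normalize, apply the nested sublayer, then add the input back
        return inputs + self.nested(self.ln(inputs))

# e.g. wrapping a position-wise feed-forward sublayer
pwffn = tf.keras.Sequential([tf.keras.layers.Dense(512, activation="relu"),
                             tf.keras.layers.Dense(128)])
block = ResidualWrapper(pwffn)
print(block(tf.random.normal([2, 10, 128])).shape)  # (2, 10, 128)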