Example #1
    def __init__(self,
                 model_dim,
                 num_heads,
                 dropout=0.1,
                 batch_first=False,
                 masked_layers=False):
        super().__init__()
        self.num_heads = num_heads
        self.model_dim = model_dim
        self.batch_first = batch_first
        self.masked_layers = masked_layers

        # model_dim must divide evenly across the attention heads
        assert model_dim % num_heads == 0

        self.head_dim = model_dim // num_heads

        self.query_projection = MaskedFunction(
            XavierLinear(model_dim, model_dim, bias=False))
        self.key_projection = MaskedFunction(
            XavierLinear(model_dim, model_dim, bias=False))
        self.value_projection = MaskedFunction(
            XavierLinear(model_dim, model_dim, bias=False))

        self.out_projection = MaskedFunction(
            XavierLinear(model_dim, model_dim, bias=False))

        self.attn_dropout = nn.Dropout(dropout)
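
For context, a minimal self-contained sketch of how projections like these are typically consumed downstream: the projected tensors are reshaped per head and fed to scaled dot-product attention. This is an illustration only; plain nn.Linear stands in for the project-specific MaskedFunction(XavierLinear(...)) wrappers, and all sizes are hypothetical.

import math
import torch
import torch.nn as nn

# Hypothetical sizes; nn.Linear replaces MaskedFunction(XavierLinear(...)) here.
model_dim, num_heads, seq_len, batch = 512, 8, 10, 2
head_dim = model_dim // num_heads

query_projection = nn.Linear(model_dim, model_dim, bias=False)
key_projection = nn.Linear(model_dim, model_dim, bias=False)
value_projection = nn.Linear(model_dim, model_dim, bias=False)

x = torch.randn(batch, seq_len, model_dim)

def split_heads(t):
    # (batch, seq_len, model_dim) -> (batch, num_heads, seq_len, head_dim)
    return t.view(batch, seq_len, num_heads, head_dim).transpose(1, 2)

q = split_heads(query_projection(x))
k = split_heads(key_projection(x))
v = split_heads(value_projection(x))

scores = q @ k.transpose(-2, -1) / math.sqrt(head_dim)  # (batch, heads, seq, seq)
attn = torch.softmax(scores, dim=-1)
out = (attn @ v).transpose(1, 2).reshape(batch, seq_len, model_dim)
print(out.shape)  # torch.Size([2, 10, 512])
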
Example #2
    def __init__(self, h, d_model, attn_p=0.1, static=True, share=3):
        super(MultiHeadAttention, self).__init__()
        self.h = h
        self.d = d_model
        self.share = share

        assert d_model % h == 0

        self.d_head = d_model // h
        self.fc_query = MaskedFunction(
            XavierLinear(d_model, h * self.d_head, bias=False))
        self.fc_key = MaskedFunction(
            XavierLinear(d_model, h * self.d_head, bias=False))
        self.fc_value = MaskedFunction(
            XavierLinear(d_model, h * self.d_head, bias=False))

        self.fc_concat = MaskedFunction(
            XavierLinear(h * self.d_head, d_model, bias=False))

        self.sm = nn.Softmax(dim=-1)

        if static:
            self.attn_dropout = StaticDropout(attn_p)
        else:
            self.attn_dropout = nn.Dropout(attn_p)
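
A small sketch of what the sm and attn_dropout modules above are usually applied to: dropout acts on the softmax-normalised attention weights, not on the raw scores. The shapes are hypothetical, and nn.Dropout stands in for the project-specific StaticDropout.

import torch
import torch.nn as nn

# Hypothetical raw attention scores of shape (batch, heads, query_len, key_len).
scores = torch.randn(2, 8, 10, 10)
sm = nn.Softmax(dim=-1)
attn_dropout = nn.Dropout(0.1)  # stand-in for StaticDropout in this sketch

weights = attn_dropout(sm(scores))             # zeroes some normalised weights during training
context = weights @ torch.randn(2, 8, 10, 64)  # weighted sum over hypothetical value vectors
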
Example #3
    def build_feed_forward(self):
        self.preprocess_ffn = self.get_preprocessing_module()
        self.feed_forward = MaskedFunction(
            get_feed_forward(self.feed_forward_type, self.model_dim,
                             self.feed_forward_dim, self.feed_forward_dropout,
                             self.weight_norm))
        self.postprocess_ffn = self.get_postprocessing_module()

    def __init__(self,
                 model_dim,
                 sequence='nda',
                 dropout=0.0,
                 elementwise_affine=True,
                 gated_residuals=False,
                 masking=False):
        super(PrePostProcessing, self).__init__()
        self.masking = masking
        self.gated_residuals = gated_residuals
        self.steps = sequence

        if self.gated_residuals:
            self.k = nn.Parameter(torch.ones(1))

        if 'n' in self.steps:
            layer_norm = nn.LayerNorm([model_dim],
                                      elementwise_affine=elementwise_affine)
            self.layer_norm = MaskedFunction(layer_norm)
        if 'd' in self.steps:
            self.dropout = nn.Dropout(dropout, inplace=False)
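
One plausible reading of how a PrePostProcessing instance built with sequence='nda' applies its steps in a forward pass, sketched with plain modules. This is an assumption-laden illustration: the real class wraps layer_norm in MaskedFunction, and the exact gated-residual formula is not shown in the snippet above.

import torch
import torch.nn as nn

model_dim = 512
layer_norm = nn.LayerNorm([model_dim])
dropout = nn.Dropout(0.1)
k = nn.Parameter(torch.ones(1))  # gate, only relevant when gated_residuals is enabled

x = torch.randn(2, 10, model_dim)         # sub-layer output
residual = torch.randn(2, 10, model_dim)  # input carried around the sub-layer

out = x
for step in 'nda':
    if step == 'n':    # normalisation
        out = layer_norm(out)
    elif step == 'd':  # dropout
        out = dropout(out)
    elif step == 'a':  # residual addition (hypothetically gated by k)
        out = out + k * residual
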