Example #1
0
    def __init__(self, dictionary, embed_dim, out_channels, kernel_size,
                 **kwargs):
        """Convolution-over-embeddings encoder.

        Args:
            dictionary: vocabulary; only ``len(dictionary)`` is used here.
            embed_dim: dimensionality of the word embeddings.
            out_channels: stored for use by the forward pass.
            kernel_size: iterable of convolution kernel widths.
            **kwargs: optional settings -- ``position_encoding`` ('learned'
                requires ``max_tokens``), ``pooling``, ``padding``
                ('valid'/'same'), ``residual``, ``dropout``,
                ``activation_fn``.
        """
        super().__init__(dictionary)
        # word embedding; NOTE(review): the original had
        # padding_idx=self.dictionary.pad() commented out -- confirm it is
        # deliberately omitted.
        self.embed = Embedding(len(dictionary), embed_dim)

        # optional learned positional embedding [1, max_tokens, embed_dim]
        self.position_encoding = kwargs.get('position_encoding', None)
        if self.position_encoding == 'learned':
            self.position_embed = Parameter(1,
                                            kwargs['max_tokens'],
                                            embed_dim,
                                            initializer=trunc_normal(mean=0.,
                                                                     std=0.02))
        else:
            self.position_embed = None

        # pooling: guard against pooling=None before the substring test
        # (the original `'weighted' in pooling` raised TypeError on None)
        pooling = kwargs.get('pooling', None)
        self.pooling = pooling1d(pooling)
        if pooling and 'weighted' in pooling:
            self.weight_layer = Linear(embed_dim, 1, bias=False)
        else:
            self.weight_layer = None

        # conv1d configuration
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        # padding mode = ['valid'(default), 'same']
        self.padding = kwargs.get('padding', 'valid')
        # always define padding_size so later attribute access is safe
        # (the original left it undefined for 'valid' padding)
        self.padding_size = None
        if self.padding == 'same':
            self.padding_size = []
            for kernel_sz in self.kernel_size:
                # asymmetric split keeps total pad == kernel_sz - 1
                padding_right = (kernel_sz - 1) // 2
                padding_left = kernel_sz - 1 - padding_right
                self.padding_size.append((0, 0, padding_left, padding_right))

        # one Conv2d per kernel width:
        # input:  [bsz, 1, seq_len, embed_dim]
        # kernel: (kernel_width, embed_dim)
        # output: [bsz, embed_dim, seq_len - kernel_width + 1]
        self.conv_layers = nn.ModuleList([
            Conv2d(in_channels=1,
                   out_channels=embed_dim,
                   kernel_size=(kernel_sz, embed_dim))
            for kernel_sz in self.kernel_size
        ])

        self.residual = kwargs.get('residual', False)  # residual connection
        self.dropout = kwargs.get('dropout', None)
        activation_fn = kwargs.get('activation_fn', None)
        self.activation_fn = get_activation(
            activation_fn) if activation_fn else None
Example #2
0
 def __init__(self, dictionary, embed_dim, **kwargs):
     """Bag-of-embeddings encoder with optional pooling.

     Args:
         dictionary: vocabulary providing ``len()`` and ``pad()``.
         embed_dim: dimensionality of the word embeddings.
         **kwargs: optional ``dropout`` (float) and ``pooling`` (str).
     """
     super().__init__(dictionary)
     self.embed = nn.Embedding(len(dictionary),
                               embed_dim,
                               padding_idx=self.dictionary.pad())
     # read dropout once (the original assigned self.dropout twice)
     self.dropout = kwargs.get('dropout', None)
     # thin the initial embedding weights only when a rate is given;
     # F.dropout(..., p=None) raises TypeError
     if self.dropout:
         self.embed.weight.data.copy_(
             F.dropout(self.embed.weight.data, self.dropout))
     pooling = kwargs.get('pooling', None)
     self.pooling = pooling1d(pooling)
     # always define weight_layer so later attribute access is safe
     # (the original left it undefined when pooling was falsy)
     self.weight_layer = None
     if self.pooling and 'weighted' in pooling:
         self.weight_layer = Linear(embed_dim, 1, bias=False)
Example #3
0
    def __init__(self,
                 dictionary, embed_dim, token_types, max_positions,
                 self_attn_layers, attention_heads, ffn_embed_dim, activation_fn,
                 dropout, **kwargs,
                 ):
        """Transformer self-attention encoder.

        Args:
            dictionary: vocabulary providing ``len()`` and ``pad()``.
            embed_dim: dimensionality of all embeddings.
            token_types: number of token types, or None to disable type
                embeddings.
            max_positions: maximum sequence length for the learned
                positional embedding, or None to disable it.
            self_attn_layers: number of stacked encoder layers.
            attention_heads: attention heads per layer.
            ffn_embed_dim: feed-forward inner dimensionality per layer.
            activation_fn: activation used inside each layer.
            dropout: dropout rate, stored for the forward pass.
            **kwargs: optional ``pooling`` (str).
        """
        super(SelfAttnEncoder, self).__init__(dictionary)
        # word embedding
        self.embed = Embedding(
            len(dictionary), embed_dim, padding_idx=self.dictionary.pad(),
            initializer=trunc_normal(mean=.0, std=.02),
        )
        # type embedding (optional)
        if token_types is not None:
            self.type_embed = Embedding(
                token_types, embed_dim,
                initializer=trunc_normal(mean=.0, std=.02),
            )
        else:
            self.type_embed = None
        # learned positional embedding [1, max_positions, embed_dim] (optional)
        if max_positions is not None:
            self.positional_embed = Parameter(
                1, max_positions, embed_dim,
                initializer=trunc_normal(mean=.0, std=.02),
            )
        else:
            self.positional_embed = None
        # layer norm for the summed embeddings
        self.embed_layer_norm = LayerNorm(embed_dim)
        self.dropout = dropout

        # self-attention stack
        self.num_layers = self_attn_layers
        self.layers = nn.ModuleList(
            [TransformerEncoderLayer(embed_dim, attention_heads, dropout, ffn_embed_dim, activation_fn)
             for _ in range(self_attn_layers)]
        )

        # pooling: guard against pooling=None before the substring test
        # (the original `'weighted' in pooling` raised TypeError on None)
        pooling = kwargs.get('pooling', None)
        self.pooling = pooling1d(pooling)
        if pooling and 'weighted' in pooling:
            self.weight_layer = Linear(embed_dim, 1, bias=False, weight_initializer=xavier_uniform())
        else:
            self.weight_layer = None
Example #4
0
 def __init__(
     self,
     dictionary,
     embed_dim,
     pooling='weighted_mean',
     dropout=0.1,
     **kwargs,
 ):
     """Pooled embedding encoder (weighted mean by default).

     Args:
         dictionary: vocabulary providing ``len()`` and ``pad()``.
         embed_dim: dimensionality of the word embeddings.
         pooling: pooling strategy name; a 'weighted*' strategy adds a
             learned scoring layer.
         dropout: dropout rate, stored for the forward pass.
         **kwargs: unused here; accepted for config compatibility.
     """
     super().__init__(dictionary)
     self.padding_idx = self.dictionary.pad()
     self.embed = Embedding(len(dictionary),
                            embed_dim,
                            padding_idx=self.padding_idx,
                            initializer=xavier_uniform())
     self.dropout = dropout
     self.pooling = pooling1d(pooling)
     # always define weight_layer so later attribute access is safe
     # (the original left it undefined when pooling1d returned a falsy value)
     self.weight_layer = None
     if self.pooling and 'weighted' in pooling:
         self.weight_layer = Linear(embed_dim, 1, bias=False,
                                    weight_initializer=xavier_uniform())
Example #5
0
 def __init__(
         self,
         dictionary,
         embed_dim,
         dropout,
         # rnn config
         rnn_cell,
         rnn_hidden_dim,
         rnn_dropout,
         rnn_num_layers=1,
         rnn_bidirectional=False,
         **kwargs):
     """RNN encoder over word embeddings.

     Args:
         dictionary: vocabulary; only ``len(dictionary)`` is used here.
         embed_dim: dimensionality of the word embeddings.
         dropout: embedding dropout rate, stored for the forward pass.
         rnn_cell: recurrent cell name ('rnn', 'lstm' or 'gru'); resolved
             case-insensitively to the matching ``torch.nn`` class.
         rnn_hidden_dim: hidden state dimensionality.
         rnn_dropout: inter-layer dropout inside the RNN.
         rnn_num_layers: number of stacked recurrent layers.
         rnn_bidirectional: whether the RNN runs in both directions.
         **kwargs: optional ``pooling`` (str).
     """
     super().__init__(dictionary)
     # word embedding
     self.embed = Embedding(len(dictionary),
                            embed_dim,
                            initializer=xavier_uniform())
     self.dropout = dropout
     # pooling: guard against pooling=None before the substring test
     # (the original `'weighted' in pooling` raised TypeError on None)
     pooling = kwargs.get('pooling', None)
     self.pooling = pooling1d(pooling)
     if pooling and 'weighted' in pooling:
         self.weight_layer = Linear(embed_dim,
                                    1,
                                    bias=False,
                                    weight_initializer=xavier_uniform())
     else:
         self.weight_layer = None
     # rnn
     self.rnn_dropout = rnn_dropout
     self.rnn_num_layers = rnn_num_layers
     self.rnn_bidirectional = rnn_bidirectional
     # e.g. rnn_cell='lstm' -> nn.LSTM
     self.rnn = getattr(nn, rnn_cell.upper())(
         embed_dim,
         rnn_hidden_dim,
         num_layers=rnn_num_layers,
         dropout=self.rnn_dropout,  # rnn inner dropout between layers
         bidirectional=rnn_bidirectional,
         batch_first=True,
     )