def __init__(self, dictionary, embed_dim, out_channels, kernel_size, **kwargs):
    super().__init__(dictionary)
    # word embedding + positional embedding
    self.embed = Embedding(len(dictionary), embed_dim)  # , padding_idx=self.dictionary.pad())
    self.position_encoding = kwargs.get('position_encoding', None)
    if self.position_encoding == 'learned':
        self.position_embed = Parameter(1, kwargs['max_tokens'], embed_dim,
                                        initializer=trunc_normal(mean=0., std=0.02))
    else:
        self.position_embed = None
    # pooling
    pooling = kwargs.get('pooling', None)
    self.pooling = pooling1d(pooling)
    if pooling and 'weighted' in pooling:
        self.weight_layer = Linear(embed_dim, 1, bias=False)
    else:
        self.weight_layer = None
    # conv1d
    self.out_channels = out_channels
    self.kernel_size = kernel_size
    # padding mode = ['valid'(default), 'same']
    self.padding = kwargs.get('padding', 'valid')
    if self.padding == 'same':
        self.padding_size = []
        for kernel_sz in self.kernel_size:
            padding_right = (kernel_sz - 1) // 2
            padding_left = kernel_sz - 1 - padding_right
            self.padding_size.append((0, 0, padding_left, padding_right,))
    self.conv_layers = nn.ModuleList([])
    # input: [bsz, 1, seq_len, embed_dim]
    # filters = 1 -> embed_dim
    # kernel_size = (kernel_width, embed_dim)
    # => output: [bsz, embed_dim, seq_len - kernel_width + 1]
    for kernel_sz in self.kernel_size:
        self.conv_layers.append(
            Conv2d(in_channels=1, out_channels=embed_dim, kernel_size=(kernel_sz, embed_dim)))
    self.residual = kwargs.get('residual', False)  # residual
    self.dropout = kwargs.get('dropout', None)
    activation_fn = kwargs.get('activation_fn', None)
    self.activation_fn = get_activation(activation_fn) if activation_fn else None
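# --- Illustrative sketch (assumption, not the repository's actual forward): how the modules
# built above could be wired together. `tokens` is a LongTensor of shape [bsz, seq_len];
# position_embed is assumed to behave like a tensor of shape [1, max_tokens, embed_dim]. ---
def _example_forward(self, tokens):
    import torch.nn.functional as F
    x = self.embed(tokens)                                   # [bsz, seq_len, embed_dim]
    if self.position_embed is not None:
        x = x + self.position_embed[:, :x.size(1), :]
    x = x.unsqueeze(1)                                       # [bsz, 1, seq_len, embed_dim]
    outputs = []
    for idx, conv in enumerate(self.conv_layers):
        # 'same' padding pads the seq_len dim so the conv output keeps the input length
        inp = F.pad(x, self.padding_size[idx]) if self.padding == 'same' else x
        out = conv(inp).squeeze(-1)                          # [bsz, embed_dim, seq_len']
        if self.activation_fn is not None:
            out = self.activation_fn(out)
        outputs.append(out)
    return outputs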
def __init__(self, dictionary, embed_dim, **kwargs):
    super().__init__(dictionary)
    self.embed = nn.Embedding(len(dictionary), embed_dim, padding_idx=self.dictionary.pad())
    # self.embed = Embedding(len(dictionary), embed_dim, padding_idx=None)
    self.dropout = kwargs.get('dropout', None)
    if self.dropout:
        # one-shot dropout over the embedding table at construction time
        self.embed.weight.data.copy_(F.dropout(self.embed.weight.data, self.dropout))
    pooling = kwargs.get('pooling', None)
    self.pooling = pooling1d(pooling)
    if self.pooling:
        self.weight_layer = Linear(embed_dim, 1, bias=False) if 'weighted' in pooling else None
    else:
        self.weight_layer = None
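# --- Illustrative sketch (assumption, not the repository's forward): embed, apply dropout,
# then pool over the time dimension. The real pooling callable comes from pooling1d(pooling);
# a masked mean over non-padding tokens is shown here only as a stand-in. ---
def _example_forward(self, tokens):
    x = self.embed(tokens)                                   # [bsz, seq_len, embed_dim]
    if self.dropout:
        x = F.dropout(x, p=self.dropout, training=self.training)
    mask = tokens.ne(self.embed.padding_idx).unsqueeze(-1)   # [bsz, seq_len, 1]
    return (x * mask).sum(dim=1) / mask.sum(dim=1).clamp(min=1)  # [bsz, embed_dim]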
def __init__(
    self,
    dictionary,
    embed_dim,
    token_types,
    max_positions,
    self_attn_layers,
    attention_heads,
    ffn_embed_dim,
    activation_fn,
    dropout,
    **kwargs,
):
    super(SelfAttnEncoder, self).__init__(dictionary)
    # word embedding
    self.embed = Embedding(
        len(dictionary), embed_dim,
        padding_idx=self.dictionary.pad(),
        initializer=trunc_normal(mean=.0, std=.02),
    )
    # type embedding
    if token_types is not None:
        self.type_embed = Embedding(
            token_types, embed_dim,
            initializer=trunc_normal(mean=.0, std=.02),
        )
    else:
        self.type_embed = None
    # positional embedding
    if max_positions is not None:
        self.positional_embed = Parameter(
            1, max_positions, embed_dim,
            initializer=trunc_normal(mean=.0, std=.02),
        )
    else:
        self.positional_embed = None
    # layer norm for embedding
    self.embed_layer_norm = LayerNorm(embed_dim)
    self.dropout = dropout
    # self attn
    self.num_layers = self_attn_layers
    self.layers = nn.ModuleList(
        [TransformerEncoderLayer(embed_dim, attention_heads, dropout, ffn_embed_dim, activation_fn)
         for _ in range(self_attn_layers)]
    )
    # pooling
    pooling = kwargs.get('pooling', None)
    self.pooling = pooling1d(pooling)
    if pooling and 'weighted' in pooling:
        self.weight_layer = Linear(embed_dim, 1, bias=False, weight_initializer=xavier_uniform())
    else:
        self.weight_layer = None
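# --- Illustrative sketch (assumption, not the repository's forward): how the embedding pieces
# above are typically combined before the self-attention stack. `token_type_ids` and the
# [1, max_positions, embed_dim] shape of positional_embed are assumptions. ---
def _example_embed(self, tokens, token_type_ids=None):
    import torch.nn.functional as F
    x = self.embed(tokens)                                   # [bsz, seq_len, embed_dim]
    if self.type_embed is not None and token_type_ids is not None:
        x = x + self.type_embed(token_type_ids)
    if self.positional_embed is not None:
        x = x + self.positional_embed[:, :x.size(1), :]
    x = self.embed_layer_norm(x)
    return F.dropout(x, p=self.dropout, training=self.training)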
def __init__(
    self,
    dictionary,
    embed_dim,
    pooling='weighted_mean',
    dropout=0.1,
    **kwargs,
):
    super().__init__(dictionary)
    self.padding_idx = self.dictionary.pad()
    self.embed = Embedding(len(dictionary), embed_dim, padding_idx=self.padding_idx,
                           initializer=xavier_uniform())
    self.dropout = dropout
    self.pooling = pooling1d(pooling)
    if self.pooling:
        self.weight_layer = Linear(embed_dim, 1, bias=False, weight_initializer=xavier_uniform()) \
            if 'weighted' in pooling else None
    else:
        self.weight_layer = None
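# --- Illustrative sketch (assumption): one plausible form of 'weighted_mean' pooling, scoring
# each token with weight_layer and masking padding positions via padding_idx. The repository's
# pooling1d implementation may differ. ---
def _example_weighted_mean(self, tokens):
    import torch
    x = self.embed(tokens)                                   # [bsz, seq_len, embed_dim]
    mask = tokens.ne(self.padding_idx).unsqueeze(-1)         # [bsz, seq_len, 1]
    scores = self.weight_layer(x).masked_fill(~mask, float('-inf'))
    weights = torch.softmax(scores, dim=1)                   # normalize over seq_len
    return (weights * x).sum(dim=1)                          # [bsz, embed_dim]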
def __init__(self, dictionary, embed_dim, dropout,
             # rnn config
             rnn_cell, rnn_hidden_dim, rnn_dropout,
             rnn_num_layers=1, rnn_bidirectional=False,
             **kwargs):
    super().__init__(dictionary)
    # word embedding
    self.embed = Embedding(len(dictionary), embed_dim, initializer=xavier_uniform())
    self.dropout = dropout
    # pooling
    pooling = kwargs.get('pooling', None)
    self.pooling = pooling1d(pooling)
    if pooling and 'weighted' in pooling:
        self.weight_layer = Linear(embed_dim, 1, bias=False, weight_initializer=xavier_uniform())
    else:
        self.weight_layer = None
    # rnn
    self.rnn_dropout = rnn_dropout
    self.rnn_num_layers = rnn_num_layers
    self.rnn_bidirectional = rnn_bidirectional
    self.rnn = getattr(nn, rnn_cell.upper())(
        embed_dim, rnn_hidden_dim,
        num_layers=rnn_num_layers,
        dropout=self.rnn_dropout,  # rnn inner dropout between layers
        bidirectional=rnn_bidirectional,
        batch_first=True,
    )
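# --- Illustrative sketch (assumption, not the repository's forward): run the RNN over the
# embedded tokens; a bidirectional RNN concatenates both directions, so the output features
# have size rnn_hidden_dim * 2 when rnn_bidirectional is True. ---
def _example_forward(self, tokens):
    import torch.nn.functional as F
    x = self.embed(tokens)                                   # [bsz, seq_len, embed_dim]
    x = F.dropout(x, p=self.dropout, training=self.training)
    output, _ = self.rnn(x)                                  # [bsz, seq_len, num_directions * rnn_hidden_dim]
    return output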