def __init__(
        self,
        d_model: int = 512,          # dimension of model
        num_heads: int = 8,          # number of attention heads
        d_ff: int = 2048,            # dimension of feed forward network
        dropout_p: float = 0.3,      # probability of dropout
        ffnet_style: str = 'ff'      # style of feed forward network
) -> None:
    super(SpeechTransformerEncoderLayer, self).__init__()
    self.self_attention = AddNorm(MultiHeadAttention(d_model, num_heads), d_model)
    self.feed_forward = AddNorm(PositionWiseFeedForwardNet(d_model, d_ff, dropout_p, ffnet_style), d_model)
def __init__(self, d_model: int = 512, num_heads: int = 8, d_ff: int = 2048,
             dropout_p: float = 0.3, ffnet_style: str = 'ff') -> None:
    super(TransformerEncoderLayer, self).__init__()
    self.self_attention = AddNorm(MultiHeadAttention(d_model, num_heads), d_model)
    self.feed_forward = AddNorm(
        PoswiseFeedForwardNet(d_model, d_ff, dropout_p, ffnet_style), d_model)
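# Both encoder layers above wrap their sub-layers in an AddNorm module, whose definition
# is not part of this excerpt. The sketch below is a minimal assumed implementation of the
# usual residual-connection-plus-LayerNorm pattern such a wrapper typically provides; the
# class name AddNormSketch and the forward signature are illustrative, not the
# repository's actual code.
import torch
import torch.nn as nn


class AddNormSketch(nn.Module):
    """Applies a wrapped sub-layer, adds a residual connection, then layer-normalizes."""

    def __init__(self, sublayer: nn.Module, d_model: int) -> None:
        super().__init__()
        self.sublayer = sublayer
        self.layer_norm = nn.LayerNorm(d_model)

    def forward(self, *args):
        residual = args[0]              # first argument is treated as the residual input
        output = self.sublayer(*args)
        if isinstance(output, tuple):   # attention modules may also return attention weights
            output, attn = output
            return self.layer_norm(output + residual), attn
        return self.layer_norm(output + residual)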
def __init__(
        self,
        num_classes: int,                    # number of classification classes
        max_length: int = 120,               # maximum allowed length of the decoded sequence
        hidden_dim: int = 1024,              # dimension of RNN's hidden state vector
        sos_id: int = 1,                     # start-of-sentence token id
        eos_id: int = 2,                     # end-of-sentence token id
        attn_mechanism: str = 'multi-head',  # type of attention mechanism
        num_heads: int = 4,                  # number of attention heads
        num_layers: int = 2,                 # number of RNN layers
        rnn_type: str = 'lstm',              # type of RNN cell
        dropout_p: float = 0.3,              # dropout probability
        device: str = 'cuda'                 # 'cuda' or 'cpu'
) -> None:
    super(Seq2seqDecoder, self).__init__(hidden_dim, hidden_dim, num_layers, rnn_type, dropout_p, False, device)
    self.num_classes = num_classes
    self.num_heads = num_heads
    self.max_length = max_length
    self.eos_id = eos_id
    self.sos_id = sos_id
    self.acoustic_weight = 0.9   # acoustic model weight
    self.language_weight = 0.1   # language model weight
    self.attn_mechanism = attn_mechanism.lower()
    self.embedding = nn.Embedding(num_classes, hidden_dim)
    self.input_dropout = nn.Dropout(dropout_p)

    if self.attn_mechanism == 'loc':
        self.attention = AddNorm(LocationAwareAttention(hidden_dim, smoothing=True), hidden_dim)
    elif self.attn_mechanism == 'multi-head':
        self.attention = AddNorm(MultiHeadAttention(hidden_dim, num_heads), hidden_dim)
    elif self.attn_mechanism == 'additive':
        self.attention = AddNorm(AdditiveAttention(hidden_dim), hidden_dim)
    elif self.attn_mechanism == 'scaled-dot':
        self.attention = AddNorm(ScaledDotProductAttention(hidden_dim), hidden_dim)
    else:
        raise ValueError("Unsupported attention mechanism: {0}".format(attn_mechanism))

    self.projection = AddNorm(Linear(hidden_dim, hidden_dim, bias=True), hidden_dim)
    self.generator = Linear(hidden_dim, num_classes, bias=False)
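# The decoder stores acoustic_weight (0.9) and language_weight (0.1), but the code that
# consumes them is not part of this excerpt. The sketch below is an assumed illustration
# of the common "shallow fusion" pattern such weights usually implement: the per-step
# log-probabilities of the acoustic decoder and an external language model are combined
# as a weighted sum before the next token is chosen. The helper fuse_step_scores is
# hypothetical and not taken from the repository.
import torch


def fuse_step_scores(
        acoustic_log_probs: torch.Tensor,   # (batch, num_classes), from the speech decoder
        lm_log_probs: torch.Tensor,         # (batch, num_classes), from a language model
        acoustic_weight: float = 0.9,
        language_weight: float = 0.1,
) -> torch.Tensor:
    """Weighted combination of acoustic and language-model scores (hypothetical helper)."""
    return acoustic_weight * acoustic_log_probs + language_weight * lm_log_probs


# Example: pick the next token id from the fused scores for a batch of 4 hypotheses.
fused = fuse_step_scores(torch.randn(4, 2000).log_softmax(dim=-1),
                         torch.randn(4, 2000).log_softmax(dim=-1))
next_token = fused.argmax(dim=-1)           # (batch,)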