Example #1
File: layers.py Project: shleee47/KoSpeech
 def __init__(
         self,
         d_model: int = 512,             # dimension of model
         num_heads: int = 8,             # number of attention heads
         d_ff: int = 2048,               # dimension of feed forward network
         dropout_p: float = 0.3,         # probability of dropout
         ffnet_style: str = 'ff'         # style of feed forward network
 ) -> None:
     super(SpeechTransformerEncoderLayer, self).__init__()
     self.self_attention = AddNorm(MultiHeadAttention(d_model, num_heads), d_model)
     self.feed_forward = AddNorm(PositionWiseFeedForwardNet(d_model, d_ff, dropout_p, ffnet_style), d_model)
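All three examples wrap their sub-layers in AddNorm. The wrapper itself is not shown on this page; based on its name and the call pattern AddNorm(sublayer, d_model), a minimal sketch of such a residual-plus-layer-norm wrapper could look like the following (the tuple handling for attention sub-layers is an assumption, not the project's confirmed implementation):

import torch.nn as nn

class AddNorm(nn.Module):
    """Sketch: apply a sub-layer, add the residual, then layer-normalize.

    Assumed from the call pattern AddNorm(sublayer, d_model) above; the
    project's real implementation may differ.
    """
    def __init__(self, sublayer: nn.Module, d_model: int = 512) -> None:
        super(AddNorm, self).__init__()
        self.sublayer = sublayer
        self.layer_norm = nn.LayerNorm(d_model)

    def forward(self, *args):
        residual = args[0]
        output = self.sublayer(*args)
        if isinstance(output, tuple):  # e.g. attention -> (context, attn)
            return self.layer_norm(output[0] + residual), output[1]
        return self.layer_norm(output + residual)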
Example #2
 def __init__(self,
              d_model: int = 512,
              num_heads: int = 8,
              d_ff: int = 2048,
              dropout_p: float = 0.3,
              ffnet_style: str = 'ff') -> None:
     super(TransformerEncoderLayer, self).__init__()
     self.self_attention = AddNorm(MultiHeadAttention(d_model, num_heads),
                                   d_model)
     self.feed_forward = AddNorm(
         PoswiseFeedForwardNet(d_model, d_ff, dropout_p, ffnet_style),
         d_model)
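Example #2 passes (d_model, d_ff, dropout_p, ffnet_style) into PoswiseFeedForwardNet. Assuming ffnet_style='ff' denotes the standard Transformer position-wise feed-forward block (d_model -> d_ff -> d_model with ReLU and dropout), a sketch might be:

import torch
import torch.nn as nn

class PoswiseFeedForwardNet(nn.Module):
    """Sketch of the standard position-wise FFN for ffnet_style='ff'.

    Mirrors only the constructor arguments shown above; other styles
    (e.g. a convolutional variant) are not reproduced here.
    """
    def __init__(self, d_model: int = 512, d_ff: int = 2048,
                 dropout_p: float = 0.3, ffnet_style: str = 'ff') -> None:
        super(PoswiseFeedForwardNet, self).__init__()
        if ffnet_style != 'ff':
            raise NotImplementedError("only the 'ff' style is sketched here")
        self.feed_forward = nn.Sequential(
            nn.Linear(d_model, d_ff),
            nn.Dropout(dropout_p),
            nn.ReLU(),
            nn.Linear(d_ff, d_model),
            nn.Dropout(dropout_p),
        )

    def forward(self, inputs: torch.Tensor) -> torch.Tensor:
        return self.feed_forward(inputs)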
Example #3
    def __init__(
        self,
        num_classes: int,  # number of classification classes
        max_length: int = 120,  # maximum allowed length of the sequence to be processed
        hidden_dim: int = 1024,  # dimension of the RNN's hidden state vector
        sos_id: int = 1,  # start-of-sentence token's id
        eos_id: int = 2,  # end-of-sentence token's id
        attn_mechanism: str = 'multi-head',  # type of attention mechanism
        num_heads: int = 4,  # number of attention heads
        num_layers: int = 2,  # number of RNN layers
        rnn_type: str = 'lstm',  # type of RNN cell
        dropout_p: float = 0.3,  # dropout probability
        device: str = 'cuda'  # 'cuda' or 'cpu'
    ) -> None:
        super(Seq2seqDecoder,
              self).__init__(hidden_dim, hidden_dim, num_layers, rnn_type,
                             dropout_p, False, device)
        self.num_classes = num_classes
        self.num_heads = num_heads
        self.max_length = max_length
        self.eos_id = eos_id
        self.sos_id = sos_id
        self.acoustic_weight = 0.9  # acoustic model weight
        self.language_weight = 0.1  # language model weight
        self.attn_mechanism = attn_mechanism.lower()
        self.embedding = nn.Embedding(num_classes, hidden_dim)
        self.input_dropout = nn.Dropout(dropout_p)

        if self.attn_mechanism == 'loc':
            self.attention = AddNorm(
                LocationAwareAttention(hidden_dim, smoothing=True), hidden_dim)
        elif self.attn_mechanism == 'multi-head':
            self.attention = AddNorm(MultiHeadAttention(hidden_dim, num_heads),
                                     hidden_dim)
        elif self.attn_mechanism == 'additive':
            self.attention = AddNorm(AdditiveAttention(hidden_dim), hidden_dim)
        elif self.attn_mechanism == 'scaled-dot':
            self.attention = AddNorm(ScaledDotProductAttention(hidden_dim),
                                     hidden_dim)
        else:
            raise ValueError(
                "Unsupported attention: {}".format(attn_mechanism))

        self.projection = AddNorm(Linear(hidden_dim, hidden_dim, bias=True),
                                  hidden_dim)
        self.generator = Linear(hidden_dim, num_classes, bias=False)
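Given only the signature above, constructing the decoder might look like this; the vocabulary size is a placeholder, and the token ids must match the tokenizer actually used:

# Hypothetical usage derived from the __init__ signature shown above.
decoder = Seq2seqDecoder(
    num_classes=2000,  # placeholder vocabulary size
    max_length=120,
    hidden_dim=1024,
    sos_id=1,
    eos_id=2,
    attn_mechanism='multi-head',
    num_heads=4,
    num_layers=2,
    rnn_type='lstm',
    dropout_p=0.3,
    device='cuda',
)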