Example #1
 def __init__(
         self,
         d_model: int = 512,             # dimension of model
         num_heads: int = 8,             # number of attention heads
         d_ff: int = 2048,               # dimension of feed forward network
         dropout_p: float = 0.3,         # probability of dropout
         ffnet_style: str = 'ff'         # style of feed forward network
 ) -> None:
     super(SpeechTransformerDecoderLayer, self).__init__()
     self.self_attention = AddNorm(MultiHeadAttention(d_model, num_heads), d_model)
     self.memory_attention = AddNorm(MultiHeadAttention(d_model, num_heads), d_model)
     self.feed_forward = AddNorm(PositionWiseFeedForwardNet(d_model, d_ff, dropout_p, ffnet_style), d_model)
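This layer wraps each sub-layer (self-attention, memory attention, feed forward) in an AddNorm module, which is not shown in these snippets. Assuming AddNorm is the usual post-norm residual wrapper (LayerNorm applied to input + sub-layer output), a minimal self-contained sketch could look like this; the tuple handling and exact signature are assumptions, not KoSpeech's code:

import torch
import torch.nn as nn

class AddNorm(nn.Module):
    """Post-norm residual wrapper: LayerNorm(residual + sublayer(*args)). Sketch only."""

    def __init__(self, sublayer: nn.Module, d_model: int = 512) -> None:
        super().__init__()
        self.sublayer = sublayer
        self.layer_norm = nn.LayerNorm(d_model)

    def forward(self, *args):
        residual = args[0]
        output = self.sublayer(*args)
        # Attention sub-layers may return (context, attention_weights) tuples.
        if isinstance(output, tuple):
            return self.layer_norm(output[0] + residual), output[1]
        return self.layer_norm(output + residual)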
Example #2
 def __init__(
         self,
         d_model: int = 512,  # dimension of model
         num_heads: int = 8,  # number of attention heads
         d_ff: int = 2048,  # dimension of feed forward network
         dropout_p: float = 0.3,  # probability of dropout
 ) -> None:
     super(TransformerDecoderLayer, self).__init__()
     self.self_attention_prenorm = nn.LayerNorm(d_model)
     self.encoder_attention_prenorm = nn.LayerNorm(d_model)
     self.feed_forward_prenorm = nn.LayerNorm(d_model)
     self.self_attention = MultiHeadAttention(d_model, num_heads)
     self.encoder_attention = MultiHeadAttention(d_model, num_heads)
     self.feed_forward = PositionwiseFeedForward(d_model, d_ff, dropout_p)
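Unlike Example #1, this layer keeps separate nn.LayerNorm modules (the *_prenorm attributes), which points to a pre-norm residual ordering: the input is normalized before each sub-layer instead of after the residual add. A minimal sketch contrasting the two orderings with a generic sub-layer; the Linear stand-in and tensor shapes are illustrative assumptions:

import torch
import torch.nn as nn

d_model = 512
norm = nn.LayerNorm(d_model)
sublayer = nn.Linear(d_model, d_model)   # stand-in for attention / feed forward
x = torch.randn(2, 10, d_model)          # (batch, time, d_model)

post_norm_out = norm(x + sublayer(x))    # Example #1 style: AddNorm wrapper
pre_norm_out = x + sublayer(norm(x))     # Example #2 style: LayerNorm before the sub-layer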
Example #3
 def __init__(self,
              d_model: int = 512,
              num_heads: int = 8,
              d_ff: int = 2048,
              dropout_p: float = 0.3,
              ffnet_style: str = 'ff') -> None:
     super(TransformerDecoderLayer, self).__init__()
     self.self_attention = AddNorm(MultiHeadAttention(d_model, num_heads),
                                   d_model)
     self.memory_attention = AddNorm(MultiHeadAttention(d_model, num_heads),
                                     d_model)
     self.feed_forward = AddNorm(
         PoswiseFeedForwardNet(d_model, d_ff, dropout_p, ffnet_style),
         d_model)
Example #4
File: decoder.py Project: kldami/KoSpeech
    def __init__(
        self,
        num_classes: int,  # number of classes
        max_length: int = 120,  # maximum allowed length of the sequence to be processed
        hidden_dim: int = 1024,  # dimension of the RNN's hidden state vector
        sos_id: int = 1,  # start-of-sentence token's id
        eos_id: int = 2,  # end-of-sentence token's id
        num_heads: int = 4,  # number of attention heads
        num_layers: int = 3,  # number of RNN layers
        rnn_type: str = 'lstm',  # type of RNN cell
        dropout_p: float = 0.3,  # dropout probability
        device: str = 'cuda'  # device - 'cuda' or 'cpu'
    ) -> None:
        super(SpellingCorrectorDecoder, self).__init__()
        self.num_classes = num_classes
        self.num_heads = num_heads
        self.max_length = max_length
        self.eos_id = eos_id
        self.sos_id = sos_id
        self.embedding = nn.Embedding(num_classes, hidden_dim)
        self.input_dropout = nn.Dropout(dropout_p)

        self.layers = nn.ModuleList([
            SpellingCorrectorDecoderLayer(hidden_dim=hidden_dim,
                                          rnn_type=rnn_type,
                                          dropout_p=dropout_p,
                                          device=device)
            for _ in range(num_layers)
        ])

        self.attention = MultiHeadAttention(hidden_dim)
Example #5
File: decoder.py Project: rheehot/KoSpeech
    def __init__(self,
                 num_classes: int,                    # number of classes
                 max_length: int = 120,               # maximum allowed length of the sequence to be processed
                 hidden_dim: int = 1024,              # dimension of the RNN's hidden state vector
                 sos_id: int = 1,                     # start-of-sentence token's id
                 eos_id: int = 2,                     # end-of-sentence token's id
                 attn_mechanism: str = 'multi-head',  # type of attention mechanism
                 num_heads: int = 4,                  # number of attention heads
                 num_layers: int = 2,                 # number of RNN layers
                 rnn_type: str = 'lstm',              # type of RNN cell
                 dropout_p: float = 0.3,              # dropout probability
                 device: str = 'cuda') -> None:       # device - 'cuda' or 'cpu'
        super(SpeechDecoderRNN, self).__init__(hidden_dim, hidden_dim, num_layers, rnn_type, dropout_p, False, device)
        self.num_classes = num_classes
        self.num_heads = num_heads
        self.max_length = max_length
        self.eos_id = eos_id
        self.sos_id = sos_id
        self.attn_mechanism = attn_mechanism.lower()
        self.embedding = nn.Embedding(num_classes, hidden_dim)
        self.input_dropout = nn.Dropout(dropout_p)

        if self.attn_mechanism == 'loc':
            self.attention = AddNorm(LocationAwareAttention(hidden_dim, smoothing=True), hidden_dim)
        elif self.attn_mechanism == 'multi-head':
            self.attention = AddNorm(MultiHeadAttention(hidden_dim, num_heads), hidden_dim)
        elif self.attn_mechanism == 'additive':
            self.attention = AddNorm(AdditiveAttention(hidden_dim), hidden_dim)
        elif self.attn_mechanism == 'scaled-dot':
            self.attention = AddNorm(ScaledDotProductAttention(hidden_dim), hidden_dim)
        else:
            raise ValueError("Unsupported attention: %s".format(attn_mechanism))

        self.projection = AddNorm(Linear(hidden_dim, hidden_dim, bias=True), hidden_dim)
        self.generator = Linear(hidden_dim, num_classes, bias=False)
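The constructor switches between location-aware, multi-head, additive, and scaled dot-product attention, each wrapped in AddNorm. For reference, the simplest option might look like the sketch below; the class name matches the snippet, but the body and signature are assumptions rather than KoSpeech's implementation:

import math
import torch
import torch.nn as nn
import torch.nn.functional as F

class ScaledDotProductAttention(nn.Module):
    """context = softmax(Q K^T / sqrt(dim)) V. Sketch only."""

    def __init__(self, dim: int) -> None:
        super().__init__()
        self.sqrt_dim = math.sqrt(dim)

    def forward(self, query: torch.Tensor, key: torch.Tensor, value: torch.Tensor):
        # (batch, q_len, dim) x (batch, dim, k_len) -> (batch, q_len, k_len)
        score = torch.bmm(query, key.transpose(1, 2)) / self.sqrt_dim
        attn = F.softmax(score, dim=-1)
        context = torch.bmm(attn, value)   # (batch, q_len, dim)
        return context, attn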
Example #6
 def __init__(
     self,
     num_classes: int,  # number of classes
     max_length: int = 120,  # maximum allowed length of the sequence to be processed
     hidden_dim: int = 1024,  # dimension of the RNN's hidden state vector
     sos_id: int = 1,  # start-of-sentence token's id
     eos_id: int = 2,  # end-of-sentence token's id
     attn_mechanism: str = 'multi-head',  # type of attention mechanism
     num_heads: int = 4,  # number of attention heads
     num_layers: int = 2,  # number of RNN layers
     rnn_type: str = 'lstm',  # type of RNN cell
     dropout_p: float = 0.3,  # dropout probability
     device: str = 'cuda'  # device - 'cuda' or 'cpu'
 ) -> None:
     super(LanguageDecoderRNN,
           self).__init__(hidden_dim, hidden_dim, num_layers, rnn_type,
                          dropout_p, False, device)
     self.num_classes = num_classes
     self.num_heads = num_heads
     self.max_length = max_length
     self.eos_id = eos_id
     self.sos_id = sos_id
     self.attn_mechanism = attn_mechanism.lower()
     self.embedding = nn.Embedding(num_classes, hidden_dim)
     self.input_dropout = nn.Dropout(dropout_p)
     self.attention = AddNorm(MultiHeadAttention(hidden_dim), hidden_dim)
     self.projection = AddNorm(Linear(hidden_dim, hidden_dim, bias=True),
                               hidden_dim)
     self.generator = Linear(hidden_dim, num_classes, bias=False)
Example #7
File: decoder.py Project: Rhcsky/KoSpeech
    def __init__(
        self,
        num_classes: int,
        max_length: int = 150,
        hidden_state_dim: int = 1024,
        pad_id: int = 0,
        sos_id: int = 1,
        eos_id: int = 2,
        attn_mechanism: str = 'multi-head',
        num_heads: int = 4,
        num_layers: int = 2,
        rnn_type: str = 'lstm',
        dropout_p: float = 0.3,
    ) -> None:
        super(DecoderRNN, self).__init__()
        self.hidden_state_dim = hidden_state_dim
        self.num_classes = num_classes
        self.num_heads = num_heads
        self.num_layers = num_layers
        self.max_length = max_length
        self.eos_id = eos_id
        self.sos_id = sos_id
        self.pad_id = pad_id
        self.attn_mechanism = attn_mechanism.lower()
        self.embedding = nn.Embedding(num_classes, hidden_state_dim)
        self.input_dropout = nn.Dropout(dropout_p)
        rnn_cell = self.supported_rnns[rnn_type.lower()]
        self.rnn = rnn_cell(
            input_size=hidden_state_dim,
            hidden_size=hidden_state_dim,
            num_layers=num_layers,
            bias=True,
            batch_first=True,
            dropout=dropout_p,
            bidirectional=False,
        )

        if self.attn_mechanism == 'loc':
            self.attention = LocationAwareAttention(hidden_state_dim,
                                                    attn_dim=hidden_state_dim,
                                                    smoothing=False)
        elif self.attn_mechanism == 'multi-head':
            self.attention = MultiHeadAttention(hidden_state_dim,
                                                num_heads=num_heads)
        elif self.attn_mechanism == 'additive':
            self.attention = AdditiveAttention(hidden_state_dim)
        elif self.attn_mechanism == 'scaled-dot':
            self.attention = ScaledDotProductAttention(dim=hidden_state_dim)
        else:
            raise ValueError(
                "Unsupported attention: {0}".format(attn_mechanism))

        self.fc = nn.Sequential(
            Linear(hidden_state_dim << 1, hidden_state_dim),
            nn.Tanh(),
            View(shape=(-1, self.hidden_state_dim), contiguous=True),
            Linear(hidden_state_dim, num_classes),
        )
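The first Linear in self.fc takes hidden_state_dim << 1 (i.e. 2 * hidden_state_dim) input features, which fits a decode step that concatenates the RNN output with the attention context before projecting to the vocabulary. A hedged sketch of that step, using plain nn.Linear in place of the project's Linear and View helpers and made-up tensor shapes:

import torch
import torch.nn as nn

batch, step_len, hidden_state_dim, num_classes = 4, 1, 1024, 2000

rnn_output = torch.randn(batch, step_len, hidden_state_dim)   # decoder RNN output
context = torch.randn(batch, step_len, hidden_state_dim)      # attention context vector

combined = torch.cat((rnn_output, context), dim=-1)           # (batch, step_len, 2 * hidden_state_dim)
fc = nn.Sequential(
    nn.Linear(hidden_state_dim << 1, hidden_state_dim),
    nn.Tanh(),
    nn.Linear(hidden_state_dim, num_classes),
)
step_logits = fc(combined)                                    # (batch, step_len, num_classes)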