Esempio n. 1
0
 def __init__(self, vocab_size, key_size, query_size, value_size,
              num_hiddens, norm_shape, ffn_num_input, ffn_num_hiddens,
              num_heads, num_layers, dropout, use_bias=False, **kwargs):
     """Build a Transformer encoder.

     Creates a token embedding, a positional-encoding layer, and a
     sequential stack of `num_layers` encoder blocks registered as
     "block0", "block1", ... on ``self.blks``.

     Args:
         vocab_size: size of the token vocabulary for the embedding.
         key_size, query_size, value_size: per-head projection sizes
             forwarded to each ``am.EncoderBlock``.
         num_hiddens: model (embedding/hidden) dimension.
         norm_shape: normalized shape forwarded to the blocks' layer norm.
         ffn_num_input, ffn_num_hiddens: feed-forward network sizes.
         num_heads: number of attention heads per block.
         num_layers: number of stacked encoder blocks.
         dropout: dropout rate used by positional encoding and blocks.
         use_bias: whether the blocks' linear layers use a bias term.
         **kwargs: forwarded to the parent constructor.
     """
     super(TransformerEncoder, self).__init__(**kwargs)
     self.num_hiddens = num_hiddens
     self.embedding = nn.Embedding(vocab_size, num_hiddens)
     self.pos_encoding = am.PositionalEncoding(num_hiddens, dropout)
     self.blks = nn.Sequential()
     for blk_idx in range(num_layers):
         # Every block shares the same hyperparameters; only the
         # registered module name differs.
         block = am.EncoderBlock(key_size, query_size, value_size,
                                 num_hiddens, norm_shape, ffn_num_input,
                                 ffn_num_hiddens, num_heads, dropout,
                                 use_bias)
         self.blks.add_module("block" + str(blk_idx), block)
Esempio n. 2
0
 def __init__(self, vocab_size, key_size, query_size, value_size,
              num_hiddens, norm_shape, ffn_num_input, ffn_num_hiddens,
              num_heads, num_layers, dropout, max_len=1000, **kwargs):
     """Build a Transformer decoder.

     Creates a token embedding, a positional-encoding layer, a stack of
     `num_layers` decoder blocks on ``self.blks`` (registered as
     "block0", "block1", ...), and a final vocabulary projection.

     Args:
         vocab_size: size of the token vocabulary (embedding and output).
         key_size, query_size, value_size: per-head projection sizes
             forwarded to each ``am.DecoderBlock``.
         num_hiddens: model (embedding/hidden) dimension.
         norm_shape: normalized shape forwarded to the blocks' layer norm.
         ffn_num_input, ffn_num_hiddens: feed-forward network sizes.
         num_heads: number of attention heads per block.
         num_layers: number of stacked decoder blocks.
         dropout: dropout rate used by positional encoding and blocks.
         max_len: maximum sequence length forwarded to the internal
             ``am.BERTEncoder``. BUGFIX: the original body referenced
             ``max_len`` without it being a parameter or local, which
             raised ``NameError`` on construction; it is now a keyword
             parameter with a conventional default, keeping all existing
             positional call sites working unchanged.
         **kwargs: forwarded to the parent constructor.
     """
     super(TransformerDecoder, self).__init__(**kwargs)
     self.num_hiddens = num_hiddens
     self.num_layers = num_layers
     # NOTE(review): constructing a BERTEncoder inside a *decoder* is
     # unusual and may be a copy-paste from another example — confirm
     # against the intended architecture before relying on self.encoder.
     self.encoder = am.BERTEncoder(vocab_size,
                                   num_hiddens,
                                   norm_shape,
                                   ffn_num_input,
                                   ffn_num_hiddens,
                                   num_heads,
                                   num_layers,
                                   dropout,
                                   max_len=max_len,
                                   key_size=key_size,
                                   query_size=query_size,
                                   value_size=value_size)
     self.embedding = nn.Embedding(vocab_size, num_hiddens)
     self.pos_encoding = am.PositionalEncoding(num_hiddens, dropout)
     self.blks = nn.Sequential()
     for i in range(num_layers):
         # Each decoder block receives its stack index `i` (used by the
         # block, e.g. to address its slot in cached decoding state).
         self.blks.add_module(
             "block" + str(i),
             am.DecoderBlock(key_size, query_size, value_size, num_hiddens,
                             norm_shape, ffn_num_input, ffn_num_hiddens,
                             num_heads, dropout, i))
     self.dense = nn.Linear(num_hiddens, vocab_size)