Example 1
from torch import nn
import am  # assumed: the snippet's own module exposing BERTEncoder, PositionalEncoding, DecoderBlock

class TransformerDecoder(nn.Module):  # base class assumed; the snippet omits the class line
    def __init__(self, vocab_size, key_size, query_size, value_size,
                 num_hiddens, norm_shape, ffn_num_input, ffn_num_hiddens,
                 num_heads, num_layers, dropout, max_len=1000, **kwargs):
        super(TransformerDecoder, self).__init__(**kwargs)
        self.num_hiddens = num_hiddens
        self.num_layers = num_layers
        # BERT-style encoder; max_len was undefined in the original and is
        # now a constructor parameter with a conventional default.
        self.encoder = am.BERTEncoder(vocab_size, num_hiddens, norm_shape,
                                      ffn_num_input, ffn_num_hiddens,
                                      num_heads, num_layers, dropout,
                                      max_len=max_len, key_size=key_size,
                                      query_size=query_size,
                                      value_size=value_size)
        # Token embedding plus positional encoding for the decoder input.
        self.embedding = nn.Embedding(vocab_size, num_hiddens)
        self.pos_encoding = am.PositionalEncoding(num_hiddens, dropout)
        # Stack of decoder blocks; each receives its index i for per-block state.
        self.blks = nn.Sequential()
        for i in range(num_layers):
            self.blks.add_module(
                "block" + str(i),
                am.DecoderBlock(key_size, query_size, value_size, num_hiddens,
                                norm_shape, ffn_num_input, ffn_num_hiddens,
                                num_heads, dropout, i))
        # Final projection from hidden size to vocabulary logits.
        self.dense = nn.Linear(num_hiddens, vocab_size)
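A minimal instantiation sketch under the same assumptions; every hyperparameter value here is illustrative, not taken from the original:

# Hypothetical hyperparameters, chosen only so the shapes line up:
# key/query/value sizes equal num_hiddens, and num_heads divides it.
decoder = TransformerDecoder(
    vocab_size=10000, key_size=256, query_size=256, value_size=256,
    num_hiddens=256, norm_shape=[256], ffn_num_input=256,
    ffn_num_hiddens=512, num_heads=4, num_layers=2, dropout=0.1)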
Example 2
from torch import nn
import am  # assumed: module exposing BERTEncoder, MaskLM, NextSentencePred

class BERTModel(nn.Module):  # base class assumed; the snippet omits the class line
    def __init__(self, vocab_size, num_hiddens, norm_shape, ffn_num_input,
                 ffn_num_hiddens, num_heads, num_layers, dropout,
                 max_len=1000, key_size=768, query_size=768,
                 value_size=768, hid_in_features=768,
                 mlm_in_features=768, nsp_in_features=768):
        super(BERTModel, self).__init__()
        self.encoder = am.BERTEncoder(vocab_size, num_hiddens, norm_shape,
                                      ffn_num_input, ffn_num_hiddens,
                                      num_heads, num_layers, dropout,
                                      max_len=max_len, key_size=key_size,
                                      query_size=query_size,
                                      value_size=value_size)
        # Pooled "<cls>" representation feeding the next-sentence head.
        self.hidden = nn.Sequential(nn.Linear(hid_in_features, num_hiddens),
                                    nn.Tanh())
        # Masked-language-model head.
        self.mlm = am.MaskLM(vocab_size, num_hiddens, mlm_in_features)
        # Next-sentence-prediction head (assumed: nsp_in_features is
        # otherwise unused; mirrors the d2l BERTModel this code follows).
        self.nsp = am.NextSentencePred(nsp_in_features)
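A usage sketch under the same assumptions; the values are illustrative, with 768 kept so the default head input sizes stay consistent:

# Illustrative instantiation: num_hiddens=768 matches the 768-dimensional
# defaults of the key/query/value projections and the prediction heads.
model = BERTModel(vocab_size=10000, num_hiddens=768, norm_shape=[768],
                  ffn_num_input=768, ffn_num_hiddens=1024, num_heads=4,
                  num_layers=2, dropout=0.2, max_len=1000)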
Example 3
from torch import nn
import am  # assumed: module exposing BERTEncoder

class BERTDecoder(nn.Module):  # base class assumed; the snippet omits the class line
    def __init__(self, vocab_size, key_size, query_size, value_size,
                 num_hiddens, norm_shape, ffn_num_input, ffn_num_hiddens,
                 num_heads, num_layers, dropout, max_len=1000, **kwargs):
        super(BERTDecoder, self).__init__(**kwargs)
        # BERT-style encoder; max_len was undefined in the original and is
        # now a constructor parameter with a conventional default.
        self.encoder = am.BERTEncoder(vocab_size, num_hiddens, norm_shape,
                                      ffn_num_input, ffn_num_hiddens,
                                      num_heads, num_layers, dropout,
                                      max_len=max_len, key_size=key_size,
                                      query_size=query_size,
                                      value_size=value_size)
        self.num_hiddens = num_hiddens
        self.num_layers = num_layers
        # Project encoder hidden states to vocabulary logits.
        self.dense = nn.Linear(num_hiddens, vocab_size)
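As above, a hedged instantiation sketch with made-up hyperparameters:

# Hypothetical values; key/query/value sizes match num_hiddens,
# and num_heads divides num_hiddens evenly.
decoder = BERTDecoder(vocab_size=10000, key_size=128, query_size=128,
                      value_size=128, num_hiddens=128, norm_shape=[128],
                      ffn_num_input=128, ffn_num_hiddens=256, num_heads=2,
                      num_layers=2, dropout=0.1)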