예제 #1
0
파일: model.py 프로젝트: xuehuiping/DCA
def build_decoder(hidden_size: int, embedding_dim: int, vocab_size: int):
    """Assemble a ``Decoder`` from its component modules.

    Only the case ``embedding_dim == hidden_size`` is supported; the two
    attention modules and the LSTM layer all operate at ``hidden_size``.
    """
    assert embedding_dim == hidden_size, (
        f"Different embedding_dim and hidden_size not implemented")

    # Recurrent core plus the two additive attention mechanisms.
    rnn = LSTMLayer(input_size=hidden_size, hidden_size=hidden_size)
    attn_over_words = AdditiveAttention(hidden_size, bias=True)
    attn_over_agents = AdditiveAttention(hidden_size, bias=True)

    return Decoder(
        decoder_layer=rnn,
        vocab_predictor=VocabPredictor(hidden_size, vocab_size),
        generator=Generator(hidden_size, embedding_dim),
        word_attention=attn_over_words,
        agent_attention=attn_over_agents,
    )
예제 #2
0
파일: main.py 프로젝트: jiangqn/seq2seq
 def _make_model(self):
     """Build the complete Seq2Seq model from configuration.

     Pieces: a frozen pretrained embedding, an RNN encoder, a bridge from
     the encoder's final state to the decoder's initial state, a decoder
     RNN cell, and one of several attention mechanisms.

     Raises:
         ValueError: if ``attention_type`` is not a recognised value.
     """
     cfg = self._config
     # Embedding table loaded from a pretrained matrix and frozen.
     embedding = nn.Embedding(num_embeddings=cfg.vocab_size,
                              embedding_dim=cfg.embed_size)
     embedding.weight.data.copy_(
         torch.from_numpy(np.load(cfg.embedding_file_name)))
     embedding.weight.requires_grad = False
     # Recurrent encoder over the source sequence.
     encoder = Encoder(rnn_type=cfg.rnn_type,
                       embed_size=cfg.embed_size,
                       hidden_size=cfg.hidden_size,
                       num_layers=cfg.num_layers,
                       bidirectional=cfg.bidirectional,
                       dropout=cfg.dropout)
     # Bridge: maps encoder final states to the decoder's initial state.
     bridge = Bridge(rnn_type=cfg.rnn_type,
                     hidden_size=cfg.hidden_size,
                     bidirectional=cfg.bidirectional)
     # Decoder cell; input_size is 2 * embed_size — presumably the embedded
     # token concatenated with a context vector (verify against Decoder).
     if cfg.rnn_type == 'LSTM':
         cell_cls = MultiLayerLSTMCells
     else:
         cell_cls = MultiLayerGRUCells
     rnn_cell = cell_cls(input_size=2 * cfg.embed_size,
                         hidden_size=cfg.hidden_size,
                         num_layers=cfg.num_layers,
                         dropout=cfg.dropout)
     # Attention mechanism selected by configuration.
     attn_type = cfg.attention_type
     if attn_type == 'Dot':
         attention = DotAttention()
     elif attn_type == 'ScaledDot':
         attention = ScaledDotAttention()
     elif attn_type == 'Additive':
         attention = AdditiveAttention(query_size=cfg.hidden_size,
                                       key_size=cfg.hidden_size)
     elif attn_type == 'Multiplicative':
         attention = MultiplicativeAttention(query_size=cfg.hidden_size,
                                             key_size=cfg.hidden_size)
     elif attn_type == 'MLP':
         attention = MultiLayerPerceptronAttention(query_size=cfg.hidden_size,
                                                   key_size=cfg.hidden_size,
                                                   out_size=1)
     else:
         raise ValueError('No Supporting.')
     decoder = Decoder(embedding, rnn_cell, attention, cfg.hidden_size)
     return Seq2Seq(embedding, encoder, bridge, decoder)
예제 #3
0
 def __init__(self, hparams, weight=None):
     """Init the NRMS model.

     Args:
         hparams (dict): configuration; reads 'encoder_size', 'nhead'
             and 'v_size' here, and forwards the dict to ``DocEncoder``.
         weight: optional pretrained embedding weight passed to the
             document encoder.
     """
     super(NRMS, self).__init__()
     self.hparams = hparams
     self.doc_encoder = DocEncoder(hparams, weight=weight)
     enc_size = hparams['encoder_size']
     # Multi-head self-attention over encoded documents.
     self.mha = nn.MultiheadAttention(enc_size, hparams['nhead'], dropout=0.1)
     self.proj = nn.Linear(enc_size, enc_size)
     # Additive attention pools a sequence into a single vector.
     self.additive_attn = AdditiveAttention(enc_size, hparams['v_size'])
     self.criterion = nn.CrossEntropyLoss()
예제 #4
0
 def __init__(self, hparams, weight=None) -> None:
     """Init the document encoder.

     Args:
         hparams (dict): reads 'embed_size', 'nhead', 'encoder_size'
             and 'v_size'.
         weight: optional pretrained embedding tensor; when given it is
             used unfrozen instead of a freshly initialised table.
     """
     super(DocEncoder, self).__init__()
     self.hparams = hparams
     if weight is None:
         # NOTE(review): vocab size 22537 and dim 300 are hard-coded —
         # presumably tied to a specific dataset; confirm before reuse.
         self.embedding = nn.Embedding(22537, 300, padding_idx=0)
     else:
         self.embedding = nn.Embedding.from_pretrained(weight, freeze=False,
                                                       padding_idx=0)
     embed_size = hparams['embed_size']
     # Self-attention over token embeddings, then project to encoder size.
     self.mha = nn.MultiheadAttention(embed_size, num_heads=hparams['nhead'],
                                      dropout=0.1)
     self.proj = nn.Linear(embed_size, hparams['encoder_size'])
     # Additive attention pools token vectors into one document vector.
     self.additive_attn = AdditiveAttention(hparams['encoder_size'],
                                            hparams['v_size'])
예제 #5
0
    def __init__(self, hparams, weight=None):
        """Init the NRMS model.

        Args:
            hparams (dict): configuration; reads 'encoder_size', 'nhead'
                and 'v_size' here, and forwards the dict to ``DocEncoder``.
            weight (tensor): optional pretrained embedding weight passed
                to the document encoder.
        """
        super(NRMS, self).__init__()
        self.hparams = hparams
        self.doc_encoder = DocEncoder(hparams, weight=weight)
        enc_size = hparams['encoder_size']
        # Multi-head self-attention over encoded documents.
        self.mha = nn.MultiheadAttention(enc_size, hparams['nhead'],
                                         dropout=0.1)
        self.proj = nn.Linear(enc_size, enc_size)
        # Additive attention pools a sequence into a single vector.
        self.additive_attn = AdditiveAttention(enc_size, hparams['v_size'])
        self.criterion = nn.CrossEntropyLoss()
예제 #6
0
    def __init__(self, hparams, weight=None):
        """Init the NRMS model.

        Args:
            hparams (dict): configuration; reads 'encoder_size', 'nhead'
                and 'v_size' here, and forwards the dict to ``DocEncoder``.
            weight: optional pretrained embedding weight passed to the
                document encoder.
        """
        super(NRMS, self).__init__()
        self.hparams = hparams
        self.doc_encoder = DocEncoder(hparams, weight=weight)
        enc_size = hparams['encoder_size']
        # Multi-head self-attention over encoded documents.
        self.mha = nn.MultiheadAttention(enc_size, hparams['nhead'],
                                         dropout=0.1)
        self.proj = nn.Linear(enc_size, enc_size)
        # Additive attention pools a sequence into a single vector.
        self.additive_attn = AdditiveAttention(enc_size, hparams['v_size'])
        self.criterion = nn.CrossEntropyLoss()
예제 #7
0
    def __init__(self, hparams, weight=None) -> None:
        """Init the document encoder.

        Args:
            hparams (dict): reads 'embed_size', 'nhead', 'encoder_size'
                and 'v_size'.
            weight (tensor): optional pretrained embedding tensor; when
                given it is used unfrozen with padding index 0.
        """
        super(DocEncoder, self).__init__()
        self.hparams = hparams
        if weight is None:
            # NOTE(review): vocab size 100 / dim 300 are hard-coded and
            # there is no padding_idx here, unlike the pretrained branch.
            self.embedding = nn.Embedding(100, 300)
        else:
            self.embedding = nn.Embedding.from_pretrained(weight,
                                                          freeze=False,
                                                          padding_idx=0)
        embed_size = hparams['embed_size']
        # Self-attention over token embeddings, then project to encoder size.
        self.mha = nn.MultiheadAttention(embed_size, hparams['nhead'],
                                         dropout=0.1)
        self.proj = nn.Linear(embed_size, hparams['encoder_size'])
        # Additive attention pools token vectors into one document vector.
        self.additive_attn = AdditiveAttention(hparams['encoder_size'],
                                               hparams['v_size'])
예제 #8
0
def attention():
    """Smoke-test the project's attention modules on random tensors."""
    from model.attention import MultiheadAttentionContainer, InProjContainer, ScaledDotProduct, AdditiveAttention

    # Shapes are (seq_len=10, batch=16, dim=200); key and value share a tensor.
    query = torch.rand(10, 16, 200)
    key = value = torch.rand(10, 16, 200)

    print('MultiHead: ')
    in_proj = InProjContainer(nn.Linear(200, 200), nn.Linear(200, 200),
                              nn.Linear(200, 200))
    mha = MultiheadAttentionContainer(nhead=8,
                                      in_proj_container=in_proj,
                                      attention_layer=ScaledDotProduct(),
                                      out_proj=nn.Linear(200, 200))
    out, scores = mha(query, key, value)
    print(out.shape)

    print('Additive: ')
    additive = AdditiveAttention(200, 300)
    # (seq, batch, dim) -> (batch, seq, dim) before the additive pass.
    out = out.permute(1, 0, 2)
    out, scores = additive(out)
    print(out.size(), scores.size())
예제 #9
0
 def _make_model(self):
     """Build the Seq2Seq model: frozen pretrained embedding, encoder,
     bridge, LSTM decoder cell and additive attention.
     """
     cfg = self._config
     # Embedding table loaded from a pretrained matrix and frozen.
     embedding = nn.Embedding(cfg.vocab_size, cfg.embed_size)
     embedding.weight.data.copy_(
         torch.from_numpy(np.load(cfg.embedding_file_name)))
     embedding.weight.requires_grad = False
     encoder = Encoder(cfg.embed_size, cfg.hidden_size, cfg.num_layers,
                       cfg.bidirectional, cfg.dropout)
     # Bridge: maps encoder final states to the decoder's initial state.
     bridge = Bridge(cfg.hidden_size, cfg.bidirectional)
     # Decoder cell; input is 2 * embed_size — presumably embedded token
     # concatenated with a context vector (verify against Decoder).
     lstm_cell = MultiLayerLSTMCells(2 * cfg.embed_size,
                                     cfg.hidden_size,
                                     cfg.num_layers,
                                     dropout=cfg.dropout)
     attention = AdditiveAttention(cfg.hidden_size, cfg.hidden_size)
     decoder = Decoder(embedding, lstm_cell, attention, cfg.hidden_size)
     return Seq2Seq(embedding, encoder, bridge, decoder)