Example #1
0
class Seq2Seq(nn.Module):
    """Sequence-to-sequence model dispatching between the decoder's default
    path and beam-search decoding, based on the ``beam`` flag in *args*.
    """

    def __init__(self, args):
        super(Seq2Seq, self).__init__()

        # When truthy, forward() uses beam-search decoding.
        # Assumes args.beam is a bool (e.g. from argparse) — TODO confirm.
        self.beam = args.beam

        self.encoder = Encoder(args)
        self.decoder = Decoder(args)

    def forward(self, input_words, output_words, train=True):
        """Encode the source batch, then decode it.

        :param input_words: source-side token batch fed to the encoder
        :param output_words: target-side tokens passed through to the decoder
        :param train: True during training (forwarded to the decoder)
        :return: decoder output (greedy/default path, or beam search when
                 ``self.beam`` is set)
        """
        # Run the encoder first.
        # encoder_outputs: (batch, seq_len, hidden_size*2) per the original note.
        encoder_outputs, encoder_hidden = self.encoder(input_words)

        # Explicit branch instead of the original one-line
        # `... if self.beam == False else ...` conditional expression.
        if self.beam:
            return self.decoder.beam_decode(
                encoder_outputs, encoder_hidden, output_words, train)
        return self.decoder(
            encoder_outputs, encoder_hidden, output_words, train)
class Seq2Seq(nn.Module):
    """Embedding + Encoder + Decoder sequence-to-sequence model.

    Supports teacher-forced training (``forward``), greedy decoding
    (``decode``) and beam-search decoding (``beam_decode``).

    NOTE(review): this file defines two classes named ``Seq2Seq``; if both
    live in one module, this later definition shadows the earlier one.
    Consider renaming one of them.
    """

    def __init__(self,
                 vocab_size,
                 embed_size,
                 hidden_size,
                 rnn_type='LSTM',
                 num_layers=1,
                 bidirectional=False,
                 attention_type='Bilinear',
                 dropout=0):
        super(Seq2Seq, self).__init__()
        # Embedding table shared with the decoder (passed to it below).
        self.embedding = nn.Embedding(num_embeddings=vocab_size,
                                      embedding_dim=embed_size)
        self.encoder = Encoder(embed_size=embed_size,
                               hidden_size=hidden_size,
                               rnn_type=rnn_type,
                               num_layers=num_layers,
                               bidirectional=bidirectional,
                               dropout=dropout)
        self.decoder = Decoder(embedding=self.embedding,
                               hidden_size=hidden_size,
                               rnn_type=rnn_type,
                               num_layers=num_layers,
                               attention_type=attention_type,
                               dropout=dropout)

    def load_pretrained_embeddings(self, path):
        """Load pretrained embeddings from a NumPy file and freeze them.

        :param path: path to a ``.npy`` array of shape (vocab_size, embed_size)
                     — shape assumed from the embedding table; confirm.
        """
        self.embedding.weight.data.copy_(torch.from_numpy(np.load(path)))
        # Freeze the table: word vectors are not updated during training.
        self.embedding.weight.requires_grad = False

    def _encode_and_init(self, src):
        """Shared setup for forward/decode/beam_decode.

        Encodes *src* and computes the decoder's initial output; this was
        previously duplicated verbatim in all three entry points.

        :param src: LongTensor (batch_size, src_time_step)
        :return: (src_memory, src_mask, init_states, init_output)
        """
        src_memory, src_mask, src_lens, init_states = self.encode(src)
        init_output = self.decoder.get_init_output(src_memory, src_lens,
                                                   init_states)
        return src_memory, src_mask, init_states, init_output

    def forward(self, src, trg):
        """Teacher-forced decoding against the gold target sequence.

        :param src: LongTensor (batch_size, src_time_step)
        :param trg: LongTensor (batch_size, trg_time_step)
        :return: decoder output
        """
        src_memory, src_mask, init_states, init_output = self._encode_and_init(src)
        return self.decoder(src_memory, src_mask, init_states, init_output,
                            trg)

    def encode(self, src):
        """Embed and encode a padded source batch.

        :param src: LongTensor (batch_size, time_step)
        :return: (src_memory, src_mask, src_lens, final_states)
        """
        # presumably trims excess trailing padding columns — confirm sentence_clip
        src = sentence_clip(src)
        src_mask = (src != PAD_INDEX)  # True on real tokens, False on padding
        src_lens = src_mask.long().sum(dim=1, keepdim=False)
        src_embedding = self.embedding(
            src)  # Tensor(batch_size, time_step, embed_size)
        src_memory, final_states = self.encoder(src_embedding, src_lens)
        return src_memory, src_mask, src_lens, final_states

    def decode(self, src, max_len):
        """Greedy decoding up to *max_len* steps.

        :param src: LongTensor (batch_size, src_time_step)
        :param max_len: int, maximum number of decoding steps
        :return: decoder outputs
        """
        src_memory, src_mask, init_states, init_output = self._encode_and_init(src)
        return self.decoder.decode(src_memory, src_mask, init_states,
                                   init_output, max_len)

    def beam_decode(self, src, max_len, beam_size):
        """Beam-search decoding up to *max_len* steps.

        :param src: LongTensor (batch_size, src_time_step)
        :param max_len: int, maximum number of decoding steps
        :param beam_size: int, number of beams kept per step
        :return: decoder outputs
        """
        src_memory, src_mask, init_states, init_output = self._encode_and_init(src)
        return self.decoder.beam_decode(src_memory, src_mask, init_states,
                                        init_output, max_len, beam_size)