)  # [num_sentence, 1] these tokens are replaced by the next predicted word at every step; initially all of them are [SOS]
sentences = words  # running candidate sentences; the highest-scoring [EOS] hypothesis is picked from here later
end_sentences = []  # keeps up to NUM_TOP_PROB finished sentences
end_sentences_score = []  # keeps the score each finished sentence had when its [END] token appeared
len_sentence = 1

with torch.no_grad():
    encoded_img = encoder(img)
    encoded_img = encoded_img.reshape(1, -1, 2048).expand(
        num_sentence, -1, -1)  # [num_sentence, ENCODER_OUTPUT_SIZE^2, 2048]
    hidden, cell = decoder.init_hidden_cell_state(encoded_img)
    is_first = True

    # Start the decoder: iterate (excluding the SOS step) up to a maximum length,
    # starting from several candidate sentences and later selecting the best final END.
    while len_sentence < args.MAX_SENTENCE_LEN:
        embedded_words = decoder.embedding(words).squeeze(1)  # [num_sentence, EMBED_SIZE]
        attentioned_encoder_output, _ = decoder.attention_module(
            encoded_img, hidden)  # from here on, nothing is treated as PAD unconditionally
        gs = torch.sigmoid(decoder.sag(hidden))
        attentioned_encoder_output = attentioned_encoder_output * gs
        new_inputs = torch.cat([embedded_words, attentioned_encoder_output], dim=1)
        hidden, cell = decoder.LSTMCell(new_inputs, (hidden, cell))
        preds = decoder.last_fc(hidden)  # [(<= num_sentence), VOCAB_SIZE]; as in training, the batch shrinks once END appears
        preds = preds + top_prev_prob.expand_as(preds)  # accumulate: add the previous scores to the newly predicted ones
        if is_first:  # on the first step all beams give identical results, so pick the top candidates from the first row only
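# --------------------------------------------------------------------------
# Sketch (not part of the original file): the fragment above stops right at
# the first-step top-k selection of beam search. The self-contained snippet
# below illustrates, with dummy scores, how that selection typically works:
# on the first step every beam holds the same [SOS], so the rows of `preds`
# are identical and the top `num_sentence` words are taken from row 0 only;
# on later steps the top candidates are taken over the flattened
# beam x vocab score matrix. Names such as `preds`, `num_sentence`, and
# `top_prev_prob` mirror the fragment; the original continuation is unknown.
import torch

num_sentence, vocab_size = 5, 20
preds = torch.randn(num_sentence, vocab_size)   # accumulated scores, one row per beam
is_first = True

if is_first:
    # All rows are identical on step 1, so only row 0 is searched.
    top_prev_prob, top_words = preds[0].topk(num_sentence)           # both [num_sentence]
    beam_idx = torch.zeros(num_sentence, dtype=torch.long)           # every pick comes from beam 0
else:
    # Later steps: search all beam x vocab combinations at once.
    top_prev_prob, flat_idx = preds.view(-1).topk(num_sentence)
    beam_idx = flat_idx // vocab_size                                # which beam each pick extends
    top_words = flat_idx % vocab_size                                # which word was picked

top_prev_prob = top_prev_prob.unsqueeze(1)                           # [num_sentence, 1], ready for the next expand_as
# --------------------------------------------------------------------------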
class Transformer_Pointer(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.encoder_word = Encoder(config, config.src_vocab_size)
        self.encoder_char = Encoder(config, config.tgt_vocab_size)
        self.pointer = Pointer(config)
        self.attention = Luong_Attention(config)
        self.decoder = Decoder(config)
        self.linear_out = nn.Linear(config.model_size, config.tgt_vocab_size)
        self.softmax = nn.Softmax(dim=-1)
        self.s_len = config.s_len
        self.bos = config.bos

    def convert(self, x):
        """Prepend <bos> and drop the last token (teacher-forcing shift).

        :param x: (batch, s_len) -- (word_1, word_2, ..., word_n)
        :return: (batch, s_len) -- (<bos>, word_1, ..., word_n-1)
        """
        if torch.cuda.is_available():
            start = (torch.ones(x.size(0), 1) * self.bos).type(torch.cuda.LongTensor)
        else:
            start = (torch.ones(x.size(0), 1) * self.bos).type(torch.LongTensor)
        x = torch.cat((start, x), dim=1)
        return x[:, :-1]

    def forward(self, x_w, x_c, y):
        """
        :param x_w: word-level source tokens, fed to encoder_word
        :param x_c: character-level source tokens, fed to encoder_char and used as copy targets
        :param y: gold target tokens, (batch, s_len)
        :return: (s_len, batch, tgt_vocab_size) mixed generation/copy distributions
        """
        y_s = self.convert(y)
        encoder_out = self.encoder_word(x_w)
        encoder_attn = self.encoder_char(x_c)
        final = []
        for i in range(self.s_len):
            dec_output = self.decoder(x_w, y_s[:, :i + 1], encoder_out)
            emb = self.decoder.embedding(y_s[:, i].unsqueeze(1))
            output = self.linear_out(dec_output[:, -1, :])
            # gen: generation distribution over the vocabulary, (batch, vocab_size)
            gen = self.softmax(output)
            # pointer: ptr (batch, c_len) attention over x_c, context (batch, 1, model_size)
            ptr, context = self.attention(dec_output[:, -1, :].unsqueeze(1), encoder_attn)
            # prob: copy probability, (batch,) -> (batch, 1)
            prob = self.pointer(emb, dec_output[:, -1, :].unsqueeze(1), context).unsqueeze(1)
            # mix generation and copy: (1 - prob) * gen, plus prob * ptr scattered onto the source ids
            final_out = (1 - prob) * gen
            final_out = final_out.scatter_add_(1, x_c, prob * ptr)
            final.append(final_out)
        return torch.stack(final)

    def sample(self, x_w, x_c):
        """Greedy decoding: feed back the argmax of the generation distribution at each step."""
        encoder_out = self.encoder_word(x_w)
        encoder_attn = self.encoder_char(x_c)
        # start every sequence in the batch with <bos>
        out = torch.ones(x_w.size(0)) * self.bos
        out = out.unsqueeze(1)
        final = []
        for i in range(self.s_len):
            if torch.cuda.is_available():
                out = out.type(torch.cuda.LongTensor)
            else:
                out = out.type(torch.LongTensor)
            dec_output = self.decoder(x_w, out, encoder_out)
            emb = self.decoder.embedding(out[:, -1].unsqueeze(1))
            output = self.linear_out(dec_output[:, -1, :])
            gen = self.softmax(output)
            ptr, context = self.attention(dec_output[:, -1, :].unsqueeze(1), encoder_attn)
            # prob: copy probability, (batch,) -> (batch, 1)
            prob = self.pointer(emb, dec_output[:, -1, :].unsqueeze(1), context).unsqueeze(1)
            final_out = (1 - prob) * gen
            final_out = final_out.scatter_add_(1, x_c, prob * ptr)
            final.append(final_out)
            gen = torch.argmax(gen, dim=-1).unsqueeze(1)
            out = torch.cat((out, gen), dim=1)
        return torch.stack(final), out
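# --------------------------------------------------------------------------
# Sketch (not part of the original file): the core pointer-generator mixing
# step used in forward()/sample() above, isolated with dummy tensors so it
# can be run on its own. `gen` stands in for the decoder's vocabulary
# distribution, `ptr` for the attention distribution over the character-level
# source x_c, and `prob` for the copy probability; scatter_add_ adds the copy
# mass onto the vocabulary ids that actually occur in the source. All concrete
# sizes here are made up for illustration.
import torch

batch, vocab_size, c_len = 2, 10, 4
gen = torch.softmax(torch.randn(batch, vocab_size), dim=-1)     # (batch, vocab_size)
ptr = torch.softmax(torch.randn(batch, c_len), dim=-1)          # (batch, c_len)
prob = torch.rand(batch, 1)                                     # (batch, 1) copy probability
x_c = torch.randint(0, vocab_size, (batch, c_len))              # source token ids

final_out = (1 - prob) * gen                                    # generation share
final_out = final_out.scatter_add_(1, x_c, prob * ptr)          # add copy share at source ids
assert torch.allclose(final_out.sum(dim=1), torch.ones(batch))  # still a valid distribution
# --------------------------------------------------------------------------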