def _get_reward(self, y_hat, y, n_gram=6):
    # Use NLTK's sentence_gleu to compute the GLEU score between y and y_hat,
    # and return it as the reward.
    # This method computes the reward from the sampled result and the reference sentence.
    # For now, we use GLEU from NLTK, but you can use your own well-defined reward function.
    # GLEU is a variation of BLEU, and it is better suited to reinforcement learning.

    # Since we don't compute the reward score exactly the same way as multi-bleu.perl
    # (in particular, we use a different tokenization), I recommend setting n_gram to 6.

    # |y| = (batch_size, length1)
    # |y_hat| = (batch_size, length2)

    scores = []

    # Note that the loop below is far from a parallelized operation,
    # so it may slow down training.
    for b in range(y.size(0)):
        ref = []
        hyp = []
        for t in range(y.size(1)):
            ref += [str(int(y[b, t]))]
            if y[b, t] == data_loader.EOS:
                break

        for t in range(y_hat.size(1)):
            hyp += [str(int(y_hat[b, t]))]
            if y_hat[b, t] == data_loader.EOS:
                break

        # for nltk.bleu & nltk.gleu
        scores += [score_func([ref], hyp, max_len=n_gram) * 100.]

        # for utils.score_sentence
        # scores += [score_func(ref, hyp, 4, smooth=1)[-1] * 100.]

    scores = torch.FloatTensor(scores).to(y.device)
    # |scores| = (batch_size)

    return scores
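# A minimal sketch of how score_func could be bound. The comment above mentions
# "nltk.bleu & nltk.gleu"; NLTK's sentence_gleu has the signature
# sentence_gleu(references, hypothesis, min_len=1, max_len=4), which matches the
# score_func([ref], hyp, max_len=n_gram) call. This binding and the sample token
# ids below are illustrative assumptions, not the project's actual wiring.
from nltk.translate.gleu_score import sentence_gleu as score_func

ref = ['5', '12', '7', '2']   # token indices rendered as strings, EOS included
hyp = ['5', '12', '9', '2']
print(score_func([ref], hyp, max_len=6) * 100.)  # single sentence-level reward in [0, 100]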
def _get_reward(y_hat, y, n_gram=6):
    # This method computes the reward from the sampled result and the reference sentence.
    # For now, we use GLEU from NLTK, but you can use your own well-defined reward function.
    # GLEU is a variation of BLEU, and it is better suited to reinforcement learning.

    # Since we don't compute the reward score exactly the same way as multi-bleu.perl
    # (in particular, we use a different tokenization), I recommend setting n_gram to 6.

    # |y| = (batch_size, length1)
    # |y_hat| = (batch_size, length2)
    with torch.no_grad():
        scores = []

        for b in range(y.size(0)):
            ref = []
            hyp = []
            for t in range(y.size(-1)):
                ref += [str(int(y[b, t]))]
                if y[b, t] == data_loader.EOS:
                    break

            for t in range(y_hat.size(-1)):
                hyp += [str(int(y_hat[b, t]))]
                if y_hat[b, t] == data_loader.EOS:
                    break

            # The lines below are slower than the naive for-loops above.
            # ref = y[b].masked_select(y[b] != data_loader.PAD).tolist()
            # hyp = y_hat[b].masked_select(y_hat[b] != data_loader.PAD).tolist()

            # for nltk.bleu & nltk.gleu
            scores += [score_func([ref], hyp, max_len=n_gram) * 100.]

        scores = torch.FloatTensor(scores).to(y.device)
        # |scores| = (batch_size)

        return scores
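# A hypothetical smoke test for _get_reward, assuming score_func is bound to
# NLTK's sentence_gleu as sketched above and that data_loader exposes the EOS
# token id. The stand-in class and the id value 2 are illustrative only; the
# real values come from the project's vocabulary and its data_loader module.
import torch


class data_loader:  # stand-in for the project's data_loader module
    EOS = 2


y = torch.LongTensor([[5, 12, 7, 2],
                      [3, 8, 2, 2]])       # references, EOS-terminated
y_hat = torch.LongTensor([[5, 12, 9, 2],
                          [3, 9, 2, 2]])   # sampled hypotheses

print(_get_reward(y_hat, y, n_gram=6))     # FloatTensor of shape (batch_size,)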