Code example #1
    def forward(self, s_t, prev_s, sum_k_emb):
        '''Perform intra-decoder attention.
        :param s_t: hidden state of the decoder at the current time step
        :param prev_s: if intra-decoder attention is enabled, the previous decoder hidden states
        :param sum_k_emb: summed keyword embeddings, projected and added to the attention energies when config.key_attention is set
        '''
        at = None
        if config.intra_decoder is False:
            ct_d = get_cuda(T.zeros(s_t.size()))  # set ct_d to a vector of zeros
        elif prev_s is None:
            ct_d = get_cuda(T.zeros(s_t.size()))
            prev_s = s_t.unsqueeze(1)  #batch_size, 1, hid_size
        else:
            # Standard attention technique (eq 1 in Pointer-Generator Networks - https://arxiv.org/pdf/1704.04368.pdf)
            # et = tanh ( W_prev(prev_s)  + W_s(st_hat) )
            et = self.W_prev(prev_s)  # batch_size,t-1,hid_size
            dec_fea = self.W_s(s_t).unsqueeze(1)  # batch_size,1,hid_size
            et = et + dec_fea

            if config.key_attention:
                k_t = self.W_t(sum_k_emb).unsqueeze(1)
                if k_t.shape[0] == et.shape[0]: et = et + k_t

            et = T.tanh(et)  # batch_size,t-1,hid_size
            et = self.v(et).squeeze(2)  # batch_size,t-1
            # intra-decoder attention     (eq 7 & 8 in DEEP REINFORCED MODEL - https://arxiv.org/pdf/1705.04304.pdf)
            at = F.softmax(et, dim=1).unsqueeze(1)  #batch_size, 1, t-1
            ct_d = T.bmm(at, prev_s).squeeze(
                1
            )  #batch_size, hid_size    #  multiply the attention distribution with the previous decoder hidden states to get the decoder context vector
            prev_s = T.cat(
                [prev_s, s_t.unsqueeze(1)], dim=1
            )  #batch_size, t, hid_size  # append the current decoder state to the previous decoder hidden states
            at = at.squeeze(1)  #batch_size, t-1 # attention scores over the previous t-1 time steps
        return ct_d, prev_s, at
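For reference, the quantities computed in this forward pass (additive scoring as in eq. 1 of the Pointer-Generator paper, normalized over past decoder steps as in eqs. 7-8 of the Deep Reinforced Model) can be written out as below. This is a reading of the code above, not a quote from either paper; the optional key-attention term W_t(sum_k_emb) is simply added to the same energies before the tanh.

    e^d_{t,t'} = v^\top \tanh(W_{prev}\, s_{t'} + W_s\, s_t), \qquad t' < t
    \alpha^d_{t,t'} = \frac{\exp(e^d_{t,t'})}{\sum_{j < t} \exp(e^d_{t,j})}
    c^d_t = \sum_{t' < t} \alpha^d_{t,t'}\, s_{t'}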
Code example #2
    def __init__(self, start_id, end_id, unk_id, hidden_state, context):
        # beam_size = batch_size * beam_n
        h, c = hidden_state  #(hid_size,)
        self.tokens = T.LongTensor(config.beam_size, 1).fill_(
            start_id)  #(beam_size, t) after t time steps
        # Initial score of every beam is -30
        self.scores = T.FloatTensor(config.beam_size, 1).fill_(
            -30)  #beam_size,1; Initial score of beams = -30
        self.tokens, self.scores = get_cuda(self.tokens), get_cuda(self.scores)
        self.scores[0][0] = 0

        # Each element of the batch to be decoded is replicated beam_size times
        # At time step t=0 all beams should extend from a single beam, so only the first beam is given a high initial score (0 above, vs. -30 for the rest)
        self.hid_h = h.unsqueeze(0).repeat(config.beam_size,
                                           1)  #beam_size, hid_size
        self.hid_c = c.unsqueeze(0).repeat(config.beam_size,
                                           1)  #beam_size, hid_size
        self.context = context.unsqueeze(0).repeat(config.beam_size,
                                                   1)  #beam_size, 2*hid_size
        self.sum_temporal_srcs = None
        self.prev_s = None
        self.done = False
        self.end_id = end_id
        self.unk_id = unk_id
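Why the first beam gets score 0 while the others get -30: when beams are advanced with a top-k over combined scores, the -30 offset keeps the duplicated start beams out of the top-k at t = 0, so all beams effectively extend from a single hypothesis. A minimal sketch of such an advance step is shown below; it is illustrative only (the project's own beam-advance code is not part of this excerpt), and the names advance_step and log_probs are hypothetical.

import torch as T

def advance_step(scores, log_probs, beam_size):
    # scores: (beam_size, 1); log_probs: (beam_size, vocab_size) for the current step
    total = scores + log_probs                    # score of extending beam i with token w
    top_vals, top_ids = total.view(-1).topk(beam_size)
    beam_ids = top_ids // log_probs.size(1)       # which beam each surviving candidate extends
    token_ids = top_ids % log_probs.size(1)       # which token extends it
    return top_vals.unsqueeze(1), beam_ids, token_ids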
Code example #3
    def __init__(self, pre_train_emb, word_emb_type, vocab):
        super(Model, self).__init__()
        self.encoder = Encoder()
        self.decoder = Decoder()

        self.embeds = get_init_embedding(config, vocab)
        self.encoder = get_cuda(self.encoder)
        self.decoder = get_cuda(self.decoder)
        self.embeds = get_cuda(self.embeds)
Code example #4
File: rl_util.py  Project: Leyan529/Master-Summarizer
def reward_function(decoded_sents, original_sents):
    rouge = Rouge()
    try:
        scores = rouge.get_scores(decoded_sents, original_sents)
    except Exception:
        # print("Rouge failed for multi sentence evaluation.. Finding exact pair")
        scores = []
        for i in range(len(decoded_sents)):
            try:
                score = rouge.get_scores(decoded_sents[i], original_sents[i])
            except Exception:
                # print("Error occured at:")
                # print("decoded_sents:", decoded_sents[i])
                # print("original_sents:", original_sents[i])
                score = [{"rouge-l": {"r": 0.0}}]
            scores.append(score[0])
    rewards = [score["rouge-l"]["r"] for score in scores]
    rewards = get_cuda(T.FloatTensor(rewards))
    return rewards
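reward_function returns the ROUGE-L recall of each decoded summary against its reference. In a self-critical policy-gradient setup (which a file named rl_util.py suggests), such rewards are typically combined with the log-probabilities of sampled summaries roughly as sketched below. This is a generic sketch, not the project's training loop; sample_sents, greedy_sents and sample_log_probs are hypothetical names.

def self_critical_loss(sample_sents, greedy_sents, original_sents, sample_log_probs):
    # sample_log_probs: (batch,) sum of log-probabilities of the sampled tokens
    r_sample = reward_function(sample_sents, original_sents)  # reward of sampled summaries
    r_greedy = reward_function(greedy_sents, original_sents)  # greedy-decoding baseline
    advantage = (r_sample - r_greedy).detach()
    # Minimizing this loss raises the likelihood of samples that beat the greedy baseline
    return -(advantage * sample_log_probs).mean()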
Code example #5
    def forward(self, st_hat, h, enc_padding_mask, sum_temporal_srcs,
                sum_k_emb):
        ''' Perform attention over encoder hidden states
        :param st_hat: decoder hidden state at the current time step
        :param h: encoder hidden states
        :param enc_padding_mask: mask that is 1 for real source tokens and 0 for padding
        :param sum_temporal_srcs: if using intra-temporal attention, contains the summation of attention scores from previous decoder time steps
        :param sum_k_emb: summed keyword embeddings, projected and added to the attention energies when config.key_attention is set
        Self-attention is also often called intra-attention: attention computed among the elements of the Source
        (or of the Target) themselves, i.e. the special case where Target = Source.
        The computation itself is the same; only the objects being attended over change.
        '''

        # Standard attention technique (eq 1 in Pointer-Generator Networks - https://arxiv.org/pdf/1704.04368.pdf)
        # et = tanh ( W_h(h) + W_s(st_hat) )
        et = self.W_h(h)  # batch_size,n_seq,2*hid_size
        dec_fea = self.W_s(st_hat).unsqueeze(1)  # batch_size,1,2*hid_size
        et = et + dec_fea  # et => incorporate h_td (hidden decoder state) & h_te (hidden encoder state)
        if config.key_attention:
            k_t = self.W_t(sum_k_emb).unsqueeze(1)
            if k_t.shape[0] == et.shape[0]: et = et + k_t
        et = T.tanh(et)  # batch_size,b_seq_len,2*hid_size
        et = self.v(et).squeeze(2)  # batch_size,b_seq_len
        # intra-temporal attention     (eq 3 in DEEP REINFORCED MODEL - https://arxiv.org/pdf/1705.04304.pdf)
        if config.intra_encoder:
            exp_et = T.exp(et)
            if sum_temporal_srcs is None:
                et1 = exp_et  # eq 3 if t = 1 condition
                sum_temporal_srcs = get_cuda(
                    T.FloatTensor(et.size()).fill_(1e-10)) + exp_et
            else:
                et1 = exp_et / sum_temporal_srcs  # eq 3 otherwise condition   #batch_size, b_seq_len
                sum_temporal_srcs = sum_temporal_srcs + exp_et  # accumulate the source attention scores from all previous decoder steps (intra-temporal attention)
        else:
            # (eq 2 in Pointer-Generator Networks - https://arxiv.org/pdf/1704.04368.pdf)
            et1 = F.softmax(et, dim=1)  # et1 = softmax(et)
        # et1: the final weighted attention scores
        # assign 0 probability for padded elements
        at = et1 * enc_padding_mask
        # torch.sum(input, dim, keepdim=False, out=None) → Tensor returns a new tensor holding the sum over dimension dim of the input;
        # with keepdim=True the reduced dimension is kept with size 1 and all other dimensions match the input
        normalization_factor = at.sum(1, keepdim=True)
        at = at / normalization_factor  # renormalize so the attention weights over non-padded positions sum to 1

        at = at.unsqueeze(
            1
        )  #batch_size,1,b_seq_len          # torch.unsqueeze() inserts a dimension of size 1 at the given position
        # Compute encoder context vector
        ct_e = T.bmm(
            at, h
        )  #batch_size, 1, 2*hid_size      #  multiply the attention distribution with the encoder hidden states to get the context vector
        ct_e = ct_e.squeeze(1)
        at = at.squeeze(1)  # torch.squeeze() removes dimensions of size 1
        return ct_e, at, sum_temporal_srcs  # context vector, attention scores, sum_temporal_srcs (not None when intra-temporal attention is used)
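Read off the code above, the intra-temporal attention (eq. 3 of the Deep Reinforced Model combined with the additive scoring of eq. 1 of the Pointer-Generator paper) amounts to the following; this is a reading of the code, not a quote from either paper, with m_i denoting enc_padding_mask and \hat{s}_t denoting st_hat:

    e_{t,i} = v^\top \tanh(W_h\, h_i + W_s\, \hat{s}_t)
    e'_{t,i} = \exp(e_{t,i}) \;\text{if } t = 1, \qquad e'_{t,i} = \frac{\exp(e_{t,i})}{\sum_{j=1}^{t-1} \exp(e_{j,i})} \;\text{otherwise}
    \alpha_{t,i} = \frac{e'_{t,i}\, m_i}{\sum_k e'_{t,k}\, m_k}, \qquad c^e_t = \sum_i \alpha_{t,i}\, h_i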