    def zero_state(self, inputs, encoder):
        batch_size = inputs.size(0)
        # (num_layers * 2, batch, hidden): the factor of 2 accounts for the
        # two directions of a bidirectional encoder
        h0 = Variable(torch.zeros(encoder.num_layers * 2, batch_size,
                                  self.enc_hidden_dim),
                      requires_grad=False)
        c0 = Variable(torch.zeros(encoder.num_layers * 2, batch_size,
                                  self.enc_hidden_dim),
                      requires_grad=False)
        return set_cuda(h0, self.use_cuda), set_cuda(c0, self.use_cuda)
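# The set_cuda helper used throughout these examples is not shown in the
# source. A minimal sketch of what it presumably does, with the signature
# inferred from the call sites (the original may differ):
import torch

def set_cuda(tensor, use_cuda):
    """Move a tensor to the GPU when use_cuda is True (assumed helper)."""
    return tensor.cuda() if use_cuda else tensor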
Example #2
    @classmethod
    def unpack_batch(cls, batch, cuda):
        # convert the raw batch fields into (optionally CUDA) tensors
        words = set_cuda(get_long_tensor(batch.word, batch.batch_size), cuda)
        masks = set_cuda(torch.eq(words, 0), cuda)  # True at PAD positions (id 0)
        pos = set_cuda(get_long_tensor(batch.pos, batch.batch_size), cuda)
        ner = set_cuda(get_long_tensor(batch.ner, batch.batch_size), cuda)
        coref = set_cuda(get_long_tensor(batch.coref, batch.batch_size), cuda)
        ucca_enc = set_cuda(get_long_tensor(batch.ucca_enc, batch.batch_size),
                            cuda)

        rel = set_cuda(torch.LongTensor(batch.rel), cuda)

        inputs = Input(batch_size=batch.batch_size,
                       word=words,
                       mask=masks,
                       pos=pos,
                       ner=ner,
                       coref=coref,
                       ucca_enc=ucca_enc,
                       len=batch.len,
                       head=batch.head,
                       ucca_head=batch.ucca_head,
                       ucca_multi_head=batch.ucca_multi_head,
                       ucca_dist_from_mh_path=batch.ucca_dist_from_mh_path,
                       subj_p=batch.subj_p,
                       obj_p=batch.obj_p,
                       id=batch.id,
                       orig_idx=batch.orig_idx)

        return inputs, rel
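# Input and get_long_tensor are likewise external. Judging only from the
# call above, Input is plausibly a namedtuple and get_long_tensor pads a
# list of token-id sequences into one LongTensor; both sketches below are
# assumptions, with the field names copied from the constructor call:
from collections import namedtuple

import torch

Input = namedtuple('Input', [
    'batch_size', 'word', 'mask', 'pos', 'ner', 'coref', 'ucca_enc',
    'len', 'head', 'ucca_head', 'ucca_multi_head',
    'ucca_dist_from_mh_path', 'subj_p', 'obj_p', 'id', 'orig_idx'
])

def get_long_tensor(tokens_list, batch_size):
    """Pad token-id sequences to a (batch_size, max_len) LongTensor,
    using 0 (PAD) for padding. Assumed implementation."""
    max_len = max(len(x) for x in tokens_list)
    tokens = torch.zeros(batch_size, max_len, dtype=torch.long)
    for i, seq in enumerate(tokens_list):
        tokens[i, :len(seq)] = torch.as_tensor(seq, dtype=torch.long)
    return tokens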
    def dec_zero_state(self, batch_size):
        # (batch, hidden) state with no layer dimension, as consumed by a
        # single-step, cell-style decoder
        h0 = Variable(torch.zeros(batch_size, self.dec_hidden_dim),
                      requires_grad=False)
        c0 = Variable(torch.zeros(batch_size, self.dec_hidden_dim),
                      requires_grad=False)
        return set_cuda(h0, self.use_cuda), set_cuda(c0, self.use_cuda)
    def decode(self,
               dec_inputs,
               dec_hidden,
               ctx,
               ctx_tokens,
               ctx_mask=None,
               indi_h=None,
               inference=False):

        batch_size = dec_inputs.size(0)
        seq_len = dec_inputs.size(1)
        ctx_len = ctx_tokens.size(1)
        copy_indices = set_cuda(
            torch.LongTensor(batch_size).fill_(-1),
            self.use_cuda)  # by default do not hard copy

        if self.use_indi:
            # append the indicator vector to every decoder input step
            dec_inputs_with_indi = torch.cat(
                [dec_inputs,
                 indi_h.unsqueeze(1).expand(-1, dec_inputs.size(1), -1)],
                dim=2)
        else:
            dec_inputs_with_indi = dec_inputs  # no indi
        # attentional decoder
        h, h_tilde, h_c, attn, cov, dec_hidden = self.decoder(
            dec_inputs_with_indi, dec_hidden, ctx, ctx_mask)
        # vocab prediction layer
        h_tilde_flat = h_tilde.contiguous().view(-1,
                                                 h_tilde.size(2))  # B*L x dim
        decoder_logits = self.dec2vocab(h_tilde_flat).view(
            h_tilde.size(0), h_tilde.size(1), -1)
        # force PAD and UNK logits to -inf
        decoder_logits[:, :, constant.PAD_ID] = -constant.INFINITY_NUMBER
        decoder_logits[:, :, constant.UNK_ID] = -constant.INFINITY_NUMBER
        decoder_probs = self.get_prob(decoder_logits)  # [B, QT, V]
        decoder_probs = torch.log(decoder_probs + 1e-12)

        # copy network: attention weights serve as copy probabilities
        copier_probs = attn
        copier_probs = torch.log(copier_probs + 1e-12)  # [B, QT, CT]

        # combine: predict per-step copy-vs-decode probabilities
        h_flat = h.contiguous().view(-1, h.size(2))
        h_c_flat = h_c.contiguous().view(-1, h_c.size(2))
        dec_inputs_flat = dec_inputs.contiguous().view(-1, dec_inputs.size(2))
        c = self.combiner(torch.cat([h_flat, h_c_flat, dec_inputs_flat],
                                    dim=1)).view(batch_size, seq_len,
                                                 -1)  # [B, QT, 2]
        cpy_prob = c[:, :, 0]
        dec_prob = c[:, :, 1]

        # if inference=True, do hard selection
        if inference:
            assert cpy_prob.size(1) == seq_len == 1, \
                "Inference mode has to decode one step at a time."
            use_cpy = (cpy_prob > dec_prob).float()
            cpy_prob = use_cpy
            dec_prob = 1.0 - use_cpy
            # find copy indices
            _, copy_indices = copier_probs.squeeze(1).max(dim=1)
            for i, flag in enumerate(use_cpy.squeeze(1).data):
                if flag == 0:
                    copy_indices[i] = -1  # do not hard copy
            copy_indices = copy_indices.data

        expanded_cpy_prob = cpy_prob.unsqueeze(2).expand(
            batch_size, seq_len, ctx_len)
        expanded_dec_prob = dec_prob.unsqueeze(2).expand(
            batch_size, seq_len, self.vocab_size)

        # scatter probs back into vocabulary space
        full_copier_probs = set_cuda(
            Variable(torch.zeros(batch_size, seq_len,
                                 self.vocab_size).fill_(-1e10)),
            self.use_cuda)
        expanded_ctx_tokens = ctx_tokens.unsqueeze(1).expand_as(copier_probs)
        combined_copier_probs = copier_probs + expanded_cpy_prob  # combine before scatter
        full_copier_probs.scatter_(2, expanded_ctx_tokens,
                                   combined_copier_probs)  # scatter info back

        # mix the two distributions in probability space, then re-log
        combined_probs = torch.exp(full_copier_probs) + torch.exp(
            expanded_dec_prob + decoder_probs)
        log_probs = torch.log(combined_probs)

        if inference:
            return log_probs, dec_hidden, copy_indices
        return log_probs, dec_hidden, attn, cov
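# The final combination above exponentiates two log-probability tensors and
# re-logs their sum, which can underflow when both terms are very negative.
# An equivalent, numerically stabler formulation -- a sketch, not the
# original code -- uses torch.logsumexp over a stacked tensor:
import torch

def log_add_exp(a, b):
    """log(exp(a) + exp(b)), computed stably."""
    return torch.logsumexp(torch.stack([a, b], dim=0), dim=0)

# drop-in for the combination step:
# log_probs = log_add_exp(full_copier_probs,
#                         expanded_dec_prob + decoder_probs)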
Example #5
    def decode(self,
               dec_inputs,
               dec_hidden,
               ctx,
               ctx_tokens,
               ctx_mask=None,
               bg_h=None,
               inference=False):
        """
        Every input should be batch first.
        For inference mode, the output will be decided to be either copier prob or decoder prob.
        """
        batch_size = dec_inputs.size(0)
        seq_len = dec_inputs.size(1)
        ctx_len = ctx_tokens.size(1)
        copy_indices = set_cuda(
            torch.LongTensor(batch_size).fill_(-1),
            self.use_cuda)  # by default do not hard copy

        if self.use_bg:
            # append the background vector to every decoder input step
            dec_inputs_with_bg = torch.cat(
                [dec_inputs,
                 bg_h.unsqueeze(1).expand(-1, dec_inputs.size(1), -1)],
                dim=2)
        else:
            dec_inputs_with_bg = dec_inputs  # no bg
        # attentional decoder
        h, h_tilde, h_c, attn, cov, dec_hidden = self.decoder(
            dec_inputs_with_bg, dec_hidden, ctx, ctx_mask)
        # vocab prediction layer
        h_tilde_flat = h_tilde.contiguous().view(-1,
                                                 h_tilde.size(2))  # B*L x dim
        decoder_logits = self.dec2vocab(h_tilde_flat).view(
            h_tilde.size(0), h_tilde.size(1), -1)
        # force PAD and UNK logits to -inf
        decoder_logits[:, :, constant.PAD_ID] = -constant.INFINITY_NUMBER
        decoder_logits[:, :, constant.UNK_ID] = -constant.INFINITY_NUMBER
        decoder_probs = self.get_prob(decoder_logits)  # [B, QT, V]
        decoder_probs = torch.log(decoder_probs + 1e-12)

        # copy network
        copier_probs = attn
        copier_probs = torch.log(copier_probs + 1e-12)
        # [B, QT, CT]

        # combine
        h_flat = h.contiguous().view(-1, h.size(2))
        h_c_flat = h_c.contiguous().view(-1, h_c.size(2))
        dec_inputs_flat = dec_inputs.contiguous().view(-1, dec_inputs.size(2))
        c = self.combiner(torch.cat([h_flat, h_c_flat, dec_inputs_flat],
                                    dim=1)).view(batch_size, seq_len,
                                                 -1)  # [B, QT, 2]
        cpy_prob = c[:, :, 0]
        dec_prob = c[:, :, 1]

        # if inference=True, do hard selection
        if inference:
            assert cpy_prob.size(1) == seq_len == 1, \
                "Inference mode has to decode one step at a time."
            use_cpy = (cpy_prob > dec_prob).float()
            cpy_prob = use_cpy
            dec_prob = 1.0 - use_cpy
            # find copy indices
            _, copy_indices = copier_probs.squeeze(1).max(dim=1)
            for i, flag in enumerate(use_cpy.squeeze(1).data):
                if flag == 0:
                    copy_indices[i] = -1  # do not hard copy
            copy_indices = copy_indices.data

        expanded_cpy_prob = cpy_prob.unsqueeze(2).expand(
            batch_size, seq_len, ctx_len)
        expanded_dec_prob = dec_prob.unsqueeze(2).expand(
            batch_size, seq_len, self.vocab_size)

        # scatter probs back
        full_copier_probs = set_cuda(
            Variable(torch.zeros(batch_size, seq_len,
                                 self.vocab_size).fill_(-1e10)),
            self.use_cuda)
        expanded_ctx_tokens = ctx_tokens.unsqueeze(1).expand_as(copier_probs)
        combined_copier_probs = copier_probs + expanded_cpy_prob  # combine before scatter
        full_copier_probs.scatter_(2, expanded_ctx_tokens,
                                   combined_copier_probs)  # scatter info back

        # mix the two distributions in probability space, then re-log
        combined_probs = torch.exp(full_copier_probs) + torch.exp(
            expanded_dec_prob + decoder_probs)
        log_probs = torch.log(combined_probs)

        # output dec_hidden for future decoding
        if inference:
            return log_probs, dec_hidden, copy_indices
        return log_probs, dec_hidden, attn, cov
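# Usage sketch: a greedy, one-token-at-a-time inference loop over decode().
# Only the decode()/dec_zero_state() signatures and the inference=True
# return values come from the code above; model.emb, sos_id, and max_len
# are hypothetical stand-ins for illustration.
import torch

def greedy_decode(model, ctx, ctx_tokens, ctx_mask, bg_h=None,
                  max_len=50, sos_id=1):
    # sos_id is a stand-in; the real start-of-sequence id presumably lives
    # alongside PAD_ID/UNK_ID in the project's constant module
    batch_size = ctx_tokens.size(0)
    dec_hidden = model.dec_zero_state(batch_size)
    tokens = torch.full((batch_size, 1), sos_id, dtype=torch.long)
    outputs = []
    for _ in range(max_len):
        dec_inputs = model.emb(tokens)  # [B, 1, emb_dim], assumed lookup
        log_probs, dec_hidden, copy_indices = model.decode(
            dec_inputs, dec_hidden, ctx, ctx_tokens,
            ctx_mask=ctx_mask, bg_h=bg_h, inference=True)
        tokens = log_probs.squeeze(1).argmax(dim=1, keepdim=True)
        outputs.append(tokens)
    return torch.cat(outputs, dim=1)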
Example #6
    def decode(self,
               dec_inputs,
               dec_hidden,
               ctx,
               ctx_tokens,
               ctx_mask=None,
               inference=False):

        batch_size = dec_inputs.size(0)
        seq_len = dec_inputs.size(1)
        ctx_len = ctx_tokens.size(1)
        copy_indices = set_cuda(
            torch.LongTensor(batch_size).fill_(-1),
            self.use_cuda)  # by default do not hard copy

        # attentional decoder
        h, h_tilde, h_c, attn, cov, dec_hidden = self.decoder(
            dec_inputs, dec_hidden, ctx, ctx_mask)

        # vocab prediction layer
        h_tilde_flat = h_tilde.contiguous().view(-1,
                                                 h_tilde.size(2))  # B*L x dim
        decoder_logits = self.to_vocab(h_tilde_flat).view(
            h_tilde.size(0), h_tilde.size(1), -1)

        # force PAD and UNK logits to -inf
        decoder_logits[:, :, constant.PAD_ID] = -constant.INFINITY_NUMBER
        decoder_logits[:, :, constant.UNK_ID] = -constant.INFINITY_NUMBER
        decoder_probs = self.get_prob(decoder_logits)  # [B, QT, V]
        decoder_probs = torch.log(decoder_probs + 1e-12)

        # copy network
        copier_probs = attn
        copier_probs = torch.log(copier_probs + 1e-12)
        # [B, QT, CT]

        # combine all of the outputs
        h_flat = h.contiguous().view(-1, h.size(2))
        h_c_flat = h_c.contiguous().view(-1, h_c.size(2))
        dec_inputs_flat = dec_inputs.contiguous().view(-1, dec_inputs.size(2))
        c = self.sequential(
            torch.cat([h_flat, h_c_flat, dec_inputs_flat],
                      dim=1)).view(batch_size, seq_len, -1)  # [B, QT, 2]
        cpy_prob = c[:, :, 0]
        dec_prob = c[:, :, 1]

        if inference:
            assert cpy_prob.size(1) == seq_len == 1, \
                "Inference mode has to decode one step at a time."
            use_cpy = (cpy_prob > dec_prob).float()
            cpy_prob = use_cpy
            dec_prob = 1.0 - use_cpy
            # find copy indices
            _, copy_indices = copier_probs.squeeze(1).max(dim=1)
            for i, flag in enumerate(use_cpy.squeeze(1).data):
                if flag == 0:
                    copy_indices[i] = -1  # do not hard copy
            copy_indices = copy_indices.data

        expanded_cpy_prob = cpy_prob.unsqueeze(2).expand(
            batch_size, seq_len, ctx_len)
        expanded_dec_prob = dec_prob.unsqueeze(2).expand(
            batch_size, seq_len, self.vocab_size)

        # scatter probs back into vocabulary space
        full_copier_probs = set_cuda(
            Variable(torch.zeros(batch_size, seq_len,
                                 self.vocab_size).fill_(-1e10)),
            self.use_cuda)
        expanded_ctx_tokens = ctx_tokens.unsqueeze(1).expand_as(copier_probs)
        combined_copier_probs = copier_probs + expanded_cpy_prob  # combine before scatter
        full_copier_probs.scatter_(2, expanded_ctx_tokens,
                                   combined_copier_probs)  # scatter info back

        combined_probs = torch.exp(full_copier_probs) + torch.exp(
            expanded_dec_prob + decoder_probs)
        log_probs = torch.log(combined_probs)

        # output dec_hidden for next step(s) decoding
        if inference:
            return log_probs, dec_hidden, copy_indices
        return log_probs, dec_hidden, attn, cov
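# Across all three variants, the output distribution is the same soft
# mixture of a copy distribution and a vocabulary distribution, in the
# style of a pointer-generator network (my characterization, not the
# source's): P(w) = p_cpy * sum_{j: ctx_j = w} attn_j + p_dec * P_vocab(w),
# where p_cpy and p_dec presumably sum to one. A toy check of the
# scatter-and-combine step, done directly in probability space:
import torch

attn = torch.tensor([[[0.9, 0.1]]])        # [B=1, QT=1, CT=2]
ctx_tokens = torch.tensor([[5, 3]])        # context token ids
p_vocab = torch.full((1, 1, 8), 1.0 / 8)   # uniform vocab dist, V=8
p_cpy, p_dec = 0.7, 0.3

full_copy = torch.zeros(1, 1, 8)
full_copy.scatter_(2, ctx_tokens.unsqueeze(1), attn * p_cpy)
mixture = full_copy + p_dec * p_vocab
print(mixture[0, 0, 5].item())  # 0.7*0.9 + 0.3*0.125 = 0.6675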