Example #1
File: utils.py Project: neulab/lrlm
    def forward(self, input: Tensor, target: LongTensor) -> Tensor:  # type: ignore
        """
        input  :: [len*bsz x d_proj]
        target :: [len*bsz]
        """
        input_shape = input.size()
        input = input.contiguous().view(-1, input_shape[-1])
        target = target.contiguous().view(-1)

        if input.size(0) != target.size(0):
            raise RuntimeError('Input and target should have the same size '
                               'in the batch dimension.')

        if self.n_clusters == 0:
            logits = self._compute_logits(input, self.out_layers[0].weight,
                                          self.out_layers[0].bias, self.out_projs[0])
            nll = F.nll_loss(logits, target, reduction='none')
        else:
            weights, biases = self._construct_weights()

            head_weight, head_bias = weights[0], biases[0]
            head_proj = self.out_projs[0] if len(self.out_projs) > 0 else None

            head_logits = self._compute_logits(input, head_weight, head_bias, head_proj)
            head_log_probs = F.log_softmax(head_logits, dim=1)

            # Indices (LongTensor, as returned by nonzero) of the targets falling in each tail cluster [l, r).
            nonzero_indices: List[LongTensor] = [
                ((target >= l) & (target < r)).nonzero().squeeze(-1)
                for l, r in zip(self.cutoffs[:-1], self.cutoffs[1:])
            ]
            # Remap each tail target to its cluster's placeholder token in the head softmax.
            head_indices: LongTensor = target.clone()
            for idx, indices in enumerate(nonzero_indices):
                if indices.numel() == 0:
                    continue
                index = self.shortlist_size + self.n_clusters - 1 - idx
                head_indices.index_fill_(0, indices, index)

            head_nll = F.nll_loss(head_log_probs, head_indices, reduction='none')

            # Add the within-cluster NLL for the targets routed to each tail cluster.
            for idx, indices in enumerate(nonzero_indices):
                if indices.numel() == 0:
                    continue

                weight_i, bias_i = weights[idx + 1], biases[idx + 1]
                proj_i = self.out_projs[idx + 1] if len(self.out_projs) > idx + 1 else None

                cluster_hidden = input.index_select(0, indices)
                cluster_target = target.index_select(0, indices) - self.cutoffs[idx]

                cluster_logits = self._compute_logits(cluster_hidden, weight_i, bias_i, proj_i)
                cluster_nll = F.cross_entropy(cluster_logits, cluster_target, reduction='none')

                tail_nll = torch.zeros_like(head_nll)
                tail_nll.index_copy_(0, indices, cluster_nll)
                head_nll = head_nll + tail_nll

            nll = head_nll

        nll = nll.view(input_shape[:-1])
        return nll
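The bucketing in the else branch is the core of this adaptive softmax: every tail target is replaced in the head by a placeholder token for its cluster, and its within-cluster loss is added back afterwards. A minimal, self-contained sketch of that remapping, with made-up cutoff values standing in for self.cutoffs / self.shortlist_size / self.n_clusters:

# Sketch of the cluster bucketing above; cutoff values are made up for illustration.
import torch

cutoffs = [4, 8, 12]              # hypothetical: shortlist of 4 tokens, two tail clusters
shortlist_size = cutoffs[0]
n_clusters = len(cutoffs) - 1
target = torch.tensor([1, 6, 9, 3, 11])

head_indices = target.clone()
for idx, (l, r) in enumerate(zip(cutoffs[:-1], cutoffs[1:])):
    indices = ((target >= l) & (target < r)).nonzero().squeeze(-1)
    if indices.numel() == 0:
        continue
    head_indices.index_fill_(0, indices, shortlist_size + n_clusters - 1 - idx)

print(head_indices)  # tensor([1, 5, 4, 3, 4]): tail targets replaced by their cluster token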
Example #2
 def _get_loss(logits: torch.FloatTensor, targets: torch.LongTensor,
               target_mask: torch.LongTensor) -> torch.Tensor:
     # Drop the first target position so the targets line up with the step-wise logits.
     relevant_targets = targets[:, 1:].contiguous()   # (batch_size, num_decoding_steps)
     relevant_mask = target_mask[:, 1:].contiguous()  # (batch_size, num_decoding_steps)
     # return my_sequence_cross_entropy_with_logits(logits.contiguous(), relevant_targets, relevant_mask)
     return util.sequence_cross_entropy_with_logits(logits.contiguous(),
                                                    relevant_targets,
                                                    relevant_mask)
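The [:, 1:] slice removes the first target position so that the logits produced at step t are scored against the token the decoder should emit at that step. As a rough pure-PyTorch sketch of what a masked sequence cross-entropy such as util.sequence_cross_entropy_with_logits computes (an illustration of the math only, not AllenNLP's implementation; its averaging options differ):

# Per-token NLL, zeroed where the mask is 0, averaged per sequence and then over the batch.
import torch
import torch.nn.functional as F

def masked_sequence_xent(logits, targets, mask):
    # logits: (batch, steps, num_classes); targets, mask: (batch, steps)
    log_probs = F.log_softmax(logits, dim=-1)
    nll = -log_probs.gather(-1, targets.unsqueeze(-1)).squeeze(-1)  # (batch, steps)
    nll = nll * mask.float()
    per_sequence = nll.sum(dim=1) / mask.float().sum(dim=1).clamp(min=1)
    return per_sequence.mean()

logits = torch.randn(2, 5, 10)
targets = torch.randint(0, 10, (2, 5))
mask = torch.ones(2, 5)
print(masked_sequence_xent(logits, targets, mask))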
Example #3
 def wrap(b: torch.LongTensor):
     if b is None:
         return b
     # A list of tensors is stacked into a single batch tensor before use.
     # (Checking isinstance first avoids calling .size() on a plain list.)
     if isinstance(b, list):
         b = torch.stack(b, 0)
     b = b.contiguous()
     if self.cuda:
         b = b.cuda()
     # Legacy (pre-0.4) autograd API: volatile=True turns off gradient tracking.
     b = Variable(b, volatile=self.volatile, requires_grad=False)
     return b
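Variable and the volatile flag belong to the pre-0.4 autograd API; both are gone in current PyTorch, where tensors carry autograd state themselves and device placement goes through .to(). A rough modern equivalent (the names device and inference are hypothetical stand-ins for self.cuda and self.volatile):

# Hypothetical modern rewrite of wrap; not the original project's code.
import torch

def wrap_modern(b, device="cuda", inference=True):
    if b is None:
        return b
    if isinstance(b, list):
        b = torch.stack(b, 0)
    b = b.contiguous().to(device)
    if inference:
        # The closest analogue of volatile=True is running the forward pass under
        # torch.no_grad(); detaching here just strips any gradient history from the input.
        b = b.detach()
    return b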
Example #4
    def _get_loss(
            logits: torch.FloatTensor, targets: torch.LongTensor, target_mask: torch.FloatTensor
    ) -> torch.Tensor:
        logits = logits.contiguous()
        # shape: (batch_size, num_decoding_steps)
        relevant_targets = targets.contiguous()

        # shape: (batch_size, num_decoding_steps)
        relevant_mask = target_mask.contiguous()

        return util.sequence_cross_entropy_with_logits(logits, relevant_targets, relevant_mask)
Example #5
    def _get_loss_custom(logits: torch.FloatTensor,
                         targets: torch.LongTensor,
                         target_mask: torch.LongTensor,
                         training: bool = True) -> torch.Tensor:
        """
        Unlike get_loss, logits and targets here have the same size.
        """
        relevant_targets = targets.contiguous()      # (batch_size, num_decoding_steps)
        relevant_mask = target_mask.contiguous()     # (batch_size, num_decoding_steps)
        # loss = util.sequence_cross_entropy_with_logits(logits, relevant_targets, relevant_mask)

        if training:
            loss = sequence_cross_entropy_with_logits(logits, relevant_targets,
                                                      relevant_mask)
        else:
            loss = sequence_cross_entropy_with_logits(logits,
                                                      relevant_targets,
                                                      relevant_mask,
                                                      average=None)

        return loss
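The only difference between the two branches is the average argument: with the default averaging the call returns a scalar, while average=None returns one loss per sequence, which is what the evaluation path wants. A shape-level illustration (tensor sizes are made up; the shapes follow AllenNLP's documented behavior):

# Illustrative shapes only; assumes AllenNLP's sequence_cross_entropy_with_logits.
import torch
from allennlp.nn.util import sequence_cross_entropy_with_logits

logits = torch.randn(3, 7, 20)            # (batch_size, num_decoding_steps, num_classes)
targets = torch.randint(0, 20, (3, 7))    # (batch_size, num_decoding_steps)
mask = torch.ones(3, 7)

print(sequence_cross_entropy_with_logits(logits, targets, mask).shape)                # torch.Size([])
print(sequence_cross_entropy_with_logits(logits, targets, mask, average=None).shape)  # torch.Size([3])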
Example #6
    def get_rseq(self, rel: torch.LongTensor, tem: torch.LongTensor):

        # (bs, emb_dim) -> (bs, 1, emb_dim): the relation embedding becomes the first sequence element.
        r_e = self.embedding['rel'](rel)
        r_e = r_e.unsqueeze(0).transpose(0, 1)

        bs = tem.size(0)
        tem_len = tem.size(1)
        # Flatten to (bs * tem_len,) so the temporal-token embedding is applied in one call.
        tem = tem.contiguous()
        tem = tem.view(bs * tem_len)

        token_e = self.embedding['tem'](tem)
        token_e = token_e.view(bs, tem_len, self.emb_dim)
        # (bs, 1 + tem_len, emb_dim): relation embedding followed by the temporal tokens.
        seq_e = torch.cat((r_e, token_e), 1)

        # Keep the first slice (dim 0) of the LSTM output as the relation-sequence embedding.
        hidden_tem = self.lstm(seq_e)
        hidden_tem = hidden_tem[0, :, :]
        rseq_e = hidden_tem

        return rseq_e
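The contiguous() call before view matters because view requires the tensor's memory layout to match the requested shape; a transposed or sliced index matrix has to be made contiguous first. A small illustration of the flatten/embed/reshape round-trip (sizes are made up, and nn.Embedding stands in for self.embedding['tem']):

# Flatten a (bs, tem_len) index matrix, embed it, and restore the batch structure.
import torch
import torch.nn as nn

bs, tem_len, emb_dim = 2, 3, 4
embedding = nn.Embedding(10, emb_dim)                  # stand-in for self.embedding['tem']
tem = torch.randint(0, 10, (bs, tem_len))

flat = tem.contiguous().view(bs * tem_len)             # (bs * tem_len,)
token_e = embedding(flat).view(bs, tem_len, emb_dim)   # restore the batch structure
print(token_e.shape)                                   # torch.Size([2, 3, 4])

Note that nn.Embedding also accepts the 2-D index matrix directly, so the explicit flatten and reshape mirror the original code rather than being strictly required.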
Example #7
    def _get_loss(self, scores: torch.Tensor, targets: torch.LongTensor,
                  generate_mask: torch.LongTensor, copy_mask: torch.LongTensor,
                  target_mask: torch.LongTensor) -> torch.Tensor:
        """
        :param scores:  (batch_size, decode_length, num_class + encode_length)
        :param targets: (batch_size, decode_length + 1)
        :param generate_mask: (batch_size, decode_length + 1), where 1.0 indicates the target word is selected from target
                              vocabulary, 0.0 indicates the target is copied from entity candidates
        :param copy_mask:     (batch_size, decode_length + 1, encode_length), where 1.0 indicates that the target word
                              is copied from this source word
        :param target_mask:   (batch_size, decode_length)
        :return:
        """
        batch_size, decode_length, _ = scores.size()
        # (batch_size, decode_length, num_class)
        generate_scores = scores[:, :, :self._num_classes]
        # (batch_size, decode_length, encode_length)
        copy_scores = scores[:, :, self._num_classes:]

        # shape: (batch_size * decode_length, 1)
        relevant_targets = targets[:, 1:].contiguous().view(-1, 1)
        target_generate_scores = torch.gather(
            generate_scores.view(-1, self._num_classes), dim=1, index=relevant_targets)
        target_scores = target_generate_scores.view(batch_size, decode_length)

        target_scores = target_scores * generate_mask[:, 1:]

        target_scores += (copy_scores *
                          copy_mask[:, 1:, :].float()).sum(dim=-1)

        # shape: (batch_size, decode_length)
        relevant_mask = target_mask.contiguous().float()
        loss = -target_scores.log() * relevant_mask
        loss = loss.sum(dim=-1) / relevant_mask.sum(dim=-1)
        loss = loss.sum() / batch_size
        return loss
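The torch.gather call is what selects, for every decoding step, the generation score assigned to the gold target id; the copy scores for the same positions are then added in through copy_mask. A minimal sketch of that gather with made-up sizes:

# Pick each step's score for its gold target id from (batch * steps, num_classes) scores.
import torch

batch_size, decode_length, num_classes = 2, 3, 5
generate_scores = torch.rand(batch_size, decode_length, num_classes)
targets = torch.randint(0, num_classes, (batch_size, decode_length))

picked = torch.gather(
    generate_scores.view(-1, num_classes), dim=1, index=targets.view(-1, 1))
picked = picked.view(batch_size, decode_length)  # generation score of the gold target at each step
print(picked.shape)                              # torch.Size([2, 3])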