Ejemplo n.º 1
0
    def forward(self, inp: torch.FloatTensor, tgt: torch.LongTensor):
        """Compute the negative log-probability of every target in the batch.

        Each row is assigned to one cluster by ``get_cluster_members``.
        Cluster 0 is the head; clusters ``1..n_clusters`` are handled by the
        matching module in ``self.tail``. For a tail row the result is the
        sum of the log-probability of its cluster token in the head
        distribution and the log-probability of the target inside the tail
        cluster; for a head row only the head term is used.

        Args:
            inp: Input batch; dimension 0 is the batch dimension. It is fed
                to ``self.head`` and (row subsets of it) to ``self.tail``.
            tgt: Target indices, one per row of ``inp``.

        Returns:
            A tensor with ``tgt.size(0)`` entries holding the negated
            log-probabilities.

        Raises:
            RuntimeError: If ``inp`` and ``tgt`` disagree in the batch
                dimension, or if some targets were not assigned to any
                cluster.
        """
        batch_size = tgt.size(0)
        if inp.size(0) != batch_size:
            raise RuntimeError('Input and target should have the same size '
                               'in the batch dimension.')

        # Accumulated log-probabilities (tail part first, head part later).
        log_probs = inp.new_zeros(batch_size)
        # Column of the head output to read for every row.
        head_targets = tgt.new_empty(batch_size)
        rows_seen = 0

        for cluster_id in range(self.n_clusters + 1):
            in_cluster, local_targets = get_cluster_members(
                cluster_id, tgt, self.cutoffs, self.ent_slices)
            rows = in_cluster.nonzero().squeeze()
            if rows.numel() == 0:
                continue
            rows_seen += rows.numel()
            # Targets of this cluster's rows, relative to the cluster.
            cluster_targets = local_targets[in_cluster]

            if cluster_id == 0:
                # Head rows: the relative target is read from the head
                # output directly, nothing else to compute here.
                head_targets.index_copy_(0, rows, cluster_targets)
                continue

            # Tail rows: in the head they point at the cluster's own slot,
            # which sits after the first ``cutoffs[0]`` head entries.
            head_targets.index_fill_(0, rows,
                                     self.cutoffs[0] + cluster_id - 1)

            tail_logits = self.tail[cluster_id - 1](inp.index_select(0, rows))
            tail_logprob = F.log_softmax(tail_logits, dim=1)
            picked = tail_logprob.gather(1, cluster_targets.unsqueeze(1))
            log_probs.index_copy_(0, rows, picked.squeeze(1))

        if rows_seen != batch_size:
            # Some rows were never claimed by a cluster.
            logger.error('used_rows ({}) and batch_size ({}) does not match'
                         ''.format(rows_seen, batch_size))
            raise RuntimeError("Target values should be in [0, {}], "
                               "but values in range [{}, {}] "
                               "were found. ".format(self.n_classes - 1,
                                                     tgt.min().item(),
                                                     tgt.max().item()))

        head_logprob = F.log_softmax(self.head(inp), dim=1)
        log_probs += head_logprob.gather(1, head_targets.unsqueeze(1)).squeeze()

        # Negate so that the caller receives a loss-like quantity.
        return -log_probs
Ejemplo n.º 2
0
def paste(background: Tensor, patch: Tensor, x: LongTensor, y: LongTensor, mask: Optional[Tensor] = None):
    """
    Pastes the given patch into the background image tensor at the specified location.
    Optionally a mask of the same size as the patch can be passed in to blend the
    pasted contents with the background.

    :param background: A batch of image tensors of shape (B, C, H, W) that represent the background
    :param patch: A batch of image tensors of shape (B, C, h, w) which values get pasted into the background
    :param x: The horizontal integer coordinates relative to the top left corner of the background image.
        This tensor must be a one-dimensional tensor of shape (B, ).
    :param y: The vertical integer coordinates relative to the top left corner of the background image.
        This tensor must be a one-dimensional tensor of shape (B, ).
    :param mask: A mask of the same size as the patch that is used to blend foreground and background values.
        It is optional and defaults to ones (all is foreground).
    :return: The composite tensor of background and foreground values of shape (B, C, H, W).

    Note:
        1.  The X- and Y-coordinates can exceed the range of the background image (negative and positive).
            The background is padded on every side by as much as the most extreme patch requires and
            cropped back to (H, W) after pasting, so contents can go over the borders of the background
            image without wrapping around to the opposite side.
        2.  Currently it only supports integer locations.
        3.  All tensors must be on the same device.
    """
    # background: (B, C, H, W)
    # patch, mask: (B, C, h, w)
    # x, y: (B, )
    b, c, H, W = background.shape
    _, _, h, w = patch.shape
    mask = torch.ones_like(patch) if mask is None else mask
    device = background.device
    assert b == patch.size(0) == mask.size(0)
    assert b == x.size(0) == y.size(0)
    assert c == patch.size(1) == mask.size(1)
    assert h == mask.size(-2)
    assert w == mask.size(-1)
    assert 1 == x.ndimension() == y.ndimension()
    assert device == patch.device == x.device == y.device == mask.device
    x = x.long()
    y = y.long()

    # Dynamically pad the background for patches that go over borders.
    # The left/top padding is how far the most negative coordinate reaches
    # outside the image; it guarantees that every shifted index below is >= 0,
    # so no index can wrap around to the other side.
    left = max(-x.min().item(), 0)
    top = max(-y.min().item(), 0)
    right = max(x.max().item() + w - W, 0)
    bottom = max(y.max().item() + h - H, 0)
    background = nn.functional.pad(background, pad=[left, right, top, bottom])

    # Index tensors that broadcast against each other to (B, C, h, w);
    # this avoids materialising four full-size grids.
    idx_b = torch.arange(b, device=device).view(b, 1, 1, 1)
    idx_c = torch.arange(c, device=device).view(1, c, 1, 1)
    idx_y = y.view(b, 1, 1, 1) + torch.arange(h, device=device).view(1, 1, h, 1) + top
    idx_x = x.view(b, 1, 1, 1) + torch.arange(w, device=device).view(1, 1, 1, w) + left
    target = (idx_b, idx_c, idx_y, idx_x)

    # paste: blend the patch over the current background values
    background[target] = mask * patch + (1 - mask) * background[target]

    # crop away the padded regions
    return background[..., top:(top + H), left:(left + W)]
Ejemplo n.º 3
0
    def forward(
        self,
        x: Union[rnn.PackedSequence, torch.Tensor],
        hx: HiddenState = None,
        lengths: torch.LongTensor = None,
        enforce_sorted: bool = True,
    ) -> Tuple[Union[rnn.PackedSequence, torch.Tensor], HiddenState]:
        """
        Forward function of rnn that allows zero-length sequences.

        Functions as normal for RNN. Only changes output if lengths are defined.
        Sequences of length zero are run through the RNN as if they had length
        one, and their returned hidden state is then replaced by the initial
        hidden state (``hx`` if given, otherwise ``self.init_hidden_state(x)``).

        Args:
            x (Union[rnn.PackedSequence, torch.Tensor]): input to RNN. either packed sequence or tensor of
                padded sequences
            hx (HiddenState, optional): hidden state. Defaults to None.
            lengths (torch.LongTensor, optional): lengths of sequences. If not None, used to determine correct returned
                hidden state. Can contain zeros. Defaults to None.
            enforce_sorted (bool, optional): if lengths are passed, determines if RNN expects them to be sorted.
                Defaults to True.

        Returns:
            Tuple[Union[rnn.PackedSequence, torch.Tensor], HiddenState]: output and hidden state.
                Output is packed sequence if input has been a packed sequence.
        """
        # Without lengths there is nothing special to do: defer to the parent RNN.
        if isinstance(x, rnn.PackedSequence) or lengths is None:
            assert lengths is None, "cannot combine x of type PackedSequence with lengths argument"
            return super().forward(x, hx=hx)

        min_length = lengths.min()
        max_length = lengths.max()
        assert min_length >= 0, "sequence lengths must be great equals 0"

        if max_length == 0:
            # Every sequence is empty: skip the RNN entirely. The hidden state
            # is the initial one, i.e. the caller's ``hx`` when provided, so
            # this branch agrees with the per-row handling further down.
            hidden_state = self.init_hidden_state(x) if hx is None else hx
            # NOTE(review): the last dimension uses self.hidden_size as-is;
            # confirm this matches the parent RNN's output feature size.
            if self.batch_first:
                out_shape = (lengths.size(0), x.size(1), self.hidden_size)
            else:
                out_shape = (x.size(0), lengths.size(0), self.hidden_size)
            out = torch.zeros(*out_shape, dtype=x.dtype, device=x.device)
            return out, hidden_state

        # Packing does not accept zero lengths, so treat empty sequences as
        # length one here; their hidden state is corrected afterwards.
        pack_lengths = lengths.where(lengths > 0, torch.ones_like(lengths))
        packed_out, hidden_state = super().forward(
            rnn.pack_padded_sequence(
                x,
                pack_lengths.cpu(),
                enforce_sorted=enforce_sorted,
                batch_first=self.batch_first,
            ),
            hx=hx,
        )

        # replace hidden cell with initial input if encoder_length is zero to determine correct initial state
        if min_length == 0:
            # Boolean mask of shape 1 x batch_size x 1, passed to
            # handle_no_encoding together with both hidden states.
            no_encoding = (lengths == 0)[None, :, None]
            initial_hidden_state = self.init_hidden_state(x) if hx is None else hx
            # propagate initial hidden state when sequence length was 0
            hidden_state = self.handle_no_encoding(
                hidden_state, no_encoding, initial_hidden_state)

        # return unpacked sequence
        out, _ = rnn.pad_packed_sequence(packed_out, batch_first=self.batch_first)
        return out, hidden_state