Example #1
    def forward(
            self,
            theta: torch.Tensor,  # (N, D)
            x: torch.Tensor,  # (N, *)
            mask: torch.BoolTensor = None,  # (D,)
    ) -> torch.Tensor:
        if mask is None:
            mask = self.default
        elif self.hyper is not None:
            self.hyper(self.net, mask * 2. - 1.)

        if mask.dim() == 1 and theta.size(-1) < mask.numel():
            blank = theta.new_zeros(theta.shape[:-1] + mask.shape)
            blank[..., mask] = theta
            theta = blank
        elif mask.dim() > 1 and theta.shape != mask.shape:
            batch_shape = theta.shape[:-1]
            stack_shape = batch_shape + mask.shape[:-1]
            view_shape = batch_shape + (1, ) * (mask.dim() - 1)

            theta = theta.view(view_shape +
                               theta.shape[-1:]).expand(stack_shape +
                                                        theta.shape[-1:])
            x = x.view(view_shape + x.shape[-1:]).expand(stack_shape +
                                                         x.shape[-1:])

        theta = self.standardize(theta) * mask

        if self.hyper is None:
            theta = torch.cat(torch.broadcast_tensors(theta, mask * 2. - 1.),
                              dim=-1)

        return self.net(theta, x)
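The first branch above scatters a reduced `theta` back into its full dimensionality via boolean-mask indexing. A minimal standalone sketch of that pattern (shapes and names below are illustrative, not from the source):

import torch

mask = torch.tensor([True, False, True, False])           # (D,) which parameters are active
theta = torch.randn(8, int(mask.sum()))                    # (N, mask.sum()) reduced parameters

blank = theta.new_zeros(theta.shape[:-1] + mask.shape)     # (N, D) zero-filled template
blank[..., mask] = theta                                    # scatter values into the active slots
assert blank.shape == (8, 4)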
Example #2
def replace_masked_values(
    tensor: torch.Tensor, mask: torch.BoolTensor, replace_with: float
) -> torch.Tensor:
    if tensor.dim() != mask.dim():
        raise ConfigurationError(
            "tensor.dim() (%d) != mask.dim() (%d)" % (tensor.dim(), mask.dim())
        )
    return tensor.masked_fill(~mask, replace_with)
Example #3
def masked_log_softmax(vector: torch.Tensor, mask: torch.BoolTensor, dim: int = -1) -> torch.Tensor:
    """
    `torch.nn.functional.log_softmax(vector)` does not work if some elements of `vector` should be
    masked.  This performs a log_softmax on just the non-masked portions of `vector`.  Passing
    `None` in for the mask is also acceptable; you'll just get a regular log_softmax.
    `vector` can have an arbitrary number of dimensions; the only requirement is that `mask` is
    broadcastable to `vector's` shape.  If `mask` has fewer dimensions than `vector`, we will
    unsqueeze on dimension 1 until they match.  If you need a different unsqueezing of your mask,
    do it yourself before passing the mask into this function.
    In the case that the input vector is completely masked, the return value of this function is
    arbitrary, but not `nan`.  You should be masking the result of whatever computation comes out
    of this in that case, anyway, so the specific values returned shouldn't matter.  Also, the way
    that we deal with this case relies on having single-precision floats; mixing half-precision
    floats with fully-masked vectors will likely give you `nans`.
    If your logits are all extremely negative (i.e., the max value in your logit vector is -50 or
    lower), the way we handle masking here could mess you up.  But if you've got logit values that
    extreme, you've got bigger problems than this.
    """
    if mask is not None:
        while mask.dim() < vector.dim():
            mask = mask.unsqueeze(1)
        # vector + mask.log() is an easy way to zero out masked elements in logspace, but it
        # results in nans when the whole vector is masked.  We need a very small value instead of a
        # zero in the mask for these cases.
        vector = vector + (mask + 1e-30).log()
    return torch.nn.functional.log_softmax(vector, dim=dim)
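A quick usage sketch of the function above: the masked position receives log(1e-30), so its probability after exponentiation is effectively zero.

import torch

logits = torch.tensor([[1.0, 2.0, 3.0]])
mask = torch.tensor([[True, True, False]])

log_probs = masked_log_softmax(logits, mask)
print(log_probs.exp())   # roughly [[0.269, 0.731, 0.000]]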
Example #4
    def forward(self,
                features,
                hidden,
                is_start: torch.BoolTensor,
                additional=None):
        '''
        Forward pass with sanity check.

        Args:
            features: list of tensor. Current feature map.
            hidden: list of tensor or None. The previous hidden state.
            is_start: a batch of bool tensors indicating if the input x is the \
                start of a video.
        '''
        assert is_start.dim() == 1 and is_start.dtype == torch.bool

        if hidden is None:
            hidden = [
                torch.zeros_like(features[i]) for i in range(self.num_levels)
            ]
        else:
            assert len(features) == len(hidden) == self.num_levels
            # if any image in the batch is a start of a video,
            # reset the corresponding hidden state
            if is_start.any():
                for level_hid in hidden:
                    assert level_hid.shape[0] == len(is_start)
                    # boolean-mask indexing returns a copy, so calling .zero_()
                    # on level_hid[is_start] would not change the stored state;
                    # assign through the index instead
                    level_hid[is_start] = 0.0

        fused = self.fuse(features, hidden, additional)
        return fused
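As the comment above notes, boolean indexing produces a copy, so the reset must assign through the index. A self-contained sketch of that reset pattern (shapes are illustrative):

import torch

hidden = torch.ones(4, 8)                              # (batch, features) carried state
is_start = torch.tensor([True, False, False, True])    # entries that start a new video

hidden[is_start] = 0.0                                 # resets rows 0 and 3 in place
assert hidden[0].sum() == 0                            # reset row
assert hidden[1].sum() == hidden.shape[1]              # untouched row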
Example #5
def replace_masked_values(tensor: torch.Tensor, mask: torch.BoolTensor,
                          replace_with: float) -> torch.Tensor:
    """
    # Reference : allennlp

    Replaces all masked values in `tensor` with `replace_with`.  `mask` must be broadcastable
    to the same shape as `tensor`. We require that `tensor.dim() == mask.dim()`, as otherwise we
    won't know which dimensions of the mask to unsqueeze.

    This just does `tensor.masked_fill()`, except the pytorch method fills in things with a mask
    value of 1, where we want the opposite.  You can do this in your own code with
    `tensor.masked_fill(~mask, replace_with)`.
    """
    if tensor.dim() != mask.dim():
        raise ConfigurationError("tensor.dim() (%d) != mask.dim() (%d)" %
                                 (tensor.dim(), mask.dim()))
    return tensor.masked_fill(~mask, replace_with)
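A short usage sketch of the function above (ConfigurationError is only looked up when the dimensions disagree, so a matching call runs as-is):

import torch

scores = torch.tensor([[0.3, 0.8, 0.5]])
mask = torch.tensor([[True, False, True]])

# fill the masked-out (False) position, e.g. with a large negative value before a max
print(replace_masked_values(scores, mask, -1e7))   # [[0.3, -1e7, 0.5]]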
Example #6
def masked_softmax(
    vector: torch.Tensor, mask: torch.BoolTensor, dim: int = -1, memory_efficient: bool = False,
) -> torch.Tensor:
    if mask is None:
        result = torch.nn.functional.softmax(vector, dim=dim)
    else:
        while mask.dim() < vector.dim():
            mask = mask.unsqueeze(1)
        if not memory_efficient:
            result = torch.nn.functional.softmax(vector * mask, dim=dim)
            result = result * mask
            result = result / (
                result.sum(dim=dim, keepdim=True) + tiny_value_of_dtype(result.dtype)
            )
        else:
            masked_vector = vector.masked_fill(~mask, min_value_of_dtype(vector.dtype))
            result = torch.nn.functional.softmax(masked_vector, dim=dim)
    return result
Example #7
def masked_softmax(
    vector: torch.Tensor,
    mask: torch.BoolTensor,
    dim: int = -1,
    memory_efficient: bool = False,
) -> torch.Tensor:
    """
    `torch.nn.functional.softmax(vector)` does not work if some elements of `vector` should be
    masked.  This performs a softmax on just the non-masked portions of `vector`.  Passing
    `None` in for the mask is also acceptable; you'll just get a regular softmax.

    `vector` can have an arbitrary number of dimensions; the only requirement is that `mask` is
    broadcastable to `vector's` shape.  If `mask` has fewer dimensions than `vector`, we will
    unsqueeze on dimension 1 until they match.  If you need a different unsqueezing of your mask,
    do it yourself before passing the mask into this function.

    If `memory_efficient` is set to true, we will simply use a very large negative number for those
    masked positions so that the probabilities of those positions would be approximately 0.
    This is not accurate in math, but works for most cases and consumes less memory.

    In the case that the input vector is completely masked and `memory_efficient` is false, this function
    returns an array of `0.0`. This behavior may cause `NaN` if this is used as the last layer of
    a model that uses categorical cross-entropy loss. Instead, if `memory_efficient` is true, this function
    will treat every element as equal, and do softmax over equal numbers.
    """
    if mask is None:
        result = torch.nn.functional.softmax(vector, dim=dim)
    else:
        while mask.dim() < vector.dim():
            mask = mask.unsqueeze(1)
        if not memory_efficient:
            # To limit numerical errors from large vector elements outside the mask, we zero these out.
            result = torch.nn.functional.softmax(vector * mask, dim=dim)
            result = result * mask
            result = result / (
                result.sum(dim=dim, keepdim=True) + tiny_value_of_dtype(result.dtype)
            )
        else:
            masked_vector = vector.masked_fill(~mask, min_value_of_dtype(vector.dtype))
            result = torch.nn.functional.softmax(masked_vector, dim=dim)
    return result
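A minimal usage sketch of the function above, with simple stand-ins for the tiny_value_of_dtype and min_value_of_dtype helpers it references (the real helpers appear to come from allennlp; the approximations below are assumptions):

import torch

def tiny_value_of_dtype(dtype):    # stand-in: smallest positive normal value of the dtype
    return torch.finfo(dtype).tiny

def min_value_of_dtype(dtype):     # stand-in: most negative representable value of the dtype
    return torch.finfo(dtype).min

logits = torch.tensor([[2.0, 1.0, 0.5, -1.0]])
mask = torch.tensor([[True, True, False, False]])

print(masked_softmax(logits, mask))                          # masked entries become exactly 0.0
print(masked_softmax(logits, mask, memory_efficient=True))   # masked entries become ~0.0
print(masked_softmax(logits, torch.zeros_like(mask)))        # fully masked -> all zeros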
Example #8
    def forward(self,
                x,
                is_start: torch.BoolTensor = None,
                labels: List[ImageObjects] = None):
        '''
        Forward pass

        Args:
            x: a batch of images, e.g. shape(8,3,608,608)
            is_start: a batch of bool tensors indicating if the input x is the \
                start of a video.
            labels: a batch of ground truth
        '''
        assert x.dim() == 4
        if is_start is None:
            is_start = torch.zeros(x.shape[0],
                                   dtype=torch.bool,
                                   device=x.device)
        assert is_start.dim() == 1 and is_start.shape[0] == x.shape[0]
        self.img_size = x.shape[2:4]

        _hidden = {}
        # backbone
        features = self.backbone(x)
        if 'backbone' in self.hid_names:
            _hidden['backbone'] = [f.detach().clone() for f in features]

        # feature fusion
        features = self.fpn(features)
        if 'fpn' in self.hid_names:
            _hidden['fpn'] = [f.detach().clone() for f in features]

        # feature aggregation
        features = self.agg(features, self.hidden, is_start)

        # raw prediction
        all_branch_preds = self.rpn(features)
        if 'raw_pred' in self.hid_names:
            pred_copy = []
            for level_pred in all_branch_preds:
                _copy = dict([(k, v.detach().clone())
                              for k, v in level_pred.items()])
                pred_copy.append(_copy)
            _hidden['raw_pred'] = pred_copy

        # final prediction layer
        dts_all = []
        losses_all = []
        for i, raw_preds in enumerate(all_branch_preds):
            dts, loss = self.det_layers[i](raw_preds, self.img_size, labels)
            dts_all.append(dts)
            losses_all.append(loss)

        # merge the predictions from all feature levels
        batch_bbs = torch.cat([d['bbox'] for d in dts_all], dim=1).detach()
        batch_cls_idx = torch.cat([d['class_idx'] for d in dts_all],
                                  dim=1).detach()
        batch_scores = torch.cat([d['score'] for d in dts_all], dim=1).detach()
        batch_pred_objects = []
        # iterate over every image in the batch
        for bbs, cls_idx, scores in zip(batch_bbs, batch_cls_idx,
                                        batch_scores):
            # initialize the pred objects in current image
            p_objs = ImageObjects(bboxes=bbs,
                                  cats=cls_idx,
                                  scores=scores,
                                  bb_format=self.bb_format,
                                  img_hw=self.img_size)
            batch_pred_objects.append(p_objs)
        if 'final_pred' in self.hid_names:
            _hidden['final_pred'] = batch_pred_objects

        self.hidden = _hidden

        if labels is None:
            return batch_pred_objects

        if self.check_gt_assignment:
            total_gt_num = sum([len(t) for t in labels])
            assigned = sum(branch._assigned_num for branch in self.det_layers)
            assert assigned == total_gt_num, f'{assigned} != {total_gt_num}'
        self.loss_str = ''
        for m in self.det_layers:
            self.loss_str += m.loss_str + '\n'
        loss = sum(losses_all)
        return loss
Example #9
def masked_log_softmax(vector: torch.Tensor, mask: torch.BoolTensor, dim: int = -1) -> torch.Tensor:
    if mask is not None:
        while mask.dim() < vector.dim():
            mask = mask.unsqueeze(1)
        vector = vector + (mask + tiny_value_of_dtype(vector.dtype)).log()
    return torch.nn.functional.log_softmax(vector, dim=dim)
Example #10
    def _viterbi_decode(
        self, emissions: torch.FloatTensor, mask: torch.BoolTensor
    ) -> List[List[int]]:
        # emissions: (seq_length, batch_size, num_tags)
        # mask: (seq_length, batch_size)
        assert emissions.dim() == 3 and mask.dim() == 2
        assert emissions.shape[:2] == mask.shape
        assert emissions.size(2) == self.num_tags
        assert mask[0].all()

        seq_length, batch_size = mask.shape

        # Start transition and first emission
        # shape: (batch_size, num_tags)
        score = self.start_transitions + emissions[0]
        history = []

        # score is a tensor of size (batch_size, num_tags) where for every batch,
        # value at column j stores the score of the best tag sequence so far that ends
        # with tag j
        # history saves where the best tags candidate transitioned from; this is used
        # when we trace back the best tag sequence

        # Viterbi algorithm recursive case: we compute the score of the best tag sequence
        # for every possible next tag
        for i in range(1, seq_length):
            # Broadcast viterbi score for every possible next tag
            # shape: (batch_size, num_tags, 1)
            broadcast_score = score.unsqueeze(2)

            # Broadcast emission score for every possible current tag
            # shape: (batch_size, 1, num_tags)
            broadcast_emission = emissions[i].unsqueeze(1)

            # Compute the score tensor of size (batch_size, num_tags, num_tags) where
            # for each sample, entry at row i and column j stores the score of the best
            # tag sequence so far that ends with transitioning from tag i to tag j and emitting
            # shape: (batch_size, num_tags, num_tags)
            next_score = broadcast_score + self.transitions + broadcast_emission

            # Find the maximum score over all possible current tag
            # shape: (batch_size, num_tags)
            next_score, indices = next_score.max(dim=1)

            # Set score to the next score if this timestep is valid (mask == 1)
            # and save the index that produces the next score
            # shape: (batch_size, num_tags)
            score = torch.where(mask[i].unsqueeze(1), next_score, score)
            history.append(indices)

        # End transition score
        # shape: (batch_size, num_tags)
        score += self.end_transitions

        # Now, compute the best path for each sample

        # shape: (batch_size,)
        seq_ends = mask.long().sum(dim=0) - 1
        best_tags_list = []

        for idx in range(batch_size):
            # Find the tag which maximizes the score at the last timestep; this is our best tag
            # for the last timestep
            _, best_last_tag = score[idx].max(dim=0)
            best_tags = [best_last_tag.item()]

            # We trace back where the best last tag comes from, append that to our best tag
            # sequence, and trace it back again, and so on
            for hist in reversed(history[: seq_ends[idx]]):
                best_last_tag = hist[idx][best_tags[-1]]
                best_tags.append(best_last_tag.item())

            # Reverse the order because we start from the last timestep
            best_tags.reverse()
            best_tags_list.append(best_tags)

        return best_tags_list
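The comments above walk through the Viterbi recursion and the masked trace-back. A standalone sketch of the same recursion on a tiny hand-built example (the scores below are made up for illustration, not from the source):

import torch

num_tags, seq_length, batch_size = 3, 4, 2
torch.manual_seed(0)
start_transitions = torch.randn(num_tags)
transitions = torch.randn(num_tags, num_tags)
end_transitions = torch.randn(num_tags)
emissions = torch.randn(seq_length, batch_size, num_tags)
# the second sequence is one step shorter, so its last position is padding
mask = torch.tensor([[1, 1], [1, 1], [1, 1], [1, 0]], dtype=torch.bool)

score = start_transitions + emissions[0]          # (batch_size, num_tags)
history = []
for i in range(1, seq_length):
    next_score = score.unsqueeze(2) + transitions + emissions[i].unsqueeze(1)
    next_score, indices = next_score.max(dim=1)   # best previous tag for every next tag
    score = torch.where(mask[i].unsqueeze(1), next_score, score)
    history.append(indices)
score = score + end_transitions

seq_ends = mask.long().sum(dim=0) - 1
for b in range(batch_size):
    best_tags = [score[b].argmax().item()]
    for hist in reversed(history[: seq_ends[b]]):
        best_tags.append(hist[b][best_tags[-1]].item())
    best_tags.reverse()
    print(best_tags)                              # best tag sequence for sample b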