Example #1
    def update(self, pred: torch.Tensor, target: torch.Tensor):
        """
        Update state with predictions and targets.

        Args:
            pred: Predictions from model
            target: Ground truth values
        """
        # preds, target = _input_format(self.num_classes, preds, target, self.threshold, self.multilabel)
        check_is_tensor(pred, "pred")
        check_is_tensor(target, "target")
        check_ndim_match(pred, target, "pred", "target")
        check_dimension(pred, -1, 6, "pred")
        check_dimension(target, -1, 5, "target")

        # restrict the number of predicted boxes to the top K highest confidence boxes
        if self.pred_box_limit is not None and pred.shape[-2] > self.pred_box_limit:
            # sort by descending confidence and keep only the top K boxes
            indices = pred[..., -2].argsort(descending=True)[:self.pred_box_limit]
            pred = pred[indices, ...]
            assert pred.shape[-2] <= self.pred_box_limit

        # restrict pred and target to class of interest
        if self.pos_label is not None:
            pred_keep = pred[..., -1] == self.pos_label
            pred = pred[pred_keep]
            target_keep = target[..., -1] == self.pos_label
            target = target[target_keep]

        pred_score, target_class, binary_target = self.get_pred_target_pairs(
            pred, target)
        self.pred_score = torch.cat([self.pred_score, pred_score])
        self.target_class = torch.cat([self.target_class, target_class])
        self.binary_target = torch.cat([self.binary_target, binary_target])
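A quick way to sanity-check the top-K filtering step in isolation; this is a minimal sketch assuming the (N, 6) prediction layout of x1, y1, x2, y2, score, class used above, with illustrative values:

import torch

# toy predictions: (N, 6) boxes in x1, y1, x2, y2, score, class form
pred = torch.tensor([
    [0., 0., 10., 10., 0.9, 1.],
    [0., 0., 12., 12., 0.3, 1.],
    [5., 5., 15., 15., 0.7, 0.],
])
pred_box_limit = 2

# keep only the top-K highest confidence boxes, as in update() above
indices = pred[..., -2].argsort(descending=True)[:pred_box_limit]
pred = pred[indices, ...]
assert pred.shape[-2] <= pred_box_limit
print(pred[:, -2])  # tensor([0.9000, 0.7000])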
Example #2
    def create_regression_target(bbox: Tensor, stride: int,
                                 size_target: Tuple[int, int]) -> Tensor:
        r"""Given a set of anchor boxes, creates regression targets each anchor box.
        Each location in the resultant target gives the distance from that location to the
        left, top, right, and bottom of the ground truth anchor box (in that order).

        Args:
            bbox (:class:`torch.Tensor`):
                Ground truth anchor boxes in form :math:`x_1, y_1, x_2, y_2`.

            stride (int):
                Stride at the FPN level for which the target is being created

            size_target (tuple of int, int):
                Height and width of the target map. Should match the height and width
                of the FPN level for which the target is being created.

        Shapes:
            * ``bbox`` - :math:`(*, N, 4)`
            * Output - :math:`(N, 4, H, W)`
        """
        check_is_tensor(bbox, "bbox")
        check_dimension(bbox, -1, 4, "bbox")

        # create starting grid

        num_boxes = bbox.shape[-2]
        height, width = size_target[0], size_target[1]
        grid = FCOSLoss.coordinate_grid(height,
                                        width,
                                        stride,
                                        indexing="xy",
                                        device=bbox.device)
        grid = grid.unsqueeze_(0).repeat(num_boxes, 2, 1, 1)

        # compute distance to box edges relative to each grid location
        grid.sub_(bbox[..., None, None]).abs_()
        return grid
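For intuition, the same target can be built without the FCOSLoss.coordinate_grid helper. This is a minimal standalone sketch, assuming coordinate_grid produces a (2, H, W) grid of x, y coordinates at the center of each strided cell; all names and values below are illustrative:

import torch

stride, height, width = 8, 4, 4
bbox = torch.tensor([[8., 8., 24., 24.]])  # (N, 4) boxes in x1, y1, x2, y2 form

# grid of x, y input coordinates at the center of each strided cell
ys = torch.arange(height, dtype=torch.float).mul_(stride).add_(stride / 2)
xs = torch.arange(width, dtype=torch.float).mul_(stride).add_(stride / 2)
yy, xx = torch.meshgrid(ys, xs, indexing="ij")
grid = torch.stack([xx, yy], dim=0)  # (2, H, W)

# tile x, y -> x, y, x, y and take absolute offsets to the box edges
grid = grid.unsqueeze(0).repeat(bbox.shape[-2], 2, 1, 1)  # (N, 4, H, W)
target = grid.sub(bbox[..., None, None]).abs()
# channel 0 holds distance to the left edge, 1 the top, 2 the right, 3 the bottom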
Example #3
def complete_iou_loss(inputs: Tensor, targets: Tensor, reduction: str = "mean") -> Tensor:
    # validation
    check_is_tensor(inputs, "inputs")
    check_is_tensor(targets, "targets")
    check_dimension(inputs, -1, 4, "inputs")
    check_dimension(targets, -1, 4, "targets")
    check_shapes_match(inputs, targets, "inputs", "targets")
    inputs = inputs.float()
    targets = targets.float()

    # compute euclidean distance between pred and true box centers
    pred_size = inputs[..., 2:] - inputs[..., :2]
    target_size = targets[..., 2:] - targets[..., :2]
    pred_center = pred_size.div(2).add(inputs[..., :2])
    target_center = target_size.div(2).add(targets[..., :2])
    euclidean_dist_squared = (pred_center - target_center).pow(2).sum(dim=-1)

    # compute c, the diagonal length of smallest box enclosing pred and true
    min_coords = torch.min(inputs[..., :2], targets[..., :2])
    max_coords = torch.max(inputs[..., 2:], targets[..., 2:])
    c_squared = (max_coords - min_coords).pow(2).sum(dim=-1)

    # compute diou
    diou = euclidean_dist_squared / c_squared

    # compute vanilla IoU
    pred_area = pred_size[..., 0] * pred_size[..., 1]
    target_area = target_size[..., 0] * target_size[..., 1]
    lt = torch.max(inputs[..., :2], targets[..., :2])
    rb = torch.min(inputs[..., 2:], targets[..., 2:])
    wh = (rb - lt).clamp(min=0)
    inter = wh[..., 0] * wh[..., 1]
    iou = inter / (pred_area + target_area - inter).clamp_min(1e-9)

    # compute v, which measures aspect ratio consistency
    pred_w, pred_h = pred_size[..., 0], pred_size[..., 1]
    target_w, target_h = target_size[..., 0], target_size[..., 1]
    arctan_diff = torch.atan(target_w / target_h) - torch.atan(pred_w / pred_h)
    v = 4 / pi ** 2 * arctan_diff.pow(2)

    # compute alpha, the tradeoff parameter
    alpha = v / ((1 - iou) + v).clamp_min(1e-5)

    # compute the final ciou loss
    loss = 1 - iou + diou + alpha * v

    if reduction == "mean":
        return loss.mean()
    elif reduction == "sum":
        return loss.sum()
    elif reduction == "none":
        return loss
    else:
        raise ValueError(f"Unknown reduction {reduction}")
Example #4
    def compute_centerness_targets(reg_targets: Tensor) -> Tensor:
        r"""Computes centerness targets given regression targets.

        Under FCOS, a target regression map is created for each FPN level. Any map location
        that lies within a ground truth bounding box is assigned a regression target based on
        the left, right, top, and bottom distance from that location to the edges of the ground
        truth box.

        .. image:: ./fcos_target.png
            :width: 200px
            :align: center
            :height: 600px
            :alt: FCOS Centerness Target

        For each of these locations with regression targets :math:`l^*, r^*, t^*, b^*`,
        a "centerness" target is created as follows:

        .. math::
            centerness = \sqrt{\frac{\min(l^*, r^*)}{\max(l^*, r^*)} \times \frac{\min(t^*, b^*)}{\max(t^*, b^*)}}

        Args:
            reg_targets (:class:`torch.Tensor`):
                Ground truth regression targets in form :math:`l^*, t^*, r^*, b^*`.

        Shapes:
            * ``reg_targets`` - :math:`(..., 4)`
            * Output - :math:`(..., 1)`
        """
        check_is_tensor(reg_targets, "reg_targets")
        check_dimension(reg_targets, -1, 4, "reg_targets")

        left_right = reg_targets[..., (0, 2)].float()
        top_bottom = reg_targets[..., (1, 3)].float()

        lr_min = left_right.amin(dim=-1).clamp_min_(0)
        lr_max = left_right.amax(dim=-1).clamp_min_(1)
        tb_min = top_bottom.amin(dim=-1).clamp_min_(0)
        tb_max = top_bottom.amax(dim=-1).clamp_min_(1)

        centerness_lr = lr_min.true_divide_(lr_max)
        centerness_tb = tb_min.true_divide_(tb_max)
        centerness = centerness_lr.mul_(centerness_tb).sqrt_().unsqueeze_(-1)

        assert centerness.shape[:-1] == reg_targets.shape[:-1]
        assert centerness.shape[-1] == 1
        assert centerness.ndim == reg_targets.ndim
        return centerness
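A worked toy example, assuming the method is exposed as a static method on FCOSLoss like coordinate_grid in Example #2:

import torch

# one location with regression targets l, t, r, b = (4, 2, 4, 8)
reg_targets = torch.tensor([[4., 2., 4., 8.]])
centerness = FCOSLoss.compute_centerness_targets(reg_targets)
# min(l, r) / max(l, r) = 4 / 4 = 1.0 and min(t, b) / max(t, b) = 2 / 8 = 0.25,
# so centerness = sqrt(1.0 * 0.25) = 0.5, with shape (1, 1)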
Example #5
    def create_classification_target(
        bbox: Tensor,
        cls: Tensor,
        mask: Tensor,
        num_classes: int,
        size_target: Tuple[int, int],
    ) -> Tensor:
        check_is_tensor(bbox, "bbox")
        check_is_tensor(cls, "cls")
        check_is_tensor(mask, "mask")
        check_dimension_match(bbox, cls, -2, "bbox", "cls")
        check_dimension_match(bbox, mask, 0, "bbox", "mask")
        check_dimension(bbox, -1, 4, "bbox")
        check_dimension(cls, -1, 1, "cls")

        target = torch.zeros(num_classes,
                             *mask.shape[-2:],
                             device=mask.device,
                             dtype=torch.float)

        box_id, h, w = mask.nonzero(as_tuple=True)
        class_id = cls[box_id, 0]
        target[class_id, h, w] = 1.0
        return target
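A hypothetical toy call, again assuming the method is a static member of FCOSLoss; the boxes, classes, and mask below are illustrative only:

import torch

bbox = torch.tensor([[0., 0., 16., 16.], [8., 8., 24., 24.]])
cls = torch.tensor([[1], [2]])
mask = torch.zeros(2, 4, 4, dtype=torch.bool)  # mask[i]: positive locations for box i
mask[0, :2, :2] = True
mask[1, 2:, 2:] = True

target = FCOSLoss.create_classification_target(bbox, cls, mask, num_classes=3, size_target=(4, 4))
# target has shape (3, 4, 4): channel 1 is hot where box 0 is positive,
# channel 2 where box 1 is positive, and channel 0 stays all zeros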
Example #6
def visualize_bbox(
    img: Union[Tensor, ndarray],
    bbox: Union[Tensor, ndarray],
    classes: Optional[Union[Tensor, ndarray]] = None,
    scores: Optional[Union[Tensor, ndarray]] = None,
    class_names: Optional[Dict[int, str]] = None,
    box_color: Tuple[int, int, int] = (255, 0, 0),
    text_color: Tuple[int, int, int] = (255, 255, 255),
    label_alpha: float = 0.4,
    thickness: int = 2,
    pad_value: float = -1,
) -> Tensor:
    r"""Adds bounding box visualization to an input array

    Args:
        img (Tensor or numpy.ndarray):
            Background image

        bbox (Tensor or numpy.ndarray):
            Anchor boxes to draw

        classes (Tensor or numpy.ndarray, optional):
            Class labels associated with each anchor box

        scores (Tensor or numpy.ndarray, optional):
            Class scores associated with each anchor box

        class_names (dict, optional):
            Dictionary mapping integer class labels to string names.
            If ``classes`` is supplied but ``class_names`` is not, integer
            class labels will be used.

        box_color (tuple of ints, optional):
            A 3-tuple giving the RGB color value to use for anchor boxes.

        text_color (tuple of ints, optional):
            A 3-tuple giving the RGB color value to use for labels.

        label_alpha (float, optional):
            Alpha to apply to the colored background for class labels.

        thickness (int, optional):
            Specifies the thickness of anchor boxes.

        pad_value (float, optional):
            The padding value used when batching boxes and labels

    Returns:
        :class:`torch.Tensor` with the output image. Note that the result is always
        returned as a tensor, even when ``img`` is given as a :class:`numpy.ndarray`.

    Shape:
        * ``img`` - :math:`(B, C, H, W)` or :math:`(C, H, W)` or :math:`(H, W)`
        * ``bbox`` - :math:`(B, N, 4)` or :math:`(N, 4)`
        * ``classes`` - :math:`(B, N, 1)` or :math:`(N, 1)`
        * ``scores`` - :math:`(B, N, S)` or :math:`(N, S)`
        *  Output - same as ``img``
    """
    # type check
    check_is_array(img, "img")
    check_is_array(bbox, "bbox")
    classes is None or check_is_array(classes, "classes")
    scores is None or check_is_array(scores, "scores")

    # ndim check
    classes is None or check_ndim_match(bbox, classes, "bbox", "classes")
    scores is None or check_ndim_match(bbox, scores, "bbox", "scores")

    # more ndim checks, ensure if one input is batched then all inputs are batched
    boxes_batched = bbox.ndim == 3
    img_batched = img.ndim == 4
    if img_batched != boxes_batched:
        raise ValueError(f"Expected bbox.ndim == 3 when img.ndim == 4, found {bbox.shape}, {img.shape}")
    if boxes_batched:
        if classes is not None and classes.ndim != 3:
            raise ValueError(f"Expected classes.ndim == 3, found {classes.ndim}")
        if scores is not None and scores.ndim != 3:
            raise ValueError(f"Expected scores.ndim == 3, found {scores.ndim}")
    batched = img_batched

    # individual dimension checks
    check_dimension(bbox, dim=-1, size=4, name="bbox")
    classes is None or check_dimension(classes, dim=-1, size=1, name="classes")
    classes is None or check_dimension_match(bbox, classes, -2, "bbox", "classes")
    scores is None or check_dimension_match(bbox, scores, -2, "bbox", "scores")
    img_shape = img.shape[-2:]

    # convert to cpu tensor
    img, bbox = (torch.as_tensor(x).cpu() for x in (img, bbox))
    classes, scores = (torch.as_tensor(x).cpu() if x is not None else None for x in (classes, scores))

    # add a channel dimension to img if not present
    if img.ndim == 2:
        img = img.view(1, *img.shape)

    # add a batch dimension if not present
    img = img.view(1, *img.shape) if not batched else img
    bbox = bbox.view(1, *bbox.shape) if not batched else bbox
    if classes is not None:
        classes = classes.view(1, *classes.shape) if not batched else classes
    if scores is not None:
        scores = scores.view(1, *scores.shape) if not batched else scores

    # convert image to 8-bit and convert to channels_last
    img_was_float = img.is_floating_point()
    img = to_8bit(img.clone(), per_channel=False, same_on_batch=True)
    img = img.permute(0, 2, 3, 1).contiguous()

    # convert img to color if grayscale input
    if img.shape[-1] == 1:
        img = img.repeat(1, 1, 1, 3)

    # get box indices that aren't padding
    valid_indices = (bbox == pad_value).all(dim=-1).logical_not_()

    # iterate over each batch, building bbox overlay
    result = []
    batch_size = bbox.shape[0]
    for batch_idx in range(batch_size):
        # if this fails with cryptic cv errors, ensure that img is contiguous
        result_i = img[batch_idx].numpy()

        # extract valid boxes for this batch
        valid_indices_i = valid_indices[batch_idx]
        bbox_i = bbox[batch_idx][valid_indices_i]
        scores_i = scores[batch_idx][valid_indices_i] if scores is not None else None
        classes_i = classes[batch_idx][valid_indices_i] if classes is not None else None

        # loop over each box and draw the annotation onto result_i
        for box_idx, coords in enumerate(bbox_i):
            x_min, y_min, x_max, y_max = [int(c) for c in coords]

            # draw the bounding box
            cv2.rectangle(  # type: ignore
                result_i,
                (x_min, y_min),
                (x_max, y_max),
                box_color,
                thickness,
            )

            # add class labels to bounding box text if present
            text = ""
            if classes_i is not None:
                cls = int(classes_i[box_idx].item())
                # use class integer -> str name if mapping is given, otherwise use class integer
                if class_names is not None:
                    text += class_names.get(cls, f"Class {cls}")
                else:
                    text += f"Class {cls}"

            # add score labels to bounding box text if present
            if scores_i is not None:
                if classes_i is not None:
                    text += " - "
                # add the first score
                text += f"{scores_i[box_idx, 0].item():0.3f}"
                # if multiple scores are present, add those
                num_scores = scores_i.shape[-1]
                for score_idx in range(1, num_scores):
                    text += f" | {scores_i[box_idx, score_idx].item():0.3f}"

            # tag bounding box with class name / integer id
            ((text_width, text_height), _) = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.35, 1)  # type: ignore
            cv2.rectangle(  # type: ignore
                result_i, (x_min, y_min - int(1.3 * text_height)), (x_min + text_width, y_min), box_color, -1
            )  # type: ignore
            cv2.putText(  # type: ignore
                result_i,
                text,
                (x_min, y_min - int(0.3 * text_height)),
                cv2.FONT_HERSHEY_SIMPLEX,  # type: ignore
                0.35,
                text_color,
                lineType=cv2.LINE_AA,  # type: ignore
            )

        # permute back to channels first and add to result list
        result_i = torch.from_numpy(result_i).permute(-1, 0, 1)
        result.append(result_i)

    if len(result) > 1:
        result = torch.stack(result, dim=0)
    else:
        result = result[0]

    # ensure we include a batch dim if one was present in inputs
    if batched and batch_size == 1:
        result = result.view(1, *result.shape)

    if img_was_float:
        result = result.float().div_(255)

    return result
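A minimal usage sketch on a random grayscale image; names and values are illustrative, not from the source:

import torch

img = torch.rand(1, 256, 256)  # (C, H, W), floating point grayscale
bbox = torch.tensor([[20, 20, 100, 100], [120, 60, 200, 180]])
classes = torch.tensor([[0], [1]])
scores = torch.rand(2, 1)

out = visualize_bbox(img, bbox, classes, scores, class_names={0: "cat", 1: "dog"})
# out is a (3, 256, 256) float tensor in [0, 1]; the grayscale input was
# tiled to 3 channels and converted back to float after drawing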
Example #7
    def bbox_to_mask(bbox: Tensor,
                     stride: int,
                     size_target: Tuple[int, int],
                     center_radius: Optional[float] = None) -> Tensor:
        r"""Creates a mask for each input anchor box indicating which heatmap locations for that
        box should be positive examples. Under FCOS, target maps are created for each FPN level.
        Any map location that lies within ``center_radius * stride`` units from the center of the
        ground truth bounding box is considered a positive example for regression and classification.

        This method creates a mask for FPN level with stride ``stride``. The mask will have shape
        :math:`(N, H, W)` where :math:`(H, W)` are given in ``size_target``. Mask locations that
        lie within ``center_radius * stride`` units of the box center will be ``True``. If
        ``center_radius=None``, all locations within a box will be considered positive.

        Args:
            bbox (:class:`torch.Tensor`):
                Ground truth anchor boxes in form :math:`x_1, y_1, x_2, y_2`.

            stride (int):
                Stride at the FPN level for which the target is being created

            size_target (tuple of int, int):
                Height and width of the mask. Should match the height and width of the FPN
                level for which a target is being created.

            center_radius (float, optional):
                Radius (in units of ``stride``) about the center of each box for which examples
                should be considered positive. If ``center_radius=None``, all locations within
                a box will be considered positive.

        Shapes:
            * ``bbox`` - :math:`(*, N, 4)`
            * Output - :math:`(N, H, W)`
        """
        check_is_tensor(bbox, "bbox")
        check_dimension(bbox, -1, 4, "bbox")

        # create mesh grid of size `size_target`
        # locations in grid give h/w at center of that location
        #
        # we will compare bbox coords against this grid to find locations that lie within
        # the center_radius of bbox
        num_boxes = bbox.shape[-2]
        h = torch.arange(size_target[0], dtype=torch.float, device=bbox.device)
        w = torch.arange(size_target[1], dtype=torch.float, device=bbox.device)
        grid = torch.stack(torch.meshgrid(h, w), dim=0)
        grid = grid.mul_(stride).add_(stride / 2)  # coordinates at strided cell centers
        mask = grid.unsqueeze_(0).expand(num_boxes, -1, -1, -1)

        # get edge coordinates of each box based on whole box or center sampled
        lower_bound = bbox[..., :2]
        upper_bound = bbox[..., 2:]
        if center_radius is not None:
            assert center_radius >= 1
            # update bounds according to radius from center
            center = (bbox[..., :2] + bbox[..., 2:]).true_divide(2)
            offset = center.new_tensor([stride, stride]).mul_(center_radius)
            lower_bound = torch.max(lower_bound, center - offset[None])
            upper_bound = torch.min(upper_bound, center + offset[None])

        # x1y1 to h1w1, add h/w dimensions, convert to strided coords
        lower_bound = lower_bound[..., (1, 0), None, None]
        upper_bound = upper_bound[..., (1, 0), None, None]

        # use edge coordinates to create a binary mask
        mask = (mask >= lower_bound).logical_and_(mask <= upper_bound).all(
            dim=-3)
        return mask
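A hypothetical call for a stride-8 FPN level with a 4x4 map, assuming the method is a static member of FCOSLoss like its siblings above:

import torch

bbox = torch.tensor([[8., 8., 24., 24.]])
mask = FCOSLoss.bbox_to_mask(bbox, stride=8, size_target=(4, 4))
# mask is (1, 4, 4); True where a location's strided center falls inside the box
mask_cs = FCOSLoss.bbox_to_mask(bbox, stride=8, size_target=(4, 4), center_radius=1.0)
# with center sampling, positives are limited to within 1 * stride of the box center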