def update(self, pred: torch.Tensor, target: torch.Tensor):
    """
    Update state with predictions and targets.

    Args:
        pred: Predictions from model
        target: Ground truth values
    """
    check_is_tensor(pred, "pred")
    check_is_tensor(target, "target")
    check_ndim_match(pred, target, "pred", "target")
    check_dimension(pred, -1, 6, "pred")
    check_dimension(target, -1, 5, "target")

    # restrict the number of predicted boxes to the top K highest confidence boxes
    if self.pred_box_limit is not None and pred.shape[-2] > self.pred_box_limit:
        indices = pred[..., -2].argsort(descending=True)[..., :self.pred_box_limit]
        pred = pred[indices, ...]
        assert pred.shape[-2] <= self.pred_box_limit

    # restrict pred and target to the class of interest
    if self.pos_label is not None:
        pred_keep = pred[..., -1] == self.pos_label
        pred = pred[pred_keep]
        target_keep = target[..., -1] == self.pos_label
        target = target[target_keep]

    pred_score, target_class, binary_target = self.get_pred_target_pairs(pred, target)
    self.pred_score = torch.cat([self.pred_score, pred_score])
    self.target_class = torch.cat([self.target_class, target_class])
    self.binary_target = torch.cat([self.binary_target, binary_target])
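A minimal usage sketch for `update`, assuming the 6-column pred layout (`x1, y1, x2, y2, score, class`) and 5-column target layout (`x1, y1, x2, y2, class`) validated above; `BoxAP` is a hypothetical name for the metric class that owns this method.

import torch

# `BoxAP` is a hypothetical name; the real metric class exposing `update` may differ
metric = BoxAP(pred_box_limit=100, pos_label=None)

# one predicted box: x1, y1, x2, y2, score, class
pred = torch.tensor([[0.0, 0.0, 10.0, 10.0, 0.9, 1.0]])
# one ground truth box: x1, y1, x2, y2, class
target = torch.tensor([[0.0, 0.0, 10.0, 10.0, 1.0]])

metric.update(pred, target)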
def create_regression_target(bbox: Tensor, stride: int, size_target: Tuple[int, int]) -> Tensor:
    r"""Given a set of anchor boxes, creates regression targets for each anchor box.
    Each location in the resultant target gives the distance from that location to the
    left, top, right, and bottom of the ground truth anchor box (in that order).

    Args:
        bbox (:class:`torch.Tensor`):
            Ground truth anchor boxes in form :math:`x_1, y_1, x_2, y_2`.

        stride (int):
            Stride at the FPN level for which the target is being created

        size_target (tuple of int, int):
            Height and width of the target featuremap at this FPN level

    Shapes:
        * ``bbox`` - :math:`(*, N, 4)`
        * Output - :math:`(*, N, 4, H, W)`
    """
    check_is_tensor(bbox, "bbox")
    check_dimension(bbox, -1, 4, "bbox")

    # create starting grid
    num_boxes = bbox.shape[-2]
    height, width = size_target[0], size_target[1]
    grid = FCOSLoss.coordinate_grid(height, width, stride, indexing="xy", device=bbox.device)
    grid = grid.unsqueeze_(0).repeat(num_boxes, 2, 1, 1)

    # compute distance to box edges relative to each grid location
    grid.sub_(bbox[..., None, None]).abs_()
    return grid
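A quick shape sketch, assuming `FCOSLoss.coordinate_grid` returns a :math:`(2, H, W)` coordinate grid as used above; the output layout `(N, 4, H, W)` is inferred from the `repeat` call.

import torch

# two ground truth boxes in x1, y1, x2, y2 form
bbox = torch.tensor([[0.0, 0.0, 32.0, 32.0],
                     [8.0, 8.0, 24.0, 24.0]])
target = create_regression_target(bbox, stride=8, size_target=(4, 4))
# one channel per edge distance (l, t, r, b) at every grid location
assert target.shape == (2, 4, 4, 4)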
def complete_iou_loss(inputs: Tensor, targets: Tensor, reduction: str = "mean") -> Tensor:
    r"""Computes the Complete IoU (CIoU) loss between predicted and target boxes given
    in :math:`x_1, y_1, x_2, y_2` form."""
    # validation
    check_is_tensor(inputs, "inputs")
    check_is_tensor(targets, "targets")
    check_dimension(inputs, -1, 4, "inputs")
    check_dimension(targets, -1, 4, "targets")
    check_shapes_match(inputs, targets, "inputs", "targets")
    inputs = inputs.float()
    targets = targets.float()

    # compute squared euclidean distance between pred and true box centers
    pred_size = inputs[..., 2:] - inputs[..., :2]
    target_size = targets[..., 2:] - targets[..., :2]
    pred_center = pred_size.div(2).add(inputs[..., :2])
    target_center = target_size.div(2).add(targets[..., :2])
    euclidean_dist_squared = (pred_center - target_center).pow(2).sum(dim=-1)

    # compute c, the diagonal length of the smallest box enclosing pred and true
    min_coords = torch.min(inputs[..., :2], targets[..., :2])
    max_coords = torch.max(inputs[..., 2:], targets[..., 2:])
    c_squared = (max_coords - min_coords).pow(2).sum(dim=-1)

    # compute the distance penalty term, rho^2 / c^2
    distance_penalty = euclidean_dist_squared / c_squared

    # compute vanilla IoU
    pred_area = pred_size[..., 0] * pred_size[..., 1]
    target_area = target_size[..., 0] * target_size[..., 1]
    lt = torch.max(inputs[..., :2], targets[..., :2])
    rb = torch.min(inputs[..., 2:], targets[..., 2:])
    wh = (rb - lt).clamp(min=0)
    inter = wh[..., 0] * wh[..., 1]
    iou = inter / (pred_area + target_area - inter).clamp_min(1e-9)

    # compute v, which measures aspect ratio consistency
    pred_w, pred_h = pred_size[..., 0], pred_size[..., 1]
    target_w, target_h = target_size[..., 0], target_size[..., 1]
    arctan_diff = torch.atan(target_w / target_h) - torch.atan(pred_w / pred_h)
    v = 4 / pi ** 2 * arctan_diff.pow(2)

    # compute alpha, the tradeoff parameter
    alpha = v / ((1 - iou) + v).clamp_min(1e-5)

    # compute the final ciou loss
    loss = 1 - iou + distance_penalty + alpha * v

    if reduction == "mean":
        return loss.mean()
    elif reduction == "sum":
        return loss.sum()
    elif reduction == "none":
        return loss
    else:
        raise ValueError(f"Unknown reduction {reduction}")
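A small sanity check for the loss, a sketch assuming the function is importable as defined above: identical boxes have IoU 1, zero center distance, and matching aspect ratio, so every term vanishes and the loss is zero.

import torch

box = torch.tensor([[0.0, 0.0, 10.0, 10.0]])
loss = complete_iou_loss(box, box.clone(), reduction="none")
assert loss.abs().lt(1e-4).all()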
def compute_centerness_targets(reg_targets: Tensor) -> Tensor:
    r"""Computes centerness targets given regression targets.

    Under FCOS, a target regression map is created for each FPN level. Any map location
    that lies within a ground truth bounding box is assigned a regression target based
    on the left, right, top, and bottom distance from that location to the edges of the
    ground truth box.

    .. image:: ./fcos_target.png
        :width: 200px
        :align: center
        :height: 600px
        :alt: FCOS Centerness Target

    For each of these locations with regression targets :math:`l^*, r^*, t^*, b^*`,
    a "centerness" target is created as follows:

    .. math::
        centerness = \sqrt{\frac{\min(l^*, r^*)}{\max(l^*, r^*)} \times \frac{\min(t^*, b^*)}{\max(t^*, b^*)}}

    Args:
        reg_targets (:class:`torch.Tensor`):
            Ground truth regression featuremap in form :math:`l, t, r, b`.

    Shapes:
        * ``reg_targets`` - :math:`(..., 4)`
        * Output - :math:`(..., 1)`
    """
    check_is_tensor(reg_targets, "reg_targets")
    check_dimension(reg_targets, -1, 4, "reg_targets")

    left_right = reg_targets[..., (0, 2)].float()
    top_bottom = reg_targets[..., (1, 3)].float()

    lr_min = left_right.amin(dim=-1).clamp_min_(0)
    lr_max = left_right.amax(dim=-1).clamp_min_(1)
    tb_min = top_bottom.amin(dim=-1).clamp_min_(0)
    tb_max = top_bottom.amax(dim=-1).clamp_min_(1)

    centerness_lr = lr_min.true_divide_(lr_max)
    centerness_tb = tb_min.true_divide_(tb_max)
    centerness = centerness_lr.mul_(centerness_tb).sqrt_().unsqueeze_(-1)

    assert centerness.shape[:-1] == reg_targets.shape[:-1]
    assert centerness.shape[-1] == 1
    assert centerness.ndim == reg_targets.ndim
    return centerness
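A brief sketch of the centerness formula in action: a location equidistant from all four box edges has min/max ratios of 1 in both directions, giving a centerness of 1.

import torch

# regression targets l, t, r, b for one perfectly centered location
reg_targets = torch.tensor([[5.0, 5.0, 5.0, 5.0]])
centerness = compute_centerness_targets(reg_targets)
assert torch.allclose(centerness, torch.ones(1, 1))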
def create_classification_target(
    bbox: Tensor,
    cls: Tensor,
    mask: Tensor,
    num_classes: int,
    size_target: Tuple[int, int],
) -> Tensor:
    r"""Creates a classification target heatmap by scattering each box's class label
    onto the positive locations given by ``mask``."""
    check_is_tensor(bbox, "bbox")
    check_is_tensor(cls, "cls")
    check_is_tensor(mask, "mask")
    check_dimension_match(bbox, cls, -2, "bbox", "cls")
    check_dimension_match(bbox, mask, 0, "bbox", "mask")
    check_dimension(bbox, -1, 4, "bbox")
    check_dimension(cls, -1, 1, "cls")

    # one channel per class, spatial size taken from the mask
    target = torch.zeros(num_classes, *mask.shape[-2:], device=mask.device, dtype=torch.float)
    # for each positive location, set the channel of that box's class to 1
    box_id, h, w = mask.nonzero(as_tuple=True)
    class_id = cls[box_id, 0]
    target[class_id, h, w] = 1.0
    return target
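A usage sketch, assuming the positive-location mask comes from `bbox_to_mask` defined later in this section.

import torch

bbox = torch.tensor([[0.0, 0.0, 16.0, 16.0]])  # one box
cls = torch.tensor([[2]])                      # its integer class id
mask = bbox_to_mask(bbox, stride=8, size_target=(4, 4))
target = create_classification_target(bbox, cls, mask, num_classes=3, size_target=(4, 4))
assert target.shape == (3, 4, 4)  # one heatmap channel per class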
def visualize_bbox(
    img: Union[Tensor, ndarray],
    bbox: Union[Tensor, ndarray],
    classes: Optional[Union[Tensor, ndarray]] = None,
    scores: Optional[Union[Tensor, ndarray]] = None,
    class_names: Optional[Dict[int, str]] = None,
    box_color: Tuple[int, int, int] = (255, 0, 0),
    text_color: Tuple[int, int, int] = (255, 255, 255),
    label_alpha: float = 0.4,
    thickness: int = 2,
    pad_value: float = -1,
) -> Tensor:
    r"""Adds bounding box visualization to an input array

    Args:
        img (Tensor or numpy.ndarray):
            Background image

        bbox (Tensor or numpy.ndarray):
            Anchor boxes to draw

        classes (Tensor or numpy.ndarray, optional):
            Class labels associated with each anchor box

        scores (Tensor or numpy.ndarray, optional):
            Class scores associated with each anchor box

        class_names (dict, optional):
            Dictionary mapping integer class labels to string names. If ``classes`` is
            supplied but ``class_names`` is not, integer class labels will be used.

        box_color (tuple of ints, optional):
            A 3-tuple giving the RGB color value to use for anchor boxes.

        text_color (tuple of ints, optional):
            A 3-tuple giving the RGB color value to use for labels.

        label_alpha (float, optional):
            Alpha to apply to the colored background for class labels.

        thickness (int, optional):
            Specifies the thickness of anchor boxes.

        pad_value (float, optional):
            The padding value used when batching boxes and labels

    Returns:
        :class:`torch.Tensor` or :class:`numpy.ndarray` (depending on what was given
        for ``img``) with the output image.

    Shape:
        * ``img`` - :math:`(B, C, H, W)` or :math:`(C, H, W)` or :math:`(H, W)`
        * ``bbox`` - :math:`(B, N, 4)` or :math:`(N, 4)`
        * ``classes`` - :math:`(B, N, 1)` or :math:`(N, 1)`
        * ``scores`` - :math:`(B, N, S)` or :math:`(N, S)`
        * Output - same as ``img``
    """
    # type check
    check_is_array(img, "img")
    check_is_array(bbox, "bbox")
    classes is None or check_is_array(classes, "classes")
    scores is None or check_is_array(scores, "scores")

    # ndim check
    classes is None or check_ndim_match(bbox, classes, "bbox", "classes")
    scores is None or check_ndim_match(bbox, scores, "bbox", "scores")

    # more ndim checks, ensure if one input is batched then all inputs are batched
    boxes_batched = bbox.ndim == 3
    img_batched = img.ndim == 4
    if img_batched != boxes_batched:
        raise ValueError(f"Expected bbox.ndim == 3 when img.ndim == 4, found {bbox.shape}, {img.shape}")
    if boxes_batched:
        if classes is not None and classes.ndim != 3:
            raise ValueError(f"Expected classes.ndim == 3, found {classes.ndim}")
        if scores is not None and scores.ndim != 3:
            raise ValueError(f"Expected scores.ndim == 3, found {scores.ndim}")
    batched = img_batched

    # individual dimension checks
    check_dimension(bbox, dim=-1, size=4, name="bbox")
    classes is None or check_dimension(classes, dim=-1, size=1, name="classes")
    classes is None or check_dimension_match(bbox, classes, -2, "bbox", "classes")
    scores is None or check_dimension_match(bbox, scores, -2, "bbox", "scores")
    img_shape = img.shape[-2:]

    # convert to cpu tensor
    img, bbox = (torch.as_tensor(x).cpu() for x in (img, bbox))
    classes, scores = (torch.as_tensor(x).cpu() if x is not None else None for x in (classes, scores))

    # add a channel dimension to img if not present
    if img.ndim == 2:
        img = img.view(1, *img.shape)

    # add a batch dimension if not present
    img = img.view(1, *img.shape) if not batched else img
    bbox = bbox.view(1, *bbox.shape) if not batched else bbox
    if classes is not None:
        classes = classes.view(1, *classes.shape) if not batched else classes
    if scores is not None:
        scores = scores.view(1, *scores.shape) if not batched else scores
    # convert image to 8-bit and convert to channels_last
    img_was_float = img.is_floating_point()
    img = to_8bit(img.clone(), per_channel=False, same_on_batch=True)
    img = img.permute(0, 2, 3, 1).contiguous()

    # convert img to color if grayscale input
    if img.shape[-1] == 1:
        img = img.repeat(1, 1, 1, 3)

    # get box indices that aren't padding
    valid_indices = (bbox == pad_value).all(dim=-1).logical_not_()

    # iterate over each batch, building bbox overlay
    result = []
    batch_size = bbox.shape[0]
    for batch_idx in range(batch_size):
        # if this fails with cryptic cv errors, ensure that img is contiguous
        result_i = img[batch_idx].numpy()

        # extract valid boxes for this batch
        valid_indices_i = valid_indices[batch_idx]
        bbox_i = bbox[batch_idx][valid_indices_i]
        scores_i = scores[batch_idx][valid_indices_i] if scores is not None else None
        classes_i = classes[batch_idx][valid_indices_i] if classes is not None else None

        # loop over each box and draw the annotation onto result_i
        for box_idx, coords in enumerate(bbox_i):
            x_min, y_min, x_max, y_max = [int(c) for c in coords]

            # draw the bounding box
            cv2.rectangle(  # type: ignore
                result_i,
                (x_min, y_min),
                (x_max, y_max),
                box_color,
                thickness,
            )

            # add class labels to bounding box text if present
            text = ""
            if classes_i is not None:
                cls = int(classes_i[box_idx].item())
                # use class integer -> str name if mapping is given, otherwise use class integer
                if class_names is not None:
                    text += class_names.get(cls, f"Class {cls}")
                else:
                    text += f"Class {cls}"

            # add score labels to bounding box text if present
            if scores_i is not None:
                if classes_i is not None:
                    text += " - "
                # add the first score
                text += f"{scores_i[box_idx, 0].item():0.3f}"
                # if multiple scores are present, add those
                num_scores = scores_i.shape[-1]
                for score_idx in range(1, num_scores):
                    text += f" | {scores_i[box_idx, score_idx].item():0.3f}"

            # tag bounding box with class name / integer id
            ((text_width, text_height), _) = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.35, 1)  # type: ignore
            cv2.rectangle(  # type: ignore
                result_i, (x_min, y_min - int(1.3 * text_height)), (x_min + text_width, y_min), box_color, -1
            )
            cv2.putText(  # type: ignore
                result_i,
                text,
                (x_min, y_min - int(0.3 * text_height)),
                cv2.FONT_HERSHEY_SIMPLEX,  # type: ignore
                0.35,
                text_color,
                lineType=cv2.LINE_AA,  # type: ignore
            )

        # permute back to channels first and add to result list
        result_i = torch.from_numpy(result_i).permute(-1, 0, 1)
        result.append(result_i)

    if len(result) > 1:
        result = torch.stack(result, dim=0)
    else:
        result = result[0]

    # ensure we include a batch dim if one was present in inputs
    if batched and batch_size == 1:
        result = result.view(1, *result.shape)

    if img_was_float:
        result = result.float().div_(255)

    return result
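A usage sketch for `visualize_bbox`, assuming `to_8bit` and the `check_*` helpers are available from the same module: a float CHW image in, a float CHW image with the drawn, labeled box out.

import torch

img = torch.rand(3, 128, 128)                    # CHW float image
bbox = torch.tensor([[16.0, 16.0, 96.0, 96.0]])  # one box, x1 y1 x2 y2
classes = torch.tensor([[1]])
scores = torch.tensor([[0.87]])

out = visualize_bbox(img, bbox, classes=classes, scores=scores, class_names={1: "person"})
assert out.shape == img.shape and out.is_floating_point()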
def bbox_to_mask(bbox: Tensor, stride: int, size_target: Tuple[int, int], center_radius: Optional[float] = None) -> Tensor:
    r"""Creates a mask for each input anchor box indicating which heatmap locations for that
    box should be positive examples. Under FCOS, target maps are created for each FPN level.
    Any map location that lies within ``center_radius * stride`` units from the center of the
    ground truth bounding box is considered a positive example for regression and
    classification.

    This method creates a mask for the FPN level with stride ``stride``. The mask will have
    shape :math:`(N, H, W)` where :math:`(H, W)` are given in ``size_target``. Mask locations
    that lie within ``center_radius * stride`` units of the box center will be ``True``. If
    ``center_radius=None``, all locations within a box will be considered positive.

    Args:
        bbox (:class:`torch.Tensor`):
            Ground truth anchor boxes in form :math:`x_1, y_1, x_2, y_2`.

        stride (int):
            Stride at the FPN level for which the target is being created

        size_target (tuple of int, int):
            Height and width of the mask. Should match the height and width of the FPN
            level for which a target is being created.

        center_radius (float, optional):
            Radius (in units of ``stride``) about the center of each box for which examples
            should be considered positive. If ``center_radius=None``, all locations within
            a box will be considered positive.

    Shapes:
        * ``bbox`` - :math:`(*, N, 4)`
        * Output - :math:`(N, H, W)`
    """
    check_is_tensor(bbox, "bbox")
    check_dimension(bbox, -1, 4, "bbox")

    # create mesh grid of size `size_target`
    # locations in grid give h/w at center of that location
    #
    # we will compare bbox coords against this grid to find locations that lie within
    # the center_radius of bbox
    num_boxes = bbox.shape[-2]
    h = torch.arange(size_target[0], dtype=torch.float, device=bbox.device)
    w = torch.arange(size_target[1], dtype=torch.float, device=bbox.device)
    mask = (
        torch.stack(torch.meshgrid(h, w), 0)
        .mul_(stride)
        .add_(stride / 2)
        .unsqueeze_(0)
        .expand(num_boxes, -1, -1, -1)
    )

    # get edge coordinates of each box based on whole box or center sampled
    lower_bound = bbox[..., :2]
    upper_bound = bbox[..., 2:]
    if center_radius is not None:
        assert center_radius >= 1
        # update bounds according to radius from center
        center = (bbox[..., :2] + bbox[..., 2:]).true_divide(2)
        offset = center.new_tensor([stride, stride]).mul_(center_radius)
        lower_bound = torch.max(lower_bound, center - offset[None])
        upper_bound = torch.min(upper_bound, center + offset[None])

    # x1y1 to h1w1, add h/w dimensions, convert to strided coords
    lower_bound = lower_bound[..., (1, 0), None, None]
    upper_bound = upper_bound[..., (1, 0), None, None]

    # use edge coordinates to create a binary mask
    mask = (mask >= lower_bound).logical_and_(mask <= upper_bound).all(dim=-3)
    return mask
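A sketch contrasting whole-box and center-sampled masks; with `center_radius=1.0`, only grid cells within one stride of the box center stay positive.

import torch

bbox = torch.tensor([[0.0, 0.0, 64.0, 64.0]])

# all grid locations inside the box are positive
full = bbox_to_mask(bbox, stride=8, size_target=(8, 8))
# only locations within 1 stride of the box center are positive
center = bbox_to_mask(bbox, stride=8, size_target=(8, 8), center_radius=1.0)

assert full.shape == center.shape == (1, 8, 8)
assert center.sum() <= full.sum()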