from typing import List, Tuple

import numpy as np
import torch

from detectron2.structures import Boxes, pairwise_iou, pairwise_point_box_distance


def track_center(long_term_roi_buffer, matches, reference_frame_proposals, kf_idx):
    """
    Translate the keyframe ROIs so that their centers follow the matched
    reference-frame proposals.
    """
    kf_boxes = Boxes(long_term_roi_buffer[kf_idx])
    # Per-box (dx, dy) from each keyframe box center to its matched proposal center.
    trans = (
        reference_frame_proposals.proposal_boxes[matches].get_centers()
        - kf_boxes.get_centers()
    )
    # Apply the same shift to both corners of each XYXY box.
    trans = torch.cat([trans, trans], 1)
    long_term_roi_buffer[kf_idx] += trans
    return long_term_roi_buffer
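
# Minimal usage sketch for track_center (illustrative values, not from the
# original code): it assumes `long_term_roi_buffer` is a list of (K, 4) XYXY
# tensors, one per keyframe, and `reference_frame_proposals` is a detectron2
# Instances holding `proposal_boxes`.
def _demo_track_center():
    from detectron2.structures import Instances

    buffer = [torch.tensor([[10.0, 10.0, 20.0, 20.0]])]
    props = Instances((100, 100))
    props.proposal_boxes = Boxes(torch.tensor([[12.0, 13.0, 22.0, 23.0]]))
    matches = torch.tensor([0])
    # The keyframe box center (15, 15) is shifted to the proposal center (17, 18).
    updated = track_center(buffer, matches, props, kf_idx=0)
    print(updated[0])  # tensor([[12., 13., 22., 23.]])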

def _match_anchors(self, gt_boxes: Boxes, anchors: List[Boxes]):
    """
    Match ground-truth boxes to a set of multi-level anchors.

    Args:
        gt_boxes: Ground-truth boxes from instances of an image.
        anchors: List of anchors for each feature map (of different scales).

    Returns:
        torch.Tensor
            A tensor of shape `(M, R)`, given `M` ground-truth boxes and total
            `R` anchor points from all feature levels, indicating the quality
            of match between m-th box and r-th anchor. Higher value indicates
            better match.
    """
    # Naming convention: (M = ground-truth boxes, R = anchor points)
    # Anchor points are represented as square boxes of size = stride.
    num_anchors_per_level = [len(x) for x in anchors]
    anchors = Boxes.cat(anchors)  # (R, 4)
    anchor_centers = anchors.get_centers()  # (R, 2)
    anchor_sizes = anchors.tensor[:, 2] - anchors.tensor[:, 0]  # (R, )

    lower_bound = anchor_sizes * 4
    lower_bound[: num_anchors_per_level[0]] = 0
    upper_bound = anchor_sizes * 8
    upper_bound[-num_anchors_per_level[-1] :] = float("inf")

    gt_centers = gt_boxes.get_centers()

    # FCOS with center sampling: anchor point must be close enough to
    # ground-truth box center.
    center_dists = (anchor_centers[None, :, :] - gt_centers[:, None, :]).abs_()
    sampling_regions = self.center_sampling_radius * anchor_sizes[None, :]
    match_quality_matrix = center_dists.max(dim=2).values < sampling_regions

    pairwise_dist = pairwise_point_box_distance(anchor_centers, gt_boxes)
    pairwise_dist = pairwise_dist.permute(1, 0, 2)  # (M, R, 4)

    # The original FCOS anchor matching rule: anchor point must be inside GT.
    match_quality_matrix &= pairwise_dist.min(dim=2).values > 0

    # Multilevel anchor matching in FCOS: each anchor is only responsible
    # for certain scale range.
    pairwise_dist = pairwise_dist.max(dim=2).values
    match_quality_matrix &= (pairwise_dist > lower_bound[None, :]) & (
        pairwise_dist < upper_bound[None, :]
    )

    # Match the GT box with minimum area, if there are multiple GT matches.
    gt_areas = gt_boxes.area()  # (M, )
    match_quality_matrix = match_quality_matrix.to(torch.float32)
    match_quality_matrix *= 1e8 - gt_areas[:, None]
    return match_quality_matrix  # (M, R)
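
# Sketch of how the (M, R) quality matrix is typically consumed; this mirrors
# how detectron2's FCOS assigns anchors, but is written here as an assumption,
# not the original code. Each anchor takes the GT with the highest score,
# which, because of the `1e8 - area` weighting, is the smallest matched box;
# anchors with no positive entry become background.
def _demo_consume_match_matrix(match_quality_matrix: torch.Tensor) -> torch.Tensor:
    match_quality, matched_idxs = match_quality_matrix.max(dim=0)  # (R,)
    matched_idxs[match_quality < 1e-5] = -1  # -1 marks background anchors
    return matched_idxs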

def pairwise_dist(points: torch.Tensor, boxes: Boxes):
    """
    Given a grid of points and a list of boxes, compute the distance between
    each point and each box center. The results are normalized by box sizes.

    Args:
        points (Tensor): Array of N (x, y) points, size (N, 2).
        boxes (Boxes): Contains M boxes.

    Returns:
        Tensor: Distance, size (N, M).
    """
    box_centers = boxes.get_centers()  # (M, 2)
    boxes = boxes.tensor
    box_wh = boxes[:, 2:] - boxes[:, :2]  # (M, 2)
    distance = (points[:, None] - box_centers[None]) / box_wh[None]  # (N, M, 2)
    del box_wh
    return distance.norm(dim=2)
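
# Quick sanity sketch for pairwise_dist (illustrative values): a point on a
# box center has distance 0, and a corner point has norm((0.5, 0.5)) ~= 0.707
# after the normalization by box width/height.
def _demo_pairwise_dist():
    boxes = Boxes(torch.tensor([[0.0, 0.0, 10.0, 20.0]]))
    points = torch.tensor([[5.0, 10.0], [0.0, 0.0]])  # center, top-left corner
    print(pairwise_dist(points, boxes))  # tensor([[0.0000], [0.7071]])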

def _get_keypoints_from_boxes(boxes: Boxes, num_keypoints: int):
    """
    Use the box center as every keypoint, in (x, y, visibility) format.
    """
    centers = boxes.get_centers()
    # Mark each center as a visible keypoint (v=1); create the ones on the
    # same device as the boxes to avoid a CPU/GPU mismatch.
    kpts = torch.cat(
        (centers, torch.ones(centers.shape[0], 1, device=centers.device)), dim=1
    )
    kpts = kpts.repeat(1, num_keypoints).reshape(len(boxes), num_keypoints, 3)
    return kpts
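
# Shape sketch for _get_keypoints_from_boxes (illustrative values): every box
# yields `num_keypoints` identical (x, y, v=1) triplets at its center, in the
# (N, K, 3) layout detectron2 uses for keypoints.
def _demo_keypoints_from_boxes():
    boxes = Boxes(torch.tensor([[0.0, 0.0, 4.0, 6.0]]))
    kpts = _get_keypoints_from_boxes(boxes, num_keypoints=2)
    print(kpts.shape)  # torch.Size([1, 2, 3])
    print(kpts[0, 0])  # tensor([2., 3., 1.])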

def boxes_to_edge_features(boxes: Boxes, image_size: Tuple[int, int]):
    """Compute pairwise edge features from the bounding boxes of detected objects.

    - Euclidean distance between box centers relative to sqrt(area) of the image
    - Sin and cos of the delta between box centers
    - One-hot quadrant of the angle between box centers
    - Intersection over union
    - Relative area of the first box w.r.t. the second box

    Args:
        boxes: N bounding boxes, represented as N x (x1, y1, x2, y2)
        image_size: (height, width)

    Returns:
        Tuple[Tensor, Tensor]: edge features of size (N*N - N) x num_feats and
        the corresponding (source, target) indices of size 2 x (N*N - N).
    """
    height, width = image_size

    N = len(boxes)
    indices = torch.from_numpy(np.indices((N, N)).reshape(2, -1))  # 2 x (N*N)
    centers = boxes.get_centers()  # N x 2
    areas = boxes.area()  # N

    # delta[i, j] = centers[j] - centers[i]
    delta = centers[None, :, :] - centers[:, None, :]  # N x N x 2
    delta = delta.view(N * N, 2)  # N*N x 2

    relative_dist = delta.norm(dim=1) / np.sqrt(height * width)  # N*N
    angles = torch.atan2(delta[:, 1], delta[:, 0])  # N*N
    sin = torch.sin(angles)  # N*N
    cos = torch.cos(angles)  # N*N

    def quantize_angles(angles):
        """Quantize angles into the ranges 45-135, 135-225, 225-315, 315-45"""
        angles = angles - np.pi / 4
        top_half = torch.sin(angles) >= 0
        # The right half-plane is where cos >= 0 (the original used sin here,
        # which duplicated top_half and produced wrong quadrants).
        right_half = torch.cos(angles) >= 0
        result = torch.empty(len(angles), 4, dtype=torch.bool, device=angles.device)
        result[:, 0] = top_half & right_half
        result[:, 1] = top_half & ~right_half
        result[:, 2] = ~top_half & ~right_half
        result[:, 3] = ~top_half & right_half
        return result.float()

    quadrants = quantize_angles(angles)

    iou = pairwise_iou(boxes, boxes)  # N x N
    iou = iou.view(N * N)  # N*N

    # relative_area[i, j] = area[i] / area[j]
    relative_area = areas[:, None] / areas[None, :]  # N x N
    relative_area = relative_area.view(N * N)  # N*N

    features = torch.stack(
        [
            relative_dist,
            sin,
            cos,
            *quadrants.unbind(dim=1),
            iou,
            relative_area,
            1 / relative_area,
        ],
        dim=1,
    )  # N*N x num_feats

    # Remove elements on the diagonal (i.e. self-relationships)
    mask = indices[0] != indices[1]
    features = features[mask]  # (N*N - N) x num_feats
    indices = indices[:, mask]  # 2 x (N*N - N)

    return features, indices
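
# Usage sketch for boxes_to_edge_features (toy boxes, assumed values): with
# N = 3 boxes there are N*N - N = 6 directed edges, each carrying 10 features
# (distance, sin, cos, 4 quadrant indicators, IoU, and both relative areas).
def _demo_edge_features():
    boxes = Boxes(torch.tensor([
        [0.0, 0.0, 10.0, 10.0],
        [20.0, 0.0, 30.0, 10.0],
        [0.0, 20.0, 10.0, 30.0],
    ]))
    features, indices = boxes_to_edge_features(boxes, image_size=(100, 100))
    print(features.shape)  # torch.Size([6, 10])
    print(indices.shape)   # torch.Size([2, 6])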