def label_proposals(self, proposals, targets):
    proposals_with_gt = []
    self.num_boxes = np.min([len(x.proposal_boxes) for x in proposals])
    for proposals_per_image, targets_per_image in zip(proposals, targets):
        has_gt = len(targets_per_image) > 0
        _, indices = torch.sort(proposals_per_image.objectness_logits, descending=True)
        sampled_idxs = indices[:self.num_boxes]
        proposals_per_image = proposals_per_image[sampled_idxs]

        match_quality_matrix = pairwise_iou(
            targets_per_image.gt_boxes, proposals_per_image.proposal_boxes)
        matched_idxs, matched_labels = self.proposal_matcher(match_quality_matrix)
        gt_classes = self._label_proposals(
            matched_idxs, matched_labels, targets_per_image.gt_classes)
        proposals_per_image.gt_classes = gt_classes

        if has_gt:
            for (trg_name, trg_value) in targets_per_image.get_fields().items():
                if trg_name.startswith("gt_") and not proposals_per_image.has(trg_name):
                    proposals_per_image.set(trg_name, trg_value[matched_idxs])
        else:
            gt_boxes = Boxes(
                targets_per_image.gt_boxes.tensor.new_zeros((len(sampled_idxs), 4)))
            proposals_per_image.gt_boxes = gt_boxes

        proposals_with_gt.append(proposals_per_image)
    return proposals_with_gt
def get_ground_truth(self, anchors: List[Boxes], gt_instances: List[Instances],
                     num_classes: int) -> Tuple[List[Tensor], List[Tensor]]:
    """
    Extract the ground truth classes and boxes from a list of Instances objects.

    Args:
        anchors (List[Boxes]): A list of #feature level Boxes. The Boxes contains
            anchors of this image on the specific feature level.
        gt_instances (List[Instances]): A list of N `Instances`s. The i-th
            `Instances` contains the ground-truth per-instance annotations
            for the i-th input image.
        num_classes (int): The number of classes.

    Returns:
        gt_classes (List[Tensor]): List of #img tensors. i-th element is a vector of
            classes whose length is the total number of anchors across all feature
            maps (sum(Hi * Wi * A)). Label values are in {-1, 0, ..., K}, with -1
            means ignore, and K means background.
        matched_gt_boxes (List[Tensor]): i-th element is a Rx4 tensor, where R is the
            total number of anchors across feature maps. The values are the matched
            gt boxes for each anchor. Values are undefined for those anchors not
            labeled as foreground.
    """
    anchors_boxes: Boxes = Boxes.cat(anchors)

    gt_classes: List[Tensor] = []
    matched_gt_boxes: List[Tensor] = []
    for gt_instance in gt_instances:
        match_quality_matrix: Tensor = pairwise_iou(gt_instance.gt_boxes, anchors_boxes)
        matched_idxs, anchor_classes = self.anchor_matcher(match_quality_matrix)
        del match_quality_matrix

        if len(gt_instance) > 0:
            matched_gt_boxes_i: Tensor = gt_instance.gt_boxes.tensor[matched_idxs]
            gt_classes_i: Tensor = gt_instance.gt_classes[matched_idxs]
            # Anchors with class 0 are treated as background.
            gt_classes_i[anchor_classes == 0] = num_classes
            # Anchors with class -1 are ignored.
            gt_classes_i[anchor_classes == -1] = -1
        else:
            matched_gt_boxes_i = torch.zeros_like(anchors_boxes.tensor)
            gt_classes_i = torch.zeros_like(matched_idxs) + num_classes

        gt_classes.append(gt_classes_i)
        matched_gt_boxes.append(matched_gt_boxes_i)

    return gt_classes, matched_gt_boxes
def get_ground_truth(self, anchors, targets):
    """
    Args:
        anchors (list[list[Boxes]]): a list of N=#image elements. Each is a list of
            #feature level Boxes. The Boxes contains anchors of this image on the
            specific feature level.
        targets (list[Instances]): a list of N `Instances`s. The i-th `Instances`
            contains the ground-truth per-instance annotations for the i-th input
            image. Specify `targets` during training only.

    Returns:
        gt_classes (Tensor): An integer tensor of shape (N, R) storing ground-truth
            labels for each anchor. R is the total number of anchors, i.e. the sum
            of Hi x Wi x A for all levels.
            Anchors with an IoU with some target higher than the foreground threshold
            are assigned their corresponding label in the [0, K-1] range.
            Anchors whose IoU are below the background threshold are assigned the
            label "K". Anchors whose IoU are between the foreground and background
            thresholds are assigned a label "-1", i.e. ignore.
        gt_anchors_deltas (Tensor): Shape (N, R, 4). The last dimension represents
            ground-truth box2box transform targets (dx, dy, dw, dh) that map each
            anchor to its matched ground-truth box. The values in the tensor are
            meaningful only when the corresponding anchor is labeled as foreground.
    """
    gt_classes = []
    gt_anchors_deltas = []
    anchors = [Boxes.cat(anchors_i) for anchors_i in anchors]  # list[Tensor(R, 4)], one for each image

    for anchors_per_image, targets_per_image in zip(anchors, targets):
        match_quality_matrix = pairwise_iou(targets_per_image.gt_boxes, anchors_per_image)
        gt_matched_idxs, anchor_labels = self.matcher(match_quality_matrix)

        has_gt = len(targets_per_image) > 0
        if has_gt:
            # ground truth box regression
            matched_gt_boxes = targets_per_image.gt_boxes[gt_matched_idxs]
            gt_anchors_reg_deltas_i = self.box2box_transform.get_deltas(
                anchors_per_image.tensor, matched_gt_boxes.tensor)

            gt_classes_i = targets_per_image.gt_classes[gt_matched_idxs]
            # Anchors with label 0 are treated as background.
            gt_classes_i[anchor_labels == 0] = self.num_classes
            # Anchors with label -1 are ignored.
            gt_classes_i[anchor_labels == -1] = -1
        else:
            gt_classes_i = torch.zeros_like(gt_matched_idxs) + self.num_classes
            gt_anchors_reg_deltas_i = torch.zeros_like(anchors_per_image.tensor)

        gt_classes.append(gt_classes_i)
        gt_anchors_deltas.append(gt_anchors_reg_deltas_i)

    return torch.stack(gt_classes), torch.stack(gt_anchors_deltas)
def get_ground_truth(self, centers: torch.Tensor, strides, init_boxes, gt_instances):
    """
    Get gt according to the init box prediction. The labels for init boxes are
    generated from point-based distance matching, and the labels for refine boxes
    are generated from the init boxes in the same way as RetinaNet, where the init
    boxes are regarded as anchors.

    Args:
        centers: (X, 2), center coordinates for points in all feature levels.
        strides: (X), strides for each point in all feature levels.
        init_boxes: (N, X, 4), init box prediction.
        gt_instances (list[Instances]): a list of N `Instances`s. The i-th
            `Instances` contains the ground-truth per-instance annotations
            for the i-th input image.

    Returns:
        Tensor (N, X): Foreground/background label for init boxes. It is used to
            select positions where the init box regression loss is computed.
        Tensor (N, X, 4): Label for init boxes, will be masked by the binary label above.
        Tensor (N, X): Classification label at all positions, including values -1 for
            ignoring, [0, self.num_classes - 1] for foreground positions, and
            self.num_classes for background positions.
        Tensor (N, X, 4): Label for refine boxes, only foreground positions are considered.
    """
    # The init boxes use point-based nearest assignment; the refine boxes use IoU-based assignment.
    init_objectness_labels = []
    init_bbox_labels = []
    cls_labels = []
    refine_bbox_labels = []
    for i, targets_per_image in enumerate(gt_instances):
        image_size = targets_per_image.image_size
        centers_invalid = (centers[:, 0] >= image_size[1]).logical_or(
            centers[:, 1] >= image_size[0])

        init_objectness_label, init_bbox_label = self.matcher(
            centers, strides, targets_per_image.gt_boxes)
        init_objectness_label[centers_invalid] = 0

        match_quality_matrix = pairwise_iou(targets_per_image.gt_boxes,
                                            Boxes(init_boxes[i]))
        gt_matched_idxs, bbox_matched = self.bbox_matcher(match_quality_matrix)
        cls_label = targets_per_image.gt_classes[gt_matched_idxs]
        cls_label[bbox_matched == 0] = self.num_classes
        cls_label[centers_invalid] = -1
        refine_bbox_label = targets_per_image.gt_boxes[gt_matched_idxs]

        init_objectness_labels.append(init_objectness_label)
        init_bbox_labels.append(init_bbox_label)
        cls_labels.append(cls_label)
        refine_bbox_labels.append(refine_bbox_label.tensor)

    return torch.stack(init_objectness_labels), \
        torch.stack(init_bbox_labels), \
        torch.stack(cls_labels), \
        torch.stack(refine_bbox_labels)
def match_pred_to_gt(gt_boxes, pred_boxes, iou_thresh):
    match_quality_matrix = pairwise_iou(pred_boxes, gt_boxes)
    matched_vals, matches = match_quality_matrix.max(dim=1)
    valid_pred_ids = matched_vals < iou_thresh
    matched_pred_ids = np.where(valid_pred_ids)[0]
    return matched_pred_ids
def _match_annotations(self, image_annotations, image_predictions):
    # TODO: Evaluate the number of detected instances.
    prediction_boxes = Boxes.cat(_extract_instances_property(image_predictions, "bbox"))
    annotation_boxes = Boxes.cat(_extract_instances_property(image_annotations, "bbox"))
    match_quality_matrix = pairwise_iou(annotation_boxes, prediction_boxes)
    matched_idxs, matched_labels = self._bbox_matcher(match_quality_matrix)
    matched_image_annotations = [image_annotations[i] for i in matched_idxs]
    return matched_image_annotations, matched_labels
def test_pairwise_iou(self):
    boxes1, boxes2 = self.create_boxes()
    expected_ious = torch.tensor([
        [1.0, 0.5, 0.5, 0.25, 0.25, 0.25 / (2 - 0.25)],
        [1.0, 0.5, 0.5, 0.25, 0.25, 0.25 / (2 - 0.25)],
    ])
    ious = pairwise_iou(Boxes(boxes1), Boxes(boxes2))
    self.assertTrue(torch.allclose(ious, expected_ious))
def bbox_targets(self,
                 candidate_bboxes,
                 gt_bboxes,
                 gt_labels,
                 pos_iou_thr=0.5,
                 neg_iou_thr=0.4,
                 gt_max_matching=True):
    """
    Target assignment: MaxIoU assign.

    Args:
        candidate_bboxes (Tensor): Nx4 candidate (anchor/proposal) boxes.
        gt_bboxes (Boxes): ground-truth boxes.
        gt_labels (Tensor): class labels of the ground-truth boxes.
        pos_iou_thr (float): IoU at or above which a candidate is foreground.
        neg_iou_thr (float): IoU below which a candidate is background.
        gt_max_matching (bool): if True, also assign each gt to its best-overlapping candidate.

    Returns:
        assigned_bboxes (Tensor): Nx4 matched gt box per candidate (zeros for background).
        assigned_labels (Tensor): class label per candidate; background is self.num_classes.
    """
    if candidate_bboxes.size(0) == 0 or gt_bboxes.tensor.size(0) == 0:
        raise ValueError('No gt or anchors')

    candidate_bboxes[:, 0].clamp_(min=0)
    candidate_bboxes[:, 1].clamp_(min=0)
    candidate_bboxes[:, 2].clamp_(min=0)
    candidate_bboxes[:, 3].clamp_(min=0)

    num_candidates = candidate_bboxes.size(0)
    overlaps = pairwise_iou(Boxes(candidate_bboxes), gt_bboxes)
    assigned_labels = overlaps.new_full((overlaps.size(0), ),
                                        self.num_classes,
                                        dtype=torch.long)
    # for each anchor, which gt best overlaps with it
    # for each anchor, the max iou of all gts
    max_overlaps, argmax_overlaps = overlaps.max(dim=1)
    # for each gt, which anchor best overlaps with it
    # for each gt, the max iou of all proposals
    gt_max_overlaps, gt_argmax_overlaps = overlaps.max(dim=0)

    bg_inds = max_overlaps < neg_iou_thr
    assigned_labels[bg_inds] = self.num_classes

    fg_inds = max_overlaps >= pos_iou_thr
    assigned_labels[fg_inds] = gt_labels[argmax_overlaps[fg_inds]]

    if gt_max_matching:
        fg_inds = torch.nonzero(overlaps == gt_max_overlaps)[:, 0]
        assigned_labels[fg_inds] = gt_labels[argmax_overlaps[fg_inds]]

    assigned_bboxes = overlaps.new_zeros((num_candidates, 4))
    fg_inds = (assigned_labels >= 0) & (assigned_labels != self.num_classes)
    assigned_bboxes[fg_inds] = gt_bboxes.tensor[argmax_overlaps[fg_inds]]

    return assigned_bboxes, assigned_labels
def _build_graph(boxes, iou_threshold):
    """Build graph based on box IoU"""
    # overlaps = box_utils.bbox_overlaps(
    #     boxes.astype(dtype=np.float32, copy=False),
    #     boxes.astype(dtype=np.float32, copy=False))
    overlaps = pairwise_iou(Boxes(boxes), Boxes(boxes))
    overlaps = overlaps.data.cpu().numpy()
    return (overlaps > iou_threshold).astype(np.float32)
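# Hypothetical usage sketch (not part of the original snippet): the matrix returned
# by _build_graph is just a thresholded IoU matrix, so it can be fed directly into a
# generic graph routine to group overlapping boxes. Assumes scipy is available; the
# example boxes and threshold below are illustrative only.
import torch
from scipy.sparse import csr_matrix
from scipy.sparse.csgraph import connected_components

boxes = torch.tensor([
    [0.0, 0.0, 10.0, 10.0],
    [1.0, 1.0, 11.0, 11.0],    # overlaps the first box
    [50.0, 50.0, 60.0, 60.0],  # isolated
])
graph = _build_graph(boxes, iou_threshold=0.3)  # (3, 3) float adjacency matrix
n_groups, group_ids = connected_components(csr_matrix(graph), directed=False)
# group_ids -> e.g. array([0, 0, 1]): the first two boxes form one cluster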
def match_gt_to_pred(gt_boxes, pred_boxes, iou_thresh):
    match_quality_matrix = pairwise_iou(gt_boxes, pred_boxes)
    matched_vals, matches = match_quality_matrix.max(dim=1)
    valid_gt_ids = matched_vals > iou_thresh
    matched_gt_ids = np.where(valid_gt_ids)[0]
    matched_pred_ids = matches[valid_gt_ids]
    return matched_gt_ids, matched_pred_ids
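# Hypothetical usage sketch (not from the original source): pairing ground-truth and
# predicted boxes for a single image at IoU > 0.5, assuming detectron2's Boxes.
# The boxes and threshold are made up for illustration.
import torch
from detectron2.structures import Boxes

gt = Boxes(torch.tensor([[0.0, 0.0, 10.0, 10.0], [20.0, 20.0, 30.0, 30.0]]))
pred = Boxes(torch.tensor([[1.0, 1.0, 9.0, 9.0], [100.0, 100.0, 110.0, 110.0]]))

gt_ids, pred_ids = match_gt_to_pred(gt, pred, iou_thresh=0.5)
# gt_ids   -> indices of ground-truth boxes whose best IoU exceeds the threshold
# pred_ids -> index of the best-overlapping prediction for each matched ground truth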
def label_anchors(self, anchors, gt_instances):
    """
    Args:
        anchors (list[Boxes]): A list of #feature level Boxes. The Boxes contains
            anchors of this image on the specific feature level.
        gt_instances (list[Instances]): a list of N `Instances`s. The i-th
            `Instances` contains the ground-truth per-instance annotations
            for the i-th input image.

    Returns:
        list[Tensor]: List of #img tensors. i-th element is a vector of labels whose
            length is the total number of anchors across all feature maps
            (sum(Hi * Wi * A)). Label values are in {-1, 0, ..., K}, with -1 means
            ignore, and K means background.
        list[Tensor]: i-th element is a Rx4 tensor, where R is the total number of
            anchors across feature maps. The values are the matched gt boxes for each
            anchor. Values are undefined for those anchors not labeled as foreground.
    """
    anchors = Boxes.cat(anchors)  # Rx4

    gt_labels, gt_labels_1, gt_labels_2 = [], [], []  # change
    matched_gt_boxes = []
    for gt_per_image in gt_instances:
        match_quality_matrix = pairwise_iou(gt_per_image.gt_boxes, anchors)
        matched_idxs, anchor_labels = self.anchor_matcher(match_quality_matrix)
        del match_quality_matrix

        if len(gt_per_image) > 0:
            matched_gt_boxes_i = gt_per_image.gt_boxes.tensor[matched_idxs]

            gt_labels_i = gt_per_image.gt_classes[matched_idxs]
            gt_labels_i_1 = gt_per_image.gt_classes_1[matched_idxs]
            gt_labels_i_2 = gt_per_image.gt_classes_2[matched_idxs]
            # Anchors with label 0 are treated as background.
            gt_labels_i[anchor_labels == 0] = self.num_classes
            gt_labels_i_1[anchor_labels == 0] = 3
            gt_labels_i_2[anchor_labels == 0] = 3
            # Anchors with label -1 are ignored.
            gt_labels_i[anchor_labels == -1] = -1
            gt_labels_i_1[anchor_labels == -1] = -1
            gt_labels_i_2[anchor_labels == -1] = -1
        else:
            matched_gt_boxes_i = torch.zeros_like(anchors.tensor)
            gt_labels_i = torch.zeros_like(matched_idxs) + self.num_classes
            gt_labels_i_1 = torch.zeros_like(matched_idxs) + 3
            gt_labels_i_2 = torch.zeros_like(matched_idxs) + 3

        gt_labels.append(gt_labels_i)
        gt_labels_1.append(gt_labels_i_1)
        gt_labels_2.append(gt_labels_i_2)
        matched_gt_boxes.append(matched_gt_boxes_i)

    return gt_labels, gt_labels_1, gt_labels_2, matched_gt_boxes
def get_ground_truth(self, points: torch.Tensor, init_boxes, gt_instances):
    object_sizes_of_interest = [
        [-1, 64],
        [64, 128],
        [128, 256],
        [256, 512],
        [512, INF],
    ]
    expanded_object_sizes_of_interest = []
    for l, points_per_level in enumerate(points):
        object_sizes_of_interest_per_level = \
            points_per_level.new_tensor(object_sizes_of_interest[l])
        expanded_object_sizes_of_interest.append(
            object_sizes_of_interest_per_level[None].expand(len(points_per_level), -1)
        )
    expanded_object_sizes_of_interest = torch.cat(expanded_object_sizes_of_interest, dim=0)

    init_gt_classes, init_reg_targets = compute_targets_for_locations(
        points, gt_instances, expanded_object_sizes_of_interest,
        self.fpn_strides, self.center_sampling_radius, self.num_classes
    )

    centers = torch.cat(points, 0)  # [X, 2]
    cls_labels = []
    refine_bbox_labels = []
    for i, targets_per_image in enumerate(gt_instances):
        image_size = targets_per_image.image_size
        centers_invalid = (centers[:, 0] >= image_size[1]).logical_or(
            centers[:, 1] >= image_size[0])

        match_quality_matrix = pairwise_iou(
            targets_per_image.gt_boxes, Boxes(init_boxes[i]))
        gt_matched_idxs, bbox_matched = self.bbox_matcher(match_quality_matrix)
        cls_label = targets_per_image.gt_classes[gt_matched_idxs]
        cls_label[bbox_matched == 0] = self.num_classes
        cls_label[centers_invalid] = -1
        refine_bbox_label = targets_per_image.gt_boxes[gt_matched_idxs]

        # change bbox from xyxy to ltrb offsets relative to each center
        refine_bbox_label = refine_bbox_label.tensor  # [X, 4]
        xs, ys = centers[:, 0], centers[:, 1]
        l = xs - refine_bbox_label[:, 0]
        t = ys - refine_bbox_label[:, 1]
        r = refine_bbox_label[:, 2] - xs
        b = refine_bbox_label[:, 3] - ys
        refine_bbox_label = torch.stack([l, t, r, b], dim=1)

        cls_labels.append(cls_label)
        refine_bbox_labels.append(refine_bbox_label)

    refine_gt_classes = torch.stack(cls_labels)
    refine_reg_targets = torch.stack(refine_bbox_labels)
    return init_gt_classes, init_reg_targets, refine_gt_classes, refine_reg_targets
def _get_proposal_clusters(all_rois, proposals, im_labels, cls_prob):
    """Generate a random sample of RoIs comprising foreground and background
    examples.
    """
    num_images, num_classes = im_labels.shape
    assert num_images == 1, "batch size should be equal to 1"
    # overlaps: (rois x gt_boxes)
    gt_boxes = proposals["gt_boxes"]
    gt_labels = proposals["gt_classes"]
    gt_scores = proposals["gt_scores"]
    # overlaps = box_utils.bbox_overlaps(
    #     all_rois.astype(dtype=np.float32, copy=False),
    #     gt_boxes.astype(dtype=np.float32, copy=False))
    overlaps = pairwise_iou(Boxes(all_rois), Boxes(gt_boxes))
    overlaps = overlaps.data.cpu().numpy()

    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    labels = gt_labels[gt_assignment, 0]
    cls_loss_weights = gt_scores[gt_assignment, 0]

    # Select foreground RoIs as those with >= FG_THRESH overlap
    # fg_inds = np.where(max_overlaps >= cfg_TRAIN_FG_THRESH)[0]

    # Select background RoIs as those with < FG_THRESH overlap
    bg_inds = np.where(max_overlaps < cfg_TRAIN_FG_THRESH)[0]

    ig_inds = np.where(max_overlaps < cfg_TRAIN_BG_THRESH)[0]
    cls_loss_weights[ig_inds] = 0.0

    labels[bg_inds] = 0
    gt_assignment[bg_inds] = -1

    img_cls_loss_weights = np.zeros(gt_boxes.shape[0], dtype=np.float32)
    pc_probs = np.zeros(gt_boxes.shape[0], dtype=np.float32)
    pc_labels = np.zeros(gt_boxes.shape[0], dtype=np.int32)
    pc_count = np.zeros(gt_boxes.shape[0], dtype=np.int32)

    for i in range(gt_boxes.shape[0]):
        po_index = np.where(gt_assignment == i)[0]
        img_cls_loss_weights[i] = np.sum(cls_loss_weights[po_index])
        pc_labels[i] = gt_labels[i, 0]
        pc_count[i] = len(po_index)
        pc_probs[i] = np.average(cls_prob[po_index, pc_labels[i]])

    return (
        labels,
        cls_loss_weights,
        gt_assignment,
        pc_labels,
        pc_probs,
        pc_count,
        img_cls_loss_weights,
    )
def label_anchors(self, anchors, gt_instances):
    """
    Args:
        anchors (list[Boxes]): A list of #feature level Boxes. The Boxes contains
            anchors of this image on the specific feature level.
        gt_instances (list[Instances]): a list of N `Instances`s. The i-th
            `Instances` contains the ground-truth per-instance annotations
            for the i-th input image.

    Returns:
        list[Tensor]: List of #img tensors. i-th element is a vector of labels whose
            length is the total number of anchors across all feature maps
            (sum(Hi * Wi * A)). Label values are in {-1, 0, ..., K}, with -1 means
            ignore, and K means background.
        list[Tensor]: i-th element is a Rx4 tensor, where R is the total number of
            anchors across feature maps. The values are the matched gt boxes for each
            anchor. Values are undefined for those anchors not labeled as foreground.
    """
    anchors = Boxes.cat(anchors)  # Rx4
    num_anchors = anchors.tensor.shape[0]

    gt_labels, matched_gt_boxes, matched_gt_marks, matched_gt_marks_labels = [[] for _ in range(4)]
    for gt_per_image in gt_instances:
        match_quality_matrix = pairwise_iou(gt_per_image.gt_boxes, anchors)
        matched_idxs, anchor_labels = self.anchor_matcher(match_quality_matrix)
        del match_quality_matrix

        if len(gt_per_image) > 0:
            matched_gt_boxes_i = gt_per_image.gt_boxes.tensor[matched_idxs]
            matched_gt_marks_iv = gt_per_image.gt_keypoints.tensor[matched_idxs]
            matched_gt_marks_i = matched_gt_marks_iv[:, :, :2].flatten(1)
            matched_gt_marks_labels_i = matched_gt_marks_iv[:, :, 2].flatten(1)
            matched_gt_marks_labels_i, _ = torch.min(matched_gt_marks_labels_i, dim=1)

            gt_labels_i = gt_per_image.gt_classes[matched_idxs]
            # Anchors with label 0 are treated as background.
            gt_labels_i[anchor_labels == 0] = self.num_classes
            # Anchors with label -1 are ignored.
            gt_labels_i[anchor_labels == -1] = -1
        else:
            matched_gt_boxes_i = torch.zeros_like(anchors.tensor)
            gt_labels_i = torch.zeros_like(matched_idxs) + self.num_classes
            matched_gt_marks_i = torch.zeros(num_anchors, self.num_landmark * 2).to(self.device)
            matched_gt_marks_labels_i = torch.zeros(num_anchors).to(self.device)

        gt_labels.append(gt_labels_i)
        matched_gt_boxes.append(matched_gt_boxes_i)
        matched_gt_marks.append(matched_gt_marks_i)
        matched_gt_marks_labels.append(matched_gt_marks_labels_i)

    return gt_labels, matched_gt_boxes, matched_gt_marks, matched_gt_marks_labels
def get_transform(self, img, boxes):
    """
    Args:
        img (ndarray): of shape HxWxC (RGB). The array can be of type uint8 in
            range [0, 255], or floating point in range [0, 255].
        boxes: Nx4 bbox coordinates (top left and bottom right) in `XYXY_ABS` format.
    """
    sample_mode = (1, *self.min_ious, 0)
    h, w = img.shape[:2]
    boxes = torch.tensor(boxes)

    while True:
        mode = np.random.choice(sample_mode)
        if mode == 1:
            return NoOpTransform()

        min_iou = mode
        for _ in range(50):
            new_w = np.random.uniform(self.min_crop_size * w, w)
            new_h = np.random.uniform(self.min_crop_size * h, h)

            # h / w in [0.5, 2]
            if new_h / new_w < 0.5 or new_h / new_w > 2:
                continue

            left = np.random.uniform(w - new_w)
            top = np.random.uniform(h - new_h)

            patch = torch.tensor([left, top, left + new_w, top + new_h], dtype=torch.int)
            overlaps = pairwise_iou(Boxes(patch.view(-1, 4)), Boxes(boxes.view(-1, 4)))
            if overlaps.min() < min_iou:
                continue

            # center of boxes should be inside the cropped image
            center = (boxes[:, :2] + boxes[:, 2:]) / 2
            mask = ((center[:, 0] > patch[0]) * (center[:, 1] > patch[1]) *
                    (center[:, 0] < patch[2]) * (center[:, 1] < patch[3]))
            if not mask.any():
                continue

            return CropTransform(int(left), int(top), int(new_w), int(new_h))
def pairwise_tracker(pred1, pred2):
    boxes1 = pred1.get('pred_boxes')
    boxes2 = pred2.get('pred_boxes')
    categories1 = pred1.get('pred_classes')
    categories2 = pred2.get('pred_classes')
    boxes_overlaps = pairwise_iou(boxes1, boxes2)
    # Zero out overlaps between detections of different classes.
    objects_overlaps = (categories1[:, None] == categories2[None, :]) * boxes_overlaps
    return objects_overlaps
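# Hypothetical sketch (an assumption, not the original tracking code): the class-aware
# overlap matrix from pairwise_tracker still has to be turned into actual associations
# between two frames. One option is a one-to-one Hungarian assignment via scipy's
# linear_sum_assignment; the helper name and threshold below are illustrative only.
from scipy.optimize import linear_sum_assignment

def associate(pred1, pred2, min_iou=0.3):
    overlaps = pairwise_tracker(pred1, pred2)  # (N1, N2) tensor
    cost = (-overlaps).cpu().numpy()           # negate to maximize overlap
    rows, cols = linear_sum_assignment(cost)
    # Keep only pairs whose class-aware IoU clears the threshold.
    return [(i, j) for i, j in zip(rows, cols) if overlaps[i, j] > min_iou]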
def label_and_sample_proposals(self,
                               proposals: List[Instances],
                               targets: List[Instances],
                               branch: str = "") -> List[Instances]:
    gt_boxes = [x.gt_boxes for x in targets]
    if self.proposal_append_gt:
        proposals = add_ground_truth_to_proposals(gt_boxes, proposals)

    proposals_with_gt = []
    num_fg_samples = []
    num_bg_samples = []
    for proposals_per_image, targets_per_image in zip(proposals, targets):
        has_gt = len(targets_per_image) > 0
        match_quality_matrix = pairwise_iou(
            targets_per_image.gt_boxes, proposals_per_image.proposal_boxes)
        matched_idxs, matched_labels = self.proposal_matcher(match_quality_matrix)
        sampled_idxs, gt_classes = self._sample_proposals(
            matched_idxs, matched_labels, targets_per_image.gt_classes)

        proposals_per_image = proposals_per_image[sampled_idxs]
        proposals_per_image.gt_classes = gt_classes

        if has_gt:
            sampled_targets = matched_idxs[sampled_idxs]
            for (trg_name, trg_value) in targets_per_image.get_fields().items():
                if trg_name.startswith("gt_") and not proposals_per_image.has(trg_name):
                    proposals_per_image.set(trg_name, trg_value[sampled_targets])
        else:
            gt_boxes = Boxes(
                targets_per_image.gt_boxes.tensor.new_zeros((len(sampled_idxs), 4)))
            proposals_per_image.gt_boxes = gt_boxes

        num_bg_samples.append((gt_classes == self.num_classes).sum().item())
        num_fg_samples.append(gt_classes.numel() - num_bg_samples[-1])
        proposals_with_gt.append(proposals_per_image)

    storage = get_event_storage()
    storage.put_scalar("roi_head/num_target_fg_samples_" + branch, np.mean(num_fg_samples))
    storage.put_scalar("roi_head/num_target_bg_samples_" + branch, np.mean(num_bg_samples))

    return proposals_with_gt
def test(cfg, data_loader_iter, anchors, matcher, raw_matcher):
    batched_inputs = next(data_loader_iter)
    gt_instances = [x["instances"].to(device) for x in batched_inputs]
    gt_boxes = [x.gt_boxes for x in gt_instances]
    image_sizes = [x.image_size for x in gt_instances]
    del gt_instances

    for image_size_i, gt_boxes_i in zip(image_sizes, gt_boxes):
        match_quality_matrix = pairwise_iou(gt_boxes_i, anchors)
        raw_matched_idxs, raw_gt_labels_i = raw_matcher(match_quality_matrix)
        matched_idxs, gt_labels_i = matcher(match_quality_matrix)
        import pdb
        pdb.set_trace()
def _filter_positive_proposals(
    self,
    proposal_boxes: Boxes,
    scores: torch.Tensor,
    gt_boxes: Boxes,
    gt_classes: torch.Tensor,
) -> Tuple[Boxes, torch.Tensor]:
    """Filter for desired targets for the DAG algo

    Parameters
    ----------
    proposal_boxes : Boxes
        Proposal boxes directly from RPN
    scores : torch.Tensor
        Softmaxed scores for each proposal box
    gt_boxes : Boxes
        Ground truth boxes
    gt_classes : torch.Tensor
        Ground truth classes

    Returns
    -------
    Tuple[Boxes, torch.Tensor]
        filtered_target_boxes, corresponding_class_labels
    """
    n_proposals = len(proposal_boxes)

    proposal_gt_ious = pairwise_iou(proposal_boxes, gt_boxes)

    # For each proposal_box, pair with a gt_box, i.e. find gt_box with highest IoU
    # IoU with paired gt_box, idx of paired gt_box
    paired_ious, paired_gt_idx = proposal_gt_ious.max(dim=1)

    # Filter for IoUs > 0.1
    iou_cond = paired_ious > 0.1

    # Filter for score of proposal > 0.1
    # Get class of paired gt_box
    gt_classes_repeat = gt_classes.repeat(n_proposals, 1)
    paired_gt_classes = gt_classes_repeat[torch.arange(n_proposals), paired_gt_idx]
    # Get scores of corresponding class
    paired_scores = scores[torch.arange(n_proposals), paired_gt_classes]
    score_cond = paired_scores > 0.1

    # Filter for positive proposals and their corresponding gt labels
    cond = iou_cond & score_cond

    return proposal_boxes[cond], paired_gt_classes[cond].to(self.device)
def _match_and_label_boxes(self, proposals, stage, targets):
    """
    Match proposals with groundtruth using the matcher at the given stage.
    Label the proposals as foreground or background based on the match.

    Args:
        proposals (list[Instances]): One Instances for each image, with
            the field "proposal_boxes".
        stage (int): the current stage
        targets (list[Instances]): the ground truth instances

    Returns:
        list[Instances]: the same proposals, but with fields "gt_classes" and "gt_boxes"
    """
    num_fg_samples, num_bg_samples = [], []
    for proposals_per_image, targets_per_image in zip(proposals, targets):
        match_quality_matrix = pairwise_iou(
            targets_per_image.gt_boxes, proposals_per_image.proposal_boxes)
        # proposal_labels are 0 or 1
        matched_idxs, proposal_labels = self.proposal_matchers[stage](match_quality_matrix)
        if len(targets_per_image) > 0:
            gt_classes = targets_per_image.gt_classes[matched_idxs]
            # Label unmatched proposals (0 label from matcher) as background (label=num_classes)
            gt_classes[proposal_labels == 0] = self.num_classes
            gt_boxes = targets_per_image.gt_boxes[matched_idxs]
        else:
            gt_classes = torch.zeros_like(matched_idxs) + self.num_classes
            gt_boxes = Boxes(
                targets_per_image.gt_boxes.tensor.new_zeros((len(proposals_per_image), 4)))
        proposals_per_image.gt_classes = gt_classes
        proposals_per_image.gt_boxes = gt_boxes

        num_fg_samples.append((proposal_labels == 1).sum().item())
        num_bg_samples.append(proposal_labels.numel() - num_fg_samples[-1])

    # Log the number of fg/bg samples in each stage
    storage = get_event_storage()
    storage.put_scalar(
        "stage{}/roi_head/num_fg_samples".format(stage),
        sum(num_fg_samples) / len(num_fg_samples),
    )
    storage.put_scalar(
        "stage{}/roi_head/num_bg_samples".format(stage),
        sum(num_bg_samples) / len(num_bg_samples),
    )
    return proposals
def rep_box_loss(self):
    # get positive (foreground) proposals (P+ in RepLoss paper)
    bg_class_ind = self.pred_class_logits.shape[1] - 1
    fg_inds = torch.nonzero((self.gt_classes >= 0)
                            & (self.gt_classes < bg_class_ind)).squeeze(1)

    # IOUs here also deal with regressed boxes
    boxes = Boxes(self.predict_boxes_all())
    # set of regressed boxes of positive proposals
    fg_boxes = boxes[fg_inds]
    # index of ground truth box for each positive proposal
    fg_gt_inds = self.gt_box_inds[fg_inds]

    # rep box loss penalizes overlap (smooth_ln of the IoU) between boxes
    # assigned to different GT targets
    num_gts = torch.max(self.gt_box_inds) + 1
    device = self.pred_proposal_deltas.device
    loss_sum = torch.tensor(0.0, device=device)
    num_examples = torch.tensor(0.0, device=device)
    for i in range(num_gts):
        boxes_i = fg_boxes[fg_gt_inds == i]
        for j in range(num_gts):
            boxes_j = fg_boxes[fg_gt_inds == j]
            if i != j:
                iou_matrix = pairwise_iou(boxes_i, boxes_j)
                losses = smooth_ln(iou_matrix, sigma=self.rep_box_sigma)
                loss_sum += torch.sum(losses)
                num_examples += 1.0

    # every i,j was counted twice
    loss_sum /= 2.0
    num_examples /= 2.0

    if self.d2_normalize:
        # if 'Detectron2 loss' enabled:
        # as in FastRCNNOutputs:smooth_l1_loss, divide by total examples instead of total
        # foreground examples to weight each foreground example the same
        loss_rep_box = loss_sum / self.gt_classes.numel()
    elif num_examples > 0:
        loss_rep_box = loss_sum / num_examples
    else:
        loss_rep_box = loss_sum  # = 0.0

    # print("loss_rep_box", loss_rep_box)
    return loss_rep_box
def forward_cmil(self, C, D, proposals):
    """
    Args:
        x: per-region features of shape (N, ...) for N bounding boxes to predict.

    Returns:
        Tensor: shape (N, K+1), scores for each of the N boxes. Each row contains
            the scores for K object categories and 1 background class.
        Tensor: bounding box regression deltas for each box. Shape is (N, Kx4),
            or (N, 4) for class-agnostic regression.
    """
    if proposals is None:
        scores = F.softmax(C, dim=1) * F.softmax(D, dim=0)
    elif len(proposals) == 1:
        scores = F.softmax(C, dim=1) * F.softmax(D, dim=0)
    else:
        num_preds_per_image = [len(p) for p in proposals]
        scores = cat(
            [
                F.softmax(c, dim=1) * F.softmax(d, dim=0)
                for c, d in zip(
                    C.split(num_preds_per_image, dim=0),
                    D.split(num_preds_per_image, dim=0),
                )
            ],
            dim=0,
        )

    proposal_deltas = torch.zeros(
        scores.shape[0],
        self.num_bbox_reg_classes * self.box_dim,
        dtype=scores.dtype,
        device=scores.device,
        requires_grad=False,
    )

    # num_preds_per_image = [len(p) for p in proposals]
    rois_obn_score = torch.sum(scores, dim=1, keepdim=True)
    # rois_obn_score = torch.clamp(rois_obn_score, min=1e-6, max=1.0 - 1e-6)

    assert proposals
    J = cat([pairwise_iou(p.proposal_boxes, p.proposal_boxes) for p in proposals], dim=0)
    MC, MD = self.roi_merge(rois_obn_score.cpu(), J.cpu(), C.cpu(), D.cpu())

    return MC.to(C.device), MD.to(D.device), scores, proposal_deltas
def find_correct_detections(self, detections, ground_truths):
    detected_bbxs = detections['instances'].get('pred_boxes')
    gt_cls_ids = [
        self.internal_dataset_mapping[gt['category_id']] for gt in ground_truths
    ]
    gt_cls_ids = torch.tensor(gt_cls_ids).to(detected_bbxs.device)

    # To recheck and use the following condition for efficiency
    # if len(detected_bbxs)==0 or len(ground_truths)==0 or set(gt_cls_ids.tolist())==set([-1]):
    if len(detected_bbxs) == 0 or len(ground_truths) == 0:
        correct = torch.zeros((len(detected_bbxs), ), dtype=torch.bool)
        return correct

    pred_classes = detections['instances'].get('pred_classes')
    gt_boxes = [
        BoxMode.convert(obj["bbox"], BoxMode.XYWH_ABS, BoxMode.XYXY_ABS)
        for obj in ground_truths if obj["iscrowd"] == 0
    ]
    gt_boxes = torch.as_tensor(gt_boxes).reshape(-1, 4)
    gt_boxes = Boxes(gt_boxes).to(detected_bbxs.device)

    gt_ann_id = [gt['id'] for gt in ground_truths]
    gt_ann_id = torch.tensor(gt_ann_id).to(detected_bbxs.device)

    correct = torch.ones(len(detections['instances']), dtype=torch.bool)
    overlaps = pairwise_iou(detected_bbxs, gt_boxes)
    max_iou, max_iou_indx = torch.max(overlaps, dim=-1)
    correct[max_iou < 0.5] = False
    correct[gt_cls_ids[max_iou_indx] != pred_classes] = False

    # Mark duplicate detections as incorrect
    # navigate through all detections and assign them to a specific annotation/class id
    detected_anns = []
    correct = correct.tolist()
    for i, (g_ann, correct_status) in enumerate(
            zip(gt_ann_id[max_iou_indx].tolist(), correct)):
        if g_ann in detected_anns:
            if correct_status:
                correct[i] = False
        else:
            if correct_status:
                detected_anns.append(g_ann)

    correct = torch.tensor(correct, dtype=torch.bool)
    return correct
def get_ground_truth(self, points: torch.Tensor, strides, init_boxes, gt_instances):
    centers = torch.cat(points, 0)
    strides = torch.cat(strides, 0)

    init_objectness_labels = []
    init_bbox_labels = []
    cls_labels = []
    refine_bbox_labels = []
    center_scores = []
    for i, targets_per_image in enumerate(gt_instances):
        image_size = targets_per_image.image_size
        centers_invalid = (centers[:, 0] >= image_size[1]).logical_or(
            centers[:, 1] >= image_size[0])

        init_objectness_label, init_bbox_label = self.matcher(
            centers, strides, targets_per_image.gt_boxes)
        init_objectness_label[centers_invalid] = 0

        match_quality_matrix = pairwise_iou(targets_per_image.gt_boxes,
                                            Boxes(init_boxes[i]))
        max_qualities, _ = match_quality_matrix.max(1)
        max_qualities = torch.clamp(max_qualities, min=1e-5)
        center_score, _ = (match_quality_matrix / max_qualities[:, None]).max(0)

        gt_matched_idxs, bbox_matched = self.bbox_matcher(match_quality_matrix)
        cls_label = targets_per_image.gt_classes[gt_matched_idxs]
        cls_label[bbox_matched == 0] = self.num_classes
        cls_label[centers_invalid] = -1
        refine_bbox_label = targets_per_image.gt_boxes[gt_matched_idxs]

        init_objectness_labels.append(init_objectness_label)
        init_bbox_labels.append(init_bbox_label)
        cls_labels.append(cls_label)
        refine_bbox_labels.append(refine_bbox_label.tensor)
        center_scores.append(center_score)

    init_objectness_labels = torch.stack(init_objectness_labels)
    init_bbox_labels = torch.stack(init_bbox_labels)
    refine_gt_classes = torch.stack(cls_labels)
    refine_reg_targets = torch.stack(refine_bbox_labels)
    center_scores = torch.stack(center_scores)
    return init_objectness_labels, init_bbox_labels, refine_gt_classes, refine_reg_targets, center_scores
def _get_ground_truth(self):
    """
    Returns:
        gt_objectness_logits: list of N tensors. Tensor i is a vector whose length is the
            total number of anchors in image i (i.e., len(anchors[i])). Label values are
            in {-1, 0, 1}, with meanings: -1 = ignore; 0 = negative class; 1 = positive class.
        gt_anchor_deltas: list of N tensors. Tensor i has shape (len(anchors[i]), 4).
    """
    gt_objectness_logits = []
    gt_anchor_deltas = []
    # Concatenate anchors from all feature maps into a single Boxes per image
    anchors = [BUABoxes.cat(anchors_i) for anchors_i in self.anchors]
    for image_size_i, anchors_i, gt_boxes_i in zip(self.image_sizes, anchors, self.gt_boxes):
        """
        image_size_i: (h, w) for the i-th image
        anchors_i: anchors for i-th image
        gt_boxes_i: ground-truth boxes for i-th image
        """
        match_quality_matrix = pairwise_iou(gt_boxes_i, anchors_i)
        matched_idxs, gt_objectness_logits_i = self.anchor_matcher(match_quality_matrix)

        if self.boundary_threshold >= 0:
            # Discard anchors that go out of the boundaries of the image
            # NOTE: This is legacy functionality that is turned off by default in Detectron2
            anchors_inside_image = anchors_i.inside_box(image_size_i, self.boundary_threshold)
            gt_objectness_logits_i[~anchors_inside_image] = -1

        if len(gt_boxes_i) == 0:
            # These values won't be used anyway since the anchor is labeled as background
            gt_anchor_deltas_i = torch.zeros_like(anchors_i.tensor)
        else:
            # TODO wasted computation for ignored boxes
            matched_gt_boxes = gt_boxes_i[matched_idxs]
            gt_anchor_deltas_i = self.box2box_transform.get_deltas(
                anchors_i.tensor, matched_gt_boxes.tensor)

        gt_objectness_logits.append(gt_objectness_logits_i)
        gt_anchor_deltas.append(gt_anchor_deltas_i)

    return gt_objectness_logits, gt_anchor_deltas
def get_nonzeroiou_unionboxes(boxes1, boxes2):
    iou = pairwise_iou(boxes1, boxes2)
    non_zero = (iou > 0).nonzero()
    union_boxes = []
    for i in range(non_zero.shape[0]):
        pre_union_boxes, _ = get_union_box(
            Boxes(boxes1.tensor[non_zero[i][0]:non_zero[i][0] + 1]),
            Boxes(boxes2.tensor[non_zero[i][1]:non_zero[i][1] + 1]))
        union_boxes.append(pre_union_boxes.tensor)
    if union_boxes:
        union_boxes = torch.cat(union_boxes, dim=0)
        second_boxes = boxes2.tensor[torch.sum(iou > 0, dim=0) > 0]
    else:
        device = boxes1.tensor.device
        union_boxes = torch.zeros(0, 4).to(dtype=torch.float32, device=device)
        second_boxes = torch.zeros(0, 4).to(dtype=torch.float32, device=device)
    return Boxes(union_boxes), Boxes(second_boxes)
def test_pairwise_iou(self):
    boxes1 = torch.tensor([[0.0, 0.0, 1.0, 1.0], [0.0, 0.0, 1.0, 1.0]])
    boxes2 = torch.tensor([
        [0.0, 0.0, 1.0, 1.0],
        [0.0, 0.0, 0.5, 1.0],
        [0.0, 0.0, 1.0, 0.5],
        [0.0, 0.0, 0.5, 0.5],
        [0.5, 0.5, 1.0, 1.0],
        [0.5, 0.5, 1.5, 1.5],
    ])
    expected_ious = torch.tensor([
        [1.0, 0.5, 0.5, 0.25, 0.25, 0.25 / (2 - 0.25)],
        [1.0, 0.5, 0.5, 0.25, 0.25, 0.25 / (2 - 0.25)],
    ])
    ious = pairwise_iou(Boxes(boxes1), Boxes(boxes2))
    self.assertTrue(torch.allclose(ious, expected_ious))
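# Quick check of the only non-obvious expected value above (added for clarity, not part
# of the original test): boxes [0, 0, 1, 1] and [0.5, 0.5, 1.5, 1.5] each have area 1 and
# intersect over a 0.5 x 0.5 square, so IoU = 0.25 / (1 + 1 - 0.25) = 0.25 / 1.75 ~= 0.1429.
inter = 0.5 * 0.5
union = 1.0 + 1.0 - inter
assert abs(inter / union - 0.25 / (2 - 0.25)) < 1e-9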
def drop_duplicates(outputs):
    instances = [i for i in range(len(outputs["instances"]))]
    intersect_box = []
    for i in range(len(outputs["instances"].pred_boxes)):
        bboxes_1 = outputs["instances"].pred_boxes[i]
        for j in range(len(outputs["instances"].pred_boxes)):
            bboxes_2 = outputs["instances"].pred_boxes[j]
            if i != j:
                iou = structures.pairwise_iou(bboxes_1, bboxes_2)
                if iou > 0.3:
                    if (outputs["instances"].scores[i] <
                            outputs["instances"].scores[j]):
                        if i not in intersect_box:
                            intersect_box.append(i)
    for intersect in intersect_box:
        instances.remove(intersect)
    return instances
def label_and_sample_long_term(self, proposals, targets):
    """
    See :class:`StROIHeads.label_and_sample_proposals`.
    """
    gt_boxes = [x.gt_boxes for x in targets]
    if self.proposal_append_gt:
        proposals = add_ground_truth_to_proposals(gt_boxes, proposals)

    targets_reference_frame = targets[0]  # == targets_per_image
    proposals_reference_frame = proposals[0]  # == proposals_per_image
    num_gts = len(targets_reference_frame)

    match_quality_matrix = pairwise_iou(
        targets_reference_frame.gt_boxes, proposals_reference_frame.proposal_boxes)
    matched_idxs, matched_labels = self.long_term_proposal_matcher(match_quality_matrix)

    sampled_idxs = list(range(self.longterm_proposals - num_gts)) + list(
        range(len(proposals_reference_frame) - num_gts, len(proposals_reference_frame)))
    proposals_reference_frame = proposals_reference_frame[sampled_idxs]

    assert num_gts
    # We index all the attributes of targets that start with "gt_"
    sampled_targets = matched_idxs[sampled_idxs]
    matched_labels = matched_labels[sampled_idxs]
    for (trg_name, trg_value) in targets_reference_frame.get_fields().items():
        if trg_name.startswith("gt_") and not proposals_reference_frame.has(trg_name):
            proposals_reference_frame.set(trg_name, trg_value[sampled_targets])

    mask = matched_labels == 0
    proposals_reference_frame.gt_id_track[mask] = -1

    return proposals_reference_frame
def _merge_overlapping(
    boxes: Boxes,
    classes: torch.LongTensor,
    relation_indexes: torch.LongTensor,
    nms_threshold: float,
):
    # Boxes are candidates for merging if their IoU is above a threshold
    iou_above_thres = pairwise_iou(boxes, boxes) > nms_threshold

    # Also, they have to belong to the same class to be candidates.
    # Here we treat "person subj" and "person obj" as two
    # separate classes, to avoid merging cases of "person hugs person"
    # where the two people have high overlap but must remain separate
    obj_idx = relation_indexes[1]
    obj_is_person = classes[obj_idx] == 0
    classes_tmp = classes.clone()
    classes_tmp[obj_idx[obj_is_person]] = -1
    same_class = classes_tmp[:, None] == classes_tmp[None, :]

    candidates = iou_above_thres & same_class

    keep = []
    visited = torch.full((len(boxes), ), False, dtype=torch.bool)
    relation_indexes = relation_indexes.clone()

    for old_box_idx, skip in enumerate(visited):
        if skip:
            continue
        new_box_idx = len(keep)
        keep.append(old_box_idx)

        matches = torch.nonzero(candidates[old_box_idx, :] & ~visited, as_tuple=True)[0]
        visited[matches] = True

        rel_idx_to_fix = torch.any(
            relation_indexes[:, :, None] == matches[None, None, :], dim=2)
        relation_indexes[rel_idx_to_fix] = new_box_idx

    return boxes[keep], classes[keep], relation_indexes