def label_and_sample_proposals(self, proposals, targets):
    """
    Match proposals to ground truth and keep a labelled training subset per image.

    When `self.proposal_append_gt` is set, the ground-truth boxes are first
    appended to the proposals through `add_ground_truth_to_proposals`. For each
    image the rotated IoU between ground-truth boxes and proposals is handed to
    `self.proposal_matcher`; `self._sample_proposals` then selects which
    proposals to keep and returns their class labels.

    Args:
        proposals (list[Instances]): one entry per image, each exposing
            `proposal_boxes`. See :meth:`StandardROIHeads.forward`.
        targets (list[Instances]): one entry per image, each exposing
            `gt_boxes` and `gt_classes`.

    Returns:
        list[Instances]: one entry per image holding only the sampled
        proposals, with these fields set:

        - proposal_boxes: the rotated proposal boxes
        - gt_boxes: the rotated ground-truth box each proposal was matched to
          (all-zero placeholder boxes when the image has no ground truth)
        - gt_classes: the classification label chosen for each proposal;
          `self.num_classes` is counted as background below
    """
    all_gt_boxes = [t.gt_boxes for t in targets]
    if self.proposal_append_gt:
        proposals = add_ground_truth_to_proposals(all_gt_boxes, proposals)

    sampled_per_image = []
    fg_counts = []
    bg_counts = []

    for image_proposals, image_targets in zip(proposals, targets):
        image_has_gt = len(image_targets) > 0

        iou_matrix = pairwise_iou_rotated(
            image_targets.gt_boxes, image_proposals.proposal_boxes
        )
        matched_idxs, matched_labels = self.proposal_matcher(iou_matrix)
        keep, classes = self._sample_proposals(
            matched_idxs, matched_labels, image_targets.gt_classes
        )

        image_proposals = image_proposals[keep]
        image_proposals.gt_classes = classes

        if not image_has_gt:
            # No annotation to point at: attach zero-filled 5-d boxes so the
            # field exists with one row per sampled proposal.
            image_proposals.gt_boxes = RotatedBoxes(
                image_targets.gt_boxes.tensor.new_zeros((len(keep), 5))
            )
        else:
            image_proposals.gt_boxes = image_targets.gt_boxes[matched_idxs[keep]]

        background = (classes == self.num_classes).sum().item()
        bg_counts.append(background)
        fg_counts.append(classes.numel() - background)
        sampled_per_image.append(image_proposals)

    # Log the number of fg/bg samples that are selected for training ROI heads
    storage = get_event_storage()
    storage.put_scalar("roi_head/num_fg_samples", np.mean(fg_counts))
    storage.put_scalar("roi_head/num_bg_samples", np.mean(bg_counts))

    return sampled_per_image
def compute_iou_dt_gt(self, dt, gt, is_crowd):
    """
    Compute the IoU between detections and ground truth.

    If neither `dt` nor `gt` is reported as rotated by `self.is_rotated`, the
    call is forwarded unchanged to `maskUtils.iou`. Otherwise both inputs are
    converted to 5-d `RotatedBoxes` via `self.boxlist_to_tensor` and compared
    with `pairwise_iou_rotated`.

    Args:
        dt: detection boxes, in whatever form `self.is_rotated` /
            `self.boxlist_to_tensor` / `maskUtils.iou` accept.
        gt: ground-truth boxes, same form as `dt`.
        is_crowd: iterable of crowd flags; every flag must equal 0 on the
            rotated path.

    Returns:
        The result of `maskUtils.iou(dt, gt, is_crowd)` or of
        `pairwise_iou_rotated(dt, gt)`, depending on the path taken.

    Raises:
        AssertionError: on the rotated path when any crowd flag is non-zero.
    """
    needs_rotated_iou = self.is_rotated(dt) or self.is_rotated(gt)
    if not needs_rotated_iou:
        # This is the same as the classical COCO evaluation
        return maskUtils.iou(dt, gt, is_crowd)

    # TODO: take is_crowd into consideration
    assert all(c == 0 for c in is_crowd)
    rotated_dt = RotatedBoxes(self.boxlist_to_tensor(dt, output_box_dim=5))
    rotated_gt = RotatedBoxes(self.boxlist_to_tensor(gt, output_box_dim=5))
    return pairwise_iou_rotated(rotated_dt, rotated_gt)
def _get_ground_truth(self):
    """
    Build per-image objectness labels and regression targets for the anchors.

    Reads `self.anchors`, `self.image_sizes` and `self.gt_boxes` (iterated in
    lockstep, one entry per image) and uses `self.anchor_matcher`,
    `self.boundary_threshold` and `self.box2box_transform`.

    Returns:
        gt_objectness_logits: list of N tensors. Tensor i is a vector whose
            length is the total number of anchors in image i. Label values
            are in {-1, 0, 1}, with meanings: -1 = ignore; 0 = negative
            class; 1 = positive class.
        gt_anchor_deltas: list of N tensors. Tensor i holds the output of
            `self.box2box_transform.get_deltas` for image i, or zeros shaped
            like the anchor tensor when the image has no ground-truth boxes.
    """
    objectness_per_image = []
    deltas_per_image = []

    # Concatenate anchors from all feature maps into a single RotatedBoxes per image
    merged_anchors = [RotatedBoxes.cat(per_level) for per_level in self.anchors]

    # Per iteration:
    #   image_size: (h, w) of the image
    #   image_anchors: all anchors of the image
    #   image_gt: ground-truth boxes of the image
    for image_size, image_anchors, image_gt in zip(
        self.image_sizes, merged_anchors, self.gt_boxes
    ):
        iou_matrix = pairwise_iou_rotated(image_gt, image_anchors)
        matched_idxs, objectness = self.anchor_matcher(iou_matrix)

        if self.boundary_threshold >= 0:
            # Discard anchors that go out of the boundaries of the image
            # NOTE: This is legacy functionality that is turned off by default in Detectron2
            inside = image_anchors.inside_box(image_size, self.boundary_threshold)
            objectness[~inside] = -1

        if len(image_gt) != 0:
            # TODO wasted computation for ignored boxes
            deltas = self.box2box_transform.get_deltas(
                image_anchors.tensor, image_gt[matched_idxs].tensor
            )
        else:
            # These values won't be used anyway since the anchor is labeled as background
            deltas = torch.zeros_like(image_anchors.tensor)

        objectness_per_image.append(objectness)
        deltas_per_image.append(deltas)

    return objectness_per_image, deltas_per_image
def _match_and_label_boxes(self, proposals, stage, targets):
    """
    Match proposals with groundtruth using the matcher at the given stage.
    Label the proposals as foreground or background based on the match.

    Args:
        proposals (list[Instances]): One Instances for each image, with
            the field "proposal_boxes".
        stage (int): the current stage; selects `self.proposal_matchers[stage]`
            and is embedded in the logged scalar names.
        targets (list[Instances]): the ground truth instances, with the
            fields "gt_boxes" and "gt_classes".

    Returns:
        list[Instances]: the same proposals (modified in place), but with
        fields "gt_classes" and "gt_boxes". For an image without ground
        truth every proposal is labelled `self.num_classes` (background) and
        gets an all-zero rotated box.
    """
    num_fg_samples, num_bg_samples = [], []
    for proposals_per_image, targets_per_image in zip(proposals, targets):
        match_quality_matrix = pairwise_iou_rotated(
            targets_per_image.gt_boxes, proposals_per_image.proposal_boxes
        )
        # proposal_labels are 0 or 1
        matched_idxs, proposal_labels = self.proposal_matchers[stage](match_quality_matrix)
        if len(targets_per_image) > 0:
            gt_classes = targets_per_image.gt_classes[matched_idxs]
            # Label unmatched proposals (0 label from matcher) as background (label=num_classes)
            gt_classes[proposal_labels == 0] = self.num_classes
            gt_boxes = targets_per_image.gt_boxes[matched_idxs]
        else:
            gt_classes = torch.zeros_like(matched_idxs) + self.num_classes
            # Rotated boxes carry five values per box, so the placeholder
            # must be (N, 5); an (N, 4) tensor is not a valid RotatedBoxes.
            gt_boxes = RotatedBoxes(
                targets_per_image.gt_boxes.tensor.new_zeros((len(proposals_per_image), 5))
            )
        proposals_per_image.gt_classes = gt_classes
        proposals_per_image.gt_boxes = gt_boxes

        num_fg_samples.append((proposal_labels == 1).sum().item())
        num_bg_samples.append(proposal_labels.numel() - num_fg_samples[-1])

    # Log the number of fg/bg samples in each stage. With no images there is
    # nothing to average, so skip logging instead of dividing by zero.
    if num_fg_samples:
        storage = get_event_storage()
        storage.put_scalar(
            "stage{}/roi_head/num_fg_samples".format(stage),
            sum(num_fg_samples) / len(num_fg_samples),
        )
        storage.put_scalar(
            "stage{}/roi_head/num_bg_samples".format(stage),
            sum(num_bg_samples) / len(num_bg_samples),
        )
    return proposals
def computeIoU(self, imgId, catId):
    """
    Compute the rotated-box IoU matrix between detections and ground truth.

    Detections and ground truths are looked up in `self._dts` / `self._gts`
    by `(imgId, catId)` when `self.params.useCats` is truthy, otherwise they
    are gathered over every id in `self.params.catIds`. Detections are
    ordered by descending score (stable sort) and truncated to
    `self.params.maxDets[-1]` entries.

    Args:
        imgId: image id used as the first component of the lookup key.
        catId: category id used as the second component of the lookup key
            (ignored when `useCats` is falsy).

    Returns:
        `[]` when there are neither detections nor ground truths; otherwise
        a float64 `numpy.ndarray` of shape `(len(dt), len(gt))` where entry
        `[i, j]` is the IoU between detection `i` and ground truth `j`.
    """
    p = self.params
    if p.useCats:
        gt = self._gts[imgId, catId]
        dt = self._dts[imgId, catId]
    else:
        gt = [g for cId in p.catIds for g in self._gts[imgId, cId]]
        dt = [d for cId in p.catIds for d in self._dts[imgId, cId]]
    if len(gt) == 0 and len(dt) == 0:
        return []

    # mergesort keeps equally-scored detections in their original order
    inds = np.argsort([-d['score'] for d in dt], kind='mergesort')
    dt = [dt[i] for i in inds]
    dt = dt[:p.maxDets[-1]]

    if len(dt) == 0 or len(gt) == 0:
        return np.zeros((len(dt), len(gt)))

    # Build each box set once and compute all pairs in a single call rather
    # than creating two tensors and launching one IoU kernel per pair.
    # reshape(len, 5) (instead of view(-1, 5)) makes a box with the wrong
    # number of values fail loudly instead of being silently re-chunked.
    dt_boxes = RotatedBoxes(
        torch.tensor([d['bbox'] for d in dt], dtype=torch.float).reshape(len(dt), 5))
    gt_boxes = RotatedBoxes(
        torch.tensor([g['bbox'] for g in gt], dtype=torch.float).reshape(len(gt), 5))
    ious = pairwise_iou_rotated(dt_boxes, gt_boxes)

    # Keep the float64 ndarray type the per-pair implementation produced.
    return ious.cpu().numpy().astype(np.float64)
def label_and_sample_proposals(self, proposals, targets):
    """
    Match proposals to ground truth, label them, and subsample a training set.

    Ground-truth boxes are always appended to the proposals first (via
    `add_ground_truth_to_proposals`). For each image the rotated IoU between
    ground-truth boxes and proposals is handed to `self.proposal_matcher`,
    class labels are derived from the match, and `subsample_labels` picks
    foreground and background indices using `self.batch_size_per_image` and
    `self.positive_sample_fraction`.

    Args:
        proposals (list[Instances]): one entry per image, each exposing
            `proposal_boxes`. See :meth:`StandardROIHeads.forward`.
        targets (list[Instances]): one entry per image, each exposing
            `gt_boxes` and `gt_classes`.

    Returns:
        list[Instances]: one entry per image holding only the sampled
        proposals, with these fields set:

        - proposal_boxes: the rotated proposal boxes
        - gt_boxes: the rotated ground-truth box each proposal was matched to
          (all-zero placeholder boxes when the image has no ground truth)
        - gt_classes: matched class; `self.num_classes` for proposals the
          matcher labelled 0 (background), -1 for those it labelled -1
          (ignore)
    """
    all_gt_boxes = [t.gt_boxes for t in targets]

    # Augment the proposals with the ground-truth boxes. Early in training,
    # learned proposals (e.g. from an RPN) may overlap the objects too little
    # to yield any positive example for the second-stage heads; adding the
    # ground-truth boxes guarantees some positives from the very start.
    proposals = add_ground_truth_to_proposals(all_gt_boxes, proposals)

    sampled_per_image = []
    fg_counts = []
    bg_counts = []

    for image_proposals, image_targets in zip(proposals, targets):
        image_has_gt = len(image_targets) > 0

        iou_matrix = pairwise_iou_rotated(
            image_targets.gt_boxes, image_proposals.proposal_boxes
        )
        matched_idxs, match_labels = self.proposal_matcher(iou_matrix)

        # Get the corresponding GT for each proposal
        if not image_has_gt:
            labels = torch.zeros_like(matched_idxs) + self.num_classes
        else:
            labels = image_targets.gt_classes[matched_idxs]
            # Label unmatched proposals (0 label from matcher) as background (label=num_classes)
            labels[match_labels == 0] = self.num_classes
            # Label ignore proposals (-1 label)
            labels[match_labels == -1] = -1

        fg_inds, bg_inds = subsample_labels(
            labels,
            self.batch_size_per_image,
            self.positive_sample_fraction,
            self.num_classes,
        )
        keep = torch.cat([fg_inds, bg_inds], dim=0)

        image_proposals = image_proposals[keep]
        image_proposals.gt_classes = labels[keep]

        if not image_has_gt:
            # No annotation to point at: attach zero-filled 5-d boxes so the
            # field exists with one row per sampled proposal.
            image_proposals.gt_boxes = RotatedBoxes(
                image_targets.gt_boxes.tensor.new_zeros((len(keep), 5))
            )
        else:
            image_proposals.gt_boxes = image_targets.gt_boxes[matched_idxs[keep]]

        fg_counts.append(fg_inds.numel())
        bg_counts.append(bg_inds.numel())
        sampled_per_image.append(image_proposals)

    # Log the number of fg/bg samples that are selected for training ROI heads
    storage = get_event_storage()
    storage.put_scalar("roi_head/num_fg_samples", np.mean(fg_counts))
    storage.put_scalar("roi_head/num_bg_samples", np.mean(bg_counts))

    return sampled_per_image
def evaluate(self):
    """
    Score the top-ranked predicted grasp of every image against its labels.

    A predicted rotated box counts as a true positive when at least one
    ground-truth grasp of the same image has rotated IoU above `OVTHRES`
    with it AND an angle within `ANGLEMAX` of it. Ground-truth grasps are
    read from the text file named in each prediction tuple.

    Returns:
        dict: has a key "grasp", whose value is a dict with "mAP" and
        "mAcc:" (both in percent). Returns None on processes that are not
        the main process.
    """
    OVTHRES = 0.25  # TODO: make this configurable
    ANGLEMAX = 30
    TOP_N = 1
    # Column of the angle in a rotated box row; the loader below emits
    # (xc, yc, w, h, angle), so the angle is the fifth value, not the third.
    ANGLE_COL = 4

    def load_grasps(path):
        # TODO: duplicate code, see dataloader
        with open(path) as f:
            for line in f:
                # strip() instead of line[:-1]: a final line without a
                # trailing newline would otherwise lose its last digit.
                line = line.strip()
                if not line:
                    continue
                # careful: potential mistake in jacquard format description on website,
                # jaw and opening interchanged!
                xc, yc, a, jaw, opening = [float(v) for v in line.split(';')]
                # jaw = h, opening = w according to jacquard paper
                yield (xc, yc, opening, jaw, -a)

    comm.synchronize()
    if not comm.is_main_process():
        return

    eps = torch.finfo(torch.float64).eps
    mAP = 0
    nTotal = len(self._predictions)
    mTps, mFps = 0, 0
    for pred in self._predictions:
        # `classes` is part of the prediction tuple but not needed here.
        file_name, scores, boxes, classes = pred
        boxes_gt = RotatedBoxes(list(load_grasps(file_name)))

        # init true positives, false positives
        tps, fps = [], []

        # sort by confidence/score
        boxes = boxes[np.argsort(-scores, kind='mergesort')]

        for j in range(TOP_N):
            hit = False
            # An image with fewer than TOP_N predictions (or no labels) is
            # counted as a miss instead of raising.
            if j < len(boxes) and len(boxes_gt) > 0:
                box = boxes[j]
                ious = pairwise_iou_rotated(box, boxes_gt).reshape(-1)
                # Both conditions must hold for the SAME ground-truth grasp.
                # NOTE(review): the difference is not wrapped around (e.g.
                # modulo 180 degrees) -- confirm the angle ranges of
                # predictions and labels make that unnecessary.
                angle_diffs = (
                    boxes_gt.tensor[:, ANGLE_COL] - box.tensor.reshape(-1)[ANGLE_COL]
                ).abs()
                hit = bool(((ious > OVTHRES) & (angle_diffs <= ANGLEMAX)).any())

            if hit:
                tps.append(1)
                fps.append(0)
                mTps += 1
            else:
                fps.append(1)
                tps.append(0)
                mFps += 1

        # compute precision and recall
        fp = np.cumsum(np.array(fps))
        tp = np.cumsum(np.array(tps))
        rec = tp / np.maximum(TOP_N, eps)  # avoid divide by zero
        prec = tp / np.maximum(tp + fp, eps)  # avoid divide by zero

        # let pascal voc compute ap
        ap = voc_ap(rec, prec)
        mAP += ap / nTotal

    judged = mTps + mFps
    acc = mTps / judged if judged else 0.0  # no predictions at all

    ret = OrderedDict()
    # NOTE: the "mAcc:" key (with its trailing colon) is kept as-is because
    # consumers of this dict may already depend on it.
    ret["grasp"] = {"mAP": mAP*100, "mAcc:": acc*100}
    # TODO: add segm
    return ret
def get_ground_truth(self, anchors, targets):
    """
    Args:
        anchors (list[list[Boxes]]): a list of N=#image elements. Each is a
            list of #feature level Boxes. The Boxes contains anchors of
            this image on the specific feature level. They are concatenated
            per image with `RotatedBoxes.cat`.
        targets (list[Instances]): a list of N `Instances`s. The i-th
            `Instances` contains the ground-truth per-instance annotations
            for the i-th input image. Specify `targets` during training only.

    Returns:
        gt_classes (Tensor):
            An integer tensor of shape (N, R) storing ground-truth
            labels for each anchor.
            R is the total number of anchors, i.e. the sum of Hi x Wi x A for all levels.
            Anchors with an IoU with some target higher than the foreground threshold
            are assigned their corresponding label in the [0, K-1] range.
            Anchors whose IoU are below the background threshold are assigned
            the label "K". Anchors whose IoU are between the foreground and background
            thresholds are assigned a label "-1", i.e. ignore.
            For an image without annotations every anchor gets label "K".
        gt_anchors_deltas (Tensor):
            Shape (N, R, D). The last dimension holds the box2box transform
            targets produced by `self.box2box_transform.get_deltas` that map
            each anchor to its matched ground-truth box (presumably D = 5 for
            rotated boxes -- confirm against the configured transform). The
            values are meaningful only when the corresponding anchor is
            labeled as foreground; they are all zero for an image without
            annotations.
    """
    gt_classes = []
    gt_anchors_deltas = []
    # one concatenated RotatedBoxes per image
    anchors = [RotatedBoxes.cat(anchors_i) for anchors_i in anchors]

    for anchors_per_image, targets_per_image in zip(anchors, targets):
        match_quality_matrix = pairwise_iou_rotated(
            targets_per_image.gt_boxes, anchors_per_image)
        gt_matched_idxs, anchor_labels = self.matcher(match_quality_matrix)

        has_gt = len(targets_per_image) > 0
        if has_gt:
            # ground truth box regression
            matched_gt_boxes = targets_per_image.gt_boxes[gt_matched_idxs]
            gt_anchors_reg_deltas_i = self.box2box_transform.get_deltas(
                anchors_per_image.tensor, matched_gt_boxes.tensor)

            # ground truth classes
            gt_classes_i = targets_per_image.gt_classes[gt_matched_idxs]
            # Anchors with label 0 are treated as background.
            gt_classes_i[anchor_labels == 0] = self.num_classes
            # Anchors with label -1 are ignored.
            gt_classes_i[anchor_labels == -1] = -1
        else:
            # There is no box to index with the matched indices, so the
            # matched-box lookup must not run here. Everything is background
            # and the regression targets are unused placeholders.
            gt_classes_i = torch.zeros_like(gt_matched_idxs) + self.num_classes
            # assumes get_deltas returns one row per anchor with the same
            # width as the anchor tensor, so the stack below stays aligned
            # -- TODO confirm
            gt_anchors_reg_deltas_i = torch.zeros_like(anchors_per_image.tensor)

        gt_classes.append(gt_classes_i)
        gt_anchors_deltas.append(gt_anchors_reg_deltas_i)

    return torch.stack(gt_classes), torch.stack(gt_anchors_deltas)
def evaluate(self):
    """
    Score the top-ranked prediction of every image against positive and
    negative grasp labels.

    The class of the top-scoring prediction selects the branch:

    - class 0 (grasp): true positive when at least one positive label has
      rotated IoU above `OVTHRES` with the box AND an angle within
      `ANGLEMAX` of it; false positive otherwise.
    - any other class: true negative when at least one negative label has
      rotated IoU above `OVTHRES` with the box; false negative otherwise.

    Returns:
        dict: has a key "grasp", whose value is a dict with "mAP", "mAcc:"
        and "mWhatever" (all in percent; "mWhatever" is TP / (TP + FP)).
        Returns None on processes that are not the main process.
    """
    OVTHRES = 0.25  # TODO: make this configurable
    ANGLEMAX = 30
    TOP_N = 1
    # Rotated boxes are laid out (x_center, y_center, width, height, angle),
    # so the angle is the fifth value, not the third (which is the width).
    ANGLE_COL = 4

    comm.synchronize()
    if not comm.is_main_process():
        return

    eps = torch.finfo(torch.float64).eps
    mAP = 0
    nTotal = len(self._predictions)
    mTps, mFps, mTns, mFns = 0, 0, 0, 0
    for pred in self._predictions:
        file_name, neg_file_name, scores, boxes, classes = pred
        with open(file_name) as f:
            boxes_gt = RotatedBoxes(list(Grasp.load_grasps_plain(f)))
        with open(neg_file_name) as f:
            neg_boxes_gt = RotatedBoxes(list(Grasp.load_grasps_plain(f)))

        # init true positives, false positives, true negatives, false negatives
        tps, fps, tns, fns = [], [], [], []

        # sort by confidence/score; keep the permutation so the class is
        # read for the same prediction as the box
        order = np.argsort(-scores, kind='mergesort')
        boxes = boxes[order]

        # NOTE(review): an image with fewer than TOP_N predictions raises
        # IndexError below; decide how such an image should be counted.
        for j in range(TOP_N):
            box = boxes[j]
            class_ = classes[order[j]]
            if class_ == 0:  # grasp
                hit = False
                if len(boxes_gt) > 0:
                    ious = pairwise_iou_rotated(box, boxes_gt).reshape(-1)
                    # Both conditions must hold for the SAME label.
                    # NOTE(review): the difference is not wrapped around
                    # (e.g. modulo 180 degrees) -- confirm the angle ranges
                    # make that unnecessary.
                    angle_diffs = (
                        boxes_gt.tensor[:, ANGLE_COL] - box.tensor.reshape(-1)[ANGLE_COL]
                    ).abs()
                    hit = bool(((ious > OVTHRES) & (angle_diffs <= ANGLEMAX)).any())
                if hit:
                    tps.append(1)
                    fps.append(0)
                    tns.append(0)
                    fns.append(0)
                    mTps += 1
                else:
                    tps.append(0)
                    fps.append(1)
                    tns.append(0)
                    fns.append(0)
                    mFps += 1
            else:
                # Only the overlap matters for negatives, not the angle.
                overlaps_negative = False
                if len(neg_boxes_gt) > 0:
                    neg_ious = pairwise_iou_rotated(box, neg_boxes_gt)
                    overlaps_negative = bool((neg_ious > OVTHRES).any())
                if overlaps_negative:
                    tps.append(0)
                    fps.append(0)
                    tns.append(1)
                    fns.append(0)
                    mTns += 1
                else:
                    tps.append(0)
                    fps.append(0)
                    tns.append(0)
                    fns.append(1)
                    mFns += 1

        # compute precision and recall
        fp = np.cumsum(np.array(fps))
        tp = np.cumsum(np.array(tps))
        fn = np.cumsum(np.array(fns))
        rec = tp / np.maximum(tp + fn, eps)  # avoid divide by zero
        prec = tp / np.maximum(tp + fp, eps)  # avoid divide by zero

        # let pascal voc compute ap
        ap = voc_ap(rec, prec)
        mAP += ap / nTotal

    judged = mTps + mFps + mTns + mFns
    acc = (mTps + mTns) / judged if judged else 0.0  # no predictions at all
    grasp_predictions = mTps + mFps
    what = mTps / grasp_predictions if grasp_predictions else 0.0  # no grasp-class predictions

    ret = OrderedDict()
    # NOTE: the "mAcc:" key (with its trailing colon) and "mWhatever" are
    # kept as-is because consumers of this dict may already depend on them.
    ret["grasp"] = {
        "mAP": mAP * 100,
        "mAcc:": acc * 100,
        "mWhatever": what * 100
    }
    # TODO: add segm
    return ret
def _evaluate_rotated_box_proposals(dataset_predictions, coco_api, thresholds=None, area="all", limit=None):
    """
    Evaluate detection proposal recall metrics for rotated boxes.

    For every image the proposals are ordered by descending objectness and
    greedily matched one-to-one with the non-crowd ground-truth boxes whose
    area falls in the selected bucket; the IoU of each match is recorded.
    Recall is then the fraction of counted ground-truth boxes whose recorded
    IoU reaches each threshold.

    Args:
        dataset_predictions: iterable of dicts with keys "proposals" (an
            object exposing `objectness_logits` and `proposal_boxes`) and
            "image_id".
        coco_api: object providing `getAnnIds(imgIds=...)` and `loadAnns(...)`.
        thresholds: IoU thresholds as a tensor; defaults to 0.5 to 0.95 in
            steps of 0.05.
        area (str): name of the ground-truth area bucket to evaluate.
        limit (int or None): keep at most this many top proposals per image.

    Returns:
        dict with keys "ar" (mean of the recalls), "recalls", "thresholds",
        "gt_overlaps" (all recorded IoUs, sorted with `torch.sort`) and
        "num_pos" (number of ground-truth boxes counted).

    Raises:
        AssertionError: if `area` is not a known bucket name.
    """
    # [min, max] area bounds of every bucket
    area_bounds = {
        "all": [0**2, 1e5**2],
        "small": [0**2, 32**2],
        "medium": [32**2, 96**2],
        "large": [96**2, 1e5**2],
        "96-128": [96**2, 128**2],
        "128-256": [128**2, 256**2],
        "256-512": [256**2, 512**2],
        "512-inf": [512**2, 1e5**2],
    }
    assert area in area_bounds, "Unknown area range: {}".format(area)
    min_area, max_area = area_bounds[area]

    per_image_overlaps = []
    num_pos = 0

    for entry in dataset_predictions:
        proposals = entry["proposals"]

        # sort predictions in descending order
        # TODO maybe remove this and make it explicit in the documentation
        _, order = proposals.objectness_logits.sort(descending=True)
        proposals = proposals[order]

        annotations = coco_api.loadAnns(coco_api.getAnnIds(imgIds=entry["image_id"]))
        non_crowd = [obj for obj in annotations if obj["iscrowd"] == 0]
        raw_boxes = [
            BoxMode.convert(obj["bbox"], BoxMode.XYWHA_ABS, BoxMode.XYWHA_ABS)
            for obj in non_crowd
        ]
        # reshape guards against the no-box case
        gt_boxes = RotatedBoxes(torch.as_tensor(raw_boxes).reshape(-1, 5))
        gt_areas = torch.as_tensor([obj["area"] for obj in non_crowd])

        if len(gt_boxes) == 0 or len(proposals) == 0:
            continue

        in_bucket = (gt_areas >= min_area) & (gt_areas <= max_area)
        gt_boxes = gt_boxes[in_bucket]
        num_pos += len(gt_boxes)
        if len(gt_boxes) == 0:
            continue

        if limit is not None and len(proposals) > limit:
            proposals = proposals[:limit]

        overlaps = pairwise_iou_rotated(proposals.proposal_boxes, gt_boxes)

        matched_ious = torch.zeros(len(gt_boxes))
        for slot in range(min(len(proposals), len(gt_boxes))):
            # best proposal (and its IoU) for every gt box
            best_per_gt, best_proposal_per_gt = overlaps.max(dim=0)
            # the gt box that is currently covered best
            best_iou, best_gt = best_per_gt.max(dim=0)
            assert best_iou >= 0
            # the proposal covering that gt box
            best_proposal = best_proposal_per_gt[best_gt]
            # record the IoU of this pairing
            matched_ious[slot] = overlaps[best_proposal, best_gt]
            assert matched_ious[slot] == best_iou
            # retire both the proposal and the gt box
            overlaps[best_proposal, :] = -1
            overlaps[:, best_gt] = -1

        per_image_overlaps.append(matched_ious)

    if len(per_image_overlaps):
        gt_overlaps = torch.cat(per_image_overlaps, dim=0)
    else:
        gt_overlaps = torch.zeros(0, dtype=torch.float32)
    gt_overlaps, _ = torch.sort(gt_overlaps)

    if thresholds is None:
        step = 0.05
        thresholds = torch.arange(0.5, 0.95 + 1e-5, step, dtype=torch.float32)

    # compute recall for each iou threshold
    recalls = torch.zeros_like(thresholds)
    denominator = float(num_pos)
    for idx, threshold in enumerate(thresholds):
        recalls[idx] = (gt_overlaps >= threshold).float().sum() / denominator
    # ar = 2 * np.trapz(recalls, thresholds)
    ar = recalls.mean()

    return {
        "ar": ar,
        "recalls": recalls,
        "thresholds": thresholds,
        "gt_overlaps": gt_overlaps,
        "num_pos": num_pos,
    }