def _sample_proposals(self, matched_idxs, matched_labels, gt_classes):
    """
    Based on the matching between N proposals and M groundtruth,
    sample the proposals and set their classification labels.

    Args:
        matched_idxs (Tensor): a vector of length N, each is the best-matched
            gt index in [0, M) for each proposal.
        matched_labels (Tensor): a vector of length N, the matcher's label
            (one of cfg.MODEL.ROI_HEADS.IOU_LABELS) for each proposal.
        gt_classes (Tensor): a vector of length M.

    Returns:
        Tensor: a vector of indices of sampled proposals. Each is in [0, N).
        Tensor: a vector of the same length, the classification label for
            each sampled proposal. Each sample is labeled as either a
            category in [0, num_classes) or the background (num_classes).
    """
    has_gt = gt_classes.numel() > 0
    # Get the corresponding GT for each proposal
    if has_gt:
        gt_classes = gt_classes[matched_idxs]
        # Label unmatched proposals (0 label from matcher) as background
        # (label=num_classes)
        gt_classes[matched_labels == 0] = self.num_classes
        # Label ignore proposals (-1 label)
        gt_classes[matched_labels == -1] = -1
    else:
        gt_classes = torch.zeros_like(matched_idxs) + self.num_classes

    sampled_fg_idxs, sampled_bg_idxs = subsample_labels(
        gt_classes, self.batch_size_per_image, self.positive_sample_fraction,
        self.num_classes)
    sampled_idxs = torch.cat([sampled_fg_idxs, sampled_bg_idxs], dim=0)
    return sampled_idxs, gt_classes[sampled_idxs]
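# ---------------------------------------------------------------------------
# The samplers in this section all delegate to `subsample_labels`. Below is a
# minimal sketch of that helper, assuming detectron2-style semantics; it
# illustrates the expected contract and is not necessarily this repo's exact
# implementation.
# ---------------------------------------------------------------------------
import torch

def subsample_labels(labels, num_samples, positive_fraction, bg_label):
    """
    Randomly select up to `num_samples` elements: at most
    `num_samples * positive_fraction` positives, with negatives filling the
    remainder. Entries labeled -1 are ignored; `bg_label` marks negatives.
    Returns (pos_idx, neg_idx), index tensors into `labels`.
    """
    positive = ((labels != -1) & (labels != bg_label)).nonzero(as_tuple=True)[0]
    negative = (labels == bg_label).nonzero(as_tuple=True)[0]

    num_pos = int(num_samples * positive_fraction)
    num_pos = min(positive.numel(), num_pos)  # cap at available positives
    num_neg = num_samples - num_pos
    num_neg = min(negative.numel(), num_neg)  # cap at available negatives

    # Sample without replacement via a random permutation.
    perm1 = torch.randperm(positive.numel(), device=positive.device)[:num_pos]
    perm2 = torch.randperm(negative.numel(), device=negative.device)[:num_neg]
    return positive[perm1], negative[perm2]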
def _sample_proposals(
        self, matched_idxs: torch.Tensor, matched_labels: torch.Tensor,
        gt_classes: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
    """
    Based on the matching between N proposals and M groundtruth,
    sample the proposals and set their classification labels.

    Args:
        matched_idxs (Tensor): a vector of length N, each is the best-matched
            gt index in [0, M) for each proposal.
        matched_labels (Tensor): a vector of length N, the matcher's label
            (one of cfg.MODEL.ROI_HEADS.IOU_LABELS) for each proposal.
        gt_classes (Tensor): a vector of length M.

    Returns:
        Tensor: a vector of indices of sampled proposals. Each is in [0, N).
        Tensor: a vector of the same length, the classification label for
            each sampled proposal. Each sample is labeled as either a
            category in [0, num_classes) or the background (num_classes).
    """
    if self.random_sample_size:
        # Draw a keep ratio uniformly from [lower_bound, upper_bound] and
        # truncate the proposal list to that fraction.
        diff = (self.random_sample_size_upper_bound -
                self.random_sample_size_lower_bound)
        sample_factor = (self.random_sample_size_upper_bound -
                         np.random.rand(1)[0] * diff)
        nms_topk = int(matched_idxs.shape[0] * sample_factor)
        matched_idxs = matched_idxs[:nms_topk]
        matched_labels = matched_labels[:nms_topk]

    has_gt = gt_classes.numel() > 0
    # Get the corresponding GT for each proposal
    if has_gt:
        gt_classes = gt_classes[matched_idxs]
        # Label unmatched proposals (0 label from matcher) as background
        # (label=num_classes)
        gt_classes[matched_labels == 0] = self.num_classes
        # Label ignore proposals (-1 label)
        gt_classes[matched_labels == -1] = -1
    else:
        gt_classes = torch.zeros_like(matched_idxs) + self.num_classes

    sampled_fg_idxs, sampled_bg_idxs = subsample_labels(
        gt_classes, self.batch_size_per_image, self.positive_fraction,
        self.num_classes)
    sampled_idxs = torch.cat([sampled_fg_idxs, sampled_bg_idxs], dim=0)

    if self.random_proposal_drop:
        # Randomly drop a fraction of the sampled proposals, again with a
        # keep ratio drawn uniformly from the configured bounds.
        diff = (self.random_proposal_drop_upper_bound -
                self.random_proposal_drop_lower_bound)
        sample_factor = (self.random_proposal_drop_upper_bound -
                         np.random.rand(1)[0] * diff)
        nms_topk = int(sampled_idxs.shape[0] * sample_factor)
        subsample_idxs = np.random.choice(sampled_idxs.shape[0], nms_topk,
                                          replace=False)
        subsample_idxs = torch.from_numpy(subsample_idxs).to(
            sampled_idxs.device)
        sampled_idxs = sampled_idxs[subsample_idxs]

    return sampled_idxs, gt_classes[sampled_idxs]
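# A toy illustration of the uniform keep-ratio draw used by both random
# branches above. The bound values here are made up for the example; the
# real ones come from the model config.
import numpy as np

upper_bound, lower_bound = 1.0, 0.8
diff = upper_bound - lower_bound
sample_factor = upper_bound - np.random.rand(1)[0] * diff  # in (0.8, 1.0]
print(int(1000 * sample_factor))  # keeps between ~800 and 1000 of 1000 proposals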
def resample(label):
    """
    Randomly sample a subset of positive and negative examples by
    overwriting the label vector to the ignore value (-1) for all elements
    that are not included in the sample.
    """
    # Defined as a closure inside an RPN method, so `self` refers to the
    # enclosing module.
    pos_idx, neg_idx = subsample_labels(label, self.batch_size_per_image,
                                        self.positive_fraction, 0)
    # Fill with the ignore label (-1), then set positive and negative labels
    label.fill_(-1)
    label.scatter_(0, pos_idx, 1)
    label.scatter_(0, neg_idx, 0)
    return label
def _subsample_labels(self, label):
    """
    Randomly sample a subset of positive and negative examples, and
    overwrite the label vector to the ignore value (-1) for all elements
    that are not included in the sample.

    Args:
        label (Tensor): a vector of -1, 0, 1. Will be modified in-place and
            returned.
    """
    pos_idx, neg_idx = subsample_labels(label, self.batch_size_per_image,
                                        self.positive_fraction, 0)
    # Fill with the ignore label (-1), then set positive and negative labels
    label.fill_(-1)
    label.scatter_(0, pos_idx, 1)
    label.scatter_(0, neg_idx, 0)
    return label
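# A toy run of the relabeling contract above, using the `subsample_labels`
# sketch from earlier in this section and hypothetical sampling parameters.
import torch

label = torch.tensor([1, 1, 1, 0, 0, 0, 0, -1])
pos_idx, neg_idx = subsample_labels(label, num_samples=4,
                                    positive_fraction=0.5, bg_label=0)
label.fill_(-1)
label.scatter_(0, pos_idx, 1)
label.scatter_(0, neg_idx, 0)
# At most 2 entries remain 1 and at most 2 remain 0; everything else is now
# -1 and will be ignored by the RPN objectness loss.
print(label)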
def _sample_proposals(self, matched_idxs, matched_labels, gt_classes):
    """
    Modified from roi_heads.
    """
    has_gt = gt_classes.numel() > 0
    # Get the corresponding GT for each proposal
    if has_gt:
        gt_classes = gt_classes[matched_idxs]
        # Label unmatched proposals (0 label from matcher) as background
        # (label=num_classes)
        gt_classes[matched_labels == 0] = self.num_classes
        # Label ignore proposals (-1 label)
        gt_classes[matched_labels == -1] = -1
    else:
        gt_classes = torch.zeros_like(matched_idxs) + self.num_classes

    sampled_fg_idxs, sampled_bg_idxs = subsample_labels(
        gt_classes, self.batch_size_per_image, self.positive_sample_fraction,
        self.num_classes
    )
    sampled_idxs = torch.cat([sampled_fg_idxs, sampled_bg_idxs], dim=0)
    return sampled_idxs, gt_classes[sampled_idxs]
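# A small worked example of the label-assignment rules shared by the
# `_sample_proposals` variants above; the numbers are illustrative only.
import torch

num_classes = 80
matched_idxs = torch.tensor([2, 0, 1])     # best-matched gt per proposal
matched_labels = torch.tensor([1, 0, -1])  # matcher: fg, bg, ignore
gt_classes = torch.tensor([5, 7, 9])       # class of each gt box

gt_classes = gt_classes[matched_idxs]          # -> tensor([9, 5, 7])
gt_classes[matched_labels == 0] = num_classes  # background gets label 80
gt_classes[matched_labels == -1] = -1          # ignored proposals get -1
print(gt_classes)                              # tensor([ 9, 80, -1])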
def label_and_sample_proposals(self, proposals, targets, isassociation=False):
    """
    Prepare some proposals to be used to train the ROI heads.
    It performs box matching between `proposals` and `targets`, and assigns
    training labels to the proposals.
    It returns `self.batch_size_per_image` random samples from proposals and
    groundtruth boxes, with a fraction of positives that is no larger than
    `self.positive_sample_fraction`.

    Args:
        See :meth:`ROIHeads.forward`

    Returns:
        list[Instances]: length `N` list of `Instances`s containing the
            proposals sampled for training. Each `Instances` has the
            following fields:

            - proposal_boxes: the proposal boxes
            - gt_boxes: the ground-truth box that the proposal is assigned
              to (this is only meaningful if the proposal has a label > 0;
              if label = 0 then the ground-truth box is random)

            Other fields such as "gt_classes" and "gt_masks" that are
            included in `targets`.
    """
    gt_boxes = [x.gt_boxes for x in targets]
    # Augment proposals with ground-truth boxes.
    # In the case of learned proposals (e.g., RPN), when training starts
    # the proposals will be low quality due to random initialization.
    # It's possible that none of these initial proposals have high enough
    # overlap with the gt objects to be used as positive examples for the
    # second stage components (box head, cls head, mask head). Adding the
    # gt boxes to the set of proposals ensures that the second stage
    # components will have some positive examples from the start of
    # training. For RPN, this augmentation improves convergence and
    # empirically improves box AP on COCO by about 0.5 points (under one
    # tested configuration). A sketch of `add_ground_truth_to_proposals`
    # follows this method.
    if targets[0].has('gt_light'):
        gt_light = [x.gt_light for x in targets]
        if self.proposal_append_gt:
            proposals = add_ground_truth_to_proposals(
                gt_boxes, proposals, gt_light)
    else:
        gt_light = None
        if self.proposal_append_gt:
            proposals = add_ground_truth_to_proposals(gt_boxes, proposals)

    proposals_with_gt = []
    num_fg_samples = []
    num_bg_samples = []
    for proposals_per_image, targets_per_image in zip(proposals, targets):
        has_gt = len(targets_per_image) > 0
        match_quality_matrix = pairwise_iou(
            targets_per_image.gt_boxes, proposals_per_image.proposal_boxes)
        matched_idxs, proposals_labels = self.proposal_matcher(
            match_quality_matrix)

        if isassociation:
            num_classes = self.num_classes - 1
        else:
            num_classes = self.num_classes

        # Get the corresponding GT for each proposal
        if has_gt:
            gt_classes = targets_per_image.gt_classes[matched_idxs]
            # Label unmatched proposals (0 label from matcher) as background
            # (label=num_classes)
            gt_classes[proposals_labels == 0] = num_classes
            # Label ignore proposals (-1 label)
            gt_classes[proposals_labels == -1] = -1
        else:
            gt_classes = torch.zeros_like(matched_idxs) + num_classes

        sampled_fg_inds, sampled_bg_inds = subsample_labels(
            gt_classes,
            self.batch_size_per_image,
            self.positive_sample_fraction,
            num_classes,
        )
        sampled_inds = torch.cat([sampled_fg_inds, sampled_bg_inds], dim=0)

        proposals_per_image = proposals_per_image[sampled_inds]
        proposals_per_image.gt_classes = gt_classes[sampled_inds]

        # We index all the attributes of targets that start with "gt_"
        # and have not been added to proposals yet (="gt_classes").
        if has_gt:
            sampled_targets = matched_idxs[sampled_inds]
            # NOTE: here the indexing wastes some compute, because heads
            # like masks, keypoints, etc, will filter the proposals again
            # (by foreground/background, or number of keypoints in the
            # image, etc), so we essentially index the data twice.
            for (trg_name, trg_value) in targets_per_image.get_fields().items():
                if (trg_name.startswith("gt_")
                        and not proposals_per_image.has(trg_name)):
                    proposals_per_image.set(trg_name,
                                            trg_value[sampled_targets])
        else:
            gt_boxes = Boxes(
                targets_per_image.gt_boxes.tensor.new_zeros(
                    (len(sampled_inds), 4)))
            proposals_per_image.gt_boxes = gt_boxes
            if gt_light is not None:
                # Use a separate name so the per-image boxes do not clobber
                # the outer `gt_light` list.
                gt_light_boxes = Boxes(
                    targets_per_image.gt_light.tensor.new_zeros(
                        (len(sampled_inds), 4)))
                proposals_per_image.gt_light = gt_light_boxes

        num_fg_samples.append(sampled_fg_inds.numel())
        num_bg_samples.append(sampled_bg_inds.numel())
        proposals_with_gt.append(proposals_per_image)

    # Log the number of fg/bg samples that are selected for training ROI heads
    storage = get_event_storage()
    storage.put_scalar("roi_head/num_fg_samples", np.mean(num_fg_samples))
    storage.put_scalar("roi_head/num_bg_samples", np.mean(num_bg_samples))

    return proposals_with_gt
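# ---------------------------------------------------------------------------
# For reference, a minimal sketch of the per-image core of
# `add_ground_truth_to_proposals`, following the stock detectron2 behavior.
# The three-argument `gt_light` variant called above is repo-specific and is
# not reproduced here; this is an illustration, not this repo's exact code.
# ---------------------------------------------------------------------------
import math
import torch
from detectron2.structures import Instances

def add_ground_truth_to_proposals_single_image(gt_boxes, proposals):
    device = proposals.objectness_logits.device
    # Give gt boxes an objectness logit close to certainty (sigmoid ~ 1).
    gt_logit_value = math.log((1.0 - 1e-10) / (1 - (1.0 - 1e-10)))
    gt_logits = gt_logit_value * torch.ones(len(gt_boxes), device=device)

    gt_proposal = Instances(proposals.image_size)
    gt_proposal.proposal_boxes = gt_boxes
    gt_proposal.objectness_logits = gt_logits
    # Concatenate the gt boxes after the predicted proposals.
    return Instances.cat([proposals, gt_proposal])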