def __call__(self, proposals_with_targets):
    """
    Filters proposals with targets to keep only the ones relevant for DensePose training

    proposals: list(Instances), each element of the list corresponds to
        various instances (proposals, GT for boxes and densepose) for one image
    """
    proposals_filtered = []
    for proposals_per_image in proposals_with_targets:
        if not hasattr(proposals_per_image, "gt_densepose"):
            continue
        assert hasattr(proposals_per_image, "gt_boxes")
        assert hasattr(proposals_per_image, "proposal_boxes")
        gt_boxes = proposals_per_image.gt_boxes
        est_boxes = proposals_per_image.proposal_boxes
        # apply match threshold for densepose head
        iou = matched_boxlist_iou(gt_boxes, est_boxes)
        iou_select = iou > self.iou_threshold
        proposals_per_image = proposals_per_image[iou_select]
        assert len(proposals_per_image.gt_boxes) == len(proposals_per_image.proposal_boxes)
        # filter out any target without densepose annotation
        gt_densepose = proposals_per_image.gt_densepose
        assert len(proposals_per_image.gt_boxes) == len(proposals_per_image.gt_densepose)
        selected_indices = [
            i for i, dp_target in enumerate(gt_densepose) if dp_target is not None
        ]
        if len(selected_indices) != len(gt_densepose):
            proposals_per_image = proposals_per_image[selected_indices]
        assert len(proposals_per_image.gt_boxes) == len(proposals_per_image.proposal_boxes)
        assert len(proposals_per_image.gt_boxes) == len(proposals_per_image.gt_densepose)
        proposals_filtered.append(proposals_per_image)
    return proposals_filtered
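# Illustrative only: a minimal plain-torch sketch of the per-pair IoU that
# `matched_boxlist_iou` returns in the filter above (one IoU value per matched
# GT/proposal pair, not an NxM matrix). This is a sketch for clarity, not the
# detectron2 implementation; `_matched_iou_sketch` is a hypothetical helper name.
def _matched_iou_sketch(gt_xyxy: torch.Tensor, prop_xyxy: torch.Tensor) -> torch.Tensor:
    # both inputs: (N, 4) tensors in (x1, y1, x2, y2) format, row i matched to row i
    lt = torch.max(gt_xyxy[:, :2], prop_xyxy[:, :2])   # (N, 2) intersection top-left
    rb = torch.min(gt_xyxy[:, 2:], prop_xyxy[:, 2:])   # (N, 2) intersection bottom-right
    wh = (rb - lt).clamp(min=0)                        # (N, 2) intersection width/height
    inter = wh[:, 0] * wh[:, 1]
    area_gt = (gt_xyxy[:, 2] - gt_xyxy[:, 0]) * (gt_xyxy[:, 3] - gt_xyxy[:, 1])
    area_pr = (prop_xyxy[:, 2] - prop_xyxy[:, 0]) * (prop_xyxy[:, 3] - prop_xyxy[:, 1])
    return inter / (area_gt + area_pr - inter).clamp(min=1e-6)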
def select_proposals_with_visible_keypoints(proposals):
    """
    Args:
        proposals (list[Instances]): a list of N Instances, where N is the
            number of images.

    Returns:
        proposals: only contains proposals with at least one visible keypoint.

    Note that this is still slightly different from Detectron.
    In Detectron, proposals for training keypoint head are re-sampled from
    all the proposals with IOU>threshold & >=1 visible keypoint.

    Here, the proposals are first sampled from all proposals with
    IOU>threshold, then proposals with no visible keypoint are filtered out.
    This strategy seems to make no difference on Detectron and is easier to implement.
    """
    ret = []
    all_num_fg = []
    for proposals_per_image in proposals:
        gt_boxes = proposals_per_image.gt_boxes
        est_boxes = proposals_per_image.proposal_boxes
        iou = matched_boxlist_iou(gt_boxes, est_boxes)
        iou_select = iou > 0.55  # self.iou_threshold
        proposals_per_image = proposals_per_image[iou_select]

        # #fg x K x 3
        gt_keypoints = proposals_per_image.gt_keypoints.tensor
        vis_mask = gt_keypoints[:, :, 2] >= 1
        xs, ys = gt_keypoints[:, :, 0], gt_keypoints[:, :, 1]
        proposal_boxes = proposals_per_image.proposal_boxes.tensor.unsqueeze(dim=1)  # #fg x 1 x 4
        kp_in_box = (
            (xs >= proposal_boxes[:, :, 0])
            & (xs <= proposal_boxes[:, :, 2])
            & (ys >= proposal_boxes[:, :, 1])
            & (ys <= proposal_boxes[:, :, 3])
        )
        selection = (kp_in_box & vis_mask).any(dim=1)
        selection_idxs = torch.nonzero(selection).squeeze(1)
        all_num_fg.append(selection_idxs.numel())
        ret.append(proposals_per_image[selection_idxs])

    storage = get_event_storage()
    storage.put_scalar("keypoint_head/num_fg_samples", np.mean(all_num_fg))
    return ret
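# Illustrative only: a tiny self-contained sketch (hypothetical helper, not part
# of detectron2) of the keypoint-in-box broadcast above. (#fg, K) keypoint
# coordinates are compared against (#fg, 1, 4) proposal boxes, so each proposal
# is tested only against its own keypoints. Shapes and values are made up.
def _keypoint_in_box_sketch() -> torch.Tensor:
    kpts = torch.tensor([[[5.0, 5.0, 2.0], [50.0, 50.0, 0.0]]])   # (1, K=2, 3): x, y, visibility
    boxes = torch.tensor([[0.0, 0.0, 10.0, 10.0]]).unsqueeze(1)   # (1, 1, 4): x1, y1, x2, y2
    xs, ys, vis = kpts[:, :, 0], kpts[:, :, 1], kpts[:, :, 2] >= 1
    inside = (
        (xs >= boxes[:, :, 0]) & (xs <= boxes[:, :, 2])
        & (ys >= boxes[:, :, 1]) & (ys <= boxes[:, :, 3])
    )
    # keypoint 0 is visible and inside the box -> this proposal would be kept
    return (inside & vis).any(dim=1)  # tensor([True])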
def __call__(
    self, features: List[torch.Tensor], proposals_with_targets: List[Instances]
):
    """
    Filters proposals with targets to keep only the ones relevant for DensePose training

    Args:
        features (list[Tensor]): input data as a list of features,
            each feature is a tensor. Axis 0 represents the number of
            images `N` in the input data; axes 1-3 are channels, height, and
            width, which may vary between features (e.g., if a feature pyramid
            is used).
        proposals_with_targets (list[Instances]): length `N` list of
            `Instances`. The i-th `Instances` contains instances
            (proposals, GT) for the i-th input image.
    Returns:
        list[Tensor]: filtered features
        list[Instances]: filtered proposals
    """
    proposals_filtered = []
    # TODO: the commented out code was supposed to correctly deal with situations
    # where no valid DensePose GT is available for certain images. The corresponding
    # image features were sliced and proposals were filtered. This led to performance
    # deterioration, both in terms of runtime and in terms of evaluation results.
    #
    # feature_mask = torch.ones(
    #    len(proposals_with_targets),
    #    dtype=torch.bool,
    #    device=features[0].device if len(features) > 0 else torch.device("cpu"),
    # )
    for i, proposals_per_image in enumerate(proposals_with_targets):
        if not proposals_per_image.has("gt_densepose") and (
            not proposals_per_image.has("gt_masks") or not self.keep_masks
        ):
            # feature_mask[i] = 0
            continue
        gt_boxes = proposals_per_image.gt_boxes
        est_boxes = proposals_per_image.proposal_boxes
        # apply match threshold for densepose head
        iou = matched_boxlist_iou(gt_boxes, est_boxes)
        iou_select = iou > self.iou_threshold
        proposals_per_image = proposals_per_image[iou_select]

        N_gt_boxes = len(proposals_per_image.gt_boxes)
        assert N_gt_boxes == len(proposals_per_image.proposal_boxes), (
            f"The number of GT boxes {N_gt_boxes} is different from the "
            f"number of proposal boxes {len(proposals_per_image.proposal_boxes)}"
        )
        # filter out any target without suitable annotation
        if self.keep_masks:
            gt_masks = (
                proposals_per_image.gt_masks
                if hasattr(proposals_per_image, "gt_masks")
                else [None] * N_gt_boxes
            )
        else:
            gt_masks = [None] * N_gt_boxes
        gt_densepose = (
            proposals_per_image.gt_densepose
            if hasattr(proposals_per_image, "gt_densepose")
            else [None] * N_gt_boxes
        )
        assert len(gt_masks) == N_gt_boxes
        assert len(gt_densepose) == N_gt_boxes
        selected_indices = [
            i
            for i, (dp_target, mask_target) in enumerate(zip(gt_densepose, gt_masks))
            if (dp_target is not None) or (mask_target is not None)
        ]
        # if not len(selected_indices):
        #     feature_mask[i] = 0
        #     continue
        if len(selected_indices) != N_gt_boxes:
            proposals_per_image = proposals_per_image[selected_indices]
        assert len(proposals_per_image.gt_boxes) == len(proposals_per_image.proposal_boxes)
        proposals_filtered.append(proposals_per_image)
    # features_filtered = [feature[feature_mask] for feature in features]
    # return features_filtered, proposals_filtered
    return features, proposals_filtered
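# Illustrative only: the per-instance annotation check above reduces to keeping
# an index when either a DensePose target or (if keep_masks is enabled) a mask
# target is present. A minimal sketch with plain Python lists; the helper name
# and sample values are made up.
def _select_annotated_indices_sketch(gt_densepose, gt_masks):
    # gt_densepose / gt_masks: equal-length sequences whose entries may be None
    return [
        i
        for i, (dp_target, mask_target) in enumerate(zip(gt_densepose, gt_masks))
        if (dp_target is not None) or (mask_target is not None)
    ]

# e.g. _select_annotated_indices_sketch([None, "dp", None], [None, None, "mask"]) -> [1, 2]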
def __call__(self, proposals_with_targets):
    """
    Filters proposals with targets to keep only the ones relevant for DensePose training

    proposals: list(Instances), each element of the list corresponds to
        various instances (proposals, GT for boxes and densepose) for one image
    """
    selection_masks = []  # MY CODE
    proposals_filtered = []
    for proposals_per_image in proposals_with_targets:
        if not hasattr(proposals_per_image, "gt_densepose"):
            continue
        assert hasattr(proposals_per_image, "gt_boxes")
        assert hasattr(proposals_per_image, "proposal_boxes")
        gt_boxes = proposals_per_image.gt_boxes
        est_boxes = proposals_per_image.proposal_boxes
        # apply match threshold for densepose head
        iou = matched_boxlist_iou(gt_boxes, est_boxes)
        iou_select = iou > self.iou_threshold
        proposals_per_image = proposals_per_image[iou_select]
        assert len(proposals_per_image.gt_boxes) == len(proposals_per_image.proposal_boxes)
        # filter out any target without densepose annotation
        gt_densepose = proposals_per_image.gt_densepose
        assert len(proposals_per_image.gt_boxes) == len(proposals_per_image.gt_densepose)
        selected_indices = [
            i for i, dp_target in enumerate(gt_densepose) if dp_target is not None
        ]
        if len(selected_indices) != len(gt_densepose):
            proposals_per_image = proposals_per_image[selected_indices]
        # if proposals_per_image.proposal_boxes.tensor.size(0) == 0:
        if len(proposals_per_image) == 0:
            # IoU is at most 1, so comparing against a huge threshold yields an all-False mask
            mask = iou > 9000.
            selection_masks.append(mask)
            continue
        assert len(proposals_per_image.gt_boxes) == len(proposals_per_image.proposal_boxes)
        assert len(proposals_per_image.gt_boxes) == len(proposals_per_image.gt_densepose)
        proposals_filtered.append(proposals_per_image)

        # remap selected_indices (indices among IoU survivors) back to a boolean
        # mask over the original proposals
        mask = copy.deepcopy(iou_select)
        i = 0
        selected_num = 0
        for j in range(len(iou_select)):
            if iou_select[j]:
                if i not in selected_indices:
                    mask[j] = False
                else:
                    selected_num += 1
                i += 1
        assert selected_num == len(selected_indices)
        selection_masks.append(mask)
    return proposals_filtered, selection_masks
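# Illustrative only: a vectorized sketch equivalent to the index-remapping loop
# above (hypothetical helper, assuming selected_indices is non-empty on this
# path, since the empty case is handled earlier with an all-False mask).
def _remap_selection_mask_sketch(iou_select: torch.Tensor, selected_indices) -> torch.Tensor:
    # iou_select: bool tensor over the original proposals (IoU threshold test)
    # selected_indices: indices *within the surviving proposals* that have DensePose GT
    surviving_pos = torch.nonzero(iou_select).squeeze(1)
    mask = torch.zeros_like(iou_select)
    mask[surviving_pos[selected_indices]] = True
    return mask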
def forward(self, batched_inputs):
    """
    Args:
        batched_inputs: a list, batched outputs of :class:`DatasetMapper` .
            Each item in the list contains the inputs for one image.
            For now, each item in the list is a dict that contains:

            * image: Tensor, image in (C, H, W) format.
            * instances (optional): groundtruth :class:`Instances`
            * proposals (optional): :class:`Instances`, precomputed proposals.

            Other information that's included in the original dicts, such as:

            * "height", "width" (int): the output resolution of the model, used in inference.
              See :meth:`postprocess` for details.

    Returns:
        list[dict]:
            Each dict is the output for one input image.
            The dict contains one key "instances" whose value is a :class:`Instances`.
            The :class:`Instances` object has the following keys:
            "pred_boxes", "pred_classes", "scores", "pred_masks", "pred_keypoints"
    """
    if not self.training:
        return self.inference(batched_inputs)

    images = self.preprocess_image(batched_inputs)
    if "instances" in batched_inputs[0]:
        gt_instances = [x["instances"].to(self.device) for x in batched_inputs]
    elif "targets" in batched_inputs[0]:
        log_first_n(
            logging.WARN,
            "'targets' in the model inputs is now renamed to 'instances'!",
            n=10,
        )
        gt_instances = [x["targets"].to(self.device) for x in batched_inputs]
    else:
        gt_instances = None

    features = self.backbone(images.tensor)
    proposals, proposal_losses = self.proposal_generator(images, features, gt_instances)
    _, outputs_classic, outputs = self.tsd(images, features, proposals, gt_instances)
    detector_classic_losses = outputs_classic.losses()
    detector_losses = outputs.losses()
    # rename the losses of the classic (sibling) head to avoid key collisions
    detector_classic_losses['loss_cls_classic'] = detector_classic_losses.pop('loss_cls')
    detector_classic_losses['loss_box_reg_classic'] = detector_classic_losses.pop('loss_box_reg')

    if self.vis_period > 0:
        storage = get_event_storage()
        if storage.iter % self.vis_period == 0:
            self.visualize_training(batched_inputs, proposals)

    # Progressive constraints
    margin_regression_losses = 0
    predict_boxes_classic = outputs_classic.predict_boxes_for_gt_classes()
    predict_boxes = outputs.predict_boxes_for_gt_classes()
    idx = -1
    endIdx = 0
    # foreground selector: gt_classes equal to the number of classes marks background
    # proposals (assuming class-specific box regression, size(1) == num_classes * 4)
    ind = outputs.gt_classes != (outputs.pred_proposal_deltas.size(1) / 4)
    for pbc, pb in zip(predict_boxes_classic, predict_boxes):
        idx += 1
        startIdx = endIdx
        endIdx += outputs.num_preds_per_image[idx]
        iind = ind[startIdx:endIdx]
        margin_regression_losses += F.relu(
            self.MR
            - abs(
                matched_boxlist_iou(Boxes(pbc[iind]), outputs.gt_boxes[startIdx:endIdx][iind])
                - matched_boxlist_iou(Boxes(pb[iind]), outputs.gt_boxes[startIdx:endIdx][iind])
            )
        ).mean()
    margin_regression_losses = margin_regression_losses / len(predict_boxes)

    margin_classification_losses = 0
    for ppc, pc in zip(outputs_classic.predict_probs(), outputs.predict_probs()):
        margin_classification_losses += F.relu(self.MC - (abs(ppc - pc)).sum(1)).mean()
    margin_classification_losses = margin_classification_losses / len(outputs.predict_probs())

    losses = {}
    losses.update(detector_classic_losses)
    losses.update(detector_losses)
    losses.update(proposal_losses)
    losses.update(
        {
            'loss_margin_classification': margin_classification_losses,
            'loss_margin_regression': margin_regression_losses,
        }
    )
    return losses
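# Illustrative only: a toy numeric sketch of the two progressive-constraint terms
# computed above. The margins self.MC / self.MR are written as plain floats and
# all tensors are made up; this is a sketch of the formulas, not the model code.
# The classification term pushes the L1 gap between the two heads' class
# probabilities above MC; the regression term pushes the gap between their IoUs
# with the GT boxes above MR.
def _progressive_constraint_sketch() -> torch.Tensor:
    MC, MR = 0.2, 0.2
    ppc = torch.tensor([[0.7, 0.3]])    # classic head class probabilities
    pc = torch.tensor([[0.6, 0.4]])     # TSD head class probabilities
    cls_term = F.relu(MC - (ppc - pc).abs().sum(1)).mean()

    iou_classic = torch.tensor([0.80])  # IoU(classic head boxes, GT)
    iou_tsd = torch.tensor([0.90])      # IoU(TSD head boxes, GT)
    reg_term = F.relu(MR - (iou_classic - iou_tsd).abs()).mean()
    return cls_term + reg_term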