Example #1
    def __call__(self, proposals_with_targets):
        """
        Filters proposals with targets to keep only the ones relevant for
        DensePose training
        proposals: list(Instances), each element of the list corresponds to
            various instances (proposals, GT for boxes and densepose) for one
            image
        """
        proposals_filtered = []
        for proposals_per_image in proposals_with_targets:
            if not hasattr(proposals_per_image, "gt_densepose"):
                continue
            assert hasattr(proposals_per_image, "gt_boxes")
            assert hasattr(proposals_per_image, "proposal_boxes")
            gt_boxes = proposals_per_image.gt_boxes
            est_boxes = proposals_per_image.proposal_boxes
            # apply match threshold for densepose head
            iou = matched_boxlist_iou(gt_boxes, est_boxes)
            iou_select = iou > self.iou_threshold
            proposals_per_image = proposals_per_image[iou_select]
            assert len(proposals_per_image.gt_boxes) == len(proposals_per_image.proposal_boxes)
            # filter out any target without densepose annotation
            gt_densepose = proposals_per_image.gt_densepose
            assert len(proposals_per_image.gt_boxes) == len(proposals_per_image.gt_densepose)
            selected_indices = [
                i for i, dp_target in enumerate(gt_densepose) if dp_target is not None
            ]
            if len(selected_indices) != len(gt_densepose):
                proposals_per_image = proposals_per_image[selected_indices]
            assert len(proposals_per_image.gt_boxes) == len(proposals_per_image.proposal_boxes)
            assert len(proposals_per_image.gt_boxes) == len(proposals_per_image.gt_densepose)
            proposals_filtered.append(proposals_per_image)
        return proposals_filtered
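All of the examples here lean on `matched_boxlist_iou`, which computes an element-wise IoU between corresponding boxes of two equally sized box lists (not an all-pairs matrix). As a reference point, here is a minimal self-contained sketch of that computation in plain PyTorch on (N, 4) XYXY tensors; `matched_box_iou` is a hypothetical stand-in, not the detectron2 function itself:

import torch

def matched_box_iou(boxes1: torch.Tensor, boxes2: torch.Tensor) -> torch.Tensor:
    """Element-wise IoU between corresponding rows of two (N, 4) XYXY box tensors."""
    area1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1])
    area2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1])
    lt = torch.max(boxes1[:, :2], boxes2[:, :2])  # intersection top-left
    rb = torch.min(boxes1[:, 2:], boxes2[:, 2:])  # intersection bottom-right
    wh = (rb - lt).clamp(min=0)                   # clip non-overlapping pairs to zero
    inter = wh[:, 0] * wh[:, 1]
    return inter / (area1 + area2 - inter)

gt = torch.tensor([[0., 0., 10., 10.], [5., 5., 15., 15.]])
est = torch.tensor([[1., 1., 10., 10.], [20., 20., 30., 30.]])
print(matched_box_iou(gt, est))  # tensor([0.8100, 0.0000])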
Example #2
def select_proposals_with_visible_keypoints(proposals):
    """
    Args:
        proposals (list[Instances]): a list of N Instances, where N is the
            number of images.

    Returns:
        proposals: only contains proposals with at least one visible keypoint.

    Note that this is still slightly different from Detectron.
    In Detectron, proposals for training keypoint head are re-sampled from
    all the proposals with IOU>threshold & >=1 visible keypoint.

    Here, the proposals are first sampled from all proposals with
    IOU>threshold, then proposals with no visible keypoint are filtered out.
    This strategy seems to make no difference on Detectron and is easier to implement.
    """
    ret = []
    all_num_fg = []
    for proposals_per_image in proposals:
        gt_boxes = proposals_per_image.gt_boxes
        est_boxes = proposals_per_image.proposal_boxes
        iou = matched_boxlist_iou(gt_boxes, est_boxes)
        iou_select = iou > 0.55  # hard-coded value in place of self.iou_threshold
        proposals_per_image = proposals_per_image[iou_select]
        # #fg x K x 3
        gt_keypoints = proposals_per_image.gt_keypoints.tensor
        vis_mask = gt_keypoints[:, :, 2] >= 1
        xs, ys = gt_keypoints[:, :, 0], gt_keypoints[:, :, 1]
        proposal_boxes = proposals_per_image.proposal_boxes.tensor.unsqueeze(dim=1)  # #fg x 1 x 4
        kp_in_box = ((xs >= proposal_boxes[:, :, 0])
                     & (xs <= proposal_boxes[:, :, 2])
                     & (ys >= proposal_boxes[:, :, 1])
                     & (ys <= proposal_boxes[:, :, 3]))
        selection = (kp_in_box & vis_mask).any(dim=1)
        selection_idxs = torch.nonzero(selection).squeeze(1)
        all_num_fg.append(selection_idxs.numel())
        ret.append(proposals_per_image[selection_idxs])

    storage = get_event_storage()
    storage.put_scalar("keypoint_head/num_fg_samples", np.mean(all_num_fg))
    return ret
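The broadcasting in the keypoint test above is easy to misread: `xs` and `ys` have shape #fg x K, while the unsqueezed boxes are #fg x 1 x 4, so each comparison broadcasts to #fg x K. A toy check with made-up shapes and values:

import torch

# 2 proposals, 3 keypoints each; rows are (x, y, visibility)
gt_keypoints = torch.tensor([
    [[2., 2., 2.], [8., 8., 1.], [50., 50., 0.]],
    [[1., 1., 0.], [3., 3., 0.], [4., 4., 0.]],
])
proposal_boxes = torch.tensor([[0., 0., 10., 10.],
                               [0., 0., 10., 10.]]).unsqueeze(dim=1)  # 2 x 1 x 4
vis_mask = gt_keypoints[:, :, 2] >= 1
xs, ys = gt_keypoints[:, :, 0], gt_keypoints[:, :, 1]
kp_in_box = ((xs >= proposal_boxes[:, :, 0])
             & (xs <= proposal_boxes[:, :, 2])
             & (ys >= proposal_boxes[:, :, 1])
             & (ys <= proposal_boxes[:, :, 3]))
selection = (kp_in_box & vis_mask).any(dim=1)
print(selection)  # tensor([ True, False]): only the first proposal has a visible keypoint in its box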
Example #3
    def __call__(self, features: List[torch.Tensor],
                 proposals_with_targets: List[Instances]):
        """
        Filters proposals with targets to keep only the ones relevant for
        DensePose training

        Args:
            features (list[Tensor]): input data as a list of features,
                each feature is a tensor. Axis 0 represents the number of
                images `N` in the input data; axes 1-3 are channels,
                height, and width, which may vary between features
                (e.g., if a feature pyramid is used).
            proposals_with_targets (list[Instances]): length `N` list of
                `Instances`. The i-th `Instances` contains instances
                (proposals, GT) for the i-th input image.
        Returns:
            list[Tensor]: filtered features
            list[Instances]: filtered proposals
        """
        proposals_filtered = []
        # TODO: the commented-out code below was supposed to handle situations where
        # no valid DensePose GT is available for certain images: the corresponding
        # image features were sliced out and the proposals filtered. This led to
        # performance deterioration, both in runtime and in evaluation results.
        #
        # feature_mask = torch.ones(
        #    len(proposals_with_targets),
        #    dtype=torch.bool,
        #    device=features[0].device if len(features) > 0 else torch.device("cpu"),
        # )
        for i, proposals_per_image in enumerate(proposals_with_targets):
            if not proposals_per_image.has("gt_densepose") and (
                    not proposals_per_image.has("gt_masks")
                    or not self.keep_masks):
                # feature_mask[i] = 0
                continue
            gt_boxes = proposals_per_image.gt_boxes
            est_boxes = proposals_per_image.proposal_boxes
            # apply match threshold for densepose head
            iou = matched_boxlist_iou(gt_boxes, est_boxes)
            iou_select = iou > self.iou_threshold
            proposals_per_image = proposals_per_image[iou_select]

            N_gt_boxes = len(proposals_per_image.gt_boxes)
            assert N_gt_boxes == len(proposals_per_image.proposal_boxes), (
                f"The number of GT boxes {N_gt_boxes} is different from the "
                f"number of proposal boxes {len(proposals_per_image.proposal_boxes)}"
            )
            # filter out any target without suitable annotation
            if self.keep_masks:
                gt_masks = (proposals_per_image.gt_masks
                            if hasattr(proposals_per_image, "gt_masks")
                            else [None] * N_gt_boxes)
            else:
                gt_masks = [None] * N_gt_boxes
            gt_densepose = (proposals_per_image.gt_densepose
                            if hasattr(proposals_per_image, "gt_densepose")
                            else [None] * N_gt_boxes)
            assert len(gt_masks) == N_gt_boxes
            assert len(gt_densepose) == N_gt_boxes
            selected_indices = [
                i for i, (dp_target, mask_target) in enumerate(zip(gt_densepose, gt_masks))
                if (dp_target is not None) or (mask_target is not None)
            ]
            # if not len(selected_indices):
            #     feature_mask[i] = 0
            #     continue
            if len(selected_indices) != N_gt_boxes:
                proposals_per_image = proposals_per_image[selected_indices]
            assert len(proposals_per_image.gt_boxes) == len(
                proposals_per_image.proposal_boxes)
            proposals_filtered.append(proposals_per_image)
        # features_filtered = [feature[feature_mask] for feature in features]
        # return features_filtered, proposals_filtered
        return features, proposals_filtered
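Compared to Example #1, the selection rule here keeps a proposal when it has either a DensePose annotation or, if `self.keep_masks` is set, a mask annotation. A standalone sketch of just that index selection, with `object()` placeholders standing in for the annotation structures:

# placeholder annotation lists: object() marks "annotation present"
gt_densepose = [object(), None, None, object()]
gt_masks = [None, object(), None, None]
selected_indices = [
    i for i, (dp_target, mask_target) in enumerate(zip(gt_densepose, gt_masks))
    if (dp_target is not None) or (mask_target is not None)
]
print(selected_indices)  # [0, 1, 3]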
Example #4
    def __call__(self, proposals_with_targets):
        """
        Filters proposals with targets to keep only the ones relevant for
        DensePose training
        proposals: list(Instances), each element of the list corresponds to
            various instances (proposals, GT for boxes and densepose) for one
            image
        """
        selection_masks = []  # added: per-image survival masks over the original proposals
        proposals_filtered = []
        for proposals_per_image in proposals_with_targets:
            if not hasattr(proposals_per_image, "gt_densepose"):
                continue
            assert hasattr(proposals_per_image, "gt_boxes")
            assert hasattr(proposals_per_image, "proposal_boxes")
            gt_boxes = proposals_per_image.gt_boxes
            est_boxes = proposals_per_image.proposal_boxes
            # apply match threshold for densepose head
            iou = matched_boxlist_iou(gt_boxes, est_boxes)

            iou_select = iou > self.iou_threshold
            proposals_per_image = proposals_per_image[iou_select]
            assert len(proposals_per_image.gt_boxes) == len(
                proposals_per_image.proposal_boxes)
            # filter out any target without densepose annotation
            gt_densepose = proposals_per_image.gt_densepose
            assert len(proposals_per_image.gt_boxes) == len(
                proposals_per_image.gt_densepose)
            selected_indices = [
                i for i, dp_target in enumerate(gt_densepose)
                if dp_target is not None
            ]
            if len(selected_indices) != len(gt_densepose):
                proposals_per_image = proposals_per_image[selected_indices]

            # if proposals_per_image.proposal_boxes.tensor.size(0) == 0:
            if len(proposals_per_image) == 0:
                mask = iou > 9000.  # IoU never exceeds 1, so this yields an all-False mask
                selection_masks.append(mask)
                continue

            assert len(proposals_per_image.gt_boxes) == len(
                proposals_per_image.proposal_boxes)
            assert len(proposals_per_image.gt_boxes) == len(
                proposals_per_image.gt_densepose)
            proposals_filtered.append(proposals_per_image)

            # rebuild a mask over the original proposals: walk the positions that
            # passed the IoU test and clear those whose post-IoU index was removed
            # by the densepose-annotation filter
            mask = copy.deepcopy(iou_select)
            i = 0  # index into the post-IoU-filter proposals
            selected_num = 0
            for j in range(len(iou_select)):
                if iou_select[j]:
                    if i not in selected_indices:
                        mask[j] = False
                    else:
                        selected_num += 1
                    i += 1
            assert selected_num == len(selected_indices)
            selection_masks.append(mask)

        return proposals_filtered, selection_masks
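The index-walking loop at the end folds the two filtering stages into one mask over the original proposals. A vectorized sketch of the same composition, assuming (as above) that `iou_select` is a bool tensor and `selected_indices` indexes the post-IoU survivors:

import torch

iou_select = torch.tensor([True, False, True, True, False])
selected_indices = [0, 2]  # survivors among the True positions of iou_select
mask = iou_select.clone()
true_positions = torch.nonzero(iou_select).squeeze(1)  # tensor([0, 2, 3])
keep = torch.zeros(len(true_positions), dtype=torch.bool)
keep[selected_indices] = True
mask[true_positions] = keep
print(mask)  # tensor([ True, False, False,  True, False])

Here mask[1] and mask[4] stay False from the IoU stage, while mask[2] is cleared by the annotation stage (local survivor index 1 is not in selected_indices).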
Example #5
    def forward(self, batched_inputs):
        """
        Args:
            batched_inputs: a list, batched outputs of :class:`DatasetMapper`.
                Each item in the list contains the inputs for one image.
                For now, each item in the list is a dict that contains:

                * image: Tensor, image in (C, H, W) format.
                * instances (optional): groundtruth :class:`Instances`
                * proposals (optional): :class:`Instances`, precomputed proposals.

                Other information that's included in the original dicts, such as:

                * "height", "width" (int): the output resolution of the model, used in inference.
                  See :meth:`postprocess` for details.

        Returns:
            list[dict]:
                Each dict is the output for one input image.
                The dict contains one key "instances" whose value is a :class:`Instances`.
                The :class:`Instances` object has the following keys:
                "pred_boxes", "pred_classes", "scores", "pred_masks", "pred_keypoints"
        """
        if not self.training:
            return self.inference(batched_inputs)

        images = self.preprocess_image(batched_inputs)
        if "instances" in batched_inputs[0]:
            gt_instances = [
                x["instances"].to(self.device) for x in batched_inputs
            ]
        elif "targets" in batched_inputs[0]:
            log_first_n(
                logging.WARN,
                "'targets' in the model inputs is now renamed to 'instances'!",
                n=10)
            gt_instances = [
                x["targets"].to(self.device) for x in batched_inputs
            ]
        else:
            gt_instances = None

        features = self.backbone(images.tensor)

        proposals, proposal_losses = self.proposal_generator(
            images, features, gt_instances)
        _, outputs_classic, outputs = self.tsd(images, features, proposals,
                                               gt_instances)

        detector_classic_losses = outputs_classic.losses()
        detector_losses = outputs.losses()
        detector_classic_losses['loss_cls_classic'] = detector_classic_losses.pop('loss_cls')
        detector_classic_losses['loss_box_reg_classic'] = detector_classic_losses.pop('loss_box_reg')

        if self.vis_period > 0:
            storage = get_event_storage()
            if storage.iter % self.vis_period == 0:
                self.visualize_training(batched_inputs, proposals)

        # Progressive constraints
        margin_regression_losses = 0
        predict_boxes_classic = outputs_classic.predict_boxes_for_gt_classes()
        predict_boxes = outputs.predict_boxes_for_gt_classes()

        # the last class index (= number of box deltas / 4) marks background proposals
        ind = outputs.gt_classes != (outputs.pred_proposal_deltas.size(1) // 4)
        endIdx = 0
        for idx, (pbc, pb) in enumerate(zip(predict_boxes_classic, predict_boxes)):
            startIdx = endIdx
            endIdx += outputs.num_preds_per_image[idx]
            iind = ind[startIdx:endIdx]
            gt_b = outputs.gt_boxes[startIdx:endIdx][iind]
            margin_regression_losses += F.relu(
                self.MR - abs(matched_boxlist_iou(Boxes(pbc[iind]), gt_b)
                              - matched_boxlist_iou(Boxes(pb[iind]), gt_b))
            ).mean()
        margin_regression_losses = margin_regression_losses / len(predict_boxes)

        margin_classification_losses = 0
        for ppc, pc in zip(outputs_classic.predict_probs(), outputs.predict_probs()):
            margin_classification_losses += F.relu(self.MC - abs(ppc - pc).sum(1)).mean()
        margin_classification_losses = margin_classification_losses / len(outputs.predict_probs())

        losses = {}
        losses.update(detector_classic_losses)
        losses.update(detector_losses)
        losses.update(proposal_losses)
        losses.update({
            'loss_margin_classification': margin_classification_losses,
            'loss_margin_regression': margin_regression_losses
        })
        return losses
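Both progressive-constraint terms are hinge losses: they vanish once the classic head and the TSD head disagree by at least the margin (`self.MC` for classification, `self.MR` for box regression). A toy evaluation of the classification term with made-up probabilities:

import torch
import torch.nn.functional as F

# The hinge is zero once the two heads' class probabilities differ by at least MC.
MC = 0.2
ppc = torch.tensor([[0.7, 0.2, 0.1]])  # classic-head class probabilities
pc = torch.tensor([[0.5, 0.3, 0.2]])   # disentangled-head class probabilities
loss = F.relu(MC - abs(ppc - pc).sum(1)).mean()
print(loss)  # tensor(0.): |0.2| + |0.1| + |0.1| = 0.4 >= MC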