Example #1
    def forward(self, x: List[torch.Tensor], box_lists: List[Boxes]):
        """
        Args:
            x (list[Tensor]): A list of feature maps of NCHW shape, with scales matching those
                used to construct this module.
            box_lists (list[Boxes] | list[RotatedBoxes]):
                A list of N Boxes or N RotatedBoxes, where N is the number of images in the batch.
                The box coordinates are defined on the original image and
                will be scaled by the `scales` argument of :class:`ROIPooler`.
        Returns:
            Tensor:
                A tensor of shape (M, C, output_size, output_size) where M is the total number of
                boxes aggregated over all N batch images and C is the number of channels in `x`.
        """
        num_level_assignments = len(self.level_poolers)

        assert isinstance(x, list) and isinstance(
            box_lists, list
        ), "Arguments to pooler must be lists"
        assert (
            len(x) == num_level_assignments
        ), "unequal value, num_level_assignments={}, but x is list of {} Tensors".format(
            num_level_assignments, len(x)
        )

        assert len(box_lists) == x[0].size(
            0
        ), "unequal value, x[0] batch dim 0 is {}, but box_list has length {}".format(
            x[0].size(0), len(box_lists)
        )
        if len(box_lists) == 0:
            return torch.zeros(
                (0, x[0].shape[1]) + self.output_size, device=x[0].device, dtype=x[0].dtype
            )

        pooler_fmt_boxes = convert_boxes_to_pooler_format(box_lists)

        if num_level_assignments == 1:
            return self.level_poolers[0](x[0], pooler_fmt_boxes)

        level_assignments = assign_boxes_to_levels(
            box_lists, self.min_level, self.max_level, self.canonical_box_size, self.canonical_level
        )

        num_boxes = pooler_fmt_boxes.size(0)
        num_channels = x[0].shape[1]
        output_size = self.output_size[0]

        dtype, device = x[0].dtype, x[0].device
        output = torch.zeros(
            (num_boxes, num_channels, output_size, output_size), dtype=dtype, device=device
        )

        for level, pooler in enumerate(self.level_poolers):
            inds = nonzero_tuple(level_assignments == level)[0]
            pooler_fmt_boxes_level = pooler_fmt_boxes[inds]
            # Use index_put_ instead of advanced indexing, to avoid pytorch/issues/49852
            output.index_put_((inds,), pooler(x[level], pooler_fmt_boxes_level))

        return output
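The multi-level branch above scatters each level's pooled features back into one (M, C, S, S) tensor by box index, using index_put_ rather than output[inds] = ... to work around pytorch/issues/49852. A minimal self-contained sketch of that scatter, with toy shapes and a stand-in for the per-level pooler (the shapes and level assignments are assumptions, not taken from the code):

import torch

num_boxes, C, S = 5, 2, 3
level_assignments = torch.tensor([0, 1, 0, 1, 1])  # hypothetical level per box
output = torch.zeros((num_boxes, C, S, S))

for level in range(2):
    inds = torch.nonzero(level_assignments == level, as_tuple=True)[0]
    # stand-in for `pooler(x[level], pooler_fmt_boxes_level)`
    pooled = torch.full((inds.numel(), C, S, S), float(level))
    output.index_put_((inds,), pooled)  # equivalent to output[inds] = pooled

assert torch.all(output[level_assignments == 1] == 1.0)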
Example #2
    def box_reg_loss(self, proposal_boxes, gt_boxes, pred_deltas, gt_classes):
        """
        Args:
            All boxes are tensors with the same shape Rx(4 or 5).
            gt_classes is a long tensor of shape R, the gt class label of each proposal.
            R shall be the number of proposals.
        """
        box_dim = proposal_boxes.shape[1]  # 4 or 5
        # Regression loss is only computed for foreground proposals (those matched to a GT)
        fg_inds = nonzero_tuple((gt_classes >= 0)
                                & (gt_classes < self.num_classes))[0]
        if pred_deltas.shape[1] == box_dim:  # cls-agnostic regression
            fg_pred_deltas = pred_deltas[fg_inds]
        else:
            fg_pred_deltas = pred_deltas.view(-1, self.num_classes,
                                              box_dim)[fg_inds,
                                                       gt_classes[fg_inds]]

        if self.box_reg_loss_type == "smooth_l1":
            gt_pred_deltas = self.box2box_transform.get_deltas(
                proposal_boxes[fg_inds],
                gt_boxes[fg_inds],
            )
            loss_box_reg = smooth_l1_loss(fg_pred_deltas,
                                          gt_pred_deltas,
                                          self.smooth_l1_beta,
                                          reduction="sum")
        elif self.box_reg_loss_type == "giou":
            fg_pred_boxes = self.box2box_transform.apply_deltas(
                fg_pred_deltas, proposal_boxes[fg_inds])
            loss_box_reg = giou_loss(fg_pred_boxes,
                                     gt_boxes[fg_inds],
                                     reduction="sum")
        elif self.box_reg_loss_type == "diou":
            fg_pred_boxes = self.box2box_transform.apply_deltas(
                fg_pred_deltas, proposal_boxes[fg_inds])
            loss_box_reg = diou_loss(fg_pred_boxes,
                                     gt_boxes[fg_inds],
                                     reduction="sum")
        elif self.box_reg_loss_type == "ciou":
            fg_pred_boxes = self.box2box_transform.apply_deltas(
                fg_pred_deltas, proposal_boxes[fg_inds])
            loss_box_reg = ciou_loss(fg_pred_boxes,
                                     gt_boxes[fg_inds],
                                     reduction="sum")
        else:
            raise ValueError(
                f"Invalid bbox reg loss type '{self.box_reg_loss_type}'")
        # The reg loss is normalized using the total number of regions (R), not the number
        # of foreground regions even though the box regression loss is only defined on
        # foreground regions. Why? Because doing so gives equal training influence to
        # each foreground example. To see how, consider two different minibatches:
        #  (1) Contains a single foreground region
        #  (2) Contains 100 foreground regions
        # If we normalize by the number of foreground regions, the single example in
        # minibatch (1) will be given 100 times as much influence as each foreground
        # example in minibatch (2). Normalizing by the total number of regions, R,
        # means that the single example in minibatch (1) and each of the 100 examples
        # in minibatch (2) are given equal influence.
        return loss_box_reg / max(gt_classes.numel(), 1.0)  # return 0 if empty
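The normalization rationale in the comment above can be made concrete with a toy calculation (the numbers are assumptions, not from the code):

per_fg_loss = 1.0   # pretend every foreground box contributes a loss of 1.0
R = 512             # assumed number of proposals per minibatch

# minibatch (1): 1 foreground box; minibatch (2): 100 foreground boxes
by_num_fg = (1 * per_fg_loss / 1, 100 * per_fg_loss / 100)   # (1.0, 1.0)
by_R      = (1 * per_fg_loss / R, 100 * per_fg_loss / R)     # (~0.002, ~0.195)
# Dividing by num_fg gives the single box in (1) the same total weight as all 100 boxes
# in (2) combined, i.e. 100x the per-box influence; dividing by R weights every
# foreground box equally.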
Example #3
def subsample_labels(
    labels: torch.Tensor, num_samples: int, positive_fraction: float, bg_label: int
):
    """
    Return `num_samples` (or fewer, if not enough found)
    random samples from `labels` which is a mixture of positives & negatives.
    It will try to return as many positives as possible without
    exceeding `positive_fraction * num_samples`, and then try to
    fill the remaining slots with negatives.

    Args:
        labels (Tensor): (N, ) label vector with values:
            * -1: ignore
            * bg_label: background ("negative") class
            * otherwise: one or more foreground ("positive") classes
        num_samples (int): The total number of labels with value >= 0 to return.
            Values that are not sampled will be filled with -1 (ignore).
        positive_fraction (float): The number of subsampled labels with values > 0
            is `min(num_positives, int(positive_fraction * num_samples))`. The number
            of negatives sampled is `min(num_negatives, num_samples - num_positives_sampled)`.
            In other words, if there are not enough positives, the sample is filled with
            negatives. If there are also not enough negatives, then as many elements are
            sampled as is possible.
        bg_label (int): label index of background ("negative") class.

    Returns:
        pos_idx, neg_idx (Tensor):
            1D vector of indices. The total length of both is `num_samples` or fewer.
    """
    positive = nonzero_tuple((labels != -1) & (labels != bg_label))[0]
    negative = nonzero_tuple(labels == bg_label)[0]

    num_pos = int(num_samples * positive_fraction)
    # protect against not enough positive examples
    num_pos = min(positive.numel(), num_pos)
    num_neg = num_samples - num_pos
    # protect against not enough negative examples
    num_neg = min(negative.numel(), num_neg)

    # randomly select positive and negative examples
    perm1 = torch.randperm(positive.numel(), device=positive.device)[:num_pos]
    perm2 = torch.randperm(negative.numel(), device=negative.device)[:num_neg]

    pos_idx = positive[perm1]
    neg_idx = negative[perm2]
    return pos_idx, neg_idx
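A minimal usage sketch with a made-up label vector, assuming the function above is in scope (in upstream detectron2 it is detectron2.modeling.sampling.subsample_labels):

import torch

# -1 = ignore, 80 = background (bg_label), anything else = foreground
labels = torch.tensor([-1, 3, 0, 80, 80, 80, 80, 2, -1, 80])
pos_idx, neg_idx = subsample_labels(labels, num_samples=4, positive_fraction=0.5, bg_label=80)
# 3 positives exist but at most int(0.5 * 4) = 2 are kept; the remaining 2 slots go to negatives.
assert pos_idx.numel() == 2 and neg_idx.numel() == 2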
Example #4
    def smooth_l1_loss(self):
        """
        Compute the smooth L1 loss for box regression.

        Returns:
            scalar Tensor
        """
        if self._no_instances:
            return 0.0 * self.pred_proposal_deltas.sum()
        gt_proposal_deltas = self.box2box_transform.get_deltas(
            self.proposals.tensor, self.gt_boxes.tensor)
        box_dim = gt_proposal_deltas.size(1)  # 4 or 5
        cls_agnostic_bbox_reg = self.pred_proposal_deltas.size(1) == box_dim
        device = self.pred_proposal_deltas.device

        bg_class_ind = self.pred_class_logits.shape[1] - 1

        # Box delta loss is only computed between the prediction for the gt class k
        # (if 0 <= k < bg_class_ind) and the target; there is no loss defined on predictions
        # for non-gt classes and background.
        # Empty fg_inds produces a valid loss of zero as long as the size_average
        # arg to smooth_l1_loss is False (otherwise it uses torch.mean internally
        # and would produce a nan loss).
        fg_inds = nonzero_tuple((self.gt_classes >= 0)
                                & (self.gt_classes < bg_class_ind))[0]
        if cls_agnostic_bbox_reg:
            # pred_proposal_deltas only corresponds to foreground class for agnostic
            gt_class_cols = torch.arange(box_dim, device=device)
        else:
            fg_gt_classes = self.gt_classes[fg_inds]
            # pred_proposal_deltas for class k are located in columns [b * k : b * k + b],
            # where b is the dimension of box representation (4 or 5)
            # Note that compared to Detectron1,
            # we do not perform bounding box regression for background classes.
            gt_class_cols = box_dim * fg_gt_classes[:, None] + torch.arange(
                box_dim, device=device)

        loss_box_reg = smooth_l1_loss(
            self.pred_proposal_deltas[fg_inds[:, None], gt_class_cols],
            gt_proposal_deltas[fg_inds],
            self.smooth_l1_beta,
            reduction="sum",
        )
        # The loss is normalized using the total number of regions (R), not the number
        # of foreground regions even though the box regression loss is only defined on
        # foreground regions. Why? Because doing so gives equal training influence to
        # each foreground example. To see how, consider two different minibatches:
        #  (1) Contains a single foreground region
        #  (2) Contains 100 foreground regions
        # If we normalize by the number of foreground regions, the single example in
        # minibatch (1) will be given 100 times as much influence as each foreground
        # example in minibatch (2). Normalizing by the total number of regions, R,
        # means that the single example in minibatch (1) and each of the 100 examples
        # in minibatch (2) are given equal influence.
        loss_box_reg = loss_box_reg / self.gt_classes.numel()
        return loss_box_reg
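A small self-contained sketch (toy numbers, plain torch) of the gt_class_cols gather used above: for class-specific regression, the deltas for class k live in columns [box_dim * k : box_dim * k + box_dim], and pred[fg_inds[:, None], gt_class_cols] picks exactly those columns per foreground box.

import torch

box_dim, num_classes = 4, 3
pred = torch.arange(2 * num_classes * box_dim, dtype=torch.float32).view(2, -1)  # (R=2, 12)
fg_inds = torch.tensor([0, 1])
fg_gt_classes = torch.tensor([2, 0])           # hypothetical gt class per foreground box
gt_class_cols = box_dim * fg_gt_classes[:, None] + torch.arange(box_dim)

fg_deltas = pred[fg_inds[:, None], gt_class_cols]  # shape (2, 4)
# row 0 gathers its columns 8..11 (class 2); row 1 gathers its columns 0..3 (class 0)
assert torch.equal(fg_deltas, torch.tensor([[8., 9., 10., 11.], [12., 13., 14., 15.]]))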
Example #5
    def pool_with_assignments(self, features, box_lists, level_assignments):
        pooler_fmt_boxes = convert_boxes_to_pooler_format(box_lists)
        output = dict()
        level2inds = dict()
        for level, pooler in enumerate(self.level_poolers):
            inds = nonzero_tuple(level_assignments == level)[0]
            pooler_fmt_boxes_level = pooler_fmt_boxes[inds]
            output[level] = pooler(features[level], pooler_fmt_boxes_level)
            level2inds[level] = inds
        return output, level2inds
    def box_reg_loss(self):
        """
        Deprecated
        """
        if self._no_instances:
            return 0.0 * self.pred_proposal_deltas.sum()

        box_dim = self.proposals.tensor.size(1)  # 4 or 5
        cls_agnostic_bbox_reg = self.pred_proposal_deltas.size(1) == box_dim
        device = self.pred_proposal_deltas.device

        bg_class_ind = self.pred_class_logits.shape[1] - 1
        # Box delta loss is only computed between the prediction for the gt class k
        # (if 0 <= k < bg_class_ind) and the target; there is no loss defined on predictions
        # for non-gt classes and background.
        # Empty fg_inds should produce a valid loss of zero because reduction=sum.
        fg_inds = nonzero_tuple((self.gt_classes >= 0)
                                & (self.gt_classes < bg_class_ind))[0]

        if cls_agnostic_bbox_reg:
            # pred_proposal_deltas only corresponds to foreground class for agnostic
            gt_class_cols = torch.arange(box_dim, device=device)
        else:
            # pred_proposal_deltas for class k are located in columns [b * k : b * k + b],
            # where b is the dimension of box representation (4 or 5)
            # Note that compared to Detectron1,
            # we do not perform bounding box regression for background classes.
            gt_class_cols = box_dim * self.gt_classes[
                fg_inds, None] + torch.arange(box_dim, device=device)

        if self.box_reg_loss_type == "smooth_l1":
            gt_proposal_deltas = self.box2box_transform.get_deltas(
                self.proposals.tensor, self.gt_boxes.tensor)
            loss_box_reg = smooth_l1_loss(
                self.pred_proposal_deltas[fg_inds[:, None], gt_class_cols],
                gt_proposal_deltas[fg_inds],
                self.smooth_l1_beta,
                reduction="sum",
            )
        elif self.box_reg_loss_type == "giou":
            fg_pred_boxes = self.box2box_transform.apply_deltas(
                self.pred_proposal_deltas[fg_inds[:, None], gt_class_cols],
                self.proposals.tensor[fg_inds],
            )
            loss_box_reg = giou_loss(
                fg_pred_boxes,
                self.gt_boxes.tensor[fg_inds],
                reduction="sum",
            )
        else:
            raise ValueError(
                f"Invalid bbox reg loss type '{self.box_reg_loss_type}'")

        loss_box_reg = loss_box_reg / self.gt_classes.numel()
        return loss_box_reg
Example #7
    def box_reg_loss(self):
        """
        Changed the _no_instances handling and the loss normalization.
        """
        if self._no_instances:
            print('No instance in box reg loss')
            return self.pred_proposal_deltas.sum() * 0.

        box_dim = self.gt_boxes.tensor.size(1)  # 4 or 5
        cls_agnostic_bbox_reg = self.pred_proposal_deltas.size(1) == box_dim
        device = self.pred_proposal_deltas.device

        bg_class_ind = self.pred_class_logits.shape[1] - 1

        fg_inds = nonzero_tuple((self.gt_classes >= 0)
                                & (self.gt_classes < bg_class_ind))[0]
        if cls_agnostic_bbox_reg:
            gt_class_cols = torch.arange(box_dim, device=device)
        else:
            fg_gt_classes = self.gt_classes[fg_inds]
            gt_class_cols = box_dim * fg_gt_classes[:, None] + torch.arange(
                box_dim, device=device)

        if self.box_reg_loss_type == "smooth_l1":
            gt_proposal_deltas = self.box2box_transform.get_deltas(
                self.proposals.tensor, self.gt_boxes.tensor)
            loss_box_reg = smooth_l1_loss(
                self.pred_proposal_deltas[fg_inds[:, None], gt_class_cols],
                gt_proposal_deltas[fg_inds],
                self.smooth_l1_beta,
                reduction="sum",
            )
        elif self.box_reg_loss_type == "giou":
            loss_box_reg = giou_loss(
                self._predict_boxes()[fg_inds[:, None], gt_class_cols],
                self.gt_boxes.tensor[fg_inds],
                reduction="sum",
            )
        else:
            raise ValueError(
                f"Invalid bbox reg loss type '{self.box_reg_loss_type}'")

        if self.fix_norm_reg:
            loss_box_reg = loss_box_reg / self.box_batch_size
        else:
            loss_box_reg = loss_box_reg / self.gt_classes.numel()
        return loss_box_reg
Example #8
def select_proposals_with_visible_keypoints(
        proposals: List[Instances]) -> List[Instances]:
    """
    Args:
        proposals (list[Instances]): a list of N Instances, where N is the
            number of images.

    Returns:
        proposals: only contains proposals with at least one visible keypoint.

    Note that this is still slightly different from Detectron.
    In Detectron, proposals for training keypoint head are re-sampled from
    all the proposals with IOU>threshold & >=1 visible keypoint.

    Here, the proposals are first sampled from all proposals with
    IOU>threshold, then proposals with no visible keypoint are filtered out.
    This strategy seems to make no difference on Detectron and is easier to implement.
    """
    ret = []
    all_num_fg = []
    for proposals_per_image in proposals:
        # If empty/unannotated image (hard negatives), skip filtering for train
        if len(proposals_per_image) == 0:
            ret.append(proposals_per_image)
            continue
        gt_keypoints = proposals_per_image.gt_keypoints.tensor
        # #fg x K x 3
        vis_mask = gt_keypoints[:, :, 2] >= 1
        xs, ys = gt_keypoints[:, :, 0], gt_keypoints[:, :, 1]
        proposal_boxes = proposals_per_image.proposal_boxes.tensor.unsqueeze(
            dim=1)  # #fg x 1 x 4
        kp_in_box = ((xs >= proposal_boxes[:, :, 0])
                     & (xs <= proposal_boxes[:, :, 2])
                     & (ys >= proposal_boxes[:, :, 1])
                     & (ys <= proposal_boxes[:, :, 3]))
        selection = (kp_in_box & vis_mask).any(dim=1)
        selection_idxs = nonzero_tuple(selection)[0]
        all_num_fg.append(selection_idxs.numel())
        ret.append(proposals_per_image[selection_idxs])

    storage = get_event_storage()
    storage.put_scalar("keypoint_head/num_fg_samples", np.mean(all_num_fg))
    return ret
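A toy, self-contained illustration (made-up coordinates) of the (N, K) broadcasting used above to test keypoints against their proposal boxes:

import torch

kps = torch.tensor([[[5., 5., 2.], [50., 50., 0.]],     # proposal 0: one visible kp inside its box
                    [[50., 50., 1.], [60., 60., 1.]]])  # proposal 1: visible kps, but outside its box
boxes = torch.tensor([[0., 0., 10., 10.],
                      [0., 0., 10., 10.]]).unsqueeze(1)  # (N, 1, 4), XYXY

vis = kps[:, :, 2] >= 1
xs, ys = kps[:, :, 0], kps[:, :, 1]
in_box = (xs >= boxes[..., 0]) & (xs <= boxes[..., 2]) & (ys >= boxes[..., 1]) & (ys <= boxes[..., 3])
keep = (in_box & vis).any(dim=1)
assert keep.tolist() == [True, False]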
Example #9
    def set_low_quality_matches_(self, match_labels, match_quality_matrix):
        """
        Produce additional matches for predictions that have only low-quality matches.
        Specifically, for each ground-truth G find the set of predictions that have
        maximum overlap with it (including ties); for each prediction in that set, if
        it is unmatched, then match it to the ground-truth G.
        This function implements the RPN assignment case (i) in Sec. 3.1.2 of
        :paper:`Faster R-CNN`.
        """
        # For each gt, find the prediction with which it has the highest quality
        highest_quality_foreach_gt, _ = match_quality_matrix.max(dim=1)
        # Find the highest quality match available, even if it is low, including ties.
        # Note that the match qualities must be positive due to the use of
        # `torch.nonzero`.
        _, pred_inds_with_highest_quality = nonzero_tuple(
            match_quality_matrix == highest_quality_foreach_gt[:, None])
        # If an anchor was labeled positive only due to a low-quality match
        # with gt_A, but it has larger overlap with gt_B, its matched index will still be gt_B.
        # This follows the implementation in Detectron, and is found to have no significant impact.
        match_labels[pred_inds_with_highest_quality] = 1
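A self-contained toy run (assumed IoU values, torch.nonzero in place of detectron2's nonzero_tuple) of the low-quality matching rule: for each gt row, every prediction that attains that gt's maximum IoU (including ties) is force-labeled positive.

import torch

# match_quality_matrix[g, p] = IoU between gt g and prediction p (made-up values)
quality = torch.tensor([[0.10, 0.25, 0.25],
                        [0.05, 0.02, 0.30]])
match_labels = torch.zeros(3, dtype=torch.int8)  # all predictions currently unmatched

highest_per_gt, _ = quality.max(dim=1)                                    # [0.25, 0.30]
_, pred_inds = torch.nonzero(quality == highest_per_gt[:, None], as_tuple=True)
match_labels[pred_inds] = 1
assert match_labels.tolist() == [0, 1, 1]   # predictions 1 and 2 attain (or tie) a gt's maximum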
Example #10
    def box_reg_loss(self, proposal_boxes, gt_boxes, pred_deltas, gt_classes):
        """
        Args:
            proposal_boxes/gt_boxes are tensors with the same shape (R, 4 or 5).
            pred_deltas has shape (R, 4 or 5), or (R, num_classes * (4 or 5)).
            gt_classes is a long tensor of shape R, the gt class label of each proposal.
            R shall be the number of proposals.
        """
        box_dim = proposal_boxes.shape[1]  # 4 or 5
        # Regression loss is only computed for foreground proposals (those matched to a GT)
        fg_inds = nonzero_tuple((gt_classes >= 0)
                                & (gt_classes < self.num_classes))[0]
        if pred_deltas.shape[1] == box_dim:  # cls-agnostic regression
            fg_pred_deltas = pred_deltas[fg_inds]
        else:
            fg_pred_deltas = pred_deltas.view(-1, self.num_classes,
                                              box_dim)[fg_inds,
                                                       gt_classes[fg_inds]]

        loss_box_reg = _dense_box_regression_loss(
            [proposal_boxes[fg_inds]],
            self.box2box_transform,
            [fg_pred_deltas.unsqueeze(0)],
            [gt_boxes[fg_inds]],
            ...,
            self.box_reg_loss_type,
            self.smooth_l1_beta,
        )

        # The reg loss is normalized using the total number of regions (R), not the number
        # of foreground regions even though the box regression loss is only defined on
        # foreground regions. Why? Because doing so gives equal training influence to
        # each foreground example. To see how, consider two different minibatches:
        #  (1) Contains a single foreground region
        #  (2) Contains 100 foreground regions
        # If we normalize by the number of foreground regions, the single example in
        # minibatch (1) will be given 100 times as much influence as each foreground
        # example in minibatch (2). Normalizing by the total number of regions, R,
        # means that the single example in minibatch (1) and each of the 100 examples
        # in minibatch (2) are given equal influence.
        return loss_box_reg / max(gt_classes.numel(), 1.0)  # return 0 if empty
Example #11
    def inference_single_image(
        self,
        anchors: List[Boxes],
        box_cls: List[Tensor],
        box_delta: List[Tensor],
        image_size: Tuple[int, int],
    ):
        """
        Single-image inference. Return bounding-box detection results by thresholding
        on scores and applying non-maximum suppression (NMS).

        Arguments:
            anchors (list[Boxes]): list of #feature levels. Each entry contains
                a Boxes object, which contains all the anchors in that feature level.
            box_cls (list[Tensor]): list of #feature levels. Each entry contains
                tensor of size (H x W x A, K)
            box_delta (list[Tensor]): Same shape as 'box_cls' except that K becomes 4.
            image_size (tuple(H, W)): a tuple of the image height and width.

        Returns:
            Same as `inference`, but for only one image.
        """
        boxes_all = []
        scores_all = []
        class_idxs_all = []

        # Iterate over every feature level
        for box_cls_i, box_reg_i, anchors_i in zip(box_cls, box_delta,
                                                   anchors):
            # (HxWxAxK,)
            predicted_prob = box_cls_i.flatten().sigmoid_()

            # Apply two filtering below to make NMS faster.
            # 1. Keep boxes with confidence score higher than threshold
            keep_idxs = predicted_prob > self.test_score_thresh
            predicted_prob = predicted_prob[keep_idxs]
            topk_idxs = nonzero_tuple(keep_idxs)[0]

            # 2. Keep top k top scoring boxes only
            num_topk = min(self.test_topk_candidates, topk_idxs.size(0))
            # torch.sort is actually faster than .topk (at least on GPUs)
            predicted_prob, idxs = predicted_prob.sort(descending=True)
            predicted_prob = predicted_prob[:num_topk]
            topk_idxs = topk_idxs[idxs[:num_topk]]

            anchor_idxs = topk_idxs // self.num_classes
            classes_idxs = topk_idxs % self.num_classes

            box_reg_i = box_reg_i[anchor_idxs]
            anchors_i = anchors_i[anchor_idxs]
            # predict boxes
            predicted_boxes = self.box2box_transform.apply_deltas(
                box_reg_i, anchors_i.tensor)

            boxes_all.append(predicted_boxes)
            scores_all.append(predicted_prob)
            class_idxs_all.append(classes_idxs)

        boxes_all, scores_all, class_idxs_all = [
            cat(x) for x in [boxes_all, scores_all, class_idxs_all]
        ]
        keep = batched_nms(boxes_all, scores_all, class_idxs_all,
                           self.test_nms_thresh)
        keep = keep[:self.max_detections_per_image]

        result = Instances(image_size)
        result.pred_boxes = Boxes(boxes_all[keep])
        result.scores = scores_all[keep]
        result.pred_classes = class_idxs_all[keep]
        return result
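A small sketch (toy sizes, plain torch) of how the flattened per-level score indices above are decoded back into an anchor index and a class index via // and % with num_classes:

import torch

num_anchors, num_classes = 4, 3
scores = torch.zeros(num_anchors * num_classes)   # flattened (A * K,) scores
scores[2 * num_classes + 1] = 0.9   # anchor 2, class 1
scores[3 * num_classes + 0] = 0.8   # anchor 3, class 0

keep = torch.nonzero(scores > 0.5, as_tuple=True)[0]
anchor_idxs = keep // num_classes
class_idxs = keep % num_classes
assert anchor_idxs.tolist() == [2, 3] and class_idxs.tolist() == [1, 0]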
Example #12
	def forward(self, x: List[torch.Tensor], box_lists: List[Boxes]):
		"""
		Args:
			x (list[Tensor]): A list of feature maps of NCHW shape, with scales matching those
				used to construct this module.
			box_lists (list[Boxes] | list[RotatedBoxes]):
				A list of N Boxes or N RotatedBoxes, where N is the number of images in the batch.
				The box coordinates are defined on the original image and
				will be scaled by the `scales` argument of :class:`ROIPooler`.

		Returns:
			Tensor:
				A tensor of shape (M, C, output_size, output_size) where M is the total number of
				boxes aggregated over all N batch images and C is the number of channels in `x`.
		"""
		num_level_assignments = len(self.level_poolers)

		assert isinstance(x, list) and isinstance(
			box_lists, list
		), "Arguments to pooler must be lists"
		assert (
			len(x) == num_level_assignments
		), "unequal value, num_level_assignments={}, but x is list of {} Tensors".format(
			num_level_assignments, len(x)
		)

		assert len(box_lists) == x[0].size(
			0
		), "unequal value, x[0] batch dim 0 is {}, but box_list has length {}".format(
			x[0].size(0), len(box_lists)
		)
		if len(box_lists) == 0:
			return torch.zeros(
				(0, x[0].shape[1]) + self.output_size, device=x[0].device, dtype=x[0].dtype
			)

		pooler_fmt_boxes = convert_boxes_to_pooler_format(box_lists)

		if num_level_assignments == 1:
			return self.level_poolers[0](x[0], pooler_fmt_boxes)

		level_assignments = assign_boxes_to_levels(
			box_lists, self.min_level, self.max_level, self.canonical_box_size, self.canonical_level
		)

		num_boxes = pooler_fmt_boxes.size(0)
		num_channels = x[0].shape[1]
		output_size = self.output_size[0]

		dtype, device = x[0].dtype, x[0].device

		if(self.output_size == 14):
			boxes = pooler_fmt_boxes
			scales = []
			for i in range(boxes.shape[0]):
				scales.append(self.level_poolers[level_assignments[i]].spatial_scale)
			scale = torch.tensor(scales,device=device)
			boxes[:,1:3] = torch.floor(boxes[:,1:3]*scale[:,None])
			boxes[:,3:5] = torch.ceil(boxes[:,3:5]*scale[:,None])
			boxes = boxes.to(device=device,dtype=torch.long)

			boxes[boxes[:,1]< 0,1] = 0
			boxes[boxes[:,2]< 0,2] = 0

			#boxes[boxes[:,3] >= feats[0].shape[-1],3] = feats[0].shape[-1]-1
			#boxes[boxes[:,4] >= feats[0].shape[-2],4] = feats[0].shape[-2]-1
			mask = torch.logical_and((boxes[:,3]-boxes[:,1]) > 1,(boxes[:,4]-boxes[:,2]) > 1)
			height = boxes[:,4] - boxes[:,2] + 1
			width = boxes[:,3] - boxes[:,1] + 1
			if boxes.shape[0] > 0:
				max_h,max_w = torch.max(torch.max(height),0)[0], torch.max(torch.max(width),0)[0]
				max_h,max_w = torch.max(torch.tensor([max_h,3])), torch.max(torch.tensor([max_w,3]))
			else:
				max_h,max_w = torch.tensor(1,device=device),torch.tensor(1,device=device)
			output = torch.zeros(
				(num_boxes, num_channels, max_h, max_w), dtype=dtype, device=device
			)
			
			
			for i in range(boxes.shape[0]):
				ind,x0,y0,x1,y1 = boxes[i]
				print(x1,x[level_assignments[i]][0].shape[-1]-1)
				x1 = torch.min(x1,x[level_assignments[i]][0].shape[-1]-1)
				y1 = torch.min(y1,x[level_assignments[i]][0].shape[-2]-1)
				boxes[i][3] = x1
				boxes[i][4] = y1
				output[i,:,:y1-y0+1,:x1-x0+1] = x[level_assignments[i]][ind][:,y0:y1+1,x0:x1+1]

			boxes[:,0] = torch.arange(boxes.shape[0])  # changed this from 0
			boxes[:,3:5] -= boxes[:,1:3]
			boxes[:,1:3] = 0


			return output, boxes
		
		elif(self.output_size == 7):

			output = torch.zeros(
			(num_boxes, num_channels, output_size, output_size), dtype=dtype, device=device)

			for level, pooler in enumerate(self.level_poolers):
				inds = nonzero_tuple(level_assignments == level)[0]
				pooler_fmt_boxes_level = pooler_fmt_boxes[inds]
				output[inds] = pooler(x[level], pooler_fmt_boxes_level)

			return output
Example #13
    def binary_cross_entropy_loss(self):
        """
        Compute the cross_entropy_loss for attribute Classification.

        Returns:
            scalar Tensor
        """
        if self._no_instances:
            return 0.0 * self.pred_attr_class_logits.sum()
        #gt_proposal_deltas = self.box2box_transform.get_deltas(
        #    self.proposals.tensor, self.gt_boxes.tensor
        #)
        #ignore_nan_attr_class = 0
        attr_dim = self.max_attr_pred
        #box_dim = gt_proposal_deltas.size(1)  # 4 or 5
        cls_agnostic_attr_reg = self.pred_attr_class_logits.size(
            1) == self.num_attr_classes
        device = self.pred_proposal_deltas.device

        bg_class_ind = self.pred_class_logits.shape[1] - 1
        # print("BG Class Ind", bg_class_ind)
        # BG Class Ind = 46: Number of Classes - background
        # Box delta loss is only computed between the prediction for the gt class k
        # (if 0 <= k < bg_class_ind) and the target; there is no loss defined on predictions
        # for non-gt classes and background.
        # Empty fg_inds produces a valid loss of zero as long as the size_average
        # arg to smooth_l1_loss is False (otherwise it uses torch.mean internally
        # and would produce a nan loss).
        fg_inds = nonzero_tuple((self.gt_classes >= 0)
                                & (self.gt_classes < bg_class_ind))[0]
        #print("fg_ind: ", fg_inds)
        # fg_ind: Has indices of Proposal with valid classes
        # For Ex: There are 130 valid proposals. So dimension would be [130]
        if cls_agnostic_attr_reg:
            # pred_proposal_deltas only corresponds to foreground class for agnostic
            #print("Entered Loss for Attr Class Agnostic")
            gt_class_cols = torch.arange(self.num_attr_classes, device=device)
        else:
            fg_gt_classes = self.gt_classes[fg_inds]
            #[ANMOL] print("fg_gt_classes: ", fg_gt_classes.shape)
            #[ANMOL] fg_gt_classes: has classes for valid indices of proposals found by fg_inds
            # pred_proposal_deltas for class k are located in columns [b * k : b * k + b],
            # where b is the dimension of box representation (4 or 5)
            # Note that compared to Detectron1,
            # we do not perform bounding box regression for background classes.
            gt_class_cols = self.num_attr_classes * fg_gt_classes[:, None] + torch.arange(
                self.num_attr_classes, device=device)
            # [ANMOL] Dimensions of gt_class_cols [validproposals, 295], This indices will be different values based on object class
            # [ANMOL] Finds cols as we have num_classes * attr_class columns in total in else case.
            # [ANMOL] gt_class_cols has indices of valid columns [validProposals, 295]

        gt_attr_tensor = torch.zeros(
            (self.gt_attr_classes[fg_inds].shape[0], self.num_attr_classes),
            device=device)
        # [ANMOL] gt_attr_class shape has gt class numbers for all proposals [2048, 14]
        # [ANMOL] gt_attr_classes[fg_inds] select valid proposals from gt: [validproposal, 14]
        # [ANMOL] gt_attr_tensor is just one hot repr for gt_attr_classes: [validproposal, 294]
        gt_attr_tensor.scatter_(1, self.gt_attr_classes[fg_inds], 1)
        # [ANMOL] Set all valid classes to 1 in gt_attr_tensor 1-hot tensor. Confirmed it by sort function.
        #postive_negative_sampling = 14*3
        remove_allzeros_rows = 1
        focal_loss_enabled = 1
        if self.ignore_nan_attr_class:
            #print("Before removing 0s shape Pred: ", self.pred_attr_class_logits[fg_inds[:, None], gt_class_cols][:,:-1].shape)
            #print("Before removing 0s shape gt: ", gt_attr_tensor[:, :-1].shape)
            #print("Before remove 0s gt: ", torch.sort(gt_attr_tensor[:,:-1], 1, descending=True))
            #print("Entered Ignore Nan")
            gt_attr_tensor_t = gt_attr_tensor[:, :-1]
            pred_attr_class_logits_t = self.pred_attr_class_logits[
                fg_inds[:, None], gt_class_cols][:, :-1]
            #if positive_negative_sampling > 0:
            #find indices of positive samples
            #pass
            #find indices of 14*3 - gts false postive samples for loss
            if remove_allzeros_rows:
                #print("Entered Remove all Zeros")
                pred_attr_class_logits_t = self.pred_attr_class_logits[
                    fg_inds[:, None],
                    gt_class_cols][:, :-1][gt_attr_tensor_t.sum(dim=1) != 0]
                gt_attr_tensor_t = gt_attr_tensor_t[gt_attr_tensor_t.sum(
                    dim=1) != 0]
            if focal_loss_enabled:
                #print("Using Focal Loss")
                # Impl from RetinaNet
                #alpha = 0.25
                #gamma = 2
                #p = pred_attr_class_logits_t.sigmoid()
                #pt = p*gt_attr_tensor_t + (1-p)*(1-gt_attr_tensor_t)         # pt = p if t > 0 else 1-p
                #w = alpha*gt_attr_tensor_t + (1-alpha)*(1-gt_attr_tensor_t)  # w = alpha if t > 0 else 1-alpha
                #w = w * (1-pt).pow(gamma)
                #loss_attr_loss = F.binary_cross_entropy_with_logits(pred_attr_class_logits_t, gt_attr_tensor_t, w.detach(), size_average=False)
                #xt = pred_attr_class_logits_t*(2*gt_attr_tensor_t-1)  # xt = x if t > 0 else -x
                #pt = (2*xt+1).sigmoid()
                #w = alpha*gt_attr_tensor_t + (1-alpha)*(1-gt_attr_tensor_t)
                #focal_loss = -w*pt.log() / 2
                #loss_attr_loss = focal_loss.sum()
                #IMPL 2:
                alpha = 1
                gamma = 2
                BCE_loss = F.binary_cross_entropy_with_logits(
                    pred_attr_class_logits_t, gt_attr_tensor_t, reduction="none")
                pt = torch.exp(-BCE_loss)
                F_loss = alpha * (1 - pt)**gamma * BCE_loss
                loss_attr_loss = torch.mean(F_loss)
            #B = A[A.sum(dim=1) != 0]
            #loss_attr_loss = F.binary_cross_entropy_with_logits(self.pred_attr_class_logits[fg_inds[:, None], gt_class_cols][:,:-1], gt_attr_tensor[:, :-1])
            #print("After removing 0s shape Pred: ", pred_attr_class_logits_t.shape)
            #print("After removing 0s shape gt: ", gt_attr_tensor_t.shape)
            #print("After remove 0s gt: ", torch.sort(gt_attr_tensor_t, 1, descending=True))
            else:
                loss_attr_loss = F.binary_cross_entropy_with_logits(
                    pred_attr_class_logits_t, gt_attr_tensor_t)
        else:
            loss_attr_loss = F.binary_cross_entropy_with_logits(
                self.pred_attr_class_logits[fg_inds[:, None], gt_class_cols],
                gt_attr_tensor)
        # [ANMOL] predi_attr_class_logits in case of class dependency has dimension [allProposals, num_class*attr_classes]
        # [ANMOL] fg_ind has the valid proposals, for each proposal gt_class_cols as right set of 294 (attr_class) columns indices
        # [ANMOL] pred_attr_class_logits dimension again would be [validProposals, 294]

        # The loss is normalized using the total number of regions (R), not the number
        # of foreground regions even though the box regression loss is only defined on
        # foreground regions. Why? Because doing so gives equal training influence to
        # each foreground example. To see how, consider two different minibatches:
        #  (1) Contains a single foreground region
        #  (2) Contains 100 foreground regions
        # If we normalize by the number of foreground regions, the single example in
        # minibatch (1) will be given 100 times as much influence as each foreground
        # example in minibatch (2). Normalizing by the total number of regions, R,
        # means that the single example in minibatch (1) and each of the 100 examples
        # in minibatch (2) are given equal influence.
        #loss_attr_loss = loss_attr_loss
        #/ self.gt_classes.numel()
        #loss_box_reg = loss_box_reg / self.gt_classes.numel()
        return loss_attr_loss
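A minimal, self-contained sketch (hypothetical sizes) of the scatter_ call that builds the multi-hot attribute target above; the last attribute column is assumed here to be the padding/ignore bucket, which appears to be why the example slices it off with [:, :-1]:

import torch

num_attr = 6
attr_idxs = torch.tensor([[1, 4, 5],    # proposal 0 has attributes 1 and 4; 5 is a padding slot
                          [2, 5, 5]])   # proposal 1 has attribute 2
target = torch.zeros((attr_idxs.shape[0], num_attr))
target.scatter_(1, attr_idxs, 1)        # multi-hot: write 1 at each listed attribute index
# dropping the last ("padding") column mirrors the [:, :-1] slicing in the example above
assert target[:, :-1].tolist() == [[0., 1., 0., 0., 1.],
                                   [0., 0., 1., 0., 0.]]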
Example #14
    def forward(
        self,
        x: List[torch.Tensor],
        box_lists: List[Boxes],
        level_ids=None,
    ):
        """
        Args:
            x (list[Tensor]): A list of feature maps of NCHW shape, with scales matching those
                used to construct this module.
            box_lists (list[Boxes] | list[RotatedBoxes]):
                A list of N Boxes or N RotatedBoxes, where N is the number of images in the batch.
                The box coordinates are defined on the original image and
                will be scaled by the `scales` argument of :class:`ROIPooler`.

        Returns:
            Tensor:
                A tensor of shape (M, C, output_size, output_size) where M is the total number of
                boxes aggregated over all N batch images and C is the number of channels in `x`.
        """
        num_level_assignments = len(self.level_poolers)

        assert isinstance(x, list) and isinstance(
            box_lists, list), "Arguments to pooler must be lists"
        assert (
            len(x) == num_level_assignments
        ), "unequal value, num_level_assignments={}, but x is list of {} Tensors".format(
            num_level_assignments, len(x))

        assert len(box_lists) == x[0].size(
            0
        ), "unequal value, x[0] batch dim 0 is {}, but box_list has length {}".format(
            x[0].size(0), len(box_lists))

        pooler_fmt_boxes = convert_boxes_to_pooler_format(box_lists)

        if num_level_assignments == 1:
            return self.level_poolers[0](x[0], pooler_fmt_boxes)

        level_assignments = assign_boxes_to_levels(
            box_lists,
            self.min_level,
            self.max_level,
            self.canonical_box_size,
            self.canonical_level,
            self.valid_range,
        )
        if level_ids is not None:
            level_assignments = cat(level_ids).to(torch.int64)

        num_boxes = len(pooler_fmt_boxes)
        num_channels = x[0].shape[1]
        output_size = self.output_size[0]

        dtype, device = x[0].dtype, x[0].device
        output = torch.zeros(
            (num_boxes, num_channels, output_size, output_size),
            dtype=dtype,
            device=device)

        if isinstance(self.level_poolers[0], ROILoopPool):
            output = torch.zeros(
                (num_boxes * 3, num_channels, output_size, output_size),
                dtype=dtype,
                device=device)

        for level, pooler in enumerate(self.level_poolers):
            inds = nonzero_tuple(level_assignments == level)[0]
            pooler_fmt_boxes_level = pooler_fmt_boxes[inds]

            if isinstance(pooler, ROILoopPool):
                inds = cat([inds, inds + num_boxes, inds + 2 * num_boxes],
                           dim=0)

            output[inds] = pooler(x[level], pooler_fmt_boxes_level)

        return output
    def forward(self, x: List[torch.Tensor], box_lists: List[Boxes]):
        """
        Args:
            x (list[Tensor]): A list of feature maps of NCHW shape, with scales matching those
                used to construct this module.
            box_lists (list[Boxes] | list[RotatedBoxes]):
                A list of N Boxes or N RotatedBoxes, where N is the number of images in the batch.
                The box coordinates are defined on the original image and
                will be scaled by the `scales` argument of :class:`ROIPooler`.

        Returns:
            Tensor:
                A tensor of shape (M, C, output_size, output_size) where M is the total number of
                boxes aggregated over all N batch images and C is the number of channels in `x`.
        """
        num_level_assignments = len(self.level_poolers)

        assert isinstance(x, list) and isinstance(
            box_lists, list), "Arguments to pooler must be lists"
        assert (
            len(x) == num_level_assignments
        ), "unequal value, num_level_assignments={}, but x is list of {} Tensors".format(
            num_level_assignments, len(x))

        assert len(box_lists) == x[0].size(
            0
        ), "unequal value, x[0] batch dim 0 is {}, but box_list has length {}".format(
            x[0].size(0), len(box_lists))
        if len(box_lists) == 0:
            return torch.zeros((0, x[0].shape[1]) + self.output_size,
                               device=x[0].device,
                               dtype=x[0].dtype)

        pooler_fmt_boxes = convert_boxes_to_pooler_format(box_lists)

        if num_level_assignments == 1:
            return self.level_poolers[0](x[0], pooler_fmt_boxes)

        level_assignments = assign_boxes_to_levels(box_lists, self.min_level,
                                                   self.max_level,
                                                   self.canonical_box_size,
                                                   self.canonical_level)

        num_boxes = pooler_fmt_boxes.size(0)
        num_channels = x[0].shape[1]
        output_size = self.output_size[0]

        dtype, device = x[0].dtype, x[0].device
        output = torch.zeros(
            (num_boxes, num_channels, output_size, output_size),
            dtype=dtype,
            device=device)

        for level, pooler in enumerate(self.level_poolers):
            inds = nonzero_tuple(level_assignments == level)[0]

            boxes = pooler_fmt_boxes[inds]
            scale = pooler.spatial_scale
            boxes[:, 1:3] = torch.floor(boxes[:, 1:3] * scale)
            boxes[:, 3:5] = torch.ceil(boxes[:, 3:5] * scale)
            boxes = boxes.to(device=device, dtype=torch.long)

            feats = x[level]

            # replacing the below for loop
            boxes[boxes[:, 1] < 0, 1] = 0
            boxes[boxes[:, 2] < 0, 2] = 0

            boxes[boxes[:, 3] >= feats[0].shape[-1],
                  3] = feats[0].shape[-1] - 1
            boxes[boxes[:, 4] >= feats[0].shape[-2],
                  4] = feats[0].shape[-2] - 1

            if boxes.shape[0] < 1:
                continue

            height = boxes[:, 4] - boxes[:, 2] + 1
            width = boxes[:, 3] - boxes[:, 1] + 1
            max_h, max_w = torch.max(torch.max(height),
                                     0)[0], torch.max(torch.max(width), 0)[0]
            crops = torch.zeros((boxes.shape[0], 256, max_h, max_w),
                                device=device,
                                dtype=torch.float32)
            for i in range(boxes.shape[0]):
                ind, x0, y0, x1, y1 = boxes[i]
                crops[i, :, :y1 - y0 + 1, :x1 - x0 + 1] = feats[ind][:,
                                                                     y0:y1 + 1,
                                                                     x0:x1 + 1]
            boxes[:, 0] = torch.arange(boxes.shape[0])  # changed this from 0
            boxes[:, 3:5] -= boxes[:, 1:3]
            boxes[:, 1:3] = 0

            if self.fixed:
                crops, boxes = self.fixed_learnable_downsample(
                    crops, boxes, out_shape=self.output_size, device=device)
                output[inds] = pooler(crops, boxes, 1.0)

        return output
Example #16
    def forward(self, x: List[torch.Tensor], box_lists):
        """
        Args:
            x (list[Tensor]): A list of feature maps of NCHW shape, with scales matching those
                used to construct this module.
            box_lists (list[Boxes] | list[RotatedBoxes]):
                A list of N Boxes or N RotatedBoxes, where N is the number of images in the batch.
                The box coordinates are defined on the original image and
                will be scaled by the `scales` argument of :class:`ROIPooler`.
        Returns:
            Tensor:
                A tensor of shape (N, M, C * output_size * output_size)
                N: batch_size
                M: max box num per image
        """
        num_level_assignments = len(self.level_poolers)

        assert isinstance(x, list) and isinstance(
            box_lists, list
        ), "Arguments to pooler must be lists"
        assert (
            len(x) == num_level_assignments
        ), "unequal value, num_level_assignments={}, but x is list of {} Tensors".format(
            num_level_assignments, len(x)
        )

        assert len(box_lists) == x[0].size(
            0
        ), "unequal value, x[0] batch dim 0 is {}, but box_list has length {}".format(
            x[0].size(0), len(box_lists)
        )

        pooler_fmt_boxes = convert_boxes_to_pooler_format(box_lists)

        if num_level_assignments == 1:
            return self.level_poolers[0](x[0], pooler_fmt_boxes)

        level_assignments = assign_boxes_to_levels(
            box_lists, self.min_level, self.max_level, self.canonical_box_size, self.canonical_level
        )

        # num_boxes = len(pooler_fmt_boxes)
        num_proposals = [len(boxes.tensor) for boxes in box_lists]
        max_num_proposals = max(num_proposals)
        num_boxes = x[0].shape[0] * max_num_proposals
        num_channels = x[0].shape[1]
        output_size = self.output_size[0]

        dtype, device = x[0].dtype, x[0].device
        output = torch.zeros(
            (num_boxes, num_channels, output_size, output_size), dtype=dtype, device=device
        )

        inds_to_padded_inds = torch.zeros((sum(num_proposals),), dtype=torch.int64, device=device)
        accumulated_proposals = 0
        for batch_id in range(x[0].shape[0]):
            inds = torch.arange(start=0, end=num_proposals[batch_id], device=device)
            from_inds = inds + batch_id * max_num_proposals
            to_inds = inds + accumulated_proposals
            inds_to_padded_inds[to_inds] = from_inds
            accumulated_proposals += num_proposals[batch_id]

        for level, (x_level, pooler) in enumerate(zip(x, self.level_poolers)):
            inds = nonzero_tuple(level_assignments == level)[0]
            padded_inds = inds_to_padded_inds[inds]
            pooler_fmt_boxes_level = pooler_fmt_boxes[inds]
            output[padded_inds] = pooler(x_level, pooler_fmt_boxes_level)

        output = output.view(x[0].shape[0], max_num_proposals, num_channels, output_size, output_size)

        seq_lengths = torch.tensor(num_proposals, dtype=torch.int64, device=device)
        masks = torch.arange(max_num_proposals, device=device)[None, :] >= seq_lengths[:, None]

        return output, masks, inds_to_padded_inds
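A toy walkthrough (assumed proposal counts, plain torch) of the padding bookkeeping above: flat box indices are mapped to slots in the (batch, max_num_proposals) padded layout, and masks marks the padded slots.

import torch

num_proposals = [2, 3]                      # hypothetical number of boxes per image
max_num = max(num_proposals)
inds_to_padded_inds = torch.zeros(sum(num_proposals), dtype=torch.int64)
acc = 0
for batch_id, n in enumerate(num_proposals):
    inds = torch.arange(n)
    inds_to_padded_inds[inds + acc] = inds + batch_id * max_num
    acc += n
assert inds_to_padded_inds.tolist() == [0, 1, 3, 4, 5]   # image 0 -> slots 0,1; image 1 -> slots 3,4,5

seq_lengths = torch.tensor(num_proposals)
masks = torch.arange(max_num)[None, :] >= seq_lengths[:, None]
assert masks.tolist() == [[False, False, True], [False, False, False]]  # True = padded slot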