Example #1
    def filter_proposals(self, proposals, objectness, image_shapes,
                         num_anchors_per_level):
        # type: (Tensor, Tensor, List[Tuple[int, int]], List[int]) -> Tuple[List[Tensor], List[Tensor]]
        num_images = proposals.shape[0]
        device = proposals.device
        # do not backprop through objectness
        objectness = objectness.detach()
        objectness = objectness.reshape(num_images, -1)

        levels = [
            torch.full((n, ), idx, dtype=torch.int64, device=device)
            for idx, n in enumerate(num_anchors_per_level)
        ]
        levels = torch.cat(levels, 0)
        levels = levels.reshape(1, -1).expand_as(objectness)

        # select top_n boxes independently per level before applying nms
        top_n_idx = self._get_top_n_idx(objectness, num_anchors_per_level)

        image_range = torch.arange(num_images, device=device)
        batch_idx = image_range[:, None]

        objectness = objectness[batch_idx, top_n_idx]
        levels = levels[batch_idx, top_n_idx]
        proposals = proposals[batch_idx, top_n_idx]

        final_boxes = []
        final_scores = []
        for boxes, scores, lvl, img_shape in zip(proposals, objectness, levels,
                                                 image_shapes):
            boxes = box_ops.clip_boxes_to_image(boxes, img_shape)
            keep = box_ops.remove_small_boxes(boxes, self.min_size)
            boxes, scores, lvl = boxes[keep], scores[keep], lvl[keep]
            # non-maximum suppression, independently done per level
            keep = box_ops.batched_nms(boxes, scores, lvl, self.nms_thresh)
            # keep only topk scoring predictions
            keep = keep[:self.post_nms_top_n()]
            boxes, scores = boxes[keep], scores[keep]
            final_boxes.append(boxes)
            final_scores.append(scores)
        return final_boxes, final_scores
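For reference, a minimal standalone sketch (assuming only torch and torchvision) of the grouping behaviour batched_nms provides in the loop above: boxes suppress each other only when they share the same value in the lvl tensor.

import torch
from torchvision.ops import batched_nms

boxes = torch.tensor([[0., 0., 10., 10.],
                      [1., 1., 10., 10.],
                      [0., 0., 10., 10.]])
scores = torch.tensor([0.9, 0.8, 0.7])
levels = torch.tensor([0, 0, 1])  # third box lives on another FPN level

keep = batched_nms(boxes, scores, levels, iou_threshold=0.5)
# box 1 is suppressed by box 0 (same level, high IoU);
# box 2 survives because it never competes with level-0 boxes
print(keep)  # tensor([0, 2])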
Example #2
def batched_nms(
    boxes: torch.Tensor, scores: torch.Tensor, idxs: torch.Tensor, iou_threshold: float
):
    """
    Same as torchvision.ops.boxes.batched_nms, but safer.
    """
    assert boxes.shape[-1] == 4
    # TODO may need better strategy.
    # Investigate after having a fully-cuda NMS op.
    if len(boxes) < 40000:
        # fp16 does not have enough range for batched NMS
        return box_ops.batched_nms(boxes.float(), scores, idxs, iou_threshold)

    result_mask = scores.new_zeros(scores.size(), dtype=torch.bool)
    for id in torch.jit.annotate(List[int], torch.unique(idxs).cpu().tolist()):
        mask = (idxs == id).nonzero().view(-1)
        keep = nms(boxes[mask], scores[mask], iou_threshold)
        result_mask[mask[keep]] = True
    keep = result_mask.nonzero().view(-1)
    keep = keep[scores[keep].argsort(descending=True)]
    return keep
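For comparison, a hedged sketch of the well-known coordinate-offset trick that torchvision's own batched_nms uses for moderate box counts, instead of the per-class Python loop above: each group is shifted into its own disjoint region of the plane, so a single class-agnostic NMS behaves like independent per-group NMS.

import torch
from torchvision.ops import nms

def batched_nms_by_offset(boxes, scores, idxs, iou_threshold):
    # Shift every group to a disjoint region so boxes from different
    # groups can never overlap, then run one plain NMS over everything.
    if boxes.numel() == 0:
        return torch.empty((0,), dtype=torch.int64, device=boxes.device)
    max_coordinate = boxes.max()
    offsets = idxs.to(boxes) * (max_coordinate + 1)
    return nms(boxes + offsets[:, None], scores, iou_threshold)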
Example #3
    def postprocess(self, x, anchors, regression, classification):
        # modified from https://github.com/zylo117/Yet-Another-EfficientDet-Pytorch/blob/master/utils/utils.py
        transformed_anchors = self.regressBoxes(anchors, regression)
        transformed_anchors = self.clipBoxes(transformed_anchors, x)
        scores = torch.max(classification, dim=2, keepdim=True)[0]
        scores_over_thresh = (scores > self.nms_score_thresh)[:, :, 0]
        out = []
        for i in range(x.shape[0]):
            if scores_over_thresh[i].sum() == 0:
                out.append({
                    'boxes': torch.tensor(()),
                    'labels': torch.tensor(()),
                    'scores': torch.tensor(()),
                })
                continue

            classification_per = classification[i, scores_over_thresh[i, :], ...].permute(1, 0)
            transformed_anchors_per = transformed_anchors[i, scores_over_thresh[i, :], ...]
            scores_per = scores[i, scores_over_thresh[i, :], ...]
            scores_, classes_ = classification_per.max(dim=0)
            anchors_nms_idx = batched_nms(transformed_anchors_per, scores_per[:, 0], classes_, iou_threshold=self.nms_iou_thresh)

            if anchors_nms_idx.shape[0] != 0:
                classes_ = classes_[anchors_nms_idx] + 1 # 0 is background and gets removed in metric, but is first class in model
                scores_ = scores_[anchors_nms_idx]
                boxes_ = transformed_anchors_per[anchors_nms_idx, :]

                out.append({
                    'boxes': boxes_.cpu(),
                    'labels': classes_.cpu(),
                    'scores': scores_.cpu(),
                })
            else:
                out.append({
                    'boxes': torch.tensor(()),
                    'labels': torch.tensor(()),
                    'scores': torch.tensor(()),
                })

        return out
Example #4
 def post_process(self, cls_predicts, box_predicts, valid_size):
     predicts = list()
     for cls, box, wh in zip(cls_predicts, box_predicts, valid_size):
         box[..., [0, 2]] = box[..., [0, 2]].clamp(min=0, max=wh[0])
         box[..., [1, 3]] = box[..., [1, 3]].clamp(min=0, max=wh[1])
         scores = cls.softmax(dim=-1)
         scores = scores[:, 1:]
         labels = torch.arange(scores.shape[-1], device=cls.device)
         labels = labels.view(1, -1).expand_as(scores)
         boxes = box.unsqueeze(1).repeat(1, scores.shape[-1], 1).reshape(-1, 4)
         scores = scores.reshape(-1)
         labels = labels.reshape(-1)
         inds = torch.nonzero(scores > self.box_score_thresh, as_tuple=False).squeeze(1)
         boxes, scores, labels = boxes[inds], scores[inds], labels[inds]
         keep = ((boxes[..., 2] - boxes[..., 0]) > 1e-2) & ((boxes[..., 3] - boxes[..., 1]) > 1e-2)
         boxes, scores, labels = boxes[keep], scores[keep], labels[keep]
         keep = batched_nms(boxes, scores, labels, self.box_nms_thresh)
         keep = keep[:self.box_detections_per_img]
         boxes, scores, labels = boxes[keep], scores[keep], labels[keep]
         pred = torch.cat([boxes, scores[:, None], labels[:, None]], dim=-1)
         predicts.append(pred)
     return predicts
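The unsqueeze/repeat/reshape lines above flatten every (box, class) pair into its own candidate row so that one batched_nms call can run per-class suppression; a small standalone illustration of that expansion (two boxes and three foreground classes assumed):

import torch

box = torch.arange(8, dtype=torch.float32).reshape(2, 4)            # 2 boxes
num_classes = 3
boxes = box.unsqueeze(1).repeat(1, num_classes, 1).reshape(-1, 4)   # (6, 4)
labels = torch.arange(num_classes).view(1, -1).expand(2, -1).reshape(-1)
print(boxes.shape, labels)  # torch.Size([6, 4]) tensor([0, 1, 2, 0, 1, 2])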
Example #5
 def __getitem__(self, idx):
     image, target, im_id = self.dataset[idx]
     boxes = torch.cat([target['boxes_h'], target['boxes_o']])
     # Convert ground truth boxes to zero-based index and the
     # representation from pixel indices to coordinates
     boxes[:, :2] -= 1
     labels = torch.cat(
         [49 * torch.ones_like(target['object']), target['object']])
     # Remove overlapping ground truth boxes
     keep = batched_nms(boxes,
                        torch.ones(len(boxes)),
                        labels,
                        iou_threshold=self.nms_thresh)
     boxes = boxes[keep]
     labels = labels[keep]
     # Convert HICODet object indices to COCO indices
     converted_labels = torch.as_tensor(
         [self.conversion[i.item()] for i in labels])
     # Apply transform
     image, target = self.transforms(
         image, dict(boxes=boxes, labels=converted_labels))
     return image, target, im_id
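Because every score is set to one here, batched_nms effectively de-duplicates the ground-truth boxes per label: among same-label boxes whose IoU exceeds nms_thresh, only one survives. A minimal illustration:

import torch
from torchvision.ops import batched_nms

boxes = torch.tensor([[0., 0., 10., 10.],
                      [0., 0., 10., 10.],    # duplicate of the first box
                      [20., 20., 30., 30.]])
labels = torch.tensor([1, 1, 1])
keep = batched_nms(boxes, torch.ones(len(boxes)), labels, iou_threshold=0.5)
print(len(keep))  # 2 -- one of the two duplicates is dropped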
Example #6
def postprocess(x, anchors, regression, classification, regressBoxes, clipBoxes, threshold, iou_threshold):
    transformed_anchors = regressBoxes(anchors, regression)
    transformed_anchors = clipBoxes(transformed_anchors, x)
    scores = torch.max(classification, dim=2, keepdim=True)[0]
    scores_over_thresh = (scores > threshold)[:, :, 0]
    out = []
    for i in range(x.shape[0]):
        if scores_over_thresh[i].sum() == 0:
            out.append({
                'rois': np.array(()),
                'class_ids': np.array(()),
                'scores': np.array(()),
            })
            continue

        classification_per = classification[i, scores_over_thresh[i, :], ...].permute(1, 0)
        transformed_anchors_per = transformed_anchors[i, scores_over_thresh[i, :], ...]
        scores_per = scores[i, scores_over_thresh[i, :], ...]
        scores_, classes_ = classification_per.max(dim=0)
        anchors_nms_idx = batched_nms(transformed_anchors_per, scores_per[:, 0], classes_, iou_threshold=iou_threshold)

        if anchors_nms_idx.shape[0] != 0:
            classes_ = classes_[anchors_nms_idx]
            scores_ = scores_[anchors_nms_idx]
            boxes_ = transformed_anchors_per[anchors_nms_idx, :]

            out.append({
                'rois': boxes_.cpu().numpy(),
                'class_ids': classes_.cpu().numpy(),
                'scores': scores_.cpu().numpy(),
            })
        else:
            out.append({
                'rois': np.array(()),
                'class_ids': np.array(()),
                'scores': np.array(()),
            })

    return out
Example #7
def nms(rgb_info, thermal_info):
    # RGB boxes append thermal boxes
    # Order: len(RGB) | len(thermal)
    # Boxes
    boxes = rgb_info['bbox'].copy()
    boxes.extend(thermal_info['bbox'])
    boxes = torch.Tensor(boxes)
    # Scores
    scores = rgb_info['score'].copy()
    scores.extend(thermal_info['score'])
    scores = torch.Tensor(scores)
    # Classes
    classes = rgb_info['class'].copy()
    classes.extend(thermal_info['class'])
    classes = torch.Tensor(classes)
    # Perform nms
    iou_threshold = 0.7
    keep_id = box_ops.batched_nms(boxes, scores, classes, iou_threshold)
    # Add to output
    out_boxes = Boxes(boxes[keep_id])
    out_scores = torch.Tensor(scores[keep_id])
    out_class = torch.Tensor(classes[keep_id])
    return out_boxes, out_scores, out_class
Example #8
    def select_over_all_levels(self, boxlists):
        num_images = len(boxlists)
        results = []
        for i in range(num_images):
            # multiclass nms
            keep = batched_nms(boxlists[i].bbox,
                               boxlists[i].get_field("scores"),
                               boxlists[i].get_field("labels"),
                               self.nms_thresh)
            result = boxlists[i][keep]
            number_of_detections = len(result)

            # Limit to max_per_image detections **over all classes**
            if number_of_detections > self.fpn_post_nms_top_n > 0:
                cls_scores = result.get_field("scores")
                image_thresh, _ = torch.kthvalue(
                    cls_scores.cpu(),
                    number_of_detections - self.fpn_post_nms_top_n + 1)
                keep = cls_scores >= image_thresh.item()
                keep = torch.nonzero(keep).squeeze(1)
                result = result[keep]
            results.append(result)
        return results
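The kthvalue call above simply recovers the score of the k-th best detection so that everything below it can be discarded; a hedged standalone illustration:

import torch

scores = torch.tensor([0.9, 0.1, 0.8, 0.3, 0.7])
fpn_post_nms_top_n = 3
# the (n - k + 1)-th smallest score is the k-th largest score
image_thresh, _ = torch.kthvalue(scores, scores.numel() - fpn_post_nms_top_n + 1)
keep = torch.nonzero(scores >= image_thresh).squeeze(1)
print(keep)  # tensor([0, 2, 4]) -- the three highest-scoring detections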
Example #9
    def post_process(self, box_predicts, cls_predicts, shapes):
        '''

        :param box_predicts(len=bs):  list(box_predict) , box_predict=[n_p+n_n,4]  4==>x1,y1,x2,y2
        :param cls_predicts(len=bs):  list(cls_predict) , cls_predict=[n_p+n_n,num_cls]
        :param shapes(len=bs):  list(shape)
        :return:
        '''

        ret_dets = list()
        for box, cls, shape in zip(box_predicts, cls_predicts, shapes):
            score = cls.softmax(dim=-1)
            max_val, max_idx = score.max(dim=-1)
            thresh_mask = max_val > self.cfg['box_score_thresh']
            positive_mask = max_idx > 0  # index 0 is the background class, so keep only labels > 0
            valid_mask = thresh_mask & positive_mask
            if valid_mask.sum() == 0:
                ret_dets.append(None)
                continue
            nms_box = box[valid_mask]
            nms_scores = max_val[valid_mask]
            nms_label_idx = max_idx[valid_mask] - 1
            idx = batched_nms(nms_box, nms_scores, nms_label_idx,
                              self.cfg['box_nms_thresh'])
            valid_idx = idx[:self.cfg['box_detections_per_img']]
            detects = torch.cat([
                nms_box[valid_idx], nms_scores[valid_idx].unsqueeze(-1),
                nms_label_idx[valid_idx].unsqueeze(-1)
            ],
                                dim=-1)
            detects[..., [0, 2]] = detects[..., [0, 2]].clamp(min=0,
                                                              max=shape[0])
            detects[..., [1, 3]] = detects[..., [1, 3]].clamp(min=0,
                                                              max=shape[1])
            ret_dets.append(detects)
        return ret_dets
Example #10
    def forward(self, x):
        """Forward function for the BboxNMS layer

        :param x: tuple containing Custom SSD output data as well as
            the decoded boxes, scores and classes
        :type x: tuple(torch.Tensor, torch.Tensor, list, list, list)
        :return: tuple containing Custom SSD output data as well as
            the nms filtered boxes, scores and classes
        :rtype: tuple(torch.Tensor, torch.Tensor, list, list, list)
        """

        total_nms_boxes = list()
        total_nms_scores = list()
        total_nms_classes = list()

        batch_encoded_cls, batch_encoded_reg, batch_boxes, batch_scores, batch_classes = x

        for scores, boxes, classes in zip(batch_scores, batch_boxes,
                                          batch_classes):
            boxes[:, 2] += boxes[:, 0]
            boxes[:, 3] += boxes[:, 1]

            chosen_ids = box_ops.batched_nms(boxes.float(), scores, classes,
                                             self.nms_thres)

            nms_scores = scores[chosen_ids]
            nms_boxes = boxes[chosen_ids]
            nms_boxes[:, 2] -= nms_boxes[:, 0]
            nms_boxes[:, 3] -= nms_boxes[:, 1]
            nms_classes = classes[chosen_ids]

            total_nms_boxes.append(nms_boxes)
            total_nms_scores.append(nms_scores)
            total_nms_classes.append(nms_classes)

        return batch_encoded_cls, batch_encoded_reg, total_nms_boxes, total_nms_scores, total_nms_classes
Example #11
	def postprocess_detections_spo(self,
							   class_logits,    # type: Tensor
							   sbj_cls_scores,
							   box_regression,  # type: Tensor
							   proposals,       # type: List[Tensor]
							   image_shapes     # type: List[Tuple[int, int]]
							   ):
		# type: (...) -> Tuple[List[Tensor], List[Tensor], List[Tensor]]
		device = class_logits.device
		num_classes = class_logits.shape[-1]

		boxes_per_image = [boxes_in_image.shape[0] for boxes_in_image in proposals]
		pred_boxes = self.box_coder.decode(box_regression, proposals)

		pred_scores = F.softmax(class_logits, -1)
		sbj_cls_scores = F.softmax(sbj_cls_scores, -1)

		pred_boxes_list = pred_boxes.split(boxes_per_image, 0)
		pred_scores_list = pred_scores.split(boxes_per_image, 0)
		sbj_cls_scores_list = sbj_cls_scores.split(boxes_per_image, 0)


		all_boxes = []
		all_scores = []
		all_labels = []
		for boxes, scores, sbj_scores, image_shape in zip(pred_boxes_list, pred_scores_list, sbj_cls_scores_list, image_shapes):
			boxes = box_ops.clip_boxes_to_image(boxes, image_shape)

			# create labels for each prediction
			labels = torch.arange(num_classes, device=device)
			labels = labels.view(1, -1).expand_as(scores)

			# remove predictions with the background label
			boxes = boxes[:, 1:]
			scores = scores[:, 1:]
			sbj_scores = sbj_scores[:, 1:]
			labels = labels[:, 1:]

			# batch everything, by making every class prediction be a separate instance
			boxes = boxes.reshape(-1, 4)
			scores = scores.reshape(-1)
			sbj_scores = sbj_scores.reshape(-1)
			labels = labels.reshape(-1)

			# remove low scoring boxes
			inds = torch.nonzero(scores > self.score_thresh).squeeze(1)
			boxes, scores, sbj_scores, labels = boxes[inds], scores[inds], sbj_scores[inds], labels[inds]

			# remove empty boxes
			keep = box_ops.remove_small_boxes(boxes, min_size=1e-2)
			boxes, scores, sbj_scores,labels = boxes[keep], scores[keep], sbj_scores[keep], labels[keep]

			# non-maximum suppression, independently done per class
			keep = box_ops.batched_nms(boxes, scores, labels, self.nms_thresh)
			# keep only topk scoring predictions
			keep = keep[:self.detections_per_img]
			boxes, scores, sbj_scores, labels = boxes[keep], scores[keep], sbj_scores[keep],  labels[keep]

			all_boxes.append(boxes)
			all_scores.append(sbj_scores)
			all_labels.append(labels)

		return all_boxes, all_scores, all_labels
Example #12
def generate_detections(cls_outputs,
                        box_outputs,
                        anchor_boxes,
                        indices,
                        classes,
                        img_scale: Optional[torch.Tensor],
                        img_size: Optional[torch.Tensor],
                        max_det_per_image: int = MAX_DETECTIONS_PER_IMAGE,
                        soft_nms: bool = False):
    """Generates detections with RetinaNet model outputs and anchors.

    Args:
        cls_outputs: a torch tensor with shape [N, 1], which has the highest class
            scores on all feature levels. The N is the number of selected
            top-K total anchors on all levels.  (k being MAX_DETECTION_POINTS)

        box_outputs: a torch tensor with shape [N, 4], which stacks box regression
            outputs on all feature levels. The N is the number of selected top-k
            total anchors on all levels. (k being MAX_DETECTION_POINTS)

        anchor_boxes: a torch tensor with shape [N, 4], which stacks anchors on all
            feature levels. The N is the number of selected top-k total anchors on all levels.

        indices: a torch tensor with shape [N], which is the indices from top-k selection.

        classes: a torch tensor with shape [N], which represents the class
            prediction on all selected anchors from top-k selection.

        img_scale: a float tensor representing the scale between original image
            and input image for the detector. It is used to rescale detections for
            evaluating with the original groundtruth annotations.

        max_det_per_image: an int constant, added as argument to make torchscript happy

    Returns:
        detections: detection results in a tensor with shape [MAX_DETECTION_POINTS, 6],
            each row representing [x, y, width, height, score, class]
    """
    assert box_outputs.shape[-1] == 4
    assert anchor_boxes.shape[-1] == 4
    assert cls_outputs.shape[-1] == 1

    anchor_boxes = anchor_boxes[indices, :]

    # apply bounding box regression to anchors
    boxes = decode_box_outputs(box_outputs.float(),
                               anchor_boxes,
                               output_xyxy=True)
    if img_scale is not None and img_size is not None:
        boxes = clip_boxes_xyxy(boxes, img_size /
                                img_scale)  # clip before NMS better?

    scores = cls_outputs.sigmoid().squeeze(1).float()
    if soft_nms:
        top_detection_idx, soft_scores = batched_soft_nms(boxes,
                                                          scores,
                                                          classes,
                                                          method_gaussian=True,
                                                          iou_threshold=0.3,
                                                          score_threshold=.001)
        scores[top_detection_idx] = soft_scores
    else:
        top_detection_idx = batched_nms(boxes,
                                        scores,
                                        classes,
                                        iou_threshold=0.5)

    # keep only topk scoring predictions
    top_detection_idx = top_detection_idx[:max_det_per_image]
    boxes = boxes[top_detection_idx]
    scores = scores[top_detection_idx, None]
    classes = classes[top_detection_idx, None]

    # xyxy to xywh & rescale to original image
    boxes[:, 2] -= boxes[:, 0]
    boxes[:, 3] -= boxes[:, 1]
    if img_scale is not None:
        boxes *= img_scale

    classes += 1  # back to class idx with background class = 0

    # stack em and pad out to MAX_DETECTIONS_PER_IMAGE if necessary
    detections = torch.cat([boxes, scores, classes.float()], dim=1)
    if len(top_detection_idx) < max_det_per_image:
        detections = torch.cat([
            detections,
            torch.zeros((max_det_per_image - len(top_detection_idx), 6),
                        device=detections.device,
                        dtype=detections.dtype)
        ],
                               dim=0)
    return detections
Example #13
    def filter_proposals(self, proposals, objectness, num_anchors_per_layer,
                         shapes):
        '''

        :param proposals:  shape=[bs,num_anchors,4] 4==>x1,y1,x2,y2 in input sizes
        :param objectness:  shape=[bs,num_anchors,1]   binary classification
        :param num_anchors_per_layer(list,len=fpn_layers): list(n)  n=anchor_num of each featuremap
        :param shapes(list): len=bs
        :return:
        filtered_boxes(list, len=bs): list(boxes_one_image)  boxes_one_image.shape=[n,4]
        filtered_scores(list, len=bs): list(scores_one_image) scores_one_image.shape=[n,1]
        '''
        min_size = torch.tensor(
            self.cfg['min_size'],
            device=proposals.device)  #define min proposal size
        pre_nms_top_n = self.cfg[
            'rpn_pre_nms_top_n_train'] if self.training else self.cfg[
                'rpn_pre_nms_top_n_test']  # 2000
        post_nms_top_n = self.cfg[
            'rpn_post_nms_top_n_train'] if self.training else self.cfg[
                'rpn_post_nms_top_n_test']  # 1000

        start_idx = 0
        filtered_idx = list()
        levels = list()

        for ldx, layer_num in enumerate(num_anchors_per_layer):
            levels.append(
                torch.full(size=(layer_num, ),
                           fill_value=ldx,
                           dtype=torch.int64,
                           device=proposals.device)
            )  # shape=[layer_num,], value=the i-th layer indices
            layer_objectness = objectness[:,
                                          start_idx:start_idx + layer_num, :]
            layer_top_n = min(layer_objectness.size(1), pre_nms_top_n)
            _, top_k_idx = layer_objectness.topk(dim=1, k=layer_top_n)
            filtered_idx.append(top_k_idx + start_idx)
            start_idx += layer_num

        levels = torch.cat(levels, dim=0).unsqueeze(0).repeat(
            proposals.size(0), 1)  # shape=[bs,num_anchors]
        filtered_idx = torch.cat(filtered_idx,
                                 dim=1)  # shape=[bs,sum_of_pre_topn,1]
        objectness = objectness.gather(dim=1, index=filtered_idx).squeeze(-1)
        proposals = proposals.gather(dim=1, index=filtered_idx.repeat(1, 1, 4))
        levels = levels.gather(dim=1, index=filtered_idx[..., 0])

        filtered_boxes = list()
        filtered_scores = list()

        # perform nms on each image, but do it on different fpn layer as lvl
        for box, scores, lvl, shape in zip(proposals, objectness, levels,
                                           shapes):
            # clip to img_size
            box[..., [0, 2]] = box[..., [0, 2]].clamp(min=0, max=shape[0])
            box[..., [1, 3]] = box[..., [1, 3]].clamp(min=0, max=shape[1])
            # remove small box
            dw = box[..., 2] - box[..., 0]
            dh = box[..., 3] - box[..., 1]
            keep = (dw > min_size) & (dh > min_size)
            box, scores, lvl = box[keep], scores[keep], lvl[keep]
            # perform nms on different layers by lvl
            keep = batched_nms(box, scores, lvl, self.cfg['rpn_nms_thresh'])
            keep = keep[:post_nms_top_n]
            box, scores = box[keep], scores[keep]
            # add it to list by bs
            filtered_boxes.append(box)
            filtered_scores.append(scores)
        # filtered_boxes = torch.stack(filtered_boxes, dim=0)
        # filtered_scores = torch.stack(filtered_scores, dim=0)
        return filtered_boxes, filtered_scores
Example #14
def postprocess_hoi_flip(x,
                         anchors,
                         regression,
                         obj_cls,
                         act_cls,
                         regressBoxes,
                         clipBoxes,
                         threshold,
                         iou_threshold,
                         mode="action",
                         classwise=True):
    transformed_anchors = regressBoxes(anchors, regression)
    transformed_anchors = clipBoxes(transformed_anchors, x)

    if mode == "action":
        main_cls = act_cls  # (bn, num_anchor, num_cls)
        other_cls = obj_cls  # (bn, num_anchor, num_cls)
    else:
        main_cls = obj_cls
        other_cls = act_cls
    scores = torch.max(main_cls, dim=2, keepdim=True)[0]  # (bn, num_anchor, 1)
    scores_over_thresh = (scores > threshold)[:, :, 0]  # (bn, num_anchor)
    out = []
    n = x.shape[0] // 2
    for i in range(n):
        if scores_over_thresh.sum() == 0:
            out.append({
                'rois': np.array(()),
                # 'act_class_ids': np.array(()),
                'act_scores': np.array(()),
                'obj_class_ids': np.array(()),
                'obj_scores': np.array(())
            })
            continue

        main_cls_per = torch.cat([
            main_cls[i, scores_over_thresh[i, :], ...].permute(1, 0),
            main_cls[i + n, scores_over_thresh[i + n, :], ...].permute(1, 0)
        ], 1)  # (num_cls, num_bbox)
        other_cls_per = torch.cat([
            other_cls[i, scores_over_thresh[i, :], ...].permute(1, 0),
            other_cls[i + n, scores_over_thresh[i + n, :], ...].permute(1, 0)
        ], 1)  # (num_cls, num_bbox)
        transformed_anchors_per = transformed_anchors[i,
                                                      scores_over_thresh[i, :],
                                                      ...]  # (num_bbox, 4)
        transformed_anchors_per_flip = transformed_anchors[
            i + n, scores_over_thresh[i + n, :], ...].clone()

        cols = x.shape[3]
        w = transformed_anchors_per_flip[:, 2] - transformed_anchors_per_flip[:, 0]
        transformed_anchors_per_flip[:, 2] = cols - transformed_anchors_per_flip[:, 0]
        transformed_anchors_per_flip[:, 0] = transformed_anchors_per_flip[:, 2] - w

        transformed_anchors_per = torch.cat(
            [transformed_anchors_per, transformed_anchors_per_flip], 0)

        scores_per = torch.cat([
            scores[i, scores_over_thresh[i, :], ...],
            scores[i + n, scores_over_thresh[i + n, :], ...]
        ], 0)

        if classwise:
            scores_, classes_ = main_cls_per.max(dim=0)
            anchors_nms_idx = batched_nms(transformed_anchors_per,
                                          scores_per[:, 0],
                                          classes_,
                                          iou_threshold=iou_threshold)
        else:
            anchors_nms_idx = nms(transformed_anchors_per,
                                  scores_per[:, 0],
                                  iou_threshold=iou_threshold)

        if anchors_nms_idx.shape[0] != 0:
            main_scores_ = main_cls_per[:,
                                        anchors_nms_idx]  # (num_cls, num_nms_bbox)
            # main_scores_, main_classes_ = main_cls_per[:, anchors_nms_idx]  # (num_cls, num_nms_bbox)
            other_scores_ = other_cls_per[:,
                                          anchors_nms_idx]  # (num_cls, num_nms_bbox)
            # other_scores_, other_classes_ = other_cls_per[:, anchors_nms_idx]  # (num_cls, num_nms_bbox)
            boxes_ = transformed_anchors_per[
                anchors_nms_idx, :]  # (num_nms_bbox, 4)

            if mode == "action":
                # act_classes_ = main_classes_.permute(1, 0)  # (num_nms_bbox, num_cls)
                act_scores_ = main_scores_.permute(
                    1, 0)  # (num_nms_bbox, num_cls)
                # obj_classes_ = other_classes_.max(dim=0)  # (num_nms_bbox)
                obj_scores_, obj_classes_ = other_scores_.max(
                    dim=0)  # (num_nms_bbox)
                # obj_classes_ = other_classes_.permute(1, 0)  # (num_nms_bbox, num_cls)
                # obj_scores_ = other_scores_.permute(1, 0)  # (num_nms_bbox, num_cls)
            else:
                # act_classes_ = other_classes_.permute(1, 0)
                act_scores_ = other_scores_.permute(1, 0)
                # obj_classes_ = main_classes_.max(dim=0)
                obj_scores_, obj_classes_ = main_scores_.max(dim=0)
                # obj_classes_ = main_classes_.permute(1, 0)
                # obj_scores_ = main_scores_.permute(1, 0)

            out.append({
                'rois': boxes_.cpu().numpy(),
                # 'act_class_ids': act_classes_.cpu().numpy(),
                'act_scores': act_scores_.cpu().numpy(),
                'obj_class_ids': obj_classes_.cpu().numpy(),
                'obj_scores': obj_scores_.cpu().numpy()
            })
        else:
            out.append({
                'rois': np.array(()),
                # 'act_class_ids': np.array(()),
                'act_scores': np.array(()),
                'obj_class_ids': np.array(()),
                'obj_scores': np.array(())
            })

    return out
Example #15
    def decode_output_batch(
        self,
        boxes: Tensor,
        scores: Tensor,
        score_threshold: float = 0.01,
        iou_threshold: float = 0.45,
        max_detections: int = 200,
    ) -> List[Tuple[Tensor, Tensor, Tensor]]:
        """
        Decodes a batch detection model outputs from default box offsets and class
        scores to ltrb formatted bounding boxes, predicted labels, and scores
        for each image of the batch using non maximum suppression.

        :param boxes: Encoded default-box offsets. Expected shape:
            batch_size,4,num_default_boxes
        :param scores: Class scores for each image, class, box combination.
            Expected shape: batch_size,num_classes,num_default_boxes
        :param score_threshold: minimum softmax score to be considered a positive
            prediction. Default is 0.01 following the SSD paper
        :param iou_threshold: The minimum IoU between two boxes to be considered the
            same object in non maximum suppression
        :param max_detections: the maximum number of detections to keep per image.
            Default is 200
        :return: Detected object bounding boxes, predicted labels, and class score for
            each image in this batch
        """
        if batched_nms is None:
            raise RuntimeError(
                "Unable to import batched_nms from torchvision.ops try upgrading your"
                " torch and torchvision versions")
        # Re-order so that dimensions are batch_size,num_default_boxes,{4,num_classes}
        boxes = boxes.permute(0, 2, 1)
        scores = scores.permute(0, 2, 1)

        # convert box offsets to bounding boxes and convert to ltrb form
        default_boxes = self._default_boxes.unsqueeze(
            0)  # extra dimension for math ops
        boxes[:, :, :2] = (
            self.scale_xy * boxes[:, :, :2] * default_boxes[:, :, :2] +
            default_boxes[:, :, :2])
        boxes[:, :, 2:] = (self._scale_wh *
                           boxes[:, :, 2:]).exp() * default_boxes[:, :, 2:]
        _xywh_to_ltrb_batch(boxes)

        # take softmax of class scores
        scores = torch.nn.functional.softmax(scores, dim=-1)  # class dimension

        # run non max suppression for each image in the batch and store outputs
        detection_outputs = []
        for image_boxes, box_class_scores in zip(boxes.split(1, 0),
                                                 scores.split(1, 0)):
            # strip batch dimension
            image_boxes = image_boxes.squeeze(0)
            box_class_scores = box_class_scores.squeeze(0)

            # get highest score per box and filter out background class
            box_class_scores[:, 0] = 0
            box_scores, box_labels = box_class_scores.max(dim=1)
            # background_filter = torch.nonzero(box_labels, as_tuple=False).squeeze()
            background_filter = box_scores > score_threshold
            image_boxes = image_boxes[background_filter]
            box_scores = box_scores[background_filter]
            box_labels = box_labels[background_filter]

            if image_boxes.dim() == 0:
                # nothing predicted, add empty result and continue
                detection_outputs.append(
                    (torch.zeros(1, 4), torch.zeros(1), torch.zeros(1)))
                continue
            if image_boxes.dim() == 1:
                image_boxes = image_boxes.unsqueeze(0)
                box_scores = box_scores.unsqueeze(0)
                box_labels = box_labels.unsqueeze(0)

            # filter boxes, classes, and scores by nms results
            nms_filter = batched_nms(image_boxes, box_scores, box_labels,
                                     iou_threshold)
            if nms_filter.size(0) > max_detections:
                # update nms_filter to keep the boxes with top max_detections scores
                box_scores_nms = box_scores[nms_filter]
                sorted_scores_nms_idx = torch.argsort(box_scores_nms,
                                                      descending=True)
                nms_filter = nms_filter[sorted_scores_nms_idx[:max_detections]]
            detection_outputs.append((
                image_boxes[nms_filter],
                box_labels[nms_filter],
                box_scores[nms_filter],
            ))

        return detection_outputs
Example #16
def postprocess_hoi(x,
                    anchors,
                    regression,
                    obj_cls,
                    act_cls,
                    regressBoxes,
                    clipBoxes,
                    threshold,
                    iou_threshold,
                    mode="action",
                    classwise=True):
    transformed_anchors = regressBoxes(anchors, regression)
    transformed_anchors = clipBoxes(transformed_anchors, x)
    if mode == "action":
        main_cls = act_cls  # (bn, num_anchor, num_cls)
        other_cls = obj_cls  # (bn, num_anchor, num_cls)
    else:
        main_cls = obj_cls
        other_cls = act_cls
    scores = torch.max(main_cls, dim=2, keepdim=True)[0]  # (bn, num_anchor, 1)
    scores_over_thresh = (scores > threshold)[:, :, 0]  # (bn, num_anchor)
    out = []
    for i in range(x.shape[0]):
        if scores_over_thresh.sum() == 0:
            out.append({
                'rois': np.array(()),
                # 'act_class_ids': np.array(()),
                'act_scores': np.array(()),
                'obj_class_ids': np.array(()),
                'obj_scores': np.array(())
            })
            continue

        main_cls_per = main_cls[i, scores_over_thresh[i, :],
                                ...].permute(1, 0)  # (num_cls, num_bbox)
        other_cls_per = other_cls[i, scores_over_thresh[i, :],
                                  ...].permute(1, 0)  # (num_cls, num_bbox)
        transformed_anchors_per = transformed_anchors[i,
                                                      scores_over_thresh[i, :],
                                                      ...]  # (num_bbox, 4)
        scores_per = scores[i, scores_over_thresh[i, :], ...]
        if classwise:
            scores_, classes_ = main_cls_per.max(dim=0)
            anchors_nms_idx = batched_nms(transformed_anchors_per,
                                          scores_per[:, 0],
                                          classes_,
                                          iou_threshold=iou_threshold)
        else:
            anchors_nms_idx = nms(transformed_anchors_per,
                                  scores_per[:, 0],
                                  iou_threshold=iou_threshold)

        if anchors_nms_idx.shape[0] != 0:
            main_scores_ = main_cls_per[:,
                                        anchors_nms_idx]  # (num_cls, num_nms_bbox)
            # main_scores_, main_classes_ = main_cls_per[:, anchors_nms_idx]  # (num_cls, num_nms_bbox)
            other_scores_ = other_cls_per[:,
                                          anchors_nms_idx]  # (num_cls, num_nms_bbox)
            # other_scores_, other_classes_ = other_cls_per[:, anchors_nms_idx]  # (num_cls, num_nms_bbox)
            boxes_ = transformed_anchors_per[
                anchors_nms_idx, :]  # (num_nms_bbox, 4)

            if mode == "action":
                # act_classes_ = main_classes_.permute(1, 0)  # (num_nms_bbox, num_cls)
                act_scores_ = main_scores_.permute(
                    1, 0)  # (num_nms_bbox, num_cls)
                # obj_classes_ = other_classes_.max(dim=0)  # (num_nms_bbox)
                obj_scores_, obj_classes_ = other_scores_.max(
                    dim=0)  # (num_nms_bbox)
                # obj_classes_ = other_classes_.permute(1, 0)  # (num_nms_bbox, num_cls)
                # obj_scores_ = other_scores_.permute(1, 0)  # (num_nms_bbox, num_cls)
            else:
                # act_classes_ = other_classes_.permute(1, 0)
                act_scores_ = other_scores_.permute(1, 0)
                # obj_classes_ = main_classes_.max(dim=0)

                # arg_sort = torch.argsort(-1*main_scores_, 0)
                # for i in range(arg_sort.shape[1]):
                #     print("object category")
                #     for j in range(5):
                #         print(obj_list[arg_sort[j, i]], main_scores_[arg_sort[j,i], i])

                obj_scores_, obj_classes_ = main_scores_.max(dim=0)
                # obj_classes_ = main_classes_.permute(1, 0)
                # obj_scores_ = main_scores_.permute(1, 0)

            out.append({
                'rois': boxes_.cpu().numpy(),
                # 'act_class_ids': act_classes_.cpu().numpy(),
                'act_scores': act_scores_.cpu().numpy(),
                'obj_class_ids': obj_classes_.cpu().numpy(),
                'obj_scores': obj_scores_.cpu().numpy()
            })
        else:
            out.append({
                'rois': np.array(()),
                # 'act_class_ids': np.array(()),
                'act_scores': np.array(()),
                'obj_class_ids': np.array(()),
                'obj_scores': np.array(())
            })

    return out
Example #17
def batched_nms(boxes, n, threshold, mode='union'):
    """Applies NMS in a batched fashion, only comparing boxes from same item.

    NB: there is a (pretty sneaky) batched NMS function available
        at torchvision.ops.boxes.batched_nms

        However, since there are some differences
        between OP and torchvision NMS algorithms,
        and OP version loops in python anyway,
        we should use a simple version of our own

    Arguments
    ---------
    boxes : torch.Tensor
        size [num_boxes, 10]
        Each row is a single bounding box.

        Column 0 is batch index.
        Columns 1 - 4 are bounding box top left and bottom right coordinates.
        Column 5 is score for that box.
        Columns 6-10 are offset values.

    n : int
        number of items in a batch

    threshold : float
        IOU threshold for NMS

    mode : str
        'union' | 'min'
        'union': true IOU
        'min': divide intersection by minimum of areas instead of union

    Returns
    ------
    kept : torch.Tensor
        size [num_boxes, 10]
        Each row is a single bounding box.

        Column 0 is batch index.
        Columns 1 - 4 are bounding box top left and bottom right coordinates.
        Column 5 is score for that box.
        Columns 6-10 are offset values.
    """

    kept = []

    # For each batch item
    for bi in range(n):
        # Logical selector for batch item boxes
        selector = boxes[:, 0] == bi

        # Select boxes and scores for current item
        boxes_ = boxes[selector, 1:5]
        scores_ = boxes[selector, 5]

        # Run plain NMS on the boxes of the current batch item only
        keep = box_ops.nms(boxes_, scores_, threshold)

        # Retain selected boxes for current item
        kept.append(boxes[selector, :][keep, :])

    # Repack into original data format
    kept = torch.cat(kept, dim=0)
    return kept
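As the docstring notes, torchvision.ops.boxes.batched_nms can give the same per-item behaviour (union IoU only) without the Python loop, by passing the batch-index column as the group index; a hedged sketch assuming the [num_boxes, 10] layout described above:

import torch
from torchvision.ops import batched_nms as tv_batched_nms

boxes = torch.rand(16, 10)
boxes[:, 3:5] = boxes[:, 1:3] + boxes[:, 3:5]     # ensure x2 > x1 and y2 > y1
boxes[:, 0] = torch.randint(0, 4, (16,)).float()  # 4 batch items

keep = tv_batched_nms(boxes[:, 1:5], boxes[:, 5], boxes[:, 0].long(), 0.5)
kept = boxes[keep]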
Example #18
 def prepare_roi_batch_classifier(self, class_logits, box_regression,
                                  proposals, image_shapes):
     device = class_logits.device
     num_classes = class_logits.shape[-1]
     boxes_per_image = [len(boxes_in_image) for boxes_in_image in proposals]
     #1. Reshape the box regression into num_predictions x num_classes (3) x 4
     res_boxes = box_regression.view(boxes_per_image[0], num_classes, -1)
     # for the NMS and sorting, need to decode:
     decoded_boxes = self.box_coder.decode(res_boxes, proposals)
     scaled_boxes = box_ops.clip_boxes_to_image(decoded_boxes,
                                                image_shapes[0])
     pred_scores = F.softmax(class_logits, -1)
     # 2. split reshaped boxes
     #res_boxes = res_boxes.split(boxes_per_image,0)
     #pred_scores = pred_scores.split(boxes_per_image, 0)
     all_scores = []
     # 3. store reshaped boxes
     all_res_boxes = []
     #all_rois_inds = []
     all_areas = []
     all_labels = []
     # 3b: add scaled boxes for masks,
     all_scaled_boxes = []
     #for res_box, scores, image_shape in zip(res_boxes, pred_scores, image_shapes):
     labels = torch.arange(num_classes, device=device)
     labels = labels.view(1, -1).expand_as(pred_scores)
     res_boxes = res_boxes[:, 1:]
     scaled_boxes = scaled_boxes[:, 1:]
     # Alex: also, extract zero boxes
     pred_scores = pred_scores[:, 1:]
     pred_scores = pred_scores.flatten()
     res_boxes = res_boxes.reshape(-1, 4)
     scaled_boxes = scaled_boxes.reshape(-1, 4)
     labels = labels[:, 1:].flatten()
     # remove empty boxes
     area = (scaled_boxes[:, 2] -
             scaled_boxes[:, 0]) * (scaled_boxes[:, 3] - scaled_boxes[:, 1])
     inds_area = torch.nonzero(area > 1e-5).squeeze(1)
     res_boxes, scaled_boxes, pred_scores, labels = res_boxes[
         inds_area], scaled_boxes[inds_area], pred_scores[
             inds_area], labels[inds_area]
     # Alex
     # for the S2 classifier: keep all boxes(score>-0.01)
     inds_classifier = torch.nonzero(
         pred_scores > self.score_thresh_classifier).squeeze(1)
     # non-maximum suppression, independently done per class
     # this returns the indices in the decreasing order of their confidence score
     keep = box_ops.batched_nms(scaled_boxes, pred_scores, labels,
                                self.nms_thresh_classifier)
     # keep only topk scoring predictions
     keep = keep[:self.detections_per_img_s2new]
     # Alex: keep is a vector, keep.ndimension()==1
     # if fewer than the RoI batch size, augment and keep the order!
     if keep.size().numel() < self.detections_per_img_s2new:
         keep_aug = torch.zeros(self.detections_per_img_s2new,
                                dtype=torch.long)
         # get the indices to fill in with the values from the keep vector, make sure 0 is the first value, and add the last index=RoI detections_per_img
         inds_rand = torch.cat(
             (torch.tensor([0]),
              torch.randperm(self.detections_per_img_s2new -
                             2)[:keep.size().numel() - 1].sort().values + 1,
              torch.tensor([self.detections_per_img_s2new])), 0).unique()
         # keep_aug.index_copy_(0, inds_rand[:-1], keep)
         for idxs, posts in enumerate(inds_rand[:-1]):
             keep_aug[posts:inds_rand[idxs + 1]] = keep[idxs].expand(
                 inds_rand[idxs + 1] - posts)
         keep = keep_aug
     # Alex
     # At this point the boxes and scores are sorted in the decreasing order
     # 05/12: add scaled boxes
     res_boxes, pred_scores, labels, scaled_boxes = res_boxes[
         keep], pred_scores[keep], labels[keep], scaled_boxes[keep]
     # keep the lists
     all_res_boxes.append(res_boxes)
     all_scores.append(pred_scores)
     all_labels.append(labels)
     all_scaled_boxes.append(scaled_boxes)
     #all_rois_inds.append(all_rois_inds)
     return all_res_boxes, all_scores, all_labels, all_scaled_boxes
Example #19
def detect_face(imgs, minsize, pnet, rnet, onet, threshold, factor, device):
    if isinstance(imgs, (np.ndarray, torch.Tensor)):
        imgs = torch.as_tensor(imgs, device=device)
        if len(imgs.shape) == 3:
            imgs = imgs.unsqueeze(0)
    else:
        if not isinstance(imgs, (list, tuple)):
            imgs = [imgs]
        if any(img.size != imgs[0].size for img in imgs):
            raise Exception(
                "MTCNN batch processing only compatible with equal-dimension images."
            )
        imgs = np.stack([np.uint8(img) for img in imgs])

    imgs = torch.as_tensor(imgs, device=device)

    model_dtype = next(pnet.parameters()).dtype
    imgs = imgs.permute(0, 3, 1, 2).type(model_dtype)

    batch_size = len(imgs)
    h, w = imgs.shape[2:4]
    m = 12.0 / minsize
    minl = min(h, w)
    minl = minl * m

    # Create scale pyramid
    scale_i = m
    scales = []
    while minl >= 12:
        scales.append(scale_i)
        scale_i = scale_i * factor
        minl = minl * factor

    # First stage
    boxes = []
    image_inds = []
    all_inds = []
    all_i = 0
    for scale in scales:
        im_data = imresample(imgs, (int(h * scale + 1), int(w * scale + 1)))
        im_data = (im_data - 127.5) * 0.0078125
        reg, probs = pnet(im_data)

        boxes_scale, image_inds_scale = generateBoundingBox(
            reg, probs[:, 1], scale, threshold[0])
        boxes.append(boxes_scale)
        image_inds.append(image_inds_scale)
        all_inds.append(all_i + image_inds_scale)
        all_i += batch_size

    boxes = torch.cat(boxes, dim=0)
    image_inds = torch.cat(image_inds, dim=0).cpu()
    all_inds = torch.cat(all_inds, dim=0)

    # NMS within each scale + image
    pick = batched_nms(boxes[:, :4], boxes[:, 4], all_inds, 0.5)
    boxes, image_inds = boxes[pick], image_inds[pick]

    # NMS within each image
    pick = batched_nms(boxes[:, :4], boxes[:, 4], image_inds, 0.7)
    boxes, image_inds = boxes[pick], image_inds[pick]

    regw = boxes[:, 2] - boxes[:, 0]
    regh = boxes[:, 3] - boxes[:, 1]
    qq1 = boxes[:, 0] + boxes[:, 5] * regw
    qq2 = boxes[:, 1] + boxes[:, 6] * regh
    qq3 = boxes[:, 2] + boxes[:, 7] * regw
    qq4 = boxes[:, 3] + boxes[:, 8] * regh
    boxes = torch.stack([qq1, qq2, qq3, qq4, boxes[:, 4]]).permute(1, 0)
    boxes = rerec(boxes)
    y, ey, x, ex = pad(boxes, w, h)

    # Second stage
    if len(boxes) > 0:
        im_data = []
        for k in range(len(y)):
            if ey[k] > (y[k] - 1) and ex[k] > (x[k] - 1):
                img_k = imgs[image_inds[k], :, (y[k] - 1):ey[k],
                             (x[k] - 1):ex[k]].unsqueeze(0)
                im_data.append(imresample(img_k, (24, 24)))
        im_data = torch.cat(im_data, dim=0)
        im_data = (im_data - 127.5) * 0.0078125
        out = rnet(im_data)

        out0 = out[0].permute(1, 0)
        out1 = out[1].permute(1, 0)
        score = out1[1, :]
        ipass = score > threshold[1]
        boxes = torch.cat((boxes[ipass, :4], score[ipass].unsqueeze(1)), dim=1)
        image_inds = image_inds[ipass]
        mv = out0[:, ipass].permute(1, 0)

        # NMS within each image
        pick = batched_nms(boxes[:, :4], boxes[:, 4], image_inds, 0.7)
        boxes, image_inds, mv = boxes[pick], image_inds[pick], mv[pick]
        boxes = bbreg(boxes, mv)
        boxes = rerec(boxes)

    # Third stage
    points = torch.zeros(0, 5, 2, device=device)
    if len(boxes) > 0:
        y, ey, x, ex = pad(boxes, w, h)
        im_data = []
        for k in range(len(y)):
            if ey[k] > (y[k] - 1) and ex[k] > (x[k] - 1):
                img_k = imgs[image_inds[k], :, (y[k] - 1):ey[k],
                             (x[k] - 1):ex[k]].unsqueeze(0)
                im_data.append(imresample(img_k, (48, 48)))
        im_data = torch.cat(im_data, dim=0)
        im_data = (im_data - 127.5) * 0.0078125
        out = onet(im_data)

        out0 = out[0].permute(1, 0)
        out1 = out[1].permute(1, 0)
        out2 = out[2].permute(1, 0)
        score = out2[1, :]
        points = out1
        ipass = score > threshold[2]
        points = points[:, ipass]
        boxes = torch.cat((boxes[ipass, :4], score[ipass].unsqueeze(1)), dim=1)
        image_inds = image_inds[ipass]
        mv = out0[:, ipass].permute(1, 0)

        w_i = boxes[:, 2] - boxes[:, 0] + 1
        h_i = boxes[:, 3] - boxes[:, 1] + 1
        points_x = w_i.repeat(5, 1) * points[:5, :] + boxes[:, 0].repeat(5,
                                                                         1) - 1
        points_y = h_i.repeat(5, 1) * points[5:10, :] + boxes[:, 1].repeat(
            5, 1) - 1
        points = torch.stack((points_x, points_y)).permute(2, 1, 0)
        boxes = bbreg(boxes, mv)

        # NMS within each image using "Min" strategy
        # pick = batched_nms(boxes[:, :4], boxes[:, 4], image_inds, 0.7)
        pick = batched_nms_numpy(boxes[:, :4], boxes[:, 4], image_inds, 0.7,
                                 'Min')
        boxes, image_inds, points = boxes[pick], image_inds[pick], points[pick]

    boxes = boxes.cpu().numpy()
    points = points.cpu().numpy()

    batch_boxes = []
    batch_points = []
    for b_i in range(batch_size):
        b_i_inds = np.where(image_inds == b_i)
        batch_boxes.append(boxes[b_i_inds].copy())
        batch_points.append(points[b_i_inds].copy())

    batch_boxes, batch_points = np.array(batch_boxes), np.array(batch_points)

    return batch_boxes, batch_points
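The all_inds bookkeeping in the first stage builds a unique integer id per (scale, image) pair by offsetting each scale's image indices with a running counter, so a single batched_nms call suppresses boxes only within the same scale and image; a minimal sketch of the same idea:

import torch

batch_size = 2
image_inds_per_scale = [torch.tensor([0, 1, 1]), torch.tensor([0, 0, 1])]
all_inds, offset = [], 0
for image_inds in image_inds_per_scale:
    all_inds.append(offset + image_inds)  # unique id per (scale, image) pair
    offset += batch_size
print(torch.cat(all_inds))  # tensor([0, 1, 1, 2, 2, 3])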
Example #20
def getresult(img_path, outpath):
    NN_WEIGHT_FILE_PATH = 'dect/weight/efficient_rcnn_9.pth'

    VERSION_FAST = 49

    NMS_PARAM = 0.35

    CLASS_PROP_THR = 0.5

    RUN_MODE = "NMS"

    count = 0

    #img_path = "../data/Images_test/test.jpg"
    imge = Image.open(img_path).convert('RGB')
    testtransform = Compose([ToTensor()])
    img = testtransform(imge)
    model = get_model(VERSION_FAST)
    model.load_state_dict(torch.load(NN_WEIGHT_FILE_PATH))
    model.eval()

    print("Run Mode = ", RUN_MODE)

    if "NMS" == RUN_MODE:
        start = time.time()
        print(img.size())
        results = model([img])
        open_cv_image = np.array(imge)
        open_cv_image = cv2.cvtColor(open_cv_image, cv2.COLOR_RGB2BGR)
        boxes = []
        for box, label, score in zip(results[0]['boxes'],
                                      results[0]['labels'],
                                      results[0]["scores"]):
            boxes.append(box[:4].tolist() + [label] + [score])

        # boxes = np.array(boxes)
        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        if boxes.shape[0] != 0:
            # keep = py_cpu_nms(boxes, 0.35)
            keep = box_ops.batched_nms(boxes[:, :4], boxes[:, 5], boxes[:, 4],
                                       NMS_PARAM)
            # keep = nms(dets, args.nms_threshold,force_cpu=args.cpu)
            boxes = boxes[keep, :]

        #
        count = 0
        for box in boxes:
            if box[5] < CLASS_PROP_THR:
                continue
            box = box.tolist()
            score = float(box[5])
            count += 1
            label_id = int(box[4]) - 1
            # label = CLASSES[label_id]
            label = 'Human'
            cv2.rectangle(open_cv_image, (int(box[0]), int(
                box[1]), int(box[2]) - int(box[0]), int(box[3]) - int(box[1])),
                          (255, 225, 0), 2)
            cx = box[0]
            cy = box[1] + 12
            cv2.putText(open_cv_image, "{}:{:.2f}".format(label, score),
                        (int(cx), int(cy)), cv2.FONT_HERSHEY_DUPLEX, 0.6,
                        (0, 255, 0))
        # cv2.imshow("sd", open_cv_image)
        # cv2.imwrite("result/{}".format(img_path.split("/")[-1]), open_cv_image)
        # cv2.imshow("sd", open_cv_image)
        cv2.imwrite(outpath, open_cv_image)
        # cv2.waitKey(30000)
    else:
        start = time.time()
        print("img.size = ", img.size())
        results = model([img])
        open_cv_image = np.array(imge)
        open_cv_image = cv2.cvtColor(open_cv_image, cv2.COLOR_RGB2BGR)
        print()
        for box in results[0]['boxes']:
            box = box[:4].tolist()
            cv2.rectangle(open_cv_image, (int(box[0]), int(
                box[1]), int(box[2]) - int(box[0]), int(box[3]) - int(box[1])),
                          (255, 225, 0), 2)
        # cv2.imshow("sd", open_cv_image)
        cv2.imwrite(outpath, open_cv_image)
        # cv2.waitKey(30000)
    return count
Example #21
    def box_features_hook(self, module, input, output):
        '''
        hook for extracting features from MaskRCNN
        '''

        features, proposals, image_shapes, targets = input

        box_features = module.box_roi_pool(features, proposals, image_shapes)
        box_features = module.box_head(box_features)
        class_logits, box_regression = module.box_predictor(box_features)

        device = class_logits.device
        num_classes = class_logits.shape[-1]

        boxes_per_image = [len(boxes_in_image) for boxes_in_image in proposals]
        pred_boxes = module.box_coder.decode(box_regression, proposals)

        pred_scores = F.softmax(class_logits, -1)

        # split boxes and scores per image
        pred_boxes = pred_boxes.split(boxes_per_image, 0)
        pred_scores = pred_scores.split(boxes_per_image, 0)

        all_boxes = []
        all_scores = []
        all_labels = []
        all_keeps = []
        for boxes, scores, image_shape in zip(pred_boxes, pred_scores,
                                              image_shapes):
            boxes = box_ops.clip_boxes_to_image(boxes, image_shape)

            # create labels for each prediction
            labels = torch.arange(num_classes, device=device)
            labels = labels.view(1, -1).expand_as(scores)

            # remove predictions with the background label
            boxes = boxes[:, 1:]
            scores = scores[:, 1:]
            labels = labels[:, 1:]

            # batch everything, by making every class prediction be a separate instance
            boxes = boxes.reshape(-1, 4)
            scores = scores.flatten()
            labels = labels.flatten()

            # remove low scoring boxes
            inds = torch.nonzero(scores > module.score_thresh).squeeze(1)
            boxes, scores, labels = boxes[inds], scores[inds], labels[inds]

            # remove empty boxes
            keep = box_ops.remove_small_boxes(boxes, min_size=1e-2)
            boxes, scores, labels = boxes[keep], scores[keep], labels[keep]

            # non-maximum suppression, independently done per class
            keep = box_ops.batched_nms(boxes, scores, labels,
                                       module.nms_thresh)
            # keep only topk scoring predictions
            keep = keep[:self.mask_rcnn_top_k_boxes]
            boxes, scores, labels = boxes[keep], scores[keep], labels[keep]

            all_boxes.append(boxes)
            all_scores.append(scores)
            all_labels.append(labels)
            all_keeps.append(keep)

        box_features_per_image = []
        for keep in all_keeps:
            box_features_per_image.append(box_features[keep])

        self.detection_box_features = box_features_per_image
        self.fpn_pooled_features = self.avg2dpool(
            features['pool']).squeeze(-1).squeeze(-1)
Example #22
def generate_detections(cls_outputs,
                        box_outputs,
                        anchor_boxes,
                        indices,
                        classes,
                        image_scale,
                        nms_thres=0.5,
                        max_dets=100):
    """Generates detections with RetinaNet model outputs and anchors.

    Args:
        cls_outputs: a torch tensor with shape [N, 1], which has the highest class
            scores on all feature levels. The N is the number of selected
            top-K total anchors on all levels.  (k being MAX_DETECTION_POINTS)

        box_outputs: a torch tensor with shape [N, 4], which stacks box regression
            outputs on all feature levels. The N is the number of selected top-k
            total anchors on all levels. (k being MAX_DETECTION_POINTS)

        anchor_boxes: a torch tensor with shape [N, 4], which stacks anchors on all
            feature levels. The N is the number of selected top-k total anchors on all levels.

        indices: a torch tensor with shape [N], which is the indices from top-k selection.

        classes: a torch tensor with shape [N], which represents the class
            prediction on all selected anchors from top-k selection.

        image_scale: a float tensor representing the scale between original image
            and input image for the detector. It is used to rescale detections for
            evaluating with the original groundtruth annotations.

    Returns:
        detections: detection results in a tensor with shape [MAX_DETECTION_POINTS, 6],
            each row representing [x, y, width, height, score, class]
    """
    anchor_boxes = anchor_boxes[indices, :]

    # apply bounding box regression to anchors
    boxes = decode_box_outputs(box_outputs.T.float(),
                               anchor_boxes.T,
                               output_xyxy=True)

    scores = cls_outputs.sigmoid().squeeze(1).float()
    human_idx = classes == 0  # keep only class-0 (person) detections
    boxes = boxes[human_idx]
    scores = scores[human_idx]
    classes = classes[human_idx]
    top_detection_idx = batched_nms(boxes,
                                    scores,
                                    classes,
                                    iou_threshold=nms_thres)

    # keep only topk scoring predictions
    top_detection_idx = top_detection_idx[:max_dets]
    boxes = boxes[top_detection_idx]
    scores = scores[top_detection_idx, None]
    classes = classes[top_detection_idx, None]

    # xyxy to xywh & rescale to original image
    boxes[:, 2] -= boxes[:, 0]
    boxes[:, 3] -= boxes[:, 1]
    boxes *= image_scale

    classes += 1  # back to class idx with background class = 0

    # stack them and pad out to max_dets rows if necessary
    detections = torch.cat([boxes, scores, classes.float()], dim=1)
    if len(top_detection_idx) < max_dets:
        detections = torch.cat([
            detections,
            torch.zeros((max_dets - len(top_detection_idx), 6),
                        device=detections.device,
                        dtype=detections.dtype)
        ],
                               dim=0)
    return detections
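A minimal, self-contained sketch of the output-formatting step above, using made-up toy tensors rather than real model outputs; the names max_dets and image_scale mirror the arguments of generate_detections.

import torch

# Toy values (not real model outputs) walking through the last step of
# generate_detections: xyxy -> xywh, rescale, then zero-pad to max_dets rows.
max_dets = 4
image_scale = 2.0
boxes = torch.tensor([[10., 20., 30., 60.],
                      [ 0.,  0., 15., 25.]])   # xyxy
scores = torch.tensor([[0.9], [0.7]])
classes = torch.tensor([[1.], [3.]])           # class ids, background = 0

boxes[:, 2] -= boxes[:, 0]                     # width  = x2 - x1
boxes[:, 3] -= boxes[:, 1]                     # height = y2 - y1
boxes *= image_scale                           # rescale to the original image

detections = torch.cat([boxes, scores, classes], dim=1)
if len(detections) < max_dets:
    pad = torch.zeros((max_dets - len(detections), 6), dtype=detections.dtype)
    detections = torch.cat([detections, pad], dim=0)
print(detections.shape)                        # torch.Size([4, 6])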
Exemple #23
0
    def ssm_postprocess_detections(self, class_logits, box_regression,
                                   proposals, image_shapes):
        device = class_logits.device
        num_classes = class_logits.shape[-1]

        boxes_per_image = [len(boxes_in_image) for boxes_in_image in proposals]
        pred_boxes = self.box_coder.decode(box_regression, proposals)

        pred_scores = F.softmax(class_logits, -1)

        # split boxes and scores per image
        pred_boxes = pred_boxes.split(boxes_per_image, 0)
        pred_scores = pred_scores.split(boxes_per_image, 0)
        al_idx = 0
        all_boxes = torch.empty([0, 4], device=device)
        all_scores = torch.tensor([], device=device)
        all_labels = []
        CONF_THRESH = 0.5  # a larger threshold flags more images as active-learning samples
        for boxes, scores, image_shape in zip(pred_boxes, pred_scores,
                                              image_shapes):
            boxes = box_ops.clip_boxes_to_image(boxes, image_shape)
            # create labels for each prediction
            labels = torch.arange(num_classes, device=device)
            labels = labels.view(1, -1).expand_as(scores)

            # remove predictions with the background label
            boxes = boxes[:, 1:]
            scores = scores[:, 1:]
            labels = labels[:, 1:]
            if torch.max(scores) < CONF_THRESH:
                # print(scores)
                al_idx = 1
                continue
            for cls_ind in range(num_classes - 1):
                cls_boxes = boxes[:, cls_ind]
                cls_scores = scores[:, cls_ind]
                cls_labels = labels[:, cls_ind]
                # batch everything, by making every class prediction be a separate instance
                cls_boxes = cls_boxes.reshape(-1, 4)
                cls_scores = cls_scores.flatten()
                cls_labels = cls_labels.flatten()

                # non-maximum suppression, independently done per class
                # (IoU threshold hard-coded to 0.3 instead of self.nms_thresh)
                keep = box_ops.batched_nms(cls_boxes, cls_scores, cls_labels,
                                           0.3)
                # keep only topk scoring predictions
                keep = keep[:self.detections_per_img]
                cls_boxes, cls_scores, cls_labels = cls_boxes[
                    keep], cls_scores[keep], cls_labels[keep]
                # remove low scoring boxes (done after NMS in this variant)
                inds = torch.nonzero(cls_scores > self.score_thresh).squeeze(1)
                if len(inds) == 0:
                    continue
                for j in inds:
                    # boxes, scores, labels = boxes[inds], scores[inds], labels[inds]
                    all_boxes = torch.cat(
                        (all_boxes, cls_boxes[j].unsqueeze(0)), 0)
                    k = keep[j]
                    all_scores = torch.cat(
                        (all_scores, scores[k].unsqueeze(0)), 0)
                    all_labels.append(judge_y(scores[k]))
        # all_scores = [torch.cat(all_scores, 1)]
        return [all_boxes], [all_scores], [all_labels], al_idx
Exemple #24
0
    def postprocess_boxes(
        self,
        class_logits,
        box_regression,
        embeddings,
        proposals,
        image_shapes,
        fcs=None,
        gt_det=None,
        cws=True,
    ):
        """
        Similar to RoIHeads.postprocess_detections, but can handle embeddings and implement
        First Classification Score (FCS).
        """
        device = class_logits.device

        boxes_per_image = [len(boxes_in_image) for boxes_in_image in proposals]
        pred_boxes = self.box_coder.decode(box_regression, proposals)

        if fcs is not None:
            # First Classification Score (FCS)
            pred_scores = fcs[0]
        else:
            pred_scores = torch.sigmoid(class_logits)
        if cws:
            # Confidence Weighted Similarity (CWS)
            embeddings = embeddings * pred_scores.view(-1, 1)

        # split boxes and scores per image
        pred_boxes = pred_boxes.split(boxes_per_image, 0)
        pred_scores = pred_scores.split(boxes_per_image, 0)
        pred_embeddings = embeddings.split(boxes_per_image, 0)

        all_boxes = []
        all_scores = []
        all_labels = []
        all_embeddings = []
        for boxes, scores, embeddings, image_shape in zip(
                pred_boxes, pred_scores, pred_embeddings, image_shapes):
            boxes = box_ops.clip_boxes_to_image(boxes, image_shape)

            # create labels for each prediction
            labels = torch.ones(scores.size(0), device=device)

            # remove predictions with the background label
            boxes = boxes[:, 1:]
            scores = scores.unsqueeze(1)
            labels = labels.unsqueeze(1)

            # batch everything, by making every class prediction be a separate instance
            boxes = boxes.reshape(-1, 4)
            scores = scores.flatten()
            labels = labels.flatten()
            embeddings = embeddings.reshape(-1, self.embedding_head.dim)

            # remove low scoring boxes
            inds = torch.nonzero(scores > self.score_thresh).squeeze(1)
            boxes, scores, labels, embeddings = (
                boxes[inds],
                scores[inds],
                labels[inds],
                embeddings[inds],
            )

            # remove empty boxes
            keep = box_ops.remove_small_boxes(boxes, min_size=1e-2)
            boxes, scores, labels, embeddings = (
                boxes[keep],
                scores[keep],
                labels[keep],
                embeddings[keep],
            )

            if gt_det is not None:
                # include GT into the detection results
                boxes = torch.cat((boxes, gt_det["boxes"]), dim=0)
                labels = torch.cat((labels, torch.tensor([1.0]).to(device)),
                                   dim=0)
                scores = torch.cat((scores, torch.tensor([1.0]).to(device)),
                                   dim=0)
                embeddings = torch.cat((embeddings, gt_det["embeddings"]),
                                       dim=0)

            # non-maximum suppression, independently done per class
            keep = box_ops.batched_nms(boxes, scores, labels, self.nms_thresh)
            # keep only topk scoring predictions
            keep = keep[:self.detections_per_img]
            boxes, scores, labels, embeddings = (
                boxes[keep],
                scores[keep],
                labels[keep],
                embeddings[keep],
            )

            all_boxes.append(boxes)
            all_scores.append(scores)
            all_labels.append(labels)
            all_embeddings.append(embeddings)

        return all_boxes, all_scores, all_embeddings, all_labels
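A minimal sketch of the Confidence Weighted Similarity (CWS) weighting used above, assuming a re-identification setup where detection embeddings are ranked against a query embedding; query_emb, gallery_emb and det_scores below are made-up placeholders, not part of the original code.

import torch
import torch.nn.functional as F

# Hypothetical query / gallery embeddings (random placeholders).
query_emb = F.normalize(torch.randn(256), dim=0)
gallery_emb = F.normalize(torch.randn(5, 256), dim=1)    # 5 detections
det_scores = torch.tensor([0.95, 0.80, 0.40, 0.30, 0.05])

# CWS: weight each detection embedding by its detection score ...
cws_emb = gallery_emb * det_scores.view(-1, 1)
# ... so the similarity used for ranking is confidence-weighted.
similarities = cws_emb @ query_emb                       # shape (5,)
ranking = similarities.argsort(descending=True)
print(ranking)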
Exemple #25
0
def compute_map(dataset,
                detection_dir,
                h_thresh,
                o_thresh,
                nms_thresh,
                max_human,
                max_object,
                human_idx=1,
                min_iou=0.5):
    num_pairs_object = torch.zeros(81)
    associate = BoxAssociation(min_iou=min_iou)
    meter = DetectionAPMeter(81, algorithm='INT', nproc=10)
    # Skip images without valid human-object pairs
    valid_idx = dataset._keep

    for i in tqdm(valid_idx):
        # Load annotation
        annotation = dataset.annotations[i]
        image_name = annotation.pop('file_name')
        target = to_tensor(annotation, input_format='dict')
        # Load detection
        detection_path = os.path.join(detection_dir,
                                      image_name.replace('jpg', 'json'))
        with open(detection_path, 'r') as f:
            detection = to_tensor(json.load(f), input_format='dict')

        boxes = detection['boxes']
        labels = detection['labels']
        scores = detection['scores']

        # Filter out low scoring human boxes
        idx = torch.nonzero(labels == human_idx).squeeze(1)
        keep_idx = idx[torch.nonzero(scores[idx] >= h_thresh).squeeze(1)]

        # Filter out low scoring object boxes
        idx = torch.nonzero(labels != human_idx).squeeze(1)
        keep_idx = torch.cat(
            [keep_idx, idx[torch.nonzero(scores[idx] >= o_thresh).squeeze(1)]])

        boxes = boxes[keep_idx].view(-1, 4)
        scores = scores[keep_idx].view(-1)
        labels = labels[keep_idx].view(-1)

        # Class-wise non-maximum suppression
        keep_idx = batched_nms(boxes, scores, labels, nms_thresh)
        boxes = boxes[keep_idx].view(-1, 4)
        scores = scores[keep_idx].view(-1)
        labels = labels[keep_idx].view(-1)

        sorted_idx = torch.argsort(scores, descending=True)
        boxes = boxes[sorted_idx]
        scores = scores[sorted_idx]
        labels = labels[sorted_idx]

        h_idx = torch.nonzero(labels == human_idx).squeeze(1)
        o_idx = torch.nonzero(labels != human_idx).squeeze(1)
        if len(h_idx) > max_human:
            h_idx = h_idx[:max_human]
        if len(o_idx) > max_object:
            o_idx = o_idx[:max_object]
        keep_idx = torch.cat([h_idx, o_idx])

        boxes = boxes[keep_idx].view(-1, 4)
        scores = scores[keep_idx].view(-1)
        labels = labels[keep_idx].view(-1)

        # Format ground truth boxes
        gt_boxes = torch.cat([target['boxes_h'], target['boxes_o']])
        gt_classes = torch.cat([
            human_idx * torch.ones_like(target['objects']), target['objects']
        ])
        # Remove duplicates
        _, keep = np.unique(gt_boxes, return_index=True, axis=0)
        keep = torch.from_numpy(keep)
        gt_boxes = gt_boxes[keep]
        gt_classes = gt_classes[keep]
        # Update number of ground truth annotations
        for c in gt_classes:
            num_pairs_object[c] += 1

        # Associate detections with ground truth
        binary_labels = torch.zeros_like(scores)
        unique_obj = labels.unique()
        for obj_idx in unique_obj:
            det_idx = torch.nonzero(labels == obj_idx).squeeze(1)
            gt_idx = torch.nonzero(gt_classes == obj_idx).squeeze(1)
            if len(gt_idx) == 0:
                continue
            binary_labels[det_idx] = associate(gt_boxes[gt_idx].view(-1, 4),
                                               boxes[det_idx].view(-1, 4),
                                               scores[det_idx].view(-1))

        meter.append(scores, labels, binary_labels)

    meter.num_gt = num_pairs_object.tolist()
    ap = meter.eval()
    object_keep = dataset.present_objects
    ap_present = ap[object_keep]
    rec_present = meter.max_rec[object_keep]
    print("Mean average precision: {:.4f} |".format(ap_present.mean().item()),
          "Mean maximum recall: {:.4f}".format(rec_present.mean().item()))
Exemple #26
0
def postprocess_dense_union(x,
                            anchors,
                            classification,
                            sub_regression,
                            obj_regression,
                            regressBoxes,
                            clipBoxes,
                            threshold,
                            iou_threshold=1,
                            classwise=False):
    # reorder the raw anchor coordinates by swapping the x/y pairs (yxyx -> xyxy)
    transformed_anchors = torch.zeros_like(anchors)
    transformed_anchors[:, :, 0] = anchors[:, :, 1]
    transformed_anchors[:, :, 1] = anchors[:, :, 0]
    transformed_anchors[:, :, 2] = anchors[:, :, 3]
    transformed_anchors[:, :, 3] = anchors[:, :, 2]

    transformed_anchors = clipBoxes(transformed_anchors, x)

    transformed_anchors_sub = regressBoxes(anchors, sub_regression)
    transformed_anchors_sub = clipBoxes(transformed_anchors_sub, x)

    transformed_anchors_obj = regressBoxes(anchors, obj_regression)
    transformed_anchors_obj = clipBoxes(transformed_anchors_obj, x)

    main_cls = classification  # (bn, num_anchor, num_cls)
    # other_cls = obj_classication

    scores = torch.max(main_cls, dim=2, keepdim=True)[0]  # (bn, num_anchor, 1)
    scores_over_thresh = (scores > threshold)[:, :, 0]  # (bn, num_anchor)
    out = []
    for i in range(x.shape[0]):
        if scores_over_thresh[i].sum() == 0:
            out.append({
                'rois': np.array(()),
                'rois_sub': np.array(()),
                'rois_obj': np.array(()),
                'sp_vector': np.array(()),
                'act_class_ids': np.array(()),
                'act_scores': np.array(()),
                # 'obj_scores': np.array(()),
            })
            continue

        main_cls_per = main_cls[i, scores_over_thresh[i, :],
                                ...].permute(1, 0)  # (num_cls, num_bbox)
        # other_cls_per = other_cls[i, scores_over_thresh[i, :], ...].permute(1, 0)  # (num_cls, num_bbox)

        transformed_anchors_per = transformed_anchors[i,
                                                      scores_over_thresh[i, :],
                                                      ...]  # (num_bbox, 4)
        transformed_anchors_sub_per = transformed_anchors_sub[
            i, scores_over_thresh[i, :], ...]  # (num_bbox, 4)
        transformed_anchors_obj_per = transformed_anchors_obj[
            i, scores_over_thresh[i, :], ...]  # (num_bbox, 4)

        scores_per = scores[i, scores_over_thresh[i, :], ...]

        if iou_threshold < 1:
            if classwise:
                scores_, classes_ = main_cls_per.max(dim=0)
                anchors_nms_idx = batched_nms(transformed_anchors_per,
                                              scores_per[:, 0],
                                              classes_,
                                              iou_threshold=iou_threshold)
            else:
                anchors_nms_idx = nms(transformed_anchors_per,
                                      scores_per[:, 0],
                                      iou_threshold=iou_threshold)
        else:
            anchors_nms_idx = np.arange(main_cls_per.shape[1])

        if anchors_nms_idx.shape[0] > 0:
            main_scores_ = main_cls_per[:,
                                        anchors_nms_idx]  # (num_cls, num_nms_bbox)
            # other_scores_ = other_cls_per[:, :]
            boxes_ = transformed_anchors_per[
                anchors_nms_idx, :]  # (num_nms_bbox, 4)
            boxes_sub_ = transformed_anchors_sub_per[
                anchors_nms_idx, :]  # (num_nms_bbox, 4)
            boxes_obj_ = transformed_anchors_obj_per[
                anchors_nms_idx, :]  # (num_nms_bbox, 4)
            sp_vector_x = (boxes_obj_[:, 0] + boxes_obj_[:, 2]) / 2 - (
                boxes_sub_[:, 0] + boxes_sub_[:, 2]) / 2
            sp_vector_y = (boxes_obj_[:, 1] + boxes_obj_[:, 3]) / 2 - (
                boxes_sub_[:, 1] + boxes_sub_[:, 3]) / 2

            sp_vector_x = sp_vector_x.reshape(-1, 1)
            sp_vector_y = sp_vector_y.reshape(-1, 1)

            sp_vector = torch.cat([sp_vector_x, sp_vector_y], 1)

            act_scores_ = main_scores_.permute(1, 0)  # (num_nms_bbox, num_cls)
            act_classes_ = main_scores_.max(dim=0)[1]
            # obj_scores_ = other_scores_.permute(1, 0)  #

            out.append({
                'rois': boxes_.cpu().numpy(),
                'rois_sub': boxes_sub_.cpu().numpy(),
                'rois_obj': boxes_obj_.cpu().numpy(),
                'sp_vector': sp_vector.cpu().numpy(),
                'act_class_ids': act_classes_.cpu().numpy(),
                'act_scores': act_scores_.cpu().numpy(),
                # 'obj_scores': obj_scores_.cpu().numpy()
            })
        else:
            out.append({
                'rois': np.array(()),
                'rois_sub': np.array(()),
                'rois_obj': np.array(()),
                'sp_vector': np.array(()),
                'act_class_ids': np.array(()),
                'act_scores': np.array(()),
                # 'obj_scores': np.array(())
            })

    return out
Exemple #27
0
    def postprocess_detections(self, class_logits, box_regression, proposals,
                               image_shapes, box_features):
        # type: (Tensor, Tensor, List[Tensor], List[Tuple[int, int]], Tensor)
        device = class_logits.device
        num_classes = class_logits.shape[-1]

        boxes_per_image = [len(boxes_in_image) for boxes_in_image in proposals]
        pred_boxes = self.box_coder.decode(box_regression, proposals)

        pred_scores = F.softmax(class_logits, -1)

        # split boxes and scores per image
        if len(boxes_per_image) == 1:
            # TODO: remove this when ONNX supports dynamic split sizes
            # and just assign to pred_boxes instead of pred_boxes_list
            pred_boxes_list = [pred_boxes]
            pred_scores_list = [pred_scores]
            pred_embeddings_list = [box_features]
        else:
            pred_boxes_list = pred_boxes.split(boxes_per_image, 0)
            pred_scores_list = pred_scores.split(boxes_per_image, 0)
            pred_embeddings_list = box_features.split(boxes_per_image, 0)

        all_boxes = []
        all_scores = []
        all_labels = []
        all_embeddings = []
        for boxes, scores, image_shape, embeddings in zip(
                pred_boxes_list, pred_scores_list, image_shapes,
                pred_embeddings_list):
            boxes = box_ops.clip_boxes_to_image(boxes, image_shape)

            # create labels for each prediction
            labels = torch.arange(num_classes, device=device)
            labels = labels.view(1, -1).expand_as(scores)

            # remove predictions with the background label
            boxes = boxes[:, 1:]
            scores = scores[:, 1:]
            labels = labels[:, 1:]

            # batch everything, by making every class prediction be a separate instance
            embeddings = torch.repeat_interleave(embeddings, scores.size(1), 0)
            boxes = boxes.reshape(-1, 4)
            scores = scores.reshape(-1)
            labels = labels.reshape(-1)

            # remove low scoring boxes
            inds = torch.nonzero(scores > self.score_thresh).squeeze(1)
            boxes, scores, labels, embeddings = boxes[inds], scores[
                inds], labels[inds], embeddings[inds]

            # remove empty boxes
            keep = box_ops.remove_small_boxes(boxes, min_size=1e-2)
            boxes, scores, labels, embeddings = boxes[keep], scores[
                keep], labels[keep], embeddings[keep]

            # non-maximum suppression, independently done per class
            keep = box_ops.batched_nms(boxes, scores, labels, self.nms_thresh)
            # keep only topk scoring predictions
            keep = keep[:self.detections_per_img]
            boxes, scores, labels, embeddings = boxes[keep], scores[
                keep], labels[keep], embeddings[keep]

            all_boxes.append(boxes)
            all_scores.append(scores)
            all_labels.append(labels)
            all_embeddings.append(embeddings)

        return all_boxes, all_scores, all_labels, all_embeddings
Exemple #28
0
    def postprocess_detections(self, head_outputs, anchors, image_shapes):
        # type: (Dict[str, List[Tensor]], List[List[Tensor]], List[Tuple[int, int]]) -> List[Dict[str, Tensor]]
        class_logits = head_outputs['cls_logits']
        box_regression = head_outputs['bbox_regression']

        num_images = len(image_shapes)

        detections: List[Dict[str, Tensor]] = []

        for index in range(num_images):
            box_regression_per_image = [br[index] for br in box_regression]
            logits_per_image = [cl[index] for cl in class_logits]
            anchors_per_image, image_shape = anchors[index], image_shapes[
                index]

            image_boxes = []
            image_scores = []
            image_labels = []

            for box_regression_per_level, logits_per_level, anchors_per_level in \
                    zip(box_regression_per_image, logits_per_image, anchors_per_image):
                num_classes = logits_per_level.shape[-1]

                # remove low scoring boxes
                scores_per_level = torch.sigmoid(logits_per_level).flatten()
                keep_idxs = scores_per_level > self.score_thresh
                scores_per_level = scores_per_level[keep_idxs]
                topk_idxs = torch.where(keep_idxs)[0]

                # keep only topk scoring predictions
                num_topk = min(self.topk_candidates, topk_idxs.size(0))
                scores_per_level, idxs = scores_per_level.topk(num_topk)
                topk_idxs = topk_idxs[idxs]

                anchor_idxs = torch.div(topk_idxs,
                                        num_classes,
                                        rounding_mode='floor')
                labels_per_level = topk_idxs % num_classes

                boxes_per_level = self.box_coder.decode_single(
                    box_regression_per_level[anchor_idxs],
                    anchors_per_level[anchor_idxs])
                boxes_per_level = box_ops.clip_boxes_to_image(
                    boxes_per_level, image_shape)

                image_boxes.append(boxes_per_level)
                image_scores.append(scores_per_level)
                image_labels.append(labels_per_level)

            image_boxes = torch.cat(image_boxes, dim=0)
            image_scores = torch.cat(image_scores, dim=0)
            image_labels = torch.cat(image_labels, dim=0)

            # non-maximum suppression
            keep = box_ops.batched_nms(image_boxes, image_scores, image_labels,
                                       self.nms_thresh)
            keep = keep[:self.detections_per_img]

            detections.append({
                'boxes': image_boxes[keep],
                'scores': image_scores[keep],
                'labels': image_labels[keep],
            })

        return detections
Exemple #29
0
    def post_process(self, cls_logits: torch.Tensor, reg_deltas: torch.Tensor,
                     batched_rois: List[torch.Tensor]):
        nms_threshold = self._params['nms_threshold']
        conf_threshold = self._params['conf_threshold']
        keep_top_n = self._params['keep_top_n']

        batched_dets: List[torch.Tensor] = []
        current = 0
        for rois in batched_rois:
            N = rois.size(0)
            if N == 0:
                print("warning! found empty rois")
                batched_dets.append(
                    torch.empty(0,
                                6,
                                dtype=reg_deltas.dtype,
                                device=reg_deltas.device))
                continue

            logits = cls_logits[current:current + N]
            offsets = reg_deltas[current:current + N]
            current += N

            # logits: torch.Tensor(N,)
            # deltas: torch.Tensor(N,4)
            # rois: torch.Tensor(N,4)

            scores = torch.sigmoid(logits)
            preds = torch.zeros(scores.shape,
                                dtype=torch.int64,
                                device=scores.device)
            preds[scores >= 0.5] = 1

            fg_preds_mask = preds != 0

            # convert offsets to boxes
            # N,4 | N,4 => N,4 as xmin,ymin,xmax,ymax

            boxes = offsets2boxes(offsets.unsqueeze(0), rois).squeeze(0)

            # keep only foreground predictions
            boxes = boxes[fg_preds_mask]
            preds = preds[fg_preds_mask]
            scores = scores[fg_preds_mask]

            # apply conf threshold
            keep = scores >= conf_threshold
            scores, preds, boxes = scores[keep], preds[keep], boxes[keep]

            # remove small
            keep = box_ops.remove_small_boxes(boxes, 1e-3)  # TODO try 1
            scores, preds, boxes = scores[keep], preds[keep], boxes[keep]

            # batched nms
            keep = box_ops.batched_nms(boxes, scores, preds, nms_threshold)
            scores, preds, boxes = scores[keep], preds[keep], boxes[keep]

            # select top n
            keep_n = min(keep_top_n, scores.size(0))
            _, selected_ids = scores.topk(keep_n)
            scores, preds, boxes = scores[selected_ids], preds[
                selected_ids], boxes[selected_ids]
            scores.unsqueeze_(1)
            preds = preds.unsqueeze(1).to(boxes.dtype)
            dets = torch.cat([boxes, scores, preds], dim=-1)

            batched_dets.append(dets)

        return batched_dets
Exemple #30
0
def nms_with_class(boxes, scores, idxs, iou_threshold):
    return batched_nms(boxes, scores, idxs, iou_threshold)
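
The wrapper above just forwards to torchvision's batched_nms. As a minimal sketch of the class-wise NMS trick these examples rely on (and which torchvision also uses), boxes can be shifted by a per-class offset larger than any coordinate so that boxes with different labels never overlap, after which a single plain NMS pass suppresses only within each class. This is an illustrative reimplementation under that assumption, not the library code.

import torch
from torchvision.ops import nms

def classwise_nms(boxes, scores, idxs, iou_threshold):
    # Shift each class into its own coordinate range so cross-class pairs
    # have zero IoU, then run ordinary NMS once over everything.
    if boxes.numel() == 0:
        return torch.empty((0,), dtype=torch.int64, device=boxes.device)
    max_coordinate = boxes.max()
    offsets = idxs.to(boxes) * (max_coordinate + 1)
    shifted = boxes + offsets[:, None]
    return nms(shifted, scores, iou_threshold)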