Example #1
    def loss_boxes(self, outputs, targets, indices, num_boxes):
        """
        Compute the losses related to the bounding boxes: the L1 regression loss and the GIoU loss.
        Targets dicts must contain the key "boxes" containing a tensor of dim [nb_target_boxes, 4].
        The target boxes are expected in format (center_x, center_y, w, h), normalized by the image size.
        """
        # assert 'pred_boxes' in outputs
        idx = self._get_src_permutation_idx(indices)
        src_boxes = outputs["pred_boxes"][idx]
        target_boxes = torch.cat(
            [t["boxes"][i] for t, (_, i) in zip(targets, indices)], dim=0)

        loss_bbox = F.l1_loss(src_boxes, target_boxes, reduction="none")

        losses = {}
        losses["loss_bbox"] = loss_bbox.sum() / num_boxes

        # loss_giou = 1 - torch.diag(generalized_box_iou(box_cxcywh_to_xyxy(src_boxes),
        #                                                box_cxcywh_to_xyxy(target_boxes)))
        loss_giou = 1 - torch.diag(
            generalized_box_iou(
                box_convert(src_boxes, in_fmt="cxcywh", out_fmt="xyxy"),
                box_convert(target_boxes, in_fmt="cxcywh", out_fmt="xyxy")))
        losses["loss_giou"] = loss_giou.sum() / num_boxes
        return losses
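
A minimal standalone sketch (not part of the loss class above) of what the box_convert calls compute, assuming the DETR convention of boxes given as normalized (cx, cy, w, h); the manual formula is shown only for comparison:

import torch
from torchvision.ops import box_convert

# Two boxes in (cx, cy, w, h) format, normalized to [0, 1].
cxcywh = torch.tensor([[0.50, 0.50, 0.20, 0.40],
                       [0.25, 0.25, 0.10, 0.10]])

# torchvision converts them to (x1, y1, x2, y2) ...
xyxy = box_convert(cxcywh, in_fmt="cxcywh", out_fmt="xyxy")

# ... which matches the manual (cx - w/2, cy - h/2, cx + w/2, cy + h/2) formula.
manual = torch.cat([cxcywh[:, :2] - cxcywh[:, 2:] / 2,
                    cxcywh[:, :2] + cxcywh[:, 2:] / 2], dim=1)
assert torch.allclose(xyxy, manual)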
Example #2
    def test_bbox_same(self):
        box_tensor = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0],
                                   [10, 15, 30, 35], [23, 35, 93, 95]],
                                  dtype=torch.float)

        exp_xyxy = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0],
                                 [10, 15, 30, 35], [23, 35, 93, 95]],
                                dtype=torch.float)

        box_same = ops.box_convert(box_tensor, in_fmt="xyxy", out_fmt="xyxy")
        self.assertEqual(exp_xyxy.size(), torch.Size([4, 4]))
        self.assertEqual(exp_xyxy.dtype, box_tensor.dtype)
        assert torch.all(torch.eq(box_same, exp_xyxy)).item()

        box_same = ops.box_convert(box_tensor, in_fmt="xywh", out_fmt="xywh")
        self.assertEqual(exp_xyxy.size(), torch.Size([4, 4]))
        self.assertEqual(exp_xyxy.dtype, box_tensor.dtype)
        assert torch.all(torch.eq(box_same, exp_xyxy)).item()

        box_same = ops.box_convert(box_tensor,
                                   in_fmt="cxcywh",
                                   out_fmt="cxcywh")
        self.assertEqual(exp_xyxy.size(), torch.Size([4, 4]))
        self.assertEqual(exp_xyxy.dtype, box_tensor.dtype)
        assert torch.all(torch.eq(box_same, exp_xyxy)).item()
Example #3
    def update(self, preds: List[Dict[str, Tensor]], target: List[Dict[str, Tensor]]) -> None:  # type: ignore
        """Add detections and ground truth to the metric.

        Args:
            preds: A list consisting of dictionaries each containing the key-values
            (each dictionary corresponds to a single image):
            - ``boxes``: ``torch.FloatTensor`` of shape
                [num_boxes, 4] containing `num_boxes` detection boxes of the format
                specified in the constructor. By default, this method expects
                [xmin, ymin, xmax, ymax] in absolute image coordinates.
            - ``scores``: ``torch.FloatTensor`` of shape
                [num_boxes] containing detection scores for the boxes.
            - ``labels``: ``torch.IntTensor`` of shape
                [num_boxes] containing 0-indexed detection classes for the boxes.

            target: A list consisting of dictionaries each containing the key-values
            (each dictionary corresponds to a single image):
            - ``boxes``: ``torch.FloatTensor`` of shape
                [num_boxes, 4] containing `num_boxes` ground truth boxes of the format
                specified in the constructor. By default, this method expects
                [xmin, ymin, xmax, ymax] in absolute image coordinates.
            - ``labels``: ``torch.IntTensor`` of shape
                [num_boxes] containing 1-indexed ground truth classes for the boxes.

        Raises:
            ValueError:
                If ``preds`` is not of type List[Dict[str, Tensor]]
            ValueError:
                If ``target`` is not of type List[Dict[str, Tensor]]
            ValueError:
                If ``preds`` and ``target`` are not of the same length
            ValueError:
                If any of ``preds.boxes``, ``preds.scores``
                and ``preds.labels`` are not of the same length
            ValueError:
                If any of ``target.boxes`` and ``target.labels`` are not of the same length
            ValueError:
                If any box is not type float and of length 4
            ValueError:
                If any class is not type int and of length 1
            ValueError:
                If any score is not type float and of length 1
        """
        _input_validator(preds, target)

        for item in preds:
            boxes = _fix_empty_tensors(item["boxes"])
            boxes = box_convert(boxes, in_fmt=self.box_format, out_fmt="xyxy")
            self.detection_boxes.append(boxes)
            self.detection_labels.append(item["labels"])
            self.detection_scores.append(item["scores"])

        for item in target:
            boxes = _fix_empty_tensors(item["boxes"])
            boxes = box_convert(boxes, in_fmt=self.box_format, out_fmt="xyxy")
            self.groundtruth_boxes.append(boxes)
            self.groundtruth_labels.append(item["labels"])
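
A minimal sketch of the per-item conversion done inside update(), assuming (hypothetically) that the metric was constructed with box_format="cxcywh"; internally every box is brought to xyxy before accumulation:

import torch
from torchvision.ops import box_convert

item = {
    "boxes": torch.tensor([[30.0, 50.0, 20.0, 40.0]]),  # (cx, cy, w, h), absolute pixels
    "scores": torch.tensor([0.9]),
    "labels": torch.tensor([0]),
}
xyxy = box_convert(item["boxes"], in_fmt="cxcywh", out_fmt="xyxy")
# tensor([[20., 30., 40., 70.]])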
Example #4
    def forward(self, outputs, targets):
        """Performs the matching
        Params:
            outputs: This is a dict that contains at least these entries:
                 "pred_logits": Tensor of dim [batch_size, num_queries, num_classes] with the classification logits
                 "pred_boxes": Tensor of dim [batch_size, num_queries, 4] with the predicted box coordinates
            targets: This is a list of targets (len(targets) = batch_size), where each target is a dict containing:
                 "labels": Tensor of dim [num_target_boxes] (where num_target_boxes is the number of ground-truth
                           objects in the target) containing the class labels
                 "boxes": Tensor of dim [num_target_boxes, 4] containing the target box coordinates
        Returns:
            A list of size batch_size, containing tuples of (index_i, index_j) where:
                - index_i is the indices of the selected predictions (in order)
                - index_j is the indices of the corresponding selected targets (in order)
            For each batch element, it holds:
                len(index_i) = len(index_j) = min(num_queries, num_target_boxes)
        """
        bs, num_queries = outputs["pred_logits"].shape[:2]

        # We flatten to compute the cost matrices in a batch
        out_prob = (outputs["pred_logits"].flatten(0, 1).softmax(-1)
                    )  # [batch_size * num_queries, num_classes]
        out_bbox = outputs["pred_boxes"].flatten(
            0, 1)  # [batch_size * num_queries, 4]

        # Also concat the target labels and boxes
        tgt_ids = torch.cat([v["labels"] for v in targets])
        tgt_bbox = torch.cat([v["boxes"] for v in targets])

        # Compute the classification cost. Contrary to the loss, we don't use the NLL,
        # but approximate it as 1 - proba[target class].
        # The 1 is a constant that doesn't change the matching, so it can be omitted.
        cost_class = -out_prob[:, tgt_ids]

        # Compute the L1 cost between boxes
        cost_bbox = torch.cdist(out_bbox, tgt_bbox, p=1)

        # Compute the GIoU cost between boxes
        # cost_giou = -generalized_box_iou(box_cxcywh_to_xyxy(out_bbox), box_cxcywh_to_xyxy(tgt_bbox))
        cost_giou = -generalized_box_iou(
            box_convert(out_bbox, in_fmt="cxcywh", out_fmt="xyxy"),
            box_convert(tgt_bbox, in_fmt="cxcywh", out_fmt="xyxy"))

        # Final cost matrix
        C = (self.cost_bbox * cost_bbox + self.cost_class * cost_class +
             self.cost_giou * cost_giou)
        C = C.view(bs, num_queries, -1).cpu()

        sizes = [len(v["boxes"]) for v in targets]
        indices = [
            linear_sum_assignment(c[i])
            for i, c in enumerate(C.split(sizes, -1))
        ]
        return [(
            torch.as_tensor(i, dtype=torch.int64),
            torch.as_tensor(j, dtype=torch.int64),
        ) for i, j in indices]
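
A minimal sketch of the final assignment step, assuming a toy cost matrix with 3 queries and 2 targets; scipy's linear_sum_assignment returns the matched (prediction, target) index pairs exactly as the matcher does for each batch element:

import numpy as np
from scipy.optimize import linear_sum_assignment

# Rows are queries, columns are targets; lower cost means a better match.
cost = np.array([[0.9, 0.1],
                 [0.4, 0.6],
                 [0.2, 0.8]])
row_ind, col_ind = linear_sum_assignment(cost)
# row_ind = [0, 2], col_ind = [1, 0]: query 0 -> target 1, query 2 -> target 0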
Example #5
    def test_bbox_same(self):
        box_tensor = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0],
                                  [10, 15, 30, 35], [23, 35, 93, 95]], dtype=torch.float)

        exp_xyxy = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0],
                                [10, 15, 30, 35], [23, 35, 93, 95]], dtype=torch.float)

        assert exp_xyxy.size() == torch.Size([4, 4])
        assert_equal(ops.box_convert(box_tensor, in_fmt="xyxy", out_fmt="xyxy"), exp_xyxy)
        assert_equal(ops.box_convert(box_tensor, in_fmt="xywh", out_fmt="xywh"), exp_xyxy)
        assert_equal(ops.box_convert(box_tensor, in_fmt="cxcywh", out_fmt="cxcywh"), exp_xyxy)
Example #6
    def test_bbox_convert_jit(self):
        box_tensor = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0],
                                  [10, 15, 30, 35], [23, 35, 93, 95]], dtype=torch.float)

        scripted_fn = torch.jit.script(ops.box_convert)
        TOLERANCE = 1e-3

        box_xywh = ops.box_convert(box_tensor, in_fmt="xyxy", out_fmt="xywh")
        scripted_xywh = scripted_fn(box_tensor, 'xyxy', 'xywh')
        self.assertTrue((scripted_xywh - box_xywh).abs().max() < TOLERANCE)

        box_cxcywh = ops.box_convert(box_tensor, in_fmt="xyxy", out_fmt="cxcywh")
        scripted_cxcywh = scripted_fn(box_tensor, 'xyxy', 'cxcywh')
        self.assertTrue((scripted_cxcywh - box_cxcywh).abs().max() < TOLERANCE)
Example #7
    def test_bbox_convert_jit(self):
        box_tensor = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0],
                                  [10, 15, 30, 35], [23, 35, 93, 95]], dtype=torch.float)

        scripted_fn = torch.jit.script(ops.box_convert)
        TOLERANCE = 1e-3

        box_xywh = ops.box_convert(box_tensor, in_fmt="xyxy", out_fmt="xywh")
        scripted_xywh = scripted_fn(box_tensor, 'xyxy', 'xywh')
        torch.testing.assert_close(scripted_xywh, box_xywh, rtol=0.0, atol=TOLERANCE)

        box_cxcywh = ops.box_convert(box_tensor, in_fmt="xyxy", out_fmt="cxcywh")
        scripted_cxcywh = scripted_fn(box_tensor, 'xyxy', 'cxcywh')
        torch.testing.assert_close(scripted_cxcywh, box_cxcywh, rtol=0.0, atol=TOLERANCE)
Example #8
    def test_bbox_xyxy_cxcywh(self):
        # Simple test: convert boxes to cxcywh and back, and make sure they are the same.
        # box_tensor is in x1 y1 x2 y2 format.
        box_tensor = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0],
                                  [10, 15, 30, 35], [23, 35, 93, 95]], dtype=torch.float)
        exp_cxcywh = torch.tensor([[50, 50, 100, 100], [0, 0, 0, 0],
                                  [20, 25, 20, 20], [58, 65, 70, 60]], dtype=torch.float)

        assert exp_cxcywh.size() == torch.Size([4, 4])
        box_cxcywh = ops.box_convert(box_tensor, in_fmt="xyxy", out_fmt="cxcywh")
        assert_equal(box_cxcywh, exp_cxcywh)

        # Reverse conversion
        box_xyxy = ops.box_convert(box_cxcywh, in_fmt="cxcywh", out_fmt="xyxy")
        assert_equal(box_xyxy, box_tensor)
Example #9
    def test_bbox_xywh_cxcywh(self):
        box_tensor = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0],
                                  [10, 15, 20, 20], [23, 35, 70, 60]], dtype=torch.float)

        # Expected boxes in (cx, cy, w, h) format
        exp_cxcywh = torch.tensor([[50, 50, 100, 100], [0, 0, 0, 0],
                                  [20, 25, 20, 20], [58, 65, 70, 60]], dtype=torch.float)

        assert exp_cxcywh.size() == torch.Size([4, 4])
        box_cxcywh = ops.box_convert(box_tensor, in_fmt="xywh", out_fmt="cxcywh")
        assert_equal(box_cxcywh, exp_cxcywh)

        # Reverse conversion
        box_xywh = ops.box_convert(box_cxcywh, in_fmt="cxcywh", out_fmt="xywh")
        assert_equal(box_xywh, box_tensor)
Example #10
    def __getitem__(self, i):
        image_path, image_meta, annotation = self.data_list[i]

        # read image
        image = np.array(Image.open(image_path))

        # read meta
        image_meta = {
            'image_id': image_meta['id'],
            'height': image_meta['height'],
            'width': image_meta['width']
        }

        # read bboxes & labels
        bboxes = []
        labels = []
        for anno in annotation:
            bboxes.append(anno['bbox'])
            labels.append(anno['category_id'])

        # transform
        image, image_meta, bboxes, labels = self.transform(
            image=image, image_meta=image_meta, bboxes=bboxes, labels=labels)
        bboxes = box_convert(torch.tensor(bboxes),
                             in_fmt='xywh',
                             out_fmt=self.fmt)
        if self.norm:
            bboxes = bboxes.div(image.size(-1)).float()
        labels = torch.tensor(labels)

        return image, image_meta, bboxes, labels
Example #11
    def __getitem__(self, index):
        img, target = tools.load_img_target(self, index)

        cls_labels = [obj['category_id'] for obj in target]
        bbox_labels = [obj['bbox'] for obj in target]

        transformed = self.transform(image=img,
                                     bboxes=bbox_labels,
                                     class_labels=cls_labels)
        img = transformed['image']
        cls_labels = torch.as_tensor(transformed['class_labels'])
        bbox_labels = cv_ops.box_convert(
            torch.as_tensor(transformed['bboxes']),
            in_fmt='xywh', out_fmt='xyxy')

        all_level_points, class_targets, distance_targets = self._encode_targets(
            cls_labels, bbox_labels)
        centerness_targets = self._encode_centerness_targets(distance_targets)

        return img, {
            'points': all_level_points,
            'class_targets': class_targets,
            'distance_targets': distance_targets,
            'centerness_targets': centerness_targets
        }
Example #12
    def forward(self, outputs, target_sizes):
        """
        Perform the computation
        Parameters:
            outputs: raw outputs of the model
            target_sizes: tensor of dimension [batch_size x 2] containing the size of each image of the batch
                          For evaluation, this must be the original image size (before any data augmentation)
                          For visualization, this should be the image size after data augmentation, but before padding
        """
        out_logits, out_bbox = outputs['pred_logits'], outputs['pred_boxes']

        assert len(out_logits) == len(target_sizes)
        assert target_sizes.shape[1] == 2

        prob = F.softmax(out_logits, -1)
        scores, labels = prob[..., :-1].max(-1)

        # convert to [x0, y0, x1, y1] format
        # boxes = box_ops.box_cxcywh_to_xyxy(out_bbox)
        boxes = box_convert(out_bbox, in_fmt="cxcywh", out_fmt="xyxy")
        # and from relative [0, 1] to absolute [0, height] coordinates
        img_h, img_w = target_sizes.unbind(1)
        scale_fct = torch.stack([img_w, img_h, img_w, img_h], dim=1)
        boxes = boxes * scale_fct[:, None, :]

        results = [{
            'scores': s,
            'labels': l,
            'boxes': b
        } for s, l, b in zip(scores, labels, boxes)]

        return results
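
A minimal single-image sketch of the rescaling above, assuming one normalized cxcywh box and an image of height 480 and width 640:

import torch
from torchvision.ops import box_convert

out_bbox = torch.tensor([[0.5, 0.5, 0.25, 0.5]])          # normalized (cx, cy, w, h)
boxes = box_convert(out_bbox, in_fmt="cxcywh", out_fmt="xyxy")
img_h, img_w = 480, 640
boxes = boxes * torch.tensor([img_w, img_h, img_w, img_h])
# tensor([[240., 120., 400., 360.]])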
Example #13
    def build_reg_and_cls_targets(self, boxes):
        boxes_xyxy = ops.box_convert(boxes, 'cxcywh', 'xyxy') # [B, 4]
        iou_dist = ops.box_iou(self.anchors_xyxy, boxes_xyxy) # [A, B]
        closest_box_indices = torch.argmax(iou_dist, dim=1) # [A]
        target_boxes = boxes[closest_box_indices] # [A, 4]

        # Both [A, 2]
        xy_targets = (
                (target_boxes[..., :2] - self.anchors[..., :2]) /
                self.anchors[..., 2:])
        wh_targets = torch.log(target_boxes[..., 2:] / self.anchors[..., 2:])
        reg_target = torch.hstack((xy_targets, wh_targets)) # [A, 4]

        pos_selector = torch.any(iou_dist > self.pos_thresh, dim=1) # [A,]
        neg_selector = torch.all(iou_dist < self.neg_thresh, dim=1) # [A,]

        valid_pos_selector = pos_selector & self.valid_anchors_selector # [A,]
        valid_neg_selector = neg_selector & self.valid_anchors_selector # [A,]

        cls_target = torch.full(
            (len(self.anchors),), INVALID_ANCHOR_LABEL,
            device=boxes.device) # [A,]
        cls_target[valid_pos_selector] = POS_ANCHOR_LABEL # [A,]
        cls_target[valid_neg_selector] = NEG_ANCHOR_LABEL # [A,]
        
        return reg_target, cls_target
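
A minimal sketch of the inverse of the encoding above, assuming a single cxcywh anchor and ground-truth box; decoding the regression target recovers the original box (the same decode step appears in the tracker's update() in a later example):

import torch

anchor = torch.tensor([[100., 100., 40., 80.]])   # (cx, cy, w, h)
box    = torch.tensor([[110.,  90., 60., 40.]])   # ground truth (cx, cy, w, h)

# Encode exactly as build_reg_and_cls_targets does.
xy_t = (box[..., :2] - anchor[..., :2]) / anchor[..., 2:]
wh_t = torch.log(box[..., 2:] / anchor[..., 2:])

# Decode by inverting the transform.
xy = xy_t * anchor[..., 2:] + anchor[..., :2]
wh = torch.exp(wh_t) * anchor[..., 2:]
assert torch.allclose(torch.hstack((xy, wh)), box)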
Example #14
    def test_bbox_xywh_cxcywh(self):
        box_tensor = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0],
                                  [10, 15, 20, 20], [23, 35, 70, 60]], dtype=torch.float)

        # Expected boxes in (cx, cy, w, h) format
        exp_cxcywh = torch.tensor([[50, 50, 100, 100], [0, 0, 0, 0],
                                  [20, 25, 20, 20], [58, 65, 70, 60]], dtype=torch.float)

        box_cxcywh = ops.box_convert(box_tensor, in_fmt="xywh", out_fmt="cxcywh")
        self.assertEqual(exp_cxcywh.size(), torch.Size([4, 4]))
        self.assertEqual(exp_cxcywh.dtype, box_tensor.dtype)
        assert torch.all(torch.eq(box_cxcywh, exp_cxcywh)).item()

        # Reverse conversion
        box_xywh = ops.box_convert(box_cxcywh, in_fmt="cxcywh", out_fmt="xywh")
        self.assertEqual(box_xywh.size(), torch.Size([4, 4]))
        self.assertEqual(box_xywh.dtype, box_tensor.dtype)
        assert torch.all(torch.eq(box_xywh, box_tensor)).item()
Example #15
    def test_bbox_xyxy_xywh(self):
        # Simple test: convert boxes to xywh and back, and make sure they are the same.
        # box_tensor is in x1 y1 x2 y2 format.
        box_tensor = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0],
                                  [10, 15, 30, 35], [23, 35, 93, 95]], dtype=torch.float)
        exp_xywh = torch.tensor([[0, 0, 100, 100], [0, 0, 0, 0],
                                [10, 15, 20, 20], [23, 35, 70, 60]], dtype=torch.float)

        box_xywh = ops.box_convert(box_tensor, in_fmt="xyxy", out_fmt="xywh")
        self.assertEqual(exp_xywh.size(), torch.Size([4, 4]))
        self.assertEqual(exp_xywh.dtype, box_tensor.dtype)
        assert torch.all(torch.eq(box_xywh, exp_xywh)).item()

        # Reverse conversion
        box_xyxy = ops.box_convert(box_xywh, in_fmt="xywh", out_fmt="xyxy")
        self.assertEqual(box_xyxy.size(), torch.Size([4, 4]))
        self.assertEqual(box_xyxy.dtype, box_tensor.dtype)
        assert torch.all(torch.eq(box_xyxy, box_tensor)).item()
Example #16
    def pre_predict(self,
                    outputs: tuple,
                    conf_thresh: float = 0.01,
                    top_k: int = 200) -> tuple:
        """ モデルの出力結果を予測データに変換する

        Args:
            outputs (tuple): モデルの出力. (予測オフセット, 予測信頼度)
            conf_thresh (float): 信頼度の閾値
            top_k (int): 検出数

        Returns:
            tuple: (予測BBox, 予測信頼度, 予測クラス)
                    - 予測BBox   : [N, 8732, 4] (coord fmt: [xmin, ymin, xmax, ymax], 0 ~ 1)
                    - 予測信頼度 : [N, 8732]
                    - 予測クラス : [N, 8732]
        """
        out_locs, out_objs, out_confs = outputs
        out_locs[..., :2] = out_locs[..., :2].sigmoid()
        out_objs = out_objs.sigmoid()
        out_confs = out_confs.sigmoid()
        out_confs = out_confs * out_objs[..., None]

        # to CPU
        out_locs = out_locs.detach().cpu()
        out_objs = out_objs.detach().cpu()
        out_confs = out_confs.detach().cpu()

        pred_bboxes = []
        pred_scores = []
        pred_class_ids = []

        for locs, objs, confs in zip(out_locs, out_objs, out_confs):
            bboxes = []
            scores = []
            class_ids = []

            for class_id in range(confs.size(1)):
                pos_mask = (confs[:, class_id] > conf_thresh) * (
                    confs[:, class_id].argsort(descending=True).argsort() < top_k)
                scores_ = confs[pos_mask, class_id]
                class_ids_ = torch.full_like(scores_,
                                             class_id + 1,
                                             dtype=torch.long)
                bboxes_ = self._calc_coord(locs[pos_mask],
                                           self.pboxes[pos_mask])
                bboxes_ = box_convert(bboxes_, in_fmt='xywh', out_fmt='xyxy')

                bboxes.append(bboxes_)
                scores.append(scores_)
                class_ids.append(class_ids_)

            pred_bboxes.append(torch.cat(bboxes))
            pred_scores.append(torch.cat(scores))
            pred_class_ids.append(torch.cat(class_ids))
        return pred_bboxes, pred_scores, pred_class_ids
Example #17
def _decode_pred_logits(pred_logits: Tensor):
    """
    Decode the prediction logit from the PostPrecess.
    """
    # Compute conf
    # box_conf x class_conf, w/ shape: num_anchors x num_classes
    scores = pred_logits[:, 5:] * pred_logits[:, 4:5]
    boxes = box_convert(pred_logits[:, :4], in_fmt="cxcywh", out_fmt="xyxy")

    return boxes, scores
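
A minimal sketch of how the decoded boxes and scores might be filtered afterwards (this step is not part of the snippet above), assuming a class-agnostic confidence threshold and torchvision's nms; the threshold values are illustrative only:

import torch
from torchvision.ops import box_convert, nms

# Fake logits: [num_anchors, 5 + num_classes] laid out as (cx, cy, w, h, obj, cls...).
pred_logits = torch.tensor([[ 50.,  50., 20., 20., 0.9, 0.8, 0.1],
                            [ 52.,  51., 22., 18., 0.8, 0.7, 0.2],
                            [200., 200., 30., 30., 0.6, 0.1, 0.5]])
boxes = box_convert(pred_logits[:, :4], in_fmt="cxcywh", out_fmt="xyxy")
scores = pred_logits[:, 5:] * pred_logits[:, 4:5]   # same product as _decode_pred_logits
conf, labels = scores.max(dim=1)
keep = conf > 0.25                                   # hypothetical confidence threshold
keep_nms = nms(boxes[keep], conf[keep], iou_threshold=0.45)
final_boxes = boxes[keep][keep_nms]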
Example #18
    def __getitem__(self, index):
        img, target = tools.load_img_target(self, index)
        img_info = self.coco.loadImgs(self.ids[index])[0]
        iw, ih = img_info['width'], img_info['height']

        class_labels, bbox_labels, mask_labels = [], [], []
        for obj in target:
            if not tools.is_correct_instance(obj, self.cat_idx_list, iw, ih):
                continue

            class_labels.append(self.cat_to_label_map[obj['category_id']])
            bbox_labels.append(obj['bbox'])

            # rle = coco_mask.frPyObjects(obj['segmentation'], ih, iw)
            # if obj['iscrowd'] == 0:
            #     rle = coco_mask.merge(rle)
            # mask = coco_mask.decode(rle)
            # mask_labels.append(mask)

        transformed = self.img_transform(image=img,
                                         bboxes=bbox_labels,
                                         class_labels=class_labels)
        # transformed = self.img_transform(image=img, masks=mask_labels, bboxes=bbox_labels, class_labels=class_labels)
        img = tools.TENSOR_TRANSFORM(transformed['image'])
        # mask_labels = transformed['masks']
        class_labels = transformed['class_labels']
        bbox_labels = transformed['bboxes']

        if len(bbox_labels) == 0:
            # For any instance with classification label 0 (background), only the classification loss is computed, without mask, centerness and bbox losses.
            # When there are no instances in an image, the value of the added bbox does not matter.
            mask_labels = [np.zeros((self.h, self.w))]
            bbox_labels = [[0., 0., 10., 10.]]
            class_labels = [0]

        class_labels = torch.as_tensor(class_labels)

        # instance_mask_labels = self._generate_instance_mask_labels(mask_labels, bbox_labels)
        # instance_mask_labels = torch.as_tensor(np.array(instance_mask_labels)).float()

        bbox_labels = cv_ops.box_convert(torch.as_tensor(bbox_labels,
                                                         dtype=torch.float32),
                                         in_fmt='xywh',
                                         out_fmt='xyxy')
        bbox_labels = cv_ops.clip_boxes_to_image(bbox_labels, (ih, iw))

        class_targets, distance_targets = self._encode_targets(
            class_labels, bbox_labels, None)
        centerness_targets = tools.encode_centerness_targets(distance_targets)

        return img, self.points, {
            'class': class_targets,
            'distance': distance_targets,
            'centerness': centerness_targets
        }
Example #19
    def __call__(self, image, target=None):
        image = F.normalize(image, mean=self.mean, std=self.std)
        if target is None:
            return image, None
        target = target.copy()
        h, w = image.shape[-2:]
        if "boxes" in target:
            boxes = target["boxes"]
            boxes = box_convert(boxes, in_fmt="xyxy", out_fmt="cxcywh")
            boxes = boxes / torch.tensor([w, h, w, h], dtype=torch.float32)
            target["boxes"] = boxes
        return image, target
Example #20
def stats_dataset(dataset: ObjectDetectionDataSet,
                  rcnn_transform: GeneralizedRCNNTransform = None):
    """
    Iterates over the dataset and returns some stats.
    Can be useful to pick the right anchor box sizes.
    """
    from torchvision.ops import box_convert, box_area
    stats = {
        'image_height': [],
        'image_width': [],
        'image_mean': [],
        'image_std': [],
        'boxes_height': [],
        'boxes_width': [],
        'boxes_num': [],
        'boxes_area': []
    }
    for batch in dataset:
        # Batch
        x, y, x_name, y_name = batch['x'], batch['y'], batch['x_name'], batch[
            'y_name']

        # Transform
        if rcnn_transform:
            x, y = rcnn_transform([x], [y])
            x, y = x.tensors, y[0]

        # Image
        stats['image_height'].append(x.shape[-2])
        stats['image_width'].append(x.shape[-1])
        stats['image_mean'].append(x.mean().item())
        stats['image_std'].append(x.std().item())

        # Target
        # In xywh format the last two columns are (width, height)
        wh = box_convert(y['boxes'], 'xyxy', 'xywh')[:, -2:]
        stats['boxes_height'].append(wh[:, 1])
        stats['boxes_width'].append(wh[:, 0])
        stats['boxes_num'].append(len(wh))
        stats['boxes_area'].append(box_area(y['boxes']))

    stats['image_height'] = torch.tensor(stats['image_height'],
                                         dtype=torch.float)
    stats['image_width'] = torch.tensor(stats['image_width'],
                                        dtype=torch.float)
    stats['image_mean'] = torch.tensor(stats['image_mean'], dtype=torch.float)
    stats['image_std'] = torch.tensor(stats['image_std'], dtype=torch.float)
    stats['boxes_height'] = torch.cat(stats['boxes_height'])
    stats['boxes_width'] = torch.cat(stats['boxes_width'])
    stats['boxes_area'] = torch.cat(stats['boxes_area'])
    stats['boxes_num'] = torch.tensor(stats['boxes_num'], dtype=torch.float)

    return stats
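
A minimal usage sketch, assuming `dataset` is an ObjectDetectionDataSet as in the signature above; the box width and height quantiles give a rough starting point for anchor sizes:

import torch

stats = stats_dataset(dataset)
q = torch.tensor([0.25, 0.50, 0.75])
print('box width quantiles :', torch.quantile(stats['boxes_width'], q))
print('box height quantiles:', torch.quantile(stats['boxes_height'], q))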
Example #21
    def __init__(
            self, anchors, img_width, img_height, pos_thresh=0.5,
            neg_thresh=0.2):
        self.pos_thresh = pos_thresh
        self.neg_thresh = neg_thresh

        self.anchors = anchors
        self.anchors_xyxy = ops.box_convert(anchors, 'cxcywh', 'xyxy') # [A, 4]
        self.valid_anchors_selector = (
                (self.anchors_xyxy[:, 0] >= 0) &
                (self.anchors_xyxy[:, 1] >= 0) &
                (self.anchors_xyxy[:, 2] < img_width) &
                (self.anchors_xyxy[:, 3] < img_height))
Example #22
    def update(self, img: ImageT) -> np.ndarray:
        self.model.eval()

        side_size = int(round(self.curr_instance_side_size))
        bbox = BBox.build_from_center_and_size(
            self.target_bbox.center, np.asarray((side_size, side_size)))
        instance_img = center_crop_and_resize(
            img, bbox, (self.cfg.instance_size, self.cfg.instance_size))

        if self.on_instance_img_extract:
            self.on_instance_img_extract(instance_img)

        instance_img = pil_to_tensor(instance_img).to(self.device)
        pred_reg, pred_cls = self.model.inference(instance_img,
                                                  self.kernel_reg,
                                                  self.kernel_cls)

        pred_reg = pred_reg.squeeze()
        pred_cls = pred_cls.squeeze()

        pred_cls = F.softmax(pred_cls, dim=1)
        pred_cls_max = pred_cls.argmax(dim=1)
        # TODO Store the range somewhere as it may be faster.
        scores = pred_cls[list(range(len(pred_cls))), pred_cls_max]
        scores[pred_cls_max == 0] = 0  # The 0-th position is the background.

        # TODO Think of modifying the regression predictions in place.
        xy_vals = pred_reg[:, :2] * self.anchors[:, 2:] + self.anchors[:, :2]
        wh_vals = torch.exp(pred_reg[:, 2:]) * self.anchors[:, 2:]
        boxes = torch.hstack((xy_vals, wh_vals))
        boxes = ops.box_convert(boxes, 'cxcywh', 'xyxy')
        boxes = ops.clip_boxes_to_image(
            boxes, (self.cfg.instance_size, self.cfg.instance_size))

        response = (1 - self.cfg.cosine_win_influence) * response + \
                   self.cfg.cosine_win_influence * self.cosine_win

        # The assumption is that the peak response value is in the center of the
        # response map. Thus, we compute the change with respect to the center
        # and convert it back to the pixel coordinates in the image.
        peak_response_pos = np.asarray(
            np.unravel_index(response.argmax(), response.shape))

        # Update target scale.
        self.curr_instance_side_size *= new_scale

        # Change from [row, col] to [x, y] coordinates.
        self.target_bbox.shift(disp_in_image[::-1])
        self.target_bbox.rescale(new_scale, new_scale)

        return self.target_bbox.as_xywh()
Example #23
File: grozi.py Project: laitalaj/cvpce
def visualize_test(root, select_from):
    dataset = datautils.GroZiTestSet(root)
    if select_from == 'min':
        idxset = dataset.least_annotated()
        print(f'There are {len(idxset)} least-annotated images')
    elif select_from == 'max':
        idxset = dataset.most_annotated()
        print(f'There are {len(idxset)} most-annotated images')
    else:
        idxset = range(len(dataset))
        print(f'There are {len(dataset)} images')
    img, anns, boxes = dataset[random.choice(idxset)]
    print(f'Annotations in image: {len(anns)}')
    utils.show(img, groundtruth=tvops.box_convert(boxes, 'xyxy', 'xywh'), groundtruth_labels=anns)
Example #24
def detect(conf_thresh, save, state_file, image_file):
    '''
    Detect products and visualize the detections.
    '''
    state_dict = torch.load(state_file)[
        proposals_training.MODEL_STATE_DICT_KEY]
    model = proposals.gln().cuda()
    model.load_state_dict(state_dict)
    model.eval()
    generator = ProposalGenerator(model, confidence_threshold=conf_thresh)

    img = ttf.to_tensor(pil.Image.open(image_file))
    with torch.no_grad():
        detections = generator.generate_proposals(img)

    utils.show(
        img, utils.recall_tensor(tvops.box_convert(detections, 'xyxy',
                                                   'xywh')))
    if save is not None:
        utils.save(img,
                   save,
                   groundtruth=utils.recall_tensor(
                       tvops.box_convert(detections, 'xyxy', 'xywh')))
Example #25
def matching_box(pbs: Tensor,
                 gbs: Tensor,
                 pb_format='cxcywh',
                 gb_format='cxcywh',
                 threshold=0.5) -> Tensor:
    """
    Matching the default boxes to ground truth boxes of category

    Args: 2 set of boxes in (x1, y1, x2, y2) format.
        pbs - Tensor[num_prior, 4]
        gbs - Tensor[num_obj, 4]
    Return:
        positive_map, positive_set
    """
    # print(pbs.device, gbs.device)
    xy_pbs = box_convert(pbs, pb_format, 'xyxy')
    xy_gbs = box_convert(gbs, gb_format, 'xyxy')
    # print(xy_pbs.device, xy_gbs.device)
    overlaps = box_iou(xy_pbs, xy_gbs)  # [N, M]

    # Two cases can leave an object without any prior box in the positive set:
    # 1. It is not the best match for any prior box
    # 2. All of its overlaps are below the threshold

    best_p4g_ind = torch.argmax(overlaps, dim=0)  # [M]
    assert best_p4g_ind.size(0) == gbs.size(0)
    best_g4p_overlap, best_g4p_ind = torch.max(overlaps, dim=1)

    best_g4p_ind[best_p4g_ind] = torch.LongTensor(range(
        best_p4g_ind.size(0))).to(best_g4p_ind.device)  # Handles case 1
    # Ensure these priors pass the threshold check; handles case 2
    best_g4p_overlap[best_p4g_ind] = 1.
    # then match default boxes to any ground truth with jaccard overlap higher than a threshold (0.5).

    positive_map = best_g4p_overlap > threshold
    positive_set = best_g4p_ind[positive_map]
    return positive_map, positive_set  # [num_prior], [num_positive]
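
A toy sketch of the two-step matching the comments describe, assuming 3 prior boxes and 2 ground-truth boxes already in xyxy format:

import torch
from torchvision.ops import box_iou

priors = torch.tensor([[  0.,   0.,  10.,  10.],
                       [ 20.,  20.,  40.,  40.],
                       [100., 100., 110., 110.]])
gts = torch.tensor([[ 1.,  1.,  9.,  9.],
                    [22., 22., 38., 38.]])
overlaps = box_iou(priors, gts)                  # [3, 2]
best_prior_per_gt = overlaps.argmax(dim=0)       # each ground truth grabs its best prior
best_overlap_per_prior, best_gt_per_prior = overlaps.max(dim=1)
best_gt_per_prior[best_prior_per_gt] = torch.arange(len(gts))    # case 1
best_overlap_per_prior[best_prior_per_gt] = 1.0                  # case 2
positive_map = best_overlap_per_prior > 0.5      # tensor([True, True, False])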
Example #26
    def __getitem__(self, idx: int):
        img = torch.rand(self.img_shape)
        boxes = torch.tensor(
            [self._random_bbox() for _ in range(self.num_boxes)],
            dtype=torch.float32)
        boxes = ops.clip_boxes_to_image(boxes,
                                        (self.img_shape[1], self.img_shape[2]))

        # Passing the same in_fmt and out_fmt is fine; box_convert handles that case
        converted_boxes = ops.box_convert(boxes,
                                          in_fmt="xyxy",
                                          out_fmt=self.box_fmt)
        labels = torch.randint(self.num_classes, (self.num_boxes, ),
                               dtype=torch.long)
        return img, {"boxes": converted_boxes, "labels": labels}
Example #27
def calculate_metrics(pred: Tensor,
                      gold: Tensor,
                      mask: Optional[Tensor] = None) -> Dict[str, Tensor]:
    if mask is None:
        mask = pred.new_ones(pred.shape[:-1]).unsqueeze(-1)
    pred_center = box_convert(pred, in_fmt='xyxy', out_fmt='cxcywh')[..., :2]
    gold_center = box_convert(gold, in_fmt='xyxy', out_fmt='cxcywh')[..., :2]
    sum_ade = ((pred_center - gold_center)**2 *
               mask.float()).sum(dim=-1).sqrt().sum()
    sum_fde = ((pred_center[:, -1] - gold_center[:, -1])**2 *
               mask.float()[:, -1]).sum(dim=-1).sqrt().sum()
    num_ade = mask.float().sum()
    num_fde = mask.float()[:, -1].sum()
    sum_fiou = box_iou(pred[:, -1].reshape(-1, 4).contiguous(),
                       gold[:, -1].reshape(-1, 4).contiguous()).diag()
    sum_fiou = sum_fiou[~sum_fiou.isnan()].sum()

    return {
        "sum_ade": sum_ade,
        "sum_fde": sum_fde,
        "num_ade": num_ade,
        "num_fde": num_fde,
        "sum_fiou": sum_fiou
    }
Example #28
def normalize_boxes(boxes: Tensor, original_size: List[int]) -> Tensor:
    height = torch.tensor(original_size[0],
                          dtype=torch.float32,
                          device=boxes.device)
    width = torch.tensor(original_size[1],
                         dtype=torch.float32,
                         device=boxes.device)
    xmin, ymin, xmax, ymax = boxes.unbind(1)

    xmin = xmin / width
    xmax = xmax / width
    ymin = ymin / height
    ymax = ymax / height
    boxes = torch.stack((xmin, ymin, xmax, ymax), dim=1)
    # Convert xyxy to cxcywh
    return box_convert(boxes, in_fmt="xyxy", out_fmt="cxcywh")
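
A minimal usage sketch of the function above, assuming one absolute xyxy box and an original (height, width) of (480, 640):

import torch

boxes = torch.tensor([[160.0, 120.0, 480.0, 360.0]])     # xyxy, absolute pixels
normalized = normalize_boxes(boxes, original_size=[480, 640])
# tensor([[0.5000, 0.5000, 0.5000, 0.5000]]): a centered box spanning half of each side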
Example #29
    def forward(self, x):
        b, _, h, w = x.shape

        # (b, c, h, w) => (b, h * w * num_anchors, coord + num_classes)
        x = x.permute(0, 2, 3, 1).contiguous().view(
            b, h * w * len(self.anchors), 5 + self.num_classes)

        # activate
        x = torch.cat([
            torch.sigmoid(x[:, :, 0:2]),
            torch.exp(x[:, :, 2:4]),
            torch.sigmoid(x[:, :, 4:5]),
            torch.softmax(x[:, :, 5:], dim=2)
        ], dim=-1)

        # restore
        cx, cy = torch.meshgrid(torch.arange(w), torch.arange(h))
        cx = cx.t().contiguous().view(-1, 1)  # transpose so anchors are organized in H x W order
        cy = cy.t().contiguous().view(-1, 1)

        centers = torch.cat([cx, cy], axis=1).float()
        anchors = torch.as_tensor(self.anchors)
        anchors[:, 0] = anchors[:, 0] * w
        anchors[:, 1] = anchors[:, 1] * h

        all_anchors = torch.cat(
            [
                centers.view(-1, 1, 2).expand(-1, len(self.anchors), 2),
                anchors.view(1, -1, 2).expand(h * w, -1, 2)
            ],
            axis=2).view(-1, 4)  # (h * w * num_anchors, [cx, cy, w, h])

        all_anchors = all_anchors.to(x.device)

        x[:, :, 0:2] = x[:, :, 0:2] + all_anchors[:, 0:2]
        x[:, :, 2:4] = x[:, :, 2:4] * all_anchors[:, 2:4]

        x = torch.cat([
            box_convert(x[:, :, 0:4], in_fmt='cxcywh', out_fmt='xyxy'),
            x[:, :, 4:]
        ], dim=-1)

        return x
Example #30
def overlay_boxes(detections, path, time_consume, args):
    img = cv2.imread(path) if args.save_img else None

    for i, pred in enumerate(detections):  # detections per image
        det_logs = ''
        save_path = Path(args.output_dir).joinpath(Path(path).name)
        txt_path = Path(args.output_dir).joinpath(Path(path).stem)

        if pred is not None and len(pred) > 0:
            # Rescale boxes from img_size to im0 size
            boxes, scores, labels = pred['boxes'].round(), pred['scores'], pred['labels']

            # Print results
            for c in labels.unique():
                n = (labels == c).sum()  # detections per class
                det_logs += '%g %ss, ' % (n, args.names[int(c)])  # add to string

            # Write results
            for xyxy, conf, cls_name in zip(boxes, scores, labels):
                if args.save_txt:  # Write to file
                    # cxcywh format
                    cxcywh = box_convert(xyxy, in_fmt="xyxy",
                                         out_fmt="cxcywh").tolist()
                    with open(f'{txt_path}.txt', 'a') as f:
                        f.write(('%g ' * 5 + '\n') %
                                (cls_name, *cxcywh))  # label format

                if args.save_img:  # Add bbox to image
                    label = '%s %.2f' % (args.names[int(cls_name)], conf)
                    plot_one_box(
                        xyxy,
                        img,
                        label=label,
                        color=args.colors[int(cls_name) % len(args.colors)],
                        line_thickness=3,
                    )

        # Print inference time
        print('%sDone. (%.3fs)' % (det_logs, time_consume))

        # Save results (image with detections)
        if args.save_img:
            cv2.imwrite(str(save_path), img)

    return (boxes.tolist(), scores.tolist(), labels.tolist())