Example #1
File: mtcnn.py Project: iorivn/mtcnn
    def _first_stage(self, imgs: torch.Tensor):
        with EvalScope(self.pNet):
            _, c, h, w = imgs.shape

            scale = 12.0 / self.minSize  # initial scale of the image pyramid
            min_l = min(h, w)

            b, s, i = [], [], []

            while min_l * scale >= 12.:
                # resize from the original tensor at every scale; reassigning
                # imgs would compound resampling error across pyramid levels
                scaled = _nnf.interpolate(imgs,
                                          size=[int(h * scale),
                                                int(w * scale)],
                                          mode='area')
                reg, pro = self.pNet(scaled)

                pro = pro[:, 1]

                strd = 2. / scale
                cell = 12. / scale

                msk = torch.ge(pro, self.pNetThreshold)  # b, h, w

                if msk.any():
                    indices = msk.nonzero()  # n, 3 <- (i, y, x)
                    idx, r, c = indices[:, 0], indices[:, 1], indices[:, 2]
                    pro = pro[msk]

                    reg = reg.permute(0, 2, 3, 1)  # b, h, w, c <- (x1^, y1^, x2^, y2^)
                    reg = reg[msk]

                    x1, y1 = c * strd, r * strd
                    x2, y2 = x1 + cell, y1 + cell

                    bbs = torch.dstack([x1, y1, x2, y2]).squeeze(0)
                    bbs = self._bb_reg(bbs, reg)
                    nms_idx = batched_nms(bbs, pro, idx, self.nmsThreshold)

                    b.append(bbs[nms_idx])
                    s.append(pro[nms_idx])
                    i.append(idx[nms_idx])

                scale = scale * self.factor

            if len(b) > 0:
                b = torch.cat(b, dim=0)
                s = torch.cat(s, dim=0)
                i = torch.cat(i, dim=0)

                nms_idx = batched_nms(b, s, i, self.nmsThreshold)
                b = clip_boxes_to_image(b[nms_idx], size=(h, w)).int()  # size is (height, width)
                i = i[nms_idx]

                return b, i
            else:
                return None
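Note that Example #1 passes per-image batch indices, not class labels, as the idxs argument: batched_nms only needs a group id, and boxes are never suppressed across groups. A minimal sketch illustrating this (all names local to the sketch):

import torch
from torchvision.ops import batched_nms

# Two identical boxes with different group ids survive together,
# because suppression only happens within a group.
boxes = torch.tensor([[0., 0., 10., 10.],
                      [0., 0., 10., 10.]])
scores = torch.tensor([0.9, 0.8])
idxs = torch.tensor([0, 1])  # e.g. image indices, as in Example #1
keep = batched_nms(boxes, scores, idxs, iou_threshold=0.5)
print(keep)  # tensor([0, 1]) -- both kept despite IoU == 1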
Example #2
    def test_batched_nms_implementations(self):
        """Make sure that both implementations of batched_nms yield identical results"""

        num_boxes = 1000
        iou_threshold = .9

        boxes = torch.cat(
            (torch.rand(num_boxes, 2), torch.rand(num_boxes, 2) + 10), dim=1)
        assert max(boxes[:, 0]) < min(boxes[:, 2])  # x1 < x2
        assert max(boxes[:, 1]) < min(boxes[:, 3])  # y1 < y2

        scores = torch.rand(num_boxes)
        idxs = torch.randint(0, 4, size=(num_boxes, ))
        keep_vanilla = ops.boxes._batched_nms_vanilla(boxes, scores, idxs,
                                                      iou_threshold)
        keep_trick = ops.boxes._batched_nms_coordinate_trick(
            boxes, scores, idxs, iou_threshold)

        err_msg = "The vanilla and the trick implementation yield different nms outputs."
        self.assertTrue(torch.allclose(keep_vanilla, keep_trick), err_msg)

        # Also make sure an empty tensor is returned if boxes is empty
        empty = torch.empty((0, ), dtype=torch.int64)
        self.assertTrue(
            torch.allclose(empty, ops.batched_nms(empty, None, None, None)))
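The "coordinate trick" that this test compares against the vanilla per-group loop works by offsetting each box by an amount proportional to its group id, so boxes from different groups can never overlap and a single plain NMS call suffices. A sketch of the idea, simplified from torchvision's internal _batched_nms_coordinate_trick:

import torch
from torchvision.ops import nms

def batched_nms_coordinate_trick(boxes, scores, idxs, iou_threshold):
    if boxes.numel() == 0:
        return torch.empty((0,), dtype=torch.int64, device=boxes.device)
    # shift every group into its own disjoint coordinate range
    max_coordinate = boxes.max()
    offsets = idxs.to(boxes) * (max_coordinate + 1)
    boxes_for_nms = boxes + offsets[:, None]
    return nms(boxes_for_nms, scores, iou_threshold)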
Example #3
    def forward(
        self,
        head_outputs: List[Tensor],
        anchors_tuple: Tuple[Tensor, Tensor, Tensor],
    ) -> List[Dict[str, Tensor]]:
        """ Perform the computation. At test time, postprocess_detections is the final layer of YOLO.
        Decode location preds, apply non-maximum suppression to location predictions based on conf
        scores and threshold to a detections_per_img number of output predictions for both confidence
        score and locations.

        Parameters:
            head_outputs : [batch_size, num_anchors, num_classes + 5] predicted locations and class/object confidence.
            image_shapes: tensor of dimension [batch_size x 2] containing the size of each images of the batch
                          For evaluation, this must be the original image size (before any data augmentation)
                          For visualization, this should be the image size after data augment, but before padding
        """
        batch_size, _, _, _, K = head_outputs[0].shape

        all_pred_logits: List[Tensor] = []
        for pred_logits in head_outputs:
            pred_logits = pred_logits.reshape(batch_size, -1, K)  # (N, HWA, K)
            all_pred_logits.append(pred_logits)

        all_pred_logits = torch.cat(all_pred_logits, dim=1)

        detections: List[Dict[str, Tensor]] = []

        for idx in range(batch_size):  # image idx, image inference
            pred_logits = torch.sigmoid(all_pred_logits[idx])

            # Compute conf
            # box_conf x class_conf, w/ shape: num_anchors x num_classes
            scores = pred_logits[:, 5:] * pred_logits[:, 4:5]

            boxes = self.box_coder.decode_single(pred_logits[:, :4],
                                                 anchors_tuple)

            # remove low scoring boxes
            inds, labels = torch.where(scores > self.score_thresh)
            boxes, scores = boxes[inds], scores[inds, labels]

            # non-maximum suppression, independently done per level
            keep = batched_nms(boxes, scores, labels, self.nms_thresh)
            # keep only topk scoring head_outputs
            keep = keep[:self.detections_per_img]
            boxes, scores, labels = boxes[keep], scores[keep], labels[keep]

            detections.append({
                'scores': scores,
                'labels': labels,
                'boxes': boxes
            })

        return detections
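The slice keep[:self.detections_per_img] works because batched_nms returns the kept indices sorted in decreasing order of score, so taking the front of the list yields the top-scoring detections. A quick standalone check of that property:

import torch
from torchvision.ops import batched_nms

xy = torch.rand(50, 2)
boxes = torch.cat((xy, xy + 1.0), dim=1)  # well-formed xyxy boxes
scores = torch.rand(50)
labels = torch.randint(0, 3, (50,))

keep = batched_nms(boxes, scores, labels, iou_threshold=0.5)
assert torch.all(scores[keep][:-1] >= scores[keep][1:])  # descending scores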
Example #4
def _apply_nms(final_boxes: Tensor, final_batch_idx: Tensor,
               nms_threshold: float,
               num_classes: int) -> Tuple[Tensor, Tensor]:
    coords = final_boxes[..., :4]
    # final_boxes[..., -3] is not used by this function
    scaled_score = final_boxes[..., -2, None]
    class_id = final_boxes[..., -1, None]

    # torchvision NMS can't operate on a batch of images, but it can separate boxes by class id;
    # build a composite "class id" that distinguishes both batch and class
    idx = (final_batch_idx * num_classes +
           class_id.view_as(final_batch_idx)).view(-1).long()
    keep = batched_nms(coords.float(), scaled_score.view(-1), idx,
                       nms_threshold)
    final_boxes = final_boxes[keep, :]
    final_batch_idx = final_batch_idx[keep, :]
    return final_boxes.contiguous(), final_batch_idx.contiguous()
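The composite id here, final_batch_idx * num_classes + class_id, maps every (batch, class) pair to a distinct integer, so a single batched_nms call suppresses only within the same image and the same class. A small sketch of the mapping (values illustrative):

import torch

num_classes = 80
batch_idx = torch.tensor([0, 0, 1, 1])
class_id = torch.tensor([3, 7, 3, 7])

idx = batch_idx * num_classes + class_id
print(idx)  # tensor([ 3,  7, 83, 87]) -- unique per (batch, class) pair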
Example #5
    def run(self, image_metas: list, pred_bboxes: torch.Tensor, pred_scores: torch.Tensor, pred_class_ids: torch.Tensor) -> list:
        """ 予測結果から条件を満たすものを抽出し、結果の辞書のリストを作成
            予測結果 -> 信頼度でフィルタ -> NMS でフィルタ -> 最終予測結果

        Args:
            image_metas (list): 画像メタデータ
            pred_bboxes (torch.Tensor): 予測 BBox [N, num_preds, 4] (coord fmt: [xmin, ymin, xmax, ymax])
            pred_scores (torch.Tensor): 予測信頼度 [N, num_preds]
            pred_class_ids (torch.Tensor): 予測クラス ID [N, num_preds]

        Returns:
            list: 最終予測結果
        """
        result = []
        for image_meta, bboxes, scores, class_ids in zip(image_metas, pred_bboxes, pred_scores, pred_class_ids):

            # remove duplicates (non-maximum suppression)
            keep = batched_nms(bboxes, scores, class_ids, iou_threshold=self.iou_thresh)
            bboxes = bboxes[keep]
            scores = scores[keep]
            class_ids = class_ids[keep]

            H, W = image_meta['height'], image_meta['width']
            for (xmin, ymin, xmax, ymax), score, class_id in zip(bboxes, scores, class_ids):
                res = {
                    'image_id': image_meta['image_id'],
                    'category_id': class_id.item(),
                    'bbox': [
                        xmin.item() * W,
                        ymin.item() * H,
                        (xmax - xmin).item() * W,
                        (ymax - ymin).item() * H
                    ],
                    'score': score.item(),
                }
                result.append(res)

            # if self.out_dir:
            #     mean = torch.tensor(image_meta['norm_mean']).reshape(3, 1, 1)
            #     std = torch.tensor(image_meta['norm_std']).reshape(3, 1, 1)
            #     image = image * std + mean
            #     image = self._to_pil_image(image, size=(W, H))
            #     image = self._draw_bbox(image, result)
            #     self._save(image, image_meta['image_id'])

        return result
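The inner loop converts normalized xyxy boxes to absolute COCO xywh one box at a time; the same conversion can be done in one vectorized step with torchvision's box_convert. A sketch (to_coco_bbox is a hypothetical helper, not part of the original class):

import torch
from torchvision.ops import box_convert

def to_coco_bbox(bboxes: torch.Tensor, W: int, H: int) -> torch.Tensor:
    # bboxes: [N, 4] normalized [xmin, ymin, xmax, ymax]
    scale = torch.tensor([W, H, W, H], dtype=bboxes.dtype)
    return box_convert(bboxes * scale, in_fmt='xyxy', out_fmt='xywh')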
Example #6
File: mtcnn.py Project: iorivn/mtcnn
    def _third_stage(
        self, imgs: torch.Tensor, r_bbs: torch.Tensor, r_idxs: torch.Tensor
    ) -> Optional[Tuple[torch.Tensor, torch.Tensor, torch.Tensor]]:
        _imgs = self._gather_rois(imgs, r_bbs, r_idxs, 48)

        with EvalScope(self.oNet):
            reg, lmk, pro = self.oNet(_imgs)
            mask = torch.ge(pro[:, 1], self.oNetThreshold)

            if not mask.any():
                return None

            reg = reg[mask]
            lmk = lmk[mask]  # mask landmarks too, so lmk[j] below lines up with b and i
            pro = pro[:, 1][mask]
            b = r_bbs[mask].type(torch.float32)
            i = r_idxs[mask]

            b = self._bb_reg(b, reg)
            j = batched_nms(b, pro, i, self.nmsThreshold)
            b = clip_boxes_to_image(b[j], size=imgs.shape[2:]).int()
            i = i[j]

            return b, i, lmk[j]
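Here clip_boxes_to_image receives imgs.shape[2:], i.e. (height, width), which is the order the function expects: x coordinates are clamped to [0, width] and y coordinates to [0, height]. A one-line check:

import torch
from torchvision.ops import clip_boxes_to_image

boxes = torch.tensor([[-5., -5., 30., 60.]])
print(clip_boxes_to_image(boxes, size=(50, 40)))  # (H=50, W=40) -> [[0., 0., 30., 50.]]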
Example #7
    def forward(self, boxes, scores, idxs):
        return ops.batched_nms(boxes, scores, idxs, 0.5)
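Because batched_nms is TorchScript-compatible, a module like this can be scripted for deployment. A minimal sketch, assuming the fragment above lives in a torch.nn.Module subclass (NMSModule is a hypothetical name):

import torch
from torchvision import ops

class NMSModule(torch.nn.Module):  # hypothetical wrapper for the fragment above
    def forward(self, boxes, scores, idxs):
        return ops.batched_nms(boxes, scores, idxs, 0.5)

scripted = torch.jit.script(NMSModule())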
Example #8
img_preds = prediction[0]
for i in range(len(img_preds["boxes"])):
    x1, y1, x2, y2 = img_preds["boxes"][i]
    label = int(img_preds["labels"][i])
    score = float(img_preds["scores"][i])

    draw.rectangle(((x1, y1), (x2, y2)), outline="red")
    text = f'{dataset.cat2name[label]}: {score}'
    draw.text((x1+5, y1+5), text)

display(pred_img)

### Post Processing #################################################
img_preds = prediction[0]
keep_idx = batched_nms(boxes=img_preds["boxes"], scores=img_preds["scores"], idxs=img_preds["labels"], iou_threshold=params['IOU_THRESHOLD'])

# convert the image, which has been rescaled to 0-1 and had the channels flipped
pred_img = Image.fromarray(img.mul(255).permute(1, 2, 0).byte().numpy())
draw = ImageDraw.Draw(pred_img)

for i in range(len(img_preds["boxes"])):
    if i in keep_idx:
        x1, y1, x2, y2 = img_preds["boxes"][i]
        label = int(img_preds["labels"][i])
        score = float(img_preds["scores"][i])

        draw.rectangle(((x1, y1), (x2, y2)), outline="red")
        text = f'{dataset.cat2name[label]}: {score}'
        draw.text((x1+5, y1+5), text)
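Instead of testing membership with "if i in keep_idx" per box, the kept detections can be gathered directly, since keep_idx indexes into the original tensors. A sketch using the names defined above:

kept_boxes = img_preds["boxes"][keep_idx]
kept_labels = img_preds["labels"][keep_idx]
kept_scores = img_preds["scores"][keep_idx]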
Example #9
def val_one_epoch(model, data_loader, coco_gt, dist_logger, epoch_idx,
                  nms_cfg):
    pred_instances = []
    nms_pre, cls_score_thr, iou_thr = nms_cfg['nms_pre'], nms_cfg['cls_score_thr'], nms_cfg['iou_thr']

    model.eval()
    processor = dist_logger.init_processor(data_loader)
    for img, data in processor:
        img = img.cuda(non_blocking=True)
        points = data['points'].cuda(non_blocking=True)
        img_info_list = coco_gt.loadImgs(data['img_id'].numpy())

        class_pred, distance_pred, centerness_pred = model(img)

        class_pred = class_pred.sigmoid()  # [B, num_points, num_classes]
        cls_pred_scores, cls_pred_indexes = class_pred.max(dim=-1)  # [B, num_points]
        bbox_pred = bbox_ops.convert_distance_to_bbox(points, distance_pred)  # [B, num_points, 4]
        centerness_pred = centerness_pred.sigmoid()  # [B, num_points]

        batch_size, _, num_classes = class_pred.shape
        _, _, ih, iw = img.shape

        for batch_idx in range(batch_size):  # per-image inference
            b_cls_pred_scores = cls_pred_scores[batch_idx]  # [num_points]
            b_cls_pred_indexes = cls_pred_indexes[batch_idx]  # [num_points]
            b_centerness_pred = centerness_pred[batch_idx]  # [num_points]
            b_bbox_pred = bbox_pred[batch_idx, :]  # [num_points, 4]

            _, top_idx = (b_cls_pred_scores * b_centerness_pred).topk(nms_pre)  # [topk]

            top_class_pred_scores = b_cls_pred_scores[top_idx]  # [topk]
            top_class_pred_indexes = b_cls_pred_indexes[top_idx]  # [topk]
            top_centerness_pred = b_centerness_pred[top_idx]  # [topk]
            nms_scores = top_class_pred_scores * top_centerness_pred  # [topk]

            top_bbox_pred = b_bbox_pred[top_idx, :]  # [topk, 4]
            top_bbox_pred = cv_ops.clip_boxes_to_image(top_bbox_pred, size=(ih, iw))

            valid_mask = top_class_pred_scores > cls_score_thr
            valid_class_pred_scores = top_class_pred_scores[valid_mask]
            valid_class_pred_indexes = top_class_pred_indexes[valid_mask]
            valid_nms_scores = nms_scores[valid_mask]
            valid_bbox_pred = top_bbox_pred[valid_mask, :]

            keep_idx = cv_ops.batched_nms(valid_bbox_pred, valid_nms_scores,
                                          valid_class_pred_indexes, iou_thr)
            keep_class_pred_scores = valid_class_pred_scores[keep_idx]
            keep_class_pred_indexes = valid_class_pred_indexes[keep_idx]
            keep_bbox_pred = valid_bbox_pred[keep_idx, :]

            oh, ow = img_info_list[batch_idx]['height'], img_info_list[batch_idx]['width']
            keep_bbox_pred = bbox_ops.recover_bboxes(keep_bbox_pred, oh, ow, ih, iw)
            keep_bbox_pred = cv_ops.box_convert(keep_bbox_pred, in_fmt='xyxy', out_fmt='xywh')

            for cls_score, cls_idx, bbox in zip(keep_class_pred_scores,
                                                keep_class_pred_indexes,
                                                keep_bbox_pred):
                pred_instances.append({
                    'image_id': int(data['img_id'][batch_idx]),
                    'category_id': int(cls_idx) + 1,
                    'bbox': [float('%.1f' % coord) for coord in bbox.tolist()],
                    'score': float('%.1f' % cls_score)
                })

    dist_logger.save_pred_instances_local_rank(pred_instances)
    dist_logger.save_val_file()
    dist_logger.update_tensorboard_val_results(coco_gt, epoch_idx)
Example #10
def val_one_epoch(model, data_loader, coco_gt, dist_logger, epoch_idx, nms_cfg):
    pred_instances = []
    nms_pre, cls_score_thr, iou_thr = nms_cfg['nms_pre'], nms_cfg['cls_score_thr'], nms_cfg['iou_thr']
    _, _, label_to_cat_map = tools.get_cat_label_map(coco_gt, tools.COCO_CLASSES)

    model.eval()
    processor = tqdm.tqdm(data_loader, disable=not dist_logger.is_master_rank)
    for img, points, img_ids in processor:
        img = img.cuda(non_blocking=True)
        points = points.cuda(non_blocking=True)
        img_info_list = coco_gt.loadImgs(img_ids.numpy())

        pred = model(img, points)
        class_pred = pred['class'].sigmoid()  # [B, num_points, num_classes]
        centerness_pred = pred['centerness'].sigmoid()  # [B, num_points]
        bbox_pred = bbox_ops.convert_distance_to_bbox(points, pred['distance'])  # [B, num_points, 4]
        # instance_mask_pred = pred['instance_mask'].sigmoid()  # [B, num_points, pooler_size, pooler_size]

        cls_pred_scores, cls_pred_indexes = class_pred.max(dim=-1)  # [B, num_points]

        batch_size, _, num_classes = class_pred.shape
        _, _, ih, iw = img.shape

        for batch_idx in range(batch_size):
            b_cls_pred_scores = cls_pred_scores[batch_idx]
            b_cls_pred_indexes = cls_pred_indexes[batch_idx]
            b_centerness_pred = centerness_pred[batch_idx]
            b_bbox_pred = bbox_pred[batch_idx, :]  # [num_points, 4]

            _, top_idx = (b_cls_pred_scores * b_centerness_pred).topk(nms_pre)
            top_class_pred_scores = b_cls_pred_scores[top_idx]
            top_class_pred_indexes = b_cls_pred_indexes[top_idx]
            top_centerness_pred = b_centerness_pred[top_idx]
            top_bbox_pred = b_bbox_pred[top_idx, :]  # [topk, 4]

            nms_scores = top_class_pred_scores * top_centerness_pred
            top_bbox_pred = cv_ops.clip_boxes_to_image(top_bbox_pred, size=(ih, iw))

            valid_mask = top_class_pred_scores > cls_score_thr
            valid_class_pred_scores = top_class_pred_scores[valid_mask]
            valid_class_pred_indexes = top_class_pred_indexes[valid_mask]
            valid_nms_scores = nms_scores[valid_mask]
            valid_bbox_pred = top_bbox_pred[valid_mask, :]

            keep_idx = cv_ops.batched_nms(valid_bbox_pred, valid_nms_scores, valid_class_pred_indexes, iou_thr)
            keep_class_pred_scores = valid_class_pred_scores[keep_idx]
            keep_class_pred_indexes = valid_class_pred_indexes[keep_idx]
            keep_bbox_pred = valid_bbox_pred[keep_idx, :]

            oh, ow = img_info_list[batch_idx]['height'], img_info_list[batch_idx]['width']
            keep_bbox_pred = bbox_ops.recover_bboxes(keep_bbox_pred, oh, ow, ih, iw)
            keep_bbox_pred = cv_ops.box_convert(keep_bbox_pred, in_fmt='xyxy', out_fmt='xywh')

            for cls_score, cls_idx, bbox in zip(keep_class_pred_scores, keep_class_pred_indexes, keep_bbox_pred):
                # poly = coco_mask.frPyObjects(poly.permute(1, 0).reshape(1, -1).detach().cpu().double().numpy(), oh, ow)
                # rle = coco_mask.merge(poly)
                # rle['counts'] = rle['counts'].decode('utf-8')

                pred_instances.append({
                    'image_id': int(img_ids[batch_idx]),
                    'category_id': label_to_cat_map[int(cls_idx) + 1],
                    'bbox': [float('%.1f' % coord) for coord in bbox.tolist()],
                    # 'segmentation': rle,
                    'score': float('%.1f' % cls_score)
                })

    dist_logger.save_pred_instances_local_rank(pred_instances)
    dist_logger.save_val_file()
    dist_logger.evaluate(coco_gt)
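Examples #9 and #10 share the same per-image selection pattern: rank candidates by score x centerness, keep the top nms_pre, drop low class scores, then run class-aware NMS. A condensed, self-contained sketch of that pattern (the function name and default thresholds are illustrative, not taken from either example):

import torch
from torchvision.ops import batched_nms, clip_boxes_to_image

def select_detections(boxes, cls_scores, cls_ids, centerness,
                      nms_pre=1000, score_thr=0.05, iou_thr=0.6,
                      image_size=(800, 800)):
    # rank by classification score weighted by centerness
    nms_scores = cls_scores * centerness
    _, top = nms_scores.topk(min(nms_pre, nms_scores.numel()))
    boxes = clip_boxes_to_image(boxes[top], size=image_size)  # (H, W)
    cls_scores, cls_ids, nms_scores = cls_scores[top], cls_ids[top], nms_scores[top]
    # threshold on the raw class score, then class-aware NMS
    valid = cls_scores > score_thr
    keep = batched_nms(boxes[valid], nms_scores[valid], cls_ids[valid], iou_thr)
    return boxes[valid][keep], cls_scores[valid][keep], cls_ids[valid][keep]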