예제 #1
0
    def _nms(self, cxywh_score_cls, nms_mode=4):
        """ Non maximum suppression.
        Source: https://www.pyimagesearch.com/2015/02/16/faster-non-maximum-suppression-python/

        Args:
          cxywh_score_cls (tensor): Bounding boxes and scores from
              get_detections. Assumes columns 0:4 are cx, cy, w, h, Column 4 is
              confidence, and column 5 is class id.

        Return:
          (tensor): Pruned boxes

        Examples:
            >>> import torch
            >>> torch.random.manual_seed(0)
            >>> anchors = np.array([(1.3221, 1.73145), (3.19275, 4.00944), (5.05587, 8.09892), (9.47112, 4.84053), (11.2364, 10.0071)])
            >>> self = GetBoundingBoxes(anchors=anchors, num_classes=20, conf_thresh=.01, nms_thresh=0.5)
            >>> output = torch.randn(8, 5, 5 + 20, 9, 9)
            >>> boxes_ = self._get_boxes(output.data)
            >>> boxes = torch.Tensor(boxes_[0])
            >>> ans0 = self._nms(boxes, nms_mode=0)
            >>> ans1 = self._nms(boxes, nms_mode=1)
            >>> ans2 = self._nms(boxes, nms_mode=2)

        Ignore:
            >>> from netharn import util
            >>> scores = boxes[..., 4:5]
            >>> classes = boxes[..., 5:6]
            >>> cxywh = util.Boxes(boxes[..., 0:4], 'cxywh')
            >>> tlbr = cxywh.to_tlbr()
            >>> util.non_max_supression(tlbr.data.numpy(), scores.numpy().ravel(), self.nms_thresh)

        Benchmark:
            boxes = torch.Tensor(boxes_[0])
            import ubelt
            for timer in ubelt.Timerit(100, bestof=10, label='nms0+cpu'):
                with timer:
                    self._nms(boxes, nms_mode=0)

            for timer in ubelt.Timerit(100, bestof=10, label='nms1+cpu'):
                with timer:
                    self._nms(boxes, nms_mode=1)

            boxes = boxes.to()
            import ubelt
            for timer in ubelt.Timerit(100, bestof=10, label='nms0+gpu'):
                with timer:
                    self._nms(boxes, nms_mode=0)

            for timer in ubelt.Timerit(100, bestof=10, label='nms1+gpu'):
                with timer:
                    self._nms(boxes, nms_mode=1)
        """
        if cxywh_score_cls.numel() == 0:
            return cxywh_score_cls

        a = cxywh_score_cls[:, :2]
        b = cxywh_score_cls[:, 2:4]
        # convert to tlbr
        tlbr_tensor = torch.cat([a - b / 2, a + b / 2], 1)
        scores = cxywh_score_cls[:, 4]

        if nms_mode == 0:
            # if torch.cuda.is_available:
            #     boxes = boxes.to(0)
            from netharn.util._nms_backend.torch_nms import torch_nms
            cls_tensor = cxywh_score_cls[:, 5]
            keep = torch_nms(tlbr_tensor, scores, classes=cls_tensor,
                             thresh=self.nms_thresh, bias=0)
            return cxywh_score_cls[keep]
            # keep = _nms_torch(tlbr_tensor, scores, nms_thresh=self.nms_thresh)
            # keep = sorted(keep)
        elif nms_mode == 1:
            # Dont group by classes, just NMS
            tlbr_np = tlbr_tensor.cpu().numpy().astype(np.float32)
            scores_np = scores.cpu().numpy().astype(np.float32)
            keep = util.non_max_supression(tlbr_np, scores_np, self.nms_thresh,
                                           bias=0)
            keep = sorted(keep)
        elif nms_mode == 2:
            # Group and use NMS
            tlbr_np = tlbr_tensor.cpu().numpy().astype(np.float32)
            scores_np = scores.cpu().numpy().astype(np.float32)
            classes_np = cxywh_score_cls[:, 5].cpu().numpy().astype(np.int)

            keep = util.non_max_supression(tlbr_np, scores_np, self.nms_thresh,
                                           classes=classes_np, bias=0)
            # keep = []
            # for idxs in ub.group_items(range(len(classes_np)), classes_np).values():
            #     cls_tlbr_np = tlbr_np.take(idxs, axis=0)
            #     cls_scores_np = scores_np.take(idxs, axis=0)
            #     cls_keep = util.non_max_supression(cls_tlbr_np, cls_scores_np,
            #                                        self.nms_thresh, bias=0)
            #     keep.extend(list(ub.take(idxs, cls_keep)))
            keep = sorted(keep)
        elif nms_mode == 3:
            # Group and use NMS
            classes_np = cxywh_score_cls[:, 5].cpu().numpy().astype(np.int)
            keep = util.non_max_supression(tlbr_tensor, scores,
                                           self.nms_thresh, classes=classes_np,
                                           bias=0, impl='torch')
            keep = sorted(keep)
        elif nms_mode == 4:
            # Dont group, but use torch
            from netharn.util._nms_backend.torch_nms import torch_nms
            keep = torch_nms(tlbr_tensor, scores,
                             thresh=self.nms_thresh, bias=0)
            return cxywh_score_cls[keep]
        else:
            raise KeyError(nms_mode)
        return cxywh_score_cls[torch.LongTensor(keep)]
예제 #2
0
def _benchmark():
    """
    Time several NMS implementations on random boxes and plot the results.

    For each box count, random tlbr boxes and random scores are generated
    and every available implementation is timed with ``ubelt.Timerit``. The
    minimum time per implementation is recorded under the timer's label:
    ``torch(cpu)``, ``torch(gpu)``, ``numpy(cpu)`` (``impl='py'``),
    ``cython(cpu)`` (``impl='cpu'``) and ``cython(gpu)`` (``impl='gpu'``).
    The GPU variants only run when ``torch.cuda.is_available()``.

    After timing, a message is printed for any implementation whose kept
    boxes still overlap by more than the threshold, followed by the pairwise
    jaccard overlap between the kept index sets. Finally the timings are
    plotted against the number of boxes.
    """
    import ubelt as ub
    import torch
    import numpy as np
    import netharn as nh
    from netharn.util.nms.torch_nms import torch_nms
    from netharn.util import non_max_supression
    import itertools as it

    num_trials = 100
    best_of = 10
    iou_thresh = 0.5

    box_counts = [
        10, 20, 40, 80, 100, 200, 300, 400, 500, 600, 700, 1000, 1500, 2000
    ]
    # label -> list of best times, one entry per box count
    timings = ub.ddict(list)

    rng = nh.util.ensure_rng(0)
    has_cuda = torch.cuda.is_available()

    for count in box_counts:

        # label -> indices of the boxes that implementation kept
        kept = {}

        # Build random test boxes and scores
        random_boxes = nh.util.Boxes.random(
            count, scale=10.0, rng=rng, format='tlbr', tensor=True)
        boxes = random_boxes.data
        scores = torch.Tensor(rng.rand(len(boxes)))

        ti = ub.Timerit(num_trials, bestof=best_of, label='torch(cpu)')
        for timer in ti:
            with timer:
                keep = torch_nms(boxes, scores, thresh=iou_thresh)
        timings[ti.label].append(ti.min())
        kept[ti.label] = np.nonzero(keep.cpu().numpy())[0]

        if has_cuda:
            # Move boxes to the GPU
            gpu_boxes = boxes.cuda()
            gpu_scores = scores.cuda()

            ti = ub.Timerit(num_trials, bestof=best_of, label='torch(gpu)')
            for timer in ti:
                with timer:
                    keep = torch_nms(gpu_boxes, gpu_scores, thresh=iou_thresh)
                    torch.cuda.synchronize()
            timings[ti.label].append(ti.min())
            kept[ti.label] = np.nonzero(keep.cpu().numpy())[0]

        # Move boxes to numpy
        np_boxes = boxes.cpu().numpy()
        np_scores = scores.cpu().numpy()

        ti = ub.Timerit(num_trials, bestof=best_of, label='numpy(cpu)')
        for timer in ti:
            with timer:
                keep = non_max_supression(
                    np_boxes, np_scores, thresh=iou_thresh, impl='py')
        timings[ti.label].append(ti.min())
        kept[ti.label] = sorted(keep)

        ti = ub.Timerit(num_trials, bestof=best_of, label='cython(cpu)')
        for timer in ti:
            with timer:
                keep = non_max_supression(
                    np_boxes, np_scores, thresh=iou_thresh, impl='cpu')
        timings[ti.label].append(ti.min())
        kept[ti.label] = sorted(keep)

        if has_cuda:
            ti = ub.Timerit(num_trials, bestof=best_of, label='cython(gpu)')
            for timer in ti:
                with timer:
                    keep = non_max_supression(
                        np_boxes, np_scores, thresh=iou_thresh, impl='gpu')
            timings[ti.label].append(ti.min())
            kept[ti.label] = sorted(keep)

        # Check that all kept boxes do not have more than `threshold` ious
        for label, idxs in kept.items():
            survivors = np_boxes[idxs]
            ious = nh.util.box_ious(survivors, survivors)
            # Lower triangle minus the identity, then take the maximum
            worst = (np.tril(ious) - np.eye(len(ious))).max()
            if worst > iou_thresh:
                message = '{} produced a bad result with max_iou={}'
                print(message.format(label, worst))

        # Check result consistency:
        print('Result consistency:')
        for label1, label2 in it.combinations(kept, 2):
            first = set(kept[label1])
            second = set(kept[label2])
            common = first & second
            either = first | second
            jaccard = len(common) / len(either)
            print('{}, {}: {}'.format(label1, label2, jaccard))

    nh.util.mplutil.qtensure()
    nh.util.mplutil.multi_plot(box_counts,
                               timings,
                               xlabel='num boxes',
                               ylabel='seconds')
예제 #3
0
파일: torch_nms.py 프로젝트: jcfr/netharn
def _benchmark():
    """
    python -m netharn.util.nms.torch_nms _benchmark --show

    Time several NMS implementations on random boxes, report how well their
    results agree, and plot the timings.

    For each box count, random boxes with unique, evenly spaced scores are
    generated. When CUDA is available the GPU variants are timed
    (``torch(gpu)``, ``cython(gpu)``, ``lightnet-slow(gpu)``); otherwise
    ``torch(cpu)`` is timed. ``cython(cpu)`` and ``numpy(cpu)`` are always
    timed. The minimum time of each run is recorded under the timer's label.

    After timing, a message is printed for any implementation whose kept
    boxes still overlap by more than the threshold, followed by the number
    of boxes each implementation kept and the pairwise jaccard overlap of
    the kept index sets, sorted from most to least similar.

    SeeAlso:
        PJR Darknet NonMax supression
        https://github.com/pjreddie/darknet/blob/master/src/box.c

        Lightnet NMS
        https://gitlab.com/EAVISE/lightnet/blob/master/lightnet/data/transform/_postprocess.py#L116

    """
    import torch
    import numpy as np
    import netharn as nh
    from netharn.util.nms.torch_nms import torch_nms
    from netharn.util import non_max_supression
    import ubelt as ub
    import itertools as it

    N = 100
    bestof = 10
    thresh = 0.5

    ydata = ub.ddict(list)

    # Only a short sweep is run. For a fuller sweep, note that the max number
    # of boxes yolo will spit out at a time is 19 * 19 * 5.
    xdata = [10, 100, 500]

    rng = nh.util.ensure_rng(0)

    # These do not depend on the number of boxes, so decide them once.
    measure_gpu = torch.cuda.is_available()
    measure_cpu = not measure_gpu
    # The lightnet "fast" variant is disabled; flip this to time it as well.
    measure_lightnet_fast = False

    if measure_gpu:
        from lightnet.data.transform._postprocess import NonMaxSupression
        gpu = torch.device('cuda', 0)

    def _as_numpy(data):
        # NumPy cannot read CUDA memory, so tensors are explicitly copied to
        # the host before being compared with NumPy functions.
        if torch.is_tensor(data):
            return data.detach().cpu().numpy()
        return np.asarray(data)

    def _ln_output_to_keep(ln_output, ln_boxes):
        # Map each row returned by lightnet back to its row index in the
        # boxes that were passed in.
        ln_output = _as_numpy(ln_output)
        ln_boxes = _as_numpy(ln_boxes)
        keep = []
        for row in ln_output:
            # Find the index that we kept
            idxs = np.where(np.all(np.isclose(ln_boxes, row), axis=1))[0]
            assert len(idxs) == 1
            keep.append(idxs[0])
        assert np.all(np.isclose(ln_boxes[keep], ln_output))
        return keep

    for num in xdata:
        print('\n\n---- number of boxes = {} ----\n'.format(num))

        outputs = {}

        # Build random test boxes and scores
        cpu_boxes = nh.util.Boxes.random(num,
                                         scale=10.0,
                                         rng=rng,
                                         format='tlbr',
                                         tensor=True)
        cpu_tlbr = cpu_boxes.to_tlbr().data
        # make all scores unique to ensure comparability
        cpu_scores = torch.Tensor(np.linspace(0, 1, len(cpu_tlbr)))
        cpu_cls = torch.LongTensor(rng.randint(0, 10, len(cpu_tlbr)))

        # Format boxes in lightnet format
        cpu_ln_boxes = torch.cat(
            [
                cpu_boxes.to_cxywh().data,
                cpu_scores[:, None],
                cpu_cls.float()[:, None],
            ],
            dim=-1)

        # Move boxes to numpy
        np_tlbr = cpu_tlbr.numpy()
        np_scores = cpu_scores.numpy()

        if measure_gpu:
            # Move boxes to the GPU
            gpu_tlbr = cpu_tlbr.to(gpu)
            gpu_scores = cpu_scores.to(gpu)
            gpu_ln_boxes = cpu_ln_boxes.to(gpu)

            t1 = ub.Timerit(N, bestof=bestof, label='torch(gpu)')
            for timer in t1:
                with timer:
                    keep = torch_nms(gpu_tlbr, gpu_scores, thresh=thresh)
                    torch.cuda.synchronize()
            ydata[t1.label].append(t1.min())
            outputs[t1.label] = np.where(keep.cpu().numpy())[0]

            t1 = ub.Timerit(N, bestof=bestof, label='cython(gpu)')
            for timer in t1:
                with timer:
                    keep = non_max_supression(np_tlbr,
                                              np_scores,
                                              thresh=thresh,
                                              impl='gpu')
                    torch.cuda.synchronize()
            ydata[t1.label].append(t1.min())
            outputs[t1.label] = sorted(keep)

            t1 = ub.Timerit(N, bestof=bestof, label='lightnet-slow(gpu)')
            for timer in t1:
                with timer:
                    ln_output = NonMaxSupression._nms(gpu_ln_boxes,
                                                      nms_thresh=thresh,
                                                      class_nms=False,
                                                      fast=False)
                    torch.cuda.synchronize()
            # convert lightnet NMS output to keep for consistency
            keep = _ln_output_to_keep(ln_output, gpu_ln_boxes)
            ydata[t1.label].append(t1.min())
            outputs[t1.label] = sorted(keep)

            if measure_lightnet_fast:
                t1 = ub.Timerit(N, bestof=bestof, label='lightnet-fast(gpu)')
                for timer in t1:
                    with timer:
                        ln_output = NonMaxSupression._nms(gpu_ln_boxes,
                                                          nms_thresh=thresh,
                                                          class_nms=False,
                                                          fast=True)
                        torch.cuda.synchronize()
                # convert lightnet NMS output to keep for consistency
                keep = _ln_output_to_keep(ln_output, gpu_ln_boxes)
                ydata[t1.label].append(t1.min())
                outputs[t1.label] = sorted(keep)

        if measure_cpu:
            t1 = ub.Timerit(N, bestof=bestof, label='torch(cpu)')
            for timer in t1:
                with timer:
                    keep = torch_nms(cpu_tlbr, cpu_scores, thresh=thresh)
            ydata[t1.label].append(t1.min())
            outputs[t1.label] = np.where(keep.cpu().numpy())[0]

        t1 = ub.Timerit(N, bestof=bestof, label='cython(cpu)')
        for timer in t1:
            with timer:
                keep = non_max_supression(np_tlbr,
                                          np_scores,
                                          thresh=thresh,
                                          impl='cpu')
        ydata[t1.label].append(t1.min())
        outputs[t1.label] = sorted(keep)

        t1 = ub.Timerit(N, bestof=bestof, label='numpy(cpu)')
        for timer in t1:
            with timer:
                keep = non_max_supression(np_tlbr,
                                          np_scores,
                                          thresh=thresh,
                                          impl='py')
        ydata[t1.label].append(t1.min())
        outputs[t1.label] = sorted(keep)

        # Check that all kept boxes do not have more than `threshold` ious
        for key, idxs in outputs.items():
            ious = nh.util.box_ious(np_tlbr[idxs], np_tlbr[idxs])
            # Lower triangle minus the identity, then take the maximum
            max_iou = (np.tril(ious) - np.eye(len(ious))).max()
            if max_iou > thresh:
                print('{} produced a bad result with max_iou={}'.format(
                    key, max_iou))

        # Check result consistency:
        print('\nResult stats:')
        for key in sorted(outputs.keys()):
            print('    * {:<20}: num={}'.format(key, len(outputs[key])))

        print('\nResult overlaps (method1, method2: jaccard):')
        datas = []
        for k1, k2 in it.combinations(sorted(outputs.keys()), 2):
            idxs1 = set(outputs[k1])
            idxs2 = set(outputs[k2])
            jaccard = len(idxs1 & idxs2) / len(idxs1 | idxs2)
            datas.append((k1, k2, jaccard))
        # Most similar pairs first
        datas = sorted(datas, key=lambda x: -x[2])
        for k1, k2, jaccard in datas:
            print('    * {:<20}, {:<20}: {:0.4f}'.format(k1, k2, jaccard))

    nh.util.mplutil.autompl()
    nh.util.mplutil.multi_plot(xdata,
                               ydata,
                               xlabel='num boxes',
                               ylabel='seconds')
    nh.util.show_if_requested()
예제 #4
0
    def _nms(self, boxes, mode=0):
        """ Non maximum suppression.
        Source: https://www.pyimagesearch.com/2015/02/16/faster-non-maximum-suppression-python/

        Args:
          boxes (tensor): Bounding boxes from get_detections

        Return:
          (tensor): Pruned boxes

        CommandLine:
            python -m netharn.models.yolo2.light_postproc GetBoundingBoxes._nms --profile

        Examples:
            >>> import torch
            >>> torch.random.manual_seed(0)
            >>> anchors = np.array([(1.3221, 1.73145), (3.19275, 4.00944), (5.05587, 8.09892), (9.47112, 4.84053), (11.2364, 10.0071)])
            >>> self = GetBoundingBoxes(anchors=anchors, num_classes=20, conf_thresh=.01, nms_thresh=0.5)
            >>> output = torch.randn(8, 5, 5 + 20, 9, 9)
            >>> boxes_ = self._get_boxes(output.data)
            >>> boxes = torch.Tensor(boxes_[0])
            >>> ans0 = self._nms(boxes, mode=0)
            >>> ans1 = self._nms(boxes, mode=1)
            >>> ans2 = self._nms(boxes, mode=2)

        Ignore:
            >>> from netharn import util
            >>> scores = boxes[..., 4:5]
            >>> classes = boxes[..., 5:6]
            >>> cxywh = util.Boxes(boxes[..., 0:4], 'cxywh')
            >>> tlbr = cxywh.to_tlbr()
            >>> util.non_max_supression(tlbr.data.numpy(), scores.numpy().ravel(), self.nms_thresh)

        Benchmark:
            boxes = torch.Tensor(boxes_[0])
            import ubelt
            for timer in ubelt.Timerit(100, bestof=10, label='nms0+cpu'):
                with timer:
                    self._nms(boxes, mode=0)

            for timer in ubelt.Timerit(100, bestof=10, label='nms1+cpu'):
                with timer:
                    self._nms(boxes, mode=1)

            boxes = boxes.cuda()
            import ubelt
            for timer in ubelt.Timerit(100, bestof=10, label='nms0+gpu'):
                with timer:
                    self._nms(boxes, mode=0)

            for timer in ubelt.Timerit(100, bestof=10, label='nms1+gpu'):
                with timer:
                    self._nms(boxes, mode=1)
        """
        if boxes.numel() == 0:
            return boxes

        a = boxes[:, :2]
        b = boxes[:, 2:4]
        # convert to tlbr
        tlbr_tensor = torch.cat([a - b / 2, a + b / 2], 1)
        scores = boxes[:, 4]

        if mode == 0:
            # if torch.cuda.is_available:
            #     boxes = boxes.cuda()
            keep = _nms_torch(tlbr_tensor, scores, nms_thresh=self.nms_thresh)
            keep = sorted(keep)
        elif mode == 1:
            # Dont group by classes, just NMS
            tlbr_np = tlbr_tensor.cpu().numpy().astype(np.float32)
            scores_np = scores.cpu().numpy().astype(np.float32)
            keep = util.non_max_supression(tlbr_np, scores_np, self.nms_thresh)
            keep = sorted(keep)
        elif mode == 2:
            # Group and use NMS
            tlbr_np = tlbr_tensor.cpu().numpy().astype(np.float32)
            scores_np = scores.cpu().numpy().astype(np.float32)
            classes_np = boxes[..., 5].cpu().numpy().astype(np.int)
            keep = []
            for idxs in ub.group_items(range(len(classes_np)),
                                       classes_np).values():
                cls_tlbr_np = tlbr_np.take(idxs, axis=0)
                cls_scores_np = scores_np.take(idxs, axis=0)
                cls_keep = util.non_max_supression(cls_tlbr_np, cls_scores_np,
                                                   self.nms_thresh)
                keep.extend(list(ub.take(idxs, cls_keep)))
            keep = sorted(keep)
        else:
            raise KeyError(mode)
        return boxes[torch.LongTensor(keep)]