Example 1
    def _labels_to_true_dets(harn, inp_size, labels, _aidbase=1, undo_lb=True):
        """ Convert batch groundtruth to coco-style annotations for scoring """
        indices = labels['indices']
        orig_sizes = labels['orig_sizes']
        targets = labels['cxywh']
        gt_weights = labels['gt_weights']

        letterbox = harn.datasets[harn.current_tag].letterbox
        # On the training set, we need to add truth due to augmentation
        bsize = len(indices)
        for ix in range(bsize):
            target = targets[ix].view(-1, 5)
            import kwimage

            true_det = kwimage.Detections(
                boxes=kwimage.Boxes(target[:, 1:5].float(), 'cxywh'),
                class_idxs=target[:, 0].long(),
                weights=gt_weights[ix],
            )
            true_det = true_det.numpy()
            flags = true_det.class_idxs != -1
            true_det = true_det.compress(flags)

            if undo_lb:
                orig_size = orig_sizes[ix].cpu().numpy()
                true_det.data['boxes'] = letterbox._boxes_letterbox_invert(
                    true_det.boxes, orig_size, inp_size)

            true_det.data['aids'] = np.arange(_aidbase, _aidbase + len(true_det))
            gx = int(indices[ix].data.cpu().numpy())

            # if util.IS_PROFILING:
            #     torch.cuda.synchronize()

            yield gx, true_det
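
A minimal standalone sketch of the truth-decoding step above (values invented for illustration): each target row stores [class, cx, cy, w, h], and padded rows with class -1 are dropped with compress. Only numpy and kwimage are assumed.

    import numpy as np
    import kwimage

    # Two real boxes plus one padding row (class -1), in [class, cx, cy, w, h] form
    target = np.array([
        [ 0, 0.5, 0.5, 0.2, 0.2],
        [ 1, 0.3, 0.7, 0.1, 0.4],
        [-1, 0.0, 0.0, 0.0, 0.0],
    ], dtype=np.float32)

    true_det = kwimage.Detections(
        boxes=kwimage.Boxes(target[:, 1:5], 'cxywh'),
        class_idxs=target[:, 0].astype(int),
    )
    # Drop the padding rows
    true_det = true_det.compress(true_det.class_idxs != -1)
    print(len(true_det))  # -> 2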
Example 2
    def overlapping_aids(self, gid, region, visible_thresh=0.0):
        """
        Finds the other annotations in this image that overlap a region

        Args:
            gid (int): image id
            region (kwimage.Boxes): bounding box
            visible_thresh (float): annotations with visibility less than this
                threshold are not returned.

        Returns:
            List[int]: annotation ids
        """
        overlap_aids = self.isect_index.overlapping_aids(gid, region)
        if visible_thresh > 0 and len(overlap_aids) > 0:
            # Get info about all annotations inside this window
            if 0:
                overlap_annots = self.dset.annots(overlap_aids)
                abs_boxes = overlap_annots.boxes
            else:
                overlap_anns = [self.dset.anns[aid] for aid in overlap_aids]
                abs_boxes = kwimage.Boxes(
                    [ann['bbox'] for ann in overlap_anns], 'xywh')
            # Remove annotations that are mostly invisible
            if len(abs_boxes) > 0:
                eps = 1e-6
                isect_area = region[None, :].isect_area(abs_boxes)[0]
                other_area = abs_boxes.area.T[0]
                visibility = isect_area / (other_area + eps)
                is_visible = visibility > visible_thresh
                abs_boxes = abs_boxes[is_visible]
                overlap_aids = list(it.compress(overlap_aids, is_visible))
                # overlap_annots = self.dset.annots(overlap_aids)
        return overlap_aids
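
A minimal sketch of the visibility filter above in isolation (box values are made up): the intersection area with the query region, divided by each annotation's own area, gives the visible fraction that is thresholded.

    import kwimage

    region = kwimage.Boxes([[0, 0, 50, 50]], 'xywh')
    abs_boxes = kwimage.Boxes([[10, 10, 20, 20], [45, 45, 30, 30]], 'xywh')

    eps = 1e-6
    isect_area = region.isect_area(abs_boxes)[0]  # overlap with the query region
    other_area = abs_boxes.area.T[0]              # each annotation's own area
    visibility = isect_area / (other_area + eps)
    is_visible = visibility > 0.5
    print(visibility, is_visible)  # first box fully visible, second mostly outside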
Example 3
    def to_boxes(self):
        """
        Return the bounding box of the multi polygon

        Returns:
            kwimage.Boxes:

        Example:
            >>> from kwimage.structs.polygon import *  # NOQA
            >>> self = MultiPolygon.random(rng=0, n=10)
            >>> boxes = self.to_boxes()
            >>> sub_boxes = [d.to_boxes() for d in self.data]
            >>> areas1 = np.array([s.intersection(boxes).area[0] for s in sub_boxes])
            >>> areas2 = np.array([s.area[0] for s in sub_boxes])
            >>> assert np.allclose(areas1, areas2)
        """
        import kwimage
        tl = np.array([np.inf, np.inf])
        br = np.array([-np.inf, -np.inf])
        for data in self.data:
            xys = data.data['exterior'].data
            tl = np.minimum(tl, xys.min(axis=0))
            br = np.maximum(br, xys.max(axis=0))
        tlbr = np.hstack([tl, br])[None, :]
        boxes = kwimage.Boxes(tlbr, 'tlbr')
        return boxes
Example 4
 def to_boxes(self):
     import kwimage
     xys = self.data['exterior'].data
     tl = xys.min(axis=0)
     br = xys.max(axis=0)
     tlbr = np.hstack([tl, br])[None, :]
     boxes = kwimage.Boxes(tlbr, 'tlbr')
     return boxes
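
The same axis-aligned bound can be checked by hand; a tiny sketch with an invented exterior:

    import numpy as np
    import kwimage

    xys = np.array([[2., 3.], [10., 1.], [7., 8.]])  # hypothetical polygon exterior
    tl = xys.min(axis=0)
    br = xys.max(axis=0)
    tlbr = np.hstack([tl, br])[None, :]
    print(kwimage.Boxes(tlbr, 'tlbr'))  # tlbr box [2, 1, 10, 8]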
Example 5
    def _debug_index(self):
        from shapely.ops import cascaded_union

        def _to_shapely(boxes):
            from shapely.geometry import Polygon
            from kwimage.structs.boxes import _cat
            x1, y1, x2, y2 = boxes.to_tlbr(copy=False).components
            a = _cat([x1, y1]).tolist()
            b = _cat([x1, y2]).tolist()
            c = _cat([x2, y2]).tolist()
            d = _cat([x2, y1]).tolist()
            polygons = [Polygon(points) for points in zip(a, b, c, d, a)]
            return polygons

        for gid, qtree in self.qtrees.items():
            boxes = kwimage.Boxes(np.array(list(qtree.aid_to_tlbr.values())),
                                  'tlbr')
            polygons = _to_shapely(boxes)

            bounds = kwimage.Boxes([[0, 0, qtree.width, qtree.height]], 'tlbr')
            bounds = _to_shapely(bounds)[0]
            merged_polygon = cascaded_union(polygons)
            uncovered = (bounds - merged_polygon)
            print('uncovered.area = {!r}'.format(uncovered.area))

            # plot these two polygons separately
            if 1:
                from descartes import PolygonPatch
                from matplotlib import pyplot as plt
                import kwplot
                kwplot.autompl()
                fig = plt.figure(gid)
                ax = fig.add_subplot(111)
                ax.cla()
                # ax.add_patch(
                #     PolygonPatch(bounds, alpha=0.5, zorder=2, fc='blue')
                # )
                # ax.add_patch(
                #     PolygonPatch(merged_polygon, alpha=0.5, zorder=2, fc='red')
                # )
                ax.add_patch(
                    PolygonPatch(uncovered, alpha=0.5, zorder=2, fc='green'))
                ax.set_xlim(0, qtree.width)
                ax.set_ylim(0, qtree.height)
                ax.set_aspect(1)
Example 6
def select_positive_regions(targets, window_dims=(300, 300), thresh=0.0,
                            rng=None, verbose=0):
    """
    Reduce positive example redundancy by selecting disparate positive samples

    Example:
        >>> from ndsampler.coco_regions import *
        >>> import kwcoco
        >>> dset = kwcoco.CocoDataset.demo('shapes8')
        >>> targets = tabular_coco_targets(dset)
        >>> window_dims = (300, 300)
        >>> selected = select_positive_regions(targets, window_dims)
        >>> print(len(selected))
        >>> print(len(dset.anns))
    """
    unique_gids, groupxs = kwarray.group_indices(targets['gid'])
    gid_to_groupx = dict(zip(unique_gids, groupxs))
    wh, ww = window_dims
    rng = kwarray.ensure_rng(rng)
    selection = []

    # Get all the bounding boxes
    cxs, cys = ub.take(targets, ['cx', 'cy'])
    n = len(targets)
    cxs = cxs.astype(np.float32)
    cys = cys.astype(np.float32)
    wws = np.full(n, ww, dtype=np.float32)
    whs = np.full(n, wh, dtype=np.float32)
    cxywh = np.hstack([a[:, None] for a in [cxs, cys, wws, whs]])
    boxes = kwimage.Boxes(cxywh, 'cxywh').to_tlbr()

    iter_ = ub.ProgIter(gid_to_groupx.items(),
                        enabled=verbose,
                        label='select positive regions',
                        total=len(gid_to_groupx), adjust=0, freq=32)

    for gid, groupx in iter_:
        # Select all candidate windows in this image
        cand_windows = boxes.take(groupx, axis=0)
        # Randomize which candidate windows have the highest scores so the
        # selection can vary each epoch.
        cand_scores = rng.rand(len(cand_windows))
        cand_dets = kwimage.Detections(boxes=cand_windows, scores=cand_scores)
        # Non-max suppression is really similar to set-cover
        keep = cand_dets.non_max_suppression(thresh=thresh)
        selection.extend(groupx[keep])

    selection = np.array(sorted(selection))
    return selection
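
A minimal sketch of the selection idea in isolation (window coordinates invented), assuming kwimage.Detections.non_max_suppression returns the kept indices as used above: random scores plus NMS keep roughly one window per cluster of near-duplicate candidates.

    import numpy as np
    import kwimage

    rng = np.random.RandomState(0)
    # Three nearly identical windows plus one disjoint window, in tlbr format
    cand_windows = kwimage.Boxes(np.array([
        [0, 0, 300, 300],
        [5, 5, 305, 305],
        [2, 0, 302, 300],
        [400, 400, 700, 700],
    ], dtype=np.float32), 'tlbr')
    cand_scores = rng.rand(len(cand_windows))
    cand_dets = kwimage.Detections(boxes=cand_windows, scores=cand_scores)
    keep = cand_dets.non_max_suppression(thresh=0.0)
    print(sorted(keep))  # one of the overlapping trio plus the disjoint window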
Example 7
def draw_boxes_on_image(img,
                        boxes,
                        color='blue',
                        thickness=1,
                        box_format=None,
                        colorspace='rgb'):
    """
    Draws boxes on an image.

    Args:
        img (ndarray): image to copy and draw on
        boxes (nh.util.Boxes): boxes to draw
        colorspace (str): string code of the input image colorspace

    Example:
        >>> import kwimage
        >>> import numpy as np
        >>> img = np.zeros((10, 10, 3), dtype=np.uint8)
        >>> color = 'dodgerblue'
        >>> thickness = 1
        >>> boxes = kwimage.Boxes([[1, 1, 8, 8]], 'tlbr')
        >>> img2 = draw_boxes_on_image(img, boxes, color, thickness)
        >>> assert tuple(img2[1, 1]) == (30, 144, 255)
        >>> # xdoc: +REQUIRES(--show)
        >>> import kwplot
        >>> kwplot.autompl()  # xdoc: +SKIP
        >>> kwplot.figure(doclf=True, fnum=1)
        >>> kwplot.imshow(img2)
    """
    import kwimage
    import cv2
    if not isinstance(boxes, kwimage.Boxes):
        if box_format is None:
            raise ValueError('specify box_format')
        boxes = kwimage.Boxes(boxes, box_format)

    color = kwimage.Color(color)._forimage(img, colorspace)
    tlbr = boxes.to_tlbr().data
    img2 = img.copy()
    for x1, y1, x2, y2 in tlbr:
        # pt1 = (int(round(x1)), int(round(y1)))
        # pt2 = (int(round(x2)), int(round(y2)))
        pt1 = (int(x1), int(y1))
        pt2 = (int(x2), int(y2))
        # Note: cv2.rectangle works in place, which is why we draw on a copy
        img2 = cv2.rectangle(img2, pt1, pt2, color, thickness=thickness)
    return img2
Example 8
    def boxes(self):
        """
        Get the column of kwimage-style bounding boxes

        Example:
            >>> import kwcoco
            >>> self = kwcoco.CocoDataset.demo().annots([1, 2, 11])
            >>> print(self.boxes)
            <Boxes(xywh,
                array([[ 10,  10, 360, 490],
                       [350,   5, 130, 290],
                       [124,  96,  45,  18]]))>
        """
        import kwimage
        xywh = self.lookup('bbox')
        boxes = kwimage.Boxes(xywh, 'xywh')
        return boxes
Example 9
def _kwiver_to_kwimage_detections(detected_objects):
    """
    Convert vital detected object sets to kwimage.Detections

    Args:
        detected_objects (kwiver.vital.types.DetectedObjectSet)

    Returns:
        kwimage.Detections
    """
    import ubelt as ub
    import kwimage
    boxes = []
    scores = []
    class_idxs = []

    classes = []
    if len(detected_objects) > 0:
        obj = ub.peek(detected_objects)
        classes = obj.type().all_class_names()

    for obj in detected_objects:
        box = obj.bounding_box()
        tlbr = [box.min_x(), box.min_y(), box.max_x(), box.max_y()]
        score = obj.confidence()
        cname = obj.type().get_most_likely_class()
        cidx = classes.index(cname)
        boxes.append(tlbr)
        scores.append(score)
        class_idxs.append(cidx)

    dets = kwimage.Detections(
        boxes=kwimage.Boxes(np.array(boxes), 'tlbr'),
        scores=np.array(scores),
        class_idxs=np.array(class_idxs),
        classes=classes,
    )
    return dets
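
A small sketch of the target structure with hand-written values standing in for the kwiver objects (kwiver itself is not needed to build it):

    import numpy as np
    import kwimage

    classes = ['fish', 'scallop']  # hypothetical category names
    dets = kwimage.Detections(
        boxes=kwimage.Boxes(np.array([[0, 0, 10, 10], [5, 5, 20, 25]]), 'tlbr'),
        scores=np.array([0.9, 0.4]),
        class_idxs=np.array([0, 1]),
        classes=classes,
    )
    print(dets)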
Example 10
    def __init__(self, num_classes, anchors, coord_scale=1.0,
                 noobject_scale=1.0, object_scale=5.0, class_scale=1.0,
                 thresh=0.6, seen_thresh=12800,
                 small_boxes=False,
                 mse_factor=0.5):
        import kwimage
        super(RegionLoss, self).__init__()

        self.num_classes = num_classes

        self.seen_thresh = seen_thresh

        self.anchors = torch.Tensor(anchors)
        self.num_anchors = len(anchors)

        self.coord_scale = coord_scale
        self.noobject_scale = noobject_scale
        self.object_scale = object_scale
        self.class_scale = class_scale
        self.thresh = thresh

        self.loss_coord = None
        self.loss_conf = None
        self.loss_cls = None
        self.loss_tot = None

        self.coord_mse = nn.MSELoss(reduction='sum')
        self.conf_mse = nn.MSELoss(reduction='sum')
        self.cls_critrion = nn.CrossEntropyLoss(reduction='sum')

        # Precompute zero-centered relative anchors (cxywh) for IoU computation
        rel_anchors_cxywh = torch.cat([torch.zeros_like(self.anchors), self.anchors], 1)
        self.rel_anchors_boxes = kwimage.Boxes(rel_anchors_cxywh, 'cxywh')

        self.small_boxes = small_boxes
        self.mse_factor = mse_factor
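
To see why the relative anchors are centered at zero, here is a small sketch with invented anchor values: zero-centering both the anchors and a groundtruth box makes the IoU depend only on width and height, which is how build_targets later picks the best anchor for each true box.

    import numpy as np
    import kwimage

    anchors = np.array([[0.75, 0.75], [1.0, 0.3], [0.3, 1.0]])  # (w, h) pairs
    rel_anchors_cxywh = np.hstack([np.zeros_like(anchors), anchors])
    rel_anchors_boxes = kwimage.Boxes(rel_anchors_cxywh, 'cxywh')

    # A groundtruth box, also moved to the origin so only its shape matters
    rel_gt_box = kwimage.Boxes(np.array([[0, 0, 0.9, 0.8]]), 'cxywh')
    ious = rel_anchors_boxes.ious(rel_gt_box, bias=0)
    best_anchor = int(ious.argmax(axis=0)[0])
    print(ious.ravel(), best_anchor)  # the squarish anchor wins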
Example 11
    def iooas(self, gid, box):
        """
        Intersection over other's area

        Args:
            gid (int): an image id
            box (kwimage.Boxes): the specified region

        Like iou, but non-symmetric; the returned number is a fraction of the
        other's (groundtruth) area. This means we don't care how big the
        (negative) `box` is.
        """
        boxes1 = box[None, :] if len(box.shape) == 1 else box
        isect_aids = self.overlapping_aids(gid, box)
        if len(isect_aids):
            boxes2 = [self.qtrees[gid].aid_to_tlbr[aid] for aid in isect_aids]
            boxes2 = kwimage.Boxes(np.array(boxes2), 'tlbr')
            isect = boxes1.isect_area(boxes2)
            denom = boxes2.area.T
            eps = 1e-6
            iomas = isect / (denom[0] + eps)
        else:
            iomas = np.empty(0)
        return isect_aids, iomas
Example 12
    def ious(self, gid, box):
        """
        Find overlapping annotations in a specific image and their intersection
        over union with a query box.

        Args:
            gid (int): an image id
            box (kwimage.Boxes): the specified region

        Returns:
            Tuple[List[int], ndarray]:
                isect_aids: list of annotation ids
                ious: jaccard score for each returned annotation id
        """
        boxes1 = box[None, :] if len(box.shape) == 1 else box
        isect_aids = self.overlapping_aids(gid, box)
        if len(isect_aids):
            boxes2 = [self.qtrees[gid].aid_to_tlbr[aid] for aid in isect_aids]
            boxes2 = kwimage.Boxes(np.array(boxes2), 'tlbr')
            ious = boxes1.ious(boxes2)[0]
        else:
            ious = np.empty(0)
        return isect_aids, ious
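
A standalone sketch of the IoU step with invented boxes, independent of the quadtree lookup:

    import numpy as np
    import kwimage

    query = kwimage.Boxes(np.array([[0, 0, 10, 10]]), 'tlbr')
    truths = kwimage.Boxes(np.array([
        [5, 5, 15, 15],
        [20, 20, 30, 30],
    ]), 'tlbr')
    ious = query.ious(truths, bias=0)[0]
    print(ious)  # approximately [0.14, 0.0]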
Example 13
 def _build_index(dset, verbose=0):
     """
     """
     if verbose:
         print('Building isect index')
     qtrees = {
         img['id']: pyqtree.Index((0, 0, img['width'], img['height']))
         for img in ub.ProgIter(
             dset.dataset['images'], desc='init qtrees', verbose=verbose)
     }
     for qtree in qtrees.values():
         qtree.aid_to_tlbr = {}  # Add extra index to track boxes
     for ann in ub.ProgIter(dset.dataset['annotations'],
                            desc='populate qtrees',
                            verbose=verbose):
         bbox = ann.get('bbox', None)
         if bbox is not None:
             aid = ann['id']
             qtree = qtrees[ann['image_id']]
             xywh_box = kwimage.Boxes(bbox, 'xywh')
             tlbr_box = xywh_box.to_tlbr().data
             qtree.insert(aid, tlbr_box)
             qtree.aid_to_tlbr[aid] = tlbr_box
     return qtrees
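
A small sketch of how the resulting index can be queried (annotation ids and boxes invented; this assumes pyqtree's Index.intersect query method, which is what overlapping_aids relies on):

    import pyqtree
    import kwimage

    qtree = pyqtree.Index((0, 0, 100, 100))  # (xmin, ymin, xmax, ymax) bounds
    qtree.aid_to_tlbr = {}
    for aid, xywh in {1: [10, 10, 20, 20], 2: [70, 70, 10, 10]}.items():
        tlbr_box = kwimage.Boxes(xywh, 'xywh').to_tlbr().data
        qtree.insert(aid, tlbr_box)
        qtree.aid_to_tlbr[aid] = tlbr_box

    # Annotation ids whose boxes touch the query rectangle
    print(qtree.intersect([0, 0, 40, 40]))  # -> [1]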
Example 14
def tabular_coco_targets(dset):
    """
    Transforms COCO box annotations into a tabular form

    _ = xdev.profile_now(tabular_coco_targets)(dset)
    """
    import warnings
    # TODO: better handling of non-bounding box annotations; ignore for now

    if hasattr(dset, 'tabular_targets'):
        # In the SQL case, we can write a single query that
        # builds the table more efficiently.
        return dset.tabular_targets()

    img_items = list(dset.imgs.items())
    gid_to_width = {gid: img['width'] for gid, img in img_items}
    gid_to_height = {gid: img['height'] for gid, img in img_items}

    try:
        anns = dset.dataset['annotations']
        if not isinstance(anns, list):
            anns = list(anns)

        xywh = [ann['bbox'] for ann in anns]
        xywh = np.array(xywh, dtype=np.float32)
    except Exception:
        has_bbox = [ann.get('bbox', None) is not None for ann in anns]
        if not all(has_bbox):
            n_missing = len(has_bbox) - sum(has_bbox)
            warnings.warn('CocoDataset is missing boxes '
                          'for {} annotations'.format(n_missing))
        anns = list(ub.compress(anns, has_bbox))
        xywh = [ann['bbox'] for ann in anns]
        xywh = np.array(xywh, dtype=np.float32)

    boxes = kwimage.Boxes(xywh, 'xywh')
    cxywhs = boxes.to_cxywh().data.reshape(-1, 4)

    aids = [ann['id'] for ann in anns]
    gids = [ann['image_id'] for ann in anns]
    cids = [ann['category_id'] for ann in anns]

    img_width = [gid_to_width[gid] for gid in gids]
    img_height = [gid_to_height[gid] for gid in gids]

    aids = np.array(aids, dtype=np.int32)
    gids = np.array(gids, dtype=np.int32)
    cids = np.array(cids, dtype=np.int32)

    table = {
        # Annotation / Image / Category ids
        'aid': aids,
        'gid': gids,
        'category_id': cids,
        # Subpixel box localizations wrt parent image
        'cx': cxywhs.T[0],
        'cy': cxywhs.T[1],
        'width': cxywhs.T[2],
        'height': cxywhs.T[3],
    }

    # Parent image id and width / height
    table['img_width'] = np.array(img_width, dtype=np.int32)
    table['img_height'] = np.array(img_height, dtype=np.int32)

    # table = ub.map_vals(np.asarray, table)
    targets = kwarray.DataFrameArray(table)
    return targets
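
A small sketch of just the box columns of the table (values invented): xywh annotations become centers plus sizes via kwimage, and the columns go into a kwarray.DataFrameArray.

    import numpy as np
    import kwarray
    import kwimage

    xywh = np.array([[0, 0, 10, 20], [5, 5, 4, 4]], dtype=np.float32)
    cxywhs = kwimage.Boxes(xywh, 'xywh').to_cxywh().data.reshape(-1, 4)
    table = {
        'aid': np.array([1, 2], dtype=np.int32),
        'cx': cxywhs.T[0],
        'cy': cxywhs.T[1],
        'width': cxywhs.T[2],
        'height': cxywhs.T[3],
    }
    targets = kwarray.DataFrameArray(table)
    print(targets)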
Example 15
def _devcheck_corner():
    self = DelayedWarp.random(rng=0)
    print(self.nesting())
    region_slices = (slice(40, 90), slice(20, 62))
    region_box = kwimage.Boxes.from_slice(region_slices, shape=self.shape)
    region_bounds = region_box.to_polygons()[0]

    for leaf in self._optimize_paths():
        pass

    tf_leaf_to_root = leaf['transform']
    tf_root_to_leaf = np.linalg.inv(tf_leaf_to_root)

    leaf_region_bounds = region_bounds.warp(tf_root_to_leaf)
    leaf_region_box = leaf_region_bounds.bounding_box().to_ltrb()
    leaf_crop_box = leaf_region_box.quantize()
    lt_x, lt_y, rb_x, rb_y = leaf_crop_box.data[0, 0:4]

    root_crop_corners = leaf_crop_box.to_polygons()[0].warp(tf_leaf_to_root)

    # leaf_crop_slices = (slice(lt_y, rb_y), slice(lt_x, rb_x))

    crop_offset = leaf_crop_box.data[0, 0:2]
    corner_offset = leaf_region_box.data[0, 0:2]
    offset_xy = crop_offset - corner_offset

    tf_root_to_leaf

    # NOTE:

    # Cropping applies a translation in whatever space we do it in
    # We need to save the bounds of the crop.
    # But now we need to adjust the transform so it points to the
    # cropped-leaf-space not just the leaf-space, so we invert the implicit
    # crop

    tf_crop_to_leaf = Affine.affine(offset=crop_offset)

    # tf_newroot_to_root = Affine.affine(offset=region_box.data[0, 0:2])
    tf_root_to_newroot = Affine.affine(offset=region_box.data[0, 0:2]).inv()

    tf_crop_to_leaf = Affine.affine(offset=crop_offset)
    tf_crop_to_newroot = tf_root_to_newroot @ tf_leaf_to_root @ tf_crop_to_leaf
    tf_newroot_to_crop = tf_crop_to_newroot.inv()

    # tf_leaf_to_crop
    # tf_corner_offset = Affine.affine(offset=offset_xy)

    subpixel_offset = Affine.affine(offset=offset_xy).matrix
    tf_crop_to_leaf = subpixel_offset
    # tf_crop_to_root = tf_leaf_to_root @ tf_crop_to_leaf
    # tf_root_to_crop = np.linalg.inv(tf_crop_to_root)

    if 1:
        import kwplot
        kwplot.autoplt()

        lw, lh = leaf['sub_data_shape'][0:2]
        leaf_box = kwimage.Boxes([[0, 0, lw, lh]], 'xywh')
        root_box = kwimage.Boxes([[0, 0, self.dsize[0], self.dsize[1]]],
                                 'xywh')

        ax1 = kwplot.figure(fnum=1, pnum=(2, 2, 1), doclf=1).gca()
        ax2 = kwplot.figure(fnum=1, pnum=(2, 2, 2)).gca()
        ax3 = kwplot.figure(fnum=1, pnum=(2, 2, 3)).gca()
        ax4 = kwplot.figure(fnum=1, pnum=(2, 2, 4)).gca()
        root_box.draw(setlim=True, ax=ax1)
        leaf_box.draw(setlim=True, ax=ax2)

        region_bounds.draw(ax=ax1, color='green', alpha=.4)
        leaf_region_bounds.draw(ax=ax2, color='green', alpha=.4)
        leaf_crop_box.draw(ax=ax2, color='purple')
        root_crop_corners.draw(ax=ax1, color='purple', alpha=.4)

        new_w = region_box.to_xywh().data[0, 2]
        new_h = region_box.to_xywh().data[0, 3]
        ax3.set_xlim(0, new_w)
        ax3.set_ylim(0, new_h)

        crop_w = leaf_crop_box.to_xywh().data[0, 2]
        crop_h = leaf_crop_box.to_xywh().data[0, 3]
        ax4.set_xlim(0, crop_w)
        ax4.set_ylim(0, crop_h)

        pts3_ = kwimage.Points.random(3).scale((new_w, new_h))
        pts3 = kwimage.Points(
            xy=np.vstack([[[0, 0], [5, 5], [0, 49], [40, 45]], pts3_.xy]))
        pts4 = pts3.warp(tf_newroot_to_crop.matrix)
        pts3.draw(ax=ax3)
        pts4.draw(ax=ax4)
Example 16
def warp_image_test(image, transform, dsize=None):
    """

    from kwimage.transform import Affine
    import kwimage
    image = kwimage.grab_test_image('checkerboard', dsize=(2048, 2048)).astype(np.float32)
    image = kwimage.grab_test_image('astro', dsize=(2048, 2048))
    transform = Affine.random() @ Affine.scale(0.01)

    """
    from kwimage.transform import Affine
    import kwimage
    import numpy as np
    import ubelt as ub

    # Choose a random affine transform that probably has a small scale
    # transform = Affine.random() @ Affine.scale((0.3, 2))
    # transform = Affine.scale((0.1, 1.2))
    # transform = Affine.scale(0.05)
    transform = Affine.random() @ Affine.scale(0.01)
    # transform = Affine.random()

    image = kwimage.grab_test_image('astro')
    image = kwimage.grab_test_image('checkerboard')

    image = kwimage.ensure_float01(image)

    from kwimage import im_cv2
    import kwarray
    import cv2
    transform = Affine.coerce(transform)

    if 1 or dsize is None:
        h, w = image.shape[0:2]

        boxes = kwimage.Boxes(np.array([[0, 0, w, h]]), 'xywh')
        poly = boxes.to_polygons()[0]
        warped_poly = poly.warp(transform.matrix)
        warped_box = warped_poly.to_boxes().to_ltrb().quantize()
        dsize = tuple(map(int, warped_box.data[0, 2:4]))

    import timerit
    ti = timerit.Timerit(10, bestof=3, verbose=2)

    def _full_gauss_kernel(k0, sigma0, scale):
        num_downscales = np.log2(1 / scale)
        if num_downscales < 0:
            return 1, 0

        # Define b0 = kernel size for one downsample operation
        b0 = 5
        # Define sigma0 = sigma for one downsample operation
        sigma0 = 1

        # The kernel size and sigma doubles for each 2x downsample
        k = int(np.ceil(b0 * (2 ** (num_downscales - 1))))
        sigma = sigma0 * (2 ** (num_downscales - 1))

        if k % 2 == 0:
            k += 1
        return k, sigma

    def pyrDownK(a, k=1):
        assert k >= 0
        for _ in range(k):
            a = cv2.pyrDown(a)
        return a

    for timer in ti.reset('naive'):
        with timer:
            interpolation = 'nearest'
            flags = im_cv2._coerce_interpolation(interpolation)
            final_v5 = cv2.warpAffine(image, transform.matrix[0:2], dsize=dsize, flags=flags)

    # --------------------
    # METHOD 1
    #
    for timer in ti.reset('resize+warp'):
        with timer:
            params = transform.decompose()

            sx, sy = params['scale']
            noscale_params = ub.dict_diff(params, {'scale'})
            noscale_warp = Affine.affine(**noscale_params)

            h, w = image.shape[0:2]
            resize_dsize = (int(np.ceil(sx * w)), int(np.ceil(sy * h)))

            downsampled = cv2.resize(image, dsize=resize_dsize, fx=sx, fy=sy,
                                     interpolation=cv2.INTER_AREA)

            interpolation = 'linear'
            flags = im_cv2._coerce_interpolation(interpolation)
            final_v1 = cv2.warpAffine(downsampled, noscale_warp.matrix[0:2], dsize=dsize, flags=flags)

    # --------------------
    # METHOD 2
    for timer in ti.reset('fullblur+warp'):
        with timer:
            k_x, sigma_x = _full_gauss_kernel(k0=5, sigma0=1, scale=sx)
            k_y, sigma_y = _full_gauss_kernel(k0=5, sigma0=1, scale=sy)
            image_ = image.copy()
            image_ = cv2.GaussianBlur(image_, (k_x, k_y), sigma_x, sigma_y)
            image_ = kwarray.atleast_nd(image_, 3)
            # image_ = image_.clip(0, 1)

            interpolation = 'linear'
            flags = im_cv2._coerce_interpolation(interpolation)
            final_v2 = cv2.warpAffine(image_, transform.matrix[0:2], dsize=dsize, flags=flags)

    # --------------------
    # METHOD 3

    for timer in ti.reset('pyrDown+blur+warp'):
        with timer:
            temp = image.copy()
            params = transform.decompose()
            sx, sy = params['scale']

            biggest_scale = max(sx, sy)
            # The -2 allows the gaussian to be a little bigger. This
            # seems to help with border effects at only a small runtime cost
            num_downscales = max(int(np.log2(1 / biggest_scale)) - 2, 0)
            pyr_scale = 1 / (2 ** num_downscales)

            # Does the gaussian downsampling
            temp = pyrDownK(image, num_downscales)

            rest_sx = sx / pyr_scale
            rest_sy = sy / pyr_scale

            partial_scale = Affine.scale((rest_sx, rest_sy))
            rest_warp = noscale_warp @ partial_scale

            k_x, sigma_x = _full_gauss_kernel(k0=5, sigma0=1, scale=rest_sx)
            k_y, sigma_y = _full_gauss_kernel(k0=5, sigma0=1, scale=rest_sy)
            temp = cv2.GaussianBlur(temp, (k_x, k_y), sigma_x, sigma_y)
            temp = kwarray.atleast_nd(temp, 3)

            interpolation = 'cubic'
            flags = im_cv2._coerce_interpolation(interpolation)
            final_v3 = cv2.warpAffine(temp, rest_warp.matrix[0:2], dsize=dsize,
                                      flags=flags)

    # --------------------
    # METHOD 4 - dont do the final blur

    for timer in ti.reset('pyrDown+warp'):
        with timer:
            temp = image.copy()
            params = transform.decompose()
            sx, sy = params['scale']

            biggest_scale = max(sx, sy)
            num_downscales = max(int(np.log2(1 / biggest_scale)), 0)
            pyr_scale = 1 / (2 ** num_downscales)

            # Does the gaussian downsampling
            temp = pyrDownK(image, num_downscales)

            rest_sx = sx / pyr_scale
            rest_sy = sy / pyr_scale

            partial_scale = Affine.scale((rest_sx, rest_sy))
            rest_warp = noscale_warp @ partial_scale

            interpolation = 'linear'
            flags = im_cv2._coerce_interpolation(interpolation)
            final_v4 = cv2.warpAffine(temp, rest_warp.matrix[0:2], dsize=dsize, flags=flags)

    if 1:

        def get_title(key):
            from ubelt.timerit import _choose_unit
            value = ti.measures['mean'][key]
            suffix, mag = _choose_unit(value)
            unit_val = value / mag

            return key + ' ' + ub.repr2(unit_val, precision=2) + ' ' + suffix

        final_v2 = final_v2.clip(0, 1)
        final_v1 = final_v1.clip(0, 1)
        final_v3 = final_v3.clip(0, 1)
        final_v4 = final_v4.clip(0, 1)
        final_v5 = final_v5.clip(0, 1)
        import kwplot
        kwplot.autompl()
        kwplot.imshow(final_v5, pnum=(1, 5, 1), title=get_title('naive'))
        kwplot.imshow(final_v2, pnum=(1, 5, 2), title=get_title('fullblur+warp'))
        kwplot.imshow(final_v1, pnum=(1, 5, 3), title=get_title('resize+warp'))
        kwplot.imshow(final_v3, pnum=(1, 5, 4), title=get_title('pyrDown+blur+warp'))
        kwplot.imshow(final_v4, pnum=(1, 5, 5), title=get_title('pyrDown+warp'))
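
The `_full_gauss_kernel` heuristic above is plain arithmetic; a tiny sketch of the values it produces for a few scales (this version uses its k0/sigma0 arguments directly instead of hard-coding them): the anti-aliasing kernel grows as the downsample factor grows.

    import numpy as np

    def full_gauss_kernel(scale, k0=5, sigma0=1):
        # Number of 2x downsamples needed to reach `scale`
        num_downscales = np.log2(1 / scale)
        if num_downscales < 0:
            return 1, 0
        # Kernel size and sigma double for each 2x downsample
        k = int(np.ceil(k0 * (2 ** (num_downscales - 1))))
        sigma = sigma0 * (2 ** (num_downscales - 1))
        if k % 2 == 0:
            k += 1
        return k, sigma

    for scale in [1.0, 0.5, 0.25, 0.1]:
        print(scale, full_gauss_kernel(scale))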
Example 17
def main(**kw):
    """
    CommandLine:
        python $HOME/code/bioharn/dev/kwcoco_to_viame_csv.py \
            --src /data/public/Aerial/US_ALASKA_MML_SEALION/2007/sealions_2007_v9.kwcoco.json \
            --dst /data/public/Aerial/US_ALASKA_MML_SEALION/2007/sealions_2007_v9.viame.csv
    """
    config = ConvertConfig(default=kw, cmdline=True)

    import kwcoco
    import kwimage
    import ubelt as ub
    coco_dset = kwcoco.CocoDataset(config['src'])

    csv_rows = []
    for gid, img in ub.ProgIter(coco_dset.imgs.items(), total=coco_dset.n_images):
        gname = img['file_name']
        aids = coco_dset.gid_to_aids[gid]

        frame_index = img.get('frame_index', 0)
        # vidid = img.get('video_id', None)

        for aid in aids:
            ann = coco_dset.anns[aid]
            cat = coco_dset.cats[ann['category_id']]
            catname = cat['name']

            # just use annotation id if no tracks
            tid = ann.get('track_id', aid)
            # tracked_aids = tid_to_aids.get(tid, [aid])
            # track_len = len(tracked_aids)

            tl_x, tl_y, br_x, br_y = kwimage.Boxes([ann['bbox']], 'xywh').toformat('tlbr').data[0].tolist()

            score = ann.get('score', 1)

            row = [
                 tid,             # 1 - Detection or Track Unique ID
                 gname,           # 2 - Video or Image String Identifier
                 frame_index,     # 3 - Unique Frame Integer Identifier
                 round(tl_x, 3),  # 4 - TL-x (top left of the image is the origin: 0,0)
                 round(tl_y, 3),  # 5 - TL-y
                 round(br_x, 3),  # 6 - BR-x
                 round(br_y, 3),  # 7 - BR-y
                 score,           # 8 - Auxiliary Confidence (how likely is this actually an object)
                 -1,              # 9 - Target Length
                 catname,         # 10+ - category name
                 score,           # 11+ - category score
            ]

            # Optional fields
            for kp in ann.get('keypoints', []):
                if 'keypoint_category_id' in kp:
                    cname = coco_dset._resolve_to_kpcat(kp['keypoint_category_id'])['name']
                elif 'category_name' in kp:
                    cname = kp['category_name']
                elif 'category' in kp:
                    cname = kp['category']
                else:
                    raise Exception(str(kp))
                kp_x, kp_y = kp['xy']
                row.append('(kp) {} {} {}'.format(
                    cname, round(kp_x, 3), round(kp_y, 3)))

            note_fields = [
                'box_source',
                'changelog',
                'color',
            ]
            for note_key in note_fields:
                if note_key in ann:
                    row.append('(note) {}: {}'.format(note_key, repr(ann[note_key]).replace(',', '<comma>')))

            row = list(map(str, row))
            for item in row:
                if ',' in item:
                    print('BAD row = {!r}'.format(row))
                    raise Exception('comma is in a row field')

            row_str = ','.join(row)
            csv_rows.append(row_str)

    csv_text = '\n'.join(csv_rows)
    dst_fpath = config['dst']
    print('dst_fpath = {!r}'.format(dst_fpath))
    with open(dst_fpath, 'w') as file:
        file.write(csv_text)
Example 18
def convert_camvid_raw_to_coco(camvid_raw_info):
    """
    Converts the raw CamVid format to an MSCOCO-based format (which lets us
    use kwcoco's COCO backend).

    Example:
        >>> # xdoctest: +REQUIRES(--download)
        >>> camvid_raw_info = grab_raw_camvid()
        >>> # test with a reduced set of data
        >>> del camvid_raw_info['img_paths'][2:]
        >>> del camvid_raw_info['mask_paths'][2:]
        >>> dset = convert_camvid_raw_to_coco(camvid_raw_info)
        >>> # xdoctest: +REQUIRES(--show)
        >>> import kwplot
        >>> plt = kwplot.autoplt()
        >>> kwplot.figure(fnum=1, pnum=(1, 2, 1))
        >>> dset.show_image(gid=1)
        >>> kwplot.figure(fnum=1, pnum=(1, 2, 2))
        >>> dset.show_image(gid=2)
    """
    import re
    import kwimage
    import kwcoco
    print('Converting CamVid to MS-COCO format')

    dset_root, img_paths, label_path, mask_paths = ub.take(
        camvid_raw_info,
        'dset_root, img_paths, label_path, mask_paths'.split(', '))

    img_infos = {
        'img_fname': img_paths,
        'mask_fname': mask_paths,
    }
    keys = list(img_infos.keys())
    next_vals = list(zip(*img_infos.values()))
    image_items = [{k: v for k, v in zip(keys, vals)} for vals in next_vals]

    dataset = {
        'img_root': dset_root,
        'images': [],
        'categories': [],
        'annotations': [],
    }

    lines = ub.readfrom(label_path).split('\n')
    lines = [line for line in lines if line]
    for line in lines:
        color_text, name = re.split('\t+', line)
        r, g, b = map(int, color_text.split(' '))
        color = (r, g, b)

        # Parse the special camvid format
        cid = (r << 16) + (g << 8) + (b << 0)
        cat = {
            'id': cid,
            'name': name,
            'color': color,
        }
        dataset['categories'].append(cat)

    for gid, img_item in enumerate(image_items, start=1):
        img = {
            'id': gid,
            'file_name': img_item['img_fname'],
            # nonstandard image field
            'segmentation': img_item['mask_fname'],
        }
        dataset['images'].append(img)

    dset = kwcoco.CocoDataset(dataset)
    dset.rename_categories({'Void': 'background'})

    assert dset.name_to_cat['background']['id'] == 0
    dset.name_to_cat['background'].setdefault('alias', []).append('Void')

    if False:
        _define_camvid_class_hierarcy(dset)

    if 1:
        # TODO: Binarize CCs (and efficiently encode if possible)
        import numpy as np

        bad_info = []
        once = False

        # Add annotations derived from each image's label mask
        dset.remove_annotations(list(dset.index.anns.keys()))
        for gid, img in ub.ProgIter(dset.imgs.items(),
                                    desc='parse label masks'):
            mask_fpath = join(dset_root, img['segmentation'])

            rgb_mask = kwimage.imread(mask_fpath, space='rgb')
            r, g, b = rgb_mask.T.astype(np.int64)
            cid_mask = np.ascontiguousarray(rgb_to_cid(r, g, b).T)

            cids = set(np.unique(cid_mask)) - {0}

            for cid in cids:
                if cid not in dset.cats:
                    if gid == 618:
                        # Handle a known issue with image 618
                        c_mask = (cid == cid_mask).astype(np.uint8)
                        total_bad = c_mask.sum()
                        if total_bad < 32:
                            if not once:
                                print(
                                    'gid 618 has a few known bad pixels, ignoring them'
                                )
                                once = True
                            continue
                        else:
                            raise Exception('more bad pixels than expected')
                    else:
                        raise Exception(
                            'UNKNOWN cid = {!r} in gid={!r}'.format(cid, gid))

                    # bad_rgb = cid_to_rgb(cid)
                    # print('bad_rgb = {!r}'.format(bad_rgb))
                    # print('WARNING UNKNOWN cid = {!r} in gid={!r}'.format(cid, gid))
                    # bad_info.append({
                    #     'gid': gid,
                    #     'cid': cid,
                    # })
                else:
                    ann = {
                        'category_id': cid,
                        'image_id': gid
                        # 'segmentation': mask.to_coco()
                    }
                    assert cid in dset.cats
                    c_mask = (cid == cid_mask).astype(np.uint8)
                    mask = kwimage.Mask(c_mask, 'c_mask')

                    box = kwimage.Boxes([mask.get_xywh()], 'xywh')
                    # box = mask.to_boxes()

                    ann['bbox'] = ub.peek(box.to_coco())
                    ann['segmentation'] = mask.to_coco()
                    dset.add_annotation(**ann)

        if 0:
            bad_cids = [i['cid'] for i in bad_info]
            print(sorted([c['color'] for c in dataset['categories']]))
            print(sorted(set([cid_to_rgb(i['cid']) for i in bad_info])))

            gid = 618
            img = dset.imgs[gid]
            mask_fpath = join(dset_root, img['segmentation'])
            rgb_mask = kwimage.imread(mask_fpath, space='rgb')
            r, g, b = rgb_mask.T.astype(np.int64)
            cid_mask = np.ascontiguousarray(rgb_to_cid(r, g, b).T)
            cid_hist = ub.dict_hist(cid_mask.ravel())

            bad_cid_hist = {}
            for cid in bad_cids:
                bad_cid_hist[cid] = cid_hist.pop(cid)

            import kwplot
            kwplot.autompl()
            kwplot.imshow(rgb_mask)

    if 0:
        import kwplot
        plt = kwplot.autoplt()
        plt.clf()
        dset.show_image(1)

        import xdev
        gid_list = list(dset.imgs)
        for gid in xdev.InteractiveIter(gid_list):
            dset.show_image(gid)
            xdev.InteractiveIter.draw()

    dset._build_index()
    dset._build_hashid()
    return dset
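
The color-to-category trick above packs an RGB triple into one integer id; a minimal sketch of that packing and its inverse, mirroring the `cid = (r << 16) + (g << 8) + (b << 0)` expression used for the categories:

    def rgb_to_cid(r, g, b):
        # Pack 8-bit channels into a single integer: 0xRRGGBB
        return (r << 16) + (g << 8) + (b << 0)

    def cid_to_rgb(cid):
        return ((cid >> 16) & 255, (cid >> 8) & 255, (cid >> 0) & 255)

    cid = rgb_to_cid(64, 128, 64)
    print(cid, cid_to_rgb(cid))  # 4227136 (64, 128, 64)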
Example 19
    def decode_batch(self, output, forloss=False):
        """
        Returns array of detections for every image in batch

        Example:
            >>> # xdoc: +REQUIRES(--download, module:ndsampler)
            >>> from netharn.models.yolo2.yolo2 import *  # NOQA
            >>> self = YoloCoder.demo()
            >>> output = self.demo_output()
            >>> batch_dets = self.decode_batch(output)
            >>> batch_dets = self.decode_batch(output, forloss=True)

        Example:
            >>> # xdoc: +REQUIRES(--download, module:ndsampler)
            >>> info = dev_demodata()
            >>> self, output = ub.take(info, ['coder', 'outputs'])
            >>> batch_dets = self.decode_batch(output)
            >>> dets = batch_dets[0].sort().scale(info['orig_sizes'][0])
            >>> print('dets.boxes = {!r}'.format(dets.boxes))
            >>> # xdoctest: +REQUIRES(--show)
            >>> import kwplot
            >>> kwplot.figure(fnum=1, doclf=True)
            >>> kwplot.imshow(info['rgb255'], colorspace='rgb')
            >>> dets.draw()
            >>> kwplot.show_if_requested()
        """
        import kwimage
        # don't modify in place
        # output = output.clone()

        class_energy = output['class_energy']
        score_energy = output['score_energy']
        cxywh_energy = output['cxywh_energy']

        # Variables
        nB = class_energy.shape[0]
        nH, nW = class_energy.shape[-2:]
        nA = self.num_anchors

        device = class_energy.device

        if self.anchors.device != device:
            self.anchors = self.anchors.to(device)

        # Compute xc,yc, nW,nH, box_score on Tensor
        lin_x = torch.linspace(0, nW - 1, nW, device=device).repeat(nH, 1)
        lin_y = torch.linspace(0, nH - 1, nH, device=device).repeat(nW, 1).t().contiguous()
        anchor_w = self.anchors[:, 0].contiguous().view(1, nA, 1, 1)
        anchor_h = self.anchors[:, 1].contiguous().view(1, nA, 1, 1)

        if forloss:
            # TODO : rectify
            coord = torch.empty_like(cxywh_energy)
            coord[:, :, 0:2, :, :] = cxywh_energy[:, :, 0:2, :, :].sigmoid()    # cx,cy
            coord[:, :, 2:4, :, :] = cxywh_energy[:, :, 2:4, :, :]              # w,h

            with torch.no_grad():
                pred_boxes = torch.empty_like(cxywh_energy, device=device).view(-1, 4)
                pred_boxes[:, 0] = (coord[:, :, 0, :, :] + lin_x).view(-1)
                pred_boxes[:, 1] = (coord[:, :, 1, :, :] + lin_y).view(-1)
                pred_boxes[:, 2] = (coord[:, :, 2, :, :].exp() * anchor_w).view(-1)
                pred_boxes[:, 3] = (coord[:, :, 3, :, :].exp() * anchor_h).view(-1)

            info = {
                'coord': coord,
                'pred_boxes': pred_boxes,
            }
            return info
        else:
            cxywh = cxywh_energy.clone()
            # cxywh_ = cxywh.view(nB, self.num_anchors, -1, nH,  nW)
            cxywh[:, :, 0, :].sigmoid_().add_(lin_x).div_(nW)          # X center
            cxywh[:, :, 1, :].sigmoid_().add_(lin_y).div_(nH)          # Y center
            cxywh[:, :, 2, :].exp_().mul_(anchor_w).div_(nW)           # Width
            cxywh[:, :, 3, :].exp_().mul_(anchor_h).div_(nH)           # Height

        score = score_energy.sigmoid()                             # Box score

        # Compute class_score
        if len(self.classes) > 1:
            cls_scores = torch.nn.functional.softmax(class_energy, dim=2)
            cls_max, cls_max_idx = torch.max(cls_scores, 2, keepdim=True)
            cls_max.mul_(score)
        else:
            cls_max = score
            cls_max_idx = torch.zeros_like(cls_max)

        # Save detection if conf*class_conf is higher than threshold
        flags = cls_max >= self.conf_thresh
        flags_flat = flags.view(-1)

        if flags.sum() == 0:
            batch_dets = []
            for i in range(nB):
                batch_dets.append(kwimage.Detections(
                    boxes=kwimage.Boxes(torch.empty((0, 4), dtype=torch.float32, device=device), 'cxywh'),
                    scores=torch.empty(0, dtype=torch.float32, device=device),
                    class_idxs=torch.empty(0, dtype=torch.int64, device=device),
                    classes=self.classes
                ))
        else:
            # Permute so the bbox dim (i.e. xywh) is trailing
            coords = cxywh.permute(0, 1, 3, 4, 2).contiguous().view(-1, 4)
            coords = coords[flags.view(-1)]

            scores = cls_max[flags]
            class_idxs = cls_max_idx[flags]

            stacked_dets = kwimage.Detections(
                boxes=kwimage.Boxes(coords, 'cxywh'),
                scores=scores,
                class_idxs=class_idxs,
                classes=self.classes
            )

            # Get indexes of splits between images of batch
            max_det_per_batch = len(self.anchors) * nH * nW
            m = max_det_per_batch
            flags_flat = flags_flat.int()
            slices = [slice(m * i, m * (i + 1)) for i in range(nB)]
            det_per_batch = torch.IntTensor([flags_flat[s].sum()
                                             for s in slices])
            split_idx = torch.cumsum(det_per_batch, dim=0)

            batch_dets = []
            start = 0
            for end in split_idx:
                dets = stacked_dets[start: end]
                dets = dets.non_max_suppress(thresh=self.nms_thresh)
                batch_dets.append(dets)
                start = end
        return batch_dets
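
A small numpy sketch of the per-image split at the end of decode_batch (sizes invented): the flat keep-mask is chunked into equal-length per-image slices, the per-image counts are cumulatively summed, and the stacked detections are sliced at those offsets.

    import numpy as np

    nB = 3                   # batch size
    max_det_per_batch = 4    # anchors * H * W per image (toy value)
    flags_flat = np.array([1, 0, 0, 1,  0, 0, 0, 0,  1, 1, 1, 0], dtype=bool)

    m = max_det_per_batch
    slices = [slice(m * i, m * (i + 1)) for i in range(nB)]
    det_per_batch = np.array([flags_flat[s].sum() for s in slices])
    split_idx = np.cumsum(det_per_batch)
    print(det_per_batch, split_idx)  # [2 0 3] [2 2 5]

    start = 0
    for end in split_idx:
        print('image gets stacked detections', list(range(start, end)))
        start = end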
Example 20
def warp_affine(image, transform, dsize=None, antialias=True,
                interpolation='linear'):
    """
    Applies an affine transformation to an image with optional antialiasing.

    Args:
        image (ndarray): the input image

        transform (ndarray | Affine): a coercable affine matrix

        dsize (Tuple[int, int] | None | str):
            width and height of the resulting image. If "auto", it is computed
            such that the positive coordinates of the warped image will fit in
            the new canvas. If None, then the image size will not change.

        antialias (bool, default=True):
            if True, determines whether the transform is downsampling and, if
            so, applies antialiasing via a Gaussian blur.

    TODO:
        - [ ] This will be moved to kwimage.im_cv2

    Example:
        >>> import kwimage
        >>> image = kwimage.grab_test_image('astro')
        >>> image = kwimage.grab_test_image('checkerboard')
        >>> transform = Affine.random() @ Affine.scale(0.05)
        >>> transform = Affine.scale(0.02)
        >>> warped1 = warp_affine(image, transform, dsize='auto', antialias=1, interpolation='nearest')
        >>> warped2 = warp_affine(image, transform, dsize='auto', antialias=0)
        >>> # xdoctest: +REQUIRES(--show)
        >>> import kwplot
        >>> kwplot.autompl()
        >>> pnum_ = kwplot.PlotNums(nRows=1, nCols=2)
        >>> kwplot.imshow(warped1, pnum=pnum_(), title='antialias=True')
        >>> kwplot.imshow(warped2, pnum=pnum_(), title='antialias=False')
        >>> kwplot.show_if_requested()

    Example:
        >>> import kwimage
        >>> image = kwimage.grab_test_image('astro')
        >>> image = kwimage.grab_test_image('checkerboard')
        >>> transform = Affine.random() @ Affine.scale((.1, 1.2))
        >>> warped1 = warp_affine(image, transform, dsize='auto', antialias=1)
        >>> warped2 = warp_affine(image, transform, dsize='auto', antialias=0)
        >>> # xdoctest: +REQUIRES(--show)
        >>> import kwplot
        >>> kwplot.autompl()
        >>> pnum_ = kwplot.PlotNums(nRows=1, nCols=2)
        >>> kwplot.imshow(warped1, pnum=pnum_(), title='antialias=True')
        >>> kwplot.imshow(warped2, pnum=pnum_(), title='antialias=False')
        >>> kwplot.show_if_requested()
    """
    from kwimage import im_cv2
    from kwimage.transform import Affine
    import kwimage
    import numpy as np
    import cv2
    import ubelt as ub
    transform = Affine.coerce(transform)
    flags = im_cv2._coerce_interpolation(interpolation)

    # TODO: expose these params
    # borderMode = cv2.BORDER_DEFAULT
    # borderMode = cv2.BORDER_CONSTANT
    borderMode = None
    borderValue = None

    """
    Variations that could change in the future:

        * In _gauss_params I'm not sure if we want to compute integer or
            fractional "number of downsamples".

        * The fudge factor bothers me, but seems necessary
    """

    def _gauss_params(scale, k0=5, sigma0=1, fractional=True):
        # Compute a gaussian to mitigate aliasing for a requested downsample
        # Args:
        # scale: requested downsample factor
        # k0 (int): kernel size for one downsample operation
        # sigma0 (float): sigma for one downsample operation
        # fractional (bool): controls if we compute params for integer downsample
        # ops
        num_downs = np.log2(1 / scale)
        if not fractional:
            num_downs = max(int(num_downs), 0)
        if num_downs <= 0:
            k = 1
            sigma = 0
        else:
            # The kernel size and sigma doubles for each 2x downsample
            sigma = sigma0 * (2 ** (num_downs - 1))
            k = int(np.ceil(k0 * (2 ** (num_downs - 1))))
            k = k + int(k % 2 == 0)
        return k, sigma

    def _pyrDownK(a, k=1):
        # Downsamples by (2 ** k)x with antialiasing
        if k == 0:
            a = a.copy()
        for _ in range(k):
            a = cv2.pyrDown(a)
        return a

    if dsize is None:
        dsize = tuple(image.shape[0:2][::-1])
    elif dsize == 'auto':
        h, w = image.shape[0:2]
        boxes = kwimage.Boxes(np.array([[0, 0, w, h]]), 'xywh')
        poly = boxes.to_polygons()[0]
        warped_poly = poly.warp(transform.matrix)
        warped_box = warped_poly.to_boxes().to_ltrb().quantize()
        dsize = tuple(map(int, warped_box.data[0, 2:4]))

    if not antialias:
        M = np.asarray(transform)
        result = cv2.warpAffine(image, M[0:2],
                                dsize=dsize, flags=flags,
                                borderMode=borderMode,
                                borderValue=borderValue)
    else:
        # Decompose the affine matrix into its 6 core parameters
        params = transform.decompose()
        sx, sy = params['scale']

        if sx >= 1 and sy >= 1:
            # No downsampling detected, no need to antialias
            M = np.asarray(transform)
            result = cv2.warpAffine(image, M[0:2], dsize=dsize, flags=flags,
                                    borderMode=borderMode,
                                    borderValue=borderValue)
        else:
            # At least one dimension is downsampled

            # Compute the transform with all scaling removed
            noscale_warp = Affine.affine(**ub.dict_diff(params, {'scale'}))

            max_scale = max(sx, sy)
            # The "fudge" factor limits the number of downsampled pyramid
            # operations. A bigger fudge factor means that the final
            # gaussian kernel for the antialiasing operation will be bigger.
            # It essentially says that at most "fudge" downsampling ops will
            # be handled by the final blur rather than the pyramid downsample.
            # It seems to help with border effects at only a small runtime cost
            # I don't entirely understand why the border artifact is introduced
            # when this is enabled though

            # TODO: should we allow for this fudge factor?
            # TODO: what is the real name of this? num_down_prevent ?
            # skip_final_downs?
            fudge = 2
            # TODO: should final antialiasing be on?
            # Note, if fudge is non-zero it is important to do this.
            do_final_aa = 1
            # TODO: should fractional be True or False by default?
            # If fudge is 0 and fractional=0, then I think is the same as
            # do_final_aa=0.
            fractional = 0

            num_downs = max(int(np.log2(1 / max_scale)) - fudge, 0)
            pyr_scale = 1 / (2 ** num_downs)

            # Downsample iteratively with antialiasing
            downscaled = _pyrDownK(image, num_downs)

            rest_sx = sx / pyr_scale
            rest_sy = sy / pyr_scale

            # Compute the transform from the downsampled image to the destination
            rest_warp = noscale_warp @ Affine.scale((rest_sx, rest_sy))

            # Do a final small blur to account for the potential aliasing
            # in any remaining scaling operations.
            if do_final_aa:
                # Computed as the closest sigma to the [1, 4, 6, 4, 1] approx
                # used in cv2.pyrDown
                aa_sigma0 = 1.0565137190917149
                aa_k0 = 5
                k_x, sigma_x = _gauss_params(scale=rest_sx, k0=aa_k0,
                                             sigma0=aa_sigma0,
                                             fractional=fractional)
                k_y, sigma_y = _gauss_params(scale=rest_sy, k0=aa_k0,
                                             sigma0=aa_sigma0,
                                             fractional=fractional)

                # Note: when k=1, no blur occurs
                # blurBorderType = cv2.BORDER_REPLICATE
                # blurBorderType = cv2.BORDER_CONSTANT
                blurBorderType = cv2.BORDER_DEFAULT
                downscaled = cv2.GaussianBlur(
                    downscaled, (k_x, k_y), sigma_x, sigma_y,
                    borderType=blurBorderType
                )

            result = cv2.warpAffine(downscaled, rest_warp.matrix[0:2],
                                    dsize=dsize, flags=flags,
                                    borderMode=borderMode,
                                    borderValue=borderValue)

    return result
Example 21
    def build_targets(self, pred_cxywh, target, nH, nW, seen=0, gt_weights=None):
        """
        Compare prediction boxes and targets, convert targets to network output tensors

        Args:
            pred_cxywh (Tensor):   shape [B * A * W * H, 4] in normalized cxywh format
            target (Tensor): shape [B, max(gtannots), 4]

        CommandLine:
            python ~/code/netharn/netharn/models/yolo2/light_region_loss.py RegionLoss.build_targets:1

        Example:
            >>> # xdoctest: +REQUIRES(module:kwimage)
            >>> from netharn.models.yolo2.light_yolo import Yolo
            >>> torch.random.manual_seed(0)
            >>> network = Yolo(num_classes=2, conf_thresh=4e-2)
            >>> self = RegionLoss(num_classes=network.num_classes, anchors=network.anchors)
            >>> Win, Hin = 96, 96
            >>> nW, nH = 3, 3
            >>> target = torch.FloatTensor([])
            >>> gt_weights = torch.FloatTensor([[-1, -1, -1], [1, 1, 0]])
            >>> #pred_cxywh = torch.rand(90, 4)
            >>> nB = len(gt_weights)
            >>> pred_cxywh = torch.rand(nB, len(self.anchors), nH, nW, 4).view(-1, 4)
            >>> seen = 0
            >>> self.build_targets(pred_cxywh, target, nH, nW, seen, gt_weights)

        Example:
            >>> # xdoctest: +REQUIRES(module:kwimage)
            >>> torch.random.manual_seed(0)
            >>> anchors = np.array([[.75, .75], [1.0, .3], [.3, 1.0]])
            >>> self = RegionLoss(num_classes=2, anchors=anchors)
            >>> nW, nH = 2, 2
            >>> # true boxes for each item in the batch
            >>> # each box encodes class, center, width, and height
            >>> # coordinates are normalized in the range 0 to 1
            >>> # items in each batch are padded with dummy boxes with class_id=-1
            >>> target = torch.FloatTensor([
            >>>     # boxes for batch item 0 (it has no objects, note the pad!)
            >>>     [[-1, 0, 0, 0, 0],
            >>>      [-1, 0, 0, 0, 0],
            >>>      [-1, 0, 0, 0, 0]],
            >>>     # boxes for batch item 1
            >>>     [[0, 0.50, 0.50, 1.00, 1.00],
            >>>      [1, 0.34, 0.32, 0.12, 0.32],
            >>>      [1, 0.32, 0.42, 0.22, 0.12]],
            >>> ])
            >>> gt_weights = torch.FloatTensor([[-1, -1, -1], [1, 1, 0]])
            >>> nB = len(gt_weights)
            >>> pred_cxywh = torch.rand(nB, len(anchors), nH, nW, 4).view(-1, 4)
            >>> seen = 0
            >>> coord_mask, conf_mask, cls_mask, tcoord, tconf, tcls = self.build_targets(pred_cxywh, target, nH, nW, seen, gt_weights)
        """
        import kwimage
        from netharn.util import torch_ravel_multi_index
        gtempty = (target.numel() == 0)

        # Parameters
        nB = target.shape[0] if not gtempty else 0
        # nT = target.shape[1] if not gtempty else 0
        nA = self.num_anchors

        nPixels = nW * nH

        if nB == 0:
            # torch does not preserve shapes when any dimension goes to 0
            # fix nB if there is no groundtruth
            nB = int(len(pred_cxywh) / (nA * nH * nW))
        else:
            assert nB == int(len(pred_cxywh) / (nA * nH * nW)), 'bad assumption'

        seen = seen + nB

        # Tensors
        device = target.device

        # Put the groundtruth in a format comparable to output
        tcoord = torch.zeros(nB, nA, 4, nH, nW, device=device)
        tconf = torch.zeros(nB, nA, 1, nH, nW, device=device)
        tcls = torch.zeros(nB, nA, 1, nH, nW, device=device)

        # Create weights to determine which outputs are punished
        # By default we punish all outputs for not having correct iou
        # objectness prediction. The other masks default to zero meaning that
        # by default we will not punish a prediction for having a different
        # coordinate or class label (later the groundtruths will override these
        # defaults for select grid cells and anchors)
        coord_mask = torch.zeros(nB, nA, 1, nH, nW, device=device)
        conf_mask = torch.ones(nB, nA, 1, nH, nW, device=device)
        # TODO: this could be a weight instead
        cls_mask = torch.zeros(nB, nA, 1, nH, nW, device=device, dtype=torch.uint8)

        # Default conf_mask to the noobject_scale
        conf_mask.fill_(self.noobject_scale)

        # Encourage the network to predict boxes centered on the grid cells by
        # setting the default target xs and ys to (.5, .5) (i.e. the relative
        # center of a grid cell). Fill the mask with ones so all outputs are
        # punished for not predicting center anchor locations --- unless
        # tcoord is overridden by a real groundtruth target later on.
        if seen < self.seen_thresh:
            # PJreddies version
            # https://github.com/pjreddie/darknet/blob/master/src/region_layer.c#L254

            # By default encourage the network to predict no shift
            tcoord[:, :, 0:2, :, :].fill_(0.5)
            # By default encourage the network to predict no scale (in logspace)
            tcoord[:, :, 2:4, :, :].fill_(0.0)

            if False:
                # In the warmup phase we want the coords to default to the
                # anchors when nothing real is predicted, but with a small
                # weight of only 0.01, so set it to 0.01 / self.coord_scale.
                # Note we will apply the required sqrt later.
                coord_mask.fill_((0.01 / self.coord_scale))
                # This hurts even though it seems like it's what darknet does
            else:
                coord_mask.fill_(1)

        if gtempty:
            coord_mask = coord_mask.sqrt()
            conf_mask = conf_mask.sqrt()
            coord_mask = coord_mask.expand_as(tcoord)
            return coord_mask, conf_mask, cls_mask, tcoord, tconf, tcls

        # Put this back into a non-flat view
        pred_cxywh = pred_cxywh.view(nB, nA, nH, nW, 4)
        pred_boxes = kwimage.Boxes(pred_cxywh, 'cxywh')

        gt_class = target[..., 0].data
        gt_boxes_norm = kwimage.Boxes(target[..., 1:5], 'cxywh')

        # Put GT boxes into output coordinates
        gt_boxes = gt_boxes_norm.scale([nW, nH])
        # Construct "relative" versions of the true boxes, centered at 0
        # This will allow them to be compared to the anchor boxes.
        rel_gt_boxes = gt_boxes.copy()
        rel_gt_boxes.data[..., 0:2] = 0

        # true boxes with a class of -1 are fillers, ignore them
        gt_isvalid = (gt_class >= 0)
        batch_nT = gt_isvalid.sum(dim=1).cpu().numpy()

        # Compute the grid cell for each groundtruth box
        true_xs = gt_boxes.data[..., 0]
        true_ys = gt_boxes.data[..., 1]
        true_is = true_xs.long().clamp_(0, nW - 1)
        true_js = true_ys.long().clamp_(0, nH - 1)

        if gt_weights is None:
            # If unspecified give each groundtruth a default weight of 1
            gt_weights = torch.ones_like(target[..., 0], device=device)

        # Undocumented darknet detail: multiply the coord weight by two minus
        # the area of the true box in normalized coordinates, which emphasizes
        # small boxes. The square root is applied later because the weights
        # multiply terms inside an MSE loss.
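        # For example (illustrative numbers): a small box covering 1% of the
        # image gets weight * (2.0 - 0.01) ~= 2x emphasis, while a box covering
        # the whole image gets weight * (2.0 - 1.0) = 1x.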
        if self.small_boxes:
            gt_coord_weights = (gt_weights * (2.0 - gt_boxes_norm.area[..., 0]))
        else:
            gt_coord_weights = gt_weights
        # Pre multiply weights with object scales
        gt_conf_weights = gt_weights * self.object_scale
        # Pre threshold classification weights
        gt_cls_weights = (gt_weights > .5).byte()

        # Loop over ground_truths and construct tensors
        for bx in range(nB):
            # Get the actual groundtruth boxes for this batch item
            nT = batch_nT[bx]
            if nT == 0:
                continue

            # Batch ground truth
            cur_rel_gt_boxes = rel_gt_boxes[bx, 0:nT]
            cur_gt_boxes = gt_boxes[bx, 0:nT]
            cur_gt_cls = target[bx, 0:nT, 0]
            # scalars, one for each true object
            cur_true_is = true_is[bx, 0:nT]
            cur_true_js = true_js[bx, 0:nT]
            cur_true_coord_weights = gt_coord_weights[bx, 0:nT]
            cur_true_conf_weights = gt_conf_weights[bx, 0:nT]
            cur_true_cls_weights = gt_cls_weights[bx, 0:nT]

            cur_gx, cur_gy, cur_gw, cur_gh = cur_gt_boxes.data.t()

            # Batch predictions
            cur_pred_boxes = pred_boxes[bx]

            # NOTE: IOU computation is the bottleneck in this function

            # Assign groundtruth boxes to anchor boxes
            cur_anchor_gt_ious = self.rel_anchors_boxes.ious(
                cur_rel_gt_boxes, bias=0)
            _, cur_true_anchor_axs = cur_anchor_gt_ious.max(dim=0)  # best_ns in YOLO

            # Get the anchor (w,h) assigned to each true object
            cur_true_anchor_w, cur_true_anchor_h = self.anchors[cur_true_anchor_axs].t()

            # Find the IOU of each predicted box with the groundtruth
            cur_pred_true_ious = cur_pred_boxes.ious(cur_gt_boxes, bias=0)
            # Assign groundtruth boxes to predicted boxes
            cur_ious, _ = cur_pred_true_ious.max(dim=-1)

            # Set the loss to zero for any predicted boxes that had a high iou
            # with a groundtruth target (we won't punish them for not being
            # background). One of these will be selected as the best and be
            # punished for not predicting the groundtruth value.
            conf_mask[bx].view(-1)[cur_ious.view(-1) > self.thresh] = 0

            ####
            # Broadcast the loop over true boxes
            ####
            # Convert the true box coordinates to be comparable with pred output
            # * translate each gtbox to be relative to its assigned gridcell
            # * make w/h relative to anchor box w / h and convert to logspace
            cur_tcoord_x = cur_gx - cur_true_is.float()
            cur_tcoord_y = cur_gy - cur_true_js.float()
            cur_tcoord_w = (cur_gw / cur_true_anchor_w).log()
            cur_tcoord_h = (cur_gh / cur_true_anchor_h).log()

            if 0:
                cur_true_anchor_axs_ = cur_true_anchor_axs.cpu().numpy()
                cur_true_js_ = cur_true_js.cpu().numpy()
                cur_true_is_ = cur_true_is.cpu().numpy()

                iou_raveled_idxs = np.ravel_multi_index([
                    cur_true_anchor_axs_, cur_true_js_, cur_true_is_, np.arange(nT)
                ], cur_pred_true_ious.shape)
                # Get the ious with the assigned boxes for each truth
                cur_true_ious = cur_pred_true_ious.view(-1)[iou_raveled_idxs]

                raveled_idxs = np.ravel_multi_index([
                    [bx], cur_true_anchor_axs_, [0], cur_true_js_, cur_true_is_
                ], coord_mask.shape)

                # --------------------------------------------
                multi_index = ([bx], cur_true_anchor_axs_, [0], cur_true_js_, cur_true_is_)
                # multi_index_ = multi_index
                raveled_idxs_b0 = np.ravel_multi_index(multi_index, tcoord.shape)
                # A bit faster than calling ravel_multi_index separately for [1], [2], and [3]
                raveled_idxs_b1 = raveled_idxs_b0 + nPixels
                raveled_idxs_b2 = raveled_idxs_b0 + nPixels * 2
                raveled_idxs_b3 = raveled_idxs_b0 + nPixels * 3
            else:
                iou_raveled_idxs = torch_ravel_multi_index([
                    cur_true_anchor_axs, cur_true_js, cur_true_is,
                    torch.arange(nT, device=device, dtype=torch.long)
                ], cur_pred_true_ious.shape, device)
                # Get the ious with the assigned boxes for each truth
                cur_true_ious = cur_pred_true_ious.view(-1)[iou_raveled_idxs]

                Bxs = torch.full_like(cur_true_anchor_axs, bx)
                Zxs = torch.full_like(cur_true_anchor_axs, 0)

                multi_index = [Bxs, cur_true_anchor_axs, Zxs, cur_true_js, cur_true_is]
                multi_index = torch.cat([x.view(-1, 1) for x in multi_index], dim=1)
                raveled_idxs = torch_ravel_multi_index(multi_index, coord_mask.shape, device)

                # --------------------------------------------
                # We reuse the previous multi-index because the dims are
                # broadcastable at [:, :, [0], :, :]
                raveled_idxs_b0 = torch_ravel_multi_index(multi_index, tcoord.shape, device)
                # A bit faster than calling ravel_multi_index separately for [1], [2], and [3]
                raveled_idxs_b1 = raveled_idxs_b0 + nPixels
                raveled_idxs_b2 = raveled_idxs_b0 + nPixels * 2
                raveled_idxs_b3 = raveled_idxs_b0 + nPixels * 3
                # --------------------------------------------

            coord_mask.view(-1)[raveled_idxs] = cur_true_coord_weights
            cls_mask.view(-1)[raveled_idxs]   = cur_true_cls_weights
            conf_mask.view(-1)[raveled_idxs]  = cur_true_conf_weights

            tcoord.view(-1)[raveled_idxs_b0] = cur_tcoord_x
            tcoord.view(-1)[raveled_idxs_b1] = cur_tcoord_y
            tcoord.view(-1)[raveled_idxs_b2] = cur_tcoord_w
            tcoord.view(-1)[raveled_idxs_b3] = cur_tcoord_h

            tcls.view(-1)[raveled_idxs]  = cur_gt_cls
            tconf.view(-1)[raveled_idxs] = cur_true_ious

        # Because the coord and conf masks are applied within an MSE loss we need to sqrt them
        coord_mask = coord_mask.sqrt()
        conf_mask = conf_mask.sqrt()
        coord_mask = coord_mask.expand_as(tcoord)
        return coord_mask, conf_mask, cls_mask, tcoord, tconf, tcls
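
For reference, a minimal sketch of the coordinate encoding that build_targets writes into tcoord, using made-up numbers for the grid, anchor, and box:

    import math

    nW = nH = 13                          # output grid size (illustrative)
    anchor_w, anchor_h = 3.2, 4.0         # hypothetical anchor, in grid units

    # a single ground truth box in grid units (cx, cy, w, h)
    gx, gy, gw, gh = 6.3, 4.8, 2.0, 5.0

    # the grid cell responsible for this box
    gi, gj = int(gx), int(gy)             # (6, 4)

    # the values written into tcoord for the assigned (anchor, cell):
    # * x/y are offsets within the assigned cell
    # * w/h are log-ratios relative to the assigned anchor
    tx = gx - gi                          # ~0.3
    ty = gy - gj                          # ~0.8
    tw = math.log(gw / anchor_w)          # log(0.625)
    th = math.log(gh / anchor_h)          # log(1.25)

    # the coord/conf masks are square-rooted at the end because they multiply
    # terms inside an MSE loss: (sqrt(m) * err) ** 2 == m * err ** 2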
Esempio n. 22
0
    def _decode(self, output):
        """
        Returns array of detections for every image in batch

        CommandLine:
            python ~/code/netharn/netharn/box_models/yolo2/light_postproc.py GetBoundingBoxes._decode

        Examples:
            >>> # xdoctest: +REQUIRES(module:kwimage)
            >>> import torch
            >>> torch.random.manual_seed(0)
            >>> anchors = np.array([(1.3221, 1.73145), (3.19275, 4.00944), (5.05587, 8.09892), (9.47112, 4.84053), (11.2364, 10.0071)])
            >>> self = GetBoundingBoxes(anchors=anchors, num_classes=20, conf_thresh=.14, nms_thresh=0.5)
            >>> output = torch.randn(16, 5, 5 + 20, 9, 9)
            >>> from netharn import XPU
            >>> output = XPU.coerce('auto').move(output)
            >>> batch_dets = self._decode(output.data)
            >>> assert len(batch_dets) == 16

        Ignore:
            >>> from netharn.models.yolo2.yolo2 import *  # NOQA
            >>> info = dev_demodata()
            >>> outputs = info['outputs']
            >>> cxywh_energy = outputs['cxywh_energy']
            >>> raw = info['raw']
            >>> raw_ = raw.clone()

            >>> self = GetBoundingBoxes(anchors=info['model'].anchors, num_classes=20, conf_thresh=.14, nms_thresh=0.5)
            >>> dets = self._decode(raw)[0]
            >>> dets.scores

            >>> self, output = ub.take(info, ['coder', 'outputs'])
            >>> batch_dets = self.decode_batch(output)
            >>> dets = batch_dets[0]
            >>> dets.scores

        """
        import kwimage
        # don't modify in place
        raw_ = output.clone()

        # Variables
        bsize = raw_.shape[0]
        h, w = raw_.shape[-2:]

        device = raw_.device

        if self.anchors.device != device:
            self.anchors = self.anchors.to(device)

        # Compute xc,yc, w,h, box_score on Tensor
        lin_x = torch.linspace(0, w - 1, w,
                               device=device).repeat(h, 1).view(h * w)
        lin_y = torch.linspace(0, h - 1, h, device=device).repeat(
            w, 1).t().contiguous().view(h * w)

        anchor_w = self.anchors[:, 0].contiguous().view(1, self.num_anchors, 1)
        anchor_h = self.anchors[:, 1].contiguous().view(1, self.num_anchors, 1)

        # -1 == 5+num_classes (we can drop feature maps if 1 class)
        output_ = raw_.view(bsize, self.num_anchors, -1, h * w)

        output_[:, :, 0, :].sigmoid_().add_(lin_x).div_(w)  # X center
        output_[:, :, 1, :].sigmoid_().add_(lin_y).div_(h)  # Y center
        output_[:, :, 2, :].exp_().mul_(anchor_w).div_(w)  # Width
        output_[:, :, 3, :].exp_().mul_(anchor_h).div_(h)  # Height
        output_[:, :, 4, :].sigmoid_()  # Box score

        # output_[:, :, 0:4].sum()
        # torch.all(cxywh.view(-1) == output_[:, :, 0:4].contiguous().view(-1))

        # Compute class_score
        if self.num_classes > 1:
            cls_scores = torch.nn.functional.softmax(output_[:, :, 5:, :], 2)

            cls_max, cls_max_idx = torch.max(cls_scores, 2)
            cls_max.mul_(output_[:, :, 4, :])
        else:
            cls_max = output_[:, :, 4, :]
            cls_max_idx = torch.zeros_like(cls_max)

        # Save detection if conf*class_conf is higher than threshold

        # Newest lightnet code, which is based on my mode1 code
        score_thresh = cls_max > self.conf_thresh
        score_thresh_flat = score_thresh.view(-1)

        if score_thresh.sum() == 0:
            batch_dets = []
            for i in range(bsize):
                batch_dets.append(
                    kwimage.Detections(
                        boxes=kwimage.Boxes(
                            torch.empty((0, 4),
                                        dtype=torch.float32,
                                        device=device), 'cxywh'),
                        scores=torch.empty(0,
                                           dtype=torch.float32,
                                           device=device),
                        class_idxs=torch.empty(0,
                                               dtype=torch.int64,
                                               device=device),
                    ))
        else:
            # Mask select boxes > conf_thresh
            coords = output_.transpose(2, 3)[..., 0:4]
            coords = coords[score_thresh[...,
                                         None].expand_as(coords)].view(-1, 4)

            scores = cls_max[score_thresh]

            class_idxs = cls_max_idx[score_thresh]

            stacked_dets = kwimage.Detections(
                boxes=kwimage.Boxes(coords, 'cxywh'),
                scores=scores,
                class_idxs=class_idxs,
            )

            # Get indexes of splits between images of batch
            max_det_per_batch = len(self.anchors) * h * w
            slices = [
                slice(max_det_per_batch * i, max_det_per_batch * (i + 1))
                for i in range(bsize)
            ]
            det_per_batch = torch.IntTensor(
                [score_thresh_flat[s].int().sum() for s in slices])
            split_idx = torch.cumsum(det_per_batch, dim=0)

            batch_dets = []
            start = 0
            for end in split_idx:
                dets = stacked_dets[start:end]
                dets = dets.non_max_supress(thresh=self.nms_thresh)
                batch_dets.append(dets)
                start = end
        return batch_dets
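
For reference, a minimal sketch of the per-cell decode applied above to a single anchor at grid cell (i, j); the raw values and anchor size are made up:

    import torch

    w, h = 9, 9                          # grid width / height
    i, j = 3, 5                          # cell column / row
    anchor_w, anchor_h = 3.19, 4.01      # hypothetical anchor, in grid units

    raw = torch.tensor([0.2, -0.1, 0.4, 0.3, 1.5])  # tx, ty, tw, th, objectness

    cx = (torch.sigmoid(raw[0]) + i) / w   # normalized center x
    cy = (torch.sigmoid(raw[1]) + j) / h   # normalized center y
    bw = torch.exp(raw[2]) * anchor_w / w  # normalized width
    bh = torch.exp(raw[3]) * anchor_h / h  # normalized height
    obj = torch.sigmoid(raw[4])            # box score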
Esempio n. 23
0
    def random_negatives(self,
                         num,
                         anchors=None,
                         window_size=None,
                         gids=None,
                         thresh=0.0,
                         exact=True,
                         rng=None,
                         patience=None):
        """
        Finds random boxes that don't have a large overlap with positive
        instances.

        Args:
            num (int): number of negative boxes to generate (actual number of
                boxes returned may be less unless `exact=True`)

            anchors (ndarray): prior normalized aspect ratios for negative
                boxes. Mutually exclusive with `window_size`.

            window_size (ndarray): absolute (W, H) sizes to use for negative
                boxes.  Mutually exclusive with `anchors`.

            gids (List[int]): image-ids to generate negatives for,
                if not specified generates for all images.

            thresh (float): overlap area threshold as a fraction of
                the negative box size. When thresh=0.0, negatives cannot
                overlap any positive; when thresh=1.0, there are no
                constraints on negative placement.

            exact (bool): if True, ensure that we generate exactly `num` boxes

            rng (RandomState): random number generator

            patience (int, optional): when `exact=True`, the number of
                consecutive low-yield sampling rounds tolerated before giving
                up. Defaults to a heuristic based on `num`.

        Example:
            >>> from ndsampler.isect_indexer import *
            >>> import ndsampler
            >>> import kwcoco
            >>> dset = kwcoco.CocoDataset.demo('shapes8')
            >>> self = FrameIntersectionIndex.from_coco(dset)
            >>> anchors = np.array([[.35, .15], [.2, .2], [.1, .1]])
            >>> #num = 25
            >>> num = 5
            >>> rng = kwarray.ensure_rng(None)
            >>> neg_gids, neg_boxes = self.random_negatives(
            >>>     num, anchors, gids=[1], rng=rng, thresh=0.01, exact=1)
            >>> # xdoc: +REQUIRES(--show)
            >>> gid = sorted(set(neg_gids))[0]
            >>> boxes = neg_boxes.compress(neg_gids == gid)
            >>> import kwplot
            >>> kwplot.autompl()
            >>> img = kwimage.imread(dset.imgs[gid]['file_name'])
            >>> kwplot.imshow(img, doclf=True, fnum=1, colorspace='bgr')
            >>> support = self._support(gid)
            >>> kwplot.draw_boxes(support, color='blue')
            >>> kwplot.draw_boxes(boxes, color='orange')

        Example:
            >>> from ndsampler.isect_indexer import *
            >>> import kwcoco
            >>> dset = kwcoco.CocoDataset.demo('shapes8')
            >>> self = FrameIntersectionIndex.from_coco(dset)
            >>> #num = 25
            >>> num = 5
            >>> rng = kwarray.ensure_rng(None)
            >>> window_size = (50, 50)
            >>> neg_gids, neg_boxes = self.random_negatives(
            >>>     num, window_size=window_size, gids=[1], rng=rng,
            >>>     thresh=0.01, exact=1)
            >>> # xdoc: +REQUIRES(--show)
            >>> import kwplot
            >>> kwplot.autompl()
            >>> gid = sorted(set(neg_gids))[0]
            >>> boxes = neg_boxes.compress(neg_gids == gid)
            >>> img = kwimage.imread(dset.imgs[gid]['file_name'])
            >>> kwplot.imshow(img, doclf=True, fnum=1, colorspace='bgr')
            >>> support = self._support(gid)
            >>> support.draw(color='blue')
            >>> boxes.draw(color='orange')
        """

        if not ((window_size is None) ^ (anchors is None)):
            raise ValueError('exactly one of window_size or anchors must be specified')

        rng = kwarray.ensure_rng(rng)
        all_gids = self.all_gids if gids is None else gids

        def _generate_rel(n):
            # Generate n candidate boxes in the normalized 0-1 domain
            cand_boxes = kwimage.Boxes.random(num=n,
                                              scale=1.0,
                                              format='tlbr',
                                              anchors=anchors,
                                              anchor_std=0,
                                              rng=rng)

            chosen_gids = np.array(sorted(rng.choice(all_gids, size=n)))
            gid_to_boxes = kwarray.group_items(cand_boxes, chosen_gids, axis=0)

            neg_gids = []
            neg_boxes = []
            for gid, img_boxes in gid_to_boxes.items():
                qtree = self.qtrees[gid]
                # scale from normalized coordinates to image coordinates
                img_boxes = img_boxes.scale((qtree.width, qtree.height))
                for box in img_boxes:
                    # isect_aids, overlaps = self.ious(gid, box)
                    isect_aids, overlaps = self.iooas(gid, box)
                    if len(overlaps) == 0 or overlaps.max() < thresh:
                        neg_gids.append(gid)
                        neg_boxes.append(box.data)
            return neg_gids, neg_boxes

        def _generate_abs(n):
            # Randomly choose images to generate boxes for
            chosen_gids = np.array(sorted(rng.choice(all_gids, size=n)))
            gid_to_nboxes = ub.dict_hist(chosen_gids)

            neg_gids = []
            neg_boxes = []
            for gid, nboxes in gid_to_nboxes.items():
                qtree = self.qtrees[gid]
                scale = (qtree.width, qtree.height)
                anchors_ = np.array([window_size]) / np.array(scale)
                if np.any(anchors_ > 1.0):
                    continue
                img_boxes = kwimage.Boxes.random(num=nboxes,
                                                 scale=1.0,
                                                 format='tlbr',
                                                 anchors=anchors_,
                                                 anchor_std=0,
                                                 rng=rng)
                img_boxes = img_boxes.scale(scale)
                for box in img_boxes:
                    # isect_aids, overlaps = self.ious(gid, box)
                    isect_aids, overlaps = self.iooas(gid, box)
                    if len(overlaps) == 0 or overlaps.max() < thresh:
                        neg_gids.append(gid)
                        neg_boxes.append(box.data)
            return neg_gids, neg_boxes

        if window_size is not None:
            _generate = _generate_abs
        elif anchors is not None:
            _generate = _generate_rel
        else:
            raise ValueError(
                'must specify either window_size or anchors')

        if exact:
            # TODO: Don't attempt to sample negatives from images where the
            # positives cover more than a threshold percent. (Handle the case
            # of chip detections)

            factor = 2  # oversample factor
            if patience is None:
                patience = int(np.sqrt(num * 10) + 1)
            remaining_patience = patience
            timer = ub.Timer().tic()
            # Generate boxes until we have enough
            neg_gids, neg_boxes = _generate(n=int(num * factor))
            n_tries = 1
            for n_tries in it.count(n_tries):
                want = num - len(neg_boxes)
                if want <= 0:
                    break
                extra_gids, extra_boxes = _generate(n=int(want * factor))
                neg_gids.extend(extra_gids)
                neg_boxes.extend(extra_boxes)
                if len(neg_boxes) < num:
                    # If we haven't found a significant number of boxes our
                    # patience decreases (if the wall time is getting large)
                    if len(extra_boxes) <= (num // 10) and timer.toc() > 1.0:
                        remaining_patience -= 1
                        if remaining_patience == 0:
                            break

            if len(neg_boxes) < num:
                # raise an error (or warn) if we could not find enough boxes
                message = ('Cannot make a negative sample with thresh={} '
                           'in under {} tries. Found {} but need {}'.format(
                               thresh, n_tries, len(neg_boxes), num))
                if exact == 'warn':
                    warnings.warn(message)
                else:
                    raise Exception(message)
            print('n_tries = {!r}'.format(n_tries))

            neg_gids = neg_gids[:num]
            neg_boxes = neg_boxes[:num]
        else:
            neg_gids, neg_boxes = _generate(n=num)

        neg_gids = np.array(neg_gids)
        neg_boxes = kwimage.Boxes(np.array(neg_boxes), 'tlbr')
        return neg_gids, neg_boxes
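
A minimal standalone sketch of the rejection test each candidate negative goes through, using kwimage.Boxes directly; the positive box, image size, and threshold are made up:

    import kwarray
    import kwimage
    import numpy as np

    rng = kwarray.ensure_rng(0)
    positives = kwimage.Boxes(np.array([[10, 10, 50, 50]]), 'tlbr')

    # candidate negatives in absolute image coordinates
    candidates = kwimage.Boxes.random(num=20, scale=128.0, format='tlbr', rng=rng)

    # overlap with each positive as a fraction of the candidate's own area
    isect = candidates.isect_area(positives)        # (20 x 1) intersection areas
    frac = isect / (candidates.area + 1e-6)
    keep_flags = frac.max(axis=1) < 0.01            # reject overlapping candidates
    negatives = candidates.compress(keep_flags)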
Esempio n. 24
0
def new_video_sample_grid(dset, window_dims, window_overlap=0.0,
                          classes_of_interest=None, ignore_coverage_thresh=0.6,
                          negative_classes={'ignore', 'background'}):
    """
    Create a space-time grid to sample with

    Example:
        >>> from ndsampler.coco_regions import *  # NOQA
        >>> import kwcoco
        >>> dset = kwcoco.CocoDataset.demo('vidshapes8-multispectral', num_frames=5)
        >>> dset.conform()
        >>> window_dims = (2, 224, 224)
        >>> sample_grid = new_video_sample_grid(dset, window_dims)
        >>> print('sample_grid = {}'.format(ub.repr2(sample_grid, nl=2)))
        >>> # Now try to load a sample
        >>> tr = sample_grid['positives'][0]
        >>> import ndsampler
        >>> sampler = ndsampler.CocoSampler(dset)
        >>> tr_ = sampler._infer_target_attributes(tr)
        >>> print('tr_ = {}'.format(ub.repr2(tr_, nl=1)))
        >>> sample = sampler.load_sample(tr)
        >>> assert sample['im'].shape == (2, 224, 224, 5)

    Ignore:
        import xdev
        globals().update(xdev.get_func_kwargs(new_video_sample_grid))
    """
    import kwarray
    from ndsampler import isect_indexer
    keepbound = True

    if classes_of_interest:
        raise NotImplementedError

    # Create a sliding window object for each specific video (because they may
    # have different sizes; technically we could memoize this)
    vidid_to_slider = {}
    for vidid, video in dset.index.videos.items():
        gids = dset.index.vidid_to_gids[vidid]
        num_frames = len(gids)
        full_dims = [num_frames, video['height'], video['width']]
        window_dims_ = full_dims if window_dims == 'full' else window_dims
        slider = kwarray.SlidingWindow(full_dims, window_dims_,
                                       overlap=window_overlap,
                                       keepbound=keepbound,
                                       allow_overshoot=True)

        vidid_to_slider[vidid] = slider

    _isect_index = isect_indexer.FrameIntersectionIndex.from_coco(dset)

    positives = []
    negatives = []
    for vidid, slider in vidid_to_slider.items():
        regions = list(slider)
        gids = dset.index.vidid_to_gids[vidid]
        boxes = []
        box_gids = []
        for region in regions:
            t_sl, y_sl, x_sl = region
            region_gids = gids[t_sl]
            box_gids.append(region_gids)
            boxes.append([x_sl.start,  y_sl.start, x_sl.stop, y_sl.stop])
        boxes = kwimage.Boxes(np.array(boxes), 'ltrb')

        for region, region_gids, box in zip(regions, box_gids, boxes):
            # Check to see what annotations this window-box overlaps with
            region_aids = []
            for gid in region_gids:
                # TODO: memoize to prevent dup queries (box is not hashable)
                aids = _isect_index.overlapping_aids(gid, box)
                region_aids.append(aids)

            pos_aids = sorted(ub.flatten(region_aids))
            space_slice = region[1:3]
            time_slice = region[0]

            tr = {
                'vidid': vidid,
                'time_slice': time_slice,
                'space_slice': space_slice,
                # 'slices': region,
                'gids': region_gids,
                'aids': pos_aids,
            }
            if len(pos_aids):
                positives.append(tr)
            else:
                negatives.append(tr)

    print('Found {} positives'.format(len(positives)))
    print('Found {} negatives'.format(len(negatives)))
    sample_grid = {
        'positives': positives,
        'negatives': negatives,
    }
    return sample_grid
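
A small sketch of what each slider region produced above looks like for the video case, assuming a 5-frame 100x100 video and a (2, 64, 64) space-time window (illustrative numbers):

    import kwarray

    slider = kwarray.SlidingWindow(
        (5, 100, 100), (2, 64, 64), overlap=0.0,
        keepbound=True, allow_overshoot=True)

    region = list(slider)[0]
    t_sl, y_sl, x_sl = region
    # t_sl selects the frames (gids[t_sl]) that belong to the sample, and the
    # spatial slices become an ltrb box used for the annotation overlap query
    box = [x_sl.start, y_sl.start, x_sl.stop, y_sl.stop]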
Esempio n. 25
0
    def _support(self, gid):
        qtree = self.qtrees[gid]
        support_boxes = kwimage.Boxes(list(qtree.aid_to_tlbr.values()), 'tlbr')

        return support_boxes
Esempio n. 26
0
def new_image_sample_grid(dset, window_dims, window_overlap=0.0,
                          classes_of_interest=None, ignore_coverage_thresh=0.6,
                          negative_classes={'ignore', 'background'}):
    """
    Create a spatial grid to sample with

    Example:
        >>> from ndsampler.coco_regions import *  # NOQA
        >>> import kwcoco
        >>> dset = kwcoco.CocoDataset.demo('shapes8')
        >>> dset = kwcoco.CocoDataset.demo('vidshapes8-multispectral')
        >>> window_dims = (224, 224)
        >>> sample_grid = new_image_sample_grid(dset, window_dims)
        >>> print('sample_grid = {}'.format(ub.repr2(sample_grid, nl=2)))
        >>> # Now try to load a sample
        >>> tr = sample_grid['positives'][0]
        >>> import ndsampler
        >>> sampler = ndsampler.CocoSampler(dset)
        >>> tr['channels'] = '<all>'
        >>> tr_ = sampler._infer_target_attributes(tr)
        >>> print('tr_ = {}'.format(ub.repr2(tr_, nl=1)))
        >>> sample = sampler.load_sample(tr)
        >>> assert sample['im'].shape == (224, 224, 5)

    Ignore:
        import xdev
        globals().update(xdev.get_func_kwargs(new_image_sample_grid))
    """
    # import netharn as nh
    import kwarray
    from ndsampler import isect_indexer
    keepbound = True

    # Create a sliding window object for each specific image (because they may
    # have different sizes, technically we could memoize this)
    gid_to_slider = {}
    for img in dset.imgs.values():
        full_dims = [img['height'], img['width']]
        window_dims_ = full_dims if window_dims == 'full' else window_dims
        slider = kwarray.SlidingWindow(full_dims, window_dims_,
                                       overlap=window_overlap, keepbound=keepbound,
                                       allow_overshoot=True)
        gid_to_slider[img['id']] = slider

    _isect_index = isect_indexer.FrameIntersectionIndex.from_coco(dset)

    positives = []
    negatives = []
    for gid, slider in gid_to_slider.items():

        # For each image, create a box for each spatial region in the slider
        boxes = []
        regions = list(slider)
        for region in regions:
            y_sl, x_sl = region
            boxes.append([x_sl.start,  y_sl.start, x_sl.stop, y_sl.stop])
        boxes = kwimage.Boxes(np.array(boxes), 'ltrb')

        for region, box in zip(regions, boxes):
            # Check to see what annotations this window-box overlaps with
            aids = _isect_index.overlapping_aids(gid, box)

            # Look at the categories within this region
            catnames = [
                dset.cats[dset.anns[aid]['category_id']]['name'].lower()
                for aid in aids
            ]

            if ignore_coverage_thresh:
                ignore_flags = [catname == 'ignore' for catname in catnames]
                if any(ignore_flags):
                    # If almost the entire window is marked as ignored then
                    # just skip this window.
                    ignore_aids = list(ub.compress(aids, ignore_flags))
                    ignore_boxes = dset.annots(ignore_aids).boxes

                    # Get an upper bound on coverage to short circuit extra
                    # computation in simple cases.
                    box_area = box.area.sum()
                    coverage_ub = ignore_boxes.area.sum() / box_area
                    if coverage_ub  > ignore_coverage_thresh:
                        max_coverage = ignore_boxes.iooas(box).max()
                        if max_coverage > ignore_coverage_thresh:
                            continue
                        elif len(ignore_boxes) > 1:
                            # We have to test the complex case
                            try:
                                from shapely.ops import cascaded_union
                                ignore_shape = cascaded_union(ignore_boxes.to_shapely())
                                region_shape = box[None, :].to_shapely()[0]
                                coverage_shape = ignore_shape.intersection(region_shape)
                                real_coverage = coverage_shape.area / box_area
                                if real_coverage > ignore_coverage_thresh:
                                    continue
                            except Exception as ex:
                                import warnings
                                warnings.warn(
                                    'ignore region select had non-critical '
                                    'issue ex = {!r}'.format(ex))

            if classes_of_interest:
                # If there are CoIs then only count a region as positive if one
                # of those is in this region
                interest_flags = np.array([
                    catname in classes_of_interest for catname in catnames])
                pos_aids = list(ub.compress(aids, interest_flags))
            elif negative_classes:
                # Don't count negative classes as positives
                nonnegative_flags = np.array([
                    catname not in negative_classes for catname in catnames])
                pos_aids = list(ub.compress(aids, nonnegative_flags))
            else:
                pos_aids = aids

            # aids = sampler.regions.overlapping_aids(gid, box, visible_thresh=0.001)
            tr = {
                'gid': gid,
                'slices': region,
                'aids': aids,
            }
            if len(pos_aids):
                positives.append(tr)
            else:
                negatives.append(tr)

    print('Found {} positives'.format(len(positives)))
    print('Found {} negatives'.format(len(negatives)))
    sample_grid = {
        'positives': positives,
        'negatives': negatives,
    }
    return sample_grid
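
A small numeric sketch of the ignore-coverage short circuit used above: the sum of the ignore-box areas is a cheap upper bound on coverage, and only when it exceeds the threshold do we pay for the exact union (the boxes and threshold here are made up):

    import numpy as np
    import kwimage
    from shapely.geometry import box as shp_box
    from shapely.ops import unary_union

    window = kwimage.Boxes(np.array([[0, 0, 100, 100]]), 'ltrb')
    ignore_boxes = kwimage.Boxes(np.array([
        [0, 0, 60, 60],
        [40, 40, 100, 100],
    ]), 'ltrb')

    window_area = window.area.sum()                       # 10000
    coverage_ub = ignore_boxes.area.sum() / window_area   # 0.72 (overlap double counted)

    # exact coverage via the union of the ignore regions
    ignore_shape = unary_union([shp_box(*row) for row in ignore_boxes.data])
    region_shape = shp_box(0, 0, 100, 100)
    real_coverage = ignore_shape.intersection(region_shape).area / window_area  # 0.68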
Esempio n. 27
0
def torch_nms(tlbr, scores, classes=None, thresh=.5, bias=0, fast=False):
    """
    Non maximum suppression implemented with pytorch tensors

    CURRENTLY NOT WORKING

    Args:
        tlbr (Tensor): Bounding boxes of one image in the format (tlbr)
        scores (Tensor): Scores of each box
        classes (Tensor, optional): the classes of each box. If specified nms is applied to each class separately.
        thresh (float): iou threshold

    Returns:
        ByteTensor: keep: boolean array indicating which boxes were not pruned.

    Example:
        >>> # DISABLE_DOCTEST
        >>> # xdoctest: +REQUIRES(module:torch)
        >>> import torch
        >>> import numpy as np
        >>> tlbr = torch.FloatTensor(np.array([
        >>>     [0, 0, 100, 100],
        >>>     [100, 100, 10, 10],
        >>>     [10, 10, 100, 100],
        >>>     [50, 50, 100, 100],
        >>>     [100, 100, 130, 130],
        >>>     [100, 100, 130, 130],
        >>>     [100, 100, 130, 130],
        >>> ], dtype=np.float32))
        >>> scores = torch.FloatTensor(np.array([.1, .5, .9, .1, .3, .5, .4]))
        >>> classes = torch.LongTensor(np.array([0, 0, 0, 0, 0, 0, 0]))
        >>> thresh = .5
        >>> flags = torch_nms(tlbr, scores, classes, thresh)
        >>> keep = torch.nonzero(flags).view(-1)
        >>> tlbr[flags]
        >>> tlbr[keep]

    Example:
        >>> # DISABLE_DOCTEST
        >>> # xdoctest: +REQUIRES(module:torch)
        >>> import torch
        >>> import numpy as np
        >>> # Test to check that conflicts are correctly resolved
        >>> tlbr = torch.FloatTensor(np.array([
        >>>     [100, 100, 150, 101],
        >>>     [120, 100, 180, 101],
        >>>     [150, 100, 200, 101],
        >>> ], dtype=np.float32))
        >>> scores = torch.FloatTensor(np.linspace(.8, .9, len(tlbr)))
        >>> classes = None
        >>> thresh = .3
        >>> keep = torch_nms(tlbr, scores, classes, thresh, fast=False)
        >>> tlbr[keep]
    """
    if tlbr.numel() == 0:
        return []

    # Sort coordinates by descending score
    ordered_scores, order = scores.sort(0, descending=True)

    import kwimage

    boxes = kwimage.Boxes(tlbr[order], 'tlbr')
    ious = boxes.ious(boxes, bias=bias)

    # if False:
    #     x1, y1, x2, y2 = tlbr[order].split(1, 1)

    #     # Compute dx and dy between each pair of boxes (these mat contain every pair twice...)
    #     dx = (x2.min(x2.t()) - x1.max(x1.t())).clamp_(min=0)
    #     dy = (y2.min(y2.t()) - y1.max(y1.t())).clamp_(min=0)

    #     # Compute iou
    #     intersections = dx * dy
    #     areas = (x2 - x1) * (y2 - y1)
    #     unions = (areas + areas.t()) - intersections
    #     ious = intersections / unions

    # Filter based on iou (and class)
    # NOTE: We are using following convention:
    #     * suppress if overlap > thresh
    #     * consider if overlap <= thresh
    # This convention has the property that when thresh=0, we don't just
    # remove everything.
    if _TORCH_HAS_BOOL_COMP:
        conflicting = (ious > thresh).byte().triu(1).bool()
    else:
        # Old way
        conflicting = (ious > thresh).triu(1)

    if classes is not None:
        ordered_classes = classes[order]
        same_class = (
            ordered_classes.unsqueeze(0) == ordered_classes.unsqueeze(1))
        conflicting = (conflicting & same_class)
    # Now we have a 2D matrix where conflicting[i, j] indicates if box[i]
    # conflicts with box[j]. For each box[i] we want to only keep the first
    # one that does not conflict with any other box[j].

    # Find out how many conflicts each ordered box has with other boxes that
    # have higher scores than it does. In other words...
    # n_conflicts[i] is the number of conflicts box[i] has with other boxes
    # that have a **higher score** than box[i] does. We will definitely
    # keep any box where n_conflicts is 0, but we need to postprocess because
    # we might actually keep some boxes currently marked as conflicted.
    n_conflicts = conflicting.sum(0).byte()

    if not fast:
        # It is not enough to simply use all places where there are no
        # conflicts. Say we have boxes A, B, and C, where A conflicts with B,
        # B conflicts with C, but A does not conflict with C. The fact that we
        # keep A should mean that C is no longer conflicted.

        if True:
            # Marginally faster. best=618.2 us
            ordered_keep = np.zeros(len(conflicting), dtype=np.uint8)
            supress = np.zeros(len(conflicting), dtype=bool)
            for i, row in enumerate(conflicting.cpu().numpy() > 0):
                if not supress[i]:
                    ordered_keep[i] = 1
                    supress[row] = 1
            ordered_keep = torch.ByteTensor(ordered_keep).to(tlbr.device)
        else:
            # Marginally slower: best=1.382 ms,
            n_conflicts_post = n_conflicts.cpu()
            conflicting = conflicting.cpu()

            keep_len = len(n_conflicts_post) - 1
            for i in range(1, keep_len):
                if n_conflicts_post[i] > 0:
                    n_conflicts_post -= conflicting[i]

            n_conflicts = n_conflicts_post.to(n_conflicts.device)
            ordered_keep = (n_conflicts == 0)
    else:
        # Now we can simply keep any box that has no conflicts.
        ordered_keep = (n_conflicts == 0)

    # Unsort, so keep is aligned with input boxes
    keep = ordered_keep.new(*ordered_keep.size())
    keep.scatter_(0, order, ordered_keep)
    return keep
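
A tiny worked example of the conflict-resolution step above, using the A / B / C case from the comments; the conflict matrix is written by hand:

    import numpy as np

    # boxes already sorted by descending score: A, B, C
    # A overlaps B, B overlaps C, but A does not overlap C
    conflicting = np.array([
        [0, 1, 0],
        [0, 0, 1],
        [0, 0, 0],
    ], dtype=bool)

    # the fast path keeps only boxes with no higher-scored conflicts
    n_conflicts = conflicting.sum(axis=0)     # [0, 1, 1] -> keeps only A

    # the careful path walks in score order and suppresses transitively
    keep = np.zeros(3, dtype=np.uint8)
    suppress = np.zeros(3, dtype=bool)
    for i, row in enumerate(conflicting):
        if not suppress[i]:
            keep[i] = 1
            suppress[row] = 1
    # keep == [1, 0, 1]: A suppresses B, so C survives even though it
    # conflicted with the (now suppressed) B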
Esempio n. 28
0
    def from_coco(KW18, coco_dset):
        import kwimage
        raw = {col: None for col in KW18.DEFAULT_COLUMNS}
        anns = coco_dset.dataset['annotations']
        boxes = kwimage.Boxes(np.array([ann['bbox'] for ann in anns]), 'xywh')
        tlbr = boxes.to_tlbr()
        cxywh = tlbr.to_cxywh()
        tl_x, tl_y, br_x, br_y = tlbr.data.T

        cx = cxywh.data[:, 0]
        cy = cxywh.data[:, 1]

        # Create track ids if not given
        track_ids = np.array([ann.get('track_id', np.nan) for ann in anns])
        missing = np.isnan(track_ids)
        valid_track_ids = track_ids[~missing]
        if len(valid_track_ids) == 0:
            next_track_id = 1
        else:
            next_track_id = valid_track_ids.max() + 1
        num_need = np.sum(missing)
        new_track_ids = np.arange(next_track_id, next_track_id + num_need)
        track_ids[missing] = new_track_ids
        track_ids = track_ids.astype(int)

        scores = np.array([ann.get('score', -1) for ann in anns])
        image_ids = np.array([ann['image_id'] for ann in anns])
        cids = np.array([ann.get('category_id', -1) for ann in anns])

        num = len(anns)

        raw['track_id'] = track_ids
        raw['track_length'] = np.full(num, fill_value=-1)
        raw['frame_number'] = image_ids

        raw['tracking_plane_loc_x'] = cx
        raw['tracking_plane_loc_y'] = cy

        raw['velocity_x'] = np.full(num, fill_value=0)
        raw['velocity_y'] = np.full(num, fill_value=0)

        raw['image_loc_x'] = cx
        raw['image_loc_y'] = cy

        raw['img_bbox_tl_x'] = tl_x
        raw['img_bbox_tl_y'] = tl_y
        raw['img_bbox_br_x'] = br_x
        raw['img_bbox_br_y'] = br_y

        raw['area'] = boxes.area.ravel()

        raw['world_loc_x'] = np.full(num, fill_value=-1)
        raw['world_loc_y'] = np.full(num, fill_value=-1)
        raw['world_loc_z'] = np.full(num, fill_value=-1)

        raw['timestamp'] = np.full(num, fill_value=-1)

        raw['confidence'] = scores
        raw['object_type_id'] = cids

        raw = {k: v for k, v in raw.items() if v is not None}

        track_ids, groupxs = kwarray.group_indices(raw['track_id'])
        for groupx in groupxs:
            raw['track_length'][groupx] = len(groupx)

        self = KW18(raw)
        return self
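
A small sketch of how the missing track ids are filled in above; the annotation values are made up:

    import numpy as np

    track_ids = np.array([4.0, np.nan, 7.0, np.nan])      # two annots lack a track
    missing = np.isnan(track_ids)
    next_track_id = track_ids[~missing].max() + 1          # 8.0
    num_need = missing.sum()
    track_ids[missing] = np.arange(next_track_id, next_track_id + num_need)
    track_ids = track_ids.astype(int)                       # [4, 8, 7, 9]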
Esempio n. 29
0
    def draw_batch(harn,
                   batch,
                   outputs,
                   batch_dets,
                   idx=None,
                   thresh=None,
                   orig_img=None,
                   num_extra=3):
        """
        Returns:
            np.ndarray: numpy image

        Example:
            >>> # DISABLE_DOCTEST
            >>> harn = setup_harn(bsize=1, datasets='special:voc', pretrained='lightnet')
            >>> harn.initialize()
            >>> batch = harn._demo_batch(0, 'train')

            >>> outputs, loss = harn.run_batch(batch)
            >>> batch_dets = harn.raw_model.coder.decode_batch(outputs)

            >>> stacked = harn.draw_batch(batch, outputs, batch_dets)

            >>> # xdoc: +REQUIRES(--show)
            >>> import kwplot
            >>> kwplot.autompl()  # xdoc: +SKIP
            >>> kwplot.imshow(stacked)
            >>> kwplot.show_if_requested()
        """
        import cv2
        inputs = batch['im']
        labels = batch['label']
        orig_sizes = labels['orig_sizes']

        classes = harn.datasets['train'].sampler.classes

        if idx is None:
            idxs = range(len(inputs))
        else:
            idxs = [idx]

        imgs = []
        for idx in idxs:
            chw01 = inputs[idx]
            pred_dets = batch_dets[idx]
            # pred_dets.meta['classes'] = classes

            import kwimage
            true_dets = kwimage.Detections(
                boxes=kwimage.Boxes(labels['cxywh'][idx], 'cxywh'),
                class_idxs=labels['class_idxs'][idx].view(-1),
                weights=labels['weight'][idx],
                classes=classes,
            )

            pred_dets = pred_dets.numpy()
            true_dets = true_dets.numpy()

            true_dets = true_dets.compress(true_dets.class_idxs != -1)

            if thresh is not None:
                pred_dets = pred_dets.compress(pred_dets.scores > thresh)

            # only show so many predictions
            num_max = len(true_dets) + num_extra
            sortx = pred_dets.argsort(reverse=True)
            pred_dets = pred_dets.take(sortx[0:num_max])

            hwc01 = chw01.cpu().numpy().transpose(1, 2, 0)
            inp_size = np.array(hwc01.shape[0:2][::-1])

            true_dets.boxes.scale(inp_size, inplace=True)
            pred_dets.boxes.scale(inp_size, inplace=True)

            letterbox = harn.datasets[harn.current_tag].letterbox
            orig_size = orig_sizes[idx].cpu().numpy()
            target_size = inp_size
            img = letterbox._img_letterbox_invert(hwc01, orig_size,
                                                  target_size)
            img = np.clip(img, 0, 1)
            # if we are given the original image (to avoid artifacts from
            # inverting a downscale), it must match the inverted shape
            assert orig_img is None or orig_img.shape == img.shape

            true_dets.data['boxes'] = letterbox._boxes_letterbox_invert(
                true_dets.boxes, orig_size, target_size)
            pred_dets.data['boxes'] = letterbox._boxes_letterbox_invert(
                pred_dets.boxes, orig_size, target_size)

            # shift, scale, embed_size = letterbox._letterbox_transform(orig_size, target_size)
            # fig = kwplot.figure(doclf=True, fnum=1)
            # kwplot.imshow(img, colorspace='rgb')
            canvas = (img * 255).astype(np.uint8)
            canvas = true_dets.draw_on(canvas, color='green')
            canvas = pred_dets.draw_on(canvas, color='blue')

            canvas = cv2.resize(canvas, (300, 300))
            imgs.append(canvas)

        stacked = imgs[0] if len(imgs) == 1 else kwimage.stack_images_grid(
            imgs)
        return stacked
Esempio n. 30
0
def warp_affine(image,
                transform,
                dsize=None,
                antialias=False,
                interpolation='linear'):
    """
    Applies an affine transformation to an image with optional antialiasing.

    Args:
        image (ndarray): the input image

        transform (ndarray | Affine): a coercable affine matrix

        dsize (Tuple[int, int] | None | str):
            width and height of the resulting image. If "auto", it is computed
            such that the positive coordinates of the warped image will fit in
            the new canvas. If None, then the image size will not change.

        antialias (bool, default=False):
            if True, determines whether the transform is downsampling and, if
            so, applies antialiasing via a Gaussian blur.

        interpolation (str):
            interpolation code or cv2 integer. Interpolation codes are linear,
            nearest, cubic, lanczos, and area.

    Example:
        >>> from kwimage.im_cv2 import *  # NOQA
        >>> import kwimage
        >>> from kwimage.transform import Affine
        >>> image = kwimage.grab_test_image('astro')
        >>> #image = kwimage.grab_test_image('checkerboard')
        >>> transform = Affine.random() @ Affine.scale(0.05)
        >>> transform = Affine.scale(0.02)
        >>> warped1 = warp_affine(image, transform, dsize='auto', antialias=1, interpolation='nearest')
        >>> warped2 = warp_affine(image, transform, dsize='auto', antialias=0)
        >>> # xdoctest: +REQUIRES(--show)
        >>> import kwplot
        >>> kwplot.autompl()
        >>> pnum_ = kwplot.PlotNums(nRows=1, nCols=2)
        >>> kwplot.imshow(warped1, pnum=pnum_(), title='antialias=True')
        >>> kwplot.imshow(warped2, pnum=pnum_(), title='antialias=False')
        >>> kwplot.show_if_requested()

    Example:
        >>> from kwimage.im_cv2 import *  # NOQA
        >>> import kwimage
        >>> from kwimage.transform import Affine
        >>> image = kwimage.grab_test_image('astro')
        >>> image = kwimage.grab_test_image('checkerboard')
        >>> transform = Affine.random() @ Affine.scale((.1, 1.2))
        >>> warped1 = warp_affine(image, transform, dsize='auto', antialias=1)
        >>> warped2 = warp_affine(image, transform, dsize='auto', antialias=0)
        >>> # xdoctest: +REQUIRES(--show)
        >>> import kwplot
        >>> kwplot.autompl()
        >>> pnum_ = kwplot.PlotNums(nRows=1, nCols=2)
        >>> kwplot.imshow(warped1, pnum=pnum_(), title='antialias=True')
        >>> kwplot.imshow(warped2, pnum=pnum_(), title='antialias=False')
        >>> kwplot.show_if_requested()
    """
    from kwimage import im_cv2
    from kwimage.transform import Affine
    import kwimage
    transform = Affine.coerce(transform)
    flags = im_cv2._coerce_interpolation(interpolation)

    # TODO: expose these params
    # borderMode = cv2.BORDER_DEFAULT
    # borderMode = cv2.BORDER_CONSTANT
    borderMode = None
    borderValue = None
    """
    Variations that could change in the future:

        * In _gauss_params I'm not sure if we want to compute integer or
            fractional "number of downsamples".

        * The fudge factor bothers me, but seems necessary
    """

    if dsize is None:
        dsize = tuple(image.shape[0:2][::-1])
    elif dsize == 'auto':
        h, w = image.shape[0:2]
        boxes = kwimage.Boxes(np.array([[0, 0, w, h]]), 'xywh')
        poly = boxes.to_polygons()[0]
        warped_poly = poly.warp(transform.matrix)
        warped_box = warped_poly.to_boxes().to_ltrb().quantize()
        dsize = tuple(map(int, warped_box.data[0, 2:4]))

    if not antialias:
        M = np.asarray(transform)
        result = cv2.warpAffine(image,
                                M[0:2],
                                dsize=dsize,
                                flags=flags,
                                borderMode=borderMode,
                                borderValue=borderValue)
    else:
        # Decompose the affine matrix into its 6 core parameters
        params = transform.decompose()
        sx, sy = params['scale']

        if sx >= 1 and sy >= 1:
            # No downsampling detected, no need to antialias
            M = np.asarray(transform)
            result = cv2.warpAffine(image,
                                    M[0:2],
                                    dsize=dsize,
                                    flags=flags,
                                    borderMode=borderMode,
                                    borderValue=borderValue)
        else:
            # At least one dimension is downsampled

            # Compute the transform with all scaling removed
            noscale_warp = Affine.affine(**ub.dict_diff(params, {'scale'}))

            # Execute part of the downscale with iterative pyramid downs
            downscaled, residual_sx, residual_sy = _prepare_downscale(
                image, sx, sy)

            # Compute the transform from the downsampled image to the destination
            rest_warp = noscale_warp @ Affine.scale((residual_sx, residual_sy))

            result = cv2.warpAffine(downscaled,
                                    rest_warp.matrix[0:2],
                                    dsize=dsize,
                                    flags=flags,
                                    borderMode=borderMode,
                                    borderValue=borderValue)

    return result
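
A minimal sketch of the scale decomposition used by the antialiased branch above; the pyramid reduction factor is made up, and the reconstruction identity is the same one warp_affine relies on:

    import numpy as np
    from kwimage.transform import Affine

    transform = Affine.random()
    params = transform.decompose()
    sx, sy = params['scale']

    # strip the scale component; it is reintroduced as a residual below
    noscale_warp = Affine.affine(**{k: v for k, v in params.items() if k != 'scale'})

    # suppose the image pyramid already reduced the image by 2 ** k in each
    # direction; the residual scale that still needs to be applied is:
    k = 2
    rest_warp = noscale_warp @ Affine.scale((sx * 2 ** k, sy * 2 ** k))

    # composing rest_warp with the pyramid's implicit 1 / 2 ** k scaling should
    # recover the original transform (up to floating point error)
    recon = rest_warp @ Affine.scale(1.0 / 2 ** k)
    same = np.allclose(np.asarray(recon), np.asarray(transform))  # expected True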