def _labels_to_true_dets(harn, inp_size, labels, _aidbase=1, undo_lb=True):
    """ Convert batch groundtruth to coco-style annotations for scoring """
    import kwimage
    indices = labels['indices']
    orig_sizes = labels['orig_sizes']
    targets = labels['cxywh']
    gt_weights = labels['gt_weights']

    letterbox = harn.datasets[harn.current_tag].letterbox
    # On the training set, we need to add truth due to augmentation
    bsize = len(indices)
    for ix in range(bsize):
        target = targets[ix].view(-1, 5)
        true_det = kwimage.Detections(
            boxes=kwimage.Boxes(target[:, 1:5].float(), 'cxywh'),
            class_idxs=target[:, 0].long(),
            weights=gt_weights[ix],
        )
        true_det = true_det.numpy()
        flags = true_det.class_idxs != -1
        true_det = true_det.compress(flags)

        if undo_lb:
            orig_size = orig_sizes[ix].cpu().numpy()
            true_det.data['boxes'] = letterbox._boxes_letterbox_invert(
                true_det.boxes, orig_size, inp_size)

        true_det.data['aids'] = np.arange(_aidbase, _aidbase + len(true_det))
        gx = int(indices[ix].data.cpu().numpy())
        # if util.IS_PROFILING:
        #     torch.cuda.synchronize()
        yield gx, true_det
def overlapping_aids(self, gid, region, visible_thresh=0.0):
    """
    Finds the other annotations in this image that overlap a region

    Args:
        gid (int): image id
        region (kwimage.Boxes): bounding box
        visible_thresh (float): does not return annotations with visibility
            less than this threshold.

    Returns:
        List[int]: annotation ids
    """
    overlap_aids = self.isect_index.overlapping_aids(gid, region)
    if visible_thresh > 0 and len(overlap_aids) > 0:
        # Get info about all annotations inside this window
        if 0:
            overlap_annots = self.dset.annots(overlap_aids)
            abs_boxes = overlap_annots.boxes
        else:
            overlap_anns = [self.dset.anns[aid] for aid in overlap_aids]
            abs_boxes = kwimage.Boxes(
                [ann['bbox'] for ann in overlap_anns], 'xywh')

        # Remove annotations that are mostly invisible in this region
        if len(abs_boxes) > 0:
            eps = 1e-6
            isect_area = region[None, :].isect_area(abs_boxes)[0]
            other_area = abs_boxes.area.T[0]
            visibility = isect_area / (other_area + eps)
            is_visible = visibility > visible_thresh
            abs_boxes = abs_boxes[is_visible]
            overlap_aids = list(it.compress(overlap_aids, is_visible))
            # overlap_annots = self.dset.annots(overlap_aids)
    return overlap_aids
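# Hedged sketch (not from the original module; `_demo_visibility_filter` is a
# hypothetical name): the visibility test above is intersection area divided
# by each other box's own area. The same computation on toy boxes:
def _demo_visibility_filter():
    import kwimage
    import numpy as np
    region = kwimage.Boxes(np.array([[0, 0, 10, 10]]), 'tlbr')
    others = kwimage.Boxes(np.array([
        [3, 3, 13, 13],    # mostly inside the region (visibility ~0.49)
        [20, 20, 30, 30],  # fully outside the region (visibility 0.0)
    ]), 'tlbr')
    isect_area = region.isect_area(others)[0]
    visibility = isect_area / (others.area.T[0] + 1e-6)
    # Keep only boxes whose visible fraction exceeds a threshold
    return others.compress(visibility > 0.3)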
def to_boxes(self):
    """
    Return the bounding box of the multi polygon

    Returns:
        kwimage.Boxes:

    Example:
        >>> from kwimage.structs.polygon import *  # NOQA
        >>> self = MultiPolygon.random(rng=0, n=10)
        >>> boxes = self.to_boxes()
        >>> sub_boxes = [d.to_boxes() for d in self.data]
        >>> areas1 = np.array([s.intersection(boxes).area[0] for s in sub_boxes])
        >>> areas2 = np.array([s.area[0] for s in sub_boxes])
        >>> assert np.allclose(areas1, areas2)
    """
    import kwimage
    tl = np.array([np.inf, np.inf])
    br = np.array([-np.inf, -np.inf])
    for data in self.data:
        xys = data.data['exterior'].data
        tl = np.minimum(tl, xys.min(axis=0))
        br = np.maximum(br, xys.max(axis=0))
    tlbr = np.hstack([tl, br])[None, :]
    boxes = kwimage.Boxes(tlbr, 'tlbr')
    return boxes
def to_boxes(self):
    """ Return the axis-aligned bounding box of this polygon's exterior """
    import kwimage
    xys = self.data['exterior'].data
    tl = xys.min(axis=0)
    br = xys.max(axis=0)
    tlbr = np.hstack([tl, br])[None, :]
    boxes = kwimage.Boxes(tlbr, 'tlbr')
    return boxes
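# Hedged usage sketch (`_demo_polygon_to_boxes` is a hypothetical name): the
# exterior min/max reduction above is exactly the polygon's bounding box, so
# the result agrees with the raw exterior coordinates.
def _demo_polygon_to_boxes():
    import kwimage
    import numpy as np
    poly = kwimage.Polygon.random(rng=0).scale(100)
    box = poly.to_boxes()
    exterior = poly.data['exterior'].data
    # The box corners are the componentwise min/max of the exterior points
    assert np.allclose(box.to_tlbr().data[0, 0:2], exterior.min(axis=0))
    assert np.allclose(box.to_tlbr().data[0, 2:4], exterior.max(axis=0))
    return box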
def _debug_index(self):
    from shapely.ops import cascaded_union

    def _to_shapely(boxes):
        from shapely.geometry import Polygon
        from kwimage.structs.boxes import _cat
        x1, y1, x2, y2 = boxes.to_tlbr(copy=False).components
        a = _cat([x1, y1]).tolist()
        b = _cat([x1, y2]).tolist()
        c = _cat([x2, y2]).tolist()
        d = _cat([x2, y1]).tolist()
        polygons = [Polygon(points) for points in zip(a, b, c, d, a)]
        return polygons

    for gid, qtree in self.qtrees.items():
        boxes = kwimage.Boxes(
            np.array(list(qtree.aid_to_tlbr.values())), 'tlbr')
        polygons = _to_shapely(boxes)

        bounds = kwimage.Boxes([[0, 0, qtree.width, qtree.height]], 'tlbr')
        bounds = _to_shapely(bounds)[0]
        merged_polygon = cascaded_union(polygons)
        uncovered = (bounds - merged_polygon)
        print('uncovered.area = {!r}'.format(uncovered.area))

        # plot these two polygons separately
        if 1:
            from descartes import PolygonPatch
            from matplotlib import pyplot as plt
            import kwplot
            kwplot.autompl()
            fig = plt.figure(gid)
            ax = fig.add_subplot(111)
            ax.cla()
            # ax.add_patch(
            #     PolygonPatch(bounds, alpha=0.5, zorder=2, fc='blue')
            # )
            # ax.add_patch(
            #     PolygonPatch(merged_polygon, alpha=0.5, zorder=2, fc='red')
            # )
            ax.add_patch(
                PolygonPatch(uncovered, alpha=0.5, zorder=2, fc='green'))
            ax.set_xlim(0, qtree.width)
            ax.set_ylim(0, qtree.height)
            ax.set_aspect(1)
def select_positive_regions(targets, window_dims=(300, 300), thresh=0.0,
                            rng=None, verbose=0):
    """
    Reduce positive example redundancy by selecting disparate positive samples

    Example:
        >>> from ndsampler.coco_regions import *
        >>> import kwcoco
        >>> dset = kwcoco.CocoDataset.demo('shapes8')
        >>> targets = tabular_coco_targets(dset)
        >>> window_dims = (300, 300)
        >>> selected = select_positive_regions(targets, window_dims)
        >>> print(len(selected))
        >>> print(len(dset.anns))
    """
    unique_gids, groupxs = kwarray.group_indices(targets['gid'])
    gid_to_groupx = dict(zip(unique_gids, groupxs))
    wh, ww = window_dims
    rng = kwarray.ensure_rng(rng)
    selection = []

    # Get all the bounding boxes
    cxs, cys = ub.take(targets, ['cx', 'cy'])
    n = len(targets)
    cxs = cxs.astype(np.float32)
    cys = cys.astype(np.float32)
    wws = np.full(n, ww, dtype=np.float32)
    whs = np.full(n, wh, dtype=np.float32)
    cxywh = np.hstack([a[:, None] for a in [cxs, cys, wws, whs]])
    boxes = kwimage.Boxes(cxywh, 'cxywh').to_tlbr()

    iter_ = ub.ProgIter(gid_to_groupx.items(), enabled=verbose,
                        label='select positive regions',
                        total=len(gid_to_groupx), adjust=0, freq=32)
    for gid, groupx in iter_:
        # Select all candidate windows in this image
        cand_windows = boxes.take(groupx, axis=0)
        # Randomize which candidate windows have the highest scores so the
        # selection can vary each epoch.
        cand_scores = rng.rand(len(cand_windows))
        cand_dets = kwimage.Detections(boxes=cand_windows, scores=cand_scores)
        # Non-max suppression is really similar to set-cover
        keep = cand_dets.non_max_supression(thresh=thresh)
        selection.extend(groupx[keep])

    selection = np.array(sorted(selection))
    return selection
def draw_boxes_on_image(img, boxes, color='blue', thickness=1,
                        box_format=None, colorspace='rgb'):
    """
    Draws boxes on an image.

    Args:
        img (ndarray): image to copy and draw on
        boxes (kwimage.Boxes): boxes to draw
        colorspace (str): string code of the input image colorspace

    Example:
        >>> import kwimage
        >>> import numpy as np
        >>> img = np.zeros((10, 10, 3), dtype=np.uint8)
        >>> color = 'dodgerblue'
        >>> thickness = 1
        >>> boxes = kwimage.Boxes([[1, 1, 8, 8]], 'tlbr')
        >>> img2 = draw_boxes_on_image(img, boxes, color, thickness)
        >>> assert tuple(img2[1, 1]) == (30, 144, 255)
        >>> # xdoc: +REQUIRES(--show)
        >>> import kwplot
        >>> kwplot.autompl()  # xdoc: +SKIP
        >>> kwplot.figure(doclf=True, fnum=1)
        >>> kwplot.imshow(img2)
    """
    import kwimage
    import cv2
    if not isinstance(boxes, kwimage.Boxes):
        if box_format is None:
            raise ValueError('specify box_format')
        boxes = kwimage.Boxes(boxes, box_format)

    color = kwimage.Color(color)._forimage(img, colorspace)
    tlbr = boxes.to_tlbr().data
    img2 = img.copy()
    for x1, y1, x2, y2 in tlbr:
        # pt1 = (int(round(x1)), int(round(y1)))
        # pt2 = (int(round(x2)), int(round(y2)))
        pt1 = (int(x1), int(y1))
        pt2 = (int(x2), int(y2))
        # Note: cv2.rectangle draws inplace (and also returns the image)
        img2 = cv2.rectangle(img2, pt1, pt2, color, thickness=thickness)
    return img2
def boxes(self):
    """
    Get the column of kwimage-style bounding boxes

    Example:
        >>> import kwcoco
        >>> self = kwcoco.CocoDataset.demo().annots([1, 2, 11])
        >>> print(self.boxes)
        <Boxes(xywh,
            array([[ 10,  10, 360, 490],
                   [350,   5, 130, 290],
                   [124,  96,  45,  18]]))>
    """
    import kwimage
    xywh = self.lookup('bbox')
    boxes = kwimage.Boxes(xywh, 'xywh')
    return boxes
def _kwiver_to_kwimage_detections(detected_objects):
    """
    Convert vital detected object sets to kwimage.Detections

    Args:
        detected_objects (kwiver.vital.types.DetectedObjectSet)

    Returns:
        kwimage.Detections
    """
    import ubelt as ub
    import kwimage
    boxes = []
    scores = []
    class_idxs = []

    classes = []
    if len(detected_objects) > 0:
        obj = ub.peek(detected_objects)
        classes = obj.type().all_class_names()

    for obj in detected_objects:
        box = obj.bounding_box()
        tlbr = [box.min_x(), box.min_y(), box.max_x(), box.max_y()]
        score = obj.confidence()
        cname = obj.type().get_most_likely_class()
        cidx = classes.index(cname)
        boxes.append(tlbr)
        scores.append(score)
        class_idxs.append(cidx)

    dets = kwimage.Detections(
        boxes=kwimage.Boxes(np.array(boxes), 'tlbr'),
        scores=np.array(scores),
        class_idxs=np.array(class_idxs),
        classes=classes,
    )
    return dets
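# Hedged sketch (`_demo_detections_like_kwiver_output` is a hypothetical
# name): without a kwiver runtime available, the structure that the converter
# above produces can be built directly, e.g. to exercise downstream code.
def _demo_detections_like_kwiver_output():
    import numpy as np
    import kwimage
    classes = ['fish', 'rock']
    dets = kwimage.Detections(
        boxes=kwimage.Boxes(np.array([[0, 0, 10, 10], [5, 5, 20, 25]]), 'tlbr'),
        scores=np.array([0.9, 0.4]),
        class_idxs=np.array([0, 1]),
        classes=classes,
    )
    return dets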
def __init__(self, num_classes, anchors, coord_scale=1.0,
             noobject_scale=1.0, object_scale=5.0, class_scale=1.0,
             thresh=0.6, seen_thresh=12800, small_boxes=False,
             mse_factor=0.5):
    import kwimage
    super(RegionLoss, self).__init__()

    self.num_classes = num_classes
    self.seen_thresh = seen_thresh

    self.anchors = torch.Tensor(anchors)
    self.num_anchors = len(anchors)

    self.coord_scale = coord_scale
    self.noobject_scale = noobject_scale
    self.object_scale = object_scale
    self.class_scale = class_scale
    self.thresh = thresh

    self.loss_coord = None
    self.loss_conf = None
    self.loss_cls = None
    self.loss_tot = None

    self.coord_mse = nn.MSELoss(reduction='sum')
    self.conf_mse = nn.MSELoss(reduction='sum')
    self.cls_critrion = nn.CrossEntropyLoss(reduction='sum')

    # Precompute zero-centered ("relative") anchor boxes in cxywh format
    # for iou computation
    rel_anchors_cxywh = torch.cat(
        [torch.zeros_like(self.anchors), self.anchors], 1)
    self.rel_anchors_boxes = kwimage.Boxes(rel_anchors_cxywh, 'cxywh')

    self.small_boxes = small_boxes
    self.mse_factor = mse_factor
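# Hedged sketch (`_demo_relative_anchor_assignment` is a hypothetical name):
# zero-centered "relative" anchors let truth boxes be matched to anchors by
# shape alone. Zero the centers of the truth boxes too, and the argmax IOU
# picks the anchor whose width/height best fits each truth box.
def _demo_relative_anchor_assignment():
    import numpy as np
    import kwimage
    anchors = np.array([[1.0, 1.0], [3.0, 1.0]])  # (w, h) pairs
    rel_anchors = kwimage.Boxes(
        np.hstack([np.zeros_like(anchors), anchors]), 'cxywh')
    truth = kwimage.Boxes(np.array([[0., 0., 2.8, 0.9]]), 'cxywh')
    ious = rel_anchors.ious(truth, bias=0)  # shape (num_anchors, num_truth)
    best_anchor = ious.argmax(axis=0)
    return best_anchor  # the wide 3x1 anchor should win here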
def iooas(self, gid, box):
    """
    Intersection over other's area

    Like iou, but non-symmetric. The returned number is the fraction of
    each other (groundtruth) annotation's area that is covered, which
    means we don't care how big the (negative) `box` is.

    Args:
        gid (int): an image id
        box (kwimage.Boxes): the specified region
    """
    boxes1 = box[None, :] if len(box.shape) == 1 else box
    isect_aids = self.overlapping_aids(gid, box)
    if len(isect_aids):
        boxes2 = [self.qtrees[gid].aid_to_tlbr[aid] for aid in isect_aids]
        boxes2 = kwimage.Boxes(np.array(boxes2), 'tlbr')
        isect = boxes1.isect_area(boxes2)
        denom = boxes2.area.T
        eps = 1e-6
        iomas = isect / (denom[0] + eps)
    else:
        iomas = np.empty(0)
    return isect_aids, iomas
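# Hedged sketch (`_demo_iou_vs_iooa` is a hypothetical name) of why IoOA is
# used for negative mining instead of IoU: a huge candidate negative box that
# swallows a small truth box has a tiny IoU, but its IoOA (intersection over
# the truth's area) is 1.0, so it is correctly rejected as a negative.
def _demo_iou_vs_iooa():
    import numpy as np
    import kwimage
    query = kwimage.Boxes(np.array([[0, 0, 100, 100]]), 'tlbr')
    truth = kwimage.Boxes(np.array([[10, 10, 20, 20]]), 'tlbr')
    iou = query.ious(truth)[0, 0]                             # ~0.01
    iooa = query.isect_area(truth)[0, 0] / truth.area[0, 0]   # 1.0
    return iou, iooa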
def ious(self, gid, box):
    """
    Find overlapping annotations in a specific image and their
    intersection over union with a query box.

    Args:
        gid (int): an image id
        box (kwimage.Boxes): the specified region

    Returns:
        Tuple[List[int], ndarray]:
            isect_aids: list of annotation ids
            ious: jaccard score for each returned annotation id
    """
    boxes1 = box[None, :] if len(box.shape) == 1 else box
    isect_aids = self.overlapping_aids(gid, box)
    if len(isect_aids):
        boxes2 = [self.qtrees[gid].aid_to_tlbr[aid] for aid in isect_aids]
        boxes2 = kwimage.Boxes(np.array(boxes2), 'tlbr')
        ious = boxes1.ious(boxes2)[0]
    else:
        ious = np.empty(0)
    return isect_aids, ious
def _build_index(dset, verbose=0):
    """ Build a quadtree of annotation boxes for each image in the dataset """
    if verbose:
        print('Building isect index')

    qtrees = {
        img['id']: pyqtree.Index((0, 0, img['width'], img['height']))
        for img in ub.ProgIter(dset.dataset['images'], desc='init qtrees',
                               verbose=verbose)
    }
    for qtree in qtrees.values():
        qtree.aid_to_tlbr = {}  # Add extra index to track boxes

    for ann in ub.ProgIter(dset.dataset['annotations'],
                           desc='populate qtrees', verbose=verbose):
        bbox = ann.get('bbox', None)
        if bbox is not None:
            aid = ann['id']
            qtree = qtrees[ann['image_id']]
            xywh_box = kwimage.Boxes(bbox, 'xywh')
            tlbr_box = xywh_box.to_tlbr().data
            qtree.insert(aid, tlbr_box)
            qtree.aid_to_tlbr[aid] = tlbr_box
    return qtrees
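# Hedged sketch of the underlying pyqtree API used above (assuming the pyqtree
# package; `_demo_pyqtree_usage` is a hypothetical name): items are inserted
# with a tlbr bbox and candidate overlaps are queried with `intersect`.
def _demo_pyqtree_usage():
    import pyqtree
    qtree = pyqtree.Index(bbox=(0, 0, 100, 100))
    qtree.insert('aid1', (10, 10, 20, 20))
    qtree.insert('aid2', (50, 50, 80, 80))
    # Returns the items whose boxes intersect the query region
    return qtree.intersect((0, 0, 30, 30))  # -> ['aid1']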
def tabular_coco_targets(dset):
    """
    Transforms COCO box annotations into a tabular form

    Ignore:
        _ = xdev.profile_now(tabular_coco_targets)(dset)
    """
    import warnings
    # TODO: better handling of non-bounding box annotations; ignore for now

    if hasattr(dset, 'tabular_targets'):
        # In the SQL case, we can write a single query that
        # builds the table more efficiently.
        return dset.tabular_targets()

    img_items = list(dset.imgs.items())
    gid_to_width = {gid: img['width'] for gid, img in img_items}
    gid_to_height = {gid: img['height'] for gid, img in img_items}

    try:
        anns = dset.dataset['annotations']
        if not isinstance(anns, list):
            anns = list(anns)
        xywh = [ann['bbox'] for ann in anns]
        xywh = np.array(xywh, dtype=np.float32)
    except Exception:
        has_bbox = [ann.get('bbox', None) is not None for ann in anns]
        if not all(has_bbox):
            n_missing = len(has_bbox) - sum(has_bbox)
            warnings.warn('CocoDataset is missing boxes '
                          'for {} annotations'.format(n_missing))
        anns = list(ub.compress(anns, has_bbox))
        xywh = [ann['bbox'] for ann in anns]
        xywh = np.array(xywh, dtype=np.float32)

    boxes = kwimage.Boxes(xywh, 'xywh')
    cxywhs = boxes.to_cxywh().data.reshape(-1, 4)

    aids = [ann['id'] for ann in anns]
    gids = [ann['image_id'] for ann in anns]
    cids = [ann['category_id'] for ann in anns]
    img_width = [gid_to_width[gid] for gid in gids]
    img_height = [gid_to_height[gid] for gid in gids]

    aids = np.array(aids, dtype=np.int32)
    gids = np.array(gids, dtype=np.int32)
    cids = np.array(cids, dtype=np.int32)

    table = {
        # Annotation / Image / Category ids
        'aid': aids,
        'gid': gids,
        'category_id': cids,
        # Subpixel box localizations wrt parent image
        'cx': cxywhs.T[0],
        'cy': cxywhs.T[1],
        'width': cxywhs.T[2],
        'height': cxywhs.T[3],
    }
    # Parent image width / height
    table['img_width'] = np.array(img_width, dtype=np.int32)
    table['img_height'] = np.array(img_height, dtype=np.int32)
    # table = ub.map_vals(np.asarray, table)
    targets = kwarray.DataFrameArray(table)
    return targets
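# Hedged usage sketch (`_demo_group_targets_by_image` is a hypothetical name):
# the columnar layout makes per-image grouping cheap, which is exactly how
# select_positive_regions consumes this table.
def _demo_group_targets_by_image(targets):
    import kwarray
    unique_gids, groupxs = kwarray.group_indices(targets['gid'])
    # Map image-id -> integer rows of the target table for that image
    return dict(zip(unique_gids, groupxs))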
def _devcheck_corner():
    self = DelayedWarp.random(rng=0)
    print(self.nesting())
    region_slices = (slice(40, 90), slice(20, 62))
    region_box = kwimage.Boxes.from_slice(region_slices, shape=self.shape)
    region_bounds = region_box.to_polygons()[0]

    # Iterate to the last leaf in the optimized path
    for leaf in self._optimize_paths():
        pass
    tf_leaf_to_root = leaf['transform']
    tf_root_to_leaf = np.linalg.inv(tf_leaf_to_root)

    leaf_region_bounds = region_bounds.warp(tf_root_to_leaf)
    leaf_region_box = leaf_region_bounds.bounding_box().to_ltrb()
    leaf_crop_box = leaf_region_box.quantize()
    lt_x, lt_y, rb_x, rb_y = leaf_crop_box.data[0, 0:4]

    root_crop_corners = leaf_crop_box.to_polygons()[0].warp(tf_leaf_to_root)

    # leaf_crop_slices = (slice(lt_y, rb_y), slice(lt_x, rb_x))

    crop_offset = leaf_crop_box.data[0, 0:2]
    corner_offset = leaf_region_box.data[0, 0:2]
    offset_xy = crop_offset - corner_offset

    tf_root_to_leaf

    # NOTE:
    # Cropping applies a translation in whatever space we do it in
    # We need to save the bounds of the crop.
    # But now we need to adjust the transform so it points to the
    # cropped-leaf-space not just the leaf-space, so we invert the implicit
    # crop

    tf_crop_to_leaf = Affine.affine(offset=crop_offset)

    # tf_newroot_to_root = Affine.affine(offset=region_box.data[0, 0:2])
    tf_root_to_newroot = Affine.affine(offset=region_box.data[0, 0:2]).inv()

    tf_crop_to_leaf = Affine.affine(offset=crop_offset)
    tf_crop_to_newroot = tf_root_to_newroot @ tf_leaf_to_root @ tf_crop_to_leaf
    tf_newroot_to_crop = tf_crop_to_newroot.inv()

    # tf_leaf_to_crop
    # tf_corner_offset = Affine.affine(offset=offset_xy)

    subpixel_offset = Affine.affine(offset=offset_xy).matrix
    tf_crop_to_leaf = subpixel_offset
    # tf_crop_to_root = tf_leaf_to_root @ tf_crop_to_leaf
    # tf_root_to_crop = np.linalg.inv(tf_crop_to_root)

    if 1:
        import kwplot
        kwplot.autoplt()

        lw, lh = leaf['sub_data_shape'][0:2]
        leaf_box = kwimage.Boxes([[0, 0, lw, lh]], 'xywh')
        root_box = kwimage.Boxes([[0, 0, self.dsize[0], self.dsize[1]]], 'xywh')

        ax1 = kwplot.figure(fnum=1, pnum=(2, 2, 1), doclf=1).gca()
        ax2 = kwplot.figure(fnum=1, pnum=(2, 2, 2)).gca()
        ax3 = kwplot.figure(fnum=1, pnum=(2, 2, 3)).gca()
        ax4 = kwplot.figure(fnum=1, pnum=(2, 2, 4)).gca()

        root_box.draw(setlim=True, ax=ax1)
        leaf_box.draw(setlim=True, ax=ax2)

        region_bounds.draw(ax=ax1, color='green', alpha=.4)
        leaf_region_bounds.draw(ax=ax2, color='green', alpha=.4)
        leaf_crop_box.draw(ax=ax2, color='purple')
        root_crop_corners.draw(ax=ax1, color='purple', alpha=.4)

        new_w = region_box.to_xywh().data[0, 2]
        new_h = region_box.to_xywh().data[0, 3]
        ax3.set_xlim(0, new_w)
        ax3.set_ylim(0, new_h)

        crop_w = leaf_crop_box.to_xywh().data[0, 2]
        crop_h = leaf_crop_box.to_xywh().data[0, 3]
        ax4.set_xlim(0, crop_w)
        ax4.set_ylim(0, crop_h)

        pts3_ = kwimage.Points.random(3).scale((new_w, new_h))
        pts3 = kwimage.Points(
            xy=np.vstack([[[0, 0], [5, 5], [0, 49], [40, 45]], pts3_.xy]))
        pts4 = pts3.warp(tf_newroot_to_crop.matrix)
        pts3.draw(ax=ax3)
        pts4.draw(ax=ax4)
def warp_image_test(image, transform, dsize=None):
    """
    Ignore:
        from kwimage.transform import Affine
        import kwimage
        image = kwimage.grab_test_image('checkerboard', dsize=(2048, 2048)).astype(np.float32)
        image = kwimage.grab_test_image('astro', dsize=(2048, 2048))
        transform = Affine.random() @ Affine.scale(0.01)
    """
    from kwimage.transform import Affine
    import kwimage
    import numpy as np
    import ubelt as ub

    # Choose a random affine transform that probably has a small scale
    # transform = Affine.random() @ Affine.scale((0.3, 2))
    # transform = Affine.scale((0.1, 1.2))
    # transform = Affine.scale(0.05)
    transform = Affine.random() @ Affine.scale(0.01)
    # transform = Affine.random()

    image = kwimage.grab_test_image('astro')
    image = kwimage.grab_test_image('checkerboard')

    image = kwimage.ensure_float01(image)

    from kwimage import im_cv2
    import kwarray
    import cv2
    transform = Affine.coerce(transform)

    if 1 or dsize is None:
        h, w = image.shape[0:2]

        boxes = kwimage.Boxes(np.array([[0, 0, w, h]]), 'xywh')
        poly = boxes.to_polygons()[0]
        warped_poly = poly.warp(transform.matrix)
        warped_box = warped_poly.to_boxes().to_ltrb().quantize()
        dsize = tuple(map(int, warped_box.data[0, 2:4]))

    import timerit
    ti = timerit.Timerit(10, bestof=3, verbose=2)

    def _full_gauss_kernel(k0, sigma0, scale):
        num_downscales = np.log2(1 / scale)
        if num_downscales < 0:
            return 1, 0

        # Define b0 = kernel size for one downsample operation
        b0 = 5
        # Define sigma0 = sigma for one downsample operation
        sigma0 = 1

        # The kernel size and sigma doubles for each 2x downsample
        k = int(np.ceil(b0 * (2 ** (num_downscales - 1))))
        sigma = sigma0 * (2 ** (num_downscales - 1))

        if k % 2 == 0:
            k += 1
        return k, sigma

    def pyrDownK(a, k=1):
        assert k >= 0
        for _ in range(k):
            a = cv2.pyrDown(a)
        return a

    for timer in ti.reset('naive'):
        with timer:
            interpolation = 'nearest'
            flags = im_cv2._coerce_interpolation(interpolation)
            final_v5 = cv2.warpAffine(image, transform.matrix[0:2],
                                      dsize=dsize, flags=flags)

    # --------------------
    # METHOD 1
    for timer in ti.reset('resize+warp'):
        with timer:
            params = transform.decompose()

            sx, sy = params['scale']
            noscale_params = ub.dict_diff(params, {'scale'})
            noscale_warp = Affine.affine(**noscale_params)

            h, w = image.shape[0:2]
            resize_dsize = (int(np.ceil(sx * w)), int(np.ceil(sy * h)))

            downsampled = cv2.resize(image, dsize=resize_dsize, fx=sx, fy=sy,
                                     interpolation=cv2.INTER_AREA)

            interpolation = 'linear'
            flags = im_cv2._coerce_interpolation(interpolation)
            final_v1 = cv2.warpAffine(downsampled, noscale_warp.matrix[0:2],
                                      dsize=dsize, flags=flags)

    # --------------------
    # METHOD 2
    for timer in ti.reset('fullblur+warp'):
        with timer:
            k_x, sigma_x = _full_gauss_kernel(k0=5, sigma0=1, scale=sx)
            k_y, sigma_y = _full_gauss_kernel(k0=5, sigma0=1, scale=sy)
            image_ = image.copy()
            image_ = cv2.GaussianBlur(image_, (k_x, k_y), sigma_x, sigma_y)
            image_ = kwarray.atleast_nd(image_, 3)
            # image_ = image_.clip(0, 1)

            interpolation = 'linear'
            flags = im_cv2._coerce_interpolation(interpolation)
            final_v2 = cv2.warpAffine(image_, transform.matrix[0:2],
                                      dsize=dsize, flags=flags)

    # --------------------
    # METHOD 3
    for timer in ti.reset('pyrDown+blur+warp'):
        with timer:
            temp = image.copy()

            params = transform.decompose()
            sx, sy = params['scale']

            biggest_scale = max(sx, sy)
            # The -2 allows the gaussian to be a little bigger. This
            # seems to help with border effects at only a small runtime cost
            num_downscales = max(int(np.log2(1 / biggest_scale)) - 2, 0)
            pyr_scale = 1 / (2 ** num_downscales)

            # Does the gaussian downsampling
            temp = pyrDownK(image, num_downscales)

            rest_sx = sx / pyr_scale
            rest_sy = sy / pyr_scale

            partial_scale = Affine.scale((rest_sx, rest_sy))
            rest_warp = noscale_warp @ partial_scale

            k_x, sigma_x = _full_gauss_kernel(k0=5, sigma0=1, scale=rest_sx)
            k_y, sigma_y = _full_gauss_kernel(k0=5, sigma0=1, scale=rest_sy)
            temp = cv2.GaussianBlur(temp, (k_x, k_y), sigma_x, sigma_y)
            temp = kwarray.atleast_nd(temp, 3)

            interpolation = 'cubic'
            flags = im_cv2._coerce_interpolation(interpolation)
            final_v3 = cv2.warpAffine(temp, rest_warp.matrix[0:2],
                                      dsize=dsize, flags=flags)

    # --------------------
    # METHOD 4 - don't do the final blur
    for timer in ti.reset('pyrDown+warp'):
        with timer:
            temp = image.copy()

            params = transform.decompose()
            sx, sy = params['scale']

            biggest_scale = max(sx, sy)
            num_downscales = max(int(np.log2(1 / biggest_scale)), 0)
            pyr_scale = 1 / (2 ** num_downscales)

            # Does the gaussian downsampling
            temp = pyrDownK(image, num_downscales)

            rest_sx = sx / pyr_scale
            rest_sy = sy / pyr_scale

            partial_scale = Affine.scale((rest_sx, rest_sy))
            rest_warp = noscale_warp @ partial_scale

            interpolation = 'linear'
            flags = im_cv2._coerce_interpolation(interpolation)
            final_v4 = cv2.warpAffine(temp, rest_warp.matrix[0:2],
                                      dsize=dsize, flags=flags)

    if 1:
        def get_title(key):
            from ubelt.timerit import _choose_unit
            value = ti.measures['mean'][key]
            suffix, mag = _choose_unit(value)
            unit_val = value / mag
            return key + ' ' + ub.repr2(unit_val, precision=2) + ' ' + suffix

        final_v2 = final_v2.clip(0, 1)
        final_v1 = final_v1.clip(0, 1)
        final_v3 = final_v3.clip(0, 1)
        final_v4 = final_v4.clip(0, 1)
        final_v5 = final_v5.clip(0, 1)
        import kwplot
        kwplot.autompl()
        kwplot.imshow(final_v5, pnum=(1, 5, 1), title=get_title('naive'))
        kwplot.imshow(final_v2, pnum=(1, 5, 2), title=get_title('fullblur+warp'))
        kwplot.imshow(final_v1, pnum=(1, 5, 3), title=get_title('resize+warp'))
        kwplot.imshow(final_v3, pnum=(1, 5, 4), title=get_title('pyrDown+blur+warp'))
        kwplot.imshow(final_v4, pnum=(1, 5, 5), title=get_title('pyrDown+warp'))
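# Worked example (`_demo_gauss_kernel_growth` is a hypothetical name) of the
# kernel-doubling rule in _full_gauss_kernel: each 2x downsample doubles the
# kernel size and sigma, so a 1/8 scale (3 downscales) gives
# k = ceil(5 * 2**2) = 20 -> 21 (forced odd) and sigma = 1 * 2**2 = 4.
def _demo_gauss_kernel_growth():
    import numpy as np
    for scale in [1.0, 0.5, 0.25, 0.125]:
        num_downscales = np.log2(1 / scale)
        if num_downscales <= 0:
            k, sigma = 1, 0  # no downsampling -> no blur needed
        else:
            k = int(np.ceil(5 * (2 ** (num_downscales - 1))))
            k += int(k % 2 == 0)  # force an odd kernel size
            sigma = 1 * (2 ** (num_downscales - 1))
        print(f'scale={scale}: k={k}, sigma={sigma}')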
def main(**kw):
    """
    CommandLine:
        python $HOME/code/bioharn/dev/kwcoco_to_viame_csv.py \
            --src /data/public/Aerial/US_ALASKA_MML_SEALION/2007/sealions_2007_v9.kwcoco.json \
            --dst /data/public/Aerial/US_ALASKA_MML_SEALION/2007/sealions_2007_v9.viame.csv
    """
    config = ConvertConfig(default=kw, cmdline=True)

    import kwcoco
    import kwimage
    import ubelt as ub
    coco_dset = kwcoco.CocoDataset(config['src'])

    csv_rows = []
    for gid, img in ub.ProgIter(coco_dset.imgs.items(),
                                total=coco_dset.n_images):
        gname = img['file_name']
        aids = coco_dset.gid_to_aids[gid]
        frame_index = img.get('frame_index', 0)
        # vidid = img.get('video_id', None)
        for aid in aids:
            ann = coco_dset.anns[aid]

            cat = coco_dset.cats[ann['category_id']]
            catname = cat['name']

            # just use annotation id if no tracks
            tid = ann.get('track_id', aid)
            # tracked_aids = tid_to_aids.get(tid, [aid])
            # track_len = len(tracked_aids)

            tl_x, tl_y, br_x, br_y = kwimage.Boxes(
                [ann['bbox']], 'xywh').toformat('tlbr').data[0].tolist()

            score = ann.get('score', 1)

            row = [
                tid,             # 1 - Detection or Track Unique ID
                gname,           # 2 - Video or Image String Identifier
                frame_index,     # 3 - Unique Frame Integer Identifier
                round(tl_x, 3),  # 4 - TL-x (top left of the image is the origin: 0,0)
                round(tl_y, 3),  # 5 - TL-y
                round(br_x, 3),  # 6 - BR-x
                round(br_y, 3),  # 7 - BR-y
                score,           # 8 - Auxiliary Confidence (how likely is this actually an object)
                -1,              # 9 - Target Length
                catname,         # 10+ - category name
                score,           # 11+ - category score
            ]

            # Optional fields
            for kp in ann.get('keypoints', []):
                if 'keypoint_category_id' in kp:
                    cname = coco_dset._resolve_to_kpcat(
                        kp['keypoint_category_id'])['name']
                elif 'category_name' in kp:
                    cname = kp['category_name']
                elif 'category' in kp:
                    cname = kp['category']
                else:
                    raise Exception(str(kp))
                kp_x, kp_y = kp['xy']
                row.append('(kp) {} {} {}'.format(
                    cname, round(kp_x, 3), round(kp_y, 3)))

            note_fields = [
                'box_source',
                'changelog',
                'color',
            ]
            for note_key in note_fields:
                if note_key in ann:
                    row.append('(note) {}: {}'.format(
                        note_key,
                        repr(ann[note_key]).replace(',', '<comma>')))

            row = list(map(str, row))
            for item in row:
                if ',' in item:
                    print('BAD row = {!r}'.format(row))
                    raise Exception('comma is in a row field')
            row_str = ','.join(row)
            csv_rows.append(row_str)

    csv_text = '\n'.join(csv_rows)
    dst_fpath = config['dst']
    print('dst_fpath = {!r}'.format(dst_fpath))
    with open(dst_fpath, 'w') as file:
        file.write(csv_text)
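# Hedged sketch (`_demo_viame_csv_row` is a hypothetical name) of what one
# emitted VIAME CSV row looks like for a toy annotation, using the same
# xywh -> tlbr conversion and the column order documented in the loop above:
def _demo_viame_csv_row():
    import kwimage
    bbox_xywh = [10, 20, 30, 40]
    tl_x, tl_y, br_x, br_y = kwimage.Boxes(
        [bbox_xywh], 'xywh').toformat('tlbr').data[0].tolist()
    row = ['1', 'image.png', '0',
           str(round(tl_x, 3)), str(round(tl_y, 3)),
           str(round(br_x, 3)), str(round(br_y, 3)),
           '1', '-1', 'sealion', '1']
    # -> '1,image.png,0,10.0,20.0,40.0,60.0,1,-1,sealion,1'
    return ','.join(row)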
def convert_camvid_raw_to_coco(camvid_raw_info):
    """
    Converts the raw camvid format to an MSCOCO based format
    (which lets us use kwcoco's COCO backend).

    Example:
        >>> # xdoctest: +REQUIRES(--download)
        >>> camvid_raw_info = grab_raw_camvid()
        >>> # test with a reduced set of data
        >>> del camvid_raw_info['img_paths'][2:]
        >>> del camvid_raw_info['mask_paths'][2:]
        >>> dset = convert_camvid_raw_to_coco(camvid_raw_info)
        >>> # xdoctest: +REQUIRES(--show)
        >>> import kwplot
        >>> plt = kwplot.autoplt()
        >>> kwplot.figure(fnum=1, pnum=(1, 2, 1))
        >>> dset.show_image(gid=1)
        >>> kwplot.figure(fnum=1, pnum=(1, 2, 2))
        >>> dset.show_image(gid=2)
    """
    import re
    import kwimage
    import kwcoco
    print('Converting CamVid to MS-COCO format')

    dset_root, img_paths, label_path, mask_paths = ub.take(
        camvid_raw_info,
        'dset_root, img_paths, label_path, mask_paths'.split(', '))

    img_infos = {
        'img_fname': img_paths,
        'mask_fname': mask_paths,
    }
    keys = list(img_infos.keys())
    next_vals = list(zip(*img_infos.values()))
    image_items = [{k: v for k, v in zip(keys, vals)} for vals in next_vals]

    dataset = {
        'img_root': dset_root,
        'images': [],
        'categories': [],
        'annotations': [],
    }

    lines = ub.readfrom(label_path).split('\n')
    lines = [line for line in lines if line]
    for line in lines:
        color_text, name = re.split('\t+', line)
        r, g, b = map(int, color_text.split(' '))
        color = (r, g, b)

        # Parse the special camvid format
        cid = (r << 16) + (g << 8) + (b << 0)
        cat = {
            'id': cid,
            'name': name,
            'color': color,
        }
        dataset['categories'].append(cat)

    for gid, img_item in enumerate(image_items, start=1):
        img = {
            'id': gid,
            'file_name': img_item['img_fname'],
            # nonstandard image field
            'segmentation': img_item['mask_fname'],
        }
        dataset['images'].append(img)

    dset = kwcoco.CocoDataset(dataset)
    dset.rename_categories({'Void': 'background'})

    assert dset.name_to_cat['background']['id'] == 0
    dset.name_to_cat['background'].setdefault('alias', []).append('Void')

    if False:
        _define_camvid_class_hierarcy(dset)

    if 1:
        # TODO: Binarize CCs (and efficiently encode if possible)
        import numpy as np

        bad_info = []
        once = False

        # Rebuild the annotations from the label masks
        dset.remove_annotations(list(dset.index.anns.keys()))
        for gid, img in ub.ProgIter(dset.imgs.items(),
                                    desc='parse label masks'):
            mask_fpath = join(dset_root, img['segmentation'])

            rgb_mask = kwimage.imread(mask_fpath, space='rgb')
            r, g, b = rgb_mask.T.astype(np.int64)
            cid_mask = np.ascontiguousarray(rgb_to_cid(r, g, b).T)

            cids = set(np.unique(cid_mask)) - {0}

            for cid in cids:
                if cid not in dset.cats:
                    if gid == 618:
                        # Handle a known issue with image 618
                        c_mask = (cid == cid_mask).astype(np.uint8)
                        total_bad = c_mask.sum()
                        if total_bad < 32:
                            if not once:
                                print('gid 618 has a few known bad pixels, '
                                      'ignoring them')
                                once = True
                            continue
                        else:
                            raise Exception('more bad pixels than expected')
                    else:
                        raise Exception(
                            'UNKNOWN cid = {!r} in gid={!r}'.format(cid, gid))

                    # bad_rgb = cid_to_rgb(cid)
                    # print('bad_rgb = {!r}'.format(bad_rgb))
                    # print('WARNING UNKNOWN cid = {!r} in gid={!r}'.format(cid, gid))
                    # bad_info.append({
                    #     'gid': gid,
                    #     'cid': cid,
                    # })
                else:
                    ann = {
                        'category_id': cid,
                        'image_id': gid,
                        # 'segmentation': mask.to_coco()
                    }
                    assert cid in dset.cats
                    c_mask = (cid == cid_mask).astype(np.uint8)
                    mask = kwimage.Mask(c_mask, 'c_mask')

                    box = kwimage.Boxes([mask.get_xywh()], 'xywh')
                    # box = mask.to_boxes()

                    ann['bbox'] = ub.peek(box.to_coco())
                    ann['segmentation'] = mask.to_coco()
                    dset.add_annotation(**ann)

        if 0:
            bad_cids = [i['cid'] for i in bad_info]
            print(sorted([c['color'] for c in dataset['categories']]))
            print(sorted(set([cid_to_rgb(i['cid']) for i in bad_info])))

            gid = 618
            img = dset.imgs[gid]
            mask_fpath = join(dset_root, img['segmentation'])
            rgb_mask = kwimage.imread(mask_fpath, space='rgb')
            r, g, b = rgb_mask.T.astype(np.int64)
            cid_mask = np.ascontiguousarray(rgb_to_cid(r, g, b).T)
            cid_hist = ub.dict_hist(cid_mask.ravel())

            bad_cid_hist = {}
            for cid in bad_cids:
                bad_cid_hist[cid] = cid_hist.pop(cid)

            import kwplot
            kwplot.autompl()
            kwplot.imshow(rgb_mask)

    if 0:
        import kwplot
        plt = kwplot.autoplt()
        plt.clf()
        dset.show_image(1)

        import xdev
        gid_list = list(dset.imgs)
        for gid in xdev.InteractiveIter(gid_list):
            dset.show_image(gid)
            xdev.InteractiveIter.draw()

    dset._build_index()
    dset._build_hashid()
    return dset
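# The camvid color-id packing used above is a simple 24-bit encode. A hedged
# sketch of rgb_to_cid / cid_to_rgb consistent with `cid = (r<<16)+(g<<8)+b`
# (the real helpers live elsewhere in the original module and may be
# vectorized; `_demo_rgb_cid_roundtrip` is a hypothetical name):
def _demo_rgb_cid_roundtrip():
    def rgb_to_cid(r, g, b):
        # Pack an (r, g, b) triple into a single 24-bit category id
        return (r << 16) + (g << 8) + (b << 0)

    def cid_to_rgb(cid):
        # Unpack a 24-bit category id back into its (r, g, b) triple
        return ((cid >> 16) & 255, (cid >> 8) & 255, cid & 255)

    cid = rgb_to_cid(64, 128, 64)  # a camvid-style category color
    assert cid_to_rgb(cid) == (64, 128, 64)
    return cid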
def decode_batch(self, output, forloss=False):
    """
    Returns array of detections for every image in batch

    Example:
        >>> # xdoc: +REQUIRES(--download, module:ndsampler)
        >>> from netharn.models.yolo2.yolo2 import *  # NOQA
        >>> self = YoloCoder.demo()
        >>> output = self.demo_output()
        >>> batch_dets = self.decode_batch(output)
        >>> batch_dets = self.decode_batch(output, forloss=True)

    Example:
        >>> # xdoc: +REQUIRES(--download, module:ndsampler)
        >>> info = dev_demodata()
        >>> self, output = ub.take(info, ['coder', 'outputs'])
        >>> batch_dets = self.decode_batch(output)
        >>> dets = batch_dets[0].sort().scale(info['orig_sizes'][0])
        >>> print('dets.boxes = {!r}'.format(dets.boxes))
        >>> # xdoctest: +REQUIRES(--show)
        >>> import kwplot
        >>> kwplot.figure(fnum=1, doclf=True)
        >>> kwplot.imshow(info['rgb255'], colorspace='rgb')
        >>> dets.draw()
        >>> kwplot.show_if_requested()
    """
    import kwimage
    # don't modify inplace
    # output = output.clone()

    class_energy = output['class_energy']
    score_energy = output['score_energy']
    cxywh_energy = output['cxywh_energy']

    # Variables
    nB = class_energy.shape[0]
    nH, nW = class_energy.shape[-2:]
    nA = self.num_anchors

    device = class_energy.device
    if self.anchors.device != device:
        self.anchors = self.anchors.to(device)

    # Compute xc,yc, nW,nH, box_score on Tensor
    lin_x = torch.linspace(0, nW - 1, nW, device=device).repeat(nH, 1)
    lin_y = torch.linspace(0, nH - 1, nH, device=device).repeat(
        nW, 1).t().contiguous()
    anchor_w = self.anchors[:, 0].contiguous().view(1, nA, 1, 1)
    anchor_h = self.anchors[:, 1].contiguous().view(1, nA, 1, 1)

    if forloss:
        # TODO: rectify
        coord = torch.empty_like(cxywh_energy)
        coord[:, :, 0:2, :, :] = cxywh_energy[:, :, 0:2, :, :].sigmoid()  # cx,cy
        coord[:, :, 2:4, :, :] = cxywh_energy[:, :, 2:4, :, :]  # w,h

        with torch.no_grad():
            pred_boxes = torch.empty_like(cxywh_energy,
                                          device=device).view(-1, 4)
            pred_boxes[:, 0] = (coord[:, :, 0, :, :] + lin_x).view(-1)
            pred_boxes[:, 1] = (coord[:, :, 1, :, :] + lin_y).view(-1)
            pred_boxes[:, 2] = (coord[:, :, 2, :, :].exp() * anchor_w).view(-1)
            pred_boxes[:, 3] = (coord[:, :, 3, :, :].exp() * anchor_h).view(-1)

        info = {
            'coord': coord,
            'pred_boxes': pred_boxes,
        }
        return info
    else:
        cxywh = cxywh_energy.clone()
        # cxywh_ = cxywh.view(nB, self.num_anchors, -1, nH, nW)
        cxywh[:, :, 0, :].sigmoid_().add_(lin_x).div_(nW)  # X center
        cxywh[:, :, 1, :].sigmoid_().add_(lin_y).div_(nH)  # Y center
        cxywh[:, :, 2, :].exp_().mul_(anchor_w).div_(nW)   # Width
        cxywh[:, :, 3, :].exp_().mul_(anchor_h).div_(nH)   # Height

        score = score_energy.sigmoid()  # Box score

        # Compute class_score
        if len(self.classes) > 1:
            cls_scores = torch.nn.functional.softmax(class_energy, dim=2)
            cls_max, cls_max_idx = torch.max(cls_scores, 2, keepdim=True)
            cls_max.mul_(score)
        else:
            cls_max = score
            cls_max_idx = torch.zeros_like(cls_max)

        # Save detection if conf*class_conf is higher than threshold
        flags = cls_max >= self.conf_thresh
        flags_flat = flags.view(-1)

        if flags.sum() == 0:
            batch_dets = []
            for i in range(nB):
                batch_dets.append(kwimage.Detections(
                    boxes=kwimage.Boxes(
                        torch.empty((0, 4), dtype=torch.float32,
                                    device=device), 'cxywh'),
                    scores=torch.empty(0, dtype=torch.float32, device=device),
                    class_idxs=torch.empty(0, dtype=torch.int64,
                                           device=device),
                    classes=self.classes
                ))
        else:
            # Permute so the bbox dim (i.e. xywh) is trailing
            coords = cxywh.permute(0, 1, 3, 4, 2).contiguous().view(-1, 4)
            coords = coords[flags.view(-1)]

            scores = cls_max[flags]
            class_idxs = cls_max_idx[flags]
            stacked_dets = kwimage.Detections(
                boxes=kwimage.Boxes(coords, 'cxywh'),
                scores=scores,
                class_idxs=class_idxs,
                classes=self.classes
            )

            # Get indexes of splits between images of batch
            max_det_per_batch = len(self.anchors) * nH * nW
            m = max_det_per_batch
            flags_flat = flags_flat.int()
            slices = [slice(m * i, m * (i + 1)) for i in range(nB)]
            det_per_batch = torch.IntTensor(
                [flags_flat[s].sum() for s in slices])
            split_idx = torch.cumsum(det_per_batch, dim=0)

            batch_dets = []
            start = 0
            for end in split_idx:
                dets = stacked_dets[start:end]
                dets = dets.non_max_supression(thresh=self.nms_thresh)
                batch_dets.append(dets)
                start = end
        return batch_dets
def warp_affine(image, transform, dsize=None, antialias=True,
                interpolation='linear'):
    """
    Applies an affine transformation to an image with optional antialiasing.

    Args:
        image (ndarray): the input image

        transform (ndarray | Affine): a coercible affine matrix

        dsize (Tuple[int, int] | None | str):
            width and height of the resulting image. If "auto", it is
            computed such that the positive coordinates of the warped image
            will fit in the new canvas. If None, then the image size will
            not change.

        antialias (bool, default=True):
            if True determines if the transform is downsampling and applies
            antialiasing via a gaussian blur.

    TODO:
        - [ ] This will be moved to kwimage.im_cv2

    Example:
        >>> import kwimage
        >>> image = kwimage.grab_test_image('astro')
        >>> image = kwimage.grab_test_image('checkerboard')
        >>> transform = Affine.random() @ Affine.scale(0.05)
        >>> transform = Affine.scale(0.02)
        >>> warped1 = warp_affine(image, transform, dsize='auto', antialias=1, interpolation='nearest')
        >>> warped2 = warp_affine(image, transform, dsize='auto', antialias=0)
        >>> # xdoctest: +REQUIRES(--show)
        >>> import kwplot
        >>> kwplot.autompl()
        >>> pnum_ = kwplot.PlotNums(nRows=1, nCols=2)
        >>> kwplot.imshow(warped1, pnum=pnum_(), title='antialias=True')
        >>> kwplot.imshow(warped2, pnum=pnum_(), title='antialias=False')
        >>> kwplot.show_if_requested()

    Example:
        >>> import kwimage
        >>> image = kwimage.grab_test_image('astro')
        >>> image = kwimage.grab_test_image('checkerboard')
        >>> transform = Affine.random() @ Affine.scale((.1, 1.2))
        >>> warped1 = warp_affine(image, transform, dsize='auto', antialias=1)
        >>> warped2 = warp_affine(image, transform, dsize='auto', antialias=0)
        >>> # xdoctest: +REQUIRES(--show)
        >>> import kwplot
        >>> kwplot.autompl()
        >>> pnum_ = kwplot.PlotNums(nRows=1, nCols=2)
        >>> kwplot.imshow(warped1, pnum=pnum_(), title='antialias=True')
        >>> kwplot.imshow(warped2, pnum=pnum_(), title='antialias=False')
        >>> kwplot.show_if_requested()
    """
    from kwimage import im_cv2
    from kwimage.transform import Affine
    import kwimage
    import numpy as np
    import cv2
    import ubelt as ub
    transform = Affine.coerce(transform)
    flags = im_cv2._coerce_interpolation(interpolation)

    # TODO: expose these params
    # borderMode = cv2.BORDER_DEFAULT
    # borderMode = cv2.BORDER_CONSTANT
    borderMode = None
    borderValue = None

    """
    Variations that could change in the future:

        * In _gauss_params I'm not sure if we want to compute integer or
          fractional "number of downsamples".

        * The fudge factor bothers me, but seems necessary
    """

    def _gauss_params(scale, k0=5, sigma0=1, fractional=True):
        # Compute a gaussian to mitigate aliasing for a requested downsample
        # Args:
        #     scale: requested downsample factor
        #     k0 (int): kernel size for one downsample operation
        #     sigma0 (float): sigma for one downsample operation
        #     fractional (bool): controls if we compute params for integer
        #         downsample ops
        num_downs = np.log2(1 / scale)
        if not fractional:
            num_downs = max(int(num_downs), 0)
        if num_downs <= 0:
            k = 1
            sigma = 0
        else:
            # The kernel size and sigma doubles for each 2x downsample
            sigma = sigma0 * (2 ** (num_downs - 1))
            k = int(np.ceil(k0 * (2 ** (num_downs - 1))))
            k = k + int(k % 2 == 0)
        return k, sigma

    def _pyrDownK(a, k=1):
        # Downsamples by (2 ** k)x with antialiasing
        if k == 0:
            a = a.copy()
        for _ in range(k):
            a = cv2.pyrDown(a)
        return a

    if dsize is None:
        dsize = tuple(image.shape[0:2][::-1])
    elif dsize == 'auto':
        h, w = image.shape[0:2]
        boxes = kwimage.Boxes(np.array([[0, 0, w, h]]), 'xywh')
        poly = boxes.to_polygons()[0]
        warped_poly = poly.warp(transform.matrix)
        warped_box = warped_poly.to_boxes().to_ltrb().quantize()
        dsize = tuple(map(int, warped_box.data[0, 2:4]))

    if not antialias:
        M = np.asarray(transform)
        result = cv2.warpAffine(image, M[0:2], dsize=dsize, flags=flags,
                                borderMode=borderMode,
                                borderValue=borderValue)
    else:
        # Decompose the affine matrix into its 6 core parameters
        params = transform.decompose()
        sx, sy = params['scale']

        if sx >= 1 and sy >= 1:
            # No downsampling detected, no need to antialias
            M = np.asarray(transform)
            result = cv2.warpAffine(image, M[0:2], dsize=dsize, flags=flags,
                                    borderMode=borderMode,
                                    borderValue=borderValue)
        else:
            # At least one dimension is downsampled

            # Compute the transform with all scaling removed
            noscale_warp = Affine.affine(**ub.dict_diff(params, {'scale'}))

            max_scale = max(sx, sy)
            # The "fudge" factor limits the number of downsampled pyramid
            # operations. A bigger fudge factor means that the final gaussian
            # kernel for the antialiasing operation will be bigger. It
            # essentially says that at most "fudge" downsampling ops will be
            # handled by the final blur rather than the pyramid downsample.
            # It seems to help with border effects at only a small runtime
            # cost. I don't entirely understand why the border artifact is
            # introduced when this is enabled though.
            # TODO: should we allow for this fudge factor?
            # TODO: what is the real name of this? num_down_prevent?
            # skip_final_downs?
            fudge = 2
            # TODO: should final antialiasing be on?
            # Note, if fudge is non-zero it is important to do this.
            do_final_aa = 1
            # TODO: should fractional be True or False by default?
            # If fudge is 0 and fractional=0, then I think this is the same
            # as do_final_aa=0.
            fractional = 0

            num_downs = max(int(np.log2(1 / max_scale)) - fudge, 0)
            pyr_scale = 1 / (2 ** num_downs)

            # Downsample iteratively with antialiasing
            downscaled = _pyrDownK(image, num_downs)

            rest_sx = sx / pyr_scale
            rest_sy = sy / pyr_scale

            # Compute the transform from the downsampled image to the
            # destination
            rest_warp = noscale_warp @ Affine.scale((rest_sx, rest_sy))

            # Do a final small blur to account for the potential aliasing
            # in any remaining scaling operations.
            if do_final_aa:
                # Computed as the closest sigma to the [1, 4, 6, 4, 1] approx
                # used in cv2.pyrDown
                aa_sigma0 = 1.0565137190917149
                aa_k0 = 5
                k_x, sigma_x = _gauss_params(scale=rest_sx, k0=aa_k0,
                                             sigma0=aa_sigma0,
                                             fractional=fractional)
                k_y, sigma_y = _gauss_params(scale=rest_sy, k0=aa_k0,
                                             sigma0=aa_sigma0,
                                             fractional=fractional)

                # Note: when k=1, no blur occurs
                # blurBorderType = cv2.BORDER_REPLICATE
                # blurBorderType = cv2.BORDER_CONSTANT
                blurBorderType = cv2.BORDER_DEFAULT
                downscaled = cv2.GaussianBlur(
                    downscaled, (k_x, k_y), sigma_x, sigma_y,
                    borderType=blurBorderType
                )

            result = cv2.warpAffine(downscaled, rest_warp.matrix[0:2],
                                    dsize=dsize, flags=flags,
                                    borderMode=borderMode,
                                    borderValue=borderValue)
    return result
def build_targets(self, pred_cxywh, target, nH, nW, seen=0, gt_weights=None):
    """
    Compare prediction boxes and targets, convert targets to network output
    tensors

    Args:
        pred_cxywh (Tensor): shape [B * A * W * H, 4] in normalized cxywh
            format
        target (Tensor): shape [B, max(gtannots), 4]

    CommandLine:
        python ~/code/netharn/netharn/models/yolo2/light_region_loss.py RegionLoss.build_targets:1

    Example:
        >>> # xdoctest: +REQUIRES(module:kwimage)
        >>> from netharn.models.yolo2.light_yolo import Yolo
        >>> torch.random.manual_seed(0)
        >>> network = Yolo(num_classes=2, conf_thresh=4e-2)
        >>> self = RegionLoss(num_classes=network.num_classes, anchors=network.anchors)
        >>> Win, Hin = 96, 96
        >>> nW, nH = 3, 3
        >>> target = torch.FloatTensor([])
        >>> gt_weights = torch.FloatTensor([[-1, -1, -1], [1, 1, 0]])
        >>> #pred_cxywh = torch.rand(90, 4)
        >>> nB = len(gt_weights)
        >>> pred_cxywh = torch.rand(nB, len(self.anchors), nH, nW, 4).view(-1, 4)
        >>> seen = 0
        >>> self.build_targets(pred_cxywh, target, nH, nW, seen, gt_weights)

    Example:
        >>> # xdoctest: +REQUIRES(module:kwimage)
        >>> torch.random.manual_seed(0)
        >>> anchors = np.array([[.75, .75], [1.0, .3], [.3, 1.0]])
        >>> self = RegionLoss(num_classes=2, anchors=anchors)
        >>> nW, nH = 2, 2
        >>> # true boxes for each item in the batch
        >>> # each box encodes class, center, width, and height
        >>> # coordinates are normalized in the range 0 to 1
        >>> # items in each batch are padded with dummy boxes with class_id=-1
        >>> target = torch.FloatTensor([
        >>>     # boxes for batch item 0 (it has no objects, note the pad!)
        >>>     [[-1, 0, 0, 0, 0],
        >>>      [-1, 0, 0, 0, 0],
        >>>      [-1, 0, 0, 0, 0]],
        >>>     # boxes for batch item 1
        >>>     [[0, 0.50, 0.50, 1.00, 1.00],
        >>>      [1, 0.34, 0.32, 0.12, 0.32],
        >>>      [1, 0.32, 0.42, 0.22, 0.12]],
        >>> ])
        >>> gt_weights = torch.FloatTensor([[-1, -1, -1], [1, 1, 0]])
        >>> nB = len(gt_weights)
        >>> pred_cxywh = torch.rand(nB, len(anchors), nH, nW, 4).view(-1, 4)
        >>> seen = 0
        >>> coord_mask, conf_mask, cls_mask, tcoord, tconf, tcls = self.build_targets(pred_cxywh, target, nH, nW, seen, gt_weights)
    """
    import kwimage
    from netharn.util import torch_ravel_multi_index
    gtempty = (target.numel() == 0)

    # Parameters
    nB = target.shape[0] if not gtempty else 0
    # nT = target.shape[1] if not gtempty else 0
    nA = self.num_anchors

    nPixels = nW * nH

    if nB == 0:
        # torch does not preserve shapes when any dimension goes to 0
        # fix nB if there is no groundtruth
        nB = int(len(pred_cxywh) / (nA * nH * nW))
    else:
        assert nB == int(len(pred_cxywh) / (nA * nH * nW)), 'bad assumption'

    seen = seen + nB

    # Tensors
    device = target.device

    # Put the groundtruth in a format comparable to output
    tcoord = torch.zeros(nB, nA, 4, nH, nW, device=device)
    tconf = torch.zeros(nB, nA, 1, nH, nW, device=device)
    tcls = torch.zeros(nB, nA, 1, nH, nW, device=device)

    # Create weights to determine which outputs are punished
    # By default we punish all outputs for not having correct iou
    # objectness prediction. The other masks default to zero meaning that
    # by default we will not punish a prediction for having a different
    # coordinate or class label (later the groundtruths will override these
    # defaults for select grid cells and anchors)
    coord_mask = torch.zeros(nB, nA, 1, nH, nW, device=device)
    conf_mask = torch.ones(nB, nA, 1, nH, nW, device=device)

    # TODO: this could be a weight instead
    cls_mask = torch.zeros(nB, nA, 1, nH, nW, device=device,
                           dtype=torch.uint8)

    # Default conf_mask to the noobject_scale
    conf_mask.fill_(self.noobject_scale)

    # encourage the network to predict boxes centered on the grid cells by
    # setting the default target xs and ys to be (.5, .5) (i.e. the
    # relative center of a grid cell) fill the mask with ones so all
    # outputs are punished for not predicting center anchor locations ---
    # unless tcoord is overridden by a real groundtruth target later on.
    if seen < self.seen_thresh:
        # PJreddie's version
        # https://github.com/pjreddie/darknet/blob/master/src/region_layer.c#L254
        # By default encourage the network to predict no shift
        tcoord[:, :, 0:2, :, :].fill_(0.5)
        # By default encourage the network to predict no scale (in logspace)
        tcoord[:, :, 2:4, :, :].fill_(0.0)

        if False:
            # In the warmup phase we care about changing the coords to be
            # exactly the anchors if they don't predict anything, but the
            # weight is only 0.01, set it to 0.01 / self.coord_scale.
            # Note we will apply the required sqrt later
            coord_mask.fill_((0.01 / self.coord_scale))
            # This hurts even though it seems like it's what darknet does
        else:
            coord_mask.fill_(1)

    if gtempty:
        coord_mask = coord_mask.sqrt()
        conf_mask = conf_mask.sqrt()
        coord_mask = coord_mask.expand_as(tcoord)
        return coord_mask, conf_mask, cls_mask, tcoord, tconf, tcls

    # Put this back into a non-flat view
    pred_cxywh = pred_cxywh.view(nB, nA, nH, nW, 4)
    pred_boxes = kwimage.Boxes(pred_cxywh, 'cxywh')

    gt_class = target[..., 0].data
    gt_boxes_norm = kwimage.Boxes(target[..., 1:5], 'cxywh')

    # Put GT boxes into output coordinates
    gt_boxes = gt_boxes_norm.scale([nW, nH])

    # Construct "relative" versions of the true boxes, centered at 0
    # This will allow them to be compared to the anchor boxes.
    rel_gt_boxes = gt_boxes.copy()
    rel_gt_boxes.data[..., 0:2] = 0

    # true boxes with a class of -1 are fillers, ignore them
    gt_isvalid = (gt_class >= 0)
    batch_nT = gt_isvalid.sum(dim=1).cpu().numpy()

    # Compute the grid cell for each groundtruth box
    true_xs = gt_boxes.data[..., 0]
    true_ys = gt_boxes.data[..., 1]
    true_is = true_xs.long().clamp_(0, nW - 1)
    true_js = true_ys.long().clamp_(0, nH - 1)

    if gt_weights is None:
        # If unspecified give each groundtruth a default weight of 1
        gt_weights = torch.ones_like(target[..., 0], device=device)

    # Undocumented darknet detail: multiply coord weight by two minus the
    # area of the true box in normalized coordinates. (The square root is
    # applied later because these weights sit inside an MSE.)
    if self.small_boxes:
        gt_coord_weights = (gt_weights * (2.0 - gt_boxes_norm.area[..., 0]))
    else:
        gt_coord_weights = gt_weights

    # Pre multiply weights with object scales
    gt_conf_weights = gt_weights * self.object_scale
    # Pre threshold classification weights
    gt_cls_weights = (gt_weights > .5).byte()

    # Loop over ground_truths and construct tensors
    for bx in range(nB):
        # Get the actual groundtruth boxes for this batch item
        nT = batch_nT[bx]
        if nT == 0:
            continue

        # Batch ground truth
        cur_rel_gt_boxes = rel_gt_boxes[bx, 0:nT]
        cur_gt_boxes = gt_boxes[bx, 0:nT]
        cur_gt_cls = target[bx, 0:nT, 0]
        # scalars, one for each true object
        cur_true_is = true_is[bx, 0:nT]
        cur_true_js = true_js[bx, 0:nT]
        cur_true_coord_weights = gt_coord_weights[bx, 0:nT]
        cur_true_conf_weights = gt_conf_weights[bx, 0:nT]
        cur_true_cls_weights = gt_cls_weights[bx, 0:nT]

        cur_gx, cur_gy, cur_gw, cur_gh = cur_gt_boxes.data.t()

        # Batch predictions
        cur_pred_boxes = pred_boxes[bx]

        # NOTE: IOU computation is the bottleneck in this function

        # Assign groundtruth boxes to anchor boxes
        cur_anchor_gt_ious = self.rel_anchors_boxes.ious(
            cur_rel_gt_boxes, bias=0)
        _, cur_true_anchor_axs = cur_anchor_gt_ious.max(dim=0)  # best_ns in YOLO

        # Get the anchor (w,h) assigned to each true object
        cur_true_anchor_w, cur_true_anchor_h = \
            self.anchors[cur_true_anchor_axs].t()

        # Find the IOU of each predicted box with the groundtruth
        cur_pred_true_ious = cur_pred_boxes.ious(cur_gt_boxes, bias=0)
        # Assign groundtruth boxes to predicted boxes
        cur_ious, _ = cur_pred_true_ious.max(dim=-1)

        # Set loss to zero for any predicted boxes that had a high iou with
        # a groundtruth target (we won't punish them for not being
        # background). One of these will be selected as the best and be
        # punished for not predicting the groundtruth value.
        conf_mask[bx].view(-1)[cur_ious.view(-1) > self.thresh] = 0

        ####
        # Broadcast the loop over true boxes
        ####
        # Convert the true box coordinates to be comparable with pred output
        # * translate each gtbox to be relative to its assigned gridcell
        # * make w/h relative to anchor box w/h and convert to logspace
        cur_tcoord_x = cur_gx - cur_true_is.float()
        cur_tcoord_y = cur_gy - cur_true_js.float()
        cur_tcoord_w = (cur_gw / cur_true_anchor_w).log()
        cur_tcoord_h = (cur_gh / cur_true_anchor_h).log()

        if 0:
            cur_true_anchor_axs_ = cur_true_anchor_axs.cpu().numpy()
            cur_true_js_ = cur_true_js.cpu().numpy()
            cur_true_is_ = cur_true_is.cpu().numpy()
            iou_raveled_idxs = np.ravel_multi_index([
                cur_true_anchor_axs_, cur_true_js_, cur_true_is_,
                np.arange(nT)
            ], cur_pred_true_ious.shape)

            # Get the ious with the assigned boxes for each truth
            cur_true_ious = cur_pred_true_ious.view(-1)[iou_raveled_idxs]

            raveled_idxs = np.ravel_multi_index([
                [bx], cur_true_anchor_axs_, [0], cur_true_js_, cur_true_is_
            ], coord_mask.shape)

            # --------------------------------------------
            multi_index = ([bx], cur_true_anchor_axs_, [0], cur_true_js_,
                           cur_true_is_)
            # multi_index_ = multi_index
            raveled_idxs_b0 = np.ravel_multi_index(multi_index, tcoord.shape)
            # A bit faster than ravel_multi_indexes with [1], [2], and [3]
            raveled_idxs_b1 = raveled_idxs_b0 + nPixels
            raveled_idxs_b2 = raveled_idxs_b0 + nPixels * 2
            raveled_idxs_b3 = raveled_idxs_b0 + nPixels * 3
        else:
            iou_raveled_idxs = torch_ravel_multi_index([
                cur_true_anchor_axs, cur_true_js, cur_true_is,
                torch.arange(nT, device=device, dtype=torch.long)
            ], cur_pred_true_ious.shape, device)

            # Get the ious with the assigned boxes for each truth
            cur_true_ious = cur_pred_true_ious.view(-1)[iou_raveled_idxs]

            Bxs = torch.full_like(cur_true_anchor_axs, bx)
            Zxs = torch.full_like(cur_true_anchor_axs, 0)
            multi_index = [Bxs, cur_true_anchor_axs, Zxs, cur_true_js,
                           cur_true_is]
            multi_index = torch.cat([x.view(-1, 1) for x in multi_index],
                                    dim=1)
            raveled_idxs = torch_ravel_multi_index(multi_index,
                                                   coord_mask.shape, device)

            # --------------------------------------------
            # We reuse the previous multi-index because the dims are
            # broadcastable at [:, :, [0], :, :]
            raveled_idxs_b0 = torch_ravel_multi_index(multi_index,
                                                      tcoord.shape, device)
            # A bit faster than ravel_multi_indexes with [1], [2], and [3]
            raveled_idxs_b1 = raveled_idxs_b0 + nPixels
            raveled_idxs_b2 = raveled_idxs_b0 + nPixels * 2
            raveled_idxs_b3 = raveled_idxs_b0 + nPixels * 3
        # --------------------------------------------

        coord_mask.view(-1)[raveled_idxs] = cur_true_coord_weights
        cls_mask.view(-1)[raveled_idxs] = cur_true_cls_weights
        conf_mask.view(-1)[raveled_idxs] = cur_true_conf_weights

        tcoord.view(-1)[raveled_idxs_b0] = cur_tcoord_x
        tcoord.view(-1)[raveled_idxs_b1] = cur_tcoord_y
        tcoord.view(-1)[raveled_idxs_b2] = cur_tcoord_w
        tcoord.view(-1)[raveled_idxs_b3] = cur_tcoord_h

        tcls.view(-1)[raveled_idxs] = cur_gt_cls
        tconf.view(-1)[raveled_idxs] = cur_true_ious

    # because coord and conf masks are within this MSE we need to sqrt them
    coord_mask = coord_mask.sqrt()
    conf_mask = conf_mask.sqrt()
    coord_mask = coord_mask.expand_as(tcoord)

    return coord_mask, conf_mask, cls_mask, tcoord, tconf, tcls
def _decode(self, output):
    """
    Returns array of detections for every image in batch

    CommandLine:
        python ~/code/netharn/netharn/box_models/yolo2/light_postproc.py GetBoundingBoxes._decode

    Examples:
        >>> # xdoctest: +REQUIRES(module:kwimage)
        >>> import torch
        >>> torch.random.manual_seed(0)
        >>> anchors = np.array([(1.3221, 1.73145), (3.19275, 4.00944),
        >>>                     (5.05587, 8.09892), (9.47112, 4.84053),
        >>>                     (11.2364, 10.0071)])
        >>> self = GetBoundingBoxes(anchors=anchors, num_classes=20, conf_thresh=.14, nms_thresh=0.5)
        >>> output = torch.randn(16, 5, 5 + 20, 9, 9)
        >>> from netharn import XPU
        >>> output = XPU.coerce('auto').move(output)
        >>> batch_dets = self._decode(output.data)
        >>> assert len(batch_dets) == 16

    Ignore:
        >>> from netharn.models.yolo2.yolo2 import *  # NOQA
        >>> info = dev_demodata()
        >>> outputs = info['outputs']
        >>> cxywh_energy = output['cxywh_energy']
        >>> raw = info['raw']
        >>> raw_ = raw.clone()
        >>> self = GetBoundingBoxes(anchors=info['model'].anchors, num_classes=20, conf_thresh=.14, nms_thresh=0.5)
        >>> dets = self._decode(raw)[0]
        >>> dets.scores
        >>> self, output = ub.take(info, ['coder', 'outputs'])
        >>> batch_dets = self.decode_batch(output)
        >>> dets = batch_dets[0]
        >>> dets.scores
    """
    import kwimage
    # don't modify inplace
    raw_ = output.clone()

    # Variables
    bsize = raw_.shape[0]
    h, w = raw_.shape[-2:]

    device = raw_.device
    if self.anchors.device != device:
        self.anchors = self.anchors.to(device)

    # Compute xc,yc, w,h, box_score on Tensor
    lin_x = torch.linspace(0, w - 1, w, device=device).repeat(h, 1).view(h * w)
    lin_y = torch.linspace(0, h - 1, h, device=device).repeat(
        w, 1).t().contiguous().view(h * w)
    anchor_w = self.anchors[:, 0].contiguous().view(1, self.num_anchors, 1)
    anchor_h = self.anchors[:, 1].contiguous().view(1, self.num_anchors, 1)

    # -1 == 5+num_classes (we can drop feature maps if 1 class)
    output_ = raw_.view(bsize, self.num_anchors, -1, h * w)
    output_[:, :, 0, :].sigmoid_().add_(lin_x).div_(w)  # X center
    output_[:, :, 1, :].sigmoid_().add_(lin_y).div_(h)  # Y center
    output_[:, :, 2, :].exp_().mul_(anchor_w).div_(w)   # Width
    output_[:, :, 3, :].exp_().mul_(anchor_h).div_(h)   # Height
    output_[:, :, 4, :].sigmoid_()                      # Box score

    # output_[:, :, 0:4].sum()
    # torch.all(cxywh.view(-1) == output_[:, :, 0:4].contiguous().view(-1))

    # Compute class_score
    if self.num_classes > 1:
        cls_scores = torch.nn.functional.softmax(output_[:, :, 5:, :], 2)
        cls_max, cls_max_idx = torch.max(cls_scores, 2)
        cls_max.mul_(output_[:, :, 4, :])
    else:
        cls_max = output_[:, :, 4, :]
        cls_max_idx = torch.zeros_like(cls_max)

    # Save detection if conf*class_conf is higher than threshold
    # Newest lightnet code, which is based on my model code
    score_thresh = cls_max > self.conf_thresh
    score_thresh_flat = score_thresh.view(-1)

    if score_thresh.sum() == 0:
        batch_dets = []
        for i in range(bsize):
            batch_dets.append(
                kwimage.Detections(
                    boxes=kwimage.Boxes(
                        torch.empty((0, 4), dtype=torch.float32,
                                    device=device), 'cxywh'),
                    scores=torch.empty(0, dtype=torch.float32, device=device),
                    class_idxs=torch.empty(0, dtype=torch.int64,
                                           device=device),
                ))
    else:
        # Mask select boxes > conf_thresh
        coords = output_.transpose(2, 3)[..., 0:4]
        coords = coords[score_thresh[..., None].expand_as(coords)].view(-1, 4)

        scores = cls_max[score_thresh]
        class_idxs = cls_max_idx[score_thresh]

        stacked_dets = kwimage.Detections(
            boxes=kwimage.Boxes(coords, 'cxywh'),
            scores=scores,
            class_idxs=class_idxs,
        )

        # Get indexes of splits between images of batch
        max_det_per_batch = len(self.anchors) * h * w
        slices = [
            slice(max_det_per_batch * i, max_det_per_batch * (i + 1))
            for i in range(bsize)
        ]
        det_per_batch = torch.IntTensor(
            [score_thresh_flat[s].int().sum() for s in slices])
        split_idx = torch.cumsum(det_per_batch, dim=0)

        batch_dets = []
        start = 0
        for end in split_idx:
            dets = stacked_dets[start:end]
            dets = dets.non_max_supression(thresh=self.nms_thresh)
            batch_dets.append(dets)
            start = end
    return batch_dets
def random_negatives(self, num, anchors=None, window_size=None, gids=None,
                     thresh=0.0, exact=True, rng=None, patience=None):
    """
    Finds random boxes that don't have a large overlap with positive
    instances.

    Args:
        num (int): number of negative boxes to generate (actual number of
            boxes returned may be less unless `exact=True`)

        anchors (ndarray): prior normalized aspect ratios for negative
            boxes. Mutually exclusive with `window_size`.

        window_size (ndarray): absolute (W, H) sizes to use for negative
            boxes. Mutually exclusive with `anchors`.

        gids (List[int]): image-ids to generate negatives for. If not
            specified, generates for all images.

        thresh (float): overlap area threshold as a percentage of the
            negative box size. When thresh=0.0, negatives cannot overlap
            any positive; when thresh=1.0, there are no constraints on
            negative placement.

        exact (bool): if True, ensure that we generate exactly `num` boxes

        rng (RandomState): random number generator

        patience (int): number of low-progress attempts to tolerate before
            giving up when `exact=True`. Defaults to a heuristic based on
            `num`.

    Example:
        >>> from ndsampler.isect_indexer import *
        >>> import ndsampler
        >>> import kwcoco
        >>> dset = kwcoco.CocoDataset.demo('shapes8')
        >>> self = FrameIntersectionIndex.from_coco(dset)
        >>> anchors = np.array([[.35, .15], [.2, .2], [.1, .1]])
        >>> #num = 25
        >>> num = 5
        >>> rng = kwarray.ensure_rng(None)
        >>> neg_gids, neg_boxes = self.random_negatives(
        >>>     num, anchors, gids=[1], rng=rng, thresh=0.01, exact=1)
        >>> # xdoc: +REQUIRES(--show)
        >>> gid = sorted(set(neg_gids))[0]
        >>> boxes = neg_boxes.compress(neg_gids == gid)
        >>> import kwplot
        >>> kwplot.autompl()
        >>> img = kwimage.imread(dset.imgs[gid]['file_name'])
        >>> kwplot.imshow(img, doclf=True, fnum=1, colorspace='bgr')
        >>> support = self._support(gid)
        >>> kwplot.draw_boxes(support, color='blue')
        >>> kwplot.draw_boxes(boxes, color='orange')

    Example:
        >>> from ndsampler.isect_indexer import *
        >>> import kwcoco
        >>> dset = kwcoco.CocoDataset.demo('shapes8')
        >>> self = FrameIntersectionIndex.from_coco(dset)
        >>> #num = 25
        >>> num = 5
        >>> rng = kwarray.ensure_rng(None)
        >>> window_size = (50, 50)
        >>> neg_gids, neg_boxes = self.random_negatives(
        >>>     num, window_size=window_size, gids=[1], rng=rng,
        >>>     thresh=0.01, exact=1)
        >>> # xdoc: +REQUIRES(--show)
        >>> import kwplot
        >>> kwplot.autompl()
        >>> gid = sorted(set(neg_gids))[0]
        >>> boxes = neg_boxes.compress(neg_gids == gid)
        >>> img = kwimage.imread(dset.imgs[gid]['file_name'])
        >>> kwplot.imshow(img, doclf=True, fnum=1, colorspace='bgr')
        >>> support = self._support(gid)
        >>> support.draw(color='blue')
        >>> boxes.draw(color='orange')
    """
    if not ((window_size is None) ^ (anchors is None)):
        raise ValueError('window_size and anchors are mutually exclusive')

    rng = kwarray.ensure_rng(rng)
    all_gids = self.all_gids if gids is None else gids

    def _generate_rel(n):
        # Generate n candidate boxes in the normalized 0-1 domain
        cand_boxes = kwimage.Boxes.random(num=n, scale=1.0, format='tlbr',
                                          anchors=anchors, anchor_std=0,
                                          rng=rng)
        chosen_gids = np.array(sorted(rng.choice(all_gids, size=n)))
        gid_to_boxes = kwarray.group_items(cand_boxes, chosen_gids, axis=0)

        neg_gids = []
        neg_boxes = []
        for gid, img_boxes in gid_to_boxes.items():
            qtree = self.qtrees[gid]
            # scale from normalized coordinates to image coordinates
            img_boxes = img_boxes.scale((qtree.width, qtree.height))
            for box in img_boxes:
                # isect_aids, overlaps = self.ious(gid, box)
                isect_aids, overlaps = self.iooas(gid, box)
                if len(overlaps) == 0 or overlaps.max() < thresh:
                    neg_gids.append(gid)
                    neg_boxes.append(box.data)
        return neg_gids, neg_boxes

    def _generate_abs(n):
        # Randomly choose images to generate boxes for
        chosen_gids = np.array(sorted(rng.choice(all_gids, size=n)))
        gid_to_nboxes = ub.dict_hist(chosen_gids)

        neg_gids = []
        neg_boxes = []
        for gid, nboxes in gid_to_nboxes.items():
            qtree = self.qtrees[gid]
            scale = (qtree.width, qtree.height)
            anchors_ = np.array([window_size]) / np.array(scale)
            if np.any(anchors_ > 1.0):
                continue
            img_boxes = kwimage.Boxes.random(
                num=nboxes, scale=1.0, format='tlbr', anchors=anchors_,
                anchor_std=0, rng=rng)
            img_boxes = img_boxes.scale(scale)
            for box in img_boxes:
                # isect_aids, overlaps = self.ious(gid, box)
                isect_aids, overlaps = self.iooas(gid, box)
                if len(overlaps) == 0 or overlaps.max() < thresh:
                    neg_gids.append(gid)
                    neg_boxes.append(box.data)
        return neg_gids, neg_boxes

    if window_size is not None:
        _generate = _generate_abs
    elif anchors is not None:
        _generate = _generate_rel
    else:
        raise ValueError('must specify either window_size or anchors')

    if exact:
        # TODO: Don't attempt to sample negatives from images where the
        # positives cover more than a threshold percent. (Handle the case
        # of chip detections)
        factor = 2  # oversample factor
        if patience is None:
            patience = int(np.sqrt(num * 10) + 1)
        remaining_patience = patience
        timer = ub.Timer().tic()

        # Generate boxes until we have enough
        neg_gids, neg_boxes = _generate(n=int(num * factor))
        n_tries = 1
        for n_tries in it.count(n_tries):
            want = num - len(neg_boxes)
            if want <= 0:
                break
            extra_gids, extra_boxes = _generate(n=int(want * factor))
            neg_gids.extend(extra_gids)
            neg_boxes.extend(extra_boxes)
            if len(neg_boxes) < num:
                # If we haven't found a significant number of boxes our
                # patience decreases (if the wall time is getting large)
                if len(extra_boxes) <= (num // 10) and timer.toc() > 1.0:
                    remaining_patience -= 1
                    if remaining_patience == 0:
                        break

        if len(neg_boxes) < num:
            # Throw an error (or warn) if we still fell short
            message = ('Cannot make a negative sample with thresh={} '
                       'in under {} tries. Found {} but need {}'.format(
                           thresh, n_tries, len(neg_boxes), num))
            if exact == 'warn':
                warnings.warn(message)
            else:
                raise Exception(message)
        print('n_tries = {!r}'.format(n_tries))

        neg_gids = neg_gids[:num]
        neg_boxes = neg_boxes[:num]
    else:
        neg_gids, neg_boxes = _generate(n=num)

    neg_gids = np.array(neg_gids)
    neg_boxes = kwimage.Boxes(np.array(neg_boxes), 'tlbr')
    return neg_gids, neg_boxes

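# NOTE: The `exact=True` branch above is a patience-bounded
# rejection-sampling loop: oversample candidates, keep the ones that pass
# the overlap test, and spend "patience" whenever a round makes little
# progress. A minimal standalone sketch of that pattern follows; the
# `make_candidates` and `is_valid` callables are hypothetical stand-ins
# for box generation and the IoOA threshold test, not ndsampler APIs.
import numpy as np


def _sample_exact_sketch(num, make_candidates, is_valid, patience=10,
                         factor=2, rng=None):
    rng = np.random.RandomState() if rng is None else rng
    accepted = []
    remaining_patience = patience
    while len(accepted) < num and remaining_patience > 0:
        want = num - len(accepted)
        # Oversample to compensate for candidates that will be rejected
        candidates = make_candidates(want * factor, rng)
        new = [cand for cand in candidates if is_valid(cand)]
        accepted.extend(new)
        if len(new) <= (num // 10):
            # Low progress this round; spend some patience
            remaining_patience -= 1
    return accepted[:num]
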
def new_video_sample_grid(dset, window_dims, window_overlap=0.0,
                          classes_of_interest=None,
                          ignore_coverage_thresh=0.6,
                          negative_classes={'ignore', 'background'}):
    """
    Create a space-time grid to sample with

    Example:
        >>> from ndsampler.coco_regions import *  # NOQA
        >>> import kwcoco
        >>> dset = kwcoco.CocoDataset.demo('vidshapes8-multispectral', num_frames=5)
        >>> dset.conform()
        >>> window_dims = (2, 224, 224)
        >>> sample_grid = new_video_sample_grid(dset, window_dims)
        >>> print('sample_grid = {}'.format(ub.repr2(sample_grid, nl=2)))
        >>> # Now try to load a sample
        >>> tr = sample_grid['positives'][0]
        >>> import ndsampler
        >>> sampler = ndsampler.CocoSampler(dset)
        >>> tr_ = sampler._infer_target_attributes(tr)
        >>> print('tr_ = {}'.format(ub.repr2(tr_, nl=1)))
        >>> sample = sampler.load_sample(tr)
        >>> assert sample['im'].shape == (2, 224, 224, 5)

    Ignore:
        import xdev
        globals().update(xdev.get_func_kwargs(new_video_sample_grid))
    """
    import kwarray
    from ndsampler import isect_indexer
    keepbound = True

    if classes_of_interest:
        raise NotImplementedError

    # Create a sliding window object for each specific video (because they
    # may have different sizes, technically we could memoize this)
    vidid_to_slider = {}
    for vidid, video in dset.index.videos.items():
        gids = dset.index.vidid_to_gids[vidid]
        num_frames = len(gids)
        full_dims = [num_frames, video['height'], video['width']]
        window_dims_ = full_dims if window_dims == 'full' else window_dims
        slider = kwarray.SlidingWindow(full_dims, window_dims_,
                                       overlap=window_overlap,
                                       keepbound=keepbound,
                                       allow_overshoot=True)
        vidid_to_slider[vidid] = slider

    _isect_index = isect_indexer.FrameIntersectionIndex.from_coco(dset)

    positives = []
    negatives = []
    for vidid, slider in vidid_to_slider.items():
        regions = list(slider)
        gids = dset.index.vidid_to_gids[vidid]
        boxes = []
        box_gids = []
        for region in regions:
            t_sl, y_sl, x_sl = region
            region_gids = gids[t_sl]
            box_gids.append(region_gids)
            boxes.append([x_sl.start, y_sl.start, x_sl.stop, y_sl.stop])
        boxes = kwimage.Boxes(np.array(boxes), 'ltrb')

        for region, region_gids, box in zip(regions, box_gids, boxes):
            # Check to see what annotations this window-box overlaps with
            region_aids = []
            for gid in region_gids:
                # TODO: memoize to prevent dup queries (box is not hashable)
                aids = _isect_index.overlapping_aids(gid, box)
                region_aids.append(aids)
            pos_aids = sorted(ub.flatten(region_aids))
            space_slice = region[1:3]
            time_slice = region[0]
            tr = {
                'vidid': vidid,
                'time_slice': time_slice,
                'space_slice': space_slice,
                # 'slices': region,
                'gids': region_gids,
                'aids': pos_aids,
            }
            if len(pos_aids):
                positives.append(tr)
            else:
                negatives.append(tr)

    print('Found {} positives'.format(len(positives)))
    print('Found {} negatives'.format(len(negatives)))
    sample_grid = {
        'positives': positives,
        'negatives': negatives,
    }
    return sample_grid

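# NOTE: For reference, kwarray.SlidingWindow (used above) is an iterable
# of slice-tuples over the full dimensions. A small standalone demo of
# the call made for a 5-frame 224x224 video with a (2, 128, 128) window:
import kwarray

slider = kwarray.SlidingWindow((5, 224, 224), (2, 128, 128), overlap=0.0,
                               keepbound=True, allow_overshoot=True)
for region in list(slider)[:2]:
    t_sl, y_sl, x_sl = region
    print(t_sl, y_sl, x_sl)
    # e.g. slice(0, 2, None) slice(0, 128, None) slice(0, 128, None)
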
def _support(self, gid):
    qtree = self.qtrees[gid]
    support_boxes = kwimage.Boxes(list(qtree.aid_to_tlbr.values()), 'tlbr')
    return support_boxes

def new_image_sample_grid(dset, window_dims, window_overlap=0.0,
                          classes_of_interest=None,
                          ignore_coverage_thresh=0.6,
                          negative_classes={'ignore', 'background'}):
    """
    Create a spatial grid to sample with

    Example:
        >>> from ndsampler.coco_regions import *  # NOQA
        >>> import kwcoco
        >>> dset = kwcoco.CocoDataset.demo('vidshapes8-multispectral')
        >>> window_dims = (224, 224)
        >>> sample_grid = new_image_sample_grid(dset, window_dims)
        >>> print('sample_grid = {}'.format(ub.repr2(sample_grid, nl=2)))
        >>> # Now try to load a sample
        >>> tr = sample_grid['positives'][0]
        >>> import ndsampler
        >>> sampler = ndsampler.CocoSampler(dset)
        >>> tr['channels'] = '<all>'
        >>> tr_ = sampler._infer_target_attributes(tr)
        >>> print('tr_ = {}'.format(ub.repr2(tr_, nl=1)))
        >>> sample = sampler.load_sample(tr)
        >>> assert sample['im'].shape == (224, 224, 5)

    Ignore:
        import xdev
        globals().update(xdev.get_func_kwargs(new_image_sample_grid))
    """
    # import netharn as nh
    import kwarray
    from ndsampler import isect_indexer
    keepbound = True

    # Create a sliding window object for each specific image (because they
    # may have different sizes, technically we could memoize this)
    gid_to_slider = {}
    for img in dset.imgs.values():
        full_dims = [img['height'], img['width']]
        window_dims_ = full_dims if window_dims == 'full' else window_dims
        slider = kwarray.SlidingWindow(full_dims, window_dims_,
                                       overlap=window_overlap,
                                       keepbound=keepbound,
                                       allow_overshoot=True)
        gid_to_slider[img['id']] = slider

    _isect_index = isect_indexer.FrameIntersectionIndex.from_coco(dset)

    positives = []
    negatives = []
    for gid, slider in gid_to_slider.items():
        # For each image, create a box for each spatial region in the slider
        boxes = []
        regions = list(slider)
        for region in regions:
            y_sl, x_sl = region
            boxes.append([x_sl.start, y_sl.start, x_sl.stop, y_sl.stop])
        boxes = kwimage.Boxes(np.array(boxes), 'ltrb')

        for region, box in zip(regions, boxes):
            # Check to see what annotations this window-box overlaps with
            aids = _isect_index.overlapping_aids(gid, box)

            # Look at the categories within this region
            catnames = [
                dset.cats[dset.anns[aid]['category_id']]['name'].lower()
                for aid in aids
            ]

            if ignore_coverage_thresh:
                ignore_flags = [catname == 'ignore' for catname in catnames]
                if any(ignore_flags):
                    # If almost the entire window is marked as ignored then
                    # just skip this window.
                    ignore_aids = list(ub.compress(aids, ignore_flags))
                    ignore_boxes = dset.annots(ignore_aids).boxes
                    # Get an upper bound on coverage to short circuit extra
                    # computation in simple cases.
                    box_area = box.area.sum()
                    coverage_ub = ignore_boxes.area.sum() / box_area
                    if coverage_ub > ignore_coverage_thresh:
                        max_coverage = ignore_boxes.iooas(box).max()
                        if max_coverage > ignore_coverage_thresh:
                            continue
                        elif len(ignore_boxes) > 1:
                            # We have to test the complex case
                            try:
                                from shapely.ops import cascaded_union
                                ignore_shape = cascaded_union(
                                    ignore_boxes.to_shapely())
                                region_shape = box[None, :].to_shapely()[0]
                                coverage_shape = ignore_shape.intersection(
                                    region_shape)
                                real_coverage = coverage_shape.area / box_area
                                if real_coverage > ignore_coverage_thresh:
                                    continue
                            except Exception as ex:
                                import warnings
                                warnings.warn(
                                    'ignore region select had non-critical '
                                    'issue ex = {!r}'.format(ex))

            if classes_of_interest:
                # If there are CoIs then only count a region as positive if
                # one of those is in this region
                interest_flags = np.array([
                    catname in classes_of_interest
                    for catname in catnames])
                pos_aids = list(ub.compress(aids, interest_flags))
            elif negative_classes:
                # Don't count negative classes as positives
                nonnegative_flags = np.array([
                    catname not in negative_classes
                    for catname in catnames])
                pos_aids = list(ub.compress(aids, nonnegative_flags))
            else:
                pos_aids = aids

            # aids = sampler.regions.overlapping_aids(gid, box, visible_thresh=0.001)
            tr = {
                'gid': gid,
                'slices': region,
                'aids': aids,
            }
            if len(pos_aids):
                positives.append(tr)
            else:
                negatives.append(tr)

    print('Found {} positives'.format(len(positives)))
    print('Found {} negatives'.format(len(negatives)))
    sample_grid = {
        'positives': positives,
        'negatives': negatives,
    }
    return sample_grid

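# NOTE: The ignore-coverage test above pays for a shapely union only after
# a cheap upper bound passes. A standalone sketch of the exact coverage
# computation (assumes shapely and kwimage are installed; unary_union is
# the modern equivalent of the cascaded_union used above):
import kwimage
from shapely.ops import unary_union

window = kwimage.Boxes([[0, 0, 100, 100]], 'ltrb')
ignore_boxes = kwimage.Boxes([[0, 0, 60, 100], [40, 0, 100, 100]], 'ltrb')
ignore_shape = unary_union(ignore_boxes.to_shapely())
window_shape = window.to_shapely()[0]
coverage = ignore_shape.intersection(window_shape).area / window.area.sum()
print(coverage)  # 1.0: together the ignore boxes cover the whole window
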
def torch_nms(tlbr, scores, classes=None, thresh=.5, bias=0, fast=False):
    """
    Non maximum suppression implemented with pytorch tensors

    CURRENTLY NOT WORKING

    Args:
        tlbr (Tensor): Bounding boxes of one image in the format (tlbr)
        scores (Tensor): Scores of each box
        classes (Tensor, optional): the classes of each box. If specified
            nms is applied to each class separately.
        thresh (float): iou threshold

    Returns:
        ByteTensor: keep: boolean array indicating which boxes were not
            pruned.

    Example:
        >>> # DISABLE_DOCTEST
        >>> # xdoctest: +REQUIRES(module:torch)
        >>> import torch
        >>> import numpy as np
        >>> tlbr = torch.FloatTensor(np.array([
        >>>     [0, 0, 100, 100],
        >>>     [100, 100, 10, 10],
        >>>     [10, 10, 100, 100],
        >>>     [50, 50, 100, 100],
        >>>     [100, 100, 130, 130],
        >>>     [100, 100, 130, 130],
        >>>     [100, 100, 130, 130],
        >>> ], dtype=np.float32))
        >>> scores = torch.FloatTensor(np.array([.1, .5, .9, .1, .3, .5, .4]))
        >>> classes = torch.LongTensor(np.array([0, 0, 0, 0, 0, 0, 0]))
        >>> thresh = .5
        >>> flags = torch_nms(tlbr, scores, classes, thresh)
        >>> keep = np.nonzero(flags).view(-1)
        >>> tlbr[flags]
        >>> tlbr[keep]

    Example:
        >>> # DISABLE_DOCTEST
        >>> # xdoctest: +REQUIRES(module:torch)
        >>> import torch
        >>> import numpy as np
        >>> # Test to check that conflicts are correctly resolved
        >>> tlbr = torch.FloatTensor(np.array([
        >>>     [100, 100, 150, 101],
        >>>     [120, 100, 180, 101],
        >>>     [150, 100, 200, 101],
        >>> ], dtype=np.float32))
        >>> scores = torch.FloatTensor(np.linspace(.8, .9, len(tlbr)))
        >>> classes = None
        >>> thresh = .3
        >>> keep = torch_nms(tlbr, scores, classes, thresh, fast=False)
        >>> tlbr[keep]
    """
    if tlbr.numel() == 0:
        return []

    # Sort coordinates by descending score
    ordered_scores, order = scores.sort(0, descending=True)

    import kwimage
    boxes = kwimage.Boxes(tlbr[order], 'tlbr')
    ious = boxes.ious(boxes, bias=bias)

    # if False:
    #     x1, y1, x2, y2 = tlbr[order].split(1, 1)
    #     # Compute dx and dy between each pair of boxes (these mat contain every pair twice...)
    #     dx = (x2.min(x2.t()) - x1.max(x1.t())).clamp_(min=0)
    #     dy = (y2.min(y2.t()) - y1.max(y1.t())).clamp_(min=0)
    #     # Compute iou
    #     intersections = dx * dy
    #     areas = (x2 - x1) * (y2 - y1)
    #     unions = (areas + areas.t()) - intersections
    #     ious = intersections / unions

    # Filter based on iou (and class)
    # NOTE: We are using the following convention:
    #     * suppress if overlap > thresh
    #     * consider if overlap <= thresh
    # This convention has the property that when thresh=0, we don't just
    # remove everything.
    if _TORCH_HAS_BOOL_COMP:
        conflicting = (ious > thresh).byte().triu(1).bool()
    else:
        # Old way
        conflicting = (ious > thresh).triu(1)

    if classes is not None:
        ordered_classes = classes[order]
        same_class = (
            ordered_classes.unsqueeze(0) == ordered_classes.unsqueeze(1))
        conflicting = (conflicting & same_class)

    # Now we have a 2D matrix where conflicting[i, j] indicates if box[i]
    # conflicts with box[j]. For each box[i] we want to only keep the first
    # one that does not conflict with any other box[j].

    # Find out how many conflicts each ordered box has with other boxes that
    # have higher scores than it does. In other words...
    # n_conflicts[i] is the number of conflicts box[i] has with other boxes
    # that have a **higher score** than box[i] does. We will definitely
    # keep any box where n_conflicts is 0, but we need to postprocess because
    # we might actually keep some boxes currently marked as conflicted.
    n_conflicts = conflicting.sum(0).byte()

    if not fast:
        # It is not enough to simply use all places where there are no
        # conflicts.
        # Say we have boxes A, B, and C, where A conflicts with B,
        # B conflicts with C but A does not conflict with C. The fact that
        # we use A should mean that C is no longer conflicted.

        if True:
            # Marginally faster. best=618.2 us
            ordered_keep = np.zeros(len(conflicting), dtype=np.uint8)
            suppress = np.zeros(len(conflicting), dtype=bool)
            for i, row in enumerate(conflicting.cpu().numpy() > 0):
                if not suppress[i]:
                    ordered_keep[i] = 1
                    suppress[row] = 1
            ordered_keep = torch.ByteTensor(ordered_keep).to(tlbr.device)
        else:
            # Marginally slower: best=1.382 ms
            n_conflicts_post = n_conflicts.cpu()
            conflicting = conflicting.cpu()

            keep_len = len(n_conflicts_post) - 1
            for i in range(1, keep_len):
                if n_conflicts_post[i] > 0:
                    n_conflicts_post -= conflicting[i]
            n_conflicts = n_conflicts_post.to(n_conflicts.device)
            ordered_keep = (n_conflicts == 0)
    else:
        # Now we can simply keep any box that has no conflicts.
        ordered_keep = (n_conflicts == 0)

    # Unsort, so keep is aligned with input boxes
    keep = ordered_keep.new(*ordered_keep.size())
    keep.scatter_(0, order, ordered_keep)
    return keep

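# NOTE: The non-fast path above is a greedy scan over the upper-triangular
# conflict matrix. The same logic in plain numpy, as a standalone sketch
# (not a library API), using the A/B/C example from the comment:
import numpy as np


def _greedy_keep_sketch(conflicting):
    """Keep each box unless an earlier kept box conflicts with it."""
    n = len(conflicting)
    keep = np.zeros(n, dtype=np.uint8)
    suppressed = np.zeros(n, dtype=bool)
    for i in range(n):
        if not suppressed[i]:
            keep[i] = 1
            # Suppress everything that box i conflicts with
            suppressed |= conflicting[i]
    return keep


# A conflicts with B, B conflicts with C, A does not conflict with C
conflicting = np.array([[0, 1, 0],
                        [0, 0, 1],
                        [0, 0, 0]], dtype=bool)
print(_greedy_keep_sketch(conflicting))  # [1 0 1]: keeping A frees C
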
def from_coco(KW18, coco_dset):
    """
    Create a KW18 table from the annotations in a kwcoco dataset.
    Annotations that are missing track ids are assigned new ones.
    """
    import kwimage
    raw = {col: None for col in KW18.DEFAULT_COLUMNS}

    anns = coco_dset.dataset['annotations']

    boxes = kwimage.Boxes(np.array([ann['bbox'] for ann in anns]), 'xywh')
    tlbr = boxes.to_tlbr()
    cxywh = tlbr.to_cxywh()

    tl_x, tl_y, br_x, br_y = tlbr.data.T
    cx = cxywh.data[:, 0]
    cy = cxywh.data[:, 1]

    # Create track ids if not given
    track_ids = np.array([ann.get('track_id', np.nan) for ann in anns])
    missing = np.isnan(track_ids)
    valid_track_ids = track_ids[~missing]
    if len(valid_track_ids) == 0:
        next_track_id = 1
    else:
        next_track_id = valid_track_ids.max() + 1
    num_need = np.sum(missing)
    new_track_ids = np.arange(next_track_id, next_track_id + num_need)
    track_ids[missing] = new_track_ids
    track_ids = track_ids.astype(int)

    scores = np.array([ann.get('score', -1) for ann in anns])
    image_ids = np.array([ann['image_id'] for ann in anns])
    cids = np.array([ann.get('category_id', -1) for ann in anns])

    num = len(anns)
    raw['track_id'] = track_ids
    raw['track_length'] = np.full(num, fill_value=-1)
    raw['frame_number'] = image_ids

    raw['tracking_plane_loc_x'] = cx
    raw['tracking_plane_loc_y'] = cy

    raw['velocity_x'] = np.full(num, fill_value=0)
    raw['velocity_y'] = np.full(num, fill_value=0)

    raw['image_loc_x'] = cx
    raw['image_loc_y'] = cy

    raw['img_bbox_tl_x'] = tl_x
    raw['img_bbox_tl_y'] = tl_y
    raw['img_bbox_br_x'] = br_x
    raw['img_bbox_br_y'] = br_y
    raw['area'] = boxes.area.ravel()

    raw['world_loc_x'] = np.full(num, fill_value=-1)
    raw['world_loc_y'] = np.full(num, fill_value=-1)
    raw['world_loc_z'] = np.full(num, fill_value=-1)

    raw['timestamp'] = np.full(num, fill_value=-1)

    raw['confidence'] = scores
    raw['object_type_id'] = cids

    raw = {k: v for k, v in raw.items() if v is not None}

    track_ids, groupxs = kwarray.group_indices(raw['track_id'])
    for groupx in groupxs:
        raw['track_length'][groupx] = len(groupx)

    self = KW18(raw)
    return self

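# NOTE: A hypothetical usage sketch, assuming the function above is bound
# as a classmethod of a KW18 table class and that kwcoco is installed:
import kwcoco

coco_dset = kwcoco.CocoDataset.demo('shapes8')
kw18_table = KW18.from_coco(coco_dset)
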
def draw_batch(harn, batch, outputs, batch_dets, idx=None, thresh=None,
               orig_img=None, num_extra=3):
    """
    Returns:
        np.ndarray: numpy image

    Example:
        >>> # DISABLE_DOCTEST
        >>> harn = setup_harn(bsize=1, datasets='special:voc', pretrained='lightnet')
        >>> harn.initialize()
        >>> batch = harn._demo_batch(0, 'train')
        >>> outputs, loss = harn.run_batch(batch)
        >>> batch_dets = harn.raw_model.coder.decode_batch(outputs)
        >>> stacked = harn.draw_batch(batch, outputs, batch_dets)
        >>> # xdoc: +REQUIRES(--show)
        >>> kwplot.autompl()  # xdoc: +SKIP
        >>> kwplot.imshow(stacked)
        >>> kwplot.show_if_requested()
    """
    import cv2
    import kwimage
    inputs = batch['im']
    labels = batch['label']
    orig_sizes = labels['orig_sizes']

    classes = harn.datasets['train'].sampler.classes

    if idx is None:
        idxs = range(len(inputs))
    else:
        idxs = [idx]

    imgs = []
    for idx in idxs:
        chw01 = inputs[idx]
        pred_dets = batch_dets[idx]
        # pred_dets.meta['classes'] = classes

        true_dets = kwimage.Detections(
            boxes=kwimage.Boxes(labels['cxywh'][idx], 'cxywh'),
            class_idxs=labels['class_idxs'][idx].view(-1),
            weights=labels['weight'][idx],
            classes=classes,
        )

        pred_dets = pred_dets.numpy()
        true_dets = true_dets.numpy()

        true_dets = true_dets.compress(true_dets.class_idxs != -1)

        if thresh is not None:
            pred_dets = pred_dets.compress(pred_dets.scores > thresh)

        # only show so many predictions
        num_max = len(true_dets) + num_extra
        sortx = pred_dets.argsort(reverse=True)
        pred_dets = pred_dets.take(sortx[0:num_max])

        hwc01 = chw01.cpu().numpy().transpose(1, 2, 0)
        inp_size = np.array(hwc01.shape[0:2][::-1])

        true_dets.boxes.scale(inp_size, inplace=True)
        pred_dets.boxes.scale(inp_size, inplace=True)

        letterbox = harn.datasets[harn.current_tag].letterbox
        orig_size = orig_sizes[idx].cpu().numpy()
        target_size = inp_size
        img = letterbox._img_letterbox_invert(hwc01, orig_size, target_size)
        img = np.clip(img, 0, 1)
        # If the original image is given it must match the inverted shape;
        # it could be used to avoid artifacts from inverting a downscale.
        assert orig_img is None or orig_img.shape == img.shape

        true_dets.data['boxes'] = letterbox._boxes_letterbox_invert(
            true_dets.boxes, orig_size, target_size)
        pred_dets.data['boxes'] = letterbox._boxes_letterbox_invert(
            pred_dets.boxes, orig_size, target_size)

        # shift, scale, embed_size = letterbox._letterbox_transform(orig_size, target_size)
        # fig = kwplot.figure(doclf=True, fnum=1)
        # kwplot.imshow(img, colorspace='rgb')
        canvas = (img * 255).astype(np.uint8)
        canvas = true_dets.draw_on(canvas, color='green')
        canvas = pred_dets.draw_on(canvas, color='blue')

        canvas = cv2.resize(canvas, (300, 300))
        imgs.append(canvas)

    stacked = imgs[0] if len(imgs) == 1 else kwimage.stack_images_grid(imgs)
    return stacked

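# NOTE: draw_batch relies on the letterbox object to map network-space
# boxes back to original image coordinates. The inversion is essentially
# standard letterbox math; the following is a sketch of that idea (not
# the actual netharn/ndsampler implementation), assuming a uniform
# scale-to-fit transform with centered padding:
import numpy as np


def _letterbox_invert_boxes_sketch(tlbr, orig_size, target_size):
    """Map tlbr boxes from the letterboxed canvas back to the original image."""
    orig_w, orig_h = orig_size
    targ_w, targ_h = target_size
    scale = min(targ_w / orig_w, targ_h / orig_h)  # uniform fit scale
    off_x = (targ_w - orig_w * scale) / 2  # horizontal padding per side
    off_y = (targ_h - orig_h * scale) / 2  # vertical padding per side
    out = np.asarray(tlbr, dtype=float).copy()
    out[:, [0, 2]] = (out[:, [0, 2]] - off_x) / scale
    out[:, [1, 3]] = (out[:, [1, 3]] - off_y) / scale
    return out
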
def warp_affine(image, transform, dsize=None, antialias=False,
                interpolation='linear'):
    """
    Applies an affine transformation to an image with optional antialiasing.

    Args:
        image (ndarray): the input image

        transform (ndarray | Affine): a coercible affine matrix

        dsize (Tuple[int, int] | None | str):
            width and height of the resulting image. If "auto", it is
            computed such that the positive coordinates of the warped image
            will fit in the new canvas. If None, then the image size will
            not change.

        antialias (bool, default=False):
            if True determines if the transform is downsampling and applies
            antialiasing via a gaussian blur.

        interpolation (str):
            interpolation code or cv2 integer. Interpolation codes are
            linear, nearest, cubic, lanczos, and area.

    Example:
        >>> from kwimage.im_cv2 import *  # NOQA
        >>> import kwimage
        >>> from kwimage.transform import Affine
        >>> image = kwimage.grab_test_image('astro')
        >>> #image = kwimage.grab_test_image('checkerboard')
        >>> transform = Affine.random() @ Affine.scale(0.05)
        >>> transform = Affine.scale(0.02)
        >>> warped1 = warp_affine(image, transform, dsize='auto', antialias=1, interpolation='nearest')
        >>> warped2 = warp_affine(image, transform, dsize='auto', antialias=0)
        >>> # xdoctest: +REQUIRES(--show)
        >>> import kwplot
        >>> kwplot.autompl()
        >>> pnum_ = kwplot.PlotNums(nRows=1, nCols=2)
        >>> kwplot.imshow(warped1, pnum=pnum_(), title='antialias=True')
        >>> kwplot.imshow(warped2, pnum=pnum_(), title='antialias=False')
        >>> kwplot.show_if_requested()

    Example:
        >>> from kwimage.im_cv2 import *  # NOQA
        >>> import kwimage
        >>> from kwimage.transform import Affine
        >>> image = kwimage.grab_test_image('astro')
        >>> image = kwimage.grab_test_image('checkerboard')
        >>> transform = Affine.random() @ Affine.scale((.1, 1.2))
        >>> warped1 = warp_affine(image, transform, dsize='auto', antialias=1)
        >>> warped2 = warp_affine(image, transform, dsize='auto', antialias=0)
        >>> # xdoctest: +REQUIRES(--show)
        >>> import kwplot
        >>> kwplot.autompl()
        >>> pnum_ = kwplot.PlotNums(nRows=1, nCols=2)
        >>> kwplot.imshow(warped1, pnum=pnum_(), title='antialias=True')
        >>> kwplot.imshow(warped2, pnum=pnum_(), title='antialias=False')
        >>> kwplot.show_if_requested()
    """
    from kwimage import im_cv2
    from kwimage.transform import Affine
    import kwimage
    transform = Affine.coerce(transform)
    flags = im_cv2._coerce_interpolation(interpolation)

    # TODO: expose these params
    # borderMode = cv2.BORDER_DEFAULT
    # borderMode = cv2.BORDER_CONSTANT
    borderMode = None
    borderValue = None

    """
    Variations that could change in the future:

        * In _gauss_params I'm not sure if we want to compute integer or
          fractional "number of downsamples".
        * The fudge factor bothers me, but seems necessary
    """

    if dsize is None:
        dsize = tuple(image.shape[0:2][::-1])
    elif dsize == 'auto':
        h, w = image.shape[0:2]
        boxes = kwimage.Boxes(np.array([[0, 0, w, h]]), 'xywh')
        poly = boxes.to_polygons()[0]
        warped_poly = poly.warp(transform.matrix)
        warped_box = warped_poly.to_boxes().to_ltrb().quantize()
        dsize = tuple(map(int, warped_box.data[0, 2:4]))

    if not antialias:
        M = np.asarray(transform)
        result = cv2.warpAffine(image, M[0:2], dsize=dsize, flags=flags,
                                borderMode=borderMode,
                                borderValue=borderValue)
    else:
        # Decompose the affine matrix into its 6 core parameters
        params = transform.decompose()
        sx, sy = params['scale']

        if sx >= 1 and sy >= 1:
            # No downsampling detected, no need to antialias
            M = np.asarray(transform)
            result = cv2.warpAffine(image, M[0:2], dsize=dsize, flags=flags,
                                    borderMode=borderMode,
                                    borderValue=borderValue)
        else:
            # At least one dimension is downsampled

            # Compute the transform with all scaling removed
            noscale_warp = Affine.affine(**ub.dict_diff(params, {'scale'}))

            # Execute part of the downscale with iterative pyramid downs
            downscaled, residual_sx, residual_sy = _prepare_downscale(
                image, sx, sy)

            # Compute the transform from the downsampled image to the
            # destination
            rest_warp = noscale_warp @ Affine.scale(
                (residual_sx, residual_sy))

            result = cv2.warpAffine(downscaled, rest_warp.matrix[0:2],
                                    dsize=dsize, flags=flags,
                                    borderMode=borderMode,
                                    borderValue=borderValue)

    return result

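# NOTE: _prepare_downscale is referenced above but not shown here. A
# minimal sketch of the idea (an illustration under stated assumptions,
# not the actual kwimage implementation): repeatedly halve the image with
# cv2.pyrDown while the remaining scale factor stays at or below 0.5 in
# both dimensions, and return the leftover scale for the final warp.
import cv2


def _prepare_downscale_sketch(image, sx, sy):
    downscaled = image
    residual_sx, residual_sy = sx, sy
    # Each pyrDown halves the image, so the residual scale doubles
    while residual_sx <= 0.5 and residual_sy <= 0.5:
        downscaled = cv2.pyrDown(downscaled)
        residual_sx *= 2
        residual_sy *= 2
    return downscaled, residual_sx, residual_sy
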