def _postout_to_pred_ann(harn, inp_size, labels, postout, _aidbase=1,
                         undo_lb=True):
    """ Convert batch predictions to coco-style annotations for scoring """
    indices = labels['indices']
    orig_sizes = labels['orig_sizes']
    letterbox = harn.datasets[harn.current_tag].letterbox
    MAX_DETS = None
    bsize = len(indices)
    _aids = it.count(_aidbase)

    for bx in range(bsize):
        postitem = postout[bx].data.cpu().numpy()
        orig_size = orig_sizes[bx].data.cpu().numpy()
        gx = int(indices[bx].data.cpu().numpy())

        # Unpack postprocessed predictions
        sboxes = postitem.reshape(-1, 6)
        pred_boxes_ = util.Boxes(sboxes[:, 0:4], 'cxywh').scale(inp_size)
        pred_scores = sboxes[:, 4]
        pred_cxs = sboxes[:, 5].astype(int)

        if undo_lb:
            pred_boxes = letterbox._boxes_letterbox_invert(
                pred_boxes_, orig_size, inp_size)
        else:
            pred_boxes = pred_boxes_

        # Sort predictions by descending score.
        # Take at most MAX_DETS detections to evaluate.
        _pred_sortx = pred_scores.argsort()[::-1][:MAX_DETS]
        _pred_boxes = pred_boxes.take(_pred_sortx, axis=0).to_xywh().data.tolist()
        _pred_cxs = pred_cxs.take(_pred_sortx, axis=0).tolist()
        _pred_scores = pred_scores.take(_pred_sortx, axis=0).tolist()

        for box, cx, score, aid in zip(_pred_boxes, _pred_cxs, _pred_scores,
                                       _aids):
            yield {
                'id': aid,
                'image_id': gx,
                'category_id': cx,
                'bbox': box,
                'score': score,
            }
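# A hedged usage sketch (not from the source): the generator above yields
# coco-style dicts, so an evaluation loop would typically just collect them.
# `harn`, `inp_size`, `labels`, and `postout` are assumed to come from a real
# validation batch; `_aidbase` threads annotation ids across batches.
#
#   pred_anns = list(_postout_to_pred_ann(harn, inp_size, labels, postout,
#                                         _aidbase=1))
#   _aidbase = pred_anns[-1]['id'] + 1  # continue ids in the next batch
#   # each entry: {'id': ..., 'image_id': ..., 'category_id': ...,
#   #              'bbox': [x, y, w, h], 'score': ...}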
def __init__(self, num_classes, anchors, coord_scale=1.0,
             noobject_scale=1.0, object_scale=5.0, class_scale=1.0,
             thresh=0.6):
    super().__init__()
    self.num_classes = num_classes
    self.anchors = anchors
    self.num_anchors = len(anchors)
    # self.anchor_step = len(self.anchors) // self.num_anchors
    self.reduction = 32  # input_dim / output_dim

    self.coord_scale = coord_scale
    self.noobject_scale = noobject_scale
    self.object_scale = object_scale
    self.class_scale = class_scale
    self.thresh = thresh

    self.loss_coord = None
    self.loss_conf = None
    self.loss_cls = None
    self.loss_tot = None

    self.mse = nn.MSELoss(size_average=False)
    self.cls_critrion = nn.CrossEntropyLoss(size_average=False)

    nA = self.num_anchors
    self.anchor_w = torch.Tensor(self.anchors.T[0]).view(nA, 1)
    self.anchor_h = torch.Tensor(self.anchors.T[1]).view(nA, 1)

    rel_anchors_cxywh = util.Boxes(
        np.hstack([self.anchors * 0, self.anchors]).astype(np.float32),
        'cxywh')
    self.rel_anchors_tlbr = rel_anchors_cxywh.toformat('tlbr').data

    self._prev_pred_init = None
    self._prev_pred_dim = None

    self.iou_mode = None
def draw_boxes_on_image(img, boxes, color='blue', thickness=1,
                        box_format=None, colorspace='bgr'):
    """
    Draws boxes on an image.

    Args:
        img (ndarray): image to copy and draw on
        boxes (nh.util.Boxes): boxes to draw
        colorspace (str): string code of the input image colorspace

    Example:
        >>> from netharn import util
        >>> img = np.zeros((10, 10, 3), dtype=np.uint8)
        >>> color = 'dodgerblue'
        >>> thickness = 1
        >>> boxes = util.Boxes([[1, 1, 8, 8]], 'tlbr')
        >>> img2 = draw_boxes_on_image(img, boxes, color, thickness)
        >>> assert tuple(img2[1, 1]) == (255, 144, 30)
        >>> # xdoc: +REQUIRES(--show)
        >>> from netharn.util import mplutil
        >>> mplutil.autompl()  # xdoc: +SKIP
        >>> mplutil.figure(doclf=True, fnum=1)
        >>> mplutil.imshow(img2)
    """
    from netharn import util
    if not isinstance(boxes, util.Boxes):
        if box_format is None:
            raise ValueError('specify box_format')
        boxes = util.Boxes(boxes, box_format)

    color = tuple(util.Color(color).as255(colorspace))
    tlbr = boxes.to_tlbr().data
    img2 = img.copy()
    for x1, y1, x2, y2 in tlbr:
        # pt1 = (int(round(x1)), int(round(y1)))
        # pt2 = (int(round(x2)), int(round(y2)))
        pt1 = (int(x1), int(y1))
        pt2 = (int(x2), int(y2))
        img2 = cv2.rectangle(img2, pt1, pt2, color, thickness=thickness)
    return img2
def __init__(self, num_classes, anchors, coord_scale=1.0,
             noobject_scale=1.0, object_scale=5.0, class_scale=1.0,
             thresh=0.6):
    super().__init__()
    self.num_classes = num_classes

    self.anchors = torch.Tensor(anchors)
    self.num_anchors = len(anchors)
    # self.anchor_step = len(self.anchors) // self.num_anchors
    self.reduction = 32  # input_dim / output_dim

    self.coord_scale = coord_scale
    self.noobject_scale = noobject_scale
    self.object_scale = object_scale
    self.class_scale = class_scale
    self.thresh = thresh

    self.loss_coord = None
    self.loss_conf = None
    self.loss_cls = None
    self.loss_tot = None

    self.coord_mse = nn.MSELoss(size_average=False)
    self.conf_mse = nn.MSELoss(size_average=False)
    self.cls_critrion = nn.CrossEntropyLoss(size_average=False)

    # Precompute zero-centered relative anchors for iou computation
    rel_anchors_cxywh = torch.cat(
        [torch.zeros_like(self.anchors), self.anchors], 1)
    self.rel_anchors_boxes = util.Boxes(rel_anchors_cxywh, 'cxywh')

    self._prev_pred_init = None
    self._prev_pred_dim = None

    self.iou_mode = None
def __init__(self, num_classes, anchors, coord_scale=1.0,
             noobject_scale=1.0, object_scale=5.0, class_scale=1.0,
             thresh=0.6, seen_thresh=12800, small_boxes=False,
             mse_factor=0.5):
    super(RegionLoss, self).__init__()
    self.num_classes = num_classes
    self.seen_thresh = seen_thresh

    self.anchors = torch.Tensor(anchors)
    self.num_anchors = len(anchors)

    self.coord_scale = coord_scale
    self.noobject_scale = noobject_scale
    self.object_scale = object_scale
    self.class_scale = class_scale
    self.thresh = thresh

    self.loss_coord = None
    self.loss_conf = None
    self.loss_cls = None
    self.loss_tot = None

    self.coord_mse = nn.MSELoss(size_average=False)
    self.conf_mse = nn.MSELoss(size_average=False)
    self.cls_critrion = nn.CrossEntropyLoss(size_average=False)

    # Precompute zero-centered relative anchors for iou computation
    rel_anchors_cxywh = torch.cat(
        [torch.zeros_like(self.anchors), self.anchors], 1)
    self.rel_anchors_boxes = util.Boxes(rel_anchors_cxywh, 'cxywh')

    self.small_boxes = small_boxes
    self.mse_factor = mse_factor
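# A minimal standalone sketch (numpy, not the class's torch code path) of why
# the zero-centered "relative" anchors are precomputed: each truth box is
# assigned to the anchor whose shape best matches it, so both are compared
# with a shared center and only width/height matter. The helper name
# `_shape_iou` and the example numbers are illustrative assumptions.
import numpy as np

def _shape_iou(wh_a, wh_b):
    # IoU of two boxes that share a center: overlap is the min of each dim
    iw = np.minimum(wh_a[0], wh_b[0])
    ih = np.minimum(wh_a[1], wh_b[1])
    inter = iw * ih
    union = wh_a[0] * wh_a[1] + wh_b[0] * wh_b[1] - inter
    return inter / union

anchors = np.array([[.75, .75], [1.0, .3], [.3, 1.0]])
true_wh = np.array([0.8, 0.6])
best_ax = np.argmax([_shape_iou(a, true_wh) for a in anchors])
assert best_ax == 0  # the squarish anchor matches this squarish truth best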
def draw_boxes_on_image(img, boxes, color='blue', thickness=1,
                        box_format=None):
    """
    Example:
        >>> from netharn import util
        >>> img = np.zeros((10, 10, 3), dtype=np.uint8)
        >>> color = 'blue'
        >>> thickness = 1
        >>> boxes = util.Boxes([[1, 1, 8, 8]], 'tlbr')
        >>> img2 = draw_boxes_on_image(img, boxes, color, thickness)
        >>> # xdoc: +REQUIRES(--show)
        >>> from netharn.util import mplutil
        >>> mplutil.qtensure()  # xdoc: +SKIP
        >>> mplutil.figure(doclf=True, fnum=1)
        >>> mplutil.imshow(img2)
    """
    from netharn import util
    if not isinstance(boxes, util.Boxes):
        if box_format is None:
            raise ValueError('specify box_format')
        boxes = util.Boxes(boxes, box_format)

    color = tuple(util.Color(color).as255('bgr'))
    tlbr = boxes.to_tlbr().data
    img2 = img.copy()
    for x1, y1, x2, y2 in tlbr:
        # pt1 = (int(round(x1)), int(round(y1)))
        # pt2 = (int(round(x2)), int(round(y2)))
        pt1 = (int(x1), int(y1))
        pt2 = (int(x2), int(y2))
        img2 = cv2.rectangle(img2, pt1, pt2, color, thickness=thickness)
    return img2
def build_targets(self, pred_cxywh, target, nH, nW, seen=0, gt_weights=None):
    """
    Compare prediction boxes and targets, convert targets to network output
    tensors

    Args:
        pred_cxywh (Tensor): shape [B * A * W * H, 4] in normalized cxywh
            format
        target (Tensor): shape [B, max(gtannots), 5]

    CommandLine:
        python ~/code/netharn/netharn/models/yolo2/light_region_loss.py RegionLoss.build_targets:1

    Example:
        >>> from netharn.models.yolo2.light_yolo import Yolo
        >>> from netharn.models.yolo2.light_region_loss import RegionLoss
        >>> torch.random.manual_seed(0)
        >>> network = Yolo(num_classes=2, conf_thresh=4e-2)
        >>> self = RegionLoss(num_classes=network.num_classes, anchors=network.anchors)
        >>> Win, Hin = 96, 96
        >>> nW, nH = 3, 3
        >>> target = torch.FloatTensor([])
        >>> gt_weights = torch.FloatTensor([[-1, -1, -1], [1, 1, 0]])
        >>> #pred_cxywh = torch.rand(90, 4)
        >>> nB = len(gt_weights)
        >>> pred_cxywh = torch.rand(nB, len(self.anchors), nH, nW, 4).view(-1, 4)
        >>> seen = 0
        >>> self.build_targets(pred_cxywh, target, nH, nW, seen, gt_weights)

    Example:
        >>> from netharn.models.yolo2.light_region_loss import RegionLoss
        >>> torch.random.manual_seed(0)
        >>> anchors = np.array([[.75, .75], [1.0, .3], [.3, 1.0]])
        >>> self = RegionLoss(num_classes=2, anchors=anchors)
        >>> nW, nH = 2, 2
        >>> # true boxes for each item in the batch
        >>> # each box encodes class, center, width, and height
        >>> # coordinates are normalized in the range 0 to 1
        >>> # items in each batch are padded with dummy boxes with class_id=-1
        >>> target = torch.FloatTensor([
        >>>     # boxes for batch item 0 (it has no objects, note the pad!)
        >>>     [[-1, 0, 0, 0, 0],
        >>>      [-1, 0, 0, 0, 0],
        >>>      [-1, 0, 0, 0, 0]],
        >>>     # boxes for batch item 1
        >>>     [[0, 0.50, 0.50, 1.00, 1.00],
        >>>      [1, 0.34, 0.32, 0.12, 0.32],
        >>>      [1, 0.32, 0.42, 0.22, 0.12]],
        >>> ])
        >>> gt_weights = torch.FloatTensor([[-1, -1, -1], [1, 1, 0]])
        >>> nB = len(gt_weights)
        >>> pred_cxywh = torch.rand(nB, len(anchors), nH, nW, 4).view(-1, 4)
        >>> seen = 0
        >>> coord_mask, conf_mask, cls_mask, tcoord, tconf, tcls = self.build_targets(pred_cxywh, target, nH, nW, seen, gt_weights)
    """
    gtempty = (target.numel() == 0)

    # Parameters
    nB = target.shape[0] if not gtempty else 0
    # nT = target.shape[1] if not gtempty else 0
    nA = self.num_anchors

    if nB == 0:
        # torch does not preserve shapes when any dimension goes to 0
        # fix nB if there is no groundtruth
        nB = int(len(pred_cxywh) / (nA * nH * nW))
    else:
        assert nB == int(len(pred_cxywh) / (nA * nH * nW)), 'bad assumption'

    seen = seen + nB

    # Tensors
    device = self.get_device()

    # Put the groundtruth in a format comparable to output
    tcoord = torch.zeros(nB, nA, 4, nH, nW, device=device)
    tconf = torch.zeros(nB, nA, 1, nH, nW, device=device)
    tcls = torch.zeros(nB, nA, 1, nH, nW, device=device)

    # Create weights to determine which outputs are punished
    # By default we punish all outputs for not having correct iou
    # objectness prediction. The other masks default to zero meaning that
    # by default we will not punish a prediction for having a different
    # coordinate or class label (later the groundtruths will override
    # these defaults for select grid cells and anchors)
    coord_mask = torch.zeros(nB, nA, 1, nH, nW, device=device)
    conf_mask = torch.ones(nB, nA, 1, nH, nW, device=device)
    cls_mask = torch.zeros(nB, nA, 1, nH, nW, device=device).byte()

    # Default conf_mask to the noobject_scale
    conf_mask.fill_(self.noobject_scale)

    # Encourage the network to predict boxes centered on the grid cells by
    # setting the default target xs and ys to be (.5, .5) (i.e. the
    # relative center of a grid cell). Fill the mask with ones so all
    # outputs are punished for not predicting center anchor locations ---
    # unless tcoord is overridden by a real groundtruth target later on.
    if seen < 12800:
        # PJReddie's version
        # https://github.com/pjreddie/darknet/blob/master/src/region_layer.c#L254
        # By default encourage the network to predict no shift
        tcoord[:, :, 0:2, :, :].fill_(0.5)
        # By default encourage the network to predict no scale (in logspace)
        tcoord[:, :, 2:4, :, :].fill_(0.0)

        # In the warmup phase we care about changing the coords to be
        # exactly the anchors if they don't predict anything, but the
        # weight is only 0.01, set it to 0.01 / self.coord_scale.
        # Note we will apply the required sqrt later
        coord_mask.fill_((0.01 / self.coord_scale))

    if gtempty:
        coord_mask = coord_mask.sqrt()
        conf_mask = conf_mask.sqrt()
        coord_mask = coord_mask.expand_as(tcoord)
        return coord_mask, conf_mask, cls_mask, tcoord, tconf, tcls

    # Put this back into a non-flat view
    pred_cxywh = pred_cxywh.view(nB, nA, nH, nW, 4)
    pred_boxes = util.Boxes(pred_cxywh, 'cxywh')

    gt_class = target[..., 0].data
    gt_boxes_norm = util.Boxes(target[..., 1:5], 'cxywh')
    gt_boxes = gt_boxes_norm.scale([nW, nH])

    # Construct "relative" versions of the true boxes, centered at 0
    # This will allow them to be compared to the anchor boxes.
    rel_gt_boxes = gt_boxes.copy()
    rel_gt_boxes.data[..., 0:2] = 0

    # true boxes with a class of -1 are fillers, ignore them
    gt_isvalid = (gt_class >= 0)

    # Compute the grid cell for each groundtruth box
    true_xs, true_ys = gt_boxes.components[0:2]
    true_is = true_xs.long().clamp_(0, nW - 1)
    true_js = true_ys.long().clamp_(0, nH - 1)

    if gt_weights is None:
        # If unspecified give each groundtruth a default weight of 1
        gt_weights = torch.ones_like(target[..., 0], device=device)

    # Undocumented darknet detail: multiply coord weight by two minus the
    # area of the true box in normalized coordinates. The required sqrt is
    # applied later because this weight sits inside an MSE term.
    gt_coord_weights = (gt_weights * (2.0 - gt_boxes_norm.area[..., 0]))

    # Loop over ground_truths and construct tensors
    for bx in range(nB):
        # Get the actual groundtruth boxes for this batch item
        flags = gt_isvalid[bx]
        if not np.any(flags):
            continue

        # Batch ground truth
        batch_rel_gt_boxes = rel_gt_boxes[bx, flags]
        cur_gt_boxes = gt_boxes[bx, flags]
        cur_true_is = true_is[bx, flags]
        cur_true_js = true_js[bx, flags]
        cur_true_weights = gt_weights[bx, flags]
        cur_true_coord_weights = gt_coord_weights[bx, flags]

        # Batch predictions
        cur_pred_boxes = pred_boxes[bx]

        # Assign groundtruth boxes to anchor boxes
        anchor_ious = self.rel_anchors_boxes.ious(batch_rel_gt_boxes, bias=0)
        _, best_anchor_idxs = anchor_ious.max(dim=0)  # best_ns in YOLO

        # Assign groundtruth boxes to predicted boxes
        ious = cur_pred_boxes.ious(cur_gt_boxes, bias=0)
        cur_ious, _ = ious.max(dim=-1)

        # Set loss to zero for any predicted boxes that had a high iou with
        # a groundtruth target (we won't punish them for not being
        # background). One of these will be selected as the best and be
        # punished for not predicting the groundtruth value.
        conf_mask[bx].view(-1)[cur_ious.view(-1) > self.thresh] = 0

        for t in range(cur_gt_boxes.shape[0]):
            gt_box_ = cur_gt_boxes[t]
            weight = cur_true_weights[t]
            # coord weights incorporate weight and true box area
            coord_weight = cur_true_coord_weights[t]

            # The assigned (best) anchor index
            ax = best_anchor_idxs[t].item()
            anchor_w, anchor_h = self.anchors[ax]

            # Compute this ground truth's grid cell
            gx, gy, gw, gh = gt_box_.data
            gi = cur_true_is[t].item()
            gj = cur_true_js[t].item()

            # The prediction will be punished if it does not match this
            # true box
            # pred_box_ = cur_pred_boxes[best_n, gj, gi]

            # Get the precomputed iou of the truth with the prediction
            # corresponding to the assigned anchor and grid cell
            iou = ious[ax, gj, gi, t].item()

            # Mark that we will care about the predicted box with some weight
            coord_mask[bx, ax, 0, gj, gi] = coord_weight

            # PJReddie delta_region_class:
            # https://github.com/pjreddie/darknet/blob/master/src/region_layer.c#L112
            # https://github.com/pjreddie/darknet/blob/master/src/region_layer.c#L314
            cls_mask[bx, ax, 0, gj, gi] = int(weight > .5)
            conf_mask[bx, ax, 0, gj, gi] = self.object_scale * weight

            # The true box is converted into coordinates comparable to the
            # network outputs by:
            # (1) centering the true box on its assigned grid cell
            # (2) dividing its width and height by its assigned anchor
            # (3) taking the log of width and height because the raw
            #     network wh outputs are in logspace.
            tcoord[bx, ax, 0, gj, gi] = gx - gi
            tcoord[bx, ax, 1, gj, gi] = gy - gj
            tcoord[bx, ax, 2, gj, gi] = math.log(gw / anchor_w)
            tcoord[bx, ax, 3, gj, gi] = math.log(gh / anchor_h)
            tconf[bx, ax, 0, gj, gi] = iou  # if rescore else 1
            tcls[bx, ax, 0, gj, gi] = target[bx, t, 0]

    # Because the coord and conf masks sit inside an MSE we need to sqrt them
    coord_mask = coord_mask.sqrt()
    conf_mask = conf_mask.sqrt()
    coord_mask = coord_mask.expand_as(tcoord)

    # masked_tcls = tcls[cls_mask].view(-1).long()
    # cls_probs_mask = cls_mask.reshape(nB, nA, nH, nW, 1).repeat(1, 1, 1, 1, nC)
    # cls_probs_mask = Variable(cls_probs_mask, requires_grad=False)
    # masked_cls_probs = cls_probs[cls_probs_mask].view(-1, nC)

    return coord_mask, conf_mask, cls_mask, tcoord, tconf, tcls
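# A small standalone check of the tcoord encoding described above. The
# numbers are illustrative assumptions: a truth box (gx, gy, gw, gh) in grid
# units and an assigned anchor (aw, ah), matching steps (1)-(3).
import math

gx, gy, gw, gh = 1.7, 0.9, 2.0, 1.0
gi, gj = int(gx), int(gy)   # assigned grid cell (1, 0)
aw, ah = 1.0, 0.3           # assigned anchor width/height

t = (gx - gi, gy - gj, math.log(gw / aw), math.log(gh / ah))
# t == (0.7, 0.9, log(2.0), log(10/3)); these are the raw values the network
# must output at cell (gi, gj) for this anchor to reproduce the truth exactly.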
def torch_nms(tlbr, scores, classes=None, thresh=.5, bias=0, fast=False):
    """
    Non maximum suppression implemented with pytorch tensors

    CURRENTLY NOT WORKING

    Args:
        tlbr (Tensor): Bounding boxes of one image in the format (tlbr)
        scores (Tensor): Scores of each box
        classes (Tensor, optional): the classes of each box. If specified
            nms is applied to each class separately.
        thresh (float): iou threshold

    Returns:
        ByteTensor: keep: boolean array indicating which boxes were not
            pruned.

    Example:
        >>> # DISABLE_DOCTEST
        >>> import torch
        >>> import numpy as np
        >>> tlbr = torch.FloatTensor(np.array([
        >>>     [0, 0, 100, 100],
        >>>     [100, 100, 10, 10],
        >>>     [10, 10, 100, 100],
        >>>     [50, 50, 100, 100],
        >>>     [100, 100, 130, 130],
        >>>     [100, 100, 130, 130],
        >>>     [100, 100, 130, 130],
        >>> ], dtype=np.float32))
        >>> scores = torch.FloatTensor(np.array([.1, .5, .9, .1, .3, .5, .4]))
        >>> classes = torch.FloatTensor(np.array([0, 0, 0, 0, 0, 0, 0]))
        >>> thresh = .5
        >>> keep = torch_nms(tlbr, scores, classes, thresh)
        >>> tlbr[keep]

    Example:
        >>> # DISABLE_DOCTEST
        >>> import torch
        >>> import numpy as np
        >>> # Test to check that conflicts are correctly resolved
        >>> tlbr = torch.FloatTensor(np.array([
        >>>     [100, 100, 150, 101],
        >>>     [120, 100, 180, 101],
        >>>     [150, 100, 200, 101],
        >>> ], dtype=np.float32))
        >>> scores = torch.FloatTensor(np.linspace(.8, .9, len(tlbr)))
        >>> classes = None
        >>> thresh = .3
        >>> keep = torch_nms(tlbr, scores, classes, thresh, fast=False)
        >>> tlbr[keep]
    """
    if tlbr.numel() == 0:
        return []

    # Sort coordinates by descending score
    ordered_scores, order = scores.sort(0, descending=True)

    from netharn import util
    boxes = util.Boxes(tlbr[order], 'tlbr')
    ious = boxes.ious(boxes, bias=bias)

    # if False:
    #     x1, y1, x2, y2 = tlbr[order].split(1, 1)
    #     # Compute dx and dy between each pair of boxes (these mats contain
    #     # every pair twice...)
    #     dx = (x2.min(x2.t()) - x1.max(x1.t())).clamp_(min=0)
    #     dy = (y2.min(y2.t()) - y1.max(y1.t())).clamp_(min=0)
    #     # Compute iou
    #     intersections = dx * dy
    #     areas = (x2 - x1) * (y2 - y1)
    #     unions = (areas + areas.t()) - intersections
    #     ious = intersections / unions

    # Filter based on iou (and class)
    conflicting = (ious > thresh).triu(1)

    if classes is not None:
        ordered_classes = classes[order]
        same_class = (
            ordered_classes.unsqueeze(0) == ordered_classes.unsqueeze(1))
        conflicting = (conflicting & same_class)

    # Now we have a 2D matrix where conflicting[i, j] indicates if box[i]
    # conflicts with box[j]. For each box[i] we want to only keep the first
    # one that does not conflict with any other box[j].

    # Find out how many conflicts each ordered box has with other boxes that
    # have higher scores than it does. In other words...
    # n_conflicts[i] is the number of conflicts box[i] has with other boxes
    # that have a **higher score** than box[i] does. We will definitely
    # keep any box where n_conflicts is 0, but we need to postprocess because
    # we might actually keep some boxes currently marked as conflicted.
    n_conflicts = conflicting.sum(0).byte()

    if not fast:
        # It is not enough to simply use all places where there are no
        # conflicts. Say we have boxes A, B, and C, where A conflicts with B,
        # B conflicts with C, but A does not conflict with C. The fact that
        # we keep A should mean that C is no longer conflicted.

        if True:
            # Marginally faster. best=618.2 us
            ordered_keep = np.zeros(len(conflicting), dtype=np.uint8)
            suppress = np.zeros(len(conflicting), dtype=bool)
            for i, row in enumerate(conflicting.cpu().numpy() > 0):
                if not suppress[i]:
                    ordered_keep[i] = 1
                    suppress[row] = 1
            ordered_keep = torch.ByteTensor(ordered_keep).to(tlbr.device)
        else:
            # Marginally slower: best=1.382 ms
            n_conflicts_post = n_conflicts.cpu()
            conflicting = conflicting.cpu()

            keep_len = len(n_conflicts_post) - 1
            for i in range(1, keep_len):
                if n_conflicts_post[i] > 0:
                    n_conflicts_post -= conflicting[i]

            n_conflicts = n_conflicts_post.to(n_conflicts.device)
            ordered_keep = (n_conflicts == 0)
    else:
        # Now we can simply keep any box that has no conflicts.
        ordered_keep = (n_conflicts == 0)

    # Unsort, so keep is aligned with input boxes
    keep = ordered_keep.new(*ordered_keep.size())
    keep.scatter_(0, order, ordered_keep)
    return keep
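# A standalone illustration (not from the source) of the final unsort step:
# scatter_ maps keep decisions made in score-sorted order back to the
# original box order. The toy scores here are assumptions for demonstration.
import torch

scores = torch.FloatTensor([.1, .9, .5])
_, order = scores.sort(0, descending=True)    # order == [1, 2, 0]
ordered_keep = torch.ByteTensor([1, 0, 1])    # decisions in sorted order
keep = ordered_keep.new(*ordered_keep.size())
keep.scatter_(0, order, ordered_keep)         # keep[order[i]] = ordered_keep[i]
# keep is now aligned with the original indexing of `scores`: [1, 1, 0]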
def __getitem__(self, index):
    """
    CommandLine:
        python ~/code/netharn/examples/yolo_voc.py YoloVOCDataset.__getitem__ --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> import sys, ubelt
        >>> sys.path.append(ubelt.truepath('~/code/netharn/examples'))
        >>> from yolo_voc import *
        >>> self = YoloVOCDataset(split='train')
        >>> index = 7
        >>> chw01, label = self[index]
        >>> hwc01 = chw01.numpy().transpose(1, 2, 0)
        >>> print(hwc01.shape)
        >>> norm_boxes = label['targets'].numpy().reshape(-1, 5)[:, 1:5]
        >>> inp_size = hwc01.shape[-2::-1]
        >>> # xdoc: +REQUIRES(--show)
        >>> from netharn.util import mplutil
        >>> mplutil.figure(doclf=True, fnum=1)
        >>> mplutil.qtensure()  # xdoc: +SKIP
        >>> mplutil.imshow(hwc01, colorspace='rgb')
        >>> inp_boxes = util.Boxes(norm_boxes, 'cxywh').scale(inp_size).data
        >>> mplutil.draw_boxes(inp_boxes, box_format='cxywh')
        >>> mplutil.show_if_requested()

    Example:
        >>> # DISABLE_DOCTEST
        >>> import sys, ubelt
        >>> sys.path.append(ubelt.truepath('~/code/netharn/examples'))
        >>> from yolo_voc import *
        >>> self = YoloVOCDataset(split='test')
        >>> index = 0
        >>> chw01, label = self[index]
        >>> hwc01 = chw01.numpy().transpose(1, 2, 0)
        >>> print(hwc01.shape)
        >>> norm_boxes = label['targets'].numpy().reshape(-1, 5)[:, 1:5]
        >>> inp_size = hwc01.shape[-2::-1]
        >>> # xdoc: +REQUIRES(--show)
        >>> from netharn.util import mplutil
        >>> mplutil.figure(doclf=True, fnum=1)
        >>> mplutil.qtensure()  # xdoc: +SKIP
        >>> mplutil.imshow(hwc01, colorspace='rgb')
        >>> inp_boxes = util.Boxes(norm_boxes, 'cxywh').scale(inp_size).data
        >>> mplutil.draw_boxes(inp_boxes, box_format='cxywh')
        >>> mplutil.show_if_requested()

    Ignore:
        >>> self = YoloVOCDataset(split='train')
        >>> for index in ub.ProgIter(range(len(self))):
        >>>     chw01, label = self[index]
        >>>     target = label['targets']
        >>>     wh = target[:, 3:5]
        >>>     if np.any(wh == 0):
        >>>         raise ValueError()

        >>> # Check that we can collate this data
        >>> self = YoloVOCDataset(split='train')
        >>> inbatch = [self[index] for index in range(0, 16)]
        >>> from netharn.data import collate
        >>> batch = collate.padded_collate(inbatch)
        >>> inputs, labels = batch
        >>> assert len(labels) == len(inbatch[0][1])
        >>> targets = labels['targets']
        >>> orig_sizes = labels['orig_sizes']
        >>> gt_weights = labels['gt_weights']
        >>> indices = labels['indices']
        >>> bg_weights = labels['bg_weights']
        >>> assert list(targets.shape) == [16, 6, 5]
        >>> assert list(gt_weights.shape) == [16, 6]
        >>> assert list(orig_sizes.shape) == [16, 2]
        >>> assert list(indices.shape) == [16, 1]
    """
    if isinstance(index, tuple):
        # Get size index from the batch loader
        index, size_index = index
        if size_index is None:
            inp_size = self.base_wh
        else:
            inp_size = self.multi_scale_inp_size[size_index]
    else:
        inp_size = self.base_wh
    inp_size = np.array(inp_size)

    image, tlbr, gt_classes, gt_weights = self._load_item(index)
    orig_size = np.array(image.shape[0:2][::-1])
    bbs = util.Boxes(tlbr, 'tlbr').to_imgaug(shape=image.shape)

    if self.augmenter:
        # Ensure the same augmenter is used for bboxes and images
        seq_det = self.augmenter.to_deterministic()

        image = seq_det.augment_image(image)
        bbs = seq_det.augment_bounding_boxes([bbs])[0]

        # Clip any bounding boxes that went out of bounds
        h, w = image.shape[0:2]
        tlbr = util.Boxes.from_imgaug(bbs)

        old_area = tlbr.area
        tlbr = tlbr.clip(0, 0, w - 1, h - 1, inplace=True)
        new_area = tlbr.area

        # Remove any boxes that have gone significantly out of bounds.
        remove_thresh = 0.1
        flags = (new_area / old_area).ravel() > remove_thresh

        tlbr = tlbr.compress(flags, inplace=True)
        gt_classes = gt_classes[flags]
        gt_weights = gt_weights[flags]

        bbs = tlbr.to_imgaug(shape=image.shape)

    # Apply letterbox resize transform to train and test
    self.letterbox.target_size = inp_size
    image = self.letterbox.augment_image(image)
    bbs = self.letterbox.augment_bounding_boxes([bbs])[0]
    tlbr_inp = util.Boxes.from_imgaug(bbs)

    # Remove any boxes that are no longer visible or out of bounds
    flags = (tlbr_inp.area > 0).ravel()
    tlbr_inp = tlbr_inp.compress(flags, inplace=True)
    gt_classes = gt_classes[flags]
    gt_weights = gt_weights[flags]

    chw01 = torch.FloatTensor(image.transpose(2, 0, 1) / 255.0)

    # Lightnet YOLO accepts truth tensors in the format:
    # [class_id, center_x, center_y, w, h]
    # where coordinates are normalized between 0 and 1
    cxywh_norm = tlbr_inp.toformat('cxywh').scale(1 / inp_size)

    _target_parts = [gt_classes[:, None], cxywh_norm.data]
    target = np.concatenate(_target_parts, axis=-1)
    target = torch.FloatTensor(target)

    # Return index information in the label as well
    orig_size = torch.LongTensor(orig_size)
    index = torch.LongTensor([index])
    # how much do we care about each annotation in this image?
    gt_weights = torch.FloatTensor(gt_weights)
    # how much do we care about the background in this image?
    bg_weight = torch.FloatTensor([1.0])
    label = {
        'targets': target,
        'gt_weights': gt_weights,
        'orig_sizes': orig_size,
        'indices': index,
        'bg_weights': bg_weight,
    }
    return chw01, label
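# Sketch of the per-item layout produced by __getitem__ above (shapes assume
# nT annotations survive augmentation; derived from the return statement):
#   chw01:               FloatTensor [3, H, W], values in [0, 1]
#   label['targets']:    FloatTensor [nT, 5] as [class_id, cx, cy, w, h],
#                        coordinates normalized to [0, 1]
#   label['gt_weights']: FloatTensor [nT]
#   label['orig_sizes']: LongTensor [2] as (W, H) of the original image
#   label['indices']:    LongTensor [1]
#   label['bg_weights']: FloatTensor [1]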
def detection_confusions(true_boxes, true_cxs, true_weights, pred_boxes,
                         pred_scores, pred_cxs, bg_weight=1.0, ovthresh=0.5,
                         bg_cls=-1, bias=0.0):
    """
    Classify detections by assigning to groundtruth boxes.

    Given predictions and truth for an image return (y_pred, y_true,
    y_score), which is suitable for sklearn classification metrics

    Args:
        true_boxes (ndarray): boxes in tlbr format
        true_cxs (ndarray): classes of each box
        true_weights (ndarray): weight of each groundtruth item
        pred_boxes (ndarray): predicted boxes in tlbr format
        pred_scores (ndarray): scores for each prediction
        pred_cxs (ndarray): class predictions
        ovthresh (float): overlap threshold
        bg_weight (float): weight of background predictions (default=1)
        bias (float): for computing overlap, either 1 or 0

    Returns:
        dict: with relevant clf information

    Ignore:
        from xinspect.dynamic_kwargs import get_func_kwargs
        globals().update(get_func_kwargs(detection_confusions))

    Example:
        >>> from netharn.metrics.detections import *
        >>> from netharn.metrics.detections import _ave_precision, pr_curves
        >>> true_boxes = np.array([[ 0, 0, 10, 10],
        >>>                        [10, 0, 20, 10],
        >>>                        [10, 0, 20, 10],
        >>>                        [20, 0, 30, 10]])
        >>> true_weights = np.array([1, 0, .9, 1])
        >>> bg_weight = 1.0
        >>> true_cxs = np.array([0, 0, 1, 1])
        >>> pred_boxes = np.array([[6, 2, 20, 10],
        >>>                        [3, 2, 9, 7],
        >>>                        [20, 0, 30, 10]])
        >>> pred_scores = np.array([.5, .5, .5])
        >>> pred_cxs = np.array([0, 0, 1])
        >>> y = detection_confusions(true_boxes, true_cxs, true_weights,
        >>>                          pred_boxes, pred_scores, pred_cxs,
        >>>                          bg_weight=bg_weight, ovthresh=.5)
        >>> y = pd.DataFrame(y)
        >>> print(y)  # xdoc: +IGNORE_WANT
           pred  true  score  weight  cx  txs  pxs
        0     1     1 0.5000  1.0000   1    3    2
        1     0    -1 0.5000  1.0000   0   -1    1
        2     0     0 0.5000  0.0000   0    1    0
        3    -1     0 0.0000  1.0000   0    0   -1
        4    -1     1 0.0000  0.9000   1    2   -1

    Example:
        >>> true_boxes = np.array([[ 0, 0, 10, 10],
        >>>                        [10, 0, 20, 10],
        >>>                        [10, 0, 20, 10],
        >>>                        [20, 0, 30, 10]])
        >>> true_weights = np.array([1, 0.0, 1, 1.0])
        >>> bg_weight = 1.0
        >>> true_cxs = np.array([0, 0, 1, 1])
        >>> pred_boxes = np.array([[6, 2, 20, 10],
        >>>                        [3, 2, 9, 7],
        >>>                        [20, 0, 30, 10]])
        >>> pred_scores = np.array([.5, .6, .7])
        >>> pred_cxs = np.array([0, 0, 1])
        >>> y = detection_confusions(true_boxes, true_cxs, true_weights,
        >>>                          pred_boxes, pred_scores, pred_cxs,
        >>>                          bg_weight=bg_weight, ovthresh=.5)
        >>> y = pd.DataFrame(y)
        >>> print(y)  # xdoc: +IGNORE_WANT
    """
    y_pred = []
    y_true = []
    y_score = []
    y_weight = []
    cxs = []
    y_pxs = []
    y_txs = []

    if bg_weight is None:
        bg_weight = 1.0

    if not isinstance(true_boxes, util.Boxes):
        true_boxes = util.Boxes(true_boxes, 'tlbr')
    if not isinstance(pred_boxes, util.Boxes):
        pred_boxes = util.Boxes(pred_boxes, 'tlbr')

    # Keep track of which true items have been used
    true_unused = np.ones(len(true_cxs), dtype=bool)
    if true_weights is None:
        true_weights = np.ones(len(true_cxs))
    else:
        true_weights = np.array(true_weights)
    pred_scores = np.array(pred_scores)
    pred_cxs = np.array(pred_cxs)
    true_cxs = np.array(true_cxs)

    # Group true boxes by class
    # Keep track which true boxes are unused / not assigned
    cx_to_idxs = ub.group_items(range(len(true_cxs)), true_cxs)
    cx_to_tboxes = util.group_items(true_boxes, true_cxs, axis=0)
    cx_to_tweight = util.group_items(true_weights, true_cxs, axis=0)

    # cx_to_boxes = ub.group_items(true_boxes, true_cxs)
    # cx_to_boxes = ub.map_vals(np.array, cx_to_boxes)

    # sort predictions by descending score
    _pred_sortx = pred_scores.argsort()[::-1]
    _pred_boxes = pred_boxes.take(_pred_sortx, axis=0)
    _pred_cxs = pred_cxs.take(_pred_sortx, axis=0)
    _pred_scores = pred_scores.take(_pred_sortx, axis=0)

    # For each predicted detection box
    # Allow it to match the truth of a particular class
    for px, cx, box, score in zip(_pred_sortx, _pred_cxs, _pred_boxes,
                                  _pred_scores):
        cls_true_idxs = cx_to_idxs.get(cx, [])

        ovmax = -np.inf
        ovidx = None
        weight = bg_weight
        tx = None  # we will set this to the index of the assigned gt

        if len(cls_true_idxs):
            cls_true_boxes = cx_to_tboxes[cx]
            cls_true_weights = cx_to_tweight[cx]
            # cls_true_boxes = true_boxes.take(cls_true_idxs, axis=0)
            # cls_true_weights = true_weights.take(cls_true_idxs, axis=0)

            overlaps = cls_true_boxes.ious(box, bias=bias)

            # choose best score by default
            ovidx = overlaps.argsort()[-1]
            ovmax = overlaps[ovidx]
            weight = cls_true_weights[ovidx]
            tx = cls_true_idxs[ovidx]

        if ovmax > ovthresh and true_unused[tx]:
            # Assign this prediction to a groundtruth object
            # Mark this prediction as a true positive
            y_pred.append(cx)
            y_true.append(cx)
            y_score.append(score)
            y_weight.append(weight)
            cxs.append(cx)
            # cls_unused[ovidx] = False

            true_unused[tx] = False

            y_pxs.append(px)
            y_txs.append(tx)
        else:
            # Assign this prediction to the background
            # Mark this prediction as a false positive
            y_pred.append(cx)
            y_true.append(bg_cls)  # use -1 as background ignore class
            y_score.append(score)
            y_weight.append(bg_weight)
            cxs.append(cx)

            tx = -1
            y_pxs.append(px)
            y_txs.append(tx)

    # All pred boxes have been assigned to a truth box or the background.
    # Mark unused true boxes we failed to predict as false negatives
    for tx in np.where(true_unused)[0]:
        # Mark each unmatched truth as a false negative
        y_pred.append(-1)
        y_true.append(true_cxs[tx])
        y_score.append(0.0)
        y_weight.append(true_weights[tx])
        cxs.append(true_cxs[tx])

        px = -1
        y_pxs.append(px)
        y_txs.append(tx)

    y = {
        'pred': y_pred,
        'true': y_true,
        'score': y_score,
        'weight': y_weight,
        'cx': cxs,
        'txs': y_txs,  # index into the original true box for this row
        'pxs': y_pxs,  # index into the original pred box for this row
    }
    # print('y = {}'.format(ub.repr2(y, nl=1)))
    # y = pd.DataFrame(y)
    return y
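# A hedged follow-on sketch (not part of the function above, and not
# netharn's own `_ave_precision`): the returned columns are shaped so they
# plug into sklearn-style metrics. For one class, average precision might be
# computed like this, assuming sklearn is available; `_ap_for_class` is a
# hypothetical helper name.
import numpy as np
from sklearn.metrics import average_precision_score

def _ap_for_class(y, cx):
    # restrict to rows for this class; weight 0 rows are effectively ignored
    flags = np.array(y['cx']) == cx
    is_true = (np.array(y['true'])[flags] == cx).astype(int)
    scores = np.array(y['score'])[flags]
    weights = np.array(y['weight'])[flags]
    return average_precision_score(is_true, scores, sample_weight=weights)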
def _build_targets_tensor(self, pred_boxes, ground_truth, nH, nW, seen=0,
                          gt_weights=None):
    """
    Compare prediction boxes and ground truths, convert ground truths to
    network output tensors

    Example:
        >>> from netharn.models.yolo2.light_yolo import Yolo
        >>> from netharn.models.yolo2.light_region_loss import RegionLoss
        >>> torch.random.manual_seed(0)
        >>> network = Yolo(num_classes=2, conf_thresh=4e-2)
        >>> self = RegionLoss(num_classes=network.num_classes, anchors=network.anchors)
        >>> Win, Hin = 96, 96
        >>> nW, nH = 3, 3
        >>> # true boxes for each item in the batch
        >>> # each box encodes class, center, width, and height
        >>> # coordinates are normalized in the range 0 to 1
        >>> # items in each batch are padded with dummy boxes with class_id=-1
        >>> ground_truth = torch.FloatTensor([
        >>>     # boxes for batch item 0 (it has no objects, note the pad!)
        >>>     [[-1, 0, 0, 0, 0],
        >>>      [-1, 0, 0, 0, 0],
        >>>      [-1, 0, 0, 0, 0]],
        >>>     # boxes for batch item 1
        >>>     [[0, 0.50, 0.50, 1.00, 1.00],
        >>>      [1, 0.34, 0.32, 0.12, 0.32],
        >>>      [1, 0.32, 0.42, 0.22, 0.12]],
        >>> ])
        >>> gt_weights = torch.FloatTensor([[-1, -1, -1], [1, 1, 0]])
        >>> pred_boxes = torch.rand(90, 4)
        >>> seen = 0
    """
    # Parameters
    nB = ground_truth.size(0)
    nT = ground_truth.size(1)
    nA = self.num_anchors
    nAnchors = nA * nH * nW
    nPixels = nH * nW

    seen = seen + nB

    # Tensors
    conf_mask = torch.ones(nB, nA, nPixels) * self.noobject_scale
    coord_mask = torch.zeros(nB, nA, 1, nPixels)
    cls_mask = torch.zeros(nB, nA, nPixels).byte()
    tcoord = torch.zeros(nB, nA, 4, nPixels)
    tconf = torch.zeros(nB, nA, nPixels)
    tcls = torch.zeros(nB, nA, nPixels)

    if seen < 12800:
        coord_mask.fill_(1)
        tcoord[:, :, 0].fill_(0.5)
        tcoord[:, :, 1].fill_(0.5)

    pred_cxywh = pred_boxes
    pred_tlbr = util.Boxes(pred_cxywh.data.cpu().numpy(),
                           'cxywh').toformat('tlbr').data

    gt_class = ground_truth[..., 0].data.cpu().numpy()
    gt_cxywh = util.Boxes(
        ground_truth[..., 1:5].data.cpu().numpy().astype(np.float32),
        'cxywh').scale([nW, nH])
    gt_tlbr = gt_cxywh.to_tlbr().data

    rel_gt_cxywh = gt_cxywh.copy()
    rel_gt_cxywh.data.T[0:2] = 0
    rel_gt_tlbr = rel_gt_cxywh.toformat('tlbr').data

    gt_isvalid = (gt_class >= 0)

    # Loop over ground_truths and construct tensors
    for bx in range(nB):
        # Get the actual groundtruth boxes for this batch item
        flags = gt_isvalid[bx]
        if not np.any(flags):
            continue

        # Create gt anchor assignments
        batch_rel_gt_tlbr = rel_gt_tlbr[bx][flags]
        anchor_ious = util.box_ious(self.rel_anchors_tlbr,
                                    batch_rel_gt_tlbr, bias=0,
                                    mode=self.iou_mode)
        best_ns = np.argmax(anchor_ious, axis=0)

        # Setting confidence mask
        cur_pred_tlbr = pred_tlbr[bx * nAnchors:(bx + 1) * nAnchors]
        cur_gt_tlbr = gt_tlbr[bx][flags]

        ious = util.box_ious(cur_pred_tlbr, cur_gt_tlbr, bias=0,
                             mode=self.iou_mode)
        cur_ious = torch.FloatTensor(ious.max(-1))
        conf_mask[bx].view(-1)[cur_ious > self.thresh] = 0

        for t in range(nT):
            if not flags[t]:
                break

            if gt_weights is None:
                weight = 1
            else:
                weight = gt_weights[bx][t]

            gx, gy, gw, gh = gt_cxywh.data[bx][t]
            gi = min(nW - 1, max(0, int(gx)))
            gj = min(nH - 1, max(0, int(gy)))

            best_n = best_ns[t]

            gt_box_ = gt_tlbr[bx][t]
            pred_box_ = pred_tlbr[bx * nAnchors + best_n * nPixels +
                                  gj * nW + gi]

            iou = float(
                util.box_ious(gt_box_[None, :], pred_box_[None, :], bias=0,
                              mode=self.iou_mode)[0, 0])

            best_anchor = self.anchors[best_n]
            best_aw, best_ah = best_anchor

            if weight == 0:
                # HACK: Only allow weight == 0 and weight == 1 for now
                # TODO:
                #   - [ ] Allow for continuous weights
                #   - [ ] Allow for per-image background weight
                conf_mask[bx, best_n, gj * nW + gi] = 0
            else:
                assert weight == 1, 'can only have weight in {0, 1} for now'

                coord_mask[bx, best_n, 0, gj * nW + gi] = 1
                cls_mask[bx, best_n, gj * nW + gi] = 1
                conf_mask[bx, best_n, gj * nW + gi] = self.object_scale

                tcoord[bx, best_n, 0, gj * nW + gi] = gx - gi
                tcoord[bx, best_n, 1, gj * nW + gi] = gy - gj
                tcoord[bx, best_n, 2, gj * nW + gi] = math.log(gw / best_aw)
                tcoord[bx, best_n, 3, gj * nW + gi] = math.log(gh / best_ah)
                tconf[bx, best_n, gj * nW + gi] = iou
                tcls[bx, best_n, gj * nW + gi] = ground_truth[bx, t, 0]

    return coord_mask, conf_mask, cls_mask, tcoord, tconf, tcls
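# A standalone check (not from the source) of the flat indexing used above:
# predictions are stored as [B, A, H, W] flattened row-major, so the offset
# bx * nAnchors + best_n * nPixels + gj * nW + gi is exactly a
# ravel_multi_index over (bx, best_n, gj, gi). Toy sizes are assumptions.
import numpy as np

nB, nA, nH, nW = 2, 3, 3, 3
nPixels = nH * nW
nAnchors = nA * nPixels

bx, best_n, gj, gi = 1, 2, 0, 1
flat = bx * nAnchors + best_n * nPixels + gj * nW + gi
assert flat == np.ravel_multi_index([bx, best_n, gj, gi], (nB, nA, nH, nW))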
def build_targets(self, pred_cxywh, target, nH, nW, seen=0, gt_weights=None):
    """
    Compare prediction boxes and targets, convert targets to network output
    tensors

    Args:
        pred_cxywh (Tensor): shape [B * A * W * H, 4] in normalized cxywh
            format
        target (Tensor): shape [B, max(gtannots), 5]

    CommandLine:
        python ~/code/netharn/netharn/models/yolo2/light_region_loss.py RegionLoss.build_targets:1

    Example:
        >>> from netharn.models.yolo2.light_yolo import Yolo
        >>> from netharn.models.yolo2.light_region_loss import RegionLoss
        >>> torch.random.manual_seed(0)
        >>> network = Yolo(num_classes=2, conf_thresh=4e-2)
        >>> self = RegionLoss(num_classes=network.num_classes, anchors=network.anchors)
        >>> Win, Hin = 96, 96
        >>> nW, nH = 3, 3
        >>> target = torch.FloatTensor([])
        >>> gt_weights = torch.FloatTensor([[-1, -1, -1], [1, 1, 0]])
        >>> #pred_cxywh = torch.rand(90, 4)
        >>> nB = len(gt_weights)
        >>> pred_cxywh = torch.rand(nB, len(self.anchors), nH, nW, 4).view(-1, 4)
        >>> seen = 0
        >>> self.build_targets(pred_cxywh, target, nH, nW, seen, gt_weights)

    Example:
        >>> from netharn.models.yolo2.light_region_loss import RegionLoss
        >>> torch.random.manual_seed(0)
        >>> anchors = np.array([[.75, .75], [1.0, .3], [.3, 1.0]])
        >>> self = RegionLoss(num_classes=2, anchors=anchors)
        >>> nW, nH = 2, 2
        >>> # true boxes for each item in the batch
        >>> # each box encodes class, center, width, and height
        >>> # coordinates are normalized in the range 0 to 1
        >>> # items in each batch are padded with dummy boxes with class_id=-1
        >>> target = torch.FloatTensor([
        >>>     # boxes for batch item 0 (it has no objects, note the pad!)
        >>>     [[-1, 0, 0, 0, 0],
        >>>      [-1, 0, 0, 0, 0],
        >>>      [-1, 0, 0, 0, 0]],
        >>>     # boxes for batch item 1
        >>>     [[0, 0.50, 0.50, 1.00, 1.00],
        >>>      [1, 0.34, 0.32, 0.12, 0.32],
        >>>      [1, 0.32, 0.42, 0.22, 0.12]],
        >>> ])
        >>> gt_weights = torch.FloatTensor([[-1, -1, -1], [1, 1, 0]])
        >>> nB = len(gt_weights)
        >>> pred_cxywh = torch.rand(nB, len(anchors), nH, nW, 4).view(-1, 4)
        >>> seen = 0
        >>> coord_mask, conf_mask, cls_mask, tcoord, tconf, tcls = self.build_targets(pred_cxywh, target, nH, nW, seen, gt_weights)
    """
    gtempty = (target.numel() == 0)

    # Parameters
    nB = target.shape[0] if not gtempty else 0
    # nT = target.shape[1] if not gtempty else 0
    nA = self.num_anchors
    nPixels = nW * nH

    if nB == 0:
        # torch does not preserve shapes when any dimension goes to 0
        # fix nB if there is no groundtruth
        nB = int(len(pred_cxywh) / (nA * nH * nW))
    else:
        assert nB == int(len(pred_cxywh) / (nA * nH * nW)), 'bad assumption'

    seen = seen + nB

    # Tensors
    device = self.get_device()

    # Put the groundtruth in a format comparable to output
    tcoord = torch.zeros(nB, nA, 4, nH, nW, device=device)
    tconf = torch.zeros(nB, nA, 1, nH, nW, device=device)
    tcls = torch.zeros(nB, nA, 1, nH, nW, device=device)

    # Create weights to determine which outputs are punished
    # By default we punish all outputs for not having correct iou
    # objectness prediction. The other masks default to zero meaning that
    # by default we will not punish a prediction for having a different
    # coordinate or class label (later the groundtruths will override
    # these defaults for select grid cells and anchors)
    coord_mask = torch.zeros(nB, nA, 1, nH, nW, device=device)
    conf_mask = torch.ones(nB, nA, 1, nH, nW, device=device)
    cls_mask = torch.zeros(nB, nA, 1, nH, nW, device=device).byte()

    # Default conf_mask to the noobject_scale
    conf_mask.fill_(self.noobject_scale)

    # Encourage the network to predict boxes centered on the grid cells by
    # setting the default target xs and ys to be (.5, .5) (i.e. the
    # relative center of a grid cell). Fill the mask with ones so all
    # outputs are punished for not predicting center anchor locations ---
    # unless tcoord is overridden by a real groundtruth target later on.
    if seen < self.seen_thresh:
        # PJReddie's version
        # https://github.com/pjreddie/darknet/blob/master/src/region_layer.c#L254
        # By default encourage the network to predict no shift
        tcoord[:, :, 0:2, :, :].fill_(0.5)
        # By default encourage the network to predict no scale (in logspace)
        tcoord[:, :, 2:4, :, :].fill_(0.0)

        if False:
            # In the warmup phase we care about changing the coords to be
            # exactly the anchors if they don't predict anything, but the
            # weight is only 0.01, set it to 0.01 / self.coord_scale.
            # Note we will apply the required sqrt later
            coord_mask.fill_((0.01 / self.coord_scale))
            # This hurts even though it seems like it's what darknet does
        else:
            coord_mask.fill_(1)

    if gtempty:
        coord_mask = coord_mask.sqrt()
        conf_mask = conf_mask.sqrt()
        coord_mask = coord_mask.expand_as(tcoord)
        return coord_mask, conf_mask, cls_mask, tcoord, tconf, tcls

    # Put this back into a non-flat view
    pred_cxywh = pred_cxywh.view(nB, nA, nH, nW, 4)
    pred_boxes = util.Boxes(pred_cxywh, 'cxywh')

    gt_class = target[..., 0].data
    gt_boxes_norm = util.Boxes(target[..., 1:5], 'cxywh')
    # Put GT boxes into output coordinates
    gt_boxes = gt_boxes_norm.scale([nW, nH])

    # Construct "relative" versions of the true boxes, centered at 0
    # This will allow them to be compared to the anchor boxes.
    rel_gt_boxes = gt_boxes.copy()
    rel_gt_boxes.data[..., 0:2] = 0

    # true boxes with a class of -1 are fillers, ignore them
    gt_isvalid = (gt_class >= 0)
    batch_nT = gt_isvalid.sum(dim=1).cpu().numpy()

    # Compute the grid cell for each groundtruth box
    true_xs = gt_boxes.data[..., 0]
    true_ys = gt_boxes.data[..., 1]
    true_is = true_xs.long().clamp_(0, nW - 1)
    true_js = true_ys.long().clamp_(0, nH - 1)

    if gt_weights is None:
        # If unspecified give each groundtruth a default weight of 1
        gt_weights = torch.ones_like(target[..., 0], device=device)

    # Undocumented darknet detail: multiply coord weight by two minus the
    # area of the true box in normalized coordinates. The required sqrt is
    # applied later because this weight sits inside an MSE term.
    if self.small_boxes:
        gt_coord_weights = (gt_weights * (2.0 - gt_boxes_norm.area[..., 0]))
    else:
        gt_coord_weights = gt_weights

    # Pre multiply weights with object scales
    gt_conf_weights = gt_weights * self.object_scale
    # Pre threshold classification weights
    gt_cls_weights = (gt_weights > .5)

    # Loop over ground_truths and construct tensors
    for bx in range(nB):
        # Get the actual groundtruth boxes for this batch item
        nT = batch_nT[bx]
        if nT == 0:
            continue

        # Batch ground truth
        cur_rel_gt_boxes = rel_gt_boxes[bx, 0:nT]
        cur_gt_boxes = gt_boxes[bx, 0:nT]
        cur_gt_cls = target[bx, 0:nT, 0]
        # scalars, one for each true object
        cur_true_is = true_is[bx, 0:nT]
        cur_true_js = true_js[bx, 0:nT]
        cur_true_coord_weights = gt_coord_weights[bx, 0:nT]
        cur_true_conf_weights = gt_conf_weights[bx, 0:nT]
        cur_true_cls_weights = gt_cls_weights[bx, 0:nT]

        cur_gx, cur_gy, cur_gw, cur_gh = cur_gt_boxes.data.t()

        # Batch predictions
        cur_pred_boxes = pred_boxes[bx]

        # NOTE: IOU computation is the bottleneck in this function

        # Assign groundtruth boxes to anchor boxes
        cur_anchor_gt_ious = self.rel_anchors_boxes.ious(
            cur_rel_gt_boxes, bias=0)
        _, cur_true_anchor_axs = cur_anchor_gt_ious.max(dim=0)  # best_ns in YOLO

        # Get the anchor (w,h) assigned to each true object
        cur_true_anchor_w, cur_true_anchor_h = self.anchors[
            cur_true_anchor_axs].t()

        # Find the IOU of each predicted box with the groundtruth
        cur_pred_true_ious = cur_pred_boxes.ious(cur_gt_boxes, bias=0)
        # Assign groundtruth boxes to predicted boxes
        cur_ious, _ = cur_pred_true_ious.max(dim=-1)

        # Set loss to zero for any predicted boxes that had a high iou with
        # a groundtruth target (we won't punish them for not being
        # background). One of these will be selected as the best and be
        # punished for not predicting the groundtruth value.
        conf_mask[bx].view(-1)[cur_ious.view(-1) > self.thresh] = 0

        ####
        # Broadcast the loop over true boxes
        ####
        # Convert the true box coordinates to be comparable with pred output
        # * translate each gtbox to be relative to its assigned gridcell
        # * make w/h relative to anchor box w / h and convert to logspace
        cur_tcoord_x = cur_gx - cur_true_is.float()
        cur_tcoord_y = cur_gy - cur_true_js.float()
        cur_tcoord_w = (cur_gw / cur_true_anchor_w).log()
        cur_tcoord_h = (cur_gh / cur_true_anchor_h).log()

        iou_raveled_idxs = np.ravel_multi_index(
            [cur_true_anchor_axs, cur_true_js, cur_true_is, np.arange(nT)],
            cur_pred_true_ious.shape)
        # Get the ious with the assigned boxes for each truth
        cur_true_ious = cur_pred_true_ious.view(-1)[iou_raveled_idxs]

        raveled_idxs = np.ravel_multi_index(
            [[bx], cur_true_anchor_axs, [0], cur_true_js, cur_true_is],
            coord_mask.shape)

        # --------------------------------------------
        raveled_idxs_b0 = np.ravel_multi_index(
            [[bx], cur_true_anchor_axs, [0], cur_true_js, cur_true_is],
            tcoord.shape)
        # A bit faster than ravel_multi_indexes with [1], [2], and [3]
        raveled_idxs_b1 = raveled_idxs_b0 + nPixels
        raveled_idxs_b2 = raveled_idxs_b0 + nPixels * 2
        raveled_idxs_b3 = raveled_idxs_b0 + nPixels * 3
        # --------------------------------------------

        coord_mask.view(-1)[raveled_idxs] = cur_true_coord_weights
        cls_mask.view(-1)[raveled_idxs] = cur_true_cls_weights
        conf_mask.view(-1)[raveled_idxs] = cur_true_conf_weights

        tcoord.view(-1)[raveled_idxs_b0] = cur_tcoord_x
        tcoord.view(-1)[raveled_idxs_b1] = cur_tcoord_y
        tcoord.view(-1)[raveled_idxs_b2] = cur_tcoord_w
        tcoord.view(-1)[raveled_idxs_b3] = cur_tcoord_h

        tcls.view(-1)[raveled_idxs] = cur_gt_cls
        tconf.view(-1)[raveled_idxs] = cur_true_ious

    # Because the coord and conf masks sit inside an MSE we need to sqrt them
    coord_mask = coord_mask.sqrt()
    conf_mask = conf_mask.sqrt()
    coord_mask = coord_mask.expand_as(tcoord)

    return coord_mask, conf_mask, cls_mask, tcoord, tconf, tcls
def undo_letterbox(cxywh):
    boxes = util.Boxes(cxywh, 'cxywh')
    letterbox = harn.datasets['train'].letterbox
    return letterbox._boxes_letterbox_invert(boxes, orig_size, inp_size)
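# Note: undo_letterbox is a closure; `harn`, `orig_size`, and `inp_size` come
# from the enclosing scope rather than being passed in. A hedged usage sketch
# (the names below are assumed from that scope, not from the source):
#
#   orig_boxes = undo_letterbox(postout[bx][:, 0:4])  # cxywh, network coords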
def _measure_confusion(harn, postout, labels, inp_size, **kw):
    targets = labels['targets']
    gt_weights = labels['gt_weights']
    bg_weights = labels['bg_weights']
    # orig_sizes = labels['orig_sizes']
    # indices = labels['indices']

    def asnumpy(tensor):
        return tensor.data.cpu().numpy()

    bsize = len(targets)
    for bx in range(bsize):
        postitem = asnumpy(postout[bx])
        target = asnumpy(targets[bx]).reshape(-1, 5)
        true_cxywh = target[:, 1:5]
        true_cxs = target[:, 0]
        true_weight = asnumpy(gt_weights[bx])

        # Remove padded truth
        flags = true_cxs != -1
        true_cxywh = true_cxywh[flags]
        true_cxs = true_cxs[flags]
        true_weight = true_weight[flags]

        # orig_size = asnumpy(orig_sizes[bx])
        # gx = int(asnumpy(indices[bx]))

        # how much do we care about the background in this image?
        bg_weight = float(asnumpy(bg_weights[bx]))

        # Unpack postprocessed predictions
        sboxes = postitem.reshape(-1, 6)
        pred_cxywh = sboxes[:, 0:4]
        pred_scores = sboxes[:, 4]
        pred_cxs = sboxes[:, 5].astype(int)

        true_tlbr = util.Boxes(true_cxywh, 'cxywh').to_tlbr()
        pred_tlbr = util.Boxes(pred_cxywh, 'cxywh').to_tlbr()

        true_tlbr = true_tlbr.scale(inp_size)
        pred_tlbr = pred_tlbr.scale(inp_size)

        # TODO: can we invert the letterbox transform here and clip for
        # some extra mAP?
        true_boxes = true_tlbr.data
        pred_boxes = pred_tlbr.data

        y = nh.metrics.detection_confusions(
            true_boxes=true_boxes,
            true_cxs=true_cxs,
            true_weights=true_weight,
            pred_boxes=pred_boxes,
            pred_scores=pred_scores,
            pred_cxs=pred_cxs,
            bg_weight=bg_weight,
            bg_cls=-1,
            ovthresh=harn.hyper.other['ovthresh'],
            **kw)
        # y['gx'] = gx
        yield y
def visualize_prediction(harn, batch, outputs, postout, idx=0, thresh=None):
    """
    Draws truth (green) and predicted (blue) boxes for one batch item on the
    current matplotlib figure.
    """
    # xdoc: +REQUIRES(--show)
    inputs, labels = batch

    targets, gt_weights, orig_sizes, indices, bg_weights = labels

    chw01 = inputs[idx]
    target = targets[idx]
    postitem = postout[idx]
    # ---
    hwc01 = chw01.cpu().numpy().transpose(1, 2, 0)

    # TRUE
    true_cxs = target[:, 0].long()
    true_boxes = target[:, 1:5]
    flags = true_cxs != -1
    true_boxes = true_boxes[flags]
    true_cxs = true_cxs[flags]

    # PRED
    pred_boxes = postitem[:, 0:4]
    pred_scores = postitem[:, 4]
    pred_cxs = postitem[:, 5]

    if thresh is not None:
        flags = pred_scores > thresh
        pred_cxs = pred_cxs[flags]
        pred_boxes = pred_boxes[flags]
        pred_scores = pred_scores[flags]

    pred_clsnms = list(
        ub.take(harn.datasets['train'].label_names,
                pred_cxs.long().cpu().numpy()))
    pred_labels = [
        '{}@{:.2f}'.format(n, s)
        for n, s in zip(pred_clsnms, pred_scores)
    ]

    true_labels = list(
        ub.take(harn.datasets['train'].label_names,
                true_cxs.long().cpu().numpy()))
    # ---

    inp_size = np.array(hwc01.shape[0:2][::-1])

    true_boxes_ = util.Boxes(true_boxes.cpu().numpy(),
                             'cxywh').scale(inp_size).data
    pred_boxes_ = util.Boxes(pred_boxes.cpu().numpy(),
                             'cxywh').scale(inp_size).data

    from netharn.util import mplutil
    mplutil.figure(doclf=True, fnum=1)
    mplutil.imshow(hwc01, colorspace='rgb')
    mplutil.draw_boxes(true_boxes_, color='green', box_format='cxywh',
                       labels=true_labels)
    mplutil.draw_boxes(pred_boxes_, color='blue', box_format='cxywh',
                       labels=pred_labels)
def _measure_confusion(harn, postout, labels, inp_size):
    targets = labels[0]
    gt_weights = labels[1]
    orig_sizes = labels[2]
    indices = labels[3]
    bg_weights = labels[4]

    # def clip_boxes_to_letterbox(boxes, letterbox_tlbr):
    #     if boxes.shape[0] == 0:
    #         return boxes
    #     boxes = boxes.copy()
    #     left, top, right, bot = letterbox_tlbr
    #     x1, y1, x2, y2 = boxes.T
    #     np.minimum(x1, right, out=x1)
    #     np.minimum(y1, bot, out=y1)
    #     np.minimum(x2, right, out=x2)
    #     np.minimum(y2, bot, out=y2)
    #     np.maximum(x1, left, out=x1)
    #     np.maximum(y1, top, out=y1)
    #     np.maximum(x2, left, out=x2)
    #     np.maximum(y2, top, out=y2)
    #     return boxes

    def asnumpy(tensor):
        return tensor.data.cpu().numpy()

    bsize = len(labels[0])
    for bx in range(bsize):
        postitem = asnumpy(postout[bx])
        target = asnumpy(targets[bx]).reshape(-1, 5)
        true_cxywh = target[:, 1:5]
        true_cxs = target[:, 0]
        true_weight = asnumpy(gt_weights[bx])

        # Remove padded truth
        flags = true_cxs != -1
        true_cxywh = true_cxywh[flags]
        true_cxs = true_cxs[flags]
        true_weight = true_weight[flags]

        # orig_size = asnumpy(orig_sizes[bx])
        # gx = int(asnumpy(indices[bx]))

        # how much do we care about the background in this image?
        bg_weight = float(asnumpy(bg_weights[bx]))

        # Unpack postprocessed predictions
        sboxes = postitem.reshape(-1, 6)
        pred_cxywh = sboxes[:, 0:4]
        pred_scores = sboxes[:, 4]
        pred_cxs = sboxes[:, 5].astype(int)

        true_tlbr = util.Boxes(true_cxywh, 'cxywh').to_tlbr()
        pred_tlbr = util.Boxes(pred_cxywh, 'cxywh').to_tlbr()

        # TODO: can we invert the letterbox transform here and clip for
        # some extra mAP?
        true_boxes = true_tlbr.data
        pred_boxes = pred_tlbr.data

        # if False:
        #     # new letterbox transform makes this trickier, simply try and
        #     # compare in 0-1 space for now.
        #     # use max because of letterbox transform
        #     lettered_orig_size = orig_size.max()
        #     true_boxes = true_tlbr.scale(lettered_orig_size).data
        #     pred_boxes = pred_tlbr.scale(lettered_orig_size).data
        #     # Clip predicted boxes to the letterbox
        #     shift, embed_size = letterbox_transform(orig_size, inp_size)
        #     orig_lefttop = (shift / inp_size) * orig_size.max()
        #     orig_rightbot = lettered_orig_size - orig_lefttop
        #     letterbox_tlbr = list(orig_lefttop) + list(orig_rightbot)
        #     pred_boxes = clip_boxes_to_letterbox(pred_boxes, letterbox_tlbr)

        y = nh.metrics.detection_confusions(
            true_boxes=true_boxes,
            true_cxs=true_cxs,
            true_weights=true_weight,
            pred_boxes=pred_boxes,
            pred_scores=pred_scores,
            pred_cxs=pred_cxs,
            bg_weight=bg_weight,
            bg_cls=-1,
            ovthresh=harn.hyper.other['ovthresh'])
        # y['gx'] = gx
        yield y