Code example #1
    def _postout_to_pred_ann(harn,
                             inp_size,
                             labels,
                             postout,
                             _aidbase=1,
                             undo_lb=True):
        """ Convert batch predictions to coco-style annotations for scoring """
        indices = labels['indices']
        orig_sizes = labels['orig_sizes']
        letterbox = harn.datasets[harn.current_tag].letterbox

        MAX_DETS = None

        bsize = len(indices)

        _aids = it.count(_aidbase)
        for bx in range(bsize):
            postitem = postout[bx].data.cpu().numpy()
            orig_size = orig_sizes[bx].data.cpu().numpy()
            gx = int(indices[bx].data.cpu().numpy())

            # Unpack postprocessed predictions
            sboxes = postitem.reshape(-1, 6)
            pred_boxes_ = util.Boxes(sboxes[:, 0:4], 'cxywh').scale(inp_size)
            pred_scores = sboxes[:, 4]
            pred_cxs = sboxes[:, 5].astype(int)

            if undo_lb:
                pred_boxes = letterbox._boxes_letterbox_invert(
                    pred_boxes_, orig_size, inp_size)
            else:
                pred_boxes = pred_boxes_

            # sort predictions by descending score

            # Take at most MAX_DETS detections to evaluate
            _pred_sortx = pred_scores.argsort()[::-1][:MAX_DETS]

            _pred_boxes = pred_boxes.take(_pred_sortx,
                                          axis=0).to_xywh().data.tolist()
            _pred_cxs = pred_cxs.take(_pred_sortx, axis=0).tolist()
            _pred_scores = pred_scores.take(_pred_sortx, axis=0).tolist()
            for box, cx, score, aid in zip(_pred_boxes, _pred_cxs,
                                           _pred_scores, _aids):
                yield {
                    'id': aid,
                    'image_id': gx,
                    'category_id': cx,
                    'bbox': box,
                    'score': score
                }
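For context, a minimal, hypothetical sketch (not from the original source) of how the yielded COCO-style dicts might be collected and serialized; the harn, inp_size, labels, and postout objects are assumed to already exist in the calling harness code:

    import json
    # Gather one dict per detection so the result can be scored or dumped to JSON
    pred_anns = list(harn._postout_to_pred_ann(inp_size, labels, postout))
    # each item looks like:
    # {'id': 1, 'image_id': 42, 'category_id': 3, 'bbox': [x, y, w, h], 'score': 0.87}
    with open('predictions.json', 'w') as file:
        json.dump(pred_anns, file)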
Code example #2
    def __init__(self,
                 num_classes,
                 anchors,
                 coord_scale=1.0,
                 noobject_scale=1.0,
                 object_scale=5.0,
                 class_scale=1.0,
                 thresh=0.6):
        super().__init__()

        self.num_classes = num_classes

        self.anchors = anchors
        self.num_anchors = len(anchors)

        # self.anchor_step = len(self.anchors) // self.num_anchors
        self.reduction = 32  # input_dim/output_dim

        self.coord_scale = coord_scale
        self.noobject_scale = noobject_scale
        self.object_scale = object_scale
        self.class_scale = class_scale
        self.thresh = thresh

        self.loss_coord = None
        self.loss_conf = None
        self.loss_cls = None
        self.loss_tot = None

        self.mse = nn.MSELoss(size_average=False)
        self.cls_critrion = nn.CrossEntropyLoss(size_average=False)

        nA = self.num_anchors
        self.anchor_w = torch.Tensor(self.anchors.T[0]).view(nA, 1)
        self.anchor_h = torch.Tensor(self.anchors.T[1]).view(nA, 1)

        rel_anchors_cxywh = util.Boxes(
            np.hstack([self.anchors * 0, self.anchors]).astype(np.float32),
            'cxywh')
        self.rel_anchors_tlbr = rel_anchors_cxywh.toformat('tlbr').data

        self._prev_pred_init = None
        self._prev_pred_dim = None

        self.iou_mode = None
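As an illustration (not part of the original source), this is what the rel_anchors_cxywh construction above produces for a small made-up anchor array: each anchor becomes a box centered at the origin, so only its width and height contribute when it is compared against the zero-centered relative truth boxes.

    import numpy as np
    anchors = np.array([[1.32, 1.73], [3.19, 4.01]])  # hypothetical (w, h) pairs in grid units
    rel_cxywh = np.hstack([anchors * 0, anchors]).astype(np.float32)
    # array([[0.  , 0.  , 1.32, 1.73],
    #        [0.  , 0.  , 3.19, 4.01]], dtype=float32)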
Code example #3
File: util_cv2.py  Project: Cookt2/netharn
def draw_boxes_on_image(img,
                        boxes,
                        color='blue',
                        thickness=1,
                        box_format=None,
                        colorspace='bgr'):
    """
    Draws boxes on an image.

    Args:
        img (ndarray): image to copy and draw on
        boxes (nh.util.Boxes): boxes to draw
        colorspace (str): string code of the input image colorspace

    Example:
        >>> from netharn import util
        >>> img = np.zeros((10, 10, 3), dtype=np.uint8)
        >>> color = 'dodgerblue'
        >>> thickness = 1
        >>> boxes = util.Boxes([[1, 1, 8, 8]], 'tlbr')
        >>> img2 = draw_boxes_on_image(img, boxes, color, thickness)
        >>> assert tuple(img2[1, 1]) == (255, 144, 30)
        >>> # xdoc: +REQUIRES(--show)
        >>> from netharn.util import mplutil
        >>> mplutil.autompl()  # xdoc: +SKIP
        >>> mplutil.figure(doclf=True, fnum=1)
        >>> mplutil.imshow(img2)
    """
    from netharn import util
    if not isinstance(boxes, util.Boxes):
        if box_format is None:
            raise ValueError('specify box_format')
        boxes = util.Boxes(boxes, box_format)

    color = tuple(util.Color(color).as255(colorspace))
    tlbr = boxes.to_tlbr().data
    img2 = img.copy()
    for x1, y1, x2, y2 in tlbr:
        # pt1 = (int(round(x1)), int(round(y1)))
        # pt2 = (int(round(x2)), int(round(y2)))
        pt1 = (int(x1), int(y1))
        pt2 = (int(x2), int(y2))
        img2 = cv2.rectangle(img2, pt1, pt2, color, thickness=thickness)
    return img2
Code example #4
    def __init__(self,
                 num_classes,
                 anchors,
                 coord_scale=1.0,
                 noobject_scale=1.0,
                 object_scale=5.0,
                 class_scale=1.0,
                 thresh=0.6):
        super().__init__()

        self.num_classes = num_classes

        self.anchors = torch.Tensor(anchors)
        self.num_anchors = len(anchors)

        # self.anchor_step = len(self.anchors) // self.num_anchors
        self.reduction = 32  # input_dim/output_dim

        self.coord_scale = coord_scale
        self.noobject_scale = noobject_scale
        self.object_scale = object_scale
        self.class_scale = class_scale
        self.thresh = thresh

        self.loss_coord = None
        self.loss_conf = None
        self.loss_cls = None
        self.loss_tot = None

        self.coord_mse = nn.MSELoss(size_average=False)
        self.conf_mse = nn.MSELoss(size_average=False)
        self.cls_critrion = nn.CrossEntropyLoss(size_average=False)

        # Precompute relative anchors in tlbr format for iou computation
        rel_anchors_cxywh = torch.cat(
            [torch.zeros_like(self.anchors), self.anchors], 1)
        self.rel_anchors_boxes = util.Boxes(rel_anchors_cxywh, 'cxywh')

        self._prev_pred_init = None
        self._prev_pred_dim = None

        self.iou_mode = None
Code example #5
    def __init__(self,
                 num_classes,
                 anchors,
                 coord_scale=1.0,
                 noobject_scale=1.0,
                 object_scale=5.0,
                 class_scale=1.0,
                 thresh=0.6,
                 seen_thresh=12800,
                 small_boxes=False,
                 mse_factor=0.5):
        super(RegionLoss, self).__init__()

        self.num_classes = num_classes

        self.seen_thresh = seen_thresh

        self.anchors = torch.Tensor(anchors)
        self.num_anchors = len(anchors)

        self.coord_scale = coord_scale
        self.noobject_scale = noobject_scale
        self.object_scale = object_scale
        self.class_scale = class_scale
        self.thresh = thresh

        self.loss_coord = None
        self.loss_conf = None
        self.loss_cls = None
        self.loss_tot = None

        self.coord_mse = nn.MSELoss(size_average=False)
        self.conf_mse = nn.MSELoss(size_average=False)
        self.cls_critrion = nn.CrossEntropyLoss(size_average=False)

        # Precompute relative anchors in tlbr format for iou computation
        rel_anchors_cxywh = torch.cat(
            [torch.zeros_like(self.anchors), self.anchors], 1)
        self.rel_anchors_boxes = util.Boxes(rel_anchors_cxywh, 'cxywh')

        self.small_boxes = small_boxes
        self.mse_factor = mse_factor
Code example #6
def draw_boxes_on_image(img,
                        boxes,
                        color='blue',
                        thickness=1,
                        box_format=None):
    """

    Example:
        >>> from netharn import util
        >>> img = np.zeros((10, 10, 3), dtype=np.uint8)
        >>> color = 'blue'
        >>> thickness = 1
        >>> boxes = util.Boxes([[1, 1, 8, 8]], 'tlbr')
        >>> img2 = draw_boxes_on_image(img, boxes, color, thickness)
        >>> # xdoc: +REQUIRES(--show)
        >>> from netharn.util import mplutil
        >>> mplutil.qtensure()  # xdoc: +SKIP
        >>> mplutil.figure(doclf=True, fnum=1)
        >>> mplutil.imshow(img2)
    """
    from netharn import util
    if not isinstance(boxes, util.Boxes):
        if box_format is None:
            raise ValueError('specify box_format')
        boxes = util.Boxes(boxes, box_format)

    color = tuple(util.Color(color).as255('bgr'))
    tlbr = boxes.to_tlbr().data
    img2 = img.copy()
    for x1, y1, x2, y2 in tlbr:
        # pt1 = (int(round(x1)), int(round(y1)))
        # pt2 = (int(round(x2)), int(round(y2)))
        pt1 = (int(x1), int(y1))
        pt2 = (int(x2), int(y2))
        img2 = cv2.rectangle(img2, pt1, pt2, color, thickness=thickness)
    return img2
Code example #7
    def build_targets(self,
                      pred_cxywh,
                      target,
                      nH,
                      nW,
                      seen=0,
                      gt_weights=None):
        """
        Compare prediction boxes and targets, convert targets to network output tensors

        Args:
            pred_cxywh (Tensor):   shape [B * A * W * H, 4] in normalized cxywh format
            target (Tensor): shape [B, max(gtannots), 4]

        CommandLine:
            python ~/code/netharn/netharn/models/yolo2/light_region_loss.py RegionLoss.build_targets:1

        Example:
            >>> from netharn.models.yolo2.light_yolo import Yolo
            >>> from netharn.models.yolo2.light_region_loss import RegionLoss
            >>> torch.random.manual_seed(0)
            >>> network = Yolo(num_classes=2, conf_thresh=4e-2)
            >>> self = RegionLoss(num_classes=network.num_classes, anchors=network.anchors)
            >>> Win, Hin = 96, 96
            >>> nW, nH = 3, 3
            >>> target = torch.FloatTensor([])
            >>> gt_weights = torch.FloatTensor([[-1, -1, -1], [1, 1, 0]])
            >>> #pred_cxywh = torch.rand(90, 4)
            >>> nB = len(gt_weights)
            >>> pred_cxywh = torch.rand(nB, len(self.anchors), nH, nW, 4).view(-1, 4)
            >>> seen = 0
            >>> self.build_targets(pred_cxywh, target, nH, nW, seen, gt_weights)

        Example:
            >>> from netharn.models.yolo2.light_region_loss import RegionLoss
            >>> torch.random.manual_seed(0)
            >>> anchors = np.array([[.75, .75], [1.0, .3], [.3, 1.0]])
            >>> self = RegionLoss(num_classes=2, anchors=anchors)
            >>> nW, nH = 2, 2
            >>> # true boxes for each item in the batch
            >>> # each box encodes class, center, width, and height
            >>> # coordinates are normalized in the range 0 to 1
            >>> # items in each batch are padded with dummy boxes with class_id=-1
            >>> target = torch.FloatTensor([
            >>>     # boxes for batch item 0 (it has no objects, note the pad!)
            >>>     [[-1, 0, 0, 0, 0],
            >>>      [-1, 0, 0, 0, 0],
            >>>      [-1, 0, 0, 0, 0]],
            >>>     # boxes for batch item 1
            >>>     [[0, 0.50, 0.50, 1.00, 1.00],
            >>>      [1, 0.34, 0.32, 0.12, 0.32],
            >>>      [1, 0.32, 0.42, 0.22, 0.12]],
            >>> ])
            >>> gt_weights = torch.FloatTensor([[-1, -1, -1], [1, 1, 0]])
            >>> nB = len(gt_weights)
            >>> pred_cxywh = torch.rand(nB, len(anchors), nH, nW, 4).view(-1, 4)
            >>> seen = 0
            >>> coord_mask, conf_mask, cls_mask, tcoord, tconf, tcls = self.build_targets(pred_cxywh, target, nH, nW, seen, gt_weights)
        """
        gtempty = (target.numel() == 0)

        # Parameters
        nB = target.shape[0] if not gtempty else 0
        # nT = target.shape[1] if not gtempty else 0
        nA = self.num_anchors

        if nB == 0:
            # torch does not preserve shapes when any dimension goes to 0
            # fix nB if there is no groundtruth
            nB = int(len(pred_cxywh) / (nA * nH * nW))
        else:
            assert nB == int(len(pred_cxywh) /
                             (nA * nH * nW)), 'bad assumption'

        seen = seen + nB

        # Tensors
        device = self.get_device()

        # Put the groundtruth in a format comparable to output
        tcoord = torch.zeros(nB, nA, 4, nH, nW, device=device)
        tconf = torch.zeros(nB, nA, 1, nH, nW, device=device)
        tcls = torch.zeros(nB, nA, 1, nH, nW, device=device)

        # Create weights to determine which outputs are punished
        # By default we punish all outputs for not having correct iou
        # objectness prediction. The other masks default to zero meaning that
        # by default we will not punish a prediction for having a different
        # coordinate or class label (later the groundtruths will override these
        # defaults for select grid cells and anchors)
        coord_mask = torch.zeros(nB, nA, 1, nH, nW, device=device)
        conf_mask = torch.ones(nB, nA, 1, nH, nW, device=device)
        cls_mask = torch.zeros(nB, nA, 1, nH, nW, device=device).byte()

        # Default conf_mask to the noobject_scale
        conf_mask.fill_(self.noobject_scale)

        # Encourage the network to predict boxes centered on the grid cells by
        # setting the default target xs and ys to (.5, .5) (i.e. the relative
        # center of a grid cell). Fill the mask with ones so all outputs are
        # punished for not predicting center anchor locations --- unless
        # tcoord is overridden by a real groundtruth target later on.
        if seen < 12800:
            # PJReddie's version
            # https://github.com/pjreddie/darknet/blob/master/src/region_layer.c#L254

            # By default encourage the network to predict no shift
            tcoord[:, :, 0:2, :, :].fill_(0.5)
            # By default encourage the network to predict no scale (in logspace)
            tcoord[:, :, 2:4, :, :].fill_(0.0)
            # In the warmup phase we care about changing the coords to be
            # exactly the anchors if they don't predict anything, but the
            # weight is only 0.01, set it to 0.01 / self.coord_scale.
            # Note we will apply the required sqrt later
            coord_mask.fill_((0.01 / self.coord_scale))

        if gtempty:
            coord_mask = coord_mask.sqrt()
            conf_mask = conf_mask.sqrt()
            coord_mask = coord_mask.expand_as(tcoord)
            return coord_mask, conf_mask, cls_mask, tcoord, tconf, tcls

        # Put this back into a non-flat view
        pred_cxywh = pred_cxywh.view(nB, nA, nH, nW, 4)
        pred_boxes = util.Boxes(pred_cxywh, 'cxywh')

        gt_class = target[..., 0].data
        gt_boxes_norm = util.Boxes(target[..., 1:5], 'cxywh')
        gt_boxes = gt_boxes_norm.scale([nW, nH])
        # Construct "relative" versions of the true boxes, centered at 0
        # This will allow them to be compared to the anchor boxes.
        rel_gt_boxes = gt_boxes.copy()
        rel_gt_boxes.data[..., 0:2] = 0

        # true boxes with a class of -1 are fillers, ignore them
        gt_isvalid = (gt_class >= 0)

        # Compute the grid cell for each groundtruth box
        true_xs, true_ys = gt_boxes.components[0:2]
        true_is = true_xs.long().clamp_(0, nW - 1)
        true_js = true_ys.long().clamp_(0, nH - 1)

        if gt_weights is None:
            # If unspecified give each groundtruth a default weight of 1
            gt_weights = torch.ones_like(target[..., 0], device=device)

        # Undocumented darknet detail: multiply the coord weight by two minus
        # the area of the true box in normalized coordinates. (The square root
        # is taken later because the weight is applied inside the squared MSE.)
        gt_coord_weights = (gt_weights * (2.0 - gt_boxes_norm.area[..., 0]))

        # Loop over ground_truths and construct tensors
        for bx in range(nB):
            # Get the actual groundtruth boxes for this batch item
            flags = gt_isvalid[bx]
            if not np.any(flags):
                continue

            # Batch ground truth
            batch_rel_gt_boxes = rel_gt_boxes[bx, flags]
            cur_gt_boxes = gt_boxes[bx, flags]
            cur_true_is = true_is[bx, flags]
            cur_true_js = true_js[bx, flags]
            cur_true_weights = gt_weights[bx, flags]
            cur_true_coord_weights = gt_coord_weights[bx, flags]

            # Batch predictions
            cur_pred_boxes = pred_boxes[bx]

            # Assign groundtruth boxes to anchor boxes
            anchor_ious = self.rel_anchors_boxes.ious(batch_rel_gt_boxes,
                                                      bias=0)
            _, best_anchor_idxs = anchor_ious.max(dim=0)  # best_ns in YOLO

            # Assign groundtruth boxes to predicted boxes
            ious = cur_pred_boxes.ious(cur_gt_boxes, bias=0)
            cur_ious, _ = ious.max(dim=-1)

            # Set the loss to zero for any predicted boxes that had a high iou
            # with a groundtruth target (we won't punish them for not being
            # background). One of these will be selected as the best and will
            # be punished for not predicting the groundtruth value.
            conf_mask[bx].view(-1)[cur_ious.view(-1) > self.thresh] = 0

            for t in range(cur_gt_boxes.shape[0]):
                gt_box_ = cur_gt_boxes[t]
                weight = cur_true_weights[t]
                # coord weights incorporate weight and true box area
                coord_weight = cur_true_coord_weights[t]

                # The assigned (best) anchor index
                ax = best_anchor_idxs[t].item()
                anchor_w, anchor_h = self.anchors[ax]

                # Compute this ground truth's grid cell
                gx, gy, gw, gh = gt_box_.data
                gi = cur_true_is[t].item()
                gj = cur_true_js[t].item()

                # The prediction will be punished if it does not match this true box
                # pred_box_ = cur_pred_boxes[best_n, gj, gi]

                # Get the precomputed iou of the truth with this box
                # corresponding to the assigned anchor and grid cell
                iou = ious[ax, gj, gi, t].item()

                # Mark that we will care about the predicted box with some weight
                coord_mask[bx, ax, 0, gj, gi] = coord_weight

                # PJReddie delta_region_class:
                # https://github.com/pjreddie/darknet/blob/master/src/region_layer.c#L112
                # https://github.com/pjreddie/darknet/blob/master/src/region_layer.c#L314
                cls_mask[bx, ax, 0, gj, gi] = int(weight > .5)

                conf_mask[bx, ax, 0, gj, gi] = self.object_scale * weight

                # The true box is converted into coordinates comparable to the
                # network outputs by:
                # (1) we center the true box on its assigned grid cell
                # (2) we divide its width and height by its assigned anchor
                # (3) we take the log of width and height because the raw
                #     network wh outputs are in logspace.
                tcoord[bx, ax, 0, gj, gi] = gx - gi
                tcoord[bx, ax, 1, gj, gi] = gy - gj
                tcoord[bx, ax, 2, gj, gi] = math.log(gw / anchor_w)
                tcoord[bx, ax, 3, gj, gi] = math.log(gh / anchor_h)
                tconf[bx, ax, 0, gj, gi] = iou  # if rescore else 1
                tcls[bx, ax, 0, gj, gi] = target[bx, t, 0]

        # because coord and conf masks are within this MSE we need to sqrt them
        coord_mask = coord_mask.sqrt()
        conf_mask = conf_mask.sqrt()
        coord_mask = coord_mask.expand_as(tcoord)

        # masked_tcls = tcls[cls_mask].view(-1).long()
        # cls_probs_mask = cls_mask.reshape(nB, nA, nH, nW, 1).repeat(1, 1, 1, 1, nC)
        # cls_probs_mask = Variable(cls_probs_mask, requires_grad=False)
        # masked_cls_probs = cls_probs[cls_probs_mask].view(-1, nC)

        return coord_mask, conf_mask, cls_mask, tcoord, tconf, tcls
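To make the tcoord encoding above concrete, here is a small self-contained sketch (hypothetical numbers, not from the original source) of the forward encoding and its inverse: the x/y targets are offsets from the assigned grid cell, and the w/h targets are log-ratios against the assigned anchor.

    import math
    gx, gy, gw, gh = 1.7, 0.4, 2.0, 3.0   # hypothetical true box in grid units
    gi, gj = int(gx), int(gy)             # assigned grid cell
    anchor_w, anchor_h = 1.5, 2.5         # hypothetical assigned anchor (w, h)
    tx, ty = gx - gi, gy - gj             # cell-relative center offsets
    tw, th = math.log(gw / anchor_w), math.log(gh / anchor_h)  # log-space sizes
    # decoding (what the detection head effectively inverts) recovers the box
    assert abs((gi + tx) - gx) < 1e-6
    assert abs(anchor_w * math.exp(tw) - gw) < 1e-6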
Code example #8
File: torch_nms.py  Project: jcfr/netharn
def torch_nms(tlbr, scores, classes=None, thresh=.5, bias=0, fast=False):
    """
    Non maximum suppression implemented with pytorch tensors

    CURRENTLY NOT WORKING

    Args:
        tlbr (Tensor): Bounding boxes of one image in the format (tlbr)
        scores (Tensor): Scores of each box
        classes (Tensor, optional): the classes of each box. If specified nms is applied to each class separately.
        thresh (float): iou threshold

    Returns:
        ByteTensor: keep: boolean array indicating which boxes were not pruned.

    Example:
        >>> # DISABLE_DOCTEST
        >>> import torch
        >>> import numpy as np
        >>> tlbr = torch.FloatTensor(np.array([
        >>>     [0, 0, 100, 100],
        >>>     [100, 100, 10, 10],
        >>>     [10, 10, 100, 100],
        >>>     [50, 50, 100, 100],
        >>>     [100, 100, 130, 130],
        >>>     [100, 100, 130, 130],
        >>>     [100, 100, 130, 130],
        >>> ], dtype=np.float32))
        >>> scores = torch.FloatTensor(np.array([.1, .5, .9, .1, .3, .5, .4]))
        >>> classes = torch.FloatTensor(np.array([0, 0, 0, 0, 0, 0, 0]))
        >>> thresh = .5
        >>> keep = torch_nms(tlbr, scores, classes, thresh)
        >>> tlbr[keep]

    Example:
        >>> # DISABLE_DOCTEST
        >>> import torch
        >>> import numpy as np
        >>> # Test to check that conflicts are correctly resolved
        >>> tlbr = torch.FloatTensor(np.array([
        >>>     [100, 100, 150, 101],
        >>>     [120, 100, 180, 101],
        >>>     [150, 100, 200, 101],
        >>> ], dtype=np.float32))
        >>> scores = torch.FloatTensor(np.linspace(.8, .9, len(tlbr)))
        >>> classes = None
        >>> thresh = .3
        >>> keep = torch_nms(tlbr, scores, classes, thresh, fast=False)
        >>> tlbr[keep]
    """
    if tlbr.numel() == 0:
        return []

    # Sort coordinates by descending score
    ordered_scores, order = scores.sort(0, descending=True)

    from netharn import util
    boxes = util.Boxes(tlbr[order], 'tlbr')
    ious = boxes.ious(boxes, bias=bias)

    # if False:
    #     x1, y1, x2, y2 = tlbr[order].split(1, 1)

    #     # Compute dx and dy between each pair of boxes (these mat contain every pair twice...)
    #     dx = (x2.min(x2.t()) - x1.max(x1.t())).clamp_(min=0)
    #     dy = (y2.min(y2.t()) - y1.max(y1.t())).clamp_(min=0)

    #     # Compute iou
    #     intersections = dx * dy
    #     areas = (x2 - x1) * (y2 - y1)
    #     unions = (areas + areas.t()) - intersections
    #     ious = intersections / unions

    # Filter based on iou (and class)
    conflicting = (ious > thresh).triu(1)

    if classes is not None:
        ordered_classes = classes[order]
        same_class = (
            ordered_classes.unsqueeze(0) == ordered_classes.unsqueeze(1))
        conflicting = (conflicting & same_class)
    # Now we have a 2D matrix where conflicting[i, j] indicates if box[i]
    # conflicts with box[j]. For each box[i] we want to only keep the first
    # one that does not conflict with any other box[j].

    # Find out how many conflicts each ordered box has with other boxes that
    # have higher scores than it does. In other words...
    # n_conflicts[i] is the number of conflicts box[i] has with other boxes
    # that have a **higher score** than box[i] does. We will definitely
    # keep any box where n_conflicts is 0, but we need to postprocess because
    # we might actually keep some boxes currently marked as conflicted.
    n_conflicts = conflicting.sum(0).byte()

    if not fast:
        # It is not enough to simply use all places where there are no
        # conflicts. Say we have boxes A, B, and C, where A conflicts with B,
        # B conflicts with C, but A does not conflict with C. The fact that we
        # use A should mean that C is no longer conflicted.

        if True:
            # Marginally faster. best=618.2 us
            ordered_keep = np.zeros(len(conflicting), dtype=np.uint8)
            suppress = np.zeros(len(conflicting), dtype=bool)
            for i, row in enumerate(conflicting.cpu().numpy() > 0):
                if not suppress[i]:
                    ordered_keep[i] = 1
                    suppress[row] = 1
            ordered_keep = torch.ByteTensor(ordered_keep).to(tlbr.device)
        else:
            # Marginally slower: best=1.382 ms,
            n_conflicts_post = n_conflicts.cpu()
            conflicting = conflicting.cpu()

            keep_len = len(n_conflicts_post) - 1
            for i in range(1, keep_len):
                if n_conflicts_post[i] > 0:
                    n_conflicts_post -= conflicting[i]

            n_conflicts = n_conflicts_post.to(n_conflicts.device)
            ordered_keep = (n_conflicts == 0)
    else:
        # Now we can simply keep any box that has no conflicts.
        ordered_keep = (n_conflicts == 0)

    # Unsort, so keep is aligned with input boxes
    keep = ordered_keep.new(*ordered_keep.size())
    keep.scatter_(0, order, ordered_keep)
    return keep
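The docstring marks this implementation as not currently working, but assuming it returns the keep mask described above, usage would look roughly like the following sketch (random toy boxes, purely illustrative):

    import torch
    boxes = torch.FloatTensor([[0, 0, 10, 10], [1, 1, 11, 11], [50, 50, 60, 60]])
    scores = torch.FloatTensor([0.9, 0.8, 0.7])
    keep = torch_nms(boxes, scores, thresh=0.5)
    # the second box overlaps the first heavily and should be suppressed
    kept_boxes = boxes[keep.bool()]
    kept_scores = scores[keep.bool()]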
Code example #9
    def __getitem__(self, index):
        """
        CommandLine:
            python ~/code/netharn/examples/yolo_voc.py YoloVOCDataset.__getitem__ --show

        Example:
            >>> # DISABLE_DOCTEST
            >>> import sys, ubelt
            >>> sys.path.append(ubelt.truepath('~/code/netharn/examples'))
            >>> from yolo_voc import *
            >>> self = YoloVOCDataset(split='train')
            >>> index = 7
            >>> chw01, label = self[index]
            >>> hwc01 = chw01.numpy().transpose(1, 2, 0)
            >>> print(hwc01.shape)
            >>> norm_boxes = label['targets'].numpy().reshape(-1, 5)[:, 1:5]
            >>> inp_size = hwc01.shape[-2::-1]
            >>> # xdoc: +REQUIRES(--show)
            >>> from netharn.util import mplutil
            >>> mplutil.figure(doclf=True, fnum=1)
            >>> mplutil.qtensure()  # xdoc: +SKIP
            >>> mplutil.imshow(hwc01, colorspace='rgb')
            >>> inp_boxes = util.Boxes(norm_boxes, 'cxywh').scale(inp_size).data
            >>> mplutil.draw_boxes(inp_boxes, box_format='cxywh')
            >>> mplutil.show_if_requested()

        Example:
            >>> # DISABLE_DOCTEST
            >>> import sys, ubelt
            >>> sys.path.append(ubelt.truepath('~/code/netharn/examples'))
            >>> from yolo_voc import *
            >>> self = YoloVOCDataset(split='test')
            >>> index = 0
            >>> chw01, label = self[index]
            >>> hwc01 = chw01.numpy().transpose(1, 2, 0)
            >>> print(hwc01.shape)
            >>> norm_boxes = label['targets'].numpy().reshape(-1, 5)[:, 1:5]
            >>> inp_size = hwc01.shape[-2::-1]
            >>> # xdoc: +REQUIRES(--show)
            >>> from netharn.util import mplutil
            >>> mplutil.figure(doclf=True, fnum=1)
            >>> mplutil.qtensure()  # xdoc: +SKIP
            >>> mplutil.imshow(hwc01, colorspace='rgb')
            >>> inp_boxes = util.Boxes(norm_boxes, 'cxywh').scale(inp_size).data
            >>> mplutil.draw_boxes(inp_boxes, box_format='cxywh')
            >>> mplutil.show_if_requested()

        Ignore:
            >>> self = YoloVOCDataset(split='train')
            >>> for index in ub.ProgIter(range(len(self))):
            >>>     chw01, label = self[index]
            >>>     target = label['targets']
            >>>     wh = target[:, 3:5]
            >>>     if np.any(wh == 0):
            >>>         raise ValueError()
            >>>     pass
            >>> # Check that we can collate this data
            >>> self = YoloVOCDataset(split='train')
            >>> inbatch = [self[index] for index in range(0, 16)]
            >>> from netharn.data import collate
            >>> batch = collate.padded_collate(inbatch)
            >>> inputs, labels = batch
            >>> assert len(labels) == len(inbatch[0][1])
            >>> targets = labels['targets']
            >>> orig_sizes = labels['orig_sizes']
            >>> gt_weights = labels['gt_weights']
            >>> indices = labels['indices']
            >>> bg_weights = labels['bg_weights']
            >>> assert list(targets.shape) == [16, 6, 5]
            >>> assert list(gt_weights.shape) == [16, 6]
            >>> assert list(orig_sizes.shape) == [16, 2]
            >>> assert list(indices.shape) == [16, 1]
        """
        if isinstance(index, tuple):
            # Get size index from the batch loader
            index, size_index = index
            if size_index is None:
                inp_size = self.base_wh
            else:
                inp_size = self.multi_scale_inp_size[size_index]
        else:
            inp_size = self.base_wh
        inp_size = np.array(inp_size)

        image, tlbr, gt_classes, gt_weights = self._load_item(index)
        orig_size = np.array(image.shape[0:2][::-1])
        bbs = util.Boxes(tlbr, 'tlbr').to_imgaug(shape=image.shape)

        if self.augmenter:
            # Ensure the same augmenter is used for bboxes and images
            seq_det = self.augmenter.to_deterministic()

            image = seq_det.augment_image(image)
            bbs = seq_det.augment_bounding_boxes([bbs])[0]

            # Clip any bounding boxes that went out of bounds
            h, w = image.shape[0:2]
            tlbr = util.Boxes.from_imgaug(bbs)

            old_area = tlbr.area
            tlbr = tlbr.clip(0, 0, w - 1, h - 1, inplace=True)
            new_area = tlbr.area

            # Remove any boxes that have gone significantly out of bounds.
            remove_thresh = 0.1
            flags = (new_area / old_area).ravel() > remove_thresh

            tlbr = tlbr.compress(flags, inplace=True)
            gt_classes = gt_classes[flags]
            gt_weights = gt_weights[flags]

            bbs = tlbr.to_imgaug(shape=image.shape)

        # Apply letterbox resize transform to train and test
        self.letterbox.target_size = inp_size
        image = self.letterbox.augment_image(image)
        bbs = self.letterbox.augment_bounding_boxes([bbs])[0]
        tlbr_inp = util.Boxes.from_imgaug(bbs)

        # Remove any boxes that are no longer visible or out of bounds
        flags = (tlbr_inp.area > 0).ravel()
        tlbr_inp = tlbr_inp.compress(flags, inplace=True)
        gt_classes = gt_classes[flags]
        gt_weights = gt_weights[flags]

        chw01 = torch.FloatTensor(image.transpose(2, 0, 1) / 255.0)

        # Lightnet YOLO accepts truth tensors in the format:
        # [class_id, center_x, center_y, w, h]
        # where coordinates are normalized between 0 and 1
        cxywh_norm = tlbr_inp.toformat('cxywh').scale(1 / inp_size)
        _target_parts = [gt_classes[:, None], cxywh_norm.data]
        target = np.concatenate(_target_parts, axis=-1)
        target = torch.FloatTensor(target)

        # Return index information in the label as well
        orig_size = torch.LongTensor(orig_size)
        index = torch.LongTensor([index])
        # how much do we care about each annotation in this image?
        gt_weights = torch.FloatTensor(gt_weights)
        # how much do we care about the background in this image?
        bg_weight = torch.FloatTensor([1.0])
        label = {
            'targets': target,
            'gt_weights': gt_weights,
            'orig_sizes': orig_size,
            'indices': index,
            'bg_weights': bg_weight
        }
        return chw01, label
Code example #10
File: detections.py  Project: jcfr/netharn
def detection_confusions(true_boxes,
                         true_cxs,
                         true_weights,
                         pred_boxes,
                         pred_scores,
                         pred_cxs,
                         bg_weight=1.0,
                         ovthresh=0.5,
                         bg_cls=-1,
                         bias=0.0):
    """ Classify detections by assigning to groundtruth boxes.

    Given predictions and truth for an image return (y_pred, y_true,
    y_score), which is suitable for sklearn classification metrics

    Args:
        true_boxes (ndarray): boxes in tlbr format
        true_cxs (ndarray): classes of each box
        true_weights (ndarray): weight of this each groundtruth item
        pred_boxes (ndarray): predicted boxes in tlbr format
        pred_scores (ndarray): scores for each prediction
        pred_cxs (ndarray): class predictions
        ovthresh (float): overlap threshold

        bg_weight (float): weight of background predictions (default=1)

        single_class (bool): if True, considers this to be a binary problem
        bias (float): for computing overlap, either 1 or 0

    Returns:
        dict: with relevant clf information

    Ignore:
        from xinspect.dynamic_kwargs import get_func_kwargs
        globals().update(get_func_kwargs(detection_confusions))

    Example:
        >>> from netharn.metrics.detections import *
        >>> from netharn.metrics.detections import _ave_precision, pr_curves
        >>> true_boxes = np.array([[ 0,  0, 10, 10],
        >>>                        [10,  0, 20, 10],
        >>>                        [10,  0, 20, 10],
        >>>                        [20,  0, 30, 10]])
        >>> true_weights = np.array([1, 0, .9, 1])
        >>> bg_weight = 1.0
        >>> true_cxs = np.array([0, 0, 1, 1])
        >>> pred_boxes = np.array([[6, 2, 20, 10],
        >>>                        [3,  2, 9, 7],
        >>>                        [20,  0, 30, 10]])
        >>> pred_scores = np.array([.5, .5, .5])
        >>> pred_cxs = np.array([0, 0, 1])
        >>> y = detection_confusions(true_boxes, true_cxs, true_weights,
        >>>                          pred_boxes, pred_scores, pred_cxs,
        >>>                          bg_weight=bg_weight, ovthresh=.5)
        >>> y = pd.DataFrame(y)
        >>> print(y)  # xdoc: +IGNORE_WANT
           pred  true  score  weight  cx  txs  pxs
        0     1     1 0.5000  1.0000   1    3    2
        1     0    -1 0.5000  1.0000   0   -1    1
        2     0     0 0.5000  0.0000   0    1    0
        3    -1     0 0.0000  1.0000   0    0   -1
        4    -1     1 0.0000  0.9000   1    2   -1

    Example:
        >>> true_boxes = np.array([[ 0,  0, 10, 10],
        >>>                        [10,  0, 20, 10],
        >>>                        [10,  0, 20, 10],
        >>>                        [20,  0, 30, 10]])
        >>> true_weights = np.array([1, 0.0, 1, 1.0])
        >>> bg_weight = 1.0
        >>> true_cxs = np.array([0, 0, 1, 1])
        >>> pred_boxes = np.array([[6, 2, 20, 10],
        >>>                        [3,  2, 9, 7],
        >>>                        [20,  0, 30, 10]])
        >>> pred_scores = np.array([.5, .6, .7])
        >>> pred_cxs = np.array([0, 0, 1])
        >>> y = detection_confusions(true_boxes, true_cxs, true_weights,
        >>>                          pred_boxes, pred_scores, pred_cxs,
        >>>                          bg_weight=bg_weight, ovthresh=.5)
        >>> y = pd.DataFrame(y)
        >>> print(y)  # xdoc: +IGNORE_WANT
    """
    y_pred = []
    y_true = []
    y_score = []
    y_weight = []
    cxs = []

    y_pxs = []
    y_txs = []

    if bg_weight is None:
        bg_weight = 1.0

    if False:
        if isinstance(true_boxes, util.Boxes):
            true_boxes = true_boxes.data
        if isinstance(pred_boxes, util.Boxes):
            pred_boxes = pred_boxes.data
    else:
        if not isinstance(true_boxes, util.Boxes):
            true_boxes = util.Boxes(true_boxes, 'tlbr')
        if not isinstance(pred_boxes, util.Boxes):
            pred_boxes = util.Boxes(pred_boxes, 'tlbr')

    # Keep track of which true items have been used
    true_unused = np.ones(len(true_cxs), dtype=bool)
    if true_weights is None:
        true_weights = np.ones(len(true_cxs))
    else:
        true_weights = np.array(true_weights)
    pred_scores = np.array(pred_scores)
    pred_cxs = np.array(pred_cxs)
    true_cxs = np.array(true_cxs)

    # Group true boxes by class
    # Keep track of which true boxes are unused / not assigned
    cx_to_idxs = ub.group_items(range(len(true_cxs)), true_cxs)
    cx_to_tboxes = util.group_items(true_boxes, true_cxs, axis=0)
    cx_to_tweight = util.group_items(true_weights, true_cxs, axis=0)

    # cx_to_boxes = ub.group_items(true_boxes, true_cxs)
    # cx_to_boxes = ub.map_vals(np.array, cx_to_boxes)

    # sort predictions by descending score
    _pred_sortx = pred_scores.argsort()[::-1]
    _pred_boxes = pred_boxes.take(_pred_sortx, axis=0)
    _pred_cxs = pred_cxs.take(_pred_sortx, axis=0)
    _pred_scores = pred_scores.take(_pred_sortx, axis=0)

    # For each predicted detection box
    # Allow it to match the truth of a particular class
    for px, cx, box, score in zip(_pred_sortx, _pred_cxs, _pred_boxes,
                                  _pred_scores):
        cls_true_idxs = cx_to_idxs.get(cx, [])

        ovmax = -np.inf
        ovidx = None
        weight = bg_weight
        tx = None  # we will set this to the index of the assigned gt

        if len(cls_true_idxs):
            cls_true_boxes = cx_to_tboxes[cx]
            cls_true_weights = cx_to_tweight[cx]
            # cls_true_boxes = true_boxes.take(cls_true_idxs, axis=0)
            # cls_true_weights = true_weights.take(cls_true_idxs, axis=0)

            overlaps = cls_true_boxes.ious(box, bias=bias)

            # choose best score by default
            ovidx = overlaps.argsort()[-1]
            ovmax = overlaps[ovidx]
            weight = cls_true_weights[ovidx]
            tx = cls_true_idxs[ovidx]

        if ovmax > ovthresh and true_unused[tx]:
            # Assign this prediction to a groundtruth object
            # Mark this prediction as a true positive
            y_pred.append(cx)
            y_true.append(cx)
            y_score.append(score)
            y_weight.append(weight)
            cxs.append(cx)
            # cls_unused[ovidx] = False

            tx = cls_true_idxs[ovidx]
            true_unused[tx] = False

            y_pxs.append(px)
            y_txs.append(tx)
        else:
            # Assign this prediction to the background
            # Mark this prediction as a false positive
            y_pred.append(cx)
            y_true.append(bg_cls)  # use -1 as background ignore class
            y_score.append(score)
            y_weight.append(bg_weight)
            cxs.append(cx)

            tx = -1
            y_pxs.append(px)
            y_txs.append(tx)

    # All pred boxes have been assigned to a truth box or the background.
    # Mark unused true boxes we failed to predict as false negatives
    for tx in np.where(true_unused)[0]:
        # Mark each unmatched truth as a false negative
        y_pred.append(-1)
        y_true.append(true_cxs[tx])
        y_score.append(0.0)
        y_weight.append(true_weights[tx])
        cxs.append(true_cxs[tx])

        px = -1
        y_pxs.append(px)
        y_txs.append(tx)

    y = {
        'pred': y_pred,
        'true': y_true,
        'score': y_score,
        'weight': y_weight,
        'cx': cxs,
        'txs': y_txs,  # index into the original true box for this row
        'pxs': y_pxs,  # index into the original pred box for this row
    }
    # print('y = {}'.format(ub.repr2(y, nl=1)))
    # y = pd.DataFrame(y)
    return y
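Because each row of the returned dict carries a weight, the confusion vectors can be fed to sklearn-style metrics with sample_weight; a minimal sketch assuming y is the dict produced by the doctest above:

    import numpy as np
    from sklearn.metrics import confusion_matrix
    y_true = np.array(y['true'])
    y_pred = np.array(y['pred'])
    weights = np.array(y['weight'])
    # -1 marks background / unmatched rows and simply shows up as its own label
    cm = confusion_matrix(y_true, y_pred, sample_weight=weights)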
Code example #11
    def _build_targets_tensor(self,
                              pred_boxes,
                              ground_truth,
                              nH,
                              nW,
                              seen=0,
                              gt_weights=None):
        """
        Compare prediction boxes and ground truths, convert ground truths to network output tensors

        Example:
            >>> from netharn.models.yolo2.light_yolo import Yolo
            >>> from netharn.models.yolo2.light_region_loss import RegionLoss
            >>> torch.random.manual_seed(0)
            >>> network = Yolo(num_classes=2, conf_thresh=4e-2)
            >>> self = RegionLoss(num_classes=network.num_classes, anchors=network.anchors)
            >>> Win, Hin = 96, 96
            >>> nW, nH = 3, 3
            >>> # true boxes for each item in the batch
            >>> # each box encodes class, center, width, and height
            >>> # coordinates are normalized in the range 0 to 1
            >>> # items in each batch are padded with dummy boxes with class_id=-1
            >>> ground_truth = torch.FloatTensor([
            >>>     # boxes for batch item 0 (it has no objects, note the pad!)
            >>>     [[-1, 0, 0, 0, 0],
            >>>      [-1, 0, 0, 0, 0],
            >>>      [-1, 0, 0, 0, 0]],
            >>>     # boxes for batch item 1
            >>>     [[0, 0.50, 0.50, 1.00, 1.00],
            >>>      [1, 0.34, 0.32, 0.12, 0.32],
            >>>      [1, 0.32, 0.42, 0.22, 0.12]],
            >>> ])
            >>> gt_weights = torch.FloatTensor([[-1, -1, -1], [1, 1, 0]])
            >>> pred_boxes = torch.rand(90, 4)
            >>> seen = 0
        """
        # Parameters
        nB = ground_truth.size(0)
        nT = ground_truth.size(1)
        nA = self.num_anchors
        nAnchors = nA * nH * nW
        nPixels = nH * nW

        seen = seen + nB

        # Tensors
        conf_mask = torch.ones(nB, nA, nPixels) * self.noobject_scale
        coord_mask = torch.zeros(nB, nA, 1, nPixels)
        cls_mask = torch.zeros(nB, nA, nPixels).byte()
        tcoord = torch.zeros(nB, nA, 4, nPixels)
        tconf = torch.zeros(nB, nA, nPixels)
        tcls = torch.zeros(nB, nA, nPixels)

        if seen < 12800:
            coord_mask.fill_(1)
            tcoord[:, :, 0].fill_(0.5)
            tcoord[:, :, 1].fill_(0.5)

        pred_cxywh = pred_boxes
        pred_tlbr = util.Boxes(pred_cxywh.data.cpu().numpy(),
                               'cxywh').toformat('tlbr').data

        gt_class = ground_truth[..., 0].data.cpu().numpy()
        gt_cxywh = util.Boxes(
            ground_truth[..., 1:5].data.cpu().numpy().astype(np.float32),
            'cxywh').scale([nW, nH])

        gt_tlbr = gt_cxywh.to_tlbr().data

        rel_gt_cxywh = gt_cxywh.copy()
        rel_gt_cxywh.data.T[0:2] = 0

        rel_gt_tlbr = rel_gt_cxywh.toformat('tlbr').data

        gt_isvalid = (gt_class >= 0)

        # Loop over ground_truths and construct tensors
        for bx in range(nB):
            # Get the actual groundtruth boxes for this batch item
            flags = gt_isvalid[bx]
            if not np.any(flags):
                continue

            # Create gt anchor assignments
            batch_rel_gt_tlbr = rel_gt_tlbr[bx][flags]
            anchor_ious = util.box_ious(self.rel_anchors_tlbr,
                                        batch_rel_gt_tlbr,
                                        bias=0,
                                        mode=self.iou_mode)
            best_ns = np.argmax(anchor_ious, axis=0)

            # Setting confidence mask
            cur_pred_tlbr = pred_tlbr[bx * nAnchors:(bx + 1) * nAnchors]
            cur_gt_tlbr = gt_tlbr[bx][flags]

            ious = util.box_ious(cur_pred_tlbr,
                                 cur_gt_tlbr,
                                 bias=0,
                                 mode=self.iou_mode)
            cur_ious = torch.FloatTensor(ious.max(-1))
            conf_mask[bx].view(-1)[cur_ious > self.thresh] = 0

            for t in range(nT):
                if not flags[t]:
                    break

                if gt_weights is None:
                    weight = 1
                else:
                    weight = gt_weights[bx][t]

                gx, gy, gw, gh = gt_cxywh.data[bx][t]
                gi = min(nW - 1, max(0, int(gx)))
                gj = min(nH - 1, max(0, int(gy)))

                best_n = best_ns[t]

                gt_box_ = gt_tlbr[bx][t]
                pred_box_ = pred_tlbr[bx * nAnchors + best_n * nPixels +
                                      gj * nW + gi]

                iou = float(
                    util.box_ious(gt_box_[None, :],
                                  pred_box_[None, :],
                                  bias=0,
                                  mode=self.iou_mode)[0, 0])

                best_anchor = self.anchors[best_n]
                best_aw, best_ah = best_anchor

                if weight == 0:
                    # HACK: Only allow weight == 0 and weight == 1 for now
                    # TODO:
                    #    - [ ] Allow for continuous weights
                    #    - [ ] Allow for per-image background weight
                    conf_mask[bx, best_n, gj * nW + gi] = 0
                else:
                    assert weight == 1, 'can only have weight in {0, 1} for now'
                    coord_mask[bx, best_n, 0, gj * nW + gi] = 1
                    cls_mask[bx, best_n, gj * nW + gi] = 1
                    conf_mask[bx, best_n, gj * nW + gi] = self.object_scale

                    tcoord[bx, best_n, 0, gj * nW + gi] = gx - gi
                    tcoord[bx, best_n, 1, gj * nW + gi] = gy - gj
                    tcoord[bx, best_n, 2,
                           gj * nW + gi] = math.log(gw / best_aw)
                    tcoord[bx, best_n, 3,
                           gj * nW + gi] = math.log(gh / best_ah)
                    tconf[bx, best_n, gj * nW + gi] = iou
                    tcls[bx, best_n, gj * nW + gi] = ground_truth[bx, t, 0]

        return coord_mask, conf_mask, cls_mask, tcoord, tconf, tcls
Code example #12
    def build_targets(self,
                      pred_cxywh,
                      target,
                      nH,
                      nW,
                      seen=0,
                      gt_weights=None):
        """
        Compare prediction boxes and targets, convert targets to network output tensors

        Args:
            pred_cxywh (Tensor):   shape [B * A * W * H, 4] in normalized cxywh format
            target (Tensor): shape [B, max(gtannots), 4]

        CommandLine:
            python ~/code/netharn/netharn/models/yolo2/light_region_loss.py RegionLoss.build_targets:1

        Example:
            >>> from netharn.models.yolo2.light_yolo import Yolo
            >>> from netharn.models.yolo2.light_region_loss import RegionLoss
            >>> torch.random.manual_seed(0)
            >>> network = Yolo(num_classes=2, conf_thresh=4e-2)
            >>> self = RegionLoss(num_classes=network.num_classes, anchors=network.anchors)
            >>> Win, Hin = 96, 96
            >>> nW, nH = 3, 3
            >>> target = torch.FloatTensor([])
            >>> gt_weights = torch.FloatTensor([[-1, -1, -1], [1, 1, 0]])
            >>> #pred_cxywh = torch.rand(90, 4)
            >>> nB = len(gt_weights)
            >>> pred_cxywh = torch.rand(nB, len(self.anchors), nH, nW, 4).view(-1, 4)
            >>> seen = 0
            >>> self.build_targets(pred_cxywh, target, nH, nW, seen, gt_weights)

        Example:
            >>> from netharn.models.yolo2.light_region_loss import RegionLoss
            >>> torch.random.manual_seed(0)
            >>> anchors = np.array([[.75, .75], [1.0, .3], [.3, 1.0]])
            >>> self = RegionLoss(num_classes=2, anchors=anchors)
            >>> nW, nH = 2, 2
            >>> # true boxes for each item in the batch
            >>> # each box encodes class, center, width, and height
            >>> # coordinates are normalized in the range 0 to 1
            >>> # items in each batch are padded with dummy boxes with class_id=-1
            >>> target = torch.FloatTensor([
            >>>     # boxes for batch item 0 (it has no objects, note the pad!)
            >>>     [[-1, 0, 0, 0, 0],
            >>>      [-1, 0, 0, 0, 0],
            >>>      [-1, 0, 0, 0, 0]],
            >>>     # boxes for batch item 1
            >>>     [[0, 0.50, 0.50, 1.00, 1.00],
            >>>      [1, 0.34, 0.32, 0.12, 0.32],
            >>>      [1, 0.32, 0.42, 0.22, 0.12]],
            >>> ])
            >>> gt_weights = torch.FloatTensor([[-1, -1, -1], [1, 1, 0]])
            >>> nB = len(gt_weights)
            >>> pred_cxywh = torch.rand(nB, len(anchors), nH, nW, 4).view(-1, 4)
            >>> seen = 0
            >>> coord_mask, conf_mask, cls_mask, tcoord, tconf, tcls = self.build_targets(pred_cxywh, target, nH, nW, seen, gt_weights)
        """
        gtempty = (target.numel() == 0)

        # Parameters
        nB = target.shape[0] if not gtempty else 0
        # nT = target.shape[1] if not gtempty else 0
        nA = self.num_anchors

        nPixels = nW * nH

        if nB == 0:
            # torch does not preserve shapes when any dimension goes to 0
            # fix nB if there is no groundtruth
            nB = int(len(pred_cxywh) / (nA * nH * nW))
        else:
            assert nB == int(len(pred_cxywh) /
                             (nA * nH * nW)), 'bad assumption'

        seen = seen + nB

        # Tensors
        device = self.get_device()

        # Put the groundtruth in a format comparable to output
        tcoord = torch.zeros(nB, nA, 4, nH, nW, device=device)
        tconf = torch.zeros(nB, nA, 1, nH, nW, device=device)
        tcls = torch.zeros(nB, nA, 1, nH, nW, device=device)

        # Create weights to determine which outputs are punished
        # By default we punish all outputs for not having correct iou
        # objectness prediction. The other masks default to zero meaning that
        # by default we will not punish a prediction for having a different
        # coordinate or class label (later the groundtruths will override these
        # defaults for select grid cells and anchors)
        coord_mask = torch.zeros(nB, nA, 1, nH, nW, device=device)
        conf_mask = torch.ones(nB, nA, 1, nH, nW, device=device)
        cls_mask = torch.zeros(nB, nA, 1, nH, nW, device=device).byte()

        # Default conf_mask to the noobject_scale
        conf_mask.fill_(self.noobject_scale)

        # Encourage the network to predict boxes centered on the grid cells by
        # setting the default target xs and ys to (.5, .5) (i.e. the relative
        # center of a grid cell). Fill the mask with ones so all outputs are
        # punished for not predicting center anchor locations --- unless
        # tcoord is overridden by a real groundtruth target later on.
        if seen < self.seen_thresh:
            # PJReddie's version
            # https://github.com/pjreddie/darknet/blob/master/src/region_layer.c#L254

            # By default encourage the network to predict no shift
            tcoord[:, :, 0:2, :, :].fill_(0.5)
            # By default encourage the network to predict no scale (in logspace)
            tcoord[:, :, 2:4, :, :].fill_(0.0)

            if False:
                # In the warmup phase we care about changing the coords to be
                # exactly the anchors if they don't predict anything, but the
                # weight is only 0.01, set it to 0.01 / self.coord_scale.
                # Note we will apply the required sqrt later
                coord_mask.fill_((0.01 / self.coord_scale))
                # This hurts even though it seems like it's what darknet does
            else:
                coord_mask.fill_(1)

        if gtempty:
            coord_mask = coord_mask.sqrt()
            conf_mask = conf_mask.sqrt()
            coord_mask = coord_mask.expand_as(tcoord)
            return coord_mask, conf_mask, cls_mask, tcoord, tconf, tcls

        # Put this back into a non-flat view
        pred_cxywh = pred_cxywh.view(nB, nA, nH, nW, 4)
        pred_boxes = util.Boxes(pred_cxywh, 'cxywh')

        gt_class = target[..., 0].data
        gt_boxes_norm = util.Boxes(target[..., 1:5], 'cxywh')

        # Put GT boxes into output coordinates
        gt_boxes = gt_boxes_norm.scale([nW, nH])
        # Construct "relative" versions of the true boxes, centered at 0
        # This will allow them to be compared to the anchor boxes.
        rel_gt_boxes = gt_boxes.copy()
        rel_gt_boxes.data[..., 0:2] = 0

        # true boxes with a class of -1 are fillers, ignore them
        gt_isvalid = (gt_class >= 0)
        batch_nT = gt_isvalid.sum(dim=1).cpu().numpy()

        # Compute the grid cell for each groundtruth box
        true_xs = gt_boxes.data[..., 0]
        true_ys = gt_boxes.data[..., 1]
        true_is = true_xs.long().clamp_(0, nW - 1)
        true_js = true_ys.long().clamp_(0, nH - 1)

        if gt_weights is None:
            # If unspecified give each groundtruth a default weight of 1
            gt_weights = torch.ones_like(target[..., 0], device=device)

        # Undocumented darknet detail: multiply the coord weight by two minus
        # the area of the true box in normalized coordinates. (The square root
        # is taken later because the weight is applied inside the squared MSE.)
        if self.small_boxes:
            gt_coord_weights = (gt_weights *
                                (2.0 - gt_boxes_norm.area[..., 0]))
        else:
            gt_coord_weights = gt_weights
        # Pre multiply weights with object scales
        gt_conf_weights = gt_weights * self.object_scale
        # Pre threshold classification weights
        gt_cls_weights = (gt_weights > .5)

        # Loop over ground_truths and construct tensors
        for bx in range(nB):
            # Get the actual groundtruth boxes for this batch item
            nT = batch_nT[bx]
            if nT == 0:
                continue

            # Batch ground truth
            cur_rel_gt_boxes = rel_gt_boxes[bx, 0:nT]
            cur_gt_boxes = gt_boxes[bx, 0:nT]
            cur_gt_cls = target[bx, 0:nT, 0]
            # scalars, one for each true object
            cur_true_is = true_is[bx, 0:nT]
            cur_true_js = true_js[bx, 0:nT]
            cur_true_coord_weights = gt_coord_weights[bx, 0:nT]
            cur_true_conf_weights = gt_conf_weights[bx, 0:nT]
            cur_true_cls_weights = gt_cls_weights[bx, 0:nT]

            cur_gx, cur_gy, cur_gw, cur_gh = cur_gt_boxes.data.t()

            # Batch predictions
            cur_pred_boxes = pred_boxes[bx]

            # NOTE: IOU computation is the bottleneck in this function

            # Assign groundtruth boxes to anchor boxes
            cur_anchor_gt_ious = self.rel_anchors_boxes.ious(cur_rel_gt_boxes,
                                                             bias=0)
            _, cur_true_anchor_axs = cur_anchor_gt_ious.max(
                dim=0)  # best_ns in YOLO

            # Get the anchor (w,h) assigned to each true object
            cur_true_anchor_w, cur_true_anchor_h = self.anchors[
                cur_true_anchor_axs].t()

            # Find the IOU of each predicted box with the groundtruth
            cur_pred_true_ious = cur_pred_boxes.ious(cur_gt_boxes, bias=0)
            # Assign groundtruth boxes to predicted boxes
            cur_ious, _ = cur_pred_true_ious.max(dim=-1)

            # Set the loss to zero for any predicted boxes that had a high iou
            # with a groundtruth target (we won't punish them for not being
            # background). One of these will be selected as the best and will
            # be punished for not predicting the groundtruth value.
            conf_mask[bx].view(-1)[cur_ious.view(-1) > self.thresh] = 0

            ####
            # Broadcast the loop over true boxes
            ####
            # Convert the true box coordinates to be comparable with pred output
            # * translate each gt box to be relative to its assigned grid cell
            # * make w/h relative to anchor box w / h and convert to logspace
            cur_tcoord_x = cur_gx - cur_true_is.float()
            cur_tcoord_y = cur_gy - cur_true_js.float()
            cur_tcoord_w = (cur_gw / cur_true_anchor_w).log()
            cur_tcoord_h = (cur_gh / cur_true_anchor_h).log()

            iou_raveled_idxs = np.ravel_multi_index(
                [cur_true_anchor_axs, cur_true_js, cur_true_is,
                 np.arange(nT)], cur_pred_true_ious.shape)
            # Get the ious with the assigned boxes for each truth
            cur_true_ious = cur_pred_true_ious.view(-1)[iou_raveled_idxs]

            raveled_idxs = np.ravel_multi_index(
                [[bx], cur_true_anchor_axs, [0], cur_true_js, cur_true_is],
                coord_mask.shape)

            # --------------------------------------------
            raveled_idxs_b0 = np.ravel_multi_index(
                [[bx], cur_true_anchor_axs, [0], cur_true_js, cur_true_is],
                tcoord.shape)
            # A bit faster than calling ravel_multi_index again for offsets [1], [2], and [3]
            raveled_idxs_b1 = raveled_idxs_b0 + nPixels
            raveled_idxs_b2 = raveled_idxs_b0 + nPixels * 2
            raveled_idxs_b3 = raveled_idxs_b0 + nPixels * 3
            # --------------------------------------------

            coord_mask.view(-1)[raveled_idxs] = cur_true_coord_weights
            cls_mask.view(-1)[raveled_idxs] = cur_true_cls_weights
            conf_mask.view(-1)[raveled_idxs] = cur_true_conf_weights

            tcoord.view(-1)[raveled_idxs_b0] = cur_tcoord_x
            tcoord.view(-1)[raveled_idxs_b1] = cur_tcoord_y
            tcoord.view(-1)[raveled_idxs_b2] = cur_tcoord_w
            tcoord.view(-1)[raveled_idxs_b3] = cur_tcoord_h

            tcls.view(-1)[raveled_idxs] = cur_gt_cls
            tconf.view(-1)[raveled_idxs] = cur_true_ious

        # because the coord and conf masks are applied inside an MSE loss,
        # take their sqrt so they act as linear weights
        coord_mask = coord_mask.sqrt()
        conf_mask = conf_mask.sqrt()
        coord_mask = coord_mask.expand_as(tcoord)
        return coord_mask, conf_mask, cls_mask, tcoord, tconf, tcls
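The target-assignment code above fills its mask tensors with a flat-index scatter: np.ravel_multi_index turns (batch, anchor, 0, row, col) coordinates into offsets into the flattened tensor, so one vectorized assignment writes every per-truth value at once. The following is a minimal self-contained sketch of that trick; the shapes and values are made up for illustration and are not taken from netharn.

import numpy as np
import torch

nA, nH, nW = 5, 13, 13                      # anchors, grid height, grid width
conf_mask = torch.zeros(1, nA, 1, nH, nW)   # same layout as the loss masks above

# hypothetical assignments for two ground-truth boxes
anchor_axs = np.array([2, 4])               # best anchor index per truth
true_js = np.array([6, 1])                  # grid row per truth
true_is = np.array([7, 11])                 # grid column per truth
weights = torch.tensor([1.0, 0.5])          # per-truth weights to scatter

# flat offsets into conf_mask.view(-1); the [0] entries broadcast over both truths
flat_idxs = np.ravel_multi_index(
    [[0], anchor_axs, [0], true_js, true_is], conf_mask.shape)
conf_mask.view(-1)[torch.from_numpy(flat_idxs)] = weights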
Code Example #13
def undo_letterbox(cxywh):
    # Note: harn, orig_size, and inp_size are captured from the enclosing scope
    boxes = util.Boxes(cxywh, 'cxywh')
    letterbox = harn.datasets['train'].letterbox
    return letterbox._boxes_letterbox_invert(boxes, orig_size, inp_size)
Code Example #14
    def _measure_confusion(harn, postout, labels, inp_size, **kw):
        targets = labels['targets']
        gt_weights = labels['gt_weights']
        bg_weights = labels['bg_weights']

        # orig_sizes = labels['orig_sizes']
        # indices = labels['indices']

        def asnumpy(tensor):
            return tensor.data.cpu().numpy()

        bsize = len(targets)
        for bx in range(bsize):
            postitem = asnumpy(postout[bx])
            target = asnumpy(targets[bx]).reshape(-1, 5)
            true_cxywh = target[:, 1:5]
            true_cxs = target[:, 0]
            true_weight = asnumpy(gt_weights[bx])

            # Remove padded truth
            flags = true_cxs != -1
            true_cxywh = true_cxywh[flags]
            true_cxs = true_cxs[flags]
            true_weight = true_weight[flags]

            # orig_size    = asnumpy(orig_sizes[bx])
            # gx           = int(asnumpy(indices[bx]))

            # how much do we care about the background in this image?
            bg_weight = float(asnumpy(bg_weights[bx]))

            # Unpack postprocessed predictions
            sboxes = postitem.reshape(-1, 6)
            pred_cxywh = sboxes[:, 0:4]
            pred_scores = sboxes[:, 4]
            pred_cxs = sboxes[:, 5].astype(int)

            true_tlbr = util.Boxes(true_cxywh, 'cxywh').to_tlbr()
            pred_tlbr = util.Boxes(pred_cxywh, 'cxywh').to_tlbr()

            true_tlbr = true_tlbr.scale(inp_size)
            pred_tlbr = pred_tlbr.scale(inp_size)

            # TODO: can we invert the letterbox transform here and clip for
            # some extra mAP?
            true_boxes = true_tlbr.data
            pred_boxes = pred_tlbr.data

            y = nh.metrics.detection_confusions(
                true_boxes=true_boxes,
                true_cxs=true_cxs,
                true_weights=true_weight,
                pred_boxes=pred_boxes,
                pred_scores=pred_scores,
                pred_cxs=pred_cxs,
                bg_weight=bg_weight,
                bg_cls=-1,
                ovthresh=harn.hyper.other['ovthresh'],
                **kw)
            # y['gx'] = gx
            yield y
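For reference, here is a minimal plain-NumPy sketch of the unpacking that _measure_confusion relies on: each target row is assumed to be [class, cx, cy, w, h] in normalized coordinates, rows with class -1 are padding, and cxywh boxes are converted to tlbr and scaled to the network input size before scoring. util.Boxes does this internally; the helper below is only illustrative.

import numpy as np

def cxywh_to_tlbr(cxywh):
    cx, cy, w, h = np.asarray(cxywh, dtype=float).T
    return np.stack([cx - w / 2, cy - h / 2, cx + w / 2, cy + h / 2], axis=-1)

target = np.array([
    [ 0.0, 0.50, 0.50, 0.20, 0.40],   # one real box of class 0
    [-1.0, 0.00, 0.00, 0.00, 0.00],   # padding row added when batching
])
flags = target[:, 0] != -1            # drop padded truth
true_tlbr = cxywh_to_tlbr(target[flags, 1:5]) * 416   # scale to a square 416x416 input (for illustration)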
Code Example #15
    def visualize_prediction(harn,
                             batch,
                             outputs,
                             postout,
                             idx=0,
                             thresh=None):
        """
        Returns:
            np.ndarray: numpy image
        """
        # xdoc: +REQUIRES(--show)
        inputs, labels = batch
        targets, gt_weights, orig_sizes, indices, bg_weights = labels
        chw01 = inputs[idx]
        target = targets[idx]
        postitem = postout[idx]
        # ---
        hwc01 = chw01.cpu().numpy().transpose(1, 2, 0)
        # TRUE
        true_cxs = target[:, 0].long()
        true_boxes = target[:, 1:5]
        flags = true_cxs != -1
        true_boxes = true_boxes[flags]
        true_cxs = true_cxs[flags]
        # PRED
        pred_boxes = postitem[:, 0:4]
        pred_scores = postitem[:, 4]
        pred_cxs = postitem[:, 5]

        if thresh is not None:
            flags = pred_scores > thresh
            pred_cxs = pred_cxs[flags]
            pred_boxes = pred_boxes[flags]
            pred_scores = pred_scores[flags]

        pred_clsnms = list(
            ub.take(harn.datasets['train'].label_names,
                    pred_cxs.long().cpu().numpy()))
        pred_labels = [
            '{}@{:.2f}'.format(n, s) for n, s in zip(pred_clsnms, pred_scores)
        ]

        true_labels = list(
            ub.take(harn.datasets['train'].label_names,
                    true_cxs.long().cpu().numpy()))

        # ---
        inp_size = np.array(hwc01.shape[0:2][::-1])
        true_boxes_ = util.Boxes(true_boxes.cpu().numpy(),
                                 'cxywh').scale(inp_size).data
        pred_boxes_ = util.Boxes(pred_boxes.cpu().numpy(),
                                 'cxywh').scale(inp_size).data
        from netharn.util import mplutil

        mplutil.figure(doclf=True, fnum=1)
        mplutil.imshow(hwc01, colorspace='rgb')
        mplutil.draw_boxes(true_boxes_,
                           color='green',
                           box_format='cxywh',
                           labels=true_labels)
        mplutil.draw_boxes(pred_boxes_,
                           color='blue',
                           box_format='cxywh',
                           labels=pred_labels)
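A hedged usage sketch of how this visualization might be driven from a harness; harn.loaders, harn.model, and harn.postprocess are assumed names for the harness's data loaders, network, and box decoder and are not shown in this example:

batch = next(iter(harn.loaders['vali']))   # (inputs, labels) for one batch (assumed attribute)
inputs, labels = batch
outputs = harn.model(inputs)               # raw network output (assumed attribute)
postout = harn.postprocess(outputs)        # decoded + NMS'd boxes (assumed attribute)
# draws truth (green) and predictions (blue) on the current matplotlib figure
harn.visualize_prediction(batch, outputs, postout, idx=0, thresh=0.3)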
Code Example #16
    def _measure_confusion(harn, postout, labels, inp_size):
        targets = labels[0]
        gt_weights = labels[1]
        orig_sizes = labels[2]
        indices = labels[3]
        bg_weights = labels[4]

        # def clip_boxes_to_letterbox(boxes, letterbox_tlbr):
        #     if boxes.shape[0] == 0:
        #         return boxes

        #     boxes = boxes.copy()
        #     left, top, right, bot = letterbox_tlbr
        #     x1, y1, x2, y2 = boxes.T
        #     np.minimum(x1, right, out=x1)
        #     np.minimum(y1, bot, out=y1)
        #     np.minimum(x2, right, out=x2)
        #     np.minimum(y2, bot, out=y2)

        #     np.maximum(x1, left, out=x1)
        #     np.maximum(y1, top, out=y1)
        #     np.maximum(x2, left, out=x2)
        #     np.maximum(y2, top, out=y2)
        #     return boxes

        def asnumpy(tensor):
            return tensor.data.cpu().numpy()

        bsize = len(labels[0])
        for bx in range(bsize):
            postitem = asnumpy(postout[bx])
            target = asnumpy(targets[bx]).reshape(-1, 5)
            true_cxywh = target[:, 1:5]
            true_cxs = target[:, 0]
            true_weight = asnumpy(gt_weights[bx])

            # Remove padded truth
            flags = true_cxs != -1
            true_cxywh = true_cxywh[flags]
            true_cxs = true_cxs[flags]
            true_weight = true_weight[flags]

            # orig_size    = asnumpy(orig_sizes[bx])
            # gx           = int(asnumpy(indices[bx]))

            # how much do we care about the background in this image?
            bg_weight = float(asnumpy(bg_weights[bx]))

            # Unpack postprocessed predictions
            sboxes = postitem.reshape(-1, 6)
            pred_cxywh = sboxes[:, 0:4]
            pred_scores = sboxes[:, 4]
            pred_cxs = sboxes[:, 5].astype(int)

            true_tlbr = util.Boxes(true_cxywh, 'cxywh').to_tlbr()
            pred_tlbr = util.Boxes(pred_cxywh, 'cxywh').to_tlbr()

            # TODO: can we invert the letterbox transform here and clip for
            # some extra mAP?
            true_boxes = true_tlbr.data
            pred_boxes = pred_tlbr.data

            # if False:
            #     # the new letterbox transform makes this trickier; simply try to
            #     # compare in 0-1 space for now.

            #     # use max because of letterbox transform
            #     lettered_orig_size = orig_size.max()
            #     true_boxes = true_tlbr.scale(lettered_orig_size).data
            #     pred_boxes = pred_tlbr.scale(lettered_orig_size).data

            #     # Clip predicted boxes to the letterbox
            #     shift, embed_size = letterbox_transform(orig_size, inp_size)
            #     orig_lefttop = (shift / inp_size) * orig_size.max()
            #     orig_rightbot = lettered_orig_size - orig_lefttop
            #     letterbox_tlbr = list(orig_lefttop) + list(orig_rightbot)

            #     pred_boxes = clip_boxes_to_letterbox(pred_boxes, letterbox_tlbr)

            y = nh.metrics.detection_confusions(
                true_boxes=true_boxes,
                true_cxs=true_cxs,
                true_weights=true_weight,
                pred_boxes=pred_boxes,
                pred_scores=pred_scores,
                pred_cxs=pred_cxs,
                bg_weight=bg_weight,
                bg_cls=-1,
                ovthresh=harn.hyper.other['ovthresh'])
            # y['gx'] = gx
            yield y