Python yolo_to_bboxの例、utils.cython_yolo.yolo_to_bbox Pythonの例

コード例 #1

0

ファイルを表示

ファイル: yolo.py プロジェクト: jjprincess/yolo2-pytorch

def postprocess(bbox_pred, iou_pred, prob_pred, im_shape, cfg, thresh=0.05,
                size_index=0):
    """Turn the raw network outputs for one image into final detections.

    The predictions are decoded with ``yolo_to_bbox``, scaled by the image
    size, scored as ``iou * best class probability``, filtered by
    ``thresh``, reduced with per-class NMS and finally clipped with
    ``clip_boxes``.

    bbox_pred: (bsize, HxW, num_anchors, 4)
               ndarray of float (sig(tx), sig(ty), exp(tw), exp(th))
    iou_pred: (bsize, HxW, num_anchors, 1)
    prob_pred: (bsize, HxW, num_anchors, num_classes)
    im_shape: indexable; ``im_shape[1]`` scales box columns 0 and 2 and
              ``im_shape[0]`` scales columns 1 and 3
              (presumably (height, width, ...) -- confirm with caller)
    cfg: object providing ``num_classes``, ``anchors`` and
         ``multi_scale_out_size``
    thresh: minimum score a detection needs to be kept
    size_index: index into ``cfg.multi_scale_out_size``

    Returns ``(bbox_pred, scores, cls_inds)`` of the surviving detections.
    Only a batch size of 1 is supported (checked with an assert).
    """

    num_classes = cfg.num_classes
    anchors = cfg.anchors
    W, H = cfg.multi_scale_out_size[size_index]
    assert bbox_pred.shape[0] == 1, 'postprocess only support one image per batch'  # noqa

    # ``np.float`` / ``np.int`` were plain aliases of the builtins and were
    # removed in NumPy 1.24; the builtins give the exact same dtypes.
    bbox_pred = yolo_to_bbox(
        np.ascontiguousarray(bbox_pred, dtype=float),
        np.ascontiguousarray(anchors, dtype=float),
        H, W)
    bbox_pred = np.reshape(bbox_pred, [-1, 4])
    bbox_pred[:, 0::2] *= float(im_shape[1])
    bbox_pred[:, 1::2] *= float(im_shape[0])
    bbox_pred = bbox_pred.astype(int)

    iou_pred = np.reshape(iou_pred, [-1])
    prob_pred = np.reshape(prob_pred, [-1, num_classes])

    # Keep only the best class of every box; its probability times the
    # predicted IoU is the detection score.
    cls_inds = np.argmax(prob_pred, axis=1)
    prob_pred = prob_pred[(np.arange(prob_pred.shape[0]), cls_inds)]
    scores = iou_pred * prob_pred
    assert len(scores) == len(bbox_pred), '{}, {}'.format(scores.shape, bbox_pred.shape)

    # threshold
    keep = np.where(scores >= thresh)
    bbox_pred = bbox_pred[keep]
    scores = scores[keep]
    cls_inds = cls_inds[keep]

    # NMS, run independently for every class that has detections
    keep = np.zeros(len(bbox_pred), dtype=int)
    for i in range(num_classes):
        inds = np.where(cls_inds == i)[0]
        if len(inds) == 0:
            continue
        c_bboxes = bbox_pred[inds]
        c_scores = scores[inds]
        c_keep = nms_detections(c_bboxes, c_scores, 0.3)
        keep[inds[c_keep]] = 1

    keep = np.where(keep > 0)
    bbox_pred = bbox_pred[keep]
    scores = scores[keep]
    cls_inds = cls_inds[keep]

    # clip
    bbox_pred = clip_boxes(bbox_pred, im_shape)

    return bbox_pred, scores, cls_inds

コード例 #2

0

ファイルを表示

def postprocess(bbox_pred, iou_pred, prob_pred, im_shape, cfg, thresh=0.05,
                size_index=0):
    """Convert single-image network predictions into filtered detections.

    Steps: decode boxes with ``yolo_to_bbox``, scale them by ``im_shape``,
    score each box as ``iou * max class probability``, drop boxes scoring
    below ``thresh``, apply NMS per class and clip the result with
    ``clip_boxes``.

    bbox_pred: (bsize, HxW, num_anchors, 4)
               ndarray of float (sig(tx), sig(ty), exp(tw), exp(th))
    iou_pred: (bsize, HxW, num_anchors, 1)
    prob_pred: (bsize, HxW, num_anchors, num_classes)
    im_shape: indexable; element 1 multiplies box columns 0/2, element 0
              multiplies columns 1/3 (presumably (height, width, ...) --
              confirm with caller)
    cfg: object exposing ``num_classes``, ``anchors`` and
         ``multi_scale_out_size``
    thresh: score threshold applied before NMS
    size_index: selects the entry of ``cfg.multi_scale_out_size``

    Returns a tuple ``(bbox_pred, scores, cls_inds)``.
    The function asserts that the batch holds exactly one image.
    """

    num_classes = cfg.num_classes
    anchors = cfg.anchors
    W, H = cfg.multi_scale_out_size[size_index]
    assert bbox_pred.shape[0] == 1, 'postprocess only support one image per batch'  # noqa

    # The ``np.float`` / ``np.int`` aliases no longer exist in
    # NumPy >= 1.24; the builtin ``float`` / ``int`` are what they stood for.
    bbox_pred = yolo_to_bbox(
        np.ascontiguousarray(bbox_pred, dtype=float),
        np.ascontiguousarray(anchors, dtype=float),
        H, W)
    bbox_pred = np.reshape(bbox_pred, [-1, 4])
    bbox_pred[:, 0::2] *= float(im_shape[1])
    bbox_pred[:, 1::2] *= float(im_shape[0])
    bbox_pred = bbox_pred.astype(int)

    iou_pred = np.reshape(iou_pred, [-1])
    prob_pred = np.reshape(prob_pred, [-1, num_classes])

    # Score = predicted IoU * probability of the most likely class.
    cls_inds = np.argmax(prob_pred, axis=1)
    prob_pred = prob_pred[(np.arange(prob_pred.shape[0]), cls_inds)]
    scores = iou_pred * prob_pred
    assert len(scores) == len(bbox_pred), '{}, {}'.format(scores.shape, bbox_pred.shape)

    # threshold
    keep = np.where(scores >= thresh)
    bbox_pred = bbox_pred[keep]
    scores = scores[keep]
    cls_inds = cls_inds[keep]

    # NMS: flag the survivors class by class in a 0/1 vector
    keep = np.zeros(len(bbox_pred), dtype=int)
    for i in range(num_classes):
        inds = np.where(cls_inds == i)[0]
        if len(inds) == 0:
            continue
        c_bboxes = bbox_pred[inds]
        c_scores = scores[inds]
        c_keep = nms_detections(c_bboxes, c_scores, 0.3)
        keep[inds[c_keep]] = 1

    keep = np.where(keep > 0)
    bbox_pred = bbox_pred[keep]
    scores = scores[keep]
    cls_inds = cls_inds[keep]

    # clip
    bbox_pred = clip_boxes(bbox_pred, im_shape)

    return bbox_pred, scores, cls_inds

コード例 #3

0

ファイルを表示

ファイル: yolo_v2.py プロジェクト: liwei46/yolo2-pytorch

def postprocess(bbox_pred, iou_pred, prob_pred, im_shape, cfg, thresh=0.001):
    """Convert single-image network predictions into filtered detections.

    bbox_pred: (bsize, HxW, num_anchors, 4) ndarray of float (sig(tx), sig(ty), exp(tw), exp(th))
    iou_pred: (bsize, HxW, num_anchors, 1)
    prob_pred: (bsize, HxW, num_anchors, num_classes)
    im_shape: indexable; element 1 scales box columns 0/2 (w) and element 0
              scales columns 1/3 (h)
    cfg: mapping with the keys 'num_classes', 'num_anchors', 'anchors' and
         'out_size'
    thresh: minimum score (iou * best class probability) to keep a box

    Returns ``(bbox_pred, scores, cls_inds)`` after score thresholding,
    per-class NMS (threshold 0.5) and clipping with ``clip_boxes``.
    Only one image per batch is supported (checked with an assert).
    """

    # num_anchors is not used below; the lookup is kept so a cfg without
    # that key still fails here as before.
    num_classes, num_anchors = cfg['num_classes'], cfg['num_anchors']
    anchors = cfg['anchors']
    W, H = cfg['out_size']
    assert bbox_pred.shape[
        0] == 1, 'postprocess only support one image per batch'

    # ``np.float`` / ``np.int`` were removed in NumPy 1.24; they were plain
    # aliases of the builtins used here.
    bbox_pred = yolo_to_bbox(np.ascontiguousarray(bbox_pred, dtype=float),
                             np.ascontiguousarray(anchors, dtype=float), H,
                             W)
    bbox_pred = np.reshape(bbox_pred, [-1, 4])
    bbox_pred[:, 0::2] *= float(im_shape[1])  # w
    bbox_pred[:, 1::2] *= float(im_shape[0])  # h
    bbox_pred = bbox_pred.astype(int)

    iou_pred = np.reshape(iou_pred, [-1])
    prob_pred = np.reshape(prob_pred, [-1, num_classes])

    # Score = predicted IoU * probability of the most likely class.
    cls_inds = np.argmax(prob_pred, axis=1)
    prob_pred = prob_pred[(np.arange(prob_pred.shape[0]), cls_inds)]
    scores = iou_pred * prob_pred

    # threshold on score
    keep = np.where(scores >= thresh)
    bbox_pred = bbox_pred[keep]
    scores = scores[keep]
    cls_inds = cls_inds[keep]

    # NMS, class by class
    keep = np.zeros(len(bbox_pred), dtype=int)
    for i in range(num_classes):
        inds = np.where(cls_inds == i)[0]
        if len(inds) == 0:
            continue
        c_bboxes = bbox_pred[inds]
        c_scores = scores[inds]
        c_keep = nms_detections(c_bboxes, c_scores, 0.5)
        keep[inds[c_keep]] = 1

    # keep only the boxes that survived NMS
    keep = np.where(keep > 0)
    bbox_pred = bbox_pred[keep]
    scores = scores[keep]
    cls_inds = cls_inds[keep]

    # clip
    bbox_pred = clip_boxes(bbox_pred, im_shape)

    return bbox_pred, scores, cls_inds

コード例 #4

0

ファイルを表示

ファイル: model.py プロジェクト: WJtomcat/yolo2-pytorch

def _process_batch(data, size_index):
    """Build training targets and loss-weight masks for one image.

    data: tuple ``(bbox_pred_np, gt_boxes, gt_classes, dontcares,
          iou_pred_np)``. ``bbox_pred_np`` is a 3-D array whose first two
          dimensions are (hw, num_anchors); ``dontcares`` is unpacked but
          not used.
    size_index: index into ``cfg.multi_scale_inp_size`` and
                ``cfg.multi_scale_out_size`` (``cfg`` is a module-level
                name)

    Returns ``(_boxes, _ious, _classes, _box_mask, _iou_mask, _class_mask)``,
    all with leading shape (hw, num_anchors).
    """
    W, H = cfg.multi_scale_out_size[size_index]
    inp_size = cfg.multi_scale_inp_size[size_index]
    out_size = cfg.multi_scale_out_size[size_index]

    bbox_pred_np, gt_boxes, gt_classes, dontcares, iou_pred_np = data

    # net output
    hw, num_anchors, _ = bbox_pred_np.shape

    # gt
    # ``np.float`` / ``np.int`` were removed in NumPy 1.24; they were plain
    # aliases of the builtins used throughout this function.
    _classes = np.zeros([hw, num_anchors, cfg.num_classes], dtype=float)
    _class_mask = np.zeros([hw, num_anchors, 1], dtype=float)

    _ious = np.zeros([hw, num_anchors, 1], dtype=float)
    _iou_mask = np.zeros([hw, num_anchors, 1], dtype=float)

    # Default box target (0.5, 0.5, 1, 1) with a small weight of 0.01 for
    # every cell/anchor that is not matched to a ground-truth box below.
    _boxes = np.zeros([hw, num_anchors, 4], dtype=float)
    _boxes[:, :, 0:2] = 0.5
    _boxes[:, :, 2:4] = 1.0
    _box_mask = np.zeros([hw, num_anchors, 1], dtype=float) + 0.01

    # scale pred_bbox
    anchors = np.ascontiguousarray(cfg.anchors, dtype=float)
    bbox_pred_np = np.expand_dims(bbox_pred_np, 0)
    bbox_np = yolo_to_bbox(np.ascontiguousarray(bbox_pred_np, dtype=float),
                           anchors, H, W)
    # bbox_np = (hw, num_anchors, (x1, y1, x2, y2))   range: 0 ~ 1
    bbox_np = bbox_np[0]
    bbox_np[:, :, 0::2] *= float(inp_size[0])  # rescale x
    bbox_np[:, :, 1::2] *= float(inp_size[1])  # rescale y

    gt_boxes_b = np.asarray(gt_boxes, dtype=float)

    # for each cell, compare predicted_bbox and gt_bbox
    bbox_np_b = np.reshape(bbox_np, [-1, 4])
    ious = bbox_ious(np.ascontiguousarray(bbox_np_b, dtype=float),
                     np.ascontiguousarray(gt_boxes_b, dtype=float))
    best_ious = np.max(ious, axis=1).reshape(_iou_mask.shape)
    # Use ONE mask for both the read and the write. Selecting the penalty
    # with ``<`` but assigning with ``<=`` fails with a shape mismatch as
    # soon as any best IoU equals the threshold exactly.
    noobj = best_ious <= cfg.iou_thresh
    iou_penalty = 0 - iou_pred_np[noobj]
    _iou_mask[noobj] = cfg.noobject_scale * iou_penalty

    # locate the cell of each gt_boxe
    cell_w = float(inp_size[0]) / W
    cell_h = float(inp_size[1]) / H
    cx = (gt_boxes_b[:, 0] + gt_boxes_b[:, 2]) * 0.5 / cell_w
    cy = (gt_boxes_b[:, 1] + gt_boxes_b[:, 3]) * 0.5 / cell_h
    cell_inds = np.floor(cy) * W + np.floor(cx)
    cell_inds = cell_inds.astype(int)

    target_boxes = np.empty(gt_boxes_b.shape, dtype=float)
    target_boxes[:, 0] = cx - np.floor(cx)  # cx
    target_boxes[:, 1] = cy - np.floor(cy)  # cy
    target_boxes[:, 2] = \
        (gt_boxes_b[:, 2] - gt_boxes_b[:, 0]) / inp_size[0] * out_size[0]  # tw
    target_boxes[:, 3] = \
        (gt_boxes_b[:, 3] - gt_boxes_b[:, 1]) / inp_size[1] * out_size[1]  # th

    # for each gt boxes, match the best anchor
    gt_boxes_resize = np.copy(gt_boxes_b)
    gt_boxes_resize[:, 0::2] *= (out_size[0] / float(inp_size[0]))
    gt_boxes_resize[:, 1::2] *= (out_size[1] / float(inp_size[1]))
    anchor_ious = anchor_intersections(
        anchors, np.ascontiguousarray(gt_boxes_resize, dtype=float))
    anchor_inds = np.argmax(anchor_ious, axis=0)

    ious_reshaped = np.reshape(ious, [hw, num_anchors, len(cell_inds)])
    for i, cell_ind in enumerate(cell_inds):
        # Skip boxes whose centre falls outside the output grid.
        if cell_ind >= hw or cell_ind < 0:
            print('cell inds size {}'.format(len(cell_inds)))
            print('cell over {} hw {}'.format(cell_ind, hw))
            continue
        a = anchor_inds[i]

        # 0 ~ 1, should be close to 1
        iou_pred_cell_anchor = iou_pred_np[cell_ind, a, :]
        _iou_mask[cell_ind,
                  a, :] = cfg.object_scale * (1 - iou_pred_cell_anchor)  # noqa
        _ious[cell_ind, a, :] = ious_reshaped[cell_ind, a, i]

        _box_mask[cell_ind, a, :] = cfg.coord_scale
        # Width/height targets are expressed relative to the matched anchor.
        target_boxes[i, 2:4] /= anchors[a]
        _boxes[cell_ind, a, :] = target_boxes[i]

        _class_mask[cell_ind, a, :] = cfg.class_scale
        _classes[cell_ind, a, gt_classes[i]] = 1.

    return _boxes, _ious, _classes, _box_mask, _iou_mask, _class_mask

コード例 #5

0

ファイルを表示

    def _build_target(self, bbox_pred_np, gt_boxes, gt_classes, dontcare):
        """Build per-cell, per-anchor training targets for a whole batch.

        :param bbox_pred_np: shape: (bsize, h x w, num_anchors, 4) : (sig(tx), sig(ty), exp(tw), exp(th))
        :param gt_boxes: per-image ground-truth boxes, indexed as
            ``gt_boxes[b]``; columns 0/2 are used as x and 1/3 as y
        :param gt_classes: per-image class indices, indexed as
            ``gt_classes[b][k]`` for ground-truth box ``k`` of image ``b``
        :param dontcare: currently unused (see TODO below)
        :return: ``(_boxes, _ious, _classes, _mask)``, each with leading
            shape (bsize, hw, num_anchors)
        """
        W, H = cfg.out_size
        inp_size = cfg.inp_size
        out_size = cfg.out_size
        # TODO: dontcare areas

        # net output
        bsize, hw, num_anchors, _ = bbox_pred_np.shape
        # gt
        # ``np.float`` / ``np.int`` were removed in NumPy 1.24; they were
        # plain aliases of the builtins used here.
        _boxes = np.zeros([bsize, hw, num_anchors, 4], dtype=float)
        _ious = np.zeros([bsize, hw, num_anchors, 1], dtype=float)
        _classes = np.zeros([bsize, hw, num_anchors, cfg.num_classes], dtype=int)
        _mask = np.zeros([bsize, hw, num_anchors, 1], dtype=int)

        # scale pred_bbox
        anchors = np.ascontiguousarray(cfg.anchors, dtype=float)
        bbox_np = yolo_to_bbox(
            np.ascontiguousarray(bbox_pred_np, dtype=float),
            anchors,
            H, W)
        bbox_np[:, :, :, 0::2] *= float(inp_size[0])
        bbox_np[:, :, :, 1::2] *= float(inp_size[1])

        # The cell size does not depend on the image, so compute it once.
        cell_w = float(inp_size[0]) / W
        cell_h = float(inp_size[1]) / H

        # assign each box to cells
        for b in range(bsize):
            gt_boxes_b = np.asarray(gt_boxes[b], dtype=float)

            # locate the cell of each gt_boxe
            cx = (gt_boxes_b[:, 0] + gt_boxes_b[:, 2]) * 0.5 / cell_w
            cy = (gt_boxes_b[:, 1] + gt_boxes_b[:, 3]) * 0.5 / cell_h
            cell_inds = np.floor(cy) * W + np.floor(cx)
            cell_inds = cell_inds.astype(int)

            target_boxes = np.empty(gt_boxes_b.shape, dtype=float)
            target_boxes[:, 0] = cx - np.floor(cx)  # cx
            target_boxes[:, 1] = cy - np.floor(cy)  # cy
            target_boxes[:, 2] = (gt_boxes_b[:, 2] - gt_boxes_b[:, 0]) / inp_size[0] * out_size[0]  # tw
            target_boxes[:, 3] = (gt_boxes_b[:, 3] - gt_boxes_b[:, 1]) / inp_size[1] * out_size[1]  # th

            # cell_boxes[c] lists the indices (into gt_boxes_b) of the
            # ground-truth boxes whose centre lies in cell c.
            cell_boxes = [[] for _ in range(hw)]
            for i, ind in enumerate(cell_inds):
                cell_boxes[ind].append(i)

            for i in range(hw):
                if len(cell_boxes[i]) == 0:
                    continue
                bboxes = [gt_boxes_b[j] for j in cell_boxes[i]]
                targets_b = np.array([target_boxes[j] for j in cell_boxes[i]], dtype=float)

                ious = bbox_ious(
                    np.ascontiguousarray(bbox_np[b, i], dtype=float),
                    np.ascontiguousarray(bboxes, dtype=float)
                )

                # Best anchor (axis 0) for every box in this cell (axis 1).
                argmax = np.argmax(ious, axis=0)
                for j, a in enumerate(argmax):
                    if _ious[b, i, a, 0] <= ious[a, j]:
                        _mask[b, i, a, :] = 1
                        _ious[b, i, a, 0] = ious[a, j]
                        targets_b[j, 2:4] /= anchors[a]
                        _boxes[b, i, a, :] = targets_b[j]
                        # ``j`` only counts boxes inside this cell; map it
                        # back to the image-level box index before looking
                        # up the class, otherwise the wrong label is used.
                        gt_idx = cell_boxes[i][j]
                        _classes[b, i, a, gt_classes[b][gt_idx]] = 1

        return _boxes, _ious, _classes, _mask

コード例 #6

0

ファイルを表示

ファイル: darknet.py プロジェクト: linkapp-github/YOLO2-pytorch

def _process_batch(data, size_index):
    """Build training targets and loss-weight masks for one image.

    "Multi-scale output" here means that the final prediction size is
    input / stride (per the original note, the stride is usually 32).

    data: tuple ``(bbox_pred_np, gt_boxes, gt_classes, dontcares,
          iou_pred_np)``. ``bbox_pred_np`` is a 3-D array whose first two
          dimensions are (hw, num_anchors); ``dontcares`` is unpacked but
          not used.
    size_index: index into ``cfg.multi_scale_inp_size`` and
                ``cfg.multi_scale_out_size`` (``cfg`` is a module-level
                name)

    Overview of the procedure:
    1. The masks hold the coefficients of the loss terms
       (``cfg.object_scale``, ``cfg.noobject_scale``, ``cfg.class_scale``,
       ``cfg.coord_scale``).
    2. The predicted boxes are decoded with ``yolo_to_bbox`` and scaled to
       the input size; their IoU with every ground-truth box is computed
       and cells/anchors whose best IoU is below ``cfg.iou_thresh`` get a
       no-object penalty in ``_iou_mask``.
    3. For every ground-truth box the in-cell centre offset, the size in
       output-grid units and the flat index of its cell are computed.
    4. Every ground-truth box is matched to the anchor with the largest
       ``anchor_intersections`` value; that cell/anchor pair receives the
       box target, IoU target, one-hot class and object weights.

    Returns ``(_boxes, _ious, _classes, _box_mask, _iou_mask, _class_mask)``.
    A ``_classes`` row of all zeros means no object was assigned there.
    """
    W, H = cfg.multi_scale_out_size[size_index]
    inp_size = cfg.multi_scale_inp_size[size_index]
    out_size = cfg.multi_scale_out_size[size_index]

    bbox_pred_np, gt_boxes, gt_classes, dontcares, iou_pred_np = data

    # net output
    hw, num_anchors, _ = bbox_pred_np.shape

    # gt
    # ``np.float`` / ``np.int`` were removed in NumPy 1.24; they were plain
    # aliases of the builtins used throughout this function.
    _classes = np.zeros([hw, num_anchors, cfg.num_classes], dtype=float)
    _class_mask = np.zeros([hw, num_anchors, 1], dtype=float)

    _ious = np.zeros([hw, num_anchors, 1], dtype=float)
    _iou_mask = np.zeros([hw, num_anchors, 1], dtype=float)

    _boxes = np.zeros([hw, num_anchors, 4], dtype=float)
    _boxes[:, :, 0:2] = 0.5
    _boxes[:, :, 2:4] = 1.0
    _box_mask = np.zeros([hw, num_anchors, 1], dtype=float) + 0.01

    # scale pred_bbox
    anchors = np.ascontiguousarray(cfg.anchors, dtype=float)

    # Add a leading batch dimension to the predicted boxes:
    # (1, w*h, num_anchors, 4)
    bbox_pred_np = np.expand_dims(bbox_pred_np, 0)

    # Presumably yolo_to_bbox implements the YOLOv2 decoding (TODO confirm):
    #   bx = sigma(tx) + cx
    #   by = sigma(ty) + cy
    #   bw = pw * exp(tw)
    #   bh = ph * exp(th)
    bbox_np = yolo_to_bbox(
        np.ascontiguousarray(bbox_pred_np, dtype=float),
        anchors,
        H, W)
    # bbox_np = (hw, num_anchors, (x1, y1, x2, y2))   range: 0 ~ 1
    bbox_np = bbox_np[0]
    bbox_np[:, :, 0::2] *= float(inp_size[0])  # rescale x
    bbox_np[:, :, 1::2] *= float(inp_size[1])  # rescale y
    gt_boxes_b = np.asarray(gt_boxes, dtype=float)

    # for each cell, compare predicted_bbox and gt_bbox
    # (w*h, anchors, 4) ----> (w*h*anchors, 4)
    bbox_np_b = np.reshape(bbox_np, [-1, 4])
    # Overlap between the predictions and the ground truth. The result is
    # later reshaped to (hw, num_anchors, number of gt boxes).
    ious = bbox_ious(
        np.ascontiguousarray(bbox_np_b, dtype=float),
        np.ascontiguousarray(gt_boxes_b, dtype=float)
    )
    best_ious = np.max(ious, axis=1).reshape(_iou_mask.shape)

    # _iou_mask stores the penalty term for predictions whose best IoU is
    # below the threshold.
    iou_penalty = 0 - iou_pred_np[best_ious < cfg.iou_thresh]
    _iou_mask[best_ious < cfg.iou_thresh] = cfg.noobject_scale * iou_penalty

    # locate the cell of each gt_boxe
    # Size of one output cell, i.e. the width/height of the input image that
    # a single prediction cell covers.
    cell_w = float(inp_size[0]) / W
    cell_h = float(inp_size[1]) / H
    # Centre of every ground-truth box, expressed in output-cell units.
    cx = (gt_boxes_b[:, 0] + gt_boxes_b[:, 2]) * 0.5 / cell_w
    cy = (gt_boxes_b[:, 1] + gt_boxes_b[:, 3]) * 0.5 / cell_h

    # Key step: flat index (row * W + column) of the cell that contains the
    # centre of each ground-truth box.
    cell_inds = np.floor(cy) * W + np.floor(cx)
    cell_inds = cell_inds.astype(int)

    target_boxes = np.empty(gt_boxes_b.shape, dtype=float)
    # Offsets of the centre inside its cell. The trailing "cx"/"cy" labels
    # come from the original author; an earlier reviewer noted these really
    # correspond to tx, ty.
    target_boxes[:, 0] = cx - np.floor(cx)  # cx
    target_boxes[:, 1] = cy - np.floor(cy)  # cy
    # Width/height of the ground-truth box in output-grid units (an earlier
    # reviewer noted these correspond to bw, bh rather than tw, th).
    target_boxes[:, 2] = \
        (gt_boxes_b[:, 2] - gt_boxes_b[:, 0]) / inp_size[0] * out_size[0]  # tw
    target_boxes[:, 3] = \
        (gt_boxes_b[:, 3] - gt_boxes_b[:, 1]) / inp_size[1] * out_size[1]  # th

    # Intersect the ground-truth boxes with the anchors and pick, for every
    # ground-truth box, the anchor that is responsible for it.
    # for each gt boxes, match the best anchor
    gt_boxes_resize = np.copy(gt_boxes_b)
    gt_boxes_resize[:, 0::2] *= (out_size[0] / float(inp_size[0]))
    gt_boxes_resize[:, 1::2] *= (out_size[1] / float(inp_size[1]))

    anchor_ious = anchor_intersections(
        anchors,
        np.ascontiguousarray(gt_boxes_resize, dtype=float)
    )
    anchor_inds = np.argmax(anchor_ious, axis=0)

    # len(cell_inds) is the number of ground-truth objects.
    # ious_reshaped: (h*w, num_anchors, objects); indexing the first axis
    # with a cell index selects the cell holding an object's centre.
    ious_reshaped = np.reshape(ious, [hw, num_anchors, len(cell_inds)])
    # The *_mask arrays hold the scale factors the loss terms are
    # multiplied with.
    for i, cell_ind in enumerate(cell_inds):

        if cell_ind >= hw or cell_ind < 0:
            print('cell inds size {}'.format(len(cell_inds)))
            print('cell over {} hw {}'.format(cell_ind, hw))
            continue
        # Anchor responsible for this object.
        a = anchor_inds[i]

        # 0 ~ 1, should be close to 1
        # Predicted IoU confidence for this cell/anchor.
        iou_pred_cell_anchor = iou_pred_np[cell_ind, a, :]
        _iou_mask[cell_ind, a, :] = cfg.object_scale * (1 - iou_pred_cell_anchor)  # noqa

        # IoU between the prediction and the ground truth.
        _ious[cell_ind, a, :] = ious_reshaped[cell_ind, a, i]
        _box_mask[cell_ind, a, :] = cfg.coord_scale

        # Why divide by the anchor? With bw = pw * exp(tw) we get
        # bw / pw = exp(tw), so after this step _boxes holds
        # (tx, ty, exp(tw), exp(th)).
        target_boxes[i, 2:4] /= anchors[a]
        _boxes[cell_ind, a, :] = target_boxes[i]

        _class_mask[cell_ind, a, :] = cfg.class_scale
        _classes[cell_ind, a, gt_classes[i]] = 1.

    return _boxes, _ious, _classes, _box_mask, _iou_mask, _class_mask

コード例 #7

0

ファイルを表示

ファイル: darknet.py プロジェクト: slieped/yolo2-pytorch

def _process_batch(data):
    """Build training targets and loss-weight masks for one image.

    data: tuple ``(bbox_pred_np, gt_boxes, gt_classes, dontcares)``.
          ``bbox_pred_np`` is a 3-D array whose first two dimensions are
          (hw, num_anchors); ``dontcares`` is unpacked but not used.

    Grid and input sizes come from the module-level ``cfg``
    (``cfg.out_size`` and ``cfg.inp_size``).

    Returns ``(_boxes, _ious, _classes, _box_mask, _iou_mask, _class_mask)``.
    The width/height entries of ``_boxes`` are returned in log space after
    being clamped to at least 0.001.
    """
    W, H = cfg.out_size
    inp_size = cfg.inp_size
    out_size = cfg.out_size

    bbox_pred_np, gt_boxes, gt_classes, dontcares = data

    # net output
    hw, num_anchors, _ = bbox_pred_np.shape

    # gt
    # ``np.float`` / ``np.int`` were removed in NumPy 1.24; they were plain
    # aliases of the builtins used throughout this function.
    _classes = np.zeros([hw, num_anchors, cfg.num_classes], dtype=float)
    _class_mask = np.zeros([hw, num_anchors, 1], dtype=float)

    _ious = np.zeros([hw, num_anchors, 1], dtype=float)
    _iou_mask = np.zeros([hw, num_anchors, 1], dtype=float)

    # Default box target (0.5, 0.5, 1, 1) with a small weight of 0.01 for
    # every cell/anchor that is not matched to a ground-truth box below.
    _boxes = np.zeros([hw, num_anchors, 4], dtype=float)
    _boxes[:, :, 0:2] = 0.5
    _boxes[:, :, 2:4] = 1.0
    _box_mask = np.zeros([hw, num_anchors, 1], dtype=float) + 0.01

    # scale pred_bbox
    anchors = np.ascontiguousarray(cfg.anchors, dtype=float)
    bbox_pred_np = np.expand_dims(bbox_pred_np, 0)
    bbox_np = yolo_to_bbox(
        np.ascontiguousarray(bbox_pred_np, dtype=float),
        anchors,
        H, W)
    bbox_np = bbox_np[0]
    bbox_np[:, :, 0::2] *= float(inp_size[0])
    bbox_np[:, :, 1::2] *= float(inp_size[1])

    gt_boxes_b = np.asarray(gt_boxes, dtype=float)

    # for each cell
    bbox_np_b = np.reshape(bbox_np, [-1, 4])
    ious = bbox_ious(
        np.ascontiguousarray(bbox_np_b, dtype=float),
        np.ascontiguousarray(gt_boxes_b, dtype=float)
    )
    best_ious = np.max(ious, axis=1).reshape(_iou_mask.shape)
    _iou_mask[best_ious <= cfg.iou_thresh] = cfg.noobject_scale

    # locate the cell of each gt_boxe
    cell_w = float(inp_size[0]) / W
    cell_h = float(inp_size[1]) / H
    cx = (gt_boxes_b[:, 0] + gt_boxes_b[:, 2]) * 0.5 / cell_w
    cy = (gt_boxes_b[:, 1] + gt_boxes_b[:, 3]) * 0.5 / cell_h
    cell_inds = np.floor(cy) * W + np.floor(cx)
    cell_inds = cell_inds.astype(int)

    target_boxes = np.empty(gt_boxes_b.shape, dtype=float)
    target_boxes[:, 0] = cx - np.floor(cx)  # cx
    target_boxes[:, 1] = cy - np.floor(cy)  # cy
    target_boxes[:, 2] = (gt_boxes_b[:, 2] - gt_boxes_b[:, 0]) / inp_size[0] * out_size[0]  # tw
    target_boxes[:, 3] = (gt_boxes_b[:, 3] - gt_boxes_b[:, 1]) / inp_size[1] * out_size[1]  # th

    # for each gt boxes, match the best anchor
    gt_boxes_resize = np.copy(gt_boxes_b)
    gt_boxes_resize[:, 0::2] *= (out_size[0] / float(inp_size[0]))
    gt_boxes_resize[:, 1::2] *= (out_size[1] / float(inp_size[1]))
    anchor_ious = anchor_intersections(
        anchors,
        np.ascontiguousarray(gt_boxes_resize, dtype=float)
    )
    anchor_inds = np.argmax(anchor_ious, axis=0)
    for i, cell_ind in enumerate(cell_inds):
        # Skip boxes whose centre falls outside the output grid.
        if cell_ind >= hw or cell_ind < 0:
            # The Python 2 ``print cell_ind`` statement is a syntax error on
            # Python 3; the call form prints the same text on both.
            print(cell_ind)
            continue
        a = anchor_inds[i]

        _iou_mask[cell_ind, a, :] = cfg.object_scale
        _ious[cell_ind, a, :] = anchor_ious[a, i]

        _box_mask[cell_ind, a, :] = cfg.coord_scale
        # Width/height targets are expressed relative to the matched anchor.
        target_boxes[i, 2:4] /= anchors[a]
        _boxes[cell_ind, a, :] = target_boxes[i]

        _class_mask[cell_ind, a, :] = cfg.class_scale
        _classes[cell_ind, a, gt_classes[i]] = 1.

    # Clamp before the log so that non-positive sizes cannot yield -inf/nan.
    _boxes[:, :, 2:4] = np.maximum(_boxes[:, :, 2:4], 0.001)
    _boxes[:, :, 2:4] = np.log(_boxes[:, :, 2:4])

    return _boxes, _ious, _classes, _box_mask, _iou_mask, _class_mask

コード例 #8

0

ファイルを表示

ファイル: darknet_training_v3.py プロジェクト: liwei46/yolo2-pytorch

def _process_batch(data):
    """Build training targets and loss-weight masks for one image.

    data: tuple ``(bbox_pred_np, gt_boxes, gt_classes, iou_pred_np,
          inp_size, cfg)``.
          - ``bbox_pred_np``: 3-D array, first two dimensions
            (hw, num_anchors)
          - ``gt_boxes``: array with one row per ground-truth box
            (``gt_boxes.shape[0]`` is read)
          - ``gt_classes``: class index per box; -1 marks a dontcare /
            unknown box that is skipped
          - ``inp_size``: must support ``/ 32`` and indexing
            (presumably a NumPy array (width, height) -- confirm)
          - ``cfg``: mapping with 'num_classes', 'anchors', 'iou_thresh',
            'noobject_scale', 'object_scale', 'coord_scale', 'class_scale'

    A fixed cell size of 32 input pixels is assumed.

    Returns ``(_boxes, _ious, _classes, _box_mask, _iou_mask, _class_mask)``
    with ``_boxes = (sig(tx), sig(ty), exp(tw), exp(th))`` targets.
    """
    bbox_pred_np, gt_boxes, gt_classes, iou_pred_np, inp_size, cfg = data
    out_size = inp_size / 32
    num_gt = gt_boxes.shape[0]

    cell_w = 32
    cell_h = 32

    # net output
    hw, num_anchors, _ = bbox_pred_np.shape
    # hw = num_cell

    # gt
    # ``np.float`` / ``np.int`` were removed in NumPy 1.24; they were plain
    # aliases of the builtins used throughout this function.
    _classes = np.zeros([hw, num_anchors, cfg['num_classes']], dtype=float)
    _class_mask = np.zeros([hw, num_anchors, 1], dtype=float)

    _ious = np.zeros([hw, num_anchors, 1], dtype=float)
    _iou_mask = np.zeros([hw, num_anchors, 1], dtype=float)

    # Unmatched cells/anchors keep an all-zero box target with zero weight.
    _boxes = np.zeros([hw, num_anchors, 4], dtype=float)
    _box_mask = np.zeros([hw, num_anchors, 1], dtype=float)

    # scale pred_bbox
    anchors = np.ascontiguousarray(cfg['anchors'], dtype=float)
    bbox_pred_np = np.expand_dims(bbox_pred_np, 0)
    bbox_np = yolo_to_bbox(np.ascontiguousarray(bbox_pred_np, dtype=float),
                           anchors, out_size[1], out_size[0])
    # bbox_np = (hw, num_anchors, (x1, y1, x2, y2))   range: 0 ~ 1
    bbox_np = bbox_np[0]
    bbox_np[:, :, 0::2] *= float(inp_size[0])  # rescale x by w
    bbox_np[:, :, 1::2] *= float(inp_size[1])  # rescale y by h

    gt_boxes_b = np.asarray(gt_boxes, dtype=float)

    # for each cell, compare predicted_bbox and gt_bbox
    bbox_np_b = np.reshape(bbox_np, [-1, 4])
    ious = bbox_ious(np.ascontiguousarray(bbox_np_b, dtype=float),
                     np.ascontiguousarray(gt_boxes_b, dtype=float))
    best_ious = np.max(ious, axis=1).reshape(_iou_mask.shape)
    # Predictions whose best IoU is below the threshold are pushed towards
    # an IoU prediction of 0.
    iou_penalty = 0 - iou_pred_np[best_ious < cfg['iou_thresh']]
    _iou_mask[
        best_ious < cfg['iou_thresh']] = cfg['noobject_scale'] * iou_penalty
    ious_reshaped = np.reshape(ious, [hw, num_anchors, num_gt])

    # locate the cell of each gt_boxes
    cx = (gt_boxes_b[:, 0] + gt_boxes_b[:, 2]) * 0.5 / cell_w
    cy = (gt_boxes_b[:, 1] + gt_boxes_b[:, 3]) * 0.5 / cell_h
    cell_inds = np.floor(cy) * out_size[0] + np.floor(cx)
    cell_inds = cell_inds.astype(int)

    target_boxes = np.empty(gt_boxes_b.shape, dtype=float)
    target_boxes[:, 0] = cx - np.floor(cx)  # cx  (0 ~ 1)
    target_boxes[:, 1] = cy - np.floor(cy)  # cy  (0 ~ 1)
    target_boxes[:, 2] = (gt_boxes_b[:, 2] - gt_boxes_b[:, 0]) / cell_w  # tw
    target_boxes[:, 3] = (gt_boxes_b[:, 3] - gt_boxes_b[:, 1]) / cell_h  # th

    # for each gt boxes, match the best anchor
    # gt_boxes_resize = [(xmin, ymin, xmax, ymax)] unit: cell px
    gt_boxes_resize = np.copy(gt_boxes_b)
    gt_boxes_resize[:, 0::2] /= cell_w
    gt_boxes_resize[:, 1::2] /= cell_h

    anchor_ious = anchor_intersections(
        anchors, np.ascontiguousarray(gt_boxes_resize, dtype=float))
    anchor_inds = np.argmax(anchor_ious, axis=0)

    # for every gt cell
    for i, cell_ind in enumerate(cell_inds):
        if cell_ind >= hw or cell_ind < 0:
            print('warning: invalid cell_ind, cx, cy, W, H', cell_ind, cx[i],
                  cy[i], out_size[0], out_size[1])
            continue
        a = anchor_inds[i]

        # do not evaluate for dontcare / unknown class
        if gt_classes[i] == -1:
            continue

        # 0 ~ 1, should be close to iou_truth
        iou_pred = iou_pred_np[cell_ind, a, :]
        iou_truth = ious_reshaped[cell_ind, a, i]
        _iou_mask[cell_ind,
                  a, :] = cfg['object_scale'] * (iou_truth - iou_pred)
        _ious[cell_ind, a, :] = iou_truth

        # Weight the coordinate loss by (2 - relative box area), so the
        # weight grows as the box gets smaller.
        truth_w = (gt_boxes_b[i, 2] - gt_boxes_b[i, 0]) / inp_size[0]
        truth_h = (gt_boxes_b[i, 3] - gt_boxes_b[i, 1]) / inp_size[1]
        _box_mask[cell_ind,
                  a, :] = cfg['coord_scale'] * (2 - truth_w * truth_h)
        # Width/height targets are expressed relative to the matched anchor.
        target_boxes[i, 2:4] /= anchors[a]
        _boxes[cell_ind, a, :] = target_boxes[i]

        _class_mask[cell_ind, a, :] = cfg['class_scale']
        _classes[cell_ind, a, gt_classes[i]] = 1.

    # _boxes = (sig(tx), sig(ty), exp(tw), exp(th))
    return _boxes, _ious, _classes, _box_mask, _iou_mask, _class_mask

コード例 #9

0

ファイルを表示

ファイル: darknet.py プロジェクト: jjprincess/yolo2-pytorch

def _process_batch(data, size_index):
    """Compute per-cell, per-anchor targets and loss weights for one image.

    data: tuple ``(bbox_pred_np, gt_boxes, gt_classes, dontcares,
          iou_pred_np)``. ``bbox_pred_np`` is a 3-D array whose first two
          dimensions are (hw, num_anchors); ``dontcares`` is unpacked but
          not used.
    size_index: selects the entry of ``cfg.multi_scale_inp_size`` and
                ``cfg.multi_scale_out_size`` (``cfg`` is a module-level
                name)

    Returns ``(_boxes, _ious, _classes, _box_mask, _iou_mask, _class_mask)``,
    all with leading shape (hw, num_anchors).
    """
    W, H = cfg.multi_scale_out_size[size_index]
    inp_size = cfg.multi_scale_inp_size[size_index]
    out_size = cfg.multi_scale_out_size[size_index]

    bbox_pred_np, gt_boxes, gt_classes, dontcares, iou_pred_np = data

    # net output
    hw, num_anchors, _ = bbox_pred_np.shape

    # gt
    # ``np.float`` / ``np.int`` were removed in NumPy 1.24; they were plain
    # aliases of the builtins used throughout this function.
    _classes = np.zeros([hw, num_anchors, cfg.num_classes], dtype=float)
    _class_mask = np.zeros([hw, num_anchors, 1], dtype=float)

    _ious = np.zeros([hw, num_anchors, 1], dtype=float)
    _iou_mask = np.zeros([hw, num_anchors, 1], dtype=float)

    # Unmatched cells/anchors get the default target (0.5, 0.5, 1, 1) with
    # a small weight of 0.01.
    _boxes = np.zeros([hw, num_anchors, 4], dtype=float)
    _boxes[:, :, 0:2] = 0.5
    _boxes[:, :, 2:4] = 1.0
    _box_mask = np.zeros([hw, num_anchors, 1], dtype=float) + 0.01

    # scale pred_bbox
    anchors = np.ascontiguousarray(cfg.anchors, dtype=float)
    bbox_pred_np = np.expand_dims(bbox_pred_np, 0)
    bbox_np = yolo_to_bbox(
        np.ascontiguousarray(bbox_pred_np, dtype=float),
        anchors,
        H, W)
    # bbox_np = (hw, num_anchors, (x1, y1, x2, y2))   range: 0 ~ 1
    bbox_np = bbox_np[0]
    bbox_np[:, :, 0::2] *= float(inp_size[0])  # rescale x
    bbox_np[:, :, 1::2] *= float(inp_size[1])  # rescale y

    gt_boxes_b = np.asarray(gt_boxes, dtype=float)

    # for each cell, compare predicted_bbox and gt_bbox
    bbox_np_b = np.reshape(bbox_np, [-1, 4])
    ious = bbox_ious(
        np.ascontiguousarray(bbox_np_b, dtype=float),
        np.ascontiguousarray(gt_boxes_b, dtype=float)
    )
    best_ious = np.max(ious, axis=1).reshape(_iou_mask.shape)
    # The penalty must be gathered and scattered with the SAME mask. The
    # previous ``<`` (read) vs ``<=`` (write) pair raised a shape-mismatch
    # error whenever a best IoU was exactly equal to the threshold.
    below_thresh = best_ious <= cfg.iou_thresh
    iou_penalty = 0 - iou_pred_np[below_thresh]
    _iou_mask[below_thresh] = cfg.noobject_scale * iou_penalty

    # locate the cell of each gt_boxe
    cell_w = float(inp_size[0]) / W
    cell_h = float(inp_size[1]) / H
    cx = (gt_boxes_b[:, 0] + gt_boxes_b[:, 2]) * 0.5 / cell_w
    cy = (gt_boxes_b[:, 1] + gt_boxes_b[:, 3]) * 0.5 / cell_h
    cell_inds = np.floor(cy) * W + np.floor(cx)
    cell_inds = cell_inds.astype(int)

    target_boxes = np.empty(gt_boxes_b.shape, dtype=float)
    target_boxes[:, 0] = cx - np.floor(cx)  # cx
    target_boxes[:, 1] = cy - np.floor(cy)  # cy
    target_boxes[:, 2] = \
        (gt_boxes_b[:, 2] - gt_boxes_b[:, 0]) / inp_size[0] * out_size[0]  # tw
    target_boxes[:, 3] = \
        (gt_boxes_b[:, 3] - gt_boxes_b[:, 1]) / inp_size[1] * out_size[1]  # th

    # for each gt boxes, match the best anchor
    gt_boxes_resize = np.copy(gt_boxes_b)
    gt_boxes_resize[:, 0::2] *= (out_size[0] / float(inp_size[0]))
    gt_boxes_resize[:, 1::2] *= (out_size[1] / float(inp_size[1]))
    anchor_ious = anchor_intersections(
        anchors,
        np.ascontiguousarray(gt_boxes_resize, dtype=float)
    )
    anchor_inds = np.argmax(anchor_ious, axis=0)

    ious_reshaped = np.reshape(ious, [hw, num_anchors, len(cell_inds)])
    for i, cell_ind in enumerate(cell_inds):
        # Ignore boxes whose centre lies outside the output grid.
        if cell_ind >= hw or cell_ind < 0:
            print('cell inds size {}'.format(len(cell_inds)))
            print('cell over {} hw {}'.format(cell_ind, hw))
            continue
        a = anchor_inds[i]

        # 0 ~ 1, should be close to 1
        iou_pred_cell_anchor = iou_pred_np[cell_ind, a, :]
        _iou_mask[cell_ind, a, :] = cfg.object_scale * (1 - iou_pred_cell_anchor)  # noqa
        _ious[cell_ind, a, :] = ious_reshaped[cell_ind, a, i]

        _box_mask[cell_ind, a, :] = cfg.coord_scale
        # Width/height targets are expressed relative to the matched anchor.
        target_boxes[i, 2:4] /= anchors[a]
        _boxes[cell_ind, a, :] = target_boxes[i]

        _class_mask[cell_ind, a, :] = cfg.class_scale
        _classes[cell_ind, a, gt_classes[i]] = 1.

    return _boxes, _ious, _classes, _box_mask, _iou_mask, _class_mask

コード例 #10

0

ファイルを表示

ファイル: darknet.py プロジェクト: wolfworld6/Deep-Learning-1

def _process_batch(data, size_index):
    """Build the training targets and loss masks for a single image.

    For every ground-truth box, the grid cell containing the box centre and
    the anchor that best matches the box size are made responsible for it;
    that (cell, anchor) slot receives the box/IoU/class targets and the
    "object" loss weights. Predictions whose best IoU with any ground-truth
    box does not exceed ``cfg.iou_thresh`` receive the "no object" weight.

    Args:
        data: 5-tuple ``(bbox_pred_np, gt_boxes, gt_classes, dontcares,
            iou_pred_np)``.
            ``bbox_pred_np`` is the raw box prediction; its shape is unpacked
            as ``(hw, num_anchors, _)``.
            ``gt_boxes`` holds R boxes as ``(x1, y1, x2, y2)`` in
            network-input pixels (they are divided by ``inp_size`` below).
            ``gt_classes`` holds the class index of each ground-truth box.
            ``dontcares`` and ``iou_pred_np`` are accepted for interface
            compatibility but not used by this function.
        size_index: index into ``cfg.multi_scale_inp_size`` /
            ``cfg.multi_scale_out_size`` selecting the current scale.

    Returns:
        Tuple ``(_boxes, _ious, _classes, _box_mask, _iou_mask, _class_mask)``
        of float64 arrays with leading dimensions ``(hw, num_anchors)`` and
        trailing dimension 4, 1, ``cfg.num_classes``, 1, 1 and 1 respectively.
    """
    inp_size = cfg.multi_scale_inp_size[size_index]
    out_size = cfg.multi_scale_out_size[size_index]
    W, H = out_size

    # The last two entries are part of the tuple contract but unused here.
    bbox_pred_np, gt_boxes, gt_classes, _dontcares, _iou_pred_np = data

    # net output
    hw, num_anchors, _ = bbox_pred_np.shape

    # Targets and per-slot loss weights, all zero until a gt box claims a slot.
    # NOTE: np.float / np.int were removed in NumPy 1.24; the explicit
    # fixed-width dtypes below are what those aliases resolved to.
    _classes = np.zeros([hw, num_anchors, cfg.num_classes], dtype=np.float64)
    _class_mask = np.zeros([hw, num_anchors, 1], dtype=np.float64)

    _ious = np.zeros([hw, num_anchors, 1], dtype=np.float64)
    _iou_mask = np.zeros([hw, num_anchors, 1], dtype=np.float64)

    _boxes = np.zeros([hw, num_anchors, 4], dtype=np.float64)
    _box_mask = np.zeros([hw, num_anchors, 1], dtype=np.float64)

    # Ground-truth boxes as an [R, 4] array (also normalises an empty list).
    gt_boxes_b = np.asarray(gt_boxes, dtype=np.float64).reshape(-1, 4)

    if gt_boxes_b.shape[0] == 0:
        # No objects in this image: every prediction's best IoU is 0, so the
        # max-reduction below would fail on a zero-size axis. Apply the
        # no-object weight directly and leave all other targets at zero.
        if 0.0 <= cfg.iou_thresh:
            _iou_mask[...] = cfg.noobject_scale
        return _boxes, _ious, _classes, _box_mask, _iou_mask, _class_mask

    # Decode the raw predictions into boxes.
    anchors = np.ascontiguousarray(cfg.anchors, dtype=np.float64)
    bbox_pred_np = np.expand_dims(bbox_pred_np, 0)
    bbox_np = yolo_to_bbox(
        np.ascontiguousarray(bbox_pred_np, dtype=np.float64),
        anchors, H, W)
    # Per the original note: bbox_np is (hw, num_anchors, (x1, y1, x2, y2))
    # in the 0 ~ 1 range, i.e. predictions mapped to actual box positions.
    bbox_np = bbox_np[0]
    bbox_np[:, :, 0::2] *= float(inp_size[0])  # rescale x to input size
    bbox_np[:, :, 1::2] *= float(inp_size[1])  # rescale y to input size

    # For each prediction, compare against every gt box. The comparison is
    # done at network-input resolution.
    bbox_np_b = np.reshape(bbox_np, [-1, 4])
    ious = bbox_ious(np.ascontiguousarray(bbox_np_b, dtype=np.float64),
                     np.ascontiguousarray(gt_boxes_b, dtype=np.float64))
    best_ious = np.max(ious, axis=1).reshape(_iou_mask.shape)

    # Predictions that overlap no gt box well enough are treated as
    # background and get the no-object weight.
    _iou_mask[best_ious <= cfg.iou_thresh] = cfg.noobject_scale

    # Locate the grid cell containing the centre of each gt box.
    cell_w = float(inp_size[0]) / W
    cell_h = float(inp_size[1]) / H
    cx = (gt_boxes_b[:, 0] + gt_boxes_b[:, 2]) * 0.5 / cell_w
    cy = (gt_boxes_b[:, 1] + gt_boxes_b[:, 3]) * 0.5 / cell_h
    cell_inds = np.floor(cy) * W + np.floor(cx)
    cell_inds = cell_inds.astype(np.int64)

    target_boxes = np.empty(gt_boxes_b.shape, dtype=np.float64)
    target_boxes[:, 0] = cx - np.floor(cx)  # bx - cx = sig(tx)
    target_boxes[:, 1] = cy - np.floor(cy)  # by - cy = sig(ty)
    target_boxes[:, 2] = \
        (gt_boxes_b[:, 2] - gt_boxes_b[:, 0]) / inp_size[0] * out_size[0]  # bw
    target_boxes[:, 3] = \
        (gt_boxes_b[:, 3] - gt_boxes_b[:, 1]) / inp_size[1] * out_size[1]  # bh

    # For each gt box, pick the best-matching anchor. The gt boxes are first
    # scaled to feature-map units so they are comparable with the anchors.
    gt_boxes_resize = np.copy(gt_boxes_b)
    gt_boxes_resize[:, 0::2] *= (out_size[0] / float(inp_size[0]))
    gt_boxes_resize[:, 1::2] *= (out_size[1] / float(inp_size[1]))
    # Per the original note, this assumes box and anchor centres coincide.
    anchor_ious = anchor_intersections(
        anchors, np.ascontiguousarray(gt_boxes_resize, dtype=np.float64))
    anchor_inds = np.argmax(anchor_ious, axis=0)  # best anchor per gt box

    ious_reshaped = np.reshape(ious, [hw, num_anchors, len(cell_inds)])

    # Only the cell holding a gt box, paired with that box's best anchor,
    # is labelled as a positive.
    for i, cell_ind in enumerate(cell_inds):
        if cell_ind >= hw or cell_ind < 0:
            # Box centre falls outside the grid; report and skip it.
            print('cell inds size {}'.format(len(cell_inds)))
            print('cell over {} hw {}'.format(cell_ind, hw))
            continue
        a = anchor_inds[i]

        _iou_mask[cell_ind, a, :] = cfg.object_scale
        # IoU target is the actual overlap of this slot's prediction with
        # the gt box it is responsible for.
        _ious[cell_ind, a, :] = ious_reshaped[cell_ind, a, i]

        _box_mask[cell_ind, a, :] = cfg.coord_scale
        target_boxes[i, 2:4] /= anchors[a]  # size relative to the anchor
        _boxes[cell_ind, a, :] = target_boxes[i]

        _class_mask[cell_ind, a, :] = cfg.class_scale
        _classes[cell_ind, a, gt_classes[i]] = 1.

    return _boxes, _ious, _classes, _box_mask, _iou_mask, _class_mask