Python anchor_intersections Examples

Programming Language: Python

Namespace/Package Name: utils.cython_bbox

Method/Function: anchor_intersections

Examples at hotexamples.com: 6

Python anchor_intersections - 6 examples found. These are the top-rated real-world Python examples of utils.cython_bbox.anchor_intersections, extracted from open source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

File: model.py Project: WJtomcat/yolo2-pytorch

def _process_batch(data, size_index):
    """Build regression/classification targets and loss masks for one image.

    Predicted boxes are decoded with ``yolo_to_bbox``, compared against the
    ground-truth boxes with ``bbox_ious``, and every ground-truth box is
    assigned to the grid cell containing its centre and to the anchor that
    ``anchor_intersections`` scores highest for it.

    Args:
        data: 5-tuple ``(bbox_pred_np, gt_boxes, gt_classes, dontcares,
            iou_pred_np)``.  ``bbox_pred_np`` is 3-D with leading dimensions
            ``(hw, num_anchors)``; ``gt_boxes`` is read as rows of
            ``(x1, y1, x2, y2)``; ``iou_pred_np`` is indexed with
            ``(hw, num_anchors, 1)`` boolean masks.  ``dontcares`` is unused.
        size_index: Index into ``cfg.multi_scale_inp_size`` and
            ``cfg.multi_scale_out_size``.

    Returns:
        Tuple ``(_boxes, _ious, _classes, _box_mask, _iou_mask,
        _class_mask)`` of float64 arrays whose leading dimensions are
        ``(hw, num_anchors)``.
    """
    W, H = cfg.multi_scale_out_size[size_index]
    inp_size = cfg.multi_scale_inp_size[size_index]
    out_size = cfg.multi_scale_out_size[size_index]

    bbox_pred_np, gt_boxes, gt_classes, _dontcares, iou_pred_np = data

    # net output
    hw, num_anchors, _ = bbox_pred_np.shape

    # gt
    # NOTE: np.float / np.int were removed in NumPy 1.24; np.float64 is the
    # dtype those aliases resolved to.
    _classes = np.zeros([hw, num_anchors, cfg.num_classes], dtype=np.float64)
    _class_mask = np.zeros([hw, num_anchors, 1], dtype=np.float64)

    _ious = np.zeros([hw, num_anchors, 1], dtype=np.float64)
    _iou_mask = np.zeros([hw, num_anchors, 1], dtype=np.float64)

    # Default box target for unassigned slots: centre offsets 0.5, size
    # ratios 1.0, weighted by a small non-zero box mask (0.01).
    _boxes = np.zeros([hw, num_anchors, 4], dtype=np.float64)
    _boxes[:, :, 0:2] = 0.5
    _boxes[:, :, 2:4] = 1.0
    _box_mask = np.zeros([hw, num_anchors, 1], dtype=np.float64) + 0.01

    # scale pred_bbox
    anchors = np.ascontiguousarray(cfg.anchors, dtype=np.float64)
    bbox_pred_np = np.expand_dims(bbox_pred_np, 0)
    bbox_np = yolo_to_bbox(
        np.ascontiguousarray(bbox_pred_np, dtype=np.float64),
        anchors, H, W)
    # bbox_np = (hw, num_anchors, (x1, y1, x2, y2))   range: 0 ~ 1
    bbox_np = bbox_np[0]
    bbox_np[:, :, 0::2] *= float(inp_size[0])  # rescale x
    bbox_np[:, :, 1::2] *= float(inp_size[1])  # rescale y

    gt_boxes_b = np.asarray(gt_boxes, dtype=np.float64)

    # for each cell, compare predicted_bbox and gt_bbox
    bbox_np_b = np.reshape(bbox_np, [-1, 4])
    ious = bbox_ious(np.ascontiguousarray(bbox_np_b, dtype=np.float64),
                     np.ascontiguousarray(gt_boxes_b, dtype=np.float64))
    best_ious = np.max(ious, axis=1).reshape(_iou_mask.shape)

    # Use ONE mask for both the lookup and the assignment.  The original read
    # the penalty with `<` but wrote it with `<=`, which raises a
    # shape-mismatch error whenever a best IoU equals the threshold exactly.
    noobj = best_ious <= cfg.iou_thresh
    iou_penalty = 0 - iou_pred_np[noobj]
    _iou_mask[noobj] = cfg.noobject_scale * iou_penalty

    # locate the cell of each gt box
    cell_w = float(inp_size[0]) / W
    cell_h = float(inp_size[1]) / H
    cx = (gt_boxes_b[:, 0] + gt_boxes_b[:, 2]) * 0.5 / cell_w
    cy = (gt_boxes_b[:, 1] + gt_boxes_b[:, 3]) * 0.5 / cell_h
    # row-major flat index of the cell that holds each gt centre
    cell_inds = np.floor(cy) * W + np.floor(cx)
    cell_inds = cell_inds.astype(np.int64)

    target_boxes = np.empty(gt_boxes_b.shape, dtype=np.float64)
    target_boxes[:, 0] = cx - np.floor(cx)  # fractional x offset in the cell
    target_boxes[:, 1] = cy - np.floor(cy)  # fractional y offset in the cell
    # gt width / height expressed in output-grid cells
    target_boxes[:, 2] = \
        (gt_boxes_b[:, 2] - gt_boxes_b[:, 0]) / inp_size[0] * out_size[0]
    target_boxes[:, 3] = \
        (gt_boxes_b[:, 3] - gt_boxes_b[:, 1]) / inp_size[1] * out_size[1]

    # for each gt box, match the best anchor (in output-grid units)
    gt_boxes_resize = np.copy(gt_boxes_b)
    gt_boxes_resize[:, 0::2] *= (out_size[0] / float(inp_size[0]))
    gt_boxes_resize[:, 1::2] *= (out_size[1] / float(inp_size[1]))
    anchor_ious = anchor_intersections(
        anchors, np.ascontiguousarray(gt_boxes_resize, dtype=np.float64))
    anchor_inds = np.argmax(anchor_ious, axis=0)

    ious_reshaped = np.reshape(ious, [hw, num_anchors, len(cell_inds)])
    for i, cell_ind in enumerate(cell_inds):
        # gt centre falls outside the grid: report and skip it
        if cell_ind >= hw or cell_ind < 0:
            print('cell inds size {}'.format(len(cell_inds)))
            print('cell over {} hw {}'.format(cell_ind, hw))
            continue
        a = anchor_inds[i]

        # 0 ~ 1, should be close to 1
        iou_pred_cell_anchor = iou_pred_np[cell_ind, a, :]
        _iou_mask[cell_ind, a, :] = \
            cfg.object_scale * (1 - iou_pred_cell_anchor)
        _ious[cell_ind, a, :] = ious_reshaped[cell_ind, a, i]

        _box_mask[cell_ind, a, :] = cfg.coord_scale
        # size target becomes the ratio of gt size to the matched anchor size
        target_boxes[i, 2:4] /= anchors[a]
        _boxes[cell_ind, a, :] = target_boxes[i]

        _class_mask[cell_ind, a, :] = cfg.class_scale
        _classes[cell_ind, a, gt_classes[i]] = 1.

    return _boxes, _ious, _classes, _box_mask, _iou_mask, _class_mask

Example #2

Show file

File: darknet.py Project: linkapp-github/YOLO2-pytorch

def _process_batch(data, size_index):
    """Build regression/classification targets and loss masks for one image.

    Overview of the steps performed below:

    1. The masks hold the per-element loss coefficients.  Per the original
       author's notes (not verifiable from this function) the usual settings
       are object_scale = 5, noobject_scale = 1, class_scale = 1,
       coord_scale = 1.
    2. The predicted boxes are decoded with ``yolo_to_bbox`` and scaled to
       the input size; ``bbox_ious`` then gives the IoU of every prediction
       with every ground-truth box, shape ``(hw * num_anchors, num_gt)``.
       Predictions whose best IoU is below ``cfg.iou_thresh`` receive the
       no-object penalty in ``_iou_mask``.
    3. From each gt box the in-cell centre offsets and the width/height in
       output-grid units are computed, together with the flat index of the
       cell that contains the centre.
    4. ``anchor_intersections`` is evaluated between the anchors and the gt
       boxes in output-grid units; the arg-max anchor is made responsible
       for each gt box.
    5. For every gt box the entry at (cell, anchor) is filled in ``_boxes``,
       ``_ious`` and ``_classes``.  Rows of ``_classes`` that stay all-zero
       have no assigned object.

    "Multi-scale output" refers to the prediction grid being input size
    divided by the network stride (the original author notes the stride is
    usually 32 -- TODO confirm against cfg).

    Args:
        data: 5-tuple ``(bbox_pred_np, gt_boxes, gt_classes, dontcares,
            iou_pred_np)``.  ``bbox_pred_np`` is 3-D with leading dimensions
            ``(hw, num_anchors)``; ``gt_boxes`` is read as rows of
            ``(x1, y1, x2, y2)``.  ``dontcares`` is unused.
        size_index: Index into ``cfg.multi_scale_inp_size`` and
            ``cfg.multi_scale_out_size``.

    Returns:
        Tuple ``(_boxes, _ious, _classes, _box_mask, _iou_mask,
        _class_mask)`` of float64 arrays whose leading dimensions are
        ``(hw, num_anchors)``.
    """
    W, H = cfg.multi_scale_out_size[size_index]
    inp_size = cfg.multi_scale_inp_size[size_index]
    out_size = cfg.multi_scale_out_size[size_index]

    bbox_pred_np, gt_boxes, gt_classes, _dontcares, iou_pred_np = data

    # net output
    hw, num_anchors, _ = bbox_pred_np.shape

    # gt
    # NOTE: np.float / np.int were removed in NumPy 1.24; np.float64 is the
    # dtype those aliases resolved to.
    _classes = np.zeros([hw, num_anchors, cfg.num_classes], dtype=np.float64)
    _class_mask = np.zeros([hw, num_anchors, 1], dtype=np.float64)

    _ious = np.zeros([hw, num_anchors, 1], dtype=np.float64)
    _iou_mask = np.zeros([hw, num_anchors, 1], dtype=np.float64)

    _boxes = np.zeros([hw, num_anchors, 4], dtype=np.float64)
    _boxes[:, :, 0:2] = 0.5
    _boxes[:, :, 2:4] = 1.0
    _box_mask = np.zeros([hw, num_anchors, 1], dtype=np.float64) + 0.01

    # scale pred_bbox
    anchors = np.ascontiguousarray(cfg.anchors, dtype=np.float64)

    # Add a leading batch dimension to the predicted boxes:
    # (1, w*h, num_anchors, 4).
    bbox_pred_np = np.expand_dims(bbox_pred_np, 0)

    # The decoding presumably follows the YOLOv2 parametrisation
    # (TODO confirm against yolo_to_bbox):
    #   bx = sigma(tx) + cx
    #   by = sigma(ty) + cy
    #   bw = pw * exp(tw)
    #   bh = ph * exp(th)
    bbox_np = yolo_to_bbox(
        np.ascontiguousarray(bbox_pred_np, dtype=np.float64),
        anchors,
        H, W)
    # bbox_np = (hw, num_anchors, (x1, y1, x2, y2))   range: 0 ~ 1
    bbox_np = bbox_np[0]
    bbox_np[:, :, 0::2] *= float(inp_size[0])  # rescale x
    bbox_np[:, :, 1::2] *= float(inp_size[1])  # rescale y
    gt_boxes_b = np.asarray(gt_boxes, dtype=np.float64)

    # for each cell, compare predicted_bbox and gt_bbox
    # (w*h, anchors, 4) ----> (w*h*anchors, 4)
    bbox_np_b = np.reshape(bbox_np, [-1, 4])
    # overlap between every prediction and every gt box
    ious = bbox_ious(
        np.ascontiguousarray(bbox_np_b, dtype=np.float64),
        np.ascontiguousarray(gt_boxes_b, dtype=np.float64)
    )
    # ious has one row per candidate prediction and one column per gt box:
    # (w*h*anchors, num_gt)
    best_ious = np.max(ious, axis=1).reshape(_iou_mask.shape)

    # _iou_mask stores the penalty term for predictions that match no object
    noobj = best_ious < cfg.iou_thresh
    iou_penalty = 0 - iou_pred_np[noobj]
    _iou_mask[noobj] = cfg.noobject_scale * iou_penalty

    # locate the cell of each gt box
    # cell_w / cell_h: extent of the input image covered by one output cell
    cell_w = float(inp_size[0]) / W
    cell_h = float(inp_size[1]) / H
    # centre of each gt box expressed in output-grid cell units
    cx = (gt_boxes_b[:, 0] + gt_boxes_b[:, 2]) * 0.5 / cell_w
    cy = (gt_boxes_b[:, 1] + gt_boxes_b[:, 3]) * 0.5 / cell_h

    # Key step: row-major flat index (0 .. hw-1) of the grid cell that
    # contains each gt centre.
    cell_inds = np.floor(cy) * W + np.floor(cx)
    cell_inds = cell_inds.astype(np.int64)

    target_boxes = np.empty(gt_boxes_b.shape, dtype=np.float64)
    # Fractional offsets of the centre inside its cell.  (The upstream
    # author labelled these "cx"/"cy"; they play the role of the tx/ty
    # targets.)
    target_boxes[:, 0] = cx - np.floor(cx)
    target_boxes[:, 1] = cy - np.floor(cy)
    # gt width / height in output-grid units, i.e. bw / bh (the upstream
    # author labelled these "tw"/"th").
    target_boxes[:, 2] = \
        (gt_boxes_b[:, 2] - gt_boxes_b[:, 0]) / inp_size[0] * out_size[0]
    target_boxes[:, 3] = \
        (gt_boxes_b[:, 3] - gt_boxes_b[:, 1]) / inp_size[1] * out_size[1]

    # for each gt box, match the best anchor: score every anchor against
    # every gt box (in output-grid units) and pick the anchor responsible
    # for each gt box.
    gt_boxes_resize = np.copy(gt_boxes_b)
    gt_boxes_resize[:, 0::2] *= (out_size[0] / float(inp_size[0]))
    gt_boxes_resize[:, 1::2] *= (out_size[1] / float(inp_size[1]))

    anchor_ious = anchor_intersections(
        anchors,
        np.ascontiguousarray(gt_boxes_resize, dtype=np.float64)
    )
    anchor_inds = np.argmax(anchor_ious, axis=0)

    # len(cell_inds) is the number of gt objects, so ious_reshaped is
    # (h*w, num_anchors, num_objects); its first axis can be indexed with
    # the cell that holds an object's centre.  The masks filled below are
    # the scale factors (lambda coefficients) applied during training.
    ious_reshaped = np.reshape(ious, [hw, num_anchors, len(cell_inds)])
    for i, cell_ind in enumerate(cell_inds):

        # gt centre falls outside the grid: report and skip it
        if cell_ind >= hw or cell_ind < 0:
            print('cell inds size {}'.format(len(cell_inds)))
            print('cell over {} hw {}'.format(cell_ind, hw))
            continue
        # anchor responsible for this object
        a = anchor_inds[i]

        # predicted IoU confidence: 0 ~ 1, should be close to 1
        iou_pred_cell_anchor = iou_pred_np[cell_ind, a, :]
        _iou_mask[cell_ind, a, :] = cfg.object_scale * (1 - iou_pred_cell_anchor)  # noqa

        # IoU between the prediction and the gt box
        _ious[cell_ind, a, :] = ious_reshaped[cell_ind, a, i]
        _box_mask[cell_ind, a, :] = cfg.coord_scale

        # Why divide?  With bw = pw * exp(tw), bw / pw = exp(tw); after this
        # step the target is (tx, ty, exp(tw), exp(th)).
        target_boxes[i, 2:4] /= anchors[a]
        _boxes[cell_ind, a, :] = target_boxes[i]

        _class_mask[cell_ind, a, :] = cfg.class_scale
        _classes[cell_ind, a, gt_classes[i]] = 1.

    return _boxes, _ious, _classes, _box_mask, _iou_mask, _class_mask

Example #3

Show file

File: darknet.py Project: slieped/yolo2-pytorch

def _process_batch(data):
    """Build regression/classification targets and loss masks for one image.

    This variant uses the single-scale ``cfg.inp_size`` / ``cfg.out_size``,
    stores the anchor score from ``anchor_intersections`` as the IoU target,
    uses constant object/no-object scales in ``_iou_mask``, and returns the
    width/height targets in log space.

    Args:
        data: 4-tuple ``(bbox_pred_np, gt_boxes, gt_classes, dontcares)``.
            ``bbox_pred_np`` is 3-D with leading dimensions
            ``(hw, num_anchors)``; ``gt_boxes`` is read as rows of
            ``(x1, y1, x2, y2)``.  ``dontcares`` is unused.

    Returns:
        Tuple ``(_boxes, _ious, _classes, _box_mask, _iou_mask,
        _class_mask)`` of float64 arrays whose leading dimensions are
        ``(hw, num_anchors)``.
    """
    W, H = cfg.out_size
    inp_size = cfg.inp_size
    out_size = cfg.out_size

    bbox_pred_np, gt_boxes, gt_classes, _dontcares = data

    # net output
    hw, num_anchors, _ = bbox_pred_np.shape

    # gt
    # NOTE: np.float / np.int were removed in NumPy 1.24; np.float64 is the
    # dtype those aliases resolved to.
    _classes = np.zeros([hw, num_anchors, cfg.num_classes], dtype=np.float64)
    _class_mask = np.zeros([hw, num_anchors, 1], dtype=np.float64)

    _ious = np.zeros([hw, num_anchors, 1], dtype=np.float64)
    _iou_mask = np.zeros([hw, num_anchors, 1], dtype=np.float64)

    _boxes = np.zeros([hw, num_anchors, 4], dtype=np.float64)
    _boxes[:, :, 0:2] = 0.5
    _boxes[:, :, 2:4] = 1.0
    _box_mask = np.zeros([hw, num_anchors, 1], dtype=np.float64) + 0.01

    # scale pred_bbox
    anchors = np.ascontiguousarray(cfg.anchors, dtype=np.float64)
    bbox_pred_np = np.expand_dims(bbox_pred_np, 0)
    bbox_np = yolo_to_bbox(
        np.ascontiguousarray(bbox_pred_np, dtype=np.float64),
        anchors,
        H, W)
    bbox_np = bbox_np[0]
    bbox_np[:, :, 0::2] *= float(inp_size[0])  # rescale x
    bbox_np[:, :, 1::2] *= float(inp_size[1])  # rescale y

    gt_boxes_b = np.asarray(gt_boxes, dtype=np.float64)

    # for each cell: best IoU of its predictions against any gt box
    bbox_np_b = np.reshape(bbox_np, [-1, 4])
    ious = bbox_ious(
        np.ascontiguousarray(bbox_np_b, dtype=np.float64),
        np.ascontiguousarray(gt_boxes_b, dtype=np.float64)
    )
    best_ious = np.max(ious, axis=1).reshape(_iou_mask.shape)
    _iou_mask[best_ious <= cfg.iou_thresh] = cfg.noobject_scale

    # locate the cell of each gt box
    cell_w = float(inp_size[0]) / W
    cell_h = float(inp_size[1]) / H
    cx = (gt_boxes_b[:, 0] + gt_boxes_b[:, 2]) * 0.5 / cell_w
    cy = (gt_boxes_b[:, 1] + gt_boxes_b[:, 3]) * 0.5 / cell_h
    # row-major flat index of the cell that holds each gt centre
    cell_inds = np.floor(cy) * W + np.floor(cx)
    cell_inds = cell_inds.astype(np.int64)

    target_boxes = np.empty(gt_boxes_b.shape, dtype=np.float64)
    target_boxes[:, 0] = cx - np.floor(cx)  # fractional x offset in the cell
    target_boxes[:, 1] = cy - np.floor(cy)  # fractional y offset in the cell
    # gt width / height expressed in output-grid cells
    target_boxes[:, 2] = \
        (gt_boxes_b[:, 2] - gt_boxes_b[:, 0]) / inp_size[0] * out_size[0]
    target_boxes[:, 3] = \
        (gt_boxes_b[:, 3] - gt_boxes_b[:, 1]) / inp_size[1] * out_size[1]

    # for each gt box, match the best anchor (in output-grid units)
    gt_boxes_resize = np.copy(gt_boxes_b)
    gt_boxes_resize[:, 0::2] *= (out_size[0] / float(inp_size[0]))
    gt_boxes_resize[:, 1::2] *= (out_size[1] / float(inp_size[1]))
    anchor_ious = anchor_intersections(
        anchors,
        np.ascontiguousarray(gt_boxes_resize, dtype=np.float64)
    )
    anchor_inds = np.argmax(anchor_ious, axis=0)
    for i, cell_ind in enumerate(cell_inds):
        # gt centre falls outside the grid: report and skip it
        if cell_ind >= hw or cell_ind < 0:
            # Function-call form: the original `print cell_ind` statement is
            # a SyntaxError on Python 3; this prints the same text on both.
            print(cell_ind)
            continue
        a = anchor_inds[i]

        _iou_mask[cell_ind, a, :] = cfg.object_scale
        _ious[cell_ind, a, :] = anchor_ious[a, i]

        _box_mask[cell_ind, a, :] = cfg.coord_scale
        # size target becomes the ratio of gt size to the matched anchor size
        target_boxes[i, 2:4] /= anchors[a]
        _boxes[cell_ind, a, :] = target_boxes[i]

        _class_mask[cell_ind, a, :] = cfg.class_scale
        _classes[cell_ind, a, gt_classes[i]] = 1.

    # Width/height targets are returned in log space; clamp first so that
    # log() never sees zero or a negative ratio.
    _boxes[:, :, 2:4] = np.maximum(_boxes[:, :, 2:4], 0.001)
    _boxes[:, :, 2:4] = np.log(_boxes[:, :, 2:4])

    return _boxes, _ious, _classes, _box_mask, _iou_mask, _class_mask

Example #4

Show file

File: darknet_training_v3.py Project: liwei46/yolo2-pytorch

def _process_batch(data):
    """Build regression/classification targets and loss masks for one image.

    This variant receives the input size and the configuration inside
    ``data``, assumes a fixed 32-pixel cell, skips ground-truth boxes whose
    class is ``-1``, and weights the box loss by ``2 - w * h`` of the
    ground-truth box relative to the input size.

    Args:
        data: 6-tuple ``(bbox_pred_np, gt_boxes, gt_classes, iou_pred_np,
            inp_size, cfg)``.  ``bbox_pred_np`` is 3-D with leading
            dimensions ``(hw, num_anchors)``; ``gt_boxes`` must expose
            ``.shape`` and is read as rows of ``(x1, y1, x2, y2)``;
            ``inp_size`` must support ``/ 32`` and indexing with 0 and 1;
            ``cfg`` is indexed with string keys.

    Returns:
        Tuple ``(_boxes, _ious, _classes, _box_mask, _iou_mask,
        _class_mask)`` of float64 arrays whose leading dimensions are
        ``(hw, num_anchors)``.  ``_boxes`` holds
        ``(sig(tx), sig(ty), exp(tw), exp(th))`` per the original author's
        note.
    """
    bbox_pred_np, gt_boxes, gt_classes, iou_pred_np, inp_size, cfg = data
    # NOTE(review): under Python 3 this is true division, so out_size holds
    # floats that are passed on to yolo_to_bbox -- confirm that is accepted.
    out_size = inp_size / 32
    num_gt = gt_boxes.shape[0]

    # one output cell covers a fixed 32 x 32 patch of the input
    cell_w = 32
    cell_h = 32

    # net output; hw = number of cells
    hw, num_anchors, _ = bbox_pred_np.shape

    # gt
    # NOTE: np.float / np.int were removed in NumPy 1.24; np.float64 is the
    # dtype those aliases resolved to.
    _classes = np.zeros([hw, num_anchors, cfg['num_classes']],
                        dtype=np.float64)
    _class_mask = np.zeros([hw, num_anchors, 1], dtype=np.float64)

    _ious = np.zeros([hw, num_anchors, 1], dtype=np.float64)
    _iou_mask = np.zeros([hw, num_anchors, 1], dtype=np.float64)

    # Unassigned slots keep an all-zero box target and a zero box mask.
    _boxes = np.zeros([hw, num_anchors, 4], dtype=np.float64)
    _box_mask = np.zeros([hw, num_anchors, 1], dtype=np.float64)

    # scale pred_bbox
    anchors = np.ascontiguousarray(cfg['anchors'], dtype=np.float64)
    bbox_pred_np = np.expand_dims(bbox_pred_np, 0)
    bbox_np = yolo_to_bbox(
        np.ascontiguousarray(bbox_pred_np, dtype=np.float64),
        anchors, out_size[1], out_size[0])
    # bbox_np = (hw, num_anchors, (x1, y1, x2, y2))   range: 0 ~ 1
    bbox_np = bbox_np[0]
    bbox_np[:, :, 0::2] *= float(inp_size[0])  # rescale x by w
    bbox_np[:, :, 1::2] *= float(inp_size[1])  # rescale y by h

    gt_boxes_b = np.asarray(gt_boxes, dtype=np.float64)

    # for each cell, compare predicted_bbox and gt_bbox
    bbox_np_b = np.reshape(bbox_np, [-1, 4])
    ious = bbox_ious(np.ascontiguousarray(bbox_np_b, dtype=np.float64),
                     np.ascontiguousarray(gt_boxes_b, dtype=np.float64))
    best_ious = np.max(ious, axis=1).reshape(_iou_mask.shape)
    # predictions matching no gt box are penalised by their predicted IoU
    noobj = best_ious < cfg['iou_thresh']
    iou_penalty = 0 - iou_pred_np[noobj]
    _iou_mask[noobj] = cfg['noobject_scale'] * iou_penalty
    ious_reshaped = np.reshape(ious, [hw, num_anchors, num_gt])

    # locate the cell of each gt box
    cx = (gt_boxes_b[:, 0] + gt_boxes_b[:, 2]) * 0.5 / cell_w
    cy = (gt_boxes_b[:, 1] + gt_boxes_b[:, 3]) * 0.5 / cell_h
    # row-major flat index of the cell that holds each gt centre
    cell_inds = np.floor(cy) * out_size[0] + np.floor(cx)
    cell_inds = cell_inds.astype(np.int64)

    target_boxes = np.empty(gt_boxes_b.shape, dtype=np.float64)
    target_boxes[:, 0] = cx - np.floor(cx)  # cx  (0 ~ 1)
    target_boxes[:, 1] = cy - np.floor(cy)  # cy  (0 ~ 1)
    # gt width / height in cell units
    target_boxes[:, 2] = (gt_boxes_b[:, 2] - gt_boxes_b[:, 0]) / cell_w
    target_boxes[:, 3] = (gt_boxes_b[:, 3] - gt_boxes_b[:, 1]) / cell_h

    # for each gt box, match the best anchor
    # gt_boxes_resize = [(xmin, ymin, xmax, ymax)] unit: cell px
    gt_boxes_resize = np.copy(gt_boxes_b)
    gt_boxes_resize[:, 0::2] /= cell_w
    gt_boxes_resize[:, 1::2] /= cell_h

    anchor_ious = anchor_intersections(
        anchors, np.ascontiguousarray(gt_boxes_resize, dtype=np.float64))
    anchor_inds = np.argmax(anchor_ious, axis=0)

    # for every gt cell
    for i, cell_ind in enumerate(cell_inds):
        # gt centre falls outside the grid: report and skip it
        if cell_ind >= hw or cell_ind < 0:
            print('warning: invalid cell_ind, cx, cy, W, H', cell_ind, cx[i],
                  cy[i], out_size[0], out_size[1])
            continue
        a = anchor_inds[i]

        # do not evaluate for dontcare / unknown class
        if gt_classes[i] == -1:
            continue

        # 0 ~ 1, should be close to iou_truth
        iou_pred = iou_pred_np[cell_ind, a, :]
        iou_truth = ious_reshaped[cell_ind, a, i]
        _iou_mask[cell_ind, a, :] = \
            cfg['object_scale'] * (iou_truth - iou_pred)
        _ious[cell_ind, a, :] = iou_truth

        # gt size as a fraction of the input; smaller boxes get a larger
        # coordinate weight through (2 - w * h)
        truth_w = (gt_boxes_b[i, 2] - gt_boxes_b[i, 0]) / inp_size[0]
        truth_h = (gt_boxes_b[i, 3] - gt_boxes_b[i, 1]) / inp_size[1]
        _box_mask[cell_ind, a, :] = \
            cfg['coord_scale'] * (2 - truth_w * truth_h)
        # size target becomes the ratio of gt size to the matched anchor size
        target_boxes[i, 2:4] /= anchors[a]
        _boxes[cell_ind, a, :] = target_boxes[i]

        _class_mask[cell_ind, a, :] = cfg['class_scale']
        _classes[cell_ind, a, gt_classes[i]] = 1.

    # _boxes = (sig(tx), sig(ty), exp(tw), exp(th))
    return _boxes, _ious, _classes, _box_mask, _iou_mask, _class_mask

Example #5

Show file

File: darknet.py Project: jjprincess/yolo2-pytorch

def _process_batch(data, size_index):
    """Compute the training targets and loss weights for a single image.

    The network's box predictions are decoded (``yolo_to_bbox``) and scored
    against the ground truth (``bbox_ious``).  Each ground-truth box is then
    written into the slot addressed by the grid cell holding its centre and
    the anchor preferred by ``anchor_intersections``.

    Args:
        data: 5-tuple ``(bbox_pred_np, gt_boxes, gt_classes, dontcares,
            iou_pred_np)``.  ``bbox_pred_np`` is 3-D with leading dimensions
            ``(hw, num_anchors)``; ``gt_boxes`` is read as rows of
            ``(x1, y1, x2, y2)``; ``iou_pred_np`` is indexed with
            ``(hw, num_anchors, 1)`` boolean masks.  ``dontcares`` is unused.
        size_index: Index into ``cfg.multi_scale_inp_size`` and
            ``cfg.multi_scale_out_size``.

    Returns:
        Tuple ``(_boxes, _ious, _classes, _box_mask, _iou_mask,
        _class_mask)`` of float64 arrays whose leading dimensions are
        ``(hw, num_anchors)``.
    """
    # np.float / np.int were removed in NumPy 1.24; these are the dtypes the
    # old aliases resolved to.
    f64 = np.float64

    W, H = cfg.multi_scale_out_size[size_index]
    inp_size = cfg.multi_scale_inp_size[size_index]
    out_size = cfg.multi_scale_out_size[size_index]

    bbox_pred_np, gt_boxes, gt_classes, _dontcares, iou_pred_np = data

    # net output
    hw, num_anchors, _ = bbox_pred_np.shape

    # gt
    _classes = np.zeros([hw, num_anchors, cfg.num_classes], dtype=f64)
    _class_mask = np.zeros([hw, num_anchors, 1], dtype=f64)

    _ious = np.zeros([hw, num_anchors, 1], dtype=f64)
    _iou_mask = np.zeros([hw, num_anchors, 1], dtype=f64)

    # Slots without an assigned gt box keep a neutral target (offset 0.5,
    # size ratio 1.0) that is weighted by a small box mask of 0.01.
    _boxes = np.zeros([hw, num_anchors, 4], dtype=f64)
    _boxes[:, :, 0:2] = 0.5
    _boxes[:, :, 2:4] = 1.0
    _box_mask = np.zeros([hw, num_anchors, 1], dtype=f64) + 0.01

    # scale pred_bbox
    anchors = np.ascontiguousarray(cfg.anchors, dtype=f64)
    bbox_pred_np = np.expand_dims(bbox_pred_np, 0)
    bbox_np = yolo_to_bbox(
        np.ascontiguousarray(bbox_pred_np, dtype=f64),
        anchors,
        H, W)
    # bbox_np = (hw, num_anchors, (x1, y1, x2, y2))   range: 0 ~ 1
    bbox_np = bbox_np[0]
    bbox_np[:, :, 0::2] *= float(inp_size[0])  # rescale x
    bbox_np[:, :, 1::2] *= float(inp_size[1])  # rescale y

    gt_boxes_b = np.asarray(gt_boxes, dtype=f64)

    # for each cell, compare predicted_bbox and gt_bbox
    bbox_np_b = np.reshape(bbox_np, [-1, 4])
    ious = bbox_ious(
        np.ascontiguousarray(bbox_np_b, dtype=f64),
        np.ascontiguousarray(gt_boxes_b, dtype=f64)
    )
    best_ious = np.max(ious, axis=1).reshape(_iou_mask.shape)

    # The original selected the penalty values with `<` and the destination
    # with `<=`; if any best IoU equals the threshold the two selections
    # differ in size and the assignment raises.  Share a single mask.
    below_thresh = best_ious <= cfg.iou_thresh
    iou_penalty = 0 - iou_pred_np[below_thresh]
    _iou_mask[below_thresh] = cfg.noobject_scale * iou_penalty

    # locate the cell of each gt box
    cell_w = float(inp_size[0]) / W
    cell_h = float(inp_size[1]) / H
    cx = (gt_boxes_b[:, 0] + gt_boxes_b[:, 2]) * 0.5 / cell_w
    cy = (gt_boxes_b[:, 1] + gt_boxes_b[:, 3]) * 0.5 / cell_h
    # row-major flat index of the cell that holds each gt centre
    cell_inds = (np.floor(cy) * W + np.floor(cx)).astype(np.int64)

    target_boxes = np.empty(gt_boxes_b.shape, dtype=f64)
    target_boxes[:, 0] = cx - np.floor(cx)  # fractional x offset in the cell
    target_boxes[:, 1] = cy - np.floor(cy)  # fractional y offset in the cell
    # gt width / height expressed in output-grid cells
    target_boxes[:, 2] = \
        (gt_boxes_b[:, 2] - gt_boxes_b[:, 0]) / inp_size[0] * out_size[0]
    target_boxes[:, 3] = \
        (gt_boxes_b[:, 3] - gt_boxes_b[:, 1]) / inp_size[1] * out_size[1]

    # for each gt box, match the best anchor (in output-grid units)
    gt_boxes_resize = np.copy(gt_boxes_b)
    gt_boxes_resize[:, 0::2] *= (out_size[0] / float(inp_size[0]))
    gt_boxes_resize[:, 1::2] *= (out_size[1] / float(inp_size[1]))
    anchor_ious = anchor_intersections(
        anchors,
        np.ascontiguousarray(gt_boxes_resize, dtype=f64)
    )
    anchor_inds = np.argmax(anchor_ious, axis=0)

    ious_reshaped = np.reshape(ious, [hw, num_anchors, len(cell_inds)])
    for i, cell_ind in enumerate(cell_inds):
        # gt centre falls outside the grid: report and skip it
        if cell_ind >= hw or cell_ind < 0:
            print('cell inds size {}'.format(len(cell_inds)))
            print('cell over {} hw {}'.format(cell_ind, hw))
            continue
        a = anchor_inds[i]

        # 0 ~ 1, should be close to 1
        iou_pred_cell_anchor = iou_pred_np[cell_ind, a, :]
        _iou_mask[cell_ind, a, :] = cfg.object_scale * (1 - iou_pred_cell_anchor)  # noqa
        _ious[cell_ind, a, :] = ious_reshaped[cell_ind, a, i]

        _box_mask[cell_ind, a, :] = cfg.coord_scale
        # size target becomes the ratio of gt size to the matched anchor size
        target_boxes[i, 2:4] /= anchors[a]
        _boxes[cell_ind, a, :] = target_boxes[i]

        _class_mask[cell_ind, a, :] = cfg.class_scale
        _classes[cell_ind, a, gt_classes[i]] = 1.

    return _boxes, _ious, _classes, _box_mask, _iou_mask, _class_mask

Example #6

Show file

File: darknet.py Project: wolfworld6/Deep-Learning-1

def _process_batch(data, size_index):
    """Build regression/classification targets and loss masks for one image.

    This variant uses constant ``cfg.object_scale`` / ``cfg.noobject_scale``
    values in ``_iou_mask`` (the predicted IoU is not consulted) and leaves
    unassigned slots with an all-zero box target and a zero box mask.

    Args:
        data: 5-tuple ``(bbox_pred_np, gt_boxes, gt_classes, dontcares,
            iou_pred_np)``.  ``bbox_pred_np`` is 3-D with leading dimensions
            ``(hw, num_anchors)``; ``gt_boxes`` is read as rows of
            ``(x1, y1, x2, y2)``.  ``dontcares`` and ``iou_pred_np`` are
            unused.
        size_index: Index into ``cfg.multi_scale_inp_size`` and
            ``cfg.multi_scale_out_size``.

    Returns:
        Tuple ``(_boxes, _ious, _classes, _box_mask, _iou_mask,
        _class_mask)`` of float64 arrays whose leading dimensions are
        ``(hw, num_anchors)``.
    """
    W, H = cfg.multi_scale_out_size[size_index]
    inp_size = cfg.multi_scale_inp_size[size_index]
    out_size = cfg.multi_scale_out_size[size_index]

    bbox_pred_np, gt_boxes, gt_classes, _dontcares, _iou_pred_np = data

    # net output
    hw, num_anchors, _ = bbox_pred_np.shape

    # gt
    # NOTE: np.float / np.int were removed in NumPy 1.24; np.float64 is the
    # dtype those aliases resolved to.
    _classes = np.zeros([hw, num_anchors, cfg.num_classes], dtype=np.float64)
    _class_mask = np.zeros([hw, num_anchors, 1], dtype=np.float64)

    _ious = np.zeros([hw, num_anchors, 1], dtype=np.float64)
    _iou_mask = np.zeros([hw, num_anchors, 1], dtype=np.float64)

    _boxes = np.zeros([hw, num_anchors, 4], dtype=np.float64)
    _box_mask = np.zeros([hw, num_anchors, 1], dtype=np.float64)

    # scale pred_bbox
    anchors = np.ascontiguousarray(cfg.anchors, dtype=np.float64)
    bbox_pred_np = np.expand_dims(bbox_pred_np, 0)
    bbox_np = yolo_to_bbox(
        np.ascontiguousarray(bbox_pred_np, dtype=np.float64),
        anchors, H, W)
    # bbox_np = (hw, num_anchors, (x1, y1, x2, y2))   range: 0 ~ 1
    # i.e. the raw predictions converted to actual box positions
    bbox_np = bbox_np[0]
    bbox_np[:, :, 0::2] *= float(inp_size[0])  # rescale x to the input size
    bbox_np[:, :, 1::2] *= float(inp_size[1])  # rescale y to the input size

    gt_boxes_b = np.asarray(gt_boxes, dtype=np.float64)  # [R, 4]

    # for each cell, compare predicted_bbox and gt_bbox; the comparison is
    # done at input-image scale
    bbox_np_b = np.reshape(bbox_np, [-1, 4])
    ious = bbox_ious(np.ascontiguousarray(bbox_np_b, dtype=np.float64),
                     np.ascontiguousarray(gt_boxes_b, dtype=np.float64))
    best_ious = np.max(ious, axis=1).reshape(_iou_mask.shape)

    # At or below the threshold a prediction is treated as "no object".
    # (The upstream variant scaled this by -iou_pred; here the mask is the
    # plain no-object scale.)
    _iou_mask[best_ious <= cfg.iou_thresh] = cfg.noobject_scale

    # locate the cell of each gt box
    cell_w = float(inp_size[0]) / W
    cell_h = float(inp_size[1]) / H
    cx = (gt_boxes_b[:, 0] + gt_boxes_b[:, 2]) * 0.5 / cell_w
    cy = (gt_boxes_b[:, 1] + gt_boxes_b[:, 3]) * 0.5 / cell_h
    # row-major flat index of the cell that holds each gt centre
    cell_inds = np.floor(cy) * W + np.floor(cx)
    cell_inds = cell_inds.astype(np.int64)

    target_boxes = np.empty(gt_boxes_b.shape, dtype=np.float64)
    target_boxes[:, 0] = cx - np.floor(cx)  # bx - cx = sig(tx)
    target_boxes[:, 1] = cy - np.floor(cy)  # by - cy = sig(ty)
    target_boxes[:, 2] = \
        (gt_boxes_b[:, 2] - gt_boxes_b[:, 0]) / inp_size[0] * out_size[0]  # bw
    target_boxes[:, 3] = \
        (gt_boxes_b[:, 3] - gt_boxes_b[:, 1]) / inp_size[1] * out_size[1]  # bh

    # for each gt box, match the best anchor: gt boxes are converted to
    # feature-map scale so they can be compared with the anchors
    gt_boxes_resize = np.copy(gt_boxes_b)
    gt_boxes_resize[:, 0::2] *= (out_size[0] / float(inp_size[0]))
    gt_boxes_resize[:, 1::2] *= (out_size[1] / float(inp_size[1]))
    # Per the original author's note, anchor_intersections treats anchor and
    # gt box as sharing the same centre -- TODO confirm in cython_bbox.
    anchor_ious = anchor_intersections(
        anchors, np.ascontiguousarray(gt_boxes_resize, dtype=np.float64))
    anchor_inds = np.argmax(anchor_ious, axis=0)  # best anchor per gt box

    ious_reshaped = np.reshape(ious, [hw, num_anchors, len(cell_inds)])

    # Only the cell containing a gt box, paired with that box's best anchor,
    # is labelled.
    for i, cell_ind in enumerate(cell_inds):
        # gt centre falls outside the grid: report and skip it
        if cell_ind >= hw or cell_ind < 0:
            print('cell inds size {}'.format(len(cell_inds)))
            print('cell over {} hw {}'.format(cell_ind, hw))
            continue
        a = anchor_inds[i]

        _iou_mask[cell_ind, a, :] = cfg.object_scale
        _ious[cell_ind, a, :] = ious_reshaped[cell_ind, a, i]

        _box_mask[cell_ind, a, :] = cfg.coord_scale
        # bw / pw, bh / ph: gt size relative to the matched anchor
        target_boxes[i, 2:4] /= anchors[a]
        _boxes[cell_ind, a, :] = target_boxes[i]

        _class_mask[cell_ind, a, :] = cfg.class_scale
        _classes[cell_ind, a, gt_classes[i]] = 1.

    return _boxes, _ious, _classes, _box_mask, _iou_mask, _class_mask