Ejemplo n.º 1
0
def cls_target2(img_shape, all_anchors, bboxes, gt_class_ids):
    """Build RPN labels and box-regression targets for the anchors of one image.

    Relies on module-level configuration defined outside this function:
    ``allowed_border``, ``neg_anchor_thresh``, ``posi_anchor_thresh``,
    ``RPN_TRAIN_ANCHORS_PER_IMAGE``, ``RPN_BBOX_STD_DEV`` and the helper
    ``bbox_overlaps``.

    :param img_shape: image shape; index 0 bounds the y coordinates and
        index 1 bounds the x coordinates in the "inside the image" test.
    :param all_anchors: array with one row per anchor, columns
        (x1, y1, x2, y2) in input-image pixel coordinates.
    :param bboxes: ground-truth boxes, one row per box, same column layout.
    :param gt_class_ids: one class id per ground-truth box. Ids < 0 mark
        "crowd" boxes (a box enclosing several instances). When at least one
        crowd box is present, only boxes with id > 0 are used as ground truth.
    :return: ``(rpn_labels, anchor_deltas)``.
        ``rpn_labels`` is int32 of shape (num_anchors,): 1 = positive,
        0 = negative, -1 = ignored.
        ``anchor_deltas`` is float32 of shape (num_anchors, 4); rows of
        positive anchors hold ``[dx, dy, log(dh), log(dw)]`` divided by
        ``RPN_BBOX_STD_DEV``, every other row is zero.
    :raises ValueError: if a positive anchor or its matched ground-truth box
        has a (near) zero or negative width/height, or if a computed delta
        is NaN.
    """
    # Anchors lying inside the image, tolerating `allowed_border` pixels.
    inside = ((all_anchors[:, 0] >= -allowed_border) &
              (all_anchors[:, 1] >= -allowed_border) &
              (all_anchors[:, 2] < img_shape[1] + allowed_border) &
              (all_anchors[:, 3] < img_shape[0] + allowed_border))

    num_anchors = all_anchors.shape[0]

    # Everything starts as "ignored" (-1) with zero regression targets.
    rpn_labels = np.full((num_anchors, ), -1, dtype=np.int32)
    anchor_deltas = np.zeros(shape=(num_anchors, 4), dtype=np.float32)

    # np.float was removed in NumPy 1.24; it was an alias of the builtin
    # float, i.e. float64.
    anchors_f64 = np.ascontiguousarray(all_anchors, dtype=np.float64)

    # A bounding box may enclose several instances ("crowd"); its class id
    # is negative.
    crowd_ix = np.where(gt_class_ids < 0)[0]
    if crowd_ix.shape[0] > 0:
        non_crowd_ix = np.where(gt_class_ids > 0)[0]
        crowd_boxes = bboxes[crowd_ix]
        gt_boxes = bboxes[non_crowd_ix]

        # Anchors that overlap a crowd box are not used as negatives.
        crowd_overlaps = bbox_overlaps(
            anchors_f64,
            np.ascontiguousarray(crowd_boxes, dtype=np.float64))
        crowd_iou_max = np.amax(crowd_overlaps, axis=1)  # one value per anchor
        no_crowd_bool = (crowd_iou_max < 0.001)
    else:
        no_crowd_bool = np.ones(shape=(num_anchors, ), dtype=bool)
        gt_boxes = bboxes

    if gt_boxes.shape[0] > 0:
        overlaps = bbox_overlaps(
            anchors_f64,
            np.ascontiguousarray(gt_boxes, dtype=np.float64))

        # Best-matching ground-truth box per anchor (length num_anchors).
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(num_anchors), argmax_overlaps]

        # Negatives: low IoU, not touching a crowd box, inside the image.
        rpn_labels[(max_overlaps < neg_anchor_thresh) & no_crowd_bool
                   & inside] = 0

        # Positives: high IoU and inside the image.
        rpn_labels[(max_overlaps >= posi_anchor_thresh) & inside] = 1

        # Every ground-truth box gets at least one positive anchor: the one
        # with the highest IoU, even when that IoU is below the threshold.
        gt_iou_argmax = np.argmax(overlaps, axis=0)
        rpn_labels[gt_iou_argmax] = 1

        pos_ids = np.where(rpn_labels == 1)[0]

        # Positives may not exceed half of the training anchors.
        extra = len(pos_ids) - RPN_TRAIN_ANCHORS_PER_IMAGE // 2
        if extra > 0:
            rpn_labels[np.random.choice(pos_ids, extra, replace=False)] = -1
            pos_ids = np.where(rpn_labels == 1)[0]

        # Regression targets for all positive anchors at once.
        pos_anchor = all_anchors[pos_ids]
        pos_gt = gt_boxes[argmax_overlaps[pos_ids]]

        gt_h = pos_gt[:, 3] - pos_gt[:, 1]
        gt_w = pos_gt[:, 2] - pos_gt[:, 0]
        gt_ctr_x = pos_gt[:, 0] + 0.5 * gt_w
        gt_ctr_y = pos_gt[:, 1] + 0.5 * gt_h

        an_h = pos_anchor[:, 3] - pos_anchor[:, 1]
        an_w = pos_anchor[:, 2] - pos_anchor[:, 0]
        an_ctr_x = pos_anchor[:, 0] + 0.5 * an_w
        an_ctr_y = pos_anchor[:, 1] + 0.5 * an_h

        # Degenerate boxes would produce a division by zero or log(<=0).
        # Raise instead of terminating the interpreter with a success code.
        min_size = 0.00001
        degenerate = ((gt_h <= min_size) | (an_h <= min_size) |
                      (gt_w <= min_size) | (an_w <= min_size))
        if np.any(degenerate):
            k = int(np.flatnonzero(degenerate)[0])
            raise ValueError(
                "invalid anchor or gt: gt_h=%s an_h=%s gt_w=%s an_w=%s"
                % (gt_h[k], an_h[k], gt_w[k], an_w[k]))

        deltas = np.stack([(gt_ctr_x - an_ctr_x) / an_w,
                           (gt_ctr_y - an_ctr_y) / an_h,
                           np.log(gt_h / an_h),
                           np.log(gt_w / an_w)], axis=1).astype(np.float32)
        anchor_deltas[pos_ids] = deltas / RPN_BBOX_STD_DEV

        # NaN can still appear when the inputs themselves contain NaN.
        if np.isnan(anchor_deltas[pos_ids]).any():
            raise ValueError("NaN encountered in anchor deltas")

        # Fill the rest of the batch with negatives; drop the surplus.
        neg_ids = np.where(rpn_labels == 0)[0]
        extra = len(neg_ids) - (RPN_TRAIN_ANCHORS_PER_IMAGE - len(pos_ids))
        if extra > 0:
            rpn_labels[np.random.choice(neg_ids, extra, replace=False)] = -1
    else:
        # No usable ground truth: train on randomly chosen negatives only.
        # The sample size is capped because sampling without replacement
        # fails when there are fewer anchors than requested.
        num_neg = min(RPN_TRAIN_ANCHORS_PER_IMAGE, num_anchors)
        rpn_labels[np.random.choice(num_anchors,
                                    num_neg,
                                    replace=False)] = 0

    return rpn_labels, anchor_deltas
Ejemplo n.º 2
0
def cls_target(img_shape, bboxes):
    """Generate anchors, sampled labels and box targets for every feature stride.

    Relies on module-level configuration defined outside this function:
    ``anchor_scales``, ``anchor_ratios``, ``feat_strides``,
    ``posi_anchor_thresh``, ``neg_anchor_thresh``, ``batch_anchor_num``,
    ``positive_ratio`` and the helper ``bbox_overlaps``.

    :param img_shape: image shape; index 0 is the height and index 1 the
        width used to derive the feature-map size for each stride.
    :param bboxes: array of ground-truth boxes, one row per box
        (indexed with integer arrays, so it must be an ndarray).
    :return: ``(labels, targets, anchors)``, three lists with one entry per
        stride in ``feat_strides``. For a stride with N anchors:
        ``labels[k]`` is int8 of shape (N,) with 1 = positive, 0 = negative,
        -1 = ignored; ``targets[k]`` is float32 of shape (N, 4) holding the
        raw coordinates of the matched ground-truth box for positive anchors
        and zeros elsewhere; ``anchors[k]`` has shape (N, 4) with columns
        (xmin, ymin, xmax, ymax).
    """
    scales = np.array(anchor_scales).reshape((-1, 1))
    ratios = np.array(anchor_ratios)

    # Every scale/ratio product is used as the side length of one anchor.
    all_scales = (scales * ratios).reshape(-1)

    labels = []
    targets = []
    anchors = []

    num_anchors = len(all_scales)
    # np.float was removed in NumPy 1.24; float64 is what it aliased.
    gt_f64 = np.ascontiguousarray(bboxes, dtype=np.float64)

    for feat_stride in feat_strides:
        # Anchors of a single cell, centred on the middle of the first cell.
        center = (feat_stride - 1) / 2
        per_cell_anchor = np.zeros([num_anchors, 4], dtype=np.float32)
        per_cell_anchor[:, 0] = center - all_scales / 2  # xmin
        per_cell_anchor[:, 1] = center - all_scales / 2  # ymin
        per_cell_anchor[:, 2] = center + all_scales / 2  # xmax
        per_cell_anchor[:, 3] = center + all_scales / 2  # ymax

        fm_h = img_shape[0] // feat_stride
        fm_w = img_shape[1] // feat_stride
        total = fm_h * fm_w * num_anchors

        # Each feature-map pixel carries num_anchors anchors; each anchor
        # has a label and a target row.
        label = np.full((total, ), -1, dtype=np.int8)
        # Zero-initialised so that non-positive rows are well defined
        # instead of exposing uninitialised memory to the caller.
        target = np.zeros((total, 4), dtype=np.float32)

        shift_x = np.arange(0, fm_w) * feat_stride
        shift_y = np.arange(0, fm_h) * feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                            shift_y.ravel())).transpose()

        # Broadcast (1, A, 4) + (H*W, 1, 4) -> (H*W, A, 4), then flatten.
        all_anchors = (per_cell_anchor.reshape(
            (1, num_anchors, 4)) + shifts.reshape(
                (1, fm_h * fm_w, 4)).transpose((1, 0, 2)))
        all_anchors = all_anchors.reshape((total, 4))

        overlaps = bbox_overlaps(
            np.ascontiguousarray(all_anchors, dtype=np.float64),
            gt_f64)

        # Index of the best-matching ground-truth box for every anchor.
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(all_anchors.shape[0]),
                                argmax_overlaps]

        all_pos_idx = np.where(max_overlaps > posi_anchor_thresh)[0]

        # Negatives must still touch some ground-truth box (IoU > 0).
        all_neg_idx = np.where((max_overlaps < neg_anchor_thresh)
                               & (max_overlaps > 0))[0]

        if batch_anchor_num * positive_ratio > len(all_pos_idx):
            posi_idx = all_pos_idx
        else:
            posi_idx = npr.choice(all_pos_idx,
                                  int(batch_anchor_num * positive_ratio))

        # NOTE(review): npr.choice samples with replacement by default, so
        # the sampled index sets may contain duplicates - confirm intended.
        if len(all_neg_idx) > 0:
            neg_idx = npr.choice(
                all_neg_idx,
                int(len(posi_idx) * (1 - positive_ratio) / positive_ratio))
            if len(neg_idx) == 0:
                # No positives on this feature map: fall back to a fixed
                # number of negatives.
                neg_idx = npr.choice(
                    all_neg_idx,
                    int(batch_anchor_num * (1 - positive_ratio)))
        else:
            # Nothing to sample from; npr.choice would raise on an empty
            # pool when asked for a non-zero number of samples.
            neg_idx = all_neg_idx

        label[posi_idx] = 1
        label[neg_idx] = 0

        # posi_idx holds indices into all_anchors of the positives to train;
        # argmax_overlaps maps every anchor to its best ground-truth box.
        target[posi_idx] = bboxes[argmax_overlaps[posi_idx]]

        labels.append(label)
        targets.append(target)
        anchors.append(all_anchors)
    return labels, targets, anchors
Ejemplo n.º 3
0
def corner_py(corner_pred_score, corner_pred_offset, gt_default_box, scales, feat_stride, img_info):
    """Assign labels and regression targets to the corner default boxes.

    Relies on ``cfg`` and ``bbox_overlaps`` defined outside this function.

    :param corner_pred_score: array whose shape starts with
        (1, height, width); only the shape is read.
    :param corner_pred_offset: unused here.
    :param gt_default_box: ground-truth corner boxes, shape
        (4, number of boxes, 4). First axis is the corner kind:
        0 left top, 1 right top, 2 right bottom, 3 left bottom.
    :param scales: sequence of default-box sizes; one default box per scale
        is placed on every feature-map pixel.
    :param feat_stride: stride mapping feature-map pixels to image pixels.
    :param img_info: image size; index 0 is the height, index 1 the width.
    :return: ``(labels, box_target)``.
        ``labels`` has shape (height, width, num_scales, 4, 1) with
        1 = positive, 0 = negative, -1 = ignored (after subsampling).
        ``box_target`` has shape (height, width, num_scales, 4, 4); positive
        default boxes hold their matched ground-truth box, others are zero.
    :raises AssertionError: if the batch size is not 1, or if there are no
        positive or no negative default boxes.
    """
    # TODO: the input tensors need to be converted first.
    assert corner_pred_score.shape[0] == 1, \
        'Only single item batches are supported'

    num_scales = len(scales)

    # Per-cell default boxes, one row per scale: (0, 0, scale, scale).
    zeros = [0] * num_scales
    per_cell_db = np.array([zeros, zeros, scales, scales], np.int32).transpose()

    height, width = corner_pred_score.shape[1:3]
    all_pixel_num = height * width  # number of pixels in the feature map

    shift_x = np.arange(0, width) * feat_stride
    shift_y = np.arange(0, height) * feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)  # each is (height, width)
    # Flatten so that entry k is the coordinate of pixel k (row-major).
    shift_x = shift_x.ravel()
    shift_y = shift_y.ravel()

    # One zero per pixel (not per row of the meshgrid), so that vstack gets
    # four rows of equal length.
    padding_zeros = np.zeros(all_pixel_num, np.int32)

    # Each default box is represented as (x, y, ss, ss), ss being the scale.
    shifts = np.vstack((shift_x, shift_y, padding_zeros, padding_zeros)).transpose()

    # num_scales default boxes for every cell: (H*W, num_scales, 4).
    all_default_box = (per_cell_db.reshape((1, num_scales, 4)) +
                       shifts.reshape((1, all_pixel_num, 4)).transpose((1, 0, 2)))
    # Flattened to (H*W*num_scales, 4).
    all_default_box = all_default_box.reshape(all_pixel_num * num_scales, 4)

    # Keep only default boxes lying fully inside the image. Every comparison
    # is parenthesised because `&` binds tighter than `>=` / `<=`.
    half_w = all_default_box[:, 2] / 2
    half_h = all_default_box[:, 3] / 2
    idx_indside = np.where(
        (all_default_box[:, 0] - half_w >= 0) &
        (all_default_box[:, 0] + half_w <= img_info[1]) &
        (all_default_box[:, 1] - half_h >= 0) &
        (all_default_box[:, 1] + half_h <= img_info[0])
    )[0]

    # NOTE(review): these boxes are (x, y, size, size); confirm that this is
    # the layout bbox_overlaps and gt_default_box use.
    default_boxes = np.ascontiguousarray(all_default_box[idx_indside, :],
                                         dtype=np.float64)

    # idx_indside indexes individual default boxes, so this already counts
    # boxes (pixels x scales), not pixels.
    valid_box_num = len(idx_indside)
    total_box_num = all_pixel_num * num_scales

    # Labels are (H, W, num_scales, q=4, 1); reshaped by the caller later.
    labels = np.empty((height, width, num_scales, 4, 1))
    labels.fill(-1)
    # Box targets are (H, W, num_scales, q=4, 4).
    box_target = np.zeros((height, width, num_scales, 4, 4))

    # Match every default box against each kind of corner box.
    for ix, gt_corner_box in enumerate(gt_default_box):
        gt_corner_box = np.asarray(gt_corner_box)
        # overlaps: (valid_box_num, number of gt boxes of this kind)
        overlaps = bbox_overlaps(
            default_boxes,
            np.ascontiguousarray(gt_corner_box, dtype=np.float64))
        # For every default box, the gt box with the largest overlap.
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(valid_box_num), argmax_overlaps]

        # One label per valid default box.
        valid_label = np.full((valid_box_num,), -1, dtype=np.int8)
        # Low overlap -> negative.
        valid_label[max_overlaps < cfg.TRAIN.NEGATIVE_OVERLAP] = 0
        # High overlap -> positive (foreground).
        valid_label[max_overlaps >= cfg.TRAIN.POSITIVE_OVERLAP] = 1

        # Scatter back to the full grid; boxes outside the image stay -1.
        per_kind_corner_label = np.full((total_box_num,), -1, dtype=np.int8)
        per_kind_corner_label[idx_indside] = valid_label
        labels[:, :, :, ix, :] = per_kind_corner_label.reshape(
            height, width, num_scales, 1)

        # ---------------------------- box target ----------------------------
        # Every positive default box gets its matched gt box as target.
        positive_inds = np.where(valid_label == 1)[0]
        # Same dtype as box_target so gt coordinates are not truncated.
        per_kind_corner_target = np.zeros((total_box_num, 4),
                                          dtype=box_target.dtype)
        # positive_inds indexes the valid subset; map it to full-grid rows.
        per_kind_corner_target[idx_indside[positive_inds], :] = \
            gt_corner_box[argmax_overlaps[positive_inds]]
        box_target[:, :, :, ix, :] = per_kind_corner_target.reshape(
            height, width, num_scales, 4)

    num_fg = int(cfg.TRAIN.POSITIVE_RATIO * cfg.TRAIN.DEFAULT_BOX_NUM)

    # `labels` is contiguous, so this reshape is a view: edits made through
    # flat_label below are visible in the returned `labels`.
    flat_label = labels.reshape((height * width * num_scales * 4,))
    fg_inds = np.where(flat_label == 1)[0]

    assert len(fg_inds) > 0, "The number of positive proposals must be lager than zero"

    # Randomly ignore surplus positives.
    if len(fg_inds) > num_fg:
        disable_inds = np.random.choice(
            fg_inds, size=(len(fg_inds) - num_fg), replace=False)
        flat_label[disable_inds] = -1

    # Subsample negatives if there are too many: positives plus negatives
    # may not exceed DEFAULT_BOX_NUM.
    num_bg = cfg.TRAIN.DEFAULT_BOX_NUM - np.sum(flat_label == 1)

    bg_inds = np.where(flat_label == 0)[0]

    assert len(bg_inds) > 0, "The number of negtive proposals must be lager than zero"

    if len(bg_inds) > num_bg:
        disable_inds = np.random.choice(
            bg_inds, size=(len(bg_inds) - num_bg), replace=False)
        flat_label[disable_inds] = -1

    # labels: (height, width, num_scales, 4, 1)
    # box_target: (height, width, num_scales, 4, 4)
    return labels, box_target