Example #1
0
def get_delta_roi(filename, roi_rec, im_scale):
    """
    Compute regression deltas from each roi box to the annotated box that
    shares its track id.

    :param filename: XML annotation handed to ``ET.parse``; it must contain a
        ``size/width`` element and ``object`` elements with ``trackid`` and
        ``bndbox`` (``xmin``/``ymin``/``xmax``/``ymax``) children
    :param roi_rec: dict providing 'gt_trackid', 'boxes', 'flipped', 'width'
        and 'height'
    :param im_scale: factor applied to both the roi boxes and the annotated boxes
    :return: float array shaped like roi_rec['boxes']; row i holds
        bbox_transform(scaled roi box i, scaled annotated box) when
        gt_trackid[i] appears in the annotation, and zeros otherwise
    """
    track_ids = roi_rec['gt_trackid']
    scaled_boxes = roi_rec['boxes'] * im_scale
    delta = np.zeros_like(roi_rec['boxes'], dtype=float)

    tree = ET.parse(filename)
    # flipping mirrors around the width stored in the annotation, while
    # clipping below uses the width/height stored in roi_rec
    width = float(tree.find('size').find('width').text)
    flipped = roi_rec['flipped']

    # trackid -> [x1, y1, x2, y2] of the annotated box, clipped and scaled
    target_by_track = {}
    for obj in tree.findall('object'):
        bbox = obj.find('bndbox')
        xmin = np.maximum(float(bbox.find('xmin').text), 0)
        ymin = np.maximum(float(bbox.find('ymin').text), 0)
        xmax = np.minimum(float(bbox.find('xmax').text), roi_rec['width'] - 1)
        ymax = np.minimum(float(bbox.find('ymax').text), roi_rec['height'] - 1)

        if flipped:
            # horizontal mirror: the old right edge becomes the new left edge
            xmin, xmax = width - 1 - xmax, width - 1 - xmin
            assert xmax >= xmin

        target_by_track[int(obj.find('trackid').text)] = [
            xmin * im_scale, ymin * im_scale, xmax * im_scale, ymax * im_scale
        ]

    for i, track_id in enumerate(track_ids):
        if track_id in target_by_track:
            delta_trans = bbox_transform(
                np.array([scaled_boxes[i]]),
                np.array([target_by_track[track_id]]))
            delta[i][:] = delta_trans[0]
    return delta
Example #2
0
def sample_rois_v2(rois,
                   num_classes,
                   cfg,
                   labels=None,
                   overlaps=None,
                   bbox_targets=None,
                   gt_boxes=None):
    """
    label every roi and build its bbox regression targets (no subsampling is done here)
    :param rois: e2e: [n, 5] with batch_index in column 0 (columns 1: are used as the box)
    :param num_classes: number of classes
    :param cfg: config; reads cfg.TRAIN.BG_THRESH_HI, cfg.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED,
        cfg.TRAIN.BBOX_MEANS and cfg.TRAIN.BBOX_STDS
    :param labels: maybe precomputed; modified in place (background rois are set to 0)
    :param overlaps: maybe precomputed (max_overlaps); recomputed when labels is None
    :param bbox_targets: maybe precomputed; required whenever labels is given
    :param gt_boxes: optional for e2e [n, 5] (x1, y1, x2, y2, cls); required when labels is None
    :return: (rois, labels, bbox_targets, bbox_weights)
    :raises ValueError: if labels is given without bbox_targets
    """
    if labels is not None and bbox_targets is None:
        # targets are computed from the roi -> gt assignment, which only exists
        # when the labels are derived here from gt_boxes
        raise ValueError(
            'bbox_targets must be provided when labels are precomputed')

    if labels is None:
        # np.float was only an alias of the builtin float and is gone from
        # current NumPy releases
        overlaps = bbox_overlaps(rois[:, 1:].astype(float),
                                 gt_boxes[:, :4].astype(float))
        gt_assignment = overlaps.argmax(axis=1)
        overlaps = overlaps.max(axis=1)
        labels = gt_boxes[gt_assignment, 4]

    # set labels of bg_rois to be 0
    bg_ind = np.where(overlaps < cfg.TRAIN.BG_THRESH_HI)[0]
    labels[bg_ind] = 0

    # load or compute bbox_target
    if bbox_targets is not None:
        bbox_target_data = bbox_targets
    else:
        targets = bbox_transform(rois[:, 1:], gt_boxes[gt_assignment, :4])
        if cfg.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED:
            targets = ((targets - np.array(cfg.TRAIN.BBOX_MEANS)) /
                       np.array(cfg.TRAIN.BBOX_STDS))
        bbox_target_data = np.hstack((labels[:, np.newaxis], targets))

    bbox_targets, bbox_weights = \
        expand_bbox_regression_targets(bbox_target_data, num_classes, cfg)

    return rois, labels, bbox_targets, bbox_weights
Example #3
0
def assign_pyramid_anchor(
        feat_shapes,
        gt_boxes,
        im_info,
        cfg,
        feat_strides=(4, 8, 16, 32, 64),
        scales=(8, ),
        ratios=(0.5, 1, 2),
        allowed_border=0,
        balance_scale_bg=False,
):
    """
    assign ground truth boxes to anchor positions on every pyramid level
    :param feat_shapes: one entry per stride; entry[0][-2:] is read as (feat_height, feat_width)
    :param gt_boxes: ground truth boxes, columns 0:4 are used as regression targets; may be empty
    :param im_info: only im_info[0] is used; its [0] / [1] bound the anchors' y2 / x2
    :param cfg: config; reads cfg.TRAIN.RPN_CLOBBER_POSITIVES, RPN_NEGATIVE_OVERLAP,
        RPN_POSITIVE_OVERLAP, RPN_BATCH_SIZE (-1 disables subsampling), RPN_FG_FRACTION
        and RPN_BBOX_WEIGHTS
    :param feat_strides: anchor position step of each level
    :param scales: used to generate anchors, affects num_anchors (per location);
        1-d is shared by all levels, 2-d is indexed per level
    :param ratios: aspect ratios of generated anchors (same 1-d / 2-d convention as scales)
    :param allowed_border: filter out anchors with edge overlap > allowed_border
    :param balance_scale_bg: restrict the background samples for each pyramid level
    :return: dict of label, levels concatenated in feat_strides order
    'label': of shape (1, sum over levels of num_anchors * feat_height * feat_width);
             1 is positive, 0 is negative, -1 is dont care
    'bbox_target': of shape (1, num_anchors * 4, sum over levels of feat_height * feat_width)
    'bbox_weight': same shape as 'bbox_target', cfg.TRAIN.RPN_BBOX_WEIGHTS on positive anchors
    """
    def _unmap(data, count, inds, fill=0):
        """ unmap a subset inds of data into original data of size count """
        if len(data.shape) == 1:
            ret = np.empty((count, ), dtype=np.float32)
            ret.fill(fill)
            ret[inds] = data
        else:
            ret = np.empty((count, ) + data.shape[1:], dtype=np.float32)
            ret.fill(fill)
            ret[inds, :] = data
        return ret

    # when True, subsampling drops the leading indices instead of a random draw
    DEBUG = False
    im_info = im_info[0]
    scales = np.array(scales, dtype=np.float32)
    ratios = np.array(ratios, dtype=np.float32)
    assert (len(feat_shapes) == len(feat_strides))

    fpn_args = []
    # number of inside anchors per level; turned into cumulative offsets below
    fpn_anchors_fid = np.zeros(0).astype(int)
    fpn_anchors = np.zeros([0, 4])
    fpn_labels = np.zeros(0)
    fpn_inds_inside = []
    for feat_id in range(len(feat_strides)):
        # len(scales.shape) == 1 just for backward compatibility, will remove in the future
        if len(scales.shape) == 1:
            base_anchors = generate_anchors(base_size=feat_strides[feat_id],
                                            ratios=ratios,
                                            scales=scales)
        else:
            assert len(scales.shape) == len(ratios.shape) == 2
            base_anchors = generate_anchors(base_size=feat_strides[feat_id],
                                            ratios=ratios[feat_id],
                                            scales=scales[feat_id])
        num_anchors = base_anchors.shape[0]
        feat_height, feat_width = feat_shapes[feat_id][0][-2:]

        # 1. generate proposals from bbox deltas and shifted anchors
        shift_x = np.arange(0, feat_width) * feat_strides[feat_id]
        shift_y = np.arange(0, feat_height) * feat_strides[feat_id]
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                            shift_y.ravel())).transpose()
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = num_anchors
        K = shifts.shape[0]
        all_anchors = base_anchors.reshape((1, A, 4)) + shifts.reshape(
            (1, K, 4)).transpose((1, 0, 2))
        all_anchors = all_anchors.reshape((K * A, 4))
        total_anchors = int(K * A)

        # only keep anchors inside the image
        inds_inside = np.where(
            (all_anchors[:, 0] >= -allowed_border)
            & (all_anchors[:, 1] >= -allowed_border)
            & (all_anchors[:, 2] < im_info[1] + allowed_border)
            & (all_anchors[:, 3] < im_info[0] + allowed_border))[0]

        # keep only inside anchors
        anchors = all_anchors[inds_inside, :]

        # label: 1 is positive, 0 is negative, -1 is dont care
        # for sigmoid classifier, ignore the 'background' class
        labels = np.empty((len(inds_inside), ), dtype=np.float32)
        labels.fill(-1)

        fpn_anchors_fid = np.hstack((fpn_anchors_fid, len(inds_inside)))
        fpn_anchors = np.vstack((fpn_anchors, anchors))
        fpn_labels = np.hstack((fpn_labels, labels))
        fpn_inds_inside.append(inds_inside)
        fpn_args.append([feat_height, feat_width, A, total_anchors])

    if gt_boxes.size > 0:
        # overlap between the anchors and the gt boxes
        # overlaps (ex, gt)
        # np.float was only an alias of the builtin float and is gone from
        # current NumPy releases
        overlaps = bbox_overlaps(fpn_anchors.astype(float),
                                 gt_boxes.astype(float))
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(len(fpn_anchors)), argmax_overlaps]
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps,
                                   np.arange(overlaps.shape[1])]
        # every anchor that ties for the best overlap of some gt box
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

        if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels first so that positive labels can clobber them
            fpn_labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
        # fg label: for each gt, anchor with highest overlap
        fpn_labels[gt_argmax_overlaps] = 1
        # fg label: above threshold IoU
        fpn_labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1
        if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels last so that negative labels can clobber positives
            fpn_labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
    else:
        fpn_labels[:] = 0

    # subsample positive labels if we have too many
    num_fg = fpn_labels.shape[0] if cfg.TRAIN.RPN_BATCH_SIZE == -1 else int(
        cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCH_SIZE)
    fg_inds = np.where(fpn_labels >= 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds,
                                  size=(len(fg_inds) - num_fg),
                                  replace=False)
        if DEBUG:
            disable_inds = fg_inds[:(len(fg_inds) - num_fg)]
        fpn_labels[disable_inds] = -1

    # subsample negative labels if we have too many
    if cfg.TRAIN.RPN_BATCH_SIZE == -1:
        num_bg = fpn_labels.shape[0]
    else:
        num_bg = cfg.TRAIN.RPN_BATCH_SIZE - np.sum(fpn_labels >= 1)
    bg_inds = np.where(fpn_labels == 0)[0]
    # per-level counts -> offsets of each level inside the stacked arrays
    fpn_anchors_fid = np.hstack((0, fpn_anchors_fid.cumsum()))

    if balance_scale_bg:
        # floor division keeps the per-level quota an integer; true division
        # would hand npr.choice a float size, which it rejects
        num_bg_scale = num_bg // len(feat_strides)
        for feat_id in range(0, len(feat_strides)):
            bg_ind_scale = bg_inds[(bg_inds >= fpn_anchors_fid[feat_id])
                                   & (bg_inds < fpn_anchors_fid[feat_id + 1])]
            if len(bg_ind_scale) > num_bg_scale:
                disable_inds = npr.choice(bg_ind_scale,
                                          size=(len(bg_ind_scale) -
                                                num_bg_scale),
                                          replace=False)
                fpn_labels[disable_inds] = -1
    else:
        if len(bg_inds) > num_bg:
            disable_inds = npr.choice(bg_inds,
                                      size=(len(bg_inds) - num_bg),
                                      replace=False)
            if DEBUG:
                disable_inds = bg_inds[:(len(bg_inds) - num_bg)]
            fpn_labels[disable_inds] = -1

    # regression targets are only filled in for the positive anchors
    fpn_bbox_targets = np.zeros((len(fpn_anchors), 4), dtype=np.float32)
    if gt_boxes.size > 0:
        fpn_bbox_targets[fpn_labels >= 1, :] = bbox_transform(
            fpn_anchors[fpn_labels >= 1, :],
            gt_boxes[argmax_overlaps[fpn_labels >= 1], :4])
    fpn_bbox_weights = np.zeros((len(fpn_anchors), 4), dtype=np.float32)

    fpn_bbox_weights[fpn_labels >= 1, :] = np.array(cfg.TRAIN.RPN_BBOX_WEIGHTS)

    label_list = []
    bbox_target_list = []
    bbox_weight_list = []
    for feat_id in range(0, len(feat_strides)):
        feat_height, feat_width, A, total_anchors = fpn_args[feat_id]
        level_start = fpn_anchors_fid[feat_id]
        level_end = fpn_anchors_fid[feat_id + 1]
        # map up to original set of anchors
        labels = _unmap(fpn_labels[level_start:level_end],
                        total_anchors,
                        fpn_inds_inside[feat_id],
                        fill=-1)
        bbox_targets = _unmap(fpn_bbox_targets[level_start:level_end],
                              total_anchors,
                              fpn_inds_inside[feat_id],
                              fill=0)
        bbox_weights = _unmap(fpn_bbox_weights[level_start:level_end],
                              total_anchors,
                              fpn_inds_inside[feat_id],
                              fill=0)

        labels = labels.reshape(
            (1, feat_height, feat_width, A)).transpose(0, 3, 1, 2)
        labels = labels.reshape((1, A * feat_height * feat_width))

        bbox_targets = bbox_targets.reshape(
            (1, feat_height, feat_width, A * 4)).transpose(0, 3, 1, 2)
        bbox_targets = bbox_targets.reshape((1, A * 4, -1))
        bbox_weights = bbox_weights.reshape(
            (1, feat_height, feat_width, A * 4)).transpose((0, 3, 1, 2))
        bbox_weights = bbox_weights.reshape((1, A * 4, -1))

        label_list.append(labels)
        bbox_target_list.append(bbox_targets)
        bbox_weight_list.append(bbox_weights)

    label = {
        'label': np.concatenate(label_list, axis=1),
        'bbox_target': np.concatenate(bbox_target_list, axis=2),
        'bbox_weight': np.concatenate(bbox_weight_list, axis=2)
    }

    return label
Example #4
0
def assign_anchor(feat_shape, gt_boxes, im_info, cfg, feat_stride=16,
                  scales=(8, 16, 32), ratios=(0.5, 1, 2), allowed_border=0):
    """
    assign ground truth boxes to anchor positions
    :param feat_shape: infer output shape; feat_shape[-2:] is read as (feat_height, feat_width)
    :param gt_boxes: ground truth boxes, columns 0:4 are used as regression targets; may be empty
    :param im_info: only im_info[0] is used; its [0] / [1] bound the anchors' y2 / x2
    :param cfg: config; reads cfg.TRAIN.RPN_CLOBBER_POSITIVES, RPN_NEGATIVE_OVERLAP,
        RPN_POSITIVE_OVERLAP, RPN_BATCH_SIZE, RPN_FG_FRACTION and RPN_BBOX_WEIGHTS
    :param feat_stride: anchor position step
    :param scales: used to generate anchors, affects num_anchors (per location)
    :param ratios: aspect ratios of generated anchors
    :param allowed_border: filter out anchors with edge overlap > allowed_border
    :return: dict of label
    'label': of shape (1, num_anchors * feat_height * feat_width);
             1 is positive, 0 is negative, -1 is dont care
    'bbox_target': of shape (1, num_anchors * 4, feat_height, feat_width)
    'bbox_weight': same shape as 'bbox_target', cfg.TRAIN.RPN_BBOX_WEIGHTS on positive anchors
    """
    def _unmap(data, count, inds, fill=0):
        """ unmap a subset inds of data into original data of size count """
        if len(data.shape) == 1:
            ret = np.empty((count,), dtype=np.float32)
            ret.fill(fill)
            ret[inds] = data
        else:
            ret = np.empty((count,) + data.shape[1:], dtype=np.float32)
            ret.fill(fill)
            ret[inds, :] = data
        return ret

    # when True, prints diagnostics and makes the subsampling deterministic.
    # Every debug print passes a single pre-formatted argument so that it
    # produces the same output as a print statement and as a print call.
    DEBUG = False
    im_info = im_info[0]
    scales = np.array(scales, dtype=np.float32)
    base_anchors = generate_anchors(base_size=feat_stride, ratios=list(ratios), scales=scales)
    num_anchors = base_anchors.shape[0]
    feat_height, feat_width = feat_shape[-2:]

    if DEBUG:
        print('anchors:')
        print(base_anchors)
        print('anchor shapes:')
        print(np.hstack((base_anchors[:, 2::4] - base_anchors[:, 0::4],
                         base_anchors[:, 3::4] - base_anchors[:, 1::4])))
        print('im_info %s' % (im_info,))
        print('height %s width %s' % (feat_height, feat_width))
        print('gt_boxes shape %s' % (gt_boxes.shape,))
        print('gt_boxes %s' % (gt_boxes,))

    # 1. generate proposals from bbox deltas and shifted anchors
    shift_x = np.arange(0, feat_width) * feat_stride
    shift_y = np.arange(0, feat_height) * feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel())).transpose()
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = num_anchors
    K = shifts.shape[0]
    all_anchors = base_anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    all_anchors = all_anchors.reshape((K * A, 4))
    total_anchors = int(K * A)

    # only keep anchors inside the image
    inds_inside = np.where((all_anchors[:, 0] >= -allowed_border) &
                           (all_anchors[:, 1] >= -allowed_border) &
                           (all_anchors[:, 2] < im_info[1] + allowed_border) &
                           (all_anchors[:, 3] < im_info[0] + allowed_border))[0]
    if DEBUG:
        print('total_anchors %s' % (total_anchors,))
        print('inds_inside %s' % (len(inds_inside),))

    # keep only inside anchors
    anchors = all_anchors[inds_inside, :]
    if DEBUG:
        print('anchors shape %s' % (anchors.shape,))

    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.empty((len(inds_inside),), dtype=np.float32)
    labels.fill(-1)

    if gt_boxes.size > 0:
        # overlap between the anchors and the gt boxes
        # overlaps (ex, gt)
        # np.float was only an alias of the builtin float and is gone from
        # current NumPy releases
        overlaps = bbox_overlaps(anchors.astype(float), gt_boxes.astype(float))
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])]
        # every anchor that ties for the best overlap of some gt box
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

        if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels first so that positive labels can clobber them
            labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

        # fg label: for each gt, anchor with highest overlap
        labels[gt_argmax_overlaps] = 1

        # fg label: above threshold IoU
        labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

        if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels last so that negative labels can clobber positives
            labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
    else:
        labels[:] = 0

    # subsample positive labels if we have too many
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCH_SIZE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds, size=(len(fg_inds) - num_fg), replace=False)
        if DEBUG:
            disable_inds = fg_inds[:(len(fg_inds) - num_fg)]
        labels[disable_inds] = -1

    # subsample negative labels if we have too many
    num_bg = cfg.TRAIN.RPN_BATCH_SIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(bg_inds, size=(len(bg_inds) - num_bg), replace=False)
        if DEBUG:
            disable_inds = bg_inds[:(len(bg_inds) - num_bg)]
        labels[disable_inds] = -1

    # targets are computed for every inside anchor; bbox_weights masks out
    # everything that is not a positive
    bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if gt_boxes.size > 0:
        bbox_targets[:] = bbox_transform(anchors, gt_boxes[argmax_overlaps, :4])

    bbox_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_weights[labels == 1, :] = np.array(cfg.TRAIN.RPN_BBOX_WEIGHTS)

    if DEBUG:
        _sums = bbox_targets[labels == 1, :].sum(axis=0)
        _squared_sums = (bbox_targets[labels == 1, :] ** 2).sum(axis=0)
        _counts = np.sum(labels == 1)
        means = _sums / (_counts + 1e-14)
        stds = np.sqrt(_squared_sums / _counts - means ** 2)
        print('means %s' % (means,))
        print('stdevs %s' % (stds,))

    # map up to original set of anchors
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_weights = _unmap(bbox_weights, total_anchors, inds_inside, fill=0)

    if DEBUG:
        if gt_boxes.size > 0:
            # max_overlaps only exists when there were gt boxes to compare with
            print('rpn: max max_overlaps %s' % (np.max(max_overlaps),))
        print('rpn: num_positives %s' % (np.sum(labels == 1),))
        print('rpn: num_negatives %s' % (np.sum(labels == 0),))
        _fg_sum = np.sum(labels == 1)
        _bg_sum = np.sum(labels == 0)
        _count = 1
        print('rpn: num_positive avg %s' % (_fg_sum / _count,))
        print('rpn: num_negative avg %s' % (_bg_sum / _count,))

    labels = labels.reshape((1, feat_height, feat_width, A)).transpose(0, 3, 1, 2)
    labels = labels.reshape((1, A * feat_height * feat_width))
    bbox_targets = bbox_targets.reshape((1, feat_height, feat_width, A * 4)).transpose(0, 3, 1, 2)
    bbox_weights = bbox_weights.reshape((1, feat_height, feat_width, A * 4)).transpose((0, 3, 1, 2))

    label = {'label': labels,
             'bbox_target': bbox_targets,
             'bbox_weight': bbox_weights}
    return label
Example #5
0
def assign_pyramid_anchor(feat_shapes, gt_boxes, im_info, cfg, feat_strides=(4, 8, 16, 32, 64),
                          scales=(8,), ratios=(0.5, 1, 2), allowed_border=0, balance_scale_bg=False,):
    """
    assign ground truth boxes to anchor positions on every pyramid level
    :param feat_shapes: one entry per stride; entry[0][-2:] is read as (feat_height, feat_width)
    :param gt_boxes: ground truth boxes, columns 0:4 are used as regression targets; may be empty
    :param im_info: only im_info[0] is used; its [0] / [1] bound the anchors' y2 / x2
    :param cfg: config; reads cfg.TRAIN.RPN_CLOBBER_POSITIVES, RPN_NEGATIVE_OVERLAP,
        RPN_POSITIVE_OVERLAP, RPN_BATCH_SIZE (-1 disables subsampling), RPN_FG_FRACTION
        and RPN_BBOX_WEIGHTS
    :param feat_strides: anchor position step of each level
    :param scales: used to generate anchors, affects num_anchors (per location);
        1-d is shared by all levels, 2-d is indexed per level
    :param ratios: aspect ratios of generated anchors (same 1-d / 2-d convention as scales)
    :param allowed_border: filter out anchors with edge overlap > allowed_border
    :param balance_scale_bg: restrict the background samples for each pyramid level
    :return: dict of label, levels concatenated in feat_strides order
    'label': of shape (1, sum over levels of num_anchors * feat_height * feat_width);
             1 is positive, 0 is negative, -1 is dont care
    'bbox_target': of shape (1, num_anchors * 4, sum over levels of feat_height * feat_width)
    'bbox_weight': same shape as 'bbox_target', cfg.TRAIN.RPN_BBOX_WEIGHTS on positive anchors
    """
    def _unmap(data, count, inds, fill=0):
        """ unmap a subset inds of data into original data of size count """
        if len(data.shape) == 1:
            ret = np.empty((count,), dtype=np.float32)
            ret.fill(fill)
            ret[inds] = data
        else:
            ret = np.empty((count,) + data.shape[1:], dtype=np.float32)
            ret.fill(fill)
            ret[inds, :] = data
        return ret

    # when True, subsampling drops the leading indices instead of a random draw
    DEBUG = False
    im_info = im_info[0]
    scales = np.array(scales, dtype=np.float32)
    ratios = np.array(ratios, dtype=np.float32)
    assert(len(feat_shapes) == len(feat_strides))

    fpn_args = []
    # number of inside anchors per level; turned into cumulative offsets below
    fpn_anchors_fid = np.zeros(0).astype(int)
    fpn_anchors = np.zeros([0, 4])
    fpn_labels = np.zeros(0)
    fpn_inds_inside = []
    for feat_id in range(len(feat_strides)):
        # len(scales.shape) == 1 just for backward compatibility, will remove in the future
        if len(scales.shape) == 1:
            base_anchors = generate_anchors(base_size=feat_strides[feat_id], ratios=ratios, scales=scales)
        else:
            assert len(scales.shape) == len(ratios.shape) == 2
            base_anchors = generate_anchors(base_size=feat_strides[feat_id], ratios=ratios[feat_id], scales=scales[feat_id])
        num_anchors = base_anchors.shape[0]
        feat_height, feat_width = feat_shapes[feat_id][0][-2:]

        # 1. generate proposals from bbox deltas and shifted anchors
        shift_x = np.arange(0, feat_width) * feat_strides[feat_id]
        shift_y = np.arange(0, feat_height) * feat_strides[feat_id]
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel())).transpose()
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = num_anchors
        K = shifts.shape[0]
        all_anchors = base_anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2))
        all_anchors = all_anchors.reshape((K * A, 4))
        total_anchors = int(K * A)

        # only keep anchors inside the image
        inds_inside = np.where((all_anchors[:, 0] >= -allowed_border) &
                               (all_anchors[:, 1] >= -allowed_border) &
                               (all_anchors[:, 2] < im_info[1] + allowed_border) &
                               (all_anchors[:, 3] < im_info[0] + allowed_border))[0]

        # keep only inside anchors
        anchors = all_anchors[inds_inside, :]

        # label: 1 is positive, 0 is negative, -1 is dont care
        # for sigmoid classifier, ignore the 'background' class
        labels = np.empty((len(inds_inside),), dtype=np.float32)
        labels.fill(-1)

        fpn_anchors_fid = np.hstack((fpn_anchors_fid, len(inds_inside)))
        fpn_anchors = np.vstack((fpn_anchors, anchors))
        fpn_labels = np.hstack((fpn_labels, labels))
        fpn_inds_inside.append(inds_inside)
        fpn_args.append([feat_height, feat_width, A, total_anchors])

    if gt_boxes.size > 0:
        # overlap between the anchors and the gt boxes
        # overlaps (ex, gt)
        # np.float was only an alias of the builtin float and is gone from
        # current NumPy releases
        overlaps = bbox_overlaps(fpn_anchors.astype(float), gt_boxes.astype(float))
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(len(fpn_anchors)), argmax_overlaps]
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])]
        # every anchor that ties for the best overlap of some gt box
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

        if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels first so that positive labels can clobber them
            fpn_labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
        # fg label: for each gt, anchor with highest overlap
        fpn_labels[gt_argmax_overlaps] = 1
        # fg label: above threshold IoU
        fpn_labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1
        if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels last so that negative labels can clobber positives
            fpn_labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
    else:
        fpn_labels[:] = 0

    # subsample positive labels if we have too many
    if cfg.TRAIN.RPN_BATCH_SIZE == -1:
        num_fg = fpn_labels.shape[0]
    else:
        num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCH_SIZE)
    fg_inds = np.where(fpn_labels >= 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds, size=(len(fg_inds) - num_fg), replace=False)
        if DEBUG:
            disable_inds = fg_inds[:(len(fg_inds) - num_fg)]
        fpn_labels[disable_inds] = -1

    # subsample negative labels if we have too many
    if cfg.TRAIN.RPN_BATCH_SIZE == -1:
        num_bg = fpn_labels.shape[0]
    else:
        num_bg = cfg.TRAIN.RPN_BATCH_SIZE - np.sum(fpn_labels >= 1)
    bg_inds = np.where(fpn_labels == 0)[0]
    # per-level counts -> offsets of each level inside the stacked arrays
    fpn_anchors_fid = np.hstack((0, fpn_anchors_fid.cumsum()))

    if balance_scale_bg:
        # floor division keeps the per-level quota an integer; true division
        # would hand npr.choice a float size, which it rejects
        num_bg_scale = num_bg // len(feat_strides)
        for feat_id in range(0, len(feat_strides)):
            bg_ind_scale = bg_inds[(bg_inds >= fpn_anchors_fid[feat_id]) & (bg_inds < fpn_anchors_fid[feat_id+1])]
            if len(bg_ind_scale) > num_bg_scale:
                disable_inds = npr.choice(bg_ind_scale, size=(len(bg_ind_scale) - num_bg_scale), replace=False)
                fpn_labels[disable_inds] = -1
    else:
        if len(bg_inds) > num_bg:
            disable_inds = npr.choice(bg_inds, size=(len(bg_inds) - num_bg), replace=False)
            if DEBUG:
                disable_inds = bg_inds[:(len(bg_inds) - num_bg)]
            fpn_labels[disable_inds] = -1

    # regression targets are only filled in for the positive anchors
    fpn_bbox_targets = np.zeros((len(fpn_anchors), 4), dtype=np.float32)
    if gt_boxes.size > 0:
        fpn_bbox_targets[fpn_labels >= 1, :] = bbox_transform(fpn_anchors[fpn_labels >= 1, :], gt_boxes[argmax_overlaps[fpn_labels >= 1], :4])
    fpn_bbox_weights = np.zeros((len(fpn_anchors), 4), dtype=np.float32)

    fpn_bbox_weights[fpn_labels >= 1, :] = np.array(cfg.TRAIN.RPN_BBOX_WEIGHTS)

    label_list = []
    bbox_target_list = []
    bbox_weight_list = []
    for feat_id in range(0, len(feat_strides)):
        feat_height, feat_width, A, total_anchors = fpn_args[feat_id]
        level_start = fpn_anchors_fid[feat_id]
        level_end = fpn_anchors_fid[feat_id+1]
        # map up to original set of anchors
        labels = _unmap(fpn_labels[level_start:level_end], total_anchors, fpn_inds_inside[feat_id], fill=-1)
        bbox_targets = _unmap(fpn_bbox_targets[level_start:level_end], total_anchors, fpn_inds_inside[feat_id], fill=0)
        bbox_weights = _unmap(fpn_bbox_weights[level_start:level_end], total_anchors, fpn_inds_inside[feat_id], fill=0)

        labels = labels.reshape((1, feat_height, feat_width, A)).transpose(0, 3, 1, 2)
        labels = labels.reshape((1, A * feat_height * feat_width))

        bbox_targets = bbox_targets.reshape((1, feat_height, feat_width, A * 4)).transpose(0, 3, 1, 2)
        bbox_targets = bbox_targets.reshape((1, A * 4, -1))
        bbox_weights = bbox_weights.reshape((1, feat_height, feat_width, A * 4)).transpose((0, 3, 1, 2))
        bbox_weights = bbox_weights.reshape((1, A * 4, -1))

        label_list.append(labels)
        bbox_target_list.append(bbox_targets)
        bbox_weight_list.append(bbox_weights)

    label = {
        'label': np.concatenate(label_list, axis=1),
        'bbox_target': np.concatenate(bbox_target_list, axis=2),
        'bbox_weight': np.concatenate(bbox_weight_list, axis=2)
    }

    return label
Example #6
0
def sample_rois(rois,
                fg_rois_per_image,
                rois_per_image,
                num_classes,
                cfg,
                labels=None,
                overlaps=None,
                bbox_targets=None,
                gt_boxes=None):
    """
    generate random sample of ROIs comprising foreground and background examples
    :param rois: all_rois [n, 4]; e2e: [n, 5] with batch_index
    :param fg_rois_per_image: foreground roi number
    :param rois_per_image: total roi number
    :param num_classes: number of classes
    :param cfg: config; reads cfg.TRAIN.FG_THRESH, BG_THRESH_HI, BG_THRESH_LO,
                BBOX_NORMALIZATION_PRECOMPUTED, BBOX_MEANS and BBOX_STDS
    :param labels: maybe precomputed
    :param overlaps: maybe precomputed (max_overlaps)
    :param bbox_targets: maybe precomputed
    :param gt_boxes: optional for e2e [n, 5] (x1, y1, x2, y2, cls)
    :return: (rois, labels, neg_labels, bbox_targets, bbox_weights);
             neg_labels carries the assigned gt class for sampled rois whose
             overlap lies in [0.2, 0.3) and is 0 everywhere else

    NOTE: gt_assignment is only computed when ``labels`` is None, so passing
    precomputed ``labels`` requires passing precomputed ``bbox_targets`` too.
    """
    if labels is None:
        # builtin float (float64) replaces the np.float alias, which was
        # removed in NumPy 1.24
        overlaps = bbox_overlaps(rois[:, 1:].astype(float),
                                 gt_boxes[:, :4].astype(float))
        gt_assignment = overlaps.argmax(axis=1)
        overlaps = overlaps.max(axis=1)
        labels = gt_boxes[gt_assignment, 4]

    # foreground RoI with FG_THRESH overlap
    fg_indexes = np.where(overlaps >= cfg.TRAIN.FG_THRESH)[0]

    # guard against the case when an image has fewer than fg_rois_per_image foreground RoIs
    fg_rois_per_this_image = np.minimum(fg_rois_per_image, fg_indexes.size)
    # Sample foreground regions without replacement
    if len(fg_indexes) > fg_rois_per_this_image:
        fg_indexes = npr.choice(fg_indexes,
                                size=fg_rois_per_this_image,
                                replace=False)

    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_indexes = np.where((overlaps < cfg.TRAIN.BG_THRESH_HI)
                          & (overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
    # Compute number of background RoIs to take from this image (guarding against there being fewer than desired)
    bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
    bg_rois_per_this_image = np.minimum(bg_rois_per_this_image,
                                        bg_indexes.size)
    # Sample background regions without replacement
    if len(bg_indexes) > bg_rois_per_this_image:
        bg_indexes = npr.choice(bg_indexes,
                                size=bg_rois_per_this_image,
                                replace=False)

    # indexes selected: foreground first, background after
    keep_indexes = np.append(fg_indexes, bg_indexes)

    # pad more to ensure a fixed minibatch size; the padding is drawn from all
    # rois and is labelled as background below
    while keep_indexes.shape[0] < rois_per_image:
        gap = np.minimum(len(rois), rois_per_image - keep_indexes.shape[0])
        gap_indexes = npr.choice(range(len(rois)), size=gap, replace=False)
        keep_indexes = np.append(keep_indexes, gap_indexes)

    # select labels (fancy indexing yields a fresh array)
    labels = labels[keep_indexes]
    # keep the assigned gt classes before the background entries are zeroed
    labels_all = labels.copy()

    # set labels of bg_rois to be 0
    labels[fg_rois_per_this_image:] = 0
    rois = rois[keep_indexes]

    # load or compute bbox_target
    if bbox_targets is not None:
        bbox_target_data = bbox_targets[keep_indexes, :]
    else:
        targets = bbox_transform(rois[:, 1:],
                                 gt_boxes[gt_assignment[keep_indexes], :4])
        if cfg.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED:
            targets = ((targets - np.array(cfg.TRAIN.BBOX_MEANS)) /
                       np.array(cfg.TRAIN.BBOX_STDS))
        bbox_target_data = np.hstack((labels[:, np.newaxis], targets))

    overlaps = overlaps[keep_indexes]

    # sampled rois with overlap in [neg_middle, neg_high) keep the class of
    # their assigned gt box in neg_labels; all other entries stay 0
    neg_middle = 0.2
    neg_high = 0.3
    neg_indexes_L2 = np.where((overlaps < neg_high)
                              & (overlaps >= neg_middle))[0]

    neg_labels = np.zeros(labels.shape)
    neg_labels[neg_indexes_L2] = labels_all[neg_indexes_L2]

    bbox_targets, bbox_weights = \
        expand_bbox_regression_targets(bbox_target_data, num_classes, cfg)

    return rois, labels, neg_labels, bbox_targets, bbox_weights
Example #7
0
    def sample_rois(self,
                    rois,
                    fg_rois_per_image,
                    rois_per_image,
                    num_classes,
                    cfg,
                    labels=None,
                    overlaps=None,
                    bbox_targets=None,
                    gt_boxes=None,
                    gt_masks=None):
        """
        generate random sample of ROIs comprising foreground and background
        examples, together with mask regression targets for the foreground ones
        :param rois: all rois; column 0 is skipped when boxes are read (rois[:, 1:])
        :param fg_rois_per_image: foreground roi number
        :param rois_per_image: total roi number
        :param num_classes: number of classes
        :param cfg: config; reads cfg.TRAIN.FG_THRESH, BG_THRESH_HI, BG_THRESH_LO,
                    IGNORE_GAP, GAP_SELECT_FROM_ALL, BBOX_NORMALIZATION_PRECOMPUTED,
                    BBOX_MEANS and BBOX_STDS
        :param labels: maybe precomputed
        :param overlaps: maybe precomputed (max_overlaps)
        :param bbox_targets: maybe precomputed
        :param gt_boxes: gt boxes; columns :4 are coordinates, column 4 the class
        :param gt_masks: masks indexed by gt box, used to build the mask targets
        :return: (rois, labels, bbox_targets, bbox_weights, mask_reg_targets);
                 mask_reg_targets has shape (len(keep_indexes), 1, mask_size, mask_size)
                 and stays -1 for every row that is not a sampled foreground roi

        NOTE: gt_assignment is only computed when ``labels`` is None, yet the
        mask targets below always read it for every foreground roi.
        """
        if labels is None:
            # builtin float (float64) replaces the np.float alias, which was
            # removed in NumPy 1.24
            overlaps = bbox_overlaps(rois[:, 1:].astype(float),
                                     gt_boxes[:, :4].astype(float))
            gt_assignment = overlaps.argmax(axis=1)
            overlaps = overlaps.max(axis=1)
            labels = gt_boxes[gt_assignment, 4]

        # foreground RoI with FG_THRESH overlap
        fg_indexes = np.where(overlaps >= cfg.TRAIN.FG_THRESH)[0]
        if cfg.TRAIN.IGNORE_GAP:
            keep_inds = remove_repetition(rois[fg_indexes, 1:])
            fg_indexes = fg_indexes[keep_inds]

        # guard against the case when an image has fewer than fg_rois_per_image foreground RoIs
        fg_rois_per_this_image = np.minimum(fg_rois_per_image, fg_indexes.size)
        # Sample foreground regions without replacement
        if len(fg_indexes) > fg_rois_per_this_image:
            fg_indexes = np.random.choice(fg_indexes,
                                          size=fg_rois_per_this_image,
                                          replace=False)

        # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
        bg_indexes = np.where((overlaps < cfg.TRAIN.BG_THRESH_HI)
                              & (overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
        if cfg.TRAIN.IGNORE_GAP:
            keep_inds = remove_repetition(rois[bg_indexes, 1:])
            bg_indexes = bg_indexes[keep_inds]

        # Compute number of background RoIs to take from this image (guarding against there being fewer than desired)
        bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
        bg_rois_per_this_image = np.minimum(bg_rois_per_this_image,
                                            bg_indexes.size)
        # Sample background regions without replacement
        if len(bg_indexes) > bg_rois_per_this_image:
            bg_indexes = np.random.choice(bg_indexes,
                                          size=bg_rois_per_this_image,
                                          replace=False)

        # indexes selected: foreground first, background after
        keep_indexes = np.append(fg_indexes, bg_indexes)

        # pad more to ensure a fixed minibatch size
        while keep_indexes.shape[0] < rois_per_image:
            gap = np.minimum(len(rois), rois_per_image - keep_indexes.shape[0])
            if cfg.TRAIN.GAP_SELECT_FROM_ALL:
                gap_indexes = np.random.choice(range(len(rois)),
                                               size=gap,
                                               replace=False)
            else:
                # pad only with rois that were not sampled as foreground
                bg_full_indexes = list(set(range(len(rois))) - set(fg_indexes))
                gap_indexes = np.random.choice(bg_full_indexes,
                                               size=gap,
                                               replace=False)
            keep_indexes = np.append(keep_indexes, gap_indexes)

        # select labels
        labels = labels[keep_indexes]
        # set labels of bg_rois to be 0
        labels[fg_rois_per_this_image:] = 0
        rois = rois[keep_indexes]

        # load or compute bbox target
        if bbox_targets is not None:
            bbox_target_data = bbox_targets[keep_indexes, :]
        else:
            targets = bbox_transform(rois[:, 1:],
                                     gt_boxes[gt_assignment[keep_indexes], :4])
            if cfg.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED:
                targets = ((targets - np.array(cfg.TRAIN.BBOX_MEANS)) /
                           np.array(cfg.TRAIN.BBOX_STDS))
            bbox_target_data = np.hstack((labels[:, np.newaxis], targets))

        bbox_targets, bbox_weights = \
            expand_bbox_regression_targets(bbox_target_data, num_classes, cfg)

        if cfg.TRAIN.IGNORE_GAP:
            # everything after the sampled fg + bg rois is padding: mark it as
            # ignored (-1) and give it no regression weight
            valid_rois_per_this_image = fg_rois_per_this_image + bg_rois_per_this_image
            labels[valid_rois_per_this_image:] = -1
            bbox_weights[valid_rois_per_this_image:] = 0

        # masks: only the leading foreground rows receive a target, the rest
        # keep the -1 fill value
        mask_reg_targets = -np.ones(
            (len(keep_indexes), 1, self._mask_size, self._mask_size))
        for idx, obj in enumerate(fg_indexes):
            # idx addresses the sampled roi, obj the same roi in the full set
            gt_roi = np.round(gt_boxes[gt_assignment[obj], :-1]).astype(int)
            ex_roi = np.round(rois[idx, 1:]).astype(int)
            gt_mask = gt_masks[gt_assignment[obj]]
            mask_reg_target = intersect_box_mask(ex_roi, gt_roi, gt_mask)
            mask_reg_target = cv2.resize(mask_reg_target.astype(float),
                                         (self._mask_size, self._mask_size))
            # binarise the resized mask
            mask_reg_target = mask_reg_target >= self._binary_thresh
            mask_reg_targets[idx, ...] = mask_reg_target

        return rois, labels, bbox_targets, bbox_weights, mask_reg_targets
Example #8
0
def assign_anchor(feat_shape_p2,
                  feat_shape_p3,
                  feat_shape_p4,
                  feat_shape_p5,
                  feat_shape_p6,
                  gt_boxes,
                  im_info,
                  cfg,
                  feat_stride_p2=4,
                  scales_p2=(16, ),
                  ratios_p2=(0.75, 1, 1.5),
                  feat_stride_p3=8,
                  scales_p3=(16, ),
                  ratios_p3=(0.75, 1, 1.5),
                  feat_stride_p4=16,
                  scales_p4=(16, ),
                  ratios_p4=(0.75, 1, 1.5),
                  feat_stride_p5=32,
                  scales_p5=(16, ),
                  ratios_p5=(0.75, 1, 1.5),
                  feat_stride_p6=64,
                  scales_p6=(16, ),
                  ratios_p6=(0.75, 1, 1.5),
                  allowed_border=1000):
    """
    assign ground truth boxes to anchor positions of a five level pyramid (p2..p6)

    Anchors of all levels are labelled jointly (one fg/bg subsampling over the
    whole pyramid) and the results are split back per level afterwards.

    :param feat_shape_p2 .. feat_shape_p6: infer output shape of every level;
           only the last two entries (height, width) are read
    :param gt_boxes: assign ground truth:[n, 5]
    :param im_info: currently unused, anchors are never filtered by image border
    :param cfg: config; reads cfg.TRAIN.RPN_CLOBBER_POSITIVES, RPN_NEGATIVE_OVERLAP,
           RPN_POSITIVE_OVERLAP, RPN_FG_FRACTION, RPN_BATCH_SIZE and RPN_BBOX_WEIGHTS
    :param scales_p3: scales used to generate the anchors of EVERY level
    :param feat_stride_p*, scales_p2/p4/p5/p6, ratios_p*, allowed_border:
           accepted for interface compatibility but ignored; strides are fixed to
           (4, 8, 16, 32, 64) and ratios to (0.5, 1, 2)
    :return: dict of label, for every level pX in p2..p6 (A = anchors per location)
    'label/pX': of shape (1, A * feat_height * feat_width); 1 fg, 0 bg, -1 dont care
    'bbox_target/pX': of shape (1, A * 4, feat_height, feat_width)
    'bbox_weight/pX': of shape (1, A * 4, feat_height, feat_width),
                      cfg.TRAIN.RPN_BBOX_WEIGHTS on fg anchors and 0 elsewhere
    """
    level_names = ('p2', 'p3', 'p4', 'p5', 'p6')
    feat_shape = [
        feat_shape_p2, feat_shape_p3, feat_shape_p4, feat_shape_p5,
        feat_shape_p6
    ]
    feat_stride = [4, 8, 16, 32, 64]
    ratios = (0.5, 1, 2)
    scales = np.array(scales_p3, dtype=np.float32)

    all_anchors_list = []
    anchors_per_location = []
    anchors_counter = []  # cumulative number of anchors after every level
    total_anchors = 0
    for shape, stride in zip(feat_shape, feat_stride):
        base_anchors = generate_anchors(base_size=stride,
                                        ratios=list(ratios),
                                        scales=scales)
        A = base_anchors.shape[0]
        feat_height, feat_width = shape[-2:]

        # 1. generate proposals from bbox deltas and shifted anchors
        shift_x = np.arange(0, feat_width) * stride
        shift_y = np.arange(0, feat_height) * stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                            shift_y.ravel())).transpose()
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        K = shifts.shape[0]
        level_anchors = base_anchors.reshape((1, A, 4)) + shifts.reshape(
            (1, K, 4)).transpose((1, 0, 2))
        all_anchors_list.append(level_anchors.reshape((K * A, 4)))

        anchors_per_location.append(A)
        total_anchors += int(K * A)
        anchors_counter.append(total_anchors)

    # all_anchors: [total_anchors, 4]; in FPN anchors outside of the image are kept
    all_anchors = np.vstack(all_anchors_list)

    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.empty((total_anchors, ), dtype=np.float32)
    labels.fill(-1)

    if gt_boxes.size > 0:
        # overlap between the anchors and the gt boxes
        # overlaps (ex, gt); builtin float (float64) replaces the np.float
        # alias, which was removed in NumPy 1.24
        overlaps = bbox_overlaps(all_anchors.astype(float),
                                 gt_boxes.astype(float))
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(total_anchors), argmax_overlaps]
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps,
                                   np.arange(overlaps.shape[1])]
        # every anchor that ties with the best overlap of some gt box
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

        if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels first so that positive labels can clobber them
            labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

        # fg label: for each gt, anchor with highest overlap
        labels[gt_argmax_overlaps] = 1

        # fg label: above threshold IoU
        labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

        if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels last so that negative labels can clobber positives
            labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
    else:
        labels[:] = 0

    # subsample positive labels if we have too many
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCH_SIZE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds,
                                  size=(len(fg_inds) - num_fg),
                                  replace=False)
        labels[disable_inds] = -1

    # subsample negative labels if we have too many
    num_bg = cfg.TRAIN.RPN_BATCH_SIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(bg_inds,
                                  size=(len(bg_inds) - num_bg),
                                  replace=False)
        labels[disable_inds] = -1

    bbox_targets = np.zeros((total_anchors, 4), dtype=np.float32)
    if gt_boxes.size > 0:
        bbox_targets[:] = bbox_transform(all_anchors,
                                         gt_boxes[argmax_overlaps, :4])

    bbox_weights = np.zeros((total_anchors, 4), dtype=np.float32)
    bbox_weights[labels == 1, :] = np.array(cfg.TRAIN.RPN_BBOX_WEIGHTS)

    # split the flat pyramid-wide arrays back into one chunk per level and
    # bring every chunk into the layout documented above
    per_level = []
    start = 0
    for shape, A, stop in zip(feat_shape, anchors_per_location,
                              anchors_counter):
        feat_height, feat_width = shape[-2], shape[-1]

        level_labels = labels[start:stop].reshape(
            (1, feat_height, feat_width, A)).transpose(0, 3, 1, 2)
        level_labels = level_labels.reshape(
            (1, A * feat_height * feat_width))
        level_targets = bbox_targets[start:stop, :].reshape(
            (1, feat_height, feat_width, A * 4)).transpose(0, 3, 1, 2)
        level_weights = bbox_weights[start:stop, :].reshape(
            (1, feat_height, feat_width, A * 4)).transpose((0, 3, 1, 2))

        per_level.append((level_labels, level_targets, level_weights))
        start = stop

    label = {}
    for slot, prefix in enumerate(('label', 'bbox_target', 'bbox_weight')):
        for name, arrays in zip(level_names, per_level):
            label[prefix + '/' + name] = arrays[slot]
    return label
Example #9
0
def sample_rois(rois, fg_rois_per_image, rois_per_image, num_classes, cfg,
                labels=None, overlaps=None, bbox_targets=None, gt_boxes=None):
    """
    generate random sample of ROIs comprising foreground and background examples

    Besides the rois reaching cfg.TRAIN.FG_THRESH, a roi is also treated as
    foreground when its overlap is > 0.1, its secondary overlap (overlaps1) is
    > 0.7 and the centre flag of its assigned gt box equals 1. These extra
    measures come from bbox_overlaps_py1 and are only available when ``labels``
    is None; with precomputed ``labels`` the extra foreground rule is skipped.

    :param rois: all_rois [n, 4]; e2e: [n, 5] with batch_index
    :param fg_rois_per_image: foreground roi number
    :param rois_per_image: total roi number
    :param num_classes: number of classes
    :param cfg: config; reads cfg.TRAIN.FG_THRESH, BG_THRESH_HI, BG_THRESH_LO,
                BBOX_NORMALIZATION_PRECOMPUTED, BBOX_MEANS and BBOX_STDS
    :param labels: maybe precomputed
    :param overlaps: maybe precomputed (max_overlaps)
    :param bbox_targets: maybe precomputed
    :param gt_boxes: optional for e2e [n, 5] (x1, y1, x2, y2, cls)
    :return: (rois, labels, bbox_targets, bbox_weights)
    """
    overlaps1 = overlaps2 = boxcenter_ins = None
    if labels is None:
        # builtin float (float64) replaces the np.float alias, which was
        # removed in NumPy 1.24
        overlaps, overlaps1, overlaps2, tboxcenter_ins = bbox_overlaps_py1(
            rois[:, 1:].astype(float), gt_boxes[:, :4].astype(float))
        gt_assignment = overlaps.argmax(axis=1)
        # centre flag of the assigned (max overlap) gt box for every roi
        boxcenter_ins = np.zeros(gt_assignment.shape[0])
        boxcenter_ins[:] = np.asarray(tboxcenter_ins)[
            np.arange(gt_assignment.shape[0]), gt_assignment]
        overlaps = overlaps.max(axis=1)
        overlaps1 = overlaps1.max(axis=1)
        overlaps2 = overlaps2.max(axis=1)
        labels = gt_boxes[gt_assignment, 4]

    if DEBUG and overlaps1 is not None:
        # show the 100 rois with the highest overlap
        new_order = np.argsort(overlaps)
        print("overlaps:" + str(overlaps[new_order[-100:]]))
        print("overlaps1:" + str(overlaps1[new_order[-100:]]))
        print("overlaps2:" + str(overlaps2[new_order[-100:]]))
        print("boxcenter_ins" + str(boxcenter_ins[new_order[-100:]]))

    # foreground RoI with FG_THRESH overlap
    fg_mask = overlaps >= cfg.TRAIN.FG_THRESH
    fg_indexes = np.where(fg_mask)[0]
    if DEBUG:
        print("fg_indexes:" + str(fg_indexes))

    if overlaps1 is not None:
        # extra foreground rois, appended in ascending index order after the
        # regular ones; rois that are foreground already are not repeated
        extra_fg = np.where(~fg_mask
                            & (overlaps > 0.1)
                            & (overlaps1 > 0.7)
                            & (boxcenter_ins == 1))[0]
        fg_indexes = np.append(fg_indexes, extra_fg)

    if DEBUG:
        print("fg_indexes:" + str(fg_indexes))
        print("**********proposal-gt:" + str(len(fg_indexes) - gt_boxes.shape[0]))
        with open('channels.txt') as f_chan:
            channels = f_chan.read().split(" ")
        for ii in range(len(fg_indexes) - gt_boxes.shape[0]):
            if fg_indexes[ii] < len(channels):
                print(channels[fg_indexes[ii]])
                print(labels[fg_indexes[ii]])

    # guard against the case when an image has fewer than fg_rois_per_image foreground RoIs
    fg_rois_per_this_image = np.minimum(fg_rois_per_image, fg_indexes.size)
    # Sample foreground regions without replacement
    if len(fg_indexes) > fg_rois_per_this_image:
        fg_indexes = npr.choice(fg_indexes, size=fg_rois_per_this_image, replace=False)
    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_indexes = np.where((overlaps < cfg.TRAIN.BG_THRESH_HI) & (overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
    # Compute number of background RoIs to take from this image (guarding against there being fewer than desired)
    bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
    bg_rois_per_this_image = np.minimum(bg_rois_per_this_image, bg_indexes.size)
    # Sample background regions without replacement
    if len(bg_indexes) > bg_rois_per_this_image:
        bg_indexes = npr.choice(bg_indexes, size=bg_rois_per_this_image, replace=False)

    # indexes selected: foreground first, background after
    keep_indexes = np.append(fg_indexes, bg_indexes)

    # pad more to ensure a fixed minibatch size
    while keep_indexes.shape[0] < rois_per_image:
        gap = np.minimum(len(rois), rois_per_image - keep_indexes.shape[0])
        gap_indexes = npr.choice(range(len(rois)), size=gap, replace=False)
        keep_indexes = np.append(keep_indexes, gap_indexes)

    # select labels
    labels = labels[keep_indexes]
    # set labels of bg_rois to be 0
    labels[fg_rois_per_this_image:] = 0
    rois = rois[keep_indexes]

    # load or compute bbox_target
    if bbox_targets is not None:
        bbox_target_data = bbox_targets[keep_indexes, :]
    else:
        targets = bbox_transform(rois[:, 1:], gt_boxes[gt_assignment[keep_indexes], :4])
        if cfg.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED:
            targets = ((targets - np.array(cfg.TRAIN.BBOX_MEANS))
                       / np.array(cfg.TRAIN.BBOX_STDS))
        bbox_target_data = np.hstack((labels[:, np.newaxis], targets))

    bbox_targets, bbox_weights = \
        expand_bbox_regression_targets(bbox_target_data, num_classes, cfg)

    return rois, labels, bbox_targets, bbox_weights
Example #10
0
  def generate_proposals(self, cls_prob, bbox_pred, im_info):
    """Decode the RPN outputs of every pyramid level into one proposal blob.

    For each entry of ``self.feat_stride`` the anchors of that level are laid
    out over the score map, decoded with the predicted deltas, clipped to the
    image and filtered by minimum size. When ``self.individual_proposals`` is
    set, each level is additionally truncated to the best pre-NMS candidates
    and run through ``self.nms_func`` on its own.

    :param cls_prob: per-level scores; height and width are read from axes -3 and -2
    :param bbox_pred: per-level box deltas, flattened to rows of 4
    :param im_info: im_info[:2] is passed to clip_boxes, im_info[2] scales the minimum size
    :return: (blob, scores); blob rows are [batch_idx, box coordinates] as float32
    :raises ValueError: if the first score map holds more than one image
    """
    if cls_prob[0].shape[0] > 1:
      raise ValueError("Sorry, multiple images for each device is not implemented.")

    top_n_before_nms = self.rpn_pre_nms_top_n
    top_n_after_nms = self.rpn_post_nms_top_n
    smallest_side = self.rpn_min_size

    boxes_per_level = []
    scores_per_level = []

    for level in range(len(self.feat_stride)):
      step = int(self.feat_stride[level])
      cell_anchors = generate_anchors(stride=step, sizes=self.scales * step, aspect_ratios=self.ratios)

      level_scores = cls_prob[level]
      level_deltas = bbox_pred[level]

      # the grid size comes from the score map itself
      rows, cols = level_scores.shape[-3:-1]

      # one (x, y, x, y) offset per grid cell
      grid_x, grid_y = np.meshgrid(np.arange(0, cols) * step, np.arange(0, rows) * step)
      flat_x = grid_x.ravel()
      flat_y = grid_y.ravel()
      offsets = np.vstack((flat_x, flat_y, flat_x, flat_y)).transpose()

      # broadcast the cell anchors over all offsets, then flatten
      per_cell = self.num_anchors
      n_cells = offsets.shape[0]
      level_anchors = cell_anchors.reshape((1, per_cell, 4)) + offsets.reshape((1, n_cells, 4)).transpose((1, 0, 2))
      level_anchors = level_anchors.reshape((n_cells * per_cell, 4))

      level_deltas = level_deltas.reshape((-1, 4))
      level_scores = level_scores.reshape((-1, 1))

      if self.individual_proposals:
        negated = -level_scores.squeeze()
        if 0 < top_n_before_nms < len(level_scores):
          # pick the best candidates first, then sort only those
          shortlist = np.argpartition(negated, top_n_before_nms)[:top_n_before_nms]
          ranking = shortlist[np.argsort(-level_scores[shortlist].squeeze())]
        else:
          ranking = np.argsort(negated)
        level_deltas = level_deltas[ranking, :]
        level_anchors = level_anchors[ranking, :]
        level_scores = level_scores[ranking]

      # decode the anchors with the deltas and keep the boxes inside the image
      candidates = clip_boxes(bbox_transform(level_anchors, level_deltas), im_info[:2])

      # drop boxes that are too small (threshold expressed in input image scale)
      survivors = self._filter_boxes(candidates, smallest_side * im_info[2])
      candidates = candidates[survivors, :]
      level_scores = level_scores[survivors]

      if self.individual_proposals:
        survivors = self.nms_func(np.hstack((candidates, level_scores)).astype(np.float32))
        if top_n_after_nms > 0:
          survivors = survivors[:top_n_after_nms]
        candidates = candidates[survivors, :]
        level_scores = level_scores[survivors]

      boxes_per_level.append(candidates)
      scores_per_level.append(level_scores)

    merged_boxes = np.vstack(boxes_per_level)
    merged_scores = np.vstack(scores_per_level)

    # prepend the batch index column
    batch_column = np.ones((merged_boxes.shape[0], 1), dtype=np.float32) * self.batch_idx
    blob = np.hstack((batch_column, merged_boxes.astype(np.float32, copy=False)))

    return blob, merged_scores
Example #11
0
def assign_pyramid_anchor(feat_shapes, gt_boxes, im_info, cfg, feat_strides=(4,8,16,16,16),
                          scales = (8,8,8,16,32),ratios = (0.5,1,2), allowed_border = 0, balance_scale_bg = False):
    """
    Assign RPN labels and box-regression targets to the anchors of every pyramid level.

    Anchors are generated level by level, anchors reaching outside the image by more
    than ``allowed_border`` are dropped, and the surviving anchors of all levels are
    labelled jointly against ``gt_boxes``. Foreground / background anchors are then
    randomly subsampled to the budget given by ``cfg.TRAIN.RPN_BATCH_SIZE`` and
    ``cfg.TRAIN.RPN_FG_FRACTION`` (no subsampling limit when the batch size is -1).

    :param feat_shapes: per-level shape info; ``feat_shapes[i][0][-2:]`` is read as
        (feat_height, feat_width)
    :param gt_boxes: ground-truth array; passed whole to ``bbox_overlaps``, columns
        ``:4`` are used as regression references
    :param im_info: only ``im_info[0]`` is used; its element 0 bounds the anchors' y2
        and its element 1 bounds the anchors' x2
    :param cfg: config object providing ``TRAIN.RPN_CLOBBER_POSITIVES``,
        ``TRAIN.RPN_NEGATIVE_OVERLAP``, ``TRAIN.RPN_POSITIVE_OVERLAP``,
        ``TRAIN.RPN_BATCH_SIZE``, ``TRAIN.RPN_FG_FRACTION`` and ``TRAIN.RPN_BBOX_WEIGHTS``
    :param feat_strides: per-level anchor step, also used as the anchor base size
    :param scales: one anchor scale per level
    :param ratios: anchor aspect ratios, shared by all levels
    :param allowed_border: how far an anchor may reach outside the image and still be kept
    :param balance_scale_bg: if true, the background budget is split evenly between the
        levels and each level is subsampled on its own
    :return: dict of label
    'label': shape (1, sum_i A*h_i*w_i); 1 is positive, 0 is negative, -1 is ignored
    'bbox_target': shape (1, 4*A, sum_i h_i*w_i)
    'bbox_weight': shape (1, 4*A, sum_i h_i*w_i)
    """
    def _unmap(data, count, inds, fill = 0):
        """ scatter data (a subset selected by inds) back into an array of size count """
        if len(data.shape) == 1:
            ret = np.empty((count,),dtype = np.float32)
            ret.fill(fill)
            ret[inds] = data
        else:
            ret = np.empty((count,) + data.shape[1:],dtype = np.float32)
            ret.fill(fill)
            ret[inds,:] = data
        return ret

    # Flip by hand to make the subsampling deterministic while debugging.
    DEBUG = False
    im_info = im_info[0]
    scales = np.array(scales, dtype = np.float32)
    ratios = np.array(ratios, dtype = np.float32)
    fpn_args = []
    # number of kept anchors per level; turned into cumulative offsets further down
    fpn_anchors_fid = np.zeros(0).astype(int)
    fpn_anchors = np.zeros([0,4])
    fpn_labels = np.zeros(0)
    fpn_inds_inside = []
    for feat_id in range(len(feat_strides)):
        base_anchors = generate_anchors(base_size = feat_strides[feat_id], ratios = ratios, scales = [scales[feat_id]])

        num_anchors = base_anchors.shape[0]
        feat_height, feat_width = feat_shapes[feat_id][0][-2:]
        shift_x = np.arange(0, feat_width) * feat_strides[feat_id]
        shift_y = np.arange(0, feat_height) * feat_strides[feat_id]
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel())).transpose()

        # add A anchors (1, A, 4) to K shifts (K, 1, 4) -> (K, A, 4) -> (K*A, 4)
        A = num_anchors
        K = shifts.shape[0]
        all_anchors = base_anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2))
        all_anchors = all_anchors.reshape((K * A, 4))
        total_anchors = int(K * A)

        # only keep anchors inside the image
        inds_inside = np.where((all_anchors[:, 0] >= -allowed_border) &
                               (all_anchors[:, 1] >= -allowed_border) &
                               (all_anchors[:, 2] < im_info[1] + allowed_border) &
                               (all_anchors[:, 3] < im_info[0] + allowed_border))[0]

        # keep only inside anchors
        anchors = all_anchors[inds_inside, :]

        # every kept anchor starts as "ignore"
        labels = np.empty((len(inds_inside),),dtype = np.float32)
        labels.fill(-1)

        fpn_anchors_fid = np.hstack((fpn_anchors_fid,len(inds_inside)))
        fpn_anchors = np.vstack((fpn_anchors,anchors))
        fpn_labels = np.hstack((fpn_labels,labels))
        fpn_inds_inside.append(inds_inside)
        fpn_args.append([feat_height,feat_width,A,total_anchors])

    if gt_boxes.size > 0:
        # The builtin float replaces the np.float alias, which NumPy >= 1.24 no longer has.
        overlaps = bbox_overlaps(fpn_anchors.astype(float),gt_boxes.astype(float))
        argmax_overlaps = overlaps.argmax(axis = 1)
        max_overlaps = overlaps[np.arange(len(fpn_anchors)),argmax_overlaps]
        gt_argmax_overlaps = overlaps.argmax(axis = 0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps,np.arange(overlaps.shape[1])]
        # all anchors that tie for the best overlap with some gt box
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

        if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels first so that positive labels can clobber them
            fpn_labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

        fpn_labels[gt_argmax_overlaps] = 1
        fpn_labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1
        if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels last so that negative labels can clobber positives
            fpn_labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    else:
        fpn_labels[:] = 0

    # subsample positive labels if we have too many
    num_fg = fpn_labels.shape[0] if cfg.TRAIN.RPN_BATCH_SIZE ==-1 else int (cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCH_SIZE)
    fg_inds = np.where(fpn_labels >= 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds, size = (len(fg_inds) - num_fg), replace = False)
        if DEBUG:
            disable_inds = fg_inds[:(len(fg_inds) - num_fg)]
        fpn_labels[disable_inds] = -1

    # subsample negative labels if we have too many
    num_bg = fpn_labels.shape[0] if cfg.TRAIN.RPN_BATCH_SIZE == -1 else cfg.TRAIN.RPN_BATCH_SIZE - np.sum(fpn_labels>=1)
    bg_inds = np.where(fpn_labels ==0)[0]
    # per-level counts -> offsets of each level inside the concatenated arrays
    fpn_anchors_fid = np.hstack((0,fpn_anchors_fid.cumsum()))

    if balance_scale_bg:
        # Floor division keeps the per-level budget an integer on Python 3 as well;
        # a float here would be passed on as a float `size` to npr.choice.
        num_bg_scale = int(num_bg // len(feat_strides))
        for feat_id in range(0,len(feat_strides)):
            bg_ind_scale = bg_inds[(bg_inds >= fpn_anchors_fid[feat_id]) & (bg_inds < fpn_anchors_fid[feat_id+1])]
            if len(bg_ind_scale) > num_bg_scale:
                disable_inds = npr.choice(bg_ind_scale, size=(len(bg_ind_scale) - num_bg_scale), replace=False)
                fpn_labels[disable_inds] = -1
    else:
        if len(bg_inds) > num_bg:
            disable_inds = npr.choice(bg_inds, size = (len(bg_inds) - num_bg), replace = False)
            if DEBUG:
                disable_inds = bg_inds[:(len(bg_inds) - num_bg)]
            fpn_labels[disable_inds] = -1

    # regression targets / weights are only filled in for positive anchors
    fpn_bbox_targets = np.zeros((len(fpn_anchors),4),dtype = np.float32)
    if gt_boxes.size > 0:
        fpn_bbox_targets[fpn_labels>=1,:] = bbox_transform(fpn_anchors[fpn_labels>=1,:],gt_boxes[argmax_overlaps[fpn_labels >= 1], :4])

    fpn_bbox_weights = np.zeros((len(fpn_anchors),4),dtype = np.float32)
    fpn_bbox_weights[fpn_labels>=1,:] = np.array(cfg.TRAIN.RPN_BBOX_WEIGHTS)

    label_list = []
    bbox_target_list = []
    bbox_weight_list = []
    for feat_id in range(0,len(feat_strides)):
        feat_height, feat_width,A,total_anchors = fpn_args[feat_id]
        # map up to the full (unfiltered) set of anchors of this level
        labels = _unmap(fpn_labels[fpn_anchors_fid[feat_id]:fpn_anchors_fid[feat_id+1]],total_anchors,fpn_inds_inside[feat_id],fill = -1)
        bbox_targets = _unmap(fpn_bbox_targets[fpn_anchors_fid[feat_id]:fpn_anchors_fid[feat_id+1]], total_anchors, fpn_inds_inside[feat_id], fill=0)
        bbox_weights = _unmap(fpn_bbox_weights[fpn_anchors_fid[feat_id]:fpn_anchors_fid[feat_id+1]], total_anchors, fpn_inds_inside[feat_id], fill=0)

        # (h, w, A) -> (A, h, w) ordering before flattening
        labels = labels.reshape((1,feat_height, feat_width,A)).transpose(0,3,1,2)
        labels = labels.reshape((1,A*feat_height*feat_width))
        bbox_targets = bbox_targets.reshape((1,feat_height,feat_width,A*4)).transpose(0,3,1,2)
        bbox_targets = bbox_targets.reshape((1, A * 4, -1))
        bbox_weights = bbox_weights.reshape((1, feat_height, feat_width, A * 4)).transpose((0, 3, 1, 2))
        bbox_weights = bbox_weights.reshape((1, A * 4, -1))

        label_list.append(labels)
        bbox_target_list.append(bbox_targets)
        bbox_weight_list.append(bbox_weights)

    label = {
        'label':np.concatenate(label_list,axis = 1),
        'bbox_target':np.concatenate(bbox_target_list, axis = 2),
        'bbox_weight':np.concatenate(bbox_weight_list,axis = 2)
    }

    return label
Example #12
0
def assign_anchor(feat_shape_p4,
                  feat_shape_p5,
                  feat_shape_p6,
                  feat_shape_p7,
                  gt_boxes,
                  im_info,
                  cfg,
                  feat_stride_p4=16,
                  scales_p4=(8, ),
                  ratios_p4=(0.75, 1, 1.5),
                  feat_stride_p5=32,
                  scales_p5=(8, ),
                  ratios_p5=(0.75, 1, 1.5),
                  feat_stride_p6=64,
                  scales_p6=(8, ),
                  ratios_p6=(0.75, 1, 1.5),
                  feat_stride_p7=128,
                  scales_p7=(8, ),
                  ratios_p7=(0.75, 1, 1.5),
                  allowed_border=0):
    """
    assign ground truth boxes to the anchor positions of levels p4..p7

    Every anchor of every level is kept (no inside-image filtering) and no fg/bg
    subsampling is done.

    :param feat_shape_p4..p7: per-level feature shape; the last two entries are read
        as (feat_height, feat_width)
    :param gt_boxes: ground truth [n, 5]; columns :4 are the box, column 4 is the
        label copied onto matched anchors
    :param im_info: accepted for interface compatibility, not read
    :param cfg: config providing TRAIN.RPN_POSITIVE_OVERLAP,
        TRAIN.RPN_NEGATIVE_OVERLAP and TRAIN.RPN_BBOX_WEIGHTS
    :param feat_stride_p4..p7: currently NOT used, strides are fixed to (16, 32, 64, 128)
    :param scales_p5: anchor scales used for ALL levels; 1-D means shared, 2-D means
        one row per level
    :param ratios_p5: anchor ratios used for ALL levels (same 1-D / 2-D convention)
    :param scales_p4, scales_p6, scales_p7, ratios_p4, ratios_p6, ratios_p7:
        currently NOT used
    :param allowed_border: currently NOT used
    :return: dict of label
    'label': shape (1, sum_i A*h_i*w_i); gt label where the best overlap reaches
        RPN_POSITIVE_OVERLAP, 0 below RPN_NEGATIVE_OVERLAP, -1 in between
    'bbox_target': shape (1, 4*A, sum_i h_i*w_i), computed for every anchor
    'bbox_weight': shape (1, 4*A, sum_i h_i*w_i), non-zero only where label >= 1
    """

    feat_shapes = [feat_shape_p4, feat_shape_p5, feat_shape_p6, feat_shape_p7]
    # NOTE(review): the feat_stride_pX arguments are shadowed by these constants and
    # only the p5 scales/ratios are consulted. Left as is so that existing callers
    # keep getting the same anchors; confirm before wiring the arguments through.
    feat_strides = [16, 32, 64, 128]
    scales = np.array(scales_p5)
    ratios = np.array(ratios_p5)

    def _unmap(data, count, inds, fill=0):
        """ scatter data (a subset selected by inds) back into an array of size count """
        if len(data.shape) == 1:
            ret = np.empty((count, ), dtype=np.float32)
            ret.fill(fill)
            ret[inds] = data
        else:
            ret = np.empty((count, ) + data.shape[1:], dtype=np.float32)
            ret.fill(fill)
            ret[inds, :] = data
        return ret

    fpn_args = []
    # number of anchors per level; turned into cumulative offsets further down
    fpn_anchors_fid = np.zeros(0).astype(int)
    fpn_anchors = np.zeros([0, 4])
    fpn_labels = np.zeros(0)
    fpn_inds_inside = []
    for feat_id in range(len(feat_strides)):
        # len(scales.shape) == 1 just for backward compatibility, will remove in the future
        if len(scales.shape) == 1:
            base_anchors = generate_anchors(base_size=feat_strides[feat_id],
                                            ratios=ratios,
                                            scales=scales)
        else:
            assert len(scales.shape) == len(ratios.shape) == 2
            base_anchors = generate_anchors(base_size=feat_strides[feat_id],
                                            ratios=ratios[feat_id],
                                            scales=scales[feat_id])
        num_anchors = base_anchors.shape[0]

        feat_height, feat_width = feat_shapes[feat_id][-2:]

        # 1. generate shifted anchors on the feature grid
        shift_x = np.arange(0, feat_width) * feat_strides[feat_id]
        shift_y = np.arange(0, feat_height) * feat_strides[feat_id]
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                            shift_y.ravel())).transpose()
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = num_anchors
        K = shifts.shape[0]
        all_anchors = base_anchors.reshape((1, A, 4)) + shifts.reshape(
            (1, K, 4)).transpose((1, 0, 2))
        all_anchors = all_anchors.reshape((K * A, 4))
        total_anchors = int(K * A)

        # No border filtering: every anchor counts as "inside".
        # np.arange works on both Python 2 and 3 (the former xrange is Python 2 only).
        inds_inside = np.arange(total_anchors)
        anchors = all_anchors[inds_inside, :]

        # label: >= 1 is positive, 0 is negative, -1 is dont care
        labels = np.empty((len(inds_inside), ), dtype=np.float32)
        labels.fill(-1)

        fpn_anchors_fid = np.hstack((fpn_anchors_fid, len(inds_inside)))
        fpn_anchors = np.vstack((fpn_anchors, anchors))
        fpn_labels = np.hstack((fpn_labels, labels))
        fpn_inds_inside.append(inds_inside)
        fpn_args.append([feat_height, feat_width, A, total_anchors])

    if gt_boxes.size > 0:
        # overlap between the anchors and the gt boxes
        # overlaps (ex, gt)
        # The builtin float replaces the np.float alias, which NumPy >= 1.24 no longer has.
        overlaps = bbox_overlaps(fpn_anchors.astype(float),
                                 gt_boxes.astype(float))
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(len(fpn_anchors)), argmax_overlaps]

        # start from the label of the best-matching gt box (fancy indexing copies,
        # so gt_boxes itself is not modified), then demote weak matches
        labels = gt_boxes[argmax_overlaps, 4]
        labels[max_overlaps < cfg.TRAIN.RPN_POSITIVE_OVERLAP] = -1
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
        fpn_labels = labels
    else:
        fpn_labels[:] = 0

    # per-level counts -> offsets of each level inside the concatenated arrays
    fpn_anchors_fid = np.hstack((0, fpn_anchors_fid.cumsum()))

    # targets are computed for every anchor; the weights below select the positives
    fpn_bbox_targets = np.zeros((len(fpn_anchors), 4), dtype=np.float32)
    if gt_boxes.size > 0:
        fpn_bbox_targets[:] = bbox_transform(fpn_anchors,
                                             gt_boxes[argmax_overlaps, :4])
    fpn_bbox_weights = np.zeros((len(fpn_anchors), 4), dtype=np.float32)
    fpn_bbox_weights[fpn_labels >= 1, :] = np.array(cfg.TRAIN.RPN_BBOX_WEIGHTS)

    label_list = []
    bbox_target_list = []
    bbox_weight_list = []
    for feat_id in range(0, len(feat_strides)):
        feat_height, feat_width, A, total_anchors = fpn_args[feat_id]
        begin = fpn_anchors_fid[feat_id]
        end = fpn_anchors_fid[feat_id + 1]
        # map up to original set of anchors (also casts to float32)
        labels = _unmap(fpn_labels[begin:end],
                        total_anchors,
                        fpn_inds_inside[feat_id],
                        fill=-1)
        bbox_targets = _unmap(fpn_bbox_targets[begin:end],
                              total_anchors,
                              fpn_inds_inside[feat_id],
                              fill=0)
        bbox_weights = _unmap(fpn_bbox_weights[begin:end],
                              total_anchors,
                              fpn_inds_inside[feat_id],
                              fill=0)

        # (h, w, A) -> (A, h, w) ordering before flattening
        labels = labels.reshape(
            (1, feat_height, feat_width, A)).transpose(0, 3, 1, 2)
        labels = labels.reshape((1, A * feat_height * feat_width))

        bbox_targets = bbox_targets.reshape(
            (1, feat_height, feat_width, A * 4)).transpose(0, 3, 1, 2)
        bbox_targets = bbox_targets.reshape((1, A * 4, -1))
        bbox_weights = bbox_weights.reshape(
            (1, feat_height, feat_width, A * 4)).transpose((0, 3, 1, 2))
        bbox_weights = bbox_weights.reshape((1, A * 4, -1))

        label_list.append(labels)
        bbox_target_list.append(bbox_targets)
        bbox_weight_list.append(bbox_weights)

    label = {
        'label': np.concatenate(label_list, axis=1),
        'bbox_target': np.concatenate(bbox_target_list, axis=2),
        'bbox_weight': np.concatenate(bbox_weight_list, axis=2)
    }

    return label
Example #13
0
def sample_rois(rois, fg_rois_per_image, rois_per_image, num_classes,
                labels=None, overlaps=None, bbox_targets=None, gt_boxes=None, gt_kps=None):
    """
    generate random sample of ROIs comprising foreground and background examples
    :param rois: all_rois [n, 4]; e2e: [n, 5] with batch_index
    :param fg_rois_per_image: foreground roi number
    :param rois_per_image: total roi number
    :param num_classes: number of classes
    :param labels: maybe precomputed
    :param overlaps: maybe precomputed (max_overlaps)
    :param bbox_targets: maybe precomputed
    :param gt_boxes: optional for e2e [n, 5] (x1, y1, x2, y2, cls)
    :param gt_kps: optional for e2e [n, num_kps*3] (x1, y1, v1, ...)
    :return: dict with 'rois_output', 'label', 'bbox_target', 'bbox_weight' and,
        when gt_kps is given, 'kps_label', 'kps_target', 'kps_weight'
    :raises ValueError: if the minibatch has to be padded but rois is empty

    Thresholds and normalization settings are read from the module-level ``cfg``.
    NOTE: the roi -> gt assignment is only computed when ``labels`` is None, so
    precomputed ``labels`` must come with precomputed ``bbox_targets`` and without
    ``gt_kps``; otherwise the assignment is undefined and a NameError is raised.
    """
    if labels is None:
        # The builtin float replaces the np.float alias, which NumPy >= 1.24 no longer has.
        overlaps = bbox_overlaps(rois[:, 1:].astype(float), gt_boxes[:, :4].astype(float))
        gt_assignment = overlaps.argmax(axis=1)
        overlaps = overlaps.max(axis=1)
        labels = gt_boxes[gt_assignment, 4]

    # foreground RoI with FG_THRESH overlap
    fg_indexes = np.where(overlaps >= cfg.TRAIN.FG_THRESH)[0]
    # guard against the case when an image has fewer than fg_rois_per_image foreground RoIs
    fg_rois_per_this_image = np.minimum(fg_rois_per_image, fg_indexes.size)
    # Sample foreground regions without replacement
    if len(fg_indexes) > fg_rois_per_this_image:
        fg_indexes = npr.choice(fg_indexes, size=fg_rois_per_this_image, replace=False)

    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_indexes = np.where((overlaps < cfg.TRAIN.BG_THRESH_HI) & (overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
    # Compute number of background RoIs to take from this image (guarding against there being fewer than desired)
    bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
    bg_rois_per_this_image = np.minimum(bg_rois_per_this_image, bg_indexes.size)
    # Sample background regions without replacement
    if len(bg_indexes) > bg_rois_per_this_image:
        bg_indexes = npr.choice(bg_indexes, size=bg_rois_per_this_image, replace=False)

    # indexes selected: foreground first, background after
    keep_indexes = np.append(fg_indexes, bg_indexes)

    # pad more to ensure a fixed minibatch size
    if keep_indexes.shape[0] < rois_per_image and len(rois) == 0:
        # With nothing to draw from, the loop below could never make progress.
        raise ValueError('cannot pad the minibatch: rois is empty')
    while keep_indexes.shape[0] < rois_per_image:
        gap = np.minimum(len(rois), rois_per_image - keep_indexes.shape[0])
        gap_indexes = npr.choice(range(len(rois)), size=gap, replace=False)
        keep_indexes = np.append(keep_indexes, gap_indexes)

    # select labels
    labels = labels[keep_indexes]
    # everything after the sampled foreground RoIs (background and padding) is labelled 0
    labels[fg_rois_per_this_image:] = 0
    rois = rois[keep_indexes]

    # load or compute bbox_target
    if bbox_targets is not None:
        bbox_target_data = bbox_targets[keep_indexes, :]
    else:
        targets = bbox_transform(rois[:, 1:], gt_boxes[gt_assignment[keep_indexes], :4])
        if cfg.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED:
            targets = ((targets - np.array(cfg.TRAIN.BBOX_MEANS))
                       / np.array(cfg.TRAIN.BBOX_STDS))
        bbox_target_data = np.hstack((labels[:, np.newaxis], targets))

    bbox_targets, bbox_weights = \
        expand_bbox_regression_targets(bbox_target_data, num_classes, cfg)

    res = {'rois_output': rois,
           'label'      : labels,
           'bbox_target': bbox_targets,
           'bbox_weight': bbox_weights,
        }
    if gt_kps is not None:
        keep_kps = gt_kps[gt_assignment[keep_indexes]]
        n_keep = keep_kps.shape[0]
        K = cfg.dataset.NUM_KEYPOINTS
        assert gt_kps.shape[1] == K*3

        G = cfg.network.KEYPOINTS_POOLED_SIZE
        # keypoint labels default to -1, targets and weights to 0
        kps_labels = np.empty([n_keep, K], dtype=np.float32)
        kps_labels.fill(-1)
        kps_targets = np.zeros([n_keep, K, G, G, 2], dtype=np.float32)
        kps_weights = kps_targets.copy()
        num_fg = fg_indexes.size
        assert num_fg > 0, 'need at least one roi'

        # assign kp targets; only the leading num_fg (foreground) rows are filled
        fg_kps_label, fg_kps_target, fg_kps_weight =  assign_keypoints(rois[:num_fg, 1:], keep_kps[:num_fg], pooled_size=G)
        kps_labels[:num_fg]  = fg_kps_label
        kps_targets[:num_fg] = fg_kps_target
        # weights are scaled by (roughly) 1 / number of foreground RoIs
        normalizer = 1.0 / (num_fg + 1e-3)
        kps_weights[:num_fg] = fg_kps_weight * normalizer

        res['kps_label'] = kps_labels.reshape([-1])
        # (n, K, G, G, 2) -> (n, K, 2, G, G) -> (n, K*2, G, G)
        res['kps_target'] = kps_targets.transpose([0,1,4,2,3]).reshape([n_keep, -1, G, G])
        res['kps_weight'] = kps_weights.transpose([0,1,4,2,3]).reshape([n_keep, -1, G, G])

    return res
Example #14
0
def assign_anchor(feat_shape_p3,
                  feat_shape_p4,
                  feat_shape_p5,
                  feat_shape_p6,
                  gt_boxes,
                  im_info,
                  cfg,
                  feat_stride_p3=4,
                  scales_p3=(8, ),
                  ratios_p3=(0.75, 1, 1.5),
                  feat_stride_p4=8,
                  scales_p4=(8, ),
                  ratios_p4=(0.75, 1, 1.5),
                  feat_stride_p5=16,
                  scales_p5=(8, ),
                  ratios_p5=(0.75, 1, 1.5),
                  feat_stride_p6=4,
                  scales_p6=(8, ),
                  ratios_p6=(0.75, 1, 1.5),
                  allowed_border=1):
    """
    assign ground truth boxes to the anchor positions of levels p3..p6

    Each level is labelled independently; every anchor is kept (no inside-image
    filtering) and no fg/bg subsampling is done.

    :param feat_shape_p3..p6: per-level feature shape; the last two entries are read
        as (feat_height, feat_width)
    :param gt_boxes: ground truth [n, 5]; columns :4 are the box, the last column is
        the label copied (truncated to int) onto matched anchors
    :param im_info: accepted for interface compatibility, not read
    :param cfg: config providing TRAIN.RPN_NEGATIVE_OVERLAP,
        TRAIN.RPN_POSITIVE_OVERLAP and TRAIN.RPN_BBOX_WEIGHTS
    :param feat_stride_pX, scales_pX, ratios_pX: currently NOT used; strides are fixed
        to (8, 16, 32, 64), scales to (8, 10, 12) and ratios to (0.5, 1, 2)
    :param allowed_border: currently NOT used
    :return: dict of label, one entry per level X in p3, p4, p5, p6
    'label/X': shape (1, A*h*w) in (h, w, A) order; gt label where the best overlap
        reaches RPN_POSITIVE_OVERLAP, 0 below RPN_NEGATIVE_OVERLAP, -1 in between
    'bbox_target/X': shape (1, 4*A, h, w), computed for every anchor
    'bbox_weight/X': shape (1, 4*A, h, w), non-zero only where label > 0
    """

    feat_shape = [feat_shape_p3, feat_shape_p4, feat_shape_p5, feat_shape_p6]
    # NOTE(review): the feat_stride_pX / scales_pX / ratios_pX arguments are shadowed
    # by these constants, and the stride defaults in the signature differ from the
    # values used here. Left as is so existing callers keep the same anchors;
    # confirm the intended values before wiring the arguments through.
    feat_stride = [8, 16, 32, 64]
    ratios = (0.5, 1, 2)
    scales = np.array((8, 10, 12), dtype=np.float32)
    assert len(feat_stride) == len(feat_shape), \
        'length of feat_stride is not equal to length of feat_shape'

    labels_list = []
    bbox_targets_list = []
    bbox_weights_list = []
    for stride, shape in zip(feat_stride, feat_shape):
        base_anchors = generate_anchors(base_size=stride,
                                        ratios=list(ratios),
                                        scales=scales)
        feat_height, feat_width = shape[-2:]

        # 1. generate shifted anchors on the feature grid
        shift_x = np.arange(0, feat_width) * stride
        shift_y = np.arange(0, feat_height) * stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                            shift_y.ravel())).transpose()
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = base_anchors.shape[0]
        K = shifts.shape[0]
        anchors = base_anchors.reshape((1, A, 4)) + shifts.reshape(
            (1, K, 4)).transpose((1, 0, 2))
        anchors = anchors.reshape((K * A, 4))
        total_anchors = len(anchors)

        # label: > 0 is positive, 0 is negative, -1 is dont care
        if gt_boxes.size > 0:
            # The builtin float / int replace the np.float / np.int aliases, which
            # NumPy >= 1.24 no longer has.
            overlaps = bbox_overlaps(anchors.astype(float),
                                     gt_boxes.astype(float))
            argmax_overlaps = overlaps.argmax(axis=1)
            max_overlaps = overlaps[np.arange(total_anchors), argmax_overlaps]

            # label of the best-matching gt box for every anchor (a fresh int array,
            # gt_boxes itself is not modified), then demote weak matches
            labels = gt_boxes[argmax_overlaps, -1].astype(int)
            labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
            labels[(max_overlaps >= cfg.TRAIN.RPN_NEGATIVE_OVERLAP)
                   & (max_overlaps < cfg.TRAIN.RPN_POSITIVE_OVERLAP)] = -1
        else:
            labels = np.zeros((total_anchors, ), dtype=np.float32)

        # targets are computed for every anchor; the weights select the positives
        bbox_targets = np.zeros((total_anchors, 4), dtype=np.float32)
        if gt_boxes.size > 0:
            bbox_targets[:] = bbox_transform(anchors,
                                             gt_boxes[argmax_overlaps, :4])
        bbox_weights = np.zeros((total_anchors, 4), dtype=np.float32)
        bbox_weights[labels > 0, :] = np.array(cfg.TRAIN.RPN_BBOX_WEIGHTS)

        # No anchors were filtered out, so there is nothing to map back: only the
        # float32 cast and the reshapes remain.
        labels = labels.astype(np.float32).reshape(
            (1, A * feat_height * feat_width))
        bbox_targets = bbox_targets.reshape(
            (1, feat_height, feat_width, A * 4)).transpose(0, 3, 1, 2)
        bbox_weights = bbox_weights.reshape(
            (1, feat_height, feat_width, A * 4)).transpose((0, 3, 1, 2))

        labels_list.append(labels)
        bbox_targets_list.append(bbox_targets)
        bbox_weights_list.append(bbox_weights)

    label = {
        'label/p3': labels_list[0],
        'label/p4': labels_list[1],
        'label/p5': labels_list[2],
        'label/p6': labels_list[3],
        'bbox_target/p3': bbox_targets_list[0],
        'bbox_target/p4': bbox_targets_list[1],
        'bbox_target/p5': bbox_targets_list[2],
        'bbox_target/p6': bbox_targets_list[3],
        'bbox_weight/p3': bbox_weights_list[0],
        'bbox_weight/p4': bbox_weights_list[1],
        'bbox_weight/p5': bbox_weights_list[2],
        'bbox_weight/p6': bbox_weights_list[3]
    }

    return label
def assign_quadrangle_anchor(feat_shape,
                             gt_boxes,
                             im_info,
                             cfg,
                             feat_strides=(64, 32, 16, 8, 4),
                             scales=(8, 16, 32),
                             ratios=(0.5, 1, 2),
                             allowed_border=0):
    """
    Assign quadrangle ground truth to anchors generated on several feature levels.

    Each quadrangle is first reduced to its axis-aligned bounding box (min/max
    of its four x and four y coordinates); anchors from all levels are then
    labelled and regressed against those boxes jointly.

    :param feat_shape: per-level feature shapes; ``feat_shape[i][-2:]`` is
        read as (feat_height, feat_width) of level ``i``
    :param gt_boxes: 2-D array whose columns 0..7 are read as
        x, y, x, y, x, y, x, y of the four quadrangle corners
    :param im_info: ``im_info[0]`` is used; its entries 0 and 1 bound the
        anchors' y2 and x2 respectively (image height and width)
    :param cfg: config; reads ``cfg.TRAIN.RPN_CLOBBER_POSITIVES``,
        ``RPN_NEGATIVE_OVERLAP``, ``RPN_POSITIVE_OVERLAP``,
        ``RPN_FG_FRACTION``, ``RPN_BATCH_SIZE`` and ``RPN_BBOX_WEIGHTS``
    :param feat_strides: anchor position step of each level, one per entry of
        ``feat_shape``
    :param scales: used to generate anchors, affects num_anchors (per location)
    :param ratios: aspect ratios of generated anchors
    :param allowed_border: anchors may stick out of the image by at most this
        many pixels, otherwise they are ignored
    :return: dict with, for A anchors per location and N = sum over levels of
        feat_height * feat_width:
        'label': shape (1, A * N); 1 positive, 0 negative, -1 ignored
        'bbox_target': shape (1, A * 4, N)
        'bbox_weight': shape (1, A * 4, N); ``RPN_BBOX_WEIGHTS`` on positive
        anchors, 0 elsewhere
    """
    def _unmap(data, count, inds, fill=0):
        """Scatter ``data`` to positions ``inds`` of a ``count``-long array pre-filled with ``fill``."""
        if len(data.shape) == 1:
            ret = np.empty((count, ), dtype=np.float32)
            ret.fill(fill)
            ret[inds] = data
        else:
            ret = np.empty((count, ) + data.shape[1:], dtype=np.float32)
            ret.fill(fill)
            ret[inds, :] = data
        return ret

    DEBUG = False
    im_info = im_info[0]
    scales = np.array(scales, dtype=np.float32)
    anchors_list = []
    anchors_num_list = []
    inds_inside_list = []
    feat_infos = []
    A_list = []

    for i in range(len(feat_strides)):
        feat_stride = feat_strides[i]
        base_anchors = generate_anchors(base_size=feat_stride,
                                        ratios=list(ratios),
                                        scales=scales)

        num_anchors = base_anchors.shape[0]
        feat_height, feat_width = feat_shape[i][-2:]
        feat_infos.append([feat_height, feat_width])

        # one (x, y, x, y) shift per feature-map cell
        shift_x = np.arange(0, feat_width) * feat_stride
        shift_y = np.arange(0, feat_height) * feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                            shift_y.ravel())).transpose()

        A = num_anchors
        A_list.append(A)

        K = shifts.shape[0]

        # (1, A, 4) base anchors + (K, 1, 4) shifts -> (K, A, 4) -> (K * A, 4)
        all_anchors = base_anchors.reshape((1, A, 4)) + shifts.reshape(
            (1, K, 4)).transpose((1, 0, 2))

        all_anchors = all_anchors.reshape((K * A, 4))
        total_anchors = int(K * A)

        anchors_num_list.append(total_anchors)

        # only keep anchors inside the image
        inds_inside = np.where(
            (all_anchors[:, 0] >= -allowed_border)
            & (all_anchors[:, 1] >= -allowed_border)
            & (all_anchors[:, 2] < im_info[1] + allowed_border)
            & (all_anchors[:, 3] < im_info[0] + allowed_border))[0]

        if DEBUG:
            print('total_anchors', total_anchors)
            print('inds_inside', len(inds_inside))

        # keep only inside anchors
        anchors = all_anchors[inds_inside, :]
        if DEBUG:
            print('anchors shape', anchors.shape)

        anchors_list.append(anchors)
        inds_inside_list.append(inds_inside)

    anchors = np.concatenate(anchors_list)
    # turn per-level indices into indices into the concatenation of all levels
    for i in range(1, len(inds_inside_list)):
        inds_inside_list[i] = inds_inside_list[i] + sum(anchors_num_list[:i])
    inds_inside = np.concatenate(inds_inside_list)
    total_anchors = sum(anchors_num_list)

    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.empty((len(inds_inside), ), dtype=np.float32)
    labels.fill(-1)

    # axis-aligned bounding box (x1, y1, x2, y2) of every quadrangle
    gt_boxes_bbox = np.zeros((gt_boxes.shape[0], 4), dtype=gt_boxes.dtype)

    ex_x = np.vstack(
        (gt_boxes[:, 0], gt_boxes[:, 2], gt_boxes[:, 4], gt_boxes[:, 6]))
    ex_y = np.vstack(
        (gt_boxes[:, 1], gt_boxes[:, 3], gt_boxes[:, 5], gt_boxes[:, 7]))
    gt_boxes_bbox[:, 0] = np.amin(ex_x, axis=0)
    gt_boxes_bbox[:, 1] = np.amin(ex_y, axis=0)
    gt_boxes_bbox[:, 2] = np.amax(ex_x, axis=0)
    gt_boxes_bbox[:, 3] = np.amax(ex_y, axis=0)

    if gt_boxes.size > 0:
        # overlap between the anchors and the gt boxes
        # overlaps (ex, gt)
        # np.float64 replaces the removed np.float alias (same dtype)
        overlaps = bbox_overlaps(anchors.astype(np.float64),
                                 gt_boxes_bbox.astype(np.float64))
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps,
                                   np.arange(overlaps.shape[1])]
        # every anchor that ties for the best overlap of some gt box
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

        if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels first so that positive labels can clobber them
            labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

        # fg label: for each gt, anchor with highest overlap
        labels[gt_argmax_overlaps] = 1

        # fg label: above threshold IoU
        labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

        if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels last so that negative labels can clobber positives
            labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
    else:
        labels[:] = 0

    # subsample positive labels if we have too many
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCH_SIZE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds,
                                  size=(len(fg_inds) - num_fg),
                                  replace=False)
        if DEBUG:
            # deterministic selection while debugging
            disable_inds = fg_inds[:(len(fg_inds) - num_fg)]
        labels[disable_inds] = -1

    # subsample negative labels if we have too many
    num_bg = cfg.TRAIN.RPN_BATCH_SIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(bg_inds,
                                  size=(len(bg_inds) - num_bg),
                                  replace=False)
        if DEBUG:
            # deterministic selection while debugging
            disable_inds = bg_inds[:(len(bg_inds) - num_bg)]
        labels[disable_inds] = -1

    # regression targets towards the best-overlapping gt bounding box
    bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if gt_boxes.size > 0:
        bbox_targets[:] = bbox_transform(anchors,
                                         gt_boxes_bbox[argmax_overlaps, :4])

    bbox_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_weights[labels == 1, :] = np.array(cfg.TRAIN.RPN_BBOX_WEIGHTS)

    if DEBUG:
        _sums = bbox_targets[labels == 1, :].sum(axis=0)
        _squared_sums = (bbox_targets[labels == 1, :]**2).sum(axis=0)
        _counts = np.sum(labels == 1)
        means = _sums / (_counts + 1e-14)
        stds = np.sqrt(_squared_sums / _counts - means**2)
        print('means', means)
        print('stdevs', stds)

    # map up to original set of anchors
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_weights = _unmap(bbox_weights, total_anchors, inds_inside, fill=0)

    if DEBUG:
        if gt_boxes.size > 0:
            # max_overlaps only exists when there was at least one gt box
            print('rpn: max max_overlaps', np.max(max_overlaps))
        print('rpn: num_positives', np.sum(labels == 1))
        print('rpn: num_negatives', np.sum(labels == 0))
        _fg_sum = np.sum(labels == 1)
        _bg_sum = np.sum(labels == 0)
        _count = 1
        print('rpn: num_positive avg', _fg_sum / _count)
        print('rpn: num_negative avg', _bg_sum / _count)

    # reshape: cut the flat arrays back into per-level pieces
    label_list = []
    bbox_target_list = []
    bbox_weight_list = []
    start = 0
    for i in range(len(feat_strides)):
        feat_height, feat_width = feat_infos[i]
        A = A_list[i]
        end = start + anchors_num_list[i]
        level_label = labels[start:end]
        level_target = bbox_targets[start:end]
        level_weight = bbox_weights[start:end]
        start = end

        # (H * W * A,) -> (1, A, H, W) -> (1, A * H * W)
        level_label = level_label.reshape(
            (1, feat_height, feat_width, A)).transpose(0, 3, 1, 2)
        level_label = level_label.reshape((1, A * feat_height * feat_width))
        # (H * W * A, 4) -> (1, A * 4, H * W)
        level_target = level_target.reshape(
            (1, feat_height * feat_width, A * 4)).transpose(0, 2, 1)
        level_weight = level_weight.reshape(
            (1, feat_height * feat_width, A * 4)).transpose((0, 2, 1))

        label_list.append(level_label)
        bbox_target_list.append(level_target)
        bbox_weight_list.append(level_weight)

    label_concat = np.concatenate(label_list, axis=1)
    bbox_target_concat = np.concatenate(bbox_target_list, axis=2)
    bbox_weight_concat = np.concatenate(bbox_weight_list, axis=2)

    label = {
        'label': label_concat,
        'bbox_target': bbox_target_concat,
        'bbox_weight': bbox_weight_concat
    }
    return label
Example #16
0
def sample_rois(rois, fg_rois_per_image, rois_per_image, num_classes, cfg,
                labels=None, overlaps=None, bbox_targets=None, gt_boxes=None):
    """
    generate random sample of ROIs comprising foreground and background examples

    When ``labels`` is None, labels and overlaps are computed from ``rois``
    and ``gt_boxes``. When ``labels`` is supplied, ``overlaps`` and
    ``bbox_targets`` must be supplied too: the gt assignment needed to compute
    targets is only available on the ``labels is None`` path.

    :param rois: all_rois [n, 4]; e2e: [n, 5] with batch_index
        (columns 1: are used as the box when overlaps/targets are computed)
    :param fg_rois_per_image: foreground roi number
    :param rois_per_image: total roi number
    :param num_classes: number of classes
    :param cfg: config; reads ``cfg.TRAIN.FG_THRESH``, ``BG_THRESH_HI``,
        ``BG_THRESH_LO``, ``BBOX_NORMALIZATION_PRECOMPUTED``, ``BBOX_MEANS``
        and ``BBOX_STDS``
    :param labels: maybe precomputed
    :param overlaps: maybe precomputed (max_overlaps)
    :param bbox_targets: maybe precomputed
    :param gt_boxes: optional for e2e [n, 5] (x1, y1, x2, y2, cls)
    :return: (rois, labels, bbox_targets, bbox_weights)
    """
    if labels is None:
        # np.float64 replaces the removed np.float alias (same dtype)
        overlaps = bbox_overlaps(rois[:, 1:].astype(np.float64), gt_boxes[:, :4].astype(np.float64))
        gt_assignment = overlaps.argmax(axis=1)
        overlaps = overlaps.max(axis=1)
        labels = gt_boxes[gt_assignment, 4]

    # foreground RoI with FG_THRESH overlap
    fg_indexes = np.where(overlaps >= cfg.TRAIN.FG_THRESH)[0]
    # guard against the case when an image has fewer than fg_rois_per_image foreground RoIs
    fg_rois_per_this_image = np.minimum(fg_rois_per_image, fg_indexes.size)
    # Sample foreground regions without replacement
    if len(fg_indexes) > fg_rois_per_this_image:
        fg_indexes = npr.choice(fg_indexes, size=fg_rois_per_this_image, replace=False)

    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_indexes = np.where((overlaps < cfg.TRAIN.BG_THRESH_HI) & (overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
    # Compute number of background RoIs to take from this image (guarding against there being fewer than desired)
    bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
    bg_rois_per_this_image = np.minimum(bg_rois_per_this_image, bg_indexes.size)
    # Sample background regions without replacement
    if len(bg_indexes) > bg_rois_per_this_image:
        bg_indexes = npr.choice(bg_indexes, size=bg_rois_per_this_image, replace=False)

    # indexes selected: foreground first, then background
    keep_indexes = np.append(fg_indexes, bg_indexes)

    # pad more to ensure a fixed minibatch size
    while keep_indexes.shape[0] < rois_per_image:
        gap = np.minimum(len(rois), rois_per_image - keep_indexes.shape[0])
        gap_indexes = npr.choice(range(len(rois)), size=gap, replace=False)
        keep_indexes = np.append(keep_indexes, gap_indexes)

    # select labels (fancy indexing copies, so the caller's array is untouched)
    labels = labels[keep_indexes]
    # everything after the foreground samples (background and padding) gets label 0
    labels[fg_rois_per_this_image:] = 0
    rois = rois[keep_indexes]

    # load or compute bbox_target
    if bbox_targets is not None:
        bbox_target_data = bbox_targets[keep_indexes, :]
    else:
        targets = bbox_transform(rois[:, 1:], gt_boxes[gt_assignment[keep_indexes], :4])
        if cfg.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED:
            targets = ((targets - np.array(cfg.TRAIN.BBOX_MEANS))
                       / np.array(cfg.TRAIN.BBOX_STDS))
        # prepend the class label as the first column
        bbox_target_data = np.hstack((labels[:, np.newaxis], targets))

    bbox_targets, bbox_weights = \
        expand_bbox_regression_targets(bbox_target_data, num_classes, cfg)

    return rois, labels, bbox_targets, bbox_weights
Example #17
0
def assign_anchor(feat_shape,
                  gt_boxes,
                  im_info,
                  cfg,
                  feat_stride=16,
                  scales=(8, 16, 32),
                  ratios=(0.5, 1, 2),
                  allowed_border=0):
    """
    assign ground truth boxes to anchor positions

    :param feat_shape: infer output shape; ``feat_shape[-2:]`` is read as
        (feat_height, feat_width)
    :param gt_boxes: assign ground truth; columns 0..3 are used as the box
    :param im_info: ``im_info[0]`` is used; its entries 0 and 1 bound the
        anchors' y2 and x2 respectively (image height and width)
    :param cfg: config; reads ``cfg.TRAIN.RPN_CLOBBER_POSITIVES``,
        ``RPN_NEGATIVE_OVERLAP``, ``RPN_POSITIVE_OVERLAP``,
        ``RPN_FG_FRACTION``, ``RPN_BATCH_SIZE`` and ``RPN_BBOX_WEIGHTS``
    :param feat_stride: anchor position step
    :param scales: used to generate anchors, affects num_anchors (per location)
    :param ratios: aspect ratios of generated anchors
    :param allowed_border: filter out anchors with edge overlap > allowed_border
    :return: dict of label, with A = num_anchors per location
        'label': shape (1, A * feat_height * feat_width); 1 positive,
        0 negative, -1 ignored
        'bbox_target': shape (1, A * 4, feat_height, feat_width)
        'bbox_weight': shape (1, A * 4, feat_height, feat_width);
        ``RPN_BBOX_WEIGHTS`` on positive anchors, 0 elsewhere
    """
    def _unmap(data, count, inds, fill=0):
        """Scatter ``data`` to positions ``inds`` of a ``count``-long array pre-filled with ``fill``."""
        if len(data.shape) == 1:
            ret = np.empty((count, ), dtype=np.float32)
            ret.fill(fill)
            ret[inds] = data
        else:
            ret = np.empty((count, ) + data.shape[1:], dtype=np.float32)
            ret.fill(fill)
            ret[inds, :] = data
        return ret

    DEBUG = False
    im_info = im_info[0]
    scales = np.array(scales, dtype=np.float32)
    base_anchors = generate_anchors(base_size=feat_stride,
                                    ratios=list(ratios),
                                    scales=scales)
    num_anchors = base_anchors.shape[0]
    feat_height, feat_width = feat_shape[-2:]

    if DEBUG:
        print('anchors:')
        print(base_anchors)
        print('anchor shapes:')
        print(
            np.hstack((base_anchors[:, 2::4] - base_anchors[:, 0::4],
                       base_anchors[:, 3::4] - base_anchors[:, 1::4])))
        print('im_info', im_info)
        print('height', feat_height, 'width', feat_width)
        print('gt_boxes shape', gt_boxes.shape)
        print('gt_boxes', gt_boxes)

    # 1. generate proposals from bbox deltas and shifted anchors
    shift_x = np.arange(0, feat_width) * feat_stride
    shift_y = np.arange(0, feat_height) * feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                        shift_y.ravel())).transpose()
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = num_anchors
    K = shifts.shape[0]
    all_anchors = base_anchors.reshape((1, A, 4)) + shifts.reshape(
        (1, K, 4)).transpose((1, 0, 2))
    all_anchors = all_anchors.reshape((K * A, 4))
    total_anchors = int(K * A)

    # only keep anchors inside the image
    inds_inside = np.where((all_anchors[:, 0] >= -allowed_border)
                           & (all_anchors[:, 1] >= -allowed_border)
                           & (all_anchors[:, 2] < im_info[1] + allowed_border)
                           & (all_anchors[:, 3] < im_info[0] + allowed_border))[0]
    if DEBUG:
        print('total_anchors', total_anchors)
        print('inds_inside', len(inds_inside))

    # keep only inside anchors
    anchors = all_anchors[inds_inside, :]
    if DEBUG:
        print('anchors shape', anchors.shape)

    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.empty((len(inds_inside), ), dtype=np.float32)
    labels.fill(-1)

    if gt_boxes.size > 0:
        # overlap between the anchors and the gt boxes
        # overlaps (ex, gt)
        # np.float64 replaces the removed np.float alias (same dtype)
        overlaps = bbox_overlaps(anchors.astype(np.float64),
                                 gt_boxes.astype(np.float64))
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps,
                                   np.arange(overlaps.shape[1])]
        # every anchor that ties for the best overlap of some gt box
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

        if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels first so that positive labels can clobber them
            labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

        # fg label: for each gt, anchor with highest overlap
        labels[gt_argmax_overlaps] = 1

        # fg label: above threshold IoU
        labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

        if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels last so that negative labels can clobber positives
            labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
    else:
        labels[:] = 0

    # subsample positive labels if we have too many
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCH_SIZE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds,
                                  size=(len(fg_inds) - num_fg),
                                  replace=False)
        if DEBUG:
            # deterministic selection while debugging
            disable_inds = fg_inds[:(len(fg_inds) - num_fg)]
        labels[disable_inds] = -1

    # subsample negative labels if we have too many
    num_bg = cfg.TRAIN.RPN_BATCH_SIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(bg_inds,
                                  size=(len(bg_inds) - num_bg),
                                  replace=False)
        if DEBUG:
            # deterministic selection while debugging
            disable_inds = bg_inds[:(len(bg_inds) - num_bg)]
        labels[disable_inds] = -1

    # regression targets towards the best-overlapping gt box
    bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if gt_boxes.size > 0:
        bbox_targets[:] = bbox_transform(anchors,
                                         gt_boxes[argmax_overlaps, :4])

    bbox_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_weights[labels == 1, :] = np.array(cfg.TRAIN.RPN_BBOX_WEIGHTS)

    if DEBUG:
        _sums = bbox_targets[labels == 1, :].sum(axis=0)
        _squared_sums = (bbox_targets[labels == 1, :]**2).sum(axis=0)
        _counts = np.sum(labels == 1)
        means = _sums / (_counts + 1e-14)
        stds = np.sqrt(_squared_sums / _counts - means**2)
        print('means', means)
        print('stdevs', stds)

    # map up to original set of anchors
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_weights = _unmap(bbox_weights, total_anchors, inds_inside, fill=0)

    if DEBUG:
        if gt_boxes.size > 0:
            # max_overlaps only exists when there was at least one gt box
            print('rpn: max max_overlaps', np.max(max_overlaps))
        print('rpn: num_positives', np.sum(labels == 1))
        print('rpn: num_negatives', np.sum(labels == 0))
        _fg_sum = np.sum(labels == 1)
        _bg_sum = np.sum(labels == 0)
        _count = 1
        print('rpn: num_positive avg', _fg_sum / _count)
        print('rpn: num_negative avg', _bg_sum / _count)

    # (H * W * A,) -> (1, A, H, W) -> (1, A * H * W)
    labels = labels.reshape(
        (1, feat_height, feat_width, A)).transpose(0, 3, 1, 2)
    labels = labels.reshape((1, A * feat_height * feat_width))
    # (H * W * A, 4) -> (1, A * 4, H, W)
    bbox_targets = bbox_targets.reshape(
        (1, feat_height, feat_width, A * 4)).transpose(0, 3, 1, 2)
    bbox_weights = bbox_weights.reshape(
        (1, feat_height, feat_width, A * 4)).transpose((0, 3, 1, 2))

    label = {
        'label': labels,
        'bbox_target': bbox_targets,
        'bbox_weight': bbox_weights
    }
    return label
Example #18
0
def assign_anchor(feat_shape, gt_boxes, im_info, cfg, feat_stride=16,
                  scales=(8, 16, 32), ratios=(0.5, 1, 2), allowed_border=0, valid_ranges=None, invalid_anchor_threshold=0.3):
    """
    assign ground truth boxes to anchor positions, optionally once per scale range

    :param feat_shape: infer output shape; ``feat_shape[-2:]`` is read as
        (feat_height, feat_width)
    :param gt_boxes: assign ground truth; columns 0..3 are used as
        (x1, y1, x2, y2)
    :param im_info: ``im_info[0]`` is used; its entries 0 and 1 bound the
        anchors' y2 and x2 respectively (image height and width); entry 2
        multiplies the returned ``valid_ranges`` (presumably the image scale
        -- TODO confirm)
    :param cfg: config; reads ``cfg.TRAIN.RPN_CLOBBER_POSITIVES``,
        ``RPN_NEGATIVE_OVERLAP``, ``RPN_POSITIVE_OVERLAP``,
        ``RPN_FG_FRACTION``, ``RPN_BATCH_SIZE`` and ``RPN_BBOX_WEIGHTS``
    :param feat_stride: anchor position step
    :param scales: used to generate anchors, affects num_anchors (per location)
    :param ratios: aspect ratios of generated anchors
    :param allowed_border: filter out anchors with edge overlap > allowed_border
    :param valid_ranges: None, or an iterable of (low, high) pairs. For each
        pair, a gt box whose area is < low**2 or > high**2 is "invalid", and
        anchors overlapping an invalid box by more than
        ``invalid_anchor_threshold`` are ignored (label -1)
    :param invalid_anchor_threshold: overlap above which an anchor is ignored
        because of an invalid gt box
    :return: dict of label, with A = num_anchors per location
        'label': shape (1, A * feat_height * feat_width); 1 positive,
        0 negative, -1 ignored
        'bbox_target': shape (1, A * 4, feat_height, feat_width)
        'bbox_weight': shape (1, A * 4, feat_height, feat_width)
        When ``valid_ranges`` is given, the three arrays are stacked along
        axis 0 (one row per range) and 'valid_ranges' is added.
    """
    def _unmap(data, count, inds, fill=0):
        """Scatter ``data`` to positions ``inds`` of a ``count``-long array pre-filled with ``fill``."""
        if len(data.shape) == 1:
            ret = np.empty((count,), dtype=np.float32)
            ret.fill(fill)
            ret[inds] = data
        else:
            ret = np.empty((count,) + data.shape[1:], dtype=np.float32)
            ret.fill(fill)
            ret[inds, :] = data
        return ret

    im_info = im_info[0]
    scales = np.array(scales, dtype=np.float32)
    base_anchors = generate_anchors(base_size=feat_stride, ratios=list(ratios), scales=scales)
    num_anchors = base_anchors.shape[0]
    feat_height, feat_width = feat_shape[-2:]

    # 1. generate proposals from bbox deltas and shifted anchors
    shift_x = np.arange(0, feat_width) * feat_stride
    shift_y = np.arange(0, feat_height) * feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel())).transpose()
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = num_anchors
    K = shifts.shape[0]
    all_anchors = base_anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    all_anchors = all_anchors.reshape((K * A, 4))
    total_anchors = int(K * A)

    # only keep anchors inside the image
    inds_inside = np.where((all_anchors[:, 0] >= -allowed_border) &
                           (all_anchors[:, 1] >= -allowed_border) &
                           (all_anchors[:, 2] < im_info[1] + allowed_border) &
                           (all_anchors[:, 3] < im_info[0] + allowed_border))[0]

    # keep only inside anchors
    anchors = all_anchors[inds_inside, :]

    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.empty((len(inds_inside),), dtype=np.float32)
    labels.fill(-1)

    has_gt = gt_boxes.size > 0
    if has_gt:
        # overlap between the anchors and the gt boxes
        # overlaps (ex, gt)
        # np.float64 replaces the removed np.float alias (same dtype)
        overlaps = bbox_overlaps(anchors.astype(np.float64), gt_boxes.astype(np.float64))
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])]
        # every anchor that ties for the best overlap of some gt box
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

        if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels first so that positive labels can clobber them
            labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

        # fg label: for each gt, anchor with highest overlap
        labels[gt_argmax_overlaps] = 1

        # fg label: above threshold IoU
        labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

        if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels last so that negative labels can clobber positives
            labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
    else:
        labels[:] = 0

    # Regression targets depend only on the anchor -> gt assignment, not on the
    # subsampling or the scale range, so they are computed once.
    inside_targets = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if has_gt:
        inside_targets[:] = bbox_transform(anchors, gt_boxes[argmax_overlaps, :4])

    def _finalize(cur_labels):
        """Subsample ``cur_labels`` in place and return network-shaped (labels, targets, weights)."""
        # subsample positive labels if we have too many
        num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCH_SIZE)
        fg_inds = np.where(cur_labels == 1)[0]
        if len(fg_inds) > num_fg:
            disable_inds = npr.choice(fg_inds, size=(len(fg_inds) - num_fg), replace=False)
            cur_labels[disable_inds] = -1

        # subsample negative labels if we have too many
        num_bg = cfg.TRAIN.RPN_BATCH_SIZE - np.sum(cur_labels == 1)
        bg_inds = np.where(cur_labels == 0)[0]
        if len(bg_inds) > num_bg:
            disable_inds = npr.choice(bg_inds, size=(len(bg_inds) - num_bg), replace=False)
            cur_labels[disable_inds] = -1

        inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
        inside_weights[cur_labels == 1, :] = np.array(cfg.TRAIN.RPN_BBOX_WEIGHTS)

        # map up to original set of anchors
        out_labels = _unmap(cur_labels, total_anchors, inds_inside, fill=-1)
        out_targets = _unmap(inside_targets, total_anchors, inds_inside, fill=0)
        out_weights = _unmap(inside_weights, total_anchors, inds_inside, fill=0)

        # (H * W * A,) -> (1, A, H, W) -> (1, A * H * W)
        out_labels = out_labels.reshape((1, feat_height, feat_width, A)).transpose(0, 3, 1, 2)
        out_labels = out_labels.reshape((1, A * feat_height * feat_width))
        # (H * W * A, 4) -> (1, A * 4, H, W)
        out_targets = out_targets.reshape((1, feat_height, feat_width, A * 4)).transpose(0, 3, 1, 2)
        out_weights = out_weights.reshape((1, feat_height, feat_width, A * 4)).transpose((0, 3, 1, 2))
        return out_labels, out_targets, out_weights

    if valid_ranges is None:
        labels, bbox_targets, bbox_weights = _finalize(labels)
        label = {'label': labels,
                 'bbox_target': bbox_targets,
                 'bbox_weight': bbox_weights}
        return label

    if has_gt:
        # Box area from (x1, y1, x2, y2) in columns 0..3 -- the same layout
        # that is handed to bbox_transform above.
        gt_boxes_sizes = (gt_boxes[:, 2] - gt_boxes[:, 0] + 1.) * (gt_boxes[:, 3] - gt_boxes[:, 1] + 1.)

    all_labels, all_bbox_targets, all_bbox_weights = [], [], []
    for valid_range in valid_ranges:
        cls_labels = labels.copy()
        if has_gt:
            invalid_inds = np.where((gt_boxes_sizes < valid_range[0]**2) | (gt_boxes_sizes > valid_range[1]**2))[0]
            if len(invalid_inds) > 0:
                # ignore anchors that overlap an out-of-range gt box too much
                invalid_gt_boxes = gt_boxes[invalid_inds, :]
                invalid_overlaps = bbox_overlaps(anchors.astype(np.float64), invalid_gt_boxes.astype(np.float64))
                invalid_max_overlaps = invalid_overlaps.max(axis=1)

                disable_inds = np.where((invalid_max_overlaps > invalid_anchor_threshold))[0]
                cls_labels[disable_inds] = -1

        cls_labels, bbox_targets, bbox_weights = _finalize(cls_labels)

        all_labels.append(cls_labels)
        all_bbox_targets.append(bbox_targets)
        all_bbox_weights.append(bbox_weights)

    all_labels = np.vstack(all_labels)
    all_bbox_targets = np.vstack(all_bbox_targets)
    all_bbox_weights = np.vstack(all_bbox_weights)

    # NOTE(review): the returned ranges are rebuilt from hard-coded values and
    # ignore the ``valid_ranges`` argument used in the loop above -- confirm
    # against callers whether this is intended.
    valid_ranges = np.array([[0, 90], [30, 160], [90, -1]], dtype=np.float32).reshape(-1, 2)
    valid_ranges *= im_info[2]
    # a negative upper bound means "no upper limit": use the longer image side
    inds = np.where(valid_ranges[:, 1] < 0)[0]
    valid_ranges[inds, 1] = max(im_info[0], im_info[1])

    label = {'label': all_labels,
             'bbox_target': all_bbox_targets,
             'bbox_weight': all_bbox_weights,
             'valid_ranges': valid_ranges}
    return label