Beispiel #1
0
    def __init__(self, cfg, chip_size, max_n_gts=100, max_poly_len=500):
        """
        Precompute the dense anchor grid and the RPN sampling settings.

        :param cfg: config object; reads cfg.network.ANCHOR_SCALES,
            cfg.network.ANCHOR_RATIOS, cfg.network.RPN_FEAT_STRIDE and
            cfg.TRAIN.RPN_BATCH_SIZE / RPN_POSITIVE_OVERLAP /
            RPN_NEGATIVE_OVERLAP / RPN_FG_FRACTION
        :param chip_size: side length of the input chip; the same value is used
            for both the width and the height of the feature grid
            (presumably in pixels -- confirm against the caller)
        :param max_n_gts: stored as self.max_n_gts (not otherwise used here)
        :param max_poly_len: stored as self.max_poly_len (not otherwise used here)
        """
        self.scales = np.array(cfg.network.ANCHOR_SCALES, dtype=np.float32)
        self.ratios = cfg.network.ANCHOR_RATIOS
        feat_stride = cfg.network.RPN_FEAT_STRIDE
        self.max_n_gts = max_n_gts
        self.max_poly_len = max_poly_len

        # Base anchors: one per (ratio, scale) combination, centred on one cell.
        base_anchors = generate_anchors(base_size=feat_stride, ratios=list(self.ratios),
                                        scales=list(self.scales))
        self.num_anchors = base_anchors.shape[0]

        # Floor division keeps the grid size an integer on both Python 2 and 3;
        # true division would give a float here (and an extra, partially
        # covered row/column when chip_size is not a multiple of the stride).
        self.feat_width = chip_size // feat_stride
        self.feat_height = chip_size // feat_stride

        # One (x, y, x, y) shift per feature-map cell, in input coordinates.
        shift_x = np.arange(0, self.feat_width) * feat_stride
        shift_y = np.arange(0, self.feat_height) * feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel())).transpose()
        self.K = shifts.shape[0]

        # Broadcast (1, A, 4) base anchors against (K, 1, 4) shifts -> (K, A, 4),
        # then flatten to (K * A, 4).
        all_anchors = base_anchors.reshape((1, self.num_anchors, 4)) + \
                      shifts.reshape((1, self.K, 4)).transpose((1, 0, 2))
        self.all_anchors = all_anchors.reshape((self.K * self.num_anchors, 4))

        # Sampling settings used when labelling anchors.
        self.batch_size = cfg.TRAIN.RPN_BATCH_SIZE
        self.pos_thresh = cfg.TRAIN.RPN_POSITIVE_OVERLAP
        self.neg_thresh = cfg.TRAIN.RPN_NEGATIVE_OVERLAP
        self.num_fg = int(self.batch_size * cfg.TRAIN.RPN_FG_FRACTION)
Beispiel #2
0
def assign_anchor(feat_shape, gt_boxes, im_info, cfg, feat_stride=16,
                  scales=(8, 16, 32), ratios=(0.5, 1, 2), allowed_border=0):
    """
    Assign ground truth boxes to anchor positions for a single image.

    :param feat_shape: infer output shape; only the last two entries
        (feat_height, feat_width) are used
    :param gt_boxes: ground truth boxes, one per row; the first four columns
        are used as the regression reference for bbox_transform
    :param im_info: image info; only row 0 is used, where entry [0] bounds the
        anchors' y2 and entry [1] bounds their x2 (i.e. height, width)
    :param cfg: config object; reads cfg.TRAIN.RPN_CLOBBER_POSITIVES,
        RPN_NEGATIVE_OVERLAP, RPN_POSITIVE_OVERLAP, RPN_FG_FRACTION,
        RPN_BATCH_SIZE and RPN_BBOX_WEIGHTS
    :param feat_stride: anchor position step
    :param scales: used to generate anchors, affects num_anchors (per location)
    :param ratios: aspect ratios of generated anchors
    :param allowed_border: filter out anchors with edge overlap > allowed_border
    :return: dict of label (A = anchors per location)
    'label': of shape (1, A * feat_height * feat_width);
             1 is positive, 0 is negative, -1 is dont care
    'bbox_target': of shape (1, A * 4, feat_height, feat_width)
    'bbox_weight': of shape (1, A * 4, feat_height, feat_width);
                   cfg.TRAIN.RPN_BBOX_WEIGHTS on positive anchors, 0 elsewhere
    """
    def _unmap(data, count, inds, fill=0):
        """ unmap a subset inds of data into original data of size count """
        if len(data.shape) == 1:
            ret = np.empty((count,), dtype=np.float32)
            ret.fill(fill)
            ret[inds] = data
        else:
            ret = np.empty((count,) + data.shape[1:], dtype=np.float32)
            ret.fill(fill)
            ret[inds, :] = data
        return ret

    # Debug output uses single-argument print(...) with %-formatting so it
    # behaves the same under Python 2 and Python 3.
    DEBUG = False
    im_info = im_info[0]
    scales = np.array(scales, dtype=np.float32)
    base_anchors = generate_anchors(base_size=feat_stride, ratios=list(ratios), scales=scales)
    num_anchors = base_anchors.shape[0]
    feat_height, feat_width = feat_shape[-2:]

    if DEBUG:
        print('anchors:')
        print(base_anchors)
        print('anchor shapes:')
        print(np.hstack((base_anchors[:, 2::4] - base_anchors[:, 0::4],
                         base_anchors[:, 3::4] - base_anchors[:, 1::4])))
        print('im_info %s' % (im_info,))
        print('height %s width %s' % (feat_height, feat_width))
        print('gt_boxes shape %s' % (gt_boxes.shape,))
        print('gt_boxes %s' % (gt_boxes,))

    # 1. generate proposals from bbox deltas and shifted anchors
    shift_x = np.arange(0, feat_width) * feat_stride
    shift_y = np.arange(0, feat_height) * feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel())).transpose()
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = num_anchors
    K = shifts.shape[0]
    all_anchors = base_anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    all_anchors = all_anchors.reshape((K * A, 4))
    total_anchors = int(K * A)

    # only keep anchors inside the image
    inds_inside = np.where((all_anchors[:, 0] >= -allowed_border) &
                           (all_anchors[:, 1] >= -allowed_border) &
                           (all_anchors[:, 2] < im_info[1] + allowed_border) &
                           (all_anchors[:, 3] < im_info[0] + allowed_border))[0]
    if DEBUG:
        print('total_anchors %s' % (total_anchors,))
        print('inds_inside %s' % (len(inds_inside),))

    # keep only inside anchors
    anchors = all_anchors[inds_inside, :]
    if DEBUG:
        print('anchors shape %s' % (anchors.shape,))

    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.empty((len(inds_inside),), dtype=np.float32)
    labels.fill(-1)

    if gt_boxes.size > 0:
        # overlap between the anchors and the gt boxes
        # overlaps (ex, gt)
        # np.float64 replaces the np.float alias, which newer NumPy removed.
        overlaps = bbox_overlaps(anchors.astype(np.float64), gt_boxes.astype(np.float64))
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])]
        # every anchor that ties for the best overlap with some gt box
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

        if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels first so that positive labels can clobber them
            labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

        # fg label: for each gt, anchor with highest overlap
        labels[gt_argmax_overlaps] = 1

        # fg label: above threshold IoU
        labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

        if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels last so that negative labels can clobber positives
            labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
    else:
        # no ground truth: every inside anchor is background
        labels[:] = 0

    # subsample positive labels if we have too many
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCH_SIZE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds, size=(len(fg_inds) - num_fg), replace=False)
        if DEBUG:
            # deterministic selection while debugging
            disable_inds = fg_inds[:(len(fg_inds) - num_fg)]
        labels[disable_inds] = -1

    # subsample negative labels if we have too many
    num_bg = cfg.TRAIN.RPN_BATCH_SIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(bg_inds, size=(len(bg_inds) - num_bg), replace=False)
        if DEBUG:
            # deterministic selection while debugging
            disable_inds = bg_inds[:(len(bg_inds) - num_bg)]
        labels[disable_inds] = -1

    # regression targets are computed for every inside anchor against its
    # best-overlapping gt box; bbox_weights selects the positives
    bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if gt_boxes.size > 0:
        bbox_targets[:] = bbox_transform(anchors, gt_boxes[argmax_overlaps, :4])

    bbox_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_weights[labels == 1, :] = np.array(cfg.TRAIN.RPN_BBOX_WEIGHTS)

    if DEBUG:
        _sums = bbox_targets[labels == 1, :].sum(axis=0)
        _squared_sums = (bbox_targets[labels == 1, :] ** 2).sum(axis=0)
        _counts = np.sum(labels == 1)
        means = _sums / (_counts + 1e-14)
        stds = np.sqrt(_squared_sums / _counts - means ** 2)
        print('means %s' % (means,))
        print('stdevs %s' % (stds,))

    # map up to original set of anchors
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_weights = _unmap(bbox_weights, total_anchors, inds_inside, fill=0)

    if DEBUG:
        if gt_boxes.size > 0:
            # max_overlaps only exists when there was at least one gt box
            print('rpn: max max_overlaps %s' % (np.max(max_overlaps),))
        print('rpn: num_positives %s' % (np.sum(labels == 1),))
        print('rpn: num_negatives %s' % (np.sum(labels == 0),))
        _fg_sum = np.sum(labels == 1)
        _bg_sum = np.sum(labels == 0)
        _count = 1
        print('rpn: num_positive avg %s' % (_fg_sum // _count,))
        print('rpn: num_negative avg %s' % (_bg_sum // _count,))

    # anchors are ordered (height, width, A); move the anchor axis forward
    labels = labels.reshape((1, feat_height, feat_width, A)).transpose(0, 3, 1, 2)
    labels = labels.reshape((1, A * feat_height * feat_width))
    bbox_targets = bbox_targets.reshape((1, feat_height, feat_width, A * 4)).transpose(0, 3, 1, 2)
    bbox_weights = bbox_weights.reshape((1, feat_height, feat_width, A * 4)).transpose((0, 3, 1, 2))

    label = {'label': labels,
             'bbox_target': bbox_targets,
             'bbox_weight': bbox_weights}
    return label
Beispiel #3
0
def assign_pyramid_anchor(feat_shapes, gt_boxes, im_info, cfg, feat_strides=(4, 8, 16, 32, 64),
                          scales=(8,), ratios=(0.5, 1, 2), allowed_border=0, balance_scale_bg=False,):
    """
    Assign ground truth boxes to anchor positions on every pyramid level.

    Anchors of all levels are pooled, labelled and subsampled together, then
    split back per level and concatenated along the spatial axis.

    :param feat_shapes: infer output shapes, one entry per pyramid level; the
        last two values of feat_shapes[i][0] are read as (feat_height, feat_width)
    :param gt_boxes: ground truth boxes, one per row; the first four columns
        are used as the regression reference for bbox_transform
    :param im_info: image info; only row 0 is used, where entry [0] bounds the
        anchors' y2 and entry [1] bounds their x2 (i.e. height, width)
    :param cfg: config object; reads cfg.TRAIN.RPN_CLOBBER_POSITIVES,
        RPN_NEGATIVE_OVERLAP, RPN_POSITIVE_OVERLAP, RPN_FG_FRACTION,
        RPN_BATCH_SIZE (-1 disables subsampling) and RPN_BBOX_WEIGHTS
    :param feat_strides: anchor position step of each pyramid level
    :param scales: used to generate anchors, affects num_anchors (per location);
        either 1-D (shared by all levels) or 2-D (one row per level)
    :param ratios: aspect ratios of generated anchors; must be 2-D when scales is
    :param allowed_border: filter out anchors with edge overlap > allowed_border
    :param balance_scale_bg: restrict the background samples for each pyramid level
    :return: dict of label (A = anchors per location, N = sum over levels of
        feat_height * feat_width)
    'label': of shape (1, sum over levels of A * feat_height * feat_width);
             1 is positive, 0 is negative, -1 is dont care
    'bbox_target': of shape (1, A * 4, N)
    'bbox_weight': of shape (1, A * 4, N);
                   cfg.TRAIN.RPN_BBOX_WEIGHTS on positive anchors, 0 elsewhere
    """
    def _unmap(data, count, inds, fill=0):
        """ unmap a subset inds of data into original data of size count """
        if len(data.shape) == 1:
            ret = np.empty((count,), dtype=np.float32)
            ret.fill(fill)
            ret[inds] = data
        else:
            ret = np.empty((count,) + data.shape[1:], dtype=np.float32)
            ret.fill(fill)
            ret[inds, :] = data
        return ret

    DEBUG = False
    im_info = im_info[0]
    scales = np.array(scales, dtype=np.float32)
    ratios = np.array(ratios, dtype=np.float32)
    assert(len(feat_shapes) == len(feat_strides))

    fpn_args = []
    # number of inside anchors per level (turned into cumulative offsets below)
    fpn_anchors_fid = np.zeros(0).astype(int)
    fpn_anchors = np.zeros([0, 4])
    fpn_labels = np.zeros(0)
    fpn_inds_inside = []
    for feat_id in range(len(feat_strides)):
        # len(scales.shape) == 1 just for backward compatibility, will remove in the future
        if len(scales.shape) == 1:
            base_anchors = generate_anchors(base_size=feat_strides[feat_id], ratios=ratios, scales=scales)
        else:
            assert len(scales.shape) == len(ratios.shape) == 2
            base_anchors = generate_anchors(base_size=feat_strides[feat_id], ratios=ratios[feat_id], scales=scales[feat_id])
        num_anchors = base_anchors.shape[0]
        feat_height, feat_width = feat_shapes[feat_id][0][-2:]

        # 1. generate proposals from bbox deltas and shifted anchors
        shift_x = np.arange(0, feat_width) * feat_strides[feat_id]
        shift_y = np.arange(0, feat_height) * feat_strides[feat_id]
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel())).transpose()
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = num_anchors
        K = shifts.shape[0]
        all_anchors = base_anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2))
        all_anchors = all_anchors.reshape((K * A, 4))
        total_anchors = int(K * A)

        # only keep anchors inside the image
        inds_inside = np.where((all_anchors[:, 0] >= -allowed_border) &
                               (all_anchors[:, 1] >= -allowed_border) &
                               (all_anchors[:, 2] < im_info[1] + allowed_border) &
                               (all_anchors[:, 3] < im_info[0] + allowed_border))[0]

        # keep only inside anchors
        anchors = all_anchors[inds_inside, :]

        # label: 1 is positive, 0 is negative, -1 is dont care
        # for sigmoid classifier, ignore the 'background' class
        labels = np.empty((len(inds_inside),), dtype=np.float32)
        labels.fill(-1)

        fpn_anchors_fid = np.hstack((fpn_anchors_fid, len(inds_inside)))
        fpn_anchors = np.vstack((fpn_anchors, anchors))
        fpn_labels = np.hstack((fpn_labels, labels))
        fpn_inds_inside.append(inds_inside)
        fpn_args.append([feat_height, feat_width, A, total_anchors])

    if gt_boxes.size > 0:
        # overlap between the anchors and the gt boxes
        # overlaps (ex, gt)
        # np.float64 replaces the np.float alias, which newer NumPy removed.
        overlaps = bbox_overlaps(fpn_anchors.astype(np.float64), gt_boxes.astype(np.float64))
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(len(fpn_anchors)), argmax_overlaps]
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])]
        # every anchor that ties for the best overlap with some gt box
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

        if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels first so that positive labels can clobber them
            fpn_labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
        # fg label: for each gt, anchor with highest overlap
        fpn_labels[gt_argmax_overlaps] = 1
        # fg label: above threshold IoU
        fpn_labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1
        if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels last so that negative labels can clobber positives
            fpn_labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
    else:
        # no ground truth: every inside anchor is background
        fpn_labels[:] = 0

    # subsample positive labels if we have too many
    num_fg = fpn_labels.shape[0] if cfg.TRAIN.RPN_BATCH_SIZE == -1 else int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCH_SIZE)
    fg_inds = np.where(fpn_labels >= 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds, size=(len(fg_inds) - num_fg), replace=False)
        if DEBUG:
            # deterministic selection while debugging
            disable_inds = fg_inds[:(len(fg_inds) - num_fg)]
        fpn_labels[disable_inds] = -1

    # subsample negative labels if we have too many
    num_bg = fpn_labels.shape[0] if cfg.TRAIN.RPN_BATCH_SIZE == -1 else cfg.TRAIN.RPN_BATCH_SIZE - np.sum(fpn_labels >= 1)
    bg_inds = np.where(fpn_labels == 0)[0]
    # per-level counts -> cumulative offsets: level i owns the pooled index
    # range [fpn_anchors_fid[i], fpn_anchors_fid[i + 1])
    fpn_anchors_fid = np.hstack((0, fpn_anchors_fid.cumsum()))

    if balance_scale_bg:
        # Floor division keeps the per-level quota an integer on Python 3 too;
        # npr.choice rejects a float `size`.
        num_bg_scale = num_bg // len(feat_strides)
        for feat_id in range(0, len(feat_strides)):
            bg_ind_scale = bg_inds[(bg_inds >= fpn_anchors_fid[feat_id]) & (bg_inds < fpn_anchors_fid[feat_id+1])]
            if len(bg_ind_scale) > num_bg_scale:
                disable_inds = npr.choice(bg_ind_scale, size=(len(bg_ind_scale) - num_bg_scale), replace=False)
                fpn_labels[disable_inds] = -1
    else:
        if len(bg_inds) > num_bg:
            disable_inds = npr.choice(bg_inds, size=(len(bg_inds) - num_bg), replace=False)
            if DEBUG:
                # deterministic selection while debugging
                disable_inds = bg_inds[:(len(bg_inds) - num_bg)]
            fpn_labels[disable_inds] = -1

    # regression targets only for the anchors that stayed positive
    fpn_bbox_targets = np.zeros((len(fpn_anchors), 4), dtype=np.float32)
    if gt_boxes.size > 0:
        fpn_bbox_targets[fpn_labels >= 1, :] = bbox_transform(fpn_anchors[fpn_labels >= 1, :], gt_boxes[argmax_overlaps[fpn_labels >= 1], :4])
    fpn_bbox_weights = np.zeros((len(fpn_anchors), 4), dtype=np.float32)

    fpn_bbox_weights[fpn_labels >= 1, :] = np.array(cfg.TRAIN.RPN_BBOX_WEIGHTS)

    label_list = []
    bbox_target_list = []
    bbox_weight_list = []
    for feat_id in range(0, len(feat_strides)):
        feat_height, feat_width, A, total_anchors = fpn_args[feat_id]
        # map up to original set of anchors
        labels = _unmap(fpn_labels[fpn_anchors_fid[feat_id]:fpn_anchors_fid[feat_id+1]], total_anchors, fpn_inds_inside[feat_id], fill=-1)
        bbox_targets = _unmap(fpn_bbox_targets[fpn_anchors_fid[feat_id]:fpn_anchors_fid[feat_id+1]], total_anchors, fpn_inds_inside[feat_id], fill=0)
        bbox_weights = _unmap(fpn_bbox_weights[fpn_anchors_fid[feat_id]:fpn_anchors_fid[feat_id+1]], total_anchors, fpn_inds_inside[feat_id], fill=0)

        # anchors are ordered (height, width, A); move the anchor axis forward
        labels = labels.reshape((1, feat_height, feat_width, A)).transpose(0, 3, 1, 2)
        labels = labels.reshape((1, A * feat_height * feat_width))

        # flatten the spatial axes so levels can be concatenated on axis 2
        bbox_targets = bbox_targets.reshape((1, feat_height, feat_width, A * 4)).transpose(0, 3, 1, 2)
        bbox_targets = bbox_targets.reshape((1, A * 4, -1))
        bbox_weights = bbox_weights.reshape((1, feat_height, feat_width, A * 4)).transpose((0, 3, 1, 2))
        bbox_weights = bbox_weights.reshape((1, A * 4, -1))

        label_list.append(labels)
        bbox_target_list.append(bbox_targets)
        bbox_weight_list.append(bbox_weights)

    # concatenating on axis 2 requires the same A * 4 on every level
    label = {
        'label': np.concatenate(label_list, axis=1),
        'bbox_target': np.concatenate(bbox_target_list, axis=2),
        'bbox_weight': np.concatenate(bbox_weight_list, axis=2)
    }

    return label
def assign_anchor(feat_shape,
                  gt_boxes,
                  im_info,
                  cfg,
                  feat_stride=16,
                  scales=(8, 16, 32),
                  ratios=(0.5, 1, 2),
                  allowed_border=0,
                  normalize_target=False,
                  bbox_mean=(0.0, 0.0, 0.0, 0.0),
                  bbox_std=(0.1, 0.1, 0.4, 0.4)):
    """
    Assign ground truth boxes to anchor positions for a single image.

    :param feat_shape: infer output shape; only the last two entries
        (feat_height, feat_width) are used
    :param gt_boxes: ground truth boxes, one per row; the first four columns
        are used as the regression reference for bbox_transform
    :param im_info: image info; only row 0 is used, where entry [0] bounds the
        anchors' y2 and entry [1] bounds their x2 (i.e. height, width)
    :param cfg: config object; reads cfg.TRAIN.RPN_CLOBBER_POSITIVES,
        RPN_NEGATIVE_OVERLAP, RPN_POSITIVE_OVERLAP, RPN_FG_FRACTION,
        RPN_BATCH_SIZE and RPN_BBOX_WEIGHTS
    :param feat_stride: anchor position step
    :param scales: used to generate anchors, affects num_anchors (per location)
    :param ratios: aspect ratios of generated anchors
    :param allowed_border: filter out anchors with edge overlap > allowed_border
    :param normalize_target: normalize rpn target as
        (target - bbox_mean) / bbox_std; applied to inside anchors only,
        anchors outside the image keep a target of 0
    :param bbox_mean: anchor target mean
    :param bbox_std: anchor target std
    :return: dict of label (A = anchors per location)
    'label': of shape (1, A * feat_height * feat_width);
             1 is positive, 0 is negative, -1 is dont care
    'bbox_target': of shape (1, A * 4, feat_height, feat_width)
    'bbox_weight': of shape (1, A * 4, feat_height, feat_width);
                   cfg.TRAIN.RPN_BBOX_WEIGHTS on positive anchors, 0 elsewhere
    """
    def _unmap(data, count, inds, fill=0):
        """ unmap a subset inds of data into original data of size count """
        if len(data.shape) == 1:
            ret = np.empty((count, ), dtype=np.float32)
            ret.fill(fill)
            ret[inds] = data
        else:
            ret = np.empty((count, ) + data.shape[1:], dtype=np.float32)
            ret.fill(fill)
            ret[inds, :] = data
        return ret

    # Debug output uses single-argument print(...) with %-formatting so it
    # behaves the same under Python 2 and Python 3.
    DEBUG = False
    im_info = im_info[0]
    scales = np.array(scales, dtype=np.float32)
    base_anchors = generate_anchors(base_size=feat_stride,
                                    ratios=list(ratios),
                                    scales=scales)
    num_anchors = base_anchors.shape[0]
    feat_height, feat_width = feat_shape[-2:]

    if DEBUG:
        print('anchors:')
        print(base_anchors)
        print('anchor shapes:')
        print(np.hstack((base_anchors[:, 2::4] - base_anchors[:, 0::4],
                         base_anchors[:, 3::4] - base_anchors[:, 1::4])))
        print('im_info %s' % (im_info, ))
        print('height %s width %s' % (feat_height, feat_width))
        print('gt_boxes shape %s' % (gt_boxes.shape, ))
        print('gt_boxes %s' % (gt_boxes, ))

    # 1. generate proposals from bbox deltas and shifted anchors
    shift_x = np.arange(0, feat_width) * feat_stride
    shift_y = np.arange(0, feat_height) * feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                        shift_y.ravel())).transpose()
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = num_anchors
    K = shifts.shape[0]
    all_anchors = base_anchors.reshape((1, A, 4)) + shifts.reshape(
        (1, K, 4)).transpose((1, 0, 2))
    all_anchors = all_anchors.reshape((K * A, 4))
    total_anchors = int(K * A)

    # only keep anchors inside the image
    inds_inside = np.where(
        (all_anchors[:, 0] >= -allowed_border)
        & (all_anchors[:, 1] >= -allowed_border)
        & (all_anchors[:, 2] < im_info[1] + allowed_border)
        & (all_anchors[:, 3] < im_info[0] + allowed_border))[0]
    if DEBUG:
        print('total_anchors %s' % (total_anchors, ))
        print('inds_inside %s' % (len(inds_inside), ))

    # keep only inside anchors
    anchors = all_anchors[inds_inside, :]
    if DEBUG:
        print('anchors shape %s' % (anchors.shape, ))

    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.empty((len(inds_inside), ), dtype=np.float32)
    labels.fill(-1)

    if gt_boxes.size > 0:
        # overlap between the anchors and the gt boxes
        # overlaps (ex, gt)
        # np.float64 replaces the np.float alias, which newer NumPy removed.
        overlaps = bbox_overlaps(anchors.astype(np.float64),
                                 gt_boxes.astype(np.float64))
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps,
                                   np.arange(overlaps.shape[1])]
        # every anchor that ties for the best overlap with some gt box
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

        if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels first so that positive labels can clobber them
            labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

        # fg label: for each gt, anchor with highest overlap
        labels[gt_argmax_overlaps] = 1

        # fg label: above threshold IoU
        labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

        if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels last so that negative labels can clobber positives
            labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
    else:
        # no ground truth: every inside anchor is background
        labels[:] = 0

    # subsample positive labels if we have too many
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCH_SIZE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds,
                                  size=(len(fg_inds) - num_fg),
                                  replace=False)
        if DEBUG:
            # deterministic selection while debugging
            disable_inds = fg_inds[:(len(fg_inds) - num_fg)]
        labels[disable_inds] = -1

    # subsample negative labels if we have too many
    num_bg = cfg.TRAIN.RPN_BATCH_SIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(bg_inds,
                                  size=(len(bg_inds) - num_bg),
                                  replace=False)
        if DEBUG:
            # deterministic selection while debugging
            disable_inds = bg_inds[:(len(bg_inds) - num_bg)]
        labels[disable_inds] = -1

    # regression targets are computed for every inside anchor against its
    # best-overlapping gt box; bbox_weights selects the positives
    bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if gt_boxes.size > 0:
        bbox_targets[:] = bbox_transform(anchors,
                                         gt_boxes[argmax_overlaps, :4])

    bbox_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_weights[labels == 1, :] = np.array(cfg.TRAIN.RPN_BBOX_WEIGHTS)

    if DEBUG:
        _sums = bbox_targets[labels == 1, :].sum(axis=0)
        _squared_sums = (bbox_targets[labels == 1, :]**2).sum(axis=0)
        _counts = np.sum(labels == 1)
        means = _sums / (_counts + 1e-14)
        stds = np.sqrt(_squared_sums / _counts - means**2)
        print('means %s' % (means, ))
        print('stdevs %s' % (stds, ))

    if normalize_target:
        # normalise before unmapping so outside anchors keep the 0 fill value
        bbox_targets = ((bbox_targets - np.array(bbox_mean)) /
                        np.array(bbox_std))

    # map up to original set of anchors
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_weights = _unmap(bbox_weights, total_anchors, inds_inside, fill=0)

    if DEBUG:
        if gt_boxes.size > 0:
            # max_overlaps only exists when there was at least one gt box
            print('rpn: max max_overlaps %s' % (np.max(max_overlaps), ))
        print('rpn: num_positives %s' % (np.sum(labels == 1), ))
        print('rpn: num_negatives %s' % (np.sum(labels == 0), ))
        _fg_sum = np.sum(labels == 1)
        _bg_sum = np.sum(labels == 0)
        _count = 1
        print('rpn: num_positive avg %s' % (_fg_sum // _count, ))
        print('rpn: num_negative avg %s' % (_bg_sum // _count, ))

    # anchors are ordered (height, width, A); move the anchor axis forward
    labels = labels.reshape(
        (1, feat_height, feat_width, A)).transpose(0, 3, 1, 2)
    labels = labels.reshape((1, A * feat_height * feat_width))
    bbox_targets = bbox_targets.reshape(
        (1, feat_height, feat_width, A * 4)).transpose(0, 3, 1, 2)
    bbox_weights = bbox_weights.reshape(
        (1, feat_height, feat_width, A * 4)).transpose((0, 3, 1, 2))

    label = {
        'label': labels,
        'bbox_target': bbox_targets,
        'bbox_weight': bbox_weights
    }
    return label
Beispiel #5
0
def assign_pyramid_anchor(
        feat_shapes,
        gt_boxes,
        im_info,
        cfg,
        feat_strides=(4, 8, 16, 32, 64),
        scales=(8, ),
        ratios=(0.5, 1, 2),
        allowed_border=0,
        balance_scale_bg=False,
):
    """
    assign ground truth boxes to anchor positions
    :param feat_shapes: infer output shape
    :param gt_boxes: assign ground truth
    :param im_info: filter out anchors overlapped with edges
    :param feat_strides: anchor position step
    :param scales: used to generate anchors, affects num_anchors (per location)
    :param ratios: aspect ratios of generated anchors
    :param allowed_border: filter out anchors with edge overlap > allowed_border
    :param balance_scale_bg: restrict the background samples for each pyramid level
    :return: dict of label
    'label': of shape (batch_size, 1) <- (batch_size, num_anchors, feat_height, feat_width)
    'bbox_target': of shape (batch_size, num_anchors * 4, feat_height, feat_width)
    'bbox_inside_weight': *todo* mark the assigned anchors
    'bbox_outside_weight': used to normalize the bbox_loss, all weights sums to RPN_POSITIVE_WEIGHT
    """
    def _unmap(data, count, inds, fill=0):
        """" unmap a subset inds of data into original data of size count """
        if len(data.shape) == 1:
            ret = np.empty((count, ), dtype=np.float32)
            ret.fill(fill)
            ret[inds] = data
        else:
            ret = np.empty((count, ) + data.shape[1:], dtype=np.float32)
            ret.fill(fill)
            ret[inds, :] = data
        return ret

    im_info = im_info[0]
    scales = np.array(scales, dtype=np.float32)
    ratios = np.array(ratios, dtype=np.float32)
    assert (len(feat_shapes) == len(feat_strides))

    fpn_args = []
    fpn_anchors_fid = np.zeros(0).astype(int)
    fpn_anchors = np.zeros([0, 4])
    fpn_labels = np.zeros(0)
    fpn_inds_inside = []
    for feat_id in range(len(feat_strides)):
        # len(scales.shape) == 1 just for backward compatibility, will remove in the future
        if len(scales.shape) == 1:
            base_anchors = generate_anchors(base_size=feat_strides[feat_id],
                                            ratios=ratios,
                                            scales=scales)
        else:
            assert len(scales.shape) == len(ratios.shape) == 2
            base_anchors = generate_anchors(base_size=feat_strides[feat_id],
                                            ratios=ratios[feat_id],
                                            scales=scales[feat_id])
        num_anchors = base_anchors.shape[0]
        feat_height, feat_width = feat_shapes[feat_id][0][-2:]

        # 1. generate proposals from bbox deltas and shifted anchors
        shift_x = np.arange(0, feat_width) * feat_strides[feat_id]
        shift_y = np.arange(0, feat_height) * feat_strides[feat_id]
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                            shift_y.ravel())).transpose()
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = num_anchors
        K = shifts.shape[0]
        all_anchors = base_anchors.reshape((1, A, 4)) + shifts.reshape(
            (1, K, 4)).transpose((1, 0, 2))
        all_anchors = all_anchors.reshape((K * A, 4))
        total_anchors = int(K * A)

        # only keep anchors inside the image
        inds_inside = np.where(
            (all_anchors[:, 0] >= -allowed_border)
            & (all_anchors[:, 1] >= -allowed_border)
            & (all_anchors[:, 2] < im_info[1] + allowed_border)
            & (all_anchors[:, 3] < im_info[0] + allowed_border))[0]

        # keep only inside anchors
        anchors = all_anchors[inds_inside, :]

        # label: 1 is positive, 0 is negative, -1 is dont care
        # for sigmoid classifier, ignore the 'background' class
        labels = np.empty((len(inds_inside), ), dtype=np.float32)
        labels.fill(-1)

        fpn_anchors_fid = np.hstack((fpn_anchors_fid, len(inds_inside)))
        fpn_anchors = np.vstack((fpn_anchors, anchors))
        fpn_labels = np.hstack((fpn_labels, labels))
        fpn_inds_inside.append(inds_inside)
        fpn_args.append([feat_height, feat_width, A, total_anchors])

    if gt_boxes.size > 0:
        # overlap between the anchors and the gt boxes
        # overlaps (ex, gt)
        #overlaps = bbox_overlaps(fpn_anchors.astype(np.float), gt_boxes.astype(np.float))
        overlaps, overlaps1, _, center_ins = bbox_overlaps_py1(
            anchors.astype(np.float), gt_boxes.astype(np.float))
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(len(fpn_anchors)), argmax_overlaps]
        max_overlaps1 = overlaps1[np.arange(len(fpn_anchors)), argmax_overlaps]
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps,
                                   np.arange(overlaps.shape[1])]
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

        if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels first so that positive labels can clobber them
            fpn_labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
        # fg label: for each gt, anchor with highest overlap
        fpn_labels[gt_argmax_overlaps] = 1
        # fg label: above threshold IoU
        fpn_labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1
        if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels last so that negative labels can clobber positives
            fpn_labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
        #print center_ins
        for i in range(argmax_overlaps.shape[0]):
            if center_ins[i, argmax_overlaps[i]] == 1:
                if max_overlaps[i] >= cfg.TRAIN.RPN_POSITIVE_OVERLAP / 3:
                    if max_overlaps1[i] >= 0.7:
                        if DEBUG:
                            print max_overlaps[i]
                            print max_overlaps1[i]
                            print fpn_labels[i]
                        fpn_labels[i] = 1
    else:
        fpn_labels[:] = 0

    # subsample positive labels if we have too many
    num_fg = fpn_labels.shape[0] if cfg.TRAIN.RPN_BATCH_SIZE == -1 else int(
        cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCH_SIZE)
    fg_inds = np.where(fpn_labels >= 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds,
                                  size=(len(fg_inds) - num_fg),
                                  replace=False)
        if DEBUG:
            disable_inds = fg_inds[:(len(fg_inds) - num_fg)]
        fpn_labels[disable_inds] = -1

    # subsample negative labels if we have too many
    num_bg = fpn_labels.shape[
        0] if cfg.TRAIN.RPN_BATCH_SIZE == -1 else cfg.TRAIN.RPN_BATCH_SIZE - np.sum(
            fpn_labels >= 1)
    bg_inds = np.where(fpn_labels == 0)[0]
    fpn_anchors_fid = np.hstack((0, fpn_anchors_fid.cumsum()))

    if balance_scale_bg:
        num_bg_scale = num_bg / len(feat_strides)
        for feat_id in range(0, len(feat_strides)):
            bg_ind_scale = bg_inds[(bg_inds >= fpn_anchors_fid[feat_id])
                                   & (bg_inds < fpn_anchors_fid[feat_id + 1])]
            if len(bg_ind_scale) > num_bg_scale:
                disable_inds = npr.choice(bg_ind_scale,
                                          size=(len(bg_ind_scale) -
                                                num_bg_scale),
                                          replace=False)
                fpn_labels[disable_inds] = -1
    else:
        if len(bg_inds) > num_bg:
            disable_inds = npr.choice(bg_inds,
                                      size=(len(bg_inds) - num_bg),
                                      replace=False)
            if DEBUG:
                disable_inds = bg_inds[:(len(bg_inds) - num_bg)]
            fpn_labels[disable_inds] = -1

    fpn_bbox_targets = np.zeros((len(fpn_anchors), 4), dtype=np.float32)
    if gt_boxes.size > 0:
        fpn_bbox_targets[fpn_labels >= 1, :] = bbox_transform(
            fpn_anchors[fpn_labels >= 1, :],
            gt_boxes[argmax_overlaps[fpn_labels >= 1], :4])
        # fpn_bbox_targets[:] = bbox_transform(fpn_anchors, gt_boxes[argmax_overlaps, :4])
    # fpn_bbox_targets = (fpn_bbox_targets - np.array(cfg.TRAIN.BBOX_MEANS)) / np.array(cfg.TRAIN.BBOX_STDS)
    fpn_bbox_weights = np.zeros((len(fpn_anchors), 4), dtype=np.float32)

    fpn_bbox_weights[fpn_labels >= 1, :] = np.array(cfg.TRAIN.RPN_BBOX_WEIGHTS)

    label_list = []
    bbox_target_list = []
    bbox_weight_list = []
    for feat_id in range(0, len(feat_strides)):
        feat_height, feat_width, A, total_anchors = fpn_args[feat_id]
        # map up to original set of anchors
        labels = _unmap(
            fpn_labels[fpn_anchors_fid[feat_id]:fpn_anchors_fid[feat_id + 1]],
            total_anchors,
            fpn_inds_inside[feat_id],
            fill=-1)
        bbox_targets = _unmap(
            fpn_bbox_targets[fpn_anchors_fid[feat_id]:fpn_anchors_fid[feat_id +
                                                                      1]],
            total_anchors,
            fpn_inds_inside[feat_id],
            fill=0)
        bbox_weights = _unmap(
            fpn_bbox_weights[fpn_anchors_fid[feat_id]:fpn_anchors_fid[feat_id +
                                                                      1]],
            total_anchors,
            fpn_inds_inside[feat_id],
            fill=0)

        labels = labels.reshape(
            (1, feat_height, feat_width, A)).transpose(0, 3, 1, 2)
        labels = labels.reshape((1, A * feat_height * feat_width))

        bbox_targets = bbox_targets.reshape(
            (1, feat_height, feat_width, A * 4)).transpose(0, 3, 1, 2)
        bbox_targets = bbox_targets.reshape((1, A * 4, -1))
        bbox_weights = bbox_weights.reshape(
            (1, feat_height, feat_width, A * 4)).transpose((0, 3, 1, 2))
        bbox_weights = bbox_weights.reshape((1, A * 4, -1))

        label_list.append(labels)
        bbox_target_list.append(bbox_targets)
        bbox_weight_list.append(bbox_weights)
        # label.update({'label_p' + str(feat_id + feat_id_start): labels,
        #               'bbox_target_p' + str(feat_id + feat_id_start): bbox_targets,
        #               'bbox_weight_p' + str(feat_id + feat_id_start): bbox_weights})

    label = {
        'label': np.concatenate(label_list, axis=1),
        'bbox_target': np.concatenate(bbox_target_list, axis=2),
        'bbox_weight': np.concatenate(bbox_weight_list, axis=2)
    }

    return label
Beispiel #6
0
def assign_anchor(feat_shape_p3,
                  feat_shape_p4,
                  feat_shape_p5,
                  gt_boxes,
                  im_info,
                  cfg,
                  feat_stride_p3=4,
                  scales_p3=(8, ),
                  ratios_p3=(0.75, 1, 1.5),
                  feat_stride_p4=8,
                  scales_p4=(8, ),
                  ratios_p4=(0.75, 1, 1.5),
                  feat_stride_p5=16,
                  scales_p5=(8, ),
                  ratios_p5=(0.75, 1, 1.5),
                  allowed_border=1):
    """
    Assign ground truth boxes to the anchors of three feature levels.

    For every level the full anchor grid is generated (no border filtering is
    applied) and each anchor is matched to the gt box it overlaps most.

    NOTE(review): the per-level ``feat_stride_*``, ``scales_*`` and
    ``ratios_*`` arguments, ``allowed_border`` and ``im_info`` are accepted
    for interface compatibility but are NOT used: the anchors are always
    built with strides (4, 8, 16), scales (8, 10, 12) and ratios (0.5, 1, 2).
    Confirm with the callers whether that is intended.

    :param feat_shape_p3: feature map shape of the first level; only the last
        two entries (height, width) are read
    :param feat_shape_p4: same, for the second level
    :param feat_shape_p5: same, for the third level
    :param gt_boxes: ground truth array; the first four columns are used as
        box coordinates and the last column as the class id (the original
        docstring gives the shape as [n, 5])
    :param im_info: unused
    :param cfg: config providing TRAIN.RPN_NEGATIVE_OVERLAP,
        TRAIN.RPN_POSITIVE_OVERLAP and TRAIN.RPN_BBOX_WEIGHTS
    :return: dict with one entry per level (suffix /p3, /p4, /p5):
    'label/*': float32, shape (1, A * feat_height * feat_width); holds the
        class id of the best matching gt box, 0 for background (max overlap
        below RPN_NEGATIVE_OVERLAP) and -1 for ignored anchors (overlap
        between the two thresholds). With no gt boxes every label is 0.
    'bbox_target/*': float32, shape (1, A * 4, feat_height, feat_width)
    'bbox_weight/*': float32, same shape; RPN_BBOX_WEIGHTS where label > 0,
        zero elsewhere
    """
    feat_shape = [feat_shape_p3, feat_shape_p4, feat_shape_p5]
    feat_stride = [4, 8, 16]
    scales = np.array((8, 10, 12), dtype=np.float32)
    ratios = (0.5, 1, 2)

    # The original `assert ('message')` asserted a non-empty string and could
    # therefore never fail; raise explicitly instead.
    if len(feat_stride) != len(feat_shape):
        raise ValueError(
            'length of feat_stride is not equal to length of feat_shape')

    neg_thresh = cfg.TRAIN.RPN_NEGATIVE_OVERLAP
    pos_thresh = cfg.TRAIN.RPN_POSITIVE_OVERLAP
    has_gt = gt_boxes.size > 0

    labels_list = []
    bbox_targets_list = []
    bbox_weights_list = []
    for stride, shape in zip(feat_stride, feat_shape):
        base_anchors = generate_anchors(base_size=stride,
                                        ratios=list(ratios),
                                        scales=scales)
        A = base_anchors.shape[0]
        feat_height, feat_width = shape[-2:]

        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        shift_x = np.arange(0, feat_width) * stride
        shift_y = np.arange(0, feat_height) * stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                            shift_y.ravel())).transpose()
        K = shifts.shape[0]
        anchors = base_anchors.reshape((1, A, 4)) + shifts.reshape(
            (1, K, 4)).transpose((1, 0, 2))
        anchors = anchors.reshape((K * A, 4))
        total_anchors = len(anchors)

        bbox_targets = np.zeros((total_anchors, 4), dtype=np.float32)
        if has_gt:
            # `float` / `int` are what the removed `np.float` / `np.int`
            # aliases resolved to.
            overlaps = bbox_overlaps(anchors.astype(float),
                                     gt_boxes.astype(float))
            argmax_overlaps = overlaps.argmax(axis=1)
            max_overlaps = overlaps[np.arange(total_anchors), argmax_overlaps]

            # class id of the best matching gt box for every anchor
            labels = gt_boxes[argmax_overlaps, -1].astype(int)
            labels[max_overlaps < neg_thresh] = 0
            labels[(max_overlaps >= neg_thresh)
                   & (max_overlaps < pos_thresh)] = -1

            bbox_targets[:] = bbox_transform(anchors,
                                             gt_boxes[argmax_overlaps, :4])
        else:
            labels = np.zeros((total_anchors, ), dtype=np.float32)

        bbox_weights = np.zeros((total_anchors, 4), dtype=np.float32)
        bbox_weights[labels > 0, :] = np.array(cfg.TRAIN.RPN_BBOX_WEIGHTS)

        # Labels are flattened in anchor-generation order (no transpose),
        # while targets/weights are moved to channel-first layout.
        labels = labels.astype(np.float32).reshape(
            (1, A * feat_height * feat_width))
        bbox_targets = bbox_targets.reshape(
            (1, feat_height, feat_width, A * 4)).transpose(0, 3, 1, 2)
        bbox_weights = bbox_weights.reshape(
            (1, feat_height, feat_width, A * 4)).transpose((0, 3, 1, 2))

        labels_list.append(labels)
        bbox_targets_list.append(bbox_targets)
        bbox_weights_list.append(bbox_weights)

    label = {
        'label/p3': labels_list[0],
        'label/p4': labels_list[1],
        'label/p5': labels_list[2],
        'bbox_target/p3': bbox_targets_list[0],
        'bbox_target/p4': bbox_targets_list[1],
        'bbox_target/p5': bbox_targets_list[2],
        'bbox_weight/p3': bbox_weights_list[0],
        'bbox_weight/p4': bbox_weights_list[1],
        'bbox_weight/p5': bbox_weights_list[2]
    }

    return label
Beispiel #7
0
    def forward(self, is_train, req, in_data, out_data, aux):
        """
        Decode multi-level RPN outputs into a fixed number of RoIs.

        Algorithm:
          for each feature level i
            generate A anchor boxes at every (H, W) location
            apply the predicted bbox deltas to the anchors
            clip predicted boxes to the image
            remove boxes smaller than min_size[i] * image scale
          sort all (proposal, score) pairs by score, highest first
          take the top pre_nms_topN proposals
          apply NMS, take the top post_nms_topN survivors
          pad by re-sampling survivors so the output size stays fixed

        Inputs read from ``in_data``:
          [0] flattened scores of all levels, [1] flattened bbox deltas of all
          levels, [2] im_info (row 0 is used; entries 0-1 clip the boxes,
          entry 2 scales min_size), [3] per-level feature shapes, indexed as
          feat_shape[0, i, 2] (height) and feat_shape[0, i, 3] (width).

        Outputs: ``out_data[0]`` receives rows of (batch index 0, box);
        ``out_data[1]`` receives the matching scores when
        ``self._output_score`` is set.

        :raises ValueError: if more than one image is passed per device
        """
        nms = gpu_nms_wrapper(self._threshold, 0)
        batch_size = in_data[0].shape[0]
        if batch_size > 1:
            raise ValueError(
                "Sorry, multiple images each device is not implemented")

        pre_nms_topN = self._rpn_pre_nms_top_n
        post_nms_topN = self._rpn_post_nms_top_n
        min_size = self._rpn_min_size

        scores_list = in_data[0].asnumpy()
        bbox_deltas_list = in_data[1].asnumpy()
        im_info = in_data[2].asnumpy()[0, :]
        feat_shape = in_data[3].asnumpy()
        num_feat = feat_shape.shape[1]

        # Running offsets into the flattened score / delta vectors.
        score_index_start = 0
        bbox_index_start = 0
        keep_proposal = []
        keep_scores = []
        for i in range(num_feat):
            feat_stride = int(self._feat_stride[i])
            anchor = generate_anchors(feat_stride,
                                      scales=self._scales,
                                      ratios=self._ratios)
            num_anchors = anchor.shape[0]
            height = feat_shape[0, i, 2]
            width = feat_shape[0, i, 3]

            # Enumerate all shifted anchors: (K, A, 4) -> (K * A, 4)
            shift_x = np.arange(0, width) * feat_stride
            shift_y = np.arange(0, height) * feat_stride
            shift_x, shift_y = np.meshgrid(shift_x, shift_y)
            shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                                shift_x.ravel(), shift_y.ravel())).transpose()
            A = num_anchors
            K = shifts.shape[0]
            anchors = anchor.reshape((1, A, 4)) + shifts.reshape(
                (1, K, 4)).transpose((1, 0, 2))
            anchors = anchors.reshape((K * A, 4))

            # Cut this level's slice out of the flattened inputs and restore
            # the (1, channels, h, w) layout.
            score_index_end = score_index_start + K * A * 2
            scores = scores_list[
                0, int(score_index_start):int(score_index_end)].reshape(
                    (1, int(2 * num_anchors), -1, int(width)))
            # the first set of anchors are background probabilities
            # keep the second part
            scores = scores[:, num_anchors:, :, :]

            bbox_index_end = bbox_index_start + K * A * 4
            bbox_deltas = bbox_deltas_list[
                0, int(bbox_index_start):int(bbox_index_end)].reshape(
                    (1, int(4 * num_anchors), -1, int(width)))
            score_index_start = score_index_end
            bbox_index_start = bbox_index_end

            bbox_deltas = self._clip_pad(bbox_deltas, (height, width))
            bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))
            scores = self._clip_pad(scores, (height, width))
            scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

            proposals = bbox_pred(anchors, bbox_deltas)
            proposals = clip_boxes(proposals, im_info[:2])
            keep = self._filter_boxes(proposals, min_size[i] * im_info[2])
            keep_proposal.append(proposals[keep, :])
            keep_scores.append(scores[keep])

        # Concatenate all levels in one go instead of re-copying the
        # accumulated array once per level.
        proposals = np.vstack(keep_proposal)
        scores = np.vstack(keep_scores)

        # sort all (proposal, score) pairs by score from highest to lowest
        # and take top pre_nms_topN (e.g. 6000)
        order = scores.ravel().argsort()[::-1]
        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
        proposals = proposals[order, :]
        scores = scores[order]

        # apply nms (e.g. threshold = 0.7) and take after_nms_topN (e.g. 300)
        det = np.hstack((proposals, scores)).astype(np.float32)
        keep = nms(det)

        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
        # pad to ensure output size remains unchanged
        if len(keep) < post_nms_topN:
            if len(keep) == 0:
                # Nothing survived, so there is nothing to re-sample from
                # (npr.choice would raise). Emit dummy 16x16 boxes and zero
                # scores with the same row count as the regular path; the
                # original emitted the un-padded scores here, whose length
                # need not match post_nms_topN.
                proposals = np.zeros((post_nms_topN, 4), dtype=np.float32)
                proposals[:, 2] = 16
                proposals[:, 3] = 16
                scores = np.zeros((post_nms_topN, 1), dtype=np.float32)
            else:
                pad = npr.choice(keep, size=post_nms_topN - len(keep))
                keep = np.hstack((keep, pad))
                proposals = proposals[keep, :]
                scores = scores[keep]
        else:
            proposals = proposals[keep, :]
            scores = scores[keep]

        # Output rois array
        # Our RPN implementation only supports a single input image, so all
        # batch inds are 0
        batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
        blob = np.hstack((batch_inds, proposals.astype(np.float32,
                                                       copy=False)))
        self.assign(out_data[0], req[0], blob)

        if self._output_score:
            self.assign(out_data[1], req[1],
                        scores.astype(np.float32, copy=False))
Beispiel #8
0
def assign_anchor(feat_shape, gt_boxes, im_info, cfg, feat_stride=16,
                  scales=(8, 16, 32), ratios=(0.5, 1, 2), allowed_border=0, valid_ranges=None, invalid_anchor_threshold=0.3):
    """
    assign ground truth boxes to anchor positions
    :param feat_shape: infer output shape; only the last two entries
        (feat_height, feat_width) are read
    :param gt_boxes: assign ground truth; the first four columns are used as
        box coordinates
    :param im_info: row 0 is used; entries 0 and 1 bound the inside-image
        filter, entry 2 scales the returned valid ranges
    :param cfg: config providing TRAIN.RPN_CLOBBER_POSITIVES,
        RPN_NEGATIVE_OVERLAP, RPN_POSITIVE_OVERLAP, RPN_FG_FRACTION,
        RPN_BATCH_SIZE and RPN_BBOX_WEIGHTS
    :param feat_stride: anchor position step
    :param scales: used to generate anchors, affects num_anchors (per location)
    :param ratios: aspect ratios of generated anchors
    :param allowed_border: filter out anchors with edge overlap > allowed_border
    :param valid_ranges: None for a single label set; otherwise an iterable of
        (low, high) box-size ranges. One label set is produced per range, and
        anchors overlapping a gt box whose area lies outside
        [low**2, high**2] by more than invalid_anchor_threshold are ignored.
    :param invalid_anchor_threshold: overlap above which an anchor matched to
        an out-of-range gt box is set to -1
    :return: dict of label
    'label': float32, shape (R, A * feat_height * feat_width); 1 is positive,
        0 is negative, -1 is dont care
    'bbox_target': float32, shape (R, A * 4, feat_height, feat_width)
    'bbox_weight': float32, same shape, RPN_BBOX_WEIGHTS on positive anchors
    'valid_ranges': only when valid_ranges is given (see NOTE at the end)
    where R is 1 when valid_ranges is None, else the number of ranges.
    """
    def _unmap(data, count, inds, fill=0):
        """" unmap a subset inds of data into original data of size count """
        if len(data.shape) == 1:
            ret = np.empty((count,), dtype=np.float32)
            ret.fill(fill)
            ret[inds] = data
        else:
            ret = np.empty((count,) + data.shape[1:], dtype=np.float32)
            ret.fill(fill)
            ret[inds, :] = data
        return ret

    im_info = im_info[0]
    scales = np.array(scales, dtype=np.float32)
    base_anchors = generate_anchors(base_size=feat_stride, ratios=list(ratios), scales=scales)
    A = base_anchors.shape[0]
    feat_height, feat_width = feat_shape[-2:]

    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    shift_x = np.arange(0, feat_width) * feat_stride
    shift_y = np.arange(0, feat_height) * feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel())).transpose()
    K = shifts.shape[0]
    all_anchors = base_anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    all_anchors = all_anchors.reshape((K * A, 4))
    total_anchors = int(K * A)

    # only keep anchors inside the image
    inds_inside = np.where((all_anchors[:, 0] >= -allowed_border) &
                           (all_anchors[:, 1] >= -allowed_border) &
                           (all_anchors[:, 2] < im_info[1] + allowed_border) &
                           (all_anchors[:, 3] < im_info[0] + allowed_border))[0]
    anchors = all_anchors[inds_inside, :]
    num_inside = len(inds_inside)
    has_gt = gt_boxes.size > 0

    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.empty((num_inside,), dtype=np.float32)
    labels.fill(-1)

    # Regression targets do not depend on the sampling below, so they are
    # computed once and shared by every label set.
    inside_targets = np.zeros((num_inside, 4), dtype=np.float32)

    if has_gt:
        # overlap between the anchors and the gt boxes
        # overlaps (ex, gt)
        # `float` is what the removed `np.float` alias resolved to.
        overlaps = bbox_overlaps(anchors.astype(float), gt_boxes.astype(float))
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(num_inside), argmax_overlaps]
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])]
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

        if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels first so that positive labels can clobber them
            labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

        # fg label: for each gt, anchor with highest overlap
        labels[gt_argmax_overlaps] = 1

        # fg label: above threshold IoU
        labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

        if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels last so that negative labels can clobber positives
            labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

        inside_targets[:] = bbox_transform(anchors, gt_boxes[argmax_overlaps, :4])
    else:
        labels[:] = 0

    def _subsample(sampled_labels):
        """ randomly set surplus positives, then surplus negatives, to -1 in place """
        num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCH_SIZE)
        fg_inds = np.where(sampled_labels == 1)[0]
        if len(fg_inds) > num_fg:
            disable_inds = npr.choice(fg_inds, size=(len(fg_inds) - num_fg), replace=False)
            sampled_labels[disable_inds] = -1

        num_bg = cfg.TRAIN.RPN_BATCH_SIZE - np.sum(sampled_labels == 1)
        bg_inds = np.where(sampled_labels == 0)[0]
        if len(bg_inds) > num_bg:
            disable_inds = npr.choice(bg_inds, size=(len(bg_inds) - num_bg), replace=False)
            sampled_labels[disable_inds] = -1

    def _pack(sampled_labels):
        """ build weights, map everything back to the full anchor grid and reshape """
        inside_weights = np.zeros((num_inside, 4), dtype=np.float32)
        inside_weights[sampled_labels == 1, :] = np.array(cfg.TRAIN.RPN_BBOX_WEIGHTS)

        # map up to original set of anchors
        full_labels = _unmap(sampled_labels, total_anchors, inds_inside, fill=-1)
        full_targets = _unmap(inside_targets, total_anchors, inds_inside, fill=0)
        full_weights = _unmap(inside_weights, total_anchors, inds_inside, fill=0)

        full_labels = full_labels.reshape((1, feat_height, feat_width, A)).transpose(0, 3, 1, 2)
        full_labels = full_labels.reshape((1, A * feat_height * feat_width))
        full_targets = full_targets.reshape((1, feat_height, feat_width, A * 4)).transpose(0, 3, 1, 2)
        full_weights = full_weights.reshape((1, feat_height, feat_width, A * 4)).transpose((0, 3, 1, 2))
        return full_labels, full_targets, full_weights

    if valid_ranges is None:
        _subsample(labels)
        labels, bbox_targets, bbox_weights = _pack(labels)
        return {'label': labels,
                'bbox_target': bbox_targets,
                'bbox_weight': bbox_weights}

    if has_gt:
        # Box area from the same x1, y1, x2, y2 columns (0..3) that the
        # overlap and regression code above uses. The original read columns
        # 1..4, which mixes a coordinate with whatever follows the box.
        gt_boxes_sizes = (gt_boxes[:, 2] - gt_boxes[:, 0] + 1.) * (gt_boxes[:, 3] - gt_boxes[:, 1] + 1.)

    all_labels, all_bbox_targets, all_bbox_weights = [], [], []
    for valid_range in valid_ranges:
        cls_labels = labels.copy()
        if has_gt:
            invalid_inds = np.where((gt_boxes_sizes < valid_range[0]**2) | (gt_boxes_sizes > valid_range[1]**2))[0]
            if len(invalid_inds) > 0:
                # ignore anchors that overlap an out-of-range gt box too much
                invalid_gt_boxes = gt_boxes[invalid_inds, :]
                invalid_overlaps = bbox_overlaps(anchors.astype(float), invalid_gt_boxes.astype(float))
                invalid_max_overlaps = invalid_overlaps.max(axis=1)
                cls_labels[invalid_max_overlaps > invalid_anchor_threshold] = -1

        _subsample(cls_labels)
        cls_labels, bbox_targets, bbox_weights = _pack(cls_labels)

        all_labels.append(cls_labels)
        all_bbox_targets.append(bbox_targets)
        all_bbox_weights.append(bbox_weights)

    all_labels = np.vstack(all_labels)
    all_bbox_targets = np.vstack(all_bbox_targets)
    all_bbox_weights = np.vstack(all_bbox_weights)

    # NOTE(review): the returned ranges are always this fixed table scaled by
    # im_info[2] (with a negative upper bound replaced by the longer image
    # side), regardless of the valid_ranges argument used above. Confirm
    # this is intended.
    valid_ranges = np.array([[0, 90], [30, 160], [90, -1]], dtype=np.float32).reshape(-1, 2)
    valid_ranges *= im_info[2]
    inds = np.where(valid_ranges[:, 1] < 0)[0]
    valid_ranges[inds, 1] = max(im_info[0], im_info[1])

    label = {'label': all_labels,
             'bbox_target': all_bbox_targets,
             'bbox_weight': all_bbox_weights,
             'valid_ranges': valid_ranges}
    return label
def _proposal_layer_py(rpn_bbox_cls_prob, rpn_bbox_pred, im_dims, cfg_key,
                       _feat_stride, anchor_scales):
    """Convert RPN outputs into a ranked, NMS-filtered set of proposals.

    Algorithm:
      1. generate A base anchors and shift them to every (H, W) cell
      2. apply the predicted bbox deltas to the shifted anchors
      3. clip the predicted boxes with ``clip_boxes(proposals, im_dims)``
      4. drop boxes rejected by ``_filter_boxes(proposals, min_size)``
      5. sort (proposal, score) pairs by score, highest first
      6. keep the top ``pre_nms_topN`` pairs
      7. run ``nms`` with ``nms_thresh`` on the remaining pairs
      8. keep the top ``post_nms_topN`` survivors

    :param rpn_bbox_cls_prob: class probabilities laid out as
        (1, H, W, 2*A); it is transposed to (1, 2*A, H, W) here and the
        last A channels are used as the foreground scores
    :param rpn_bbox_pred: box deltas laid out as (1, H, W, 4*A)
    :param im_dims: image dimensions forwarded unchanged to ``clip_boxes``
    :param cfg_key: 'TRAIN' selects ``cfg.TRAIN``; any other value selects
        ``cfg.TEST``
    :param _feat_stride: step between neighbouring anchor positions
    :param anchor_scales: scales forwarded to ``generate_anchors``
    :return: tuple ``(blob, scores, proposals_ori, scores_ori)`` where
        ``blob`` is (N, 5) float32 rows of ``[0, <4 proposal coords>]``,
        ``scores`` holds the scores of the N kept proposals, and
        ``proposals_ori`` / ``scores_ori`` are the proposals returned by
        ``bbox_transform_inv`` and the flattened scores before any
        filtering, sorting or NMS.
        NOTE(review): if ``clip_boxes`` clips in place, ``proposals_ori``
        is the clipped array -- confirm against its implementation.
    :raises AssertionError: if the batch dimension is not 1
    """
    _anchors = generate_anchor.generate_anchors(
        scales=np.array(anchor_scales))  # (A, 4) base anchors
    _num_anchors = _anchors.shape[0]

    # NHWC -> NCHW for both network outputs.
    rpn_bbox_cls_prob = np.transpose(rpn_bbox_cls_prob, [0, 3, 1, 2])
    rpn_bbox_pred = np.transpose(rpn_bbox_pred, [0, 3, 1, 2])

    # Only minibatch of 1 supported
    assert rpn_bbox_cls_prob.shape[0] == 1, \
        'Only single item batches are supported'

    # Any key other than 'TRAIN' falls back to the TEST settings.
    phase_cfg = cfg.TRAIN if cfg_key == 'TRAIN' else cfg.TEST
    pre_nms_topN = phase_cfg.RPN_PRE_NMS_TOP_N
    post_nms_topN = phase_cfg.RPN_POST_NMS_TOP_N
    nms_thresh = phase_cfg.RPN_NMS_THRESH
    min_size = phase_cfg.RPN_MIN_SIZE

    # The first _num_anchors channels are treated as bg probs, the second
    # set as fg probs; only the fg probs are used as scores.
    scores = rpn_bbox_cls_prob[:, _num_anchors:, :, :]  # (1, A, H, W)

    # 1. Generate proposals from bbox deltas and shifted anchors
    height, width = scores.shape[-2:]
    # Enumerate all shifts
    shift_x = np.arange(0, width) * _feat_stride
    shift_y = np.arange(0, height) * _feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                        shift_y.ravel())).transpose()  # (K, 4)

    # Enumerate all shifted anchors in anchor-major order: row a * K + k
    # holds base anchor ``a`` moved to cell ``k``.  This is the same row
    # order the former per-anchor concatenation loop produced.
    A = _num_anchors
    K = shifts.shape[0]
    anchors = (_anchors.reshape((A, 1, 4)) +
               shifts.reshape((1, K, 4))).reshape((K * A, 4))

    # Bbox deltas: (1, 4*A, H, W) -> (A*H*W, 4).  The reshape reads channel
    # j * A + a as coordinate j of anchor a, so the rows come out
    # anchor-major, matching ``anchors`` above.
    shape = rpn_bbox_pred.shape
    rpn_bbox_pred = rpn_bbox_pred.reshape(
        [1, 4, (shape[1] // 4) * shape[2], shape[3]])
    rpn_bbox_pred = rpn_bbox_pred.transpose([0, 2, 3, 1])
    bbox_deltas = rpn_bbox_pred.reshape([-1, 4])

    # Scores: (1, A, H, W) -> (H*W*A, 1).
    # NOTE(review): this flattens in (h, w, a) order, whereas ``anchors``
    # and ``bbox_deltas`` are flattened in (a, h, w) order -- confirm this
    # matches the network's channel layout.
    scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))
    scores_ori = scores

    # 2. Apply the deltas to the anchors.
    proposals = bbox_transform_inv(anchors, bbox_deltas)
    proposals_ori = proposals

    # 3. Shrink proposals that extend beyond the image.
    proposals = clip_boxes(proposals, im_dims)

    # 4. Keep only the boxes accepted by the minimum-size filter.
    keep = _filter_boxes(proposals, min_size)
    proposals = proposals[keep, :]
    scores = scores[keep]

    # 5. Sort all (proposal, score) pairs by score from highest to lowest.
    # 6. Take the top pre_nms_topN.
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    # Reorder the boxes together with their scores.  Reindexing only the
    # scores would pair every box with the wrong score and break the
    # hstack below whenever ``order`` was truncated.
    proposals = proposals[order, :]
    scores = scores[order]

    # 7. Apply NMS on rows of the form [x_start, y_start, x_end, y_end, score].
    # 8. Take the top post_nms_topN survivors.
    keep = nms(np.hstack((proposals, scores)), nms_thresh)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Output rois blob.
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0.
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32,
                                                   copy=False)))  # (N, 5)

    return blob, scores, proposals_ori, scores_ori