예제 #1
0
def cpu_mask_voting(masks, boxes, scores, num_classes, max_per_image, im_width, im_height,
                    nms_thresh, merge_thresh, binary_thresh=0.4):
    """
    Wrapper function for mask voting; the class of every box and mask is already known.

    For every foreground class (indices 1 .. num_classes - 1) the boxes are
    filtered with NMS, capped to ``max_per_image`` per class, and then filtered
    again by a global score threshold so that roughly ``max_per_image``
    detections survive over all classes.  Each surviving box is merged with
    every input box that overlaps it by at least ``merge_thresh``; the merge is
    weighted by the class score and delegated to ``mask_aggregation``.

    :param masks: mask array indexed as ``masks[i, 0]``; its last dimension is
                  used as the output mask size
    :param boxes: box array, one row of 4 coordinates per detection
    :param scores: score array indexed as ``scores[detection, class]``
    :param num_classes: number of classes including background (index 0)
    :param max_per_image: maximum number of detections to keep
    :param im_width: image width, forwarded to ``mask_aggregation``
    :param im_height: image height, forwarded to ``mask_aggregation``
    :param nms_thresh: threshold handed to ``py_nms_wrapper``
    :param merge_thresh: minimum overlap for a box to take part in a merge
    :param binary_thresh: forwarded to ``mask_aggregation``
    :return: ``(list_result_mask, list_result_box)``; both lists have
             ``num_classes`` entries, entry 0 stays an empty list.  For class
             ``c`` the mask entry has shape ``(k, 1, mask_size, mask_size)`` and
             the box entry has shape ``(k, 5)`` (4 box values + score).
    """
    masks = masks.astype(np.float32)
    mask_size = masks.shape[-1]
    nms = py_nms_wrapper(nms_thresh)
    # Loop-invariant conversions, hoisted out of the per-class / per-box loops.
    boxes_f32 = boxes.astype(np.float32)
    boxes_f64 = boxes.astype(np.float64)

    # Per-class NMS; keep at most max_per_image candidates per class.
    t_boxes = [[] for _ in range(num_classes)]
    t_scores = [[] for _ in range(num_classes)]
    t_all_scores = []
    for i in range(1, num_classes):
        dets = np.hstack((boxes_f32, scores[:, i:i + 1]))
        inds = nms(dets)
        inds = inds[:min(len(inds), max_per_image)]
        t_boxes[i] = boxes[inds]
        t_scores[i] = scores[inds, i]
        t_all_scores.extend(t_scores[i])

    # Global score threshold: the score of the max_per_image-th best candidate,
    # but never below 1e-3.  With no candidate at all there is nothing to rank,
    # so fall back to the floor instead of indexing an empty array.
    sorted_scores = np.sort(t_all_scores)[::-1]
    num_keep = min(len(sorted_scores), max_per_image)
    if num_keep > 0:
        thresh = max(sorted_scores[num_keep - 1], 1e-3)
    else:
        thresh = 1e-3

    for i in range(1, num_classes):
        keep = np.where(t_scores[i] >= thresh)
        t_boxes[i] = t_boxes[i][keep]
        t_scores[i] = t_scores[i][keep]

    # Resize every input mask to the pixel size of its (rounded) box.
    res_mask = []
    for i in range(boxes.shape[0]):
        box = np.round(boxes[i]).astype(int)
        box_size = (box[2] - box[0] + 1, box[3] - box[1] + 1)
        res_mask.append(cv2.resize(masks[i, 0].astype(np.float32), box_size))

    list_result_box = [[] for _ in range(num_classes)]
    list_result_mask = [[] for _ in range(num_classes)]
    for c in range(1, num_classes):
        num_boxes = len(t_boxes[c])
        masks_ar = np.zeros((num_boxes, 1, mask_size, mask_size))
        boxes_ar = np.zeros((num_boxes, 4))
        for i in range(num_boxes):
            # Weight every sufficiently overlapping box by its class score.
            cur_ov = bbox_overlaps(boxes_f64, t_boxes[c][i, np.newaxis].astype(np.float64))
            cur_inds = np.where(cur_ov >= merge_thresh)[0]
            cur_weights = scores[cur_inds, c]
            cur_weights = cur_weights / cur_weights.sum()
            # mask_aggregation expects the masks as a plain list
            p_mask = [res_mask[j] for j in cur_inds]
            orig_mask, boxes_ar[i] = mask_aggregation(
                boxes[cur_inds], p_mask, cur_weights, im_width, im_height, binary_thresh)
            masks_ar[i, 0] = cv2.resize(orig_mask.astype(np.float32), (mask_size, mask_size))
        list_result_box[c] = np.hstack((boxes_ar, t_scores[c][:, np.newaxis]))
        list_result_mask[c] = masks_ar
    return list_result_mask, list_result_box
예제 #2
0
    def create_roidb_from_box_list(self, box_list, mapping_list, gt_roidb):
        """
        given ground truth, prepare roidb

        Side effect: sets ``self.num_images`` to ``len(gt_roidb)``.

        :param box_list: [image_index] ndarray with one row per box; the first
                         four columns are the box coordinates and an optional
                         fifth column is the proposal score
        :param mapping_list: not used by this method
        :param gt_roidb: [image_index]['image', 'height', 'width', 'boxes', 'gt_classes']
        :return: roidb: [image_index]['image', 'height', 'width', 'boxes',
                 'gt_classes', 'gt_overlaps', 'max_classes', 'max_overlaps',
                 'flipped', 'proposal_scores'].  'proposal_scores' is ``None``
                 for an image whose box array carries no score column.
        """
        self.num_images = len(gt_roidb)
        assert len(box_list) == self.num_images, 'number of boxes matrix must match number of images'
        roidb = []
        for i in range(self.num_images):
            gt_rec = gt_roidb[i]
            roi_rec = dict()
            roi_rec['image'] = gt_rec['image']
            roi_rec['height'] = gt_rec['height']
            roi_rec['width'] = gt_rec['width']
            boxes = box_list[i]

            # Reset per image: otherwise an array without a score column would
            # either hit an unbound name or silently reuse the previous
            # image's scores.
            scores = None
            if boxes.shape[1] == 5:
                scores = boxes[:, -1]
                boxes = boxes[:, :4]

            num_boxes = boxes.shape[0]
            overlaps = np.zeros((num_boxes, self.num_classes), dtype=np.float32)
            if gt_rec['boxes'].size > 0:
                gt_boxes = gt_rec['boxes']
                gt_classes = gt_rec['gt_classes']

                gt_overlaps = bbox_overlaps(boxes.astype(np.float64), gt_boxes.astype(np.float64))
                # for each box, keep only its best overlap (must be greater than zero)
                # and store it in the column of the matched ground-truth class
                argmaxes = gt_overlaps.argmax(axis=1)
                maxes = gt_overlaps.max(axis=1)
                positive = np.where(maxes > 0)[0]
                overlaps[positive, gt_classes[argmaxes[positive]]] = maxes[positive]

            roi_rec.update({'boxes': boxes,
                            'gt_classes': np.zeros((num_boxes,), dtype=np.int32),
                            'gt_overlaps': overlaps,
                            'max_classes': overlaps.argmax(axis=1),
                            'max_overlaps': overlaps.max(axis=1),
                            'flipped': False,
                            'proposal_scores': scores})

            # background roi => background class
            zero_indexes = np.where(roi_rec['max_overlaps'] == 0)[0]
            assert all(roi_rec['max_classes'][zero_indexes] == 0)
            # foreground roi => foreground class
            nonzero_indexes = np.where(roi_rec['max_overlaps'] > 0)[0]
            assert all(roi_rec['max_classes'][nonzero_indexes] != 0)

            roidb.append(roi_rec)

        return roidb
예제 #3
0
        def get_scores_per_class(bbox_per_class, gt_box_per_class,
                                 score_per_class):
            """
            Build, for every threshold in ``self._target_thresh``, a score
            matrix restricted to valid box/ground-truth pairs.

            An entry ``[i, j]`` keeps the score of box ``i`` only when ground
            truth ``j`` is the best-overlapping ground truth of box ``i`` and
            that overlap is strictly above the threshold; every other entry
            is zero.

            :param bbox_per_class: boxes of one class, [FIRST_N, 4]
            :param gt_box_per_class: ground truth of that class; the last
                                     column is dropped before computing overlaps
            :param score_per_class: scores, must be 2-dim [FIRST_N, 1]
            :return: list with one [FIRST_N, num_valid_gt] array per threshold,
                     or an empty list when there is no ground truth
            """
            num_valid_gt = len(gt_box_per_class)
            if num_valid_gt == 0:
                return []

            overlap_mat = bbox_overlaps(
                bbox_per_class.astype(np.float64),
                gt_box_per_class[:, :-1].astype(np.float64))

            # One-hot row per box marking its best-overlapping ground truth.
            # It does not depend on the threshold, so build it once.
            max_overlap_mask = np.eye(num_valid_gt)[np.argmax(overlap_mat,
                                                              axis=1)]

            output_list_per_class = []
            for thresh in self._target_thresh:
                # following mAP metric
                # [first_n, num_valid_gt]; updated in place to keep the score dtype
                overlap_score = np.tile(score_per_class, (1, num_valid_gt))
                overlap_score *= (overlap_mat > thresh)
                overlap_score *= max_overlap_mask
                output_list_per_class.append(overlap_score)

            return output_list_per_class
예제 #4
0
    def forward(self, is_train, req, in_data, out_data, aux):
        """
        Compute 0/1 targets per box, foreground class and overlap threshold.

        Inputs (converted with ``asnumpy()``):
          * ``in_data[0]`` bbox, [first_n, num_fg_classes, 4]
          * ``in_data[1]`` gt_box, [1, num_gt, 5]; the last column is the
            class id, where id ``k + 1`` belongs to foreground class index ``k``
          * ``in_data[2]`` score, [first_n, num_fg_classes]

        For each class and each threshold in ``self._target_thresh`` a box is
        marked 1 when it is the arg-max of the masked score column of some
        ground truth and it overlaps at least one ground truth by more than
        the threshold.  Classes without ground truth get all-zero targets of
        width ``self._num_thresh``.

        The float32 result, stacked to [first_n, num_fg_classes, num_thresh],
        is written with ``self.assign(out_data[0], req[0], ...)``.
        """
        # bbox, [first_n, num_fg_classes, 4]
        bbox = in_data[0].asnumpy()
        num_boxes = bbox.shape[0]
        num_fg_classes = bbox.shape[1]
        gt_box = in_data[1].asnumpy()
        # score, [first_n, num_fg_classes]
        score = in_data[2].asnumpy()

        batch_image, num_gt, code_size = gt_box.shape
        # The message reports the offending batch size (it used to print num_gt).
        assert batch_image == 1, \
            'only support batch_image=1, but receive %d' % batch_image
        assert code_size == 5, \
            'code_size of gt should be 5, but receive %d' % code_size
        assert len(score.shape) == 2, \
            'shape of score is %d instead of 2.' % len(score.shape)
        assert score.shape[1] == num_fg_classes, \
            'number of fg classes should be same for boxes and scores'

        # Class ids of all ground-truth boxes, computed once for every class.
        gt_classes = gt_box[0, :, -1].astype(np.int32)

        output_list = []
        for cls_idx in range(num_fg_classes):
            valid_gt_box = gt_box[0, gt_classes == (cls_idx + 1), :]
            num_valid_gt = len(valid_gt_box)

            if num_valid_gt == 0:
                output_list.append(
                    np.zeros(shape=(num_boxes, self._num_thresh),
                             dtype=np.float32))
                continue

            bbox_per_class = bbox[:, cls_idx, :]
            score_per_class = score[:, cls_idx:cls_idx + 1]
            overlap_mat = bbox_overlaps(
                bbox_per_class.astype(np.float64),
                valid_gt_box[:, :-1].astype(np.float64))

            # One-hot row per box marking its best-overlapping ground truth;
            # independent of the threshold, so built once per class.
            max_overlap_mask = np.eye(num_valid_gt)[np.argmax(overlap_mat,
                                                              axis=1)]

            output_list_per_class = []
            for thresh in self._target_thresh:
                # following mAP metric
                overlap_mask = (overlap_mat > thresh)
                valid_bbox_indices = np.where(overlap_mask)[0]
                # require score be 2-dim; in-place ops keep the score dtype
                overlap_score = np.tile(score_per_class, (1, num_valid_gt))
                overlap_score *= overlap_mask
                overlap_score *= max_overlap_mask
                # NOTE(review): for a ground truth whose column is all zero,
                # argmax returns index 0; box 0 is then marked whenever it is
                # valid for any other ground truth - confirm this is intended.
                max_score_indices = np.argmax(overlap_score, axis=0)
                output = np.zeros((num_boxes, ))
                output[np.intersect1d(max_score_indices,
                                      valid_bbox_indices)] = 1
                output_list_per_class.append(output)
            output_list.append(np.stack(output_list_per_class, axis=-1))

        blob = np.stack(output_list, axis=1).astype(np.float32, copy=False)
        self.assign(out_data[0], req[0], blob)
예제 #5
0
def check_rois(rois, gt_boxes):
    '''
    Print and return how many rois reach an IoU of at least 0.5 with any
    ground-truth box.

    :param  rois: numpy, (128, 5); column 0 is skipped, columns 1: are the box
            gt_boxes: numpy; the first four columns are the box
    :return: num of fg_rois with iou >= 0.5
    '''
    overlaps = bbox_overlaps(rois[:, 1:].astype(np.float64),
                             gt_boxes[:, :4].astype(np.float64))
    # best overlap of every roi over all ground-truth boxes
    max_overlaps = overlaps.max(axis=1)
    fg_indexes = np.where(max_overlaps >= 0.5)[0]
    print('check proposals: {}'.format(fg_indexes.shape))
    # The docstring always promised the count; the function used to return None.
    return fg_indexes.size
예제 #6
0
파일: rcnn.py 프로젝트: makefile/DCR
def resample_rois(rois, fg_rois_per_image, rois_per_image, num_classes, cfg,
                  labels=None, overlaps=None, bbox_targets=None, gt_boxes=None):
    """
    generate random sample of ROIs comprising foreground and background examples
    :param rois: all_rois; e2e: [n, 5] with batch_index (column 0 is skipped
                 when overlaps are computed here)
    :param fg_rois_per_image: foreground roi number
    :param rois_per_image: total roi number
    :param num_classes: number of classes (not used by this function)
    :param cfg: config providing TRAIN.FG_THRESH, TRAIN.BG_THRESH_HI and TRAIN.BG_THRESH_LO
    :param labels: maybe precomputed; when given, ``overlaps`` must be given too
    :param overlaps: maybe precomputed (max_overlaps)
    :param bbox_targets: maybe precomputed (not used by this function)
    :param gt_boxes: optional for e2e [n, 5] (x1, y1, x2, y2, cls); required when ``labels`` is None
    :return: keep_indexes, 1-D array of indexes into ``rois``: sampled
             foreground first, then sampled background, then random padding
             when fewer than ``rois_per_image`` were found (padding may repeat
             indexes that are already present)
    :raises ValueError: when padding is required but ``rois`` is empty
    """
    if labels is None:
        # Only the best overlap per RoI is needed for sampling.
        overlaps = bbox_overlaps(rois[:, 1:].astype(np.float64),
                                 gt_boxes[:, :4].astype(np.float64)).max(axis=1)

    # foreground RoI with FG_THRESH overlap
    fg_indexes = np.where(overlaps >= cfg.TRAIN.FG_THRESH)[0]
    # guard against the case when an image has fewer than fg_rois_per_image foreground RoIs
    fg_rois_per_this_image = min(fg_rois_per_image, fg_indexes.size)
    # Sample foreground regions without replacement
    if fg_indexes.size > fg_rois_per_this_image:
        fg_indexes = npr.choice(fg_indexes, size=fg_rois_per_this_image, replace=False)

    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_indexes = np.where((overlaps < cfg.TRAIN.BG_THRESH_HI) & (overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
    # Compute number of background RoIs to take from this image (guarding against there being fewer than desired)
    bg_rois_per_this_image = min(rois_per_image - fg_rois_per_this_image, bg_indexes.size)
    # Sample background regions without replacement
    if bg_indexes.size > bg_rois_per_this_image:
        bg_indexes = npr.choice(bg_indexes, size=bg_rois_per_this_image, replace=False)

    # indexes selected
    keep_indexes = np.append(fg_indexes, bg_indexes)

    num_rois = len(rois)
    if num_rois == 0 and keep_indexes.shape[0] < rois_per_image:
        # Nothing to pad from: the loop below would otherwise never terminate.
        raise ValueError('cannot sample %d RoIs from an empty RoI set' % rois_per_image)

    # pad more to ensure a fixed minibatch size
    while keep_indexes.shape[0] < rois_per_image:
        gap = min(num_rois, rois_per_image - keep_indexes.shape[0])
        gap_indexes = npr.choice(num_rois, size=gap, replace=False)
        keep_indexes = np.append(keep_indexes, gap_indexes)

    return keep_indexes
예제 #7
0
def sample_rois_v2(rois,
                   num_classes,
                   cfg,
                   labels=None,
                   overlaps=None,
                   bbox_targets=None,
                   gt_boxes=None):
    """
    label every ROI and build its bbox regression targets (no subsampling is done here)
    :param rois: all_rois; e2e: [n, 5] with batch_index (column 0 is skipped)
    :param num_classes: number of classes
    :param cfg: config providing TRAIN.BG_THRESH_HI, TRAIN.BBOX_NORMALIZATION_PRECOMPUTED,
                TRAIN.BBOX_MEANS and TRAIN.BBOX_STDS
    :param labels: maybe precomputed; when given, ``overlaps`` must be given
                   too and the array is modified in place (background set to 0)
    :param overlaps: maybe precomputed (max_overlaps)
    :param bbox_targets: maybe precomputed
    :param gt_boxes: optional for e2e [n, 5] (x1, y1, x2, y2, cls); required
                     unless both ``labels`` and ``bbox_targets`` are given
    :return: (rois, labels, bbox_targets, bbox_weights)
    :raises ValueError: when targets must be computed but ``gt_boxes`` is missing
    """
    gt_assignment = None
    if labels is None:
        overlaps = bbox_overlaps(rois[:, 1:].astype(np.float64),
                                 gt_boxes[:, :4].astype(np.float64))
        gt_assignment = overlaps.argmax(axis=1)
        overlaps = overlaps.max(axis=1)
        labels = gt_boxes[gt_assignment, 4]

    # set labels of bg_rois to be 0
    bg_ind = np.where(overlaps < cfg.TRAIN.BG_THRESH_HI)[0]
    labels[bg_ind] = 0

    # load or compute bbox_target
    if bbox_targets is not None:
        bbox_target_data = bbox_targets
    else:
        if gt_assignment is None:
            # labels were precomputed, so the roi -> gt assignment was never
            # derived above; recover it instead of hitting an unbound name.
            if gt_boxes is None:
                raise ValueError(
                    'gt_boxes is required to compute bbox_targets')
            gt_assignment = bbox_overlaps(
                rois[:, 1:].astype(np.float64),
                gt_boxes[:, :4].astype(np.float64)).argmax(axis=1)
        targets = bbox_transform(rois[:, 1:], gt_boxes[gt_assignment, :4])
        if cfg.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED:
            targets = ((targets - np.array(cfg.TRAIN.BBOX_MEANS)) /
                       np.array(cfg.TRAIN.BBOX_STDS))
        bbox_target_data = np.hstack((labels[:, np.newaxis], targets))

    bbox_targets, bbox_weights = \
        expand_bbox_regression_targets(bbox_target_data, num_classes, cfg)

    return rois, labels, bbox_targets, bbox_weights
예제 #8
0
        def get_scores(bbox, gt_box, score):
            """
            Build per-class, per-threshold score matrices restricted to valid
            box/ground-truth pairs.

            :param bbox: [first_n, num_fg_classes, 4]
            :param gt_box: [1, num_gt, 5]; the last column is the class id,
                           where id ``k + 1`` belongs to class index ``k``
            :param score: [first_n, num_fg_classes]
            :return: list with one entry per foreground class: an empty list
                     when the class has no ground truth, otherwise a list with
                     one [first_n, num_valid_gt] array per threshold in
                     ``self._target_thresh``.  Entry ``[i, j]`` keeps the score
                     of box ``i`` only when ground truth ``j`` is its
                     best-overlapping ground truth and the overlap is strictly
                     above the threshold.
            """
            # Class ids of all ground-truth boxes, computed once for every class.
            gt_classes = gt_box[0, :, -1].astype(np.int32)

            output_list = []
            for cls_idx in range(num_fg_classes):
                # [num_valid_gt, 5]
                valid_gt_box = gt_box[0, gt_classes == (cls_idx + 1), :]
                num_valid_gt = len(valid_gt_box)

                if num_valid_gt == 0:
                    output_list.append([])
                    continue

                bbox_per_class = bbox[:, cls_idx, :]
                # score_per_class, [first_n, 1]
                score_per_class = score[:, cls_idx:cls_idx + 1]
                # [first_n, num_valid_gt]
                overlap_mat = bbox_overlaps(
                    bbox_per_class.astype(np.float64),
                    valid_gt_box[:, :-1].astype(np.float64))

                # One-hot row per box marking its best-overlapping ground
                # truth; independent of the threshold, so built once.
                max_overlap_mask = np.eye(num_valid_gt)[np.argmax(overlap_mat,
                                                                  axis=1)]

                output_list_per_class = []
                for thresh in self._target_thresh:
                    # following mAP metric
                    # [first_n, num_valid_gt]; in-place ops keep the score dtype
                    overlap_score = np.tile(score_per_class,
                                            (1, num_valid_gt))
                    overlap_score *= (overlap_mat > thresh)
                    overlap_score *= max_overlap_mask
                    output_list_per_class.append(overlap_score)
                output_list.append(output_list_per_class)

            return output_list
예제 #9
0
    def assign_bbox(gt_bbox, det_bbox, frame_seg_id, traj_id):
        """
        Pair ground-truth and detected boxes of one frame and record the
        stability errors of every accepted pair.

        The pairing comes from ``linear_sum_assignment`` on the negated
        overlap matrix; pairs whose overlap is below ``overlap_thresh`` are
        dropped.  Accepted pairs are appended to the enclosing
        ``det_traj_frame`` (keyed by trajectory id) and ``vid_traj`` (keyed by
        ``frame_seg_id``).  Nothing happens when either box set is empty.
        """
        if not (len(gt_bbox) and len(det_bbox)):
            return

        iou = bbox_overlaps(gt_bbox, det_bbox)
        assignment = linear_sum_assignment(-iou)
        for gt_idx, det_idx in zip(*assignment):
            if iou[gt_idx, det_idx] < overlap_thresh:
                # matched, but not overlapping enough to count
                continue

            traj = traj_id[gt_idx]
            gt_row = gt_bbox[gt_idx, :]
            det_row = det_bbox[det_idx, :]

            err_x, err_y, err_r, err_s = get_stability_err(gt_row, det_row)

            # per-trajectory error log
            traj_frames = det_traj_frame.setdefault(traj, [])
            traj_frames.append([frame_seg_id, err_x, err_y, err_r, err_s])

            # per-frame list of [trajectory id, first four detection values]
            frame_entries = vid_traj.setdefault(frame_seg_id, [])
            frame_entries.append([traj] + list(det_row[:4]))
def assign_quadrangle_anchor(feat_shape,
                             gt_boxes,
                             im_info,
                             cfg,
                             feat_strides=(64, 32, 16, 8, 4),
                             scales=(8, 16, 32),
                             ratios=(0.5, 1, 2),
                             allowed_border=0):
    """
    assign ground truth boxes to anchor positions

    Anchors are generated for every entry of ``feat_strides``.  The
    ground-truth quadrangles are reduced to their axis-aligned bounding boxes,
    and labels / regression targets are assigned against those boxes.

    :param feat_shape: per-level output shapes; the last two entries of
                       ``feat_shape[i]`` are (feat_height, feat_width)
    :param gt_boxes: ground truth, one row per quadrangle; columns 0..7 are
                     read as (x1, y1, x2, y2, x3, y3, x4, y4)
    :param im_info: ``im_info[0][0]`` bounds anchor y2 and ``im_info[0][1]``
                    bounds anchor x2 when anchors outside the image are filtered
    :param cfg: config providing the TRAIN.RPN_* settings read below
    :param feat_strides: anchor position step of every level
    :param scales: used to generate anchors, affects num_anchors (per location)
    :param ratios: aspect ratios of generated anchors
    :param allowed_border: filter out anchors with edge overlap > allowed_border
    :return: dict of label
    'label': of shape (1, sum over levels of num_anchors * feat_height * feat_width);
             1 is positive, 0 is negative, -1 is dont care
    'bbox_target': of shape (1, num_anchors * 4, sum over levels of feat_height * feat_width)
    'bbox_weight': same shape as 'bbox_target'; cfg.TRAIN.RPN_BBOX_WEIGHTS on positive anchors, 0 elsewhere
    """
    def _unmap(data, count, inds, fill=0):
        """ scatter the rows of data (a subset selected by inds) into a float32 array of count rows """
        ret = np.empty((count, ) + data.shape[1:], dtype=np.float32)
        ret.fill(fill)
        ret[inds] = data
        return ret

    DEBUG = False
    im_info = im_info[0]
    scales = np.array(scales, dtype=np.float32)

    anchors_list = []
    anchors_num_list = []
    inds_inside_list = []
    feat_infos = []
    A_list = []

    # Number of anchors generated by the previous levels; turns per-level
    # indexes into indexes of the concatenated anchor set.
    anchor_offset = 0
    for i, feat_stride in enumerate(feat_strides):
        base_anchors = generate_anchors(base_size=feat_stride,
                                        ratios=list(ratios),
                                        scales=scales)

        A = base_anchors.shape[0]
        feat_height, feat_width = feat_shape[i][-2:]
        feat_infos.append([feat_height, feat_width])
        A_list.append(A)

        # every (x, y) grid offset of this level, repeated for both box corners
        shift_x = np.arange(0, feat_width) * feat_stride
        shift_y = np.arange(0, feat_height) * feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                            shift_y.ravel())).transpose()
        K = shifts.shape[0]

        all_anchors = base_anchors.reshape((1, A, 4)) + shifts.reshape(
            (1, K, 4)).transpose((1, 0, 2))
        all_anchors = all_anchors.reshape((K * A, 4))
        total_anchors = int(K * A)
        anchors_num_list.append(total_anchors)

        # only keep anchors inside the image
        inds_inside = np.where(
            (all_anchors[:, 0] >= -allowed_border)
            & (all_anchors[:, 1] >= -allowed_border)
            & (all_anchors[:, 2] < im_info[1] + allowed_border)
            & (all_anchors[:, 3] < im_info[0] + allowed_border))[0]

        if DEBUG:
            print('total_anchors %s' % (total_anchors, ))
            print('inds_inside %s' % (len(inds_inside), ))

        # keep only inside anchors
        anchors = all_anchors[inds_inside, :]
        if DEBUG:
            print('anchors shape %s' % (anchors.shape, ))

        anchors_list.append(anchors)
        inds_inside_list.append(inds_inside + anchor_offset)
        anchor_offset += total_anchors

    anchors = np.concatenate(anchors_list)
    inds_inside = np.concatenate(inds_inside_list)
    total_anchors = sum(anchors_num_list)

    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.empty((len(inds_inside), ), dtype=np.float32)
    labels.fill(-1)

    # axis-aligned bounding box of every ground-truth quadrangle
    quad_x = gt_boxes[:, [0, 2, 4, 6]]
    quad_y = gt_boxes[:, [1, 3, 5, 7]]
    gt_boxes_bbox = np.zeros((gt_boxes.shape[0], 4), dtype=gt_boxes.dtype)
    gt_boxes_bbox[:, 0] = quad_x.min(axis=1)
    gt_boxes_bbox[:, 1] = quad_y.min(axis=1)
    gt_boxes_bbox[:, 2] = quad_x.max(axis=1)
    gt_boxes_bbox[:, 3] = quad_y.max(axis=1)

    if gt_boxes.size > 0:
        # overlap between the anchors and the gt boxes
        # overlaps (ex, gt)
        overlaps = bbox_overlaps(anchors.astype(np.float64),
                                 gt_boxes_bbox.astype(np.float64))
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps,
                                   np.arange(overlaps.shape[1])]
        # every anchor that ties the best overlap of some gt box
        # NOTE(review): when a gt box overlaps no anchor its best overlap is
        # 0, so every anchor with zero overlap to it matches here - confirm.
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

        if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels first so that positive labels can clobber them
            labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

        # fg label: for each gt, anchor with highest overlap
        labels[gt_argmax_overlaps] = 1

        # fg label: above threshold IoU
        labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

        if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels last so that negative labels can clobber positives
            labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
    else:
        labels[:] = 0

    # subsample positive labels if we have too many
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCH_SIZE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds,
                                  size=(len(fg_inds) - num_fg),
                                  replace=False)
        if DEBUG:
            disable_inds = fg_inds[:(len(fg_inds) - num_fg)]
        labels[disable_inds] = -1

    # subsample negative labels if we have too many
    num_bg = cfg.TRAIN.RPN_BATCH_SIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(bg_inds,
                                  size=(len(bg_inds) - num_bg),
                                  replace=False)
        if DEBUG:
            disable_inds = bg_inds[:(len(bg_inds) - num_bg)]
        labels[disable_inds] = -1

    bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if gt_boxes.size > 0:
        bbox_targets[:] = bbox_transform(anchors,
                                         gt_boxes_bbox[argmax_overlaps, :4])

    bbox_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_weights[labels == 1, :] = np.array(cfg.TRAIN.RPN_BBOX_WEIGHTS)

    if DEBUG:
        _sums = bbox_targets[labels == 1, :].sum(axis=0)
        _squared_sums = (bbox_targets[labels == 1, :]**2).sum(axis=0)
        _counts = np.sum(labels == 1)
        means = _sums / (_counts + 1e-14)
        stds = np.sqrt(_squared_sums / _counts - means**2)
        print('means %s' % (means, ))
        print('stdevs %s' % (stds, ))

    # map up to original set of anchors
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_weights = _unmap(bbox_weights, total_anchors, inds_inside, fill=0)

    if DEBUG:
        if gt_boxes.size > 0:
            print('rpn: max max_overlaps %s' % (np.max(max_overlaps), ))
        print('rpn: num_positives %s' % (np.sum(labels == 1), ))
        print('rpn: num_negatives %s' % (np.sum(labels == 0), ))
        _fg_sum = np.sum(labels == 1)
        _bg_sum = np.sum(labels == 0)
        _count = 1
        print('rpn: num_positive avg %s' % (_fg_sum // _count, ))
        print('rpn: num_negative avg %s' % (_bg_sum // _count, ))

    # reshape: cut the flat arrays back into per-level pieces
    label_list = list()
    bbox_target_list = list()
    bbox_weight_list = list()
    start = 0
    for (feat_height, feat_width), A, level_count in zip(
            feat_infos, A_list, anchors_num_list):
        stop = start + level_count
        label = labels[start:stop]
        bbox_target = bbox_targets[start:stop]
        bbox_weight = bbox_weights[start:stop]
        start = stop

        label = label.reshape(
            (1, feat_height, feat_width, A)).transpose(0, 3, 1, 2)
        label = label.reshape((1, A * feat_height * feat_width))
        bbox_target = bbox_target.reshape(
            (1, feat_height * feat_width, A * 4)).transpose(0, 2, 1)
        bbox_weight = bbox_weight.reshape(
            (1, feat_height * feat_width, A * 4)).transpose((0, 2, 1))

        label_list.append(label)
        bbox_target_list.append(bbox_target)
        bbox_weight_list.append(bbox_weight)

    label_concat = np.concatenate(label_list, axis=1)
    bbox_target_concat = np.concatenate(bbox_target_list, axis=2)
    bbox_weight_concat = np.concatenate(bbox_weight_list, axis=2)

    label = {
        'label': label_concat,
        'bbox_target': bbox_target_concat,
        'bbox_weight': bbox_weight_concat
    }
    return label
예제 #11
0
def assign_pyramid_anchor(feat_shapes, gt_boxes, im_info, cfg, feat_strides=(4, 8, 16, 32, 64),
                          scales=(8,), ratios=(0.5, 1, 2), allowed_border=0, balance_scale_bg=False):
    """
    Assign ground truth boxes to the anchors of every pyramid level.

    Anchors are generated per level, anchors crossing the image border are
    dropped, the remaining anchors of all levels are pooled and labelled
    together, and the result is scattered back to the full per-level grids.

    :param feat_shapes: one entry per level; ``feat_shapes[i][0][-2:]`` is read as (feat_height, feat_width)
    :param gt_boxes: ground truth boxes; columns 0..3 are used as regression targets
                     (presumably x1, y1, x2, y2 -- confirm against bbox_transform)
    :param im_info: only ``im_info[0]`` is used; its element 0 bounds anchor column 3 and
                    element 1 bounds anchor column 2 (presumably height, width)
    :param cfg: config object; reads cfg.TRAIN.RPN_CLOBBER_POSITIVES, RPN_NEGATIVE_OVERLAP,
                RPN_POSITIVE_OVERLAP, RPN_BATCH_SIZE (-1 disables subsampling), RPN_FG_FRACTION
                and RPN_BBOX_WEIGHTS
    :param feat_strides: anchor position step of each level, also used as the anchor base size
    :param scales: 1-D: shared by all levels; 2-D: ``scales[feat_id]`` is used for level feat_id
    :param ratios: aspect ratios, same 1-D / 2-D convention as scales
    :param allowed_border: anchors may exceed the image by at most this many pixels
    :param balance_scale_bg: if True, cap the background samples of each level at
                             num_bg // len(feat_strides) instead of capping the pooled total
    :return: dict of label
    'label': of shape (1, sum over levels of A * feat_height * feat_width); 1 positive, 0 negative, -1 ignored
    'bbox_target': of shape (1, A * 4, sum over levels of feat_height * feat_width)
    'bbox_weight': same shape as 'bbox_target'; cfg.TRAIN.RPN_BBOX_WEIGHTS on positives, 0 elsewhere
    """
    def _unmap(data, count, inds, fill=0):
        """Scatter data (rows of the subset inds) into a float32 array of size count pre-filled with fill."""
        if len(data.shape) == 1:
            ret = np.empty((count,), dtype=np.float32)
            ret.fill(fill)
            ret[inds] = data
        else:
            ret = np.empty((count,) + data.shape[1:], dtype=np.float32)
            ret.fill(fill)
            ret[inds, :] = data
        return ret

    # when True, subsampling of the pooled fg/bg sets is deterministic (first entries are dropped)
    DEBUG = False
    im_info = im_info[0]
    scales = np.array(scales, dtype=np.float32)
    ratios = np.array(ratios, dtype=np.float32)
    assert(len(feat_shapes) == len(feat_strides))

    fpn_args = []
    fpn_anchors_fid = np.zeros(0).astype(int)
    fpn_anchors = np.zeros([0, 4])
    fpn_labels = np.zeros(0)
    fpn_inds_inside = []
    for feat_id in range(len(feat_strides)):
        # len(scales.shape) == 1 just for backward compatibility, will remove in the future
        if len(scales.shape) == 1:
            base_anchors = generate_anchors(base_size=feat_strides[feat_id], ratios=ratios, scales=scales)
        else:
            assert len(scales.shape) == len(ratios.shape) == 2
            base_anchors = generate_anchors(base_size=feat_strides[feat_id], ratios=ratios[feat_id], scales=scales[feat_id])
        num_anchors = base_anchors.shape[0]
        feat_height, feat_width = feat_shapes[feat_id][0][-2:]

        # 1. generate proposals from bbox deltas and shifted anchors
        shift_x = np.arange(0, feat_width) * feat_strides[feat_id]
        shift_y = np.arange(0, feat_height) * feat_strides[feat_id]
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel())).transpose()
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = num_anchors
        K = shifts.shape[0]
        all_anchors = base_anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2))
        all_anchors = all_anchors.reshape((K * A, 4))
        total_anchors = int(K * A)

        # only keep anchors inside the image
        inds_inside = np.where((all_anchors[:, 0] >= -allowed_border) &
                               (all_anchors[:, 1] >= -allowed_border) &
                               (all_anchors[:, 2] < im_info[1] + allowed_border) &
                               (all_anchors[:, 3] < im_info[0] + allowed_border))[0]

        # keep only inside anchors
        anchors = all_anchors[inds_inside, :]

        # label: 1 is positive, 0 is negative, -1 is dont care
        # for sigmoid classifier, ignore the 'background' class
        labels = np.empty((len(inds_inside),), dtype=np.float32)
        labels.fill(-1)

        # pool this level's anchors; fpn_anchors_fid records how many each level contributed
        fpn_anchors_fid = np.hstack((fpn_anchors_fid, len(inds_inside)))
        fpn_anchors = np.vstack((fpn_anchors, anchors))
        fpn_labels = np.hstack((fpn_labels, labels))
        fpn_inds_inside.append(inds_inside)
        fpn_args.append([feat_height, feat_width, A, total_anchors])

    if gt_boxes.size > 0:
        # overlap between the anchors and the gt boxes
        # overlaps (ex, gt)
        # np.float64 is what the removed alias np.float used to resolve to
        overlaps = bbox_overlaps(fpn_anchors.astype(np.float64), gt_boxes.astype(np.float64))
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(len(fpn_anchors)), argmax_overlaps]
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])]
        # every anchor that ties for the best overlap with some gt box
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

        if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels first so that positive labels can clobber them
            fpn_labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
        # fg label: for each gt, anchor with highest overlap
        fpn_labels[gt_argmax_overlaps] = 1
        # fg label: above threshold IoU
        fpn_labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1
        if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels last so that negative labels can clobber positives
            fpn_labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
    else:
        fpn_labels[:] = 0

    # subsample positive labels if we have too many
    num_fg = fpn_labels.shape[0] if cfg.TRAIN.RPN_BATCH_SIZE == -1 else int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCH_SIZE)
    fg_inds = np.where(fpn_labels >= 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds, size=(len(fg_inds) - num_fg), replace=False)
        if DEBUG:
            disable_inds = fg_inds[:(len(fg_inds) - num_fg)]
        fpn_labels[disable_inds] = -1

    # subsample negative labels if we have too many
    num_bg = fpn_labels.shape[0] if cfg.TRAIN.RPN_BATCH_SIZE == -1 else cfg.TRAIN.RPN_BATCH_SIZE - np.sum(fpn_labels >= 1)
    bg_inds = np.where(fpn_labels == 0)[0]
    # turn per-level counts into slice boundaries: level i owns [fid[i], fid[i + 1])
    fpn_anchors_fid = np.hstack((0, fpn_anchors_fid.cumsum()))

    if balance_scale_bg:
        # floor division keeps the sample size an integer on Python 3 as well
        num_bg_scale = num_bg // len(feat_strides)
        for feat_id in range(0, len(feat_strides)):
            bg_ind_scale = bg_inds[(bg_inds >= fpn_anchors_fid[feat_id]) & (bg_inds < fpn_anchors_fid[feat_id+1])]
            if len(bg_ind_scale) > num_bg_scale:
                disable_inds = npr.choice(bg_ind_scale, size=(len(bg_ind_scale) - num_bg_scale), replace=False)
                fpn_labels[disable_inds] = -1
    else:
        if len(bg_inds) > num_bg:
            disable_inds = npr.choice(bg_inds, size=(len(bg_inds) - num_bg), replace=False)
            if DEBUG:
                disable_inds = bg_inds[:(len(bg_inds) - num_bg)]
            fpn_labels[disable_inds] = -1

    # regression targets and weights are only filled in for the surviving positives
    fg_mask = fpn_labels >= 1
    fpn_bbox_targets = np.zeros((len(fpn_anchors), 4), dtype=np.float32)
    if gt_boxes.size > 0:
        fpn_bbox_targets[fg_mask, :] = bbox_transform(fpn_anchors[fg_mask, :], gt_boxes[argmax_overlaps[fg_mask], :4])
    fpn_bbox_weights = np.zeros((len(fpn_anchors), 4), dtype=np.float32)
    fpn_bbox_weights[fg_mask, :] = np.array(cfg.TRAIN.RPN_BBOX_WEIGHTS)

    label_list = []
    bbox_target_list = []
    bbox_weight_list = []
    for feat_id in range(0, len(feat_strides)):
        feat_height, feat_width, A, total_anchors = fpn_args[feat_id]
        begin, end = fpn_anchors_fid[feat_id], fpn_anchors_fid[feat_id+1]
        # map up to original set of anchors
        labels = _unmap(fpn_labels[begin:end], total_anchors, fpn_inds_inside[feat_id], fill=-1)
        bbox_targets = _unmap(fpn_bbox_targets[begin:end], total_anchors, fpn_inds_inside[feat_id], fill=0)
        bbox_weights = _unmap(fpn_bbox_weights[begin:end], total_anchors, fpn_inds_inside[feat_id], fill=0)

        # (K * A,) -> (1, A, H, W) -> (1, A * H * W)
        labels = labels.reshape((1, feat_height, feat_width, A)).transpose(0, 3, 1, 2)
        labels = labels.reshape((1, A * feat_height * feat_width))

        # (K * A, 4) -> (1, A * 4, H, W) -> (1, A * 4, H * W)
        bbox_targets = bbox_targets.reshape((1, feat_height, feat_width, A * 4)).transpose(0, 3, 1, 2)
        bbox_targets = bbox_targets.reshape((1, A * 4, -1))
        bbox_weights = bbox_weights.reshape((1, feat_height, feat_width, A * 4)).transpose((0, 3, 1, 2))
        bbox_weights = bbox_weights.reshape((1, A * 4, -1))

        label_list.append(labels)
        bbox_target_list.append(bbox_targets)
        bbox_weight_list.append(bbox_weights)

    # concatenating along axis 2 requires every level to have the same A
    label = {
        'label': np.concatenate(label_list, axis=1),
        'bbox_target': np.concatenate(bbox_target_list, axis=2),
        'bbox_weight': np.concatenate(bbox_weight_list, axis=2)
    }

    return label
예제 #12
0
def assign_anchor(feat_shape, gt_boxes, im_info, cfg, feat_stride=16,
                  scales=(8, 16, 32), ratios=(0.5, 1, 2), allowed_border=0):
    """
    Assign ground truth boxes to the anchors of a single feature map.

    :param feat_shape: infer output shape; only ``feat_shape[-2:]`` (feat_height, feat_width) is used
    :param gt_boxes: ground truth boxes; columns 0..3 are used as regression targets
                     (presumably x1, y1, x2, y2 -- confirm against bbox_transform)
    :param im_info: only ``im_info[0]`` is used; its element 0 bounds anchor column 3 and
                    element 1 bounds anchor column 2 (presumably height, width)
    :param cfg: config object; reads cfg.TRAIN.RPN_CLOBBER_POSITIVES, RPN_NEGATIVE_OVERLAP,
                RPN_POSITIVE_OVERLAP, RPN_BATCH_SIZE, RPN_FG_FRACTION and RPN_BBOX_WEIGHTS
    :param feat_stride: anchor position step, also used as the anchor base size
    :param scales: used to generate anchors, affects num_anchors (per location)
    :param ratios: aspect ratios of generated anchors
    :param allowed_border: filter out anchors with edge overlap > allowed_border
    :return: dict of label
    'label': of shape (1, num_anchors * feat_height * feat_width); 1 positive, 0 negative, -1 ignored
    'bbox_target': of shape (1, num_anchors * 4, feat_height, feat_width)
    'bbox_weight': same shape as 'bbox_target'; cfg.TRAIN.RPN_BBOX_WEIGHTS on positives, 0 elsewhere
    """
    def _unmap(data, count, inds, fill=0):
        """Scatter data (rows of the subset inds) into a float32 array of size count pre-filled with fill."""
        if len(data.shape) == 1:
            ret = np.empty((count,), dtype=np.float32)
            ret.fill(fill)
            ret[inds] = data
        else:
            ret = np.empty((count,) + data.shape[1:], dtype=np.float32)
            ret.fill(fill)
            ret[inds, :] = data
        return ret

    # when True, prints diagnostics and makes the subsampling deterministic.
    # Debug output uses single-argument print(...) so that it produces the same
    # text under Python 2 and Python 3.
    DEBUG = False
    im_info = im_info[0]
    scales = np.array(scales, dtype=np.float32)
    base_anchors = generate_anchors(base_size=feat_stride, ratios=list(ratios), scales=scales)
    num_anchors = base_anchors.shape[0]
    feat_height, feat_width = feat_shape[-2:]

    if DEBUG:
        print('anchors:')
        print(base_anchors)
        print('anchor shapes:')
        print(np.hstack((base_anchors[:, 2::4] - base_anchors[:, 0::4],
                         base_anchors[:, 3::4] - base_anchors[:, 1::4])))
        print('im_info {}'.format(im_info))
        print('height {} width {}'.format(feat_height, feat_width))
        print('gt_boxes shape {}'.format(gt_boxes.shape))
        print('gt_boxes {}'.format(gt_boxes))

    # 1. generate proposals from bbox deltas and shifted anchors
    shift_x = np.arange(0, feat_width) * feat_stride
    shift_y = np.arange(0, feat_height) * feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel())).transpose()
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = num_anchors
    K = shifts.shape[0]
    all_anchors = base_anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    all_anchors = all_anchors.reshape((K * A, 4))
    total_anchors = int(K * A)

    # only keep anchors inside the image
    inds_inside = np.where((all_anchors[:, 0] >= -allowed_border) &
                           (all_anchors[:, 1] >= -allowed_border) &
                           (all_anchors[:, 2] < im_info[1] + allowed_border) &
                           (all_anchors[:, 3] < im_info[0] + allowed_border))[0]
    if DEBUG:
        print('total_anchors {}'.format(total_anchors))
        print('inds_inside {}'.format(len(inds_inside)))

    # keep only inside anchors
    anchors = all_anchors[inds_inside, :]
    if DEBUG:
        print('anchors shape {}'.format(anchors.shape))

    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.empty((len(inds_inside),), dtype=np.float32)
    labels.fill(-1)

    if gt_boxes.size > 0:
        # overlap between the anchors and the gt boxes
        # overlaps (ex, gt)
        # np.float64 is what the removed alias np.float used to resolve to
        overlaps = bbox_overlaps(anchors.astype(np.float64), gt_boxes.astype(np.float64))
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])]
        # every anchor that ties for the best overlap with some gt box
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

        if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels first so that positive labels can clobber them
            labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

        # fg label: for each gt, anchor with highest overlap
        labels[gt_argmax_overlaps] = 1

        # fg label: above threshold IoU
        labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

        if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels last so that negative labels can clobber positives
            labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
    else:
        labels[:] = 0

    # subsample positive labels if we have too many
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCH_SIZE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds, size=(len(fg_inds) - num_fg), replace=False)
        if DEBUG:
            disable_inds = fg_inds[:(len(fg_inds) - num_fg)]
        labels[disable_inds] = -1

    # subsample negative labels if we have too many
    num_bg = cfg.TRAIN.RPN_BATCH_SIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(bg_inds, size=(len(bg_inds) - num_bg), replace=False)
        if DEBUG:
            disable_inds = bg_inds[:(len(bg_inds) - num_bg)]
        labels[disable_inds] = -1

    # targets are computed for every inside anchor; the weights select the positives
    bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if gt_boxes.size > 0:
        bbox_targets[:] = bbox_transform(anchors, gt_boxes[argmax_overlaps, :4])

    bbox_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_weights[labels == 1, :] = np.array(cfg.TRAIN.RPN_BBOX_WEIGHTS)

    if DEBUG:
        _sums = bbox_targets[labels == 1, :].sum(axis=0)
        _squared_sums = (bbox_targets[labels == 1, :] ** 2).sum(axis=0)
        # the epsilon guards both statistics against an image without positives
        _counts = np.sum(labels == 1) + 1e-14
        means = _sums / _counts
        stds = np.sqrt(_squared_sums / _counts - means ** 2)
        print('means {}'.format(means))
        print('stdevs {}'.format(stds))

    # map up to original set of anchors
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_weights = _unmap(bbox_weights, total_anchors, inds_inside, fill=0)

    if DEBUG:
        # max_overlaps only exists when there was at least one gt box
        if gt_boxes.size > 0:
            print('rpn: max max_overlaps {}'.format(np.max(max_overlaps)))
        print('rpn: num_positives {}'.format(np.sum(labels == 1)))
        print('rpn: num_negatives {}'.format(np.sum(labels == 0)))
        _fg_sum = np.sum(labels == 1)
        _bg_sum = np.sum(labels == 0)
        _count = 1
        print('rpn: num_positive avg {}'.format(_fg_sum / _count))
        print('rpn: num_negative avg {}'.format(_bg_sum / _count))

    # (K * A,) -> (1, A, H, W) -> (1, A * H * W)
    labels = labels.reshape((1, feat_height, feat_width, A)).transpose(0, 3, 1, 2)
    labels = labels.reshape((1, A * feat_height * feat_width))
    # (K * A, 4) -> (1, A * 4, H, W)
    bbox_targets = bbox_targets.reshape((1, feat_height, feat_width, A * 4)).transpose(0, 3, 1, 2)
    bbox_weights = bbox_weights.reshape((1, feat_height, feat_width, A * 4)).transpose((0, 3, 1, 2))

    label = {'label': labels,
             'bbox_target': bbox_targets,
             'bbox_weight': bbox_weights}
    return label
예제 #13
0
def sample_xyhs_rois(rois, fg_rois_per_image, rois_per_image, num_classes, cfg,
                     labels=None, overlaps=None, dbbox_targets=None, gt_boxes=None):
    """
    Sample a fixed-size minibatch of foreground and background RoIs.

    Either gt_boxes is given (labels, overlaps and regression targets are then
    computed here), or labels and overlaps are precomputed; precomputed labels
    additionally require precomputed dbbox_targets, because the gt assignment
    needed to compute targets is only known when labels are derived here.

    :param rois: all_rois [n, 4]; e2e [n, 5] with batch_index (column 0 is skipped when computing overlaps/targets)
    :param fg_rois_per_image: maximum number of foreground RoIs to sample
    :param rois_per_image: total number of RoIs to return
    :param num_classes: passed through to expand_bbox_regression_targets_base
    :param cfg: config object; reads cfg.TRAIN.FG_THRESH, BG_THRESH_HI, BG_THRESH_LO,
                BBOX_NORMALIZATION_PRECOMPUTED, BBOX_MEANS and BBOX_STDS
    :param labels: maybe precomputed
    :param overlaps: maybe precomputed (max overlap per RoI); required when labels is given
    :param dbbox_targets: maybe precomputed; required when labels is given
    :param gt_boxes: optional for e2e [n, 9] (x1, y1, ..., x4, y4, cls)
    :return: (rois, labels, bbox_targets, bbox_weights)
    :raises ValueError: if labels is given without dbbox_targets, or if padding is
                        needed but rois is empty
    """
    if labels is None:
        # hgt_boxes = np.hstack((bbox_poly2hbb(gt_boxes[:, :-1]), gt_boxes[:, -1]))
        hgt_boxes = bbox_poly2hbb(gt_boxes)
        ## rois: (xmin, ymin, xmax, ymax)
        # np.float64 is what the removed alias np.float used to resolve to
        overlaps = bbox_overlaps(rois[:, 1:].astype(np.float64), hgt_boxes[:, :4].astype(np.float64))
        gt_assignment = overlaps.argmax(axis=1)
        overlaps = overlaps.max(axis=1)
        labels = hgt_boxes[gt_assignment, 4]
    elif dbbox_targets is None:
        # fail early and clearly instead of hitting an unbound gt_assignment later
        raise ValueError('dbbox_targets must be provided when labels are precomputed')

    # foreground RoI with FG_THRESH overlap
    fg_indexes = np.where(overlaps >= cfg.TRAIN.FG_THRESH)[0]
    # guard against the case when an image has fewer than fg_rois_per_image foreground RoIs
    fg_rois_per_this_image = np.minimum(fg_rois_per_image, fg_indexes.size)
    # Sample foreground regions without replacement
    if len(fg_indexes) > fg_rois_per_this_image:
        fg_indexes = npr.choice(fg_indexes, size=fg_rois_per_this_image, replace=False)

    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_indexes = np.where((overlaps < cfg.TRAIN.BG_THRESH_HI) & (overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
    # Compute number of background RoIs to take from this image (guarding against there being fewer than desired)
    bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
    bg_rois_per_this_image = np.minimum(bg_rois_per_this_image, bg_indexes.size)
    # Sample background regions without replacement
    if len(bg_indexes) > bg_rois_per_this_image:
        bg_indexes = npr.choice(bg_indexes, size=bg_rois_per_this_image, replace=False)

    # indexes selected
    keep_indexes = np.append(fg_indexes, bg_indexes)

    # pad more to ensure a fixed minibatch size
    num_rois = len(rois)
    while keep_indexes.shape[0] < rois_per_image:
        if num_rois == 0:
            # nothing to pad with: drawing 0 samples per pass would loop forever
            raise ValueError('cannot pad the minibatch to rois_per_image: rois is empty')
        gap = np.minimum(num_rois, rois_per_image - keep_indexes.shape[0])
        gap_indexes = npr.choice(range(num_rois), size=gap, replace=False)
        keep_indexes = np.append(keep_indexes, gap_indexes)

    # select labels
    labels = labels[keep_indexes]
    # set labels of bg_rois to be 0 (everything after the sampled foreground, padding included)
    labels[fg_rois_per_this_image:] = 0
    rois = rois[keep_indexes]

    # load or compute bbox_target
    if dbbox_targets is not None:
        bbox_target_data = dbbox_targets[keep_indexes, :]
    else:
        # targets = dbbox_transform2_warp(rois[:, 1:], gt_boxes[gt_assignment[keep_indexes], :8])
        targets = dbboxtransform3_warp(rois[:, 1:], gt_boxes[gt_assignment[keep_indexes], :8])
        if cfg.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED:
            targets = ((targets - np.array(cfg.TRAIN.BBOX_MEANS))
                       / np.array(cfg.TRAIN.BBOX_STDS))
        bbox_target_data = np.hstack((labels[:, np.newaxis], targets))
    bbox_targets, bbox_weights = \
        expand_bbox_regression_targets_base(bbox_target_data, num_classes, cfg)

    return rois, labels, bbox_targets, bbox_weights
예제 #14
0
파일: rcnn.py 프로젝트: zhuofalin/NP-RepMet
def sample_rois(rois,
                fg_rois_per_image,
                rois_per_image,
                num_classes,
                cfg,
                labels=None,
                overlaps=None,
                bbox_targets=None,
                gt_boxes=None):
    """
    generate random sample of ROIs comprising foreground and background examples

    Either gt_boxes is given (labels, overlaps and regression targets are then
    computed here), or labels and overlaps are precomputed; precomputed labels
    additionally require precomputed bbox_targets, because the gt assignment
    needed to compute targets is only known when labels are derived here.

    :param rois: all_rois [n, 4]; e2e: [n, 5] with batch_index (column 0 is skipped when computing overlaps/targets)
    :param fg_rois_per_image: foreground roi number
    :param rois_per_image: total roi number
    :param num_classes: number of classes
    :param cfg: config object; reads cfg.TRAIN.FG_THRESH, BG_THRESH_HI, BG_THRESH_LO,
                BBOX_NORMALIZATION_PRECOMPUTED, BBOX_MEANS and BBOX_STDS
    :param labels: maybe precomputed
    :param overlaps: maybe precomputed (max_overlaps); required when labels is given
    :param bbox_targets: maybe precomputed; required when labels is given
    :param gt_boxes: optional for e2e [n, 5] (x1, y1, x2, y2, cls)
    :return: (rois, labels, neg_labels, bbox_targets, bbox_weights); neg_labels holds the
             pre-zeroing label of every sampled RoI whose overlap lies in [0.2, 0.3)
             and 0 elsewhere
    :raises ValueError: if labels is given without bbox_targets, or if padding is
                        needed but rois is empty
    """
    if labels is None:
        # np.float64 is what the removed alias np.float used to resolve to
        overlaps = bbox_overlaps(rois[:, 1:].astype(np.float64),
                                 gt_boxes[:, :4].astype(np.float64))
        gt_assignment = overlaps.argmax(axis=1)
        overlaps = overlaps.max(axis=1)
        labels = gt_boxes[gt_assignment, 4]
    elif bbox_targets is None:
        # fail early and clearly instead of hitting an unbound gt_assignment later
        raise ValueError('bbox_targets must be provided when labels are precomputed')

    # foreground RoI with FG_THRESH overlap
    fg_indexes = np.where(overlaps >= cfg.TRAIN.FG_THRESH)[0]

    # guard against the case when an image has fewer than fg_rois_per_image foreground RoIs
    fg_rois_per_this_image = np.minimum(fg_rois_per_image, fg_indexes.size)
    # Sample foreground regions without replacement
    if len(fg_indexes) > fg_rois_per_this_image:
        fg_indexes = npr.choice(fg_indexes,
                                size=fg_rois_per_this_image,
                                replace=False)

    # flip to True for diagnostic output
    debug = False
    if debug:
        print('fg_indexes size:', fg_indexes.size, 'fg_rois_per_image:',
              fg_rois_per_image, 'fg_rois_per_this_image:',
              fg_rois_per_this_image)

    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_indexes = np.where((overlaps < cfg.TRAIN.BG_THRESH_HI)
                          & (overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
    # Compute number of background RoIs to take from this image (guarding against there being fewer than desired)
    bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
    bg_rois_per_this_image = np.minimum(bg_rois_per_this_image,
                                        bg_indexes.size)
    # Sample background regions without replacement
    if len(bg_indexes) > bg_rois_per_this_image:
        bg_indexes = npr.choice(bg_indexes,
                                size=bg_rois_per_this_image,
                                replace=False)

    # indexes selected
    keep_indexes = np.append(fg_indexes, bg_indexes)

    # pad more to ensure a fixed minibatch size
    num_rois = len(rois)
    while keep_indexes.shape[0] < rois_per_image:
        if num_rois == 0:
            # nothing to pad with: drawing 0 samples per pass would loop forever
            raise ValueError('cannot pad the minibatch to rois_per_image: rois is empty')
        gap = np.minimum(num_rois, rois_per_image - keep_indexes.shape[0])
        gap_indexes = npr.choice(range(num_rois), size=gap, replace=False)
        keep_indexes = np.append(keep_indexes, gap_indexes)

    # select labels
    labels = labels[keep_indexes]
    # keep the labels as assigned by overlap, before background entries are zeroed
    labels_all = labels.copy()

    # set labels of bg_rois to be 0 (everything after the sampled foreground, padding included)
    labels[fg_rois_per_this_image:] = 0
    rois = rois[keep_indexes]

    # load or compute bbox_target
    if bbox_targets is not None:
        bbox_target_data = bbox_targets[keep_indexes, :]
    else:
        targets = bbox_transform(rois[:, 1:],
                                 gt_boxes[gt_assignment[keep_indexes], :4])
        if cfg.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED:
            targets = ((targets - np.array(cfg.TRAIN.BBOX_MEANS)) /
                       np.array(cfg.TRAIN.BBOX_STDS))
        bbox_target_data = np.hstack((labels[:, np.newaxis], targets))

    # overlaps of the sampled RoIs, in minibatch order
    overlaps = overlaps[keep_indexes]

    # overlap bands: L1 = [neg_low, neg_middle), L2 = [neg_middle, neg_high), L3 = [neg_high, inf)
    neg_low = 0.0
    neg_middle = 0.2
    neg_high = 0.3
    neg_indexes_L2 = np.where((overlaps < neg_high)
                              & (overlaps >= neg_middle))[0]

    # only the middle band keeps its original label in neg_labels
    neg_labels = np.zeros(labels.shape)
    neg_labels[neg_indexes_L2] = labels_all[neg_indexes_L2]

    if debug:
        # the outer bands are only needed for the diagnostics below
        neg_indexes_L1 = np.where((overlaps < neg_middle)
                                  & (overlaps >= neg_low))[0]
        neg_indexes_L3 = np.where(overlaps >= neg_high)[0]
        print('neg_indexes_L1:', len(neg_indexes_L1), 'neg_indexes_L2:',
              len(neg_indexes_L2), 'neg_indexes_L3', len(neg_indexes_L3))
        print('labels_all:', labels_all)
        print('neg_labels:', neg_labels, 'neg_labels_shape:', neg_labels.shape)
        print('<<<fg neg labels>>>>', neg_labels[neg_indexes_L2])
        print('fg neg labels sum', np.sum(neg_labels[neg_indexes_L2]))
        print('neg labels sum', np.sum(neg_labels))
        print('over_laps:', overlaps)
        print('neg_fg_over_laps:', overlaps[neg_indexes_L2])
        print('<<<<<<<<<<<<<<<<<<<<<<<<<>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>')

    bbox_targets, bbox_weights = \
        expand_bbox_regression_targets(bbox_target_data, num_classes, cfg)

    return rois, labels, neg_labels, bbox_targets, bbox_weights
예제 #15
0
파일: rpn.py 프로젝트: zhongtb/mxnet-detnet
def assign_pyramid_anchor(feat_shapes, gt_boxes, im_info, cfg, feat_strides=(4, 8, 16, 16, 16),
                          scales=(8, 8, 8, 16, 32), ratios=(0.5, 1, 2), allowed_border=0, balance_scale_bg=False):
    """
    Assign ground truth boxes to the anchors of every pyramid level.

    Each level uses a single anchor scale (``scales[feat_id]``) combined with all
    ratios. Anchors crossing the image border are dropped, the remaining anchors
    of all levels are pooled and labelled together, and the result is scattered
    back to the full per-level grids.

    :param feat_shapes: one entry per level; ``feat_shapes[i][0][-2:]`` is read as (feat_height, feat_width)
    :param gt_boxes: ground truth boxes; columns 0..3 are used as regression targets
                     (presumably x1, y1, x2, y2 -- confirm against bbox_transform)
    :param im_info: only ``im_info[0]`` is used; its element 0 bounds anchor column 3 and
                    element 1 bounds anchor column 2 (presumably height, width)
    :param cfg: config object; reads cfg.TRAIN.RPN_CLOBBER_POSITIVES, RPN_NEGATIVE_OVERLAP,
                RPN_POSITIVE_OVERLAP, RPN_BATCH_SIZE (-1 disables subsampling), RPN_FG_FRACTION
                and RPN_BBOX_WEIGHTS
    :param feat_strides: anchor position step of each level, also used as the anchor base size
    :param scales: one anchor scale per level; must have at least len(feat_strides) entries
    :param ratios: aspect ratios shared by all levels
    :param allowed_border: anchors may exceed the image by at most this many pixels
    :param balance_scale_bg: if True, cap the background samples of each level at
                             num_bg // len(feat_strides) instead of capping the pooled total
    :return: dict of label
    'label': of shape (1, A*h1*w1 + A*h2*w2 + ...); 1 positive, 0 negative, -1 ignored
    'bbox_target': of shape (1, 4A, h1*w1 + h2*w2 + ...)
    'bbox_weight': same shape as 'bbox_target'; cfg.TRAIN.RPN_BBOX_WEIGHTS on positives, 0 elsewhere
    """
    def _unmap(data, count, inds, fill=0):
        """Scatter data (rows of the subset inds) into a float32 array of size count pre-filled with fill."""
        if len(data.shape) == 1:
            ret = np.empty((count,), dtype=np.float32)
            ret.fill(fill)
            ret[inds] = data
        else:
            ret = np.empty((count,) + data.shape[1:], dtype=np.float32)
            ret.fill(fill)
            ret[inds, :] = data
        return ret

    # when True, subsampling of the pooled fg/bg sets is deterministic (first entries are dropped)
    DEBUG = False
    im_info = im_info[0]
    scales = np.array(scales, dtype=np.float32)
    ratios = np.array(ratios, dtype=np.float32)
    fpn_args = []
    fpn_anchors_fid = np.zeros(0).astype(int)
    fpn_anchors = np.zeros([0, 4])
    fpn_labels = np.zeros(0)
    fpn_inds_inside = []
    for feat_id in range(len(feat_strides)):
        base_anchors = generate_anchors(base_size=feat_strides[feat_id], ratios=ratios, scales=[scales[feat_id]])

        num_anchors = base_anchors.shape[0]
        feat_height, feat_width = feat_shapes[feat_id][0][-2:]
        shift_x = np.arange(0, feat_width) * feat_strides[feat_id]
        shift_y = np.arange(0, feat_height) * feat_strides[feat_id]
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel())).transpose()

        # A base anchors (1, A, 4) + K cell shifts (K, 1, 4) -> (K, A, 4) -> (K * A, 4)
        A = num_anchors
        K = shifts.shape[0]
        all_anchors = base_anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2))
        all_anchors = all_anchors.reshape((K * A, 4))
        total_anchors = int(K * A)

        # only keep anchors inside the image
        inds_inside = np.where((all_anchors[:, 0] >= -allowed_border) &
                               (all_anchors[:, 1] >= -allowed_border) &
                               (all_anchors[:, 2] < im_info[1] + allowed_border) &
                               (all_anchors[:, 3] < im_info[0] + allowed_border))[0]

        # keep only inside anchors
        anchors = all_anchors[inds_inside, :]

        # every inside anchor starts out as "ignored" (-1)
        labels = np.empty((len(inds_inside),), dtype=np.float32)
        labels.fill(-1)

        # pool this level's anchors; fpn_anchors_fid records how many each level contributed
        fpn_anchors_fid = np.hstack((fpn_anchors_fid, len(inds_inside)))
        fpn_anchors = np.vstack((fpn_anchors, anchors))
        fpn_labels = np.hstack((fpn_labels, labels))
        fpn_inds_inside.append(inds_inside)
        fpn_args.append([feat_height, feat_width, A, total_anchors])

    if gt_boxes.size > 0:
        # overlaps (anchor, gt); np.float64 is what the removed alias np.float used to resolve to
        overlaps = bbox_overlaps(fpn_anchors.astype(np.float64), gt_boxes.astype(np.float64))
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(len(fpn_anchors)), argmax_overlaps]
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])]
        # every anchor that ties for the best overlap with some gt box
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

        if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # background first, so that the positive assignments below can overwrite it
            fpn_labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

        fpn_labels[gt_argmax_overlaps] = 1
        fpn_labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1
        if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # background last, so that it overwrites the positive assignments above
            fpn_labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
    else:
        fpn_labels[:] = 0

    # subsample positive labels if we have too many
    num_fg = fpn_labels.shape[0] if cfg.TRAIN.RPN_BATCH_SIZE == -1 else int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCH_SIZE)
    fg_inds = np.where(fpn_labels >= 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds, size=(len(fg_inds) - num_fg), replace=False)
        if DEBUG:
            disable_inds = fg_inds[:(len(fg_inds) - num_fg)]
        fpn_labels[disable_inds] = -1

    # subsample negative labels if we have too many
    num_bg = fpn_labels.shape[0] if cfg.TRAIN.RPN_BATCH_SIZE == -1 else cfg.TRAIN.RPN_BATCH_SIZE - np.sum(fpn_labels >= 1)
    bg_inds = np.where(fpn_labels == 0)[0]
    # turn per-level counts into slice boundaries: level i owns [fid[i], fid[i + 1])
    fpn_anchors_fid = np.hstack((0, fpn_anchors_fid.cumsum()))

    if balance_scale_bg:
        # floor division keeps the sample size an integer on Python 3 as well
        num_bg_scale = num_bg // len(feat_strides)
        for feat_id in range(0, len(feat_strides)):
            bg_ind_scale = bg_inds[(bg_inds >= fpn_anchors_fid[feat_id]) & (bg_inds < fpn_anchors_fid[feat_id+1])]
            if len(bg_ind_scale) > num_bg_scale:
                disable_inds = npr.choice(bg_ind_scale, size=(len(bg_ind_scale) - num_bg_scale), replace=False)
                fpn_labels[disable_inds] = -1
    else:
        if len(bg_inds) > num_bg:
            disable_inds = npr.choice(bg_inds, size=(len(bg_inds) - num_bg), replace=False)
            if DEBUG:
                disable_inds = bg_inds[:(len(bg_inds) - num_bg)]
            fpn_labels[disable_inds] = -1

    # regression targets and weights are only filled in for the surviving positives
    fg_mask = fpn_labels >= 1
    fpn_bbox_targets = np.zeros((len(fpn_anchors), 4), dtype=np.float32)
    if gt_boxes.size > 0:
        fpn_bbox_targets[fg_mask, :] = bbox_transform(fpn_anchors[fg_mask, :], gt_boxes[argmax_overlaps[fg_mask], :4])

    fpn_bbox_weights = np.zeros((len(fpn_anchors), 4), dtype=np.float32)
    fpn_bbox_weights[fg_mask, :] = np.array(cfg.TRAIN.RPN_BBOX_WEIGHTS)

    label_list = []
    bbox_target_list = []
    bbox_weight_list = []
    for feat_id in range(0, len(feat_strides)):
        feat_height, feat_width, A, total_anchors = fpn_args[feat_id]
        begin, end = fpn_anchors_fid[feat_id], fpn_anchors_fid[feat_id+1]
        # map up to original set of anchors
        labels = _unmap(fpn_labels[begin:end], total_anchors, fpn_inds_inside[feat_id], fill=-1)
        bbox_targets = _unmap(fpn_bbox_targets[begin:end], total_anchors, fpn_inds_inside[feat_id], fill=0)
        bbox_weights = _unmap(fpn_bbox_weights[begin:end], total_anchors, fpn_inds_inside[feat_id], fill=0)

        # (K * A,) -> (1, A, H, W) -> (1, A * H * W)
        labels = labels.reshape((1, feat_height, feat_width, A)).transpose(0, 3, 1, 2)
        labels = labels.reshape((1, A * feat_height * feat_width))
        # (K * A, 4) -> (1, A * 4, H, W) -> (1, A * 4, H * W)
        bbox_targets = bbox_targets.reshape((1, feat_height, feat_width, A * 4)).transpose(0, 3, 1, 2)
        bbox_targets = bbox_targets.reshape((1, A * 4, -1))
        bbox_weights = bbox_weights.reshape((1, feat_height, feat_width, A * 4)).transpose((0, 3, 1, 2))
        bbox_weights = bbox_weights.reshape((1, A * 4, -1))

        label_list.append(labels)
        bbox_target_list.append(bbox_targets)
        bbox_weight_list.append(bbox_weights)

    # concatenating along axis 2 requires every level to have the same A
    label = {
        'label': np.concatenate(label_list, axis=1),
        'bbox_target': np.concatenate(bbox_target_list, axis=2),
        'bbox_weight': np.concatenate(bbox_weight_list, axis=2)
    }

    return label
예제 #16
0
def gpu_mask_voting(masks, boxes, scores, num_classes, max_per_image, im_width, im_height,
                    nms_thresh, merge_thresh, binary_thresh=0.4, device_id=0):
    """
    A wrapper function for GPU mask voting; the class of boxes and masks is already known.

    For every foreground class (1 .. num_classes - 1) the boxes are scored with
    that class's column of ``scores`` and passed through NMS, keeping at most
    ``max_per_image`` per class. A global score threshold (the
    ``max_per_image``-th best kept score over all classes, floored at 1e-3) is
    then applied. For each surviving box, every input box whose overlap with it
    (as computed by ``bbox_overlaps``) is >= ``merge_thresh`` becomes a voting
    candidate, weighted by its normalised class score. The actual merge is
    delegated to ``mask_voting_kernel``.

    :param masks: per-detection masks, indexed along axis 0 in step with ``boxes``
    :param boxes: per-detection boxes, one row per detection
        (presumably [x1, y1, x2, y2] -- confirm against caller)
    :param scores: per-detection class scores, one column per class (column 0 is skipped)
    :param num_classes: number of classes including class 0
    :param max_per_image: cap on kept detections (per class and for the global threshold)
    :param im_width: forwarded to ``mask_voting_kernel``
    :param im_height: forwarded to ``mask_voting_kernel``
    :param nms_thresh: threshold handed to ``gpu_nms_wrapper``
    :param merge_thresh: minimum overlap for a box to vote on a kept box
    :param binary_thresh: forwarded to ``mask_voting_kernel``
    :param device_id: GPU id handed to the NMS wrapper and the voting kernel
    :return: ``(list_result_mask, list_result_box)``; both are lists of length
        ``num_classes`` whose entry 0 is an empty list. ``list_result_box[c]``
        holds the kernel's boxes with the detection score appended as the last
        column, restricted to rows where column 2 > column 0 and column 3 > column 1.
    """
    nms = gpu_nms_wrapper(nms_thresh, device_id)

    # Intermediate results: per-class boxes/scores that survive NMS
    t_boxes = [[] for _ in range(num_classes)]
    t_scores = [[] for _ in range(num_classes)]
    t_all_scores = []
    for i in range(1, num_classes):
        dets = np.hstack((boxes.astype(np.float32), scores[:, i:i + 1]))
        inds = nms(dets)
        num_keep = min(len(inds), max_per_image)
        inds = inds[:num_keep]
        t_boxes[i] = boxes[inds]
        t_scores[i] = scores[inds, i]
        t_all_scores.extend(scores[inds, i])

    list_result_box = [[] for _ in range(num_classes)]
    list_result_mask = [[] for _ in range(num_classes)]
    if len(t_all_scores) == 0:
        # NMS kept nothing (no detections, no foreground class, or
        # max_per_image == 0). Indexing sorted_scores[-1] below would raise
        # IndexError, so report "no result" for every class instead.
        return list_result_mask, list_result_box

    sorted_scores = np.sort(t_all_scores)[::-1]
    num_keep = min(len(sorted_scores), max_per_image)
    thresh = max(sorted_scores[num_keep - 1], 1e-3)

    for i in range(1, num_classes):
        keep = np.where(t_scores[i] >= thresh)
        t_boxes[i] = t_boxes[i][keep]
        t_scores[i] = t_scores[i][keep]

    # inds array to record which mask should be aggregated together
    candidate_inds = []
    # weight for each element in the candidate inds
    candidate_weights = []
    # running end offset into candidate_inds after each kept box
    candidate_start = []
    candidate_scores = []
    # class_bar[c] = number of kept boxes in classes 1..c (cumulative)
    class_bar = [[] for _ in range(num_classes)]

    # organize helper variable for gpu mask voting
    all_boxes = boxes.astype(np.float64)  # loop-invariant cast, hoisted
    for c in range(1, num_classes):
        for i in range(len(t_boxes[c])):
            cur_ov = bbox_overlaps(all_boxes, t_boxes[c][i, np.newaxis].astype(np.float64))
            cur_inds = np.where(cur_ov >= merge_thresh)[0]
            candidate_inds.extend(cur_inds)
            cur_weights = scores[cur_inds, c]
            cur_weights = cur_weights / sum(cur_weights)
            candidate_weights.extend(cur_weights)
            candidate_start.append(len(candidate_inds))
        candidate_scores.extend(t_scores[c])
        class_bar[c] = len(candidate_scores)

    candidate_inds = np.array(candidate_inds, dtype=np.int32)
    candidate_weights = np.array(candidate_weights, dtype=np.float32)
    candidate_start = np.array(candidate_start, dtype=np.int32)
    candidate_scores = np.array(candidate_scores, dtype=np.float32)

    # the input masks/boxes are relatively large
    # only the subset that actually votes is handed to the kernel, so the
    # candidate indices are renumbered to positions within that subset
    unique_inds, candidate_inds = np.unique(candidate_inds, return_inverse=True)
    candidate_inds = candidate_inds.astype(np.int32)
    boxes = boxes[unique_inds, ...]
    masks = masks[unique_inds, ...]

    boxes = np.round(boxes)
    result_mask, result_box = mask_voting_kernel(boxes, masks, candidate_inds, candidate_start, candidate_weights,
                                                 binary_thresh, im_height, im_width, device_id)
    result_box = np.hstack((result_box, candidate_scores[:, np.newaxis]))

    # split the flat kernel output back into per-class chunks
    cls_start = 0
    for i in range(1, num_classes):
        cls_end = class_bar[i]
        cls_box = result_box[cls_start:cls_end, :]
        cls_mask = result_mask[cls_start:cls_end, :]
        # drop degenerate boxes (non-positive width or height)
        valid_ind = np.where((cls_box[:, 2] > cls_box[:, 0]) &
                             (cls_box[:, 3] > cls_box[:, 1]))[0]
        list_result_box[i] = cls_box[valid_ind, :]
        list_result_mask[i] = cls_mask[valid_ind, :]
        cls_start = cls_end

    return list_result_mask, list_result_box
예제 #17
0
        def get_target(bbox, gt_box, score, ref_bbox, ref_gt_box, ref_score):
            """
            Build 0/1 targets for the current and the reference frame.

            For every foreground class the ground-truth boxes of both frames
            are first trimmed to the same count (the boxes with the smallest
            summed overlap against the other frame's boxes are dropped). Then,
            for every threshold index of ``self._target_thresh`` and every
            ground-truth index ``x``, the boxes with a non-zero score for that
            ground truth are compared across frames through
            ``translation_dist``; among the ~10% closest (current, reference)
            pairs the one with the highest summed score is marked with 1 in
            both outputs.

            NOTE(review): ground truth ``x`` of the current frame is paired
            with ground truth ``x`` of the reference frame purely by position
            -- confirm both frames list their objects in the same order.

            :param bbox: current-frame boxes, indexed as ``bbox[:, cls_idx, :]``
            :param gt_box: current-frame ground truth, indexed as
                ``gt_box[0, :, :]`` with the class label in the last column
            :param score: current-frame scores, one column per foreground class
            :param ref_bbox: reference-frame counterpart of ``bbox``
            :param ref_gt_box: reference-frame counterpart of ``gt_box``
            :param ref_score: reference-frame counterpart of ``score``
            :return: ``(blob, ref_blob)`` float32 arrays, stacked as
                [num_boxes, num_fg_classes, num_thresh]
            """
            num_boxes = bbox.shape[0]
            ref_num_boxes = ref_bbox.shape[0]
            # presumably score_list[cls][thresh] is (num_boxes, num_gt) --
            # inferred from how it is indexed below; verify against get_scores
            score_list = get_scores(bbox, gt_box, score)
            ref_score_list = get_scores(ref_bbox, ref_gt_box, ref_score)

            output_list = []
            ref_output_list = []
            for cls_idx in range(0, num_fg_classes):
                cls_label = cls_idx + 1  # label 0 is not a foreground class

                valid_gt_mask = gt_box[0, :, -1].astype(np.int32) == cls_label
                valid_gt_box = gt_box[0, valid_gt_mask, :]
                num_valid_gt = len(valid_gt_box)

                ref_valid_gt_mask = (
                    ref_gt_box[0, :, -1].astype(np.int32) == cls_label)
                ref_valid_gt_box = ref_gt_box[0, ref_valid_gt_mask, :]
                ref_num_valid_gt = len(ref_valid_gt_box)

                score_list_per_class = score_list[cls_idx]
                ref_score_list_per_class = ref_score_list[cls_idx]

                bbox_per_class = bbox[:, cls_idx, :]
                ref_bbox_per_class = ref_bbox[:, cls_idx, :]

                if ref_num_valid_gt > num_valid_gt:
                    # reference frame has extra objects: drop those that
                    # overlap the current frame's ground truth the least
                    num_rm = ref_num_valid_gt - num_valid_gt
                    ref_num_valid_gt = num_valid_gt
                    gt_overlap_mat = bbox_overlaps(
                        ref_valid_gt_box.astype(np.float64),
                        valid_gt_box.astype(np.float64))
                    rm_indices = np.argsort(
                        np.sum(gt_overlap_mat, axis=1))[:num_rm]
                    ref_valid_gt_box = np.delete(ref_valid_gt_box,
                                                 rm_indices,
                                                 axis=0)
                    # update ref_score_list_per_class
                    ref_score_list_per_class = get_scores_per_class(
                        ref_bbox_per_class, ref_valid_gt_box,
                        ref_score[:, cls_idx:cls_idx + 1])
                    assert ref_valid_gt_box.shape == valid_gt_box.shape, "failed remove ref, {} -> {}".format(
                        ref_valid_gt_box.shape[0], valid_gt_box.shape[0])
                    print("success remove ref")
                elif num_valid_gt > ref_num_valid_gt:
                    # current frame has extra objects: same trimming, mirrored
                    num_rm = num_valid_gt - ref_num_valid_gt
                    num_valid_gt = ref_num_valid_gt
                    gt_overlap_mat = bbox_overlaps(
                        valid_gt_box.astype(np.float64),
                        ref_valid_gt_box.astype(np.float64))
                    rm_indices = np.argsort(
                        np.sum(gt_overlap_mat, axis=1))[:num_rm]
                    valid_gt_box = np.delete(valid_gt_box,
                                             rm_indices,
                                             axis=0)
                    # update score_list_per_class
                    score_list_per_class = get_scores_per_class(
                        bbox_per_class, valid_gt_box,
                        score[:, cls_idx:cls_idx + 1])
                    assert ref_valid_gt_box.shape == valid_gt_box.shape, "failed remove, {} -> {}".format(
                        ref_valid_gt_box.shape[0], valid_gt_box.shape[0])
                    print("success remove")

                assert num_valid_gt == ref_num_valid_gt, "gt num are not the same"

                if (len(score_list_per_class) == 0
                        or len(ref_score_list_per_class) == 0):
                    # no scores for this class in at least one frame
                    output_list.append(
                        get_max_socre_bboxes(score_list_per_class, num_boxes))
                    ref_output_list.append(
                        get_max_socre_bboxes(ref_score_list_per_class,
                                             ref_num_boxes))
                    continue

                output_list_per_class = []
                ref_output_list_per_class = []

                for i in range(len(self._target_thresh)):
                    overlap_score = score_list_per_class[i]
                    ref_overlap_score = ref_score_list_per_class[i]
                    output = np.zeros((overlap_score.shape[0], ))
                    ref_output = np.zeros((ref_overlap_score.shape[0], ))
                    if (np.count_nonzero(overlap_score) == 0
                            or np.count_nonzero(ref_overlap_score) == 0):
                        # nothing to pair at this threshold: all-zero targets
                        output_list_per_class.append(output)
                        ref_output_list_per_class.append(ref_output)
                        continue

                    for x in range(num_valid_gt):
                        overlap_score_per_gt = overlap_score[:, x]
                        ref_overlap_score_per_gt = ref_overlap_score[:, x]
                        # boxes with a non-zero score for ground truth x
                        valid_bbox_indices = np.where(
                            overlap_score_per_gt)[0]
                        ref_valid_bbox_indices = np.where(
                            ref_overlap_score_per_gt)[0]
                        if (len(valid_bbox_indices) == 0
                                or len(ref_valid_bbox_indices) == 0):
                            continue

                        # ground-truth coordinates without the class column
                        target_gt_box = valid_gt_box[x:x + 1, :-1]
                        ref_target_gt_box = ref_valid_gt_box[x:x + 1, :-1]
                        dist_mat = translation_dist(
                            bbox_per_class[valid_bbox_indices],
                            target_gt_box)[:, 0, :]
                        ref_dist_mat = translation_dist(
                            ref_bbox_per_class[ref_valid_bbox_indices],
                            ref_target_gt_box)[:, 0, :]

                        # squared distance between every (current, reference)
                        # pair; broadcasting replaces the explicit np.tile pair
                        pair_diff = (dist_mat[:, np.newaxis, :] -
                                     ref_dist_mat[np.newaxis, :, :])
                        bbox_dist_mat = np.sum(pair_diff**2, axis=2)
                        assert bbox_dist_mat.shape == (
                            len(valid_bbox_indices),
                            len(ref_valid_bbox_indices))

                        # keep the closest ~10% of pairs, at least one
                        num_pairs = bbox_dist_mat.size
                        top_k = int(0.1 * num_pairs + 0.5)
                        top_k = max(1, top_k)
                        top_k = min(top_k, num_pairs)
                        print("{} of out {} stable pair".format(
                            top_k, num_pairs))
                        ind_list, ref_ind_list = np.unravel_index(
                            np.argsort(bbox_dist_mat, axis=None)[:top_k],
                            bbox_dist_mat.shape)

                        cur_box_ids = valid_bbox_indices[ind_list]
                        ref_box_ids = ref_valid_bbox_indices[ref_ind_list]
                        score_sums = (overlap_score_per_gt[cur_box_ids] +
                                      ref_overlap_score_per_gt[ref_box_ids])
                        rank_sums = cur_box_ids + ref_box_ids
                        score_max_idx = np.argmax(score_sums)
                        rank_max_idx = np.argmin(rank_sums)
                        # bookkeeping: how often best-score and best-rank agree
                        if score_max_idx == rank_max_idx:
                            score_rank_max[0] += 1
                        score_rank_max[1] += 1

                        # the pair with the highest summed score wins
                        max_idx = score_max_idx
                        ind = ind_list[max_idx]
                        ref_ind = ref_ind_list[max_idx]
                        if ind == np.argmax(
                                overlap_score_per_gt[valid_bbox_indices]):
                            print('cur takes the max')
                        if ref_ind == np.argmax(
                                ref_overlap_score_per_gt[
                                    ref_valid_bbox_indices]):
                            print('ref takes the max')

                        output[valid_bbox_indices[ind]] = 1
                        ref_output[ref_valid_bbox_indices[ref_ind]] = 1

                    output_list_per_class.append(output)
                    ref_output_list_per_class.append(ref_output)

                output_list.append(
                    np.stack(output_list_per_class, axis=-1))
                ref_output_list.append(
                    np.stack(ref_output_list_per_class, axis=-1))

            # [num_boxes, num_fg_classes, num_thresh]
            blob = np.stack(output_list, axis=1).astype(np.float32, copy=False)
            ref_blob = np.stack(ref_output_list, axis=1).astype(np.float32,
                                                                copy=False)
            return blob, ref_blob
예제 #18
0
def assign_anchor(feat_shape_p3,
                  feat_shape_p4,
                  feat_shape_p5,
                  feat_shape_p6,
                  gt_boxes,
                  im_info,
                  cfg,
                  feat_stride_p3=4,
                  scales_p3=(8, ),
                  ratios_p3=(0.75, 1, 1.5),
                  feat_stride_p4=8,
                  scales_p4=(8, ),
                  ratios_p4=(0.75, 1, 1.5),
                  feat_stride_p5=16,
                  scales_p5=(8, ),
                  ratios_p5=(0.75, 1, 1.5),
                  feat_stride_p6=4,
                  scales_p6=(8, ),
                  ratios_p6=(0.75, 1, 1.5),
                  allowed_border=1):
    """
    assign ground truth boxes to anchor positions on four pyramid levels (p3..p6)

    NOTE: the per-level ``feat_stride_*`` / ``scales_*`` / ``ratios_*``
    arguments, ``im_info`` and ``allowed_border`` are accepted for interface
    compatibility but are NOT used: every level uses the hard-coded strides
    (8, 16, 32, 64), scales (8, 10, 12) and ratios (0.5, 1, 2), and no anchor
    is filtered out at the image border.

    :param feat_shape_p3: feature map shape of level p3; only the last two
        entries (height, width) are read. Same for p4, p5, p6.
    :param gt_boxes: ground truth boxes [n, 5]; columns 0..3 are the box, the
        last column is the class label
    :param im_info: unused
    :param cfg: config providing TRAIN.RPN_NEGATIVE_OVERLAP,
        TRAIN.RPN_POSITIVE_OVERLAP and TRAIN.RPN_BBOX_WEIGHTS
    :param allowed_border: unused
    :return: dict of float32 arrays, one entry per level ``pX`` in p3..p6:
    'label/pX': (1, A * feat_height * feat_width); the class label of the
        best-overlapping gt box, 0 where the best overlap is below
        RPN_NEGATIVE_OVERLAP, -1 where it lies between the negative and
        positive thresholds
    'bbox_target/pX': (1, A * 4, feat_height, feat_width)
    'bbox_weight/pX': (1, A * 4, feat_height, feat_width); RPN_BBOX_WEIGHTS
        for anchors with label > 0, zero elsewhere
    """
    level_names = ('p3', 'p4', 'p5', 'p6')
    feat_shape = [feat_shape_p3, feat_shape_p4, feat_shape_p5, feat_shape_p6]
    feat_stride = [8, 16, 32, 64]
    scales = np.array((8, 10, 12), dtype=np.float32)
    ratios = (0.5, 1, 2)

    # The original `assert ('...')` asserted a non-empty string and therefore
    # could never fail; make the consistency check real.
    if len(feat_stride) != len(feat_shape):
        raise ValueError(
            'length of feat_stride is not equal to length of feat_shape')

    has_gt = gt_boxes.size > 0
    if has_gt:
        gt_boxes_f = gt_boxes.astype(np.float64)  # loop-invariant cast

    labels_list = []
    bbox_targets_list = []
    bbox_weights_list = []
    for stride, shape in zip(feat_stride, feat_shape):
        base_anchors = generate_anchors(base_size=stride,
                                        ratios=list(ratios),
                                        scales=scales)
        A = base_anchors.shape[0]  # anchors per location
        feat_height, feat_width = shape[-2:]

        # one (x, y, x, y) shift per feature-map cell, in image coordinates
        shift_x = np.arange(0, feat_width) * stride
        shift_y = np.arange(0, feat_height) * stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                            shift_y.ravel())).transpose()

        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        K = shifts.shape[0]
        anchors = base_anchors.reshape((1, A, 4)) + shifts.reshape(
            (1, K, 4)).transpose((1, 0, 2))
        anchors = anchors.reshape((K * A, 4))
        total_anchors = K * A  # every anchor is kept, including border ones

        bbox_targets = np.zeros((total_anchors, 4), dtype=np.float32)
        if has_gt:
            overlaps = bbox_overlaps(anchors.astype(np.float64), gt_boxes_f)
            argmax_overlaps = overlaps.argmax(axis=1)
            max_overlaps = overlaps[np.arange(total_anchors), argmax_overlaps]

            # label: class of the best gt box, 0 is negative, -1 is dont care
            labels = gt_boxes[argmax_overlaps, -1].astype(int)
            labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
            labels[(max_overlaps >= cfg.TRAIN.RPN_NEGATIVE_OVERLAP)
                   & (max_overlaps < cfg.TRAIN.RPN_POSITIVE_OVERLAP)] = -1

            bbox_targets[:] = bbox_transform(anchors,
                                             gt_boxes[argmax_overlaps, :4])
        else:
            # no ground truth: every anchor is background
            labels = np.zeros((total_anchors, ), dtype=np.float32)

        bbox_weights = np.zeros((total_anchors, 4), dtype=np.float32)
        bbox_weights[labels > 0, :] = np.array(cfg.TRAIN.RPN_BBOX_WEIGHTS)

        # No anchors were filtered, so "unmapping" to the full anchor set is
        # just a conversion of the labels to float32.
        labels = labels.astype(np.float32)

        labels = labels.reshape((1, A * feat_height * feat_width))
        bbox_targets = bbox_targets.reshape(
            (1, feat_height, feat_width, A * 4)).transpose(0, 3, 1, 2)
        bbox_weights = bbox_weights.reshape(
            (1, feat_height, feat_width, A * 4)).transpose((0, 3, 1, 2))

        labels_list.append(labels)
        bbox_targets_list.append(bbox_targets)
        bbox_weights_list.append(bbox_weights)

    label = {}
    for prefix, per_level in (('label', labels_list),
                              ('bbox_target', bbox_targets_list),
                              ('bbox_weight', bbox_weights_list)):
        for level, value in zip(level_names, per_level):
            label['%s/%s' % (prefix, level)] = value

    return label
예제 #19
0
def sample_rois(rois, fg_rois_per_image, rois_per_image, num_classes, cfg,
                labels=None, overlaps=None, bbox_targets=None, gt_boxes=None):
    """
    generate random sample of ROIs comprising foreground and background examples
    :param rois: all_rois [n, 4]; e2e: [n, 5] with batch_index
    :param fg_rois_per_image: foreground roi number
    :param rois_per_image: total roi number
    :param num_classes: number of classes
    :param cfg: config providing TRAIN.FG_THRESH, TRAIN.BG_THRESH_HI,
        TRAIN.BG_THRESH_LO, TRAIN.BBOX_NORMALIZATION_PRECOMPUTED,
        TRAIN.BBOX_MEANS and TRAIN.BBOX_STDS
    :param labels: maybe precomputed; if given, overlaps and bbox_targets
        must be given as well
    :param overlaps: maybe precomputed (max_overlaps)
    :param bbox_targets: maybe precomputed
    :param gt_boxes: optional for e2e [n, 5] (x1, y1, x2, y2, cls)
    :return: (rois, labels, bbox_targets, bbox_weights)
    :raises ValueError: if labels are precomputed but bbox_targets are not, or
        if padding is required while there are no rois to pad with
    """
    if labels is not None and bbox_targets is None:
        # Computing targets needs the roi -> gt assignment, which only exists
        # when labels are derived from gt_boxes below. Previously this path
        # died with a NameError on `gt_assignment`.
        raise ValueError(
            'bbox_targets must be provided when labels are precomputed')

    gt_assignment = None
    if labels is None:
        overlaps = bbox_overlaps(rois[:, 1:].astype(np.float64),
                                 gt_boxes[:, :4].astype(np.float64))
        gt_assignment = overlaps.argmax(axis=1)
        overlaps = overlaps.max(axis=1)
        labels = gt_boxes[gt_assignment, 4]

    # foreground RoI with FG_THRESH overlap
    fg_indexes = np.where(overlaps >= cfg.TRAIN.FG_THRESH)[0]
    # guard against the case when an image has fewer than fg_rois_per_image foreground RoIs
    fg_rois_per_this_image = np.minimum(fg_rois_per_image, fg_indexes.size)
    # Sample foreground regions without replacement
    if len(fg_indexes) > fg_rois_per_this_image:
        fg_indexes = npr.choice(fg_indexes, size=fg_rois_per_this_image, replace=False)

    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_indexes = np.where((overlaps < cfg.TRAIN.BG_THRESH_HI) & (overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
    # Compute number of background RoIs to take from this image (guarding against there being fewer than desired)
    bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
    bg_rois_per_this_image = np.minimum(bg_rois_per_this_image, bg_indexes.size)
    # Sample background regions without replacement
    if len(bg_indexes) > bg_rois_per_this_image:
        bg_indexes = npr.choice(bg_indexes, size=bg_rois_per_this_image, replace=False)

    # indexes selected
    keep_indexes = np.append(fg_indexes, bg_indexes)

    # pad more to ensure a fixed minibatch size
    if keep_indexes.shape[0] < rois_per_image and len(rois) == 0:
        # with nothing to draw from, the loop below could never make progress
        raise ValueError('cannot pad the minibatch: rois is empty')
    while keep_indexes.shape[0] < rois_per_image:
        gap = np.minimum(len(rois), rois_per_image - keep_indexes.shape[0])
        gap_indexes = npr.choice(range(len(rois)), size=gap, replace=False)
        keep_indexes = np.append(keep_indexes, gap_indexes)

    # select labels (fancy indexing copies, so the caller's array is untouched)
    labels = labels[keep_indexes]
    # everything after the sampled foreground rois (background and padding)
    # is labelled as background
    labels[fg_rois_per_this_image:] = 0
    rois = rois[keep_indexes]

    # load or compute bbox_target
    if bbox_targets is not None:
        bbox_target_data = bbox_targets[keep_indexes, :]
    else:
        targets = bbox_transform(rois[:, 1:], gt_boxes[gt_assignment[keep_indexes], :4])
        if cfg.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED:
            targets = ((targets - np.array(cfg.TRAIN.BBOX_MEANS))
                       / np.array(cfg.TRAIN.BBOX_STDS))
        bbox_target_data = np.hstack((labels[:, np.newaxis], targets))

    bbox_targets, bbox_weights = \
        expand_bbox_regression_targets(bbox_target_data, num_classes, cfg)

    return rois, labels, bbox_targets, bbox_weights
예제 #20
0
파일: dcr_target.py 프로젝트: makefile/DCR
    def forward(self, is_train, req, in_data, out_data, aux):
        """
        Decode the predicted boxes, relabel them against the ground truth and
        (optionally) resample them.

        Inputs read from ``in_data``: [0] rois (column 0 is skipped, columns
        1.. are the box), [1] class probabilities, [2] box regression deltas,
        [3] im_info (first row used), [4] gt boxes with the class in the last
        column.

        Outputs written with ``self.assign``: ``out_data[0]`` gets the boxes
        with a zero batch-index column prepended, ``out_data[1]`` their labels.
        With ``self._resample == -1`` all decoded boxes are emitted; otherwise
        the gt boxes are added to the pool and the sampler selected by
        ``self._cfg.DCR.sample`` picks the rows, padded rows getting label -1.

        :raises ValueError: if ``self._cfg.DCR.sample`` names an unknown sampler
        """
        rois = in_data[0].asnumpy()
        cls_prob = in_data[1].asnumpy()
        if self._cfg.CLASS_AGNOSTIC:
            bbox_deltas = in_data[2].asnumpy()[:, 4:8]
        else:
            # pick, per roi, the 4 deltas of its most probable foreground class
            fg_cls_idx = np.argmax(cls_prob[:, 1:], axis=1).astype(int)
            row_idx = np.arange(fg_cls_idx.shape[0], dtype=int)
            all_deltas = in_data[2].asnumpy()
            # NOTE(review): fg_cls_idx is relative to cls_prob[:, 1:], i.e. 0
            # means class 1, yet it is used directly as the 4-column block
            # index into the deltas. If block 0 of in_data[2] is background
            # this is off by one -- confirm the layout; behaviour kept as-is.
            col_idx = fg_cls_idx[:, np.newaxis] * 4 + np.arange(4)
            bbox_deltas = all_deltas[row_idx[:, np.newaxis], col_idx]
        im_info = in_data[3].asnumpy()[0, :]
        gt_boxes = in_data[4].asnumpy()

        # post processing: undo the target normalisation applied in training
        if self._cfg.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED:
            bbox_deltas = bbox_deltas * np.array(
                self._cfg.TRAIN.BBOX_STDS) + np.array(
                    self._cfg.TRAIN.BBOX_MEANS)

        proposals = bbox_pred(rois[:, 1:], bbox_deltas)
        proposals = clip_boxes(proposals, im_info[:2])

        # only support single batch
        batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
        blob = np.hstack((batch_inds, proposals.astype(np.float32,
                                                       copy=False)))

        # reassign label
        gt_classes = gt_boxes[:, -1].astype(int)
        overlaps = np.zeros((blob.shape[0], self._cfg.dataset.NUM_CLASSES),
                            dtype=np.float32)
        # n boxes and k gt_boxes => n * k overlap
        gt_overlaps = bbox_overlaps(blob[:, 1:].astype(np.float64),
                                    gt_boxes[:, :-1].astype(np.float64))
        # for each box in n boxes, select only maximum overlap (must be greater than zero)
        argmaxes = gt_overlaps.argmax(axis=1)
        maxes = gt_overlaps.max(axis=1)
        I = np.where(maxes > 0)[0]
        overlaps[I, gt_classes[argmaxes[I]]] = maxes[I]

        roi_max_classes = overlaps.argmax(axis=1)
        roi_max_overlaps = overlaps.max(axis=1)
        # assign bg labels
        roi_max_classes[np.where(
            roi_max_overlaps < self._cfg.TRAIN.FG_THRESH)] = 0
        assert (roi_max_classes[np.where(
            roi_max_overlaps < self._cfg.TRAIN.FG_THRESH)] == 0).all()

        if self._resample == -1:
            self.assign(out_data[0], req[0], blob)
            self.assign(out_data[1], req[1], roi_max_classes)
        else:
            # Include ground-truth boxes in the set of candidate rois
            gt_batch_inds = np.zeros((gt_boxes.shape[0], 1),
                                     dtype=gt_boxes.dtype)
            all_rois = np.vstack((np.hstack(
                (gt_batch_inds, gt_boxes[:, :-1])), blob))

            # gt boxes: predicted == true class, score and overlap of 1
            pred_classes = gt_boxes[:, -1]
            pred_scores = np.ones_like(pred_classes)
            max_classes = pred_classes.copy()
            max_overlaps = np.ones_like(max_classes)
            # predicted boxes
            roi_pred_classes = cls_prob.argmax(axis=1)
            roi_pred_scores = cls_prob.max(axis=1)

            # gt rows first, then the decoded rois -- same order as all_rois
            roi_rec = {}
            roi_rec['pred_classes'] = np.append(pred_classes, roi_pred_classes)
            roi_rec['scores'] = np.append(pred_scores, roi_pred_scores)
            roi_rec['max_classes'] = np.append(max_classes, roi_max_classes)
            roi_rec['max_overlaps'] = np.append(max_overlaps, roi_max_overlaps)

            if self._cfg.DCR.sample == 'DCRV1':
                keep_indexes, pad_indexes = sample_rois_fg_bg(
                    roi_rec, self._cfg, self._resample)
            elif self._cfg.DCR.sample == 'RANDOM':
                keep_indexes, pad_indexes = sample_rois_random(
                    roi_rec, self._cfg, self._resample)
            else:
                raise ValueError('Undefined sampling method: %s' %
                                 self._cfg.DCR.sample)

            resampled_blob = np.vstack(
                (all_rois[keep_indexes, :], all_rois[pad_indexes, :]))
            # assign bg classes
            assert (roi_rec['max_classes'][np.where(
                roi_rec['max_overlaps'] < self._cfg.TRAIN.FG_THRESH)] == 0
                    ).all()
            # padded rows carry label -1
            resampled_label = np.append(roi_rec['max_classes'][keep_indexes],
                                        -1 * np.ones(len(pad_indexes)))

            self.assign(out_data[0], req[0], resampled_blob)
            self.assign(out_data[1], req[1], resampled_label)
예제 #21
0
def gpu_mask_voting(masks,
                    boxes,
                    scores,
                    num_classes,
                    max_per_image,
                    im_width,
                    im_height,
                    nms_thresh,
                    merge_thresh,
                    binary_thresh=0.4,
                    device_id=0):
    """
    Prepare the inputs of the GPU mask-voting kernel and split its output per
    class; the class of every box and mask is already known.

    For each foreground class (1 .. num_classes - 1) the boxes go through NMS,
    the survivors are cut by one score threshold shared by all classes, and for
    every surviving box the input boxes overlapping it by at least
    ``merge_thresh`` are recorded together with their normalised class scores.
    Those index / weight arrays are handed to ``mask_voting_kernel``.

    :param masks: per-box masks, indexed along axis 0 in step with ``boxes``
    :param boxes: 2-D array of boxes, one row per box (presumably
        x1, y1, x2, y2 -- confirm against the caller)
    :param scores: 2-D array, one row per box and one column per class
    :param num_classes: number of classes, index 0 being skipped as background
    :param max_per_image: cap on the NMS survivors kept per class; also picks
        the rank of the shared score threshold
    :param im_width: forwarded to ``mask_voting_kernel``
    :param im_height: forwarded to ``mask_voting_kernel``
    :param nms_thresh: threshold given to ``gpu_nms_wrapper``
    :param merge_thresh: minimum overlap for a box to vote for a survivor
    :param binary_thresh: forwarded to ``mask_voting_kernel``
    :param device_id: GPU id given to the NMS wrapper and to the kernel
    :return: (list_result_mask, list_result_box); both are lists with one entry
        per class, entry 0 stays an empty list, box rows carry the score as
        their last column
    :raises IndexError: if no detection survives NMS (the score list is empty)
    """
    nms = gpu_nms_wrapper(nms_thresh, device_id)

    # Intermediate results: per-class NMS survivors (slot 0 is never filled)
    t_boxes = [[] for _ in range(num_classes)]
    t_scores = [[] for _ in range(num_classes)]
    t_all_scores = []
    for i in range(1, num_classes):
        dets = np.hstack((boxes.astype(np.float32), scores[:, i:i + 1]))
        inds = nms(dets)
        num_keep = min(len(inds), max_per_image)
        inds = inds[:num_keep]
        t_boxes[i] = boxes[inds]
        t_scores[i] = scores[inds, i]
        t_all_scores.extend(scores[inds, i])

    # One threshold for all classes: the max_per_image-th best surviving score,
    # but never below 1e-3.
    sorted_scores = np.sort(t_all_scores)[::-1]
    num_keep = min(len(sorted_scores), max_per_image)
    thresh = max(sorted_scores[num_keep - 1], 1e-3)

    for i in range(1, num_classes):
        keep = np.where(t_scores[i] >= thresh)
        t_boxes[i] = t_boxes[i][keep]
        t_scores[i] = t_scores[i][keep]

    # inds array to record which mask should be aggregated together
    candidate_inds = []
    # weight for each element in the candidate inds
    candidate_weights = []
    # end offset (into candidate_inds) of every survivor's candidate run
    candidate_start = []
    candidate_scores = []
    # class_bar[c] is the running number of survivors up to and including class c
    class_bar = [0] * num_classes

    # organize helper variable for gpu mask voting
    # np.float64 replaces the removed ``np.float`` alias (same dtype); the cast
    # does not depend on the loop, so it is done once.
    boxes_f64 = boxes.astype(np.float64)
    for c in range(1, num_classes):
        for i in range(len(t_boxes[c])):
            cur_ov = bbox_overlaps(boxes_f64,
                                   t_boxes[c][i, np.newaxis].astype(np.float64))
            cur_inds = np.where(cur_ov >= merge_thresh)[0]
            candidate_inds.extend(cur_inds)
            cur_weights = scores[cur_inds, c]
            cur_weights = cur_weights / cur_weights.sum()
            candidate_weights.extend(cur_weights)
            candidate_start.append(len(candidate_inds))
        candidate_scores.extend(t_scores[c])
        class_bar[c] = len(candidate_scores)

    candidate_inds = np.array(candidate_inds, dtype=np.int32)
    candidate_weights = np.array(candidate_weights, dtype=np.float32)
    candidate_start = np.array(candidate_start, dtype=np.int32)
    candidate_scores = np.array(candidate_scores, dtype=np.float32)

    # the input masks/boxes are relatively large; only the subset that takes
    # part in a merge is passed on.  ``return_inverse`` yields, for every
    # candidate, its position inside the sorted unique index list.
    unique_inds, candidate_inds = np.unique(candidate_inds,
                                            return_inverse=True)
    candidate_inds = candidate_inds.astype(np.int32)
    boxes = boxes[unique_inds, ...]
    masks = masks[unique_inds, ...]

    boxes = np.round(boxes)
    result_mask, result_box = mask_voting_kernel(boxes, masks, candidate_inds,
                                                 candidate_start,
                                                 candidate_weights,
                                                 binary_thresh, im_height,
                                                 im_width, device_id)
    result_box = np.hstack((result_box, candidate_scores[:, np.newaxis]))

    list_result_box = [[] for _ in range(num_classes)]
    list_result_mask = [[] for _ in range(num_classes)]
    cls_start = 0
    for i in range(1, num_classes):
        cls_end = class_bar[i]
        cls_box = result_box[cls_start:cls_end, :]
        cls_mask = result_mask[cls_start:cls_end, :]
        # drop degenerate boxes (zero or negative width / height)
        valid_ind = np.where((cls_box[:, 2] > cls_box[:, 0])
                             & (cls_box[:, 3] > cls_box[:, 1]))[0]
        # NOTE: an optional extra per-class NMS pass over the merged boxes used
        # to sit here; it is disabled, so only the validity filter is applied.
        list_result_box[i] = cls_box[valid_ind, :]
        list_result_mask[i] = cls_mask[valid_ind, :]
        cls_start = cls_end

    return list_result_mask, list_result_box
def _double_eval_collect(scores, boxes, store, image_idx, num_classes, cfg,
                         thresh, nms, data_dict, key_prefix, scales, delta):
    """Store one frame's per-class detections in ``store``; return counter increments.

    The returned tuple is
    (valid_tally, valid_sum, nms_full, nms_pos, target_total); the last three
    are only non-zero in learn-NMS mode with the module-level DEBUG flag set.
    ``key_prefix`` ('' or 'ref_') selects which ``data_dict`` entries are read.
    """
    valid_tally = valid_sum = 0
    nms_full = nms_pos = target_total = 0
    learn_nms = cfg.TEST.LEARN_NMS

    for j in range(1, num_classes):
        if learn_nms:
            indexes = np.where(scores[:, j - 1, 0] > thresh)[0]
            cls_scores = scores[indexes, j - 1, :]
            cls_boxes = boxes[indexes, j - 1, :]
        else:
            indexes = np.where(scores[:, j] > thresh)[0]
            if cfg.TEST.FIRST_N > 0:
                # todo: check whether the order affects the result
                sort_indices = np.argsort(scores[:, j])[-cfg.TEST.FIRST_N:]
                indexes = np.intersect1d(sort_indices, indexes)
            cls_scores = scores[indexes, j, np.newaxis]
            if cfg.CLASS_AGNOSTIC:
                cls_boxes = boxes[indexes, 4:8]
            else:
                cls_boxes = boxes[indexes, j * 4:(j + 1) * 4]
        cls_dets = np.hstack((cls_boxes, cls_scores))

        # count the valid ground truth
        if len(cls_scores) > 0:
            valid_tally += len(cls_scores)
            valid_sum += len(scores)

        if not learn_nms:
            if cfg.TEST.SOFTNMS:
                store[j][image_idx] = nms(cls_dets)
            else:
                keep = nms(cls_dets)
                store[j][image_idx] = cls_dets[keep, :]
            continue

        # learn-NMS mode stores the thresholded detections unsuppressed
        store[j][image_idx] = cls_dets
        if not DEBUG:
            continue

        # DEBUG only: compare plain NMS output against the learn-NMS targets
        keep = nms(cls_dets)
        nms_cls_dets = cls_dets[keep, :]
        target = data_dict[key_prefix + 'nms_multi_target']
        target = target[np.where(target[:, 4] == j - 1)]
        nms_full += bbox_equal_count(nms_cls_dets, target)

        gt_boxes = data_dict[key_prefix + 'gt_boxes'][0].asnumpy()
        gt_boxes = gt_boxes[np.where(gt_boxes[:, 4] == j)[0], :4]
        gt_boxes /= scales[delta]

        if len(cls_boxes) != 0 and len(gt_boxes) != 0:
            # np.float64 replaces the removed ``np.float`` alias (same dtype)
            overlap_mat = bbox_overlaps(cls_boxes.astype(np.float64),
                                        gt_boxes.astype(np.float64))
            matched = np.where(overlap_mat > 0.5)[0]
            keep = nms(cls_dets[matched])
            nms_pos += bbox_equal_count(cls_dets[matched][keep], target)
        target_total += len(target)

    return valid_tally, valid_sum, nms_full, nms_pos, target_total


def _double_eval_cap(store, image_idx, num_classes, max_per_image):
    """Drop, across all classes of one frame, detections scoring below the max_per_image-th best."""
    if max_per_image <= 0:
        return
    image_scores = np.hstack([
        store[j][image_idx][:, -1] for j in range(1, num_classes)
    ])
    if len(image_scores) <= max_per_image:
        return
    image_thresh = np.sort(image_scores)[-max_per_image]
    for j in range(1, num_classes):
        dets = store[j][image_idx]
        keep = np.where(dets[:, -1] >= image_thresh)[0]
        store[j][image_idx] = dets[keep, :]


def _double_eval_vis_boxes(store, image_idx, num_classes, cfg, show_gt,
                           data_dict, key_prefix, tag, scales, delta):
    """Build the per-class box list of one frame for visualisation, optionally adding ground truth."""
    boxes_this_image = [[]] + [
        store[j][image_idx] for j in range(1, num_classes)
    ]
    if not show_gt:
        return boxes_this_image

    for gt_box in data_dict[key_prefix + 'gt_boxes'][0]:
        gt_box = gt_box.asnumpy()
        gt_cls = int(gt_box[4])
        gt_box = gt_box / scales[delta]
        # the class slot is reused as a score of 1
        gt_box[4] = 1
        if cfg.TEST.LEARN_NMS:
            gt_box = np.append(gt_box, 1)
        boxes_this_image[gt_cls] = np.vstack(
            (boxes_this_image[gt_cls], gt_box))

    if cfg.TEST.LEARN_NMS:
        for target_box in data_dict[key_prefix + 'nms_multi_target']:
            # formatted explicitly so Python 2 does not print a tuple repr
            print("{} {}".format(tag, target_box * scales[delta]))
            target_cls = int(target_box[4]) + 1
            # NOTE: these writes go into the row taken from data_dict
            target_box[4] = 2 + target_box[5]
            target_box[5] = target_box[6]
            target_box = target_box[:6]
            boxes_this_image[target_cls] = np.vstack(
                (boxes_this_image[target_cls], target_box))
    return boxes_this_image


def pred_double_eval(predictor,
                     test_data,
                     imdb,
                     cfg,
                     vis=False,
                     thresh=1e-3,
                     logger=None,
                     ignore_cache=True,
                     show_gt=False):
    """
    wrapper for calculating offline validation for faster data analysis
    in this example, all threshold are set by hand

    Every batch is run through ``im_double_detect``, which yields scores and
    boxes for the current frame and for a reference frame.  Both frames go
    through the same post-processing: score thresholding, NMS (skipped in
    learn-NMS mode), the ``cfg.TEST.max_per_image`` cap and, when ``vis`` is
    set, assembly of the box lists passed to ``vis_double_all_detection``.
    Per-batch timings are printed and logged.

    If a cached detection file exists and ``ignore_cache`` is False, the cache
    is evaluated with ``imdb.evaluate_detections`` and the function returns.
    In the code below the freshly collected detections are not saved or
    evaluated after the loop.

    :param predictor: Predictor
    :param test_data: data iterator, must be non-shuffle unless ``vis`` is set
    :param imdb: image database
    :param cfg: configuration object (TEST.* and CLASS_AGNOSTIC are read)
    :param vis: controls visualization
    :param thresh: valid detection threshold
    :param logger: optional logger receiving the progress lines
    :param ignore_cache: when False, reuse an existing detection pickle
    :param show_gt: with ``vis``, also add ground-truth boxes to the display
    :return: None
    """
    det_file = os.path.join(imdb.result_path, imdb.name + '_detections.pkl')
    if os.path.exists(det_file) and not ignore_cache:
        # NOTE(review): unpickling runs arbitrary code; det_file must be a
        # trusted, locally produced cache.
        with open(det_file, 'rb') as fid:
            all_boxes = cPickle.load(fid)
        info_str = imdb.evaluate_detections(all_boxes)
        if logger:
            logger.info('evaluate detections: \n{}'.format(info_str))
        return

    assert vis or not test_data.shuffle
    data_names = [k[0] for k in test_data.provide_data[0]]
    num_images = test_data.size

    if not isinstance(test_data, PrefetchingIter):
        test_data = PrefetchingIter(test_data)

    if cfg.TEST.SOFTNMS:
        nms = py_softnms_wrapper(cfg.TEST.NMS)
    else:
        nms = py_nms_wrapper(cfg.TEST.NMS)

    # limit detections to max_per_image over all classes
    max_per_image = cfg.TEST.max_per_image
    num_classes = imdb.num_classes

    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(num_classes)]
    ref_all_boxes = [[[] for _ in range(num_images)]
                     for _ in range(num_classes)]
    valid_tally = 0
    valid_sum = 0

    idx = 0
    t = time.time()
    inference_count = 0
    all_inference_time = []
    post_processing_time = []
    nms_full_count = []
    nms_pos_count = []
    is_max_count = []
    all_count = []
    for im_info, data_batch in test_data:
        t1 = time.time() - t
        t = time.time()

        scales = [iim_info[0, 2] for iim_info in im_info]
        scores_all, boxes_all, ref_scores_all, ref_boxes_all, data_dict_all = im_double_detect(
            predictor, data_batch, data_names, scales, cfg)

        t2 = time.time() - t
        t = time.time()
        nms_full_count_per_batch = 0
        nms_pos_count_per_batch = 0
        # module-level counter, only read here
        is_max_count_per_batch = num_of_is_full_max[0]
        all_count_per_batch = 0
        for delta, (scores, boxes, ref_scores, ref_boxes,
                    data_dict) in enumerate(
                        zip(scores_all, boxes_all, ref_scores_all,
                            ref_boxes_all, data_dict_all)):
            image_idx = idx + delta
            # (data_dict key prefix, print tag, scores, boxes, result store);
            # the current frame is processed before the reference frame
            frames = (
                ('', 'cur', scores, boxes, all_boxes),
                ('ref_', 'ref', ref_scores, ref_boxes, ref_all_boxes),
            )
            shown = []
            for key_prefix, tag, frame_scores, frame_boxes, store in frames:
                counts = _double_eval_collect(
                    frame_scores, frame_boxes, store, image_idx, num_classes,
                    cfg, thresh, nms, data_dict, key_prefix, scales, delta)
                valid_tally += counts[0]
                valid_sum += counts[1]
                nms_full_count_per_batch += counts[2]
                nms_pos_count_per_batch += counts[3]
                all_count_per_batch += counts[4]

                _double_eval_cap(store, image_idx, num_classes, max_per_image)

                if vis:
                    shown.append(
                        _double_eval_vis_boxes(store, image_idx, num_classes,
                                               cfg, show_gt, data_dict,
                                               key_prefix, tag, scales, delta))

            if vis:
                vis_double_all_detection(data_dict['data'][0:1].asnumpy(),
                                         shown[0],
                                         data_dict['data'][1:2].asnumpy(),
                                         shown[1], imdb.classes,
                                         scales[delta], cfg)

        if DEBUG:
            nms_full_count.append(nms_full_count_per_batch)
            nms_pos_count.append(nms_pos_count_per_batch)
            is_max_count.append(is_max_count_per_batch)
            all_count.append(all_count_per_batch)
            # No target has been counted yet when learn-NMS is off (or the
            # first batches carry none); skip the ratios instead of dividing
            # by zero.
            total_targets = sum(all_count)
            if total_targets > 0:
                print("full:{} pos:{} max:{}".format(
                    1.0 * sum(nms_full_count) / total_targets,
                    1.0 * sum(nms_pos_count) / total_targets,
                    1.0 * sum(is_max_count) / total_targets))
        idx += test_data.batch_size
        t3 = time.time() - t
        t = time.time()
        post_processing_time.append(t3)
        all_inference_time.append(t1 + t2 + t3)
        inference_count += 1
        if inference_count % 200 == 0:
            # average over at most the 500 most recent batches
            valid_count = 500 if inference_count > 500 else inference_count
            print("--->> running-average inference time per batch: {}".format(
                float(sum(all_inference_time[-valid_count:])) / valid_count))
            print("--->> running-average post processing time per batch: {}".
                  format(
                      float(sum(post_processing_time[-valid_count:])) /
                      valid_count))
        progress = 'testing {}/{} data {:.4f}s net {:.4f}s post {:.4f}s'.format(
            idx, num_images, t1, t2, t3)
        print(progress)
        if logger:
            logger.info(progress)
예제 #23
0
def sample_rois(rois, fg_rois_per_image, rois_per_image, num_classes,
                labels=None, overlaps=None, bbox_targets=None, gt_boxes=None, gt_kps=None):
    """
    generate random sample of ROIs comprising foreground and background examples

    Thresholds and normalisation settings are read from the module-level ``cfg``.

    :param rois: all_rois [n, 4]; e2e: [n, 5] with batch_index
    :param fg_rois_per_image: foreground roi number
    :param rois_per_image: total roi number
    :param num_classes: number of classes
    :param labels: maybe precomputed
    :param overlaps: maybe precomputed (max_overlaps)
    :param bbox_targets: maybe precomputed
    :param gt_boxes: optional for e2e [n, 5] (x1, y1, x2, y2, cls)
    :param gt_kps: optional for e2e [n, num_kps*3] (x1, y1, v1, ...)
    :return: dict with keys 'rois_output', 'label', 'bbox_target' and
        'bbox_weight', plus 'kps_label', 'kps_target' and 'kps_weight' when
        gt_kps is given
    :raises ValueError: if labels are precomputed but the roi-to-gt assignment
        is still needed (bbox_targets missing or gt_kps given), or if padding
        is required while rois is empty
    """
    if labels is None:
        # np.float64 replaces the removed ``np.float`` alias (same dtype)
        overlaps = bbox_overlaps(rois[:, 1:].astype(np.float64), gt_boxes[:, :4].astype(np.float64))
        gt_assignment = overlaps.argmax(axis=1)
        overlaps = overlaps.max(axis=1)
        labels = gt_boxes[gt_assignment, 4]
    elif bbox_targets is None or gt_kps is not None:
        # gt_assignment only exists when labels are computed here; these
        # argument combinations used to die later on an unbound local.
        raise ValueError('precomputed labels need precomputed bbox_targets and no gt_kps: '
                         'the roi-to-gt assignment is only computed when labels is None')

    # foreground RoI with FG_THRESH overlap
    fg_indexes = np.where(overlaps >= cfg.TRAIN.FG_THRESH)[0]
    # guard against the case when an image has fewer than fg_rois_per_image foreground RoIs
    fg_rois_per_this_image = np.minimum(fg_rois_per_image, fg_indexes.size)
    # Sample foreground regions without replacement
    if len(fg_indexes) > fg_rois_per_this_image:
        fg_indexes = npr.choice(fg_indexes, size=fg_rois_per_this_image, replace=False)

    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_indexes = np.where((overlaps < cfg.TRAIN.BG_THRESH_HI) & (overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
    # Compute number of background RoIs to take from this image (guarding against there being fewer than desired)
    bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
    bg_rois_per_this_image = np.minimum(bg_rois_per_this_image, bg_indexes.size)
    # Sample background regions without replacement
    if len(bg_indexes) > bg_rois_per_this_image:
        bg_indexes = npr.choice(bg_indexes, size=bg_rois_per_this_image, replace=False)

    # indexes selected
    keep_indexes = np.append(fg_indexes, bg_indexes)

    # pad more to ensure a fixed minibatch size
    while keep_indexes.shape[0] < rois_per_image:
        if len(rois) == 0:
            # nothing to draw from: every pass would add zero indexes and the
            # loop would never finish
            raise ValueError('cannot pad the minibatch: rois is empty')
        gap = np.minimum(len(rois), rois_per_image - keep_indexes.shape[0])
        gap_indexes = npr.choice(range(len(rois)), size=gap, replace=False)
        keep_indexes = np.append(keep_indexes, gap_indexes)

    # select labels
    labels = labels[keep_indexes]
    # set labels of bg_rois to be 0 (everything after the foreground samples,
    # padding included)
    labels[fg_rois_per_this_image:] = 0
    rois = rois[keep_indexes]

    # load or compute bbox_target
    if bbox_targets is not None:
        bbox_target_data = bbox_targets[keep_indexes, :]
    else:
        targets = bbox_transform(rois[:, 1:], gt_boxes[gt_assignment[keep_indexes], :4])
        if cfg.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED:
            targets = ((targets - np.array(cfg.TRAIN.BBOX_MEANS))
                       / np.array(cfg.TRAIN.BBOX_STDS))
        bbox_target_data = np.hstack((labels[:, np.newaxis], targets))

    bbox_targets, bbox_weights = \
        expand_bbox_regression_targets(bbox_target_data, num_classes, cfg)

    res = {'rois_output': rois,
           'label'      : labels,
           'bbox_target': bbox_targets,
           'bbox_weight': bbox_weights,
           }
    if gt_kps is not None:
        keep_kps = gt_kps[gt_assignment[keep_indexes]]
        n_keep = keep_kps.shape[0]
        K = cfg.dataset.NUM_KEYPOINTS
        assert gt_kps.shape[1] == K*3

        G = cfg.network.KEYPOINTS_POOLED_SIZE
        # -1 marks rois that receive no keypoint label
        kps_labels = np.empty([n_keep, K], dtype=np.float32)
        kps_labels.fill(-1)
        kps_targets = np.zeros([n_keep, K, G, G, 2], dtype=np.float32)
        kps_weights = kps_targets.copy()
        num_fg = fg_indexes.size
        assert num_fg > 0, 'need at least one roi'

        # assign kp targets; the foreground rois are the first num_fg rows
        fg_kps_label, fg_kps_target, fg_kps_weight = assign_keypoints(rois[:num_fg, 1:], keep_kps[:num_fg], pooled_size=G)
        kps_labels[:num_fg] = fg_kps_label
        kps_targets[:num_fg] = fg_kps_target
        # weights are scaled by (roughly) 1 / number of foreground rois
        normalizer = 1.0 / (num_fg + 1e-3)
        kps_weights[:num_fg] = fg_kps_weight * normalizer

        res['kps_label'] = kps_labels.reshape([-1])
        res['kps_target'] = kps_targets.transpose([0,1,4,2,3]).reshape([n_keep, -1, G, G])
        res['kps_weight'] = kps_weights.transpose([0,1,4,2,3]).reshape([n_keep, -1, G, G])

    return res
예제 #24
0
    def evaluate_recall(self, roidb, candidate_boxes=None, thresholds=None):
        """
        evaluate detection proposal recall metrics
        record max overlap value for each gt box, per box-area bucket, and
        report the recall at each IoU threshold

        Every log line is printed and also appended to the returned string.

        :param roidb: used to evaluate
        :param candidate_boxes: if not given, use roidb's non-gt boxes
        :param thresholds: array-like recall threshold; defaults to
            0.5, 0.55, ..., 0.95
        :return: str, all log lines concatenated (no separator is inserted)
        """
        def proposals_of(i):
            # default is use the non-gt boxes from roidb
            if candidate_boxes is None:
                non_gt_inds = np.where(roidb[i]['gt_classes'] == 0)[0]
                return roidb[i]['boxes'][non_gt_inds, :]
            return candidate_boxes[i]

        def ratio(numerator, denominator):
            # report 0.0 instead of raising when there is nothing to divide by
            return numerator / denominator if denominator else 0.0

        log_parts = []

        def emit(log_info):
            print(log_info)
            log_parts.append(log_info)

        area_names = ['all', '0-25', '25-50', '50-100',
                      '100-200', '200-300', '300-inf']
        area_ranges = [[0**2, 1e5**2], [0**2, 25**2], [25**2, 50**2], [50**2, 100**2],
                       [100**2, 200**2], [200**2, 300**2], [300**2, 1e5**2]]

        # proposal size statistics over every bucket except 'all'
        area_counts = [0] * (len(area_ranges) - 1)
        for i in range(self.num_images):
            boxes = proposals_of(i)
            boxes_areas = (boxes[:, 2] - boxes[:, 0] + 1) * (boxes[:, 3] - boxes[:, 1] + 1)
            for k, area_range in enumerate(area_ranges[1:]):
                in_range = (boxes_areas >= area_range[0]) & (boxes_areas < area_range[1])
                area_counts[k] += int(np.count_nonzero(in_range))
        total_counts = float(sum(area_counts))
        for area_name, area_count in zip(area_names[1:], area_counts):
            emit('percentage of {} {}'.format(area_name, ratio(area_count, total_counts)))
        emit('average number of proposal {}'.format(ratio(total_counts, self.num_images)))

        for area_name, area_range in zip(area_names, area_ranges):
            gt_overlaps = np.zeros(0)
            num_pos = 0
            for i in range(self.num_images):
                # check for max_overlaps == 1 avoids including crowd annotations
                max_gt_overlaps = roidb[i]['gt_overlaps'].max(axis=1)
                gt_inds = np.where((roidb[i]['gt_classes'] > 0) & (max_gt_overlaps == 1))[0]
                gt_boxes = roidb[i]['boxes'][gt_inds, :]
                gt_areas = (gt_boxes[:, 2] - gt_boxes[:, 0] + 1) * (gt_boxes[:, 3] - gt_boxes[:, 1] + 1)
                valid_gt_inds = np.where((gt_areas >= area_range[0]) & (gt_areas < area_range[1]))[0]
                gt_boxes = gt_boxes[valid_gt_inds, :]
                # counted before the proposal check, so gt boxes of images
                # without proposals still count as missed
                num_pos += len(valid_gt_inds)

                boxes = proposals_of(i)
                if boxes.shape[0] == 0:
                    continue

                # np.float64 replaces the removed ``np.float`` alias (same dtype)
                overlaps = bbox_overlaps(boxes.astype(np.float64), gt_boxes.astype(np.float64))

                _gt_overlaps = np.zeros((gt_boxes.shape[0]))
                # choose whatever is smaller to iterate
                rounds = min(boxes.shape[0], gt_boxes.shape[0])
                for j in range(rounds):
                    # find which proposal maximally covers each gt box
                    argmax_overlaps = overlaps.argmax(axis=0)
                    # get the IoU amount of coverage for each gt box
                    max_overlaps = overlaps.max(axis=0)
                    # find which gt box is covered by most IoU
                    gt_ind = max_overlaps.argmax()
                    gt_ovr = max_overlaps.max()
                    assert (gt_ovr >= 0), '%s\n%s\n%s' % (boxes, gt_boxes, overlaps)
                    # find the proposal box that covers the best covered gt box
                    box_ind = argmax_overlaps[gt_ind]
                    # record the IoU coverage of this gt box
                    _gt_overlaps[j] = overlaps[box_ind, gt_ind]
                    assert (_gt_overlaps[j] == gt_ovr)
                    # mark the proposal box and the gt box as used
                    overlaps[box_ind, :] = -1
                    overlaps[:, gt_ind] = -1
                # append recorded IoU coverage level
                gt_overlaps = np.hstack((gt_overlaps, _gt_overlaps))

            gt_overlaps = np.sort(gt_overlaps)
            if thresholds is None:
                step = 0.05
                thresholds = np.arange(0.5, 0.95 + 1e-5, step)
            recalls = np.zeros_like(thresholds)

            # compute recall for each IoU threshold
            for i, t in enumerate(thresholds):
                recalls[i] = (gt_overlaps >= t).sum() / float(num_pos)
            ar = recalls.mean()

            # print results
            emit('average recall for {}: {:.3f}'.format(area_name, ar))
            for threshold, recall in zip(thresholds, recalls):
                emit('recall @{:.2f}: {:.3f}'.format(threshold, recall))

        return ''.join(log_parts)
예제 #25
0
    def sample_rois(self,
                    rois,
                    fg_rois_per_image,
                    rois_per_image,
                    num_classes,
                    cfg,
                    labels=None,
                    overlaps=None,
                    bbox_targets=None,
                    gt_boxes=None,
                    gt_masks=None):
        """
        Sample a fixed-size minibatch of RoIs and build box and mask targets.

        The returned RoIs are ordered: sampled foreground RoIs first, then
        sampled background RoIs, then randomly drawn padding RoIs if fewer
        than rois_per_image were selected.

        :param rois: RoI array; columns 1: are used as box coordinates
            (column 0 is presumably a batch index -- confirm with caller)
        :param fg_rois_per_image: upper bound on the number of foreground RoIs
        :param rois_per_image: total number of RoIs to return
        :param num_classes: forwarded to expand_bbox_regression_targets
        :param cfg: config; reads cfg.TRAIN.FG_THRESH, BG_THRESH_HI,
            BG_THRESH_LO, IGNORE_GAP, GAP_SELECT_FROM_ALL,
            BBOX_NORMALIZATION_PRECOMPUTED, BBOX_MEANS and BBOX_STDS
        :param labels: optional precomputed per-RoI labels; when None, labels
            and overlaps are derived from gt_boxes
        :param overlaps: per-RoI maximum overlap; only read from the caller
            when labels is given
        :param bbox_targets: optional precomputed per-RoI bbox target data;
            when None, targets are computed from gt_boxes
        :param gt_boxes: ground-truth boxes, columns :4 are coordinates and
            column 4 is the class label
        :param gt_masks: ground-truth masks, indexed by ground-truth index
        :return: (rois, labels, bbox_targets, bbox_weights, mask_reg_targets);
            mask_reg_targets has shape
            (rois_per_image, 1, self._mask_size, self._mask_size), is -1 for
            non-foreground RoIs and holds the binarized mask for foreground
            RoIs
        """
        gt_assignment = None
        if labels is None:
            overlaps = bbox_overlaps(rois[:, 1:].astype(np.float64),
                                     gt_boxes[:, :4].astype(np.float64))
            gt_assignment = overlaps.argmax(axis=1)
            overlaps = overlaps.max(axis=1)
            labels = gt_boxes[gt_assignment, 4]

        # foreground RoI with FG_THRESH overlap
        fg_indexes = np.where(overlaps >= cfg.TRAIN.FG_THRESH)[0]
        if cfg.TRAIN.IGNORE_GAP:
            keep_inds = remove_repetition(rois[fg_indexes, 1:])
            fg_indexes = fg_indexes[keep_inds]

        # guard against the case when an image has fewer than fg_rois_per_image foreground RoIs
        fg_rois_per_this_image = np.minimum(fg_rois_per_image, fg_indexes.size)
        # Sample foreground regions without replacement
        if len(fg_indexes) > fg_rois_per_this_image:
            fg_indexes = np.random.choice(fg_indexes,
                                          size=fg_rois_per_this_image,
                                          replace=False)

        # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
        bg_indexes = np.where((overlaps < cfg.TRAIN.BG_THRESH_HI)
                              & (overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
        if cfg.TRAIN.IGNORE_GAP:
            keep_inds = remove_repetition(rois[bg_indexes, 1:])
            bg_indexes = bg_indexes[keep_inds]

        # Compute number of background RoIs to take from this image (guarding against there being fewer than desired)
        bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
        bg_rois_per_this_image = np.minimum(bg_rois_per_this_image,
                                            bg_indexes.size)
        # Sample background regions without replacement
        if len(bg_indexes) > bg_rois_per_this_image:
            bg_indexes = np.random.choice(bg_indexes,
                                          size=bg_rois_per_this_image,
                                          replace=False)

        # indexes selected: foreground first, background after
        keep_indexes = np.append(fg_indexes, bg_indexes)

        # pad more to ensure a fixed minibatch size
        # NOTE(review): in the non-GAP_SELECT_FROM_ALL branch the draw fails if
        # fewer non-foreground RoIs exist than `gap` -- confirm callers always
        # supply enough RoIs.
        while keep_indexes.shape[0] < rois_per_image:
            gap = np.minimum(len(rois), rois_per_image - keep_indexes.shape[0])
            if cfg.TRAIN.GAP_SELECT_FROM_ALL:
                gap_indexes = np.random.choice(range(len(rois)),
                                               size=gap,
                                               replace=False)
            else:
                bg_full_indexes = list(set(range(len(rois))) - set(fg_indexes))
                gap_indexes = np.random.choice(bg_full_indexes,
                                               size=gap,
                                               replace=False)
            keep_indexes = np.append(keep_indexes, gap_indexes)

        # The gt assignment is needed for on-the-fly bbox targets and for the
        # mask targets of foreground RoIs. When the caller supplied labels it
        # has not been computed yet, so derive it here, while `rois` is still
        # the full un-sampled array.
        needs_assignment = bbox_targets is None or len(fg_indexes) > 0
        if gt_assignment is None and needs_assignment:
            gt_assignment = bbox_overlaps(
                rois[:, 1:].astype(np.float64),
                gt_boxes[:, :4].astype(np.float64)).argmax(axis=1)

        # select labels
        labels = labels[keep_indexes]
        # set labels of bg_rois to be 0
        labels[fg_rois_per_this_image:] = 0
        rois = rois[keep_indexes]

        # load or compute bbox target
        if bbox_targets is not None:
            bbox_target_data = bbox_targets[keep_indexes, :]
        else:
            targets = bbox_transform(rois[:, 1:],
                                     gt_boxes[gt_assignment[keep_indexes], :4])
            if cfg.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED:
                targets = ((targets - np.array(cfg.TRAIN.BBOX_MEANS)) /
                           np.array(cfg.TRAIN.BBOX_STDS))
            bbox_target_data = np.hstack((labels[:, np.newaxis], targets))

        bbox_targets, bbox_weights = \
            expand_bbox_regression_targets(bbox_target_data, num_classes, cfg)

        if cfg.TRAIN.IGNORE_GAP:
            # everything past the sampled fg + bg RoIs is padding: ignore it
            valid_rois_per_this_image = fg_rois_per_this_image + bg_rois_per_this_image
            labels[valid_rois_per_this_image:] = -1
            bbox_weights[valid_rois_per_this_image:] = 0

        # masks: -1 everywhere except for the foreground RoIs, which occupy
        # the first len(fg_indexes) rows of the sampled `rois`
        mask_reg_targets = -np.ones(
            (len(keep_indexes), 1, self._mask_size, self._mask_size))
        for idx, obj in enumerate(fg_indexes):
            gt_roi = np.round(gt_boxes[gt_assignment[obj], :-1]).astype(int)
            ex_roi = np.round(rois[idx, 1:]).astype(int)
            gt_mask = gt_masks[gt_assignment[obj]]
            mask_reg_target = intersect_box_mask(ex_roi, gt_roi, gt_mask)
            mask_reg_target = cv2.resize(mask_reg_target.astype(np.float64),
                                         (self._mask_size, self._mask_size))
            mask_reg_target = mask_reg_target >= self._binary_thresh
            mask_reg_targets[idx, ...] = mask_reg_target

        return rois, labels, bbox_targets, bbox_weights, mask_reg_targets
예제 #26
0
def cpu_mask_voting(masks,
                    boxes,
                    scores,
                    num_classes,
                    max_per_image,
                    im_width,
                    im_height,
                    nms_thresh,
                    merge_thresh,
                    binary_thresh=0.4):
    """
    Wrapper function for mask voting, note we already know the class of boxes and masks

    For every non-background class the detections are reduced with NMS, the
    best-scoring ones are kept, and each kept detection's mask is replaced by
    a score-weighted aggregation of the masks of all detections overlapping
    it by at least merge_thresh.

    :param masks: per-detection masks, read as masks[i, 0]; the size of the
        last axis is used as the output mask size
    :param boxes: per-detection boxes, one row of 4 coordinates each
    :param scores: per-detection class scores, column c belongs to class c
    :param num_classes: number of classes including background (class 0)
    :param max_per_image: cap on detections kept per class, also used to
        derive the global score threshold
    :param im_width: image width, forwarded to mask_aggregation
    :param im_height: image height, forwarded to mask_aggregation
    :param nms_thresh: threshold handed to py_nms_wrapper
    :param merge_thresh: minimum overlap for a detection to vote on a mask
    :param binary_thresh: forwarded to mask_aggregation
    :return: (list_result_mask, list_result_box), both lists indexed by class;
        index 0 stays an empty list. list_result_box[c] is (n, 5) with the
        score in the last column, list_result_mask[c] is
        (n, 1, mask_size, mask_size)
    """
    masks = masks.astype(np.float32)
    mask_size = masks.shape[-1]
    nms = py_nms_wrapper(nms_thresh)

    # Per-class NMS, keeping at most max_per_image detections per class
    t_boxes = [[] for _ in range(num_classes)]
    t_scores = [[] for _ in range(num_classes)]
    t_all_scores = []
    for i in range(1, num_classes):
        dets = np.hstack((boxes.astype(np.float32), scores[:, i:i + 1]))
        inds = nms(dets)
        num_keep = min(len(inds), max_per_image)
        inds = inds[:num_keep]
        t_boxes[i] = boxes[inds]
        t_scores[i] = scores[inds, i]
        t_all_scores.extend(scores[inds, i])

    # Global score threshold: the max_per_image-th best score over all
    # classes, but never below 1e-3
    sorted_scores = np.sort(t_all_scores)[::-1]
    num_keep = min(len(sorted_scores), max_per_image)
    if num_keep > 0:
        thresh = max(sorted_scores[num_keep - 1], 1e-3)
    else:
        # nothing survived NMS: indexing sorted_scores would fail
        thresh = 1e-3

    for i in range(1, num_classes):
        keep = np.where(t_scores[i] >= thresh)
        t_boxes[i] = t_boxes[i][keep]
        t_scores[i] = t_scores[i][keep]

    # Resize every detection's mask to the pixel size of its own box
    num_detect = boxes.shape[0]
    res_mask = []
    for i in range(num_detect):
        box = np.round(boxes[i]).astype(int)
        res_mask.append(
            cv2.resize(masks[i, 0].astype(np.float32),
                       (box[2] - box[0] + 1, box[3] - box[1] + 1)))

    # loop-invariant: the same float copy of all boxes is compared against
    # every kept detection
    all_boxes = boxes.astype(np.float64)

    list_result_box = [[] for _ in range(num_classes)]
    list_result_mask = [[] for _ in range(num_classes)]
    for c in range(1, num_classes):
        num_boxes = len(t_boxes[c])
        masks_ar = np.zeros((num_boxes, 1, mask_size, mask_size))
        boxes_ar = np.zeros((num_boxes, 4))
        for i in range(num_boxes):
            # Get weights according to their segmentation scores
            cur_ov = bbox_overlaps(all_boxes,
                                   t_boxes[c][i, np.newaxis].astype(np.float64))
            cur_inds = np.where(cur_ov >= merge_thresh)[0]
            cur_weights = scores[cur_inds, c]
            cur_weights = cur_weights / sum(cur_weights)
            # Re-format mask when passing it to mask_aggregation
            p_mask = [res_mask[j] for j in list(cur_inds)]
            # do mask aggregation
            orig_mask, boxes_ar[i] = mask_aggregation(boxes[cur_inds], p_mask,
                                                      cur_weights, im_width,
                                                      im_height, binary_thresh)
            masks_ar[i, 0] = cv2.resize(orig_mask.astype(np.float32),
                                        (mask_size, mask_size))

        list_result_box[c] = np.hstack((boxes_ar, t_scores[c][:, np.newaxis]))
        list_result_mask[c] = masks_ar

    return list_result_mask, list_result_box
예제 #27
0
파일: rpn.py 프로젝트: sonyeric/RetinaNet-1
def assign_anchor(feat_shape_p4,
                  feat_shape_p5,
                  feat_shape_p6,
                  feat_shape_p7,
                  gt_boxes,
                  im_info,
                  cfg,
                  feat_stride_p4=16,
                  scales_p4=(8, ),
                  ratios_p4=(0.75, 1, 1.5),
                  feat_stride_p5=32,
                  scales_p5=(8, ),
                  ratios_p5=(0.75, 1, 1.5),
                  feat_stride_p6=64,
                  scales_p6=(8, ),
                  ratios_p6=(0.75, 1, 1.5),
                  feat_stride_p7=128,
                  scales_p7=(8, ),
                  ratios_p7=(0.75, 1, 1.5),
                  allowed_border=0):
    """
    assign ground truth boxes to anchor positions on the P4-P7 pyramid levels

    Every anchor receives the class label of the ground-truth box it overlaps
    most; labels are then set to -1 where that overlap is below
    cfg.TRAIN.RPN_POSITIVE_OVERLAP and to 0 where it is below
    cfg.TRAIN.RPN_NEGATIVE_OVERLAP. Without ground truth all labels are 0.
    No anchor is filtered out and no subsampling is performed.

    :param feat_shape_p4: feature map shape of level P4; the last two entries
        are read as (height, width). Same for feat_shape_p5/p6/p7
    :param gt_boxes: assign ground truth:[n, 5], column 4 is the class label
    :param im_info: not used by this implementation
    :param cfg: config; reads cfg.TRAIN.RPN_POSITIVE_OVERLAP,
        RPN_NEGATIVE_OVERLAP and RPN_BBOX_WEIGHTS
    :param feat_stride_p4: not used, strides are fixed to (16, 32, 64, 128).
        Same for feat_stride_p5/p6/p7
    :param scales_p5: anchor scales, applied to every level
    :param ratios_p5: anchor aspect ratios, applied to every level
    :param scales_p4: not used. Same for scales_p6/p7 and ratios_p4/p6/p7
    :param allowed_border: not used, anchors crossing the border are kept
    :return: dict of label, all levels concatenated in P4..P7 order
    'label': of shape (1, total number of anchors)
    'bbox_target': of shape (1, num_anchors * 4, total number of positions),
        computed for every anchor when ground truth is present
    'bbox_weight': same shape as 'bbox_target', cfg.TRAIN.RPN_BBOX_WEIGHTS for
        anchors with label >= 1 and 0 elsewhere
    """
    feat_shapes = [feat_shape_p4, feat_shape_p5, feat_shape_p6, feat_shape_p7]
    feat_strides = [16, 32, 64, 128]

    scales = np.array(scales_p5)
    ratios = np.array(ratios_p5)

    def _unmap(data, count, inds, fill=0):
        """ unmap a subset inds of data into original data of size count """
        if len(data.shape) == 1:
            ret = np.empty((count, ), dtype=np.float32)
            ret.fill(fill)
            ret[inds] = data
        else:
            ret = np.empty((count, ) + data.shape[1:], dtype=np.float32)
            ret.fill(fill)
            ret[inds, :] = data
        return ret

    fpn_args = []
    fpn_anchors_fid = np.zeros(0).astype(int)
    fpn_anchors = np.zeros([0, 4])
    fpn_inds_inside = []
    for feat_id in range(len(feat_strides)):
        # len(scales.shape) == 1 just for backward compatibility, will remove in the future
        if len(scales.shape) == 1:
            base_anchors = generate_anchors(base_size=feat_strides[feat_id],
                                            ratios=ratios,
                                            scales=scales)
        else:
            assert len(scales.shape) == len(ratios.shape) == 2
            base_anchors = generate_anchors(base_size=feat_strides[feat_id],
                                            ratios=ratios[feat_id],
                                            scales=scales[feat_id])

        feat_height, feat_width = feat_shapes[feat_id][-2:]

        # 1. shift the base anchors to every feature map position
        shift_x = np.arange(0, feat_width) * feat_strides[feat_id]
        shift_y = np.arange(0, feat_height) * feat_strides[feat_id]
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                            shift_y.ravel())).transpose()
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = base_anchors.shape[0]
        K = shifts.shape[0]
        all_anchors = base_anchors.reshape((1, A, 4)) + shifts.reshape(
            (1, K, 4)).transpose((1, 0, 2))
        all_anchors = all_anchors.reshape((K * A, 4))
        total_anchors = int(K * A)

        # every anchor is kept, including those crossing the image border
        inds_inside = np.arange(total_anchors)
        anchors = all_anchors[inds_inside, :]

        fpn_anchors_fid = np.hstack((fpn_anchors_fid, len(inds_inside)))
        fpn_anchors = np.vstack((fpn_anchors, anchors))
        fpn_inds_inside.append(inds_inside)
        fpn_args.append([feat_height, feat_width, A, total_anchors])

    if gt_boxes.size > 0:
        # overlap between the anchors and the gt boxes
        # overlaps (ex, gt)
        overlaps = bbox_overlaps(fpn_anchors.astype(np.float64),
                                 gt_boxes.astype(np.float64))
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(len(fpn_anchors)), argmax_overlaps]

        # label: class id is positive, 0 is negative, -1 is dont care
        # (fancy indexing copies, so gt_boxes itself is not modified)
        fpn_labels = gt_boxes[argmax_overlaps, 4]
        fpn_labels[max_overlaps < cfg.TRAIN.RPN_POSITIVE_OVERLAP] = -1
        fpn_labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
    else:
        fpn_labels = np.zeros(len(fpn_anchors))

    # turn per-level anchor counts into [start, end) offsets
    fpn_anchors_fid = np.hstack((0, fpn_anchors_fid.cumsum()))

    fpn_bbox_targets = np.zeros((len(fpn_anchors), 4), dtype=np.float32)
    if gt_boxes.size > 0:
        fpn_bbox_targets[:] = bbox_transform(fpn_anchors,
                                             gt_boxes[argmax_overlaps, :4])
    fpn_bbox_weights = np.zeros((len(fpn_anchors), 4), dtype=np.float32)
    fpn_bbox_weights[fpn_labels >= 1, :] = np.array(cfg.TRAIN.RPN_BBOX_WEIGHTS)

    label_list = []
    bbox_target_list = []
    bbox_weight_list = []
    for feat_id in range(0, len(feat_strides)):
        feat_height, feat_width, A, total_anchors = fpn_args[feat_id]
        level_start = fpn_anchors_fid[feat_id]
        level_end = fpn_anchors_fid[feat_id + 1]
        # map up to original set of anchors
        labels = _unmap(fpn_labels[level_start:level_end],
                        total_anchors,
                        fpn_inds_inside[feat_id],
                        fill=-1)
        bbox_targets = _unmap(fpn_bbox_targets[level_start:level_end],
                              total_anchors,
                              fpn_inds_inside[feat_id],
                              fill=0)
        bbox_weights = _unmap(fpn_bbox_weights[level_start:level_end],
                              total_anchors,
                              fpn_inds_inside[feat_id],
                              fill=0)

        # reorder from (position, anchor) to (anchor, position) layout
        labels = labels.reshape(
            (1, feat_height, feat_width, A)).transpose(0, 3, 1, 2)
        labels = labels.reshape((1, A * feat_height * feat_width))

        bbox_targets = bbox_targets.reshape(
            (1, feat_height, feat_width, A * 4)).transpose(0, 3, 1, 2)
        bbox_targets = bbox_targets.reshape((1, A * 4, -1))
        bbox_weights = bbox_weights.reshape(
            (1, feat_height, feat_width, A * 4)).transpose((0, 3, 1, 2))
        bbox_weights = bbox_weights.reshape((1, A * 4, -1))

        label_list.append(labels)
        bbox_target_list.append(bbox_targets)
        bbox_weight_list.append(bbox_weights)

    label = {
        'label': np.concatenate(label_list, axis=1),
        'bbox_target': np.concatenate(bbox_target_list, axis=2),
        'bbox_weight': np.concatenate(bbox_weight_list, axis=2)
    }

    return label
예제 #28
0
def assign_pyramid_anchor(
        feat_shapes,
        gt_boxes,
        im_info,
        cfg,
        feat_strides=(4, 8, 16, 32, 64),
        scales=(8, ),
        ratios=(0.5, 1, 2),
        allowed_border=0,
        balance_scale_bg=False,
):
    """
    assign ground truth boxes to anchor positions of a feature pyramid

    Anchors inside the image (within allowed_border) are labelled 1
    (foreground), 0 (background) or -1 (dont care) from their overlap with the
    ground truth, then foreground and background are randomly subsampled to
    fit cfg.TRAIN.RPN_BATCH_SIZE (no subsampling when it is -1).

    :param feat_shapes: infer output shape, one entry per pyramid level; the
        last two values of feat_shapes[i][0] are read as (height, width)
    :param gt_boxes: assign ground truth, columns :4 are the box coordinates
    :param im_info: im_info[0] is read as (height, width, ...) and used to
        filter out anchors overlapped with edges
    :param cfg: config; reads cfg.TRAIN.RPN_CLOBBER_POSITIVES,
        RPN_NEGATIVE_OVERLAP, RPN_POSITIVE_OVERLAP, RPN_BATCH_SIZE,
        RPN_FG_FRACTION and RPN_BBOX_WEIGHTS
    :param feat_strides: anchor position step, one per pyramid level
    :param scales: used to generate anchors, affects num_anchors (per location);
        1-D to share across levels or 2-D with one row per level
    :param ratios: aspect ratios of generated anchors, same layout as scales
    :param allowed_border: filter out anchors with edge overlap > allowed_border
    :param balance_scale_bg: restrict the background samples for each pyramid level
    :return: dict of label, all levels concatenated in feat_strides order
    'label': of shape (1, total number of anchors)
    'bbox_target': of shape (1, num_anchors * 4, total number of positions)
    'bbox_weight': same shape as 'bbox_target', cfg.TRAIN.RPN_BBOX_WEIGHTS for
        foreground anchors and 0 elsewhere
    """
    def _unmap(data, count, inds, fill=0):
        """ unmap a subset inds of data into original data of size count """
        if len(data.shape) == 1:
            ret = np.empty((count, ), dtype=np.float32)
            ret.fill(fill)
            ret[inds] = data
        else:
            ret = np.empty((count, ) + data.shape[1:], dtype=np.float32)
            ret.fill(fill)
            ret[inds, :] = data
        return ret

    # when True, subsampling drops the first candidates instead of the
    # randomly drawn ones
    DEBUG = False
    im_info = im_info[0]
    scales = np.array(scales, dtype=np.float32)
    ratios = np.array(ratios, dtype=np.float32)
    assert (len(feat_shapes) == len(feat_strides))

    fpn_args = []
    fpn_anchors_fid = np.zeros(0).astype(int)
    fpn_anchors = np.zeros([0, 4])
    fpn_labels = np.zeros(0)
    fpn_inds_inside = []
    for feat_id in range(len(feat_strides)):
        # len(scales.shape) == 1 just for backward compatibility, will remove in the future
        if len(scales.shape) == 1:
            base_anchors = generate_anchors(base_size=feat_strides[feat_id],
                                            ratios=ratios,
                                            scales=scales)
        else:
            assert len(scales.shape) == len(ratios.shape) == 2
            base_anchors = generate_anchors(base_size=feat_strides[feat_id],
                                            ratios=ratios[feat_id],
                                            scales=scales[feat_id])
        num_anchors = base_anchors.shape[0]
        feat_height, feat_width = feat_shapes[feat_id][0][-2:]

        # 1. shift the base anchors to every feature map position
        shift_x = np.arange(0, feat_width) * feat_strides[feat_id]
        shift_y = np.arange(0, feat_height) * feat_strides[feat_id]
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                            shift_y.ravel())).transpose()
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = num_anchors
        K = shifts.shape[0]
        all_anchors = base_anchors.reshape((1, A, 4)) + shifts.reshape(
            (1, K, 4)).transpose((1, 0, 2))
        all_anchors = all_anchors.reshape((K * A, 4))
        total_anchors = int(K * A)

        # only keep anchors inside the image
        inds_inside = np.where(
            (all_anchors[:, 0] >= -allowed_border)
            & (all_anchors[:, 1] >= -allowed_border)
            & (all_anchors[:, 2] < im_info[1] + allowed_border)
            & (all_anchors[:, 3] < im_info[0] + allowed_border))[0]

        # keep only inside anchors
        anchors = all_anchors[inds_inside, :]

        # label: 1 is positive, 0 is negative, -1 is dont care
        # for sigmoid classifier, ignore the 'background' class
        labels = np.empty((len(inds_inside), ), dtype=np.float32)
        labels.fill(-1)

        fpn_anchors_fid = np.hstack((fpn_anchors_fid, len(inds_inside)))
        fpn_anchors = np.vstack((fpn_anchors, anchors))
        fpn_labels = np.hstack((fpn_labels, labels))
        fpn_inds_inside.append(inds_inside)
        fpn_args.append([feat_height, feat_width, A, total_anchors])

    if gt_boxes.size > 0:
        # overlap between the anchors and the gt boxes
        # overlaps (ex, gt)
        overlaps = bbox_overlaps(fpn_anchors.astype(np.float64),
                                 gt_boxes.astype(np.float64))
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(len(fpn_anchors)), argmax_overlaps]
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps,
                                   np.arange(overlaps.shape[1])]
        # every anchor that ties for the best overlap with some gt box
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

        if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels first so that positive labels can clobber them
            fpn_labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
        # fg label: for each gt, anchor with highest overlap
        fpn_labels[gt_argmax_overlaps] = 1
        # fg label: above threshold IoU
        fpn_labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1
        if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels last so that negative labels can clobber positives
            fpn_labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
    else:
        fpn_labels[:] = 0

    # subsample positive labels if we have too many
    num_fg = fpn_labels.shape[0] if cfg.TRAIN.RPN_BATCH_SIZE == -1 else int(
        cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCH_SIZE)
    fg_inds = np.where(fpn_labels >= 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds,
                                  size=(len(fg_inds) - num_fg),
                                  replace=False)
        if DEBUG:
            disable_inds = fg_inds[:(len(fg_inds) - num_fg)]
        fpn_labels[disable_inds] = -1

    # subsample negative labels if we have too many
    num_bg = fpn_labels.shape[
        0] if cfg.TRAIN.RPN_BATCH_SIZE == -1 else cfg.TRAIN.RPN_BATCH_SIZE - np.sum(
            fpn_labels >= 1)
    bg_inds = np.where(fpn_labels == 0)[0]
    # turn per-level anchor counts into [start, end) offsets
    fpn_anchors_fid = np.hstack((0, fpn_anchors_fid.cumsum()))

    if balance_scale_bg:
        # floor division: the per-level budget is used as a sample count and
        # must stay an integer under true division as well
        num_bg_scale = num_bg // len(feat_strides)
        for feat_id in range(0, len(feat_strides)):
            bg_ind_scale = bg_inds[(bg_inds >= fpn_anchors_fid[feat_id])
                                   & (bg_inds < fpn_anchors_fid[feat_id + 1])]
            if len(bg_ind_scale) > num_bg_scale:
                disable_inds = npr.choice(bg_ind_scale,
                                          size=(len(bg_ind_scale) -
                                                num_bg_scale),
                                          replace=False)
                fpn_labels[disable_inds] = -1
    else:
        if len(bg_inds) > num_bg:
            disable_inds = npr.choice(bg_inds,
                                      size=(len(bg_inds) - num_bg),
                                      replace=False)
            if DEBUG:
                disable_inds = bg_inds[:(len(bg_inds) - num_bg)]
            fpn_labels[disable_inds] = -1

    # regression targets and weights only for foreground anchors
    fpn_bbox_targets = np.zeros((len(fpn_anchors), 4), dtype=np.float32)
    if gt_boxes.size > 0:
        fpn_bbox_targets[fpn_labels >= 1, :] = bbox_transform(
            fpn_anchors[fpn_labels >= 1, :],
            gt_boxes[argmax_overlaps[fpn_labels >= 1], :4])
    fpn_bbox_weights = np.zeros((len(fpn_anchors), 4), dtype=np.float32)

    fpn_bbox_weights[fpn_labels >= 1, :] = np.array(cfg.TRAIN.RPN_BBOX_WEIGHTS)

    label_list = []
    bbox_target_list = []
    bbox_weight_list = []
    for feat_id in range(0, len(feat_strides)):
        feat_height, feat_width, A, total_anchors = fpn_args[feat_id]
        # map up to original set of anchors
        labels = _unmap(
            fpn_labels[fpn_anchors_fid[feat_id]:fpn_anchors_fid[feat_id + 1]],
            total_anchors,
            fpn_inds_inside[feat_id],
            fill=-1)
        bbox_targets = _unmap(
            fpn_bbox_targets[fpn_anchors_fid[feat_id]:fpn_anchors_fid[feat_id +
                                                                      1]],
            total_anchors,
            fpn_inds_inside[feat_id],
            fill=0)
        bbox_weights = _unmap(
            fpn_bbox_weights[fpn_anchors_fid[feat_id]:fpn_anchors_fid[feat_id +
                                                                      1]],
            total_anchors,
            fpn_inds_inside[feat_id],
            fill=0)

        # reorder from (position, anchor) to (anchor, position) layout
        labels = labels.reshape(
            (1, feat_height, feat_width, A)).transpose(0, 3, 1, 2)
        labels = labels.reshape((1, A * feat_height * feat_width))

        bbox_targets = bbox_targets.reshape(
            (1, feat_height, feat_width, A * 4)).transpose(0, 3, 1, 2)
        bbox_targets = bbox_targets.reshape((1, A * 4, -1))
        bbox_weights = bbox_weights.reshape(
            (1, feat_height, feat_width, A * 4)).transpose((0, 3, 1, 2))
        bbox_weights = bbox_weights.reshape((1, A * 4, -1))

        label_list.append(labels)
        bbox_target_list.append(bbox_targets)
        bbox_weight_list.append(bbox_weights)

    label = {
        'label': np.concatenate(label_list, axis=1),
        'bbox_target': np.concatenate(bbox_target_list, axis=2),
        'bbox_weight': np.concatenate(bbox_weight_list, axis=2)
    }

    return label
예제 #29
0
파일: rpn.py 프로젝트: xuxiaohui4/FPN-mxnet
def assign_anchor(feat_shape_p2,
                  feat_shape_p3,
                  feat_shape_p4,
                  feat_shape_p5,
                  feat_shape_p6,
                  gt_boxes,
                  im_info,
                  cfg,
                  feat_stride_p2=4,
                  scales_p2=(16, ),
                  ratios_p2=(0.75, 1, 1.5),
                  feat_stride_p3=8,
                  scales_p3=(16, ),
                  ratios_p3=(0.75, 1, 1.5),
                  feat_stride_p4=16,
                  scales_p4=(16, ),
                  ratios_p4=(0.75, 1, 1.5),
                  feat_stride_p5=32,
                  scales_p5=(16, ),
                  ratios_p5=(0.75, 1, 1.5),
                  feat_stride_p6=64,
                  scales_p6=(16, ),
                  ratios_p6=(0.75, 1, 1.5),
                  allowed_border=1000):
    """
    Assign RPN labels and box-regression targets to the anchors of five FPN levels.

    Anchors are generated for every level, stacked into one array and labelled
    jointly against ``gt_boxes``; the fg/bg subsampling budget
    (cfg.TRAIN.RPN_BATCH_SIZE) is therefore shared by all levels. No anchor is
    discarded for crossing the image border.

    :param feat_shape_p2 ... feat_shape_p6: feature-map shape of each level;
        only the last two entries (height, width) are read
    :param gt_boxes: ground-truth boxes; the first four columns are used as box
        coordinates for the regression targets (originally documented as [n, 5])
    :param im_info: only im_info[0] is read, and only for debug output
    :param cfg: config object; reads cfg.TRAIN.RPN_CLOBBER_POSITIVES,
        RPN_NEGATIVE_OVERLAP, RPN_POSITIVE_OVERLAP, RPN_FG_FRACTION,
        RPN_BATCH_SIZE and RPN_BBOX_WEIGHTS
    :param scales_p3: anchor scales, used for EVERY level
    :param feat_stride_p*, ratios_p*, scales_p2/p4/p5/p6: accepted for interface
        compatibility but not used: strides are fixed to (4, 8, 16, 32, 64) and
        ratios to (0.5, 1, 2)
    :param allowed_border: accepted for interface compatibility; has no effect
        because every anchor is kept
    :return: dict of label, with for each level pN in p2..p6 (A anchors per
        location, H x W feature map):
    'label/pN': of shape (1, A * H * W); 1 is positive, 0 is negative, -1 is dont care
    'bbox_target/pN': of shape (1, A * 4, H, W)
    'bbox_weight/pN': of shape (1, A * 4, H, W); cfg.TRAIN.RPN_BBOX_WEIGHTS on
        positive anchors, 0 elsewhere
    """
    DEBUG = False
    level_names = ('p2', 'p3', 'p4', 'p5', 'p6')
    feat_shape = [
        feat_shape_p2, feat_shape_p3, feat_shape_p4, feat_shape_p5,
        feat_shape_p6
    ]
    # NOTE: the per-level stride / ratio / scale arguments are not consulted.
    # Honouring them would change the anchors produced by existing callers
    # (the ratio defaults are (0.75, 1, 1.5), not (0.5, 1, 2)), so the
    # historical hard-coded values are kept.
    feat_stride = [4, 8, 16, 32, 64]
    ratios = (0.5, 1, 2)
    assert len(feat_stride) == len(feat_shape) == len(level_names), \
        'length of feat_stride is not equal to length of feat_shape'

    im_info = im_info[0]
    scales = np.array(scales_p3, dtype=np.float32)

    all_anchors_list = []
    level_ends = []  # cumulative anchor count after each level
    level_dims = []  # (feat_height, feat_width, A) of each level
    total_anchors = 0
    for stride, shape in zip(feat_stride, feat_shape):
        base_anchors = generate_anchors(base_size=stride,
                                        ratios=list(ratios),
                                        scales=scales)
        A = base_anchors.shape[0]
        feat_height, feat_width = shape[-2:]

        if DEBUG:
            print('anchors:')
            print(base_anchors)
            print('anchor shapes:')
            print(np.hstack((base_anchors[:, 2::4] - base_anchors[:, 0::4],
                             base_anchors[:, 3::4] - base_anchors[:, 1::4])))
            print('im_info %s' % (im_info, ))
            print('height %s width %s' % (feat_height, feat_width))
            print('gt_boxes shape %s' % (gt_boxes.shape, ))
            print('gt_boxes %s' % (gt_boxes, ))

        # one (x, y, x, y) shift per feature-map cell, in image pixels
        shift_x = np.arange(0, feat_width) * stride
        shift_y = np.arange(0, feat_height) * stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                            shift_y.ravel())).transpose()
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        K = shifts.shape[0]
        level_anchors = base_anchors.reshape((1, A, 4)) + shifts.reshape(
            (1, K, 4)).transpose((1, 0, 2))
        level_anchors = level_anchors.reshape((K * A, 4))
        all_anchors_list.append(level_anchors)
        total_anchors += int(K * A)
        level_ends.append(total_anchors)
        level_dims.append((feat_height, feat_width, A))

        # Unlike the single-level RPN, anchors crossing the image border are
        # kept, so there is no "inside" filtering step here.
        if DEBUG:
            print('total_anchors %s' % (int(K * A), ))
            print('anchors shape %s' % (level_anchors.shape, ))

    all_anchors = np.vstack(all_anchors_list)  # (total_anchors, 4)

    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.empty((total_anchors, ), dtype=np.float32)
    labels.fill(-1)
    if gt_boxes.size > 0:
        # overlaps (ex, gt); np.float64 replaces the removed np.float alias
        overlaps = bbox_overlaps(all_anchors.astype(np.float64),
                                 gt_boxes.astype(np.float64))
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(total_anchors), argmax_overlaps]
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps,
                                   np.arange(overlaps.shape[1])]
        # every anchor that ties for the best overlap with some gt box
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

        if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels first so that positive labels can clobber them
            labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

        # fg label: for each gt, anchor with highest overlap
        labels[gt_argmax_overlaps] = 1

        # fg label: above threshold IoU
        labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

        if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels last so that negative labels can clobber positives
            labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
    else:
        labels[:] = 0

    # subsample positive labels if we have too many
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCH_SIZE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds,
                                  size=(len(fg_inds) - num_fg),
                                  replace=False)
        if DEBUG:
            disable_inds = fg_inds[:(len(fg_inds) - num_fg)]
        labels[disable_inds] = -1

    # subsample negative labels if we have too many
    num_bg = cfg.TRAIN.RPN_BATCH_SIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(bg_inds,
                                  size=(len(bg_inds) - num_bg),
                                  replace=False)
        if DEBUG:
            disable_inds = bg_inds[:(len(bg_inds) - num_bg)]
        labels[disable_inds] = -1

    bbox_targets = np.zeros((total_anchors, 4), dtype=np.float32)
    if gt_boxes.size > 0:
        bbox_targets[:] = bbox_transform(all_anchors,
                                         gt_boxes[argmax_overlaps, :4])

    bbox_weights = np.zeros((total_anchors, 4), dtype=np.float32)
    bbox_weights[labels == 1, :] = np.array(cfg.TRAIN.RPN_BBOX_WEIGHTS)

    if DEBUG:
        _sums = bbox_targets[labels == 1, :].sum(axis=0)
        _squared_sums = (bbox_targets[labels == 1, :]**2).sum(axis=0)
        _counts = np.sum(labels == 1)
        means = _sums / (_counts + 1e-14)
        stds = np.sqrt(_squared_sums / _counts - means**2)
        print('means %s' % (means, ))
        print('stdevs %s' % (stds, ))
        _fg_sum = np.sum(labels == 1)
        _bg_sum = np.sum(labels == 0)
        _count = 1
        print('rpn: num_positives %s' % (_fg_sum, ))
        print('rpn: num_negatives %s' % (_bg_sum, ))
        print('rpn: num_positive avg %s' % (_fg_sum / _count, ))
        print('rpn: num_negative avg %s' % (_bg_sum / _count, ))

    # Cut the stacked per-anchor arrays back into per-level pieces and lay
    # each one out anchor-major, i.e. (H, W, A[*4]) -> (A[*4], H, W).
    label = {}
    start = 0
    for name, end, dims in zip(level_names, level_ends, level_dims):
        feat_height, feat_width, A = dims
        level_labels = labels[start:end].reshape(
            (1, feat_height, feat_width, A)).transpose(0, 3, 1, 2)
        label['label/' + name] = level_labels.reshape(
            (1, A * feat_height * feat_width))
        label['bbox_target/' + name] = bbox_targets[start:end].reshape(
            (1, feat_height, feat_width, A * 4)).transpose(0, 3, 1, 2)
        label['bbox_weight/' + name] = bbox_weights[start:end].reshape(
            (1, feat_height, feat_width, A * 4)).transpose((0, 3, 1, 2))
        start = end
    return label
예제 #30
0
def assign_anchor(feat_shape,
                  gt_boxes,
                  im_info,
                  cfg,
                  feat_stride=16,
                  scales=(8, 16, 32),
                  ratios=(0.5, 1, 2),
                  allowed_border=0):
    """
    Assign RPN labels and box-regression targets to the anchors of one feature map.

    :param feat_shape: infer output shape; only the last two entries
        (feat_height, feat_width) are read
    :param gt_boxes: ground-truth boxes; the first four columns are used as box
        coordinates for the regression targets
    :param im_info: only im_info[0] is read; its entries [0] and [1] bound
        anchor columns 3 and 2 respectively (presumably image height and width)
    :param cfg: config object; reads cfg.TRAIN.RPN_CLOBBER_POSITIVES,
        RPN_NEGATIVE_OVERLAP, RPN_POSITIVE_OVERLAP, RPN_FG_FRACTION,
        RPN_BATCH_SIZE and RPN_BBOX_WEIGHTS
    :param feat_stride: anchor position step
    :param scales: used to generate anchors, affects num_anchors (per location)
    :param ratios: aspect ratios of generated anchors
    :param allowed_border: anchors reaching further than this many pixels
        outside the image are excluded from labelling (they end up as -1)
    :return: dict of label (A anchors per location, H x W feature map)
    'label': of shape (1, A * H * W); 1 is positive, 0 is negative, -1 is dont care
    'bbox_target': of shape (1, A * 4, H, W)
    'bbox_weight': of shape (1, A * 4, H, W); cfg.TRAIN.RPN_BBOX_WEIGHTS on
        positive anchors, 0 elsewhere
    """
    def _unmap(data, count, inds, fill=0):
        """ unmap a subset inds of data into original data of size count """
        # data.shape[1:] is () for 1-d input, so one code path covers both ranks
        ret = np.empty((count, ) + data.shape[1:], dtype=np.float32)
        ret.fill(fill)
        ret[inds] = data
        return ret

    DEBUG = False
    im_info = im_info[0]
    scales = np.array(scales, dtype=np.float32)
    base_anchors = generate_anchors(base_size=feat_stride,
                                    ratios=list(ratios),
                                    scales=scales)
    num_anchors = base_anchors.shape[0]
    feat_height, feat_width = feat_shape[-2:]

    if DEBUG:
        print('anchors:')
        print(base_anchors)
        print('anchor shapes:')
        print(
            np.hstack((base_anchors[:, 2::4] - base_anchors[:, 0::4],
                       base_anchors[:, 3::4] - base_anchors[:, 1::4])))
        print('im_info', im_info)
        print('height', feat_height, 'width', feat_width)
        print('gt_boxes shape', gt_boxes.shape)
        print('gt_boxes', gt_boxes)

    # 1. one (x, y, x, y) shift per feature-map cell, in image pixels
    shift_x = np.arange(0, feat_width) * feat_stride
    shift_y = np.arange(0, feat_height) * feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                        shift_y.ravel())).transpose()
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = num_anchors
    K = shifts.shape[0]
    all_anchors = base_anchors.reshape((1, A, 4)) + shifts.reshape(
        (1, K, 4)).transpose((1, 0, 2))
    all_anchors = all_anchors.reshape((K * A, 4))
    total_anchors = int(K * A)

    # only keep anchors inside the image (within allowed_border pixels)
    inside_mask = ((all_anchors[:, 0] >= -allowed_border)
                   & (all_anchors[:, 1] >= -allowed_border)
                   & (all_anchors[:, 2] < im_info[1] + allowed_border)
                   & (all_anchors[:, 3] < im_info[0] + allowed_border))
    inds_inside = np.where(inside_mask)[0]
    num_inside = len(inds_inside)
    if DEBUG:
        print('total_anchors', total_anchors)
        print('inds_inside', num_inside)

    # keep only inside anchors
    anchors = all_anchors[inds_inside, :]
    if DEBUG:
        print('anchors shape', anchors.shape)

    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.empty((num_inside, ), dtype=np.float32)
    labels.fill(-1)

    if gt_boxes.size > 0:
        # overlaps (ex, gt); np.float64 replaces the removed np.float alias
        overlaps = bbox_overlaps(anchors.astype(np.float64),
                                 gt_boxes.astype(np.float64))
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(num_inside), argmax_overlaps]
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps,
                                   np.arange(overlaps.shape[1])]
        # every anchor that ties for the best overlap with some gt box
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

        if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels first so that positive labels can clobber them
            labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

        # fg label: for each gt, anchor with highest overlap
        labels[gt_argmax_overlaps] = 1

        # fg label: above threshold IoU
        labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

        if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels last so that negative labels can clobber positives
            labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
    else:
        labels[:] = 0

    # subsample positive labels if we have too many
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCH_SIZE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds,
                                  size=(len(fg_inds) - num_fg),
                                  replace=False)
        if DEBUG:
            disable_inds = fg_inds[:(len(fg_inds) - num_fg)]
        labels[disable_inds] = -1

    # subsample negative labels if we have too many
    num_bg = cfg.TRAIN.RPN_BATCH_SIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(bg_inds,
                                  size=(len(bg_inds) - num_bg),
                                  replace=False)
        if DEBUG:
            disable_inds = bg_inds[:(len(bg_inds) - num_bg)]
        labels[disable_inds] = -1

    bbox_targets = np.zeros((num_inside, 4), dtype=np.float32)
    if gt_boxes.size > 0:
        bbox_targets[:] = bbox_transform(anchors,
                                         gt_boxes[argmax_overlaps, :4])

    bbox_weights = np.zeros((num_inside, 4), dtype=np.float32)
    bbox_weights[labels == 1, :] = np.array(cfg.TRAIN.RPN_BBOX_WEIGHTS)

    if DEBUG:
        _sums = bbox_targets[labels == 1, :].sum(axis=0)
        _squared_sums = (bbox_targets[labels == 1, :]**2).sum(axis=0)
        _counts = np.sum(labels == 1)
        means = _sums / (_counts + 1e-14)
        stds = np.sqrt(_squared_sums / _counts - means**2)
        print('means', means)
        print('stdevs', stds)

    # map up to original set of anchors
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_weights = _unmap(bbox_weights, total_anchors, inds_inside, fill=0)

    if DEBUG:
        if gt_boxes.size > 0:
            # max_overlaps only exists when there were gt boxes to match
            print('rpn: max max_overlaps', np.max(max_overlaps))
        print('rpn: num_positives', np.sum(labels == 1))
        print('rpn: num_negatives', np.sum(labels == 0))
        _fg_sum = np.sum(labels == 1)
        _bg_sum = np.sum(labels == 0)
        _count = 1
        print('rpn: num_positive avg', _fg_sum / _count)
        print('rpn: num_negative avg', _bg_sum / _count)

    # lay everything out anchor-major: (H, W, A[*4]) -> (A[*4], H, W)
    labels = labels.reshape(
        (1, feat_height, feat_width, A)).transpose(0, 3, 1, 2)
    labels = labels.reshape((1, A * feat_height * feat_width))
    bbox_targets = bbox_targets.reshape(
        (1, feat_height, feat_width, A * 4)).transpose(0, 3, 1, 2)
    bbox_weights = bbox_weights.reshape(
        (1, feat_height, feat_width, A * 4)).transpose((0, 3, 1, 2))

    return {
        'label': labels,
        'bbox_target': bbox_targets,
        'bbox_weight': bbox_weights
    }
예제 #31
0
파일: rpn.py 프로젝트: zengzhaoyang/trident
def assign_anchor(feat_shape, gt_boxes, im_info, cfg, feat_stride=16,
                  scales=(8, 16, 32), ratios=(0.5, 1, 2), allowed_border=0, valid_ranges=None, invalid_anchor_threshold=0.3):
    """
    Assign RPN labels and box-regression targets to anchors, optionally once per scale range.

    :param feat_shape: infer output shape; only the last two entries
        (feat_height, feat_width) are read
    :param gt_boxes: ground-truth boxes; the first four columns are used as box
        coordinates
    :param im_info: only im_info[0] is read; its entries [0] and [1] bound
        anchor columns 3 and 2 respectively, and entry [2] scales the returned
        valid ranges
    :param cfg: config object; reads cfg.TRAIN.RPN_CLOBBER_POSITIVES,
        RPN_NEGATIVE_OVERLAP, RPN_POSITIVE_OVERLAP, RPN_FG_FRACTION,
        RPN_BATCH_SIZE and RPN_BBOX_WEIGHTS
    :param feat_stride: anchor position step
    :param scales: used to generate anchors, affects num_anchors (per location)
    :param ratios: aspect ratios of generated anchors
    :param allowed_border: anchors reaching further than this many pixels
        outside the image are excluded from labelling (they end up as -1)
    :param valid_ranges: None for plain RPN assignment, otherwise an iterable of
        (low, high) pairs; for each pair, gt boxes whose area lies outside
        [low**2, high**2] are "invalid" for that range
    :param invalid_anchor_threshold: anchors whose best overlap with an invalid
        gt box exceeds this value are set to dont care for that range
    :return: dict of label (A anchors per location, H x W feature map, R ranges;
        R rows are replaced by a single row when valid_ranges is None)
    'label': of shape (R, A * H * W); 1 is positive, 0 is negative, -1 is dont care
    'bbox_target': of shape (R, A * 4, H, W)
    'bbox_weight': of shape (R, A * 4, H, W)
    'valid_ranges': only when valid_ranges is given; NOTE this is rebuilt from
        the fixed table [[0, 90], [30, 160], [90, -1]] scaled by im_info[0][2]
        (negative upper bounds become the larger of im_info[0][0] and
        im_info[0][1]), not from the argument
    """
    def _unmap(data, count, inds, fill=0):
        """ unmap a subset inds of data into original data of size count """
        # data.shape[1:] is () for 1-d input, so one code path covers both ranks
        ret = np.empty((count,) + data.shape[1:], dtype=np.float32)
        ret.fill(fill)
        ret[inds] = data
        return ret

    def _subsample(lbls):
        """ randomly turn surplus positives, then surplus negatives, into -1 (in place) """
        num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCH_SIZE)
        fg_inds = np.where(lbls == 1)[0]
        if len(fg_inds) > num_fg:
            disable_inds = npr.choice(fg_inds, size=(len(fg_inds) - num_fg), replace=False)
            lbls[disable_inds] = -1

        num_bg = cfg.TRAIN.RPN_BATCH_SIZE - np.sum(lbls == 1)
        bg_inds = np.where(lbls == 0)[0]
        if len(bg_inds) > num_bg:
            disable_inds = npr.choice(bg_inds, size=(len(bg_inds) - num_bg), replace=False)
            lbls[disable_inds] = -1

    def _pack(lbls):
        """ build bbox weights for lbls, then unmap and reshape both to network layout """
        weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
        weights[lbls == 1, :] = np.array(cfg.TRAIN.RPN_BBOX_WEIGHTS)

        # map up to original set of anchors
        lbls = _unmap(lbls, total_anchors, inds_inside, fill=-1)
        weights = _unmap(weights, total_anchors, inds_inside, fill=0)

        lbls = lbls.reshape((1, feat_height, feat_width, A)).transpose(0, 3, 1, 2)
        lbls = lbls.reshape((1, A * feat_height * feat_width))
        weights = weights.reshape((1, feat_height, feat_width, A * 4)).transpose((0, 3, 1, 2))
        return lbls, weights

    im_info = im_info[0]
    scales = np.array(scales, dtype=np.float32)
    base_anchors = generate_anchors(base_size=feat_stride, ratios=list(ratios), scales=scales)
    num_anchors = base_anchors.shape[0]
    feat_height, feat_width = feat_shape[-2:]

    # 1. one (x, y, x, y) shift per feature-map cell, in image pixels
    shift_x = np.arange(0, feat_width) * feat_stride
    shift_y = np.arange(0, feat_height) * feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel())).transpose()
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = num_anchors
    K = shifts.shape[0]
    all_anchors = base_anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    all_anchors = all_anchors.reshape((K * A, 4))
    total_anchors = int(K * A)

    # only keep anchors inside the image (within allowed_border pixels)
    inds_inside = np.where((all_anchors[:, 0] >= -allowed_border) &
                           (all_anchors[:, 1] >= -allowed_border) &
                           (all_anchors[:, 2] < im_info[1] + allowed_border) &
                           (all_anchors[:, 3] < im_info[0] + allowed_border))[0]
    anchors = all_anchors[inds_inside, :]

    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.empty((len(inds_inside),), dtype=np.float32)
    labels.fill(-1)

    if gt_boxes.size > 0:
        # overlaps (ex, gt); np.float64 replaces the removed np.float alias
        overlaps = bbox_overlaps(anchors.astype(np.float64), gt_boxes.astype(np.float64))
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])]
        # every anchor that ties for the best overlap with some gt box
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

        if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels first so that positive labels can clobber them
            labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

        # fg label: for each gt, anchor with highest overlap
        labels[gt_argmax_overlaps] = 1

        # fg label: above threshold IoU
        labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

        if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels last so that negative labels can clobber positives
            labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
    else:
        labels[:] = 0

    # Regression targets depend only on the anchor/gt matching, not on the
    # sampling below, so they are computed once and shared by every range.
    bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if gt_boxes.size > 0:
        bbox_targets[:] = bbox_transform(anchors, gt_boxes[argmax_overlaps, :4])
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_targets = bbox_targets.reshape((1, feat_height, feat_width, A * 4)).transpose(0, 3, 1, 2)

    if valid_ranges is None:
        _subsample(labels)
        labels, bbox_weights = _pack(labels)
        return {'label': labels,
                'bbox_target': bbox_targets,
                'bbox_weight': bbox_weights}

    if gt_boxes.size > 0:
        # Box area from the coordinate columns 0..3 - the same columns handed
        # to bbox_transform above. The previous code read columns 1..4, which
        # mixed the fifth column into the "height".
        gt_boxes_sizes = (gt_boxes[:, 2] - gt_boxes[:, 0] + 1.) * (gt_boxes[:, 3] - gt_boxes[:, 1] + 1.)

    all_labels, all_bbox_targets, all_bbox_weights = [], [], []
    for valid_range in valid_ranges:
        cls_labels = labels.copy()
        if gt_boxes.size > 0:
            invalid_inds = np.where((gt_boxes_sizes < valid_range[0]**2) | (gt_boxes_sizes > valid_range[1]**2))[0]
            if len(invalid_inds) > 0:
                # anchors that sit on an out-of-range gt box must not be
                # trained as foreground or background for this range
                invalid_overlaps = bbox_overlaps(anchors.astype(np.float64),
                                                 gt_boxes[invalid_inds, :].astype(np.float64))
                invalid_max_overlaps = invalid_overlaps.max(axis=1)
                cls_labels[invalid_max_overlaps > invalid_anchor_threshold] = -1

        _subsample(cls_labels)
        cls_labels, bbox_weights = _pack(cls_labels)

        all_labels.append(cls_labels)
        all_bbox_targets.append(bbox_targets)
        all_bbox_weights.append(bbox_weights)

    all_labels = np.vstack(all_labels)
    all_bbox_targets = np.vstack(all_bbox_targets)
    all_bbox_weights = np.vstack(all_bbox_weights)

    # NOTE(review): the ranges reported back are a fixed table, not the
    # valid_ranges argument - kept as-is; confirm against the consumer.
    valid_ranges = np.array([[0, 90], [30, 160], [90, -1]], dtype=np.float32).reshape(-1, 2)
    valid_ranges *= im_info[2]
    inds = np.where(valid_ranges[:, 1] < 0)[0]
    valid_ranges[inds, 1] = max(im_info[0], im_info[1])

    return {'label': all_labels,
            'bbox_target': all_bbox_targets,
            'bbox_weight': all_bbox_weights,
            'valid_ranges': valid_ranges}