Ejemplo n.º 1
0
def compute_bbox_regression_targets(rois, overlaps, labels):
    """
    Given rois, overlaps and gt labels, compute bounding box regression targets.

    :param rois: roidb[i]['boxes'], shape k * 4
    :param overlaps: roidb[i]['max_overlaps'], shape k * 1
    :param labels: roidb[i]['max_classes'], shape k * 1
    :return: targets [class, dx, dy, dw, dh], shape k * 5 (float32);
        rows whose overlap is below the regression threshold stay zero
    """
    # Ensure ROIs are floats. Use np.float64 explicitly: the bare np.float
    # alias was deprecated in NumPy 1.20 and removed in 1.24.
    rois = rois.astype(np.float64, copy=False)

    # Sanity check: each roi needs a matching overlap value.
    if len(rois) != len(overlaps):
        print('bbox regression: this should not happen')

    # Indices of ground-truth ROIs (an overlap of exactly 1 marks a gt box).
    gt_inds = np.where(overlaps == 1)[0]
    if len(gt_inds) == 0:
        print('something wrong : zero ground truth rois')
    # Indices of examples for which we try to make predictions.
    ex_inds = np.where(overlaps >= config.TRAIN.BBOX_REGRESSION_THRESH)[0]

    # IoU overlap between each example ROI and each gt ROI: (#ex, #gt).
    ex_gt_overlaps = bbox_overlaps(rois[ex_inds, :], rois[gt_inds, :])

    # Each example ROI regresses toward the gt ROI it overlaps most.
    gt_assignment = ex_gt_overlaps.argmax(axis=1)
    gt_rois = rois[gt_inds[gt_assignment], :]
    ex_rois = rois[ex_inds, :]

    # Column 0 holds the class label, columns 1:5 the regression deltas.
    targets = np.zeros((rois.shape[0], 5), dtype=np.float32)
    targets[ex_inds, 0] = labels[ex_inds]
    targets[ex_inds, 1:] = bbox_transform(ex_rois, gt_rois)
    return targets
Ejemplo n.º 2
0
    def create_roidb_from_box_list(self, box_list, gt_roidb):
        """
        Given ground truth, prepare a roidb from externally supplied boxes.

        :param box_list: [image_index] ndarray of [box_index][x1, y1, x2, y2]
            (a 5th score column, if present, is dropped)
        :param gt_roidb: [image_index]['boxes', 'gt_classes', 'gt_overlaps',
            'flipped'] — must not be None: image/height/width are copied from
            it unconditionally
        :return: roidb: [image_index]['boxes', 'gt_classes', 'gt_overlaps',
            'flipped', 'max_classes', 'max_overlaps', ...]
        """
        assert len(
            box_list
        ) == self.num_images, 'number of boxes matrix must match number of images'
        roidb = []
        for i in range(self.num_images):
            roi_rec = dict()
            roi_rec['image'] = gt_roidb[i]['image']
            roi_rec['height'] = gt_roidb[i]['height']
            roi_rec['width'] = gt_roidb[i]['width']

            boxes = box_list[i]
            # Drop a trailing score column if the proposals carry one.
            if boxes.shape[1] == 5:
                boxes = boxes[:, :4]
            num_boxes = boxes.shape[0]
            overlaps = np.zeros((num_boxes, self.num_classes),
                                dtype=np.float32)
            if gt_roidb is not None and gt_roidb[i]['boxes'].size > 0:
                gt_boxes = gt_roidb[i]['boxes']
                gt_classes = gt_roidb[i]['gt_classes']
                # n boxes and k gt_boxes => n * k overlap matrix.
                # np.float64 explicitly: the bare np.float alias was removed
                # in NumPy 1.24.
                gt_overlaps = bbox_overlaps(boxes.astype(np.float64),
                                            gt_boxes.astype(np.float64))
                # For each of the n boxes keep only the maximum overlap,
                # recorded under the class of the best-matching gt box
                # (must be greater than zero).
                argmaxes = gt_overlaps.argmax(axis=1)
                maxes = gt_overlaps.max(axis=1)
                I = np.where(maxes > 0)[0]
                overlaps[I, gt_classes[argmaxes[I]]] = maxes[I]

            roi_rec.update({
                'boxes':
                boxes,
                'gt_classes':
                np.zeros((num_boxes, ), dtype=np.int32),
                'gt_overlaps':
                overlaps,
                'max_classes':
                overlaps.argmax(axis=1),
                'max_overlaps':
                overlaps.max(axis=1),
                'flipped':
                False
            })

            # background roi => background class
            zero_indexes = np.where(roi_rec['max_overlaps'] == 0)[0]
            assert all(roi_rec['max_classes'][zero_indexes] == 0)
            # foreground roi => foreground class
            nonzero_indexes = np.where(roi_rec['max_overlaps'] > 0)[0]
            assert all(roi_rec['max_classes'][nonzero_indexes] != 0)

            roidb.append(roi_rec)

        return roidb
Ejemplo n.º 3
0
def single_image_single_class_confusion_matrix(gt_boxes, pred_boxes, scores, threshold):
    """
    Compute per-prediction true/false positive indicators for one image and
    one class, matching predictions to ground truth greedily by score.

    :param gt_boxes: (#gt, 4)
    :param pred_boxes: (#box, 4)
    :param scores: (#box, )
    :param threshold: float, IoU threshold
    :return: (tp, fp), each of shape (#box, ) with 0/1 entries
        (original docstring wrongly claimed an additional `fn` value)
    """
    # No predictions: nothing to score.
    if pred_boxes.size == 0:
        return np.zeros(shape=(0, )), np.zeros(shape=(0, ))
    # No ground truth: every prediction is a false positive.
    if gt_boxes.size == 0:
        return np.zeros(shape=(pred_boxes.shape[0], )), np.ones(shape=(pred_boxes.shape[0], ))

    ious = bbox_overlaps(pred_boxes.astype(np.float32, copy=False),
                         gt_boxes.astype(np.float32, copy=False))  # (#box, #gt)
    max_overlap_for_boxes = np.max(ious, axis=1)
    gt_for_boxes = np.argmax(ious, axis=1)
    gt_detected = [False] * ious.shape[1]
    tp, fp = np.zeros(shape=(pred_boxes.shape[0], )), np.zeros(shape=(pred_boxes.shape[0], ))
    # Visit predictions from highest to lowest score; each gt box may be
    # claimed by at most one prediction, duplicates become false positives.
    for ind in np.argsort(-scores):
        overlap = max_overlap_for_boxes[ind]
        gt_ind = gt_for_boxes[ind]
        if overlap >= threshold:
            if not gt_detected[gt_ind]:
                tp[ind] = 1
                gt_detected[gt_ind] = True
            else:
                fp[ind] = 1
        else:
            fp[ind] = 1
    return tp, fp
Ejemplo n.º 4
0
def evalutate_detections(all_boxes, roidb):
    """Evaluate detections: per-image accuracy of the top-scoring box.

    For each image, the prediction with the highest score (column 4) is
    compared against that image's ground truth; it counts as correct when
    the IoU exceeds 0.9. Prints the fraction of images whose top detection
    is correct. (The misspelled name is kept for caller compatibility; the
    original docstring documented `p`/`thresh` parameters that don't exist.)

    :param all_boxes: per-image ndarray of predictions [x1, y1, x2, y2, score]
    :param roidb: per-image dicts holding ground-truth boxes under key 'bbox'
    """
    assert len(all_boxes) == len(roidb)
    pos_count = 0
    for i in range(len(roidb)):
        ground_truth = roidb[i]['bbox']
        pred_boxes = all_boxes[i]
        if pred_boxes.shape[0] == 0:
            continue
        # Keep only the single highest-scoring prediction for this image.
        pred_box_ind = np.argmax(pred_boxes[:, 4])
        pred_box = pred_boxes[pred_box_ind, :]
        pred_box = pred_box[np.newaxis, :]
        # np.float64 explicitly: the bare np.float alias was removed in
        # NumPy 1.24.
        overlap = bbox_overlaps(pred_box[:, :4].astype(np.float64),
                                ground_truth.astype(np.float64))
        if overlap[0][0] > 0.9:
            pos_count += 1

    acc = float(pos_count) / len(roidb)
    print(acc)
Ejemplo n.º 5
0
def test(args):
    """Run RetinaFace detection over a dataset split and write per-image
    WIDER-format detection files; prints recall when ground truth exists.

    :param args: parsed CLI namespace — expects prefix, epoch, gpu, network,
        nocrop, bbox_vote, dataset, image_set, root_path, dataset_path,
        output, pyramid, parts, part
    """
    print('test with', args)
    global detector
    output_folder = args.output
    if not os.path.exists(output_folder):
        os.mkdir(output_folder)
    detector = RetinaFace(args.prefix,
                          args.epoch,
                          args.gpu,
                          network=args.network,
                          nocrop=args.nocrop,
                          vote=args.bbox_vote)
    # NOTE(review): eval() on args.dataset executes arbitrary CLI input;
    # acceptable only in a trusted research script.
    imdb = eval(args.dataset)(args.image_set, args.root_path,
                              args.dataset_path)
    roidb = imdb.gt_roidb()
    overall = [0.0, 0.0]  # [gt boxes found, total gt boxes seen]
    num_pos = 0
    print('roidb size', len(roidb))

    for i in range(len(roidb)):
        # Shard the workload: this process handles every args.parts-th image.
        if i % args.parts != args.part:
            continue
        roi = roidb[i]
        boxes = get_boxes(roi, args.pyramid)
        if 'boxes' in roi:
            gt_boxes = roi['boxes'].copy()
            num_pos += gt_boxes.shape[0]

            # np.float64 explicitly: the bare np.float alias was removed in
            # NumPy 1.24.
            overlaps = bbox_overlaps(boxes.astype(np.float64),
                                     gt_boxes.astype(np.float64))

            if boxes.shape[0] > 0:
                # Best detection IoU for each gt box.
                _gt_overlaps = overlaps.max(axis=0)
                found = (_gt_overlaps > 0.5).sum()
                recall = found / float(gt_boxes.shape[0])
                overall[0] += found
                overall[1] += gt_boxes.shape[0]
                recall_all = float(overall[0]) / overall[1]
                print('[%d]' % i,
                      'recall',
                      recall, (gt_boxes.shape[0], boxes.shape[0]),
                      'all:',
                      recall_all,
                      file=sys.stderr)
        else:
            print('[%d]' % i, 'detect %d faces' % boxes.shape[0])

        # Write detections in WIDER submission layout: <event>/<image>.txt
        _vec = roidb[i]['image'].split('/')
        out_dir = os.path.join(output_folder, _vec[-2])
        if not os.path.exists(out_dir):
            os.mkdir(out_dir)
        out_file = os.path.join(out_dir, _vec[-1].replace('jpg', 'txt'))
        with open(out_file, 'w') as f:
            name = '/'.join(roidb[i]['image'].split('/')[-2:])
            f.write("%s\n" % (name))
            f.write("%d\n" % (boxes.shape[0]))
            for b in range(boxes.shape[0]):
                box = boxes[b]
                # WIDER format: x, y, width, height, score
                f.write(
                    "%d %d %d %d %g \n" %
                    (box[0], box[1], box[2] - box[0], box[3] - box[1], box[4]))
def test(args):
    """Run the ESSH face detector over a dataset, write per-image
    WIDER-format detection files, print recall to stderr, and finally run
    the official MATLAB WIDER evaluation toolbox.

    :param args: parsed CLI namespace — expects prefix, epoch, gpu, dataset,
        image_set, root_path, dataset_path, output, pyramid, method_name
    """
    print('test with', args)
    global detector
    output_folder = args.output
    if not os.path.exists(output_folder):
        os.mkdir(output_folder)
    detector = ESSHDetector(args.prefix, args.epoch, args.gpu, test_mode=True)
    # NOTE(review): eval() on args.dataset executes arbitrary CLI input;
    # acceptable only in a trusted research script.
    imdb = eval(args.dataset)(args.image_set, args.root_path,
                              args.dataset_path)
    roidb = imdb.gt_roidb()
    overall = [0.0, 0.0]  # [gt boxes found, total gt boxes seen]
    num_pos = 0

    # range, not xrange: this file uses Python 3 print(file=...) syntax and
    # xrange does not exist in Python 3.
    for i in range(len(roidb)):
        if i % 10 == 0:
            print('processing', i, file=sys.stderr)
        roi = roidb[i]
        boxes = get_boxes(roi, args.pyramid)
        gt_boxes = roidb[i]['boxes'].copy()
        gt_areas = (gt_boxes[:, 2] - gt_boxes[:, 0] + 1) * (gt_boxes[:, 3] -
                                                            gt_boxes[:, 1] + 1)
        num_pos += gt_boxes.shape[0]

        # np.float64 explicitly: the bare np.float alias was removed in
        # NumPy 1.24.
        overlaps = bbox_overlaps(boxes.astype(np.float64),
                                 gt_boxes.astype(np.float64))

        if boxes.shape[0] > 0:
            # Best detection IoU for each gt box; report gt boxes we missed.
            _gt_overlaps = overlaps.max(axis=0)
            for j in range(len(_gt_overlaps)):
                if _gt_overlaps[j] > config.TEST.IOU_THRESH:
                    continue
                print(j,
                      'failed',
                      gt_boxes[j],
                      'max_overlap:',
                      _gt_overlaps[j],
                      file=sys.stderr)

            found = (_gt_overlaps > config.TEST.IOU_THRESH).sum()
            _recall = found / float(gt_boxes.shape[0])
            print('recall',
                  _recall,
                  gt_boxes.shape[0],
                  boxes.shape[0],
                  gt_areas,
                  file=sys.stderr)
            overall[0] += found
            overall[1] += gt_boxes.shape[0]
            _recall = float(overall[0]) / overall[1]
            print('recall_all', _recall, file=sys.stderr)

        # Write detections in WIDER submission layout: <event>/<image>.txt
        _vec = roidb[i]['image'].split('/')
        out_dir = os.path.join(output_folder, _vec[-2])
        if not os.path.exists(out_dir):
            os.mkdir(out_dir)
        out_file = os.path.join(out_dir, _vec[-1].replace('jpg', 'txt'))
        with open(out_file, 'w') as f:
            name = '/'.join(roidb[i]['image'].split('/')[-2:])
            f.write("%s\n" % (name))
            f.write("%d\n" % (boxes.shape[0]))
            for b in range(boxes.shape[0]):
                box = boxes[b]
                # WIDER format: x, y, width, height, score
                f.write(
                    "%d %d %d %d %g \n" %
                    (box[0], box[1], box[2] - box[0], box[3] - box[1], box[4]))

    print('Evaluating detections using official WIDER toolbox...')
    path = os.path.join(os.path.dirname(__file__), 'wider_eval_tools')
    eval_output_path = os.path.join(path, 'wider_plots')
    if not os.path.isdir(eval_output_path):
        os.mkdir(eval_output_path)
    # shell=True is required to chain `cd` with the matlab invocation; the
    # command is built from trusted CLI arguments only.
    cmd = 'cd {} && '.format(path)
    cmd += 'matlab -nodisplay -nodesktop '
    cmd += '-r "dbstop if error; '
    cmd += 'wider_eval(\'{:s}\',\'{:s}\',\'{:s}\'); quit;"' \
        .format(args.output, args.method_name, eval_output_path)
    print('Running:\n{}'.format(cmd))
    subprocess.call(cmd, shell=True)
Ejemplo n.º 7
0
def test(args):
    """Run the SSH face detector over a dataset, write per-image
    WIDER-format detection files, and print recall to stderr.

    :param args: parsed CLI namespace — expects prefix, epoch, gpu, dataset,
        image_set, root_path, dataset_path, output, pyramid
    """
    print('test with', args)
    global detector
    output_folder = args.output
    if not os.path.exists(output_folder):
        os.mkdir(output_folder)
    detector = SSHDetector(args.prefix, args.epoch, args.gpu, test_mode=True)
    # NOTE(review): eval() on args.dataset executes arbitrary CLI input;
    # acceptable only in a trusted research script.
    imdb = eval(args.dataset)(args.image_set, args.root_path,
                              args.dataset_path)
    roidb = imdb.gt_roidb()
    overall = [0.0, 0.0]  # [gt boxes found, total gt boxes seen]
    num_pos = 0

    # range, not xrange: this file uses Python 3 print(file=...) syntax and
    # xrange does not exist in Python 3.
    for i in range(len(roidb)):
        if i % 10 == 0:
            print('processing', i, file=sys.stderr)
        roi = roidb[i]
        boxes = get_boxes(roi, args.pyramid)
        gt_boxes = roidb[i]['boxes'].copy()
        gt_areas = (gt_boxes[:, 2] - gt_boxes[:, 0] + 1) * (gt_boxes[:, 3] -
                                                            gt_boxes[:, 1] + 1)
        num_pos += gt_boxes.shape[0]

        # np.float64 explicitly: the bare np.float alias was removed in
        # NumPy 1.24.
        overlaps = bbox_overlaps(boxes.astype(np.float64),
                                 gt_boxes.astype(np.float64))

        if boxes.shape[0] > 0:
            # Best detection IoU for each gt box; report gt boxes we missed.
            _gt_overlaps = overlaps.max(axis=0)
            for j in range(len(_gt_overlaps)):
                if _gt_overlaps[j] > config.TEST.IOU_THRESH:
                    continue
                print(j,
                      'failed',
                      gt_boxes[j],
                      'max_overlap:',
                      _gt_overlaps[j],
                      file=sys.stderr)

            found = (_gt_overlaps > config.TEST.IOU_THRESH).sum()
            _recall = found / float(gt_boxes.shape[0])
            print('recall',
                  _recall,
                  gt_boxes.shape[0],
                  boxes.shape[0],
                  gt_areas,
                  file=sys.stderr)
            overall[0] += found
            overall[1] += gt_boxes.shape[0]
            _recall = float(overall[0]) / overall[1]
            print('recall_all', _recall, file=sys.stderr)

        # Write detections in WIDER submission layout: <event>/<image>.txt
        _vec = roidb[i]['image'].split('/')
        out_dir = os.path.join(output_folder, _vec[-2])
        if not os.path.exists(out_dir):
            os.mkdir(out_dir)
        out_file = os.path.join(out_dir, _vec[-1].replace('jpg', 'txt'))
        with open(out_file, 'w') as f:
            name = '/'.join(roidb[i]['image'].split('/')[-2:])
            f.write("%s\n" % (name))
            f.write("%d\n" % (boxes.shape[0]))
            for b in range(boxes.shape[0]):
                box = boxes[b]
                # WIDER format: x, y, width, height, score
                f.write(
                    "%d %d %d %d %g \n" %
                    (box[0], box[1], box[2] - box[0], box[3] - box[1], box[4]))
Ejemplo n.º 8
0
def test_proposals(predictor, test_data, imdb, roidb, vis=False):
    """
    Run RPN proposal generation over test_data, print per-image recall to
    stderr, and write per-image WIDER-format proposal files.

    :param predictor: Predictor
    :param test_data: data iterator, must be non-shuffled
    :param imdb: image database
    :param roidb: roidb holding the original (unscaled) ground truth
    :param vis: controls visualization
    :return: None — results are written under <imdb.root_path>/output.
        (The original body carried an unreachable tail after `return` that
        referenced undefined names; it has been removed.)
    """
    assert vis or not test_data.shuffle
    data_names = [k[0] for k in test_data.provide_data]

    i = 0
    t = time.time()
    output_folder = os.path.join(imdb.root_path, 'output')
    if not os.path.exists(output_folder):
        os.mkdir(output_folder)
    imdb_boxes = list()
    original_boxes = list()
    overall = [0.0, 0.0]  # [gt boxes found, total gt boxes seen]
    num_pos = 0
    for im_info, data_batch in test_data:
        t1 = time.time() - t
        t = time.time()

        # im_info row: (height, width, scale); keep the network's scale to
        # map ground truth into input coordinates.
        oscale = im_info[0, 2]
        scale = 1.0  # fix scale=1.0 for SSH face detector
        scores, boxes, data_dict = im_proposal(predictor, data_batch,
                                               data_names, scale)
        t2 = time.time() - t
        t = time.time()

        # Assemble proposals as [x1, y1, x2, y2, score].
        dets = np.hstack((boxes, scores))
        original_boxes.append(dets)

        # Filter proposals by score threshold.
        keep = np.where(dets[:, 4:] > config.TEST.SCORE_THRESH)[0]
        dets = dets[keep, :]
        imdb_boxes.append(dets)

        logger.info('generating %d/%d ' % (i + 1, imdb.num_images) +
                    'proposal %d ' % (dets.shape[0]) + 'data %.4fs net %.4fs' %
                    (t1, t2))

        if vis:
            vis_all_detection(data_dict['data'].asnumpy(), [dets], ['obj'],
                              scale)
        boxes = dets
        # roidb holds the original image's GT; scale it to match SSH input.
        gt_boxes = roidb[i]['boxes'].copy() * oscale
        gt_areas = (gt_boxes[:, 2] - gt_boxes[:, 0] + 1) * (gt_boxes[:, 3] -
                                                            gt_boxes[:, 1] + 1)
        num_pos += gt_boxes.shape[0]

        # np.float64 explicitly: the bare np.float alias was removed in
        # NumPy 1.24.
        overlaps = bbox_overlaps(boxes.astype(np.float64),
                                 gt_boxes.astype(np.float64))

        if boxes.shape[0] > 0:
            # Best proposal IoU for each gt box; report gt boxes we missed.
            _gt_overlaps = overlaps.max(axis=0)
            for j in range(len(_gt_overlaps)):
                if _gt_overlaps[j] > config.TEST.IOU_THRESH:
                    continue
                print(j,
                      'failed',
                      gt_boxes[j],
                      'max_overlap:',
                      _gt_overlaps[j],
                      file=sys.stderr)

            found = (_gt_overlaps > config.TEST.IOU_THRESH).sum()
            _recall = found / float(gt_boxes.shape[0])
            print('recall',
                  _recall,
                  gt_boxes.shape[0],
                  boxes.shape[0],
                  gt_areas,
                  file=sys.stderr)
            overall[0] += found
            overall[1] += gt_boxes.shape[0]
            _recall = float(overall[0]) / overall[1]
            print('recall_all', _recall, file=sys.stderr)

        # Undo the input scaling so the written boxes are in original image
        # coordinates, then write WIDER layout: <event>/<image>.txt
        boxes[:, 0:4] /= oscale
        _vec = roidb[i]['image'].split('/')
        out_dir = os.path.join(output_folder, _vec[-2])
        if not os.path.exists(out_dir):
            os.mkdir(out_dir)
        out_file = os.path.join(out_dir, _vec[-1].replace('jpg', 'txt'))
        with open(out_file, 'w') as f:
            name = '/'.join(roidb[i]['image'].split('/')[-2:])
            f.write("%s\n" % (name))
            f.write("%d\n" % (boxes.shape[0]))
            for b in range(boxes.shape[0]):
                box = boxes[b]
                # WIDER format: x, y, width, height, score
                f.write(
                    "%d %d %d %d %g \n" %
                    (box[0], box[1], box[2] - box[0], box[3] - box[1], box[4]))
        i += 1
Ejemplo n.º 9
0
    def evaluate_recall(self, roidb, candidate_boxes=None, thresholds=None):
        """
        Evaluate detection proposal recall metrics.

        Records the max overlap value for each gt box (greedy one-to-one
        matching, best-covered gt first) and prints average recall plus
        recall at each IoU threshold, per gt-area range.

        :param roidb: used to evaluate
        :param candidate_boxes: if not given, use roidb's non-gt boxes
        :param thresholds: array-like recall thresholds; defaults to
            0.5:0.05:0.95 on first use
        :return: None (results are printed)
        """
        area_names = [
            'all', '0-25', '25-50', '50-100', '100-200', '200-300', '300-inf'
        ]
        # Area ranges in squared pixels; 'all' spans everything.
        area_ranges = [[0**2, 1e5**2], [0**2, 25**2], [25**2, 50**2],
                       [50**2, 100**2], [100**2, 200**2], [200**2, 300**2],
                       [300**2, 1e5**2]]
        # First pass: report the proposal-size distribution (skip 'all').
        area_counts = []
        for area_name, area_range in zip(area_names[1:], area_ranges[1:]):
            area_count = 0
            for i in range(self.num_images):
                if candidate_boxes is None:
                    # default is use the non-gt boxes from roidb
                    non_gt_inds = np.where(roidb[i]['gt_classes'] == 0)[0]
                    boxes = roidb[i]['boxes'][non_gt_inds, :]
                else:
                    boxes = candidate_boxes[i]
                boxes_areas = (boxes[:, 2] - boxes[:, 0] +
                               1) * (boxes[:, 3] - boxes[:, 1] + 1)
                valid_range_inds = np.where((boxes_areas >= area_range[0])
                                            & (boxes_areas < area_range[1]))[0]
                area_count += len(valid_range_inds)
            area_counts.append(area_count)
        total_counts = float(sum(area_counts))
        for area_name, area_count in zip(area_names[1:], area_counts):
            print('percentage of', area_name, area_count / total_counts)
        print('average number of proposal', total_counts / self.num_images)
        # Second pass: recall per area range.
        for area_name, area_range in zip(area_names, area_ranges):
            gt_overlaps = np.zeros(0)
            num_pos = 0
            for i in range(self.num_images):
                # check for max_overlaps == 1 avoids including crowd annotations
                max_gt_overlaps = roidb[i]['gt_overlaps'].max(axis=1)
                gt_inds = np.where((roidb[i]['gt_classes'] > 0)
                                   & (max_gt_overlaps == 1))[0]
                gt_boxes = roidb[i]['boxes'][gt_inds, :]
                gt_areas = (gt_boxes[:, 2] - gt_boxes[:, 0] +
                            1) * (gt_boxes[:, 3] - gt_boxes[:, 1] + 1)
                valid_gt_inds = np.where((gt_areas >= area_range[0])
                                         & (gt_areas < area_range[1]))[0]
                gt_boxes = gt_boxes[valid_gt_inds, :]
                num_pos += len(valid_gt_inds)

                if candidate_boxes is None:
                    # default is use the non-gt boxes from roidb
                    non_gt_inds = np.where(roidb[i]['gt_classes'] == 0)[0]
                    boxes = roidb[i]['boxes'][non_gt_inds, :]
                else:
                    boxes = candidate_boxes[i]
                if boxes.shape[0] == 0:
                    continue

                # np.float64 explicitly: the bare np.float alias was removed
                # in NumPy 1.24.
                overlaps = bbox_overlaps(boxes.astype(np.float64),
                                         gt_boxes.astype(np.float64))

                _gt_overlaps = np.zeros((gt_boxes.shape[0]))
                # choose whatever is smaller to iterate
                rounds = min(boxes.shape[0], gt_boxes.shape[0])
                for j in range(rounds):
                    # find which proposal maximally covers each gt box
                    argmax_overlaps = overlaps.argmax(axis=0)
                    # get the IoU amount of coverage for each gt box
                    max_overlaps = overlaps.max(axis=0)
                    # find which gt box is covered by most IoU
                    gt_ind = max_overlaps.argmax()
                    gt_ovr = max_overlaps.max()
                    assert (gt_ovr >=
                            0), '%s\n%s\n%s' % (boxes, gt_boxes, overlaps)
                    # find the proposal box that covers the best covered gt box
                    box_ind = argmax_overlaps[gt_ind]
                    # record the IoU coverage of this gt box
                    _gt_overlaps[j] = overlaps[box_ind, gt_ind]
                    assert (_gt_overlaps[j] == gt_ovr)
                    # mark the proposal box and the gt box as used
                    overlaps[box_ind, :] = -1
                    overlaps[:, gt_ind] = -1
                # append recorded IoU coverage level
                gt_overlaps = np.hstack((gt_overlaps, _gt_overlaps))

            gt_overlaps = np.sort(gt_overlaps)
            if thresholds is None:
                step = 0.05
                thresholds = np.arange(0.5, 0.95 + 1e-5, step)
            recalls = np.zeros_like(thresholds)

            # compute recall for each IoU threshold
            for i, t in enumerate(thresholds):
                recalls[i] = (gt_overlaps >= t).sum() / float(num_pos)
            ar = recalls.mean()

            # print results
            print('average recall for {}: {:.3f}'.format(area_name, ar))
            for threshold, recall in zip(thresholds, recalls):
                print('recall @{:.2f}: {:.3f}'.format(threshold, recall))
Ejemplo n.º 10
0
def gpu_mask_voting(masks,
                    boxes,
                    scores,
                    num_classes,
                    max_per_image,
                    im_width,
                    im_height,
                    nms_thresh,
                    merge_thresh,
                    binary_thresh=0.4,
                    device_id=0):
    """
    GPU wrapper for mask voting; the class of each box/mask is already known.

    Runs per-class NMS, keeps at most max_per_image detections overall,
    then merges masks of boxes overlapping each kept detection (IoU >=
    merge_thresh) via mask_voting_kernel on the given device.

    :return: (list_result_mask, list_result_box), each indexed by class
        (index 0, background, left empty)
    """
    nms = gpu_nms_wrapper(nms_thresh, device_id)
    # Intermediate per-class results. range, not xrange: xrange does not
    # exist in Python 3.
    t_boxes = [[] for _ in range(num_classes)]
    t_scores = [[] for _ in range(num_classes)]
    t_all_scores = []
    for i in range(1, num_classes):
        dets = np.hstack((boxes.astype(np.float32), scores[:, i:i + 1]))
        inds = nms(dets)
        num_keep = min(len(inds), max_per_image)
        inds = inds[:num_keep]
        t_boxes[i] = boxes[inds]
        t_scores[i] = scores[inds, i]
        t_all_scores.extend(scores[inds, i])

    # Global score threshold that keeps at most max_per_image detections.
    sorted_scores = np.sort(t_all_scores)[::-1]
    num_keep = min(len(sorted_scores), max_per_image)
    thresh = max(sorted_scores[num_keep - 1], 1e-3)

    # inds array to record which mask should be aggregated together
    candidate_inds = []
    # weight for each element in the candidate inds
    candidate_weights = []
    # start position for candidate array
    candidate_start = []
    candidate_scores = []
    class_bar = [[] for _ in range(num_classes)]

    for i in range(1, num_classes):
        keep = np.where(t_scores[i] >= thresh)
        t_boxes[i] = t_boxes[i][keep]
        t_scores[i] = t_scores[i][keep]

    # Organize helper variables for gpu mask voting.
    for c in range(1, num_classes):
        num_boxes = len(t_boxes[c])
        for i in range(num_boxes):
            # np.float64 explicitly: the bare np.float alias was removed in
            # NumPy 1.24.
            cur_ov = bbox_overlaps(boxes.astype(np.float64),
                                   t_boxes[c][i, np.newaxis].astype(np.float64))
            cur_inds = np.where(cur_ov >= merge_thresh)[0]
            candidate_inds.extend(cur_inds)
            # Score-normalized weights for the masks being merged.
            cur_weights = scores[cur_inds, c]
            cur_weights = cur_weights / sum(cur_weights)
            candidate_weights.extend(cur_weights)
            candidate_start.append(len(candidate_inds))
        candidate_scores.extend(t_scores[c])
        class_bar[c] = len(candidate_scores)

    candidate_inds = np.array(candidate_inds, dtype=np.int32)
    candidate_weights = np.array(candidate_weights, dtype=np.float32)
    candidate_start = np.array(candidate_start, dtype=np.int32)
    candidate_scores = np.array(candidate_scores, dtype=np.float32)

    # The input masks/boxes are relatively large; select only the subset
    # actually referenced by the merge, remapping indices to the compacted
    # arrays.
    unique_inds = np.unique(candidate_inds)
    unique_inds_order = unique_inds.argsort()
    unique_map = {}
    for i in range(len(unique_inds)):
        unique_map[unique_inds[i]] = unique_inds_order[i]
    for i in range(len(candidate_inds)):
        candidate_inds[i] = unique_map[candidate_inds[i]]
    boxes = boxes[unique_inds, ...]
    masks = masks[unique_inds, ...]

    boxes = np.round(boxes)
    result_mask, result_box = mask_voting_kernel(boxes, masks, candidate_inds,
                                                 candidate_start,
                                                 candidate_weights,
                                                 binary_thresh, im_height,
                                                 im_width, device_id)
    result_box = np.hstack((result_box, candidate_scores[:, np.newaxis]))

    # Split the flat kernel output back into per-class lists, dropping
    # degenerate (zero-area) boxes.
    list_result_box = [[] for _ in range(num_classes)]
    list_result_mask = [[] for _ in range(num_classes)]
    cls_start = 0
    for i in range(1, num_classes):
        cls_end = class_bar[i]
        cls_box = result_box[cls_start:cls_end, :]
        cls_mask = result_mask[cls_start:cls_end, :]
        valid_ind = np.where((cls_box[:, 2] > cls_box[:, 0])
                             & (cls_box[:, 3] > cls_box[:, 1]))[0]
        list_result_box[i] = cls_box[valid_ind, :]
        list_result_mask[i] = cls_mask[valid_ind, :]
        cls_start = cls_end

    return list_result_mask, list_result_box
Ejemplo n.º 11
0
def cpu_mask_voting(masks,
                    boxes,
                    scores,
                    num_classes,
                    max_per_image,
                    im_width,
                    im_height,
                    nms_thresh,
                    merge_thresh,
                    binary_thresh=0.4):
    """
    Wrapper function for mask voting on CPU; the class of each box/mask is
    already known (class 0 is background and skipped).
    :param masks: [n, 1, mask_size, mask_size] predicted masks
    :param boxes: [n, 4] predicted boxes (x1, y1, x2, y2)
    :param scores: [n, num_classes] per-class detection scores
    :param num_classes: number of classes including background
    :param max_per_image: cap on detections kept per image
    :param im_width: image width in pixels
    :param im_height: image height in pixels
    :param nms_thresh: IoU threshold for per-class NMS
    :param merge_thresh: IoU threshold for selecting neighbor boxes to merge
    :param binary_thresh: threshold used to binarize the aggregated mask
    :return: (list_result_mask, list_result_box), each a list indexed by class
    """
    masks = masks.astype(np.float32)
    mask_size = masks.shape[-1]
    nms = py_nms_wrapper(nms_thresh)
    # apply nms and sort to get first images according to their scores

    # Intermediate results: per-class surviving boxes/scores after NMS
    t_boxes = [[] for _ in xrange(num_classes)]
    t_scores = [[] for _ in xrange(num_classes)]
    t_all_scores = []
    for i in xrange(1, num_classes):
        dets = np.hstack((boxes.astype(np.float32), scores[:, i:i + 1]))
        inds = nms(dets)
        num_keep = min(len(inds), max_per_image)
        inds = inds[:num_keep]
        t_boxes[i] = boxes[inds]
        t_scores[i] = scores[inds, i]
        t_all_scores.extend(scores[inds, i])

    # Global score threshold so that at most max_per_image detections survive
    sorted_scores = np.sort(t_all_scores)[::-1]
    num_keep = min(len(sorted_scores), max_per_image)
    # Guard num_keep == 0: indexing an empty sorted_scores would raise
    # IndexError; fall back to the score floor so nothing is kept.
    thresh = max(sorted_scores[num_keep - 1], 1e-3) if num_keep > 0 else 1e-3

    for i in xrange(1, num_classes):
        keep = np.where(t_scores[i] >= thresh)
        t_boxes[i] = t_boxes[i][keep]
        t_scores[i] = t_scores[i][keep]

    # Resize each predicted mask to its own box size once, up front
    num_detect = boxes.shape[0]
    res_mask = [[] for _ in xrange(num_detect)]
    for i in xrange(num_detect):
        box = np.round(boxes[i]).astype(int)
        mask = cv2.resize(masks[i, 0].astype(np.float32),
                          (box[2] - box[0] + 1, box[3] - box[1] + 1))
        res_mask[i] = mask

    list_result_box = [[] for _ in xrange(num_classes)]
    list_result_mask = [[] for _ in xrange(num_classes)]
    for c in xrange(1, num_classes):
        num_boxes = len(t_boxes[c])
        masks_ar = np.zeros((num_boxes, 1, mask_size, mask_size))
        boxes_ar = np.zeros((num_boxes, 4))
        for i in xrange(num_boxes):
            # Get weights according to their segmentation scores
            # (np.float is a deprecated alias of float, removed in NumPy 1.24)
            cur_ov = bbox_overlaps(boxes.astype(float),
                                   t_boxes[c][i, np.newaxis].astype(float))
            cur_inds = np.where(cur_ov >= merge_thresh)[0]
            cur_weights = scores[cur_inds, c]
            cur_weights = cur_weights / sum(cur_weights)
            # Re-format mask when passing it to mask_aggregation
            p_mask = [res_mask[j] for j in list(cur_inds)]
            # do mask aggregation: weighted average of neighbor masks in
            # image space, then resize back to the fixed mask_size
            orig_mask, boxes_ar[i] = mask_aggregation(boxes[cur_inds], p_mask,
                                                      cur_weights, im_width,
                                                      im_height, binary_thresh)
            masks_ar[i, 0] = cv2.resize(orig_mask.astype(np.float32),
                                        (mask_size, mask_size))
        boxes_scored_ar = np.hstack((boxes_ar, t_scores[c][:, np.newaxis]))
        list_result_box[c] = boxes_scored_ar
        list_result_mask[c] = masks_ar
    return list_result_mask, list_result_box
Ejemplo n.º 12
0
def get_rois(rois,
             rois_per_image,
             num_classes,
             labels=None,
             overlaps=None,
             bbox_targets=None,
             gt_boxes=None):
    """
    Pick top N ROIs, used in online hard example mining.
    :param rois: all_rois [n, 4]; e2e: [n, 5] with batch_index
    :param rois_per_image: total number of rois to return
    :param num_classes: number of classes
    :param labels: optionally precomputed labels
    :param overlaps: optionally precomputed max_overlaps
    :param bbox_targets: optionally precomputed regression targets
    :param gt_boxes: optional for e2e [n, 5] (x1, y1, x2, y2, cls)
    :return: (rois, labels, bbox_targets, bbox_weights)
    """
    if labels is None:
        # No ground truth at all: fall back to one dummy background box
        if len(gt_boxes) == 0:
            gt_boxes = np.array([[1, 1, 1, 1, 0]])
        overlaps = bbox_overlaps(rois[:, 1:].astype(np.float),
                                 gt_boxes[:, :4].astype(np.float))
        gt_assignment = overlaps.argmax(axis=1)
        overlaps = overlaps.max(axis=1)
        labels = gt_boxes[gt_assignment, 4]

    # Too many rois: randomly subsample down to the quota
    keep = np.arange(rois.shape[0])
    if keep.shape[0] > rois_per_image:
        keep = npr.choice(keep, size=rois_per_image, replace=False)

    # Too few rois: keep drawing extra indices until the quota is met
    # (duplicates across rounds are possible and intentional)
    while keep.shape[0] < rois_per_image:
        gap = np.minimum(rois_per_image - keep.shape[0], len(rois))
        extra = npr.choice(range(len(rois)), size=gap, replace=False)
        keep = np.append(keep, extra)

    # Force every roi whose overlap lies in [BG_THRESH_LO, BG_THRESH_HI)
    # to be labelled background
    bg = np.where((overlaps < config.TRAIN.BG_THRESH_HI)
                  & (overlaps >= config.TRAIN.BG_THRESH_LO))[0]
    labels[bg] = 0

    labels = labels[keep]
    rois = rois[keep]

    # Use precomputed regression targets when available, else derive them
    if bbox_targets is not None:
        bbox_target_data = bbox_targets[keep, :]
    else:
        # NOTE(review): gt_assignment only exists when labels was None above;
        # reaching this branch with precomputed labels raises NameError —
        # confirm callers never pass labels without bbox_targets.
        targets = bbox_transform(rois[:, 1:],
                                 gt_boxes[gt_assignment[keep], :4])
        if config.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED:
            targets = ((targets - np.array(config.TRAIN.BBOX_MEANS)) /
                       np.array(config.TRAIN.BBOX_STDS))
        bbox_target_data = np.hstack((labels[:, np.newaxis], targets))

    bbox_targets, bbox_weights = \
        expand_bbox_regression_targets(bbox_target_data, num_classes)

    return rois, labels, bbox_targets, bbox_weights
Ejemplo n.º 13
0
def sample_rois_fpn(rois,
                    assign_levels,
                    fg_rois_per_image,
                    rois_per_image,
                    num_classes,
                    labels=None,
                    overlaps=None,
                    bbox_targets=None,
                    mask_targets=None,
                    mask_labels=None,
                    mask_inds=None,
                    gt_boxes=None,
                    im_info=None):
    """
    generate random sample of ROIs comprising foreground and background examples,
    then bucket the sampled rois by their assigned FPN level (feature stride)
    :param rois: all_rois [n, 4]; e2e: [n, 5] with batch_index
    :param assign_levels: [n] feature stride each roi is assigned to
    :param fg_rois_per_image: foreground roi number
    :param rois_per_image: total roi number
    :param num_classes: number of classes
    :param labels: maybe precomputed
    :param overlaps: maybe precomputed (max_overlaps)
    :param bbox_targets: maybe precomputed
    :param mask_targets: maybe precomputed [m, 28, 28] mask targets
    :param mask_labels: class label for each entry of mask_targets
    :param mask_inds: roi index (into the original rois) for each mask target
    :param gt_boxes: optional for e2e [n, 5] (x1, y1, x2, y2, cls)
    :param im_info: unused here; kept for interface compatibility
    :return: per-level dicts keyed 'stride%s':
             (rois, labels, bbox_targets, bbox_weights
              [, mask_targets, mask_weights])
    """
    DEBUG = False
    if labels is None:
        if len(gt_boxes) == 0:
            # no ground truth: treat every roi as background against a dummy box
            gt_boxes = np.zeros((1, 5))
            gt_assignment = np.zeros((len(rois), ), dtype=np.int32)
            overlaps = np.zeros((len(rois), ))
            labels = np.zeros((len(rois), ))
        else:
            overlaps = bbox_overlaps(rois[:, 1:].astype(np.float),
                                     gt_boxes[:, :4].astype(np.float))
            gt_assignment = overlaps.argmax(axis=1)
            overlaps = overlaps.max(axis=1)
            labels = gt_boxes[gt_assignment, 4]

    num_rois = rois.shape[0]
    # foreground RoI with FG_THRESH overlap
    fg_indexes = np.where(overlaps >= config.TRAIN.FG_THRESH)[0]
    # guard against the case when an image has fewer than fg_rois_per_image foreground RoIs
    fg_rois_per_this_image = np.minimum(fg_rois_per_image, fg_indexes.size)

    if DEBUG:
        print('fg total num:', len(fg_indexes))

    # Sample foreground regions without replacement
    if len(fg_indexes) > fg_rois_per_this_image:
        fg_indexes = npr.choice(fg_indexes,
                                size=fg_rois_per_this_image,
                                replace=False)

    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_indexes = np.where((overlaps < config.TRAIN.BG_THRESH_HI)
                          & (overlaps >= config.TRAIN.BG_THRESH_LO))[0]
    if DEBUG:
        print('bg total num:', len(bg_indexes))
    # Compute number of background RoIs to take from this image (guarding against there being fewer than desired)
    bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
    bg_rois_per_this_image = np.minimum(bg_rois_per_this_image,
                                        bg_indexes.size)
    # Sample background regions without replacement
    if len(bg_indexes) > bg_rois_per_this_image:
        bg_indexes = npr.choice(bg_indexes,
                                size=bg_rois_per_this_image,
                                replace=False)
    if DEBUG:
        print('fg num:', len(fg_indexes))
        print('bg num:', len(bg_indexes))

    # bg rois statistics
    if DEBUG:
        bg_assign = assign_levels[bg_indexes]
        bg_rois_on_levels = dict()
        for i, s in enumerate(config.RCNN_FEAT_STRIDE):
            bg_rois_on_levels.update(
                {'stride%s' % s: len(np.where(bg_assign == s)[0])})
        print(bg_rois_on_levels)

    # indexes selected
    keep_indexes = np.append(fg_indexes, bg_indexes)

    # padding candidates: anything below the foreground threshold
    neg_idx = np.where(overlaps < config.TRAIN.FG_THRESH)[0]
    neg_rois = rois[neg_idx]

    # pad more to ensure a fixed minibatch size
    while keep_indexes.shape[0] < rois_per_image:
        gap = np.minimum(len(neg_rois), rois_per_image - keep_indexes.shape[0])
        gap_indexes = npr.choice(range(len(neg_rois)), size=gap, replace=False)
        keep_indexes = np.append(keep_indexes, neg_idx[gap_indexes])

    # select labels
    # set labels of bg_rois to be 0 (everything after the fg block, incl. padding)
    labels = labels[keep_indexes]
    labels[fg_rois_per_this_image:] = 0
    rois = rois[keep_indexes]
    assign_levels = assign_levels[keep_indexes]

    if mask_targets is not None:
        assert mask_labels is not None
        assert mask_inds is not None

        def _mask_umap(mask_targets, mask_labels, mask_inds):
            # scatter the dense fg-only mask targets into per-roi/per-class arrays
            _mask_targets = np.zeros((num_rois, num_classes, 28, 28),
                                     dtype=np.int8)
            _mask_weights = np.zeros((num_rois, num_classes, 1, 1),
                                     dtype=np.int8)
            _mask_targets[mask_inds, mask_labels] = mask_targets
            # only (roi, class) slots with a real target contribute to the loss
            _mask_weights[mask_inds, mask_labels] = 1
            return _mask_targets, _mask_weights  # [num_rois, num_classes, 28, 28]

        mask_targets, mask_weights = _mask_umap(mask_targets, mask_labels,
                                                mask_inds)
        # subset to the sampled rois (keep_indexes is in the original roi space)
        mask_targets = mask_targets[keep_indexes]
        mask_weights = mask_weights[keep_indexes]

    # load or compute bbox_target
    if bbox_targets is not None:
        bbox_target_data = bbox_targets[keep_indexes, :]
    else:
        # NOTE(review): gt_assignment is only defined when labels was None
        # above; reaching this branch with precomputed labels raises NameError.
        targets = bbox_transform(rois[:, 1:],
                                 gt_boxes[gt_assignment[keep_indexes], :4])
        if config.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED:
            targets = ((targets - np.array(config.TRAIN.BBOX_MEANS)) /
                       np.array(config.TRAIN.BBOX_STDS))
        bbox_target_data = np.hstack((labels[:, np.newaxis], targets))

    bbox_targets, bbox_weights = \
        expand_bbox_regression_targets(bbox_target_data, num_classes)

    # Assign to levels: split every output by feature stride
    rois_on_levels = dict()
    labels_on_levels = dict()
    bbox_targets_on_levels = dict()
    bbox_weights_on_levels = dict()
    if mask_targets is not None:
        mask_targets_on_levels = dict()
        mask_weights_on_levels = dict()
    for i, s in enumerate(config.RCNN_FEAT_STRIDE):
        index = np.where(assign_levels == s)
        _rois = rois[index]
        _labels = labels[index]
        _bbox_targets = bbox_targets[index]
        _bbox_weights = bbox_weights[index]
        if mask_targets is not None:
            _mask_targets = mask_targets[index]
            _mask_weights = mask_weights[index]

        rois_on_levels.update({'stride%s' % s: _rois})
        labels_on_levels.update({'stride%s' % s: _labels})
        bbox_targets_on_levels.update({'stride%s' % s: _bbox_targets})
        bbox_weights_on_levels.update({'stride%s' % s: _bbox_weights})
        if mask_targets is not None:
            mask_targets_on_levels.update({'stride%s' % s: _mask_targets})
            mask_weights_on_levels.update({'stride%s' % s: _mask_weights})

    if mask_targets is not None:
        return rois_on_levels, labels_on_levels, bbox_targets_on_levels, bbox_weights_on_levels, mask_targets_on_levels, mask_weights_on_levels
    else:
        return rois_on_levels, labels_on_levels, bbox_targets_on_levels, bbox_weights_on_levels
Ejemplo n.º 14
0
    def sample_rois(self, rois, fg_rois_per_image, rois_per_image, num_classes,
                    labels=None, overlaps=None, bbox_targets=None, gt_boxes=None, gt_masks=None):
        """
        Sample a fixed-size minibatch of foreground/background rois and build
        classification, bbox-regression and mask-regression targets.
        :param rois: [n, 5] rois; column 0 is batch_index, columns 1:5 are x1, y1, x2, y2
        :param fg_rois_per_image: maximum number of foreground rois to sample
        :param rois_per_image: total number of rois returned (padded if short)
        :param num_classes: number of classes
        :param labels: maybe precomputed
        :param overlaps: maybe precomputed (max_overlaps per roi)
        :param bbox_targets: maybe precomputed
        :param gt_boxes: [g, 5] (x1, y1, x2, y2, cls)
        :param gt_masks: ground-truth masks, indexed like gt_boxes
        :return: (rois, labels, bbox_targets, bbox_weights, mask_reg_targets)
        """
        if labels is None:
            overlaps = bbox_overlaps(rois[:, 1:].astype(np.float),
                                     gt_boxes[:, :4].astype(np.float))
            gt_assignment = overlaps.argmax(axis=1)
            overlaps = overlaps.max(axis=1)
            labels = gt_boxes[gt_assignment, 4]

        # foreground RoI with FG_THRESH overlap
        fg_indexes = np.where(overlaps >= config.TRAIN.FG_THRESH)[0]
        if config.TRAIN.IGNORE_GAP:
            # drop duplicate fg boxes so duplicates can be ignored downstream
            keep_inds = remove_repetition(rois[fg_indexes, 1:])
            fg_indexes = fg_indexes[keep_inds]

        # guard against the case when an image has fewer than fg_rois_per_image foreground RoIs
        fg_rois_per_this_image = np.minimum(fg_rois_per_image, fg_indexes.size)
        # Sample foreground regions without replacement
        if len(fg_indexes) > fg_rois_per_this_image:
            fg_indexes = np.random.choice(fg_indexes, size=fg_rois_per_this_image, replace=False)

        # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
        bg_indexes = np.where((overlaps < config.TRAIN.BG_THRESH_HI) & (overlaps >= config.TRAIN.BG_THRESH_LO))[0]
        if config.TRAIN.IGNORE_GAP:
            # same dedup for background candidates
            keep_inds = remove_repetition(rois[bg_indexes, 1:])
            bg_indexes = bg_indexes[keep_inds]

        # Compute number of background RoIs to take from this image (guarding against there being fewer than desired)
        bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
        bg_rois_per_this_image = np.minimum(bg_rois_per_this_image, bg_indexes.size)
        # Sample background regions without replacement
        if len(bg_indexes) > bg_rois_per_this_image:
            bg_indexes = np.random.choice(bg_indexes, size=bg_rois_per_this_image, replace=False)

        # indexes selected (foreground first, then background)
        keep_indexes = np.append(fg_indexes, bg_indexes)

        # pad more to ensure a fixed minibatch size
        while keep_indexes.shape[0] < rois_per_image:
            gap = np.minimum(len(rois), rois_per_image - keep_indexes.shape[0])
            if config.TRAIN.GAP_SELECT_FROM_ALL:
                gap_indexes = np.random.choice(range(len(rois)), size=gap, replace=False)
            else:
                # pad only from non-foreground rois
                bg_full_indexes = list(set(range(len(rois))) - set(fg_indexes))
                gap_indexes = np.random.choice(bg_full_indexes, size=gap, replace=False)
            keep_indexes = np.append(keep_indexes, gap_indexes)

        # select labels
        # set labels of bg_rois to be 0 (everything after the fg block, incl. padding)
        labels = labels[keep_indexes]
        labels[fg_rois_per_this_image:] = 0
        rois = rois[keep_indexes]

        # load or compute bbox target
        if bbox_targets is not None:
            bbox_target_data = bbox_targets[keep_indexes, :]
        else:
            # NOTE(review): gt_assignment is only defined when labels was None
            # above; reaching this branch with precomputed labels raises NameError.
            targets = bbox_transform(rois[:, 1:], gt_boxes[gt_assignment[keep_indexes], :4])
            if config.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED:
                targets = ((targets - np.array(config.TRAIN.BBOX_MEANS))
                           / np.array(config.TRAIN.BBOX_STDS))
            bbox_target_data = np.hstack((labels[:, np.newaxis], targets))

        bbox_targets, bbox_weights = \
            expand_bbox_regression_targets(bbox_target_data, num_classes)

        if config.TRAIN.IGNORE_GAP:
            # mark the padded tail as ignored: label -1 and zero bbox loss weight
            valid_rois_per_this_image = fg_rois_per_this_image+bg_rois_per_this_image
            labels[valid_rois_per_this_image:] = -1
            bbox_weights[valid_rois_per_this_image:] = 0

        # masks
        # debug_gt_image_buffer = cv2.imread('debug_im_buffer.jpg')
        # initialized to -1; presumably -1 marks "ignore" for the mask loss — confirm
        mask_reg_targets = -np.ones((len(keep_indexes), 1, self._mask_size, self._mask_size))
        for idx, obj in enumerate(fg_indexes):
            # rois[idx] corresponds to fg_indexes[idx] because keep_indexes
            # lists the sampled foreground indices first
            gt_roi = np.round(gt_boxes[gt_assignment[obj], :-1]).astype(int)
            ex_roi = np.round(rois[idx, 1:]).astype(int)
            gt_mask = gt_masks[gt_assignment[obj]]
            # crop the gt mask to this example roi, resize to the fixed mask
            # size, then binarize at the instance's threshold
            mask_reg_target = intersect_box_mask(ex_roi, gt_roi, gt_mask)
            mask_reg_target = cv2.resize(mask_reg_target.astype(np.float), (self._mask_size, self._mask_size))
            mask_reg_target = mask_reg_target >= self._binary_thresh
            mask_reg_targets[idx, ...] = mask_reg_target

        return rois, labels, bbox_targets, bbox_weights, mask_reg_targets