Example #1
def rel_assignments_gt_boxes(roidb, im_inds):
    """Build foreground relation triplets from the GT annotations and sample
    background (no-relation) pairs from the remaining same-image box pairs."""
    fg_rels = []
    num_img = len(roidb)
    is_cand = (im_inds[:, None] == im_inds[None])
    is_cand[np.arange(im_inds.shape[0]), np.arange(im_inds.shape[0])] = False
    for i in range(num_img):
        gt_boxes_i = roidb[i]['boxes']
        sbj_gt_boxes_i = roidb[i]['sbj_gt_boxes']
        obj_gt_boxes_i = roidb[i]['obj_gt_boxes']
        prd_gt_classes_i = roidb[i]['prd_gt_classes']
        if cfg.MODEL.USE_BG:
            # Shift predicate labels by 1 so 0 is reserved for background;
            # copy rather than += to avoid mutating the roidb entry in place.
            prd_gt_classes_i = prd_gt_classes_i + 1

        sbj_gt_inds_i = box_utils.bbox_overlaps(sbj_gt_boxes_i,
                                                gt_boxes_i).argmax(-1)
        obj_gt_inds_i = box_utils.bbox_overlaps(obj_gt_boxes_i,
                                                gt_boxes_i).argmax(-1)
        im_id_i = np.ones_like(sbj_gt_inds_i) * i
        gt_rels_i = np.stack(
            (im_id_i, sbj_gt_inds_i, obj_gt_inds_i, prd_gt_classes_i), -1)
        fg_rels.append(gt_rels_i)

    fg_rels = np.concatenate(fg_rels, 0)
    offset = {}
    for i, s, e in enumerate_by_image(im_inds):
        offset[i] = s
    for i, s, e in enumerate_by_image(fg_rels[:, 0]):
        fg_rels[s:e, 1:3] += offset[i]

    is_cand[fg_rels[:, 1], fg_rels[:, 2]] = False

    num_fg = min(fg_rels.shape[0], int(cfg.TRAIN.FG_REL_SIZE_PER_IM * num_img))
    if fg_rels.shape[0] > num_fg:
        fg_ind = np.random.choice(fg_rels.shape[0], num_fg, replace=False)

        fg_rels = fg_rels[fg_ind]

    sbj_bg_inds, obj_bg_inds = np.where(is_cand)
    bg_rels = np.stack((im_inds[sbj_bg_inds].astype(sbj_bg_inds.dtype),
                        sbj_bg_inds, obj_bg_inds,
                        np.zeros_like(sbj_bg_inds)), -1)

    num_bg = min(
        bg_rels.shape[0],
        int(cfg.TRAIN.FG_REL_SIZE_PER_IM / cfg.TRAIN.FG_REL_FRACTION *
            num_img - num_fg))
    if num_bg > 0:

        if bg_rels.shape[0] > num_bg:
            bg_ind = np.random.choice(bg_rels.shape[0], num_bg, replace=False)

            bg_rels = bg_rels[bg_ind]

        rel_labels = np.concatenate((fg_rels, bg_rels), 0)
    else:
        rel_labels = fg_rels

    return rel_labels[:, :-1], rel_labels
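# The examples on this page assume box_utils.bbox_overlaps(A, B) returns an
# (N, M) IoU matrix, as in Detectron's cython implementation. A minimal NumPy
# sketch of that convention (including Detectron's +1 integer-pixel areas),
# useful for running the snippets standalone; bbox_overlaps_np is a
# hypothetical stand-in, not part of box_utils:
import numpy as np

def bbox_overlaps_np(boxes, query_boxes):
    """IoU between boxes (N, 4) and query_boxes (M, 4), both in xyxy format."""
    overlaps = np.zeros((boxes.shape[0], query_boxes.shape[0]), dtype=np.float32)
    areas = (boxes[:, 2] - boxes[:, 0] + 1) * (boxes[:, 3] - boxes[:, 1] + 1)
    for m in range(query_boxes.shape[0]):
        qx1, qy1, qx2, qy2 = query_boxes[m]
        q_area = (qx2 - qx1 + 1) * (qy2 - qy1 + 1)
        # Clamped intersection width/height, then IoU against column m
        iw = np.minimum(boxes[:, 2], qx2) - np.maximum(boxes[:, 0], qx1) + 1
        ih = np.minimum(boxes[:, 3], qy2) - np.maximum(boxes[:, 1], qy1) + 1
        inter = np.maximum(iw, 0) * np.maximum(ih, 0)
        overlaps[:, m] = inter / (areas + q_area - inter)
    return overlaps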
Example #2
def _compute_pred_matches(gt_triplets,
                          pred_triplets,
                          gt_boxes,
                          pred_boxes,
                          iou_thresh=0.5,
                          phrdet=False):
    """
    Given a set of predicted triplets, return the list of matching GT's for each of the
    given predictions
    :param gt_triplets: 
    :param pred_triplets: 
    :param gt_boxes: 
    :param pred_boxes: 
    :param iou_thresh: Do y
    :return: 
    """
    # This performs a matrix multiplication-esque thing between the two arrays
    # Instead of summing, we want the equality, so we reduce in that way
    # The rows correspond to GT triplets, columns to pred triplets
    keeps = intersect_2d(gt_triplets, pred_triplets)
    gt_has_match = keeps.any(1)
    pred_to_gt = [[] for x in range(pred_boxes.shape[0])]
    for gt_ind, gt_box, keep_inds in zip(
            np.where(gt_has_match)[0],
            gt_boxes[gt_has_match],
            keeps[gt_has_match],
    ):
        boxes = pred_boxes[keep_inds]
        if phrdet:
            # Evaluate where the union box > 0.5
            gt_box_union = gt_box.reshape((2, 4))
            gt_box_union = np.concatenate(
                (gt_box_union.min(0)[:2], gt_box_union.max(0)[2:]), 0)

            box_union = boxes.reshape((-1, 2, 4))
            box_union = np.concatenate(
                (box_union.min(1)[:, :2], box_union.max(1)[:, 2:]), 1)

            gt_box_union = gt_box_union.astype(dtype=np.float32, copy=False)
            box_union = box_union.astype(dtype=np.float32, copy=False)
            inds = bbox_overlaps(gt_box_union[None],
                                 box_union)[0] >= iou_thresh

        else:
            gt_box = gt_box.astype(dtype=np.float32, copy=False)
            boxes = boxes.astype(dtype=np.float32, copy=False)
            sub_iou = bbox_overlaps(gt_box[None, :4], boxes[:, :4])[0]
            obj_iou = bbox_overlaps(gt_box[None, 4:], boxes[:, 4:])[0]

            inds = (sub_iou >= iou_thresh) & (obj_iou >= iou_thresh)

        for i in np.where(keep_inds)[0][inds]:
            pred_to_gt[i].append(int(gt_ind))
    return pred_to_gt
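# _compute_pred_matches depends on an intersect_2d helper. A minimal sketch of
# the expected semantics (row-wise equality between two 2-D arrays), assuming
# both inputs have the same number of columns:
import numpy as np

def intersect_2d(x, y):
    """Boolean (N, M) matrix whose (i, j) entry is True iff x[i] == y[j]."""
    if x.shape[1] != y.shape[1]:
        raise ValueError('Inputs must have the same number of columns')
    # Broadcast (N, 1, C) against (1, M, C) and reduce over the columns
    return (x[:, None, :] == y[None, :, :]).all(-1)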
Example #3
def get_importance_factor(select_rois, sbj_gt_boxes, obj_gt_boxes, im_info):
    # Undo the image scale factor so the ROIs are comparable to the GT boxes.
    select_boxes = select_rois[:, 1:] / im_info[0, 2].data.cpu().numpy()
    sbj_count = (box_utils.bbox_overlaps(select_boxes, sbj_gt_boxes) >
                 0.5).astype(np.float32).sum(-1)
    obj_count = (box_utils.bbox_overlaps(select_boxes, obj_gt_boxes) >
                 0.5).astype(np.float32).sum(-1)
    pair_count = sbj_count + obj_count
    # theta: each ROI's share of the subject/object GT matches.
    theta = pair_count / np.maximum(pair_count.sum(), 1e-12)
    # gamma: a weighting term that vanishes at theta = 0.5 and grows as
    # theta -> 0, capped at 2.0.
    gamma = np.minimum(2.0, -((1 - 2 * theta)**5) * np.log(2 * theta))

    return gamma
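# Quick numeric check of the weighting curve above (hypothetical theta values):
import numpy as np
theta = np.array([0.05, 0.25, 0.5])
gamma = np.minimum(2.0, -((1 - 2 * theta)**5) * np.log(2 * theta))
# gamma ~ [1.36, 0.02, 0.0]: the weight vanishes at theta = 0.5 and grows
# (capped at 2.0) as a ROI's share of the matches approaches 0.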
Example #4
def _merge_proposal_boxes_into_roidb(roidb, box_list):
    """Add proposal boxes to each roidb entry."""
    assert len(box_list) == len(roidb)
    # for each of the images, merge the proposals
    for i, entry in enumerate(roidb):
        boxes = box_list[i]
        num_boxes = boxes.shape[0]
        gt_overlaps = np.zeros((num_boxes, entry['gt_overlaps'].shape[1]),
                               dtype=entry['gt_overlaps'].dtype)
        box_to_gt_ind_map = -np.ones(
            (num_boxes), dtype=entry['box_to_gt_ind_map'].dtype)

        # Note: unlike in other places, here we intentionally include all gt
        # rois, even ones marked as crowd. Boxes that overlap with crowds will
        # be filtered out later (see: _filter_crowd_proposals).
        gt_inds = np.where(entry['gt_classes'] > 0)[0]
        if len(gt_inds) > 0:
            gt_boxes = entry['boxes'][gt_inds, :]
            gt_classes = entry['gt_classes'][gt_inds]
            proposal_to_gt_overlaps = box_utils.bbox_overlaps(
                boxes.astype(dtype=np.float32, copy=False),
                gt_boxes.astype(dtype=np.float32, copy=False))
            # Gt box that overlaps each input box the most
            # (ties are broken arbitrarily by class order)
            argmaxes = proposal_to_gt_overlaps.argmax(axis=1)
            # Amount of that overlap
            maxes = proposal_to_gt_overlaps.max(axis=1)
            # Those boxes with non-zero overlap with gt boxes
            I = np.where(maxes > 0)[0]
            # Record max overlaps with the class of the appropriate gt box
            gt_overlaps[I, gt_classes[argmaxes[I]]] = maxes[I]
            box_to_gt_ind_map[I] = gt_inds[argmaxes[I]]
        entry['boxes'] = np.append(entry['boxes'],
                                   boxes.astype(entry['boxes'].dtype,
                                                copy=False),
                                   axis=0)
        entry['gt_classes'] = np.append(
            entry['gt_classes'],
            np.zeros((num_boxes), dtype=entry['gt_classes'].dtype))
        # Note: 'gt_attributes' is intentionally not extended here; the padding
        # below does not appear to be necessary.
        # max_attr_per_ins = entry['gt_attributes'].shape[1]
        # entry['gt_attributes'] = np.append(
        #     entry['gt_attributes'],
        #     np.zeros((num_boxes, max_attr_per_ins), dtype=entry['gt_attributes'].dtype),
        #     axis=0
        # )
        entry['seg_areas'] = np.append(
            entry['seg_areas'],
            np.zeros((num_boxes), dtype=entry['seg_areas'].dtype))
        entry['gt_overlaps'] = np.append(entry['gt_overlaps'].toarray(),
                                         gt_overlaps,
                                         axis=0)
        entry['gt_overlaps'] = scipy.sparse.csr_matrix(entry['gt_overlaps'])
        entry['is_crowd'] = np.append(
            entry['is_crowd'],
            np.zeros((num_boxes), dtype=entry['is_crowd'].dtype))
        entry['box_to_gt_ind_map'] = np.append(
            entry['box_to_gt_ind_map'],
            box_to_gt_ind_map.astype(entry['box_to_gt_ind_map'].dtype,
                                     copy=False))
Example #5
def _build_graph(boxes, iou_threshold):
    """Build graph based on box IoU"""
    overlaps = box_utils.bbox_overlaps(
        boxes.astype(dtype=np.float32, copy=False),
        boxes.astype(dtype=np.float32, copy=False))

    return (overlaps > iou_threshold).astype(np.float32)
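# Minimal usage sketch with hypothetical boxes (assumes box_utils.bbox_overlaps
# is importable, or that the NumPy sketch after Example #1 is substituted):
# boxes = np.array([[0, 0, 10, 10], [5, 5, 15, 15], [100, 100, 110, 110]],
#                  dtype=np.float32)
# adj = _build_graph(boxes, iou_threshold=0.1)
# adj is a 3x3 0/1 float matrix: boxes 0 and 1 are connected, while box 2 keeps
# only its self-loop (IoU of a box with itself is 1, above any threshold < 1).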
Example #6
def _compute_targets(entry):
    """Compute bounding-box regression targets for an image."""
    # Indices of ground-truth ROIs
    rois = entry['boxes']
    overlaps = entry['max_overlaps']
    labels = entry['max_classes']
    gt_inds = np.where((entry['gt_classes'] > 0) & (entry['is_crowd'] == 0))[0]
    # Targets has format (class, tx, ty, tw, th)
    targets = np.zeros((rois.shape[0], 5), dtype=np.float32)
    if len(gt_inds) == 0:
        # Bail if the image has no ground-truth ROIs
        return targets

    # Indices of examples for which we try to make predictions
    ex_inds = np.where(overlaps >= cfg.TRAIN.BBOX_THRESH)[0]

    # Get IoU overlap between each ex ROI and gt ROI
    ex_gt_overlaps = box_utils.bbox_overlaps(
        rois[ex_inds, :].astype(dtype=np.float32, copy=False),
        rois[gt_inds, :].astype(dtype=np.float32, copy=False))

    # Find which gt ROI each ex ROI has max overlap with:
    # this will be the ex ROI's gt target
    gt_assignment = ex_gt_overlaps.argmax(axis=1)
    gt_rois = rois[gt_inds[gt_assignment], :]
    ex_rois = rois[ex_inds, :]
    # Use class "1" for all boxes if using class_agnostic_bbox_reg
    targets[ex_inds, 0] = (
        1 if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG else labels[ex_inds])
    targets[ex_inds, 1:] = box_utils.bbox_transform_inv(
        ex_rois, gt_rois, cfg.MODEL.BBOX_REG_WEIGHTS)
    return targets
Example #7
 def _get_gt_bboxes_overlaps(self, entry):
     gt_boxes = entry['boxes']
     gt_to_gt_overlaps = box_utils.bbox_overlaps(
         gt_boxes.astype(dtype=np.float32, copy=False),
         gt_boxes.astype(dtype=np.float32, copy=False)
     )
     return gt_to_gt_overlaps
Example #9
    def get_rel_inds(self, det_rois, det_labels, roidb, im_info):
        num_img = int(det_rois[:, 0].max()) + 1
        im_inds = det_rois[:, 0].astype(np.int64)

        # TODO: sgdet-mode training is not supported yet.
        if self.training:
            return relpn_heads.rel_assignments(im_inds, det_rois, det_labels,
                                               roidb, im_info,
                                               num_sample_per_gt=1,
                                               filter_non_overlap=True)

        else:
            if cfg.TRAIN.GT_BOXES:
                fg_rels = []
                is_cand = (im_inds[:, None] == im_inds[None])
                is_cand[np.arange(im_inds.shape[0]), np.arange(im_inds.shape[0])] = False
                for i in range(num_img):
                    gt_boxes_i = roidb[i]['boxes']
                    sbj_gt_boxes_i = roidb[i]['sbj_gt_boxes']
                    obj_gt_boxes_i = roidb[i]['obj_gt_boxes']

                    sbj_gt_inds_i = box_utils.bbox_overlaps(sbj_gt_boxes_i, gt_boxes_i).argmax(-1)
                    obj_gt_inds_i = box_utils.bbox_overlaps(obj_gt_boxes_i, gt_boxes_i).argmax(-1)
                    im_id_i = np.ones_like(sbj_gt_inds_i) * i
                    gt_rels_i = np.stack((im_id_i, sbj_gt_inds_i, obj_gt_inds_i), -1)
                    fg_rels.append(gt_rels_i)

                rel_inds = np.concatenate(fg_rels, 0)
                
            else:

                is_cand = (im_inds[:, None] == im_inds[None])
                is_cand[np.arange(im_inds.shape[0]), np.arange(im_inds.shape[0])] = False

                is_cand = (box_utils.bbox_overlaps(det_rois[:, 1:], det_rois[:, 1:]) > 0) & is_cand

                sbj_ind, obj_ind = np.where(is_cand)
                if len(sbj_ind) == 0:
                    sbj_ind, obj_ind = np.zeros(1, dtype=np.int64), np.zeros(1, dtype=np.int64)
                rel_inds = np.stack((det_rois[sbj_ind, 0].astype(sbj_ind.dtype), sbj_ind, obj_ind), -1)

            return rel_inds, None
Example #10
def predbox_roi_iou(raw_roi, pred_box):
    if raw_roi.size == 0:
        raw_roi = np.zeros((1, 4), dtype="float32")
    if pred_box.size == 0:
        pred_box = np.zeros((1, 4), dtype="float32")

    iou = box_utils.bbox_overlaps(raw_roi, pred_box)
    roi_iou = iou.max(axis=1)
    return roi_iou
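# Usage sketch with hypothetical boxes (float32 xyxy, as the Detectron-style
# box_utils expects):
# raw_roi = np.array([[0, 0, 10, 10]], dtype=np.float32)
# pred_box = np.array([[0, 0, 10, 10], [20, 20, 30, 30]], dtype=np.float32)
# predbox_roi_iou(raw_roi, pred_box)  # -> array([1.], dtype=float32)
# Empty inputs are replaced by a single dummy zero box, so the call never fails.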
Example #11
def rel_samples(det_rois, edge_inds, im_info, roidb):
    num_img = int(det_rois[:, 0].max() + 1)
    edge_indices_sets = [
        np.where(edge_inds[:, 0] == i)[0] for i in range(num_img)
    ]
    fg_rels = []
    bg_rels = []
    for i, edge_indices in enumerate(edge_indices_sets):
        edge_inds_i = edge_inds[edge_indices]
        sbj_gt_rois_i = roidb[i]['sbj_gt_boxes'] * im_info[
            i, 2].data.cpu().numpy()
        obj_gt_rois_i = roidb[i]['obj_gt_boxes'] * im_info[
            i, 2].data.cpu().numpy()
        prd_gt_classes_i = roidb[i]['prd_gt_classes']
        if cfg.MODEL.USE_BG:
            # Copy rather than += to avoid mutating the roidb entry in place.
            prd_gt_classes_i = prd_gt_classes_i + 1
        min_ious_i = np.minimum(
            box_utils.bbox_overlaps(det_rois[edge_inds_i[:, 1]][:, 1:],
                                    sbj_gt_rois_i),
            box_utils.bbox_overlaps(det_rois[edge_inds_i[:, 2]][:, 1:],
                                    obj_gt_rois_i))

        edge_rels_i = np.pad(edge_inds_i, ((0, 0), (0, 1)), 'constant')
        fg_inds_i = np.where(min_ious_i.max(-1) >= cfg.TRAIN.FG_THRESH)[0]
        edge_rels_i[fg_inds_i,
                    -1] = prd_gt_classes_i[min_ious_i.argmax(-1)[fg_inds_i]]
        fg_rels.append(edge_rels_i[edge_rels_i[:, -1] > 0])
        bg_rels.append(edge_rels_i[edge_rels_i[:, -1] == 0])
    fg_rels = np.concatenate(fg_rels, 0)
    bg_rels = np.concatenate(bg_rels, 0)
    num_fg = min(fg_rels.shape[0], int(cfg.TRAIN.FG_REL_SIZE_PER_IM * num_img))
    num_bg = min(
        bg_rels.shape[0],
        int(cfg.TRAIN.FG_REL_SIZE_PER_IM / cfg.TRAIN.FG_REL_FRACTION *
            num_img - num_fg))
    if fg_rels.shape[0] > num_fg:
        fg_ind = npr.choice(fg_rels.shape[0], num_fg, replace=False)
        fg_rels = fg_rels[fg_ind]
    if num_bg > 0:
        if bg_rels.shape[0] > num_bg:
            bg_ind = npr.choice(bg_rels.shape[0], num_bg, replace=False)
            bg_rels = bg_rels[bg_ind]

        rel_labels = np.concatenate((fg_rels, bg_rels), 0)
    else:
        rel_labels = fg_rels
    return rel_labels
Example #12
def stats_calculator(all_proposals, gt_i):
    iou_mat = box_utils.bbox_overlaps(all_proposals, gt_i)
    max_inds = np.argmax(iou_mat, axis=1)
    max_element = np.max(iou_mat, axis=1)
    thrsh_inds = np.where(max_element >= 0)
    max_inds = max_inds[thrsh_inds]
    all_proposals = all_proposals[thrsh_inds]

    # IOU(Intersection Over Union)
    max_element = max_element[thrsh_inds]

    center_point_distance = []
    iou_over_gt = []

    for ind, item in enumerate(all_proposals):
        # item is the current proposal; matched_gt is its matched ground truth
        matched_gt_ind = max_inds[int(ind)]
        matched_gt = gt_i[matched_gt_ind]
        matched_gt_width = matched_gt[2] - matched_gt[0] + 1
        matched_gt_height = matched_gt[3] - matched_gt[1] + 1

        gt_center_point = ((matched_gt[0] + matched_gt[2]) / 2,
                           (matched_gt[1] + matched_gt[3]) / 2)

        pp_width = item[2] - item[0] + 1
        pp_height = item[3] - item[1] + 1

        pp_center_point = ((item[0] + item[2]) / 2,
                           (item[1] + item[3]) / 2)

        distance = np.sqrt(
            np.square(gt_center_point[0] - pp_center_point[0]) +
            np.square(gt_center_point[1] - pp_center_point[1]))

        # DoC: Distance of Centers (normalized)
        dis_width = matched_gt_width / 2 + pp_width / 2
        dis_height = matched_gt_height / 2 + pp_height / 2

        distance = distance / np.sqrt(
            np.square(dis_width) + np.square(dis_height))

        center_point_distance.append(distance)
        # Compute the intersection area
        iw = min(item[2], matched_gt[2]) - max(item[0], matched_gt[0]) + 1
        intersect = 0
        if iw > 0:
            ih = min(item[3], matched_gt[3]) - max(item[1], matched_gt[1]) + 1
            if ih > 0:
                intersect = iw * ih

        gt_area = matched_gt_height * matched_gt_width
        assert gt_area > 0
        # Intersection Over GT
        iou_over_gt.append(intersect / gt_area)

    center_point_distance = np.array(center_point_distance, dtype=np.float32)
    iou_over_gt = np.array(iou_over_gt, dtype=np.float32)
    return max_element, center_point_distance, iou_over_gt
Example #13
def _merge_compute_boxes_into_roidb(roidb, box_list):
    """Add proposal boxes to each roidb entry."""
    assert len(box_list) == len(roidb)

    for i, entry in enumerate(roidb):
        boxes = box_list[i]  # gt + det
        num_boxes = boxes.shape[0]

        gt_overlaps = np.zeros((num_boxes, entry['gt_overlaps'].shape[1]),
                               dtype=entry['gt_overlaps'].dtype)
        box_to_gt_ind_map = -np.ones(
            (num_boxes), dtype=entry['box_to_gt_ind_map'].dtype)

        # Note: unlike in other places, here we intentionally include all gt
        # rois, even ones marked as crowd. Boxes that overlap with crowds will
        # be filtered out later (see: _filter_crowd_proposals).
        gt_inds = np.where(entry['gt_classes'] > 0)[0]
        if len(gt_inds) > 0:
            gt_boxes = entry['boxes'][gt_inds, :]
            gt_classes = entry['gt_classes'][gt_inds]
            proposal_to_gt_overlaps = box_utils.bbox_overlaps(
                boxes.astype(dtype=np.float32, copy=False),
                gt_boxes.astype(dtype=np.float32, copy=False))
            # Gt box that overlaps each input box the most
            # (ties are broken arbitrarily by class order)
            argmaxes = proposal_to_gt_overlaps.argmax(axis=1)
            # Amount of that overlap
            maxes = proposal_to_gt_overlaps.max(axis=1)
            # Those boxes with non-zero overlap with gt boxes
            I = np.where(maxes > 0)[0]
            # Record max overlaps with the class of the appropriate gt box
            gt_overlaps[I, gt_classes[argmaxes[I]]] = maxes[I]
            box_to_gt_ind_map[I] = gt_inds[argmaxes[I]]
        entry['boxes'] = boxes.astype(entry['boxes'].dtype, copy=False)
        entry['box_to_gt_ind_map'] = box_to_gt_ind_map.astype(
            entry['box_to_gt_ind_map'].dtype, copy=False)

        gt_to_classes = -np.ones(len(entry['box_to_gt_ind_map']))
        matched_ids = np.where(entry['box_to_gt_ind_map'] > -1)[0]
        gt_to_classes[matched_ids] = entry['gt_classes'][
            entry['box_to_gt_ind_map'][matched_ids]]
        entry['gt_classes'] = gt_to_classes

        entry['seg_areas'] = np.zeros((num_boxes),
                                      dtype=entry['seg_areas'].dtype)
        entry['gt_overlaps'] = gt_overlaps
        entry['gt_overlaps'] = scipy.sparse.csr_matrix(entry['gt_overlaps'])

        is_to_crowd = np.ones(len(entry['box_to_gt_ind_map']))
        is_to_crowd[matched_ids] = entry['is_crowd'][entry['box_to_gt_ind_map']
                                                     [matched_ids]]
        entry['is_crowd'] = is_to_crowd
Example #14
    def _sample_pairs(self, det_rois, edge_inds, im_info, roidb):
        sbj_gt_rois = roidb['sbj_gt_boxes'] * im_info[2].data.cpu().numpy()
        obj_gt_rois = roidb['obj_gt_boxes'] * im_info[2].data.cpu().numpy()
        p_ious = (box_utils.bbox_overlaps(det_rois[edge_inds[:, 1]][:, 1:],
                                          sbj_gt_rois) *
                  box_utils.bbox_overlaps(det_rois[edge_inds[:, 2]][:, 1:],
                                          obj_gt_rois)).max(-1)

        fg_inds = np.where(p_ious >= cfg.TRAIN.PRUNE_PAIRS_POSTIVE_OVERLAP)[0]
        bg_inds = np.where(p_ious < cfg.TRAIN.PRUNE_PAIRS_NEGATIVE_OVERLAP)[0]
        num_fg = min(
            fg_inds.shape[0], cfg.TRAIN.PRUNE_PAIRS_FG_FRACTION *
            cfg.TRAIN.PRUNE_PAIRS_BATCHSIZE)
        num_bg = min(bg_inds.shape[0],
                     cfg.TRAIN.PRUNE_PAIRS_BATCHSIZE - num_fg)
        if fg_inds.shape[0] > num_fg:
            fg_inds = npr.choice(fg_inds, size=int(num_fg), replace=False)
        if bg_inds.shape[0] > num_bg:
            bg_inds = npr.choice(bg_inds, size=int(num_bg), replace=False)
        labels = np.concatenate(
            (np.ones_like(fg_inds), np.zeros_like(bg_inds)), 0)
        keep_inds = np.concatenate((fg_inds, bg_inds), 0)
        return keep_inds, labels
Example #15
 def _do_test(b1, b2):
     # Compute IoU overlap with the cython implementation
     cython_iou = box_utils.bbox_overlaps(b1, b2)
     # Compute IoU overlap with the COCO API implementation
     # (requires converting boxes from xyxy to xywh format)
     xywh_b1 = box_utils.xyxy_to_xywh(b1)
     xywh_b2 = box_utils.xyxy_to_xywh(b2)
     not_crowd = [int(False)] * b2.shape[0]
     coco_ious = COCOmask.iou(xywh_b1, xywh_b2, not_crowd)
     # IoUs should be similar
     np.testing.assert_array_almost_equal(cython_iou,
                                          coco_ious,
                                          decimal=5)
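# A hypothetical driver for the consistency check above: random, well-formed
# xyxy boxes (x2 >= x1, y2 >= y1) in float32, as both implementations expect.
import numpy as np

def _random_boxes(n, seed):
    rng = np.random.RandomState(seed)
    x1y1 = rng.uniform(0, 50, size=(n, 2))
    wh = rng.uniform(1, 50, size=(n, 2))  # strictly positive widths/heights
    return np.hstack((x1y1, x1y1 + wh)).astype(np.float32)

_do_test(_random_boxes(8, seed=0), _random_boxes(6, seed=1))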
Example #17
def box_filter(boxes, must_overlap=False):
    """Only include boxes that overlap as possible relations.
    If no boxes overlap, fall back to all pairs."""
    n_cands = boxes.shape[0]

    overlaps = box_utils.bbox_overlaps(boxes.astype(np.float32), boxes.astype(np.float32)) > 0
    np.fill_diagonal(overlaps, 0)

    all_possib = np.ones_like(overlaps, dtype=bool)
    np.fill_diagonal(all_possib, 0)

    if must_overlap:
        possible_boxes = np.column_stack(np.where(overlaps))

        if possible_boxes.size == 0:
            possible_boxes = np.column_stack(np.where(all_possib))
    else:
        possible_boxes = np.column_stack(np.where(all_possib))
    return possible_boxes
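# Usage sketch (hypothetical boxes): two overlapping boxes and one far away.
# boxes = np.array([[0, 0, 10, 10], [5, 5, 15, 15], [100, 100, 110, 110]],
#                  dtype=np.float32)
# box_filter(boxes, must_overlap=True)   # -> [[0, 1], [1, 0]]
# box_filter(boxes, must_overlap=False)  # all 6 ordered off-diagonal pairs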
Example #18
def get_proposal_clusters(all_rois, proposals, im_labels):
    """Generate a random sample of RoIs comprising foreground and background
    examples.
    """
    num_images, num_classes = im_labels.shape
    assert num_images == 1, 'batch size should be equal to 1'
    # overlaps: (rois x gt_boxes)
    gt_boxes = proposals['gt_boxes']
    gt_labels = proposals['gt_classes']
    gt_scores = proposals['gt_scores']
    overlaps = box_utils.bbox_overlaps(
        all_rois.astype(dtype=np.float32, copy=False),
        gt_boxes.astype(dtype=np.float32, copy=False))
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    labels = gt_labels[gt_assignment, 0]
    cls_loss_weights = gt_scores[gt_assignment, 0]

    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]

    # Select background RoIs as those with < FG_THRESH overlap
    bg_inds = np.where(max_overlaps < cfg.TRAIN.FG_THRESH)[0]

    ig_inds = np.where(max_overlaps < cfg.TRAIN.BG_THRESH)[0]
    cls_loss_weights[ig_inds] = 0.0

    labels[bg_inds] = 0

    if cfg.MODEL.WITH_FRCNN:
        bbox_targets = _compute_targets(all_rois, gt_boxes[gt_assignment, :],
                                        labels)
        bbox_targets, bbox_inside_weights = _expand_bbox_targets(bbox_targets)
        bbox_outside_weights = np.array(
            bbox_inside_weights > 0, dtype=bbox_inside_weights.dtype) \
            * cls_loss_weights.reshape(-1, 1)
    else:
        bbox_targets, bbox_inside_weights, bbox_outside_weights = np.array(
            [0]), np.array([0]), np.array([0])

    gt_assignment[bg_inds] = -1

    return labels, cls_loss_weights, gt_assignment, bbox_targets, bbox_inside_weights, bbox_outside_weights
Example #19
def _get_proposal_clusters(all_rois, proposals, im_labels, cls_prob):
    """Generate a random sample of RoIs comprising foreground and background
    examples.
    """
    num_images, num_classes = im_labels.shape
    assert num_images == 1, 'batch size should be equal to 1'
    # overlaps: (rois x gt_boxes)
    gt_boxes = proposals['gt_boxes']
    gt_labels = proposals['gt_classes']
    gt_scores = proposals['gt_scores']
    overlaps = box_utils.bbox_overlaps(
        all_rois.astype(dtype=np.float32, copy=False),
        gt_boxes.astype(dtype=np.float32, copy=False))
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    labels = gt_labels[gt_assignment, 0]
    cls_loss_weights = gt_scores[gt_assignment, 0]

    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]

    # Select background RoIs as those with < FG_THRESH overlap
    bg_inds = np.where(max_overlaps < cfg.TRAIN.FG_THRESH)[0]

    ig_inds = np.where(max_overlaps < cfg.TRAIN.BG_THRESH)[0]
    cls_loss_weights[ig_inds] = 0.0

    labels[bg_inds] = 0
    gt_assignment[bg_inds] = -1

    img_cls_loss_weights = np.zeros(gt_boxes.shape[0], dtype=np.float32)
    pc_probs = np.zeros(gt_boxes.shape[0], dtype=np.float32)
    pc_labels = np.zeros(gt_boxes.shape[0], dtype=np.int32)
    pc_count = np.zeros(gt_boxes.shape[0], dtype=np.int32)

    for i in range(gt_boxes.shape[0]):
        po_index = np.where(gt_assignment == i)[0]
        img_cls_loss_weights[i] = np.sum(cls_loss_weights[po_index])
        pc_labels[i] = gt_labels[i, 0]
        pc_count[i] = len(po_index)
        pc_probs[i] = np.average(cls_prob[po_index, pc_labels[i]])

    return labels, cls_loss_weights, gt_assignment, pc_labels, pc_probs, pc_count, img_cls_loss_weights
Example #20
    def _calculate_gt_bbox_overlaps(self, entry, threshold, is_same_cls=True):
        """ Calculate the overlaps between gt bboxes; used for merging
            significantly overlapping bboxes into a single bbox.
        """
        gt_boxes = entry['boxes']
        segms = entry['segms']
        gt_classes = entry['gt_classes']
        num_valid_objs = gt_boxes.shape[0]
        gt_to_gt_overlaps = box_utils.bbox_overlaps(
                gt_boxes.astype(dtype=np.float32, copy=False),
                gt_boxes.astype(dtype=np.float32, copy=False)
        )
        # keep the upper-triangular part so each pair is counted once
        gt_to_gt_overlaps = np.triu(gt_to_gt_overlaps, k=1)
        # only consider overlaps between boxes of the same class
        if is_same_cls:
            mask = ((gt_classes[:, np.newaxis] - gt_classes[np.newaxis, :]) == 0)
            gt_to_gt_overlaps = gt_to_gt_overlaps[mask]
        num_overlap = np.sum(gt_to_gt_overlaps > threshold)

        return num_overlap
Example #21
def _merge_paired_boxes_into_roidb(roidb, sbj_box_list, obj_box_list):
    assert len(sbj_box_list) == len(obj_box_list) == len(roidb)
    for i, entry in enumerate(roidb):
        sbj_boxes = sbj_box_list[i]
        obj_boxes = obj_box_list[i]
        assert sbj_boxes.shape[0] == obj_boxes.shape[0]
        num_pairs = sbj_boxes.shape[0]
        sbj_gt_overlaps = np.zeros(
            (num_pairs, entry['sbj_gt_overlaps'].shape[1]),
            dtype=entry['sbj_gt_overlaps'].dtype
        )
        obj_gt_overlaps = np.zeros(
            (num_pairs, entry['obj_gt_overlaps'].shape[1]),
            dtype=entry['obj_gt_overlaps'].dtype
        )
        prd_gt_overlaps = np.zeros(
            (num_pairs, entry['prd_gt_overlaps'].shape[1]),
            dtype=entry['prd_gt_overlaps'].dtype
        )
        pair_to_gt_ind_map = -np.ones(
            (num_pairs), dtype=entry['pair_to_gt_ind_map'].dtype
        )
        
        pair_gt_inds = np.arange(entry['prd_gt_classes'].shape[0])
        if len(pair_gt_inds) > 0:
            sbj_gt_boxes = entry['sbj_gt_boxes'][pair_gt_inds, :]
            sbj_gt_classes = entry['sbj_gt_classes'][pair_gt_inds]
            obj_gt_boxes = entry['obj_gt_boxes'][pair_gt_inds, :]
            obj_gt_classes = entry['obj_gt_classes'][pair_gt_inds]
            prd_gt_classes = entry['prd_gt_classes'][pair_gt_inds]
            sbj_to_gt_overlaps = box_utils.bbox_overlaps(
                sbj_boxes.astype(dtype=np.float32, copy=False),
                sbj_gt_boxes.astype(dtype=np.float32, copy=False)
            )
            obj_to_gt_overlaps = box_utils.bbox_overlaps(
                obj_boxes.astype(dtype=np.float32, copy=False),
                obj_gt_boxes.astype(dtype=np.float32, copy=False)
            )
            pair_to_gt_overlaps = np.minimum(sbj_to_gt_overlaps, obj_to_gt_overlaps)
            # Gt box that overlaps each input box the most
            # (ties are broken arbitrarily by class order)
            sbj_argmaxes = sbj_to_gt_overlaps.argmax(axis=1)
            sbj_maxes = sbj_to_gt_overlaps.max(axis=1)  # Amount of that overlap
            sbj_I = np.where(sbj_maxes >= 0)[0]  # >= 0 keeps every pair

            obj_argmaxes = obj_to_gt_overlaps.argmax(axis=1)
            obj_maxes = obj_to_gt_overlaps.max(axis=1)  # Amount of that overlap
            obj_I = np.where(obj_maxes >= 0)[0]  # >= 0 keeps every pair

            pair_argmaxes = pair_to_gt_overlaps.argmax(axis=1)
            pair_maxes = pair_to_gt_overlaps.max(axis=1)  # Amount of that overlap
            pair_I = np.where(pair_maxes >= 0)[0]  # >= 0 keeps every pair
            # Record max overlaps with the class of the appropriate gt box
            sbj_gt_overlaps[sbj_I, sbj_gt_classes[sbj_argmaxes[sbj_I]]] = sbj_maxes[sbj_I]
            obj_gt_overlaps[obj_I, obj_gt_classes[obj_argmaxes[obj_I]]] = obj_maxes[obj_I]
            prd_gt_overlaps[pair_I, prd_gt_classes[pair_argmaxes[pair_I]]] = pair_maxes[pair_I]
            pair_to_gt_ind_map[pair_I] = pair_gt_inds[pair_argmaxes[pair_I]]
        entry['sbj_boxes'] = sbj_boxes.astype(entry['sbj_gt_boxes'].dtype, copy=False)
        entry['sbj_gt_overlaps'] = sbj_gt_overlaps
        entry['sbj_gt_overlaps'] = scipy.sparse.csr_matrix(entry['sbj_gt_overlaps'])

        entry['obj_boxes'] = obj_boxes.astype(entry['obj_gt_boxes'].dtype, copy=False)
        entry['obj_gt_overlaps'] = obj_gt_overlaps
        entry['obj_gt_overlaps'] = scipy.sparse.csr_matrix(entry['obj_gt_overlaps'])

        entry['prd_gt_classes'] = -np.ones((num_pairs), dtype=entry['prd_gt_classes'].dtype)
        entry['prd_gt_overlaps'] = prd_gt_overlaps
        entry['prd_gt_overlaps'] = scipy.sparse.csr_matrix(entry['prd_gt_overlaps'])
        entry['pair_to_gt_ind_map'] = pair_to_gt_ind_map.astype(
                entry['pair_to_gt_ind_map'].dtype, copy=False)
Example #22
def _get_retinanet_blobs(
        foas, all_anchors, gt_boxes, gt_classes, im_width, im_height):
    total_anchors = all_anchors.shape[0]
    logger.debug('Getting mad blobs: im_height {} im_width: {}'.format(
        im_height, im_width))

    inds_inside = np.arange(all_anchors.shape[0])
    anchors = all_anchors
    num_inside = len(inds_inside)

    logger.debug('total_anchors: {}'.format(total_anchors))
    logger.debug('inds_inside: {}'.format(num_inside))
    logger.debug('anchors.shape: {}'.format(anchors.shape))

    # Compute anchor labels:
    # label=1 is positive, 0 is negative, -1 is don't care (ignore)
    labels = np.empty((num_inside, ), dtype=np.float32)
    labels.fill(-1)
    if len(gt_boxes) > 0:
        # Compute overlaps between the anchors and the gt boxes overlaps
        anchor_by_gt_overlap = box_utils.bbox_overlaps(anchors, gt_boxes)
        # Map from anchor to gt box that has highest overlap
        anchor_to_gt_argmax = anchor_by_gt_overlap.argmax(axis=1)
        # For each anchor, amount of overlap with most overlapping gt box
        anchor_to_gt_max = anchor_by_gt_overlap[
            np.arange(num_inside), anchor_to_gt_argmax]

        # Map from gt box to an anchor that has highest overlap
        gt_to_anchor_argmax = anchor_by_gt_overlap.argmax(axis=0)
        # For each gt box, amount of overlap with most overlapping anchor
        gt_to_anchor_max = anchor_by_gt_overlap[
            gt_to_anchor_argmax, np.arange(anchor_by_gt_overlap.shape[1])]
        # Find all anchors that share the max overlap amount
        # (this includes many ties)
        anchors_with_max_overlap = np.where(
            anchor_by_gt_overlap == gt_to_anchor_max)[0]

        # Fg label: for each gt use anchors with highest overlap
        # (including ties)
        gt_inds = anchor_to_gt_argmax[anchors_with_max_overlap]
        labels[anchors_with_max_overlap] = gt_classes[gt_inds]
        # Fg label: above threshold IOU
        inds = anchor_to_gt_max >= cfg.RETINANET.POSITIVE_OVERLAP
        gt_inds = anchor_to_gt_argmax[inds]
        labels[inds] = gt_classes[gt_inds]

    fg_inds = np.where(labels >= 1)[0]
    bg_inds = np.where(anchor_to_gt_max < cfg.RETINANET.NEGATIVE_OVERLAP)[0]
    labels[bg_inds] = 0
    num_fg, num_bg = len(fg_inds), len(bg_inds)

    bbox_targets = np.zeros((num_inside, 4), dtype=np.float32)
    bbox_targets[fg_inds, :] = data_utils.compute_targets(
        anchors[fg_inds, :], gt_boxes[anchor_to_gt_argmax[fg_inds], :])

    # Map up to original set of anchors
    labels = data_utils.unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = data_utils.unmap(bbox_targets, total_anchors, inds_inside, fill=0)

    # Split the generated labels, etc. into labels per each field of anchors
    blobs_out = []
    start_idx = 0
    for foa in foas:
        H = foa.field_size
        W = foa.field_size
        end_idx = start_idx + H * W
        _labels = labels[start_idx:end_idx]
        _bbox_targets = bbox_targets[start_idx:end_idx, :]
        start_idx = end_idx

        # labels output with shape (1, height, width)
        _labels = _labels.reshape((1, 1, H, W))
        # bbox_targets output with shape (1, 4 * A, height, width)
        _bbox_targets = _bbox_targets.reshape((1, H, W, 4)).transpose(0, 3, 1, 2)
        stride = foa.stride
        w = int(im_width / stride)
        h = int(im_height / stride)

        # data for select_smooth_l1 loss
        num_classes = cfg.MODEL.NUM_CLASSES - 1
        inds_4d = np.where(_labels > 0)
        M = len(inds_4d)
        _roi_bbox_targets = np.zeros((0, 4))
        _roi_fg_bbox_locs = np.zeros((0, 4))
        if M > 0:
            im_inds, y, x = inds_4d[0], inds_4d[2], inds_4d[3]
            _roi_bbox_targets = np.zeros((len(im_inds), 4))
            _roi_fg_bbox_locs = np.zeros((len(im_inds), 4))
            lbls = _labels[im_inds, :, y, x]
            for i, lbl in enumerate(lbls):
                l = lbl[0] - 1
                if not cfg.RETINANET.CLASS_SPECIFIC_BBOX:
                    l = 0
                assert l >= 0 and l < num_classes, 'label out of the range'
                _roi_bbox_targets[i, :] = _bbox_targets[:, :, y[i], x[i]]
                _roi_fg_bbox_locs[i, :] = np.array([[0, l, y[i], x[i]]])
        blobs_out.append(
            dict(
                retnet_cls_labels=_labels[:, :, 0:h, 0:w].astype(np.int32),
                retnet_roi_bbox_targets=_roi_bbox_targets.astype(np.float32),
                retnet_roi_fg_bbox_locs=_roi_fg_bbox_locs.astype(np.float32),
            ))
    out_num_fg = np.array([num_fg + 1.0], dtype=np.float32)
    out_num_bg = (
        np.array([num_bg + 1.0]) * (cfg.MODEL.NUM_CLASSES - 1) +
        out_num_fg * (cfg.MODEL.NUM_CLASSES - 2))
    return blobs_out, out_num_fg, out_num_bg
Example #23
def add_mask_rcnn_blobs(blobs, sampled_boxes, roidb, im_scale, batch_idx):
    """Add Mask R-CNN specific blobs to the input blob dictionary."""
    # Prepare the mask targets by associating one gt mask to each training roi
    # that has a fg (non-bg) class label.
    M = cfg.MRCNN.RESOLUTION
    polys_gt_inds = np.where((roidb['gt_classes'] > 0) &
                             (roidb['is_crowd'] == 0))[0]
    polys_gt = [roidb['segms'][i] for i in polys_gt_inds]
    boxes_from_polys = segm_utils.polys_to_boxes(polys_gt)
    # boxes_from_polys = [roidb['boxes'][i] for i in polys_gt_inds]
    fg_inds = np.where(blobs['labels_int32'] > 0)[0]
    roi_has_mask = blobs['labels_int32'].copy()
    roi_has_mask[roi_has_mask > 0] = 1

    if fg_inds.shape[0] > 0:
        # Class labels for the foreground rois
        mask_class_labels = blobs['labels_int32'][fg_inds]
        masks = blob_utils.zeros((fg_inds.shape[0], M**2), int32=True)

        # Find overlap between all foreground rois and the bounding boxes
        # enclosing each segmentation
        rois_fg = sampled_boxes[fg_inds]
        overlaps_bbfg_bbpolys = box_utils.bbox_overlaps(
            rois_fg.astype(np.float32, copy=False),
            boxes_from_polys.astype(np.float32, copy=False))
        # Map from each fg rois to the index of the mask with highest overlap
        # (measured by bbox overlap)
        fg_polys_inds = np.argmax(overlaps_bbfg_bbpolys, axis=1)

        # add fg targets
        for i in range(rois_fg.shape[0]):
            fg_polys_ind = fg_polys_inds[i]
            poly_gt = polys_gt[fg_polys_ind]
            roi_fg = rois_fg[i]
            # Rasterize the portion of the polygon mask within the given fg roi
            # to an M x M binary image
            mask = segm_utils.polys_to_mask_wrt_box(poly_gt, roi_fg, M)
            mask = np.array(mask > 0, dtype=np.int32)  # Ensure it's binary
            masks[i, :] = np.reshape(mask, M**2)
    else:  # If there are no fg masks (it does happen)
        # The network cannot handle empty blobs, so we must provide a mask.
        # We simply take the first bg roi, give it an all -1's mask (ignore
        # label), and label it with class zero (bg).
        bg_inds = np.where(blobs['labels_int32'] == 0)[0]
        # rois_fg is actually one background roi, but that's ok because ...
        rois_fg = sampled_boxes[bg_inds[0]].reshape((1, -1))
        # We give it an -1's blob (ignore label)
        masks = -blob_utils.ones((1, M**2), int32=True)
        # We label it with class = 0 (background)
        mask_class_labels = blob_utils.zeros((1, ))
        # Mark that the first roi has a mask
        roi_has_mask[0] = 1

    if cfg.MRCNN.CLS_SPECIFIC_MASK:
        masks = _expand_to_class_specific_mask_targets(masks,
                                                       mask_class_labels)

    # Scale rois_fg and format as (batch_idx, x1, y1, x2, y2)
    rois_fg *= im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones((rois_fg.shape[0], 1))
    rois_fg = np.hstack((repeated_batch_idx, rois_fg))

    # Update blobs dict with Mask R-CNN blobs
    blobs['mask_rois'] = rois_fg
    blobs['roi_has_mask_int32'] = roi_has_mask
    blobs['masks_int32'] = masks
Example #24
def evaluate_box_proposals(json_dataset,
                           roidb,
                           thresholds=None,
                           area='all',
                           limit=None):
    """Evaluate detection proposal recall metrics. This function is a much
    faster alternative to the official COCO API recall evaluation code. However,
    it produces slightly different results.
    """
    # Record max overlap value for each gt box
    # Return vector of overlap values
    areas = {
        'all': 0,
        'small': 1,
        'medium': 2,
        'large': 3,
        '96-128': 4,
        '128-256': 5,
        '256-512': 6,
        '512-inf': 7
    }
    area_ranges = [
        [0**2, 1e5**2],  # all
        [0**2, 32**2],  # small
        [32**2, 96**2],  # medium
        [96**2, 1e5**2],  # large
        [96**2, 128**2],  # 96-128
        [128**2, 256**2],  # 128-256
        [256**2, 512**2],  # 256-512
        [512**2, 1e5**2]
    ]  # 512-inf
    assert area in areas, 'Unknown area range: {}'.format(area)
    area_range = area_ranges[areas[area]]
    gt_overlaps = np.zeros(0)
    num_pos = 0
    for entry in roidb:
        gt_inds = np.where((entry['gt_classes'] > 0)
                           & (entry['is_crowd'] == 0))[0]
        gt_boxes = entry['boxes'][gt_inds, :]
        gt_areas = entry['seg_areas'][gt_inds]
        valid_gt_inds = np.where((gt_areas >= area_range[0])
                                 & (gt_areas <= area_range[1]))[0]
        gt_boxes = gt_boxes[valid_gt_inds, :]
        num_pos += len(valid_gt_inds)
        non_gt_inds = np.where(entry['gt_classes'] == 0)[0]
        boxes = entry['boxes'][non_gt_inds, :]
        if boxes.shape[0] == 0:
            continue
        if limit is not None and boxes.shape[0] > limit:
            boxes = boxes[:limit, :]
        overlaps = box_utils.bbox_overlaps(
            boxes.astype(dtype=np.float32, copy=False),
            gt_boxes.astype(dtype=np.float32, copy=False))
        _gt_overlaps = np.zeros((gt_boxes.shape[0]))
        for j in range(min(boxes.shape[0], gt_boxes.shape[0])):
            # find which proposal box maximally covers each gt box
            argmax_overlaps = overlaps.argmax(axis=0)
            # and get the iou amount of coverage for each gt box
            max_overlaps = overlaps.max(axis=0)
            # find which gt box is 'best' covered (i.e. 'best' = most iou)
            gt_ind = max_overlaps.argmax()
            gt_ovr = max_overlaps.max()
            assert gt_ovr >= 0
            # find the proposal box that covers the best covered gt box
            box_ind = argmax_overlaps[gt_ind]
            # record the iou coverage of this gt box
            _gt_overlaps[j] = overlaps[box_ind, gt_ind]
            assert _gt_overlaps[j] == gt_ovr
            # mark the proposal box and the gt box as used
            overlaps[box_ind, :] = -1
            overlaps[:, gt_ind] = -1
        # append recorded iou coverage level
        gt_overlaps = np.hstack((gt_overlaps, _gt_overlaps))

    gt_overlaps = np.sort(gt_overlaps)
    if thresholds is None:
        step = 0.05
        thresholds = np.arange(0.5, 0.95 + 1e-5, step)
    recalls = np.zeros_like(thresholds)
    # compute recall for each iou threshold
    for i, t in enumerate(thresholds):
        recalls[i] = (gt_overlaps >= t).sum() / float(num_pos)
    # ar = 2 * np.trapz(recalls, thresholds)
    ar = recalls.mean()
    return {
        'ar': ar,
        'recalls': recalls,
        'thresholds': thresholds,
        'gt_overlaps': gt_overlaps,
        'num_pos': num_pos
    }
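# Toy illustration of the greedy matching loop above, on a hypothetical
# 3-proposal x 2-gt overlaps matrix (same mark-as-used scheme with -1):
import numpy as np
overlaps = np.array([[0.9, 0.1],
                     [0.6, 0.7],
                     [0.2, 0.3]], dtype=np.float32)
_gt_overlaps = np.zeros(2)
for j in range(2):
    argmax_overlaps = overlaps.argmax(axis=0)   # best proposal per gt
    max_overlaps = overlaps.max(axis=0)         # its IoU per gt
    gt_ind = max_overlaps.argmax()              # best-covered gt
    box_ind = argmax_overlaps[gt_ind]
    _gt_overlaps[j] = overlaps[box_ind, gt_ind]
    overlaps[box_ind, :] = -1                   # retire the proposal
    overlaps[:, gt_ind] = -1                    # retire the gt
# _gt_overlaps -> [0.9, 0.7]: each gt ends up matched to a distinct proposal.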
Example #25
    def _forward(self, data, im_info, do_vis=False, dataset_name=None, roidb=None, use_gt_labels=False, **rpn_kwargs):
        im_data = data
        if self.training:
            roidb = list(map(lambda x: blob_utils.deserialize(x)[0], roidb))  # only supports one GPU
        if dataset_name is not None:
            dataset_name = blob_utils.deserialize(dataset_name)
        else:
            dataset_name = cfg.TRAIN.DATASETS[0] if self.training else cfg.TEST.DATASETS[0]  # assuming only one dataset per run

        device_id = im_data.get_device()

        return_dict = {}  # A dict to collect return variables

        blob_conv = self.Conv_Body(im_data)
        # if not cfg.MODEL.USE_REL_PYRAMID:
        #     blob_conv_prd = self.Prd_RCNN.Conv_Body(im_data)

        # Build GT rois/classes whenever a roidb is provided; the
        # roidb-driven eval path below also uses them.
        if self.training or roidb is not None:
            gt_rois = np.empty((0, 5), dtype=np.float32)
            gt_classes = np.empty((0), dtype=np.int64)
            for i, r in enumerate(roidb):
                rois_i = r['boxes'] * im_info[i, 2]
                rois_i = np.hstack((i * blob_utils.ones((rois_i.shape[0], 1)), rois_i))
                gt_rois = np.append(gt_rois, rois_i, axis=0)
                gt_classes = np.append(gt_classes, r['gt_classes'], axis=0)

        if self.training or roidb is None:
            rpn_ret = self.RPN(blob_conv, im_info, roidb)
        if cfg.FPN.FPN_ON:
            # Retain only the blobs that will be used for RoI heads. `blob_conv` may include
            # extra blobs that are used for RPN proposals, but not for RoI heads.
            blob_conv = blob_conv[-self.num_roi_levels:]
            # if not cfg.MODEL.USE_REL_PYRAMID:
            #     blob_conv_prd = blob_conv_prd[-self.num_roi_levels:]
            # else:
            #     blob_conv_prd = self.RelPyramid(blob_conv)

        if self.training or roidb is None:
            if cfg.MODEL.SHARE_RES5 and self.training:
                box_feat, res5_feat = self.Box_Head(blob_conv, rpn_ret, use_relu=True)
            else:
                box_feat = self.Box_Head(blob_conv, rpn_ret, use_relu=True)
            cls_score, bbox_pred = self.Box_Outs(box_feat)

        
        # now go through the predicate branch
        use_relu = False if cfg.MODEL.NO_FC7_RELU else True
        if self.training:
            score_thresh = cfg.TEST.SCORE_THRESH
            cls_score = F.softmax(cls_score, -1)
            while score_thresh >= -1e-06:  # a negative value very close to 0.0
                det_rois, det_labels, det_scores, det_dists, det_boxes_all = \
                    self.prepare_det_rois(rpn_ret['rois'], cls_score, bbox_pred, im_info, score_thresh)
                real_area = (det_rois[:, 3] - det_rois[:, 1]) * (det_rois[:, 4] - det_rois[:, 2])
                non_zero_area_inds = np.where(real_area > 0)[0]
                det_rois = det_rois[non_zero_area_inds]
                det_labels = det_labels[non_zero_area_inds]
                det_scores = det_scores[non_zero_area_inds]
                det_dists = det_dists[non_zero_area_inds]
                det_boxes_all = det_boxes_all[non_zero_area_inds]
                # rel_ret = self.RelPN(det_rois, det_labels, det_scores, im_info, dataset_name, roidb)
                valid_len = len(det_rois)
                if valid_len > 0:
                    break
                logger.info('Got {} det_rois when score_thresh={}, changing to {}'.format(
                    valid_len, score_thresh, score_thresh - 0.01))
                score_thresh -= 0.01
            ious = (box_utils.bbox_overlaps(det_rois[:, 1:], gt_rois[:, 1:]) *
                    (det_rois[:, 0][:, None] == gt_rois[:, 0][None, :]))
            det_labels_gt = gt_classes[ious.argmax(-1)]
            det_labels_gt[ious.max(-1) < cfg.TRAIN.FG_THRESH] = 0

        else:
            if roidb is not None:
                # assert len(roidb) == 1
                im_scale = im_info.data.numpy()[:, 2][0]
                im_w = im_info.data.numpy()[:, 1][0]
                im_h = im_info.data.numpy()[:, 0][0]
                
                fpn_ret = {'gt_rois': gt_rois}
                if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_ROIS:
                    lvl_min = cfg.FPN.ROI_MIN_LEVEL
                    lvl_max = cfg.FPN.ROI_MAX_LEVEL
                    rois_blob_names = ['gt_rois']
                    for rois_blob_name in rois_blob_names:
                        # Add per FPN level roi blobs named like: <rois_blob_name>_fpn<lvl>
                        target_lvls = fpn_utils.map_rois_to_fpn_levels(
                            fpn_ret[rois_blob_name][:, 1:5], lvl_min, lvl_max)
                        fpn_utils.add_multilevel_roi_blobs(
                            fpn_ret, rois_blob_name, fpn_ret[rois_blob_name], target_lvls,
                            lvl_min, lvl_max)
                det_feats = self.Box_Head(blob_conv, fpn_ret, rois_name='det_rois', use_relu=True)
                det_dists, _ = self.Box_Outs(det_feats)
                det_boxes_all = None
                if use_gt_labels:
                    det_labels_gt = gt_classes
                    det_labels = gt_classes
            else:

                score_thresh = cfg.TEST.SCORE_THRESH
                while score_thresh >= -1e-06:  # a negative value very close to 0.0
                    det_rois, det_labels, det_scores, det_dists, det_boxes_all = \
                        self.prepare_det_rois(rpn_ret['rois'], cls_score, bbox_pred, im_info, score_thresh)
                    real_area = (det_rois[:, 3] - det_rois[:, 1]) * (det_rois[:, 4] - det_rois[:, 2])
                    non_zero_area_inds = np.where(real_area > 0)[0]
                    det_rois = det_rois[non_zero_area_inds]
                    det_labels = det_labels[non_zero_area_inds]
                    det_scores = det_scores[non_zero_area_inds]
                    det_dists = det_dists[non_zero_area_inds]
                    det_boxes_all = det_boxes_all[non_zero_area_inds]
                    # rel_ret = self.RelPN(det_rois, det_labels, det_scores, im_info, dataset_name, roidb)
                    valid_len = len(det_rois)
                    if valid_len > 0:
                        break
                    logger.info('Got {} det_rois when score_thresh={}, changing to {}'.format(
                        valid_len, score_thresh, score_thresh - 0.01))
                    score_thresh -= 0.01
        return_dict['det_rois'] = det_rois
        num_rois = det_rois.shape[0]
        if not isinstance(det_dists, torch.Tensor):
            assert det_dists.shape[0] == num_rois
            det_dists = torch.from_numpy(det_dists).float().cuda(device_id)
        
        return_dict['det_dists'] = det_dists
        return_dict['det_scores'] = det_scores
        return_dict['blob_conv'] = blob_conv
        return_dict['det_boxes_all'] = det_boxes_all
        assert det_boxes_all.shape[0] == num_rois
        return_dict['det_labels'] = det_labels
        # return_dict['blob_conv_prd'] = blob_conv_prd

        if self.training or use_gt_labels:
            return_dict['det_labels_gt'] = det_labels_gt

        return return_dict
Example #26
def _get_retinanet_blobs(foas, all_anchors, gt_boxes, gt_classes, im_width,
                         im_height):
    total_anchors = all_anchors.shape[0]
    logger.debug('Getting mad blobs: im_height {} im_width: {}'.format(
        im_height, im_width))

    inds_inside = np.arange(all_anchors.shape[0])  #0, 1... 371349
    anchors = all_anchors
    num_inside = len(inds_inside)  #371349

    logger.debug('total_anchors: {}'.format(total_anchors))
    logger.debug('inds_inside: {}'.format(num_inside))
    logger.debug('anchors.shape: {}'.format(anchors.shape))

    # Compute anchor labels:
    # label=1 is positive, 0 is negative, -1 is don't care (ignore)
    labels = np.empty((num_inside, ), dtype=np.float32)
    labels.fill(-1)
    if len(gt_boxes) > 0:
        # Compute overlaps between the anchors and the gt boxes overlaps
        anchor_by_gt_overlap = box_utils.bbox_overlaps(
            anchors, gt_boxes)  # (371349, 17)
        # Map from anchor to gt box that has highest overlap
        anchor_to_gt_argmax = anchor_by_gt_overlap.argmax(
            axis=1)  # (371349,) this is index
        # For each anchor, amount of overlap with most overlapping gt box
        anchor_to_gt_max = anchor_by_gt_overlap[  # (371349,) this is area
            np.arange(num_inside), anchor_to_gt_argmax]

        # Map from gt box to an anchor that has highest overlap
        gt_to_anchor_argmax = anchor_by_gt_overlap.argmax(
            axis=0)  # (17,)  index
        # For each gt box, amount of overlap with most overlapping anchor
        gt_to_anchor_max = anchor_by_gt_overlap[  # (17,)  area
            gt_to_anchor_argmax,
            np.arange(anchor_by_gt_overlap.shape[1])]
        # Find all anchors that share the max overlap amount
        # (this includes many ties)
        anchors_with_max_overlap = np.where(  # (21,) find all anchors with most overlaps
            anchor_by_gt_overlap == gt_to_anchor_max)[0]

        # Fg label: for each gt use anchors with highest overlap
        # (including ties)
        gt_inds = anchor_to_gt_argmax[anchors_with_max_overlap]  # 416
        labels[anchors_with_max_overlap] = gt_classes[gt_inds]
        # Fg label: above threshold IOU
        inds = anchor_to_gt_max >= cfg.RETINANET.POSITIVE_OVERLAP
        gt_inds = anchor_to_gt_argmax[inds]
        labels[inds] = gt_classes[
            gt_inds]  # for all anchors, inds are valued by gt_inds, this gives class values 1~80

    fg_inds = np.where(labels >= 1)[0]
    bg_inds = np.where(anchor_to_gt_max < cfg.RETINANET.NEGATIVE_OVERLAP)[0]
    labels[bg_inds] = 0
    num_fg, num_bg = len(fg_inds), len(bg_inds)

    bbox_targets = np.zeros((num_inside, 4), dtype=np.float32)
    bbox_targets[fg_inds, :] = data_utils.compute_targets(
        anchors[fg_inds, :], gt_boxes[anchor_to_gt_argmax[fg_inds], :])

    # Map up to original set of anchors
    labels = data_utils.unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = data_utils.unmap(bbox_targets,
                                    total_anchors,
                                    inds_inside,
                                    fill=0)

    # Split the generated labels, etc. into labels per each field of anchors
    blobs_out = []
    start_idx = 0
    for foa in foas:
        H = foa.field_size
        W = foa.field_size
        end_idx = start_idx + H * W
        _labels = labels[start_idx:end_idx]
        _bbox_targets = bbox_targets[start_idx:end_idx, :]
        start_idx = end_idx

        # labels output with shape (1, height, width)
        _labels = _labels.reshape((1, 1, H, W))
        # bbox_targets output with shape (1, 4 * A, height, width)
        _bbox_targets = _bbox_targets.reshape(
            (1, H, W, 4)).transpose(0, 3, 1, 2)
        stride = foa.stride
        w = int(im_width / stride)
        h = int(im_height / stride)

        # data for select_smooth_l1 loss
        num_classes = cfg.MODEL.NUM_CLASSES - 1
        inds_4d = np.where(_labels > 0)
        M = len(inds_4d)
        _roi_bbox_targets = np.zeros((0, 4))
        _roi_fg_bbox_locs = np.zeros((0, 4))
        if M > 0:
            im_inds, y, x = inds_4d[0], inds_4d[2], inds_4d[3]
            _roi_bbox_targets = np.zeros((len(im_inds), 4))
            _roi_fg_bbox_locs = np.zeros((len(im_inds), 4))
            lbls = _labels[im_inds, :, y, x]
            for i, lbl in enumerate(lbls):
                l = lbl[0] - 1
                if not cfg.RETINANET.CLASS_SPECIFIC_BBOX:
                    l = 0
                assert l >= 0 and l < num_classes, 'label out of the range'
                _roi_bbox_targets[i, :] = _bbox_targets[:, :, y[i], x[i]]
                _roi_fg_bbox_locs[i, :] = np.array([[0, l, y[i], x[i]]])

        # Note: even when an anchor field has no positive locations (so
        # _roi_bbox_targets and _roi_fg_bbox_locs are empty), no zero padding
        # is added here. This code runs once per foa, and padding every anchor
        # field would be wrong; instead, all anchors of an image's FPN levels
        # are summed first and only then checked for emptiness.
        blobs_out.append(
            dict(
                retnet_cls_labels=_labels[:, :, 0:h, 0:w].astype(np.int32),
                retnet_roi_bbox_targets=_roi_bbox_targets.astype(np.float32),
                retnet_roi_fg_bbox_locs=_roi_fg_bbox_locs.astype(np.float32),
            ))

    out_num_fg = np.array([num_fg + 1.0], dtype=np.float32)
    out_num_bg = (np.array([num_bg + 1.0]) * (cfg.MODEL.NUM_CLASSES - 1) +
                  out_num_fg * (cfg.MODEL.NUM_CLASSES - 2))
    return blobs_out, out_num_fg, out_num_bg
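The tie-inclusive foreground assignment used above (every anchor that attains a gt box's maximum overlap becomes a positive) can be checked on a toy overlap matrix; this standalone numpy sketch is illustrative only and not part of the original code:

import numpy as np

# Toy anchor-by-gt IoU matrix: 4 anchors x 2 gt boxes.
overlap = np.array([[0.1, 0.7],
                    [0.7, 0.2],
                    [0.7, 0.0],
                    [0.3, 0.7]])
gt_to_anchor_max = overlap.max(axis=0)  # best IoU per gt: [0.7, 0.7]
anchors_with_max_overlap = np.where(overlap == gt_to_anchor_max)[0]
print(anchors_with_max_overlap)  # [0 1 2 3] -- all ties are included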
    def forward(self, proposals, gt_boxes):
        """
        Args:
            proposals (Tensor): Region proposals in (0, x1, y1, x2, y2) format coming from RPN.
            gt_boxes (Tensor): Ground-truth boxes in (x1, y1, x2, y2, class, person_id) format.

        Returns:
            proposals (Tensor[N, 5]): Sampled proposals.
            cls_labels (Tensor[N]): Ground-truth classification labels of the proposals.
            pid_labels (Tensor[N]): Ground-truth person IDs of the proposals.
            deltas (Tensor[N, num_classes * 4]): Ground-truth regression deltas of the proposals.
            inside_weights, outside_weights (Tensor): Used to calculate smooth_l1_loss.
        """
        assert torch.all(proposals[:, 0] == 0), "Single batch only."

        # Include ground-truth boxes in the set of candidate proposals
        zeros = gt_boxes.new(gt_boxes.shape[0], 1).zero_()
        proposals = torch.cat(
            (proposals, torch.cat((zeros, gt_boxes[:, :4]), dim=1)), dim=0)

        overlaps = bbox_overlaps(proposals[:, 1:5], gt_boxes[:, :4])
        max_overlaps, argmax_overlaps = overlaps.max(dim=1)
        cls_labels = gt_boxes[argmax_overlaps, 4]
        pid_labels = gt_boxes[argmax_overlaps, 5]

        # Sample some proposals at the specified positive and negative ratio
        batch_size = cfg.TRAIN.BATCH_SIZE
        num_fg = round(cfg.TRAIN.FG_FRACTION * batch_size)

        # Sample foreground proposals
        fg_inds = torch.nonzero(max_overlaps >= cfg.TRAIN.FG_THRESH)[:, 0]
        num_fg = min(num_fg, fg_inds.numel())
        if fg_inds.numel() > 0:
            if "DEBUG" in os.environ:
                fg_inds = fg_inds[:num_fg]
            else:
                fg_inds = torch_rand_choice(fg_inds, num_fg)

        # Sample background proposals
        bg_inds = torch.nonzero(
            (max_overlaps < cfg.TRAIN.BG_THRESH_HI)
            & (max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[:, 0]
        num_bg = min(batch_size - num_fg, bg_inds.numel())
        if bg_inds.numel() > 0:
            if "DEBUG" in os.environ:
                bg_inds = bg_inds[:num_bg]
            else:
                bg_inds = torch_rand_choice(bg_inds, num_bg)

        # assert num_fg + num_bg == batch_size

        keep = torch.cat((fg_inds, bg_inds))
        cls_labels = cls_labels[keep]
        pid_labels = pid_labels[keep]
        proposals = proposals[keep]

        # Correct the cls_labels and pid_labels of bg proposals
        cls_labels[num_fg:] = 0
        pid_labels[num_fg:] = self.bg_pid_label

        deltas, inside_weights, outside_weights = self.get_regression_targets(
            proposals[:, 1:5],
            gt_boxes[argmax_overlaps][keep, :4],
            cls_labels,
            self.num_classes,
        )

        return (
            proposals,
            cls_labels.long(),
            pid_labels.long(),
            deltas,
            inside_weights,
            outside_weights,
        )
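torch_rand_choice is not defined in this snippet. A minimal sketch of such a helper, under the assumption that it samples k elements of a 1-D tensor uniformly without replacement, could be:

import torch

def torch_rand_choice(x, k):
    # Sample k elements from a 1-D tensor without replacement
    # (assumed behavior of the undefined helper above).
    perm = torch.randperm(x.numel(), device=x.device)[:k]
    return x[perm]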
Example #28
def _merge_proposal_boxes_into_roidb(roidb, box_list):
    """Add proposal boxes to each roidb entry."""
    assert len(box_list) == len(roidb)
    for i, entry in enumerate(roidb):
        boxes = box_list[i]
        num_boxes = boxes.shape[0]
        gt_overlaps = np.zeros(
            (num_boxes, entry['gt_overlaps'].shape[1]),
            dtype=entry['gt_overlaps'].dtype
        )
        box_to_gt_ind_map = -np.ones(
            (num_boxes), dtype=entry['box_to_gt_ind_map'].dtype
        )

        # Note: unlike in other places, here we intentionally include all gt
        # rois, even ones marked as crowd. Boxes that overlap with crowds will
        # be filtered out later (see: _filter_crowd_proposals).
        gt_inds = np.where(entry['gt_classes'] > 0)[0]
        if len(gt_inds) > 0:
            gt_boxes = entry['boxes'][gt_inds, :]
            gt_classes = entry['gt_classes'][gt_inds]
            proposal_to_gt_overlaps = box_utils.bbox_overlaps(
                boxes.astype(dtype=np.float32, copy=False),
                gt_boxes.astype(dtype=np.float32, copy=False)
            )
            # Gt box that overlaps each input box the most
            # (ties are broken arbitrarily by class order)
            argmaxes = proposal_to_gt_overlaps.argmax(axis=1)
            # Amount of that overlap
            maxes = proposal_to_gt_overlaps.max(axis=1)
            # Those boxes with non-zero overlap with gt boxes
            I = np.where(maxes > 0)[0]
            # Record max overlaps with the class of the appropriate gt box
            gt_overlaps[I, gt_classes[argmaxes[I]]] = maxes[I]
            box_to_gt_ind_map[I] = gt_inds[argmaxes[I]]
        entry['boxes'] = np.append(
            entry['boxes'],
            boxes.astype(entry['boxes'].dtype, copy=False),
            axis=0
        )
        entry['gt_classes'] = np.append(
            entry['gt_classes'],
            np.zeros((num_boxes), dtype=entry['gt_classes'].dtype)
        )
        entry['seg_areas'] = np.append(
            entry['seg_areas'],
            np.zeros((num_boxes), dtype=entry['seg_areas'].dtype)
        )
        entry['gt_overlaps'] = np.append(
            entry['gt_overlaps'].toarray(), gt_overlaps, axis=0
        )
        entry['gt_overlaps'] = scipy.sparse.csr_matrix(entry['gt_overlaps'])
        entry['is_crowd'] = np.append(
            entry['is_crowd'],
            np.zeros((num_boxes), dtype=entry['is_crowd'].dtype)
        )
        entry['box_to_gt_ind_map'] = np.append(
            entry['box_to_gt_ind_map'],
            box_to_gt_ind_map.astype(
                entry['box_to_gt_ind_map'].dtype, copy=False
            )
        )
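The per-class overlap recording above (gt_overlaps[I, gt_classes[argmaxes[I]]] = maxes[I]) can be traced on toy data; a standalone numpy sketch, illustrative only:

import numpy as np

maxes = np.array([0.8, 0.0, 0.5])   # best IoU of each of 3 proposals
argmaxes = np.array([0, 0, 1])      # index of each proposal's best gt
gt_classes = np.array([3, 7])       # classes of the 2 gt boxes
gt_overlaps = np.zeros((3, 9))      # proposals x classes
I = np.where(maxes > 0)[0]          # proposals with non-zero overlap
gt_overlaps[I, gt_classes[argmaxes[I]]] = maxes[I]
# Row 0 gets 0.8 in column 3, row 2 gets 0.5 in column 7; row 1 stays zero.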
def evaluate_box_proposals(
    json_dataset, roidb, thresholds=None, area='all', limit=None
):
    """Evaluate detection proposal recall metrics. This function is a much
    faster alternative to the official COCO API recall evaluation code. However,
    it produces slightly different results.
    """
    # Record max overlap value for each gt box
    # Return vector of overlap values
    areas = {
        'all': 0,
        'small': 1,
        'medium': 2,
        'large': 3,
        '96-128': 4,
        '128-256': 5,
        '256-512': 6,
        '512-inf': 7}
    area_ranges = [
        [0**2, 1e5**2],    # all
        [0**2, 32**2],     # small
        [32**2, 96**2],    # medium
        [96**2, 1e5**2],   # large
        [96**2, 128**2],   # 96-128
        [128**2, 256**2],  # 128-256
        [256**2, 512**2],  # 256-512
        [512**2, 1e5**2]]  # 512-inf
    assert area in areas, 'Unknown area range: {}'.format(area)
    area_range = area_ranges[areas[area]]
    gt_overlaps = np.zeros(0)
    num_pos = 0
    for entry in roidb:
        gt_inds = np.where(
            (entry['gt_classes'] > 0) & (entry['is_crowd'] == 0))[0]
        gt_boxes = entry['boxes'][gt_inds, :]
        gt_areas = entry['seg_areas'][gt_inds]
        valid_gt_inds = np.where(
            (gt_areas >= area_range[0]) & (gt_areas <= area_range[1]))[0]
        gt_boxes = gt_boxes[valid_gt_inds, :]
        num_pos += len(valid_gt_inds)
        non_gt_inds = np.where(entry['gt_classes'] == 0)[0]
        boxes = entry['boxes'][non_gt_inds, :]
        if boxes.shape[0] == 0:
            continue
        if limit is not None and boxes.shape[0] > limit:
            boxes = boxes[:limit, :]
        overlaps = box_utils.bbox_overlaps(
            boxes.astype(dtype=np.float32, copy=False),
            gt_boxes.astype(dtype=np.float32, copy=False))
        _gt_overlaps = np.zeros((gt_boxes.shape[0]))
        for j in range(min(boxes.shape[0], gt_boxes.shape[0])):
            # find which proposal box maximally covers each gt box
            argmax_overlaps = overlaps.argmax(axis=0)
            # and get the iou amount of coverage for each gt box
            max_overlaps = overlaps.max(axis=0)
            # find which gt box is 'best' covered (i.e. 'best' = most iou)
            gt_ind = max_overlaps.argmax()
            gt_ovr = max_overlaps.max()
            assert gt_ovr >= 0
            # find the proposal box that covers the best covered gt box
            box_ind = argmax_overlaps[gt_ind]
            # record the iou coverage of this gt box
            _gt_overlaps[j] = overlaps[box_ind, gt_ind]
            assert _gt_overlaps[j] == gt_ovr
            # mark the proposal box and the gt box as used
            overlaps[box_ind, :] = -1
            overlaps[:, gt_ind] = -1
        # append recorded iou coverage level
        gt_overlaps = np.hstack((gt_overlaps, _gt_overlaps))

    gt_overlaps = np.sort(gt_overlaps)
    if thresholds is None:
        step = 0.05
        thresholds = np.arange(0.5, 0.95 + 1e-5, step)
    recalls = np.zeros_like(thresholds)
    # compute recall for each iou threshold
    for i, t in enumerate(thresholds):
        recalls[i] = (gt_overlaps >= t).sum() / float(num_pos)
    # ar = 2 * np.trapz(recalls, thresholds)
    ar = recalls.mean()
    return {'ar': ar, 'recalls': recalls, 'thresholds': thresholds,
            'gt_overlaps': gt_overlaps, 'num_pos': num_pos}
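The greedy proposal-to-gt matching loop above can be traced on a small IoU matrix (rows are proposals, columns are gt boxes); a standalone sketch for illustration:

import numpy as np

overlaps = np.array([[0.9, 0.3],
                     [0.8, 0.6]], dtype=np.float32)  # 2 proposals x 2 gt
gt_cov = np.zeros(2)
for j in range(2):
    gt_ind = overlaps.max(axis=0).argmax()     # best-covered gt box
    box_ind = overlaps.argmax(axis=0)[gt_ind]  # proposal covering it
    gt_cov[j] = overlaps[box_ind, gt_ind]
    overlaps[box_ind, :] = -1                  # mark both as used
    overlaps[:, gt_ind] = -1
print(gt_cov)  # [0.9 0.6]: each gt ends up matched to a distinct proposal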
Example #30
def add_refine_local_mask_blobs(blobs, sampled_boxes, roidb, im_scale,
                                batch_idx, data):
    """Add RefineNet Mask specific blobs to the input blob dictionary."""
    # Prepare the mask targets by associating one gt mask to each training roi
    # that has a fg (non-bg) class label.
    M = cfg.REFINENET.RESOLUTION
    up_scale = cfg.REFINENET.UP_SCALE
    polys_gt_inds = np.where((roidb['gt_classes'] > 0)
                             & (roidb['is_crowd'] == 0))[0]
    gt_classes = roidb['gt_classes'][polys_gt_inds]
    polys_gt = [roidb['segms'][i] for i in polys_gt_inds]
    boxes_from_polys = segm_utils.polys_to_boxes(polys_gt)
    fg_inds = np.where(blobs['labels_int32'] > 0)[0]
    roi_has_mask = blobs['labels_int32'].copy()
    roi_has_mask[roi_has_mask > 0] = 1

    # Define size variables
    inp_h, inp_w = data.shape[2], data.shape[3]
    pad_img_h, pad_img_w = inp_h / im_scale, inp_w / im_scale

    if fg_inds.shape[0] > 0:
        # Class labels for the foreground rois
        mask_class_labels = blobs['labels_int32'][fg_inds]
        masks = blob_utils.zeros((fg_inds.shape[0], M**2), int32=True)

        # Find overlap between all foreground rois and the bounding boxes
        # enclosing each segmentation
        rois_fg = sampled_boxes[fg_inds]
        overlaps_bbfg_bbpolys = box_utils.bbox_overlaps(
            rois_fg.astype(np.float32, copy=False),
            boxes_from_polys.astype(np.float32, copy=False))
        # Map from each fg rois to the index of the mask with highest overlap
        # (measured by bbox overlap)
        fg_polys_inds = np.argmax(overlaps_bbfg_bbpolys, axis=1)

        # Expand the foreground rois by a factor of up_scale and
        # clip by the padded image boundary
        pad_rois_fg = box_utils.expand_boxes(rois_fg, up_scale)
        pad_rois_fg = box_utils.clip_boxes_to_image(pad_rois_fg, pad_img_h,
                                                    pad_img_w)

        if cfg.REFINENET.ONLY_USE_CROWDED_SAMPLES:
            # Only use crowded samples to train the RefineNet
            THRES = cfg.REFINENET.OVERLAP_THRESHOLD
            for i in range(rois_fg.shape[0]):
                overlap = overlaps_bbfg_bbpolys[i]
                if np.sum(overlap > THRES) > 1:
                    # if has multiple instances overlapped, use it for training
                    fg_polys_ind = fg_polys_inds[i]
                    poly_gt = polys_gt[fg_polys_ind]
                    pad_roi_fg = pad_rois_fg[i]
                    # Rasterize the portion of the polygon mask within the given fg roi
                    # to an M x M binary image
                    mask = segm_utils.polys_to_mask_wrt_box(
                        poly_gt, pad_roi_fg, M)
                    mask = np.array(mask > 0,
                                    dtype=np.int32)  # Ensure it's binary
                    masks[i, :] = np.reshape(mask, M**2)

                else:  # Only one instance, then set label to be -1 (ignored)
                    masks[i, :] = -1
                    mask_class_labels[i] = 0
        elif cfg.REFINENET.ASSIGN_LARGER_WEIGHT_FOR_CROWDED_SAMPLES:
            loss_weights = blob_utils.ones((rois_fg.shape[0], ))
            for i in range(rois_fg.shape[0]):
                fg_polys_ind = fg_polys_inds[i]
                poly_gt = polys_gt[fg_polys_ind]
                pad_roi_fg = pad_rois_fg[i]
                class_label = mask_class_labels[i]

                # Rasterize the portion of the polygon mask within the given
                # fg roi to an M x M binary image
                mask = segm_utils.polys_to_mask_wrt_box(poly_gt, pad_roi_fg, M)
                mask = np.array(mask > 0, dtype=np.int32)  # Ensure it's binary
                masks[i, :] = np.reshape(mask, M**2)

                # Now determine the weight for this roi. If any other instance
                # of the same class falls inside the roi's box, we expect it to
                # be a hard (crowded) sample and assign it a larger weight.
                for j in range(len(polys_gt)):
                    if j == fg_polys_ind:
                        continue
                    if gt_classes[j] == class_label:  # only same class is valid
                        mask = segm_utils.polys_to_mask_wrt_box(
                            polys_gt[j], pad_roi_fg, M)
                        # and check if any part of it falls inside the bbox
                        is_inside_bbox = (np.sum(mask) > 0)
                        if is_inside_bbox:
                            loss_weights[i] = cfg.REFINENET.WEIGHT_LOSS_CROWDED
                            break  # early stop

        else:
            # add fg targets
            for i in range(rois_fg.shape[0]):
                fg_polys_ind = fg_polys_inds[i]
                poly_gt = polys_gt[fg_polys_ind]
                pad_roi_fg = pad_rois_fg[i]
                # Rasterize the portion of the polygon mask within the given fg roi
                # to an M x M binary image
                mask = segm_utils.polys_to_mask_wrt_box(poly_gt, pad_roi_fg, M)
                mask = np.array(mask > 0, dtype=np.int32)  # Ensure it's binary
                masks[i, :] = np.reshape(mask, M**2)

    else:  # If there are no fg masks (it does happen)
        # The network cannot handle empty blobs, so we must provide a mask
        # We simply take the first bg roi, give it an all -1's mask (ignore
        # label), and label it with class zero (bg).
        bg_inds = np.where(blobs['labels_int32'] == 0)[0]
        # pad_rois_fg is actually one background roi, but that's ok because ...
        pad_rois_fg = sampled_boxes[bg_inds[0]].reshape((1, -1))
        # We give it an -1's blob (ignore label)
        masks = -blob_utils.ones((1, M**2), int32=True)
        # We label it with class = 0 (background)
        mask_class_labels = blob_utils.zeros((1, ))
        # Mark that the first roi has a mask
        roi_has_mask[0] = 1

    if cfg.MRCNN.CLS_SPECIFIC_MASK:
        masks = _expand_to_class_specific_mask_targets(masks,
                                                       mask_class_labels)

    # Scale rois_fg and format as (batch_idx, x1, y1, x2, y2)
    pad_rois_fg = (pad_rois_fg.astype(np.float32)) * im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones((pad_rois_fg.shape[0], 1))
    pad_rois_fg = np.hstack((repeated_batch_idx, pad_rois_fg)).astype(np.int32)

    # Update blobs dict with Refine-Net blobs
    blobs['refined_mask_rois'] = pad_rois_fg
    blobs['roi_has_refined_mask_int32'] = roi_has_mask
    blobs['refined_masks_int32'] = masks

    if cfg.REFINENET.ASSIGN_LARGER_WEIGHT_FOR_CROWDED_SAMPLES:
        blobs['loss_weights'] = loss_weights
Example #31
def add_refine_global_mask_blobs(blobs, sampled_boxes, roidb, im_scale,
                                 batch_idx, data):
    """Add RefineNet Mask specific blobs to the input blob dictionary."""
    # Prepare the mask targets by associating one gt mask to each training roi
    # that has a fg (non-bg) class label.
    dst_scale = cfg.REFINENET.SPATIAL_SCALE
    polys_gt_inds = np.where((roidb['gt_classes'] > 0)
                             & (roidb['is_crowd'] == 0))[0]
    polys_gt = [roidb['segms'][i] for i in polys_gt_inds]
    boxes_from_polys = segm_utils.polys_to_boxes(polys_gt)
    fg_inds = np.where(blobs['labels_int32'] > 0)[0]
    roi_has_mask = blobs['labels_int32'].copy()
    roi_has_mask[roi_has_mask > 0] = 1

    # Define size variables
    inp_h, inp_w = data.shape[2], data.shape[3]
    out_h, out_w = int(inp_h * dst_scale), int(inp_w * dst_scale)

    if fg_inds.shape[0] > 0:
        # Class labels for the foreground rois
        mask_class_labels = blobs['labels_int32'][fg_inds]
        masks = blob_utils.zeros((fg_inds.shape[0], out_h, out_w), int32=True)

        # Find overlap between all foreground rois and the bounding boxes
        # enclosing each segmentation
        rois_fg = sampled_boxes[fg_inds]
        overlaps_bbfg_bbpolys = box_utils.bbox_overlaps(
            rois_fg.astype(np.float32, copy=False),
            boxes_from_polys.astype(np.float32, copy=False))
        # Map from each fg rois to the index of the mask with highest overlap
        # (measured by bbox overlap)
        fg_polys_inds = np.argmax(overlaps_bbfg_bbpolys, axis=1)

        # narrow scale and size
        scale = im_scale * dst_scale
        im_h, im_w = roidb['height'], roidb['width']
        im_label_h, im_label_w = int(im_h * scale), int(im_w * scale)

        # add fg targets
        for i in range(rois_fg.shape[0]):
            fg_polys_ind = fg_polys_inds[i]
            poly_gt = polys_gt[fg_polys_ind]
            roi_fg = rois_fg[i]
            # Rasterize the portion of the polygon mask within the given fg roi
            # to an im_label_h x im_label_w binary image
            mask = segm_utils.polys_to_mask_scaled(poly_gt, im_h, im_w, scale)
            mask = np.array(mask > 0, dtype=np.int32)  # Ensure it's binary
            masks[i, 0:im_label_h, 0:im_label_w] = mask

        masks = np.reshape(masks, (-1, out_h * out_w))

    else:  # If there are no fg masks (it does happen)
        # The network cannot handle empty blobs, so we must provide a mask
        # We simply take the first bg roi, give it an all -1's mask (ignore
        # label), and label it with class zero (bg).
        bg_inds = np.where(blobs['labels_int32'] == 0)[0]
        # rois_fg is actually one background roi, but that's ok because ...
        rois_fg = sampled_boxes[bg_inds[0]].reshape((1, -1))
        # We give it an -1's blob (ignore label)
        masks = -blob_utils.ones((1, out_h * out_w), int32=True)
        # We label it with class = 0 (background)
        mask_class_labels = blob_utils.zeros((1, ))
        # Mark that the first roi has a mask
        roi_has_mask[0] = 1

    if cfg.MRCNN.CLS_SPECIFIC_MASK:
        masks = _expand_to_class_specific_mask_targets(masks,
                                                       mask_class_labels)

    # Scale rois_fg and format as (batch_idx, x1, y1, x2, y2)
    rois_fg *= im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones((rois_fg.shape[0], 1))
    rois_fg = np.hstack((repeated_batch_idx, rois_fg))

    # Update blobs dict with Refine-Net blobs
    blobs['refined_mask_rois'] = rois_fg
    blobs['roi_has_refined_mask_int32'] = roi_has_mask
    blobs['refined_masks_int32'] = masks
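blob_utils.zeros and blob_utils.ones appear throughout these snippets; minimal stand-ins, assuming the int32 flag simply selects the dtype, would be:

import numpy as np

def zeros(shape, int32=False):
    # Zero-filled blob, float32 by default (assumed blob_utils semantics).
    return np.zeros(shape, dtype=np.int32 if int32 else np.float32)

def ones(shape, int32=False):
    # One-filled blob, float32 by default (assumed blob_utils semantics).
    return np.ones(shape, dtype=np.int32 if int32 else np.float32)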
Example #32
    def forward(self,
                det_rois,
                det_labels,
                det_scores,
                im_info,
                dataset_name,
                roidb=None):
        """
        det_rois: feature maps from the backbone network. (Variable)
        im_info: (CPU Variable)
        roidb: (list of ndarray)
        """

        # Get pairwise proposals first
        im_inds = det_rois[:, 0]
        is_cand = im_inds[:, None] == im_inds[None, :]
        is_cand.reshape(-1)[diagonal_inds(is_cand)] = False
        is_empty = np.where(is_cand.any(1) == 0)[0]
        if self.overlap:
            is_cand = is_cand & (box_utils.bbox_overlaps(
                det_rois[:, 1:], det_rois[:, 1:]) > 0)
        if is_empty.size > 0:
            is_cand[is_empty, is_empty] = True

        # self-paired rois were already removed via the diagonal above
        sbj_inds, obj_inds = np.where(is_cand)
        sbj_rois = det_rois[sbj_inds]
        obj_rois = det_rois[obj_inds]

        im_scale = im_info.data.numpy()[:, 2][0]
        sbj_boxes = sbj_rois[:, 1:] / im_scale
        obj_boxes = obj_rois[:, 1:] / im_scale
        # (pairs whose boxes do not overlap in the original scale were already
        # filtered out above when self.overlap is set)

        return_dict = {}
        sbj_labels = det_labels[sbj_inds]
        obj_labels = det_labels[obj_inds]
        sbj_scores = det_scores[sbj_inds]
        obj_scores = det_scores[obj_inds]
        rel_rois = box_utils_rel.rois_union(sbj_rois, obj_rois)
        return_dict['det_rois'] = det_rois
        return_dict['sbj_inds'] = sbj_inds
        return_dict['obj_inds'] = obj_inds
        return_dict['sbj_rois'] = sbj_rois
        return_dict['obj_rois'] = obj_rois
        return_dict['rel_rois'] = rel_rois
        return_dict['sbj_labels'] = sbj_labels
        return_dict['obj_labels'] = obj_labels
        return_dict['sbj_scores'] = sbj_scores
        return_dict['obj_scores'] = obj_scores
        return_dict['fg_size'] = np.array([sbj_rois.shape[0]], dtype=np.int32)

        if cfg.FPN.FPN_ON and cfg.FPN.MULTILEVEL_ROIS:
            lvl_min = cfg.FPN.ROI_MIN_LEVEL
            lvl_max = cfg.FPN.ROI_MAX_LEVEL
            # when use min_rel_area, the same sbj/obj area could be mapped to different feature levels
            # when they are associated with different relationships
            # Thus we cannot get det_rois features then gather sbj/obj features
            # The only way is gather sbj/obj per relationship, thus need to return sbj_rois/obj_rois
            rois_blob_names = ['det_rois', 'rel_rois']
            for rois_blob_name in rois_blob_names:
                # Add per FPN level roi blobs named like: <rois_blob_name>_fpn<lvl>
                target_lvls = fpn_utils.map_rois_to_fpn_levels(
                    return_dict[rois_blob_name][:, 1:5], lvl_min, lvl_max)
                fpn_utils.add_multilevel_roi_blobs(return_dict, rois_blob_name,
                                                   return_dict[rois_blob_name],
                                                   target_lvls, lvl_min,
                                                   lvl_max)

        return return_dict
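diagonal_inds is not defined in this snippet. Assuming it returns the flat indices of the main diagonal of a square matrix (so the reshape(-1) assignment above clears self-pairs), a minimal sketch is:

import numpy as np

def diagonal_inds(x):
    # Flat indices of the main diagonal of a square 2-D array (assumed).
    n = x.shape[0]
    return np.arange(n) * (n + 1)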
Example #33
0
def add_mask_rcnn_blobs(blobs, sampled_boxes, roidb, im_scale, batch_idx):
    """Add Mask R-CNN specific blobs to the input blob dictionary."""
    # Prepare the mask targets by associating one gt mask to each training roi
    # that has a fg (non-bg) class label.
    M = cfg.MRCNN.RESOLUTION
    polys_gt_inds = np.where(
        (roidb['gt_classes'] > 0) & (roidb['is_crowd'] == 0)
    )[0]
    polys_gt = [roidb['segms'][i] for i in polys_gt_inds]
    boxes_from_polys = segm_utils.polys_to_boxes(polys_gt)
    # Keep only a subset of classes (set A in the paper) for mask training
    if cfg.TRAIN.MRCNN_FILTER_LABELS:
        keep_label_set = set(cfg.TRAIN.MRCNN_LABELS_TO_KEEP)
        labels_int32 = blobs['labels_int32']
        labels_int32_keep = np.array(
            [(l if l in keep_label_set else 0) for l in labels_int32],
            dtype=labels_int32.dtype)
    else:
        labels_int32_keep = blobs['labels_int32']
    fg_inds = np.where(labels_int32_keep > 0)[0]
    roi_has_mask = labels_int32_keep.copy()
    roi_has_mask[roi_has_mask > 0] = 1

    if fg_inds.shape[0] > 0:
        # Class labels for the foreground rois
        mask_class_labels = blobs['labels_int32'][fg_inds]
        masks = blob_utils.zeros((fg_inds.shape[0], M**2), int32=True)

        # Find overlap between all foreground rois and the bounding boxes
        # enclosing each segmentation
        rois_fg = sampled_boxes[fg_inds]
        overlaps_bbfg_bbpolys = box_utils.bbox_overlaps(
            rois_fg.astype(np.float32, copy=False),
            boxes_from_polys.astype(np.float32, copy=False)
        )
        # Map from each fg rois to the index of the mask with highest overlap
        # (measured by bbox overlap)
        fg_polys_inds = np.argmax(overlaps_bbfg_bbpolys, axis=1)

        # add fg targets
        for i in range(rois_fg.shape[0]):
            fg_polys_ind = fg_polys_inds[i]
            poly_gt = polys_gt[fg_polys_ind]
            roi_fg = rois_fg[i]
            # Rasterize the portion of the polygon mask within the given fg roi
            # to an M x M binary image
            mask = segm_utils.polys_to_mask_wrt_box(poly_gt, roi_fg, M)
            mask = np.array(mask > 0, dtype=np.int32)  # Ensure it's binary
            masks[i, :] = np.reshape(mask, M**2)
    else:  # If there are no fg masks (it does happen)
        # The network cannot handle empty blobs, so we must provide a mask
        # We simply take the first bg roi, give it an all -1's mask (ignore
        # label), and label it with class zero (bg).
        bg_inds = np.where(blobs['labels_int32'] == 0)[0]
        # rois_fg is actually one background roi, but that's ok because ...
        rois_fg = sampled_boxes[bg_inds[0]].reshape((1, -1))
        # We give it an -1's blob (ignore label)
        masks = -blob_utils.ones((1, M**2), int32=True)
        # We label it with class = 0 (background)
        mask_class_labels = blob_utils.zeros((1, ))
        # Mark that the first roi has a mask
        roi_has_mask[0] = 1

    if cfg.MRCNN.CLS_SPECIFIC_MASK:
        masks = _expand_to_class_specific_mask_targets(masks, mask_class_labels)

    # Scale rois_fg and format as (batch_idx, x1, y1, x2, y2)
    rois_fg *= im_scale
    repeated_batch_idx = batch_idx * blob_utils.ones((rois_fg.shape[0], 1))
    rois_fg = np.hstack((repeated_batch_idx, rois_fg))

    # Update blobs dict with Mask R-CNN blobs
    blobs['mask_rois'] = rois_fg
    blobs['roi_has_mask_int32'] = roi_has_mask
    blobs['masks_int32'] = masks
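_expand_to_class_specific_mask_targets is used in several snippets but not shown. A plausible sketch, assuming it scatters each M x M mask into the channel of its class and marks everything else with the ignore label -1, is:

import numpy as np

def _expand_to_class_specific_mask_targets(masks, mask_class_labels):
    # masks: (N, M**2) int32; returns (N, num_classes * M**2) with -1
    # everywhere except each roi's own class channel (assumed semantics).
    num_classes = cfg.MODEL.NUM_CLASSES
    m_sq = masks.shape[1]
    out = -np.ones((masks.shape[0], num_classes * m_sq), dtype=np.int32)
    for i in range(masks.shape[0]):
        cls = int(mask_class_labels[i])
        if cls > 0:  # background rois keep the all -1 (ignore) target
            out[i, cls * m_sq:(cls + 1) * m_sq] = masks[i]
    return out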
def _compute_pairwise_iou(a, b):
    """
    a, b (np.ndarray): boxes of shape Nx4 and Mx4.
    The output is NxM, one IoU per combination of boxes.
    """
    return box_utils.bbox_overlaps(a, b)
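As a quick sanity check of the pairwise semantics (assuming box_utils is importable as in the snippets above; boxes are (x1, y1, x2, y2)):

import numpy as np

a = np.array([[0., 0., 10., 10.]], dtype=np.float32)
b = np.array([[0., 0., 10., 10.],
              [5., 5., 15., 15.]], dtype=np.float32)
iou = _compute_pairwise_iou(a, b)  # shape (1, 2)
# iou[0, 0] is 1.0 (identical boxes); iou[0, 1] is the partial overlap.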
Example #36
def _get_rpn_blobs(im_height, im_width, foas, all_anchors, gt_boxes):
    total_anchors = all_anchors.shape[0]
    straddle_thresh = cfg.TRAIN.RPN_STRADDLE_THRESH

    if straddle_thresh >= 0:
        # Only keep anchors inside the image by a margin of straddle_thresh
        # Set TRAIN.RPN_STRADDLE_THRESH to -1 (or a large value) to keep all
        # anchors
        inds_inside = np.where(
            (all_anchors[:, 0] >= -straddle_thresh) &
            (all_anchors[:, 1] >= -straddle_thresh) &
            (all_anchors[:, 2] < im_width + straddle_thresh) &
            (all_anchors[:, 3] < im_height + straddle_thresh)
        )[0]
        # keep only inside anchors
        anchors = all_anchors[inds_inside, :]
    else:
        inds_inside = np.arange(all_anchors.shape[0])
        anchors = all_anchors
    num_inside = len(inds_inside)

    logger.debug('total_anchors: {}'.format(total_anchors))
    logger.debug('inds_inside: {}'.format(num_inside))
    logger.debug('anchors.shape: {}'.format(anchors.shape))

    # Compute anchor labels:
    # label=1 is positive, 0 is negative, -1 is don't care (ignore)
    labels = np.empty((num_inside, ), dtype=np.int32)
    labels.fill(-1)
    if len(gt_boxes) > 0:
        # Compute overlaps between the anchors and the gt boxes overlaps
        anchor_by_gt_overlap = box_utils.bbox_overlaps(anchors, gt_boxes)
        # Map from anchor to gt box that has highest overlap
        anchor_to_gt_argmax = anchor_by_gt_overlap.argmax(axis=1)
        # For each anchor, amount of overlap with most overlapping gt box
        anchor_to_gt_max = anchor_by_gt_overlap[np.arange(num_inside),
                                                anchor_to_gt_argmax]

        # Map from gt box to an anchor that has highest overlap
        gt_to_anchor_argmax = anchor_by_gt_overlap.argmax(axis=0)
        # For each gt box, amount of overlap with most overlapping anchor
        gt_to_anchor_max = anchor_by_gt_overlap[
            gt_to_anchor_argmax,
            np.arange(anchor_by_gt_overlap.shape[1])
        ]
        # Find all anchors that share the max overlap amount
        # (this includes many ties)
        anchors_with_max_overlap = np.where(
            anchor_by_gt_overlap == gt_to_anchor_max
        )[0]

        # Fg label: for each gt use anchors with highest overlap
        # (including ties)
        labels[anchors_with_max_overlap] = 1
        # Fg label: above threshold IOU
        labels[anchor_to_gt_max >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

    # subsample positive labels if we have too many
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCH_SIZE_PER_IM)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(
            fg_inds, size=(len(fg_inds) - num_fg), replace=False
        )
        labels[disable_inds] = -1
    fg_inds = np.where(labels == 1)[0]

    # subsample negative labels if we have too many
    # (samples with replacement, but since the set of bg inds is large most
    # samples will not have repeats)
    num_bg = cfg.TRAIN.RPN_BATCH_SIZE_PER_IM - np.sum(labels == 1)
    bg_inds = np.where(anchor_to_gt_max < cfg.TRAIN.RPN_NEGATIVE_OVERLAP)[0]
    if len(bg_inds) > num_bg:
        enable_inds = bg_inds[npr.randint(len(bg_inds), size=num_bg)]
        labels[enable_inds] = 0
    bg_inds = np.where(labels == 0)[0]

    bbox_targets = np.zeros((num_inside, 4), dtype=np.float32)
    bbox_targets[fg_inds, :] = data_utils.compute_targets(
        anchors[fg_inds, :], gt_boxes[anchor_to_gt_argmax[fg_inds], :]
    )

    # Bbox regression loss has the form:
    #   loss(x) = weight_outside * L(weight_inside * x)
    # Inside weights allow us to set zero loss on an element-wise basis
    # Bbox regression is only trained on positive examples so we set their
    # weights to 1.0 (or otherwise if config is different) and 0 otherwise
    bbox_inside_weights = np.zeros((num_inside, 4), dtype=np.float32)
    bbox_inside_weights[labels == 1, :] = (1.0, 1.0, 1.0, 1.0)

    # The bbox regression loss only averages by the number of images in the
    # mini-batch, whereas we need to average by the total number of example
    # anchors selected
    # Outside weights are used to scale each element-wise loss so the final
    # average over the mini-batch is correct
    bbox_outside_weights = np.zeros((num_inside, 4), dtype=np.float32)
    # uniform weighting of examples (given non-uniform sampling)
    num_examples = np.sum(labels >= 0)
    bbox_outside_weights[labels == 1, :] = 1.0 / num_examples
    bbox_outside_weights[labels == 0, :] = 1.0 / num_examples

    # Map up to original set of anchors
    labels = data_utils.unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = data_utils.unmap(
        bbox_targets, total_anchors, inds_inside, fill=0
    )
    bbox_inside_weights = data_utils.unmap(
        bbox_inside_weights, total_anchors, inds_inside, fill=0
    )
    bbox_outside_weights = data_utils.unmap(
        bbox_outside_weights, total_anchors, inds_inside, fill=0
    )

    # Split the generated labels, etc. into labels per each field of anchors
    blobs_out = []
    start_idx = 0
    for foa in foas:
        H = foa.field_size
        W = foa.field_size
        A = foa.num_cell_anchors
        end_idx = start_idx + H * W * A
        _labels = labels[start_idx:end_idx]
        _bbox_targets = bbox_targets[start_idx:end_idx, :]
        _bbox_inside_weights = bbox_inside_weights[start_idx:end_idx, :]
        _bbox_outside_weights = bbox_outside_weights[start_idx:end_idx, :]
        start_idx = end_idx

        # labels output with shape (1, A, height, width)
        _labels = _labels.reshape((1, H, W, A)).transpose(0, 3, 1, 2)
        # bbox_targets output with shape (1, 4 * A, height, width)
        _bbox_targets = _bbox_targets.reshape(
            (1, H, W, A * 4)).transpose(0, 3, 1, 2)
        # bbox_inside_weights output with shape (1, 4 * A, height, width)
        _bbox_inside_weights = _bbox_inside_weights.reshape(
            (1, H, W, A * 4)).transpose(0, 3, 1, 2)
        # bbox_outside_weights output with shape (1, 4 * A, height, width)
        _bbox_outside_weights = _bbox_outside_weights.reshape(
            (1, H, W, A * 4)).transpose(0, 3, 1, 2)
        blobs_out.append(
            dict(
                rpn_labels_int32_wide=_labels,
                rpn_bbox_targets_wide=_bbox_targets,
                rpn_bbox_inside_weights_wide=_bbox_inside_weights,
                rpn_bbox_outside_weights_wide=_bbox_outside_weights
            )
        )
    return blobs_out[0] if len(blobs_out) == 1 else blobs_out
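data_utils.unmap scatters the per-inside-anchor arrays back to the full anchor set. A minimal equivalent, under the assumed semantics, is:

import numpy as np

def unmap(data, count, inds, fill=0):
    # Place rows of data at positions inds of an array with count rows,
    # filling everything else with the fill value (assumed semantics).
    if data.ndim == 1:
        ret = np.full((count, ), fill, dtype=data.dtype)
        ret[inds] = data
    else:
        ret = np.full((count, ) + data.shape[1:], fill, dtype=data.dtype)
        ret[inds, :] = data
    return ret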
Example #38
def rel_assignments(im_inds,
                    rpn_rois,
                    roi_gtlabels,
                    roidb,
                    im_info,
                    num_sample_per_gt=4,
                    filter_non_overlap=True):
    """
    Assign object detection proposals to ground-truth targets. Produces proposal
    classification labels and bounding-box regression targets.
    :param rpn_rois: [num_rois, 5] array of [img_ind, x1, y1, x2, y2]
    :param gt_boxes: [num_boxes, 4] array of [x0, y0, x1, y1]
    :param gt_classes: [num_boxes, 2] array of [img_ind, class]
    :param gt_rels: [num_rels, 4] array of [img_ind, box_0, box_1, rel_type]
    A roi is considered foreground when its overlap is >= cfg.TRAIN.FG_THRESH.
    :return:
        rois: [num_rois, 5]
        labels: [num_rois] array of labels
        bbox_targets: [num_rois, 4] array of targets for the labels.
        rel_labels: [num_rels, 4] (img_ind, box0_ind, box1_ind, rel_type)
    """
    fg_rels_per_image = int(
        np.round(cfg.TRAIN.FG_REL_FRACTION * cfg.TRAIN.RELS_PER_IMG_REFINE))

    num_im = int(im_inds.max() + 1)
    indices_sets = [np.where(im_inds == i)[0] for i in range(num_im)]

    # print("Pred inds {} pred boxes {} pred box labels {} gt classes {} gt rels {}".format(
    #     pred_inds_np, pred_boxes_np, pred_boxlabels_np, gt_classes_np, gt_rels_np
    # ))

    rel_labels = []
    num_box_seen = 0
    for i, indices in enumerate(indices_sets):

        gt_boxes_i = roidb[i]['boxes']
        gt_rois_i = gt_boxes_i * im_info[i, 2]
        gt_classes_i = roidb[i]['gt_classes']
        sbj_gt_boxes_i = roidb[i]['sbj_gt_boxes']
        obj_gt_boxes_i = roidb[i]['obj_gt_boxes']
        prd_gt_classes_i = roidb[i]['prd_gt_classes']
        if cfg.MODEL.USE_BG:
            prd_gt_classes_i += 1

        sbj_gt_inds_i = box_utils.bbox_overlaps(sbj_gt_boxes_i,
                                                gt_boxes_i).argmax(-1)
        obj_gt_inds_i = box_utils.bbox_overlaps(obj_gt_boxes_i,
                                                gt_boxes_i).argmax(-1)
        gt_rels_i = np.stack((sbj_gt_inds_i, obj_gt_inds_i, prd_gt_classes_i),
                             -1)

        # [num_pred, num_gt]
        pred_rois_i = rpn_rois[indices, 1:]
        pred_roilabels_i = roi_gtlabels[indices]

        ious = box_utils.bbox_overlaps(pred_rois_i, gt_rois_i)
        is_match = (pred_roilabels_i[:, None]
                    == gt_classes_i[None]) & (ious >= cfg.TRAIN.FG_THRESH)

        # FOR BG. Limit ourselves to only IOUs that overlap, but are not the exact same box
        pbi_iou = box_utils.bbox_overlaps(pred_rois_i, pred_rois_i)
        if filter_non_overlap:
            rel_possibilities = (pbi_iou < 1) & (pbi_iou > 0)
            rels_intersect = rel_possibilities
        else:
            rel_possibilities = np.ones(
                (pred_rois_i.shape[0], pred_rois_i.shape[0]),
                dtype=np.int64) - np.eye(pred_rois_i.shape[0], dtype=np.int64)
            rels_intersect = (pbi_iou < 1) & (pbi_iou > 0)

        # ONLY select relations between ground truth because otherwise we get useless data
        rel_possibilities[pred_roilabels_i == 0] = 0
        rel_possibilities[:, pred_roilabels_i == 0] = 0

        # Sample the GT relationships.
        fg_rels = []
        p_size = []
        # NOTE: don't reuse `i` here; it indexes the image in the outer loop
        # and is used again below when building rel_labels.
        for (from_gtind, to_gtind, rel_id) in gt_rels_i:
            fg_rels_i = []
            fg_scores_i = []

            for from_ind in np.where(is_match[:, from_gtind])[0]:
                for to_ind in np.where(is_match[:, to_gtind])[0]:
                    if from_ind != to_ind:
                        fg_rels_i.append((from_ind, to_ind, rel_id))
                        fg_scores_i.append((ious[from_ind, from_gtind] *
                                            ious[to_ind, to_gtind]))
                        rel_possibilities[from_ind, to_ind] = 0
            if len(fg_rels_i) == 0:
                continue
            p = np.array(fg_scores_i)
            p = p / p.sum()
            p_size.append(p.shape[0])
            num_to_add = min(p.shape[0], num_sample_per_gt)
            for rel_to_add in npr.choice(p.shape[0],
                                         p=p,
                                         size=num_to_add,
                                         replace=False):
                fg_rels.append(fg_rels_i[rel_to_add])

        fg_rels = np.array(fg_rels, dtype=np.int64)
        if fg_rels.size > 0 and fg_rels.shape[0] > fg_rels_per_image:
            fg_rels = fg_rels[npr.choice(fg_rels.shape[0],
                                         size=fg_rels_per_image,
                                         replace=False)]
        elif fg_rels.size == 0:
            fg_rels = np.zeros((0, 3), dtype=np.int64)

        bg_rels = np.column_stack(np.where(rel_possibilities))
        bg_rels = np.column_stack(
            (bg_rels, np.zeros(bg_rels.shape[0], dtype=np.int64)))

        num_bg_rel = min(cfg.TRAIN.RELS_PER_IMG_REFINE - fg_rels.shape[0],
                         bg_rels.shape[0])
        if bg_rels.size > 0:
            # Optionally sample 4x as many intersecting relationships as
            # non-intersecting (weighted sampling disabled below):
            # bg_rels_intersect = rels_intersect[bg_rels[:, 0], bg_rels[:, 1]]
            # p = bg_rels_intersect.astype(np.float32)
            # p[bg_rels_intersect == 0] = 0.2
            # p[bg_rels_intersect == 1] = 0.8
            # p /= p.sum()
            bg_rels = bg_rels[np.random.choice(
                bg_rels.shape[0],
                #p=p,
                size=num_bg_rel,
                replace=False)]
        else:
            bg_rels = np.zeros((0, 3), dtype=np.int64)

        if fg_rels.size == 0 and bg_rels.size == 0:
            # Just put something here
            bg_rels = np.array([[0, 0, 0]], dtype=np.int64)

        # print("GTR {} -> AR {} vs {}".format(gt_rels.shape, fg_rels.shape, bg_rels.shape))
        all_rels_i = np.concatenate((fg_rels, bg_rels), 0)
        all_rels_i[:, 0:2] += num_box_seen

        all_rels_i = all_rels_i[np.lexsort((all_rels_i[:, 1],
                                            all_rels_i[:, 0]))]

        rel_labels.append(
            np.column_stack((
                i * np.ones(all_rels_i.shape[0], dtype=np.int64),
                all_rels_i,
            )))

        num_box_seen += pred_rois_i.shape[0]

    rel_labels = np.concatenate(rel_labels, 0)
    return rel_labels[:, :-1], rel_labels
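The lexsort above orders relation pairs by subject index first and object index second; a standalone check of that keying (illustrative only):

import numpy as np

rels = np.array([[2, 1, 5],
                 [0, 3, 7],
                 [0, 1, 7]], dtype=np.int64)
# np.lexsort treats the LAST key as primary: column 0, then column 1.
order = np.lexsort((rels[:, 1], rels[:, 0]))
print(rels[order])
# [[0 1 7]
#  [0 3 7]
#  [2 1 5]]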