Ejemplo n.º 1
0
def compute_proposal_targets(proposals,
                             cfg,
                             ground_truth_bboxes,
                             image_info,
                             ignore_regions=None,
                             use_ohem=False):
    '''
    Sample proposals image by image and build the classification and
    box-regression targets for the sampled rois.

    :argument
        proposals: [N, k], k>=5, batch_idx, x1, y1, x2, y2
        cfg: mapping; keys read here: append_gts, positive_iou_thresh,
            negative_iou_thresh_lo, negative_iou_thresh_hi,
            ignore_iou_thresh (only when ignore regions are present),
            batch_size (number of rois kept per image), positive_percent,
            num_classes, bbox_normalize_stats_precomputed,
            bbox_normalize_means, bbox_normalize_stds
        ground_truth_bboxes: [batch, max_num_gts, k], k>=5, x1,y1,x2,y2,label
        image_info: indexed per image and passed to bbox_helper.clip_bbox
        ignore_regions: optional, indexed per image; columns 0 and 2 are
            read as x1 and x2 (presumably [batch, max_num_ignore, 4] --
            verify against caller)
        use_ohem: when True the random positive/negative subsampling is
            skipped and every positive and negative is kept
    returns:
        rois: [M, 5], batch_idx, x1, y1, x2, y2
        labels: [M], class index per roi, 0 for negatives
        loc_targets, loc_weights: [M, num_classes * 4]
        All four are CUDA tensors. Images without rois, without valid gts or
        without any positive/negative sample contribute nothing; M is 0 when
        that holds for every image.
    '''
    # NOTE(review): the `ignore_regions is not None` test below relies on
    # to_np_array passing None through unchanged -- confirm.
    proposals, ground_truth_bboxes, image_info, ignore_regions = \
        map(to_np_array, [proposals, ground_truth_bboxes, image_info, ignore_regions])
    B = ground_truth_bboxes.shape[0]
    logger.debug('proposals.shape:{}'.format(proposals.shape))
    logger.debug('ground_truth_bboxes.shape:{}'.format(
        ground_truth_bboxes.shape))
    batch_size_per_image = cfg['batch_size']
    batch_rois = []
    batch_labels = []
    batch_loc_targets = []
    batch_loc_weights = []
    for b_ix in range(B):
        rois = proposals[proposals[:, 0] == b_ix][:, 1:1 + 4]
        gts = ground_truth_bboxes[b_ix]
        # kick out padded empty ground truth bboxes: keep only boxes with
        # x2 > x1 + 1 and y2 > y1 + 1
        gts = gts[(gts[:, 2] > gts[:, 0] + 1) & (gts[:, 3] > gts[:, 1] + 1)]
        if cfg['append_gts']:
            rois = np.vstack([rois, gts[:, :4]])
        rois = bbox_helper.clip_bbox(rois, image_info[b_ix])
        R = rois.shape[0]
        G = gts.shape[0]
        if R == 0 or G == 0:
            continue
        # overlaps is reduced along axis 1 to pick, for every roi, its best
        # matching gt
        overlaps = bbox_helper.bbox_iou_overlaps(rois, gts)
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps.max(axis=1)

        # a roi whose best IoU exceeds positive_iou_thresh is positive and is
        # matched to that gt; np.where already yields unique, sorted indices
        pos_r_ix = np.where(max_overlaps > cfg['positive_iou_thresh'])[0]
        pos_g_ix = argmax_overlaps[pos_r_ix]

        # a roi is negative when its best IoU lies in
        # [negative_iou_thresh_lo, negative_iou_thresh_hi)
        neg_r_ix = np.where((max_overlaps < cfg['negative_iou_thresh_hi'])
                            &
                            (max_overlaps >= cfg['negative_iou_thresh_lo']))[0]

        # remove negatives which are located in ignore regions
        if ignore_regions is not None:
            cur_ignore = ignore_regions[b_ix]
            # remove padded ignore regions
            cur_ignore = cur_ignore[cur_ignore[:, 2] - cur_ignore[:, 0] > 1]
            if cur_ignore.shape[0] > 0:
                iof_overlaps = bbox_helper.bbox_iof_overlaps(rois, cur_ignore)
                max_iof_overlaps = iof_overlaps.max(axis=1)  # one value per roi
                ignore_rois_ix = np.where(
                    max_iof_overlaps > cfg['ignore_iou_thresh'])[0]
                # setdiff1d keeps the integer dtype even when the result is
                # empty, so the result can always be used as an index
                neg_r_ix = np.setdiff1d(neg_r_ix, ignore_rois_ix,
                                        assume_unique=True)

        # a roi can satisfy both rules when the thresholds overlap;
        # positives win
        neg_r_ix = np.setdiff1d(neg_r_ix, pos_r_ix, assume_unique=True)

        # sampling
        num_positives = len(pos_r_ix)

        # keep all pos and negs if use OHEM
        if not use_ohem:
            num_pos_sampling = int(cfg['positive_percent'] *
                                   batch_size_per_image)
            if num_pos_sampling < num_positives:
                keep_ix = np.random.choice(num_positives,
                                           size=num_pos_sampling,
                                           replace=False)
                pos_r_ix = pos_r_ix[keep_ix]
                pos_g_ix = pos_g_ix[keep_ix]
                num_positives = num_pos_sampling

            num_negatives = len(neg_r_ix)
            # negatives fill whatever the positives left of the budget
            num_neg_sampling = batch_size_per_image - num_positives
            if num_neg_sampling < num_negatives:
                keep_ix = np.random.choice(num_negatives,
                                           size=num_neg_sampling,
                                           replace=False)
                neg_r_ix = neg_r_ix[keep_ix]

        # gather positives, matched gts, and negatives
        pos_rois = rois[pos_r_ix]
        pos_target_gts = gts[pos_g_ix]
        neg_rois = rois[neg_r_ix]
        rois_sampling = np.vstack([pos_rois, neg_rois])
        num_pos, num_neg = pos_rois.shape[0], neg_rois.shape[0]
        num_sampling = num_pos + num_neg
        if num_sampling == 0:
            # nothing to train on for this image; the padding step below
            # cannot resample from an empty set, so skip the image
            continue

        # generate targets
        pos_labels = pos_target_gts[:, 4].astype(np.int32)
        neg_labels = np.zeros(num_neg)
        labels = np.concatenate([pos_labels, neg_labels]).astype(np.int32)

        # regression targets are class specific: only the 4 slots of the
        # matched class are filled and weighted
        loc_targets = np.zeros([num_sampling, cfg['num_classes'], 4])
        loc_weights = np.zeros([num_sampling, cfg['num_classes'], 4])
        pos_loc_targets = bbox_helper.compute_loc_targets(
            pos_rois, pos_target_gts)
        if cfg['bbox_normalize_stats_precomputed']:
            pos_loc_targets = (pos_loc_targets - np.array(cfg['bbox_normalize_means'])[np.newaxis, :]) \
                              / np.array(cfg['bbox_normalize_stds'])[np.newaxis, :]
        loc_targets[range(num_pos), pos_labels, :] = pos_loc_targets
        loc_weights[range(num_pos), pos_labels, :] = 1
        loc_targets = loc_targets.reshape([num_sampling, -1])
        loc_weights = loc_weights.reshape([num_sampling, -1])

        batch_ix = np.full(rois_sampling.shape[0], b_ix)
        rois_sampling = np.hstack([batch_ix[:, np.newaxis], rois_sampling])

        # pad up to batch_size_per_image by repeating already sampled rois
        if rois_sampling.shape[0] < batch_size_per_image:
            rep_num = batch_size_per_image - rois_sampling.shape[0]
            rep_index = np.random.choice(rois_sampling.shape[0],
                                         size=rep_num,
                                         replace=True)
            rois_sampling = np.vstack(
                [rois_sampling, rois_sampling[rep_index]])
            labels = np.concatenate([labels, labels[rep_index]])
            loc_targets = np.vstack([loc_targets, loc_targets[rep_index]])
            loc_weights = np.vstack([loc_weights, loc_weights[rep_index]])

        batch_rois.append(rois_sampling)
        batch_labels.append(labels)
        batch_loc_targets.append(loc_targets)
        batch_loc_weights.append(loc_weights)

    if batch_rois:
        all_rois = np.vstack(batch_rois)
        all_labels = np.concatenate(batch_labels)
        all_loc_targets = np.vstack(batch_loc_targets)
        all_loc_weights = np.vstack(batch_loc_weights)
    else:
        # every image was skipped: return empty, correctly shaped outputs
        # instead of failing on vstack/concatenate of an empty list
        loc_dim = cfg['num_classes'] * 4
        all_rois = np.zeros((0, 5))
        all_labels = np.zeros((0,), dtype=np.int32)
        all_loc_targets = np.zeros((0, loc_dim))
        all_loc_weights = np.zeros((0, loc_dim))

    # running pos/neg statistics kept in the module-level `history`
    pos_num = int(np.count_nonzero(all_labels > 0))
    neg_num = all_labels.shape[0] - pos_num
    history[0] += pos_num
    history[1] += neg_num
    history_pos, history_neg = history
    history_total = history_neg + history_pos
    if history_total > 0:
        pos_percent = history_pos / history_total
        neg_percent = history_neg / history_total
    else:
        pos_percent = neg_percent = 0.0
    logger.debug(
        'proposal_target(pos/neg): %d=%d+%d, history ratio:%.5f/%.5f' %
        (pos_num + neg_num, pos_num, neg_num, pos_percent, neg_percent))

    rois_t = torch.from_numpy(all_rois).float().cuda().contiguous()
    labels_t = torch.from_numpy(all_labels).long().cuda().contiguous()
    loc_targets_t = torch.from_numpy(
        all_loc_targets).float().cuda().contiguous()
    loc_weights_t = torch.from_numpy(
        all_loc_weights).float().cuda().contiguous()

    return rois_t, labels_t, loc_targets_t, loc_weights_t
Ejemplo n.º 2
0
def compute_mask_targets(proposals,
                         cfg,
                         ground_truth_bboxes,
                         ground_truth_masks,
                         image_info,
                         ignore_regions=None):
    '''
    Select positive rois per image and build their per-class mask labels.

    Args:
        proposals: tensor [N, k], k>=5(b_ix, x1,y1,x2,y2, ...); its device is
            used for the returned tensors
        cfg: mapping; keys read here: append_gts, positive_iou_thresh,
            batch_size_per_image (max positives kept per image, values <= 0
            disable the cap), label_h, label_w, num_classes
        ground_truth_bboxes: [batch_size, max_gts, k], k>=5(x1,y1,x2,y2,label)
        ground_truth_masks: [batch_size, max_gts, image_h, image_w]
        image_info: [batch_size, 3], (resized_image_h, resized_image_w, resize_scale)
        ignore_regions: accepted for interface compatibility; not used here
    Return:
        batch_rois: [R, 6] (b_ix, x1,y1,x2,y2, class)
        batch_mask_labels: [R, num_classes, label_h, label_w]; the slice of
            the matched class holds the mask label, all other slices are -1
        When no image yields a positive roi, a single all-zero roi with
        all -1 mask labels is returned.
    '''
    proposals_device = proposals.device
    proposals = to_np_array(proposals)
    ground_truth_bboxes = to_np_array(ground_truth_bboxes)
    ground_truth_masks = to_np_array(ground_truth_masks)
    image_info = to_np_array(image_info)

    B = ground_truth_bboxes.shape[0]
    batch_rois = []
    batch_mask_labels = []

    for b_ix in range(B):
        rois = proposals[proposals[:, 0] == b_ix][:, 1:1 + 4]
        gts = ground_truth_bboxes[b_ix]
        masks = ground_truth_masks[b_ix]
        # kick out padded gts: a real box needs x2 > x1 + 1 and y2 > y1 + 1
        # (x2 must be compared with x1 in column 0, not with y1 in column 1)
        keep_ix = np.where((gts[:, 2] > gts[:, 0] + 1)
                           & (gts[:, 3] > gts[:, 1] + 1))[0]
        if keep_ix.size == 0:
            continue
        gts = gts[keep_ix]
        masks = masks[keep_ix]
        if cfg['append_gts']:
            rois = np.vstack([rois, gts[:, :4]])
        rois = bbox_helper.clip_bbox(rois.astype(np.int32),
                                     image_info[b_ix].astype(np.int32))
        R = rois.shape[0]
        G = gts.shape[0]
        if R == 0 or G == 0:
            continue
        # overlaps is reduced along axis 1 to pick, for every roi, its best
        # matching gt
        overlaps = bbox_helper.bbox_iou_overlaps(rois, gts)
        # a roi whose best IoU exceeds positive_iou_thresh is positive
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps.max(axis=1)
        pos_r_ix = np.where(max_overlaps > cfg['positive_iou_thresh'])[0]
        pos_g_ix = argmax_overlaps[pos_r_ix]

        # sampling
        num_positives = pos_r_ix.shape[0]
        if num_positives == 0:
            continue
        max_positives = cfg['batch_size_per_image']
        if max_positives > 0 and num_positives > max_positives:
            keep_ix = np.random.choice(num_positives,
                                       size=max_positives,
                                       replace=False)
            pos_r_ix = pos_r_ix[keep_ix]
            pos_g_ix = pos_g_ix[keep_ix]

        # gather positive bboxes and related masks
        pos_rois = rois[pos_r_ix]
        pos_target_classes = gts[pos_g_ix][:, 4].astype(np.int64)
        pos_target_masks = masks[pos_g_ix]
        N = pos_rois.shape[0]
        pos_mask_labels = generate_mask_labels(pos_rois, pos_target_masks,
                                               cfg['label_h'], cfg['label_w'])

        # -1 everywhere except the slice of the matched class
        mask_labels = -np.ones(
            (N, cfg['num_classes'], cfg['label_h'], cfg['label_w']))
        mask_labels[range(N), pos_target_classes, ...] = pos_mask_labels

        batch_idx = np.full((N, 1), b_ix)
        pos_rois = np.hstack(
            [batch_idx, pos_rois, pos_target_classes[:, np.newaxis]])

        batch_rois.append(pos_rois)
        batch_mask_labels.append(mask_labels)

    if not batch_rois:
        # if there's no positive rois, pad a single zero roi; it has 6
        # columns so its layout matches the rois built in the loop above
        n = 1
        batch_rois = np.zeros((n, 6), dtype=np.float32)
        batch_mask_labels = -np.ones(
            (n, cfg['num_classes'], cfg['label_h'], cfg['label_w']),
            dtype=np.float32)
    else:
        batch_rois = np.vstack(batch_rois)
        batch_mask_labels = np.vstack(batch_mask_labels)

    # hand the results back on the device the proposals came from
    batch_rois = torch.from_numpy(batch_rois).to(proposals_device).float()
    batch_mask_labels = torch.from_numpy(batch_mask_labels).to(
        proposals_device).float()
    return batch_rois, batch_mask_labels
Ejemplo n.º 3
0
def compute_anchor_targets(feature_size,
                           cfg,
                           ground_truth_bboxes,
                           image_info,
                           ignore_regions=None):
    r'''
    Label every anchor of the feature map as positive (1), negative (0) or
    ignored (-1), subsample them, and build the box-regression targets.

    :argument
        cfg.keys(): {
            'anchor_ratios', anchor_scales, anchor_stride,
            negative_iou_thresh, ignore_iou_thresh,positive_iou_thresh,
            positive_percent, rpn_batch_size
        }
        feature_size: IntTensor, [4]. i.e. batch, num_anchors * 4, height, width
        ground_truth_bboxes: FloatTensor, [batch, max_num_gt_bboxes, 5]
        image_info: FloatTensor, [batch, 3] (converted but not used further)
        ignore_regions: FloatTensor, [batch, max_num_ignore_regions, 4]
    :returns
        cls_targets: Variable, [batch, num_anchors, height, width]
        loc_targets, loc_masks: Variable, [batch, num_anchors * 4, height, width]
        loc_nomalizer: int, number of anchors with label >= 0, at least 1
    '''
    # NOTE(review): the `ignore_regions is not None` test below relies on
    # to_np_array passing None through unchanged -- confirm.
    ground_truth_bboxes, image_info, ignore_regions = \
        map(to_np_array, [ground_truth_bboxes, image_info, ignore_regions])

    batch_size, num_anchors_4, featmap_h, featmap_w = feature_size
    num_anchors = num_anchors_4 // 4
    assert (num_anchors * 4 == num_anchors_4)
    # [K*A, 4]
    anchors_overplane = anchor_helper.get_anchors_over_plane(
        featmap_h, featmap_w, cfg['anchor_ratios'], cfg['anchor_scales'],
        cfg['anchor_stride'])

    B = batch_size
    A = num_anchors
    K = featmap_h * featmap_w
    G = ground_truth_bboxes.shape[1]
    # without any gt slot (G == 0) every anchor stays a negative (label 0)
    labels = np.zeros([B, K * A], dtype=np.int64)
    if G != 0:
        # compute overlaps between anchors and gt_bboxes within each batch
        # shape: [B, K*A, G]
        overlaps = np.stack([
            bbox_helper.bbox_iou_overlaps(anchors_overplane,
                                          ground_truth_bboxes[ix])
            for ix in range(B)
        ], axis=0)

        # shape of [B, K*A]
        argmax_overlaps = overlaps.argmax(axis=2)
        max_overlaps = overlaps.max(axis=2)

        # [B, G]
        gt_max_overlaps = overlaps.max(axis=1)
        # ignore those gts whose best overlap is too small; -1 can never
        # equal an entry of `overlaps`, so such gts get no forced match
        gt_max_overlaps[gt_max_overlaps < 0.1] = -1
        gt_argmax_b_ix, gt_argmax_ka_ix, gt_argmax_g_ix = \
            np.where(overlaps == gt_max_overlaps[:, np.newaxis, :])
        # match each anchor to the ground truth bbox
        argmax_overlaps[gt_argmax_b_ix, gt_argmax_ka_ix] = gt_argmax_g_ix

        # start from "ignored" so that anchors whose best IoU lies between
        # negative_iou_thresh and positive_iou_thresh take no part in the
        # loss; starting from 0 made the next assignment a no-op and turned
        # negative_iou_thresh into a dead setting
        labels.fill(-1)
        labels[max_overlaps < cfg['negative_iou_thresh']] = 0

        # remove negatives located in ignore regions
        if ignore_regions is not None:
            iof_overlaps = np.stack([
                bbox_helper.bbox_iof_overlaps(anchors_overplane,
                                              ignore_regions[ix])
                for ix in range(B)
            ], axis=0)
            max_iof_overlaps = iof_overlaps.max(axis=2)  # [B, K*A]
            labels[max_iof_overlaps > cfg['ignore_iou_thresh']] = -1

        # positives are assigned last so they override negative/ignored:
        # the best anchor(s) of each gt, plus every anchor above the
        # positive threshold
        labels[gt_argmax_b_ix, gt_argmax_ka_ix] = 1
        labels[max_overlaps > cfg['positive_iou_thresh']] = 1

    # sampling: the budget is shared by the whole batch, surplus anchors are
    # switched to ignored (-1)
    num_pos_sampling = int(cfg['positive_percent'] * cfg['rpn_batch_size'] *
                           batch_size)
    pos_b_ix, pos_ka_ix = np.where(labels > 0)
    num_positives = len(pos_b_ix)
    if num_positives > num_pos_sampling:
        remove_ix = np.random.choice(num_positives,
                                     size=num_positives - num_pos_sampling,
                                     replace=False)
        labels[pos_b_ix[remove_ix], pos_ka_ix[remove_ix]] = -1
        num_positives = num_pos_sampling
    num_neg_sampling = cfg['rpn_batch_size'] * batch_size - num_positives
    neg_b_ix, neg_ka_ix = np.where(labels == 0)
    num_negatives = len(neg_b_ix)
    if num_negatives > num_neg_sampling:
        remove_ix = np.random.choice(num_negatives,
                                     size=num_negatives - num_neg_sampling,
                                     replace=False)
        labels[neg_b_ix[remove_ix], neg_ka_ix[remove_ix]] = -1

    loc_targets = np.zeros([B, K * A, 4], dtype=np.float32)
    loc_masks = np.zeros([B, K * A, 4], dtype=np.float32)
    if G != 0:
        # re-read the positives: sampling above may have dropped some
        pos_b_ix, pos_ka_ix = np.where(labels > 0)
        pos_anchors = anchors_overplane[pos_ka_ix, :]

        pos_target_ix = argmax_overlaps[pos_b_ix, pos_ka_ix]
        pos_target_gt = ground_truth_bboxes[pos_b_ix, pos_target_ix]
        pos_loc_targets = bbox_helper.compute_loc_targets(
            pos_anchors, pos_target_gt)

        loc_targets[pos_b_ix, pos_ka_ix, :] = pos_loc_targets
        # only positive anchors contribute to the regression loss
        loc_masks[pos_b_ix, pos_ka_ix, :] = 1.

    # transpose to match the predicted convolution shape:
    # [B, K*A(, 4)] -> [B, H, W, A(*4)] -> [B, A(*4), H, W]
    cls_targets = Variable(
        torch.from_numpy(labels).long().view(
            B, featmap_h, featmap_w, A).permute(0, 3, 1, 2)
    ).cuda().contiguous()
    loc_targets = Variable(
        torch.from_numpy(loc_targets).float().view(
            B, featmap_h, featmap_w, A * 4).permute(0, 3, 1, 2)
    ).cuda().contiguous()
    loc_masks = Variable(
        torch.from_numpy(loc_masks).float().view(
            B, featmap_h, featmap_w, A * 4).permute(0, 3, 1, 2)
    ).cuda().contiguous()
    # number of anchors that are not ignored, never less than 1
    loc_nomalizer = max(1, len(np.where(labels >= 0)[0]))
    logger.debug('positive anchors:%d' % len(pos_b_ix))
    return cls_targets, loc_targets, loc_masks, loc_nomalizer