Example 1
def _compute_targets(entry):
    """Compute bounding-box regression targets for an image."""
    rois = entry['boxes']
    overlaps = entry['max_overlaps']
    labels = entry['max_classes']
    # Indices of ground-truth ROIs (crowd regions are excluded)
    gt_inds = np.where((entry['gt_classes'] > 0) & (entry['is_crowd'] == 0))[0]
    # Each targets row has the format (class, tx, ty, tw, th)
    targets = np.zeros((rois.shape[0], 5), dtype=np.float32)
    if len(gt_inds) == 0:
        # Bail if the image has no ground-truth ROIs
        return targets

    # Indices of examples for which we try to make predictions
    ex_inds = np.where(overlaps >= config.train.bbox_thresh)[0]

    # Get IoU overlap between each ex ROI and gt ROI
    ex_gt_overlaps = box_utils.bbox_overlaps(
        rois[ex_inds, :].astype(dtype=np.float32, copy=False),
        rois[gt_inds, :].astype(dtype=np.float32, copy=False))

    # Find which gt ROI each ex ROI has max overlap with:
    # this will be the ex ROI's gt target
    gt_assignment = ex_gt_overlaps.argmax(axis=1)
    gt_rois = rois[gt_inds[gt_assignment], :]
    ex_rois = rois[ex_inds, :]
    # Use class "1" for all boxes if using class_agnostic_bbox_reg
    targets[ex_inds, 0] = (1 if config.network.cls_agnostic_bbox_reg else
                           labels[ex_inds])
    targets[ex_inds,
            1:] = box_utils.bbox_transform_inv(ex_rois, gt_rois,
                                               config.network.bbox_reg_weights)
    return targets
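
box_utils.bbox_transform_inv is not shown in this example. Below is a minimal NumPy sketch of the standard Fast R-CNN (tx, ty, tw, th) parameterization it is assumed to compute, with the weights argument standing in for config.network.bbox_reg_weights (the helper name is ours):

import numpy as np

def bbox_transform_np(ex_rois, gt_rois, weights=(1.0, 1.0, 1.0, 1.0)):
    """Sketch of the Fast R-CNN (tx, ty, tw, th) regression targets."""
    ex_w = ex_rois[:, 2] - ex_rois[:, 0] + 1.0
    ex_h = ex_rois[:, 3] - ex_rois[:, 1] + 1.0
    ex_cx = ex_rois[:, 0] + 0.5 * ex_w
    ex_cy = ex_rois[:, 1] + 0.5 * ex_h

    gt_w = gt_rois[:, 2] - gt_rois[:, 0] + 1.0
    gt_h = gt_rois[:, 3] - gt_rois[:, 1] + 1.0
    gt_cx = gt_rois[:, 0] + 0.5 * gt_w
    gt_cy = gt_rois[:, 1] + 0.5 * gt_h

    wx, wy, ww, wh = weights
    # Translation targets are normalized by the example box size;
    # scale targets are log-space ratios
    return np.vstack((wx * (gt_cx - ex_cx) / ex_w,
                      wy * (gt_cy - ex_cy) / ex_h,
                      ww * np.log(gt_w / ex_w),
                      wh * np.log(gt_h / ex_h))).transpose()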
Example 2
def _merge_proposal_boxes_into_roidb(roidb, box_list):
    """Add proposal boxes to each roidb entry."""
    assert len(box_list) == len(roidb)
    for i, entry in enumerate(roidb):
        boxes = box_list[i]
        num_boxes = boxes.shape[0]
        gt_overlaps = np.zeros((num_boxes, entry['gt_overlaps'].shape[1]),
                               dtype=entry['gt_overlaps'].dtype)
        box_to_gt_ind_map = -np.ones(
            (num_boxes, ), dtype=entry['box_to_gt_ind_map'].dtype)

        # Note: unlike in other places, here we intentionally include all gt
        # rois, even ones marked as crowd. Boxes that overlap with crowds will
        # be filtered out later (see: _filter_crowd_proposals).
        gt_inds = np.where(entry['gt_classes'] > 0)[0]
        if len(gt_inds) > 0:
            gt_boxes = entry['boxes'][gt_inds, :]
            gt_classes = entry['gt_classes'][gt_inds]
            proposal_to_gt_overlaps = box_utils.bbox_overlaps(
                boxes.astype(dtype=np.float32, copy=False),
                gt_boxes.astype(dtype=np.float32, copy=False))
            # Gt box that overlaps each input box the most
            # (argmax breaks ties by taking the lowest gt index)
            argmaxes = proposal_to_gt_overlaps.argmax(axis=1)
            # Amount of that overlap
            maxes = proposal_to_gt_overlaps.max(axis=1)
            # Those boxes with non-zero overlap with gt boxes
            nz = np.where(maxes > 0)[0]
            # Record max overlaps with the class of the appropriate gt box
            gt_overlaps[nz, gt_classes[argmaxes[nz]]] = maxes[nz]
            box_to_gt_ind_map[nz] = gt_inds[argmaxes[nz]]
        entry['boxes'] = np.append(entry['boxes'],
                                   boxes.astype(entry['boxes'].dtype,
                                                copy=False),
                                   axis=0)
        entry['gt_classes'] = np.append(
            entry['gt_classes'],
            np.zeros((num_boxes, ), dtype=entry['gt_classes'].dtype))
        entry['seg_areas'] = np.append(
            entry['seg_areas'],
            np.zeros((num_boxes, ), dtype=entry['seg_areas'].dtype))
        entry['gt_overlaps'] = np.append(entry['gt_overlaps'].toarray(),
                                         gt_overlaps,
                                         axis=0)
        entry['gt_overlaps'] = scipy.sparse.csr_matrix(entry['gt_overlaps'])
        entry['is_crowd'] = np.append(
            entry['is_crowd'],
            np.zeros((num_boxes, ), dtype=entry['is_crowd'].dtype))
        entry['box_to_gt_ind_map'] = np.append(
            entry['box_to_gt_ind_map'],
            box_to_gt_ind_map.astype(entry['box_to_gt_ind_map'].dtype,
                                     copy=False))
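
bbox_overlaps is the primitive all of these examples lean on. A self-contained pure-NumPy sketch of the pairwise IoU matrix it is assumed to compute (the helper name is ours; production versions are vectorized or written in Cython):

import numpy as np

def bbox_overlaps_np(boxes, gt_boxes):
    """Sketch of pairwise IoU for (x1, y1, x2, y2) boxes -> (N, K) matrix."""
    N, K = boxes.shape[0], gt_boxes.shape[0]
    overlaps = np.zeros((N, K), dtype=np.float32)
    area = ((boxes[:, 2] - boxes[:, 0] + 1) *
            (boxes[:, 3] - boxes[:, 1] + 1))
    gt_area = ((gt_boxes[:, 2] - gt_boxes[:, 0] + 1) *
               (gt_boxes[:, 3] - gt_boxes[:, 1] + 1))
    for k in range(K):
        # Intersection width/height of every box against gt box k
        iw = (np.minimum(boxes[:, 2], gt_boxes[k, 2]) -
              np.maximum(boxes[:, 0], gt_boxes[k, 0]) + 1).clip(min=0)
        ih = (np.minimum(boxes[:, 3], gt_boxes[k, 3]) -
              np.maximum(boxes[:, 1], gt_boxes[k, 1]) + 1).clip(min=0)
        inter = iw * ih
        overlaps[:, k] = inter / (area + gt_area[k] - inter)
    return overlaps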
Example 3
    def forward(self, rois, bbox_pred, cls_score, label, gt_rois, cls_idx,
                seg_gt, mask_gt, im_shape):

        rois = rois.data.cpu().numpy()
        bbox_pred = bbox_pred.data.cpu().numpy()
        cls_score = cls_score.data.cpu().numpy()
        cls_pred = np.argmax(cls_score, axis=1)
        label = label.data.cpu().numpy()
        gt_rois = gt_rois.cpu().numpy()

        # Strip the leading batch-index column from the rois
        rois = rois[:, 1:]

        bbox_overlap = bbox_overlaps(rois, gt_rois[:, 1:])  # #rois x #gt_rois
        max_bbox_overlap = np.argmax(bbox_overlap, axis=1)
        # Index of the RoI currently assigned to each gt box (-1 = unassigned)
        max_overlap = np.ones((gt_rois.shape[0], ), dtype=np.int32) * -1

        # Start every pixel at -1 (unmatched); keep stuff-class labels and
        # ignore labels (>= 255) from seg_gt as-is
        matched_gt = torch.ones_like(seg_gt) * -1
        matched_gt = torch.where(
            seg_gt <=
            config.dataset.num_seg_classes - config.dataset.num_classes,
            seg_gt, matched_gt)
        matched_gt = torch.where(seg_gt >= 255, seg_gt, matched_gt)

        keep = np.ones((rois.shape[0], ), dtype=np.int32)

        # Greedy one-to-one matching: keep at most one RoI per gt box,
        # preferring the RoI with the higher overlap, and drop RoIs that
        # both predict and are labeled background
        for i in range(rois.shape[0]):
            if bbox_overlap[i, max_bbox_overlap[i]] > 0.5:
                if max_overlap[max_bbox_overlap[i]] == -1:
                    # First RoI matched to this gt box
                    max_overlap[max_bbox_overlap[i]] = i
                elif bbox_overlap[max_overlap[max_bbox_overlap[i]],
                                  max_bbox_overlap[i]] > bbox_overlap[
                                      i, max_bbox_overlap[i]]:
                    # A better RoI already owns this gt box; drop this one
                    keep[i] = 0
                else:
                    # This RoI overlaps more; displace the previous one
                    keep[max_overlap[max_bbox_overlap[i]]] = 0
                    max_overlap[max_bbox_overlap[i]] = i
            elif cls_pred[i] == 0 and label[i] == 0:
                keep[i] = 0

        rois = rois[keep != 0]
        rois = np.hstack((np.zeros((rois.shape[0], 1)), rois))
        label = label[keep != 0]

        # cumsum maps an original RoI index to its 1-based index among kept RoIs
        keep = np.cumsum(keep)
        if keep[-1] == 0:
            print(max_overlap)
            print(max_bbox_overlap)
            print(cls_pred)
            assert keep[-1] != 0

        for i in range(max_overlap.shape[0]):
            if max_overlap[i] != -1:
                # Downscaled RoI coordinates (computed but unused here)
                roi = np.round(rois[keep[max_overlap[i]] - 1] / 4)
                mask_gt_i = mask_gt[[i]]
                matched_gt[mask_gt_i != 0] = int(keep[max_overlap[i]] - 1 +
                                                 self.num_seg_classes -
                                                 self.num_inst_classes)

        if config.train.panoptic_box_keep_fraction < 1:
            matched_gt[matched_gt == -1] = (
                self.num_seg_classes - self.num_inst_classes + rois.shape[0])
        else:
            matched_gt[matched_gt == -1] = 255

        rois = torch.from_numpy(rois).to(matched_gt.device)
        label = torch.from_numpy(label).to(matched_gt.device)
        return rois, label, matched_gt
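
The keep/replace loop above is a greedy one-to-one matching between RoIs and gt boxes. A standalone toy run of the same logic with hypothetical overlap values:

import numpy as np

# Toy overlap matrix: 3 RoIs x 2 gt boxes (hypothetical values)
overlap = np.array([[0.9, 0.1],
                    [0.8, 0.2],
                    [0.3, 0.7]])
best_gt = overlap.argmax(axis=1)                     # gt assigned to each RoI
assigned = np.full(overlap.shape[1], -1, dtype=int)  # RoI holding each gt
keep = np.ones(overlap.shape[0], dtype=int)

for i in range(overlap.shape[0]):
    g = best_gt[i]
    if overlap[i, g] > 0.5:
        if assigned[g] == -1:
            assigned[g] = i                          # first match for gt g
        elif overlap[assigned[g], g] > overlap[i, g]:
            keep[i] = 0                              # a better RoI owns gt g
        else:
            keep[assigned[g]] = 0                    # displace the weaker RoI
            assigned[g] = i

print(keep)  # -> [1 0 1]: RoI 1 loses gt 0 to the stronger RoI 0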
Example 4
def _get_rpn_blobs(im_height, im_width, foas, all_anchors, gt_boxes):
    total_anchors = all_anchors.shape[0]
    straddle_thresh = config.train.rpn_straddle_thresh

    if straddle_thresh >= 0:
        # Only keep anchors inside the image by a margin of straddle_thresh
        # Set TRAIN.RPN_STRADDLE_THRESH to -1 (or a large value) to keep all
        # anchors
        inds_inside = np.where(
            (all_anchors[:, 0] >= -straddle_thresh)
            & (all_anchors[:, 1] >= -straddle_thresh)
            & (all_anchors[:, 2] < im_width + straddle_thresh)
            & (all_anchors[:, 3] < im_height + straddle_thresh))[0]
        # keep only inside anchors
        anchors = all_anchors[inds_inside, :]
    else:
        inds_inside = np.arange(all_anchors.shape[0])
        anchors = all_anchors
    num_inside = len(inds_inside)

    # Compute anchor labels:
    # label=1 is positive, 0 is negative, -1 is don't care (ignore)
    labels = np.empty((num_inside, ), dtype=np.int32)
    labels.fill(-1)
    if len(gt_boxes) > 0:
        # Compute overlaps between the anchors and the gt boxes
        anchor_by_gt_overlap = bbox_overlaps(anchors, gt_boxes)
        # Map from anchor to gt box that has highest overlap
        anchor_to_gt_argmax = anchor_by_gt_overlap.argmax(axis=1)
        # For each anchor, amount of overlap with most overlapping gt box
        anchor_to_gt_max = anchor_by_gt_overlap[np.arange(num_inside),
                                                anchor_to_gt_argmax]

        # Map from gt box to an anchor that has highest overlap
        gt_to_anchor_argmax = anchor_by_gt_overlap.argmax(axis=0)
        # For each gt box, amount of overlap with most overlapping anchor
        gt_to_anchor_max = anchor_by_gt_overlap[
            gt_to_anchor_argmax,
            np.arange(anchor_by_gt_overlap.shape[1])]
        # Find all anchors that share the max overlap amount
        # (this includes many ties)
        anchors_with_max_overlap = np.where(
            anchor_by_gt_overlap == gt_to_anchor_max)[0]

        # Fg label: for each gt use anchors with highest overlap
        # (including ties)
        labels[anchors_with_max_overlap] = 1
        # Fg label: above threshold IOU
        labels[anchor_to_gt_max >= config.train.rpn_positive_overlap] = 1

    # subsample positive labels if we have too many
    num_fg = int(config.train.rpn_fg_fraction * config.train.rpn_batch_size)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = np.random.choice(fg_inds,
                                        size=(len(fg_inds) - num_fg),
                                        replace=False)
        labels[disable_inds] = -1
    fg_inds = np.where(labels == 1)[0]

    # subsample negative labels if we have too many
    # (sampled without replacement from the typically large set of bg inds)
    num_bg = config.train.rpn_batch_size - np.sum(labels == 1)
    if len(gt_boxes) > 0:
        bg_inds = np.where(
            anchor_to_gt_max < config.train.rpn_negative_overlap)[0]
    else:
        # No gt boxes: every anchor is a background candidate
        bg_inds = np.arange(num_inside)
    if len(bg_inds) > num_bg:
        enable_inds = bg_inds[np.random.choice(len(bg_inds),
                                               num_bg,
                                               replace=False)]
        labels[enable_inds] = 0
    bg_inds = np.where(labels == 0)[0]

    bbox_targets = np.zeros((num_inside, 4), dtype=np.float32)
    if len(gt_boxes) > 0:
        bbox_targets[fg_inds, :] = compute_targets(
            anchors[fg_inds, :], gt_boxes[anchor_to_gt_argmax[fg_inds], :])

    # Bbox regression loss has the form:
    #   loss(x) = weight_outside * L(weight_inside * x)
    # Inside weights allow us to set zero loss on an element-wise basis
    # Bbox regression is trained only on positive examples, so positive
    # anchors get inside weights of 1.0 and all other anchors get 0
    bbox_inside_weights = np.zeros((num_inside, 4), dtype=np.float32)
    bbox_inside_weights[labels == 1, :] = (1.0, 1.0, 1.0, 1.0)

    # The bbox regression loss only averages by the number of images in the
    # mini-batch, whereas we need to average by the total number of example
    # anchors selected
    # Outside weights are used to scale each element-wise loss so the final
    # average over the mini-batch is correct
    bbox_outside_weights = np.zeros((num_inside, 4), dtype=np.float32)
    # uniform weighting of examples (given non-uniform sampling)
    num_examples = np.sum(labels >= 0)
    bbox_outside_weights[labels == 1, :] = 1.0 / num_examples
    bbox_outside_weights[labels == 0, :] = 1.0 / num_examples

    # Map up to original set of anchors
    labels = unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_inside_weights = unmap(bbox_inside_weights,
                                total_anchors,
                                inds_inside,
                                fill=0)
    bbox_outside_weights = unmap(bbox_outside_weights,
                                 total_anchors,
                                 inds_inside,
                                 fill=0)

    # Split the generated labels, etc. into labels per each field of anchors
    blobs_out = []
    start_idx = 0
    for foa in foas:
        H = foa.field_size
        W = foa.field_size
        A = foa.num_cell_anchors
        end_idx = start_idx + H * W * A
        _labels = labels[start_idx:end_idx]
        _bbox_targets = bbox_targets[start_idx:end_idx, :]
        _bbox_inside_weights = bbox_inside_weights[start_idx:end_idx, :]
        _bbox_outside_weights = bbox_outside_weights[start_idx:end_idx, :]
        start_idx = end_idx

        # labels output with shape (1, A, height, width)
        _labels = _labels.reshape((1, H, W, A)).transpose(0, 3, 1, 2)
        # bbox_targets output with shape (1, 4 * A, height, width)
        _bbox_targets = _bbox_targets.reshape(
            (1, H, W, A * 4)).transpose(0, 3, 1, 2)
        # bbox_inside_weights output with shape (1, 4 * A, height, width)
        _bbox_inside_weights = _bbox_inside_weights.reshape(
            (1, H, W, A * 4)).transpose(0, 3, 1, 2)
        # bbox_outside_weights output with shape (1, 4 * A, height, width)
        _bbox_outside_weights = _bbox_outside_weights.reshape(
            (1, H, W, A * 4)).transpose(0, 3, 1, 2)
        blobs_out.append(
            dict(rpn_labels_int32_wide=_labels,
                 rpn_bbox_targets_wide=_bbox_targets,
                 rpn_bbox_inside_weights_wide=_bbox_inside_weights,
                 rpn_bbox_outside_weights_wide=_bbox_outside_weights))
    return blobs_out[0] if len(blobs_out) == 1 else blobs_out
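
The fg/bg subsampling above caps the minibatch at rpn_batch_size anchors with at most rpn_fg_fraction of them positive. The same logic as a standalone sketch (function name and default values are ours, not from the source):

import numpy as np

def subsample_labels(labels, batch_size=256, fg_fraction=0.5):
    """Disable surplus fg/bg anchor labels by setting them to -1 (ignore)."""
    num_fg = int(fg_fraction * batch_size)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        drop = np.random.choice(fg_inds, len(fg_inds) - num_fg, replace=False)
        labels[drop] = -1
    num_bg = batch_size - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        drop = np.random.choice(bg_inds, len(bg_inds) - num_bg, replace=False)
        labels[drop] = -1
    return labels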
Example 5
def assign_anchor(feat_shape,
                  gt_boxes,
                  im_info,
                  feat_stride=16,
                  scales=(8, 16, 32),
                  ratios=(0.5, 1, 2),
                  allowed_border=0):
    """
    assign ground truth boxes to anchor positions
    :param feat_shape: infer output shape
    :param gt_boxes: assign ground truth
    :param im_info: filter out anchors overlapped with edges
    :param feat_stride: anchor position step
    :param scales: used to generate anchors, affects num_anchors (per location)
    :param ratios: aspect ratios of generated anchors
    :param allowed_border: filter out anchors with edge overlap > allowed_border
    :return: dict of label
    'label': of shape (batch_size, 1) <- (batch_size, num_anchors, feat_height, feat_width)
    'bbox_target': of shape (batch_size, num_anchors * 4, feat_height, feat_width)
    'bbox_inside_weight': *todo* mark the assigned anchors
    'bbox_outside_weight': used to normalize the bbox_loss, all weights sums to RPN_POSITIVE_WEIGHT
    """
    def _unmap(data, count, inds, fill=0):
        """Unmap a subset of data (at inds) back into an array of size count."""
        if len(data.shape) == 1:
            ret = np.empty((count, ), dtype=np.float32)
            ret.fill(fill)
            ret[inds] = data
        else:
            ret = np.empty((count, ) + data.shape[1:], dtype=np.float32)
            ret.fill(fill)
            ret[inds, :] = data
        return ret

    DEBUG = False
    im_info = im_info[0]
    scales = np.array(scales, dtype=np.float32)
    base_anchors = generate_anchors(base_size=feat_stride,
                                    ratios=list(ratios),
                                    scales=scales)
    num_anchors = base_anchors.shape[0]
    feat_height, feat_width = feat_shape[-2:]

    # 1. generate proposals from bbox deltas and shifted anchors
    shift_x = np.arange(0, feat_width) * feat_stride
    shift_y = np.arange(0, feat_height) * feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                        shift_y.ravel())).transpose()
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = num_anchors
    K = shifts.shape[0]
    all_anchors = base_anchors.reshape((1, A, 4)) + shifts.reshape(
        (1, K, 4)).transpose((1, 0, 2))
    all_anchors = all_anchors.reshape((K * A, 4))
    total_anchors = int(K * A)

    # only keep anchors inside the image
    inds_inside = np.where((all_anchors[:, 0] >= -allowed_border)
                           & (all_anchors[:, 1] >= -allowed_border)
                           & (all_anchors[:, 2] < im_info[1] + allowed_border)
                           & (all_anchors[:,
                                          3] < im_info[0] + allowed_border))[0]

    # keep only inside anchors
    anchors = all_anchors[inds_inside, :]

    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.empty((len(inds_inside), ), dtype=np.float32)
    labels.fill(-1)

    if gt_boxes.size > 0:
        # overlap between the anchors and the gt boxes
        # overlaps (ex, gt)
        overlaps = bbox_overlaps(anchors.astype(np.float64),
                                 gt_boxes.astype(np.float64))
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps,
                                   np.arange(overlaps.shape[1])]
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

        if not config.train.rpn_clobber_positives:
            # assign bg labels first so that positive labels can clobber them
            labels[max_overlaps < config.train.rpn_negative_overlap] = 0

        # fg label: for each gt, anchor with highest overlap
        labels[gt_argmax_overlaps] = 1

        # fg label: above threshold IoU
        labels[max_overlaps >= config.train.rpn_positive_overlap] = 1

        if config.train.rpn_clobber_positives:
            # assign bg labels last so that negative labels can clobber positives
            labels[max_overlaps < config.train.rpn_negative_overlap] = 0
    else:
        labels[:] = 0

    # subsample positive labels if we have too many
    num_fg = int(config.train.rpn_fg_fraction * config.train.rpn_batch_size)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds,
                                  size=(len(fg_inds) - num_fg),
                                  replace=False)
        if DEBUG:
            disable_inds = fg_inds[:(len(fg_inds) - num_fg)]
        labels[disable_inds] = -1

    # subsample negative labels if we have too many
    num_bg = config.train.rpn_batch_size - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(bg_inds,
                                  size=(len(bg_inds) - num_bg),
                                  replace=False)
        labels[disable_inds] = -1

    bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if gt_boxes.size > 0:
        bbox_targets[:] = bbox_transform(anchors,
                                         gt_boxes[argmax_overlaps, :4])

    bbox_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_weights[labels == 1, :] = np.array(config.train.rpn_bbox_weights)

    # map up to original set of anchors
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_weights = _unmap(bbox_weights, total_anchors, inds_inside, fill=0)

    labels = labels.reshape(
        (1, feat_height, feat_width, A)).transpose(0, 3, 1, 2)
    labels = labels.reshape((1, A * feat_height * feat_width))
    bbox_targets = bbox_targets.reshape(
        (1, feat_height, feat_width, A * 4)).transpose(0, 3, 1, 2)
    bbox_weights = bbox_weights.reshape(
        (1, feat_height, feat_width, A * 4)).transpose((0, 3, 1, 2))

    label = {
        'label': labels,
        'bbox_target': bbox_targets,
        'bbox_weight': bbox_weights
    }
    return label
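
The meshgrid/broadcast step above tiles the A base anchors over all K feature-map cells. The same construction in isolation, with toy sizes:

import numpy as np

base_anchors = np.array([[-8., -8., 8., 8.]])  # one toy base anchor
feat_height, feat_width, feat_stride = 2, 3, 16

shift_x = np.arange(feat_width) * feat_stride
shift_y = np.arange(feat_height) * feat_stride
shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                    shift_x.ravel(), shift_y.ravel())).transpose()

A, K = base_anchors.shape[0], shifts.shape[0]
# (1, A, 4) + (K, 1, 4) broadcasts to (K, A, 4): every anchor at every cell
all_anchors = (base_anchors.reshape((1, A, 4)) +
               shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
all_anchors = all_anchors.reshape((K * A, 4))
print(all_anchors.shape)  # (6, 4): K * A anchors, ordered cell-major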
Example 6
def assign_pyramid_anchor(gt_boxes,
                          im_info,
                          feat_strides=(64, 32, 16, 8, 4),
                          scales=(8, ),
                          ratios=(0.5, 1, 2),
                          allowed_border=0):
    """
    assign ground truth boxes to anchor positions
    :param feat_shape: infer output shape
    :param gt_boxes: assign ground truth
    :param im_info: filter out anchors overlapped with edges
    :param feat_stride: anchor position step
    :param scales: used to generate anchors, affects num_anchors (per location)
    :param ratios: aspect ratios of generated anchors
    :param allowed_border: filter out anchors with edge overlap > allowed_border
    :return: tuple
    labels: of shape (batch_size, 1) <- (batch_size, num_anchors, feat_height, feat_width)
    bbox_targets: of shape (batch_size, num_anchors * 4, feat_height, feat_width)
    bbox_weights: mark the assigned anchors
    """
    def _unmap(data, count, inds, fill=0):
        """Unmap a subset of data (at inds) back into an array of size count."""
        if len(data.shape) == 1:
            ret = np.empty((count, ), dtype=np.float32)
            ret.fill(fill)
            ret[inds] = data
        else:
            ret = np.empty((count, ) + data.shape[1:], dtype=np.float32)
            ret.fill(fill)
            ret[inds, :] = data
        return ret

    DEBUG = False
    im_info = im_info[0]
    # clean up boxes
    nonneg = np.where(gt_boxes[:, 4] != -1)[0]
    gt_boxes = gt_boxes[nonneg]
    scales = np.array(scales, dtype=np.float32)

    anchors_list = []
    anchors_num_list = []
    inds_inside_list = []
    feat_infos = []
    A_list = []
    for i in range(len(feat_strides)):
        base_anchors = generate_anchors(base_size=feat_strides[i],
                                        ratios=list(ratios),
                                        scales=scales)
        num_anchors = base_anchors.shape[0]
        # feat_shape is not available here, so infer the feature-map size at
        # this stride by repeatedly halving the image size
        feat_height, feat_width, s = im_info[0], im_info[1], feat_strides[i]
        s = s // 4
        feat_height, feat_width = (int(np.ceil(feat_height / 2)) // 2,
                                   int(np.ceil(feat_width / 2)) // 2)
        while s > 1:
            feat_height, feat_width = (int(np.ceil(feat_height / 2)),
                                       int(np.ceil(feat_width / 2)))
            s = s // 2
        feat_stride = feat_strides[i]
        feat_infos.append([feat_height, feat_width])

        A = num_anchors
        A_list.append(A)
        K = feat_height * feat_width

        # anchors_cython computes the same dense anchor grid as the
        # meshgrid/shift construction in assign_anchor above, only faster
        all_anchors = anchors_cython(feat_height, feat_width, feat_stride,
                                     base_anchors)
        all_anchors = all_anchors.reshape((K * A, 4))

        total_anchors = int(K * A)
        anchors_num_list.append(total_anchors)
        # only keep anchors inside the image
        inds_inside = np.where(
            (all_anchors[:, 0] >= -allowed_border)
            & (all_anchors[:, 1] >= -allowed_border)
            & (all_anchors[:, 2] < im_info[1] + allowed_border)
            & (all_anchors[:, 3] < im_info[0] + allowed_border))[0]
        if DEBUG:
            print('total_anchors', total_anchors)
            print('inds_inside', len(inds_inside))

        # keep only inside anchors
        anchors = all_anchors[inds_inside, :]

        anchors_list.append(anchors)
        inds_inside_list.append(inds_inside)

    # Concat anchors from each level
    anchors = np.concatenate(anchors_list)
    for i in range(1, len(inds_inside_list)):
        inds_inside_list[i] = inds_inside_list[i] + sum(anchors_num_list[:i])
    inds_inside = np.concatenate(inds_inside_list)
    total_anchors = sum(anchors_num_list)

    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.empty((len(inds_inside), ), dtype=np.float32)
    labels.fill(-1)

    if gt_boxes.size > 0:
        # overlap between the anchors and the gt boxes
        # overlaps (ex, gt)
        overlaps = bbox_overlaps(anchors.astype(np.float64),
                                 gt_boxes.astype(np.float64))
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps,
                                   np.arange(overlaps.shape[1])]
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

        if not config.train.rpn_clobber_positives:
            # assign bg labels first so that positive labels can clobber them
            labels[max_overlaps < config.train.rpn_negative_overlap] = 0

        # fg label: for each gt, anchor with highest overlap
        labels[gt_argmax_overlaps] = 1

        # fg label: above threshold IoU
        labels[max_overlaps >= config.train.rpn_positive_overlap] = 1

        if config.train.rpn_clobber_positives:
            # assign bg labels last so that negative labels can clobber positives
            labels[max_overlaps < config.train.rpn_negative_overlap] = 0
    else:
        labels[:] = 0

    # subsample positive labels if we have too many
    num_fg = int(config.train.rpn_fg_fraction * config.train.rpn_batch_size)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds,
                                  size=(len(fg_inds) - num_fg),
                                  replace=False)
        if DEBUG:
            disable_inds = fg_inds[:(len(fg_inds) - num_fg)]
        labels[disable_inds] = -1

    # subsample negative labels if we have too many
    num_bg = config.train.rpn_batch_size - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(bg_inds,
                                  size=(len(bg_inds) - num_bg),
                                  replace=False)
        if DEBUG:
            disable_inds = bg_inds[:(len(bg_inds) - num_bg)]
        labels[disable_inds] = -1

    bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if gt_boxes.size > 0:
        bbox_targets[:] = bbox_transform(anchors,
                                         gt_boxes[argmax_overlaps, :4])

    bbox_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_weights[labels == 1, :] = np.array(config.train.rpn_bbox_weights)

    if DEBUG:
        _sums = bbox_targets[labels == 1, :].sum(axis=0)
        _squared_sums = (bbox_targets[labels == 1, :]**2).sum(axis=0)
        _counts = np.sum(labels == 1)
        means = _sums / (_counts + 1e-14)
        stds = np.sqrt(_squared_sums / _counts - means**2)
        print('means', means)
        print('stdevs', stds)
    # map up to original set of anchors
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_weights = _unmap(bbox_weights, total_anchors, inds_inside, fill=0)

    if DEBUG:
        if gt_boxes.size > 0:
            print('rpn: max max_overlaps', np.max(max_overlaps))
        print('rpn: num_positives', np.sum(labels == 1))
        print('rpn: num_negatives', np.sum(labels == 0))
        _fg_sum = np.sum(labels == 1)
        _bg_sum = np.sum(labels == 0)
        _count = 1
        print('rpn: num_positive avg', _fg_sum / _count)
        print('rpn: num_negative avg', _bg_sum / _count)

    # reshape per pyramid level
    label_list = list()
    bbox_target_list = list()
    bbox_weight_list = list()
    anchors_num_range = [0] + anchors_num_list
    for i in range(len(feat_strides)):
        feat_height, feat_width = feat_infos[i]
        A = A_list[i]
        start = sum(anchors_num_range[:i + 1])
        end = start + anchors_num_range[i + 1]
        label = labels[start:end]
        bbox_target = bbox_targets[start:end]
        bbox_weight = bbox_weights[start:end]

        label = label.reshape(
            (1, feat_height, feat_width, A)).transpose(0, 3, 1, 2)
        label = label.reshape((1, A * feat_height * feat_width))
        bbox_target = bbox_target.reshape(
            (1, feat_height * feat_width, A * 4)).transpose(0, 2, 1)
        bbox_weight = bbox_weight.reshape(
            (1, feat_height * feat_width, A * 4)).transpose((0, 2, 1))

        label_list.append(label)
        bbox_target_list.append(bbox_target)
        bbox_weight_list.append(bbox_weight)

    label_concat = np.concatenate(label_list, axis=1)
    bbox_target_concat = np.concatenate(bbox_target_list, axis=2)
    bbox_weight_concat = np.concatenate(bbox_weight_list, axis=2)

    label = {
        'label': label_concat,
        'bbox_target': bbox_target_concat,
        'bbox_weight': bbox_weight_concat
    }
    return label
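
The while-loop above infers each level's feature-map size from the image size because feat_shape is not passed in. A compact sketch of the intent, assuming a plain ceil-halving per stride-2 stage (the exact code above mixes ceil and floor division, so results can differ by one at some input sizes):

import numpy as np

def feat_size(im_size, stride):
    """Approximate feature-map size after log2(stride) halvings."""
    size, s = im_size, stride
    while s > 1:
        size = int(np.ceil(size / 2))
        s //= 2
    return size

print([feat_size(600, s) for s in (4, 8, 16, 32, 64)])
# -> [150, 75, 38, 19, 10]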
Example 7
    def evaluate_box_proposals(self,
                               roidb,
                               thresholds=None,
                               area='all',
                               limit=None):
        """Evaluate detection proposal recall metrics. This function is a much
        faster alternative to the official COCO API recall evaluation code. However,
        it produces slightly different results.
        """
        # Record max overlap value for each gt box
        # Return vector of overlap values
        areas = {
            'all': 0,
            'small': 1,
            'medium': 2,
            'large': 3,
            '96-128': 4,
            '128-256': 5,
            '256-512': 6,
            '512-inf': 7
        }
        area_ranges = [
            [0**2, 1e5**2],    # all
            [0**2, 32**2],     # small
            [32**2, 96**2],    # medium
            [96**2, 1e5**2],   # large
            [96**2, 128**2],   # 96-128
            [128**2, 256**2],  # 128-256
            [256**2, 512**2],  # 256-512
            [512**2, 1e5**2],  # 512-inf
        ]
        assert area in areas, 'Unknown area range: {}'.format(area)
        area_range = area_ranges[areas[area]]
        gt_overlaps = np.zeros(0)
        num_pos = 0
        for entry in roidb:
            gt_inds = np.where((entry['gt_classes'] > 0)
                               & (entry['is_crowd'] == 0))[0]
            gt_boxes = entry['boxes'][gt_inds, :]
            gt_areas = entry['seg_areas'][gt_inds]
            valid_gt_inds = np.where((gt_areas >= area_range[0])
                                     & (gt_areas <= area_range[1]))[0]
            gt_boxes = gt_boxes[valid_gt_inds, :]
            num_pos += len(valid_gt_inds)
            non_gt_inds = np.where(entry['gt_classes'] == 0)[0]
            boxes = entry['boxes'][non_gt_inds, :]
            if boxes.shape[0] == 0:
                continue
            if limit is not None and boxes.shape[0] > limit:
                boxes = boxes[:limit, :]
            overlaps = bbox_transform.bbox_overlaps(
                boxes.astype(dtype=np.float32, copy=False),
                gt_boxes.astype(dtype=np.float32, copy=False))
            _gt_overlaps = np.zeros((gt_boxes.shape[0]))
            for j in range(min(boxes.shape[0], gt_boxes.shape[0])):
                # find which proposal box maximally covers each gt box
                argmax_overlaps = overlaps.argmax(axis=0)
                # and get the iou amount of coverage for each gt box
                max_overlaps = overlaps.max(axis=0)
                # find which gt box is 'best' covered (i.e. 'best' = most iou)
                gt_ind = max_overlaps.argmax()
                gt_ovr = max_overlaps.max()
                assert gt_ovr >= 0
                # find the proposal box that covers the best covered gt box
                box_ind = argmax_overlaps[gt_ind]
                # record the iou coverage of this gt box
                _gt_overlaps[j] = overlaps[box_ind, gt_ind]
                assert _gt_overlaps[j] == gt_ovr
                # mark the proposal box and the gt box as used
                overlaps[box_ind, :] = -1
                overlaps[:, gt_ind] = -1
            # append recorded iou coverage level
            gt_overlaps = np.hstack((gt_overlaps, _gt_overlaps))

        gt_overlaps = np.sort(gt_overlaps)
        if thresholds is None:
            step = 0.05
            thresholds = np.arange(0.5, 0.95 + 1e-5, step)
        recalls = np.zeros_like(thresholds)
        # compute recall for each iou threshold
        for i, t in enumerate(thresholds):
            recalls[i] = (gt_overlaps >= t).sum() / float(num_pos)
        # ar = 2 * np.trapz(recalls, thresholds)
        ar = recalls.mean()
        return {
            'ar': ar,
            'recalls': recalls,
            'thresholds': thresholds,
            'gt_overlaps': gt_overlaps,
            'num_pos': num_pos
        }
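
Given the sorted gt_overlaps vector, recall at an IoU threshold is simply the fraction of valid gt boxes covered at least that well, and the returned 'ar' is the mean of those recalls. A toy numeric check with hypothetical overlap values:

import numpy as np

gt_overlaps = np.array([0.2, 0.55, 0.6, 0.8, 0.95])  # hypothetical
num_pos = len(gt_overlaps)
thresholds = np.arange(0.5, 0.95 + 1e-5, 0.05)
recalls = np.array([(gt_overlaps >= t).sum() / float(num_pos)
                    for t in thresholds])
print(recalls[0])      # recall at IoU 0.5 -> 0.8 (4 of 5 gt boxes covered)
print(recalls.mean())  # average recall over the IoU thresholds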
Example 8
def add_mask_rcnn_blobs(blobs, sampled_boxes, roidb, im_scale, batch_idx):
    """Add Mask R-CNN specific blobs to the input blob dictionary."""
    # Prepare the mask targets by associating one gt mask to each training roi
    # that has a fg (non-bg) class label.
    M = config.network.mask_size
    polys_gt_inds = np.where((roidb['gt_classes'] > 0)
                             & (roidb['is_crowd'] == 0))[0]
    polys_gt = [roidb['segms'][i] for i in polys_gt_inds]
    boxes_from_polys = polys_to_boxes(polys_gt)
    fg_inds = np.where(blobs['labels_int32'] > 0)[0]
    roi_has_mask = blobs['labels_int32'].copy()
    roi_has_mask[roi_has_mask > 0] = 1

    if fg_inds.shape[0] > 0:
        # Class labels for the foreground rois
        mask_class_labels = blobs['labels_int32'][fg_inds]
        masks = np.zeros((fg_inds.shape[0], M**2), dtype=np.int32)

        # Find overlap between all foreground rois and the bounding boxes
        # enclosing each segmentation
        rois_fg = sampled_boxes[fg_inds]
        overlaps_bbfg_bbpolys = bbox_overlaps(
            rois_fg.astype(np.float32, copy=False),
            boxes_from_polys.astype(np.float32, copy=False))
        # Map from each fg rois to the index of the mask with highest overlap
        # (measured by bbox overlap)
        fg_polys_inds = np.argmax(overlaps_bbfg_bbpolys, axis=1)

        # add fg targets
        for i in range(rois_fg.shape[0]):
            fg_polys_ind = fg_polys_inds[i]
            poly_gt = polys_gt[fg_polys_ind]
            roi_fg = rois_fg[i]
            # Rasterize the portion of the polygon mask within the given fg roi
            # to an M x M binary image
            mask = polys_to_mask_wrt_box(poly_gt, roi_fg, M)
            mask = np.array(mask > 0, dtype=np.int32)  # Ensure it's binary
            masks[i, :] = np.reshape(mask, M**2)
    else:  # If there are no fg masks (it does happen)
        # The network cannot handle empty blobs, so we must provide a mask.
        # We simply take the first bg roi, give it an all -1's mask (ignore
        # label), and label it with class zero (bg).
        bg_inds = np.where(blobs['labels_int32'] == 0)[0]
        # rois_fg is actually one background roi, but that's ok because ...
        rois_fg = sampled_boxes[bg_inds[0]].reshape((1, -1))
        # We give it an -1's blob (ignore label)
        masks = -np.ones((1, M**2), dtype=np.int32)
        # We label it with class = 0 (background)
        mask_class_labels = np.zeros((1, ), dtype=np.float32)
        # Mark that the first roi has a mask
        roi_has_mask[0] = 1

    masks = _expand_to_class_specific_mask_targets(masks, mask_class_labels)

    # Scale rois_fg and format as (batch_idx, x1, y1, x2, y2)
    rois_fg *= im_scale
    repeated_batch_idx = batch_idx * np.ones(
        (rois_fg.shape[0], 1), dtype=np.float32)
    rois_fg = np.hstack((repeated_batch_idx, rois_fg))

    # Update blobs dict with Mask R-CNN blobs
    blobs['mask_rois'] = rois_fg
    blobs['roi_has_mask_int32'] = roi_has_mask
    blobs['mask_int32'] = masks
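
_expand_to_class_specific_mask_targets is referenced above but not shown. A sketch under the assumption of a Detectron-style layout, where class c's M*M targets occupy one column block per class and every other block holds the -1 ignore label (num_classes and M are passed explicitly here, whereas the original presumably reads them from config):

import numpy as np

def expand_to_class_specific_mask_targets(masks, mask_class_labels,
                                          num_classes, M):
    """Scatter each roi's M*M mask into the column block of its class."""
    mask_targets = -np.ones((masks.shape[0], num_classes * M**2),
                            dtype=np.int32)
    for i in range(masks.shape[0]):
        cls = int(mask_class_labels[i])
        start, end = M**2 * cls, M**2 * (cls + 1)
        if cls > 0:  # background rois keep all-ignore targets
            mask_targets[i, start:end] = masks[i, :]
    return mask_targets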
Example 9
    def forward(self, cls_prob_p2, cls_prob_p3, cls_prob_p4, cls_prob_p5,
                cls_prob_p6, bbox_pred_p2, bbox_pred_p3, bbox_pred_p4,
                bbox_pred_p5, bbox_pred_p6, im_info):
        device_id = cls_prob_p2.get_device()
        if self.use_softnms:
            nms = soft_nms_wrapper(self.threshold)
        else:
            nms = gpu_nms_wrapper(self.threshold, device_id=device_id)
        context = torch.device('cuda', device_id)

        batch_size = cls_prob_p2.shape[0]
        if batch_size > 1:
            raise ValueError(
                'Multiple images per device are not supported')

        # for each (H, W) location i
        #   generate A anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the A anchors
        # clip predicted boxes to image
        # remove predicted boxes with either height or width < threshold
        # sort all (proposal, score) pairs by score from highest to lowest
        # take top pre_nms_topN proposals before NMS
        # apply NMS with threshold 0.7 to remaining proposals
        # take after_nms_topN proposals after NMS
        # return the top proposals (-> RoIs top, scores top)

        cls_probs = [
            cls_prob_p2, cls_prob_p3, cls_prob_p4, cls_prob_p5, cls_prob_p6
        ]
        bbox_preds = [
            bbox_pred_p2, bbox_pred_p3, bbox_pred_p4, bbox_pred_p5,
            bbox_pred_p6
        ]

        pre_nms_topN = self.rpn_pre_nms_top_n
        post_nms_topN = self.rpn_post_nms_top_n
        min_size = self.rpn_min_size

        proposal_list = []
        score_list = []
        im_info = im_info.numpy()

        for s in range(len(self.feat_stride)):
            stride = int(self.feat_stride[s])
            sub_anchors = generate_anchors(stride=stride,
                                           sizes=self.scales * stride,
                                           aspect_ratios=self.ratios)
            scores = cls_probs[s].cpu().numpy()
            bbox_deltas = bbox_preds[s].cpu().numpy()
            # 1. Generate proposals from bbox_deltas and shifted anchors
            # use real image size instead of padded feature map sizes
            height, width = scores.shape[-2:]

            # Enumerate all shifts
            shift_x = np.arange(0, width) * stride
            shift_y = np.arange(0, height) * stride
            shift_x, shift_y = np.meshgrid(shift_x, shift_y)
            shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                                shift_x.ravel(), shift_y.ravel())).transpose()

            # Enumerate all shifted anchors:
            #
            # add A anchors (1, A, 4) to
            # cell K shifts (K, 1, 4) to get
            # shift anchors (K, A, 4)
            # reshape to (K*A, 4) shifted anchors
            A = self.num_anchors
            K = shifts.shape[0]
            anchors = sub_anchors.reshape((1, A, 4)) + shifts.reshape(
                (1, K, 4)).transpose((1, 0, 2))
            anchors = anchors.reshape((K * A, 4))
            # print(np.linalg.norm(anchors))

            # Transpose and reshape predicted bbox transformations to get them
            # into the same order as the anchors:
            #
            # bbox deltas will be (1, 4 * A, H, W) format
            # transpose to (1, H, W, 4 * A)
            # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
            # in slowest to fastest order
            # bbox_deltas = self._clip_pad(bbox_deltas, (height, width))
            bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

            # Same story for the scores:
            #
            # scores are (1, A, H, W) format
            # transpose to (1, H, W, A)
            # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
            # scores = self._clip_pad(scores, (height, width))
            scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

            if self.individual_proposals:
                # 4. sort all (proposal, score) pairs by score from highest to lowest
                # 5. take top pre_nms_topN (e.g. 6000)
                if pre_nms_topN <= 0 or pre_nms_topN >= len(scores):
                    order = np.argsort(-scores.squeeze())
                else:
                    # Avoid sorting possibly large arrays; first partition to
                    # get the top K unsorted, then sort just those
                    # (~20x faster for 200k scores)
                    inds = np.argpartition(-scores.squeeze(),
                                           pre_nms_topN)[:pre_nms_topN]
                    order = np.argsort(-scores[inds].squeeze())
                    order = inds[order]
                bbox_deltas = bbox_deltas[order, :]
                anchors = anchors[order, :]
                scores = scores[order]

            # Convert anchors into proposals via bbox transformations
            proposals = bbox_pred(anchors, bbox_deltas)

            # 2. clip predicted boxes to image
            proposals = clip_boxes(proposals, im_info[:2])

            # 3. remove predicted boxes with either height or width < threshold
            # (NOTE: convert min_size to input image scale stored in im_info[2])
            keep = self._filter_boxes(proposals, min_size * im_info[2])
            proposals = proposals[keep, :]
            scores = scores[keep]

            if self.crowd_gt_roi is not None:
                proposal_by_gt_overlap = bbox_overlaps(
                    proposals, self.crowd_gt_roi * im_info[2])
                proposal_by_gt_overlap_max = proposal_by_gt_overlap.max(axis=1)
                keep = np.where(proposal_by_gt_overlap_max < 0.5)[0]
                proposals = proposals[keep, :]
                scores = scores[keep]

            if self.individual_proposals:
                # 6. apply nms (e.g. threshold = 0.7)
                # 7. take after_nms_topN (e.g. 300)
                # 8. return the top proposals (-> RoIs top)
                if self.use_softnms:
                    det, keep = nms(
                        np.hstack((proposals, scores)).astype(np.float32))
                    det = det[keep]
                    det = det[np.argsort(det[:, 4])[::-1]]
                    if post_nms_topN > 0:
                        det = det[:post_nms_topN]
                    proposals = det[:, :4]
                    scores = det[:, 4]
                else:
                    keep = nms(
                        np.hstack((proposals, scores)).astype(np.float32))
                    if post_nms_topN > 0:
                        keep = keep[:post_nms_topN]
                    proposals = proposals[keep, :]
                    scores = scores[keep]

            proposal_list.append(proposals)
            score_list.append(scores)

        proposals = np.vstack(proposal_list)
        scores = np.vstack(score_list)

        if not self.individual_proposals:
            # 4. sort all (proposal, score) pairs by score from highest to lowest
            # 5. take top pre_nms_topN (e.g. 6000)
            order = scores.ravel().argsort()[::-1]
            if pre_nms_topN > 0:
                order = order[:pre_nms_topN]
            proposals = proposals[order, :]
            scores = scores[order]

            # 6. apply nms (e.g. threshold = 0.7)
            # 7. take after_nms_topN (e.g. 300)
            # 8. return the top proposals (-> RoIs top)

            if self.use_softnms:
                det, keep = nms(
                    np.hstack((proposals, scores)).astype(np.float32))
                det = det[keep]
                det = det[np.argsort(det[:, 4])[::-1]]
                if post_nms_topN > 0:
                    det = det[:post_nms_topN]
                proposals = det[:, :4]
                scores = det[:, 4]
            else:
                det = np.hstack((proposals, scores)).astype(np.float32)
                keep = nms(det)
                if post_nms_topN > 0:
                    keep = keep[:post_nms_topN]
                # pad to ensure output size remains unchanged
                if len(keep) < post_nms_topN:
                    pad = np.random.choice(keep,
                                           size=post_nms_topN - len(keep))
                    keep = np.hstack((keep, pad))
                proposals = proposals[keep, :]
                scores = scores[keep]
        else:
            scores = scores.squeeze()

        # Output rois array
        # Our RPN implementation only supports a single input image, so every
        # row gets the same batch index (self.batch_idx)
        batch_inds = np.ones(
            (proposals.shape[0], 1), dtype=np.float32) * self.batch_idx
        blob = np.hstack((batch_inds, proposals.astype(np.float32,
                                                       copy=False)))
        rois_out = torch.tensor(blob, requires_grad=False).pin_memory().to(
            context, dtype=torch.float32, non_blocking=True)
        scores_out = torch.tensor(scores, requires_grad=False).pin_memory().to(
            context, dtype=torch.float32, non_blocking=True)
        return rois_out, scores_out
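
clip_boxes and _filter_boxes are assumed to behave as in standard RPN proposal layers. A minimal NumPy sketch under that assumption (helper names are ours):

import numpy as np

def clip_boxes_np(boxes, im_shape):
    """Clip (x1, y1, x2, y2) boxes to lie inside an (H, W) image."""
    boxes[:, 0::4] = np.clip(boxes[:, 0::4], 0, im_shape[1] - 1)
    boxes[:, 1::4] = np.clip(boxes[:, 1::4], 0, im_shape[0] - 1)
    boxes[:, 2::4] = np.clip(boxes[:, 2::4], 0, im_shape[1] - 1)
    boxes[:, 3::4] = np.clip(boxes[:, 3::4], 0, im_shape[0] - 1)
    return boxes

def filter_boxes_np(boxes, min_size):
    """Indices of boxes whose width and height are both >= min_size."""
    ws = boxes[:, 2] - boxes[:, 0] + 1
    hs = boxes[:, 3] - boxes[:, 1] + 1
    return np.where((ws >= min_size) & (hs >= min_size))[0]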