예제 #1
0
def proposal_target_layer(rpn_bbox, rpn_cls_prob, gt_boxes, num_classes):
    """Sample proposals and build classification / box-regression targets.

    Ground-truth boxes are appended to the proposals with a confidence of
    1.0.  Proposals scoring ``>= 0.5`` are foreground candidates and those
    scoring in ``[0.1, 0.5)`` are background candidates; each group is
    randomly sub-sampled when it exceeds its share of ``BATCH_SIZE``.

    Args:
        rpn_bbox: 2-D array of proposal boxes, one per row.  It is stacked
            with ``gt_boxes[:, :-1]``, so the column counts must match.
        rpn_cls_prob: 2-D array whose column 1 is used as the confidence
            score of each proposal.
        gt_boxes: 2-D array; the last column is the class id and the
            preceding columns are the box coordinates.
        num_classes: Number of object classes; the regression target has
            ``(num_classes + 1) * 4`` columns.

    Returns:
        Tuple ``(labels, bbox_reg, rpn_bbox)``:
        ``labels`` is an int32 vector with the positives first (ground-truth
        class id + 1) followed by zeros for the negatives; ``bbox_reg`` is a
        float32 array with each positive's regression target written into
        the 4-column slot of its label; ``rpn_bbox`` holds the sampled
        boxes in the same positive-then-negative order.
    """
    # NOTE(review): assumes gt_boxes has at least one row; with no ground
    # truth the argmax over overlaps below has nothing to select.
    gt_coords = gt_boxes[:, 0:-1]
    confidence_scores = rpn_cls_prob[:, 1]

    # Add ground truth boxes as part of proposals.
    rpn_bbox = np.vstack([rpn_bbox, gt_coords])
    # np.concatenate takes ONE sequence of arrays; a second positional
    # argument would be interpreted as the axis.
    confidence_scores = np.concatenate(
        [confidence_scores, np.ones(gt_boxes.shape[0], np.float32)])

    # Sample objects: keep every candidate unless there are too many.
    fg_cnt = int(BATCH_SIZE * FG_RATIO)
    pos_inds = np.where(confidence_scores >= 0.5)[0]
    if len(pos_inds) > fg_cnt:
        pos_inds = np.random.choice(pos_inds, size=fg_cnt, replace=False)

    # Sample backgrounds to fill the rest of the batch.
    bg_cnt = BATCH_SIZE - len(pos_inds)
    neg_inds = np.where((confidence_scores >= 0.1)
                        & (confidence_scores < 0.5))[0]
    if len(neg_inds) > bg_cnt:
        neg_inds = np.random.choice(neg_inds, size=bg_cnt, replace=False)

    # Label each positive with the class of its best-overlapping gt box,
    # shifted by one so that 0 is reserved for background.
    pos_bbox = rpn_bbox[pos_inds]
    overlaps = bbox.bbox_overlaps(pos_bbox, gt_coords)
    argmax_overlaps = np.argmax(overlaps, axis=-1)
    # Cast to an integer dtype: the labels are used as column offsets below.
    pos_labels = (gt_boxes[:, -1][argmax_overlaps] + 1).astype(np.int32)
    neg_labels = np.zeros(len(neg_inds), np.int32)
    labels = np.concatenate([pos_labels, neg_labels])

    # Regression targets are computed for the sampled positives only, so
    # that row i of the result corresponds to pos_labels[i].
    bbox_reg = np.zeros([len(labels), (num_classes + 1) * 4], np.float32)
    pos_targets = bbox.bbox_transform(pos_bbox, gt_coords[argmax_overlaps])
    rows = np.arange(len(pos_labels))[:, np.newaxis]
    cols = pos_labels[:, np.newaxis] * 4 + np.arange(4)
    bbox_reg[rows, cols] = pos_targets

    neg_bbox = rpn_bbox[neg_inds]
    rpn_bbox = np.vstack([pos_bbox, neg_bbox])

    return labels, bbox_reg, rpn_bbox
예제 #2
0
def anchor_target_layer(gt_boxes, all_anchors, image_shape, feature_map_shape, k):
    """Assign a training label and a regression target to every anchor.

    Anchors whose columns 0/1 are negative, or whose columns 2/3 reach
    ``image_shape[1]`` / ``image_shape[0]``, are ignored.  Among the
    remaining anchors, those with a best overlap below ``BG_LOW_THRES``
    become background (0); those that tie the best overlap of some
    ground-truth box, or exceed ``FG_HIGH_THRES``, become foreground (1).
    Both groups are randomly thinned so that at most ``SAMPLE_NUMBER``
    anchors stay labelled; everything else is marked -1.

    :param gt_boxes: ground-truth boxes, one per row
    :param all_anchors: 2-D array of anchors, one per row
    :param image_shape: indexable whose items 0 and 1 bound anchor columns
        3 and 2 respectively
    :param feature_map_shape: not used by this function
    :param k: not used by this function
    :return: ``(labels, targets)`` covering every row of ``all_anchors``
    """
    anchor_total = all_anchors.shape[0]

    # Without any object there is nothing to match: all anchors are
    # background and all targets are zero.
    if len(gt_boxes) == 0:
        return (np.zeros((len(all_anchors),), dtype=np.int32),
                np.zeros(all_anchors.shape, dtype=np.float32))

    # Restrict the work to anchors lying inside the image.
    inside = ((all_anchors[:, 0] >= 0) &
              (all_anchors[:, 1] >= 0) &
              (all_anchors[:, 2] < image_shape[1]) &
              (all_anchors[:, 3] < image_shape[0]))
    valid_idx = np.where(inside)[0]
    anchors = all_anchors[valid_idx, :]
    n_valid = len(valid_idx)

    labels = np.full((n_valid,), -1, dtype=np.int32)

    overlaps = bbox.bbox_overlaps(anchors, gt_boxes)

    # Best ground-truth box for every anchor.
    best_gt = np.argmax(overlaps, axis=1)
    best_gt_overlap = overlaps[np.arange(0, n_valid, 1), best_gt]

    # Every anchor that ties the best overlap of some ground-truth box.
    best_anchor = np.argmax(overlaps, axis=0)
    per_gt_best = overlaps[best_anchor, np.arange(0, gt_boxes.shape[0], 1)]
    tied_anchor_rows = np.where(overlaps == per_gt_best)[0]

    # Order matters: foreground assignments override background ones.
    labels[best_gt_overlap < BG_LOW_THRES] = 0
    labels[tied_anchor_rows] = 1
    labels[best_gt_overlap > FG_HIGH_THRES] = 1

    targets = bbox.bbox_transform(anchors, gt_boxes[best_gt, :])

    def _drop_surplus(candidates, quota):
        # Randomly ignore candidates beyond the quota.
        surplus = len(candidates) - quota
        if surplus > 0:
            dropped = np.random.choice(candidates, size=surplus, replace=False)
            labels[dropped] = -1

    # Sampling positive and negative anchors
    _drop_surplus(np.where(labels == 1)[0], int(SAMPLE_NUMBER * FG_RATIO))
    _drop_surplus(np.where(labels == 0)[0],
                  SAMPLE_NUMBER - np.sum(labels == 1))

    # Scatter the results back onto the full anchor set.
    labels = _unmap(labels, anchor_total, valid_idx, -1)
    targets = _unmap(targets, anchor_total, valid_idx, 0)

    return labels, targets
예제 #3
0
    def worker(self, data):
        """Build anchor labels and box-regression targets for one image crop.

        ``data`` is a sequence whose first eight items are ``(im_info,
        cur_crop, im_scale, nids, gtids, gt_boxes, boxes, classes)``; an
        optional ninth item holds mask polygons.

        Ground-truth boxes are shifted by ``cur_crop``, scaled by
        ``im_scale``, clipped and filtered.  Boxes that do not overlap a
        box from ``boxes[intersect(gtids, nids)]`` with a value of exactly
        1 are treated as "invalid": anchors overlapping them by more than
        0.3 are ignored (label -1) instead of being used for training.

        Returns a list ``[labels, bbox_targets, pids, fgt_boxes]``:
        ``labels`` is a float16 ``mx.nd.array`` of shape
        ``(1, num_anchors * feat_height * feat_width)``; ``bbox_targets``
        is a NumPy array with the targets at the positions where the
        weight is 1; ``pids`` holds those positions; ``fgt_boxes`` is a
        ``(100, 5)`` array of boxes plus class, padded with -1.  When
        polygons were supplied, their encoding is appended as a fifth item.
        """
        im_info, cur_crop, im_scale, nids, gtids, gt_boxes, boxes, classes = data[0:8]
        has_mask = len(data) > 8

        anchors = self.all_anchors.copy()
        # Keep anchors that stick out of the image by at most 32 pixels.
        # NOTE(review): x2 is bounded by im_info[0] and y2 by im_info[1];
        # confirm this matches the axis order of im_info.
        inds_inside = np.where((anchors[:, 0] >= -32) &
                               (anchors[:, 1] >= -32) &
                               (anchors[:, 2] < im_info[0] + 32) &
                               (anchors[:, 3] < im_info[1] + 32))[0]

        anchors = anchors[inds_inside, :]
        labels = np.empty((len(inds_inside),), dtype=np.float32)
        labels.fill(-1)
        total_anchors = int(self.K * self.num_anchors)

        # Move the boxes into the crop's coordinate frame.
        # NOTE: this shifts the gt_boxes array taken from `data` in place.
        gt_boxes[:, 0] -= cur_crop[0]
        gt_boxes[:, 2] -= cur_crop[0]
        gt_boxes[:, 1] -= cur_crop[1]
        gt_boxes[:, 3] -= cur_crop[1]

        vgt_boxes = boxes[np.intersect1d(gtids, nids)]

        vgt_boxes[:, 0] -= cur_crop[0]
        vgt_boxes[:, 2] -= cur_crop[0]
        vgt_boxes[:, 1] -= cur_crop[1]
        vgt_boxes[:, 3] -= cur_crop[1]

        gt_boxes = clip_boxes(np.round(gt_boxes * im_scale), im_info[:2])
        vgt_boxes = clip_boxes(np.round(vgt_boxes * im_scale), im_info[:2])

        ids = filter_boxes(gt_boxes, 10)
        if len(ids) == 0:
            gt_boxes = np.zeros((0, 4))
            classes = np.zeros((0, 1))

        if has_mask:
            # Shift and crop the mask polygons
            mask_polys = crop_polys(data[8], cur_crop, im_scale)
            # Create the padded encoded array
            if len(ids) > 0:
                # Only boxes that have a matching polygon are kept.
                polylen = len(mask_polys)
                kept = [i for i in ids if i < polylen]
                mask_polys = [mask_polys[i] for i in kept]
                # Decide on the filtered set, not on the unfiltered boxes:
                # an empty selection must still yield 2-D arrays so the
                # row/column indexing further down keeps working.
                if kept:
                    gt_boxes = np.array([gt_boxes[i] for i in kept])
                    classes = np.array([classes[i] for i in kept]).reshape(len(kept), 1)
                else:
                    gt_boxes = np.zeros((0, 4))
                    classes = np.zeros((0, 1))

                encoded_polys = poly_encoder(mask_polys, classes[:, 0] - 1,
                                             max_poly_len=self.max_poly_len, max_n_gts=self.max_n_gts)
            else:
                encoded_polys = -np.ones((self.max_n_gts, self.max_poly_len), dtype=np.float32)
        else:
            if len(ids) > 0:
                gt_boxes = gt_boxes[ids]
                classes = classes[ids]

        # Snapshot of all surviving boxes (valid and invalid) for the output.
        agt_boxes = gt_boxes.copy()
        ids = filter_boxes(vgt_boxes, 10)
        if len(ids) > 0:
            vgt_boxes = vgt_boxes[ids]
        else:
            vgt_boxes = np.zeros((0, 4))

        if len(vgt_boxes) > 0:
            ov = bbox_overlaps(np.ascontiguousarray(gt_boxes).astype(float),
                               np.ascontiguousarray(vgt_boxes).astype(float))
            mov = np.max(ov, axis=1)
        else:
            mov = np.zeros((len(gt_boxes)))

        # A box is valid only when its best overlap with vgt_boxes is 1.
        invalid_gtids = np.where(mov < 1)[0]
        valid_gtids = np.where(mov == 1)[0]
        invalid_boxes = gt_boxes[invalid_gtids, :]
        gt_boxes = gt_boxes[valid_gtids, :]

        def _unmap(data, count, inds, fill=0):
            """Unmap a subset inds of data into original data of size count."""
            if len(data.shape) == 1:
                ret = np.empty((count,), dtype=np.float32)
                ret.fill(fill)
                ret[inds] = data
            else:
                ret = np.empty((count,) + data.shape[1:], dtype=np.float32)
                ret.fill(fill)
                ret[inds, :] = data
            return ret

        if gt_boxes.size > 0:
            # overlap between the anchors and the gt boxes
            # overlaps (ex, gt)
            # `float` replaces the `np.float` alias removed from NumPy.
            overlaps = bbox_overlaps(anchors.astype(float), gt_boxes.astype(float))
            argmax_overlaps = overlaps.argmax(axis=1)
            max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
            gt_argmax_overlaps = overlaps.argmax(axis=0)
            gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])]
            # Every anchor that ties the best overlap of some gt box.
            gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

            labels[max_overlaps < self.neg_thresh] = 0
            labels[gt_argmax_overlaps] = 1

            # fg label: above threshold IoU
            labels[max_overlaps >= self.pos_thresh] = 1
        else:
            labels[:] = 0

        # Anchors close to an invalid box are ignored, whatever label they
        # were given above.
        if len(invalid_boxes) > 0:
            overlapsn = bbox_overlaps(anchors.astype(float), invalid_boxes.astype(float))
            max_overlapsn = overlapsn.max(axis=1)
            labels[max_overlapsn > 0.3] = -1

        # subsample positive labels if we have too many
        fg_inds = np.where(labels == 1)[0]
        if len(fg_inds) > self.num_fg:
            disable_inds = npr.choice(fg_inds, size=(len(fg_inds) - self.num_fg), replace=False)
            labels[disable_inds] = -1

        # subsample negative labels if we have too many
        num_bg = self.batch_size - np.sum(labels == 1)
        bg_inds = np.where(labels == 0)[0]
        if len(bg_inds) > num_bg:
            disable_inds = npr.choice(bg_inds, size=(len(bg_inds) - num_bg), replace=False)
            labels[disable_inds] = -1

        bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32)
        if gt_boxes.size > 0:
            bbox_targets[:] = bbox_transform(anchors, gt_boxes[argmax_overlaps, :4])

        # Only foreground anchors contribute to the regression loss.
        bbox_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
        bbox_weights[labels == 1, :] = 1.0

        # map up to original set of anchors
        labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
        bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
        bbox_weights = _unmap(bbox_weights, total_anchors, inds_inside, fill=0)

        labels = labels.reshape((1, self.feat_height, self.feat_width, self.num_anchors)).transpose(0, 3, 1, 2)
        labels = labels.reshape((1, self.num_anchors * self.feat_height * self.feat_width)).astype(np.float16)
        bbox_targets = bbox_targets.reshape((self.feat_height, self.feat_width, self.num_anchors * 4)).transpose(2, 0, 1)
        bbox_weights = bbox_weights.reshape((self.feat_height, self.feat_width, self.num_anchors * 4)).transpose((2, 0, 1))
        # Keep only the targets at positions whose weight is 1.
        pids = np.where(bbox_weights == 1)
        bbox_targets = bbox_targets[pids]

        fgt_boxes = -np.ones((100, 5))
        if len(agt_boxes) > 0:
            # Truncate the source as well as the destination so that more
            # than 100 boxes do not cause a shape mismatch.
            n_kept = min(len(agt_boxes), 100)
            fgt_boxes[:n_kept, :] = np.hstack((agt_boxes, classes))[:n_kept]

        rval = [mx.nd.array(labels, dtype='float16'), bbox_targets, mx.nd.array(pids), mx.nd.array(fgt_boxes)]
        if has_mask:
            rval.append(mx.nd.array(encoded_polys))
        return rval
예제 #4
0
    def worker(self, data):
        """Build anchor labels and box-regression targets for one image crop.

        ``data`` is a sequence whose first eight items are ``(im_info,
        cur_crop, im_scale, nids, gtids, gt_boxes, boxes, classes)``; an
        optional ninth item holds mask polygons.

        Ground-truth boxes are shifted by ``cur_crop``, scaled by
        ``im_scale``, clipped and filtered.  Boxes that do not overlap a
        box from ``boxes[intersect(gtids, nids)]`` with a value of exactly
        1 are treated as "invalid": anchors overlapping them by more than
        0.3 are ignored (label -1) instead of being used for training.

        Returns a list ``[labels, bbox_targets, pids, fgt_boxes]``:
        ``labels`` is a float16 ``mx.nd.array`` of shape
        ``(1, num_anchors * feat_height * feat_width)``; ``bbox_targets``
        is a NumPy array with the targets at the positions where the
        weight is 1; ``pids`` holds those positions; ``fgt_boxes`` is a
        ``(100, 5)`` array of boxes plus class, padded with -1.  When
        polygons were supplied, their encoding is appended as a fifth item.
        """
        im_info, cur_crop, im_scale, nids, gtids, gt_boxes, boxes, classes = data[0:8]
        has_mask = len(data) > 8

        anchors = self.all_anchors.copy()
        # Keep anchors that stick out of the image by at most 32 pixels.
        # NOTE(review): x2 is bounded by im_info[0] and y2 by im_info[1];
        # confirm this matches the axis order of im_info.
        inds_inside = np.where((anchors[:, 0] >= -32) &
                               (anchors[:, 1] >= -32) &
                               (anchors[:, 2] < im_info[0] + 32) &
                               (anchors[:, 3] < im_info[1] + 32))[0]

        anchors = anchors[inds_inside, :]
        labels = np.empty((len(inds_inside),), dtype=np.float32)
        labels.fill(-1)
        total_anchors = int(self.K * self.num_anchors)

        # Move the boxes into the crop's coordinate frame.
        # NOTE: this shifts the gt_boxes array taken from `data` in place.
        gt_boxes[:, 0] -= cur_crop[0]
        gt_boxes[:, 2] -= cur_crop[0]
        gt_boxes[:, 1] -= cur_crop[1]
        gt_boxes[:, 3] -= cur_crop[1]

        vgt_boxes = boxes[np.intersect1d(gtids, nids)]

        vgt_boxes[:, 0] -= cur_crop[0]
        vgt_boxes[:, 2] -= cur_crop[0]
        vgt_boxes[:, 1] -= cur_crop[1]
        vgt_boxes[:, 3] -= cur_crop[1]

        gt_boxes = clip_boxes(np.round(gt_boxes * im_scale), im_info[:2])
        vgt_boxes = clip_boxes(np.round(vgt_boxes * im_scale), im_info[:2])

        ids = filter_boxes(gt_boxes, 10)
        if len(ids) == 0:
            gt_boxes = np.zeros((0, 4))
            classes = np.zeros((0, 1))

        if has_mask:
            # Shift and crop the mask polygons
            mask_polys = crop_polys(data[8], cur_crop, im_scale)
            # Create the padded encoded array
            if len(ids) > 0:
                # Only boxes that have a matching polygon are kept.
                polylen = len(mask_polys)
                kept = [i for i in ids if i < polylen]
                mask_polys = [mask_polys[i] for i in kept]
                # Decide on the filtered set, not on the unfiltered boxes:
                # an empty selection must still yield 2-D arrays so the
                # row/column indexing further down keeps working.
                if kept:
                    gt_boxes = np.array([gt_boxes[i] for i in kept])
                    classes = np.array([classes[i] for i in kept]).reshape(len(kept), 1)
                else:
                    gt_boxes = np.zeros((0, 4))
                    classes = np.zeros((0, 1))

                encoded_polys = poly_encoder(mask_polys, classes[:, 0] - 1,
                                             max_poly_len=self.max_poly_len, max_n_gts=self.max_n_gts)
            else:
                encoded_polys = -np.ones((self.max_n_gts, self.max_poly_len), dtype=np.float32)
        else:
            if len(ids) > 0:
                gt_boxes = gt_boxes[ids]
                classes = classes[ids]

        # Snapshot of all surviving boxes (valid and invalid) for the output.
        agt_boxes = gt_boxes.copy()
        ids = filter_boxes(vgt_boxes, 10)
        if len(ids) > 0:
            vgt_boxes = vgt_boxes[ids]
        else:
            vgt_boxes = np.zeros((0, 4))

        if len(vgt_boxes) > 0:
            ov = bbox_overlaps(np.ascontiguousarray(gt_boxes).astype(float),
                               np.ascontiguousarray(vgt_boxes).astype(float))
            mov = np.max(ov, axis=1)
        else:
            mov = np.zeros((len(gt_boxes)))

        # A box is valid only when its best overlap with vgt_boxes is 1.
        invalid_gtids = np.where(mov < 1)[0]
        valid_gtids = np.where(mov == 1)[0]
        invalid_boxes = gt_boxes[invalid_gtids, :]
        gt_boxes = gt_boxes[valid_gtids, :]

        def _unmap(data, count, inds, fill=0):
            """Unmap a subset inds of data into original data of size count."""
            if len(data.shape) == 1:
                ret = np.empty((count,), dtype=np.float32)
                ret.fill(fill)
                ret[inds] = data
            else:
                ret = np.empty((count,) + data.shape[1:], dtype=np.float32)
                ret.fill(fill)
                ret[inds, :] = data
            return ret

        if gt_boxes.size > 0:
            # overlap between the anchors and the gt boxes
            # overlaps (ex, gt)
            # `float` replaces the `np.float` alias removed from NumPy.
            overlaps = bbox_overlaps(anchors.astype(float), gt_boxes.astype(float))
            argmax_overlaps = overlaps.argmax(axis=1)
            max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
            gt_argmax_overlaps = overlaps.argmax(axis=0)
            gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])]
            # Every anchor that ties the best overlap of some gt box.
            gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

            labels[max_overlaps < self.neg_thresh] = 0
            labels[gt_argmax_overlaps] = 1

            # fg label: above threshold IoU
            labels[max_overlaps >= self.pos_thresh] = 1
        else:
            labels[:] = 0

        # Anchors close to an invalid box are ignored, whatever label they
        # were given above.
        if len(invalid_boxes) > 0:
            overlapsn = bbox_overlaps(anchors.astype(float), invalid_boxes.astype(float))
            max_overlapsn = overlapsn.max(axis=1)
            labels[max_overlapsn > 0.3] = -1

        # subsample positive labels if we have too many
        fg_inds = np.where(labels == 1)[0]
        if len(fg_inds) > self.num_fg:
            disable_inds = npr.choice(fg_inds, size=(len(fg_inds) - self.num_fg), replace=False)
            labels[disable_inds] = -1

        # subsample negative labels if we have too many
        num_bg = self.batch_size - np.sum(labels == 1)
        bg_inds = np.where(labels == 0)[0]
        if len(bg_inds) > num_bg:
            disable_inds = npr.choice(bg_inds, size=(len(bg_inds) - num_bg), replace=False)
            labels[disable_inds] = -1

        bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32)
        if gt_boxes.size > 0:
            bbox_targets[:] = bbox_transform(anchors, gt_boxes[argmax_overlaps, :4])

        # Only foreground anchors contribute to the regression loss.
        bbox_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
        bbox_weights[labels == 1, :] = 1.0

        # map up to original set of anchors
        labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
        bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
        bbox_weights = _unmap(bbox_weights, total_anchors, inds_inside, fill=0)

        labels = labels.reshape((1, self.feat_height, self.feat_width, self.num_anchors)).transpose(0, 3, 1, 2)
        labels = labels.reshape((1, self.num_anchors * self.feat_height * self.feat_width)).astype(np.float16)
        bbox_targets = bbox_targets.reshape((self.feat_height, self.feat_width, self.num_anchors * 4)).transpose(2, 0, 1)
        bbox_weights = bbox_weights.reshape((self.feat_height, self.feat_width, self.num_anchors * 4)).transpose((2, 0, 1))
        # Keep only the targets at positions whose weight is 1.
        pids = np.where(bbox_weights == 1)
        bbox_targets = bbox_targets[pids]

        fgt_boxes = -np.ones((100, 5))
        if len(agt_boxes) > 0:
            # Truncate the source as well as the destination so that more
            # than 100 boxes do not cause a shape mismatch.
            n_kept = min(len(agt_boxes), 100)
            fgt_boxes[:n_kept, :] = np.hstack((agt_boxes, classes))[:n_kept]

        rval = [mx.nd.array(labels, dtype='float16'), bbox_targets, mx.nd.array(pids), mx.nd.array(fgt_boxes)]
        if has_mask:
            rval.append(mx.nd.array(encoded_polys))
        return rval