    def cfm_network_forward(self, im_i):
        im = cv2.imread(self.imdb.image_path_at(im_i))
        roidb_cache = os.path.join('data/cache/voc_2012_val_mcg_maskdb/',
                                   self.imdb._image_index[im_i] + '.mat')
        roidb = scipy.io.loadmat(roidb_cache)
        boxes = roidb['boxes']
        filter_keep = filter_small_boxes(boxes, min_size=16)
        boxes = boxes[filter_keep, :]
        masks = roidb['masks']
        masks = masks[filter_keep, :, :]
        assert boxes.shape[0] == masks.shape[0]

        # Resize the input masks to CFM's input size
        mask_resize = np.zeros((masks.shape[0], cfg.TEST.CFM_INPUT_MASK_SIZE,
                                cfg.TEST.CFM_INPUT_MASK_SIZE))
        for i in xrange(masks.shape[0]):
            mask_resize[i, :, :] = cv2.resize(
                masks[i, :, :].astype(np.float),
                (cfg.TEST.CFM_INPUT_MASK_SIZE, cfg.TEST.CFM_INPUT_MASK_SIZE))
        masks = mask_resize

        # Get top-k proposals from MCG
        if cfg.TEST.USE_TOP_K_MCG:
            num_keep = min(boxes.shape[0], cfg.TEST.USE_TOP_K_MCG)
            boxes = boxes[:num_keep, :]
            masks = masks[:num_keep, :, :]
            assert boxes.shape[0] == masks.shape[0]
        # Deal with multi-scale testing: group several adjacent scales
        # into each forward pass
        _, im_scale_factors = prep_im_for_blob_cfm(im, cfg.TEST.SCALES)
        orig_boxes = boxes.copy()
        boxes = pred_rois_for_blob(boxes, im_scale_factors)
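        # after pred_rois_for_blob, column 0 of boxes holds each box's
        # assigned scale index; it is used below to group boxes by scale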
        num_scale_iter = int(
            np.ceil(len(cfg.TEST.SCALES) / float(cfg.TEST.GROUP_SCALE)))
        LO_SCALE = 0
        MAX_ROIS_GPU = cfg.TEST.MAX_ROIS_GPU
        # set up return results
        res_boxes = np.zeros((0, 4), dtype=np.float32)
        res_masks = np.zeros((0, 1, cfg.MASK_SIZE, cfg.MASK_SIZE),
                             dtype=np.float32)
        res_seg_scores = np.zeros((0, self.num_classes), dtype=np.float32)

        for scale_iter in xrange(num_scale_iter):
            HI_SCALE = min(LO_SCALE + cfg.TEST.GROUP_SCALE,
                           len(cfg.TEST.SCALES))
            inds_this_scale = np.where((boxes[:, 0] >= LO_SCALE)
                                       & (boxes[:, 0] < HI_SCALE))[0]
            if len(inds_this_scale) == 0:
                LO_SCALE += cfg.TEST.GROUP_SCALE
                continue
            max_rois_this_scale = MAX_ROIS_GPU[scale_iter]
            boxes_this_scale = boxes[inds_this_scale, :]
            masks_this_scale = masks[inds_this_scale, :, :]
            num_iter_this_scale = int(
                np.ceil(boxes_this_scale.shape[0] /
                        float(max_rois_this_scale)))
            # make the batch indices of the input boxes start from 0
            boxes_this_scale[:, 0] -= min(boxes_this_scale[:, 0])
            # re-prepare im blob for this_scale
            input_blobs = {}
            input_blobs['data'], _ = prep_im_for_blob_cfm(
                im, cfg.TEST.SCALES[LO_SCALE:HI_SCALE])
            input_blobs['data'] = input_blobs['data'].astype(np.float32,
                                                             copy=False)
            input_start = 0
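            # forward the ROIs of this scale group in chunks of at most
            # max_rois_this_scale rois per network pass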
            for test_iter in xrange(num_iter_this_scale):
                input_end = min(input_start + max_rois_this_scale,
                                boxes_this_scale.shape[0])
                input_box = boxes_this_scale[input_start:input_end, :]
                input_mask = masks_this_scale[input_start:input_end, :, :]
                input_blobs['rois'] = input_box.astype(np.float32, copy=False)
                input_blobs['masks'] = input_mask.reshape(
                    input_box.shape[0], 1, cfg.TEST.CFM_INPUT_MASK_SIZE,
                    cfg.TEST.CFM_INPUT_MASK_SIZE).astype(np.float32,
                                                         copy=False)
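                # re-binarize the masks: the resize above interpolates and
                # produces fractional values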
                input_blobs['masks'] = (input_blobs['masks'] >=
                                        cfg.BINARIZE_THRESH).astype(np.float32,
                                                                    copy=False)
                self.net.blobs['data'].reshape(*input_blobs['data'].shape)
                self.net.blobs['rois'].reshape(*input_blobs['rois'].shape)
                self.net.blobs['masks'].reshape(*input_blobs['masks'].shape)
                blobs_out = self.net.forward(**input_blobs)
                output_mask = blobs_out['mask_prob'].copy()
                output_score = blobs_out['seg_cls_prob'].copy()
                res_masks = np.vstack(
                    (res_masks,
                     output_mask.reshape(input_box.shape[0], 1, cfg.MASK_SIZE,
                                         cfg.MASK_SIZE).astype(np.float32,
                                                               copy=False)))
                res_seg_scores = np.vstack((res_seg_scores, output_score))
                input_start += max_rois_this_scale
            res_boxes = np.vstack((res_boxes, orig_boxes[inds_this_scale, :]))
            LO_SCALE += cfg.TEST.GROUP_SCALE

        return res_masks, res_boxes, res_seg_scores
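
Both examples on this page call filter_small_boxes() to drop proposals whose
sides are smaller than a minimum size, but the helper itself is not shown. A
minimal sketch of what it presumably looks like, assuming boxes are stored as
(x1, y1, x2, y2) rows (the actual helper in the source repository may differ):

import numpy as np

def filter_small_boxes(boxes, min_size):
    # keep only the boxes whose width and height are both >= min_size
    ws = boxes[:, 2] - boxes[:, 0]
    hs = boxes[:, 3] - boxes[:, 1]
    keep = np.where((ws >= min_size) & (hs >= min_size))[0]
    return keep

The callers above index boxes, masks and proposals with the returned array and
later compose such index arrays (see the MIX_INDEX branch in the next example),
so the sketch returns integer indices rather than a filtered copy.
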
Example #2
    def forward(self, bottom, top):
        # Algorithm:
        #
        # for each (H, W) location i
        #   generate A anchor boxes centered on cell i
        #   apply predicted transform deltas at cell i to each of the A anchors
        # clip predicted boxes to image
        # remove predicted boxes with either height or width < threshold
        # sort all (proposal, score) pairs by score from highest to lowest
        # take top pre_nms_topN proposals before NMS
        # apply NMS with threshold 0.7 to remaining proposals
        # take after_nms_topN proposals after NMS
        # return the top proposals (-> RoIs top, scores top)
        assert bottom[0].data.shape[0] == 1, \
            'Only single item batches are supported'

        cfg_key = str(self.phase)  # either 'TRAIN' or 'TEST'
        pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
        post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
        nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
        min_size = cfg[cfg_key].RPN_MIN_SIZE

        # the first set of _num_anchors channels are bg probs
        # the second set are the fg probs, which we want
        scores = bottom[0].data[:, self._num_anchors:, :, :]
        bbox_deltas = bottom[1].data
        im_info = bottom[2].data[0, :]

        # 1. Generate proposals from transform deltas and shifted anchors
        height, width = scores.shape[-2:]
        self._height = height
        self._width = width
        # Enumerate all shifts
        shift_x = np.arange(0, self._width) * self._feat_stride
        shift_y = np.arange(0, self._height) * self._feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                            shift_y.ravel())).transpose()

        # Enumerate all shifted anchors:
        #
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shifted anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = self._num_anchors
        K = shifts.shape[0]
        anchors = self._anchors.reshape((1, A, 4)) + \
                  shifts.reshape((1, K, 4)).transpose((1, 0, 2))
        anchors = anchors.reshape((K * A, 4))
        _, keep = clip_boxes(anchors, im_info[:2])
        self._anchor_index_before_clip = keep

        # Transpose and reshape the predicted transform deltas to get them
        # into the same order as the anchors:
        #
        # transform deltas will be (1, 4 * A, H, W) format
        # transpose to (1, H, W, 4 * A)
        # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
        # in slowest to fastest order
        bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

        # Same story for the scores:
        #
        # scores are (1, A, H, W) format
        # transpose to (1, H, W, A)
        # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
        scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

        # Convert anchors into proposals via the predicted transform deltas
        proposals = bbox_transform_inv(anchors, bbox_deltas)

        # 2. clip predicted boxes to image
        proposals, keep = clip_boxes(proposals, im_info[:2])
        # Record the corresponding indices before and after clipping.
        # This step doesn't need an unmap; we need the indices to decide
        # whether to do back-propagation.
        self._proposal_index_before_clip = keep

        # 3. remove predicted boxes with either height or width < threshold
        # (NOTE: convert min_size to input image scale stored in im_info[2])
        keep = filter_small_boxes(proposals, min_size * im_info[2])
        proposals = proposals[keep, :]
        scores = scores[keep]
        self._ind_after_filter = keep

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN (e.g. 6000)
        order = scores.ravel().argsort()[::-1]

        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
        proposals = proposals[order, :]
        scores = scores[order]
        self._ind_after_sort = order
        # 6. apply nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        keep = nms(np.hstack((proposals, scores)), nms_thresh)

        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
        proposals = proposals[keep, :]

        scores = scores[keep]
        # Output rois blob
        # Our RPN implementation only supports a single input image, so all
        # batch inds are 0
        batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
        proposals = np.hstack(
            (batch_inds, proposals.astype(np.float32, copy=False)))
        self._proposal_index = keep

        blobs = {'rois': proposals}

        if str(self.phase) == 'TRAIN':
            if cfg.TRAIN.MIX_INDEX:
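                # compose the NMS keep list through the sort order and the
                # size filter so the indices refer to the pre-filter proposals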
                all_rois_index = self._ind_after_filter[self._ind_after_sort[
                    self._proposal_index]].reshape(1, len(keep))
                blobs['proposal_index'] = all_rois_index

        # Copy data forward to the top layers
        for blob_name, blob in blobs.iteritems():
            top[self._top_name_map[blob_name]].reshape(*blob.shape)
            top[self._top_name_map[blob_name]].data[...] = blob.astype(
                np.float32, copy=False)
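
The shifted-anchor enumeration in step 1 above is pure NumPy broadcasting: the
A base anchors, shaped (1, A, 4), are added to the K per-cell shifts, shaped
(K, 1, 4), so every anchor is replicated at every feature-map cell. A
self-contained toy reproduction of just that step (the anchor sizes, stride
and feature-map shape below are made up for illustration):

import numpy as np

# two square base anchors centred at (0, 0), and a 3x2 feature map with a
# stride of 16 input pixels between cells
base_anchors = np.array([[-8, -8, 8, 8],
                         [-16, -16, 16, 16]], dtype=np.float32)     # (A, 4)
feat_stride, height, width = 16, 3, 2

shift_x = np.arange(0, width) * feat_stride
shift_y = np.arange(0, height) * feat_stride
shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                    shift_x.ravel(), shift_y.ravel())).transpose()  # (K, 4)

A = base_anchors.shape[0]
K = shifts.shape[0]                                                 # K = H * W
# broadcast (1, A, 4) + (K, 1, 4) -> (K, A, 4), then flatten to (K * A, 4)
anchors = base_anchors.reshape((1, A, 4)) + \
          shifts.reshape((1, K, 4)).transpose((1, 0, 2))
anchors = anchors.reshape((K * A, 4))
print(anchors.shape)                                                # (12, 4)

The resulting rows are ordered by (h, w, a), slowest to fastest, which is
exactly the ordering the transpose/reshape of bbox_deltas and scores in the
example is arranged to match.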