def cfm_network_forward(self, im_i):
    """Run the CFM network over the cached MCG proposals of one image.

    The proposals (boxes and masks) are read from the per-image ``.mat``
    cache, small boxes are dropped, the masks are resized to the CFM input
    size, and the rois are pushed through ``self.net`` in chunks, one group
    of adjacent test scales at a time.

    Args:
        im_i: index of the image in ``self.imdb``.

    Returns:
        A tuple ``(res_masks, res_boxes, res_seg_scores)``:

        * ``res_masks`` -- shape (N, 1, cfg.MASK_SIZE, cfg.MASK_SIZE),
          the ``mask_prob`` output of the net.
        * ``res_boxes`` -- the matching rows of the original (unscaled)
          proposal boxes, 4 columns.
        * ``res_seg_scores`` -- shape (N, self.num_classes), the
          ``seg_cls_prob`` output of the net.

        Rows are ordered by scale group, not by the original proposal
        order; the three arrays stay aligned with each other.
    """
    im = cv2.imread(self.imdb.image_path_at(im_i))
    roidb_cache = os.path.join('data/cache/voc_2012_val_mcg_maskdb/',
                               self.imdb._image_index[im_i] + '.mat')
    roidb = scipy.io.loadmat(roidb_cache)
    boxes = roidb['boxes']
    filter_keep = filter_small_boxes(boxes, min_size=16)
    boxes = boxes[filter_keep, :]
    masks = roidb['masks'][filter_keep, :, :]
    assert boxes.shape[0] == masks.shape[0]

    # Resize input mask, make it the same as CFM's input size
    mask_size = cfg.TEST.CFM_INPUT_MASK_SIZE
    mask_resize = np.zeros((masks.shape[0], mask_size, mask_size))
    for i in range(masks.shape[0]):
        # np.float64 is what the removed ``np.float`` alias resolved to.
        mask_resize[i, :, :] = cv2.resize(masks[i, :, :].astype(np.float64),
                                          (mask_size, mask_size))
    masks = mask_resize

    # Get top-k proposals from MCG
    if cfg.TEST.USE_TOP_K_MCG:
        num_keep = min(boxes.shape[0], cfg.TEST.USE_TOP_K_MCG)
        boxes = boxes[:num_keep, :]
        masks = masks[:num_keep, :, :]
        assert boxes.shape[0] == masks.shape[0]

    # Deal with multi-scale test:
    # we group several adjacent scales to do forward
    scales = cfg.TEST.SCALES
    group_size = cfg.TEST.GROUP_SCALE
    _, im_scale_factors = prep_im_for_blob_cfm(im, scales)
    orig_boxes = boxes.copy()
    # Column 0 of the returned rois is used below as the scale index.
    boxes = pred_rois_for_blob(boxes, im_scale_factors)
    num_scale_iter = int(np.ceil(len(scales) / float(group_size)))
    # Indexed by scale group: max number of rois per forward pass.
    max_rois_gpu = cfg.TEST.MAX_ROIS_GPU

    # Collect per-chunk results and stack once at the end. The empty seed
    # arrays keep the output shapes/dtypes identical to stacking
    # incrementally, including when no proposal survives.
    box_chunks = [np.zeros((0, 4), dtype=np.float32)]
    mask_chunks = [np.zeros((0, 1, cfg.MASK_SIZE, cfg.MASK_SIZE),
                            dtype=np.float32)]
    score_chunks = [np.zeros((0, self.num_classes), dtype=np.float32)]

    for scale_iter in range(num_scale_iter):
        lo_scale = scale_iter * group_size
        hi_scale = min(lo_scale + group_size, len(scales))
        inds_this_scale = np.where((boxes[:, 0] >= lo_scale) &
                                   (boxes[:, 0] < hi_scale))[0]
        if inds_this_scale.size == 0:
            continue

        max_rois_this_scale = max_rois_gpu[scale_iter]
        # Fancy indexing copies, so the in-place edit below leaves
        # ``boxes`` untouched.
        boxes_this_scale = boxes[inds_this_scale, :]
        masks_this_scale = masks[inds_this_scale, :, :]
        num_rois = boxes_this_scale.shape[0]
        num_iter_this_scale = int(
            np.ceil(num_rois / float(max_rois_this_scale)))
        # make the batch index of input box start from 0
        boxes_this_scale[:, 0] -= boxes_this_scale[:, 0].min()

        # re-prepare im blob for this group of scales
        im_blob, _ = prep_im_for_blob_cfm(im, scales[lo_scale:hi_scale])
        input_blobs = {'data': im_blob.astype(np.float32, copy=False)}

        for test_iter in range(num_iter_this_scale):
            input_start = test_iter * max_rois_this_scale
            input_end = min(input_start + max_rois_this_scale, num_rois)
            input_box = boxes_this_scale[input_start:input_end, :]
            input_mask = masks_this_scale[input_start:input_end, :, :]

            input_blobs['rois'] = input_box.astype(np.float32, copy=False)
            soft_masks = input_mask.reshape(
                input_box.shape[0], 1, mask_size, mask_size
            ).astype(np.float32, copy=False)
            # Binarize the resized (interpolated) masks.
            input_blobs['masks'] = (
                soft_masks >= cfg.BINARIZE_THRESH
            ).astype(np.float32, copy=False)

            self.net.blobs['data'].reshape(*input_blobs['data'].shape)
            self.net.blobs['rois'].reshape(*input_blobs['rois'].shape)
            self.net.blobs['masks'].reshape(*input_blobs['masks'].shape)
            blobs_out = self.net.forward(**input_blobs)

            # Copy so the stored chunks do not alias the net's output
            # blobs (presumably reused by the next forward call).
            output_mask = blobs_out['mask_prob'].copy()
            output_score = blobs_out['seg_cls_prob'].copy()
            mask_chunks.append(
                output_mask.reshape(
                    input_box.shape[0], 1, cfg.MASK_SIZE, cfg.MASK_SIZE
                ).astype(np.float32, copy=False))
            score_chunks.append(output_score)

        box_chunks.append(orig_boxes[inds_this_scale, :])

    res_masks = np.vstack(mask_chunks)
    res_boxes = np.vstack(box_chunks)
    res_seg_scores = np.vstack(score_chunks)
    return res_masks, res_boxes, res_seg_scores
def cfm_network_forward(self, im_i):
    """Run the CFM network over the cached MCG proposals of one image.

    The proposals (boxes and masks) are read from the per-image ``.mat``
    cache, small boxes are dropped, the masks are resized to the CFM input
    size, and the rois are pushed through ``self.net`` in chunks, one group
    of adjacent test scales at a time.

    Args:
        im_i: index of the image in ``self.imdb``.

    Returns:
        A tuple ``(res_masks, res_boxes, res_seg_scores)``:

        * ``res_masks`` -- shape (N, 1, cfg.MASK_SIZE, cfg.MASK_SIZE),
          the ``mask_prob`` output of the net.
        * ``res_boxes`` -- the matching rows of the original (unscaled)
          proposal boxes, 4 columns.
        * ``res_seg_scores`` -- shape (N, self.num_classes), the
          ``seg_cls_prob`` output of the net.

        Rows are ordered by scale group, not by the original proposal
        order; the three arrays stay aligned with each other.
    """
    im = cv2.imread(self.imdb.image_path_at(im_i))
    roidb_cache = os.path.join('data/cache/voc_2012_val_mcg_maskdb/',
                               self.imdb._image_index[im_i] + '.mat')
    roidb = scipy.io.loadmat(roidb_cache)
    boxes = roidb['boxes']
    filter_keep = filter_small_boxes(boxes, min_size=16)
    boxes = boxes[filter_keep, :]
    masks = roidb['masks'][filter_keep, :, :]
    assert boxes.shape[0] == masks.shape[0]

    # Resize input mask, make it the same as CFM's input size
    mask_size = cfg.TEST.CFM_INPUT_MASK_SIZE
    mask_resize = np.zeros((masks.shape[0], mask_size, mask_size))
    for i in range(masks.shape[0]):
        # np.float64 is what the removed ``np.float`` alias resolved to.
        mask_resize[i, :, :] = cv2.resize(masks[i, :, :].astype(np.float64),
                                          (mask_size, mask_size))
    masks = mask_resize

    # Get top-k proposals from MCG
    if cfg.TEST.USE_TOP_K_MCG:
        num_keep = min(boxes.shape[0], cfg.TEST.USE_TOP_K_MCG)
        boxes = boxes[:num_keep, :]
        masks = masks[:num_keep, :, :]
        assert boxes.shape[0] == masks.shape[0]

    # Deal with multi-scale test:
    # we group several adjacent scales to do forward
    scales = cfg.TEST.SCALES
    group_size = cfg.TEST.GROUP_SCALE
    _, im_scale_factors = prep_im_for_blob_cfm(im, scales)
    orig_boxes = boxes.copy()
    # Column 0 of the returned rois is used below as the scale index.
    boxes = pred_rois_for_blob(boxes, im_scale_factors)
    num_scale_iter = int(np.ceil(len(scales) / float(group_size)))
    # Indexed by scale group: max number of rois per forward pass.
    max_rois_gpu = cfg.TEST.MAX_ROIS_GPU

    # Collect per-chunk results and stack once at the end. The empty seed
    # arrays keep the output shapes/dtypes identical to stacking
    # incrementally, including when no proposal survives.
    box_chunks = [np.zeros((0, 4), dtype=np.float32)]
    mask_chunks = [np.zeros((0, 1, cfg.MASK_SIZE, cfg.MASK_SIZE),
                            dtype=np.float32)]
    score_chunks = [np.zeros((0, self.num_classes), dtype=np.float32)]

    for scale_iter in range(num_scale_iter):
        lo_scale = scale_iter * group_size
        hi_scale = min(lo_scale + group_size, len(scales))
        inds_this_scale = np.where((boxes[:, 0] >= lo_scale) &
                                   (boxes[:, 0] < hi_scale))[0]
        if inds_this_scale.size == 0:
            continue

        max_rois_this_scale = max_rois_gpu[scale_iter]
        # Fancy indexing copies, so the in-place edit below leaves
        # ``boxes`` untouched.
        boxes_this_scale = boxes[inds_this_scale, :]
        masks_this_scale = masks[inds_this_scale, :, :]
        num_rois = boxes_this_scale.shape[0]
        num_iter_this_scale = int(
            np.ceil(num_rois / float(max_rois_this_scale)))
        # make the batch index of input box start from 0
        boxes_this_scale[:, 0] -= boxes_this_scale[:, 0].min()

        # re-prepare im blob for this group of scales
        im_blob, _ = prep_im_for_blob_cfm(im, scales[lo_scale:hi_scale])
        input_blobs = {'data': im_blob.astype(np.float32, copy=False)}

        for test_iter in range(num_iter_this_scale):
            input_start = test_iter * max_rois_this_scale
            input_end = min(input_start + max_rois_this_scale, num_rois)
            input_box = boxes_this_scale[input_start:input_end, :]
            input_mask = masks_this_scale[input_start:input_end, :, :]

            input_blobs['rois'] = input_box.astype(np.float32, copy=False)
            soft_masks = input_mask.reshape(
                input_box.shape[0], 1, mask_size, mask_size
            ).astype(np.float32, copy=False)
            # Binarize the resized (interpolated) masks.
            input_blobs['masks'] = (
                soft_masks >= cfg.BINARIZE_THRESH
            ).astype(np.float32, copy=False)

            self.net.blobs['data'].reshape(*input_blobs['data'].shape)
            self.net.blobs['rois'].reshape(*input_blobs['rois'].shape)
            self.net.blobs['masks'].reshape(*input_blobs['masks'].shape)
            blobs_out = self.net.forward(**input_blobs)

            # Copy so the stored chunks do not alias the net's output
            # blobs (presumably reused by the next forward call).
            output_mask = blobs_out['mask_prob'].copy()
            output_score = blobs_out['seg_cls_prob'].copy()
            mask_chunks.append(
                output_mask.reshape(
                    input_box.shape[0], 1, cfg.MASK_SIZE, cfg.MASK_SIZE
                ).astype(np.float32, copy=False))
            score_chunks.append(output_score)

        box_chunks.append(orig_boxes[inds_this_scale, :])

    res_masks = np.vstack(mask_chunks)
    res_boxes = np.vstack(box_chunks)
    res_seg_scores = np.vstack(score_chunks)
    return res_masks, res_boxes, res_seg_scores
def forward(self, bottom, top):
    """Turn RPN outputs into a ranked, NMS-filtered set of RoIs.

    Algorithm:

    for each (H, W) location i
      generate A anchor boxes centered on cell i
      apply predicted transform deltas at cell i to each of the A anchors
    clip predicted boxes to image
    remove predicted boxes with either height or width < threshold
    sort all (proposal, score) pairs by score from highest to lowest
    take top pre_nms_topN proposals before NMS
    apply NMS with the configured threshold to remaining proposals
    take post_nms_topN proposals after NMS
    write the top proposals to the 'rois' top blob

    Args:
        bottom: input blobs. ``bottom[0]`` holds the bg/fg class scores,
            ``bottom[1]`` the predicted box deltas and ``bottom[2]`` the
            image info (first two entries are passed to ``clip_boxes`` as
            the image bounds, the third is used as the image scale).
        top: output blobs, addressed through ``self._top_name_map``.

    Side effects:
        Stores ``_height``, ``_width``, ``_anchor_index_before_clip``,
        ``_proposal_index_before_clip``, ``_ind_after_filter``,
        ``_ind_after_sort`` and ``_proposal_index`` on ``self``.
    """
    assert bottom[0].data.shape[0] == 1, \
        'Only single item batches are supported'

    cfg_key = str(self.phase)  # either 'TRAIN' or 'TEST'
    phase_cfg = cfg[cfg_key]
    pre_nms_topN = phase_cfg.RPN_PRE_NMS_TOP_N
    post_nms_topN = phase_cfg.RPN_POST_NMS_TOP_N
    nms_thresh = phase_cfg.RPN_NMS_THRESH
    min_size = phase_cfg.RPN_MIN_SIZE

    # the first set of _num_anchors channels are bg probs
    # the second set are the fg probs, which we want
    scores = bottom[0].data[:, self._num_anchors:, :, :]
    bbox_deltas = bottom[1].data
    im_info = bottom[2].data[0, :]

    # 1. Generate proposals from transform deltas and shifted anchors
    height, width = scores.shape[-2:]
    self._height = height
    self._width = width
    # Enumerate all shifts
    shift_x = np.arange(0, width) * self._feat_stride
    shift_y = np.arange(0, height) * self._feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    flat_x = shift_x.ravel()
    flat_y = shift_y.ravel()
    shifts = np.vstack((flat_x, flat_y, flat_x, flat_y)).transpose()

    # Enumerate all shifted anchors:
    #
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = self._num_anchors
    K = shifts.shape[0]
    anchors = self._anchors.reshape((1, A, 4)) + \
        shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    anchors = anchors.reshape((K * A, 4))
    # Only the index of anchors reported by clip_boxes is recorded here;
    # the unclipped anchors are used to build the proposals.
    _, keep = clip_boxes(anchors, im_info[:2])
    self._anchor_index_before_clip = keep

    # Transpose and reshape predicted transform deltas to get them
    # into the same order as the anchors:
    #
    # transform deltas will be (1, 4 * A, H, W) format
    # transpose to (1, H, W, 4 * A)
    # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
    # in slowest to fastest order
    bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

    # Same story for the scores:
    #
    # scores are (1, A, H, W) format
    # transpose to (1, H, W, A)
    # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
    scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

    # Convert anchors into proposals via the predicted transform deltas
    proposals = bbox_transform_inv(anchors, bbox_deltas)

    # 2. clip predicted boxes to image
    proposals, keep = clip_boxes(proposals, im_info[:2])
    # Record the corresponding index before and after clip
    # This step doesn't need unmap
    # We need it to decide whether do back propagation
    self._proposal_index_before_clip = keep

    # 3. remove predicted boxes with either height or width < threshold
    # (NOTE: convert min_size to input image scale stored in im_info[2])
    keep = filter_small_boxes(proposals, min_size * im_info[2])
    proposals = proposals[keep, :]
    scores = scores[keep]
    self._ind_after_filter = keep

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]
    self._ind_after_sort = order

    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    keep = nms(np.hstack((proposals, scores)), nms_thresh)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    # Scores are not emitted by this layer, so only the boxes are kept.
    proposals = proposals[keep, :]

    # Output rois blob
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    proposals = np.hstack(
        (batch_inds, proposals.astype(np.float32, copy=False)))
    self._proposal_index = keep

    blobs = {'rois': proposals}

    if cfg_key == 'TRAIN' and cfg.TRAIN.MIX_INDEX:
        # Map the surviving proposals back through the sort and the
        # small-box filter to their index in the clipped proposal list.
        all_rois_index = self._ind_after_filter[
            self._ind_after_sort[self._proposal_index]
        ].reshape(1, len(keep))
        blobs['proposal_index'] = all_rois_index

    # Copy data to forward to top layer
    # (.items() works on both Python 2 and 3, unlike .iteritems())
    for blob_name, blob in blobs.items():
        top_blob = top[self._top_name_map[blob_name]]
        top_blob.reshape(*blob.shape)
        top_blob.data[...] = blob.astype(np.float32, copy=False)
def forward(self, bottom, top):
    """Turn RPN outputs into a ranked, NMS-filtered set of RoIs.

    Algorithm:

    for each (H, W) location i
      generate A anchor boxes centered on cell i
      apply predicted transform deltas at cell i to each of the A anchors
    clip predicted boxes to image
    remove predicted boxes with either height or width < threshold
    sort all (proposal, score) pairs by score from highest to lowest
    take top pre_nms_topN proposals before NMS
    apply NMS with the configured threshold to remaining proposals
    take post_nms_topN proposals after NMS
    write the top proposals to the 'rois' top blob

    Args:
        bottom: input blobs. ``bottom[0]`` holds the bg/fg class scores,
            ``bottom[1]`` the predicted box deltas and ``bottom[2]`` the
            image info (first two entries are passed to ``clip_boxes`` as
            the image bounds, the third is used as the image scale).
        top: output blobs, addressed through ``self._top_name_map``.

    Side effects:
        Stores ``_height``, ``_width``, ``_anchor_index_before_clip``,
        ``_proposal_index_before_clip``, ``_ind_after_filter``,
        ``_ind_after_sort`` and ``_proposal_index`` on ``self``.
    """
    assert bottom[0].data.shape[0] == 1, \
        'Only single item batches are supported'

    cfg_key = str(self.phase)  # either 'TRAIN' or 'TEST'
    phase_cfg = cfg[cfg_key]
    pre_nms_topN = phase_cfg.RPN_PRE_NMS_TOP_N
    post_nms_topN = phase_cfg.RPN_POST_NMS_TOP_N
    nms_thresh = phase_cfg.RPN_NMS_THRESH
    min_size = phase_cfg.RPN_MIN_SIZE

    # the first set of _num_anchors channels are bg probs
    # the second set are the fg probs, which we want
    scores = bottom[0].data[:, self._num_anchors:, :, :]
    bbox_deltas = bottom[1].data
    im_info = bottom[2].data[0, :]

    # 1. Generate proposals from transform deltas and shifted anchors
    height, width = scores.shape[-2:]
    self._height = height
    self._width = width
    # Enumerate all shifts
    shift_x = np.arange(0, width) * self._feat_stride
    shift_y = np.arange(0, height) * self._feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    flat_x = shift_x.ravel()
    flat_y = shift_y.ravel()
    shifts = np.vstack((flat_x, flat_y, flat_x, flat_y)).transpose()

    # Enumerate all shifted anchors:
    #
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = self._num_anchors
    K = shifts.shape[0]
    anchors = self._anchors.reshape((1, A, 4)) + \
        shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    anchors = anchors.reshape((K * A, 4))
    # Only the index of anchors reported by clip_boxes is recorded here;
    # the unclipped anchors are used to build the proposals.
    _, keep = clip_boxes(anchors, im_info[:2])
    self._anchor_index_before_clip = keep

    # Transpose and reshape predicted transform deltas to get them
    # into the same order as the anchors:
    #
    # transform deltas will be (1, 4 * A, H, W) format
    # transpose to (1, H, W, 4 * A)
    # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
    # in slowest to fastest order
    bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

    # Same story for the scores:
    #
    # scores are (1, A, H, W) format
    # transpose to (1, H, W, A)
    # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
    scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

    # Convert anchors into proposals via the predicted transform deltas
    proposals = bbox_transform_inv(anchors, bbox_deltas)

    # 2. clip predicted boxes to image
    proposals, keep = clip_boxes(proposals, im_info[:2])
    # Record the corresponding index before and after clip
    # This step doesn't need unmap
    # We need it to decide whether do back propagation
    self._proposal_index_before_clip = keep

    # 3. remove predicted boxes with either height or width < threshold
    # (NOTE: convert min_size to input image scale stored in im_info[2])
    keep = filter_small_boxes(proposals, min_size * im_info[2])
    proposals = proposals[keep, :]
    scores = scores[keep]
    self._ind_after_filter = keep

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]
    self._ind_after_sort = order

    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    keep = nms(np.hstack((proposals, scores)), nms_thresh)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    # Scores are not emitted by this layer, so only the boxes are kept.
    proposals = proposals[keep, :]

    # Output rois blob
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    proposals = np.hstack(
        (batch_inds, proposals.astype(np.float32, copy=False)))
    self._proposal_index = keep

    blobs = {'rois': proposals}

    if cfg_key == 'TRAIN' and cfg.TRAIN.MIX_INDEX:
        # Map the surviving proposals back through the sort and the
        # small-box filter to their index in the clipped proposal list.
        all_rois_index = self._ind_after_filter[
            self._ind_after_sort[self._proposal_index]
        ].reshape(1, len(keep))
        blobs['proposal_index'] = all_rois_index

    # Copy data to forward to top layer
    # (.items() works on both Python 2 and 3, unlike .iteritems())
    for blob_name, blob in blobs.items():
        top_blob = top[self._top_name_map[blob_name]]
        top_blob.reshape(*blob.shape)
        top_blob.data[...] = blob.astype(np.float32, copy=False)