def _im_detect(self, image):
    """Taken from https://github.com/daijifeng001/MNC/blob/master/tools/demo.py.

    Runs the cascade forward and concatenates the proposals, class scores and
    masks from the two segmentation stages (the '_ext' blobs hold the
    second-stage outputs).

    :param image: An image (numpy array) of shape (height, width, 3).
    :return: A tuple of three numpy arrays: the n_proposals x n_classes
        scores, the corresponding n_proposals x 4 bounding boxes, where each
        bounding box is given as <xul, yul, xlr, ylr>, and the
        n_proposals x 1 x 21 x 21 segmentation masks.
    """
    forward_kwargs, im_scales = self._prepare_mnc_args(image)
    blobs_out = self._net.forward(**forward_kwargs)
    # output we need to collect:
    # 1. output from phase1'
    rois_phase1 = self._net.blobs['rois'].data.copy()
    masks_phase1 = self._net.blobs['mask_proposal'].data[...]
    scores_phase1 = self._net.blobs['seg_cls_prob'].data[...]
    # 2. output from phase2
    rois_phase2 = self._net.blobs['rois_ext'].data[...]
    masks_phase2 = self._net.blobs['mask_proposal_ext'].data[...]
    scores_phase2 = self._net.blobs['seg_cls_prob_ext'].data[...]
    # Boxes are in resized space, we un-scale them back
    rois_phase1 = rois_phase1[:, 1:5] / im_scales[0]
    rois_phase2 = rois_phase2[:, 1:5] / im_scales[0]
    rois_phase1, _ = clip_boxes(rois_phase1, image.shape)
    rois_phase2, _ = clip_boxes(rois_phase2, image.shape)
    # concatenate two stages to get final network output
    masks = np.concatenate((masks_phase1, masks_phase2), axis=0)
    boxes = np.concatenate((rois_phase1, rois_phase2), axis=0)
    scores = np.concatenate((scores_phase1, scores_phase2), axis=0)
    return scores, boxes, masks
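# The scores/boxes/masks returned above are still raw per-proposal outputs.
# A minimal sketch of how a caller might keep only confident detections; the
# helper name and the score_thresh default are illustrative, not part of the
# MNC code, and it assumes class 0 is background.
import numpy as np

def keep_confident_detections(scores, boxes, masks, score_thresh=0.5):
    """Keep proposals whose best non-background score reaches score_thresh."""
    cls_ids = scores[:, 1:].argmax(axis=1) + 1                 # best foreground class per proposal
    cls_scores = scores[np.arange(scores.shape[0]), cls_ids]   # its probability
    keep = np.where(cls_scores >= score_thresh)[0]
    return cls_ids[keep], cls_scores[keep], boxes[keep], masks[keep]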
def forward_train(self, bottom, top):
    """
    During forward, we need to do several things:
    1. Apply the bounding box regression output that has the highest
       classification score to the proposed ROIs
    2. Sample ROIs based on their current overlaps and assign labels to them
    3. Make mask regression targets and positive/negative weights,
       just like the proposal_target_layer
    """
    rois = bottom[0].data
    bbox_deltas = bottom[1].data
    # Apply bounding box regression according to maximum segmentation score
    seg_scores = bottom[2].data
    self._bbox_reg_labels = seg_scores[:, 1:].argmax(axis=1) + 1

    gt_boxes = bottom[3].data
    gt_masks = bottom[4].data
    im_info = bottom[5].data[0, :]
    mask_info = bottom[6].data

    # select bbox_deltas according to the highest-scoring class (self._bbox_reg_labels)
    artificial_deltas = np.zeros((rois.shape[0], 4))
    for i in xrange(rois.shape[0]):
        artificial_deltas[i, :] = bbox_deltas[i, 4 * self._bbox_reg_labels[i]:4 * (self._bbox_reg_labels[i] + 1)]
    artificial_deltas[self._bbox_reg_labels == 0, :] = 0

    all_rois = np.zeros((rois.shape[0], 5))
    all_rois[:, 0] = 0
    all_rois[:, 1:5] = bbox_transform_inv(rois[:, 1:5], artificial_deltas)
    zeros = np.zeros((gt_boxes.shape[0], 1), dtype=gt_boxes.dtype)
    all_rois = np.vstack((all_rois, np.hstack((zeros, gt_boxes[:, :-1]))))
    all_rois[:, 1:5], self._clip_keep = clip_boxes(all_rois[:, 1:5], im_info[:2])

    labels, rois_out, fg_inds, keep_inds, mask_targets, top_mask_info, bbox_targets, bbox_inside_weights = \
        self._sample_output(all_rois, gt_boxes, im_info[2], gt_masks, mask_info)
    bbox_outside_weights = np.array(bbox_inside_weights > 0).astype(np.float32)
    self._keep_inds = keep_inds

    mask_weight = np.zeros((rois_out.shape[0], 1, cfg.MASK_SIZE, cfg.MASK_SIZE))
    mask_weight[0:len(fg_inds), :, :, :] = 1

    blobs = {
        'rois': rois_out,
        'labels': labels,
        'mask_targets': mask_targets,
        'mask_weight': mask_weight,
        'gt_mask_info': top_mask_info,
        'bbox_targets': bbox_targets,
        'bbox_inside_weights': bbox_inside_weights,
        'bbox_outside_weights': bbox_outside_weights
    }
    return blobs
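# The forward_train layer above (and most functions below) leans on
# bbox_transform_inv to turn regression deltas into absolute boxes. A sketch
# of that inverse transform, following the standard Fast R-CNN
# parameterisation; the actual MNC utility may differ in details such as
# dtype handling.
import numpy as np

def bbox_transform_inv_sketch(boxes, deltas):
    """Apply (dx, dy, dw, dh) deltas to boxes given as (x1, y1, x2, y2)."""
    widths = boxes[:, 2] - boxes[:, 0] + 1.0
    heights = boxes[:, 3] - boxes[:, 1] + 1.0
    ctr_x = boxes[:, 0] + 0.5 * widths
    ctr_y = boxes[:, 1] + 0.5 * heights

    dx, dy = deltas[:, 0::4], deltas[:, 1::4]
    dw, dh = deltas[:, 2::4], deltas[:, 3::4]
    pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis]
    pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis]
    pred_w = np.exp(dw) * widths[:, np.newaxis]
    pred_h = np.exp(dh) * heights[:, np.newaxis]

    pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype)
    pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w  # x1
    pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h  # y1
    pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w  # x2
    pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h  # y2
    return pred_boxes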
def _segmentation_forward(self, im):
    forward_kwargs, im_scales = self._prepare_mnc_args(im)
    blobs_out = self.net.forward(**forward_kwargs)
    # output we need to collect:
    # 1. output from phase1'
    rois_phase1 = self.net.blobs['rois'].data.copy()
    masks_phase1 = self.net.blobs['mask_proposal'].data[...]
    scores_phase1 = self.net.blobs['seg_cls_prob'].data[...]
    # 2. output from phase2
    rois_phase2 = self.net.blobs['rois_ext'].data[...]
    masks_phase2 = self.net.blobs['mask_proposal_ext'].data[...]
    scores_phase2 = self.net.blobs['seg_cls_prob_ext'].data[...]
    # Boxes are in resized space, we un-scale them back
    rois_phase1 = rois_phase1[:, 1:5] / im_scales[0]
    rois_phase2 = rois_phase2[:, 1:5] / im_scales[0]
    rois_phase1, _ = clip_boxes(rois_phase1, im.shape)
    rois_phase2, _ = clip_boxes(rois_phase2, im.shape)
    # concatenate two stages to get final network output
    masks = np.concatenate((masks_phase1, masks_phase2), axis=0)
    boxes = np.concatenate((rois_phase1, rois_phase2), axis=0)
    scores = np.concatenate((scores_phase1, scores_phase2), axis=0)
    return masks, boxes, scores
def im_detect(im, net):
    forward_kwargs, im_scales = prepare_mnc_args(im, net)
    blobs_out = net.forward(**forward_kwargs)
    # output we need to collect:
    # 1. output from phase1'
    rois_phase1 = net.blobs['rois'].data.copy()
    masks_phase1 = net.blobs['mask_proposal'].data[...]
    scores_phase1 = net.blobs['seg_cls_prob'].data[...]
    # 2. output from phase2
    rois_phase2 = net.blobs['rois_ext'].data[...]
    masks_phase2 = net.blobs['mask_proposal_ext'].data[...]
    scores_phase2 = net.blobs['seg_cls_prob_ext'].data[...]
    # Boxes are in resized space, we un-scale them back
    rois_phase1 = rois_phase1[:, 1:5] / im_scales[0]
    rois_phase2 = rois_phase2[:, 1:5] / im_scales[0]
    rois_phase1, _ = clip_boxes(rois_phase1, im.shape)
    rois_phase2, _ = clip_boxes(rois_phase2, im.shape)
    # concatenate two stages to get final network output
    masks = np.concatenate((masks_phase1, masks_phase2), axis=0)
    boxes = np.concatenate((rois_phase1, rois_phase2), axis=0)
    scores = np.concatenate((scores_phase1, scores_phase2), axis=0)
    return boxes, masks, scores
def im_detect(im, net):
    forward_kwargs, im_scales = prepare_mnc_args(im, net, cfg.TEST.SCALES[0])
    blobs_out = net.forward(**forward_kwargs)
    # output we need to collect:
    # 1. output from phase1'
    rois_phase1 = net.blobs['rois'].data.copy()
    # print 'rois_phase1:{}'.format(rois_phase1.shape)
    masks_phase1 = net.blobs['mask_proposal'].data[...]
    scores_phase1 = net.blobs['seg_cls_prob'].data[...]
    # 2. output from phase2 is not collected in this multi-scale variant
    # Boxes are in resized space, we un-scale them back
    rois_phase1 = rois_phase1[:, 1:5] / im_scales[0]
    rois_phase1, _ = clip_boxes(rois_phase1, im.shape)
    masks = masks_phase1
    boxes = rois_phase1
    scores = scores_phase1

    # Run the first stage again at every test scale and append its outputs.
    # Note that cfg.TEST.SCALES contains the scale already used above, so the
    # proposals from that scale appear twice in the result.
    # test_size_list = (550, 580, 630, 650)
    test_size_list = cfg.TEST.SCALES
    print test_size_list
    for test_size in test_size_list:
        print '>>>>> use test_size %d' % test_size
        forward_kwargs, im_scales = prepare_mnc_args(im, net, test_size)
        blobs_out = net.forward(**forward_kwargs)
        rois_phase_t = net.blobs['rois'].data.copy()
        masks_phase_t = net.blobs['mask_proposal'].data[...]
        scores_phase_t = net.blobs['seg_cls_prob'].data[...]
        rois_phase_t = rois_phase_t[:, 1:5] / im_scales[0]
        rois_phase_t, _ = clip_boxes(rois_phase_t, im.shape)
        masks = np.concatenate((masks, masks_phase_t), axis=0)
        boxes = np.concatenate((boxes, rois_phase_t), axis=0)
        scores = np.concatenate((scores, scores_phase_t), axis=0)
    return boxes, masks, scores
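# Because each scale in cfg.TEST.SCALES is forwarded independently (and the
# first scale is run twice), the concatenated boxes overlap heavily and a
# caller would normally deduplicate them, typically per class. A plain NumPy
# sketch of greedy NMS for a single 1-D score vector; this is an
# illustration, not the MNC nms wrapper.
import numpy as np

def greedy_nms_sketch(boxes, scores, iou_thresh=0.5):
    """Return indices of boxes kept by greedy NMS, highest score first."""
    x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        inter = np.maximum(0.0, xx2 - xx1 + 1) * np.maximum(0.0, yy2 - yy1 + 1)
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        order = order[1:][iou <= iou_thresh]
    return keep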
def forward_test(self, bottom, top):
    rois = bottom[0].data
    bbox_deltas = bottom[1].data
    # get ~ n * 4(1+c) new rois
    all_rois = bbox_transform_inv(rois[:, 1:5], bbox_deltas)
    scores = bottom[2].data
    im_info = bottom[3].data
    # get highest scored category's bounding box regressor
    score_max = scores.argmax(axis=1)
    rois_out = np.zeros((rois.shape[0], 5))
    # Single batch training
    rois_out[:, 0] = 0
    for i in xrange(len(score_max)):
        rois_out[i, 1:5] = all_rois[i, 4 * score_max[i]:4 * (score_max[i] + 1)]
    rois_out[:, 1:5], _ = clip_boxes(rois_out[:, 1:5], im_info[0, :2])
    blobs = {'rois': rois_out}
    return blobs
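# The per-ROI loop above can also be written without an explicit Python loop.
# A small vectorised sketch using np.take_along_axis (NumPy >= 1.15); it is
# an illustrative alternative, not the MNC implementation.
import numpy as np

def select_max_class_boxes(all_rois, scores):
    """For each ROI pick the 4 box coordinates of its highest-scoring class."""
    score_max = scores.argmax(axis=1)                    # (n,)
    cols = 4 * score_max[:, np.newaxis] + np.arange(4)   # (n, 4) column indices
    return np.take_along_axis(all_rois, cols, axis=1)    # (n, 4)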
def im_detect(im, net):
    forward_kwargs, im_scales = prepare_mnc_args(im, net)
    blobs_out = net.forward(**forward_kwargs)
    # output we need to collect:
    # 1. output from phase1'
    rois_phase1 = net.blobs['rois'].data.copy()
    # print 'rois_phase1:{}'.format(rois_phase1.shape)
    masks_phase1 = net.blobs['mask_proposal'].data[...]
    scores_phase1 = net.blobs['seg_cls_prob'].data[...]
    # 2. output from phase2 is not collected in this variant:
    # rois_phase2 = net.blobs['rois_ext'].data[...]
    # masks_phase2 = net.blobs['mask_proposal_ext'].data[...]
    # scores_phase2 = net.blobs['seg_cls_prob_ext'].data[...]
    # Boxes are in resized space, we un-scale them back
    rois_phase1 = rois_phase1[:, 1:5] / im_scales[0]
    rois_phase1, _ = clip_boxes(rois_phase1, im.shape)
    masks = masks_phase1
    boxes = rois_phase1
    scores = scores_phase1
    return boxes, masks, scores
def _detection_forward(self, im):
    """Detect object classes in an image given object proposals.

    Arguments:
        im (ndarray): color image to test (in BGR order)

    Returns:
        box_scores (ndarray): R x K array of object class scores
            (K includes background as object category 0)
        all_boxes (ndarray): R x (4*K) array of predicted bounding boxes
    """
    forward_kwargs, im_scales = self._prepare_mnc_args(im)
    blobs_out = self.net.forward(**forward_kwargs)
    # There are some data we need to get:
    # 1. ROIS (with bbox regression)
    rois = self.net.blobs['rois'].data.copy()
    # un-scale back to raw image space
    boxes = rois[:, 1:5] / im_scales[0]
    box_deltas = blobs_out['bbox_pred']
    pred_boxes = bbox_transform_inv(boxes, box_deltas)
    pred_boxes, _ = clip_boxes(pred_boxes, im.shape)
    # 2. Detection score
    scores = blobs_out['cls_prob']
    return scores, pred_boxes
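# The R x K scores and R x (4*K) boxes returned above are usually unpacked
# per class before thresholding/NMS. A minimal sketch; the helper name and
# threshold are illustrative, not part of the MNC code.
import numpy as np

def detections_for_class(scores, pred_boxes, k, thresh=0.05):
    """Collect (x1, y1, x2, y2, score) rows for class k."""
    inds = np.where(scores[:, k] > thresh)[0]
    cls_boxes = pred_boxes[inds, 4 * k:4 * (k + 1)]
    cls_scores = scores[inds, k]
    return np.hstack((cls_boxes, cls_scores[:, np.newaxis]))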
def forward(self, bottom, top):
    # Algorithm:
    #
    # for each (H, W) location i
    #     generate A anchor boxes centered on cell i
    #     apply predicted transform deltas at cell i to each of the A anchors
    # clip predicted boxes to image
    # remove predicted boxes with either height or width < threshold
    # sort all (proposal, score) pairs by score from highest to lowest
    # take top pre_nms_topN proposals before NMS
    # apply NMS with threshold 0.7 to remaining proposals
    # take after_nms_topN proposals after NMS
    # return the top proposals (-> RoIs top, scores top)
    assert bottom[0].data.shape[0] == 1, 'Only single item batches are supported'
    cfg_key = str(self.phase)  # either 'TRAIN' or 'TEST'
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
    min_size = cfg[cfg_key].RPN_MIN_SIZE

    # the first set of _num_anchors channels are bg probs
    # the second set are the fg probs, which we want
    scores = bottom[0].data[:, self._num_anchors:, :, :]
    bbox_deltas = bottom[1].data
    im_info = bottom[2].data[0, :]

    # 1. Generate proposals from transform deltas and shifted anchors
    height, width = scores.shape[-2:]
    self._height = height
    self._width = width
    # Enumerate all shifts
    shift_x = np.arange(0, self._width) * self._feat_stride
    shift_y = np.arange(0, self._height) * self._feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()
    # Enumerate all shifted anchors:
    #
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = self._num_anchors
    K = shifts.shape[0]
    anchors = self._anchors.reshape((1, A, 4)) + \
        shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    anchors = anchors.reshape((K * A, 4))
    _, keep = clip_boxes(anchors, im_info[:2])
    self._anchor_index_before_clip = keep

    # Transpose and reshape the predicted transform deltas to get them
    # into the same order as the anchors:
    #
    # transform deltas will be (1, 4 * A, H, W) format
    # transpose to (1, H, W, 4 * A)
    # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
    # in slowest to fastest order
    bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))
    # Same story for the scores:
    #
    # scores are (1, A, H, W) format
    # transpose to (1, H, W, A)
    # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
    scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

    # Convert anchors into proposals via the transform deltas
    proposals = bbox_transform_inv(anchors, bbox_deltas)

    # 2. clip predicted boxes to image
    proposals, keep = clip_boxes(proposals, im_info[:2])
    # Record the corresponding index before and after clipping.
    # This step doesn't need unmap; we need it to decide whether to
    # do back-propagation.
    self._proposal_index_before_clip = keep

    # 3. remove predicted boxes with either height or width < threshold
    # (NOTE: convert min_size to input image scale stored in im_info[2])
    keep = filter_small_boxes(proposals, min_size * im_info[2])
    proposals = proposals[keep, :]
    scores = scores[keep]
    self._ind_after_filter = keep

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]
    self._ind_after_sort = order

    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    keep = nms(np.hstack((proposals, scores)), nms_thresh)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Output rois blob
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    proposals = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    self._proposal_index = keep

    blobs = {'rois': proposals}
    if str(self.phase) == 'TRAIN':
        if cfg.TRAIN.MIX_INDEX:
            all_rois_index = self._ind_after_filter[
                self._ind_after_sort[self._proposal_index]].reshape(1, len(keep))
            blobs['proposal_index'] = all_rois_index

    # Copy data to forward to top layer
    for blob_name, blob in blobs.iteritems():
        top[self._top_name_map[blob_name]].reshape(*blob.shape)
        top[self._top_name_map[blob_name]].data[...] = blob.astype(np.float32, copy=False)
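# Step 3 above relies on filter_small_boxes. A sketch of what that filter is
# expected to do (keep boxes whose width and height both reach min_size);
# the real MNC utility may differ in detail.
import numpy as np

def filter_small_boxes_sketch(boxes, min_size):
    """Indices of boxes with width >= min_size and height >= min_size."""
    ws = boxes[:, 2] - boxes[:, 0] + 1
    hs = boxes[:, 3] - boxes[:, 1] + 1
    return np.where((ws >= min_size) & (hs >= min_size))[0]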
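# As a quick sanity check of the anchor broadcasting used in the forward pass
# above, a toy example: 2 placeholder base anchors on a 2 x 2 feature map
# with stride 16 (the anchors here are made up, not the generate_anchors
# output).
import numpy as np

feat_stride = 16
base_anchors = np.array([[-8., -8., 8., 8.],
                         [-16., -16., 16., 16.]])
height, width = 2, 2

shift_x = np.arange(width) * feat_stride
shift_y = np.arange(height) * feat_stride
shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                    shift_x.ravel(), shift_y.ravel())).transpose()

A, K = base_anchors.shape[0], shifts.shape[0]
anchors = (base_anchors.reshape((1, A, 4)) +
           shifts.reshape((1, K, 4)).transpose((1, 0, 2))).reshape((K * A, 4))
print(anchors.shape)  # (8, 4): one row per (cell, anchor) pair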