def regress_rois(roi_proposals, roi_regression_factors, labels, dims_input):
    """Refine ROI coordinates with per-class regression deltas, in place.

    For every ROI whose label is greater than zero, the four regression
    factors belonging to that label are applied to the ROI via
    ``bbox_transform_inv`` and the result is written back into
    ``roi_proposals``. ROIs with a label of zero (or less) are left untouched.

    If ``dims_input`` is given, all ROIs are afterwards clipped to the
    non-padded image area inside the padded input.

    Args:
        roi_proposals: 2-D array with one ROI per row; columns 0..3 are
            treated as (x1, y1, x2, y2) by the clipping step. Modified in
            place.
        roi_regression_factors: 2-D array indexed as
            ``[roi, label * 4:(label + 1) * 4]``.
        labels: one label per ROI.
        dims_input: ``None`` to skip clipping, otherwise a 6-tuple
            (pad_width, pad_height, scaled_image_width, scaled_image_height,
            orig_img_width, orig_img_height).

    Returns:
        The same ``roi_proposals`` array that was passed in.
    """
    for row, cls in enumerate(labels):
        if cls > 0:
            # Pick the 4 deltas of the predicted class; slices keep 2-D shape.
            class_deltas = roi_regression_factors[row:row + 1, cls * 4:(cls + 1) * 4]
            current_box = roi_proposals[row:row + 1, :]
            roi_proposals[row, :] = bbox_transform_inv(current_box, class_deltas)

    if dims_input is None:
        return roi_proposals

    # dims_input -- (pad_width, pad_height, scaled_image_width,
    #                scaled_image_height, orig_img_width, orig_img_height)
    pad_width, pad_height, scaled_image_width, scaled_image_height, _, _ = dims_input

    # The scaled image is centered inside the padded canvas.
    x_min = (pad_width - scaled_image_width) / 2
    x_max = pad_width - x_min - 1
    y_min = (pad_height - scaled_image_height) / 2
    y_max = pad_height - y_min - 1

    # Columns 0/2 are x coordinates, columns 1/3 are y coordinates.
    column_limits = ((x_min, x_max), (y_min, y_max), (x_min, x_max), (y_min, y_max))
    for col, (lower, upper) in enumerate(column_limits):
        roi_proposals[:, col] = roi_proposals[:, col].clip(lower, upper)

    return roi_proposals
def forward(self, arguments, device=None, outputs_to_retain=None):
    """Convert RPN scores and box deltas into a list of region proposals.

    Algorithm:

        for each (H, W) location i
            generate A anchor boxes centered on cell i
            apply predicted bbox deltas at cell i to each of the A anchors
        clip predicted boxes to image
        remove predicted boxes with either height or width < threshold
        sort all (proposal, score) pairs by score from highest to lowest
        take top pre_nms_topN proposals before NMS
        apply NMS to remaining proposals
        take post_nms_topN proposals after NMS
        zero-pad the result up to post_nms_topN rows

    Args:
        arguments: sequence of three arrays.
            ``arguments[0]`` holds the class scores; the first
            ``self._num_anchors`` channels are background probabilities and
            the remaining ones foreground probabilities. Its batch axis must
            have size 1.
            ``arguments[1]`` holds the bbox deltas, laid out as
            (1, 4 * A, H, W).
            ``arguments[2][0]`` is im_info = (pad_width, pad_height,
            scaled_image_width, scaled_image_height, orig_img_width,
            orig_img_height).
        device: not used by this method.
        outputs_to_retain: when empty (or ``None``) the ``test_*`` settings
            from ``self._layer_config`` are used, otherwise the ``train_*``
            settings.

    Returns:
        A tuple ``(None, proposals)`` where ``proposals`` has a leading
        batch axis of size 1 followed by the proposal rows.

    Raises:
        AssertionError: if the score batch axis is not of size 1.
    """
    # Use potentially different numbers of proposals for training vs
    # evaluation. A truthiness check is used instead of len() so that the
    # declared default of None does not raise a TypeError.
    prefix = 'train' if outputs_to_retain else 'test'
    pre_nms_topN = self._layer_config[prefix + '_pre_nms_topN']
    post_nms_topN = self._layer_config[prefix + '_post_nms_topN']
    nms_thresh = self._layer_config[prefix + '_nms_thresh']
    min_size = self._layer_config[prefix + '_min_size']

    bottom = arguments
    assert bottom[0].shape[0] == 1, \
        'Only single item batches are supported'

    # the first set of _num_anchors channels are bg probs
    # the second set are the fg probs, which we want
    scores = bottom[0][:, self._num_anchors:, :, :]
    bbox_deltas = bottom[1]
    im_info = bottom[2][0]

    if DEBUG:
        # im_info = (pad_width, pad_height, scaled_image_width,
        #            scaled_image_height, orig_img_width, orig_img_height)
        # e.g. (1000, 1000, 1000, 600, 500, 300) for an original image of
        # 600x300 that is scaled and padded to 1000x1000
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scaled im_size: ({}, {})'.format(im_info[2], im_info[3]))
        print('original im_size: ({}, {})'.format(im_info[4], im_info[5]))

    # 1. Generate proposals from bbox deltas and shifted anchors
    height, width = scores.shape[-2:]

    if DEBUG:
        print('score map size: {}'.format(scores.shape))

    # Enumerate all shifts: one (x, y, x, y) offset per feature-map cell.
    shift_x = np.arange(0, width) * self._feat_stride
    shift_y = np.arange(0, height) * self._feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()

    # Enumerate all shifted anchors:
    #
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = self._num_anchors
    K = shifts.shape[0]
    anchors = self._anchors.reshape((1, A, 4)) + \
        shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    anchors = anchors.reshape((K * A, 4))

    # Transpose and reshape predicted bbox transformations to get them
    # into the same order as the anchors:
    #
    # bbox deltas will be (1, 4 * A, H, W) format
    # transpose to (1, H, W, 4 * A)
    # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
    # in slowest to fastest order
    bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

    # Same story for the scores:
    #
    # scores are (1, A, H, W) format
    # transpose to (1, H, W, A)
    # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
    scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

    # Convert anchors into proposals via bbox transformations
    proposals = bbox_transform_inv(anchors, bbox_deltas)

    # 2. clip predicted boxes to image
    proposals = clip_boxes(proposals, im_info)

    # 3. remove predicted boxes with either height or width < threshold
    # (NOTE: convert min_size to input image scale.
    #  Original size = im_info[4:6], scaled size = im_info[2:4])
    cntk_image_scale = im_info[2] / im_info[4]
    keep = _filter_boxes(proposals, min_size * cntk_image_scale)
    proposals = proposals[keep, :]
    scores = scores[keep]

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    order = scores.ravel().argsort(kind='mergesort')[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    keep = nms(np.hstack((proposals, scores)), nms_thresh, use_gpu_nms=False)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    # The scores are not part of the output, so they are not filtered again.

    # pad with zeros if too few rois were found
    num_found_proposals = proposals.shape[0]
    if num_found_proposals < post_nms_topN:
        if DEBUG:
            print("Only {} proposals generated in ProposalLayer".format(num_found_proposals))
        proposals_padded = np.zeros(((post_nms_topN,) + proposals.shape[1:]), dtype=np.float32)
        proposals_padded[:num_found_proposals, :] = proposals
        proposals = proposals_padded

    # Output rois blob
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    # for CNTK: add batch axis to output shape (reshape is used because
    # assigning to ndarray.shape is discouraged by NumPy)
    proposals = proposals.reshape((1,) + proposals.shape)

    return None, proposals
def forward(self, arguments, device=None, outputs_to_retain=None):
    """Produce region proposals from RPN scores and bbox deltas.

    Steps:
        1. build A anchors at every (H, W) cell and apply the predicted
           deltas of that cell to them
        2. clip the resulting boxes to the image
        3. drop boxes whose height or width is below the threshold
        4. order the remaining (proposal, score) pairs by descending score
        5. keep the best pre_nms_topN of them
        6. run NMS
        7. keep the best post_nms_topN survivors
        8. zero-pad to post_nms_topN rows and add a batch axis

    Args:
        arguments: sequence of three arrays: class scores (background
            channels first, then ``self._num_anchors`` foreground channels;
            batch axis must be 1), bbox deltas in (1, 4 * A, H, W) layout,
            and im_info whose first two entries are passed to
            ``clip_boxes`` and whose third entry is used as image scale.
        device: not used by this method.
        outputs_to_retain: currently ignored, see the TODO below.

    Returns:
        ``(None, proposals)`` with ``proposals`` carrying a leading batch
        axis of size 1.
    """
    # TODO: remove 'False and' once FreeDimension works
    # Until then the TRAIN settings are used unconditionally.
    use_test_settings = False and len(outputs_to_retain) == 0
    phase_cfg = cfg["TEST"] if use_test_settings else cfg["TRAIN"]
    pre_nms_topN = phase_cfg.RPN_PRE_NMS_TOP_N
    post_nms_topN = phase_cfg.RPN_POST_NMS_TOP_N
    nms_thresh = phase_cfg.RPN_NMS_THRESH
    min_size = phase_cfg.RPN_MIN_SIZE

    bottom = arguments
    assert bottom[0].shape[0] == 1, \
        'Only single item batches are supported'

    # Channels [0, A) are background probabilities; we only want the
    # foreground probabilities that follow them.
    fg_scores = bottom[0][:, self._num_anchors:, :, :]
    deltas = bottom[1]
    im_info = bottom[2]

    if DEBUG:
        print ('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print ('scale: {}'.format(im_info[2]))

    # --- 1. anchors + deltas -> proposals --------------------------------
    height, width = fg_scores.shape[-2:]

    if DEBUG:
        print ('score map size: {}'.format(fg_scores.shape))

    # One (x, y, x, y) offset per feature-map cell.
    grid_x, grid_y = np.meshgrid(np.arange(0, width) * self._feat_stride,
                                 np.arange(0, height) * self._feat_stride)
    flat_x = grid_x.ravel()
    flat_y = grid_y.ravel()
    shifts = np.vstack((flat_x, flat_y, flat_x, flat_y)).transpose()

    # Broadcast (1, A, 4) anchors against (K, 1, 4) shifts -> (K, A, 4),
    # then flatten to (K * A, 4).
    num_anchors = self._num_anchors
    num_cells = shifts.shape[0]
    base_anchors = self._anchors.reshape((1, num_anchors, 4))
    cell_shifts = shifts.reshape((1, num_cells, 4)).transpose((1, 0, 2))
    all_anchors = (base_anchors + cell_shifts).reshape((num_cells * num_anchors, 4))

    # Bring deltas from (1, 4 * A, H, W) to (H * W * A, 4) so that rows
    # follow the same (h, w, a) order as the anchors.
    deltas = deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

    # Likewise scores: (1, A, H, W) -> (H * W * A, 1).
    fg_scores = fg_scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

    proposals = bbox_transform_inv(all_anchors, deltas)

    # --- 2. clip to image -------------------------------------------------
    proposals = clip_boxes(proposals, im_info[:2])

    # --- 3. remove boxes that are too small -------------------------------
    # min_size is converted to input image scale, stored in im_info[2].
    big_enough = _filter_boxes(proposals, min_size * im_info[2])
    proposals = proposals[big_enough, :]
    fg_scores = fg_scores[big_enough]

    # --- 4./5. sort by score (descending), keep top pre_nms_topN ----------
    ranking = fg_scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        ranking = ranking[:pre_nms_topN]
    proposals = proposals[ranking, :]
    fg_scores = fg_scores[ranking]

    # --- 6./7. NMS, keep top post_nms_topN --------------------------------
    survivors = nms(np.hstack((proposals, fg_scores)), nms_thresh)
    if post_nms_topN > 0:
        survivors = survivors[:post_nms_topN]
    proposals = proposals[survivors, :]
    fg_scores = fg_scores[survivors]

    # --- 8. zero-pad when too few rois were found -------------------------
    found = proposals.shape[0]
    if found < post_nms_topN:
        if DEBUG:
            print("Only {} proposals generated in ProposalLayer".format(found))
        padded = np.zeros(((post_nms_topN,) + proposals.shape[1:]), dtype=np.float32)
        padded[:found, :] = proposals
        proposals = padded

    # Only a single input image is supported, so all batch indices are 0.
    # For CNTK: prepend a batch axis to the output shape.
    proposals.shape = (1,) + proposals.shape

    return None, proposals
def forward(self, arguments, device=None, outputs_to_retain=None):
    """Convert RPN scores and box deltas into a list of region proposals.

    Algorithm:

        for each (H, W) location i
            generate A anchor boxes centered on cell i
            apply predicted bbox deltas at cell i to each of the A anchors
        clip predicted boxes to image
        remove predicted boxes with either height or width < threshold
        sort all (proposal, score) pairs by score from highest to lowest
        take top pre_nms_topN proposals before NMS
        apply NMS to remaining proposals
        take post_nms_topN proposals after NMS
        zero-pad the result up to post_nms_topN rows

    Args:
        arguments: sequence of three arrays.
            ``arguments[0]`` holds the class scores; the first
            ``self._num_anchors`` channels are background probabilities and
            the remaining ones foreground probabilities. Its batch axis must
            have size 1.
            ``arguments[1]`` holds the bbox deltas, laid out as
            (1, 4 * A, H, W).
            ``arguments[2][0]`` is im_info = (pad_width, pad_height,
            scaled_image_width, scaled_image_height, orig_img_width,
            orig_img_height).
        device: not used by this method.
        outputs_to_retain: when empty (or ``None``) the ``cfg["TEST"]``
            settings are used, otherwise the ``cfg["TRAIN"]`` settings.

    Returns:
        A tuple ``(None, proposals)`` where ``proposals`` has a leading
        batch axis of size 1 followed by the proposal rows.

    Raises:
        AssertionError: if the score batch axis is not of size 1.
    """
    # Use potentially different numbers of proposals for training vs
    # evaluation. A truthiness check is used instead of len() so that the
    # declared default of None does not raise a TypeError.
    phase_cfg = cfg["TRAIN"] if outputs_to_retain else cfg["TEST"]
    pre_nms_topN = phase_cfg.RPN_PRE_NMS_TOP_N
    post_nms_topN = phase_cfg.RPN_POST_NMS_TOP_N
    nms_thresh = phase_cfg.RPN_NMS_THRESH
    min_size = phase_cfg.RPN_MIN_SIZE

    bottom = arguments
    assert bottom[0].shape[0] == 1, \
        'Only single item batches are supported'

    # the first set of _num_anchors channels are bg probs
    # the second set are the fg probs, which we want
    scores = bottom[0][:, self._num_anchors:, :, :]
    bbox_deltas = bottom[1]
    im_info = bottom[2][0]

    if DEBUG:
        # im_info = (pad_width, pad_height, scaled_image_width,
        #            scaled_image_height, orig_img_width, orig_img_height)
        # e.g. (1000, 1000, 1000, 600, 500, 300) for an original image of
        # 600x300 that is scaled and padded to 1000x1000
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scaled im_size: ({}, {})'.format(im_info[2], im_info[3]))
        print('original im_size: ({}, {})'.format(im_info[4], im_info[5]))

    # 1. Generate proposals from bbox deltas and shifted anchors
    height, width = scores.shape[-2:]

    if DEBUG:
        print('score map size: {}'.format(scores.shape))

    # Enumerate all shifts: one (x, y, x, y) offset per feature-map cell.
    shift_x = np.arange(0, width) * self._feat_stride
    shift_y = np.arange(0, height) * self._feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()

    # Enumerate all shifted anchors:
    #
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = self._num_anchors
    K = shifts.shape[0]
    anchors = self._anchors.reshape((1, A, 4)) + \
        shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    anchors = anchors.reshape((K * A, 4))

    # Transpose and reshape predicted bbox transformations to get them
    # into the same order as the anchors:
    #
    # bbox deltas will be (1, 4 * A, H, W) format
    # transpose to (1, H, W, 4 * A)
    # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
    # in slowest to fastest order
    bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

    # Same story for the scores:
    #
    # scores are (1, A, H, W) format
    # transpose to (1, H, W, A)
    # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
    scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

    # Convert anchors into proposals via bbox transformations
    proposals = bbox_transform_inv(anchors, bbox_deltas)

    # 2. clip predicted boxes to image
    proposals = clip_boxes(proposals, im_info)

    # 3. remove predicted boxes with either height or width < threshold
    # (NOTE: convert min_size to input image scale.
    #  Original size = im_info[4:6], scaled size = im_info[2:4])
    cntk_image_scale = im_info[2] / im_info[4]
    keep = _filter_boxes(proposals, min_size * cntk_image_scale)
    proposals = proposals[keep, :]
    scores = scores[keep]

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    keep = nms(np.hstack((proposals, scores)), nms_thresh)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    # The scores are not part of the output, so they are not filtered again.

    # pad with zeros if too few rois were found
    num_found_proposals = proposals.shape[0]
    if num_found_proposals < post_nms_topN:
        if DEBUG:
            print("Only {} proposals generated in ProposalLayer".format(
                num_found_proposals))
        proposals_padded = np.zeros(
            ((post_nms_topN, ) + proposals.shape[1:]), dtype=np.float32)
        proposals_padded[:num_found_proposals, :] = proposals
        proposals = proposals_padded

    # Output rois blob
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    # for CNTK: add batch axis to output shape (reshape is used because
    # assigning to ndarray.shape is discouraged by NumPy)
    proposals = proposals.reshape((1, ) + proposals.shape)

    return None, proposals
def forward(self, bottom, top):
    """Produce the RoI blob for a single image from RPN outputs.

    Steps:
        1. build A anchors at every (H, W) cell and apply the predicted
           deltas of that cell to them
        2. clip the resulting boxes to the image
        3. drop boxes whose height or width is below the threshold
        4. order the remaining (proposal, score) pairs by descending score
        5. keep the best pre_nms_topN of them
        6. run NMS
        7. keep the best post_nms_topN survivors
        8. prepend a zero batch-index column and return the blob

    Args:
        bottom: sequence of three arrays: class scores (background channels
            first, then ``self._num_anchors`` foreground channels; batch
            axis must be 1), bbox deltas in (1, 4 * A, H, W) layout, and
            im_info whose first row supplies the image size (first two
            entries) and the image scale (third entry).
        top: not used by this method.

    Returns:
        A float32 array with one row per proposal: a batch index of 0
        followed by the proposal coordinates.
    """
    assert bottom[0].shape[0] == 1, \
        'Only single item batches are supported'

    # self.phase selects the settings group: either 'TRAIN' or 'TEST'.
    phase_cfg = cfg[str(self.phase)]
    pre_nms_topN = phase_cfg.RPN_PRE_NMS_TOP_N
    post_nms_topN = phase_cfg.RPN_POST_NMS_TOP_N
    nms_thresh = phase_cfg.RPN_NMS_THRESH
    min_size = phase_cfg.RPN_MIN_SIZE

    # Channels [0, A) are background probabilities; we only want the
    # foreground probabilities that follow them.
    fg_scores = bottom[0][:, self._num_anchors:, :, :]
    deltas = bottom[1]
    im_info = bottom[2][0, :]

    # --- 1. anchors + deltas -> proposals --------------------------------
    height, width = fg_scores.shape[-2:]

    # One (x, y, x, y) offset per feature-map cell.
    grid_x, grid_y = np.meshgrid(np.arange(0, width) * self._feat_stride,
                                 np.arange(0, height) * self._feat_stride)
    flat_x = grid_x.ravel()
    flat_y = grid_y.ravel()
    shifts = np.vstack((flat_x, flat_y, flat_x, flat_y)).transpose()

    # Broadcast (1, A, 4) anchors against (K, 1, 4) shifts -> (K, A, 4),
    # then flatten to (K * A, 4).
    num_anchors = self._num_anchors
    num_cells = shifts.shape[0]
    base_anchors = self._anchors.reshape((1, num_anchors, 4))
    cell_shifts = shifts.reshape((1, num_cells, 4)).transpose((1, 0, 2))
    all_anchors = (base_anchors + cell_shifts).reshape((num_cells * num_anchors, 4))

    # Bring deltas from (1, 4 * A, H, W) to (H * W * A, 4) so that rows
    # follow the same (h, w, a) order as the anchors.
    deltas = deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

    # Likewise scores: (1, A, H, W) -> (H * W * A, 1).
    fg_scores = fg_scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

    proposals = bbox_transform_inv(all_anchors, deltas)

    # --- 2. clip to image -------------------------------------------------
    proposals = clip_boxes(proposals, im_info[:2])

    # --- 3. remove boxes that are too small -------------------------------
    # min_size is converted to input image scale, stored in im_info[2].
    big_enough = _filter_boxes(proposals, min_size * im_info[2])
    proposals = proposals[big_enough, :]
    fg_scores = fg_scores[big_enough]

    # --- 4./5. sort by score (descending), keep top pre_nms_topN ----------
    ranking = fg_scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        ranking = ranking[:pre_nms_topN]
    proposals = proposals[ranking, :]
    fg_scores = fg_scores[ranking]

    # --- 6./7. NMS, keep top post_nms_topN --------------------------------
    survivors = nms(np.hstack((proposals, fg_scores)), nms_thresh)
    if post_nms_topN > 0:
        survivors = survivors[:post_nms_topN]
    proposals = proposals[survivors, :]
    fg_scores = fg_scores[survivors]

    # --- 8. assemble the output blob --------------------------------------
    # Only a single input image is supported, so every batch index is 0.
    zero_batch_column = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    return np.hstack((zero_batch_column, proposals.astype(np.float32, copy=False)))