def refine_poses(self, keypoint_thresh=10, score_thresh=0.5, neck_thresh=0.59, margin=0.0):
    """Prune the per-frame pose lists in ``self.poses`` in place.

    For every name in ``self.frame_basenames`` three filters run in order:

    1. Confidence: a pose survives only if more than ``keypoint_thresh``
       keypoints have a positive confidence (column 2), the sum of those
       confidences exceeds ``score_thresh`` and the confidence of
       keypoint 1 exceeds ``neck_thresh``.
    2. Neck NMS: a 20x20 box centred on keypoint 1, scored by the summed
       positive confidences, is built per pose and passed to ``nms`` with
       an overlap threshold of 0.1.
    3. Field test: the pose is lifted to 3D through the frame's camera and
       kept only if keypoint 1 lies inside the W x H rectangle centred on
       the origin, enlarged by ``margin`` on every side.

    Args:
        keypoint_thresh: minimum (exclusive) count of detected keypoints.
        score_thresh: minimum (exclusive) summed keypoint confidence.
        neck_thresh: minimum (exclusive) confidence of keypoint 1.
        margin: slack added around the field rectangle, in the same units
            as W and H (presumably metres -- TODO confirm).
    """
    field_w, field_h = 104.73, 67.74
    x_lo, x_hi = -field_w / 2. - margin, field_w / 2. + margin
    z_lo, z_hi = -field_h / 2. - margin, field_h / 2. + margin
    neck = 1

    for basename in tqdm(self.frame_basenames):
        candidates = self.poses[basename]

        # 1. drop poses with too few / too weak keypoints
        confident = []
        for pose in candidates:
            detected = (pose[:, 2] > 0.).nonzero()[0]
            total_conf = np.sum(pose[detected, 2])
            if len(detected) <= keypoint_thresh:
                continue
            if total_conf <= score_thresh or pose[neck, 2] <= neck_thresh:
                continue
            confident.append(pose)

        # 2. neck NMS on small boxes around keypoint 1
        neck_boxes = []
        for pose in confident:
            cx, cy = pose[neck, 0], pose[neck, 1]
            detected = (pose[:, 2] > 0).nonzero()
            neck_boxes.append(
                [cx - 10, cy - 10, cx + 10, cy + 10,
                 np.sum(pose[detected, 2])])
        neck_boxes = np.array(neck_boxes)

        if neck_boxes.ndim == 1:
            # np.array([]) is 1-D: nothing to suppress; the bounds check
            # below turns the placeholder index into an empty selection.
            survivors = [0]
        else:
            survivors = nms(neck_boxes.astype(np.float32), 0.1)
        deduped = [confident[k] for k in survivors if k < len(confident)]

        # 3. keep only poses whose lifted keypoint 1 is on the field
        calib = self.calib[basename]
        cam = cam_utils.Camera(basename, calib['A'], calib['R'], calib['T'],
                               self.shape[0], self.shape[1])
        on_field = []
        for pose in deduped:
            lifted = misc_utils.lift_keypoints_in_3d(cam, pose)
            if x_lo <= lifted[1, 0] <= x_hi and z_lo <= lifted[1, 2] <= z_hi:
                on_field.append(pose)

        self.poses[basename] = on_field
def refine_detectron(self, basename, score_thresh=0.9, nms_thresh=0.5, min_height=0.0, min_area=200):
    """Filter the stored detections of one frame through four stages.

    Stages, applied in order to boxes, masks and class ids:

    1. score (column 4 of ``boxes``) above ``score_thresh`` and class id
       equal to 1 (presumably the person class -- TODO confirm),
    2. ``misc_utils.putting_objects_in_perspective`` with ``min_height``,
    3. ``nms`` with ``nms_thresh``,
    4. box area (width * height) above ``min_area``.

    Args:
        basename: key into ``self.detectron`` and ``self.calib``.
        score_thresh: minimum (exclusive) detection score.
        nms_thresh: overlap threshold handed to ``nms``.
        min_height: forwarded to ``putting_objects_in_perspective``.
        min_area: minimum (exclusive) box area.

    Returns:
        ``(boxes, segms, keyps, classes)``.

    NOTE(review): ``keyps`` is returned exactly as stored, i.e. it is NOT
    filtered alongside the other three outputs -- confirm callers do not
    expect it to be aligned with ``boxes``.
    """
    def pick(items, indices):
        # Index a plain Python sequence with an iterable of positions.
        return [items[k] for k in indices]

    record = self.detectron[basename]
    boxes = record['boxes']
    segms = record['segms']
    keyps = record['keyps']
    classes = record['classes']

    # 1. confident detections of class 1
    wanted_class = np.array([c == 1 for c in classes], dtype=bool)
    selected = np.logical_and(boxes[:, 4] > score_thresh, wanted_class).nonzero()[0]
    boxes, segms, classes = boxes[selected, :], pick(segms, selected), pick(classes, selected)

    # 2. perspective / height plausibility
    calib = self.calib[basename]
    cam = cam_utils.Camera(basename, calib['A'], calib['R'], calib['T'],
                           self.shape[0], self.shape[1])
    plausible, _ = misc_utils.putting_objects_in_perspective(
        cam, boxes, min_height=min_height)
    boxes, segms, classes = boxes[plausible, :], pick(segms, plausible), pick(classes, plausible)

    # 3. non-maximum suppression
    survivors = nms(boxes.astype(np.float32), nms_thresh)
    boxes, segms, classes = boxes[survivors, :], pick(segms, survivors), pick(classes, survivors)

    # 4. minimum area
    widths = boxes[:, 2] - boxes[:, 0]
    heights = boxes[:, 3] - boxes[:, 1]
    big_enough = (widths * heights > min_area).nonzero()[0]
    boxes, segms, classes = boxes[big_enough, :], pick(segms, big_enough), pick(classes, big_enough)

    return boxes, segms, keyps, classes
def get_ball_from_detectron(self, thresh=0.0, nms_thresh=0.5):
    """Store the ball detections of every frame in ``self.ball``.

    For each name in ``self.frame_basenames`` the detections in
    ``self.detectron`` are reduced to boxes whose score (column 4) exceeds
    ``thresh`` and whose class id is 33 (the id this method treats as the
    ball), then de-duplicated with ``nms``.

    Args:
        thresh: minimum (exclusive) detection score.
        nms_thresh: overlap threshold handed to ``nms``.
    """
    ball_class = 33
    for basename in tqdm(self.frame_basenames):
        data = self.detectron[basename]
        boxes, classes = data['boxes'], data['classes']

        # Build the class mask with an explicit bool dtype: multiplying a
        # bool array by an empty Python list yields a float array, which
        # cannot be used as an index when a frame has no detections.
        is_ball = np.array([c == ball_class for c in classes], dtype=bool)
        valid = np.logical_and(boxes[:, 4] > thresh, is_ball)
        boxes = boxes[valid, :]

        # Nothing to suppress when no candidate is left.
        if boxes.shape[0] > 0:
            valid_nms = nms(boxes.astype(np.float32), nms_thresh)
            boxes = boxes[valid_nms, :]

        self.ball[basename] = boxes
def refine_poses(self, keypoint_thresh=10, score_thresh=0.5, neck_thresh=0.59):
    """Prune the per-frame pose lists in ``self.poses`` in place.

    For every name in ``self.frame_basenames``:

    1. keep a pose only if more than ``keypoint_thresh`` keypoints have a
       positive confidence (column 2), the sum of those confidences exceeds
       ``score_thresh`` and the confidence of keypoint 1 exceeds
       ``neck_thresh``;
    2. run ``nms`` (overlap threshold 0.1) over 20x20 boxes centred on
       keypoint 1, scored by the summed positive confidences.

    A frame in which no pose passes step 1 ends up with an empty list.

    Args:
        keypoint_thresh: minimum (exclusive) count of detected keypoints.
        score_thresh: minimum (exclusive) summed keypoint confidence.
        neck_thresh: minimum (exclusive) confidence of keypoint 1.
    """
    root_part = 1
    for basename in tqdm(self.frame_basenames):
        # 1. remove the poses with few or weak keypoints
        poses = []
        for keypoints in self.poses[basename]:
            valid = (keypoints[:, 2] > 0.).nonzero()[0]
            score = np.sum(keypoints[valid, 2])
            if (len(valid) > keypoint_thresh and score > score_thresh
                    and keypoints[root_part, 2] > neck_thresh):
                poses.append(keypoints)

        # Without this guard the NMS fallback index [0] would be applied
        # to an empty list and raise IndexError.
        if not poses:
            self.poses[basename] = []
            continue

        # 2. neck NMS
        root_box = []
        for pose in poses:
            root_tmp = pose[root_part, :]
            valid_keypoints = (pose[:, 2] > 0).nonzero()
            root_box.append([
                root_tmp[0] - 10, root_tmp[1] - 10,
                root_tmp[0] + 10, root_tmp[1] + 10,
                np.sum(pose[valid_keypoints, 2])
            ])
        root_box = np.array(root_box)
        keep2 = nms(root_box.astype(np.float32), 0.1)

        self.poses[basename] = [poses[ii] for ii in keep2]
def forward(self, arguments, device=None, outputs_to_retain=None):
    """Generate region proposals from RPN outputs for a single image.

    Algorithm:
      for each (H, W) location i
        generate A anchor boxes centered on cell i
        apply predicted bbox deltas at cell i to each of the A anchors
      clip predicted boxes to image
      remove predicted boxes with either height or width < threshold
      sort all (proposal, score) pairs by score from highest to lowest
      take top pre_nms_topN proposals before NMS
      apply NMS to the remaining proposals
      take post_nms_topN proposals after NMS (zero-padded if fewer remain)

    Args:
        arguments: indexable of three arrays -- class probabilities (first
            ``_num_anchors`` channels background, the rest foreground),
            bbox deltas, and ``im_info`` rows.
        device: not used by this implementation.
        outputs_to_retain: when ``None`` or empty the ``cfg["TEST"]``
            settings are used, otherwise ``cfg["TRAIN"]``.

    Returns:
        ``(None, proposals)`` where ``proposals`` carries a leading batch
        axis of size 1.
    """
    # use potentially different number of proposals for training vs evaluation;
    # a missing outputs_to_retain (the default) is treated like an empty one
    if outputs_to_retain is None or len(outputs_to_retain) == 0:
        phase = "TEST"
    else:
        phase = "TRAIN"
    pre_nms_topN = cfg[phase].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[phase].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[phase].RPN_NMS_THRESH
    min_size = cfg[phase].RPN_MIN_SIZE

    bottom = arguments
    assert bottom[0].shape[0] == 1, \
        'Only single item batches are supported'

    # the first set of _num_anchors channels are bg probs
    # the second set are the fg probs, which we want
    scores = bottom[0][:, self._num_anchors:, :, :]
    bbox_deltas = bottom[1]
    im_info = bottom[2][0]

    if DEBUG:
        # im_info = (pad_width, pad_height, scaled_image_width, scaled_image_height, orig_img_width, orig_img_height)
        # e.g. (1000, 1000, 1000, 600, 500, 300) for an original image of 500x300 that is scaled and padded to 1000x1000
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scaled im_size: ({}, {})'.format(im_info[2], im_info[3]))
        print('original im_size: ({}, {})'.format(im_info[4], im_info[5]))

    # 1. Generate proposals from bbox deltas and shifted anchors
    height, width = scores.shape[-2:]

    if DEBUG:
        print('score map size: {}'.format(scores.shape))

    # Enumerate all shifts
    shift_x = np.arange(0, width) * self._feat_stride
    shift_y = np.arange(0, height) * self._feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()

    # Enumerate all shifted anchors:
    #
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = self._num_anchors
    K = shifts.shape[0]
    anchors = self._anchors.reshape((1, A, 4)) + \
        shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    anchors = anchors.reshape((K * A, 4))

    # Transpose and reshape predicted bbox transformations to get them
    # into the same order as the anchors:
    #
    # bbox deltas will be (1, 4 * A, H, W) format
    # transpose to (1, H, W, 4 * A)
    # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
    # in slowest to fastest order
    bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

    # Same story for the scores:
    #
    # scores are (1, A, H, W) format
    # transpose to (1, H, W, A)
    # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
    scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

    # Convert anchors into proposals via bbox transformations
    proposals = bbox_transform_inv(anchors, bbox_deltas)

    # 2. clip predicted boxes to image
    proposals = clip_boxes(proposals, im_info)

    # 3. remove predicted boxes with either height or width < threshold
    # (NOTE: convert min_size to input image scale. Original size = im_info[4:6], scaled size = im_info[2:4])
    cntk_image_scale = im_info[2] / im_info[4]
    keep = _filter_boxes(proposals, min_size * cntk_image_scale)
    proposals = proposals[keep, :]
    scores = scores[keep]

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    keep = nms(np.hstack((proposals, scores)), nms_thresh)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]

    # pad with zeros if too few rois were found
    num_found_proposals = proposals.shape[0]
    if num_found_proposals < post_nms_topN:
        if DEBUG:
            print("Only {} proposals generated in ProposalLayer".format(
                num_found_proposals))
        proposals_padded = np.zeros(
            ((post_nms_topN, ) + proposals.shape[1:]), dtype=np.float32)
        proposals_padded[:num_found_proposals, :] = proposals
        proposals = proposals_padded

    # Output rois blob
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    # for CNTK: add batch axis to output shape (reshape rather than
    # assigning to .shape, which NumPy discourages)
    proposals = proposals.reshape((1, ) + proposals.shape)

    return None, proposals
def forward(self, bottom, top):
    """Turn the RPN outputs for one image into a blob of pruned ROIs.

    Steps:
      1. place the A base anchors on every cell of the (H, W) score map and
         apply the predicted bbox deltas to them
      2. clip the resulting boxes to the image
      3. drop boxes whose height or width is below the scaled minimum size
      4. sort the (proposal, score) pairs by descending score
      5. keep the top ``RPN_PRE_NMS_TOP_N`` of them
      6. run NMS with ``RPN_NMS_THRESH``
      7. keep the top ``RPN_POST_NMS_TOP_N`` survivors
      8. return them with a batch-index column of zeros prepended

    Args:
        bottom: indexable of three arrays -- class probabilities (first
            ``_num_anchors`` channels background, the rest foreground),
            bbox deltas, and ``im_info`` rows (image size in the first two
            entries, scale in the third).
        top: not used by this implementation.

    Returns:
        float32 array whose rows are ``[0, <proposal columns>]``.
    """
    assert bottom[0].shape[0] == 1, \
        'Only single item batches are supported'

    phase_cfg = cfg[str(self.phase)]  # phase is either 'TRAIN' or 'TEST'
    pre_nms_topN = phase_cfg.RPN_PRE_NMS_TOP_N
    post_nms_topN = phase_cfg.RPN_POST_NMS_TOP_N
    nms_thresh = phase_cfg.RPN_NMS_THRESH
    min_size = phase_cfg.RPN_MIN_SIZE

    num_anchors = self._num_anchors
    fg_probs = bottom[0][:, num_anchors:, :, :]  # skip the background channels
    deltas = bottom[1]
    im_info = bottom[2][0, :]

    # 1. one shift per score-map cell, as (x, y, x, y) rows
    map_h, map_w = fg_probs.shape[-2:]
    grid_x, grid_y = np.meshgrid(np.arange(0, map_w) * self._feat_stride,
                                 np.arange(0, map_h) * self._feat_stride)
    flat_x, flat_y = grid_x.ravel(), grid_y.ravel()
    shifts = np.vstack((flat_x, flat_y, flat_x, flat_y)).transpose()

    # (1, A, 4) anchors + (K, 1, 4) shifts -> (K, A, 4) -> (K * A, 4)
    num_cells = shifts.shape[0]
    anchors = self._anchors.reshape((1, num_anchors, 4)) + \
        shifts.reshape((1, num_cells, 4)).transpose((1, 0, 2))
    anchors = anchors.reshape((num_cells * num_anchors, 4))

    # Reorder deltas and scores to (h, w, a) so rows line up with anchors.
    deltas = deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))
    scores = fg_probs.transpose((0, 2, 3, 1)).reshape((-1, 1))

    proposals = bbox_transform_inv(anchors, deltas)

    # 2. clip to the image size stored in im_info[:2]
    proposals = clip_boxes(proposals, im_info[:2])

    # 3. minimum size, expressed at the input image scale im_info[2]
    big_enough = _filter_boxes(proposals, min_size * im_info[2])
    proposals = proposals[big_enough, :]
    scores = scores[big_enough]

    # 4./5. best-first ordering, optionally truncated
    ranking = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        ranking = ranking[:pre_nms_topN]
    proposals = proposals[ranking, :]
    scores = scores[ranking]

    # 6./7. NMS, optionally truncated
    survivors = nms(np.hstack((proposals, scores)), nms_thresh)
    if post_nms_topN > 0:
        survivors = survivors[:post_nms_topN]
    proposals = proposals[survivors, :]
    scores = scores[survivors]  # kept aligned with proposals; not returned

    # 8. only a single input image is supported, so every batch index is 0
    zeros_col = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    return np.hstack((zeros_col, proposals.astype(np.float32, copy=False)))
def forward(self, input):
    """Produce a fixed-size, zero-padded set of ROIs for every image in a batch.

    Steps:
      1. place the A base anchors on every cell of the feature map and
         apply the predicted bbox deltas to them
      2. clip the resulting boxes to the image
      3. per image: order by descending foreground score, keep the top
         ``rpn_pre_nms_top_n``, run NMS with ``rpn_nms_thresh`` and keep
         the top ``rpn_post_nms_top_n`` survivors

    No minimum-size filtering is performed here.

    Args:
        input: indexable of three tensors -- class probabilities (first
            ``_num_anchors`` channels background, the rest foreground),
            bbox deltas, and ``im_info``.

    Returns:
        Tensor of shape ``(batch_size, rpn_post_nms_top_n, 5)``; column 0
        holds the batch index, columns 1: hold the proposal, and rows
        beyond the number of kept proposals stay zero.
    """
    num_anchors = self._num_anchors
    fg_scores = input[0][:, num_anchors:, :, :]  # skip the background channels
    deltas = input[1]
    im_info = input[2]

    pre_nms_topN = self.cf.rpn_pre_nms_top_n
    post_nms_topN = self.cf.rpn_post_nms_top_n
    nms_thresh = self.cf.rpn_nms_thresh

    batch_size = deltas.size(0)

    # one (x, y, x, y) shift per feature-map cell
    feat_height, feat_width = fg_scores.size(2), fg_scores.size(3)
    grid_x, grid_y = np.meshgrid(np.arange(0, feat_width) * self._feat_stride,
                                 np.arange(0, feat_height) * self._feat_stride)
    shift_rows = np.vstack((grid_x.ravel(), grid_y.ravel(),
                            grid_x.ravel(), grid_y.ravel())).transpose()
    shifts = torch.from_numpy(shift_rows)
    shifts = shifts.contiguous().type_as(fg_scores).float()

    num_cells = shifts.size(0)

    # The cached base anchors are converted to the scores' tensor type.
    self._anchors = self._anchors.type_as(fg_scores)
    anchors = self._anchors.view(1, num_anchors, 4) + shifts.view(num_cells, 1, 4)
    anchors = anchors.view(1, num_cells * num_anchors, 4)
    anchors = anchors.expand(batch_size, num_cells * num_anchors, 4)

    # Reorder deltas and scores so their rows line up with the anchors.
    deltas = deltas.permute(0, 2, 3, 1).contiguous().view(batch_size, -1, 4)
    flat_scores = fg_scores.permute(0, 2, 3, 1).contiguous().view(batch_size, -1)

    # 1. anchors -> proposals, 2. clip to image
    proposals = bbox_transform_inv(anchors, deltas, batch_size)
    proposals = clip_boxes(proposals, im_info, batch_size)

    _, ranking = torch.sort(flat_scores, 1, True)  # descending along dim 1

    output = flat_scores.new(batch_size, post_nms_topN, 5).zero_()
    truncate_pre_nms = 0 < pre_nms_topN < flat_scores.numel()

    for i in range(batch_size):
        image_rank = ranking[i]
        if truncate_pre_nms:
            image_rank = image_rank[:pre_nms_topN]

        image_props = proposals[i][image_rank, :]
        image_scores = flat_scores[i][image_rank].view(-1, 1)

        # NMS over [proposal, score] rows, then optional truncation
        kept = nms(torch.cat((image_props, image_scores), 1), nms_thresh)
        kept = kept.long().view(-1)
        if post_nms_topN > 0:
            kept = kept[:post_nms_topN]

        image_props = image_props[kept, :]
        image_scores = image_scores[kept, :]  # kept aligned; not returned

        # rows past the kept proposals remain zero (padding)
        output[i, :, 0] = i
        output[i, :image_props.size(0), 1:] = image_props

    return output
def forward(self, bottom, top):
    """Turn the RPN outputs for one image into a blob of pruned ROIs.

    Steps:
      1. place the A base anchors on every cell of the (H, W) score map and
         apply the predicted bbox deltas to them
      2. clip the resulting boxes to the image
      3. drop boxes whose height or width is below the scaled minimum size
      4. sort the (proposal, score) pairs by descending score
      5. keep the top ``RPN_PRE_NMS_TOP_N`` of them
      6. run NMS with ``RPN_NMS_THRESH``
      7. keep the top ``RPN_POST_NMS_TOP_N`` survivors
      8. return them with a batch-index column of zeros prepended

    Args:
        bottom: indexable of three arrays -- class probabilities (first
            ``_num_anchors`` channels background, the rest foreground),
            bbox deltas, and ``im_info`` rows (image size in the first two
            entries, scale in the third).
        top: not used by this implementation.

    Returns:
        float32 array whose rows are ``[0, <proposal columns>]``.
    """
    assert bottom[0].shape[0] == 1, \
        'Only single item batches are supported'

    phase_cfg = cfg[str(self.phase)]  # phase is either 'TRAIN' or 'TEST'
    pre_nms_topN = phase_cfg.RPN_PRE_NMS_TOP_N
    post_nms_topN = phase_cfg.RPN_POST_NMS_TOP_N
    nms_thresh = phase_cfg.RPN_NMS_THRESH
    min_size = phase_cfg.RPN_MIN_SIZE

    num_anchors = self._num_anchors
    fg_probs = bottom[0][:, num_anchors:, :, :]  # skip the background channels
    deltas = bottom[1]
    im_info = bottom[2][0, :]

    # 1. one shift per score-map cell, as (x, y, x, y) rows
    map_h, map_w = fg_probs.shape[-2:]
    grid_x, grid_y = np.meshgrid(np.arange(0, map_w) * self._feat_stride,
                                 np.arange(0, map_h) * self._feat_stride)
    flat_x, flat_y = grid_x.ravel(), grid_y.ravel()
    shifts = np.vstack((flat_x, flat_y, flat_x, flat_y)).transpose()

    # (1, A, 4) anchors + (K, 1, 4) shifts -> (K, A, 4) -> (K * A, 4)
    num_cells = shifts.shape[0]
    anchors = self._anchors.reshape((1, num_anchors, 4)) + \
        shifts.reshape((1, num_cells, 4)).transpose((1, 0, 2))
    anchors = anchors.reshape((num_cells * num_anchors, 4))

    # Reorder deltas and scores to (h, w, a) so rows line up with anchors.
    deltas = deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))
    scores = fg_probs.transpose((0, 2, 3, 1)).reshape((-1, 1))

    proposals = bbox_transform_inv(anchors, deltas)

    # 2. clip to the image size stored in im_info[:2]
    proposals = clip_boxes(proposals, im_info[:2])

    # 3. minimum size, expressed at the input image scale im_info[2]
    big_enough = _filter_boxes(proposals, min_size * im_info[2])
    proposals = proposals[big_enough, :]
    scores = scores[big_enough]

    # 4./5. best-first ordering, optionally truncated
    ranking = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        ranking = ranking[:pre_nms_topN]
    proposals = proposals[ranking, :]
    scores = scores[ranking]

    # 6./7. NMS, optionally truncated
    survivors = nms(np.hstack((proposals, scores)), nms_thresh)
    if post_nms_topN > 0:
        survivors = survivors[:post_nms_topN]
    proposals = proposals[survivors, :]
    scores = scores[survivors]  # kept aligned with proposals; not returned

    # 8. only a single input image is supported, so every batch index is 0
    zeros_col = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    return np.hstack((zeros_col, proposals.astype(np.float32, copy=False)))
def forward(self, arguments, device=None, outputs_to_retain=None):
    """Generate region proposals from RPN outputs for a single image.

    Algorithm:
      for each (H, W) location i
        generate A anchor boxes centered on cell i
        apply predicted bbox deltas at cell i to each of the A anchors
      clip predicted boxes to image
      remove predicted boxes with either height or width < threshold
      sort all (proposal, score) pairs by score from highest to lowest
      take top pre_nms_topN proposals before NMS
      apply NMS to the remaining proposals
      take post_nms_topN proposals after NMS (zero-padded if fewer remain)

    Args:
        arguments: indexable of three arrays -- class probabilities (first
            ``_num_anchors`` channels background, the rest foreground),
            bbox deltas, and ``im_info`` rows.
        device: not used by this implementation.
        outputs_to_retain: when ``None`` or empty the ``cfg["TEST"]``
            settings are used, otherwise ``cfg["TRAIN"]``.

    Returns:
        ``(None, proposals)`` where ``proposals`` carries a leading batch
        axis of size 1.
    """
    # use potentially different number of proposals for training vs evaluation;
    # a missing outputs_to_retain (the default) is treated like an empty one
    if outputs_to_retain is None or len(outputs_to_retain) == 0:
        phase = "TEST"
    else:
        phase = "TRAIN"
    pre_nms_topN = cfg[phase].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[phase].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[phase].RPN_NMS_THRESH
    min_size = cfg[phase].RPN_MIN_SIZE

    bottom = arguments
    assert bottom[0].shape[0] == 1, \
        'Only single item batches are supported'

    # the first set of _num_anchors channels are bg probs
    # the second set are the fg probs, which we want
    scores = bottom[0][:, self._num_anchors:, :, :]
    bbox_deltas = bottom[1]
    im_info = bottom[2][0]

    if DEBUG:
        # im_info = (pad_width, pad_height, scaled_image_width, scaled_image_height, orig_img_width, orig_img_height)
        # e.g. (1000, 1000, 1000, 600, 500, 300) for an original image of 500x300 that is scaled and padded to 1000x1000
        print ('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print ('scaled im_size: ({}, {})'.format(im_info[2], im_info[3]))
        print ('original im_size: ({}, {})'.format(im_info[4], im_info[5]))

    # 1. Generate proposals from bbox deltas and shifted anchors
    height, width = scores.shape[-2:]

    if DEBUG:
        print ('score map size: {}'.format(scores.shape))

    # Enumerate all shifts
    shift_x = np.arange(0, width) * self._feat_stride
    shift_y = np.arange(0, height) * self._feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()

    # Enumerate all shifted anchors:
    #
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = self._num_anchors
    K = shifts.shape[0]
    anchors = self._anchors.reshape((1, A, 4)) + \
        shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    anchors = anchors.reshape((K * A, 4))

    # Transpose and reshape predicted bbox transformations to get them
    # into the same order as the anchors:
    #
    # bbox deltas will be (1, 4 * A, H, W) format
    # transpose to (1, H, W, 4 * A)
    # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
    # in slowest to fastest order
    bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

    # Same story for the scores:
    #
    # scores are (1, A, H, W) format
    # transpose to (1, H, W, A)
    # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
    scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

    # Convert anchors into proposals via bbox transformations
    proposals = bbox_transform_inv(anchors, bbox_deltas)

    # 2. clip predicted boxes to image
    proposals = clip_boxes(proposals, im_info)

    # 3. remove predicted boxes with either height or width < threshold
    # (NOTE: convert min_size to input image scale. Original size = im_info[4:6], scaled size = im_info[2:4])
    cntk_image_scale = im_info[2] / im_info[4]
    keep = _filter_boxes(proposals, min_size * cntk_image_scale)
    proposals = proposals[keep, :]
    scores = scores[keep]

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    keep = nms(np.hstack((proposals, scores)), nms_thresh)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]

    # pad with zeros if too few rois were found
    num_found_proposals = proposals.shape[0]
    if num_found_proposals < post_nms_topN:
        if DEBUG:
            print("Only {} proposals generated in ProposalLayer".format(num_found_proposals))
        proposals_padded = np.zeros(((post_nms_topN,) + proposals.shape[1:]), dtype=np.float32)
        proposals_padded[:num_found_proposals, :] = proposals
        proposals = proposals_padded

    # Output rois blob
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    # for CNTK: add batch axis to output shape (reshape rather than
    # assigning to .shape, which NumPy discourages)
    proposals = proposals.reshape((1,) + proposals.shape)

    return None, proposals
def proposal_layer(inputs, anchors, thresh=0.5, args=None):
    """Select at most one refined proposal per image from the RPN output.

    For each of the first ``bs`` images, the anchors whose score exceeds
    ``thresh`` are refined with their bounding-box deltas, clipped to the
    image window, passed through non-max suppression, and only the first
    index returned by ``nms`` is kept. Images without any anchor above
    ``thresh`` contribute nothing.

    Inputs:
        inputs[0] (rpn_probs): [batch, anchors*height*width] fg scores
        inputs[1] (rpn_bbox_deltas): [batch, anchors*height*width, 4]
        anchors: [batch, anchors*height*width, 4]
        thresh: minimum (exclusive) score for an anchor to be considered
        args: object providing ``batch``, ``img_size`` and ``nms_threshold``

    Returns:
        ``(normalized_boxes, boxes_ind, True)`` where ``normalized_boxes``
        is [num_rois, 4] divided by the image size and ``boxes_ind`` holds
        the image index of every row, or ``(None, None, False)`` when no
        image produced a proposal.

    NOTE(review): the coordinate order (x1, y1, x2, y2) vs (y1, x1, y2, x2)
    is decided by ``apply_box_deltas`` / ``clip_boxes`` -- confirm there.
    """
    scores = inputs[0]
    deltas = inputs[1]

    # Per-device batch size; max(..., 1) avoids a division by zero on
    # hosts without CUDA devices.
    gpu_count = max(torch.cuda.device_count(), 1)
    bs = args.batch // gpu_count

    # Loop-invariant values: clipping window, NMS threshold, anchor device.
    size = args.img_size
    height, width = size, size
    window = np.array([0, 0, height, width]).astype(np.float32)
    nms_threshold = args.nms_threshold
    anchors = anchors.to(deltas.device)

    boxes_out = []
    boxes_ind = []
    for i in range(bs):
        pos_ix = torch.nonzero(scores[i] > thresh)
        if pos_ix.dim() > 1:
            pos_ix = pos_ix.squeeze(1)
        if pos_ix.numel() == 0:
            # no anchor of this image is above the threshold
            continue

        scores_i = torch.index_select(scores[i], 0, pos_ix)
        deltas_i = torch.index_select(deltas[i], 0, pos_ix)
        anchors_i = torch.index_select(anchors[i], 0, pos_ix)

        # Apply deltas to anchors to get refined anchors, then clip them
        # to the image boundaries.
        boxes = apply_box_deltas(anchors_i, deltas_i)
        boxes = clip_boxes(boxes, window)

        # Small boxes are deliberately not filtered out here.

        # Non-max suppression; only the first surviving index is kept.
        keep = nms(
            torch.cat((boxes, scores_i.unsqueeze(1)), 1).data, nms_threshold)
        keep = keep[:1]

        boxes_out.append(boxes[keep, :])
        boxes_ind.extend([i] * keep.size(0))

    if not boxes_out:
        # Nothing to concatenate: report failure through the flag instead
        # of letting torch.cat fail on an empty list.
        return None, None, False

    boxes = torch.cat(boxes_out, 0)
    boxes_ind = torch.Tensor(boxes_ind).to(boxes.device)

    # Normalize dimensions to range of 0 to 1.
    norm = torch.from_numpy(
        np.array([height, width, height, width])).float().to(boxes.device)
    normalized_boxes = boxes / norm

    return normalized_boxes, boxes_ind, True
def proposal_layer_bak(inputs, anchors, thresh=0.5, args=None):
    """Select proposals from the RPN output of a single, unbatched image.

    Anchors whose score exceeds ``thresh`` are refined with their
    bounding-box deltas, clipped to the image window, de-duplicated by
    non-max suppression and normalized by the image size.

    Inputs:
        inputs[0] (rpn_probs): [anchors*height*width] fg scores
        inputs[1] (rpn_bbox_deltas): [anchors*height*width, 4]
        anchors: [anchors*height*width, 4]
        thresh: minimum (exclusive) score for an anchor to be considered
        args: object providing ``img_size`` and ``nms_threshold``; only
            read when at least one anchor passes the threshold

    Returns:
        ``(normalized_boxes, True)`` with ``normalized_boxes`` of shape
        [num_rois, 4], or ``(None, False)`` when no anchor scores above
        ``thresh``.

    NOTE(review): the coordinate order (x1, y1, x2, y2) vs (y1, x1, y2, x2)
    is decided by ``apply_box_deltas`` / ``clip_boxes`` -- confirm there.
    """
    scores = inputs[0]
    deltas = inputs[1]

    # Squeeze only the trailing index dimension: a blanket squeeze() would
    # turn a single positive index into a 0-dim tensor and break size(0).
    pos_ix = torch.nonzero(scores > thresh)
    if pos_ix.dim() > 1:
        pos_ix = pos_ix.squeeze(1)
    if pos_ix.numel() == 0:
        return None, False

    assert anchors.size(0) == deltas.size(0)

    scores = torch.index_select(scores, 0, pos_ix)
    deltas = torch.index_select(deltas, 0, pos_ix)
    anchors = anchors.to(deltas.device)
    anchors = torch.index_select(anchors, 0, pos_ix)

    # Apply deltas to anchors to get refined anchors.
    boxes = apply_box_deltas(anchors, deltas)

    # Clip to image boundaries.
    size = args.img_size
    height, width = size, size
    window = np.array([0, 0, height, width]).astype(np.float32)
    boxes = clip_boxes(boxes, window)

    # Small boxes are deliberately not filtered out here.

    # Non-max suppression
    nms_threshold = args.nms_threshold
    keep = nms(torch.cat((boxes, scores.unsqueeze(1)), 1).data, nms_threshold)
    boxes = boxes[keep, :]

    # Normalize dimensions to range of 0 to 1.
    norm = torch.from_numpy(
        np.array([height, width, height, width])).float().to(boxes.device)
    normalized_boxes = boxes / norm

    return normalized_boxes, True