def draw_result(out, im_scale, clss, bbox, nms_thresh, conf):
    """Draw every detection scoring at least ``conf`` onto ``out``.

    For each class id from 1 to 20 the class-score column of ``clss`` is
    joined with the matching four box columns of ``bbox``, the rows are
    filtered through ``nms``, and each surviving row whose score reaches
    ``conf`` is drawn as a box outline with the class name written on a
    filled strip along the bottom-left edge of the box.

    Args:
        out: image array; it is drawn on in place through OpenCV.
        im_scale: accepted for interface compatibility; not used here.
        clss: 2-D array indexed as ``clss[:, cls_id]`` (one score column
            per class).
        bbox: 2-D array holding four box columns per class; class ``k``
            occupies columns ``4 * k`` to ``4 * k + 3``.
        nms_thresh: threshold forwarded unchanged to ``nms``.
        conf: minimum score a detection needs in order to be drawn.

    Returns:
        ``out``, the same image object that was passed in.
    """
    # Passed as OpenCV's lineType argument; the original name (CV_AA)
    # suggests the anti-aliased line type -- confirm against the cv version.
    line_type = 16
    font = cv.FONT_HERSHEY_SIMPLEX
    font_scale = 0.8
    box_color = (0, 0, 255)
    text_color = (255, 255, 255)

    for class_idx in range(1, 21):
        class_scores = clss[:, class_idx][:, np.newaxis]
        class_boxes = bbox[:, class_idx * 4: (class_idx + 1) * 4]

        # Rows are (x1, y1, x2, y2, score).
        candidates = np.hstack((class_boxes, class_scores))
        candidates = candidates[nms(candidates, nms_thresh), :]

        confident = candidates[candidates[:, -1] >= conf]
        for row in confident:
            left, top, right, bottom = (int(v) for v in row[:4])
            label = CLASSES[class_idx]

            cv.rectangle(out, (left, top), (right, bottom),
                         box_color, 2, line_type)

            text_size, baseline = cv.getTextSize(label, font, font_scale, 1)
            text_w, text_h = text_size[0], text_size[1]

            # Filled strip (thickness -1) sized to the label text.
            cv.rectangle(out,
                         (left, bottom - text_h - baseline),
                         (left + text_w, bottom),
                         box_color, -1)
            cv.putText(out, label, (left, bottom - baseline),
                       font, font_scale, text_color, 1, line_type)

    return out
def draw_result(out, im_scale, clss, bbox, nms_thresh, conf):
    """Draw class-1 detections on ``out`` and return them as a list.

    Only class id 1 is processed (``range(1, 2)``). Its score column from
    ``clss`` is joined with its four box columns from ``bbox``, the rows
    are filtered through ``nms``, and each surviving row whose score is at
    least ``conf`` is drawn as a box outline with a ``"<name>:<score>"``
    caption just above the top-left corner.

    Args:
        out: image array; it is drawn on in place through OpenCV.
        im_scale: accepted for interface compatibility; not used here.
        clss: 2-D array indexed as ``clss[:, cls_id]`` (one score column
            per class).
        bbox: 2-D array holding four box columns per class; class ``k``
            occupies columns ``4 * k`` to ``4 * k + 3``.
        nms_thresh: threshold forwarded unchanged to ``nms``.
        conf: minimum score a detection needs in order to be kept.

    Returns:
        A tuple ``(out, result)`` where ``result`` is a list of
        ``[x1, y1, x2, y2, score]`` entries, one per drawn detection.
    """
    # Passed as OpenCV's lineType argument; the name suggests the
    # anti-aliased line type -- confirm against the cv version in use.
    CV_AA = 16

    # Created before the class loop so it is always bound and would
    # accumulate across classes if the range is ever widened.
    result = []

    for cls_id in range(1, 2):
        _cls = clss[:, cls_id][:, np.newaxis]
        _bbx = bbox[:, cls_id * 4: (cls_id + 1) * 4]

        # Rows are (x1, y1, x2, y2, score).
        dets = np.hstack((_bbx, _cls))
        keep = nms(dets, nms_thresh)
        dets = dets[keep, :]

        inds = np.where(dets[:, -1] >= conf)[0]
        for i in inds:
            x1, y1, x2, y2 = map(int, dets[i, :4])
            score = dets[i][-1]

            cv.rectangle(out, (x1, y1), (x2, y2), (0, 0, 255), 2, CV_AA)

            # Only the baseline is needed to lift the caption off the box
            # edge; the measured text size itself is not used.
            _, baseline = cv.getTextSize(
                CLASSES[cls_id], cv.FONT_HERSHEY_SIMPLEX, 0.8, 1)
            cv.putText(out, "%s:%.2f" % (CLASSES[cls_id], score),
                       (x1, y1 - baseline), cv.FONT_HERSHEY_SIMPLEX, 0.8,
                       (0, 0, 255), 1, CV_AA)

            result.append([x1, y1, x2, y2, score])

    return out, result
def get_dictionary(self, out, im_scale, clss, bbox, nms_thresh, conf):
    """Collect the confident detections as a ``{name: box}`` dictionary.

    For each class id from 1 to 20 the class-score column of ``clss`` is
    joined with the matching four box columns of ``bbox``, the rows are
    filtered through ``nms``, and each surviving row whose score is at
    least ``conf`` is stored under the key ``"<class name>_<row index>"``,
    where the row index refers to the post-NMS array of that class.

    Box corners are truncated to ``int`` and clamped to the image:
    ``x1``/``y1`` are raised to 0 and ``x2``/``y2`` are lowered to the
    image width/height taken from ``out.shape``.

    Args:
        out: image array; only ``out.shape[0]`` (height) and
            ``out.shape[1]`` (width) are read.
        im_scale: accepted for interface compatibility; not used here.
        clss: 2-D array indexed as ``clss[:, cls_id]`` (one score column
            per class).
        bbox: 2-D array holding four box columns per class; class ``k``
            occupies columns ``4 * k`` to ``4 * k + 3``.
        nms_thresh: threshold forwarded unchanged to ``nms``.
        conf: minimum score a detection needs in order to be included.

    Returns:
        dict mapping each detection name to an ``(x1, y1, x2, y2)`` tuple
        of ints.
    """
    dictionary = {}
    height, width = out.shape[0], out.shape[1]

    for cls_id in range(1, 21):
        _cls = clss[:, cls_id][:, np.newaxis]
        _bbx = bbox[:, cls_id * 4: (cls_id + 1) * 4]

        # Rows are (x1, y1, x2, y2, score).
        dets = np.hstack((_bbx, _cls))
        keep = nms(dets, nms_thresh)
        dets = dets[keep, :]

        inds = np.where(dets[:, -1] >= conf)[0]
        for i in inds:
            x1, y1, x2, y2 = map(int, dets[i, :4])

            # Clamp both sides: a negative corner used as an array index
            # would wrap around instead of stopping at the image border.
            x1 = max(x1, 0)
            y1 = max(y1, 0)
            x2 = min(x2, width)
            y2 = min(y2, height)

            name = CLASSES[cls_id] + "_" + str(i)
            dictionary[name] = (x1, y1, x2, y2)

    return dictionary
def draw_result(self, out, im_scale, clss, bbox, nms_thresh, conf):
    """Render the detections that pass NMS and the score cut-off.

    Class ids 1 through 20 are handled one at a time. The score column of
    the class (from ``clss``) and its four box columns (from ``bbox``) are
    stacked, filtered with ``nms``, and every remaining row scoring at
    least ``conf`` is drawn on ``out``: a box outline plus the class name
    on a filled strip at the bottom-left of the box.

    Args:
        out: image array; modified in place through OpenCV calls.
        im_scale: accepted for interface compatibility; not used here.
        clss: 2-D array indexed as ``clss[:, cls_id]`` (one score column
            per class).
        bbox: 2-D array holding four box columns per class; class ``k``
            occupies columns ``4 * k`` to ``4 * k + 3``.
        nms_thresh: threshold forwarded unchanged to ``nms``.
        conf: minimum score a detection needs in order to be drawn.

    Returns:
        ``out``, the same image object that was passed in.
    """
    # Passed as OpenCV's lineType argument; the original name (CV_AA)
    # suggests the anti-aliased line type -- confirm against the cv version.
    aa_line = 16
    outline_thickness = 2
    filled = -1  # negative thickness makes cv.rectangle fill the shape

    for class_id in range(1, 21):
        score_col = clss[:, class_id][:, np.newaxis]
        box_cols = bbox[:, class_id * 4: (class_id + 1) * 4]

        # Rows are (x1, y1, x2, y2, score).
        scored_boxes = np.hstack((box_cols, score_col))
        survivors = nms(scored_boxes, nms_thresh)
        scored_boxes = scored_boxes[survivors, :]

        above_cutoff = np.where(scored_boxes[:, -1] >= conf)[0]
        for row_idx in above_cutoff:
            x_min, y_min, x_max, y_max = map(int, scored_boxes[row_idx, :4])

            cv.rectangle(out, (x_min, y_min), (x_max, y_max),
                         (0, 0, 255), outline_thickness, aa_line)

            size, base = cv.getTextSize(
                CLASSES[class_id], cv.FONT_HERSHEY_SIMPLEX, 0.8, 1)

            # Background strip sized to the label, anchored at the
            # bottom-left corner of the box.
            strip_top_left = (x_min, y_max - size[1] - base)
            strip_bottom_right = (x_min + size[0], y_max)
            cv.rectangle(out, strip_top_left, strip_bottom_right,
                         (0, 0, 255), filled)

            text_origin = (x_min, y_max - base)
            cv.putText(out, CLASSES[class_id], text_origin,
                       cv.FONT_HERSHEY_SIMPLEX, 0.8,
                       (255, 255, 255), 1, aa_line)

    return out
def __call__(self, rpn_cls_prob, rpn_bbox_pred, im_info, train):
    """Turn RPN outputs into an array of region proposals (RoIs).

    Steps:
      1. shift the base anchors to every feature-map cell and apply the
         predicted box deltas to them (``bbox_transform_inv``);
      2. clip the resulting boxes with ``clip_boxes``;
      3. drop the boxes rejected by ``_filter_boxes`` (minimum size scaled
         by ``im_info[2]``);
      4. order by score, highest first, and keep the top pre-NMS count;
      5. run ``nms`` with ``self.RPN_NMS_THRESH`` and keep the top
         post-NMS count.

    Args:
        rpn_cls_prob: object whose ``.data`` holds the RPN class
            probabilities; the channels from ``self._num_anchors`` onward
            are used as the foreground scores.
        rpn_bbox_pred: object whose ``.data`` holds the predicted box
            deltas.
        im_info: 2-D array; only row 0 is used. Its first two entries go
            to ``clip_boxes`` and its third scales the minimum box size.
        train: truthy selects the ``TRAIN_RPN_*`` limits, otherwise the
            ``TEST_RPN_*`` limits are used.

    Returns:
        float32 array with one row per kept proposal: a leading zero
        (batch index) followed by the proposal columns.
    """
    if train:
        pre_nms_limit = self.TRAIN_RPN_PRE_NMS_TOP_N
        post_nms_limit = self.TRAIN_RPN_POST_NMS_TOP_N
    else:
        pre_nms_limit = self.TEST_RPN_PRE_NMS_TOP_N
        post_nms_limit = self.TEST_RPN_POST_NMS_TOP_N
    overlap_thresh = self.RPN_NMS_THRESH
    smallest_side = self.RPN_MIN_SIZE

    # The first _num_anchors channels are background probabilities, the
    # second set are the foreground probabilities, which are the ones used.
    fg_scores = to_cpu(rpn_cls_prob.data[:, self._num_anchors:, :, :])
    deltas = to_cpu(rpn_bbox_pred.data)
    info = im_info[0, :]

    # 1. Generate proposals from bbox deltas and shifted anchors.
    feat_h, feat_w = fg_scores.shape[-2:]
    stride = self._feat_stride
    grid_x, grid_y = np.meshgrid(np.arange(0, feat_w) * stride,
                                 np.arange(0, feat_h) * stride)
    flat_x = grid_x.ravel()
    flat_y = grid_y.ravel()
    # One (x, y, x, y) offset row per feature-map cell.
    cell_shifts = np.vstack((flat_x, flat_y, flat_x, flat_y)).T

    # Broadcast the A base anchors (1, A, 4) against the K cell shifts
    # (K, 1, 4) to get (K, A, 4), then flatten to (K * A, 4).
    num_anchors = self._num_anchors
    num_cells = cell_shifts.shape[0]
    base_anchors = self._anchors.reshape((1, num_anchors, 4))
    all_anchors = base_anchors + cell_shifts[:, np.newaxis, :]
    all_anchors = all_anchors.reshape((num_cells * num_anchors, 4))

    # Bring deltas into the same (h, w, a) row order as the anchors:
    # (1, 4 * A, H, W) -> (1, H, W, 4 * A) -> (H * W * A, 4).
    deltas = deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

    # Same for the scores: (1, A, H, W) -> (1, H, W, A) -> (H * W * A, 1).
    fg_scores = fg_scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

    # Third argument is forwarded as in the original call; its meaning is
    # defined by bbox_transform_inv -- confirm there.
    boxes = bbox_transform_inv(all_anchors, deltas, -1)

    # 2. Clip predicted boxes to the image.
    boxes = clip_boxes(boxes, info[:2])

    # 3. Remove boxes with either height or width below the threshold
    #    (min size is converted to the input image scale in info[2]).
    big_enough = _filter_boxes(boxes, smallest_side * info[2])
    boxes = boxes[big_enough, :]
    fg_scores = fg_scores[big_enough]

    # 4./5. Sort by score, highest first, and keep the top pre-NMS count.
    ranking = fg_scores.ravel().argsort()[::-1]
    if pre_nms_limit > 0:
        ranking = ranking[:pre_nms_limit]
    boxes = boxes[ranking, :]
    fg_scores = fg_scores[ranking]

    # 6./7. Apply NMS and keep the top post-NMS count.
    kept = nms(np.hstack((boxes, fg_scores)), overlap_thresh)
    if post_nms_limit > 0:
        kept = kept[:post_nms_limit]
    boxes = boxes[kept, :]
    # Scores are kept in step with the boxes but are not returned.
    fg_scores = fg_scores[kept]

    # 8. Output RoIs. Only a single input image is supported, so every
    #    batch index is 0.
    batch_column = np.zeros((boxes.shape[0], 1), dtype=np.float32)
    return np.asarray(np.hstack((batch_column, boxes)), dtype=np.float32)
def __call__(self, rpn_cls_prob, rpn_bbox_pred, im_info, train):
    """Produce region proposals (RoIs) from the RPN score and delta maps.

    The base anchors are replicated at every feature-map cell, decoded
    with the predicted deltas (``bbox_transform_inv``), clipped
    (``clip_boxes``), size-filtered (``_filter_boxes``), ranked by score,
    truncated, passed through ``nms`` with ``self.RPN_NMS_THRESH`` and
    truncated again.

    Args:
        rpn_cls_prob: object whose ``.data`` holds the RPN class
            probabilities; the channels from ``self._num_anchors`` onward
            are used as the foreground scores.
        rpn_bbox_pred: object whose ``.data`` holds the predicted box
            deltas.
        im_info: 2-D array; only row 0 is used. Its first two entries go
            to ``clip_boxes`` and its third scales the minimum box size.
        train: truthy reads the limits from ``self.RPN_PRE_NMS_TOP_N`` and
            ``self.RPN_POST_NMS_TOP_N``; otherwise 6000 and 300 are used.

    Returns:
        float32 array with one row per kept proposal: a leading zero
        (batch index) followed by the proposal columns.
    """
    if train:
        top_n_before_nms = self.RPN_PRE_NMS_TOP_N
        top_n_after_nms = self.RPN_POST_NMS_TOP_N
    else:
        # Fixed limits used whenever train is falsy.
        top_n_before_nms = 6000
        top_n_after_nms = 300
    iou_thresh = self.RPN_NMS_THRESH
    min_box_size = self.RPN_MIN_SIZE

    # The first _num_anchors channels are background probabilities, the
    # second set are the foreground probabilities, which are the ones used.
    objectness = to_cpu(rpn_cls_prob.data[:, self._num_anchors:, :, :])
    box_deltas = to_cpu(rpn_bbox_pred.data)
    image_info = im_info[0, :]

    # 1. Generate proposals from bbox deltas and shifted anchors.
    rows, cols = objectness.shape[-2:]
    x_offsets = np.arange(0, cols) * self._feat_stride
    y_offsets = np.arange(0, rows) * self._feat_stride
    x_grid, y_grid = np.meshgrid(x_offsets, y_offsets)
    x_flat, y_flat = x_grid.ravel(), y_grid.ravel()
    # One (x, y, x, y) offset row per feature-map cell.
    offsets = np.vstack((x_flat, y_flat, x_flat, y_flat)).transpose()

    # Add the A anchors (1, A, 4) to the K cell offsets (K, 1, 4) to get
    # (K, A, 4) shifted anchors, then reshape to (K * A, 4).
    n_anchor = self._num_anchors
    n_cell = offsets.shape[0]
    shifted = (self._anchors.reshape((1, n_anchor, 4))
               + offsets.reshape((n_cell, 1, 4)))
    shifted = shifted.reshape((n_cell * n_anchor, 4))

    # Reorder deltas to match the anchors' (h, w, a) row order:
    # (1, 4 * A, H, W) -> (1, H, W, 4 * A) -> (H * W * A, 4).
    box_deltas = box_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

    # Same for the scores: (1, A, H, W) -> (1, H, W, A) -> (H * W * A, 1).
    objectness = objectness.transpose((0, 2, 3, 1)).reshape((-1, 1))

    # Third argument is forwarded as in the original call; its meaning is
    # defined by bbox_transform_inv -- confirm there.
    candidates = bbox_transform_inv(shifted, box_deltas, -1)

    # 2. Clip predicted boxes to the image.
    candidates = clip_boxes(candidates, image_info[:2])

    # 3. Remove boxes with either height or width below the threshold
    #    (min size is converted to the input image scale in image_info[2]).
    valid = _filter_boxes(candidates, min_box_size * image_info[2])
    candidates = candidates[valid, :]
    objectness = objectness[valid]

    # 4./5. Sort by score, highest first, and keep the top pre-NMS count.
    by_score = objectness.ravel().argsort()[::-1]
    if top_n_before_nms > 0:
        by_score = by_score[:top_n_before_nms]
    candidates = candidates[by_score, :]
    objectness = objectness[by_score]

    # 6./7. Apply NMS and keep the top post-NMS count.
    selected = nms(np.hstack((candidates, objectness)), iou_thresh)
    if top_n_after_nms > 0:
        selected = selected[:top_n_after_nms]
    candidates = candidates[selected, :]
    # Scores are kept in step with the boxes but are not returned.
    objectness = objectness[selected]

    # 8. Output RoIs. Only a single input image is supported, so every
    #    batch index is 0.
    zeros = np.zeros((candidates.shape[0], 1), dtype=np.float32)
    rois = np.asarray(np.hstack((zeros, candidates)), dtype=np.float32)
    return rois