def im_detect(net, image):
    """Run the detector on a single image.

    Args:
        net: detection model. It must provide ``get_image_blob(image)`` and
            be callable as ``net(im_data, im_info)`` returning
            ``(cls_prob, bbox_pred, rois)``.
        image: input image; only its ``shape`` is used here directly (to
            clip the regressed boxes).

    Returns:
        scores (ndarray): R x K array of object class scores (K includes
            background as object category 0)
        boxes (ndarray): R x (4*K) array of predicted bounding boxes
        runtime (float): wall-clock seconds spent in the forward pass only
    """
    blob, scales = net.get_image_blob(image)
    # One info row: two blob dimensions followed by the first scale factor.
    info = np.array([[blob.shape[1], blob.shape[2], scales[0]]],
                    dtype=np.float32)

    started = time.time()
    cls_prob, bbox_pred, rois = net(blob, info)
    runtime = time.time() - started

    scores = cls_prob.data.cpu().numpy()
    # Columns 1..4 of each RoI row, divided by the scale stored in the info row.
    proposals = rois.data.cpu().numpy()[:, 1:5] / info[0][2]

    if not cfg.TEST.BBOX_REG:
        # No regression: repeat the proposals once per class.
        return scores, np.tile(proposals, (1, scores.shape[1])), runtime

    # Apply the regression deltas, then clip to the original image shape.
    deltas = bbox_pred.data.cpu().numpy()
    refined = clip_boxes(bbox_transform_inv(proposals, deltas), image.shape)
    return scores, refined, runtime
def interpret_faster_rcnn(self, cls_prob, bbox_pred, rois, im_info, nms=True,
                          min_score=0.0, nms_thresh=0.3):
    """Convert raw Faster R-CNN outputs into final detections.

    For every proposal the best-scoring class is taken; proposals whose best
    class is index 0 or whose score is below ``min_score`` are discarded.
    The surviving proposals are refined with the regression deltas of their
    chosen class, clipped, divided by ``im_info.data[0][2]`` and optionally
    passed through ``nms_detections``.

    Args:
        cls_prob: class scores; read through ``.data``. A leading singleton
            axis on a 3-D tensor is dropped.
        bbox_pred: regression deltas, ``4 * self.n_classes`` values per
            proposal; same layout handling as ``cls_prob``.
        rois: proposals; columns 1..4 of each row are used as the box.
        im_info: image info; ``im_info.data`` is passed to ``clip_boxes`` and
            ``im_info.data[0][2]`` is used as the scale divisor.
        nms: whether to run ``nms_detections`` on the result.
        min_score: minimum best-class score for a proposal to be kept.
        nms_thresh: threshold forwarded to ``nms_detections``.

    Returns:
        ``(pred_boxes, scores, classes)`` as numpy arrays, where ``classes``
        is ``self.classes`` indexed by the chosen class indices. All three
        are empty when no proposal passes the filter.
    """
    def drop_batch_axis(tensor):
        # Only strip a leading singleton axis. A bare ``squeeze()`` would
        # also flatten a lone proposal into a 1-D tensor and break ``max(1)``.
        return tensor.squeeze(0) if tensor.dim() == 3 else tensor

    class_probs = drop_batch_axis(cls_prob.data)
    all_deltas = drop_batch_axis(bbox_pred.data)
    all_rois = drop_batch_axis(rois.data)

    # Best class per proposal; keep non-zero classes that clear the threshold.
    scores, inds = class_probs.max(1)
    # ``view(-1)`` keeps the index 1-D even when exactly one proposal matches.
    keep = ((inds > 0) & (scores >= min_score)).nonzero().view(-1)
    scores, inds = scores[keep], inds[keep]
    boxes = all_rois[:, 1:5][keep]

    if keep.numel() == 0:
        # Nothing survived: return consistently shaped empty results instead
        # of failing further down.
        return (boxes.cpu().numpy(), scores.cpu().numpy(),
                self.classes[inds.cpu().numpy()])

    # Apply bounding-box regression deltas
    box_deltas = all_deltas[keep]
    if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
        # Undo the precomputed mean/std normalisation. ``type_as`` puts the
        # constants on the same device/dtype as the deltas instead of
        # assuming CUDA.
        stds = torch.FloatTensor(
            cfg.TRAIN.BBOX_NORMALIZE_STDS).type_as(box_deltas)
        means = torch.FloatTensor(
            cfg.TRAIN.BBOX_NORMALIZE_MEANS).type_as(box_deltas)
        box_deltas = box_deltas.view(-1, 4) * stds + means

    # Pick, for each kept proposal, the 4 deltas of its chosen class.
    class_index = inds.view(-1, 1, 1).expand(inds.size(0), 1, 4)
    box_deltas = box_deltas.view(-1, self.n_classes, 4)
    box_deltas = box_deltas.gather(1, class_index).squeeze(1)

    # The box helpers are called with a leading axis of size 1.
    pred_boxes = bbox_transform_inv(
        boxes.unsqueeze(0), box_deltas.unsqueeze(0), 1)
    pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
    # squeeze(0) rather than squeeze(): a single box must stay 2-D.
    pred_boxes = pred_boxes.squeeze(0)
    pred_boxes /= im_info.data[0][2]

    # nms
    if nms and pred_boxes.size(0) > 0:
        pred_boxes, scores, inds = nms_detections(
            pred_boxes, scores, nms_thresh, inds=inds)

    pred_boxes = pred_boxes.cpu().numpy()
    scores = scores.cpu().numpy()
    inds = inds.cpu().numpy()
    return pred_boxes, scores, self.classes[inds]
def im_detect(net_x, image_0, image_1):
    """Detect object classes for a pair of images fed jointly to ``net_x``.

    Both images are converted with ``net_x.get_image_blob``. The info row
    passed to the network is built from the first image's blob and scale
    only, and the resulting boxes are clipped to ``image_0.shape``.

    Bounding-box regression is mandatory: if ``cfg.TEST.BBOX_REG`` is false
    the message ``bbox reg compulsory`` is printed and the process exits with
    status 1 (before any network work is done).

    Returns:
        scores (ndarray): R x K array of object class scores (K includes
            background as object category 0)
        boxes (ndarray): R x (4*K) array of predicted bounding boxes
    """
    if not cfg.TEST.BBOX_REG:
        # Fail fast instead of running a forward pass whose result would be
        # discarded. ``print(...)`` with one argument behaves the same on
        # Python 2 and 3; SystemExit avoids relying on the site-only ``exit``.
        print("bbox reg compulsory")
        raise SystemExit(1)

    im_data_0, im_scales_0 = net_x.get_image_blob(image_0)
    # The scale of the second image is not used anywhere below.
    im_data_1, _ = net_x.get_image_blob(image_1)
    im_info = np.array(
        [[im_data_0.shape[1], im_data_0.shape[2], im_scales_0[0]]],
        dtype=np.float32)

    cls_prob_0, bbox_pred_0, rois = net_x(im_data_0, im_data_1, im_info)
    scores_0 = cls_prob_0.data.cpu().numpy()
    # Columns 1..4 of each RoI row, divided by the scale in the info row.
    boxes = rois.data.cpu().numpy()[:, 1:5] / im_info[0][2]

    # Apply bounding-box regression deltas
    box_deltas_0 = bbox_pred_0.data.cpu().numpy()
    pred_boxes_0 = bbox_transform_inv(boxes, box_deltas_0)
    pred_boxes_0 = clip_boxes(pred_boxes_0, image_0.shape)

    return scores_0, pred_boxes_0
def im_detect(net, im_data, im_info):
    """Forward an already prepared blob through ``net`` and decode its boxes.

    ``net`` is called as ``net(im_data, im_info, gt_boxes=None)`` and must
    return seven values; only the class probabilities, the box deltas and the
    RoIs (4th, 5th and 6th) are used.

    Returns:
        scores: class probabilities as a numpy array
        pred_boxes: RoIs with the regression deltas applied, clipped to
            ``im_info[0][:2]``
        rois: the RoI output of the network, returned unchanged
    """
    _, _, _, cls_prob, bbox_pred, rois, _ = net(
        im_data, im_info, gt_boxes=None)

    scores = cls_prob.data.cpu().numpy()
    # The RoIs are deliberately NOT divided by the scale im_info[0][2] here.
    proposals = rois.data.cpu().numpy()[:, 1:5]

    # Apply bounding-box regression deltas
    deltas = bbox_pred.data.cpu().numpy()
    decoded = bbox_transform_inv(proposals, deltas)

    # Likewise the clipping bounds are the unscaled im_info[0][:2].
    return scores, clip_boxes(decoded, im_info[0][:2]), rois
def predict_image(self, image, threshold, eval_mode=False):
    """
    Infer buildings for a single image.

    Inputs:
        image :: n x m x 3 ndarray - Should be in RGB format. A ``str`` is
            treated as a file path and loaded with ``cv2.imread`` instead.
        threshold :: minimum score a detection needs in order to be returned
        eval_mode :: when true, also return the unthresholded detections and
            the forward-pass runtime

    Returns:
        A pandas DataFrame with columns x1, y1, x2, y2, score holding the
        detections that survive NMS (overlap threshold 0.3) and have
        ``score >= threshold``. Only class index 1 is reported. With
        ``eval_mode`` the result is the tuple
        ``(thresholded, all_after_nms, runtime)``.

    Raises:
        IOError: if ``image`` is a path that ``cv2.imread`` cannot load.
    """
    if isinstance(image, str):
        path = image
        image = cv2.imread(path)
        if image is None:
            # cv2.imread signals failure by returning None; surface it here
            # rather than as an obscure error inside the model.
            raise IOError('Could not read image file: {}'.format(path))
    else:
        image = image[:, :, (2, 1, 0)]  # RGB -> BGR

    im_data, im_scales = self.model.get_image_blob(image)
    im_info = np.array(
        [[im_data.shape[1], im_data.shape[2], im_scales[0]]],
        dtype=np.float32)

    t0 = time.time()
    cls_prob, bbox_pred, rois = self.model(im_data, im_info)
    runtime = time.time() - t0

    scores = cls_prob.data.cpu().numpy()
    boxes = rois.data.cpu().numpy()[:, 1:5] / im_info[0][2]

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = bbox_pred.data.cpu().numpy()
        pred_boxes = bbox_transform_inv(boxes, box_deltas)
        pred_boxes = clip_boxes(pred_boxes, image.shape)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    current = np.concatenate(
        [
            pred_boxes[:, 4:8],  # (skip the background class)
            np.expand_dims(scores[:, 1], 1),
        ],
        axis=1)

    suppressed = current[py_cpu_nms(current.astype(np.float32), 0.3)]
    suppressed = pandas.DataFrame(
        suppressed, columns=['x1', 'y1', 'x2', 'y2', 'score'])

    confident = suppressed[suppressed['score'] >= threshold]
    if eval_mode:
        return confident, suppressed, runtime
    return confident
def interpret_faster_rcnn(self, cls_prob, bbox_pred, rois, im_info, im_shape,
                          nms=True, clip=True, min_score=0.0):
    """Convert raw Faster R-CNN outputs into final detections.

    Keeps the proposals whose best-scoring class has index > 0 and a score of
    at least ``min_score``, applies the regression deltas of that class,
    optionally clips the boxes to ``im_shape`` and optionally runs
    ``nms_detections`` with threshold 0.3.

    Args:
        cls_prob: per-proposal class scores (read through ``.data``).
        bbox_pred: per-proposal regression deltas, 4 consecutive values per
            class (read through ``.data``).
        rois: proposals; columns 1..4 of each row are used as the box.
        im_info: image info; ``im_info[0][2]`` is the scale the RoIs are
            divided by.
        im_shape: shape handed to ``clip_boxes``.
        nms: run non-maximum suppression on the result.
        clip: clip the predicted boxes to ``im_shape``.
        min_score: minimum best-class score for a proposal to be kept.

    Returns:
        ``(pred_boxes, scores, classes)`` where ``classes`` is
        ``self.classes`` indexed by the chosen class indices.
    """
    # find class
    scores, inds = cls_prob.data.max(1)
    scores, inds = scores.cpu().numpy(), inds.cpu().numpy()

    # ``keep`` stays the np.where tuple so the indexing works whatever the
    # dimensionality of ``scores``/``inds``; the row indices are keep[0].
    keep = np.where((inds > 0) & (scores >= min_score))
    scores, inds = scores[keep], inds[keep]
    rows = keep[0]

    # Apply bounding-box regression deltas
    box_deltas = bbox_pred.data.cpu().numpy()[rows]
    # Gather columns [4*c, 4*c + 4) of each row for its chosen class c.
    # Unlike a per-row Python loop this yields shape (0, 4) when nothing was
    # kept, and np.float64 replaces the removed ``np.float`` alias.
    cols = inds[:, np.newaxis] * 4 + np.arange(4)
    row_index = np.arange(len(inds))[:, np.newaxis]
    box_deltas = box_deltas[row_index, cols].astype(np.float64)

    boxes = rois.data.cpu().numpy()[rows, 1:5] / im_info[0][2]
    pred_boxes = bbox_transform_inv(boxes, box_deltas)
    if clip:
        pred_boxes = clip_boxes(pred_boxes, im_shape)

    # nms
    if nms and pred_boxes.shape[0] > 0:
        pred_boxes, scores, inds = nms_detections(
            pred_boxes, scores, 0.3, inds=inds)

    return pred_boxes, scores, self.classes[inds]
def proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, cfg_key,
                   _feat_stride, anchor_scales, anchor_ratios,
                   is_relationship=False):
    """Turn RPN outputs into a ranked, NMS-filtered set of proposals.

    Args:
        rpn_cls_prob_reshape: RPN class probabilities, indexed as
            (1, 2 * A, H, W); the second half of axis 1 holds the foreground
            scores. Only batches of one item are supported.
        rpn_bbox_pred: RPN box deltas, indexed as (1, 4 * A, H, W).
        im_info: image info; row 0 is read as (height, width, scale).
        cfg_key: key into ``cfg`` selecting the RPN settings.
        _feat_stride: stride used to shift the anchors over the score map.
        anchor_scales: scales forwarded to ``generate_anchors``.
        anchor_ratios: ratios forwarded to ``generate_anchors``.
        is_relationship: when true, the ``*_REGION`` variants of the RPN
            settings are used.

    Returns:
        ``(blob, scores)``: ``blob`` is a float32 array with one row per kept
        proposal, a zero in column 0 followed by the four box coordinates;
        ``scores`` is the matching flat array of foreground scores.
    """
    # Algorithm:
    #
    # for each (H, W) location i
    #   generate A anchor boxes centered on cell i
    #   apply predicted bbox deltas at cell i to each of the A anchors
    # clip predicted boxes to image
    # remove predicted boxes with either height or width < threshold
    # sort all (proposal, score) pairs by score from highest to lowest
    # take top pre_nms_topN proposals before NMS
    # apply NMS with threshold 0.7 to remaining proposals
    # take after_nms_topN proposals after NMS
    # return the top proposals (-> RoIs top, scores top)
    _anchors = generate_anchors(scales=anchor_scales, ratios=anchor_ratios)
    _num_anchors = _anchors.shape[0]

    # No transpose needed: the inputs are already channel-first.
    im_info = im_info[0]

    assert rpn_cls_prob_reshape.shape[0] == 1, \
        'Only single item batches are supported'

    phase_cfg = cfg[cfg_key]
    if is_relationship:
        pre_nms_topN = phase_cfg.RPN_PRE_NMS_TOP_N_REGION
        post_nms_topN = phase_cfg.RPN_POST_NMS_TOP_N_REGION
        nms_thresh = phase_cfg.RPN_NMS_THRESH_REGION
        min_size = phase_cfg.RPN_MIN_SIZE_REGION
    else:
        pre_nms_topN = phase_cfg.RPN_PRE_NMS_TOP_N
        post_nms_topN = phase_cfg.RPN_POST_NMS_TOP_N
        nms_thresh = phase_cfg.RPN_NMS_THRESH
        min_size = phase_cfg.RPN_MIN_SIZE

    # the first set of _num_anchors channels are bg probs
    # the second set are the fg probs, which we want
    scores = rpn_cls_prob_reshape[:, _num_anchors:, :, :]
    bbox_deltas = rpn_bbox_pred

    if DEBUG:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))

    # 1. Generate proposals from bbox deltas and shifted anchors
    height, width = scores.shape[-2:]

    if DEBUG:
        print('score map size: {}'.format(scores.shape))

    # Enumerate all shifts
    shift_x = np.arange(0, width) * _feat_stride
    shift_y = np.arange(0, height) * _feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()

    # Enumerate all shifted anchors:
    #
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = _num_anchors
    K = shifts.shape[0]
    anchors = _anchors.reshape((1, A, 4)) + \
        shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    anchors = anchors.reshape((K * A, 4))

    # Transpose and reshape predicted bbox transformations to get them
    # into the same order as the anchors:
    #
    # bbox deltas will be (1, 4 * A, H, W) format
    # transpose to (1, H, W, 4 * A)
    # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
    # in slowest to fastest order
    bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

    # Same story for the scores:
    #
    # scores are (1, A, H, W) format
    # transpose to (1, H, W, A)
    # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
    scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

    # Convert anchors into proposals via bbox transformations
    proposals = bbox_transform_inv(anchors, bbox_deltas)

    # 2. clip predicted boxes to image
    # NOTE: this switch is read from cfg.TEST regardless of cfg_key.
    if cfg.TEST.RPN_DROPOUT_BOXES_RUNOFF_IMAGE:
        _allowed_border = 16
        inds_inside = np.where(
            (proposals[:, 0] >= -_allowed_border) &
            (proposals[:, 1] >= -_allowed_border) &
            (proposals[:, 2] < im_info[1] + _allowed_border) &  # width
            (proposals[:, 3] < im_info[0] + _allowed_border)  # height
        )[0]
        proposals = proposals[inds_inside, :]
        # Drop the same rows from the scores; otherwise every later index
        # computed on ``proposals`` would pair boxes with the wrong score.
        scores = scores[inds_inside]
    proposals = clip_boxes(proposals, im_info[:2])

    # 3. remove predicted boxes with either height or width < threshold
    # (NOTE: convert min_size to input image scale stored in im_info[2])
    keep = _filter_boxes(proposals, min_size * im_info[2])
    proposals = proposals[keep, :]
    scores = scores[keep]

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 2000)
    # 8. return the top proposals (-> RoIs top)
    keep = nms(np.hstack((proposals, scores)).astype(np.float32), nms_thresh)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Output rois blob
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    return blob, scores.reshape(-1)
def proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, cfg_key,
                   _feat_stride=[16, ], anchor_scales=[4, 8, 16, 32]):
    """Build the RoI blob from the RPN outputs.

    Parameters
    ----------
    rpn_cls_prob_reshape: RPN bg/fg probabilities. The code indexes it as
        (1, 2 x A, H, W): channels ``A:`` on axis 1 are taken as the
        foreground scores. Only a batch of one is accepted.
    rpn_bbox_pred: RPN box regression output, indexed as (1, A x 4, H, W)
    im_info: a list of [image_height, image_width, scale_ratios]; only the
        first entry is used
    cfg_key: 'TRAIN' or 'TEST'
    _feat_stride: the downsampling ratio of feature map to the original
        input image
    anchor_scales: the scales passed to ``generate_anchors``

    Returns
    ----------
    rpn_rois : (N, 5) float32 array, e.g. [0, x1, y1, x2, y2], holding the
        proposals that remain after size filtering, top-N selection and NMS

    Steps
    ----------
    1. shift the base anchors over every (H, W) cell and apply the predicted
       deltas
    2. clip the boxes to the image
    3. drop boxes whose height or width is below the threshold
    4. sort by score and keep the top ``RPN_PRE_NMS_TOP_N``
    5. run NMS and keep the top ``RPN_POST_NMS_TOP_N``
    """
    base_anchors = generate_anchors(scales=np.array(anchor_scales))
    num_anchors = base_anchors.shape[0]

    im_info = im_info[0]
    assert rpn_cls_prob_reshape.shape[0] == 1, \
        'Only single item batches are supported'

    rpn_cfg = cfg[cfg_key]
    pre_nms_topN = rpn_cfg.RPN_PRE_NMS_TOP_N
    post_nms_topN = rpn_cfg.RPN_POST_NMS_TOP_N
    nms_thresh = rpn_cfg.RPN_NMS_THRESH
    min_size = rpn_cfg.RPN_MIN_SIZE

    # Background probabilities come first on axis 1; keep the foreground half.
    fg_scores = rpn_cls_prob_reshape[:, num_anchors:, :, :]

    if DEBUG:
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))

    height, width = fg_scores.shape[-2:]

    if DEBUG:
        print('score map size: {}'.format(fg_scores.shape))

    # One (x, y, x, y) shift per feature-map cell.
    grid_x, grid_y = np.meshgrid(np.arange(0, width) * _feat_stride,
                                 np.arange(0, height) * _feat_stride)
    flat_x, flat_y = grid_x.ravel(), grid_y.ravel()
    shifts = np.vstack((flat_x, flat_y, flat_x, flat_y)).transpose()

    # Broadcast (1, A, 4) anchors against (K, 1, 4) shifts -> (K, A, 4),
    # then flatten to (K * A, 4).
    num_cells = shifts.shape[0]
    anchors = (base_anchors.reshape((1, num_anchors, 4))
               + shifts.reshape((num_cells, 1, 4)))
    anchors = anchors.reshape((num_cells * num_anchors, 4))

    # Move the channel axis last so rows follow the anchors' (h, w, a) order.
    deltas = rpn_bbox_pred.transpose((0, 2, 3, 1)).reshape((-1, 4))
    fg_scores = fg_scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

    # Decode the anchors and clip them to the image.
    proposals = clip_boxes(bbox_transform_inv(anchors, deltas), im_info[:2])

    # Drop boxes that are too small; min_size is converted to the input
    # image scale stored in im_info[2].
    big_enough = _filter_boxes(proposals, min_size * im_info[2])
    proposals, fg_scores = proposals[big_enough, :], fg_scores[big_enough]

    # Highest score first, optionally truncated before NMS.
    ranking = fg_scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        ranking = ranking[:pre_nms_topN]
    proposals, fg_scores = proposals[ranking, :], fg_scores[ranking]

    # NMS, optionally truncated afterwards.
    survivors = nms(np.hstack((proposals, fg_scores)), nms_thresh)
    if post_nms_topN > 0:
        survivors = survivors[:post_nms_topN]
    proposals = proposals[survivors, :]
    fg_scores = fg_scores[survivors]

    # Only a single input image is supported, so every batch index is 0.
    zeros_column = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    return np.hstack((zeros_column, proposals.astype(np.float32, copy=False)))
# Fragment of a detection/evaluation loop: run the network on one batch and
# turn its outputs into per-class scores and boxes.
# NOTE(review): `det_tic`, `t`, `data`, `imdb`, `gt_boxes` and `num_boxes`
# are defined outside this fragment; the nesting below was reconstructed
# from a collapsed line — confirm against the original file.
cls_prob, bbox_pred, rois = net(im_data, im_info, gt_boxes, num_boxes)

scores = cls_prob.data
box_deltas = bbox_pred.data
# Columns 1..4 of the last axis of the RoIs are used as the boxes.
boxes = rois.data[:, :, 1:5]
# Drop the references to the network outputs once the data is extracted.
del cls_prob, bbox_pred, rois

if cfg.TEST.BBOX_REG:
    if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
        # Undo the precomputed mean/std normalisation of the deltas.
        # NOTE(review): `.cuda()` is hard-coded, so this presumably requires
        # `box_deltas` to live on the GPU — confirm.
        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                     + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
        box_deltas = box_deltas.view(1, -1, 4 * len(imdb.classes))
    pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
    pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
else:
    # scores.shape[1] is (cfg)BATCH_SIZE = P
    # NOTE(review): `boxes` is a tensor here while np.tile is a NumPy call,
    # and the repeat count is P rather than the number of classes — verify
    # this branch is actually exercised and behaves as intended.
    pred_boxes = np.tile(boxes, (1, scores.shape[1]))

# In-place division by data[1][0][2] (presumably the image scale — confirm).
pred_boxes /= data[1][0][2]

# P x n_classes
scores = scores.squeeze()
# P x n_classes*4
pred_boxes = pred_boxes.squeeze()
# Detection time is measured up to here; `misc_tic` starts the next timer.
det_toc = t.tic()
detect_time = det_toc - det_tic
misc_tic = t.tic()
def interpret_faster_rcnn(self, cls_prob, bbox_pred, rois, im_info, im_shape,
                          nms=True, clip=True, min_score=0.0):
    """
    1. Filter proposals with
       (proposal_max_class_not_background && proposal_max_class_score>=thresh).
    2. Combine bbox_pred(p', 4*21_cls) and rois(p', 5) to regression
       bbox(p', 4).
    3. Clip bbox.
    4. Use nms to filter overlap bbox.

    :param cls_prob: (proposals, 21_cls) tensor
    :param bbox_pred: (proposals, 4*21_cls) tensor
    :param rois: (proposals, 5) tensor
    :param im_info: image info; ``im_info[0][2]`` is the scale the rois are
        divided by
    :param im_shape: shape handed to ``clip_boxes``
    :param nms: run ``nms_detections`` (threshold 0.3) on the result
    :param clip: clip the predicted boxes to ``im_shape``
    :param min_score: minimum max-class score for a proposal to be kept
    :return: pred_boxes (p", 4)
             scores (p",)
             classes_string (p",)
    """
    # ============================= filter proposals ==========================
    # find the max score class
    # (proposals,) numpy
    # (proposals,) numpy
    scores, inds = cls_prob.data.max(1)
    scores, inds = scores.cpu().numpy(), inds.cpu().numpy()

    # filter foreground and scores >= thresh proposals.
    # ([index, index, ...], )
    keep = np.where((inds > 0) & (scores >= min_score))

    # p'=len(keep[0])
    # (p', ) numpy
    # (p', ) numpy
    scores, inds = scores[keep], inds[keep]

    # =================== Apply bounding-box regression deltas ================
    rows = keep[0]
    box_deltas = bbox_pred.data.cpu().numpy()[rows]
    # For every kept row take columns [4*c, 4*c + 4) of its chosen class c.
    # Fancy indexing keeps the (p', 4) shape even when p' == 0, and
    # np.float64 replaces the ``np.float`` alias removed from NumPy.
    cols = inds[:, np.newaxis] * 4 + np.arange(4)
    row_index = np.arange(len(inds))[:, np.newaxis]
    box_deltas = box_deltas[row_index, cols].astype(np.float64)

    boxes = rois.data.cpu().numpy()[rows, 1:5] / im_info[0][2]

    # (p', 4)
    pred_boxes = bbox_transform_inv(boxes, box_deltas)

    # =================================== clip and nms ========================
    if clip:
        pred_boxes = clip_boxes(pred_boxes, im_shape)

    # (p", 4) numpy
    # (p",) numpy
    # (p",) numpy
    if nms and pred_boxes.shape[0] > 0:
        pred_boxes, scores, inds = nms_detections(
            pred_boxes, scores, 0.3, inds=inds)

    return pred_boxes, scores, self.classes[inds]