def im_detect(data, model, batch_size, std, mean, cfg, nms_thresh):
    """Run one image through the detector and post-process the outputs.

    Args:
        data: triple of numpy arrays (gt_boxes, image_blobs, im_info).
        model: detection network; called with CUDA tensors, returns
            (rois, cls_prob, bbox_pred).
        batch_size: batch size forwarded to bbox_transform_inv.
        std, mean: bbox-regression denormalization statistics.
        cfg: config object providing class_list and confidence_per_cls.
        nms_thresh: IoU threshold for per-class NMS.

    Returns:
        List of float32 arrays of shape (N, 6): [x1, y1, x2, y2, score, class].
    """
    gt_boxes = torch.autograd.Variable(torch.from_numpy(data[0]))
    im_blobs = torch.autograd.Variable(torch.from_numpy(data[1]))
    im_info = torch.autograd.Variable(torch.from_numpy(data[2]))
    detections = []
    with torch.no_grad():
        rois, cls_prob, bbox_pred = model(im_blobs.cuda(),
                                          im_info.cuda(),
                                          gt_boxes.cuda())
        # Decode regression deltas against the RoIs, then clip to the image.
        boxes = bbox_transform_inv(rois[:, :, 1:5], bbox_pred, batch_size, std, mean)
        boxes = clip_boxes(boxes, im_info.data, 1)
        # Class 0 is background and is skipped.
        for cls_idx in range(1, len(cfg.class_list)):
            if not cfg.confidence_per_cls:
                raise RuntimeError("confidence_per_cls is not exist!!!")
            # Fixed score threshold; the per-class config value is kept for reference.
            thresh = 0.5  # cfg.confidence_per_cls[cfg.class_list[cls_idx]][0]
            per_cls_scores = cls_prob[0, :, cls_idx]
            mask = per_cls_scores > thresh
            kept_scores = per_cls_scores[mask]
            kept_boxes = boxes[0, mask, cls_idx * 4:(cls_idx + 1) * 4]
            keep = nms(kept_boxes, kept_scores, nms_thresh).long().view(-1)
            kept_boxes = kept_boxes[keep, :]
            kept_scores = kept_scores[keep]
            # Rescale coords back to the original image; assumes im_info[0, 2]
            # is the x scale and im_info[0, 3] the y scale — TODO confirm.
            kept_boxes[:, 0] /= im_info[0, 2]
            kept_boxes[:, 1] /= im_info[0, 3]
            kept_boxes[:, 2] /= im_info[0, 2]
            kept_boxes[:, 3] /= im_info[0, 3]
            if kept_boxes.size(0) > 0:
                det = np.zeros((kept_boxes.size(0), 6), dtype=np.float32)
                det[:, 0:4] = kept_boxes.cpu()
                det[:, 4] = kept_scores.cpu()
                det[:, 5] = cls_idx
                detections.append(det)
    return detections
def im_detect(data, model, batch_size, thresh=0.8, nms_thresh=0.25, classes=2):
    """Run one image through the detector and post-process the outputs.

    Args:
        data: triple of numpy arrays (gt_boxes, image_blobs, im_info).
        model: detection network; called with CUDA tensors, returns
            (rois, cls_prob, bbox_pred).
        batch_size: batch size forwarded to bbox_transform_inv.
        thresh: per-class score threshold.
        nms_thresh: IoU threshold for per-class NMS.
        classes: number of classes including background (class 0 is skipped).

    Returns:
        List of float32 arrays of shape (N, 6): [x1, y1, x2, y2, score, class].
    """
    gt_tensor = torch.autograd.Variable(torch.from_numpy(data[0]))
    im_blobs_tensor = torch.autograd.Variable(torch.from_numpy(data[1]))
    im_info_tensor = torch.autograd.Variable(torch.from_numpy(data[2]))
    # Denormalize bbox regression deltas with the train-time statistics.
    std = np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS, dtype=np.float32)
    mean = np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS, dtype=np.float32)
    std = torch.from_numpy(std).cuda()
    mean = torch.from_numpy(mean).cuda()
    results = []  # BUG FIX: was misspelled 'resluts', so 'return results' raised NameError
    with torch.no_grad():
        rois, cls_prob, bbox_pred = model(im_blobs_tensor.cuda(),
                                          im_info_tensor.cuda(),
                                          gt_tensor.cuda())
        # Decode regression deltas against the RoIs, then clip to the image.
        pred_boxes = bbox_transform_inv(rois[:, :, 1:5], bbox_pred, batch_size, std, mean)
        pred_boxes = clip_boxes(pred_boxes, im_info_tensor.data, 1)
        scores = cls_prob
        for index in range(1, classes):
            cls_scores = scores[0, :, index]
            scores_over_thresh = (cls_scores > thresh)
            cls_keep = cls_scores[scores_over_thresh]
            bboxes_keep = pred_boxes[0, scores_over_thresh, index * 4:(index + 1) * 4]
            # Drop degenerate boxes below the minimum side length.
            filter_keep = _filter_boxes(bboxes_keep, 16)
            cls_keep = cls_keep[filter_keep]
            bboxes_keep = bboxes_keep[filter_keep, :]
            keep_idx_i = nms(bboxes_keep, cls_keep, nms_thresh)
            keep_idx_i = keep_idx_i.long().view(-1)
            bboxes_keep = bboxes_keep[keep_idx_i, :]
            cls_keep = cls_keep[keep_idx_i]
            # Rescale coords back to the original image; assumes im_info[0, 2]
            # is the x scale and im_info[0, 3] the y scale — TODO confirm.
            bboxes_keep[:, 0] /= im_info_tensor[0, 2]
            bboxes_keep[:, 1] /= im_info_tensor[0, 3]
            bboxes_keep[:, 2] /= im_info_tensor[0, 2]
            bboxes_keep[:, 3] /= im_info_tensor[0, 3]
            if bboxes_keep.size(0) > 0:
                result = np.zeros((bboxes_keep.size(0), 6), dtype=np.float32)
                result[:, 0:4] = bboxes_keep.cpu()
                result[:, 4] = cls_keep.cpu()
                result[:, 5] = index
                results.append(result)  # BUG FIX: appended undefined name 'reslut'
    return results
def im_detect(data, model, batch_size, std, mean, classes, thresh=0.5, nms_thresh=0.3):
    """Run one image through the detector and post-process the outputs.

    Args:
        data: triple of numpy arrays (gt_boxes, image_blobs, im_info).
        model: detection network; called with CUDA tensors, returns
            (rois, cls_prob, bbox_pred).
        batch_size: batch size forwarded to bbox_transform_inv.
        std, mean: bbox-regression denormalization statistics.
        classes: number of classes including background (class 0 is skipped).
        thresh: per-class score threshold. BUG FIX: previously an undefined
            free name (NameError at runtime); now a defaulted parameter.
        nms_thresh: IoU threshold for per-class NMS. BUG FIX: same — was an
            undefined free name; now a defaulted parameter.

    Returns:
        List of float32 arrays of shape (N, 6): [x1, y1, x2, y2, score, class].
    """
    gt_tensor = torch.autograd.Variable(torch.from_numpy(data[0]))
    im_blobs_tensor = torch.autograd.Variable(torch.from_numpy(data[1]))
    im_info_tensor = torch.autograd.Variable(torch.from_numpy(data[2]))
    results = []
    with torch.no_grad():
        rois, cls_prob, bbox_pred = model(im_blobs_tensor.cuda(),
                                          im_info_tensor.cuda(),
                                          gt_tensor.cuda())
        # Decode regression deltas against the RoIs, then clip to the image.
        pred_boxes = bbox_transform_inv(rois[:, :, 1:5], bbox_pred, batch_size, std, mean)
        pred_boxes = clip_boxes(pred_boxes, im_info_tensor.data, 1)
        scores = cls_prob
        for index in range(1, classes):
            cls_scores = scores[0, :, index]
            scores_over_thresh = (cls_scores > thresh)
            cls_keep = cls_scores[scores_over_thresh]
            bboxes_keep = pred_boxes[0, scores_over_thresh, index * 4:(index + 1) * 4]
            # Removed dead 'if False:' min-size filtering branch:
            # filter_keep = _filter_boxes(bboxes_keep, 8)
            # cls_keep = cls_keep[filter_keep]
            # bboxes_keep = bboxes_keep[filter_keep, :]
            keep_idx_i = nms(bboxes_keep, cls_keep, nms_thresh)
            keep_idx_i = keep_idx_i.long().view(-1)
            bboxes_keep = bboxes_keep[keep_idx_i, :]
            cls_keep = cls_keep[keep_idx_i]
            # Rescale coords back to the original image; assumes im_info[0, 2]
            # is the x scale and im_info[0, 3] the y scale — TODO confirm.
            bboxes_keep[:, 0] /= im_info_tensor[0, 2]
            bboxes_keep[:, 1] /= im_info_tensor[0, 3]
            bboxes_keep[:, 2] /= im_info_tensor[0, 2]
            bboxes_keep[:, 3] /= im_info_tensor[0, 3]
            if bboxes_keep.size(0) > 0:
                result = np.zeros((bboxes_keep.size(0), 6), dtype=np.float32)
                result[:, 0:4] = bboxes_keep.cpu()
                result[:, 4] = cls_keep.cpu()
                result[:, 5] = index
                results.append(result)
    return results
# Optionally normalize targets by a precomputed mean and stdev if args.class_agnostic: box_deltas = (box_deltas.view(-1, 4) * torch.FloatTensor( cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() + torch.FloatTensor( cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()) box_deltas = box_deltas.view(1, -1, 4) else: box_deltas = (box_deltas.view(-1, 4) * torch.FloatTensor( cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() + torch.FloatTensor( cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()) box_deltas = box_deltas.view(1, -1, 4 * len(imdb.classes)) pred_boxes = bbox_transform_inv(boxes, box_deltas, 1) pred_boxes = clip_boxes(pred_boxes, im_info.data, 1) else: # Simply repeat the boxes, once for each class _ = torch.from_numpy(np.tile(boxes, (1, scores.shape[1]))) pred_boxes = _.cuda() if args.cuda > 0 else _ pred_boxes /= data[1][0][2].item() scores = scores.squeeze() pred_boxes = pred_boxes.squeeze() det_toc = time.time() detect_time = det_toc - det_tic misc_tic = time.time() if vis: im = cv2.imread(imdb.image_path_at(i)) im2show = np.copy(im)
def forward(self, input):
    """Generate object proposals from RPN outputs.

    input is a 4-tuple:
        input[0]: RPN classification scores, shape (B, 2*A, H, W); only the
            channels from self._num_anchors onward are used (presumably the
            foreground scores — NOTE(review): confirm channel layout).
        input[1]: RPN bbox regression deltas, shape (B, 4*A, H, W).
        input[2]: per-image info passed to clip_boxes.
        input[3]: config key ('TRAIN'/'TEST' style) selecting the NMS limits.

    Returns a (B, post_nms_topN, 5) tensor; column 0 is the batch index and
    columns 1:5 are box coordinates. Rows beyond the number of surviving
    proposals are left zero-padded.
    """
    # Keep only the second half of the score channels.
    scores = input[0][:, self._num_anchors:, :, :]
    bbox_deltas = input[1]
    im_info = input[2]
    cfg_key = input[3]
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
    # min_size is read but the size filtering below is commented out, so it
    # is currently unused.
    min_size = cfg[cfg_key].RPN_MIN_SIZE
    batch_size = bbox_deltas.size(0)
    feat_height, feat_width = scores.size(2), scores.size(3)
    # Build the (x1, y1, x2, y2) shift for every feature-map cell, spaced by
    # the feature stride in image coordinates.
    shift_x = np.arange(0, feat_width) * self._feat_stride
    shift_y = np.arange(0, feat_height) * self._feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = torch.from_numpy(
        np.vstack((shift_x.ravel(), shift_y.ravel(),
                   shift_x.ravel(), shift_y.ravel())).transpose())
    shifts = shifts.contiguous().type_as(scores).float()

    A = self._num_anchors   # anchors per cell
    K = shifts.size(0)      # number of feature-map cells

    self._anchors = self._anchors.type_as(scores)
    # anchors = self._anchors.view(1, A, 4) + shifts.view(1, K, 4).permute(1, 0, 2).contiguous()
    # Broadcast base anchors over every cell: (K, A, 4) -> (B, K*A, 4).
    anchors = self._anchors.view(1, A, 4) + shifts.view(K, 1, 4)
    anchors = anchors.view(1, K * A, 4).expand(batch_size, K * A, 4)

    # Transpose and reshape predicted bbox transformations to get them
    # into the same order as the anchors:
    bbox_deltas = bbox_deltas.permute(0, 2, 3, 1).contiguous()
    bbox_deltas = bbox_deltas.view(batch_size, -1, 4)

    # Same story for the scores:
    scores = scores.permute(0, 2, 3, 1).contiguous()
    scores = scores.view(batch_size, -1)

    # Convert anchors into proposals via bbox transformations
    proposals = bbox_transform_inv(anchors, bbox_deltas, batch_size)

    # 2. clip predicted boxes to image
    proposals = clip_boxes(proposals, im_info, batch_size)
    # proposals = clip_boxes_batch(proposals, im_info, batch_size)

    # assign the score to 0 if it's non keep.
    # keep = self._filter_boxes(proposals, min_size, im_info, batch_size)

    # trim keep index to make it euqal over batch
    # keep_idx = torch.cat(tuple(keep_idx), 0)

    # scores_keep = scores.view(-1)[keep_idx].view(batch_size, trim_size)
    # proposals_keep = proposals.view(-1, 4)[keep_idx, :].contiguous().view(batch_size, trim_size, 4)
    # _, order = torch.sort(scores_keep, 1, True)

    scores_keep = scores
    proposals_keep = proposals
    # Sort each batch row's scores in descending order.
    _, order = torch.sort(scores_keep, 1, True)

    # Fixed-size, zero-padded output buffer.
    output = scores.new(batch_size, post_nms_topN, 5).zero_()
    for i in range(batch_size):
        # # 3. remove predicted boxes with either height or width < threshold
        # # (NOTE: convert min_size to input image scale stored in im_info[2])
        proposals_single = proposals_keep[i]
        scores_single = scores_keep[i]

        ##### filter le min size bbox
        # keep = self._filter_boxes(proposals_single, min_size, im_info[i])
        # print(proposals_single.shape, keep.shape)
        # proposals_single = proposals_single[keep]
        # scores_single = scores_single[keep]

        # # 4. sort all (proposal, score) pairs by score from highest to lowest
        # # 5. take top pre_nms_topN (e.g. 6000)
        order_single = order[i]
        # order_single = order_single[keep]
        # print(proposals_single.shape, scores_single.shape, order_single.shape)

        if pre_nms_topN > 0 and pre_nms_topN < scores_keep[i].numel():
            order_single = order_single[:pre_nms_topN]

        proposals_single = proposals_single[order_single, :]
        scores_single = scores_single[order_single].view(-1, 1)

        # 6. apply nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        keep_idx_i = nms(proposals_single, scores_single.squeeze(1), nms_thresh)
        keep_idx_i = keep_idx_i.long().view(-1)

        if post_nms_topN > 0:
            keep_idx_i = keep_idx_i[:post_nms_topN]
        proposals_single = proposals_single[keep_idx_i, :]
        scores_single = scores_single[keep_idx_i, :]

        # padding 0 at the end.
        num_proposal = proposals_single.size(0)
        output[i, :, 0] = i
        output[i, :num_proposal, 1:] = proposals_single

    return output