def box_results_with_nms_and_limit(scores, boxes):  # NOTE: support single-batch
    """Returns bounding-box detection results by thresholding on scores and
    applying non-maximum suppression (NMS).

    `boxes` has shape (#detections, 4 * #classes), where each row represents
    a list of predicted bounding boxes for each of the object classes in the
    dataset (including the background class). The detections in each row
    originate from the same object proposal.

    `scores` has shape (#detections, #classes), where each row represents a
    list of object detection confidence scores for each of the object classes
    in the dataset (including the background class). `scores[i, j]`
    corresponds to the box at `boxes[i, j * 4:(j + 1) * 4]`.
    """
    num_classes = cfg.MODEL.NUM_CLASSES
    cls_boxes = [[] for _ in range(num_classes)]
    # Apply threshold on detection probabilities and apply NMS
    # Skip j = 0, because it's the background class
    for j in range(1, num_classes):
        inds = np.where(scores[:, j] > cfg.TEST.SCORE_THRESH)[0]
        scores_j = scores[inds, j]
        boxes_j = boxes[inds, j * 4:(j + 1) * 4]
        dets_j = np.hstack((boxes_j, scores_j[:, np.newaxis])).astype(np.float32, copy=False)
        if cfg.TEST.USE_GT_PROPOSALS:
            nms_dets = dets_j
        elif cfg.TEST.SOFT_NMS.ENABLED:
            nms_dets, _ = box_utils.soft_nms(
                dets_j,
                sigma=cfg.TEST.SOFT_NMS.SIGMA,
                overlap_thresh=cfg.TEST.NMS,
                score_thresh=0.0001,
                method=cfg.TEST.SOFT_NMS.METHOD)
        else:
            keep = box_utils.nms(dets_j, cfg.TEST.NMS)
            nms_dets = dets_j[keep, :]
        # Refine the post-NMS boxes using bounding-box voting
        if cfg.TEST.BBOX_VOTE.ENABLED:
            nms_dets = box_utils.box_voting(
                nms_dets, dets_j, cfg.TEST.BBOX_VOTE.VOTE_TH,
                scoring_method=cfg.TEST.BBOX_VOTE.SCORING_METHOD)
        cls_boxes[j] = nms_dets

    # Limit to max_per_image detections **over all classes**
    if cfg.TEST.DETECTIONS_PER_IM > 0:
        image_scores = np.hstack(
            [cls_boxes[j][:, -1] for j in range(1, num_classes)])
        if len(image_scores) > cfg.TEST.DETECTIONS_PER_IM:
            image_thresh = np.sort(image_scores)[-cfg.TEST.DETECTIONS_PER_IM]
            for j in range(1, num_classes):
                keep = np.where(cls_boxes[j][:, -1] >= image_thresh)[0]
                cls_boxes[j] = cls_boxes[j][keep, :]

    im_results = np.vstack([cls_boxes[j] for j in range(1, num_classes)])
    boxes = im_results[:, :-1]
    scores = im_results[:, -1]
    return scores, boxes, cls_boxes
def box_results_with_nms_and_limit(scores, boxes):  # NOTE: support single-batch
    """Returns bounding-box detection results by thresholding on scores and
    applying non-maximum suppression (NMS).

    `boxes` has shape (#detections, 4 * #classes), where each row represents
    a list of predicted bounding boxes for each of the object classes in the
    dataset (including the background class). The detections in each row
    originate from the same object proposal.

    `scores` has shape (#detections, #classes), where each row represents a
    list of object detection confidence scores for each of the object classes
    in the dataset (including the background class). `scores[i, j]`
    corresponds to the box at `boxes[i, j * 4:(j + 1) * 4]`.
    """
    num_classes = cfg.MODEL.NUM_CLASSES
    cls_boxes = [[] for _ in range(num_classes)]
    # Apply threshold on detection probabilities and apply NMS
    # Skip j = 0, because it's the background class
    for j in range(1, num_classes):
        inds = np.where(scores[:, j] > cfg.TEST.SCORE_THRESH)[0]
        scores_j = scores[inds, j]
        boxes_j = boxes[inds, j * 4:(j + 1) * 4]
        dets_j = np.hstack((boxes_j, scores_j[:, np.newaxis])).astype(np.float32, copy=False)
        if cfg.TEST.SOFT_NMS.ENABLED:
            nms_dets, _ = box_utils.soft_nms(
                dets_j,
                sigma=cfg.TEST.SOFT_NMS.SIGMA,
                overlap_thresh=cfg.TEST.NMS,
                score_thresh=0.0001,
                method=cfg.TEST.SOFT_NMS.METHOD
            )
        else:
            keep = box_utils.nms(dets_j, cfg.TEST.NMS)
            nms_dets = dets_j[keep, :]
        # Refine the post-NMS boxes using bounding-box voting
        if cfg.TEST.BBOX_VOTE.ENABLED:
            nms_dets = box_utils.box_voting(
                nms_dets, dets_j, cfg.TEST.BBOX_VOTE.VOTE_TH,
                scoring_method=cfg.TEST.BBOX_VOTE.SCORING_METHOD
            )
        cls_boxes[j] = nms_dets

    # Limit to max_per_image detections **over all classes**
    if cfg.TEST.DETECTIONS_PER_IM > 0:
        image_scores = np.hstack(
            [cls_boxes[j][:, -1] for j in range(1, num_classes)]
        )
        if len(image_scores) > cfg.TEST.DETECTIONS_PER_IM:
            image_thresh = np.sort(image_scores)[-cfg.TEST.DETECTIONS_PER_IM]
            for j in range(1, num_classes):
                keep = np.where(cls_boxes[j][:, -1] >= image_thresh)[0]
                cls_boxes[j] = cls_boxes[j][keep, :]

    im_results = np.vstack([cls_boxes[j] for j in range(1, num_classes)])
    boxes = im_results[:, :-1]
    scores = im_results[:, -1]
    return scores, boxes, cls_boxes
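# The functions above delegate suppression to `box_utils.nms(dets, thresh)`,
# which in Detectron-style code takes an (N, 5) array of
# [x1, y1, x2, y2, score] rows and returns the indices of the kept rows.
# What follows is a minimal pure-NumPy sketch of that assumed contract, for
# reference only; the real implementation is typically Cython or GPU code.
import numpy as np

def nms_reference(dets, thresh):
    """Greedy hard NMS; returns indices into `dets` of the kept boxes."""
    x1, y1, x2, y2 = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3]
    scores = dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]  # process highest-scoring boxes first
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # IoU of the current box against all remaining candidates
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        # drop everything that overlaps the kept box too much
        order = order[1:][iou <= thresh]
    return keep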
def run_task(start_img_id, end_img_id, cls_id):
    # print("Task from {} to {}".format(start_img_id, end_img_id))
    ret_boxes = []
    ret_segms = []
    for i in range(11):
        ret_boxes.append([])
        ret_segms.append([])
        for j in range(end_img_id - start_img_id):
            ret_boxes[i].append([])
            ret_segms[i].append([])
    for img_id in range(start_img_id, end_img_id):
        if len(all_segms[cls_id]) != 0:
            if len(all_segms[cls_id][img_id]) != 0:
                segms = all_segms[cls_id][img_id]
                boxes = all_boxes[cls_id][img_id]
                # nms
                nms_start_time = time.time()
                if BBOX_NMS:
                    boxes = np.array(boxes).astype(np.float32, copy=False)
                    keep = box_utils.nms(boxes, 0.5)
                else:
                    keep = rle_mask_nms(segms, boxes, 0.5, mode='IOU')
                # nms_end_time = time.time()
                # print('nms spend {:.2f}s'.format(nms_end_time - nms_start_time))
                top_boxes = boxes[keep, :]
                top_segms = []
                for index in keep:
                    top_segms.append(segms[index])
                vote_start_time = time.time()
                # mask_vote
                # top_segms = rle_mask_voting(
                #     top_segms,
                #     segms,
                #     boxes,
                #     0.9,
                #     0.5
                # )
                # convert RLE counts from bytes to str for the JSON format
                if top_segms is not None and len(top_segms) > 0:
                    for id, s in enumerate(top_segms):
                        if isinstance(s['counts'], str):
                            top_segms[id]['counts'] = s['counts']
                        else:
                            top_segms[id]['counts'] = str(s['counts'], 'utf-8')
                vote_end_time = time.time()
                print('Img:{} cls:{} vote spend {:.2f}s'.format(
                    img_id, cls_id, vote_end_time - vote_start_time))
                ret_boxes[cls_id][img_id - start_img_id].append(top_boxes)
                ret_segms[cls_id][img_id - start_img_id].append(top_segms)
    return ret_boxes, ret_segms
def box_results_with_nms_and_limit_return_keep(scores, boxes):  # NOTE: support single-batch
    """Returns bounding-box detection results by thresholding on scores and
    applying non-maximum suppression (NMS).

    `boxes` has shape (#detections, 4 * #classes), where each row represents
    a list of predicted bounding boxes for each of the object classes in the
    dataset (including the background class). The detections in each row
    originate from the same object proposal.

    `scores` has shape (#detections, #classes), where each row represents a
    list of object detection confidence scores for each of the object classes
    in the dataset (including the background class). `scores[i, j]`
    corresponds to the box at `boxes[i, j * 4:(j + 1) * 4]`.
    """
    num_classes = cfg.MODEL.NUM_CLASSES
    cls_boxes = [[] for _ in range(num_classes)]
    return_keep = [[] for _ in range(num_classes)]
    # Apply threshold on detection probabilities and apply NMS
    # Skip j = 0, because it's the background class
    for j in range(1, num_classes):
        # pdb.set_trace()
        inds = np.where(scores[:, j] > cfg.TEST.SCORE_THRESH)[0]
        scores_j = scores[inds, j]
        boxes_j = boxes[inds, j * 4:(j + 1) * 4]
        dets_j = np.hstack((boxes_j, scores_j[:, np.newaxis])).astype(np.float32, copy=False)
        keep = box_utils.nms(dets_j, cfg.TEST.NMS)
        nms_dets = dets_j[keep, :]
        cls_boxes[j] = nms_dets
        return_keep[j] = inds[keep]

    # Limit to max_per_image detections **over all classes**
    # if cfg.TEST.DETECTIONS_PER_IM > 0:
    #     image_scores = np.hstack(
    #         [cls_boxes[j][:, -1] for j in range(1, num_classes)]
    #     )
    #     if len(image_scores) > cfg.TEST.DETECTIONS_PER_IM:
    #         image_thresh = np.sort(image_scores)[-cfg.TEST.DETECTIONS_PER_IM]
    #         for j in range(1, num_classes):
    #             keep = np.where(cls_boxes[j][:, -1] >= image_thresh)[0]
    #             return_keep[j] = return_keep[j][keep]
    #             cls_boxes[j] = cls_boxes[j][keep, :]

    # pdb.set_trace()
    return_keep_np_re = []
    im_results = np.vstack([cls_boxes[j] for j in range(1, num_classes)])
    for j in range(1, num_classes):
        return_keep_np_re.extend(list(return_keep[j]))
    return_keep_np = np.array(return_keep_np_re)
    boxes = im_results[:, :-1]
    scores = im_results[:, -1]
    return scores, boxes, cls_boxes, return_keep_np
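# `return_keep[j] = inds[keep]` above composes two index arrays: `inds` maps
# the thresholded subset back to original rows, and `keep` selects within
# that subset. A tiny self-contained illustration (toy data, not from the
# repo) of why the composition recovers original row ids:
import numpy as np

scores_toy = np.array([0.1, 0.9, 0.05, 0.8, 0.7])
inds_toy = np.where(scores_toy > 0.5)[0]    # -> [1, 3, 4], original row ids
keep_toy = np.array([0, 2])                 # NMS keeps rows 0 and 2 of the subset
orig_rows = inds_toy[keep_toy]              # -> [1, 4], ids in the unfiltered array
assert (scores_toy[orig_rows] == np.array([0.9, 0.7])).all()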
def box_results_with_nms_and_limit(self, scores, boxes,
                                   score_thresh=cfg.TEST.SCORE_THRESH):
    num_classes = cfg.MODEL.NUM_CLASSES
    cls_boxes = [[] for _ in range(num_classes)]
    # Apply threshold on detection probabilities and apply NMS
    # Skip j = 0, because it's the background class
    for j in range(1, num_classes):
        inds = np.where(scores[:, j] > score_thresh)[0]
        scores_j = scores[inds, j]
        boxes_j = boxes[inds, j * 4:(j + 1) * 4]
        dets_j = np.hstack(
            (boxes_j, scores_j[:, np.newaxis])).astype(np.float32, copy=False)
        if cfg.TEST.SOFT_NMS.ENABLED:
            nms_dets, _ = box_utils.soft_nms(
                dets_j,
                sigma=cfg.TEST.SOFT_NMS.SIGMA,
                overlap_thresh=cfg.TEST.NMS,
                score_thresh=0.0001,
                method=cfg.TEST.SOFT_NMS.METHOD)
        else:
            keep = box_utils.nms(dets_j, cfg.TEST.NMS)
            nms_dets = dets_j[keep, :]
        # add labels
        label_j = np.ones((nms_dets.shape[0], 1), dtype=np.float32) * j
        nms_dets = np.hstack((nms_dets, label_j))
        # Refine the post-NMS boxes using bounding-box voting
        if cfg.TEST.BBOX_VOTE.ENABLED:
            nms_dets = box_utils.box_voting(
                nms_dets, dets_j, cfg.TEST.BBOX_VOTE.VOTE_TH,
                scoring_method=cfg.TEST.BBOX_VOTE.SCORING_METHOD)
        cls_boxes[j] = nms_dets

    # Limit to max_per_image detections **over all classes**
    if cfg.TEST.DETECTIONS_PER_IM > 0:
        image_scores = np.hstack(
            [cls_boxes[j][:, -2] for j in range(1, num_classes)])
        if len(image_scores) > cfg.TEST.DETECTIONS_PER_IM:
            image_thresh = np.sort(image_scores)[-cfg.TEST.DETECTIONS_PER_IM]
            for j in range(1, num_classes):
                keep = np.where(cls_boxes[j][:, -2] >= image_thresh)[0]
                cls_boxes[j] = cls_boxes[j][keep, :]

    im_results = np.vstack([cls_boxes[j] for j in range(1, num_classes)])
    boxes = im_results[:, :-2]
    scores = im_results[:, -2]
    labels = im_results[:, -1]
    return scores, boxes, labels
def get_nms_preds(self, cls_scores, det_boxes_all):
    # probs = F.softmax(cls_scores, -1).data.cpu().numpy()
    probs = cls_scores
    nms_mask = np.zeros_like(probs)
    for c in range(1, probs.shape[-1]):
        s_c = probs[:, c]
        boxes_c = det_boxes_all[:, c]
        dets_c = np.hstack((boxes_c, s_c[:, np.newaxis])).astype(np.float32, copy=False)
        keep = box_utils.nms(dets_c, cfg.TEST.NMS)
        nms_mask[:, c][keep] = 1
    obj_preds = (nms_mask * probs)[:, 1:].argmax(-1) + 1
    return obj_preds
def iou_box_nms_and_limit(stage1_box, stage1_iou, dets_cls, scores):
    num_classes = cfg.MODEL.NUM_CLASSES
    cls_boxes = [[] for _ in range(num_classes)]
    for j in range(1, num_classes):
        # NOTE: `np.int` is a deprecated alias (removed in NumPy 1.24); use int
        inds = np.array(dets_cls[str(j)], dtype=int)
        if not inds.tolist():
            boxes_j = np.empty((0, 4), dtype=np.float32)
        else:
            boxes_j = stage1_box[inds]
        iou_j = stage1_iou[inds]
        score_j = scores[inds]
        dets_j = np.hstack((boxes_j, iou_j[:, np.newaxis])).astype(np.float32, copy=False)
        if cfg.TEST.SOFT_NMS.ENABLED:
            nms_dets, _ = box_utils.soft_nms(
                dets_j,
                sigma=cfg.TEST.SOFT_NMS.SIGMA,
                overlap_thresh=cfg.TEST.NMS,
                score_thresh=0.0001,
                method=cfg.TEST.SOFT_NMS.METHOD)
        else:
            keep = box_utils.nms(dets_j, cfg.TEST.NMS)
            boxes_j = boxes_j[keep]
            score_j = score_j[keep]
            nms_dets = np.hstack(
                (boxes_j, score_j[:, np.newaxis])).astype(np.float32, copy=False)
        # Refine the post-NMS boxes using bounding-box voting
        if cfg.TEST.BBOX_VOTE.ENABLED:
            nms_dets = box_utils.box_voting(
                nms_dets, dets_j, cfg.TEST.BBOX_VOTE.VOTE_TH,
                scoring_method=cfg.TEST.BBOX_VOTE.SCORING_METHOD)
        cls_boxes[j] = nms_dets

    # Limit to max_per_image detections **over all classes**
    if cfg.TEST.DETECTIONS_PER_IM > 0:
        image_scores = np.hstack(
            [cls_boxes[j][:, -1] for j in range(1, num_classes)])
        if len(image_scores) > cfg.TEST.DETECTIONS_PER_IM:
            image_thresh = np.sort(image_scores)[-cfg.TEST.DETECTIONS_PER_IM]
            for j in range(1, num_classes):
                keep = np.where(cls_boxes[j][:, -1] >= image_thresh)[0]
                cls_boxes[j] = cls_boxes[j][keep, :]

    im_results = np.vstack([cls_boxes[j] for j in range(1, num_classes)])
    boxes = im_results[:, :-1]
    scores = im_results[:, -1]
    return scores, boxes, cls_boxes
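# Several functions here optionally call `box_utils.soft_nms`. As a point of
# reference, here is a minimal NumPy sketch of *linear* soft-NMS
# (Bodla et al., 2017) under the same assumed (N, 5) dets layout; instead of
# discarding overlapping boxes it decays their scores, then prunes by a score
# threshold. The function name and exact thresholds are illustrative, not the
# repo's API (which also returns the kept indices).
import numpy as np

def soft_nms_linear_sketch(dets, overlap_thresh=0.3, score_thresh=0.001):
    """Linear soft-NMS on (N, 5) [x1, y1, x2, y2, score] dets; returns survivors."""
    dets = dets.copy()
    for i in range(dets.shape[0]):
        top = i + np.argmax(dets[i:, 4])
        dets[[i, top]] = dets[[top, i]]        # move best remaining box to slot i
        bb, rest = dets[i, :4], dets[i + 1:, :4]
        xx1 = np.maximum(bb[0], rest[:, 0])
        yy1 = np.maximum(bb[1], rest[:, 1])
        xx2 = np.minimum(bb[2], rest[:, 2])
        yy2 = np.minimum(bb[3], rest[:, 3])
        inter = np.maximum(0.0, xx2 - xx1 + 1) * np.maximum(0.0, yy2 - yy1 + 1)
        area_i = (bb[2] - bb[0] + 1) * (bb[3] - bb[1] + 1)
        area_r = (rest[:, 2] - rest[:, 0] + 1) * (rest[:, 3] - rest[:, 1] + 1)
        iou = inter / (area_i + area_r - inter)
        # linear decay for boxes overlapping more than the threshold
        dets[i + 1:, 4] *= np.where(iou > overlap_thresh, 1.0 - iou, 1.0)
    return dets[dets[:, 4] > score_thresh]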
def get_detections_from_im(cfg, model, im, image_id, featmap_blob_name,
                           feat_blob_name, MIN_BOXES, MAX_BOXES,
                           conf_thresh=0.2, bboxes=None):
    with c2_utils.NamedCudaScope(0):
        scores, cls_boxes, im_scale = infer_engine.im_detect_bbox(
            model, im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE, boxes=bboxes)
        region_feat = workspace.FetchBlob(feat_blob_name)
        cls_prob = workspace.FetchBlob("gpu_0/cls_prob")
        rois = workspace.FetchBlob("gpu_0/rois")
        max_conf = np.zeros((rois.shape[0]))
        # unscale back to raw image space
        cls_boxes = rois[:, 1:5] / im_scale

        for cls_ind in range(1, cls_prob.shape[1]):
            cls_scores = scores[:, cls_ind]
            dets = np.hstack(
                (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
            keep = np.array(nms(dets, cfg.TEST.NMS))
            max_conf[keep] = np.where(cls_scores[keep] > max_conf[keep],
                                      cls_scores[keep], max_conf[keep])

        keep_boxes = np.where(max_conf >= conf_thresh)[0]
        if len(keep_boxes) < MIN_BOXES:
            keep_boxes = np.argsort(max_conf)[::-1][:MIN_BOXES]
        elif len(keep_boxes) > MAX_BOXES:
            keep_boxes = np.argsort(max_conf)[::-1][:MAX_BOXES]

        objects = np.argmax(cls_prob[keep_boxes], axis=1)
        obj_prob = np.amax(cls_prob[keep_boxes], axis=1)

    # proposal not in order!
    assert (np.sum(objects >= 1601) == 0)

    return {
        "image_id": image_id,
        "image_h": np.size(im, 0),
        "image_w": np.size(im, 1),
        'num_boxes': len(keep_boxes),
        'boxes': cls_boxes[keep_boxes],
        'region_feat': region_feat[keep_boxes],
        'object': objects,
        'obj_prob': obj_prob
    }
def forward(self, loc_data, conf_data, prior_data):
    """
    Args:
        loc_data: (tensor) Loc preds from loc layers
            Shape: [batch, num_priors*4]
        conf_data: (tensor) Conf preds from conf layers
            Shape: [batch*num_priors, num_classes]
        prior_data: (tensor) Prior boxes and variances from priorbox layers
            Shape: [1, num_priors, 4]
    """
    num = loc_data.size(0)  # batch size
    num_priors = prior_data.size(0)
    output = torch.zeros(num, self.num_classes, self.top_k, 5)
    conf_preds = conf_data.view(num, num_priors,
                                self.num_classes).transpose(2, 1)

    # Decode predictions into bboxes.
    for i in range(num):
        decoded_boxes = decode(loc_data[i], prior_data, self.variance)
        # For each class, perform nms
        conf_scores = conf_preds[i].clone()
        # print('*****:', i)
        for cl in range(1, self.num_classes):
            # print('$$$$$$$:', cl)
            t0 = time.time()
            c_mask = conf_scores[cl].gt(self.conf_thresh)
            scores = conf_scores[cl][c_mask]
            # print('score: ', scores)
            # print('score dimension: ', scores.dim())
            # if scores.dim() == 0:
            #     continue
            if len(scores) == 0:
                continue
            l_mask = c_mask.unsqueeze(1).expand_as(decoded_boxes)
            boxes = decoded_boxes[l_mask].view(-1, 4)
            # idx of highest scoring and non-overlapping boxes per class
            ids, count = nms(boxes, scores, self.nms_thresh, self.top_k)
            output[i, cl, :count] = \
                torch.cat((scores[ids[:count]].unsqueeze(1),
                           boxes[ids[:count]]), 1)
            t1 = time.time()
            # print('timer_new: %.4f sec.' % (t1 - t0), flush=True)

    flt = output.contiguous().view(num, -1, 5)
    _, idx = flt[:, :, 0].sort(1, descending=True)
    _, rank = idx.sort(1)
    # NOTE: the original line was `flt[(rank < self.top_k)...].fill_(0)`,
    # which is a silent no-op (boolean indexing returns a copy) and has the
    # mask inverted; zero everything *below* the global top_k instead.
    flt[(rank >= self.top_k).unsqueeze(-1).expand_as(flt)] = 0
    return output
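# Why the original `flt[mask].fill_(0)` silently did nothing: boolean
# advanced indexing in PyTorch returns a *copy*, so the in-place fill_
# mutates a temporary. A minimal demonstration:
import torch

t = torch.zeros(4)
mask = torch.tensor([True, False, True, False])
t[mask].fill_(1.0)   # fills a copy; t is unchanged
print(t)             # tensor([0., 0., 0., 0.])
t[mask] = 1.0        # indexed assignment mutates t itself
print(t)             # tensor([1., 0., 1., 0.])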
def get_detections_from_im(
    cfg,
    model,
    im,
    image_id,
    feat_blob_name,
    MIN_BOXES,
    MAX_BOXES,
    background=False,
    conf_thresh=0.2,
    bboxes=None,
):
    with c2_utils.NamedCudaScope(0):
        scores, cls_boxes, im_scale = infer_engine.im_detect_bbox(
            model, im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE, boxes=bboxes
        )
        box_features = workspace.FetchBlob(feat_blob_name)
        cls_prob = workspace.FetchBlob("gpu_0/cls_prob")
        rois = workspace.FetchBlob("gpu_0/rois")
        max_conf = np.zeros((rois.shape[0]))
        # unscale back to raw image space
        cls_boxes = rois[:, 1:5] / im_scale

        start_index = 1
        # Column 0 of the scores matrix is for the background class
        if background:
            start_index = 0
        for cls_ind in range(start_index, cls_prob.shape[1]):
            cls_scores = scores[:, cls_ind]
            dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
            keep = np.array(nms(dets, cfg.TEST.NMS))
            max_conf[keep] = np.where(
                cls_scores[keep] > max_conf[keep], cls_scores[keep], max_conf[keep]
            )

        keep_boxes = np.where(max_conf >= conf_thresh)[0]
        if len(keep_boxes) < MIN_BOXES:
            keep_boxes = np.argsort(max_conf)[::-1][:MIN_BOXES]
        elif len(keep_boxes) > MAX_BOXES:
            keep_boxes = np.argsort(max_conf)[::-1][:MAX_BOXES]

        # Predict the class label using the scores.
        # NOTE: the original wrote `cls_prob[keep_boxes][start_index:]`, which
        # slices *rows*; skipping the background column and offsetting the
        # argmax back to class ids is almost certainly what was intended.
        objects = np.argmax(cls_prob[keep_boxes][:, start_index:], axis=1) + start_index

    return box_features[keep_boxes]
def get_nms_preds(self, cls_scores, det_rois, softmax=True):
    if softmax:
        probs = F.softmax(cls_scores, -1).data.cpu().numpy()
    else:
        probs = cls_scores.data.cpu().numpy()
    # probs = cls_scores
    # det_boxes_all = np.tile(np.expand_dims(det_rois[:, 1:], 1), (1, probs.shape[0], 1))
    nms_mask = np.zeros_like(probs)
    for c in range(1, probs.shape[-1]):
        s_c = probs[:, c]
        boxes_c = det_rois[:, 1:]
        dets_c = np.hstack((boxes_c, s_c[:, np.newaxis])).astype(np.float32, copy=False)
        keep = box_utils.nms(dets_c, cfg.TEST.NMS)
        nms_mask[:, c][keep] = 1
    obj_preds = (nms_mask * probs)[:, 1:].argmax(-1) + 1
    return obj_preds
def box_results_with_nms_and_limit(self, scores, boxes,
                                   score_thresh=cfg.TEST.SCORE_THRESH):
    num_classes = cfg.MODEL.NUM_CLASSES
    cls_boxes = [[] for _ in range(num_classes)]
    # Apply threshold on detection probabilities and apply NMS
    # Skip j = 0, because it's the background class
    nms_mask = np.zeros_like(scores)
    for j in range(1, num_classes):
        inds = np.where(scores[:, j] > score_thresh)[0]
        scores_j = scores[inds, j]
        boxes_j = boxes[inds, j * 4:(j + 1) * 4]
        dets_j = np.hstack((boxes_j, scores_j[:, np.newaxis])).astype(np.float32, copy=False)
        if cfg.TEST.SOFT_NMS.ENABLED:
            nms_dets, keep = box_utils.soft_nms(
                dets_j,
                sigma=cfg.TEST.SOFT_NMS.SIGMA,
                overlap_thresh=cfg.TEST.NMS,
                score_thresh=0.0001,
                method=cfg.TEST.SOFT_NMS.METHOD
            )
        else:
            keep = box_utils.nms(dets_j, cfg.TEST.NMS)
            nms_dets = dets_j[keep, :]
        # NOTE: `keep` indexes the thresholded subset, so map back through
        # `inds`; the original (`nms_mask[:, j][keep] = 1.0`) marked rows of
        # the full score matrix directly, which is only correct when no row
        # falls below score_thresh.
        nms_mask[inds[keep], j] = 1.0

    # add labels
    # Refine the post-NMS boxes using bounding-box voting
    dists_all = nms_mask * scores
    scores_pre, labels_pre = dists_all.max(-1), dists_all.argmax(-1)
    inds_all = np.where(scores_pre > 0)[0]
    labels_all = labels_pre[inds_all]
    scores_all = scores_pre[inds_all]
    idx = np.argsort(-scores_all)
    if cfg.TEST.DETECTIONS_PER_IM < idx.shape[0]:
        idx = idx[:cfg.TEST.DETECTIONS_PER_IM]
    scores = scores_all[idx]
    labels = labels_all[idx]
    return scores, idx, labels
def collect(inputs, is_training):
    cfg_key = 'TRAIN' if is_training else 'TEST'
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
    k_max = cfg.FPN.RPN_MAX_LEVEL
    k_min = cfg.FPN.RPN_MIN_LEVEL
    num_lvls = k_max - k_min + 1
    roi_inputs = inputs[:num_lvls]
    score_inputs = inputs[num_lvls:]
    if is_training:
        score_inputs = score_inputs[:-2]

    # rois are in [[batch_idx, x0, y0, x1, y1], ...] format
    # Combine predictions across all levels and retain the top scoring
    rois = np.concatenate([blob.data for blob in roi_inputs])
    scores = np.concatenate([blob.data for blob in score_inputs]).squeeze()
    if 0:
        inds = np.argsort(-scores)[:post_nms_topN]
        rois = rois[inds, :]
    else:
        if pre_nms_topN <= 0 or pre_nms_topN >= len(scores):
            order = np.argsort(-scores.squeeze())
        else:
            # Avoid sorting possibly large arrays; First partition to get top K
            # unsorted and then sort just those (~20x faster for 200k scores)
            inds = np.argpartition(-scores.squeeze(), pre_nms_topN)[:pre_nms_topN]
            order = np.argsort(-scores[inds].squeeze())
            order = inds[order]
        proposals = rois[order, 1:]
        scores = scores[order].reshape((-1, 1))
        ids = rois[order, 0].reshape((-1, 1))
        if nms_thresh > 0:
            keep = box_utils.nms(np.hstack((proposals, scores)), nms_thresh)
            if post_nms_topN > 0:
                keep = keep[:post_nms_topN]
            proposals = proposals[keep, :]
            scores = scores[keep]
            ids = ids[keep]
        rois = np.hstack((ids, proposals))
    return rois
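# The partition-then-sort trick used in `collect` (and in the RPN forward
# below), shown standalone: np.argpartition finds the top-K in O(n) without
# ordering them, and only the K survivors get fully sorted.
import numpy as np

scores_demo = np.random.rand(200000)
k = 2000
inds_demo = np.argpartition(-scores_demo, k)[:k]           # top-k indices, unsorted
order_demo = inds_demo[np.argsort(-scores_demo[inds_demo])]  # sort just those k
# equivalent to a full sort (assuming distinct scores), but much cheaper:
assert np.array_equal(order_demo, np.argsort(-scores_demo)[:k])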
def im_detections(model, im, anchors):
    """Generate RetinaNet detections on a single image."""
    k_max, k_min = cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.RPN_MIN_LEVEL
    A = cfg.RETINANET.SCALES_PER_OCTAVE * len(cfg.RETINANET.ASPECT_RATIOS)
    inputs = {}
    inputs['data'], inputs['im_info'] = _get_image_blob(im)
    cls_probs, box_preds = [], []
    for lvl in range(k_min, k_max + 1):
        suffix = 'fpn{}'.format(lvl)
        cls_probs.append(core.ScopedName('retnet_cls_prob_{}'.format(suffix)))
        box_preds.append(core.ScopedName('retnet_bbox_pred_{}'.format(suffix)))
    for k, v in inputs.items():
        workspace.FeedBlob(core.ScopedName(k), v.astype(np.float32, copy=False))

    workspace.RunNet(model.net.Proto().name)
    scale = inputs['im_info'][0, 2]
    cls_probs = workspace.FetchBlobs(cls_probs)
    box_preds = workspace.FetchBlobs(box_preds)

    # here the boxes_all are [x0, y0, x1, y1, score]
    boxes_all = defaultdict(list)

    cnt = 0
    for lvl in range(k_min, k_max + 1):
        # create cell anchors array
        stride = 2. ** lvl
        cell_anchors = anchors[lvl]

        # fetch per level probability
        cls_prob = cls_probs[cnt]
        box_pred = box_preds[cnt]
        cls_prob = cls_prob.reshape((
            cls_prob.shape[0], A, int(cls_prob.shape[1] / A),
            cls_prob.shape[2], cls_prob.shape[3]))
        box_pred = box_pred.reshape((
            box_pred.shape[0], A, 4, box_pred.shape[2], box_pred.shape[3]))
        cnt += 1

        if cfg.RETINANET.SOFTMAX:
            cls_prob = cls_prob[:, :, 1::, :, :]

        cls_prob_ravel = cls_prob.ravel()
        # In some cases [especially for very small img sizes], it's possible that
        # candidate_ind is empty if we impose threshold 0.05 at all levels. This
        # will lead to errors since no detections are found for this image. Hence,
        # for lvl 7 which has small spatial resolution, we take the threshold 0.0
        th = cfg.RETINANET.INFERENCE_TH if lvl < k_max else 0.0
        candidate_inds = np.where(cls_prob_ravel > th)[0]
        if len(candidate_inds) == 0:
            continue

        pre_nms_topn = min(cfg.RETINANET.PRE_NMS_TOP_N, len(candidate_inds))
        inds = np.argpartition(
            cls_prob_ravel[candidate_inds], -pre_nms_topn)[-pre_nms_topn:]
        inds = candidate_inds[inds]

        inds_5d = np.array(np.unravel_index(inds, cls_prob.shape)).transpose()
        classes = inds_5d[:, 2]
        anchor_ids, y, x = inds_5d[:, 1], inds_5d[:, 3], inds_5d[:, 4]
        scores = cls_prob[:, anchor_ids, classes, y, x]

        boxes = np.column_stack((x, y, x, y)).astype(dtype=np.float32)
        boxes *= stride
        boxes += cell_anchors[anchor_ids, :]

        if not cfg.RETINANET.CLASS_SPECIFIC_BBOX:
            box_deltas = box_pred[0, anchor_ids, :, y, x]
        else:
            box_cls_inds = classes * 4
            box_deltas = np.vstack(
                [box_pred[0, ind:ind + 4, yi, xi]
                 for ind, yi, xi in zip(box_cls_inds, y, x)]
            )
        pred_boxes = (
            box_utils.bbox_transform(boxes, box_deltas)
            if cfg.TEST.BBOX_REG else boxes)
        pred_boxes /= scale
        pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im.shape)
        box_scores = np.zeros((pred_boxes.shape[0], 5))
        box_scores[:, 0:4] = pred_boxes
        box_scores[:, 4] = scores

        for cls in range(1, cfg.MODEL.NUM_CLASSES):
            inds = np.where(classes == cls - 1)[0]
            if len(inds) > 0:
                boxes_all[cls].extend(box_scores[inds, :])

    # Combine predictions across all levels and retain the top scoring by class
    detections = []
    for cls, boxes in boxes_all.items():
        cls_dets = np.vstack(boxes).astype(dtype=np.float32)
        # do class specific nms here
        keep = box_utils.nms(cls_dets, cfg.TEST.NMS)
        cls_dets = cls_dets[keep, :]
        out = np.zeros((len(keep), 6))
        out[:, 0:5] = cls_dets
        out[:, 5].fill(cls)
        detections.append(out)

    detections = np.vstack(detections)
    # sort all again
    inds = np.argsort(-detections[:, 4])
    detections = detections[inds[0:cfg.TEST.DETECTIONS_PER_IM], :]
    boxes = detections[:, 0:4]
    scores = detections[:, 4]
    classes = detections[:, 5]
    return boxes, scores, classes
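# How the flattened top-k is mapped back to 5-D coordinates in
# `im_detections`, shown on toy shapes: argpartition picks the k best scores
# of the raveled tensor, and np.unravel_index recovers
# (batch, anchor, class, y, x) for each survivor. Shapes are illustrative.
import numpy as np

cls_prob_toy = np.random.rand(1, 9, 80, 13, 17)   # toy (N, A, C, H, W) scores
flat = cls_prob_toy.ravel()
k = 100
inds_flat = np.argpartition(flat, -k)[-k:]        # top-k flat indices
inds_5d = np.array(np.unravel_index(inds_flat, cls_prob_toy.shape)).transpose()
classes_toy = inds_5d[:, 2]                       # class of each hit
anchor_ids, y, x = inds_5d[:, 1], inds_5d[:, 3], inds_5d[:, 4]
assert np.allclose(cls_prob_toy[0, anchor_ids, classes_toy, y, x], flat[inds_flat])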
def forward(self, rpn_cls_probs, rpn_bbox_pred, im_height, im_width,
            scaling_factor, spatial_scale=None):
    """See modeling.detector.GenerateProposals for inputs/outputs
    documentation.
    """
    if spatial_scale is None:
        spatial_scale = self._spatial_scale
    # 1. for each location i in a (H, W) grid:
    #      generate A anchor boxes centered on cell i
    #      apply predicted bbox deltas to each of the A anchors at cell i
    # 2. clip predicted boxes to image
    # 3. remove predicted boxes with either height or width < threshold
    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take the top pre_nms_topN proposals before NMS
    # 6. apply NMS with a loose threshold (0.7) to the remaining proposals
    # 7. take after_nms_topN proposals after NMS
    # 8. return the top proposals

    # 1. get anchors at all feature positions
    all_anchors_np = self.get_all_anchors(
        num_images=rpn_cls_probs.shape[0],
        feature_height=rpn_cls_probs.shape[2],
        feature_width=rpn_cls_probs.shape[3],
        spatial_scale=spatial_scale)
    all_anchors = Variable(torch.FloatTensor(all_anchors_np))
    if rpn_cls_probs.is_cuda:
        all_anchors = all_anchors.cuda()

    # Transpose and reshape predicted bbox transformations to get them
    # into the same order as the anchors:
    #   - bbox deltas will be (4 * A, H, W) format from conv output
    #   - transpose to (H, W, 4 * A)
    #   - reshape to (H * W * A, 4) where rows are ordered by (H, W, A)
    #     in slowest to fastest order to match the enumerated anchors
    bbox_deltas = rpn_bbox_pred.squeeze(0).permute(1, 2, 0).contiguous().view(-1, 4)
    bbox_deltas_np = bbox_deltas.cpu().data.numpy()

    # Same story for the scores:
    #   - scores are (A, H, W) format from conv output
    #   - transpose to (H, W, A)
    #   - reshape to (H * W * A, 1) where rows are ordered by (H, W, A)
    #     to match the order of anchors and bbox_deltas
    scores = rpn_cls_probs.squeeze(0).permute(1, 2, 0).contiguous().view(-1, 1)
    scores_np = scores.cpu().data.numpy()

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    if self.rpn_pre_nms_top_n <= 0 or self.rpn_pre_nms_top_n >= len(scores_np):
        order = np.argsort(-scores_np.squeeze())
    else:
        # Avoid sorting possibly large arrays; First partition to get top K
        # unsorted and then sort just those (~20x faster for 200k scores)
        inds = np.argpartition(
            -scores_np.squeeze(), self.rpn_pre_nms_top_n
        )[:self.rpn_pre_nms_top_n]
        order = np.argsort(-scores_np[inds].squeeze())
        order = inds[order]
    bbox_deltas = bbox_deltas[order, :]
    bbox_deltas_np = bbox_deltas_np[order, :]
    scores = scores[order, :]
    scores_np = scores_np[order, :]
    all_anchors = all_anchors[order, :]
    all_anchors_np00 = all_anchors_np[order, :]

    # Transform anchors into proposals via bbox transformations
    proposals = self.bbox_transform(all_anchors, bbox_deltas,
                                    (1.0, 1.0, 1.0, 1.0))

    # 2. clip proposals to image (may result in proposals with zero area
    # that will be removed in the next step)
    proposals = self.clip_tiled_boxes(proposals, im_height, im_width)
    proposals_np = proposals.cpu().data.numpy()

    # 3. remove predicted boxes with either height or width < min_size
    keep = self.filter_boxes(proposals_np, self.rpn_min_size,
                             scaling_factor, im_height, im_width)
    proposals = proposals[keep, :]
    proposals_np = proposals_np[keep, :]
    scores = scores[keep, :]
    scores_np = scores_np[keep]

    # 6. apply loose nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    if self.rpn_nms_thresh > 0:
        keep = box_utils.nms(np.hstack((proposals_np, scores_np)),
                             self.rpn_nms_thresh)
        if self.rpn_post_nms_top_n > 0:
            keep = keep[:self.rpn_post_nms_top_n]
        proposals = proposals[keep, :]
        scores = scores[keep, :]
    return proposals, scores
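# `clip_tiled_boxes` (used by several snippets here) clamps boxes to the
# image extent and must handle rows that tile x1,y1,x2,y2 once per class.
# A NumPy sketch of the assumed Detectron-style behavior (the real helper
# takes an im_shape tuple rather than separate height/width):
import numpy as np

def clip_tiled_boxes_sketch(boxes, im_h, im_w):
    """Clip boxes of shape (N, 4*k) to lie inside an im_h x im_w image."""
    assert boxes.shape[1] % 4 == 0
    boxes[:, 0::4] = np.clip(boxes[:, 0::4], 0, im_w - 1)  # x1
    boxes[:, 1::4] = np.clip(boxes[:, 1::4], 0, im_h - 1)  # y1
    boxes[:, 2::4] = np.clip(boxes[:, 2::4], 0, im_w - 1)  # x2
    boxes[:, 3::4] = np.clip(boxes[:, 3::4], 0, im_h - 1)  # y2
    return boxes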
def box_results_with_nms_and_limit(
    scores,
    boxes,
    num_classes=81,
    score_thresh=0.05,
    overlap_thresh=0.5,
    do_soft_nms=False,
    soft_nms_sigma=0.5,
    soft_nms_method='linear',
    do_bbox_vote=False,
    bbox_vote_thresh=0.8,
    bbox_vote_method='ID',
    max_detections_per_img=100,  ### over all classes ###
):
    """Returns bounding-box detection results by thresholding on scores and
    applying non-maximum suppression (NMS). A number of #detections persist
    after this and are returned, sorted by class.

    `boxes` has shape (#detections, 4 * #classes), where each row represents
    a list of predicted bounding boxes for each of the object classes in the
    dataset (including the background class). The detections in each row
    originate from the same object proposal.

    `scores` has shape (#detections, #classes), where each row represents a
    list of object detection confidence scores for each of the object classes
    in the dataset (including the background class). `scores[i, j]`
    corresponds to the box at `boxes[i, j * 4:(j + 1) * 4]`.
    """
    cls_boxes = [[] for _ in range(num_classes)]
    # Apply threshold on detection probabilities and apply NMS
    # Skip j = 0, because it's the background class
    for j in range(1, num_classes):
        inds = np.where(scores[:, j] > score_thresh)[0]
        scores_j = scores[inds, j]
        boxes_j = boxes[inds, j * 4:(j + 1) * 4]
        dets_j = np.hstack((boxes_j, scores_j[:, np.newaxis])).astype(np.float32, copy=False)
        if do_soft_nms:
            nms_dets, _ = box_utils.soft_nms(
                dets_j,
                sigma=soft_nms_sigma,
                overlap_thresh=overlap_thresh,
                score_thresh=0.0001,
                method=soft_nms_method)
        else:
            keep = box_utils.nms(dets_j, overlap_thresh)
            nms_dets = dets_j[keep, :]
        # Refine the post-NMS boxes using bounding-box voting
        if do_bbox_vote:
            nms_dets = box_utils.box_voting(
                nms_dets, dets_j, bbox_vote_thresh,
                scoring_method=bbox_vote_method)
        cls_boxes[j] = nms_dets

    # Limit to max_per_image detections **over all classes**
    if max_detections_per_img > 0:
        image_scores = np.hstack(
            [cls_boxes[j][:, -1] for j in range(1, num_classes)])
        if len(image_scores) > max_detections_per_img:
            image_thresh = np.sort(image_scores)[-max_detections_per_img]
            for j in range(1, num_classes):
                keep = np.where(cls_boxes[j][:, -1] >= image_thresh)[0]
                cls_boxes[j] = cls_boxes[j][keep, :]

    im_results = np.vstack([cls_boxes[j] for j in range(1, num_classes)])
    boxes = im_results[:, :-1]
    scores = im_results[:, -1]
    return scores, boxes, cls_boxes
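# The "limit over all classes" step above picks one global score cutoff so
# that at most max_detections_per_img boxes survive across every class.
# Standalone illustration with toy per-class score arrays:
import numpy as np

cls_scores_toy = {1: np.array([0.9, 0.6]), 2: np.array([0.8, 0.7, 0.3])}
max_det = 3
image_scores_toy = np.hstack(list(cls_scores_toy.values()))
if len(image_scores_toy) > max_det:
    image_thresh_toy = np.sort(image_scores_toy)[-max_det]  # k-th highest score
    cls_scores_toy = {j: s[s >= image_thresh_toy] for j, s in cls_scores_toy.items()}
# -> class 1 keeps [0.9], class 2 keeps [0.8, 0.7]: 3 detections in total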
def box_results_with_nms_and_limit(scores, boxes, prev_cls_boxes=None):  # NOTE: support single-batch
    """Returns bounding-box detection results by thresholding on scores and
    applying non-maximum suppression (NMS).

    `boxes` has shape (#detections, 4 * #classes), where each row represents
    a list of predicted bounding boxes for each of the object classes in the
    dataset (including the background class). The detections in each row
    originate from the same object proposal.

    `scores` has shape (#detections, #classes), where each row represents a
    list of object detection confidence scores for each of the object classes
    in the dataset (including the background class). `scores[i, j]`
    corresponds to the box at `boxes[i, j * 4:(j + 1) * 4]`.
    """
    num_classes = cfg.MODEL.NUM_CLASSES
    cls_boxes = [[] for _ in range(num_classes)]
    # Apply threshold on detection probabilities and apply NMS
    # Skip j = 0, because it's the background class
    for j in range(1, num_classes):
        inds = np.where(scores[:, j] >= cfg.TEST.SCORE_THRESH)[0]
        scores_j = scores[inds, j]
        boxes_j = boxes[inds, j * 4:(j + 1) * 4]
        dets_j = np.hstack((boxes_j, scores_j[:, np.newaxis])).astype(np.float32, copy=False)
        if cfg.TEST.SOFT_NMS.ENABLED:
            nms_dets, _ = box_utils.soft_nms(
                dets_j,
                sigma=cfg.TEST.SOFT_NMS.SIGMA,
                overlap_thresh=cfg.TEST.NMS,
                score_thresh=0.0001,
                method=cfg.TEST.SOFT_NMS.METHOD
            )
        else:
            keep = box_utils.nms(dets_j, cfg.TEST.NMS)
            nms_dets = dets_j[keep, :]
        # Refine the post-NMS boxes using bounding-box voting
        if cfg.TEST.BBOX_VOTE.ENABLED:
            nms_dets = box_utils.box_voting(
                nms_dets, dets_j, cfg.TEST.BBOX_VOTE.VOTE_TH,
                scoring_method=cfg.TEST.BBOX_VOTE.SCORING_METHOD
            )
        cls_boxes[j] = nms_dets

    # Limit to max_per_image detections **over all classes**
    if cfg.TEST.DETECTIONS_PER_IM > 0:
        image_scores = np.hstack(
            [cls_boxes[j][:, -1] for j in range(1, num_classes)]
        )
        if len(image_scores) > cfg.TEST.DETECTIONS_PER_IM:
            image_thresh = np.sort(image_scores)[-cfg.TEST.DETECTIONS_PER_IM]
            for j in range(1, num_classes):
                keep = np.where(cls_boxes[j][:, -1] >= image_thresh)[0]
                cls_boxes[j] = cls_boxes[j][keep, :]

    # select two best for each class if score is low..
    '''
    if cfg.TEST.NUM_DET_PER_CLASS_PRE == 1:
        for j in range(1, num_classes):
            keep = np.argsort(-cls_boxes[j][:, -1])[:2]
            cls_boxes[j] = cls_boxes[j][keep, :]
            # if one has a very strong cls score, we only keep two boxes for weak cls score.
            if len(cls_boxes[j]) > 0 and cls_boxes[j][0, -1] > 0.5:
                cls_boxes[j] = cls_boxes[j][:1, :]
    '''

    # nms between classes.
    if cfg.TEST.NMS_CROSS_CLASS > 0.:
        '''
        # code to keep some of the dets for which class there is only one det.
        all_cls_boxes = []
        reserved_cls = []
        for j in range(1, num_classes):
            tmp_cls_boxes = np.copy(cls_boxes[j])
            # if only one det for cls j, we keep it.
            if tmp_cls_boxes.shape[0] == 1:
                tmp_cls_boxes[:, -1] = 1.0
            all_cls_boxes.append(tmp_cls_boxes)
        all_dets_for_nms = np.vstack(all_cls_boxes)
        '''
        all_dets = np.vstack([cls_boxes[j] for j in range(1, num_classes)])
        class_ids = np.vstack([np.ones(shape=(len(cls_boxes[j]), 1)) * j
                               for j in range(1, num_classes)])
        keep = box_utils.nms(all_dets, cfg.TEST.NMS_CROSS_CLASS)
        all_dets = all_dets[keep, :]
        class_ids = class_ids[keep, :]
        for j in range(1, num_classes):
            idx_j = np.where(class_ids == j)[0]
            cls_boxes[j] = all_dets[idx_j, :]

    # select one best for each class.
    if cfg.TEST.NUM_DET_PER_CLASS_PRE > 0:
        for j in range(1, num_classes):
            keep = np.argsort(-cls_boxes[j][:, -1])[:cfg.TEST.NUM_DET_PER_CLASS_PRE]
            cls_boxes[j] = cls_boxes[j][keep, :]

    # nms by previous box.
    if cfg.TEST.NMS_SMALL_BOX_IOU > 0:
        for j in range(1, num_classes):
            if prev_cls_boxes is not None:
                assert len(prev_cls_boxes[j]) < 2, 'number of prev boxes should <2.'
                if len(prev_cls_boxes[j]) == 1:
                    if prev_cls_boxes[j][0][-1] < cfg.TEST.NMS_SMALL_BOX_SCORE_THRESHOLD:
                        # if not confident about previous box, no nms.
                        continue
                    prev_cls_box = prev_cls_boxes[j][0][:-1]
                    index_to_remove = []
                    for id_box in range(len(cls_boxes[j]) - 1, -1, -1):
                        box = cls_boxes[j][id_box][:-1]
                        iou = bb_intersection_over_union(prev_cls_box, box)
                        if iou < cfg.TEST.NMS_SMALL_BOX_IOU:
                            index_to_remove.append(id_box)
                    cls_boxes[j] = np.delete(cls_boxes[j], index_to_remove, 0)

    im_results = np.vstack([cls_boxes[j] for j in range(1, num_classes)])
    boxes = im_results[:, :-1]
    scores = im_results[:, -1]
    return scores, boxes, cls_boxes
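# `bb_intersection_over_union` is called above but not defined in this file.
# A standard single-pair IoU sketch under the assumed [x1, y1, x2, y2] box
# layout (the name suffix marks it as an illustration, not the repo's helper):
def bb_intersection_over_union_sketch(boxA, boxB):
    xA = max(boxA[0], boxB[0])
    yA = max(boxA[1], boxB[1])
    xB = min(boxA[2], boxB[2])
    yB = min(boxA[3], boxB[3])
    inter = max(0, xB - xA) * max(0, yB - yA)
    areaA = (boxA[2] - boxA[0]) * (boxA[3] - boxA[1])
    areaB = (boxB[2] - boxB[0]) * (boxB[3] - boxB[1])
    return inter / float(areaA + areaB - inter) if inter > 0 else 0.0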
def _forward(self, data, im_info, roidb=None, **rpn_kwargs):
    im_data = data
    if self.training:
        roidb = list(map(lambda x: blob_utils.deserialize(x)[0], roidb))

    device_id = im_data.get_device()

    return_dict = {}  # A dict to collect return variables

    blob_conv = self.Conv_Body(im_data)
    rpn_ret = self.RPN(blob_conv, im_info, roidb)  # rpn proposals

    # if self.training:
    #     # can be used to infer fg/bg ratio
    #     return_dict['rois_label'] = rpn_ret['labels_int32']

    rois_certification = False
    if cfg.FPN.FPN_ON:
        # Retain only the blobs that will be used for RoI heads. `blob_conv` may include
        # extra blobs that are used for RPN proposals, but not for RoI heads.
        blob_conv = blob_conv[-self.num_roi_levels:]

    if not self.training:
        return_dict['blob_conv'] = blob_conv

    if rois_certification:
        lvl_min = cfg.FPN.ROI_MIN_LEVEL
        lvl_max = cfg.FPN.ROI_MAX_LEVEL
        test_rpn_ret = {'rois': rpn_ret['rois']}
        lvls = fpn_utils.map_rois_to_fpn_levels(test_rpn_ret['rois'],
                                                lvl_min, lvl_max)
        rois_idx_order = np.empty((0, ))
        test_rois = test_rpn_ret['rois']
        for output_idx, lvl in enumerate(range(lvl_min, lvl_max + 1)):
            idx_lvl = np.where(lvls == lvl)[0]
            rois_lvl = test_rois[idx_lvl, :]
            rois_idx_order = np.concatenate((rois_idx_order, idx_lvl))
            test_rpn_ret['rois_fpn{}'.format(lvl)] = rois_lvl
        rois_idx_restore = np.argsort(rois_idx_order).astype(np.int32, copy=False)
        test_rpn_ret['rois_idx_restore_int32'] = rois_idx_restore
        test_feat = self.Box_Head(blob_conv, test_rpn_ret)
        test_cls_score, test_bbox_pred = self.Box_Outs(test_feat)
        test_cls_score = test_cls_score.data.cpu().numpy().squeeze()
        test_bbox_pred = test_bbox_pred.data.cpu().numpy().squeeze()

    if not cfg.MODEL.RPN_ONLY:
        if cfg.MODEL.SHARE_RES5 and self.training:
            box_feat, res5_feat = self.Box_Head(blob_conv, rpn_ret)  # bbox proposals
        else:
            box_feat = self.Box_Head(blob_conv, rpn_ret)
        cls_score, bbox_pred = self.Box_Outs(box_feat)
    else:
        # TODO: complete the returns for RPN only situation
        pass

    # Start computing the losses here
    if self.training:
        return_dict['losses'] = {}
        return_dict['metrics'] = {}
        # rpn loss
        rpn_kwargs.update(
            dict((k, rpn_ret[k]) for k in rpn_ret.keys()
                 if (k.startswith('rpn_cls_logits') or k.startswith('rpn_bbox_pred'))))
        loss_rpn_cls, loss_rpn_bbox = rpn_heads.generic_rpn_losses(**rpn_kwargs)
        if cfg.FPN.FPN_ON:
            for i, lvl in enumerate(
                    range(cfg.FPN.RPN_MIN_LEVEL, cfg.FPN.RPN_MAX_LEVEL + 1)):
                return_dict['losses']['loss_rpn_cls_fpn%d' % lvl] = loss_rpn_cls[i]
                return_dict['losses']['loss_rpn_bbox_fpn%d' % lvl] = loss_rpn_bbox[i]
        else:
            return_dict['losses']['loss_rpn_cls'] = loss_rpn_cls
            return_dict['losses']['loss_rpn_bbox'] = loss_rpn_bbox

        # bbox loss
        loss_cls, loss_bbox, accuracy_cls = fast_rcnn_heads.fast_rcnn_losses(
            cls_score, bbox_pred, rpn_ret['labels_int32'],
            rpn_ret['bbox_targets'], rpn_ret['bbox_inside_weights'],
            rpn_ret['bbox_outside_weights'])
        return_dict['losses']['loss_cls'] = loss_cls
        return_dict['losses']['loss_bbox'] = loss_bbox
        return_dict['metrics']['accuracy_cls'] = accuracy_cls

        if cfg.MODEL.MASK_ON:
            if getattr(self.Mask_Head, 'SHARE_RES5', False):
                mask_feat = self.Mask_Head(
                    res5_feat, rpn_ret,
                    roi_has_mask_int32=rpn_ret['roi_has_mask_int32'])
            else:
                mask_feat = self.Mask_Head(blob_conv, rpn_ret)
            mask_pred = self.Mask_Outs(mask_feat)
            # return_dict['mask_pred'] = mask_pred
            # mask loss
            loss_mask = mask_rcnn_heads.mask_rcnn_losses(mask_pred,
                                                         rpn_ret['masks_int32'])
            return_dict['losses']['loss_mask'] = loss_mask

        if cfg.MODEL.KEYPOINTS_ON:
            if getattr(self.Keypoint_Head, 'SHARE_RES5', False):
                # No corresponding keypoint head implemented yet (Neither in Detectron)
                # Also, rpn need to generate the label 'roi_has_keypoints_int32'
                kps_feat = self.Keypoint_Head(
                    res5_feat, rpn_ret,
                    roi_has_keypoints_int32=rpn_ret['roi_has_keypoint_int32'])
            else:
                kps_feat = self.Keypoint_Head(blob_conv, rpn_ret)
            kps_pred = self.Keypoint_Outs(kps_feat)
            # return_dict['keypoints_pred'] = kps_pred
            # keypoints loss
            if cfg.KRCNN.NORMALIZE_BY_VISIBLE_KEYPOINTS:
                loss_keypoints = keypoint_rcnn_heads.keypoint_losses(
                    kps_pred, rpn_ret['keypoint_locations_int32'],
                    rpn_ret['keypoint_weights'])
            else:
                loss_keypoints = keypoint_rcnn_heads.keypoint_losses(
                    kps_pred, rpn_ret['keypoint_locations_int32'],
                    rpn_ret['keypoint_weights'],
                    rpn_ret['keypoint_loss_normalizer'])
            return_dict['losses']['loss_kps'] = loss_keypoints

        # pytorch0.4 bug on gathering scalar(0-dim) tensors
        for k, v in return_dict['losses'].items():
            return_dict['losses'][k] = v.unsqueeze(0)
        for k, v in return_dict['metrics'].items():
            return_dict['metrics'][k] = v.unsqueeze(0)
    else:
        # Testing
        return_dict['rois'] = rpn_ret['rois']

        import json
        if cfg.TEST.IOU_OUT:
            # rois can be taken directly from rpn_ret
            with open("/nfs/project/libo_i/IOU.pytorch/IOU_Validation/raw_roi.json", 'w') as f:
                json.dump((return_dict['rois'][:, 1:] / im_info.numpy()[0][2]).tolist(), f)

            # In FPN mode the distributed scores are only available inside a
            # collect_and_distribute... function, so the json is dumped directly
            # in collect_and_distribute_fpn_rpn_proposals.py; only the RPN_ONLY
            # case is handled here.
            if not cfg.FPN.FPN_ON:
                with open("/nfs/project/libo_i/IOU.pytorch/IOU_Validation/rois_score.json", 'w') as f:
                    score_2_json = []
                    for item in rpn_ret['rpn_roi_probs']:
                        score_2_json.append(item.item())
                    json.dump(score_2_json, f)

        # Start the second fast-head stage: first compute pred boxes from the
        # rois and bbox deltas
        if cfg.FAST_RCNN.FAST_HEAD2_DEBUG:
            lvl_min = cfg.FPN.ROI_MIN_LEVEL
            lvl_max = cfg.FPN.ROI_MAX_LEVEL
            if cfg.FPN.FPN_ON:
                im_scale = im_info.data.cpu().numpy().squeeze()[2]
                rois = rpn_ret['rois'][:, 1:5] / im_scale
                bbox_pred = bbox_pred.data.cpu().numpy().squeeze()
                box_deltas = bbox_pred.reshape([-1, bbox_pred.shape[-1]])
                shift_boxes = box_utils.bbox_transform(
                    rois, box_deltas, cfg.MODEL.BBOX_REG_WEIGHTS)
                shift_boxes = box_utils.clip_tiled_boxes(
                    shift_boxes, im_info.data.cpu().numpy().squeeze()[0:2])
                num_classes = cfg.MODEL.NUM_CLASSES

                onecls_pred_boxes = []
                onecls_score = []
                dets_cls = {}
                count = 0
                for j in range(1, num_classes):
                    inds = np.where(cls_score[:, j] > cfg.TEST.SCORE_THRESH)[0]
                    boxes_j = shift_boxes[inds, j * 4:(j + 1) * 4]
                    score_j = cls_score[inds, j]
                    onecls_pred_boxes += boxes_j.tolist()
                    onecls_score += score_j.tolist()
                    dets_cls.update({j: []})
                    for k in range(len(boxes_j.tolist())):
                        dets_cls[j].append(count)
                        count += 1
                assert count == len(onecls_pred_boxes)

                stage2_rois_score = np.array(onecls_score, dtype=np.float32)
                stage2_rois = np.array(onecls_pred_boxes, dtype=np.float32)

                # Redistribute stage2_rois using fpn_utils module provided functions
                # calculate by formula
                cls_tracker = {}
                if not stage2_rois.tolist():
                    stage1_pred_iou = stage2_rois_score.tolist()
                    stage2_final_boxes = np.empty((0, ))
                    stage2_final_score = np.empty((0, ))
                    logger.info("Detections above threshold is null.")
                else:
                    alter_rpn = {}
                    unresize_stage2_rois = stage2_rois * im_scale
                    # unresize_stage2_rois = np.concatenate((unresize_stage2_rois, unresized_rois[:, 1:5]))
                    lvls = fpn_utils.map_rois_to_fpn_levels(
                        unresize_stage2_rois, lvl_min, lvl_max)
                    # TAG: We might need to visualize "stage2_rois" to make sure.
                    rois_idx_order = np.empty((0, ))
                    dummy_batch = np.zeros((unresize_stage2_rois.shape[0], 1),
                                           dtype=np.float32)
                    alter_rpn["rois"] = np.hstack(
                        (dummy_batch, unresize_stage2_rois)).astype(np.float32, copy=False)
                    # alter_rpn['rois'] = np.concatenate((alter_rpn['rois'], unresized_rois))

                    for output_idx, lvl in enumerate(range(lvl_min, lvl_max + 1)):
                        idx_lvl = np.where(lvls == lvl)[0]
                        rois_lvl = unresize_stage2_rois[idx_lvl, :]
                        rois_idx_order = np.concatenate((rois_idx_order, idx_lvl))
                        _ = np.zeros((rois_lvl.shape[0], 1), dtype=np.float32)
                        alter_rpn['rois_fpn{}'.format(lvl)] = np.hstack(
                            (_, rois_lvl)).astype(np.float32, copy=False)
                    rois_idx_restore = np.argsort(rois_idx_order).astype(np.int32, copy=False)
                    alter_rpn['rois_idx_restore_int32'] = rois_idx_restore

                    # Go through 2nd stage of FPN and fast_head
                    stage2_feat = self.Box_Head(blob_conv, alter_rpn)
                    stage2_cls_score, stage2_bbox_pred = self.Box_Outs(stage2_feat)

                    # Transform shift value to original one to get final pred boxes coordinates
                    stage2_bbox_pred = stage2_bbox_pred.data.cpu().numpy().squeeze()
                    stage2_cls_score = stage2_cls_score.data.cpu().numpy()
                    stage2_box_deltas = stage2_bbox_pred.reshape([-1, bbox_pred.shape[-1]])

                    # Add some variance to box delta
                    if cfg.FAST_RCNN.STAGE1_TURBULENCE:
                        import random
                        for i in range(len(stage2_box_deltas)):
                            for j in range(len(stage2_box_deltas[i])):
                                stage2_box_deltas[i][j] *= random.uniform(0.9, 1.1)

                    stage2_cls_out = box_utils.bbox_transform(
                        stage2_rois, stage2_box_deltas, cfg.MODEL.BBOX_REG_WEIGHTS)
                    stage2_cls_out = box_utils.clip_tiled_boxes(
                        stage2_cls_out, im_info.data.cpu().numpy().squeeze()[0:2])

                    onecls_pred_boxes = []
                    onecls_score = []
                    count = 0
                    for j in range(1, num_classes):
                        inds = np.where(stage2_cls_score[:, j] > cfg.TEST.SCORE_THRESH)[0]
                        boxes_j = stage2_cls_out[inds, j * 4:(j + 1) * 4]
                        score_j = stage2_cls_score[inds, j]
                        dets_j = np.hstack(
                            (boxes_j, score_j[:, np.newaxis])).astype(np.float32, copy=False)
                        keep = box_utils.nms(dets_j, cfg.TEST.NMS)
                        boxes_j = boxes_j[keep]
                        score_j = score_j[keep]
                        # record which class each box belongs to
                        onecls_score += score_j.tolist()
                        onecls_pred_boxes += boxes_j.tolist()
                        for k in range(len(score_j)):
                            cls_tracker.update({count: j})
                            count += 1
                    assert count == len(onecls_score)

                    stage2_final_boxes = np.array(onecls_pred_boxes, dtype=np.float32)
                    stage2_final_score = np.array(onecls_score, dtype=np.float32)
                    inds = np.where(stage2_final_score > 0.3)[0]

                    # Filtered by keep index...
                    preserve_stage2_final_boxes = copy.deepcopy(stage2_final_boxes)
                    preserve_stage2_final_score = copy.deepcopy(stage2_final_score)
                    stage2_final_boxes = stage2_final_boxes[inds]
                    stage2_final_score = stage2_final_score[inds]

                    # if nothing left after 0.3 threshold filter, reserve whole boxes to original.
                    if stage2_final_boxes.size == 0:
                        lower_inds = np.where(preserve_stage2_final_score > 0.1)[0]
                        stage2_final_boxes = preserve_stage2_final_boxes[lower_inds]
                        stage2_final_score = preserve_stage2_final_score[lower_inds]
                    else:
                        del preserve_stage2_final_boxes
                        del preserve_stage2_final_score

                    # if all boxes are classified into bg class.
                    if stage2_final_boxes.size == 0:
                        stage1_pred_iou = stage2_rois_score.tolist()
                        stage2_final_boxes = np.empty((0, ))
                        stage2_final_score = np.empty((0, ))
                        logger.info("Detections above threshold is null.")
                    else:
                        # Restore stage2_pred_boxes to match the index with stage2_rois,
                        # Compute IOU between final_boxes and stage2_rois, one by one
                        flag = "cross_product"
                        if flag == "element_wise":
                            if stage2_final_boxes.shape[0] == stage2_rois.shape[0]:
                                restored_stage2_final_boxes = stage2_final_boxes[rois_idx_restore]
                                stage1_pred_iou = []
                                for ind, item in enumerate(stage2_rois):
                                    stage1 = np.array(item, dtype=np.float32).reshape((1, 4))
                                    stage2 = np.array(restored_stage2_final_boxes[ind],
                                                      dtype=np.float32).reshape((1, 4))
                                    iou = box_utils.bbox_overlaps(stage1, stage2)
                                    stage1_pred_iou.append(iou.squeeze().item())
                            else:
                                logger.info("Mistake while processing {}".format(str(im_info)))
                        elif flag == "cross_product":
                            iou = box_utils.bbox_overlaps(stage2_rois, stage2_final_boxes)
                            stage1_pred_iou = iou.max(axis=1).tolist()

                # stage1_pred is another name of stage2_rois
                assert len(stage1_pred_iou) == len(stage2_rois)
                if cfg.FAST_RCNN.IOU_NMS:
                    with open("/nfs/project/libo_i/IOU.pytorch/IOU_Validation/iou_stage1_score.json", "w") as f:
                        json.dump(stage2_rois_score.tolist(), f)
                    with open("/nfs/project/libo_i/IOU.pytorch/IOU_Validation/iou_stage2_score.json", "w") as f:
                        json.dump(stage2_final_score.tolist(), f)
                    with open("/nfs/project/libo_i/IOU.pytorch/IOU_Validation/iou_stage1_pred_boxes.json", 'w') as f:
                        json.dump(stage2_rois.tolist(), f)
                    with open("/nfs/project/libo_i/IOU.pytorch/IOU_Validation/iou_stage1_pred_iou.json", 'w') as f:
                        json.dump(stage1_pred_iou, f)
                    with open("/nfs/project/libo_i/IOU.pytorch/IOU_Validation/iou_stage2_pred_boxes.json", 'w') as f:
                        json.dump(stage2_final_boxes.tolist(), f)
                    with open("/nfs/project/libo_i/IOU.pytorch/IOU_Validation/iou_dets_cls.json", 'w') as f:
                        json.dump(dets_cls, f)
                    with open("/nfs/project/libo_i/IOU.pytorch/IOU_Validation/iou_cls_tracker.json", 'w') as f:
                        json.dump(cls_tracker, f)
                elif cfg.FAST_RCNN.SCORE_NMS:
                    with open("/nfs/project/libo_i/IOU.pytorch/IOU_Validation/score_stage1_score.json", "w") as f:
                        json.dump(stage2_rois_score.tolist(), f)
                    with open("/nfs/project/libo_i/IOU.pytorch/IOU_Validation/score_stage2_score.json", "w") as f:
                        json.dump(stage2_final_score.tolist(), f)
                    with open("/nfs/project/libo_i/IOU.pytorch/IOU_Validation/score_stage1_pred_boxes.json", 'w') as f:
                        json.dump(stage2_rois.tolist(), f)
                    with open("/nfs/project/libo_i/IOU.pytorch/IOU_Validation/score_stage1_pred_iou.json", 'w') as f:
                        json.dump(stage1_pred_iou, f)
                    with open("/nfs/project/libo_i/IOU.pytorch/IOU_Validation/score_stage2_pred_boxes.json", 'w') as f:
                        json.dump(stage2_final_boxes.tolist(), f)
                    with open("/nfs/project/libo_i/IOU.pytorch/IOU_Validation/score_dets_cls.json", 'w') as f:
                        json.dump(dets_cls, f)
                    with open("/nfs/project/libo_i/IOU.pytorch/IOU_Validation/score_cls_tracker.json", 'w') as f:
                        json.dump(cls_tracker, f)
            else:
                im_scale = im_info.data.cpu().numpy().squeeze()[2]
                rois = rpn_ret['rois'][:, 1:5] / im_scale
                # unscale back to raw image space
                box_deltas = bbox_pred.data.cpu().numpy().squeeze()
                fast_stage1_score = cls_score.data.cpu().numpy().squeeze()
                box_deltas = box_deltas.reshape([-1, bbox_pred.shape[-1]])
                stage2_rois = box_utils.bbox_transform(
                    rois, box_deltas, cfg.MODEL.BBOX_REG_WEIGHTS)
                stage2_rois = box_utils.clip_tiled_boxes(
                    stage2_rois, im_info.data.cpu().numpy().squeeze()[0:2])
                num_classes = cfg.MODEL.NUM_CLASSES

                onecls_pred_boxes = []
                onecls_cls_score = []
                for j in range(1, num_classes):
                    inds = np.where(cls_score[:, j] > cfg.TEST.SCORE_THRESH)[0]
                    boxes_j = stage2_rois[inds, j * 4:(j + 1) * 4]
                    score_j = fast_stage1_score[inds, j]
                    onecls_pred_boxes += boxes_j.tolist()
                    onecls_cls_score += score_j.tolist()
                stage2_rois = np.array(onecls_pred_boxes, dtype=np.float32)
                stage2_rois_score = np.array(onecls_cls_score, dtype=np.float32)
                assert len(stage2_rois) == len(stage2_rois_score)

                # Send stage2 rois to next stage fast head, do ROI ALIGN again
                # to modify rpn_ret['rois'], rpn_ret['rpn_rois'] and rpn['rois_rpn_score']
                rpn_ret['rois'] = stage2_rois
                rpn_ret['rpn_rois'] = stage2_rois
                rpn_ret['rpn_roi_probs'] = stage2_rois_score
                stage2_box_feat = self.Box_Head(blob_conv, rpn_ret)
                stage2_cls_score, stage2_bbox_pred = self.Box_Outs(stage2_box_feat)

                stage2_bbox_pred = stage2_bbox_pred.data.cpu().numpy().squeeze()
                stage2_bbox_pred = stage2_bbox_pred.reshape([-1, bbox_pred.shape[-1]])
                stage2_cls_pred_boxes = box_utils.bbox_transform(
                    stage2_rois, stage2_bbox_pred, cfg.MODEL.BBOX_REG_WEIGHTS)
                stage2_cls_pred_boxes = box_utils.clip_tiled_boxes(
                    stage2_cls_pred_boxes, im_info.data.cpu().numpy().squeeze()[0:2])

                onecls_pred_boxes = []
                onecls_cls_score = []
                for j in range(1, num_classes):
                    inds = np.where(stage2_cls_score[:, j] > cfg.TEST.SCORE_THRESH)[0]
                    if len(inds) != 0:
                        print("KKKKK")
                    boxes_j = stage2_cls_pred_boxes[inds, j * 4:(j + 1) * 4]
                    score_j = stage2_cls_score[inds, j]
                    onecls_pred_boxes += boxes_j.tolist()
                    onecls_cls_score += score_j.tolist()
                stage2_bbox_pred = np.array(onecls_pred_boxes, dtype=np.float32)
                stage2_bbox_pred_score = np.array(onecls_cls_score, dtype=np.float32)
                # get stage2 pred_boxes here

        return_dict['cls_score'] = cls_score
        return_dict['bbox_pred'] = bbox_pred
    return return_dict
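# The cross_product branch above relies on `box_utils.bbox_overlaps`, which
# returns the full IoU matrix between two box sets. A vectorized NumPy sketch
# of that assumed behavior (Detectron uses the +1 pixel-area convention):
import numpy as np

def bbox_overlaps_sketch(boxes, query_boxes):
    """IoU matrix of shape (len(boxes), len(query_boxes))."""
    b = boxes[:, None, :]            # (N, 1, 4)
    q = query_boxes[None, :, :]      # (1, K, 4)
    iw = np.minimum(b[..., 2], q[..., 2]) - np.maximum(b[..., 0], q[..., 0]) + 1
    ih = np.minimum(b[..., 3], q[..., 3]) - np.maximum(b[..., 1], q[..., 1]) + 1
    inter = np.clip(iw, 0, None) * np.clip(ih, 0, None)
    area_b = (b[..., 2] - b[..., 0] + 1) * (b[..., 3] - b[..., 1] + 1)
    area_q = (q[..., 2] - q[..., 0] + 1) * (q[..., 3] - q[..., 1] + 1)
    return inter / (area_b + area_q - inter)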
def calculate_sim_ap(pkl_ls, gt_info_ls, pred_ls, thres):
    ap_all_ls = []
    support_box_cls_ls = get_box_cls_ls(pkl_ls)

    # Count predicted boxes and record the feature length
    box_num = 0
    for img_id, img in enumerate(pred_ls):
        box_feat = np.squeeze(img['box_feat'], axis=(2, 3))
        box_num += box_feat.shape[0]
        feat_len = box_feat.shape[1]
    print('pred_box_num, feat_len:', box_num, feat_len)

    # Gather predicted (query) boxes and their features
    query_roidb_ls = np.zeros((box_num, 5), dtype=np.float32)
    query_box_feat_ls = np.zeros((box_num, feat_len), dtype=np.float32)
    pred_img_ls = []
    for img_id, img in enumerate(pred_ls):
        box_feat = np.squeeze(img['box_feat'], axis=(2, 3))
        roidb = img['roidb']
        current_box_num = img['box_feat'].shape[0]
        box_start = len(pred_img_ls)
        query_roidb_ls[box_start:box_start + current_box_num, :-1] = roidb[:, 1:]
        query_box_feat_ls[box_start:box_start + current_box_num, :] = box_feat
        pred_img_ls.extend([img_id] * current_box_num)  # image id of every box
    pred_img_ls = np.array(pred_img_ls)

    # Count ground-truth boxes
    gt_img_ls = []
    gt_cls_ls = []
    gt_box_num = 0
    for img_id, img in enumerate(pkl_ls):
        box_feat = np.squeeze(img['box_feat'], axis=(2, 3))
        gt_box_num += box_feat.shape[0]
        feat_len = box_feat.shape[1]
    print('gt_box_num, feat_len:', gt_box_num, feat_len)

    # Gather ground-truth boxes and their classes
    gt_box_ls = np.zeros((gt_box_num, 4), dtype=np.float32)
    for img_id, img in enumerate(pkl_ls):
        roidb = img['roidb']
        current_box_num = roidb.shape[0]
        box_start = len(gt_img_ls)
        gt_box_ls[box_start:box_start + current_box_num, :] = roidb[:, 1:]
        gt_img_ls.extend([img_id] * current_box_num)  # image id of every box
        gt_cls_ls.extend(roidb[:, 0].astype(np.int16).tolist())

    # Gather ground-truth image ids and paths
    gt_id_ls = []
    gt_path_ls = []
    for img_id, img in enumerate(gt_info_ls):
        gt_id_ls.append(img['entry']['id'])
        gt_path_ls.append(img['entry']['image'])

    # Use the last class to size the loop: class 0 (background) has no support boxes
    for support_id in range(len(support_box_cls_ls[-1])):
        support_roidb_ls = np.array([])
        support_box_feat_ls = np.array([])
        support_box_ls = []
        support_img_ls = []
        support_id_ls = []
        for cls_id in range(len(support_box_cls_ls)):
            if len(support_box_cls_ls[cls_id]) == 0:
                # skip classes with no boxes (background);
                # NOTE: this shortcut is buggy when predicted boxes are used
                continue
            support_item = support_box_cls_ls[cls_id][support_id]
            support_img_id = int(support_item.split('-')[0])
            support_box_id = int(support_item.split('-')[-1])
            support_img_ls.append(support_img_id)
            support_box_ls.append(support_box_id)
            support_img = pkl_ls[support_img_id]
            support_box_feat = np.squeeze(
                support_img['box_feat'], axis=(2, 3))[support_box_id][np.newaxis, :]
            support_roidb = support_img['roidb'][support_box_id][np.newaxis, :]
            if support_roidb_ls.shape[0] != 0:
                support_roidb_ls = np.concatenate(
                    (support_roidb_ls, support_roidb), axis=0)
                support_box_feat_ls = np.concatenate(
                    (support_box_feat_ls, support_box_feat), axis=0)
            else:
                support_roidb_ls = support_roidb
                support_box_feat_ls = support_box_feat
            support_id_ls.append(str(cls_id) + '_' + support_item)
        support_img_ls = np.array(support_img_ls)

        start = time.time()
        # Exclude query boxes that come from the support images themselves
        query_box_feat_ls_now = deepcopy(query_box_feat_ls)
        query_box_feat_ls_now = query_box_feat_ls_now[
            ~np.in1d(pred_img_ls, support_img_ls)]
        sim_matrix = 1 - distance.cdist(
            query_box_feat_ls_now, support_box_feat_ls, 'cosine')
        sim_matrix_max = np.max(sim_matrix, axis=1)

        pred_box_ls = deepcopy(query_roidb_ls)
        pred_box_ls = pred_box_ls[~np.in1d(pred_img_ls, support_img_ls)]
        pred_box_ls[:, -1] = sim_matrix_max
        pred_img_ls_now = pred_img_ls[~np.in1d(pred_img_ls, support_img_ls)]

        # Boxes whose best similarity is <= thres are treated as background
        sim_matrix_bg = np.where(sim_matrix_max <= thres)
        sim_matrix_cls = np.argmax(sim_matrix, axis=1) + 1
        # NOTE: the +1 is needed with gt boxes; drop it when predicted boxes are used
        sim_matrix_cls[sim_matrix_bg] = 0
        pred_cls_ls = deepcopy(sim_matrix_cls)

        current_gt_cls_ls = np.array(gt_cls_ls)[~np.in1d(gt_img_ls, support_img_ls)]
        current_gt_img_ls = np.array(gt_img_ls)[~np.in1d(gt_img_ls, support_img_ls)]
        current_gt_box_ls = deepcopy(gt_box_ls)[~np.in1d(gt_img_ls, support_img_ls)]
        cls_unique = np.unique(current_gt_cls_ls)
        img_unique = np.unique(current_gt_img_ls)

        ap_ls = []
        ovthresh = 0.5
        ap_save = str(thres) + ',' + str(support_id)
        for cls in cls_unique:
            tp = []
            fp = []
            npos = (current_gt_cls_ls == cls).sum()
            total_pred_box = 0
            total_pred_box_after_nms = 0
            total_gt_box = 0
            total_img = 0
            start = time.time()
            confidence_ls = []
            for img in img_unique:
                pred_box = deepcopy(pred_box_ls)[pred_img_ls_now == img, :]
                pred_cls = deepcopy(pred_cls_ls)[pred_img_ls_now == img]
                pred_box = pred_box[pred_cls == cls, :]
                keep = box_utils.nms(pred_box, 0.3)
                nms_box = pred_box[keep, :]

                # ground truth for this image and class
                gt_box = current_gt_box_ls[current_gt_img_ls == img, :]
                gt_cls = current_gt_cls_ls[current_gt_img_ls == img]
                gt_box = gt_box[gt_cls == cls, :]

                confidence = nms_box[:, -1]
                sorted_ind = np.argsort(-confidence)
                nms_box = nms_box[sorted_ind, :-1]  # sort boxes by descending confidence
                confidence = confidence[sorted_ind]
                confidence_ls.extend(confidence.tolist())
                det_flag = [False] * gt_box.shape[0]
                BBGT = gt_box.astype(float)
                total_pred_box += pred_box.shape[0]
                total_pred_box_after_nms += nms_box.shape[0]
                total_gt_box += gt_box.shape[0]
                if gt_box.shape[0] != 0:
                    total_img += 1
                for box_i in nms_box:
                    bb = box_i.astype(float)
                    ovmax = -np.inf
                    if BBGT.size > 0:
                        # compute overlaps: intersection
                        ixmin = np.maximum(BBGT[:, 0], bb[0])
                        iymin = np.maximum(BBGT[:, 1], bb[1])
                        ixmax = np.minimum(BBGT[:, 2], bb[2])
                        iymax = np.minimum(BBGT[:, 3], bb[3])
                        iw = np.maximum(ixmax - ixmin + 1., 0.)
                        ih = np.maximum(iymax - iymin + 1., 0.)
                        inters = iw * ih
                        # union
                        uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
                               (BBGT[:, 2] - BBGT[:, 0] + 1.) *
                               (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)
                        overlaps = inters / uni
                        ovmax = np.max(overlaps)
                        jmax = np.argmax(overlaps)
                    if ovmax > ovthresh:
                        if not det_flag[jmax]:
                            tp.append(1.)
                            fp.append(0.)
                            det_flag[jmax] = 1
                        else:  # duplicate detection of an already-matched gt box
                            tp.append(0.)
                            fp.append(1.)
                    else:
                        tp.append(0.)
                        fp.append(1.)
            # sort tp/fp over all images by descending confidence
            tp = np.array(tp)
            fp = np.array(fp)
            confidence_ls = np.array(confidence_ls)
            sorted_ind = np.argsort(-confidence_ls)
            tp = tp[sorted_ind]
            fp = fp[sorted_ind]
            img_id = int(support_img_ls[int(cls) - 1])

            # compute precision and recall
            fp = np.cumsum(fp)
            tp = np.cumsum(tp)
            rec = tp / float(npos)
            # avoid divide by zero in case the first detection matches a
            # difficult ground truth
            prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
            current_ap = voc_ap(rec, prec)

            # visualize the support box on its source image
            im_path = gt_path_ls[img_id]
            im_name = im_path.split('/')[-1]
            im_dir = os.path.join('./log/figure', str(thres), 'class_' + str(cls))
            output_name = '%.4f' % round(current_ap, 4) + '_' + str(im_name)
            if not os.path.exists(im_dir):
                os.makedirs(im_dir)
            im = cv2.imread(im_path)
            current_support_box = support_roidb_ls[int(cls) - 1][1:]
            assert cls == int(support_roidb_ls[int(cls) - 1][0])
            vis_image(
                im[:, :, ::-1],
                current_support_box,
                output_name,
                im_dir,
            )
            ap_ls.append(current_ap)
            ap_save += ',' + str(current_ap) + ',' + support_id_ls[int(cls) - 1]

        # NOTE: with gt boxes the background class is already excluded from
        # cls_unique; adjust this mean when predicted boxes are used
        ap = sum(ap_ls) / float(len(ap_ls))
        ap_all_ls.append(ap)
        print('support_id:', support_id, ' threshold:', thres, ' ap:', ap)

        root_path = './log'
        support_ap_path = os.path.join(root_path, 'support_ap.csv')
        all_ap_path = os.path.join(root_path, 'all_ap.csv')
        if not os.path.exists(root_path):
            os.makedirs(root_path)
        with open(support_ap_path, 'a') as f:
            f.write(str(thres) + ',' + str(support_id) + ',' + str(ap) + '\n')
        with open(all_ap_path, 'a') as f1:
            f1.write(ap_save + '\n')
    return ap_all_ls
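
# `voc_ap` is called above but not defined in this excerpt. A minimal sketch
# of what it is assumed to compute -- the all-point-interpolated average
# precision from the PASCAL VOC devkit (the use_07_metric=False branch):
def voc_ap_sketch(rec, prec):
    """AP as the area under the precision-recall curve, after making
    precision monotonically non-increasing from right to left."""
    import numpy as np  # np is already imported at module level in this file
    mrec = np.concatenate(([0.], rec, [1.]))
    mpre = np.concatenate(([0.], prec, [0.]))
    # make precision monotonically non-increasing
    for i in range(mpre.size - 1, 0, -1):
        mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
    # sum the areas over the recall steps where recall changes
    i = np.where(mrec[1:] != mrec[:-1])[0]
    return np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])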
def main():
    """main function"""
    if not torch.cuda.is_available():
        sys.exit("Need a CUDA device to run the code.")

    args = parse_args()
    print('Called with args:')
    print(args)

    assert args.image_dir or args.images
    assert bool(args.image_dir) ^ bool(args.images)

    dataset = datasets.get_hospital_dataset()
    cfg.MODEL.NUM_CLASSES = 20  # includes the background class
    num_class = cfg.MODEL.NUM_CLASSES
    sents = dataset.sents
    th_cls = dataset.th_cls
    cls2eng = dataset.cls2eng
    eng2type = dataset.eng2type

    print('load cfg from file: {}'.format(args.cfg_file))
    cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    assert bool(args.load_ckpt) ^ bool(args.load_detectron), \
        'Exactly one of --load_ckpt and --load_detectron should be specified.'
    cfg.MODEL.LOAD_IMAGENET_PRETRAINED_WEIGHTS = False  # no need to load ImageNet pretrained weights
    assert_and_infer_cfg()

    maskRCNN = Generalized_RCNN()

    if args.cuda:
        maskRCNN.cuda()

    if args.load_ckpt:
        load_name = args.load_ckpt
        print("loading checkpoint %s" % load_name)
        checkpoint = torch.load(load_name,
                                map_location=lambda storage, loc: storage)
        net_utils.load_ckpt(maskRCNN, checkpoint['model'])

    if args.load_detectron:
        print("loading detectron weights %s" % args.load_detectron)
        load_detectron_weight(maskRCNN, args.load_detectron)

    maskRCNN = mynn.DataParallel(maskRCNN,
                                 cpu_keywords=['im_info', 'roidb'],
                                 minibatch=True,
                                 device_ids=[0])  # only supports a single GPU
    maskRCNN.eval()

    if args.image_dir:
        imglist = misc_utils.get_imagelist_from_dir(args.image_dir)
    else:
        imglist = args.images
    num_images = len(imglist)
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    for i in range(num_images):  # for each image
        print('img', i)
        im = cv2.imread(imglist[i])
        assert im is not None

        timers = defaultdict(Timer)

        # detection
        cls_boxes, cls_segms, cls_keyps = im_detect_all(maskRCNN, im,
                                                        timers=timers)

        # first collect boxes from all classes
        dets_total = np.empty([0, 6], dtype=np.float32)
        for cls in range(1, num_class):  # for each class
            dets = cls_boxes[cls]
            if dets.shape[0] == 0:
                continue
            # pad one column on the right, filled with the class id
            dets_extend = np.pad(dets, ((0, 0), (0, 1)),
                                 mode='constant', constant_values=cls)
            dets_total = np.vstack((dets_total, dets_extend))

        # then apply a loose cross-class NMS so that each region carries
        # only one symptom
        keep = box_utils.nms(dets_total, 0.7)
        nms_dets = dets_total[keep, :]

        # iterate through the remaining boxes
        report, healthy = '', True
        have_sym_of_cls = [False for _ in range(num_class)]

        n = nms_dets.shape[0]
        final_results = []  # returned to the web front end
        for idx in range(n):  # for each region
            th, cls = nms_dets[idx, -2], int(nms_dets[idx, -1])
            if th > th_cls[cls]:  # diagnosed to have the symptom
                report += sents[cls][1]
                have_sym_of_cls[cls] = True
                healthy = False

                ename = cls2eng[int(cls)]
                _type = eng2type[ename]
                final_results.append({
                    'name': ename,
                    'type': _type,
                    'box': list(nms_dets[idx, 0:4])
                })

        for cls in range(1, num_class):  # for each class
            if not have_sym_of_cls[cls]:  # no symptom of this class
                report += sents[cls][0]

        if healthy:  # use the report for healthy people
            report = sents[0][0]
        print(report)

        pdb.set_trace()

        im_name, _ = os.path.splitext(os.path.basename(imglist[i]))
        # vis_utils.vis_one_image(
        #     im[:, :, ::-1],  # BGR -> RGB for visualization
        #     im_name,
        #     args.output_dir,
        #     cls_boxes,
        #     cls_segms,
        #     cls_keyps,
        #     dataset=dataset,
        #     box_alpha=0.3,
        #     show_class=True,
        #     thresh=0.05,
        #     kp_thresh=2
        # )

    if args.merge_pdfs and num_images > 1:
        merge_out_path = '{}/results.pdf'.format(args.output_dir)
        if os.path.exists(merge_out_path):
            os.remove(merge_out_path)
        command = "pdfunite {}/*.pdf {}".format(args.output_dir,
                                                merge_out_path)
        subprocess.call(command, shell=True)
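
# Toy illustration of the merge step in `main` above (assumed shapes only):
# `np.pad` with one extra column on the right and constant_values=cls turns an
# (N, 5) [x1, y1, x2, y2, score] array into (N, 6) [..., score, cls], so boxes
# from every class can be stacked and passed through one loose cross-class NMS.
def _demo_append_class_column():
    import numpy as np
    dets = np.array([[10., 10., 100., 100., 0.9]], dtype=np.float32)
    dets_extend = np.pad(dets, ((0, 0), (0, 1)), mode='constant',
                         constant_values=3)  # class id 3
    print(dets_extend)  # [[ 10.  10. 100. 100.   0.9   3. ]]
    return dets_extend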
def evaluate_mAP_combine(json_datasets, roidbs, all_boxes_list, output_dir,
                         cleanup=False):
    """LJY: all_boxes is num_cls x num_images x [num_boxes x 5]."""
    json_dataset = json_datasets[0]
    mAP_folder = '/home/liujingyu/code/mAP'
    roidb, roidb_part = roidbs[0], roidbs[1]
    all_boxes, all_boxes_part = all_boxes_list[0], all_boxes_list[1]
    small_classes = ['结节', '肺实变', '膈面异常', '骨折']

    # extra aggregate labels that group several fine-grained classes
    aggregate_labels = {
        '肿块结节钙化': ['肿块', '结节', '钙化', '乳头影'],
        '高密度影': ['纤维化表现', '肺实变', '肺纹理增多', '肿块', '弥漫性结节'],
        '低密度影': ['气胸', '气肿'],
    }

    for i, (entry, entry_part) in enumerate(zip(roidb, roidb_part)):
        # for each pair of (full image, part image)
        assert entry['file_name'] == entry_part['file_name']
        file_name = entry['file_name'][:-4] + '.txt'
        fgt = open(osp.join(mAP_folder, 'ground-truth', file_name), 'w')
        fpred = open(osp.join(mAP_folder, 'predicted', file_name), 'w')

        for cls_ind, cls in enumerate(json_dataset.classes):  # for each class
            if cls == '__background__':
                continue
            if cls_ind >= len(all_boxes):
                break

            gt_classes = roidb[i]['gt_classes']
            ind = np.where(gt_classes == cls_ind)
            gt_boxes = roidb[i]['boxes'][ind]

            dets = all_boxes[cls_ind][i]  # N x 5: [x1, y1, x2, y2, score]
            dets_part = all_boxes_part[cls_ind][i]  # N x 5

            # shift the part-image detections back to full-image coordinates
            dets_part[:, 0] += entry_part['offset_x']
            dets_part[:, 2] += entry_part['offset_x']
            dets_part[:, 1] += entry_part['offset_y']
            dets_part[:, 3] += entry_part['offset_y']

            # optionally prefer the part-image detections for small classes
            # if cls in small_classes:
            #     dets = dets_part

            # merge detections from the full image and the part image
            dets = np.vstack((dets, dets_part))

            # NMS on the merged detections
            if cfg.TEST.SOFT_NMS.ENABLED:
                nms_dets, _ = box_utils.soft_nms(
                    dets,
                    sigma=cfg.TEST.SOFT_NMS.SIGMA,
                    overlap_thresh=cfg.TEST.NMS,
                    score_thresh=0.0001,
                    method=cfg.TEST.SOFT_NMS.METHOD)
            else:
                keep = box_utils.nms(dets, cfg.TEST.NMS)
                nms_dets = dets[keep, :]
            # Refine the post-NMS boxes using bounding-box voting
            if cfg.TEST.BBOX_VOTE.ENABLED:
                nms_dets = box_utils.box_voting(
                    nms_dets,
                    dets,
                    cfg.TEST.BBOX_VOTE.VOTE_TH,
                    scoring_method=cfg.TEST.BBOX_VOTE.SCORING_METHOD)
            dets = nms_dets

            # write gt boxes, format: "tvmonitor 2 10 173 238",
            # duplicating each box under its aggregate labels
            for k in range(gt_boxes.shape[0]):
                fgt.write('{} {:f} {:f} {:f} {:f}\n'.format(
                    cls, gt_boxes[k, 0], gt_boxes[k, 1],
                    gt_boxes[k, 2], gt_boxes[k, 3]))
                for agg_cls, members in aggregate_labels.items():
                    if cls in members:
                        fgt.write('{} {:f} {:f} {:f} {:f}\n'.format(
                            agg_cls, gt_boxes[k, 0], gt_boxes[k, 1],
                            gt_boxes[k, 2], gt_boxes[k, 3]))

            # write predicted boxes, format: "tvmonitor 0.471781 0 13 174 244"
            for k in range(dets.shape[0]):
                fpred.write('{} {:f} {:f} {:f} {:f} {:f}\n'.format(
                    cls, dets[k, -1], dets[k, 0], dets[k, 1],
                    dets[k, 2], dets[k, 3]))
                for agg_cls, members in aggregate_labels.items():
                    if cls in members:
                        fpred.write('{} {:f} {:f} {:f} {:f} {:f}\n'.format(
                            agg_cls, dets[k, -1], dets[k, 0], dets[k, 1],
                            dets[k, 2], dets[k, 3]))

            if gt_boxes.shape[0] > 0:  # draw gt boxes and predicted boxes
                im = cv2.imread(entry['image'])
                more_text = str(entry['eva_id']) + ' ' + entry['doc_name']
                im = vis_boxes_ljy(im, gt_boxes, dets[:, :-1], more_text)
                out_path = os.path.join(
                    '/data5/liujingyu/mask_rcnn_Outputs/vis', cls,
                    entry['file_name'])
                cv2.imwrite(out_path, im)
                pdb.set_trace()

        fgt.close()
        fpred.close()
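
# Toy illustration of the coordinate shift in `evaluate_mAP_combine`: a box
# detected in a cropped part image is moved back into full-image coordinates
# by adding the crop's top-left offset before merging and NMS.
def _demo_offset_part_dets():
    import numpy as np
    dets_part = np.array([[10., 20., 50., 60., 0.8]], dtype=np.float32)
    offset_x, offset_y = 100., 200.  # hypothetical crop origin
    dets_part[:, [0, 2]] += offset_x
    dets_part[:, [1, 3]] += offset_y
    print(dets_part)  # [[110. 220. 150. 260.   0.8]]
    return dets_part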
def im_detect_all(model, im, box_proposals=None, timers=None,
                  im_name_tag=None):
    """Process the outputs of the model for testing.

    Args:
        model: the network module
        im: BGR image to run detection on
        box_proposals: optional precomputed box proposals
        timers: record the time cost of the different steps
        im_name_tag: tag used to locate the per-image debug dump files
    """
    if timers is None:
        timers = defaultdict(Timer)

    timers['im_detect_bbox'].tic()
    if cfg.TEST.BBOX_AUG.ENABLED:
        scores, boxes, im_scale, blob_conv = im_detect_bbox_aug(
            model, im, box_proposals)
    else:
        scores, boxes, im_scale, blob_conv = im_detect_bbox(
            model, im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE, box_proposals,
            file_tag_path=im_name_tag)
    timers['im_detect_bbox'].toc()

    dict_i = {}
    path = "/nfs/project/libo_i/IOU.pytorch/IOU_Validation"

    timers['misc_bbox'].tic()
    if cfg.FAST_RCNN.FAST_HEAD2_DEBUG:
        # the two debug dump sets share a naming scheme that differs only in
        # the 'iou_' / 'score_' prefix
        if cfg.FAST_RCNN.IOU_NMS:
            prefix = 'iou'
        elif cfg.FAST_RCNN.SCORE_NMS:
            prefix = 'score'
        with open(os.path.join(path, "{}_stage1_pred_iou.json".format(prefix)), "r") as f:
            stage1_pred_iou = np.array(json.load(f), dtype="float32")
        with open(os.path.join(path, "{}_stage1_pred_boxes.json".format(prefix)), "r") as f:
            stage1_pred_boxes = np.array(json.load(f), dtype="float32")
        with open(os.path.join(path, "{}_stage2_pred_boxes.json".format(prefix)), "r") as f:
            stage2_pred_boxes = np.array(json.load(f), dtype="float32")
        with open(os.path.join(path, "{}_dets_cls.json".format(prefix)), "r") as f:
            dets_cls = json.load(f)
        with open(os.path.join(path, "{}_stage1_score.json".format(prefix)), "r") as f:
            stage1_score = json.load(f)
        with open(os.path.join(path, "{}_stage2_score.json".format(prefix)), "r") as f:
            stage2_score = json.load(f)

        dict_i['stage1_out'] = stage1_pred_boxes
        dict_i['shift_iou'] = stage1_pred_iou
        dict_i['stage1_score'] = stage1_score
        dict_i['stage2_score'] = stage2_score
        dict_i['stage2_out'] = stage2_pred_boxes

        stage1_score = np.array(stage1_score, dtype=np.float32)
        if cfg.FAST_RCNN.IOU_NMS:
            scores, boxes, cls_boxes = iou_box_nms_and_limit(
                stage1_pred_boxes, stage1_pred_iou, dets_cls, stage1_score)
        elif cfg.FAST_RCNN.SCORE_NMS:
            scores, boxes, cls_boxes = iou_box_nms_and_limit(
                stage1_pred_boxes, stage1_score, dets_cls, stage1_score)
        logger.info("Length of inds is {}".format(boxes.shape[0]))
        dict_i['boxes'] = boxes
        dict_i['score'] = scores
        # scores and boxes cover the whole image after score thresholding and
        # NMS (they are not separated by class) (numpy.ndarray);
        # cls_boxes holds boxes and scores separated by class, in the format
        # used for evaluating results
    else:
        scores, boxes, cls_boxes = box_results_with_nms_and_limit(scores, boxes)
        logger.info("Length of inds is {}".format(boxes.shape[0]))
    timers['misc_bbox'].toc()

    if cfg.TEST.IOU_OUT:
        with open(os.path.join(path, "shifted_boxes.json"), "r") as f:
            pred_boxes = np.array(json.load(f), dtype="float32")
        with open(os.path.join(path, "raw_roi.json"), "r") as f:
            raw_roi = np.array(json.load(f), dtype="float32")
        with open(os.path.join(path, "rois_score.json"), "r") as f:
            rpn_score = np.array(json.load(f), dtype="float32")
        with open(os.path.join(path, "shifted_boxes_scores.json"), "r") as f:
            pred_box_scores = np.array(json.load(f), dtype="float32")

        roi_to_shift = predbox_roi_iou(raw_roi, pred_boxes)

        # Teacher Gu's NMS pipeline -- unclear whether it should be enabled
        if cfg.FAST_RCNN.IOU_NMS:
            bbox_with_score = np.hstack(
                (raw_roi, roi_to_shift[:, np.newaxis])).astype(np.float32,
                                                               copy=False)
        else:
            bbox_with_score = np.hstack(
                (raw_roi, rpn_score[:, np.newaxis])).astype(np.float32,
                                                            copy=False)
        keep = box_utils.nms(bbox_with_score, cfg.TEST.NMS)

        dict_i['rois'] = raw_roi
        dict_i['shift_iou'] = roi_to_shift.tolist()
        dict_i['rois_score'] = rpn_score.tolist()
        dict_i['pred_boxes'] = pred_boxes
        dict_i['keep'] = keep
        dict_i['pred_boxes_scores'] = pred_box_scores.tolist()

    if cfg.MODEL.MASK_ON and boxes.shape[0] > 0:
        timers['im_detect_mask'].tic()
        if cfg.TEST.MASK_AUG.ENABLED:
            masks = im_detect_mask_aug(model, im, boxes, im_scale, blob_conv)
        else:
            masks = im_detect_mask(model, im_scale, boxes, blob_conv)
        timers['im_detect_mask'].toc()

        timers['misc_mask'].tic()
        cls_segms = segm_results(cls_boxes, masks, boxes, im.shape[0],
                                 im.shape[1])
        timers['misc_mask'].toc()
    else:
        cls_segms = None

    if cfg.MODEL.KEYPOINTS_ON and boxes.shape[0] > 0:
        timers['im_detect_keypoints'].tic()
        if cfg.TEST.KPS_AUG.ENABLED:
            heatmaps = im_detect_keypoints_aug(model, im, boxes, im_scale,
                                               blob_conv)
        else:
            heatmaps = im_detect_keypoints(model, im_scale, boxes, blob_conv)
        timers['im_detect_keypoints'].toc()

        timers['misc_keypoints'].tic()
        cls_keyps = keypoint_results(cls_boxes, heatmaps, boxes)
        timers['misc_keypoints'].toc()
    else:
        cls_keyps = None

    return cls_boxes, cls_segms, cls_keyps, dict_i
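
# `predbox_roi_iou` is not defined in this excerpt. A minimal sketch under the
# assumption that it returns, for each RoI, the IoU between row i of `raw_roi`
# and row i of `pred_boxes` (each proposal against its own regressed box):
def predbox_roi_iou_sketch(raw_roi, pred_boxes):
    import numpy as np  # np is already imported at module level in this file
    ix1 = np.maximum(raw_roi[:, 0], pred_boxes[:, 0])
    iy1 = np.maximum(raw_roi[:, 1], pred_boxes[:, 1])
    ix2 = np.minimum(raw_roi[:, 2], pred_boxes[:, 2])
    iy2 = np.minimum(raw_roi[:, 3], pred_boxes[:, 3])
    iw = np.maximum(ix2 - ix1 + 1., 0.)
    ih = np.maximum(iy2 - iy1 + 1., 0.)
    inters = iw * ih
    area_roi = ((raw_roi[:, 2] - raw_roi[:, 0] + 1.) *
                (raw_roi[:, 3] - raw_roi[:, 1] + 1.))
    area_pred = ((pred_boxes[:, 2] - pred_boxes[:, 0] + 1.) *
                 (pred_boxes[:, 3] - pred_boxes[:, 1] + 1.))
    return inters / (area_roi + area_pred - inters)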
def get_detections_from_im(cfg, model, im, image_id, featmap_blob_name,
                           feat_blob_name, MIN_BOXES, MAX_BOXES,
                           conf_thresh=0.2, bboxes=None):
    assert conf_thresh >= 0.
    with c2_utils.NamedCudaScope(0):
        scores, cls_boxes, im_scale = infer_engine.im_detect_bbox(
            model, im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE, boxes=bboxes)
        num_rpn = scores.shape[0]
        region_feat = workspace.FetchBlob(feat_blob_name)

        max_conf = np.zeros((num_rpn, ), dtype=np.float32)
        max_cls = np.zeros((num_rpn, ), dtype=np.int32)
        max_box = np.zeros((num_rpn, 4), dtype=np.float32)

        # For every proposal, keep the highest-scoring class that survives
        # the per-class NMS
        for cls_ind in range(1, cfg.MODEL.NUM_CLASSES):
            cls_scores = scores[:, cls_ind]
            dets = np.hstack((cls_boxes[:, (cls_ind * 4):(cls_ind * 4 + 4)],
                              cls_scores[:, np.newaxis])).astype(np.float32)
            keep = np.array(nms(dets, cfg.TEST.NMS))
            inds_update = np.where(cls_scores[keep] > max_conf[keep])
            kinds = keep[inds_update]
            max_conf[kinds] = cls_scores[kinds]
            max_cls[kinds] = cls_ind
            max_box[kinds] = dets[kinds][:, :4]

        # Keep between MIN_BOXES and MAX_BOXES regions, ranked by confidence
        keep_boxes = np.where(max_conf > conf_thresh)[0]
        if len(keep_boxes) < MIN_BOXES:
            keep_boxes = np.argsort(max_conf)[::-1][:MIN_BOXES]
        elif len(keep_boxes) > MAX_BOXES:
            keep_boxes = np.argsort(max_conf)[::-1][:MAX_BOXES]

        objects = max_cls[keep_boxes]
        obj_prob = max_conf[keep_boxes]
        obj_boxes = max_box[keep_boxes, :]
        cls_prob = scores[keep_boxes, :]

    assert np.sum(objects >= cfg.MODEL.NUM_CLASSES) == 0

    return {
        "image_id": image_id,
        "image_h": np.size(im, 0),
        "image_w": np.size(im, 1),
        'num_boxes': len(keep_boxes),
        'boxes': obj_boxes,
        'region_feat': region_feat[keep_boxes, :],
        'object': objects,
        'obj_prob': obj_prob,
        'cls_prob': cls_prob
    }
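
# Hypothetical usage of `get_detections_from_im`; the blob names and box
# budgets below follow the bottom-up-attention convention and are assumptions,
# not values taken from this repository:
def _demo_get_detections(cfg, model):
    import cv2
    im = cv2.imread('example.jpg')  # hypothetical input image
    feats = get_detections_from_im(
        cfg, model, im, image_id=0,
        featmap_blob_name='gpu_0/res5_2_branch2c',  # assumed blob name
        feat_blob_name='gpu_0/pool5',               # assumed blob name
        MIN_BOXES=10, MAX_BOXES=100)
    print(feats['num_boxes'], feats['region_feat'].shape)
    return feats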
def proposals_for_one_image(self, im_info, all_anchors, bbox_deltas, scores):
    # Get mode-dependent configuration
    cfg_key = 'TRAIN' if self._train else 'TEST'
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
    min_size = cfg[cfg_key].RPN_MIN_SIZE

    # Transpose and reshape predicted bbox transformations to get them
    # into the same order as the anchors:
    #   - bbox deltas will be (4 * A, H, W) format from conv output
    #   - transpose to (H, W, 4 * A)
    #   - reshape to (H * W * A, 4) where rows are ordered by (H, W, A)
    #     in slowest to fastest order to match the enumerated anchors
    bbox_deltas = bbox_deltas.transpose((1, 2, 0)).reshape((-1, 4))

    # Same story for the scores:
    #   - scores are (A, H, W) format from conv output
    #   - transpose to (H, W, A)
    #   - reshape to (H * W * A, 1) where rows are ordered by (H, W, A)
    #     to match the order of anchors and bbox_deltas
    scores = scores.transpose((1, 2, 0)).reshape((-1, 1))

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    if pre_nms_topN <= 0 or pre_nms_topN >= len(scores):
        order = np.argsort(-scores.squeeze())
    else:
        # Avoid sorting possibly large arrays; First partition to get top K
        # unsorted and then sort just those (~20x faster for 200k scores)
        inds = np.argpartition(-scores.squeeze(),
                               pre_nms_topN)[:pre_nms_topN]
        order = np.argsort(-scores[inds].squeeze())
        order = inds[order]
    bbox_deltas = bbox_deltas[order, :]
    all_anchors = all_anchors[order, :]
    scores = scores[order]

    # Transform anchors into proposals via bbox transformations
    proposals = box_utils.bbox_transform(all_anchors, bbox_deltas,
                                         (1.0, 1.0, 1.0, 1.0))

    # 2. clip proposals to image (may result in proposals with zero area
    # that will be removed in the next step)
    proposals = box_utils.clip_tiled_boxes(proposals, im_info[:2])

    # 3. remove predicted boxes with either height or width < min_size
    keep = _filter_boxes(proposals, min_size, im_info)
    proposals = proposals[keep, :]
    scores = scores[keep]

    # 6. apply loose nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    if nms_thresh > 0:
        keep = box_utils.nms(np.hstack((proposals, scores)), nms_thresh)
        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
        proposals = proposals[keep, :]
        scores = scores[keep]
    return proposals, scores
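
# `_filter_boxes` is referenced above but not shown in this excerpt. A minimal
# sketch following the Detectron convention, where min_size is given at the
# original image scale (so it is multiplied by the scale factor im_info[2])
# and box centers must fall inside the image:
def _filter_boxes_sketch(boxes, min_size, im_info):
    """Keep boxes whose width and height are both >= min_size and whose
    center lies inside the image."""
    import numpy as np  # np is already imported at module level in this file
    min_size *= im_info[2]
    ws = boxes[:, 2] - boxes[:, 0] + 1
    hs = boxes[:, 3] - boxes[:, 1] + 1
    x_ctr = boxes[:, 0] + ws / 2.
    y_ctr = boxes[:, 1] + hs / 2.
    keep = np.where((ws >= min_size) & (hs >= min_size) &
                    (x_ctr < im_info[1]) & (y_ctr < im_info[0]))[0]
    return keep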
print('%d/%d: %s' % (i, len(image_subset), im_name))
im = cv2.imread(osp.join(args.data_dir, im_name))
assert im is not None

# Detect faces and regress bounding-boxes
scores, boxes, im_scale, blob_conv = im_detect_bbox(
    net, im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE)

# Format the detection output
cls_ind = 1
cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
cls_scores = scores[:, cls_ind]
dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
keep = box_utils.nms(dets, NMS_THRESH)
dets = dets[keep, :]
keep = np.where(dets[:, 4] > CONF_THRESH)
dets = dets[keep]

# Convert (x1, y1, x2, y2) to (x, y, w, h)
dets[:, 2] = dets[:, 2] - dets[:, 0] + 1
dets[:, 3] = dets[:, 3] - dets[:, 1] + 1
print('Num. detections: %d' % dets.shape[0])
# if dets.size == 0:  # nothing detected
#     continue

# Save visualized frames
viz_out_path = osp.join(img_output_dir, osp.basename(im_name))
if dets.size != 0:
    im_det = draw_detection_list(im, dets.copy())
    cv2.imwrite(viz_out_path, im_det)
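
# `draw_detection_list` is defined elsewhere; a minimal sketch under the
# assumption that it draws (x, y, w, h, score) rows onto a BGR image and
# returns an annotated copy:
def draw_detection_list_sketch(im, dets):
    import cv2
    im = im.copy()
    for x, y, w, h, score in dets:
        cv2.rectangle(im, (int(x), int(y)), (int(x + w), int(y + h)),
                      (0, 255, 0), 2)
        cv2.putText(im, '%.2f' % score, (int(x), max(int(y) - 4, 0)),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
    return im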
txt_name = os.path.splitext(im_name)[0] + '.txt'
im = cv2.imread(os.path.join(data_dir, im_name))
assert im is not None

scores, boxes, im_scale, blob_conv = im_detect_bbox(
    net, im, cfg.TEST.SCALE, cfg.TEST.MAX_SIZE)

cls_ind = 1
cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
cls_scores = scores[:, cls_ind]
dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
keep = box_utils.nms(dets, cfg.TEST.NMS)
dets = dets[keep, :]
keep = np.where(dets[:, 4] > CONF_THRESH)
dets = dets[keep]

# Convert (x1, y1, x2, y2) to (x, y, w, h)
dets[:, 2] = dets[:, 2] - dets[:, 0] + 1
dets[:, 3] = dets[:, 3] - dets[:, 1] + 1

# Save detection results -- [x y w h score]
dir_name, tmp_im_name = os.path.split(im_name)
if not os.path.exists(os.path.join(det_dir, dir_name)):
    os.makedirs(os.path.join(det_dir, dir_name))
with open(os.path.join(det_dir, txt_name), 'w') as fid:
    fid.write(im_name + '\n')
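
# The fragment above writes only the image name before the excerpt ends. If
# the target is a WIDER-FACE-style detection file (an assumption), the rest
# of the writer would look roughly like this:
def _write_dets_txt_sketch(det_path, im_name, dets):
    """Assumed format: image name, detection count, then one
    'x y w h score' row per detection."""
    with open(det_path, 'w') as fid:
        fid.write(im_name + '\n')
        fid.write(str(dets.shape[0]) + '\n')
        for k in range(dets.shape[0]):
            fid.write('%f %f %f %f %f\n' % (dets[k, 0], dets[k, 1],
                                            dets[k, 2], dets[k, 3],
                                            dets[k, 4]))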
def im_detect_bbox(model, im, timers=None):
    """Generate RetinaNet detections on a single image."""
    if timers is None:
        timers = defaultdict(Timer)
    # Although anchors are input independent and could be precomputed,
    # recomputing them per image only brings a small overhead
    anchors = _create_cell_anchors()
    timers['im_detect_bbox'].tic()
    k_max, k_min = cfg.FPN.RPN_MAX_LEVEL, cfg.FPN.RPN_MIN_LEVEL
    A = cfg.RETINANET.SCALES_PER_OCTAVE * len(cfg.RETINANET.ASPECT_RATIOS)
    inputs = {}
    inputs['data'], inputs['im_info'] = _get_image_blob(im)
    cls_probs, box_preds = [], []
    for lvl in range(k_min, k_max + 1):
        suffix = 'fpn{}'.format(lvl)
        cls_probs.append(core.ScopedName('retnet_cls_prob_{}'.format(suffix)))
        box_preds.append(core.ScopedName('retnet_bbox_pred_{}'.format(suffix)))
    for k, v in inputs.items():
        workspace.FeedBlob(core.ScopedName(k), v.astype(np.float32, copy=False))

    workspace.RunNet(model.net.Proto().name)
    scale = inputs['im_info'][0, 2]
    cls_probs = workspace.FetchBlobs(cls_probs)
    box_preds = workspace.FetchBlobs(box_preds)

    # here the boxes_all are [x0, y0, x1, y1, score]
    boxes_all = defaultdict(list)

    cnt = 0
    for lvl in range(k_min, k_max + 1):
        # create cell anchors array
        stride = 2. ** lvl
        cell_anchors = anchors[lvl]

        # fetch per level probability
        cls_prob = cls_probs[cnt]
        box_pred = box_preds[cnt]
        cls_prob = cls_prob.reshape((
            cls_prob.shape[0], A, int(cls_prob.shape[1] / A),
            cls_prob.shape[2], cls_prob.shape[3]))
        box_pred = box_pred.reshape((
            box_pred.shape[0], A, 4, box_pred.shape[2], box_pred.shape[3]))
        cnt += 1

        if cfg.RETINANET.SOFTMAX:
            cls_prob = cls_prob[:, :, 1::, :, :]

        cls_prob_ravel = cls_prob.ravel()
        # In some cases [especially for very small img sizes], it's possible
        # that candidate_ind is empty if we impose threshold 0.05 at all
        # levels. This will lead to errors since no detections are found for
        # this image. Hence, for lvl 7, which has small spatial resolution, we
        # take the threshold 0.0
        th = cfg.RETINANET.INFERENCE_TH if lvl < k_max else 0.0
        candidate_inds = np.where(cls_prob_ravel > th)[0]
        if len(candidate_inds) == 0:
            continue

        pre_nms_topn = min(cfg.RETINANET.PRE_NMS_TOP_N, len(candidate_inds))
        inds = np.argpartition(
            cls_prob_ravel[candidate_inds], -pre_nms_topn)[-pre_nms_topn:]
        inds = candidate_inds[inds]

        inds_5d = np.array(np.unravel_index(inds, cls_prob.shape)).transpose()
        classes = inds_5d[:, 2]
        anchor_ids, y, x = inds_5d[:, 1], inds_5d[:, 3], inds_5d[:, 4]
        scores = cls_prob[:, anchor_ids, classes, y, x]

        boxes = np.column_stack((x, y, x, y)).astype(dtype=np.float32)
        boxes *= stride
        boxes += cell_anchors[anchor_ids, :]

        if not cfg.RETINANET.CLASS_SPECIFIC_BBOX:
            box_deltas = box_pred[0, anchor_ids, :, y, x]
        else:
            box_cls_inds = classes * 4
            box_deltas = np.vstack(
                [box_pred[0, ind:ind + 4, yi, xi]
                 for ind, yi, xi in zip(box_cls_inds, y, x)])
        pred_boxes = (
            box_utils.bbox_transform(boxes, box_deltas)
            if cfg.TEST.BBOX_REG else boxes)
        pred_boxes /= scale
        pred_boxes = box_utils.clip_tiled_boxes(pred_boxes, im.shape)
        box_scores = np.zeros((pred_boxes.shape[0], 5))
        box_scores[:, 0:4] = pred_boxes
        box_scores[:, 4] = scores

        for cls in range(1, cfg.MODEL.NUM_CLASSES):
            inds = np.where(classes == cls - 1)[0]
            if len(inds) > 0:
                boxes_all[cls].extend(box_scores[inds, :])
    timers['im_detect_bbox'].toc()

    # Combine predictions across all levels and retain the top scoring by class
    timers['misc_bbox'].tic()
    detections = []
    for cls, boxes in boxes_all.items():
        cls_dets = np.vstack(boxes).astype(dtype=np.float32)
        # do class specific nms here
        keep = box_utils.nms(cls_dets, cfg.TEST.NMS)
        cls_dets = cls_dets[keep, :]
        out = np.zeros((len(keep), 6))
        out[:, 0:5] = cls_dets
        out[:, 5].fill(cls)
        detections.append(out)

    # detections (N, 6) format:
    #   detections[:, :4] - boxes
    #   detections[:, 4]  - scores
    #   detections[:, 5]  - classes
    detections = np.vstack(detections)
    # sort all again
    inds = np.argsort(-detections[:, 4])
    detections = detections[inds[0:cfg.TEST.DETECTIONS_PER_IM], :]

    # Convert the detections to image cls_ format (see core/test_engine.py)
    num_classes = cfg.MODEL.NUM_CLASSES
    cls_boxes = [[] for _ in range(num_classes)]
    for c in range(1, num_classes):
        inds = np.where(detections[:, 5] == c)[0]
        cls_boxes[c] = detections[inds, :5]
    timers['misc_bbox'].toc()

    return cls_boxes
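
# Toy illustration of the anchor placement in `im_detect_bbox` above: at FPN
# level 5 the stride is 2**5 = 32, so grid cell (x, y) = (3, 2) combined with
# a cell anchor [-16, -16, 16, 16] yields a 32x32 box centered at (96, 64):
def _demo_cell_anchor_placement():
    import numpy as np
    stride = 2. ** 5  # FPN level 5
    x, y = np.array([3]), np.array([2])
    cell_anchor = np.array([-16., -16., 16., 16.])
    boxes = np.column_stack((x, y, x, y)).astype(np.float32)
    boxes *= stride
    boxes += cell_anchor
    print(boxes)  # [[ 80.  48. 112.  80.]]
    return boxes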
def box_results_with_nms_and_limit(scores, boxes,
                                   num_classes=81,
                                   score_thresh=0.05,
                                   overlap_thresh=0.5,
                                   do_soft_nms=False,
                                   soft_nms_sigma=0.5,
                                   soft_nms_method='linear',
                                   do_bbox_vote=False,
                                   bbox_vote_thresh=0.8,
                                   bbox_vote_method='ID',
                                   max_detections_per_img=100):  # over all classes
    """Returns bounding-box detection results by thresholding on scores and
    applying non-maximum suppression (NMS). A number of detections persist
    after this and are returned, sorted by class.

    `boxes` has shape (#detections, 4 * #classes), where each row represents
    a list of predicted bounding boxes for each of the object classes in the
    dataset (including the background class). The detections in each row
    originate from the same object proposal.

    `scores` has shape (#detections, #classes), where each row represents a
    list of object detection confidence scores for each of the object classes
    in the dataset (including the background class). `scores[i, j]`
    corresponds to the box at `boxes[i, j * 4:(j + 1) * 4]`.
    """
    cls_boxes = [[] for _ in range(num_classes)]
    # Apply threshold on detection probabilities and apply NMS
    # Skip j = 0, because it's the background class
    for j in range(1, num_classes):
        inds = np.where(scores[:, j] > score_thresh)[0]
        scores_j = scores[inds, j]
        boxes_j = boxes[inds, j * 4:(j + 1) * 4]
        dets_j = np.hstack((boxes_j, scores_j[:, np.newaxis])).astype(
            np.float32, copy=False)
        if do_soft_nms:
            nms_dets, _ = box_utils.soft_nms(
                dets_j,
                sigma=soft_nms_sigma,
                overlap_thresh=overlap_thresh,
                score_thresh=0.0001,
                method=soft_nms_method)
        else:
            keep = box_utils.nms(dets_j, overlap_thresh)
            nms_dets = dets_j[keep, :]
        # Refine the post-NMS boxes using bounding-box voting
        if do_bbox_vote:
            nms_dets = box_utils.box_voting(
                nms_dets,
                dets_j,
                bbox_vote_thresh,
                scoring_method=bbox_vote_method)
        cls_boxes[j] = nms_dets

    # Limit to max_per_image detections **over all classes**
    if max_detections_per_img > 0:
        image_scores = np.hstack(
            [cls_boxes[j][:, -1] for j in range(1, num_classes)])
        if len(image_scores) > max_detections_per_img:
            image_thresh = np.sort(image_scores)[-max_detections_per_img]
            for j in range(1, num_classes):
                keep = np.where(cls_boxes[j][:, -1] >= image_thresh)[0]
                cls_boxes[j] = cls_boxes[j][keep, :]

    im_results = np.vstack([cls_boxes[j] for j in range(1, num_classes)])
    boxes = im_results[:, :-1]
    scores = im_results[:, -1]

    return scores, boxes, cls_boxes
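
# Minimal usage sketch of the function above with random inputs
# (num_classes=3 for brevity); a toy demonstration, not a test:
def _demo_box_results_with_nms_and_limit():
    import numpy as np
    num_classes = 3
    scores = np.random.rand(10, num_classes).astype(np.float32)
    boxes = (np.random.rand(10, 4 * num_classes) * 100).astype(np.float32)
    boxes[:, 2::4] += boxes[:, 0::4]  # ensure x2 >= x1 for every class
    boxes[:, 3::4] += boxes[:, 1::4]  # ensure y2 >= y1 for every class
    out_scores, out_boxes, cls_boxes = box_results_with_nms_and_limit(
        scores, boxes, num_classes=num_classes)
    print(out_boxes.shape, out_scores.shape)  # (N, 4) and (N,)
    return cls_boxes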