def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride, anchors, num_anchors):
    """A simplified version compared to fast/er RCNN.
    For details please see the technical report.
    """
    if type(cfg_key) == bytes:
        cfg_key = cfg_key.decode('utf-8')
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH

    # Get the scores and bounding boxes
    scores = rpn_cls_prob[:, :, :, num_anchors:]
    rpn_bbox_pred = rpn_bbox_pred.view((-1, 4))
    scores = scores.contiguous().view(-1, 1)
    proposals = bbox_transform_inv(anchors, rpn_bbox_pred)
    proposals = clip_boxes(proposals, im_info[:2])

    # Pick the top region proposals
    scores, order = scores.view(-1).sort(descending=True)
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
        scores = scores[:pre_nms_topN].view(-1, 1)
    proposals = proposals[order.data, :]

    # Non-maximum suppression
    keep = nms(proposals, scores.squeeze(1), nms_thresh)

    # Pick the top region proposals after NMS
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Only support single image as input
    batch_inds = proposals.new_zeros(proposals.size(0), 1)
    blob = torch.cat((batch_inds, proposals), 1)
    return blob, scores
def im_detect(data, model, batch_size, std, mean, classes):
    gt_tensor = torch.from_numpy(data[0])
    im_blobs_tensor = torch.from_numpy(data[1])
    im_info_tensor = torch.from_numpy(data[2])
    results = []
    with torch.no_grad():
        rois, cls_prob, bbox_pred = model(im_blobs_tensor.cuda(),
                                          im_info_tensor.cuda(),
                                          gt_tensor.cuda())
        pred_boxes = bbox_transform_inv(rois[:, :, 1:5], bbox_pred, batch_size, std, mean)
        pred_boxes = clip_boxes(pred_boxes, im_info_tensor.data, 1)
        scores = cls_prob
        # Class 0 is the background; iterate over foreground classes only.
        for index in range(1, classes):
            cls_scores = scores[0, :, index]
            scores_over_thresh = (cls_scores > thresh)  # `thresh` is a module-level score threshold
            cls_keep = cls_scores[scores_over_thresh]
            bboxes_keep = pred_boxes[0, scores_over_thresh, index * 4:(index + 1) * 4]
            filter_keep = _filter_boxes(bboxes_keep, 16)
            cls_keep = cls_keep[filter_keep]
            bboxes_keep = bboxes_keep[filter_keep, :]
            keep_idx_i = nms(bboxes_keep, cls_keep, nms_thresh)  # `nms_thresh` is module-level
            keep_idx_i = keep_idx_i.long().view(-1)
            bboxes_keep = bboxes_keep[keep_idx_i, :]
            cls_keep = cls_keep[keep_idx_i]
            # Undo the image rescaling recorded in im_info
            bboxes_keep[:, 0] /= im_info_tensor[0, 2]
            bboxes_keep[:, 1] /= im_info_tensor[0, 3]
            bboxes_keep[:, 2] /= im_info_tensor[0, 2]
            bboxes_keep[:, 3] /= im_info_tensor[0, 3]
            if bboxes_keep.size(0) > 0:
                result = np.zeros((bboxes_keep.size(0), 6), dtype=np.float32)
                result[:, 0:4] = bboxes_keep.cpu()
                result[:, 4] = cls_keep.cpu()
                result[:, 5] = index
                results.append(result)
    return results
def box_results_with_nms_and_limit(scores, boxes):
    # Class 0 is the background class; score-threshold and NMS per foreground class.
    num_classes = cfg.MODEL.NUM_CLASSES + 1
    cls_boxes = [[] for _ in range(num_classes)]
    for j in range(1, num_classes):
        inds = np.where(scores[:, j] > cfg.TEST.SCORE_THRESH)[0]
        scores_j = scores[inds, j]
        boxes_j = boxes[inds, :]
        dets_j = np.hstack((boxes_j, scores_j[:, np.newaxis])).astype(np.float32, copy=False)
        keep = nms(torch.tensor(boxes_j), torch.tensor(scores_j), cfg.TEST.NMS).numpy()
        nms_dets = dets_j[keep, :]
        cls_boxes[j] = nms_dets

    # Limit to DETECTIONS_PER_IM detections over all classes.
    if cfg.TEST.DETECTIONS_PER_IM > 0:
        data = []
        for j in range(1, num_classes):
            data.append(cls_boxes[j][:, -1])
        image_scores = np.hstack(data)
        if len(image_scores) > cfg.TEST.DETECTIONS_PER_IM:
            image_thresh = np.sort(image_scores)[-cfg.TEST.DETECTIONS_PER_IM]
            for j in range(1, num_classes):
                keep = np.where(cls_boxes[j][:, -1] >= image_thresh)[0]
                cls_boxes[j] = cls_boxes[j][keep, :]

    im_results = np.vstack([cls_boxes[j] for j in range(1, num_classes)])
    boxes = im_results[:, :-1]
    scores = im_results[:, -1]
    return scores, boxes, cls_boxes
def forward(self, inputs):
    if self.training:
        img_batch, annotations = inputs
    else:
        img_batch = inputs
    regression, classification = self.forward_dummy(img_batch)
    anchors = self.anchors(img_batch)
    if self.training:
        return self.focalLoss(classification, regression, anchors, annotations)
    else:
        transformed_anchors = self.regressBoxes(anchors, regression)
        transformed_anchors = self.clipBoxes(transformed_anchors, img_batch)
        scores = torch.max(classification, dim=2, keepdim=True)[0]
        scores_over_thresh = (scores > 0.05)[0, :, 0]
        if scores_over_thresh.sum() == 0:
            # no boxes to NMS, just return
            return [torch.zeros(0), torch.zeros(0), torch.zeros(0, 4)]
        classification = classification[:, scores_over_thresh, :]
        transformed_anchors = transformed_anchors[:, scores_over_thresh, :]
        scores = scores[:, scores_over_thresh, :]
        anchors_nms_idx = nms(transformed_anchors[0, :, :], scores[0, :, 0], 0.5)
        nms_scores, nms_class = classification[0, anchors_nms_idx, :].max(dim=1)
        return [nms_scores, nms_class, transformed_anchors[0, anchors_nms_idx, :]]
def box_nms(prob, size, iou=0.1, min_prob=0.01, keep_top_k=0):
    """Performs non-maximum suppression on the heatmap by considering hypothetical
    bounding boxes centered at each pixel's location (e.g. corresponding to the
    receptive field). Optionally only keeps the top k detections.

    Arguments:
        prob: the probability heatmap, with shape `[H, W]`.
        size: a scalar, the size of the bounding boxes.
        iou: a scalar, the IoU overlap threshold.
        min_prob: a threshold under which all probabilities are discarded before NMS.
        keep_top_k: an integer, the number of top scores to keep.
    """
    from torchvision.ops import nms

    pts = torch.nonzero(prob > min_prob).float()  # [N, 2]
    prob_nms = torch.zeros_like(prob)
    if pts.nelement() == 0:
        return prob_nms
    size = torch.tensor(size / 2.).cuda()
    boxes = torch.cat([pts - size, pts + size], dim=1)  # [N, 4]
    scores = prob[pts[:, 0].long(), pts[:, 1].long()]
    if keep_top_k != 0:
        indices = nms(boxes, scores, iou)
    else:
        raise NotImplementedError
    pts = torch.index_select(pts, 0, indices)
    scores = torch.index_select(scores, 0, indices)
    prob_nms[pts[:, 0].long(), pts[:, 1].long()] = scores
    return prob_nms
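# A minimal usage sketch for box_nms above, with made-up values. It assumes a
# CUDA device, since box_nms moves the half-size tensor to the GPU. With
# keep_top_k != 0 the torchvision NMS branch runs; the result is a heatmap of
# the same shape where only the surviving peak scores remain non-zero.
import torch

if torch.cuda.is_available():
    heatmap = torch.rand(240, 320).cuda()  # dummy probability heatmap
    suppressed = box_nms(heatmap, size=8, iou=0.1, min_prob=0.01, keep_top_k=1)
    print(suppressed.shape)  # torch.Size([240, 320]), but with sparser non-zeros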
def boxlist_nms(boxlist, nms_thresh, max_proposals=-1, score_field="scores"):
    """
    Performs non-maximum suppression on a boxlist, with scores specified
    in a boxlist field via score_field.

    Arguments:
        boxlist (BoxList)
        nms_thresh (float)
        max_proposals (int): if > 0, then only the top max_proposals are kept
            after non-maximum suppression
        score_field (str)
    """
    if nms_thresh <= 0:
        return boxlist
    mode = boxlist.mode
    boxlist = boxlist.convert("xyxy")
    boxes = boxlist.bbox
    score = boxlist.get_field(score_field)
    keep = nms(boxes, score, nms_thresh)
    if max_proposals > 0:
        keep = keep[:max_proposals]
    boxlist = boxlist[keep]
    return boxlist.convert(mode)
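# A usage sketch for boxlist_nms, assuming the maskrcnn-benchmark-style BoxList
# (constructor BoxList(bbox, image_size, mode) plus add_field); boxes and
# scores are made up. torchvision's nms returns indices sorted by descending
# score, so max_proposals truncates to the best boxes.
import torch
from maskrcnn_benchmark.structures.bounding_box import BoxList

boxes = torch.tensor([[0., 0., 10., 10.],
                      [1., 1., 11., 11.],
                      [50., 50., 60., 60.]])
boxlist = BoxList(boxes, image_size=(100, 100), mode="xyxy")
boxlist.add_field("scores", torch.tensor([0.9, 0.8, 0.7]))
kept = boxlist_nms(boxlist, nms_thresh=0.5, max_proposals=2)
print(len(kept))  # 2: the overlapping lower-scoring box is suppressed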
def forward(self, x, annotations=None):
    _, _, H, W = x.size()
    backbone_out = self.backbone(x)
    detection_body_out = self.detection_body(backbone_out)
    classification = self.classificationModel(detection_body_out)
    regression = self.regressionModel(detection_body_out)
    anchors = self.anchor(x).to(self.device)
    if self.training:
        loss = self.focalloss(classification, regression, anchors, annotations)
        return loss
    else:
        img_batch = x
        transformed_anchors = self.regressBoxes(anchors, regression)
        transformed_anchors = self.clipBoxes(transformed_anchors, img_batch)
        scores = torch.max(classification, dim=2, keepdim=True)[0]  # per-anchor max class score, [batch_size, 230400, 1]
        scores_over_thresh = (scores > 0.5)[0, :, 0]  # score threshold 0.5
        if scores_over_thresh.sum() == 0:
            # no boxes to NMS, just return
            return [torch.zeros(0), torch.zeros(0), torch.zeros(0, 4)]
        classification = classification[:, scores_over_thresh, :]
        transformed_anchors = transformed_anchors[:, scores_over_thresh, :]
        scores = scores[:, scores_over_thresh, :]
        anchors_nms_idx = nms(transformed_anchors[0, :, :], scores[0, :, 0], 0.5)  # NMS IoU threshold 0.5
        nms_scores, nms_class = classification[0, anchors_nms_idx, :].max(dim=1)
        return [nms_scores, nms_class, transformed_anchors[0, anchors_nms_idx, :]]
def filter(self, det):
    # Keep the top-k detections overall, sorted by score.
    orders = torch.argsort(det['scores'], descending=True)[:self.cfg.keep_top_k]
    class_ids = det['class_ids'][orders]
    scores = det['scores'][orders]
    boxes = det['boxes'][orders, :]

    # class-wise nms
    filtered_class_ids, filtered_scores, filtered_boxes = [], [], []
    for cls_id in range(self.cfg.num_classes):
        idx_cur_class = (class_ids == cls_id)
        if torch.sum(idx_cur_class) == 0:
            continue
        class_ids_cur_class = class_ids[idx_cur_class]
        scores_cur_class = scores[idx_cur_class]
        boxes_cur_class = boxes[idx_cur_class, :]
        keeps = nms(boxes_cur_class, scores_cur_class, self.cfg.nms_thresh)
        filtered_class_ids.append(class_ids_cur_class[keeps])
        filtered_scores.append(scores_cur_class[keeps])
        filtered_boxes.append(boxes_cur_class[keeps, :])
    filtered_class_ids = torch.cat(filtered_class_ids)
    filtered_scores = torch.cat(filtered_scores)
    filtered_boxes = torch.cat(filtered_boxes, dim=0)

    # Final score thresholding
    keeps = filtered_scores > self.cfg.score_thresh
    if torch.sum(keeps) == 0:
        det = None
    else:
        det = {
            'class_ids': filtered_class_ids[keeps],
            'scores': filtered_scores[keeps],
            'boxes': filtered_boxes[keeps, :]
        }
    return det
def visualize(args):
    # Set up root directory
    dataset = HICODet(
        None,
        os.path.join(args.data_root, 'instances_{}.json'.format(args.partition)))
    # Set up image instance path
    image_name = dataset.filename(args.image_idx)
    print("Image name: ", image_name)
    image_path = os.path.join(
        args.data_root,
        'hico_20160224_det/images/{}'.format(args.partition), image_name)
    detection_path = os.path.join(args.detection_root,
                                  image_name.replace('.jpg', '.json'))
    # Load image instance
    image = Image.open(image_path)
    with open(detection_path, 'r') as f:
        detections = json.load(f)
    # Remove low-scoring boxes
    box_score_thresh = args.box_score_thresh
    boxes = np.asarray(detections['boxes'])
    scores = np.asarray(detections['scores'])
    keep_idx = np.where(scores >= box_score_thresh)[0]
    boxes = boxes[keep_idx, :]
    scores = scores[keep_idx]
    # Perform NMS
    keep_idx = nms(torch.from_numpy(boxes), torch.from_numpy(scores), args.nms_thresh)
    boxes = boxes[keep_idx]
    scores = scores[keep_idx]
    # Draw boxes
    canvas = ImageDraw.Draw(image)
    for idx in range(boxes.shape[0]):
        coords = boxes[idx, :].tolist()
        canvas.rectangle(coords)
        canvas.text(coords[:2], str(scores[idx])[:4])
    image.show()
def forward(self, outputs):
    self._batch += 1
    self._total_batches += 1
    if (not self._batch % self.skip) and self._total_batches >= self.initial_skip:
        for b, batch in enumerate(outputs.inference):
            t, n = batch.shape
            # Drop rows below the objectness threshold, then NMS the rest.
            batch = batch[batch[:, 4] > self.obj_threshold]
            indices = nms(xywh2rect(batch[:, 0:4]), batch[:, 4], self.iou_threshold)
            # Build a permutation-like matrix that gathers the kept rows.
            k = torch.zeros([batch.shape[0]] * 2, device=batch.device)
            k[:indices.shape[0]] = torch.eye(batch.shape[0], device=batch.device)[indices]
            batch = torch.mm(k, batch)
            # Rows of k that are all zero correspond to suppressed detections.
            batch[torch.all(k == 0, dim=1)] = -1
            new_batch = torch.zeros(t, n, device=batch.device, dtype=batch.dtype).fill_(-1)
            new_batch[0:batch.shape[0]] = batch
            outputs.inference[b] = new_batch
    return outputs
def _suppress(self, raw_cls_bbox, raw_prob):
    bbox = list()
    label = list()
    score = list()
    # skip cls_id = 0 because it is the background class
    for l in range(1, self.n_class):
        cls_bbox_l = raw_cls_bbox.reshape((-1, self.n_class, 4))[:, l, :]
        prob_l = raw_prob[:, l]
        mask = prob_l > self.score_thresh
        cls_bbox_l = cls_bbox_l[mask]
        prob_l = prob_l[mask]
        keep = nms(cls_bbox_l, prob_l, self.nms_thresh)
        bbox.append(cls_bbox_l[keep].cpu().numpy())
        # The labels are in [0, self.n_class - 2].
        label.append((l - 1) * np.ones((len(keep),)))
        score.append(prob_l[keep].cpu().numpy())
    bbox = np.concatenate(bbox, axis=0).astype(np.float32)
    label = np.concatenate(label, axis=0).astype(np.int32)
    score = np.concatenate(score, axis=0).astype(np.float32)
    return bbox, label, score
def DOPE_NMS(scores, boxes, pose2d, pose3d, min_score=0.5, iou_threshold=0.1):
    # Empty results keep the same arity as the success path: (dict, indices, bestcls).
    if scores.numel() == 0:
        return {}, torch.LongTensor([]), torch.LongTensor([])
    maxscores, bestcls = torch.max(scores[:, 1:], dim=1)
    valid_indices = torch.nonzero(maxscores >= min_score)
    if valid_indices.numel() == 0:
        return {}, torch.LongTensor([]), torch.LongTensor([])
    valid_indices = valid_indices[:, 0]
    boxes = _boxes_from_poses(pose2d[valid_indices, bestcls[valid_indices], :, :], margin=0.1)
    indices = valid_indices[nms(boxes, maxscores[valid_indices, ...], iou_threshold)]
    bestcls = bestcls[indices]
    return {
        'score': scores[indices, bestcls + 1],
        'pose2d': pose2d[indices, bestcls, :, :],
        'pose3d': pose3d[indices, bestcls, :, :]
    }, indices, bestcls
def get_top_detections(self, proposals, pred_scores, pred_deltas, images):
    img_size = (images[0].shape[1], images[0].shape[2])
    start_idx = 0
    results = []
    for proposal in proposals:
        pred_score = pred_scores[start_idx:start_idx + len(proposal)]
        pred_delta = pred_deltas[start_idx:start_idx + len(proposal)]
        start_idx = start_idx + len(proposal)
        result = []
        for i in range(1, self.num_labels + 1):
            idxs = torch.where(pred_score[:, i] > self.score_threshold)[0]
            pred_scores_i = pred_score[idxs, i]
            pred_delta_i = pred_delta[idxs, :]
            proposal_i = proposal[idxs]
            detections_i = Box.delta_to_pos(proposal_i, pred_delta_i)
            # valid check
            pred_scores_i, detections_i = Box.box_valid_check(
                pred_scores_i, detections_i, img_size)
            # nms
            keep = ops.nms(detections_i, pred_scores_i, self.nms_threshold)
            if cfg.visualize:
                visualize_box(self.img, detections_i[keep], './outputs/debug.jpg')
            result.append({
                'label': i,
                'score': pred_scores_i[keep],
                'bbox': detections_i[keep]
            })
        results.append(result)
    return results
def choose_roi_for_subnet_only_forward(rpn_cls, rpn_loc, propose_num, propose_roi_num,
                                       anchorbox, validIndex, nms_threshold=0.7,
                                       rows=320, cols=320, width_threshold=16):
    # type(rpn_loc) == torch.Tensor, type(rpn_cls) == torch.Tensor
    rpn_cls = rpn_cls.cpu()
    rpn_loc = rpn_loc.cpu()
    k, h, w = rpn_cls.shape[1] // 2, rpn_cls.shape[2], rpn_cls.shape[3]
    batch = rpn_cls.shape[0]
    rpn_cls = F.softmax(rpn_cls.reshape(-1, k, 2, h * w), dim=2).permute(0, 3, 1, 2).reshape(-1, h * w, k, 2)
    rpn_loc = rpn_loc.reshape(-1, k, 4, h * w).permute(0, 3, 1, 2).reshape(-1, h * w * k, 4)
    pos_scores = rpn_cls[:, :, :, 1].reshape(batch, -1)
    pos_scores = pos_scores[:, validIndex]
    scores_value, pos_scores_index = pos_scores.sort(descending=True, dim=1)
    # Keep the propose_num proposals with the highest foreground scores
    scores_value = scores_value[:, :propose_num]
    pos_scores_index = pos_scores_index[:, :propose_num]
    valid_loc = rpn_loc[:, validIndex]
    batch_index = np.arange(batch).reshape(batch, 1)
    propose_loc = valid_loc[batch_index, pos_scores_index]
    anchorbox = anchorbox[validIndex][pos_scores_index]
    src_box = loc2box(propose_loc, anchorbox)
    # Clip the proposals to the image boundary
    lt = np.maximum(src_box[:, :, :2], [0, 0])
    src_box[:, :, :2] = lt
    rd = np.minimum(src_box[:, :, 2:], [cols - 1, rows - 1])
    src_box[:, :, 2:] = rd
    # Discard boxes narrower than width_threshold on either side
    filted_indices = (rd - lt > width_threshold).all(axis=2)
    tmp = []
    for i in range(len(src_box)):
        box, score, index = src_box[i], scores_value[i], filted_indices[i]
        validbox = box[index].to(torch.float32)
        validscore = score[index]
        # Run NMS and keep at most propose_roi_num proposals per image
        indices = ops.nms(validbox, validscore, nms_threshold)[:propose_roi_num]
        subbox = validbox[indices]
        tmp.append(subbox)
    rois = torch.stack(tmp)
    if rois.ndim == 2:
        rois = rois.unsqueeze(dim=0)
    return rois
def get_test_predictions_nms(models, test_images, device, img_size):
    modelListRes = []
    for m in models:
        modelListRes.append(get_test_predictions(m, test_images, device, img_size))
    # Merge per-model predictions by patient_id.
    resDict = {}
    for modelRes in modelListRes:
        for res in modelRes:
            resDict[res['patient_id']] = resDict.get(res['patient_id'],
                                                     {'patient_id': res['patient_id']})
            if resDict[res['patient_id']].get('boxes', np.array([])).any():
                resDict[res['patient_id']]['boxes'] = np.vstack(
                    (resDict[res['patient_id']]['boxes'], res['boxes']))
            else:
                resDict[res['patient_id']]['boxes'] = res['boxes']
            if resDict[res['patient_id']].get('scores', np.array([])).any():
                resDict[res['patient_id']]['scores'] = np.concatenate(
                    (resDict[res['patient_id']]['scores'], res['scores']))
            else:
                resDict[res['patient_id']]['scores'] = res['scores']
    # NMS across the ensembled boxes, keeping at most three per patient.
    resList = []
    for key, value in resDict.items():
        boxes = torch.tensor(value['boxes'])
        scores = torch.tensor(value['scores'])
        nmsResult = nms(boxes, scores, 0.3)
        imgInfo = {'patient_id': key, 'boxes': [], 'scores': []}
        for i in range(len(nmsResult)):
            imgInfo['boxes'].append(boxes[nmsResult[i]].tolist())
            imgInfo['scores'].append(scores[nmsResult[i]].item())
            if i == 2:
                break
        imgInfo['boxes'] = np.array(imgInfo['boxes'])
        resList.append(imgInfo)
    return resList
def do_nms(proposals, image_path, flips=None, nms_thresh=0.1, size_opt='lm'):
    idx = []
    bbox = []
    paths = []
    l = 0  # running offset into the concatenated proposals
    for i, (x, path) in enumerate(zip(proposals, image_path)):
        path = int(path.split('/')[-1].split('.')[0])
        boxes = x.proposal_boxes.tensor
        H, W = x._image_size
        # Filter proposals by box area according to size_opt.
        area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
        ind = size_condition(area, size_opt)
        boxes = boxes[ind]
        logits = x.objectness_logits[ind]
        nonzero_ind = torch.nonzero(ind).view(-1)
        # Keep only proposals without a ground-truth label, if available.
        if hasattr(x, 'gt_classes'):
            ind = x.gt_classes[ind] == -1
            boxes = boxes[ind]
            logits = logits[ind]
            nonzero_ind = nonzero_ind[ind]
        keep = nms(boxes, logits, nms_thresh)
        nonzero_ind = nonzero_ind[keep]
        idx.append(l + nonzero_ind)
        l += len(x)  # advance the offset by this image's proposal count
        boxes = boxes[keep]
        paths.append(torch.ones((len(keep)), device=logits.device) * path)
        # Normalize coordinates and undo horizontal flips.
        boxes = boxes.div(torch.as_tensor([[W, H, W, H]], device=boxes.device))
        if flips[i] == 1:
            boxes[:, 0] = 1 - boxes[:, 0]
            boxes[:, 2] = 1 - boxes[:, 2]
            boxes = torch.index_select(
                boxes, -1, torch.as_tensor([2, 1, 0, 3], device=boxes.device))
        bbox.append(boxes)
    return torch.cat(idx), torch.cat(bbox), torch.cat(paths)
def batched_nms(boxes, scores, idxs, iou_threshold):
    """
    Performs non-maximum suppression in a batched fashion.

    Each index value corresponds to a category, and NMS will not be applied
    between elements of different categories.

    Parameters
    ----------
    boxes : Tensor[N, 4]
        boxes where NMS will be performed. They are expected to be in
        (x1, y1, x2, y2) format
    scores : Tensor[N]
        scores for each one of the boxes
    idxs : Tensor[N]
        indices of the categories for each one of the boxes.
    iou_threshold : float
        discards all overlapping boxes with IoU > iou_threshold

    Returns
    -------
    keep : Tensor
        int64 tensor with the indices of the elements that have been kept
        by NMS, sorted in decreasing order of scores
    """
    if boxes.numel() == 0:
        return torch.empty((0,), dtype=torch.int64, device=boxes.device)
    # Strategy: in order to perform NMS independently per class,
    # we add an offset to all the boxes. The offset is dependent
    # only on the class idx, and is large enough so that boxes
    # from different classes do not overlap.
    max_coordinate = boxes.max()
    offsets = idxs.to(boxes) * (max_coordinate + 1)
    boxes_for_nms = boxes + offsets[:, None]
    keep = ops.nms(boxes_for_nms, scores, iou_threshold)
    return keep
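# A worked example of the class-offset trick above, with made-up boxes: the
# first two boxes overlap with IoU of about 0.68 and share class 0, so the
# lower-scoring one is suppressed; the third box has identical coordinates to
# the first but a different class, so the offset keeps it out of reach.
import torch
from torchvision import ops  # batched_nms above relies on ops.nms

boxes = torch.tensor([[0., 0., 10., 10.],
                      [1., 1., 11., 11.],
                      [0., 0., 10., 10.]])
scores = torch.tensor([0.9, 0.8, 0.7])
idxs = torch.tensor([0, 0, 1])  # third box belongs to a different category
keep = batched_nms(boxes, scores, idxs, iou_threshold=0.5)
print(keep)  # tensor([0, 2])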
def nonmax_suppression(dets, score_thresh=0.5, iou_thresh=0.4):
    '''Non-maximum suppression for detector outputs.

    Args:
        dets (torch.Tensor): detector output with
            dets.size() = [batch_size, #proposals, #dim], where #proposals is
            the number of proposals over all output scales and #dim is the
            number of attributes per proposal
        score_thresh (float): confidence threshold, score_thresh ∈ [0, 1]
        iou_thresh (float): IoU threshold for overlapping proposals,
            iou_thresh ∈ [0, 1]

    Returns:
        nms_dets (list): per-image detections after NMS (None for images with
            no proposals above the confidence threshold)
    '''
    nms_dets = [None for _ in range(dets.size(0))]
    for i, det in enumerate(dets):
        # Confidence thresholding
        keep = det[:, 4] > score_thresh
        det = det[keep]
        if not det.size(0):
            continue
        # Convert (cx, cy, w, h) to (x1, y1, x2, y2) before NMS
        det[:, :4] = xywh2ltrb(det[:, :4])
        keep = nms(det[:, :4], det[:, 4], iou_thresh)
        det = det[keep]
        nms_dets[i] = det
    return nms_dets
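# A hypothetical call sketch for nonmax_suppression: one image with three
# proposals in the (cx, cy, w, h, conf) layout it expects. Values are made up;
# xywh2ltrb is the source's own helper and is assumed to be in scope.
import torch

dets = torch.tensor([[[50., 50., 20., 20., 0.9],
                      [51., 51., 20., 20., 0.8],
                      [200., 200., 10., 10., 0.3]]])  # last one falls under score_thresh
out = nonmax_suppression(dets, score_thresh=0.5, iou_thresh=0.4)
print(out[0])  # one row: the lower-scoring overlapping box is suppressed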
def _nms_single_im(scores, boxes, pre_nms_topn=12000, post_nms_topn=2000, nms_thresh=0.7):
    '''Replaced the usage of the compiled "nms" function with torchvision nms and
    moved this whole file into lib/nms so that the "excluded folders" rule
    doesn't apply to it.
    '''
    # Sort by score and keep the pre-NMS top-n proposals.
    vs, idx = torch.sort(scores, dim=0, descending=True)
    if idx.size(0) > pre_nms_topn:
        idx = idx[:pre_nms_topn]
    boxes_sorted = boxes[idx].contiguous()
    scores = scores[idx].contiguous()
    keep = nms(boxes_sorted, scores, nms_thresh)
    num_out = min(keep.shape[0], post_nms_topn)
    keep = keep[:num_out]
    # Map back to indices into the original (unsorted) boxes.
    keep = idx[keep]
    return keep
def nms(self, boxes, scores):
    """
    Operate non-maximum suppression on boxes.

    boxes: (N, [y1, x1, y2, x2]) in normalized coordinates.
    scores: (N, [fg_probs])

    return: Remaining boxes after NMS, padded to proposal_count rows.
    """
    indices = ops.nms(boxes, scores, self.nms_threshold)
    proposals = torch.index_select(boxes, dim=0, index=indices)
    # torchvision's nms has no count limit, so the proposal_count cap is applied here.
    if proposals.shape[0] > self.proposal_count:
        # topk over the post-NMS scores, so ix indexes into `proposals`.
        scores, ix = torch.topk(scores[indices], k=self.proposal_count, dim=-1, sorted=True)
        proposals = torch.index_select(proposals, dim=0, index=ix)
    # Pad the batch slice so that it can be concatenated again.
    padding_count = max(self.proposal_count - proposals.shape[0], 0)
    # "pad" fills on dim=-2, appending rows at the bottom.
    proposals = torch.nn.functional.pad(proposals, pad=[0, 0, 0, padding_count])
    return proposals
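# A standalone sketch of the padding trick used above: pad the per-image result
# to a fixed proposal_count so slices from different images can be stacked into
# one batch tensor. The shapes here are made up.
import torch
import torch.nn.functional as F

proposals = torch.rand(3, 4)  # 3 surviving proposals after NMS
proposal_count = 5
padding_count = max(proposal_count - proposals.shape[0], 0)
proposals = F.pad(proposals, pad=[0, 0, 0, padding_count])  # append zero rows
print(proposals.shape)  # torch.Size([5, 4])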
def filter_boxes(self, boxes: torch.Tensor) -> typing.List[torch.Tensor]:
    """Performs NMS and score thresholding

    Args:
        boxes (torch.Tensor): shape [N, B, 5] with (xmin, ymin, xmax, ymax, score)
    Returns:
        list: N tensors of shape [B, 5]
    """
    final_output = []
    for i in range(len(boxes)):
        scores = boxes[i, :, 4]
        # Score thresholding
        keep_idx = scores >= self.confidence_threshold
        boxes_ = boxes[i, keep_idx, :-1]
        scores = scores[keep_idx]
        # Boolean masking always yields a 1-D tensor, so test for emptiness.
        if scores.numel() == 0:
            final_output.append(torch.empty(0, 5))
            continue
        keep_idx = nms(boxes_, scores, self.nms_iou_threshold)
        scores = scores[keep_idx].view(-1, 1)
        boxes_ = boxes_[keep_idx].view(-1, 4)
        output = torch.cat((boxes_, scores), dim=-1)
        final_output.append(output)
    return final_output
def postprocess(x, anchors, regression, classification, regressBoxes, clipBoxes, threshold, iou_threshold):
    transformed_anchors = regressBoxes(anchors, regression)
    transformed_anchors = clipBoxes(transformed_anchors, x)
    scores = torch.max(classification, dim=2, keepdim=True)[0]
    scores_over_thresh = (scores > threshold)[:, :, 0]
    out = []
    for i in range(x.shape[0]):
        # Check this image's detections, not the whole batch.
        if scores_over_thresh[i].sum() == 0:
            out.append({
                'rois': np.array(()),
                'class_ids': np.array(()),
                'scores': np.array(()),
            })
            continue
        classification_per = classification[i, scores_over_thresh[i, :], ...].permute(1, 0)
        transformed_anchors_per = transformed_anchors[i, scores_over_thresh[i, :], ...]
        scores_per = scores[i, scores_over_thresh[i, :], ...]
        anchors_nms_idx = nms(transformed_anchors_per, scores_per[:, 0], iou_threshold=iou_threshold)
        if anchors_nms_idx.shape[0] != 0:
            scores_, classes_ = classification_per[:, anchors_nms_idx].max(dim=0)
            boxes_ = transformed_anchors_per[anchors_nms_idx, :]
            out.append({
                'rois': boxes_.cpu().numpy(),
                'class_ids': classes_.cpu().numpy(),
                'scores': scores_.cpu().numpy(),
            })
        else:
            out.append({
                'rois': np.array(()),
                'class_ids': np.array(()),
                'scores': np.array(()),
            })
    return out
def select_over_all_levels(self, boxlists):
    num_images = len(boxlists)
    results = []
    for i in range(num_images):
        # NMS over the detections concatenated from all levels
        bbox, scores = boxlists[i][:, :4], boxlists[i][:, 4]
        result = nms(bbox, scores, self.nms_thresh)
        result_score = scores[result]
        result_bbox = bbox[result]
        number_of_detections = len(result)
        # Limit to max_per_image detections **over all classes**
        if number_of_detections > self.fpn_post_nms_top_n > 0:
            cls_scores = result_score
            # kthvalue picks the cutoff score without a full sort
            image_thresh, _ = torch.kthvalue(
                cls_scores.cpu(),
                number_of_detections - self.fpn_post_nms_top_n + 1
            )
            keep = cls_scores >= image_thresh.item()
            keep = torch.nonzero(keep).squeeze(1)
            result_bbox = result_bbox[keep]
            result_score = result_score[keep]
        # result_score is 1-D; add a column axis before concatenating
        results.append(torch.cat([result_bbox, result_score.unsqueeze(1)], 1))
    return results
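# A standalone sketch of the kthvalue trick above: keep the top-n scores
# without fully sorting. torch.kthvalue returns the k-th *smallest* value, so
# k = N - top_n + 1 yields the cutoff for the top_n highest. Values are made up.
import torch

scores = torch.tensor([0.9, 0.2, 0.75, 0.6, 0.3])
top_n = 3
thresh, _ = torch.kthvalue(scores, scores.numel() - top_n + 1)
keep = torch.nonzero(scores >= thresh).squeeze(1)
print(keep)  # tensor([0, 2, 3]) — indices of the 3 highest scores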
def nonMaximumSuppression(self, boxes, landms, scores):
    # Confidence thresholding
    inds = torch.where(scores > self.confidence_threshold)[0]
    boxes = boxes[inds]
    landms = landms[inds]
    scores = scores[inds]

    # keep top-K before NMS
    order = scores.argsort(descending=True)[:self.top_k]
    boxes = boxes[order]
    landms = landms[order]
    scores = scores[order]

    # do NMS
    keep = nms(boxes, scores, self.nms_threshold)
    boxes = boxes[keep, :]
    landms = landms[keep]

    # keep top-K after NMS
    boxes = boxes[:self.keep_top_k, :]
    landms = landms[:self.keep_top_k, :]

    boxes = tonumpy(boxes)
    landms = tonumpy(landms)
    return boxes, landms
def forward(self, inputs):
    if self.is_training:
        inputs, annotations = inputs
    x = self.extract_feat(inputs)
    outs = self.bbox_head(x)
    classification = torch.cat([out for out in outs[0]], dim=1)
    regression = torch.cat([out for out in outs[1]], dim=1)
    feature_shapes = [np.array([_.shape[2], _.shape[3]]) for _ in x]
    anchors = self.anchors(inputs, feature_shapes)
    if self.is_training:
        return self.criterion(classification, regression, anchors, annotations)
    else:
        transformed_anchors = self.regressBoxes(anchors, regression)
        transformed_anchors = self.clipBoxes(transformed_anchors, inputs)
        scores = torch.max(classification, dim=2, keepdim=True)[0]
        scores_over_thresh = (scores > self.threshold)[0, :, 0]
        if scores_over_thresh.sum() == 0:
            print('No boxes to NMS')
            # no boxes to NMS, just return
            return [torch.zeros(0), torch.zeros(0), torch.zeros(0, 4)]
        classification = classification[:, scores_over_thresh, :]
        transformed_anchors = transformed_anchors[:, scores_over_thresh, :]
        scores = scores[:, scores_over_thresh, :]
        anchors_nms_idx = nms(transformed_anchors[0, :, :], scores[0, :, 0],
                              iou_threshold=self.iou_threshold)
        nms_scores, nms_class = classification[0, anchors_nms_idx, :].max(dim=1)
        return [nms_scores, nms_class, transformed_anchors[0, anchors_nms_idx, :]]
def visualize(args):
    image_dir = dict(train='mscoco2014/train2014',
                     val='mscoco2014/train2014',
                     trainval='mscoco2014/train2014',
                     test='mscoco2014/val2014')
    dataset = VCOCO(root=os.path.join(args.data_root, image_dir[args.partition]),
                    anno_file=os.path.join(
                        args.data_root,
                        'instances_vcoco_{}.json'.format(args.partition)))
    image, _ = dataset[args.image_idx]
    image_name = dataset.filename(args.image_idx)
    detection_path = os.path.join(args.partition, image_name.replace('.jpg', '.json'))
    with open(detection_path, 'r') as f:
        detections = json.load(f)
    # Remove low-scoring boxes
    box_score_thresh = args.box_score_thresh
    boxes = np.asarray(detections['boxes'])
    scores = np.asarray(detections['scores'])
    keep_idx = np.where(scores >= box_score_thresh)[0]
    boxes = boxes[keep_idx, :]
    scores = scores[keep_idx]
    # Perform NMS
    keep_idx = nms(torch.from_numpy(boxes), torch.from_numpy(scores), args.nms_thresh)
    boxes = boxes[keep_idx]
    scores = scores[keep_idx]
    # Draw boxes
    canvas = ImageDraw.Draw(image)
    for idx in range(boxes.shape[0]):
        coords = boxes[idx, :].tolist()
        canvas.rectangle(coords)
        canvas.text(coords[:2], str(scores[idx])[:4])
    image.show()
def forward(self, inputs):
    features = self.efficientnet(inputs)
    features = self.BIFPN(features[2:])
    regression = torch.cat([self.regressionModel(feature) for feature in features], dim=1)
    classification = torch.cat([self.classificationModel(feature) for feature in features], dim=1)
    anchors = self.anchors(inputs)
    if self.is_training:
        return classification, regression, anchors
    else:
        transformed_anchors = self.regressBoxes(anchors, regression)
        transformed_anchors = self.clipBoxes(transformed_anchors, inputs)
        scores = torch.max(classification, dim=2, keepdim=True)[0]
        scores_over_thresh = (scores > self.threshold)[0, :, 0]
        if scores_over_thresh.sum() == 0:
            print('No boxes to NMS')
            # no boxes to NMS, just return
            return [torch.zeros(0), torch.zeros(0), torch.zeros(0, 4)]
        classification = classification[:, scores_over_thresh, :]
        transformed_anchors = transformed_anchors[:, scores_over_thresh, :]
        scores = scores[:, scores_over_thresh, :]
        anchors_nms_idx = nms(transformed_anchors[0, :, :], scores[0, :, 0], iou_threshold=0.5)
        nms_scores, nms_class = classification[0, anchors_nms_idx, :].max(dim=1)
        return [nms_scores, nms_class, transformed_anchors[0, anchors_nms_idx, :]]
def non_max_suppression(prediction, conf_thres=0.5, nms_thres=0.2):
    """
    Removes detections with an object confidence score lower than 'conf_thres',
    then applies non-maximum suppression to further filter detections.

    Returns detections with shape:
        (x1, y1, x2, y2, object_conf, class_score, class_pred)
    """
    output = [None for _ in range(len(prediction))]
    for image_i, pred in enumerate(prediction):
        # Filter out confidence scores below threshold
        v = pred[:, 4] > conf_thres
        v = v.nonzero().squeeze()
        if len(v.shape) == 0:
            v = v.unsqueeze(0)
        pred = pred[v]

        # If none remain, process the next image
        nP = pred.shape[0]
        if not nP:
            continue

        # From (center x, center y, width, height) to (x1, y1, x2, y2)
        pred[:, :4] = xywh2xyxy(pred[:, :4])

        nms_indices = nms(pred[:, :4], pred[:, 4], nms_thres)
        det_max = pred[nms_indices]
        if len(det_max) > 0:
            # Add max detections to outputs
            output[image_i] = det_max if output[image_i] is None else torch.cat(
                (output[image_i], det_max))
    return output
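# A hypothetical call sketch for non_max_suppression: one image with three
# predictions in the (cx, cy, w, h, obj_conf, class_score, class_pred) layout
# the docstring describes. Values are made up; xywh2xyxy is the source's own
# helper and is assumed to be in scope.
import torch

pred = torch.tensor([[[100., 100., 40., 40., 0.9, 0.8, 0.],
                      [102., 102., 40., 40., 0.7, 0.7, 0.],
                      [300., 300., 20., 20., 0.3, 0.9, 1.]]])
out = non_max_suppression(pred, conf_thres=0.5, nms_thres=0.2)
print(out[0])  # one row: overlap suppressed, low-confidence box filtered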
def forward(self, detections: torch.Tensor, bboxes: torch.Tensor,
            scores: torch.Tensor, iou_threshold: torch.Tensor) -> torch.Tensor:
    """
    Improvised from torch retinaface.
    TODO : consider more extensible version, check dimension
    """
    if not len(scores.shape) == 1:
        raise RuntimeError(
            "expects `scores` to be a 1-dimensional tensor, got %s with shape of %s"
            % (len(scores.shape), scores.shape))
    if not len(bboxes.shape) == 2:
        raise RuntimeError(
            "expects `bboxes` to be a 2-dimensional tensor, got %s with shape of %s"
            % (len(bboxes.shape), bboxes.shape))
    if not len(detections.shape) == 3:
        raise RuntimeError(
            "expects `detections` to be a 3-dimensional tensor, got %s with shape of %s"
            % (len(detections.shape), detections.shape))
    if detections.size()[0] > 1:
        raise RuntimeError("current version only supports a single batch")
    keep = ops.nms(bboxes, scores, iou_threshold)
    # Select the kept detections along the box dimension (dim=1).
    detections = detections.index_select(1, keep)
    return detections
def forward(self, inputs):
    if self.training:
        img_batch, annotations = inputs
    else:
        img_batch = inputs
    # ResNet stem
    x = self.conv1(img_batch)
    x = self.bn1(x)
    x = self.relu(x)
    x = self.maxpool(x)
    # ResNet stages
    x1 = self.layer1(x)
    x2 = self.layer2(x1)
    x3 = self.layer3(x2)
    x4 = self.layer4(x3)
    features = self.fpn([x2, x3, x4])
    regression = torch.cat([self.regressionModel(feature) for feature in features], dim=1)
    classification = torch.cat([self.classificationModel(feature) for feature in features], dim=1)
    anchors = self.anchors(img_batch)
    if self.training:
        return self.focalLoss(classification, regression, anchors, annotations)
    else:
        transformed_anchors = self.regressBoxes(anchors, regression)
        transformed_anchors = self.clipBoxes(transformed_anchors, img_batch)
        scores = torch.max(classification, dim=2, keepdim=True)[0]
        scores_over_thresh = (scores > 0.05)[0, :, 0]
        if scores_over_thresh.sum() == 0:
            # no boxes to NMS, just return
            return [torch.zeros(0), torch.zeros(0), torch.zeros(0, 4)]
        classification = classification[:, scores_over_thresh, :]
        transformed_anchors = transformed_anchors[:, scores_over_thresh, :]
        scores = scores[:, scores_over_thresh, :]
        anchors_nms_idx = nms(transformed_anchors[0, :, :], scores[0, :, 0], 0.3)
        nms_scores, nms_class = classification[0, anchors_nms_idx, :].max(dim=1)
        return [nms_scores, nms_class, transformed_anchors[0, anchors_nms_idx, :]]