def decoder(self, ims, anchors, cls_score, bbox_pred, thresh=0.6, nms_thresh=0.2, test_conf=None):
    """Decode predictions into post-NMS detections for the first batch element.

    Args:
        ims: Passed to ``clip_boxes`` together with the decoded boxes.
        anchors: Anchor tensor, indexed here as ``[batch, anchor, values]``.
        cls_score: Class scores, indexed here as ``[batch, anchor, class]``.
        bbox_pred: Regression output decoded against ``anchors``.
        thresh: Minimum best-class score an anchor needs to be kept.
        nms_thresh: Threshold forwarded to ``nms``.
        test_conf: When not ``None``, used instead of ``thresh``.

    Returns:
        ``[scores, classes, boxes]`` where every row of ``boxes`` is the
        decoded box followed by its source anchor. When no anchor reaches the
        threshold the placeholders ``[zeros(1), zeros(1), zeros(1, 5)]`` are
        returned instead.
    """
    score_floor = thresh if test_conf is None else test_conf

    decoded = self.box_coder.decode(anchors, bbox_pred, mode='xywht')
    decoded = clip_boxes(decoded, ims)

    # Best class score per anchor; kept 3-D so it can be concatenated below.
    best_scores = torch.max(cls_score, dim=2, keepdim=True)[0]
    selected = (best_scores >= score_floor)[0, :, 0]
    if selected.sum() == 0:
        return [torch.zeros(1), torch.zeros(1), torch.zeros(1, 5)]

    best_scores = best_scores[:, selected, :]
    kept_anchors = anchors[:, selected, :]
    kept_cls = cls_score[:, selected, :]
    decoded = decoded[:, selected, :]

    # NMS runs on [box, score] rows of the first batch element only.
    nms_input = torch.cat([decoded, best_scores], dim=2)[0, :, :]
    survivors = nms(nms_input, nms_thresh)

    nms_scores, nms_class = kept_cls[0, survivors, :].max(dim=1)
    output_boxes = torch.cat(
        [decoded[0, survivors, :], kept_anchors[0, survivors, :]], dim=1)
    return [nms_scores, nms_class, output_boxes]
def post_process(self, im, sim_ops, scale_factor=1):
    """
    MUST HAVE FUNCTION IN ALL NETWORKS !!!!
    Post-process the network outputs and draw the per-class detections.

    ``sim_ops`` is read positionally: index 1 supplies the class scores,
    index 2 the box deltas and index 3 the ROIs (index 0 is not used here).
    ROI columns 1..4 are divided by ``scale_factor`` before decoding.
    Nothing is returned; the detections of every non-background class are
    handed to ``self._vis_detections``.
    """
    CONF_THRESH = 0.6
    NMS_THRESH = 0.4

    # Reverse the channel order of the image before plotting.
    im = im[:, :, (2, 1, 0)]
    scores, box_deltas, rois = sim_ops[1], sim_ops[2], sim_ops[3]

    pred_boxes = bbox_transform_inv(rois[:, 1:5] / scale_factor, box_deltas, False)
    pred_boxes = self._clip_boxes(pred_boxes, im.shape)

    fig, ax = plt.subplots(figsize=(12, 12))
    ax.imshow(im, aspect='equal')

    # Entry 0 of self.classes is skipped (background), so numbering starts at 1.
    for cls_ind, cls in enumerate(self.classes[1:], start=1):
        first_col = 4 * cls_ind
        cls_boxes = pred_boxes[:, first_col:first_col + 4]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack(
            (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        dets = dets[nms(dets, NMS_THRESH), :]
        self._vis_detections(im, cls, dets, ax, thresh=CONF_THRESH)
def im_detect(model, src, target_sizes, use_gpu=True, conf=None):
    """Run detection at one or several target sizes and merge the results.

    A single size (an int or a one-element sequence) returns the output of
    ``single_scale_detect`` unchanged. With several sizes, the non-empty
    per-scale detections are stacked and filtered by one NMS pass
    (threshold 0.1) that uses columns 2..6 as the box and column 1 as the
    score. If every scale comes back empty, an empty ``(0, 7)`` array is
    returned.
    """
    if isinstance(target_sizes, int):
        target_sizes = [target_sizes]

    if len(target_sizes) == 1:
        return single_scale_detect(model, src, target_size=target_sizes[0],
                                   use_gpu=use_gpu, conf=conf)

    merged = None
    for scale in target_sizes:
        scale_dets = single_scale_detect(model, src, target_size=scale,
                                         use_gpu=use_gpu, conf=conf)
        if scale_dets.shape[0] == 0:
            continue
        merged = scale_dets if merged is None else np.vstack((merged, scale_dets))

    if merged is None:
        return np.zeros((0, 7))

    # Rearrange to [box columns, score] for NMS.
    nms_input = np.hstack(
        (merged[:, 2:7], merged[:, 1][:, np.newaxis])).astype(np.float32, copy=False)
    return merged[nms(nms_input, 0.1), :]
def __call__(self, image):
    """Detect objects chip by chip and merge the results per label.

    :param image: rgb image
    :return: dict mapping a label name to an array whose rows are
        ``[x1, y1, x2, y2, score]``; labels with no score above
        ``self.thresh`` are absent
    """
    boxes = np.empty((0, 4))
    scores = np.empty((0, self.labels_numb))
    for chip, origin in self.__chips__(image):
        shift = [float(origin[0]), float(origin[1]),
                 float(origin[0]), float(origin[1])]
        chip_boxes, chip_scores = self.__net__(chip)
        # Shift the chip boxes by the chip origin (added to both corners).
        chip_boxes += shift
        boxes = np.vstack((boxes, chip_boxes))
        scores = np.vstack((scores, chip_scores))

    # filter bounding boxes; label index 0 is skipped
    results = {}
    for label_idx in range(1, self.labels_numb):
        label_scores = scores[:, label_idx]
        hits = np.where(label_scores > self.thresh)[0]
        if len(hits) == 0:
            continue
        dets = np.hstack(
            (boxes[hits], label_scores[hits][:, np.newaxis])
        ).astype(np.float32, copy=False)
        survivors = nms(dets, 0.45, force_cpu=0)
        results[self.labels_name[label_idx]] = dets[survivors, :]
    return results
def predict(self, img, threshold=0.6):
    """Run the detector on one image and return its post-NMS detections.

    Args:
        img: Image array, or a path string that is loaded with ``cv2.imread``.
        threshold: Currently unused; candidates are selected with a fixed
            score cut-off of 0.45. Kept so existing callers keep working.

    Returns:
        Tuple ``(bboxes_out, label_text, classes_out, scores_out, c_dets)``:
        the first four columns of every kept detection as a list, the names
        of the detected classes that are present in ``self.categories``, the
        class id and score of every kept detection, and the detection array
        of the last class processed (empty when that class had no candidate).
    """
    if isinstance(img, str):
        img = cv2.imread(img)

    boxes, scores = self.sess.run(self.net.get_output(), feed_dict={'input:0': img})
    boxes = boxes[0]
    scores = scores[0]
    # scale each detection back up to the image
    boxes *= [img.shape[1], img.shape[0], img.shape[1], img.shape[0]]

    bboxes_out = []
    scores_out = []
    classes_out = []
    # Bound up front so the return below is valid even if the loop never runs.
    c_dets = np.empty((0, boxes.shape[1] + 1), dtype=np.float32)
    for j in range(1, self.num_classes + 1):
        inds = np.where(scores[:, j] > 0.45)[0]
        if len(inds) == 0:
            # No candidate for this class: skip NMS instead of feeding it an
            # empty array, but still report an empty result for the class.
            c_dets = np.empty((0, boxes.shape[1] + 1), dtype=np.float32)
            continue
        c_bboxes = boxes[inds]
        c_scores = scores[inds, j]
        c_dets = np.hstack(
            (c_bboxes, c_scores[:, np.newaxis])).astype(np.float32, copy=False)
        keep = nms(c_dets, 0.45, force_cpu=True)
        c_dets = c_dets[keep, :]
        for det in c_dets:
            bboxes_out.append([det[0], det[1], det[2], det[3]])
            scores_out.append(det[4])
            classes_out.append(j)

    # Class ids missing from self.categories contribute no label text.
    label_text = [self.categories[cls_id]['name']
                  for cls_id in classes_out if cls_id in self.categories]
    return bboxes_out, label_text, classes_out, scores_out, c_dets
def nms_process(num_classes, i, scores, boxes, cfg, min_thresh, all_boxes, max_per_image):
    """Fill ``all_boxes[j][i]`` with the post-NMS detections of image ``i``.

    For every class ``j`` from 1 to ``num_classes - 1`` (category 0 is the
    background and is ignored), boxes scoring above ``min_thresh`` go through
    ``nms`` and at most ``cfg.test_cfg.keep_per_class`` survivors are stored
    as ``[box columns, score]`` rows. When ``max_per_image`` is positive and
    the image holds more detections than that, only rows whose score reaches
    the ``max_per_image``-th best score are retained. ``all_boxes`` is
    modified in place; nothing is returned.
    """
    class_ids = range(1, num_classes)

    for j in class_ids:
        class_scores = scores[:, j]
        inds = np.where(class_scores > min_thresh)[0]
        if len(inds) == 0:
            all_boxes[j][i] = np.empty([0, 5], dtype=np.float32)
            continue
        c_dets = np.hstack(
            (boxes[inds], class_scores[inds][:, np.newaxis])
        ).astype(np.float32, copy=False)
        # The soft-NMS switch is passed through as nms' force_cpu argument.
        soft_nms = cfg.test_cfg.soft_nms
        keep = nms(c_dets, cfg.test_cfg.iou, force_cpu=soft_nms)
        # keep only the highest boxes
        all_boxes[j][i] = c_dets[keep[:cfg.test_cfg.keep_per_class], :]

    if max_per_image <= 0:
        return

    image_scores = np.hstack([all_boxes[j][i][:, -1] for j in class_ids])
    if len(image_scores) <= max_per_image:
        return

    image_thresh = np.sort(image_scores)[-max_per_image]
    for j in class_ids:
        dets = all_boxes[j][i]
        all_boxes[j][i] = dets[np.where(dets[:, -1] >= image_thresh)[0], :]
def get_proposal(cls_score_pred, bbox_pred, image_raw_size, anchor_list, num_anchors):
    """Turn score/box predictions into at most 2000 post-NMS proposals.

    The last-axis channels from ``num_anchors`` onward of ``cls_score_pred``
    are used as scores. Boxes are recovered with ``bbox_inv``, reduced by
    ``get_top_n`` (``top=12000``), filtered by NMS at 0.7 and truncated to the
    first 2000 survivors.

    Returns:
        ``(bois, scores)``: ``bois`` is float32 with a leading all-zero column
        followed by the four box columns; ``scores`` is a float32 column with
        one row per proposal.
    """
    NMS_THRESH = float(0.7)
    PRE_NMS_TOP_N = 12000
    POST_NMS_TOP_N = 2000

    flat_scores = cls_score_pred[:, :, :, num_anchors:].reshape(-1)
    flat_deltas = bbox_pred.reshape(-1, 4)

    # get the origin box
    candidates = bbox_inv(anchor_list, flat_deltas, image_raw_size)
    # get top n
    top_boxes, top_scores = get_top_n(candidates, flat_scores, top=PRE_NMS_TOP_N)

    # nms
    top_boxes = top_boxes.reshape(-1, 4).astype(np.float32)
    top_scores = top_scores.reshape(-1, 1).astype(np.float32)
    keep = nms(np.hstack((top_boxes, top_scores)), NMS_THRESH)[:POST_NMS_TOP_N]
    top_boxes = top_boxes[keep]
    top_scores = top_scores[keep]

    # Leading zero column (presumably the batch index of a single image).
    batch_col = np.zeros((top_boxes.shape[0], 1), dtype=np.float32)
    return np.hstack((batch_col, top_boxes)), top_scores
def im_detect(img, net, detector, transform, thresh=0.01):
    """Detect objects in one image and return per-class NMS results.

    Each prior is assigned its best-scoring class; priors whose best class is
    index 0 or whose score is not above ``thresh`` are discarded. The rest is
    grouped by class, passed through ``nms`` (``cfg.TEST.NMS_OVERLAP``) and
    limited to 50 rows per class.

    Args:
        img: Input image handed to ``transform``.
        net: Network called on the transformed, CUDA-resident input.
        detector: Object whose ``forward`` turns the network output into
            ``(boxes, scores)``.
        transform: Preprocessing callable; element 0 of its result is used.
        thresh: Minimum score for a detection to be kept.

    Returns:
        Array with six columns: four box values, score, class id. The first
        two box columns are clipped at 0 and the last two at 1, then all four
        are multiplied by 512 and shifted by the fixed offset (1100, 700).
        An empty ``(0, 6)`` array is returned when nothing qualifies.
    """
    with torch.no_grad():
        t0 = time.time()
        x = transform(img)[0].unsqueeze(0)
        x = x.cuda()
        t1 = time.time()
        output = net(x)
        boxes, scores = detector.forward(output)
        t2 = time.time()

        max_conf, max_id = scores[0].topk(1, 1, True, True)
        pos = max_id > 0
        # Test the mask content, not its length: len(pos) is the number of
        # priors and is non-zero even when no prior is positive.
        if not pos.any():
            return np.empty((0, 6))
        boxes = boxes[0][pos.view(-1, 1).expand(len(pos), 4)].view(-1, 4)
        scores = max_conf[pos].view(-1, 1)
        max_id = max_id[pos].view(-1, 1)

        inds = scores > thresh
        if not inds.any():
            return np.empty((0, 6))
        boxes = boxes[inds.view(-1, 1).expand(len(inds), 4)].view(-1, 4)
        scores = scores[inds].view(-1, 1)
        max_id = max_id[inds].view(-1, 1)
        c_dets = torch.cat((boxes, scores, max_id.float()), 1).cpu().numpy()

        per_class = []
        for cls in np.unique(c_dets[:, -1]):
            cls_rows = np.where(c_dets[:, -1] == cls)[0]
            image_pred_class = c_dets[cls_rows, :]
            keep = nms(image_pred_class[:, :5], cfg.TEST.NMS_OVERLAP, force_cpu=False)
            keep = keep[:50]
            per_class.append(image_pred_class[keep, :])
        # c_dets has at least one row here, so at least one class was processed.
        output = np.concatenate(per_class, axis=0)

        output[:, 0:2][output[:, 0:2] < 0] = 0
        output[:, 2:4][output[:, 2:4] > 1] = 1
        # Fixed scale and offset applied to the box values after clipping.
        scale = np.array([512, 512, 512, 512])
        output[:, :4] = output[:, :4] * scale
        roi_offset = np.array((1100, 700))
        output[:, :2] += roi_offset
        output[:, 2:4] += roi_offset

        t3 = time.time()
        print("transform_t:", round(t1 - t0, 3), "detect_time:", round(t2 - t1, 3), "nms_time:", round(t3 - t2, 3))
    return output
def facebox_detect(self, img_raw):
    """Detect boxes and their associated coords in one image.

    The image is converted to float32, the per-channel values (104, 117, 123)
    are subtracted and the result is fed to ``self.model``, which yields
    ``loc``, ``conf`` and ``coords``. Boxes are scaled by (w, h, w, h) and
    coords by (w, h) repeated five times. Detections are thresholded, reduced
    to the ``top_k`` best scores, passed through NMS and truncated to
    ``keep_top_k``; rows containing any negative box/score value are dropped.

    Returns:
        Array with one row per detection: box columns, score, then the
        coords columns.
    """
    settings = self.cfg

    img = np.float32(img_raw)
    im_height, im_width, _ = img.shape
    scale = torch.Tensor([im_width, im_height, im_width, im_height])  # w, h, w, h
    scale_coords = torch.Tensor(np.tile([im_width, im_height], 5))
    img -= (104, 117, 123)
    img = torch.from_numpy(img.transpose(2, 0, 1)).unsqueeze(0).to(self.device)
    scale = scale.to(self.device)
    scale_coords = scale_coords.to(self.device)

    loc, conf, coords = self.model(img)  # forward pass
    print("bbbb", loc.shape, conf.shape, coords.shape)

    priors = PriorBox(settings, image_size=(im_height, im_width)).forward()
    priors = priors.to(self.device)
    boxes = decode(loc.data.squeeze(0), priors.data, settings['variance'])
    coords = decode_f(coords, settings['variance'])
    boxes = boxes * scale
    coords = coords * scale_coords
    # .data is used so the tensor can be converted to numpy.
    coords = coords.data.squeeze(0).cpu().numpy()
    boxes = boxes.cpu().numpy()
    scores = conf.data.cpu().numpy()[:, 1]

    # ignore low scores
    inds = np.where(scores > settings['confidence_threshold'])[0]
    boxes, scores, coords = boxes[inds], scores[inds], coords[inds]

    # keep top-K before NMS
    order = scores.argsort()[::-1][:settings['top_k']]
    boxes, scores, coords = boxes[order], scores[order], coords[order]

    # do NMS; the same survivors are applied to the coords
    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
    keep = nms(dets, settings['nms_threshold'], False)
    limit = settings['keep_top_k']
    boxes_score = dets[keep, :][:limit, :]
    coords = coords[keep, :][:limit, :]

    # Drop every row that contains a negative value.
    valid = ~(boxes_score < 0).any(axis=1)
    return np.hstack((boxes_score[valid], coords[valid]))
def get_results(prediction, confidence, num_classes, nms_conf = 0.4):
    """Filter raw predictions by objectness and per-class NMS.

    Args:
        prediction: Tensor indexed as ``[batch, box, attribute]``. Attributes
            0..3 are read as centre x, centre y, width and height, attribute
            4 as the objectness score, and attributes ``5..5+num_classes`` as
            class scores.
        confidence: Boxes whose objectness is not above this value are zeroed
            out and ignored.
        num_classes: Number of class-score attributes to consider.
        nms_conf: Threshold forwarded to ``nms``.

    Returns:
        Tensor with one row per kept detection and eight columns: batch
        index, the four corner coordinates, objectness, best class score and
        best class index. When nothing survives, an empty ``(0, 8)`` tensor is
        returned (previously an uninitialised row leaked out). The legacy
        sentinel ``0`` is still returned if the initial non-zero lookup fails.
    """
    conf_mask = (prediction[:, :, 4] > confidence).float().unsqueeze(2)
    # Multiplication yields a new tensor, so the caller's data is not changed.
    prediction = prediction * conf_mask

    try:
        torch.nonzero(prediction[:, :, 4]).transpose(0, 1).contiguous()
    except Exception:
        return 0

    # Convert (centre x, centre y, width, height) to corner coordinates.
    centre_x = prediction[:, :, 0]
    centre_y = prediction[:, :, 1]
    half_w = prediction[:, :, 2] / 2
    half_h = prediction[:, :, 3] / 2
    corners = torch.stack(
        (centre_x - half_w, centre_y - half_h, centre_x + half_w, centre_y + half_h),
        dim=2)
    prediction[:, :, :4] = corners

    detections = []
    for ind in range(prediction.size(0)):
        image_pred = prediction[ind]

        # Replace the per-class scores by the best score and its class index.
        max_conf, max_conf_idx = torch.max(image_pred[:, 5:5 + num_classes], 1)
        image_pred = torch.cat(
            (image_pred[:, :5],
             max_conf.float().unsqueeze(1),
             max_conf_idx.float().unsqueeze(1)),
            1)

        # Get rid of the zero entries
        non_zero_ind = torch.nonzero(image_pred[:, 4])
        image_pred_ = image_pred[non_zero_ind.squeeze(), :].view(-1, 7)

        # Get the various classes detected in the image
        try:
            img_classes = unique(image_pred_[:, -1])
        except Exception:
            continue

        for cls in img_classes:
            cls_mask = image_pred_ * (image_pred_[:, -1] == cls).float().unsqueeze(1)
            class_mask_ind = torch.nonzero(cls_mask[:, -2]).squeeze()
            image_pred_class = image_pred_[class_mask_ind].view(-1, 7)

            keep = nms(image_pred_class.cpu().numpy(), nms_conf, force_cpu=True)
            image_pred_class = image_pred_class[keep]

            batch_ind = image_pred_class.new(image_pred_class.size(0), 1).fill_(ind)
            detections.append(torch.cat((batch_ind, image_pred_class), 1))

    if not detections:
        return prediction.new_zeros((0, 8))
    return torch.cat(detections)
def test_net(net,img,name,detector,transform,priors,top_k=200,thresh=0.01):
    """Detect objects in ``img``, draw them and save the figure as EPS.

    Args:
        net: Network, called as ``net(x, test=True)`` on a CUDA input.
        img: Image array; converted with ``cv2.COLOR_BGR2RGB`` for display.
        name: Image file name; the figure is saved next to it with the
            extension replaced by ``.eps``.
        detector: Object whose ``forward(out, priors)`` yields
            ``(boxes, scores)``.
        transform: Preprocessing callable applied to ``img``.
        priors: Prior boxes forwarded to the detector.
        top_k: Unused; kept for interface compatibility.
        thresh: Minimum class score for a box to be considered.

    Class indices 1..20 are processed, each with NMS at 0.45. The figure is
    saved and shown even when nothing is detected.
    """
    import os

    scale = torch.Tensor([img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
    with torch.no_grad():
        x = transform(img).unsqueeze(0)
        x = x.cuda()
        scale = scale.cuda()
        out = net(x, test=True)
        boxes, scores = detector.forward(out, priors)
        boxes = boxes[0]
        scores = scores[0]
        boxes *= scale
    boxes = boxes.cpu().numpy()
    scores = scores.cpu().numpy()

    per_class = []
    for j in range(1, 21):
        inds = np.where(scores[:, j] > thresh)[0]
        if len(inds) == 0:
            continue
        c_bboxes = boxes[inds]
        c_scores = scores[inds, j]
        c_dets = np.hstack((c_bboxes, c_scores[:, np.newaxis])).astype(
            np.float32, copy=False)
        keep = nms(c_dets, 0.45, force_cpu=True)
        c_dets = c_dets[keep, :]
        cls_column = np.ones(c_dets.shape[0]) * j
        per_class.append(np.column_stack((c_dets, cls_column)))
    # An empty (0, 6) array keeps the drawing loop valid when nothing is found.
    result = np.vstack(per_class) if per_class else np.empty((0, 6))

    rgb_image = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    colors = plt.cm.hsv(np.linspace(0, 1, 21)).tolist()
    plt.imshow(rgb_image)
    currentAxis = plt.gca()
    for (x1, y1, x2, y2, s, cls) in result:
        x1 = int(x1)
        y1 = int(y1)
        x2 = int(x2)
        y2 = int(y2)
        cls = int(cls)
        title = "%s:%.2f" % (CLASSES[cls], s)
        coords = (x1, y1), x2 - x1 + 1, y2 - y1 + 1
        color = colors[cls]
        currentAxis.add_patch(plt.Rectangle(*coords, fill=False, edgecolor=color, linewidth=2))
        currentAxis.text(x1, y1, title, bbox={'facecolor': color, 'alpha': 0.5})
    plt.axis('off')
    # splitext strips only the final extension, so names such as "./a.jpg"
    # no longer collapse to an empty stem.
    plt.savefig(os.path.splitext(name)[0] + '.eps', format='eps', bbox_inches='tight')
    plt.show()
def predict(self, img_name):
    """Detect faces in the image stored at ``img_name``.

    The image is loaded in colour, the per-channel values (104, 117, 123) are
    subtracted and the result is passed through ``self.net``. Boxes are
    decoded against freshly generated priors, scaled to the image size,
    thresholded with ``self.confidence_threshold``, limited to ``self.top_k``
    before NMS and to ``self.keep_top_k`` after it.

    Args:
        img_name: Path of the image to load with ``cv2.imread``.

    Returns:
        float32 array with one ``[box columns, score]`` row per detection.

    Raises:
        ValueError: If the image cannot be read.
    """
    raw = cv2.imread(img_name, cv2.IMREAD_COLOR)
    if raw is None:
        # cv2.imread signals failure with None rather than an exception.
        raise ValueError("could not read image: {!r}".format(img_name))
    img = np.float32(raw)

    im_height, im_width, _ = img.shape
    scale = torch.Tensor([im_width, im_height, im_width, im_height])
    img -= (104, 117, 123)
    img = img.transpose(2, 0, 1)
    img = torch.from_numpy(img).unsqueeze(0)
    img = img.to(self.device)
    scale = scale.to(self.device)

    _t = {'forward_pass': Timer(), 'misc': Timer()}
    _t['forward_pass'].tic()
    loc, conf = self.net(img)  # forward pass
    _t['forward_pass'].toc()

    _t['misc'].tic()
    priorbox = PriorBox(cfg, image_size=(im_height, im_width))
    priors = priorbox.forward()
    priors = priors.to(self.device)
    prior_data = priors.data
    boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
    boxes = boxes * scale
    boxes = boxes.cpu().numpy()
    scores = conf.data.cpu().numpy()[:, 1]

    # ignore low scores
    inds = np.where(scores > self.confidence_threshold)[0]
    boxes = boxes[inds]
    scores = scores[inds]

    # keep top-K before NMS
    order = scores.argsort()[::-1][:self.top_k]
    boxes = boxes[order]
    scores = scores[order]

    # do NMS
    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
    keep = nms(dets, self.nms_threshold, force_cpu=self.cpu)
    dets = dets[keep, :]

    # keep top-K after NMS
    dets = dets[:self.keep_top_k, :]
    _t['misc'].toc()
    return dets
def predict(cls, input):
    """Run the detector on one image and return the detections as lists.

    Args:
        input: Image-like object convertible with ``np.array`` and indexed as
            ``[row, column, channel]``; the channel order is reversed before
            preprocessing.

    Returns:
        dict with three lists: ``'pos'`` (the four box values of every
        detection, flattened), ``'cls_inds'`` (class index per detection) and
        ``'scores'`` (score per detection). All three are empty when nothing
        is detected.
    """
    net, priors, _preprocess, detector = cls.get_model()
    np_image = np.array(input)
    image = np_image[:, :, ::-1].copy()
    w, h = image.shape[1], image.shape[0]
    img = _preprocess(image).unsqueeze(0)
    if cfg.test_cfg.cuda:
        img = img.cuda()
    scale = torch.Tensor([w, h, w, h])
    out = net(img)
    boxes, scores = detector.forward(out, priors)
    boxes = (boxes[0] * scale).cpu().numpy()
    scores = scores[0].cpu().numpy()

    allboxes = []
    for j in range(1, cfg.model.m2det_config.num_classes):
        inds = np.where(scores[:, j] > cfg.test_cfg.score_threshold)[0]
        if len(inds) == 0:
            continue
        c_bboxes = boxes[inds]
        c_scores = scores[inds, j]
        c_dets = np.hstack(
            (c_bboxes, c_scores[:, np.newaxis])).astype(np.float32, copy=False)
        # The soft-NMS switch is passed through as nms' force_cpu argument.
        soft_nms = cfg.test_cfg.soft_nms
        keep = nms(c_dets, cfg.test_cfg.iou, force_cpu=soft_nms)
        keep = keep[:cfg.test_cfg.keep_per_class]
        c_dets = c_dets[keep, :]
        allboxes.extend([_.tolist() + [j] for _ in c_dets])

    # np.array([]) is 1-D and cannot be column-sliced; use an explicit empty
    # (0, 6) array so an image without detections yields empty lists.
    allboxes = np.array(allboxes) if allboxes else np.empty((0, 6))
    boxes = allboxes[:, :4]
    scores = allboxes[:, 4]
    cls_inds = allboxes[:, 5]

    response = {}
    response['pos'] = list(boxes.reshape(-1))
    response['cls_inds'] = list(cls_inds)
    response['scores'] = list(scores)
    return response
def post_process(self, im, sim_ops, scale_factor=1):
    """
    MUST HAVE FUNCTION IN ALL NETWORKS !!!!
    Post-process the network outputs, print them for debugging and draw the
    per-class detections.

    Expected call:
    self.post_process(im, [cls_score, cls_prob, bbox_pred, rois], scale_factor)

    ``cls_score`` and ``bbox_pred`` are converted with ``convert_to_float_py``
    using the ``'fl'`` entries of layers 77 and 78 in ``self._layer_map``;
    the scores used for detection come from ``cls_prob``.
    """
    CONF_THRESH = 0.6
    NMS_THRESH = 0.4

    # Dump every input before processing.
    for position, title in enumerate(("cls_score:\n", "cls_prob:\n", "bbox_pred:\n", "rois:\n")):
        print(title)
        print(sim_ops[position])
    print("scale_factor:\n")
    print(scale_factor)

    im = im[:, :, (2, 1, 0)]
    # Converted as in the original flow; the result is not used further down.
    cls_score = convert_to_float_py(sim_ops[0], self._layer_map[77]['fl'])
    scores = sim_ops[1]
    box_deltas = convert_to_float_py(sim_ops[2], self._layer_map[78]['fl'])
    rois = sim_ops[3]

    boxes = rois[:, 1:5] / scale_factor
    pred_boxes = bbox_transform_inv(boxes, box_deltas, False)
    pred_boxes = self._clip_boxes(pred_boxes, im.shape)

    fig, ax = plt.subplots(figsize=(12, 12))
    ax.imshow(im, aspect='equal')

    # Entry 0 of self.classes is skipped (background), so numbering starts at 1.
    for cls_ind, cls in enumerate(self.classes[1:], start=1):
        first_col = 4 * cls_ind
        cls_boxes = pred_boxes[:, first_col:first_col + 4]
        print("TL DEBUG, pred_boxes shape: %s, cls_boxes shape: %s, scores shape: %s, cls_scores index: %d\n" %(str(pred_boxes.shape),str(cls_boxes.shape),str(scores.shape), cls_ind))
        cls_scores = scores[:, cls_ind]
        print(cls_scores)
        dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
        dets = dets[nms(dets, NMS_THRESH), :]
        self._vis_detections(im, cls, dets, ax, thresh=CONF_THRESH)
    plt.show()
def detect_face(net, img, resize):
    """Detect faces in ``img`` and return ``[box columns, score]`` rows.

    Args:
        net: Network returning a 3-tuple whose first two items are ``loc``
            and ``conf``; the third item is forwarded to ``PriorBox``.
        img: Image array indexed as ``[row, column, channel]``. It is not
            modified; processing happens on a float32 copy.
        resize: Scale factor applied to the image before detection; boxes are
            divided by it again before being returned.

    Returns:
        float32 array of detections after score thresholding
        (``args.confidence_threshold``), pre-NMS top-k (``args.top_k``), NMS
        (``args.nms_threshold``) and post-NMS top-k (``args.keep_top_k``).
    """
    if resize != 1:
        img = cv2.resize(img, None, None, fx=resize, fy=resize,
                         interpolation=cv2.INTER_LINEAR)
    # Work on a float32 copy: the in-place subtraction below must neither
    # alter the caller's array nor fail on integer images.
    img = img.astype(np.float32)

    im_height, im_width, _ = img.shape
    scale = torch.Tensor([im_width, im_height, im_width, im_height])
    img -= (104, 117, 123)
    img = img.transpose(2, 0, 1)
    img = torch.from_numpy(img).unsqueeze(0)
    if args.cuda:
        img = img.cuda()
        scale = scale.cuda()

    out = net(img)  # forward pass
    priorbox = PriorBox(cfg, out[2], (im_height, im_width), phase='test')
    priors = priorbox.forward()
    if args.cuda:
        priors = priors.cuda()
    loc, conf, _ = out
    print(loc.size(), conf.size())
    prior_data = priors.data
    boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
    boxes = boxes * scale / resize
    boxes = boxes.cpu().numpy()
    scores = conf.data.cpu().numpy()[:, 1]

    # ignore low scores
    inds = np.where(scores > args.confidence_threshold)[0]
    boxes = boxes[inds]
    scores = scores[inds]

    # keep top-K before NMS
    order = scores.argsort()[::-1][:args.top_k]
    boxes = boxes[order]
    scores = scores[order]

    # do NMS
    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
    keep = nms(dets, args.nms_threshold, force_cpu=args.cpu)
    dets = dets[keep, :]

    # keep top-K after NMS
    dets = dets[:args.keep_top_k, :]
    return dets
def im_detect_batch(imgs, img_info, net, detector, thresh=0.01, num_classes=10):
    """Detect objects in a batch of images and time the individual stages.

    Args:
        imgs: Sequence of preprocessed images stacked into one CUDA tensor.
        img_info: Per-image pairs; entries 0 and 1 scale the box columns as
            (0, 1, 0, 1).
        net: Network called on the batch.
        detector: Object whose ``forward`` yields ``(boxes, scores)``.
        thresh: Minimum class score for a box to be considered.
        num_classes: Number of classes; index 0 is skipped.

    Returns:
        ``(boxes_batch, fps_time, forward_time, detect_time, nms_time)`` where
        ``boxes_batch[class][image]`` holds at most 50 post-NMS
        ``[box columns, score]`` rows. Entries of class 0 stay empty lists.
    """
    boxes_batch = [[[] for _ in range(len(imgs))] for _ in range(num_classes)]

    with torch.no_grad():
        t_start = time.time()
        x = torch.from_numpy(np.array(imgs))
        print(x.shape)
        x = x.cuda()
        output = net(x)
        t_forward = time.time()
        boxes, scores = detector.forward(output)
        t_detect = time.time()

        for img_idx in range(boxes.size(0)):
            img_boxes = boxes[img_idx].cpu().numpy()
            img_scores = scores[img_idx].cpu().numpy()
            img_wh = img_info[img_idx]
            # Scale the box columns in place with the per-image factors.
            img_boxes *= np.array([img_wh[0], img_wh[1], img_wh[0], img_wh[1]])

            for j in range(1, num_classes):
                class_scores = img_scores[:, j]
                inds = np.where(class_scores > thresh)[0]
                if len(inds) == 0:
                    boxes_batch[j][img_idx] = np.empty([0, 5], dtype=np.float32)
                    continue
                c_dets = np.hstack(
                    (img_boxes[inds], class_scores[inds][:, np.newaxis])
                ).astype(np.float32, copy=False)
                keep = nms(c_dets, cfg.TEST.NMS_OVERLAP, force_cpu=True)
                boxes_batch[j][img_idx] = c_dets[keep[:50], :]

        t_end = time.time()

    forward_time = t_forward - t_start
    detect_time = t_detect - t_forward
    nms_time = t_end - t_detect
    fps_time = t_end - t_start
    print(
        'im_detect: forward_time: {:.3f}s, detect_time {:.3f}s, nms_time: {:.3f}s, fps_time: {:.3f}s'
        .format(forward_time, detect_time, nms_time, fps_time))
    return boxes_batch, fps_time, forward_time, detect_time, nms_time
def forward_torch_nms(self, arm_loc_data, arm_conf_data, odm_loc_data, odm_conf_data, prior_data):
    """
    Deprecated.

    Two-stage decode followed by per-class NMS: the ``arm_*`` location
    output refines the priors, the ``odm_*`` location output is decoded
    against the refined priors, and ``odm_conf_data`` supplies class scores.

    Args:
        arm_loc_data: (tensor) First-stage loc preds, indexed per batch item.
        arm_conf_data: (tensor) First-stage conf preds; channels ``1:`` of
            the last axis are compared with ``self.objectness_threshold``.
        odm_loc_data: (tensor) Loc preds from loc layers
            Shape: [batch,num_priors*4]
        odm_conf_data: (tensor) Conf preds from conf layers
            Shape: [batch*num_priors,num_classes]
            NOTE: modified in place (see below).
        prior_data: (tensor) Prior boxes and variances from priorbox layers
            Shape: [1,num_priors,4] per the original note; the code uses
            ``prior_data.size(0)`` as the number of priors - TODO confirm.

    Returns:
        Tensor of shape ``[batch, num_classes, top_k, 5]``; each filled row
        is ``[score, box...]``. Class 0 rows stay zero.
    """
    loc_data = odm_loc_data
    # conf_data is an alias of odm_conf_data, so the masking two lines below
    # writes zeros into the caller's tensor.
    conf_data = odm_conf_data
    arm_object_conf = arm_conf_data.data[:, :, 1:]
    # Priors whose first-stage object score is at or below the threshold
    # get all of their class scores zeroed.
    no_object_index = arm_object_conf <= self.objectness_threshold
    conf_data[no_object_index.expand_as(conf_data)] = 0
    num = loc_data.size(0)  # batch size
    num_priors = prior_data.size(0)
    output = torch.zeros(num, self.num_classes, self.top_k, 5)
    # Rearranged so conf_preds[i][cl] yields the scores of class cl.
    conf_preds = conf_data.view(num, num_priors, self.num_classes).transpose(2, 1)
    # Decode predictions into bboxes.
    for i in range(num):
        # First stage: refine the priors, then convert them with center_size
        # so they can serve as priors for the second decode.
        default = decode(arm_loc_data[i], prior_data, self.variance)
        default = center_size(default)
        decoded_boxes = decode(loc_data[i], default, self.variance)
        # For each class, perform nms
        conf_scores = conf_preds[i].clone()
        for cl in range(1, self.num_classes):
            c_mask = conf_scores[cl].gt(self.confidence_threshold)
            scores = conf_scores[cl][c_mask]
            if scores.size(0) == 0:
                continue
            l_mask = c_mask.unsqueeze(1).expand_as(decoded_boxes)
            boxes = decoded_boxes[l_mask].view(-1, 4)
            # idx of highest scoring and non-overlapping boxes per class
            ids, count = nms(boxes, scores, self.nms_threshold, self.top_k)
            output[i, cl, :count] = \
                torch.cat((scores[ids[:count]].unsqueeze(1), boxes[ids[:count]]), 1)
    # Rank all detections of an image by score across classes.
    flt = output.contiguous().view(num, -1, 5)
    _, idx = flt[:, :, 0].sort(1, descending=True)
    _, rank = idx.sort(1)
    # NOTE(review): indexing with a boolean mask returns a copy, so this
    # fill_ appears to leave `output` untouched; as written the mask also
    # selects the best-ranked rows rather than the ones beyond keep_top_k.
    # Confirm the intent before relying on keep_top_k being enforced here.
    flt[(rank < self.keep_top_k).unsqueeze(-1).expand_as(flt)].fill_(0)
    return output
def predict(self, img):
    """Detect objects in ``img`` and return per-class detections with timing.

    The image is preprocessed with ``self.transform``; the forward pass uses
    the names ``net``, ``priors``, ``num_classes`` and ``args`` resolved
    outside this method, together with ``self.detection``.

    Returns:
        ``(all_boxes, total_time)``: ``all_boxes[j]`` is a float32 array of
        ``[box columns, score]`` rows for class ``j`` (entry 0 stays an empty
        list), and ``total_time`` is the sum of the two timer readings
        (forward pass and NMS stage).
    """
    detect_timer = Timer()
    misc_timer = Timer()

    height, width = img.shape[0], img.shape[1]
    scale = torch.Tensor([width, height, width, height])
    with torch.no_grad():
        x = self.transform(img).unsqueeze(0)
        if self.cuda:
            x = x.cuda()
            scale = scale.cuda()

    detect_timer.tic()
    out = net(x)  # forward pass
    boxes, scores = self.detection.forward(out, priors)
    detect_time = detect_timer.toc()

    boxes = boxes[0]
    scores = scores[0]
    # scale each detection back up to the image
    boxes *= scale
    boxes = boxes.cpu().numpy()
    scores = scores.cpu().numpy()

    misc_timer.tic()
    all_boxes = [[] for _ in range(num_classes)]
    for j in range(1, num_classes):
        class_scores = scores[:, j]
        inds = np.where(class_scores > self.thresh)[0]
        if len(inds) == 0:
            all_boxes[j] = np.zeros([0, 5], dtype=np.float32)
            continue
        c_dets = np.hstack(
            (boxes[inds], class_scores[inds][:, np.newaxis])
        ).astype(np.float32, copy=False)
        keep = nms(c_dets, 0.4, force_cpu=args.cpu)
        all_boxes[j] = c_dets[keep, :]
    nms_time = misc_timer.toc()

    return all_boxes, detect_time + nms_time
def get_bbox(self, img_raw):
    """Return integer bounding boxes of the confident detections in an image.

    The image is copied into a float tensor on ``self.device``, the
    per-channel values (104, 117, 123) are subtracted and ``self.net`` is
    run. Candidates scoring above 0.05 are limited to the 5000 best, passed
    through NMS at 0.3 and truncated to 750. Only detections scoring at
    least 0.65 are reported.

    Returns:
        List of 4-tuples holding the first four detection columns converted
        to ``int``.
    """
    device = self.device

    img = torch.FloatTensor(img_raw).to(device)
    im_height, im_width, _ = img.size()
    scale = torch.FloatTensor([im_width, im_height, im_width, im_height]).to(device)
    img -= torch.FloatTensor((104, 117, 123)).to(device)
    img = img.permute(2, 0, 1).unsqueeze(0)

    loc, conf = self.net(img)  # forward pass

    priors = PriorBox(cfg, image_size=(im_height, im_width)).forward().to(device)
    boxes = decode(loc.data.squeeze(0), priors.data, cfg['variance'])
    boxes = (boxes * scale).cpu().numpy()
    scores = conf.squeeze(0).data.cpu().numpy()[:, 1]

    # ignore low scores
    inds = np.where(scores > 0.05)[0]
    boxes, scores = boxes[inds], scores[inds]

    # keep top-K before NMS
    order = scores.argsort()[::-1][:5000]
    boxes, scores = boxes[order], scores[order]

    # do NMS, then keep top-K after it
    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
    keep = nms(dets, 0.3, force_cpu=False)
    dets = dets[keep, :][:750, :]

    bboxes = []
    for det in dets:
        if det[4] < 0.65:
            continue
        as_int = [int(value) for value in det]
        bboxes.append(tuple(as_int[:4]))
    return bboxes
def facebox_detect(self, img_raw):
    """Detect boxes in one image and return ``[box columns, score]`` rows.

    The image is converted to float32, the per-channel values (104, 117, 123)
    are subtracted and ``self.model`` is run. Detections are thresholded with
    ``confidence_threshold``, reduced to the ``top_k`` best, passed through
    NMS and truncated to ``keep_top_k`` (all taken from ``self.cfg``). Rows
    containing any negative value are dropped.
    """
    settings = self.cfg

    img = np.float32(img_raw)
    im_height, im_width, _ = img.shape
    scale = torch.Tensor([im_width, im_height, im_width, im_height])
    img -= (104, 117, 123)
    img = torch.from_numpy(img.transpose(2, 0, 1)).unsqueeze(0).to(self.device)
    scale = scale.to(self.device)

    loc, conf = self.model(img)  # forward pass

    priors = PriorBox(settings, image_size=(im_height, im_width)).forward()
    priors = priors.to(self.device)
    boxes = decode(loc.data.squeeze(0), priors.data, settings['variance'])
    boxes = (boxes * scale).cpu().numpy()
    scores = conf.data.cpu().numpy()[:, 1]

    # ignore low scores
    inds = np.where(scores > settings['confidence_threshold'])[0]
    boxes, scores = boxes[inds], scores[inds]

    # keep top-K before NMS
    order = scores.argsort()[::-1][:settings['top_k']]
    boxes, scores = boxes[order], scores[order]

    # do NMS, then keep top-K after it
    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
    keep = nms(dets, settings['nms_threshold'], False)
    boxes_score = dets[keep, :][:settings['keep_top_k'], :]

    # Drop every row that contains a negative value.
    has_negative = (boxes_score < 0).any(axis=1)
    return boxes_score[~has_negative]
def imgCallback(msg):
    """Image-message callback: detect objects and publish the result.

    The message is stored in the global ``captureImage``, converted to a
    ``bgr8`` image with ``CvBridge`` and run through the detector. Boxes,
    scores and class indices are handed to ``pub_result``; a frame without
    detections is published with empty arrays. A ``CvBridgeError`` is printed
    and otherwise ignored.
    """
    global captureImage
    captureImage = msg
    try:
        cv_img = CvBridge().imgmsg_to_cv2(captureImage, "bgr8")
        w, h = cv_img.shape[1], cv_img.shape[0]
        img = _preprocess(cv_img).unsqueeze(0)
        if cfg.test_cfg.cuda:
            img = img.cuda()
        scale = torch.Tensor([w, h, w, h])
        out = net(img)
        boxes, scores = detector.forward(out, priors)
        boxes = (boxes[0] * scale).cpu().numpy()
        scores = scores[0].cpu().numpy()

        allboxes = []
        for j in range(1, cfg.model.m2det_config.num_classes):
            inds = np.where(scores[:, j] > cfg.test_cfg.score_threshold)[0]
            if len(inds) == 0:
                continue
            c_bboxes = boxes[inds]
            c_scores = scores[inds, j]
            c_dets = np.hstack(
                (c_bboxes, c_scores[:, np.newaxis])).astype(np.float32, copy=False)
            # The soft-NMS switch is passed through as nms' force_cpu argument.
            soft_nms = cfg.test_cfg.soft_nms
            keep = nms(c_dets, cfg.test_cfg.iou, force_cpu=soft_nms)
            keep = keep[:cfg.test_cfg.keep_per_class]
            c_dets = c_dets[keep, :]
            allboxes.extend([_.tolist() + [j] for _ in c_dets])

        # np.array([]) is 1-D and cannot be column-sliced; an explicit empty
        # (0, 6) array keeps the callback alive on frames without detections.
        allboxes = np.array(allboxes) if allboxes else np.empty((0, 6))
        boxes = allboxes[:, :4]
        scores = allboxes[:, 4]
        cls_inds = allboxes[:, 5]
        pub_result(cv_img, boxes, scores, cls_inds)
    except CvBridgeError as e:
        print(e)
def Predict(self, im_path, thresh=0.5, visualize=False, output_img_path="output.jpg"):
    """Detect objects in an image file and write the annotated image to disk.

    Args:
        im_path: Path of the image, loaded with ``cv2.imread``.
        thresh: Threshold forwarded to ``self.draw_detection``.
        visualize: When true, the annotated image is also shown in a window
            for two seconds.
        output_img_path: Destination of the annotated image.

    The model, preprocessing function, detector, priors, class count and
    configuration are all read from ``self.system_dict``. Images without any
    detection are still drawn and saved. Nothing is returned.
    """
    system = self.system_dict
    test_cfg = system["cfg"].test_cfg

    loop_start = time.time()
    image = cv2.imread(im_path, cv2.IMREAD_COLOR)
    w, h = image.shape[1], image.shape[0]
    img = system["_preprocess"](image).unsqueeze(0)
    if test_cfg.cuda:
        img = img.cuda()
    scale = torch.Tensor([w, h, w, h])
    out = system["net"](img)
    boxes, scores = system["detector"].forward(out, system["priors"])
    boxes = (boxes[0] * scale).cpu().numpy()
    scores = scores[0].cpu().numpy()

    allboxes = []
    for j in range(1, system["num_classes"]):
        inds = np.where(scores[:, j] > test_cfg.score_threshold)[0]
        if len(inds) == 0:
            continue
        c_bboxes = boxes[inds]
        c_scores = scores[inds, j]
        c_dets = np.hstack((c_bboxes, c_scores[:, np.newaxis])).astype(
            np.float32, copy=False)
        # The soft-NMS switch is passed through as nms' force_cpu argument.
        keep = nms(c_dets, test_cfg.iou, force_cpu=test_cfg.soft_nms)
        keep = keep[:test_cfg.keep_per_class]
        c_dets = c_dets[keep, :]
        allboxes.extend([_.tolist() + [j] for _ in c_dets])

    loop_time = time.time() - loop_start
    print("Inference time 2 - {} sec".format(loop_time))

    # np.array([]) is 1-D and cannot be column-sliced; use an explicit empty
    # (0, 6) array so an image without detections does not raise.
    allboxes = np.array(allboxes) if allboxes else np.empty((0, 6))
    boxes = allboxes[:, :4]
    scores = allboxes[:, 4]
    cls_inds = allboxes[:, 5]

    im2show = self.draw_detection(image, boxes, scores, cls_inds, -1, thresh)
    # Tall results are shrunk to a height of 1000 px, keeping the aspect ratio.
    if im2show.shape[0] > 1100:
        im2show = cv2.resize(
            im2show,
            (int(1000. * float(im2show.shape[1]) / im2show.shape[0]), 1000))
    if visualize:
        cv2.imshow('test', im2show)
        cv2.waitKey(2000)
    cv2.imwrite(output_img_path, im2show)
def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride, anchors, num_anchors):
    """Turn RPN outputs into region proposals.

    A simplified version compared to fast/er RCNN; for details please see
    the technical report.

    Args:
        rpn_cls_prob: Probabilities; last-axis channels from ``num_anchors``
            onward are used as the proposal scores.
        rpn_bbox_pred: Box deltas, reshaped to rows of four.
        im_info: Its first two entries are passed to ``clip_boxes``.
        cfg_key: Key into ``cfg`` (``bytes`` are decoded as UTF-8) selecting
            the ``RPN_PRE_NMS_TOP_N``, ``RPN_POST_NMS_TOP_N`` and
            ``RPN_NMS_THRESH`` settings.
        _feat_stride: Not used by this function.
        anchors: Anchors the deltas are applied to.
        num_anchors: Offset of the score channels in ``rpn_cls_prob``.

    Returns:
        ``(blob, scores)``: ``blob`` rows are a zero batch index followed by
        the proposal box; ``scores`` holds the matching score column.
    """
    if type(cfg_key) == bytes:
        cfg_key = cfg_key.decode('utf-8')
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH

    # Get the scores and bounding boxes
    scores = rpn_cls_prob[:, :, :, num_anchors:].reshape((-1, 1))
    deltas = rpn_bbox_pred.reshape((-1, 4))
    proposals = clip_boxes(bbox_transform_inv(anchors, deltas), im_info[:2])

    # Pick the top region proposals
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # Non-maximal suppression
    keep = nms(np.hstack((proposals, scores)), nms_thresh)

    # Pick the top region proposals after NMS
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Only support single image as input
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    return blob, scores
def predict_on_video(self, v_f):
    """Run detection on every frame of a video and display the result.

    Args:
        v_f: Source passed to ``cv2.VideoCapture``.

    For each frame, classes 1..``self.num_classes - 1`` are filtered at a
    score of 0.6, passed through NMS at 0.6 and drawn onto the frame, which
    is shown in the window ``'rr'``. When a frame cannot be read, ``'Done'``
    is printed and the interpreter exits via ``exit(0)``; the capture is
    released on every exit path.
    """
    cap = cv2.VideoCapture(v_f)
    try:
        while cap.isOpened():
            ok, frame = cap.read()
            if not ok:
                print('Done')
                exit(0)

            boxes, scores = self.predict_on_img(frame)
            for j in range(1, self.num_classes):
                inds = np.where(scores[:, j] > 0.6)[0]  # conf > 0.6
                # np.where never returns None; test for emptiness so NMS is
                # not called without candidates.
                if len(inds) == 0:
                    continue
                c_bboxes = boxes[inds]
                c_scores = scores[inds, j]
                c_dets = np.hstack((c_bboxes, c_scores[:, np.newaxis])).astype(
                    np.float32, copy=False)
                keep = nms(c_dets, 0.6)
                c_bboxes = c_dets[keep, :][:, :4]

                label = self.label_map_list[j - 1]
                for box in c_bboxes:
                    # OpenCV drawing functions expect integer pixel coordinates.
                    x1, y1, x2, y2 = (int(value) for value in box)
                    cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 1, 0)
                    cv2.putText(frame, label, (x1, y1),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1, cv2.LINE_AA)

            cv2.imshow('rr', frame)
            cv2.waitKey(1)
    finally:
        cap.release()
def detect_faces(self, img, resize=1.0):
    """Detect faces in ``img`` and return ``[box columns, score]`` rows.

    Args:
        img: Image array indexed as ``[row, column, channel]``. It is not
            modified; processing happens on a float32 copy.
        resize: Divisor applied to the scaled boxes. The image itself is not
            resized here - presumably the caller already did so; confirm.

    Returns:
        float32 array of detections after score thresholding
        (``self.args.confidence_threshold``), pre-NMS top-k
        (``self.args.top_k``), NMS (``self.args.nms_threshold``) and
        post-NMS top-k (``self.args.keep_top_k``).
    """
    # Work on a float32 copy: the in-place subtraction below must neither
    # alter the caller's array nor fail on integer images.
    img = img.astype(np.float32)

    im_height, im_width, _ = img.shape
    scale = torch.Tensor([im_width, im_height, im_width, im_height])
    img -= (104, 117, 123)
    img = img.transpose(2, 0, 1)
    img = torch.from_numpy(img).unsqueeze(0)
    img = img.to(self.device)
    scale = scale.to(self.device)

    loc, conf = self.net(img)  # forward pass

    priorbox = PriorBox(cfg, image_size=(im_height, im_width))
    priors = priorbox.forward()
    priors = priors.to(self.device)
    prior_data = priors.data
    boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
    boxes = boxes * scale / resize
    boxes = boxes.cpu().numpy()
    scores = conf.squeeze(0).data.cpu().numpy()[:, 1]

    # ignore low scores
    inds = np.where(scores > self.args.confidence_threshold)[0]
    boxes = boxes[inds]
    scores = scores[inds]

    # keep top-K before NMS
    order = scores.argsort()[::-1][:self.args.top_k]
    boxes = boxes[order]
    scores = scores[order]

    # do NMS
    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
    keep = nms(dets, self.args.nms_threshold, force_cpu=self.args.cpu)
    dets = dets[keep, :]

    # keep top-K after NMS
    dets = dets[:self.args.keep_top_k, :]
    return dets
def nms_process(self, network_output, scale, im_height, im_width) -> List[TrackingRegion]:
    """Turn raw network output into a list of face ``TrackingRegion`` objects.

    Args:
        network_output: Three-element output of the network; the first two
            items are the location and confidence predictions and the third
            is forwarded to ``PriorBox``.
        scale: Factor the decoded boxes are multiplied by.
        im_height: Image height passed to ``PriorBox``.
        im_width: Image width passed to ``PriorBox``.

    Returns:
        One ``TrackingRegion`` per detection that survives thresholding and
        NMS (at most 750), each tagged with ``class_id`` ``"face"``.
    """
    prior_gen = PriorBox(cfg, network_output[2], (im_height, im_width), phase='test')
    prior_boxes = prior_gen.forward()
    if self.use_gpu:
        prior_boxes = prior_boxes.cuda()

    loc, conf, _ = network_output
    decoded = decode(loc.data.squeeze(0), prior_boxes.data, cfg['variance'])
    box_arr = (decoded * scale).cpu().numpy()
    # Column 1 of the confidence output is used as the detection score.
    face_scores = conf.data.cpu().numpy()[:, 1]

    # Drop everything at or below the minimum score.
    confident = np.where(face_scores > self.score_min)[0]
    box_arr, face_scores = box_arr[confident], face_scores[confident]

    # Keep the 5000 best-scoring candidates before NMS.
    ranking = face_scores.argsort()[::-1][:5000]
    box_arr, face_scores = box_arr[ranking], face_scores[ranking]

    candidates = np.hstack((box_arr, face_scores[:, np.newaxis])).astype(
        np.float32, copy=False)
    survivors = nms(candidates, 0.3, force_cpu=False)
    # Keep at most 750 detections after NMS.
    candidates = candidates[survivors, :][:750, :]

    regions = []
    for det in candidates:
        face_region = TrackingRegion()
        face_region.set_rect(left=det[0], top=det[1],
                             right=det[2], bottom=det[3])
        face_region.confidence = det[4]
        face_region.data["class_id"] = "face"
        regions.append(face_region)
    return regions
def _single_infer(self, img=None, save_to=''):
    """Run face detection on one image and return the filtered detections.

    Args:
        img: Image array indexed as (height, width, channel). It is not
            modified; preprocessing runs on a float32 copy.
        save_to: Destination passed to ``self._easy_vis``; visualisation
            only happens when ``self.args.save_flag`` is set and this
            string is non-empty.

    Returns:
        A float32 array whose rows are the four box values followed by the
        score, limited to ``self.args.keep_top_k`` rows.
    """
    raw_img = img
    im_height, im_width, _ = img.shape
    # astype always copies, so the caller's array is never modified.
    img = img.astype(np.float32)

    with torch.no_grad():
        scale = torch.Tensor(
            [im_width, im_height, im_width, im_height]).to(self.device)
        img -= (104, 117, 123)
        img = img.transpose(2, 0, 1)
        # Use the same device as the scale and priors tensors below; the
        # original referenced `faceu.device` here only.
        img = torch.from_numpy(img).unsqueeze(0).to(self.device)
        loc, conf = self.model(img)  # forward pass
        priors = clib.PriorBox(
            img_size=(im_height, im_width)).forward().to(self.device)
        boxes = decode(loc.data.squeeze(0), priors.data)
        boxes = boxes * scale
        boxes = boxes.cpu().numpy()
        # Column 1 of the confidence output is used as the detection score.
        scores = conf.squeeze(0).data.cpu().numpy()[:, 1]

    # ignore low scores
    inds = np.where(scores > self.args.conf_thres)[0]
    boxes, scores = boxes[inds], scores[inds]

    # keep top-K before NMS
    order = scores.argsort()[::-1][:self.args.top_k]
    boxes, scores = boxes[order], scores[order]

    # do NMS
    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(
        np.float32, copy=False)
    keep = nms(dets, self.args.nms_thres, force_cpu=self.args.cpu)
    dets = dets[keep, :]

    # keep top-K after NMS
    dets = dets[:self.args.keep_top_k, :]

    if self.args.save_flag and len(save_to) > 0:
        # Hand the visualiser its own copy of the untouched input image.
        self._easy_vis(raw_img.copy(), dets, save_to)
    return dets
def detect(self, image):
    """Detect objects in ``image`` and return the drawn result.

    The image is preprocessed, run through ``self.net`` and
    ``self.detector``, then filtered per class by score threshold and NMS
    using the values in ``cfg.test_cfg``.

    Args:
        image: Image array indexed as (height, width, ...).

    Returns:
        The ``(infos, im2show)`` pair produced by ``draw_detection``.

    Note:
        ``cfg``, ``ch_labels`` and ``args`` are read from module scope.
    """
    w, h = image.shape[1], image.shape[0]
    img = self._preprocess(image).unsqueeze(0)
    if cfg.test_cfg.cuda:
        img = img.cuda()
    # NOTE(review): scale is created on the CPU; this presumably relies on
    # the detector returning CPU tensors - confirm.
    scale = torch.Tensor([w, h, w, h])
    out = self.net(img)
    boxes, scores = self.detector.forward(out, self.priors)
    boxes = (boxes[0] * scale).cpu().numpy()
    scores = scores[0].cpu().numpy()

    soft_nms = cfg.test_cfg.soft_nms
    allboxes = []
    # Class index 0 is skipped.
    for j in range(1, len(ch_labels)):
        inds = np.where(scores[:, j] > cfg.test_cfg.score_threshold)[0]
        if len(inds) == 0:
            continue
        c_bboxes = boxes[inds]
        c_scores = scores[inds, j]
        c_dets = np.hstack(
            (c_bboxes, c_scores[:, np.newaxis])).astype(np.float32, copy=False)
        keep = nms(c_dets, cfg.test_cfg.iou, force_cpu=soft_nms)
        keep = keep[:cfg.test_cfg.keep_per_class]
        c_dets = c_dets[keep, :]
        # Each row becomes [x1, y1, x2, y2, score, class_index].
        allboxes.extend([det.tolist() + [j] for det in c_dets])

    # reshape keeps the array 2-D even when nothing was detected, so the
    # column slices below do not raise IndexError on an empty result.
    allboxes = np.array(allboxes).reshape(-1, 6)
    boxes = allboxes[:, :4]
    scores = allboxes[:, 4]
    cls_inds = allboxes[:, 5]
    infos, im2show = draw_detection(image, boxes, scores, cls_inds, -1,
                                    args.thresh)
    return infos, im2show
def test_net(save_folder, net, detector, cuda, testset, transform,
             max_per_image=300, thresh=0.005):
    """Run the detector over ``testset``, save the detections and evaluate.

    Args:
        save_folder: Directory for ``detections.pkl``; created if missing.
        net: Network called as ``net(x=x, test=True)``.
        detector: Object whose ``forward(out, priors)`` yields boxes/scores.
        cuda: Move the input to the GPU when true.
        testset: Dataset providing ``pull_image`` and
            ``evaluate_detections``.
        transform: Callable turning an image into the network input.
        max_per_image: Maximum detections kept per image over all classes;
            values <= 0 disable the cap.
        thresh: Minimum class score for a detection to be considered.

    Returns:
        ``(APs, mAP)`` when ``args.dataset == 'VOC'`` on a fresh run,
        otherwise ``None`` (including every ``args.retest`` run).

    Note:
        ``args`` and ``priors`` are read from module scope.
    """
    if not os.path.exists(save_folder):
        os.makedirs(save_folder)

    det_file = os.path.join(save_folder, 'detections.pkl')

    if args.retest:
        # NOTE: pickle must only be used on trusted files; this one is
        # expected to have been written by a previous run of this function.
        with open(det_file, 'rb') as f:
            all_boxes = pickle.load(f)
        print('Evaluating detections')
        testset.evaluate_detections(all_boxes, save_folder)
        return

    # dump predictions and assoc. ground truth to text file for now
    num_images = len(testset)
    num_classes = (21, 81)[args.dataset == 'COCO']
    # all_boxes[class][image] -> array of rows (box values..., score)
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(num_classes)]
    _t = {'im_detect': Timer(), 'misc': Timer()}

    for i in range(num_images):
        img = testset.pull_image(i)
        # NOTE(review): `volatile` is a legacy autograd flag; confirm the
        # targeted PyTorch version still honours it.
        x = Variable(transform(img).unsqueeze(0), volatile=True)
        if cuda:
            x = x.cuda()

        _t['im_detect'].tic()
        out = net(x=x, test=True)  # forward pass
        boxes, scores = detector.forward(out, priors)
        detect_time = _t['im_detect'].toc()

        boxes = boxes[0].cpu().numpy()
        scores = scores[0].cpu().numpy()
        # scale each detection back up to the image
        scale = np.array([img.shape[1], img.shape[0],
                          img.shape[1], img.shape[0]], dtype=np.float32)
        boxes *= scale

        _t['misc'].tic()
        for j in range(1, num_classes):
            inds = np.where(scores[:, j] > thresh)[0]
            if len(inds) == 0:
                all_boxes[j][i] = np.empty([0, 5], dtype=np.float32)
                continue
            c_bboxes = boxes[inds]
            c_scores = scores[inds, j]
            c_dets = np.hstack(
                (c_bboxes, c_scores[:, np.newaxis])).astype(np.float32,
                                                            copy=False)
            # The original set force_cpu to False for every dataset.
            keep = nms(c_dets, 0.45, force_cpu=False)
            keep = keep[:50]
            all_boxes[j][i] = c_dets[keep, :]

        if max_per_image > 0:
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in range(1, num_classes)])
            if len(image_scores) > max_per_image:
                # Score of the max_per_image-th best detection in the image.
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in range(1, num_classes):
                    keep = np.where(
                        all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]
        nms_time = _t['misc'].toc()

        if i % 20 == 0:
            print('im_detect: {:d}/{:d} {:.3f}s {:.3f}s'.format(
                i + 1, num_images, detect_time, nms_time))
            _t['im_detect'].clear()
            _t['misc'].clear()

    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    print('Evaluating detections')
    if args.dataset == 'VOC':
        APs, mAP = testset.evaluate_detections(all_boxes, save_folder)
        return APs, mAP
    else:
        testset.evaluate_detections(all_boxes, save_folder)
def nms_detections(pred_boxes, scores, nms_thresh):
    """Run NMS on boxes paired with their scores.

    The scores are appended to ``pred_boxes`` as a trailing column, the
    result is cast to float32 and handed to ``nms`` with ``nms_thresh``.

    Returns:
        Whatever ``nms`` returns for the stacked detections.
    """
    score_column = scores[:, np.newaxis]
    stacked = np.concatenate((pred_boxes, score_column), axis=1)
    return nms(stacked.astype(np.float32), nms_thresh)
def forward(self, arguments, device=None, outputs_to_retain=None):
    """Generate region proposals from RPN scores and box deltas.

    Algorithm:

        for each (H, W) location i
            generate A anchor boxes centered on cell i
            apply predicted bbox deltas at cell i to each of the A anchors
        clip predicted boxes to image
        remove predicted boxes with either height or width < threshold
        sort all (proposal, score) pairs by score from highest to lowest
        take top pre_nms_topN proposals before NMS
        apply NMS to the remaining proposals
        take post_nms_topN proposals after NMS
        return the top proposals

    Args:
        arguments: Sequence of ``(scores, bbox_deltas, im_info)``. Only a
            batch of one item is supported.
        device: Unused here.
        outputs_to_retain: When empty or ``None`` the ``test_*`` settings of
            ``self._layer_config`` are used, otherwise the ``train_*`` ones.

    Returns:
        ``(None, proposals)`` where ``proposals`` carries a leading batch
        axis of size 1 and is zero-padded to ``post_nms_topN`` rows when
        fewer proposals were found.
    """
    # use potentially different number of proposals for training vs
    # evaluation. `not ...` also covers the default None, for which the
    # original len() call raised TypeError.
    if not outputs_to_retain:
        pre_nms_topN = self._layer_config['test_pre_nms_topN']
        post_nms_topN = self._layer_config['test_post_nms_topN']
        nms_thresh = self._layer_config['test_nms_thresh']
        min_size = self._layer_config['test_min_size']
    else:
        pre_nms_topN = self._layer_config['train_pre_nms_topN']
        post_nms_topN = self._layer_config['train_post_nms_topN']
        nms_thresh = self._layer_config['train_nms_thresh']
        min_size = self._layer_config['train_min_size']

    bottom = arguments
    assert bottom[0].shape[0] == 1, \
        'Only single item batches are supported'

    # the first set of _num_anchors channels are bg probs
    # the second set are the fg probs, which we want
    scores = bottom[0][:, self._num_anchors:, :, :]
    bbox_deltas = bottom[1]
    im_info = bottom[2][0]

    if DEBUG:
        # im_info = (pad_width, pad_height, scaled_image_width,
        #            scaled_image_height, orig_img_width, orig_img_height)
        # e.g. (1000, 1000, 1000, 600, 500, 300) for an original image of
        # 600x300 that is scaled and padded to 1000x1000
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scaled im_size: ({}, {})'.format(im_info[2], im_info[3]))
        print('original im_size: ({}, {})'.format(im_info[4], im_info[5]))

    # 1. Generate proposals from bbox deltas and shifted anchors
    height, width = scores.shape[-2:]

    if DEBUG:
        print('score map size: {}'.format(scores.shape))

    # Enumerate all shifts
    shift_x = np.arange(0, width) * self._feat_stride
    shift_y = np.arange(0, height) * self._feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()

    # Enumerate all shifted anchors:
    #
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = self._num_anchors
    K = shifts.shape[0]
    anchors = self._anchors.reshape((1, A, 4)) + \
        shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    anchors = anchors.reshape((K * A, 4))

    # Transpose and reshape predicted bbox transformations to get them
    # into the same order as the anchors:
    #
    # bbox deltas will be (1, 4 * A, H, W) format
    # transpose to (1, H, W, 4 * A)
    # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
    # in slowest to fastest order
    bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

    # Same story for the scores:
    #
    # scores are (1, A, H, W) format
    # transpose to (1, H, W, A)
    # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
    scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

    # Convert anchors into proposals via bbox transformations
    proposals = bbox_transform_inv(anchors, bbox_deltas)

    # 2. clip predicted boxes to image
    proposals = clip_boxes(proposals, im_info)

    # 3. remove predicted boxes with either height or width < threshold
    # (NOTE: convert min_size to input image scale. Original size =
    # im_info[4:6], scaled size = im_info[2:4])
    cntk_image_scale = im_info[2] / im_info[4]
    keep = _filter_boxes(proposals, min_size * cntk_image_scale)
    proposals = proposals[keep, :]
    scores = scores[keep]

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    order = scores.ravel().argsort(kind='mergesort')[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    keep = nms(np.hstack((proposals, scores)), nms_thresh,
               use_gpu_nms=False)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # pad with zeros if too few rois were found
    num_found_proposals = proposals.shape[0]
    if num_found_proposals < post_nms_topN:
        if DEBUG:
            print("Only {} proposals generated in ProposalLayer".format(
                num_found_proposals))
        proposals_padded = np.zeros(
            ((post_nms_topN,) + proposals.shape[1:]), dtype=np.float32)
        proposals_padded[:num_found_proposals, :] = proposals
        proposals = proposals_padded

    # Output rois blob
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    # for CNTK: add batch axis to output shape
    proposals.shape = (1,) + proposals.shape

    return None, proposals
def test_net(save_folder, net, detector, cuda, testset, transform,
             max_per_image=300, thresh=0.005):
    """Run the detector over ``testset``, save the detections and evaluate.

    Args:
        save_folder: Directory for ``detections.pkl``; created if missing.
        net: Network called as ``net(x)``.
        detector: Object whose ``forward(out, priors)`` yields boxes/scores.
        cuda: Move the input to the GPU when true.
        testset: Dataset providing ``pull_image`` and
            ``evaluate_detections``.
        transform: Callable turning an image into the network input.
        max_per_image: Maximum detections kept per image over all classes;
            values <= 0 disable the cap.
        thresh: Minimum class score for a detection to be considered.

    Returns:
        ``None``; results are reported through
        ``testset.evaluate_detections``.

    Note:
        ``args`` and ``priors`` are read from module scope.
    """
    if not os.path.exists(save_folder):
        os.makedirs(save_folder)

    det_file = os.path.join(save_folder, 'detections.pkl')

    if args.retest:
        # NOTE: pickle must only be used on trusted files; this one is
        # expected to have been written by a previous run of this function.
        with open(det_file, 'rb') as f:
            all_boxes = pickle.load(f)
        print('Evaluating detections')
        testset.evaluate_detections(all_boxes, save_folder)
        return

    # dump predictions and assoc. ground truth to text file for now
    num_images = len(testset)
    num_classes = (21, 81)[args.dataset == 'COCO']
    # all_boxes[class][image] -> array of rows (box values..., score)
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(num_classes)]
    _t = {'im_detect': Timer(), 'misc': Timer()}

    # NMS is forced onto the CPU for VOC only.
    force_cpu = args.dataset == 'VOC'

    for i in range(num_images):
        img = testset.pull_image(i)
        # NOTE(review): `volatile` is a legacy autograd flag; confirm the
        # targeted PyTorch version still honours it.
        x = Variable(transform(img).unsqueeze(0), volatile=True)
        if cuda:
            x = x.cuda()

        _t['im_detect'].tic()
        out = net(x)  # forward pass
        boxes, scores = detector.forward(out, priors)
        detect_time = _t['im_detect'].toc()

        boxes = boxes[0].cpu().numpy()
        scores = scores[0].cpu().numpy()
        # scale each detection back up to the image
        scale = np.array([img.shape[1], img.shape[0],
                          img.shape[1], img.shape[0]], dtype=np.float32)
        boxes *= scale

        _t['misc'].tic()
        for j in range(1, num_classes):
            inds = np.where(scores[:, j] > thresh)[0]
            if len(inds) == 0:
                all_boxes[j][i] = np.empty([0, 5], dtype=np.float32)
                continue
            c_bboxes = boxes[inds]
            c_scores = scores[inds, j]
            c_dets = np.hstack(
                (c_bboxes, c_scores[:, np.newaxis])).astype(np.float32,
                                                            copy=False)
            keep = nms(c_dets, 0.45, force_cpu=force_cpu)
            keep = keep[:50]
            all_boxes[j][i] = c_dets[keep, :]

        if max_per_image > 0:
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in range(1, num_classes)])
            if len(image_scores) > max_per_image:
                # Score of the max_per_image-th best detection in the image.
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in range(1, num_classes):
                    keep = np.where(
                        all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]
        nms_time = _t['misc'].toc()

        if i % 20 == 0:
            print('im_detect: {:d}/{:d} {:.3f}s {:.3f}s'
                  .format(i + 1, num_images, detect_time, nms_time))
            _t['im_detect'].clear()
            _t['misc'].clear()

    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    print('Evaluating detections')
    testset.evaluate_detections(all_boxes, save_folder)