def box_handle(img, conf, im_height, im_width, scale, loc, landms):
    """Convert raw network outputs into one detection array with landmarks.

    Priors are generated for an ``im_height`` x ``im_width`` image from the
    module-level ``cfg_mnet`` and moved to the module-level ``device``.
    Candidates whose score is not above the module-level
    ``confidence_threshold`` are dropped, the rest are sorted by descending
    score and passed through ``py_cpu_nms`` with the module-level
    ``nms_threshold``.

    Args:
        img: network input tensor; only ``img.shape[2]`` and ``img.shape[3]``
            are read (assumed to be height and width of an NCHW batch --
            TODO confirm).
        conf: score output of the network; column 1 (after squeezing dim 0)
            is used as the detection score.
        im_height: image height handed to ``PriorBox``.
        im_width: image width handed to ``PriorBox``.
        scale: tensor multiplied onto the decoded boxes.
        loc: box regression output of the network.
        landms: landmark regression output of the network.

    Returns:
        2-D array whose columns are the box values, the score, then the
        landmark values, one row per detection surviving NMS.
    """
    variance = cfg_mnet['variance']
    anchors = PriorBox(cfg_mnet, image_size=(im_height, im_width)).forward().to(device).data

    box_arr = (decode(loc.data.squeeze(0), anchors, variance) * scale).cpu().numpy()
    score_arr = conf.squeeze(0).data.cpu().numpy()[:, 1]

    # The landmark scale repeats (shape[3], shape[2]) five times.
    landm_scale = torch.Tensor([img.shape[3], img.shape[2]] * 5).to(device)
    landm_arr = decode_landm(landms.data.squeeze(0), anchors, variance)
    landm_arr = (landm_arr * landm_scale).cpu().numpy()

    # Threshold first, then order the survivors from highest to lowest score.
    above = np.where(score_arr > confidence_threshold)[0]
    ranked = above[score_arr[above].argsort()[::-1]]
    box_arr, landm_arr, score_arr = box_arr[ranked], landm_arr[ranked], score_arr[ranked]

    candidates = np.hstack((box_arr, score_arr[:, None])).astype(np.float32, copy=False)
    kept = py_cpu_nms(candidates, nms_threshold)
    return np.concatenate((candidates[kept, :], landm_arr[kept]), axis=1)
def do_detect(img_raw, net, device, cfg):
    """Run ``net`` on one H x W x C image and return boxes and landmarks.

    Thresholds (``confidence_threshold``, ``top_k``, ``nms_threshold``,
    ``keep_top_k``) are read from the module-level ``args`` object, not from
    ``cfg``. The forward-pass duration is printed to stdout.

    Args:
        img_raw: image array with three axes (height, width, channels).
        net: callable returning ``(loc, conf, landms)`` for the input batch.
        device: torch device the input, scales and priors are moved to.
        cfg: configuration passed to ``PriorBox``; ``cfg['variance']`` is
            used for decoding.

    Returns:
        Tuple ``(dets, landms)``: ``dets`` holds the box columns plus a score
        column, ``landms`` holds the matching landmark rows.
    """
    resize = 1

    pixels = np.float32(img_raw)
    im_height, im_width, _ = pixels.shape
    box_scale = torch.Tensor([im_width, im_height, im_width, im_height])

    # Per-channel constants subtracted before inference (presumably the
    # channel means used in training -- confirm against the training code).
    pixels -= (104, 117, 123)
    batch = torch.from_numpy(pixels.transpose(2, 0, 1)).unsqueeze(0).to(device)
    box_scale = box_scale.to(device)

    started = time.time()
    loc, conf, landms = net(batch)  # forward pass
    print('net forward time: {:.4f}'.format(time.time() - started))

    variance = cfg['variance']
    anchors = PriorBox(cfg, image_size=(im_height, im_width)).forward().to(device).data

    boxes = (decode(loc.data.squeeze(0), anchors, variance) * box_scale / resize).cpu().numpy()
    scores = conf.squeeze(0).data.cpu().numpy()[:, 1]

    landm_scale = torch.Tensor([batch.shape[3], batch.shape[2]] * 5).to(device)
    landms = decode_landm(landms.data.squeeze(0), anchors, variance)
    landms = (landms * landm_scale / resize).cpu().numpy()

    # Ignore low scores, then keep the top-K candidates before NMS.
    above = np.where(scores > args.confidence_threshold)[0]
    ranked = above[scores[above].argsort()[::-1][:args.top_k]]
    boxes, landms, scores = boxes[ranked], landms[ranked], scores[ranked]

    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
    kept = py_cpu_nms(dets, args.nms_threshold)

    # Keep the top-K results after NMS.
    limit = args.keep_top_k
    return dets[kept, :][:limit, :], landms[kept][:limit, :]
def find_faces(frames: Tensor, chunk_size: int, model: torch.nn.Module, device: torch.device, conf: Dict[str, Any]) -> List[Tensor]:
    """Detect faces in a stack of frames, chunk by chunk, and crop them.

    Args:
        frames: tensor with four axes unpacked as (D, H, W, C).
        chunk_size: number of frames passed to ``model`` per forward call.
        model: callable returning ``(locations, confidence, landmarks)``.
        device: device the priors and the scale tensor are moved to.
        conf: configuration passed to ``PriorBox`` and
            ``postproc_detections_gpu``; ``conf['max_face_num_thresh']`` is
            forwarded to ``max_num_faces``.

    Returns:
        List of crops produced by ``crop_square_torch``; frames are visited
        in order and ``None`` crops are skipped.
    """
    D, H, W, C = frames.shape
    # D, H, W, C -> D, C, H, W
    frames_chw = frames.permute(0, 3, 1, 2)
    frames, scale = prepare_imgs(frames)

    prior_box = PriorBox(conf, image_size=(H, W))
    priors = prior_box.forward().to(device)
    scale = scale.to(device)

    detections = []
    for begin in range(0, D, chunk_size):
        with torch.no_grad():
            locations, confidence, landmarks = model(frames[begin:begin + chunk_size])
        del landmarks  # landmarks are not needed for cropping
        detections.extend(
            postproc_detections_gpu(locations, confidence, priors, scale, conf))
        del locations, confidence
    del priors, prior_box, scale, frames

    # NOTE(review): uint8 cannot represent a per-frame count above 255 --
    # confirm that postprocessing caps the number of detections below that.
    num_faces = np.array([len(per_frame) for per_frame in detections], dtype=np.uint8)
    max_faces = max_num_faces(num_faces, conf['max_face_num_thresh'])

    faces = []
    for idx, frame in enumerate(frames_chw):
        crops = (crop_square_torch(frame, bbox[:4])
                 for bbox in detections[idx][:max_faces])
        faces.extend(crop for crop in crops if crop is not None)
    del detections, frames_chw
    return faces
def _initialize_priorbox(self, cfg, im_height, im_width):
    """Generate prior boxes for an image size and return them on ``self.device``.

    Args:
        cfg: configuration passed to ``PriorBox``.
        im_height: image height handed to ``PriorBox``.
        im_width: image width handed to ``PriorBox``.

    Returns:
        The ``.data`` tensor of the generated priors, moved to ``self.device``.
    """
    generator = PriorBox(cfg, image_size=(im_height, im_width))
    return generator.forward().to(self.device).data
def facebox_detect(self, img_raw):
    """Detect boxes and their associated coordinates in one image.

    Runs ``self.model`` on the image, decodes boxes and coordinates, filters
    by ``self.cfg['confidence_threshold']``, keeps ``self.cfg['top_k']``
    candidates, applies ``nms`` and finally discards every row that contains
    a negative value.

    Args:
        img_raw: image array with three axes (height, width, channels).

    Returns:
        2-D array: box columns, score column, then the coordinate columns
        produced by ``decode_f``.
    """
    pixels = np.float32(img_raw)
    im_height, im_width, _ = pixels.shape
    width_height = [pixels.shape[1], pixels.shape[0]]
    box_scale = torch.Tensor(width_height * 2)  # w, h, w, h
    coord_scale = torch.Tensor(np.tile(width_height, 5))

    pixels -= (104, 117, 123)
    batch = torch.from_numpy(pixels.transpose(2, 0, 1)).unsqueeze(0).to(self.device)
    box_scale = box_scale.to(self.device)
    coord_scale = coord_scale.to(self.device)

    loc, conf, coords = self.model(batch)  # forward pass
    print("bbbb", loc.shape, conf.shape, coords.shape)

    variance = self.cfg['variance']
    anchors = PriorBox(self.cfg, image_size=(im_height, im_width)).forward().to(self.device).data

    boxes = (decode(loc.data.squeeze(0), anchors, variance) * box_scale).cpu().numpy()
    # Going through .data lets the (possibly grad-tracking) result be
    # converted to numpy directly.
    coords = (decode_f(coords, variance) * coord_scale).data.squeeze(0).cpu().numpy()
    scores = conf.data.cpu().numpy()[:, 1]

    # Ignore low scores, then keep the top-K candidates before NMS.
    above = np.where(scores > self.cfg['confidence_threshold'])[0]
    ranked = above[scores[above].argsort()[::-1][:self.cfg['top_k']]]
    boxes, scores, coords = boxes[ranked], scores[ranked], coords[ranked]

    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
    kept = nms(dets, self.cfg['nms_threshold'], False)

    # Keep the top-K results after NMS; the same indices select the coords.
    limit = self.cfg['keep_top_k']
    boxes_score = dets[kept, :][:limit, :]
    coords = coords[kept, :][:limit, :]

    # Drop every detection that has a negative entry in its box/score row.
    non_negative = ~(boxes_score < 0).any(axis=1)
    return np.hstack((boxes_score[non_negative], coords[non_negative]))
def process_face_data(cfg, im, im_height, im_width, loc, scale, conf, landms,
                      resize, top_k=5000, nms_threshold=0.4, keep_top_k=750,
                      confidence_threshold=0.6):
    """Decode network outputs and return the surviving boxes with scores.

    Args:
        cfg: configuration passed to ``PriorBox``; ``cfg['variance']`` is
            used for decoding.
        im: network input tensor; only ``im.shape[2]`` and ``im.shape[3]``
            are read to build the landmark scale.
        im_height: image height handed to ``PriorBox``.
        im_width: image width handed to ``PriorBox``.
        loc: box regression output; its device decides where the priors and
            the landmark scale are placed.
        scale: tensor multiplied onto the decoded boxes (expected on the same
            device as ``loc``).
        conf: score output; column 1 (after squeezing dim 0) is the score.
        landms: landmark regression output.
        resize: divisor applied to boxes and landmarks after scaling.
        top_k: number of candidates kept before NMS.
        nms_threshold: threshold passed to ``py_cpu_nms``.
        keep_top_k: number of detections kept after NMS.
        confidence_threshold: candidates must score strictly above this value.

    Returns:
        List of 5-element lists: the box values followed by the score.
    """
    # Follow the device of the network output instead of hard-coding CUDA.
    device = loc.device
    priors_data = PriorBox(cfg, image_size=(im_height, im_width)).forward().to(device).data

    boxes = decode(loc.data.squeeze(0), priors_data, cfg['variance'])
    boxes = (boxes * scale / resize).cpu().numpy()
    scores = conf.squeeze(0).cpu().detach().numpy()[:, 1]

    landms = decode_landm(landms.data.squeeze(0), priors_data, cfg['variance'])
    scale_landm = torch.tensor([im.shape[3], im.shape[2]] * 5,
                               dtype=torch.float32, device=device)
    landms = (landms * scale_landm / resize).cpu().numpy()

    # Ignore low scores. Landmarks must be filtered with the same indices as
    # boxes and scores, otherwise the later `order` / `keep` indices would
    # select rows belonging to different priors.
    inds = np.where(scores > confidence_threshold)[0]
    boxes = boxes[inds]
    landms = landms[inds]
    scores = scores[inds]

    # Keep the top-K candidates before NMS.
    order = np.argsort(-scores)[:top_k]
    boxes = boxes[order]
    landms = landms[order]
    scores = scores[order]

    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(float, copy=False)
    keep = py_cpu_nms(dets, nms_threshold)
    dets = dets[keep, :]
    landms = landms[keep]

    # Keep the top-K results after NMS.
    dets = dets[:keep_top_k, :]
    landms = landms[:keep_top_k, :]

    dets = np.concatenate((dets, landms), axis=1)
    # Only the box values and the score are reported to the caller.
    return dets[:, :5].tolist()
def pipeline(net, frame, args, device, resize, cfg):
    """Detect faces in ``frame`` and return the frame annotated by ``draw``.

    Args:
        net: callable returning ``(loc, conf, landms)`` for the input batch.
        frame: image array with three axes (height, width, channels).
        args: object providing ``confidence_threshold``, ``top_k``,
            ``nms_threshold``, ``keep_top_k`` and ``vis_thres``.
        device: torch device the input, scales and priors are moved to.
        resize: divisor applied to boxes and landmarks after scaling.
        cfg: configuration passed to ``PriorBox``; ``cfg['variance']`` is
            used for decoding.

    Returns:
        Whatever ``draw`` returns for the frame and its detections.
    """
    pixels = np.float32(frame)
    im_height, im_width, _ = pixels.shape
    box_scale = torch.Tensor([im_width, im_height, im_width, im_height])

    pixels -= (104, 117, 123)
    batch = torch.from_numpy(pixels.transpose(2, 0, 1)).unsqueeze(0).to(device)
    box_scale = box_scale.to(device)

    loc, conf, landms = net(batch)  # forward pass

    variance = cfg['variance']
    anchors = PriorBox(cfg, image_size=(im_height, im_width)).forward().to(device).data

    boxes = (decode(loc.data.squeeze(0), anchors, variance) * box_scale / resize).cpu().numpy()
    scores = conf.squeeze(0).data.cpu().numpy()[:, 1]

    landm_scale = torch.Tensor([batch.shape[3], batch.shape[2]] * 5).to(device)
    landms = decode_landm(landms.data.squeeze(0), anchors, variance)
    landms = (landms * landm_scale / resize).cpu().numpy()

    # Ignore low scores, then keep the top-K candidates before NMS.
    above = np.where(scores > args.confidence_threshold)[0]
    ranked = above[scores[above].argsort()[::-1][:args.top_k]]
    boxes, landms, scores = boxes[ranked], landms[ranked], scores[ranked]

    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
    kept = py_cpu_nms(dets, args.nms_threshold)

    # Keep the top-K results after NMS and append the landmark columns.
    limit = args.keep_top_k
    dets = np.concatenate((dets[kept, :][:limit, :], landms[kept][:limit, :]), axis=1)

    return draw(frame, dets, args.vis_thres,
                draw_box=True, draw_text=True, draw_landmarks=True)
def predict(self, img_name):
    """Read an image from disk and return its detections.

    Uses the module-level ``cfg`` for priors and decoding, and the instance
    attributes ``confidence_threshold``, ``top_k``, ``nms_threshold``,
    ``keep_top_k`` and ``cpu`` for filtering and NMS.

    Args:
        img_name: path passed to ``cv2.imread``.

    Returns:
        2-D float32 array with the box columns followed by the score column.
    """
    resize = 1
    img = np.float32(cv2.imread(img_name, cv2.IMREAD_COLOR))
    if resize != 1:
        img = cv2.resize(img, None, None, fx=resize, fy=resize,
                         interpolation=cv2.INTER_LINEAR)

    im_height, im_width, _ = img.shape
    box_scale = torch.Tensor([im_width, im_height, im_width, im_height])

    img -= (104, 117, 123)
    batch = torch.from_numpy(img.transpose(2, 0, 1)).unsqueeze(0).to(self.device)
    box_scale = box_scale.to(self.device)

    timers = {'forward_pass': Timer(), 'misc': Timer()}

    timers['forward_pass'].tic()
    loc, conf = self.net(batch)  # forward pass
    timers['forward_pass'].toc()

    timers['misc'].tic()
    anchors = PriorBox(cfg, image_size=(im_height, im_width)).forward().to(self.device).data
    boxes = decode(loc.data.squeeze(0), anchors, cfg['variance'])
    boxes = (boxes * box_scale / resize).cpu().numpy()
    scores = conf.data.cpu().numpy()[:, 1]

    # Ignore low scores, then keep the top-K candidates before NMS.
    above = np.where(scores > self.confidence_threshold)[0]
    ranked = above[scores[above].argsort()[::-1][:self.top_k]]
    boxes, scores = boxes[ranked], scores[ranked]

    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
    kept = nms(dets, self.nms_threshold, force_cpu=self.cpu)

    # Keep the top-K results after NMS.
    dets = dets[kept, :][:self.keep_top_k, :]
    timers['misc'].toc()
    return dets
def GetFacialPoints(img_raw):
    """Detect faces in one image and return boxes, scores and landmarks.

    Relies on the module-level ``net``, ``device``, ``cfg`` and ``config``
    objects. ``torch.cuda.empty_cache()`` is called before returning.

    Args:
        img_raw: image array with three axes (height, width, channels).

    Returns:
        2-D array: box columns, score column, then the landmark columns.
    """
    pixels = np.float32(img_raw)
    height, width, _ = img_raw.shape
    box_scale = torch.Tensor([width, height, width, height])

    pixels -= (104, 117, 123)
    batch = torch.from_numpy(pixels.transpose(2, 0, 1)).unsqueeze(0).to(device)
    box_scale = box_scale.to(device)

    loc, conf, landms = net(batch)  # forward pass

    variance = cfg['variance']
    anchors = PriorBox(cfg, image_size=(height, width)).forward().to(device).data

    boxes = decode(loc.data.squeeze(0), anchors, variance)
    boxes = (boxes * box_scale / config.resize).cpu().detach().numpy()
    scores = conf.squeeze(0).data.cpu().detach().numpy()[:, 1]

    landm_scale = torch.Tensor([batch.shape[3], batch.shape[2]] * 5).to(device)
    landms = decode_landm(landms.data.squeeze(0), anchors, variance)
    landms = (landms * landm_scale / config.resize).cpu().detach().numpy()

    # Ignore low scores, then keep the top-K candidates before NMS.
    above = np.where(scores > config.confidence_threshold)[0]
    ranked = above[scores[above].argsort()[::-1][:config.top_k]]
    boxes, landms, scores = boxes[ranked], landms[ranked], scores[ranked]

    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
    kept = py_cpu_nms(dets, config.nms_threshold)

    # Keep the top-K results after NMS and append the landmark columns.
    limit = config.keep_top_k
    result = np.concatenate((dets[kept, :][:limit, :], landms[kept][:limit, :]), axis=1)

    torch.cuda.empty_cache()
    return result
def detect_image(self, img) -> List[FaceDetection]:
    """Detect faces in a single image and wrap them as ``FaceDetection``.

    Thresholds are read from the module-level ``args`` object. The input
    shape and the elapsed time are printed to stdout.

    Args:
        img: image array with three axes (height, width, channels).

    Returns:
        One ``FaceDetection`` per detection whose score is not below
        ``args.vis_thres``; box values are truncated to ``int``.
    """
    # TODO: add detect logic for single image
    print(np.shape(img))
    started = time.time()

    pixels = np.float32(img)
    im_height, im_width, _ = pixels.shape
    box_scale = torch.Tensor([im_width, im_height, im_width, im_height])

    pixels -= (104, 117, 123)
    batch = torch.from_numpy(pixels.transpose(2, 0, 1)).unsqueeze(0).to(self.device)
    box_scale = box_scale.to(self.device)

    # The landmark output is produced by the network but not used here.
    loc, conf, _landms = self.net(batch)  # forward pass

    anchors = PriorBox(self.cfg, image_size=(im_height, im_width)).forward().to(self.device).data
    boxes = decode(loc.data.squeeze(0), anchors, self.cfg['variance'])
    boxes = (boxes * box_scale / self.resize).cpu().numpy()
    scores = conf.squeeze(0).data.cpu().numpy()[:, 1]

    # Ignore low scores, then keep the top-K candidates before NMS.
    above = np.where(scores > args.confidence_threshold)[0]
    ranked = above[scores[above].argsort()[::-1][:args.top_k]]
    boxes, scores = boxes[ranked], scores[ranked]

    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
    kept = py_cpu_nms(dets, args.nms_threshold)
    dets = dets[kept, :][:args.keep_top_k, :]

    box_list = []
    for det in dets:
        score = det[4]
        if score < args.vis_thres:
            continue
        as_int = [int(value) for value in det]
        box_list.append(
            FaceDetection(as_int[0], as_int[1], as_int[2], as_int[3], 0, score))

    print('net forward time: {:.4f}'.format(time.time() - started))
    return box_list
def decode_output(image, detection_boxes, detection_scores, detection_landmark, cfg_plate):
    """Decode raw detector outputs into boxes, scores and landmarks.

    Args:
        image: tensor whose trailing two shape entries are read as
            ``(image_h, image_w)``.
        detection_boxes: box regression output.
        detection_scores: raw class scores; softmax over the last axis is
            applied here and column 1 is used as the detection score.
        detection_landmark: landmark regression output.
        cfg_plate: configuration passed to ``PriorBox``; also supplies
            ``variance``, ``confidence_threshold``, ``top_k``,
            ``nms_threshold`` and ``keep_top_k``.

    Returns:
        2-D array: box columns, score column, then the landmark columns.
    """
    image_h, image_w = image.shape[2:]
    probabilities = F.softmax(detection_scores, dim=-1)

    # Priors are generated for (height, width) and placed on the CUDA device.
    anchors = PriorBox(cfg_plate, image_size=(image_h, image_w), phase='test').forward()
    anchors = anchors.to(torch.device('cuda')).data
    variance = cfg_plate['variance']

    boxes = decode(detection_boxes.data.squeeze(0), anchors, variance)
    boxes[:, 0::2] *= image_w  # even columns are scaled by the width
    boxes[:, 1::2] *= image_h  # odd columns are scaled by the height
    boxes = boxes.cpu().numpy()

    scores = probabilities.squeeze(0).data.cpu().numpy()[:, 1]

    landms = decode_landm(detection_landmark.data.squeeze(0), anchors, variance)
    landms[:, 0::2] *= image_w
    landms[:, 1::2] *= image_h
    landms = landms.cpu().numpy()

    # Ignore low scores, then keep the top-K candidates before NMS.
    above = np.where(scores > cfg_plate['confidence_threshold'])[0]
    ranked = above[scores[above].argsort()[::-1][:cfg_plate['top_k']]]
    boxes, landms, scores = boxes[ranked], landms[ranked], scores[ranked]

    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
    kept = py_cpu_nms(dets, cfg_plate['nms_threshold'])

    # Keep the top-K results after NMS and append the landmark columns.
    limit = cfg_plate['keep_top_k']
    return np.concatenate((dets[kept, :][:limit, :], landms[kept][:limit, :]), axis=1)
def detect_faces(ops, detect_model, img_raw, device):
    """Run ``detect_model`` on one image and return boxes with scores.

    Uses the module-level ``cfg`` for priors and decoding.

    Args:
        ops: object providing ``confidence_threshold``, ``top_k``,
            ``nms_threshold`` and ``keep_top_k``.
        detect_model: callable returning ``(loc, conf)`` for the input batch.
        img_raw: image array with three axes (height, width, channels).
        device: torch device the input, scale and priors are moved to.

    Returns:
        2-D float32 array with the box columns followed by the score column.
    """
    resize = 1
    pixels = np.float32(img_raw)
    if resize != 1:
        pixels = cv2.resize(pixels, None, None, fx=resize, fy=resize,
                            interpolation=cv2.INTER_LINEAR)

    im_height, im_width, _ = pixels.shape
    box_scale = torch.Tensor([im_width, im_height, im_width, im_height])

    pixels -= (104, 117, 123)
    batch = torch.from_numpy(pixels.transpose(2, 0, 1)).unsqueeze(0).to(device)
    box_scale = box_scale.to(device)

    loc, conf = detect_model(batch)  # forward pass

    anchors = PriorBox(cfg, image_size=(im_height, im_width)).forward().to(device).data
    boxes = decode(loc.data.squeeze(0), anchors, cfg['variance'])
    boxes = (boxes * box_scale / resize).cpu().numpy()
    scores = conf.squeeze(0).data.cpu().numpy()[:, 1]

    # Ignore low scores, then keep the top-K candidates before NMS.
    above = np.where(scores > ops.confidence_threshold)[0]
    ranked = above[scores[above].argsort()[::-1][:ops.top_k]]
    boxes, scores = boxes[ranked], scores[ranked]

    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
    kept = py_cpu_nms(dets, ops.nms_threshold)

    # Keep the top-K results after NMS.
    return dets[kept, :][:ops.keep_top_k, :]
def set_default_size(self, imgshape=(640, 480, 3)):  # [H, W, nCh]
    """Regenerate the cached priors when the image size changes.

    Nothing happens when the requested height and width equal the stored
    ones and priors have already been generated.

    Args:
        imgshape: sequence of three values ``(height, width, channels)``.
            The default is an immutable tuple so it cannot be altered
            between calls.
    """
    im_height, im_width, im_nch = imgshape
    if (im_height == self.im_height and im_width == self.im_width
            and self._priors is not None):
        return

    self.im_height, self.im_width, self.im_nch = im_height, im_width, im_nch

    # Notes carried over from the original author:
    #   priorbox shape is [-1, 4];
    #   dim0: number of bboxes predicted by the network;
    #   dim1: [x_center, y_center, w, h].
    #   The priors hold the bbox centre position and the preset minimum face
    #   size; the aspect ratio is handled through `variance`. All values are
    #   relative to the image size and lie in (0, 1).
    priorbox = PriorBox(self.cfg, image_size=(self.im_height, self.im_width))
    self._priors = priorbox.forward()
def get_bbox(self, img_raw):
    """Return integer bounding boxes of confident detections in one image.

    Uses the module-level ``cfg`` for priors and decoding. Fixed settings:
    score floor 0.05, at most 5000 candidates before NMS, NMS threshold 0.3,
    at most 750 detections after NMS, and a final score cut-off of 0.65.

    Args:
        img_raw: image data with three axes (height, width, channels).

    Returns:
        List of ``(b0, b1, b2, b3)`` integer tuples, one per kept detection.
    """
    target = self.device
    img = torch.FloatTensor(img_raw).to(target)
    im_height, im_width, _ = img.size()
    box_scale = torch.FloatTensor([im_width, im_height, im_width, im_height]).to(target)

    img -= torch.FloatTensor((104, 117, 123)).to(target)
    batch = img.permute(2, 0, 1).unsqueeze(0)

    loc, conf = self.net(batch)  # forward pass

    anchors = PriorBox(cfg, image_size=(im_height, im_width)).forward().to(target).data
    boxes = (decode(loc.data.squeeze(0), anchors, cfg['variance']) * box_scale).cpu().numpy()
    scores = conf.squeeze(0).data.cpu().numpy()[:, 1]

    # Ignore low scores, then keep the top-K candidates before NMS.
    above = np.where(scores > 0.05)[0]
    ranked = above[scores[above].argsort()[::-1][:5000]]
    boxes, scores = boxes[ranked], scores[ranked]

    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
    kept = nms(dets, 0.3, force_cpu=False)

    # Keep the top-K results after NMS.
    dets = dets[kept, :][:750, :]

    bboxes = []
    for det in dets:
        if det[4] < 0.65:
            continue
        x1, y1, x2, y2, _ = map(int, det)
        bboxes.append((x1, y1, x2, y2))
    return bboxes
def decode_params(self, height, width):
    """Return the decoding parameters for an image size, caching per size.

    Results are stored in ``self.decode_param_cache`` under the key
    ``(height, width)``; a cached entry is returned unchanged.

    Args:
        height: image height handed to ``PriorBox``.
        width: image width handed to ``PriorBox``.

    Returns:
        Tuple ``(prior_data, scale, scale1)`` where ``scale`` repeats
        ``(width, height)`` twice and ``scale1`` repeats it five times.
    """
    key = (height, width)
    try:
        return self.decode_param_cache[key]
    except KeyError:
        pass  # cache miss: build the entry below

    prior_data = PriorBox(self.cfg, image_size=(height, width)).forward().data
    width_height = [width, height]
    entry = (
        prior_data,
        torch.Tensor(width_height * 2),
        torch.Tensor(width_height * 5),
    )
    self.decode_param_cache[key] = entry
    return entry
def facebox_detect(self, img_raw):
    """Detect boxes in one image and return them with their scores.

    Runs ``self.model`` on the image, decodes boxes, filters by
    ``self.cfg['confidence_threshold']``, keeps ``self.cfg['top_k']``
    candidates, applies ``nms`` and discards every row that contains a
    negative value.

    Args:
        img_raw: image array with three axes (height, width, channels).

    Returns:
        2-D float32 array with the box columns followed by the score column.
    """
    pixels = np.float32(img_raw)
    im_height, im_width, _ = pixels.shape
    box_scale = torch.Tensor([im_width, im_height, im_width, im_height])

    pixels -= (104, 117, 123)
    batch = torch.from_numpy(pixels.transpose(2, 0, 1)).unsqueeze(0).to(self.device)
    box_scale = box_scale.to(self.device)

    loc, conf = self.model(batch)  # forward pass

    anchors = PriorBox(self.cfg, image_size=(im_height, im_width)).forward().to(self.device).data
    boxes = decode(loc.data.squeeze(0), anchors, self.cfg['variance'])
    boxes = (boxes * box_scale).cpu().numpy()
    scores = conf.data.cpu().numpy()[:, 1]

    # Ignore low scores, then keep the top-K candidates before NMS.
    above = np.where(scores > self.cfg['confidence_threshold'])[0]
    ranked = above[scores[above].argsort()[::-1][:self.cfg['top_k']]]
    boxes, scores = boxes[ranked], scores[ranked]

    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
    kept = nms(dets, self.cfg['nms_threshold'], False)

    # Keep the top-K results after NMS.
    boxes_score = dets[kept, :][:self.cfg['keep_top_k'], :]

    # Drop every detection that has a negative entry in its row.
    has_negative = (boxes_score < 0).any(axis=1)
    return boxes_score[~has_negative]
def face_location(self, img, resize=1, cof=0.5):
    """Return the boxes of faces whose score exceeds ``cof``.

    Uses the module-level ``cfg`` for priors and decoding, and the instance
    attributes ``confidence_threshold``, ``top_k``, ``nms_threshold`` and
    ``keep_top_k`` for filtering. No tensor is moved to another device here.

    Args:
        img: image array with three axes (height, width, channels).
        resize: scale factor; when it is not 1 the image is resized by it
            before inference and the boxes are divided by it afterwards.
        cof: final score cut-off; only detections scoring strictly above it
            are returned.

    Returns:
        2-D float32 array containing the first four columns (the box values)
        of the kept detections.
    """
    # Prepare the image.
    img = np.float32(img)
    if resize != 1:
        img = cv2.resize(img, None, None, fx=resize, fy=resize,
                         interpolation=cv2.INTER_LINEAR)
    im_height, im_width, _ = img.shape
    priorbox = PriorBox(cfg, image_size=(im_height, im_width))
    scale = torch.Tensor(
        [img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
    img -= (104, 117, 123)
    img = img.transpose(2, 0, 1)
    img = torch.from_numpy(img).unsqueeze(0)

    # Forward pass.
    loc, conf = self.net(img)

    # The priors must be generated before `.data` is read from them; without
    # this assignment `priors` would be an undefined name.
    priors = priorbox.forward()
    prior_data = priors.data
    boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
    boxes = boxes * scale / resize
    boxes = boxes.cpu().numpy()
    scores = conf.squeeze(0).data.cpu().numpy()[:, 1]

    # Ignore low scores.
    inds = np.where(scores > self.confidence_threshold)[0]
    boxes = boxes[inds]
    scores = scores[inds]

    # Keep the top-K candidates before NMS.
    order = scores.argsort()[::-1][:self.top_k]
    boxes = boxes[order]
    scores = scores[order]

    # Do NMS.
    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
    keep = nms(dets, self.nms_threshold)
    dets = dets[keep, :]

    # Keep the top-K results after NMS.
    dets = dets[:self.keep_top_k, :]

    # Select the faces with a sufficiently high confidence.
    dets = dets[dets[:, 4] > cof, :4]
    return dets
def detect_faces(self, img, resize=1.0):
    """Run ``self.net`` on one image and return boxes with scores.

    Uses the module-level ``cfg`` for priors and decoding, and ``self.args``
    for ``confidence_threshold``, ``top_k``, ``nms_threshold``,
    ``keep_top_k`` and ``cpu``.

    Args:
        img: image array with three axes (height, width, channels). It is
            copied to float32 first, so the caller's array is left untouched
            and integer images are accepted.
        resize: divisor applied to the decoded boxes after scaling.

    Returns:
        2-D float32 array with the box columns followed by the score column.
    """
    # Work on a float32 copy: the in-place subtraction below would otherwise
    # overwrite the caller's image (and fail for integer arrays).
    img = np.array(img, dtype=np.float32)

    im_height, im_width, _ = img.shape
    scale = torch.Tensor(
        [img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
    img -= (104, 117, 123)
    img = img.transpose(2, 0, 1)
    img = torch.from_numpy(img).unsqueeze(0)
    img = img.to(self.device)
    scale = scale.to(self.device)

    loc, conf = self.net(img)  # forward pass

    priorbox = PriorBox(cfg, image_size=(im_height, im_width))
    priors = priorbox.forward()
    priors = priors.to(self.device)
    prior_data = priors.data
    boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
    boxes = boxes * scale / resize
    boxes = boxes.cpu().numpy()
    scores = conf.squeeze(0).data.cpu().numpy()[:, 1]

    # Ignore low scores.
    inds = np.where(scores > self.args.confidence_threshold)[0]
    boxes = boxes[inds]
    scores = scores[inds]

    # Keep the top-K candidates before NMS.
    order = scores.argsort()[::-1][:self.args.top_k]
    boxes = boxes[order]
    scores = scores[order]

    # Do NMS.
    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
    keep = nms(dets, self.args.nms_threshold, force_cpu=self.args.cpu)
    dets = dets[keep, :]

    # Keep the top-K results after NMS.
    dets = dets[:self.args.keep_top_k, :]
    return dets
def nms_process(self, network_output, scale, im_height, im_width) -> List[TrackingRegion]:
    """Turn raw network output into a list of face ``TrackingRegion`` objects.

    Uses the module-level ``cfg`` for priors and decoding. Fixed settings:
    at most 5000 candidates before NMS, NMS threshold 0.3 and at most 750
    detections after NMS. Candidates must score above ``self.score_min``.

    Args:
        network_output: sequence of three items; the first two are the box
            and score outputs, the third is passed to ``PriorBox``.
        scale: tensor multiplied onto the decoded boxes.
        im_height: image height handed to ``PriorBox``.
        im_width: image width handed to ``PriorBox``.

    Returns:
        One ``TrackingRegion`` per detection, with its rectangle, confidence
        and ``data["class_id"] = "face"`` filled in.
    """
    anchors = PriorBox(cfg, network_output[2], (im_height, im_width), phase='test').forward()
    if self.use_gpu:
        anchors = anchors.cuda()

    loc, conf, _ = network_output
    boxes = (decode(loc.data.squeeze(0), anchors.data, cfg['variance']) * scale).cpu().numpy()
    scores = conf.data.cpu().numpy()[:, 1]

    # Ignore low scores, then keep the top-K candidates before NMS.
    above = np.where(scores > self.score_min)[0]
    ranked = above[scores[above].argsort()[::-1][:5000]]
    boxes, scores = boxes[ranked], scores[ranked]

    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
    kept = nms(dets, 0.3, force_cpu=False)

    # Keep the top-K results after NMS.
    dets = dets[kept, :][:750, :]

    regions = []
    for row in dets:
        region = TrackingRegion()
        region.set_rect(left=row[0], top=row[1], right=row[2], bottom=row[3])
        region.confidence = row[4]
        region.data["class_id"] = "face"
        regions.append(region)
    return regions
def detect(sample: Union[np.ndarray, Tensor], model: torch.nn.Module, cfg: Dict[str, any], device: torch.device) -> List[np.ndarray]:
    """Run the detector over a stack of frames in batches.

    Args:
        sample: array or tensor with four axes unpacked as
            (num_frames, height, width, channels).
        model: callable returning ``(loc, conf, landms)`` for a batch.
        cfg: configuration passed to ``PriorBox`` and
            ``postproc_detections``; ``cfg['batch_size']`` is the requested
            batch size before ``adjust_bs`` is applied.
        device: device the batches, priors and scale are moved to.

    Returns:
        Concatenated results of ``postproc_detections`` over all batches.
    """
    num_frames, height, width, _channels = sample.shape
    batch_size = adjust_bs(cfg['batch_size'], height, width)

    imgs, scale = prepare_imgs(sample)
    priors = PriorBox(cfg, image_size=(height, width)).forward().to(device)
    scale = scale.to(device)

    detections = []
    for begin in range(0, num_frames, batch_size):
        imgs_batch = imgs[begin:begin + batch_size].to(device)
        with torch.no_grad():
            loc, conf, landms = model(imgs_batch)
        # Drop references that are no longer needed as early as possible.
        del imgs_batch, landms
        detections.extend(postproc_detections(loc, conf, priors, scale, cfg))
        del loc, conf
    return detections
def find_faces(frames: Tensor, model: torch.nn.Module, device: torch.device, conf: Dict[str, Any]) -> List[Tensor]:
    """Detect and crop faces, relaxing the score threshold when too few are found.

    The model runs once on all frames. While the mean number of detections
    per frame is below ``conf['min_positive_rate']`` and
    ``conf['score_thresh']`` is still at least ``conf['score_thresh_min']``,
    postprocessing is repeated with the threshold lowered by
    ``conf['score_thresh_step']``. The caller's ``conf`` is never modified;
    a copy is made before the threshold changes.

    Args:
        frames: tensor with four axes unpacked as (D, H, W, C).
        model: callable returning ``(locations, confidence, landmarks)``.
        device: device the priors and the scale tensor are moved to.
        conf: configuration passed to ``PriorBox`` and
            ``postproc_detections_gpu``.

    Returns:
        List of crops produced by ``crop_square_torch``; frames are visited
        in order and ``None`` crops are skipped.
    """
    def count_faces(per_frame):
        # NOTE(review): uint8 cannot represent a per-frame count above 255 --
        # confirm that postprocessing caps the number of detections below that.
        return np.array([len(found) for found in per_frame], dtype=np.uint8)

    D, H, W, C = frames.shape
    frames_chw = frames.permute(0, 3, 1, 2)
    frames, scale = prepare_imgs(frames)

    prior_box = PriorBox(conf, image_size=(H, W))
    priors = prior_box.forward().to(device)
    scale = scale.to(device)

    with torch.no_grad():
        locations, confidence, landmarks = model(frames)

    detections = postproc_detections_gpu(locations, confidence, priors, scale, conf)
    num_faces = count_faces(detections)
    while (num_faces.mean() < conf['min_positive_rate']
           and conf['score_thresh'] >= conf['score_thresh_min']):
        # Copy the configuration with a lowered score threshold.
        conf = dict(conf, score_thresh=conf['score_thresh'] - conf['score_thresh_step'])
        detections = postproc_detections_gpu(locations, confidence, priors, scale, conf)
        num_faces = count_faces(detections)
        logging.debug(
            "Rerun detection postprocessing with score_thresh={:.02f}, "
            "avg_pos_rate={:.02f}".format(conf['score_thresh'], num_faces.mean()))

    max_faces = max_num_faces(num_faces, conf['max_face_num_thresh'])
    del locations, confidence, landmarks, priors, prior_box, scale, frames

    faces = []
    for idx, frame in enumerate(frames_chw):
        crops = (crop_square_torch(frame, bbox[:4])
                 for bbox in detections[idx][:max_faces])
        faces.extend(crop for crop in crops if crop is not None)
    del detections, frames_chw
    return faces
print("The image shape is ", img.shape) scale = torch.Tensor( [img.shape[1], img.shape[0], img.shape[1], img.shape[0]]) img -= (104, 117, 123) img = img.transpose(2, 0, 1) img = torch.from_numpy(img).unsqueeze(0) img = img.to(device) scale = scale.to(device) tic = time.time() loc, conf, landms = net(img) # forward pass print('net forward time: {:.4f}'.format(time.time() - tic)) priorbox = PriorBox(cfg, image_size=(im_height, im_width)) priors = priorbox.forward() priors = priors.to(device) prior_data = priors.data boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance']) boxes = boxes * scale / resize boxes = boxes.cpu().numpy() scores = conf.squeeze(0).data.cpu().numpy()[:, 1] landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance']) scale1 = torch.Tensor([ img.shape[3], img.shape[2], img.shape[3], img.shape[2], img.shape[3], img.shape[2], img.shape[3], img.shape[2], img.shape[3], img.shape[2]
def Predict(self, img_path="test.jpg", thresh=0.5, out_img_path="result.jpg"):
    """Detect faces in an image file, draw them and save the annotated image.

    The device, network and configuration are read from
    ``self.system_dict["local"]``. ``thresh`` is used both as the score
    floor before NMS and as the cut-off for reported/drawn detections.

    Args:
        img_path: path of the image to read.
        thresh: score threshold.
        out_img_path: path the annotated image is written to.

    Returns:
        Dict with three parallel lists: ``"scores"`` (float scores),
        ``"bboxes"`` (``[b0, b1, b2, b3]`` integer boxes) and ``"labels"``
        (the score formatted with four decimals).

    Raises:
        ValueError: if the image cannot be read from ``img_path``.
    """
    confidence_threshold = thresh
    vis_thres = thresh
    nms_threshold = 0.4
    top_k = 1000
    keep_top_k = 750
    save_image = True
    resize = 1

    device = self.system_dict["local"]["device"]
    net = self.system_dict["local"]["net"]
    cfg = self.system_dict["local"]["cfg"]

    img_raw = cv2.imread(img_path, cv2.IMREAD_COLOR)
    if img_raw is None:
        # cv2.imread signals failure by returning None; fail with a clear
        # message instead of an obscure unpacking error further down.
        raise ValueError("could not read image: {!r}".format(img_path))

    img = np.float32(img_raw)
    im_height, im_width, _ = img.shape
    scale = torch.Tensor(
        [img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
    img -= (104, 117, 123)
    img = img.transpose(2, 0, 1)
    img = torch.from_numpy(img).unsqueeze(0)
    img = img.to(device)
    scale = scale.to(device)

    tic = time.time()
    loc, conf, landms = net(img)  # forward pass
    print('net forward time: {:.4f}'.format(time.time() - tic))

    priors = PriorBox(cfg, image_size=(im_height, im_width)).forward().to(device)
    prior_data = priors.data
    boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
    boxes = (boxes * scale / resize).cpu().numpy()
    scores = conf.squeeze(0).data.cpu().numpy()[:, 1]

    landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance'])
    scale1 = torch.Tensor([img.shape[3], img.shape[2]] * 5).to(device)
    landms = (landms * scale1 / resize).cpu().numpy()

    # Ignore low scores.
    inds = np.where(scores > confidence_threshold)[0]
    boxes = boxes[inds]
    landms = landms[inds]
    scores = scores[inds]

    # Keep the top-K candidates before NMS.
    order = scores.argsort()[::-1][:top_k]
    boxes = boxes[order]
    landms = landms[order]
    scores = scores[order]

    # Do NMS.
    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
    keep = py_cpu_nms(dets, nms_threshold)
    dets = dets[keep, :]
    landms = landms[keep]

    # Keep the top-K results after NMS; landmark columns follow the score.
    dets = dets[:keep_top_k, :]
    landms = landms[:keep_top_k, :]
    dets = np.concatenate((dets, landms), axis=1)

    tmp = {"scores": [], "bboxes": [], "labels": []}
    for det in dets:
        if det[4] < vis_thres:
            continue
        # Capture the score before the row is truncated to integers;
        # otherwise every reported score would collapse to int(score).
        score = float(det[4])
        text = "{:.4f}".format(det[4])
        b = list(map(int, det))
        cv2.rectangle(img_raw, (b[0], b[1]), (b[2], b[3]), (0, 0, 255), 2)
        cx = b[0]
        cy = b[1] + 12
        cv2.putText(img_raw, text, (cx, cy),
                    cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255))
        tmp["scores"].append(score)
        tmp["bboxes"].append([b[0], b[1], b[2], b[3]])
        tmp["labels"].append(text)

    if save_image:
        cv2.imwrite(out_img_path, img_raw)
    return tmp
class SSD(nn.Module):
    """Single Shot Multibox Architecture
    The network is composed of a base VGG network followed by the
    added multibox conv layers.  Each multibox layer branches into
        1) conv2d for class conf scores
        2) conv2d for localization predictions
        3) associated priorbox layer to produce default bounding
           boxes specific to the layer's feature map size.
    See: https://arxiv.org/pdf/1512.02325.pdf for more details.

    This variant additionally fuses conv3_3, conv4_3 and an upsampled
    conv5_3 feature map before the multibox heads (see ``forward``).

    Args:
        phase: (string) Can be "test" or "train"
        base: VGG16 layers for input, size of either 300 or 500
        extras: extra layers that feed to multibox loc and conf layers
        head: "multibox head" consists of loc and conf conv layers
        num_classes: number of classes each prior is scored against
    """

    # File extensions accepted by the weight loaders.
    _WEIGHT_EXTS = ('.pkl', '.pth')

    def __init__(self, phase, base, extras, head, num_classes):
        super(SSD, self).__init__()
        self.phase = phase
        self.num_classes = num_classes
        # TODO: implement __call__ in PriorBox
        self.priorbox = PriorBox(v2)
        self.priors = Variable(self.priorbox.forward(), volatile=True)
        self.size = 512

        # NOTE: the order in which sub-modules are assigned fixes the order
        # of state_dict(); load_weights_fused / load_weights_for_rosd slice
        # that ordering, so do not reorder these assignments.
        # SSD network
        self.vgg = nn.ModuleList(base)
        # Layer learns to scale the l2 normalized features from conv4_3
        self.L2Norm = L2Norm(512, 20)
        self.extras = nn.ModuleList(extras)
        # fused conv4_3 and conv5_3
        self.conv3_3 = nn.Conv2d(256, 256, 3, 1, 1)
        self.conv4_3 = nn.Conv2d(512, 512, 3, 1, 1)
        self.deconv = nn.ConvTranspose2d(512, 512, 2, 2)
        self.deconv2 = nn.ConvTranspose2d(512, 256, 2, 2)
        self.conv5_3 = nn.Conv2d(512, 512, 3, 1, 1)
        self.L2Norm5_3 = L2Norm(512, 10)
        self.L2Norm3_3 = L2Norm(256, 20)

        self.loc = nn.ModuleList(head[0])
        self.conf = nn.ModuleList(head[1])

        if self.phase == 'test':
            self.softmax = nn.Softmax()
            self.detect = Detect(num_classes, 0, 300, 0.01, 0.45)

    def forward(self, x):
        """Applies network layers and ops on input image(s) x.

        Args:
            x: input image or batch of images. Shape: [batch,3,300,300].

        Return:
            Depending on phase:
            test:
                Variable(tensor) of output class label predictions,
                confidence score, and corresponding location predictions for
                each object detected. Shape: [batch,topk,7]

            train:
                list of concat outputs from:
                    1: confidence layers, Shape: [batch*num_priors,num_classes]
                    2: localization layers, Shape: [batch,num_priors*4]
                    3: priorbox layers, Shape: [2,num_priors*4]
        """
        sources = list()
        loc = list()
        conf = list()

        # Apply vgg up to conv4_3 relu
        # Fused conv4_3,conv5_3
        for k in range(16):
            x = self.vgg[k](x)
        conv3_3 = self.conv3_3(x)
        s3_3 = self.L2Norm3_3(conv3_3)

        for k in range(16, 23):
            x = self.vgg[k](x)
        conv4_3 = self.conv4_3(x)
        s4_3 = self.L2Norm(conv4_3)

        for k in range(23, 30):
            x = self.vgg[k](x)
        # Upsample the deeper map so it can be summed with the conv4_3 branch.
        deconv = self.deconv(x)
        conv5_3 = self.conv5_3(deconv)
        s5_3 = self.L2Norm5_3(conv5_3)

        s2 = F.relu(s4_3 + s5_3)
        s1 = F.relu(s3_3 + self.deconv2(s2))
        sources.extend([s1, s2])

        # apply vgg up to fc7
        for k in range(30, len(self.vgg)):
            x = self.vgg[k](x)
        sources.append(x)

        # Apply extra layers and cache source layer outputs
        for k, v in enumerate(self.extras):
            x = F.relu(v(x), inplace=True)
            if k % 2 == 1:
                sources.append(x)

        # Apply multibox head to source layers
        for (x, l, c) in zip(sources, self.loc, self.conf):
            loc.append(l(x).permute(0, 2, 3, 1).contiguous())
            conf.append(c(x).permute(0, 2, 3, 1).contiguous())

        loc = torch.cat([o.view(o.size(0), -1) for o in loc], 1)
        conf = torch.cat([o.view(o.size(0), -1) for o in conf], 1)

        if self.phase == 'test':
            conf_preds = conf.view(-1, self.num_classes)
            conf_preds = self.softmax(conf_preds).view(
                conf.size(0), -1, self.num_classes)
            # TODO: test
            output = self.detect(
                loc.view(loc.size(0), -1, 4),        # loc preds
                conf_preds,
                self.priors.type(type(x.data))       # default boxes
            )
        else:
            output = (
                loc.view(loc.size(0), -1, 4),
                conf.view(conf.size(0), -1, self.num_classes),
                self.priors
            )
        return output

    def load_weights(self, base_file):
        """Load a full state dict from ``base_file`` (.pth or .pkl only)."""
        _, ext = os.path.splitext(base_file)
        # `ext == '.pkl' or '.pth'` was always truthy; test membership instead.
        if ext in self._WEIGHT_EXTS:
            print('Loading weights into state dict ...')
            self.load_state_dict(
                torch.load(base_file,
                           map_location=lambda storage, loc: storage))
            print('Finished!')
        else:
            print("Sorry only .pth or .pkl files supported.")

    def load_weights_fused(self, base_file):
        """Copy matching tensors for the first 51 state-dict entries.

        Entries missing from ``base_file`` and ``"extras.7.weight"`` are
        skipped. Only .pth or .pkl files are accepted.
        """
        _, ext = os.path.splitext(base_file)
        if ext in self._WEIGHT_EXTS:
            print('Loading weights into state dict ...')
            params = torch.load(base_file,
                                map_location=lambda storage, loc: storage)
            own_dict = self.state_dict()
            for k, v in list(own_dict.items())[:51]:
                param = params.get(k)
                if k == "extras.7.weight" or param is None:
                    continue
                v.copy_(param)
            print('Finished!')
        else:
            print("Sorry only .pth or .pkl files supported.")

    def load_weights_for_rosd(self, base_file):
        """Copy matching tensors for all but the last 28 state-dict entries.

        Entries missing from ``base_file`` are skipped. Only .pth or .pkl
        files are accepted.
        """
        _, ext = os.path.splitext(base_file)
        if ext in self._WEIGHT_EXTS:
            print('Loading weights into state dict ...')
            params = torch.load(base_file,
                                map_location=lambda storage, loc: storage)
            own_dict = self.state_dict()
            for k, v in list(own_dict.items())[:-28]:
                param = params.get(k)
                if param is None:
                    continue
                v.copy_(param)
            print('Finished!')
        else:
            print("Sorry only .pth or .pkl files supported.")
def process_frames(
    torched_frames: torch.tensor,
    is_fp16: bool,
    resize_factor: float,
    video_path: Path,
    frame_ids: np.array,
    frames: np.array,
    device: str,
    batch_size: int,
    cfg: dict,
    nms_threshold: float,
    confidence_threshold: float,
    is_save_crops: bool,
    is_save_boxes: bool,
    output_path: Path,
    net: torch.nn.Module,
    min_size: int,
    resize_scale: float,
    keep_top_k: Optional[int],
) -> None:
    """Run the detector over video frames in batches and save crops / labels.

    Args:
        torched_frames: Frame tensor fed to ``net``; its last two dimensions
            are read as (height, width).
        is_fp16: Cast ``torched_frames`` to half precision before inference.
        resize_factor: Divisor applied to decoded boxes and landmarks.
        video_path: Source video; its stem names the output folder / JSON.
        frame_ids: Frame identifier for each row of ``torched_frames``.
        frames: Images the crops are cut from, indexed like ``frame_ids``.
        device: Device the batches, priors and scales are moved to.
        batch_size: Number of frames per forward pass.
        cfg: Detector configuration; ``cfg["variance"]`` is used for decoding.
        nms_threshold: Threshold passed to ``nms``.
        confidence_threshold: Minimum score for a candidate to be kept.
        is_save_crops: Write one JPEG per accepted detection.
        is_save_boxes: Write one JSON file with all accepted detections.
        output_path: Root output directory; when ``None`` nothing is saved.
        net: Detector returning ``(loc, conf, land)``.
        min_size: Minimum crop width and height, in pixels.
        resize_scale: ``resize_coeff`` forwarded to ``resize`` for crops.
        keep_top_k: Cap on candidates kept before NMS (``None`` = no cap).
    """
    # Saving is only possible when an output directory was supplied.
    save_crops = is_save_crops and output_path is not None
    save_boxes = is_save_boxes and output_path is not None

    if save_crops:
        output_image_path = output_path / "images"
        output_image_path.mkdir(exist_ok=True, parents=True)

    if save_boxes:
        output_label_path: Path = output_path / "labels"
        output_label_path.mkdir(exist_ok=True, parents=True)

    if is_fp16:
        torched_frames = torched_frames.half()

    num_frames = torched_frames.shape[0]
    video_id = video_path.stem
    labels: List[dict] = []

    image_height, image_width = torched_frames.shape[2:]

    # Boxes have 4 columns, landmarks 10; both alternate width / height.
    scale1 = torch.Tensor([image_width, image_height] * 5).to(device)
    scale = torch.Tensor([image_width, image_height] * 2).to(device)

    # Priors depend only on the image size, so build them once.
    priorbox = PriorBox(cfg, image_size=(image_height, image_width))
    prior_data = priorbox.forward().to(device).data

    for start_index in range(0, num_frames, batch_size):
        end_index = min(start_index + batch_size, num_frames)
        loc, conf, land = net(torched_frames[start_index:end_index].to(device))

        for pred_id in range(loc.shape[0]):
            # pred_id is batch-local; frame_index addresses the full arrays.
            frame_index = start_index + pred_id
            frame_id = frame_ids[frame_index]

            boxes = decode(loc.data[pred_id], prior_data, cfg["variance"])
            boxes *= scale / resize_factor
            scores = conf[pred_id][:, 1]

            landmarks = decode_landm(land.data[pred_id], prior_data,
                                     cfg["variance"])
            landmarks *= scale1 / resize_factor

            # ignore low scores
            valid_index = torch.where(scores > confidence_threshold)[0]
            boxes = boxes[valid_index]
            landmarks = landmarks[valid_index]
            scores = scores[valid_index]

            order = scores.argsort(descending=True)
            boxes = boxes[order][:keep_top_k, :]
            landmarks = landmarks[order][:keep_top_k, :]
            scores = scores[order][:keep_top_k]

            # do NMS
            keep = nms(boxes, scores, nms_threshold)
            boxes = boxes[keep, :].int()
            landmarks = landmarks[keep].int()

            if boxes.shape[0] == 0:
                continue

            scores = scores[keep].cpu().numpy().astype(np.float64)
            box_rows = boxes.cpu().tolist()
            landmark_rows = landmarks.cpu().tolist()

            for crop_id, (x_min, y_min, x_max, y_max) in enumerate(box_rows):
                # np.clip returns NumPy scalars, which json.dump rejects;
                # convert back to plain ints.
                x_min = int(np.clip(x_min, 0, image_width - 1))
                y_min = int(np.clip(y_min, 0, image_height - 1))
                x_max = int(np.clip(x_max, x_min + 1, image_width - 1))
                y_max = int(np.clip(y_max, y_min + 1, image_height - 1))

                crop_width = x_max - x_min
                crop_height = y_max - y_min

                if crop_width < min_size or crop_height < min_size:
                    continue

                labels.append({
                    "frame_id": int(frame_id),
                    "crop_id": crop_id,
                    "bbox": [x_min, y_min, x_max, y_max],
                    "score": float(scores[crop_id]),
                    "landmarks": landmark_rows[crop_id],
                })

                if not save_crops:
                    continue

                x_min, y_min, x_max, y_max = resize(
                    x_min, y_min, x_max, y_max,
                    image_height, image_width,
                    resize_coeff=resize_scale)

                # Crop from the frame that produced this prediction
                # (not from the batch-local index).
                crop = frames[frame_index][y_min:y_max, x_min:x_max]

                target_folder = output_image_path / f"{video_id}"
                target_folder.mkdir(exist_ok=True, parents=True)

                crop_file_path = target_folder / f"{frame_id}_{crop_id}.jpg"
                if crop_file_path.exists():
                    continue

                cv2.imwrite(
                    str(crop_file_path),
                    cv2.cvtColor(crop, cv2.COLOR_BGR2RGB),
                    [int(cv2.IMWRITE_JPEG_QUALITY), 90],
                )

    if save_boxes:
        result = {
            "file_path": str(video_path),
            "file_id": video_id,
            "bboxes": labels,
        }
        with open(output_label_path / f"{video_id}.json", "w") as f:
            json.dump(result, f, indent=2)
def detection_image(self, image):
    """Detect faces in a single image.

    Args:
        image: Image array with 2 or 3 dimensions (height, width[, channels]).

    Returns:
        list of ``[score, xmin, ymin, xmax, ymax]`` entries, one per
        detection whose score exceeds ``self.yuzhi``; box coordinates are
        integers scaled by the input image's width and height. Returns ``0``
        when ``image`` has neither 2 nor 3 dimensions.
    """
    if len(image.shape) not in (2, 3):
        return 0
    h, w = image.shape[:2]

    # Resize to a fixed width of 256 while keeping the aspect ratio.
    image_resize = cv2.resize(image, (256, int(256 * h / w)))
    image_resize = np.float32(image_resize)
    im_height, im_width = image_resize.shape[:2]

    scale = torch.Tensor([w, h, w, h])
    image_resize -= self.mean
    image_resize /= self.val
    # NOTE(review): transpose(2, 0, 1) needs three axes, so a 2-D image
    # passes the shape check above but fails here -- confirm intended input.
    img = image_resize.transpose(2, 0, 1)
    img = torch.from_numpy(img).unsqueeze(0)
    img = img.to(self.device)
    scale = scale.to(self.device)

    out = self.net(img)
    priorbox = PriorBox(self.cfg, out[2], (im_height, im_width), phase="test")
    priors = priorbox.forward()
    priors = priors.to(self.device)
    loc, conf, _ = out
    prior_data = priors.data
    # Use the instance configuration, as everywhere else in this method,
    # instead of relying on a module-level ``cfg``.
    boxes = decode(loc.data.squeeze(0), prior_data, self.cfg['variance'])
    boxes = boxes * scale
    boxes = boxes.cpu().numpy()
    scores = conf.data.cpu().numpy()[:, 1]

    # ignore low scores
    inds = np.where(scores > self.confidence_threshold)[0]
    boxes = boxes[inds]
    scores = scores[inds]

    # keep top-K before NMS
    order = scores.argsort()[::-1][:self.top_k]
    boxes = boxes[order]
    scores = scores[order]

    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32,
                                                            copy=False)
    keep = nms(dets, self.nms_threshold, force_cpu=self.use_cpu)
    dets = dets[keep, :]
    dets = dets[:self.keep_top_k, :]

    # Columns of dets: xmin, ymin, xmax, ymax, score.
    return [
        [det[4], int(det[0]), int(det[1]), int(det[2]), int(det[3])]
        for det in dets
        if det[4] > self.yuzhi
    ]
def face_detector(frame):
    """Detect faces in ``frame``, label recognised ones and draw them.

    Relies on module-level ``resize``, ``device``, ``net``, ``cfg``,
    ``CONFIDENCE``, ``NMS_THRESHOLD``, ``VIZ_THRESHOLD`` and ``face_data``.

    Args:
        frame: Image array of shape (height, width, channels). It is
            converted with ``cv2.COLOR_BGR2RGB`` before encoding.

    Returns:
        tuple ``(img_raw, bboxs)``: an annotated copy of ``frame`` and a list
        of ``[x_a, y_a, x_b, y_b]`` boxes (detection box plus margin).
    """
    img_raw = frame.copy()
    img = np.float32(img_raw)
    if resize != 1:
        img = cv2.resize(img, None, None, fx=resize, fy=resize,
                         interpolation=cv2.INTER_LINEAR)
    im_height, im_width, _ = img.shape
    scale = torch.Tensor([im_width, im_height, im_width, im_height])
    img -= (104, 117, 123)
    img = torch.from_numpy(img.transpose(2, 0, 1)).unsqueeze(0).to(device)
    scale = scale.to(device)

    loc, conf, landms = net(img)  # forward pass

    priorbox = PriorBox(cfg, image_size=(im_height, im_width))
    prior_data = priorbox.forward().to(device).data
    boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
    boxes = (boxes * scale / resize).cpu().numpy()
    scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
    landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance'])
    # Ten landmark columns are scaled by alternating width / height.
    scale1 = torch.Tensor([im_width, im_height] * 5).to(device)
    landms = (landms * scale1 / resize).cpu().numpy()

    # ignore low scores
    inds = np.where(scores > CONFIDENCE)[0]
    boxes, landms, scores = boxes[inds], landms[inds], scores[inds]

    # keep top-K before NMS
    order = scores.argsort()[::-1][:5000]
    boxes, landms, scores = boxes[order], landms[order], scores[order]

    # do NMS
    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32,
                                                            copy=False)
    keep = py_cpu_nms(dets, NMS_THRESHOLD)
    dets = dets[keep, :][:750, :]
    landms = landms[keep][:750, :]
    dets = np.concatenate((dets, landms), axis=1)

    img_h, img_w, _ = frame.shape
    bboxs = []
    for det in dets:
        if det[4] < VIZ_THRESHOLD:
            continue
        b = list(map(int, det))

        x1, y1, x2, y2 = b[0], b[1], b[2], b[3]
        w = x2 - x1
        h = y2 - y1
        # Grow the box by 5% of its shorter side.
        margin = int(min(w, h) * 5 / 100)
        x_a = x1 - margin
        y_a = y1 - margin
        x_b = x1 + w + margin
        y_b = y1 + h + margin
        # Shift the box back inside the frame where it overflows.
        if x_a < 0:
            x_b = min(x_b - x_a, img_w - 1)
            x_a = 0
        if y_a < 0:
            y_b = min(y_b - y_a, img_h - 1)
            y_a = 0
        if x_b > img_w:
            x_a = max(x_a - (x_b - img_w), 0)
            x_b = img_w
        if y_b > img_h:
            y_a = max(y_a - (y_b - img_h), 0)
            y_b = img_h

        face = frame[y_a:y_b, x_a:x_b]
        if face.size == 0:
            # Degenerate box: nothing to encode or draw.
            continue
        rgb = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)
        # The location must be expressed in the coordinates of the image
        # being encoded. ``rgb`` is the crop, so the face spans the whole
        # crop: (top, right, bottom, left) = (0, width, height, 0).
        crop_h, crop_w = rgb.shape[:2]
        encodings = face_recognition.face_encodings(
            rgb, [(0, crop_w, crop_h, 0)])

        name = ""
        matches = []
        if encodings:
            matches = face_recognition.compare_faces(
                face_data["encodings"], encodings[0], tolerance=0.55)
        if True in matches:
            # Vote: the name with the most matching encodings wins.
            counts = {}
            for idx, matched in enumerate(matches):
                if matched:
                    candidate = face_data["names"][idx]
                    counts[candidate] = counts.get(candidate, 0) + 1
            name = max(counts, key=counts.get)

        cv2.putText(img_raw, name, (x_a + 10, y_a),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 255), 1,
                    cv2.LINE_AA)
        cv2.rectangle(img_raw, (x_a, y_a), (x_b, y_b), (255, 0, 0), 1)
        bboxs.append([x_a, y_a, x_b, y_b])

    return img_raw, bboxs
def detect_faces(self, img_raw):
    """Run the detector on one image and return boxes in reordered form.

    Args:
        img_raw: Image array of shape (height, width, channels).

    Returns:
        list of 4-tuples of ints. Each tuple holds detection columns
        (1, 2, 3, 0), i.e. the box ``x1, y1, x2, y2`` emitted as
        ``(y1, x2, y2, x1)`` -- the top, right, bottom, left order the
        original comment attributes to face_recognition.
    """
    pixels = np.float32(img_raw)
    im_height, im_width, _ = pixels.shape
    box_scale = torch.Tensor([im_width, im_height, im_width, im_height])
    pixels -= (104, 117, 123)
    batch = torch.from_numpy(pixels.transpose(2, 0, 1)).unsqueeze(0)
    if self.on_gpu:
        batch = batch.to(self.device)
        box_scale = box_scale.to(self.device)

    started = time.time()
    loc, conf, landms = self.detector(batch)  # forward pass
    print('net forward time: {:.4f}'.format(time.time() - started))

    priors = PriorBox(self.cfg, image_size=(im_height, im_width)).forward()
    if self.on_gpu:
        priors = priors.to(self.device)
    prior_data = priors.data
    variance = self.cfg['variance']

    boxes = decode(loc.data.squeeze(0), prior_data, variance)
    boxes = (boxes * box_scale / self.resize).cpu().numpy()
    scores = conf.squeeze(0).data.cpu().numpy()[:, 1]

    landms = decode_landm(landms.data.squeeze(0), prior_data, variance)
    # batch is laid out (1, C, H, W): index 3 is width, index 2 is height.
    batch_w, batch_h = batch.shape[3], batch.shape[2]
    landmark_scale = torch.Tensor([batch_w, batch_h] * 5)
    if self.on_gpu:
        landmark_scale = landmark_scale.to(self.device)
    landms = (landms * landmark_scale / self.resize).cpu().numpy()

    # Drop low scores, then keep the top-K candidates ahead of NMS.
    confident = np.where(scores > self.confidence_threshold)[0]
    boxes, landms, scores = boxes[confident], landms[confident], scores[confident]
    ranking = scores.argsort()[::-1][:self.top_k]
    boxes, landms, scores = boxes[ranking], landms[ranking], scores[ranking]

    # Non-maximum suppression, then cap the number of survivors.
    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32,
                                                            copy=False)
    survivors = py_cpu_nms(dets, self.nms_threshold)
    dets = dets[survivors, :][:self.keep_top_k, :]
    landms = landms[survivors][:self.keep_top_k, :]
    dets = np.concatenate((dets, landms), axis=1)

    # dets columns 0..3 are x1, y1, x2, y2; emit them as (y1, x2, y2, x1).
    return [(int(d[1]), int(d[2]), int(d[3]), int(d[0])) for d in dets]
def do_inference(net, img_raw):
    """Run the detector on ``img_raw`` and optionally draw the detections.

    Relies on module-level ``device``, ``cfg``, ``resize`` and ``args``.
    When ``args.save_image`` is set, boxes, score text and five landmark
    dots are drawn onto ``img_raw`` in place. Nothing is returned.

    Args:
        net: Detector returning ``(loc, conf, landms)`` for an image batch.
        img_raw: Image array of shape (height, width, channels).
    """
    pixels = np.float32(img_raw)
    im_height, im_width, _ = pixels.shape
    box_scale = torch.Tensor([im_width, im_height, im_width, im_height])
    pixels -= (104, 117, 123)
    batch = torch.from_numpy(pixels.transpose(2, 0, 1)).unsqueeze(0)
    batch = batch.to(device)
    box_scale = box_scale.to(device)

    started = time.time()
    loc, conf, landms = net(batch)  # forward pass
    print('net forward time: {:.4f}'.format(time.time() - started))

    prior_data = PriorBox(
        cfg, image_size=(im_height, im_width)).forward().to(device).data
    variance = cfg['variance']

    boxes = decode(loc.data.squeeze(0), prior_data, variance)
    boxes = (boxes * box_scale / resize).cpu().numpy()
    scores = conf.squeeze(0).data.cpu().numpy()[:, 1]

    landms = decode_landm(landms.data.squeeze(0), prior_data, variance)
    # batch is laid out (1, C, H, W): index 3 is width, index 2 is height.
    landmark_scale = torch.Tensor([batch.shape[3], batch.shape[2]] * 5)
    landmark_scale = landmark_scale.to(device)
    landms = (landms * landmark_scale / resize).cpu().numpy()

    # Drop low scores, then keep the top-K candidates ahead of NMS.
    confident = np.where(scores > args.confidence_threshold)[0]
    boxes, landms, scores = boxes[confident], landms[confident], scores[confident]
    ranking = scores.argsort()[::-1][:args.top_k]
    boxes, landms, scores = boxes[ranking], landms[ranking], scores[ranking]

    # Non-maximum suppression, then cap the number of survivors.
    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32,
                                                            copy=False)
    survivors = py_cpu_nms(dets, args.nms_threshold)
    dets = dets[survivors, :][:args.keep_top_k, :]
    landms = landms[survivors][:args.keep_top_k, :]
    dets = np.concatenate((dets, landms), axis=1)

    if not args.save_image:
        return

    # One colour per landmark; landmark i sits in columns 5 + 2i and 6 + 2i.
    landmark_colors = (
        (0, 0, 255),
        (0, 255, 255),
        (255, 0, 255),
        (0, 255, 0),
        (255, 0, 0),
    )
    for det in dets:
        if det[4] < args.vis_thres:
            continue
        label = "{:.4f}".format(det[4])
        b = list(map(int, det))
        cv2.rectangle(img_raw, (b[0], b[1]), (b[2], b[3]), (0, 0, 255), 2)
        cv2.putText(img_raw, label, (b[0], b[1] + 12),
                    cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255))
        for point, color in enumerate(landmark_colors):
            center = (b[5 + 2 * point], b[6 + 2 * point])
            cv2.circle(img_raw, center, 1, color, 4)
def main():
    """Evaluate the detector on a dataset listed in ``data/<dataset>``.

    Reads image names from ``data/<dataset>/img_list.txt``, runs the network
    on each ``data/<dataset>/images/<name>.jpg`` and, for the "FDDB" dataset,
    writes detections to ``<save_folder>/<dataset>_dets.txt``. When
    ``args.save_image`` is set, annotated images go to ``./results/``.

    Raises:
        ValueError: If ``args.network`` is not "mobile0.25" or "resnet50".
    """
    args = get_args()
    torch.set_grad_enabled(False)

    if args.network == "mobile0.25":
        cfg = cfg_mnet
    elif args.network == "resnet50":
        cfg = cfg_re50
    else:
        # Fail early: a None cfg would only surface later as an opaque
        # "'NoneType' object is not subscriptable" error.
        raise ValueError("Unsupported network: {}".format(args.network))

    # net and model
    net = RetinaFace(cfg=cfg, phase="test")
    net = load_model(net, args.trained_model, args.cpu)
    net.eval()
    print("Finished loading model!")
    print(net)
    cudnn.benchmark = True
    device = torch.device("cpu" if args.cpu else "cuda")
    net = net.to(device)

    args.save_folder.mkdir(exist_ok=True)
    dets_path = os.path.join(args.save_folder, args.dataset + "_dets.txt")

    # The context manager closes the results file even if an image fails.
    with open(dets_path, "w") as fw:
        # testing dataset
        testset_folder = os.path.join("data", args.dataset, "images/")
        testset_list = os.path.join("data", args.dataset, "img_list.txt")

        with open(testset_list, "r") as fr:
            test_dataset = fr.read().split()
        num_images = len(test_dataset)

        # testing scale
        resize = 1

        _t = {"forward_pass": Timer(), "misc": Timer()}

        # testing begin
        for i, img_name in enumerate(test_dataset):
            image_path = testset_folder + img_name + ".jpg"
            img_raw = cv2.imread(image_path, cv2.IMREAD_COLOR)

            img = np.float32(img_raw)
            if resize != 1:
                img = cv2.resize(img, None, None, fx=resize, fy=resize,
                                 interpolation=cv2.INTER_LINEAR)
            im_height, im_width, _ = img.shape
            scale = torch.Tensor([im_width, im_height, im_width, im_height])
            img -= (104, 117, 123)
            img = img.transpose(2, 0, 1)
            img = torch.from_numpy(img).unsqueeze(0)
            img = img.to(device)
            scale = scale.to(device)

            _t["forward_pass"].tic()
            loc, conf, landms = net(img)  # forward pass
            _t["forward_pass"].toc()

            _t["misc"].tic()
            priorbox = PriorBox(cfg, image_size=(im_height, im_width))
            priors = priorbox.forward()
            priors = priors.to(device)
            prior_data = priors.data
            boxes = decode(loc.data.squeeze(0), prior_data, cfg["variance"])
            boxes = boxes * scale / resize
            boxes = boxes.cpu().numpy()
            scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
            landms = decode_landm(landms.data.squeeze(0), prior_data,
                                  cfg["variance"])
            # Ten landmark columns are scaled by alternating width / height.
            scale1 = torch.Tensor([im_width, im_height] * 5)
            scale1 = scale1.to(device)
            landms = landms * scale1 / resize
            landms = landms.cpu().numpy()

            # ignore low scores
            inds = np.where(scores > args.confidence_threshold)[0]
            boxes = boxes[inds]
            landms = landms[inds]
            scores = scores[inds]

            # sort by score; no top-K cap is applied in this script
            order = scores.argsort()[::-1]
            boxes = boxes[order]
            landms = landms[order]
            scores = scores[order]

            # do NMS
            dets = np.hstack((boxes, scores[:, np.newaxis])).astype(
                np.float32, copy=False)
            keep = py_cpu_nms(dets, args.nms_threshold)
            dets = dets[keep, :]
            landms = landms[keep]

            dets = np.concatenate((dets, landms), axis=1)
            _t["misc"].toc()

            # save dets
            if args.dataset == "FDDB":
                fw.write("{:s}\n".format(img_name))
                fw.write("{:.1f}\n".format(dets.shape[0]))
                for det in dets:
                    xmin, ymin, xmax, ymax, score = det[:5]
                    # Boxes are written as xmin, ymin, width, height.
                    w = xmax - xmin + 1
                    h = ymax - ymin + 1
                    fw.write("{:d} {:d} {:d} {:d} {:.10f}\n".format(
                        int(xmin), int(ymin), int(w), int(h), score))
            print("im_detect: {:d}/{:d} forward_pass_time: {:.4f}s misc: {:.4f}s".
                  format(i + 1, num_images, _t["forward_pass"].average_time,
                         _t["misc"].average_time))

            # show image
            if args.save_image:
                for b in dets:
                    if b[4] < args.vis_thres:
                        continue
                    text = "{:.4f}".format(b[4])
                    b = list(map(int, b))
                    cv2.rectangle(img_raw, (b[0], b[1]), (b[2], b[3]),
                                  (0, 0, 255), 2)
                    cx = b[0]
                    cy = b[1] + 12
                    cv2.putText(img_raw, text, (cx, cy),
                                cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255))

                    # landms
                    cv2.circle(img_raw, (b[5], b[6]), 1, (0, 0, 255), 4)
                    cv2.circle(img_raw, (b[7], b[8]), 1, (0, 255, 255), 4)
                    cv2.circle(img_raw, (b[9], b[10]), 1, (255, 0, 255), 4)
                    cv2.circle(img_raw, (b[11], b[12]), 1, (0, 255, 0), 4)
                    cv2.circle(img_raw, (b[13], b[14]), 1, (255, 0, 0), 4)

                # save image
                os.makedirs("./results/", exist_ok=True)
                name = "./results/" + str(i) + ".jpg"
                cv2.imwrite(name, img_raw)