def get_FPS(self, image, test_interval):
    """Measure average per-image inference+postprocess time in seconds.

    Runs one warm-up pass, then `test_interval` timed passes over the same
    preprocessed image and returns the mean wall-clock time per pass.

    Args:
        image: input image (H, W, C), convertible to float32 ndarray.
        test_interval: number of timed forward passes to average over.

    Returns:
        float: average seconds per detection pass.
    """
    image = np.array(image,np.float32)
    im_height, im_width, _ = np.shape(image)
    # Pixel scales to map normalized box (4 values) and landmark (5 points
    # -> 10 values) coordinates back to the original image size.
    scale = [np.shape(image)[1], np.shape(image)[0], np.shape(image)[1], np.shape(image)[0]]
    scale_for_landmarks = [np.shape(image)[1], np.shape(image)[0], np.shape(image)[1], np.shape(image)[0],
                           np.shape(image)[1], np.shape(image)[0], np.shape(image)[1], np.shape(image)[0],
                           np.shape(image)[1], np.shape(image)[0]]
    if self.letterbox_image:
        # Pad/resize to the fixed input shape without distorting aspect ratio.
        image = np.array(letterbox_image(image,[self.input_shape[1], self.input_shape[0]]), np.float32)
    else:
        # No letterboxing: anchors must match this image's resolution.
        # NOTE: mutates self.anchors as a side effect.
        self.anchors = Anchors(self.cfg, image_size=(im_height, im_width)).get_anchors()
    # --- warm-up pass (also produces one full set of detections) ---
    with torch.no_grad():
        # HWC -> CHW, add batch dimension.
        image = torch.from_numpy(preprocess_input(image).transpose(2, 0, 1)).unsqueeze(0)
        if self.cuda:
            self.anchors = self.anchors.cuda()
            image = image.cuda()
        loc, conf, landms = self.net(image)
        # Decode anchor offsets into boxes / landmarks; keep face-class score only.
        boxes = decode(loc.data.squeeze(0), self.anchors, self.cfg['variance'])
        boxes = boxes.cpu().numpy()
        conf = conf.data.squeeze(0)[:,1:2].cpu().numpy()
        landms = decode_landm(landms.data.squeeze(0), self.anchors, self.cfg['variance'])
        landms = landms.cpu().numpy()
        # Rows: [x1, y1, x2, y2, score, 10 landmark coords]
        boxes_conf_landms = np.concatenate([boxes, conf, landms],-1)
        boxes_conf_landms = non_max_suppression(boxes_conf_landms, self.confidence)
        if len(boxes_conf_landms)>0:
            if self.letterbox_image:
                # Undo letterbox padding before scaling to original pixels.
                boxes_conf_landms = retinaface_correct_boxes(boxes_conf_landms,
                    np.array([self.input_shape[0], self.input_shape[1]]), np.array([im_height, im_width]))
            boxes_conf_landms[:,:4] = boxes_conf_landms[:,:4]*scale
            boxes_conf_landms[:,5:] = boxes_conf_landms[:,5:]*scale_for_landmarks
    # --- timed passes: identical pipeline repeated test_interval times ---
    t1 = time.time()
    for _ in range(test_interval):
        with torch.no_grad():
            loc, conf, landms = self.net(image)
            boxes = decode(loc.data.squeeze(0), self.anchors, self.cfg['variance'])
            boxes = boxes.cpu().numpy()
            conf = conf.data.squeeze(0)[:,1:2].cpu().numpy()
            landms = decode_landm(landms.data.squeeze(0), self.anchors, self.cfg['variance'])
            landms = landms.cpu().numpy()
            boxes_conf_landms = np.concatenate([boxes, conf, landms],-1)
            boxes_conf_landms = non_max_suppression(boxes_conf_landms, self.confidence)
            if len(boxes_conf_landms)>0:
                if self.letterbox_image:
                    boxes_conf_landms = retinaface_correct_boxes(boxes_conf_landms,
                        np.array([self.input_shape[0], self.input_shape[1]]), np.array([im_height, im_width]))
                boxes_conf_landms[:,:4] = boxes_conf_landms[:,:4]*scale
                boxes_conf_landms[:,5:] = boxes_conf_landms[:,5:]*scale_for_landmarks
    t2 = time.time()
    tact_time = (t2 - t1) / test_interval
    return tact_time
def box_handle(img, conf, im_height, im_width, scale, loc, landms):
    """Turn raw network outputs into NMS-filtered detections.

    Returns an (N, 15) float32 array whose rows are
    [x1, y1, x2, y2, score, 10 landmark coordinates].
    Depends on module-level `cfg_mnet`, `device`, `confidence_threshold`
    and `nms_threshold`.
    """
    # Anchor (prior) boxes matching this input resolution.
    prior_data = PriorBox(cfg_mnet, image_size=(im_height, im_width)).forward().to(device).data

    # Decode boxes into pixel coordinates and move to CPU numpy.
    boxes = (decode(loc.data.squeeze(0), prior_data, cfg_mnet['variance']) * scale).cpu().numpy()
    scores = conf.squeeze(0).data.cpu().numpy()[:, 1]

    # Decode landmarks; scale by (w, h) repeated for the 5 points.
    decoded_lm = decode_landm(landms.data.squeeze(0), prior_data, cfg_mnet['variance'])
    lm_scale = torch.Tensor([img.shape[3], img.shape[2]] * 5).to(device)
    decoded_lm = (decoded_lm * lm_scale).cpu().numpy()

    # Drop low-confidence candidates, then sort by descending score.
    keep_idx = np.where(scores > confidence_threshold)[0]
    boxes, decoded_lm, scores = boxes[keep_idx], decoded_lm[keep_idx], scores[keep_idx]
    order = scores.argsort()[::-1]
    boxes, decoded_lm, scores = boxes[order], decoded_lm[order], scores[order]

    # Non-maximum suppression on [x1, y1, x2, y2, score] rows.
    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
    keep = py_cpu_nms(dets, nms_threshold)
    return np.concatenate((dets[keep, :], decoded_lm[keep]), axis=1)
def _postprocess(loc, conf, landms, priors, cfg, img):
    """Postprocess TensorRT outputs into face boxes and landmarks.

    # Args
        loc: raw box regressions [x, y, w, h] (batched)
        conf: class scores [background, face] (batched)
        landms: raw landmark regressions, 5 points (batched)
        priors: prior boxes matching the retinaface model
        cfg: retinaface model configuration dict ('image_size', 'variance')
        img: input image (used only for its longest side)

    # Returns
        facePositions, landmarks — integer pixel lists after NMS.
    """
    long_side = max(img.shape)
    img_size = cfg['image_size']
    variance = cfg['variance']

    # Fixed postprocessing thresholds.
    confidence_threshold = 0.2
    top_k = 50
    nms_threshold = 0.5

    # Normalized -> model-input pixel coordinates.
    boxes = decode(np.squeeze(loc, axis=0), priors, variance) * (np.ones(4) * img_size)
    points = decode_landm(np.squeeze(landms, axis=0), priors, variance) * (np.ones(10) * img_size)
    scores = np.squeeze(conf, axis=0)[:, 1]

    # Discard low-confidence candidates.
    keep_mask = np.where(scores > confidence_threshold)[0]
    boxes, points, scores = boxes[keep_mask], points[keep_mask], scores[keep_mask]

    # Highest scores first, capped at top_k before NMS.
    order = scores.argsort()[::-1][:top_k]
    boxes, points, scores = boxes[order], points[order], scores[order]

    # NMS over [x1, y1, x2, y2, score] rows.
    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
    keep = py_cpu_nms(dets, nms_threshold)
    dets, points = dets[keep, :], points[keep]

    # Rescale from model-input size to the original image's long side.
    res = long_side / img_size
    facePositions = (dets[:, :4] * res).astype(int).tolist()
    landmarks = (points * res).astype(int).tolist()
    return facePositions, landmarks
def detect_image(self, image):
    """Detect faces in `image`, draw boxes/landmarks, return (image, count)."""
    # Keep an untouched copy for drawing the results on.
    old_image = image.copy()
    image = np.array(image, np.float32)
    im_height, im_width, _ = np.shape(image)
    # Scales that map normalized box / landmark coordinates back to the
    # original image size (w, h repeated; 5 landmark points -> 10 values).
    scale = torch.Tensor([np.shape(image)[1], np.shape(image)[0], np.shape(image)[1], np.shape(image)[0]])
    scale_for_landmarks = torch.Tensor([np.shape(image)[1], np.shape(image)[0], np.shape(image)[1], np.shape(image)[0],
                                        np.shape(image)[1], np.shape(image)[0], np.shape(image)[1], np.shape(image)[0],
                                        np.shape(image)[1], np.shape(image)[0]])
    # HWC -> CHW for PyTorch.
    image = preprocess_input(image).transpose(2, 0, 1)
    # Add the batch dimension.
    image = torch.from_numpy(image).unsqueeze(0)
    # Compute anchor (prior) boxes for this resolution.
    anchors = Anchors(self.cfg, image_size=(im_height, im_width)).get_anchors()
    with torch.no_grad():
        if self.cuda:
            scale = scale.cuda()
            scale_for_landmarks = scale_for_landmarks.cuda()
            image = image.cuda()
            anchors = anchors.cuda()
        loc, conf, landms = self.net(image)  # forward pass
        # Decode anchor offsets into pixel-space boxes; keep face score only.
        boxes = decode(loc.data.squeeze(0), anchors, self.cfg['variance'])
        boxes = boxes * scale
        boxes = boxes.cpu().numpy()
        conf = conf.data.squeeze(0)[:,1:2].cpu().numpy()
        landms = decode_landm(landms.data.squeeze(0), anchors, self.cfg['variance'])
        landms = landms * scale_for_landmarks
        landms = landms.cpu().numpy()
        # Rows: [x1, y1, x2, y2, score, 10 landmark coords]
        boxes_conf_landms = np.concatenate([boxes,conf,landms],-1)
        boxes_conf_landms = non_max_suppression(boxes_conf_landms, self.confidence)
    for b in boxes_conf_landms:
        text = "{:.4f}".format(b[4])
        b = list(map(int, b))
        # Bounding box and confidence label.
        cv2.rectangle(old_image, (b[0], b[1]), (b[2], b[3]), (0, 0, 255), 2)
        cx = b[0]
        cy = b[1] + 12
        cv2.putText(old_image, text, (cx, cy), cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255))
        # Five facial landmarks, one color per point.
        cv2.circle(old_image, (b[5], b[6]), 1, (0, 0, 255), 4)
        cv2.circle(old_image, (b[7], b[8]), 1, (0, 255, 255), 4)
        cv2.circle(old_image, (b[9], b[10]), 1, (255, 0, 255), 4)
        cv2.circle(old_image, (b[11], b[12]), 1, (0, 255, 0), 4)
        cv2.circle(old_image, (b[13], b[14]), 1, (255, 0, 0), 4)
    # Number of faces kept after NMS.
    pnum = len(boxes_conf_landms)
    return old_image , pnum
def detect(self, img):
    """Run the detector on one BGR image; return (boxes, landmarks) tensors
    for all candidates above self.confidence_threshold (no NMS here)."""
    device = self.device
    # Priors and pixel scales cached per resolution (see decode_params).
    prior_data, scale, scale1 = self.decode_params(*img.shape[:2])

    # REF: test_fddb.py — mean subtraction, HWC->CHW, add batch dim.
    tensor = np.float32(img)
    tensor -= (104, 117, 123)
    tensor = torch.from_numpy(tensor.transpose(2, 0, 1)).unsqueeze(0)
    tensor = tensor.to(device, dtype=torch.float32)

    loc, conf, landms = self.net(tensor)
    loc, conf, landms = loc.cpu(), conf.cpu(), landms.cpu()

    # Decode into pixel coordinates.
    boxes = decode(loc.squeeze(0), prior_data, self.variance) * scale
    points = decode_landm(landms.squeeze(0), prior_data, self.variance) * scale1
    scores = conf.squeeze(0)[:, 1]

    # Boolean mask over the confidence threshold.
    mask = scores > self.confidence_threshold
    return boxes[mask], points[mask]
def do_detect(img_raw, net, device, cfg):
    """Detect faces in one image; return (dets, landms) after NMS.

    dets: (N, 5) [x1, y1, x2, y2, score]; landms: (N, 10) landmark coords.
    NOTE(review): thresholds come from a module-level `args` object even
    though other inputs are parameters — confirm `args` is always defined.
    """
    # No rescaling of the input (resize factor fixed at 1).
    resize = 1
    img = np.float32(img_raw)
    im_height, im_width, _ = img.shape
    # (w, h, w, h) scale to map normalized boxes back to pixels.
    scale = torch.Tensor(
        [img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
    # Mean subtraction, HWC -> CHW, batch dimension.
    img -= (104, 117, 123)
    img = img.transpose(2, 0, 1)
    img = torch.from_numpy(img).unsqueeze(0)
    img = img.to(device)
    scale = scale.to(device)
    tic = time.time()
    loc, conf, landms = net(img)  # forward pass
    print('net forward time: {:.4f}'.format(time.time() - tic))
    # Anchor (prior) boxes for this resolution.
    priorbox = PriorBox(cfg, image_size=(im_height, im_width))
    priors = priorbox.forward()
    priors = priors.to(device)
    prior_data = priors.data
    boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
    boxes = boxes * scale / resize
    boxes = boxes.cpu().numpy()
    # Column 1 is the face-class score.
    scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
    landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance'])
    # (w, h) repeated for the 5 landmark points; img is NCHW here.
    scale1 = torch.Tensor([
        img.shape[3], img.shape[2], img.shape[3], img.shape[2], img.shape[3],
        img.shape[2], img.shape[3], img.shape[2], img.shape[3], img.shape[2]
    ])
    scale1 = scale1.to(device)
    landms = landms * scale1 / resize
    landms = landms.cpu().numpy()
    # ignore low scores
    inds = np.where(scores > args.confidence_threshold)[0]
    boxes = boxes[inds]
    landms = landms[inds]
    scores = scores[inds]
    # keep top-K before NMS
    order = scores.argsort()[::-1][:args.top_k]
    boxes = boxes[order]
    landms = landms[order]
    scores = scores[order]
    # do NMS
    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
    keep = py_cpu_nms(dets, args.nms_threshold)
    # keep = nms(dets, args.nms_threshold,force_cpu=args.cpu)
    dets = dets[keep, :]
    landms = landms[keep]
    # keep top-K faster NMS
    dets = dets[:args.keep_top_k, :]
    landms = landms[:args.keep_top_k, :]
    # dets = np.concatenate((dets, landms), axis=1)
    return dets, landms
def pipeline(net, frame, args, device, resize, cfg):
    """Detect faces in `frame` and return the frame with detections drawn.

    Boxes/landmarks are decoded at the (possibly resized) network input
    scale, then divided by `resize` to map back to the original frame.
    """
    img = np.float32(frame)
    im_height, im_width, _ = img.shape
    # (w, h, w, h) scale to map normalized boxes back to pixels.
    scale = torch.Tensor(
        [img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
    # Mean subtraction, HWC -> CHW, batch dimension.
    img -= (104, 117, 123)
    img = img.transpose(2, 0, 1)
    img = torch.from_numpy(img).unsqueeze(0)
    img = img.to(device)
    scale = scale.to(device)
    loc, conf, landms = net(img)  # forward pass
    # Anchor (prior) boxes for this resolution.
    priorbox = PriorBox(cfg, image_size=(im_height, im_width))
    priors = priorbox.forward()
    priors = priors.to(device)
    prior_data = priors.data
    boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
    boxes = boxes * scale / resize
    boxes = boxes.cpu().numpy()
    # Column 1 is the face-class score.
    scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
    landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance'])
    # (w, h) repeated for the 5 landmark points; img is NCHW here.
    scale1 = torch.Tensor([
        img.shape[3], img.shape[2], img.shape[3], img.shape[2], img.shape[3],
        img.shape[2], img.shape[3], img.shape[2], img.shape[3], img.shape[2]
    ])
    scale1 = scale1.to(device)
    landms = landms * scale1 / resize
    landms = landms.cpu().numpy()
    # ignore low scores
    inds = np.where(scores > args.confidence_threshold)[0]
    boxes = boxes[inds]
    landms = landms[inds]
    scores = scores[inds]
    # keep top-K before NMS
    order = scores.argsort()[::-1][:args.top_k]
    boxes = boxes[order]
    landms = landms[order]
    scores = scores[order]
    # do NMS
    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
    keep = py_cpu_nms(dets, args.nms_threshold)
    dets = dets[keep, :]
    landms = landms[keep]
    # keep top-K faster NMS
    dets = dets[:args.keep_top_k, :]
    landms = landms[:args.keep_top_k, :]
    # Rows: [x1, y1, x2, y2, score, 10 landmark coords]
    dets = np.concatenate((dets, landms), axis=1)
    # Render boxes, scores and landmarks above args.vis_thres.
    objects_to_draw = dict(draw_box=True, draw_text=True, draw_landmarks=True)
    frame = draw(frame, dets, args.vis_thres, **objects_to_draw)
    return frame
def process_face_data(cfg, im, im_height, im_width, loc, scale, conf, landms,
                      resize, top_k=5000, nms_threshold=0.4, keep_top_k=750,
                      score_threshold=0.6):
    """Decode raw network outputs into NMS-filtered detections.

    Args:
        cfg: model configuration dict (provides 'variance').
        im: network input tensor, NCHW (only shape[2:4] is read).
        im_height, im_width: input resolution used to build prior boxes.
        loc, conf, landms: raw network outputs (batched).
        scale: (4,) tensor mapping normalized boxes to pixels.
        resize: resize factor applied to the input; results divided by it.
        top_k: candidates kept before NMS (default 5000).
        nms_threshold: IoU threshold for NMS (default 0.4).
        keep_top_k: detections kept after NMS (default 750).
        score_threshold: minimum face score (default 0.6, previously
            hard-coded).

    Returns:
        list of [x1, y1, x2, y2, score] rows.
    """
    # Anchor (prior) boxes for this resolution (CUDA-only path).
    priorbox = PriorBox(cfg, image_size=(im_height, im_width))
    priors = priorbox.forward()
    priors = priors.cuda()
    priors_data = priors.data
    # Decode boxes into pixel coordinates on the original image.
    boxes = decode(loc.data.squeeze(0), priors_data, cfg['variance'])
    boxes = boxes * scale / resize
    boxes = boxes.cpu().numpy()
    # Column 1 is the face-class score.
    scores = conf.squeeze(0).cpu().detach().numpy()[:, 1]
    landms = decode_landm(landms.data.squeeze(0), priors_data, cfg['variance'])
    # (w, h) repeated for the 5 landmark points; im is NCHW.
    scale_landm = torch.from_numpy(
        np.array([
            im.shape[3], im.shape[2], im.shape[3], im.shape[2], im.shape[3],
            im.shape[2], im.shape[3], im.shape[2], im.shape[3], im.shape[2]
        ]))
    scale_landm = scale_landm.float()
    scale_landm = scale_landm.cuda()
    landms = landms * scale_landm / resize
    landms = landms.cpu().numpy()
    # ignore low scores
    inds = np.where(scores > score_threshold)[0]
    boxes = boxes[inds]
    # BUG FIX: landmarks were previously not filtered here, so the sort
    # below indexed into the unfiltered array and misaligned landmarks
    # with their boxes.
    landms = landms[inds]
    scores = scores[inds]
    # keep top-K before NMS
    order = np.argsort(-scores)[:top_k]
    boxes = boxes[order]
    landms = landms[order]
    scores = scores[order]
    # do nms
    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(float, copy=False)
    keep = py_cpu_nms(dets, nms_threshold)
    dets = dets[keep, :]
    landms = landms[keep]
    # keep top-K after NMS
    dets = dets[:keep_top_k, :]
    landms = landms[:keep_top_k, :]
    dets = np.concatenate((dets, landms), axis=1)
    result_data = dets[:, :5].tolist()
    return result_data
def GetFacialPoints(img_raw):
    """Detect faces in `img_raw`; return (N, 15) rows
    [x1, y1, x2, y2, score, 10 landmark coords].

    NOTE(review): relies on module-level `net`, `device`, `cfg` and a
    `config` object for thresholds — confirm they are initialized before
    this is called.
    """
    img = np.float32(img_raw)
    height, width, _ = img_raw.shape
    # (w, h, w, h) scale to map normalized boxes back to pixels.
    scale = torch.Tensor([width, height, width, height])
    # Mean subtraction, HWC -> CHW, batch dimension.
    img -= (104, 117, 123)
    img = img.transpose(2, 0, 1)
    img = torch.from_numpy(img).unsqueeze(0)
    img = img.to(device)
    scale = scale.to(device)
    loc, conf, landms = net(img)  # forward pass
    # Anchor (prior) boxes for this resolution.
    priorbox = PriorBox(cfg, image_size=(height, width))
    priors = priorbox.forward()
    priors = priors.to(device)
    prior_data = priors.data
    boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
    boxes = boxes * scale / config.resize
    boxes = boxes.cpu().detach().numpy()
    # Column 1 is the face-class score.
    scores = conf.squeeze(0).data.cpu().detach().numpy()[:, 1]
    landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance'])
    # (w, h) repeated for the 5 landmark points; img is NCHW here.
    scale1 = torch.Tensor([
        img.shape[3], img.shape[2], img.shape[3], img.shape[2], img.shape[3],
        img.shape[2], img.shape[3], img.shape[2], img.shape[3], img.shape[2]
    ])
    scale1 = scale1.to(device)
    landms = landms * scale1 / config.resize
    landms = landms.cpu().detach().numpy()
    # ignore low scores
    inds = np.where(scores > config.confidence_threshold)[0]
    boxes = boxes[inds]
    landms = landms[inds]
    scores = scores[inds]
    # keep top-K before NMS
    order = scores.argsort()[::-1][:config.top_k]
    boxes = boxes[order]
    landms = landms[order]
    scores = scores[order]
    # do NMS
    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
    keep = py_cpu_nms(dets, config.nms_threshold)
    dets = dets[keep, :]
    landms = landms[keep]
    # keep top-K faster NMS
    dets = dets[:config.keep_top_k, :]
    landms = landms[:config.keep_top_k, :]
    dets = np.concatenate((dets, landms), axis=1)
    # Free cached GPU memory between calls.
    torch.cuda.empty_cache()
    return dets
def decode_output(image, detection_boxes, detection_scores, detection_landmark, cfg_plate):
    """Decode plate-detector outputs into NMS-filtered detections.

    Args:
        image: network input tensor, NCHW (only H and W are read).
        detection_boxes: raw box regressions (batched).
        detection_scores: raw class logits (batched); softmaxed here.
        detection_landmark: raw landmark regressions (batched).
        cfg_plate: config dict with 'variance', 'confidence_threshold',
            'top_k', 'nms_threshold', 'keep_top_k'.

    Returns:
        (N, 15) float32 array: [x1, y1, x2, y2, score, 10 landmark coords].
    """
    image_h, image_w = image.shape[2:]
    # Convert logits to class probabilities; column 1 is the plate score.
    detection_scores = F.softmax(detection_scores, dim=-1)
    # Anchor (prior) boxes for this resolution.
    # NOTE(review): device is hard-coded to 'cuda'; fails on CPU-only hosts.
    priorbox = PriorBox(cfg_plate, image_size=(image_h, image_w), phase='test')
    priors = priorbox.forward()
    priors = priors.to(torch.device('cuda'))
    prior_data = priors.data
    boxes = decode(detection_boxes.data.squeeze(0), prior_data, cfg_plate['variance'])
    # Even columns are x (scale by width), odd columns are y (scale by height).
    boxes[:, 0::2] = boxes[:, 0::2] * image_w
    boxes[:, 1::2] = boxes[:, 1::2] * image_h
    boxes = boxes.cpu().numpy()
    # FIX: was a duplicated assignment (`scores = scores = ...`).
    scores = detection_scores.squeeze(0).data.cpu().numpy()[:, 1]
    landms = decode_landm(detection_landmark.data.squeeze(0), prior_data, cfg_plate['variance'])
    landms[:, 0::2] = landms[:, 0::2] * image_w
    landms[:, 1::2] = landms[:, 1::2] * image_h
    landms = landms.cpu().numpy()
    # ignore low scores
    inds = np.where(scores > cfg_plate['confidence_threshold'])[0]
    boxes = boxes[inds]
    landms = landms[inds]
    scores = scores[inds]
    # keep top-K before NMS
    order = scores.argsort()[::-1][:cfg_plate['top_k']]
    boxes = boxes[order]
    landms = landms[order]
    scores = scores[order]
    # do NMS
    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
    keep = py_cpu_nms(dets, cfg_plate['nms_threshold'])
    dets = dets[keep, :]
    landms = landms[keep]
    # keep top-K after NMS
    dets = dets[:cfg_plate['keep_top_k'], :]
    landms = landms[:cfg_plate['keep_top_k'], :]
    dets = np.concatenate((dets, landms), axis=1)
    return dets
def nms_gpu(_loc, _conf, _landms, _prior_data, _scale_boxes, _scale_landms,
            _scaling_ratio, _variance, _confidence_threshold, _nms_threshold,
            _nms='nms_torchvision'):
    """GPU-side decode + confidence filter + NMS.

    Returns an (N, 15) numpy array [x1, y1, x2, y2, score, 10 landmark
    coordinates]. Only the 'nms_torchvision' backend is implemented.
    """
    # Select the NMS backend up front; fail fast on unknown names.
    if _nms != 'nms_torchvision':
        raise NotImplementedError
    nms = nms_torchvision

    # Decode into pixel coordinates on the original (pre-resize) image.
    decoded_boxes = decode(_loc.data.squeeze(0), _prior_data, _variance) * _scale_boxes / _scaling_ratio
    decoded_landms = decode_landm(_landms.data.squeeze(0), _prior_data, _variance) * _scale_landms / _scaling_ratio
    all_scores = _conf.squeeze(0).data[:, 1]

    # Confidence filter (time bottleneck: boolean-mask gather on GPU).
    mask = all_scores > _confidence_threshold
    cand_boxes = decoded_boxes[mask, :]
    cand_landms = decoded_landms[mask, :]
    cand_scores = all_scores[mask]

    # Non-maximum suppression, then reshape to fixed widths.
    keep = nms(cand_boxes, cand_scores, _nms_threshold)
    kept_boxes = cand_boxes[keep].view(-1, 4)
    kept_landms = cand_landms[keep].view(-1, 10)
    kept_scores = cand_scores[keep].view(-1, 1)

    return torch.cat((kept_boxes, kept_scores, kept_landms), dim=1).cpu().numpy()
def detect(self, frame):
    """Detect faces in `frame`; return a list of dicts with keys
    'point' (x1/y1/x2/y2), 'confidence', and 'landmark' (p1_x..p5_y)."""
    # No rescaling of the input (resize factor fixed at 1).
    resize = 1
    img = np.float32(frame)
    im_height, im_width, _ = img.shape
    # (w, h, w, h) scale to map normalized boxes back to pixels.
    scale = torch.Tensor([im_width, im_height, im_width, im_height])
    # Mean subtraction, HWC -> CHW, batch dimension.
    img -= (104, 117, 123)
    img = img.transpose(2, 0, 1)
    img = torch.from_numpy(img).unsqueeze(0)
    img = img.to(self.device)
    scale = scale.to(self.device)
    loc, conf, landms = self.net(img)
    # Anchor (prior) boxes for this resolution.
    priorbox = PriorBox(self.cfg, image_size=(im_height, im_width))
    priors = priorbox.forward()
    priors = priors.to(self.device)
    prior_data = priors.data
    boxes = decode(loc.data.squeeze(0), prior_data, self.cfg['variance'])
    boxes = boxes * scale / resize
    boxes = boxes.cpu().numpy()
    # Column 1 is the face-class score.
    scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
    landms = decode_landm(landms.data.squeeze(
        0), prior_data, self.cfg['variance'])
    # (w, h) repeated for the 5 landmark points; img is NCHW here.
    scale1 = torch.Tensor([img.shape[3], img.shape[2], img.shape[3], img.shape[2],
                           img.shape[3], img.shape[2], img.shape[3], img.shape[2],
                           img.shape[3], img.shape[2]])
    scale1 = scale1.to(self.device)
    landms = landms * scale1 / resize
    landms = landms.cpu().numpy()
    # ignore low scores
    inds = np.where(scores > self.confidence)[0]
    boxes = boxes[inds]
    landms = landms[inds]
    scores = scores[inds]
    # keep top-K before NMS
    order = scores.argsort()[::-1][:self.top_k]
    boxes = boxes[order]
    landms = landms[order]
    scores = scores[order]
    # do NMS
    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(
        np.float32, copy=False)
    keep = py_cpu_nms(dets, self.nms_thresh)
    # keep = nms(dets, args.nms_threshold,force_cpu=args.cpu)
    dets = dets[keep, :]
    landms = landms[keep]
    # keep top-K faster NMS
    dets = dets[:self.keep_top_k, :]
    landms = landms[:self.keep_top_k, :]
    # Rows: [x1, y1, x2, y2, score, 10 landmark coords]
    dets = np.concatenate((dets, landms), axis=1)
    # Package each detection as a nested dict for callers.
    results = []
    for det in dets:
        r = {}
        r["point"] = {}
        r["point"]["x1"] = int(det[0])
        r["point"]["y1"] = int(det[1])
        r["point"]["x2"] = int(det[2])
        r["point"]["y2"] = int(det[3])
        r["confidence"] = det[4]
        r["landmark"] = {}
        r["landmark"]["p1_x"] = int(det[5])
        r["landmark"]["p1_y"] = int(det[6])
        r["landmark"]["p2_x"] = int(det[7])
        r["landmark"]["p2_y"] = int(det[8])
        r["landmark"]["p3_x"] = int(det[9])
        r["landmark"]["p3_y"] = int(det[10])
        r["landmark"]["p4_x"] = int(det[11])
        r["landmark"]["p4_y"] = int(det[12])
        r["landmark"]["p5_x"] = int(det[13])
        r["landmark"]["p5_y"] = int(det[14])
        results.append(r)
    return results
def run(args):
    """Evaluate a RetinaFace (mobilenet cfg) model over a test list.

    Loads the model, profiles FLOPs/params, runs detection on every image
    listed in args.test_list_dir, writes per-image detection .txt files
    plus visualization output under args.save_folder.

    Fixes vs. previous version:
      - the profiling dummy input used `.cuda()` unconditionally, crashing
        when `--cpu` was requested; it now follows the selected device.
      - `input` shadowed the builtin; renamed to `dummy_input`.
      - removed a dead duplicate `order = scores.argsort()[::-1]` line.
    """
    # net and load
    cfg = cfg_mnet
    net = RetinaFace(cfg=cfg, phase='test')
    new_state_dict = load_normal(args.trained_model)
    net.load_state_dict(new_state_dict)
    print('Finished loading model!')
    print(net)
    torch.set_grad_enabled(False)
    device = torch.device("cpu" if args.cpu else "cuda")
    net = net.to(device)
    # Profile FLOPs/params on a dummy input on the SAME device as the net.
    dummy_input = torch.randn(1, 3, 270, 480).to(device)
    flops, params = profile(net, inputs=(dummy_input, ))
    print('flops:', flops, 'params:', params)
    # testing dataset
    with open(args.test_list_dir, 'r') as fr:
        test_dataset = fr.read().split()
    test_dataset.sort()
    _t = {'forward_pass': Timer(), 'misc': Timer()}
    # testing begin
    if not os.path.isdir(args.save_folder):
        os.makedirs(args.save_folder)
    f_ = open(os.path.join(args.save_folder, 'vis_bbox.txt'), 'w')
    net.eval()
    for i, image_path in enumerate(test_dataset):
        # Keep the path relative to the 'datasets' directory as image name.
        img_name = image_path[image_path.find('datasets') + 9:]
        img_raw = cv2.imread(image_path, cv2.IMREAD_COLOR)
        img = np.float32(img_raw)
        # testing scale: shortest side -> target_size, capped by max_size.
        target_size = 1600
        max_size = 2150
        im_shape = img.shape
        im_size_min = np.min(im_shape[0:2])
        im_size_max = np.max(im_shape[0:2])
        resize = float(target_size) / float(im_size_min)
        # prevent bigger axis from being more than max_size:
        if np.round(resize * im_size_max) > max_size:
            resize = float(max_size) / float(im_size_max)
        if args.origin_size:
            resize = 1
        if resize != 1:
            img = cv2.resize(img, None, None, fx=resize, fy=resize,
                             interpolation=cv2.INTER_LINEAR)
        im_height, im_width, _ = img.shape
        # (w, h, w, h) scale to map normalized boxes back to pixels.
        scale = torch.Tensor(
            [img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
        # Mean subtraction, HWC -> CHW, batch dimension.
        img -= (104, 117, 123)
        img = img.transpose(2, 0, 1)
        img = torch.from_numpy(img).unsqueeze(0)
        img = img.to(device)
        scale = scale.to(device)
        _t['forward_pass'].tic()
        loc, conf, landms = net(img)  # forward pass
        _t['forward_pass'].toc()
        _t['misc'].tic()
        # Anchor (prior) boxes for this resolution.
        priorbox = PriorBox(cfg, image_size=(im_height, im_width))
        priors = priorbox.forward()
        priors = priors.to(device)
        prior_data = priors.data
        boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
        boxes = boxes * scale / resize
        boxes = boxes.cpu().numpy()
        scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
        landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance'])
        # (w, h) repeated for the 5 landmark points; img is NCHW here.
        scale1 = torch.Tensor([
            img.shape[3], img.shape[2], img.shape[3], img.shape[2], img.shape[3],
            img.shape[2], img.shape[3], img.shape[2], img.shape[3], img.shape[2]
        ])
        scale1 = scale1.to(device)
        landms = landms * scale1 / resize
        landms = landms.cpu().numpy()
        # ignore low scores
        inds = np.where(scores > args.confidence_threshold)[0]
        boxes = boxes[inds]
        landms = landms[inds]
        scores = scores[inds]
        # keep top-K before NMS
        order = scores.argsort()[::-1][:args.top_k]
        boxes = boxes[order]
        landms = landms[order]
        scores = scores[order]
        # do NMS
        dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
        keep = py_cpu_nms(dets, args.nms_threshold)
        dets = dets[keep, :]
        landms = landms[keep]
        # keep top-K after NMS
        dets = dets[:args.keep_top_k, :]
        landms = landms[:args.keep_top_k, :]
        dets = np.concatenate((dets, landms), axis=1)
        _t['misc'].toc()
        # Write one .txt per image: name, count, then x y w h score rows.
        save_name = os.path.join(args.save_folder, 'txt', img_name)[:-4] + '.txt'
        dirname = os.path.dirname(save_name)
        if not os.path.isdir(dirname):
            os.makedirs(dirname)
        with open(save_name, "w") as fd:
            bboxs = dets
            file_name = os.path.basename(save_name)[:-4] + "\n"
            bboxs_num = str(len(bboxs)) + "\n"
            fd.write(file_name)
            fd.write(bboxs_num)
            for box in bboxs:
                x = int(box[0])
                y = int(box[1])
                w = int(box[2]) - int(box[0])
                h = int(box[3]) - int(box[1])
                confidence = str(box[4])
                line = str(x) + " " + str(y) + " " + str(w) + " " + str(
                    h) + " " + confidence + " \n"
                fd.write(line)
        print('im_detect: {:d}/{:d}'
              ' forward_pass_time: {:.4f}s'
              ' misc: {:.4f}s'
              ' img_shape:{:}'.format(i + 1, len(test_dataset),
                                      _t['forward_pass'].average_time,
                                      _t['misc'].average_time, img.shape))
        # save bbox-image
        line_write = save_image(dets, args.vis_thres, img_raw, args.save_folder,
                                img_name, save_all=args.save_image_all)
        f_.write(line_write)
        f_.flush()
    f_.close()
def detect_one_image(name, input_path, output_path):
    """Detect faces in one image file, optionally draw/save, return
    {name: [{'x','y','w','h'}, ...]} for detections above vis_thres.

    NOTE(review): relies on module-level `long_side`, `origin_size`,
    `device`, `net`, `cfg`, `confidence_threshold`, `top_k`,
    `nms_threshold`, `keep_top_k`, `save_image`, `vis_thres` — confirm
    they are defined at import time.
    """
    image_path = os.path.join(input_path, name)
    img_raw = cv2.imread(image_path, cv2.IMREAD_COLOR)
    img = np.float32(img_raw)
    # testing scale: shortest side -> target_size, capped by max_size.
    target_size = long_side
    max_size = long_side
    im_shape = img.shape
    im_size_min = np.min(im_shape[0:2])
    im_size_max = np.max(im_shape[0:2])
    resize = float(target_size) / float(im_size_min)
    # prevent bigger axis from being more than max_size:
    if np.round(resize * im_size_max) > max_size:
        resize = float(max_size) / float(im_size_max)
    if origin_size:
        resize = 1
    if resize != 1:
        img = cv2.resize(img, None, None, fx=resize, fy=resize, interpolation=cv2.INTER_LINEAR)
    im_height, im_width, _ = img.shape
    # (w, h, w, h) scale to map normalized boxes back to pixels.
    scale = torch.Tensor(
        [img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
    # Mean subtraction, HWC -> CHW, batch dimension.
    img -= (104, 117, 123)
    img = img.transpose(2, 0, 1)
    img = torch.from_numpy(img).unsqueeze(0)
    img = img.to(device)
    scale = scale.to(device)
    tic = time.time()
    loc, conf, landms = net(img)  # forward pass
    print('Inferece {} take: {:.4f}'.format(name, time.time() - tic))
    # Anchor (prior) boxes for this resolution.
    priorbox = PriorBox(cfg, image_size=(im_height, im_width))
    priors = priorbox.forward()
    priors = priors.to(device)
    prior_data = priors.data
    boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
    boxes = boxes * scale / resize
    boxes = boxes.cpu().numpy()
    # Column 1 is the face-class score.
    scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
    landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance'])
    # (w, h) repeated for the 5 landmark points; img is NCHW here.
    scale1 = torch.Tensor([
        img.shape[3], img.shape[2], img.shape[3], img.shape[2], img.shape[3],
        img.shape[2], img.shape[3], img.shape[2], img.shape[3], img.shape[2]
    ])
    scale1 = scale1.to(device)
    landms = landms * scale1 / resize
    landms = landms.cpu().numpy()
    # ignore low scores
    inds = np.where(scores > confidence_threshold)[0]
    boxes = boxes[inds]
    landms = landms[inds]
    scores = scores[inds]
    # keep top-K before NMS
    order = scores.argsort()[::-1][:top_k]
    boxes = boxes[order]
    landms = landms[order]
    scores = scores[order]
    # do NMS
    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
    keep = py_cpu_nms(dets, nms_threshold)
    # keep = nms(dets, args.nms_threshold,force_cpu=args.cpu)
    dets = dets[keep, :]
    landms = landms[keep]
    # keep top-K faster NMS
    dets = dets[:keep_top_k, :]
    landms = landms[:keep_top_k, :]
    dets = np.concatenate((dets, landms), axis=1)
    # show image
    bbox_res = {}
    bbox_res[name] = []
    if save_image:
        for b in dets:
            if b[4] < vis_thres:
                continue
            text = "{:.4f}".format(b[4])
            b = list(map(int, b))
            x, y, x_plus_w, y_plus_h = b[0], b[1], b[2], b[3]
            bbox_res[name].append({
                'x': x,
                'y': y,
                'w': x_plus_w - x,
                'h': y_plus_h - y
            })
            # Blur face
            # sub_face = img_raw[y:y_plus_h, x:x_plus_w]
            # sub_face = cv2.GaussianBlur(sub_face, (81, 81), 75)
            # img_raw[y:y_plus_h, x:x_plus_w] = sub_face
            cv2.rectangle(img_raw, (b[0], b[1]), (b[2], b[3]), (0, 0, 255), 2)
        # save image
        out_name = os.path.join(output_path, name)
        cv2.imwrite(out_name, img_raw)
    return bbox_res
def wxf(img):
    """Open a video source, run face detection per frame, and display the
    annotated frames until the stream ends or 'q' is pressed.

    `img` is a video path/source accepted by cv2.VideoCapture.
    """
    cap = cv2.VideoCapture(img)
    # Request MJPG frames from the capture backend.
    cap.set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'))
    torch.set_grad_enabled(False)
    # Select config by backbone name from global CLI args.
    cfg = None
    if args.network == "mobile0.25":
        cfg = cfg_mnet
    elif args.network == "resnet50":
        cfg = cfg_re50
    # net and model
    net = RetinaFace(cfg=cfg, phase='test')
    net = load_model(net, args.trained_model, args.cpu)
    net.eval()
    # print('Finished loading model!')
    # print(net)
    cudnn.benchmark = True
    device = torch.device("cpu" if args.cpu else "cuda")
    net = net.to(device)
    while (1):
        ret, imgre = cap.read()
        if not ret:
            # NOTE(review): this also triggers on normal end-of-stream,
            # so the message is misleading in that case.
            print('Video open error.')
            break
        img = np.float32(imgre)
        # testing scale: shortest side -> target_size, capped by max_size.
        target_size = 1600
        max_size = 2150
        im_shape = img.shape
        im_size_min = np.min(im_shape[0:2])
        im_size_max = np.max(im_shape[0:2])
        resize = float(target_size) / float(im_size_min)
        # prevent bigger axis from being more than max_size:
        if np.round(resize * im_size_max) > max_size:
            resize = float(max_size) / float(im_size_max)
        if args.origin_size:
            resize = 1
        if resize != 1:
            img = cv2.resize(img, None, None, fx=resize, fy=resize, interpolation=cv2.INTER_LINEAR)
        im_height, im_width, _ = img.shape
        # (w, h, w, h) scale to map normalized boxes back to pixels.
        scale = torch.Tensor(
            [img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
        # Mean subtraction, HWC -> CHW, batch dimension.
        img -= (104, 117, 123)
        img = img.transpose(2, 0, 1)
        img = torch.from_numpy(img).unsqueeze(0)
        img = img.to(device)
        scale = scale.to(device)
        loc, conf, landms = net(img)  # forward pass
        # Anchor (prior) boxes for this resolution.
        priorbox = PriorBox(cfg, image_size=(im_height, im_width))
        priors = priorbox.forward()
        priors = priors.to(device)
        prior_data = priors.data
        boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
        boxes = boxes * scale / resize
        boxes = boxes.cpu().numpy()
        scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
        landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance'])
        # (w, h) repeated for the 5 landmark points; img is NCHW here.
        scale1 = torch.Tensor([
            img.shape[3], img.shape[2], img.shape[3], img.shape[2], img.shape[3],
            img.shape[2], img.shape[3], img.shape[2], img.shape[3], img.shape[2]
        ])
        scale1 = scale1.to(device)
        landms = landms * scale1 / resize
        landms = landms.cpu().numpy()
        # ignore low scores
        inds = np.where(scores > args.confidence_threshold)[0]
        boxes = boxes[inds]
        landms = landms[inds]
        scores = scores[inds]
        # keep top-K before NMS (top_k cap intentionally disabled here)
        order = scores.argsort()[::-1]
        # order = scores.argsort()[::-1][:args.top_k]
        boxes = boxes[order]
        landms = landms[order]
        scores = scores[order]
        # do NMS
        dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
        keep = py_cpu_nms(dets, args.nms_threshold)
        # keep = nms(dets, args.nms_threshold,force_cpu=args.cpu)
        dets = dets[keep, :]
        landms = landms[keep]
        # keep top-K faster NMS
        # dets = dets[:args.keep_top_k, :]
        # landms = landms[:args.keep_top_k, :]
        dets = np.concatenate((dets, landms), axis=1)
        # Draw kept detections above the visualization threshold.
        for b in dets:
            if b[4] < args.vis_thres:
                continue
            text = "{:.4f}".format(b[4])
            b = list(map(int, b))
            cv2.rectangle(imgre, (b[0], b[1]), (b[2], b[3]), (0, 0, 255), 2)
            cx = b[0]
            cy = b[1] + 12
            # cv2.putText(imgre, text, (cx, cy),
            #             cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255))
            # # landms
            # cv2.circle(imgre, (b[5], b[6]), 1, (0, 0, 255), 4)
            # cv2.circle(imgre, (b[7], b[8]), 1, (0, 255, 255), 4)
            # cv2.circle(imgre, (b[9], b[10]), 1, (255, 0, 255), 4)
            # cv2.circle(imgre, (b[11], b[12]), 1, (0, 255, 0), 4)
            # cv2.circle(imgre, (b[13], b[14]), 1, (255, 0, 0), 4)
        #img = numpy.array(img)
        cv2.imshow('wyfRetinaface', imgre)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()
def detect_faces(self, img_raw):
    """Detect faces and return them as face_recognition-style tuples
    (top, right, bottom, left), one per kept detection."""
    blob = np.float32(img_raw)
    im_height, im_width, _ = blob.shape
    # (w, h, w, h) scale back to pixel coordinates.
    scale = torch.Tensor(
        [blob.shape[1], blob.shape[0], blob.shape[1], blob.shape[0]])
    # Mean subtraction, HWC -> CHW, batch dimension.
    blob -= (104, 117, 123)
    blob = torch.from_numpy(blob.transpose(2, 0, 1)).unsqueeze(0)
    if self.on_gpu:
        blob = blob.to(self.device)
        scale = scale.to(self.device)

    tic = time.time()
    loc, conf, landms = self.detector(blob)  # forward pass
    print('net forward time: {:.4f}'.format(time.time() - tic))

    # Anchor (prior) boxes for this resolution.
    priors = PriorBox(self.cfg, image_size=(im_height, im_width)).forward()
    if self.on_gpu:
        priors = priors.to(self.device)
    prior_data = priors.data

    # Decode into original-image pixel coordinates.
    boxes = decode(loc.data.squeeze(0), prior_data, self.cfg['variance'])
    boxes = (boxes * scale / self.resize).cpu().numpy()
    scores = conf.squeeze(0).data.cpu().numpy()[:, 1]

    points = decode_landm(landms.data.squeeze(0), prior_data, self.cfg['variance'])
    # (w, h) repeated for the 5 landmark points; blob is NCHW here.
    lm_scale = torch.Tensor(
        [blob.shape[3], blob.shape[2]] * 5)
    if self.on_gpu:
        lm_scale = lm_scale.to(self.device)
    points = (points * lm_scale / self.resize).cpu().numpy()

    # Threshold, sort descending, cap at top_k.
    keep_idx = np.where(scores > self.confidence_threshold)[0]
    boxes, points, scores = boxes[keep_idx], points[keep_idx], scores[keep_idx]
    order = scores.argsort()[::-1][:self.top_k]
    boxes, points, scores = boxes[order], points[order], scores[order]

    # NMS, then cap at keep_top_k.
    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
    keep = py_cpu_nms(dets, self.nms_threshold)
    dets, points = dets[keep, :], points[keep]
    dets = dets[:self.keep_top_k, :]
    points = points[:self.keep_top_k, :]
    dets = np.concatenate((dets, points), axis=1)

    # fr: top, right, bottom, left
    # retina: left, right, bottom, top
    return [(int(f[1]), int(f[2]), int(f[3]), int(f[0])) for f in dets]
def Predict(self, img_path="test.jpg", thresh=0.5, out_img_path="result.jpg"):
    """Detect faces in an image file, draw them, and save the result.

    Args:
        img_path: path of the input image.
        thresh: used both as the confidence threshold and as the
            visualization threshold.
        out_img_path: where the annotated image is written.

    Returns:
        dict with keys "scores", "bboxes" and "labels" for all detections
        above ``thresh``.
    """
    image_path = img_path
    confidence_threshold = thresh
    vis_thres = thresh
    nms_threshold = 0.4
    top_k = 1000
    keep_top_k = 750
    save_image = True
    name = out_img_path
    # Model/device are prepared elsewhere and stashed on the instance.
    device = self.system_dict["local"]["device"]
    net = self.system_dict["local"]["net"]
    cfg = self.system_dict["local"]["cfg"]
    resize = 1
    img_raw = cv2.imread(image_path, cv2.IMREAD_COLOR)
    img = np.float32(img_raw)
    im_height, im_width, _ = img.shape
    # Scale to map normalized boxes back to pixel coordinates.
    scale = torch.Tensor(
        [img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
    img -= (104, 117, 123)  # BGR mean subtraction
    img = img.transpose(2, 0, 1)  # HWC -> CHW
    img = torch.from_numpy(img).unsqueeze(0)
    img = img.to(device)
    scale = scale.to(device)
    tic = time.time()
    loc, conf, landms = net(img)  # forward pass
    print('net forward time: {:.4f}'.format(time.time() - tic))
    priorbox = PriorBox(cfg, image_size=(im_height, im_width))
    priors = priorbox.forward()
    priors = priors.to(device)
    prior_data = priors.data
    boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
    boxes = boxes * scale / resize
    boxes = boxes.cpu().numpy()
    scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
    landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance'])
    # 10-element scale for the five (x, y) landmark pairs.
    scale1 = torch.Tensor([
        img.shape[3], img.shape[2], img.shape[3], img.shape[2], img.shape[3],
        img.shape[2], img.shape[3], img.shape[2], img.shape[3], img.shape[2]
    ])
    scale1 = scale1.to(device)
    landms = landms * scale1 / resize
    landms = landms.cpu().numpy()

    # ignore low scores
    inds = np.where(scores > confidence_threshold)[0]
    boxes = boxes[inds]
    landms = landms[inds]
    scores = scores[inds]

    # keep top-K before NMS
    order = scores.argsort()[::-1][:top_k]
    boxes = boxes[order]
    landms = landms[order]
    scores = scores[order]

    # do NMS
    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32,
                                                            copy=False)
    keep = py_cpu_nms(dets, nms_threshold)
    # keep = nms(dets, args.nms_threshold,force_cpu=args.cpu)
    dets = dets[keep, :]
    landms = landms[keep]

    # keep top-K after NMS
    dets = dets[:keep_top_k, :]
    landms = landms[:keep_top_k, :]
    dets = np.concatenate((dets, landms), axis=1)

    # show image
    tmp = {}
    tmp["scores"] = []
    tmp["bboxes"] = []
    tmp["labels"] = []
    for b in dets:
        if b[4] < vis_thres:
            continue
        text = "{:.4f}".format(b[4])
        b = list(map(int, b))
        cv2.rectangle(img_raw, (b[0], b[1]), (b[2], b[3]), (0, 0, 255), 2)
        cx = b[0]
        cy = b[1] + 12
        cv2.putText(img_raw, text, (cx, cy),
                    cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255))
        # NOTE(review): b[4] was int-cast above, so the stored score is
        # truncated to 0/1 — the full-precision score survives in `text`.
        tmp["scores"].append(b[4])
        tmp["bboxes"].append([b[0], b[1], b[2], b[3]])
        tmp["labels"].append(text)
        # landms
        #cv2.circle(img_raw, (b[5], b[6]), 1, (0, 0, 255), 4)
        #cv2.circle(img_raw, (b[7], b[8]), 1, (0, 255, 255), 4)
        #cv2.circle(img_raw, (b[9], b[10]), 1, (255, 0, 255), 4)
        #cv2.circle(img_raw, (b[11], b[12]), 1, (0, 255, 0), 4)
        #cv2.circle(img_raw, (b[13], b[14]), 1, (255, 0, 0), 4)
    # save image
    if save_image:
        cv2.imwrite(name, img_raw)
    return tmp
def detect_faces(ops, img_raw):
    """Detect faces in a raw BGR image with RetinaFace.

    Args:
        ops: options namespace providing resize, color_mean,
            confidence_threshold, keep_top_k and nms_threshold.
        img_raw: BGR image as a numpy array of shape (H, W, 3).

    Returns:
        np.ndarray of shape (N, 15) — x1, y1, x2, y2, score followed by
        five (x, y) landmark pairs, in original-image pixel coordinates.

    Note: relies on module-level ``device``, ``detect_model`` and ``cfg``.
    """
    img = np.float32(img_raw)
    if ops.resize != 1:
        # BUG FIX: the original passed the undefined local name `resize`
        # (NameError at runtime); the scale factor lives on `ops`, as the
        # guarding condition above already shows.
        img = cv2.resize(img, None, None, fx=ops.resize, fy=ops.resize,
                         interpolation=cv2.INTER_LINEAR)
    im_height, im_width, _ = img.shape
    # Scale to map normalized boxes back to pixel coordinates.
    scale = torch.Tensor(
        [img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
    img -= ops.color_mean  # per-channel mean subtraction
    img = img.transpose(2, 0, 1)  # HWC -> CHW
    img = torch.from_numpy(img).unsqueeze(0)
    img = img.to(device)
    scale = scale.to(device)
    loc, conf, landms = detect_model(img)  # forward pass
    priorbox = PriorBox(cfg, image_size=(im_height, im_width))
    priors = priorbox.forward()
    priors = priors.to(device)
    prior_data = priors.data
    boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
    boxes = boxes * scale / ops.resize
    boxes = boxes.cpu().numpy()
    scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
    landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance'])
    # 10-element scale for the five (x, y) landmark pairs.
    scale1 = torch.Tensor([
        img.shape[3], img.shape[2], img.shape[3], img.shape[2], img.shape[3],
        img.shape[2], img.shape[3], img.shape[2], img.shape[3], img.shape[2]
    ])
    scale1 = scale1.to(device)
    landms = landms * scale1 / ops.resize
    landms = landms.cpu().numpy()

    # ignore low scores
    inds = np.where(scores > ops.confidence_threshold)[0]
    boxes = boxes[inds]
    landms = landms[inds]
    scores = scores[inds]

    # keep top-K before NMS (this variant reuses keep_top_k as the
    # pre-NMS cap).
    order = scores.argsort()[::-1][:ops.keep_top_k]
    # order = scores.argsort()[::-1]
    boxes = boxes[order]
    landms = landms[order]
    scores = scores[order]

    # do NMS
    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32,
                                                            copy=False)
    keep = py_cpu_nms(dets, ops.nms_threshold)
    dets = dets[keep, :]
    landms = landms[keep]
    dets = np.concatenate((dets, landms), axis=1)
    return dets
def main():
    """Evaluate a RetinaFace model over an event/image folder tree and
    write one WIDER-style detection .txt per image; optionally save
    annotated images under ./results/."""
    cfg = None
    if args.network == "mobile0.25":
        cfg = cfg_mnet
    elif args.network == "resnet18":
        cfg = cfg_re18
    elif args.network == "resnet34":
        cfg = cfg_re34
    elif args.network == "resnet50":
        cfg = cfg_re50
    elif args.network == "Efficientnet-b0":
        cfg = cfg_eff_b0
    elif args.network == "Efficientnet-b4":
        cfg = cfg_eff_b4
    # net and model
    net = RetinaFace(cfg=cfg, phase='test')
    net = load_model(net, args.trained_model, args.cpu)
    net.eval()
    print('Finished loading model!')
    print(net)
    cudnn.benchmark = True
    device = torch.device("cpu" if args.cpu else "cuda")
    net = net.to(device)

    # testing dataset: one subfolder per event, images inside.
    testset_folder = args.dataset_folder
    # testset_list = args.dataset_folder[:-7] + "wider_val.txt"
    # with open(testset_list, 'r') as fr:
    #     test_dataset = fr.read().split()
    test_dataset = []
    for event in os.listdir(testset_folder):
        subdir = os.path.join(testset_folder, event)
        img_names = os.listdir(subdir)
        for img_name in img_names:
            test_dataset.append([event, os.path.join(subdir, img_name)])
    num_images = len(test_dataset)
    _t = {'forward_pass': Timer(), 'misc': Timer()}

    # testing begin
    for i, (event, img_name) in enumerate(test_dataset):
        if i % 100 == 0:
            # Periodically release cached GPU memory.
            torch.cuda.empty_cache()
        # image_path = testset_folder + img_name
        img_raw = cv2.imread(img_name, cv2.IMREAD_COLOR)
        img = np.float32(img_raw)

        # testing scale: shortest side -> target_size, capped so the
        # longest side never exceeds max_size.
        target_size = 480
        max_size = 2150
        im_shape = img.shape
        im_size_min = np.min(im_shape[0:2])
        im_size_max = np.max(im_shape[0:2])
        resize = float(target_size) / float(im_size_min)
        # prevent bigger axis from being more than max_size:
        if np.round(resize * im_size_max) > max_size:
            resize = float(max_size) / float(im_size_max)
        if args.origin_size:
            resize = 1
        if resize != 1:
            img = cv2.resize(img, None, None, fx=resize, fy=resize,
                             interpolation=cv2.INTER_LINEAR)
        im_height, im_width, _ = img.shape
        scale = torch.Tensor(
            [img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
        # This variant normalizes to roughly [-1, 1] instead of mean
        # subtraction.
        img = (img - 127.5) / 128.0
        # img -= (104, 117, 123)
        img = img.transpose(2, 0, 1)  # HWC -> CHW
        img = torch.from_numpy(img).unsqueeze(0)
        img = img.to(device)
        scale = scale.to(device)
        _t['forward_pass'].tic()
        loc, conf, landms = net(img)  # forward pass
        _t['forward_pass'].toc()
        _t['misc'].tic()
        priorbox = PriorBox(cfg, image_size=(im_height, im_width))
        priors = priorbox.forward()
        priors = priors.to(device)
        prior_data = priors.data
        boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
        boxes = boxes * scale / resize
        boxes = boxes.cpu().numpy()
        scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
        landms = decode_landm(landms.data.squeeze(0), prior_data,
                              cfg['variance'])
        # 10-element scale for the five (x, y) landmark pairs.
        scale1 = torch.Tensor([
            img.shape[3], img.shape[2], img.shape[3], img.shape[2],
            img.shape[3], img.shape[2], img.shape[3], img.shape[2],
            img.shape[3], img.shape[2]
        ])
        scale1 = scale1.to(device)
        landms = landms * scale1 / resize
        landms = landms.cpu().numpy()

        # ignore low scores
        inds = np.where(scores > args.confidence_threshold)[0]
        boxes = boxes[inds]
        landms = landms[inds]
        scores = scores[inds]

        # keep top-K before NMS (uncapped here — all boxes kept)
        order = scores.argsort()[::-1]
        # order = scores.argsort()[::-1][:args.top_k]
        boxes = boxes[order]
        landms = landms[order]
        scores = scores[order]

        # do NMS
        dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32,
                                                                copy=False)
        keep = py_cpu_nms(dets, args.nms_threshold)
        # keep = nms(dets, args.nms_threshold,force_cpu=args.cpu)
        dets = dets[keep, :]
        landms = landms[keep]

        # keep top-K faster NMS
        # dets = dets[:args.keep_top_k, :]
        # landms = landms[:args.keep_top_k, :]
        dets = np.concatenate((dets, landms), axis=1)
        _t['misc'].toc()

        # --------------------------------------------------------------------
        # Write WIDER-format results: <stem>\n<count>\n then one
        # "x y w h score" line per box.
        # save_name = args.save_folder + img_name[:-4] + ".txt"
        save_name = os.path.join(
            args.save_folder, event,
            img_name.split('/')[-1].split('.')[0] + ".txt")
        dirname = os.path.dirname(save_name)
        if not os.path.isdir(dirname):
            os.makedirs(dirname)
        with open(save_name, "w") as fd:
            bboxs = dets
            file_name = os.path.basename(save_name)[:-4] + "\n"
            bboxs_num = str(len(bboxs)) + "\n"
            fd.write(file_name)
            fd.write(bboxs_num)
            for box in bboxs:
                x = int(box[0])
                y = int(box[1])
                w = int(box[2]) - int(box[0])
                h = int(box[3]) - int(box[1])
                confidence = str(box[4])
                line = str(x) + " " + str(y) + " " + str(w) + " " + str(
                    h) + " " + confidence + " \n"
                fd.write(line)
        print('im_detect: {:d}/{:d} forward_pass_time: {:.4f}s misc: {:.4f}s'.
              format(i + 1, num_images, _t['forward_pass'].average_time,
                     _t['misc'].average_time))

        # save image
        if args.save_image:
            for b in dets:
                if b[4] < args.vis_thres:
                    continue
                text = "{:.4f}".format(b[4])
                b = list(map(int, b))
                cv2.rectangle(img_raw, (b[0], b[1]), (b[2], b[3]),
                              (0, 0, 255), 2)
                cx = b[0]
                cy = b[1] + 12
                cv2.putText(img_raw, text, (cx, cy),
                            cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255))
                # landms
                cv2.circle(img_raw, (b[5], b[6]), 1, (0, 0, 255), 4)
                cv2.circle(img_raw, (b[7], b[8]), 1, (0, 255, 255), 4)
                cv2.circle(img_raw, (b[9], b[10]), 1, (255, 0, 255), 4)
                cv2.circle(img_raw, (b[11], b[12]), 1, (0, 255, 0), 4)
                cv2.circle(img_raw, (b[13], b[14]), 1, (255, 0, 0), 4)
            # save image
            if not os.path.exists("./results/"):
                os.makedirs("./results/")
            name = "./results/" + str(i) + ".jpg"
            cv2.imwrite(name, img_raw)
def main():
    """Batched RetinaFace inference over all *.jpg under the input path,
    optionally saving per-face crops and per-image JSON label files."""
    args = get_args()
    torch.set_grad_enabled(False)
    if args.network == "mobile0.25":
        cfg = cfg_mnet
    elif args.network == "resnet50":
        cfg = cfg_re50
    else:
        raise NotImplementedError(
            f"Only mobile0.25 and resnet50 are suppoted.")
    # net and model
    net = RetinaFace(cfg=cfg, phase="test")
    net = load_model(net, args.trained_model, args.cpu)
    net.eval()
    if args.fp16:
        net = net.half()
    print("Finished loading model!")
    cudnn.benchmark = True
    device = torch.device("cpu" if args.cpu else "cuda")
    net = net.to(device)
    file_paths = sorted(args.input_path.rglob("*.jpg"))
    if args.num_gpu is not None:
        # Shard the sorted file list across worker GPUs.
        start, end = split_array(len(file_paths), args.num_gpu, args.gpu_id)
        file_paths = file_paths[start:end]
    output_path = args.output_path
    if args.save_boxes:
        output_label_path = output_path / "labels"
        output_label_path.mkdir(exist_ok=True, parents=True)
    if args.save_crops:
        output_image_path = output_path / "images"
        output_image_path.mkdir(exist_ok=True, parents=True)
    transform = albu.Compose([
        albu.Normalize(
            p=1, mean=(104, 117, 123), std=(1.0, 1.0, 1.0), max_pixel_value=1)
    ], p=1)
    test_loader = DataLoader(
        InferenceDataset(file_paths, args.origin_size, transform=transform),
        batch_size=args.batch_size,
        num_workers=args.num_workers,
        pin_memory=True,
        drop_last=False,
    )
    with torch.no_grad():
        for raw_input in tqdm(test_loader):
            torched_images = raw_input["torched_image"]
            if args.fp16:
                torched_images = torched_images.half()
            resizes = raw_input["resize"]
            # BUG FIX: the batch yields a sequence of path strings; the
            # original wrapped it in Path(), which raises TypeError and
            # breaks the list-style indexing used below.
            image_paths = raw_input["image_path"]
            raw_images = raw_input["raw_image"]
            # Resume support: skip images already labeled (only sound when
            # one image per batch).
            if (args.batch_size == 1 and args.save_boxes and
                    (output_label_path /
                     f"{Path(image_paths[0]).stem}.json").exists()):
                continue
            loc, conf, land = net(torched_images.to(device))  # forward pass
            batch_size = torched_images.shape[0]
            image_height, image_width = torched_images.shape[2:]
            # 10-element scale for the five (x, y) landmark pairs.
            scale1 = torch.Tensor([
                image_width, image_height, image_width, image_height,
                image_width, image_height, image_width, image_height,
                image_width, image_height,
            ])
            scale1 = scale1.to(device)
            scale = torch.Tensor(
                [image_width, image_height, image_width, image_height])
            scale = scale.to(device)
            priorbox = PriorBox(cfg, image_size=(image_height, image_width))
            priors = priorbox.forward()
            priors = priors.to(device)
            prior_data = priors.data
            for batch_id in range(batch_size):
                # BUG FIX: reset per image (was reset once per batch), so
                # each image's JSON no longer inherits boxes from earlier
                # images in the same batch.
                labels = []
                image_path = image_paths[batch_id]
                file_id = Path(image_path).stem
                raw_image = raw_images[batch_id]
                resize = resizes[batch_id].float()
                boxes = decode(loc.data[batch_id], prior_data,
                               cfg["variance"])
                boxes *= scale / resize
                scores = conf[batch_id][:, 1]
                landmarks = decode_landm(land.data[batch_id], prior_data,
                                         cfg["variance"])
                landmarks *= scale1 / resize
                # ignore low scores
                valid_index = torch.where(
                    scores > args.confidence_threshold)[0]
                boxes = boxes[valid_index]
                landmarks = landmarks[valid_index]
                scores = scores[valid_index]
                # sort by descending score before NMS
                order = scores.argsort(descending=True)
                boxes = boxes[order]
                landmarks = landmarks[order]
                scores = scores[order]
                # do NMS
                keep = nms(boxes, scores, args.nms_threshold)
                boxes = boxes[keep, :].int()
                landmarks = landmarks[keep].int()
                if boxes.shape[0] == 0:
                    continue
                scores = scores[keep].cpu().numpy().astype(np.float64)
                for crop_id, bbox in enumerate(boxes):
                    bbox = bbox.cpu().numpy()
                    labels += [{
                        "crop_id": crop_id,
                        "bbox": bbox.tolist(),
                        "score": scores[crop_id],
                        "landmarks": landmarks[crop_id].tolist(),
                    }]
                    if args.save_crops:
                        x_min, y_min, x_max, y_max = bbox
                        x_min = max(0, x_min)
                        y_min = max(0, y_min)
                        crop = raw_image[y_min:y_max,
                                         x_min:x_max].cpu().numpy()
                        target_folder = output_image_path / f"{file_id}"
                        target_folder.mkdir(exist_ok=True, parents=True)
                        crop_file_path = (target_folder /
                                          f"{file_id}_{crop_id}.jpg")
                        if crop_file_path.exists():
                            continue
                        cv2.imwrite(
                            str(crop_file_path),
                            cv2.cvtColor(crop, cv2.COLOR_BGR2RGB),
                            [int(cv2.IMWRITE_JPEG_QUALITY), 90],
                        )
                if args.save_boxes:
                    result = {
                        "file_path": image_path,
                        "file_id": file_id,
                        "bboxes": labels,
                    }
                    with open(output_label_path / f"{file_id}.json",
                              "w") as f:
                        json.dump(result, f, indent=2)
def val_to_text():
    """Run the model over the WIDER FACE val split and dump one
    WIDER-format .txt per image.

    Returns:
        Path of the folder the .txt files were written to.

    Note: relies on module-level ``args``, ``net`` and ``cfg``.
    """
    txt_origin_size = True
    txt_confidence_threshold = 0.02  # very low: keep candidates for eval
    txt_nms_threshold = 0.4
    txt_save_folder = args.save_folder + 'widerface_txt/'
    testset_list = 'data/widerface/val/wider_val.txt'
    testset_folder = 'data/widerface/val/images/'
    with open(testset_list, 'r') as fr:
        test_dataset = fr.read().split()
    # testing begin
    for i, img_name in enumerate(tqdm(test_dataset)):
        image_path = testset_folder + img_name
        img_raw = cv2.imread(image_path, cv2.IMREAD_COLOR)
        img = np.float32(img_raw)

        # testing scale: shortest side -> target_size, capped at max_size
        # on the longest side (overridden below when txt_origin_size).
        target_size = 1600
        max_size = 2150
        im_shape = img.shape
        im_size_min = np.min(im_shape[0:2])
        im_size_max = np.max(im_shape[0:2])
        resize = float(target_size) / float(im_size_min)
        # prevent bigger axis from being more than max_size:
        if np.round(resize * im_size_max) > max_size:
            resize = float(max_size) / float(im_size_max)
        if txt_origin_size:
            resize = 1
        if resize != 1:
            img = cv2.resize(img, None, None, fx=resize, fy=resize,
                             interpolation=cv2.INTER_LINEAR)
        im_height, im_width, _ = img.shape
        scale = torch.Tensor(
            [img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
        img -= (104, 117, 123)  # BGR mean subtraction
        img = img.transpose(2, 0, 1)  # HWC -> CHW
        img = torch.from_numpy(img).unsqueeze(0)
        img = img.cuda()
        scale = scale.cuda()
        # Temporarily switch the (training) model to test-mode outputs.
        net.phase = 'test'
        loc, conf, landms = net(img)  # forward pass
        net.phase = 'train'
        priorbox = PriorBox(cfg, image_size=(im_height, im_width))
        priors = priorbox.forward()
        priors = priors.cuda()
        prior_data = priors.data
        boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
        boxes = boxes * scale / resize
        boxes = boxes.cpu().numpy()
        scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
        landms = decode_landm(landms.data.squeeze(0), prior_data,
                              cfg['variance'])
        # 10-element scale for the five (x, y) landmark pairs.
        scale1 = torch.Tensor([img.shape[3], img.shape[2], img.shape[3],
                               img.shape[2], img.shape[3], img.shape[2],
                               img.shape[3], img.shape[2], img.shape[3],
                               img.shape[2]])
        scale1 = scale1.cuda()
        landms = landms * scale1 / resize
        landms = landms.cpu().numpy()

        # ignore low scores
        inds = np.where(scores > txt_confidence_threshold)[0]
        boxes = boxes[inds]
        landms = landms[inds]
        scores = scores[inds]

        # keep top-K before NMS (uncapped here)
        order = scores.argsort()[::-1]
        # order = scores.argsort()[::-1][:args.top_k]
        boxes = boxes[order]
        landms = landms[order]
        scores = scores[order]

        # do NMS
        dets = np.hstack((boxes, scores[:, np.newaxis])).astype(
            np.float32, copy=False)
        keep = py_cpu_nms(dets, txt_nms_threshold)
        # keep = nms(dets, args.nms_threshold,force_cpu=args.cpu)
        dets = dets[keep, :]
        landms = landms[keep]

        # keep top-K faster NMS
        # dets = dets[:args.keep_top_k, :]
        # landms = landms[:args.keep_top_k, :]
        dets = np.concatenate((dets, landms), axis=1)

        # --------------------------------------------------------------------
        # WIDER format: <stem>\n<count>\n then "x y w h score" per box.
        save_name = txt_save_folder + img_name[:-4] + ".txt"
        dirname = os.path.dirname(save_name)
        if not os.path.isdir(dirname):
            os.makedirs(dirname)
        with open(save_name, "w") as fd:
            bboxs = dets
            file_name = os.path.basename(save_name)[:-4] + "\n"
            bboxs_num = str(len(bboxs)) + "\n"
            fd.write(file_name)
            fd.write(bboxs_num)
            for box in bboxs:
                x = int(box[0])
                y = int(box[1])
                w = int(box[2]) - int(box[0])
                h = int(box[3]) - int(box[1])
                confidence = str(box[4])
                line = str(x) + " " + str(y) + " " + str(w) + \
                    " " + str(h) + " " + confidence + " \n"
                fd.write(line)
    return txt_save_folder
def main():
    """Evaluate RetinaFace on a listed dataset (e.g. FDDB), writing all
    detections to a single ``<dataset>_dets.txt``; optionally save
    annotated images under ./results/."""
    args = get_args()
    torch.set_grad_enabled(False)
    cfg = None
    if args.network == "mobile0.25":
        cfg = cfg_mnet
    elif args.network == "resnet50":
        cfg = cfg_re50
    # net and model
    net = RetinaFace(cfg=cfg, phase="test")
    net = load_model(net, args.trained_model, args.cpu)
    net.eval()
    print("Finished loading model!")
    print(net)
    cudnn.benchmark = True
    device = torch.device("cpu" if args.cpu else "cuda")
    net = net.to(device)
    args.save_folder.mkdir(exist_ok=True)
    # Single aggregate output file for the whole dataset.
    fw = open(os.path.join(args.save_folder, args.dataset + "_dets.txt"), "w")
    # testing dataset
    testset_folder = os.path.join("data", args.dataset, "images/")
    testset_list = os.path.join("data", args.dataset, "img_list.txt")
    with open(testset_list, "r") as fr:
        test_dataset = fr.read().split()
    num_images = len(test_dataset)
    # testing scale
    resize = 1
    _t = {"forward_pass": Timer(), "misc": Timer()}
    # testing begin
    for i, img_name in enumerate(test_dataset):
        image_path = testset_folder + img_name + ".jpg"
        img_raw = cv2.imread(image_path, cv2.IMREAD_COLOR)
        img = np.float32(img_raw)
        if resize != 1:
            img = cv2.resize(img, None, None, fx=resize, fy=resize,
                             interpolation=cv2.INTER_LINEAR)
        im_height, im_width, _ = img.shape
        scale = torch.Tensor(
            [img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
        img -= (104, 117, 123)  # BGR mean subtraction
        img = img.transpose(2, 0, 1)  # HWC -> CHW
        img = torch.from_numpy(img).unsqueeze(0)
        img = img.to(device)
        scale = scale.to(device)
        _t["forward_pass"].tic()
        loc, conf, landms = net(img)  # forward pass
        _t["forward_pass"].toc()
        _t["misc"].tic()
        priorbox = PriorBox(cfg, image_size=(im_height, im_width))
        priors = priorbox.forward()
        priors = priors.to(device)
        prior_data = priors.data
        boxes = decode(loc.data.squeeze(0), prior_data, cfg["variance"])
        boxes = boxes * scale / resize
        boxes = boxes.cpu().numpy()
        scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
        landms = decode_landm(landms.data.squeeze(0), prior_data,
                              cfg["variance"])
        # 10-element scale for the five (x, y) landmark pairs.
        scale1 = torch.Tensor([
            img.shape[3], img.shape[2], img.shape[3], img.shape[2],
            img.shape[3], img.shape[2], img.shape[3], img.shape[2],
            img.shape[3], img.shape[2],
        ])
        scale1 = scale1.to(device)
        landms = landms * scale1 / resize
        landms = landms.cpu().numpy()

        # ignore low scores
        inds = np.where(scores > args.confidence_threshold)[0]
        boxes = boxes[inds]
        landms = landms[inds]
        scores = scores[inds]

        # keep top-K before NMS (uncapped here)
        # order = scores.argsort()[::-1][:args.top_k]
        order = scores.argsort()[::-1]
        boxes = boxes[order]
        landms = landms[order]
        scores = scores[order]

        # do NMS
        dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32,
                                                                copy=False)
        keep = py_cpu_nms(dets, args.nms_threshold)
        dets = dets[keep, :]
        landms = landms[keep]

        # keep top-K faster NMS
        # dets = dets[:args.keep_top_k, :]
        # landms = landms[:args.keep_top_k, :]
        dets = np.concatenate((dets, landms), axis=1)
        _t["misc"].toc()

        # save dets in FDDB format: name, count, then "x y w h score".
        if args.dataset == "FDDB":
            fw.write("{:s}\n".format(img_name))
            fw.write("{:.1f}\n".format(dets.shape[0]))
            for k in range(dets.shape[0]):
                xmin = dets[k, 0]
                ymin = dets[k, 1]
                xmax = dets[k, 2]
                ymax = dets[k, 3]
                score = dets[k, 4]
                # +1: inclusive pixel width/height convention
                w = xmax - xmin + 1
                h = ymax - ymin + 1
                # fw.write('{:.3f} {:.3f} {:.3f} {:.3f} {:.10f}\n'.format(xmin, ymin, w, h, score))
                fw.write("{:d} {:d} {:d} {:d} {:.10f}\n".format(
                    int(xmin), int(ymin), int(w), int(h), score))
        print("im_detect: {:d}/{:d} forward_pass_time: {:.4f}s misc: {:.4f}s".
              format(i + 1, num_images, _t["forward_pass"].average_time,
                     _t["misc"].average_time))

        # show image
        if args.save_image:
            for b in dets:
                if b[4] < args.vis_thres:
                    continue
                text = "{:.4f}".format(b[4])
                b = list(map(int, b))
                cv2.rectangle(img_raw, (b[0], b[1]), (b[2], b[3]),
                              (0, 0, 255), 2)
                cx = b[0]
                cy = b[1] + 12
                cv2.putText(img_raw, text, (cx, cy),
                            cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255))
                # landms
                cv2.circle(img_raw, (b[5], b[6]), 1, (0, 0, 255), 4)
                cv2.circle(img_raw, (b[7], b[8]), 1, (0, 255, 255), 4)
                cv2.circle(img_raw, (b[9], b[10]), 1, (255, 0, 255), 4)
                cv2.circle(img_raw, (b[11], b[12]), 1, (0, 255, 0), 4)
                cv2.circle(img_raw, (b[13], b[14]), 1, (255, 0, 0), 4)
            # save image
            if not os.path.exists("./results/"):
                os.makedirs("./results/")
            name = "./results/" + str(i) + ".jpg"
            cv2.imwrite(name, img_raw)
    fw.close()
def do_inference(net, img_raw):
    """Run RetinaFace on one BGR image and draw detections onto it in place.

    Args:
        net: the loaded RetinaFace model.
        img_raw: BGR image as a numpy array (H, W, 3); annotated in place
            when ``args.save_image`` is set.

    Returns:
        None — the visualization is drawn directly on ``img_raw``.

    Note: relies on module-level ``device``, ``cfg``, ``resize`` and ``args``.
    """
    img = np.float32(img_raw)
    im_height, im_width, _ = img.shape
    # Scale to map normalized boxes back to pixel coordinates.
    scale = torch.Tensor(
        [img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
    img -= (104, 117, 123)  # BGR mean subtraction
    img = img.transpose(2, 0, 1)  # HWC -> CHW
    img = torch.from_numpy(img).unsqueeze(0)
    img = img.to(device)
    scale = scale.to(device)
    tic = time.time()
    loc, conf, landms = net(img)  # forward pass
    print('net forward time: {:.4f}'.format(time.time() - tic))
    priorbox = PriorBox(cfg, image_size=(im_height, im_width))
    priors = priorbox.forward()
    priors = priors.to(device)
    prior_data = priors.data
    boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
    boxes = boxes * scale / resize
    boxes = boxes.cpu().numpy()
    scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
    landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance'])
    # 10-element scale for the five (x, y) landmark pairs.
    scale1 = torch.Tensor([
        img.shape[3], img.shape[2], img.shape[3], img.shape[2], img.shape[3],
        img.shape[2], img.shape[3], img.shape[2], img.shape[3], img.shape[2]
    ])
    scale1 = scale1.to(device)
    landms = landms * scale1 / resize
    landms = landms.cpu().numpy()

    # ignore low scores
    inds = np.where(scores > args.confidence_threshold)[0]
    boxes = boxes[inds]
    landms = landms[inds]
    scores = scores[inds]

    # keep top-K before NMS
    order = scores.argsort()[::-1][:args.top_k]
    boxes = boxes[order]
    landms = landms[order]
    scores = scores[order]

    # do NMS
    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32,
                                                            copy=False)
    keep = py_cpu_nms(dets, args.nms_threshold)
    # keep = nms(dets, args.nms_threshold,force_cpu=args.cpu)
    dets = dets[keep, :]
    landms = landms[keep]

    # keep top-K after NMS
    dets = dets[:args.keep_top_k, :]
    landms = landms[:args.keep_top_k, :]
    dets = np.concatenate((dets, landms), axis=1)

    # show image
    if args.save_image:
        for b in dets:
            if b[4] < args.vis_thres:
                continue
            text = "{:.4f}".format(b[4])
            b = list(map(int, b))
            cv2.rectangle(img_raw, (b[0], b[1]), (b[2], b[3]), (0, 0, 255), 2)
            cx = b[0]
            cy = b[1] + 12
            cv2.putText(img_raw, text, (cx, cy),
                        cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255))
            # landms
            cv2.circle(img_raw, (b[5], b[6]), 1, (0, 0, 255), 4)
            cv2.circle(img_raw, (b[7], b[8]), 1, (0, 255, 255), 4)
            cv2.circle(img_raw, (b[9], b[10]), 1, (255, 0, 255), 4)
            cv2.circle(img_raw, (b[11], b[12]), 1, (0, 255, 0), 4)
            cv2.circle(img_raw, (b[13], b[14]), 1, (255, 0, 0), 4)
def main():
    """Load a RetinaFace checkpoint and repeatedly run inference on a fixed
    test image (timing loop), saving a final annotated image."""
    args = parser.parse_args()
    assert os.path.isfile(args.checkpoint)
    checkpoint = torch.load(args.checkpoint, map_location="cpu")
    cfg = checkpoint["config"]
    device = torch.device("cpu" if args.cpu else "cuda")
    # net and model
    net = RetinaFace(**cfg)
    net.load_state_dict(checkpoint["net_state_dict"], strict=False)
    net.eval().requires_grad_(False)
    net.to(device)
    print('Finished loading model!')
    resize = 1
    # testing begin
    for i in range(100):
        image_path = "./curve/test.jpg"
        img_raw = cv2.imread(image_path, cv2.IMREAD_COLOR)
        img = np.float32(img_raw)
        im_height, im_width, _ = img.shape
        # Scale to map normalized boxes back to pixel coordinates.
        scale = torch.Tensor([img.shape[1], img.shape[0], img.shape[1],
                              img.shape[0]])
        img -= (104, 117, 123)  # BGR mean subtraction
        img = img.transpose(2, 0, 1)  # HWC -> CHW
        img = torch.from_numpy(img).unsqueeze(0)
        img = img.to(device)
        scale = scale.to(device)
        tic = time.time()
        loc, conf, landms = net(img)  # forward pass
        print('net forward time: {:.4f}'.format(time.time() - tic))
        priorbox = PriorBox(cfg, image_size=(im_height, im_width))
        priors = priorbox.forward()
        priors = priors.to(device)
        prior_data = priors.data
        boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
        boxes = boxes * scale / resize
        scores = conf.squeeze(0)[:, 1]
        landms = decode_landm(landms.data.squeeze(0), prior_data,
                              cfg['variance'])
        # 10-element scale for the five (x, y) landmark pairs.
        scale1 = torch.Tensor([img.shape[3], img.shape[2], img.shape[3],
                               img.shape[2], img.shape[3], img.shape[2],
                               img.shape[3], img.shape[2], img.shape[3],
                               img.shape[2]])
        scale1 = scale1.to(device)
        landms = landms * scale1 / resize

        # ignore low scores
        inds = torch.where(scores > args.confidence_threshold)[0]
        boxes = boxes[inds]
        landms = landms[inds]
        scores = scores[inds]

        # keep top-K before NMS.
        # BUG FIX: torch.argsort defaults to ASCENDING, so the original
        # truncation kept the LOWEST-scoring boxes; sort descending as in
        # every sibling pipeline (np argsort()[::-1]).
        order = scores.argsort(descending=True)
        boxes = boxes[order][:args.top_k]
        landms = landms[order][:args.top_k]
        scores = scores[order][:args.top_k]

        # do NMS (torchvision.ops.nms sorts by score internally)
        keep = nms(boxes, scores, args.nms_threshold)
        boxes = boxes[keep]
        scores = scores[keep]
        landms = landms[keep]
        boxes = boxes.cpu().numpy()
        scores = scores.cpu().numpy()
        landms = landms.cpu().numpy()
        dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32,
                                                                copy=False)
        dets = np.concatenate((dets, landms), axis=1)

        # save image
        if args.save_image:
            draw_keypoint(img_raw, dets, args.vis_thres)
            # save image
            name = "test.jpg"
            cv2.imwrite(name, img_raw)
def face_detector(frame):
    """Detect faces in a video frame, match them against known encodings,
    and draw names/boxes.

    Args:
        frame: BGR frame as a numpy array (H, W, 3); a copy is annotated.

    Returns:
        (annotated_image, bboxs) where bboxs is a list of
        [x_a, y_a, x_b, y_b] margin-expanded face boxes.

    Note: relies on module-level ``resize``, ``device``, ``net``, ``cfg``,
    ``CONFIDENCE``, ``NMS_THRESHOLD``, ``VIZ_THRESHOLD``, ``face_data`` and
    the ``face_recognition`` library.
    """
    img_raw = frame.copy()
    img = np.float32(img_raw)
    if resize != 1:
        img = cv2.resize(img, None, None, fx=resize, fy=resize,
                         interpolation=cv2.INTER_LINEAR)
    im_height, im_width, _ = img.shape
    # Scale to map normalized boxes back to pixel coordinates.
    scale = torch.Tensor(
        [img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
    img -= (104, 117, 123)  # BGR mean subtraction
    img = img.transpose(2, 0, 1)  # HWC -> CHW
    img = torch.from_numpy(img).unsqueeze(0)
    img = img.to(device)
    scale = scale.to(device)
    loc, conf, landms = net(img)  # forward pass
    priorbox = PriorBox(cfg, image_size=(im_height, im_width))
    priors = priorbox.forward()
    priors = priors.to(device)
    prior_data = priors.data
    boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
    boxes = boxes * scale / resize
    boxes = boxes.cpu().numpy()
    scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
    landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance'])
    # 10-element scale for the five (x, y) landmark pairs.
    scale1 = torch.Tensor([
        img.shape[3], img.shape[2], img.shape[3], img.shape[2], img.shape[3],
        img.shape[2], img.shape[3], img.shape[2], img.shape[3], img.shape[2]
    ])
    scale1 = scale1.to(device)
    landms = landms * scale1 / resize
    landms = landms.cpu().numpy()

    # ignore low scores
    inds = np.where(scores > CONFIDENCE)[0]
    boxes = boxes[inds]
    landms = landms[inds]
    scores = scores[inds]

    # keep top-K before NMS (hard-coded 5000)
    order = scores.argsort()[::-1][:5000]
    boxes = boxes[order]
    landms = landms[order]
    scores = scores[order]

    # do NMS
    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32,
                                                            copy=False)
    keep = py_cpu_nms(dets, NMS_THRESHOLD)
    # keep = nms(dets, args.nms_threshold,force_cpu=args.cpu)
    dets = dets[keep, :]
    landms = landms[keep]

    # keep top-K after NMS (hard-coded 750)
    dets = dets[:750, :]
    landms = landms[:750, :]
    dets = np.concatenate((dets, landms), axis=1)

    bboxs = []
    for b in dets:
        if b[4] < VIZ_THRESHOLD:
            continue
        b = list(map(int, b))
        # `margin` starts as a percentage, then becomes the pixel margin.
        margin = 5
        x1, y1, x2, y2 = b[0], b[1], b[2], b[3]
        img_h, img_w, _ = frame.shape
        w = x2 - x1
        h = y2 - y1
        margin = int(min(w, h) * margin / 100)
        x_a = x1 - margin
        y_a = y1 - margin
        x_b = x1 + w + margin
        y_b = y1 + h + margin
        # Clamp the expanded box to the frame, shifting the opposite edge
        # to preserve size where possible.
        if x_a < 0:
            x_b = min(x_b - x_a, img_w - 1)
            x_a = 0
        if y_a < 0:
            y_b = min(y_b - y_a, img_h - 1)
            y_a = 0
        if x_b > img_w:
            x_a = max(x_a - (x_b - img_w), 0)
            x_b = img_w
        if y_b > img_h:
            y_a = max(y_a - (y_b - img_h), 0)
            y_b = img_h
        name = ""
        face = frame[y_a:y_b, x_a:x_b]
        rgb = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)
        # Encode with a known face location (top, right, bottom, left).
        # NOTE(review): encodings[0] raises IndexError if
        # face_encodings returns nothing — confirm this cannot happen
        # with an explicit location supplied.
        encodings = face_recognition.face_encodings(rgb,
                                                    [(y_a, x_b, y_b, x_a)])
        matches = face_recognition.compare_faces(face_data["encodings"],
                                                 encodings[0],
                                                 tolerance=0.55)
        if True in matches:
            # Majority vote over all matched known encodings.
            matchedIdxs = [i for (i, b) in enumerate(matches) if b]
            counts = {}
            for i in matchedIdxs:
                name = face_data["names"][i]
                counts[name] = counts.get(name, 0) + 1
            name = max(counts, key=counts.get)
        cv2.putText(img_raw, name, (x_a + 10, y_a),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 255), 1,
                    cv2.LINE_AA)
        cv2.rectangle(img_raw, (x_a, y_a), (x_b, y_b), (255, 0, 0), 1)
        bboxs.append([x_a, y_a, x_b, y_b])
    return img_raw, bboxs
def detect_image(self, image):
    """Detect faces in an image and return a copy annotated with boxes,
    scores and the five facial landmarks."""
    #---------------------------------------------------#
    #   Keep a copy of the input image for drawing later
    #---------------------------------------------------#
    old_image = image.copy()
    image = np.array(image, np.float32)
    #---------------------------------------------------#
    #   Compute scales used to map predicted boxes back
    #   to the original image's width/height
    #---------------------------------------------------#
    scale = [np.shape(image)[1], np.shape(image)[0],
             np.shape(image)[1], np.shape(image)[0]]
    scale_for_landmarks = [np.shape(image)[1], np.shape(image)[0],
                           np.shape(image)[1], np.shape(image)[0],
                           np.shape(image)[1], np.shape(image)[0],
                           np.shape(image)[1], np.shape(image)[0],
                           np.shape(image)[1], np.shape(image)[0]]
    im_height, im_width, _ = np.shape(image)
    #---------------------------------------------------------#
    #   letterbox_image pads the image with gray bars for a
    #   distortion-free resize
    #---------------------------------------------------------#
    if self.letterbox_image:
        image = np.array(letterbox_image(
            image, [self.input_shape[1], self.input_shape[0]]), np.float32)
    else:
        # Without letterboxing, anchors must match this image's size.
        self.anchors = Anchors(self.cfg,
                               image_size=(im_height, im_width)).get_anchors()
    with torch.no_grad():
        #-----------------------------------------------------------#
        #   Preprocess and normalize the image
        #-----------------------------------------------------------#
        image = torch.from_numpy(
            preprocess_input(image).transpose(2, 0, 1)).unsqueeze(0)
        if self.cuda:
            self.anchors = self.anchors.cuda()
            image = image.cuda()
        loc, conf, landms = self.net(image)
        #-----------------------------------------------------------#
        #   Decode the raw predictions
        #-----------------------------------------------------------#
        boxes = decode(loc.data.squeeze(0), self.anchors,
                       self.cfg['variance'])
        boxes = boxes.cpu().numpy()
        conf = conf.data.squeeze(0)[:, 1:2].cpu().numpy()
        landms = decode_landm(landms.data.squeeze(0), self.anchors,
                              self.cfg['variance'])
        landms = landms.cpu().numpy()
        # Columns: x1, y1, x2, y2, score, 10 landmark coords.
        boxes_conf_landms = np.concatenate([boxes, conf, landms], -1)
        boxes_conf_landms = non_max_suppression(boxes_conf_landms,
                                                self.confidence)
        if len(boxes_conf_landms) <= 0:
            # No face found: return the untouched copy.
            return old_image
    #---------------------------------------------------------#
    #   If letterbox_image was used, remove the gray-bar offsets
    #---------------------------------------------------------#
    if self.letterbox_image:
        boxes_conf_landms = retinaface_correct_boxes(boxes_conf_landms, \
            np.array([self.input_shape[0], self.input_shape[1]]),
            np.array([im_height, im_width]))
    boxes_conf_landms[:, :4] = boxes_conf_landms[:, :4] * scale
    boxes_conf_landms[:, 5:] = boxes_conf_landms[:, 5:] * scale_for_landmarks
    for b in boxes_conf_landms:
        text = "{:.4f}".format(b[4])
        b = list(map(int, b))
        # b[0]-b[3] are the face box coords, b[4] is the score
        cv2.rectangle(old_image, (b[0], b[1]), (b[2], b[3]), (0, 0, 255), 2)
        cx = b[0]
        cy = b[1] + 12
        cv2.putText(old_image, text, (cx, cy),
                    cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255))
        print(b[0], b[1], b[2], b[3], b[4])
        # b[5]-b[14] are the five facial landmark coords
        cv2.circle(old_image, (b[5], b[6]), 1, (0, 0, 255), 4)
        cv2.circle(old_image, (b[7], b[8]), 1, (0, 255, 255), 4)
        cv2.circle(old_image, (b[9], b[10]), 1, (255, 0, 255), 4)
        cv2.circle(old_image, (b[11], b[12]), 1, (0, 255, 0), 4)
        cv2.circle(old_image, (b[13], b[14]), 1, (255, 0, 0), 4)
    return old_image
def process_frames(
    torched_frames: torch.Tensor,
    is_fp16: bool,
    resize_factor: float,
    video_path: Path,
    frame_ids: np.ndarray,
    frames: np.ndarray,
    device: str,
    batch_size: int,
    cfg: dict,
    nms_threshold: float,
    confidence_threshold: float,
    is_save_crops: bool,
    is_save_boxes: bool,
    output_path: Path,
    net: torch.nn.Module,
    min_size: int,
    resize_scale: float,
    keep_top_k: Optional[int],
) -> None:
    """Detect faces on a stack of video frames and optionally save crops/labels.

    Args:
        torched_frames: preprocessed frames as a (N, C, H, W) tensor, parallel
            to `frame_ids` and `frames`.
        is_fp16: run inference in half precision.
        resize_factor: factor the frames were resized by; detections are
            divided by it to return to original coordinates.
        video_path: source video; its stem is used as the output id.
        frame_ids: original frame index for each row of `torched_frames`.
        frames: original (unnormalized) frames, used for cropping.
        device: torch device string.
        batch_size: frames per forward pass.
        cfg: RetinaFace config (prior box settings, variances).
        nms_threshold / confidence_threshold: detection filtering.
        is_save_crops / is_save_boxes: what to write under `output_path`.
        net: the detector.
        min_size: minimum crop side in pixels; smaller detections are skipped.
        resize_scale: crop-expansion factor passed to `resize`.
        keep_top_k: keep at most this many detections pre-NMS (None = all).
    """
    # NOTE(review): if a save flag is True while output_path is None, the
    # corresponding output_*_path is never defined and the save below would
    # raise NameError — presumably callers always pass output_path when
    # saving; verify against call sites.
    if is_save_crops and output_path is not None:
        output_image_path = output_path / "images"
        output_image_path.mkdir(exist_ok=True, parents=True)

    if is_save_boxes and output_path is not None:
        output_label_path: Path = output_path / "labels"
        output_label_path.mkdir(exist_ok=True, parents=True)

    if is_fp16:
        torched_frames = torched_frames.half()

    num_frames = torched_frames.shape[0]
    video_id = video_path.stem

    labels: List[dict] = []

    image_height, image_width = torched_frames.shape[2:]

    # (w, h) repeated: x5 for the 10 landmark coordinates, x2 for the box.
    scale1 = torch.Tensor([image_width, image_height] * 5).to(device)
    scale = torch.Tensor([image_width, image_height] * 2).to(device)

    # Priors depend only on the (fixed) frame size — build them once.
    priorbox = PriorBox(cfg, image_size=(image_height, image_width))
    priors = priorbox.forward()
    priors = priors.to(device)
    prior_data = priors.data

    for start_index in range(0, num_frames, batch_size):
        end_index = min(start_index + batch_size, num_frames)

        # Pure inference: skip autograd bookkeeping.
        with torch.no_grad():
            loc, conf, land = net(torched_frames[start_index:end_index].to(device))

        for pred_id in range(loc.shape[0]):
            # Global position of this prediction in the full frame stack.
            frame_index = start_index + pred_id
            frame_id = frame_ids[frame_index]

            # Decode to pixel coords, then undo the preprocessing resize.
            boxes = decode(loc.data[pred_id], prior_data, cfg["variance"])
            boxes *= scale / resize_factor
            scores = conf[pred_id][:, 1]
            landmarks = decode_landm(land.data[pred_id], prior_data, cfg["variance"])
            landmarks *= scale1 / resize_factor

            # Ignore low scores.
            valid_index = torch.where(scores > confidence_threshold)[0]
            boxes = boxes[valid_index]
            landmarks = landmarks[valid_index]
            scores = scores[valid_index]

            # Keep the top-k best detections before NMS ([:None] keeps all).
            order = scores.argsort(descending=True)
            boxes = boxes[order][:keep_top_k, :]
            landmarks = landmarks[order][:keep_top_k, :]
            scores = scores[order][:keep_top_k]

            # Do NMS.
            keep = nms(boxes, scores, nms_threshold)
            boxes = boxes[keep, :].int()
            landmarks = landmarks[keep].int()
            if boxes.shape[0] == 0:
                continue
            scores = scores[keep].cpu().numpy().astype(np.float64)

            for crop_id, bbox in enumerate(boxes):
                bbox = bbox.cpu().numpy().tolist()
                x_min, y_min, x_max, y_max = bbox

                # Clamp to the frame and force a non-degenerate box.
                x_min = np.clip(x_min, 0, image_width - 1)
                y_min = np.clip(y_min, 0, image_height - 1)
                x_max = np.clip(x_max, x_min + 1, image_width - 1)
                y_max = np.clip(y_max, y_min + 1, image_height - 1)

                crop_width = x_max - x_min
                crop_height = y_max - y_min
                if crop_width < min_size or crop_height < min_size:
                    continue

                labels += [{
                    "frame_id": int(frame_id),
                    "crop_id": crop_id,
                    "bbox": [x_min, y_min, x_max, y_max],
                    "score": scores[crop_id],
                    "landmarks": landmarks[crop_id].tolist(),
                }]

                if is_save_crops:
                    x_min, y_min, x_max, y_max = resize(
                        x_min, y_min, x_max, y_max,
                        image_height, image_width,
                        resize_coeff=resize_scale)
                    # BUG FIX: crop from the frame at the prediction's global
                    # index (matching frame_ids), not its index within the
                    # current batch — the old `frames[pred_id]` picked the
                    # wrong frame for every batch after the first.
                    crop = frames[frame_index][y_min:y_max, x_min:x_max]

                    target_folder = output_image_path / f"{video_id}"
                    target_folder.mkdir(exist_ok=True, parents=True)

                    crop_file_path = target_folder / f"{frame_id}_{crop_id}.jpg"
                    if crop_file_path.exists():
                        continue

                    cv2.imwrite(
                        str(crop_file_path),
                        cv2.cvtColor(crop, cv2.COLOR_BGR2RGB),
                        [int(cv2.IMWRITE_JPEG_QUALITY), 90],
                    )

    if is_save_boxes:
        result = {
            "file_path": str(video_path),
            "file_id": video_id,
            "bboxes": labels,
        }
        with open(output_label_path / f"{video_id}.json", "w") as f:
            json.dump(result, f, indent=2)
def crop_face(net, device, cfg, data_dir, target_dir, left_scale=0.0, right_scale=0.0, up_scale=0.0, low_scale=0.0):
    """Walk `data_dir`, detect the largest face per image, and write an
    aligned, square, 112x112 crop to the mirrored path under `target_dir`.

    The `*_scale` arguments expand the detected box by a fraction of its own
    width/height on the corresponding side before cropping.

    NOTE(review): relies on a module-level `args` namespace (thresholds,
    top_k, save_image, vis_thres) and on helpers PriorBox/decode/
    decode_landm/py_cpu_nms defined elsewhere in the project.
    """
    # Images are fed at (possibly downscaled) native resolution; no further
    # coordinate rescaling is needed, hence resize == 1 throughout.
    resize = 1
    landmark_target_dir = target_dir + '_landmark'
    # testing begin
    for dir, dirs, files in tqdm.tqdm(os.walk(data_dir)):
        # Mirror the input directory tree under the two output roots.
        # NOTE(review): os.mkdir fails if the parent does not exist yet —
        # presumably os.walk's top-down order guarantees parents are created
        # first; verify.
        new_dir = dir.replace(data_dir, target_dir)
        new_landmark_dir = dir.replace(data_dir, landmark_target_dir)
        if not os.path.isdir(new_dir):
            os.mkdir(new_dir)
        if not os.path.isdir(new_landmark_dir):
            os.mkdir(new_landmark_dir)
        for file in files:
            filepath = os.path.join(dir, file)
            # print(filepath)
            image_path = filepath
            img_raw = cv2.imread(image_path, cv2.IMREAD_COLOR)
            if img_raw is None:
                # Not an image (or unreadable) — skip.
                continue
            im_height, im_width, _ = img_raw.shape
            # print(img_raw.shape)
            # Cap the working resolution at roughly 640x480 to bound the
            # detector's cost; aspect ratio is preserved.
            scale_with = 640
            scale_height = 480
            if im_height > scale_height:
                scale_rate = scale_height / im_height
                img_raw = cv2.resize(
                    img_raw, (int(im_width * scale_rate), scale_height))
            elif im_width > scale_with:
                scale_rate = scale_with / im_width
                img_raw = cv2.resize(img_raw,
                                     (scale_with, int(im_height * scale_rate)))
            img = np.float32(img_raw)
            im_height, im_width, _ = img.shape
            # (w, h, w, h) — maps normalized box coords to pixels.
            scale = torch.Tensor(
                [img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
            # BGR mean subtraction, then HWC -> 1xCxHxW tensor.
            img -= (104, 117, 123)
            img = img.transpose(2, 0, 1)
            img = torch.from_numpy(img).unsqueeze(0)
            img = img.to(device)
            scale = scale.to(device)
            # tic = time.time()
            loc, conf, landms = net(img)  # forward pass
            # print('net forward time: {:.4f}'.format(time.time() - tic))
            # Priors depend on the per-image size, so rebuild them each time.
            priorbox = PriorBox(cfg, image_size=(im_height, im_width))
            priors = priorbox.forward()
            priors = priors.to(device)
            prior_data = priors.data
            boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
            boxes = boxes * scale / resize
            boxes = boxes.cpu().numpy()
            scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
            landms = decode_landm(landms.data.squeeze(0), prior_data,
                                  cfg['variance'])
            # (w, h) x 5 — maps the 10 landmark coordinates to pixels.
            scale1 = torch.Tensor([
                img.shape[3], img.shape[2], img.shape[3], img.shape[2],
                img.shape[3], img.shape[2], img.shape[3], img.shape[2],
                img.shape[3], img.shape[2]
            ])
            scale1 = scale1.to(device)
            landms = landms * scale1 / resize
            landms = landms.cpu().numpy()

            # ignore low scores
            inds = np.where(scores > args.confidence_threshold)[0]
            boxes = boxes[inds]
            landms = landms[inds]
            scores = scores[inds]

            # keep top-K before NMS
            order = scores.argsort()[::-1][:args.top_k]
            boxes = boxes[order]
            landms = landms[order]
            scores = scores[order]

            # do NMS
            dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32,
                                                                    copy=False)
            keep = py_cpu_nms(dets, args.nms_threshold)
            # keep = nms(dets, args.nms_threshold,force_cpu=args.cpu)
            dets = dets[keep, :]
            landms = landms[keep]

            # keep top-K faster NMS
            dets = dets[:args.keep_top_k, :]
            landms = landms[:args.keep_top_k, :]

            # Rows become [x1, y1, x2, y2, score, 10 landmark coords].
            dets = np.concatenate((dets, landms), axis=1)

            # save image
            if args.save_image:
                max_bb = 0
                max_index = 0
                if len(dets) == 0:
                    continue
                elif len(dets) > 1:
                    print('warning detect more than one:', filepath)
                # find maximum bounding box
                # NOTE(review): box_w/box_h below are swapped (b[1]/b[3] are
                # y-coords, so box_w is really a height) — harmless here since
                # only max(box_w, box_h) is used, but the naming is misleading.
                for di, b in enumerate(dets):
                    if b[4] < args.vis_thres:
                        continue
                    b = list(map(int, b))
                    b = [p if p > 0 else 0 for p in b]
                    box_w = abs(b[1] - b[3])
                    box_h = abs(b[0] - b[2])
                    if max_bb < max(box_w, box_h):
                        max_bb = max(box_w, box_h)
                        max_index = di
                di = max_index
                b = list(map(int, dets[max_index]))
                # for di, b in enumerate(dets):
                #     if b[4] < args.vis_thres:
                #         continue
                #     text = "{:.4f}".format(b[4])
                #     b = list(map(int, b))
                #     # print(b[0], b[1])
                #     # print(b[2], b[3])
                b = [p if p > 0 else 0 for p in b]
                # Expand the box by the requested fractions on each side.
                b[1] -= int((b[3] - b[1]) * up_scale)
                b[3] += int((b[3] - b[1]) * low_scale)
                b[0] -= int((b[2] - b[0]) * left_scale)
                b[2] += int((b[2] - b[0]) * right_scale)
                # Clamp back into the image.
                b[1] = b[1] if b[1] >= 0 else 0
                b[3] = b[3] if b[3] < im_height else im_height - 1
                b[0] = b[0] if b[0] >= 0 else 0
                b[2] = b[2] if b[2] < im_width else im_width - 1
                # retain background
                # Grow the shorter dimension so the crop is (near) square.
                b_width = b[2] - b[0]
                b_height = b[3] - b[1]
                if b_width > b_height:
                    b[1] -= abs(b_width - b_height) // 2
                    b[3] += abs(b_width - b_height) // 2
                elif b_width < b_height:
                    b[0] -= abs(b_width - b_height) // 2
                    b[2] += abs(b_width - b_height) // 2
                # Re-clamp after the squaring step.
                b[1] = b[1] if b[1] >= 0 else 0
                b[3] = b[3] if b[3] < im_height else im_height - 1
                b[0] = b[0] if b[0] >= 0 else 0
                b[2] = b[2] if b[2] < im_width else im_width - 1
                roi_image = np.copy(img_raw[b[1]:b[3], b[0]:b[2]])
                # NOTE(review): swapped names again — box_w is the crop
                # height and box_h its width; the padding logic below uses
                # them consistently, so behavior is correct.
                box_w = abs(b[1] - b[3])
                box_h = abs(b[0] - b[2])
                # print(b[1], b[3])
                # print(b[0], b[2])
                # print(box_w, box_h)
                # Landmarks translated into crop-local coordinates.
                # NOTE(review): show_image is annotated but never written to
                # disk, and new_landmark_dir is created but unused — the
                # landmark visualization output appears to be incomplete.
                show_image = roi_image.copy()
                leftEyeCenter = (int(landms[di][0] - b[0]),
                                 int(landms[di][1] - b[1]))
                rightEyeCenter = (int(landms[di][2] - b[0]),
                                  int(landms[di][3] - b[1]))
                noseCenter = (int(landms[di][4] - b[0]),
                              int(landms[di][5] - b[1]))
                mouth1 = (int(landms[di][6] - b[0]), int(landms[di][7] - b[1]))
                mouth2 = (int(landms[di][8] - b[0]), int(landms[di][9] - b[1]))
                cv2.circle(show_image, (leftEyeCenter[0], leftEyeCenter[1]), 3,
                           (0, 255, 0), -1)
                cv2.circle(show_image, (rightEyeCenter[0], rightEyeCenter[1]),
                           3, (0, 255, 0), -1)
                cv2.circle(show_image, (noseCenter[0], noseCenter[1]), 3,
                           (0, 255, 0), -1)
                cv2.circle(show_image, (mouth1[0], mouth1[1]), 3, (0, 255, 0),
                           -1)
                cv2.circle(show_image, (mouth2[0], mouth2[1]), 3, (0, 255, 0),
                           -1)
                # compute the angle between the eye centroids
                eye_dis = np.sqrt((leftEyeCenter[0] - rightEyeCenter[0])**2 +
                                  (leftEyeCenter[1] - rightEyeCenter[1])**2)
                # print('eye_dis:', eye_dis)
                # Skip rotation when the eyes are too close together — the
                # angle estimate would be unreliable at that scale.
                if eye_dis < 16.0:
                    angle = 0
                else:
                    dY = rightEyeCenter[1] - leftEyeCenter[1]
                    dX = rightEyeCenter[0] - leftEyeCenter[0]
                    angle = np.degrees(np.arctan2(dY, dX))
                # print('angle:', angle)
                desiredLeftEye = (1.0, 1.0)
                desiredFaceWidth = roi_image.shape[1]
                desiredFaceHeight = roi_image.shape[0]
                # compute the desired right eye x-coordinate based on the
                # desired x-coordinate of the left eye
                desiredRightEyeX = 1.0 - desiredLeftEye[0]
                # determine the scale of the new resulting image by taking
                # the ratio of the distance between eyes in the *current*
                # image to the ratio of distance between eyes in the
                # *desired* image
                # dist = np.sqrt((dX ** 2) + (dY ** 2))
                # desiredDist = (desiredRightEyeX - desiredLeftEye[0])
                # desiredDist *= desiredFaceWidth
                # scale = desiredDist / dist
                scale = desiredFaceWidth / max(roi_image.shape[:2])
                # NOTE(review): resize_roi_image is computed but never used.
                resize_roi_image = cv2.resize(
                    roi_image,
                    (int(roi_image.shape[1] * scale),
                     int(roi_image.shape[0] * scale)))
                # cv2.imshow('resize_roi_image', resize_roi_image)
                # print(max(roi_image.shape))
                # print(scale)
                # compute center (x, y)-coordinates (i.e., the median point)
                # between the two eyes in the input image
                eyesCenter = ((leftEyeCenter[0] + rightEyeCenter[0]) // 2,
                              (leftEyeCenter[1] + rightEyeCenter[1]) // 2)
                # grab the rotation matrix for rotating and scaling the face
                M = cv2.getRotationMatrix2D(eyesCenter, angle, 1.0)
                # apply the affine transformation
                (w, h) = (desiredFaceWidth, desiredFaceHeight)
                aligned_image = cv2.warpAffine(roi_image, M, (w, h),
                                               flags=cv2.INTER_CUBIC)
                # Pad the narrower dimension with black so the result is
                # square before the final 112x112 resize.
                if box_h < box_w:
                    padding_size = abs(box_w - box_h) // 2
                    aligned_image = cv2.copyMakeBorder(aligned_image, 0, 0,
                                                       padding_size,
                                                       padding_size,
                                                       cv2.BORDER_CONSTANT,
                                                       value=[0, 0, 0])
                elif box_h > box_w:
                    padding_size = abs(box_w - box_h) // 2
                    aligned_image = cv2.copyMakeBorder(aligned_image,
                                                       padding_size,
                                                       padding_size, 0, 0,
                                                       cv2.BORDER_CONSTANT,
                                                       value=[0, 0, 0])
                new_image = cv2.resize(aligned_image, (112, 112),
                                       interpolation=cv2.INTER_AREA)
                # Mirror the source path into target_dir, forcing a .jpg
                # extension.
                new_path = filepath.replace(data_dir, target_dir)
                new_landmark_path = filepath.replace(data_dir,
                                                     landmark_target_dir)
                new_path = new_path.replace(new_path.split('.')[-1], 'jpg')
                new_landmark_path = new_landmark_path.replace(
                    new_landmark_path.split('.')[-1], 'jpg')
                # print(new_path)
                cv2.imwrite(new_path, new_image)
# NOTE(review): the statements below appear to be an orphaned, duplicated
# fragment of the detection pipeline (the same sequence appears inside
# crop_face and detect_face). At this scope, `net`, `img`, `im_height`,
# `im_width`, `scale` and `resize` are not defined, so this code would raise
# NameError if executed at module level. It is almost certainly dead code
# left over from an edit — confirm and remove.
tic = time.time()
loc, conf, landms = net(img)  # forward pass
print('net forward time: {:.4f}'.format(time.time() - tic))
# Build priors for the current image size and decode boxes/landmarks.
priorbox = PriorBox(cfg, image_size=(im_height, im_width))
priors = priorbox.forward()
priors = priors.to(device)
prior_data = priors.data
boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
boxes = boxes * scale / resize
boxes = boxes.cpu().numpy()
scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance'])
# (w, h) x 5 — maps the 10 landmark coordinates to pixels.
scale1 = torch.Tensor([
    img.shape[3], img.shape[2], img.shape[3], img.shape[2], img.shape[3],
    img.shape[2], img.shape[3], img.shape[2], img.shape[3], img.shape[2]
])
scale1 = scale1.to(device)
landms = landms * scale1 / resize
landms = landms.cpu().numpy()

# ignore low scores
inds = np.where(scores > args.confidence_threshold)[0]
boxes = boxes[inds]
landms = landms[inds]
def detect_face(net, img):
    """Detect faces in a single BGR image with a RetinaFace-style model.

    Args:
        net: detector network, already on the module-level `device`.
        img: BGR image as a numpy array of shape (H, W, 3).

    Returns:
        np.ndarray of shape (num_faces, 15): per row
        [x1, y1, x2, y2, score, 5x (landmark_x, landmark_y)], in
        original-image pixel coordinates.

    Relies on module-level globals: `args` (thresholds, long_side,
    origin_size, top_k, keep_top_k), `device` and `cfg`.
    """
    # BUG FIX: previously this read the global `img_raw` instead of the
    # `img` parameter, so the function silently ignored its own input.
    img = np.float32(img)

    # testing scale: shrink so the short side equals args.long_side, but
    # never let the long side exceed it either.
    target_size = args.long_side
    max_size = args.long_side
    im_shape = img.shape
    im_size_min = np.min(im_shape[0:2])
    im_size_max = np.max(im_shape[0:2])
    resize = float(target_size) / float(im_size_min)
    # prevent bigger axis from being more than max_size:
    if np.round(resize * im_size_max) > max_size:
        resize = float(max_size) / float(im_size_max)
    if args.origin_size:
        # Keep the native resolution.
        resize = 1
    if resize != 1:
        img = cv2.resize(img, None, None, fx=resize, fy=resize,
                         interpolation=cv2.INTER_LINEAR)
    # print("\nimg shape resize: ", img.shape)

    im_height, im_width, _ = img.shape
    # (w, h, w, h) — maps normalized box coords to resized-image pixels.
    scale = torch.Tensor(
        [img.shape[1], img.shape[0], img.shape[1], img.shape[0]])

    # BGR mean subtraction, then HWC -> 1xCxHxW tensor.
    img -= (104, 117, 123)
    img = img.transpose(2, 0, 1)
    img = torch.from_numpy(img).unsqueeze(0)
    img = img.to(device)
    scale = scale.to(device)

    tic = time.time()
    loc, conf, landms = net(img)  # forward pass
    # print('net forward time: {:.4f}'.format(time.time() - tic))
    tic = time.time()

    # Priors depend on the per-image size, so rebuild them here.
    priorbox = PriorBox(cfg, image_size=(im_height, im_width))
    priors = priorbox.forward()
    priors = priors.to(device)
    prior_data = priors.data

    # Decode to resized-image pixels, then divide by `resize` to get back to
    # original-image coordinates.
    boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
    boxes = boxes * scale / resize
    boxes = boxes.cpu().numpy()
    scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
    landms = decode_landm(landms.data.squeeze(0), prior_data, cfg['variance'])
    # (w, h) x 5 — maps the 10 landmark coordinates to pixels.
    scale1 = torch.Tensor([img.shape[3], img.shape[2]] * 5)
    scale1 = scale1.to(device)
    landms = landms * scale1 / resize
    landms = landms.cpu().numpy()

    # ignore low scores
    inds = np.where(scores > args.confidence_threshold)[0]
    boxes = boxes[inds]
    landms = landms[inds]
    scores = scores[inds]

    # keep top-K before NMS
    order = scores.argsort()[::-1][:args.top_k]
    boxes = boxes[order]
    landms = landms[order]
    scores = scores[order]

    # do NMS
    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32,
                                                            copy=False)
    keep = py_cpu_nms(dets, args.nms_threshold)
    # keep = nms(dets, args.nms_threshold,force_cpu=args.cpu)
    dets = dets[keep, :]
    landms = landms[keep]

    # keep top-K faster NMS
    dets = dets[:args.keep_top_k, :]
    landms = landms[:args.keep_top_k, :]

    dets = np.concatenate((dets, landms), axis=1)
    # print('post processing time: {:.4f}'.format(time.time() - tic))
    return dets