def __init__(self, net='mtcnn', type='cuda'):
    """Remember the network name, resolve the device and build the stage nets.

    Args:
        net: label stored unchanged on ``self.net``.
        type: device specifier handed straight to ``torch.device``
            (the name shadows the builtin but is part of the call signature).

    Side effects:
        Sets ``cudnn.benchmark = True``, which is a process-wide flag
        (assuming ``cudnn`` is ``torch.backends.cudnn`` -- confirm at the
        import site).
    """
    cudnn.benchmark = True
    self.net = net

    target = torch.device(type)
    self.device = target

    # Instantiate P-Net, R-Net and O-Net in cascade order, each moved to the
    # selected device.
    self.pnet, self.rnet, self.onet = [
        stage().to(target) for stage in (PNet, RNet, ONet)
    ]
def _load_stage_net(net_cls, model_path, use_cuda, tag):
    """Build one MTCNN stage network, load its weights and set eval mode.

    Args:
        net_cls: network class to instantiate (called as
            ``net_cls(use_cuda=use_cuda)``).
        model_path: path of the saved ``state_dict`` checkpoint.
        use_cuda: when true, load the checkpoint as saved and move the
            network to the GPU; otherwise remap all storages to the CPU.
        tag: one-letter stage label used in the log line ('p', 'r' or 'o').

    Returns:
        The network instance, in eval mode.
    """
    net = net_cls(use_cuda=use_cuda)
    # NOTE: torch.load unpickles the file -- only load trusted checkpoints.
    if use_cuda:
        print('{0}_model_path:{1}'.format(tag, model_path))
        net.load_state_dict(torch.load(model_path))
        net.cuda()
    else:
        # Keep every storage on the CPU so GPU-saved checkpoints still load
        # on machines without CUDA.
        net.load_state_dict(
            torch.load(model_path, map_location=lambda storage, loc: storage))
    net.eval()
    return net


def create_mtcnn_net(p_model_path=None, r_model_path=None, o_model_path=None, use_cuda=True):
    """Load the requested MTCNN stage networks.

    Each stage is only built when its checkpoint path is given; the GPU is
    used by default (``use_cuda=True``), pass ``use_cuda=False`` to load the
    weights onto the CPU.

    Args:
        p_model_path: P-Net checkpoint path, or None to skip P-Net.
        r_model_path: R-Net checkpoint path, or None to skip R-Net.
        o_model_path: O-Net checkpoint path, or None to skip O-Net.
        use_cuda: whether to load and run the networks on the GPU.

    Returns:
        A tuple ``(pnet, rnet, onet)``; an entry is None when its path was
        not supplied, otherwise the loaded network in eval mode.
    """
    pnet, rnet, onet = None, None, None

    if p_model_path is not None:
        pnet = _load_stage_net(PNet, p_model_path, use_cuda, 'p')
    if r_model_path is not None:
        rnet = _load_stage_net(RNet, r_model_path, use_cuda, 'r')
    if o_model_path is not None:
        onet = _load_stage_net(ONet, o_model_path, use_cuda, 'o')

    return pnet, rnet, onet
def create_mtcnn_net(self):
    '''
    Create the mtcnn model

    Reads the checkpoint paths from ``self.args.pnet``, ``self.args.rnet``
    and ``self.args.onet``. A stage whose path is empty or None is skipped
    and its detector is left as None. On GPU (``self.use_gpu``) each loaded
    network is wrapped in ``torch.nn.DataParallel`` over ``self.gpu_ids``;
    on CPU the checkpoint storages are remapped to the CPU. Every loaded
    network is put in eval mode.

    Results are stored on ``self.pnet_detector``, ``self.rnet_detector`` and
    ``self.onet_detector``; nothing is returned.
    '''
    def load_stage(net_cls, weights_path):
        # Build one stage network and load its weights.
        # NOTE: torch.load unpickles the file -- only load trusted checkpoints.
        net = net_cls(use_cuda=self.use_gpu)
        if self.use_gpu:
            net.load_state_dict(torch.load(weights_path))
            # Wrap only after loading so the checkpoint keys match the bare
            # module (DataParallel prefixes parameter names with "module.").
            net = torch.nn.DataParallel(net, device_ids=self.gpu_ids)
        else:
            net.load_state_dict(
                torch.load(weights_path,
                           map_location=lambda storage, loc: storage))
        net.eval()
        return net

    args = self.args
    # Truthiness (instead of len(...) > 0) also accepts None as "not set".
    pnet = load_stage(PNet, args.pnet) if args.pnet else None
    rnet = load_stage(RNet, args.rnet) if args.rnet else None
    onet = load_stage(ONet, args.onet) if args.onet else None

    # Publish the detectors only after all requested stages loaded.
    self.pnet_detector = pnet
    self.rnet_detector = rnet
    self.onet_detector = onet
""" for b in bounding_boxes: cv2.rectangle(img, (int(b[0]), int(b[1])), (int(b[2]), int(b[3])), (255, 255, 255), 1) for p in facial_landmarks: for i in range(5): cv2.circle(img, (int(p[i]), int(p[i + 5])), 1, (0, 255, 0), -1) return img # LOAD MODELS pnet = PNet().to(device) rnet = RNet().to(device) onet = ONet().to(device) if __name__ == '__main__': video_src = './mtcnn/video/1.mp4' # video source # video_src = 0 # camera device id capture = cv2.VideoCapture(video_src) if not capture.isOpened(): print('Camera is not opened!') else: idx_frame = 0 while True: ret, frame = capture.read() idx_frame += 1 if idx_frame % 2 != 0: continue idx_frame = 0
def detect_faces(image, min_face_size=20.0,
                 thresholds=(0.6, 0.7, 0.8),
                 nms_thresholds=(0.7, 0.7, 0.7)):
    """Run the three-stage MTCNN cascade (P-Net, R-Net, O-Net) on one image.

    Arguments:
        image: an instance of PIL.Image.
        min_face_size: a float number, the smallest face side to look for.
        thresholds: a sequence of length 3, the face-score thresholds applied
            after P-Net, R-Net and O-Net respectively.
        nms_thresholds: a sequence of length 3, the overlap thresholds used
            by non-maximum suppression after each stage.

    Returns:
        two float numpy arrays: bounding boxes, one row per face holding
        (xmin, ymin, xmax, ymax, score), and facial landmarks of shape
        [n_boxes, 10] (five x coordinates followed by five y coordinates).
        When the image is too small for any pyramid scale, or a stage leaves
        no candidates, two empty lists ``([], [])`` are returned instead.
    """
    # BUILD AN IMAGE PYRAMID
    width, height = image.size
    min_detection_size = 12
    factor = 0.707  # sqrt(0.5)

    # scale the image so that the minimum size that we can detect
    # equals the minimum face size that we want to detect
    m = min_detection_size / min_face_size
    min_length = min(height, width) * m

    scales = []
    factor_count = 0
    while min_length > min_detection_size:
        scales.append(m * factor ** factor_count)
        min_length *= factor
        factor_count += 1

    # Image too small to hold a face of the requested size: nothing to do,
    # and no reason to build the networks.
    if not scales:
        return [], []

    with torch.no_grad():
        # LOAD MODELS
        # NOTE: the networks are rebuilt on every call; callers processing
        # many frames may want to hoist this out (kept here to preserve the
        # function's self-contained interface).
        pnet = PNet().to(device)
        rnet = RNet().to(device)
        onet = ONet().to(device)
        # Inference only: put every stage in eval mode, not just O-Net.
        pnet.eval()
        rnet.eval()
        onet.eval()

        # STAGE 1

        # run P-Net on different scales, dropping scales with no detections
        candidates = [
            run_first_stage(image, pnet, scale=s, threshold=thresholds[0])
            for s in scales
        ]
        candidates = [boxes for boxes in candidates if boxes is not None]
        # np.vstack raises on an empty list, so bail out when P-Net found
        # nothing at any scale.
        if not candidates:
            return [], []

        # collect boxes (and offsets, and scores) from different scales
        bounding_boxes = np.vstack(candidates)

        keep = nms(bounding_boxes[:, 0:5], nms_thresholds[0])
        bounding_boxes = bounding_boxes[keep]

        # use offsets predicted by pnet to transform bounding boxes
        bounding_boxes = calibrate_box(bounding_boxes[:, 0:5],
                                       bounding_boxes[:, 5:])
        # shape [n_boxes, 5]

        bounding_boxes = convert_to_square(bounding_boxes)
        bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4])

        # STAGE 2

        img_boxes = get_image_boxes(bounding_boxes, image, size=24)
        img_boxes = torch.FloatTensor(img_boxes).to(device)
        output = rnet(img_boxes)
        offsets = output[0].cpu().numpy()  # shape [n_boxes, 4]
        probs = output[1].cpu().numpy()  # shape [n_boxes, 2]

        keep = np.where(probs[:, 1] > thresholds[1])[0]
        # R-Net rejected every candidate.
        if len(keep) == 0:
            return [], []
        bounding_boxes = bounding_boxes[keep]
        bounding_boxes[:, 4] = probs[keep, 1].reshape((-1, ))
        offsets = offsets[keep]

        keep = nms(bounding_boxes, nms_thresholds[1])
        bounding_boxes = bounding_boxes[keep]
        bounding_boxes = calibrate_box(bounding_boxes, offsets[keep])
        bounding_boxes = convert_to_square(bounding_boxes)
        bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4])

        # STAGE 3

        img_boxes = get_image_boxes(bounding_boxes, image, size=48)
        if len(img_boxes) == 0:
            return [], []
        img_boxes = torch.FloatTensor(img_boxes).to(device)
        output = onet(img_boxes)
        landmarks = output[0].cpu().numpy()  # shape [n_boxes, 10]
        offsets = output[1].cpu().numpy()  # shape [n_boxes, 4]
        probs = output[2].cpu().numpy()  # shape [n_boxes, 2]

        keep = np.where(probs[:, 1] > thresholds[2])[0]
        bounding_boxes = bounding_boxes[keep]
        bounding_boxes[:, 4] = probs[keep, 1].reshape((-1, ))
        offsets = offsets[keep]
        landmarks = landmarks[keep]

        # compute landmark points: O-Net predicts them relative to the box,
        # so scale by the box size and shift by its top-left corner
        box_width = bounding_boxes[:, 2] - bounding_boxes[:, 0] + 1.0
        box_height = bounding_boxes[:, 3] - bounding_boxes[:, 1] + 1.0
        xmin, ymin = bounding_boxes[:, 0], bounding_boxes[:, 1]
        landmarks[:, 0:5] = (np.expand_dims(xmin, 1)
                             + np.expand_dims(box_width, 1) * landmarks[:, 0:5])
        landmarks[:, 5:10] = (np.expand_dims(ymin, 1)
                              + np.expand_dims(box_height, 1) * landmarks[:, 5:10])

        bounding_boxes = calibrate_box(bounding_boxes, offsets)
        keep = nms(bounding_boxes, nms_thresholds[2], mode='min')
        bounding_boxes = bounding_boxes[keep]
        landmarks = landmarks[keep]

        return bounding_boxes, landmarks