def test_onet(inoutDir, outputDir, model, max_images=50):
    """Run stock P-/R-Net plus a custom O-Net over a folder and save the results.

    Args:
        inoutDir: Directory whose entries are read as input images.
        outputDir: Prefix of the output files. It is concatenated directly
            with the file name, so it should end with a path separator.
        model: Base name (without ``.pt``) of the O-Net checkpoint located
            under ``./original_model/``; also used in the output file names.
        max_images: Maximum number of directory entries to process. Defaults
            to 50, the limit that used to be hard-coded; ``None`` processes
            every entry.
    """
    pnet, rnet, onet_jiang = create_mtcnn_net(
        p_model_path="./original_model/pnet_epoch.pt",
        r_model_path="./original_model/rnet_epoch.pt",
        o_model_path="./original_model/" + model + ".pt",
        use_cuda=False)
    mtcnn_detector = MtcnnDetector(pnet=pnet, rnet=rnet, onet=onet_jiang,
                                   min_face_size=24)

    # The index follows the position in the directory listing, so output
    # names do not shift when an unreadable entry is skipped.
    for i, name in enumerate(os.listdir(inoutDir)[:max_images], start=1):
        image_path = os.path.join(inoutDir, name)
        img = cv2.imread(image_path)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising;
            # passing that on to cvtColor would abort the whole run.
            print("skip unreadable image: %s" % image_path)
            continue
        img_bg = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        landmarks2_jiang = mtcnn_detector.detect_onet_raw(img)
        vis_face_test(img_bg, landmarks2_jiang,
                      outputDir + model + "-" + str(i) + ".jpg")
def gen_onet_data(data_dir, anno_file, pnet_model_file, rnet_model_file,
                  prefix_path='', use_cuda=True, vis=False):
    """Run PNet + RNet over the annotated images and build ONet sample data.

    The aligned RNet boxes of every image are collected (an empty array when
    nothing was found), pickled to ``./model_store/detections_<timestamp>.pkl``
    and then passed to ``gen_onet_sample_data``.

    Args:
        data_dir: Forwarded to ``gen_onet_sample_data``.
        anno_file: Annotation file given to ``ImageDB`` and forwarded to
            ``gen_onet_sample_data``.
        pnet_model_file: Path of the trained PNet checkpoint.
        rnet_model_file: Path of the trained RNet checkpoint.
        prefix_path: Image path prefix given to ``ImageDB``.
        use_cuda: Forwarded to ``create_mtcnn_net``.
        vis: When true, show raw and aligned boxes for every image.
    """
    pnet, rnet, _ = create_mtcnn_net(p_model_path=pnet_model_file,
                                     r_model_path=rnet_model_file,
                                     use_cuda=use_cuda)
    mtcnn_detector = MtcnnDetector(pnet=pnet, rnet=rnet, min_face_size=12)

    imagedb = ImageDB(anno_file, mode="test", prefix_path=prefix_path)
    imdb = imagedb.load_imdb()
    image_reader = TestImageLoader(imdb, 1, False)

    all_boxes = []
    print('size:%d' % image_reader.size)
    for batch_idx, im in enumerate(image_reader):
        if batch_idx % 50 == 0:
            print("%d images done" % batch_idx)

        # pnet detection = [x1, y1, x2, y2, score, reg]
        p_boxes, p_boxes_align = mtcnn_detector.detect_pnet(im=im)
        # rnet detection
        boxes, boxes_align = mtcnn_detector.detect_rnet(im=im,
                                                        dets=p_boxes_align)
        if boxes_align is None:
            # Keep one entry per image so indices stay aligned with imdb.
            all_boxes.append(np.array([]))
            continue

        if vis:
            rgb_im = cv2.cvtColor(np.asarray(im), cv2.COLOR_BGR2RGB)
            vision.vis_two(rgb_im, boxes, boxes_align)

        all_boxes.append(boxes_align)

    save_path = './model_store'
    if not os.path.exists(save_path):
        os.mkdir(save_path)
    save_file = os.path.join(save_path,
                             "detections_%d.pkl" % int(time.time()))
    with open(save_file, 'wb') as f:
        cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL)

    # Called after the file is closed so the detections are fully flushed
    # before they are read back.
    gen_onet_sample_data(data_dir, anno_file, save_file, prefix_path)
def gen_rnet_data(data_dir, anno_dir, pnet_model_file, use_cuda=True):
    '''
    Run the trained PNet over the annotated images, pickle the proposals
    and pass them to gen_rnet_sample_data to build the RNet train data
    '''
    # only the PNet stage is loaded
    pnet, _, _ = create_mtcnn_net(p_model_path=pnet_model_file,
                                  use_cuda=use_cuda)
    mtcnn_detector = MtcnnDetector(pnet=pnet, min_face_size=12)

    # original annotation file, length = 12880
    # TODO :: [local_wide_anno, wide_anno_train]
    anno_file = os.path.join(anno_dir, 'anno_store/wide_anno_train.txt')
    imdb = ImageDB(anno_file, mode='test', prefix_path='').load_imdb()
    image_reader = TestImageLoader(imdb, 1, False)
    print('size:%d' % image_reader.size)

    all_boxes = []
    for done, im in enumerate(image_reader, start=1):
        if done % 100 == 0:
            print("%d images done" % done)

        # Time costly.
        # NOTE(review): the result is bound to a single name here; confirm
        # that this detect_pnet returns only the aligned boxes and not a
        # (boxes, boxes_align) pair, otherwise the None check never fires.
        boxes_align = mtcnn_detector.detect_pnet(im=im)
        if boxes_align is None:
            all_boxes.append(np.array([]))
        else:
            all_boxes.append(boxes_align)

    save_path = os.path.join(anno_dir, 'rnet')
    if not os.path.exists(save_path):
        os.mkdir(save_path)

    stamp = int(time.time())
    save_file = os.path.join(save_path, "detections_%d.pkl" % stamp)
    with open(save_file, 'wb') as f:
        cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL)

    gen_rnet_sample_data(data_dir, anno_dir, save_file)
def index():
    """Detect faces in a base64 image posted as JSON and return the crops.

    The request body must be JSON with an ``image`` key holding the encoded
    picture. The JSON response contains ``success`` (bool) and
    ``faces_detected``, a mapping ``"face_<i>"`` -> base64-encoded crop of
    the i-th detected box.

    NOTE(review): the three networks are re-created on every request;
    consider building the detector once at start-up.
    """
    pnet, rnet, onet = create_mtcnn_net(
        p_model_path="./original_model/pnet_epoch.pt",
        r_model_path="./original_model/rnet_epoch.pt",
        o_model_path="./original_model/onet_epoch.pt",
        use_cuda=False)
    mtcnn_detector = MtcnnDetector(pnet=pnet, rnet=rnet, onet=onet,
                                   min_face_size=24)

    print(request.data)
    base_data = request.json['image']
    img = base64_to_image(base_data)
    bboxs, _ = mtcnn_detector.detect_face(img)

    # Initialise the JSON response.
    res = {}
    faces = {}
    if bboxs.shape[0] < 1:
        res["success"] = False
        res["faces_detected"] = faces
        return flask.jsonify(res)

    res["success"] = True
    # An image may contain several faces: crop each of them.
    for i in range(bboxs.shape[0]):
        # Boxes can extend past the image border. A negative index would
        # wrap around in the slice below and yield a wrong or empty crop,
        # so clamp to 0 (slicing already tolerates too-large upper bounds).
        x1 = max(int(bboxs[i][0]), 0)
        y1 = max(int(bboxs[i][1]), 0)
        x2 = max(int(bboxs[i][2]), 0)
        y2 = max(int(bboxs[i][3]), 0)
        if x2 <= x1 or y2 <= y1:
            # Degenerate box: nothing to encode.
            continue
        face = img[y1:y2, x1:x2]
        faces["face_" + str(i)] = image_to_base64(face)

    res["faces_detected"] = faces
    return flask.jsonify(res)
def test(inoutDir, outputDir, model):
    """Detect faces on every image of a folder and save the visualisations.

    Uses the stock P-Net and R-Net together with a separately trained O-Net.

    Args:
        inoutDir: Directory whose entries are read as input images.
        outputDir: Prefix of the output files. It is concatenated directly
            with the file name, so it should end with a path separator.
        model: Base name (without ``.pt``) of the O-Net checkpoint located
            under ``./original_model/``; also used in the output file names.
    """
    pnet, rnet, onet_jiang = create_mtcnn_net(
        p_model_path="./original_model/pnet_epoch.pt",
        r_model_path="./original_model/rnet_epoch.pt",
        o_model_path="./original_model/" + model + ".pt",
        use_cuda=False)
    mtcnn_detector = MtcnnDetector(pnet=pnet, rnet=rnet, onet=onet_jiang,
                                   min_face_size=24)

    for i, name in enumerate(os.listdir(inoutDir), start=1):
        # Read from the directory that was listed. The previous code joined
        # the names with a hard-coded "./lfpw_test/" and so ignored inoutDir.
        image_path = os.path.join(inoutDir, name)
        img = cv2.imread(image_path)
        if img is None:
            # cv2.imread returns None for unreadable / non-image entries.
            print("skip unreadable image: %s" % image_path)
            continue
        img_bg = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        bboxs, landmarks1 = mtcnn_detector.detect_face(img)
        # Save the annotated picture.
        vis_face(img_bg, bboxs, landmarks1,
                 outputDir + model + "-" + str(i) + ".jpg")
def detect(self):
    """Run face detection on every image below ``self.image_dir``.

    ``self.image_dir`` is expected to contain one sub-directory per event.
    For each image a text file with the same stem is written to
    ``self.result_dir/<event>/`` containing the stem, the number of boxes,
    and one ``x y w h score`` line per box.
    """
    pnet, rnet, onet = create_mtcnn_net(p_model_path=self.p_net_m,
                                        r_model_path=self.r_net_m,
                                        o_model_path=self.o_net_m,
                                        use_cuda=True)
    mtcnn_detector = MtcnnDetector(pnet=pnet, rnet=rnet, onet=onet,
                                   min_face_size=24,
                                   threshold=[0.1, 0.1, 0.1])

    for event in os.listdir(self.image_dir):
        print(event)
        event_dir = os.path.join(self.image_dir, event)
        res_dir = os.path.join(self.result_dir, event)
        if not os.path.exists(res_dir):
            os.makedirs(res_dir)

        for image_name in os.listdir(event_dir):
            image_path = os.path.join(event_dir, image_name)
            img = cv2.imread(image_path)
            if img is None:
                # cv2.imread returns None for unreadable / non-image files.
                print("skip unreadable image: %s" % image_path)
                continue

            bboxs, landmarks = mtcnn_detector.detect_face(img)
            if bboxs.shape[0] != 0:
                # Convert (x1, y1, x2, y2) to (x, y, w, h) and round.
                bboxs[:, 2] = bboxs[:, 2] - bboxs[:, 0]
                bboxs[:, 3] = bboxs[:, 3] - bboxs[:, 1]
                bboxs[:, :4] = np.round(bboxs[:, :4])

            # splitext works for any extension length; slicing off the
            # last four characters only worked for three-letter suffixes.
            stem = os.path.splitext(image_name)[0]
            fpath = os.path.join(res_dir, stem + '.txt')
            # The context manager closes the file even if a write fails.
            with open(fpath, 'w') as f:
                f.write(stem + '\n')
                f.write(str(bboxs.shape[0]) + '\n')
                for i in range(bboxs.shape[0]):
                    f.write('{:.0f} {:.0f} {:.0f} {:.0f} {:.3f}\n'.format(
                        bboxs[i, 0], bboxs[i, 1], bboxs[i, 2], bboxs[i, 3],
                        bboxs[i, 4]))
import cv2
from mtcnn.core.detect import create_mtcnn_net, MtcnnDetector
from mtcnn.core.vision import vis_face

if __name__ == '__main__':
    # Locally trained checkpoints. The stock weights sit in the same folder
    # as pnet_epoch.pt / rnet_epoch.pt / onet_epoch.pt.
    checkpoints = dict(
        p_model_path="./original_model/pnet_epoch_train.pt",
        r_model_path="./original_model/rnet_epoch_train.pt",
        o_model_path="./original_model/onet_epoch_train.pt",
    )
    pnet, rnet, onet = create_mtcnn_net(use_cuda=False, **checkpoints)
    mtcnn_detector = MtcnnDetector(pnet=pnet, rnet=rnet, onet=onet,
                                   min_face_size=24,
                                   threshold=[0.6, 0.7, 0.7])

    # Detection runs on the BGR image as loaded; the RGB copy is only used
    # for drawing.
    img = cv2.imread("1.jpg")
    bboxs, landmarks = mtcnn_detector.detect_face(img)
    img_bg = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    save_name = 'r_1.jpg'
    vis_face(img_bg, bboxs, landmarks, save_name)
class VisSingleCase(object):
    """Detect the face on a single image and visualise the result."""

    def __init__(self, args):
        """Create the detector; ``args`` is forwarded to ``MtcnnDetector``."""
        self.model = MtcnnDetector(args)

    def _fliter_doc_bbox(self, bboxes, landmarks):
        '''
        Keep the face with the largest box area.

        bboxes    : array whose first four columns are x1, y1, x2, y2
        landmarks : array with one row per box
        return    : (bbox, landmark) rows of the largest box; on equal
                    areas the first one wins
        '''
        area = (bboxes[:, 2] - bboxes[:, 0] + 1) * (bboxes[:, 3] - bboxes[:, 1] + 1)
        # argmax gives the largest box directly and is deterministic on
        # ties, unlike taking the head of a full (unstable) argsort.
        biggest = area.argmax()
        return bboxes[biggest], landmarks[biggest]

    def _fetch_block(self, bbox, landmark, lmk_flag=True):
        '''
        Crop the chin_block of the detected face.

        With lmk_flag the block is spanned horizontally by landmark
        points 6 and 10 and vertically by point 33 (nose) and point 8
        (chin); otherwise the face box itself is returned.
        return : (x1, y1, x2, y2) as ints
        '''
        if lmk_flag:
            landmark = landmark.reshape(-1, 2)
            left_down = landmark[6]     # default : 6
            nose_point = landmark[33]   # point-34 | nose
            right_down = landmark[10]   # default : 10
            chin_point = landmark[8]    # point-9  | chin
            x1, y1 = int(left_down[0]), int(nose_point[1])
            x2, y2 = int(right_down[0]), int(chin_point[1])
        else:
            x1, y1 = int(bbox[0]), int(bbox[1])
            x2, y2 = int(bbox[2]), int(bbox[3])
        return (x1, y1, x2, y2)

    def _vis_result(self, img, bbox, landmark):
        '''
        Show the image with face box (yellow), chin block (red) and
        landmark points (green).
        '''
        pil_img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        draw = ImageDraw.Draw(pil_img)
        block = self._fetch_block(bbox, landmark)
        draw.rectangle([(bbox[0], bbox[1]), (bbox[2], bbox[3])], outline='yellow')
        draw.rectangle([(block[0], block[1]), (block[2], block[3])], outline='red')
        # reshape(-1, 2) draws however many points the detector produced
        # instead of assuming exactly 68.
        for px, py in landmark.reshape(-1, 2):
            draw.ellipse((px - 1, py - 1, px + 1, py + 1), fill=None, outline='green')
        pil_img.show()

    def check_case(self, img_path):
        '''
        Check the single case: detect, report and visualise the largest face.
        '''
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread returns None on failure; say so explicitly instead
            # of surfacing an unrelated error from the detector.
            print('Cannot read image %s' % img_path)
            return

        try:
            bboxes, landmarks = self.model.detect_face(img, verbose=False)
            print((img_path, bboxes.shape))
        except Exception as e:
            # Best effort: report the failure and carry on.
            print(e)
            return

        if bboxes.shape[0] == 0:
            print('No face detected in %s' % img_path)
            return

        bbox, landmark = self._fliter_doc_bbox(bboxes, landmarks)
        self._vis_result(img, bbox, landmark)
for j in range(point_nums): cv2.circle(frame, (int(landmarks_one[j, 0]), int(landmarks_one[j, 1])), 2, (255, 0, 0), -1) out.write(frame) else: break cap.release() out.release() cv2.destroyAllWindows() if __name__ == '__main__': point_nums = 24 threshold = [0.6, 0.7, 0.7] # [0.99, 0.1, 0.6] # pnet, rnet, onet = create_mtcnn_net( p_model_path=r'model_store/final/pnet_epoch_19.pt', r_model_path=r'model_store/final/rnet_epoch_7.pt', o_model_path=r'model_store/final/onet_epoch_92.pt', use_cuda=True) mtcnn_detector = MtcnnDetector(pnet=pnet, rnet=rnet, onet=onet, min_face_size=24, threshold=threshold) videos_root_path = 'test_video/' save_path_root = 'result_video' detect_video(mtcnn_detector, videos_root_path, save_path_root)
import argparse from mtcnn.core.detect import MtcnnDetector,create_mtcnn_net import cv2 import time from mtcnn.config import * pnet, rnet, onet = create_mtcnn_net(p_model_path=PNET_MODEL_PATH,r_model_path=RNET_MODEL_PATH, o_model_path=ONET_MODEL_PATH,use_cuda=False) mtcnn_detector = MtcnnDetector(pnet=pnet, rnet=rnet, onet=onet, min_face_size=48) cap = cv2.VideoCapture(0) f = 0 stime = time.time() while (True): ret, frame = cap.read() # 读取一帧的图像 frame = cv2.resize(frame,(480,360)) # frame = cv2.imread('face.jpg') boxes, boxes_align = mtcnn_detector.detect_pnet(im=frame) rboxes, rboxes_align = mtcnn_detector.detect_rnet(im=frame, dets=boxes_align) # oboxes,olandmark = mtcnn_detector.detect_onet(im=frame,dets=rboxes_align) if rboxes_align is not None: for box in rboxes_align: cv2.rectangle(frame, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), (255, 0, 128), 2) if f % 20 == 0: fps = int(20/(time.time()-stime)) f = 0 stime = time.time() cv2.putText(frame, '{:d}fps'.format(fps), (20, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.8,(255, 0, 255), 2) cv2.imshow('Face Recognition', frame)
def __init__(self, args):
    """Create the detector and keep it on ``self.model``.

    Args:
        args: Passed unchanged as the only argument to ``MtcnnDetector``.
    """
    self.model = MtcnnDetector(args)
# parser.add_argument('--onet_file', type=str, default='model/onet.pt') # parser.add_argument('--onet_file', type=str, default='model/onet_v1/onet_epoch_7.pt') # parser.add_argument('--onet_file', type=str, default='model/onet_v2/onet_epoch_6.pt') parser.add_argument('--onet_file', type=str, default='model/onet_v3/onet_epoch_10.pt') parser.add_argument('--use_cuda', type=bool, default=True) # TODO parser.add_argument('--gpu_ids', type=list, default=[0, 1]) # TODO parser.add_argument('--prob_thres', type=list, default=[0.6, 0.7, 0.7]) args = parser.parse_args() return args if __name__ == '__main__': imglists = [s.split('.')[0] for s in os.listdir('aku_imgs/')] mtcnn_detector = MtcnnDetector(parse_args()) for img_name in imglists: try: img = cv2.imread('aku_imgs/%s.jpg' % img_name) bboxs, landmarks = mtcnn_detector.detect_face(img, verbose=False) save_name = 'result/r3_%s.jpg' % img_name print('save img name : %s' % save_name) visual_face(img, bboxs, landmarks, save_name) except Exception as e: print(e)
cv2.circle(frame,(int(landmarks_one[j,0]),int(landmarks_one[j,1])),radius=2,color=(0, 0, 255)) def save_face(image, tag, num): # DATA_TRAIN为抓取的人脸存放目录 DATA_TRAIN = './Data/FaceID' img_name = os.path.join(DATA_TRAIN, str(tag), '{}_{}.jpg'.format(int(time.time()), num)) # 保存人脸图像到指定的位置, 其中会创建一个tag对应的目录,用于后面的分类训练 image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) cv2.imwrite(img_name, image) if __name__ == '__main__': pnet, rnet, onet = create_mtcnn_net(p_model_path="./original_model/pnet_epoch.pt", r_model_path="./original_model/rnet_epoch.pt", o_model_path="./original_model/onet_epoch.pt", use_cuda=False) mtcnn_detector = MtcnnDetector(pnet=pnet, rnet=rnet, onet=onet, min_face_size=24) window_name = 'main' camera_idx = 0 cv2.namedWindow(window_name) # 视频来源,可以来自一段已存好的视频,也可以直接来自摄像头 cap = cv2.VideoCapture(camera_idx, cv2.CAP_DSHOW) while cap.isOpened(): # 读取一帧数据 ok, frame = cap.read() if not ok: break # 抓取人脸的方法, 后面介绍 catch_face(frame) # 输入'q'退出程序 cv2.imshow(window_name, frame)
def gen_landmark48_data(data_dir, anno_file, pnet_model_file, rnet_model_file,
                        prefix_path='', use_cuda=True, vis=False):
    """Run PNet + RNet over the annotated images and pickle the detections.

    The aligned RNet boxes of every image (an empty array for skipped images
    or images without detections) are written to
    ``<config.MODEL_STORE_DIR>/detections_celeba.pkl``.

    Args:
        data_dir: Root directory; the annotation file is resolved relative
            to it and images are looked up under ``<data_dir>/img``.
        anno_file: Annotation file name relative to ``data_dir``.
        pnet_model_file: Path of the trained PNet checkpoint.
        rnet_model_file: Path of the trained RNet checkpoint.
        prefix_path: Currently not used; the image prefix is always
            ``<data_dir>/img``. NOTE(review): confirm whether this argument
            was meant to override that prefix.
        use_cuda: Forwarded to ``create_mtcnn_net``.
        vis: When true, show raw and aligned boxes for every image.
    """
    anno_file = os.path.join(data_dir, anno_file)
    pnet, rnet, _ = create_mtcnn_net(p_model_path=pnet_model_file,
                                     r_model_path=rnet_model_file,
                                     use_cuda=use_cuda)
    mtcnn_detector = MtcnnDetector(pnet=pnet, rnet=rnet, min_face_size=12)

    imagedb = ImageDB(anno_file, mode="test",
                      prefix_path=os.path.join(data_dir, 'img'))
    imdb = imagedb.load_imdb()
    image_reader = TestImageLoader(imdb, 1, False)

    all_boxes = []
    for batch_idx, im in enumerate(image_reader):
        if batch_idx % 500 == 0:
            print("%d images done" % batch_idx)

        # Images with a side of 1200 px or more are skipped; an empty entry
        # keeps the list aligned with imdb.
        if im.shape[0] >= 1200 or im.shape[1] >= 1200:
            all_boxes.append(np.array([]))
            continue

        p_boxes, p_boxes_align = mtcnn_detector.detect_pnet(im=im)
        boxes, boxes_align = mtcnn_detector.detect_rnet(im=im,
                                                        dets=p_boxes_align)
        if boxes_align is None:
            all_boxes.append(np.array([]))
            continue

        if vis:
            rgb_im = cv2.cvtColor(np.asarray(im), cv2.COLOR_BGR2RGB)
            vision.vis_two(rgb_im, boxes, boxes_align)

        all_boxes.append(boxes_align)

    save_path = config.MODEL_STORE_DIR
    if not os.path.exists(save_path):
        # makedirs also creates missing parents; plain mkdir fails when the
        # configured directory is nested.
        os.makedirs(save_path)
    save_file = os.path.join(save_path, "detections_celeba.pkl")
    with open(save_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)
import warnings

import cv2
from mtcnn.core.detect import create_mtcnn_net, MtcnnDetector
from mtcnn.core.vision import vis_face

warnings.filterwarnings("ignore")

if __name__ == '__main__':
    use_cuda = True
    checkpoints = dict(
        p_model_path="./original_model/pnet_epoch.pt",
        r_model_path="./original_model/rnet_epoch.pt",
        o_model_path="./original_model/onet_epoch.pt",
    )
    pnet, rnet, onet = create_mtcnn_net(use_cuda=use_cuda, **checkpoints)
    mtcnn_detector = MtcnnDetector(pnet=pnet, rnet=rnet, onet=onet,
                                   min_face_size=24)

    # Detection runs on the BGR image as loaded; the RGB copy is only used
    # for drawing.
    img = cv2.imread("./img/part2_002268.jpg")
    img_bg = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    bboxs, landmarks = mtcnn_detector.detect_face(img)
    print(bboxs)

    # Enlarge the detected boxes before drawing them.
    expanded = mtcnn_detector.box_expand(bboxs, 0.3, 0.25)
    save_file = './img/result.jpg'
    vis_face(img_bg, expanded, landmarks, save_file)
import cv2
from mtcnn.core.detect import create_mtcnn_net, MtcnnDetector
from mtcnn.core.vision import vis_face

if __name__ == '__main__':
    checkpoints = dict(
        p_model_path="./original_model/pnet_epoch.pt",
        r_model_path="./original_model/rnet_epoch.pt",
        o_model_path="./original_model/onet_epoch.pt",
    )
    pnet, rnet, onet = create_mtcnn_net(use_cuda=False, **checkpoints)
    mtcnn_detector = MtcnnDetector(pnet=pnet, rnet=rnet, onet=onet,
                                   min_face_size=24)

    # Detection runs on the BGR image as loaded; the RGB copy is only used
    # for drawing.
    img = cv2.imread("./s_l.jpg")
    bboxs, landmarks = mtcnn_detector.detect_face(img)
    img_bg = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    save_name = 'r_4.jpg'
    vis_face(img_bg, bboxs, landmarks, save_name)
def gen_rnet_data(data_dir, anno_file, pnet_model_file, prefix_path='',
                  use_cuda=True, vis=False):
    """Run the trained PNet over the annotated images and build RNet samples.

    The aligned PNet boxes of every image are collected (an empty array when
    nothing was found), pickled to ``./model_store/detections_<timestamp>.pkl``
    and then passed to ``gen_rnet_sample_data``.

    :param data_dir: forwarded to ``gen_rnet_sample_data``
    :param anno_file: annotation file given to ``ImageDB`` and forwarded to
        ``gen_rnet_sample_data``
    :param pnet_model_file: path of the trained PNet checkpoint
    :param prefix_path: image path prefix given to ``ImageDB``
    :param use_cuda: forwarded to ``create_mtcnn_net``
    :param vis: when true, show raw and aligned boxes for every image
    :return: None
    """
    # load trained pnet model
    pnet, _, _ = create_mtcnn_net(p_model_path=pnet_model_file,
                                  use_cuda=use_cuda)
    mtcnn_detector = MtcnnDetector(pnet=pnet, min_face_size=12)

    # load original_anno_file, length = 12880
    imagedb = ImageDB(anno_file, mode="test", prefix_path=prefix_path)
    imdb = imagedb.load_imdb()
    image_reader = TestImageLoader(imdb, 1, False)

    all_boxes = []
    print('size:%d' % image_reader.size)
    for batch_idx, im in enumerate(image_reader):
        if batch_idx % 100 == 0:
            print("%d images done" % batch_idx)

        # obtain boxes and aligned boxes
        boxes, boxes_align = mtcnn_detector.detect_pnet(im=im)
        if boxes_align is None:
            # Keep one entry per image so indices stay aligned with imdb.
            all_boxes.append(np.array([]))
            continue

        if vis:
            rgb_im = cv2.cvtColor(np.asarray(im), cv2.COLOR_BGR2RGB)
            vision.vis_two(rgb_im, boxes, boxes_align)

        all_boxes.append(boxes_align)

    save_path = './model_store'
    if not os.path.exists(save_path):
        os.mkdir(save_path)
    save_file = os.path.join(save_path,
                             "detections_%d.pkl" % int(time.time()))
    with open(save_file, 'wb') as f:
        cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL)

    # Called after the file is closed so the detections are fully flushed
    # before they are read back.
    gen_rnet_sample_data(data_dir, anno_file, save_file, prefix_path)
def dete_picture():
    """Classify the eye state on every ``*.jpg`` of a folder and save annotated copies.

    For each image the MTCNN detector supplies 24 landmark points per face.
    Two eye boxes are derived from fixed landmark indices, squared and
    padded, classified by MixNet (0 = open, 1 = close, otherwise "other")
    and the outcome is drawn on the frame, which is then written to the
    output folder under its original file name.

    All paths (weights, input folder, output folder) are hard-coded, the
    function takes no arguments, returns nothing and needs ``cuda:0``.
    Only the first two eye crops of an image are classified and labelled.
    """
    point_nums = 24
    threshold = [0.6, 0.7, 0.7]
    font = cv2.FONT_HERSHEY_COMPLEX
    # class id -> (label, BGR text colour); anything else counts as "other".
    eye_styles = {
        0: ("open_eye", (255, 0, 255)),
        1: ("close_eye", (0, 255, 0)),
    }
    other_style = ("other", (0, 0, 255))

    data_trans = Transforms.Compose([
        Transforms.Resize((24, 24)),
        Transforms.ToTensor(),
        Transforms.Normalize((0.45, 0.448, 0.455), (0.082, 0.082, 0.082)),
    ])

    mixnet = MixNet(input_size=(24, 24), num_classes=3)
    weight_dict = torch.load(
        "/media/omnisky/D4T/JSH/faceFenlei/Projects/hul_eye_class/weight/relabel_mix_24_24_20210302/Mixnet_epoch_59.pth"
    )
    # Drop the first 7 characters of every key (presumably the "module."
    # prefix left by DataParallel - confirm against the checkpoint).
    new_state_dict = OrderedDict()
    for k, v in weight_dict.items():
        new_state_dict[k[7:]] = v
    mixnet.load_state_dict(new_state_dict)
    mixnet.to('cuda:0')
    mixnet.eval()

    pnet, rnet, onet = create_mtcnn_net(
        p_model_path=r'model_store/final/pnet_epoch_19.pt',
        r_model_path=r'model_store/final/rnet_epoch_7.pt',
        o_model_path=r'model_store/final/onet_epoch_92.pt',
        use_cuda=True)
    mtcnn_detector = MtcnnDetector(pnet=pnet, rnet=rnet, onet=onet,
                                   min_face_size=24, threshold=threshold)

    img_file = "/media/omnisky/D4T/JSH/faceFenlei/Projects/hul_eye_class/test_video/caiji_0123"
    img_save = "/media/omnisky/D4T/JSH/faceFenlei/Projects/hul_eye_class/result_video/relabel_img_result_adma_01"
    # glob already returns paths that include img_file.
    img_path = glob.glob(os.path.join(img_file, "*.jpg"))

    for img_p in tqdm(img_path):
        frame = cv2.imread(img_p)
        if frame is None:
            # cv2.imread returns None for unreadable files.
            print("skip unreadable image: %s" % img_p)
            continue
        copy_frame = frame.copy()
        out_file = os.path.join(img_save, os.path.split(img_p)[-1])

        # Tight [xmin, ymin, xmax, ymax] box per eye: left, right per face.
        left_right_eye = []
        bboxs, landmarks, wearmask = mtcnn_detector.detect_face(frame, rgb=True)
        if landmarks is not None:
            for face_idx in range(landmarks.shape[0]):
                landmarks_one = landmarks[face_idx, :].reshape((point_nums, 2))
                left_eye = np.array(landmarks_one[[6, 8, 10, 11, 14], :])
                right_eye = np.array(landmarks_one[[7, 9, 12, 13, 15], :])
                for eye in (left_eye, right_eye):
                    left_right_eye.append([
                        np.min(eye[:, 0]), np.min(eye[:, 1]),
                        np.max(eye[:, 0]), np.max(eye[:, 1]),
                    ])
                for pt in [*left_eye, *right_eye]:
                    cv2.circle(frame, (int(pt[0]), int(pt[1])), 2,
                               (255, 0, 0), -1)

        crop_img = []
        for xmin, ymin, xmax, ymax in left_right_eye:
            w, h = xmax - xmin, ymax - ymin
            # Fixed padding ratio (the original note mentions a random
            # 0.05-0.15 expansion).
            k = 0.1
            ratio = h / w
            # Grow the shorter side so the box becomes square, then pad.
            if ratio > 1:
                ratio = ratio - 1
                xmin -= (ratio / 2 * w + k * h)
                ymin -= (k * h)
                xmax += (ratio / 2 * w + k * h)
                ymax += (k * h)
            else:
                ratio = w / h - 1
                xmin -= (k * w)
                ymin -= (ratio / 2 * h + k * w)
                xmax += (k * w)
                ymax += (ratio / 2 * h + k * w)
            cv2.rectangle(frame, (int(xmin), int(ymin)),
                          (int(xmax), int(ymax)), (0, 255, 255), 2)
            # Clamp to 0: a negative index would wrap around in the slice
            # and give a wrong or empty crop near the image border.
            top, bottom = max(int(ymin), 0), max(int(ymax), 0)
            left, right = max(int(xmin), 0), max(int(xmax), 0)
            crop_img.append(copy_frame[top:bottom, left:right])

        if len(crop_img) < 2:
            # No complete pair of eyes: save the frame as it is.
            cv2.imwrite(out_file, frame)
            continue

        result_buff = []   # (class id, score of that class) per crop
        score_buff = []    # full softmax output per crop
        for crop in crop_img:
            rgb_crop = cv2.cvtColor(crop, cv2.COLOR_BGR2RGB)
            img = data_trans(Image.fromarray(rgb_crop)).unsqueeze(0)
            with torch.no_grad():
                outputs = mixnet(img.to('cuda:0'))
                spft_max = torch.nn.functional.softmax(outputs, dim=1)
            score_buff.append(spft_max.cpu().numpy())
            score, result = torch.max(spft_max, 1)
            result_buff.append((result.item(), score.item()))

        # Label next to each of the first two eyes; the first label is
        # shifted 100 px to the left, the second is not.
        bias = 30
        eye_bias = 100
        for i in range(2):
            class_id, class_score = result_buff[i]
            label, colour = eye_styles.get(class_id, other_style)
            cv2.putText(frame, "{}:{:.2f}".format(label, class_score),
                        (int(left_right_eye[i][0]) - eye_bias,
                         int(left_right_eye[i][1]) - bias),
                        font, 0.6, colour, thickness=2)
            eye_bias = 0

        # Score table in the top-left corner. The right-eye column used to
        # be printed with "left_*" captions; it is now labelled "right_*".
        for side, x_pos, scores in (("left", 10, score_buff[0][0]),
                                    ("right", 200, score_buff[1][0])):
            p_open, p_close, p_other = scores
            rows = (("open", p_open), ("close", p_close), ("other", p_other))
            for row, (state, value) in enumerate(rows):
                cv2.putText(frame,
                            "{}_{}:{:.2f}".format(side, state, value),
                            (x_pos, 20 + 20 * row), font, 0.6,
                            (20, 150, 0), thickness=2)

        cv2.imwrite(out_file, frame)
def show_with_camera():
    """Classify the eye state on live camera frames, display and record them.

    Frames are read from camera 0. For each frame the MTCNN detector
    supplies 24 landmark points per face; eye boxes are derived from fixed
    landmark indices, squared, padded and classified by MixNet. The frame
    (converted to grey, kept as 3 channels) is labelled "open_eye" if any
    crop is class 0, else "close_eye" if any is class 1, else "other",
    shown in a window and appended to
    ``result_video/camera_test_20210301.avi``.

    The loop ends when the camera stops delivering frames or when ``q`` is
    pressed. All paths are hard-coded; the function takes no arguments,
    returns nothing and needs ``cuda:0``.
    """
    point_nums = 24
    threshold = [0.6, 0.7, 0.7]
    data_trans = Transforms.Compose([
        Transforms.Resize((24, 24)),
        Transforms.ToTensor(),
        Transforms.Normalize((0.45, 0.448, 0.455), (0.082, 0.082, 0.082)),
    ])

    mixnet = MixNet(input_size=(24, 24), num_classes=3)
    weight_dict = torch.load(
        "/media/omnisky/D4T/JSH/faceFenlei/Projects/hul_eye_class/weight/mix_mbhk_change_signal_eye_24_24/Mixnet_epoch_59.pth"
    )
    # Drop the first 7 characters of every key (presumably the "module."
    # prefix left by DataParallel - confirm against the checkpoint).
    new_state_dict = OrderedDict()
    for k, v in weight_dict.items():
        new_state_dict[k[7:]] = v
    mixnet.load_state_dict(new_state_dict)
    mixnet.to('cuda:0')
    mixnet.eval()

    pnet, rnet, onet = create_mtcnn_net(
        p_model_path=r'model_store/final/pnet_epoch_19.pt',
        r_model_path=r'model_store/final/rnet_epoch_7.pt',
        o_model_path=r'model_store/final/onet_epoch_92.pt',
        use_cuda=True)
    mtcnn_detector = MtcnnDetector(pnet=pnet, rnet=rnet, onet=onet,
                                   min_face_size=24, threshold=threshold)

    save_path_root = 'result_video/camera_test_20210301.avi'
    cap = cv2.VideoCapture(0)
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    fps = cap.get(cv2.CAP_PROP_FPS)
    size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
            int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    out = cv2.VideoWriter(save_path_root, fourcc, fps, size)

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                print("finish")
                break

            copy_frame = frame.copy()
            # Tight [xmin, ymin, xmax, ymax] box per eye: left, right per face.
            left_right_eye = []
            bboxs, landmarks, wearmask = mtcnn_detector.detect_face(frame, rgb=True)
            if landmarks is not None:
                for face_idx in range(landmarks.shape[0]):
                    landmarks_one = landmarks[face_idx, :].reshape((point_nums, 2))
                    left_eye = np.array(landmarks_one[[6, 8, 10, 11, 14], :])
                    right_eye = np.array(landmarks_one[[7, 9, 12, 13, 15], :])
                    for eye in (left_eye, right_eye):
                        left_right_eye.append([
                            np.min(eye[:, 0]), np.min(eye[:, 1]),
                            np.max(eye[:, 0]), np.max(eye[:, 1]),
                        ])
                    for pt in [*left_eye, *right_eye]:
                        cv2.circle(frame, (int(pt[0]), int(pt[1])), 2,
                                   (255, 0, 0), -1)

            crop_img = []
            label_anchor = None   # text position: corner of the last eye box
            for xmin, ymin, xmax, ymax in left_right_eye:
                w, h = xmax - xmin, ymax - ymin
                # Fixed padding ratio (the original note mentions a random
                # 0.05-0.15 expansion).
                k = 0.1
                ratio = h / w
                # Grow the shorter side so the box becomes square, then pad.
                if ratio > 1:
                    ratio = ratio - 1
                    xmin -= (ratio / 2 * w + k * h)
                    ymin -= (k * h)
                    xmax += (ratio / 2 * w + k * h)
                    ymax += (k * h)
                else:
                    ratio = w / h - 1
                    xmin -= (k * w)
                    ymin -= (ratio / 2 * h + k * w)
                    xmax += (k * w)
                    ymax += (ratio / 2 * h + k * w)
                cv2.rectangle(frame, (int(xmin), int(ymin)),
                              (int(xmax), int(ymax)), (0, 255, 255), 2)
                # Clamp to 0: a negative index would wrap around in the
                # slice and give a wrong or empty crop near the border.
                top, bottom = max(int(ymin), 0), max(int(ymax), 0)
                left, right = max(int(xmin), 0), max(int(xmax), 0)
                crop_img.append(copy_frame[top:bottom, left:right])
                label_anchor = (int(xmax), int(ymax) - 20)

            # Grey round trip: the displayed/recorded frame is greyscale
            # but keeps three channels.
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            frame = cv2.cvtColor(frame, cv2.COLOR_GRAY2BGR)

            if len(crop_img) >= 2:
                t_result = []
                for crop in crop_img:
                    grey = cv2.cvtColor(crop, cv2.COLOR_BGR2GRAY)
                    rgb_crop = cv2.cvtColor(grey, cv2.COLOR_GRAY2RGB)
                    img = data_trans(Image.fromarray(rgb_crop)).unsqueeze(0)
                    with torch.no_grad():
                        outputs = mixnet(img.to('cuda:0'))
                    t_result.append(torch.max(outputs, 1)[1].item())

                if 0 in t_result:
                    eye_class, colour = "open_eye", (255, 0, 255)
                elif 1 in t_result:
                    eye_class, colour = "close_eye", (0, 255, 0)
                else:
                    eye_class, colour = "other", (0, 0, 255)
                cv2.putText(frame, eye_class, label_anchor,
                            cv2.FONT_HERSHEY_COMPLEX, 1.0, colour,
                            thickness=2)

            cv2.imshow("test", frame)
            if cv2.waitKey(10) == ord('q'):
                print("get out")
                break
            out.write(frame)
    finally:
        # Release the camera and finalise the video file on every exit
        # path; previously neither handle was ever released.
        cap.release()
        out.release()
        cv2.destroyAllWindows()
def _decide_eye_state(eye_widths, eye_results):
    """Fuse the two per-eye predictions of a frame into one label.

    Args:
        eye_widths: Pixel widths of the first two eye boxes (before the
            boxes are expanded), in the same order as ``eye_results``.
        eye_results: ``[(class_id, score), (class_id, score)]`` for those two
            eyes; class 0 is open, 1 is closed and 2 is "other".

    Returns:
        ``"other"``, ``"close"`` or ``"open"``.
    """
    # "wide" is the eye with the larger box, "narrow" is the other one.
    wide = 0 if eye_widths[0] > eye_widths[1] else 1
    narrow = 1 - wide
    wide_id, wide_score = eye_results[wide]
    narrow_id, narrow_score = eye_results[narrow]

    # Prediction with the highest confidence, regardless of box size.
    if eye_results[0][1] > eye_results[1][1]:
        best_id, best_score = eye_results[0]
    else:
        best_id, best_score = eye_results[1]

    widths = np.array(eye_widths[:2])
    is_other = (
        # A large box confidently classified as "other".
        (eye_widths[narrow] > 23 and narrow_score > 0.8 and narrow_id == 2)
        # The original compared the *score* with 2 here, which can never be
        # true for a probability; the class id is what has to equal 2.
        or (eye_widths[wide] > 23 and wide_score > 0.8 and wide_id == 2)
        # Some box is narrower than 17 px and the wider eye is not confident.
        or (np.any(widths < 17.0) and wide_score < 0.8)
        # One eye says "other" (> 0.5) and the second eye is not very sure.
        or (wide_id == 2 and wide_score > 0.5 and narrow_score < 0.9)
        or (narrow_id == 2 and narrow_score > 0.5 and wide_score < 0.9)
        # Any box narrower than 10 px is always treated as "other".
        or np.any(widths < 10.0)
    )
    if is_other:
        return "other"
    if (wide_id == 1 and wide_score >= 0.80) or (best_id == 1
                                                 and best_score > 0.750):
        return "close"
    return "open"


def dete_signal_video(
        video_path='test_video/hhh/02_65_6504_0_be4ba2aeac264ed992aae74c15b91b18.mp4',
        output_path='result_video/debug_test.avi',
        weight_path="/media/omnisky/D4T/JSH/faceFenlei/Projects/hul_eye_class/weight/relabel_04_mix_SGD_mutillabel_24_24_20210302/Mixnet_epoch_49.pth",
        frame_dump_dir="/media/omnisky/D4T/JSH/faceFenlei/Projects/hul_eye_class/result_video/data(2)"):
    """Classify each eye separately on every frame of a video and annotate it.

    For every frame the MTCNN detector provides 24-point landmarks, a square
    crop is taken around each eye, each crop is classified by MixNet
    (0 open / 1 closed / 2 other) and the per-eye scores plus a fused
    frame-level decision are drawn on the frame, which is then appended to
    the output video.  Frames with fewer than two eye crops are written as
    JPEG images into ``frame_dump_dir`` instead.

    Args:
        video_path: Input video file.
        output_path: Annotated output video (XVID).
        weight_path: MixNet checkpoint (a plain state dict).
        frame_dump_dir: Directory receiving the frames without two eye crops.
            It is not created here; ``cv2.imwrite`` fails silently if it is
            missing.
    """
    point_nums = 24
    threshold = [0.6, 0.7, 0.7]
    data_trans = Transforms.Compose([
        Transforms.Resize((24, 24)),
        Transforms.ToTensor(),
        Transforms.Normalize((0.45, 0.448, 0.455), (0.082, 0.082, 0.082)),
    ])

    mixnet = MixNet(input_size=(24, 24), num_classes=3)
    weight_dict = torch.load(weight_path)
    # The first 7 characters of every key are dropped unconditionally --
    # presumably the "module." prefix left by nn.DataParallel (TODO confirm).
    mixnet.load_state_dict(
        OrderedDict((k[7:], v) for k, v in weight_dict.items()))
    mixnet.to('cuda:0')
    mixnet.eval()

    pnet, rnet, onet = create_mtcnn_net(
        p_model_path=r'model_store/final/pnet_epoch_19.pt',
        r_model_path=r'model_store/final/rnet_epoch_7.pt',
        o_model_path=r'model_store/final/onet_epoch_92.pt',
        use_cuda=True)
    mtcnn_detector = MtcnnDetector(pnet=pnet,
                                   rnet=rnet,
                                   onet=onet,
                                   min_face_size=24,
                                   threshold=threshold)

    cap = cv2.VideoCapture(video_path)
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    fps = cap.get(cv2.CAP_PROP_FPS)
    size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
            int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    fname = os.path.splitext(os.path.split(video_path)[1])[0]
    out = cv2.VideoWriter(output_path, fourcc, fps, size)

    font = cv2.FONT_HERSHEY_COMPLEX

    def put(image, text, org, color):
        # All overlays share the same font, scale and thickness.
        cv2.putText(image, text, org, font, 0.6, color, thickness=2)

    per_eye_style = {0: ("open_eye", (255, 0, 255)),
                     1: ("close_eye", (0, 255, 0))}
    state_color = {"other": (0, 0, 255),
                   "close": (0, 255, 0),
                   "open": (255, 0, 0)}

    img_count = 0
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                print("finish")
                break
            img_count += 1
            copy_frame = frame.copy()
            # Reset per frame so a frame without a face never reuses the
            # boxes or crops of an earlier frame.
            left_right_eye = []
            eye_wild_buf = []
            crop_img = []
            _, landmarks, _ = mtcnn_detector.detect_face(frame, rgb=True)

            if landmarks is not None:
                for face_idx in range(landmarks.shape[0]):
                    points = landmarks[face_idx, :].reshape((point_nums, 2))
                    # Landmark rows used for the left and the right eye.
                    for rows in ([6, 8, 10, 11, 14], [7, 9, 12, 13, 15]):
                        eye = np.array(points[rows, :])
                        left_right_eye.append([
                            np.min(eye[:, 0]),
                            np.min(eye[:, 1]),
                            np.max(eye[:, 0]),
                            np.max(eye[:, 1])
                        ])

            for xmin, ymin, xmax, ymax in left_right_eye:
                w, h = xmax - xmin, ymax - ymin
                # Pad the shorter side so the box becomes square, then add a
                # margin of k times the longer side on every edge.
                k = 0.1
                ratio = h / w
                if ratio > 1:
                    ratio = ratio - 1
                    xmin -= (ratio / 2 * w + k * h)
                    ymin -= (k * h)
                    xmax += (ratio / 2 * w + k * h)
                    ymax += (k * h)
                else:
                    ratio = w / h - 1
                    xmin -= (k * w)
                    ymin -= (ratio / 2 * h + k * w)
                    xmax += (k * w)
                    ymax += (ratio / 2 * h + k * w)
                eye_wild_buf.append(w)
                cv2.rectangle(frame, (int(xmin), int(ymin)),
                              (int(xmax), int(ymax)), (0, 255, 255), 1)
                # Clamp the start indices: a negative start would wrap around
                # in the NumPy slice and produce an empty/wrong crop.
                crop_img.append(copy_frame[max(int(ymin), 0):int(ymax),
                                           max(int(xmin), 0):int(xmax)])

            if len(crop_img) < 2:
                # The path was only present as commented-out code, which made
                # this branch raise NameError; it is rebuilt here as
                # "<dump dir>/<video name><frame number>.jpg".
                tsave_path = os.path.join(
                    frame_dump_dir, fname + "{:04d}.jpg".format(img_count))
                cv2.imwrite(tsave_path, frame)
                continue

            result_buff = []  # (class id, score) per crop
            score_buff = []  # full softmax row per crop
            for crop in crop_img:
                rgb = cv2.cvtColor(crop, cv2.COLOR_BGR2RGB)
                img = data_trans(Image.fromarray(rgb)).unsqueeze(0)
                with torch.no_grad():
                    outputs = mixnet(img.to('cuda:0'))
                    spft_max = torch.nn.functional.softmax(outputs, dim=1)
                score_buff.append(spft_max.cpu().numpy())
                score, result = torch.max(spft_max, 1)
                result_buff.append((result.item(), score.item()))

            # Per-eye label next to the first two (unexpanded) eye boxes; the
            # first label is shifted 100 px to the left.
            bias = 30
            for idx in range(2):
                eye_bias = 100 if idx == 0 else 0
                class_id, class_score = result_buff[idx]
                x = int(left_right_eye[idx][0]) - eye_bias
                y = int(left_right_eye[idx][1])
                put(frame, "w:{}".format(int(eye_wild_buf[idx])), (x, y - 50),
                    (255, 0, 255))
                label, color = per_eye_style.get(class_id,
                                                 ("other", (0, 0, 255)))
                put(frame, "{}:{:.2f}".format(label, class_score),
                    (x, y - bias), color)

            # Score panel in the top-left corner.  The right-eye column used
            # to be labelled "left_*" as well.
            for side, col_x, probs in (("left", 10, score_buff[0][0]),
                                       ("right", 200, score_buff[1][0])):
                p_open, p_close, p_other = probs
                put(frame, "{}_open:{:.2f}".format(side, p_open),
                    (col_x, 20), (20, 150, 0))
                put(frame, "{}_close:{:.2f}".format(side, p_close),
                    (col_x, 40), (20, 150, 0))
                put(frame, "{}_other:{:.2f}".format(side, p_other),
                    (col_x, 60), (20, 150, 0))

            # Debug information: box widths and the "other" probabilities.
            eye_wild_buf_info = "w:[{:.2f},{:.2f}]".format(
                eye_wild_buf[0], eye_wild_buf[1])
            score_buff_info = "score:[left: {:.2f}] [right: {:.2f}]".format(
                score_buff[0][0][2], score_buff[1][0][2])
            put(frame, eye_wild_buf_info, (400, 80), (255, 0, 0))
            put(frame, score_buff_info, (400, 100), (255, 0, 0))

            state = _decide_eye_state(eye_wild_buf, result_buff)
            put(frame, state, (400, 60), state_color[state])
            out.write(frame)
    finally:
        # Without release() the output container may never be finalized.
        cap.release()
        out.release()
def test_Onet_without_PRnet(self, annotation, outputDir, test_moudel, xxyy,
                            savePic):
    """Measure the mean normalized landmark error of an ONet checkpoint.

    ONet is run directly on the ground-truth boxes (PNet/RNet proposals are
    not used).  For each image with a detection, the error is the mean
    point-to-point distance between predicted and ground-truth landmarks
    divided by the distance between the first two ground-truth points.  A
    running mean is printed after every batch; once 1000 or more images have
    been evaluated the result is appended to ``landmark_test.txt`` and the
    function returns.

    Args:
        annotation: Annotation file passed to ``ImageDB``.
        outputDir: Unused here.  NOTE(review): saved images go to
            ``self.output_dir`` instead -- confirm which one is intended.
        test_moudel: Checkpoint name; ``./original_model/<name>.pt`` is loaded.
        xxyy: If true use ``detect_onet_xxyy``, otherwise ``detect_onet``.
        savePic: If true, save a visualisation of each evaluated image.
    """
    imagedb = ImageDB(annotation)
    gt_imdb = imagedb.load_imdb()
    pnet, rnet, onet_jiang = create_mtcnn_net(
        p_model_path="./original_model/pnet_epoch.pt",
        r_model_path="./original_model/rnet_epoch.pt",
        o_model_path="./original_model/" + test_moudel + ".pt",
        use_cuda=False)
    mtcnn_detector = MtcnnDetector(pnet=pnet,
                                   rnet=rnet,
                                   onet=onet_jiang,
                                   min_face_size=24)
    test_data = TrainImageReader(gt_imdb, 48, batch_size=100, shuffle=False)
    if xxyy:
        detect = mtcnn_detector.detect_onet_xxyy
    else:
        detect = mtcnn_detector.detect_onet

    total_errors = 0
    cnt = 0

    def mean_error():
        # Nothing evaluated yet: report NaN instead of dividing by zero.
        return total_errors / cnt if cnt else float("nan")

    for images, (gt_labels, gt_bboxes, gt_landmarks) in test_data:
        batch_errors = []
        for img, gt_bbox, gt_landmark in zip(images, gt_bboxes,
                                             gt_landmarks):
            bboxs, landmarks = detect(img, gt_bbox)
            if not landmarks.size:
                continue
            cnt += 1
            # Several detections may come back; only the first one is kept.
            bboxs = bboxs[:1]
            landmarks = landmarks[:1]
            if savePic:
                vis_face(img, bboxs, landmarks,
                         self.output_dir + str(cnt) + ".jpg")
            gt_points = np.array(gt_landmark).reshape(5, 2)
            pred_points = np.array(landmarks).reshape(5, 2)
            # Distance between the first two ground-truth points (the two
            # eyes, per the original comment) normalizes the error.
            normDist = np.linalg.norm(gt_points[1] - gt_points[0])
            error = np.mean(
                np.sqrt(np.sum(
                    (pred_points - gt_points)**2, axis=1))) / normDist
            batch_errors.append(error)

        total_errors += np.array(batch_errors).sum()
        print("%s: %s pics mean error is %s" %
              (datetime.datetime.now(), cnt, mean_error()))
        if cnt > 999:
            print("%s:%s pics mean error is %s" %
                  (datetime.datetime.now(), cnt, mean_error()))
            with open("landmark_test.txt", "a+") as f:
                f.write("%s, moudel_name:%s.pt, %s pics mean error is %s\n" %
                        (datetime.datetime.now(), test_moudel, cnt,
                         mean_error()))
            return
    print("%s:%s pics mean error is %s" %
          (datetime.datetime.now(), cnt, mean_error()))
class HistOccBlock(object):
    """Collect per-channel pixel histograms of a face block for many images.

    The pipeline (see ``runner``) reads an image list from a CSV file, runs
    the face detector on every image, crops a block from the largest detected
    face and stores the normalized B/G/R histograms of that block.
    """

    def __init__(self, args):
        self.args = args
        self.imgs_list = None  # image paths, filled by a _prepare_* method
        self.model = MtcnnDetector(self.args)
        self.pdf_list = None  # one flattened histogram per processed image

    def _prepare_folder(self):
        '''
        Build ``imgs_list`` from every entry of ``data_dir/folder``.

        Returns:
            The list of image paths (also stored on ``self.imgs_list``).
        '''
        folder_path = os.path.join(self.args.data_dir, self.args.folder)
        imgs_list = [
            os.path.join(folder_path, img_name)
            for img_name in os.listdir(folder_path)
        ]
        self.imgs_list = imgs_list
        print('there are %d imgs in %s' % (len(imgs_list), self.args.folder))
        return imgs_list

    def _prepare_csv(self):
        '''
        Build ``imgs_list`` from ``data_dir/csv_raw/<csv_file>``.

        ``args.check_mode`` selects the rows: ``'pos'`` keeps rows whose
        ``anno_label`` is -1, ``'neg'`` keeps rows whose ``anno_label`` is 0,
        anything else ("evaluate mode") keeps every row.

        Returns:
            ``None`` in pos/neg mode; in evaluate mode a DataFrame holding
            the resolved image path, ``img_type`` and ``anno_label`` per row.
        '''
        csv_file = os.path.join(self.args.data_dir, 'csv_raw',
                                self.args.csv_file)
        df_csv = pd.read_csv(csv_file)
        print('csv_file.shape : ', df_csv.shape)

        if self.args.check_mode == 'pos':
            check_mode = -1
        elif self.args.check_mode == 'neg':
            check_mode = 0
        else:
            check_mode = 1
            print('attention, evaluate-mode was started ...')

        if check_mode < 1:
            df_csv = df_csv[df_csv['anno_label'] == check_mode]

        # Only the last two path components of img_path are kept and
        # re-rooted under data_dir.
        imgs_list = [
            os.path.join(self.args.data_dir, '/'.join(path.split('/')[-2:]))
            for path in df_csv['img_path']
        ]
        # Previously evaluate mode left imgs_list as None, so runner()
        # crashed in _hist_go; it is now populated in every mode.
        self.imgs_list = imgs_list

        df_test = None
        if check_mode == 1:
            rows = [[img_path, img_type, anno_label]
                    for img_path, img_type, anno_label in zip(
                        imgs_list, df_csv['img_type'], df_csv['anno_label'])]
            df_test = pd.DataFrame(rows, columns=df_csv.columns)
            print('after filtering, df_test.shape : ', df_test.shape)

        print('imgs_list was prepared ...')
        return df_test

    def _fliter_doc_bbox(self, bboxes, landmarks):
        '''
        Keep the detection with the largest box area.

        Returns:
            ``(bbox, landmark)`` of that detection.
        '''
        # Areas are negated so that argsort puts the largest one first.
        area = (bboxes[:, 2] - bboxes[:, 0] + 1) * (bboxes[:, 3] -
                                                    bboxes[:, 1] + 1) * -1
        largest = area.argsort()[0]
        return bboxes[largest], landmarks[largest]

    def _fetch_block(self, img, bbox, landmark, lmk_flag=True):
        '''
        Crop the block of interest from the detected face.

        With ``lmk_flag`` the block spans landmark 6 to landmark 10
        horizontally and landmark 33 (nose) to landmark 8 (chin) vertically;
        otherwise the whole bounding box is used.  Both ends are inclusive.
        '''
        landmark = landmark.reshape(-1, 2)
        if lmk_flag:
            left_down = landmark[6]
            nose_point = landmark[33]  # point-34 | nose
            right_down = landmark[10]
            chin_point = landmark[8]  # point-9  | chin
            x1, y1 = int(left_down[0]), int(nose_point[1])
            x2, y2 = int(right_down[0]), int(chin_point[1])
        else:
            x1, y1 = int(bbox[0]), int(bbox[1])
            x2, y2 = int(bbox[2]), int(bbox[3])
        return img[y1:y2 + 1, x1:x2 + 1, :]

    def _statistics(self, block):
        '''
        Compute the normalized 256-bin histogram of each of the 3 channels.

        Args:
            block: ``(height, width, 3)`` array of integer pixel values.

        Returns:
            ``(3, 256)`` float array; each row sums to 1 for a non-empty
            block.
        '''
        height, width, _ = block.shape
        num_pixels = height * width
        # np.bincount replaces the per-pixel Python loop (and the removed
        # np.int alias that the old counter array relied on).
        counts = np.stack([
            np.bincount(block[:, :, channel].ravel(), minlength=256)
            for channel in range(3)
        ])
        return counts / num_pixels

    def _hist_go(self):
        '''
        Compute and save the block histograms of every image in imgs_list.

        step - 1. detect face with trained model
        step - 2. filter the doc_bbox
        step - 3. crop the target block
        step - 4. get the statistics_info
        '''
        pdf_list = []
        for img_path in self.imgs_list:
            try:
                img = cv2.imread(img_path)
                bboxes, landmarks = self.model.detect_face(img, verbose=False)
                print((img_path, bboxes.shape))
            except Exception as e:
                # Best effort: report the failure and move on to the next
                # image.
                print(e)
                continue

            if bboxes.shape[0] == 0:
                print('No face detected in %s' % img_path)
                continue

            bbox, landmark = self._fliter_doc_bbox(bboxes, landmarks)
            block = self._fetch_block(img, bbox, landmark)
            bgr_prob = self._statistics(block)
            pdf_list.append(bgr_prob.reshape(1, -1)[0])

        self.pdf_list = pdf_list
        # The text between the last '_' and the first following '.' of the
        # csv file name is used as the date stamp.
        date_stamp = self.args.csv_file.split('_')[-1].split('.')[0]
        save_name = 'pdf/npy_data/%s_details_%s.npy' % (self.args.check_mode,
                                                        date_stamp)
        print('npy_data was savd in %s' % save_name)
        np.save(save_name, pdf_list)
        print('hist-module was finished ...')

    def runner(self, vis=False):
        ''' Pipeline of HistOccBlock: load the csv, then build histograms. '''
        self._prepare_csv()
        self._hist_go()
def __init__(self, args):
    """Keep the run arguments and build the face detector from them."""
    self.args = args
    # No image list and no histogram results exist yet.
    self.imgs_list = self.pdf_list = None
    self.model = MtcnnDetector(args)
import torch.nn.functional as F
from torch.autograd import Variable
import transforms as transform
from mtcnn.core.detect import create_mtcnn_net, MtcnnDetector
from mtcnn.core.vision import vis_face
from models import vgg_prune
#from models import resnet_prune

# Run every model on the CPU.
use_cuda = False
# Side length of the crops produced by TenCrop below.
cut_size = 46
fps1 = 0.0
fps2 = 0.0
# NOTE(review): tiny constant, presumably an epsilon guarding a division in
# the FPS computation -- confirm against its use further down the file.
v = 0.0000000001

# Face detector: the three MTCNN stages loaded from local checkpoints.
pnet, rnet, onet = create_mtcnn_net(p_model_path="mtcnn_models/pnet.pt",
                                    r_model_path="mtcnn_models/rnet.pt",
                                    o_model_path="mtcnn_models/onet.pt",
                                    use_cuda=use_cuda)
mtcnn_detector = MtcnnDetector(pnet=pnet,
                               rnet=rnet,
                               onet=onet,
                               min_face_size=48,
                               stride=2,
                               threshold=[0.66, 0.7, 0.7],
                               scale_factor=0.709)

# Display label for each of the 7 expression classes; the original class
# order is kept in the commented list for reference.
#class_names = ['Angry', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral']
class_names = [
    'just so so', 'just so so', 'just so so', 'good', 'common', 'just so so',
    'common'
]

# Test-time augmentation: ten crops per face, stacked into one tensor.
transform_test = transform.Compose([
    transform.TenCrop(cut_size),
    transform.Lambda(lambda crops: torch.stack(
        [transform.ToTensor()(crop) for crop in crops])),
])

print('==> ori_model ...')
net = vgg_prune.VGG()
# map_location keeps the load working on a CPU-only host even when the
# checkpoint was saved from a GPU; load_state_dict copies the values into the
# parameters wherever the network lives.
checkpoint = torch.load('ori_models/fer_Pri_vgg16.pth', map_location='cpu')
net.load_state_dict(checkpoint['state_dict'])
#print('==> ori_model ...')
#net = resnet_prune_1.resnet()
def main(video_path='test_video/20200522164730261_0.avi',
         output_path='result_video/20200522164730261_0.avi',
         weight_path="weight/change_mix_data_0202/Mixnet_epoch_59.pth"):
    """Classify the eye state on every frame of a video and annotate it.

    For every frame the MTCNN detector provides 24-point landmarks; a square
    crop around each eye is resized to 24x24, the first two crops are placed
    side by side (24x48) and classified by MixNet as open / closed / other.
    The label is drawn in the top-left corner and the frame is appended to
    the output video.  Frames with fewer than two eye crops are written
    without a label.

    Args:
        video_path: Input video file.
        output_path: Annotated output video (XVID).
        weight_path: MixNet checkpoint (a plain state dict).
    """
    eye_class_dict = {0: "open_eye", 1: "close_eye", 2: "other"}
    point_nums = 24
    threshold = [0.6, 0.7, 0.7]
    # No Resize here: the crops are already resized with cv2 below.
    data_trans = Transforms.Compose([
        Transforms.ToTensor(),
        Transforms.Normalize((0.407, 0.405, 0.412), (0.087, 0.087, 0.087)),
    ])

    mixnet = MixNet(input_size=(24, 48), num_classes=3)
    weight_dict = torch.load(weight_path)
    # The first 7 characters of every key are dropped unconditionally --
    # presumably the "module." prefix left by nn.DataParallel (TODO confirm).
    mixnet.load_state_dict(
        OrderedDict((k[7:], v) for k, v in weight_dict.items()))
    mixnet.to('cuda:0')
    mixnet.eval()

    pnet, rnet, onet = create_mtcnn_net(
        p_model_path=r'model_store/final/pnet_epoch_19.pt',
        r_model_path=r'model_store/final/rnet_epoch_7.pt',
        o_model_path=r'model_store/final/onet_epoch_92.pt',
        use_cuda=True)
    mtcnn_detector = MtcnnDetector(pnet=pnet,
                                   rnet=rnet,
                                   onet=onet,
                                   min_face_size=24,
                                   threshold=threshold)

    cap = cv2.VideoCapture(video_path)
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    fps = cap.get(cv2.CAP_PROP_FPS)
    size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
            int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    out = cv2.VideoWriter(output_path, fourcc, fps, size)

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                print("finish")
                break
            copy_frame = frame.copy()
            # Reset per frame so a frame without a face never reuses the
            # boxes or crops of an earlier frame.
            left_right_eye = []
            crop_img = []
            _, landmarks, _ = mtcnn_detector.detect_face(frame, rgb=True)

            if landmarks is not None:
                for face_idx in range(landmarks.shape[0]):
                    points = landmarks[face_idx, :].reshape((point_nums, 2))
                    # Landmark rows used for the left and the right eye.
                    left_eye = np.array(points[[6, 8, 10, 11, 14], :])
                    right_eye = np.array(points[[7, 9, 12, 13, 15], :])
                    for eye in (left_eye, right_eye):
                        left_right_eye.append([
                            np.min(eye[:, 0]),
                            np.min(eye[:, 1]),
                            np.max(eye[:, 0]),
                            np.max(eye[:, 1])
                        ])
                    for point in [*left_eye, *right_eye]:
                        cv2.circle(frame, (int(point[0]), int(point[1])), 2,
                                   (255, 0, 0), -1)

            for xmin, ymin, xmax, ymax in left_right_eye:
                w, h = xmax - xmin, ymax - ymin
                # Pad the shorter side so the box becomes square, then add a
                # margin of k times the longer side on every edge.
                k = 0.1
                ratio = h / w
                if ratio > 1:
                    ratio = ratio - 1
                    xmin -= (ratio / 2 * w + k * h)
                    ymin -= (k * h)
                    xmax += (ratio / 2 * w + k * h)
                    ymax += (k * h)
                else:
                    ratio = w / h - 1
                    xmin -= (k * w)
                    ymin -= (ratio / 2 * h + k * w)
                    xmax += (k * w)
                    ymax += (ratio / 2 * h + k * w)
                cv2.rectangle(frame, (int(xmin), int(ymin)),
                              (int(xmax), int(ymax)), (0, 255, 255), 2)
                # Clamp the start indices: a negative start would wrap around
                # in the NumPy slice and hand cv2.resize an empty image.
                temp_img = copy_frame[max(int(ymin), 0):int(ymax),
                                      max(int(xmin), 0):int(xmax)]
                crop_img.append(cv2.resize(temp_img, (24, 24)))

            if len(crop_img) < 2:
                out.write(frame)
                continue

            # Both eyes side by side form the 24x48 network input.
            compose_img = np.hstack((crop_img[0], crop_img[1]))
            compose_img = cv2.cvtColor(compose_img, cv2.COLOR_BGR2RGB)
            img = data_trans(Image.fromarray(compose_img)).unsqueeze(0)
            with torch.no_grad():
                outputs = mixnet(img.to('cuda:0'))
            class_id = torch.max(outputs, 1)[1].item()
            eye_class = eye_class_dict[class_id]
            color = (255, 0, 255) if class_id == 0 else (255, 255, 0)
            cv2.putText(frame,
                        eye_class, (0, 20),
                        cv2.FONT_HERSHEY_COMPLEX,
                        1.3,
                        color,
                        thickness=2)
            out.write(frame)
    finally:
        # Without release() the output container may never be finalized.
        cap.release()
        out.release()