def main(args):
    cfg = yaml.load(open(args.config), Loader=yaml.FullLoader)
    gpu_mode = args.mode == 'gpu'
    tddfa = TDDFA(gpu_mode=gpu_mode, **cfg)

    # Initialize FaceBoxes
    face_boxes = FaceBoxes()

    # Given a still image path, load it in BGR channel order
    img = cv2.imread(args.img_fp)

    # Detect faces, get 3DMM params and roi boxes
    boxes = face_boxes(img)
    print(f'Detect {len(boxes)} faces')
    param_lst, roi_box_lst = tddfa(img, boxes)

    # Visualization
    ver_lst = tddfa.recon_vers(param_lst, roi_box_lst, dense_flag=args.dense_flag)

    wfp = f'examples/results/{args.img_fp.split("/")[-1].replace(get_suffix(args.img_fp), "")}_dense{args.dense_flag}.jpg'
    draw_landmarks(img, ver_lst, show_flg=args.show_flg, dense_flg=args.dense_flag, wfp=wfp)
def main(args):
    _t = {'det': Timer(), 'reg': Timer(), 'recon': Timer()}

    cfg = yaml.load(open(args.config), Loader=yaml.SafeLoader)

    # Init FaceBoxes and TDDFA
    if args.onnx:
        import os
        os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
        os.environ['OMP_NUM_THREADS'] = '4'

        from FaceBoxes.FaceBoxes_ONNX import FaceBoxes_ONNX
        from TDDFA_ONNX import TDDFA_ONNX

        face_boxes = FaceBoxes_ONNX()
        tddfa = TDDFA_ONNX(**cfg)
    else:
        tddfa = TDDFA(**cfg)
        face_boxes = FaceBoxes()

    # Given a still image path, load it in BGR channel order
    img = cv2.imread(args.img_fp)
    print(f'Input image: {args.img_fp}')
    print(f'Input shape: {img.shape}')

    # Detect faces, get 3DMM params and roi boxes
    if args.warmup:
        print('Warmup by once')
        boxes = face_boxes(img)
        param_lst, roi_box_lst = tddfa(img, boxes)
        ver_lst = tddfa.recon_vers(param_lst, roi_box_lst, dense_flag=args.dense_flag)

    for _ in range(args.repeated):
        img = cv2.imread(args.img_fp)

        _t['det'].tic()
        boxes = face_boxes(img)
        _t['det'].toc()

        n = len(boxes)
        if n == 0:
            print('No face detected, exit')
            sys.exit(-1)

        _t['reg'].tic()
        param_lst, roi_box_lst = tddfa(img, boxes)
        _t['reg'].toc()

        _t['recon'].tic()
        ver_lst = tddfa.recon_vers(param_lst, roi_box_lst, dense_flag=args.dense_flag)
        _t['recon'].toc()

    mode = 'Dense' if args.dense_flag else 'Sparse'
    print(f"Face detection: {_t['det'].average_time * 1000:.2f}ms, "
          f"3DMM regression: {_t['reg'].average_time * 1000:.2f}ms, "
          f"{mode} reconstruction: {_t['recon'].average_time * 1000:.2f}ms")
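# The benchmark above assumes a `Timer` with tic()/toc() and an
# `average_time` attribute. A minimal sketch of a class with that
# interface (an assumption -- the repo presumably ships its own
# implementation in its utils):
import time

class Timer:
    def __init__(self):
        self.total_time = 0.0
        self.calls = 0
        self.start_time = 0.0
        self.average_time = 0.0

    def tic(self):
        # start (or restart) the stopwatch
        self.start_time = time.time()

    def toc(self):
        # accumulate the elapsed time and update the running average
        self.total_time += time.time() - self.start_time
        self.calls += 1
        self.average_time = self.total_time / self.calls
        return self.average_time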
def main(args):
    cfg = yaml.load(open(args.config), Loader=yaml.SafeLoader)
    gpu_mode = args.mode == 'gpu'
    tddfa = TDDFA(gpu_mode=gpu_mode, **cfg)

    # Initialize FaceBoxes
    face_boxes = FaceBoxes()

    # Given a video path
    fn = args.video_fp.split('/')[-1]
    reader = imageio.get_reader(args.video_fp)

    fps = reader.get_meta_data()['fps']

    suffix = get_suffix(args.video_fp)
    video_wfp = f'examples/results/videos/{fn.replace(suffix, "")}_{args.opt}.mp4'
    writer = imageio.get_writer(video_wfp, fps=fps)

    # run
    dense_flag = args.opt in ('3d',)
    pre_ver = None
    for i, frame in tqdm(enumerate(reader)):
        frame_bgr = frame[..., ::-1]  # RGB -> BGR

        if i == 0:
            # the first frame: detect the face. Only the first face is used
            # here; change this depending on your needs.
            boxes = face_boxes(frame_bgr)
            boxes = [boxes[0]]
            param_lst, roi_box_lst = tddfa(frame_bgr, boxes)
            ver = tddfa.recon_vers(param_lst, roi_box_lst, dense_flag=dense_flag)[0]

            # refine
            param_lst, roi_box_lst = tddfa(frame_bgr, [ver], crop_policy='landmark')
            ver = tddfa.recon_vers(param_lst, roi_box_lst, dense_flag=dense_flag)[0]
        else:
            param_lst, roi_box_lst = tddfa(frame_bgr, [pre_ver], crop_policy='landmark')

            roi_box = roi_box_lst[0]
            # todo: add a confidence threshold to judge whether tracking has failed
            if abs(roi_box[2] - roi_box[0]) * abs(roi_box[3] - roi_box[1]) < 2020:
                boxes = face_boxes(frame_bgr)
                boxes = [boxes[0]]
                param_lst, roi_box_lst = tddfa(frame_bgr, boxes)

            ver = tddfa.recon_vers(param_lst, roi_box_lst, dense_flag=dense_flag)[0]

        pre_ver = ver  # for tracking

        if args.opt == '2d':
            res = cv_draw_landmark(frame_bgr, ver)
        elif args.opt == '3d':
            res = render(frame_bgr, [ver])
        else:
            raise Exception(f'Unknown opt {args.opt}')

        writer.append_data(res[..., ::-1])  # BGR -> RGB

    writer.close()
    print(f'Dump to {video_wfp}')
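# The video demos fall back to re-running the detector whenever the
# landmark-derived ROI shrinks below roughly 2020 px^2. A minimal sketch of
# that heuristic as a named helper (the threshold is taken from the code
# above; the helper itself is not part of the repo):
def tracking_failed(roi_box, min_area=2020):
    # roi_box: [x0, y0, x1, y1]; a very small box usually means the
    # landmark-based crop lost the face and detection should be re-run.
    x0, y0, x1, y1 = roi_box[:4]
    return abs(x1 - x0) * abs(y1 - y0) < min_area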
def main(args):
    cfg = yaml.load(open(args.config), Loader=yaml.SafeLoader)

    # Init FaceBoxes and TDDFA; using the onnx flag is recommended
    if args.onnx:
        os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
        os.environ['OMP_NUM_THREADS'] = '4'

        from FaceBoxes.FaceBoxes_ONNX import FaceBoxes_ONNX
        from TDDFA_ONNX import TDDFA_ONNX

        face_boxes = FaceBoxes_ONNX()
        tddfa = TDDFA_ONNX(**cfg)
    else:
        gpu_mode = args.mode == 'gpu'
        tddfa = TDDFA(gpu_mode=gpu_mode, **cfg)
        face_boxes = FaceBoxes()

    # run
    dense_flag = args.opt in ('2d_dense', '3d')
    pre_ver = None

    if not os.path.isdir(args.save_dataset_path):
        os.makedirs(args.save_dataset_path)

    data = glob.glob(args.training_dataset_path + '/*.jpg')
    # print(args.training_dataset_path + '/*.jpg')
    for img_file in tqdm(data):
        # note: '\\' assumes Windows-style paths; os.path.basename would be portable
        label_file = img_file.split('\\')[-1].replace('.jpg', '.txt')
        label_path = os.path.join(args.save_dataset_path, label_file)
        label_f = open(label_path, mode='wt', encoding='utf-8')

        img = cv2.imread(img_file)
        img_save = cv2.resize(img, (640, 480), interpolation=cv2.INTER_LINEAR)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        boxes = face_boxes(img)
        # print(boxes)
        if len(boxes) == 0:
            boxes = [[0, 0, img.shape[1], img.shape[0]]]
        param_lst, roi_box_lst = tddfa(img, boxes)
        ver = tddfa.recon_vers(param_lst, roi_box_lst, dense_flag=dense_flag)[0]

        # refine
        param_lst, roi_box_lst = tddfa(img, [ver], crop_policy='landmark')
        ver = tddfa.recon_vers(param_lst, roi_box_lst, dense_flag=dense_flag)[0]

        # write img
        # cv2.imwrite(label_path.replace('.txt', '.jpg'), img_save)

        # write label: x/w y/h pairs, normalized to the image size
        label = ''
        for pt in range(ver.shape[1]):
            label += str(ver[0, pt] / img.shape[1]) + ' ' + str(ver[1, pt] / img.shape[0]) + ' '
        label += '\n'
        label_f.write(label)
        label_f.close()
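# A sketch of reading one of the label files written above back into pixel
# coordinates. The "x/w y/h ..." format is inferred from the writer; the
# helper itself is an assumption, not part of the repo:
import numpy as np

def read_label(path, img_w, img_h):
    with open(path, encoding='utf-8') as f:
        vals = [float(t) for t in f.read().split()]
    pts = np.array(vals, dtype=np.float32).reshape(-1, 2)
    pts[:, 0] *= img_w  # undo the width normalization
    pts[:, 1] *= img_h  # undo the height normalization
    return pts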
def main(args):
    # Init TDDFA
    cfg = yaml.load(open(args.config), Loader=yaml.SafeLoader)
    gpu_mode = args.mode == 'gpu'
    tddfa = TDDFA(gpu_mode=gpu_mode, **cfg)

    # Init FaceBoxes
    face_boxes = FaceBoxes()

    # Given a still image path, load it in BGR channel order
    img = cv2.imread(args.img_fp)

    # Detect faces, get 3DMM params and roi boxes
    boxes = face_boxes(img)
    n = len(boxes)
    print(f'Detect {n} faces')
    if n == 0:
        print('No face detected, exit')
        sys.exit(-1)

    param_lst, roi_box_lst = tddfa(img, boxes)

    # Visualization and serialization
    dense_flag = args.opt in ('2d_dense', '3d', 'depth', 'pncc', 'uv_tex', 'ply', 'obj')
    old_suffix = get_suffix(args.img_fp)
    new_suffix = f'.{args.opt}' if args.opt in ('ply', 'obj') else '.jpg'

    wfp = f'examples/results/{args.img_fp.split("/")[-1].replace(old_suffix, "")}_{args.opt}' + new_suffix

    ver_lst = tddfa.recon_vers(param_lst, roi_box_lst, dense_flag=dense_flag)

    if args.opt == '2d_sparse':
        draw_landmarks(img, ver_lst, show_flag=args.show_flag, dense_flag=dense_flag, wfp=wfp)
    elif args.opt == '2d_dense':
        draw_landmarks(img, ver_lst, show_flag=args.show_flag, dense_flag=dense_flag, wfp=wfp)
    elif args.opt == '3d':
        render(img, ver_lst, alpha=0.6, show_flag=args.show_flag, wfp=wfp)
    elif args.opt == 'depth':
        # if `with_bg_flag` is False, the background is black
        depth(img, ver_lst, show_flag=args.show_flag, wfp=wfp, with_bg_flag=True)
    elif args.opt == 'pncc':
        pncc(img, ver_lst, show_flag=args.show_flag, wfp=wfp, with_bg_flag=True)
    elif args.opt == 'uv_tex':
        uv_tex(img, ver_lst, show_flag=args.show_flag, wfp=wfp)
    elif args.opt == 'pose':
        viz_pose(img, param_lst, ver_lst, show_flag=args.show_flag, wfp=wfp)
    elif args.opt == 'ply':
        ser_to_ply(ver_lst, height=img.shape[0], wfp=wfp)
    elif args.opt == 'obj':
        ser_to_obj(img, ver_lst, height=img.shape[0], wfp=wfp)
    else:
        raise ValueError(f'Unknown opt {args.opt}')
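# A hypothetical driver for the still-image demo above. The flag names are
# inferred from the `args.*` attributes the function reads; they are
# assumptions, not necessarily the repo's actual CLI:
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='3DDFA_V2 still-image demo (sketch)')
    parser.add_argument('-c', '--config', default='configs/mb1_120x120.yml')
    parser.add_argument('-f', '--img_fp', default='examples/inputs/trump_hillary.jpg')
    parser.add_argument('-m', '--mode', default='cpu', choices=['cpu', 'gpu'])
    parser.add_argument('-o', '--opt', default='2d_sparse',
                        choices=['2d_sparse', '2d_dense', '3d', 'depth', 'pncc',
                                 'uv_tex', 'pose', 'ply', 'obj'])
    parser.add_argument('--show_flag', action='store_true')
    main(parser.parse_args())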
def main(args):
    cfg = yaml.load(open(args.config), Loader=yaml.FullLoader)
    gpu_mode = args.mode == 'gpu'
    tddfa = TDDFA(gpu_mode=gpu_mode, **cfg)

    # Initialize FaceBoxes
    face_boxes = FaceBoxes()

    # Given a video path
    fn = args.video_fp.split('/')[-1]
    reader = imageio.get_reader(args.video_fp)

    fps = reader.get_meta_data()['fps']

    video_wfp = f'examples/results/videos/{fn.replace(".avi", ".mp4")}'
    writer = imageio.get_writer(video_wfp, fps=fps)

    pre_ver = None
    for i, frame in tqdm(enumerate(reader)):
        frame_bgr = frame[:, :, ::-1]  # RGB -> BGR

        if i == 0:
            # the first frame: detect the face. Only the first face is used
            # here; change this depending on your needs.
            boxes = face_boxes(frame_bgr)
            boxes = [boxes[0]]
            param_lst, roi_box_lst = tddfa(frame_bgr, boxes)
            ver = tddfa.recon_vers(param_lst, roi_box_lst)[0]

            # refine
            param_lst, roi_box_lst = tddfa(frame_bgr, [ver], crop_policy='landmark')
            ver = tddfa.recon_vers(param_lst, roi_box_lst)[0]
        else:
            param_lst, roi_box_lst = tddfa(frame_bgr, [pre_ver], crop_policy='landmark')

            roi_box = roi_box_lst[0]
            # todo: add a confidence threshold to judge whether tracking has failed
            if abs(roi_box[2] - roi_box[0]) * abs(roi_box[3] - roi_box[1]) < 2020:
                boxes = face_boxes(frame_bgr)
                boxes = [boxes[0]]
                param_lst, roi_box_lst = tddfa(frame_bgr, boxes)

            ver = tddfa.recon_vers(param_lst, roi_box_lst)[0]

        pre_ver = ver  # for tracking

        img_draw = cv_draw_landmark(frame_bgr, ver)
        writer.append_data(img_draw[:, :, ::-1])  # BGR -> RGB

    writer.close()
    print(f'Dump to {video_wfp}')
def main(args):
    # Init TDDFA
    cfg = yaml.load(open(args.config), Loader=yaml.SafeLoader)
    gpu_mode = args.mode == 'gpu'
    tddfa = TDDFA(gpu_mode=gpu_mode, **cfg)

    # Init FaceBoxes
    face_boxes = FaceBoxes()

    # Given a still image path, load it in BGR channel order
    img = cv2.imread(args.img_fp)

    # Detect faces, get 3DMM params and roi boxes
    boxes = face_boxes(img)
    n = len(boxes)
    print(f'Detect {n} faces')
    if n == 0:
        print('No face detected, exit')
        sys.exit(-1)

    param_lst, roi_box_lst = tddfa(img, boxes)

    # Visualization and serialization
    # if opt is 2d_dense, 3d, or depth, reconstruct dense vertices
    dense_flag = args.opt in ('2d_dense', '3d', 'depth')
    ver_lst = tddfa.recon_vers(param_lst, roi_box_lst, dense_flag=dense_flag)

    suffix = get_suffix(args.img_fp)
    wfp = f'examples/results/{args.img_fp.split("/")[-1].replace(suffix, "")}_{args.opt}.jpg'

    if args.opt == '2d_sparse':
        draw_landmarks(img, ver_lst, show_flag=args.show_flag, dense_flag=dense_flag, wfp=wfp)
    elif args.opt == '2d_dense':
        draw_landmarks(img, ver_lst, show_flag=args.show_flag, dense_flag=dense_flag, wfp=wfp)
    elif args.opt == '3d':
        render(img, ver_lst, alpha=0.6, show_flag=args.show_flag, wfp=wfp)
    elif args.opt == 'depth':
        depth(img, ver_lst, show_flag=args.show_flag, wfp=wfp)
    else:
        raise Exception(f'Unknown opt {args.opt}')
def main(args): # Init TDDFA or TDDFA_ONNX cfg = yaml.load(open(args.config), Loader=yaml.SafeLoader) if args.onnx: from TDDFA_ONNX import TDDFA_ONNX tddfa = TDDFA_ONNX(**cfg) else: gpu_mode = args.mode == 'gpu' tddfa = TDDFA(gpu_mode=gpu_mode, **cfg) # Initialize FaceBoxes face_boxes = FaceBoxes() # Given a video path fn = args.video_fp.split('/')[-1] reader = imageio.get_reader(args.video_fp) fps = reader.get_meta_data()['fps'] suffix = get_suffix(args.video_fp) video_wfp = f'examples/results/videos/{fn.replace(suffix, "")}_{args.opt}_smooth.mp4' writer = imageio.get_writer(video_wfp, fps=fps) # the simple implementation of average smoothing by looking ahead by n_next frames # assert the frames of the video >= n n_pre, n_next = args.n_pre, args.n_next n = n_pre + n_next + 1 queue_ver = deque() queue_frame = deque() # run dense_flag = args.opt in ( '2d_dense', '3d', ) pre_ver = None for i, frame in tqdm(enumerate(reader)): if args.start > 0 and i < args.start: continue if args.end > 0 and i > args.end: break frame_bgr = frame[..., ::-1] # RGB->BGR if i == 0: # detect boxes = face_boxes(frame_bgr) boxes = [boxes[0]] param_lst, roi_box_lst = tddfa(frame_bgr, boxes) ver = tddfa.recon_vers(param_lst, roi_box_lst, dense_flag=dense_flag)[0] # refine param_lst, roi_box_lst = tddfa(frame_bgr, [ver], crop_policy='landmark') ver = tddfa.recon_vers(param_lst, roi_box_lst, dense_flag=dense_flag)[0] # padding queue for _ in range(n_pre): queue_ver.append(ver.copy()) queue_ver.append(ver.copy()) for _ in range(n_pre): queue_frame.append(frame_bgr.copy()) queue_frame.append(frame_bgr.copy()) else: param_lst, roi_box_lst = tddfa(frame_bgr, [pre_ver], crop_policy='landmark') roi_box = roi_box_lst[0] # todo: add confidence threshold to judge the tracking is failed if abs(roi_box[2] - roi_box[0]) * abs(roi_box[3] - roi_box[1]) < 2020: boxes = face_boxes(frame_bgr) boxes = [boxes[0]] param_lst, roi_box_lst = tddfa(frame_bgr, boxes) ver = tddfa.recon_vers(param_lst, roi_box_lst, dense_flag=dense_flag)[0] queue_ver.append(ver.copy()) queue_frame.append(frame_bgr.copy()) pre_ver = ver # for tracking # smoothing: enqueue and dequeue ops if len(queue_ver) >= n: ver_ave = np.mean(queue_ver, axis=0) if args.opt == '2d_sparse': img_draw = cv_draw_landmark(queue_frame[n_pre], ver_ave) # since we use padding elif args.opt == '2d_dense': img_draw = cv_draw_landmark(queue_frame[n_pre], ver_ave, size=1) elif args.opt == '3d': img_draw = render(queue_frame[n_pre], [ver_ave], tddfa.bfm.tri, alpha=0.7) else: raise ValueError(f'Unknown opt {args.opt}') writer.append_data(img_draw[:, :, ::-1]) # BGR->RGB queue_ver.popleft() queue_frame.popleft() # we will lost the last n_next frames, still padding for _ in range(n_next): queue_ver.append(ver.copy()) queue_frame.append(frame_bgr.copy()) # the last frame ver_ave = np.mean(queue_ver, axis=0) if args.opt == '2d_sparse': img_draw = cv_draw_landmark(queue_frame[n_pre], ver_ave) # since we use padding elif args.opt == '2d_dense': img_draw = cv_draw_landmark(queue_frame[n_pre], ver_ave, size=1) elif args.opt == '3d': img_draw = render(queue_frame[n_pre], [ver_ave], tddfa.bfm.tri, alpha=0.7) else: raise ValueError(f'Unknown opt {args.opt}') writer.append_data(img_draw[..., ::-1]) # BGR->RGB queue_ver.popleft() queue_frame.popleft() writer.close() print(f'Dump to {video_wfp}')
def main(args):
    # Init TDDFA or TDDFA_ONNX
    cfg = yaml.load(open(args.config), Loader=yaml.SafeLoader)
    if args.onnx:
        from TDDFA_ONNX import TDDFA_ONNX
        tddfa = TDDFA_ONNX(**cfg)
    else:
        gpu_mode = args.mode == 'gpu'
        tddfa = TDDFA(gpu_mode=gpu_mode, **cfg)

    # Initialize FaceBoxes
    face_boxes = FaceBoxes()

    # Given a video path
    fn = args.video_fp.split('/')[-1]
    reader = imageio.get_reader(args.video_fp)

    fps = reader.get_meta_data()['fps']
    dense_flag = args.opt in ('3d',)

    nick_orig = cv2.imread('Assets4FacePaper/nick.bmp')

    tv_neutral, tc_neutral, tv_set, tc_set, nick_param = load_vertices_set(
        'Assets4FacePaper',
        ['sam_0_0.jpg', 'sam_0_1.jpg', 'sam_0_2.jpg', 'sam_0_3.jpg',
         'sam_0_4.jpg', 'sam_0_5.jpg', 'sam_0_6.jpg'],
        face_boxes, tddfa
    )

    sv_neutral, sc_neutral, sv_set, sc_set, thanh_param = load_vertices_set(
        'Assets4FacePaper',
        ['Thanh.jpg', 'Thanh1.jpg', 'Thanh2.jpg', 'Thanh3.jpg',
         'Thanh4.jpg', 'Thanh5.jpg', 'Thanh6.jpg'],
        face_boxes, tddfa
    )

    _, _, nick_alpha_shp, nick_alpha_exp = _parse_param(nick_param)
    nick_act_masks = calc_activation_masks(tv_neutral, tv_set)
    delta_set_nick = get_delta_vertices_set(tv_neutral, tv_set)

    _, _, thanh_alpha_shp, thanh_alpha_exp = _parse_param(thanh_param)
    thanh_act_masks = calc_activation_masks(sv_neutral, sv_set)
    delta_set_thanh = get_delta_vertices_set(sv_neutral, sv_set)

    nick_ver = tc_neutral
    nick_color = tc_neutral

    suffix = get_suffix(args.video_fp)
    video_wfp = f'examples/results/videos/{fn.replace(suffix, "")}_{args.opt}.mp4'

    # run
    pre_ver = None
    for i, frame in tqdm(enumerate(reader)):
        frame_bgr = frame[..., ::-1]  # RGB -> BGR

        if i == 0:
            # the first frame: detect the face. Only the first face is used
            # here; change this depending on your needs.
            boxes = face_boxes(frame_bgr)
            boxes = [boxes[0]]
            param_lst, roi_box_lst = tddfa(frame_bgr, boxes)
            ver = tddfa.recon_vers(param_lst, roi_box_lst, dense_flag=dense_flag)[0]

            # refine
            param_lst, roi_box_lst = tddfa(frame_bgr, [ver], crop_policy='landmark')
            R, offset, _, alpha_exp = _parse_param(param_lst[0])
            ver = recon_dense_explicit(R, offset, nick_alpha_shp, alpha_exp,
                                       roi_box_lst[0], tddfa.size)
        else:
            param_lst, roi_box_lst = tddfa(frame_bgr, [pre_ver], crop_policy='landmark')

            roi_box = roi_box_lst[0]
            # todo: add a confidence threshold to judge whether tracking has failed
            if abs(roi_box[2] - roi_box[0]) * abs(roi_box[3] - roi_box[1]) < 2020:
                boxes = face_boxes(frame_bgr)
                boxes = [boxes[0]]
                param_lst, roi_box_lst = tddfa(frame_bgr, boxes)

            R, offset, _, alpha_exp = _parse_param(param_lst[0])
            ver = recon_dense_explicit(R, offset, nick_alpha_shp, alpha_exp,
                                       roi_box_lst[0], tddfa.size)

        # Write object
        pre_ver = ver  # for tracking
        ser_to_obj_multiple_colors(nick_color, [ver],
                                   height=int(nick_orig.shape[0] * 1.5),
                                   wfp='./sam_exp_only_obj/' + str(i) + '.obj')
        print(f'Dump to sam_exp_only_obj/{str(i)}.obj')

        if i > 1000:
            break
def main(args):
    cfg = yaml.load(open(args.config), Loader=yaml.SafeLoader)

    # Init FaceBoxes and TDDFA; using the onnx flag is recommended
    if args.onnx:
        import os
        os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
        os.environ['OMP_NUM_THREADS'] = '4'

        from FaceBoxes.FaceBoxes_ONNX import FaceBoxes_ONNX
        from TDDFA_ONNX import TDDFA_ONNX

        face_boxes = FaceBoxes_ONNX()
        tddfa = TDDFA_ONNX(**cfg)
    else:
        gpu_mode = args.mode == 'gpu'
        tddfa = TDDFA(gpu_mode=gpu_mode, **cfg)
        face_boxes = FaceBoxes()

    # Given a still image or video path, load frames in BGR channel order
    print(args.img_fp)
    if not os.path.isfile(args.img_fp):
        raise ValueError('--face argument must be a valid path to a video/image file')
    elif args.img_fp.split('.')[-1] in ['jpg', 'png', 'jpeg']:
        full_frames = [cv2.imread(args.img_fp)]
    else:
        video_stream = cv2.VideoCapture(args.img_fp)
        fps = video_stream.get(cv2.CAP_PROP_FPS)

        print('Reading video frames...')
        full_frames = []
        while True:
            still_reading, frame = video_stream.read()
            if not still_reading:
                video_stream.release()
                break
            full_frames.append(frame)

    print('num of frames:', len(full_frames))

    for i, img in enumerate(full_frames):
        # Detect faces, get 3DMM params and roi boxes
        boxes = face_boxes(img)
        n = len(boxes)
        if n == 0:
            print('No face detected, exit')
            sys.exit(-1)
        print(f'Detect {n} faces')

        param_lst, roi_box_lst = tddfa(img, boxes)

        # Visualization and serialization
        dense_flag = args.opt in ('2d_dense', '3d', 'depth', 'pncc', 'uv_tex', 'ply', 'obj')
        old_suffix = get_suffix(args.img_fp)
        new_suffix = f'.{args.opt}' if args.opt in ('ply', 'obj') else '.jpg'

        idx = str(i).zfill(4)
        wfp = f'examples/results/obj/{args.img_fp.split("/")[-1].replace(old_suffix, "")}_{args.opt}' + idx + new_suffix

        ver_lst = tddfa.recon_vers(param_lst, roi_box_lst, dense_flag=dense_flag)

        if args.opt == 'obj':
            ser_to_obj(img, ver_lst, tddfa.tri, height=img.shape[0], wfp=wfp)
        # elif args.opt == '2d_sparse':
        #     draw_landmarks(img, ver_lst, show_flag=args.show_flag, dense_flag=dense_flag, wfp=wfp)
        # elif args.opt == '2d_dense':
        #     draw_landmarks(img, ver_lst, show_flag=args.show_flag, dense_flag=dense_flag, wfp=wfp)
        # elif args.opt == '3d':
        #     render(img, ver_lst, tddfa.tri, alpha=0.6, show_flag=args.show_flag, wfp=wfp)
        # elif args.opt == 'depth':
        #     # if `with_bg_flag` is False, the background is black
        #     depth(img, ver_lst, tddfa.tri, show_flag=args.show_flag, wfp=wfp, with_bg_flag=True)
        elif args.opt == 'pncc':
            pncc(img, ver_lst, tddfa.tri, show_flag=args.show_flag, wfp=wfp, with_bg_flag=True)
        elif args.opt == 'uv_tex':
            uv_tex(img, ver_lst, tddfa.tri, show_flag=args.show_flag, wfp=wfp)
        # elif args.opt == 'pose':
        #     viz_pose(img, param_lst, ver_lst, show_flag=args.show_flag, wfp=wfp)
        # elif args.opt == 'ply':
        #     ser_to_ply(ver_lst, tddfa.tri, height=img.shape[0], wfp=wfp)
        else:
            raise ValueError(f'Unknown opt {args.opt}')
cfg = yaml.load(open('/home/wei/exp/data_process/DDFA_V2/configs/mb1_120x120.yml'),
                Loader=yaml.SafeLoader)

# Init FaceBoxes and TDDFA; using the onnx flag is recommended
# if args.onnx:
#     import os
#     os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
#     os.environ['OMP_NUM_THREADS'] = '4'
#     from FaceBoxes.FaceBoxes_ONNX import FaceBoxes_ONNX
#     from TDDFA_ONNX import TDDFA_ONNX
#     face_boxes = FaceBoxes_ONNX()
#     tddfa = TDDFA_ONNX(**cfg)
# else:
gpu_mode = True
tddfa = TDDFA(gpu_mode=gpu_mode, **cfg)
face_boxes = FaceBoxes()


def main(args, img):
    global tddfa
    global face_boxes

    # Given a still image path, load it in BGR channel order
    img = cv2.imread(args.img_fp)
    # img = img.astype('unsigned char')
    # print(img.shape)
    # print(img2.shape)

    # Detect faces, get 3DMM params and roi boxes
    boxes = face_boxes(img)
    # boxes = face_boxes(img)
    n = len(boxes)
def main(args):
    cfg = yaml.load(open(args.config), Loader=yaml.SafeLoader)

    # Init FaceBoxes and TDDFA
    gpu_mode = args.mode == 'gpu'
    tddfa = TDDFA(gpu_mode=gpu_mode, **cfg)
    face_boxes = FaceBoxes()

    img_list = [args.img_fp]
    for file in img_list:
        if not os.path.isfile(file):
            print('Error: not found: "' + str(file) + '".')
            continue

        # Given a still image path, load it in BGR channel order
        img = cv2.imread(file)

        # Detect faces, get 3DMM params and roi boxes
        boxes = face_boxes(img)
        n = len(boxes)
        if n == 0:
            print('No face detected, exit')
            sys.exit(-1)
        print(f'Detect {n} faces')

        param_lst, roi_box_lst = tddfa(img, boxes)

        # Visualization and serialization
        dense_flag = True
        ver_lst = tddfa.recon_vers(param_lst, roi_box_lst, dense_flag=dense_flag)

        # flip the y-axis (image coordinates grow downward) and reverse the
        # triangle winding so the exported mesh keeps outward-facing normals
        lm68 = np.reshape(np.reshape(ver_lst[0].T, (-1, 1))[tddfa.bfm.keypoints], (-1, 3))
        for i in range(lm68.shape[0]):
            lm68[i, 1] = img.shape[0] - lm68[i, 1]
        for i in range(ver_lst[0].shape[1]):
            ver_lst[0][1, i] = img.shape[0] - ver_lst[0][1, i]
        vertices = ver_lst[0].T

        useful_tri = np.copy(tddfa.tri)
        for i in range(useful_tri.shape[0]):
            useful_tri[i, 0], useful_tri[i, 2] = useful_tri[i, 2], useful_tri[i, 0]
        # useful_tri = useful_tri + 1

        # save
        if args.obj:
            obj = lib.wavefront.Wavefront()
            obj.vertices = vertices
            obj.facevertices = useful_tri + 1  # OBJ indices are 1-based
            out_file = file[:-4] + '.obj'
            obj.write(out_file)
        else:
            mesh = dict()
            mesh['vertices'] = vertices
            mesh['faces'] = useful_tri
            mesh['lm68'] = lm68
            out_file = file[:-4] + '.pickle'
            with open(out_file, 'wb+') as f:
                _pickle.dump(mesh, f)
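# The y-flip plus winding reversal repeated by several exporters here can be
# factored into one helper. This is a sketch of that shared logic, not a
# function from the repo (swapping triangle columns 0 and 2 is the same as
# reversing each row):
import numpy as np

def to_mesh_convention(ver, tri, img_h):
    # ver: 3 x N vertices in image coordinates (y grows downward);
    # tri: M x 3 triangle indices. Returns (N x 3 vertices with y flipped
    # to a y-up convention, M x 3 triangles with reversed winding so the
    # normals still point outward after the flip).
    ver = ver.copy()
    ver[1, :] = img_h - ver[1, :]
    tri = tri[:, ::-1].copy()
    return ver.T, tri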
def main(args):
    cfg = yaml.load(open(args.config), Loader=yaml.SafeLoader)

    # Init FaceBoxes and TDDFA
    gpu_mode = args.mode == 'gpu'
    tddfa = TDDFA(gpu_mode=gpu_mode, **cfg)
    face_boxes = FaceBoxes()

    dataset = args.dataset.lower().strip()
    if dataset.startswith('f'):
        dataset = 'facescape'
    else:
        dataset = 'bu4dfe'

    files = []
    if dataset == 'facescape':
        save_path = fs_save_path
        data_path = fs_path
        files = sorted([str(f) for f in pathlib.Path(data_path).rglob('*.jpg')
                        if int(f.parent.name) >= 344])
    else:
        save_path = bu4_save_path
        data_path = bu4_path
        keynums = [25, 70]
        for n in keynums:
            files += [str(f) for f in pathlib.Path(data_path).rglob('*' + str(n).zfill(3) + '.jpg')]
        files += [str(f) + '/000.jpg' for f in pathlib.Path(data_path).rglob('Angry')
                  if os.path.isdir(str(f))]
        files = sorted(files)

    for file in files:
        out_file = save_path + file[len(data_path):-4] + '.obj'
        out_dir = str(pathlib.Path(out_file).parent)
        if not os.path.isdir(out_dir):
            os.makedirs(out_dir)
        elif os.path.isfile(out_file):
            print('Skipping file: "' + file + '".')
            continue

        # Given a still image path, load it in BGR channel order
        img = cv2.imread(file)

        # Detect faces, get 3DMM params and roi boxes
        boxes = face_boxes(img)
        n = len(boxes)
        if n == 0:
            print('No face detected, skipping "' + file + '".')
            continue
        # print(f'Detect {n} faces')

        param_lst, roi_box_lst = tddfa(img, boxes)

        # Visualization and serialization
        dense_flag = True
        ver_lst = tddfa.recon_vers(param_lst, roi_box_lst, dense_flag=dense_flag)

        # flip the y-axis and reverse the triangle winding (see above)
        lm68 = np.reshape(np.reshape(ver_lst[0].T, (-1, 1))[tddfa.bfm.keypoints], (-1, 3))
        for i in range(lm68.shape[0]):
            lm68[i, 1] = img.shape[0] - lm68[i, 1]
        for i in range(ver_lst[0].shape[1]):
            ver_lst[0][1, i] = img.shape[0] - ver_lst[0][1, i]
        vertices = ver_lst[0].T

        useful_tri = np.copy(tddfa.tri)
        for i in range(useful_tri.shape[0]):
            useful_tri[i, 0], useful_tri[i, 2] = useful_tri[i, 2], useful_tri[i, 0]
        # useful_tri = useful_tri + 1

        # save (the pickle branch is active; flip the condition to write .obj instead)
        if True:
            mesh = dict()
            mesh['vertices'] = vertices
            mesh['faces'] = useful_tri
            mesh['lm68'] = lm68
            out_file = out_file[:-4] + '.pickle'
            with open(out_file, 'wb+') as f:
                _pickle.dump(mesh, f)
        else:
            obj = lib.wavefront.Wavefront()
            obj.vertices = vertices
            obj.facevertices = useful_tri + 1  # OBJ indices are 1-based
            obj.write(out_file)
        print(out_file)
def main(args):
    cfg = yaml.load(open(args.config), Loader=yaml.SafeLoader)
    gpu_mode = args.mode == 'gpu'
    tddfa = TDDFA(gpu_mode=gpu_mode, **cfg)

    # Initialize FaceBoxes
    face_boxes = FaceBoxes()

    # Given a camera
    # before running this line, make sure you have installed `imageio-ffmpeg`
    reader = imageio.get_reader("<video0>")

    # a simple implementation of average smoothing by looking ahead n_next frames
    # assumes the stream provides at least n frames
    n_pre, n_next = args.n_pre, args.n_next
    n = n_pre + n_next + 1
    queue_ver = deque()
    queue_frame = deque()

    # run
    dense_flag = args.opt in ('2d_dense', '3d')
    pre_ver = None
    for i, frame in tqdm(enumerate(reader)):
        frame_bgr = frame[..., ::-1]  # RGB -> BGR

        if i == 0:
            # the first frame: detect the face. Only the first face is used
            # here; change this depending on your needs.
            boxes = face_boxes(frame_bgr)
            boxes = [boxes[0]]
            param_lst, roi_box_lst = tddfa(frame_bgr, boxes)
            ver = tddfa.recon_vers(param_lst, roi_box_lst, dense_flag=dense_flag)[0]

            # refine
            param_lst, roi_box_lst = tddfa(frame_bgr, [ver], crop_policy='landmark')
            ver = tddfa.recon_vers(param_lst, roi_box_lst, dense_flag=dense_flag)[0]

            # pad the queues
            for _ in range(n_pre):
                queue_ver.append(ver.copy())
            queue_ver.append(ver.copy())

            for _ in range(n_pre):
                queue_frame.append(frame_bgr.copy())
            queue_frame.append(frame_bgr.copy())
        else:
            param_lst, roi_box_lst = tddfa(frame_bgr, [pre_ver], crop_policy='landmark')

            roi_box = roi_box_lst[0]
            # todo: add a confidence threshold to judge whether tracking has failed
            if abs(roi_box[2] - roi_box[0]) * abs(roi_box[3] - roi_box[1]) < 2020:
                boxes = face_boxes(frame_bgr)
                boxes = [boxes[0]]
                param_lst, roi_box_lst = tddfa(frame_bgr, boxes)

            ver = tddfa.recon_vers(param_lst, roi_box_lst, dense_flag=dense_flag)[0]

            queue_ver.append(ver.copy())
            queue_frame.append(frame_bgr.copy())

        pre_ver = ver  # for tracking

        # smoothing: enqueue and dequeue ops
        if len(queue_ver) >= n:
            ver_ave = np.mean(queue_ver, axis=0)
            img_draw = cv_draw_landmark(queue_frame[n_pre], ver_ave)  # since we use padding

            cv2.imshow('image', img_draw)
            k = cv2.waitKey(20)
            if k & 0xff == ord('q'):
                break

            queue_ver.popleft()
            queue_frame.popleft()
def main(args):
    cfg = yaml.load(open(args.config), Loader=yaml.SafeLoader)
    gpu_mode = args.mode == 'gpu'
    tddfa = TDDFA(gpu_mode=gpu_mode, **cfg)

    # Initialize FaceBoxes
    face_boxes = FaceBoxes()

    # Given a video path
    fn = args.video_fp.split('/')[-1]
    reader = imageio.get_reader(args.video_fp)

    fps = reader.get_meta_data()['fps']

    video_wfp = f'examples/results/videos/{fn.replace(".avi", "_smooth.mp4")}'
    writer = imageio.get_writer(video_wfp, fps=fps)

    # a simple implementation of average smoothing by looking ahead n_next frames
    # assumes the video has at least n frames
    n_pre, n_next = args.n_pre, args.n_next
    n = n_pre + n_next + 1
    queue_ver = deque()
    queue_frame = deque()

    pre_ver = None
    for i, frame in tqdm(enumerate(reader)):
        frame_bgr = frame[..., ::-1]  # RGB -> BGR

        if i == 0:
            # detect
            boxes = face_boxes(frame_bgr)
            boxes = [boxes[0]]
            param_lst, roi_box_lst = tddfa(frame_bgr, boxes)
            ver = tddfa.recon_vers(param_lst, roi_box_lst)[0]

            # refine
            param_lst, roi_box_lst = tddfa(frame_bgr, [ver], crop_policy='landmark')
            ver = tddfa.recon_vers(param_lst, roi_box_lst)[0]

            # pad the queues
            for j in range(n_pre):
                queue_ver.append(ver.copy())
            queue_ver.append(ver.copy())

            for j in range(n_pre):
                queue_frame.append(frame_bgr.copy())
            queue_frame.append(frame_bgr.copy())
        else:
            param_lst, roi_box_lst = tddfa(frame_bgr, [pre_ver], crop_policy='landmark')

            roi_box = roi_box_lst[0]
            # todo: add a confidence threshold to judge whether tracking has failed
            if abs(roi_box[2] - roi_box[0]) * abs(roi_box[3] - roi_box[1]) < 2020:
                boxes = face_boxes(frame_bgr)
                boxes = [boxes[0]]
                param_lst, roi_box_lst = tddfa(frame_bgr, boxes)

            ver = tddfa.recon_vers(param_lst, roi_box_lst)[0]

            queue_ver.append(ver.copy())
            queue_frame.append(frame_bgr.copy())

        pre_ver = ver  # for tracking

        # smoothing: enqueue and dequeue ops
        if len(queue_ver) >= n:
            ver_ave = np.mean(queue_ver, axis=0)
            img_draw = cv_draw_landmark(queue_frame[n_pre], ver_ave)  # since we use padding
            writer.append_data(img_draw[:, :, ::-1])  # BGR -> RGB

            queue_ver.popleft()
            queue_frame.popleft()

    # we would lose the last n_next frames, so keep padding
    for j in range(n_next):
        queue_ver.append(ver.copy())
        queue_frame.append(frame_bgr.copy())  # the last frame

        ver_ave = np.mean(queue_ver, axis=0)
        img_draw = cv_draw_landmark(queue_frame[n_pre], ver_ave)  # since we use padding
        writer.append_data(img_draw[..., ::-1])  # BGR -> RGB

        queue_ver.popleft()
        queue_frame.popleft()

    writer.close()
    print(f'Dump to {video_wfp}')
def main(args): cfg = yaml.load(open(args.config), Loader=yaml.SafeLoader) # Init FaceBoxes and TDDFA, recommend using onnx flag if args.onnx: import os os.environ['KMP_DUPLICATE_LIB_OK'] = 'True' os.environ['OMP_NUM_THREADS'] = '4' from FaceBoxes.FaceBoxes_ONNX import FaceBoxes_ONNX from TDDFA_ONNX import TDDFA_ONNX face_boxes = FaceBoxes_ONNX() tddfa = TDDFA_ONNX(**cfg) else: gpu_mode = args.mode == 'gpu' tddfa = TDDFA(gpu_mode=gpu_mode, **cfg) face_boxes = FaceBoxes() # Given a still image path and load to BGR channel img = cv2.imread(args.img_fp) # Detect faces, get 3DMM params and roi boxes boxes = face_boxes(img) n = len(boxes) if n == 0: print(f'No face detected, exit') sys.exit(-1) print(f'Detect {n} faces') param_lst, roi_box_lst = tddfa(img, boxes) # Visualization and serialization dense_flag = args.opt in ('2d_dense', '3d', 'depth', 'pncc', 'uv_tex', 'ply', 'obj') old_suffix = get_suffix(args.img_fp) new_suffix = f'.{args.opt}' if args.opt in ('ply', 'obj') else '.jpg' wfp = f'examples/results/{args.img_fp.split("/")[-1].replace(old_suffix, "")}_{args.opt}' + new_suffix ver_lst = tddfa.recon_vers(param_lst, roi_box_lst, dense_flag=dense_flag) print(ver_lst[0].shape) print(tddfa.bfm.u.shape) lm68 = np.reshape( np.reshape(ver_lst[0].T, (-1, 1))[tddfa.bfm.keypoints], (-1, 3)) print(lm68.shape) for i in range(lm68.shape[0]): lm68[i, 1] = img.shape[0] - lm68[i, 1] for i in range(ver_lst[0].shape[1]): ver_lst[0][1, i] = img.shape[0] - ver_lst[0][1, i] useful_tri = np.copy(tddfa.tri) for i in range(useful_tri.shape[0]): tmp = useful_tri[i, 2] useful_tri[i, 2] = useful_tri[i, 0] useful_tri[i, 0] = tmp useful_tri = useful_tri + 1 np.save("asd_lm.npy", lm68) np.save("asd_v.npy", ver_lst[0].T) np.save("asd_f.npy", useful_tri) if args.opt == '2d_sparse': draw_landmarks(img, ver_lst, show_flag=args.show_flag, dense_flag=dense_flag, wfp=wfp) elif args.opt == '2d_dense': draw_landmarks(img, ver_lst, show_flag=args.show_flag, dense_flag=dense_flag, wfp=wfp) elif args.opt == '3d': render(img, ver_lst, tddfa.tri, alpha=0.6, show_flag=args.show_flag, wfp=wfp) elif args.opt == 'depth': # if `with_bf_flag` is False, the background is black depth(img, ver_lst, tddfa.tri, show_flag=args.show_flag, wfp=wfp, with_bg_flag=True) elif args.opt == 'pncc': pncc(img, ver_lst, tddfa.tri, show_flag=args.show_flag, wfp=wfp, with_bg_flag=True) elif args.opt == 'uv_tex': uv_tex(img, ver_lst, tddfa.tri, show_flag=args.show_flag, wfp=wfp) elif args.opt == 'pose': viz_pose(img, param_lst, ver_lst, show_flag=args.show_flag, wfp=wfp) elif args.opt == 'ply': ser_to_ply(ver_lst, tddfa.tri, height=img.shape[0], wfp=wfp) elif args.opt == 'obj': ser_to_obj(img, ver_lst, tddfa.tri, height=img.shape[0], wfp=wfp) else: raise ValueError(f'Unknown opt {args.opt}')
def main(args):
    cfg = yaml.load(open(args.config), Loader=yaml.SafeLoader)

    # Init FaceBoxes and TDDFA; using the onnx flag is recommended
    if args.onnx:
        import os
        os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
        os.environ['OMP_NUM_THREADS'] = '4'

        from FaceBoxes.FaceBoxes_ONNX import FaceBoxes_ONNX
        from TDDFA_ONNX import TDDFA_ONNX

        face_boxes = FaceBoxes_ONNX()
        tddfa = TDDFA_ONNX(**cfg)
    else:
        gpu_mode = args.mode == 'gpu'
        tddfa = TDDFA(gpu_mode=gpu_mode, **cfg)
        face_boxes = FaceBoxes()

    # Given a camera
    # before running this line, make sure you have installed `imageio-ffmpeg`
    reader = imageio.get_reader("<video0>")

    # a simple implementation of average smoothing by looking ahead n_next frames
    # assumes the stream provides at least n frames
    n_pre, n_next = args.n_pre, args.n_next
    n = n_pre + n_next + 1
    queue_ver = deque()
    queue_frame = deque()

    # run
    dense_flag = args.opt in ('2d_dense', '3d')
    pre_ver = None
    for i, frame in tqdm(enumerate(reader)):
        frame_bgr = frame[..., ::-1]  # RGB -> BGR

        if i == 0:
            # the first frame: detect the face. Only the first face is used
            # here; change this depending on your needs.
            boxes = face_boxes(frame_bgr)
            boxes = [boxes[0]]
            param_lst, roi_box_lst = tddfa(frame_bgr, boxes)
            ver = tddfa.recon_vers(param_lst, roi_box_lst, dense_flag=dense_flag)[0]

            # refine
            param_lst, roi_box_lst = tddfa(frame_bgr, [ver], crop_policy='landmark')
            ver = tddfa.recon_vers(param_lst, roi_box_lst, dense_flag=dense_flag)[0]

            # pad the queues
            for _ in range(n_pre):
                queue_ver.append(ver.copy())
            queue_ver.append(ver.copy())

            for _ in range(n_pre):
                queue_frame.append(frame_bgr.copy())
            queue_frame.append(frame_bgr.copy())
        else:
            param_lst, roi_box_lst = tddfa(frame_bgr, [pre_ver], crop_policy='landmark')

            roi_box = roi_box_lst[0]
            # todo: add a confidence threshold to judge whether tracking has failed
            if abs(roi_box[2] - roi_box[0]) * abs(roi_box[3] - roi_box[1]) < 2020:
                boxes = face_boxes(frame_bgr)
                boxes = [boxes[0]]
                param_lst, roi_box_lst = tddfa(frame_bgr, boxes)

            ver = tddfa.recon_vers(param_lst, roi_box_lst, dense_flag=dense_flag)[0]

            queue_ver.append(ver.copy())
            queue_frame.append(frame_bgr.copy())

        pre_ver = ver  # for tracking

        # smoothing: enqueue and dequeue ops
        if len(queue_ver) >= n:
            ver_ave = np.mean(queue_ver, axis=0)

            if args.opt == '2d_sparse':
                img_draw = cv_draw_landmark(queue_frame[n_pre], ver_ave)  # since we use padding
            elif args.opt == '2d_dense':
                img_draw = cv_draw_landmark(queue_frame[n_pre], ver_ave, size=1)
            elif args.opt == '3d':
                img_draw = render(queue_frame[n_pre], [ver_ave], tddfa.tri, alpha=0.7)
            else:
                raise ValueError(f'Unknown opt {args.opt}')

            cv2.imshow('image', img_draw)
            k = cv2.waitKey(20)
            if k & 0xff == ord('q'):
                break

            queue_ver.popleft()
            queue_frame.popleft()
class LandmarksDetectorPytorch:
    def __init__(self, model='PLFD',
                 checkpoint=os.path.join(BASE_DIR, 'thirdparty', 'pytorch_face_landmark',
                                         'checkpoint', 'pfld_model_best.pth.tar'),
                 detector='MTCNN'):
        import torch
        sys.path.append(os.path.join(BASE_DIR, 'thirdparty', 'pytorch_face_landmark'))
        if model == 'MobileNet':
            from models.basenet import MobileNet_GDConv
            self.model = MobileNet_GDConv(136)
            self.model = torch.nn.DataParallel(self.model)
            self.model.load_state_dict(torch.load(checkpoint)['state_dict'])
            self.model.eval()
            self.size = 224
        elif model == 'MobileFaceNet':
            from models.mobilefacenet import MobileFaceNet
            self.model = MobileFaceNet([112, 112], 136)
            self.model.load_state_dict(torch.load(checkpoint)['state_dict'])
            self.model.eval()
            self.size = 112
        elif model == 'PLFD':
            from models.pfld_compressed import PFLDInference
            self.model = PFLDInference()
            self.model.load_state_dict(torch.load(checkpoint)['state_dict'])
            self.model.eval()
            self.size = 112

        if detector == 'MTCNN':
            from MTCNN import detect_faces
            self.detect_fun = lambda x: detect_faces(x[:, :, ::-1])
        elif detector == 'FaceBoxes':
            from FaceBoxes import FaceBoxes
            self.detector = FaceBoxes()
            # FaceBoxes instances are callable; the original called
            # `self.detector.face_boxex(x)`, which looks like a typo for a
            # method that does not exist
            self.detect_fun = lambda x: self.detector(x)
        elif detector == 'Retinaface':
            from Retinaface import Retinaface
            self.detector = Retinaface.Retinaface()
            self.detect_fun = lambda x: self.detector(x)
        else:
            import dlib
            self.detector = dlib.get_frontal_face_detector()
            self.detect_fun = lambda x: self.detector(cv2.cvtColor(x, cv2.COLOR_BGR2GRAY))

    def preprocess(self, img):
        import torch
        # normalize dtype/range to uint8
        if img.max() < 2:
            img = (img * 255).astype(np.uint8)
        elif img.dtype == np.float32 or img.dtype == np.float64:
            img[img < 0] = 0
            img[img > 255] = 255
            img = img.astype(np.uint8)

        rects = self.detect_fun(img)
        if isinstance(rects, tuple):  # MTCNN returns (boxes, landmarks)
            rects = rects[0]
            if len(rects) == 0:
                return []
            x1, y1, x2, y2 = rects[0, :4]
        else:
            if len(rects) == 0:
                return []
            rect = rects[0]
            try:
                x1, y1, x2, y2 = rect[:4]
            except Exception:
                # dlib rectangles expose accessors instead of indexing
                x1 = rect.left()
                y1 = rect.top()
                x2 = rect.right()
                y2 = rect.bottom()

        # grow the box into a square crop, padding at the borders if needed
        w = x2 - x1 + 1
        h = y2 - y1 + 1
        size = int(min([w, h]) * 1.2)
        cx = x1 + w // 2
        cy = y1 + h // 2
        x1 = cx - size // 2
        x2 = x1 + size
        y1 = cy - size // 2
        y2 = y1 + size

        dx = max(0, -x1)
        dy = max(0, -y1)
        x1 = max(0, x1)
        y1 = max(0, y1)
        edx = max(0, x2 - img.shape[1])
        edy = max(0, y2 - img.shape[0])
        x2 = min(img.shape[1], x2)
        y2 = min(img.shape[0], y2)

        self.bbox = [int(x1), int(x2), int(y1), int(y2)]
        cropped = img[self.bbox[2]:self.bbox[3], self.bbox[0]:self.bbox[1]]
        if dx > 0 or dy > 0 or edx > 0 or edy > 0:
            cropped = cv2.copyMakeBorder(cropped, int(dy), int(edy), int(dx), int(edx),
                                         cv2.BORDER_CONSTANT, 0)

        cropped_face = cv2.resize(cropped, (self.size, self.size))
        input_ = np.expand_dims(cropped_face.astype(np.float32).transpose([2, 0, 1]), 0) / 255.
        return [torch.from_numpy(input_).float()]

    def detect(self, img):
        img = self.preprocess(img)
        if len(img) == 0:
            return None
        lmks = []
        for i in img:
            lmk = self.model(i)[0].cpu().data.numpy()
            lmk = lmk.reshape(-1, 2)
            # map normalized crop coordinates back to the original image
            lmk[:, 0] = lmk[:, 0] * (self.bbox[1] - self.bbox[0]) + self.bbox[0]
            lmk[:, 1] = lmk[:, 1] * (self.bbox[3] - self.bbox[2]) + self.bbox[2]
            lmks += [lmk]
        return lmks[0]
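# A hypothetical usage of LandmarksDetectorPytorch. The input path and
# output filename are illustrative assumptions, not files from the repo:
import cv2

if __name__ == '__main__':
    detector = LandmarksDetectorPytorch(model='PLFD', detector='FaceBoxes')
    img = cv2.imread('examples/inputs/emma.jpg')  # BGR, as the class expects
    lmk = detector.detect(img)
    if lmk is None:
        print('No face detected')
    else:
        # draw the 68 predicted landmarks on the input image
        for x, y in lmk.astype(int):
            cv2.circle(img, (int(x), int(y)), 2, (0, 255, 0), -1)
        cv2.imwrite('landmarks_out.jpg', img)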
def main(args):
    # Init TDDFA or TDDFA_ONNX
    cfg = yaml.load(open(args.config), Loader=yaml.SafeLoader)
    if args.onnx:
        from TDDFA_ONNX import TDDFA_ONNX
        tddfa = TDDFA_ONNX(**cfg)
    else:
        gpu_mode = args.mode == 'gpu'
        tddfa = TDDFA(gpu_mode=gpu_mode, **cfg)

    # Initialize FaceBoxes
    face_boxes = FaceBoxes()

    # Given a video path
    fn = args.video_fp.split('/')[-1]
    reader = imageio.get_reader(args.video_fp)

    fps = reader.get_meta_data()['fps']
    dense_flag = args.opt in ('3d',)

    nick_orig = cv2.imread('Assets4FacePaper/nick.bmp')

    tv_neutral, tc_neutral, tv_set, tc_set, nick_param = load_vertices_set(
        'Assets4FacePaper',
        ['nick.bmp', 'nick_crop_1.jpg', 'nick_crop_2.jpg', 'nick_crop_3.jpg',
         'nick_crop_4.jpg', 'nick_crop_5.jpg', 'nick_crop_6.png'],
        face_boxes, tddfa)

    sv_neutral, sc_neutral, sv_set, sc_set, thanh_param = load_vertices_set(
        'Assets4FacePaper',
        ['Thanh.jpg', 'Thanh1.jpg', 'Thanh2.jpg', 'Thanh3.jpg',
         'Thanh4.jpg', 'Thanh5.jpg', 'Thanh6.jpg'],
        face_boxes, tddfa)

    _, _, nick_alpha_shp, nick_alpha_exp = _parse_param(nick_param)
    nick_act_masks = calc_activation_masks(tv_neutral, tv_set)
    delta_set_nick = get_delta_vertices_set(tv_neutral, tv_set)

    _, _, thanh_alpha_shp, thanh_alpha_exp = _parse_param(thanh_param)
    thanh_act_masks = calc_activation_masks(sv_neutral, sv_set)
    delta_set_thanh = get_delta_vertices_set(sv_neutral, sv_set)

    nick_ver = tc_neutral
    nick_color = tc_neutral

    suffix = get_suffix(args.video_fp)
    video_wfp = f'examples/results/videos/{fn.replace(suffix, "")}_{args.opt}.mp4'

    N = list(tv_neutral.size())[0]
    E = list(tv_set.size())[0]

    # run
    pre_ver = None
    total_timer = Timer()
    optimizing_timer = Timer()
    for i, frame in tqdm(enumerate(reader)):
        total_timer.tic()
        frame_bgr = frame[..., ::-1]  # RGB -> BGR

        if i == 0:
            # the first frame: detect the face. Only the first face is used
            # here; change this depending on your needs.
            boxes = face_boxes(frame_bgr)
            boxes = [boxes[0]]
            param_lst, roi_box_lst = tddfa(frame_bgr, boxes)
            ver = tddfa.recon_vers(param_lst, roi_box_lst, dense_flag=dense_flag)[0]

            # refine
            param_lst, roi_box_lst = tddfa(frame_bgr, [ver], crop_policy='landmark')
            R, offset, alpha_shp, alpha_exp = _parse_param(param_lst[0])
            roi_box = roi_box_lst[0]
            ver = recon_dense_explicit(R, offset, nick_alpha_shp, alpha_exp, roi_box, tddfa.size)
            ver_base = recon_dense_base(nick_alpha_shp, alpha_exp)
        else:
            param_lst, roi_box_lst = tddfa(frame_bgr, [pre_ver], crop_policy='landmark')

            roi_box = roi_box_lst[0]
            # todo: add a confidence threshold to judge whether tracking has failed
            if abs(roi_box[2] - roi_box[0]) * abs(roi_box[3] - roi_box[1]) < 2020:
                boxes = face_boxes(frame_bgr)
                boxes = [boxes[0]]
                param_lst, roi_box_lst = tddfa(frame_bgr, boxes)

            R, offset, alpha_shp, alpha_exp = _parse_param(param_lst[0])
            ver_base = recon_dense_base(nick_alpha_shp, alpha_exp)
            ver = recon_dense_explicit(R, offset, nick_alpha_shp, alpha_exp, roi_box, tddfa.size)

        # Write object
        pre_ver = ver  # for tracking

        ver = torch.from_numpy(ver_base.transpose()).float().to(device)

        optimizing_timer.tic()
        a_set = fit_coeffs_aio(ver, tv_neutral, delta_set_nick, 100)
        print(a_set)
        tver = torch.matmul(delta_set_nick.view(E, -1).transpose(0, 1), a_set).view(N, 3) + tv_neutral
        tver = transform_vertices(R, offset, tver.cpu().numpy().transpose(), roi_box, tddfa.size)  # 3 x N
        cver = calc_colors_w_act_mask(a_set, nick_act_masks, tc_neutral, tc_set).cpu().numpy()
        optimizing_time = optimizing_timer.toc()

        execution_time = total_timer.toc()
        print('Optimizing time: {}, Total time: {}'.format(optimizing_time, execution_time))

        '''
        tver = torch.matmul(delta_set_thanh.view(E, -1).transpose(0, 1), a_set).view(N, 3) + sv_neutral
        tver = transform_vertices(R, offset, tver.numpy().transpose(), roi_box, tddfa.size)  # 3 x N
        cver = calc_colors_w_act_mask(a_set, thanh_act_masks, sc_neutral, sc_set).numpy()
        print(cver.shape)
        '''

        ser_to_obj_multiple_colors(cver, [tver],
                                   height=int(nick_orig.shape[0] * 1.5),
                                   wfp='nick_' + str(i) + '.obj')
        print(f'Dump to nick_{str(i)}.obj')

        if i > 1000:
            break
def main(args):
    cfg = yaml.load(open(args.config), Loader=yaml.SafeLoader)

    # Init FaceBoxes and TDDFA; using the onnx flag is recommended
    if args.onnx:
        import os
        os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
        os.environ['OMP_NUM_THREADS'] = '4'

        from FaceBoxes.FaceBoxes_ONNX import FaceBoxes_ONNX
        from TDDFA_ONNX import TDDFA_ONNX

        face_boxes = FaceBoxes_ONNX()
        tddfa = TDDFA_ONNX(**cfg)
    else:
        gpu_mode = args.mode == 'gpu'
        tddfa = TDDFA(gpu_mode=gpu_mode, **cfg)
        face_boxes = FaceBoxes()

    # Given a still image path, load it in BGR channel order
    img = cv2.imread(args.img_fp)

    # Detect faces, get 3DMM params and roi boxes
    boxes = face_boxes(img)
    n = len(boxes)
    if n == 0:
        print('No face detected, exit')
        sys.exit(-1)
    print(f'Detect {n} faces')

    param_lst, roi_box_lst = tddfa(img, boxes)

    # Visualization and serialization
    dense_flag = args.opt in ('2d_dense', '3d', 'depth', 'pncc', 'uv_tex', 'ply', 'obj')
    old_suffix = get_suffix(args.img_fp)
    new_suffix = f'.{args.opt}' if args.opt in ('ply', 'obj') else '.jpg'

    wfp = f'{args.img_fp.split("/")[-1].replace(old_suffix, "")}_{args.opt}' + new_suffix
    wfp = base_path / 'examples' / 'results' / wfp

    ver_lst = tddfa.recon_vers(param_lst, roi_box_lst, dense_flag=dense_flag)

    if args.opt == '2d_sparse':
        draw_landmarks(img, ver_lst, show_flag=args.show_flag, dense_flag=dense_flag, wfp=wfp)
    elif args.opt == '2d_dense':
        draw_landmarks(img, ver_lst, show_flag=args.show_flag, dense_flag=dense_flag, wfp=wfp)
    elif args.opt == '3d':
        render(img, ver_lst, tddfa.tri, alpha=0.6, show_flag=args.show_flag, wfp=wfp)
    elif args.opt == 'depth':
        # if `with_bg_flag` is False, the background is black
        depth(img, ver_lst, tddfa.tri, show_flag=args.show_flag, wfp=wfp, with_bg_flag=True)
    elif args.opt == 'pncc':
        pncc(img, ver_lst, tddfa.tri, show_flag=args.show_flag, wfp=wfp, with_bg_flag=True)
    elif args.opt == 'uv_tex':
        uv_tex(img, ver_lst, tddfa.tri, show_flag=args.show_flag, wfp=wfp)
    elif args.opt == 'pose':
        viz_pose(img, param_lst, ver_lst, show_flag=args.show_flag, wfp=wfp)
    elif args.opt == 'ply':
        ser_to_ply(ver_lst, tddfa.tri, height=img.shape[0], wfp=wfp)
    elif args.opt == 'obj':
        ser_to_obj(img, ver_lst, tddfa.tri, height=img.shape[0], wfp=wfp)
    else:
        raise ValueError(f'Unknown opt {args.opt}')
def main(args):
    cfg = yaml.load(open(args.config), Loader=yaml.SafeLoader)
    gpu_mode = args.mode == 'gpu'
    tddfa = TDDFA(gpu_mode=gpu_mode, **cfg)

    vis = args.vis
    UDP_IP = args.ip
    UDP_PORT = args.port
    fov = args.fov

    with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as sock, \
            imageio.get_reader("<video0>") as reader:
        # Initialize FaceBoxes
        face_boxes = FaceBoxes()

        dense_flag = False
        pre_ver = None
        first = True
        for frame in reader:
            frame_bgr = frame[..., ::-1]  # RGB -> BGR
            # inference_time = time.time()

            if first:
                # the first frame: detect the face. Only the first face is used
                # here; change this depending on your needs.
                boxes = face_boxes(frame_bgr)
                if boxes:
                    boxes = [boxes[0]]
                    param_lst, roi_box_lst = tddfa(frame_bgr, boxes)
                    ver = tddfa.recon_vers(param_lst, roi_box_lst, dense_flag=dense_flag)[0]

                    # refine
                    param_lst, roi_box_lst = tddfa(frame_bgr, [ver], crop_policy='landmark')
                    ver = tddfa.recon_vers(param_lst, roi_box_lst, dense_flag=dense_flag)[0]
                    first = False
            else:
                param_lst, roi_box_lst = tddfa(frame_bgr, [pre_ver], crop_policy='landmark')

                roi_box = roi_box_lst[0]
                # todo: add a confidence threshold to judge whether tracking has failed
                if abs(roi_box[2] - roi_box[0]) * abs(roi_box[3] - roi_box[1]) < 2020:
                    boxes = face_boxes(frame_bgr)
                    if boxes:
                        boxes = [boxes[0]]
                        param_lst, roi_box_lst = tddfa(frame_bgr, boxes)
                    else:
                        first = True
                        pre_ver = None

                if boxes:
                    ver = tddfa.recon_vers(param_lst, roi_box_lst, dense_flag=dense_flag)[0]

            # inference_time = time.time() - inference_time

            if boxes:
                pre_ver = ver  # for tracking

                # Most of what follows is an attempt to compute the translation t
                # of the head model in world space. The depth coordinate is denoted tz.
                h, w, _ = frame.shape
                f, R, t = P2sRt(param_lst[0][:12].reshape(3, -1))
                P, pose = calc_pose(param_lst[0])
                x0, y0, x1, y1 = map(int, roi_box_lst[0])

                # Compute the translation vector in image space.
                # The prediction of t is defined in some arbitrarily scaled way
                # (from our perspective here). See tddfa_util.similar_transform(),
                # which computes the orthographic projection of 3d mesh positions
                # to the image space of our roi_box.
                t_img_x = x0 + (t[0] - 1) * (x1 - x0) / tddfa.size
                t_img_y = y0 + (tddfa.size - t[1]) * (y1 - y0) / tddfa.size

                # The following should give us the scaling transform from the
                # space of the (deformed) model to image pixels.
                scale_model_to_roi_x = f / tddfa.size * (x1 - x0)
                # Probably the y "version" makes no sense ... Idk ...
                scale_model_to_roi_y = f / tddfa.size * (y1 - y0)

                # In order to relate lengths in our roi box to the real world, we
                # can use the size of the deformable model. Judging from the
                # magnitude of the coordinate values in the model files
                # (https://github.com/cleardusk/3DDFA/tree/master/, u_exp.npy and
                # u_shp.npy), they are defined in micrometers. Hence ...
                scale_model_to_meters = 1.e-6  # micrometers to meters

                # Furthermore, define the focal length of our webcam considered
                # as a pinhole camera. (I think this is the focal length, or
                # rather one over it.)
                focal_length_inv_x = math.tan(fov * math.pi / 180. * 0.5)
                focal_length_inv_y = math.tan(fov * h / w * math.pi / 180. * 0.5)

                # Considering the perspective transform, we know that
                # tz / world_space_size = f / projected_size, where projected_size
                # ranges from -1 to 1 as it is defined in screen space.
                # Rearranging slightly: tz = focal_length * projected_size / world_space_size.
                # The quotient at the back is just a scaling factor which we can
                # compute with the quantities we have, i.e. scale_model_to_roi_x / w
                # gives us the model->screen scale transform, and its inverse the
                # screen->model one. Multiply by model->meters, and we have our
                # desired meters->screen scaling.
                # Uhm ... except I have this twice now, one for x, one for y ...
                tz_roi_x = scale_model_to_meters * w / (scale_model_to_roi_x * focal_length_inv_x)
                tz_roi_y = scale_model_to_meters * h / (scale_model_to_roi_y * focal_length_inv_y)
                # Just average ...
                tz_roi = (tz_roi_x + tz_roi_y) * 0.5

                # I don't know how the length units of the predicted translation
                # depth are defined. This scaling factor does not seem too bad though.
                scale_model_z = scale_model_to_meters / f

                # So we just add the scaled predicted depth to the guesstimated
                # depth from the head size.
                tz = tz_roi + t[2] * scale_model_z

                # Using the perspective transform, calculating the world x and y
                # translation is easy.
                tx = (t_img_x / w * 2. - 1.) * tz * focal_length_inv_x
                ty = (t_img_y / h * 2. - 1.) * tz * focal_length_inv_y

                # This final part corrects the yaw and pitch values. The reason is
                # simple: the network only sees the roi around the face. When the
                # roi moves off-center, the head is seen at an angle due to
                # perspective, even though in world space it faces straight
                # forward in the z-direction. By adding the angle between the
                # forward direction and the head translation vector, we can
                # compensate for this effect.
                yaw_correction = math.atan2(tx, tz) * RAD2DEG
                pitch_correction = math.atan2(ty, tz) * RAD2DEG
                pose[0] += yaw_correction
                pose[1] += pitch_correction

                print(f"H {pose[0]:.1f} P {pose[1]:.1f} B {pose[2]:.1f} " +
                      f"X {tx:.2f} Y {ty:.2f} Z {tz:.2f}")

                # Send the pose to opentrack
                a = np.zeros((6,), dtype=np.float64)
                # I guess it wants cm ...
                a[0] = tx * 100.
                a[1] = ty * 100.
                a[2] = tz * 100.
                # Degrees are fine.
                a[3] = pose[0]
                a[4] = pose[1]
                a[5] = pose[2]
                sock.sendto(a.tobytes(), (UDP_IP, UDP_PORT))

                # print("update period ", time.time() - time_val, "inference time", inference_time)
                # time_val = time.time()

                if vis:
                    img_draw = cv_draw_landmark(frame_bgr, ver)
                    viz_pose(img_draw, param_lst, [ver])

                    # The 2d bounding box
                    cv2.rectangle(img_draw, (x0, y0), (x1, y1), (128, 0, 0), 1)

                    # Draw the projection of a 10 cm radius circle: a sanity check
                    # for the scaling and the translation estimation.
                    cx = int((tx / (focal_length_inv_x * tz) + 1.) * 0.5 * w)
                    cy = int((ty / (focal_length_inv_y * tz) + 1.) * 0.5 * h)
                    r = int(0.1 * w / (focal_length_inv_x * tz))
                    cv2.circle(img_draw, (cx, cy), r, (0, 0, 255), 1)
                    cv2.circle(img_draw, (cx, cy), 3, (0, 0, 255), -1)

                    cv2.imshow('image', img_draw)
                    cv2.waitKey(20)
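# The off-center correction above reduces to adding the angle between the
# camera's forward axis and the head translation vector. Isolated as a
# small sketch (the helper name is illustrative, not from the script):
import math

RAD2DEG = 180.0 / math.pi

def offcenter_correction(tx, ty, tz):
    # When the face ROI sits away from the image center, a head facing
    # straight down the camera z-axis is still seen at an angle due to
    # perspective; adding atan2 of the translation components compensates.
    yaw_correction = math.atan2(tx, tz) * RAD2DEG
    pitch_correction = math.atan2(ty, tz) * RAD2DEG
    return yaw_correction, pitch_correction

# e.g. a head 0.3 m to the right at 0.6 m depth reads ~26.6 deg of extra yaw:
# offcenter_correction(0.3, 0.0, 0.6) -> (26.565..., 0.0)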
def main(args):
    # input and output folders
    image_path = "G:\\BU-4DFE\\Extracted"
    save_path = "G:\\bu4dfe_3ddfa_proc"
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    cfg = yaml.load(open(args.config), Loader=yaml.SafeLoader)

    # Init FaceBoxes and TDDFA
    gpu_mode = args.mode == 'gpu'
    tddfa = TDDFA(gpu_mode=gpu_mode, **cfg)
    face_boxes = FaceBoxes()

    img_list = sorted([str(x) for x in pathlib.Path(image_path).rglob("*.jpg")])

    print("Reconstructing:\n")
    for file in img_list:
        out_file = save_path + file[len(image_path):-4] + ".pickle"
        if os.path.isfile(out_file):
            continue
        out_dir = str(pathlib.Path(out_file).parent)
        if not os.path.isdir(out_dir):
            os.makedirs(out_dir)

        # Given a still image path, load it in BGR channel order
        img = cv2.imread(file)

        # Detect faces, get 3DMM params and roi boxes
        boxes = face_boxes(img)
        n = len(boxes)
        if n == 0:
            print('No face detected, skipping "' + file + '".')
            continue
        # print(f'Detect {n} faces')

        param_lst, roi_box_lst = tddfa(img, boxes)

        # Visualization and serialization
        dense_flag = True
        ver_lst = tddfa.recon_vers(param_lst, roi_box_lst, dense_flag=dense_flag)

        # flip the y-axis and reverse the triangle winding (see above)
        lm68 = np.reshape(np.reshape(ver_lst[0].T, (-1, 1))[tddfa.bfm.keypoints], (-1, 3))
        for i in range(lm68.shape[0]):
            lm68[i, 1] = img.shape[0] - lm68[i, 1]
        for i in range(ver_lst[0].shape[1]):
            ver_lst[0][1, i] = img.shape[0] - ver_lst[0][1, i]
        vertices = ver_lst[0].T

        useful_tri = np.copy(tddfa.tri)
        for i in range(useful_tri.shape[0]):
            useful_tri[i, 0], useful_tri[i, 2] = useful_tri[i, 2], useful_tri[i, 0]
        # useful_tri = useful_tri + 1

        # save
        mesh = dict()
        mesh["vertices"] = vertices
        mesh["faces"] = useful_tri
        mesh["lm68"] = lm68
        with open(out_file, "wb+") as f:
            _pickle.dump(mesh, f)
        print(out_file)
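# Reading one of the dumped meshes back. The dict keys come directly from
# the exporters above; the helper itself is a sketch, not part of the repo:
import _pickle

def load_mesh(path):
    # Returns (vertices N x 3, faces M x 3, lm68 68 x 3) as written by the
    # pickle exporters above.
    with open(path, 'rb') as f:
        mesh = _pickle.load(f)
    return mesh['vertices'], mesh['faces'], mesh['lm68']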