def main_s(args):
    global tddfa
    global face_boxes

    # Given a still image path, load it as a BGR image
    img = cv2.imread(args.img_fp)

    # Detect faces, get 3DMM params and roi boxes
    boxes = face_boxes(img)
    n = len(boxes)
    if n == 0:
        print('No face detected, exit')
        return  # sys.exit(-1)
    print(f'Detect {n} faces')

    param_lst, roi_box_lst = tddfa(img, boxes)

    # Visualization and serialization
    dense_flag = args.opt in ('2d_dense', '3d', 'depth', 'pncc', 'uv_tex', 'ply', 'obj')
    old_suffix = get_suffix(args.img_fp)
    new_suffix = f'.{args.opt}' if args.opt in ('ply', 'obj') else '.jpg'

    wfp = f'examples/results/{args.img_fp.split("/")[-1].replace(old_suffix, "")}_{args.opt}' + new_suffix

    ver_lst = tddfa.recon_vers(param_lst, roi_box_lst, dense_flag=dense_flag)
    # print(len(ver_lst), ver_lst[0].shape)

    if args.opt == '3d':
        viz_pose(img, param_lst, ver_lst, show_flag=args.show_flag, wfp=wfp)
        return render(img, ver_lst, tddfa.tri, alpha=0.6, show_flag=args.show_flag, wfp=wfp)
    else:
        raise ValueError(f'Unknown opt {args.opt}')
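# A minimal sketch of driving main_s(): it reads the module-level globals `tddfa`
# and `face_boxes`, so both must exist before the call. The config and image paths
# and the SimpleNamespace fields below are illustrative assumptions; they simply
# mirror the attributes main_s() accesses (img_fp, opt, show_flag) and the
# TDDFA/FaceBoxes setup used by the main() variants further down.
def _run_main_s_example():
    global tddfa, face_boxes
    from types import SimpleNamespace

    cfg = yaml.load(open('configs/mb1_120x120.yml'), Loader=yaml.SafeLoader)  # assumed config path
    tddfa = TDDFA(gpu_mode=False, **cfg)
    face_boxes = FaceBoxes()

    args = SimpleNamespace(img_fp='examples/inputs/emma.jpg',  # assumed sample image
                           opt='3d',
                           show_flag=False)
    main_s(args)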
def main(args):
    # Init TDDFA
    cfg = yaml.load(open(args.config), Loader=yaml.SafeLoader)
    gpu_mode = args.mode == 'gpu'
    tddfa = TDDFA(gpu_mode=gpu_mode, **cfg)

    # Init FaceBoxes
    face_boxes = FaceBoxes()

    # Given a still image path, load it as a BGR image
    img = cv2.imread(args.img_fp)

    # Detect faces, get 3DMM params and roi boxes
    boxes = face_boxes(img)
    n = len(boxes)
    print(f'Detect {n} faces')
    if n == 0:
        print('No face detected, exit')
        sys.exit(-1)

    param_lst, roi_box_lst = tddfa(img, boxes)

    # Visualization and serialization
    dense_flag = args.opt in ('2d_dense', '3d', 'depth', 'pncc', 'uv_tex', 'ply', 'obj')
    old_suffix = get_suffix(args.img_fp)
    new_suffix = f'.{args.opt}' if args.opt in ('ply', 'obj') else '.jpg'

    wfp = f'examples/results/{args.img_fp.split("/")[-1].replace(old_suffix, "")}_{args.opt}' + new_suffix

    ver_lst = tddfa.recon_vers(param_lst, roi_box_lst, dense_flag=dense_flag)

    if args.opt == '2d_sparse':
        draw_landmarks(img, ver_lst, show_flag=args.show_flag, dense_flag=dense_flag, wfp=wfp)
    elif args.opt == '2d_dense':
        draw_landmarks(img, ver_lst, show_flag=args.show_flag, dense_flag=dense_flag, wfp=wfp)
    elif args.opt == '3d':
        render(img, ver_lst, alpha=0.6, show_flag=args.show_flag, wfp=wfp)
    elif args.opt == 'depth':
        # if `with_bg_flag` is False, the background is black
        depth(img, ver_lst, show_flag=args.show_flag, wfp=wfp, with_bg_flag=True)
    elif args.opt == 'pncc':
        pncc(img, ver_lst, show_flag=args.show_flag, wfp=wfp, with_bg_flag=True)
    elif args.opt == 'uv_tex':
        uv_tex(img, ver_lst, show_flag=args.show_flag, wfp=wfp)
    elif args.opt == 'pose':
        viz_pose(img, param_lst, ver_lst, show_flag=args.show_flag, wfp=wfp)
    elif args.opt == 'ply':
        ser_to_ply(ver_lst, height=img.shape[0], wfp=wfp)
    elif args.opt == 'obj':
        ser_to_obj(img, ver_lst, height=img.shape[0], wfp=wfp)
    else:
        raise ValueError(f'Unknown opt {args.opt}')
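# A hedged sketch of a command-line entry point for the main() above. The flag
# names simply mirror the attributes the function reads (config, mode, img_fp,
# opt, show_flag); the defaults are assumptions, not the project's canonical CLI.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(description='3DDFA still-image demo')
    parser.add_argument('-c', '--config', default='configs/mb1_120x120.yml')  # assumed default
    parser.add_argument('-f', '--img_fp', required=True, help='path to the input image')
    parser.add_argument('-m', '--mode', default='cpu', choices=['cpu', 'gpu'])
    parser.add_argument('-o', '--opt', default='2d_sparse',
                        choices=['2d_sparse', '2d_dense', '3d', 'depth', 'pncc',
                                 'uv_tex', 'pose', 'ply', 'obj'])
    parser.add_argument('--show_flag', action='store_true',
                        help='whether to display the visualization window')
    args = parser.parse_args()
    main(args)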
def main(args):
    cfg = yaml.load(open(args.config), Loader=yaml.SafeLoader)
    gpu_mode = args.mode == 'gpu'
    tddfa = TDDFA(gpu_mode=gpu_mode, **cfg)

    vis = args.vis
    UDP_IP = args.ip
    UDP_PORT = args.port
    fov = args.fov

    with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as sock, \
            imageio.get_reader("<video0>") as reader:
        # Initialize FaceBoxes
        face_boxes = FaceBoxes()

        dense_flag = False
        pre_ver = None
        first = True

        for frame in reader:
            frame_bgr = frame[..., ::-1]  # RGB -> BGR
            # inference_time = time.time()

            if first:
                # On the first frame, run the face detector. Only the first detected
                # face is used; change this depending on your needs.
                boxes = face_boxes(frame_bgr)
                if boxes:
                    boxes = [boxes[0]]
                    param_lst, roi_box_lst = tddfa(frame_bgr, boxes)
                    ver = tddfa.recon_vers(param_lst, roi_box_lst, dense_flag=dense_flag)[0]

                    # refine
                    param_lst, roi_box_lst = tddfa(frame_bgr, [ver], crop_policy='landmark')
                    ver = tddfa.recon_vers(param_lst, roi_box_lst, dense_flag=dense_flag)[0]
                    first = False
            else:
                param_lst, roi_box_lst = tddfa(frame_bgr, [pre_ver], crop_policy='landmark')

                roi_box = roi_box_lst[0]
                # TODO: add a confidence threshold to judge whether tracking has failed
                if abs(roi_box[2] - roi_box[0]) * abs(roi_box[3] - roi_box[1]) < 2020:
                    boxes = face_boxes(frame_bgr)
                    if boxes:
                        boxes = [boxes[0]]
                        param_lst, roi_box_lst = tddfa(frame_bgr, boxes)
                    else:
                        first = True
                        pre_ver = None

                if boxes:
                    ver = tddfa.recon_vers(param_lst, roi_box_lst, dense_flag=dense_flag)[0]

            # inference_time = time.time() - inference_time

            if boxes:
                pre_ver = ver  # for tracking

                # Most of what follows is an attempt to compute the translation t
                # of the head model in world space. The depth coordinate is denoted tz.
                h, w, _ = frame.shape
                f, R, t = P2sRt(param_lst[0][:12].reshape(3, -1))
                P, pose = calc_pose(param_lst[0])
                x0, y0, x1, y1 = map(int, roi_box_lst[0])

                # Compute the translation vector in image space.
                # The predicted t is defined in some arbitrarily scaled way (from our perspective here).
                # See tddfa_util.similar_transform(), which computes the orthographic
                # projection of 3D mesh positions into the image space of our roi box.
                t_img_x = x0 + (t[0] - 1) * (x1 - x0) / tddfa.size
                t_img_y = y0 + (tddfa.size - t[1]) * (y1 - y0) / tddfa.size

                # The following should give us the scaling transform from the
                # space of the (deformed) model to image pixels.
                scale_model_to_roi_x = f / tddfa.size * (x1 - x0)
                # The y "version" probably makes no sense ...
                scale_model_to_roi_y = f / tddfa.size * (y1 - y0)

                # To relate lengths in the roi box to the real world, we can use the size
                # of the deformable model. Judging from the magnitude of the coordinate
                # values in the model files
                # (https://github.com/cleardusk/3DDFA/tree/master/, u_exp.npy and u_shp.npy),
                # they are defined in micrometers. Hence ...
                scale_model_to_meters = 1.e-6  # micrometers to meters

                # Furthermore, define the focal length of the webcam treated as a pinhole
                # camera. (I think this is the focal length, or rather one over it.)
                focal_length_inv_x = math.tan(fov * math.pi / 180. * 0.5)
                focal_length_inv_y = math.tan(fov * h / w * math.pi / 180. * 0.5)

                # From the perspective transform we know tz / world_space_size = f / projected_size,
                # where projected_size ranges from -1 to 1 in screen space. Rearranging slightly:
                # tz = focal_length * projected_size / world_space_size. The trailing quotient is
                # just a scaling factor we can compute from the quantities at hand, i.e.
                # scale_model_to_roi_x / w gives us the model -> screen scale transform, its
                # inverse the screen -> model transform. Multiply by model -> meters, and we have
                # our desired meters -> screen scaling.
                # Uhm ... except we get this twice now, once for x, once for y ...
                tz_roi_x = scale_model_to_meters * w / (scale_model_to_roi_x * focal_length_inv_x)
                tz_roi_y = scale_model_to_meters * h / (scale_model_to_roi_y * focal_length_inv_y)
                # Just average ...
                tz_roi = (tz_roi_x + tz_roi_y) * 0.5

                # It is unclear how the length units of the predicted translation depth are
                # defined. This scaling factor does not seem too bad though.
                scale_model_z = scale_model_to_meters / f
                # So we add the scaled predicted depth to the depth guesstimated from the head size.
                tz = tz_roi + t[2] * scale_model_z

                # Using the perspective transform, the world x and y translations follow directly.
                tx = (t_img_x / w * 2. - 1.) * tz * focal_length_inv_x
                ty = (t_img_y / h * 2. - 1.) * tz * focal_length_inv_y

                # This final part corrects the yaw and pitch values. The reason is simple: the
                # network only sees the roi around the face. When the roi moves off-center, the
                # head is seen at an angle due to perspective, even though in world space it
                # faces straight along the z-direction. Adding the angle between the forward
                # direction and the head translation vector compensates for this effect.
                yaw_correction = math.atan2(tx, tz) * RAD2DEG
                pitch_correction = math.atan2(ty, tz) * RAD2DEG
                pose[0] += yaw_correction
                pose[1] += pitch_correction

                print(f"H {pose[0]:.1f} P {pose[1]:.1f} B {pose[2]:.1f} "
                      f"X {tx:.2f} Y {ty:.2f} Z {tz:.2f}")

                # Send the pose to opentrack
                a = np.zeros((6,), dtype=np.float64)
                # I guess it wants cm ...
                a[0] = tx * 100.
                a[1] = ty * 100.
                a[2] = tz * 100.
                # Degrees are fine.
                a[3] = pose[0]
                a[4] = pose[1]
                a[5] = pose[2]
                sock.sendto(a.tobytes(), (UDP_IP, UDP_PORT))

                # print("update period ", time.time() - time_val, "inference time", inference_time)
                # time_val = time.time()

                if vis:
                    img_draw = cv_draw_landmark(frame_bgr, ver)  # since we use padding
                    viz_pose(img_draw, param_lst, [ver])
                    # The 2d bounding box
                    cv2.rectangle(img_draw, (x0, y0), (x1, y1), (128, 0, 0), 1)
                    # Draw the projection of a 10 cm radius circle as a sanity check for the
                    # scaling and the translation estimation.
                    cx = int((tx / (focal_length_inv_x * tz) + 1.) * 0.5 * w)
                    cy = int((ty / (focal_length_inv_y * tz) + 1.) * 0.5 * h)
                    r = int(0.1 * w / (focal_length_inv_x * tz))
                    cv2.circle(img_draw, (cx, cy), r, (0, 0, 255), 1)
                    cv2.circle(img_draw, (cx, cy), 3, (0, 0, 255), -1)
                    cv2.imshow('image', img_draw)
                    cv2.waitKey(20)
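# For reference, a minimal sketch of the receiving side of the UDP packet sent
# above: six float64 values in the sender's native byte order, interpreted as
# x/y/z in centimeters followed by yaw/pitch/roll in degrees (the layout the
# sender fills in). Host and port are assumptions; 4242 is merely a commonly
# used opentrack UDP port, not taken from the code above.
def receive_pose_once(host='127.0.0.1', port=4242):
    import socket
    import numpy as np

    with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as sock:
        sock.bind((host, port))
        data, _ = sock.recvfrom(6 * 8)  # six float64 values, 8 bytes each
        x, y, z, yaw, pitch, roll = np.frombuffer(data, dtype=np.float64)
        return x, y, z, yaw, pitch, roll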
def main(args):
    cfg = yaml.load(open(args.config), Loader=yaml.SafeLoader)

    # Init FaceBoxes and TDDFA; using the onnx flag is recommended
    if args.onnx:
        import os
        os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
        os.environ['OMP_NUM_THREADS'] = '4'

        from FaceBoxes.FaceBoxes_ONNX import FaceBoxes_ONNX
        from TDDFA_ONNX import TDDFA_ONNX

        face_boxes = FaceBoxes_ONNX()
        tddfa = TDDFA_ONNX(**cfg)
    else:
        gpu_mode = args.mode == 'gpu'
        tddfa = TDDFA(gpu_mode=gpu_mode, **cfg)
        face_boxes = FaceBoxes()

    # Given a still image path, load it as a BGR image
    img = cv2.imread(args.img_fp)

    # Detect faces, get 3DMM params and roi boxes
    boxes = face_boxes(img)
    n = len(boxes)
    if n == 0:
        print('No face detected, exit')
        sys.exit(-1)
    print(f'Detect {n} faces')

    param_lst, roi_box_lst = tddfa(img, boxes)

    # Visualization and serialization
    dense_flag = args.opt in ('2d_dense', '3d', 'depth', 'pncc', 'uv_tex', 'ply', 'obj')
    old_suffix = get_suffix(args.img_fp)
    new_suffix = f'.{args.opt}' if args.opt in ('ply', 'obj') else '.jpg'

    wfp = f'{args.img_fp.split("/")[-1].replace(old_suffix, "")}_{args.opt}' + new_suffix
    wfp = base_path / 'examples' / 'results' / wfp

    ver_lst = tddfa.recon_vers(param_lst, roi_box_lst, dense_flag=dense_flag)

    if args.opt == '2d_sparse':
        draw_landmarks(img, ver_lst, show_flag=args.show_flag, dense_flag=dense_flag, wfp=wfp)
    elif args.opt == '2d_dense':
        draw_landmarks(img, ver_lst, show_flag=args.show_flag, dense_flag=dense_flag, wfp=wfp)
    elif args.opt == '3d':
        render(img, ver_lst, tddfa.tri, alpha=0.6, show_flag=args.show_flag, wfp=wfp)
    elif args.opt == 'depth':
        # if `with_bg_flag` is False, the background is black
        depth(img, ver_lst, tddfa.tri, show_flag=args.show_flag, wfp=wfp, with_bg_flag=True)
    elif args.opt == 'pncc':
        pncc(img, ver_lst, tddfa.tri, show_flag=args.show_flag, wfp=wfp, with_bg_flag=True)
    elif args.opt == 'uv_tex':
        uv_tex(img, ver_lst, tddfa.tri, show_flag=args.show_flag, wfp=wfp)
    elif args.opt == 'pose':
        viz_pose(img, param_lst, ver_lst, show_flag=args.show_flag, wfp=wfp)
    elif args.opt == 'ply':
        ser_to_ply(ver_lst, tddfa.tri, height=img.shape[0], wfp=wfp)
    elif args.opt == 'obj':
        ser_to_obj(img, ver_lst, tddfa.tri, height=img.shape[0], wfp=wfp)
    else:
        raise ValueError(f'Unknown opt {args.opt}')
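# The main() above builds its output path from a module-level `base_path` that is
# not defined in this snippet. A plausible definition (an assumption, not taken
# from the original module) is the directory containing this script, via pathlib:
from pathlib import Path

base_path = Path(__file__).resolve().parent  # assumed: repo root next to this script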
def main(args):
    cfg = yaml.load(open(args.config), Loader=yaml.SafeLoader)

    # Init FaceBoxes and TDDFA; using the onnx flag is recommended
    if args.onnx:
        import os
        os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
        os.environ['OMP_NUM_THREADS'] = '4'

        from FaceBoxes.FaceBoxes_ONNX import FaceBoxes_ONNX
        from TDDFA_ONNX import TDDFA_ONNX

        face_boxes = FaceBoxes_ONNX()
        tddfa = TDDFA_ONNX(**cfg)
    else:
        gpu_mode = args.mode == 'gpu'
        tddfa = TDDFA(gpu_mode=gpu_mode, **cfg)
        face_boxes = FaceBoxes()

    # Given a still image path, load it as a BGR image
    img = cv2.imread(args.img_fp)

    # Detect faces, get 3DMM params and roi boxes
    boxes = face_boxes(img)
    n = len(boxes)
    if n == 0:
        print('No face detected, exit')
        sys.exit(-1)
    print(f'Detect {n} faces')

    param_lst, roi_box_lst = tddfa(img, boxes)

    # Visualization and serialization
    dense_flag = args.opt in ('2d_dense', '3d', 'depth', 'pncc', 'uv_tex', 'ply', 'obj')
    old_suffix = get_suffix(args.img_fp)
    new_suffix = f'.{args.opt}' if args.opt in ('ply', 'obj') else '.jpg'

    wfp = f'examples/results/{args.img_fp.split("/")[-1].replace(old_suffix, "")}_{args.opt}' + new_suffix

    ver_lst = tddfa.recon_vers(param_lst, roi_box_lst, dense_flag=dense_flag)

    print(ver_lst[0].shape)
    print(tddfa.bfm.u.shape)

    # Gather the 68 sparse landmarks from the flattened dense vertex array
    lm68 = np.reshape(np.reshape(ver_lst[0].T, (-1, 1))[tddfa.bfm.keypoints], (-1, 3))
    print(lm68.shape)

    # Flip the y coordinate (image rows grow downward) for both landmarks and vertices
    for i in range(lm68.shape[0]):
        lm68[i, 1] = img.shape[0] - lm68[i, 1]
    for i in range(ver_lst[0].shape[1]):
        ver_lst[0][1, i] = img.shape[0] - ver_lst[0][1, i]

    # Reverse the triangle winding order and switch to 1-based vertex indices
    useful_tri = np.copy(tddfa.tri)
    for i in range(useful_tri.shape[0]):
        tmp = useful_tri[i, 2]
        useful_tri[i, 2] = useful_tri[i, 0]
        useful_tri[i, 0] = tmp
    useful_tri = useful_tri + 1

    np.save("asd_lm.npy", lm68)
    np.save("asd_v.npy", ver_lst[0].T)
    np.save("asd_f.npy", useful_tri)

    if args.opt == '2d_sparse':
        draw_landmarks(img, ver_lst, show_flag=args.show_flag, dense_flag=dense_flag, wfp=wfp)
    elif args.opt == '2d_dense':
        draw_landmarks(img, ver_lst, show_flag=args.show_flag, dense_flag=dense_flag, wfp=wfp)
    elif args.opt == '3d':
        render(img, ver_lst, tddfa.tri, alpha=0.6, show_flag=args.show_flag, wfp=wfp)
    elif args.opt == 'depth':
        # if `with_bg_flag` is False, the background is black
        depth(img, ver_lst, tddfa.tri, show_flag=args.show_flag, wfp=wfp, with_bg_flag=True)
    elif args.opt == 'pncc':
        pncc(img, ver_lst, tddfa.tri, show_flag=args.show_flag, wfp=wfp, with_bg_flag=True)
    elif args.opt == 'uv_tex':
        uv_tex(img, ver_lst, tddfa.tri, show_flag=args.show_flag, wfp=wfp)
    elif args.opt == 'pose':
        viz_pose(img, param_lst, ver_lst, show_flag=args.show_flag, wfp=wfp)
    elif args.opt == 'ply':
        ser_to_ply(ver_lst, tddfa.tri, height=img.shape[0], wfp=wfp)
    elif args.opt == 'obj':
        ser_to_obj(img, ver_lst, tddfa.tri, height=img.shape[0], wfp=wfp)
    else:
        raise ValueError(f'Unknown opt {args.opt}')
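# A small sketch showing how the arrays saved above could be loaded back and
# written out as a Wavefront OBJ. It relies only on the layout produced above:
# vertices as an (N, 3) array, faces as 1-based (M, 3) indices with reversed
# winding, and 68 landmarks as a (68, 3) array. The output filename is arbitrary.
def export_saved_mesh(obj_fp='asd_mesh.obj'):
    import numpy as np

    verts = np.load('asd_v.npy')   # (N, 3) vertex positions, y already flipped
    faces = np.load('asd_f.npy')   # (M, 3) 1-based vertex indices
    lm68 = np.load('asd_lm.npy')   # (68, 3) sparse landmarks
    print(verts.shape, faces.shape, lm68.shape)

    with open(obj_fp, 'w') as f:
        for x, y, z in verts:
            f.write(f'v {x:.4f} {y:.4f} {z:.4f}\n')
        for a, b, c in faces:
            f.write(f'f {a} {b} {c}\n')  # OBJ faces are 1-based, matching the saved indices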