def fit_3D_mesh(target_3d_mesh_fname, model_fname, weights, show_fitting=True):
    '''
    Fit FLAME to 3D mesh in correspondence to the FLAME mesh (i.e. same number of vertices, same mesh topology)

    The fit runs in two stages: first only the global translation and rotation are optimized against the
    vertex distance, then all parameters are optimized against the weighted data term plus regularizers.

    :param target_3d_mesh_fname:    target 3D mesh filename
    :param model_fname:             saved FLAME model
    :param weights:                 weights of the individual objective functions; the keys read here are
                                    'data', 'shape', 'expr', 'neck_pose', 'jaw_pose' and 'eyeballs_pose'
    :param show_fitting:            if True, show target and fitted mesh in a MeshViewer and wait for input
    :return: a mesh with the fitting results
    '''

    target_mesh = Mesh(filename=target_3d_mesh_fname)

    tf_trans = tf.Variable(np.zeros((1,3)), name="trans", dtype=tf.float64, trainable=True)
    # Named "rot" (not "pose") so it does not collide with the pose variable below when
    # variables are inspected or looked up by name.
    tf_rot = tf.Variable(np.zeros((1,3)), name="rot", dtype=tf.float64, trainable=True)
    tf_pose = tf.Variable(np.zeros((1,12)), name="pose", dtype=tf.float64, trainable=True)
    tf_shape = tf.Variable(np.zeros((1,300)), name="shape", dtype=tf.float64, trainable=True)
    tf_exp = tf.Variable(np.zeros((1,100)), name="expression", dtype=tf.float64, trainable=True)
    smpl = SMPL(model_fname)
    tf_model = tf.squeeze(smpl(tf_trans,
                               tf.concat((tf_shape, tf_exp), axis=-1),
                               tf.concat((tf_rot, tf_pose), axis=-1)))

    with tf.Session() as session:
        session.run(tf.global_variables_initializer())

        # Data term: summed squared distance between corresponding vertices.
        mesh_dist = tf.reduce_sum(tf.square(tf.subtract(tf_model, target_mesh.v)))
        # The 12 pose values are regularized in three groups: [0:3], [3:6] and [6:12].
        neck_pose_reg = tf.reduce_sum(tf.square(tf_pose[:,:3]))
        jaw_pose_reg = tf.reduce_sum(tf.square(tf_pose[:,3:6]))
        eyeballs_pose_reg = tf.reduce_sum(tf.square(tf_pose[:,6:]))
        shape_reg = tf.reduce_sum(tf.square(tf_shape))
        exp_reg = tf.reduce_sum(tf.square(tf_exp))

        # Optimize global transformation first
        rigid_vars = [tf_trans, tf_rot]
        rigid_loss = mesh_dist
        optimizer = scipy_pt(loss=rigid_loss, var_list=rigid_vars, method='BFGS', options={'disp': 1})
        print('Optimize rigid transformation')
        optimizer.minimize(session)

        # Optimize for the model parameters
        model_vars = [tf_trans, tf_rot, tf_pose, tf_shape, tf_exp]
        model_loss = weights['data'] * mesh_dist + weights['shape'] * shape_reg + weights['expr'] * exp_reg + \
                     weights['neck_pose'] * neck_pose_reg + weights['jaw_pose'] * jaw_pose_reg + \
                     weights['eyeballs_pose'] * eyeballs_pose_reg
        optimizer = scipy_pt(loss=model_loss, var_list=model_vars, method='BFGS', options={'disp': 1})
        print('Optimize model parameters')
        optimizer.minimize(session)

        print('Fitting done')

        if show_fitting:
            # Visualize fitting
            mv = MeshViewer()
            fitting_mesh = Mesh(session.run(tf_model), smpl.f)
            fitting_mesh.set_vertex_colors('light sky blue')

            mv.set_static_meshes([target_mesh, fitting_mesh])
            six.moves.input('Press key to continue')

        return Mesh(session.run(tf_model), smpl.f)
def fit_lmk3d(target_3d_lmks, model_fname, lmk_face_idx, lmk_b_coords, weights, show_fitting=True):
    '''
    Fit FLAME to 3D landmarks

    The fit runs in two stages: first only the global translation and rotation are optimized against the
    landmark distance, then all parameters are optimized against the landmark term plus regularizers.

    :param target_3d_lmks:      target 3D landmarks provided as (num_lmks x 3) matrix
    :param model_fname:         saved Tensorflow FLAME model
    :param lmk_face_idx:        face indices of the landmark embedding in the FLAME topology
    :param lmk_b_coords:        barycentric coordinates of the landmark embedding in the FLAME topology
                                (i.e. weighting of the three vertices of the triangle the landmark is embedded in)
    :param weights:             weights of the individual objective functions; the keys read here are
                                'lmk', 'shape', 'expr', 'neck_pose', 'jaw_pose' and 'eyeballs_pose'
    :param show_fitting:        if True, show target and fitted landmarks with the fitted mesh in a
                                MeshViewer and wait for input
    :return: a mesh with the fitting results
    '''

    tf_trans = tf.Variable(np.zeros((1,3)), name="trans", dtype=tf.float64, trainable=True)
    # Named "rot" (not "pose") so it does not collide with the pose variable below when
    # variables are inspected or looked up by name.
    tf_rot = tf.Variable(np.zeros((1,3)), name="rot", dtype=tf.float64, trainable=True)
    tf_pose = tf.Variable(np.zeros((1,12)), name="pose", dtype=tf.float64, trainable=True)
    tf_shape = tf.Variable(np.zeros((1,300)), name="shape", dtype=tf.float64, trainable=True)
    tf_exp = tf.Variable(np.zeros((1,100)), name="expression", dtype=tf.float64, trainable=True)
    smpl = SMPL(model_fname)
    tf_model = tf.squeeze(smpl(tf_trans,
                               tf.concat((tf_shape, tf_exp), axis=-1),
                               tf.concat((tf_rot, tf_pose), axis=-1)))

    with tf.Session() as session:
        session.run(tf.global_variables_initializer())

        lmks = tf_get_model_lmks(tf_model, smpl.f, lmk_face_idx, lmk_b_coords)
        # Residuals are scaled by 1000 before squaring
        # (presumably metres -> millimetres; TODO confirm the unit convention).
        lmk_dist = tf.reduce_sum(tf.square(1000 * tf.subtract(lmks, target_3d_lmks)))
        # The 12 pose values are regularized in three groups: [0:3], [3:6] and [6:12].
        neck_pose_reg = tf.reduce_sum(tf.square(tf_pose[:,:3]))
        jaw_pose_reg = tf.reduce_sum(tf.square(tf_pose[:,3:6]))
        eyeballs_pose_reg = tf.reduce_sum(tf.square(tf_pose[:,6:]))
        shape_reg = tf.reduce_sum(tf.square(tf_shape))
        exp_reg = tf.reduce_sum(tf.square(tf_exp))

        solver_options = {'disp': 1, 'ftol': 5e-6}

        # Optimize global transformation first
        rigid_vars = [tf_trans, tf_rot]
        rigid_loss = weights['lmk'] * lmk_dist
        optimizer = scipy_pt(loss=rigid_loss, var_list=rigid_vars, method='L-BFGS-B', options=dict(solver_options))
        print('Optimize rigid transformation')
        optimizer.minimize(session)

        # Optimize for the model parameters
        model_vars = [tf_trans, tf_rot, tf_pose, tf_shape, tf_exp]
        model_loss = weights['lmk'] * lmk_dist + weights['shape'] * shape_reg + weights['expr'] * exp_reg + \
                     weights['neck_pose'] * neck_pose_reg + weights['jaw_pose'] * jaw_pose_reg + \
                     weights['eyeballs_pose'] * eyeballs_pose_reg
        optimizer = scipy_pt(loss=model_loss, var_list=model_vars, method='L-BFGS-B', options=dict(solver_options))
        print('Optimize model parameters')
        optimizer.minimize(session)

        print('Fitting done')

        if show_fitting:
            # Visualize landmark fitting: target landmarks in red, fitted landmarks in blue
            mv = MeshViewer()
            mv.set_static_meshes(create_lmk_spheres(target_3d_lmks, 0.001, [255.0, 0.0, 0.0]))
            mv.set_dynamic_meshes([Mesh(session.run(tf_model), smpl.f)] +
                                  create_lmk_spheres(session.run(lmks), 0.001, [0.0, 0.0, 255.0]),
                                  blocking=True)
            six.moves.input('Press key to continue')

        return Mesh(session.run(tf_model), smpl.f)
def fit(img, j2ds, prior_path, model_dir, gen='n', camProject=None):
    '''
    Fit the body model loaded by loadSMPL to 2D joints of a single image.

    Stage 1 optimizes the global rotation and the camera translation using only the torso
    joints plus a penalty that keeps the camera depth near its initial estimate. Stage 2
    optimizes rotation, translation, pose and shape against all selected joints, a pose
    prior and a shape prior.

    :param img:         image array; only its shape is used for the camera centre (and the
                        pixels for the optional debug plot)
    :param j2ds:        target 2D joints, indexed both per joint and with an index list
                        (assumes a numpy array -- TODO confirm against caller)
    :param prior_path:  path passed to load_init_para for the initial parameters and pose prior
    :param model_dir:   model directory passed to loadSMPL
    :param gen:         gender flag passed to loadSMPL
    :param camProject:  ignored; a new Camera is always created here (kept for interface
                        compatibility)
    :return: (model_params, verts, faces, camRender) where model_params holds 'trans', 'pose'
             and 'shape', and camRender holds 'f', 'c', 't' and 'rt'
    '''
    smpl_joints_ids = [8, 5, 2, 1, 4, 7, 21, 19, 17, 16, 18, 20, 24]
    torso_ids = [2, 3, 8, 9]
    # list(...) keeps the concatenation valid on Python 3, where range() is not a list.
    cids = list(range(12)) + [13]
    flength = 5000
    center = np.array([img.shape[1]/2, img.shape[0]/2])

    sess = tf.Session()
    try:
        # Load model
        model = loadSMPL(model_dir, gender=gen)

        # Conf * base_weights(LSP data)
        base_weights = tf.reshape(
            tf.constant([1.,1.,0.5,0.5,1.,1.,1.,1.,1.,1.,1.,1.,1.], dtype=tf.float32), [-1, 1])

        # Load prior
        initial_param, pose_mean, pose_covariance = load_init_para(prior_path)
        pose_mean = tf.constant(pose_mean, dtype=tf.float32)
        pose_covariance = tf.constant(pose_covariance, dtype=tf.float32)

        param_shape = tf.Variable(initial_param[:10].reshape([1, -1]), dtype=tf.float32)
        param_rot = tf.Variable(initial_param[10:13].reshape([1, -1]), dtype=tf.float32)
        param_pose = tf.Variable(initial_param[13:82].reshape([1, -1]), dtype=tf.float32)
        param_trans = tf.Variable(initial_param[-3:].reshape([1, -1]), dtype=tf.float32)
        params = tf.concat([param_shape, param_rot, param_pose, param_trans], axis=1)

        # Get 3D joints
        j3ds, v = model.get_3d_joints(params, smpl_joints_ids)

        # Initial camera depth from the ratio of mean 3D to mean 2D length of two joint pairs.
        diff3d = tf.concat([[j3ds[9] - j3ds[3]], [j3ds[8] - j3ds[2]]], axis=0)
        m3h = tf.reduce_mean(tf.sqrt(tf.reduce_sum(tf.square(diff3d), axis=1)))
        diff2d = tf.constant(np.array([j2ds[9] - j2ds[3], j2ds[8] - j2ds[2]]), dtype=tf.float32)
        m2h = tf.reduce_mean(tf.sqrt(tf.reduce_sum(tf.square(diff2d), axis=1)))
        est_d = flength * (m3h / m2h)

        init_t = tf.concat([[0.], [0.], [est_d]], axis=0)
        rt = np.zeros(3)
        sess.run(tf.global_variables_initializer())
        init_t = init_t.eval(session=sess)
        camProject = Camera(flength, flength, center[0], center[1], init_t, rt)
        camProject.trans = tf.Variable(camProject.trans, dtype=tf.float32)
        # Second initialisation is required because the camera translation variable is new.
        sess.run(tf.global_variables_initializer())

        j2ds_model = tf.convert_to_tensor(camProject.project(j3ds))

        # Set to True to plot target (green) and projected (red) joints after each loss evaluation.
        VIS = False

        def lc(j2ds_est):
            '''Debug callback: draw target and estimated joints onto a copy of the image.'''
            import copy
            canvas = copy.copy(img)
            height, width = img.shape[0], img.shape[1]

            def draw(points, colour):
                for j2d in points:
                    row = int(j2d[1])
                    col = int(j2d[0])
                    # Skip points outside the image (the row is checked against the height
                    # and the column against the width).
                    if not (0 <= row < height and 0 <= col < width):
                        continue
                    canvas[row:row + 5, col:col + 5, :] = np.array(colour)

            draw(j2ds, [0, 255, 0])
            draw(j2ds_est, [255, 0, 0])
            plt.cla()
            plt.imshow(canvas)
            plt.pause(0.1)
            plt.show()

        ls = lc if VIS else None

        # Rigid stage: torso joints + camera depth prior
        objs = {}
        for idx, j in enumerate(torso_ids):
            objs['j2d_%d' % idx] = tf.reduce_sum(tf.square(j2ds_model[j] - j2ds[j]))
        objs['camt'] = tf.reduce_sum(tf.square(camProject.trans[2] - init_t[2]))
        # list(...) because a Python 3 dict view cannot be converted to a tensor.
        loss = tf.reduce_mean(list(objs.values()))
        optimizer = scipy_pt(loss=loss, var_list=[param_rot, camProject.trans],
                             options={'ftol':0.001, 'maxiter':50, 'disp':False}, method='L-BFGS-B')
        optimizer.minimize(sess, fetches = [j2ds_model], loss_callback=ls)

        # Non Rigid
        objs = {}
        pose_diff = tf.reshape(param_pose - pose_mean, [1, -1])
        objs['J2D_Loss'] = tf.reduce_sum(tf.square(j2ds_model - j2ds[cids]) * base_weights)
        objs['Prior_Loss'] = 5 * tf.squeeze(
            tf.matmul(tf.matmul(pose_diff, pose_covariance), tf.transpose(pose_diff)))
        objs['Prior_Shape'] = 5 * tf.squeeze(tf.reduce_sum(tf.square(param_shape)))
        loss = tf.reduce_mean(list(objs.values()))
        optimizer = scipy_pt(loss=loss, var_list=[param_rot, param_trans, param_pose, param_shape],
                             options={'ftol':.001, 'maxiter':100, 'disp':False}, method='L-BFGS-B')
        optimizer.minimize(sess, fetches = [j2ds_model], loss_callback=ls)
        plt.close('all')

        verts = sess.run(v)
        faces = sess.run(model.f).astype(int)
        pose, betas, trans = sess.run(
            [tf.concat([param_rot, param_pose], axis=1), param_shape, param_trans])
        # Returned vertices are expressed without the model translation.
        verts = verts - trans
        model_params = {'trans': trans, 'pose': pose, 'shape': betas}

        t = sess.run(camProject.trans)
        camRender = {'f': np.array([flength, flength]), 'c': center, 't': np.array(t), 'rt': rt}
    finally:
        # Release the session even when graph construction or optimization fails.
        sess.close()

    return model_params, verts, faces, camRender
sess.run(tf.global_variables_initializer()) objs = {} #objs['J2D_Loss'] = tf.reduce_sum( tf.square(j2ds_est - j2ds) ) j3d_gt = np.random.rand(11, 3) objs['J3D_Loss'] = tf.reduce_sum(tf.square(res_batch[0] - j3d_gt)) loss = tf.reduce_mean(objs.values()) #optimizer.minimize(sess, fetches = [j3ds, params, objs, j2ds_est], loss_callback=lc) from tensorflow.contrib.opt import ScipyOptimizerInterface as scipy_pt optimizer = scipy_pt( loss=loss, var_list=[param_rot, param_shape, param_pose, param_trans], options={ 'ftol': 0.001, 'maxiter': 500, 'disp': True }, method='L-BFGS-B') def lc(loss, res): return m = Mesh(v=res) m2 = Mesh(v=j3d_gt) m.show() m2.show() import time time.sleep(3) optimizer.minimize(sess,
def fit_3D_mesh(target_3d_mesh_fname, template_fname, tf_model_fname, weights, show_fitting=True):
    '''
    Fit FLAME to 3D mesh in correspondence to the FLAME mesh (i.e. same number of vertices, same mesh topology)

    The model graph is imported from tf_model_fname + '.meta' and its variables are restored from
    tf_model_fname. The fit first optimizes translation and rotation only, then all parameters.

    :param target_3d_mesh_fname:    target 3D mesh filename
    :param template_fname:          template mesh in FLAME topology (only the face information are used)
    :param tf_model_fname:          saved Tensorflow FLAME model
    :param weights:                 weights of the individual objective functions; the keys read here are
                                    'shape', 'expr', 'neck_pose', 'jaw_pose' and 'eyeballs_pose'
    :param show_fitting:            if True, show target and fitted mesh in a MeshViewer and wait for input
    :return: a mesh with the fitting results, or None (after printing a message) if the target mesh and the
             template mesh have a different number of vertices
    '''

    target_mesh = Mesh(filename=target_3d_mesh_fname)
    template_mesh = Mesh(filename=template_fname)

    if target_mesh.v.shape[0] != template_mesh.v.shape[0]:
        print('Target mesh does not have the same number of vertices')
        return

    saver = tf.train.import_meta_graph(tf_model_fname + '.meta')

    graph = tf.get_default_graph()
    tf_model = graph.get_tensor_by_name(u'vertices:0')

    with tf.Session() as session:
        saver.restore(session, tf_model_fname)

        # Workaround as existing tf.Variable cannot be retrieved back with tf.get_variable
        trainable = tf.trainable_variables()

        def first_trainable(tag):
            # First trainable variable whose name contains `tag`; raises IndexError if none does.
            return [x for x in trainable if tag in x.name][0]

        tf_trans = first_trainable('trans')
        tf_rot = first_trainable('rot')
        tf_pose = first_trainable('pose')
        tf_shape = first_trainable('shape')
        tf_exp = first_trainable('exp')

        mesh_dist = tf.reduce_sum(tf.square(tf.subtract(tf_model, target_mesh.v)))
        # NOTE(review): these slices act on the leading axis of the restored pose variable, which is
        # only correct if that variable is 1-D -- confirm against the saved model.
        neck_pose_reg = tf.reduce_sum(tf.square(tf_pose[:3]))
        jaw_pose_reg = tf.reduce_sum(tf.square(tf_pose[3:6]))
        eyeballs_pose_reg = tf.reduce_sum(tf.square(tf_pose[6:]))
        shape_reg = tf.reduce_sum(tf.square(tf_shape))
        exp_reg = tf.reduce_sum(tf.square(tf_exp))

        # Optimize global transformation first
        rigid_vars = [tf_trans, tf_rot]
        rigid_loss = mesh_dist
        optimizer = scipy_pt(loss=rigid_loss, var_list=rigid_vars, method='L-BFGS-B', options={'disp': 1})
        print('Optimize rigid transformation')
        optimizer.minimize(session)

        # Optimize for the model parameters
        model_vars = [tf_trans, tf_rot, tf_pose, tf_shape, tf_exp]
        model_loss = mesh_dist + weights['shape'] * shape_reg + weights['expr'] * exp_reg + \
                     weights['neck_pose'] * neck_pose_reg + weights['jaw_pose'] * jaw_pose_reg + \
                     weights['eyeballs_pose'] * eyeballs_pose_reg
        optimizer = scipy_pt(loss=model_loss, var_list=model_vars, method='L-BFGS-B', options={'disp': 1})
        print('Optimize model parameters')
        optimizer.minimize(session)

        print('Fitting done')

        if show_fitting:
            # Visualize fitting
            mv = MeshViewer()
            fitting_mesh = Mesh(session.run(tf_model), template_mesh.f)
            fitting_mesh.set_vertex_colors('light sky blue')

            mv.set_static_meshes([target_mesh, fitting_mesh])
            # raw_input only exists on Python 2; fall back to input on Python 3.
            try:
                wait_for_key = raw_input
            except NameError:
                wait_for_key = input
            wait_for_key('Press key to continue')

        return Mesh(session.run(tf_model), template_mesh.f)
def __init__(
    self,
    landmark_dir,
    neutral_mesh_faces,
    dd,
    lmk_face_idx,
    lmk_b_coords,
):
    """Build the TensorFlow graph and the two optimizers for 2D-landmark fitting.

    Everything is created inside a private graph (``self.graph1``). Target
    landmarks are held in variables so they can be assigned later without
    rebuilding the graph.

    :param landmark_dir: not used in this constructor
    :param neutral_mesh_faces: faces of the template mesh, stored as a constant
    :param dd: argument forwarded to ``SMPL(...)`` to build the model layer
    :param lmk_face_idx: face indices of the landmark embedding
    :param lmk_b_coords: barycentric coordinates of the landmark embedding
    """
    os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
    tf.compat.v1.reset_default_graph()
    tf.keras.backend.clear_session()
    self.graph1 = tf.Graph()
    with self.graph1.as_default():
        weights = {
            "lmk": 1.0,
            "shape": 1e-3,
            "expr": 1e-3,
            "neck_pose": 100.0,
            "jaw_pose": 1e-3,
            "eyeballs_pose": 10.0,
        }
        self.template_mesh = tf.constant(neutral_mesh_faces)

        # 51 target landmarks, x and y kept as separate (51, 1) variables.
        self.target_2d_lmks_x = tf.Variable(np.zeros((51, 1)))
        self.target_2d_lmks_y = tf.Variable(np.zeros((51, 1)))
        # y is mirrored about 1024 (presumably the image height -- TODO confirm).
        self.target_2d_lmks = tf.concat(
            [self.target_2d_lmks_x, 1024 - self.target_2d_lmks_y], axis=1
        )

        self.tf_trans = tf.Variable(
            np.zeros((1, 3)), name="trans", dtype=tf.float64, trainable=True
        )
        self.tf_rot = tf.Variable(
            np.zeros((1, 3)), name="rot", dtype=tf.float64, trainable=True
        )
        self.tf_pose = tf.Variable(
            np.zeros((1, 12)), name="pose", dtype=tf.float64, trainable=True
        )
        self.tf_shape = tf.Variable(
            np.zeros((1, 300)), name="shape", dtype=tf.float64, trainable=True
        )
        self.tf_exp = tf.Variable(
            np.zeros((1, 100)), name="expression", dtype=tf.float64, trainable=True
        )

        smpl = SMPL(dd)
        self.tf_model = tf.squeeze(
            smpl(
                self.tf_trans,
                tf.concat((self.tf_shape, self.tf_exp), axis=-1),
                tf.concat((self.tf_rot, self.tf_pose), axis=-1),
            )
        )

        lmks_3d = self.tf_get_model_lmks(
            self.tf_model, self.template_mesh, lmk_face_idx, lmk_b_coords
        )

        # Mean distance of the landmarks from their centroid, in 2D (targets)
        # and in the x/y plane of the model landmarks; their ratio seeds the scale.
        self.s2d = tf.reduce_mean(
            tf.linalg.norm(
                self.target_2d_lmks - tf.reduce_mean(self.target_2d_lmks, axis=0),
                axis=1,
            )
        )
        self.s3d = tf.reduce_mean(
            tf.sqrt(
                tf.reduce_sum(
                    tf.square(lmks_3d - tf.reduce_mean(lmks_3d, axis=0))[:, :2],
                    axis=1,
                )
            )
        )
        self.tf_scale = tf.Variable(self.s2d / self.s3d, dtype=lmks_3d.dtype)

        self.lmks_proj_2d = self.tf_project_points(
            lmks_3d, self.tf_scale, np.zeros(2)
        )

        # Normalise the landmark error by the larger side of the target bounding box.
        factor = tf.math.maximum(
            tf.math.reduce_max(self.target_2d_lmks[:, 0])
            - tf.math.reduce_min(self.target_2d_lmks[:, 0]),
            tf.math.reduce_max(self.target_2d_lmks[:, 1])
            - tf.math.reduce_min(self.target_2d_lmks[:, 1]),
        )

        self.lmk_dist = (
            weights["lmk"]
            * tf.reduce_sum(
                tf.square(tf.subtract(self.lmks_proj_2d, self.target_2d_lmks))
            )
            / (factor ** 2)
        )

        # tf_pose has shape (1, 12), so the pose groups must be sliced along the
        # second axis. Slicing the first axis ([:3], [3:6], [6:]) would put all
        # twelve values into the neck term and leave the other two terms empty.
        self.neck_pose_reg = weights["neck_pose"] * tf.reduce_sum(
            tf.square(self.tf_pose[:, :3])
        )
        self.jaw_pose_reg = weights["jaw_pose"] * tf.reduce_sum(
            tf.square(self.tf_pose[:, 3:6])
        )
        self.eyeballs_pose_reg = weights["eyeballs_pose"] * tf.reduce_sum(
            tf.square(self.tf_pose[:, 6:])
        )
        self.shape_reg = weights["shape"] * tf.reduce_sum(tf.square(self.tf_shape))
        self.exp_reg = weights["expr"] * tf.reduce_sum(tf.square(self.tf_exp))

        # Stage 1: scale and rigid transformation against the landmark term only.
        self.optimizer1 = scipy_pt(
            loss=self.lmk_dist,
            var_list=[self.tf_scale, self.tf_trans, self.tf_rot],
            method="L-BFGS-B",
            options={"disp": 0, "ftol": 5e-6},
        )

        # Stage 2: all parameters against the landmark term plus regularisers.
        loss = (
            self.lmk_dist
            + self.shape_reg
            + self.exp_reg
            + self.neck_pose_reg
            + self.jaw_pose_reg
            + self.eyeballs_pose_reg
        )

        # NOTE(review): tf_trans has shape (1, 3), so tf_trans[:2] slices the
        # leading axis and yields the whole row rather than the x/y components.
        # Left unchanged because the intended behaviour cannot be confirmed here.
        self.optimizer2 = scipy_pt(
            loss=loss,
            var_list=[
                self.tf_scale,
                self.tf_trans[:2],
                self.tf_rot,
                self.tf_pose,
                self.tf_shape,
                self.tf_exp,
            ],
            method="L-BFGS-B",
            options={"disp": 0, "ftol": 1e-7},
        )
def refine_optimization(poses, betas, trans, data_dict, hmr_dict, LR_cameras, texture_img, texture_vt, Util):
    """Jointly refine per-frame body parameters over a sequence and render the results.

    Builds one loss over all frames (2D reprojection of body/face/head/foot joints,
    pose and shape priors, an elbow/knee angle term, a constraint towards the HMR
    pose and temporal smoothness terms), minimizes it with the scipy optimizer
    interface, then writes rendered images, an optional video and one pickle per
    frame below ``Util.hmr_path + Util.refine_output_path``.

    :param poses: initial poses; columns 0:3 are used as global rotation, 3:72 as body pose
    :param betas: initial shape parameters (reshaped to 10 columns)
    :param trans: initial translations (reshaped to 3 columns)
    :param data_dict: holds 'j2ds', 'confs', 'j2ds_face', 'confs_face', 'j2ds_head',
        'confs_head', 'j2ds_foot', 'confs_foot' and 'imgs'
    :param hmr_dict: holds 'hmr_thetas' (the only entry used here)
    :param LR_cameras: camera list; only LR_cameras[0][0..2] are read (used as focal
        length and principal point)
    :param texture_img: ignored; reloaded from disk in "full" mode
    :param texture_vt: ignored; reloaded from disk in "full" mode
    :param Util: project configuration/helper object; ``img_width``, ``img_height`` and
        ``img_widthheight`` are written onto it as a side effect
    :return: None
    """
    frame_num = len(poses)
    start_time = time.time()
    start_time_total = time.time()

    # Result is unused here; the call is kept in case it has side effects.
    Util.load_pose_pkl()

    j2ds = data_dict["j2ds"][:frame_num,:,:]
    confs = data_dict["confs"][:frame_num,:]
    j2ds_face = data_dict["j2ds_face"][:frame_num,:,:]
    confs_face = data_dict["confs_face"][:frame_num,:]
    j2ds_head = data_dict["j2ds_head"][:frame_num,:,:]
    confs_head = data_dict["confs_head"][:frame_num,:]
    j2ds_foot = data_dict["j2ds_foot"][:frame_num,:,:]
    confs_foot = data_dict["confs_foot"][:frame_num,:]
    imgs = data_dict["imgs"][:frame_num]

    Util.img_width = imgs[0].shape[1]
    Util.img_height = imgs[0].shape[0]
    # Width and height packed into one integer: "1" + 4-digit width + 4-digit height.
    Util.img_widthheight = int("1" + "%04d" % Util.img_width + "%04d" % Util.img_height)

    hmr_thetas = hmr_dict["hmr_thetas"][:frame_num,:]

    smpl_model = SMPL(Util.SMPL_COCO_PATH, Util.SMPL_NORMAL_PATH)

    # Result is unused here; the call is kept in case it has side effects.
    Util.load_initial_param()

    param_shapes = tf.Variable(betas.reshape([-1, 10])[:frame_num,:], dtype=tf.float32)
    param_rots = tf.Variable(poses[:frame_num, :3].reshape([-1, 3])[:frame_num,:], dtype=tf.float32)
    param_poses = tf.Variable(poses[:frame_num, 3:72].reshape([-1, 69])[:frame_num,:], dtype=tf.float32)
    param_trans = tf.Variable(trans[:frame_num,:].reshape([-1, 3])[:frame_num,:], dtype=tf.float32)
    initial_param_tf = tf.concat([param_shapes, param_rots, param_poses, param_trans], axis=1)  ## N * (72+10+3)

    cam = Perspective_Camera(LR_cameras[0][0], LR_cameras[0][0], LR_cameras[0][1],
                             LR_cameras[0][2], np.zeros(3), np.zeros(3))
    j3ds, v, j3dsplus = smpl_model.get_3d_joints(initial_param_tf, Util.SMPL_JOINT_IDS)

    #### divide into different body parts
    j3ds_body = tf.reshape(j3ds[:, 2:, :], [-1, 3])
    j3ds_head = tf.reshape(j3ds[:, 14:16, :], [-1, 3])
    j3ds_foot = tf.reshape(j3ds[:, :2, :], [-1, 3])
    j3ds_face = tf.reshape(j3dsplus[:, 14:19, :], [-1, 3])

    j2ds_body_est = cam.project(tf.squeeze(j3ds_body))
    j2ds_head_est = cam.project(tf.squeeze(j3ds_head))
    j2ds_foot_est = cam.project(tf.squeeze(j3ds_foot))
    j2ds_face_est = cam.project(tf.squeeze(j3ds_face))

    v = tf.reshape(v, [-1, 3])  ## (N*6890) * 3
    verts_est = cam.project(tf.squeeze(v))  ## (N*6890) * 2

    # TODO convert the loss function into batch input
    objs = {}

    # Body joints: confidence times a fixed per-joint mask (14 entries per frame).
    j2ds = j2ds.reshape([-1, 2])
    confs = confs.reshape(-1)
    base_weights = np.array([1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0])
    base_weights = np.tile(base_weights, frame_num)
    weights = tf.constant(confs * base_weights, dtype=tf.float32)
    objs['J2D_Loss'] = Util.J2D_refine_Loss * tf.reduce_sum(
        weights * tf.reduce_sum(tf.square(j2ds_body_est - j2ds), 1))

    # Face joints (5 entries per frame).
    j2ds_face = j2ds_face.reshape([-1, 2])
    confs_face = confs_face.reshape(-1)
    base_weights_face = np.tile(np.array([1.0, 1.0, 1.0, 1.0, 1.0]), frame_num)
    weights_face = tf.constant(confs_face * base_weights_face, dtype=tf.float32)
    objs['J2D_face_Loss'] = Util.J2D_face_refine_Loss * tf.reduce_sum(
        weights_face * tf.reduce_sum(tf.square(j2ds_face_est - j2ds_face), 1))

    # Head joints (2 entries per frame).
    j2ds_head = j2ds_head.reshape([-1, 2])
    confs_head = confs_head.reshape(-1)
    base_weights_head = np.tile(np.array([1.0, 1.0]), frame_num)
    weights_head = tf.constant(confs_head * base_weights_head, dtype=tf.float32)
    objs['J2D_head_Loss'] = Util.J2D_head_refine_Loss * tf.reduce_sum(
        weights_head * tf.reduce_sum(tf.square(j2ds_head - j2ds_head_est), 1))

    # Foot joints (2 entries per frame).
    j2ds_foot = j2ds_foot.reshape([-1, 2])
    confs_foot = confs_foot.reshape(-1)
    base_weights_foot = np.tile(np.array([1.0, 1.0]), frame_num)
    weights_foot = tf.constant(confs_foot * base_weights_foot, dtype=tf.float32)
    objs['J2D_foot_Loss'] = Util.J2D_foot_refine_Loss * tf.reduce_sum(
        weights_foot * tf.reduce_sum(tf.square(j2ds_foot - j2ds_foot_est), 1))

    # TODO try L1, L2 or other penalty function
    # Keep the body pose close to the pose passed in, and the shape close to zero.
    objs['Prior_Loss'] = Util.Prior_Loss_refine * tf.reduce_sum(
        tf.square(param_poses - poses[:frame_num, 3:72]))
    objs['Prior_Shape'] = 5.0 * tf.reduce_sum(tf.square(param_shapes))

    # Exponential penalty on four pose entries (elbows and knees per the loss name).
    w1 = np.array([1.04 * 2.0, 1.04 * 2.0, 5.4 * 2.0, 5.4 * 2.0])
    w1 = tf.constant(w1, dtype=tf.float32)
    objs["angle_elbow_knee"] = 0.005 * tf.reduce_sum(
        w1[0] * tf.exp(param_poses[:, 52]) + w1[1] * tf.exp(-param_poses[:, 55]) +
        w1[2] * tf.exp(-param_poses[:, 9]) + w1[3] * tf.exp(-param_poses[:, 12]))

    param_pose_full = tf.concat([param_rots, param_poses], axis=1)  ## N * 72
    objs['hmr_constraint'] = Util.hmr_constraint_refine * tf.reduce_sum(
        tf.square(tf.squeeze(param_pose_full) - hmr_thetas))

    # Temporal smoothness between consecutive frames; one weight per joint
    # (16 weights, so j3ds is assumed to hold 16 joints per frame -- TODO confirm).
    w_temporal = [0.5, 0.5, 1.0, 1.5, 2.5, 2.5, 1.5, 1.0, 1.0, 1.5, 2.5, 2.5, 1.5, 1.0, 7.0, 7.0]
    for i in range(frame_num - 1):
        j3d_old = j3ds[i, :, :]
        j3d = j3ds[i + 1, :, :]
        j2d_old = cam.project(tf.squeeze(tf.reshape(j3d_old, [-1, 3])))
        j2d = cam.project(tf.squeeze(tf.reshape(j3d, [-1, 3])))
        param_pose_old = param_poses[i, :]
        param_pose = param_poses[i + 1, :]

        term3d = Util.temporal3d_refine * tf.reduce_sum(
            w_temporal * tf.reduce_sum(tf.square(j3d - j3d_old), 1))
        term2d = Util.temporal2d_refine * tf.reduce_sum(
            w_temporal * tf.reduce_sum(tf.square(j2d - j2d_old), 1))
        term_pose = Util.temporal_pose_refine * tf.reduce_sum(
            tf.square(param_pose_old - param_pose))

        if i == 0:
            objs['temporal3d'] = term3d
            objs['temporal2d'] = term2d
            objs['temporal_pose'] = term_pose
        else:
            objs['temporal3d'] = objs['temporal3d'] + term3d
            objs['temporal2d'] = objs['temporal2d'] + term2d
            objs['temporal_pose'] = objs['temporal_pose'] + term_pose

    # TODO add optical flow constraint

    # optimization process
    # list(...) because a Python 3 dict view cannot be converted to a tensor.
    loss = tf.reduce_sum(list(objs.values()))
    duration = time.time() - start_time
    print("pre-processing time is %f" % duration)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        optimizer = scipy_pt(loss=loss,
                             var_list=[param_shapes, param_rots, param_trans, param_poses, cam.cx, cam.cy],
                             options={'eps': 1e-6, 'ftol': 1e-6, 'maxiter': 10000, 'disp': False})
        print(">>>>>>>>>>>>>start to optimize<<<<<<<<<<<")
        start_time = time.time()
        optimizer.minimize(sess)
        duration = time.time() - start_time
        print("minimize is %f" % duration)

        start_time = time.time()
        poses_final, betas_final, trans_final, cam_cx, cam_cy, v_final, verts_est_final, j3ds_final, _objs = sess.run(
            [tf.concat([param_rots, param_poses], axis=1),
             param_shapes, param_trans, cam.cx, cam.cy, v, verts_est, j3ds, objs])
        v_final = v_final.reshape([frame_num, 6890, 3])
        duration = time.time() - start_time
        print("run time is %f" % duration)

        start_time = time.time()
        # The entries have different shapes (three scalars and a 3-vector), so the
        # array must be an object array; recent NumPy refuses to infer that implicitly.
        cam_for_save = np.array([LR_cameras[0][0], cam_cx, cam_cy, np.zeros(3)], dtype=object)
        camera = render.camera(cam_for_save[0], cam_for_save[1], cam_for_save[2], cam_for_save[3],
                               Util.img_widthheight)

        output_path = Util.hmr_path + Util.refine_output_path
        if not os.path.exists(output_path):
            os.makedirs(output_path)
        if not os.path.exists(output_path + "output_mask"):
            os.makedirs(output_path + "output_mask")

        videowriter = None
        for ind in range(frame_num):
            print(">>>>>>>>>>>>>>>>>>>>>>%d index frame<<<<<<<<<<<<<<<<<<<<<<" % ind)
            if Util.mode == "full":
                smpl = smpl_np.SMPLModel('./smpl/models/basicmodel_m_lbs_10_207_0_v1.0.0.pkl')
                template = np.load(Util.texture_path + "template.npy")
                smpl.set_template(template)
                frame_verts = smpl.get_verts(poses_final[ind, :], betas_final[ind, :], trans_final[ind, :])

                texture_vt = np.load(Util.texture_path + "vt.npy")
                texture_img = cv2.imread(Util.texture_path + "../../output_nonrigid/texture.png")
                img_result_texture = camera.render_texture(frame_verts, texture_img, texture_vt)
                cv2.imwrite(output_path + "/hmr_optimization_texture_%04d.png" % ind, img_result_texture)
                img_bg = cv2.resize(imgs[ind], (Util.img_width, Util.img_height))
                img_result_texture_bg = camera.render_texture_imgbg(img_result_texture, img_bg)
                cv2.imwrite(output_path + "/texture_bg_%04d.png" % ind, img_result_texture_bg)

                if Util.video is True:
                    if ind == 0:
                        fps = 15
                        size = (imgs[0].shape[1], imgs[0].shape[0])
                        video_path = output_path + "/texture.mp4"
                        videowriter = cv2.VideoWriter(video_path, cv2.VideoWriter_fourcc('D', 'I', 'V', 'X'),
                                                      fps, size)
                    videowriter.write(img_result_texture)

                img_result_naked = camera.render_naked(frame_verts, imgs[ind])
                img_result_naked = img_result_naked[:, :, :3]
                cv2.imwrite(output_path + "/hmr_optimization_%04d.png" % ind, img_result_naked)
                bg = np.ones_like(imgs[ind]).astype(np.uint8) * 255
                img_result_naked1 = camera.render_naked(frame_verts, bg)
                cv2.imwrite(output_path + "/hmr_optimization_naked_%04d.png" % ind, img_result_naked1)
                img_result_naked_rotation = camera.render_naked_rotation(frame_verts, 90, imgs[ind])
                cv2.imwrite(output_path + "/hmr_optimization_rotation_%04d.png" % ind, img_result_naked_rotation)

                res = {'pose': poses_final[ind, :], 'betas': betas_final[ind, :], 'trans': trans_final[ind, :],
                       'cam_HR': cam_for_save, 'j3ds': j3ds_final[ind, :]}
                with open(output_path + "/hmr_optimization_pose_%04d.pkl" % ind, 'wb') as fout:
                    pkl.dump(res, fout)

            if Util.mode == "pose":
                img_result_naked = camera.render_naked(v_final[ind, :, :], imgs[ind])
                img_result_naked = img_result_naked[:, :, :3]
                cv2.imwrite(output_path + "/hmr_optimization_%04d.png" % ind, img_result_naked)

                if Util.video is True:
                    if ind == 0:
                        fps = 15
                        size = (imgs[0].shape[1], imgs[0].shape[0])
                        video_path = output_path + "/texture.mp4"
                        videowriter = cv2.VideoWriter(video_path, cv2.VideoWriter_fourcc('D', 'I', 'V', 'X'),
                                                      fps, size)
                    videowriter.write(img_result_naked)

                bg = np.ones_like(imgs[ind]).astype(np.uint8) * 255
                img_result_naked1 = camera.render_naked(v_final[ind, :, :], bg)
                cv2.imwrite(output_path + "/hmr_optimization_naked_%04d.png" % ind, img_result_naked1)
                img_result_naked_rotation = camera.render_naked_rotation(v_final[ind, :, :], 90, imgs[ind])
                cv2.imwrite(output_path + "/hmr_optimization_rotation_%04d.png" % ind, img_result_naked_rotation)

                # Note: the camera is stored under 'cam' here but under 'cam_HR' in "full" mode.
                res = {'pose': poses_final[ind, :], 'betas': betas_final[ind, :], 'trans': trans_final[ind, :],
                       'cam': cam_for_save, 'j3ds': j3ds_final[ind, :]}
                with open(output_path + "/hmr_optimization_pose_%04d.pkl" % ind, 'wb') as fout:
                    pkl.dump(res, fout)

        # Finalize the video file explicitly instead of relying on garbage collection.
        if videowriter is not None:
            videowriter.release()

        for name in _objs:
            print("the %s loss is %f" % (name, _objs[name]))

    duration = time.time() - start_time
    print("post-processing time is %f" % duration)
    duration = time.time() - start_time_total
    print("total time is %f" % duration)
def wh(img_path):
    """Fit the body model to multi-view 2D joints and save the result.

    Loads ``util.NUM_VIEW`` views for ``img_path``, optimizes global rotation and
    translation against the torso joints, then rotation, translation, pose and
    shape against all joints plus pose and shape priors. The fitted mesh is
    written as a ``.ply`` and the parameters as a ``.pkl`` next to it; the output
    path is ``img_path`` with 'Image' replaced by 'Res_1' and the extension swapped.

    :param img_path: path of the input image (also determines the output paths)
    :return: None
    """
    print(img_path)
    imgs, j2ds, segs, cams = util.load_data(img_path, util.NUM_VIEW)

    initial_param, pose_mean, pose_covariance = util.load_initial_param()
    pose_mean = tf.constant(pose_mean, dtype=tf.float32)
    pose_covariance = tf.constant(pose_covariance, dtype=tf.float32)

    param_shape = tf.Variable(initial_param[:10].reshape([1, -1]), dtype=tf.float32)
    param_rot = tf.Variable(initial_param[10:13].reshape([1, -1]), dtype=tf.float32)
    param_pose = tf.Variable(initial_param[13:82].reshape([1, -1]), dtype=tf.float32)
    param_trans = tf.Variable(initial_param[-3:].reshape([1, -1]), dtype=tf.float32)
    param = tf.concat([param_shape, param_rot, param_pose, param_trans], axis=1)

    smpl_model = SMPL(util.SMPL_PATH)
    j3ds, v = smpl_model.get_3d_joints(param, util.SMPL_JOINT_IDS)
    j3ds = tf.reshape(j3ds, [-1, 3])

    # Project the 3D joints into every view.
    j2ds_est = tf.convert_to_tensor(
        [cams[idx].project(tf.squeeze(j3ds)) for idx in range(0, util.NUM_VIEW)])

    def lc(j2d_est):
        """Debug callback: plot target (blue) and estimated (red) joints per view."""
        import copy
        _, ax = plt.subplots(1, 3)
        for idx in range(0, util.NUM_VIEW):
            canvas = copy.copy(imgs[idx])
            height, width = canvas.shape[0], canvas.shape[1]
            for points, colour in ((j2ds[idx], [0, 0, 255]), (j2d_est[idx], [255, 0, 0])):
                for j2d in points:
                    row = int(j2d[1])
                    col = int(j2d[0])
                    # Skip points outside the image (the row is checked against the
                    # height and the column against the width).
                    if not (0 <= row < height and 0 <= col < width):
                        continue
                    canvas[row:row + 5, col:col + 5, :] = np.array(colour)
            ax[idx].imshow(canvas)
        plt.show()

    func_lc = lc if util.VIS_OR_NOT else None

    # Rigid stage: torso joints of every view
    objs = {}
    for idx in range(0, util.NUM_VIEW):
        for j, jdx in enumerate(util.TORSO_IDS):
            objs['J2D_%d_%d' % (idx, j)] = tf.reduce_sum(
                tf.square(j2ds_est[idx][jdx] - j2ds[idx][jdx]))
    # list(...) because a Python 3 dict view cannot be converted to a tensor.
    loss = tf.reduce_mean(list(objs.values()))

    sess = tf.Session()
    try:
        sess.run(tf.global_variables_initializer())

        optimizer = scipy_pt(loss=loss, var_list=[param_rot, param_trans],
                             options={'ftol': 0.001, 'maxiter': 500, 'disp': True},
                             method='L-BFGS-B')
        optimizer.minimize(sess, fetches=[j2ds_est], loss_callback=func_lc)

        # Non-rigid stage: all joints plus pose and shape priors
        objs = {}
        pose_diff = tf.reshape(param_pose - pose_mean, [1, -1])
        objs['J2D_Loss'] = tf.reduce_sum(tf.square(j2ds_est - j2ds))
        objs['Prior_Loss'] = 5 * tf.squeeze(
            tf.matmul(tf.matmul(pose_diff, pose_covariance), tf.transpose(pose_diff)))
        objs['Prior_Shape'] = 5 * tf.squeeze(tf.reduce_sum(tf.square(param_shape)))
        loss = tf.reduce_mean(list(objs.values()))

        optimizer = scipy_pt(loss=loss,
                             var_list=[param_rot, param_trans, param_pose, param_shape],
                             options={'ftol': 0.001, 'maxiter': 500, 'disp': True},
                             method='L-BFGS-B')
        optimizer.minimize(sess, fetches=[j2ds_est], loss_callback=func_lc)

        v_final = sess.run(v)
        model_f = sess.run(smpl_model.f)
        model_f = model_f.astype(int).tolist()

        pose_final, betas_final, trans_final = sess.run(
            [tf.concat([param_rot, param_pose], axis=1), param_shape, param_trans])
    finally:
        # Release the session even when optimization fails.
        sess.close()

    from psbody.meshlite import Mesh
    m = Mesh(v=np.squeeze(v_final), f=model_f)

    # Swap only the trailing extension. str.replace on the extension would also
    # rewrite other occurrences in the path and mangles paths without an extension.
    out_base = os.path.splitext(img_path.replace('Image', 'Res_1'))[0]
    out_ply_path = out_base + '.ply'
    out_pkl_path = out_base + '.pkl'

    m.write_ply(out_ply_path)

    res = {'pose': pose_final, 'betas': betas_final, 'trans': trans_final}
    with open(out_pkl_path, 'wb') as fout:
        pkl.dump(res, fout)
def fit_lmk2d(target_img, target_2d_lmks, template_fname, model_fname,
              lmk_face_idx, lmk_b_coords, weights):
    '''
    Fit FLAME to 2D landmarks
    :param target_img:      image the landmarks belong to; its height (shape[0]) is used to mirror y-coordinates
    :param target_2d_lmks:  target 2D landmarks provided as (num_lmks x 2) matrix; the y column is mirrored in place
    :param template_fname:  template mesh in FLAME topology (only the face information are used)
    :param model_fname:     saved FLAME model
    :param lmk_face_idx:    face indices of the landmark embedding in the FLAME topology
    :param lmk_b_coords:    barycentric coordinates of the landmark embedding in the FLAME topology
                            (i.e. weighting of the three vertices for the triangle, the landmark is embedded in)
    :param weights:         weights of the individual objective functions; the keys read here are
                            'lmk', 'neck_pose', 'jaw_pose', 'eyeballs_pose', 'shape' and 'expr'
    :return: tuple (fitted mesh, fitted scale, expression basis); the basis has one column per expression
             coefficient, holding the flattened vertex offsets obtained by setting that coefficient to 1
    '''

    template_mesh = Mesh(filename=template_fname)

    tf_trans = tf.Variable(np.zeros((1, 3)), name="trans", dtype=tf.float64, trainable=True)
    tf_rot = tf.Variable(np.zeros((1, 3)), name="pose", dtype=tf.float64, trainable=True)
    tf_pose = tf.Variable(np.zeros((1, 12)), name="pose", dtype=tf.float64, trainable=True)
    tf_shape = tf.Variable(np.zeros((1, 300)), name="shape", dtype=tf.float64, trainable=True)
    tf_exp = tf.Variable(np.zeros((1, 100)), name="expression", dtype=tf.float64, trainable=True)
    smpl = SMPL(model_fname)
    tf_model = tf.squeeze(
        smpl(tf_trans,
             tf.concat((tf_shape, tf_exp), axis=-1),
             tf.concat((tf_rot, tf_pose), axis=-1)))

    with tf.Session() as session:
        session.run(tf.global_variables_initializer())

        # Mirror landmark y-coordinates
        target_2d_lmks[:, 1] = target_img.shape[0] - target_2d_lmks[:, 1]

        lmks_3d = tf_get_model_lmks(tf_model, template_mesh, lmk_face_idx, lmk_b_coords)

        # Initial scale: ratio of the mean landmark spread in the image to the
        # mean x/y spread of the model landmarks.
        s2d = np.mean(
            np.linalg.norm(target_2d_lmks - np.mean(target_2d_lmks, axis=0), axis=1))
        s3d = tf.reduce_mean(
            tf.sqrt(
                tf.reduce_sum(
                    tf.square(lmks_3d - tf.reduce_mean(lmks_3d, axis=0))[:, :2],
                    axis=1)))
        tf_scale = tf.Variable(s2d / s3d, dtype=lmks_3d.dtype)

        lmks_proj_2d = tf_project_points(lmks_3d, tf_scale, np.zeros(2))

        # Normalise the landmark error by the larger side of the landmark bounding box.
        factor = max(
            max(target_2d_lmks[:, 0]) - min(target_2d_lmks[:, 0]),
            max(target_2d_lmks[:, 1]) - min(target_2d_lmks[:, 1]))
        lmk_dist = weights['lmk'] * tf.reduce_sum(
            tf.square(tf.subtract(lmks_proj_2d, target_2d_lmks))) / (factor**2)

        # tf_pose has shape (1, 12): the pose components live on axis 1, so
        # the slices must skip the leading batch axis.  Slicing axis 0
        # instead would apply the neck weight to all 12 values and leave the
        # jaw and eyeball terms summing over empty tensors.
        neck_pose_reg = weights['neck_pose'] * tf.reduce_sum(
            tf.square(tf_pose[:, :3]))
        jaw_pose_reg = weights['jaw_pose'] * tf.reduce_sum(
            tf.square(tf_pose[:, 3:6]))
        eyeballs_pose_reg = weights['eyeballs_pose'] * tf.reduce_sum(
            tf.square(tf_pose[:, 6:]))
        shape_reg = weights['shape'] * tf.reduce_sum(tf.square(tf_shape))
        exp_reg = weights['expr'] * tf.reduce_sum(tf.square(tf_exp))

        # Second initialisation so that tf_scale, created above, is initialised too.
        session.run(tf.global_variables_initializer())

        def on_step(verts, scale, faces, target_img, target_lmks, opt_lmks,
                    lmk_dist=0.0, shape_reg=0.0, exp_reg=0.0,
                    neck_pose_reg=0.0, jaw_pose_reg=0.0, eyeballs_pose_reg=0.0):
            '''Loss callback: print the objective terms and show target / fitted landmarks.'''
            import cv2
            import sys
            import numpy as np
            from psbody.mesh import Mesh
            from utils.render_mesh import render_mesh

            if lmk_dist > 0.0 or shape_reg > 0.0 or exp_reg > 0.0 or neck_pose_reg > 0.0 or jaw_pose_reg > 0.0 or eyeballs_pose_reg > 0.0:
                print(
                    'lmk_dist: %f, shape_reg: %f, exp_reg: %f, neck_pose_reg: %f, jaw_pose_reg: %f, eyeballs_pose_reg: %f'
                    % (lmk_dist, shape_reg, exp_reg, neck_pose_reg,
                       jaw_pose_reg, eyeballs_pose_reg))

            # Landmarks are stored with mirrored y; flip back to image coordinates for drawing.
            plt_target_lmks = target_lmks.copy()
            plt_target_lmks[:, 1] = target_img.shape[0] - plt_target_lmks[:, 1]
            for (x, y) in plt_target_lmks:
                cv2.circle(target_img, (int(x), int(y)), 4, (0, 0, 255), -1)

            plt_opt_lmks = opt_lmks.copy()
            plt_opt_lmks[:, 1] = target_img.shape[0] - plt_opt_lmks[:, 1]
            for (x, y) in plt_opt_lmks:
                cv2.circle(target_img, (int(x), int(y)), 4, (255, 0, 0), -1)

            # The mesh rendering is only attempted on Python 3.
            if sys.version_info >= (3, 0):
                rendered_img = render_mesh(Mesh(scale * verts, faces),
                                           height=target_img.shape[0],
                                           width=target_img.shape[1])
                for (x, y) in plt_opt_lmks:
                    cv2.circle(rendered_img, (int(x), int(y)), 4, (255, 0, 0), -1)
                target_img = np.hstack((target_img, rendered_img))

            cv2.imshow('img', target_img)
            cv2.waitKey(10)

        print('Optimize rigid transformation')
        vars = [tf_scale, tf_trans, tf_rot]
        loss = lmk_dist
        optimizer = scipy_pt(loss=loss, var_list=vars, method='L-BFGS-B',
                             options={'disp': 1, 'ftol': 5e-6})
        optimizer.minimize(session,
                           fetches=[
                               tf_model, tf_scale,
                               tf.constant(template_mesh.f),
                               tf.constant(target_img),
                               tf.constant(target_2d_lmks), lmks_proj_2d
                           ],
                           loss_callback=on_step)

        print('Optimize model parameters')
        vars = [tf_scale, tf_trans[:2], tf_rot, tf_pose, tf_shape, tf_exp]
        loss = lmk_dist + shape_reg + exp_reg + neck_pose_reg + jaw_pose_reg + eyeballs_pose_reg
        optimizer = scipy_pt(loss=loss, var_list=vars, method='L-BFGS-B',
                             options={'disp': 0, 'ftol': 1e-7})
        optimizer.minimize(session,
                           fetches=[
                               tf_model, tf_scale,
                               tf.constant(template_mesh.f),
                               tf.constant(target_img),
                               tf.constant(target_2d_lmks), lmks_proj_2d,
                               lmk_dist, shape_reg, exp_reg, neck_pose_reg,
                               jaw_pose_reg, eyeballs_pose_reg
                           ],
                           loss_callback=on_step)

        print('Fitting done')
        np_verts, np_scale = session.run([tf_model, tf_scale])

        # One assign op fed through a placeholder, built once; creating a new
        # tf.assign per call would add 101 ops to the graph.
        exp_input = tf.placeholder(tf.float64, shape=(1, 100))
        assign_exp = tf.assign(tf_exp, exp_input)

        def _get_verts(expr):
            '''Return the model vertices with the expression coefficients set to ``expr``.'''
            session.run(assign_exp, feed_dict={exp_input: expr[np.newaxis, :]})
            return session.run(tf_model)

        # Expression basis: vertex offsets from the zero-expression mesh for
        # each unit expression coefficient, with all other fitted parameters
        # kept.  Note that tf_exp is left at the last unit vector afterwards.
        expr_basis = []
        neutral_verts = _get_verts(np.zeros((100), np.float64))
        for i in range(100):
            expr_i = np.zeros((100), np.float64)
            expr_i[i] = 1
            verts_i = _get_verts(expr_i) - neutral_verts
            expr_basis.append(verts_i.flatten())
        expr_basis = np.asarray(expr_basis, np.float32).transpose(1, 0)

        return Mesh(np_verts, template_mesh.f), np_scale, expr_basis
def fit_lmk2d(target_img, target_2d_lmks, template_fname, tf_model_fname,
              lmk_face_idx, lmk_b_coords, weights):
    '''
    Fit the saved Tensorflow FLAME model to 2D landmarks in two L-BFGS-B stages
    (scale / translation / rotation first, then all model parameters).
    :param target_img:      image the landmarks belong to; its height (shape[0]) is used to mirror y-coordinates
    :param target_2d_lmks:  target 2D landmarks; the y column is mirrored in place
    :param template_fname:  template mesh in FLAME topology (only the face information are used)
    :param tf_model_fname:  saved Tensorflow FLAME model
    :param lmk_face_idx:    face indices of the landmark embedding in the FLAME topology
    :param lmk_b_coords:    barycentric coordinates of the landmark embedding in the FLAME topology
                            (i.e. weighting of the three vertices for the triangle, the landmark is embedded in)
    :param weights:         weights of the individual objective functions
    :return: tuple (mesh with the fitting results, fitted scale)
    '''

    def _trainable(tag):
        # Workaround as existing tf.Variable cannot be retrieved back with
        # tf.get_variable: take the first trainable variable whose name
        # contains the tag.
        return [var for var in tf.trainable_variables() if tag in var.name][0]

    template_mesh = Mesh(filename=template_fname)

    saver = tf.train.import_meta_graph(tf_model_fname + '.meta')
    tf_model = tf.get_default_graph().get_tensor_by_name(u'vertices:0')

    with tf.Session() as session:
        saver.restore(session, tf_model_fname)

        tf_trans = _trainable('trans')
        tf_rot = _trainable('rot')
        tf_pose = _trainable('pose')
        tf_shape = _trainable('shape')
        tf_exp = _trainable('exp')

        # Mirror landmark y-coordinates
        img_height = target_img.shape[0]
        target_2d_lmks[:, 1] = img_height - target_2d_lmks[:, 1]

        lmks_3d = tf_get_model_lmks(tf_model, template_mesh, lmk_face_idx,
                                    lmk_b_coords)

        # Mean distance of the landmarks to their centroid, in the image ...
        centered_2d = target_2d_lmks - np.mean(target_2d_lmks, axis=0)
        s2d = np.mean(np.linalg.norm(centered_2d, axis=1))
        # ... and for the x/y components of the model landmarks.
        centered_3d_sq = tf.square(lmks_3d - tf.reduce_mean(lmks_3d, axis=0))
        s3d = tf.reduce_mean(
            tf.sqrt(tf.reduce_sum(centered_3d_sq[:, :2], axis=1)))
        tf_scale = tf.Variable(s2d / s3d, dtype=lmks_3d.dtype)

        lmks_proj_2d = tf_project_points(lmks_3d, tf_scale, np.zeros(2))

        # Larger side of the landmark bounding box, used to normalise the error.
        x_extent = max(target_2d_lmks[:, 0]) - min(target_2d_lmks[:, 0])
        y_extent = max(target_2d_lmks[:, 1]) - min(target_2d_lmks[:, 1])
        factor = max(x_extent, y_extent)

        lmk_sq_err = tf.reduce_sum(
            tf.square(tf.subtract(lmks_proj_2d, target_2d_lmks)))
        lmk_dist = weights['lmk'] * lmk_sq_err / (factor**2)
        neck_pose_reg = weights['neck_pose'] * tf.reduce_sum(tf.square(tf_pose[:3]))
        jaw_pose_reg = weights['jaw_pose'] * tf.reduce_sum(tf.square(tf_pose[3:6]))
        eyeballs_pose_reg = weights['eyeballs_pose'] * tf.reduce_sum(tf.square(tf_pose[6:]))
        shape_reg = weights['shape'] * tf.reduce_sum(tf.square(tf_shape))
        exp_reg = weights['expr'] * tf.reduce_sum(tf.square(tf_exp))

        session.run(tf.global_variables_initializer())

        def on_step(verts, scale, faces, target_img, target_lmks, opt_lmks,
                    lmk_dist=0.0, shape_reg=0.0, exp_reg=0.0,
                    neck_pose_reg=0.0, jaw_pose_reg=0.0, eyeballs_pose_reg=0.0):
            '''Loss callback: print the objective terms and show target / fitted landmarks.'''
            import cv2
            import sys
            import numpy as np
            from psbody.mesh import Mesh
            from utils.render_mesh import render_mesh

            def _to_image_coords(points):
                # Undo the y-mirroring on a copy so the points can be drawn.
                flipped = points.copy()
                flipped[:, 1] = target_img.shape[0] - flipped[:, 1]
                return flipped

            def _draw(img, points, color):
                for (x, y) in points:
                    cv2.circle(img, (int(x), int(y)), 4, color, -1)

            terms = (lmk_dist, shape_reg, exp_reg, neck_pose_reg,
                     jaw_pose_reg, eyeballs_pose_reg)
            if any(term > 0.0 for term in terms):
                print(
                    'lmk_dist: %f, shape_reg: %f, exp_reg: %f, neck_pose_reg: %f, jaw_pose_reg: %f, eyeballs_pose_reg: %f'
                    % terms)

            plt_target_lmks = _to_image_coords(target_lmks)
            _draw(target_img, plt_target_lmks, (0, 0, 255))

            plt_opt_lmks = _to_image_coords(opt_lmks)
            _draw(target_img, plt_opt_lmks, (255, 0, 0))

            # The mesh rendering is only attempted on Python 3.
            if sys.version_info >= (3, 0):
                rendered_img = render_mesh(Mesh(scale * verts, faces),
                                           height=target_img.shape[0],
                                           width=target_img.shape[1])
                _draw(rendered_img, plt_opt_lmks, (255, 0, 0))
                target_img = np.hstack((target_img, rendered_img))

            cv2.imshow('img', target_img)
            cv2.waitKey(10)

        # Values handed to on_step in both stages.
        view_fetches = [
            tf_model, tf_scale,
            tf.constant(template_mesh.f),
            tf.constant(target_img),
            tf.constant(target_2d_lmks), lmks_proj_2d
        ]

        print('Optimize rigid transformation')
        rigid_optimizer = scipy_pt(loss=lmk_dist,
                                   var_list=[tf_scale, tf_trans, tf_rot],
                                   method='L-BFGS-B',
                                   options={'disp': 1, 'ftol': 5e-6})
        rigid_optimizer.minimize(session,
                                 fetches=list(view_fetches),
                                 loss_callback=on_step)

        print('Optimize model parameters')
        full_loss = lmk_dist + shape_reg + exp_reg + neck_pose_reg + jaw_pose_reg + eyeballs_pose_reg
        full_optimizer = scipy_pt(
            loss=full_loss,
            var_list=[tf_scale, tf_trans[:2], tf_rot, tf_pose, tf_shape, tf_exp],
            method='L-BFGS-B',
            options={'disp': 0, 'ftol': 1e-7})
        full_optimizer.minimize(
            session,
            fetches=view_fetches + [
                lmk_dist, shape_reg, exp_reg, neck_pose_reg, jaw_pose_reg,
                eyeballs_pose_reg
            ],
            loss_callback=on_step)

        print('Fitting done')
        np_verts, np_scale = session.run([tf_model, tf_scale])
        return Mesh(np_verts, template_mesh.f), np_scale
def fit_sources(
    dir_tup_list, tf_model_fname, template_fname,
    weight_reg_shape, weight_reg_expr,
    weight_reg_neck_pos, weight_reg_jaw_pos, weight_reg_eye_pos,
    showing=False
):
    '''
    Fit the saved Tensorflow FLAME model to directories of source ``.ply`` meshes.
    :param dir_tup_list:        iterable of (source dir, destination dir, parameter dir) tuples,
                                processed in order by ``_fit_sentence``
    :param tf_model_fname:      saved Tensorflow FLAME model (``<name>.meta`` is imported, then restored)
    :param template_fname:      template mesh; its vertex array shape sizes the source-mesh variable
    :param weight_reg_shape:    weight of the shape regularizer
    :param weight_reg_expr:     weight of the expression regularizer
    :param weight_reg_neck_pos: weight of the neck pose regularizer
    :param weight_reg_jaw_pos:  weight of the jaw pose regularizer
    :param weight_reg_eye_pos:  weight of the eyeball pose regularizer
    :param showing:             if True, create a MeshViewer in the module-level ``g_mv``
    '''
    global g_mv
    if showing: g_mv = MeshViewer()

    saver = tf.train.import_meta_graph(tf_model_fname + '.meta')
    graph = tf.get_default_graph()
    tf_model = graph.get_tensor_by_name(u'vertices:0')

    with tf.Session() as session:
        saver.restore(session, tf_model_fname)

        template = Mesh(filename=template_fname)
        # Variable holding the vertices of the mesh currently being fitted.
        tf_src = tf.Variable(tf.zeros(template.v.shape, dtype=tf.float64))

        # get all params
        # (first trainable variable whose name contains the given tag)
        tf_trans = [x for x in tf.trainable_variables() if 'trans' in x.name][0]
        tf_rot = [x for x in tf.trainable_variables() if 'rot' in x.name][0]
        tf_pose = [x for x in tf.trainable_variables() if 'pose' in x.name][0]
        tf_shape = [x for x in tf.trainable_variables() if 'shape' in x.name][0]
        tf_exp = [x for x in tf.trainable_variables() if 'exp' in x.name][0]

        def _save_state(*names, **kwargs):
            # Return the current values of the named parameters as a dict;
            # with set_zero=True the same parameters are zeroed afterwards.
            state = dict()
            if "trans" in names: state["trans"] = tf_trans.eval()
            if "rot" in names: state["rot"] = tf_rot.eval()
            if "pose" in names: state["pose"] = tf_pose.eval()
            if "shape" in names: state["shape"] = tf_shape.eval()
            if "exp" in names: state["exp"] = tf_exp.eval()
            if kwargs.get("set_zero", False): _zero_state(*names)
            return state

        def _load_state(state):
            # Assign every parameter present in the dict back to its variable.
            ops = []
            if "trans" in state: ops.append(tf_trans.assign(state["trans"]))
            if "rot" in state: ops.append(tf_rot.assign (state["rot"] ))
            if "pose" in state: ops.append(tf_pose.assign (state["pose"] ))
            if "shape" in state: ops.append(tf_shape.assign(state["shape"]))
            if "exp" in state: ops.append(tf_exp.assign (state["exp"] ))
            session.run(ops)

        def _zero_state(*names):
            # Reset the named parameters to zero.
            ops = []
            if "trans" in names: ops.append(tf_trans.assign(tf.zeros_like(tf_trans)))
            if "rot" in names: ops.append(tf_rot .assign(tf.zeros_like(tf_rot )))
            if "pose" in names: ops.append(tf_pose .assign(tf.zeros_like(tf_pose )))
            if "shape" in names: ops.append(tf_shape.assign(tf.zeros_like(tf_shape)))
            if "exp" in names: ops.append(tf_exp .assign(tf.zeros_like(tf_exp )))
            session.run(ops)

        # Data term: summed squared difference between model output and source vertices.
        mesh_dist = tf.reduce_sum(tf.square(tf.subtract(tf_model, tf_src)))
        # NOTE(review): these slices index the first axis of tf_pose; whether
        # that is the pose axis depends on the restored variable's shape -- confirm.
        neck_pose_reg = tf.reduce_sum(tf.square(tf_pose[:3]))
        jaw_pose_reg = tf.reduce_sum(tf.square(tf_pose[3:6]))
        eye_pose_reg = tf.reduce_sum(tf.square(tf_pose[6:]))
        shape_reg = tf.reduce_sum(tf.square(tf_shape))
        exp_reg = tf.reduce_sum(tf.square(tf_exp))
        reg_term = (
            weight_reg_neck_pos * neck_pose_reg +
            weight_reg_jaw_pos * jaw_pose_reg +
            weight_reg_eye_pos * eye_pose_reg +
            weight_reg_shape * shape_reg +
            weight_reg_expr * exp_reg
        )

        # optimizers
        # rigid: translation and rotation only, data term only
        optim_shared_rigid = scipy_pt(
            loss=mesh_dist,
            var_list=[tf_trans, tf_rot],
            method='L-BFGS-B', options={'disp': 0}
        )
        # all parameters, data term plus regularizers
        optim_shared_all = scipy_pt(
            loss=mesh_dist+reg_term,
            var_list=[tf_trans, tf_rot, tf_pose, tf_shape, tf_exp],
            method='L-BFGS-B', options={'disp': 0}
        )
        # per-frame: shape and expression only, capped at 50 iterations
        optim_seq = scipy_pt(
            loss=mesh_dist+reg_term,
            var_list=[tf_shape, tf_exp],
            method='L-BFGS-B', options={'disp': 0, 'maxiter': 50}
        )

        def _fit_sentence(src_dir, dst_dir, prm_dir, last_speaker):
            # Fit one source directory.  The speaker name is the basename of
            # the parent directory of src_dir.
            # A "_anchor" file in dst_dir marks the directory as already done.
            _anchor = os.path.join(dst_dir, "_anchor")
            if os.path.exists(_anchor):
                print("- Skip " + src_dir)
                return
            if not os.path.exists(src_dir):
                print("- Failed to find " + src_dir)
                return
            if not os.path.exists(dst_dir): os.makedirs(dst_dir)
            if not os.path.exists(prm_dir): os.makedirs(prm_dir)

            # Collect every .ply file below src_dir, sorted by path.
            ply_files = []
            for root, _, files in os.walk(src_dir):
                for f in files:
                    if os.path.splitext(f)[1] == ".ply":
                        ply_files.append(os.path.join(root, f))
            ply_files = sorted(ply_files)

            # get shared
            # (the first mesh of the directory is the fitting target here)
            src_mesh = Mesh(filename=ply_files[0])
            session.run(tf.assign(tf_src, src_mesh.v))

            speaker = os.path.basename(os.path.dirname(src_dir))
            if last_speaker != speaker:
                print("- clear speaker information")
                _zero_state("trans", "rot", "pose", "shape", "exp")
            else:
                _zero_state("exp")

            # A "state" directory next to dst_dir holds saved trans/rot/pose/shape.
            stt_dir = os.path.join(os.path.dirname(dst_dir), "state")
            if os.path.exists(stt_dir):
                state_dict = dict(
                    trans = np.load(os.path.join(stt_dir, "trans.npy")),
                    rot = np.load(os.path.join(stt_dir, "rot.npy")),
                    pose = np.load(os.path.join(stt_dir, "pose.npy")),
                    shape = np.load(os.path.join(stt_dir, "shape.npy")),
                )
                _load_state(state_dict)
                fitting_mesh = Mesh(session.run(tf_model), src_mesh.f)
                fitting_mesh.write_ply(os.path.join(stt_dir, "zero.ply"))

            fit_zero_dir = os.path.join(os.path.dirname(os.path.dirname(dst_dir)), "zero_exp")
            if not os.path.exists(fit_zero_dir): os.makedirs(fit_zero_dir)

            print("- " + speaker + " " + os.path.basename(src_dir))
            print(" -> fit shared parameters...")
            optim_shared_rigid.minimize(session)
            optim_shared_all.minimize(session)

            # Write the fitted mesh with the expression zeroed, then restore the expression.
            state_dict = _save_state("exp", set_zero=True)
            fitting_mesh = Mesh(session.run(tf_model), src_mesh.f)
            fitting_mesh.write_ply(os.path.join(fit_zero_dir, "{}.ply".format(speaker)))
            _load_state(state_dict)
            # NOTE(review): the original indentation of this function was lost;
            # the bare `return` below is reconstructed as unconditional, which
            # makes the rest of this function unreachable and its result always
            # None -- confirm against the upstream source.
            return

            if not os.path.exists(stt_dir): os.makedirs(stt_dir)
            np.save(os.path.join(stt_dir, "trans.npy"), tf_trans.eval(), allow_pickle=False)
            np.save(os.path.join(stt_dir, "rot.npy"), tf_rot.eval(), allow_pickle=False)
            np.save(os.path.join(stt_dir, "pose.npy"), tf_pose.eval(), allow_pickle=False)
            np.save(os.path.join(stt_dir, "shape.npy"), tf_shape.eval(), allow_pickle=False)

            # Per-frame fit: one output mesh and two parameter files per source mesh.
            progress = tqdm(ply_files)
            for src_fname in progress:
                frame = os.path.basename(src_fname)
                progress.set_description(" -> " + frame)
                dst_fname = os.path.join(dst_dir, frame)
                # param filename
                prm_fname = os.path.join(prm_dir, frame)
                exp_fname = os.path.splitext(prm_fname)[0] + '_exp.npy'
                idn_fname = os.path.splitext(prm_fname)[0] + '_idn.npy'

                src_mesh = Mesh(filename=src_fname)
                session.run(tf.assign(tf_src, src_mesh.v))
                optim_seq.minimize(session)

                # save expr
                np.save(exp_fname, tf_exp.eval())
                np.save(idn_fname, tf_shape.eval())

                # state_dict = _save_state("trans", "rot", "pose", "shape", set_zero=True)
                # save mesh
                fitting_mesh = Mesh(session.run(tf_model), src_mesh.f)
                fitting_mesh.write_ply(dst_fname)
                # _load_state(state_dict)
                # print(tf_shape.eval())

                if showing: g_mv.set_static_meshes([fitting_mesh])

            # Create the marker file that makes later runs skip this directory.
            os.system("touch {}".format(_anchor))
            return speaker

        # Process the directories in order, passing on the previous call's result.
        last_speaker = None
        for (src, dst, prm) in dir_tup_list:
            last_speaker = _fit_sentence(src, dst, prm, last_speaker)
def fit_lmk2d(target_img, target_2d_lmks, model_fname, lmk_face_idx,
              lmk_b_coords, weights, visualize):
    '''
    Fit FLAME to 2D landmarks, with the landmark error of indices 36..47 weighted 100x.
    :param target_img:      image the landmarks belong to; its height (shape[0]) is used to mirror y-coordinates
    :param target_2d_lmks:  target 2D landmarks; the y column is mirrored in place
    :param model_fname:     saved FLAME model
    :param lmk_face_idx:    face indices of the landmark embedding in the FLAME topology
    :param lmk_b_coords:    barycentric coordinates of the landmark embedding in the FLAME topology
                            (i.e. weighting of the three vertices for the triangle, the landmark is embedded in)
    :param weights:         weights of the individual objective functions
    :param visualize:       if true, the per-step callback prints the objective terms and renders the fit
    :return: tuple (mesh with the fitting results, fitted scale)
    '''

    # Landmark index layout of the 68-point scheme the weights refer to:
    #   0-16 face, 17-21 / 22-26 eyebrows, 27-30 nose, 31-35 nostril,
    #   36-41 / 42-47 eyes, 48-59 lips, 60-67 teeth.
    # Both eye ranges get weight 100 on x and y; everything else weight 1.
    lmks_weights = [[100, 100] if 36 <= idx < 48 else [1, 1]
                    for idx in range(68)]
    tf_lmks_weights = tf.constant(lmks_weights, tf.float64)

    tf_trans = tf.Variable(np.zeros((1, 3)), name="trans", dtype=tf.float64, trainable=True)
    tf_rot = tf.Variable(np.zeros((1, 3)), name="rot", dtype=tf.float64, trainable=True)
    tf_pose = tf.Variable(np.zeros((1, 12)), name="pose", dtype=tf.float64, trainable=True)
    tf_shape = tf.Variable(np.zeros((1, 300)), name="shape", dtype=tf.float64, trainable=True)
    tf_exp = tf.Variable(np.zeros((1, 100)), name="expression", dtype=tf.float64, trainable=True)

    smpl = SMPL(model_fname)
    shape_and_exp = tf.concat((tf_shape, tf_exp), axis=-1)
    rot_and_pose = tf.concat((tf_rot, tf_pose), axis=-1)
    tf_model = tf.squeeze(smpl(tf_trans, shape_and_exp, rot_and_pose))

    with tf.Session() as session:
        # Mirror landmark y-coordinates
        img_height = target_img.shape[0]
        target_2d_lmks[:, 1] = img_height - target_2d_lmks[:, 1]

        lmks_3d = tf_get_model_lmks(tf_model, smpl.f, lmk_face_idx, lmk_b_coords)

        # Mean distance of the landmarks to their centroid, in the image ...
        centered_2d = target_2d_lmks - np.mean(target_2d_lmks, axis=0)
        s2d = np.mean(np.linalg.norm(centered_2d, axis=1))
        # ... and for the x/y components of the model landmarks.
        centered_3d_sq = tf.square(lmks_3d - tf.reduce_mean(lmks_3d, axis=0))
        s3d = tf.reduce_mean(
            tf.sqrt(tf.reduce_sum(centered_3d_sq[:, :2], axis=1)))
        tf_scale = tf.Variable(s2d / s3d, dtype=lmks_3d.dtype)

        lmks_proj_2d = tf_project_points(lmks_3d, tf_scale, np.zeros(2))

        # Larger side of the landmark bounding box, used to normalise the error.
        x_extent = max(target_2d_lmks[:, 0]) - min(target_2d_lmks[:, 0])
        y_extent = max(target_2d_lmks[:, 1]) - min(target_2d_lmks[:, 1])
        factor = max(x_extent, y_extent)

        weighted_sq_err = tf.reduce_sum(
            tf.square(tf.subtract(lmks_proj_2d, target_2d_lmks)) * tf_lmks_weights)
        lmk_dist = weights['lmk'] * weighted_sq_err / (factor ** 2)
        neck_pose_reg = weights['neck_pose'] * tf.reduce_sum(tf.square(tf_pose[:, :3]))
        jaw_pose_reg = weights['jaw_pose'] * tf.reduce_sum(tf.square(tf_pose[:, 3:6]))
        eyeballs_pose_reg = weights['eyeballs_pose'] * tf.reduce_sum(tf.square(tf_pose[:, 6:]))
        shape_reg = weights['shape'] * tf.reduce_sum(tf.square(tf_shape))
        exp_reg = weights['expr'] * tf.reduce_sum(tf.square(tf_exp))

        session.run(tf.global_variables_initializer())

        if visualize:
            def on_step(verts, scale, faces, target_img, target_lmks, opt_lmks,
                        lmk_dist=0.0, shape_reg=0.0, exp_reg=0.0,
                        neck_pose_reg=0.0, jaw_pose_reg=0.0, eyeballs_pose_reg=0.0):
                '''Loss callback: print the objective terms and draw target / fitted landmarks.'''
                import cv2
                import sys
                import numpy as np
                from psbody.mesh import Mesh
                from utils.render_mesh import render_mesh

                def _to_image_coords(points):
                    # Undo the y-mirroring on a copy so the points can be drawn.
                    flipped = points.copy()
                    flipped[:, 1] = target_img.shape[0] - flipped[:, 1]
                    return flipped

                def _draw(img, points, color):
                    for (x, y) in points:
                        cv2.circle(img, (int(x), int(y)), 4, color, -1)

                terms = (lmk_dist, shape_reg, exp_reg, neck_pose_reg,
                         jaw_pose_reg, eyeballs_pose_reg)
                if any(term > 0.0 for term in terms):
                    print('lmk_dist: %f, shape_reg: %f, exp_reg: %f, neck_pose_reg: %f, jaw_pose_reg: %f, eyeballs_pose_reg: %f' % terms)

                plt_target_lmks = _to_image_coords(target_lmks)
                _draw(target_img, plt_target_lmks, (0, 0, 255))

                plt_opt_lmks = _to_image_coords(opt_lmks)
                _draw(target_img, plt_opt_lmks, (255, 0, 0))

                # The mesh rendering is only attempted on Python 3.
                if sys.version_info >= (3, 0):
                    rendered_img = render_mesh(Mesh(scale * verts, faces),
                                               height=target_img.shape[0],
                                               width=target_img.shape[1])
                    _draw(rendered_img, plt_opt_lmks, (255, 0, 0))
                    target_img = np.hstack((target_img, rendered_img))

                # The window display is disabled in this variant:
                #cv2.imshow('img', target_img)
                #cv2.waitKey(10)
        else:
            def on_step(*_):
                pass

        # Values handed to on_step in both stages.
        view_fetches = [
            tf_model, tf_scale,
            tf.constant(smpl.f),
            tf.constant(target_img),
            tf.constant(target_2d_lmks), lmks_proj_2d
        ]

        print('Optimize rigid transformation')
        rigid_optimizer = scipy_pt(loss=lmk_dist,
                                   var_list=[tf_scale, tf_trans, tf_rot],
                                   method='L-BFGS-B',
                                   options={'disp': 1, 'ftol': 5e-6})
        rigid_optimizer.minimize(session,
                                 fetches=list(view_fetches),
                                 loss_callback=on_step)

        print('Optimize model parameters')
        full_loss = lmk_dist + shape_reg + exp_reg + neck_pose_reg + jaw_pose_reg + eyeballs_pose_reg
        full_optimizer = scipy_pt(
            loss=full_loss,
            var_list=[tf_scale, tf_trans[:2], tf_rot, tf_pose, tf_shape, tf_exp],
            method='L-BFGS-B',
            options={'disp': 0, 'ftol': 1e-7})
        full_optimizer.minimize(
            session,
            fetches=view_fetches + [
                lmk_dist, shape_reg, exp_reg, neck_pose_reg, jaw_pose_reg,
                eyeballs_pose_reg
            ],
            loss_callback=on_step)

        print('Fitting done')
        np_verts, np_scale = session.run([tf_model, tf_scale])
        return Mesh(np_verts, smpl.f), np_scale
def fit_lmk3d(target_3d_lmks, template_fname, tf_model_fname, lmk_face_idx,
              lmk_b_coords, weights, show_fitting=True):
    '''
    Fit the saved Tensorflow FLAME model to 3D landmarks in two L-BFGS-B stages
    (translation / rotation first, then all model parameters with regularizers).
    :param target_3d_lmks:  target 3D landmarks provided as (num_lmks x 3) matrix
    :param template_fname:  template mesh in FLAME topology (only the face information are used)
    :param tf_model_fname:  saved Tensorflow FLAME model
    :param lmk_face_idx:    face indices of the landmark embedding in the FLAME topology
    :param lmk_b_coords:    barycentric coordinates of the landmark embedding in the FLAME topology
                            (i.e. weighting of the three vertices for the triangle, the landmark is embedded in)
    :param weights:         weights of the individual objective functions
    :param show_fitting:    if true, show the result in a MeshViewer and wait for user input
    :return: a mesh with the fitting results
    '''

    def _trainable(tag):
        # Workaround as existing tf.Variable cannot be retrieved back with
        # tf.get_variable: take the first trainable variable whose name
        # contains the tag.
        return [var for var in tf.trainable_variables() if tag in var.name][0]

    template_mesh = Mesh(filename=template_fname)

    saver = tf.train.import_meta_graph(tf_model_fname + '.meta')
    tf_model = tf.get_default_graph().get_tensor_by_name(u'vertices:0')

    with tf.Session() as session:
        saver.restore(session, tf_model_fname)

        tf_trans = _trainable('trans')
        tf_rot = _trainable('rot')
        tf_pose = _trainable('pose')
        tf_shape = _trainable('shape')
        tf_exp = _trainable('exp')

        lmks = tf_get_model_lmks(tf_model, template_mesh, lmk_face_idx,
                                 lmk_b_coords)
        # The residual is scaled by 1000 before squaring.
        scaled_residual = 1000 * tf.subtract(lmks, target_3d_lmks)
        lmk_dist = tf.reduce_sum(tf.square(scaled_residual))

        # Regularizers, listed in the order they are added to the objective.
        regularizers = (
            ('shape', tf.reduce_sum(tf.square(tf_shape))),
            ('expr', tf.reduce_sum(tf.square(tf_exp))),
            ('neck_pose', tf.reduce_sum(tf.square(tf_pose[:3]))),
            ('jaw_pose', tf.reduce_sum(tf.square(tf_pose[3:6]))),
            ('eyeballs_pose', tf.reduce_sum(tf.square(tf_pose[6:]))),
        )

        # Optimize global transformation first
        rigid_optimizer = scipy_pt(loss=weights['lmk'] * lmk_dist,
                                   var_list=[tf_trans, tf_rot],
                                   method='L-BFGS-B',
                                   options={'disp': 1, 'ftol': 5e-6})
        print('Optimize rigid transformation')
        rigid_optimizer.minimize(session)

        # Optimize for the model parameters
        full_loss = weights['lmk'] * lmk_dist
        for key, term in regularizers:
            full_loss = full_loss + weights[key] * term
        full_optimizer = scipy_pt(
            loss=full_loss,
            var_list=[tf_trans, tf_rot, tf_pose, tf_shape, tf_exp],
            method='L-BFGS-B',
            options={'disp': 1, 'ftol': 5e-6})
        print('Optimize model parameters')
        full_optimizer.minimize(session)

        print('Fitting done')

        if show_fitting:
            # Visualize landmark fitting: targets as static spheres, the
            # fitted mesh with its landmarks as dynamic meshes.
            target_spheres = create_lmk_spheres(target_3d_lmks, 0.001,
                                                [255.0, 0.0, 0.0])
            mv = MeshViewer()
            mv.set_static_meshes(target_spheres)

            fitted_mesh = Mesh(session.run(tf_model), template_mesh.f)
            fitted_spheres = create_lmk_spheres(session.run(lmks), 0.001,
                                                [0.0, 0.0, 255.0])
            mv.set_dynamic_meshes([fitted_mesh] + fitted_spheres,
                                  blocking=True)
            six.moves.input('Press key to continue')

        return Mesh(session.run(tf_model), template_mesh.f)
def Bundle_Adjustment_optimization(params): ''' Using Bundle Adjustment optimize SMPL parameters with tensorflow-gpu :param hmr_dict: :param data_dict: :return: results ''' start_time = time.time() start_time_total = time.time() Util = Utility() Util.read_utility_parameters(params) smpl_model = SMPL(Util.SMPL_COCO_PATH, Util.SMPL_NORMAL_PATH) j3dss, success = Util.load_pose_pkl() hmr_dict, data_dict = Util.load_hmr_data() hmr_thetas = hmr_dict["hmr_thetas"] hmr_betas = hmr_dict["hmr_betas"] hmr_trans = hmr_dict["hmr_trans"] hmr_cams = hmr_dict["hmr_cams"] hmr_joint3ds = hmr_dict["hmr_joint3ds"] j2ds = data_dict["j2ds"] confs = data_dict["confs"] j2ds_face = data_dict["j2ds_face"] confs_face = data_dict["confs_face"] j2ds_head = data_dict["j2ds_head"] confs_head = data_dict["confs_head"] j2ds_foot = data_dict["j2ds_foot"] confs_foot = data_dict["confs_foot"] imgs = data_dict["imgs"] masks = data_dict["masks"] Util.img_width = imgs[0].shape[1] Util.img_height = imgs[0].shape[0] Util.img_widthheight = int("1" + "%04d" % Util.img_width + "%04d" % Util.img_height) frame_num = len(j2ds) for ind in range(frame_num): hmr_theta = hmr_thetas[ind, :].squeeze() # hmr_shape = hmr_betas[ind, :].squeeze() # hmr_tran = hmr_trans[ind, :].squeeze() # hmr_cam = hmr_cams[0, :].squeeze() hmr_joint3d = hmr_joint3ds[ind, :, :] ######### Arm Correction ######### # if Util.pedestrian_constraint == True and success == True: # prej3d = j3dss[ind] # if abs(prej3d[2, 2] - prej3d[7, 2]) > 0.1: # print("leg_error>0.1") # if prej3d[2, 2] < prej3d[7, 2]: # hmr_thetas[ind][51] = 0.8 # hmr_theta[52] = 1e-8 # hmr_theta[53] = 1.0 # hmr_theta[58] = 1e-8 # forward_arm = "left" # else: # hmr_theta[48] = 0.8 # hmr_theta[49] = 1e-8 # hmr_theta[50] = -1.0 # hmr_theta[55] = 1e-8 # forward_arm = "right" if Util.pedestrian_constraint == True: if abs(hmr_joint3ds[ind, 0, 2] - hmr_joint3ds[ind, 5, 2]) > 0.1: print("leg_error>0.1") if hmr_joint3ds[ind, 0, 2] < hmr_joint3ds[ind, 5, 2]: hmr_thetas[ind, 51] = 0.8 
hmr_thetas[ind, 52] = 1e-8 hmr_thetas[ind, 53] = 1.0 hmr_thetas[ind, 58] = 1e-8 forward_arm = "left" else: hmr_thetas[ind, 48] = 0.8 hmr_thetas[ind, 49] = 1e-8 hmr_thetas[ind, 50] = -1.0 hmr_thetas[ind, 55] = 1e-8 forward_arm = "right" initial_param, pose_mean, pose_covariance = Util.load_initial_param() param_shapes = tf.Variable(hmr_betas.reshape([-1, 10]), dtype=tf.float32) param_rots = tf.Variable(hmr_thetas[:, :3].reshape([-1, 3]), dtype=tf.float32) param_poses = tf.Variable(hmr_thetas[:, 3:72].reshape([-1, 69]), dtype=tf.float32) param_trans = tf.Variable(hmr_trans.reshape([-1, 3]), dtype=tf.float32) initial_param_tf = tf.concat( [param_shapes, param_rots, param_poses, param_trans], axis=1) ## N * (72+10+3) hmr_cam = hmr_cams[0, :].squeeze() cam = Perspective_Camera(hmr_cam[0], hmr_cam[0], hmr_cam[1], hmr_cam[2], np.zeros(3), np.zeros(3)) j3ds, v, j3dsplus = smpl_model.get_3d_joints(initial_param_tf, Util.SMPL_JOINT_IDS) #### divide into different body parts j3ds_body = j3ds[:, 2:, :] j3ds_head = j3ds[:, 14:16, :] j3ds_foot = j3ds[:, :2, :] j3ds_face = j3dsplus[:, 14:19, :] j3ds_body = tf.reshape(j3ds_body, [-1, 3]) ## (N*12) * 3 j3ds_head = tf.reshape(j3ds_head, [-1, 3]) ## (N*2) * 3 j3ds_foot = tf.reshape(j3ds_foot, [-1, 3]) ## (N*2) * 3 j3ds_face = tf.reshape(j3ds_face, [-1, 3]) ## (N*5) * 3 j2ds_body_est = cam.project(tf.squeeze(j3ds_body)) ## (N*14) * 2 j2ds_head_est = cam.project(tf.squeeze(j3ds_head)) ## (N*2) * 2 j2ds_foot_est = cam.project(tf.squeeze(j3ds_foot)) ## (N*2) * 2 j2ds_face_est = cam.project(tf.squeeze(j3ds_face)) ## (N*5) * 2 v = tf.reshape(v, [-1, 3]) ## (N*6890) * 3 verts_est_mask = cam.project(tf.squeeze(v)) ## (N*6890) * 2 verts_est = cam.project(tf.squeeze(v)) ## (N*6890) * 2 # TODO convert the loss function into batch input objs = {} j2ds = j2ds.reshape([-1, 2]) ## (N*14) * 2 confs = confs.reshape(-1) ## N*14 base_weights = np.array([1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0]) base_weights = np.tile(base_weights, frame_num) ## N*14 
weights = confs * base_weights ## N*14 weights = tf.constant(weights, dtype=tf.float32) ## N*14 objs['J2D_Loss'] = Util.J2D_Loss * tf.reduce_sum( weights * tf.reduce_sum(tf.square(j2ds_body_est - j2ds), 1)) j2ds_face = j2ds_face.reshape([-1, 2]) ## (N*5) * 2 confs_face = confs_face.reshape(-1) ## N*5 base_weights_face = np.array([1.0, 1.0, 1.0, 1.0, 1.0]) base_weights_face = np.tile(base_weights_face, frame_num) ## N*5 weights_face = confs_face * base_weights_face weights_face = tf.constant(weights_face, dtype=tf.float32) objs['J2D_face_Loss'] = Util.J2D_face_Loss * tf.reduce_sum( weights_face * tf.reduce_sum(tf.square(j2ds_face_est - j2ds_face), 1)) j2ds_head = j2ds_head.reshape([-1, 2]) ## (N*2) * 2 confs_head = confs_head.reshape(-1) ## N*2 base_weights_head = np.array([1.0, 1.0]) base_weights_head = np.tile(base_weights_head, frame_num) ## N*2 weights_head = confs_head * base_weights_head weights_head = tf.constant(weights_head, dtype=tf.float32) objs['J2D_head_Loss'] = Util.J2D_head_Loss * tf.reduce_sum( weights_head * tf.reduce_sum(tf.square(j2ds_head - j2ds_head_est), 1)) j2ds_foot = j2ds_foot.reshape([-1, 2]) ## (N*2) * 2 confs_foot = confs_foot.reshape(-1) ## N*2 base_weights_foot = np.array([1.0, 1.0]) base_weights_foot = np.tile(base_weights_foot, frame_num) ## N*2 weights_foot = confs_foot * base_weights_foot ## N*2 weights_foot = tf.constant(weights_foot, dtype=tf.float32) objs['J2D_foot_Loss'] = Util.J2D_foot_Loss * tf.reduce_sum( weights_foot * tf.reduce_sum(tf.square(j2ds_foot - j2ds_foot_est), 1)) pose_mean = tf.constant(pose_mean, dtype=tf.float32) pose_covariance = tf.constant(pose_covariance, dtype=tf.float32) for i in range(frame_num): pose_diff = tf.reshape(param_poses[i, :] - pose_mean, [1, -1]) if i == 0: objs['Prior_Loss'] = 1.0 * tf.squeeze( tf.matmul(tf.matmul(pose_diff, pose_covariance), tf.transpose(pose_diff))) else: objs['Prior_Loss'] = objs['Prior_Loss'] + 1.0 * tf.squeeze( tf.matmul(tf.matmul(pose_diff, pose_covariance), 
tf.transpose(pose_diff))) objs['Prior_Shape'] = 5.0 * tf.reduce_sum(tf.square(param_shapes)) w1 = np.array([1.04 * 2.0, 1.04 * 2.0, 5.4 * 2.0, 5.4 * 2.0]) w1 = tf.constant(w1, dtype=tf.float32) # objs["angle_elbow_knee"] = 0.008 * tf.reduce_sum(w1 * [ # tf.exp(param_poses[:, 52]), tf.exp(-param_poses[:, 55]), # tf.exp(-param_poses[:, 9]), tf.exp(-param_poses[:, 12])]) objs["angle_elbow_knee"] = 0.08 * tf.reduce_sum( w1[0] * tf.exp(param_poses[:, 52]) + w1[1] * tf.exp(-param_poses[:, 55]) + w1[2] * tf.exp(-param_poses[:, 9]) + w1[3] * tf.exp(-param_poses[:, 12])) # TODO add a function that deal with masks with batch tmp_batch = [] for i in range(frame_num): verts2dsilhouette = algorithms.verts_to_silhouette_tf( verts_est_mask, masks[i].shape[1], masks[i].shape[0]) tmp_batch.append(verts2dsilhouette) verts2dsilhouette_batch = tf.convert_to_tensor(tmp_batch) masks = np.array(masks) masks_tf = tf.cast(tf.convert_to_tensor(masks), dtype=tf.float32) objs['mask'] = Util.mask * tf.reduce_sum( verts2dsilhouette_batch / 255.0 * (255.0 - masks_tf) / 255.0 + (255.0 - verts2dsilhouette_batch) / 255.0 * masks_tf / 255.0) # TODO try L1, L2 or other penalty function param_pose_full = tf.concat([param_rots, param_poses], axis=1) ## N * 72 objs['hmr_constraint'] = Util.hmr_constraint * tf.reduce_sum( tf.square(tf.squeeze(param_pose_full) - hmr_thetas)) objs['hmr_hands_constraint'] = Util.hmr_hands_constraint * tf.reduce_sum( tf.square(tf.squeeze(param_pose_full)[:, 21] - hmr_thetas[:, 21]) + tf.square(tf.squeeze(param_pose_full)[:, 23] - hmr_thetas[:, 23]) + tf.square(tf.squeeze(param_pose_full)[:, 20] - hmr_thetas[:, 20]) + tf.square(tf.squeeze(param_pose_full)[:, 22] - hmr_thetas[:, 22])) w_temporal = [ 0.5, 0.5, 1.0, 1.5, 2.5, 2.5, 1.5, 1.0, 1.0, 1.5, 2.5, 2.5, 1.5, 1.0, 7.0, 7.0 ] for i in range(frame_num - 1): j3d_old = j3ds[i, :, :] j3d = j3ds[i + 1, :, :] j3d_old_tmp = tf.reshape(j3d_old, [-1, 3]) ## (N*16) * 3 j2d_old = cam.project(tf.squeeze(j3d_old_tmp)) ## (N*16) * 2 
j3d_tmp = tf.reshape(j3d, [-1, 3]) ## (N*16) * 3 j2d = cam.project(tf.squeeze(j3d_tmp)) ## (N*16) * 2 param_pose_old = param_poses[i, :] param_pose = param_poses[i + 1, :] if i == 0: objs['temporal3d'] = Util.temporal3d * tf.reduce_sum( w_temporal * tf.reduce_sum(tf.square(j3d - j3d_old), 1)) objs['temporal2d'] = Util.temporal2d * tf.reduce_sum( w_temporal * tf.reduce_sum(tf.square(j2d - j2d_old), 1)) objs['temporal_pose'] = Util.temporal_pose * tf.reduce_sum( tf.square(param_pose_old - param_pose)) else: objs['temporal3d'] = objs[ 'temporal3d'] + Util.temporal3d * tf.reduce_sum( w_temporal * tf.reduce_sum(tf.square(j3d - j3d_old), 1)) objs['temporal2d'] = objs[ 'temporal2d'] + Util.temporal2d * tf.reduce_sum( w_temporal * tf.reduce_sum(tf.square(j2d - j2d_old), 1)) objs['temporal_pose'] = objs[ 'temporal_pose'] + Util.temporal_pose * tf.reduce_sum( tf.square(param_pose_old - param_pose)) # TODO add optical flow constraint # body_idx = np.array(body_parsing_idx[0]).squeeze() # body_idx = body_idx.reshape([-1, 1]).astype(np.int64) # verts_est_body = tf.gather_nd(verts_est, body_idx) # optical_ratio = 0.0 # objs['dense_optflow'] = util.params["LR_parameters"]["dense_optflow"] * tf.reduce_sum(tf.square( # verts_est_body - verts_body_old)) # optimization process loss = tf.reduce_sum(objs.values()) duration = time.time() - start_time print("pre-processing time is %f" % duration) with tf.Session() as sess: sess.run(tf.global_variables_initializer()) optimizer = scipy_pt(loss=loss, var_list=[ param_shapes, param_rots, param_trans, param_poses, cam.cx, cam.cy ], options={ 'eps': 1e-20, 'ftol': 1e-20, 'maxiter': 10000, 'disp': True }) print(">>>>>>>>>>>>>start to optimize<<<<<<<<<<<") start_time = time.time() optimizer.minimize(sess) duration = time.time() - start_time print("minimize is %f" % duration) start_time = time.time() poses_final, betas_final, trans_final, cam_cx, cam_cy, v_final, verts_est_final, j3ds_final, _objs = sess.run( [ tf.concat([param_rots, 
param_poses], axis=1), param_shapes, param_trans, cam.cx, cam.cy, v, verts_est, j3ds, objs ]) v_final = v_final.reshape([frame_num, 6890, 3]) duration = time.time() - start_time print("run time is %f" % duration) start_time = time.time() cam_for_save = np.array([hmr_cam[0], cam_cx, cam_cy, np.zeros(3)]) ### no sense LR_cameras = [] for i in range(frame_num): LR_cameras.append(cam_for_save) ############# camera = render.camera(cam_for_save[0], cam_for_save[1], cam_for_save[2], cam_for_save[3], Util.img_widthheight) output_path = Util.hmr_path + Util.output_path if not os.path.exists(output_path): os.makedirs(output_path) if not os.path.exists(Util.hmr_path + "output_mask"): os.makedirs(Util.hmr_path + "output_mask") videowriter = [] for ind in range(frame_num): print( ">>>>>>>>>>>>>>>>>>>>>>%d index frame<<<<<<<<<<<<<<<<<<<<<<" % ind) if Util.mode == "full": smpl = smpl_np.SMPLModel( './smpl/models/basicmodel_m_lbs_10_207_0_v1.0.0.pkl') template = np.load(Util.texture_path + "template.npy") smpl.set_template(template) #v = smpl.get_verts(poses_final[ind, :], betas_final[ind, :], trans_final[ind, :]) #texture_vt = np.load(Util.texture_path + "vt.npy") #texture_img = cv2.imread(Util.texture_path + "../../output_nonrigid/texture.png") #img_result_texture = camera.render_texture(v, texture_img, texture_vt) #cv2.imwrite(output_path + "/hmr_optimization_texture_%04d.png" % ind, img_result_texture) #img_bg = cv2.resize(imgs[ind], (Util.img_width, Util.img_height)) #img_result_texture_bg = camera.render_texture_imgbg(img_result_texture, img_bg) #cv2.imwrite(output_path + "/texture_bg_%04d.png" % ind, #img_result_texture_bg) # if Util.video is True: # if ind == 0: # fps = 15 # size = (imgs[0].shape[1], imgs[0].shape[0]) # video_path = output_path + "/texture.mp4" # videowriter = cv2.VideoWriter(video_path, cv2.VideoWriter_fourcc('D', 'I', 'V', 'X'), fps, size) # videowriter.write(img_result_texture) # img_result_naked = camera.render_naked(v, imgs[ind]) # img_result_naked = 
img_result_naked[:, :, :3] # cv2.imwrite(output_path + "/hmr_optimization_%04d.png" % ind, img_result_naked) # bg = np.ones_like(imgs[ind]).astype(np.uint8) * 255 # img_result_naked1 = camera.render_naked(v, bg) # cv2.imwrite(output_path + "/hmr_optimization_naked_%04d.png" % ind, img_result_naked1) # img_result_naked_rotation = camera.render_naked_rotation(v, 90, imgs[ind]) # cv2.imwrite(output_path + "/hmr_optimization_rotation_%04d.png" % ind, # img_result_naked_rotation) res = { 'pose': poses_final[ind, :], 'betas': betas_final[ind, :], 'trans': trans_final[ind, :], 'cam': cam_for_save, 'j3ds': j3ds_final[ind, :] } with open( output_path + "/hmr_optimization_pose_%04d.pkl" % ind, 'wb') as fout: pkl.dump(res, fout) # for z in range(len(verts_est_final)): # if int(verts_est_final[z][0]) > masks[ind].shape[0] - 1: # verts_est_final[z][0] = masks[ind].shape[0] - 1 # if int(verts_est_final[z][1]) > masks[ind].shape[1] - 1: # verts_est_final[z][1] = masks[ind].shape[1] - 1 # (masks[ind])[int(verts_est_final[z][0]), int(verts_est_final[z][1])] = 127 # cv2.imwrite(Util.hmr_path + "output_mask/%04d.png" % ind, masks[ind]) if Util.mode == "pose": # img_result_naked = camera.render_naked(v_final[ind, :, :], imgs[ind]) # img_result_naked = img_result_naked[:, :, :3] # cv2.imwrite(output_path + "/hmr_optimization_%04d.png" % ind, img_result_naked) # if Util.video is True: # if ind == 0: # fps = 15 # size = (imgs[0].shape[1], imgs[0].shape[0]) # video_path = output_path + "/texture.mp4" # videowriter = cv2.VideoWriter(video_path, cv2.VideoWriter_fourcc('D', 'I', 'V', 'X'), fps, size) # videowriter.write(img_result_naked) # bg = np.ones_like(imgs[ind]).astype(np.uint8) * 255 # img_result_naked1 = camera.render_naked(v_final[ind, :, :], bg) # cv2.imwrite(output_path + "/hmr_optimization_naked_%04d.png" % ind, img_result_naked1) # img_result_naked_rotation = camera.render_naked_rotation(v_final[ind, :, :], 90, imgs[ind]) # cv2.imwrite(output_path + 
"/hmr_optimization_rotation_%04d.png" % ind, # img_result_naked_rotation) res = { 'pose': poses_final[ind, :], 'betas': betas_final[ind, :], 'trans': trans_final[ind, :], 'cam': cam_for_save, 'j3ds': j3ds_final[ind, :] } with open( output_path + "/hmr_optimization_pose_%04d.pkl" % ind, 'wb') as fout: pkl.dump(res, fout) # for z in range(len(verts_est_final)): # if int(verts_est_final[z][0]) > masks[ind].shape[0] - 1: # verts_est_final[z][0] = masks[ind].shape[0] - 1 # if int(verts_est_final[z][1]) > masks[ind].shape[1] - 1: # verts_est_final[z][1] = masks[ind].shape[1] - 1 # (masks[ind])[int(verts_est_final[z][0]), int(verts_est_final[z][1])] = 127 # cv2.imwrite(Util.hmr_path + "output_mask/%04d.png" % ind, masks[ind]) for name in _objs: print("the %s loss is %f" % (name, _objs[name])) util_func.save_pkl_to_csv(output_path) util_func.save_pkl_to_npy(output_path) duration = time.time() - start_time print("post-processing time is %f" % duration) duration = time.time() - start_time_total print("total time is %f" % duration)
def main(img_files):
    '''
    Refine the SMPL pose of every frame in a batch and save the fitted meshes.

    The data returned by ``util.load_data_temporal(img_files)`` is used to
    build one trainable pose variable per frame (shape and translation are
    constants). The optimized loss is the mean over these terms:

    * ``J2D_Loss``: summed squared difference between the joints projected
      through every camera in ``cams`` and the loaded 2D joints;
    * ``Prior_Loss_<frame>``: ``5 * d * pose_covariance * d^T`` where ``d`` is
      the last 69 pose values of the frame minus ``pose_mean``;
    * ``DCT_<joint>_<axis>``: summed squared difference between a joint
      coordinate over the batch and ``dct_mtx * c_dct[joint, axis]``.

    The loss is minimized with L-BFGS-B over the pose variables and ``c_dct``.
    For the first ``util.BATCH_FRAME_NUM // 2`` frames a ``.ply`` mesh and a
    ``.pkl`` with pose/betas/trans are written next to each other; the path is
    ``img_files[fid]`` with ``'Image'`` replaced by ``'Res_2'`` and the file
    extension swapped.

    :param img_files: image paths; passed to ``util.load_data_temporal`` and
                      used to derive the output paths
    :return: None, the results are written to disk
    '''
    imgs, j2ds, cams, poses, mean_betas, trans = util.load_data_temporal(
        img_files)
    j2ds = np.array(j2ds).reshape([-1, 2])
    dct_mtx = tf.constant(util.load_dct_base().T, dtype=tf.float32)

    # For SMPL parameters: only the pose of each frame is trainable.
    params_tem = []
    params_pose_tem = []
    param_shape = tf.constant(mean_betas, dtype=tf.float32)
    for idx in range(util.BATCH_FRAME_NUM):
        param_pose = tf.Variable(poses[idx], dtype=tf.float32,
                                 name='Pose_%d' % idx)
        param_trans = tf.constant(trans[idx], dtype=tf.float32)
        params_tem.append(
            tf.concat([param_shape, param_pose, param_trans], axis=1))
        params_pose_tem.append(param_pose)
    params_tem = tf.concat(params_tem, axis=0)

    # For DCT prior params: one coefficient vector per (joint, axis).
    num_joints = len(util.TEM_SMPL_JOINT_IDS)
    c_dct = tf.Variable(np.zeros([num_joints, 3, util.DCT_NUM]),
                        dtype=tf.float32, name='C_DCT')

    smpl_model = SMPL(util.SMPL_PATH)
    j3ds, vs = smpl_model.get_3d_joints(params_tem,
                                        util.TEM_SMPL_JOINT_IDS)  # N x M x 3
    j3ds = j3ds[:, :-1]  # the last returned joint is not used
    j3ds_flatten = tf.reshape(j3ds, [-1, 3])

    # Project all joints through every camera, then reorder from
    # (view, frame, joint) to (frame, view, joint) before flattening.
    j2ds_est = tf.concat(
        [cams[idx].project(j3ds_flatten) for idx in range(util.NUM_VIEW)],
        axis=0)
    j2ds_est = tf.reshape(
        j2ds_est, [util.NUM_VIEW, util.BATCH_FRAME_NUM, num_joints, 2])
    j2ds_est = tf.transpose(j2ds_est, [1, 0, 2, 3])
    j2ds_est = tf.reshape(j2ds_est, [-1, 2])

    _, pose_mean, pose_covariance = util.load_initial_param()
    pose_mean = tf.constant(pose_mean, dtype=tf.float32)
    pose_covariance = tf.constant(pose_covariance, dtype=tf.float32)

    objs = {}
    objs['J2D_Loss'] = tf.reduce_sum(tf.square(j2ds_est - j2ds))
    for i in range(util.BATCH_FRAME_NUM):
        pose_diff = params_pose_tem[i][:, -69:] - pose_mean
        objs['Prior_Loss_%d' % i] = 5 * tf.squeeze(
            tf.matmul(tf.matmul(pose_diff, pose_covariance),
                      tf.transpose(pose_diff)))

    # c_dct starts from zeros; it is not warm-started from the projection of
    # the current trajectory onto the DCT basis.
    for i in range(num_joints):
        for j, aid in enumerate((0, 1, 2)):
            trajectory = j3ds[:, i, aid]
            trajectory_dct = tf.squeeze(
                tf.matmul(dct_mtx, tf.expand_dims(c_dct[i, j], axis=-1)))
            objs['DCT_%d_%d' % (i, j)] = tf.reduce_sum(
                tf.square(trajectory - trajectory_dct))

    # list() keeps this working when dict.values() returns a view (Python 3).
    loss = tf.reduce_mean(list(objs.values()))

    def lc(j2d_est):
        # Debug helper: draw target joints and estimated joints on a copy of
        # each view. It is not passed to the optimizer at the moment.
        # NOTE(review): j2ds was flattened to (-1, 2) above, so j2ds[idx] is a
        # single point rather than the joints of one view -- this helper looks
        # stale and needs the per-view layout restored before it is enabled.
        # The figure has three axes, so util.NUM_VIEW must not exceed 3.
        import copy

        def draw(canvas, points, color):
            rows, cols = canvas.shape[0], canvas.shape[1]
            for j2d in points:
                x = int(j2d[1])  # row
                y = int(j2d[0])  # column
                # Skip markers anchored outside the image. Negative values
                # are skipped too because they would wrap around when slicing.
                if not (0 <= x < rows and 0 <= y < cols):
                    continue
                canvas[x:x + 5, y:y + 5, :] = np.array(color)

        _, ax = plt.subplots(1, 3)
        for idx in range(util.NUM_VIEW):
            tmp = copy.copy(imgs[idx])
            draw(tmp, j2ds[idx], [0, 0, 255])
            draw(tmp, j2d_est[idx], [255, 0, 0])
            ax[idx].imshow(tmp)
        plt.show()

    # Visualization is disabled: the optimizer runs without a loss callback
    # whatever util.VIS_OR_NOT is set to.
    func_lc = None

    # The context manager releases the session even if the optimization fails.
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        optimizer = scipy_pt(loss=loss,
                             var_list=params_pose_tem + [c_dct],
                             options={
                                 'ftol': 0.001,
                                 'maxiter': 500,
                                 'disp': True
                             },
                             method='L-BFGS-B')
        optimizer.minimize(sess, loss_callback=func_lc)

        print(sess.run(c_dct))
        vs_final = sess.run(vs)
        pose_final = sess.run(params_pose_tem)
        betas = sess.run(param_shape)
        model_f = sess.run(smpl_model.f)
    model_f = model_f.astype(int).tolist()

    from psbody.meshlite import Mesh

    # Only the first half of the batch is written out.
    for fid in range(util.BATCH_FRAME_NUM // 2):
        # splitext swaps only the real extension; a plain str.replace would
        # also hit earlier occurrences and breaks on extension-less names.
        out_base = os.path.splitext(
            img_files[fid].replace('Image', 'Res_2'))[0]
        out_ply_path = out_base + '.ply'
        out_pkl_path = out_base + '.pkl'

        Mesh(v=vs_final[fid], f=model_f).write_ply(out_ply_path)

        res = {'pose': pose_final[fid], 'betas': betas, 'trans': trans[fid]}
        print(out_pkl_path)
        with open(out_pkl_path, 'wb') as fout:
            pkl.dump(res, fout)