def guess_init(model, focal_length, j2d, init_pose): """Initialize the camera translation via triangle similarity, by using the torso joints . :param model: SMPL model :param focal_length: camera focal length (kept fixed) :param j2d: 14x2 array of CNN joints :param init_pose: 72D vector of pose parameters used for initialization (kept fixed) :returns: 3D vector corresponding to the estimated camera translation """ cids = np.arange(0, 12) # map from LSP to SMPL joints j2d_here = j2d[cids] smpl_ids = [8, 5, 2, 1, 4, 7, 21, 19, 17, 16, 18, 20] opt_pose = ch.array(init_pose) (_, A_global) = global_rigid_transformation(opt_pose, model.J, model.kintree_table, xp=ch) Jtr = ch.vstack([g[:3, 3] for g in A_global]) Jtr = Jtr[smpl_ids].r # 9 is L shoulder, 3 is L hip # 8 is R shoulder, 2 is R hip diff3d = np.array([Jtr[9] - Jtr[3], Jtr[8] - Jtr[2]]) mean_height3d = np.mean(np.sqrt(np.sum(diff3d**2, axis=1))) diff2d = np.array([j2d_here[9] - j2d_here[3], j2d_here[8] - j2d_here[2]]) mean_height2d = np.mean(np.sqrt(np.sum(diff2d**2, axis=1))) est_d = focal_length * (mean_height3d / mean_height2d) # just set the z value init_t = np.array([0., 0., est_d]) return init_t
def guess_init(model, focal_length, j2d, init_pose): """Initialize the camera translation via triangle similarity, by using the torso joints . :param model: SMPL model :param focal_length: camera focal length (kept fixed) :param j2d: 14x2 array of CNN joints :param init_pose: 72D vector of pose parameters used for initialization (kept fixed) :returns: 3D vector corresponding to the estimated camera translation """ cids = np.arange(0, 12) # map from LSP to SMPL joints j2d_here = j2d[cids] smpl_ids = [8, 5, 2, 1, 4, 7, 21, 19, 17, 16, 18, 20] opt_pose = ch.array(init_pose) (_, A_global) = global_rigid_transformation( opt_pose, model.J, model.kintree_table, xp=ch) Jtr = ch.vstack([g[:3, 3] for g in A_global]) Jtr = Jtr[smpl_ids].r # 9 is L shoulder, 3 is L hip # 8 is R shoulder, 2 is R hip diff3d = np.array([Jtr[9] - Jtr[3], Jtr[8] - Jtr[2]]) mean_height3d = np.mean(np.sqrt(np.sum(diff3d**2, axis=1))) diff2d = np.array([j2d_here[9] - j2d_here[3], j2d_here[8] - j2d_here[2]]) mean_height2d = np.mean(np.sqrt(np.sum(diff2d**2, axis=1))) est_d = focal_length * (mean_height3d / mean_height2d) # just set the z value init_t = np.array([0., 0., est_d]) return init_t
def compute_imu_data(gender, betas, poses, frame_rate): if gender == 'male': Jdirs = Jdirs_male model = model_male else: Jdirs = Jdirs_female model = model_female betas[:] = 0 J_onbetas = ch.array(Jdirs).dot(betas) + model.J_regressor.dot( model.v_template.r) A_global_list = [] print('Length of poses: %d' % (len(poses) / 1)) for idx, p in enumerate(poses): (_, A_global) = global_rigid_transformation(p, J_onbetas, model.kintree_table, xp=ch) A_global_list.append(A_global) vertex = [] for idx, p in enumerate(poses): model.pose[:] = p model.betas[:] = 0 model.betas[:10] = betas tmp = model.r[VERTEX_IDS] vertex.append(tmp) # 6*3 orientation, acceleration = get_ori_accel(A_global_list, vertex, frame_rate) return orientation, acceleration
def initialize_camera(model, j2d, img, init_pose, flength=flength): """Initialize camera translation and body orientation :param model: SMPL model :param j2d: 14x2 array of CNN joints :param img: h x w x 3 image :param init_pose: 72D vector of pose parameters used for initialization :param flength: camera focal length (kept fixed) :param pix_thsh: threshold (in pixel), if the distance between shoulder joints in 2D is lower than pix_thsh, the body orientation as ambiguous (so a fit is run on both the estimated one and its flip) :param viz: boolean, if True enables visualization during optimization :returns: a tuple containing the estimated camera, a boolean deciding if both the optimized body orientation and its flip should be considered, 3D vector for the body orientation """ # optimize camera translation and body orientation based on torso joints # LSP torso ids: # 2=right hip, 3=left hip, 8=right shoulder, 9=left shoulder torso_cids = [2, 3, 8, 9] # corresponding SMPL torso ids torso_smpl_ids = [2, 1, 17, 16] center = np.array([img.shape[1] / 2, img.shape[0] / 2]) rt = ch.zeros(3) # initial camera rotation init_t = guess_init(model, flength, j2d, init_pose) t = ch.array(init_t) # initial camera translation opt_pose = ch.array(init_pose) _, A_global = global_rigid_transformation(opt_pose, model.J, model.kintree_table, xp=ch) Jtr = ch.vstack([g[:3, 3] for g in A_global]) # initialize the camera and project SMPL joints cam = ProjectPoints(f=np.array([flength, flength]), rt=rt, t=t, k=np.zeros(5), c=center) cam.v = Jtr # project SMPL joints # optimize for camera translation and body orientation free_variables = [cam.t, opt_pose[:3]] ch.minimize( # data term defined over torso joints... {'cam': j2d[torso_cids] - cam[torso_smpl_ids], # ...plus a regularizer for the camera translation 'cam_t': 1e2 * (cam.t[2] - init_t[2])}, x0=free_variables, method='dogleg', callback=None, options={'maxiter': 100, 'e_3': .0001, 'disp': 0}) return cam, opt_pose[:3].r
def initialize_camera(model, j2d, img, init_pose, flength=5000., pix_thsh=25., viz=False): """Initialize camera translation and body orientation :param model: SMPL model :param j2d: 14x2 array of CNN joints :param img: h x w x 3 image :param init_pose: 72D vector of pose parameters used for initialization :param flength: camera focal length (kept fixed) :param pix_thsh: threshold (in pixel), if the distance between shoulder joints in 2D is lower than pix_thsh, the body orientation as ambiguous (so a fit is run on both the estimated one and its flip) :param viz: boolean, if True enables visualization during optimization :returns: a tuple containing the estimated camera, a boolean deciding if both the optimized body orientation and its flip should be considered, 3D vector for the body orientation """ # optimize camera translation and body orientation based on torso joints # LSP torso ids: # 2=right hip, 3=left hip, 8=right shoulder, 9=left shoulder torso_cids = [2, 3, 8, 9] # corresponding SMPL torso ids torso_smpl_ids = [2, 1, 17, 16] center = np.array([img.shape[1] / 2, img.shape[0] / 2]) # initialize camera rotation rt = ch.zeros(3) # initialize camera translation _LOGGER.info('initializing translation via similar triangles') init_t = guess_init(model, flength, j2d, init_pose) t = ch.array(init_t) # check how close the shoulder joints are try_both_orient = np.linalg.norm(j2d[8] - j2d[9]) < pix_thsh opt_pose = ch.array(init_pose) (_, A_global) = global_rigid_transformation(opt_pose, model.J, model.kintree_table, xp=ch) Jtr = ch.vstack([g[:3, 3] for g in A_global]) # initialize the camera cam = ProjectPoints(f=np.array([flength, flength]), rt=rt, t=t, k=np.zeros(5), c=center) # we are going to project the SMPL joints cam.v = Jtr if viz: viz_img = img.copy() # draw the target (CNN) joints for coord in np.around(j2d).astype(int): if (coord[0] < img.shape[1] and coord[0] >= 0 and coord[1] < img.shape[0] and coord[1] >= 0): cv2.circle(viz_img, tuple(coord), 3, [0, 255, 0]) import matplotlib.pyplot as plt plt.ion() # draw optimized joints at each iteration def on_step(_): """Draw a visualization.""" plt.figure(1, figsize=(5, 5)) plt.subplot(1, 1, 1) viz_img = img.copy() for coord in np.around(cam.r[torso_smpl_ids]).astype(int): if (coord[0] < viz_img.shape[1] and coord[0] >= 0 and coord[1] < viz_img.shape[0] and coord[1] >= 0): cv2.circle(viz_img, tuple(coord), 3, [0, 0, 255]) plt.imshow(viz_img[:, :, ::-1]) plt.draw() plt.show() plt.pause(1e-3) else: on_step = None # optimize for camera translation and body orientation free_variables = [cam.t, opt_pose[:3]] ch.minimize( # data term defined over torso joints... { 'cam': j2d[torso_cids] - cam[torso_smpl_ids], # ...plus a regularizer for the camera translation 'cam_t': 1e2 * (cam.t[2] - init_t[2]) }, x0=free_variables, method='dogleg', callback=on_step, options={ 'maxiter': 100, 'e_3': .0001, # disp set to 1 enables verbose output from the optimizer 'disp': 0 }) if viz: plt.ioff() return (cam, try_both_orient, opt_pose[:3].r)
def optimize_on_joints(j2d, model, cam, img, prior, try_both_orient, body_orient, n_betas=10, regs=None, conf=None, viz=False): """Fit the model to the given set of joints, given the estimated camera :param j2d: 14x2 array of CNN joints :param model: SMPL model :param cam: estimated camera :param img: h x w x 3 image :param prior: mixture of gaussians pose prior :param try_both_orient: boolean, if True both body_orient and its flip are considered for the fit :param body_orient: 3D vector, initialization for the body orientation :param n_betas: number of shape coefficients considered during optimization :param regs: regressors for capsules' axis and radius, if not None enables the interpenetration error term :param conf: 14D vector storing the confidence values from the CNN :param viz: boolean, if True enables visualization during optimization :returns: a tuple containing the optimized model, its joints projected on image space, the camera translation """ t0 = time() # define the mapping LSP joints -> SMPL joints # cids are joints ids for LSP: cids = range(12) + [13] # joint ids for SMPL # SMPL does not have a joint for head, instead we use a vertex for the head # and append it later. smpl_ids = [8, 5, 2, 1, 4, 7, 21, 19, 17, 16, 18, 20] # the vertex id for the joint corresponding to the head head_id = 411 # weights assigned to each joint during optimization; # the definition of hips in SMPL and LSP is significantly different so set # their weights to zero base_weights = np.array([1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1], dtype=np.float64) if try_both_orient: flipped_orient = cv2.Rodrigues(body_orient)[0].dot( cv2.Rodrigues(np.array([0., np.pi, 0]))[0]) flipped_orient = cv2.Rodrigues(flipped_orient)[0].ravel() orientations = [body_orient, flipped_orient] else: orientations = [body_orient] if try_both_orient: # store here the final error for both orientations, # and pick the orientation resulting in the lowest error errors = [] svs = [] cams = [] for o_id, orient in enumerate(orientations): # initialize the shape to the mean shape in the SMPL training set betas = ch.zeros(n_betas) # initialize the pose by using the optimized body orientation and the # pose prior init_pose = np.hstack((orient, prior.weights.dot(prior.means))) # instantiate the model: # verts_decorated allows us to define how many # shape coefficients (directions) we want to consider (here, n_betas) sv = verts_decorated(trans=ch.zeros(3), pose=ch.array(init_pose), v_template=model.v_template, J=model.J_regressor, betas=betas, shapedirs=model.shapedirs[:, :, :n_betas], weights=model.weights, kintree_table=model.kintree_table, bs_style=model.bs_style, f=model.f, bs_type=model.bs_type, posedirs=model.posedirs) # make the SMPL joints depend on betas Jdirs = np.dstack([ model.J_regressor.dot(model.shapedirs[:, :, i]) for i in range(len(betas)) ]) J_onbetas = ch.array(Jdirs).dot(betas) + model.J_regressor.dot( model.v_template.r) # get joint positions as a function of model pose, betas and trans (_, A_global) = global_rigid_transformation(sv.pose, J_onbetas, model.kintree_table, xp=ch) Jtr = ch.vstack([g[:3, 3] for g in A_global]) + sv.trans # add the head joint, corresponding to a vertex... Jtr = ch.vstack((Jtr, sv[head_id])) # ... and add the joint id to the list if o_id == 0: smpl_ids.append(len(Jtr) - 1) # update the weights using confidence values weights = base_weights * conf[ cids] if conf is not None else base_weights # project SMPL joints on the image plane using the estimated camera cam.v = Jtr # data term: distance between observed and estimated joints in 2D obj_j2d = lambda w, sigma: (w * weights.reshape((-1, 1)) * GMOf( (j2d[cids] - cam[smpl_ids]), sigma)) # mixture of gaussians pose prior pprior = lambda w: w * prior(sv.pose) # joint angles pose prior, defined over a subset of pose parameters: # 55: left elbow, 90deg bend at -np.pi/2 # 58: right elbow, 90deg bend at np.pi/2 # 12: left knee, 90deg bend at np.pi/2 # 15: right knee, 90deg bend at np.pi/2 alpha = 10 my_exp = lambda x: alpha * ch.exp(x) obj_angle = lambda w: w * ch.concatenate([ my_exp(sv.pose[55]), my_exp(-sv.pose[58]), my_exp(-sv.pose[12]), my_exp(-sv.pose[15]) ]) if viz: import matplotlib.pyplot as plt plt.ion() def on_step(_): """Create visualization.""" plt.figure(1, figsize=(10, 10)) plt.subplot(1, 2, 1) # show optimized joints in 2D tmp_img = img.copy() for coord, target_coord in zip( np.around(cam.r[smpl_ids]).astype(int), np.around(j2d[cids]).astype(int)): if (coord[0] < tmp_img.shape[1] and coord[0] >= 0 and coord[1] < tmp_img.shape[0] and coord[1] >= 0): cv2.circle(tmp_img, tuple(coord), 3, [0, 0, 255]) if (target_coord[0] < tmp_img.shape[1] and target_coord[0] >= 0 and target_coord[1] < tmp_img.shape[0] and target_coord[1] >= 0): cv2.circle(tmp_img, tuple(target_coord), 3, [0, 255, 0]) plt.imshow(tmp_img[:, :, ::-1]) plt.draw() plt.show() plt.pause(1e-2) on_step(_) else: on_step = None if regs is not None: # interpenetration term sp = SphereCollisions(pose=sv.pose, betas=sv.betas, model=model, regs=regs) sp.no_hands = True # weight configuration used in the paper, with joints + confidence values from the CNN # (all the weights used in the code were obtained via grid search, see the paper for more details) # the first list contains the weights for the pose priors, # the second list contains the weights for the shape prior opt_weights = zip([4.04 * 1e2, 4.04 * 1e2, 57.4, 4.78], [1e2, 5 * 1e1, 1e1, .5 * 1e1]) # run the optimization in 4 stages, progressively decreasing the # weights for the priors for stage, (w, wbetas) in enumerate(opt_weights): _LOGGER.info('stage %01d', stage) objs = {} objs['j2d'] = obj_j2d(1., 100) objs['pose'] = pprior(w) objs['pose_exp'] = obj_angle(0.317 * w) objs['betas'] = wbetas * betas if regs is not None: objs['sph_coll'] = 1e3 * sp ch.minimize(objs, x0=[sv.betas, sv.pose], method='dogleg', callback=on_step, options={ 'maxiter': 100, 'e_3': .0001, 'disp': 0 }) t1 = time() _LOGGER.info('elapsed %.05f', (t1 - t0)) if try_both_orient: errors.append((objs['j2d'].r**2).sum()) svs.append(sv) cams.append(cam) if try_both_orient and errors[0] > errors[1]: choose_id = 1 else: choose_id = 0 if viz: plt.ioff() return (svs[choose_id], cams[choose_id].r, cams[choose_id].t.r, Jtr)
def initialize_camera(model, j2d, img, init_pose, flength=5000., pix_thsh=25.): """Initialize camera translation and body orientation :param model: SMPL model :param j2d: 14x2 array of CNN joints :param img: h x w x 3 image :param init_pose: 72D vector of pose parameters used for initialization :param flength: camera focal length (kept fixed) :param pix_thsh: threshold (in pixel), if the distance between shoulder joints in 2D is lower than pix_thsh, the body orientation as ambiguous (so a fit is run on both the estimated one and its flip) :returns: a tuple containing the estimated camera, a boolean deciding if both the optimized body orientation and its flip should be considered, 3D vector for the body orientation """ # optimize camera translation and body orientation based on torso joints # LSP torso ids: # 2=right hip, 3=left hip, 8=right shoulder, 9=left shoulder torso_cids = [2, 3, 8, 9] # corresponding SMPL torso ids torso_smpl_ids = [2, 1, 17, 16] center = np.array([img.shape[1] / 2, img.shape[0] / 2]) # initialize camera rotation rt = ch.zeros(3) # initialize camera translation print 'initializing translation via similar triangles' init_t = guess_init(model, flength, j2d, init_pose) t = ch.array(init_t) # check how close the shoulder joints are try_both_orient = np.linalg.norm(j2d[8] - j2d[9]) < pix_thsh opt_pose = ch.array(init_pose) (_, A_global) = global_rigid_transformation(opt_pose, model.J, model.kintree_table, xp=ch) Jtr = ch.vstack([g[:3, 3] for g in A_global]) # initialize the camera cam = ProjectPoints(f=np.array([flength, flength]), rt=rt, t=t, k=np.zeros(5), c=center) # we are going to project the SMPL joints cam.v = Jtr on_step = None # optimize for camera translation and body orientation free_variables = [cam.t, opt_pose[:3]] ch.minimize( # data term defined over torso joints... { 'cam': j2d[torso_cids] - cam[torso_smpl_ids], # ...plus a regularizer for the camera translation 'cam_t': 1e2 * (cam.t[2] - init_t[2]) }, x0=free_variables, method='dogleg', callback=on_step, # maxiter: 100, e_3: .0001 options={ 'maxiter': 100, 'e_3': .0001, # disp set to 1 enables verbose output from the optimizer 'disp': 0 }) return (cam, try_both_orient, opt_pose[:3].r)
def optimize_on_joints_and_silhouette(j2d, sil, model, cam, img, prior, init_pose, init_shape, n_betas=10, conf=None): """Fit the model to the given set of joints, given the estimated camera :param j2d: 14x2 array of CNN joints :param sil: h x w silhouette with soft boundaries (np.float32, range(-1, 1)) :param model: SMPL model :param cam: estimated camera :param img: h x w x 3 image :param prior: mixture of gaussians pose prior :param init_pose: 72D vector, pose prediction results provided by HMR :param init_shape: 10D vector, shape prediction results provided by HMR :param n_betas: number of shape coefficients considered during optimization :param conf: 14D vector storing the confidence values from the CNN :returns: a tuple containing the optimized model, its joints projected on image space, the camera translation """ # define the mapping LSP joints -> SMPL joints cids = range(12) + [13] smpl_ids = [8, 5, 2, 1, 4, 7, 21, 19, 17, 16, 18, 20] head_id = 411 # weights assigned to each joint during optimization; # the definition of hips in SMPL and LSP is significantly different so set # their weights to zero base_weights = np.array([1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1], dtype=np.float64) betas = ch.array(init_shape) # instantiate the model: sv = verts_decorated(trans=ch.zeros(3), pose=ch.array(init_pose), v_template=model.v_template, J=model.J_regressor, betas=betas, shapedirs=model.shapedirs[:, :, :n_betas], weights=model.weights, kintree_table=model.kintree_table, bs_style=model.bs_style, f=model.f, bs_type=model.bs_type, posedirs=model.posedirs) # make the SMPL joints depend on betas Jdirs = np.dstack([ model.J_regressor.dot(model.shapedirs[:, :, i]) for i in range(len(betas)) ]) J_onbetas = ch.array(Jdirs).dot(betas) + model.J_regressor.dot( model.v_template.r) # get joint positions as a function of model pose, betas and trans (_, A_global) = global_rigid_transformation(sv.pose, J_onbetas, model.kintree_table, xp=ch) Jtr = ch.vstack([g[:3, 3] for g in A_global]) + sv.trans # add the head joint Jtr = ch.vstack((Jtr, sv[head_id])) smpl_ids.append(len(Jtr) - 1) # update the weights using confidence values weights = base_weights * conf[cids] if conf is not None else base_weights # project SMPL joints and vertex on the image plane using the estimated camera cam.v = ch.vstack([Jtr, sv]) # obtain a gradient map of the soft silhouette grad_x = cv2.Sobel(sil, cv2.CV_32FC1, 1, 0) * 0.125 grad_y = cv2.Sobel(sil, cv2.CV_32FC1, 0, 1) * 0.125 # data term #1: distance between observed and estimated joints in 2D obj_j2d = lambda w, sigma: (w * weights.reshape((-1, 1)) * GMOf( (j2d[cids] - cam[smpl_ids]), sigma)) # data term #2: distance between the observed and projected boundaries obj_s2d = lambda w, sigma, flag, target_pose: (w * flag * GMOf( (target_pose - cam[len(Jtr):(len(Jtr) + 6890)]), sigma)) # mixture of gaussians pose prior pprior = lambda w: w * prior(sv.pose) # joint angles pose prior, defined over a subset of pose parameters: # 55: left elbow, 90deg bend at -np.pi/2 # 58: right elbow, 90deg bend at np.pi/2 # 12: left knee, 90deg bend at np.pi/2 # 15: right knee, 90deg bend at np.pi/2 alpha = 10 my_exp = lambda x: alpha * ch.exp(x) obj_angle = lambda w: w * ch.concatenate([ my_exp(sv.pose[55]), my_exp(-sv.pose[58]), my_exp(-sv.pose[12]), my_exp(-sv.pose[15]) ]) # run the optimization in 4 stages, progressively decreasing the # weights for the priors print('****** Optimization on joints') curr_pose = sv.pose.r opt_weights = zip([4.04 * 1e2, 4.04 * 1e2, 57.4, 4.78], [1e2, 5 * 1e1, 1e1, .5 * 1e1]) for stage, (w, wbetas) in enumerate(opt_weights): _LOGGER.info('stage %01d', stage) objs = {} objs['j2d'] = obj_j2d(1., 100) objs['pose'] = pprior(w) objs['pose_exp'] = obj_angle(0.317 * w) objs['betas'] = wbetas * betas objs['thetas'] = wbetas * (sv.pose - curr_pose ) # constrain theta changes ch.minimize(objs, x0=[sv.betas, sv.pose], method='dogleg', callback=None, options={ 'maxiter': 100, 'e_3': .001, 'disp': 0 }) curr_pose = sv.pose.r # cam.v = ch.vstack([Jtr, sv.r]) # run the optimization in 2 stages, progressively decreasing the # weights for the priors print('****** Optimization on silhouette and joints') opt_weights = zip([57.4, 4.78], [2e2, 1e2]) for stage, (w, wbetas) in enumerate(opt_weights): _LOGGER.info('stage %01d', stage) # find the boundary vertices and estimate their expected location smpl_vs = cam.r[len(Jtr):, :] boundary_flag = np.zeros((smpl_vs.shape[0], 1)) expected_pos = np.zeros((smpl_vs.shape[0], 2)) for vi, v in enumerate(smpl_vs): r, c = int(v[1]), int(v[0]) if r < 0 or r >= sil.shape[0] or c < 0 or c >= sil.shape[1]: continue sil_v = sil[r, c] grad = np.array([grad_x[r, c], grad_y[r, c]]) grad_n = np.linalg.norm(grad) if grad_n > 1e-1 and sil_v < 0.4: # vertex on or out of the boundaries boundary_flag[vi] = 1.0 step = (grad / grad_n) * (sil_v / grad_n) expected_pos[vi] = np.array([c - step[0], r - step[1]]) # run optimization objs = {} objs['j2d'] = obj_j2d(1., 100) objs['s2d'] = obj_s2d(5., 100, boundary_flag, expected_pos) objs['pose'] = pprior(w) objs['pose_exp'] = obj_angle(0.317 * w) objs['betas'] = wbetas * betas # constrain beta changes objs['thetas'] = wbetas * (sv.pose - curr_pose ) # constrain theta changes ch.minimize(objs, x0=[sv.betas, sv.pose], method='dogleg', callback=None, options={ 'maxiter': 100, 'e_3': .001, 'disp': 0 }) return sv, cam.r, cam.t.r
def optimize_on_joints(j2d, model, cam, img, prior, init_pose, init_shape, n_betas=10, conf=None): """Fit the model to the given set of joints, given the estimated camera :param j2d: 14x2 array of CNN joints :param model: SMPL model :param cam: estimated camera :param img: h x w x 3 image :param prior: mixture of gaussians pose prior :param init_pose: 72D vector, pose prediction results provided by HMR :param init_shape: 10D vector, shape prediction results provided by HMR :param n_betas: number of shape coefficients considered during optimization :param conf: 14D vector storing the confidence values from the CNN :returns: a tuple containing the optimized model, its joints projected on image space, the camera translation """ # define the mapping LSP joints -> SMPL joints # cids are joints ids for LSP: cids = range(12) + [13] # joint ids for SMPL # SMPL does not have a joint for head, instead we use a vertex for the head # and append it later. smpl_ids = [8, 5, 2, 1, 4, 7, 21, 19, 17, 16, 18, 20] # the vertex id for the joint corresponding to the head head_id = 411 # weights assigned to each joint during optimization; # the definition of hips in SMPL and LSP is significantly different so set # their weights to zero base_weights = np.array([1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1], dtype=np.float64) # initialize the shape to the mean shape in the SMPL training set betas = ch.array(init_shape) # instantiate the model: # verts_decorated allows us to define how many # shape coefficients (directions) we want to consider (here, n_betas) sv = verts_decorated(trans=ch.zeros(3), pose=ch.array(init_pose), v_template=model.v_template, J=model.J_regressor, betas=betas, shapedirs=model.shapedirs[:, :, :n_betas], weights=model.weights, kintree_table=model.kintree_table, bs_style=model.bs_style, f=model.f, bs_type=model.bs_type, posedirs=model.posedirs) # make the SMPL joints depend on betas Jdirs = np.dstack([ model.J_regressor.dot(model.shapedirs[:, :, i]) for i in range(len(betas)) ]) J_onbetas = ch.array(Jdirs).dot(betas) + model.J_regressor.dot( model.v_template.r) # get joint positions as a function of model pose, betas and trans (_, A_global) = global_rigid_transformation(sv.pose, J_onbetas, model.kintree_table, xp=ch) Jtr = ch.vstack([g[:3, 3] for g in A_global]) + sv.trans # add the head joint, corresponding to a vertex... Jtr = ch.vstack((Jtr, sv[head_id])) # ... and add the joint id to the list smpl_ids.append(len(Jtr) - 1) # update the weights using confidence values weights = base_weights * conf[cids] if conf is not None else base_weights # project SMPL joints on the image plane using the estimated camera cam.v = Jtr # data term: distance between observed and estimated joints in 2D obj_j2d = lambda w, sigma: (w * weights.reshape((-1, 1)) * GMOf( (j2d[cids] - cam[smpl_ids]), sigma)) # mixture of gaussians pose prior pprior = lambda w: w * prior(sv.pose) # joint angles pose prior, defined over a subset of pose parameters: # 55: left elbow, 90deg bend at -np.pi/2 # 58: right elbow, 90deg bend at np.pi/2 # 12: left knee, 90deg bend at np.pi/2 # 15: right knee, 90deg bend at np.pi/2 alpha = 10 my_exp = lambda x: alpha * ch.exp(x) obj_angle = lambda w: w * ch.concatenate([ my_exp(sv.pose[55]), my_exp(-sv.pose[58]), my_exp(-sv.pose[12]), my_exp(-sv.pose[15]) ]) # weight configuration used in the paper, with joints + confidence values from the CNN # (all the weights used in the code were obtained via grid search, see the paper for more details) # the first list contains the weights for the pose priors, # the second list contains the weights for the shape prior opt_weights = zip([4.04 * 1e2, 4.04 * 1e2, 57.4, 4.78], [1e2, 5 * 1e1, 1e1, .5 * 1e1]) # run the optimization in 4 stages, progressively decreasing the # weights for the priors for stage, (w, wbetas) in enumerate(opt_weights): _LOGGER.info('stage %01d', stage) objs = {} objs['j2d'] = obj_j2d(1., 100) objs['pose'] = pprior(w) objs['pose_exp'] = obj_angle(0.317 * w) objs['betas'] = wbetas * betas ch.minimize(objs, x0=[sv.betas, sv.pose], method='dogleg', callback=None, options={ 'maxiter': 100, 'e_3': .0001, 'disp': 0 }) return sv, cam.r, cam.t.r
def initialize_camera(model, j2d, img, init_pose, flength=5000., pix_thsh=25., viz=False): """Initialize camera translation and body orientation :param model: SMPL model :param j2d: 14x2 array of CNN joints :param img: h x w x 3 image :param init_pose: 72D vector of pose parameters used for initialization :param flength: camera focal length (kept fixed) :param pix_thsh: threshold (in pixel), if the distance between shoulder joints in 2D is lower than pix_thsh, the body orientation as ambiguous (so a fit is run on both the estimated one and its flip) :param viz: boolean, if True enables visualization during optimization :returns: a tuple containing the estimated camera, a boolean deciding if both the optimized body orientation and its flip should be considered, 3D vector for the body orientation """ # optimize camera translation and body orientation based on torso joints # LSP torso ids: # 2=right hip, 3=left hip, 8=right shoulder, 9=left shoulder torso_cids = [2, 3, 8, 9] # corresponding SMPL torso ids torso_smpl_ids = [2, 1, 17, 16] center = np.array([img.shape[1] / 2, img.shape[0] / 2]) # initialize camera rotation rt = ch.zeros(3) # initialize camera translation _LOGGER.info('initializing translation via similar triangles') init_t = guess_init(model, flength, j2d, init_pose) t = ch.array(init_t) # check how close the shoulder joints are try_both_orient = np.linalg.norm(j2d[8] - j2d[9]) < pix_thsh opt_pose = ch.array(init_pose) (_, A_global) = global_rigid_transformation( opt_pose, model.J, model.kintree_table, xp=ch) Jtr = ch.vstack([g[:3, 3] for g in A_global]) # initialize the camera cam = ProjectPoints( f=np.array([flength, flength]), rt=rt, t=t, k=np.zeros(5), c=center) # we are going to project the SMPL joints cam.v = Jtr if viz: viz_img = img.copy() # draw the target (CNN) joints for coord in np.around(j2d).astype(int): if (coord[0] < img.shape[1] and coord[0] >= 0 and coord[1] < img.shape[0] and coord[1] >= 0): cv2.circle(viz_img, tuple(coord), 3, [0, 255, 0]) import matplotlib.pyplot as plt plt.ion() # draw optimized joints at each iteration def on_step(_): """Draw a visualization.""" plt.figure(1, figsize=(5, 5)) plt.subplot(1, 1, 1) viz_img = img.copy() for coord in np.around(cam.r[torso_smpl_ids]).astype(int): if (coord[0] < viz_img.shape[1] and coord[0] >= 0 and coord[1] < viz_img.shape[0] and coord[1] >= 0): cv2.circle(viz_img, tuple(coord), 3, [0, 0, 255]) plt.imshow(viz_img[:, :, ::-1]) plt.draw() plt.show() plt.pause(1e-3) else: on_step = None # optimize for camera translation and body orientation free_variables = [cam.t, opt_pose[:3]] ch.minimize( # data term defined over torso joints... {'cam': j2d[torso_cids] - cam[torso_smpl_ids], # ...plus a regularizer for the camera translation 'cam_t': 1e2 * (cam.t[2] - init_t[2])}, x0=free_variables, method='dogleg', callback=on_step, options={'maxiter': 100, 'e_3': .0001, # disp set to 1 enables verbose output from the optimizer 'disp': 0}) if viz: plt.ioff() return (cam, try_both_orient, opt_pose[:3].r)
def optimize_on_joints(j2d, model, cam, img, prior, try_both_orient, body_orient, n_betas=10, regs=None, conf=None, viz=False): """Fit the model to the given set of joints, given the estimated camera :param j2d: 14x2 array of CNN joints :param model: SMPL model :param cam: estimated camera :param img: h x w x 3 image :param prior: mixture of gaussians pose prior :param try_both_orient: boolean, if True both body_orient and its flip are considered for the fit :param body_orient: 3D vector, initialization for the body orientation :param n_betas: number of shape coefficients considered during optimization :param regs: regressors for capsules' axis and radius, if not None enables the interpenetration error term :param conf: 14D vector storing the confidence values from the CNN :param viz: boolean, if True enables visualization during optimization :returns: a tuple containing the optimized model, its joints projected on image space, the camera translation """ t0 = time() # define the mapping LSP joints -> SMPL joints # cids are joints ids for LSP: cids = range(12) + [13] # joint ids for SMPL # SMPL does not have a joint for head, instead we use a vertex for the head # and append it later. smpl_ids = [8, 5, 2, 1, 4, 7, 21, 19, 17, 16, 18, 20] # the vertex id for the joint corresponding to the head head_id = 411 # weights assigned to each joint during optimization; # the definition of hips in SMPL and LSP is significantly different so set # their weights to zero base_weights = np.array( [1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1], dtype=np.float64) if try_both_orient: flipped_orient = cv2.Rodrigues(body_orient)[0].dot( cv2.Rodrigues(np.array([0., np.pi, 0]))[0]) flipped_orient = cv2.Rodrigues(flipped_orient)[0].ravel() orientations = [body_orient, flipped_orient] else: orientations = [body_orient] if try_both_orient: # store here the final error for both orientations, # and pick the orientation resulting in the lowest error errors = [] svs = [] cams = [] for o_id, orient in enumerate(orientations): # initialize the shape to the mean shape in the SMPL training set betas = ch.zeros(n_betas) # initialize the pose by using the optimized body orientation and the # pose prior init_pose = np.hstack((orient, prior.weights.dot(prior.means))) # instantiate the model: # verts_decorated allows us to define how many # shape coefficients (directions) we want to consider (here, n_betas) sv = verts_decorated( trans=ch.zeros(3), pose=ch.array(init_pose), v_template=model.v_template, J=model.J_regressor, betas=betas, shapedirs=model.shapedirs[:, :, :n_betas], weights=model.weights, kintree_table=model.kintree_table, bs_style=model.bs_style, f=model.f, bs_type=model.bs_type, posedirs=model.posedirs) # make the SMPL joints depend on betas Jdirs = np.dstack([model.J_regressor.dot(model.shapedirs[:, :, i]) for i in range(len(betas))]) J_onbetas = ch.array(Jdirs).dot(betas) + model.J_regressor.dot( model.v_template.r) # get joint positions as a function of model pose, betas and trans (_, A_global) = global_rigid_transformation( sv.pose, J_onbetas, model.kintree_table, xp=ch) Jtr = ch.vstack([g[:3, 3] for g in A_global]) + sv.trans # add the head joint, corresponding to a vertex... Jtr = ch.vstack((Jtr, sv[head_id])) # ... and add the joint id to the list if o_id == 0: smpl_ids.append(len(Jtr) - 1) # update the weights using confidence values weights = base_weights * conf[ cids] if conf is not None else base_weights # project SMPL joints on the image plane using the estimated camera cam.v = Jtr # data term: distance between observed and estimated joints in 2D obj_j2d = lambda w, sigma: ( w * weights.reshape((-1, 1)) * GMOf((j2d[cids] - cam[smpl_ids]), sigma)) # mixture of gaussians pose prior pprior = lambda w: w * prior(sv.pose) # joint angles pose prior, defined over a subset of pose parameters: # 55: left elbow, 90deg bend at -np.pi/2 # 58: right elbow, 90deg bend at np.pi/2 # 12: left knee, 90deg bend at np.pi/2 # 15: right knee, 90deg bend at np.pi/2 alpha = 10 my_exp = lambda x: alpha * ch.exp(x) obj_angle = lambda w: w * ch.concatenate([my_exp(sv.pose[55]), my_exp(-sv.pose[ 58]), my_exp(-sv.pose[12]), my_exp(-sv.pose[15])]) if viz: import matplotlib.pyplot as plt plt.ion() def on_step(_): """Create visualization.""" plt.figure(1, figsize=(10, 10)) plt.subplot(1, 2, 1) # show optimized joints in 2D tmp_img = img.copy() for coord, target_coord in zip( np.around(cam.r[smpl_ids]).astype(int), np.around(j2d[cids]).astype(int)): if (coord[0] < tmp_img.shape[1] and coord[0] >= 0 and coord[1] < tmp_img.shape[0] and coord[1] >= 0): cv2.circle(tmp_img, tuple(coord), 3, [0, 0, 255]) if (target_coord[0] < tmp_img.shape[1] and target_coord[0] >= 0 and target_coord[1] < tmp_img.shape[0] and target_coord[1] >= 0): cv2.circle(tmp_img, tuple(target_coord), 3, [0, 255, 0]) plt.imshow(tmp_img[:, :, ::-1]) plt.draw() plt.show() plt.pause(1e-2) on_step(_) else: on_step = None if regs is not None: # interpenetration term sp = SphereCollisions( pose=sv.pose, betas=sv.betas, model=model, regs=regs) sp.no_hands = True # weight configuration used in the paper, with joints + confidence values from the CNN # (all the weights used in the code were obtained via grid search, see the paper for more details) # the first list contains the weights for the pose priors, # the second list contains the weights for the shape prior opt_weights = zip([4.04 * 1e2, 4.04 * 1e2, 57.4, 4.78], [1e2, 5 * 1e1, 1e1, .5 * 1e1]) # run the optimization in 4 stages, progressively decreasing the # weights for the priors for stage, (w, wbetas) in enumerate(opt_weights): _LOGGER.info('stage %01d', stage) objs = {} objs['j2d'] = obj_j2d(1., 100) objs['pose'] = pprior(w) objs['pose_exp'] = obj_angle(0.317 * w) objs['betas'] = wbetas * betas if regs is not None: objs['sph_coll'] = 1e3 * sp ch.minimize( objs, x0=[sv.betas, sv.pose], method='dogleg', callback=on_step, options={'maxiter': 100, 'e_3': .0001, 'disp': 0}) t1 = time() _LOGGER.info('elapsed %.05f', (t1 - t0)) if try_both_orient: errors.append((objs['j2d'].r**2).sum()) svs.append(sv) cams.append(cam) if try_both_orient and errors[0] > errors[1]: choose_id = 1 else: choose_id = 0 if viz: plt.ioff() return (svs[choose_id], cams[choose_id].r, cams[choose_id].t.r)