def build_eval_metrics(t):
    # Root-relative joint error (MPJPE), averaged over valid joints only.
    rootrelative_diff = tfu3d.root_relative(t.coords3d_pred - t.coords3d_true)
    dist = tf.norm(rootrelative_diff, axis=-1)
    t.mean_error = tfu.reduce_mean_masked(dist, t.joint_validity_mask)

    # Procrustes-aligned error (PA-MPJPE): rigidly align the prediction
    # (including scale) to the ground truth before measuring distances.
    t.coords3d_pred_procrustes = tfu3d.rigid_align(
        t.coords3d_pred, t.coords3d_true,
        joint_validity_mask=t.joint_validity_mask, scale_align=True)
    rootrelative_diff_procrust = tfu3d.root_relative(
        t.coords3d_pred_procrustes - t.coords3d_true)
    dist_procrustes = tf.norm(rootrelative_diff_procrust, axis=-1)
    t.mean_error_procrustes = tfu.reduce_mean_masked(
        dist_procrustes, t.joint_validity_mask)

    # PCK and AUC at a 150 mm threshold (axis=0 yields per-joint values).
    threshold = np.float32(150)
    auc_score = tf.maximum(np.float32(0), 1 - dist / threshold)
    t.auc_for_nms = tfu.reduce_mean_masked(auc_score, t.joint_validity_mask, axis=0)
    t.mean_auc = tfu.reduce_mean_masked(auc_score, t.joint_validity_mask)
    is_correct = tf.cast(dist <= threshold, tf.float32)
    t.pck = tfu.reduce_mean_masked(is_correct, t.joint_validity_mask, axis=0)
    t.mean_pck = tfu.reduce_mean_masked(is_correct, t.joint_validity_mask)
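
# For reference, a minimal sketch of the masked mean used throughout this file,
# assuming tfu.reduce_mean_masked averages only over entries where the mask is
# True (the actual tfu helper may handle broadcasting and empty masks differently):
def _reduce_mean_masked_sketch(x, mask, axis=None):
    mask = tf.cast(mask, x.dtype)
    total = tf.reduce_sum(x * mask, axis=axis)
    count = tf.reduce_sum(mask, axis=axis)
    return total / tf.maximum(count, 1e-8)
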
def h36m_numbers(coords3d_true, coords3d_pred, activity_name, procrustes=False,
                 joint_validity_mask=None):
    if joint_validity_mask is None:
        # np.bool is removed in recent NumPy; use the builtin bool dtype.
        joint_validity_mask = np.full_like(
            coords3d_true[..., 0], fill_value=True, dtype=bool)

    coords3d_true = tfu3d.root_relative(coords3d_true)
    coords3d_pred = tfu3d.root_relative(coords3d_pred)
    if procrustes in ('rigid', 'rigid+scale'):
        coords3d_pred = util3d.rigid_align_many(
            coords3d_pred, coords3d_true, joint_validity_mask=joint_validity_mask,
            scale_align=procrustes == 'rigid+scale')

    dist = np.linalg.norm(coords3d_true - coords3d_pred, axis=-1)
    overall_mean_error = np.mean(dist[joint_validity_mask])

    # Per-activity mean errors in the canonical Human3.6M action order,
    # followed by the overall mean error.
    result = []
    ordered_actions = (
        'Directions,Discussion,Eating,Greeting,Phoning,Posing,Purchases,Sitting,'
        'SittingDown,Smoking,Photo,Waiting,Walking,WalkDog,WalkTogether'.split(','))
    for activity in ordered_actions:
        activity = activity.encode('utf8')
        mask = np.logical_and(
            np.expand_dims(activity_name == activity, -1), joint_validity_mask)
        act_mean_error = np.mean(dist[mask])
        result.append(act_mean_error)
    result.append(overall_mean_error)
    return result
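
# Example usage (hypothetical data; coordinates in millimeters, activity_name an
# array of per-frame byte strings such as b'Walking'):
#
#   errors = h36m_numbers(coords3d_true, coords3d_pred, activity_name,
#                         procrustes='rigid+scale')
#   # errors[:15] are the per-action mean errors in the order above,
#   # errors[15] is the overall mean error.
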
def build_25d_inference_model(joint_info, t):
    t.x = tf.identity(t.x, 'input')
    coords25d = predict_25d(t.x, joint_info)
    t.coords2d_pred = coords25d[..., :2]
    camcoords2d_homog = model.util.matmul_joint_coords(
        t.inv_intrinsics, model.util.to_homogeneous(t.coords2d_pred))
    # Depth of each joint relative to the root joint (the last joint).
    delta_z_pred = coords25d[..., 2] - coords25d[:, -1:, 2]

    if 'bone-lengths' in FLAGS.scale_recovery:
        # Recover absolute scale by finding the root depth that makes the
        # implied bone lengths match target (dataset or ground-truth) lengths.
        if FLAGS.bone_length_dataset:
            dataset = data.datasets3d.get_dataset(FLAGS.bone_length_dataset)
        else:
            dataset = data.datasets3d.get_dataset(FLAGS.dataset)

        if FLAGS.scale_recovery == 'bone-lengths-true':
            bone_lengths_true = get_bone_lengths(t.coords3d_true, joint_info)
            z_offset = optimize_z_offset_by_bones_tensor(
                camcoords2d_homog, delta_z_pred, bone_lengths_true,
                joint_info.stick_figure_edges)
        else:
            target_bone_lengths = (
                dataset.trainval_bones if FLAGS.train_on == 'trainval'
                else dataset.train_bones)
            z_offset = optimize_z_offset_by_bones(
                camcoords2d_homog, delta_z_pred, target_bone_lengths,
                joint_info.stick_figure_edges)
        t.coords3d_pred = model.util.back_project(
            camcoords2d_homog, delta_z_pred, z_offset)
    elif FLAGS.scale_recovery == 'true-root-depth':
        # Use the ground-truth depth of the root joint.
        t.coords3d_pred = model.util.back_project(
            camcoords2d_homog, delta_z_pred, t.coords3d_true[:, -1, 2])

    t.coords3d_pred_rootrel = tf.identity(tfu3d.root_relative(t.coords3d_pred), 'pred')

    if 'rot_to_orig_cam' in t:
        t.coords3d_pred_orig_cam = model.util.to_orig_cam(
            t.coords3d_pred, t.rot_to_orig_cam, joint_info)
    if 'rot_to_world' in t:
        t.coords3d_pred_world = model.util.to_orig_cam(
            t.coords3d_pred, t.rot_to_world, joint_info) + tf.expand_dims(t.cam_loc, 1)

    if 'coords3d_true' in t:
        t.coords3d_true_rootrel = tfu3d.root_relative(t.coords3d_true)
        if 'rot_to_orig_cam' in t:
            t.coords3d_true_orig_cam = model.util.to_orig_cam(
                t.coords3d_true, t.rot_to_orig_cam, joint_info)
        if 'rot_to_world' in t:
            t.coords3d_true_world = model.util.to_orig_cam(
                t.coords3d_true, t.rot_to_world, joint_info) + tf.expand_dims(t.cam_loc, 1)
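
# A minimal sketch of the back-projection step, assuming model.util.back_project
# scales each intrinsics-normalized camera ray by the joint's absolute depth,
# X = ray * (z_offset + delta_z); the actual helper may differ:
def _back_project_sketch(camcoords2d_homog, delta_z, z_offset):
    # camcoords2d_homog: [batch, joints, 3], delta_z: [batch, joints],
    # z_offset: [batch]
    z_abs = delta_z + tf.expand_dims(z_offset, -1)
    return camcoords2d_homog * tf.expand_dims(z_abs, -1)
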
def tdhp_numbers(coords3d_true, coords3d_pred, activity_name, scene_name,
                 procrustes=False, joint_validity_mask=None):
    if joint_validity_mask is None:
        joint_validity_mask = np.full_like(
            coords3d_true[..., 0], fill_value=True, dtype=bool)

    coords3d_true = tfu3d.root_relative(coords3d_true)
    coords3d_pred = tfu3d.root_relative(coords3d_pred)
    if procrustes in ('rigid', 'rigid+scale'):
        coords3d_pred = util3d.rigid_align_many(
            coords3d_pred, coords3d_true, joint_validity_mask=joint_validity_mask,
            scale_align=procrustes == 'rigid+scale')

    dist = np.linalg.norm(coords3d_true - coords3d_pred, axis=-1)
    overall_mean_error = np.mean(dist[joint_validity_mask])

    ordered_actions = (
        'Stand/Walk,Exercise,Sit on Chair,'
        'Reach/Crouch,On Floor,Sports,Misc.'.split(','))
    ordered_scenes = ['green-screen', 'no-green-screen', 'outdoor']

    # Distances relative to the 150 mm PCK threshold.
    rel_dist = dist / 150
    overall_pck = get_pck(rel_dist[joint_validity_mask])
    overall_auc = get_auc(rel_dist[joint_validity_mask])

    # Per-activity and per-scene PCK, then the overall PCK, AUC and mean error.
    result = []
    for activity in ordered_actions:
        activity = activity.encode('utf8')
        mask = np.logical_and(
            np.expand_dims(activity_name == activity, -1), joint_validity_mask)
        result.append(get_pck(rel_dist[mask]))
    for scene in ordered_scenes:
        scene = scene.encode('utf8')
        mask = np.logical_and(
            np.expand_dims(scene_name == scene, -1), joint_validity_mask)
        result.append(get_pck(rel_dist[mask]))
    result += [overall_pck, overall_auc, overall_mean_error]
    return result
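
# Plausible definitions for the PCK/AUC helpers used above, given that rel_dist
# is pre-divided by the 150 mm threshold. These are assumptions consistent with
# the thresholded AUC in build_eval_metrics; the real helpers (and any scaling
# to percent) may differ:
def _get_pck_sketch(rel_dist):
    return np.mean(rel_dist <= 1)

def _get_auc_sketch(rel_dist):
    return np.mean(np.maximum(0, 1 - rel_dist))
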
def build_metro_inference_model(joint_info, t):
    t.coords3d_pred = predict_metro(t.x, joint_info)

    if 'rot_to_orig_cam' in t:
        t.coords3d_pred_orig_cam = model.util.to_orig_cam(
            t.coords3d_pred, t.rot_to_orig_cam, joint_info)
    if 'rot_to_world' in t:
        t.coords3d_pred_world = model.util.to_orig_cam(
            t.coords3d_pred, t.rot_to_world, joint_info) + tf.expand_dims(t.cam_loc, 1)

    if 'coords3d_true' in t:
        t.coords3d_true_rootrel = tfu3d.root_relative(t.coords3d_true)
        if 'rot_to_orig_cam' in t:
            t.coords3d_true_orig_cam = model.util.to_orig_cam(
                t.coords3d_true, t.rot_to_orig_cam, joint_info)
        if 'rot_to_world' in t:
            t.coords3d_true_world = model.util.to_orig_cam(
                t.coords3d_true, t.rot_to_world, joint_info) + tf.expand_dims(t.cam_loc, 1)
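
# Assumed behavior of model.util.to_orig_cam: rotate each pose by a per-example
# rotation matrix back into the original (unaugmented) camera frame. joint_info
# is presumably used to swap mirrored joints when the augmentation included
# horizontal flipping; that part is omitted in this sketch:
def _to_orig_cam_sketch(coords3d, rot, joint_info):
    # [batch, 3, 3] rotation applied to every joint of [batch, joints, 3]
    return tf.einsum('bij,bkj->bki', rot, coords3d)
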
def build_metrabs_inference_model(joint_info, t):
    # Predict 2D image coordinates and root-relative 3D coordinates, then
    # combine them with the inverse intrinsics into an absolute 3D pose.
    t.coords2d_pred, t.coords3d_rel_pred = predict_heads_metrabs(t.x, joint_info)
    t.coords3d_pred = reconstruct_absolute(
        t.coords2d_pred, t.coords3d_rel_pred, t.inv_intrinsics)

    if 'rot_to_orig_cam' in t:
        t.coords3d_pred_orig_cam = model.util.to_orig_cam(
            t.coords3d_pred, t.rot_to_orig_cam, joint_info)
    if 'rot_to_world' in t:
        t.coords3d_pred_world = model.util.to_orig_cam(
            t.coords3d_pred, t.rot_to_world, joint_info) + tf.expand_dims(t.cam_loc, 1)

    if 'coords3d_true' in t:
        t.coords3d_true_rootrel = tfu3d.root_relative(t.coords3d_true)
        if 'rot_to_orig_cam' in t:
            t.coords3d_true_orig_cam = model.util.to_orig_cam(
                t.coords3d_true, t.rot_to_orig_cam, joint_info)
        if 'rot_to_world' in t:
            t.coords3d_true_world = model.util.to_orig_cam(
                t.coords3d_true, t.rot_to_world, joint_info) + tf.expand_dims(t.cam_loc, 1)
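
# One common way to implement reconstruct_absolute is a linear least-squares fit
# of the translation that makes the root-relative 3D pose project onto the
# predicted 2D pose. A NumPy sketch for a single example (the actual TF helper
# may solve this differently, e.g. batched and in closed form):
def _reconstruct_absolute_sketch(coords2d_normalized, coords3d_rel):
    # coords2d_normalized: [n_joints, 2], intrinsics-normalized coords (x/z, y/z)
    # coords3d_rel: [n_joints, 3], root-relative coords in mm
    n_joints = coords3d_rel.shape[0]
    A = np.zeros((2 * n_joints, 3), dtype=np.float32)
    b = np.zeros(2 * n_joints, dtype=np.float32)
    for j in range(n_joints):
        px, py = coords2d_normalized[j]
        rx, ry, rz = coords3d_rel[j]
        # Projection constraints: (rx + tx) = px * (rz + tz), same for y.
        A[2 * j] = [1, 0, -px]
        b[2 * j] = px * rz - rx
        A[2 * j + 1] = [0, 1, -py]
        b[2 * j + 1] = py * rz - ry
    translation = np.linalg.lstsq(A, b, rcond=None)[0]
    return coords3d_rel + translation
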
def build_inference_model(joint_info, learning_phase, t, reuse=None):
    stride = FLAGS.stride_train if learning_phase == TRAIN else FLAGS.stride_test
    depth = FLAGS.depth

    def im2pred(im, reuse=reuse):
        net_output = model.architectures.resnet(
            im, n_out=depth * joint_info.n_joints, scope='MainPart', reuse=reuse,
            stride=stride, centered_stride=FLAGS.centered_stride,
            resnet_name=FLAGS.architecture)
        return net_output_to_heatmap_and_coords(net_output, joint_info)

    t.x = tf.identity(t.x, 'input')
    t.softmaxed, coords3d = im2pred(t.x)
    t.heatmap_pred_z = tf.reduce_sum(t.softmaxed, axis=[2, 3])

    if FLAGS.bone_length_dataset:
        dataset = data.datasets.get_dataset(FLAGS.bone_length_dataset)
    else:
        dataset = data.datasets.current_dataset()

    if 'bone-lengths' in FLAGS.scale_recovery:
        # Recover absolute scale by choosing the root depth that makes the
        # implied bone lengths match target (dataset or ground-truth) lengths.
        t.coords2d_pred = coords3d[..., :2]
        im_pred2d = heatmap_to_image(t.coords2d_pred, learning_phase)
        im_pred2d_homog = to_homogeneous_coords(im_pred2d)
        camcoords2d_homog = matmul_joint_coords(t.inv_intrinsics, im_pred2d_homog)
        delta_z_pred = (coords3d[..., 2] - coords3d[:, -1:, 2]) * FLAGS.box_size_mm
        target_bone_lengths = (
            dataset.trainval_bones if FLAGS.train_on == 'trainval'
            else dataset.train_bones)
        if FLAGS.scale_recovery == 'bone-lengths-true':
            bone_lengths_true = get_bone_lengths(t.coords3d_true, joint_info)
            z_offset_gt_bonelen = optimize_z_offset_by_bones_tensor(
                camcoords2d_homog, delta_z_pred, bone_lengths_true,
                joint_info.stick_figure_edges)
            t.coords3d_pred = back_project(
                camcoords2d_homog, delta_z_pred, z_offset_gt_bonelen)
        else:
            z_offset = optimize_z_offset_by_bones(
                camcoords2d_homog, delta_z_pred, target_bone_lengths,
                joint_info.stick_figure_edges)
            t.coords3d_pred = back_project(camcoords2d_homog, delta_z_pred, z_offset)
    elif FLAGS.scale_recovery == 'true-root-depth':
        # Use the ground-truth depth of the root joint (the last joint).
        t.coords2d_pred = coords3d[..., :2]
        im_pred2d = heatmap_to_image(t.coords2d_pred, learning_phase)
        im_pred2d_homog = to_homogeneous_coords(im_pred2d)
        camcoords2d_homog = matmul_joint_coords(t.inv_intrinsics, im_pred2d_homog)
        delta_z_pred = (coords3d[..., 2] - coords3d[:, -1:, 2]) * FLAGS.box_size_mm
        t.coords3d_pred = back_project(
            camcoords2d_homog, delta_z_pred, t.coords3d_true[:, -1, 2])
    else:
        t.coords3d_pred = heatmap_to_metric(coords3d, learning_phase)

    t.coords3d_pred_rootrel = tf.identity(tfu3d.root_relative(t.coords3d_pred), 'pred')

    if 'rot_to_orig_cam' in t:
        t.coords3d_pred_orig_cam = to_orig_cam(
            t.coords3d_pred, t.rot_to_orig_cam, joint_info)
    if 'rot_to_world' in t:
        t.coords3d_pred_world = to_orig_cam(
            t.coords3d_pred, t.rot_to_world, joint_info) + tf.expand_dims(t.cam_loc, 1)

    if 'coords3d_true' in t:
        t.coords3d_true_rootrel = tfu3d.root_relative(t.coords3d_true)
        if 'rot_to_orig_cam' in t:
            t.coords3d_true_orig_cam = to_orig_cam(
                t.coords3d_true, t.rot_to_orig_cam, joint_info)
        if 'rot_to_world' in t:
            t.coords3d_true_world = to_orig_cam(
                t.coords3d_true, t.rot_to_world, joint_info) + tf.expand_dims(t.cam_loc, 1)
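
# Minimal sketches of the coordinate helpers used above (assumed behavior; the
# real to_homogeneous_coords / matmul_joint_coords may differ):
def _to_homogeneous_coords_sketch(coords2d):
    # [batch, joints, 2] -> [batch, joints, 3] by appending a trailing 1
    return tf.concat([coords2d, tf.ones_like(coords2d[..., :1])], axis=-1)

def _matmul_joint_coords_sketch(matrices, coords):
    # Apply a per-example 3x3 matrix to every joint: [b,3,3] x [b,j,3] -> [b,j,3]
    return tf.einsum('bij,bkj->bki', matrices, coords)
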
def build_metro_model(joint_info, learning_phase, t, reuse=None):
    if learning_phase != TRAIN:
        return build_inference_model(joint_info, learning_phase, t, reuse=reuse)

    with tf.name_scope(None, 'Prediction'):
        depth = FLAGS.depth

        def im2pred(im, reuse=reuse):
            net_output = model.architectures.resnet(
                im, n_out=depth * joint_info.n_joints, scope='MainPart', reuse=reuse,
                stride=FLAGS.stride_train, centered_stride=FLAGS.centered_stride,
                resnet_name=FLAGS.architecture)
            # Reshape the network output into per-joint 3D heatmap volumes and
            # decode the coordinates via soft-argmax.
            net_output_nchw = tfu.std_to_nchw(net_output)
            side = tfu.static_image_shape(net_output)[0]
            reshaped = tf.reshape(
                net_output_nchw, [-1, depth, joint_info.n_joints, side, side])
            logits = tf.transpose(reshaped, [0, 2, 3, 4, 1])
            softmaxed = tfu.softmax(logits, axis=[2, 3, 4])
            coords3d = tf.stack(tfu.decode_heatmap(softmaxed, [3, 2, 4]), axis=-1)
            return logits, softmaxed, coords3d

        # PREDICT FOR 3D BATCH
        t.logits, t.softmaxed, coords3d = im2pred(t.x)
        t.coords3d_pred = heatmap_to_metric(coords3d, learning_phase)
        if FLAGS.train_mixed:
            coords3d_2d = im2pred(t.x_2d)[-1]
            t.coords3d_pred_2d = heatmap_to_metric(coords3d_2d, learning_phase)

        # PREDICT FOR 2D BATCH
        if FLAGS.train_mixed:
            if FLAGS.dataset == 'mpi_inf_3dhp':
                t.coords3d_pred_2d = adjust_skeleton_3dhp_to_mpii(
                    t.coords3d_pred_2d, joint_info)
            joint_info_2d = data.datasets2d.get_dataset(FLAGS.dataset2d).joint_info
            joint_ids_3d = [joint_info.ids[name] for name in joint_info_2d.names]
            t.coords2d_pred_2d = tf.gather(
                t.coords3d_pred_2d, joint_ids_3d, axis=1)[..., :2]

        # LOSS 3D BATCH: root-relative L1, converted from millimeters to meters
        t.coords3d_true_rootrel = tfu3d.root_relative(t.coords3d_true)
        t.coords3d_pred_rootrel = tfu3d.root_relative(t.coords3d_pred)
        absdiff = tf.abs(t.coords3d_true_rootrel - t.coords3d_pred_rootrel)
        t.loss3d = tfu.reduce_mean_masked(absdiff, t.joint_validity_mask) / 1000

        # LOSS 2D BATCH: L1 after aligning the two skeletons in 2D
        if FLAGS.train_mixed:
            t.coords2d_true_2d_scaled, t.coords2d_pred_2d = align_2d_skeletons(
                t.coords2d_true_2d, t.coords2d_pred_2d, t.joint_validity_mask_2d)
            scale = 1 / FLAGS.proc_side * FLAGS.box_size_mm / 1000
            t.loss2d = tfu.reduce_mean_masked(
                tf.abs(t.coords2d_true_2d_scaled - t.coords2d_pred_2d),
                t.joint_validity_mask_2d) * scale
        else:
            t.loss2d = 0

        t.loss = t.loss3d + FLAGS.loss2d_factor * t.loss2d

        t.coords3d_pred_orig_cam = to_orig_cam(
            t.coords3d_pred, t.rot_to_orig_cam, joint_info)
        t.coords3d_true_orig_cam = to_orig_cam(
            t.coords3d_true, t.rot_to_orig_cam, joint_info)
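
# A sketch of the soft-argmax decoding assumed for tfu.decode_heatmap: the
# expected coordinate of the softmaxed volume along each requested axis, in
# [0, 1] (axis conventions and normalization of the real helper may differ):
def _decode_heatmap_sketch(softmaxed, axes):
    results = []
    for axis in axes:
        size = int(softmaxed.shape[axis])
        positions = tf.linspace(0.0, 1.0, size)
        shape = [1] * len(softmaxed.shape)
        shape[axis] = size
        positions = tf.reshape(positions, shape)
        # Expectation under the softmaxed distribution (it sums to 1 over `axes`)
        results.append(tf.reduce_sum(softmaxed * positions, axis=axes))
    return results
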