Example 1
def build_eval_metrics(t):
    rootrelative_diff = tfu3d.root_relative(t.coords3d_pred - t.coords3d_true)
    dist = tf.norm(rootrelative_diff, axis=-1)
    t.mean_error = tfu.reduce_mean_masked(dist, t.joint_validity_mask)
    t.coords3d_pred_procrustes = tfu3d.rigid_align(
        t.coords3d_pred,
        t.coords3d_true,
        joint_validity_mask=t.joint_validity_mask,
        scale_align=True)

    rootrelative_diff_procrust = tfu3d.root_relative(
        t.coords3d_pred_procrustes - t.coords3d_true)
    dist_procrustes = tf.norm(rootrelative_diff_procrust, axis=-1)
    t.mean_error_procrustes = tfu.reduce_mean_masked(dist_procrustes,
                                                     t.joint_validity_mask)

    threshold = np.float32(150)
    auc_score = tf.maximum(np.float32(0), 1 - dist / threshold)
    t.auc_for_nms = tfu.reduce_mean_masked(auc_score,
                                           t.joint_validity_mask,
                                           axis=0)
    t.mean_auc = tfu.reduce_mean_masked(auc_score, t.joint_validity_mask)

    is_correct = tf.cast(dist <= threshold, tf.float32)
    t.pck = tfu.reduce_mean_masked(is_correct, t.joint_validity_mask, axis=0)
    t.mean_pck = tfu.reduce_mean_masked(is_correct, t.joint_validity_mask)
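
The helper tfu.reduce_mean_masked is used throughout these examples but never shown. A plausible NumPy sketch of the same idea (an assumption, not the library's actual implementation): average only over the entries whose joint_validity_mask is True, optionally per-axis.

import numpy as np

def reduce_mean_masked(x, mask, axis=None):
    # Mean of x over the elements where mask is True. The mask covers the
    # leading (batch, joint) dimensions; trailing dimensions of x (e.g. an
    # xyz axis) are broadcast.
    mask = mask.astype(x.dtype)
    while mask.ndim < x.ndim:
        mask = mask[..., np.newaxis]
    total = (x * mask).sum(axis=axis)
    count = np.broadcast_to(mask, x.shape).sum(axis=axis)
    return total / count
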
Example 2
def h36m_numbers(coords3d_true,
                 coords3d_pred,
                 activity_name,
                 procrustes=False,
                 joint_validity_mask=None):
    if joint_validity_mask is None:
        joint_validity_mask = np.full_like(coords3d_true[..., 0],
                                           fill_value=True,
                                           dtype=bool)

    coords3d_true = tfu3d.root_relative(coords3d_true)
    coords3d_pred = tfu3d.root_relative(coords3d_pred)

    if procrustes in ('rigid', 'rigid+scale'):
        coords3d_pred = util3d.rigid_align_many(
            coords3d_pred,
            coords3d_true,
            joint_validity_mask=joint_validity_mask,
            scale_align=procrustes == 'rigid+scale')

    dist = np.linalg.norm(coords3d_true - coords3d_pred, axis=-1)
    overall_mean_error = np.mean(dist[joint_validity_mask])

    result = []
    ordered_actions = 'Directions,Discussion,Eating,Greeting,Phoning,Posing,Purchases,Sitting,' \
                      'SittingDown,Smoking,Photo,Waiting,Walking,WalkDog,WalkTogether'.split(',')
    for activity in ordered_actions:
        activity = activity.encode('utf8')
        mask = np.logical_and(np.expand_dims(activity_name == activity, -1),
                              joint_validity_mask)
        act_mean_error = np.mean(dist[mask])
        result.append(act_mean_error)

    result.append(overall_mean_error)
    return result
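
tfu3d.root_relative is another assumed helper. Given that the other examples index the root as the last joint (e.g. coords25d[:, -1:, 2] in Example 3), a minimal sketch under that convention:

import numpy as np

def root_relative(coords):
    # Subtract the root joint from every joint. The root is assumed to be
    # the last joint, matching the [:, -1:, ...] indexing used elsewhere.
    return coords - coords[..., -1:, :]
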
Example 3
def build_25d_inference_model(joint_info, t):
    t.x = tf.identity(t.x, 'input')
    coords25d = predict_25d(t.x, joint_info)
    t.coords2d_pred = coords25d[..., :2]
    camcoords2d_homog = model.util.matmul_joint_coords(
        t.inv_intrinsics, model.util.to_homogeneous(t.coords2d_pred))
    delta_z_pred = (coords25d[..., 2] - coords25d[:, -1:, 2])

    if 'bone-lengths' in FLAGS.scale_recovery:
        if FLAGS.bone_length_dataset:
            dataset = data.datasets3d.get_dataset(FLAGS.bone_length_dataset)
        else:
            dataset = data.datasets3d.get_dataset(FLAGS.dataset)

        if FLAGS.scale_recovery == 'bone-lengths-true':
            bone_lengths_true = get_bone_lengths(t.coords3d_true, joint_info)
            z_offset = optimize_z_offset_by_bones_tensor(
                camcoords2d_homog, delta_z_pred, bone_lengths_true,
                joint_info.stick_figure_edges)
        else:
            target_bone_lengths = (dataset.trainval_bones
                                   if FLAGS.train_on == 'trainval'
                                   else dataset.train_bones)
            z_offset = optimize_z_offset_by_bones(
                camcoords2d_homog, delta_z_pred, target_bone_lengths,
                joint_info.stick_figure_edges)
        t.coords3d_pred = model.util.back_project(camcoords2d_homog,
                                                  delta_z_pred, z_offset)
    elif FLAGS.scale_recovery == 'true-root-depth':
        t.coords3d_pred = model.util.back_project(camcoords2d_homog,
                                                  delta_z_pred,
                                                  t.coords3d_true[:, -1, 2])

    t.coords3d_pred_rootrel = tf.identity(tfu3d.root_relative(t.coords3d_pred),
                                          'pred')
    if 'rot_to_orig_cam' in t:
        t.coords3d_pred_orig_cam = model.util.to_orig_cam(
            t.coords3d_pred, t.rot_to_orig_cam, joint_info)
    if 'rot_to_world' in t:
        t.coords3d_pred_world = model.util.to_orig_cam(
            t.coords3d_pred, t.rot_to_world, joint_info) + tf.expand_dims(
                t.cam_loc, 1)

    if 'coords3d_true' in t:
        t.coords3d_true_rootrel = tfu3d.root_relative(t.coords3d_true)
        if 'rot_to_orig_cam' in t:
            t.coords3d_true_orig_cam = model.util.to_orig_cam(
                t.coords3d_true, t.rot_to_orig_cam, joint_info)
        if 'rot_to_world' in t:
            t.coords3d_true_world = model.util.to_orig_cam(
                t.coords3d_true, t.rot_to_world, joint_info) + tf.expand_dims(
                    t.cam_loc, 1)
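
model.util.back_project is not shown; one plausible reading (an assumption) is that it scales each calibrated homogeneous ray by its absolute depth:

import numpy as np

def back_project(camcoords2d_homog, delta_z, z_offset):
    # camcoords2d_homog: [batch, joints, 3] rays (x/z, y/z, 1) in camera
    # space; delta_z: [batch, joints] root-relative depths; z_offset:
    # [batch] absolute depth of the root joint.
    z = delta_z + z_offset[:, np.newaxis]
    return camcoords2d_homog * z[..., np.newaxis]
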
Example 4
def tdhp_numbers(coords3d_true,
                 coords3d_pred,
                 activity_name,
                 scene_name,
                 procrustes=False,
                 joint_validity_mask=None):
    if joint_validity_mask is None:
        joint_validity_mask = np.full_like(coords3d_true[..., 0],
                                           fill_value=True,
                                           dtype=bool)

    coords3d_true = tfu3d.root_relative(coords3d_true)
    coords3d_pred = tfu3d.root_relative(coords3d_pred)

    if procrustes in ('rigid', 'rigid+scale'):
        coords3d_pred = util3d.rigid_align_many(
            coords3d_pred,
            coords3d_true,
            joint_validity_mask=joint_validity_mask,
            scale_align=procrustes == 'rigid+scale')

    dist = np.linalg.norm(coords3d_true - coords3d_pred, axis=-1)
    overall_mean_error = np.mean(dist[joint_validity_mask])

    ordered_actions = ('Stand/Walk,Exercise,Sit on Chair,'
                       'Reach/Crouch,On Floor,Sports,Misc.'.split(','))
    ordered_scenes = ['green-screen', 'no-green-screen', 'outdoor']

    rel_dist = dist / 150
    overall_pck = get_pck(rel_dist[joint_validity_mask])
    overall_auc = get_auc(rel_dist[joint_validity_mask])

    result = []
    for activity in ordered_actions:
        activity = activity.encode('utf8')
        mask = np.logical_and(np.expand_dims(activity_name == activity, -1),
                              joint_validity_mask)
        act_pck = get_pck(rel_dist[mask])
        result.append(act_pck)

    for scene in ordered_scenes:
        scene = scene.encode('utf8')
        mask = np.logical_and(np.expand_dims(scene_name == scene, -1),
                              joint_validity_mask)
        scene_pck = get_pck(rel_dist[mask])
        result.append(scene_pck)

    result += [overall_pck, overall_auc, overall_mean_error]
    return result
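
get_pck and get_auc receive distances already divided by the 150 mm threshold. A sketch consistent with the auc_score formula of Example 1 (these are not necessarily the repository's exact definitions):

import numpy as np

def get_pck(rel_dist):
    # Percentage of joints whose error is within the threshold
    # (rel_dist <= 1 corresponds to 150 mm after the division above).
    return np.mean(rel_dist <= 1) * 100

def get_auc(rel_dist):
    # Area under the PCK-over-threshold curve, which reduces to the mean
    # of the linearly decaying per-joint score max(0, 1 - rel_dist).
    return np.mean(np.maximum(0, 1 - rel_dist)) * 100
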
Example 5
def build_metro_inference_model(joint_info, t):
    t.coords3d_pred = predict_metro(t.x, joint_info)
    if 'rot_to_orig_cam' in t:
        t.coords3d_pred_orig_cam = model.util.to_orig_cam(
            t.coords3d_pred, t.rot_to_orig_cam, joint_info)
    if 'rot_to_world' in t:
        t.coords3d_pred_world = model.util.to_orig_cam(
            t.coords3d_pred, t.rot_to_world, joint_info) + tf.expand_dims(
                t.cam_loc, 1)

    if 'coords3d_true' in t:
        t.coords3d_true_rootrel = tfu3d.root_relative(t.coords3d_true)
        if 'rot_to_orig_cam' in t:
            t.coords3d_true_orig_cam = model.util.to_orig_cam(
                t.coords3d_true, t.rot_to_orig_cam, joint_info)
        if 'rot_to_world' in t:
            t.coords3d_true_world = model.util.to_orig_cam(
                t.coords3d_true, t.rot_to_world, joint_info) + tf.expand_dims(
                    t.cam_loc, 1)
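
model.util.to_orig_cam rotates predictions back into the original, unaugmented camera. A simplified sketch, assuming rot_to_orig_cam holds per-example 3x3 rotation matrices; the real helper presumably also uses joint_info to swap mirrored joints after a flip augmentation, which this sketch omits:

import numpy as np

def to_orig_cam(coords, rot, joint_info=None):
    # coords: [batch, joints, 3]; rot: [batch, 3, 3].
    # Apply each example's rotation matrix to all of its joints.
    return np.einsum('bij,bkj->bki', rot, coords)
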
Example 6
def build_metrabs_inference_model(joint_info, t):
    t.coords2d_pred, t.coords3d_rel_pred = predict_heads_metrabs(
        t.x, joint_info)
    t.coords3d_pred = reconstruct_absolute(t.coords2d_pred,
                                           t.coords3d_rel_pred,
                                           t.inv_intrinsics)

    if 'rot_to_orig_cam' in t:
        t.coords3d_pred_orig_cam = model.util.to_orig_cam(
            t.coords3d_pred, t.rot_to_orig_cam, joint_info)
    if 'rot_to_world' in t:
        t.coords3d_pred_world = model.util.to_orig_cam(
            t.coords3d_pred, t.rot_to_world, joint_info) + tf.expand_dims(
                t.cam_loc, 1)

    if 'coords3d_true' in t:
        t.coords3d_true_rootrel = tfu3d.root_relative(t.coords3d_true)
        if 'rot_to_orig_cam' in t:
            t.coords3d_true_orig_cam = model.util.to_orig_cam(
                t.coords3d_true, t.rot_to_orig_cam, joint_info)
        if 'rot_to_world' in t:
            t.coords3d_true_world = model.util.to_orig_cam(
                t.coords3d_true, t.rot_to_world, joint_info) + tf.expand_dims(
                    t.cam_loc, 1)
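
reconstruct_absolute lifts the root-relative 3D prediction to absolute camera coordinates by making it consistent with the calibrated 2D prediction. A single-example NumPy sketch of the underlying least-squares idea (an illustration of the principle, not necessarily the exact procedure): perspective projection of (X+tx, Y+ty, Z+tz) onto (x, y) gives two linear equations per joint, tx - x*tz = x*Z - X and ty - y*tz = y*Z - Y, which are solved jointly for the root translation t.

import numpy as np

def reconstruct_absolute_single(coords2d_norm, coords3d_rel):
    # coords2d_norm: [joints, 2] normalized image coordinates (pixel
    # coordinates multiplied by the inverse intrinsics); coords3d_rel:
    # [joints, 3] root-relative 3D prediction.
    x, y = coords2d_norm[:, 0], coords2d_norm[:, 1]
    X, Y, Z = coords3d_rel[:, 0], coords3d_rel[:, 1], coords3d_rel[:, 2]
    n = len(x)
    A = np.zeros((2 * n, 3))
    b = np.empty(2 * n)
    A[0::2, 0] = 1.0
    A[0::2, 2] = -x
    b[0::2] = x * Z - X
    A[1::2, 1] = 1.0
    A[1::2, 2] = -y
    b[1::2] = y * Z - Y
    t = np.linalg.lstsq(A, b, rcond=None)[0]
    return coords3d_rel + t
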
Example 7
def build_inference_model(joint_info, learning_phase, t, reuse=None):
    stride = FLAGS.stride_train if learning_phase == TRAIN else FLAGS.stride_test
    depth = FLAGS.depth

    def im2pred(im, reuse=reuse):
        net_output = model.architectures.resnet(
            im, n_out=depth * joint_info.n_joints, scope='MainPart', reuse=reuse,
            stride=stride, centered_stride=FLAGS.centered_stride,
            resnet_name=FLAGS.architecture)
        return net_output_to_heatmap_and_coords(net_output, joint_info)

    t.x = tf.identity(t.x, 'input')
    t.softmaxed, coords3d = im2pred(t.x)
    t.heatmap_pred_z = tf.reduce_sum(t.softmaxed, axis=[2, 3])

    if FLAGS.bone_length_dataset:
        dataset = data.datasets.get_dataset(FLAGS.bone_length_dataset)
    else:
        dataset = data.datasets.current_dataset()
    if 'bone-lengths' in FLAGS.scale_recovery:
        t.coords2d_pred = coords3d[..., :2]
        im_pred2d = heatmap_to_image(t.coords2d_pred, learning_phase)
        im_pred2d_homog = to_homogeneous_coords(im_pred2d)
        camcoords2d_homog = matmul_joint_coords(t.inv_intrinsics, im_pred2d_homog)
        delta_z_pred = (coords3d[..., 2] - coords3d[:, -1:, 2]) * FLAGS.box_size_mm

        target_bone_lengths = (
            dataset.trainval_bones if FLAGS.train_on == 'trainval' else dataset.train_bones)

        if FLAGS.scale_recovery == 'bone-lengths-true':
            bone_lengths_true = get_bone_lengths(t.coords3d_true, joint_info)
            z_offset_gt_bonelen = optimize_z_offset_by_bones_tensor(
                camcoords2d_homog, delta_z_pred, bone_lengths_true,
                joint_info.stick_figure_edges)
            t.coords3d_pred = back_project(camcoords2d_homog, delta_z_pred, z_offset_gt_bonelen)
        else:
            z_offset = optimize_z_offset_by_bones(
                camcoords2d_homog, delta_z_pred, target_bone_lengths,
                joint_info.stick_figure_edges)
            t.coords3d_pred = back_project(camcoords2d_homog, delta_z_pred, z_offset)
    elif FLAGS.scale_recovery == 'true-root-depth':
        t.coords2d_pred = coords3d[..., :2]
        im_pred2d = heatmap_to_image(t.coords2d_pred, learning_phase)
        im_pred2d_homog = to_homogeneous_coords(im_pred2d)
        camcoords2d_homog = matmul_joint_coords(t.inv_intrinsics, im_pred2d_homog)
        delta_z_pred = (coords3d[..., 2] - coords3d[:, -1:, 2]) * FLAGS.box_size_mm
        t.coords3d_pred = back_project(
            camcoords2d_homog, delta_z_pred, t.coords3d_true[:, -1, 2])
    else:
        t.coords3d_pred = heatmap_to_metric(coords3d, learning_phase)

    t.coords3d_pred_rootrel = tf.identity(tfu3d.root_relative(t.coords3d_pred), 'pred')
    if 'rot_to_orig_cam' in t:
        t.coords3d_pred_orig_cam = to_orig_cam(t.coords3d_pred, t.rot_to_orig_cam, joint_info)
    if 'rot_to_world' in t:
        t.coords3d_pred_world = to_orig_cam(
            t.coords3d_pred, t.rot_to_world, joint_info) + tf.expand_dims(t.cam_loc, 1)

    if 'coords3d_true' in t:
        t.coords3d_true_rootrel = tfu3d.root_relative(t.coords3d_true)
        if 'rot_to_orig_cam' in t:
            t.coords3d_true_orig_cam = to_orig_cam(t.coords3d_true, t.rot_to_orig_cam, joint_info)
        if 'rot_to_world' in t:
            t.coords3d_true_world = to_orig_cam(
                t.coords3d_true, t.rot_to_world, joint_info) + tf.expand_dims(t.cam_loc, 1)
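
optimize_z_offset_by_bones searches for the root depth at which the back-projected skeleton's bone lengths best match the reference lengths. A crude single-example grid-search sketch of the idea (the actual routine is presumably a closed-form or iterative optimizer):

import numpy as np

def optimize_z_offset_by_bones_single(rays, delta_z, target_lengths, edges):
    # rays: [joints, 3] homogeneous rays; delta_z: [joints] root-relative
    # depths in mm; edges: (i, j) joint index pairs defining the bones.
    candidates = np.linspace(500.0, 10000.0, 200)  # root depths to try, mm
    best_z, best_err = candidates[0], np.inf
    for z0 in candidates:
        coords3d = rays * (delta_z + z0)[:, np.newaxis]
        lengths = np.array([np.linalg.norm(coords3d[i] - coords3d[j])
                            for i, j in edges])
        err = np.mean(np.abs(lengths - target_lengths))
        if err < best_err:
            best_z, best_err = z0, err
    return best_z
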
Example 8
def build_metro_model(joint_info, learning_phase, t, reuse=None):
    if learning_phase != TRAIN:
        return build_inference_model(joint_info, learning_phase, t, reuse=reuse)

    with tf.name_scope(None, 'Prediction'):
        depth = FLAGS.depth

        def im2pred(im, reuse=reuse):
            net_output = model.architectures.resnet(
                im, n_out=depth * joint_info.n_joints, scope='MainPart', reuse=reuse,
                stride=FLAGS.stride_train, centered_stride=FLAGS.centered_stride,
                resnet_name=FLAGS.architecture)

            net_output_nchw = tfu.std_to_nchw(net_output)
            side = tfu.static_image_shape(net_output)[0]
            reshaped = tf.reshape(net_output_nchw, [-1, depth, joint_info.n_joints, side, side])
            logits = tf.transpose(reshaped, [0, 2, 3, 4, 1])
            softmaxed = tfu.softmax(logits, axis=[2, 3, 4])
            coords3d = tf.stack(tfu.decode_heatmap(softmaxed, [3, 2, 4]), axis=-1)
            return logits, softmaxed, coords3d

        # PREDICT FOR 3D BATCH
        t.logits, t.softmaxed, coords3d = im2pred(t.x)
        t.coords3d_pred = heatmap_to_metric(coords3d, learning_phase)
        if FLAGS.train_mixed:
            coords3d_2d = im2pred(t.x_2d)[-1]
            t.coords3d_pred_2d = heatmap_to_metric(coords3d_2d, learning_phase)

        # PREDICT FOR 2D BATCH
        if FLAGS.train_mixed:
            if FLAGS.dataset == 'mpi_inf_3dhp':
                t.coords3d_pred_2d = adjust_skeleton_3dhp_to_mpii(t.coords3d_pred_2d, joint_info)

            joint_info_2d = data.datasets2d.get_dataset(FLAGS.dataset2d).joint_info
            joint_ids_3d = [joint_info.ids[name] for name in joint_info_2d.names]
            t.coords2d_pred_2d = tf.gather(t.coords3d_pred_2d, joint_ids_3d, axis=1)[..., :2]

        # LOSS 3D BATCH
        t.coords3d_true_rootrel = tfu3d.root_relative(t.coords3d_true)
        t.coords3d_pred_rootrel = tfu3d.root_relative(t.coords3d_pred)

        absdiff = tf.abs(t.coords3d_true_rootrel - t.coords3d_pred_rootrel)
        t.loss3d = tfu.reduce_mean_masked(absdiff, t.joint_validity_mask) / 1000

        # LOSS 2D BATCH
        if FLAGS.train_mixed:
            t.coords2d_true_2d_scaled, t.coords2d_pred_2d = align_2d_skeletons(
                t.coords2d_true_2d, t.coords2d_pred_2d, t.joint_validity_mask_2d)
            scale = 1 / FLAGS.proc_side * FLAGS.box_size_mm / 1000

            t.loss2d = tfu.reduce_mean_masked(
                tf.abs(t.coords2d_true_2d_scaled - t.coords2d_pred_2d),
                t.joint_validity_mask_2d) * scale
        else:
            t.loss2d = 0

        loss2d_factor = FLAGS.loss2d_factor
        t.loss = t.loss3d + loss2d_factor * t.loss2d

    t.coords3d_pred_orig_cam = to_orig_cam(t.coords3d_pred, t.rot_to_orig_cam, joint_info)
    t.coords3d_true_orig_cam = to_orig_cam(t.coords3d_true, t.rot_to_orig_cam, joint_info)
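
tfu.decode_heatmap in the last example performs a soft-argmax: the expected coordinate under the softmaxed probability volume along each requested axis. A minimal NumPy sketch, assuming coordinates normalized to [0, 1] and that the listed axes are the trailing dimensions (as they are after the transpose above):

import numpy as np

def decode_heatmap(softmaxed, axes):
    # softmaxed: probability volume summing to 1 over the given axes.
    # For each axis, marginalize over the other listed axes, then take the
    # expectation of a [0, 1] coordinate grid along the remaining one.
    results = []
    for axis in axes:
        others = tuple(a for a in axes if a != axis)
        marginal = softmaxed.sum(axis=others)
        grid = np.linspace(0.0, 1.0, softmaxed.shape[axis])
        results.append((marginal * grid).sum(axis=-1))
    return results
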