Example #1
    def get_confidences(self, frame_id):
        """

        Args:
            frame_id (int): Frame id in question.

        Returns:
            confidences (np.ndarray): (N, 16)
                Array of confidences
        """
        N_JOINTS = Joint.get_num_joints()
        skeleton = self._skeleton
        confs = []  # type: List[List[float]]
        for actor_id in range(skeleton.n_actors):
            frame_id2 = skeleton.unmod_frame_id(
              frame_id=frame_id, actor_id=actor_id,
              frames_mod=skeleton._frames_mod
            )
            if skeleton.has_pose(frame_id2):
                _confs = [
                    skeleton.get_confidence(frame_id=frame_id2, joint=j)
                    for j in range(N_JOINTS)]  # type: List[float]
                actor_and_frame_ids = self._pose_ids[frame_id][len(confs)]  # type: ActorAndFrameId
                assert actor_and_frame_ids.frame_id == frame_id \
                       and actor_and_frame_ids.frame_id2 == frame_id2 \
                       and actor_and_frame_ids.actor_id == actor_id
                confs.append(_confs)
            # else:
            #     lg.warning("Warning, no pose for %d %d"
            #                % (frame_id, frame_id2))

        return np.array(confs)
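
A minimal usage sketch (hedged: "query" names a hypothetical instance of the
surrounding class, and frame 10 is assumed to have a pose for at least one
actor):

confs = query.get_confidences(frame_id=10)
print(confs.shape)  # e.g. (1, 16) for a single tracked actor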
Example #2
def extract_skeleton(scene,
                     frame_ids=None,
                     frame_multiplier=1.,
                     time_multiplier=1.):
    """Extracts the skeleton poses from the Blender joint sphere objects.

    Args:
        scene (bpy.types.Scene):
            The current scene (e.g. bpy.context.scene).
        frame_ids (List[int]):
            A subset of frame IDs to export (defaults to all scene frames).
        frame_multiplier (float):
            Scaling for frame IDs. The result will be rounded and truncated.
        time_multiplier (float):
            Scaling for times associated with the output frame IDs.

    Returns:
        skeleton (Skeleton): The extracted 3D poses.
    """
    joints = {
        ob.name.split('.')[1]: ob
        for ob in bpy.data.objects
        if ob.name.startswith('Output') and ob.name.endswith('Sphere')
    }
    print("joints: %s" % joints)
    assert len(joints) == Joint.get_num_joints(), \
        "No: %s != %s" % (len(joints), Joint.get_num_joints())
    if not frame_ids:
        frame_ids = range(scene.frame_start, scene.frame_end + 1)

    skeleton = Skeleton()
    for frame_id in frame_ids:
        o_frame_id = int(round(frame_id * frame_multiplier))
        if skeleton.has_pose(o_frame_id):
            print("skipping %s, because already set" % frame_id)
            continue
        print("frame_id: %s, o_frame_id: %s" % (frame_id, o_frame_id))
        scene.frame_set(frame_id)
        bpy.context.scene.update()
        pose = np.zeros(shape=(Skeleton.DIM, len(joints)))
        for joint, ob in joints.items():
            pos = ob.matrix_world.col[3]
            joint_id = Joint.from_string(joint)
            pose[:, joint_id] = from_blender(pos)
        assert not skeleton.has_pose(o_frame_id), \
            "Already has %s" % o_frame_id
        skeleton.set_pose(frame_id=o_frame_id,
                          pose=pose,
                          time=o_frame_id * time_multiplier)

    # scenelet = Scenelet(skeleton=skeleton)
    # scenelet.save(path=path_out, save_obj=False)

    return skeleton
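
Inside Blender, the extraction could be driven like this (a sketch: joint
sphere objects named "Output.<JOINT>.Sphere" are assumed to exist, and the
multiplier values are only illustrative):

import bpy

skeleton = extract_skeleton(bpy.context.scene,
                            frame_multiplier=2.,
                            time_multiplier=1. / 24.)
print(skeleton.get_frames())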
Example #3
    def add(self, entry, gt, occluded, frame_id, scale=1.):
        """Accumulates per-joint distances between an estimated and a
        ground-truth pose, separated into occluded and visible statistics.

        Args:
            entry (np.ndarray): (2-3, n_joints) estimated pose.
            gt (np.ndarray): (2-3, n_joints) ground-truth pose.
            occluded (Sequence[bool]): Per-joint occlusion flags.
            frame_id (int): Frame id of the pose.
            scale (float): Unit conversion factor for the distances.
        """
        assert entry.shape[0] <= 3 and gt.shape[0] <= 3, (entry, gt)
        diff = np.linalg.norm(entry - gt, axis=0) * scale

        sub = []  # distances at occluded joints only
        for j in range(Joint.get_num_joints()):
            if j in (Joint.PELV, Joint.NECK):
                # skip derived joints (pelvis and neck are interpolated
                # from the hips and head/thorax, see export_scenelet)
                continue
            if occluded[j]:
                sub.append(diff[j])
                self._stats.add(occluded=OccludedType.OCCLUDED,
                                title=self._title,
                                name_method=self._name_method,
                                frame_id=frame_id,
                                diff=diff[j])
            else:
                self._stats.add(occluded=OccludedType.VISIBLE,
                                title=self._title,
                                name_method=self._name_method,
                                frame_id=frame_id,
                                diff=diff[j])
        self._data.extend(sub)
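
A hedged sketch of feeding one frame into the accumulator ("errors" is a
hypothetical instance of the surrounding class; random arrays stand in for
real poses, and scale=1000. would convert metres to millimetres):

import numpy as np

n_joints = Joint.get_num_joints()  # 16 in this codebase
entry = np.random.rand(3, n_joints)                # estimated pose
gt = entry + np.random.randn(3, n_joints) * 0.005  # perturbed "ground truth"
occluded = np.zeros(n_joints, dtype=bool)
occluded[Joint.LANK] = True  # pretend the left ankle is occluded
errors.add(entry=entry, gt=gt, occluded=occluded, frame_id=0, scale=1000.)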
Example #4
def extract_annotated_scenelet(
        scene,
        prefix_obj='obb',
        frame_ids=None,
        frame_multiplier=1.,
        time_multiplier=1.,
        f_ob_is_joint=lambda ob: ob.name.startswith(
            'Output') and ob.name.endswith('Sphere'),
        f_joint_name_from_ob=lambda ob: ob.name.split('.')[1]):
    """
    
    Args:
        scene (bpy.types.Scene):
            The current scene (e.g. bpy.context.scene).
        prefix_obj (str):
            Start of object names that we want to include in the scenelet
            as oriented bounding boxes.
        frame_ids (List[int]):
            A subset of frame IDs to export.
        frame_multiplier (float):
            Scaling for frame IDs. The result will be rounded and truncated.
            output.frame_id := int(round(frame_id * frame_multiplier))
        time_multipler (float):
            Scaling for times associated with frame_ids.
            output.time := int(round(frame_id * frame_multiplier)) 
            * time_multiplier.
        f_ob_is_joint (Callable[[bpy.types.Object], bool]]):
            Decides if a Blender object is a joint.
        f_joint_name_from_ob (Callable[[bpy.types.Object], str]):
            Gets the joint name from the Blender object name.
    """
    joints = {
        f_joint_name_from_ob(ob): ob
        for ob in bpy.data.objects if f_ob_is_joint(ob)
    }
    print("joints: %s" % joints)
    skeleton = Skeleton()
    if len(joints):
        assert len(joints) == 16, "No: %s" % len(joints)
        if not frame_ids:
            frame_ids = range(scene.frame_start, scene.frame_end + 1)
        for frame_id in frame_ids:
            o_frame_id = int(round(frame_id * frame_multiplier))
            if skeleton.has_pose(o_frame_id):
                print("skipping %s" % frame_id)
                continue
            print("frame_id: %s" % frame_id)
            scene.frame_set(frame_id)
            bpy.context.scene.update()
            # bpy.ops.anim.change_frame(frame_id)
            pose = np.zeros(shape=(3, len(joints)))
            for joint, ob in joints.items():
                pos = ob.matrix_world.col[3]
                print("pos[%s]: %s" % (ob.name, pos))
                joint_id = Joint.from_string(joint)
                print("joint %s is %s" % (joint, Joint(joint_id)))
                pose[:, joint_id] = from_blender(pos)
            print("o_frame: %s from %s" % (o_frame_id, frame_id))
            assert not skeleton.has_pose(o_frame_id), \
                "Already has %s" % frame_id
            skeleton.set_pose(frame_id=o_frame_id,
                              pose=pose,
                              time=o_frame_id * time_multiplier)
    objs_bl = {}
    for obj in bpy.data.objects:
        if obj.name.startswith(prefix_obj) and not obj.hide:
            obj_id = int(obj.name.split('_')[1])
            try:
                objs_bl[obj_id].append(obj)
            except KeyError:
                objs_bl[obj_id] = [obj]

    print("objs: %s" % objs_bl)
    scenelet = Scenelet(skeleton=skeleton)
    print("scenelet: %s" % scenelet)
    for obj_id, parts_bl in objs_bl.items():
        name_category = None
        scene_obj = None
        for part_id, part_bl in enumerate(parts_bl):
            transl, rot, scale = part_bl.matrix_world.decompose()
            rot = rot.to_matrix()
            if any(comp < 0. for comp in scale):
                scale *= -1.
                rot *= -1.
            assert not any(comp < 0. for comp in scale), "No: %s" % scale

            matrix_world = part_bl.matrix_world.copy()

            # need to save full scale, not only half axes
            for c in range(3):
                for r in range(3):
                    matrix_world[r][c] *= 2.
            name_parts = part_bl.name.split('_')
            if name_category is None:
                name_category = name_parts[2]
                scene_obj = SceneObj(label=name_category)
            else:
                assert name_category == name_parts[2], \
                    "No: %s %s" % (name_category, name_parts[2])
            name_part = name_parts[3]
            print("part: %s" % name_part)
            part = SceneObjPart(name_part)
            part.obb = Obb(centroid=np.array(
                from_blender([transl[0], transl[1], transl[2]])),
                           axes=np.array([[rot[0][0], rot[0][1], rot[0][2]],
                                          [-rot[2][0], -rot[2][1], -rot[2][2]],
                                          [rot[1][0], rot[1][1], rot[1][2]]]),
                           scales=np.array(
                               [scale[0] * 2., scale[1] * 2., scale[2] * 2.]))
            # if 'table' in name_category:
            #     print(part.obb.axes)
            #     raise RuntimeError("stop")
            print("obb: %s" % part.obb.to_json(0))
            scene_obj.add_part(part_id, part)
        scenelet.add_object(obj_id, scene_obj, clone=False)
    return scenelet
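
Within Blender, the whole annotated scene could then be exported in one go (a
sketch; the output path is a placeholder and the Scenelet.save keywords are
taken from the commented-out call in Example #2):

import bpy

scenelet = extract_annotated_scenelet(bpy.context.scene, prefix_obj='obb')
scenelet.save(path='/tmp/scenelet.json', save_obj=False)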
Example #5
def export_scenelet(um,
                    o_pos_3d,
                    o_polys_3d,
                    query_full_skeleton,
                    scenes,
                    joints_active,
                    transform_id=None):
    """Extract a scenelet (poses and objects) from the data from the
    optimized problem.

    Args:
        um (stealth.pose.unk_manager.UnkManager):
            Data manager.
        o_pos_3d (np.ndarray):
            Output 3D poses.
        o_polys_3d (np.ndarray): (6K, 4, 3)
            3D oriented bounding boxes stored stacked.
        query_full_skeleton (stealth.logic.skeleton.Skeleton):
            Initial path containing time information.
        scenes (List[Scenelet]):
            Source scenelets indexed by pid2scene.id_scene; used to look up
            charness and the scenelet name.
        joints_active (list):
            List of joint_ids that were optimized for.
            Usage: pose16[:, joints_active] = o_pos_3d[pid, :, :]
        transform_id (int):
            Export only a specific group. Everything is exported, if None.
    Returns:
        A scenelet extracted from the data provided.
    """
    # cache function
    _guess_time_at = query_full_skeleton.guess_time_at

    # all poses or the ones that belong to a group/scenelet
    if transform_id is None:
        pids_sorted = sorted([(pid, pid2scene)
                              for pid, pid2scene in um.pids_2_scenes.items()],
                             key=lambda e: e[1].frame_id)
    else:
        # pids_sorted = sorted([(pid, pid2scene)
        #                       for pid, pid2scene in um.pids_2_scenes.items()
        #                       if pid2scene.transform_id == transform_id],
        #                      key=lambda e: e[1].frame_id)
        pids_2_scenes = um.pids_2_scenes
        pids_sorted = sorted([(pid, pids_2_scenes[pid])
                              for pid in um.get_pids_for(transform_id)],
                             key=lambda e: e[1].frame_id)

    # create output scenelet
    o = Scenelet()
    charness = None

    #
    # Skeleton
    #

    # cache skeleton reference
    skeleton = o.skeleton
    # fill skeleton
    for pid, pid2scene in pids_sorted:
        if charness is None:
            scene = scenes[pid2scene.id_scene]
            charness = scene.charness
            o.add_aux_info('name_scenelet', scene.name_scenelet)
            o.charness = charness
        # get frame_id
        frame_id = int(pid2scene.frame_id)
        # check if already exists
        if skeleton.has_pose(frame_id):
            # TODO: fix overlapping frame_ids
            lg.warning("[export_scenelet] Overwriting output frame_id %d" %
                       frame_id)
        # add with time guessed from input skeleton rate
        pose = np.zeros((3, Joint.get_num_joints()))
        pose[:, joints_active] = o_pos_3d[pid, :, :]
        pose[:, Joint.PELV] = (pose[:, Joint.LHIP] + pose[:, Joint.RHIP]) / 2.
        pose[:, Joint.NECK] = (pose[:, Joint.HEAD] + pose[:, Joint.THRX]) / 2.
        # for j, jid in joints_remap.items():
        #     pose[:, j] = o_pos_3d[pid, :, jid]
        assert not skeleton.has_pose(frame_id=frame_id), \
            'Already has pose: {}'.format(frame_id)
        skeleton.set_pose(frame_id=frame_id,
                          pose=pose,
                          time=_guess_time_at(frame_id))

    #
    # Objects
    #

    scene_obj = None
    scene_obj_oid = 0  # unique identifier that groups parts to objects
    for polys2scene in um.polys2scene.values():
        # Check, if we are restricted to a certain group
        if transform_id is not None \
          and polys2scene.transform_id != transform_id:
            continue
        start = polys2scene.poly_id_start
        end = start + polys2scene.n_polys

        # 6 x 4 x 3
        polys = o_polys_3d[start:end, ...]
        assert polys.shape[0] == 6, "Assumed cuboids here"
        if scene_obj is None or scene_obj_oid != polys2scene.object_id:
            category = next(cat for cat in CATEGORIES
                            if CATEGORIES[cat] == polys2scene.cat_id)
            scene_obj = SceneObj(label=category)
            scene_obj_oid = polys2scene.object_id
            o.add_object(obj_id=-1, scene_obj=scene_obj, clone=False)
        part = scene_obj.add_part(part_id=-1,
                                  label_or_part=polys2scene.part_label)
        # TODO: average for numerical precision errors
        centroid = np.mean(polys, axis=(0, 1))
        ax0 = polys[0, 1, :] - polys[0, 0, :]
        scale0 = np.linalg.norm(ax0)
        ax0 /= scale0
        ax1 = polys[0, 3, :] - polys[0, 0, :]
        scale1 = np.linalg.norm(ax1)
        ax1 /= scale1
        ax2 = polys[1, 0, :] - polys[0, 0, :]
        scale2 = np.linalg.norm(ax2)
        ax2 /= scale2
        part.obb = Obb(centroid=centroid,
                       axes=np.concatenate(
                           (ax0[:, None], ax1[:, None], ax2[:, None]), axis=1),
                       scales=[scale0, scale1, scale2])
    # if scene_obj is not None:
    #     o.add_object(obj_id=-1, scene_obj=scene_obj, clone=False)
    # else:
    #     lg.warning("No objects in scenelet?")

    return o
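
The axis recovery above relies on the stored vertex order of the stacked
polygons: on the first face, vertex 1 minus vertex 0 spans one edge, vertex 3
minus vertex 0 the adjacent edge, and vertex 0 of the second face minus
vertex 0 of the first gives the depth direction. A self-contained check with
two faces of a unit cuboid (NumPy only; the corner layout is assumed to
follow that convention):

import numpy as np

# bottom and top faces of an axis-aligned unit cuboid, 4 corners each
polys = np.array([
    [[0., 0., 0.], [1., 0., 0.], [1., 1., 0.], [0., 1., 0.]],
    [[0., 0., 1.], [1., 0., 1.], [1., 1., 1.], [0., 1., 1.]],
])
ax0 = polys[0, 1, :] - polys[0, 0, :]  # (1, 0, 0): first edge
ax1 = polys[0, 3, :] - polys[0, 0, :]  # (0, 1, 0): adjacent edge
ax2 = polys[1, 0, :] - polys[0, 0, :]  # (0, 0, 1): depth direction
centroid = polys.mean(axis=(0, 1))     # (0.5, 0.5, 0.5)
print(ax0, ax1, ax2, centroid)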
Example #6
def main(argv):
    conf = Conf.get()
    parser = argparse.ArgumentParser("Denis pose converter")
    parser.add_argument('camera_name',
                        help="Camera name ('G15', 'S6')",
                        type=str)
    parser.add_argument(
        '-d',
        dest='dir',
        required=True,
        help="Path to the <scene folder>/denis containing skeletons.json")
    parser.add_argument(
        '-filter',
        dest='with_filtering',
        action="store_true",
        help="Should we do post-filtering (1-euro) on the pelvis positions")
    parser.add_argument('-huber',
                        required=False,
                        help="Should we do huber loss?",
                        action='store_true')
    parser.add_argument('-smooth',
                        type=float,
                        default=0.005,
                        help="Should we have a smoothness term (l2/huber)?")
    parser.add_argument(
        '--winsorize-limit',
        type=float,
        default=conf.optimize_path.winsorize_limit,
        help='Threshold for filtering too large jumps of the 2D centroid')
    parser.add_argument('--no-resample',
                        action='store_true',
                        help="Do not add resampled frames")
    parser.add_argument('--n-actors',
                        type=int,
                        default=1,
                        help="How many skeletons to track "
                             "(max number of people in scene).")
    # parser.add_argument(
    #     '-r', type=float,
    #     help='Video rate. Default: 1, if avconv -r 5. '
    #          'Original video sampling rate (no subsampling) should be '
    #          '24/5=4.8. avconv -r 10 leads to 24/10=2.4.',
    #     required=True)
    parser.add_argument('--person_height',
                        type=float,
                        help='Assumed height of human(s) in video.',
                        default=Conf.get().optimize_path.person_height)
    parser.add_argument(
        '--forwards-window-size',
        type=int,
        help='How many poses in time to look before AND after to '
        'average forward direction. 0 means no averaging. Default: 0.',
        default=0)
    parser.add_argument('--no-img',
                        action='store_true',
                        help='Do not read and write images '
                             '(reprojection error visualization)')
    parser.add_argument('--postfix',
                        type=str,
                        help="output file postfix.",
                        default='unannot')
    args = parser.parse_args(argv)
    show = False
    args.resample = not args.no_resample
    # assert not args.resample, "resample should be off"
    assert os.path.exists(args.dir), "Source does not exist: %s" % args.dir
    p_scene = os.path.normpath(os.path.join(args.dir, os.pardir))  # type: str
    p_video_params = os.path.join(p_scene, 'video_params.json')
    assert os.path.exists(p_video_params), "Need video_params.json for rate"
    if 'r' not in args or args.r is None:
        args.r = json.load(open(p_video_params, 'r'))['rate-avconv']

    # manual parameters (depth initialization, number of actors)
    p_scene_params = os.path.join(args.dir, os.pardir, 'scene_params.json')
    if not os.path.exists(p_scene_params):
        scene_params = {
            'depth_init': 10.,
            'actors': args.n_actors,
            'ground_rot': [0., 0., 0.]
        }
        json.dump(scene_params, open(p_scene_params, 'w'))
        raise RuntimeError("Inited scene_params.json, please check: %s" %
                           p_scene_params)
    else:
        scene_params = json.load(open(p_scene_params, 'r'))
        lg.warning("Will work with %d actors and init depth to %g" %
                   (scene_params['actors'], scene_params['depth_init']))
        assert '--n-actors' not in argv \
               or args.n_actors == scene_params['actors'], \
            "Actor count mismatch, remove %d from args, because " \
            "scene_params.json says %d?" \
            % (args.n_actors, scene_params['actors'])
        args.n_actors = scene_params['actors']
        ground_rot = scene_params.get('ground_rot') or [0., 0., 0.]

    # load images
    path_images = os.path.abspath(os.path.join(args.dir, os.pardir, 'origjpg'))
    images = {}
    shape_orig = None
    if not args.no_img:
        images, shape_orig = load_images(path_images)

    path_skeleton = \
        max((f for f in os.listdir(os.path.join(args.dir))
             if f.startswith('skeletons') and f.endswith('json')),
            key=lambda s: int(os.path.splitext(s)[0].split('_')[1]))
    print("path_skeleton: %s" % path_skeleton)
    data = json.load(open(os.path.join(args.dir, path_skeleton), 'r'))
    # data, pose_constraints, first_run = \
    #     cleanup(data, p_dir=os.path.join(args.dir, os.pardir))
    # poses_2d = []
    # plt.figure()
    # show_images(images, data)
    if False:  # disabled: needs pose_constraints/first_run from the
               # commented-out cleanup() call above
        # pose_ids = identify_actors_multi(data, n_actors=1)
        p_segm_pickle = os.path.join(args.dir, os.pardir,
                                     "label_skeletons.pickle")
        problem = None
        if False and os.path.exists(p_segm_pickle):
            lg.warning("Loading skeleton segmentation from pickle %s" %
                       p_segm_pickle)
            pose_ids, problem = pickle_load(open(p_segm_pickle, 'rb'))
        if not problem or problem._n_actors != args.n_actors:
            pose_ids, problem, data = more_actors_gurobi(
                data,
                n_actors=args.n_actors,
                constraints=pose_constraints,
                first_run=first_run)
            if True or show:
                show_multi(images,
                           data,
                           pose_ids,
                           problem,
                           p_dir=os.path.join(args.dir, os.pardir),
                           first_run=first_run,
                           n_actors=args.n_actors)
            pickle.dump((pose_ids, problem), open(p_segm_pickle, 'wb'), -1)
    else:
        pose_ids = greedy_actors(data, n_actors=args.n_actors)
        data = DataPosesWrapper(data=data)

    visible_f = {a: {} for a in range(args.n_actors)}
    visible_f_max = 0.
    if show:
        plt.ion()
        fig = None
        axe = None
        scatters = dict()

    # how many images we have
    min_frame_id = min(f for f in pose_ids)
    frames_mod = max(f for f in pose_ids) - min_frame_id + 1
    skel_ours = Skeleton(frames_mod=frames_mod,
                         n_actors=args.n_actors,
                         min_frame_id=min_frame_id)
    skel_ours_2d = Skeleton(frames_mod=frames_mod,
                            n_actors=args.n_actors,
                            min_frame_id=min_frame_id)

    # assert len(images) == 0 or max(f for f in images) + 1 == frames_mod, \
    #     "Assumed image count is %d, but max_frame_id is %d" \
    #     % (len(images), frames_mod-1)
    if isinstance(data, DataPosesWrapper):
        frames = data.get_frames()
    else:
        frames = []
        for frame_str in sorted(data.get_frames()):
            try:
                frame_id = int(frame_str.split('_')[1])
            except ValueError:
                print("skipping key %s" % frame_id)
                continue
            frames.append(frame_id)
    my_visibilities = [[], []]
    for frame_id in frames:
        frame_str = DataPosesWrapper._to_frame_str(frame_id)
        pose_in = data.get_poses_3d(frame_id=frame_id)
        # np.asarray(data[frame_str][u'centered_3d'])
        # pose_in_2d = np.asarray(data[frame_str][u'pose_2d'])
        pose_in_2d = data.get_poses_2d(frame_id=frame_id)
        # visible = np.asarray(data[frame_str][u'visible'])

        if False and len(pose_in.shape) > 2:  # disabled debug drawing branch
            pose_id = pose_ids[frame_id]
            if not args.no_img:
                im = cv2.cvtColor(images[frame_id], cv2.COLOR_RGB2BGR)
                for i in range(pose_in.shape[0]):
                    c = (1., 0., 0., 1.)
                    if i == pose_id:
                        c = (0., 1., 0., 1.)
                    color = tuple(int(c_ * 255) for c_ in c[:3])
                    for p2d in pose_in_2d[i, :, :]:
                        # color = (c[0] * 255, c[1] * 255., c[2] * 255.)
                        cv2.circle(im, (p2d[1], p2d[0]),
                                   radius=3,
                                   color=color,
                                   thickness=-1)
                    center = np.mean(pose_in_2d[i, :, :],
                                     axis=0).round().astype('i4').tolist()
                    cv2.putText(im, "%d" % i, (center[1], center[0]), 1, 1,
                                color)
                if show:
                    cv2.imshow("im", im)
                    cv2.waitKey(100)
            # if sid not in scatters:
            #     scatters[sid] = axe.scatter(pose_in_2d[i, :, 1], pose_in_2d[i, :, 0], c=c)
            # else:
            #     scatters[sid].set_offsets(pose_in_2d[i, :, [1, 0]])
            #     scatters[sid].set_array(np.tile(np.array(c), pose_in_2d.shape[1]))
            # scatter.set_color(c)
            # plt.draw()
            # plt.pause(1.)
            pose_in = pose_in[pose_id, :, :]
            pose_in_2d = pose_in_2d[pose_id, :, :]
            visible = visible[pose_id]
        # else:
        # pose_id = 0
        # pose_id = pose_ids[frame_id]

        for actor_id in range(args.n_actors):
            # if actor_id in (2, 3, 4, 5, 8, 9)
            # expanded frame_id
            frame_id2 = Skeleton.unmod_frame_id(frame_id=frame_id,
                                                actor_id=actor_id,
                                                frames_mod=frames_mod)
            assert (actor_id != 0) ^ (frame_id2 == frame_id), "no"
            frame_id_mod = skel_ours.mod_frame_id(frame_id=frame_id2)

            assert frame_id_mod == frame_id, \
                "No: %d %d %d" % (frame_id, frame_id2, frame_id_mod)
            actor_id2 = skel_ours.get_actor_id(frame_id2)
            assert actor_id2 == actor_id, "no: %s %s" % (actor_id, actor_id2)

            # which pose explains this actor in this frame
            pose_id = pose_ids[frame_id][actor_id]
            # check, if actor found
            if pose_id < 0:
                continue

            # 3D pose
            pose = pose_in[pose_id, :, JointDenis.revmap].T
            # added by Aron on 4/4/2018 (Denis' pelvis is too high up)
            pose[:, Joint.PELV] = (pose[:, Joint.LHIP] + pose[:, Joint.RHIP]) \
                                  / 2.
            skel_ours.set_pose(frame_id2, pose)

            # 2D pose
            pose_2d = pose_in_2d[pose_id, :, :]
            arr = np.array(JointDenis.pose_2d_to_ours(pose_2d),
                           dtype=np.float32).T
            skel_ours_2d.set_pose(frame_id2, arr)

            #
            # visibility (binary) and confidence (float)
            #

            # np.asarray(data[frame_str][u'visible'][pose_id])
            vis_i = data.get_visibilities(frame_id)[pose_id]

            # vis_f = np.asarray(data[frame_str][u'visible_float'][pose_id])
            vis_f = data.get_confidences(frame_id)[pose_id]
            for jid, visible in enumerate(vis_i):  # for each joint
                # binary visibility
                jid_ours = JointDenis.to_ours_2d(jid)
                skel_ours_2d.set_visible(frame_id2, jid_ours, visible)

                # confidence (fractional visibility)
                if np.isnan(vis_f[jid]):
                    continue

                try:
                    visible_f[actor_id][frame_id2][jid_ours] = vis_f[jid]
                except KeyError:
                    visible_f[actor_id][frame_id2] = {jid_ours: vis_f[jid]}
                visible_f_max = max(visible_f_max, vis_f[jid])
                conf_ = get_conf_thresholded(vis_f[jid],
                                             thresh_log_conf=None,
                                             dtype_np=np.float32)
                skel_ours_2d.set_confidence(frame_id=frame_id2,
                                            joint=jid_ours,
                                            confidence=conf_)
                my_visibilities[0].append(vis_f[jid])
                my_visibilities[1].append(conf_)
            skel_ours_2d._confidence_normalized = True

    plt.figure()
    plt.plot(my_visibilities[0], my_visibilities[1], 'o')
    plt.savefig('confidences.pdf')

    assert skel_ours.n_actors == args.n_actors, "no"
    assert skel_ours_2d.n_actors == args.n_actors, "no"
    # align to room
    min_z = np.min(skel_ours.poses[:, 2, :])
    print("min_max: %s, %s" % (min_z, np.max(skel_ours.poses[:, 2, :])))
    skel_ours.poses[:, 2, :] += min_z
    skel_ours.poses /= 1000.
    # The output is scaled to 2m by Denis.
    # We change this to 1.8 * a scale in order to correct for
    # the skeletons being a bit too high still.
    skel_ours.poses *= \
        args.person_height * conf.optimize_path.height_correction / 2.
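    # mirror the z axis and swap the y and z axes (converting from the
    # source pose convention into ours)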
    skel_ours.poses[:, 2, :] *= -1.
    skel_ours.poses = skel_ours.poses[:, [0, 2, 1], :]

    # refine
    name_video = args.dir.split(os.sep)[-2]
    out_path = os.path.join(args.dir, os.pardir,
                            "skel_%s_%s.json" % (name_video, args.postfix))
    out_path_orig = os.path.join(args.dir, os.pardir,
                                 "skel_%s_lfd_orig.json" % name_video)
    sclt_orig = Scenelet(skeleton=copy.deepcopy(skel_ours))
    sclt_orig.save(out_path_orig)

    skel_ours_2d_all = copy.deepcopy(skel_ours_2d)
    assert len(skel_ours_2d_all.get_frames()), skel_ours_2d_all.get_frames()

    #
    # Optimize
    #

    # frames_ignore = [(282, 372), (516, 1000)]

    skel_ours, skel_ours_2d, intrinsics, frame_ids_filled_in = prepare(
        args.camera_name,
        winsorize_limit=args.winsorize_limit,
        shape_orig=shape_orig,
        path_scene=p_scene,
        skel_ours_2d=skel_ours_2d,
        skel_ours=skel_ours,
        resample=args.resample,
        path_skel=path_skeleton)
    frames_ignore = []
    tr_ground = np.eye(4, dtype=np.float32)
    skel_opt, out_images, K = \
        optimize_path(
          skel_ours, skel_ours_2d, images, intrinsics=intrinsics,
          path_skel=out_path, shape_orig=shape_orig,
          use_huber=args.huber, weight_smooth=args.smooth,
          frames_ignore=frames_ignore, resample=args.resample,
          depth_init=scene_params['depth_init'],
          ground_rot=ground_rot)

    for frame_id in skel_opt.get_frames():
        skel_opt.set_time(frame_id=frame_id, time=float(frame_id) / args.r)

    skel_opt_raw = copy.deepcopy(skel_opt)
    skel_opt_resampled = Skeleton.resample(skel_opt)

    # Filter pelvis
    if args.with_filtering:
        out_filter_path = os.path.join(args.dir, os.pardir, "vis_filtering")
        skel_opt = filter_(skel_opt_resampled,
                           out_filter_path=out_filter_path,
                           skel_orig=skel_opt,
                           weight_smooth=args.smooth,
                           forwards_window_size=args.forwards_window_size)
    else:
        skel_opt.estimate_forwards(k=args.forwards_window_size)
        skel_opt_resampled.estimate_forwards(k=args.forwards_window_size)

    # if len(images):
    #     skel_opt.fill_with_closest(images.keys()[0], images.keys()[-1])

    min_y, max_y = skel_opt.get_min_y(tr_ground)
    print("min_y: %s, max_y: %s" % (min_y, max_y))

    #
    # save
    #
    frame_ids_old = set(skel_opt.get_frames())
    if args.resample:
        skel_opt = skel_opt_resampled
        frame_ids_filled_in.update(
            set(skel_opt.get_frames()).difference(frame_ids_old))
        lg.warning("Saving resampled scenelet!")
    scenelet = Scenelet(skel_opt)
    del skel_opt
    # skel_dict = skel_opt.to_json()
    tr_ground[1, 3] = min_y
    scenelet.aux_info['ground'] = tr_ground.tolist()
    assert isinstance(ground_rot, list) and len(ground_rot) == 3
    scenelet.add_aux_info('ground_rot', ground_rot)
    scenelet.add_aux_info(
        'path_opt_params', {
            'rate': args.r,
            'w-smooth': args.smooth,
            'winsorize-limit': args.winsorize_limit,
            'camera': args.camera_name,
            'huber': args.huber,
            'height_correction': conf.optimize_path.height_correction,
            'focal_correction': conf.optimize_path.focal_correction
        })
    scenelet.add_aux_info('frame_ids_filled_in', list(frame_ids_filled_in))

    # To MATLAB
    # _skeleton.get_min_y(_tr_ground)
    # with skel_opt as skeleton:
    # skeleton = skel_opt
    # skeleton_name = os.path.split(args.dir)[0]
    # skeleton_name = skeleton_name[skeleton_name.rfind('/')+1:]
    # mdict = skeleton.to_mdict(skeleton_name)
    # mdict['room_transform'] = tr_ground
    # mdict['room_transform'][1, 3] *= -1.
    # print(mdict)
    # print("scene_name?: %s" % os.path.split(args.dir)[0])
    # skeleton.save_matlab(
    #     os.path.join(os.path.dirname(args.dir), "skeleton_opt.mat"),
    #     mdict=mdict)

    assert scenelet.skeleton.has_forwards(), "No forwards??"
    scenelet.save(out_path)
    if show:
        # save path plot
        out_path_path = os.path.join(args.dir, os.pardir,
                                     "%s_path.jpg" % name_video)
        path_fig = plot_path(scenelet.skeleton)
        legend = ["smooth %g" % args.smooth]

    # hack debug
    # path_skel2 = os.path.join(args.dir, os.pardir, 'skel_lobby7_nosmooth.json')
    # if os.path.exists(path_skel2):
    #     skel2 = Skeleton.load(path_skel2)
    #     path_fig = plot_path(skel2, path_fig)
    #     legend.append('no smooth')
    if show:
        plt.legend(legend)
        path_fig.savefig(out_path_path)

    # backup args
    path_args = os.path.join(args.dir, os.pardir, 'args_denis.txt')
    with open(path_args, 'a') as f_args:
        f_args.write("%s %s\n" %
                     (os.path.basename(sys.executable), " ".join(argv)))

    # save 2D detections to file
    if args.postfix == 'unannot':
        path_skel_ours_2d = os.path.join(
            args.dir, os.pardir, "skel_%s_2d_%02d.json" % (name_video, 0))
        sclt_2d = Scenelet(skel_ours_2d_all)
        print('Saving {} to {}'.format(len(skel_ours_2d_all.get_frames()),
                                       path_skel_ours_2d))
        sclt_2d.skeleton.aux_info = {}
        sclt_2d.save(path_skel_ours_2d)
    else:
        print(args.postfix)

    logging.info("Saving images...")
    if len(images) and len(out_images):
        path_out_images = os.path.join(args.dir, os.pardir, 'color')
        try:
            os.makedirs(path_out_images)
        except OSError:
            pass
        visible_f_max_log = np.log(visible_f_max)
        frames = list(out_images.keys())
        for frame_id in range(frames[0], frames[-1] + 1):
            im = out_images[frame_id] if frame_id in out_images \
                else cv2.cvtColor(images[frame_id], cv2.COLOR_BGR2RGB)
            for actor_id in range(args.n_actors):
                if frame_id in visible_f[actor_id]:
                    frame_id2 = skel_ours_2d_all.unmod_frame_id(
                        frame_id=frame_id,
                        actor_id=actor_id,
                        frames_mod=skel_ours_2d_all.frames_mod)
                    for joint, is_vis in visible_f[actor_id][frame_id].items():
                        p2d = skel_ours_2d_all.get_joint_3d(joint,
                                                            frame_id=frame_id2)
                        # radius = np.log(is_vis) / visible_f_max_log
                        # lg.debug("r0: %g" % radius)
                        # radius = np.exp(np.log(is_vis) / visible_f_max_log)
                        # lg.debug("radius is %g" % radius)
                        vis_bool = True
                        if skel_ours_2d_all.has_visible(frame_id=frame_id2,
                                                        joint_id=joint):
                            vis_bool &= skel_ours_2d_all.is_visible(
                                frame_id2, joint)
                        radius = abs(np.log(is_vis / 0.1 + 1e-6))
                        if not np.isnan(radius):
                            p2d = (int(round(p2d[0])), int(round(p2d[1])))
                            cv2.circle(im,
                                       center=p2d,
                                       radius=int(round(radius)),
                                       color=(1., 1., 1., 0.5),
                                       thickness=1)
                            conf = get_conf_thresholded(conf=is_vis,
                                                        thresh_log_conf=None,
                                                        dtype_np=np.float32)
                            if conf > 0.5:
                                cv2.putText(img=im,
                                            text=Joint(joint).get_name(),
                                            org=p2d,
                                            fontFace=1,
                                            fontScale=1,
                                            color=(10., 150., 10., 100.))
                    # lg.debug("set visibility to %g, radius %g" % (is_vis, radius))
            # if frame_id in out_images:
            scale = (shape_orig[1] / float(im.shape[1]),
                     shape_orig[0] / float(im.shape[0]))
            cv2.imwrite(
                os.path.join(path_out_images, "color_%05d.jpg" % frame_id),
                cv2.resize(im, (0, 0),
                           fx=scale[0],
                           fy=scale[1],
                           interpolation=cv2.INTER_CUBIC))
            # else:
            #     fname = "color_%05d.jpg" % frame_id
            #     shutil.copyfile(
            #         os.path.join(path_images, fname),
            #         os.path.join(path_out_images, fname))
        lg.info("Wrote images to %s/" % path_out_images)
Example #7
def optimize_path(skel_ours,
                  skel_ours_2d,
                  images,
                  intrinsics,
                  path_skel,
                  ground_rot,
                  shape_orig=None,
                  use_huber=False,
                  weight_smooth=0.01,
                  show=False,
                  frames_ignore=None,
                  resample=True,
                  depth_init=10.,
                  p_constraints=None,
                  smooth_mode=SmoothMode.ACCEL):
    """Optimize 3D path so that it matches the 2D corresponding observations.

    Args:
        skel_ours (Skeleton):
            3D skeleton from LFD.
        skel_ours_2d (Skeleton):
            2D feature points from LFD.
        images (dict):
            Color images for debug, keyed by frame_id.
        intrinsics (np.ndarray):
            (3, 3) camera intrinsics matrix.
        path_skel (str):
            Path of input file from LFD on disk, used to create paths for
            intermediate result.
        shape_orig (tuple):
            Height and width of original images before LFD scaled them.
        use_huber (bool):
            Deprecated.
        weight_smooth (float):
            Smoothness term weight.
        ground_rot (List[float]):
            Euler rotation of the ground in degrees; only the first (x)
            component is expected to be non-zero.
        show (bool):
            Show debug visualizations.
        frames_ignore (set):
            Deprecated.
        resample (bool):
            Fill in missing poses by interpolating using Blender's IK.
        depth_init (float):
            Initial depth for LFD poses.
        p_constraints (str):
            Path to 3D constraints scenelet file.
        smooth_mode (SmoothMode):
            Smooth velocity or acceleration.
    """

    # scale 2D detections to canonical camera coordinates
    np_poses_2d = \
        skel_ours_2d.poses[:, :2, :] \
        - np.expand_dims(intrinsics[:2, 2], axis=1)
    np_poses_2d[:, 0, :] /= intrinsics[0, 0]
    np_poses_2d[:, 1, :] /= intrinsics[1, 1]
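    # Worked example: with fx = fy = 1000 and principal point (640, 360),
    # a detection at pixel (640, 360) maps to (0, 0) and (1640, 360) maps
    # to (1, 0) in canonical camera coordinates.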

    n_frames = skel_ours.poses.shape[0]
    np_translation = np.zeros(shape=(n_frames, 3), dtype=np.float32)
    np_translation[:, 1] = -1.
    np_translation[:, 2] = \
        np.random.uniform(-depth_init * 0.25, depth_init * 0.25,
                          np_translation.shape[0]) \
        + depth_init
    np_rotation = np.zeros(shape=(n_frames, 3), dtype=np.float32)

    frame_ids = np.array(skel_ours.get_frames(), dtype=np.float32)
    np_visibility = skel_ours_2d.get_confidence_matrix(frame_ids=frame_ids,
                                                       dtype='f4')

    if p_constraints is not None:
        sclt_cnstr = Scenelet.load(p_constraints)
        np_cnstr_mask = np.zeros(shape=(len(frame_ids),
                                        Joint.get_num_joints()),
                                 dtype=np.float32)
        np_cnstr = np.zeros(shape=(len(frame_ids), 3, Joint.get_num_joints()),
                            dtype=np.float32)
        for frame_id, confs in sclt_cnstr.confidence.items():
            lin_id = None
            for j, conf in confs.items():
                if conf > 0.5:
                    if lin_id is None:
                        lin_id = next(
                            lin_id_
                            for lin_id_, frame_id_ in enumerate(frame_ids)
                            if frame_id_ == frame_id)
                    np_cnstr_mask[lin_id, j] = conf
                    np_cnstr[lin_id, :, j] = \
                        sclt_cnstr.skeleton.get_joint_3d(
                          joint_id=j, frame_id=frame_id)
    else:
        np_cnstr_mask = None
        np_cnstr = None

    spans = skel_ours.get_actor_empty_frames()
    dt = frame_ids[1:].astype(np.float32) \
         - frame_ids[:-1].astype(np.float32)
    dt_pos_inv = np.reciprocal(dt, dtype=np.float32)
    dt_vel_inv = np.divide(np.float32(2.), dt[1:] + dt[:-1])
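    # dt_pos_inv holds per-step velocity weights, 1 / dt; dt_vel_inv holds
    # central-difference acceleration weights, 2 / (dt_prev + dt_next).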
    # ensure smoothness weight multipliers are not affected by
    # actor-transitions
    if skel_ours.n_actors > 1 and len(spans):
        for lin_id in range(len(dt)):
            frame_id0 = frame_ids[lin_id]
            frame_id1 = frame_ids[lin_id + 1]
            span = next((span_ for span_ in spans if span_[0] == frame_id0),
                        None)
            if span is not None:
                assert frame_id1 == span[1], "No"
                dt[lin_id] = 0.
                dt_pos_inv[lin_id] = 0.
                dt_vel_inv[lin_id] = 0.
                dt_vel_inv[lin_id - 1] = 1. / dt[lin_id - 1]

    forwards = np.array([
        skel_ours.get_forward(frame_id, estimate_ok=True, k=0)
        for frame_id in skel_ours.get_frames()
    ])
    # from alignment import get_angle
    # xs = np.hstack((
    # np.ones(shape=(len(forwards), 1)),
    # np.zeros(shape=(len(forwards), 2))
    # ))
    # print(xs.shape)
    print(forwards.shape)
    unit_x = np.array((1., 0., 0.))
    np_angles = [-np.arctan2(forward[2], forward[0]) for forward in forwards]
    print(forwards, np_angles)
    # ank_diff = \
    #     np.exp(
    #        -2. * np.max(
    #           [
    #               np.linalg.norm(
    #                  (skel_ours.poses[1:, :, joint]
    #                   - skel_ours.poses[:-1, :, joint]).T
    #                  * dt_pos_inv, axis=0
    #               ).astype(np.float32)
    #               for joint in {Joint.LANK, Joint.RANK}
    #           ],
    #           axis=0
    #        )
    #     )
    # assert ank_diff.shape == (skel_ours.poses.shape[0]-1,), \
    #     "Wrong shape: %s" % repr(ank_diff.shape)

    # cam_angle = [np.deg2rad(-8.)]
    assert np.isclose(ground_rot[1], 0.) and np.isclose(ground_rot[2], 0.), \
        "Assumed only x rotation"
    # assert ground_rot[0] <= 0, "Negative means looking down, why looking up?"
    cam_angle = [np.deg2rad(ground_rot[0])]
    # assert False, "Fixed angle!"
    device_name = '/gpu:0' if tf.test.is_gpu_available() else '/cpu:0'
    devices = {device_name}
    for device in devices:
        with Timer(device, verbose=True):
            graph = tf.Graph()
            with graph.as_default(), tf.device(device):
                tf_visibility = tf.Variable(np.tile(np_visibility, (1, 2, 1)),
                                            name='visibility',
                                            trainable=False,
                                            dtype=tf.float32)
                tf_dt_pos_inv = \
                    tf.Variable(np.tile(dt_pos_inv, (1, 3)).reshape(-1, 3),
                                name='dt_pos_inv', trainable=False,
                                dtype=tf.float32)
                tf_dt_vel_inv = \
                    tf.constant(np.tile(dt_vel_inv, (1, 3)).reshape(-1, 3),
                                name='dt_vel_inv', dtype=tf.float32)

                # input data
                pos_3d_in = tf.Variable(skel_ours.poses.astype(np.float32),
                                        trainable=False,
                                        name='pos_3d_in',
                                        dtype=tf.float32)
                pos_2d_in = tf.Variable(np_poses_2d.astype(np.float32),
                                        trainable=False,
                                        name='pos_2d_in',
                                        dtype=tf.float32)

                params_camera = tf.Variable(initial_value=cam_angle,
                                            dtype=tf.float32,
                                            trainable=True)

                cam_sn = tf.sin(params_camera)
                cam_cs = tf.cos(params_camera)
                transform_camera = tf.reshape(tf.stack([
                    1., 0., 0., 0., 0., cam_cs[0], cam_sn[0], 0., 0.,
                    -cam_sn[0], cam_cs[0], 0., 0., 0., 0., 1.
                ],
                                                       axis=0),
                                              shape=(4, 4))
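
                # transform_camera is a 4x4 rotation about the camera x axis
                # by the trainable ground angle params_camera.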

                # 3D translation
                translation = tf.Variable(np_translation, name='translation')
                # 3D rotation (Euler XYZ)
                rotation = tf.Variable(np_rotation, name='rotation')
                fw_angles = tf.Variable(np_angles, name='angles')

                # rotation around y
                my_zeros = tf.zeros((n_frames, 1))
                my_ones = tf.ones((n_frames, 1))
                c = tf.cos(tf.slice(rotation, [0, 1], [n_frames, 1]))
                s = tf.sin(tf.slice(rotation, [0, 1], [n_frames, 1]))
                t0 = tf.concat([c, my_zeros, -s, my_zeros], axis=1)
                t1 = tf.concat([my_zeros, my_ones, my_zeros, my_zeros], axis=1)
                t2 = tf.concat([s, my_zeros, c, my_zeros], axis=1)
                t3 = tf.concat([my_zeros, my_zeros, my_zeros, my_ones], axis=1)
                transform = tf.stack([t0, t1, t2, t3],
                                     axis=2,
                                     name="transform")

                transform = tf.einsum('ij,ajk->aik', transform_camera,
                                      transform)[:, :3, :3]

                # transform to 3d
                pos_3d = tf.matmul(transform, pos_3d_in) \
                    + tf.tile(tf.expand_dims(translation, 2),
                              [1, 1, int(pos_3d_in.shape[2])])

                # constraints
                loss_cnstr = None
                if np_cnstr is not None:
                    constraints = tf.Variable(np_cnstr,
                                              trainable=False,
                                              name='constraints',
                                              dtype=tf.float32)
                    constraints_mask = tf.Variable(np_cnstr_mask,
                                                   trainable=False,
                                                   name='constraints_mask',
                                                   dtype=tf.float32)
                    cnstr_diff = tf.reduce_sum(tf.squared_difference(
                        pos_3d, constraints),
                                               axis=1,
                                               name='constraints_difference')
                    cnstr_diff_masked = tf.multiply(
                        constraints_mask,
                        cnstr_diff,
                        name='constraints_difference_masked')
                    loss_cnstr = tf.reduce_sum(cnstr_diff_masked,
                                               name='constraints_loss')

                # perspective divide
                pos_2d = tf.divide(
                    tf.slice(pos_3d, [0, 0, 0], [n_frames, 2, -1]),
                    tf.slice(pos_3d, [0, 2, 0], [n_frames, 1, -1]))

                if use_huber:
                    diff = huber_loss(pos_2d_in, pos_2d, 1.)
                    masked = diff * tf_visibility
                    loss_reproj = tf.nn.l2_loss(masked)
                    lg.info("Doing huber on reprojection, NOT translation")
                else:
                    # re-projection loss
                    diff = pos_2d - pos_2d_in
                    # mask loss by 2d key-point visibility
                    masked = diff * tf_visibility
                    loss_reproj = tf.nn.l2_loss(masked)
                    lg.info("NOT doing huber")

                sys.stderr.write(
                    "TODO: Move huber to translation, not reconstruction\n")

                # translation smoothness
                dx = tf.multiply(
                    x=0.5,
                    y=tf.add(
                        pos_3d[1:, :, Joint.LHIP] - pos_3d[:-1, :, Joint.LHIP],
                        pos_3d[1:, :, Joint.RHIP] - pos_3d[:-1, :, Joint.RHIP],
                    ),
                    name="average_hip_displacement_3d")
                tf_velocity = tf.multiply(dx, tf_dt_pos_inv)

                tf_acceleration_z = tf.multiply(x=dx[1:, 2:3] - dx[:-1, 2:3],
                                                y=tf_dt_vel_inv[:, 2:3],
                                                name="acceleration_z")

                if smooth_mode == SmoothMode.VELOCITY:
                    # if GT, use full smoothness to fix 2-frame flicker
                    if np_cnstr is not None:
                        print('Smoothing all velocity!')
                        loss_transl_smooth = \
                            weight_smooth * tf.nn.l2_loss(tf_velocity)
                    else:  # Normal mode, don't oversmooth screen-space
                        loss_transl_smooth = \
                            weight_smooth * tf.nn.l2_loss(tf_velocity[:, 2:3])
                elif smooth_mode == SmoothMode.ACCEL:
                    loss_transl_smooth = \
                        weight_smooth * tf.nn.l2_loss(tf_acceleration_z)
                else:
                    raise RuntimeError(
                        'Unknown smooth mode: {}'.format(smooth_mode))

                if show:
                    sqr_accel_z = weight_smooth * tf.square(tf_acceleration_z)

                if weight_smooth > 0.:
                    lg.info("Smoothing in time!")
                    loss = loss_reproj + loss_transl_smooth
                else:
                    lg.warning("Not smoothing!")
                    loss = loss_reproj

                if loss_cnstr is not None:
                    loss += 1000 * loss_cnstr

                # hip0 = tf.nn.l2_normalize(pos_3d[:-1, :, Joint.RHIP] - pos_3d[:-1, :, Joint.LHIP])
                # hip1 = tf.nn.l2_normalize(pos_3d[1:, :, Joint.RHIP] - pos_3d[1:, :, Joint.RHIP])
                # dots = tf.reduce_sum(tf.multiply(hip0, hip1), axis=1)
                # print(dots)
                # loss_dot = tf.nn.l2_loss(1. - dots)
                # loss_ang = fw_angles + rotation[:, 1]
                # print(loss_ang)
                # loss_ang = tf.square(loss_ang[1:] - loss_ang[:-1])
                # print(loss_ang)
                # two_pi_sqr = tf.constant((2. * 3.14159)**2., dtype=tf.float32)
                # print(two_pi_sqr)
                # loss_ang = tf.reduce_mean(tf.where(loss_ang > two_pi_sqr, loss_ang - two_pi_sqr, loss_ang))
                # print(loss_ang)
                # loss += loss_ang

                #
                # optimize
                #
                optimizer = ScipyOptimizerInterface(
                    loss,
                    var_list=[translation, rotation],
                    options={'gtol': 1e-12},
                    var_to_bounds={rotation: (-np.pi / 2., np.pi / 2.)})
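                # Note: ScipyOptimizerInterface (tf.contrib.opt) wraps
                # scipy.optimize.minimize; its default L-BFGS-B method
                # honors the box bounds passed via var_to_bounds.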

            with tf.Session(graph=graph) as session:
                session.run(tf.global_variables_initializer())

                optimizer.minimize(session)
                np_pos_3d_out, np_pos_2d_out, np_transl_out, np_masked, \
                np_acceleration, np_loss_transl_smooth, np_dt_vel = \
                    session.run([pos_3d, pos_2d, translation, masked,
                                 tf_acceleration_z, loss_transl_smooth,
                                 tf_dt_vel_inv])
                if show:
                    o_sqr_accel_z = session.run(sqr_accel_z)
                o_vel = session.run(tf_velocity)
                o_dx = session.run(dx)
                o_rot = session.run(rotation)
                # o_dx, o_dx2 = session.run([accel_bak, acceleration2])
                # assert np.allclose(o_dx, o_dx2), "no"
                o_cam = session.run(fetches=[params_camera])
                print("camera angle: %s" % np.rad2deg(o_cam[0]))
                # o_losses = session.run([loss_reproj, loss_transl_smooth, loss_dot, loss_ang])
                o_losses = session.run([loss_reproj, loss_transl_smooth])
                print('losses: {}'.format(o_losses))
                # o_dots = session.run(dots)
                # with open('tmp/dots.txt', 'w') as fout:
                #     fout.write('\n'.join((str(e) for e in o_dots.tolist())))

    fixed_frames = []
    # for lin_frame_id in range(np_transl_out.shape[0]):
    #     if np_transl_out[lin_frame_id, 2] < 0.:
    #         print("Correcting frame_id %d: %s"
    #               % (skel_ours.get_lin_id_for_frame_id(lin_frame_id),
    #                  np_transl_out[lin_frame_id, :]))
    #         if lin_frame_id > 0:
    #             np_transl_out[lin_frame_id, :] = np_transl_out[lin_frame_id-1, :]
    #         else:
    #             np_transl_out[lin_frame_id, :] = np_transl_out[lin_frame_id+1, :]
    #         fixed_frames.append(lin_frame_id)

    # debug_forwards(skel_ours.poses, np_pos_3d_out, o_rot, forwards, np_angles)

    # z_jumps = np_pos_3d_out[1:, 2, Joint.PELV] - np_pos_3d_out[:-1, 2, Joint.PELV]
    # out = scipy.stats.mstats.winsorize(z_jumps, limits=1.)
    # plt.figure()
    # plt.plot(pos_3d[:, 2, Joint.PELV])
    # plt.show()
    # sys.exit(0)
    # diff = np.linalg.norm(out - displ, axis=1)
    if len(fixed_frames):
        print("Re-optimizing...")
        with tf.Session(graph=graph) as session:
            np_pos_3d_out, np_pos_2d_out, np_transl_out = \
                session.run(fetches=[pos_3d, pos_2d, translation],
                            feed_dict={transform: np_transl_out})

    if show:
        lim_fr = [105, 115, 135]
        fig = plt.figure()
        accel_thr = 0.  # np.percentile(o_sqr_accel_z, 25)

        ax = plt.subplot2grid((2, 2), (0, 0), colspan=2)
        # print("np_masked:%s" % np_masked)
        # plt.plot(np_masked[:, )
        ax.plot(np.linalg.norm(np_acceleration[lim_fr[0]:lim_fr[1]], axis=1),
                '--o',
                label='accel')
        ax.add_artist(Line2D([0, len(o_sqr_accel_z)], [accel_thr, accel_thr]))
        # plt.plot(np_dt_vel[:, 0], label='dt velocity')
        # plt.plot(np.linalg.norm(np_f_accel, axis=1), '--x', label='f_accel')
        # plt.plot(ank_diff, label='ank_diff')
        ax.plot(o_sqr_accel_z[lim_fr[0]:lim_fr[1] + 1],
                '--x',
                label='loss accel_z')
        ax.legend()

        ax2 = plt.subplot2grid((2, 2), (1, 0), aspect='equal')
        ax2.plot(np_pos_3d_out[lim_fr[0]:lim_fr[1] + 1, 0, Joint.PELV],
                 np_pos_3d_out[lim_fr[0]:lim_fr[1] + 1, 2, Joint.PELV], '--x')
        for i, vel in enumerate(o_vel):
            if not (lim_fr[0] <= i <= lim_fr[1]):
                continue

            p0 = np_pos_3d_out[i + 1, [0, 2], Joint.PELV]
            p1 = np_pos_3d_out[i, [0, 2], Joint.PELV]
            ax2.annotate(
                "%f = ((%g - %g) + (%g - %g)) * %g = %g" %
                (vel[2], np_pos_3d_out[i + 1, 2, Joint.LHIP],
                 np_pos_3d_out[i, 2, Joint.LHIP], np_pos_3d_out[i + 1, 2,
                                                                Joint.RHIP],
                 np_pos_3d_out[i, 2, Joint.RHIP], np_dt_vel[i, 2], o_dx[i, 2]),
                xy=((p0[0] + p1[0]) / 2., (p0[1] + p1[1]) / 2.))
        ax2.set_title('velocities')

        ax1 = plt.subplot2grid((2, 2), (1, 1), aspect='equal')
        ax1.plot(np_pos_3d_out[lim_fr[0]:lim_fr[1] + 1, 0, Joint.PELV],
                 np_pos_3d_out[lim_fr[0]:lim_fr[1] + 1, 2, Joint.PELV], '--x')
        for i, lacc in enumerate(o_sqr_accel_z):
            if not (lim_fr[0] <= i <= lim_fr[1]):
                continue
            if lacc > accel_thr:
                p0 = np_pos_3d_out[i + 1, [0, 2], Joint.PELV]
                ax1.annotate("%.3f" % np_acceleration[i], xy=(p0[0], p0[1]))
                ax.annotate("%.3f" % np.log10(lacc),
                            xy=(i - lim_fr[0], abs(np_acceleration[i])))
        ax1.set_title('accelerations')

        plt.show()

    np.set_printoptions(linewidth=200)
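    # Map normalized camera-plane coordinates back to pixels:
    # u = fx * x + cx, v = fy * y + cy.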
    np_pos_2d_out[:, 0, :] *= intrinsics[0, 0]
    np_pos_2d_out[:, 1, :] *= intrinsics[1, 1]
    np_pos_2d_out[:, 0, :] += intrinsics[0, 2]
    np_pos_2d_out[:, 1, :] += intrinsics[1, 2]
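    # Same mapping for the original 2D detections.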

    np_poses_2d[:, 0, :] *= intrinsics[0, 0]
    np_poses_2d[:, 1, :] *= intrinsics[1, 1]
    np_poses_2d[:, 0, :] += intrinsics[0, 2]
    np_poses_2d[:, 1, :] += intrinsics[1, 2]

    out_images = {}
    if shape_orig is not None:
        frames_2d = skel_ours_2d.get_frames()
        for frame_id2 in frames_2d:
            try:
                lin_frame_id = skel_ours_2d.get_lin_id_for_frame_id(frame_id2)
            except KeyError:
                lin_frame_id = None
            frame_id = skel_ours_2d.mod_frame_id(frame_id=frame_id2)

            im = None
            if frame_id in out_images:
                im = out_images[frame_id]
            elif len(images):
                if frame_id not in images:
                    lg.warning("Not enough images, the video was probably cut "
                               "after LiftingFromTheDeep was run.")
                    continue
                im = copy.deepcopy(images[frame_id])
                im = cv2.cvtColor(im, cv2.COLOR_RGB2BGR)
            else:
                im = np.zeros(
                    (shape_orig[0].astype(int), shape_orig[1].astype(int), 3),
                    dtype='u1')  # uint8 canvas for OpenCV drawing
            if lin_frame_id is not None:
                for jid in range(np_pos_2d_out.shape[2]):
                    if skel_ours_2d.is_visible(frame_id2, jid):
                        p2d = tuple(np_pos_2d_out[lin_frame_id, :,
                                                  jid].astype(int).tolist())
                        p2d_det = tuple(np_poses_2d[lin_frame_id, :,
                                                    jid].astype(int).tolist())
                        cv2.line(im,
                                 p2d,
                                 p2d_det,
                                 color=(100, 100, 100),
                                 thickness=3)
                        cv2.circle(im,
                                   p2d,
                                   radius=3,
                                   color=(0, 0, 200),
                                   thickness=-1)
                        cv2.circle(im,
                                   p2d_det,
                                   radius=3,
                                   color=(0, 200, 0),
                                   thickness=-1)
            out_images[frame_id] = im
            # cv2.imshow("Out", im)
            # cv2.waitKey(50)

        if False:  # flip to True for a 3D scatter of the pelvis trajectory
            fig = plt.figure()
            ax = fig.add_subplot(111, projection='3d')
            for frame_id in range(0, np_pos_3d_out.shape[0], 1):
                j = Joint.PELV
                ax.scatter(np_pos_3d_out[frame_id, 0, j],
                           np_pos_3d_out[frame_id, 2, j],
                           -np_pos_3d_out[frame_id, 1, j],
                           marker='o')
            # smallest = np_pos_3d_out.min()
            # largest = np_pos_3d_out.max()
            ax.set_xlim3d(-5., 5.)
            ax.set_xlabel('x')
            ax.set_ylim3d(-5., 5.)
            ax.set_ylabel('y')
            ax.set_zlim3d(-5., 5.)
            ax.set_zlabel('z')

    if False:  # flip to True for a 3D scatter of all joints in every frame
        fig = plt.figure()
        ax = fig.add_subplot(111, projection='3d')
        for frame_id in range(0, np_pos_3d_out.shape[0], 1):
            for j in range(np_pos_3d_out.shape[2]):
                ax.scatter(np_pos_3d_out[frame_id, 0, j],
                           np_pos_3d_out[frame_id, 2, j],
                           -np_pos_3d_out[frame_id, 1, j],
                           marker='o')
        # smallest = np_pos_3d_out.min()
        # largest = np_pos_3d_out.max()
        ax.set_xlim3d(-5., 5.)
        ax.set_xlabel('x')
        ax.set_ylim3d(-5., 5.)
        ax.set_ylabel('y')
        ax.set_zlim3d(-5., 5.)
        ax.set_zlabel('z')
    plt.show()  # no-op unless one of the debug blocks above is enabled

    assert all(a == b
               for a, b in zip(skel_ours.poses.shape, np_pos_3d_out.shape)), \
        "Shape mismatch: %s vs %s" % (skel_ours.poses.shape, np_pos_3d_out.shape)
    skel_ours.poses = np_pos_3d_out
    return skel_ours, out_images, intrinsics
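
The pixel-coordinate conversion applied to np_pos_2d_out and np_poses_2d above is the standard pinhole mapping, u = fx * x + cx and v = fy * y + cy. A minimal stand-alone sketch of that step, assuming (n_frames, 2, n_joints) arrays; the helper name is illustrative, not from the original code:

import numpy as np

def normalized_to_pixels(p2d, intrinsics):
    """Map normalized camera-plane coordinates to pixel coordinates."""
    out = p2d.copy()
    out[:, 0, :] = out[:, 0, :] * intrinsics[0, 0] + intrinsics[0, 2]  # u = fx*x + cx
    out[:, 1, :] = out[:, 1, :] * intrinsics[1, 1] + intrinsics[1, 2]  # v = fy*y + cy
    return out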
Example #8
def show_multi(images, data, pose_ids, problem, p_dir,
               thresh_log_conf=Conf.get().path.thresh_log_conf,
               first_run=False, n_actors=1):
    """

    Args:
        images (Dict[int, np.ndarray]):
        data (SkeletonPosesWrapper):
        pose_ids (Dict[str, Dict[int, int]]):
        problem:
        p_dir (str):
        thresh_log_conf:
        first_run (bool):
            Will output labeling_orig if True allowing the inspection of
            pose_ids.
    """
    _confs = []
    # colors = {
    #     0: (.8, .1, .1, 1.),
    #     1: (.1, .8, .1, 1.),
    #     2: (.8, .8, .1, 1.),
    #     3: (.1, .8, .8, 1.),
    #     4: (.8, .1, .8, 1.),
    #     5: (.6, .4, .8, 1.),
    #     6: (.6, .4, .8, 1.)
    # }

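    # One distinguishable RGBA color per actor, sampled from the 'gist_earth'
    # colormap normalized over n_actors + 1 levels.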
    color_norm = cmNormalize(vmin=0, vmax=n_actors+1)
    scalar_map = cm.ScalarMappable(norm=color_norm, cmap='gist_earth')
    colors = [tuple(c for c in scalar_map.to_rgba(i+1))
              for i in range(n_actors)]

    p_labeling = os.path.join(p_dir, 'debug',
                              'labeling' if not first_run else 'labeling_orig')
    try:
        os.makedirs(p_labeling)
    except OSError:
        pass  # directory already exists

    limits = (min(images), max(images) + 1)
    scale = None
    for frame_id in range(limits[0], limits[1]):
        frame_str = "color_%05d" % frame_id
        # try:
        #     frame_id = int(frame_str.split('_')[1])
        # except ValueError:
        #     print("skipping key %s" % frame_id)
        #     continue
        # im = cv2.cvtColor(images[frame_id], cv2.COLOR_RGB2BGR)
        im = images[frame_id].copy()
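        # Upscale small frames by an integer factor so the overlaid labels
        # stay legible.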
        if im.shape[1] < 1900:
            if scale is None:
                scale = 1900 // im.shape[1] + 1
            im = cv2.resize(im, dsize=None, fx=scale, fy=scale,
                            interpolation=cv2.INTER_CUBIC)
        elif scale is None:
            scale = 1.

        # for frame_id in data.get_frames():
        #     frame_str = "color_%05d" % frame_id
        #     pose_in = np.asarray(data[frame_str][u'centered_3d'])
        pose_in_2d = data.get_poses_2d(frame_id=frame_id)

        # np.asarray(data[frame_str][u'pose_2d'])
        # visible = np.asarray(data[frame_str][u'visible'])
        # vis_f = np.asarray(data[frame_str][u'visible_float'])
        vis_f = data.get_confidences(frame_id=frame_id)

        # pose_id = pose_ids[frame_id]
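        # pose_ids[frame_id] maps actor_id -> pose_id; invert it to find
        # which actor, if any, owns this detection.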
        for pose_id in range(pose_in_2d.shape[0]):
            actor_id = next(
              (actor_id_
               for actor_id_, pose_id_ in pose_ids[frame_id].items()
               if pose_id == pose_id_),
              None)
            if actor_id is None:
                ccolor = (0.5, 0.5, 0.5, 1.)
            else:
                ccolor = colors[actor_id % len(colors)]
            _confs.append(vis_f[pose_id:pose_id+1, :])
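            # Scale matplotlib's 0..1 RGBA floats to a 0..255 int triple for OpenCV.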
            color = tuple(int(c_ * 255) for c_ in ccolor[:3])
            # threshed = get_conf_thresholded(vis_f[pose_id:pose_id+1, :],
            #                                 thresh_log_conf=thresh_log_conf,
            #                                 dtype_np=np.float32)
            # lg.debug("avg_vis: %s" % threshed)
            # avg_vis = np.count_nonzero(threshed > 0.05, axis=1)
            # if avg_vis > 0.4:
            p2d_mean = np.mean(pose_in_2d[pose_id, :, 1]) * scale
            # cv2.putText(im, "%.2f" % (avg_vis / threshed.shape[1]),
            #             (int(p2d_mean) - 20, 50), 1, 1, thickness=2,
            #             color=(200, 200, 200))
            if actor_id is None:
                actor_id = -1
            cv2.putText(im, "a%d" % actor_id,
                        (int(p2d_mean) - 20, 30), fontFace=1, fontScale=2,
                        thickness=2, color=tuple(_c * 0.2 for _c in color))

            for j in range(pose_in_2d.shape[1]):
                p2d = [int(round(c * scale))
                       for c in pose_in_2d[pose_id, j, :]]
                conf = get_conf_thresholded(conf=vis_f[pose_id, j],
                                            thresh_log_conf=thresh_log_conf,
                                            dtype_np=np.float32)
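                # Draw only joints whose thresholded confidence clears 0.5.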
                if conf > 0.5:
                    cv2.circle(
                      im, (p2d[0], p2d[1]), radius=3, color=color,
                      thickness=-1)

                    # jid_ours = JointDenis.to_ours_2d(j)
                    jid_ours = j
                    cv2.putText(im, Joint(jid_ours).get_name(),
                                (p2d[0], p2d[1]-5), 1, 1, color=color,
                                thickness=1)

            center = (scale * np.mean(pose_in_2d[pose_id, :, :], axis=0)) \
                .round().astype('i4').tolist()
            # center = (scale * pose_in_2d[pose_id, 5, :])\
            #     .round().astype('i4').tolist()
            cv2.putText(im, "p%da%d" % (pose_id, actor_id),
                        (center[0], center[1]), 1, 2,
                        [c_ * 1.2 for c_ in color], thickness=2)

        # frame_id
        cv2.putText(im, "#%d" % frame_id, (20, 30), 1, 2, (255, 255, 255),
                    thickness=2)
        # cv2.imshow("im", im)
        p_im = os.path.join(p_labeling, "im_%04d.jpg" % frame_id)
        cv2.imwrite(p_im, im)