예제 #1
0
def match(query_full,
          d_query,
          query_2d_full,
          scene,
          intr,
          gap,
          tr_ground,
          scale,
          thresh_log_conf=7.5,
          w_3d=0.01,
          fps=3,
          step_samples=100):
    """Fit candidate scenelets cut from `scene` into a gap of the query.

    Windows are sampled from `scene` every `step_samples` frames. Each
    window is centred horizontally (x, z), then one translation and one
    rotation angle around the y axis are optimized per candidate so that
    the transformed poses (a) reproject onto the confidence-weighted 2D
    query key-points and (b) stay close to the 3D query poses flagged as
    valid inside the gap. The candidate with the lowest summed loss is
    returned, together with copies of the scene objects moved by the same
    transform.

    Args:
        query_full:
            Query scenelet. It is sampled with `get_partial_scenelet` and
            asked for times (`get_time`) and the frame rate
            (`skeleton.get_rate`).
        d_query (str):
            Query directory. Only read by the disabled debug visualization
            (`origjpg/color_%05d.jpg`); kept for interface compatibility.
        query_2d_full:
            Source of 2D key-points and confidences (`has_pose`,
            `get_pose`, `has_confidence`, `_confidence`).
        scene:
            Scenelet the candidates are sampled from. Its objects, except
            the ones labelled 'wall' or 'floor', are copied to the output.
        intr (np.ndarray):
            Camera intrinsics. `intr[0, 0]` and `intr[1, 1]` are used as
            focal lengths, `intr[:2, 2]` as principal point.
        gap (tuple):
            (first, last) query frame ids of the gap, both inclusive.
        tr_ground (np.ndarray):
            Transform whose `[1, 3]` entry is used as the fixed
            y-translation of every candidate.
        scale (float):
            Factor applied to the 2D key-points before they are normalized
            by the intrinsics.
        thresh_log_conf (float):
            Key-points with |log(confidence)| at or above this value get
            zero weight; the weight grows linearly to 1 as |log(conf)|
            approaches 0.
        w_3d (float):
            Weight of the 3D distance term relative to the reprojection
            term.
        fps (int):
            Forwarded to `get_partial_scenelet` for the scene windows; also
            scales the margin kept at both ends of the scene.
        step_samples (int):
            Stride, in scene frames, between candidate window centres.

    Returns:
        list: At most one `(reprojection loss, Scenelet)` tuple for the
        best candidate, or an empty list if `scene` is too short to sample
        a single window.
    """
    with_y = False  # optimize for y as well
    np.set_printoptions(suppress=True, linewidth=220)

    pjoin = os.path.join

    len_gap = gap[1] - gap[0] + 1
    query, q_v = get_partial_scenelet(query_full,
                                      start=gap[0],
                                      end=gap[1] + 1,
                                      fps=1)
    q_v_sum = np.sum(q_v)
    # NOTE(review): if no query pose is flagged valid, this inverse and the
    # centroid `c3d` below are not finite -- confirm callers never pass
    # such a gap.
    q_v_sum_inv = np.float32(1. / q_v_sum)

    # centres of the candidate windows; keep a margin at both scene ends
    mid_frames = range(len_gap * fps,
                       scene.skeleton.poses.shape[0] - len_gap * fps,
                       step_samples)
    if not len(mid_frames):
        return []

    # scenelets: (candidate, frame, coordinate, joint), sc_v: validity flags
    scenelets, sc_v = (np.array(e) for e in zip(*[
        get_partial_scenelet(
            scene, mid_frame_id=mid_frame_id, n_frames=len_gap, fps=fps)
        for mid_frame_id in mid_frames
    ]))

    lg.debug("have %d/%d 3D poses in scenelet, and %d/%d in query" %
             (np.sum(sc_v), sc_v.shape[0], np.sum(q_v), q_v.shape[0]))

    #
    # 2D key-points and their weights inside the gap
    #

    query_2d = np.zeros((len_gap, 2, 16), dtype=np.float32)
    conf_2d = np.zeros((len_gap, 1, 16), dtype=np.float32)
    for lin_id, frame_id in enumerate(range(gap[0], gap[1] + 1)):
        if query_2d_full.has_pose(frame_id):
            query_2d[lin_id, :, :] = query_2d_full.get_pose(frame_id)[:2, :]

        if query_2d_full.has_confidence(frame_id):
            for joint, conf in query_2d_full._confidence[frame_id].items():
                log_conf = abs(np.log(conf)) if conf >= 0. else 0.
                # linear fall-off: 1 at |log(conf)| == 0, 0 at the threshold
                conf_2d[lin_id, 0, joint] = max(
                    0., (thresh_log_conf - log_conf) / thresh_log_conf)

    # Normalize the weights to a maximum of 1. Without any positive weight
    # the division would be 0 / 0 and fill the weights (and the loss) with
    # NaN, so the all-zero weights are kept as they are in that case.
    max_conf = np.max(conf_2d)
    if max_conf > 0.:
        conf_2d /= max_conf

    # scale from Denis' scale to current image size
    query_2d *= scale

    # move to normalized camera coordinates
    query_2d -= intr[:2, 2:3]
    query_2d[:, 0, :] /= intr[0, 0]
    query_2d[:, 1, :] /= intr[1, 1]

    #
    # initialize translation
    #

    # centroid of the valid query poses
    c3d = np.mean(query[q_v.astype('b1'), :, :], axis=(0, 2))
    # centroid of the valid poses of each candidate
    sclt_means = np.array([
        np.mean(scenelets[i, sc_v[i, ...].astype('b1'), ...], axis=(0, 2))
        for i in range(scenelets.shape[0])
    ],
                          dtype=np.float32)
    # don't change height
    sclt_means[:, 1] = 0
    scenelets -= sclt_means[:, None, :, None]
    lg.debug("means: %s" % repr(sclt_means.shape))
    # every candidate starts at the query centroid
    if with_y:
        np_translation = np.array([c3d for _ in range(scenelets.shape[0])],
                                  dtype=np.float32)
    else:
        np_translation = np.array(
            [c3d[[0, 2]] for _ in range(scenelets.shape[0])],
            dtype=np.float32)
    # initial angles alternate between 0 and pi over the candidates
    np_rotation = np.array(
        [np.pi * (i % 2) for i in range(scenelets.shape[0])],
        dtype=np.float32)[:, None]
    n_cands = np_translation.shape[0]

    #
    # build the optimization problem
    #

    graph = tf.Graph()
    with graph.as_default(), tf.device('/gpu:0'):
        # optimized translation, one row per candidate
        translation_ = tf.Variable(initial_value=np_translation,
                                   name='translation',
                                   dtype=tf.float32)
        # fixed height taken from the ground transform
        t_y = tf.fill(dims=(n_cands, ),
                      value=(tr_ground[1, 3]).astype(np.float32))
        lg.debug("t_y: %s" % t_y)
        if with_y:
            translation = translation_
        else:
            # insert the fixed y between the optimized x and z
            translation = tf.concat(
                (translation_[:, 0:1], t_y[:, None], translation_[:, 1:2]),
                axis=1)

        lg.debug("translation: %s" % translation)
        # optimized rotation angle around the y axis, one per candidate
        rotation = tf.Variable(np_rotation, name='rotation', dtype=tf.float32)

        w = tf.Variable(conf_2d, trainable=False, name='w', dtype=tf.float32)

        pos_3d_in = tf.Variable(query,
                                trainable=False,
                                name='pos_3d_in',
                                dtype=tf.float32)

        pos_2d_in = tf.Variable(query_2d,
                                trainable=False,
                                name='pos_2d_in',
                                dtype=tf.float32)

        pos_3d_sclt = tf.Variable(scenelets,
                                  trainable=False,
                                  name='pos_3d_sclt',
                                  dtype=tf.float32)

        # rotation around y, assembled row by row and reshaped to 3 x 3
        my_zeros = tf.zeros((n_cands, 1), dtype=tf.float32, name='my_zeros')
        my_ones = tf.ones((n_cands, 1))
        c = tf.cos(rotation, 'cos')
        s = tf.sin(rotation, 'sin')
        transform = tf.concat(
            [c, my_zeros, -s, my_zeros, my_ones, my_zeros, s, my_zeros, c],
            axis=1)
        transform = tf.reshape(transform, ((-1, 3, 3)), name='transform')
        print("t2: %s" % transform)

        # rotate every joint of every frame, then translate
        shp = pos_3d_sclt.get_shape().as_list()
        transform_tiled = tf.tile(transform[:, None, :, :, None],
                                  (1, shp[1], 1, 1, shp[3]))
        pos_3d = tf.einsum("abijd,abjd->abid", transform_tiled, pos_3d_sclt)
        pos_3d += translation[:, None, :, None]

        # perspective divide
        pos_2d = tf.divide(pos_3d[:, :, :2, :], pos_3d[:, :, 2:3, :])

        diff = pos_2d - pos_2d_in
        # mask loss by 2d key-point visibility
        masked = tf.multiply(diff, w)
        masked_sqr = tf.square(masked[:, :, 0, :]) \
                     + tf.square(masked[:, :, 1, :])
        loss_reproj = tf.reduce_sum(masked_sqr, axis=[1, 2])

        # distance from existing 3D skeletons
        d_3d = q_v_sum_inv * tf.multiply(pos_3d - query[None, ...],
                                         q_v[None, :, None, None],
                                         name='diff_3d')

        loss_3d = w_3d * tf.reduce_sum(tf.square(d_3d[:, :, 0, :]) + tf.square(
            d_3d[:, :, 1, :]) + tf.square(d_3d[:, :, 2, :]),
                                       axis=[1, 2],
                                       name='loss_3d_each')

        # candidates do not share variables, so summing their losses
        # optimizes all of them in a single solve
        loss = tf.reduce_sum(loss_reproj) + tf.reduce_sum(loss_3d)

        # optimize
        optimizer = ScipyOptimizerInterface(loss,
                                            var_list=[translation_, rotation],
                                            options={'gtol': 1e-12})

    with Timer('solve', verbose=True):
        with tf.Session(graph=graph) as session:
            session.run(tf.global_variables_initializer())
            optimizer.minimize(session)
            o_pos_3d, o_pos_2d, o_masked, o_w, o_d_3d, o_loss_reproj, \
                o_loss_3d, o_transform, o_translation, o_masked_sqr = \
                session.run([
                    pos_3d, pos_2d, masked, w, d_3d, loss_reproj,
                    loss_3d, transform, translation, masked_sqr])

    # candidate ids ordered by increasing total loss
    chosen = sorted(range(o_loss_reproj.shape[0]),
                    key=lambda i2: o_loss_reproj[i2] + o_loss_3d[i2])
    lg.info("Best candidate is %d with error %g + %g" %
            (chosen[0], o_loss_reproj[chosen[0]], o_loss_3d[chosen[0]]))

    # back from normalized camera coordinates to pixels
    o_pos_2d[:, :, 0, :] *= intr[0, 0]
    o_pos_2d[:, :, 1, :] *= intr[1, 1]
    o_pos_2d += intr[:2, 2:3]

    # Debug visualization, disabled: draws the query key-points and the
    # reprojected candidate joints over the query images.
    if False:
        query_2d[:, 0, :] *= intr[0, 0]
        query_2d[:, 1, :] *= intr[1, 1]
        query_2d += intr[:2, 2:3]

        ims = {}
        for cand_id in chosen[:5]:
            lg.debug("starting %s" % cand_id)
            pos_ = o_pos_2d[cand_id, ...]
            for lin_id in range(pos_.shape[0]):
                frame_id = gap[0] + lin_id
                try:
                    im = ims[frame_id].copy()
                except KeyError:
                    p_im = pjoin(d_query, 'origjpg',
                                 "color_%05d.jpg" % frame_id)
                    ims[frame_id] = cv2.imread(p_im)
                    im = ims[frame_id].copy()
                for jid in range(pos_.shape[-1]):

                    xy2 = int(round(query_2d[lin_id, 0, jid])), \
                          int(round(query_2d[lin_id, 1, jid]))
                    cv2.circle(im,
                               center=xy2,
                               radius=5,
                               color=(10., 200., 10.),
                               thickness=-1)

                    if o_masked[cand_id, lin_id, 0, jid] > 0 \
                       or o_w[lin_id, 0, jid] > 0:
                        xy = int(round(pos_[lin_id, 0, jid])), \
                             int(round(pos_[lin_id, 1, jid]))
                        cv2.circle(im,
                                   center=xy,
                                   radius=3,
                                   color=(200., 10., 10.),
                                   thickness=-1)
                        cv2.putText(im,
                                    "d2d: %g" %
                                    o_masked_sqr[cand_id, lin_id, jid],
                                    org=((xy2[0] - xy[0]) // 2 + xy[0],
                                         (xy2[1] - xy[1]) // 2 + xy[1]),
                                    fontFace=1,
                                    fontScale=1,
                                    color=(0., 0., 0.))
                        cv2.line(im, xy, xy2, color=(0., 0., 0.))
                        d3d = o_d_3d[cand_id, lin_id, :, jid]
                        d3d_norm = np.linalg.norm(d3d)
                        if d3d_norm > 0.:
                            cv2.putText(
                                im,
                                "%g" % d3d_norm,
                                org=((xy2[0] - xy[0]) // 2 + xy[0] + 10,
                                     (xy2[1] - xy[1]) // 2 + xy[1]),
                                fontFace=1,
                                fontScale=1,
                                color=(0., 0., 255.))

                cv2.putText(im,
                            text="%d::%02d" % (cand_id, lin_id),
                            org=(40, 80),
                            fontFace=1,
                            fontScale=2,
                            color=(255., 255., 255.))

                cv2.imshow('im', im)
                cv2.waitKey()
            break

        while cv2.waitKey() != 27:
            pass

    #
    # assemble the output scenelet of the best candidate
    #

    out_scenelets = []
    for cand_id in chosen[:1]:
        lg.debug("score of %d is %g + %g = %g" %
                 (cand_id, o_loss_reproj[cand_id], o_loss_3d[cand_id],
                  o_loss_reproj[cand_id] + o_loss_3d[cand_id]))
        scenelet = Scenelet()
        rate = query_full.skeleton.get_rate()
        prev_time = None
        for lin_id, frame_id in enumerate(range(gap[0], gap[1] + 1)):
            time_ = query_full.get_time(frame_id)
            if lin_id and rate is None:
                rate = time_ - prev_time
            # A time equal to the frame id is replaced by an extrapolation
            # from the previous frame (presumably such a time is a
            # placeholder -- TODO confirm). The first frame of the gap has
            # no previous time, so its time is kept instead of failing on
            # `None + rate`.
            if time_ == frame_id and prev_time is not None \
                    and rate is not None:
                time_ = prev_time + rate
            scenelet.skeleton.set_pose(frame_id=frame_id,
                                       pose=o_pos_3d[cand_id, lin_id, :, :],
                                       time=time_)
            prev_time = time_

        # optimized rigid transform as a homogeneous 4 x 4 matrix
        tr = np.identity(4)
        tr[:3, :3] = o_transform[cand_id, ...]
        tr[:3, 3] = o_translation[cand_id, :]
        # undo the centring that was applied to the candidate poses first
        tr_m = np.identity(4)
        tr_m[:3, 3] = -sclt_means[cand_id, :]
        tr = np.matmul(tr, tr_m)

        for oid, ob in scene.objects.items():
            if ob.label in ('wall', 'floor'):
                continue
            ob2 = copy.deepcopy(ob)
            ob2.apply_transform(tr)
            scenelet.add_object(obj_id=oid, scene_obj=ob2, clone=False)
        scenelet.name_scene = scene.name_scene
        out_scenelets.append((o_loss_reproj[cand_id], scenelet))
    return out_scenelets
예제 #2
0
def extract_annotated_scenelet(
        scene,
        prefix_obj='obb',
        frame_ids=None,
        frame_multiplier=1.,
        time_multiplier=1.,
        f_ob_is_joint=lambda ob: ob.name.startswith(
            'Output') and ob.name.endswith('Sphere'),
        f_joint_name_from_ob=lambda ob: ob.name.split('.')[1]):
    """Export the annotated Blender scene as a scenelet.

    Joint objects are sampled at every requested frame to build the
    skeleton, and every visible object whose name starts with `prefix_obj`
    is exported as one oriented bounding box part. Object names are split
    at '_': field 1 is parsed as the integer object id, field 2 is the
    category and field 3 the part name. Parts sharing an object id are
    grouped into one object.

    Args:
        scene (bpy.types.Scene):
            The current scene (e.g. bpy.context.scene).
        prefix_obj (str):
            Start of object names that we want to include in the scenelet
            as oriented bounding boxes.
        frame_ids (List[int]):
            A subset of frame IDs to export. All frames from
            scene.frame_start to scene.frame_end are exported if empty
            or None.
        frame_multiplier (float):
            Scaling for frame IDs. The result will be rounded and truncated.
            output.frame_id := int(round(frame_id * frame_multiplier))
            Input frames that map to an already exported output frame are
            skipped.
        time_multiplier (float):
            Scaling for times associated with frame_ids.
            output.time := int(round(frame_id * frame_multiplier))
            * time_multiplier.
        f_ob_is_joint (Callable[[bpy.types.Object], bool]]):
            Decides if a Blender object is a joint.
        f_joint_name_from_ob (Callable[[bpy.types.Object], str]):
            Gets the joint name from the Blender object name.

    Returns:
        Scenelet: The exported skeleton (empty if the scene contains no
        joint objects) and objects.
    """
    joints = {
        f_joint_name_from_ob(ob): ob
        for ob in bpy.data.objects if f_ob_is_joint(ob)
    }
    print("joints: %s" % joints)

    #
    # Skeleton
    #

    skeleton = Skeleton()
    if joints:
        assert len(joints) == 16, "No: %s" % len(joints)
        if not frame_ids:
            frame_ids = range(scene.frame_start, scene.frame_end + 1)
        for frame_id in frame_ids:
            o_frame_id = int(round(frame_id * frame_multiplier))
            # several input frames can round to the same output frame,
            # only the first one is exported
            if skeleton.has_pose(o_frame_id):
                print("skipping %s" % frame_id)
                continue
            print("frame_id: %s" % frame_id)
            scene.frame_set(frame_id)
            bpy.context.scene.update()
            pose = np.zeros(shape=(3, len(joints)))
            for joint, ob in joints.items():
                # last column of the world matrix
                pos = ob.matrix_world.col[3]
                print("pos[%s]: %s" % (ob.name, pos))
                joint_id = Joint.from_string(joint)
                print("joint %s is %s" % (joint, Joint(joint_id)))
                pose[:, joint_id] = from_blender(pos)
            print("o_frame: %s from %s" % (o_frame_id, frame_id))
            skeleton.set_pose(frame_id=o_frame_id,
                              pose=pose,
                              time=o_frame_id * time_multiplier)

    #
    # Objects
    #

    # group the visible bounding box objects by their object id
    objs_bl = {}
    for obj in bpy.data.objects:
        if obj.name.startswith(prefix_obj) and not obj.hide:
            obj_id = int(obj.name.split('_')[1])
            objs_bl.setdefault(obj_id, []).append(obj)

    print("objs: %s" % objs_bl)
    scenelet = Scenelet(skeleton=skeleton)
    print("scenelet: %s" % scenelet)
    for obj_id, parts_bl in objs_bl.items():
        name_category = None
        scene_obj = None
        for part_id, part_bl in enumerate(parts_bl):
            transl, rot, scale = part_bl.matrix_world.decompose()
            rot = rot.to_matrix()
            # flip the sign of scale and rotation together if a scale
            # component is negative; mixed signs are not supported
            if any(comp < 0. for comp in scale):
                scale *= -1.
                rot *= -1.
            assert not any(comp < 0. for comp in scale), "No: %s" % scale

            name_parts = part_bl.name.split('_')
            if name_category is None:
                # the first part decides the category of the object
                name_category = name_parts[2]
                scene_obj = SceneObj(label=name_category)
            else:
                assert name_category == name_parts[2], \
                    "No: %s %s" % (name_category, name_parts[2])
            name_part = name_parts[3]
            print("part: %s" % name_part)
            part = SceneObjPart(name_part)
            # The rotation rows are reordered to (0, -2, 1), presumably the
            # same axis change that from_blender applies to the centroid --
            # TODO confirm.
            # need to save full scale, not only half axes
            part.obb = Obb(centroid=np.array(
                from_blender([transl[0], transl[1], transl[2]])),
                           axes=np.array([[rot[0][0], rot[0][1], rot[0][2]],
                                          [-rot[2][0], -rot[2][1], -rot[2][2]],
                                          [rot[1][0], rot[1][1], rot[1][2]]]),
                           scales=np.array(
                               [scale[0] * 2., scale[1] * 2., scale[2] * 2.]))
            print("obb: %s" % part.obb.to_json(0))
            scene_obj.add_part(part_id, part)
        scenelet.add_object(obj_id, scene_obj, clone=False)
    return scenelet
예제 #3
0
def export_scenelet(um,
                    o_pos_3d,
                    o_polys_3d,
                    query_full_skeleton,
                    scenes,
                    joints_active,
                    transform_id=None):
    """Build a scenelet (poses and objects) from the output of the
    optimized problem.

    Args:
        um (stealth.pose.unk_manager.UnkManager):
            Data manager.
        o_pos_3d (np.ndarray):
            Output 3D poses, indexed by pose id first.
        o_polys_3d (np.ndarray): (6K, 4, 3)
            3D oriented bounding boxes stored stacked, six rectangles with
            four corners each per box.
        query_full_skeleton (stealth.logic.skeleton.Skeleton):
            Initial path containing time information.
        scenes:
            Scenes indexed by the `id_scene` of the exported poses. The
            scene of the first exported pose provides the charness and the
            'name_scenelet' aux info of the output.
        joints_active (list):
            List of joint_ids that were optimized for.
            Usage: pose16[:, joints_active] = o_pos_3d[pid, :, :]
        transform_id (int):
            Export only a specific group. Everything is exported, if None.
    Returns:
        A scenelet extracted from the data provided.
    """
    time_of = query_full_skeleton.guess_time_at

    # Poses to export: all of them, or the ones of the requested group
    pid_lookup = um.pids_2_scenes
    if transform_id is None:
        entries = list(pid_lookup.items())
    else:
        entries = [(pid, pid_lookup[pid])
                   for pid in um.get_pids_for(transform_id)]
    entries.sort(key=lambda entry: entry[1].frame_id)

    out = Scenelet()
    charness = None

    #
    # Skeleton
    #

    out_skeleton = out.skeleton
    for pid, pose_info in entries:
        if charness is None:
            # scenelet level information comes from the first pose's scene
            source_scene = scenes[pose_info.id_scene]
            charness = source_scene.charness
            out.add_aux_info('name_scenelet', source_scene.name_scenelet)
            out.charness = charness

        frame_id = int(pose_info.frame_id)
        if out_skeleton.has_pose(frame_id):
            # TODO: fix overlapping frame_ids
            lg.warning("[export_scenelet] Overwriting output frame_id %d" %
                       frame_id)

        # scatter the optimized joints into a full pose
        full_pose = np.zeros((3, Joint.get_num_joints()))
        full_pose[:, joints_active] = o_pos_3d[pid, :, :]
        # pelvis and neck are set to the midpoints of their neighbours
        full_pose[:, Joint.PELV] = \
            (full_pose[:, Joint.LHIP] + full_pose[:, Joint.RHIP]) / 2.
        full_pose[:, Joint.NECK] = \
            (full_pose[:, Joint.HEAD] + full_pose[:, Joint.THRX]) / 2.

        assert not out_skeleton.has_pose(frame_id=frame_id), \
            'Already has pose: {}'.format(frame_id)
        # the time is guessed by the input skeleton
        out_skeleton.set_pose(frame_id=frame_id,
                              pose=full_pose,
                              time=time_of(frame_id))

    #
    # Objects
    #

    current_obj = None
    current_oid = 0  # unique identifier that groups parts to objects
    for box_info in um.polys2scene.values():
        # skip boxes of other groups, if restricted to one
        if transform_id is not None \
                and box_info.transform_id != transform_id:
            continue

        first = box_info.poly_id_start
        last = first + box_info.n_polys
        # 6 x 4 x 3
        rects = o_polys_3d[first:last, ...]
        assert rects.shape[0] == 6, "Assumed cuboids here"

        # a change of object id starts a new output object
        if current_obj is None or current_oid != box_info.object_id:
            label = next(cat for cat in CATEGORIES
                         if CATEGORIES[cat] == box_info.cat_id)
            current_obj = SceneObj(label=label)
            current_oid = box_info.object_id
            out.add_object(obj_id=-1, scene_obj=current_obj, clone=False)
        part = current_obj.add_part(part_id=-1,
                                    label_or_part=box_info.part_label)

        # TODO: average for numerical precision errors
        centroid = np.mean(rects, axis=(0, 1))
        # The three box axes are the edges that leave corner 0 of
        # rectangle 0: two inside that rectangle, one to rectangle 1.
        unit_axes = []
        lengths = []
        for rect_id, corner_id in ((0, 1), (0, 3), (1, 0)):
            edge = rects[rect_id, corner_id, :] - rects[0, 0, :]
            length = np.linalg.norm(edge)
            edge /= length
            unit_axes.append(edge)
            lengths.append(length)
        # the axes are stored as the columns of a 3 x 3 matrix
        part.obb = Obb(centroid=centroid,
                       axes=np.concatenate(
                           [axis[:, None] for axis in unit_axes], axis=1),
                       scales=lengths)

    return out