Exemple #1
0
def read_charness_histograms(path):
    """Read the charness histograms stored at `path`, using a pickle cache.

    A cache file named "<path>.pickle" is consulted first. Its last element
    is the hash of the source file at the time the cache was written; the
    cached histograms are only used when that hash equals the current one.
    Otherwise the file is parsed again and the cache is rewritten.

    The original author listed these names here (presumably the variables
    of the .mat file -- TODO confirm):
    ['pigraph_norm_factor', 'pigraph_histogram_params',
     'pigraph_histogram_charness', 'pigraph_pose_charness',
     'pigraph_scenelet_names', 'categories']

    Args:
        path (str):
            Path of the file handed to `scipy.io.loadmat`.

    Returns:
        Whatever `parse_charness_histograms` produced for the file.
    """
    current_hash = hash_path_md5(path)
    path_pickle = "%s.pickle" % path
    hists = None

    if os.path.exists(path_pickle):
        with open(path_pickle, 'rb') as f:
            cached = pickle_load(f)
        if cached[-1] == current_hash:
            # Two cache layouts exist:
            #   (pose_charness, hists, hash) and (hists, hash).
            hists = cached[1] if len(cached) == 3 else cached[0]
            lg.info("Loaded hists from\n\t%s!!!" % split_path(path_pickle))
        else:
            lg.warning("Hashes don't match, reloading hists")

    if hists is not None:
        return hists

    # Cache missing or stale: parse the source file and refresh the cache.
    hists = parse_charness_histograms(scipy.io.loadmat(path))
    with open(path_pickle, 'wb') as f:
        pickle.dump((hists, current_hash), f, pickle.HIGHEST_PROTOCOL)
        lg.info("Saved hists to %s" % path_pickle)
    return hists
Exemple #2
0
def read_scenelets(d_scenelets, limit=0):
    """Load the skeleton scenelets found one level below `d_scenelets`.

    Every sub-directory of `d_scenelets` is scanned for files whose name
    starts with 'skel' and ends with '.json'; each match is loaded with
    `Scenelet.load`.

    Args:
        d_scenelets (str):
            Folder whose sub-folders contain the scenelet json files.
        limit (int):
            Stop loading once this many scenelets were read; 0 reads all.

    Returns:
        list: The loaded scenelets, in directory listing order.
    """
    # Sub-directories holding the full scenes.
    scene_dirs = []
    for entry in os.listdir(d_scenelets):
        candidate = os.path.join(d_scenelets, entry)
        if os.path.isdir(candidate):
            scene_dirs.append(candidate)

    # Skeleton files inside each scene directory.
    skeleton_paths = []
    for scene_dir in scene_dirs:
        for fname in os.listdir(scene_dir):
            if fname.startswith('skel') and fname.endswith('.json'):
                skeleton_paths.append(os.path.join(scene_dir, fname))

    loaded = []
    for skeleton_path in skeleton_paths:
        lg.info("Reading %s" % skeleton_path)
        loaded.append(Scenelet.load(skeleton_path))
        # The limit is checked after loading, so at least one file is read.
        if limit != 0 and len(loaded) >= limit:
            break

    return loaded
Exemple #3
0
    def save(self, path, save_obj=False):
        """Save to disk.

        Writes "<stem>.json" with the skeleton, names, auxiliary info,
        object paths, transforms and scores. When the scenelet has objects,
        they are saved into a freshly created "<stem>_objects" folder next
        to the json (an existing folder of that name is deleted first).

        Args:
            path (str):
                Output path, with or without a trailing ".json".
            save_obj (bool):
                Save obj-s not just json for objects.

        Raises:
            DeprecationWarning:
                If `aux_info` contains the key 'frame_ids'.
        """
        # Normalise to the extension-less stem. Only a real ".json" suffix
        # is removed, so a stem that merely ends in "json" is left intact.
        json_ext = '.json'
        if path.endswith(json_ext):
            path = path[:-len(json_ext)]
        out = self.skeleton.to_json()
        assert 'frame_ids' in out, "Time should be in skeleton..."

        if self.name_scene is not None and len(self.name_scene):
            out['name_scene'] = self.name_scene
        if self.name_scenelet is not None and len(self.name_scenelet):
            out['name_scenelet'] = self.name_scenelet

        # aux_info
        for key, value in sorted(self.aux_info.items()):
            if key == 'frame_ids':
                raise DeprecationWarning("Time is in skeleton now")
            assert key not in out, \
                "Key already added? %s" % key
            out[key] = value

        out_folder, stem = os.path.split(path)
        obj_folder = os.path.join(out_folder, stem + "_objects")
        if len(self.objects):
            # Start from an empty folder so stale object files don't linger.
            if os.path.exists(obj_folder):
                shutil.rmtree(obj_folder)
            os.mkdir(obj_folder)

        obj_paths = {}
        for obj_id, scene_obj in sorted(self.objects.items()):
            added_paths = \
                scene_obj.save(
                  path_prefix=os.path.join(obj_folder,
                                           scene_obj.get_name(obj_id)),
                  save_obj=save_obj)
            for k, v in added_paths.items():
                # Accumulate into our own list; never extend the list that
                # the object's save() returned.
                obj_paths.setdefault(k, []).extend(v)
        out['scenelets'] = {}
        obj_folder_name = os.path.basename(obj_folder)
        for k, v in sorted(obj_paths.items()):
            # Store paths relative to the json file's folder.
            out['scenelets'][k] = [
                os.path.join(obj_folder_name, os.path.split(p)[1])
                for p in v
            ]

        j_transforms = {}
        for frame_id, transform in self.transforms.items():
            j_transforms[frame_id] = transform.tolist()
        out['scenelets']['transforms'] = j_transforms
        out['charness'] = float(self.charness)
        out['match_score'] = float(self.match_score)
        if hasattr(self, 'density') and self.density is not None:
            out['density'] = self.density
        if hasattr(self, 'charness_poses') and self.charness_poses is not None:
            out['charness_poses'] = {
                'frame_ids': list(self.charness_poses.keys()),
                'values': [float(v) for v in self.charness_poses.values()]
            }

        # write to disk; `path` is the bare stem here, so always add the
        # extension
        json_path = path + json_ext
        with codecs.open(json_path, 'w', encoding='utf-8') as fout:
            if MyJsonEncode is None:
                json.dump(out, fout, sort_keys=True, indent=4)
            else:
                json.dump(out,
                          fout,
                          sort_keys=True,
                          indent=4,
                          cls=MyJsonEncode)
        lg.info("Wrote to %s" % os.path.abspath(json_path))
Exemple #4
0
def match(query_full,
          d_query,
          query_2d_full,
          scene,
          intr,
          gap,
          tr_ground,
          scale,
          thresh_log_conf=7.5,
          w_3d=0.01,
          fps=3,
          step_samples=100):
    """Fit windows of `scene` into a gap of the query and return the best.

    Candidate windows of `len_gap` frames are cut from `scene` around mid
    frames sampled every `step_samples` frames. For every candidate an
    (x, z) translation and a rotation angle about the y axis are optimized
    jointly, minimizing
      * the confidence-weighted squared 2D distance between the projected
        candidate joints and the query 2D keypoints, and
      * `w_3d` times the squared 3D distance to the query 3D poses
        (masked by `q_v`).
    The y translation is not optimized (`with_y` is False); it is fixed to
    `tr_ground[1, 3]`.

    Args:
        query_full:
            Query scenelet; read through `get_partial_scenelet`,
            `skeleton.get_rate()` and `get_time()`.
        d_query (str):
            Folder of the query; only used by the disabled debug
            visualization to read 'origjpg/color_%05d.jpg' images.
        query_2d_full:
            Skeleton with the 2D keypoints of the query; `has_pose`,
            `get_pose`, `has_confidence` and `_confidence` are used.
        scene:
            Scenelet the candidates are cut from; its objects (except those
            labelled 'wall' or 'floor') are copied into the output.
        intr:
            Camera intrinsics; `intr[0, 0]`, `intr[1, 1]` and
            `intr[:2, 2:3]` are read (presumably a 3x3 matrix -- confirm).
        gap:
            Indexable pair, first and last frame id of the gap (inclusive).
        tr_ground:
            Ground transform; only element `[1, 3]` is read.
        scale (float):
            Factor applied to the 2D keypoints before normalization.
        thresh_log_conf (float):
            Keypoints with |log(confidence)| at or above this get weight 0.
        w_3d (float):
            Weight of the 3D term in the loss.
        fps (int):
            Forwarded to `get_partial_scenelet` for the candidates and used
            to pad the sampled mid frame range.
        step_samples (int):
            Stride between candidate mid frames.

    Returns:
        list: Empty if no mid frame could be sampled from `scene`, otherwise
        a single `(reprojection_loss, Scenelet)` tuple for the candidate
        with the smallest reprojection + 3D loss.
    """
    with_y = False  # optimize for y as well
    np.set_printoptions(suppress=True, linewidth=220)

    pjoin = os.path.join

    # number of frames in the gap, both ends included
    len_gap = gap[1] - gap[0] + 1
    # q_v is used below as a per-frame mask (presumably 1 where the query
    # has a 3D pose -- TODO confirm against get_partial_scenelet)
    query, q_v = get_partial_scenelet(query_full,
                                      start=gap[0],
                                      end=gap[1] + 1,
                                      fps=1)
    q_v_sum = np.sum(q_v)
    # NOTE(review): q_v_sum == 0 would make this a division by zero --
    # confirm callers guarantee at least one flagged frame.
    q_v_sum_inv = np.float32(1. / q_v_sum)
    # lg.debug("q_v_sum: %s/%s" % (q_v_sum, q_v.size))
    # scene_min_y = scene.skeleton.get_min_y(tr_ground)
    # lg.debug("scene_min_y: %s" % repr(scene_min_y))

    # candidate window centers, leaving len_gap * fps frames at both ends
    mid_frames = range(len_gap * fps,
                       scene.skeleton.poses.shape[0] - len_gap * fps,
                       step_samples)
    if not len(mid_frames):
        return []

    # stack the candidate windows and their masks into two arrays
    scenelets, sc_v = (np.array(e) for e in zip(*[
        get_partial_scenelet(
            scene, mid_frame_id=mid_frame_id, n_frames=len_gap, fps=fps)
        for mid_frame_id in mid_frames
    ]))
    # for i, (scenelet, sc_v_) in enumerate(zip(scenelets, sc_v)):
    #     mn = np.min(scenelet[sc_v_.astype('b1'), 1, :])
    #     scenelets[i, :, 1, :] -= mn
    # mn = np.min(scenelets[i, sc_v_.astype('b1'), 1, :])
    # scenelets = np.array(scenelets, dtype=np.float32)
    # sc_v = np.array(sc_v, dtype=np.int32)
    # print("sc_v: %s" % sc_v)
    # print("q_v: %s" % q_v)

    lg.debug("have %d/%d 3D poses in scenelet, and %d/%d in query" %
             (np.sum(sc_v), sc_v.shape[0], np.sum(q_v), q_v.shape[0]))

    # 2D keypoints and their weights for the frames of the gap
    # (16 joints are hard-coded here)
    query_2d = np.zeros((len_gap, 2, 16), dtype=np.float32)
    conf_2d = np.zeros((len_gap, 1, 16), dtype=np.float32)
    for lin_id, frame_id in enumerate(range(gap[0], gap[1] + 1)):

        if query_2d_full.has_pose(frame_id):
            query_2d[lin_id, :, :] = query_2d_full.get_pose(frame_id)[:2, :]
        # else:
        #     lg.warning("Query2d_full does not have pose at %d?" % frame_id)

        # im = im_.copy()
        if query_2d_full.has_confidence(frame_id):
            # print("showing %s" % frame_id)
            for joint, conf in query_2d_full._confidence[frame_id].items():
                log_conf = abs(np.log(conf)) if conf >= 0. else 0.
                # print("conf: %g, log_conf: %g" % (conf, log_conf))
                # if log_conf <= thresh_log_conf:
                #     p2d = scale * query_2d_full.get_joint_3d(joint,
                #                                              frame_id=frame_id)
                #     p2d = (int(round(p2d[0])), int(round(p2d[1])))
                #     cv2.circle(im, center=p2d,
                #                radius=int(round(3)),
                #                color=(1., 1., 1., 0.5), thickness=1)
                # weight falls linearly from 1 (|log conf| == 0) to 0
                # (|log conf| >= thresh_log_conf)
                conf_2d[lin_id, 0, joint] = max(
                    0., (thresh_log_conf - log_conf) / thresh_log_conf)

            # cv2.imshow('im', im)
            # cv2.waitKey(100)
    # while cv2.waitKey() != 27: pass
    # NOTE(review): if no frame in the gap has a positive weight,
    # np.max(conf_2d) is 0 and this division produces NaNs -- confirm.
    conf_2d /= np.max(conf_2d)

    # scale from Denis' scale to current image size
    query_2d *= scale

    # move to normalized camera coordinates
    query_2d -= intr[:2, 2:3]
    query_2d[:, 0, :] /= intr[0, 0]
    query_2d[:, 1, :] /= intr[1, 1]

    #
    # initialize translation
    #

    # centroid of query poses
    c3d = np.mean(query[q_v.astype('b1'), :, :], axis=(0, 2))
    # estimate scenelet centroids
    sclt_means = np.array([
        np.mean(scenelets[i, sc_v[i, ...].astype('b1'), ...], axis=(0, 2))
        for i in range(scenelets.shape[0])
    ],
                          dtype=np.float32)
    # don't change height
    sclt_means[:, 1] = 0
    # center each candidate on its own (x, z) centroid
    scenelets -= sclt_means[:, None, :, None]
    lg.debug("means: %s" % repr(sclt_means.shape))
    # every candidate starts at the query centroid
    if with_y:
        np_translation = np.array([c3d for i in range(scenelets.shape[0])],
                                  dtype=np.float32)
    else:
        np_translation = np.array(
            [c3d[[0, 2]] for i in range(scenelets.shape[0])], dtype=np.float32)
    # initial angle alternates between 0 and pi by candidate index
    np_rotation = np.array(
        [np.pi * (i % 2) for i in range(scenelets.shape[0])],
        dtype=np.float32)[:, None]
    n_cands = np_translation.shape[0]
    graph = tf.Graph()
    # NOTE(review): the device is hard-coded to the first GPU.
    with graph.as_default(), tf.device('/gpu:0'):
        # translation being optimized: (x, z) per candidate unless with_y
        translation_ = tf.Variable(initial_value=np_translation,
                                   name='translation',
                                   dtype=tf.float32)
        # fixed y translation taken from the ground transform
        t_y = tf.fill(dims=(n_cands, ),
                      value=(tr_ground[1, 3]).astype(np.float32))
        # t_y = tf.fill(dims=(n_cands,), value=np.float32(0.))
        lg.debug("t_y: %s" % t_y)
        if with_y:
            translation = translation_
        else:
            # assemble (x, fixed y, z)
            translation = tf.concat(
                (translation_[:, 0:1], t_y[:, None], translation_[:, 1:2]),
                axis=1)

        lg.debug("translation: %s" % translation)
        # rotation angle about the y axis, one scalar per candidate
        rotation = tf.Variable(np_rotation, name='rotation', dtype=tf.float32)
        # lg.debug("rotation: %s" % rotation)

        # 2D keypoint weights (not optimized)
        w = tf.Variable(conf_2d, trainable=False, name='w', dtype=tf.float32)

        pos_3d_in = tf.Variable(query,
                                trainable=False,
                                name='pos_3d_in',
                                dtype=tf.float32)
        # pos_3d_in = tf.constant(query, name='pos_3d_in', dtype=tf.float32)

        pos_2d_in = tf.Variable(query_2d,
                                trainable=False,
                                name='pos_2d_in',
                                dtype=tf.float32)
        # pos_2d_in = tf.constant(query_2d, name='pos_2d_in',
        #                         dtype=tf.float32)

        pos_3d_sclt = tf.Variable(scenelets,
                                  trainable=False,
                                  name='pos_3d_sclt',
                                  dtype=tf.float32)
        # print("pos_3d_sclt: %s" % pos_3d_sclt)

        # rotation around y
        my_zeros = tf.zeros((n_cands, 1), dtype=tf.float32, name='my_zeros')
        # tf.add_to_collection('to_init', my_zeros)
        my_ones = tf.ones((n_cands, 1))
        # tf.add_to_collection('to_init', my_ones)
        c = tf.cos(rotation, 'cos')
        # tf.add_to_collection('to_init', c)
        s = tf.sin(rotation, 'sin')
        # t0 = tf.concat([c, my_zeros, -s], axis=1)
        # t1 = tf.concat([my_zeros, my_ones, my_zeros], axis=1)
        # t2 = tf.concat([s, my_zeros, c], axis=1)
        # transform = tf.stack([t0, t1, t2], axis=2, name="transform")
        # print("t: %s" % transform)
        # row-major entries of [[c, 0, -s], [0, 1, 0], [s, 0, c]]
        transform = tf.concat(
            [c, my_zeros, -s, my_zeros, my_ones, my_zeros, s, my_zeros, c],
            axis=1)
        transform = tf.reshape(transform, ((-1, 3, 3)), name='transform')
        print("t2: %s" % transform)
        # lg.debug("transform: %s" % transform)

        # transform to 3d
        # pos_3d = tf.matmul(transform, pos_3d_sclt) \
        #          + tf.tile(tf.expand_dims(translation, 2),
        #                    [1, 1, int(pos_3d_in.shape[2])])
        # pos_3d = tf.einsum("bjk,bcjd->bcjd", transform, pos_3d_sclt)
        shp = pos_3d_sclt.get_shape().as_list()
        # repeat each candidate's matrix over axes 1 and 3 of pos_3d_sclt
        # so the einsum below can contract the coordinate axis
        transform_tiled = tf.tile(transform[:, None, :, :, None],
                                  (1, shp[1], 1, 1, shp[3]))
        # print("transform_tiled: %s" % transform_tiled)
        pos_3d = tf.einsum("abijd,abjd->abid", transform_tiled, pos_3d_sclt)
        # print("pos_3d: %s" % pos_3d)
        pos_3d += translation[:, None, :, None]
        #pos_3d = pos_3d_sclt
        # print("pos_3d: %s" % pos_3d)

        # perspective divide
        # pos_2d = tf.divide(
        #     tf.slice(pos_3d, [0, 0, 0], [n_cands, 2, -1]),
        #     tf.slice(pos_3d, [0, 2, 0], [n_cands, 1, -1]))
        pos_2d = tf.divide(pos_3d[:, :, :2, :], pos_3d[:, :, 2:3, :])

        # print("pos_2d: %s" % pos_2d)

        diff = pos_2d - pos_2d_in
        # mask loss by 2d key-point visibility
        # print("w: %s" % w)
        # w_sum = tf.reduce_sum()
        masked = tf.multiply(diff, w)
        # print(masked)
        # loss_reproj = tf.nn.l2_loss(masked)
        # loss_reproj = tf.reduce_sum(tf.square(masked[:, :, 0, :])
        #                             + tf.square(masked[:, :, 1, :]),
        #                             axis=[1, 2])
        masked_sqr = tf.square(masked[:, :, 0, :]) \
                     + tf.square(masked[:, :, 1, :])
        # one reprojection loss per candidate
        loss_reproj = tf.reduce_sum(masked_sqr, axis=[1, 2])
        # lg.debug("loss_reproj: %s" % loss_reproj)

        # distance from existing 3D skeletons
        d_3d = q_v_sum_inv * tf.multiply(pos_3d - query[None, ...],
                                         q_v[None, :, None, None],
                                         name='diff_3d')
        # print(d_3d)

        # one 3D loss per candidate
        loss_3d = w_3d * tf.reduce_sum(tf.square(d_3d[:, :, 0, :]) + tf.square(
            d_3d[:, :, 1, :]) + tf.square(d_3d[:, :, 2, :]),
                                       axis=[1, 2],
                                       name='loss_3d_each')
        # print(loss_3d)

        # all candidates are optimized together through the summed loss
        loss = tf.reduce_sum(loss_reproj) + tf.reduce_sum(loss_3d)

        # optimize
        optimizer = ScipyOptimizerInterface(loss,
                                            var_list=[translation_, rotation],
                                            options={'gtol': 1e-12})

    with Timer('solve', verbose=True) as t:
        with tf.Session(graph=graph) as session:
            session.run(tf.global_variables_initializer())
            optimizer.minimize(session)
            # `translation` is fetched twice (o_t and o_translation)
            o_pos_3d, o_pos_2d, o_masked, o_t, o_r, o_w, o_d_3d, \
                o_loss_reproj, o_loss_3d, o_transform, o_translation = \
                session.run([
                    pos_3d, pos_2d, masked, translation, rotation, w,
                    d_3d, loss_reproj, loss_3d, transform, translation])
            o_masked_sqr = session.run(masked_sqr)
        # o_t, o_r = session.run([translation, rotation])
    # print("pos_3d: %s" % o_pos_3d)
    # print("pos_2d: %s" % o_pos_2d)
    # print("o_loss_reproj: %s, o_loss_3d: %s" % (o_loss_reproj, o_loss_3d))
    # print("t: %s" % o_t)
    # print("r: %s" % o_r)
    # candidate indices ordered by total loss, best first
    chosen = sorted((i for i in range(o_loss_reproj.shape[0])),
                    key=lambda i2: o_loss_reproj[i2] + o_loss_3d[i2])
    lg.info("Best candidate is %d with error %g + %g" %
            (chosen[0], o_loss_reproj[chosen[0]], o_loss_3d[chosen[0]]))
    # print("masked: %s" % o_masked)
    # opp = np.zeros_like(o_pos_3d)
    # for i in range(o_pos_3d.shape[0]):
    #     for j in range(o_pos_3d.shape[1]):
    #         for k in range(16):
    #             opp[i, j, :2, k] = o_pos_3d[i, j, :2, k] / o_pos_3d[i, j, 2:3, k]
    #             # opp[i, j, 0, k] *= intr[0, 0]
    #             # opp[i, j, 1, k] *= intr[1, 1]
    #             # opp[i, j, :2, k] *= intr[1, 1]
    #             a = o_pos_2d[i, j, :, k]
    #             b = opp[i, j, :2, k]
    #             if not np.allclose(a, b):
    #                 print("diff: %s, %s" % (a, b))

    # back from normalized camera coordinates to pixels
    o_pos_2d[:, :, 0, :] *= intr[0, 0]
    o_pos_2d[:, :, 1, :] *= intr[1, 1]
    o_pos_2d += intr[:2, 2:3]

    # Debug visualization, disabled: draws query and fitted 2D joints on the
    # frames of the gap and waits for key presses.
    # for cand_id in range(o_pos_2d.shape[0]):
    if False:
        # return
        # print("w: %s" % o_w)
        # print("conf_2d: %s" % conf_2d)
        # lg.debug("query_2d[0, 0, ...]: %s" % query_2d[0, 0, ...])
        query_2d[:, 0, :] *= intr[0, 0]
        query_2d[:, 1, :] *= intr[1, 1]
        # lg.debug("query_2d[0, 0, ...]: %s" % query_2d[0, 0, ...])
        query_2d += intr[:2, 2:3]
        # lg.debug("query_2d[0, 0, ...]: %s" % query_2d[0, 0, ...])

        # cache of the images already read, keyed by frame id
        ims = {}
        for cand_id in chosen[:5]:
            lg.debug("starting %s" % cand_id)
            pos_ = o_pos_2d[cand_id, ...]
            for lin_id in range(pos_.shape[0]):
                frame_id = gap[0] + lin_id
                try:
                    im = ims[frame_id].copy()
                except KeyError:
                    p_im = pjoin(d_query, 'origjpg',
                                 "color_%05d.jpg" % frame_id)
                    ims[frame_id] = cv2.imread(p_im)
                    im = ims[frame_id].copy()
                # im = im_.copy()
                for jid in range(pos_.shape[-1]):

                    # query keypoint
                    xy2 = int(round(query_2d[lin_id, 0, jid])), \
                          int(round(query_2d[lin_id, 1, jid]))
                    # print("printing %s" % repr(xy))
                    cv2.circle(im,
                               center=xy2,
                               radius=5,
                               color=(10., 200., 10.),
                               thickness=-1)

                    if o_masked[cand_id, lin_id, 0, jid] > 0 \
                       or o_w[lin_id, 0, jid] > 0:
                        # fitted keypoint
                        xy = int(round(pos_[lin_id, 0, jid])), \
                             int(round(pos_[lin_id, 1, jid]))
                        # print("printing %s" % repr(xy))
                        cv2.circle(im,
                                   center=xy,
                                   radius=3,
                                   color=(200., 10., 10.),
                                   thickness=-1)
                        cv2.putText(im,
                                    "d2d: %g" %
                                    o_masked_sqr[cand_id, lin_id, jid],
                                    org=((xy2[0] - xy[0]) // 2 + xy[0],
                                         (xy2[1] - xy[1]) // 2 + xy[1]),
                                    fontFace=1,
                                    fontScale=1,
                                    color=(0., 0., 0.))
                        cv2.line(im, xy, xy2, color=(0., 0., 0.))
                        d3d = o_d_3d[cand_id, lin_id, :, jid]
                        d3d_norm = np.linalg.norm(d3d)
                        if d3d_norm > 0.:
                            cv2.putText(
                                im,
                                "%g" % d3d_norm,
                                org=((xy2[0] - xy[0]) // 2 + xy[0] + 10,
                                     (xy2[1] - xy[1]) // 2 + xy[1]),
                                fontFace=1,
                                fontScale=1,
                                color=(0., 0., 255.))

                cv2.putText(im,
                            text="%d::%02d" % (cand_id, lin_id),
                            org=(40, 80),
                            fontFace=1,
                            fontScale=2,
                            color=(255., 255., 255.))

                # pos_2d_ = np.matmul(intr, pos_[lin_id, :2, :] / pos_[lin_id, 2:3, :])
                # for p2d in pos_2d_
                cv2.imshow('im', im)
                cv2.waitKey()
            break

        while cv2.waitKey() != 27:
            pass

    out_scenelets = []
    # only the best candidate is turned into a scenelet
    for cand_id in chosen[:1]:
        lg.debug("score of %d is %g + %g = %g" %
                 (cand_id, o_loss_reproj[cand_id], o_loss_3d[cand_id],
                  o_loss_reproj[cand_id] + o_loss_3d[cand_id]))
        scenelet = Scenelet()
        rate = query_full.skeleton.get_rate()
        prev_time = None
        for lin_id, frame_id in enumerate(range(gap[0], gap[1] + 1)):
            time_ = query_full.get_time(frame_id)
            if lin_id and rate is None:
                rate = time_ - prev_time
            # NOTE(review): if this condition holds on the first frame,
            # prev_time is still None and the addition raises -- confirm
            # get_time() never equals the frame id there.
            if time_ == frame_id:
                time_ = prev_time + rate
            scenelet.skeleton.set_pose(frame_id=frame_id,
                                       pose=o_pos_3d[cand_id, lin_id, :, :],
                                       time=time_)
            prev_time = time_
        # 4x4 matrix from the optimized rotation and translation
        tr = np.concatenate((np.concatenate(
            (o_transform[cand_id, ...], o_translation[cand_id, None, :].T),
            axis=1), [[0., 0., 0., 1.]]),
                            axis=0)
        # 4x4 matrix subtracting the candidate centroid (same centering as
        # applied to the poses before optimization)
        tr_m = np.concatenate(
            (np.concatenate((np.identity(3), -sclt_means[cand_id, None, :].T),
                            axis=1), [[0., 0., 0., 1.]]),
            axis=0)
        # center first, then rotate and translate
        tr = np.matmul(tr, tr_m)
        for oid, ob in scene.objects.items():
            if ob.label in ('wall', 'floor'):
                continue
            # transform a copy so that `scene` stays untouched
            ob2 = copy.deepcopy(ob)
            ob2.apply_transform(tr)
            scenelet.add_object(obj_id=oid, scene_obj=ob2, clone=False)
        scenelet.name_scene = scene.name_scene
        out_scenelets.append((o_loss_reproj[cand_id], scenelet))
    return out_scenelets
Exemple #5
0
def main(argv):
    """Fill the gaps of a query video with the best matching scenelets.

    For every gap found in the query skeleton, each scenelet of the
    scenelet folder is fitted with `match`, the results are sorted by
    score and the best ones are saved as
    "skel_<name>_fill_<start>_<end>__<rank>.json" into the 'fill' folder
    next to the video (that folder is deleted and recreated on every run).

    Args:
        argv (list of str):
            Command line arguments without the program name:
            `d_scenelets video [--gap-size-limit N]`.

    Raises:
        IOError: If the reference image used for the 2D rescale can't be
            read.
    """
    np.set_printoptions(suppress=True, linewidth=200)
    pjoin = os.path.join

    parser = argparse.ArgumentParser("matcher")
    parser.add_argument("d_scenelets",
                        type=argparse_check_exists,
                        help="Folder containing scenelets")
    parser.add_argument("video", type=argparse_check_exists, help="Input path")
    # Default to 0 (no gap is skipped); without a default the value is None
    # and the comparison against the gap length fails on Python 3.
    parser.add_argument("--gap-size-limit",
                        type=int,
                        default=0,
                        help="Smallest gap size to still explain")
    args = parser.parse_args(argv)
    d_query = os.path.dirname(args.video)

    # 2d keypoint rescale
    p_im = pjoin(d_query, 'origjpg', 'color_00100.jpg')
    im_ = cv2.imread(p_im)
    if im_ is None:
        # cv2.imread signals failure by returning None instead of raising
        raise IOError("Could not read reference image %s" % p_im)
    scale_2d = im_.shape[0] / float(INPUT_SIZE)

    query = Scenelet.load(args.video, no_obj=True)
    tr_ground = np.array(query.aux_info['ground'], dtype=np.float32)
    print("tr: %s" % tr_ground)

    name_query = os.path.basename(args.video).split('_')[1]
    query_2d = Scenelet.load(pjoin(d_query,
                                   "skel_%s_2d_00.json" % name_query)).skeleton
    p_intr = pjoin(d_query, 'intrinsics.json')
    with open(p_intr, 'r') as f_intr:
        intr = np.array(json.load(f_intr), dtype=np.float32)
    lg.debug("intr: %s" % intr)

    gaps = find_gaps(query.skeleton, min_pad=1)

    # The parsed scenelets are cached in a pickle inside the scenelet folder.
    p_scenelets_pickle = pjoin(args.d_scenelets, 'match_gap_scenelets.pickle')
    if os.path.exists(p_scenelets_pickle):
        # NOTE: unpickling can execute arbitrary code; only use a cache file
        # that this script wrote itself.
        with open(p_scenelets_pickle, 'rb') as f_cache:
            scenelets = pickle_load(f_cache)
    else:
        scenelets = read_scenelets(args.d_scenelets, limit=0)
        with open(p_scenelets_pickle, 'wb') as f_cache:
            pickle.dump(scenelets, f_cache)

    # start from an empty output folder
    p_out_sclts = pjoin(d_query, 'fill')
    if os.path.exists(p_out_sclts):
        shutil.rmtree(p_out_sclts)
    os.makedirs(p_out_sclts)
    times = []
    for gap_id, gap in enumerate(gaps):
        lg.debug("gap is %s" % repr(gap))
        if gap[1] - gap[0] < args.gap_size_limit:
            continue
        with Timer("gap %d" % gap_id) as timer:
            chosen = []
            for sc_id, sclt in enumerate(scenelets):
                lg.info("scenelet %d / %d" % (sc_id, len(scenelets)))
                # The floor is only looked up for logging; a scenelet
                # without a 'floor' object still raises here, as before.
                ground_obj = next(ob for ob in sclt.objects.values()
                                  if ob.label == 'floor')
                ground_part = ground_obj.get_part_by_name('floor')
                lg.debug("floor: %s" % ground_part)
                ground_transform = ground_part.obb.as_transform()
                lg.debug("floor: %s" % ground_transform)
                out_sclts = match(query,
                                  d_query,
                                  query_2d,
                                  sclt,
                                  intr,
                                  gap,
                                  tr_ground,
                                  scale=scale_2d)
                if not len(out_sclts):
                    continue
                # pick best from scene: [score, scenelet, source index]
                chosen.append(list(out_sclts[0]) + [sc_id])

            chosen.sort(key=lambda score_sclt: score_sclt[0])
            for sid, (score, out_sclt, sc_id) in enumerate(chosen):
                p_out = pjoin(
                    p_out_sclts, "skel_%s_fill_%03d_%03d__%02d.json" %
                    (name_query, gap[0], gap[1], sid))
                out_sclt.save(p_out)
                if sid > 5:
                    break
        times.append(timer.get_elapsed_ms())
    if times:
        lg.info("mean time per gap: %s" % np.mean(times))
    else:
        # avoid taking the mean of an empty list
        lg.info("No gaps were processed")
Exemple #6
0
def main(argv=None):
    """Rasterize a scene's objects into an occupancy volume and export it.

    Loads ``skel_<name>.json`` and ``state<postfix>.pickle`` from the scene
    folder, builds a ``State`` at the target resolution, and marks every
    grid cell that an object's footprint covers by at least ``-thresh-area``
    with ``object id + 1`` in the two angle bins neighbouring the object's
    orientation. It then writes

    * per-category evidence/occupancy images and an ``index.html`` entry
      below ``<scene parent>/../occupancy``,
    * the state, occupancy, angle map, skeletons and object rectangles
      below ``<scene parent>/../dataset``.

    Args:
        argv: Command line arguments *without* the program name. ``None``
            lets argparse fall back to ``sys.argv[1:]``.

    Returns:
        ``False`` if the state pickle does not exist, ``True`` once all
        outputs were written.
    """
    np.set_printoptions(suppress=True)
    parser = argparse.ArgumentParser()
    parser.add_argument('d', help="Folder of scene")
    # type=float: without it a value given on the command line stays a
    # string and breaks the arithmetic/comparisons below.
    parser.add_argument('-resolution',
                        type=float,
                        help='Target resolution for occupancy map',
                        default=0.1)
    parser.add_argument(
        '-thresh-area',
        type=float,
        help='Ratio of occupancy map cell area that has to be occupied '
        'for it to count as occupied',
        default=0.1)
    parser.add_argument('-postfix',
                        type=str,
                        help="Scene postfix for augmentation",
                        default="")
    # parse_args(None) reads sys.argv[1:]; passing the full sys.argv would
    # make the program name the positional 'd'.
    args = parser.parse_args(argv)
    res_target = args.resolution
    if args.postfix and not args.postfix.startswith('_'):
        args.postfix = "_%s" % args.postfix

    path_parent, name_input = os.path.split(os.path.abspath(args.d))
    lg.warning("name input: %s" % name_input)
    path_for_tf = os.path.abspath(
        os.path.join(path_parent, os.pardir, 'dataset'))
    if not os.path.exists(path_for_tf):
        os.makedirs(path_for_tf, mode=0o0775)

    lg.debug("Loading scenelet...")
    path_scenelet = os.path.join(args.d, "skel_%s.json" % name_input)
    scenelet = Scenelet.load(path_scenelet)
    lg.debug("Scenelet: %s" % scenelet)

    path_state_pickle = os.path.join(args.d, "state%s.pickle" % args.postfix)
    if not os.path.exists(path_state_pickle):
        lg.error("Does not exist: %s" % path_state_pickle)
        return False

    lg.debug("Loading volume...")
    with open(path_state_pickle, 'rb') as f_state:
        state = pickle_load(f_state)
    lg.debug("Loaded volume...")

    lg.debug("Creating scene from scenelet")
    # NOTE(review): `no_vis` is not defined in this function; presumably a
    # module-level flag that disables the VTK visualizer - confirm.
    if not no_vis:
        vis = Visualizer(win_size=(1024, 1024))
        vis.add_coords()
    else:
        vis = None

    occup = State(room=state.room,
                  tr_ground_inv=None,
                  res_theta=state.resolution[3],
                  resolution=[res_target, res_target, res_target])
    occup.get_volume(labels_to_lin_ids_arg=state.get_labels_to_lin_ids())
    # One angle per (category, x, y) cell; negative means "no object here".
    occup_angle = np.ones(shape=(len(
        occup.volume), occup.volume[0].shape[0], occup.volume[0].shape[1], 1),
                          dtype=np.float32) * -1.
    assert np.min(occup_angle) < 0. and np.max(occup_angle) < 0., "Not empty"

    grid_polys = get_grid_shapely(occup=occup, res_orig=state.resolution)
    occup.volume.flags.writeable = True
    volume_occp = occup.volume
    angles = sorted(state.get_angles())
    labels_to_lin_ids = occup.get_labels_to_lin_ids()
    had_vtk_problem = no_vis

    plt.figure()

    rects = []
    for oid, ob in scenelet.objects.items():
        assert oid >= 0, "Need positive here"
        label = ob.label
        if label in TRANSLATIONS_CATEGORIES:
            label = TRANSLATIONS_CATEGORIES[label]

        if label not in labels_to_lin_ids:
            continue

        try:
            poly = get_poly([part.obb for part in ob.parts.values()])
        except ValueError as e:
            print("\n===========\n\nShapely error: %s for %s\n\n" %
                  (e, (label, oid, ob)))
            with open('error.log', 'a') as f:
                f.write("[%s] %d, %s, %s\n" % (args.d, oid, label, ob))
            continue

        ob_angle = ob.get_angle(positive_only=True)
        assert 0. <= ob_angle <= 2 * np.pi, "No: %g" % ob_angle

        rect = get_rectangle(poly, ob_angle)
        rect.extend([oid, CATEGORIES[label]])
        rects.append(rect)

        cat_id = labels_to_lin_ids[label]  # cat_id in volume, not categories
        for gp in grid_polys:
            # skip, if not occupied enough
            if gp.poly.intersection(poly).area / gp.area < args.thresh_area:
                continue
            # save occupancy
            gp.occupancy = 1.
            id_angle_lower = None
            id_angle_upper = None
            # Angles at or beyond the last bin wrap around to the first
            # one. `>=` (not `>`): an angle exactly equal to the last bin
            # would otherwise match no bin and trip the assertion below.
            if ob_angle >= angles[-1]:
                id_angle_lower = len(angles) - 1
                id_angle_upper = 0
            else:
                for id_angle, angle in enumerate(angles):
                    if ob_angle < angle:
                        id_angle_upper = id_angle
                        # -1 for angles below the first bin, which
                        # indexes the last bin (wrap-around)
                        id_angle_lower = id_angle - 1
                        break
            assert id_angle_lower is not None \
                   and id_angle_upper is not None, \
                "Wrong?"
            assert id_angle_upper != id_angle_lower, \
                "? %s %s" % (id_angle_lower, id_angle_upper)

            # cache
            xy = gp.xy

            # zero means empty in occupancy,
            # so object ids are shifted with 1
            # we need object ids to filter "untouched" objects
            # in tfrecords_create
            if volume_occp[cat_id, xy[0], xy[1], id_angle_lower] == 0 \
               or label in CATEGORIES_DOMINANT:
                volume_occp[cat_id, xy[0], xy[1], id_angle_lower] = oid + 1
            if volume_occp[cat_id, xy[0], xy[1], id_angle_upper] == 0 \
               or label in CATEGORIES_DOMINANT:
                volume_occp[cat_id, xy[0], xy[1], id_angle_upper] = oid + 1

            # angles are right now not per-category, but per-scene
            # hence, an object can only overwrite, if it's usually "above"
            # other objects, e.g. a table
            # this is a hack for a z-test
            if occup_angle[cat_id, xy[0], xy[1], 0] < 0. \
               or label in CATEGORIES_DOMINANT:
                occup_angle[cat_id, xy[0], xy[1], 0] = ob_angle

        if not had_vtk_problem:
            color = COLORS_CATEGORIES[label] if label in COLORS_CATEGORIES \
                else (200., 200., 200.)
            try:
                for id_part, part in ob.parts.items():
                    vis.add_mesh(MeshOBJ.from_obb(part.obb),
                                 name="ob_%02d_part_%02d" % (oid, id_part),
                                 color=color)
            except AttributeError:
                print("VTK problem...")
                had_vtk_problem = True
    plt.close()
    if not had_vtk_problem:
        vis.set_camera_pos(pos=(0., -1., 0.))
        vis.camera().SetFocalPoint(0., 0., 0.)
        vis.camera().SetViewUp(-1., 0., 0.)
        vis.set_camera_type(is_ortho=True)
        vis.camera().SetParallelScale(3.)

    # NOTE(review): args.postfix already starts with '_' here, so the
    # names below contain a double underscore; kept, because consumers of
    # these files may rely on it - confirm.
    name_recording = "%s_%s" % (os.path.basename(args.d), args.postfix) \
        if args.postfix else os.path.basename(args.d)
    lg.info("name_recording: %s" % name_recording)

    path_out_occp = os.path.join(os.path.dirname(args.d), os.pardir,
                                 'occupancy', name_recording)
    if not os.path.exists(path_out_occp):
        os.makedirs(path_out_occp)

    # prepare www storage
    www_grid = {'evidence': {}, 'occ': {}}

    # normalize evidence maps
    vmax = 0.
    ims = {}
    for cat, cat_id in labels_to_lin_ids.items():
        ims[cat] = np.squeeze(
            np.sum(state.volume[cat_id, :, :, :], axis=2, keepdims=True))
        vmax = max(vmax, np.max(ims[cat]))

    # gather joined occupancy map
    im_sum = None

    # for each evidence category
    for cat, cat_id in labels_to_lin_ids.items():
        # Same zero guard as for o_sum below: avoid dividing by an
        # all-zero evidence maximum.
        if vmax > 0.:
            im = ims[cat] / vmax * 255.
        else:
            im = ims[cat] * 255.
        path_out_im = os.path.join(path_out_occp, "e_%s.jpg" % cat)
        cv2.imwrite(path_out_im, im)
        www_grid['evidence'][cat] = path_out_im

        im = np.squeeze(volume_occp[cat_id, :, :, 0])
        path_out_im = os.path.join(path_out_occp, "o_%s.jpg" % cat)
        cv2.imwrite(path_out_im, im * 255.)
        www_grid['occ'][cat] = path_out_im

        if im_sum is None:
            im_sum = im.copy()
        else:
            im_sum = np.maximum(im, im_sum)

    #
    # save dataset
    #
    name_input_old = name_input
    if args.postfix:
        name_input = "%s_%s" % (name_input, args.postfix)

    # state
    path_state_dest = os.path.join(path_for_tf, "state_%s.pickle" % name_input)
    shutil.copyfile(path_state_pickle, path_state_dest)
    lg.info("Copied\n\t%s to\n\t%s" % (path_state_pickle, path_state_dest))

    # occupancy
    path_occup_dest = os.path.join(path_for_tf, "occup_%s.pickle" % name_input)
    with open(path_occup_dest, 'wb') as f_occup:
        pickle.dump(occup, f_occup, -1)
    lg.info("Wrote to %s" % path_occup_dest)

    # occupancy_angle
    path_occup_angle_dest = os.path.join(path_for_tf,
                                         "angle_%s.npy" % name_input)
    min_angle = np.min(occup_angle)
    assert min_angle < 0., "No empty cells??"
    lg.debug("min angle is %s" % min_angle)
    with open(path_occup_angle_dest, 'wb') as f_angle:
        np.save(f_angle, occup_angle)
    lg.info("Wrote to %s" % path_occup_angle_dest)

    # skeleton
    path_copied = shutil.copy2(path_scenelet, path_for_tf)
    lg.info("Copied\n\t%s to \n\t%s" % (path_scenelet, path_copied))

    # charness skeleton
    name_skeleton_charness = "skel_%s-charness.json" % name_input_old
    path_scenelet_charness = os.path.join(args.d, name_skeleton_charness)
    assert os.path.exists(path_scenelet_charness), \
        "Does not exist: %s" % path_scenelet_charness
    shutil.copy2(path_scenelet_charness, path_for_tf)
    assert os.path.exists(os.path.join(path_for_tf, name_skeleton_charness)), \
        "Does not exist: %s" % os.path.join(path_for_tf,
                                            name_skeleton_charness)

    # rectangles
    name_rectangles = "rectangles_%s.npy" % name_input_old
    path_rectangles = os.path.join(path_for_tf, name_rectangles)
    with open(path_rectangles, 'wb') as f_rects:
        np.save(f_rects, rects)

    #
    # visualize
    #

    path_out_im = os.path.join(path_out_occp, '3d.png')
    if not had_vtk_problem:
        vis.save_png(path_out_im)
    www_grid['3d'] = path_out_im

    path_out_im = os.path.join(path_out_occp, 'o_sum.png')
    max_im_sum = np.max(im_sum)
    if max_im_sum > 0.:
        cv2.imwrite(path_out_im, im_sum / max_im_sum * 255.)
    else:
        cv2.imwrite(path_out_im, im_sum * 255.)
    www_grid['o_sum'] = path_out_im

    # Append (mode 'a') this scene's table to the shared overview page.
    path_www = os.path.join(path_out_occp, os.pardir)
    with open(os.path.join(path_www, 'index.html'), 'a') as f:
        f.write("<style> img {image-rendering: pixelated; } </style>\n")
        f.write("<script>\n")
        f.write("</script>\n")
        f.write("<h3>%s</h3>" % os.path.basename(args.d))
        f.write('<table>\n')

        f.write("<tr>\n")
        f.write("<th>3d</th>")
        f.write("<th>Occupancy sum</th>")
        for cat in www_grid['evidence']:
            f.write("\t<th>%s</th>\n" % cat)
        f.write("<th></th>\n")  # titles
        f.write("</tr>\n")

        f.write("<tr>\n")
        # 3D
        f.write("\t<td rowspan=\"2\">\n")
        path_im = os.path.relpath(www_grid['3d'], path_www)
        f.write("\t<a href=\"%s\">\n"
                "\t\t<img src=\"%s\" height=\"400\" />\n"
                "\t</a>\n" % (path_im, path_im))

        # Evidence sum
        f.write("\t<td rowspan=\"2\">\n")
        path_im = os.path.relpath(www_grid['o_sum'], path_www)
        f.write("\t<a href=\"%s\">\n"
                "\t\t<img src=\"%s\" height=\"400\" />\n"
                "\t</a>\n" % (path_im, path_im))
        # Evidence
        for cat in www_grid['evidence']:
            f.write("<td style=\"padding-bottom: 2px\">\n")
            path_im = os.path.relpath(www_grid['evidence'][cat], path_www)
            f.write("\t<a href=\"%s\">\n"
                    "\t\t<img src=\"%s\" height=\"200\" />\n"
                    "\t</a>\n" % (path_im, path_im))
            f.write("</td>\n")
        f.write("<td>Evidence</td>\n")

        f.write("\t</td>\n")
        f.write("</tr>\n")

        f.write("<tr>\n")
        for cat in www_grid['occ']:
            f.write("<td>\n")
            path_im = os.path.relpath(www_grid['occ'][cat], path_www)
            f.write("\t<a href=\"%s\">\n"
                    "\t\t<img src=\"%s\" height=\"200\" />\n"
                    "</a>\n" % (path_im, path_im))
            f.write("</td>\n")
        f.write("<td>Occupancy map</td>\n")
        f.write("</tr>")

        f.write('</table>')

    return True
Exemple #7
0
def main(argv):
    """Convert Denis' pose estimates to our skeletons and optimize the path.

    Reads the ``skeletons_<N>.json`` with the highest ``N`` from the
    ``denis`` folder, assigns detected poses to actors with
    ``greedy_actors``, converts them to a 3D and a 2D ``Skeleton`` (with
    per-joint visibility and confidence), runs ``prepare`` and
    ``optimize_path`` on them, and saves the resulting scenelets, the
    command line and (unless ``--no-img``) annotated images into the scene
    folder, i.e. the parent of ``-d``.

    Args:
        argv: Command line arguments without the program name; parsed with
            argparse and appended to ``args_denis.txt``.

    Raises:
        RuntimeError: If ``scene_params.json`` did not exist. A default
            file is written first so that it can be reviewed and the
            script re-run.
    """
    conf = Conf.get()
    parser = argparse.ArgumentParser("Denis pose converter")
    parser.add_argument('camera_name',
                        help="Camera name ('G15', 'S6')",
                        type=str)
    parser.add_argument(
        '-d',
        dest='dir',
        required=True,
        help="Path to the <scene folder>/denis containing skeletons.json")
    parser.add_argument(
        '-filter',
        dest='with_filtering',
        action="store_true",
        help="Should we do post-filtering (1-euro) on the pelvis positions")
    parser.add_argument('-huber',
                        required=False,
                        help="Should we do huber loss?",
                        action='store_true')
    parser.add_argument('-smooth',
                        type=float,
                        default=0.005,
                        help="Should we have a smoothness term (l2/huber)?")
    parser.add_argument(
        '--winsorize-limit',
        type=float,
        default=conf.optimize_path.winsorize_limit,
        help='Threshold for filtering too large jumps of the 2D centroid')
    parser.add_argument('--no-resample',
                        action='store_true',
                        help="add resampled frames")
    # Both spellings used to be registered separately with the same
    # destination, type and default; one definition covers both.
    parser.add_argument('--n-actors',
                        '-n-actors',
                        type=int,
                        default=1,
                        help="How many skeletons to track "
                        "(max number of people in scene).")
    parser.add_argument('--person_height',
                        type=float,
                        help='Assumed height of human(s) in video.',
                        default=Conf.get().optimize_path.person_height)
    parser.add_argument(
        '--forwards-window-size',
        type=int,
        help='How many poses in time to look before AND after to '
        'average forward direction. 0 means no averaging. Default: 0.',
        default=0)
    parser.add_argument('--no-img',
                        action='store_true',
                        help='Read and write images (vis reproj error)')
    parser.add_argument('--postfix',
                        type=str,
                        help="output file postfix.",
                        default='unannot')
    args = parser.parse_args(argv)
    show = False  # debugging switch for interactive plots
    args.resample = not args.no_resample
    assert os.path.exists(args.dir), "Source does not exist: %s" % args.dir
    p_scene = os.path.normpath(os.path.join(args.dir, os.pardir))  # type: str
    p_video_params = os.path.join(p_scene, 'video_params.json')
    assert os.path.exists(p_video_params), "Need video_params.json for rate"
    # There is no '-r' option any more, so the rate comes from the file.
    if 'r' not in args or args.r is None:
        with open(p_video_params, 'r') as f_params:
            args.r = json.load(f_params)['rate-avconv']

    # manual parameters (depth initialization, number of actors)
    p_scene_params = os.path.join(args.dir, os.pardir, 'scene_params.json')
    if not os.path.exists(p_scene_params):
        scene_params = {
            'depth_init': 10.,
            'actors': args.n_actors,
            'ground_rot': [0., 0., 0.]
        }
        # Close (and thereby flush) the file before bailing out.
        with open(p_scene_params, 'w') as f_params:
            json.dump(scene_params, f_params)
        raise RuntimeError("Inited scene_params.json, please check: %s" %
                           p_scene_params)

    with open(p_scene_params, 'r') as f_params:
        scene_params = json.load(f_params)
    lg.warning("Will work with %d actors and init depth to %g" %
               (scene_params['actors'], scene_params['depth_init']))
    assert '--n-actors' not in argv \
           or args.n_actors == scene_params['actors'], \
        "Actor count mismatch, remove %d from args, because " \
        "scene_params.json says %d?" \
        % (args.n_actors, scene_params['actors'])
    args.n_actors = scene_params['actors']
    # .get(): tolerate parameter files that lack 'ground_rot'
    ground_rot = scene_params.get('ground_rot') or [0., 0., 0.]

    # load images
    path_images = os.path.abspath(os.path.join(args.dir, os.pardir, 'origjpg'))
    images = {}
    shape_orig = None
    if not args.no_img:
        images, shape_orig = load_images(path_images)

    # pick skeletons_<N>.json with the largest N
    path_skeleton = \
        max((f for f in os.listdir(args.dir)
             if f.startswith('skeletons') and f.endswith('json')),
            key=lambda s: int(os.path.splitext(s)[0].split('_')[1]))
    print("path_skeleton: %s" % path_skeleton)
    with open(os.path.join(args.dir, path_skeleton), 'r') as f_skel:
        data = json.load(f_skel)

    # The alternative gurobi-based multi-actor assignment was permanently
    # disabled (`if False:`) and referenced undefined names; only the
    # greedy assignment is kept.
    pose_ids = greedy_actors(data, n_actors=args.n_actors)
    data = DataPosesWrapper(data=data)

    # visible_f[actor_id][expanded frame id][joint id] = confidence
    visible_f = {a: {} for a in range(args.n_actors)}
    if show:
        plt.ion()

    # how many images we have
    min_frame_id = min(pose_ids)
    frames_mod = max(pose_ids) - min_frame_id + 1
    skel_ours = Skeleton(frames_mod=frames_mod,
                         n_actors=args.n_actors,
                         min_frame_id=min_frame_id)
    skel_ours_2d = Skeleton(frames_mod=frames_mod,
                            n_actors=args.n_actors,
                            min_frame_id=min_frame_id)

    frames = data.get_frames()
    my_visibilities = [[], []]  # raw confidences, thresholded confidences
    for frame_id in frames:
        pose_in = data.get_poses_3d(frame_id=frame_id)
        pose_in_2d = data.get_poses_2d(frame_id=frame_id)

        for actor_id in range(args.n_actors):
            # expanded frame_id
            frame_id2 = Skeleton.unmod_frame_id(frame_id=frame_id,
                                                actor_id=actor_id,
                                                frames_mod=frames_mod)
            assert (actor_id != 0) ^ (frame_id2 == frame_id), "no"
            frame_id_mod = skel_ours.mod_frame_id(frame_id=frame_id2)

            assert frame_id_mod == frame_id, \
                "No: %d %d %d" % (frame_id, frame_id2, frame_id_mod)
            actor_id2 = skel_ours.get_actor_id(frame_id2)
            assert actor_id2 == actor_id, "no: %s %s" % (actor_id, actor_id2)

            # which pose explains this actor in this frame
            pose_id = pose_ids[frame_id][actor_id]
            # check, if actor found
            if pose_id < 0:
                continue

            # 3D pose
            pose = pose_in[pose_id, :, JointDenis.revmap].T
            # added by Aron on 4/4/2018 (Denis' pelvis is too high up)
            pose[:, Joint.PELV] = (pose[:, Joint.LHIP] + pose[:, Joint.RHIP]) \
                                  / 2.
            skel_ours.set_pose(frame_id2, pose)

            # 2D pose
            pose_2d = pose_in_2d[pose_id, :, :]
            arr = np.array(JointDenis.pose_2d_to_ours(pose_2d),
                           dtype=np.float32).T
            skel_ours_2d.set_pose(frame_id2, arr)

            #
            # visibility (binary) and confidence (float)
            #
            vis_i = data.get_visibilities(frame_id)[pose_id]
            vis_f = data.get_confidences(frame_id)[pose_id]
            for jid, visible in enumerate(vis_i):  # for each joint
                # binary visibility
                jid_ours = JointDenis.to_ours_2d(jid)
                skel_ours_2d.set_visible(frame_id2, jid_ours, visible)

                # confidence (fractional visibility)
                if np.isnan(vis_f[jid]):
                    continue

                visible_f[actor_id].setdefault(frame_id2,
                                               {})[jid_ours] = vis_f[jid]
                conf_ = get_conf_thresholded(vis_f[jid],
                                             thresh_log_conf=None,
                                             dtype_np=np.float32)
                skel_ours_2d.set_confidence(frame_id=frame_id2,
                                            joint=jid_ours,
                                            confidence=conf_)
                my_visibilities[0].append(vis_f[jid])
                my_visibilities[1].append(conf_)
            skel_ours_2d._confidence_normalized = True

    plt.figure()
    plt.plot(my_visibilities[0], my_visibilities[1], 'o')
    plt.savefig('confidences.pdf')

    assert skel_ours.n_actors == args.n_actors, "no"
    assert skel_ours_2d.n_actors == args.n_actors, "no"
    # align to room
    min_z = np.min(skel_ours.poses[:, 2, :])
    print("min_max: %s, %s" % (min_z, np.max(skel_ours.poses[:, 2, :])))
    # NOTE(review): adding (rather than subtracting) the minimum looks
    # unusual for an alignment; kept as is - confirm the sign convention.
    skel_ours.poses[:, 2, :] += min_z
    skel_ours.poses /= 1000.
    # The output is scaled to 2m by Denis.
    # We change this to 1.8 * a scale in order to correct for
    # the skeletons being a bit too high still.
    skel_ours.poses *= \
        args.person_height * conf.optimize_path.height_correction / 2.
    skel_ours.poses[:, 2, :] *= -1.
    skel_ours.poses = skel_ours.poses[:, [0, 2, 1], :]

    # refine
    # Name of the scene folder; derived from the normalized path, so that
    # a trailing slash or a relative '-d' does not yield a wrong name.
    name_video = os.path.basename(os.path.abspath(p_scene))
    out_path = os.path.join(args.dir, os.pardir,
                            "skel_%s_%s.json" % (name_video, args.postfix))
    out_path_orig = os.path.join(args.dir, os.pardir,
                                 "skel_%s_lfd_orig.json" % name_video)
    sclt_orig = Scenelet(skeleton=copy.deepcopy(skel_ours))
    sclt_orig.save(out_path_orig)

    # keep the unoptimized 2D detections for saving/visualization below
    skel_ours_2d_all = copy.deepcopy(skel_ours_2d)
    assert len(skel_ours_2d_all.get_frames()), skel_ours_2d_all.get_frames()

    #
    # Optimize
    #

    skel_ours, skel_ours_2d, intrinsics, frame_ids_filled_in = prepare(
        args.camera_name,
        winsorize_limit=args.winsorize_limit,
        shape_orig=shape_orig,
        path_scene=p_scene,
        skel_ours_2d=skel_ours_2d,
        skel_ours=skel_ours,
        resample=args.resample,
        path_skel=path_skeleton)
    frames_ignore = []
    tr_ground = np.eye(4, dtype=np.float32)
    skel_opt, out_images, K = \
        optimize_path(
          skel_ours, skel_ours_2d, images, intrinsics=intrinsics,
          path_skel=out_path, shape_orig=shape_orig,
          use_huber=args.huber, weight_smooth=args.smooth,
          frames_ignore=frames_ignore, resample=args.resample,
          depth_init=scene_params['depth_init'],
          ground_rot=ground_rot)

    for frame_id in skel_opt.get_frames():
        skel_opt.set_time(frame_id=frame_id, time=float(frame_id) / args.r)

    skel_opt_resampled = Skeleton.resample(skel_opt)

    # Filter pelvis
    if args.with_filtering:
        out_filter_path = os.path.join(args.dir, os.pardir, "vis_filtering")
        skel_opt = filter_(skel_opt_resampled,
                           out_filter_path=out_filter_path,
                           skel_orig=skel_opt,
                           weight_smooth=args.smooth,
                           forwards_window_size=args.forwards_window_size)
    else:
        skel_opt.estimate_forwards(k=args.forwards_window_size)
        skel_opt_resampled.estimate_forwards(k=args.forwards_window_size)

    min_y, max_y = skel_opt.get_min_y(tr_ground)
    print("min_y: %s, max_y: %s" % (min_y, max_y))

    #
    # save
    #
    frame_ids_old = set(skel_opt.get_frames())
    if args.resample:
        skel_opt = skel_opt_resampled
        frame_ids_filled_in.update(
            set(skel_opt.get_frames()).difference(frame_ids_old))
        lg.warning("Saving resampled scenelet!")
    scenelet = Scenelet(skel_opt)
    del skel_opt
    tr_ground[1, 3] = min_y
    scenelet.aux_info['ground'] = tr_ground.tolist()
    assert isinstance(ground_rot, list) and len(ground_rot) == 3
    scenelet.add_aux_info('ground_rot', ground_rot)
    scenelet.add_aux_info(
        'path_opt_params', {
            'rate': args.r,
            'w-smooth': args.smooth,
            'winsorize-limit': args.winsorize_limit,
            'camera': args.camera_name,
            'huber': args.huber,
            'height_correction': conf.optimize_path.height_correction,
            'focal_correction': conf.optimize_path.focal_correction
        })
    scenelet.add_aux_info('frame_ids_filled_in', list(frame_ids_filled_in))

    assert scenelet.skeleton.has_forwards(), "No forwards??"
    scenelet.save(out_path)
    if show:
        # save path plot
        out_path_path = os.path.join(args.dir, os.pardir,
                                     "%s_path.jpg" % name_video)
        path_fig = plot_path(scenelet.skeleton)
        legend = ["smooth %g" % args.smooth]
        plt.legend(legend)
        path_fig.savefig(out_path_path)

    # backup args
    path_args = os.path.join(args.dir, os.pardir, 'args_denis.txt')
    with open(path_args, 'a') as f_args:
        f_args.write("%s %s\n" %
                     (os.path.basename(sys.executable), " ".join(argv)))

    # save 2D detections to file
    if args.postfix == 'unannot':
        path_skel_ours_2d = os.path.join(
            args.dir, os.pardir, "skel_%s_2d_%02d.json" % (name_video, 0))
        sclt_2d = Scenelet(skel_ours_2d_all)
        print('Saving {} to {}'.format(len(skel_ours_2d_all.get_frames()),
                                       path_skel_ours_2d))
        sclt_2d.skeleton.aux_info = {}
        sclt_2d.save(path_skel_ours_2d)
    else:
        print(args.postfix)

    lg.info("Saving images...")
    if len(images) and len(out_images):
        path_out_images = os.path.join(args.dir, os.pardir, 'color')
        # Only skip creation if the folder exists; any other failure
        # (e.g. permissions) should surface here and not be swallowed.
        if not os.path.exists(path_out_images):
            os.makedirs(path_out_images)
        # Do not rely on the key order of out_images for the frame range.
        out_frame_ids = sorted(out_images.keys())
        for frame_id in range(out_frame_ids[0], out_frame_ids[-1] + 1):
            im = out_images[frame_id] if frame_id in out_images \
                else cv2.cvtColor(images[frame_id], cv2.COLOR_BGR2RGB)
            for actor_id in range(args.n_actors):
                # Confidences are stored under the expanded (per-actor)
                # frame id, so the lookup has to use it as well; the plain
                # frame id only matches for actor 0.
                frame_id2 = skel_ours_2d_all.unmod_frame_id(
                    frame_id=frame_id,
                    actor_id=actor_id,
                    frames_mod=skel_ours_2d_all.frames_mod)
                if frame_id2 not in visible_f[actor_id]:
                    continue
                for joint, is_vis in visible_f[actor_id][frame_id2].items():
                    p2d = skel_ours_2d_all.get_joint_3d(joint,
                                                        frame_id=frame_id2)
                    radius = abs(np.log(is_vis / 0.1 + 1e-6))
                    if np.isnan(radius):
                        continue
                    p2d = (int(round(p2d[0])), int(round(p2d[1])))
                    cv2.circle(im,
                               center=p2d,
                               radius=int(round(radius)),
                               color=(1., 1., 1., 0.5),
                               thickness=1)
                    # named conf_joint to avoid shadowing the `conf`
                    # configuration object
                    conf_joint = get_conf_thresholded(conf=is_vis,
                                                      thresh_log_conf=None,
                                                      dtype_np=np.float32)
                    if conf_joint > 0.5:
                        cv2.putText(img=im,
                                    text=Joint(joint).get_name(),
                                    org=p2d,
                                    fontFace=1,
                                    fontScale=1,
                                    color=(10., 150., 10., 100.))
            # scale back to the original image size
            scale = (shape_orig[1] / float(im.shape[1]),
                     shape_orig[0] / float(im.shape[0]))
            cv2.imwrite(
                os.path.join(path_out_images, "color_%05d.jpg" % frame_id),
                cv2.resize(im, (0, 0),
                           fx=scale[0],
                           fy=scale[1],
                           interpolation=cv2.INTER_CUBIC))
        lg.info("Wrote images to %s/" % path_out_images)
Exemple #8
0
def main(argv):
    """Export densely sampled versions of short PiGraphs scenelets.

    For every input scenelet the matching full scene is looked up by name,
    the scenelet's skeleton is replaced by all poses of the full scene that
    fall between the (rounded) start and end times of the scenelet, objects
    farther than ``--dist-thresh`` are pruned, the characteristicness score
    is attached, and the result is saved below
    ``<parent of d>/deb/<name of d>_full_sampling/<scene name>/``.

    Side effects: writes a ``scenes_and_scenelets.pickle`` cache into the
    scenelet folder, renames a pre-existing output folder to
    ``<folder>.bak.NN`` and writes one scenelet file per kept scenelet.

    Args:
        argv (list):
            Command line arguments (without the program name), handed to
            ``argparse``. See the parser set-up below for the options.
    """
    pjoin = os.path.join  # cache long name
    parser = argparse.ArgumentParser(
        "Find characteristic scene times",
        description="Scans a directory of short scenelets (exported from "
        "Matlab), and looks up their full version in the original "
        "scenes. It exports a new scenelet containing all poses "
        "between the start and end times of the input short "
        "scenelets. Matching is done by name."
        "Scenelets below time length limit and not enough objects "
        "are thrown away."
        "It also saves the scenelet characteristicness into the "
        "output scenelet files.")
    parser.add_argument(
        'd',
        type=argparse_check_exists,
        help="Folder containing PiGraphs scenelets. E.g. "
        "/mnt/thorin_data/stealth/shared/"
        "pigraph_scenelets__linterval_squarehist_large_radiusx2")
    parser.add_argument('s',
                        type=argparse_check_exists,
                        help="Folder containing PiGraphs full scenes. E.g. "
                        "/mnt/thorin_data/stealth/shared/scenes_pigraphs")
    parser.add_argument('-l',
                        '--limit-len',
                        type=int,
                        help="Minimum length for a scenelet",
                        default=10)  # changed from `5` on 15/1/2018
    parser.add_argument(
        '--dist-thresh',
        type=float,
        help='Distance threshold for object pruning. Typically: 0.2 or 0.5.',
        default=.5)
    # parse arguments
    args = parser.parse_args(argv)
    parts_to_remove = ['sidetable']
    lg.warning("Will remove all parts named %s" % parts_to_remove)

    # read scenes and scenelets (cached in a pickle next to the scenelets)
    p_pickle = pjoin(args.d, 'scenes_and_scenelets.pickle')
    if os.path.exists(p_pickle):
        lg.info("reading from %s" % p_pickle)
        # NOTE: unpickling executes arbitrary code; only use trusted caches.
        with open(p_pickle, 'rb') as f_pickle:
            scenes, scenelets = pickle_load(f_pickle)
        lg.info("read from %s" % p_pickle)
    else:
        scenelets = read_scenelets(args.d)
        scenes = read_scenelets(args.s)
        scenes = {scene.name_scene: scene for scene in scenes}
        with open(p_pickle, 'wb') as f_pickle:
            pickle.dump((scenes, scenelets), f_pickle, protocol=-1)
        lg.info("wrote to %s" % p_pickle)

    # Read characteristicnesses (to put them into the scenelet).
    p_charness = pjoin(args.d, "charness__gaussian.mat")
    pose_charness, scenelet_names = read_charness(p_charness,
                                                  return_hists=False,
                                                  return_names=True)

    # output folder
    d_scenelets_parent = os.path.dirname(args.d)
    d_dest = pjoin(d_scenelets_parent, 'deb',
                   "%s_full_sampling" % args.d.split(os.sep)[-1])
    # Move a pre-existing output folder out of the way: try the backup
    # suffixes .bak.00 ... .bak.99 until a rename succeeds.
    if os.path.exists(d_dest):
        i = 0
        while i < 100:
            try:
                os.rename(d_dest, "%s.bak.%02d" % (d_dest, i))
                break
            except OSError:
                i += 1
    os.makedirs(d_dest)

    # processing
    for sclt in scenelets:
        # cache skeleton
        skeleton = sclt.skeleton

        # NOTE(review): only scenelets of scene09 / scene10 are processed,
        # everything else is skipped. This looks like a leftover debugging
        # filter - confirm before relying on the output being complete.
        if 'scene09' in sclt.name_scenelet or 'scene10' in sclt.name_scenelet:
            lg.debug("here")
        else:
            continue

        # Group objects by category as (distance to skeleton, object id).
        # Chairs, couches and stools share the 'sittable' category.
        per_cat = {}
        for oid, scene_obj in sclt.objects.items():
            _, dist = is_close(scene_obj,
                               skeleton,
                               args.dist_thresh,
                               return_dist=True)
            label = scene_obj.label
            if 'chair' in label or 'couch' in label or 'stool' in label:
                label = 'sittable'
            per_cat.setdefault(label, []).append((dist, oid))

        # closest object first within each category
        per_cat = {k: sorted(v) for k, v in per_cat.items()}

        name_scene = sclt.name_scene.split('__')[0]
        if '-no-coffeetable' in name_scene:
            name_scene = name_scene[:name_scene.find('-no-coffeetable')]
        scene = scenes[name_scene]

        # If the scenelet has no shelf, borrow the shelves of the full scene
        # under the first object id that is still free in the scenelet.
        if 'shelf' not in per_cat:
            for oid, ob in scene.objects.items():
                if ob.label == 'shelf':
                    _, dist = is_close(ob,
                                       skeleton,
                                       args.dist_thresh,
                                       return_dist=True)
                    oid_ = oid
                    while oid_ in sclt.objects:
                        oid_ += 1
                    sclt.add_object(oid_, ob)
                    per_cat.setdefault('shelf', []).append((dist, oid_))

        if 'shelf' in per_cat:
            assert len(per_cat['shelf']) == 1, "TODO: keep all shelves"

        # Keep the closest object of each category, if it is close enough.
        oids_to_keep = {
            v[0][1] for v in per_cat.values() if v[0][0] < args.dist_thresh
        }

        if not oids_to_keep:  # there is always a floor
            lg.warning("Skipping %s, not enough objects: %s" %
                       (sclt.name_scenelet, per_cat))
            continue

        # copy skeleton with dense sampling in time
        mn, mx = skeleton.get_frames_min_max()
        time_mn = floor(skeleton.get_time(mn))
        time_mx = ceil(skeleton.get_time(mx))

        # artificially prolong mocap scenes
        if 'scene' in name_scene and (time_mx - time_mn < 60):
            d = (time_mx - time_mn) // 2 + 1
            time_mn -= d
            time_mx += d

        # Drop the sparse poses of the scenelet...
        frame_ids_old = skeleton.get_frames()
        times_old = [skeleton.get_time(fid) for fid in frame_ids_old]
        for frame_id in frame_ids_old:
            skeleton.remove_pose(frame_id)

        # ...and copy every pose of the full scene inside the time window.
        for frame_id in range(time_mn, time_mx + 1):
            if not scene.skeleton.has_pose(frame_id):
                continue
            pose = scene.skeleton.get_pose(frame_id=frame_id)
            fw = scene.skeleton.get_forward(frame_id=frame_id,
                                            estimate_ok=False)
            sclt.set_pose(frame_id=frame_id,
                          angles=None,
                          pose=pose,
                          forward=fw,
                          clone_forward=True)

        # scale mocap skeletons
        if 'scene0' in name_scene or 'scene10' in name_scene:
            mx_old = np.max(sclt.skeleton.poses[:, 1, :])
            sclt.skeleton.poses *= 0.8
            mx_new = np.max(sclt.skeleton.poses[:, 1, :])
            # NOTE(review): the maxima are taken over poses[:, 1, :] but the
            # offset is added to poses[1, :] (first axis index 1 only).
            # Possibly poses[:, 1, :] was intended - kept as is, confirm.
            sclt.skeleton.poses[1, :] += mx_new - mx_old + 0.05
        _frames = sclt.skeleton.get_frames()

        # check length
        if len(_frames) < args.limit_len:
            lg.warning("Skipping %s, because not enough frames: %s" %
                       (sclt.name_scene, _frames))
            continue

        # save charness
        try:
            id_charness = next(i for i, name in enumerate(scenelet_names)
                               if name == sclt.name_scenelet)
            sclt.charness = pose_charness[id_charness]
        except StopIteration:
            lg.error("Something is wrong, can't find %s, %s in charness db "
                     "containing names such as %s." %
                     (sclt.name_scene, sclt.name_scenelet, scenelet_names[0]))
            # placeholder score for scenelets missing from the charness db
            sclt.charness = 0.4111111

        _mn, _mx = (_frames[0], _frames[-1])
        assert _mn >= time_mn, "not inside? %s < %s" % (_mn, time_mn)
        assert _mx <= time_mx, "not inside? %s < %s" % (_mx, time_mx)
        if len(_frames) < len(frame_ids_old):
            lg.warning("Not more frames than interpolated "
                       "scenelet?\n%s\n%s\n%s" %
                       (_frames, frame_ids_old, times_old))

        # Remove pruned objects, and unwanted parts of the kept objects.
        oids = list(sclt.objects.keys())
        for oid in oids:
            if oid not in oids_to_keep:
                lg.debug("removed %s" % sclt.objects[oid])
                sclt.objects.pop(oid)
            else:
                obj = sclt.objects[oid]
                part_ids_to_remove = [
                    part_id for part_id, part in obj.parts.items()
                    if part.label in parts_to_remove
                ]
                if len(part_ids_to_remove) == len(obj.parts):
                    # nothing would be left of the object, drop it entirely
                    sclt.objects.pop(oid)
                else:
                    for part_id in part_ids_to_remove:
                        # The part is looked up on the object itself; the
                        # objects dict is keyed by object id, not by object.
                        lg.debug("removed %s" % obj.parts[part_id])
                        obj.parts.pop(part_id)

        # Skip scenelets that ended up with no objects or only a floor.
        remaining = list(sclt.objects.values())
        if not remaining or (len(remaining) == 1
                             and remaining[0].label == 'floor'):
            lg.debug("finally removing scenelet: %s" % sclt.objects)
            continue

        # save in the scene folder
        d_dest_scene = pjoin(d_dest, name_scene)
        if not os.path.exists(d_dest_scene):
            os.makedirs(d_dest_scene)
        sclt.save(pjoin(d_dest_scene, "skel_%s" % sclt.name_scenelet))
Exemple #9
0
def optimize_path(skel_ours,
                  skel_ours_2d,
                  images,
                  intrinsics,
                  path_skel,
                  ground_rot,
                  shape_orig=None,
                  use_huber=False,
                  weight_smooth=0.01,
                  show=False,
                  frames_ignore=None,
                  resample=True,
                  depth_init=10.,
                  p_constraints=None,
                  smooth_mode=SmoothMode.ACCEL):
    """Optimize 3D path so that it matches the 2D corresponding observations.

    A per-frame translation and a per-frame rotation around the y axis are
    optimized (TensorFlow graph + ``ScipyOptimizerInterface``) so that the
    perspective projection of the transformed 3D poses matches the 2D
    detections, subject to a temporal smoothness term on the averaged hip
    displacement and, optionally, 3D joint constraints.

    Args:
        skel_ours (Skeleton):
            3D skeleton from LFD. Modified in place: its ``poses`` are
            replaced by the optimized 3D positions before returning.
        skel_ours_2d (Skeleton):
            2D feature points from LFD.
        images (dict):
            Color images for debug, keyed by frame_ids.
        intrinsics (np.ndarray):
            Camera matrix. Only the entries ``[0, 0]``, ``[1, 1]``
            (used as scale) and ``[0, 2]``, ``[1, 2]`` (used as offset) are
            read. Returned unchanged.
        path_skel (str):
            Path of input file from LFD on disk, used to create paths for
            intermediate result. Not referenced in this function body.
        ground_rot (sequence):
            Ground rotation; asserted to have (close to) zero second and
            third entries. The first entry is converted from degrees to
            radians and used as the camera rotation around the x axis.
        shape_orig (tuple):
            Height and width of original images before LFD scaled them.
            Debug overlay images are only generated if this is not None.
        use_huber (bool):
            Deprecated.
        weight_smooth (float):
            Smoothness term weight. The smoothness term is only added to
            the loss if this is positive.
        show (bool):
            Show debug visualizations.
        frames_ignore (set):
            Deprecated. Not referenced in this function body.
        resample (bool):
            Not referenced in this function body.
        depth_init (float):
            Initial depth for LFD poses. The initial depth of each frame is
            drawn uniformly from ``depth_init * [0.75, 1.25]``.
        p_constraints (str):
            Path to 3D constraints scenelet file.
        smooth_mode (SmoothMode):
            Smooth velocity or acceleration.

    Returns:
        tuple: ``(skel_ours, out_images, intrinsics)``, where ``out_images``
        is a dict of debug overlay images keyed by frame id (empty if
        ``shape_orig`` is None).

    Raises:
        RuntimeError: If ``smooth_mode`` is neither ``SmoothMode.VELOCITY``
            nor ``SmoothMode.ACCEL``.
        AssertionError: If ``ground_rot`` has a rotation around an axis
            other than x, or if the optimized poses have a different shape
            than the input poses.
    """

    # scale 2D detections to canonical camera coordinates
    np_poses_2d = \
        skel_ours_2d.poses[:, :2, :] \
        - np.expand_dims(intrinsics[:2, 2], axis=1)
    np_poses_2d[:, 0, :] /= intrinsics[0, 0]
    np_poses_2d[:, 1, :] /= intrinsics[1, 1]

    # Initial per-frame parameters: x = 0, y = -1, z = jittered depth_init,
    # and zero rotation.
    n_frames = skel_ours.poses.shape[0]
    np_translation = np.zeros(shape=(n_frames, 3), dtype=np.float32)
    np_translation[:, 1] = -1.
    np_translation[:, 2] = \
        np.random.uniform(-depth_init * 0.25, depth_init * 0.25,
                          np_translation.shape[0]) \
        + depth_init
    np_rotation = np.zeros(shape=(n_frames, 3), dtype=np.float32)

    frame_ids = np.array(skel_ours.get_frames(), dtype=np.float32)
    np_visibility = skel_ours_2d.get_confidence_matrix(frame_ids=frame_ids,
                                                       dtype='f4')

    # Optional 3D joint constraints: per frame and joint a weight (mask) and
    # a target 3D position. Only joints with confidence > 0.5 are used.
    if p_constraints is not None:
        sclt_cnstr = Scenelet.load(p_constraints)
        np_cnstr_mask = np.zeros(shape=(len(frame_ids),
                                        Joint.get_num_joints()),
                                 dtype=np.float32)
        np_cnstr = np.zeros(shape=(len(frame_ids), 3, Joint.get_num_joints()),
                            dtype=np.float32)
        for frame_id, confs in sclt_cnstr.confidence.items():
            lin_id = None
            for j, conf in confs.items():
                if conf > 0.5:
                    if lin_id is None:
                        # Row of this frame in `frame_ids`, looked up lazily.
                        # `next` has no default here: a constrained frame
                        # missing from `frame_ids` raises StopIteration.
                        lin_id = next(
                            lin_id_
                            for lin_id_, frame_id_ in enumerate(frame_ids)
                            if frame_id_ == frame_id)
                    np_cnstr_mask[lin_id, j] = conf
                    np_cnstr[lin_id, :, j] = \
                        sclt_cnstr.skeleton.get_joint_3d(
                          joint_id=j, frame_id=frame_id)
    else:
        np_cnstr_mask = None
        np_cnstr = None

    # Time steps between consecutive frames and their inverses.
    # presumably `spans` holds (start, end) frame id pairs of gaps between
    # actors; verify against Skeleton.get_actor_empty_frames
    spans = skel_ours.get_actor_empty_frames()
    dt = frame_ids[1:].astype(np.float32) \
         - frame_ids[:-1].astype(np.float32)
    dt_pos_inv = np.reciprocal(dt, dtype=np.float32)
    dt_vel_inv = np.divide(np.float32(2.), dt[1:] + dt[:-1])
    # ensure smoothness weight multipliers are not affected by
    # actor-transitions
    if skel_ours.n_actors > 1 and len(spans):
        for lin_id in range(len(dt)):
            frame_id0 = frame_ids[lin_id]
            frame_id1 = frame_ids[lin_id + 1]
            span = next((span_ for span_ in spans if span_[0] == frame_id0),
                        None)
            if span is not None:
                assert frame_id1 == span[1], "No"
                dt[lin_id] = 0.
                dt_pos_inv[lin_id] = 0.
                # NOTE(review): dt_vel_inv has one element less than dt, so
                # lin_id == len(dt) - 1 is out of bounds here, and for
                # lin_id == 0 the index lin_id - 1 wraps around to the last
                # element - confirm spans never touch the first/last step.
                dt_vel_inv[lin_id] = 0.
                dt_vel_inv[lin_id - 1] = 1. / dt[lin_id - 1]

    # Forward direction per frame, and its (negated) angle in the x-z plane.
    forwards = np.array([
        skel_ours.get_forward(frame_id, estimate_ok=True, k=0)
        for frame_id in skel_ours.get_frames()
    ])
    # from alignment import get_angle
    # xs = np.hstack((
    # np.ones(shape=(len(forwards), 1)),
    # np.zeros(shape=(len(forwards), 2))
    # ))
    # print(xs.shape)
    print(forwards.shape)
    # unit_x is not used below.
    unit_x = np.array((1., 0., 0.))
    np_angles = [-np.arctan2(forward[2], forward[0]) for forward in forwards]
    print(forwards, np_angles)
    # ank_diff = \
    #     np.exp(
    #        -2. * np.max(
    #           [
    #               np.linalg.norm(
    #                  (skel_ours.poses[1:, :, joint]
    #                   - skel_ours.poses[:-1, :, joint]).T
    #                  * dt_pos_inv, axis=0
    #               ).astype(np.float32)
    #               for joint in {Joint.LANK, Joint.RANK}
    #           ],
    #           axis=0
    #        )
    #     )
    # assert ank_diff.shape == (skel_ours.poses.shape[0]-1,), \
    #     "Wrong shape: %s" % repr(ank_diff.shape)

    # cam_angle = [np.deg2rad(-8.)]
    assert np.isclose(ground_rot[1], 0.) and np.isclose(ground_rot[2], 0.), \
        "Assumed only x rotation"
    # assert ground_rot[0] <= 0, "Negative means looking down, why looknig up?"
    cam_angle = [np.deg2rad(ground_rot[0])]
    # assert False, "Fixed angle!"
    # Build and solve the problem on the GPU if available, else on the CPU
    # (the set below always holds exactly one device).
    device_name = '/gpu:0' if tf.test.is_gpu_available() else '/cpu:0'
    devices = {device_name}
    for device in devices:
        with Timer(device, verbose=True):
            graph = tf.Graph()
            with graph.as_default(), tf.device(device):
                # assumes np_visibility is (n_frames, 1, n_joints), so that
                # the tiled mask matches the (n_frames, 2, n_joints)
                # re-projection error - TODO confirm
                tf_visibility = tf.Variable(np.tile(np_visibility, (1, 2, 1)),
                                            name='visibility',
                                            trainable=False,
                                            dtype=tf.float32)
                # NOTE(review): np.tile on a 1D array with reps (1, 3) gives
                # shape (1, 3 * n), so after reshape(-1, 3) row i holds
                # elements 3i..3i+2 of the repeated sequence rather than
                # element i three times. Harmless if all time steps are
                # equal - confirm whether per-row repetition was intended.
                tf_dt_pos_inv = \
                    tf.Variable(np.tile(dt_pos_inv, (1, 3)).reshape(-1, 3),
                                name='dt_pos_inv', trainable=False,
                                dtype=tf.float32)
                tf_dt_vel_inv = \
                    tf.constant(np.tile(dt_vel_inv, (1, 3)).reshape(-1, 3),
                                name='dt_vel_inv', dtype=tf.float32)

                # input data
                pos_3d_in = tf.Variable(skel_ours.poses.astype(np.float32),
                                        trainable=False,
                                        name='pos_3d_in',
                                        dtype=tf.float32)
                pos_2d_in = tf.Variable(np_poses_2d.astype(np.float32),
                                        trainable=False,
                                        name='pos_2d_in',
                                        dtype=tf.float32)

                # Camera angle. Marked trainable, but it is not part of the
                # optimizer's var_list below, so it keeps its initial value.
                params_camera = tf.Variable(initial_value=cam_angle,
                                            dtype=tf.float32,
                                            trainable=True)

                # 4x4 rotation around the x axis by the camera angle.
                cam_sn = tf.sin(params_camera)
                cam_cs = tf.cos(params_camera)
                transform_camera = tf.reshape(tf.stack([
                    1., 0., 0., 0., 0., cam_cs[0], cam_sn[0], 0., 0.,
                    -cam_sn[0], cam_cs[0], 0., 0., 0., 0., 1.
                ],
                                                       axis=0),
                                              shape=(4, 4))

                # 3D translation
                translation = tf.Variable(np_translation, name='translation')
                # 3D rotation (Euler XYZ)
                rotation = tf.Variable(np_rotation, name='rotation')
                # fw_angles only appears in commented-out loss terms below.
                fw_angles = tf.Variable(np_angles, name='angles')

                # rotation around y
                # (only column 1 of `rotation` enters the transform)
                my_zeros = tf.zeros((n_frames, 1))
                my_ones = tf.ones((n_frames, 1))
                c = tf.cos(tf.slice(rotation, [0, 1], [n_frames, 1]))
                s = tf.sin(tf.slice(rotation, [0, 1], [n_frames, 1]))
                t0 = tf.concat([c, my_zeros, -s, my_zeros], axis=1)
                t1 = tf.concat([my_zeros, my_ones, my_zeros, my_zeros], axis=1)
                t2 = tf.concat([s, my_zeros, c, my_zeros], axis=1)
                t3 = tf.concat([my_zeros, my_zeros, my_zeros, my_ones], axis=1)
                transform = tf.stack([t0, t1, t2, t3],
                                     axis=2,
                                     name="transform")

                # Pre-multiply every per-frame matrix by the camera rotation
                # and keep the upper-left 3x3 block.
                transform = tf.einsum('ij,ajk->aik', transform_camera,
                                      transform)[:, :3, :3]

                # transform to 3d
                # (rotate each pose, then add the frame's translation to
                # every joint)
                pos_3d = tf.matmul(transform, pos_3d_in) \
                    + tf.tile(tf.expand_dims(translation, 2),
                              [1, 1, int(pos_3d_in.shape[2])])

                # constraints
                # (sum of confidence-weighted squared distances between the
                # transformed joints and their constrained 3D positions)
                loss_cnstr = None
                if np_cnstr is not None:
                    constraints = tf.Variable(np_cnstr,
                                              trainable=False,
                                              name='constraints',
                                              dtype=tf.float32)
                    constraints_mask = tf.Variable(np_cnstr_mask,
                                                   trainable=False,
                                                   name='constraints_mask',
                                                   dtype=tf.float32)
                    cnstr_diff = tf.reduce_sum(tf.squared_difference(
                        pos_3d, constraints),
                                               axis=1,
                                               name='constraints_difference')
                    cnstr_diff_masked = tf.multiply(
                        constraints_mask,
                        cnstr_diff,
                        name='constraints_difference_masked')
                    loss_cnstr = tf.reduce_sum(cnstr_diff_masked,
                                               name='constraints_loss')

                # perspective divide
                # (x and y divided by z)
                pos_2d = tf.divide(
                    tf.slice(pos_3d, [0, 0, 0], [n_frames, 2, -1]),
                    tf.slice(pos_3d, [0, 2, 0], [n_frames, 1, -1]))

                if use_huber:
                    diff = huber_loss(pos_2d_in, pos_2d, 1.)
                    masked = diff * tf_visibility
                    loss_reproj = tf.nn.l2_loss(masked)
                    lg.info("Doing huber on reprojection, NOT translation")
                else:
                    # re-projection loss
                    diff = pos_2d - pos_2d_in
                    # mask loss by 2d key-point visibility
                    masked = diff * tf_visibility
                    loss_reproj = tf.nn.l2_loss(masked)
                    lg.info("NOT doing huber")

                sys.stderr.write(
                    "TODO: Move huber to translation, not reconstruction\n")

                # translation smoothness
                # dx: displacement between consecutive frames, averaged over
                # the left and right hip joints.
                dx = tf.multiply(
                    x=0.5,
                    y=tf.add(
                        pos_3d[1:, :, Joint.LHIP] - pos_3d[:-1, :, Joint.LHIP],
                        pos_3d[1:, :, Joint.RHIP] - pos_3d[:-1, :, Joint.RHIP],
                    ),
                    name="average_hip_displacement_3d")
                tf_velocity = tf.multiply(dx, tf_dt_pos_inv)

                # Change of the z displacement between consecutive steps,
                # scaled by dt_vel_inv. It is computed from `dx`, not from
                # `tf_velocity`.
                tf_acceleration_z = tf.multiply(x=dx[1:, 2:3] - dx[:-1, 2:3],
                                                y=tf_dt_vel_inv[:, 2:3],
                                                name="acceleration_z")

                if smooth_mode == SmoothMode.VELOCITY:
                    # if GT, use full smoothness to fix 2-frame flicker
                    if np_cnstr is not None:
                        print('Smoothing all velocity!')
                        loss_transl_smooth = \
                            weight_smooth * tf.nn.l2_loss(tf_velocity)
                    else:  # Normal mode, don't oversmooth screen-space
                        loss_transl_smooth = \
                            weight_smooth * tf.nn.l2_loss(tf_velocity[:, 2:3])
                elif smooth_mode == SmoothMode.ACCEL:
                    loss_transl_smooth = \
                        weight_smooth * tf.nn.l2_loss(tf_acceleration_z)
                else:
                    raise RuntimeError(
                        'Unknown smooth mode: {}'.format(smooth_mode))

                # per-step smoothness cost, only needed for the debug plots
                if show:
                    sqr_accel_z = weight_smooth * tf.square(tf_acceleration_z)

                if weight_smooth > 0.:
                    lg.info("Smoothing in time!")
                    loss = loss_reproj + loss_transl_smooth
                else:
                    lg.warning("Not smoothing!")
                    loss = loss_reproj

                # constraints enter the loss with a fixed weight of 1000
                if loss_cnstr is not None:
                    loss += 1000 * loss_cnstr

                # hip0 = tf.nn.l2_normalize(pos_3d[:-1, :, Joint.RHIP] - pos_3d[:-1, :, Joint.LHIP])
                # hip1 = tf.nn.l2_normalize(pos_3d[1:, :, Joint.RHIP] - pos_3d[1:, :, Joint.RHIP])
                # dots = tf.reduce_sum(tf.multiply(hip0, hip1), axis=1)
                # print(dots)
                # loss_dot = tf.nn.l2_loss(1. - dots)
                # loss_ang = fw_angles + rotation[:, 1]
                # print(loss_ang)
                # loss_ang = tf.square(loss_ang[1:] - loss_ang[:-1])
                # print(loss_ang)
                # two_pi_sqr = tf.constant((2. * 3.14159)**2., dtype=tf.float32)
                # print(two_pi_sqr)
                # loss_ang = tf.reduce_mean(tf.where(loss_ang > two_pi_sqr, loss_ang - two_pi_sqr, loss_ang))
                # print(loss_ang)
                # loss += loss_ang

                #
                # optimize
                #
                # Only translation and rotation are optimized; rotation is
                # bounded to [-pi/2, pi/2].
                optimizer = ScipyOptimizerInterface(
                    loss,
                    var_list=[translation, rotation],
                    options={'gtol': 1e-12},
                    var_to_bounds={rotation: (-np.pi / 2., np.pi / 2.)})

            with tf.Session(graph=graph) as session:
                session.run(tf.global_variables_initializer())

                optimizer.minimize(session)
                # Fetch the optimized values and debug quantities.
                np_pos_3d_out, np_pos_2d_out, np_transl_out, np_masked, \
                np_acceleration, np_loss_transl_smooth, np_dt_vel = \
                    session.run([pos_3d, pos_2d, translation, masked,
                                 tf_acceleration_z, loss_transl_smooth,
                                 tf_dt_vel_inv])
                if show:
                    o_sqr_accel_z = session.run(sqr_accel_z)
                o_vel = session.run(tf_velocity)
                o_dx = session.run(dx)
                o_rot = session.run(rotation)
                # o_dx, o_dx2 = session.run([accel_bak, acceleration2])
                # assert np.allclose(o_dx, o_dx2), "no"
                o_cam = session.run(fetches=[params_camera])
                print("camera angle: %s" % np.rad2deg(o_cam[0]))
                # o_losses = session.run([loss_reproj, loss_transl_smooth, loss_dot, loss_ang])
                o_losses = session.run([loss_reproj, loss_transl_smooth])
                print('losses: {}'.format(o_losses))
                # o_dots = session.run(dots)
                # with open('tmp/dots.txt', 'w') as fout:
                #     fout.write('\n'.join((str(e) for e in o_dots.tolist())))

    # The code that used to fill `fixed_frames` is commented out, so the list
    # stays empty and the "Re-optimizing..." branch below never runs.
    fixed_frames = []
    # for lin_frame_id in range(np_transl_out.shape[0]):
    #     if np_transl_out[lin_frame_id, 2] < 0.:
    #         print("Correcting frame_id %d: %s"
    #               % (skel_ours.get_lin_id_for_frame_id(lin_frame_id),
    #                  np_transl_out[lin_frame_id, :]))
    #         if lin_frame_id > 0:
    #             np_transl_out[lin_frame_id, :] = np_transl_out[lin_frame_id-1, :]
    #         else:
    #             np_transl_out[lin_frame_id, :] = np_transl_out[lin_frame_id+1, :]
    #         fixed_frames.append(lin_frame_id)

    # debug_forwards(skel_ours.poses, np_pos_3d_out, o_rot, forwards, np_angles)

    # z_jumps = np_pos_3d_out[1:, 2, Joint.PELV] - np_pos_3d_out[:-1, 2, Joint.PELV]
    # out = scipy.stats.mstats.winsorize(z_jumps, limits=1.)
    # plt.figure()
    # plt.plot(pos_3d[:, 2, Joint.PELV])
    # plt.show()
    # sys.exit(0)
    # diff = np.linalg.norm(out - displ, axis=1)
    if len(fixed_frames):
        print("Re-optimizing...")
        # NOTE(review): this feeds the translations into the `transform`
        # tensor and uses a fresh session without initialized variables -
        # looks stale, confirm before re-enabling the code above.
        with tf.Session(graph=graph) as session:
            np_pos_3d_out, np_pos_2d_out, np_transl_out = \
                session.run(fetches=[pos_3d, pos_2d, translation],
                            feed_dict={transform: np_transl_out})

    # Debug plots of acceleration and velocity for a hard-coded frame range.
    if show:
        lim_fr = [105, 115, 135]
        fig = plt.figure()
        accel_thr = 0.  # np.percentile(o_sqr_accel_z, 25)

        ax = plt.subplot2grid((2, 2), (0, 0), colspan=2)
        # print("np_masked:%s" % np_masked)
        # plt.plot(np_masked[:, )
        ax.plot(np.linalg.norm(np_acceleration[lim_fr[0]:lim_fr[1]], axis=1),
                '--o',
                label='accel')
        ax.add_artist(Line2D([0, len(o_sqr_accel_z)], [accel_thr, accel_thr]))
        # plt.plot(np_dt_vel[:, 0], label='dt velocity')
        # plt.plot(np.linalg.norm(np_f_accel, axis=1), '--x', label='f_accel')
        # plt.plot(ank_diff, label='ank_diff')
        ax.plot(o_sqr_accel_z[lim_fr[0]:lim_fr[1] + 1],
                '--x',
                label='loss accel_z')
        ax.legend()

        # Pelvis path in the x-z plane, annotated with the velocity terms.
        ax2 = plt.subplot2grid((2, 2), (1, 0), aspect='equal')
        ax2.plot(np_pos_3d_out[lim_fr[0]:lim_fr[1] + 1, 0, Joint.PELV],
                 np_pos_3d_out[lim_fr[0]:lim_fr[1] + 1, 2, Joint.PELV], '--x')
        for i, vel in enumerate(o_vel):
            if not (lim_fr[0] <= i <= lim_fr[1]):
                continue

            p0 = np_pos_3d_out[i + 1, [0, 2], Joint.PELV]
            p1 = np_pos_3d_out[i, [0, 2], Joint.PELV]
            ax2.annotate(
                "%f = ((%g - %g) + (%g - %g)) * %g = %g" %
                (vel[2], np_pos_3d_out[i + 1, 2, Joint.LHIP],
                 np_pos_3d_out[i, 2, Joint.LHIP], np_pos_3d_out[i + 1, 2,
                                                                Joint.RHIP],
                 np_pos_3d_out[i, 2, Joint.RHIP], np_dt_vel[i, 2], o_dx[i, 2]),
                xy=((p0[0] + p1[0]) / 2., (p0[1] + p1[1]) / 2.))
        ax2.set_title('velocities')

        # Pelvis path in the x-z plane, annotated with the accelerations.
        ax1 = plt.subplot2grid((2, 2), (1, 1), aspect='equal')
        ax1.plot(np_pos_3d_out[lim_fr[0]:lim_fr[1] + 1, 0, Joint.PELV],
                 np_pos_3d_out[lim_fr[0]:lim_fr[1] + 1, 2, Joint.PELV], '--x')
        for i, lacc in enumerate(o_sqr_accel_z):
            if not (lim_fr[0] <= i <= lim_fr[1]):
                continue
            if lacc > accel_thr:
                p0 = np_pos_3d_out[i + 1, [0, 2], Joint.PELV]
                ax1.annotate("%.3f" % np_acceleration[i], xy=(p0[0], p0[1]))
                ax.annotate("%.3f" % np.log10(lacc),
                            xy=(i - lim_fr[0], abs(np_acceleration[i])))
        ax1.set_title('accelerations')

        plt.show()

    # Undo the normalization: map the re-projected and the detected 2D
    # points back using the intrinsics (np_poses_2d is modified in place).
    np.set_printoptions(linewidth=200)
    np_pos_2d_out[:, 0, :] *= intrinsics[0, 0]
    np_pos_2d_out[:, 1, :] *= intrinsics[1, 1]
    np_pos_2d_out[:, 0, :] += intrinsics[0, 2]
    np_pos_2d_out[:, 1, :] += intrinsics[1, 2]

    np_poses_2d[:, 0, :] *= intrinsics[0, 0]
    np_poses_2d[:, 1, :] *= intrinsics[1, 1]
    np_poses_2d[:, 0, :] += intrinsics[0, 2]
    np_poses_2d[:, 1, :] += intrinsics[1, 2]

    # Debug overlays: for every visible joint draw the re-projected point,
    # the detected point and a line connecting the two.
    out_images = {}
    if shape_orig is not None:
        frames_2d = skel_ours_2d.get_frames()
        for frame_id2 in frames_2d:
            try:
                lin_frame_id = skel_ours_2d.get_lin_id_for_frame_id(frame_id2)
            except KeyError:
                lin_frame_id = None
            frame_id = skel_ours_2d.mod_frame_id(frame_id=frame_id2)

            # Draw onto the image already started for this frame id, else
            # onto a copy of the input image, else onto a blank canvas.
            im = None
            if frame_id in out_images:
                im = out_images[frame_id]
            elif len(images):
                if frame_id not in images:
                    lg.warning("Not enough images, the video was probably cut "
                               "after LiftingFromTheDeep was run.")
                    continue
                im = copy.deepcopy(images[frame_id])
                im = cv2.cvtColor(im, cv2.COLOR_RGB2BGR)
            else:
                # shape_orig entries need an .astype method here
                # (e.g. numpy scalars), plain Python ints would fail.
                im = np.zeros(
                    (shape_orig[0].astype(int), shape_orig[1].astype(int), 3),
                    dtype='i1')
            if lin_frame_id is not None:
                for jid in range(np_pos_2d_out.shape[2]):
                    if skel_ours_2d.is_visible(frame_id2, jid):
                        p2d = tuple(np_pos_2d_out[lin_frame_id, :,
                                                  jid].astype(int).tolist())
                        p2d_det = tuple(np_poses_2d[lin_frame_id, :,
                                                    jid].astype(int).tolist())
                        cv2.line(im,
                                 p2d,
                                 p2d_det,
                                 color=(100, 100, 100),
                                 thickness=3)
                        cv2.circle(im,
                                   p2d,
                                   radius=3,
                                   color=(0, 0, 200),
                                   thickness=-1)
                        cv2.circle(im,
                                   p2d_det,
                                   radius=3,
                                   color=(0, 200, 0),
                                   thickness=-1)
            out_images[frame_id] = im
            # cv2.imshow("Out", im)
            # cv2.waitKey(50)

        # disabled debug visualization (never executed)
        if False:
            # visualize
            fig = plt.figure()
            ax = fig.gca(projection='3d')
            for frame_id in range(0, np_pos_3d_out.shape[0], 1):
                j = Joint.PELV
                ax.scatter(np_pos_3d_out[frame_id, 0, j],
                           np_pos_3d_out[frame_id, 2, j],
                           -np_pos_3d_out[frame_id, 1, j],
                           marker='o')
            # smallest = np_pos_3d_out.min()
            # largest = np_pos_3d_out.max()
            ax.set_xlim3d(-5., 5.)
            ax.set_xlabel('x')
            ax.set_ylim3d(-5., 5.)
            ax.set_ylabel('y')
            ax.set_zlim3d(-5., 5.)
            ax.set_zlabel('z')

    # disabled debug visualization (never executed)
    if False:
        # visualize
        fig = plt.figure()
        ax = fig.gca(projection='3d')
        for frame_id in range(0, np_pos_3d_out.shape[0], 1):
            for j in range(np_pos_3d_out.shape[2]):
                ax.scatter(np_pos_3d_out[frame_id, 0, j],
                           np_pos_3d_out[frame_id, 2, j],
                           -np_pos_3d_out[frame_id, 1, j],
                           marker='o')
        # smallest = np_pos_3d_out.min()
        # largest = np_pos_3d_out.max()
        ax.set_xlim3d(-5., 5.)
        ax.set_xlabel('x')
        ax.set_ylim3d(-5., 5.)
        ax.set_ylabel('y')
        ax.set_zlim3d(-5., 5.)
        ax.set_zlabel('z')
    # NOTE(review): called unconditionally, i.e. also when `show` is False.
    plt.show()

    # Write the optimized 3D positions back into the input skeleton.
    assert all(a == b
               for a, b in zip(skel_ours.poses.shape, np_pos_3d_out.shape)), \
        "no"
    skel_ours.poses = np_pos_3d_out
    return skel_ours, out_images, intrinsics
Exemple #10
0
def main(argv):
    """Fit a full video by running ``opt_consistent`` on sliding windows.

    The query skeleton is loaded from the given video path, and a window of
    ``window_size`` frames is slid over its frame range in ``step_size``
    increments. For every window that has enough poses and whose first and
    last frame belong to the same actor, ``opt_consistent`` is invoked in
    ``independent`` mode with a command line assembled from the parsed
    options. Afterwards ``show_folder`` and ``extract_gaps`` are run on the
    video path.

    Side effects visible here: the invoking command line (``sys.argv``) is
    appended to ``args_opt_consistent.txt`` in the video directory.

    Args:
        argv: Command line arguments (without the program name), handed to
            ``argparse``.
    """
    from imapper.pose.opt_consistent import main as opt_consistent
    pjoin = os.path.join
    parser = argparse.ArgumentParser("Fit full video")
    parser.add_argument("video", type=argparse_check_exists, help="Input path")
    parser.add_argument("step_size", type=int, help="Stepsize in frames.")
    parser.add_argument("window_size", type=int, help="Window size in frames.")
    parser.add_argument('--wp',
                        type=float,
                        help="Projection weight.",
                        default=1.)
    parser.add_argument('--ws',
                        type=float,
                        help="Smoothness weight.",
                        default=.1)
    parser.add_argument('--wo',
                        type=float,
                        help="Occlusion weight.",
                        default=0.1)
    parser.add_argument('--wi',
                        type=float,
                        help="Intersection weight.",
                        default=1.
                        # used to be 10.
                        )
    # NOTE: --gtol is parsed but not forwarded to opt_consistent below.
    parser.add_argument('--gtol',
                        type=float,
                        help="Optimizer gradient tolerance (termination "
                        "criterion).",
                        default=1e-6)
    parser.add_argument('--maxiter',
                        type=int,
                        help="Optimizer max number of iterations.",
                        default=0)
    parser.add_argument('-w-occlusion',
                        action='store_true',
                        help="Estimate occlusion score.")
    parser.add_argument('-no-isec',
                        action='store_true',
                        help='Don\'t use intersection terms')
    parser.add_argument('--dest-dir',
                        type=str,
                        help="Name of subdirectory to save output to.",
                        default='opt1')
    parser.add_argument("-s",
                        "--d-scenelets",
                        dest='s',
                        type=argparse_check_exists,
                        help="Folder containing original PiGraphs scenelets")
    parser.add_argument('--batch-size',
                        type=int,
                        help="How many scenelets to optimize at once.",
                        default=1500)
    parser.add_argument('--output-n',
                        type=int,
                        help="How many candidates to output per batch and "
                        "overall.",
                        default=200)
    parser.add_argument('--filter-same-scene',
                        action='store_true',
                        help="Hold out same scene scenelets.")
    args = parser.parse_args(argv)

    # get video parent directory
    video_is_dir = os.path.isdir(args.video)
    d_query = args.video if video_is_dir else os.path.dirname(args.video)

    # save call log to video directory
    with open(pjoin(d_query, 'args_opt_consistent.txt'), 'a') as f_args:
        f_args.write('(python3 ')
        f_args.write(" ".join(sys.argv))
        f_args.write(")\n")

    # parse video path
    name_query = os.path.split(d_query)[-1]
    p_query = pjoin(d_query, "skel_%s_unannot.json" % name_query) \
        if video_is_dir else args.video
    assert p_query.endswith('.json'), "Need a skeleton file"
    print("name_query: %s" % name_query)

    # Scenes returned by opt_consistent, handed back in on later windows.
    cache_scenes = None

    # Windows that were not optimized because they had too few poses.
    skipped = []
    # Minimum number of frames with a pose required inside a window.
    min_pose_count = 9

    # load initial video path (local poses)
    query = Scenelet.load(p_query, no_obj=True)
    frame_ids = query.skeleton.get_frames()
    half_window_size = args.window_size // 2
    for mid_frame_id in range(frame_ids[0] + half_window_size,
                              frame_ids[-1] - half_window_size + 1,
                              args.step_size):
        gap = (mid_frame_id - half_window_size,
               mid_frame_id + half_window_size)
        assert gap[0] >= frame_ids[0]
        assert gap[1] <= frame_ids[-1]
        pose_count = sum(1 for _frame_id in range(gap[0], gap[1] + 1)
                         if query.skeleton.has_pose(_frame_id))
        if pose_count < min_pose_count:
            print("Skipping span because not enough poses: %s" % pose_count)
            skipped.append((gap, pose_count))
            # Actually skip the window: it was reported and recorded as
            # skipped, but previously fell through and was optimized anyway.
            continue

        # A window is only usable if both of its ends show the same actor.
        same_actor = query.skeleton.n_actors == 1  # type: bool
        if not same_actor:
            same_actor = query.skeleton.get_actor_id(frame_id=gap[0]) \
                         == query.skeleton.get_actor_id(frame_id=gap[1])
        if not same_actor:
            print('skipping gap {:d}...{:d}, not same actor'.format(
                gap[0], gap[1]))
            continue

        lg.info("gap: %s" % repr(gap))
        # Command line for opt_consistent; kept under its own name so it does
        # not shadow this function's ``argv`` parameter.
        argv_opt = [
            '-silent',
            '--wp',
            "%g" % args.wp,
            '--ws',
            "%g" % args.ws,
            '--wo',
            "%g" % args.wo,
            '--wi',
            "%g" % args.wi,
            '--nomocap',  # added 16/4/2018
            '-v',
            args.video,
            '--output-n',
            "%d" % args.output_n
        ]
        if args.w_occlusion:
            argv_opt.append('-w-occlusion')
        if args.no_isec:
            argv_opt.append('-no-isec')

        if args.filter_same_scene:
            # The scene name is the part of the query name before the first
            # underscore.
            argv_opt.extend(['--filter-scenes', name_query.partition('_')[0]])

        # maxiter == 0 (the default) means "do not pass --maxiter".
        if args.maxiter:
            argv_opt.extend(['--maxiter', "%d" % args.maxiter])

        argv_opt.extend([
            'independent', '-s', args.s, '--gap',
            "%d" % gap[0],
            "%d" % gap[1], '--dest-dir', args.dest_dir, '-tc', '-0.1',
            '--batch-size',
            "%d" % args.batch_size
        ])
        lg.info("argv: %s" % argv_opt)

        try:
            _cache_scenes = opt_consistent(argv_opt, cache_scenes)
            # Only replace the cache with a non-empty list whose length
            # differs from what is already cached.
            if isinstance(_cache_scenes, list) and len(_cache_scenes) \
                and (cache_scenes is None
                     or len(_cache_scenes) != len(cache_scenes)):
                cache_scenes = _cache_scenes
        except FileNotFoundError as e:
            lg.error("e: %s" % e)
            message = str(e)
            if message.endswith('_2d_00.json\''):
                # The missing file is the '_2d_00.json' one: run main_denis
                # on the video's "denis" folder before retrying.
                from imapper.pose.main_denis import main as opt0
                argv_opt0 = [
                    's8', '-d',
                    "%s/denis" % d_query, '-smooth', '0.005'
                ]
                opt0(argv_opt0)
            else:
                print(message)

            # Retry once, without the scene cache.
            opt_consistent(argv_opt)

    show_folder([args.video])

    extract_gaps([args.video, args.s])

    print("skipped: %s" % skipped)