Example #1
    def fn(elems):
        dm, pose, cfg, com = elems[0], elems[1], elems[2], elems[3]
        # random rotation
        angle = tf.random_uniform((1, ), -1 * np.pi, np.pi)
        rot_dm = tf.contrib.image.rotate(dm, angle)

        uv_com = xyz2uvd_op(com, cfg)
        uvd_pt = xyz2uvd_op(pose, cfg) - tf.tile(uv_com, [jnt_num])
        cost, sint = tf.cos(angle)[0], tf.sin(angle)[0]
        rot_mat = tf.stack([cost, -sint, 0, sint, cost, 0, 0.0, 0.0, 1.0],
                           axis=0)
        rot_mat = tf.reshape(rot_mat, (3, 3))

        uvd_pt = tf.reshape(uvd_pt, (-1, 3))
        rot_pose = tf.reshape(tf.matmul(uvd_pt, rot_mat), (-1, ))

        # randomly stretch the x and y edges
        edge_ratio = tf.clip_by_value(tf.random_normal((2, ), 1.0, 0.2), 0.9,
                                      1.1)
        target_height = tf.to_int32(
            tf.to_float(tf.shape(dm)[0]) * edge_ratio[0])
        target_width = tf.to_int32(
            tf.to_float(tf.shape(dm)[1]) * edge_ratio[1])
        # 1 stands for nearest-neighbour interpolation (ResizeMethod.NEAREST_NEIGHBOR)
        rot_dm = tf.image.resize_images(rot_dm, (target_height, target_width),
                                        1)
        rot_dm = tf.image.resize_image_with_crop_or_pad(
            rot_dm,
            tf.shape(dm)[0],
            tf.shape(dm)[1])
        rot_pose = tf.multiply(
            rot_pose, tf.tile([edge_ratio[1], edge_ratio[0], 1.0], [jnt_num]))

        rot_pose = rot_pose + tf.tile(uv_com, [jnt_num])
        rot_pose = uvd2xyz_op(rot_pose, cfg)
        rot_pose = tf.reshape(rot_pose, (-1, ))
        return [rot_dm, rot_pose, cfg, com]
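
A minimal usage sketch (not from the original source): in TensorFlow 1.x a per-sample augmentation like fn above is typically applied to a whole batch with tf.map_fn. The batch tensor names and shapes below are assumptions.

# Hypothetical float32 batch tensors: dms [N, H, W, 1], poses [N, 3 * jnt_num],
# cfgs [N, 6], coms [N, 3]; fn is the augmentation function defined above.
aug_dms, aug_poses, cfgs, coms = tf.map_fn(
    fn, [dms, poses, cfgs, coms],
    dtype=[tf.float32, tf.float32, tf.float32, tf.float32])
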
Example #2
        def fn(elems):
            xyz_pt, com, cfg, hm, dm = (elems[0], elems[1], elems[2],
                                        elems[3], elems[4])

            xx, yy, zz = tf.unstack(tf.reshape(xyz_pt, (-1, 3)), axis=-1)
            xyz_pt = tf.reshape(xyz_pt, (-1, ))

            xyz_pt = tf.multiply(xyz_pt,
                                 data.preprocess.POSE_NORM_RATIO) + tf.tile(
                                     com, [jnt_num * pnt_num])
            xyz_pt = tf.reshape(xyz_pt, (-1, 3))

            w_ratio = cfg[4] / out_w
            h_ratio = cfg[5] / out_h
            new_cfg = CameraConfig(cfg[0] / w_ratio, cfg[1] / h_ratio,
                                   cfg[2] / w_ratio, cfg[3] / h_ratio, out_w,
                                   out_h)
            uvd_pt = xyz2uvd_op(xyz_pt, new_cfg)
            uvd_pt = tf.reshape(uvd_pt, (-1, 3))
            uu, vv, dd = tf.unstack(uvd_pt, axis=-1)
            uu = tf.to_int32(uu + 0.5)
            vv = tf.to_int32(vv + 0.5)
            jj = tf.tile(tf.expand_dims(tf.range(jnt_num), axis=-1),
                         [1, pnt_num])
            jj = tf.reshape(jj, (-1, ))

            indices = tf.stack([vv, uu, jj], axis=-1)
            weights = tf.gather_nd(hm, indices)
            weights = tf.reshape(weights, (jnt_num, pnt_num, 1))

            # we also clip the depth values so that no point ends up in front
            # of the surface observed in the depth map
            dm = tf.squeeze(dm)
            dm = tf.divide(
                dm * data.preprocess.D_RANGE - data.preprocess.D_RANGE * 0.5,
                data.preprocess.POSE_NORM_RATIO)
            indices = tf.stack([vv, uu], axis=-1)
            od = tf.gather_nd(dm, indices)
            zz = tf.maximum(zz, od)
            xyz_pt = tf.stack([xx, yy, zz], axis=-1)
            xyz_pt = tf.reshape(xyz_pt, (jnt_num, pnt_num, 3))

            return [weights, xyz_pt, cfg, hm, dm]
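
All of these snippets depend on the helpers xyz2uvd_op / uvd2xyz_op, whose implementations are not shown here. For orientation only, a plain pinhole-camera projection using the field order of the CameraConfig constructor in Example #2 (fx, fy, cx, cy, w, h) would look roughly as follows; the real helpers may differ, e.g. in axis conventions.

def xyz2uvd_sketch(xyz_pt, cfg):
    # Hedged sketch, not the original xyz2uvd_op: project camera-space xyz
    # points to pixel coordinates (u, v), keeping the depth d = z.
    # cfg is indexed as (fx, fy, cx, cy, w, h).
    xyz = tf.reshape(xyz_pt, (-1, 3))
    x, y, z = tf.unstack(xyz, axis=-1)
    u = x / z * cfg[0] + cfg[2]
    v = y / z * cfg[1] + cfg[3]
    return tf.reshape(tf.stack([u, v, z], axis=-1), (-1, ))
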
Example #3
def crop_from_xyz_pose(dm, pose, cfg, out_w, out_h, pad=20.0):
    '''Crop the depth map using a bounding box derived from the pose.
    Args:
        dm: depth map
        pose: either the estimated or the ground-truth pose, in xyz coordinates
        cfg: the initial camera configuration
        out_w: output width
        out_h: output height
        pad: padding (in pixels) added around the pose's bounding box
    Returns:
        cropped_dm: the cropped depth map
        pose: the input pose, unchanged
        new_cfg: the camera configuration for the cropped depth map
    '''
    with tf.name_scope('crop'):
        # determine the bounding box from the pose
        in_h, in_w = dm.get_shape()[0].value, dm.get_shape()[1].value
        uvd_pose = tf.reshape(xyz2uvd_op(pose, cfg), (-1, 3))
        min_coor = tf.reduce_min(uvd_pose, axis=0)
        max_coor = tf.reduce_max(uvd_pose, axis=0)

        top = tf.minimum(tf.maximum(min_coor[1] - pad, 0.0), cfg.h - 2 * pad)
        left = tf.minimum(tf.maximum(min_coor[0] - pad, 0.0), cfg.w - 2 * pad)
        bottom = tf.maximum(tf.minimum(max_coor[1] + pad, cfg.h),
                            tf.cast(top, tf.float32) + 2 * pad - 1)
        right = tf.maximum(tf.minimum(max_coor[0] + pad, cfg.w),
                           tf.cast(left, tf.float32) + 2 * pad - 1)

        top = tf.cast(top, tf.int32)
        left = tf.cast(left, tf.int32)
        bottom = tf.cast(bottom, tf.int32)
        right = tf.cast(right, tf.int32)

        cropped_dm = tf.image.crop_to_bounding_box(dm,
                                                   offset_height=top,
                                                   offset_width=left,
                                                   target_height=bottom - top,
                                                   target_width=right - left)

        # pad the crop to a square with side longer_edge, keeping it centred
        longer_edge = tf.maximum(bottom - top, right - left)
        offset_height = tf.to_int32(tf.divide(longer_edge - bottom + top, 2))
        offset_width = tf.to_int32(tf.divide(longer_edge - right + left, 2))
        cropped_dm = tf.image.pad_to_bounding_box(cropped_dm,
                                                  offset_height=offset_height,
                                                  offset_width=offset_width,
                                                  target_height=longer_edge,
                                                  target_width=longer_edge)
        cropped_dm = tf.image.resize_images(cropped_dm, (out_h, out_w))

        # further erase the background by thresholding on depth
        uvd_list = tf.unstack(uvd_pose, axis=-1)

        uu = tf.clip_by_value(tf.to_int32(uvd_list[0]), 0, in_w - 1)
        vv = tf.clip_by_value(tf.to_int32(uvd_list[1]), 0, in_h - 1)

        # sample the depth at the projected joint locations, drop invalid
        # readings (<= 100) and keep pixels within 250 of the closest joint
        dd = tf.gather_nd(dm, tf.stack([vv, uu], axis=-1))
        dd = tf.boolean_mask(dd, tf.greater(dd, 100))
        d_th = tf.reduce_min(dd) + 250.0
        if FLAGS.dataset == 'icvl':
            cropped_dm = tf.where(tf.less(cropped_dm, 500.0), cropped_dm,
                                  tf.zeros_like(cropped_dm))
        else:
            cropped_dm = tf.where(tf.less(cropped_dm, d_th), cropped_dm,
                                  tf.zeros_like(cropped_dm))

    with tf.name_scope('cfg'):
        ratio_x = tf.cast(longer_edge / out_w, tf.float32)
        ratio_y = tf.cast(longer_edge / out_h, tf.float32)
        top = tf.cast(top, tf.float32)
        left = tf.cast(left, tf.float32)

        new_cfg = tf.stack([
            cfg.fx / ratio_x, cfg.fy / ratio_y,
            (cfg.cx - left + tf.to_float(offset_width)) / ratio_x,
            (cfg.cy - top + tf.to_float(offset_height)) / ratio_y,
            tf.cast(out_w, tf.float32),
            tf.cast(out_h, tf.float32)
        ],
                           axis=0)
    return [cropped_dm, pose, new_cfg]
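
A short usage sketch (the tensor names, shapes and the 128x128 output size are illustrative, not taken from the original code); note that crop_from_xyz_pose reads FLAGS.dataset, which must be defined elsewhere.

# dm: a single depth map of shape (in_h, in_w, 1), pose: its xyz pose,
# cfg: the CameraConfig of the full-resolution camera.
cropped_dm, pose, new_cfg = crop_from_xyz_pose(dm, pose, cfg,
                                               out_w=128, out_h=128)
# new_cfg holds the intrinsics of the 128x128 crop, so the same pose can be
# re-projected into the cropped image, e.g. via xyz2uvd_op(pose, new_cfg).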