Example #1
im_ids = sorted(
    [int(e.split('info_')[1].split('.txt')[0]) for e in rgb_fpaths])

scene_gt = {}
for obj_name in sorted(obj_names_id_map.keys()):

    # Load object model
    obj_id = obj_names_id_map[obj_name]
    model = inout.load_ply(model_mpath.format(obj_id))

    # Transformation which was applied to the object models (its inverse will
    # be applied to the GT poses):
    # 1) Translate the bounding box center to the origin - Brachmann et al.
    # already translated the bounding box to the center
    # 2) Rotate around Y axis by pi + flip for some objects
    R_model = transform.rotation_matrix(math.pi, [0, 1, 0])[:3, :3]

    # Extra rotation around Z axis by pi for some models
    if hinter_flip.obj_flip_z[obj_id]:
        R_z = transform.rotation_matrix(math.pi, [0, 0, 1])[:3, :3]
        R_model = R_z.dot(R_model)

    # The ground truth poses of Brachmann et al. are related to a different
    # model coordinate system - to get the original Hinterstoisser's orientation
    # of the objects, we need to rotate by pi/2 around X and by pi/2 around Z
    R_z_90 = transform.rotation_matrix(-math.pi * 0.5, [0, 0, 1])[:3, :3]
    R_x_90 = transform.rotation_matrix(-math.pi * 0.5, [1, 0, 0])[:3, :3]
    R_conv = np.linalg.inv(R_model.dot(R_z_90.dot(R_x_90)))
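    # R_conv is the inverse of the full composite rotation applied to the models
    # (the Y flip, the optional Z flip and the X/Z quarter turns), so composing
    # it with the GT rotations undoes the change of model coordinate system.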

    for im_id in im_ids:
        if im_id % 10 == 0:
Example #2
    # Get list of image IDs
    color_fpaths = glob.glob(rgb_in_mpath.format(scene_id, '*'))
    im_ids = sorted(
        [int(e.split('color')[1].split('.jpg')[0]) for e in color_fpaths])

    # Load object model
    obj_id = scene_id  # The object id is the same as scene id for this dataset
    model = inout.load_ply(model_mpath.format(obj_id))

    # Transformation which was applied to the object models (its inverse will
    # be applied to the GT poses):
    # 1) Translate the bounding box center to the origin
    # 2) Rotate around Y axis by pi + flip for some objects
    t_model = bbox_cens[obj_id - 1, :].reshape((3, 1))
    R_model = transform.rotation_matrix(math.pi, [0, 1, 0])[:3, :3]

    # Extra rotation around Z axis by pi for some models
    if hinter_flip.obj_flip_z[obj_id]:
        R_z = transform.rotation_matrix(math.pi, [0, 0, 1])[:3, :3]
        R_model = R_z.dot(R_model)

    R_model_inv = np.linalg.inv(R_model)
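    # R_model_inv undoes the Y flip (and the optional Z flip), and t_model is the
    # bounding-box center that was subtracted from the model points, so together
    # they map the GT poses back to the original model coordinate system.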

    for im_id in im_ids:
        if im_id % 10 == 0:
            print('scene,view: ' + str(scene_id) + ',' + str(im_id))

        # Load the RGB and depth image
        rgb = inout.load_im(rgb_in_mpath.format(scene_id, im_id))
        depth = load_hinter_depth(depth_in_mpath.format(scene_id, im_id))
Example #3
    # Flip the image by 180 degrees (equivalent to image[::-1, ::-1, :]).
    for i in range(image.shape[0]):
        for j in range(image.shape[1]):
            ret[i, j, :] = image[image.shape[0] - i - 1, image.shape[1] - j - 1, :]

    return ret

# tuning pose (translation in meters, rotation in radians)
tx = -.001  # m
ty = -.005
tz = -.001
rz = 2. / 180. * math.pi  # rad

xaxis, yaxis, zaxis = [1, 0, 0], [0, 1, 0], [0, 0, 1]
Tt = tf.translation_matrix([tx, ty, tz])
Rt = tf.rotation_matrix(rz, zaxis)

TT = np.eye(4)
TT[:3, :3] = Rt[:3, :3]  # rotation part of the pose-tuning transform
TT[:3, 3] = Tt[:3, 3]    # translation part of the pose-tuning transform

# print('Tt = ')
# print(Tt)
# print('Rt = ')
# print(Rt)
print('TT = ')
print(TT)
# TT1 = np.dot(Tt,Rt)
# print('TT1 = ')
# print(TT1)
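
# Sanity check: Tt is a pure translation and Rt a pure rotation about the origin,
# so composing them as np.dot(Tt, Rt) yields exactly the matrix assembled above.
assert np.allclose(TT, np.dot(Tt, Rt))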
Example #4
def sample_views(min_n_views,
                 radius=1,
                 azimuth_range=(0, 2 * math.pi),
                 elev_range=(-0.5 * math.pi, 0.5 * math.pi)):
    '''
    Viewpoint sampling from a view sphere.

    :param min_n_views: Minimum required number of views on the whole view sphere.
    :param radius: Radius of the view sphere.
    :param azimuth_range: Azimuth range from which the viewpoints are sampled.
    :param elev_range: Elevation range from which the viewpoints are sampled.
    :return: List of views, each represented by a 3x3 rotation matrix and
             a 3x1 translation vector.
    '''

    # Get points on a sphere (the `if True` keeps the hinter_sampling variant;
    # switch it to False to use Fibonacci sampling instead)
    if True:
        pts, pts_level = hinter_sampling(min_n_views, radius=radius)
    else:
        pts = fibonacci_sampling(min_n_views + 1, radius=radius)
        pts_level = [0 for _ in range(len(pts))]

    views = []
    for pt in pts:
        # Azimuth from (0, 2 * pi)
        azimuth = math.atan2(pt[1], pt[0])
        if azimuth < 0:
            azimuth += 2.0 * math.pi

        # Elevation from (-0.5 * pi, 0.5 * pi)
        a = np.linalg.norm(pt)
        b = np.linalg.norm([pt[0], pt[1], 0])
        elev = math.acos(b / a)
        if pt[2] < 0:
            elev = -elev
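        # acos(b / a) gives the magnitude of the elevation angle between the
        # viewpoint and the XY plane; the sign of pt[2] gives its direction.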

        # if hemisphere and (pt[2] < 0 or pt[0] < 0 or pt[1] < 0):
        if not (azimuth_range[0] <= azimuth <= azimuth_range[1]
                and elev_range[0] <= elev <= elev_range[1]):
            continue

        # Rotation matrix
        # The code was adopted from gluLookAt function (uses OpenGL coordinate system):
        # [1] http://stackoverflow.com/questions/5717654/glulookat-explanation
        # [2] https://www.opengl.org/wiki/GluLookAt_code
        f = -np.array(pt)  # Forward direction
        f /= np.linalg.norm(f)
        u = np.array([0.0, 0.0, 1.0])  # Up direction
        s = np.cross(f, u)  # Side direction
        if np.count_nonzero(s) == 0:
            # f and u are parallel, i.e. we are looking along or against Z axis
            s = np.array([1.0, 0.0, 0.0])
        s /= np.linalg.norm(s)
        u = np.cross(s, f)  # Recompute up
        R = np.array([[s[0], s[1], s[2]], [u[0], u[1], u[2]],
                      [-f[0], -f[1], -f[2]]])

        # Convert from OpenGL to OpenCV coordinate system
        R_yz_flip = transform.rotation_matrix(math.pi, [1, 0, 0])[:3, :3]
        R = R_yz_flip.dot(R)
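        # The pi rotation about X negates the Y and Z rows of R, turning the
        # OpenGL camera convention (Y up, looking along -Z) into the OpenCV one
        # (Y down, looking along +Z).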

        # Translation vector
        t = -R.dot(np.array(pt).reshape((3, 1)))

        views.append({'R': R, 't': t})

    return views, pts_level
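
# Usage sketch: each returned view looks at the origin from a point on the view
# sphere, so the norm of every translation should equal the sphere radius
# (min_n_views=100 and radius=0.7 below are just illustrative values).
views, pts_level = sample_views(100, radius=0.7, elev_range=(0, 0.5 * math.pi))
for view in views:
    assert np.isclose(np.linalg.norm(view['t']), 0.7)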
Example #5
def augmentAcPData(params):
    '''
    Expected fields of `params`:
        params.DATA_ROOT
        params.PLY_MODEL
        params.pose_tuning = [tx, ty, tz, rz]  (translation in meters, rotation in degrees)
        params.frame_num
    '''

    # DATA_ROOT = r'D:\SL\PoseCNN\Loc_data\DUCK\POSE_iPBnet'
    # DATA_ROOT = r'D:\SL\PoseCNN\Loc_data\DUCK\POSE_iPBnet'
    # DATA_ROOT = '/media/shawnle/Data0/YCB_Video_Dataset/SLM_datasets/Exhibition/DUCK'
    DATA_ROOT = params.DATA_ROOT
    p0 = os.path.abspath(DATA_ROOT)

    # GEN_ROOT = r'D:\SL\Summer_2019\original_sixd_toolkit\sixd_toolkit\data\gen_data'
    GEN_ROOT = DATA_ROOT

    # model = inout.load_ply(r'D:\SL\Summer_2019\sixd_toolkit\data\sheep\textured.ply')
    # model = inout.load_ply(r'D:\SL\Summer_2019\sixd_toolkit\data\ply\rotated.ply')
    # model = inout.load_ply(r'D:\SL\PoseCNN\Loc_data\DUCK\015_duck_toy\textured_m_text.ply')
    # model = inout.load_ply('/media/shawnle/Data0/YCB_Video_Dataset/YCB_Video_Dataset/data_syn_LOV/models/015_duck_toy/textured_dense.ply')
    # model = inout.load_ply('/home/shawnle/Downloads/textured.ply')

    model = inout.load_ply(params.PLY_MODEL)

    print('model keys', model.keys())

    max_pt = np.amax(model['pts'], axis=0)
    min_pt = np.amin(model['pts'], axis=0)
    extents = np.abs(max_pt) + np.abs(min_pt)
    max_all_dim = np.amax(extents)
    assert max_all_dim < 1., 'Unit is millimeter? Meter should be used instead.'


    # meta_file = os.path.join(p0, '{:06d}'.format(0) + '-meta.json')

    # print('opening ', meta_file)
    # with open(meta_file, 'r') as f:
    #     meta_json = json.load(f)

    # print('kyes ',meta_json.keys() )
    # print('poses ')
    # pose = np.array(meta_json['poses']).reshape(4,4)
    # print(pose)

    # print('intrinsic_matrix ')
    # print(np.array(meta_json['intrinsic_matrix']).reshape(3,3))

    # tuning pose
    tx = params.pose_tuning[0]  #-.001 # m
    ty = params.pose_tuning[1]  # -.005
    tz = params.pose_tuning[2]  # -.001
    rz = params.pose_tuning[3] / 180. * math.pi  #2./180.*math.pi # rad

    xaxis, yaxis, zaxis = [1, 0, 0], [0, 1, 0], [0, 0, 1]
    Tt = tf.translation_matrix([tx, ty, tz])
    Rt = tf.rotation_matrix(rz, zaxis)

    TT = np.eye(4)
    TT[:3, :3] = Rt[:3, :3]
    TT[:3, 3] = Tt[:3, 3]

    # print('Tt = ')
    # print(Tt)
    # print('Rt = ')
    # print(Rt)
    print('TT = ')
    print(TT)
    # TT1 = np.dot(Tt,Rt)
    # print('TT1 = ')
    # print(TT1)

    for i in range(params.frame_num):
        file_name = os.path.join(p0, '{:06d}'.format(i) + '-color.png')
        print(file_name)

        rgb = cv2.imread(file_name, cv2.IMREAD_UNCHANGED)
        im_size = [rgb.shape[1], rgb.shape[0]]
        # cv2.imshow("rgb", rgb)
        # cv2.waitKey(1)

        # meta_file = os.path.join(p0, '{:06d}'.format(i) + '-meta.mat')
        # meta = scipy.io.loadmat(meta_file)

        meta_file = os.path.join(p0, '{:06d}'.format(i) + '-meta.json')
        print('opening ', meta_file)
        with open(meta_file, 'r') as f:
            meta = json.load(f)

        K = np.array(meta['intrinsic_matrix']).reshape(3, 3)
        # print('K',K)
        poses = np.array(meta['poses']).reshape(4, 4)
        R = poses[:3, :3]
        # print ('R',R)
        t = poses[:3, 3]
        t /= 1000.  # the meta file stores translation in millimeters; convert to meters
        # print('t',t)

        # update with tuning
        Rt44 = np.eye(4)
        Rt44[:3, :3] = R
        Rt44[:3, 3] = t
        Rt44 = np.dot(Rt44, TT)
        R = Rt44[:3, :3]
        t = Rt44[:3, 3]
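        # Right-multiplying by TT applies the tuning in the object (model) frame:
        # model points are first offset/rotated by TT and then mapped to the
        # camera frame by the original pose.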

        mdl_proj, mdl_proj_depth = renderer.render(model,
                                                   im_size,
                                                   K,
                                                   R,
                                                   t,
                                                   mode='rgb+depth',
                                                   clip_near=.3,
                                                   clip_far=6.,
                                                   shading='flat')
        # print("dtype", mdl_proj.dtype)
        # print("max min", np.amax(mdl_proj), np.amin(mdl_proj))

        # cv2.imshow('model', mdl_proj)
        # cv2.waitKey(1)

        # depth image format is uint16
        # convert depth (see PCNN train_net.py)
        factor_depth = 10000
        zfar = 6.0
        znear = 0.25
        im_depth_raw = factor_depth * 2 * zfar * znear / (
            zfar + znear - (zfar - znear) * (2 * mdl_proj_depth - 1))
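        # This linearization assumes mdl_proj_depth holds normalized depth-buffer
        # values in [0, 1]: it maps 0 to znear and 1 to zfar, and factor_depth =
        # 10000 then stores the metric depth in 0.1 mm units (pose units are meters).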
        I = np.where(mdl_proj_depth == 1)
        im_depth_raw[I[0], I[1]] = 0

        depth_file = os.path.join(GEN_ROOT, '{:06d}-depth.png'.format(i))
        cv2.imwrite(depth_file, im_depth_raw.astype(np.uint16))
        print('writing depth ' + depth_file)

        label_file = os.path.join(GEN_ROOT, '{:06d}-label.png'.format(i))
        # Build the label image: mark the pixels covered by the rendered model and set them to the class id value
        I = np.where(mdl_proj_depth > 0)
        # print('I shape',I.shape)
        label = np.zeros((rgb.shape[0], rgb.shape[1]))
        if len(I[0]) > 0:
            print('len I0', len(I[0]))
            print('label is exported')
            label[I[0], I[1]] = 1
        cv2.imwrite(label_file, label.astype(np.uint8))
        print('writing label ' + label_file)

        blend_name = os.path.join(GEN_ROOT, "{:06d}-blend.png".format(i))
        gray = cv2.cvtColor(rgb, cv2.COLOR_BGR2GRAY)
        mdl_proj_g = cv2.cvtColor(mdl_proj, cv2.COLOR_BGR2GRAY)
        alf = .5
        bet = 1 - alf
        bld = cv2.addWeighted(mdl_proj_g, alf, gray, bet, 0.)
        cv2.imwrite(blend_name, bld)
        cv2.imshow('blend', bld)
        cv2.waitKey(1)
        print('writing blend ' + blend_name)

        # Revise the pose meta file: the pose is now stored in meters
        # Save the updated meta data
        meta_file_rev = os.path.join(p0, '{:06d}'.format(i) + '-meta_rev.json')
        meta['poses'] = Rt44.flatten().tolist()
        with open(meta_file_rev, 'w') as fp:
            json.dump(meta, fp)
        print('writing meta ', meta_file_rev)
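
# Usage sketch for augmentAcPData: the paths and frame count below are placeholders;
# only the field names and units come from the docstring above.
from types import SimpleNamespace

params = SimpleNamespace(
    DATA_ROOT='/path/to/scene',             # folder with {:06d}-color.png and -meta.json files
    PLY_MODEL='/path/to/model.ply',         # PLY model with vertex units in meters
    pose_tuning=[-.001, -.005, -.001, 2.],  # tx, ty, tz in meters, rz in degrees
    frame_num=100)
augmentAcPData(params)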