im_ids = sorted(
    [int(e.split('info_')[1].split('.txt')[0]) for e in rgb_fpaths])

scene_gt = {}
for obj_name in sorted(obj_names_id_map.keys()):

    # Load object model
    obj_id = obj_names_id_map[obj_name]
    model = inout.load_ply(model_mpath.format(obj_id))

    # Transformation which was applied to the object models (its inverse will
    # be applied to the GT poses):
    # 1) Translate the bounding box center to the origin - Brachmann et al.
    #    already translated the bounding box to the center
    # 2) Rotate around Y axis by pi + flip for some objects
    R_model = transform.rotation_matrix(math.pi, [0, 1, 0])[:3, :3]

    # Extra rotation around Z axis by pi for some models
    if hinter_flip.obj_flip_z[obj_id]:
        R_z = transform.rotation_matrix(math.pi, [0, 0, 1])[:3, :3]
        R_model = R_z.dot(R_model)

    # The ground-truth poses of Brachmann et al. are related to a different
    # model coordinate system - to get the original Hinterstoisser orientation
    # of the objects, we need to rotate by pi/2 around X and by pi/2 around Z
    R_z_90 = transform.rotation_matrix(-math.pi * 0.5, [0, 0, 1])[:3, :3]
    R_x_90 = transform.rotation_matrix(-math.pi * 0.5, [1, 0, 0])[:3, :3]
    R_conv = np.linalg.inv(R_model.dot(R_z_90.dot(R_x_90)))

    for im_id in im_ids:
        if im_id % 10 == 0:
# Get list of image IDs
color_fpaths = glob.glob(rgb_in_mpath.format(scene_id, '*'))
im_ids = sorted(
    [int(e.split('color')[1].split('.jpg')[0]) for e in color_fpaths])

# Load object model
obj_id = scene_id  # The object ID is the same as the scene ID for this dataset
model = inout.load_ply(model_mpath.format(obj_id))

# Transformation which was applied to the object models (its inverse will
# be applied to the GT poses):
# 1) Translate the bounding box center to the origin
# 2) Rotate around Y axis by pi + flip for some objects
t_model = bbox_cens[obj_id - 1, :].reshape((3, 1))
R_model = transform.rotation_matrix(math.pi, [0, 1, 0])[:3, :3]

# Extra rotation around Z axis by pi for some models
if hinter_flip.obj_flip_z[obj_id]:
    R_z = transform.rotation_matrix(math.pi, [0, 0, 1])[:3, :3]
    R_model = R_z.dot(R_model)
R_model_inv = np.linalg.inv(R_model)

for im_id in im_ids:
    if im_id % 10 == 0:
        print('scene,view: ' + str(scene_id) + ',' + str(im_id))

    # Load the RGB and depth image
    rgb = inout.load_im(rgb_in_mpath.format(scene_id, im_id))
    depth = load_hinter_depth(depth_in_mpath.format(scene_id, im_id))
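# --------------------------------------------------------------------------
# Sketch (not part of the original script): one consistent way the inverse of
# the model transform above can be applied to a ground-truth pose. It assumes
# the re-centered models were produced as x_new = R_model.dot(x_orig - t_model)
# and that a GT pose (R_gt, t_gt) maps the *original* model into the camera
# frame, p_cam = R_gt.dot(x_orig) + t_gt; R_gt and t_gt are hypothetical names.
# Substituting x_orig = R_model_inv.dot(x_new) + t_model gives:
#   R_new = R_gt.dot(R_model_inv)
#   t_new = R_gt.dot(t_model) + t_gt
def adjust_gt_pose(R_gt, t_gt, R_model_inv, t_model):
    """Re-express a GT pose w.r.t. the re-centered/rotated model (sketch)."""
    R_new = R_gt.dot(R_model_inv)
    t_new = R_gt.dot(t_model) + t_gt  # t_model, t_gt: 3x1 column vectors
    return R_new, t_new
# --------------------------------------------------------------------------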
    for i in range(image.shape[0]):
        for j in range(image.shape[1]):
            ret[i, j, :] = image[image.shape[0] - i - 1, image.shape[1] - j - 1, :]
    return ret

# Tuning pose
tx = -.001  # m
ty = -.005
tz = -.001
rz = 2. / 180. * math.pi  # rad

xaxis, yaxis, zaxis = [1, 0, 0], [0, 1, 0], [0, 0, 1]
Tt = tf.translation_matrix([tx, ty, tz])
Rt = tf.rotation_matrix(rz, zaxis)

TT = np.eye(4)
TT[:3, :3] = Rt[:3, :3]
TT[:3, 3] = Tt[:3, 3]

print('TT = ')
print(TT)
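# Sanity check (sketch, not in the original script): TT as built above equals
# the product Tt @ Rt, i.e. "rotate about Z, then translate":
TT_check = np.dot(Tt, Rt)
assert np.allclose(TT, TT_check)
# If TT is later right-multiplied onto an object pose (pose_4x4 @ TT), the
# tweak acts in the object's own frame; left-multiplying (TT @ pose_4x4) would
# instead apply it in the camera frame.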
def sample_views(min_n_views, radius=1,
                 azimuth_range=(0, 2 * math.pi),
                 elev_range=(-0.5 * math.pi, 0.5 * math.pi)):
    '''
    Viewpoint sampling from a view sphere.

    :param min_n_views: Minimum required number of views on the whole view sphere.
    :param radius: Radius of the view sphere.
    :param azimuth_range: Azimuth range from which the viewpoints are sampled.
    :param elev_range: Elevation range from which the viewpoints are sampled.
    :return: List of views, each represented by a 3x3 rotation matrix and
             a 3x1 translation vector.
    '''
    # Get points on a sphere
    if True:
        pts, pts_level = hinter_sampling(min_n_views, radius=radius)
    else:
        pts = fibonacci_sampling(min_n_views + 1, radius=radius)
        pts_level = [0 for _ in range(len(pts))]

    views = []
    for pt in pts:
        # Azimuth from (0, 2 * pi)
        azimuth = math.atan2(pt[1], pt[0])
        if azimuth < 0:
            azimuth += 2.0 * math.pi

        # Elevation from (-0.5 * pi, 0.5 * pi)
        a = np.linalg.norm(pt)
        b = np.linalg.norm([pt[0], pt[1], 0])
        elev = math.acos(b / a)
        if pt[2] < 0:
            elev = -elev

        # Skip viewpoints outside the requested azimuth and elevation ranges
        if not (azimuth_range[0] <= azimuth <= azimuth_range[1] and
                elev_range[0] <= elev <= elev_range[1]):
            continue

        # Rotation matrix
        # The code was adopted from the gluLookAt function (uses the OpenGL
        # coordinate system):
        # [1] http://stackoverflow.com/questions/5717654/glulookat-explanation
        # [2] https://www.opengl.org/wiki/GluLookAt_code
        f = -np.array(pt)  # Forward direction
        f /= np.linalg.norm(f)
        u = np.array([0.0, 0.0, 1.0])  # Up direction
        s = np.cross(f, u)  # Side direction
        if np.count_nonzero(s) == 0:
            # f and u are parallel, i.e. we are looking along or against the Z axis
            s = np.array([1.0, 0.0, 0.0])
        s /= np.linalg.norm(s)
        u = np.cross(s, f)  # Recompute up
        R = np.array([[s[0], s[1], s[2]],
                      [u[0], u[1], u[2]],
                      [-f[0], -f[1], -f[2]]])

        # Convert from OpenGL to OpenCV coordinate system
        R_yz_flip = transform.rotation_matrix(math.pi, [1, 0, 0])[:3, :3]
        R = R_yz_flip.dot(R)

        # Translation vector
        t = -R.dot(np.array(pt).reshape((3, 1)))

        views.append({'R': R, 't': t})

    return views, pts_level
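# --------------------------------------------------------------------------
# Example usage (sketch, not part of the original module): sample viewpoints on
# the upper hemisphere only and inspect the first pose. The argument values are
# illustrative.
if __name__ == '__main__':
    views, levels = sample_views(
        min_n_views=100, radius=0.4,
        azimuth_range=(0, 2 * math.pi),
        elev_range=(0, 0.5 * math.pi))  # upper hemisphere only
    print('Sampled {} views'.format(len(views)))
    print('R:\n{}\nt:\n{}'.format(views[0]['R'], views[0]['t']))
# --------------------------------------------------------------------------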
def augmentAcPData(params):
    '''
    params.DATA_ROOT
    params.PLY_MODEL
    params.pose_tuning = [tx, ty, tz, rz] -> transl: meter, rot: deg
    params.frame_num
    '''
    DATA_ROOT = params.DATA_ROOT
    p0 = os.path.abspath(DATA_ROOT)
    GEN_ROOT = DATA_ROOT

    model = inout.load_ply(params.PLY_MODEL)
    print('model keys', model.keys())

    # Check that the model is expressed in meters: its extent along every axis
    # should stay well below 1.0
    max_pt = np.amax(model['pts'], axis=0)
    min_pt = np.amin(model['pts'], axis=0)
    extents = np.abs(max_pt) + np.abs(min_pt)
    max_all_dim = np.amax(extents)
    assert max_all_dim < 1., 'Unit is millimeter? Meter should be used instead.'

    # Tuning pose: translation in meters, rotation around Z converted from
    # degrees to radians
    tx = params.pose_tuning[0]
    ty = params.pose_tuning[1]
    tz = params.pose_tuning[2]
    rz = params.pose_tuning[3] / 180. * math.pi

    xaxis, yaxis, zaxis = [1, 0, 0], [0, 1, 0], [0, 0, 1]
    Tt = tf.translation_matrix([tx, ty, tz])
    Rt = tf.rotation_matrix(rz, zaxis)

    TT = np.eye(4)
    TT[:3, :3] = Rt[:3, :3]
    TT[:3, 3] = Tt[:3, 3]

    print('TT = ')
    print(TT)

    for i in range(params.frame_num):
        # Load the color image
        file_name = os.path.join(p0, '{:06d}'.format(i) + '-color.png')
        print(file_name)
        rgb = cv2.imread(file_name, cv2.IMREAD_UNCHANGED)
        im_size = [rgb.shape[1], rgb.shape[0]]

        # Load the camera intrinsics and the object pose from the meta file
        meta_file = os.path.join(p0, '{:06d}'.format(i) + '-meta.json')
        print('opening ', meta_file)
        with open(meta_file, 'r') as f:
            meta = json.load(f)

        K = np.array(meta['intrinsic_matrix']).reshape(3, 3)
        poses = np.array(meta['poses']).reshape(4, 4)
        R = poses[:3, :3]
        t = poses[:3, 3]
        t /= 1000.  # the stored translation is in millimeters
        # Update the pose with the tuning transform (applied in the object frame)
        Rt44 = np.eye(4)
        Rt44[:3, :3] = R
        Rt44[:3, 3] = t
        Rt44 = np.dot(Rt44, TT)
        R = Rt44[:3, :3]
        t = Rt44[:3, 3]

        # Render the model in the tuned pose
        mdl_proj, mdl_proj_depth = renderer.render(
            model, im_size, K, R, t, mode='rgb+depth',
            clip_near=.3, clip_far=6., shading='flat')

        # Convert the depth buffer to a raw depth image (see PoseCNN
        # train_net.py); the depth is stored as uint16 scaled by factor_depth
        factor_depth = 10000
        zfar = 6.0
        znear = 0.25
        im_depth_raw = factor_depth * 2 * zfar * znear / (
            zfar + znear - (zfar - znear) * (2 * mdl_proj_depth - 1))
        I = np.where(mdl_proj_depth == 1)
        im_depth_raw[I[0], I[1]] = 0

        depth_file = os.path.join(GEN_ROOT, '{:06d}-depth.png'.format(i))
        cv2.imwrite(depth_file, im_depth_raw.astype(np.uint16))
        print('writing depth ' + depth_file)

        # Process the label image, i.e. take the non-zero (rendered) pixels and
        # set them to the class id
        label_file = os.path.join(GEN_ROOT, '{:06d}-label.png'.format(i))
        I = np.where(mdl_proj_depth > 0)
        label = np.zeros((rgb.shape[0], rgb.shape[1]))
        if len(I[0]) > 0:
            print('len I0', len(I[0]))
            print('label is exported')
            label[I[0], I[1]] = 1
        cv2.imwrite(label_file, label.astype(np.uint8))
        print('writing label ' + label_file)

        # Blend the rendering with the input image for visual inspection
        blend_name = os.path.join(GEN_ROOT, '{:06d}-blend.png'.format(i))
        gray = cv2.cvtColor(rgb, cv2.COLOR_BGR2GRAY)
        mdl_proj_g = cv2.cvtColor(mdl_proj, cv2.COLOR_BGR2GRAY)
        alf = .5
        bet = 1 - alf
        bld = cv2.addWeighted(mdl_proj_g, alf, gray, bet, 0.)
        cv2.imwrite(blend_name, bld)
        cv2.imshow('blend', bld)
        cv2.waitKey(1)
        print('writing blend ' + blend_name)

        # Revise the pose meta data - the pose is now expressed in meters
        meta_file_rev = os.path.join(p0, '{:06d}'.format(i) + '-meta_rev.json')
        meta['poses'] = Rt44.flatten().tolist()
        with open(meta_file_rev, 'w') as fp:
            json.dump(meta, fp)
        print('writing meta ', meta_file_rev)
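# --------------------------------------------------------------------------
# Sketch (not part of the original script): the depth conversion above inverts
# the standard OpenGL depth-buffer mapping, so a buffer value of 0 maps to
# znear and 1 maps to zfar (pixels at exactly 1, i.e. background, are zeroed
# before saving). To read a written '-depth.png' back into meters, divide by
# the same factor_depth. The function name and path argument are illustrative.
def read_depth_m(depth_png_path, factor_depth=10000.0):
    """Load a uint16 depth PNG written above and return depth in meters."""
    d = cv2.imread(depth_png_path, cv2.IMREAD_UNCHANGED).astype(np.float64)
    return d / factor_depth  # 0 stays 0 (background / invalid pixels)
# --------------------------------------------------------------------------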