def collect_real_set_info(self):
    """Build, pickle and return the per-image annotation records.

    The number of frames is taken from the entry count of
    ``<linemod_dir>/<rgb_dir>``.  For every frame index ``k`` the pose is read
    from ``info_<k>.txt`` via ``self.read_pose``; frames whose pose is empty
    are skipped.  Each remaining frame yields a dict holding the image/mask
    paths, the converted pose (``'RT'``), the class name, the render type
    ``'real'`` and the 2D projections of several model point sets.

    Returns:
        list[dict]: the records, which are also written to ``self.real_pkl``.
    """
    records = []
    proj = Projector()
    model_db = LineModModelDB()
    pose_converter = PoseTransformer(class_type=self.cls_name)

    frame_count = len(os.listdir(os.path.join(self.linemod_dir, self.rgb_dir)))
    print(frame_count)

    for k in range(frame_count):
        rgb_rel = os.path.join(self.rgb_dir, 'color_{:05}.png'.format(k))
        mask_rel = os.path.join(self.mask_dir, '{}.png'.format(k))

        pose = self.read_pose(
            os.path.join(self.rt_dir, 'info_{:05}.txt'.format(k)))
        if len(pose) == 0:
            # No pose available for this frame: it contributes no record.
            continue

        rt = pose_converter.occlusion_pose_to_blender_pose(pose)
        record = {
            'rgb_pth': rgb_rel,
            'dpt_pth': mask_rel,
            'RT': rt,
            'cls_typ': self.cls_name,
            'rnd_typ': 'real',
        }

        record['corners'] = proj.project(
            model_db.get_corners_3d(self.cls_name), rt, 'linemod')
        record['farthest'] = proj.project(
            model_db.get_farthest_3d(self.cls_name), rt, 'linemod')
        for num in (4, 12, 16, 20):
            key = 'farthest{}'.format(num)
            record[key] = proj.project(
                model_db.get_farthest_3d(self.cls_name, num), rt, 'linemod')
        record['center'] = proj.project(
            model_db.get_centers_3d(self.cls_name)[None, :], rt, 'linemod')
        record['small_bbox'] = proj.project(
            model_db.get_small_bbox(self.cls_name), rt, 'linemod')

        # Rows are the three unit axes with a trailing 0 appended (3x4).
        axes_h = np.concatenate(
            [np.identity(3), np.zeros([3, 1])], 1).astype(np.float32)
        # NOTE(review): this is the only projection using the 'blender'
        # intrinsics; every other one uses 'linemod' -- confirm it is intended.
        record['van_pts'] = proj.project_h(axes_h, rt, 'blender')

        records.append(record)

    save_pickle(records, self.real_pkl)
    return records
def collect_real_set_info(self):
    """Build, pickle and return the annotation records (standard LINEMOD layout).

    The number of frames is taken from the entry count of
    ``<linemod_dir>/<rgb_dir>``.  For every frame index ``k`` the pose is read
    from ``rot<k>.rot`` / ``tra<k>.tra`` in ``self.rt_dir``, converted with
    ``PoseTransformer.orig_pose_to_blender_pose`` and cast to float32.  Each
    record stores the image/mask paths, that pose (``'RT'``), the class name,
    the render type ``'real'`` and the 2D projections of several model point
    sets, all using the ``'linemod'`` intrinsics.

    Returns:
        list[dict]: the records, which are also written to ``self.set_pkl``.
    """
    database = []
    projector = Projector()
    modeldb = LineModModelDB()
    # The transformer depends only on the class name, so build it once rather
    # than once per image.
    pose_transformer = PoseTransformer(class_type=self.cls_name)

    img_num = len(os.listdir(os.path.join(self.linemod_dir, self.rgb_dir)))
    for k in range(img_num):
        data = {}
        data['rgb_pth'] = os.path.join(self.rgb_dir, '{:06}.jpg'.format(k))
        data['dpt_pth'] = os.path.join(self.mask_dir, '{:04}.png'.format(k))

        pose = read_pose(
            os.path.join(self.rt_dir, 'rot{}.rot'.format(k)),
            os.path.join(self.rt_dir, 'tra{}.tra'.format(k)))
        data['RT'] = pose_transformer.orig_pose_to_blender_pose(
            pose).astype(np.float32)

        data['cls_typ'] = self.cls_name
        data['rnd_typ'] = 'real'

        data['corners'] = projector.project(
            modeldb.get_corners_3d(self.cls_name), data['RT'], 'linemod')
        data['farthest'] = projector.project(
            modeldb.get_farthest_3d(self.cls_name), data['RT'], 'linemod')
        for num in [4, 12, 16, 20]:
            data['farthest{}'.format(num)] = projector.project(
                modeldb.get_farthest_3d(self.cls_name, num),
                data['RT'], 'linemod')
        data['center'] = projector.project(
            modeldb.get_centers_3d(self.cls_name)[None, :],
            data['RT'], 'linemod')
        data['small_bbox'] = projector.project(
            modeldb.get_small_bbox(self.cls_name), data['RT'], 'linemod')

        # Rows are the three unit axes with a trailing 0 appended (3x4).
        axis_direct = np.concatenate(
            [np.identity(3), np.zeros([3, 1])], 1).astype(np.float32)
        data['van_pts'] = projector.project_h(
            axis_direct, data['RT'], 'linemod')

        database.append(data)

    save_pickle(database, self.set_pkl)
    return database
def read_benchvise_pose(index):
    """Return the converted benchvise pose for frame ``index`` as float32.

    Reads ``rot<index>.rot`` and ``tra<index>.tra`` from
    ``<cfg.LINEMOD_ORIG>/benchvise/data`` with ``read_pose`` and passes the
    result through ``PoseTransformer.orig_pose_to_blender_pose``.
    """
    data_dir = os.path.join(cfg.LINEMOD_ORIG, 'benchvise/data')
    rot_file = os.path.join(data_dir, 'rot{}.rot'.format(index))
    tra_file = os.path.join(data_dir, 'tra{}.tra'.format(index))

    orig_pose = read_pose(rot_file, tra_file)
    converter = PoseTransformer(class_type='benchvise')
    blender_pose = converter.orig_pose_to_blender_pose(orig_pose)
    return blender_pose.astype(np.float32)
def make_truncated_linemod_dataset():
    """Write a cropped ("truncated") copy of every LINEMOD class.

    For each class in ``cfg.linemod_cls_names`` and each frame index ``k``
    (the frame count is the entry count of the class' ``JPEGImages`` folder):

    * the image and mask are loaded and the mask is binarised,
    * ``LineModImageDB.crop_instance(rgb, msk, 256)`` is called repeatedly
      until the crop keeps at least 20% of the mask pixels or 51 crops have
      been tried, in which case the last crop is used,
    * the crop is saved as ``<k>_rgb.jpg`` / ``<k>_msk.png`` and the converted
      pose plus the shifted intrinsics as ``<k>_info.pkl`` under
      ``<cfg.LINEMOD>/truncated/<cls_name>``.
    """
    for cls_name in cfg.linemod_cls_names:
        print(cls_name)
        linemod_dir = cfg.LINEMOD
        rgb_dir = '{}/JPEGImages'.format(cls_name)
        mask_dir = '{}/mask'.format(cls_name)
        rt_dir = os.path.join(cfg.DATA_DIR, 'LINEMOD_ORIG', cls_name, 'data')

        # makedirs also creates the intermediate 'truncated' directory, which
        # a bare os.mkdir cannot do on a fresh checkout.
        out_dir = os.path.join(linemod_dir, 'truncated', cls_name)
        os.makedirs(out_dir, exist_ok=True)

        projector = Projector()
        # The transformer depends only on the class, so build it once per
        # class rather than once per image.
        pose_transformer = PoseTransformer(class_type=cls_name)

        img_num = len(os.listdir(os.path.join(linemod_dir, rgb_dir)))
        print(img_num)
        for k in range(img_num):
            rgb = imread(
                os.path.join(linemod_dir, rgb_dir, '{:06}.jpg'.format(k)))
            msk = imread(
                os.path.join(linemod_dir, mask_dir, '{:04}.png'.format(k)))
            # Any non-zero channel sum counts as foreground.
            msk = (np.sum(msk, 2) > 0).astype(np.uint8)

            before = np.sum(msk)
            count = 0
            while True:
                rgb_new, msk_new, hbeg, wbeg = LineModImageDB.crop_instance(
                    rgb, msk, 256)
                after = np.sum(msk_new)
                count += 1
                # Accept once enough of the mask survives, or give up
                # retrying and keep the latest crop.
                if after / before >= 0.2 or count > 50:
                    rgb, msk = rgb_new, msk_new
                    break

            imsave(os.path.join(out_dir, '{:06}_rgb.jpg'.format(k)), rgb)
            imsave(os.path.join(out_dir, '{:04}_msk.png'.format(k)), msk)

            pose = read_pose(
                os.path.join(rt_dir, 'rot{}.rot'.format(k)),
                os.path.join(rt_dir, 'tra{}.tra'.format(k)))
            pose = pose_transformer.orig_pose_to_blender_pose(pose).astype(
                np.float32)

            # Work on a copy so the projector's shared matrix is untouched,
            # then shift entries [0, 2] and [1, 2] by the crop offsets.
            K = projector.intrinsic_matrix['linemod'].copy()
            K[0, 2] += wbeg
            K[1, 2] += hbeg

            save_pickle(
                [pose, K], os.path.join(out_dir, '{:06}_info.pkl'.format(k)))
            if k % 500 == 0:
                print(k)
def collect_real_set_info(self):
    """Build, pickle and return the annotation records (pose_real.pkl).

    PVNet's LINEMOD dataset was modified: its ``cat.ply`` differs from the
    ``cat.ply`` of the original LINEMOD by a model offset and rotation, so
    the ground-truth poses of the original dataset need a simple
    transformation to become the poses of this dataset.

    One may ask why RT changes when the images themselves are unchanged: the
    3D features used here are taken from the new model, so the RT computed
    for them has to change accordingly.

    The number of frames is taken from the entry count of
    ``<linemod_dir>/<rgb_dir>``.  For every frame index ``k`` the pose is read
    from ``rot<k>.rot`` / ``tra<k>.tra`` in ``self.rt_dir``, converted with
    ``PoseTransformer.orig_pose_to_blender_pose`` and cast to float32.  Each
    record stores the image/mask paths, that pose (``'RT'``), the class name,
    the render type ``'real'`` and the 2D projections of several model point
    sets, all using the ``'linemod'`` intrinsics.

    Returns:
        list[dict]: the records, which are also written to ``self.real_pkl``.
    """
    database = []
    projector = Projector()
    modeldb = LineModModelDB()
    # The transformer depends only on the class name, so build it once rather
    # than once per image.
    pose_transformer = PoseTransformer(class_type=self.cls_name)

    img_num = len(os.listdir(os.path.join(self.linemod_dir, self.rgb_dir)))
    for k in range(img_num):
        data = {}
        data['rgb_pth'] = os.path.join(self.rgb_dir, '{:06}.jpg'.format(k))
        data['dpt_pth'] = os.path.join(self.mask_dir, '{:04}.png'.format(k))

        pose = read_pose(
            os.path.join(self.rt_dir, 'rot{}.rot'.format(k)),
            os.path.join(self.rt_dir, 'tra{}.tra'.format(k)))
        data['RT'] = pose_transformer.orig_pose_to_blender_pose(
            pose).astype(np.float32)

        data['cls_typ'] = self.cls_name
        data['rnd_typ'] = 'real'

        data['corners'] = projector.project(
            modeldb.get_corners_3d(self.cls_name), data['RT'], 'linemod')
        data['farthest'] = projector.project(
            modeldb.get_farthest_3d(self.cls_name), data['RT'], 'linemod')
        for num in [4, 12, 16, 20]:
            data['farthest{}'.format(num)] = projector.project(
                modeldb.get_farthest_3d(self.cls_name, num),
                data['RT'], 'linemod')
        data['center'] = projector.project(
            modeldb.get_centers_3d(self.cls_name)[None, :],
            data['RT'], 'linemod')
        data['small_bbox'] = projector.project(
            modeldb.get_small_bbox(self.cls_name), data['RT'], 'linemod')

        # Rows are the three unit axes with a trailing 0 appended (3x4).
        axis_direct = np.concatenate(
            [np.identity(3), np.zeros([3, 1])], 1).astype(np.float32)
        data['van_pts'] = projector.project_h(
            axis_direct, data['RT'], 'linemod')

        database.append(data)

    save_pickle(database, self.real_pkl)
    return database
def get_plane_height(self):
    """Return the plane height for ``self.class_type``, caching it on disk.

    The cache is a pickled dict at ``self.plane_height_path`` keyed by class
    type.  On a miss the height is computed as the minimum of the last column
    of the array returned by ``PoseTransformer.get_blender_model()``, stored
    in the dict, and the dict is written back before returning.
    """
    cache_path = self.plane_height_path
    cached = read_pickle(cache_path) if os.path.exists(cache_path) else {}

    if self.class_type in cached:
        return cached[self.class_type]

    model = PoseTransformer(self.class_type).get_blender_model()
    lowest = np.min(model[:, -1])
    cached[self.class_type] = lowest
    save_pickle(cached, cache_path)
    return lowest
def __init__(self, class_type):
    """Set up the paths and pose transformer for ``class_type``.

    Args:
        class_type: class name used to build the mask glob pattern, the
            original-pose directory and the two ``<class_type>_poses.npy``
            paths.

    Side effects:
        Creates ``<cfg.DATA_DIR>/dataset_poses`` and
        ``<cfg.DATA_DIR>/blender_poses`` if they do not exist.

    Raises:
        OSError: if one of those directories cannot be created.
    """
    self.class_type = class_type
    self.mask_path = os.path.join(
        cfg.LINEMOD, '{}/mask/*.png'.format(class_type))
    self.dir_path = os.path.join(
        cfg.LINEMOD_ORIG, '{}/data'.format(class_type))

    # os.makedirs replaces a shelled-out `mkdir -p`: no shell quoting issues
    # with unusual paths, portable, and failures surface as exceptions.
    dataset_pose_dir_path = os.path.join(cfg.DATA_DIR, 'dataset_poses')
    os.makedirs(dataset_pose_dir_path, exist_ok=True)
    self.dataset_poses_path = os.path.join(
        dataset_pose_dir_path, '{}_poses.npy'.format(class_type))

    blender_pose_dir_path = os.path.join(cfg.DATA_DIR, 'blender_poses')
    os.makedirs(blender_pose_dir_path, exist_ok=True)
    self.blender_poses_path = os.path.join(
        blender_pose_dir_path, '{}_poses.npy'.format(class_type))

    self.pose_transformer = PoseTransformer(class_type)
class DataStatistics(object):
    """Collect pose statistics of a LINEMOD class and sample synthetic poses.

    The class reads the training poses of ``class_type``, converts them to
    Euler angles with ``PoseTransformer``, and draws ``cfg.NUM_SYN`` new
    poses that are saved for later use.
    """

    # Alternative 3x4 matrix kept for reference:
    # world_to_camera_pose = np.array([[-1.19209304e-07,   1.00000000e+00,  -2.98023188e-08, 1.19209304e-07],
    #                                  [-8.94069672e-08,   2.22044605e-16,  -1.00000000e+00, 8.94069672e-08],
    #                                  [-1.00000000e+00,  -8.94069672e-08,   1.19209304e-07, 1.00000000e+00]])
    # 3x4 matrix; get_quat_translation extends it to 4x4 and inverts it.
    world_to_camera_pose = np.array(
        [[-1.00000024e+00, -8.74227979e-08, -5.02429621e-15, 8.74227979e-08],
         [5.02429621e-15, 1.34358856e-07, -1.00000012e+00, -1.34358856e-07],
         [8.74227979e-08, -1.00000012e+00, 1.34358856e-07, 1.00000012e+00]])

    def __init__(self, class_type):
        """Set up the paths and pose transformer for ``class_type``.

        Creates ``<cfg.DATA_DIR>/dataset_poses`` and
        ``<cfg.DATA_DIR>/blender_poses`` if they do not exist.

        Raises:
            OSError: if one of those directories cannot be created.
        """
        self.class_type = class_type
        self.mask_path = os.path.join(
            cfg.LINEMOD, '{}/mask/*.png'.format(class_type))
        self.dir_path = os.path.join(
            cfg.LINEMOD_ORIG, '{}/data'.format(class_type))

        # os.makedirs replaces a shelled-out `mkdir -p`: no shell quoting
        # issues with unusual paths, portable, and failures surface as
        # exceptions instead of being silently ignored.
        dataset_pose_dir_path = os.path.join(cfg.DATA_DIR, 'dataset_poses')
        os.makedirs(dataset_pose_dir_path, exist_ok=True)
        self.dataset_poses_path = os.path.join(
            dataset_pose_dir_path, '{}_poses.npy'.format(class_type))

        blender_pose_dir_path = os.path.join(cfg.DATA_DIR, 'blender_poses')
        os.makedirs(blender_pose_dir_path, exist_ok=True)
        self.blender_poses_path = os.path.join(
            blender_pose_dir_path, '{}_poses.npy'.format(class_type))

        self.pose_transformer = PoseTransformer(class_type)

    def get_proper_crop_size(self):
        """Print the min/max bounding-box width and height over all masks.

        Every file matching ``self.mask_path`` is converted to a 1-bit image
        and the extent of its set pixels is measured.  Masks without any set
        pixel are skipped; if no mask contributes, a notice is printed
        instead of the statistics.
        """
        widths = []
        heights = []
        for mask_path in glob.glob(self.mask_path):
            with Image.open(mask_path) as image:
                mask = np.array(image.convert('1')).astype(np.int32)
            row_col = np.argwhere(mask == 1)
            if row_col.size == 0:
                # No foreground pixel: there is no bounding box, and np.min
                # on an empty array would raise ValueError.
                continue
            min_row, max_row = np.min(row_col[:, 0]), np.max(row_col[:, 0])
            min_col, max_col = np.min(row_col[:, 1]), np.max(row_col[:, 1])
            widths.append(max_col - min_col)
            heights.append(max_row - min_row)

        if not widths:
            print('no non-empty masks found for {}'.format(self.mask_path))
            return

        widths = np.array(widths)
        heights = np.array(heights)
        print('min width: {}, max width: {}'.format(
            np.min(widths), np.max(widths)))
        print('min height: {}, max height: {}'.format(
            np.min(heights), np.max(heights)))

    def get_quat_translation(self, object_to_camera_pose):
        """Express an object-to-camera pose in the world frame.

        Args:
            object_to_camera_pose: 3x4 pose; a ``[0, 0, 0, 1]`` row is
                appended to make it 4x4.

        Returns:
            tuple: ``(quat, translation)`` where ``quat`` is
            ``mat2quat`` of the 3x3 rotation part and ``translation`` the
            first three entries of the last column of
            ``inv(world_to_camera) @ object_to_camera``.
        """
        object_to_camera_pose = np.append(
            object_to_camera_pose, [[0, 0, 0, 1]], axis=0)
        world_to_camera_pose = np.append(
            self.world_to_camera_pose, [[0, 0, 0, 1]], axis=0)
        object_to_world_pose = np.dot(
            np.linalg.inv(world_to_camera_pose), object_to_camera_pose)
        quat = mat2quat(object_to_world_pose[:3, :3])
        translation = object_to_world_pose[:3, 3]
        return quat, translation

    def get_dataset_poses(self):
        """Return ``(eulers, translations)`` of the training poses.

        If ``self.dataset_poses_path`` exists, it is loaded and split into
        its first three columns and the remaining ones.  Otherwise the
        indices in ``<class_type>/training_range.txt`` are read, each pose is
        loaded from ``rot<idx>.rot`` / ``tra<idx>.tra``, converted with
        ``orig_pose_to_blender_euler``, and the concatenated result is saved
        to ``self.dataset_poses_path``.
        """
        if os.path.exists(self.dataset_poses_path):
            poses = np.load(self.dataset_poses_path)
            return poses[:, :3], poses[:, 3:]

        eulers = []
        translations = []
        train_set = np.loadtxt(
            os.path.join(
                cfg.LINEMOD,
                '{}/training_range.txt'.format(self.class_type)),
            np.int32)
        for idx in train_set:
            rot_path = os.path.join(self.dir_path, 'rot{}.rot'.format(idx))
            tra_path = os.path.join(self.dir_path, 'tra{}.tra'.format(idx))
            pose = read_pose(rot_path, tra_path)
            euler = self.pose_transformer.orig_pose_to_blender_euler(pose)
            eulers.append(euler)
            # The translation is the last column of the pose as read.
            translations.append(pose[:, 3])

        eulers = np.array(eulers)
        translations = np.array(translations)
        np.save(self.dataset_poses_path,
                np.concatenate([eulers, translations], axis=-1))

        return eulers, translations

    def sample_sphere(self, num_samples):
        """Sample ``num_samples`` (azimuth, elevation) pairs in degrees.

        reference: https://zhuanlan.zhihu.com/p/25988652?group_id=828963677192491008

        ``num_points`` candidate indices are laid out with ``z`` evenly
        spaced in [-1, 1) and the azimuth advancing by ``2*pi*phi`` per
        index (``phi = (sqrt(5) - 1) / 2``); only the last ``num_samples``
        indices, i.e. the highest elevations, are returned.  Classes listed
        in ``flat_objects`` start from a higher elevation.

        Returns:
            tuple: ``(azimuths, elevations)`` as 1-D arrays of length
            ``num_samples``.
        """
        flat_objects = [
            '037_scissors', '051_large_clamp', '052_extra_large_clamp'
        ]
        if self.class_type in flat_objects:
            begin_elevation = 30
        else:
            begin_elevation = 0

        # Fraction of the candidate indices that is discarded.
        ratio = (begin_elevation + 90) / 180
        num_points = int(num_samples // (1 - ratio))
        phi = (np.sqrt(5) - 1.0) / 2.

        azimuths = []
        elevations = []
        for n in range(num_points - num_samples, num_points):
            z = 2. * n / num_points - 1.
            azimuths.append(np.rad2deg(2 * np.pi * n * phi % (2 * np.pi)))
            elevations.append(np.rad2deg(np.arcsin(z)))
        return np.array(azimuths), np.array(elevations)

    def sample_poses(self):
        """Sample ``cfg.NUM_SYN`` poses and save them to ``blender_poses_path``.

        Euler angles and translations are each resampled from a Gaussian KDE
        fitted to the training poses; the first two Euler columns are then
        overwritten with the azimuths and elevations from ``sample_sphere``.
        The saved array is the concatenation ``[eulers, translations]``.
        """
        eulers, translations = self.get_dataset_poses()
        num_samples = cfg.NUM_SYN
        azimuths, elevations = self.sample_sphere(num_samples)

        euler_sampler = stats.gaussian_kde(eulers.T)
        eulers = euler_sampler.resample(num_samples).T
        eulers[:, 0] = azimuths
        eulers[:, 1] = elevations

        translation_sampler = stats.gaussian_kde(translations.T)
        translations = translation_sampler.resample(num_samples).T

        np.save(self.blender_poses_path,
                np.concatenate([eulers, translations], axis=-1))