class OcclusionLineModDB(LineModModelDB):
    """Pose reading and mask generation for the Occlusion LINEMOD data.

    The class reads the per-object pose files, converts those poses to the
    "blender" pose convention used by the rest of the code, and renders every
    object to build per-image, per-object masks.
    """

    # Object name -> zero-padded id string.  The integer value of the id is
    # also the label written into the all-objects mask map.
    class_type_to_number = {
        'ape': '001',
        'can': '004',
        'cat': '005',
        'driller': '006',
        'duck': '007',
        'eggbox': '008',
        'glue': '009',
        'holepuncher': '010',
        'benchvise': '011'
    }
    # Class-level caches keyed by class type; they are shared by all instances.
    translation_transforms = {}
    blender_models = {}

    def __init__(self):
        """Set up the dataset path patterns and create the renderer."""
        super(OcclusionLineModDB, self).__init__()
        # Kept as a function-scope import, as in the original code.
        from lib.utils.render_utils import OpenGLRenderer
        # Selected object; set by callers / get_mask_of_all_objects.
        self.class_type = None
        self.xyz_pattern = os.path.join(cfg.OCCLUSION_LINEMOD,
                                        'models/{}/{}.xyz')
        self.rgb_pattern = os.path.join(cfg.OCCLUSION_LINEMOD,
                                        'RGB-D/rgb_noseg/color_{:05}.png')
        self.pose_pattern = os.path.join(cfg.OCCLUSION_LINEMOD,
                                         'poses/{}/info_{:05}.txt')
        self.rgb_dir_path = os.path.join(cfg.OCCLUSION_LINEMOD,
                                         'RGB-D/rgb_noseg')
        self.mask_dir_pattern = os.path.join(cfg.OCCLUSION_LINEMOD,
                                             'masks/{}')
        self.mask_pattern = os.path.join(self.mask_dir_pattern, '{}.png')
        self.opengl_renderer = OpenGLRenderer()

    @staticmethod
    def _ensure_dir(dir_path):
        """Create ``dir_path`` (and missing parents) if it does not exist."""
        if not os.path.isdir(dir_path):
            os.makedirs(dir_path)

    @staticmethod
    def load_ply_model(model_path):
        """Return the x/y/z fields of the first PLY element as an (N, 3) array."""
        ply = PlyData.read(model_path)
        data = ply.elements[0].data
        return np.stack([data['x'], data['y'], data['z']], axis=-1)

    @staticmethod
    def read_pose(pose_path):
        """Parse a pose file into a 3x4 ``[rotation | translation]`` array.

        The file is expected to contain a ``rotation:`` line followed by
        three rotation rows, one separator line, and one translation row.

        Returns:
            The 3x4 pose, or an empty array when the file has no
            ``rotation:`` line.
        """
        with open(pose_path) as pose_info:
            # Strip only the line terminator; slicing off the last character
            # would corrupt a final line that has no trailing newline.
            lines = [line.rstrip('\r\n') for line in pose_info]
        if 'rotation:' not in lines:
            return np.array([])
        row = lines.index('rotation:') + 1
        rotation = np.loadtxt(lines[row:row + 3])
        # lines[row + 3] is the header line between rotation and translation.
        translation = np.loadtxt(lines[row + 4:row + 5])
        # The shape is passed positionally: the `newshape` keyword is
        # deprecated in recent NumPy releases.
        return np.concatenate(
            [rotation, np.reshape(translation, [3, 1])], axis=-1)

    def get_blender_model(self):
        """Return (and cache) the model points of the current class type."""
        if self.class_type in self.blender_models:
            return self.blender_models[self.class_type]
        # `ply_pattern` is not set in this class; presumably provided by
        # LineModModelDB -- TODO confirm.
        blender_model = self.load_ply_model(
            self.ply_pattern.format(self.class_type, self.class_type))
        self.blender_models[self.class_type] = blender_model
        return blender_model

    def get_translation_transform(self):
        """Return (and cache) the centroid offset between the two models.

        The offset is the mean of the axis-permuted ``.xyz`` points minus the
        mean of the blender model points for the current class type.
        """
        if self.class_type in self.translation_transforms:
            return self.translation_transforms[self.class_type]
        model = self.get_blender_model()
        xyz = np.loadtxt(
            self.xyz_pattern.format(
                self.class_type.title(),
                self.class_type_to_number[self.class_type]))
        # Permute the axes of the .xyz points before comparing centroids.
        rotation = np.array([[0., 0., 1.], [1., 0., 0.], [0., 1., 0.]])
        xyz = np.dot(xyz, rotation.T)
        translation_transform = np.mean(xyz, axis=0) - np.mean(model, axis=0)
        self.translation_transforms[self.class_type] = translation_transform
        return translation_transform

    def occlusion_pose_to_blender_pose(self, pose):
        """Convert a 3x4 pose read by ``read_pose`` to the blender convention.

        Args:
            pose: 3x4 array ``[rotation | translation]``.  It is not modified.

        Returns:
            A new 3x4 array holding the converted pose.
        """
        rot = pose[:, :3]
        # Work on a copy: `pose[:, 3]` is a view, and the in-place updates
        # below would otherwise silently modify the caller's array.
        tra = pose[:, 3].copy()
        rotation = np.array([[0., 1., 0.], [0., 0., 1.], [1., 0., 0.]])
        rot = np.dot(rot, rotation)
        tra[1:] *= -1
        # The offset is computed with the rotation *before* its y/z rows are
        # flipped, then flipped itself; keep this statement order.
        translation_transform = np.dot(rot, self.get_translation_transform())
        rot[1:] *= -1
        translation_transform[1:] *= -1
        tra += translation_transform
        return np.concatenate([rot, np.reshape(tra, [3, 1])], axis=-1)

    @staticmethod
    def read_benchvise_pose(index):
        """Read the benchvise pose for ``index`` from the original LINEMOD data.

        Returns the pose converted by ``PoseTransformer`` as float32.
        """
        orig_pose_dir_path = os.path.join(cfg.LINEMOD_ORIG, 'benchvise/data')
        # The unqualified `read_pose` resolves to a module-level helper that
        # takes (rotation file, translation file), not to the staticmethod
        # of this class.
        pose = read_pose(
            os.path.join(orig_pose_dir_path, 'rot{}.rot'.format(index)),
            os.path.join(orig_pose_dir_path, 'tra{}.tra'.format(index)))
        pose_transformer = PoseTransformer(class_type='benchvise')
        return pose_transformer.orig_pose_to_blender_pose(pose).astype(
            np.float32)

    def read_blender_pose(self, index):
        """Return the blender pose of the current class type in image ``index``.

        Returns an empty array when the pose file holds no rotation.
        """
        if self.class_type == 'benchvise':
            return self.read_benchvise_pose(index)
        pose_path = self.pose_pattern.format(self.class_type.title(), index)
        pose = self.read_pose(pose_path)
        if len(pose) == 0:
            return np.array([])
        return self.occlusion_pose_to_blender_pose(pose)

    def get_mask_of_all_objects(self, index):
        """Render all objects of image ``index`` into one label map.

        1. initialize both mask map and depth map
        2. update the mask map and depth map for each object by order
            2.1 compute the col_row and depth of objects
            2.2 for each pixel, if the object's depth is shallower than the
                corresponding one in the depth map, then replace the label
                in the mask map

        Note: this sets ``self.class_type`` as a side effect.

        Returns:
            A 480x640 uint8 array whose values are 0 or the integer object id.
        """
        mask_map = np.zeros(shape=[480, 640], dtype=np.uint8)
        # Starting depth of 10 for every pixel; presumably larger than any
        # rendered depth -- TODO confirm units.
        depth_map = 10 * np.ones(shape=[480, 640], dtype=np.float32)

        def update(class_type):
            self.class_type = class_type
            pose = self.read_blender_pose(index)
            if len(pose) == 0:
                return
            depth = self.opengl_renderer.render(class_type, pose,
                                                camera_type='linemod')
            # (col, row) coordinates of every pixel with non-zero depth.
            col_row = np.argwhere(depth != 0)[:, [1, 0]]
            depth = depth[col_row[:, 1], col_row[:, 0]]
            pixel_depth = depth_map[col_row[:, 1], col_row[:, 0]]
            # Keep only pixels where this object is nearer than what is
            # already recorded.
            inds = (depth < pixel_depth)
            selected_col_row = col_row[inds]
            selected_depth = depth[inds]

            # Flatten to a single pixel id, sort by (pixel, depth) and keep
            # the first (nearest) entry for each pixel.
            selected_col_row = (640 * selected_col_row[:, 1]
                                + selected_col_row[:, 0])
            inds = np.lexsort([selected_depth, selected_col_row])
            selected_col_row = selected_col_row[inds]
            selected_depth = selected_depth[inds]
            selected_col_row, inds = np.unique(selected_col_row,
                                               return_index=True)
            selected_depth = selected_depth[inds]

            selected_row = selected_col_row // 640
            selected_col = selected_col_row % 640
            mask_map[selected_row, selected_col] = int(
                self.class_type_to_number[class_type])
            depth_map[selected_row, selected_col] = selected_depth

        for class_type in self.class_type_to_number:
            update(class_type)

        return mask_map

    def get_mask(self, index):
        """Write the per-object masks of image ``index`` to disk.

        1. get the mask of all objects (loaded from disk when it already
           exists, otherwise rendered and saved)
        2. separate each object's mask from the mask map
        """
        mask_path = self.mask_pattern.format('all_objects', index)
        # Render only when no cached map exists; rendering every object is
        # expensive and the result was previously computed and thrown away.
        if os.path.exists(mask_path):
            mask_map = np.array(Image.open(mask_path))
        else:
            mask_map = self.get_mask_of_all_objects(index)
            Image.fromarray(mask_map).save(mask_path)

        for class_type, class_type_num in self.class_type_to_number.items():
            mask_path = self.mask_pattern.format(class_type, index)
            # Binary (0/1) mask of the pixels labelled with this object id.
            mask = (mask_map == int(class_type_num)).astype(np.uint8)
            Image.fromarray(mask).save(mask_path)

    def get_masks(self):
        """Generate the masks of every object for every image.

        1. mkdir for each category
        2. get masks for each image
        """
        self._ensure_dir(self.mask_dir_pattern.format('all_objects'))
        for class_type in self.class_type_to_number:
            self._ensure_dir(self.mask_dir_pattern.format(class_type))

        # One mask set per entry in the RGB directory; images are addressed
        # by their 0-based position.
        num_masks = len(os.listdir(self.rgb_dir_path))
        for i in range(num_masks):
            self.get_mask(i)
# Ad-hoc renderer invocation; as written these statements sit at module level.
# NOTE(review): this call passes `intrinsic_matrix` / `render_type` keywords,
# while OcclusionLineModDB calls render(class_type, pose, camera_type=...) --
# confirm which signature OpenGLRenderer.render actually supports.
from lib.utils.render_utils import OpenGLRenderer
r = OpenGLRenderer()
r.render(intrinsic_matrix=r.intrinsic_matrix['linemod'], render_type='rgb')