import os
import glob

import numpy as np
from PIL import Image
from scipy import stats
from transforms3d.quaternions import mat2quat

# cfg holds the dataset paths used below (cfg.LINEMOD, cfg.LINEMOD_ORIG,
# cfg.DATA_DIR, cfg.NUM_SYN); the module name is an assumption here.
from config import cfg
from base_utils import PoseTransformer, read_pose, read_pickle, save_pickle
def get_plane_height(self):
    """Return the minimum z coordinate of the blender model, cached in a pickle.

    Note: this is a method of a class (not shown here) that defines
    self.plane_height_path and self.class_type.
    """
    if os.path.exists(self.plane_height_path):
        plane_height = read_pickle(self.plane_height_path)
    else:
        plane_height = {}

    if self.class_type in plane_height:
        return plane_height[self.class_type]
    else:
        pose_transformer = PoseTransformer(self.class_type)
        model = pose_transformer.get_blender_model()
        height = np.min(model[:, -1])
        plane_height[self.class_type] = height
        save_pickle(plane_height, self.plane_height_path)
        return height
class DataStatistics(object):
    # Previous world-to-camera pose, kept for reference:
    # world_to_camera_pose = np.array([[-1.19209304e-07,  1.00000000e+00, -2.98023188e-08,  1.19209304e-07],
    #                                  [-8.94069672e-08,  2.22044605e-16, -1.00000000e+00,  8.94069672e-08],
    #                                  [-1.00000000e+00, -8.94069672e-08,  1.19209304e-07,  1.00000000e+00]])
    world_to_camera_pose = np.array([[-1.00000024e+00, -8.74227979e-08, -5.02429621e-15,  8.74227979e-08],
                                     [ 5.02429621e-15,  1.34358856e-07, -1.00000012e+00, -1.34358856e-07],
                                     [ 8.74227979e-08, -1.00000012e+00,  1.34358856e-07,  1.00000012e+00]])

    def __init__(self, class_type):
        self.class_type = class_type
        self.mask_path = os.path.join(cfg.LINEMOD, '{}/mask/*.png'.format(class_type))
        self.dir_path = os.path.join(cfg.LINEMOD_ORIG, '{}/data'.format(class_type))

        dataset_pose_dir_path = os.path.join(cfg.DATA_DIR, 'dataset_poses')
        os.system('mkdir -p {}'.format(dataset_pose_dir_path))
        self.dataset_poses_path = os.path.join(dataset_pose_dir_path, '{}_poses.npy'.format(class_type))

        blender_pose_dir_path = os.path.join(cfg.DATA_DIR, 'blender_poses')
        os.system('mkdir -p {}'.format(blender_pose_dir_path))
        self.blender_poses_path = os.path.join(blender_pose_dir_path, '{}_poses.npy'.format(class_type))

        self.pose_transformer = PoseTransformer(class_type)

    def get_proper_crop_size(self):
        """Print the min/max width and height of the object masks."""
        mask_paths = glob.glob(self.mask_path)
        widths = []
        heights = []

        for mask_path in mask_paths:
            mask = Image.open(mask_path).convert('1')
            mask = np.array(mask).astype(np.int32)
            row_col = np.argwhere(mask == 1)
            min_row, max_row = np.min(row_col[:, 0]), np.max(row_col[:, 0])
            min_col, max_col = np.min(row_col[:, 1]), np.max(row_col[:, 1])
            width = max_col - min_col
            height = max_row - min_row
            widths.append(width)
            heights.append(height)

        widths = np.array(widths)
        heights = np.array(heights)
        print('min width: {}, max width: {}'.format(np.min(widths), np.max(widths)))
        print('min height: {}, max height: {}'.format(np.min(heights), np.max(heights)))

    def get_quat_translation(self, object_to_camera_pose):
        """Convert an object-to-camera pose into an object-to-world quaternion and translation."""
        object_to_camera_pose = np.append(object_to_camera_pose, [[0, 0, 0, 1]], axis=0)
        world_to_camera_pose = np.append(self.world_to_camera_pose, [[0, 0, 0, 1]], axis=0)
        object_to_world_pose = np.dot(np.linalg.inv(world_to_camera_pose), object_to_camera_pose)
        quat = mat2quat(object_to_world_pose[:3, :3])
        translation = object_to_world_pose[:3, 3]
        return quat, translation

    def get_dataset_poses(self):
        """Load (or compute and cache) the euler angles and translations of the training poses."""
        if os.path.exists(self.dataset_poses_path):
            poses = np.load(self.dataset_poses_path)
            return poses[:, :3], poses[:, 3:]

        eulers = []
        translations = []
        train_set = np.loadtxt(os.path.join(cfg.LINEMOD, '{}/training_range.txt'.format(self.class_type)), np.int32)
        for idx in train_set:
            rot_path = os.path.join(self.dir_path, 'rot{}.rot'.format(idx))
            tra_path = os.path.join(self.dir_path, 'tra{}.tra'.format(idx))
            pose = read_pose(rot_path, tra_path)
            euler = self.pose_transformer.orig_pose_to_blender_euler(pose)
            eulers.append(euler)
            translations.append(pose[:, 3])

        eulers = np.array(eulers)
        translations = np.array(translations)
        np.save(self.dataset_poses_path, np.concatenate([eulers, translations], axis=-1))
        return eulers, translations

    def sample_sphere(self, num_samples):
        """Sample azimuth/elevation angles on the sphere with a Fibonacci lattice.

        Reference: https://zhuanlan.zhihu.com/p/25988652?group_id=828963677192491008
        """
        flat_objects = ['037_scissors', '051_large_clamp', '052_extra_large_clamp']
        if self.class_type in flat_objects:
            begin_elevation = 30
        else:
            begin_elevation = 0
        ratio = (begin_elevation + 90) / 180
        num_points = int(num_samples // (1 - ratio))
        phi = (np.sqrt(5) - 1.0) / 2.  # golden ratio conjugate
        azimuths = []
        elevations = []
        for n in range(num_points - num_samples, num_points):
            z = 2. * n / num_points - 1.
            azimuths.append(np.rad2deg(2 * np.pi * n * phi % (2 * np.pi)))
            elevations.append(np.rad2deg(np.arcsin(z)))
        return np.array(azimuths), np.array(elevations)

    def sample_poses(self):
        """Fit KDEs to the training poses and sample cfg.NUM_SYN synthetic poses for rendering."""
        eulers, translations = self.get_dataset_poses()
        num_samples = cfg.NUM_SYN
        azimuths, elevations = self.sample_sphere(num_samples)
        euler_sampler = stats.gaussian_kde(eulers.T)
        eulers = euler_sampler.resample(num_samples).T
        eulers[:, 0] = azimuths
        eulers[:, 1] = elevations
        translation_sampler = stats.gaussian_kde(translations.T)
        translations = translation_sampler.resample(num_samples).T
        np.save(self.blender_poses_path, np.concatenate([eulers, translations], axis=-1))
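# Usage sketch (not in the original file; assumes cfg.NUM_SYN and the LINEMOD
# paths above are configured):
#
#     statistics = DataStatistics('cat')
#     statistics.get_proper_crop_size()   # print min/max mask crop sizes
#     statistics.sample_poses()           # write blender_poses/cat_poses.npy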
from base_utils import PoseTransformer, read_pose, read_pickle, save_pickle
import cv2


def fuse(img, mask, background):
    """Paste the rendered object (where mask > 0) onto the background image."""
    background = cv2.resize(background, (img.shape[1], img.shape[0]))
    silhouette = mask > 0
    background[silhouette] = img[silhouette]
    return background


class_type = 'cat'
dir_path = os.path.join(cfg.LINEMOD_ORIG, '{}/data'.format(class_type))
train_set = np.loadtxt(os.path.join(cfg.LINEMOD, '{}/training_range.txt'.format(class_type)), np.int32)
trans = PoseTransformer(class_type)

# Only the first training image is used for this demo.
for idx in train_set:
    rot_path = os.path.join(dir_path, 'rot{}.rot'.format(idx))
    tra_path = os.path.join(dir_path, 'tra{}.tra'.format(idx))
    pose = read_pose(rot_path, tra_path)
    pose = trans.orig_pose_to_blender_pose(pose)
    rot, tra = pose[:, :3], pose[:, 3]
    break

# OpenGLRenderer is assumed to come from the surrounding project; adjust the
# import to wherever the project defines it.
from base_utils import OpenGLRenderer

r = OpenGLRenderer()
rgb, mask = r.render(class_type, pose, intrinsic_matrix=r.intrinsic_matrix['linemod'], render_type='all')
rgb = rgb[:, :, [2, 1, 0]]  # OpenCV uses BGR channel order instead of RGB
background = cv2.imread('Lena.png', 1)
rgb = fuse(rgb, mask, background)
cv2.imwrite('RGB2.jpg', rgb)