def test_set(self):
    """Labelled objects behave correctly as set members: duplicates collapse, membership works."""
    obj_a = imeta.LabelledObject(
        class_names=('class_1',),
        bounding_box=(152, 239, 14, 78),
        label_color=(127, 33, 67),
        relative_pose=tf.Transform(location=(123, -45, 23), rotation=(0.5, 0.23, 0.1)),
        object_id='LabelledObject-18569')
    obj_b = imeta.LabelledObject(
        class_names=('class_2',),
        bounding_box=(39, 169, 96, 16),
        label_color=(2, 227, 34),
        relative_pose=tf.Transform(location=(-246, 468, 4), rotation=(0.2, 0.3, 0.4)),
        object_id='LabelledObject-68478')
    obj_c = imeta.LabelledObject(
        class_names=('class_3',),
        bounding_box=(148, 468, 82, 241),
        label_color=(12, 82, 238),
        relative_pose=tf.Transform(location=(85, -648, -376), rotation=(0.8, -0.64, -0.73)),
        object_id='LabelledObject-87684')
    # Repeated insertions of an equal object must collapse to a single member
    subject_set = {obj_a, obj_a, obj_a, obj_b}
    self.assertEqual(2, len(subject_set))
    self.assertIn(obj_a, subject_set)
    self.assertIn(obj_b, subject_set)
    self.assertNotIn(obj_c, subject_set)
def augment(self, image):
    """
    Vertically flip an image, producing a new Image.

    Flips the pixel data and each optional per-pixel map (depth, labels,
    world normals), recomputes the data hash, composes the flip into the
    metadata's cumulative affine transform, and remaps every labelled
    object's bounding box.

    :param image: The source image (core.image.Image)
    :return: A new, vertically flipped image
    """
    flipped_data = np.flipud(image.data)
    # Pixel-index mapping for a vertical flip: y -> (height - 1) - y
    transformation_matrix = np.array([[1, 0, 0],
                                      [0, -1, image.data.shape[0] - 1],
                                      [0, 0, 1]])
    if image.metadata.affine_transformation_matrix is not None:
        # Compose with any transform already recorded, so the stored matrix
        # always maps base-image coordinates to this image's coordinates
        transformation_matrix = np.dot(transformation_matrix,
                                       image.metadata.affine_transformation_matrix)
    return core.image.Image(
        data=flipped_data,
        metadata=image.metadata.clone(
            # Pixel data changed, so the hash must be recomputed; flipud
            # returns a view, so make it contiguous before hashing
            hash_=xxhash.xxh64(np.ascontiguousarray(flipped_data)).digest(),
            # Keep pointing at the original base image through chained augmentations
            base_image=image.metadata.base_image if image.metadata.base_image is not None else image,
            transformation_matrix=transformation_matrix,
            labelled_objects=(imeta.LabelledObject(
                class_names=obj.class_names,
                # New top edge of the (x, y, w, h) box after the flip.
                # NOTE(review): this uses height - 1 - y - h, while
                # horizontal_flip uses width - x - w (no "- 1"); the two
                # bounding-box conventions disagree by one pixel — confirm
                # which convention is intended before changing either.
                bounding_box=(obj.bounding_box[0],
                              image.metadata.height - 1 - obj.bounding_box[1] - obj.bounding_box[3],
                              obj.bounding_box[2],
                              obj.bounding_box[3]),
                label_color=obj.label_color,
                relative_pose=obj.relative_pose,
                object_id=obj.object_id) for obj in image.metadata.labelled_objects)),
        additional_metadata=image.additional_metadata,
        # Optional per-pixel maps are flipped the same way when present
        depth_data=np.flipud(image.depth_data) if image.depth_data is not None else None,
        labels_data=np.flipud(image.labels_data) if image.labels_data is not None else None,
        world_normals_data=np.flipud(image.world_normals_data) if image.world_normals_data is not None else None)
def test_repeated_applications_returns_to_original(self):
    """Four applications form a full cycle; repeated cycles keep returning to the original."""
    data = np.array([list(range(i, i + 100)) for i in range(100)])
    image = core.image.Image(
        data=data,
        metadata=imeta.ImageMetadata(
            source_type=imeta.ImageSourceType.SYNTHETIC,
            hash_=b'\xa5\xc9\x08\xaf$\x0b\x116',
            labelled_objects=[
                imeta.LabelledObject(class_names={'cup'}, bounding_box=(80, 20, 10, 20))
            ]))

    def _assert_unchanged(img):
        # The image must match the original in data, transform, and bounding box
        self.assertNPEqual(img.data, data)
        self.assertNPClose(img.metadata.affine_transformation_matrix, np.identity(3))
        self.assertEqual((80, 20, 10, 20), img.metadata.labelled_objects[0].bounding_box)

    # Apply 4 times to rotate back to the start
    result = image
    for _ in range(4):
        result = self.do_augment(result)
    _assert_unchanged(result)
    # Ten more full cycles must each return to the original as well
    for _ in range(10):
        for _ in range(4):
            result = self.do_augment(result)
        _assert_unchanged(result)
def test_hash(self):
    """Equal objects hash equal; altering any single field produces a different hash."""
    kwargs = {
        'class_names': ('class_1',),
        'bounding_box': (152, 239, 14, 78),
        'label_color': (127, 33, 67),
        'relative_pose': tf.Transform(location=(123, -45, 23), rotation=(0.5, 0.23, 0.1)),
        'object_id': 'LabelledObject-18569'
    }
    a = imeta.LabelledObject(**kwargs)
    b = imeta.LabelledObject(**kwargs)
    self.assertEqual(hash(a), hash(b))
    # Each override replaces exactly one field; every one must change the hash
    for override in ({'class_names': 'class_41'},
                     {'bounding_box': (47, 123, 45, 121)},
                     {'label_color': (247, 123, 14)},
                     {'relative_pose': tf.Transform((62, -81, 43), (0.1, 0.1, 0.1))},
                     {'object_id': 'Cat-12'}):
        b = imeta.LabelledObject(**du.defaults(override, kwargs))
        self.assertNotEqual(hash(a), hash(b))
def test_modifies_bounding_boxes(self):
    """Augmenting an image remaps each labelled object's bounding box as projected."""
    projections = list(self.get_projected_bounding_boxes())
    # One labelled object per (before, after) bounding-box pair
    labelled_objects = [
        imeta.LabelledObject(class_names={'cup'}, bounding_box=before)
        for before, _ in projections
    ]
    expected_bboxes = [after for _, after in projections]
    image = make_image(metadata={'labelled_objects': labelled_objects})
    result = self.do_augment(image)
    self.assertEqual(len(expected_bboxes), len(result.metadata.labelled_objects))
    for expected, obj in zip(expected_bboxes, result.metadata.labelled_objects):
        self.assertEqual(expected, obj.bounding_box)
def test_inverts_itself(self):
    """Two successive augmentations cancel out, restoring data, transform, and bounding box."""
    data = np.array([list(range(i, i + 100)) for i in range(100)])
    image = core.image.Image(
        data=data,
        metadata=imeta.ImageMetadata(
            source_type=imeta.ImageSourceType.SYNTHETIC,
            hash_=b'\xa5\xc9\x08\xaf$\x0b\x116',
            labelled_objects=[
                imeta.LabelledObject(class_names={'cup'}, bounding_box=(80, 20, 10, 20))
            ]))
    # The augmentation should be its own inverse
    result = image
    for _ in range(2):
        result = self.do_augment(result)
    self.assertNPEqual(result.data, data)
    self.assertNPEqual(result.metadata.affine_transformation_matrix, np.identity(3))
    self.assertEqual((80, 20, 10, 20), result.metadata.labelled_objects[0].bounding_box)
def rotate_270(image):
    """
    Rotate an image by three quarter-turns (np.rot90 with k=3), producing a new Image.

    Rotates the pixel data and each optional per-pixel map, and remaps every
    labelled object's bounding box into the rotated frame.

    :param image: The source image (core.image.Image)
    :return: A new, rotated image
    """
    def _rot(channel):
        # Rotate an optional per-pixel channel, passing None through untouched
        return np.rot90(channel, k=3) if channel is not None else None

    rotated_objects = (imeta.LabelledObject(
        class_names=obj.class_names,
        # (x, y, w, h) -> (height - y - h, x, h, w) under this rotation
        bounding_box=(image.metadata.height - obj.bounding_box[1] - obj.bounding_box[3],
                      obj.bounding_box[0],
                      obj.bounding_box[3],
                      obj.bounding_box[2]),
        label_color=obj.label_color,
        relative_pose=obj.relative_pose,
        object_id=obj.object_id) for obj in image.metadata.labelled_objects)
    return core.image.Image(
        data=np.rot90(image.data, k=3),
        metadata=image.metadata.clone(
            camera_pose=image.camera_pose,
            labelled_objects=rotated_objects),
        additional_metadata=image.additional_metadata,
        depth_data=_rot(image.depth_data),
        labels_data=_rot(image.labels_data),
        world_normals_data=_rot(image.world_normals_data))
def horizontal_flip(image):
    """
    Mirror an image left-to-right, producing a new Image.

    Flips the pixel data and each optional per-pixel map, and mirrors every
    labelled object's bounding box about the vertical centre line.

    :param image: The source image (core.image.Image)
    :return: A new, horizontally mirrored image
    """
    def _flip(channel):
        # Mirror an optional per-pixel channel, passing None through untouched
        return np.fliplr(channel) if channel is not None else None

    mirrored_objects = (imeta.LabelledObject(
        class_names=obj.class_names,
        # (x, y, w, h) -> (width - x - w, y, w, h) under a horizontal mirror
        bounding_box=(image.metadata.width - obj.bounding_box[0] - obj.bounding_box[2],
                      obj.bounding_box[1],
                      obj.bounding_box[2],
                      obj.bounding_box[3]),
        label_color=obj.label_color,
        relative_pose=obj.relative_pose,
        object_id=obj.object_id) for obj in image.metadata.labelled_objects)
    return core.image.Image(
        data=np.fliplr(image.data),
        metadata=image.metadata.clone(
            camera_pose=image.camera_pose,
            labelled_objects=mirrored_objects),
        additional_metadata=image.additional_metadata,
        depth_data=_flip(image.depth_data),
        labels_data=_flip(image.labels_data),
        world_normals_data=_flip(image.world_normals_data))
def transform_bounding_boxes(labelled_objects, transformation_matrix):
    """
    Transform the labelled objects so that the bounding boxes match after the image transformation.

    :param labelled_objects: The list of labelled objects from the metadata
    :param transformation_matrix: The 3x3 affine transformation matrix used to modify the image.
    :return: A new list of labelled objects, with modified bounding boxes.
    """
    transformed_objects = []
    for obj in labelled_objects:
        x, y, width, height = obj.bounding_box
        # All four box corners as homogeneous column vectors (one column each)
        corners = np.array([
            [x, x + width, x, x + width],
            [y, y, y + height, y + height],
            [1, 1, 1, 1]
        ])
        # Project the corners through the affine transform, rounding to the
        # nearest integer to avoid floating point errors (pixels are discrete)
        projected = np.round(np.dot(transformation_matrix, corners))
        # Wrap an axis-aligned bounding box around the projected corners
        min_x = projected[0].min()
        min_y = projected[1].min()
        max_x = projected[0].max()
        max_y = projected[1].max()
        transformed_objects.append(imeta.LabelledObject(
            class_names=obj.class_names,
            bounding_box=(min_x, min_y, max_x - min_x, max_y - min_y),
            label_color=obj.label_color,
            relative_pose=obj.relative_pose,
            object_id=obj.object_id))
    return transformed_objects
def test_serialize_and_deserialize(self):
    """Serialization round-trips losslessly, even when repeated many times."""
    obj1 = imeta.LabelledObject(
        class_names=('class_3',),
        bounding_box=(148, 468, 82, 241),
        label_color=(12, 82, 238),
        relative_pose=tf.Transform(location=(85, -648, -376), rotation=(0.8, -0.64, -0.73)),
        object_id='LabelledObject-87684')
    s_obj1 = obj1.serialize()
    obj2 = imeta.LabelledObject.deserialize(s_obj1)
    s_obj2 = obj2.serialize()
    self.assertEqual(obj1, obj2)
    self.assertEqual(s_obj1, s_obj2)
    # Repeated serialization and deserialization must not degrade the information
    for _ in range(100):
        obj2 = imeta.LabelledObject.deserialize(s_obj2)
        s_obj2 = obj2.serialize()
        self.assertEqual(obj1, obj2)
        self.assertEqual(s_obj1, s_obj2)
def setUp(self):
    """
    Build two stereo-image fixtures: a minimal one (pixel data only) and a
    'full' one carrying depth, ground-truth depth, labels, and world normals
    plus additional metadata.
    """
    self.left_pose = tf.Transform((1, 2, 3), (0.5, 0.5, -0.5, -0.5))
    # Right camera: 15 units along the left camera's independent axis, same orientation
    self.right_pose = tf.Transform(location=self.left_pose.find_independent((0, 0, 15)),
                                   rotation=self.left_pose.rotation_quat(w_first=False),
                                   w_first=False)
    self.metadata = imeta.ImageMetadata(
        hash_=b'\x1f`\xa8\x8aR\xed\x9f\x0b',
        source_type=imeta.ImageSourceType.SYNTHETIC,
        camera_pose=self.left_pose,
        right_camera_pose=self.right_pose,
        intrinsics=cam_intr.CameraIntrinsics(32, 32, 17, 22, 16, 16),
        right_intrinsics=cam_intr.CameraIntrinsics(32, 32, 8, 12, 16, 16),
        environment_type=imeta.EnvironmentType.INDOOR_CLOSE,
        light_level=imeta.LightingLevel.WELL_LIT,
        time_of_day=imeta.TimeOfDay.DAY,
        lens_focal_distance=5,
        aperture=22,
        simulation_world='TestSimulationWorld',
        lighting_model=imeta.LightingModel.LIT,
        texture_mipmap_bias=1,
        normal_maps_enabled=2,
        roughness_enabled=True,
        geometry_decimation=0.8,
        procedural_generation_seed=16234,
        labelled_objects=(
            imeta.LabelledObject(class_names=('car',),
                                 bounding_box=(12, 144, 67, 43),
                                 label_color=(123, 127, 112),
                                 relative_pose=tf.Transform((12, 3, 4), (0.5, 0.1, 1, 1.7)),
                                 object_id='Car-002'),
            imeta.LabelledObject(class_names=('cat',),
                                 bounding_box=(125, 244, 117, 67),
                                 label_color=(27, 89, 62),
                                 relative_pose=tf.Transform((378, -1890, 38), (0.3, 1.12, 1.1, 0.2)),
                                 object_id='cat-090')
        ),
        average_scene_depth=90.12)
    self.left_data = np.asarray(np.random.uniform(0, 255, (32, 32, 3)), dtype='uint8')
    self.right_data = np.asarray(np.random.uniform(0, 255, (32, 32, 3)), dtype='uint8')
    self.image = im.StereoImage(left_data=self.left_data,
                                right_data=self.right_data,
                                metadata=self.metadata)
    self.full_left_pose = tf.Transform((4, 5, 6), (-0.5, 0.5, -0.5, 0.5))
    # NOTE(review): this derives from self.left_pose rather than
    # self.full_left_pose — looks like a copy-paste from right_pose above;
    # confirm whether the full fixture's right camera is meant to be offset
    # from the full left pose instead.
    self.full_right_pose = tf.Transform(location=self.left_pose.find_independent((0, 0, 15)),
                                        rotation=self.left_pose.rotation_quat(w_first=False),
                                        w_first=False)
    self.full_metadata = imeta.ImageMetadata(
        hash_=b'\x1f`\xa8\x8aR\xed\x9f\x0b',
        source_type=imeta.ImageSourceType.SYNTHETIC,
        camera_pose=self.full_left_pose,
        right_camera_pose=self.full_right_pose,
        intrinsics=cam_intr.CameraIntrinsics(32, 32, 17, 22, 16, 16),
        right_intrinsics=cam_intr.CameraIntrinsics(32, 32, 8, 12, 16, 16),
        environment_type=imeta.EnvironmentType.INDOOR_CLOSE,
        light_level=imeta.LightingLevel.WELL_LIT,
        time_of_day=imeta.TimeOfDay.DAY,
        lens_focal_distance=5,
        aperture=22,
        simulation_world='TestSimulationWorld',
        lighting_model=imeta.LightingModel.LIT,
        texture_mipmap_bias=1,
        normal_maps_enabled=2,
        roughness_enabled=True,
        geometry_decimation=0.8,
        procedural_generation_seed=16234,
        labelled_objects=(
            imeta.LabelledObject(class_names=('car',),
                                 bounding_box=(12, 144, 67, 43),
                                 label_color=(123, 127, 112),
                                 relative_pose=tf.Transform((12, 3, 4), (0.5, 0.1, 1, 1.7)),
                                 object_id='Car-002'),
            imeta.LabelledObject(class_names=('cat',),
                                 bounding_box=(125, 244, 117, 67),
                                 label_color=(27, 89, 62),
                                 relative_pose=tf.Transform((378, -1890, 38), (0.3, 1.12, 1.1, 0.2)),
                                 object_id='cat-090')
        ),
        average_scene_depth=90.12)
    # Random per-pixel channels for the full stereo fixture
    self.full_left_data = np.asarray(np.random.uniform(0, 255, (32, 32, 3)), dtype='uint8')
    self.full_right_data = np.asarray(np.random.uniform(0, 255, (32, 32, 3)), dtype='uint8')
    self.left_gt_depth = np.asarray(np.random.uniform(0, 255, (32, 32)), dtype='uint8')
    self.right_gt_depth = np.asarray(np.random.uniform(0, 255, (32, 32)), dtype='uint8')
    self.left_depth = np.asarray(np.random.uniform(0, 255, (32, 32)), dtype='uint8')
    self.right_depth = np.asarray(np.random.uniform(0, 255, (32, 32)), dtype='uint8')
    self.left_labels = np.asarray(np.random.uniform(0, 255, (32, 32, 3)), dtype='uint8')
    self.right_labels = np.asarray(np.random.uniform(0, 255, (32, 32, 3)), dtype='uint8')
    self.left_normals = np.asarray(np.random.uniform(0, 255, (32, 32, 3)), dtype='uint8')
    self.right_normals = np.asarray(np.random.uniform(0, 255, (32, 32, 3)), dtype='uint8')
    self.full_image = im.StereoImage(
        left_data=self.full_left_data,
        right_data=self.full_right_data,
        left_depth_data=self.left_depth,
        right_depth_data=self.right_depth,
        left_ground_truth_depth_data=self.left_gt_depth,
        right_ground_truth_depth_data=self.right_gt_depth,
        left_labels_data=self.left_labels,
        right_labels_data=self.right_labels,
        left_world_normals_data=self.left_normals,
        right_world_normals_data=self.right_normals,
        metadata=self.full_metadata,
        additional_metadata={
            'Source': 'Generated',
            'Resolution': {'width': 1280, 'height': 720},
            'Material Properties': {
                'BaseMipMapBias': 0,
                'RoughnessQuality': True
            }
        }
    )
def make_stereo_image(index=1, **kwargs):
    """
    Make a mock stereo image entity with randomized data and metadata.

    :param index: Offsets the camera location so successive images differ; default 1
    :param kwargs: Fixed constructor arguments that override the random defaults
    :return: a new StereoImageEntity
    """
    # Caller-supplied kwargs take precedence; everything else is randomized
    kwargs = du.defaults(kwargs, {
        'id_': bson.objectid.ObjectId(),
        'left_data': np.random.uniform(0, 255, (32, 32, 3)),
        'right_data': np.random.uniform(0, 255, (32, 32, 3)),
        'metadata': imeta.ImageMetadata(
            # Fixed 4-byte prefix plus 4 random bytes, so hashes are unique per image
            hash_=b'\xf1\x9a\xe2|' + np.random.randint(0, 0xFFFFFFFF).to_bytes(4, 'big'),
            source_type=imeta.ImageSourceType.SYNTHETIC,
            camera_pose=tf.Transform(
                location=(1 + 100 * index, 2 + np.random.uniform(-1, 1), 3),
                rotation=(4, 5, 6, 7 + np.random.uniform(-4, 4))),
            right_camera_pose=tf.Transform(
                location=(1 + 100 * index, 12 + np.random.uniform(-1, 1), 3),
                rotation=(4, 5, 6, 7 + np.random.uniform(-4, 4))),
            intrinsics=cam_intr.CameraIntrinsics(800, 600, 550.2, 750.2, 400, 300),
            right_intrinsics=cam_intr.CameraIntrinsics(800, 600, 550.2, 750.2, 400, 300),
            environment_type=imeta.EnvironmentType.INDOOR_CLOSE,
            light_level=imeta.LightingLevel.WELL_LIT,
            time_of_day=imeta.TimeOfDay.DAY,
            lens_focal_distance=5,
            aperture=22,
            simulation_world='TestSimulationWorld',
            lighting_model=imeta.LightingModel.LIT,
            texture_mipmap_bias=1,
            normal_maps_enabled=2,
            roughness_enabled=True,
            geometry_decimation=0.8,
            procedural_generation_seed=16234,
            labelled_objects=(
                imeta.LabelledObject(
                    class_names=('car',),
                    bounding_box=(12, 144, 67, 43),
                    label_color=(123, 127, 112),
                    relative_pose=tf.Transform((12, 3, 4), (0.5, 0.1, 1, 1.7)),
                    object_id='Car-002'),
                imeta.LabelledObject(
                    class_names=('cat',),
                    bounding_box=(125, 244, 117, 67),
                    label_color=(27, 89, 62),
                    relative_pose=tf.Transform((378, -1890, 38), (0.3, 1.12, 1.1, 0.2)),
                    object_id='cat-090')),
            average_scene_depth=90.12),
        'additional_metadata': {
            'Source': 'Generated',
            'Resolution': {'width': 1280, 'height': 720},
            'Material Properties': {
                'BaseMipMapBias': 0,
                'RoughnessQuality': True
            }
        },
        # Random per-pixel channels for both cameras
        'left_depth_data': np.random.uniform(0, 1, (32, 32)),
        'right_depth_data': np.random.uniform(0, 1, (32, 32)),
        'left_labels_data': np.random.uniform(0, 1, (32, 32, 3)),
        'right_labels_data': np.random.uniform(0, 1, (32, 32, 3)),
        'left_world_normals_data': np.random.uniform(0, 1, (32, 32, 3)),
        'right_world_normals_data': np.random.uniform(0, 1, (32, 32, 3))
    })
    return ie.StereoImageEntity(**kwargs)
def make_image(**kwargs):
    """
    Make a mock image, randomly

    :param kwargs: Fixed kwargs to the constructor; anything not supplied is randomized
    :return: a new image object (ie.ImageEntity)
    """
    # Caller-supplied kwargs take precedence; everything else is randomized
    kwargs = du.defaults(kwargs, {
        'id_': bson.objectid.ObjectId(),
        'data': np.random.randint(0, 255, (32, 32, 3), dtype='uint8'),
        'metadata': imeta.ImageMetadata(
            hash_=b'\x1f`\xa8\x8aR\xed\x9f\x0b',
            source_type=imeta.ImageSourceType.SYNTHETIC,
            camera_pose=tf.Transform(location=np.random.uniform(-1000, 1000, 3),
                                     rotation=np.random.uniform(-1, 1, 4)),
            intrinsics=cam_intr.CameraIntrinsics(800, 600, 782.5, 781.3, 320, 300),
            environment_type=imeta.EnvironmentType.INDOOR_CLOSE,
            light_level=imeta.LightingLevel.WELL_LIT,
            time_of_day=imeta.TimeOfDay.DAY,
            lens_focal_distance=np.random.uniform(10, 10000),
            aperture=np.random.uniform(1, 22),
            simulation_world='TestSimulationWorld',
            lighting_model=imeta.LightingModel.LIT,
            texture_mipmap_bias=np.random.randint(0, 8),
            normal_maps_enabled=bool(np.random.randint(0, 2)),
            roughness_enabled=bool(np.random.randint(0, 2)),
            geometry_decimation=np.random.uniform(0, 1),
            procedural_generation_seed=np.random.randint(10000),
            labelled_objects=(
                imeta.LabelledObject(
                    class_names=('car',),
                    bounding_box=tuple(np.random.randint(0, 100, 4)),
                    label_color=tuple(np.random.randint(0, 255, 3)),
                    relative_pose=tf.Transform(np.random.uniform(-1000, 1000, 3),
                                               np.random.uniform(-1, 1, 4)),
                    object_id='Car-002'),
                imeta.LabelledObject(
                    class_names=('cat',),
                    bounding_box=tuple(np.random.randint(0, 100, 4)),
                    # Fixed: label colors are RGB triples everywhere else in the file;
                    # this previously sampled 4 components
                    label_color=tuple(np.random.randint(0, 255, 3)),
                    relative_pose=tf.Transform(np.random.uniform(-1000, 1000, 3),
                                               np.random.uniform(-1, 1, 4)),
                    object_id='cat-090')),
            average_scene_depth=np.random.uniform(10000)),
        'additional_metadata': {
            'Source': 'Generated',
            'Resolution': {'width': 32, 'height': 32},
            'Material Properties': {
                'BaseMipMapBias': 0,
                'RoughnessQuality': True
            }
        },
        # Random per-pixel channels
        'depth_data': np.random.uniform(0, 1, (32, 32)),
        'labels_data': np.random.uniform(0, 1, (32, 32, 3)),
        'world_normals_data': np.random.uniform(0, 1, (32, 32, 3))
    })
    return ie.ImageEntity(**kwargs)
def _make_metadata(self, im_data, depth_data, label_data, camera_pose, right_camera_pose=None):
    """
    Build the ImageMetadata for a captured frame.

    Detects labelled objects from the per-pixel label colors (querying the
    simulator client for object names, class labels, and poses) and fills in
    the simulator's current settings.

    :param im_data: The raw image pixels (hashed into the metadata)
    :param depth_data: Optional depth map; its mean becomes average_scene_depth
    :param label_data: Optional per-pixel label-color image; None disables object detection
    :param camera_pose: Pose of the (left) camera
    :param right_camera_pose: Pose of the right camera for stereo frames, or None
    :return: A new imeta.ImageMetadata
    """
    focus_length = self._focus_distance
    aperture = self._aperture
    # NOTE(review): reading these settings back from the simulator is disabled;
    # the cached values above are used instead
    # if self._client is not None:
    #     fov = self._client.request('vget /camera/0/fov')
    #     focus_length = self._client.request('vget /camera/0/focus-distance')
    #     aperture = self._client.request('vget /camera/0/fstop')
    camera_intrinsics = self.get_camera_intrinsics()
    labelled_objects = []
    if label_data is not None:
        # Every distinct pixel color in the label image marks one object;
        # black is the background
        label_colors = set(tuple(color) for m2d in label_data for color in m2d)
        for color in label_colors:
            if color != (0, 0, 0):
                # Ask the simulator which object has this label color, and its classes
                name = self._client.request(
                    "vget /object/name {0} {1} {2}".format(color[0], color[1], color[2]))
                class_names = self._client.request(
                    "vget /object/{0}/labels".format(name))
                class_names = set(class_names.lower().split(','))
                # Pixels matching this color give the object's extent in the image
                label_points = cv2.findNonZero(
                    np.asarray(np.all(label_data == color, axis=2), dtype='uint8'))
                # TODO: Other ground-truth bounding boxes could be useful, and are
                # trivial to calculate here. E.g.: Oriented bounding boxes, or fit
                # ellipses. See:
                # http://docs.opencv.org/2.4/modules/imgproc/doc/structural_analysis_and_shape_descriptors.html
                labelled_objects.append(
                    imeta.LabelledObject(
                        class_names=class_names,
                        bounding_box=cv2.boundingRect(label_points),
                        label_color=color,
                        relative_pose=self.get_object_pose(name),
                        object_id=name))
    return imeta.ImageMetadata(
        hash_=xxhash.xxh64(im_data).digest(),
        source_type=imeta.ImageSourceType.SYNTHETIC,
        camera_pose=camera_pose,
        right_camera_pose=right_camera_pose,
        intrinsics=camera_intrinsics,
        # Both cameras share the same intrinsics
        right_intrinsics=camera_intrinsics,
        environment_type=self._environment_type,
        light_level=self._light_level,
        time_of_day=self._time_of_day,
        lens_focal_distance=focus_length,
        aperture=aperture,
        simulator=self.identifier,
        simulation_world=self._world_name,
        lighting_model=imeta.LightingModel.LIT if self._lit_mode else imeta.LightingModel.UNLIT,
        # Render-quality settings are not currently read back from the simulator
        texture_mipmap_bias=None,
        normal_maps_enabled=None,
        roughness_enabled=None,
        geometry_decimation=None,
        procedural_generation_seed=None,
        labelled_objects=labelled_objects,
        average_scene_depth=np.mean(depth_data) if depth_data is not None else None)
def import_dataset(labels_path, db_client, **kwargs):
    """
    Import a real-world dataset with labelled images.

    :param labels_path: Path to a labels file, or a directory containing one ('*.txt')
    :param db_client: The database client used to store the imported images
    :param kwargs: Additional arguments passed to the image metadata
    :return: The result of saving the built image collection, or None if no labels file is found
    """
    if os.path.isdir(labels_path):
        # Look in the given folder for possible labels files
        candidates = glob.glob(os.path.join(labels_path, '*.txt'))
        if len(candidates) >= 1:
            labels_path = candidates[0]
        else:
            # Cannot find the labels file, return None
            return None
    builder = dataset.image_collection_builder.ImageCollectionBuilder(db_client)
    builder.set_non_sequential()
    with open(labels_path, 'r') as labels_file:
        base_dir = os.path.dirname(labels_path)
        for line in labels_file:
            # Each line: <image file> <x1> <y1> <x2> <y2> <label>, comma or space separated
            split = re.split('[, ]', line)
            if len(split) != 6:
                continue
            imfile, x1, y1, x2, y2, label = split
            label = label.rstrip()
            im = cv2.imread(os.path.join(base_dir, imfile))
            if im is None:
                # Robustness fix: cv2.imread returns None for a missing or
                # unreadable file; skip the entry instead of crashing in cvtColor
                continue
            im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
            # FOV is 90 degrees which is pi / 2, so f = (size / 2) / tan(fov / 2)
            focal_length = 1 / (2 * np.tan(np.pi / 4))
            if im.shape[1] > im.shape[0]:
                focal_length = focal_length * im.shape[1]
            else:
                focal_length = focal_length * im.shape[0]
            labelled_object = imeta.LabelledObject(
                class_names=(label.lower(),),
                bounding_box=(int(x1), int(y1), int(x2) - int(x1), int(y2) - int(y1)),
                object_id='StarbucksCup_170'  # This is so I can refer to it later, matches Unreal name
            )
            image_entity = core.image_entity.ImageEntity(
                data=im,
                metadata=imeta.ImageMetadata(
                    hash_=xxhash.xxh64(im).digest(),
                    source_type=imeta.ImageSourceType.REAL_WORLD,
                    intrinsics=cam_intr.CameraIntrinsics(
                        width=im.shape[1],
                        height=im.shape[0],
                        fx=focal_length,
                        fy=focal_length,
                        cx=0.5 * im.shape[1],
                        cy=0.5 * im.shape[0]),
                    camera_pose=tf.Transform(),
                    labelled_objects=(labelled_object,),
                    **kwargs),
                additional_metadata=None)
            builder.add_image(image_entity)
    return builder.save()
def test_preserves_other_metadata(self):
    """Augmentation must leave every metadata field other than the image geometry untouched."""
    data = np.array([list(range(i, i + 100)) for i in range(100)])
    # A fully-populated metadata fixture, so every preserved field is exercised
    image = core.image.Image(
        data=data,
        metadata=imeta.ImageMetadata(
            hash_=b'\x04\xe2\x1f\x3d$\x7c\x116',
            source_type=imeta.ImageSourceType.SYNTHETIC,
            environment_type=imeta.EnvironmentType.INDOOR_CLOSE,
            light_level=imeta.LightingLevel.WELL_LIT,
            time_of_day=imeta.TimeOfDay.DAY,
            camera_pose=tf.Transform((1, 3, 4), (0.2, 0.8, 0.2, -0.7)),
            right_camera_pose=tf.Transform((-10, -20, -30), (0.9, -0.7, 0.5, -0.3)),
            intrinsics=cam_intr.CameraIntrinsics(data.shape[1], data.shape[0],
                                                 147.2, 123.3, 420, 215),
            right_intrinsics=cam_intr.CameraIntrinsics(data.shape[1], data.shape[0],
                                                       168.2, 123.3, 420, 251),
            lens_focal_distance=5,
            aperture=22,
            simulation_world='TestSimulationWorld',
            lighting_model=imeta.LightingModel.LIT,
            texture_mipmap_bias=1,
            normal_maps_enabled=True,
            roughness_enabled=True,
            geometry_decimation=0.8,
            procedural_generation_seed=16234,
            labelled_objects=[
                imeta.LabelledObject(class_names=('cup',),
                                     bounding_box=(142, 280, 54, 78),
                                     label_color=(2, 227, 34),
                                     relative_pose=tf.Transform(location=(-246, 468, 4),
                                                                rotation=(0.2, 0.3, 0.4)),
                                     object_id='LabelledObject-68478'),
                imeta.LabelledObject(class_names=('car',),
                                     bounding_box=(542, 83, 63, 123),
                                     label_color=(26, 12, 212),
                                     relative_pose=tf.Transform(location=(61, -717, 161),
                                                                rotation=(0.7, 0.6, 0.3)),
                                     object_id='LabelledObject-8246'),
                imeta.LabelledObject(class_names=('cow',),
                                     bounding_box=(349, 672, 124, 208),
                                     label_color=(162, 134, 163),
                                     relative_pose=tf.Transform(location=(286, -465, -165),
                                                                rotation=(0.9, 0.1, 0.5)),
                                     object_id='LabelledObject-56485')
            ],
            average_scene_depth=90.12))
    result = self.do_augment(image)
    # Every non-geometric field must survive the augmentation unchanged
    self.assertEqual(result.metadata.source_type, image.metadata.source_type)
    self.assertEqual(result.metadata.environment_type, image.metadata.environment_type)
    self.assertEqual(result.metadata.light_level, image.metadata.light_level)
    self.assertEqual(result.metadata.time_of_day, image.metadata.time_of_day)
    self.assertEqual(result.metadata.height, image.metadata.height)
    self.assertEqual(result.metadata.width, image.metadata.width)
    self.assertEqual(result.metadata.camera_pose, image.metadata.camera_pose)
    self.assertEqual(result.metadata.right_camera_pose, image.metadata.right_camera_pose)
    self.assertEqual(result.metadata.lens_focal_distance, image.metadata.lens_focal_distance)
    self.assertEqual(result.metadata.aperture, image.metadata.aperture)
    self.assertEqual(result.metadata.simulation_world, image.metadata.simulation_world)
    self.assertEqual(result.metadata.lighting_model, image.metadata.lighting_model)
    self.assertEqual(result.metadata.texture_mipmap_bias, image.metadata.texture_mipmap_bias)
    self.assertEqual(result.metadata.normal_maps_enabled, image.metadata.normal_maps_enabled)
    self.assertEqual(result.metadata.roughness_enabled, image.metadata.roughness_enabled)
    self.assertEqual(result.metadata.geometry_decimation, image.metadata.geometry_decimation)
    self.assertEqual(result.metadata.procedural_generation_seed,
                     image.metadata.procedural_generation_seed)
    self.assertEqual(result.metadata.average_scene_depth, image.metadata.average_scene_depth)
def test_clone(self):
    """
    Clone with no overrides preserves everything; cloning with a single
    override changes exactly that field and leaves every other field equal.
    """
    # For each clone keyword, one or more alternative values distinct from
    # those produced by self.make_metadata().
    # NOTE(review): 'simulator', 'base_image', and 'transformation_matrix'
    # are exercised as overrides below but have no per-field assertions in
    # the check loop — confirm whether that is intentional.
    alt_metadata = {
        'hash_': [b'\x1f`\xa8\x8aR\xed\x9f\x0b'],
        'source_type': [imeta.ImageSourceType.REAL_WORLD],
        'environment_type': [
            imeta.EnvironmentType.INDOOR, imeta.EnvironmentType.OUTDOOR_URBAN,
            imeta.EnvironmentType.OUTDOOR_LANDSCAPE
        ],
        'light_level': [
            imeta.LightingLevel.PITCH_BLACK, imeta.LightingLevel.DIM,
            imeta.LightingLevel.EVENLY_LIT, imeta.LightingLevel.BRIGHT
        ],
        'time_of_day': [
            imeta.TimeOfDay.DAWN, imeta.TimeOfDay.MORNING,
            imeta.TimeOfDay.AFTERNOON, imeta.TimeOfDay.TWILIGHT,
            imeta.TimeOfDay.NIGHT
        ],
        'camera_pose': [tf.Transform((12, 13, 14), (-0.5, 0.3, 0.8, -0.9))],
        'right_camera_pose': [tf.Transform((11, 15, 19), (-0.2, 0.4, 0.6, -0.8))],
        'intrinsics': [cam_intr.CameraIntrinsics(900, 910, 894.7, 861.2, 640, 360)],
        'right_intrinsics': [cam_intr.CameraIntrinsics(900, 890, 760.45, 405.1, 640, 360)],
        'lens_focal_distance': [22],
        'aperture': [1.2],
        'simulator': [bson.ObjectId()],
        'simulation_world': ['TestSimulationWorld2'],
        'lighting_model': [imeta.LightingModel.UNLIT],
        'texture_mipmap_bias': [2],
        'normal_maps_enabled': [False],
        'roughness_enabled': [False],
        'geometry_decimation': [0.3],
        'procedural_generation_seed': [7329],
        'average_scene_depth': [102.33],
        'base_image': [mock.create_autospec(core.image.Image)],
        'transformation_matrix': [np.random.uniform(0, 1, (3, 3))],
        'labelled_objects': [
            # Empty, plus three variants each differing from the base fixture
            # in one object field (class name, bounding box, or label color)
            tuple(),
            (imeta.LabelledObject(class_names=('cup',),
                                  bounding_box=(142, 280, 54, 78),
                                  label_color=(2, 227, 34),
                                  relative_pose=tf.Transform(location=(-246, 468, 4),
                                                             rotation=(0.2, 0.3, 0.4)),
                                  object_id='LabelledObject-68478'),
             imeta.LabelledObject(class_names=('cat',),
                                  bounding_box=(542, 83, 63, 123),
                                  label_color=(26, 12, 212),
                                  relative_pose=tf.Transform(location=(61, -717, 161),
                                                             rotation=(0.7, 0.6, 0.3)),
                                  object_id='LabelledObject-8246'),
             imeta.LabelledObject(class_names=('cow',),
                                  bounding_box=(349, 672, 124, 208),
                                  label_color=(162, 134, 163),
                                  relative_pose=tf.Transform(location=(286, -465, -165),
                                                             rotation=(0.9, 0.1, 0.5)),
                                  object_id='LabelledObject-56485')),
            (imeta.LabelledObject(class_names=('cup',),
                                  bounding_box=(142, 12, 54, 78),
                                  label_color=(2, 227, 34),
                                  relative_pose=tf.Transform(location=(-246, 468, 4),
                                                             rotation=(0.2, 0.3, 0.4)),
                                  object_id='LabelledObject-68478'),
             imeta.LabelledObject(class_names=('car',),
                                  bounding_box=(542, 83, 63, 123),
                                  label_color=(26, 12, 212),
                                  relative_pose=tf.Transform(location=(61, -717, 161),
                                                             rotation=(0.7, 0.6, 0.3)),
                                  object_id='LabelledObject-8246'),
             imeta.LabelledObject(class_names=('cow',),
                                  bounding_box=(349, 672, 124, 208),
                                  label_color=(162, 134, 163),
                                  relative_pose=tf.Transform(location=(286, -465, -165),
                                                             rotation=(0.9, 0.1, 0.5)),
                                  object_id='LabelledObject-56485')),
            (imeta.LabelledObject(class_names=('cup',),
                                  bounding_box=(142, 280, 54, 78),
                                  label_color=(2, 227, 34),
                                  relative_pose=tf.Transform(location=(-246, 468, 4),
                                                             rotation=(0.2, 0.3, 0.4)),
                                  object_id='LabelledObject-68478'),
             imeta.LabelledObject(class_names=('car',),
                                  bounding_box=(542, 83, 63, 123),
                                  label_color=(26, 12, 212),
                                  relative_pose=tf.Transform(location=(61, -717, 161),
                                                             rotation=(0.7, 0.6, 0.3)),
                                  object_id='LabelledObject-8246'),
             imeta.LabelledObject(class_names=('cow',),
                                  bounding_box=(349, 672, 124, 208),
                                  label_color=(162, 134, 255),
                                  relative_pose=tf.Transform(location=(286, -465, -165),
                                                             rotation=(0.9, 0.1, 0.5)),
                                  object_id='LabelledObject-56485'))
        ]
    }
    a = self.make_metadata()
    b = a.clone()
    self.assert_metadata_equal(a, b)
    # Change single keys, and make sure it is no longer equal
    for key, values in alt_metadata.items():
        for val in values:
            b = a.clone(**{key: val})
            if key == 'hash_':
                self.assertEqual(val, b.hash)
                self.assertNotEqual(a.hash, b.hash)
            else:
                self.assertEqual(a.hash, b.hash)
            if key == 'source_type':
                self.assertEqual(val, b.source_type)
                self.assertNotEqual(a.source_type, b.source_type)
            else:
                self.assertEqual(a.source_type, b.source_type)
            if key == 'environment_type':
                self.assertEqual(val, b.environment_type)
                self.assertNotEqual(a.environment_type, b.environment_type)
            else:
                self.assertEqual(a.environment_type, b.environment_type)
            if key == 'light_level':
                self.assertEqual(val, b.light_level)
                self.assertNotEqual(a.light_level, b.light_level)
            else:
                self.assertEqual(a.light_level, b.light_level)
            if key == 'time_of_day':
                self.assertEqual(val, b.time_of_day)
                self.assertNotEqual(a.time_of_day, b.time_of_day)
            else:
                self.assertEqual(a.time_of_day, b.time_of_day)
            if key == 'camera_pose':
                self.assertEqual(val, b.camera_pose)
                self.assertNotEqual(a.camera_pose, b.camera_pose)
            else:
                self.assertEqual(a.camera_pose, b.camera_pose)
            if key == 'right_camera_pose':
                self.assertEqual(val, b.right_camera_pose)
                self.assertNotEqual(a.right_camera_pose, b.right_camera_pose)
            else:
                self.assertEqual(a.right_camera_pose, b.right_camera_pose)
            if key == 'intrinsics':
                self.assertEqual(val, b.camera_intrinsics)
                self.assertNotEqual(a.camera_intrinsics, b.camera_intrinsics)
            else:
                # Width/height derive from the intrinsics, so they are only
                # checked when the intrinsics were not overridden
                self.assertEqual(a.camera_intrinsics, b.camera_intrinsics)
                self.assertEqual(a.width, b.width)
                self.assertEqual(a.height, b.height)
            if key == 'right_intrinsics':
                self.assertEqual(val, b.right_camera_intrinsics)
                self.assertNotEqual(a.right_camera_intrinsics, b.right_camera_intrinsics)
            else:
                self.assertEqual(a.right_camera_intrinsics, b.right_camera_intrinsics)
            if key == 'lens_focal_distance':
                self.assertEqual(val, b.lens_focal_distance)
                self.assertNotEqual(a.lens_focal_distance, b.lens_focal_distance)
            else:
                self.assertEqual(a.lens_focal_distance, b.lens_focal_distance)
            if key == 'aperture':
                self.assertEqual(val, b.aperture)
                self.assertNotEqual(a.aperture, b.aperture)
            else:
                self.assertEqual(a.aperture, b.aperture)
            if key == 'simulation_world':
                self.assertEqual(val, b.simulation_world)
                self.assertNotEqual(a.simulation_world, b.simulation_world)
            else:
                self.assertEqual(a.simulation_world, b.simulation_world)
            if key == 'lighting_model':
                self.assertEqual(val, b.lighting_model)
                self.assertNotEqual(a.lighting_model, b.lighting_model)
            else:
                self.assertEqual(a.lighting_model, b.lighting_model)
            if key == 'texture_mipmap_bias':
                self.assertEqual(val, b.texture_mipmap_bias)
                self.assertNotEqual(a.texture_mipmap_bias, b.texture_mipmap_bias)
            else:
                self.assertEqual(a.texture_mipmap_bias, b.texture_mipmap_bias)
            if key == 'normal_maps_enabled':
                self.assertEqual(val, b.normal_maps_enabled)
                self.assertNotEqual(a.normal_maps_enabled, b.normal_maps_enabled)
            else:
                self.assertEqual(a.normal_maps_enabled, b.normal_maps_enabled)
            if key == 'roughness_enabled':
                self.assertEqual(val, b.roughness_enabled)
                self.assertNotEqual(a.roughness_enabled, b.roughness_enabled)
            else:
                self.assertEqual(a.roughness_enabled, b.roughness_enabled)
            if key == 'geometry_decimation':
                self.assertEqual(val, b.geometry_decimation)
                self.assertNotEqual(a.geometry_decimation, b.geometry_decimation)
            else:
                self.assertEqual(a.geometry_decimation, b.geometry_decimation)
            if key == 'procedural_generation_seed':
                self.assertEqual(val, b.procedural_generation_seed)
                self.assertNotEqual(a.procedural_generation_seed, b.procedural_generation_seed)
            else:
                self.assertEqual(a.procedural_generation_seed, b.procedural_generation_seed)
            if key == 'labelled_objects':
                self.assertEqual(val, b.labelled_objects)
                self.assertNotEqual(a.labelled_objects, b.labelled_objects)
            else:
                self.assertEqual(a.labelled_objects, b.labelled_objects)
            if key == 'average_scene_depth':
                self.assertEqual(val, b.average_scene_depth)
                self.assertNotEqual(a.average_scene_depth, b.average_scene_depth)
            else:
                self.assertEqual(a.average_scene_depth, b.average_scene_depth)
def test_make_from_images(self):
    """Verify StereoImage.make_from_images pairs two Image objects correctly.

    Builds a left/right image pair sharing one ImageMetadata, combines them,
    and checks that the merged additional_metadata and every per-side
    property (pose, data, depth, labels, normals) come from the right source
    image.
    """
    # Left camera pose; the right camera is offset 15 units along the
    # left camera's own axes (find_independent) with the same orientation.
    left_pose = tf.Transform((1, 2, 3), (0.5, 0.5, -0.5, -0.5))
    right_pose = tf.Transform(location=left_pose.find_independent((0, 0, 15)),
                              rotation=left_pose.rotation_quat(w_first=False),
                              w_first=False)
    # Single metadata object shared by both images, fully populated so each
    # field can be checked for pass-through.
    metadata = imeta.ImageMetadata(
        hash_=b'\x1f`\xa8\x8aR\xed\x9f\x0b',
        source_type=imeta.ImageSourceType.SYNTHETIC,
        camera_pose=left_pose,
        right_camera_pose=right_pose,
        intrinsics=cam_intr.CameraIntrinsics(32, 32, 15, 21, 16, 16),
        right_intrinsics=cam_intr.CameraIntrinsics(32, 32, 13, 7, 16, 16),
        environment_type=imeta.EnvironmentType.INDOOR_CLOSE,
        light_level=imeta.LightingLevel.WELL_LIT,
        time_of_day=imeta.TimeOfDay.DAY,
        lens_focal_distance=5,
        aperture=22,
        simulation_world='TestSimulationWorld',
        lighting_model=imeta.LightingModel.LIT,
        texture_mipmap_bias=1,
        # NOTE(review): 2 looks like it was meant to be a bool — confirm
        normal_maps_enabled=2,
        roughness_enabled=True,
        geometry_decimation=0.8,
        procedural_generation_seed=16234,
        labelled_objects=(
            imeta.LabelledObject(class_names=('car',),
                                 bounding_box=(12, 144, 67, 43),
                                 label_color=(123, 127, 112),
                                 relative_pose=tf.Transform((12, 3, 4), (0.5, 0.1, 1, 1.7)),
                                 object_id='Car-002'),
            imeta.LabelledObject(class_names=('cat',),
                                 bounding_box=(125, 244, 117, 67),
                                 label_color=(27, 89, 62),
                                 relative_pose=tf.Transform((378, -1890, 38), (0.3, 1.12, 1.1, 0.2)),
                                 object_id='cat-090')
        ),
        average_scene_depth=90.12)
    left_image = im.Image(
        data=self.left_data,
        depth_data=self.left_depth,
        labels_data=self.left_labels,
        world_normals_data=self.left_normals,
        metadata=metadata,
        additional_metadata={
            'Source': 'Generated',
            'Resolution': {'width': 1280, 'height': 720},
            'Material Properties': {
                'BaseMipMapBias': 0,
                'RoughnessQuality': True
            }
        }
    )
    # Right image deliberately disagrees with the left on some
    # additional_metadata values and adds an extra key, so the merge
    # direction is observable.
    right_image = im.Image(
        data=self.right_data,
        depth_data=self.right_depth,
        labels_data=self.right_labels,
        world_normals_data=self.right_normals,
        metadata=metadata,
        additional_metadata={
            'Source': 'Generated',
            'Resolution': {'width': 1280, 'height': 720},
            'Material Properties': {
                'BaseMipMapBias': 1,
                'RoughnessQuality': False
            },
            'skeletons': 'There is already one inside you'
        }
    )
    stereo_image = im.StereoImage.make_from_images(left_image, right_image)
    # Merged metadata must equal du.defaults(left, right): left values win,
    # right-only keys are filled in.
    self.assertEqual(stereo_image.additional_metadata,
                     du.defaults(left_image.additional_metadata, right_image.additional_metadata))
    # Left-side properties pass through from the left image...
    self.assertNPEqual(stereo_image.left_camera_location, left_image.camera_location)
    self.assertNPEqual(stereo_image.left_camera_orientation, left_image.camera_orientation)
    self.assertNPEqual(stereo_image.left_data, left_image.data)
    self.assertNPEqual(stereo_image.left_depth_data, left_image.depth_data)
    self.assertNPEqual(stereo_image.left_labels_data, left_image.labels_data)
    self.assertNPEqual(stereo_image.left_world_normals_data, left_image.world_normals_data)
    # ...and right-side properties from the right image.
    self.assertNPEqual(stereo_image.right_camera_location, right_image.camera_location)
    self.assertNPEqual(stereo_image.right_camera_orientation, right_image.camera_orientation)
    self.assertNPEqual(stereo_image.right_data, right_image.data)
    self.assertNPEqual(stereo_image.right_depth_data, right_image.depth_data)
    self.assertNPEqual(stereo_image.right_labels_data, right_image.labels_data)
    self.assertNPEqual(stereo_image.right_world_normals_data, right_image.world_normals_data)
def make_metadata(self, **kwargs):
    """Build an ImageMetadata fixture for tests.

    Any constructor argument may be overridden via keyword; everything the
    caller does not supply is filled in from a fixed set of defaults
    (merged with du.defaults, so caller values take precedence).

    :param kwargs: ImageMetadata constructor arguments to override
    :return: an imeta.ImageMetadata built from the merged arguments
    """
    # Fixed fallback values covering every ImageMetadata argument.
    fallback_values = {
        'hash_': b'\xa5\xc9\x08\xaf$\x0b\x116',
        'source_type': imeta.ImageSourceType.SYNTHETIC,
        'environment_type': imeta.EnvironmentType.INDOOR_CLOSE,
        'light_level': imeta.LightingLevel.WELL_LIT,
        'time_of_day': imeta.TimeOfDay.DAY,
        'camera_pose': tf.Transform((1, 3, 4), (0.2, 0.8, 0.2, -0.7)),
        'right_camera_pose': tf.Transform((-10, -20, -30), (0.9, -0.7, 0.5, -0.3)),
        'intrinsics': cam_intr.CameraIntrinsics(700, 700, 654.2, 753.3, 400, 300),
        'right_intrinsics': cam_intr.CameraIntrinsics(700, 710, 732.1, 612.3, 400, 300),
        'lens_focal_distance': 5,
        'aperture': 22,
        'simulator': bson.ObjectId('5a14cf0e36ed1e17a55f1e35'),
        'simulation_world': 'TestSimulationWorld',
        'lighting_model': imeta.LightingModel.LIT,
        'texture_mipmap_bias': 1,
        'normal_maps_enabled': True,
        'roughness_enabled': True,
        'geometry_decimation': 0.8,
        'procedural_generation_seed': 16234,
        'labelled_objects': [
            imeta.LabelledObject(
                class_names=('cup', ),
                bounding_box=(142, 280, 54, 78),
                label_color=(2, 227, 34),
                relative_pose=tf.Transform(location=(-246, 468, 4), rotation=(0.2, 0.3, 0.4)),
                object_id='LabelledObject-68478'),
            imeta.LabelledObject(
                class_names=('car', ),
                bounding_box=(542, 83, 63, 123),
                label_color=(26, 12, 212),
                relative_pose=tf.Transform(location=(61, -717, 161), rotation=(0.7, 0.6, 0.3)),
                object_id='LabelledObject-8246'),
            imeta.LabelledObject(
                class_names=('cow', ),
                bounding_box=(349, 672, 124, 208),
                label_color=(162, 134, 163),
                relative_pose=tf.Transform(location=(286, -465, -165), rotation=(0.9, 0.1, 0.5)),
                object_id='LabelledObject-56485')
        ],
        'average_scene_depth': 90.12,
        'base_image': self.parent_image,
        'transformation_matrix': np.array([
            [0.19882871, 0.58747441, 0.90084303],
            [0.6955363, 0.48193339, 0.09503605],
            [0.20549805, 0.6110534, 0.61145574]])
    }
    merged_kwargs = du.defaults(kwargs, fallback_values)
    return imeta.ImageMetadata(**merged_kwargs)