def test_patch_fails_on_inplace_update_without_overwrite(self):
    """An in-place 'patch' with an incompatible label set must fail
    (exit code 1) when overwriting is not explicitly requested."""
    base = Dataset.from_iterable([
        DatasetItem(id=1, image=np.zeros((3, 5, 3)),
            annotations=[Bbox(1, 2, 3, 4, label=1)]),
    ], categories=['a', 'b'])

    # Different label order plus an extra label makes the patch incompatible
    modifier = Dataset.from_iterable([
        DatasetItem(id=2, image=np.zeros((3, 4, 3)),
            annotations=[Bbox(1, 2, 3, 2, label=1)]),
    ], categories=['b', 'a', 'c'])

    with TestDir() as test_dir:
        base_url = osp.join(test_dir, 'dataset1')
        modifier_url = osp.join(test_dir, 'dataset2')
        base.export(base_url, 'coco', save_images=True)
        modifier.export(modifier_url, 'coco', save_images=True)

        run(self, 'patch', base_url + ':coco', modifier_url + ':coco',
            expected_code=1)
def test_mask_to_polygons(self):
    """MasksToPolygons should approximate each mask blob by a polygon."""
    input_dataset = Dataset.from_iterable([
        DatasetItem(id=1, image=np.zeros((5, 10, 3)), annotations=[
            Mask(
                np.array([
                    [0, 1, 1, 1, 0, 1, 1, 1, 1, 0],
                    [0, 0, 1, 1, 0, 1, 1, 1, 0, 0],
                    [0, 0, 0, 1, 0, 1, 1, 0, 0, 0],
                    [0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
                    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                ]),
            ),
        ]),
    ])

    # One polygon per connected component of the mask
    expected = Dataset.from_iterable([
        DatasetItem(id=1, image=np.zeros((5, 10, 3)), annotations=[
            Polygon([3.0, 2.5, 1.0, 0.0, 3.5, 0.0, 3.0, 2.5]),
            Polygon([5.0, 3.5, 4.5, 0.0, 8.0, 0.0, 5.0, 3.5]),
        ]),
    ])

    produced = transforms.MasksToPolygons(input_dataset)

    compare_datasets(self, expected, produced)
def test_can_import_masks(self):
    """ICDAR text-segmentation import should yield one mask per character
    with index/color/text/center attributes preserved."""
    expected = Dataset.from_iterable([
        DatasetItem(id='1', subset='train',
            image=np.ones((2, 5, 3)),
            annotations=[
                Mask(group=0,
                    image=np.array([[0, 1, 1, 0, 0], [0, 0, 0, 0, 0]]),
                    attributes={'index': 0, 'color': '108 225 132',
                        'text': 'F', 'center': '0 1'}),
                Mask(group=1,
                    image=np.array([[0, 0, 0, 1, 0], [0, 0, 0, 1, 0]]),
                    attributes={'index': 1, 'color': '82 174 214',
                        'text': 'T', 'center': '1 3'}),
                Mask(group=1,
                    image=np.array([[0, 0, 0, 0, 0], [0, 0, 0, 0, 1]]),
                    attributes={'index': 2, 'color': '241 73 144',
                        'text': 'h', 'center': '1 4'}),
            ]),
    ])

    imported = Dataset.import_from(
        osp.join(DUMMY_DATASET_DIR, 'text_segmentation'), 'icdar')

    compare_datasets(self, expected, imported)
def test_annotation_comparison(self):
    """ExactComparator should pair identical annotations and report the
    leftovers on each side as unmatched."""
    first = Dataset.from_iterable([
        DatasetItem(id=1, annotations=[
            Caption('hello'),  # unmatched
            Caption('world', group=5),
            Label(2, attributes={'x': 1, 'y': '2'}),
            Bbox(1, 2, 3, 4, label=4, z_order=1,
                attributes={'score': 1.0}),
            Bbox(5, 6, 7, 8, group=5),
            Points([1, 2, 2, 0, 1, 1], label=0, z_order=4),
            Mask(label=3, z_order=2, image=np.ones((2, 3))),
        ]),
    ], categories=['a', 'b', 'c', 'd'])

    second = Dataset.from_iterable([
        DatasetItem(id=1, annotations=[
            Caption('world', group=5),
            Label(2, attributes={'x': 1, 'y': '2'}),
            Bbox(1, 2, 3, 4, label=4, z_order=1,
                attributes={'score': 1.0}),
            Bbox(5, 6, 7, 8, group=5),
            Bbox(5, 6, 7, 8, group=5),  # unmatched
            Points([1, 2, 2, 0, 1, 1], label=0, z_order=4),
            Mask(label=3, z_order=2, image=np.ones((2, 3))),
        ]),
    ], categories=['a', 'b', 'c', 'd'])

    comparator = ExactComparator()
    matched, unmatched, _, _, errors = \
        comparator.compare_datasets(first, second)

    # 6 pairs match; the extra Caption and duplicate Bbox are left over
    self.assertEqual(6, len(matched), matched)
    self.assertEqual(2, len(unmatched), unmatched)
    self.assertEqual(0, len(errors), errors)
def test_reindex(self):
    """With reindex=True the COCO converter should renumber item and
    annotation ids sequentially from 1."""
    original = Dataset.from_iterable([
        DatasetItem(id=2, image=np.ones((4, 2, 3)),
            annotations=[
                Polygon([0, 0, 4, 0, 4, 4], label=0, id=5),
            ],
            attributes={'id': 22}),
    ], categories=[str(i) for i in range(10)])

    reindexed = Dataset.from_iterable([
        DatasetItem(id=2, image=np.ones((4, 2, 3)),
            annotations=[
                Polygon([0, 0, 4, 0, 4, 4], label=0, id=1, group=1,
                    attributes={'is_crowd': False}),
            ],
            attributes={'id': 1}),
    ], categories=[str(i) for i in range(10)])

    with TestDir() as test_dir:
        self._test_save_and_load(original,
            partial(CocoConverter.convert, reindex=True),
            test_dir, target_dataset=reindexed)
def test_boxes_to_masks(self):
    """BoxesToMasks should rasterize each bbox into a binary mask,
    keeping the z_order of the source shape."""
    input_dataset = Dataset.from_iterable([
        DatasetItem(id=1, image=np.zeros((5, 5, 3)), annotations=[
            Bbox(0, 0, 3, 3, z_order=1),
            Bbox(0, 0, 3, 1, z_order=2),
            Bbox(0, 2, 3, 1, z_order=3),
        ]),
    ])

    expected = Dataset.from_iterable([
        DatasetItem(id=1, image=np.zeros((5, 5, 3)), annotations=[
            Mask(np.array(
                [[1, 1, 1, 0, 0],
                 [1, 1, 1, 0, 0],
                 [1, 1, 1, 0, 0],
                 [0, 0, 0, 0, 0],
                 [0, 0, 0, 0, 0]],
            ), z_order=1),
            Mask(np.array(
                [[1, 1, 1, 0, 0],
                 [0, 0, 0, 0, 0],
                 [0, 0, 0, 0, 0],
                 [0, 0, 0, 0, 0],
                 [0, 0, 0, 0, 0]],
            ), z_order=2),
            Mask(np.array(
                [[0, 0, 0, 0, 0],
                 [0, 0, 0, 0, 0],
                 [1, 1, 1, 0, 0],
                 [0, 0, 0, 0, 0],
                 [0, 0, 0, 0, 0]],
            ), z_order=3),
        ]),
    ])

    produced = transforms.BoxesToMasks(input_dataset)

    compare_datasets(self, expected, produced)
def test_can_import(self):
    """A zip archive of images should import as an image_zip dataset."""
    expected = Dataset.from_iterable([
        DatasetItem(id='1', image=np.ones((10, 10, 3))),
    ])

    archive = osp.join(DUMMY_DATASET_DIR, '1.zip')
    imported = Dataset.import_from(archive, format='image_zip')

    compare_datasets(self, expected, imported)
def test_shapes_to_boxes(self):
    """ShapesToBoxes should replace each shape by its bounding box,
    keeping the annotation id."""
    input_dataset = Dataset.from_iterable([
        DatasetItem(id=1, image=np.zeros((5, 5, 3)), annotations=[
            Mask(np.array([[0, 0, 1, 1, 1],
                           [0, 0, 0, 0, 1],
                           [1, 0, 0, 0, 1],
                           [1, 0, 0, 0, 0],
                           [1, 1, 1, 0, 0]],
            ), id=1),
            Polygon([1, 1, 4, 1, 4, 4, 1, 4], id=2),
            PolyLine([1, 1, 2, 1, 2, 2, 1, 2], id=3),
            Points([2, 2, 4, 2, 4, 4, 2, 4], id=4),
        ]),
    ])

    expected = Dataset.from_iterable([
        DatasetItem(id=1, image=np.zeros((5, 5, 3)), annotations=[
            Bbox(0, 0, 4, 4, id=1),
            Bbox(1, 1, 3, 3, id=2),
            Bbox(1, 1, 1, 1, id=3),
            Bbox(2, 2, 2, 2, id=4),
        ]),
    ])

    produced = transforms.ShapesToBoxes(input_dataset)

    compare_datasets(self, expected, produced)
def test_can_reiterate_sequence(self):
    """Iterating a LabelRandomSampler twice must produce the same
    sequence (the sampling has to be deterministic per instance)."""
    input_dataset = Dataset.from_iterable([
        DatasetItem('1', subset='a', annotations=[Label(0), Label(1)]),
        DatasetItem('2', subset='a', annotations=[Label(1)]),
        DatasetItem('3', subset='a', annotations=[Label(2)]),
        DatasetItem('4', subset='a', annotations=[Label(1), Label(2)]),
        DatasetItem('5', subset='b', annotations=[Label(0)]),
        DatasetItem('6', subset='b', annotations=[Label(0), Label(2)]),
        DatasetItem('7', subset='b', annotations=[Label(1), Label(2)]),
        DatasetItem('8', subset='b', annotations=[Label(2)]),
    ], categories=['a', 'b', 'c'])

    sampled = LabelRandomSampler(input_dataset, count=2)

    # Materialize the same transform twice and compare the results
    first_pass = Dataset.from_extractors(sampled)
    first_pass.init_cache()

    second_pass = Dataset.from_extractors(sampled)
    second_pass.init_cache()

    compare_datasets_strict(self, first_pass, second_pass)
def test_transform_labels(self):
    """AnnsToLabels should reduce all annotations to their Label set,
    dropping unlabeled shapes."""
    input_dataset = Dataset.from_iterable([
        DatasetItem(id=1, annotations=[
            Label(1),
            Bbox(1, 2, 3, 4, label=2),
            Bbox(1, 3, 3, 3),  # no label -> contributes nothing
            Mask(image=np.array([1]), label=3),
            Polygon([1, 1, 2, 2, 3, 4], label=4),
            PolyLine([1, 3, 4, 2, 5, 6], label=5),
        ]),
    ], categories=['label%s' % i for i in range(6)])

    expected = Dataset.from_iterable([
        DatasetItem(id=1, annotations=[
            Label(1), Label(2), Label(3), Label(4), Label(5),
        ]),
    ], categories=['label%s' % i for i in range(6)])

    produced = transforms.AnnsToLabels(input_dataset)

    compare_datasets(self, expected, produced)
def test_crop_covered_segments(self):
    """CropCoveredSegments should cut away the part of a lower-z_order
    mask that a higher-z_order polygon covers."""
    input_dataset = Dataset.from_iterable([
        DatasetItem(id=1, image=np.zeros((5, 5, 3)), annotations=[
            # The mask is partially covered by the polygon
            Mask(np.array(
                [[0, 0, 1, 1, 1],
                 [0, 0, 1, 1, 1],
                 [1, 1, 1, 1, 1],
                 [1, 1, 1, 0, 0],
                 [1, 1, 1, 0, 0]],
            ), z_order=0),
            Polygon([1, 1, 4, 1, 4, 4, 1, 4], z_order=1),
        ]),
    ])

    expected = Dataset.from_iterable([
        DatasetItem(id=1, image=np.zeros((5, 5, 3)), annotations=[
            Mask(np.array(
                [[0, 0, 1, 1, 1],
                 [0, 0, 0, 0, 1],
                 [1, 0, 0, 0, 1],
                 [1, 0, 0, 0, 0],
                 [1, 1, 1, 0, 0]],
            ), z_order=0),
            Polygon([1, 1, 4, 1, 4, 4, 1, 4], z_order=1),
        ]),
    ])

    produced = transforms.CropCoveredSegments(input_dataset)

    compare_datasets(self, expected, produced)
def test_can_convert_polygons_to_mask(self):
    """With segmentation_mode='mask' the COCO converter should merge a
    polygon group into a single RLE mask annotation."""
    original = Dataset.from_iterable([
        DatasetItem(id=1, image=np.zeros((6, 10, 3)), annotations=[
            Polygon([0, 0, 4, 0, 4, 4], label=3, id=4, group=4),
            Polygon([5, 0, 9, 0, 5, 5], label=3, id=4, group=4),
        ]),
    ], categories=[str(i) for i in range(10)])

    rasterized = Dataset.from_iterable([
        DatasetItem(id=1, image=np.zeros((6, 10, 3)), annotations=[
            Mask(np.array([
                    [0, 1, 1, 1, 0, 1, 1, 1, 1, 0],
                    [0, 0, 1, 1, 0, 1, 1, 1, 0, 0],
                    [0, 0, 0, 1, 0, 1, 1, 0, 0, 0],
                    [0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
                    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]],
                # only internal fragment (without the border),
                # but not everywhere...
                ),
                attributes={'is_crowd': True},
                label=3, id=4, group=4),
        ], attributes={'id': 1}),
    ], categories=[str(i) for i in range(10)])

    with TestDir() as test_dir:
        self._test_save_and_load(original,
            partial(CocoInstancesConverter.convert,
                segmentation_mode='mask'),
            test_dir, target_dataset=rasterized)
def test_inplace_save_writes_only_updated_data_with_direct_changes(self):
    """An in-place re-save after put()/remove() should rewrite only the
    touched subsets and images."""
    expected = Dataset.from_iterable([
        DatasetItem(1, subset='a'),
        DatasetItem(2, subset='a', image=np.ones((3, 2, 3))),
        DatasetItem(2, subset='b'),
    ])

    with TestDir() as path:
        # generate initial dataset
        dataset = Dataset.from_iterable([
            # modified subset
            DatasetItem(1, subset='a'),
            # unmodified subset
            DatasetItem(2, subset='b'),
            # removed subset
            DatasetItem(3, subset='c', image=np.ones((2, 2, 3))),
        ])
        dataset.save(path, save_images=True)

        # modify subset 'a', drop subset 'c'
        dataset.put(DatasetItem(2, subset='a', image=np.ones((3, 2, 3))))
        dataset.remove(3, 'c')
        dataset.save(save_images=True)

        self.assertEqual({'a.json', 'b.json'},
            set(os.listdir(osp.join(path, 'annotations'))))
        self.assertEqual({'2.jpg'},
            set(os.listdir(osp.join(path, 'images', 'a'))))
        compare_datasets_strict(self, expected, Dataset.load(path))
def test_can_change_output_labels(self):
    """Labels with a zero or missing requested count should be removed
    from the output label set."""
    expected = Dataset.from_iterable([], categories=['a'])

    input_dataset = Dataset.from_iterable([], categories=['a', 'b', 'c'])

    # 'b' is requested with count 0 and 'c' is not requested at all,
    # so only 'a' survives
    produced = LabelRandomSampler(input_dataset,
        label_counts={'a': 1, 'b': 0})

    compare_datasets(self, expected, produced)
def test_polygons_to_masks(self):
    """PolygonsToMasks should rasterize every polygon onto the image
    canvas as a binary mask."""
    input_dataset = Dataset.from_iterable([
        DatasetItem(id=1, image=np.zeros((5, 10, 3)), annotations=[
            Polygon([0, 0, 4, 0, 4, 4]),
            Polygon([5, 0, 9, 0, 5, 5]),
        ]),
    ])

    expected = Dataset.from_iterable([
        DatasetItem(id=1, image=np.zeros((5, 10, 3)), annotations=[
            Mask(
                np.array([
                    [0, 0, 0, 0, 0, 1, 1, 1, 1, 0],
                    [0, 0, 0, 0, 0, 1, 1, 1, 0, 0],
                    [0, 0, 0, 0, 0, 1, 1, 0, 0, 0],
                    [0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
                    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                ]),
            ),
            Mask(
                np.array([
                    [0, 1, 1, 1, 0, 0, 0, 0, 0, 0],
                    [0, 0, 1, 1, 0, 0, 0, 0, 0, 0],
                    [0, 0, 0, 1, 0, 0, 0, 0, 0, 0],
                    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                ]),
            ),
        ]),
    ])

    produced = transforms.PolygonsToMasks(input_dataset)

    compare_datasets(self, expected, produced)
def test_reindex_allows_single_annotations(self):
    """Reindexing must work for an item with a single annotation,
    assigning track_id=1 and default attributes."""
    original = Dataset.from_iterable([
        DatasetItem(id='abc', annotations=[
            Cuboid3d(position=[0.4, -1, 2.24], label=0),
        ]),
    ], categories=['dog'])

    reindexed = Dataset.from_iterable([
        DatasetItem(id='abc',
            annotations=[
                Cuboid3d(position=[0.4, -1, 2.24], label=0,
                    attributes={'track_id': 1, 'occluded': False}),
            ],
            attributes={'frame': 0}),
    ], categories=['dog'])

    with TestDir() as test_dir:
        self._test_save_and_load(original,
            partial(KittiRawConverter.convert, reindex=True),
            test_dir, target_dataset=reindexed)
def test_relative_paths(self):
    """Items with subdirectory ids must survive a CVAT save/load
    round-trip, gaining sequential 'frame' attributes."""
    original = Dataset.from_iterable([
        DatasetItem(id='1', image=np.ones((4, 2, 3))),
        DatasetItem(id='subdir1/1', image=np.ones((2, 6, 3))),
        DatasetItem(id='subdir2/1', image=np.ones((5, 4, 3))),
    ])

    round_tripped = Dataset.from_iterable([
        DatasetItem(id='1', image=np.ones((4, 2, 3)),
            attributes={'frame': 0}),
        DatasetItem(id='subdir1/1', image=np.ones((2, 6, 3)),
            attributes={'frame': 1}),
        DatasetItem(id='subdir2/1', image=np.ones((5, 4, 3)),
            attributes={'frame': 2}),
    ], categories=[])

    with TestDir() as test_dir:
        self._test_save_and_load(original,
            partial(CvatConverter.convert, save_images=True),
            test_dir, target_dataset=round_tripped,
            require_images=True)
def test_can_run_equality_diff(self):
    """'diff -m equality' across two formats should produce diff.json."""
    left = Dataset.from_iterable([
        DatasetItem(id=100, subset='train', image=np.ones((10, 6, 3)),
            annotations=[
                Bbox(1, 2, 3, 4, label=0),
            ]),
    ], categories=['a', 'b'])

    right = Dataset.from_iterable([
        DatasetItem(id=100, subset='train', image=np.ones((10, 6, 3)),
            annotations=[
                Bbox(1, 2, 3, 4, label=1),
                Bbox(5, 6, 7, 8, label=2),
            ]),
    ], categories=['a', 'b', 'c'])

    with TestDir() as test_dir:
        left_url = osp.join(test_dir, 'dataset1')
        right_url = osp.join(test_dir, 'dataset2')
        left.export(left_url, 'coco', save_images=True)
        right.export(right_url, 'voc', save_images=True)

        result_dir = osp.join(test_dir, 'cmp_result')
        run(self, 'diff', left_url + ':coco', right_url + ':voc',
            '-m', 'equality', '-o', result_dir)

        self.assertEqual({'diff.json'}, set(os.listdir(result_dir)))
def test_class_comparison(self):
    """A mismatch in the category lists should be reported as one error."""
    first = Dataset.from_iterable([], categories=['a', 'b', 'c'])
    second = Dataset.from_iterable([], categories=['b', 'c'])

    comparator = ExactComparator()
    _, _, _, _, errors = comparator.compare_datasets(first, second)

    self.assertEqual(1, len(errors), errors)
def test_can_save_and_load_multiple_related_images(self):
    """KITTI Raw should save each related image into its own
    image_0X/data directory and load them back."""
    original = Dataset.from_iterable([
        DatasetItem(id='a/d',
            annotations=[
                Cuboid3d(position=[1, 2, 3], label=0,
                    attributes={'track_id': 1}),
            ],
            point_cloud=self.pcd1,
            related_images=[self.image1, self.image2, self.image3],
            attributes={'frame': 3}),
    ], categories=['cat'])

    with TestDir() as test_dir:
        label_cat = LabelCategories(attributes={'occluded'})
        label_cat.add('cat')

        expected = Dataset.from_iterable([
            DatasetItem(id='a/d',
                annotations=[
                    Cuboid3d(position=[1, 2, 3], label=0,
                        attributes={'track_id': 1, 'occluded': False}),
                ],
                point_cloud=osp.join(test_dir,
                    'velodyne_points', 'data', 'a', 'd.pcd'),
                related_images=[
                    osp.join(test_dir, 'image_00', 'data', 'a', 'd.png'),
                    osp.join(test_dir, 'image_01', 'data', 'a', 'd.png'),
                    osp.join(test_dir, 'image_02', 'data', 'a', 'd.png'),
                ],
                attributes={'frame': 3}),
        ], categories={AnnotationType.label: label_cat})

        self._test_save_and_load(original,
            partial(KittiRawConverter.convert, save_images=True),
            test_dir, target_dataset=expected,
            require_point_cloud=True)

        # every related image must exist on disk
        self.assertTrue(osp.isfile(
            osp.join(test_dir, 'image_00', 'data', 'a', 'd.png')))
        self.assertTrue(osp.isfile(
            osp.join(test_dir, 'image_01', 'data', 'a', 'd.png')))
        self.assertTrue(osp.isfile(
            osp.join(test_dir, 'image_02', 'data', 'a', 'd.png')))
def test_can_save_and_load_with_pointcloud(self):
    """Datumaro format should round-trip a point cloud together with
    mixed-form related images (Image objects and plain paths)."""
    original = Dataset.from_iterable([
        DatasetItem(id=1, subset='test',
            point_cloud='1.pcd',
            related_images=[
                Image(data=np.ones((5, 5, 3)), path='1/a.jpg'),
                Image(data=np.ones((5, 4, 3)), path='1/b.jpg'),
                Image(size=(5, 3), path='1/c.jpg'),
                '1/d.jpg',
            ],
            annotations=[
                Cuboid3d([2, 2, 2], [1, 1, 1], [3, 3, 1],
                    id=1, group=1, label=0, attributes={'x': True}),
            ]),
    ], categories=['label'])

    with TestDir() as test_dir:
        # After saving, all related images live under related_images/
        # and are renamed image_<index>.jpg
        expected = Dataset.from_iterable([
            DatasetItem(id=1, subset='test',
                point_cloud=osp.join(test_dir,
                    'point_clouds', 'test', '1.pcd'),
                related_images=[
                    Image(data=np.ones((5, 5, 3)),
                        path=osp.join(test_dir, 'related_images',
                            'test', '1', 'image_0.jpg')),
                    Image(data=np.ones((5, 4, 3)),
                        path=osp.join(test_dir, 'related_images',
                            'test', '1', 'image_1.jpg')),
                    Image(size=(5, 3),
                        path=osp.join(test_dir, 'related_images',
                            'test', '1', 'image_2.jpg')),
                    osp.join(test_dir, 'related_images',
                        'test', '1', 'image_3.jpg'),
                ],
                annotations=[
                    Cuboid3d([2, 2, 2], [1, 1, 1], [3, 3, 1],
                        id=1, group=1, label=0, attributes={'x': True}),
                ]),
        ], categories=['label'])

        self._test_save_and_load(original,
            partial(DatumaroConverter.convert, save_images=True),
            test_dir, expected, compare=None,
            dimension=Dimensions.dim_3d)
def test_can_import_from_directory(self):
    """A directory of zip archives should import as one image_zip
    dataset containing all the images."""
    expected = Dataset.from_iterable([
        DatasetItem(id='1', image=np.ones((10, 10, 3))),
        DatasetItem(id='2', image=np.ones((5, 10, 3))),
    ])

    imported = Dataset.import_from(DUMMY_DATASET_DIR,
        format='image_zip')

    compare_datasets(self, expected, imported)
def test_inplace_save_writes_only_updated_data_with_transforms(self):
    """Re-saving in place after filter + random_split should rewrite
    only the resulting subsets, leaving stale image dirs empty."""
    with TestDir() as path:
        expected = Dataset.from_iterable([
            DatasetItem(2, subset='test'),
            DatasetItem(3, subset='train', image=np.ones((2, 2, 3))),
            DatasetItem(4, subset='train', image=np.ones((2, 3, 3))),
            DatasetItem(5, subset='test',
                point_cloud=osp.join(path,
                    'point_clouds', 'test', '5.pcd'),
                related_images=[
                    Image(data=np.ones((3, 4, 3)),
                        path=osp.join(path, 'test', '5', 'image_0.jpg')),
                    osp.join(path, 'test', '5', 'a', '5.png'),
                ]),
        ])

        dataset = Dataset.from_iterable([
            DatasetItem(1, subset='a'),
            DatasetItem(2, subset='b'),
            DatasetItem(3, subset='c', image=np.ones((2, 2, 3))),
            DatasetItem(4, subset='d', image=np.ones((2, 3, 3))),
            DatasetItem(5, subset='e',
                point_cloud='5.pcd',
                related_images=[
                    np.ones((3, 4, 3)),
                    'a/5.png',
                ]),
        ])
        dataset.save(path, save_images=True)

        # drop item 1, reshuffle the rest into train/test
        dataset.filter('/item[id >= 2]')
        dataset.transform('random_split',
            splits=(('train', 0.5), ('test', 0.5)), seed=42)
        dataset.save(save_images=True)

        self.assertEqual(
            {'images', 'annotations', 'point_clouds', 'related_images'},
            set(os.listdir(path)))
        self.assertEqual({'train.json', 'test.json'},
            set(os.listdir(osp.join(path, 'annotations'))))
        self.assertEqual({'3.jpg', '4.jpg'},
            set(os.listdir(osp.join(path, 'images', 'train'))))
        # old subset dirs remain but must be empty
        self.assertEqual({'train', 'c', 'd'},
            set(os.listdir(osp.join(path, 'images'))))
        self.assertEqual(set(),
            set(os.listdir(osp.join(path, 'images', 'c'))))
        self.assertEqual(set(),
            set(os.listdir(osp.join(path, 'images', 'd'))))
        self.assertEqual({'image_0.jpg'},
            set(os.listdir(osp.join(path,
                'related_images', 'test', '5'))))
        compare_datasets_strict(self, expected, Dataset.load(path))
def test_remap_labels(self):
    """RemapLabels with default='keep' should rename mapped labels,
    keep unmapped ones, and rebuild the categories accordingly."""
    input_dataset = Dataset.from_iterable([
        DatasetItem(id=1, annotations=[
            # Should be remapped
            Label(1),
            Bbox(1, 2, 3, 4, label=2),
            Mask(image=np.array([1]), label=3),

            # Should be kept
            Polygon([1, 1, 2, 2, 3, 4], label=4),
            PolyLine([1, 3, 4, 2, 5, 6]),
        ]),
    ], categories={
        AnnotationType.label: LabelCategories.from_iterable(
            'label%s' % i for i in range(5)),
        AnnotationType.mask: MaskCategories(
            colormap=mask_tools.generate_colormap(5)),
    })

    expected = Dataset.from_iterable([
        DatasetItem(id=1, annotations=[
            Label(1),
            Bbox(1, 2, 3, 4, label=0),
            Mask(image=np.array([1]), label=1),
            Polygon([1, 1, 2, 2, 3, 4], label=2),
            PolyLine([1, 3, 4, 2, 5, 6], label=None),
        ]),
    ], categories={
        AnnotationType.label: LabelCategories.from_iterable(
            ['label0', 'label9', 'label4']),
        AnnotationType.mask: MaskCategories(colormap={
            k: v for k, v in mask_tools.generate_colormap(5).items()
            if k in {0, 1, 3, 4}
        }),
    })

    produced = transforms.RemapLabels(input_dataset,
        mapping={
            'label1': 'label9',  # rename
            'label2': 'label0',  # merge into existing
            'label3': 'label9',  # merge into renamed
        }, default='keep')

    compare_datasets(self, expected, produced)
def test_image_comparison(self):
    """With match_images=True items are paired by image content, so id
    differences don't matter; mismatched anns/items are reported."""
    first = Dataset.from_iterable([
        DatasetItem(id=11, image=np.ones((5, 4, 3)), annotations=[
            Bbox(5, 6, 7, 8),
        ]),
        DatasetItem(id=12, image=np.ones((5, 4, 3)), annotations=[
            Bbox(1, 2, 3, 4),
            Bbox(5, 6, 7, 8),
        ]),
        DatasetItem(id=13, image=np.ones((5, 4, 3)), annotations=[
            Bbox(9, 10, 11, 12),  # mismatch
        ]),
        DatasetItem(id=14, image=np.zeros((5, 4, 3)), annotations=[
            Bbox(1, 2, 3, 4),
            Bbox(5, 6, 7, 8),
        ], attributes={'a': 1}),
        DatasetItem(id=15, image=np.zeros((5, 5, 3)), annotations=[
            Bbox(1, 2, 3, 4),
            Bbox(5, 6, 7, 8),
        ]),
    ], categories=['a', 'b', 'c', 'd'])

    second = Dataset.from_iterable([
        DatasetItem(id=21, image=np.ones((5, 4, 3)), annotations=[
            Bbox(5, 6, 7, 8),
        ]),
        DatasetItem(id=22, image=np.ones((5, 4, 3)), annotations=[
            Bbox(1, 2, 3, 4),
            Bbox(5, 6, 7, 8),
        ]),
        DatasetItem(id=23, image=np.ones((5, 4, 3)), annotations=[
            Bbox(10, 10, 11, 12),  # mismatch
        ]),
        DatasetItem(id=24, image=np.zeros((5, 4, 3)), annotations=[
            Bbox(6, 6, 7, 8),  # 1 ann missing, mismatch
        ], attributes={'a': 2}),
        DatasetItem(id=25, image=np.zeros((4, 4, 3)), annotations=[
            Bbox(6, 6, 7, 8),
        ]),
    ], categories=['a', 'b', 'c', 'd'])

    comparator = ExactComparator(match_images=True)
    matched_ann, unmatched_ann, a_unmatched, b_unmatched, errors = \
        comparator.compare_datasets(first, second)

    self.assertEqual(3, len(matched_ann), matched_ann)
    self.assertEqual(5, len(unmatched_ann), unmatched_ann)
    self.assertEqual(1, len(a_unmatched), a_unmatched)
    self.assertEqual(1, len(b_unmatched), b_unmatched)
    self.assertEqual(1, len(errors), errors)
def test_split_for_reidentification_gives_error(self):
    """ReidentificationSplit must reject unlabeled or multi-labeled
    items, invalid ratios, bad subset names, and unknown subsets."""
    query = 0.4 / 0.7  # valid query ratio

    with self.subTest("no label"):
        source = Dataset.from_iterable([
            DatasetItem(1, annotations=[]),
            DatasetItem(2, annotations=[]),
        ], categories=["a", "b", "c"])

        with self.assertRaisesRegex(Exception, "exactly one is expected"):
            splits = [("train", 0.5), ("val", 0.2), ("test", 0.3)]
            actual = splitter.ReidentificationSplit(source, splits, query)
            len(actual.get_subset("train"))  # force evaluation

    with self.subTest(msg="multi label"):
        source = Dataset.from_iterable([
            DatasetItem(1, annotations=[Label(0), Label(1)]),
            DatasetItem(2, annotations=[Label(0), Label(2)]),
        ], categories=["a", "b", "c"])

        with self.assertRaisesRegex(Exception, "exactly one is expected"):
            splits = [("train", 0.5), ("val", 0.2), ("test", 0.3)]
            actual = splitter.ReidentificationSplit(source, splits, query)
            len(actual.get_subset("train"))  # force evaluation

    counts = {i: (i % 3 + 1) * 7 for i in range(10)}
    config = {"person": {"attrs": ["PID"], "counts": counts}}
    source = self._generate_dataset(config)

    with self.subTest("wrong ratio"):
        with self.assertRaisesRegex(Exception, "in the range"):
            splits = [("train", -0.5), ("val", 0.2), ("test", 0.3)]
            splitter.ReidentificationSplit(source, splits, query)

        with self.assertRaisesRegex(Exception, "Sum of ratios"):
            splits = [("train", 0.6), ("val", 0.2), ("test", 0.3)]
            splitter.ReidentificationSplit(source, splits, query)

        with self.assertRaisesRegex(Exception, "in the range"):
            splits = [("train", 0.5), ("val", 0.2), ("test", 0.3)]
            actual = splitter.ReidentificationSplit(source, splits, -query)

    with self.subTest("wrong subset name"):
        with self.assertRaisesRegex(Exception, "Subset name"):
            splits = [("_train", 0.5), ("val", 0.2), ("test", 0.3)]
            splitter.ReidentificationSplit(source, splits, query)

    with self.subTest("wrong attribute name for person id"):
        splits = [("train", 0.5), ("val", 0.2), ("test", 0.3)]
        actual = splitter.ReidentificationSplit(source, splits, query)

        with self.assertRaisesRegex(Exception, "Unknown subset"):
            actual.get_subset("test")
def test_id_from_image(self):
    """IdFromImageName should replace an item's id with its image file
    stem; items without an image keep their id."""
    input_dataset = Dataset.from_iterable([
        DatasetItem(id=1, image='path.jpg'),
        DatasetItem(id=2),  # no image -> id stays
    ])

    expected = Dataset.from_iterable([
        DatasetItem(id='path', image='path.jpg'),
        DatasetItem(id=2),
    ])

    produced = transforms.IdFromImageName(input_dataset)

    compare_datasets(self, expected, produced)
def test_can_match_items(self):
    """IntersectMerge should merge common items with vote scores and
    report items/annotations missing from some sources."""
    # items 1 and 3 are unique, item 2 is common and should be merged
    source0 = Dataset.from_iterable([
        DatasetItem(1, annotations=[Label(0)]),
        DatasetItem(2, annotations=[Label(0)]),
    ], categories=['a', 'b'])

    source1 = Dataset.from_iterable([
        DatasetItem(2, annotations=[Label(1)]),
        DatasetItem(3, annotations=[Label(0)]),
    ], categories=['a', 'b'])

    source2 = Dataset.from_iterable([
        DatasetItem(2, annotations=[Label(0), Bbox(1, 2, 3, 4)]),
    ], categories=['a', 'b'])

    # score = fraction of sources that voted for the annotation
    expected = Dataset.from_iterable([
        DatasetItem(1, annotations=[
            Label(0, attributes={'score': 1/3}),
        ]),
        DatasetItem(2, annotations=[
            Label(0, attributes={'score': 2/3}),
            Label(1, attributes={'score': 1/3}),
            Bbox(1, 2, 3, 4, attributes={'score': 1.0}),
        ]),
        DatasetItem(3, annotations=[
            Label(0, attributes={'score': 1/3}),
        ]),
    ], categories=['a', 'b'])

    merger = IntersectMerge()
    merged = merger([source0, source1, source2])

    compare_datasets(self, expected, merged)

    item_errors = sorted(
        (e for e in merger.errors if isinstance(e, NoMatchingItemError)),
        key=lambda e: e.item_id)
    self.assertEqual([
        NoMatchingItemError(item_id=('1', ''), sources={1, 2}),
        NoMatchingItemError(item_id=('3', ''), sources={0, 2}),
    ], item_errors)

    ann_errors = sorted(
        (e for e in merger.errors if isinstance(e, NoMatchingAnnError)),
        key=lambda e: e.item_id)
    self.assertEqual([
        NoMatchingAnnError(item_id=('2', ''), sources={0, 1},
            ann=source2.get('2').annotations[1]),
    ], ann_errors)
def test_reindex_frames(self):
    """Reindexing should assign sequential 'frame' attributes starting
    from 0 even for items without annotations."""
    original = Dataset.from_iterable(
        [DatasetItem(id='abc')], categories=[])

    reindexed = Dataset.from_iterable(
        [DatasetItem(id='abc', attributes={'frame': 0})],
        categories=[])

    with TestDir() as test_dir:
        self._test_save_and_load(original,
            partial(KittiRawConverter.convert, reindex=True),
            test_dir, target_dataset=reindexed)
def test_can_save_in_another_format(self):
    """'merge -f yolo' should combine a VOC and a COCO source into a
    single YOLO dataset with a unified label set."""
    coco_source = Dataset.from_iterable([
        DatasetItem(id=100, subset='train', image=np.ones((10, 6, 3)),
            annotations=[
                Bbox(1, 2, 3, 3, label=0),
            ]),
    ], categories=['a', 'b'])

    voc_source = Dataset.from_iterable([
        DatasetItem(id=100, subset='train', image=np.ones((10, 6, 3)),
            annotations=[
                Bbox(1, 2, 3, 4, label=1),
                Bbox(5, 6, 2, 3, label=2),
            ]),
    ], categories=['a', 'b', 'c'])

    # merged labels are offset by the implicit 'background' class
    expected = Dataset.from_iterable([
        DatasetItem(id=100, subset='train', image=np.ones((10, 6, 3)),
            annotations=[
                Bbox(1, 2, 3, 4, label=2),
                Bbox(5, 6, 2, 3, label=3),
                Bbox(1, 2, 3, 3, label=1),
            ]),
    ], categories=['background', 'a', 'b', 'c'])

    with TestDir() as test_dir:
        coco_url = osp.join(test_dir, 'dataset1')
        voc_url = osp.join(test_dir, 'dataset2')
        coco_source.export(coco_url, 'coco', save_images=True)
        voc_source.export(voc_url, 'voc', save_images=True)

        result_dir = osp.join(test_dir, 'result')
        run(self, 'merge', '-o', result_dir, '-f', 'yolo',
            voc_url + ':voc', coco_url + ':coco',
            '--', '--save-images')

        compare_datasets(self, expected,
            Dataset.import_from(result_dir, 'yolo'),
            require_images=True)