Example #1
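    # Extractor __init__: scans a directory for image files and stores a sorted, id-keyed OrderedDict of DatasetItems.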
    def __init__(self, url):
        super().__init__()

        assert osp.isdir(url)

        items = []
        for name in os.listdir(url):
            path = osp.join(url, name)
            if self._is_image(path):
                item_id = osp.splitext(name)[0]
                item = DatasetItem(id=item_id, image=path)
                items.append((item.id, item))

        items = sorted(items, key=lambda e: e[0])
        items = OrderedDict(items)
        self._items = items

        self._subsets = None
Example #2
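    # Test extractor: yields one item with a single VOC-style Bbox (id, group, and standard flag attributes).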
    def __iter__(self):
        yield DatasetItem(
            id=1,
            annotations=[
                # drop non voc label
                Bbox(2, 3, 4, 5,
                     label=self._label('cat'),
                     id=1,
                     group=1,
                     attributes={
                         'truncated': False,
                         'difficult': False,
                         'occluded': False,
                     }),
            ])
Example #3
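    # Imports a video as frames, exports them to an image directory, and re-imports to verify the round trip.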
    def test_can_split_and_load(self, fxt_sample_video):
        test_dir = scope_add(TestDir())
        on_exit_do(MediaManager.get_instance().clear)

        expected = Dataset.from_iterable([
            DatasetItem('frame_%06d' % i, image=np.ones((4, 6, 3)) * i)
            for i in range(4)
        ])

        dataset = Dataset.import_from(fxt_sample_video,
                                      'video_frames',
                                      start_frame=0,
                                      end_frame=4,
                                      name_pattern='frame_%06d')
        dataset.export(format='image_dir', save_dir=test_dir, image_ext='.jpg')

        actual = Dataset.import_from(test_dir, 'image_dir')
        compare_datasets(TestCase(), expected, actual)
Example #4
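    # Restricts the Environment to the default format, then checks that import auto-detects it.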
    def test_can_detect_and_import(self):
        env = Environment()
        env.importers.items = {DEFAULT_FORMAT: env.importers[DEFAULT_FORMAT]}
        env.extractors.items = {DEFAULT_FORMAT: env.extractors[DEFAULT_FORMAT]}

        source_dataset = Dataset.from_iterable([
            DatasetItem(id=1, annotations=[Label(2)]),
        ], categories=['a', 'b', 'c'])

        with TestDir() as test_dir:
            source_dataset.save(test_dir)

            imported_dataset = Dataset.import_from(test_dir, env=env)

            self.assertEqual(imported_dataset.data_path, test_dir)
            self.assertEqual(imported_dataset.format, DEFAULT_FORMAT)
            compare_datasets(self, source_dataset, imported_dataset)
Example #5
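    # put() should serve the updated item lazily, without triggering iteration of the source extractor.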
    def test_can_do_lazy_get_on_updated_item(self):
        iter_called = False

        class TestExtractor(Extractor):
            def __iter__(self):
                nonlocal iter_called
                iter_called = True
                return iter([
                    DatasetItem(1),
                    DatasetItem(2),
                ])

        dataset = Dataset.from_extractors(TestExtractor())

        dataset.put(DatasetItem(2))

        self.assertTrue((2, '') in dataset)
        self.assertFalse(iter_called)
Example #6
    def test_can_save_dataset_with_no_subsets(self):
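        # Round-trips a subset-less dataset through Market1501Converter and compares the result.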
        source_dataset = Dataset.from_iterable([
            DatasetItem(id='0001_c2s3_000001_00',
                        image=np.ones((2, 5, 3)),
                        attributes={
                            'camera_id': 1,
                            'person_id': 1,
                            'query': True
                        }),
        ])

        with TestDir() as test_dir:
            Market1501Converter.convert(source_dataset,
                                        test_dir,
                                        save_images=True)
            parsed_dataset = Dataset.import_from(test_dir, 'market1501')

            compare_datasets(self, source_dataset, parsed_dataset)
Example #7
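    # Test extractor: one item with an overlapping Mask and Polygon separated by z_order.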
    def __iter__(self):
        return iter([
            DatasetItem(id=1,
                        image=np.zeros((5, 5, 3)),
                        annotations=[
                            Mask(np.array(
                                [[0, 0, 1, 1, 1], [0, 0, 1, 1, 1],
                                 [1, 1, 0, 1, 1], [1, 1, 1, 0, 0],
                                 [1, 1, 1, 0, 0]]),
                                 label=2,
                                 id=1,
                                 z_order=0),
                            Polygon([1, 1, 4, 1, 4, 4, 1, 4],
                                    label=1,
                                    id=2,
                                    z_order=1),
                        ]),
        ])
Example #8
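    # Lazy item lookup: string entries are parsed into full DatasetItems on first access and cached back.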
    def _get(self, item_id, subset_name):
        subset = self._subsets[subset_name]
        item = subset.items[item_id]

        if isinstance(item, str):
            image_size = self._image_info.get(item_id)
            image = Image(path=osp.join(self._path, item), size=image_size)

            anno_path = osp.splitext(image.path)[0] + '.txt'
            annotations = self._parse_annotations(anno_path, image)

            item = DatasetItem(id=item_id,
                               subset=subset_name,
                               image=image,
                               annotations=annotations)
            subset.items[item_id] = item

        return item
Example #9
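    # Round-trips an item with a path-like id ('a/b/c') through ImagenetTxtConverter.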
    def test_can_save_dataset_with_no_subsets(self):
        source_dataset = Dataset.from_iterable([
            DatasetItem(id='a/b/c', image=np.zeros((8, 4, 3)),
                annotations=[Label(1)]
            ),
        ], categories={
            AnnotationType.label: LabelCategories.from_iterable(
                'label_' + str(label) for label in range(10)),
        })

        with TestDir() as test_dir:
            ImagenetTxtConverter.convert(source_dataset, test_dir,
                save_images=True)

            parsed_dataset = Dataset.import_from(test_dir, 'imagenet_txt')

            compare_datasets(self, source_dataset, parsed_dataset,
                require_images=True)
Example #10
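    # Test extractor: two polygons sharing id and group, i.e. parts of one instance.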
    def __iter__(self):
        return iter([
            DatasetItem(
                id=1,
                image=np.zeros((5, 10, 3)),
                annotations=[
                    Polygon([3.0, 2.5, 1.0, 0.0, 3.5, 0.0, 3.0, 2.5],
                            label=3,
                            id=4,
                            group=4,
                            attributes={'is_crowd': False}),
                    Polygon([5.0, 3.5, 4.5, 0.0, 8.0, 0.0, 5.0, 3.5],
                            label=3,
                            id=4,
                            group=4,
                            attributes={'is_crowd': False}),
                ]),
        ])
Example #11
    def test_can_save_and_load_bboxes_with_no_save_images(self):
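        # Saves ICDAR text-localization annotations without images and checks they load back intact.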
        expected_dataset = Dataset.from_iterable([
            DatasetItem(id=3,
                        subset='train',
                        image=np.ones((10, 15, 3)),
                        annotations=[
                            Polygon([2, 2, 8, 3, 7, 10, 2, 9],
                                    attributes={'text': 'word_2'}),
                            Bbox(0, 2, 5, 9, attributes={'text': 'word_3'}),
                        ]),
        ])

        with TestDir() as test_dir:
            self._test_save_and_load(
                expected_dataset,
                partial(IcdarTextLocalizationConverter.convert,
                        save_images=False), test_dir,
                'icdar_text_localization')
Example #12
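    # Background (label 0) masks must not become instances, but should still cover the masks beneath them.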
    def test_background_masks_dont_introduce_instances_but_cover_others(self):
        dataset = Dataset.from_iterable([
            DatasetItem(1, image=np.zeros((4, 1, 1)), annotations=[
                Mask([1, 1, 1, 1], label=1, attributes={'z_order': 1}),
                Mask([0, 0, 1, 1], label=2, attributes={'z_order': 2}),
                Mask([0, 0, 1, 1], label=0, attributes={'z_order': 3}),
            ])
        ], categories=['background', 'a', 'b'])

        with TestDir() as test_dir:
            VocConverter.convert(dataset, test_dir, apply_colormap=False)

            cls_mask = load_mask(
                osp.join(test_dir, 'SegmentationClass', '1.png'))
            inst_mask = load_mask(
                osp.join(test_dir, 'SegmentationObject', '1.png'))
            self.assertTrue(np.array_equal([0, 1], np.unique(cls_mask)))
            self.assertTrue(np.array_equal([0, 1], np.unique(inst_mask)))
Example #13
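    # Converter.patch(): re-saves annotations, then removes image files for items the patch marked as removed.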
    def patch(cls, dataset, patch, save_dir, **options):
        converter = cls(dataset, save_dir, **options)
        annotation_writer = _AnnotationWriter(save_dir)
        converter._save(annotation_writer)
        annotation_writer.remove_unwritten()

        images_dir = osp.join(save_dir, OpenImagesPath.IMAGES_DIR)
        for (item_id, subset), status in patch.updated_items.items():
            if status != ItemStatus.removed:
                continue

            item = DatasetItem(item_id, subset=subset)

            image_path = osp.join(images_dir,
                converter._make_image_filename(item, subdir=subset))

            if osp.isfile(image_path):
                os.unlink(image_path)
Example #14
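    # Round-trips items carrying only image-info metadata (no annotations) through CocoImageInfoConverter.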
    def test_can_save_and_load_images(self):
        expected_dataset = Dataset.from_iterable([
            DatasetItem(id=1, subset='train', attributes={'id': 1}),
            DatasetItem(id=2, subset='train', attributes={'id': 2}),
            DatasetItem(id=2, subset='val', attributes={'id': 2}),
            DatasetItem(id=3, subset='val', attributes={'id': 3}),
            DatasetItem(id=4, subset='val', attributes={'id': 4}),
            DatasetItem(id=5, subset='test', attributes={'id': 1}),
        ])

        with TestDir() as test_dir:
            self._test_save_and_load(expected_dataset,
                                     CocoImageInfoConverter.convert, test_dir)
Example #15
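    # Imports COCO person keypoints; Points are grouped with their Polygon/Bbox counterparts.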
    def test_can_import_points(self):
        expected_dataset = Dataset.from_iterable([
            DatasetItem(id=1, subset='train',
                image=Image(path='1.jpg', size=(5, 5)),
                annotations=[
                    Points([0, 0, 0, 2, 4, 1], [0, 1, 2],
                        label=1, group=1, id=1,
                        attributes={'is_crowd': False}),
                    Polygon([0, 0, 4, 0, 4, 4],
                        label=1, group=1, id=1,
                        attributes={'is_crowd': False}),

                    Points([1, 2, 3, 4, 2, 3],
                        group=2, id=2,
                        attributes={'is_crowd': False}),
                    Bbox(1, 2, 2, 2,
                        group=2, id=2,
                        attributes={'is_crowd': False}),

                    Points([1, 2, 0, 2, 4, 1],
                        label=0, group=3, id=3,
                        attributes={'is_crowd': False}),
                    Bbox(0, 1, 4, 1,
                        label=0, group=3, id=3,
                        attributes={'is_crowd': False}),

                    Points([0, 0, 1, 2, 3, 4], [0, 1, 2],
                        group=5, id=5,
                        attributes={'is_crowd': False}),
                    Bbox(1, 2, 2, 2,
                        group=5, id=5,
                        attributes={'is_crowd': False}),
                ], attributes={'id': 1}),
        ], categories={
            AnnotationType.label: LabelCategories.from_iterable(['a', 'b']),
            AnnotationType.points: PointsCategories.from_iterable(
                (i, None, [[0, 1], [1, 2]]) for i in range(2)
            ),
        })

        dataset = Dataset.import_from(
            osp.join(DUMMY_DATASET_DIR, 'coco_person_keypoints'), 'coco')

        compare_datasets(self, expected_dataset, dataset)
Example #16
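    # Builds a reference dataset covering captions, labels, boxes, points, masks, polylines, and polygons across several subsets.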
    def test_dataset(self):
        label_categories = LabelCategories()
        for i in range(5):
            label_categories.add('cat' + str(i))

        mask_categories = MaskCategories(
            generate_colormap(len(label_categories.items)))

        points_categories = PointsCategories()
        for index, _ in enumerate(label_categories.items):
            points_categories.add(index, ['cat1', 'cat2'], joints=[[0, 1]])

        return Dataset.from_iterable([
            DatasetItem(id=100, subset='train', image=np.ones((10, 6, 3)),
                annotations=[
                    Caption('hello', id=1),
                    Caption('world', id=2, group=5),
                    Label(2, id=3, attributes={
                        'x': 1,
                        'y': '2',
                    }),
                    Bbox(1, 2, 3, 4, label=4, id=4, z_order=1, attributes={
                        'score': 1.0,
                    }),
                    Bbox(5, 6, 7, 8, id=5, group=5),
                    Points([1, 2, 2, 0, 1, 1], label=0, id=5, z_order=4),
                    Mask(label=3, id=5, z_order=2, image=np.ones((2, 3))),
                ]),
            DatasetItem(id=21, subset='train',
                annotations=[
                    Caption('test'),
                    Label(2),
                    Bbox(1, 2, 3, 4, label=5, id=42, group=42)
                ]),

            DatasetItem(id=2, subset='val',
                annotations=[
                    PolyLine([1, 2, 3, 4, 5, 6, 7, 8], id=11, z_order=1),
                    Polygon([1, 2, 3, 4, 5, 6, 7, 8], id=12, z_order=4),
                ]),

            DatasetItem(id=42, subset='test',
                attributes={'a1': 5, 'a2': '42'}),

            DatasetItem(id=42),
            DatasetItem(id=43, image=Image(path='1/b/c.qq', size=(2, 4))),
        ], categories={
            AnnotationType.label: label_categories,
            AnnotationType.mask: mask_categories,
            AnnotationType.points: points_categories,
        })
Example #17
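    # Comparing an item with itself should produce only matches, one per annotation above the confidence threshold.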
    def test_no_label_diff_with_same_item(self):
        detections = 3
        anns = [
            Label(i, attributes={'score': (1.0 + i) / detections})
            for i in range(detections)
        ]
        item = DatasetItem(id=1, annotations=anns)

        conf_thresh = 0.5
        comp = Comparator(conf_threshold=conf_thresh)

        result = comp.compare_item_labels(item, item)

        matches, a_greater, b_greater = result
        self.assertEqual(0, len(a_greater))
        self.assertEqual(0, len(b_greater))
        self.assertEqual(
            len([it for it in item.annotations
                 if conf_thresh < it.attributes['score']]),
            len(matches))
Example #18
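    # Test extractor: VOC-style bboxes with per-action boolean attributes.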
    def __iter__(self):
        return iter([
            DatasetItem(id='a/b/1', subset='a', annotations=[
                Bbox(2, 3, 4, 5, label=2,
                    attributes={
                        'truncated': True,
                        VOC.VocAction(1).name: True,
                        VOC.VocAction(2).name: True,
                    }
                ),
                Bbox(5, 4, 3, 2, label=self._label('person'),
                    attributes={
                        'truncated': True,
                        VOC.VocAction(1).name: True,
                        VOC.VocAction(2).name: True,
                    }
                ),
            ]),
        ])
Example #19
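    # Converts parsed frame descriptors into DatasetItems, resolving image paths and sizes when available.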
    def _load_items(self, parsed):
        for frame_id, item_desc in parsed.items():
            filename = item_desc.get('name')
            if filename:
                filename = self._find_image(filename)
            if not filename:
                filename = item_desc.get('name')
            image_size = (item_desc.get('height'), item_desc.get('width'))
            if all(image_size):
                image_size = (int(image_size[0]), int(image_size[1]))
            else:
                image_size = None
            image = None
            if filename:
                image = Image(path=filename, size=image_size)

            parsed[frame_id] = DatasetItem(id=frame_id, subset=self._subset,
                image=image, annotations=item_desc.get('annotations'))
        return parsed
Example #20
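    # Test extractor: a Polygon instance plus an is_crowd Mask, COCO-style.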
    def __iter__(self):
        return iter([
            DatasetItem(id=1,
                        image=np.ones((10, 5, 3)),
                        subset='val',
                        annotations=[
                            Polygon([0, 0, 1, 0, 1, 2, 0, 2],
                                    label=0,
                                    id=1,
                                    group=1,
                                    attributes={'is_crowd': False}),
                            Mask(np.array([[1, 0, 0, 1, 0]] * 5 +
                                          [[1, 1, 1, 1, 0]] * 5),
                                 label=0,
                                 id=2,
                                 group=2,
                                 attributes={'is_crowd': True}),
                        ]),
        ])
Example #21
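    # Builds a classification dataset from a {label: item_count} config, with optional scores, attributes, and images.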
    def _generate_classification_dataset(self,
                                         config,
                                         subset=None,
                                         empty_scores=False,
                                         out_range=False,
                                         no_attr=False,
                                         no_img=False):
        probs = self._get_probs(out_range)
        if subset is None:
            self.subset = ["train", "val", "test"]
        else:
            self.subset = subset

        iterable = []
        label_cat = LabelCategories()
        idx = 0
        for label_id, label in enumerate(config.keys()):
            num_item = config[label]
            label_cat.add(label, attributes=None)
            for _ in range(num_item):
                scores = probs[idx]
                idx += 1
                if empty_scores:
                    scores = []
                attr = {"scores": scores}
                if no_attr:
                    attr = {}
                img = Image(path=f"test/dataset/{idx}.jpg", size=(90, 90))
                if no_img:
                    img = None
                iterable.append(
                    DatasetItem(
                        idx,
                        subset=self.subset[idx % len(self.subset)],
                        annotations=[Label(
                            label_id,
                            attributes=attr,
                        )],
                        image=img,
                    ))
        categories = {AnnotationType.label: label_cat}
        dataset = Dataset.from_iterable(iterable, categories)
        return dataset
Example #22
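    # Cyrillic and spaces in an item id must survive a MOTS PNG save/load cycle.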
    def test_can_save_dataset_with_cyrillic_and_spaces_in_filename(self):
        source = Dataset.from_iterable([
            DatasetItem(id='кириллица с пробелом',
                        subset='a',
                        image=np.ones((5, 1)),
                        annotations=[
                            Mask(np.array([[1, 0, 0, 0, 0]]),
                                 label=0,
                                 attributes={'track_id': 2}),
                        ]),
        ], categories=['a'])

        with TestDir() as test_dir:
            self._test_save_and_load(source,
                                     partial(MotsPngConverter.convert,
                                             save_images=True),
                                     test_dir,
                                     require_images=True)
Example #23
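    # Test extractor: a single crowd Mask given as a raw binary array.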
    def __iter__(self):
        return iter([
            DatasetItem(id=1, image=np.zeros((6, 10, 3)),
                annotations=[
                    Mask(np.array([
                            [0, 1, 1, 1, 0, 1, 1, 1, 1, 0],
                            [0, 0, 1, 1, 0, 1, 1, 1, 0, 0],
                            [0, 0, 0, 1, 0, 1, 1, 0, 0, 0],
                            [0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
                            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]],
                            # only internal fragment (without the border),
                            # but not everywhere...
                        ),
                        attributes={'is_crowd': True},
                        label=3, id=4, group=4),
                ]
            ),
        ])
Example #24
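    # mean_std() over normally distributed images should recover the generating mean and std (places=0, i.e. within 0.5).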
    def test_mean_std(self):
        expected_mean = [100, 50, 150]
        expected_std = [20, 50, 10]

        dataset = Dataset.from_iterable([
            DatasetItem(id=i, image=np.random.normal(
                expected_mean, expected_std, size=(h, w, 3))
            )
            for i, (w, h) in enumerate([
                (3000, 100), (800, 600), (400, 200), (700, 300)
            ])
        ])

        actual_mean, actual_std = mean_std(dataset)

        for em, am in zip(expected_mean, actual_mean):
            self.assertAlmostEqual(em, am, places=0)
        for estd, astd in zip(expected_std, actual_std):
            self.assertAlmostEqual(estd, astd, places=0)
Example #25
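    # Test extractor: VOC items with pose, truncation flags, and per-action attributes.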
    def __iter__(self):
        return iter([
            DatasetItem(
                id='2007_000001',
                subset='train',
                annotations=[
                    Bbox(1, 2, 2, 2,
                         label=self._label('cat'),
                         attributes={
                             'pose': VOC.VocPose(1).name,
                             'truncated': True,
                             'difficult': False,
                             'occluded': False,
                         },
                         id=1,
                         group=1),
                    Bbox(4, 5, 2, 2,
                         label=self._label('person'),
                         attributes={
                             'truncated': False,
                             'difficult': False,
                             'occluded': False,
                             **{
                                 a.name: a.value % 2 == 1
                                 for a in VOC.VocAction
                             }
                         },
                         id=2,
                         group=2,
                         # TODO: Actions and group should be excluded
                         # as soon as correct merge is implemented
                         ),
                ]),
        ])
Example #26
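    # RandomSplit must reject ratio lists that are unnormalized, empty, or out of range.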
    def test_random_split_gives_error_on_wrong_ratios(self):
        source_dataset = Dataset.from_iterable([DatasetItem(id=1)])

        with self.assertRaises(Exception):
            transforms.RandomSplit(source_dataset,
                                   splits=[
                                       ('train', 0.5),
                                       ('test', 0.7),
                                   ])

        with self.assertRaises(Exception):
            transforms.RandomSplit(source_dataset, splits=[])

        with self.assertRaises(Exception):
            transforms.RandomSplit(source_dataset,
                                   splits=[
                                       ('train', -0.5),
                                       ('test', 1.5),
                                   ])
Example #27
    def test_can_save_dataset_with_cyrillic_and_spaces_in_filename(self):
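        # Round-trips a Cyrillic id with spaces through YoloConverter with images enabled.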
        source_dataset = Dataset.from_iterable([
            DatasetItem(id='кириллица с пробелом', subset='train',
                image=np.ones((8, 8, 3)),
                annotations=[
                    Bbox(0, 2, 4, 2, label=2),
                    Bbox(0, 1, 2, 3, label=4),
                ]),
        ], categories={
            AnnotationType.label: LabelCategories.from_iterable(
                'label_' + str(i) for i in range(10)),
        })

        with TestDir() as test_dir:
            YoloConverter.convert(source_dataset, test_dir, save_images=True)
            parsed_dataset = Dataset.import_from(test_dir, 'yolo')

            compare_datasets(self, source_dataset, parsed_dataset,
                require_images=True)
Example #28
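    # Test extractor: VOC bboxes where all actions not listed as True are filled in as False.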
    def __iter__(self):
        return iter([
            DatasetItem(
                id='a/b/1',
                subset='a',
                annotations=[
                    Bbox(2, 3, 4, 5,
                         label=2,
                         id=1,
                         group=1,
                         attributes={
                             'truncated': True,
                             'difficult': False,
                             'occluded': False,
                             # no attributes here in the label categories
                         }),
                    Bbox(5, 4, 3, 2,
                         label=self._label('person'),
                         id=2,
                         group=2,
                         attributes={
                             'truncated': True,
                             'difficult': False,
                             'occluded': False,
                             VOC.VocAction(1).name: True,
                             VOC.VocAction(2).name: True,
                             **{
                                 a.name: False
                                 for a in VOC.VocAction
                                 if a.value not in {1, 2}
                             }
                         }),
                ]),
        ])
Example #29
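    # Test extractor: overlapping masks; z_order determines which pixels each instance keeps.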
    def __iter__(self):
        return iter([
            DatasetItem(
                id=1,
                subset='a',
                annotations=[
                    # overlapping masks, the first should be truncated
                    # the second and third are different instances
                    Mask(image=np.array([[0, 0, 0, 1, 0]]),
                         label=3,
                         z_order=3),
                    Mask(image=np.array([[0, 1, 1, 1, 0]]),
                         label=4,
                         z_order=1),
                    Mask(image=np.array([[1, 1, 0, 0, 0]]),
                         label=3,
                         z_order=2),
                ]),
        ])
Example #30
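    # Parses a VOC-style list file (optionally with quoted image/mask paths) into DatasetItems with one Mask per class present.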
    def _load_items(self, path):
        items = {}

        labels = self._categories[AnnotationType.label]._indices
        labels = { labels[label_name]: label_name
            for label_name in labels }

        with open(path, encoding='utf-8') as f:
            for line in f:
                line = line.strip()
                objects = line.split('\"')
                if 1 < len(objects):
                    if len(objects) == 5:
                        objects[0] = objects[1]
                        objects[1] = objects[3]
                    else:
                        raise Exception("Line %s: unexpected number "
                            "of quotes in filename" % line)
                else:
                    objects = line.split()
                image = objects[0]
                item_id = osp.splitext(osp.join(*image.split('/')[2:]))[0]
                image_path = osp.join(self._dataset_dir, image.lstrip('/'))

                item_annotations = []
                if 1 < len(objects):
                    gt = objects[1]
                    gt_path = osp.join(self._dataset_dir, gt.lstrip('/'))
                    mask = lazy_mask(gt_path,
                        self._categories[AnnotationType.mask].inverse_colormap)
                    mask = mask() # loading mask through cache

                    classes = np.unique(mask)
                    for label_id in classes:
                        if labels[label_id] in self._labels:
                            image = self._lazy_extract_mask(mask, label_id)
                            item_annotations.append(
                                Mask(image=image, label=label_id))

                items[item_id] = DatasetItem(id=item_id, subset=self._subset,
                    image=image_path, annotations=item_annotations)

        return items