Ejemplo n.º 1
0
    def __init__(self, extractor: IExtractor,
                 dst_labels: Union[Iterable[str], LabelCategories]):
        """
        Prepares the destination categories for the wrapped extractor.

        The label categories are taken from ``dst_labels``: a
        LabelCategories object is deep-copied as is, while an iterable of
        label names is turned into new label categories. In the latter case,
        a name that is also found in the source label categories keeps the
        parent and the attributes of the source label.

        Mask colors and point categories of the source are then re-keyed
        through ``self._map_id()``; destination labels whose names are
        missing from the source labels get colors generated for them.
        """
        super().__init__(extractor)

        self._categories = {}

        source_cats = self._extractor.categories()
        source_labels = source_cats.get(AnnotationType.label)

        if isinstance(dst_labels, LabelCategories):
            target_labels = deepcopy(dst_labels)
        else:
            label_names = list(dst_labels)

            if not source_labels:
                target_labels = LabelCategories.from_iterable(label_names)
            else:
                target_labels = LabelCategories(
                    attributes=deepcopy(source_labels.attributes))

                for name in label_names:
                    assert isinstance(name, str)
                    found = source_labels.find(name)[1]
                    if found is None:
                        target_labels.add(name)
                    else:
                        # Known label: carry over its parent and attributes
                        target_labels.add(name, found.parent,
                                          deepcopy(found.attributes))

        # A parent that is not among the destination labels is reset
        for entry in target_labels:
            if entry.parent not in target_labels:
                entry.parent = ''
        self._categories[AnnotationType.label] = target_labels

        self._make_label_id_map(source_labels, target_labels)

        source_masks = source_cats.get(AnnotationType.mask)
        if source_masks is not None:
            assert source_labels is not None
            target_masks = MaskCategories(
                attributes=deepcopy(source_masks.attributes))
            for source_id, source_color in source_masks.colormap.items():
                mapped_id = self._map_id(source_id)
                if mapped_id is None or mapped_id in target_masks:
                    continue
                target_masks.colormap[mapped_id] = deepcopy(source_color)

            # Generate new colors for new labels, keep old untouched
            taken_colors = set(target_masks.colormap.values())
            spare_colors = iter(
                mask_tools.generate_colormap(
                    len(target_labels), include_background=False).values())
            for index, entry in enumerate(target_labels):
                if entry.name in source_labels or index in target_masks:
                    continue

                # Skip generated colors that are already used by kept labels
                candidate = next(spare_colors)
                while candidate in taken_colors:
                    candidate = next(spare_colors)

                target_masks.colormap[index] = candidate

            self._categories[AnnotationType.mask] = target_masks

        source_points = source_cats.get(AnnotationType.points)
        if source_points is not None:
            assert source_labels is not None
            target_points = PointsCategories(
                attributes=deepcopy(source_points.attributes))
            for source_id, source_cat in source_points.items.items():
                mapped_id = self._map_id(source_id)
                if mapped_id is None or mapped_id in target_points:
                    continue
                target_points.items[mapped_id] = deepcopy(source_cat)

            self._categories[AnnotationType.points] = target_points
Ejemplo n.º 2
0
    def test_can_compare_projects(self): # just a smoke test
        """
        Runs DiffVisualizer on two datasets that hold the same items but use
        a different label order, and checks that the output directory is
        not empty afterwards.
        """
        label_categories1 = LabelCategories.from_iterable(['x', 'a', 'b', 'y'])
        mask_categories1 = MaskCategories.generate(len(label_categories1))

        point_categories1 = PointsCategories()
        for index, _ in enumerate(label_categories1.items):
            point_categories1.add(index, ['cat1', 'cat2'], joints=[[0, 1]])

        dataset1 = Dataset.from_iterable([
            DatasetItem(id=100, subset='train', image=np.ones((10, 6, 3)),
                annotations=[
                    Caption('hello', id=1),
                    Caption('world', id=2, group=5),
                    Label(2, id=3, attributes={
                        'x': 1,
                        'y': '2',
                    }),
                    Bbox(1, 2, 3, 4, label=0, id=4, z_order=1, attributes={
                        'score': 1.0,
                    }),
                    Bbox(5, 6, 7, 8, id=5, group=5),
                    Points([1, 2, 2, 0, 1, 1], label=0, id=5, z_order=4),
                    Mask(label=3, id=5, z_order=2, image=np.ones((2, 3))),
                ]),
            DatasetItem(id=21, subset='train',
                annotations=[
                    Caption('test'),
                    Label(2),
                    Bbox(1, 2, 3, 4, label=2, id=42, group=42)
                ]),

            DatasetItem(id=2, subset='val',
                annotations=[
                    PolyLine([1, 2, 3, 4, 5, 6, 7, 8], id=11, z_order=1),
                    Polygon([1, 2, 3, 4, 5, 6, 7, 8], id=12, z_order=4),
                ]),

            DatasetItem(id=42, subset='test',
                attributes={'a1': 5, 'a2': '42'}),

            DatasetItem(id=42),
            DatasetItem(id=43, image=Image(path='1/b/c.qq', size=(2, 4))),
        ], categories={
            AnnotationType.label: label_categories1,
            AnnotationType.mask: mask_categories1,
            AnnotationType.points: point_categories1,
        })


        label_categories2 = LabelCategories.from_iterable(['a', 'b', 'x', 'y'])
        mask_categories2 = MaskCategories.generate(len(label_categories2))

        point_categories2 = PointsCategories()
        for index, _ in enumerate(label_categories2.items):
            point_categories2.add(index, ['cat1', 'cat2'], joints=[[0, 1]])

        dataset2 = Dataset.from_iterable([
            DatasetItem(id=100, subset='train', image=np.ones((10, 6, 3)),
                annotations=[
                    Caption('hello', id=1),
                    Caption('world', id=2, group=5),
                    Label(2, id=3, attributes={
                        'x': 1,
                        'y': '2',
                    }),
                    Bbox(1, 2, 3, 4, label=1, id=4, z_order=1, attributes={
                        'score': 1.0,
                    }),
                    Bbox(5, 6, 7, 8, id=5, group=5),
                    Points([1, 2, 2, 0, 1, 1], label=0, id=5, z_order=4),
                    Mask(label=3, id=5, z_order=2, image=np.ones((2, 3))),
                ]),
            DatasetItem(id=21, subset='train',
                annotations=[
                    Caption('test'),
                    Label(2),
                    Bbox(1, 2, 3, 4, label=3, id=42, group=42)
                ]),

            DatasetItem(id=2, subset='val',
                annotations=[
                    PolyLine([1, 2, 3, 4, 5, 6, 7, 8], id=11, z_order=1),
                    Polygon([1, 2, 3, 4, 5, 6, 7, 8], id=12, z_order=4),
                ]),

            DatasetItem(id=42, subset='test',
                attributes={'a1': 5, 'a2': '42'}),

            DatasetItem(id=42),
            DatasetItem(id=43, image=Image(path='1/b/c.qq', size=(2, 4))),
        ], categories={
            AnnotationType.label: label_categories2,
            AnnotationType.mask: mask_categories2,
            AnnotationType.points: point_categories2,
        })

        with TestDir() as test_dir:
            with DiffVisualizer(save_dir=test_dir,
                        comparator=DistanceComparator(iou_threshold=0.8),
                    ) as visualizer:
                visualizer.save(dataset1, dataset2)

            # Compare against the number of entries: a list is never equal
            # to 0, so checking the listing itself could not fail.
            self.assertNotEqual(0, len(os.listdir(test_dir)))
Ejemplo n.º 3
0
    def compute_statistics(self, dataset):
        """
        Computes statistics of the dataset for the segmentation task.

        The result of self._compute_common_statistics() is extended with
        the distributions of annotation area, width and height per label and
        per attribute value, the number of annotations per dataset item, and
        the annotation properties that are infinite or NaN.

        Parameters
        ----------
        dataset : IDataset object

        Returns
        -------
        stats (dict): A dict object containing statistics of the dataset.
        """

        stats, filtered_anns = self._compute_common_statistics(dataset)

        # segmentation-specific
        mask_template = {
            prop: deepcopy(self.numerical_stat_template)
            for prop in ('area', 'width', 'height')
        }

        items_w_invalid_val = {}
        dist_by_label = {}
        dist_by_attr = {}
        mask_dist_in_item = {}

        stats['items_with_invalid_value'] = items_w_invalid_val
        stats['mask_distribution_in_label'] = dist_by_label
        stats['mask_distribution_in_attribute'] = dist_by_attr
        stats['mask_distribution_in_dataset_item'] = mask_dist_in_item

        def _measure(ann):
            # Gathers the properties tracked for a single annotation
            _x, _y, width, height = ann.get_bbox()

            # Delete the following block when #226 is resolved
            # https://github.com/openvinotoolkit/datumaro/issues/226
            if ann.type == AnnotationType.mask:
                width += 1
                height += 1

            return {
                'area': ann.get_area(),
                'width': width,
                'height': height,
            }

        def _record_label_stats(item_key, ann, label_stats):
            # Registers invalid property values of the annotation; the label
            # distribution is only updated when every property is valid
            props = _measure(ann)
            has_invalid = False

            for prop, val in props.items():
                if val == float('inf') or np.isnan(val):
                    has_invalid = True
                    items_w_invalid_val.setdefault(item_key, {}) \
                        .setdefault(ann.id, []).append(prop)

            if not has_invalid:
                self._update_prop_distributions(props, label_stats)

            return props, has_invalid

        label_categories = dataset.categories().get(AnnotationType.label,
                                                    LabelCategories())
        base_valid_attrs = label_categories.attributes

        # first pass: accumulate the distributions
        for item_key, annotations in filtered_anns:
            mask_dist_in_item[item_key] = len(annotations)

            for ann in annotations:
                if 0 <= ann.label < len(label_categories):
                    label_entry = label_categories[ann.label]
                    label_name = label_entry.name
                    valid_attrs = base_valid_attrs.union(
                        label_entry.attributes)

                    label_stats = dist_by_label.setdefault(
                        label_name, deepcopy(mask_template))
                    ann_mask_info, mask_has_error = _record_label_stats(
                        item_key, ann, label_stats)
                else:
                    # Undefined label: no attribute is considered valid,
                    # so the attribute loop below records nothing
                    label_name = ann.label
                    valid_attrs = set()

                for attr, value in ann.attributes.items():
                    if attr not in valid_attrs:
                        continue

                    value_stats = dist_by_attr.setdefault(label_name, {}) \
                        .setdefault(attr, {}) \
                        .setdefault(str(value), deepcopy(mask_template))

                    if not mask_has_error:
                        self._update_prop_distributions(
                            ann_mask_info, value_stats)

        # compute prop stats from dist.
        self._compute_prop_stats_from_dist(dist_by_label, dist_by_attr)

        def _flag_far_from_mean(item_key, ann):
            # Checks every property of the annotation against the stats of
            # its label and of each of its valid attribute values
            label_entry = label_categories[ann.label]
            valid_attrs = base_valid_attrs.union(label_entry.attributes)
            label_name = label_entry.name
            label_stats = dist_by_label[label_name]

            props = _measure(ann)

            for prop, val in props.items():
                self._compute_far_from_mean(label_stats[prop], val,
                                            item_key, ann)

            for attr, value in ann.attributes.items():
                if attr not in valid_attrs:
                    continue

                value_stats = dist_by_attr[label_name][attr][str(value)]
                for prop, val in props.items():
                    self._compute_far_from_mean(value_stats[prop], val,
                                                item_key, ann)

        # second pass: only annotations with a defined label and without
        # invalid property values are checked
        for item_key, annotations in filtered_anns:
            for ann in annotations:
                if not 0 <= ann.label < len(label_categories):
                    continue
                if ann.id in items_w_invalid_val.get(item_key, {}):
                    continue

                _flag_far_from_mean(item_key, ann)

        return stats
Ejemplo n.º 4
0
    def test_dataset(self):
        """
        Builds the dataset used as a fixture: items with captions, labels,
        boxes, points, masks, polylines, polygons and a 3D cuboid, together
        with label, mask and points categories for labels 'cat0'..'cat4'.
        """
        labels = LabelCategories(attributes={'a', 'b', 'score'})
        for number in range(5):
            labels.add('cat' + str(number), attributes={'x', 'y'})

        masks = MaskCategories(generate_colormap(len(labels.items)))

        points = PointsCategories()
        for label_index, _ in enumerate(labels.items):
            points.add(label_index, ['cat1', 'cat2'], joints=[[0, 1]])

        annotated_train_item = DatasetItem(
            id=100, subset='train', image=np.ones((10, 6, 3)),
            annotations=[
                Caption('hello', id=1),
                Caption('world', id=2, group=5),
                Label(2, id=3, attributes={'x': 1, 'y': '2'}),
                Bbox(1, 2, 3, 4, label=4, id=4, z_order=1,
                     attributes={'score': 1.0}),
                Bbox(5, 6, 7, 8, id=5, group=5,
                     attributes={'a': 1.5, 'b': 'text'}),
                Points([1, 2, 2, 0, 1, 1], label=0, id=5, z_order=4,
                       attributes={'x': 1, 'y': '2'}),
                Mask(label=3, id=5, z_order=2, image=np.ones((2, 3)),
                     attributes={'x': 1, 'y': '2'}),
            ])

        plain_train_item = DatasetItem(
            id=21, subset='train',
            annotations=[
                Caption('test'),
                Label(2),
                Bbox(1, 2, 3, 4, label=5, id=42, group=42)
            ])

        shapes_item = DatasetItem(
            id=2, subset='val',
            annotations=[
                PolyLine([1, 2, 3, 4, 5, 6, 7, 8], id=11, z_order=1),
                Polygon([1, 2, 3, 4, 5, 6, 7, 8], id=12, z_order=4),
            ])

        cuboid_item = DatasetItem(
            id=1, subset='test',
            annotations=[
                Cuboid3d([1.0, 2.0, 3.0], [2.0, 2.0, 4.0], [1.0, 3.0, 4.0],
                         id=6, label=0, attributes={'occluded': True},
                         group=6)
            ])

        items = [
            annotated_train_item,
            plain_train_item,
            shapes_item,
            cuboid_item,
            DatasetItem(id=42, subset='test',
                        attributes={'a1': 5, 'a2': '42'}),
            DatasetItem(id=42),
            DatasetItem(id=43, image=Image(path='1/b/c.qq', size=(2, 4))),
        ]

        return Dataset.from_iterable(items, categories={
            AnnotationType.label: labels,
            AnnotationType.mask: masks,
            AnnotationType.points: points,
        })
Ejemplo n.º 5
0
    def _parse_meta(context):
        """
        Reads the 'meta' section from a stream of (event, element) pairs.

        The first pair must be the start of the 'annotations' element.
        Reading stops at the end of the 'meta' element, or at the first
        'image', 'track' or 'tag' element found directly in 'annotations'.

        Returns a tuple of:
        - categories: a dict with the label categories
        - tasks_info: a dict mapping task id to its frame size and mode
        - attribute_types: a dict mapping attribute name to its input type
        """
        ev, el = next(context)
        if ev != 'start' or el.tag != 'annotations':
            raise Exception("Unexpected token ")

        # state -> {tag of the opened element: state to enter}
        opening = {
            'annotations': {
                'meta': 'meta',
                'image': 'image',
                'track': 'track',
                'tag': 'tag',
            },
            'meta': {'task': 'task', 'project': 'project'},
            'project': {'tasks': 'tasks', 'labels': 'labels'},
            'tasks': {'task': 'task'},
            'task': {
                'id': 'task_id',
                'segment': 'segment',
                'mode': 'mode',
                'original_size': 'original_size',
                'labels': 'labels',
            },
            'original_size': {
                'height': 'frame_height',
                'width': 'frame_width',
            },
            'labels': {'label': 'label'},
            'label': {'name': 'label_name', 'attributes': 'attributes'},
            'attributes': {'attribute': 'attribute'},
            'attribute': {'name': 'attr_name', 'input_type': 'attr_type'},
        }

        # state -> tag of the element whose end leaves the state
        closing = {
            'meta': 'meta',
            'project': 'project',
            'tasks': 'tasks',
            'task': 'task',
            'task_id': 'id',
            'segment': 'segment',
            'mode': 'mode',
            'original_size': 'original_size',
            'frame_height': 'height',
            'frame_width': 'width',
            'label_name': 'name',
            'attr_name': 'name',
            'attr_type': 'input_type',
            'attribute': 'attribute',
            'attributes': 'attributes',
            'label': 'label',
            'labels': 'labels',
        }

        tasks_info = {}
        frame_size = [None, None]
        task_id = None
        mode = None
        labels = OrderedDict()
        label = None

        # The stack of the states entered so far
        states = ['annotations']

        for ev, el in context:
            state = states[-1]

            if ev == 'start':
                transitions = opening.get(state)
                if not transitions or el is None:
                    continue

                entered = transitions.get(el.tag)
                if entered is None:
                    continue
                states.append(entered)

                if entered == 'label':
                    label = {'name': None, 'attributes': []}
                elif state == 'annotations' and entered != 'meta':
                    # An image, track or tag: annotations start here
                    break

            elif ev == 'end':
                closing_tag = closing.get(state)
                if closing_tag is None or el is None \
                        or el.tag != closing_tag:
                    continue
                states.pop()

                if state == 'meta':
                    break
                elif state == 'task':
                    tasks_info[task_id] = {
                        'frame_size': frame_size,
                        'mode': mode,
                    }
                    frame_size = [None, None]
                    mode = None
                elif state == 'task_id':
                    task_id = int(el.text)
                elif state == 'mode':
                    mode = el.text
                elif state == 'frame_height':
                    frame_size[0] = int(el.text)
                elif state == 'frame_width':
                    frame_size[1] = int(el.text)
                elif state == 'label_name':
                    label['name'] = el.text
                elif state == 'attr_name':
                    label['attributes'].append({'name': el.text})
                elif state == 'attr_type':
                    label['attributes'][-1]['input_type'] = el.text
                elif state == 'label':
                    labels[label['name']] = label['attributes']
                    label = None

        assert len(states) == 1 and states[0] == 'annotations', \
            "Expected 'meta' section in the annotation file, path: %s" % states

        common_attrs = ['occluded']
        if any(task['mode'] == 'interpolation'
               for task in tasks_info.values()):
            common_attrs.extend(['keyframe', 'outside', 'track_id'])

        label_cat = LabelCategories(attributes=common_attrs)
        attribute_types = {}
        for label_name, label_attrs in labels.items():
            label_cat.add(label_name,
                          attributes={attr['name'] for attr in label_attrs})
            for attr in label_attrs:
                attribute_types[attr['name']] = attr['input_type']

        categories = {AnnotationType.label: label_cat}
        return categories, tasks_info, attribute_types
Ejemplo n.º 6
0
    def _load_items(self, subset):
        """
        Loads the items of a subset directory into self._items.

        Every image found under the subset directory whose name does not
        match Ade20k2020Path.MASK_PATTERN becomes a dataset item. Its
        annotations are built from the info returned by
        self._load_item_info(): class masks for each part level, then an
        instance mask and, when enough points are given, a polygon for each
        object. Label names that are not known yet are added to the label
        categories.
        """
        labels = self._categories.setdefault(AnnotationType.label,
                                             LabelCategories())
        path = osp.join(self._path, subset)

        for image_path in sorted(find_images(path, recursive=True)):
            item_id = osp.splitext(osp.relpath(image_path, path))[0]

            if Ade20k2020Path.MASK_PATTERN.fullmatch(osp.basename(item_id)):
                continue

            item_annotations = []
            item_info = self._load_item_info(image_path)
            for item in item_info:
                label_idx = labels.find(item['label_name'])[0]
                if label_idx is None:
                    labels.add(item['label_name'])

            image_base = osp.splitext(image_path)[0]
            # An image without any object info has no part levels to load
            max_part_level = max((p['part_level'] for p in item_info),
                                 default=-1)
            for part_level in range(max_part_level + 1):
                # The path is derived from the level on every iteration, so
                # a missing mask of one level does not prevent the masks of
                # the following levels from being looked up.
                if part_level == 0:
                    mask_path = image_base + '_seg.png'
                else:
                    mask_path = image_base + '_parts_%s.png' % part_level

                if not osp.exists(mask_path):
                    log.warning('Can`t find part level %s mask for %s' \
                        % (part_level, image_path))
                    continue

                mask = lazy_image(mask_path, loader=self._load_class_mask)
                mask = CompiledMask(instance_mask=mask)

                classes = {(v['class_idx'], v['label_name'])
                           for v in item_info if v['part_level'] == part_level}

                for class_idx, label_name in classes:
                    label_id = labels.find(label_name)[0]
                    item_annotations.append(
                        Mask(label=label_id,
                             id=class_idx,
                             image=mask.lazy_extract(class_idx),
                             group=class_idx,
                             z_order=part_level))

            for item in item_info:
                instance_path = osp.join(osp.dirname(image_path),
                                         item['instance_mask'])
                if not osp.isfile(instance_path):
                    log.warning('Can`t find instance mask: %s' % instance_path)
                    continue

                mask = lazy_image(instance_path,
                                  loader=self._load_instance_mask)
                mask = CompiledMask(instance_mask=mask)

                label_id = labels.find(item['label_name'])[0]
                instance_id = item['id']
                attributes = {k: True for k in item['attributes']}
                polygon_points = item['polygon_points']

                item_annotations.append(
                    Mask(label=label_id,
                         image=mask.lazy_extract(1),
                         id=instance_id,
                         attributes=attributes,
                         z_order=item['part_level'],
                         group=instance_id))

                # A polygon needs an even number of coordinates describing
                # at least 3 points
                if (len(polygon_points) % 2 == 0 \
                        and 3 <= len(polygon_points) // 2):
                    item_annotations.append(
                        Polygon(polygon_points,
                                label=label_id,
                                attributes=attributes,
                                id=instance_id,
                                z_order=item['part_level'],
                                group=instance_id))

            self._items.append(
                DatasetItem(item_id,
                            subset=subset,
                            image=image_path,
                            annotations=item_annotations))
Ejemplo n.º 7
0
    def test_inplace_save_writes_only_updated_data_with_transforms(self):
        """
        Exports a dataset in the 'voc' format, then filters it, re-splits it
        and saves it in place. Checks the files left in the export directory
        and compares the re-imported dataset with the expected one.
        """
        def voc_attrs():
            # A separate dict for every annotation
            return {
                'truncated': False,
                'difficult': False,
                'occluded': False,
            }

        expected_items = [
            DatasetItem(3, subset='test', image=np.ones((2, 3, 3)),
                annotations=[
                    Bbox(0, 1, 0, 0, label=4, id=1, group=1,
                        attributes=voc_attrs())
                ]),
            DatasetItem(4, subset='train', image=np.ones((2, 4, 3)),
                annotations=[
                    Bbox(1, 0, 0, 0, label=4, id=1, group=1,
                        attributes=voc_attrs()),
                    Mask(np.ones((2, 2)), label=2, group=1),
                ]),
        ]
        expected = Dataset.from_iterable(expected_items, categories={
            AnnotationType.label: LabelCategories.from_iterable(
                ['background', 'a', 'b', 'c', 'd']),
            AnnotationType.mask: MaskCategories(
                colormap=VOC.generate_colormap(5)),
        })

        source_items = [
            DatasetItem(1, subset='a', image=np.ones((2, 1, 3)),
                annotations=[ Bbox(0, 0, 0, 1, label=1) ]),
            DatasetItem(2, subset='b', image=np.ones((2, 2, 3)),
                annotations=[
                    Bbox(0, 0, 1, 0, label=2),
                    Mask(np.ones((2, 2)), label=1),
                ]),
            DatasetItem(3, subset='b', image=np.ones((2, 3, 3)),
                annotations=[ Bbox(0, 1, 0, 0, label=3) ]),
            DatasetItem(4, subset='c', image=np.ones((2, 4, 3)),
                annotations=[
                    Bbox(1, 0, 0, 0, label=3),
                    Mask(np.ones((2, 2)), label=1)
                ]),
        ]
        dataset = Dataset.from_iterable(source_items,
            categories=['a', 'b', 'c', 'd'])

        with TestDir() as path:
            def listed(*subdirs):
                # Names of the entries in a subdirectory of the export
                return set(os.listdir(osp.join(path, *subdirs)))

            dataset.export(path, 'voc', save_images=True)

            dataset.filter('/item[id >= 3]')
            dataset.transform('random_split',
                splits=(('train', 0.5), ('test', 0.5)), seed=42)
            dataset.save(save_images=True)

            self.assertEqual({'3.xml', '4.xml'}, listed('Annotations'))
            self.assertEqual({'3.jpg', '4.jpg'}, listed('JPEGImages'))
            self.assertEqual({'4.png'}, listed('SegmentationClass'))
            self.assertEqual({'4.png'}, listed('SegmentationObject'))
            self.assertEqual({'train.txt', 'test.txt'},
                listed('ImageSets', 'Main'))
            self.assertEqual({'train.txt'},
                listed('ImageSets', 'Segmentation'))

            reloaded = Dataset.import_from(path, 'voc')
            compare_datasets(self, expected, reloaded, require_images=True)
Ejemplo n.º 8
0
    def test_can_load(self):
        """Import the dummy dataset as 'kitti_raw' and compare it with a
        hand-built reference dataset (point clouds are required to match).
        """
        def cloud_path(file_name):
            # Point clouds of the dummy dataset live in velodyne_points/data.
            return osp.join(DUMMY_DATASET_DIR, 'velodyne_points', 'data',
                            file_name)

        def image_path(file_name):
            # Related images of the dummy dataset live in IMAGE_00/data.
            return osp.join(DUMMY_DATASET_DIR, 'IMAGE_00', 'data', file_name)

        reference_labels = LabelCategories(attributes={'occluded'})
        for label_name in ('bus', 'car'):
            reference_labels.add(label_name)

        first_frame = DatasetItem(
            id='0000000000',
            annotations=[
                Cuboid3d(position=[1, 2, 3],
                         scale=[7.95, -3.62, -1.03],
                         label=1,
                         attributes={'occluded': False, 'track_id': 1}),
                Cuboid3d(position=[1, 1, 0],
                         scale=[8.34, 23.01, -0.76],
                         label=0,
                         attributes={'occluded': False, 'track_id': 2}),
            ],
            point_cloud=cloud_path('0000000000.pcd'),
            related_images=[image_path('0000000000.png')],
            attributes={'frame': 0})

        second_frame = DatasetItem(
            id='0000000001',
            annotations=[
                Cuboid3d(position=[0, 1, 0],
                         scale=[8.34, 23.01, -0.76],
                         rotation=[1, 1, 3],
                         label=0,
                         attributes={'occluded': True, 'track_id': 2}),
            ],
            point_cloud=cloud_path('0000000001.pcd'),
            related_images=[image_path('0000000001.png')],
            attributes={'frame': 1})

        third_frame = DatasetItem(
            id='0000000002',
            annotations=[
                Cuboid3d(position=[1, 2, 3],
                         scale=[-9.41, 13.54, 0.24],
                         label=1,
                         attributes={'occluded': False, 'track_id': 3}),
            ],
            point_cloud=cloud_path('0000000002.pcd'),
            related_images=[image_path('0000000002.png')],
            attributes={'frame': 2})

        reference = Dataset.from_iterable(
            [first_frame, second_frame, third_frame],
            categories={AnnotationType.label: reference_labels})

        imported = Dataset.import_from(DUMMY_DATASET_DIR, 'kitti_raw')

        compare_datasets_3d(self, reference, imported,
                            require_point_cloud=True)
    def test_can_load(self):
        """Import the dummy dataset as 'sly_pointcloud' and compare it with a
        hand-built reference dataset (point clouds are required to match).
        """
        def in_ds0(*parts):
            # Every referenced file sits under the 'ds0' folder.
            return osp.join(DUMMY_DATASET_DIR, 'ds0', *parts)

        reference_labels = LabelCategories(attributes={'tag1', 'tag3'})
        for label_name in ('car', 'bus'):
            reference_labels.add(label_name)

        frame1 = DatasetItem(
            id='frame1',
            annotations=[
                Cuboid3d(id=755220128,
                         label=0,
                         position=[0.47, 0.23, 0.79],
                         scale=[0.01, 0.01, 0.01],
                         attributes={'track_id': 231825,
                                     'tag1': 'fd',
                                     'tag3': '4s'}),
                Cuboid3d(id=755337225,
                         label=0,
                         position=[0.36, 0.64, 0.93],
                         scale=[0.01, 0.01, 0.01],
                         attributes={'track_id': 231831,
                                     'tag1': 'v12',
                                     'tag3': ''}),
            ],
            point_cloud=in_ds0('pointcloud', 'frame1.pcd'),
            related_images=[
                in_ds0('related_images', 'frame1_pcd', 'img2.png'),
            ],
            attributes={'frame': 0,
                        'description': '',
                        'tag1': '25dsd',
                        'tag2': 65})

        frame2 = DatasetItem(
            id='frame2',
            annotations=[
                Cuboid3d(id=216,
                         label=1,
                         position=[0.59, 14.41, -0.61],
                         attributes={'track_id': 36,
                                     'tag1': '',
                                     'tag3': ''}),
            ],
            point_cloud=in_ds0('pointcloud', 'frame2.pcd'),
            related_images=[
                in_ds0('related_images', 'frame2_pcd', 'img1.png'),
            ],
            attributes={'frame': 1, 'description': ''})

        reference = Dataset.from_iterable(
            [frame1, frame2],
            categories={AnnotationType.label: reference_labels})

        imported = Dataset.import_from(DUMMY_DATASET_DIR, 'sly_pointcloud')

        compare_datasets_3d(self, reference, imported,
                            require_point_cloud=True)
Ejemplo n.º 10
0
 def _load_categories(self, labels):
     """Return a categories dict whose only entry maps the label
     annotation type to label categories built from ``labels``."""
     label_categories = LabelCategories.from_iterable(labels)
     return {AnnotationType.label: label_categories}
    def test_can_save_and_load(self):
        """Save a point cloud dataset with SuperviselyPointCloudConverter
        (images included) and compare the reloaded data with the expected
        target dataset.
        """
        def make_label_categories():
            # A fresh object per call: source and target must not share it.
            label_cat = LabelCategories(attributes={'occluded'})
            label_cat.add('car', attributes=['x'])
            label_cat.add('bus')
            return label_cat

        def make_frame_1_cuboids():
            return [
                Cuboid3d(id=206,
                         label=0,
                         position=[320.86, 979.18, 1.04],
                         attributes={'occluded': False,
                                     'track_id': 1,
                                     'x': 1}),
                Cuboid3d(id=207,
                         label=1,
                         position=[318.19, 974.65, 1.29],
                         attributes={'occluded': True,
                                     'track_id': 2}),
            ]

        def make_frm2_cuboids():
            return [
                Cuboid3d(id=208,
                         label=1,
                         position=[23.04, 8.75, -0.78],
                         attributes={'occluded': False,
                                     'track_id': 2}),
            ]

        source_items = [
            DatasetItem(id='frame_1',
                        annotations=make_frame_1_cuboids(),
                        point_cloud=self.pcd1,
                        attributes={'frame': 0, 'description': 'zzz'}),
            DatasetItem(id='frm2',
                        annotations=make_frm2_cuboids(),
                        point_cloud=self.pcd2,
                        related_images=[self.image2],
                        attributes={'frame': 1}),
        ]
        source_dataset = Dataset.from_iterable(
            source_items,
            categories={AnnotationType.label: make_label_categories()})

        with TestDir() as test_dir:
            # Locations the reloaded items are expected to reference.
            cloud_dir = osp.join(test_dir, 'ds0', 'pointcloud')
            frm2_image = osp.join(test_dir, 'ds0', 'related_images',
                                  'frm2_pcd', 'img1.png')

            target_items = [
                DatasetItem(id='frame_1',
                            annotations=make_frame_1_cuboids(),
                            point_cloud=osp.join(cloud_dir, 'frame_1.pcd'),
                            attributes={'frame': 0, 'description': 'zzz'}),
                DatasetItem(id='frm2',
                            annotations=make_frm2_cuboids(),
                            point_cloud=osp.join(cloud_dir, 'frm2.pcd'),
                            related_images=[frm2_image],
                            attributes={'frame': 1, 'description': ''}),
            ]
            target_dataset = Dataset.from_iterable(
                target_items,
                categories={AnnotationType.label: make_label_categories()})

            converter = partial(SuperviselyPointCloudConverter.convert,
                                save_images=True)
            self._test_save_and_load(source_dataset,
                                     converter,
                                     test_dir,
                                     target_dataset=target_dataset,
                                     require_point_cloud=True)
Ejemplo n.º 12
0
    def compute_statistics(self, dataset):
        """
        Computes statistics of the dataset for the detection task.

        On top of the common statistics, bounding-box property
        distributions are collected per label and per attribute value, and
        annotations with invalid values or too small width/height are
        recorded.

        Parameters
        ----------
        dataset : IDataset object

        Returns
        -------
        stats (dict): A dict object containing statistics of the dataset.
        """

        stats, filtered_anns = self._compute_common_statistics(dataset)

        # detection-specific
        bbox_prop_names = ('width', 'height', 'area(wxh)', 'ratio(w/h)',
                           'short', 'long')
        bbox_template = {
            prop_name: deepcopy(self.numerical_stat_template)
            for prop_name in bbox_prop_names
        }

        # Each dict is registered in `stats` and kept under a local alias.
        items_w_neg_len = stats['items_with_negative_length'] = {}
        items_w_invalid_val = stats['items_with_invalid_value'] = {}
        dist_by_label = stats['bbox_distribution_in_label'] = {}
        dist_by_attr = stats['bbox_distribution_in_attribute'] = {}
        bbox_dist_in_item = stats['bbox_distribution_in_dataset_item'] = {}

        def _measure_bbox(ann):
            # Raw geometry of an annotation; the ratio is left to the caller
            # because the two call sites compute it differently.
            left, top, width, height = ann.get_bbox()
            area = ann.get_area()

            if width < height:
                shorter = width
            else:
                shorter = height

            if width > height:
                longer = width
            else:
                longer = height

            return left, top, width, height, area, shorter, longer

        def _make_bbox_info(measures, ratio):
            left, top, width, height, area, shorter, longer = measures
            return {
                'x': left,
                'y': top,
                'width': width,
                'height': height,
                'area(wxh)': area,
                'ratio(w/h)': ratio,
                'short': shorter,
                'long': longer,
            }

        def _update_bbox_stats_by_label(item_key, ann, bbox_label_stats):
            measures = _measure_bbox(ann)
            width, height = measures[2], measures[3]

            # No meaningful ratio for a zero or infinite height.
            if height == 0 or height == float('inf'):
                ratio = float('nan')
            else:
                ratio = width / height

            bbox_info = _make_bbox_info(measures, ratio)
            has_error = False

            # Remember every property holding inf or NaN.
            for prop, val in bbox_info.items():
                if val == float('inf') or np.isnan(val):
                    has_error = True
                    invalid_in_item = items_w_invalid_val.setdefault(
                        item_key, {})
                    invalid_in_item.setdefault(ann.id, []).append(prop)

            # Remember width/height values below 1.
            for prop in ('width', 'height'):
                val = bbox_info[prop]
                if val < 1:
                    has_error = True
                    neg_in_item = items_w_neg_len.setdefault(item_key, {})
                    neg_in_item.setdefault(ann.id, {})[prop] = val

            if not has_error:
                # Coordinates are not part of the distributions.
                del bbox_info['x']
                del bbox_info['y']
                self._update_prop_distributions(bbox_info, bbox_label_stats)

            return bbox_info, has_error

        label_categories = dataset.categories().get(AnnotationType.label,
                                                    LabelCategories())
        base_valid_attrs = label_categories.attributes

        for item_key, annotations in filtered_anns:
            bbox_dist_in_item[item_key] = len(annotations)

            for ann in annotations:
                if 0 <= ann.label < len(label_categories):
                    label_cat = label_categories[ann.label]
                    label_name = label_cat.name
                    valid_attrs = base_valid_attrs.union(
                        label_cat.attributes)

                    bbox_label_stats = dist_by_label.setdefault(
                        label_name, deepcopy(bbox_template))
                    ann_bbox_info, bbox_has_error = \
                        _update_bbox_stats_by_label(
                            item_key, ann, bbox_label_stats)
                else:
                    # Undefined label: no bbox stats and no valid attributes,
                    # so the attribute loop below records nothing for it.
                    label_name = ann.label
                    valid_attrs = set()

                for attr, value in ann.attributes.items():
                    if attr not in valid_attrs:
                        continue

                    per_label = dist_by_attr.setdefault(label_name, {})
                    per_attr = per_label.setdefault(attr, {})
                    per_value = per_attr.setdefault(
                        str(value), deepcopy(bbox_template))

                    if not bbox_has_error:
                        self._update_prop_distributions(
                            ann_bbox_info, per_value)

        # Compute prop stats from distribution
        self._compute_prop_stats_from_dist(dist_by_label, dist_by_attr)

        def _is_valid_ann(item_key, ann):
            if not 0 <= ann.label < len(label_categories):
                return False
            if ann.id in items_w_neg_len.get(item_key, {}):
                return False
            return ann.id not in items_w_invalid_val.get(item_key, {})

        def _update_props_far_from_mean(item_key, ann):
            label_cat = label_categories[ann.label]
            valid_attrs = base_valid_attrs.union(label_cat.attributes)
            label_name = label_cat.name
            bbox_label_stats = dist_by_label[label_name]

            measures = _measure_bbox(ann)
            bbox_info = _make_bbox_info(measures, measures[2] / measures[3])
            del bbox_info['x']
            del bbox_info['y']

            for prop, val in bbox_info.items():
                self._compute_far_from_mean(bbox_label_stats[prop], val,
                                            item_key, ann)

            for attr, value in ann.attributes.items():
                if attr not in valid_attrs:
                    continue

                per_value = dist_by_attr[label_name][attr][str(value)]
                for prop, val in bbox_info.items():
                    self._compute_far_from_mean(per_value[prop], val,
                                                item_key, ann)

        # Second pass: only annotations that passed all the checks above.
        for item_key, annotations in filtered_anns:
            for ann in annotations:
                if _is_valid_ann(item_key, ann):
                    _update_props_far_from_mean(item_key, ann)

        return stats
Ejemplo n.º 13
0
    def _compute_common_statistics(self, dataset):
        """
        Collects label and attribute statistics over the annotations whose
        type is listed in ``self.ann_types``.

        Parameters
        ----------
        dataset : iterable of items; also provides ``categories()``

        Returns
        -------
        stats (dict): label/attribute distributions, the total annotation
            count and the items that have no matching annotation.
        filtered_anns (list): ``((item.id, item.subset), annotations)``
            pairs, one per dataset item.
        """
        defined_attr_template = {
            'items_missing_attribute': [],
            'distribution': {}
        }
        undefined_attr_template = {
            'items_with_undefined_attr': [],
            'distribution': {}
        }
        undefined_label_template = {
            'count': 0,
            'items_with_undefined_label': [],
        }

        defined_label_dist = {}
        undefined_label_dist = {}
        defined_attr_dist = {}
        undefined_attr_dist = {}
        items_missing_annotation = []

        stats = {
            'label_distribution': {
                'defined_labels': defined_label_dist,
                'undefined_labels': undefined_label_dist,
            },
            'attribute_distribution': {
                'defined_attributes': defined_attr_dist,
                'undefined_attributes': undefined_attr_dist,
            },
            'total_ann_count': 0,
            'items_missing_annotation': items_missing_annotation,
        }

        label_categories = dataset.categories().get(AnnotationType.label,
                                                    LabelCategories())
        base_valid_attrs = label_categories.attributes

        # Every known label gets a counter, even if it never occurs.
        for category in label_categories:
            defined_label_dist[category.name] = 0

        filtered_anns = []
        for item in dataset:
            item_key = (item.id, item.subset)
            annotations = [ann for ann in item.annotations
                           if ann.type in self.ann_types]
            filtered_anns.append((item_key, annotations))

            if not annotations:
                items_missing_annotation.append(item_key)
            stats['total_ann_count'] += len(annotations)

            for ann in annotations:
                if 0 <= ann.label < len(label_categories):
                    label_cat = label_categories[ann.label]
                    label_name = label_cat.name
                    defined_label_dist[label_name] += 1

                    defined_attr_stats = defined_attr_dist.setdefault(
                        label_name, {})

                    valid_attrs = base_valid_attrs.union(
                        label_cat.attributes)
                    ann_attrs = getattr(ann, 'attributes', {}).keys()
                    missing_attrs = valid_attrs.difference(ann_attrs)

                    for attr in valid_attrs:
                        defined_attr_stats.setdefault(
                            attr, deepcopy(defined_attr_template))
                else:
                    # The label index is outside of the label categories.
                    label_name = ann.label

                    label_stats = undefined_label_dist.setdefault(
                        ann.label, deepcopy(undefined_label_template))
                    label_stats['items_with_undefined_label'].append(
                        item_key)
                    label_stats['count'] += 1

                    valid_attrs = set()
                    missing_attrs = set()

                for attr in missing_attrs:
                    defined_attr_stats[attr]['items_missing_attribute'] \
                        .append(item_key)

                for attr, value in ann.attributes.items():
                    if attr in valid_attrs:
                        attr_dets = defined_attr_stats[attr]
                    else:
                        undefined_attr_stats = \
                            undefined_attr_dist.setdefault(label_name, {})
                        attr_dets = undefined_attr_stats.setdefault(
                            attr, deepcopy(undefined_attr_template))
                        attr_dets['items_with_undefined_attr'].append(
                            item_key)

                    # Count occurrences of the stringified attribute value.
                    value_key = str(value)
                    distribution = attr_dets['distribution']
                    distribution[value_key] = \
                        distribution.get(value_key, 0) + 1

        return stats, filtered_anns
    def test_can_import(self):
        """Import the dummy dataset as 'align_celeba' and compare it with a
        hand-built reference dataset (images are required to match).
        """
        attribute_names = [
            '5_o_Clock_Shadow', 'Arched_Eyebrows', 'Attractive',
            'Bags_Under_Eyes', 'Bald', 'Bangs', 'Big_Lips', 'Big_Nose',
        ]
        landmark_names = [
            'lefteye_x', 'lefteye_y', 'righteye_x', 'righteye_y',
            'nose_x', 'nose_y', 'leftmouth_x', 'leftmouth_y',
            'rightmouth_x', 'rightmouth_y',
        ]

        def make_item(item_id, subset, label, landmarks, flags=None):
            # `attributes` is only passed for items that have attribute flags.
            extra = {}
            if flags is not None:
                extra['attributes'] = dict(zip(attribute_names, flags))
            return DatasetItem(
                id=item_id,
                subset=subset,
                image=np.ones((3, 4, 3)),
                annotations=[Label(label), Points(landmarks, label=label)],
                **extra)

        expected_items = [
            make_item('000001', 'train', 12,
                      [69, 109, 106, 113, 77, 142, 73, 152, 108, 154],
                      flags=[False, True, True, False,
                             False, False, False, False]),
            make_item('000002', 'train', 5,
                      [69, 110, 107, 112, 81, 135, 70, 151, 108, 153]),
            make_item('000003', 'val', 2,
                      [76, 112, 104, 106, 108, 128, 74, 156, 98, 158],
                      flags=[False, False, False, True,
                             False, False, False, True]),
            make_item('000004', 'test', 10,
                      [72, 113, 108, 108, 101, 138, 71, 155, 101, 151]),
            make_item('000005', 'test', 7,
                      [66, 114, 112, 112, 86, 119, 71, 147, 104, 150]),
        ]

        label_categories = LabelCategories.from_iterable(
            f'class-{i}' for i in range(13))
        points_categories = PointsCategories.from_iterable(
            [(index, [name]) for index, name in enumerate(landmark_names)])

        expected_dataset = Dataset.from_iterable(
            expected_items,
            categories={
                AnnotationType.label: label_categories,
                AnnotationType.points: points_categories,
            })

        dataset = Dataset.import_from(DUMMY_ALIGN_DATASET_DIR, 'align_celeba')

        compare_datasets(self, expected_dataset, dataset, require_images=True)
    def test_can_convert_to_kitti_raw(self):
        """Run the 'convert' command from 'sly_pointcloud' to 'kitti_raw'
        (with images and attributes) and compare the exported dataset with a
        hand-built reference.
        """
        with TestDir() as test_dir:
            export_dir = osp.join(test_dir, 'export_dir')

            def exported_cloud(file_name):
                return osp.join(export_dir, 'velodyne_points', 'data',
                                file_name)

            def exported_image(file_name):
                return osp.join(export_dir, 'image_00', 'data', file_name)

            reference_labels = LabelCategories(attributes={'occluded'})
            for label_name in ('bus', 'car'):
                reference_labels.add(label_name,
                                     attributes={'tag1', 'tag3'})

            frame1 = DatasetItem(
                id='frame1',
                annotations=[
                    Cuboid3d(label=1,
                             position=[0.47, 0.23, 0.79],
                             scale=[0.01, 0.01, 0.01],
                             attributes={'track_id': 2,
                                         'tag1': 'fd',
                                         'tag3': '4s',
                                         'occluded': False}),
                    Cuboid3d(label=1,
                             position=[0.36, 0.64, 0.93],
                             scale=[0.01, 0.01, 0.01],
                             attributes={'track_id': 3,
                                         'tag1': 'v12',
                                         'tag3': '',
                                         'occluded': False}),
                ],
                point_cloud=exported_cloud('frame1.pcd'),
                related_images=[exported_image('frame1.png')],
                attributes={'frame': 0})

            frame2 = DatasetItem(
                id='frame2',
                annotations=[
                    Cuboid3d(label=0,
                             position=[0.59, 14.41, -0.61],
                             attributes={'track_id': 1,
                                         'tag1': '',
                                         'tag3': '',
                                         'occluded': False}),
                ],
                point_cloud=exported_cloud('frame2.pcd'),
                related_images=[exported_image('frame2.png')],
                attributes={'frame': 1})

            expected_dataset = Dataset.from_iterable(
                [frame1, frame2],
                categories={AnnotationType.label: reference_labels})

            # Arguments after '--' are the extra options of the conversion.
            run(self, 'convert',
                '-if', 'sly_pointcloud',
                '-i', DUMMY_DATASET_DIR,
                '-f', 'kitti_raw',
                '-o', export_dir,
                '--', '--save-images', '--allow-attrs')

            parsed_dataset = Dataset.import_from(export_dir,
                                                 format='kitti_raw')
            compare_datasets_3d(self, expected_dataset, parsed_dataset,
                                require_point_cloud=True)
Ejemplo n.º 16
0
    def test_can_save_bboxes(self):
        source_dataset = Dataset.from_iterable(
            [
                DatasetItem(id=1,
                            subset='train',
                            image=np.ones((16, 16, 3)),
                            annotations=[
                                Bbox(0,
                                     4,
                                     4,
                                     8,
                                     label=2,
                                     attributes={
                                         'occluded': True,
                                     }),
                                Bbox(0,
                                     4,
                                     4,
                                     4,
                                     label=3,
                                     attributes={
                                         'visibility': 0.4,
                                     }),
                                Bbox(2, 4, 4, 4, attributes={'ignored': True}),
                            ]),
                DatasetItem(id=2,
                            subset='val',
                            image=np.ones((8, 8, 3)),
                            annotations=[
                                Bbox(1, 2, 4, 2, label=3),
                            ]),
                DatasetItem(
                    id=3,
                    subset='test',
                    image=np.ones((5, 4, 3)) * 3,
                ),
            ],
            categories={
                AnnotationType.label:
                LabelCategories.from_iterable('label_' + str(label)
                                              for label in range(10)),
            })

        target_dataset = Dataset.from_iterable(
            [
                DatasetItem(id=1,
                            image=np.ones((16, 16, 3)),
                            annotations=[
                                Bbox(0,
                                     4,
                                     4,
                                     8,
                                     label=2,
                                     attributes={
                                         'occluded': True,
                                         'visibility': 0.0,
                                         'ignored': False,
                                     }),
                                Bbox(0,
                                     4,
                                     4,
                                     4,
                                     label=3,
                                     attributes={
                                         'occluded': False,
                                         'visibility': 0.4,
                                         'ignored': False,
                                     }),
                                Bbox(2,
                                     4,
                                     4,
                                     4,
                                     attributes={
                                         'occluded': False,
                                         'visibility': 1.0,
                                         'ignored': True,
                                     }),
                            ]),
                DatasetItem(id=2,
                            image=np.ones((8, 8, 3)),
                            annotations=[
                                Bbox(1,
                                     2,
                                     4,
                                     2,
                                     label=3,
                                     attributes={
                                         'occluded': False,
                                         'visibility': 1.0,
                                         'ignored': False,
                                     }),
                            ]),
                DatasetItem(
                    id=3,
                    image=np.ones((5, 4, 3)) * 3,
                ),
            ],
            categories={
                AnnotationType.label:
                LabelCategories.from_iterable('label_' + str(label)
                                              for label in range(10)),
            })

        with TestDir() as test_dir:
            self._test_save_and_load(source_dataset,
                                     partial(MotSeqGtConverter.convert,
                                             save_images=True),
                                     test_dir,
                                     target_dataset=target_dataset,
                                     require_images=True)
Ejemplo n.º 17
0
    def save(self, a: IDataset, b: IDataset):
        """Compare datasets ``a`` and ``b`` and save the comparison results.

        Mismatches in dataset length, label names and item ids are printed
        to stdout. Items present in both datasets (matched by the
        ``(id, subset)`` pair) are compared through ``self._cmp`` for labels,
        boxes, polygons and masks; each result updates the corresponding
        confusion statistics, and the label and bbox results are additionally
        saved per item. Finally, every non-empty confusion matrix is passed
        to ``self.save_conf_matrix`` with a ``<kind>_confusion.png`` name.
        """
        len_a, len_b = len(a), len(b)
        if len_a != len_b:
            print("Datasets have different lengths: %s vs %s" % \
                (len_a, len_b))

        # Compare label names position by position; a missing label on
        # either side compares as None and therefore counts as a mismatch.
        labels_a = a.categories().get(AnnotationType.label, LabelCategories())
        labels_b = b.categories().get(AnnotationType.label, LabelCategories())
        mismatches = []
        for idx, (cat_a, cat_b) in enumerate(zip_longest(labels_a, labels_b)):
            if getattr(cat_a, 'name', None) != getattr(cat_b, 'name', None):
                mismatches.append((idx, cat_a, cat_b))

        if mismatches:
            print("Datasets have mismatching labels:")
            for idx, cat_a, cat_b in mismatches:
                if cat_a and cat_b:
                    message = "  #%s: %s != %s" % (idx, cat_a.name, cat_b.name)
                elif cat_a:
                    message = "  #%s:  > %s" % (idx, cat_a.name)
                else:
                    message = "  #%s:  < %s" % (idx, cat_b.name)
                print(message)

        # Stored without a default: these stay None when a dataset has no
        # label categories.
        self._a_classes = a.categories().get(AnnotationType.label)
        self._b_classes = b.categories().get(AnnotationType.label)

        keys_a = {(item.id, item.subset) for item in a}
        keys_b = {(item.id, item.subset) for item in b}
        ids = keys_a & keys_b

        if len(keys_a) != len(ids):
            print("Unmatched items in the first dataset: ")
            print(keys_a - ids)
        if len(keys_b) != len(ids):
            print("Unmatched items in the second dataset: ")
            print(keys_b - ids)

        for item_id, item_subset in ids:
            item_a = a.get(item_id, item_subset)
            item_b = b.get(item_id, item_subset)

            label_diff = self._cmp.match_labels(item_a, item_b)
            self.update_label_confusion(label_diff)

            bbox_diff = self._cmp.match_boxes(item_a, item_b)
            self.update_bbox_confusion(bbox_diff)

            # Polygon and mask results only feed the confusion statistics;
            # no per-item diff is saved for them.
            self.update_polygon_confusion(
                self._cmp.match_polygons(item_a, item_b))
            self.update_mask_confusion(
                self._cmp.match_masks(item_a, item_b))

            self.save_item_label_diff(item_a, item_b, label_diff)
            self.save_item_bbox_diff(item_a, item_b, bbox_diff)

        for kind in ('label', 'bbox', 'polygon', 'mask'):
            matrix = getattr(self, kind + '_confusion_matrix')
            if len(matrix) != 0:
                self.save_conf_matrix(matrix, kind + '_confusion.png')
Ejemplo n.º 18
0
    def test_can_save_and_load_with_meta_file(self):
        # Round-trips a two-frame dataset through KittiRawConverter with
        # save_images=True and save_dataset_meta=True, then checks that
        # 'dataset_meta.json' was written into the test directory.
        first_id = '0000000000'
        second_id = '0000000001'
        first_position = [13.54, -9.41, 0.24]
        second_position = [1.4, 2.1, 1.4]

        source_dataset = Dataset.from_iterable(
            [
                DatasetItem(
                    id=first_id,
                    annotations=[
                        Cuboid3d(position=list(first_position),
                                 label=0,
                                 attributes={'occluded': False,
                                             'track_id': 1}),
                    ],
                    point_cloud=self.pcd1,
                    related_images=[self.image1],
                    attributes={'frame': 0},
                ),
                DatasetItem(
                    id=second_id,
                    annotations=[
                        Cuboid3d(position=list(second_position),
                                 label=1,
                                 attributes={'track_id': 2}),
                    ],
                ),
            ],
            categories=['cat', 'dog'])

        with TestDir() as test_dir:
            # The expected label categories declare the 'occluded' attribute.
            target_label_cat = LabelCategories(attributes={'occluded'})
            for label_name in ('cat', 'dog'):
                target_label_cat.add(label_name)

            expected_pcd = osp.join(test_dir, 'velodyne_points', 'data',
                                    first_id + '.pcd')
            expected_image = osp.join(test_dir, 'image_00', 'data',
                                      first_id + '.png')

            # Unlike the source, the second expected item carries a 'frame'
            # attribute and its cuboid an explicit 'occluded': False.
            target_dataset = Dataset.from_iterable(
                [
                    DatasetItem(
                        id=first_id,
                        annotations=[
                            Cuboid3d(position=list(first_position),
                                     label=0,
                                     attributes={'occluded': False,
                                                 'track_id': 1}),
                        ],
                        point_cloud=expected_pcd,
                        related_images=[expected_image],
                        attributes={'frame': 0},
                    ),
                    DatasetItem(
                        id=second_id,
                        annotations=[
                            Cuboid3d(position=list(second_position),
                                     label=1,
                                     attributes={'occluded': False,
                                                 'track_id': 2}),
                        ],
                        attributes={'frame': 1},
                    ),
                ],
                categories={AnnotationType.label: target_label_cat})

            converter = partial(KittiRawConverter.convert,
                                save_images=True,
                                save_dataset_meta=True)
            self._test_save_and_load(source_dataset,
                                     converter,
                                     test_dir,
                                     target_dataset=target_dataset,
                                     require_point_cloud=True)

            meta_path = osp.join(test_dir, 'dataset_meta.json')
            self.assertTrue(osp.isfile(meta_path))
Ejemplo n.º 19
0
    def test_can_run_self_merge(self):
        # Exports one dataset as COCO and another as VOC, imports the VOC one
        # into a fresh project as a source, runs the 'merge' command on the
        # project together with the COCO dataset and compares the result
        # with the expected merged dataset.
        image_shape = (10, 6, 3)

        dataset1 = Dataset.from_iterable(
            [
                DatasetItem(id=100,
                            subset='train',
                            image=np.ones(image_shape),
                            annotations=[Bbox(1, 2, 3, 3, label=0)]),
            ],
            categories=['a', 'b'])

        dataset2 = Dataset.from_iterable(
            [
                DatasetItem(id=100,
                            subset='train',
                            image=np.ones(image_shape),
                            annotations=[
                                Bbox(1, 2, 3, 4, label=1),
                                Bbox(5, 6, 2, 3, label=2),
                            ]),
            ],
            categories=['a', 'b', 'c'])

        # Attribute sets expected on the merged boxes; each annotation gets
        # its own copy so no dict is shared between annotations.
        voc_box_attrs = {
            'score': 0.5,
            'occluded': False,
            'difficult': False,
            'truncated': False,
        }
        coco_box_attrs = {'score': 0.5, 'is_crowd': False}

        # Expected label indices are one higher than in the inputs because
        # the expected categories start with 'background'.
        expected_annotations = [
            Bbox(1, 2, 3, 4, label=2, id=1, group=1,
                 attributes=dict(voc_box_attrs)),
            Bbox(5, 6, 2, 3, label=3, id=2, group=2,
                 attributes=dict(voc_box_attrs)),
            Bbox(1, 2, 3, 3, label=1, id=1, group=1,
                 attributes=dict(coco_box_attrs)),
        ]
        expected_categories = {
            AnnotationType.label:
            LabelCategories.from_iterable(['background', 'a', 'b', 'c']),
            AnnotationType.mask:
            MaskCategories(VOC.generate_colormap(4)),
        }
        expected = Dataset.from_iterable(
            [
                DatasetItem(id=100,
                            subset='train',
                            image=np.ones(image_shape),
                            annotations=expected_annotations),
            ],
            categories=expected_categories)

        with TestDir() as test_dir:
            dataset1_url = osp.join(test_dir, 'dataset1')
            dataset2_url = osp.join(test_dir, 'dataset2')
            proj_dir = osp.join(test_dir, 'proj')
            result_dir = osp.join(test_dir, 'result')

            dataset1.export(dataset1_url, 'coco', save_images=True)
            dataset2.export(dataset2_url, 'voc', save_images=True)

            with Project.init(proj_dir) as project:
                project.import_source('source', dataset2_url, 'voc')

            run(self, 'merge', '-o', result_dir, '-p', proj_dir,
                dataset1_url + ':coco')

            merged = Dataset.load(result_dir)
            compare_datasets(self, expected, merged, require_images=True)
Ejemplo n.º 20
0
 def save_labels(self):
     """Write the extractor's label names to ``labels.txt``.

     The file is created in ``self._save_dir`` with UTF-8 encoding and
     receives one label name per line, in the iteration order of the label
     categories. If the extractor has no label categories, a
     default-constructed ``LabelCategories()`` is iterated instead.
     """
     target_path = osp.join(self._save_dir, 'labels.txt')
     with open(target_path, 'w', encoding='utf-8') as out:
         label_cat = self._extractor.categories().get(
             AnnotationType.label, LabelCategories())
         lines = (label.name + '\n' for label in label_cat)
         out.writelines(lines)
Ejemplo n.º 21
0
 def _get_label(self, label_id):
     if label_id is None:
         return ""
     label_cat = self._extractor.categories().get(
         AnnotationType.label, LabelCategories())
     return label_cat.items[label_id]
    def test_can_import_dataset_witn_numpy_files(self):
        # Imports the dummy dataset as 'mpii_json' and compares it, images
        # included, with the dataset built below.

        def pose(coords, visibility, center, scale, group):
            # One Points annotation of label 0 with 'center'/'scale'
            # attributes.
            return Points(coords, visibility,
                          attributes={'center': center, 'scale': scale},
                          label=0, group=group)

        first_item = DatasetItem(id='000000001', image=np.ones((5, 5, 3)),
            annotations=[
                pose([620.0, 394.0, 616.0, 269.0, 573.0, 185.0, 647.0, 188.0,
                      661.0, 221.0, 656.0, 231.0, 610.0, 187.0, 647.0, 176.0,
                      637.02, 189.818, 695.98, 108.182, 606.0, 217.0, 553.0,
                      161.0, 601.0, 167.0, 692.0, 185.0, 693.0, 240.0, 688.0,
                      313.0],
                     [1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                     center=[594.0, 257.0], scale=3.021, group=1),
                Bbox(615, 218.65, 288.4, 286.95, label=0, group=1),
            ])

        second_item = DatasetItem(id='000000002', image=np.ones((5, 5, 3)),
            annotations=[
                pose([650.0, 424.0, 646.0, 309.0, 603.0, 215.0, 677.0, 218.0,
                      691.0, 251.0, 686.0, 261.0, 640.0, 217.0, 677.0, 216.0,
                      667.02, 219.818, 725.98, 138.182, 636.0, 247.0, 583.0,
                      191.0, 631.0, 197.0, 722.0, 215.0, 723.0, 270.0, 718.0,
                      343.0],
                     [1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1],
                     center=[624.0, 287.0], scale=3.7, group=1),
                Bbox(101.1, 33.3, 113.9, 81.4, label=0, group=1),
            ])

        # The third item holds three annotated groups; only the first one
        # has a bounding box.
        third_item = DatasetItem(id='000000003', image=np.ones((5, 5, 3)),
            annotations=[
                pose([590.0, 364.0, 586.0, 239.0, 533.0, 155.0, 617.0, 158.0,
                      631.0, 191.0, 626.0, 201.0, 580.0, 157.0, 617.0, 146.0,
                      607.02, 159.818, 645.98, 68.182, 576.0, 187.0, 532.0,
                      131.0, 571.0, 137.0, 662.0, 155.0, 663.0, 210.0, 658.0,
                      283.0],
                     [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1],
                     center=[564.0, 227.0], scale=3.2, group=1),
                Bbox(313.3, 512.43, 220.7, 121.57, label=0, group=1),

                pose([490.0, 264.0, 486.0, 139.0, 433.0, 55.0, 517.0, 58.0,
                      531.0, 91.0, 526.0, 101.0, 480.0, 57.0, 517.0, 46.0,
                      507.02, 59.818, 545.98, 8.182, 476.0, 87.0, 432.0,
                      31.0, 471.0, 37.0, 562.0, 55.0, 563.0, 110.0, 558.0,
                      183.0],
                     [1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1],
                     center=[464.0, 127.0], scale=2.65, group=2),

                pose([690.0, 464.0, 686.0, 339.0, 633.0, 255.0, 717.0, 258.0,
                      731.0, 291.0, 726.0, 301.0, 680.0, 257.0, 717.0, 246.0,
                      707.02, 259.818, 745.98, 168.182, 676.0, 287.0, 632.0,
                      231.0, 671.0, 237.0, 762.0, 255.0, 763.0, 310.0, 758.0,
                      383.0],
                     [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1],
                     center=[664.0, 327.0], scale=3.9, group=3),
            ])

        expected_categories = {
            AnnotationType.label: LabelCategories.from_iterable(['human']),
            AnnotationType.points: PointsCategories.from_iterable(
                [(0, MPII_POINTS_LABELS, MPII_POINTS_JOINTS)]),
        }
        expected_dataset = Dataset.from_iterable(
            [first_item, second_item, third_item],
            categories=expected_categories)

        dataset = Dataset.import_from(DUMMY_DATASET_DIR_WITH_NUMPY_FILES,
                                      'mpii_json')

        compare_datasets(self, expected_dataset, dataset, require_images=True)
Ejemplo n.º 23
0
    def test_can_import(self):
        # Imports the dummy dataset as 'celeba' and compares it, images
        # included, with the dataset built below.

        def face(label, bbox, landmarks):
            # Each annotated item carries a Label, a Bbox and a Points
            # annotation that all share the same label index.
            return [
                Label(label),
                Bbox(*bbox, label=label),
                Points(landmarks, label=label),
            ]

        attribute_names = (
            '5_o_Clock_Shadow',
            'Arched_Eyebrows',
            'Attractive',
            'Bags_Under_Eyes',
            'Bald',
            'Bangs',
            'Big_Lips',
            'Big_Nose',
        )
        landmark_names = (
            'lefteye_x',
            'lefteye_y',
            'righteye_x',
            'righteye_y',
            'nose_x',
            'nose_y',
            'leftmouth_x',
            'leftmouth_y',
            'rightmouth_x',
            'rightmouth_y',
        )

        items = [
            DatasetItem(
                id='000001',
                subset='train',
                image=np.ones((5, 5, 3)),
                annotations=face(
                    12, (95, 71, 226, 313),
                    [165, 184, 244, 176, 196, 249, 194, 271, 266, 260]),
                attributes=dict(zip(
                    attribute_names,
                    (False, True, True, False, False, False, False, False)))),
            DatasetItem(
                id='000002',
                subset='train',
                image=np.ones((5, 5, 3)),
                annotations=face(
                    5, (72, 94, 221, 306),
                    [140, 204, 220, 204, 168, 254, 146, 289, 226, 289])),
            DatasetItem(
                id='000003',
                subset='val',
                image=np.ones((5, 5, 3)),
                annotations=face(
                    2, (216, 59, 91, 126),
                    [244, 104, 264, 105, 263, 121, 235, 134, 251, 140]),
                attributes=dict(zip(
                    attribute_names,
                    (False, False, False, True, False, False, False, True)))),
            DatasetItem(
                id='000004',
                subset='test',
                image=np.ones((5, 5, 3)),
                annotations=face(
                    10, (622, 257, 564, 781),
                    [796, 539, 984, 539, 930, 687, 762, 756, 915, 756])),
            DatasetItem(
                id='000005',
                subset='test',
                image=np.ones((5, 5, 3)),
                annotations=face(
                    7, (236, 109, 120, 166),
                    [273, 169, 328, 161, 298, 172, 283, 208, 323, 207])),
        ]

        expected_categories = {
            AnnotationType.label:
            LabelCategories.from_iterable(f'class-{i}' for i in range(13)),
            # One points category per landmark coordinate name, keyed by its
            # position in the list.
            AnnotationType.points:
            PointsCategories.from_iterable(
                [(idx, [name]) for idx, name in enumerate(landmark_names)]),
        }
        expected_dataset = Dataset.from_iterable(
            items, categories=expected_categories)

        dataset = Dataset.import_from(DUMMY_DATASET_DIR, 'celeba')

        compare_datasets(self, expected_dataset, dataset, require_images=True)