def __init__(self, extractor: IExtractor,
        dst_labels: Union[Iterable[str], LabelCategories]):
    """
    Prepares the label, mask and points categories for the requested
    set of destination labels.

    Parameters
    ----------
    extractor : the wrapped extractor, its categories are the source ones
    dst_labels : either ready label categories (they are copied), or
        an iterable of label names. For names found in the source label
        categories, the parent and attributes are taken from the source.
    """
    super().__init__(extractor)

    self._categories = {}

    source_cats = self._extractor.categories()
    source_labels = source_cats.get(AnnotationType.label)

    if isinstance(dst_labels, LabelCategories):
        target_labels = deepcopy(dst_labels)
    else:
        requested_names = list(dst_labels)

        if not source_labels:
            target_labels = LabelCategories.from_iterable(requested_names)
        else:
            target_labels = LabelCategories(
                attributes=deepcopy(source_labels.attributes))

            for name in requested_names:
                assert isinstance(name, str)
                known_label = source_labels.find(name)[1]
                if known_label is None:
                    target_labels.add(name)
                else:
                    target_labels.add(name, known_label.parent,
                        deepcopy(known_label.attributes))

    # A parent that is not among the destination labels is dropped
    for entry in target_labels:
        if entry.parent not in target_labels:
            entry.parent = ''
    self._categories[AnnotationType.label] = target_labels

    self._make_label_id_map(source_labels, target_labels)

    source_masks = source_cats.get(AnnotationType.mask)
    if source_masks is not None:
        assert source_labels is not None
        target_masks = MaskCategories(
            attributes=deepcopy(source_masks.attributes))

        # Carry over the colors of the labels that are kept
        for src_id, src_color in source_masks.colormap.items():
            mapped_id = self._map_id(src_id)
            if mapped_id is None or mapped_id in target_masks:
                continue
            target_masks.colormap[mapped_id] = deepcopy(src_color)

        # Generate new colors for new labels, keep old untouched
        taken_colors = set(target_masks.colormap.values())
        generated = mask_tools.generate_colormap(
            len(target_labels), include_background=False)
        spare_colors = iter(generated.values())
        for idx, entry in enumerate(target_labels):
            if entry.name in source_labels or idx in target_masks:
                continue

            candidate = next(spare_colors)
            while candidate in taken_colors:
                candidate = next(spare_colors)

            target_masks.colormap[idx] = candidate

        self._categories[AnnotationType.mask] = target_masks

    source_points = source_cats.get(AnnotationType.points)
    if source_points is not None:
        assert source_labels is not None
        target_points = PointsCategories(
            attributes=deepcopy(source_points.attributes))

        for src_id, src_entry in source_points.items.items():
            mapped_id = self._map_id(src_id)
            if mapped_id is None or mapped_id in target_points:
                continue
            target_points.items[mapped_id] = deepcopy(src_entry)

        self._categories[AnnotationType.points] = target_points
def test_can_compare_projects(self): # just a smoke test
    # Builds two datasets that differ in the order of the labels and in
    # the labels of two boxes, runs the visual diff on them and checks
    # that something was written to the output directory.
    label_categories1 = LabelCategories.from_iterable(['x', 'a', 'b', 'y'])
    mask_categories1 = MaskCategories.generate(len(label_categories1))

    point_categories1 = PointsCategories()
    for index, _ in enumerate(label_categories1.items):
        point_categories1.add(index, ['cat1', 'cat2'], joints=[[0, 1]])

    dataset1 = Dataset.from_iterable([
        DatasetItem(id=100, subset='train', image=np.ones((10, 6, 3)),
            annotations=[
                Caption('hello', id=1),
                Caption('world', id=2, group=5),
                Label(2, id=3, attributes={
                    'x': 1,
                    'y': '2',
                }),
                Bbox(1, 2, 3, 4, label=0, id=4, z_order=1, attributes={
                    'score': 1.0,
                }),
                Bbox(5, 6, 7, 8, id=5, group=5),
                Points([1, 2, 2, 0, 1, 1], label=0, id=5, z_order=4),
                Mask(label=3, id=5, z_order=2, image=np.ones((2, 3))),
            ]),
        DatasetItem(id=21, subset='train',
            annotations=[
                Caption('test'),
                Label(2),
                Bbox(1, 2, 3, 4, label=2, id=42, group=42)
            ]),

        DatasetItem(id=2, subset='val',
            annotations=[
                PolyLine([1, 2, 3, 4, 5, 6, 7, 8], id=11, z_order=1),
                Polygon([1, 2, 3, 4, 5, 6, 7, 8], id=12, z_order=4),
            ]),

        DatasetItem(id=42, subset='test',
            attributes={'a1': 5, 'a2': '42'}),

        DatasetItem(id=42),
        DatasetItem(id=43, image=Image(path='1/b/c.qq', size=(2, 4))),
    ], categories={
        AnnotationType.label: label_categories1,
        AnnotationType.mask: mask_categories1,
        AnnotationType.points: point_categories1,
    })

    label_categories2 = LabelCategories.from_iterable(['a', 'b', 'x', 'y'])
    mask_categories2 = MaskCategories.generate(len(label_categories2))

    point_categories2 = PointsCategories()
    for index, _ in enumerate(label_categories2.items):
        point_categories2.add(index, ['cat1', 'cat2'], joints=[[0, 1]])

    dataset2 = Dataset.from_iterable([
        DatasetItem(id=100, subset='train', image=np.ones((10, 6, 3)),
            annotations=[
                Caption('hello', id=1),
                Caption('world', id=2, group=5),
                Label(2, id=3, attributes={
                    'x': 1,
                    'y': '2',
                }),
                Bbox(1, 2, 3, 4, label=1, id=4, z_order=1, attributes={
                    'score': 1.0,
                }),
                Bbox(5, 6, 7, 8, id=5, group=5),
                Points([1, 2, 2, 0, 1, 1], label=0, id=5, z_order=4),
                Mask(label=3, id=5, z_order=2, image=np.ones((2, 3))),
            ]),
        DatasetItem(id=21, subset='train',
            annotations=[
                Caption('test'),
                Label(2),
                Bbox(1, 2, 3, 4, label=3, id=42, group=42)
            ]),

        DatasetItem(id=2, subset='val',
            annotations=[
                PolyLine([1, 2, 3, 4, 5, 6, 7, 8], id=11, z_order=1),
                Polygon([1, 2, 3, 4, 5, 6, 7, 8], id=12, z_order=4),
            ]),

        DatasetItem(id=42, subset='test',
            attributes={'a1': 5, 'a2': '42'}),

        DatasetItem(id=42),
        DatasetItem(id=43, image=Image(path='1/b/c.qq', size=(2, 4))),
    ], categories={
        AnnotationType.label: label_categories2,
        AnnotationType.mask: mask_categories2,
        AnnotationType.points: point_categories2,
    })

    with TestDir() as test_dir:
        with DiffVisualizer(save_dir=test_dir,
                    comparator=DistanceComparator(iou_threshold=0.8),
                ) as visualizer:
            visualizer.save(dataset1, dataset2)

        # Compare against the number of entries: a list is never equal
        # to 0, so comparing the listing itself could not fail.
        self.assertNotEqual(0, len(os.listdir(osp.join(test_dir))))
def compute_statistics(self, dataset):
    """
    Computes statistics of the dataset for the segmentation task.

    On top of the common statistics, the area, width and height of
    each annotation are collected per label and per attribute value.
    Annotations with an infinite or NaN property are reported in
    'items_with_invalid_value' and excluded from the distributions.

    Parameters
    ----------
    dataset : IDataset object

    Returns
    -------
    stats (dict): A dict object containing statistics of the dataset.
    """

    stats, filtered_anns = self._compute_common_statistics(dataset)

    # segmentation-specific
    mask_template = {
        prop: deepcopy(self.numerical_stat_template)
        for prop in ('area', 'width', 'height')
    }

    invalid_by_item = stats['items_with_invalid_value'] = {}
    label_dist = stats['mask_distribution_in_label'] = {}
    attr_dist = stats['mask_distribution_in_attribute'] = {}
    per_item_counts = stats['mask_distribution_in_dataset_item'] = {}

    def _measure(ann):
        # Collects the numerical properties of a single annotation
        _x, _y, width, height = ann.get_bbox()

        # Delete the following block when #226 is resolved
        # https://github.com/openvinotoolkit/datumaro/issues/226
        if ann.type == AnnotationType.mask:
            width += 1
            height += 1

        return {
            'area': ann.get_area(),
            'width': width,
            'height': height,
        }

    def _collect_label_stats(item_key, ann, label_stats):
        # Registers invalid properties, or feeds the label distribution
        info = _measure(ann)

        has_error = False
        for prop, val in info.items():
            if val == float('inf') or np.isnan(val):
                has_error = True
                invalid_by_item.setdefault(item_key, {}) \
                    .setdefault(ann.id, []).append(prop)

        if not has_error:
            self._update_prop_distributions(info, label_stats)

        return info, has_error

    label_categories = dataset.categories().get(AnnotationType.label,
        LabelCategories())
    base_valid_attrs = label_categories.attributes

    for item_key, annotations in filtered_anns:
        per_item_counts[item_key] = len(annotations)

        for ann in annotations:
            if 0 <= ann.label < len(label_categories):
                category = label_categories[ann.label]
                label_name = category.name
                valid_attrs = base_valid_attrs.union(category.attributes)
            else:
                # Undefined labels are keyed by their raw value
                label_name = ann.label
                valid_attrs = set()

            label_stats = label_dist.setdefault(
                label_name, deepcopy(mask_template))
            info, has_error = _collect_label_stats(
                item_key, ann, label_stats)

            for attr, value in ann.attributes.items():
                if attr not in valid_attrs:
                    continue

                value_stats = attr_dist.setdefault(label_name, {}) \
                    .setdefault(attr, {}) \
                    .setdefault(str(value), deepcopy(mask_template))

                if not has_error:
                    self._update_prop_distributions(info, value_stats)

    # compute prop stats from dist.
    self._compute_prop_stats_from_dist(label_dist, attr_dist)

    def _is_valid_ann(item_key, ann):
        if not 0 <= ann.label < len(label_categories):
            return False
        return ann.id not in invalid_by_item.get(item_key, {})

    def _flag_props_far_from_mean(item_key, ann):
        category = label_categories[ann.label]
        valid_attrs = base_valid_attrs.union(category.attributes)
        label_name = category.name
        label_stats = label_dist[label_name]

        info = _measure(ann)

        for prop, val in info.items():
            self._compute_far_from_mean(label_stats[prop], val,
                item_key, ann)

        for attr, value in ann.attributes.items():
            if attr not in valid_attrs:
                continue

            value_stats = attr_dist[label_name][attr][str(value)]
            for prop, val in info.items():
                self._compute_far_from_mean(value_stats[prop], val,
                    item_key, ann)

    for item_key, annotations in filtered_anns:
        for ann in annotations:
            if _is_valid_ann(item_key, ann):
                _flag_props_far_from_mean(item_key, ann)

    return stats
def test_dataset(self):
    # Builds a dataset that contains every annotation type along with
    # label, mask and points categories.
    labels = LabelCategories(attributes={'a', 'b', 'score'})
    for i in range(5):
        labels.add(f'cat{i}', attributes={'x', 'y'})

    masks = MaskCategories(generate_colormap(len(labels.items)))

    points = PointsCategories()
    for index in range(len(labels.items)):
        points.add(index, ['cat1', 'cat2'], joints=[[0, 1]])

    categories = {
        AnnotationType.label: labels,
        AnnotationType.mask: masks,
        AnnotationType.points: points,
    }

    items = [
        DatasetItem(id=100, subset='train', image=np.ones((10, 6, 3)),
            annotations=[
                Caption('hello', id=1),
                Caption('world', id=2, group=5),
                Label(2, id=3, attributes={'x': 1, 'y': '2'}),
                Bbox(1, 2, 3, 4, label=4, id=4, z_order=1,
                    attributes={'score': 1.0}),
                Bbox(5, 6, 7, 8, id=5, group=5,
                    attributes={'a': 1.5, 'b': 'text'}),
                Points([1, 2, 2, 0, 1, 1], label=0, id=5, z_order=4,
                    attributes={'x': 1, 'y': '2'}),
                Mask(label=3, id=5, z_order=2, image=np.ones((2, 3)),
                    attributes={'x': 1, 'y': '2'}),
            ]),
        DatasetItem(id=21, subset='train',
            annotations=[
                Caption('test'),
                Label(2),
                Bbox(1, 2, 3, 4, label=5, id=42, group=42)
            ]),

        DatasetItem(id=2, subset='val',
            annotations=[
                PolyLine([1, 2, 3, 4, 5, 6, 7, 8], id=11, z_order=1),
                Polygon([1, 2, 3, 4, 5, 6, 7, 8], id=12, z_order=4),
            ]),

        DatasetItem(id=1, subset='test',
            annotations=[
                Cuboid3d([1.0, 2.0, 3.0], [2.0, 2.0, 4.0], [1.0, 3.0, 4.0],
                    id=6, label=0, attributes={'occluded': True},
                    group=6)
            ]),

        DatasetItem(id=42, subset='test',
            attributes={'a1': 5, 'a2': '42'}),

        DatasetItem(id=42),
        DatasetItem(id=43, image=Image(path='1/b/c.qq', size=(2, 4))),
    ]

    return Dataset.from_iterable(items, categories=categories)
def _parse_meta(context):
    """
    Reads the 'meta' section from a stream of (event, element) pairs.

    The stream is consumed until the 'meta' element is closed or until
    the first 'image', 'track' or 'tag' element directly under
    'annotations' is opened.

    Returns a tuple of:
    - categories - a dict with the label categories
    - tasks_info - a dict: task id -> {'frame_size', 'mode'}
    - attribute_types - a dict: attribute name -> input type
    """
    ev, el = next(context)
    if not (ev == 'start' and el.tag == 'annotations'):
        raise Exception("Unexpected token ")

    categories = {}

    tasks_info = {}
    frame_size = [None, None]
    task_id = None
    mode = None
    labels = OrderedDict()
    label = None

    # (current state, opened tag) -> the state to enter
    openings = {
        ('annotations', 'meta'): 'meta',
        ('meta', 'task'): 'task',
        ('meta', 'project'): 'project',
        ('project', 'tasks'): 'tasks',
        ('tasks', 'task'): 'task',
        ('task', 'id'): 'task_id',
        ('task', 'segment'): 'segment',
        ('task', 'mode'): 'mode',
        ('task', 'original_size'): 'original_size',
        ('original_size', 'height'): 'frame_height',
        ('original_size', 'width'): 'frame_width',
        ('task', 'labels'): 'labels',
        ('project', 'labels'): 'labels',
        ('labels', 'label'): 'label',
        ('label', 'name'): 'label_name',
        ('label', 'attributes'): 'attributes',
        ('attributes', 'attribute'): 'attribute',
        ('attribute', 'name'): 'attr_name',
        ('attribute', 'input_type'): 'attr_type',
        ('annotations', 'image'): 'image',
        ('annotations', 'track'): 'track',
        ('annotations', 'tag'): 'tag',
    }
    # Openings which mean that the annotations have started
    stop_openings = {
        ('annotations', 'image'),
        ('annotations', 'track'),
        ('annotations', 'tag'),
    }
    # (current state, closed tag) pairs which leave the current state
    closings = {
        ('meta', 'meta'),
        ('project', 'project'),
        ('tasks', 'tasks'),
        ('task', 'task'),
        ('task_id', 'id'),
        ('segment', 'segment'),
        ('mode', 'mode'),
        ('original_size', 'original_size'),
        ('frame_height', 'height'),
        ('frame_width', 'width'),
        ('label_name', 'name'),
        ('attr_name', 'name'),
        ('attr_type', 'input_type'),
        ('attribute', 'attribute'),
        ('attributes', 'attributes'),
        ('label', 'label'),
        ('labels', 'labels'),
    }

    # Recursive descent parser
    states = ['annotations']

    for ev, el in context:
        if el is None:
            continue

        if ev == 'start':
            transition = (states[-1], el.tag)
            entered = openings.get(transition)
            if entered is None:
                continue
            states.append(entered)

            if transition == ('labels', 'label'):
                label = {'name': None, 'attributes': []}
            elif transition in stop_openings:
                break
        elif ev == 'end':
            transition = (states[-1], el.tag)
            if transition not in closings:
                continue
            states.pop()

            if transition == ('meta', 'meta'):
                break
            elif transition == ('task', 'task'):
                tasks_info[task_id] = {
                    'frame_size': frame_size,
                    'mode': mode,
                }
                frame_size = [None, None]
                mode = None
            elif transition == ('task_id', 'id'):
                task_id = int(el.text)
            elif transition == ('mode', 'mode'):
                mode = el.text
            elif transition == ('frame_height', 'height'):
                frame_size[0] = int(el.text)
            elif transition == ('frame_width', 'width'):
                frame_size[1] = int(el.text)
            elif transition == ('label_name', 'name'):
                label['name'] = el.text
            elif transition == ('attr_name', 'name'):
                label['attributes'].append({'name': el.text})
            elif transition == ('attr_type', 'input_type'):
                label['attributes'][-1]['input_type'] = el.text
            elif transition == ('label', 'label'):
                labels[label['name']] = label['attributes']
                label = None

    assert len(states) == 1 and states[0] == 'annotations', \
        "Expected 'meta' section in the annotation file, path: %s" % states

    common_attrs = ['occluded']
    if any(info['mode'] == 'interpolation' for info in tasks_info.values()):
        common_attrs.extend(['keyframe', 'outside', 'track_id'])

    label_cat = LabelCategories(attributes=common_attrs)
    attribute_types = {}
    for label_name, attrs in labels.items():
        label_cat.add(label_name,
            attributes={attr['name'] for attr in attrs})
        for attr in attrs:
            attribute_types[attr['name']] = attr['input_type']

    categories[AnnotationType.label] = label_cat
    return categories, tasks_info, attribute_types
def _load_items(self, subset):
    """
    Loads the items of a subset and appends them to self._items.

    For every image under the subset directory (files whose name matches
    Ade20k2020Path.MASK_PATTERN are skipped) the object records are read
    with self._load_item_info() and turned into:
    - class masks, one file per part level ('_seg.png' for level 0,
      '_parts_N.png' for level N)
    - instance masks, one file per object
    - polygons, for objects with at least 3 points

    Labels that are not known yet are added to the label categories.
    Missing mask files are reported with a warning and skipped.
    """
    labels = self._categories.setdefault(AnnotationType.label,
        LabelCategories())
    path = osp.join(self._path, subset)

    for image_path in sorted(find_images(path, recursive=True)):
        item_id = osp.splitext(osp.relpath(image_path, path))[0]

        if Ade20k2020Path.MASK_PATTERN.fullmatch(osp.basename(item_id)):
            continue

        item_annotations = []
        item_info = self._load_item_info(image_path)
        for item in item_info:
            if labels.find(item['label_name'])[0] is None:
                labels.add(item['label_name'])

        image_stem = osp.splitext(image_path)[0]

        # An image without object records has no part levels at all;
        # the default keeps max() from failing on an empty sequence.
        max_part_level = max((p['part_level'] for p in item_info),
            default=-1)
        for part_level in range(max_part_level + 1):
            # The path is derived from the level itself, so a missing
            # mask on one level does not affect the following levels.
            if part_level == 0:
                mask_path = image_stem + '_seg.png'
            else:
                mask_path = image_stem + '_parts_%s.png' % part_level

            if not osp.exists(mask_path):
                log.warning('Can`t find part level %s mask for %s' \
                    % (part_level, image_path))
                continue

            mask = lazy_image(mask_path, loader=self._load_class_mask)
            mask = CompiledMask(instance_mask=mask)

            classes = {(v['class_idx'], v['label_name'])
                for v in item_info if v['part_level'] == part_level}

            for class_idx, label_name in classes:
                label_id = labels.find(label_name)[0]
                item_annotations.append(Mask(label=label_id, id=class_idx,
                    image=mask.lazy_extract(class_idx),
                    group=class_idx, z_order=part_level))

        for item in item_info:
            instance_path = osp.join(osp.dirname(image_path),
                item['instance_mask'])
            if not osp.isfile(instance_path):
                log.warning('Can`t find instance mask: %s' % instance_path)
                continue

            mask = lazy_image(instance_path,
                loader=self._load_instance_mask)
            mask = CompiledMask(instance_mask=mask)

            label_id = labels.find(item['label_name'])[0]
            instance_id = item['id']
            attributes = {k: True for k in item['attributes']}
            polygon_points = item['polygon_points']

            item_annotations.append(Mask(label=label_id,
                image=mask.lazy_extract(1),
                id=instance_id, attributes=attributes,
                z_order=item['part_level'], group=instance_id))

            # A polygon needs an even number of coordinates
            # and at least 3 points
            if len(polygon_points) % 2 == 0 \
                    and 3 <= len(polygon_points) // 2:
                item_annotations.append(Polygon(polygon_points,
                    label=label_id, attributes=attributes,
                    id=instance_id, z_order=item['part_level'],
                    group=instance_id))

        self._items.append(DatasetItem(item_id, subset=subset,
            image=image_path, annotations=item_annotations))
def test_inplace_save_writes_only_updated_data_with_transforms(self):
    # Exports a dataset, then filters and re-splits it and saves it
    # in place. Only the files of the remaining items must be left.
    def _default_box_attrs():
        return {
            'truncated': False,
            'difficult': False,
            'occluded': False,
        }

    expected_dataset = Dataset.from_iterable([
        DatasetItem(3, subset='test', image=np.ones((2, 3, 3)),
            annotations=[
                Bbox(0, 1, 0, 0, label=4, id=1, group=1,
                    attributes=_default_box_attrs())
            ]),
        DatasetItem(4, subset='train', image=np.ones((2, 4, 3)),
            annotations=[
                Bbox(1, 0, 0, 0, label=4, id=1, group=1,
                    attributes=_default_box_attrs()),
                Mask(np.ones((2, 2)), label=2, group=1),
            ]),
    ], categories={
        AnnotationType.label: LabelCategories.from_iterable(
            ['background', 'a', 'b', 'c', 'd']),
        AnnotationType.mask: MaskCategories(
            colormap=VOC.generate_colormap(5)),
    })

    source_dataset = Dataset.from_iterable([
        DatasetItem(1, subset='a', image=np.ones((2, 1, 3)),
            annotations=[ Bbox(0, 0, 0, 1, label=1) ]),
        DatasetItem(2, subset='b', image=np.ones((2, 2, 3)),
            annotations=[
                Bbox(0, 0, 1, 0, label=2),
                Mask(np.ones((2, 2)), label=1),
            ]),
        DatasetItem(3, subset='b', image=np.ones((2, 3, 3)),
            annotations=[ Bbox(0, 1, 0, 0, label=3) ]),
        DatasetItem(4, subset='c', image=np.ones((2, 4, 3)),
            annotations=[
                Bbox(1, 0, 0, 0, label=3),
                Mask(np.ones((2, 2)), label=1)
            ]),
    ], categories=['a', 'b', 'c', 'd'])

    with TestDir() as path:
        def _listed(*parts):
            return set(os.listdir(osp.join(path, *parts)))

        source_dataset.export(path, 'voc', save_images=True)

        source_dataset.filter('/item[id >= 3]')
        source_dataset.transform('random_split',
            splits=(('train', 0.5), ('test', 0.5)), seed=42)
        source_dataset.save(save_images=True)

        self.assertEqual({'3.xml', '4.xml'}, _listed('Annotations'))
        self.assertEqual({'3.jpg', '4.jpg'}, _listed('JPEGImages'))
        self.assertEqual({'4.png'}, _listed('SegmentationClass'))
        self.assertEqual({'4.png'}, _listed('SegmentationObject'))
        self.assertEqual({'train.txt', 'test.txt'},
            _listed('ImageSets', 'Main'))
        self.assertEqual({'train.txt'},
            _listed('ImageSets', 'Segmentation'))

        compare_datasets(self, expected_dataset,
            Dataset.import_from(path, 'voc'), require_images=True)
def test_can_load(self):
    # Imports the dummy dataset in the 'kitti_raw' format and compares
    # it with the expected 3 frames.
    frame_ids = ['0000000000', '0000000001', '0000000002']
    pcd_paths = [
        osp.join(DUMMY_DATASET_DIR, 'velodyne_points', 'data',
            frame_id + '.pcd')
        for frame_id in frame_ids
    ]
    image_paths = [
        osp.join(DUMMY_DATASET_DIR, 'IMAGE_00', 'data', frame_id + '.png')
        for frame_id in frame_ids
    ]

    label_cat = LabelCategories(attributes={'occluded'})
    for label_name in ('bus', 'car'):
        label_cat.add(label_name)

    first_frame = DatasetItem(id=frame_ids[0],
        annotations=[
            Cuboid3d(position=[1, 2, 3],
                scale=[7.95, -3.62, -1.03],
                label=1, attributes={'occluded': False, 'track_id': 1}),

            Cuboid3d(position=[1, 1, 0],
                scale=[8.34, 23.01, -0.76],
                label=0, attributes={'occluded': False, 'track_id': 2})
        ],
        point_cloud=pcd_paths[0], related_images=[image_paths[0]],
        attributes={'frame': 0})

    second_frame = DatasetItem(id=frame_ids[1],
        annotations=[
            Cuboid3d(position=[0, 1, 0],
                scale=[8.34, 23.01, -0.76],
                rotation=[1, 1, 3],
                label=0, attributes={'occluded': True, 'track_id': 2})
        ],
        point_cloud=pcd_paths[1], related_images=[image_paths[1]],
        attributes={'frame': 1})

    third_frame = DatasetItem(id=frame_ids[2],
        annotations=[
            Cuboid3d(position=[1, 2, 3],
                scale=[-9.41, 13.54, 0.24],
                label=1, attributes={'occluded': False, 'track_id': 3})
        ],
        point_cloud=pcd_paths[2], related_images=[image_paths[2]],
        attributes={'frame': 2})

    expected_dataset = Dataset.from_iterable(
        [first_frame, second_frame, third_frame],
        categories={AnnotationType.label: label_cat})

    parsed_dataset = Dataset.import_from(DUMMY_DATASET_DIR, 'kitti_raw')

    compare_datasets_3d(self, expected_dataset, parsed_dataset,
        require_point_cloud=True)
def test_can_load(self):
    # Imports the dummy dataset in the 'sly_pointcloud' format and
    # compares it with the expected 2 frames.
    dataset_root = osp.join(DUMMY_DATASET_DIR, 'ds0')
    frame1_pcd = osp.join(dataset_root, 'pointcloud', 'frame1.pcd')
    frame2_pcd = osp.join(dataset_root, 'pointcloud', 'frame2.pcd')

    frame1_image = osp.join(dataset_root, 'related_images',
        'frame1_pcd', 'img2.png')
    frame2_image = osp.join(dataset_root, 'related_images',
        'frame2_pcd', 'img1.png')

    expected_labels = LabelCategories(attributes={'tag1', 'tag3'})
    for label_name in ('car', 'bus'):
        expected_labels.add(label_name)

    frame1 = DatasetItem(id='frame1',
        annotations=[
            Cuboid3d(id=755220128, label=0,
                position=[0.47, 0.23, 0.79], scale=[0.01, 0.01, 0.01],
                attributes={'track_id': 231825,
                    'tag1': 'fd', 'tag3': '4s'}),

            Cuboid3d(id=755337225, label=0,
                position=[0.36, 0.64, 0.93], scale=[0.01, 0.01, 0.01],
                attributes={'track_id': 231831,
                    'tag1': 'v12', 'tag3': ''}),
        ],
        point_cloud=frame1_pcd, related_images=[frame1_image],
        attributes={'frame': 0, 'description': '',
            'tag1': '25dsd', 'tag2': 65})

    frame2 = DatasetItem(id='frame2',
        annotations=[
            Cuboid3d(id=216, label=1,
                position=[0.59, 14.41, -0.61],
                attributes={'track_id': 36, 'tag1': '', 'tag3': ''})
        ],
        point_cloud=frame2_pcd, related_images=[frame2_image],
        attributes={'frame': 1, 'description': ''})

    expected_dataset = Dataset.from_iterable([frame1, frame2],
        categories={AnnotationType.label: expected_labels})

    parsed_dataset = Dataset.import_from(DUMMY_DATASET_DIR, 'sly_pointcloud')

    compare_datasets_3d(self, expected_dataset, parsed_dataset,
        require_point_cloud=True)
def _load_categories(self, labels):
    """
    Builds the categories of the dataset from an iterable of labels.

    Returns a dict with a single entry, the label categories.
    """
    label_cat = LabelCategories.from_iterable(labels)
    return {AnnotationType.label: label_cat}
def test_can_save_and_load(self):
    # Saves a dataset with the Supervisely point cloud converter and
    # checks that it is loaded back with the paths relocated to the
    # output directory.
    def _make_label_cat():
        label_cat = LabelCategories(attributes={'occluded'})
        label_cat.add('car', attributes=['x'])
        label_cat.add('bus')
        return label_cat

    src_label_cat = _make_label_cat()

    source_dataset = Dataset.from_iterable([
        DatasetItem(id='frame_1',
            annotations=[
                Cuboid3d(id=206, label=0,
                    position=[320.86, 979.18, 1.04],
                    attributes={'occluded': False, 'track_id': 1, 'x': 1}),

                Cuboid3d(id=207, label=1,
                    position=[318.19, 974.65, 1.29],
                    attributes={'occluded': True, 'track_id': 2}),
            ],
            point_cloud=self.pcd1,
            attributes={'frame': 0, 'description': 'zzz'}),

        DatasetItem(id='frm2',
            annotations=[
                Cuboid3d(id=208, label=1,
                    position=[23.04, 8.75, -0.78],
                    attributes={'occluded': False, 'track_id': 2})
            ],
            point_cloud=self.pcd2, related_images=[self.image2],
            attributes={'frame': 1}),
    ], categories={AnnotationType.label: src_label_cat})

    with TestDir() as test_dir:
        saved_root = osp.join(test_dir, 'ds0')
        target_label_cat = _make_label_cat()

        target_dataset = Dataset.from_iterable([
            DatasetItem(id='frame_1',
                annotations=[
                    Cuboid3d(id=206, label=0,
                        position=[320.86, 979.18, 1.04],
                        attributes={'occluded': False,
                            'track_id': 1, 'x': 1}),

                    Cuboid3d(id=207, label=1,
                        position=[318.19, 974.65, 1.29],
                        attributes={'occluded': True, 'track_id': 2}),
                ],
                point_cloud=osp.join(saved_root,
                    'pointcloud', 'frame_1.pcd'),
                attributes={'frame': 0, 'description': 'zzz'}),

            DatasetItem(id='frm2',
                annotations=[
                    Cuboid3d(id=208, label=1,
                        position=[23.04, 8.75, -0.78],
                        attributes={'occluded': False, 'track_id': 2}),
                ],
                point_cloud=osp.join(saved_root,
                    'pointcloud', 'frm2.pcd'),
                related_images=[
                    osp.join(saved_root,
                        'related_images', 'frm2_pcd', 'img1.png')
                ],
                attributes={'frame': 1, 'description': ''})
        ], categories={AnnotationType.label: target_label_cat})

        converter = partial(SuperviselyPointCloudConverter.convert,
            save_images=True)
        self._test_save_and_load(source_dataset, converter,
            test_dir, target_dataset=target_dataset,
            require_point_cloud=True)
def compute_statistics(self, dataset):
    """
    Computes statistics of the dataset for the detection task.

    On top of the common statistics, the width, height, area, ratio,
    short and long side of each annotation are collected per label and
    per attribute value. Annotations with an infinite or NaN property
    are reported in 'items_with_invalid_value', annotations with a
    width or height below 1 - in 'items_with_negative_length'. Both are
    excluded from the distributions.

    Parameters
    ----------
    dataset : IDataset object

    Returns
    -------
    stats (dict): A dict object containing statistics of the dataset.
    """

    stats, filtered_anns = self._compute_common_statistics(dataset)

    # detection-specific
    bbox_template = {
        prop: deepcopy(self.numerical_stat_template)
        for prop in ('width', 'height', 'area(wxh)', 'ratio(w/h)',
            'short', 'long')
    }

    neg_len_by_item = stats['items_with_negative_length'] = {}
    invalid_by_item = stats['items_with_invalid_value'] = {}
    label_dist = stats['bbox_distribution_in_label'] = {}
    attr_dist = stats['bbox_distribution_in_attribute'] = {}
    per_item_counts = stats['bbox_distribution_in_dataset_item'] = {}

    def _describe_bbox(ann, guard_ratio):
        # Collects the numerical properties of a single annotation.
        # With guard_ratio, a zero or infinite height gives a NaN ratio
        # instead of being used as a divisor.
        _x, _y, _w, _h = ann.get_bbox()
        area = ann.get_area()

        if guard_ratio and (_h == 0 or _h == float('inf')):
            ratio = float('nan')
        else:
            ratio = _w / _h

        return {
            'x': _x,
            'y': _y,
            'width': _w,
            'height': _h,
            'area(wxh)': area,
            'ratio(w/h)': ratio,
            'short': _w if _w < _h else _h,
            'long': _w if _w > _h else _h,
        }

    def _collect_label_stats(item_key, ann, label_stats):
        # Registers invalid properties, or feeds the label distribution
        info = _describe_bbox(ann, guard_ratio=True)

        has_error = False
        for prop, val in info.items():
            if val == float('inf') or np.isnan(val):
                has_error = True
                invalid_by_item.setdefault(item_key, {}) \
                    .setdefault(ann.id, []).append(prop)

        for prop in ('width', 'height'):
            val = info[prop]
            if val < 1:
                has_error = True
                neg_len_by_item.setdefault(item_key, {}) \
                    .setdefault(ann.id, {})[prop] = val

        if not has_error:
            # The position is checked for validity only,
            # there is no distribution for it
            del info['x']
            del info['y']
            self._update_prop_distributions(info, label_stats)

        return info, has_error

    label_categories = dataset.categories().get(AnnotationType.label,
        LabelCategories())
    base_valid_attrs = label_categories.attributes

    for item_key, annotations in filtered_anns:
        per_item_counts[item_key] = len(annotations)

        for ann in annotations:
            if 0 <= ann.label < len(label_categories):
                category = label_categories[ann.label]
                label_name = category.name
                valid_attrs = base_valid_attrs.union(category.attributes)
            else:
                # Undefined labels are keyed by their raw value
                label_name = ann.label
                valid_attrs = set()

            label_stats = label_dist.setdefault(
                label_name, deepcopy(bbox_template))
            info, has_error = _collect_label_stats(
                item_key, ann, label_stats)

            for attr, value in ann.attributes.items():
                if attr not in valid_attrs:
                    continue

                value_stats = attr_dist.setdefault(label_name, {}) \
                    .setdefault(attr, {}) \
                    .setdefault(str(value), deepcopy(bbox_template))

                if not has_error:
                    self._update_prop_distributions(info, value_stats)

    # Compute prop stats from distribution
    self._compute_prop_stats_from_dist(label_dist, attr_dist)

    def _is_valid_ann(item_key, ann):
        if not 0 <= ann.label < len(label_categories):
            return False

        has_neg_len = ann.id in neg_len_by_item.get(item_key, {})
        has_invalid_val = ann.id in invalid_by_item.get(item_key, {})
        return not (has_neg_len or has_invalid_val)

    def _flag_props_far_from_mean(item_key, ann):
        category = label_categories[ann.label]
        valid_attrs = base_valid_attrs.union(category.attributes)
        label_name = category.name
        label_stats = label_dist[label_name]

        info = _describe_bbox(ann, guard_ratio=False)
        del info['x']
        del info['y']

        for prop, val in info.items():
            self._compute_far_from_mean(label_stats[prop], val,
                item_key, ann)

        for attr, value in ann.attributes.items():
            if attr not in valid_attrs:
                continue

            value_stats = attr_dist[label_name][attr][str(value)]
            for prop, val in info.items():
                self._compute_far_from_mean(value_stats[prop], val,
                    item_key, ann)

    for item_key, annotations in filtered_anns:
        for ann in annotations:
            if _is_valid_ann(item_key, ann):
                _flag_props_far_from_mean(item_key, ann)

    return stats
def _compute_common_statistics(self, dataset):
    """
    Computes the task-independent statistics of the dataset.

    Only the annotations with a type from self.ann_types are counted.

    Returns a tuple of:
    - stats - a dict with the label and attribute distributions,
      the total annotation count and the items without annotations
    - filtered_anns - a list of ((item id, subset), annotations) pairs
    """
    defined_attr_template = {
        'items_missing_attribute': [],
        'distribution': {}
    }
    undefined_attr_template = {
        'items_with_undefined_attr': [],
        'distribution': {}
    }
    undefined_label_template = {
        'count': 0,
        'items_with_undefined_label': [],
    }

    defined_label_dist = {}
    undefined_label_dist = {}
    defined_attr_dist = {}
    undefined_attr_dist = {}

    stats = {
        'label_distribution': {
            'defined_labels': defined_label_dist,
            'undefined_labels': undefined_label_dist,
        },
        'attribute_distribution': {
            'defined_attributes': defined_attr_dist,
            'undefined_attributes': undefined_attr_dist,
        },
        'total_ann_count': 0,
        'items_missing_annotation': [],
    }

    label_categories = dataset.categories().get(AnnotationType.label,
        LabelCategories())
    base_valid_attrs = label_categories.attributes

    for category in label_categories:
        defined_label_dist[category.name] = 0

    filtered_anns = []
    for item in dataset:
        item_key = (item.id, item.subset)
        annotations = [ann for ann in item.annotations
            if ann.type in self.ann_types]
        filtered_anns.append((item_key, annotations))

        if not annotations:
            stats['items_missing_annotation'].append(item_key)
        stats['total_ann_count'] += len(annotations)

        for ann in annotations:
            if 0 <= ann.label < len(label_categories):
                category = label_categories[ann.label]
                label_name = category.name
                defined_label_dist[label_name] += 1

                defined_attr_stats = defined_attr_dist.setdefault(
                    label_name, {})

                valid_attrs = base_valid_attrs.union(category.attributes)
                ann_attrs = getattr(ann, 'attributes', {}).keys()
                missing_attrs = valid_attrs.difference(ann_attrs)

                for attr in valid_attrs:
                    defined_attr_stats.setdefault(
                        attr, deepcopy(defined_attr_template))
            else:
                # Undefined labels are keyed by their raw value
                # and have no valid attributes
                label_name = ann.label

                label_stats = undefined_label_dist.setdefault(
                    ann.label, deepcopy(undefined_label_template))
                label_stats['items_with_undefined_label'].append(item_key)
                label_stats['count'] += 1

                valid_attrs = set()
                missing_attrs = set()

            for attr in missing_attrs:
                defined_attr_stats[attr]['items_missing_attribute'] \
                    .append(item_key)

            for attr, value in ann.attributes.items():
                if attr in valid_attrs:
                    attr_dets = defined_attr_stats[attr]
                else:
                    attr_dets = undefined_attr_dist \
                        .setdefault(label_name, {}) \
                        .setdefault(attr, deepcopy(undefined_attr_template))
                    attr_dets['items_with_undefined_attr'].append(item_key)

                value_key = str(value)
                distribution = attr_dets['distribution']
                distribution[value_key] = distribution.get(value_key, 0) + 1

    return stats, filtered_anns
def test_can_import(self):
    # Imports the dummy dataset in the 'align_celeba' format and
    # compares it with the expected 5 items.
    landmark_names = [
        'lefteye_x', 'lefteye_y', 'righteye_x', 'righteye_y',
        'nose_x', 'nose_y', 'leftmouth_x', 'leftmouth_y',
        'rightmouth_x', 'rightmouth_y',
    ]
    categories = {
        AnnotationType.label: LabelCategories.from_iterable(
            f'class-{i}' for i in range(13)),
        AnnotationType.points: PointsCategories.from_iterable(
            [(index, [name]) for index, name in enumerate(landmark_names)])
    }

    items = [
        DatasetItem(id='000001', subset='train',
            image=np.ones((3, 4, 3)),
            annotations=[
                Label(12),
                Points([69, 109, 106, 113, 77, 142, 73, 152, 108, 154],
                    label=12)
            ],
            attributes={
                '5_o_Clock_Shadow': False, 'Arched_Eyebrows': True,
                'Attractive': True, 'Bags_Under_Eyes': False,
                'Bald': False, 'Bangs': False, 'Big_Lips': False,
                'Big_Nose': False
            }),
        DatasetItem(id='000002', subset='train',
            image=np.ones((3, 4, 3)),
            annotations=[
                Label(5),
                Points([69, 110, 107, 112, 81, 135, 70, 151, 108, 153],
                    label=5)
            ]),
        DatasetItem(id='000003', subset='val',
            image=np.ones((3, 4, 3)),
            annotations=[
                Label(2),
                Points([76, 112, 104, 106, 108, 128, 74, 156, 98, 158],
                    label=2)
            ],
            attributes={
                '5_o_Clock_Shadow': False, 'Arched_Eyebrows': False,
                'Attractive': False, 'Bags_Under_Eyes': True,
                'Bald': False, 'Bangs': False, 'Big_Lips': False,
                'Big_Nose': True
            }),
        DatasetItem(id='000004', subset='test',
            image=np.ones((3, 4, 3)),
            annotations=[
                Label(10),
                Points([72, 113, 108, 108, 101, 138, 71, 155, 101, 151],
                    label=10)
            ]),
        DatasetItem(id='000005', subset='test',
            image=np.ones((3, 4, 3)),
            annotations=[
                Label(7),
                Points([66, 114, 112, 112, 86, 119, 71, 147, 104, 150],
                    label=7)
            ])
    ]

    expected_dataset = Dataset.from_iterable(items, categories=categories)

    dataset = Dataset.import_from(DUMMY_ALIGN_DATASET_DIR, 'align_celeba')

    compare_datasets(self, expected_dataset, dataset, require_images=True)
def test_can_convert_to_kitti_raw(self):
    """
    Run the CLI 'convert' command from 'sly_pointcloud' to 'kitti_raw'
    and compare the re-imported result with the expected 3D dataset.
    """
    with TestDir() as test_dir:
        export_dir = osp.join(test_dir, 'export_dir')

        def pcd_path(frame_name):
            # Expected location of an exported point cloud
            return osp.join(export_dir, 'velodyne_points', 'data',
                frame_name + '.pcd')

        def image_path(frame_name):
            # Expected location of an exported related image
            return osp.join(export_dir, 'image_00', 'data',
                frame_name + '.png')

        expected_label_cat = LabelCategories(attributes={'occluded'})
        for label_name in ('bus', 'car'):
            expected_label_cat.add(label_name, attributes={'tag1', 'tag3'})

        frame1_cuboids = [
            Cuboid3d(label=1,
                position=[0.47, 0.23, 0.79],
                scale=[0.01, 0.01, 0.01],
                attributes={
                    'track_id': 2,
                    'tag1': 'fd',
                    'tag3': '4s',
                    'occluded': False,
                }),
            Cuboid3d(label=1,
                position=[0.36, 0.64, 0.93],
                scale=[0.01, 0.01, 0.01],
                attributes={
                    'track_id': 3,
                    'tag1': 'v12',
                    'tag3': '',
                    'occluded': False,
                }),
        ]
        frame2_cuboids = [
            Cuboid3d(label=0,
                position=[0.59, 14.41, -0.61],
                attributes={
                    'track_id': 1,
                    'tag1': '',
                    'tag3': '',
                    'occluded': False,
                }),
        ]

        expected_items = [
            DatasetItem(id='frame1',
                annotations=frame1_cuboids,
                point_cloud=pcd_path('frame1'),
                related_images=[image_path('frame1')],
                attributes={'frame': 0}),
            DatasetItem(id='frame2',
                annotations=frame2_cuboids,
                point_cloud=pcd_path('frame2'),
                related_images=[image_path('frame2')],
                attributes={'frame': 1}),
        ]
        expected_dataset = Dataset.from_iterable(expected_items,
            categories={AnnotationType.label: expected_label_cat})

        run(self, 'convert',
            '-if', 'sly_pointcloud', '-i', DUMMY_DATASET_DIR,
            '-f', 'kitti_raw', '-o', export_dir,
            '--', '--save-images', '--allow-attrs')

        parsed_dataset = Dataset.import_from(export_dir, format='kitti_raw')
        compare_datasets_3d(self, expected_dataset, parsed_dataset,
            require_point_cloud=True)
def test_can_save_bboxes(self):
    """
    Save bounding boxes with MotSeqGtConverter and check the reloaded
    dataset against the expected one, in which every box carries the
    'occluded', 'visibility' and 'ignored' attributes and items have
    no subset.
    """
    def make_categories():
        # Ten labels named label_0 .. label_9
        return {
            AnnotationType.label: LabelCategories.from_iterable(
                'label_' + str(label) for label in range(10)),
        }

    source_items = [
        DatasetItem(id=1, subset='train', image=np.ones((16, 16, 3)),
            annotations=[
                Bbox(0, 4, 4, 8, label=2, attributes={
                    'occluded': True,
                }),
                Bbox(0, 4, 4, 4, label=3, attributes={
                    'visibility': 0.4,
                }),
                Bbox(2, 4, 4, 4, attributes={'ignored': True}),
            ]),
        DatasetItem(id=2, subset='val', image=np.ones((8, 8, 3)),
            annotations=[
                Bbox(1, 2, 4, 2, label=3),
            ]),
        DatasetItem(id=3, subset='test', image=np.ones((5, 4, 3)) * 3),
    ]
    source_dataset = Dataset.from_iterable(source_items,
        categories=make_categories())

    target_items = [
        DatasetItem(id=1, image=np.ones((16, 16, 3)),
            annotations=[
                Bbox(0, 4, 4, 8, label=2, attributes={
                    'occluded': True,
                    'visibility': 0.0,
                    'ignored': False,
                }),
                Bbox(0, 4, 4, 4, label=3, attributes={
                    'occluded': False,
                    'visibility': 0.4,
                    'ignored': False,
                }),
                Bbox(2, 4, 4, 4, attributes={
                    'occluded': False,
                    'visibility': 1.0,
                    'ignored': True,
                }),
            ]),
        DatasetItem(id=2, image=np.ones((8, 8, 3)),
            annotations=[
                Bbox(1, 2, 4, 2, label=3, attributes={
                    'occluded': False,
                    'visibility': 1.0,
                    'ignored': False,
                }),
            ]),
        DatasetItem(id=3, image=np.ones((5, 4, 3)) * 3),
    ]
    target_dataset = Dataset.from_iterable(target_items,
        categories=make_categories())

    converter = partial(MotSeqGtConverter.convert, save_images=True)
    with TestDir() as test_dir:
        self._test_save_and_load(source_dataset, converter, test_dir,
            target_dataset=target_dataset, require_images=True)
def save(self, a: IDataset, b: IDataset):
    """
    Compare datasets ``a`` and ``b`` and save the comparison results.

    Prints a report about differing dataset lengths, mismatching label
    names and items present in only one of the datasets. For every item
    found in both datasets (matched by id and subset), the labels, boxes,
    polygons and masks are matched with ``self._cmp``, the confusion
    statistics are updated and the per-item label and bbox diffs are saved.
    Finally, every non-empty confusion matrix is saved as an image.
    """
    if len(a) != len(b):
        print("Datasets have different lengths: %s vs %s" % \
            (len(a), len(b)))

    a_classes = a.categories().get(AnnotationType.label, LabelCategories())
    b_classes = b.categories().get(AnnotationType.label, LabelCategories())

    # Pair the labels up by position; the shorter side is padded with None
    class_mismatch = []
    for idx, (a_cls, b_cls) in enumerate(zip_longest(a_classes, b_classes)):
        a_name = getattr(a_cls, 'name', None)
        b_name = getattr(b_cls, 'name', None)
        if a_name != b_name:
            class_mismatch.append((idx, a_cls, b_cls))

    if class_mismatch:
        print("Datasets have mismatching labels:")

        for idx, a_class, b_class in class_mismatch:
            if not a_class:
                print(" #%s: < %s" % (idx, b_class.name))
            elif not b_class:
                print(" #%s: > %s" % (idx, a_class.name))
            else:
                print(" #%s: %s != %s" % (idx, a_class.name, b_class.name))

    # Unlike the local copies above, these can be None if there are no labels
    self._a_classes = a.categories().get(AnnotationType.label)
    self._b_classes = b.categories().get(AnnotationType.label)

    ids_a = {(item.id, item.subset) for item in a}
    ids_b = {(item.id, item.subset) for item in b}
    ids = ids_a & ids_b

    if len(ids_a) != len(ids):
        print("Unmatched items in the first dataset: ")
        print(ids_a - ids)
    if len(ids_b) != len(ids):
        print("Unmatched items in the second dataset: ")
        print(ids_b - ids)

    for item_id, item_subset in ids:
        item_a = a.get(item_id, item_subset)
        item_b = b.get(item_id, item_subset)

        label_diff = self._cmp.match_labels(item_a, item_b)
        self.update_label_confusion(label_diff)

        bbox_diff = self._cmp.match_boxes(item_a, item_b)
        self.update_bbox_confusion(bbox_diff)

        polygon_diff = self._cmp.match_polygons(item_a, item_b)
        self.update_polygon_confusion(polygon_diff)

        mask_diff = self._cmp.match_masks(item_a, item_b)
        self.update_mask_confusion(mask_diff)

        self.save_item_label_diff(item_a, item_b, label_diff)
        self.save_item_bbox_diff(item_a, item_b, bbox_diff)

    # Attribute name of each confusion matrix and its output file name
    matrix_outputs = (
        ('label_confusion_matrix', 'label_confusion.png'),
        ('bbox_confusion_matrix', 'bbox_confusion.png'),
        ('polygon_confusion_matrix', 'polygon_confusion.png'),
        ('mask_confusion_matrix', 'mask_confusion.png'),
    )
    for matrix_attr, file_name in matrix_outputs:
        if len(getattr(self, matrix_attr)) != 0:
            self.save_conf_matrix(getattr(self, matrix_attr), file_name)
def test_can_save_and_load_with_meta_file(self):
    """
    Save a 3D dataset with KittiRawConverter using save_dataset_meta=True,
    check the reloaded dataset and that 'dataset_meta.json' is written.
    """
    source_items = [
        DatasetItem(id='0000000000',
            annotations=[
                Cuboid3d(position=[13.54, -9.41, 0.24], label=0,
                    attributes={'occluded': False, 'track_id': 1}),
            ],
            point_cloud=self.pcd1, related_images=[self.image1],
            attributes={'frame': 0}),
        DatasetItem(id='0000000001',
            annotations=[
                Cuboid3d(position=[1.4, 2.1, 1.4], label=1,
                    attributes={'track_id': 2}),
            ]),
    ]
    source_dataset = Dataset.from_iterable(source_items,
        categories=['cat', 'dog'])

    with TestDir() as test_dir:
        target_label_cat = LabelCategories(attributes={'occluded'})
        for label_name in ('cat', 'dog'):
            target_label_cat.add(label_name)

        saved_pcd = osp.join(test_dir,
            'velodyne_points', 'data', '0000000000.pcd')
        saved_image = osp.join(test_dir,
            'image_00', 'data', '0000000000.png')

        target_items = [
            DatasetItem(id='0000000000',
                annotations=[
                    Cuboid3d(position=[13.54, -9.41, 0.24], label=0,
                        attributes={'occluded': False, 'track_id': 1}),
                ],
                point_cloud=saved_pcd,
                related_images=[saved_image],
                attributes={'frame': 0}),
            DatasetItem(id='0000000001',
                annotations=[
                    Cuboid3d(position=[1.4, 2.1, 1.4], label=1,
                        attributes={'occluded': False, 'track_id': 2}),
                ],
                attributes={'frame': 1}),
        ]
        target_dataset = Dataset.from_iterable(target_items,
            categories={AnnotationType.label: target_label_cat})

        converter = partial(KittiRawConverter.convert,
            save_images=True, save_dataset_meta=True)
        self._test_save_and_load(source_dataset, converter, test_dir,
            target_dataset=target_dataset, require_point_cloud=True)

        meta_path = osp.join(test_dir, 'dataset_meta.json')
        self.assertTrue(osp.isfile(meta_path))
def test_can_run_self_merge(self):
    """
    Run the CLI 'merge' command on a project holding a VOC source plus
    an extra COCO dataset, and compare the result with the expected
    merged dataset.
    """
    dataset1 = Dataset.from_iterable(
        [
            DatasetItem(id=100, subset='train', image=np.ones((10, 6, 3)),
                annotations=[
                    Bbox(1, 2, 3, 3, label=0),
                ]),
        ],
        categories=['a', 'b'])

    dataset2 = Dataset.from_iterable(
        [
            DatasetItem(id=100, subset='train', image=np.ones((10, 6, 3)),
                annotations=[
                    Bbox(1, 2, 3, 4, label=1),
                    Bbox(5, 6, 2, 3, label=2),
                ]),
        ],
        categories=['a', 'b', 'c'])

    # Attributes expected on the boxes of dataset2 (exported as 'voc')
    voc_box_attrs = {
        'score': 0.5,
        'occluded': False,
        'difficult': False,
        'truncated': False,
    }
    # Attributes expected on the box of dataset1 (exported as 'coco')
    coco_box_attrs = {
        'score': 0.5,
        'is_crowd': False,
    }
    expected_annotations = [
        Bbox(1, 2, 3, 4, label=2, id=1, group=1,
            attributes=dict(voc_box_attrs)),
        Bbox(5, 6, 2, 3, label=3, id=2, group=2,
            attributes=dict(voc_box_attrs)),
        Bbox(1, 2, 3, 3, label=1, id=1, group=1,
            attributes=coco_box_attrs),
    ]
    expected = Dataset.from_iterable(
        [
            DatasetItem(id=100, subset='train', image=np.ones((10, 6, 3)),
                annotations=expected_annotations),
        ],
        categories={
            AnnotationType.label: LabelCategories.from_iterable(
                ['background', 'a', 'b', 'c']),
            AnnotationType.mask: MaskCategories(VOC.generate_colormap(4)),
        })

    with TestDir() as test_dir:
        dataset1_url = osp.join(test_dir, 'dataset1')
        dataset2_url = osp.join(test_dir, 'dataset2')

        dataset1.export(dataset1_url, 'coco', save_images=True)
        dataset2.export(dataset2_url, 'voc', save_images=True)

        proj_dir = osp.join(test_dir, 'proj')
        with Project.init(proj_dir) as project:
            project.import_source('source', dataset2_url, 'voc')

        result_dir = osp.join(test_dir, 'result')
        run(self, 'merge', '-o', result_dir, '-p', proj_dir,
            f'{dataset1_url}:coco')

        merged = Dataset.load(result_dir)
        compare_datasets(self, expected, merged, require_images=True)
def save_labels(self):
    """
    Write the label names of the extractor to 'labels.txt' in the save
    directory, one name per line. Nothing but an empty file is produced
    if the extractor has no label categories.
    """
    labels_file = osp.join(self._save_dir, 'labels.txt')
    with open(labels_file, 'w', encoding='utf-8') as out:
        label_cat = self._extractor.categories().get(
            AnnotationType.label, LabelCategories())
        for label in label_cat:
            out.write(label.name + '\n')
def _get_label(self, label_id):
    """
    Look up a label by its index in the extractor's label categories.

    Returns an empty string when ``label_id`` is None, otherwise the
    entry of the label categories' ``items`` at position ``label_id``.
    """
    if label_id is not None:
        categories = self._extractor.categories()
        label_cat = categories.get(AnnotationType.label, LabelCategories())
        return label_cat.items[label_id]

    return ""
def test_can_import_dataset_witn_numpy_files(self):
    """
    Import the dummy dataset with numpy files in the 'mpii_json' format
    and check it against the expected keypoints and boxes.
    """
    first_annotations = [
        Points(
            [620.0, 394.0, 616.0, 269.0, 573.0, 185.0, 647.0, 188.0,
             661.0, 221.0, 656.0, 231.0, 610.0, 187.0, 647.0, 176.0,
             637.02, 189.818, 695.98, 108.182, 606.0, 217.0, 553.0, 161.0,
             601.0, 167.0, 692.0, 185.0, 693.0, 240.0, 688.0, 313.0],
            [1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1],
            attributes={'center': [594.0, 257.0], 'scale': 3.021},
            label=0, group=1),
        Bbox(615, 218.65, 288.4, 286.95, label=0, group=1),
    ]

    second_annotations = [
        Points(
            [650.0, 424.0, 646.0, 309.0, 603.0, 215.0, 677.0, 218.0,
             691.0, 251.0, 686.0, 261.0, 640.0, 217.0, 677.0, 216.0,
             667.02, 219.818, 725.98, 138.182, 636.0, 247.0, 583.0, 191.0,
             631.0, 197.0, 722.0, 215.0, 723.0, 270.0, 718.0, 343.0],
            [1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1],
            attributes={'center': [624.0, 287.0], 'scale': 3.7},
            label=0, group=1),
        Bbox(101.1, 33.3, 113.9, 81.4, label=0, group=1),
    ]

    third_annotations = [
        Points(
            [590.0, 364.0, 586.0, 239.0, 533.0, 155.0, 617.0, 158.0,
             631.0, 191.0, 626.0, 201.0, 580.0, 157.0, 617.0, 146.0,
             607.02, 159.818, 645.98, 68.182, 576.0, 187.0, 532.0, 131.0,
             571.0, 137.0, 662.0, 155.0, 663.0, 210.0, 658.0, 283.0],
            [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1],
            attributes={'center': [564.0, 227.0], 'scale': 3.2},
            label=0, group=1),
        Bbox(313.3, 512.43, 220.7, 121.57, label=0, group=1),
        Points(
            [490.0, 264.0, 486.0, 139.0, 433.0, 55.0, 517.0, 58.0,
             531.0, 91.0, 526.0, 101.0, 480.0, 57.0, 517.0, 46.0,
             507.02, 59.818, 545.98, 8.182, 476.0, 87.0, 432.0, 31.0,
             471.0, 37.0, 562.0, 55.0, 563.0, 110.0, 558.0, 183.0],
            [1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1],
            attributes={'center': [464.0, 127.0], 'scale': 2.65},
            label=0, group=2),
        Points(
            [690.0, 464.0, 686.0, 339.0, 633.0, 255.0, 717.0, 258.0,
             731.0, 291.0, 726.0, 301.0, 680.0, 257.0, 717.0, 246.0,
             707.02, 259.818, 745.98, 168.182, 676.0, 287.0, 632.0, 231.0,
             671.0, 237.0, 762.0, 255.0, 763.0, 310.0, 758.0, 383.0],
            [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1],
            attributes={'center': [664.0, 327.0], 'scale': 3.9},
            label=0, group=3),
    ]

    expected_items = [
        DatasetItem(id='000000001', image=np.ones((5, 5, 3)),
            annotations=first_annotations),
        DatasetItem(id='000000002', image=np.ones((5, 5, 3)),
            annotations=second_annotations),
        DatasetItem(id='000000003', image=np.ones((5, 5, 3)),
            annotations=third_annotations),
    ]
    expected_dataset = Dataset.from_iterable(expected_items,
        categories={
            AnnotationType.label: LabelCategories.from_iterable(['human']),
            AnnotationType.points: PointsCategories.from_iterable(
                [(0, MPII_POINTS_LABELS, MPII_POINTS_JOINTS)]),
        })

    dataset = Dataset.import_from(DUMMY_DATASET_DIR_WITH_NUMPY_FILES,
        'mpii_json')

    compare_datasets(self, expected_dataset, dataset, require_images=True)
def test_can_import(self):
    """
    Import the dummy dataset in the 'celeba' format and check that it
    matches the expected labels, boxes, landmarks and item attributes.
    """
    landmark_names = [
        'lefteye_x', 'lefteye_y',
        'righteye_x', 'righteye_y',
        'nose_x', 'nose_y',
        'leftmouth_x', 'leftmouth_y',
        'rightmouth_x', 'rightmouth_y',
    ]
    label_cat = LabelCategories.from_iterable(
        f'class-{i}' for i in range(13))
    # One single-name points category per landmark coordinate
    points_cat = PointsCategories.from_iterable(
        [(idx, [name]) for idx, name in enumerate(landmark_names)])

    train_items = [
        DatasetItem(
            id='000001', subset='train', image=np.ones((5, 5, 3)),
            annotations=[
                Label(12),
                Bbox(95, 71, 226, 313, label=12),
                Points([165, 184, 244, 176, 196, 249, 194, 271, 266, 260],
                    label=12),
            ],
            attributes={
                '5_o_Clock_Shadow': False,
                'Arched_Eyebrows': True,
                'Attractive': True,
                'Bags_Under_Eyes': False,
                'Bald': False,
                'Bangs': False,
                'Big_Lips': False,
                'Big_Nose': False,
            }),
        DatasetItem(
            id='000002', subset='train', image=np.ones((5, 5, 3)),
            annotations=[
                Label(5),
                Bbox(72, 94, 221, 306, label=5),
                Points([140, 204, 220, 204, 168, 254, 146, 289, 226, 289],
                    label=5),
            ]),
    ]
    val_items = [
        DatasetItem(
            id='000003', subset='val', image=np.ones((5, 5, 3)),
            annotations=[
                Label(2),
                Bbox(216, 59, 91, 126, label=2),
                Points([244, 104, 264, 105, 263, 121, 235, 134, 251, 140],
                    label=2),
            ],
            attributes={
                '5_o_Clock_Shadow': False,
                'Arched_Eyebrows': False,
                'Attractive': False,
                'Bags_Under_Eyes': True,
                'Bald': False,
                'Bangs': False,
                'Big_Lips': False,
                'Big_Nose': True,
            }),
    ]
    test_items = [
        DatasetItem(
            id='000004', subset='test', image=np.ones((5, 5, 3)),
            annotations=[
                Label(10),
                Bbox(622, 257, 564, 781, label=10),
                Points([796, 539, 984, 539, 930, 687, 762, 756, 915, 756],
                    label=10),
            ]),
        DatasetItem(
            id='000005', subset='test', image=np.ones((5, 5, 3)),
            annotations=[
                Label(7),
                Bbox(236, 109, 120, 166, label=7),
                Points([273, 169, 328, 161, 298, 172, 283, 208, 323, 207],
                    label=7),
            ]),
    ]

    expected_dataset = Dataset.from_iterable(
        train_items + val_items + test_items,
        categories={
            AnnotationType.label: label_cat,
            AnnotationType.points: points_cat,
        })

    dataset = Dataset.import_from(DUMMY_DATASET_DIR, 'celeba')

    compare_datasets(self, expected_dataset, dataset, require_images=True)