def test_can_merge_classes(self):
    source0 = Dataset.from_iterable([
        DatasetItem(1, annotations=[
            Label(0),
            Label(1),
            Bbox(0, 0, 1, 1, label=1),
        ]),
    ], categories=['a', 'b'])

    source1 = Dataset.from_iterable([
        DatasetItem(1, annotations=[
            Label(0),
            Label(1),
            Bbox(0, 0, 1, 1, label=0),
            Bbox(0, 0, 1, 1, label=1),
        ]),
    ], categories=['b', 'c'])

    expected = Dataset.from_iterable([
        DatasetItem(1, annotations=[
            Label(0),
            Label(1),
            Label(2),
            Bbox(0, 0, 1, 1, label=1),
            Bbox(0, 0, 1, 1, label=2),
        ]),
    ], categories=['a', 'b', 'c'])

    merger = IntersectMerge()
    merged = merger([source0, source1])

    compare_datasets(self, expected, merged, ignored_attrs={'score'})
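# Note on the test above: 'b' has index 1 in source0 but index 0 in source1;
# the merger remaps labels into the merged category list ['a', 'b', 'c'],
# which is why the expected bboxes carry labels 1 and 2.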
def test_group_checks(self):
    dataset = Dataset.from_iterable([
        DatasetItem(1, annotations=[
            Bbox(0, 0, 0, 0, label=0, group=1), # misses an optional label
            Bbox(0, 0, 0, 0, label=1, group=1),

            Bbox(0, 0, 0, 0, label=2, group=2), # misses a mandatory label - error
            Bbox(0, 0, 0, 0, label=2, group=2),

            Bbox(0, 0, 0, 0, label=4), # misses an optional label
            Bbox(0, 0, 0, 0, label=5), # misses a mandatory label - error

            Bbox(0, 0, 0, 0, label=0), # misses a mandatory label - error

            Bbox(0, 0, 0, 0, label=3), # not listed - not checked
        ]),
    ], categories=['a', 'a_g1', 'a_g2_opt', 'b', 'c', 'c_g1_opt'])

    merger = IntersectMerge(conf={
        'groups': [['a', 'a_g1', 'a_g2_opt?'], ['c', 'c_g1_opt?']]})
    merger([dataset, dataset])

    self.assertEqual(3, len([e for e in merger.errors
        if isinstance(e, WrongGroupError)]), merger.errors)
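# The trailing '?' in the group spec marks a label as optional: a group is
# flagged with WrongGroupError only when a mandatory member is missing,
# which happens three times in the item above. Label 'b' appears in no
# group spec, so it is not checked at all.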
def test_can_match_items(self):
    # items 1 and 3 are unique, item 2 is common and should be merged

    source0 = Dataset.from_iterable([
        DatasetItem(1, annotations=[ Label(0), ]),
        DatasetItem(2, annotations=[ Label(0), ]),
    ], categories=['a', 'b'])

    source1 = Dataset.from_iterable([
        DatasetItem(2, annotations=[ Label(1), ]),
        DatasetItem(3, annotations=[ Label(0), ]),
    ], categories=['a', 'b'])

    source2 = Dataset.from_iterable([
        DatasetItem(2, annotations=[ Label(0), Bbox(1, 2, 3, 4) ]),
    ], categories=['a', 'b'])

    expected = Dataset.from_iterable([
        DatasetItem(1, annotations=[
            Label(0, attributes={'score': 1/3}),
        ]),
        DatasetItem(2, annotations=[
            Label(0, attributes={'score': 2/3}),
            Label(1, attributes={'score': 1/3}),
            Bbox(1, 2, 3, 4, attributes={'score': 1.0}),
        ]),
        DatasetItem(3, annotations=[
            Label(0, attributes={'score': 1/3}),
        ]),
    ], categories=['a', 'b'])

    merger = IntersectMerge()
    merged = merger([source0, source1, source2])

    compare_datasets(self, expected, merged)
    self.assertEqual(
        [
            NoMatchingItemError(item_id=('1', DEFAULT_SUBSET_NAME),
                sources={1, 2}),
            NoMatchingItemError(item_id=('3', DEFAULT_SUBSET_NAME),
                sources={0, 2}),
        ],
        sorted((e for e in merger.errors
                if isinstance(e, NoMatchingItemError)),
            key=lambda e: e.item_id)
    )
    self.assertEqual(
        [
            NoMatchingAnnError(item_id=('2', DEFAULT_SUBSET_NAME),
                sources={0, 1}, ann=source2.get('2').annotations[1]),
        ],
        sorted((e for e in merger.errors
                if isinstance(e, NoMatchingAnnError)),
            key=lambda e: e.item_id)
    )
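# The 'score' attribute records each annotation's vote share: the fraction
# of input datasets containing it, e.g. 2/3 when two of the three sources
# agree on Label(0) for item 2.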
def merge_command(args):
    source_projects = [load_project(p) for p in args.project]

    dst_dir = args.dst_dir
    if dst_dir:
        if not args.overwrite and osp.isdir(dst_dir) and os.listdir(dst_dir):
            raise CliException("Directory '%s' already exists "
                "(pass --overwrite to overwrite)" % dst_dir)
    else:
        dst_dir = generate_next_file_name('merged')

    source_datasets = []
    for p in source_projects:
        log.debug("Loading project '%s' dataset", p.config.project_name)
        source_datasets.append(p.make_dataset())

    merger = IntersectMerge(conf=IntersectMerge.Conf(
        pairwise_dist=args.iou_thresh, groups=args.groups,
        output_conf_thresh=args.output_conf_thresh, quorum=args.quorum
    ))
    merged_dataset = merger(source_datasets)

    merged_project = Project()
    output_dataset = merged_project.make_dataset()
    output_dataset.define_categories(merged_dataset.categories())
    merged_dataset = output_dataset.update(merged_dataset)
    merged_dataset.save(save_dir=dst_dir)

    report_path = osp.join(dst_dir, 'merge_report.json')
    save_merge_report(merger, report_path)

    dst_dir = osp.abspath(dst_dir)
    log.info("Merge results have been saved to '%s'" % dst_dir)
    log.info("Report has been saved to '%s'" % report_path)

    return 0
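# A hypothetical invocation of the command above (the exact flag spellings
# come from an argument parser that is not shown here):
#
#   datum merge proj_a/ proj_b/ -o merged/ --overwrite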
def test_can_merge_categories(self):
    source0 = Dataset.from_iterable([
        DatasetItem(1, annotations=[ Label(0), ]),
    ], categories={
        AnnotationType.label: LabelCategories.from_iterable(['a', 'b']),
        AnnotationType.points: PointsCategories.from_iterable([
            (0, ['l0', 'l1']),
            (1, ['l2', 'l3']),
        ]),
        AnnotationType.mask: MaskCategories({
            0: (0, 1, 2),
            1: (1, 2, 3),
        }),
    })

    source1 = Dataset.from_iterable([
        DatasetItem(1, annotations=[ Label(0), ]),
    ], categories={
        AnnotationType.label: LabelCategories.from_iterable(['c', 'b']),
        AnnotationType.points: PointsCategories.from_iterable([
            (0, []),
            (1, ['l2', 'l3']),
        ]),
        AnnotationType.mask: MaskCategories({
            0: (0, 2, 4),
            1: (1, 2, 3),
        }),
    })

    expected = Dataset.from_iterable([
        DatasetItem(1, annotations=[ Label(0), Label(2), ]),
    ], categories={
        AnnotationType.label: LabelCategories.from_iterable(['a', 'b', 'c']),
        AnnotationType.points: PointsCategories.from_iterable([
            (0, ['l0', 'l1']),
            (1, ['l2', 'l3']),
            (2, []),
        ]),
        AnnotationType.mask: MaskCategories({
            0: (0, 1, 2),
            1: (1, 2, 3),
            2: (0, 2, 4),
        }),
    })

    merger = IntersectMerge()
    merged = merger([source0, source1])

    compare_datasets(self, expected, merged, ignored_attrs={'score'})
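# Points and mask categories are merged alongside labels: source1's entries
# are re-indexed to follow the merged label order, so 'c', its (empty) point
# labels, and its colormap entry (0, 2, 4) all land at index 2.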
def test_attributes(self):
    source0 = Dataset.from_iterable([
        DatasetItem(1, annotations=[
            Label(2, attributes={
                'unique': 1,
                'common_under_quorum': 2,
                'common_over_quorum': 3,
                'ignored': 'q',
            }),
        ]),
    ], categories=['a', 'b', 'c'])

    source1 = Dataset.from_iterable([
        DatasetItem(1, annotations=[
            Label(2, attributes={
                'common_under_quorum': 2,
                'common_over_quorum': 3,
                'ignored': 'q',
            }),
        ]),
    ], categories=['a', 'b', 'c'])

    source2 = Dataset.from_iterable([
        DatasetItem(1, annotations=[
            Label(2, attributes={
                'common_over_quorum': 3,
                'ignored': 'q',
            }),
        ]),
    ], categories=['a', 'b', 'c'])

    expected = Dataset.from_iterable([
        DatasetItem(1, annotations=[
            Label(2, attributes={ 'common_over_quorum': 3 }),
        ]),
    ], categories=['a', 'b', 'c'])

    merger = IntersectMerge(conf={
        'quorum': 3, 'ignored_attributes': {'ignored'}})
    merged = merger([source0, source1, source2])

    compare_datasets(self, expected, merged, ignored_attrs={'score'})
    self.assertEqual(2, len([e for e in merger.errors
        if isinstance(e, FailedAttrVotingError)]))
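# With quorum=3, an attribute value survives only if at least three sources
# vote for it: 'unique' (1 vote) and 'common_under_quorum' (2 votes) are
# dropped, each producing a FailedAttrVotingError, while 'ignored' is
# excluded from voting altogether via 'ignored_attributes'.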
def test_can_match_lines_when_line_not_approximated(self):
    source0 = Dataset.from_iterable([
        DatasetItem(1, annotations=[
            PolyLine([1, 1, 2, 1, 3, 5, 5, 5, 8, 3]),
        ]),
    ])

    source1 = Dataset.from_iterable([
        DatasetItem(1, annotations=[
            PolyLine([1, 1, 8, 3]),
        ]),
    ])

    expected = Dataset.from_iterable([
        DatasetItem(1, annotations=[
            PolyLine([1, 1, 2, 1, 3, 5, 5, 5, 8, 3]),
        ]),
    ], categories=[])

    merger = IntersectMerge(conf={'quorum': 1, 'pairwise_dist': 0.1})
    merged = merger([source0, source1])

    compare_datasets(self, expected, merged, ignored_attrs={'score'})
    self.assertEqual(0, len(merger.errors))
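# Note on the test above: even with pairwise_dist=0.1 the two polylines
# match as a single annotation (no errors are reported), and the denser
# line is kept in the output rather than an approximated two-point segment.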
def merge(cleaned_datasets, output, save_images=False):
    """datum merge -o {output} {project_dirs}"""
    print(f"Merging datasets to {output}/")

    projects = [Project.load(p) for p in cleaned_datasets]
    datasets = [p.make_dataset() for p in projects]
    merged_project_dir = Path(output)

    # perform the merge
    merge_config = IntersectMerge.Conf(
        pairwise_dist=0.25,
        groups=[],
        output_conf_thresh=0.0,
        quorum=0,
    )
    merged_dataset = IntersectMerge(conf=merge_config)(datasets)

    merged_project = Project()
    output_dataset = merged_project.make_dataset()
    output_dataset.define_categories(merged_dataset.categories())
    merged_dataset = output_dataset.update(merged_dataset)
    merged_dataset.save(save_dir=merged_project_dir, save_images=save_images)
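# A minimal usage sketch for the helper above. The project directories and
# output name are hypothetical; each input is assumed to be a loadable
# Datumaro project directory.
if __name__ == '__main__':
    merge(['projects/cleaned_a', 'projects/cleaned_b'],
          output='merged', save_images=True)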
def test_can_match_shapes(self):
    source0 = Dataset.from_iterable([
        DatasetItem(1, annotations=[
            # unique
            Bbox(1, 2, 3, 4, label=1),

            # common
            Mask(label=2, z_order=2, image=np.array([
                [0, 0, 0, 0],
                [0, 0, 0, 0],
                [1, 1, 1, 0],
                [1, 1, 1, 0],
            ])),
            Polygon([1, 0, 3, 2, 1, 2]),

            # an instance with keypoints
            Bbox(4, 5, 2, 4, label=2, z_order=1, group=1),
            Points([5, 6], label=0, group=1),
            Points([6, 8], label=1, group=1),

            PolyLine([1, 1, 2, 1, 3, 1]),
        ]),
    ], categories=['a', 'b', 'c'])

    source1 = Dataset.from_iterable([
        DatasetItem(1, annotations=[
            # common
            Mask(label=2, image=np.array([
                [0, 0, 0, 0],
                [0, 1, 1, 1],
                [0, 1, 1, 1],
                [0, 1, 1, 1],
            ])),
            Polygon([0, 2, 2, 0, 2, 1]),

            # an instance with keypoints
            Bbox(4, 4, 2, 5, label=2, z_order=1, group=2),
            Points([5.5, 6.5], label=0, group=2),
            Points([6, 8], label=1, group=2),

            PolyLine([1, 1.5, 2, 1.5]),
        ]),
    ], categories=['a', 'b', 'c'])

    source2 = Dataset.from_iterable([
        DatasetItem(1, annotations=[
            # common
            Mask(label=2, z_order=3, image=np.array([
                [0, 0, 1, 1],
                [0, 1, 1, 1],
                [1, 1, 1, 1],
                [1, 1, 1, 0],
            ])),
            Polygon([3, 1, 2, 2, 0, 1]),

            # an instance with keypoints, one is missing
            Bbox(3, 6, 2, 3, label=2, z_order=4, group=3),
            Points([4.5, 5.5], label=0, group=3),

            PolyLine([1, 1.25, 3, 1, 4, 2]),
        ]),
    ], categories=['a', 'b', 'c'])

    expected = Dataset.from_iterable([
        DatasetItem(1, annotations=[
            # unique
            Bbox(1, 2, 3, 4, label=1),

            # common
            # nearest to mean bbox
            Mask(label=2, z_order=3, image=np.array([
                [0, 0, 0, 0],
                [0, 1, 1, 1],
                [0, 1, 1, 1],
                [0, 1, 1, 1],
            ])),
            Polygon([1, 0, 3, 2, 1, 2]),

            # an instance with keypoints
            Bbox(4, 5, 2, 4, label=2, z_order=4, group=1),
            Points([5, 6], label=0, group=1),
            Points([6, 8], label=1, group=1),

            PolyLine([1, 1.25, 3, 1, 4, 2]),
        ]),
    ], categories=['a', 'b', 'c'])

    merger = IntersectMerge(conf={'quorum': 1, 'pairwise_dist': 0.1})
    merged = merger([source0, source1, source2])

    compare_datasets(self, expected, merged, ignored_attrs={'score'})
    self.assertEqual(
        [
            NoMatchingAnnError(item_id=('1', ''), sources={2},
                ann=source0.get('1').annotations[5]),
            NoMatchingAnnError(item_id=('1', ''), sources={1, 2},
                ann=source0.get('1').annotations[0]),
        ],
        sorted((e for e in merger.errors
                if isinstance(e, NoMatchingAnnError)),
            key=lambda e: len(e.sources)))
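# Two mismatches are reported for the item above: the unique Bbox(1, 2, 3, 4)
# from source0 has no counterpart in sources 1 and 2, and the second keypoint
# is absent from source2. Both raise NoMatchingAnnError, yet the unique bbox
# is still kept in the merged output.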
def mergeDataset(self, import_args: Arg, filter_arg: Arg):
    config = setConfig(import_args['format'])
    source_datasets = dict([
        (path, Environment().make_importer(
            import_args['format'])(str(path)).make_dataset())
        for path in self.datasetPathList])
    itemIdsAndPath = reduce(lambda x, y: x + y,
        [[(item.id, path) for item in dataset]
            for path, dataset in source_datasets.items()])

    for path, dataset in source_datasets.items():
        itemIdsInPath = set(
            [itemId for itemId, _path in itemIdsAndPath if _path == path])
        itemIdsOutPath = set(
            [itemId for itemId, _path in itemIdsAndPath if _path != path])
        if itemIdsInPath & itemIdsOutPath:
            # item ids collide with another dataset: prefix them with the
            # dataset directory name and relocate the images accordingly
            for subsetName, subset in dataset.subsets().items():
                imgDir: Path = path / config.getImgDir(subsetName)
                _subset = deepcopy(subset.items)
                for item in _subset.values():
                    imgFile = Path(item.image.path)
                    relPath = imgFile.relative_to(imgDir)
                    newPath = imgDir / path.name / relPath
                    oldItemId = item.id
                    newItemId = item.id = str(
                        path.name / relPath.parent / relPath.stem
                    ).replace('\\', '/')
                    item.image._path = str(newPath)
                    del subset.items[oldItemId]
                    subset.items[newItemId] = item
                    newPath.parent.mkdir(parents=True, exist_ok=True)
                    if item.image.has_data:
                        move(str(imgFile), str(imgDir / path.name / relPath))

    mergePath = (self.projectsPath / self.mergeFolderName)
    if mergePath.is_dir():
        rmtree(mergePath, onerror=remove_readonly)
    mergePath.mkdir(exist_ok=True, parents=True)
    dst_dir = str(mergePath)

    merger = IntersectMerge(conf=IntersectMerge.Conf())
    merged_dataset = merger(list(source_datasets.values()))

    merged_project = Project()
    output_dataset = merged_project.make_dataset()
    output_dataset.define_categories(merged_dataset.categories())
    merged_dataset = output_dataset.update(merged_dataset)

    if filter_arg['no_anno_filter'].lower() == 'y':
        filtered_dataset = Project().make_dataset()
        filtered_dataset.define_categories(merged_dataset.categories())
        merged_dataset = filtered_dataset.update(
            merged_dataset.select(lambda item: len(item.annotations) != 0))

    # renumber image ids and annotation ids sequentially in the merged result
    annoId = 1
    imageIdName = config.imageIdName
    for idx, item in tqdm(enumerate(merged_dataset), desc='datasets'):
        if imageIdName is not None:
            item.attributes[imageIdName] = idx + 1
        for anno in item.annotations:
            anno.id = annoId
            annoId += 1

    merged_dataset.save(save_dir=dst_dir, save_images=True)
    return self
def merge_command(args):
    # Workaround. Required positionals consume positionals from the end
    args._positionals += join_cli_args(args, 'targets', 'extra_args')

    has_sep = '--' in args._positionals
    if has_sep:
        pos = args._positionals.index('--')
        if pos == 0:
            raise argparse.ArgumentError(None,
                message="Expected at least 1 target argument")
    else:
        pos = len(args._positionals)
    args.targets = args._positionals[:pos] or \
        [ProjectBuildTargets.MAIN_TARGET]
    args.extra_args = args._positionals[pos + has_sep:]

    show_plugin_help = '-h' in args.extra_args or '--help' in args.extra_args

    dst_dir = args.dst_dir
    if dst_dir:
        if not args.overwrite and osp.isdir(dst_dir) and os.listdir(dst_dir):
            raise CliException("Directory '%s' already exists "
                "(pass --overwrite to overwrite)" % dst_dir)
    else:
        dst_dir = generate_next_file_name('merged')
    dst_dir = osp.abspath(dst_dir)

    project = None
    try:
        project = scope_add(load_project(args.project_dir))
    except ProjectNotFoundError:
        if not show_plugin_help and len(args.targets) == 1 and args.project_dir:
            raise

    if project is not None:
        env = project.env
    else:
        env = Environment()

    try:
        converter = env.converters[args.format]
    except KeyError:
        raise CliException("Converter for format '%s' is not found" %
            args.format)
    export_args = converter.parse_cmdline(args.extra_args)

    source_datasets = []
    try:
        if len(args.targets) == 1:
            source_datasets.append(project.working_tree.make_dataset())

        for t in args.targets:
            target_dataset, target_project = parse_full_revpath(t, project)
            if target_project:
                scope_add(target_project)
            source_datasets.append(target_dataset)
    except Exception as e:
        raise CliException(str(e))

    merger = IntersectMerge(conf=IntersectMerge.Conf(
        pairwise_dist=args.iou_thresh, groups=args.groups or [],
        output_conf_thresh=args.output_conf_thresh, quorum=args.quorum
    ))
    merged_dataset = merger(source_datasets)
    merged_dataset.export(save_dir=dst_dir, format=converter, **export_args)

    report_path = osp.join(dst_dir, 'merge_report.json')
    save_merge_report(merger, report_path)

    log.info("Merge results have been saved to '%s'" % dst_dir)
    log.info("Report has been saved to '%s'" % report_path)

    return 0
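# A hypothetical invocation of the revpath-based command above; the targets,
# export format, and converter args after '--' are illustrative only:
#
#   datum merge proj_a/ proj_b/ -o merged/ -f datumaro -- --save-images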