Example #1
    def test_can_merge_classes(self):
        source0 = Dataset.from_iterable([
            DatasetItem(1, annotations=[
                Label(0),
                Label(1),
                Bbox(0, 0, 1, 1, label=1),
            ]),
        ], categories=['a', 'b'])

        source1 = Dataset.from_iterable([
            DatasetItem(1, annotations=[
                Label(0),
                Label(1),
                Bbox(0, 0, 1, 1, label=0),
                Bbox(0, 0, 1, 1, label=1),
            ]),
        ], categories=['b', 'c'])

        expected = Dataset.from_iterable([
            DatasetItem(1, annotations=[
                Label(0),
                Label(1),
                Label(2),
                Bbox(0, 0, 1, 1, label=1),
                Bbox(0, 0, 1, 1, label=2),
            ]),
        ], categories=['a', 'b', 'c'])

        merger = IntersectMerge()
        merged = merger([source0, source1])

        compare_datasets(self, expected, merged, ignored_attrs={'score'})
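This test shows that labels are matched by name rather than by index: 'b' is
label 1 in source0 but label 0 in source1, and both map onto the same merged
label. A minimal standalone sketch of the same pattern follows; the import
paths are an assumption, since they vary across Datumaro versions (these match
the pre-1.0 layout the tests appear to use).

    from datumaro.components.dataset import Dataset
    from datumaro.components.extractor import DatasetItem, Label
    from datumaro.components.operations import IntersectMerge

    ds_a = Dataset.from_iterable(
        [DatasetItem(1, annotations=[Label(0)])], categories=['a', 'b'])
    ds_b = Dataset.from_iterable(
        [DatasetItem(1, annotations=[Label(0)])], categories=['b', 'c'])

    merged = IntersectMerge()([ds_a, ds_b])
    print(merged.categories())  # merged label names: 'a', 'b', 'c'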
Example #2
    def test_group_checks(self):
        dataset = Dataset.from_iterable(
            [
                DatasetItem(
                    1,
                    annotations=[
                        Bbox(0, 0, 0, 0, label=0,
                             group=1),  # misses an optional label
                        Bbox(0, 0, 0, 0, label=1, group=1),
                        Bbox(0, 0, 0, 0, label=2,
                             group=2),  # misses a mandatory label - error
                        Bbox(0, 0, 0, 0, label=2, group=2),
                        Bbox(0, 0, 0, 0, label=4),  # misses an optional label
                        Bbox(0, 0, 0, 0,
                             label=5),  # misses a mandatory label - error
                        Bbox(0, 0, 0, 0,
                             label=0),  # misses a mandatory label - error
                        Bbox(0, 0, 0, 0, label=3),  # not listed - not checked
                    ]),
            ],
            categories=['a', 'a_g1', 'a_g2_opt', 'b', 'c', 'c_g1_opt'])

        merger = IntersectMerge(
            conf={'groups': [['a', 'a_g1', 'a_g2_opt?'], ['c', 'c_g1_opt?']]})
        merger([dataset, dataset])

        self.assertEqual(
            3,
            len([e for e in merger.errors if isinstance(e, WrongGroupError)]),
            merger.errors)
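In the 'groups' setting, each inner list names labels whose annotations are
expected to share one annotation group, and a trailing '?' marks a label as
optional within that group. A sketch with hypothetical label names:

    # Hypothetical labels: every 'person' group must also contain a 'head'
    # annotation; 'glasses' may appear in the group but is not required.
    merger = IntersectMerge(conf={
        'groups': [['person', 'head', 'glasses?']],
    })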
Example #3
    def test_can_match_items(self):
        # items 1 and 3 are unique, item 2 is common and should be merged

        source0 = Dataset.from_iterable([
            DatasetItem(1, annotations=[ Label(0), ]),
            DatasetItem(2, annotations=[ Label(0), ]),
        ], categories=['a', 'b'])

        source1 = Dataset.from_iterable([
            DatasetItem(2, annotations=[ Label(1), ]),
            DatasetItem(3, annotations=[ Label(0), ]),
        ], categories=['a', 'b'])

        source2 = Dataset.from_iterable([
            DatasetItem(2, annotations=[ Label(0), Bbox(1, 2, 3, 4) ]),
        ], categories=['a', 'b'])

        expected = Dataset.from_iterable([
            DatasetItem(1, annotations=[
                Label(0, attributes={'score': 1/3}),
            ]),
            DatasetItem(2, annotations=[
                Label(0, attributes={'score': 2/3}),
                Label(1, attributes={'score': 1/3}),
                Bbox(1, 2, 3, 4, attributes={'score': 1.0}),
            ]),
            DatasetItem(3, annotations=[
                Label(0, attributes={'score': 1/3}),
            ]),
        ], categories=['a', 'b'])

        merger = IntersectMerge()
        merged = merger([source0, source1, source2])

        compare_datasets(self, expected, merged)
        self.assertEqual(
            [
                NoMatchingItemError(item_id=('1', DEFAULT_SUBSET_NAME),
                    sources={1, 2}),
                NoMatchingItemError(item_id=('3', DEFAULT_SUBSET_NAME),
                    sources={0, 2}),
            ],
            sorted((e for e in merger.errors
                    if isinstance(e, NoMatchingItemError)),
                key=lambda e: e.item_id)
        )
        self.assertEqual(
            [
                NoMatchingAnnError(item_id=('2', DEFAULT_SUBSET_NAME),
                    sources={0, 1}, ann=source2.get('2').annotations[1]),
            ],
            sorted((e for e in merger.errors
                    if isinstance(e, NoMatchingAnnError)),
                key=lambda e: e.item_id)
        )
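The 'score' attribute records the vote share: Label(0) on item 2 came from two
of the three sources, hence 2/3. To drop weakly supported annotations instead
of keeping them, output_conf_thresh (used by the CLI code in Example #4 below)
appears to be the relevant knob:

    # Sketch: keep only annotations supported by more than half the sources.
    merger = IntersectMerge(conf={'output_conf_thresh': 0.5})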
Example #4
def merge_command(args):
    source_projects = [load_project(p) for p in args.project]

    dst_dir = args.dst_dir
    if dst_dir:
        if not args.overwrite and osp.isdir(dst_dir) and os.listdir(dst_dir):
            raise CliException("Directory '%s' already exists "
                               "(pass --overwrite to overwrite)" % dst_dir)
    else:
        dst_dir = generate_next_file_name('merged')

    source_datasets = []
    for p in source_projects:
        log.debug("Loading project '%s' dataset", p.config.project_name)
        source_datasets.append(p.make_dataset())

    merger = IntersectMerge(
        conf=IntersectMerge.Conf(pairwise_dist=args.iou_thresh,
                                 groups=args.groups,
                                 output_conf_thresh=args.output_conf_thresh,
                                 quorum=args.quorum))
    merged_dataset = merger(source_datasets)

    merged_project = Project()
    output_dataset = merged_project.make_dataset()
    output_dataset.define_categories(merged_dataset.categories())
    merged_dataset = output_dataset.update(merged_dataset)
    merged_dataset.save(save_dir=dst_dir)

    report_path = osp.join(dst_dir, 'merge_report.json')
    save_merge_report(merger, report_path)

    dst_dir = osp.abspath(dst_dir)
    log.info("Merge results have been saved to '%s'" % dst_dir)
    log.info("Report has been saved to '%s'" % report_path)

    return 0
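This CLI wrapper builds an IntersectMerge.Conf, while the tests above pass a
plain dict with the same keys; both spellings appear across these examples and
seem to be interchangeable:

    # Equivalent configurations (dict form as in the tests, Conf form as here):
    IntersectMerge(conf={'pairwise_dist': 0.5, 'quorum': 1})
    IntersectMerge(conf=IntersectMerge.Conf(pairwise_dist=0.5, quorum=1))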
Example #5
    def test_can_merge_categories(self):
        source0 = Dataset.from_iterable([
            DatasetItem(1, annotations=[ Label(0), ]),
        ], categories={
            AnnotationType.label: LabelCategories.from_iterable(['a', 'b']),
            AnnotationType.points: PointsCategories.from_iterable([
                (0, ['l0', 'l1']),
                (1, ['l2', 'l3']),
            ]),
            AnnotationType.mask: MaskCategories({
                0: (0, 1, 2),
                1: (1, 2, 3),
            }),
        })

        source1 = Dataset.from_iterable([
            DatasetItem(1, annotations=[ Label(0), ]),
        ], categories={
            AnnotationType.label: LabelCategories.from_iterable(['c', 'b']),
            AnnotationType.points: PointsCategories.from_iterable([
                (0, []),
                (1, ['l2', 'l3']),
            ]),
            AnnotationType.mask: MaskCategories({
                0: (0, 2, 4),
                1: (1, 2, 3),
            }),
        })

        expected = Dataset.from_iterable([
            DatasetItem(1, annotations=[ Label(0), Label(2), ]),
        ], categories={
            AnnotationType.label: LabelCategories.from_iterable(['a', 'b', 'c']),
            AnnotationType.points: PointsCategories.from_iterable([
                (0, ['l0', 'l1']),
                (1, ['l2', 'l3']),
                (2, []),
            ]),
            AnnotationType.mask: MaskCategories({
                0: (0, 1, 2),
                1: (1, 2, 3),
                2: (0, 2, 4),
            }),
        })

        merger = IntersectMerge()
        merged = merger([source0, source1])

        compare_datasets(self, expected, merged, ignored_attrs={'score'})
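Non-label categories follow the merged label ids: 'c' is label 0 in source1
but label 2 in the result, and its PointsCategories entry and mask colormap
are re-indexed to match. A quick check, assuming LabelCategories.find returns
an (index, category) pair:

    merged_labels = merged.categories()[AnnotationType.label]
    index, _ = merged_labels.find('c')
    assert index == 2  # 'c' re-indexed from 0 (source1) to 2 (merged)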
Example #6
    def test_attributes(self):
        source0 = Dataset.from_iterable([
            DatasetItem(1, annotations=[
                Label(2, attributes={
                    'unique': 1,
                    'common_under_quorum': 2,
                    'common_over_quorum': 3,
                    'ignored': 'q',
                }),
            ]),
        ], categories=['a', 'b', 'c'])

        source1 = Dataset.from_iterable([
            DatasetItem(1, annotations=[
                Label(2, attributes={
                    'common_under_quorum': 2,
                    'common_over_quorum': 3,
                    'ignored': 'q',
                }),
            ]),
        ], categories=['a', 'b', 'c'])

        source2 = Dataset.from_iterable([
            DatasetItem(1, annotations=[
                Label(2, attributes={
                    'common_over_quorum': 3,
                    'ignored': 'q',
                }),
            ]),
        ], categories=['a', 'b', 'c'])

        expected = Dataset.from_iterable([
            DatasetItem(1, annotations=[
                Label(2, attributes={ 'common_over_quorum': 3 }),
            ]),
        ], categories=['a', 'b', 'c'])

        merger = IntersectMerge(conf={
            'quorum': 3, 'ignored_attributes': {'ignored'}})
        merged = merger([source0, source1, source2])

        compare_datasets(self, expected, merged, ignored_attrs={'score'})
        self.assertEqual(2, len([e for e in merger.errors
            if isinstance(e, FailedAttrVotingError)])
        )
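With quorum=3, an attribute value is kept only when at least three sources
report it: 'common_over_quorum' passes, while 'common_under_quorum' (two
sources) and 'unique' (one source) fail the vote, which accounts for the two
FailedAttrVotingError entries. Attributes named in ignored_attributes are
excluded from voting altogether. A sketch with hypothetical attribute names:

    # Hypothetical: keep 'occluded' only when at least two sources set it,
    # and never vote on free-form 'note' strings.
    merger = IntersectMerge(conf={
        'quorum': 2,
        'ignored_attributes': {'note'},
    })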
Example #7
    def test_can_match_lines_when_line_not_approximated(self):
        source0 = Dataset.from_iterable([
            DatasetItem(1, annotations=[
                PolyLine([1, 1, 2, 1, 3, 5, 5, 5, 8, 3]),
            ]),
        ])

        source1 = Dataset.from_iterable([
            DatasetItem(1, annotations=[
                PolyLine([1, 1, 8, 3]),
            ]),
        ])

        expected = Dataset.from_iterable([
            DatasetItem(1, annotations=[
                PolyLine([1, 1, 2, 1, 3, 5, 5, 5, 8, 3]),
            ]),
        ], categories=[])

        merger = IntersectMerge(conf={'quorum': 1, 'pairwise_dist': 0.1})
        merged = merger([source0, source1])

        compare_datasets(self, expected, merged, ignored_attrs={'score'})
        self.assertEqual(0, len(merger.errors))
Example #8
def merge(cleaned_datasets, output, save_images=False):
    """datum merge -o {output} {project_dirs}"""

    print(f"Merging datasets to {output}/")
    projects = [Project.load(p) for p in cleaned_datasets]
    datasets = [p.make_dataset() for p in projects]

    merged_project_dir = Path(output)

    # perform the merge
    merge_config = IntersectMerge.Conf(
        pairwise_dist=0.25,
        groups=[],
        output_conf_thresh=0.0,
        quorum=0,
    )
    merged_dataset = IntersectMerge(conf=merge_config)(datasets)

    merged_project = Project()
    output_dataset = merged_project.make_dataset()
    output_dataset.define_categories(merged_dataset.categories())
    merged_dataset = output_dataset.update(merged_dataset)
    merged_dataset.save(save_dir=merged_project_dir, save_images=save_images)
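As the docstring says, this reproduces 'datum merge' in library code. The
permissive settings (quorum=0, output_conf_thresh=0.0, empty groups) mean no
annotations or attribute values are filtered out, and the Project round-trip
matches the CLI implementation in Example #4. A hypothetical invocation:

    # Hypothetical paths: merge two cleaned project dirs into 'merged_out/'.
    merge(['data/proj_a', 'data/proj_b'], 'merged_out', save_images=True)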
Example #9
    def test_can_match_shapes(self):
        source0 = Dataset.from_iterable(
            [
                DatasetItem(
                    1,
                    annotations=[
                        # unique
                        Bbox(1, 2, 3, 4, label=1),

                        # common
                        Mask(label=2,
                             z_order=2,
                             image=np.array([
                                 [0, 0, 0, 0],
                                 [0, 0, 0, 0],
                                 [1, 1, 1, 0],
                                 [1, 1, 1, 0],
                             ])),
                        Polygon([1, 0, 3, 2, 1, 2]),

                        # an instance with keypoints
                        Bbox(4, 5, 2, 4, label=2, z_order=1, group=1),
                        Points([5, 6], label=0, group=1),
                        Points([6, 8], label=1, group=1),
                        PolyLine([1, 1, 2, 1, 3, 1]),
                    ]),
            ],
            categories=['a', 'b', 'c'])

        source1 = Dataset.from_iterable(
            [
                DatasetItem(
                    1,
                    annotations=[
                        # common
                        Mask(label=2,
                             image=np.array([
                                 [0, 0, 0, 0],
                                 [0, 1, 1, 1],
                                 [0, 1, 1, 1],
                                 [0, 1, 1, 1],
                             ])),
                        Polygon([0, 2, 2, 0, 2, 1]),

                        # an instance with keypoints
                        Bbox(4, 4, 2, 5, label=2, z_order=1, group=2),
                        Points([5.5, 6.5], label=0, group=2),
                        Points([6, 8], label=1, group=2),
                        PolyLine([1, 1.5, 2, 1.5]),
                    ]),
            ],
            categories=['a', 'b', 'c'])

        source2 = Dataset.from_iterable(
            [
                DatasetItem(
                    1,
                    annotations=[
                        # common
                        Mask(label=2,
                             z_order=3,
                             image=np.array([
                                 [0, 0, 1, 1],
                                 [0, 1, 1, 1],
                                 [1, 1, 1, 1],
                                 [1, 1, 1, 0],
                             ])),
                        Polygon([3, 1, 2, 2, 0, 1]),

                        # an instance with keypoints, one is missing
                        Bbox(3, 6, 2, 3, label=2, z_order=4, group=3),
                        Points([4.5, 5.5], label=0, group=3),
                        PolyLine([1, 1.25, 3, 1, 4, 2]),
                    ]),
            ],
            categories=['a', 'b', 'c'])

        expected = Dataset.from_iterable(
            [
                DatasetItem(
                    1,
                    annotations=[
                        # unique
                        Bbox(1, 2, 3, 4, label=1),

                        # common
                        # nearest to mean bbox
                        Mask(label=2,
                             z_order=3,
                             image=np.array([
                                 [0, 0, 0, 0],
                                 [0, 1, 1, 1],
                                 [0, 1, 1, 1],
                                 [0, 1, 1, 1],
                             ])),
                        Polygon([1, 0, 3, 2, 1, 2]),

                        # an instance with keypoints
                        Bbox(4, 5, 2, 4, label=2, z_order=4, group=1),
                        Points([5, 6], label=0, group=1),
                        Points([6, 8], label=1, group=1),
                        PolyLine([1, 1.25, 3, 1, 4, 2]),
                    ]),
            ],
            categories=['a', 'b', 'c'])

        merger = IntersectMerge(conf={'quorum': 1, 'pairwise_dist': 0.1})
        merged = merger([source0, source1, source2])

        compare_datasets(self, expected, merged, ignored_attrs={'score'})
        self.assertEqual(
            [
                NoMatchingAnnError(item_id=('1', ''),
                                   sources={2},
                                   ann=source0.get('1').annotations[5]),
                NoMatchingAnnError(item_id=('1', ''),
                                   sources={1, 2},
                                   ann=source0.get('1').annotations[0]),
            ],
            sorted(
                (e
                 for e in merger.errors if isinstance(e, NoMatchingAnnError)),
                key=lambda e: len(e.sources)))
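pairwise_dist is the IoU threshold for matching shapes across sources
(Example #4 wires it to an iou_thresh CLI argument): with 0.1, even the
loosely overlapping masks, polygons, and polylines above are grouped as one
object, and the kept mask is the source mask nearest to the group's mean bbox,
as the comment in the expected data notes. Raising the threshold makes
matching stricter:

    # Sketch: require IoU above 0.75 before two shapes are considered
    # the same object.
    merger = IntersectMerge(conf={'quorum': 1, 'pairwise_dist': 0.75})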
Example #10
    def mergeDataset(self, import_args: Arg, filter_arg: Arg):
        config = setConfig(import_args['format'])
        source_datasets = dict([(path, Environment().make_importer(
            import_args['format'])(str(path)).make_dataset())
                                for path in self.datasetPathList])
        # Collect (item id, source path) pairs from all datasets to detect
        # item id collisions between sources.
        itemIdsAndPath = reduce(lambda x, y: x + y,
                                [[(item.id, path) for item in dataset]
                                 for path, dataset in source_datasets.items()])
        for path, dataset in source_datasets.items():
            itemIdsInPath = set(
                itemId for itemId, _path in itemIdsAndPath if _path == path)
            itemIdsOutPath = set(
                itemId for itemId, _path in itemIdsAndPath if _path != path)
            # On a collision, prefix this dataset's item ids and image paths
            # with the source directory name so distinct items with the same
            # id are not merged by accident.
            if itemIdsInPath & itemIdsOutPath:
                for subsetName, subset in dataset.subsets().items():
                    imgDir: Path = path / config.getImgDir(subsetName)
                    _subset = deepcopy(subset.items)
                    for item in _subset.values():
                        imgFile = Path(item.image.path)
                        relPath = imgFile.relative_to(imgDir)
                        newPath = imgDir / path.name / relPath
                        oldItemId = item.id
                        newItemId = item.id = str(path.name / relPath.parent /
                                                  relPath.stem).replace(
                                                      '\\', '/')
                        item.image._path = str(newPath)
                        del subset.items[oldItemId]
                        subset.items[newItemId] = item
                        newPath.parent.mkdir(parents=True, exist_ok=True)

                        if item.image.has_data:
                            move(str(imgFile),
                                 str(imgDir / path.name / relPath))

        mergePath = (self.projectsPath / self.mergeFolderName)
        if mergePath.is_dir():
            rmtree(mergePath, onerror=remove_readonly)
        mergePath.mkdir(exist_ok=True, parents=True)
        dst_dir = str(mergePath)

        merger = IntersectMerge(conf=IntersectMerge.Conf())
        merged_dataset = merger(list(source_datasets.values()))

        merged_project = Project()
        output_dataset = merged_project.make_dataset()
        output_dataset.define_categories(merged_dataset.categories())
        merged_dataset = output_dataset.update(merged_dataset)
        if filter_arg['no_anno_filter'].lower() == 'y':
            filtered_dataset = Project().make_dataset()
            filtered_dataset.define_categories(merged_dataset.categories())
            merged_dataset = filtered_dataset.update(
                merged_dataset.select(lambda item: len(item.annotations) != 0))
        annoId = 1
        imageIdName = config.imageIdName
        for idx, item in tqdm(enumerate(merged_dataset), desc='datasets'):
            if imageIdName is not None:
                item.attributes[imageIdName] = idx + 1
            for anno in item.annotations:
                anno.id = annoId
                annoId += 1
        merged_dataset.save(save_dir=dst_dir, save_images=True)

        return self
Example #11
def merge_command(args):
    # Workaround. Required positionals consume positionals from the end
    args._positionals += join_cli_args(args, 'targets', 'extra_args')

    has_sep = '--' in args._positionals
    if has_sep:
        pos = args._positionals.index('--')
        if pos == 0:
            raise argparse.ArgumentError(None,
                message="Expected at least 1 target argument")
    else:
        pos = len(args._positionals)
    args.targets = args._positionals[:pos] or [ProjectBuildTargets.MAIN_TARGET]
    args.extra_args = args._positionals[pos + has_sep:]

    show_plugin_help = '-h' in args.extra_args or '--help' in args.extra_args

    dst_dir = args.dst_dir
    if dst_dir:
        if not args.overwrite and osp.isdir(dst_dir) and os.listdir(dst_dir):
            raise CliException("Directory '%s' already exists "
                "(pass --overwrite to overwrite)" % dst_dir)
    else:
        dst_dir = generate_next_file_name('merged')
    dst_dir = osp.abspath(dst_dir)

    project = None
    try:
        project = scope_add(load_project(args.project_dir))
    except ProjectNotFoundError:
        if not show_plugin_help and len(args.targets) == 1 and args.project_dir:
            raise

    if project is not None:
        env = project.env
    else:
        env = Environment()

    try:
        converter = env.converters[args.format]
    except KeyError:
        raise CliException("Converter for format '%s' is not found" % \
            args.format)

    export_args = converter.parse_cmdline(args.extra_args)

    source_datasets = []
    try:
        if len(args.targets) == 1:
            source_datasets.append(project.working_tree.make_dataset())

        for t in args.targets:
            target_dataset, target_project = parse_full_revpath(t, project)
            if target_project:
                scope_add(target_project)
            source_datasets.append(target_dataset)
    except Exception as e:
        raise CliException(str(e))

    merger = IntersectMerge(conf=IntersectMerge.Conf(
        pairwise_dist=args.iou_thresh, groups=args.groups or [],
        output_conf_thresh=args.output_conf_thresh, quorum=args.quorum
    ))
    merged_dataset = merger(source_datasets)

    merged_dataset.export(save_dir=dst_dir, format=converter, **export_args)

    report_path = osp.join(dst_dir, 'merge_report.json')
    save_merge_report(merger, report_path)

    log.info("Merge results have been saved to '%s'" % dst_dir)
    log.info("Report has been saved to '%s'" % report_path)

    return 0
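This newer CLI variant accepts dataset revpaths as targets, with an optional
'--' separating them from converter-specific extra args; when a single target
is given, the current project's working tree is merged with it as well (hence
the extra working_tree dataset appended above). Sketch of the expected command
shape, based purely on the parsing logic shown:

    # datum merge <target> [<target> ...] [-- <converter args>]
    # A lone '--' with no targets raises "Expected at least 1 target argument".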