Example #1
# Imports assume the pre-1.0 Datumaro API used in this snippet.
from pathlib import Path

from datumaro.components.operations import IntersectMerge
from datumaro.components.project import Project


def merge(cleaned_datasets, output, save_images=False):
    """Python equivalent of `datum merge -o {output} {project_dirs}`."""

    print(f"Merging datasets to {output}/")
    projects = [Project.load(p) for p in cleaned_datasets]
    datasets = [p.make_dataset() for p in projects]

    merged_project_dir = Path(output)

    # Merge annotations across datasets: pairwise_dist is the IoU threshold
    # for matching spatial annotations, output_conf_thresh drops low-confidence
    # results, and quorum is the minimum number of votes needed to keep a label.
    merge_config = IntersectMerge.Conf(
        pairwise_dist=0.25,
        groups=[],
        output_conf_thresh=0.0,
        quorum=0,
    )
    merged_dataset = IntersectMerge(conf=merge_config)(datasets)

    merged_project = Project()
    output_dataset = merged_project.make_dataset()
    output_dataset.define_categories(merged_dataset.categories())
    merged_dataset = output_dataset.update(merged_dataset)
    merged_dataset.save(save_dir=merged_project_dir, save_images=save_images)
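
A minimal usage sketch (the directory names are placeholders):

# Hypothetical cleaned Datumaro project directories.
cleaned = ["cleaned/project_a", "cleaned/project_b"]
merge(cleaned, output="merged", save_images=True)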
Example #2
def merge_command(args):
    source_projects = [load_project(p) for p in args.project]

    dst_dir = args.dst_dir
    if dst_dir:
        if not args.overwrite and osp.isdir(dst_dir) and os.listdir(dst_dir):
            raise CliException("Directory '%s' already exists "
                               "(pass --overwrite to overwrite)" % dst_dir)
    else:
        dst_dir = generate_next_file_name('merged')

    source_datasets = []
    for p in source_projects:
        log.debug("Loading project '%s' dataset", p.config.project_name)
        source_datasets.append(p.make_dataset())

    merger = IntersectMerge(
        conf=IntersectMerge.Conf(pairwise_dist=args.iou_thresh,
                                 groups=args.groups,
                                 output_conf_thresh=args.output_conf_thresh,
                                 quorum=args.quorum))
    merged_dataset = merger(source_datasets)

    merged_project = Project()
    output_dataset = merged_project.make_dataset()
    output_dataset.define_categories(merged_dataset.categories())
    merged_dataset = output_dataset.update(merged_dataset)
    merged_dataset.save(save_dir=dst_dir)

    report_path = osp.join(dst_dir, 'merge_report.json')
    save_merge_report(merger, report_path)

    dst_dir = osp.abspath(dst_dir)
    log.info("Merge results have been saved to '%s'" % dst_dir)
    log.info("Report has been saved to '%s'" % report_path)

    return 0
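
A hedged sketch of driving this handler directly; the attribute names mirror
exactly what merge_command() reads, and the paths and values are placeholders:

from argparse import Namespace

# Hypothetical invocation; in the real CLI these attributes come from argparse.
args = Namespace(
    project=['proj_a', 'proj_b'],   # project directories to merge
    dst_dir='merged',               # a falsy value picks an auto-generated name
    overwrite=False,
    iou_thresh=0.25,                # pairwise IoU threshold for matching
    groups=[],
    output_conf_thresh=0.0,
    quorum=0,
)
merge_command(args)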
Example #3
    def mergeDataset(self, import_args: Arg, filter_arg: Arg):
        """Merge the datasets in self.datasetPathList into a single project.

        Item ids that collide across datasets are made unique by prefixing
        them (and relocating their images) with the dataset directory name
        before the datasets are merged with IntersectMerge.
        """
        config = setConfig(import_args['format'])
        # Import each dataset path with the importer registered for the
        # requested format.
        source_datasets = dict([(path, Environment().make_importer(
            import_args['format'])(str(path)).make_dataset())
                                for path in self.datasetPathList])
        # Flat list of (item id, dataset path) pairs across all datasets,
        # used to detect id collisions between datasets.
        itemIdsAndPath = reduce(lambda x, y: x + y,
                                [[(item.id, path) for item in dataset]
                                 for path, dataset in source_datasets.items()])
        for path, dataset in source_datasets.items():
            # Split the collected ids into this dataset's ids and the other
            # datasets' ids; an overlap means the ids collide and the items
            # of this dataset must be renamed.
            itemIdsInPath = set(
                itemId for itemId, _path in itemIdsAndPath if _path == path)
            itemIdsOutPath = set(
                itemId for itemId, _path in itemIdsAndPath if _path != path)
            if itemIdsInPath & itemIdsOutPath:
                for subsetName, subset in dataset.subsets().items():
                    imgDir: Path = path / config.getImgDir(subsetName)
                    _subset = deepcopy(subset.items)
                    for item in _subset.values():
                        # Re-key the item as '<dataset dir>/<relative path>'
                        # and move its image into a matching subdirectory.
                        imgFile = Path(item.image.path)
                        relPath = imgFile.relative_to(imgDir)
                        newPath = imgDir / path.name / relPath
                        oldItemId = item.id
                        newItemId = item.id = str(path.name / relPath.parent /
                                                  relPath.stem).replace(
                                                      '\\', '/')
                        item.image._path = str(newPath)
                        del subset.items[oldItemId]
                        subset.items[newItemId] = item
                        newPath.parent.mkdir(parents=True, exist_ok=True)

                        if item.image.has_data:
                            move(str(imgFile),
                                 str(imgDir / path.name / relPath))

        mergePath = (self.projectsPath / self.mergeFolderName)
        if mergePath.is_dir():
            rmtree(mergePath, onerror=remove_readonly)
        mergePath.mkdir(exist_ok=True, parents=True)
        dst_dir = str(mergePath)

        merger = IntersectMerge(conf=IntersectMerge.Conf())
        merged_dataset = merger(list(source_datasets.values()))

        merged_project = Project()
        output_dataset = merged_project.make_dataset()
        output_dataset.define_categories(merged_dataset.categories())
        merged_dataset = output_dataset.update(merged_dataset)
        # When the "no annotation" filter is enabled ('y'), keep only items
        # that actually carry annotations.
        if filter_arg['no_anno_filter'].lower() == 'y':
            filtered_dataset = Project().make_dataset()
            filtered_dataset.define_categories(merged_dataset.categories())
            merged_dataset = filtered_dataset.update(
                merged_dataset.select(lambda item: len(item.annotations) != 0))
        # Renumber image and annotation ids sequentially across the merged set.
        annoId = 1
        imageIdName = config.imageIdName
        for idx, item in tqdm(enumerate(merged_dataset), desc='datasets'):
            if imageIdName is not None:
                item.attributes[imageIdName] = idx + 1
            for anno in item.annotations:
                anno.id = annoId
                annoId += 1
        merged_dataset.save(save_dir=dst_dir, save_images=True)

        # for subsetName, subset in tqdm(merged_dataset.subsets().items(), desc='datasets'):
        #     for idx, itemId in tqdm(enumerate(itemIds), desc='items'):
        #         if imageIdName is not None:
        #             merged_dataset.get(itemId,subset=subsetName).attributes[imageIdName] = idx+1
        #         for anno in merged_dataset.get(itemId, subset=subsetName).annotations:
        #             anno.id = annoId
        #             annoId += 1
        #     merged_dataset.save(save_dir=dst_dir, save_images=True)
        return self
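
A simplified, standalone illustration of the collision handling above; the
helper name and the dict-of-items input shape are assumptions, not part of the
original class:

def prefix_dataset_ids_on_collision(datasets_by_path):
    # datasets_by_path: {Path: list of items with a mutable `id` attribute}.
    ids_by_path = {path: {item.id for item in items}
                   for path, items in datasets_by_path.items()}
    for path, items in datasets_by_path.items():
        other_ids = set().union(
            *(ids for p, ids in ids_by_path.items() if p != path))
        # If any id of this dataset also occurs in another dataset, prefix all
        # of its ids with the dataset directory name, as mergeDataset() does
        # (the real method also moves the image files accordingly).
        if ids_by_path[path] & other_ids:
            for item in items:
                item.id = f"{path.name}/{item.id}"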
Example #4
def merge_command(args):
    # Workaround. Required positionals consume positionals from the end
    args._positionals += join_cli_args(args, 'targets', 'extra_args')

    # Split the positionals into merge targets and converter arguments at '--'.
    has_sep = '--' in args._positionals
    if has_sep:
        pos = args._positionals.index('--')
        if pos == 0:
            raise argparse.ArgumentError(None,
                message="Expected at least 1 target argument")
    else:
        pos = len(args._positionals)
    args.targets = args._positionals[:pos] or [ProjectBuildTargets.MAIN_TARGET]
    args.extra_args = args._positionals[pos + has_sep:]

    show_plugin_help = '-h' in args.extra_args or '--help' in args.extra_args

    dst_dir = args.dst_dir
    if dst_dir:
        if not args.overwrite and osp.isdir(dst_dir) and os.listdir(dst_dir):
            raise CliException("Directory '%s' already exists "
                "(pass --overwrite to overwrite)" % dst_dir)
    else:
        dst_dir = generate_next_file_name('merged')
    dst_dir = osp.abspath(dst_dir)

    project = None
    try:
        project = scope_add(load_project(args.project_dir))
    except ProjectNotFoundError:
        if not show_plugin_help and len(args.targets) == 1 and args.project_dir:
            raise

    if project is not None:
        env = project.env
    else:
        env = Environment()

    try:
        converter = env.converters[args.format]
    except KeyError:
        raise CliException("Converter for format '%s' is not found" % \
            args.format)

    export_args = converter.parse_cmdline(args.extra_args)

    source_datasets = []
    try:
        # With a single target, the current project's working tree is also
        # merged in, so "datum merge <dataset>" merges it into the project.
        if len(args.targets) == 1:
            source_datasets.append(project.working_tree.make_dataset())

        for t in args.targets:
            target_dataset, target_project = parse_full_revpath(t, project)
            if target_project:
                scope_add(target_project)
            source_datasets.append(target_dataset)
    except Exception as e:
        raise CliException(str(e))

    merger = IntersectMerge(conf=IntersectMerge.Conf(
        pairwise_dist=args.iou_thresh, groups=args.groups or [],
        output_conf_thresh=args.output_conf_thresh, quorum=args.quorum
    ))
    merged_dataset = merger(source_datasets)

    merged_dataset.export(save_dir=dst_dir, format=converter, **export_args)

    report_path = osp.join(dst_dir, 'merge_report.json')
    save_merge_report(merger, report_path)

    log.info("Merge results have been saved to '%s'" % dst_dir)
    log.info("Report has been saved to '%s'" % report_path)

    return 0
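
A hedged sketch of the namespace this newer handler expects; the attributes
mirror what the code reads, the values are placeholders, and the call assumes
an active Datumaro scope (the handler uses scope_add() internally):

from argparse import Namespace

args = Namespace(
    _positionals=['dataset_a/', 'dataset_b/'],  # revpath targets to merge
    targets=[], extra_args=[],                  # filled in by the workaround
    dst_dir='merged', overwrite=False,
    project_dir='.',                            # current Datumaro project
    format='datumaro',                          # export format for the result
    iou_thresh=0.25, groups=[], output_conf_thresh=0.0, quorum=0,
)
merge_command(args)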