예제 #1
0
    def test_can_save_and_load(self):
        # Round-trip check: a dataset written to a temporary directory must
        # compare equal to the dataset read back from that directory.
        items = [DatasetItem(id=1, annotations=[Label(2)])]
        expected = Dataset.from_iterable(items, categories=['a', 'b', 'c'])

        with TestDir() as tmp_dir:
            expected.save(tmp_dir)
            actual = Dataset.load(tmp_dir)

            compare_datasets(self, expected, actual)
예제 #2
0
    def __init__(self, project):
        """Build the merged dataset of a project.

        Every source listed in the configuration is imported, the project's
        own dataset directory is loaded when it exists, and categories and
        items of all of them are merged into per-subset item tables keyed by
        item id. Items of the own dataset are applied last, so they replace
        source items with the same subset and id.

        Sets ``_project``, ``_env``, ``_sources``, ``_categories``,
        ``_subsets`` and ``_length`` on the instance.

        Args:
            project: Project being wrapped. Its ``env`` attribute is stored
                on the instance; configuration and environment are then read
                through ``self.config`` and ``self.env``.
        """
        super().__init__()

        self._project = project
        self._env = project.env
        config = self.config
        env = self.env

        # Import every configured source. A source without an explicit URL
        # is looked up under <project_dir>/<sources_dir>/<source name>.
        sources = {}
        for s_name, source in config.sources.items():
            s_format = source.format or env.PROJECT_EXTRACTOR_NAME

            url = source.url
            if not url:
                url = osp.join(config.project_dir, config.sources_dir, s_name)
            sources[s_name] = Dataset.import_from(url,
                                                  format=s_format,
                                                  env=env,
                                                  **source.options)
        self._sources = sources

        # Load the project's own dataset, if there is one. The path is only
        # built once project_dir is known to be set, so an unset project
        # directory is skipped instead of failing inside osp.join().
        own_source = None
        if config.project_dir:
            own_source_dir = osp.join(config.project_dir, config.dataset_dir)
            if osp.isdir(own_source_dir):
                own_source = Dataset.load(own_source_dir)

        # merge categories
        # TODO: implement properly with merging and annotations remapping
        categories = ExactMerge.merge_categories(
            s.categories() for s in self._sources.values())
        # overwrite with own categories
        if own_source is not None and (not categories or len(own_source) != 0):
            categories.update(own_source.categories())
        self._categories = categories

        subsets = {}

        def subset_for(name):
            # Get-or-create, so that a Subset object is only constructed the
            # first time a subset name is seen (dict.setdefault would build
            # and discard one for every single item).
            if name not in subsets:
                subsets[name] = self.Subset(self, name)
            return subsets[name]

        # merge items
        for source_name, source in self._sources.items():
            log.debug("Loading '%s' source contents...", source_name)

            # Loop-invariant for the whole source: does it come from a
            # format other than the project's own one?
            s_config = config.sources[source_name]
            is_imported = bool(
                s_config and s_config.format != env.PROJECT_EXTRACTOR_NAME)

            for item in source:
                existing_item = subset_for(item.subset).items.get(item.id)
                if existing_item is not None:
                    path = existing_item.path
                    if item.path != path:
                        path = None  # NOTE: move to our own dataset
                    item = ExactMerge.merge_items(existing_item,
                                                  item,
                                                  path=path)
                else:
                    if is_imported:
                        # NOTE: consider imported sources as our own dataset
                        path = None
                    else:
                        path = [source_name] + (item.path or [])
                    item = item.wrap(path=path)

                subsets[item.subset].items[item.id] = item

        # override with our items, fallback to existing images
        if own_source is not None:
            log.debug("Loading own dataset...")
            for item in own_source:
                existing_item = subset_for(item.subset).items.get(item.id)
                if existing_item is not None:
                    item = item.wrap(path=None,
                                     image=ExactMerge.merge_images(
                                         existing_item, item))

                subsets[item.subset].items[item.id] = item

        self._subsets = subsets

        # NOTE(review): presumably a lazily computed length cache that is
        # filled elsewhere in the class - confirm.
        self._length = None