def test_can_save_and_load(self):
    """Round-trip a one-item labelled dataset through save() and load()."""
    items = [
        DatasetItem(id=1, annotations=[Label(2)]),
    ]
    expected = Dataset.from_iterable(items, categories=['a', 'b', 'c'])

    with TestDir() as tmp_dir:
        expected.save(tmp_dir)
        actual = Dataset.load(tmp_dir)

        # Compare while the directory is still available to the loaded dataset.
        compare_datasets(self, expected, actual)
def __init__(self, project):
    """Build the merged dataset view for ``project``.

    Imports every source listed in the configuration, loads the dataset
    stored in the project's own dataset directory when that directory
    exists, and merges the categories and items of all of them into
    per-subset item tables.

    Args:
        project: The owning project. It is stored on the instance together
            with its ``env`` attribute. ``self.config`` and ``self.env``
            are read right afterwards; they are defined outside this
            method (presumably backed by the project - verify).
    """
    super().__init__()

    self._project = project
    self._env = project.env
    config = self.config
    env = self.env

    # Import all configured sources.
    sources = {}
    for s_name, source in config.sources.items():
        s_format = source.format or env.PROJECT_EXTRACTOR_NAME

        # A source without an explicit URL is looked up under the
        # project's sources directory by its name.
        url = source.url or osp.join(
            config.project_dir, config.sources_dir, s_name)
        sources[s_name] = Dataset.import_from(
            url, format=s_format, env=env, **source.options)
    self._sources = sources

    # Load the project's own dataset, if its directory is present.
    own_source = None
    own_source_dir = osp.join(config.project_dir, config.dataset_dir)
    if config.project_dir and osp.isdir(own_source_dir):
        own_source = Dataset.load(own_source_dir)

    # merge categories
    # TODO: implement properly with merging and annotations remapping
    categories = ExactMerge.merge_categories(
        s.categories() for s in self._sources.values())
    # overwrite with own categories
    if own_source is not None and (not categories or len(own_source) != 0):
        categories.update(own_source.categories())
    self._categories = categories

    # merge items
    subsets = {}
    for source_name, source in self._sources.items():
        # Lazy logger arguments: the message is only formatted when the
        # debug level is actually enabled.
        log.debug("Loading '%s' source contents...", source_name)

        # The "imported source" decision depends only on the source
        # configuration, so it is computed once per source, not per item.
        s_config = config.sources[source_name]
        is_imported = bool(s_config) and \
            s_config.format != env.PROJECT_EXTRACTOR_NAME

        for item in source:
            # NOTE(review): setdefault() constructs a Subset for every item,
            # even when the subset already exists; lazy creation would avoid
            # that if Subset construction has no side effects - confirm.
            subset = subsets.setdefault(
                item.subset, self.Subset(self, item.subset))
            existing_item = subset.items.get(item.id)

            if existing_item is not None:
                path = existing_item.path
                if item.path != path:
                    path = None  # NOTE: move to our own dataset
                item = ExactMerge.merge_items(existing_item, item, path=path)
            else:
                if is_imported:
                    # NOTE: consider imported sources as our own dataset
                    path = None
                else:
                    path = [source_name] + (item.path or [])
                item = item.wrap(path=path)

            subsets[item.subset].items[item.id] = item

    # override with our items, fallback to existing images
    if own_source is not None:
        log.debug("Loading own dataset...")
        for item in own_source:
            subset = subsets.setdefault(
                item.subset, self.Subset(self, item.subset))
            existing_item = subset.items.get(item.id)

            if existing_item is not None:
                item = item.wrap(
                    path=None,
                    image=ExactMerge.merge_images(existing_item, item))

            subsets[item.subset].items[item.id] = item

    self._subsets = subsets

    # Reset to None here; nothing in this method computes the length.
    self._length = None