def __init__(self, cfg, split, dataset_source_map):
    """Set up the dataset state for ``split`` from ``cfg["DATA"][split]``.

    Reads the batch size, data/label sources, dataset names, label type
    and transforms from the config section, runs the data-source
    verification and data-file lookup helpers, and then instantiates one
    data source object per entry of ``DATA_SOURCES``.

    Args:
        cfg: configuration object; indexed as ``cfg["DATA"][split]`` and
            passed through unchanged to every data source constructor.
        split: key selecting the config section under ``cfg["DATA"]``.
        dataset_source_map: mapping from a data source name to the
            callable used to build the object for that source. Each
            callable is invoked with the keyword arguments ``cfg``,
            ``path``, ``split``, ``dataset_name`` and ``data_source``.

    Raises:
        AssertionError: if both label sources and label paths are
            non-empty but differ in length.
    """
    self.split = split
    self.cfg = cfg

    # Containers filled in below (data_objs) or by helper methods.
    self.data_objs = []
    self.label_objs = []
    self.data_paths = []
    self.label_paths = []

    # All per-split settings live under the same config section.
    split_cfg = self.cfg["DATA"][split]
    self.batchsize_per_replica = split_cfg["BATCHSIZE_PER_REPLICA"]
    self.data_sources = split_cfg.DATA_SOURCES
    self.label_sources = split_cfg.LABEL_SOURCES
    self.dataset_names = split_cfg.DATASET_NAMES
    self.label_type = split_cfg.LABEL_TYPE
    self.transform = get_transform(split_cfg.TRANSFORMS)

    # No label objects are created in the constructor.
    self._labels_init = False

    self._verify_data_sources(split, dataset_source_map)
    # NOTE(review): presumably this fills self.data_paths / self.label_paths,
    # which are indexed below -- confirm against _get_data_files.
    self._get_data_files(split)

    # When labels are configured, there must be one label path per source.
    num_label_sources = len(self.label_sources)
    if num_label_sources > 0 and len(self.label_paths) > 0:
        num_label_paths = len(self.label_paths)
        assert num_label_sources == num_label_paths, (
            f"len(label_sources) != len(label paths) "
            f"{num_label_sources} vs. {num_label_paths}"
        )

    # data_sources, data_paths and dataset_names are parallel lists:
    # position i of each describes the same data source.
    for position, source_name in enumerate(self.data_sources):
        build_source = dataset_source_map[source_name]
        source_obj = build_source(
            cfg=self.cfg,
            path=self.data_paths[position],
            split=split,
            dataset_name=self.dataset_names[position],
            data_source=source_name,
        )
        self.data_objs.append(source_obj)
def __init__(
    self,
    cfg: AttrDict,
    split: str,
    dataset_source_map: Dict[str, Callable],
    data_sources_with_subset: Set[str],
):
    """Set up the dataset state for ``split`` from ``cfg["DATA"][split]``.

    Reads the batch size, data/label sources, dataset names, label type,
    data limit and transforms from the config section, runs the
    data-source verification and data-file lookup helpers, and then
    instantiates one data source object per entry of ``DATA_SOURCES``.

    Args:
        cfg: configuration object; indexed as ``cfg["DATA"][split]`` and
            passed through unchanged to every data source constructor.
        split: key selecting the config section under ``cfg["DATA"]``.
        dataset_source_map: mapping from a data source name to the
            callable used to build the object for that source. Each
            callable is invoked with the keyword arguments ``cfg``,
            ``path``, ``split``, ``dataset_name`` and ``data_source``.
        data_sources_with_subset: set of data source names; only stored
            on the instance here.

    Raises:
        AssertionError: if both label sources and label paths are
            non-empty but differ in length.
    """
    self.cfg = cfg
    self.split = split
    self.data_sources_with_subset = data_sources_with_subset

    # Containers filled in below (data_objs) or by helper methods.
    self.data_objs = []
    self.label_objs = []
    self.data_paths = []
    self.label_paths = []

    # All per-split settings live under the same config section.
    split_cfg = self.cfg["DATA"][split]
    self.batchsize_per_replica = split_cfg["BATCHSIZE_PER_REPLICA"]
    self.data_sources = split_cfg.DATA_SOURCES
    self.label_sources = split_cfg.LABEL_SOURCES
    self.dataset_names = split_cfg.DATASET_NAMES
    self.label_type = split_cfg.LABEL_TYPE
    self.data_limit = split_cfg.DATA_LIMIT
    self.data_limit_sampling = self._get_data_limit_sampling(cfg, split)
    self.transform = get_transform(split_cfg.TRANSFORMS)

    # Neither labels nor any subset are prepared in the constructor.
    self._labels_init = False
    self._subset_initialized = False
    self.image_and_label_subset = None

    self._verify_data_sources(split, dataset_source_map)
    # NOTE(review): presumably this fills self.data_paths / self.label_paths,
    # which are indexed below -- confirm against _get_data_files.
    self._get_data_files(split)

    # When labels are configured, there must be one label path per source.
    num_label_sources = len(self.label_sources)
    if num_label_sources > 0 and len(self.label_paths) > 0:
        num_label_paths = len(self.label_paths)
        assert num_label_sources == num_label_paths, (
            f"len(label_sources) != len(label paths) "
            f"{num_label_sources} vs. {num_label_paths}"
        )

    # data_sources, data_paths and dataset_names are parallel lists:
    # position i of each describes the same data source.
    for position, source_name in enumerate(self.data_sources):
        build_source = dataset_source_map[source_name]
        source_obj = build_source(
            cfg=self.cfg,
            path=self.data_paths[position],
            split=split,
            dataset_name=self.dataset_names[position],
            data_source=source_name,
        )
        self.data_objs.append(source_obj)