def load(self, config, dataset_type, *args, **kwargs):
    """Build and return the concatenated dataset for ``dataset_type``.

    Reads ``config["annotations"][dataset_type]`` (a list of annotation
    entries, or a single string which is promoted to a one-element list),
    instantiates ``self.dataset_class`` once per entry, and wraps them in a
    ``MMFConcatDataset``.

    Args:
        config: Dataset config mapping; must support ``.get``.
        dataset_type: Split name to look up (e.g. "train", "val").

    Returns:
        The ``MMFConcatDataset`` (also stored on ``self.dataset``), or
        ``None`` — with a warning — when no annotations are configured
        for this split.
    """
    self.config = config
    annotations = config.get("annotations", {}).get(dataset_type, [])

    # User can pass a single string as well
    if isinstance(annotations, str):
        annotations = [annotations]

    if not annotations:
        # BUG FIX: the message was previously assembled with explicit "+"
        # concatenation, so .format() bound only to the final literal and
        # the "{}" placeholder was emitted verbatim. Implicit literal
        # concatenation makes .format() apply to the whole message.
        warnings.warn(
            "Dataset type {} is not present or empty in "
            "annotations of dataset config or either annotations "
            "key is not present. Returning None. "
            "This dataset won't be used.".format(dataset_type)
        )
        return None

    datasets = []
    for imdb_idx in range(len(annotations)):
        # One dataset instance per annotation entry, indexed by position.
        dataset = self.dataset_class(config, dataset_type, imdb_idx)
        datasets.append(dataset)

    self.dataset = MMFConcatDataset(datasets)
    return self.dataset
def build_dataset_from_multiple_imdbs(config, dataset_cls, dataset_type):
    """Concatenate one ``dataset_cls`` instance per imdb file of a split.

    Looks up ``config.imdb_files[dataset_type]``; if the split is missing,
    warns and returns ``None``. Otherwise builds a ``dataset_cls`` for each
    imdb index and wraps them in a ``MMFConcatDataset``.

    Args:
        config: Dataset config exposing an ``imdb_files`` mapping.
        dataset_cls: Callable ``(dataset_type, imdb_idx, config) -> dataset``.
        dataset_type: Split name to build (e.g. "train").

    Returns:
        ``MMFConcatDataset`` over the per-imdb datasets, or ``None``.
    """
    from mmf.datasets.concat_dataset import MMFConcatDataset

    if dataset_type not in config.imdb_files:
        warnings.warn(
            "Dataset type {} is not present in "
            "imdb_files of dataset config. Returning None. "
            "This dataset won't be used.".format(dataset_type)
        )
        return None

    imdb_files = config["imdb_files"][dataset_type]
    # One sub-dataset per imdb file, indexed by position in the list.
    members = [
        dataset_cls(dataset_type, idx, config) for idx in range(len(imdb_files))
    ]
    return MMFConcatDataset(members)
def load(self, config, dataset_type, *args, **kwargs):
    """Build the concatenated dataset for ``dataset_type``.

    When ``split_train`` is set in the config, the config is first adjusted
    via ``self._modify_dataset_config_for_split`` and the resulting concat
    dataset is carved out of the train split afterwards. Annotations are
    resolved through ``self._read_annotations``; a ``None`` result aborts
    loading and is propagated to the caller.

    Args:
        config: Dataset config mapping; must support ``.get``.
        dataset_type: Split name to load (e.g. "train", "val").

    Returns:
        The (possibly split) ``MMFConcatDataset``, stored on
        ``self.dataset``, or ``None`` when annotations are missing.
    """
    self.config = config
    split_from_train = self.config.get("split_train", False)
    if split_from_train:
        config = self._modify_dataset_config_for_split(config)

    annotations = self._read_annotations(config, dataset_type)
    if annotations is None:
        return None

    # One dataset instance per annotation entry, indexed by position.
    parts = [
        self.dataset_class(config, dataset_type, idx)
        for idx in range(len(annotations))
    ]

    dataset = MMFConcatDataset(parts)
    if split_from_train:
        dataset = self._split_dataset_from_train(dataset, dataset_type)

    self.dataset = dataset
    return self.dataset