def build_datasets(self, config):
    """Build every dataset listed in ``config`` and register it on this instance.

    Datasets missing a ``dataset_config`` entry get an empty OmegaConf node
    (with a warning) rather than failing. Datasets whose builder returns
    ``None`` are skipped silently.
    """
    self._datasets = []
    self.config = config
    self._given_datasets = dataset_list_from_config(self.config)

    for dataset_key in self._given_datasets:
        # Fall back to an empty config when the dataset has no entry,
        # warning the user so the omission is visible.
        if dataset_key not in self.config.dataset_config:
            warnings.warn(
                f"Dataset {dataset_key} is missing from dataset_config"
                + " in config. Proceeding with empty config."
            )
            dataset_config = OmegaConf.create()
        else:
            dataset_config = self.config.dataset_config[dataset_key]

        dataset_instance = build_dataset(dataset_key, dataset_config, self.dataset_type)
        if dataset_instance is None:
            continue

        self.datasets.append(dataset_instance)
        self.dataset_list.append(dataset_key)

    self._num_datasets = len(self.datasets)
    self.current_index = 0
    self._infer_dataset_probabilities()
def build_datasets(self, config):
    """Build every dataset listed in ``config`` and register it on this instance.

    For each configured dataset: look up its ``dataset_config`` entry
    (warning and using an empty config when missing), build it, and — for
    map-style datasets that define ``__len__`` — record its length in
    ``self._per_dataset_lengths`` and ``self._total_length``. Builders that
    return ``None`` are skipped.

    Raises:
        RuntimeError: if a built dataset reports a length of zero.
    """
    self.config = config
    self._process_datasets()

    for dataset in self._given_datasets:
        if dataset in self.config.dataset_config:
            dataset_config = self.config.dataset_config[dataset]
        else:
            warnings.warn(
                f"Dataset {dataset} is missing from dataset_config"
                + " in config. Proceeding with empty config."
            )
            dataset_config = OmegaConf.create()

        dataset_instance = build_dataset(dataset, dataset_config, self.dataset_type)
        if dataset_instance is None:
            continue
        self.datasets.append(dataset_instance)

        # Iterable-style datasets may not define __len__; only track
        # lengths for map-style datasets.
        if hasattr(dataset_instance, "__len__"):
            dataset_instance_length = len(dataset_instance)
            # Explicit raise instead of `assert` so the empty-dataset
            # check survives `python -O` (asserts are stripped there).
            if not dataset_instance_length:
                raise RuntimeError(f"dataset: {self.dataset_type} is empty")
            self._per_dataset_lengths.append(dataset_instance_length)
            self._total_length += dataset_instance_length

    self._num_datasets = len(self.datasets)
    self.current_index = 0
    # NOTE(review): raises IndexError when every builder returned None —
    # presumably at least one dataset is always configured; confirm.
    self.current_dataset = self.datasets[self.current_index]
    self._infer_dataset_probabilities()
def build_datasets(self, config):
    """Build every dataset listed in ``config`` and register it on this instance.

    For each configured dataset: look up its ``dataset_config`` entry,
    build it, and — for map-style datasets that define ``__len__`` — record
    its length in ``self._per_dataset_lengths`` and ``self._total_length``.
    Builders that return ``None`` are skipped.

    Raises:
        RuntimeError: if a configured dataset has no ``dataset_config`` entry.
    """
    self.config = config
    self._process_datasets()

    for dataset in self._given_datasets:
        if dataset in self.config.dataset_config:
            dataset_config = self.config.dataset_config[dataset]
        else:
            raise RuntimeError(f"Dataset {dataset} is missing from "
                               "dataset_config in config.")

        dataset_instance = build_dataset(dataset, dataset_config, self.dataset_type)
        if dataset_instance is None:
            continue
        self.datasets.append(dataset_instance)

        # Iterable-style datasets may not define __len__; only track
        # lengths for map-style datasets. Compute len() once instead of
        # twice (it may be non-trivial for large datasets).
        if hasattr(dataset_instance, "__len__"):
            dataset_instance_length = len(dataset_instance)
            self._per_dataset_lengths.append(dataset_instance_length)
            self._total_length += dataset_instance_length

    self._num_datasets = len(self.datasets)
    self.current_index = 0
    # NOTE(review): raises IndexError when every builder returned None —
    # presumably at least one dataset is always configured; confirm.
    self.current_dataset = self.datasets[self.current_index]
    self._infer_dataset_probabilities()
def build_datasets(self, config):
    """Build every dataset listed in ``config`` and register it on this instance.

    A dataset without a ``dataset_config`` entry is treated as a fatal
    configuration error: the message is written via ``self.writer`` and the
    process exits with status 1. Builders that return ``None`` are skipped;
    every kept dataset contributes its length to ``self._total_length``.
    """
    self.config = config
    self._process_datasets()

    for dataset_name in self._given_datasets:
        # Guard clause: a missing config entry is fatal here (unlike
        # variants that proceed with an empty config).
        if dataset_name not in self.config.dataset_config:
            self.writer.write(
                "Dataset %s is missing from "
                "dataset_config in config." % dataset_name,
                "error",
            )
            sys.exit(1)

        dataset_config = self.config.dataset_config[dataset_name]
        dataset_instance = build_dataset(dataset_name, dataset_config, self.dataset_type)
        if dataset_instance is None:
            continue

        self.datasets.append(dataset_instance)
        self._per_dataset_lengths.append(len(dataset_instance))
        self._total_length += len(dataset_instance)

    self._num_datasets = len(self.datasets)
    self.current_index = 0
    self.current_dataset = self.datasets[self.current_index]
    self._infer_dataset_probabilities()