def build_datasets(self, config):
        """Instantiate every dataset requested by *config*.

        Resets the internal dataset list, resolves the requested dataset
        names via ``dataset_list_from_config``, builds each one with
        ``build_dataset``, and finally recomputes the sampling
        probabilities.

        Args:
            config: Global configuration object; ``config.dataset_config``
                maps dataset names to their per-dataset configs.
        """
        # NOTE(review): `self.datasets` is appended to below while
        # `self._datasets` is reset here — presumably `datasets` is a
        # property over `_datasets` defined elsewhere; confirm.
        self._datasets = []
        self.config = config
        self._given_datasets = dataset_list_from_config(self.config)

        for dataset_name in self._given_datasets:
            if dataset_name not in self.config.dataset_config:
                # Missing per-dataset config is tolerated: warn and fall
                # back to an empty OmegaConf node.
                warnings.warn(
                    f"Dataset {dataset_name} is missing from dataset_config"
                    " in config. Proceeding with empty config.")
                dataset_config = OmegaConf.create()
            else:
                dataset_config = self.config.dataset_config[dataset_name]

            instance = build_dataset(dataset_name, dataset_config,
                                     self.dataset_type)
            # A builder may legitimately return None (e.g. nothing to load
            # for this split); skip it silently.
            if instance is not None:
                self.datasets.append(instance)
                self.dataset_list.append(dataset_name)

        self._num_datasets = len(self.datasets)
        self.current_index = 0

        self._infer_dataset_probabilities()
# Beispiel #2
    def build_datasets(self, config):
        """Build all configured datasets and record their lengths.

        For every dataset name produced by ``_process_datasets`` this
        builds an instance, accumulates per-dataset and total lengths
        (when the instance is sized), and recomputes sampling
        probabilities. Missing per-dataset configs are tolerated with a
        warning and an empty config.

        Args:
            config: Global configuration; ``config.dataset_config`` maps
                dataset names to their per-dataset configs.
        """
        self.config = config
        self._process_datasets()

        for dataset_name in self._given_datasets:
            if dataset_name not in self.config.dataset_config:
                warnings.warn(
                    f"Dataset {dataset_name} is missing from dataset_config"
                    " in config. Proceeding with empty config.")
                dataset_config = OmegaConf.create()
            else:
                dataset_config = self.config.dataset_config[dataset_name]

            instance = build_dataset(dataset_name, dataset_config,
                                     self.dataset_type)
            # Builders may return None when there is nothing to load.
            if instance is None:
                continue
            self.datasets.append(instance)

            # Only sized datasets (e.g. map-style) contribute lengths;
            # iterable-style datasets without __len__ are skipped here.
            if hasattr(instance, "__len__"):
                n = len(instance)
                assert n, f"dataset: {self.dataset_type} is empty"
                self._per_dataset_lengths.append(n)
                self._total_length += n

        self._num_datasets = len(self.datasets)
        self.current_index = 0
        self.current_dataset = self.datasets[self.current_index]

        self._infer_dataset_probabilities()
# Beispiel #3
    def build_datasets(self, config):
        """Build all configured datasets and record their lengths.

        For every dataset name produced by ``_process_datasets`` this
        builds an instance, accumulates per-dataset and total lengths
        (when the instance is sized), and recomputes sampling
        probabilities.

        Args:
            config: Global configuration; ``config.dataset_config`` maps
                dataset names to their per-dataset configs.

        Raises:
            RuntimeError: If a requested dataset has no entry in
                ``config.dataset_config``.
        """
        self.config = config
        self._process_datasets()

        for dataset in self._given_datasets:
            if dataset in self.config.dataset_config:
                dataset_config = self.config.dataset_config[dataset]
            else:
                raise RuntimeError(f"Dataset {dataset} is missing from "
                                   "dataset_config in config.")

            dataset_instance = build_dataset(dataset, dataset_config,
                                             self.dataset_type)
            # Builders may return None when there is nothing to load.
            if dataset_instance is None:
                continue
            self.datasets.append(dataset_instance)

            if hasattr(dataset_instance, "__len__"):
                # Hoist len(): __len__ may be non-trivial for some
                # datasets, so compute it once instead of twice.
                dataset_instance_length = len(dataset_instance)
                self._per_dataset_lengths.append(dataset_instance_length)
                self._total_length += dataset_instance_length

        self._num_datasets = len(self.datasets)
        self.current_index = 0
        # NOTE(review): raises IndexError if no dataset was built —
        # presumably callers guarantee at least one; confirm.
        self.current_dataset = self.datasets[self.current_index]

        self._infer_dataset_probabilities()
    def build_datasets(self, config):
        """Build all configured datasets and record their lengths.

        For every dataset name produced by ``_process_datasets`` this
        builds an instance, accumulates per-dataset and total lengths,
        and recomputes sampling probabilities. A dataset missing from
        ``config.dataset_config`` is reported via ``self.writer`` and
        aborts the process.

        Args:
            config: Global configuration; ``config.dataset_config`` maps
                dataset names to their per-dataset configs.
        """
        self.config = config
        self._process_datasets()

        for dataset in self._given_datasets:
            if dataset in self.config.dataset_config:
                dataset_config = self.config.dataset_config[dataset]
            else:
                self.writer.write(
                    "Dataset %s is missing from "
                    "dataset_config in config." % dataset,
                    "error",
                )
                sys.exit(1)

            dataset_instance = build_dataset(dataset, dataset_config,
                                             self.dataset_type)
            # Builders may return None when there is nothing to load.
            if dataset_instance is None:
                continue
            self.datasets.append(dataset_instance)
            # Hoist len(): __len__ may be non-trivial for some datasets,
            # so compute it once instead of twice.
            # NOTE(review): unlike sibling variants there is no
            # hasattr(__len__) guard — this assumes every instance is
            # sized; confirm no iterable-style datasets reach here.
            dataset_instance_length = len(dataset_instance)
            self._per_dataset_lengths.append(dataset_instance_length)
            self._total_length += dataset_instance_length

        self._num_datasets = len(self.datasets)
        self.current_index = 0
        # NOTE(review): raises IndexError if no dataset was built —
        # presumably callers guarantee at least one; confirm.
        self.current_dataset = self.datasets[self.current_index]

        self._infer_dataset_probabilities()