def prepare_data(self):
    """Build the train/validation/test datasets chosen by ``self.hp.data``.

    The base corpus is picked with ``in`` tests against ``self.hp.data``:
    ``"timit"`` is checked first, then ``"buckeye"``. When ``"libri"`` is
    also present, a LibriSpeech dataset is concatenated onto the training
    split only. The resulting splits are stored on ``self.train_dataset``,
    ``self.valid_dataset`` and ``self.test_dataset``, and a short summary
    (path and length of each split) is printed.

    Raises:
        Exception: if ``self.hp.data`` contains neither ``"timit"`` nor
            ``"buckeye"``.
    """
    hp = self.hp
    selected = hp.data

    # Pick the base corpus; TIMIT wins if both names are present.
    if "timit" in selected:
        train_set, valid_set, test_set = TrainTestDataset.get_datasets(
            path=hp.timit_path,
        )
    elif "buckeye" in selected:
        train_set, valid_set, test_set = TrainValTestDataset.get_datasets(
            path=hp.buckeye_path,
            percent=hp.buckeye_percent,
        )
    else:
        raise Exception("no such training data!")

    # Optionally extend the training split (and only that split) with Libri.
    if "libri" in selected:
        libri_train = LibriSpeechDataset(
            path=hp.libri_path,
            subset=hp.libri_subset,
            percent=hp.libri_percent,
        )
        train_set = ConcatDataset([train_set, libri_train])
        # Attach a combined ``path`` so the summary below can print the
        # concatenated dataset the same way as the other splits.
        member_paths = (member.path for member in train_set.datasets)
        train_set.path = "\n\t+".join(member_paths)
        print(f"added libri ({len(libri_train)} examples)")

    self.train_dataset = train_set
    self.valid_dataset = valid_set
    self.test_dataset = test_set

    # Summary of what was loaded, framed by the file's ``line()`` helper.
    line()
    print("DATA:")
    print(f"train: {self.train_dataset.path} ({len(self.train_dataset)})")
    print(f"valid: {self.valid_dataset.path} ({len(self.valid_dataset)})")
    print(f"test: {self.test_dataset.path} ({len(self.test_dataset)})")
    line()