def get_datasets(meta_dir: str,
                 batch_size: int,
                 num_workers: int,
                 fix_len: int = 0,
                 skip_audio: bool = False) -> Tuple[SpeechDataLoader, SpeechDataLoader]:
    # TODO: update this function in general
    assert os.path.isdir(meta_dir), '{} is not a valid directory path!'.format(meta_dir)

    train_file, valid_file = MaestroMeta.frame_file_names[1:]

    # load meta file
    train_meta = MaestroMeta(os.path.join(meta_dir, train_file))
    valid_meta = MaestroMeta(os.path.join(meta_dir, valid_file))

    # create dataset
    train_dataset = SpeechDataset(train_meta, fix_len=fix_len, skip_audio=skip_audio)
    valid_dataset = SpeechDataset(valid_meta, fix_len=fix_len, skip_audio=skip_audio)

    # create data loader
    train_loader = SpeechDataLoader(train_dataset, batch_size=batch_size, num_workers=num_workers)
    valid_loader = SpeechDataLoader(valid_dataset, batch_size=batch_size, num_workers=num_workers)

    return train_loader, valid_loader
def get_datasets(meta_dir: str,
                 batch_size: int,
                 num_workers: int) -> Tuple[SpeechDataLoader, SpeechDataLoader]:
    assert os.path.isdir(meta_dir), '{} is not a valid directory path!'.format(meta_dir)

    train_file, valid_file = LJSpeechMeta.frame_file_names[1:]

    # load meta file
    train_meta = LJSpeechMeta(os.path.join(meta_dir, train_file))
    valid_meta = LJSpeechMeta(os.path.join(meta_dir, valid_file))

    # create dataset
    train_dataset = SpeechTTSDataset(train_meta)
    valid_dataset = SpeechTTSDataset(valid_meta)

    # create data loader
    train_loader = SpeechDataLoader(train_dataset, batch_size=batch_size, is_bucket=True,
                                    num_workers=num_workers, n_buckets=5)
    valid_loader = SpeechDataLoader(valid_dataset, batch_size=batch_size, is_bucket=False,
                                    num_workers=num_workers)

    return train_loader, valid_loader
def get_datasets(meta_dir: str,
                 batch_size: int,
                 num_workers: int,
                 fix_len: int = 0,
                 audio_mask: bool = False) -> Tuple[SpeechDataLoader, SpeechDataLoader]:
    assert os.path.isdir(meta_dir), '{} is not a valid directory path!'.format(meta_dir)

    train_file, valid_file = MedleyDBMeta.frame_file_names[1:]

    # load meta file
    train_meta = MedleyDBMeta(os.path.join(meta_dir, train_file))
    valid_meta = MedleyDBMeta(os.path.join(meta_dir, valid_file))

    # create dataset
    train_dataset = SpeechDataset(train_meta, fix_len=fix_len, audio_mask=audio_mask)
    valid_dataset = SpeechDataset(valid_meta, fix_len=fix_len, audio_mask=audio_mask)

    # create data loader
    train_loader = SpeechDataLoader(train_dataset, batch_size=batch_size, num_workers=num_workers,
                                    is_bucket=False)
    valid_loader = SpeechDataLoader(valid_dataset, batch_size=batch_size, num_workers=num_workers,
                                    is_bucket=False)

    return train_loader, valid_loader
def get_concated_datasets(meta_dir_list: List[str],
                          batch_size: int,
                          num_workers: int,
                          meta_cls_list: List[MetaFrame],
                          fix_len: int = 0,
                          skip_audio: bool = False,
                          sample_rate: int = 44100,
                          audio_mask: bool = False) -> Tuple[SpeechDataLoader, SpeechDataLoader]:
    assert all([os.path.isdir(x) for x in meta_dir_list]), \
        'Some of the given meta directories are not valid paths!'
    assert len(meta_dir_list) == len(meta_cls_list), \
        'meta_dir_list and meta_cls_list must have the same length!'

    # datasets
    train_datasets = []
    valid_datasets = []

    for meta_cls, meta_dir in zip(meta_cls_list, meta_dir_list):
        train_file, valid_file = meta_cls.frame_file_names[1:]

        # load meta file
        train_meta = meta_cls(os.path.join(meta_dir, train_file), sr=sample_rate)
        valid_meta = meta_cls(os.path.join(meta_dir, valid_file), sr=sample_rate)

        # create dataset
        train_dataset = AugmentSpeechDataset(train_meta, fix_len=fix_len,
                                             skip_audio=skip_audio, audio_mask=audio_mask)
        valid_dataset = AugmentSpeechDataset(valid_meta, fix_len=fix_len,
                                             skip_audio=skip_audio, audio_mask=audio_mask)

        train_datasets.append(train_dataset)
        valid_datasets.append(valid_dataset)

    # make concat dataset
    train_conc_dataset = ConcatDataset(train_datasets)
    valid_conc_dataset = ConcatDataset(valid_datasets)

    # create data loader
    train_loader = SpeechDataLoader(train_conc_dataset, batch_size=batch_size, is_bucket=False,
                                    num_workers=num_workers, skip_last_bucket=False)
    valid_loader = SpeechDataLoader(valid_conc_dataset, batch_size=batch_size, is_bucket=False,
                                    num_workers=num_workers, skip_last_bucket=False)

    return train_loader, valid_loader
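# Example (illustrative sketch, not part of the library): how get_concated_datasets might be
# called to merge two corpora into a single pair of loaders. The directory paths and numeric
# values below are hypothetical placeholders; VoiceBankMeta and LibriTTSMeta are the meta
# classes referenced elsewhere in these modules.
#
# train_loader, valid_loader = get_concated_datasets(
#     meta_dir_list=['/data/voicebank/meta', '/data/libritts/meta'],  # hypothetical paths
#     batch_size=32,
#     num_workers=4,
#     meta_cls_list=[VoiceBankMeta, LibriTTSMeta],
#     fix_len=16384,        # crop length in samples; illustrative value
#     sample_rate=44100,
#     audio_mask=True,
# )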
def get_datasets(meta_dir: str,
                 batch_size: int,
                 num_workers: int,
                 meta_cls: MetaFrame = VoiceBankMeta,
                 fix_len: int = 0,
                 skip_audio: bool = False,
                 audio_mask: bool = False,
                 is_audioset: bool = False) -> Tuple[SpeechDataLoader, SpeechDataLoader]:
    assert os.path.isdir(meta_dir), '{} is not a valid directory path!'.format(meta_dir)

    train_file, valid_file = meta_cls.frame_file_names[1:]

    # load meta file
    train_meta = meta_cls(os.path.join(meta_dir, train_file))
    valid_meta = meta_cls(os.path.join(meta_dir, valid_file))

    # create dataset
    train_dataset = AugmentSpeechDataset(train_meta, fix_len=fix_len, skip_audio=skip_audio,
                                         audio_mask=audio_mask, is_audioset=is_audioset)
    valid_dataset = AugmentSpeechDataset(valid_meta, fix_len=fix_len, skip_audio=skip_audio,
                                         audio_mask=audio_mask, is_audioset=is_audioset)

    # create data loader
    train_loader = SpeechDataLoader(train_dataset, batch_size=batch_size, is_bucket=False,
                                    num_workers=num_workers, skip_last_bucket=False)
    valid_loader = SpeechDataLoader(valid_dataset, batch_size=batch_size, is_bucket=False,
                                    num_workers=num_workers, skip_last_bucket=False)

    return train_loader, valid_loader
def get_datasets(meta_dir: str,
                 batch_size: int,
                 num_workers: int,
                 fix_len: int = 0,
                 skip_audio: bool = False,
                 audio_mask: bool = False,
                 skip_last_bucket: bool = True,
                 n_buckets: int = 10,
                 extra_features: List[Tuple[str, Callable]] = None
                 ) -> Tuple[SpeechDataLoader, SpeechDataLoader]:
    assert os.path.isdir(meta_dir), '{} is not a valid directory path!'.format(meta_dir)

    train_file, valid_file = LibriTTSMeta.frame_file_names[1:]

    # load meta file
    train_meta = LibriTTSMeta(os.path.join(meta_dir, train_file))
    valid_meta = LibriTTSMeta(os.path.join(meta_dir, valid_file))

    # create dataset
    train_dataset = SpeechDataset(train_meta, fix_len=fix_len, skip_audio=skip_audio,
                                  audio_mask=audio_mask, extra_features=extra_features)
    valid_dataset = SpeechDataset(valid_meta, fix_len=fix_len, skip_audio=skip_audio,
                                  audio_mask=audio_mask, extra_features=extra_features)

    # create data loader
    train_loader = SpeechDataLoader(train_dataset, batch_size=batch_size, n_buckets=n_buckets,
                                    num_workers=num_workers, skip_last_bucket=skip_last_bucket)
    valid_loader = SpeechDataLoader(valid_dataset, batch_size=batch_size, is_bucket=False,
                                    num_workers=num_workers)

    return train_loader, valid_loader
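# Example (illustrative sketch): a possible call to the LibriTTS variant above, where the
# training loader buckets utterances by length while the validation loader does not.
# The meta_dir path and numeric values are placeholders, not recommended settings.
#
# train_loader, valid_loader = get_datasets(
#     meta_dir='/data/libritts/meta',  # hypothetical path
#     batch_size=16,
#     num_workers=4,
#     n_buckets=10,
#     skip_last_bucket=True,
# )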