Example #1
import os
from typing import Callable, List, Optional, Tuple, Type

# The two imports above cover all six examples in this section. The meta,
# dataset, and loader classes (e.g. MaestroMeta, SpeechDataset,
# SpeechDataLoader) are project-specific and assumed to be importable from
# the surrounding codebase.


def get_datasets(
        meta_dir: str,
        batch_size: int,
        num_workers: int,
        fix_len: int = 0,
        skip_audio: bool = False) -> Tuple[SpeechDataLoader, SpeechDataLoader]:
    # TODO: update this function in general
    assert os.path.isdir(meta_dir), '{} is not a valid directory path!'.format(meta_dir)

    train_file, valid_file = MaestroMeta.frame_file_names[1:]

    # load meta file
    train_meta = MaestroMeta(os.path.join(meta_dir, train_file))
    valid_meta = MaestroMeta(os.path.join(meta_dir, valid_file))

    # create dataset
    train_dataset = SpeechDataset(train_meta,
                                  fix_len=fix_len,
                                  skip_audio=skip_audio)
    valid_dataset = SpeechDataset(valid_meta,
                                  fix_len=fix_len,
                                  skip_audio=skip_audio)

    # create data loader
    train_loader = SpeechDataLoader(train_dataset,
                                    batch_size=batch_size,
                                    num_workers=num_workers)
    valid_loader = SpeechDataLoader(valid_dataset,
                                    batch_size=batch_size,
                                    num_workers=num_workers)

    return train_loader, valid_loader
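
For context, here is a minimal usage sketch of the function above. The meta
directory path and hyperparameter values are placeholders, not values taken
from the source.

# Hypothetical call; the path, batch size, and worker count are placeholders.
train_loader, valid_loader = get_datasets(
    meta_dir='./meta/maestro',
    batch_size=32,
    num_workers=4)

for batch in train_loader:
    ...  # one training step per batch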
Example #2
def get_datasets(
        meta_dir: str, batch_size: int,
        num_workers: int) -> Tuple[SpeechDataLoader, SpeechDataLoader]:
    assert os.path.isdir(meta_dir), '{} is not a valid directory path!'.format(meta_dir)

    train_file, valid_file = LJSpeechMeta.frame_file_names[1:]

    # load meta file
    train_meta = LJSpeechMeta(os.path.join(meta_dir, train_file))
    valid_meta = LJSpeechMeta(os.path.join(meta_dir, valid_file))

    # create dataset
    train_dataset = SpeechTTSDataset(train_meta)
    valid_dataset = SpeechTTSDataset(valid_meta)

    # create data loader
    train_loader = SpeechDataLoader(train_dataset,
                                    batch_size=batch_size,
                                    is_bucket=True,
                                    num_workers=num_workers,
                                    n_buckets=5)
    valid_loader = SpeechDataLoader(valid_dataset,
                                    batch_size=batch_size,
                                    is_bucket=False,
                                    num_workers=num_workers)

    return train_loader, valid_loader
Example #3
def get_datasets(
        meta_dir: str,
        batch_size: int,
        num_workers: int,
        fix_len: int = 0,
        audio_mask: bool = False) -> Tuple[SpeechDataLoader, SpeechDataLoader]:
    assert os.path.isdir(meta_dir), '{} is not a valid directory path!'.format(meta_dir)

    train_file, valid_file = MedleyDBMeta.frame_file_names[1:]

    # load meta file
    train_meta = MedleyDBMeta(os.path.join(meta_dir, train_file))
    valid_meta = MedleyDBMeta(os.path.join(meta_dir, valid_file))

    # create dataset
    train_dataset = SpeechDataset(train_meta,
                                  fix_len=fix_len,
                                  audio_mask=audio_mask)
    valid_dataset = SpeechDataset(valid_meta,
                                  fix_len=fix_len,
                                  audio_mask=audio_mask)

    # create data loader
    train_loader = SpeechDataLoader(train_dataset,
                                    batch_size=batch_size,
                                    num_workers=num_workers,
                                    is_bucket=False)
    valid_loader = SpeechDataLoader(valid_dataset,
                                    batch_size=batch_size,
                                    num_workers=num_workers,
                                    is_bucket=False)

    return train_loader, valid_loader
Example #4
def get_concated_datasets(
        meta_dir_list: List[str],
        batch_size: int,
        num_workers: int,
        meta_cls_list: List[Type[MetaFrame]],
        fix_len: int = 0,
        skip_audio: bool = False,
        sample_rate: int = 44100,
        audio_mask: bool = False) -> Tuple[SpeechDataLoader, SpeechDataLoader]:

    assert all([os.path.isdir(x) for x in meta_dir_list]), \
        'meta_dir_list contains invalid directory paths!'
    assert len(meta_dir_list) == len(meta_cls_list), \
        'meta_dir_list and meta_cls_list must have the same length!'

    # datasets
    train_datasets = []
    valid_datasets = []

    for meta_cls, meta_dir in zip(meta_cls_list, meta_dir_list):
        train_file, valid_file = meta_cls.frame_file_names[1:]

        # load meta file
        train_meta = meta_cls(os.path.join(meta_dir, train_file),
                              sr=sample_rate)
        valid_meta = meta_cls(os.path.join(meta_dir, valid_file),
                              sr=sample_rate)

        # create dataset
        train_dataset = AugmentSpeechDataset(train_meta,
                                             fix_len=fix_len,
                                             skip_audio=skip_audio,
                                             audio_mask=audio_mask)
        valid_dataset = AugmentSpeechDataset(valid_meta,
                                             fix_len=fix_len,
                                             skip_audio=skip_audio,
                                             audio_mask=audio_mask)

        train_datasets.append(train_dataset)
        valid_datasets.append(valid_dataset)

    # make concat dataset
    train_conc_dataset = ConcatDataset(train_datasets)
    valid_conc_dataset = ConcatDataset(valid_datasets)

    # create data loader
    train_loader = SpeechDataLoader(train_conc_dataset,
                                    batch_size=batch_size,
                                    is_bucket=False,
                                    num_workers=num_workers,
                                    skip_last_bucket=False)
    valid_loader = SpeechDataLoader(valid_conc_dataset,
                                    batch_size=batch_size,
                                    is_bucket=False,
                                    num_workers=num_workers,
                                    skip_last_bucket=False)

    return train_loader, valid_loader
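
A sketch of how the concatenating variant might be called, reusing two of the
meta classes from the other examples; the paths and values are placeholder
assumptions.

# Hypothetical call combining two corpora; paths and values are placeholders.
train_loader, valid_loader = get_concated_datasets(
    meta_dir_list=['./meta/maestro', './meta/medleydb'],
    batch_size=16,
    num_workers=4,
    meta_cls_list=[MaestroMeta, MedleyDBMeta],
    sample_rate=44100)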
Example #5
def get_datasets(
        meta_dir: str,
        batch_size: int,
        num_workers: int,
        meta_cls: Type[MetaFrame] = VoiceBankMeta,
        fix_len: int = 0,
        skip_audio: bool = False,
        audio_mask: bool = False,
        is_audioset: bool = False
) -> Tuple[SpeechDataLoader, SpeechDataLoader]:

    assert os.path.isdir(meta_dir), '{} is not a valid directory path!'.format(meta_dir)

    train_file, valid_file = meta_cls.frame_file_names[1:]

    # load meta file
    train_meta = meta_cls(os.path.join(meta_dir, train_file))
    valid_meta = meta_cls(os.path.join(meta_dir, valid_file))

    # create dataset
    train_dataset = AugmentSpeechDataset(train_meta,
                                         fix_len=fix_len,
                                         skip_audio=skip_audio,
                                         audio_mask=audio_mask,
                                         is_audioset=is_audioset)
    valid_dataset = AugmentSpeechDataset(valid_meta,
                                         fix_len=fix_len,
                                         skip_audio=skip_audio,
                                         audio_mask=audio_mask,
                                         is_audioset=is_audioset)

    # create data loader
    train_loader = SpeechDataLoader(train_dataset,
                                    batch_size=batch_size,
                                    is_bucket=False,
                                    num_workers=num_workers,
                                    skip_last_bucket=False)
    valid_loader = SpeechDataLoader(valid_dataset,
                                    batch_size=batch_size,
                                    is_bucket=False,
                                    num_workers=num_workers,
                                    skip_last_bucket=False)

    return train_loader, valid_loader
Example #6
def get_datasets(
    meta_dir: str,
    batch_size: int,
    num_workers: int,
    fix_len: int = 0,
    skip_audio: bool = False,
    audio_mask: bool = False,
    skip_last_bucket: bool = True,
    n_buckets: int = 10,
    extra_features: Optional[List[Tuple[str, Callable]]] = None
) -> Tuple[SpeechDataLoader, SpeechDataLoader]:
    assert os.path.isdir(meta_dir), '{} is not a valid directory path!'.format(meta_dir)

    train_file, valid_file = LibriTTSMeta.frame_file_names[1:]

    # load meta file
    train_meta = LibriTTSMeta(os.path.join(meta_dir, train_file))
    valid_meta = LibriTTSMeta(os.path.join(meta_dir, valid_file))

    # create dataset
    train_dataset = SpeechDataset(train_meta,
                                  fix_len=fix_len,
                                  skip_audio=skip_audio,
                                  audio_mask=audio_mask,
                                  extra_features=extra_features)
    valid_dataset = SpeechDataset(valid_meta,
                                  fix_len=fix_len,
                                  skip_audio=skip_audio,
                                  audio_mask=audio_mask,
                                  extra_features=extra_features)

    # create data loader
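    # NOTE: n_buckets/skip_last_bucket presumably take effect only when
    # bucketing is enabled; is_bucket=True may be intended here (cf. Example
    # #2), but SpeechDataLoader's default for is_bucket is not shown in these
    # snippets.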
    train_loader = SpeechDataLoader(train_dataset,
                                    batch_size=batch_size,
                                    n_buckets=n_buckets,
                                    num_workers=num_workers,
                                    skip_last_bucket=skip_last_bucket)
    valid_loader = SpeechDataLoader(valid_dataset,
                                    batch_size=batch_size,
                                    is_bucket=False,
                                    num_workers=num_workers)

    return train_loader, valid_loader
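
The extra_features hook takes (name, callable) pairs. Below is a hedged sketch
of such a pair; the callable's signature (what SpeechDataset passes into it) is
an assumption, since it is not documented in these snippets.

import numpy as np

# Hypothetical extra feature; assumes the callable receives the raw waveform.
def rms_energy(wav: np.ndarray) -> float:
    return float(np.sqrt(np.mean(wav ** 2)))

train_loader, valid_loader = get_datasets(
    meta_dir='./meta/libritts',  # placeholder path
    batch_size=32,
    num_workers=4,
    extra_features=[('rms', rms_energy)])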