Example #1
0
    def from_config(cls, config_manager: Config, kind: str, scan_wavs=False):
        """Alternate constructor: build an instance from a Config object.

        Args:
            config_manager: project Config providing the dataset directory,
                the data name, and the per-kind metadata paths.
            kind: which metadata file to read — one of 'original',
                'phonemized', 'train', 'valid'.
            scan_wavs: forwarded to the constructor; whether to scan for
                wav files in the data directory.

        Raises:
            ValueError: if `kind` is not one of the accepted values.
        """
        # Maps each accepted kind to the Config attribute holding its
        # metadata path; getattr keeps the lookup lazy (only the selected
        # attribute is accessed, as in the original if/elif dispatch).
        metadata_attr_by_kind = {'original': 'metadata_path',
                                 'phonemized': 'phonemized_metadata_path',
                                 'train': 'train_metadata_path',
                                 'valid': 'valid_metadata_path'}
        kinds = list(metadata_attr_by_kind.keys())
        if kind not in metadata_attr_by_kind:
            raise ValueError(f'Invalid kind type. Expected one of: {kinds}')

        reader = get_preprocessor_by_name(config_manager.config['data_name'])
        metadata = getattr(config_manager, metadata_attr_by_kind[kind])
        return cls(data_directory=config_manager.dataset_dir,
                   metadata_reading_function=reader,
                   metadata_path=metadata,
                   scan_wavs=scan_wavs)
Example #2
0
def phoneme_lengths(phonemes, phoneme_padding=0):
    """Count non-padding entries along the last axis.

    Args:
        phonemes: integer tensor of phoneme ids; positions equal to
            `phoneme_padding` are treated as padding.
        phoneme_padding: the id used for padding (default 0).

    Returns:
        An int32 tensor of per-sequence lengths.
    """
    non_padding_mask = tf.not_equal(phonemes, phoneme_padding)
    return tf.reduce_sum(tf.cast(non_padding_mask, tf.int32), axis=-1)


if __name__ == '__main__':
    # Manual smoke test: wire up the full LJSpeech text-mel pipeline from
    # hard-coded local paths. NOTE(review): paths point at a developer
    # machine ('/Volumes/...') — adjust before running elsewhere.
    from preprocessing.metadata_readers import get_preprocessor_by_name
    from preprocessing.datasets import DataReader, AutoregressivePreprocessor, TextMelDataset
    from preprocessing.text.tokenizer import Tokenizer
    from preprocessing.text.symbols import all_phonemes
    from pathlib import Path

    ljspeech_folder = '/Volumes/data/datasets/LJSpeech-1.1'
    metadata_path = '/Volumes/data/datasets/LJSpeech-1.1/phonemized_metadata.txt'
    # Reader parses the LJSpeech metadata format; DataReader pairs metadata
    # rows with wav files found by scanning the data directory.
    metadata_reader = get_preprocessor_by_name('ljspeech')
    data_reader = DataReader(data_directory=ljspeech_folder,
                             metadata_path=metadata_path,
                             metadata_reading_function=metadata_reader,
                             scan_wavs=True)
    mel_dir = Path('/Volumes/data/datasets/LJSpeech-1.1/mels')

    # Tokenizer over the full phoneme alphabet; the preprocessor frames
    # 80-channel mels with start/end sentinel values (.5 / -.5) for the
    # autoregressive model.
    tokenizer = Tokenizer(alphabet=all_phonemes)
    preprocessor = AutoregressivePreprocessor(mel_channels=80,
                                              mel_start_value=.5,
                                              mel_end_value=-.5,
                                              tokenizer=tokenizer)
    dataset_creator = TextMelDataset(data_reader=data_reader,
                                     preprocessor=preprocessor,
                                     mel_directory=mel_dir)
    dataset = dataset_creator.get_dataset(