def run():
    """Parse command-line options and synthesize audio from mel-spectrograms.

    Command-line options:
        --caching_dir: directory handed to ``get_log_dir`` and ``run_synthesis``.
        --hparams: comma-separated ``name=value`` hyperparameter overrides.
        --mode: one of ``eval``, ``synthesis`` or ``live``.
        --GTA: the string ``'True'`` or ``'False'``.
        --text_list: path of a text file with sentences to synthesize.
        --speaker_id: comma-separated list of speaker ids.

    Raises:
        ValueError: if ``--mode`` is not an accepted mode, or ``--GTA`` is
            neither ``'True'`` nor ``'False'``.
    """
    accepted_modes = ['eval', 'synthesis', 'live']

    parser = argparse.ArgumentParser()
    parser.add_argument('--caching_dir', default='/datasets/models/tacotron/cache')
    parser.add_argument(
        '--hparams',
        default='',
        help='Hyperparameter overrides as a comma-separated list of name=value pairs')
    parser.add_argument(
        '--mode',
        default='eval',
        help='mode of run: can be one of {}'.format(accepted_modes))
    parser.add_argument(
        '--GTA',
        default='True',
        help='Ground truth aligned synthesis, defaults to True, only considered in synthesis mode')
    parser.add_argument(
        '--text_list',
        default='',
        help='Text file contains list of texts to be synthesized. Valid if mode=eval')
    parser.add_argument(
        '--speaker_id',
        default=None,
        help='Defines the speakers ids to use when running standalone Wavenet on a folder of mels. this variable must be a comma-separated list of ids')
    args = parser.parse_args()

    # Validate by hand (rather than argparse ``choices``) so that callers keep
    # receiving ValueError for bad values.
    if args.mode not in accepted_modes:
        raise ValueError('accepted modes are: {}, found {}'.format(
            accepted_modes, args.mode))
    if args.GTA not in ('True', 'False'):
        raise ValueError('GTA option must be either True or False')

    modified_hp = hparams.parse(args.hparams)

    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

    # Imported after the log level has been exported, as in the original.
    from src.tac.training.wav_training import get_log_dir
    from src.tac.training.wav_training import get_save_dir

    wavenet_log_dir = get_log_dir(args.caching_dir)
    wavenet_pretrained = get_save_dir(wavenet_log_dir)

    log('Synthesizing audio from mel-spectrograms.. (This may take a while)')
    # Hand over the parsed hyperparameters so that --hparams overrides are the
    # ones actually used for synthesis (previously the parse result was dropped
    # and the module-level ``hparams`` object was passed instead).
    run_synthesis(args, wavenet_pretrained, args.caching_dir, modified_hp)
    log('Tacotron-2 TTS synthesis complete!')
def main():
    """Read the command-line options and forward them to ``run``.

    Recognised options are ``--base_dir``, ``--hparams`` (comma-separated
    ``name=value`` overrides) and ``--metadata``. The ``--hparams`` string is
    parsed with ``hparams.parse`` and the result is passed to ``run`` together
    with the parsed argument namespace.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument('--base_dir', default='')
    cli.add_argument(
        '--hparams',
        default='',
        help='Hyperparameter overrides as a comma-separated list of name=value pairs',
    )
    cli.add_argument('--metadata', default='tacotron_output/gta/map.txt')

    options = cli.parse_args()
    run(options, hparams.parse(options.hparams))
def run():
    """Command-line entry point for dataset preprocessing.

    Reads ``--dataset``, ``--cache_path``, ``--n_jobs`` and ``--hparams`` from
    the command line, builds a ``Preprocessor`` from them and runs it.
    ``--n_jobs`` defaults to the machine's CPU count.
    """
    from multiprocessing import cpu_count
    from src.tac.preprocessing.Dataset import LJSPEECH_TEST, LJSPEECH_LITE, LJSPEECH
    from src.tac.hparams import hparams

    print('initializing preprocessing..')

    cli = argparse.ArgumentParser()
    cli.add_argument('--dataset', default=LJSPEECH)
    cli.add_argument('--cache_path', default='/datasets/models/tacotron/cache')
    cli.add_argument('--n_jobs', type=int, default=cpu_count())
    cli.add_argument(
        '--hparams',
        default='',
        help='Hyperparameter overrides as a comma-separated list of name=value pairs',
    )
    options = cli.parse_args()

    overrides = hparams.parse(options.hparams)
    Preprocessor(
        options.n_jobs,
        options.cache_path,
        options.dataset,
        overrides,
    ).run()
def main():
    """Parse the preprocessing options and call ``run_preprocess``.

    Recognised options are ``--base_dir``, ``--hparams``, ``--input_dir``,
    ``--output`` and ``--n_jobs`` (defaults to the CPU count). The parsed
    namespace and the result of ``hparams.parse`` are both forwarded.
    """
    print('initializing preprocessing..')

    cli = argparse.ArgumentParser()
    cli.add_argument('--base_dir', default='')
    cli.add_argument(
        '--hparams',
        default='',
        help='Hyperparameter overrides as a comma-separated list of name=value pairs',
    )
    cli.add_argument('--input_dir', default='LJSpeech-1.1/wavs')
    cli.add_argument('--output', default='tacotron_output/gta/')
    cli.add_argument('--n_jobs', type=int, default=cpu_count())

    options = cli.parse_args()
    run_preprocess(options, hparams.parse(options.hparams))
def run():
    """Command-line entry point for Tacotron GTA synthesis.

    Parses the options, exports the requested TensorFlow C++ log level,
    creates the log directory, initialises ``infolog`` and finally calls
    ``run_synthesis`` with the save directory derived from the log directory.
    """
    # (flag, keyword arguments) pairs, registered in this exact order.
    option_specs = (
        ('--caching_dir', {'default': '/datasets/models/tacotron/cache'}),
        ('--mode', {
            'default': 'synthesis',
            'help': 'mode for synthesis of tacotron after training',
        }),
        ('--GTA', {
            'default': 'True',
            'help': 'Ground truth aligned synthesis, defaults to True, only considered in Tacotron synthesis mode',
        }),
        ('--tf_log_level', {
            'type': int,
            'default': 1,
            'help': 'Tensorflow C++ log level.',
        }),
        ('--hparams', {
            'default': '',
            'help': 'Hyperparameter overrides as a comma-separated list of name=value pairs',
        }),
    )

    cli = argparse.ArgumentParser()
    for flag, settings in option_specs:
        cli.add_argument(flag, **settings)
    options = cli.parse_args()

    overrides = hparams.parse(options.hparams)
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = str(options.tf_log_level)

    target_dir = get_log_dir(options.caching_dir)
    os.makedirs(target_dir, exist_ok=True)
    infolog.init(get_infolog_path(target_dir), 'tacotron')

    banner = (
        '\n##########################################################\n',
        'Tacotron GTA Synthesis\n',
        '###########################################################\n',
    )
    for banner_line in banner:
        log(banner_line)

    checkpoint_dir = get_save_dir(target_dir)
    run_synthesis(options, checkpoint_dir, overrides)
def run():
    """Command-line entry point that synthesizes mel-spectrograms from text.

    Parses the options, checks ``--mode`` and ``--GTA``, resolves the Tacotron
    save directory from ``--caching_dir``, collects the sentences with
    ``get_sentences`` and hands everything to ``run_eval``.

    Raises:
        ValueError: if ``--mode`` is not an accepted mode, or ``--GTA`` is
            neither ``'True'`` nor ``'False'``.
    """
    accepted_modes = ['eval', 'synthesis', 'live']

    cli = argparse.ArgumentParser()
    cli.add_argument('--caching_dir', default='/datasets/models/tacotron/cache')
    cli.add_argument('--checkpoint', default='pretrained/', help='Path to model checkpoint')
    cli.add_argument(
        '--hparams',
        default='',
        help='Hyperparameter overrides as a comma-separated list of name=value pairs',
    )
    # Options that are commented out (disabled) in the original source:
    #parser.add_argument('--name', help='Name of logging directory if the two models were trained together.')
    #parser.add_argument('--tacotron_name', help='Name of logging directory of Tacotron. If trained separately')
    #parser.add_argument('--wavenet_name', help='Name of logging directory of WaveNet. If trained separately')
    #parser.add_argument('--model', default='Tacotron-2')
    #parser.add_argument('--input_dir', default='training_data/', help='folder to contain inputs sentences/targets')
    #parser.add_argument('--mels_dir', default='tacotron_output/eval/', help='folder to contain mels to synthesize audio from using the Wavenet')
    #parser.add_argument('--output_dir', default='output/', help='folder to contain synthesized mel spectrograms')
    cli.add_argument(
        '--mode',
        default='eval',
        help='mode of run: can be one of {}'.format(accepted_modes),
    )
    cli.add_argument(
        '--GTA',
        default='True',
        help='Ground truth aligned synthesis, defaults to True, only considered in synthesis mode',
    )
    cli.add_argument(
        '--text_list',
        default='',
        help='Text file contains list of texts to be synthesized. Valid if mode=eval',
    )
    cli.add_argument(
        '--speaker_id',
        default=None,
        help='Defines the speakers ids to use when running standalone Wavenet on a folder of mels. this variable must be a comma-separated list of ids',
    )
    args = cli.parse_args()

    mode_is_known = args.mode in accepted_modes
    if not mode_is_known:
        raise ValueError('accepted modes are: {}, found {}'.format(
            accepted_modes, args.mode))

    gta_is_boolean_text = args.GTA in ('True', 'False')
    if not gta_is_boolean_text:
        raise ValueError('GTA option must be either True or False')

    overrides = hparams.parse(args.hparams)

    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

    # Imported only after the log level has been exported, as in the original.
    from src.tac.training.tacotron_training import get_log_dir, get_save_dir

    checkpoint_dir = get_save_dir(get_log_dir(args.caching_dir))

    # Disabled in the original source:
    #run_name = args.name or args.tacotron_name or args.model
    #taco_checkpoint = os.path.join('logs-' + run_name, 'taco_' + args.checkpoint)

    texts = get_sentences(args)

    log('Synthesizing mel-spectrograms from text..')
    run_eval(args, checkpoint_dir, overrides, texts)
self.processing_result = result return result def show_stats(self): assert self.processing_result textlenght_sum = sum([int(m[1]) for m in self.processing_result]) textlenght_max = max([int(m[1]) for m in self.processing_result]) print('Written {} utterances'.format(len(self.processing_result))) print('Sum input length (text chars): {}'.format(textlenght_sum)) print('Max input length (text chars): {}'.format(textlenght_max)) if __name__ == "__main__": from hparams import hparams from multiprocessing import cpu_count if __name__ == "__main__": from src.preprocessing.parser.LJSpeechDatasetParser import LJSpeechDatasetParser from src.preprocessing.parser.DummyIPADatasetParser import DummyIPADatasetParser #parser = LJSpeechDatasetParser('/datasets/LJSpeech-1.1-test') parser = DummyIPADatasetParser('/datasets/IPA-Dummy') processor = TextProcessor(hparams.parse(''), '/datasets/models/tacotron/cache') processor.process(parser, cpu_count()) processor.show_stats()
def run(testrun: bool = False):
    """Command-line entry point for Tacotron training.

    Parses the options, exports the TensorFlow C++ log level, creates the log
    directory, initialises ``infolog`` and calls ``train``.

    Args:
        testrun: when true, the *defaults* of ``--tacotron_train_steps`` and
            ``--checkpoint_interval`` shrink to 10 and 1 (instead of 20000 and
            1000). Values given explicitly on the command line still win.
    """
    import argparse
    import os
    # Not referenced below; retained from the original in case the import is
    # relied on for its side effects -- TODO confirm and remove if not.
    import tensorflow as tf
    from src.tac import infolog
    from src.tac.hparams import hparams

    def parse_bool(text):
        """Convert a command-line string into a real boolean.

        ``type=bool`` must not be used with argparse: ``bool('False')`` is
        ``True`` because every non-empty string is truthy, so ``--restore
        False`` used to enable restoring. The empty string maps to ``False``,
        which is what ``bool('')`` returned before.
        """
        normalized = text.strip().lower()
        if normalized in ('true', 't', 'yes', 'y', '1'):
            return True
        if normalized in ('false', 'f', 'no', 'n', '0', ''):
            return False
        raise argparse.ArgumentTypeError(
            'expected a boolean value (True/False), got {!r}'.format(text))

    train_steps = 10 if testrun else 20000
    checkpoint_interval = 1 if testrun else 1000

    parser = argparse.ArgumentParser()
    parser.add_argument('--caching_dir', default='/datasets/models/tacotron/cache')
    parser.add_argument(
        '--tacotron_train_steps',
        type=int,
        default=train_steps,
        help='total number of tacotron training steps')
    parser.add_argument(
        '--tf_log_level',
        type=int,
        default=1,
        help='Tensorflow C++ log level.')
    parser.add_argument(
        '--checkpoint_interval',
        type=int,
        default=checkpoint_interval,
        help='Steps between writing checkpoints')  # 2500
    parser.add_argument(
        '--hparams',
        default='',
        help='Hyperparameter overrides as a comma-separated list of name=value pairs')
    parser.add_argument(
        '--restore',
        type=parse_bool,
        default=False,
        help='Set this to False to do a fresh training')
    parser.add_argument(
        '--eval_interval',
        type=int,
        default=30000,
        help='Steps between eval on test data')
    parser.add_argument(
        '--summary_interval',
        type=int,
        default=30000,
        help='Steps between running summary ops')
    parser.add_argument(
        '--embedding_interval',
        type=int,
        default=30000,
        help='Steps between updating embeddings projection visualization')
    args = parser.parse_args()

    modified_hp = hparams.parse(args.hparams)
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = str(args.tf_log_level)

    log_dir = get_log_dir(args.caching_dir)
    os.makedirs(log_dir, exist_ok=True)
    infolog_path = get_infolog_path(log_dir)
    infolog.init(infolog_path, 'tacotron')

    log('\n##########################################################\n')
    log('Tacotron Train\n')
    log('###########################################################\n')

    train(log_dir, args, modified_hp)