def run_synthesis(args, checkpoint_path, output_dir, hparams):
    GTA = (args.GTA == 'True')
    if GTA:
        synth_dir = os.path.join(output_dir, 'gta')
    else:
        synth_dir = os.path.join(output_dir, 'natural')
    os.makedirs(synth_dir, exist_ok=True)

    metadata_filename = os.path.join(args.input_dir, 'train.txt')
    log(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, hparams, gta=GTA)
    with open(metadata_filename, encoding='utf-8') as f:
        metadata = [line.strip().split('|') for line in f]
        frame_shift_ms = hparams.hop_size / hparams.sample_rate  # seconds per frame, despite the 'ms' in the name
        hours = sum([int(x[4]) for x in metadata]) * frame_shift_ms / 3600
        log('Loaded metadata for {} examples ({:.2f} hours)'.format(len(metadata), hours))

    log('starting synthesis')
    mel_dir = os.path.join(args.input_dir, 'mels')
    wav_dir = os.path.join(args.input_dir, 'audio')
    with open(os.path.join(synth_dir, 'map.txt'), 'w') as file:
        for i, meta in enumerate(tqdm(metadata)):
            text = meta[5]
            mel_filename = os.path.join(mel_dir, meta[1])
            wav_filename = os.path.join(wav_dir, meta[0])
            mel_output_filename = synth.synthesize(text, i + 1, synth_dir, None, mel_filename)
            file.write('{}|{}|{}|{}\n'.format(wav_filename, mel_filename, mel_output_filename, text))
    log('synthesized mel spectrograms at {}'.format(synth_dir))
    return os.path.join(synth_dir, 'map.txt')

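# A minimal sketch of the duration bookkeeping in run_synthesis above: a total
# mel-frame count converts to hours of audio via hop_size / sample_rate seconds
# per frame. The hop_size and sample_rate defaults below are assumed values for
# illustration, not hparams taken from this codebase.
def frames_to_hours(total_frames, hop_size=275, sample_rate=22050):
    """Convert a mel-frame count to hours of audio."""
    return total_frames * hop_size / sample_rate / 3600

# Example: frames_to_hours(10_480_000) -> 10,480,000 * (275 / 22050) s
#          ~= 130,703 s ~= 36.3 hours.
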
def run_eval(args, checkpoint_path, output_dir, hparams, sentences):
    eval_dir = os.path.join(output_dir, 'eval')
    log_dir = os.path.join(output_dir, 'logs-eval')
    if args.model in ('Both', 'Tacotron-2'):
        assert os.path.normpath(eval_dir) == os.path.normpath(args.mels_dir)  # mels_dir = wavenet_input_dir

    # Create output path if it doesn't exist
    os.makedirs(eval_dir, exist_ok=True)
    os.makedirs(log_dir, exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'wavs'), exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'plots'), exist_ok=True)

    log(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, hparams)

    with open(os.path.join(eval_dir, 'map.txt'), 'w') as file:
        for i, text in enumerate(tqdm(sentences)):
            start = time.time()
            mel_filename = synth.synthesize(text, i + 1, eval_dir, log_dir, None)
            file.write('{}|{}\n'.format(text, mel_filename))
    log('synthesized mel spectrograms at {}'.format(eval_dir))
    return eval_dir

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--base_dir', default=os.path.expanduser('~/tacotron'))
    parser.add_argument('--output', default='training')
    parser.add_argument('--dataset', required=True, choices=['blizzard', 'ljspeech', 'nick'])
    parser.add_argument('--num_workers', type=int, default=cpu_count())
    parser.add_argument('--hparams', default='',
                        help='Hyperparameter overrides as a comma-separated list of name=value pairs')
    parser.add_argument('--validation_size', type=int, default=0)
    parser.add_argument('--test_size', type=int, default=0)
    args = parser.parse_args()

    hparams.parse(args.hparams)
    log(hparams_debug_string())

    if args.dataset == 'blizzard':
        preprocess_blizzard(args, hparams)
    elif args.dataset == 'ljspeech':
        preprocess_ljspeech(args, hparams)
    elif args.dataset == 'nick':
        preprocess_nick(args, hparams)

def run_eval(args, checkpoint_path, output_dir, hparams, sentences):
    eval_dir = os.path.join(output_dir, 'eval')
    log_dir = os.path.join(output_dir, 'logs-eval')

    # Create output path if it doesn't exist
    os.makedirs(eval_dir, exist_ok=True)
    os.makedirs(log_dir, exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'wavs'), exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'plots'), exist_ok=True)

    log(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, hparams, reference_mel=args.reference_audio)
    if args.reference_audio is not None:
        ref_wav = audio.load_wav(args.reference_audio)
        reference_mel = audio.melspectrogram(ref_wav).astype(np.float32).T
    else:
        raise ValueError('Evaluation requires reference audio. Please provide a path to a reference audio file.')

    with open(os.path.join(eval_dir, 'map.txt'), 'w') as file:
        for i, text in enumerate(tqdm(sentences)):
            start = time.time()
            mel_filename = synth.synthesize(text, i + 1, eval_dir, log_dir, None, reference_mel=reference_mel)
            file.write('{}|{}\n'.format(text, mel_filename))
    log('synthesized mel spectrograms at {}'.format(eval_dir))
    return eval_dir

def setup_log(log_path, checkpoint_path, input_path):
    infolog.init(log_path, 'emt4_disc', None)
    log('hi')
    log('Checkpoint path: {}'.format(checkpoint_path))
    log('Loading training data from: {}'.format(input_path))
    log('Using model: {}'.format('emt4_disc'))
    log(hparams_debug_string())

def run_eval(args, checkpoint_path, output_dir, hparams, sentences):
    eval_dir = os.path.join(output_dir, 'eval')
    log_dir = os.path.join(output_dir, 'logs-eval')
    if args.model == 'Tacotron-2':
        assert os.path.normpath(eval_dir) == os.path.normpath(args.mels_dir)  # mels_dir = wavenet_input_dir

    # Create output path if it doesn't exist
    os.makedirs(eval_dir, exist_ok=True)
    os.makedirs(log_dir, exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'wavs'), exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'plots'), exist_ok=True)

    log(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, hparams)

    with open(os.path.join(eval_dir, 'map.txt'), 'w') as file:
        for i, text in enumerate(tqdm(sentences)):
            start = time.time()
            mel_filename = synth.synthesize(text, i + 1, eval_dir, log_dir, None)
            file.write('{}|{}\n'.format(text, mel_filename))
    log('synthesized mel spectrograms at {}'.format(eval_dir))
    return eval_dir

def run_eval(args, checkpoint_path, output_dir, hparams, sentences):
    eval_dir = os.path.join(output_dir, 'eval')
    log_dir = os.path.join(output_dir, 'logs-eval')
    if args.model in ('Both', 'Tacotron-2'):
        assert os.path.normpath(eval_dir) == os.path.normpath(args.mels_dir)  # mels_dir = wavenet_input_dir

    # Create output path if it doesn't exist
    os.makedirs(eval_dir, exist_ok=True)
    os.makedirs(log_dir, exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'wavs'), exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'plots'), exist_ok=True)

    log(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, hparams)

    with open(os.path.join(eval_dir, 'map.txt'), 'w') as file:
        for i, text in enumerate(tqdm(sentences)):
            start = time.time()
            mel_filename = synth.synthesize(text, i + 1, eval_dir, log_dir, None)
            #file.write('{}|{}\n'.format(text, mel_filename))
            #npy_data = np.load(mel_filename)
            #npy_data = npy_data.reshape((-1,))
            #npy_data.tofile("f32_for_lpcnet.f32")
            print("Features f32 file created for text")
            end = time.time()
            print(">>>>>LPCNet Feature to PCM Conversion time = {}".format(end - start))
    log('synthesized mel spectrograms at {}'.format(eval_dir))
    return eval_dir

def init_tacotron2(args):
    # t2
    print('\n#####################################')
    if args.model == 'Tacotron':
        print('\nInitialising Tacotron Model...\n')
        t2_hparams = hparams.parse(args.hparams)
        try:
            checkpoint_path = tf.train.get_checkpoint_state(args.taco_checkpoint).model_checkpoint_path
            log('loaded model at {}'.format(checkpoint_path))
        except:
            raise RuntimeError('Failed to load checkpoint at {}'.format(args.taco_checkpoint))

    output_dir = 'tacotron_' + args.output_dir
    eval_dir = os.path.join(output_dir, 'eval')
    log_dir = os.path.join(output_dir, 'logs-eval')
    print('eval_dir:', eval_dir)
    print('args.mels_dir:', args.mels_dir)

    # Create output path if it doesn't exist
    os.makedirs(eval_dir, exist_ok=True)
    os.makedirs(log_dir, exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'wavs'), exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'plots'), exist_ok=True)

    log(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, t2_hparams)
    return synth, eval_dir, log_dir

def run_eval(args):
    print(hparams_debug_string())
    is_teacher_force = False
    mel_targets = args.mel_targets
    reference_mel = None
    if args.mel_targets is not None:
        is_teacher_force = True
        mel_targets = np.load(args.mel_targets)
    synth = Synthesizer(teacher_forcing_generating=is_teacher_force)
    synth.load(args.checkpoint, args.reference_audio)
    base_path = get_output_base_path(args.checkpoint)

    if args.reference_audio is not None:
        ref_wav = audio.load_wav(args.reference_audio)
        reference_mel = audio.melspectrogram(ref_wav).astype(np.float32).T
        path = '%s_ref-%s.wav' % (base_path, os.path.splitext(os.path.basename(args.reference_audio))[0])
        alignment_path = '%s_ref-%s-align.png' % (base_path, os.path.splitext(os.path.basename(args.reference_audio))[0])
    else:
        if hparams.use_gst:
            print("*******************************")
            print("TODO: add style weights when there is no reference audio. Now we use random weights, "
                  + "which may generate unintelligible audio sometimes.")
            print("*******************************")
            path = '%s_ref-randomWeight.wav' % base_path
            alignment_path = '%s_ref-%s-align.png' % (base_path, 'randomWeight')
        else:
            raise ValueError("You must set the reference audio if you don't want to use GSTs.")

    with open(path, 'wb') as f:
        print('Synthesizing: %s' % args.text)
        print('Output wav file: %s' % path)
        print('Output alignments: %s' % alignment_path)
        f.write(synth.synthesize(args.text, mel_targets=mel_targets, reference_mel=reference_mel,
                                 alignment_path=alignment_path))

def run_synthesis(args, checkpoint_path, output_dir, hparams):
    log_dir = os.path.join(output_dir, 'plots')
    wav_dir = os.path.join(output_dir, 'wavs')

    # We assume the user provides the correct folder for the chosen training method
    log(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, hparams)

    if args.model == 'Tacotron-2':
        # If running all of Tacotron-2, synthesize audio from evaluated mels
        metadata_filename = os.path.join(args.mels_dir, 'map.txt')
        with open(metadata_filename, encoding='utf-8') as f:
            metadata = [line.strip().split('|') for line in f]
            frame_shift_ms = hparams.hop_size / hparams.sample_rate  # seconds per frame, despite the 'ms' in the name
            hours = sum([int(x[-1]) for x in metadata]) * frame_shift_ms / 3600
            log('Loaded metadata for {} examples ({:.2f} hours)'.format(len(metadata), hours))

        metadata = np.array(metadata)
        speaker_ids = metadata[:, 2]
        mel_files = metadata[:, 1]
        texts = metadata[:, 0]
    else:
        # else get all npy files in input_dir (assuming they are mels)
        mel_files = [os.path.join(args.mels_dir, f) for f in os.listdir(args.mels_dir)
                     if f.split('.')[-1] == 'npy']
        speaker_ids = args.speaker_id
        texts = None

    log('Starting synthesis! (this will take a while..)')
    os.makedirs(log_dir, exist_ok=True)
    os.makedirs(wav_dir, exist_ok=True)

    with open(os.path.join(wav_dir, 'map.txt'), 'w') as file:
        for i, mel_file in enumerate(tqdm(mel_files)):
            mel_spectro = np.load(mel_file)
            if hparams.normalize_for_wavenet:
                # [-max, max] or [0, max]
                T2_output_range = ((-hparams.max_abs_value, hparams.max_abs_value)
                                   if hparams.symmetric_mels else (0, hparams.max_abs_value))
                # rerange to [0, 1]
                mel_spectro = np.interp(mel_spectro, T2_output_range, (0, 1))

            basename = mel_file.replace('.npy', '')
            speaker_id = speaker_ids[i]
            audio_file = synth.synthesize(mel_spectro, speaker_id, basename, wav_dir, log_dir)

            if texts is None:
                file.write('{}|{}\n'.format(mel_file, audio_file))
            else:
                file.write('{}|{}|{}\n'.format(texts[i], mel_file, audio_file))
    log('synthesized audio waveforms at {}'.format(wav_dir))

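# A minimal sketch of the np.interp rerange used in run_synthesis above:
# mapping Tacotron's symmetric mel range [-max_abs_value, max_abs_value] onto
# the [0, 1] range the WaveNet vocoder expects. max_abs_value = 4.0 is an
# assumed value for illustration.
import numpy as np

max_abs_value = 4.0
mel = np.array([-4.0, -2.0, 0.0, 4.0])
rescaled = np.interp(mel, (-max_abs_value, max_abs_value), (0, 1))
# rescaled -> array([0.  , 0.25, 0.5 , 1.  ])
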
def run_eval(args, checkpoint_path, output_dir, hparams, sentences):
    eval_dir = os.path.join(output_dir, 'eval')
    log_dir = os.path.join(output_dir, 'logs-eval')
    if args.model in ('Both', 'Tacotron-2'):
        assert os.path.normpath(eval_dir) == os.path.normpath(args.mels_dir)  # mels_dir = wavenet_input_dir

    # Create output path if it doesn't exist
    os.makedirs(eval_dir, exist_ok=True)
    os.makedirs(log_dir, exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'wavs'), exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'plots'), exist_ok=True)

    log(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, hparams)

    with open(os.path.join(eval_dir, 'map.txt'), 'w') as file:
        for i, text in enumerate(tqdm(sentences)):
            values = text.split('|')
            if len(values) == 1:
                raise ValueError('invalid "speaker_id|text" format')
            speak_id = values[0]
            text = values[1]
            if is_korean_text(text):
                text = normalize_number(text)
            # Split Hangul into individual jamo units
            text = split_to_jamo(text, hparams.cleaners)
            mel_filename = synth.synthesize(text, i + 1, eval_dir, log_dir, None, speak_id)
            file.write('{}|{}\n'.format(text, mel_filename))
    log('synthesized mel spectrograms at {}'.format(eval_dir))
    return eval_dir

def run_live(args, checkpoint_path, hparams):
    # Log to terminal without keeping any records in files
    log(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, hparams)

    # Generate fast greeting message
    greetings = 'Hello, Welcome to the Live testing tool. Please type a message and I will try to read it!'
    log(greetings)
    generate_fast(synth, greetings)

    # Interaction loop
    while True:
        try:
            text = input()
            if text == 'quit':
                break
            if args.speaker_id is None:
                speaker_id = random.choice(list(range(1, args.num_speakers)))
            else:
                speaker_id = args.speaker_id
            if text:
                generate_fast(synth, text, speaker_id)
        except KeyboardInterrupt:
            leave = 'Thank you for testing our features. see you soon.'
            log(leave)
            generate_fast(synth, leave)
            sleep(2)
            break

def run_synthesis(checkpoint_path, output_dir, hparams):
    log_dir = os.path.join(output_dir, 'plots')
    wav_dir = os.path.join(output_dir, 'wavs')
    embed_dir = os.path.join(output_dir, 'embeddings')

    log(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, hparams)

    metadata_filename = os.path.join(hparams.wavenet_synth, 'map.txt')
    with open(metadata_filename, encoding='utf-8') as f:
        metadata = np.array([line.strip().split('|') for line in f])

    # If synth mode is "all" and synth_idx is set, extract a slice of the metadata
    if (hparams.synth_mode == "all") and (hparams.synth_idx is not None):
        metadata = metadata[hparams.synth_idx[0]:hparams.synth_idx[1], :]

    # speaker ids from the trained speakers list
    speaker_ids = metadata[:, 3]
    print("spk_ids " + str(speaker_ids.shape))
    mel_files = metadata[:, 1]
    print("mel_files " + str(mel_files.shape))

    log('Starting synthesis! (this will take a while..)')
    os.makedirs(log_dir, exist_ok=True)
    os.makedirs(wav_dir, exist_ok=True)
    os.makedirs(embed_dir, exist_ok=True)

    synth_dict = load_synthesis_dict()

    for idx, mel_file in enumerate(tqdm(mel_files)):
        print("idx")
        print(idx)
        mel_spectro = [np.load(mel_file)]
        basenames = [os.path.basename(mel_file).replace('.npy', '')]
        speaker_id = [speaker_ids[idx]]
        print("synthesizing {}".format(basenames[0]))
        if hparams.synth_mode == "all":
            if basenames[0].split('-')[1] in synth_dict.keys():
                print("Synthesizing both wav and embedding")
                synth.synthesize(mel_spectro, speaker_id, basenames, wav_dir, log_dir, embed_dir, embed_only=False)
            else:
                print("Synthesizing embedding only")
                synth.synthesize(mel_spectro, speaker_id, basenames, wav_dir, log_dir, embed_dir, embed_only=True)
        elif hparams.synth_mode == "embedding":
            print("Synthesizing embedding only")
            synth.synthesize(mel_spectro, speaker_id, basenames, wav_dir, log_dir, embed_dir, embed_only=True)
        elif hparams.synth_mode == "wav":
            if basenames[0].split('-')[1] in synth_dict.keys():
                synth.synthesize(mel_spectro, speaker_id, basenames, wav_dir, log_dir, embed_dir, embed_only=False)
        else:
            print("Not supported synth mode.")
    log('synthesized audio waveforms at {}'.format(wav_dir))

def prepare_run(args):
    modified_hp = hparams.parse(args.hparams)
    print(hparams_debug_string())
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = str(args.tf_log_level)
    run_name = args.name or args.model
    log_dir = os.path.join(args.base_dir, 'logs-{}'.format(run_name))
    os.makedirs(log_dir, exist_ok=True)
    return log_dir, modified_hp

def main():
    args = docopt(__doc__)
    print("Command line args:\n", args)
    checkpoint_dir = args["--checkpoint-dir"]
    source_data_root = args["--source-data-root"]
    target_data_root = args["--target-data-root"]
    selected_list_dir = args["--selected-list-dir"]
    use_multi_gpu = args["--multi-gpus"]

    if args["--hparam-json-file"]:
        with open(args["--hparam-json-file"]) as f:
            json = "".join(f.readlines())
            hparams.parse_json(json)

    hparams.parse(args["--hparams"])

    training_list = list(load_key_list("train.csv", selected_list_dir))
    validation_list = list(load_key_list("validation.csv", selected_list_dir))

    training_source_files = [os.path.join(source_data_root, f"{key}.{hparams.source_file_extension}")
                             for key in training_list]
    training_target_files = [os.path.join(target_data_root, f"{key}.{hparams.target_file_extension}")
                             for key in training_list]
    validation_source_files = [os.path.join(source_data_root, f"{key}.{hparams.source_file_extension}")
                               for key in validation_list]
    validation_target_files = [os.path.join(target_data_root, f"{key}.{hparams.target_file_extension}")
                               for key in validation_list]

    print("training source", len(training_source_files))
    print("training target", len(training_target_files))

    log = logging.getLogger("tensorflow")
    log.setLevel(logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    fh = logging.FileHandler(hparams.logfile)
    fh.setLevel(logging.INFO)
    fh.setFormatter(formatter)
    log.addHandler(fh)

    tf.logging.set_verbosity(tf.logging.INFO)
    tf.logging.info(hparams_debug_string())

    train_and_evaluate(hparams, checkpoint_dir, training_source_files, training_target_files,
                       validation_source_files, validation_target_files, use_multi_gpu)

def run_synthesis(args, checkpoint_path, output_dir):
    _p_cmudict = 0.5
    GTA = (args.GTA == 'True')
    if GTA:
        synth_dir = os.path.join(output_dir, 'gta')
    else:
        synth_dir = os.path.join(output_dir, 'natural')
    # Create output path if it doesn't exist
    os.makedirs(synth_dir, exist_ok=True)

    metadata_filename = os.path.join(args.input_dir, 'train.txt')
    if hparams.use_cmudict:
        cmudict_path = os.path.join(os.path.dirname(metadata_filename), 'cmudict-0.7b')
        if not os.path.isfile(cmudict_path):
            raise Exception(
                'If use_cmudict=True, you must download the CMU dictionary first. '
                'Run shell as:\n wget -P %s http://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict/cmudict-0.7b'
                % os.path.dirname(metadata_filename))  # was self._datadir, which is undefined in this function
        _cmudict = cmudict.CMUDict(cmudict_path, keep_ambiguous=False)
        log('Loaded CMUDict with %d unambiguous entries' % len(_cmudict))
    else:
        _cmudict = None

    log(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, gta=GTA)
    with open(metadata_filename, encoding='utf-8') as f:
        metadata = [line.strip().split('|') for line in f]

    log('starting synthesis')
    mel_dir = os.path.join(args.input_dir, 'mels')
    wav_dir = os.path.join(args.input_dir, 'linear')
    with open(os.path.join(synth_dir, 'map.txt'), 'w') as file:
        for i, meta in enumerate(tqdm(metadata)):
            _punctuation_re = re.compile(r'([\.,"\-_:]+)')
            text = re.sub(_punctuation_re, r' \1 ', meta[3])
            if _cmudict and random.random() < _p_cmudict:
                text = ' '.join([maybe_get_arpabet(_cmudict, word) for word in text.split(' ')])
            mel_filename = os.path.join(mel_dir, meta[1])
            wav_filename = os.path.join(wav_dir, meta[0])
            mel_output_filename = synth.synthesize(text, i + 1, synth_dir, None, mel_filename)
            file.write('{}|{}|{}|{}\n'.format(wav_filename, mel_filename, mel_output_filename, text))
    log('synthesized mel spectrograms at {}'.format(synth_dir))
    return os.path.join(synth_dir, 'map.txt')

def run_eval(args):
    print(hparams_debug_string())
    synth = Synthesizer()
    synth.load(args.checkpoint)
    base_path = get_output_base_path(args.checkpoint)
    path = '%s-_%s_.wav' % (base_path, args.sentence)
    print('Synthesizing: %s - %s' % (path, args.sentence))
    # Use a context manager so the file handle is closed (the original leaked it)
    with open(path, 'wb') as file_handle:
        file_handle.write(synth.synthesize(args.sentence))

def run_eval(args, text):
    print(hparams_debug_string())
    synth = Synthesizer()
    synth.load(args.checkpoint)
    base_path = get_output_base_path(args.checkpoint)
    path = '%s.wav' % text
    print('Synthesizing: %s' % path)
    with open(path, 'wb') as f:
        f.write(synth.synthesize(text))

def run_eval(args):
    print(hparams_debug_string())
    synth = Synthesizer()
    synth.load(args.checkpoint)
    base_path = get_output_base_path(args.checkpoint)
    with open('{}-eval.wav'.format(args.ref.split('/')[-1].replace('.wav', '')), 'wb') as f:
        f.write(synth.synthesize(args.ref))

def run_eval(args, checkpoint_path):
    print(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path)
    for i, text in enumerate(sentences):
        start = time.time()
        synth.synthesize(text, i, args.output_dir)
        print('synthesized sentence n°{} in {:.3f} sec'.format(i + 1, time.time() - start))

def run_eval(args):
    print(hparams_debug_string())
    synth = Synthesizer()
    synth.load(args.checkpoint)
    base_path = get_output_base_path(args.checkpoint)
    for i, text in enumerate(sentences):
        path = '%s-%d.wav' % (base_path, i)
        print('Synthesizing: %s' % path)
        with open(path, 'wb') as f:
            f.write(synth.synthesize(text))

def main():
    args = get_args()
    if args.preset is not None:
        with open(args.preset) as f:
            hparams.parse_json(f.read())
    modified_hp = hparams.parse(args.hparams)
    print(hparams_debug_string())
    synthesis(args.checkpoint_path, args.local_path, args.global_id, args.output_dir, modified_hp)

def run_eval(ckpt_dir):
    print(hparams_debug_string())
    checkpoint = tf.train.get_checkpoint_state(ckpt_dir).model_checkpoint_path
    synth = Synthesizer()
    synth.load(checkpoint)
    base_path = get_output_base_path(checkpoint)
    for i, text in enumerate(sentences):
        path = '%s-%03d.wav' % (base_path, i)
        print('Synthesizing: %s' % path)
        with open(path, 'wb') as f:
            f.write(synth.synthesize(text))

def run_eval(args, checkpoint_path, output_dir, hparams, sentences):
    eval_dir = os.path.join(output_dir, 'eval')
    log_dir = os.path.join(output_dir, 'logs-eval')
    if args.model == 'Tacotron-2':
        assert os.path.normpath(eval_dir) == os.path.normpath(args.mels_dir)

    # Create output path if it doesn't exist
    os.makedirs(eval_dir, exist_ok=True)
    os.makedirs(log_dir, exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'wavs'), exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'plots'), exist_ok=True)

    print(hparams_debug_string())
    log(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, hparams)

    # Set inputs batch-wise
    sentences = [sentences[i:i + hparams.tacotron_synthesis_batch_size]
                 for i in range(0, len(sentences), hparams.tacotron_synthesis_batch_size)]

    print('Starting Synthesis')
    log('Starting Synthesis')
    with open(os.path.join(eval_dir, 'map.txt'), 'w', encoding="utf-8") as file:
        for i, texts in enumerate(tqdm(sentences)):
            start = time.time()
            basenames = ['batch_{}_sentence_{}'.format(i, j) for j in range(len(texts))]
            mel_filenames, speaker_ids = synth.synthesize(texts, basenames, eval_dir, log_dir, None)
            for elems in zip(texts, mel_filenames, speaker_ids):
                file.write('|'.join([str(x) for x in elems]) + '\n')
    print('synthesized mel spectrograms at {}'.format(eval_dir))
    log('synthesized mel spectrograms at {}'.format(eval_dir))
    return eval_dir

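# A minimal sketch of the batching idiom used in run_eval above: slicing a
# flat list into fixed-size chunks with a list comprehension. The final chunk
# may be shorter than batch_size.
def chunk(items, batch_size):
    return [items[i:i + batch_size] for i in range(0, len(items), batch_size)]

# chunk(['a', 'b', 'c', 'd', 'e'], 2) -> [['a', 'b'], ['c', 'd'], ['e']]
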
def run_eval(args):
    print(hparams_debug_string())
    synth = Synthesizer()
    synth.load(args.checkpoint)
    base_path = get_output_base_path(args.checkpoint)
    wav = load_wav(args.reference_audio)
    mel = melspectrogram(wav).transpose()
    for i, text in enumerate(sentences):
        path = '%s-%d.wav' % (base_path, i)
        print('Synthesizing: %s' % path)
        with open(path, 'wb') as f:
            f.write(synth.synthesize(text, mel))

def run_eval(args):
    print(hparams_debug_string())
    synth = Synthesizer()
    synth.load(args.checkpoint)
    base_path = get_output_base_path(args.checkpoint)
    for i, text in enumerate(sentences):
        path = '%s-%03d.wav' % (base_path, i)
        print(' ')
        print('[{:<10}]: {}'.format('processing', path))
        wav, feature = synth.synthesize(text)
        sf.write(path, wav, 16000)
        np.save(path.replace('.wav', '.npy'), feature)

def run_synthesis(args, checkpoint_path, output_dir, hparams):
    GTA = (args.GTA == 'True')
    if GTA:
        synth_dir = os.path.join(output_dir, 'gta')
    else:
        synth_dir = os.path.join(output_dir, 'natural')
    # Create output path if it doesn't exist
    os.makedirs(synth_dir, exist_ok=True)

    metadata_filename = os.path.join(args.input_dir, 'train.txt')
    log(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, hparams, gta=GTA)
    with open(metadata_filename, encoding='utf-8') as f:
        metadata = [line.strip().split('|') for line in f]
        frame_shift_ms = hparams.hop_size / hparams.sample_rate  # seconds per frame, despite the 'ms' in the name
        hours = sum([int(x[4]) for x in metadata]) * frame_shift_ms / 3600
        log('Loaded metadata for {} examples ({:.2f} hours)'.format(len(metadata), hours))

    # Set inputs batch-wise
    metadata = [metadata[i:i + hparams.tacotron_synthesis_batch_size]
                for i in range(0, len(metadata), hparams.tacotron_synthesis_batch_size)]

    log('Starting Synthesis')
    mel_dir = os.path.join(args.input_dir, 'mels')
    wav_dir = os.path.join(args.input_dir, 'audio')
    with open(os.path.join(synth_dir, 'map.txt'), 'w') as file:
        for i, meta in enumerate(tqdm(metadata)):
            texts = [m[5] for m in meta]
            mel_filenames = [os.path.join(mel_dir, m[1]) for m in meta]
            wav_filenames = [os.path.join(wav_dir, m[0]) for m in meta]
            basenames = [os.path.basename(m).replace('.npy', '').replace('mel-', '')
                         for m in mel_filenames]
            mel_output_filenames, speaker_ids = synth.synthesize(texts, basenames, synth_dir, None, mel_filenames)
            for elems in zip(wav_filenames, mel_filenames, mel_output_filenames, speaker_ids, texts):
                file.write('|'.join([str(x) for x in elems]) + '\n')
    log('synthesized mel spectrograms at {}'.format(synth_dir))
    return os.path.join(synth_dir, 'map.txt')

def init(self):
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
    hparams.parse('')
    print(hparams_debug_string())
    self.voice_choice = 1  # female default
    self.base_dir = os.getcwd()
    checkpoint = os.path.join(self.base_dir, 'LJlogs-tacotron', 'model.ckpt-40000')
    self.output_path = os.path.join(self.base_dir, 'static', 'audio', 'output.wav')
    self.synth = Synthesizer()
    self.synth.load(checkpoint)

def run_eval(args):
    print(hparams_debug_string())
    synth = Synthesizer()
    synth.load(args.checkpoint)
    base_path = get_output_base_path(args.checkpoint)
    for i, text in enumerate(sentences):
        path = '%s-%03d.wav' % (base_path, i)
        print('Synthesizing: %s' % path)
        with open(path, 'wb') as f:
            url = 'http://127.0.0.1:8080/get_sentence/' + text
            text = requests.get(url).text
            f.write(synth.synthesize(text))

def run_eval(args):
    print(hparams_debug_string())
    synth = Synthesizer()
    synth.load(args.checkpoint)
    base_path = get_output_base_path(args.checkpoint)
    os.makedirs(base_path, exist_ok=True)
    for i, text in enumerate(sentences, 1):
        wavname = '%s-%04d.wav' % (os.path.basename(base_path), i)
        path = os.path.join(base_path, wavname)
        print('Synthesizing: %s' % path)
        with open(path, 'wb') as f:
            f.write(synth.synthesize(text + '。。'))

def run_eval(args):
    print(hparams_debug_string())
    synth = Synthesizer()
    modified_hp = hparams.parse(args.hparams)
    synth.load(args.checkpoint, modified_hp)
    base_path = get_output_base_path(args.checkpoint)
    for i, text in enumerate(sentences):
        path = '%s-%d.wav' % (base_path, i)
        print('Synthesizing: %s' % path)
        with open(path, 'wb') as f:
            data, wav = synth.eval(text)
            f.write(data)

def run_live(args, checkpoint_path, hparams):
    # Log to terminal without keeping any records in files
    log(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, hparams)

    # Generate fast greeting message
    greetings = 'Hello, Welcome to the Live testing tool. Please type a message and I will try to read it!'
    log(greetings)
    generate_fast(synth, greetings)

    # Interaction loop
    while True:
        try:
            text = input()
            generate_fast(synth, text)
        except KeyboardInterrupt:
            leave = 'Thank you for testing our features. see you soon.'
            log(leave)
            generate_fast(synth, leave)
            sleep(2)
            break

def run_synthesis(args, checkpoint_path, output_dir, hparams):
    GTA = (args.GTA == 'True')
    if GTA:
        synth_dir = os.path.join(output_dir, 'gta')
    else:
        synth_dir = os.path.join(output_dir, 'natural')
    # Create output path if it doesn't exist
    os.makedirs(synth_dir, exist_ok=True)

    metadata_filename = os.path.join(args.input_dir, 'train.txt')
    log(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, hparams, gta=GTA)
    with open(metadata_filename, encoding='utf-8') as f:
        metadata = [line.strip().split('|') for line in f]
        frame_shift_ms = hparams.hop_size / hparams.sample_rate  # seconds per frame, despite the 'ms' in the name
        hours = sum([int(x[4]) for x in metadata]) * frame_shift_ms / 3600
        log('Loaded metadata for {} examples ({:.2f} hours)'.format(len(metadata), hours))

    log('starting synthesis')
    mel_dir = os.path.join(args.input_dir, 'mels')
    wav_dir = os.path.join(args.input_dir, 'audio')
    with open(os.path.join(synth_dir, 'map.txt'), 'w') as file:
        for i, meta in enumerate(tqdm(metadata)):
            text = meta[5]
            mel_filename = os.path.join(mel_dir, meta[1])
            wav_filename = os.path.join(wav_dir, meta[0])
            mel_output_filename = synth.synthesize(text, i + 1, synth_dir, None, mel_filename)
            file.write('{}|{}|{}|{}\n'.format(wav_filename, mel_filename, mel_output_filename, text))
    log('synthesized mel spectrograms at {}'.format(synth_dir))
    return os.path.join(synth_dir, 'map.txt')

class SynthesisResource:
    def on_get(self, req, res):
        if not req.params.get('text'):
            raise falcon.HTTPBadRequest()
        res.data = synthesizer.synthesize(req.params.get('text'))
        res.content_type = 'audio/wav'


synthesizer = Synthesizer()
api = falcon.API()
api.add_route('/synthesize', SynthesisResource())
api.add_route('/', UIResource())

if __name__ == '__main__':
    from wsgiref import simple_server
    parser = argparse.ArgumentParser()
    parser.add_argument('--checkpoint', required=True, help='Full path to model checkpoint')
    parser.add_argument('--port', type=int, default=9000)
    parser.add_argument('--hparams', default='',
                        help='Hyperparameter overrides as a comma-separated list of name=value pairs')
    args = parser.parse_args()
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
    hparams.parse(args.hparams)
    print(hparams_debug_string())
    synthesizer.load(args.checkpoint)
    print('Serving on port %d' % args.port)
    simple_server.make_server('0.0.0.0', args.port, api).serve_forever()
else:
    synthesizer.load(os.environ['CHECKPOINT'])

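# Usage sketch for the falcon demo server above (the script filename is an
# assumption; substitute the repo's actual entry point):
#   python demo_server.py --checkpoint /path/to/model.ckpt --port 9000
# then fetch a waveform:
#   curl 'http://localhost:9000/synthesize?text=Hello%20world' > out.wav
# When the module is imported by a WSGI server instead of run directly, the
# checkpoint path is read from the CHECKPOINT environment variable (see the
# else branch above).
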
def train(log_dir, args):
    commit = get_git_commit() if args.git else 'None'
    checkpoint_path = os.path.join(log_dir, 'model.ckpt')
    input_path = os.path.join(args.base_dir, args.input)
    log('Checkpoint path: %s' % checkpoint_path)
    log('Loading training data from: %s' % input_path)
    log('Using model: %s' % args.model)
    log(hparams_debug_string())

    # Set up DataFeeder:
    coord = tf.train.Coordinator()
    with tf.variable_scope('datafeeder') as scope:
        feeder = DataFeeder(coord, input_path, hparams)

    # Set up model:
    global_step = tf.Variable(0, name='global_step', trainable=False)
    with tf.variable_scope('model') as scope:
        model = create_model(args.model, hparams)
        model.initialize(feeder.inputs, feeder.input_lengths, feeder.mel_targets, feeder.linear_targets)
        model.add_loss()
        model.add_optimizer(global_step)
        stats = add_stats(model)

    # Bookkeeping:
    step = 0
    time_window = ValueWindow(100)
    loss_window = ValueWindow(100)
    saver = tf.train.Saver(max_to_keep=5, keep_checkpoint_every_n_hours=2)

    # Train!
    with tf.Session() as sess:
        try:
            summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
            sess.run(tf.global_variables_initializer())

            if args.restore_step:
                # Restore from a checkpoint if the user requested it.
                restore_path = '%s-%d' % (checkpoint_path, args.restore_step)
                saver.restore(sess, restore_path)
                log('Resuming from checkpoint: %s at commit: %s' % (restore_path, commit), slack=True)
            else:
                log('Starting new training run at commit: %s' % commit, slack=True)

            feeder.start_in_session(sess)

            while not coord.should_stop():
                start_time = time.time()
                step, loss, opt = sess.run([global_step, model.loss, model.optimize])
                time_window.append(time.time() - start_time)
                loss_window.append(loss)
                message = 'Step %-7d [%.03f sec/step, loss=%.05f, avg_loss=%.05f]' % (
                    step, time_window.average, loss, loss_window.average)
                log(message, slack=(step % args.checkpoint_interval == 0))

                if loss > 100 or math.isnan(loss):
                    log('Loss exploded to %.05f at step %d!' % (loss, step), slack=True)
                    raise Exception('Loss Exploded')

                if step % args.summary_interval == 0:
                    log('Writing summary at step: %d' % step)
                    summary_writer.add_summary(sess.run(stats), step)

                if step % args.checkpoint_interval == 0:
                    log('Saving checkpoint to: %s-%d' % (checkpoint_path, step))
                    saver.save(sess, checkpoint_path, global_step=step)
                    log('Saving audio and alignment...')
                    input_seq, spectrogram, alignment = sess.run([
                        model.inputs[0], model.linear_outputs[0], model.alignments[0]])
                    waveform = audio.inv_spectrogram(spectrogram.T)
                    audio.save_wav(waveform, os.path.join(log_dir, 'step-%d-audio.wav' % step))
                    plot.plot_alignment(alignment, os.path.join(log_dir, 'step-%d-align.png' % step),
                                        info='%s, %s, %s, step=%d, loss=%.5f' % (
                                            args.model, commit, time_string(), step, loss))
                    log('Input: %s' % sequence_to_text(input_seq))

        except Exception as e:
            log('Exiting due to exception: %s' % e, slack=True)
            traceback.print_exc()
            coord.request_stop(e)

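# A minimal stand-in for the ValueWindow helper used by the training loops in
# this file: a fixed-size window over recent values that reports their mean.
# The real util class may differ; this sketch only illustrates the
# moving-average bookkeeping behind the sec/step and avg_loss log fields.
from collections import deque

class ValueWindow:
    def __init__(self, window_size=100):
        self._values = deque(maxlen=window_size)

    def append(self, x):
        self._values.append(x)

    @property
    def average(self):
        # Mean of the retained values; 0 if nothing has been appended yet.
        return sum(self._values) / len(self._values) if self._values else 0.0
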
def train(log_dir, args, hparams, input_path):
    save_dir = os.path.join(log_dir, 'wave_pretrained/')
    eval_dir = os.path.join(log_dir, 'eval-dir')
    audio_dir = os.path.join(log_dir, 'wavs')
    plot_dir = os.path.join(log_dir, 'plots')
    wav_dir = os.path.join(log_dir, 'wavs')
    eval_audio_dir = os.path.join(eval_dir, 'wavs')
    eval_plot_dir = os.path.join(eval_dir, 'plots')
    checkpoint_path = os.path.join(save_dir, 'wavenet_model.ckpt')
    input_path = os.path.join(args.base_dir, input_path)
    os.makedirs(save_dir, exist_ok=True)
    os.makedirs(wav_dir, exist_ok=True)
    os.makedirs(audio_dir, exist_ok=True)
    os.makedirs(plot_dir, exist_ok=True)
    os.makedirs(eval_audio_dir, exist_ok=True)
    os.makedirs(eval_plot_dir, exist_ok=True)

    log('Checkpoint_path: {}'.format(checkpoint_path))
    log('Loading training data from: {}'.format(input_path))
    log('Using model: {}'.format(args.model))
    log(hparams_debug_string())

    # Start by setting a seed for repeatability
    tf.set_random_seed(hparams.wavenet_random_seed)

    # Set up data feeder
    coord = tf.train.Coordinator()
    with tf.variable_scope('datafeeder') as scope:
        feeder = Feeder(coord, input_path, args.base_dir, hparams)

    # Set up model
    global_step = tf.Variable(0, name='global_step', trainable=False)
    model, stats = model_train_mode(args, feeder, hparams, global_step)
    eval_model = model_test_mode(args, feeder, hparams, global_step)

    # Bookkeeping
    step = 0
    time_window = ValueWindow(100)
    loss_window = ValueWindow(100)
    sh_saver = create_shadow_saver(model, global_step)

    log('Wavenet training set to a maximum of {} steps'.format(args.wavenet_train_steps))

    # Memory allocation on the GPU as needed
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    # Train
    with tf.Session(config=config) as sess:
        try:
            summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
            sess.run(tf.global_variables_initializer())

            # saved model restoring
            checkpoint_state = None  # guard against NameError when restoring is skipped
            if args.restore:
                # Restore saved model if the user requested it, default = True
                try:
                    checkpoint_state = tf.train.get_checkpoint_state(save_dir)
                except tf.errors.OutOfRangeError as e:
                    log('Cannot restore checkpoint: {}'.format(e))

            if checkpoint_state and checkpoint_state.model_checkpoint_path:
                log('Loading checkpoint {}'.format(checkpoint_state.model_checkpoint_path))
                load_averaged_model(sess, sh_saver, checkpoint_state.model_checkpoint_path)
            else:
                if not args.restore:
                    log('Starting new training!')
                else:
                    log('No model to load at {}'.format(save_dir))

            # initializing feeder
            feeder.start_threads(sess)

            # Training loop
            while not coord.should_stop() and step < args.wavenet_train_steps:
                start_time = time.time()
                step, y_hat, loss, opt = sess.run([global_step, model.y_hat, model.loss, model.optimize])
                time_window.append(time.time() - start_time)
                loss_window.append(loss)

                message = 'Step {:7d} [{:.3f} sec/step, loss={:.5f}, avg_loss={:.5f}]'.format(
                    step, time_window.average, loss, loss_window.average)
                log(message, end='\r')

                if loss > 100 or np.isnan(loss):
                    log('Loss exploded to {:.5f} at step {}'.format(loss, step))
                    raise Exception('Loss exploded')

                if step % args.summary_interval == 0:
                    log('\nWriting summary at step {}'.format(step))
                    summary_writer.add_summary(sess.run(stats), step)

                if step % args.checkpoint_interval == 0:
                    save_log(sess, step, model, plot_dir, audio_dir, hparams=hparams)
                    save_checkpoint(sess, sh_saver, checkpoint_path, global_step)

                if step % args.eval_interval == 0:
                    log('\nEvaluating at step {}'.format(step))
                    eval_step(sess, step, eval_model, eval_plot_dir, eval_audio_dir,
                              summary_writer=summary_writer, hparams=model._hparams)

            log('Wavenet training complete after {} global steps'.format(args.wavenet_train_steps))
        except Exception as e:
            log('Exiting due to Exception: {}'.format(e))

def train(log_dir, args, hparams):
    save_dir = os.path.join(log_dir, 'taco_pretrained/')
    checkpoint_path = os.path.join(save_dir, 'tacotron_model.ckpt')
    input_path = os.path.join(args.base_dir, args.tacotron_input)
    plot_dir = os.path.join(log_dir, 'plots')
    wav_dir = os.path.join(log_dir, 'wavs')
    mel_dir = os.path.join(log_dir, 'mel-spectrograms')
    eval_dir = os.path.join(log_dir, 'eval-dir')
    eval_plot_dir = os.path.join(eval_dir, 'plots')
    eval_wav_dir = os.path.join(eval_dir, 'wavs')
    os.makedirs(eval_dir, exist_ok=True)
    os.makedirs(plot_dir, exist_ok=True)
    os.makedirs(wav_dir, exist_ok=True)
    os.makedirs(mel_dir, exist_ok=True)
    os.makedirs(eval_plot_dir, exist_ok=True)
    os.makedirs(eval_wav_dir, exist_ok=True)

    if hparams.predict_linear:
        linear_dir = os.path.join(log_dir, 'linear-spectrograms')
        os.makedirs(linear_dir, exist_ok=True)

    log('Checkpoint path: {}'.format(checkpoint_path))
    log('Loading training data from: {}'.format(input_path))
    log('Using model: {}'.format(args.model))
    log(hparams_debug_string())

    # Start by setting a seed for repeatability
    tf.set_random_seed(hparams.tacotron_random_seed)

    # Set up data feeder
    coord = tf.train.Coordinator()
    with tf.variable_scope('datafeeder') as scope:
        feeder = Feeder(coord, input_path, hparams)

    # Set up model:
    global_step = tf.Variable(0, name='global_step', trainable=False)
    model, stats = model_train_mode(args, feeder, hparams, global_step)
    eval_model = model_test_mode(args, feeder, hparams, global_step)

    # Bookkeeping
    step = 0
    time_window = ValueWindow(100)
    loss_window = ValueWindow(100)
    saver = tf.train.Saver(max_to_keep=5)

    log('Tacotron training set to a maximum of {} steps'.format(args.tacotron_train_steps))

    # Memory allocation on the GPU as needed
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    # Train
    with tf.Session(config=config) as sess:
        try:
            summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
            sess.run(tf.global_variables_initializer())

            # saved model restoring
            checkpoint_state = None  # guard against NameError when restoring is skipped
            if args.restore:
                # Restore saved model if the user requested it, default = True
                try:
                    checkpoint_state = tf.train.get_checkpoint_state(save_dir)
                except tf.errors.OutOfRangeError as e:
                    log('Cannot restore checkpoint: {}'.format(e))

            if checkpoint_state and checkpoint_state.model_checkpoint_path:
                log('Loading checkpoint {}'.format(checkpoint_state.model_checkpoint_path))
                saver.restore(sess, checkpoint_state.model_checkpoint_path)
            else:
                if not args.restore:
                    log('Starting new training!')
                else:
                    log('No model to load at {}'.format(save_dir))

            # initializing feeder
            feeder.start_threads(sess)

            # Training loop
            while not coord.should_stop() and step < args.tacotron_train_steps:
                start_time = time.time()
                step, loss, opt = sess.run([global_step, model.loss, model.optimize])
                time_window.append(time.time() - start_time)
                loss_window.append(loss)
                message = 'Step {:7d} [{:.3f} sec/step, loss={:.5f}, avg_loss={:.5f}]'.format(
                    step, time_window.average, loss, loss_window.average)
                log(message, end='\r')

                if np.isnan(loss):
                    log('Loss exploded to {:.5f} at step {}'.format(loss, step))
                    raise Exception('Loss exploded')

                if step % args.summary_interval == 0:
                    log('\nWriting summary at step {}'.format(step))
                    summary_writer.add_summary(sess.run(stats), step)

                if step % args.eval_interval == 0:
                    # Run eval and save eval stats
                    log('\nRunning evaluation at step {}'.format(step))

                    eval_losses = []
                    before_losses = []
                    after_losses = []
                    stop_token_losses = []
                    linear_losses = []
                    linear_loss = None

                    if hparams.predict_linear:
                        for i in tqdm(range(feeder.test_steps)):
                            eloss, before_loss, after_loss, stop_token_loss, linear_loss, mel_p, mel_t, t_len, align, lin_p = sess.run(
                                [eval_model.loss, eval_model.before_loss, eval_model.after_loss,
                                 eval_model.stop_token_loss, eval_model.linear_loss,
                                 eval_model.mel_outputs[0], eval_model.mel_targets[0],
                                 eval_model.targets_lengths[0], eval_model.alignments[0],
                                 eval_model.linear_outputs[0]])
                            eval_losses.append(eloss)
                            before_losses.append(before_loss)
                            after_losses.append(after_loss)
                            stop_token_losses.append(stop_token_loss)
                            linear_losses.append(linear_loss)
                        linear_loss = sum(linear_losses) / len(linear_losses)

                        wav = audio.inv_linear_spectrogram(lin_p.T, hparams)
                        audio.save_wav(wav, os.path.join(eval_wav_dir, 'step-{}-eval-waveform-linear.wav'.format(step)),
                                       sr=hparams.sample_rate)
                    else:
                        for i in tqdm(range(feeder.test_steps)):
                            eloss, before_loss, after_loss, stop_token_loss, mel_p, mel_t, t_len, align = sess.run(
                                [eval_model.loss, eval_model.before_loss, eval_model.after_loss,
                                 eval_model.stop_token_loss, eval_model.mel_outputs[0],
                                 eval_model.mel_targets[0], eval_model.targets_lengths[0],
                                 eval_model.alignments[0]])
                            eval_losses.append(eloss)
                            before_losses.append(before_loss)
                            after_losses.append(after_loss)
                            stop_token_losses.append(stop_token_loss)

                    eval_loss = sum(eval_losses) / len(eval_losses)
                    before_loss = sum(before_losses) / len(before_losses)
                    after_loss = sum(after_losses) / len(after_losses)
                    stop_token_loss = sum(stop_token_losses) / len(stop_token_losses)

                    log('Saving eval log to {}..'.format(eval_dir))
                    # Save some logs to monitor model improvement on the same unseen sequence
                    wav = audio.inv_mel_spectrogram(mel_p.T, hparams)
                    audio.save_wav(wav, os.path.join(eval_wav_dir, 'step-{}-eval-waveform-mel.wav'.format(step)),
                                   sr=hparams.sample_rate)

                    plot.plot_alignment(align, os.path.join(eval_plot_dir, 'step-{}-eval-align.png'.format(step)),
                                        info='{}, {}, step={}, loss={:.5f}'.format(args.model, time_string(), step, eloss),
                                        max_len=t_len // hparams.outputs_per_step)
                    plot.plot_spectrogram(mel_p, os.path.join(eval_plot_dir, 'step-{}-eval-mel-spectrogram.png'.format(step)),
                                          info='{}, {}, step={}, loss={:.5}'.format(args.model, time_string(), step, eloss),
                                          target_spectrogram=mel_t, max_len=t_len)

                    log('Eval loss for global step {}: {:.3f}'.format(step, eval_loss))
                    log('Writing eval summary!')
                    add_eval_stats(summary_writer, step, linear_loss, before_loss, after_loss,
                                   stop_token_loss, eval_loss)

                if step % args.checkpoint_interval == 0:
                    # Save model and current global step
                    saver.save(sess, checkpoint_path, global_step=global_step)

                    log('\nSaving alignment, Mel-Spectrograms and griffin-lim inverted waveform..')
                    if hparams.predict_linear:
                        input_seq, mel_prediction, linear_prediction, alignment, target, target_length = sess.run([
                            model.inputs[0],
                            model.mel_outputs[0],
                            model.linear_outputs[0],
                            model.alignments[0],
                            model.mel_targets[0],
                            model.targets_lengths[0],
                        ])

                        # save predicted linear spectrogram to disk (debug)
                        linear_filename = 'linear-prediction-step-{}.npy'.format(step)
                        np.save(os.path.join(linear_dir, linear_filename), linear_prediction.T, allow_pickle=False)

                        # save griffin lim inverted wav for debug (linear -> wav)
                        wav = audio.inv_linear_spectrogram(linear_prediction.T, hparams)
                        audio.save_wav(wav, os.path.join(wav_dir, 'step-{}-wave-from-linear.wav'.format(step)),
                                       sr=hparams.sample_rate)
                    else:
                        input_seq, mel_prediction, alignment, target, target_length = sess.run([
                            model.inputs[0],
                            model.mel_outputs[0],
                            model.alignments[0],
                            model.mel_targets[0],
                            model.targets_lengths[0],
                        ])

                    # save predicted mel spectrogram to disk (debug)
                    mel_filename = 'mel-prediction-step-{}.npy'.format(step)
                    np.save(os.path.join(mel_dir, mel_filename), mel_prediction.T, allow_pickle=False)

                    # save griffin lim inverted wav for debug (mel -> wav)
                    wav = audio.inv_mel_spectrogram(mel_prediction.T, hparams)
                    audio.save_wav(wav, os.path.join(wav_dir, 'step-{}-wave-from-mel.wav'.format(step)),
                                   sr=hparams.sample_rate)

                    # save alignment plot to disk (control purposes)
                    plot.plot_alignment(alignment, os.path.join(plot_dir, 'step-{}-align.png'.format(step)),
                                        info='{}, {}, step={}, loss={:.5f}'.format(args.model, time_string(), step, loss),
                                        max_len=target_length // hparams.outputs_per_step)

                    # save real and predicted mel-spectrogram plot to disk (control purposes)
                    plot.plot_spectrogram(mel_prediction, os.path.join(plot_dir, 'step-{}-mel-spectrogram.png'.format(step)),
                                          info='{}, {}, step={}, loss={:.5}'.format(args.model, time_string(), step, loss),
                                          target_spectrogram=target, max_len=target_length)
                    log('Input at step {}: {}'.format(step, sequence_to_text(input_seq)))

            log('Tacotron training complete after {} global steps!'.format(args.tacotron_train_steps))
            return save_dir
        except Exception as e:
            log('Exiting due to exception: {}'.format(e))
            traceback.print_exc()
            coord.request_stop(e)

def trim_silence_frames(features, labels):
    # NOTE: the enclosing function definition was missing from the source;
    # the name and signature here are assumed from the body.
    features = features[:labels.num_frames()]
    indices = labels.silence_frame_indices()
    features = np.delete(features, indices, axis=0)
    return features.astype(np.float32)


if __name__ == "__main__":
    args = docopt(__doc__)
    print("Command line args:\n", args)
    DATA_ROOT = args["<DATA_ROOT>"]
    max_files = int(args["--max_files"])
    dst_dir = args["--dst_dir"]
    overwrite = args["--overwrite"]

    print("Acoustic", hparams_debug_string(hp_acoustic))
    print("Duration", hparams_debug_string(hp_duration))
    assert hp_acoustic.question_path == hp_duration.question_path
    assert hp_acoustic.use_phone_alignment == hp_duration.use_phone_alignment

    # Features required to train the duration model
    # X -> Y
    # X: linguistic
    # Y: duration
    X_duration_source = LinguisticSource(
        DATA_ROOT, max_files,
        add_frame_features=hp_duration.add_frame_features,
        subphone_features=hp_duration.subphone_features)
    Y_duration_source = DurationSource(DATA_ROOT, max_files)