def run_eval(args, checkpoint_path, output_dir, hparams, sentences):
    """Synthesize `sentences` batch by batch and save one wav file per batch.

    Args:
        args: Parsed CLI namespace; `args.model` and `args.mels_dir` are read.
        checkpoint_path: Checkpoint handed to `Synthesizer.load`.
        output_dir: Root directory; `eval` and `logs-eval` are created in it.
        hparams: Hyper-parameters; `tacotron_synthesis_batch_size` is read.
        sentences: Sequence of input texts.

    Returns:
        The path of the `eval` directory.
    """
    eval_dir = os.path.join(output_dir, 'eval')
    log_dir = os.path.join(output_dir, 'logs-eval')

    if args.model == 'Tacotron-2':
        # mels_dir = wavenet_input_dir
        assert os.path.normpath(eval_dir) == os.path.normpath(args.mels_dir)

    # Create output paths if they don't exist
    os.makedirs(eval_dir, exist_ok=True)
    os.makedirs(log_dir, exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'wavs'), exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'plots'), exist_ok=True)

    log(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, hparams)

    batch_size = hparams.tacotron_synthesis_batch_size
    num_sentences = len(sentences)
    delta_size = batch_size if batch_size < num_sentences else num_sentences

    if num_sentences == 0:
        # A zero step would make range() raise ValueError; there is simply
        # nothing to synthesize.
        batch_sentences = []
    else:
        batch_sentences = [
            sentences[i:i + batch_size]
            for i in range(0, num_sentences, delta_size)
        ]

    start = time.time()
    for i, batch in enumerate(tqdm(batch_sentences)):
        wav_path = os.path.join(
            log_dir, 'wavs', 'eval_batch_{:03}.wav'.format(i))
        audio.save_wav(synth.eval(batch), wav_path, hparams)
    log('\nGenerated total batch of {} in {:.3f} sec'.format(
        delta_size, time.time() - start))
    return eval_dir
def run_live(args, checkpoint_path, hparams):
    """Interactive loop: read a line from stdin and synthesize it.

    The loop ends on Ctrl-C or when stdin reaches end-of-file; in both cases
    a farewell message is synthesized before returning.

    Args:
        args: Parsed CLI namespace (not read by this function).
        checkpoint_path: Checkpoint handed to `Synthesizer.load`.
        hparams: Hyper-parameters handed to `Synthesizer.load`.
    """
    # Log to Terminal without keeping any records in files
    print(hparams_debug_string())
    log(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, hparams)

    # Generate fast greeting message
    greetings = 'Hello, Welcome to the Live testing tool. Please type a message and I will try to read it!'
    print(greetings)
    log(greetings)
    generate_fast(synth, greetings)

    # Interaction loop
    while True:
        try:
            text = input()
            generate_fast(synth, text)
        except (KeyboardInterrupt, EOFError):
            # input() raises EOFError on Ctrl-D / closed stdin; treat it as a
            # normal exit instead of letting a traceback escape.
            leave = 'Thank you for testing our features. see you soon.'
            print(leave)
            log(leave)
            generate_fast(synth, leave)
            sleep(2)
            break
def run_eval(args, checkpoint_path, output_dir, hparams, sentences):
    """Synthesize `sentences` in batches and record the results in map.txt.

    Each map.txt line is `text|mel_filename|speaker_id`.

    Args:
        args: Parsed CLI namespace; `args.model` and `args.mels_dir` are read.
        checkpoint_path: Checkpoint handed to `Synthesizer.load`.
        output_dir: Root directory; `eval` and `logs-eval` are created in it.
        hparams: Hyper-parameters; `tacotron_synthesis_batch_size` is read.
        sentences: Sequence of input texts.

    Returns:
        The path of the `eval` directory.
    """
    eval_dir = os.path.join(output_dir, 'eval')
    log_dir = os.path.join(output_dir, 'logs-eval')

    if args.model == 'Tacotron-2':
        assert os.path.normpath(eval_dir) == os.path.normpath(args.mels_dir)

    # Create output paths if they don't exist
    os.makedirs(eval_dir, exist_ok=True)
    os.makedirs(log_dir, exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'wavs'), exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'plots'), exist_ok=True)

    log(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, hparams)

    # Set inputs batch wise
    batch_size = hparams.tacotron_synthesis_batch_size
    batches = [
        sentences[i:i + batch_size]
        for i in range(0, len(sentences), batch_size)
    ]

    log('Starting Synthesis')
    # Explicit UTF-8 so non-ASCII sentences do not depend on the platform
    # default encoding.
    map_path = os.path.join(eval_dir, 'map.txt')
    with open(map_path, 'w', encoding='utf-8') as map_file:
        for i, texts in enumerate(tqdm(batches)):
            basenames = [
                'batch_{}_sentence_{}'.format(i, j) for j in range(len(texts))
            ]
            mel_filenames, speaker_ids = synth.synthesize(
                texts, basenames, eval_dir, log_dir, None)
            for elems in zip(texts, mel_filenames, speaker_ids):
                map_file.write('|'.join([str(x) for x in elems]) + '\n')
    log('synthesized mel spectrograms at {}'.format(eval_dir))
    return eval_dir
def run_single(args, checkpoint_path, output_dir, hparams, sentences):
    """Synthesize `sentences` in batches without writing a map file.

    Basenames are passed to the synthesizer as `None`.

    Args:
        args: Parsed CLI namespace; `args.model` and `args.mels_dir` are read.
        checkpoint_path: Checkpoint handed to `Synthesizer.load`.
        output_dir: Root directory; `eval` and `logs-eval` are created in it.
        hparams: Hyper-parameters; `tacotron_synthesis_batch_size` is read.
        sentences: Sequence of input texts.

    Returns:
        The path of the `eval` directory.
    """
    eval_dir = os.path.join(output_dir, 'eval')
    log_dir = os.path.join(output_dir, 'logs-eval')

    if args.model == 'Tacotron-2':
        assert os.path.normpath(eval_dir) == os.path.normpath(args.mels_dir)

    # Create output paths if they don't exist
    os.makedirs(eval_dir, exist_ok=True)
    os.makedirs(log_dir, exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'wavs'), exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'plots'), exist_ok=True)

    log(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, hparams)

    # Set inputs batch wise
    batch_size = hparams.tacotron_synthesis_batch_size
    sentences = [
        sentences[i:i + batch_size]
        for i in range(0, len(sentences), batch_size)
    ]
    print(sentences)

    log('Starting Synthesis Single')
    for texts in tqdm(sentences):
        print(texts, eval_dir, log_dir)
        synth.synthesize(texts, None, eval_dir, log_dir, None)
    log('synthesized mel spectrograms at {}'.format(eval_dir))
    return eval_dir
def run_eval(args, checkpoint_path, output_dir, hparams, sentences):
    """Synthesize each sentence and write `text|mel_filename` lines to map.txt.

    Args:
        args: Parsed CLI namespace; `args.model` and `args.mels_dir` are read.
        checkpoint_path: Checkpoint handed to `Synthesizer.load`.
        output_dir: Root directory; `eval` and `logs-eval` are created in it.
        hparams: Hyper-parameters handed to `Synthesizer.load`.
        sentences: Iterable of input texts.

    Returns:
        The path of the `eval` directory.
    """
    eval_dir = os.path.join(output_dir, 'eval')
    log_dir = os.path.join(output_dir, 'logs-eval')

    if args.model in ('Both', 'Tacotron-2'):
        # mels_dir = wavenet_input_dir
        assert os.path.normpath(eval_dir) == os.path.normpath(args.mels_dir)

    # Create output paths if they don't exist
    os.makedirs(eval_dir, exist_ok=True)
    os.makedirs(log_dir, exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'wavs'), exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'plots'), exist_ok=True)

    log(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, hparams)

    # Explicit UTF-8 so non-ASCII sentences do not depend on the platform
    # default encoding.
    map_path = os.path.join(eval_dir, 'map.txt')
    with open(map_path, 'w', encoding='utf-8') as map_file:
        for i, text in enumerate(tqdm(sentences)):
            mel_filename = synth.synthesize(
                text, i + 1, eval_dir, log_dir, None)
            map_file.write('{}|{}\n'.format(text, mel_filename))
    log('synthesized mel spectrograms at {}'.format(eval_dir))
    return eval_dir
def run_eval(args, checkpoint_path, output_dir, hparams, sentences):
    """Synthesize each sentence, pre-processing Korean text first.

    Sentences for which `is_korean_text` is true are passed through
    `normalize_number` and `split_to_jamo` before synthesis. The processed
    text is what gets written to map.txt as `text|mel_filename`.

    Args:
        args: Parsed CLI namespace; `args.model` and `args.mels_dir` are read.
        checkpoint_path: Checkpoint handed to `Synthesizer.load`.
        output_dir: Root directory; `eval` and `logs-eval` are created in it.
        hparams: Hyper-parameters; `hparams.cleaners` is read.
        sentences: Iterable of input texts.

    Returns:
        The path of the `eval` directory.
    """
    eval_dir = os.path.join(output_dir, 'eval')
    log_dir = os.path.join(output_dir, 'logs-eval')

    if args.model in ('Both', 'Tacotron-2'):
        # mels_dir = wavenet_input_dir
        assert os.path.normpath(eval_dir) == os.path.normpath(args.mels_dir)

    # Create output paths if they don't exist
    os.makedirs(eval_dir, exist_ok=True)
    os.makedirs(log_dir, exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'wavs'), exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'plots'), exist_ok=True)

    log(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, hparams)

    # The map file holds Hangul jamo; write it as UTF-8 explicitly so the
    # result does not depend on (or fail under) the platform default encoding.
    map_path = os.path.join(eval_dir, 'map.txt')
    with open(map_path, 'w', encoding='utf-8') as map_file:
        for i, text in enumerate(tqdm(sentences)):
            if is_korean_text(text):
                text = normalize_number(text)
                # Split Hangul into jamo (letter) units.
                text = split_to_jamo(text, hparams.cleaners)
            mel_filename = synth.synthesize(
                text, i + 1, eval_dir, log_dir, None)
            map_file.write('{}|{}\n'.format(text, mel_filename))
    log('synthesized mel spectrograms at {}'.format(eval_dir))
    return eval_dir
def init_tacotron2(args):
    """Load a Tacotron synthesizer and prepare its evaluation directories.

    Args:
        args: Parsed CLI namespace; `model`, `hparams`, `taco_checkpoint`,
            `output_dir` and `mels_dir` are read.

    Returns:
        `(synth, eval_dir, log_dir)` when `args.model == 'Tacotron'`.

    Raises:
        RuntimeError: If the checkpoint state cannot be resolved.
    """
    # t2
    print('\n#####################################')
    # NOTE(review): the original source had no usable indentation; the whole
    # initialisation is assumed to be guarded by this condition, so other
    # model values fall through and return None - confirm against callers.
    if args.model == 'Tacotron':
        print('\nInitialising Tacotron Model...\n')
        t2_hparams = hparams.parse(args.hparams)
        try:
            checkpoint_path = tf.train.get_checkpoint_state(
                args.taco_checkpoint).model_checkpoint_path
            log('loaded model at {}'.format(checkpoint_path))
        except Exception as err:
            # Narrowed from a bare except so Ctrl-C / SystemExit are not
            # rewritten as a checkpoint failure; keep the root cause attached.
            raise RuntimeError('Failed to load checkpoint at {}'.format(
                args.taco_checkpoint)) from err

        output_dir = 'tacotron_' + args.output_dir
        eval_dir = os.path.join(output_dir, 'eval')
        log_dir = os.path.join(output_dir, 'logs-eval')
        print('eval_dir:', eval_dir)
        print('args.mels_dir:', args.mels_dir)

        # Create output paths if they don't exist
        os.makedirs(eval_dir, exist_ok=True)
        os.makedirs(log_dir, exist_ok=True)
        os.makedirs(os.path.join(log_dir, 'wavs'), exist_ok=True)
        os.makedirs(os.path.join(log_dir, 'plots'), exist_ok=True)

        log(hparams_debug_string())
        synth = Synthesizer()
        synth.load(checkpoint_path, t2_hparams)
        return synth, eval_dir, log_dir
def run_synthesis(args, checkpoint_path, output_dir):
    """Synthesize every entry of `<input_dir>/train.txt` and write map.txt.

    Each map.txt line is
    `text|mel_filename|mel_output_filename|wav_filename`.

    Args:
        args: Parsed CLI namespace; `input_dir` and `GTA` are read.
        checkpoint_path: Checkpoint handed to `Synthesizer.load`.
        output_dir: Root directory; a `gta` or `natural` sub-directory is
            created in it.
    """
    metadata_filename = os.path.join(args.input_dir, 'train.txt')
    print(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, gta=args.GTA)

    with open(metadata_filename, encoding='utf-8') as f:
        metadata = [line.strip().split('|') for line in f]
    frame_shift_ms = hparams.hop_size / hparams.sample_rate
    hours = sum(int(x[4]) for x in metadata) * frame_shift_ms / 3600
    print('Loaded metadata for {} examples ({:.2f} hours)'.format(
        len(metadata), hours))

    # Equality test kept on purpose: plain truthiness would also accept
    # non-empty strings such as 'False'.
    if args.GTA == True:  # noqa: E712
        synth_dir = os.path.join(output_dir, 'gta')
    else:
        synth_dir = os.path.join(output_dir, 'natural')

    # Create output path if it doesn't exist
    os.makedirs(synth_dir, exist_ok=True)

    print('starting synthesis')
    mel_dir = os.path.join(args.input_dir, 'mels')
    wav_dir = os.path.join(args.input_dir, 'audio')
    # The metadata is read as UTF-8, so write the map with the same encoding
    # rather than the platform default.
    map_path = os.path.join(synth_dir, 'map.txt')
    with open(map_path, 'w', encoding='utf-8') as map_file:
        for i, meta in enumerate(tqdm(metadata)):
            text = meta[5]
            mel_filename = os.path.join(mel_dir, meta[1])
            wav_filename = os.path.join(wav_dir, meta[0])
            mel_output_filename = synth.synthesize(
                text, None, i + 1, synth_dir, None, mel_filename)
            map_file.write('{}|{}|{}|{}\n'.format(
                text, mel_filename, mel_output_filename, wav_filename))
    print('synthesized mel spectrograms at {}'.format(synth_dir))
def run_synthesis_sytle_transfer(args, synth_metadata_filename,
                                 checkpoint_path, output_dir, hparams):
    """Run synthesis using emotion and speaker reference mels from metadata.

    Outputs go to `<output_dir>/natural`, which is used as both the output
    and the log directory of `Synthesizer.synthesize`.

    Args:
        args: Parsed CLI namespace; `input_dir` and `flip_spk_emt` are read,
            and the namespace itself is forwarded to `Synthesizer.load`.
        synth_metadata_filename: Metadata file describing what to synthesize.
        checkpoint_path: Checkpoint handed to `Synthesizer.load`.
        output_dir: Root output directory.
        hparams: Hyper-parameters handed to `Synthesizer.load`.
    """
    natural_dir = os.path.join(output_dir, 'natural')
    # Create output path if it doesn't exist
    os.makedirs(natural_dir, exist_ok=True)

    log(hparams_debug_string())
    model = Synthesizer()
    model.load(args, checkpoint_path, hparams)

    parsed = get_filenames_from_metadata(
        synth_metadata_filename, args.input_dir, args.flip_spk_emt)
    # basenames_refs is part of the returned tuple but is not used here.
    (texts, basenames, basenames_refs, mel_filenames,
     emt_reference_mels, spk_reference_mels,
     emt_labels, spk_labels) = parsed

    reference_kwargs = {
        'mel_ref_filenames_emt': emt_reference_mels,
        'mel_ref_filenames_spk': spk_reference_mels,
        'emt_labels_synth': emt_labels,
        'spk_labels_synth': spk_labels,
    }
    model.synthesize(texts, basenames, natural_dir, natural_dir,
                     mel_filenames, **reference_kwargs)
def run_eval(args, checkpoint_path, output_dir, hparams, sentences):
    """Synthesize each sentence and also dump its mel as a flat binary file.

    Besides the `text|mel_filename` lines of map.txt, every synthesized mel
    is reloaded, flattened and written to `f32_for_lpcnet.f32` in the current
    working directory.

    Args:
        args: Parsed CLI namespace; `args.model` and `args.mels_dir` are read.
        checkpoint_path: Checkpoint handed to `Synthesizer.load`.
        output_dir: Root directory; `eval` and `logs-eval` are created in it.
        hparams: Hyper-parameters handed to `Synthesizer.load`.
        sentences: Iterable of input texts.

    Returns:
        The path of the `eval` directory.
    """
    eval_dir = os.path.join(output_dir, 'eval')
    log_dir = os.path.join(output_dir, 'logs-eval')

    if args.model in ('Both', 'Tacotron-2'):
        # mels_dir = wavenet_input_dir
        assert os.path.normpath(eval_dir) == os.path.normpath(args.mels_dir)

    # Create output paths if they don't exist
    os.makedirs(eval_dir, exist_ok=True)
    os.makedirs(log_dir, exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'wavs'), exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'plots'), exist_ok=True)

    log(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, hparams)

    # Explicit UTF-8 so non-ASCII sentences do not depend on the platform
    # default encoding.
    map_path = os.path.join(eval_dir, 'map.txt')
    with open(map_path, 'w', encoding='utf-8') as map_file:
        for i, text in enumerate(tqdm(sentences)):
            mel_filename = synth.synthesize(
                text, i + 1, eval_dir, log_dir, None)
            map_file.write('{}|{}\n'.format(text, mel_filename))

            # NOTE(review): the same output name is reused on every
            # iteration, so only the last sentence's features remain on disk.
            npy_data = np.load(mel_filename)
            npy_data = npy_data.reshape((-1,))
            npy_data.tofile("f32_for_lpcnet.f32")
    log('synthesized mel spectrograms at {}'.format(eval_dir))
    return eval_dir
def run_eval(args, checkpoint_path, output_dir, hparams, sentences):
    """Synthesize each sentence conditioned on a reference audio clip.

    Args:
        args: Parsed CLI namespace; `args.reference_audio` is read.
        checkpoint_path: Checkpoint handed to `Synthesizer.load`.
        output_dir: Root directory; `eval` and `logs-eval` are created in it.
        hparams: Hyper-parameters handed to `Synthesizer.load`.
        sentences: Iterable of input texts.

    Returns:
        The path of the `eval` directory.

    Raises:
        ValueError: If `args.reference_audio` is None. This is checked before
            any directory is created or the model is loaded.
    """
    # Fail fast: the reference clip is mandatory, so there is no point in
    # loading the model first.
    if args.reference_audio is None:
        raise ValueError(
            "Evaluation without reference audio. Please provide path to reference audio."
        )

    eval_dir = os.path.join(output_dir, 'eval')
    log_dir = os.path.join(output_dir, 'logs-eval')

    # Create output paths if they don't exist
    os.makedirs(eval_dir, exist_ok=True)
    os.makedirs(log_dir, exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'wavs'), exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'plots'), exist_ok=True)

    log(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, hparams, reference_mel=args.reference_audio)

    ref_wav = audio.load_wav(args.reference_audio)
    reference_mel = audio.melspectrogram(ref_wav).astype(np.float32).T

    # Explicit UTF-8 so non-ASCII sentences do not depend on the platform
    # default encoding.
    map_path = os.path.join(eval_dir, 'map.txt')
    with open(map_path, 'w', encoding='utf-8') as map_file:
        for i, text in enumerate(tqdm(sentences)):
            mel_filename = synth.synthesize(
                text, i + 1, eval_dir, log_dir, None,
                reference_mel=reference_mel)
            map_file.write('{}|{}\n'.format(text, mel_filename))
    log('synthesized mel spectrograms at {}'.format(eval_dir))
    return eval_dir
def load_checkpoint(self, args, hparams, checkpoint):
    """Resolve the latest checkpoint, validate batch settings, load the model.

    The loaded `Synthesizer` is stored on `self.model`.

    Args:
        args: Parsed CLI namespace (not read by this method).
        hparams: Hyper-parameters; `tacotron_synthesis_batch_size` and
            `tacotron_num_gpus` are validated.
        checkpoint: Directory given to `tf.train.get_checkpoint_state`.

    Raises:
        RuntimeError: If the checkpoint state cannot be resolved.
        ValueError: If the synthesis batch size is smaller than, or not a
            multiple of, the number of GPUs.
    """
    # ./Tacotron-2/tacotron/synthesize.py:tacotron_synthesize
    try:
        checkpoint_path = tf.train.get_checkpoint_state(
            checkpoint).model_checkpoint_path
        log('loaded model at {}'.format(checkpoint_path))
    except Exception as err:
        # Narrowed from a bare except so Ctrl-C / SystemExit are not
        # rewritten as a checkpoint failure; keep the root cause attached.
        raise RuntimeError(
            'Failed to load checkpoint at {}'.format(checkpoint)) from err

    batch_size = hparams.tacotron_synthesis_batch_size
    num_gpus = hparams.tacotron_num_gpus
    if batch_size < num_gpus:
        raise ValueError(
            'Defined synthesis batch size {} is smaller than minimum required {} (num_gpus)! Please verify your synthesis batch size choice.'
            .format(batch_size, num_gpus))
    if batch_size % num_gpus != 0:
        raise ValueError(
            'Defined synthesis batch size {} is not a multiple of {} (num_gpus)! Please verify your synthesis batch size choice!'
            .format(batch_size, num_gpus))

    # ./Tacotron-2/tacotron/synthesize.py:run_live
    log(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, hparams)
    self.model = synth
def run_eval(args, checkpoint_path, output_dir, hparams, sentences):
    """Synthesize each sentence, optionally with a per-sentence speaker id.

    Each map.txt line is `text|mel_filename|speaker_id`.

    Args:
        args: Parsed CLI namespace; `model`, `mels_dir` and `speaker_id` are
            read. When `speaker_id` is not None it is indexed by sentence
            position.
        checkpoint_path: Checkpoint handed to `Synthesizer.load`.
        output_dir: Root directory; `eval` and `logs-eval` are created in it.
        hparams: Hyper-parameters handed to `Synthesizer.load`.
        sentences: Iterable of input texts.

    Returns:
        The path of the `eval` directory.
    """
    eval_dir = os.path.join(output_dir, 'eval')
    log_dir = os.path.join(output_dir, 'logs-eval')

    if args.model == 'Tacotron-2':
        # mels_dir = wavenet_input_dir
        assert os.path.normpath(eval_dir) == os.path.normpath(args.mels_dir)

    # Create output paths if they don't exist
    os.makedirs(eval_dir, exist_ok=True)
    os.makedirs(log_dir, exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'wavs'), exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'plots'), exist_ok=True)

    log(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, hparams, speaker_id=args.speaker_id)

    # Explicit UTF-8 so non-ASCII sentences do not depend on the platform
    # default encoding.
    map_path = os.path.join(eval_dir, 'map.txt')
    with open(map_path, 'w', encoding='utf-8') as map_file:
        for i, text in enumerate(tqdm(sentences)):
            # The two original call sites differed only in this argument.
            if args.speaker_id is not None:
                requested_speaker = [args.speaker_id[i]]
            else:
                requested_speaker = None
            mel_filename, speaker_id = synth.synthesize(
                [text], [i + 1], eval_dir, log_dir, None,
                speaker_id=requested_speaker)
            map_file.write('{}|{}|{}\n'.format(
                text, mel_filename[0], speaker_id[0]))
    log('synthesized mel spectrograms at {}'.format(eval_dir))
    return eval_dir
def run_synthesis(args, checkpoint_path, output_dir, hparams):
    """Synthesize the training metadata in batches and write map.txt.

    Each map.txt line is
    `wav_filename|mel_filename|mel_output_filename|speaker_id|text`.

    Args:
        args: Parsed CLI namespace; `GTA` (compared with the string 'True')
            and `input_dir` are read.
        checkpoint_path: Checkpoint handed to `Synthesizer.load`.
        output_dir: Root directory; a `gta` or `natural` sub-directory is
            created in it.
        hparams: Hyper-parameters; `hop_size`, `sample_rate` and
            `tacotron_synthesis_batch_size` are read.

    Returns:
        The path of the written map.txt.
    """
    GTA = (args.GTA == 'True')
    synth_dir = os.path.join(output_dir, 'gta' if GTA else 'natural')
    # Create output path if it doesn't exist
    os.makedirs(synth_dir, exist_ok=True)

    metadata_filename = os.path.join(args.input_dir, 'train.txt')
    print(hparams_debug_string())
    log(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, hparams, gta=GTA)

    with open(metadata_filename, encoding='utf-8') as f:
        metadata = [line.strip().split('|') for line in f]
    frame_shift_ms = hparams.hop_size / hparams.sample_rate
    hours = sum(int(x[4]) for x in metadata) * frame_shift_ms / 3600
    loaded_msg = 'Loaded metadata for {} examples ({:.2f} hours)'.format(
        len(metadata), hours)
    print(loaded_msg)
    log(loaded_msg)

    # Set inputs batch wise
    batch_size = hparams.tacotron_synthesis_batch_size
    metadata = [
        metadata[i:i + batch_size]
        for i in range(0, len(metadata), batch_size)
    ]

    print('Starting Synthesis')
    log('Starting Synthesis')
    mel_dir = os.path.join(args.input_dir, 'mels')
    wav_dir = os.path.join(args.input_dir, 'audio')
    map_path = os.path.join(synth_dir, 'map.txt')
    with open(map_path, 'w', encoding="utf-8") as map_file:
        for meta in tqdm(metadata):
            texts = [m[5] for m in meta]
            mel_filenames = [os.path.join(mel_dir, m[1]) for m in meta]
            wav_filenames = [os.path.join(wav_dir, m[0]) for m in meta]
            basenames = [
                os.path.basename(m).replace('.npy', '').replace('mel-', '')
                for m in mel_filenames
            ]
            mel_output_filenames, speaker_ids = synth.synthesize(
                texts, basenames, synth_dir, None, mel_filenames)
            for elems in zip(wav_filenames, mel_filenames,
                             mel_output_filenames, speaker_ids, texts):
                map_file.write('|'.join([str(x) for x in elems]) + '\n')

    done_msg = 'synthesized mel spectrograms at {}'.format(synth_dir)
    print(done_msg)
    log(done_msg)
    # Keep the expression on the `return` line: a bare `return` followed by
    # the expression on the next line would return None.
    return map_path
def run_eval(args):
    """Synthesize each entry of the module-level `sentences` to a wav file.

    Output files are named `<base_path>-<index>.wav`, where the base path
    comes from `get_output_base_path(args.checkpoint)`.

    Args:
        args: Parsed CLI namespace; `hparams` and `checkpoint` are read.
    """
    print(hparams_debug_string())
    model = Synthesizer()
    overridden_hparams = hparams.parse(args.hparams)
    model.load(args.checkpoint, overridden_hparams)

    out_prefix = get_output_base_path(args.checkpoint)
    for index, sentence in enumerate(sentences):
        wav_path = '%s-%d.wav' % (out_prefix, index)
        print('Synthesizing: %s' % wav_path)
        # The file is opened before synthesis, exactly as before.
        with open(wav_path, 'wb') as out_file:
            # eval() yields a pair; only the first element is written.
            encoded, _waveform = model.eval(sentence)
            out_file.write(encoded)
def tacotron_synthesize(sentences):
    """Synthesize `sentences` into directory 'A' using the model in
    'trained_model'.

    Args:
        sentences: Iterable of input texts; items are numbered from 1.
    """
    # ignore warnings https://stackoverflow.com/questions/47068709/
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

    output_dir = 'A'
    state = tf.train.get_checkpoint_state('trained_model')
    checkpoint_path = state.model_checkpoint_path
    print('####### checkpoint_path', checkpoint_path)

    model = Synthesizer()
    model.load(checkpoint_path)

    os.makedirs(output_dir, exist_ok=True)
    for index, sentence in enumerate(sentences, start=1):
        model.synthesize(sentence, index, output_dir, None)
    print('Results at: {}'.format(output_dir))
def load_synth(args, checkpoint_path, output_dir):
    """Create the evaluation directories and load a synthesizer.

    Uses the module-level `hparams`.

    Args:
        args: Parsed CLI namespace (not read by this function).
        checkpoint_path: Checkpoint handed to `Synthesizer.load`.
        output_dir: Root directory; `eval` and `logs-eval` are created in it.

    Returns:
        `(synth, eval_dir, log_dir)`.
    """
    eval_dir = os.path.join(output_dir, 'eval')
    log_dir = os.path.join(output_dir, 'logs-eval')

    # Create output paths if they don't exist
    required_dirs = (
        eval_dir,
        log_dir,
        os.path.join(log_dir, 'wavs'),
        os.path.join(log_dir, 'plots'),
    )
    for directory in required_dirs:
        os.makedirs(directory, exist_ok=True)

    log(hparams_debug_string())
    model = Synthesizer()
    model.load(checkpoint_path, hparams)
    return model, eval_dir, log_dir
def run_eval(args, checkpoint_path, output_dir, hparams, sentences):
    """Synthesize `sentences` in batches, optionally with a reference mel.

    When `hparams.tacotron_reference_waveform` is truthy, `args.mel_reference`
    is repeated once per sentence of the batch and forwarded to the
    synthesizer; otherwise `None` is forwarded. Each map.txt line is
    `text|mel_filename|speaker_id`.

    Args:
        args: Parsed CLI namespace; `model`, `mels_dir` and `mel_reference`
            are read.
        checkpoint_path: Checkpoint handed to `Synthesizer.load`.
        output_dir: Root directory; `eval` and `logs-eval` are created in it.
        hparams: Hyper-parameters; `tacotron_synthesis_batch_size` and
            `tacotron_reference_waveform` are read.
        sentences: Sequence of input texts.

    Returns:
        The path of the `eval` directory.
    """
    eval_dir = os.path.join(output_dir, 'eval')
    log_dir = os.path.join(output_dir, 'logs-eval')

    if args.model == 'Tacotron-2':
        assert os.path.normpath(eval_dir) == os.path.normpath(args.mels_dir)

    # Create output paths if they don't exist
    os.makedirs(eval_dir, exist_ok=True)
    os.makedirs(log_dir, exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'wavs'), exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'plots'), exist_ok=True)

    log(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, hparams)

    # Set inputs batch wise
    batch_size = hparams.tacotron_synthesis_batch_size
    batches = [
        sentences[i:i + batch_size]
        for i in range(0, len(sentences), batch_size)
    ]

    log('Starting Synthesis')
    # Explicit UTF-8 so non-ASCII sentences do not depend on the platform
    # default encoding.
    map_path = os.path.join(eval_dir, 'map.txt')
    with open(map_path, 'w', encoding='utf-8') as map_file:
        for i, texts in enumerate(tqdm(batches)):
            basenames = [
                'batch_{}_sentence_{}'.format(i, j) for j in range(len(texts))
            ]
            if hparams.tacotron_reference_waveform:
                # One reference entry per sentence of the batch.
                mel_reference_filename = [args.mel_reference] * len(basenames)
            else:
                mel_reference_filename = None
            mel_filenames, speaker_ids = synth.synthesize(
                texts, basenames, eval_dir, log_dir, None,
                mel_reference_filename)
            for elems in zip(texts, mel_filenames, speaker_ids):
                map_file.write('|'.join([str(x) for x in elems]) + '\n')
    log('synthesized mel spectrograms at {}'.format(eval_dir))
    return eval_dir
def run_synthesis(args, checkpoint_path, output_dir, sentences):
    """Synthesize `sentences` with a reference clip and invert mels to wavs.

    For every sentence a mel is synthesized, inverted to audio and saved
    under `<synth_dir>/wavs/` together with a text file holding the sentence.
    map.txt is created (truncated) but no lines are written to it.

    Args:
        args: Parsed CLI namespace; `input_dir`, `GTA` and `reference_audio`
            are read.
        checkpoint_path: Checkpoint handed to `Synthesizer.load`.
        output_dir: Root directory; a `gta` or `natural` sub-directory is
            created in it.
        sentences: Iterable of input texts.
    """
    metadata_filename = os.path.join(args.input_dir, 'train.txt')
    print(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, gta=args.GTA)

    reference_wav = load_wav(args.reference_audio)
    reference_mel = melspectrogram(reference_wav).transpose()

    # The metadata is only used for the summary line below.
    with open(metadata_filename, encoding='utf-8') as f:
        metadata = [line.strip().split('|') for line in f]
    frame_shift_ms = hparams.hop_size / hparams.sample_rate
    hours = sum(int(x[4]) for x in metadata) * frame_shift_ms / 3600
    print('Loaded metadata for {} examples ({:.2f} hours)'.format(
        len(metadata), hours))

    # Equality test kept on purpose: plain truthiness would also accept
    # non-empty strings such as 'False'.
    if args.GTA == True:  # noqa: E712
        synth_dir = os.path.join(output_dir, 'gta')
    else:
        synth_dir = os.path.join(output_dir, 'natural')

    # Create output paths if they don't exist
    os.makedirs(synth_dir, exist_ok=True)
    os.makedirs(os.path.join(synth_dir, 'wavs/'), exist_ok=True)

    print('starting synthesis')
    with open(os.path.join(synth_dir, 'map.txt'), 'w') as file:
        for i, text in enumerate(tqdm(sentences)):
            mel_output_filename = synth.synthesize(
                text=text, index=i + 1, out_dir=synth_dir, log_dir=None,
                mel_filename=None, reference_mel=reference_mel)

            mels = np.load(mel_output_filename)
            wav = audio.inv_mel_spectrogram(mels.T)
            audio.save_wav(wav, os.path.join(
                synth_dir, 'wavs/speech-wav-{:05d}-mel.wav'.format(i + 1)))

            # The handle was previously called `tf`, shadowing the usual
            # TensorFlow alias; the transcript is written as UTF-8 to match
            # how the metadata is read.
            text_path = os.path.join(
                synth_dir, 'wavs/speech-wav-{:05d}.txt'.format(i + 1))
            with open(text_path, 'w', encoding='utf-8') as text_file:
                text_file.write(text)

            if hparams.predict_linear:
                # NOTE(review): `linear` is not defined anywhere in this
                # function, so this branch raises NameError unless a
                # module-level `linear` exists - confirm where the linear
                # spectrogram should come from.
                # save wav (linear -> wav)
                wav = audio.inv_linear_spectrogram(linear.T)
                audio.save_wav(wav, os.path.join(
                    synth_dir,
                    'wavs/speech-wav-{:05d}-linear.wav'.format(i + 1)))
    print('synthesized mel spectrograms at {}'.format(synth_dir))
def run_eval(args, checkpoint_path, output_dir, hparams, sentences):
    """Synthesize each sentence as a batch of one and write map.txt.

    Each map.txt line is `text|mel_filename|speaker_id`. Nothing is returned.

    Args:
        args: Parsed CLI namespace (not read by this function).
        checkpoint_path: Checkpoint handed to `Synthesizer.load`.
        output_dir: Root directory; `eval` and `logs-eval` are created in it.
        hparams: Hyper-parameters handed to `Synthesizer.load`.
        sentences: Iterable of input texts.
    """
    eval_dir = os.path.join(output_dir, 'eval')
    log_dir = os.path.join(output_dir, 'logs-eval')

    os.makedirs(eval_dir, exist_ok=True)
    os.makedirs(log_dir, exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'wavs'), exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'plots'), exist_ok=True)

    log(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, hparams)

    # Explicit UTF-8 so non-ASCII sentences do not depend on the platform
    # default encoding.
    map_path = os.path.join(eval_dir, 'map.txt')
    with open(map_path, 'w', encoding='utf-8') as map_file:
        for i, text in enumerate(tqdm(sentences)):
            mel_filename, speaker_id = synth.synthesize(
                [text], [i + 1], eval_dir, log_dir, None)
            map_file.write('{}|{}|{}\n'.format(
                text, mel_filename[0], speaker_id[0]))
    log('synthesized mel spectrograms at {}'.format(eval_dir))
def run_eval(args, checkpoint_path, output_dir):
    """Synthesize every sentence in the module-level `hparams.sentences`.

    Each map.txt line is `text|mel_filename`. Nothing is returned.

    Args:
        args: Parsed CLI namespace (not read by this function).
        checkpoint_path: Checkpoint handed to `Synthesizer.load`.
        output_dir: Root directory; `eval` and `logs-eval` are created in it.
    """
    synth = Synthesizer()
    synth.load(checkpoint_path)
    eval_dir = os.path.join(output_dir, 'eval')
    log_dir = os.path.join(output_dir, 'logs-eval')

    # Create output paths if they don't exist
    os.makedirs(eval_dir, exist_ok=True)
    os.makedirs(log_dir, exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'wavs'), exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'plots'), exist_ok=True)

    # Explicit UTF-8 so non-ASCII sentences do not depend on the platform
    # default encoding.
    map_path = os.path.join(eval_dir, 'map.txt')
    with open(map_path, 'w', encoding='utf-8') as map_file:
        for i, text in enumerate(tqdm(hparams.sentences)):
            mel_filename = synth.synthesize(
                text, i + 1, eval_dir, log_dir, None)
            map_file.write('{}|{}\n'.format(text, mel_filename))
def run_inference(args, checkpoint_path, output_dir, hparams):
    """Compute latent embeddings for every training mel and pickle them.

    Embeddings are keyed by the mel file's base name with its first four and
    last four characters removed.

    Args:
        args: Parsed CLI namespace; `input_dir` is read.
        checkpoint_path: Checkpoint handed to `Synthesizer.load`.
        output_dir: Directory that receives `latent_embeddings.pkl`.
        hparams: Hyper-parameters; `hop_size`, `sample_rate` and
            `tacotron_synthesis_batch_size` are read.

    Returns:
        The path of the written pickle file.
    """
    os.makedirs(output_dir, exist_ok=True)
    train_list = os.path.join(args.input_dir, 'train.txt')

    log(hparams_debug_string())
    model = Synthesizer()
    model.load(checkpoint_path, hparams, gta=False, vae_code_mode='inference')

    rows = []
    with open(train_list, encoding='utf-8') as handle:
        for line in handle:
            rows.append(line.strip().split('|'))

    seconds_per_frame = hparams.hop_size / hparams.sample_rate
    total_frames = sum(int(row[4]) for row in rows)
    hours = total_frames * seconds_per_frame / 3600
    log('Loaded metadata for {} examples ({:.2f} hours)'.format(
        len(rows), hours))

    # Set inputs batch wise
    batches = []
    for offset in range(0, len(rows), hparams.tacotron_synthesis_batch_size):
        batches.append(
            rows[offset:offset + hparams.tacotron_synthesis_batch_size])

    log('Starting inference')
    mel_dir = os.path.join(args.input_dir, 'mels')
    all_embeddings = {}
    for batch in tqdm(batches):
        mel_paths = [os.path.join(mel_dir, row[1]) for row in batch]
        embeddings = model.inference(mel_paths)
        for mel_path, embedding in zip(mel_paths, embeddings):
            key = os.path.basename(mel_path)[4:-4]
            all_embeddings[key] = embedding

    log('Saving latent embeddings...')
    with open(os.path.join(output_dir, 'latent_embeddings.pkl'), 'wb') as out:
        pickle.dump(all_embeddings, out)
    log('Latent embeddings saved at {}'.format(output_dir))
    return os.path.join(output_dir, 'latent_embeddings.pkl')
def run_eval(args, checkpoint_path, output_dir):
    """Synthesize `hparams.sentences` conditioned on a reference clip.

    Uses the module-level `hparams`. Each map.txt line is
    `text|mel_filename`. Nothing is returned.

    Args:
        args: Parsed CLI namespace; `reference_audio` is read.
        checkpoint_path: Checkpoint handed to `Synthesizer.load`.
        output_dir: Root directory; `eval` and `logs-eval` are created in it.
    """
    print(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path)
    eval_dir = os.path.join(output_dir, 'eval')
    log_dir = os.path.join(output_dir, 'logs-eval')

    wav = load_wav(args.reference_audio)
    reference_mel = melspectrogram(wav).transpose()

    # Create output paths if they don't exist
    os.makedirs(eval_dir, exist_ok=True)
    os.makedirs(log_dir, exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'wavs'), exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'plots'), exist_ok=True)

    # Explicit UTF-8 so non-ASCII sentences do not depend on the platform
    # default encoding.
    map_path = os.path.join(eval_dir, 'map.txt')
    with open(map_path, 'w', encoding='utf-8') as map_file:
        for i, text in enumerate(tqdm(hparams.sentences)):
            mel_filename = synth.synthesize(
                text, i + 1, eval_dir, log_dir, None, reference_mel)
            map_file.write('{}|{}\n'.format(text, mel_filename))
    print('synthesized mel spectrograms at {}'.format(eval_dir))
def run_eval(args, checkpoint_path, output_dir, hparams, text, step, cwd):
    """Synthesize a single `text` for a given `step`.

    Only the log directory and its `plots` sub-directory are created here;
    the `eval` directory itself is not.

    Args:
        args: Parsed CLI namespace; `model` and `mels_dir` are read.
        checkpoint_path: Checkpoint handed to `Synthesizer.load`.
        output_dir: Root directory of the `eval` and `logs-eval` paths.
        hparams: Hyper-parameters handed to `Synthesizer.load`.
        text: Input forwarded to `Synthesizer.synthesize`.
        step: Forwarded to `Synthesizer.synthesize` as its second argument.
        cwd: Forwarded to `Synthesizer.synthesize` as its last argument.

    Returns:
        The path of the `eval` directory.
    """
    eval_dir = os.path.join(output_dir, 'eval')
    log_dir = os.path.join(output_dir, 'logs-eval')

    tacotron2_mode = args.model == 'Tacotron-2'
    if tacotron2_mode:
        assert os.path.normpath(eval_dir) == os.path.normpath(args.mels_dir)

    # Create output paths if they don't exist
    for directory in (log_dir, os.path.join(log_dir, 'plots')):
        os.makedirs(directory, exist_ok=True)

    log(hparams_debug_string())
    model = Synthesizer()
    model.load(checkpoint_path, hparams)

    log('Starting Synthesis')
    model.synthesize(text, step, eval_dir, log_dir, None, cwd)
    log('synthesized mel spectrograms at {}'.format(eval_dir))
    return eval_dir
def run_eval(args, checkpoint_path, output_dir, hparams, sentences):
    """Synthesize `sentences` in batches with an optional reference mel.

    When `args.reference_audio` is None the reference mel is None. Each
    map.txt line is `text|mel_filename|speaker_id`.

    Args:
        args: Parsed CLI namespace; `model`, `mels_dir` and `reference_audio`
            are read.
        checkpoint_path: Checkpoint handed to `Synthesizer.load`.
        output_dir: Root directory; `eval` and `logs-eval` are created in it.
        hparams: Hyper-parameters; `sample_rate` and
            `tacotron_synthesis_batch_size` are read.
        sentences: Sequence of input texts.

    Returns:
        The path of the `eval` directory.
    """
    eval_dir = os.path.join(output_dir, 'eval')
    log_dir = os.path.join(output_dir, 'logs-eval')

    if args.model == 'Tacotron-2':
        assert os.path.normpath(eval_dir) == os.path.normpath(args.mels_dir)

    # Create output paths if they don't exist
    os.makedirs(eval_dir, exist_ok=True)
    os.makedirs(log_dir, exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'wavs'), exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'plots'), exist_ok=True)

    log(hparams_debug_string())
    synth = Synthesizer()

    if args.reference_audio is not None:
        ref_wav = audio.load_wav(args.reference_audio, sr=hparams.sample_rate)
        reference_mel = audio.melspectrogram(
            ref_wav, hparams).astype(np.float32).T
    else:
        reference_mel = None

    synth.load(checkpoint_path, hparams, reference_mel=reference_mel)

    # Set inputs batch wise
    batch_size = hparams.tacotron_synthesis_batch_size
    batches = [
        sentences[i:i + batch_size]
        for i in range(0, len(sentences), batch_size)
    ]

    log('Starting Synthesis')
    # Explicit UTF-8 so non-ASCII sentences do not depend on the platform
    # default encoding.
    map_path = os.path.join(eval_dir, 'map.txt')
    with open(map_path, 'w', encoding='utf-8') as map_file:
        for i, texts in enumerate(tqdm(batches)):
            basenames = [
                'batch_{:03d}_sentence_{:03d}'.format(i, j)
                for j in range(len(texts))
            ]
            mel_filenames, speaker_ids = synth.synthesize(
                texts, basenames, eval_dir, log_dir, None,
                reference_mel=reference_mel)
            for elems in zip(texts, mel_filenames, speaker_ids):
                map_file.write('|'.join([str(x) for x in elems]) + '\n')
    log('synthesized mel spectrograms at {}'.format(eval_dir))
    return eval_dir
def run_live(args, checkpoint_path, hparams):
    """Interactive loop: read a line from stdin and synthesize it.

    The loop ends on Ctrl-C or when stdin reaches end-of-file; in both cases
    a farewell message is synthesized before returning.

    Args:
        args: Parsed CLI namespace (not read by this function).
        checkpoint_path: Checkpoint handed to `Synthesizer.load`.
        hparams: Hyper-parameters handed to `Synthesizer.load`.
    """
    # Log to Terminal without keeping any records in files
    log(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, hparams)

    # Generate fast greeting message
    greetings = 'Hello, Welcome to the Live testing tool. Please type a message and I will try to read it!'
    log(greetings)
    generate_fast(synth, greetings)

    # Interaction loop
    while True:
        try:
            text = input()
            generate_fast(synth, text)
        except (KeyboardInterrupt, EOFError):
            # input() raises EOFError on Ctrl-D / closed stdin; treat it as a
            # normal exit instead of letting a traceback escape.
            leave = 'Thank you for testing our features. see you soon.'
            log(leave)
            generate_fast(synth, leave)
            sleep(2)
            break
def run_synthesis(args, checkpoint_path, output_dir, hparams):
    """Synthesize every entry of `<input_dir>/train.txt` and write map.txt.

    Each map.txt line is
    `wav_filename|mel_filename|mel_output_filename|text`.

    Args:
        args: Parsed CLI namespace; `GTA` (compared with the string 'True')
            and `input_dir` are read.
        checkpoint_path: Checkpoint handed to `Synthesizer.load`.
        output_dir: Root directory; a `gta` or `natural` sub-directory is
            created in it.
        hparams: Hyper-parameters; `hop_size` and `sample_rate` are read.

    Returns:
        The path of the written map.txt.
    """
    GTA = (args.GTA == 'True')
    synth_dir = os.path.join(output_dir, 'gta' if GTA else 'natural')
    # Create output path if it doesn't exist
    os.makedirs(synth_dir, exist_ok=True)

    metadata_filename = os.path.join(args.input_dir, 'train.txt')
    log(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, hparams, gta=GTA)

    with open(metadata_filename, encoding='utf-8') as f:
        metadata = [line.strip().split('|') for line in f]
    frame_shift_ms = hparams.hop_size / hparams.sample_rate
    hours = sum(int(x[4]) for x in metadata) * frame_shift_ms / 3600
    log('Loaded metadata for {} examples ({:.2f} hours)'.format(
        len(metadata), hours))

    log('starting synthesis')
    mel_dir = os.path.join(args.input_dir, 'mels')
    wav_dir = os.path.join(args.input_dir, 'audio')
    map_path = os.path.join(synth_dir, 'map.txt')
    # The metadata is read as UTF-8, so write the map with the same encoding
    # rather than the platform default.
    with open(map_path, 'w', encoding='utf-8') as map_file:
        for i, meta in enumerate(tqdm(metadata)):
            text = meta[5]
            mel_filename = os.path.join(mel_dir, meta[1])
            wav_filename = os.path.join(wav_dir, meta[0])
            mel_output_filename = synth.synthesize(
                text, i + 1, synth_dir, None, mel_filename)
            map_file.write('{}|{}|{}|{}\n'.format(
                wav_filename, mel_filename, mel_output_filename, text))
    log('synthesized mel spectrograms at {}'.format(synth_dir))
    return map_path
def embedding_synthesize(args, hparams, checkpoint, ppgs=None, speakers=None):
    """Resolve the checkpoint, validate batch settings and run evaluation.

    Args:
        args: Parsed CLI namespace; `output_dir` is read.
        hparams: Hyper-parameters; `tacotron_synthesis_batch_size` and
            `tacotron_num_gpus` are validated.
        checkpoint: Directory given to `tf.train.get_checkpoint_state`.
        ppgs: Not used by this function.
        speakers: Not used by this function.

    Returns:
        Whatever `run_eval` returns; the loaded synthesizer is passed to it
        as the fifth positional argument.

    Raises:
        RuntimeError: If the checkpoint state cannot be resolved.
        ValueError: If the synthesis batch size is smaller than, or not a
            multiple of, the number of GPUs.
    """
    output_dir = args.output_dir

    try:
        checkpoint_path = tf.train.get_checkpoint_state(
            checkpoint).model_checkpoint_path
        log('loaded model at {}'.format(checkpoint_path))
    except Exception as err:
        # Narrowed from a bare except so Ctrl-C / SystemExit are not
        # rewritten as a checkpoint failure; keep the root cause attached.
        raise RuntimeError(
            'Failed to load checkpoint at {}'.format(checkpoint)) from err

    batch_size = hparams.tacotron_synthesis_batch_size
    num_gpus = hparams.tacotron_num_gpus
    if batch_size < num_gpus:
        raise ValueError(
            'Defined synthesis batch size {} is smaller than minimum required {} (num_gpus)! Please verify your synthesis batch size choice.'.format(
                batch_size, num_gpus))
    if batch_size % num_gpus != 0:
        raise ValueError(
            'Defined synthesis batch size {} is not a multiple of {} (num_gpus)! Please verify your synthesis batch size choice!'.format(
                batch_size, num_gpus))

    synth = Synthesizer()
    synth.load(checkpoint_path, hparams, reference_mels=True)
    return run_eval(args, checkpoint_path, output_dir, hparams, synth)
import argparse
from tacotron.utils import makedirs, str2bool
from tacotron.synthesizer import Synthesizer

# Command-line entry point: synthesize a single text with one speaker id and
# keep the first element of the synthesizer's result in `audio`.
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--load_path', required=True)
    parser.add_argument('--sample_path', default="samples")
    parser.add_argument('--text', required=True)
    parser.add_argument('--num_speakers', default=1, type=int)
    parser.add_argument('--speaker_id', default=0, type=int)
    parser.add_argument('--checkpoint_step', default=None, type=int)
    parser.add_argument('--is_korean', default=True, type=str2bool)
    config = parser.parse_args()

    # Make sure the sample directory is there before synthesizing into it.
    makedirs(config.sample_path)

    synthesizer = Synthesizer()
    synthesizer.load(config.load_path, config.num_speakers, config.checkpoint_step)

    # synthesize() is called with one-element lists, so take element 0.
    audio = synthesizer.synthesize(texts=[config.text], base_path=config.sample_path, speaker_ids=[config.speaker_id], attention_trim=False, isKorean=config.is_korean)[0]
'--name', help='Name of logging directory if the two models were trained together.') args = parser.parse_args() os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' os.environ['CUDA_VISIBLE_DEVICES'] = '0' checkpoint = os.path.join('logs-Tacotron', 'taco_' + args.checkpoint) try: checkpoint_path = tf.train.get_checkpoint_state( checkpoint).model_checkpoint_path log('loaded model at {}'.format(checkpoint_path)) except: raise RuntimeError('Failed to load checkpoint at {}'.format(checkpoint)) synth = Synthesizer() modified_hp = hparams.parse(args.hparams) synth.load(checkpoint_path, modified_hp) class Res: def on_get(self, req, res): res.body = html_body res.content_type = "text/html" class Syn: def on_get(self, req, res): if not req.params.get('text'): raise falcon.HTTPBadRequest() log('Synthesize {}'.format(p(req.params.get('text')))) res.data = synth.eval(p(req.params.get('text'))) res.content_type = "audio/wav"
default='pretrained/', help='Path to model checkpoint') parser.add_argument( '--hparams', default='', help= 'Hyperparameter overrides as a comma-separated list of name=value pairs') parser.add_argument('--port', default=9000, help='Port of Http service') parser.add_argument('--host', default="localhost", help='Host of Http service') parser.add_argument( '--name', help='Name of logging directory if the two models were trained together.') args = parser.parse_args() synth = Synthesizer() modified_hp = hparams.parse(args.hparams) synth.load(args.checkpoint, modified_hp) class Res: def on_get(self, req, res): res.body = html_body res.content_type = "text/html" class Syn: def on_get(self, req, res): if not req.params.get('text'): raise falcon.HTTPBadRequest() res.data = synth.eval(p(req.params.get('text'))) res.content_type = "audio/wav"
from tacotron.infolog import log
from tacotron.synthesizer import Synthesizer
from tqdm import tqdm
from pypinyin import pinyin, Style

# Module-level setup: load a fixed Tacotron checkpoint, then build the ASR
# model on a Keras session with GPU memory growth enabled.
# NOTE(review): `os`, `tf`, `hparams` and `hparams_debug_string` are used
# below before/without a visible import - presumably imported earlier in the
# file; confirm.
checkpoint_path = os.path.join('taco_model2','tacotron_model.ckpt-100000')
output_dir = os.path.join('taco_output','org')
eval_dir = output_dir
log_dir = os.path.join(output_dir, 'logs-eval')

#Create output path if it doesn't exist
os.makedirs(eval_dir, exist_ok=True)
os.makedirs(log_dir, exist_ok=True)
os.makedirs(os.path.join(log_dir, 'wavs'), exist_ok=True)
os.makedirs(os.path.join(log_dir, 'plots'), exist_ok=True)

log(hparams_debug_string())
synth = Synthesizer()
synth.load(checkpoint_path, hparams)

from asr_model.model_vgg_ctc import SpeechModel
import os
import platform as plat

# The allocation below really has no effect; I do not know how it should be
# used properly.
# Deploying two models at the same time is always a bit troublesome, and I
# could not get it placed on the CPU either - whoever is able to, please fix it.
import keras.backend.tensorflow_backend as KTF
config = tf.ConfigProto()
config.gpu_options.allow_growth=True
session = tf.Session(config=config)
KTF.set_session(session)
S_M=SpeechModel('dataset')
import argparse

# Command-line script: synthesize one text with a given checkpoint and write
# the resulting bytes to the output file.
parser = argparse.ArgumentParser()
parser.add_argument('--checkpoint', required=False, help='Full path to model checkpoint', default="tacotron/tmp/tacotron-20180906/model.ckpt")
parser.add_argument('--text', required=False, help='Text to synthesize', default="Hello World")
parser.add_argument('--output', required=False, help='File path of output', default="HelloWorld.wav")
args = parser.parse_args()

checkpoint = str(args.checkpoint)
text = str(args.text)
output = str(args.output)

print("Checkpoint: " + checkpoint)
print("Text: " + text)
print("Output: " + output)
print("")

print("Loading model...")
synthesizer = Synthesizer()
synthesizer.load(checkpoint)
print("Loading model completed!")
print("")

print("Sythesizing text...")
# Synthesize before opening the output file so a failure does not leave an
# empty or truncated file behind.
synthesized = synthesizer.synthesize(text)
with open(output, 'wb') as file:
    file.write(synthesized)
print("Sythesizing text completed!")
print("")