def run_eval(args, checkpoint_path, output_dir, hparams, sentences):
    """Synthesize mel spectrograms for free-form eval sentences.

    Args:
        args: parsed CLI namespace; reads ``args.model`` and ``args.mels_dir``.
        checkpoint_path: Tacotron checkpoint to restore.
        output_dir: root directory for eval outputs and logs.
        hparams: hyperparameters forwarded to the synthesizer.
        sentences: iterable of raw text sentences to synthesize.

    Returns:
        The eval directory containing the generated mels and ``map.txt``.
    """
    eval_dir = os.path.join(output_dir, 'eval')
    log_dir = os.path.join(output_dir, 'logs-eval')

    if args.model in ('Both', 'Tacotron-2'):
        # When a vocoder follows, its input dir must be exactly our eval dir.
        assert os.path.normpath(eval_dir) == os.path.normpath(args.mels_dir)

    #Create output path if it doesn't exist
    os.makedirs(eval_dir, exist_ok=True)
    os.makedirs(log_dir, exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'wavs'), exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'plots'), exist_ok=True)

    log(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, hparams)

    # map.txt records "<text>|<mel file>" so downstream tools can pair them.
    # (cleanup: removed an unused `start = time.time()` inside the loop)
    with open(os.path.join(eval_dir, 'map.txt'), 'w') as file:
        for i, text in enumerate(tqdm(sentences)):
            mel_filename = synth.synthesize(text, i + 1, eval_dir, log_dir, None)
            file.write('{}|{}\n'.format(text, mel_filename))

    log('synthesized mel spectrograms at {}'.format(eval_dir))
    return eval_dir
def run_synthesis(args, checkpoint_path, output_dir, hparams):
    """Run batched GTA (or natural) synthesis over the training metadata.

    Reads train.txt from args.input_dir, synthesizes every utterance in
    batches, and writes a map.txt pairing wav/mel/output/speaker/text.
    Returns the path to that map.txt.
    """
    use_gta = args.GTA == 'True'
    synth_dir = os.path.join(output_dir, 'gta' if use_gta else 'natural')
    #Create output path if it doesn't exist
    os.makedirs(synth_dir, exist_ok=True)

    metadata_filename = os.path.join(args.input_dir, 'train.txt')
    log(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, hparams, gta=use_gta)

    with open(metadata_filename, encoding='utf-8') as f:
        metadata = [row.strip().split('|') for row in f]

    # Field 4 holds the mel frame count; report total covered audio time.
    frame_shift_ms = hparams.hop_size / hparams.sample_rate
    hours = sum(int(row[4]) for row in metadata) * frame_shift_ms / 3600
    log('Loaded metadata for {} examples ({:.2f} hours)'.format(len(metadata), hours))

    # Group the metadata into synthesis-sized batches.
    batch_size = hparams.tacotron_synthesis_batch_size
    metadata = [metadata[pos:pos + batch_size] for pos in range(0, len(metadata), batch_size)]

    log('Starting Synthesis')
    mel_dir = os.path.join(args.input_dir, 'mels')
    wav_dir = os.path.join(args.input_dir, 'audio')
    with open(os.path.join(synth_dir, 'map.txt'), 'w') as map_file:
        for batch in tqdm(metadata):
            texts = [row[5] for row in batch]
            mel_filenames = [os.path.join(mel_dir, row[1]) for row in batch]
            wav_filenames = [os.path.join(wav_dir, row[0]) for row in batch]
            basenames = [os.path.basename(name).replace('.npy', '').replace('mel-', '')
                         for name in mel_filenames]
            mel_output_filenames, speaker_ids = synth.synthesize(
                texts, basenames, synth_dir, None, mel_filenames)

            for elems in zip(wav_filenames, mel_filenames, mel_output_filenames,
                             speaker_ids, texts):
                map_file.write('|'.join(str(x) for x in elems) + '\n')

    log('synthesized mel spectrograms at {}'.format(synth_dir))
    return os.path.join(synth_dir, 'map.txt')
def run_eval(args, checkpoint_path, output_dir, hparams, sentences):
    """Batched eval synthesis, optionally conditioned on a reference mel.

    When ``hparams.tacotron_reference_waveform`` is set, every sentence in
    a batch is conditioned on the single reference mel given by
    ``args.mel_reference``.  Returns the eval directory.
    """
    eval_dir = os.path.join(output_dir, 'eval')
    log_dir = os.path.join(output_dir, 'logs-eval')

    if args.model == 'Tacotron-2':
        assert os.path.normpath(eval_dir) == os.path.normpath(args.mels_dir)

    #Create output path if it doesn't exist
    os.makedirs(eval_dir, exist_ok=True)
    os.makedirs(log_dir, exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'wavs'), exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'plots'), exist_ok=True)

    log(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, hparams)

    #Set inputs batch wise
    sentences = [sentences[i:i + hparams.tacotron_synthesis_batch_size]
                 for i in range(0, len(sentences), hparams.tacotron_synthesis_batch_size)]

    # (cleanup: removed commented-out hardcoded absolute reference paths and
    #  an unused `start = time.time()`)
    log('Starting Synthesis')
    with open(os.path.join(eval_dir, 'map.txt'), 'w') as file:
        for i, texts in enumerate(tqdm(sentences)):
            basenames = [
                'batch_{}_sentence_{}'.format(i, j) for j in range(len(texts))
            ]
            if hparams.tacotron_reference_waveform:
                # All sentences in the batch share the same reference mel.
                mel_reference_filename = [args.mel_reference] * len(basenames)
            else:
                mel_reference_filename = None
            mel_filenames, speaker_ids = synth.synthesize(
                texts, basenames, eval_dir, log_dir, None, mel_reference_filename)
            for elems in zip(texts, mel_filenames, speaker_ids):
                file.write('|'.join([str(x) for x in elems]) + '\n')

    log('synthesized mel spectrograms at {}'.format(eval_dir))
    return eval_dir
def run_eval(args, checkpoint_path, output_dir, hparams, sentences, speaker_labels, language_labels):
    """Batched eval synthesis with per-sentence speaker and language labels.

    The three parallel input lists are chunked identically so that batch i
    of sentences lines up with batch i of speaker and language labels.
    Returns the eval directory.
    """
    eval_dir = os.path.join(output_dir, 'eval')
    log_dir = os.path.join(output_dir, 'logs-eval')

    if args.model == 'Tacotron-2':
        assert os.path.normpath(eval_dir) == os.path.normpath(args.mels_dir)

    #Create output path if it doesn't exist
    for path in (eval_dir, log_dir,
                 os.path.join(log_dir, 'wavs'), os.path.join(log_dir, 'plots')):
        os.makedirs(path, exist_ok=True)

    log(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, hparams)

    #Set inputs batch wise
    size = hparams.tacotron_synthesis_batch_size

    def _batched(items):
        # Chunk a flat list into consecutive batches of `size`.
        return [items[pos:pos + size] for pos in range(0, len(items), size)]

    sentences = _batched(sentences)
    speaker_labels = _batched(speaker_labels)
    language_labels = _batched(language_labels)

    log('Starting Synthesis')
    with open(os.path.join(eval_dir, 'map.txt'), 'w') as map_file:
        for batch_idx, texts in enumerate(tqdm(sentences)):
            start = time.time()
            basenames = ['batch_{}_sentence_{}'.format(batch_idx, j)
                         for j in range(len(texts))]
            mel_filenames, speaker_ids = synth.synthesize(
                texts, speaker_labels[batch_idx], language_labels[batch_idx],
                basenames, eval_dir, log_dir, None)
            for elems in zip(texts, mel_filenames, speaker_ids):
                map_file.write('|'.join([str(x) for x in elems]) + '\n')

    log('synthesized mel spectrograms at {}'.format(eval_dir))
    return eval_dir
def run_synthesis(args, checkpoint_path, output_dir, sentences):
    """Synthesize the given sentences conditioned on one reference audio clip.

    Writes predicted mels, reconstructed wavs, and the source text under
    <output_dir>/{gta|natural}/wavs/.  Relies on module-level names
    `hparams`, `audio`, `load_wav`, `melspectrogram`, and `np`.
    """
    metadata_filename = os.path.join(args.input_dir, 'train.txt')
    print(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, gta=args.GTA)

    # Reference style/prosody: mel spectrogram of the provided recording.
    wav = load_wav(args.reference_audio)
    reference_mel = melspectrogram(wav).transpose()

    with open(metadata_filename, encoding='utf-8') as f:
        metadata = [line.strip().split('|') for line in f]

    # Field 4 is presumably the mel frame count — report total audio time.
    frame_shift_ms = hparams.hop_size / hparams.sample_rate
    hours = sum([int(x[4]) for x in metadata]) * frame_shift_ms / (3600)
    print('Loaded metadata for {} examples ({:.2f} hours)'.format(len(metadata), hours))

    if args.GTA==True:
        synth_dir = os.path.join(output_dir, 'gta')
    else:
        synth_dir = os.path.join(output_dir, 'natural')

    #Create output path if it doesn't exist
    os.makedirs(synth_dir, exist_ok=True)
    os.makedirs(os.path.join(synth_dir, 'wavs/'), exist_ok=True)

    print('starting synthesis')
    with open(os.path.join(synth_dir, 'map.txt'), 'w') as file:
        #for i, meta in enumerate(tqdm(metadata)):
        #text = meta[5]
        for i, text in enumerate(tqdm(sentences)):
            mel_output_filename = synth.synthesize(text=text, index=i+1, out_dir=synth_dir, log_dir=None, mel_filename=None, reference_mel=reference_mel)
            # Invert the predicted mel back to a waveform for quick listening.
            mels = np.load(mel_output_filename)
            wav = audio.inv_mel_spectrogram(mels.T)
            audio.save_wav(wav, os.path.join(synth_dir, 'wavs/speech-wav-{:05d}-mel.wav'.format(i+1)))
            # Save the source text next to the wav for later inspection.
            with open(os.path.join(synth_dir, 'wavs/speech-wav-{:05d}.txt'.format(i+1)), 'w') as tf:
                tf.write(text)
            if hparams.predict_linear:
                # save wav (linear -> wav)
                # NOTE(review): `linear` is never assigned in this function, so
                # this branch raises NameError when hparams.predict_linear is
                # set — confirm where the linear spectrogram should come from.
                wav = audio.inv_linear_spectrogram(linear.T)
                audio.save_wav(wav, os.path.join(synth_dir, 'wavs/speech-wav-{:05d}-linear.wav'.format(i+1)))
            #file.write('{}|{}|{}|{}\n'.format(text, mel_filename, mel_output_filename, wav_filename))
    print('synthesized mel spectrograms at {}'.format(synth_dir))
def run_synthesis(args, checkpoint_path, output_dir, hparams):
    """GTA/natural synthesis over train.txt, one utterance at a time.

    Writes a map.txt of wav|mel|output|speaker|text rows and returns its path.
    """
    use_gta = args.GTA == 'True'
    synth_dir = os.path.join(output_dir, 'gta' if use_gta else 'natural')
    #Create output path if it doesn't exist
    os.makedirs(synth_dir, exist_ok=True)

    metadata_filename = os.path.join(args.input_dir, 'train.txt')
    log(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, hparams, gta=use_gta)

    with open(metadata_filename, encoding='utf-8') as f:
        metadata = [row.strip().split('|') for row in f]

    # Field 4 holds the mel frame count; report total covered audio time.
    frame_shift_ms = hparams.hop_size / hparams.sample_rate
    hours = sum(int(row[4]) for row in metadata) * frame_shift_ms / 3600
    log('Loaded metadata for {} examples ({:.2f} hours)'.format(len(metadata), hours))

    log('starting synthesis')
    mel_dir = os.path.join(args.input_dir, 'mels')
    wav_dir = os.path.join(args.input_dir, 'audio')
    with open(os.path.join(synth_dir, 'map.txt'), 'w') as map_file:
        for row in tqdm(metadata):
            text = row[5]
            mel_filename = os.path.join(mel_dir, row[1])
            wav_filename = os.path.join(wav_dir, row[0])
            basename = os.path.basename(mel_filename).replace('.npy', '').replace('mel-', '')
            mel_output_filename, speaker_id = synth.synthesize(
                text, basename, synth_dir, None, mel_filename)
            map_file.write('{}|{}|{}|{}|{}\n'.format(
                wav_filename, mel_filename, mel_output_filename, speaker_id, text))

    log('synthesized mel spectrograms at {}'.format(synth_dir))
    return os.path.join(synth_dir, 'map.txt')
def run_eval(args, checkpoint_path, output_dir):
    """Synthesize every sentence in ``hparams.sentences`` into output_dir/eval."""
    # print(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path)

    eval_dir = os.path.join(output_dir, 'eval')
    log_dir = os.path.join(output_dir, 'logs-eval')

    #Create output path if it doesn't exist
    for path in (eval_dir, log_dir,
                 os.path.join(log_dir, 'wavs'), os.path.join(log_dir, 'plots')):
        os.makedirs(path, exist_ok=True)

    # map.txt pairs each sentence with its generated mel file.
    with open(os.path.join(eval_dir, 'map.txt'), 'w') as map_file:
        for index, text in enumerate(tqdm(hparams.sentences)):
            start = time.time()
            mel_filename = synth.synthesize(text, index + 1, eval_dir, log_dir, None)
            map_file.write('{}|{}\n'.format(text, mel_filename))
def run_eval(args, checkpoint_path, output_dir, hparams, sentences):
    """Synthesize each sentence individually and record text|mel|speaker rows."""
    eval_dir = os.path.join(output_dir, 'eval')
    log_dir = os.path.join(output_dir, 'logs-eval')

    for path in (eval_dir, log_dir,
                 os.path.join(log_dir, 'wavs'), os.path.join(log_dir, 'plots')):
        os.makedirs(path, exist_ok=True)

    log(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, hparams)

    with open(os.path.join(eval_dir, 'map.txt'), 'w') as map_file:
        for index, text in enumerate(tqdm(sentences)):
            start = time.time()
            # Single-element lists: the synthesizer API is batch-oriented.
            mel_filename, speaker_id = synth.synthesize(
                [text], [index + 1], eval_dir, log_dir, None)
            map_file.write('{}|{}|{}\n'.format(text, mel_filename[0], speaker_id[0]))

    log('synthesized mel spectrograms at {}'.format(eval_dir))
def run_inference(args, checkpoint_path, output_dir, hparams):
    """Compute VAE latent embeddings for every training mel and pickle them.

    Loads the model in inference code mode, runs batched inference over all
    mels listed in train.txt, and writes a dict of
    {utterance basename: latent embedding} to latent_embeddings.pkl.

    Returns:
        Path to the written latent_embeddings.pkl.
    """
    os.makedirs(output_dir, exist_ok=True)
    metadata_filename = os.path.join(args.input_dir, 'train.txt')
    log(hparams_debug_string())

    synth = Synthesizer()
    synth.load(checkpoint_path, hparams, gta=False, vae_code_mode='inference')

    with open(metadata_filename, encoding='utf-8') as f:
        metadata = [line.strip().split('|') for line in f]
    frame_shift_ms = hparams.hop_size / hparams.sample_rate
    hours = sum([int(x[4]) for x in metadata]) * frame_shift_ms / (3600)
    log('Loaded metadata for {} examples ({:.2f} hours)'.format(len(metadata), hours))

    #Set inputs batch wise
    metadata = [metadata[i:i + hparams.tacotron_synthesis_batch_size]
                for i in range(0, len(metadata), hparams.tacotron_synthesis_batch_size)]

    log('Starting inference')
    mel_dir = os.path.join(args.input_dir, 'mels')
    all_embeddings = {}
    # Iterate batches directly; the old enumerate() index was unused.
    for meta in tqdm(metadata):
        mel_filenames = [os.path.join(mel_dir, m[1]) for m in meta]
        latent_embeddings = synth.inference(mel_filenames)
        for mel_filename, latent_embedding in zip(mel_filenames, latent_embeddings):
            # Key is the basename with the 'mel-' prefix and '.npy' suffix cut off.
            all_embeddings[os.path.basename(mel_filename)[4:-4]] = latent_embedding

    log('Saving latent embeddings...')
    with open(os.path.join(output_dir, 'latent_embeddings.pkl'), 'wb') as file:
        pickle.dump(all_embeddings, file)
    log('Latent embeddings saved at {}'.format(output_dir))
    return os.path.join(output_dir, 'latent_embeddings.pkl')
def run_eval(args, checkpoint_path, output_dir):
    """Synthesize ``hparams.sentences`` conditioned on ``args.reference_audio``."""
    print(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path)

    eval_dir = os.path.join(output_dir, 'eval')
    log_dir = os.path.join(output_dir, 'logs-eval')

    # Style reference: mel spectrogram of the reference recording.
    wav = load_wav(args.reference_audio)
    reference_mel = melspectrogram(wav).transpose()

    #Create output path if it doesn't exist
    for path in (eval_dir, log_dir,
                 os.path.join(log_dir, 'wavs'), os.path.join(log_dir, 'plots')):
        os.makedirs(path, exist_ok=True)

    with open(os.path.join(eval_dir, 'map.txt'), 'w') as map_file:
        for index, text in enumerate(tqdm(hparams.sentences)):
            start = time.time()
            mel_filename = synth.synthesize(
                text, index + 1, eval_dir, log_dir, None, reference_mel)
            map_file.write('{}|{}\n'.format(text, mel_filename))

    print('synthesized mel spectrograms at {}'.format(eval_dir))
def run_synthesis_sytle_transfer(args, synth_metadata_filename, checkpoint_path, output_dir, hparams): synth_dir = os.path.join(output_dir, 'natural') #Create output path if it doesn't exist os.makedirs(synth_dir, exist_ok=True) log(hparams_debug_string()) synth = Synthesizer() synth.load(args, checkpoint_path, hparams) texts, basenames, basenames_refs, mel_filenames, \ mel_ref_filenames_emt, mel_ref_filenames_spk,\ emt_labels, spk_labels = get_filenames_from_metadata(synth_metadata_filename, args.input_dir, args.flip_spk_emt) synth.synthesize(texts, basenames, synth_dir, synth_dir, mel_filenames, mel_ref_filenames_emt=mel_ref_filenames_emt, mel_ref_filenames_spk=mel_ref_filenames_spk, emt_labels_synth=emt_labels, spk_labels_synth=spk_labels)
def run_single(args, checkpoint_path, output_dir, hparams, sentences):
    """Batched synthesis of sentences without explicit basenames.

    Returns the eval directory.  The print() calls are debug output kept
    from the original implementation.
    """
    eval_dir = os.path.join(output_dir, 'eval')
    log_dir = os.path.join(output_dir, 'logs-eval')

    if args.model == 'Tacotron-2':
        assert os.path.normpath(eval_dir) == os.path.normpath(args.mels_dir)

    # Create output path if it doesn't exist
    os.makedirs(eval_dir, exist_ok=True)
    os.makedirs(log_dir, exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'wavs'), exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'plots'), exist_ok=True)

    log(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, hparams)

    # Set inputs batch wise
    sentences = [sentences[i:i + hparams.tacotron_synthesis_batch_size]
                 for i in range(0, len(sentences), hparams.tacotron_synthesis_batch_size)]

    # (cleanup: removed commented-out basenames/synthesize variants and an
    #  unused `start = time.time()`)
    print(sentences)
    log('Starting Synthesis Single')
    for i, texts in enumerate(tqdm(sentences)):
        print(texts, eval_dir, log_dir)
        # No basenames: the synthesizer derives output names itself.
        synth.synthesize(texts, None, eval_dir, log_dir, None)

    log('synthesized mel spectrograms at {}'.format(eval_dir))
    return eval_dir
def run_eval(args, checkpoint_path, output_dir, hparams, sentences):
    """Per-sentence eval synthesis with optional per-sentence speaker ids.

    When ``args.speaker_id`` is given, sentence i is synthesized with
    ``args.speaker_id[i]``; otherwise the synthesizer chooses the speaker.
    Returns the eval directory.
    """
    eval_dir = os.path.join(output_dir, 'eval')
    log_dir = os.path.join(output_dir, 'logs-eval')

    if args.model == 'Tacotron-2':
        assert os.path.normpath(eval_dir) == os.path.normpath(args.mels_dir)

    #Create output path if it doesn't exist
    os.makedirs(eval_dir, exist_ok=True)
    os.makedirs(log_dir, exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'wavs'), exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'plots'), exist_ok=True)

    log(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, hparams, speaker_id=args.speaker_id)

    with open(os.path.join(eval_dir, 'map.txt'), 'w') as file:
        for i, text in enumerate(tqdm(sentences)):
            # Deduplicated: the old if/else branches differed only in the
            # speaker_id argument.  (Also removed an unused `start` timer.)
            speaker = [args.speaker_id[i]] if args.speaker_id is not None else None
            mel_filename, speaker_id = synth.synthesize(
                [text], [i + 1], eval_dir, log_dir, None, speaker_id=speaker)
            file.write('{}|{}|{}\n'.format(text, mel_filename[0], speaker_id[0]))

    log('synthesized mel spectrograms at {}'.format(eval_dir))
    return eval_dir
def run_eval(args, checkpoint_path, output_dir, hparams, sentences):
    """Batched eval synthesis, optionally conditioned on a reference audio file."""
    eval_dir = os.path.join(output_dir, 'eval')
    log_dir = os.path.join(output_dir, 'logs-eval')

    if args.model == 'Tacotron-2':
        assert os.path.normpath(eval_dir) == os.path.normpath(args.mels_dir)

    #Create output path if it doesn't exist
    for path in (eval_dir, log_dir,
                 os.path.join(log_dir, 'wavs'), os.path.join(log_dir, 'plots')):
        os.makedirs(path, exist_ok=True)

    log(hparams_debug_string())
    synth = Synthesizer()

    if args.reference_audio is None:
        #raise ValueError("Evaluation without reference audio. Please provide path to reference audio.")
        reference_mel = None
    else:
        # Build the reference mel from the provided audio file.
        ref_wav = audio.load_wav(args.reference_audio, sr=hparams.sample_rate)
        reference_mel = audio.melspectrogram(ref_wav, hparams).astype(np.float32).T

    synth.load(checkpoint_path, hparams, reference_mel=reference_mel)

    #Set inputs batch wise
    batch_size = hparams.tacotron_synthesis_batch_size
    sentences = [sentences[pos:pos + batch_size]
                 for pos in range(0, len(sentences), batch_size)]

    log('Starting Synthesis')
    with open(os.path.join(eval_dir, 'map.txt'), 'w') as map_file:
        for batch_idx, texts in enumerate(tqdm(sentences)):
            start = time.time()
            basenames = ['batch_{:03d}_sentence_{:03d}'.format(batch_idx, j)
                         for j in range(len(texts))]
            mel_filenames, speaker_ids = synth.synthesize(
                texts, basenames, eval_dir, log_dir, None, reference_mel=reference_mel)
            for elems in zip(texts, mel_filenames, speaker_ids):
                map_file.write('|'.join([str(x) for x in elems]) + '\n')

    log('synthesized mel spectrograms at {}'.format(eval_dir))
    return eval_dir
def run_live(args, checkpoint_path, hparams):
    """Interactive loop: read text from stdin and synthesize until Ctrl-C."""
    #Log to Terminal without keeping any records in files
    log(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, hparams)

    # Greet the user so they know the model is loaded and ready.
    greetings = 'Hello, Welcome to the Live testing tool. Please type a message and I will try to read it!'
    log(greetings)
    generate_fast(synth, greetings)

    #Interaction loop
    while True:
        try:
            generate_fast(synth, input())
        except KeyboardInterrupt:
            leave = 'Thank you for testing our features. see you soon.'
            log(leave)
            generate_fast(synth, leave)
            sleep(2)
            break
def run_live(args, checkpoint_path, hparams):
    """Live console loop: synthesize typed text; say goodbye on Ctrl-C."""
    #Log to Terminal without keeping any records in files
    log(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, hparams)

    def _announce(message):
        # Log a line and immediately speak it.
        log(message)
        generate_fast(synth, message)

    #Generate fast greeting message
    _announce('Hello, Welcome to the Live testing tool. Please type a message and I will try to read it!')

    #Interaction loop
    running = True
    while running:
        try:
            text = input()
            generate_fast(synth, text)
        except KeyboardInterrupt:
            _announce('Thank you for testing our features. see you soon.')
            sleep(2)
            running = False
def run_eval(args, checkpoint_path, output_dir, hparams, sentences):
    """Batch-synthesize sentences directly to wav files under logs-eval/wavs."""
    eval_dir = os.path.join(output_dir, 'eval')
    log_dir = os.path.join(output_dir, 'logs-eval')

    #Create output path if it doesn't exist
    for path in (eval_dir, log_dir,
                 os.path.join(log_dir, 'wavs'), os.path.join(log_dir, 'plots')):
        os.makedirs(path, exist_ok=True)

    log(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, hparams)

    # Effective batch size never exceeds the number of sentences.
    delta_size = min(hparams.tacotron_synthesis_batch_size, len(sentences))
    batch_sentences = [sentences[pos:pos + hparams.tacotron_synthesis_batch_size]
                       for pos in range(0, len(sentences), delta_size)]

    start = time.time()
    for batch_idx, batch in enumerate(tqdm(batch_sentences)):
        wav_path = os.path.join(log_dir, 'wavs', 'eval_batch_{:03}.wav'.format(batch_idx))
        audio.save_wav(synth.eval(batch), wav_path, hparams)
    log('\nGenerated batches of size {} in {:.3f} sec'.format(delta_size, time.time() - start))

    return eval_dir
def run_synthesis(args, checkpoint_path, output_dir, hparams):
    """GTA/natural synthesis over train.txt; writes a wav|mel|output|text map."""
    is_gta = args.GTA == 'True'
    synth_dir = os.path.join(output_dir, 'gta' if is_gta else 'natural')
    #Create output path if it doesn't exist
    os.makedirs(synth_dir, exist_ok=True)

    metadata_filename = os.path.join(args.input_dir, 'train.txt')
    log(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, hparams, gta=is_gta)

    with open(metadata_filename, encoding='utf-8') as f:
        metadata = [row.strip().split('|') for row in f]

    # Field 4 holds the mel frame count; report total covered audio time.
    frame_shift_ms = hparams.hop_size / hparams.sample_rate
    hours = sum(int(row[4]) for row in metadata) * frame_shift_ms / 3600
    log('Loaded metadata for {} examples ({:.2f} hours)'.format(len(metadata), hours))

    log('starting synthesis')
    mel_dir = os.path.join(args.input_dir, 'mels')
    wav_dir = os.path.join(args.input_dir, 'audio')
    with open(os.path.join(synth_dir, 'map.txt'), 'w') as map_file:
        for index, row in enumerate(tqdm(metadata)):
            text = row[5]
            mel_filename = os.path.join(mel_dir, row[1])
            wav_filename = os.path.join(wav_dir, row[0])
            mel_output_filename = synth.synthesize(
                text, index + 1, synth_dir, None, mel_filename)
            map_file.write('{}|{}|{}|{}\n'.format(
                wav_filename, mel_filename, mel_output_filename, text))

    log('synthesized mel spectrograms at {}'.format(synth_dir))
    return os.path.join(synth_dir, 'map.txt')
def embedding_synthesize(args, hparams, checkpoint, ppgs=None, speakers=None):
    """Restore the latest checkpoint with reference-mel support and run eval.

    Args:
        args: CLI namespace; ``args.output_dir`` is the output root.
        hparams: hyperparameters; the synthesis batch size is validated
            against the GPU count before loading.
        checkpoint: directory holding the TF checkpoint state.
        ppgs, speakers: unused here; kept for interface compatibility.

    Raises:
        RuntimeError: if no checkpoint can be restored from `checkpoint`.
        ValueError: if the synthesis batch size is incompatible with the
            number of GPUs.
    """
    output_dir = args.output_dir
    try:
        checkpoint_path = tf.train.get_checkpoint_state(checkpoint).model_checkpoint_path
        log('loaded model at {}'.format(checkpoint_path))
    except Exception as e:
        # Narrowed from a bare `except:`; chain the cause for debugging.
        raise RuntimeError('Failed to load checkpoint at {}'.format(checkpoint)) from e

    if hparams.tacotron_synthesis_batch_size < hparams.tacotron_num_gpus:
        raise ValueError(
            'Defined synthesis batch size {} is smaller than minimum required {} (num_gpus)! Please verify your synthesis batch size choice.'.format(
                hparams.tacotron_synthesis_batch_size, hparams.tacotron_num_gpus))

    if hparams.tacotron_synthesis_batch_size % hparams.tacotron_num_gpus != 0:
        raise ValueError(
            'Defined synthesis batch size {} is not a multiple of {} (num_gpus)! Please verify your synthesis batch size choice!'.format(
                hparams.tacotron_synthesis_batch_size, hparams.tacotron_num_gpus))

    synth = Synthesizer()
    synth.load(checkpoint_path, hparams, reference_mels=True)
    return run_eval(args, checkpoint_path, output_dir, hparams, synth)
def tacotron_synthesize(sentences):
    """Synthesize a list of sentences with the checkpoint under ./trained_model."""
    # ignore warnings https://stackoverflow.com/questions/47068709/
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

    output_dir = 'A'
    checkpoint_path = tf.train.get_checkpoint_state('trained_model').model_checkpoint_path
    print('####### checkpoint_path', checkpoint_path)

    synth = Synthesizer()
    synth.load(checkpoint_path)
    os.makedirs(output_dir, exist_ok=True)

    for index, text in enumerate(sentences):
        synth.synthesize(text, index + 1, output_dir, None)

    print('Results at: {}'.format(output_dir))
def run_eval(args, checkpoint_path, output_dir, hparams, sentences, flag_to_wav=False, checkpoint_eal=None, flag_check=False, cmu_dict=None):
    """Dispatch eval synthesis to the synthesizer matching the model variant.

    Non-'orig' variants go through PMLSynthesizer; 'tacotron_orig' and
    'tacotron_bk2orig' use the plain Synthesizer.  Sentences are optionally
    converted to phones via the CMU dictionary before synthesis, then the
    check or target eval helper runs.
    (cleanup: removed commented-out pdb breakpoints and debug dumps)
    """
    log(hparams_debug_string())

    # use the correct synthesizer for the model type
    if args.variant not in ['tacotron_orig', 'tacotron_bk2orig']:
        cfg = Configuration(hparams.sample_rate, hparams.pml_dimension)
        synth = PMLSynthesizer(cfg)
    else:
        synth = Synthesizer()

    synth.load(checkpoint_path, hparams, model_name=args.variant,
               checkpoint_eal=checkpoint_eal, flag_online=args.online)

    if hparams.use_cmudict:
        sentences = sentences_2_phones(sentences, cmu_dict)

    if flag_check:
        _eval_check(synth, args, checkpoint_path, output_dir, hparams,
                    sentences, flag_to_wav, checkpoint_eal)
    else:
        _eval_tgt(synth, args, checkpoint_path, output_dir, hparams,
                  sentences, flag_to_wav, checkpoint_eal)
    return
def run_eval(args, checkpoint_path, output_dir, hparams, text, step, cwd):
    """Synthesize a single text at a given training step; returns the eval dir."""
    eval_dir = os.path.join(output_dir, 'eval')
    log_dir = os.path.join(output_dir, 'logs-eval')

    if args.model == 'Tacotron-2':
        assert os.path.normpath(eval_dir) == os.path.normpath(args.mels_dir)

    #Create output path if it doesn't exist
    # (eval_dir and the wavs subfolder are intentionally not created here)
    os.makedirs(log_dir, exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'plots'), exist_ok=True)

    log(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, hparams)

    log('Starting Synthesis')
    synth.synthesize(text, step, eval_dir, log_dir, None, cwd)

    log('synthesized mel spectrograms at {}'.format(eval_dir))
    return eval_dir
def get_style_embeddings(args, checkpoint_path, output_dir, hparams):
    """Dump speaker style embeddings plus metadata as TSVs for visualisation.

    Samples ``args.n_per_spk`` utterances for each of ``args.n_spk`` randomly
    chosen speakers, runs the synthesizer in embedding-only mode, and writes
    meta.tsv / emb_spk.tsv under <output_dir>/embeddings (e.g. for the
    TensorBoard embedding projector).
    (cleanup: removed large commented-out sampling experiments; renamed the
    loop variable `id`, which shadowed the builtin)
    """
    emb_dir = os.path.join(output_dir, 'embeddings')
    os.makedirs(emb_dir, exist_ok=True)
    meta_path = os.path.join(emb_dir, 'meta.tsv')
    emb_emt_path = os.path.join(emb_dir, 'emb_emt.tsv')
    emb_spk_path = os.path.join(emb_dir, 'emb_spk.tsv')

    # NOTE(review): `metadata` is read but never used below (df_meta supersedes
    # it) — kept to preserve behavior; confirm it can be removed.
    with open(args.train_filename, encoding='utf-8') as f:
        metadata = [
            line.strip().split('|') for line in f if not (line.startswith('#'))
        ]

    df_meta = get_metadata_df(args.train_filename, args)
    spk_ids = df_meta.spk_label.unique()
    spk_ids_chosen = np.sort(np.random.choice(spk_ids, args.n_spk))

    # Sample n_per_spk utterances for each chosen speaker.
    chosen_idx = []
    for spk_id in spk_ids_chosen:
        spk_rows = df_meta[df_meta.loc[:, 'spk_label'] == spk_id]
        chosen_idx += list(
            np.random.choice(spk_rows.index.values, args.n_per_spk))
    df_meta_chosen = df_meta.iloc[np.array(sorted(chosen_idx))]

    mel_filenames = [
        os.path.join(args.input_dir, row.dataset, 'mels', row.mel_filename)
        for idx, row in df_meta_chosen.iterrows()
    ]
    texts = list(df_meta_chosen.text)

    synth = Synthesizer()
    synth.load(args, checkpoint_path, hparams)
    print("getting embedding for {} samples".format(len(mel_filenames)))
    emb_emt, emb_spk, emb_mo_emt, emb_mo_spk, emb_cont_emt = synth.synthesize(
        texts, None, None, None, mel_filenames,
        mel_ref_filenames_emt=mel_filenames,
        mel_ref_filenames_spk=mel_filenames, emb_only=True)

    #SAVE META + EMBEDDING CSVS
    # Each sample appears twice ('real'/'synth') to line up with the stacked
    # real + model-output embeddings written below.
    columns_to_keep = [
        'dataset', 'mel_filename', 'mel_frames', 'emt_label', 'spk_label',
        'basename', 'sex'
    ]
    df = df_meta_chosen.loc[:, columns_to_keep]
    df['real'] = 'real'
    df_synth = df.copy()
    df_synth['real'] = 'synth'
    df = pd.concat([df, df_synth])
    df.to_csv(meta_path, sep='\t', index=False)

    # Only the speaker embeddings are stacked with their model-output
    # counterparts and saved; emb_emt_path is currently unused — confirm
    # whether the emotion embeddings should also be written.
    emb_spk = np.vstack((emb_spk, emb_mo_spk))
    pd.DataFrame(emb_spk).to_csv(emb_spk_path, sep='\t', index=False, header=False)
    print(len(emb_emt))
    print(emb_emt.shape)
parser.add_argument('--host', default="localhost", help='Host of Http service')
parser.add_argument(
    '--name',
    help='Name of logging directory if the two models were trained together.')
args = parser.parse_args()

# Quiet TF C++ logging and pin to the first GPU.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

checkpoint = os.path.join('logs-Tacotron', 'taco_' + args.checkpoint)
try:
    checkpoint_path = tf.train.get_checkpoint_state(
        checkpoint).model_checkpoint_path
    log('loaded model at {}'.format(checkpoint_path))
except Exception as e:
    # Narrowed from a bare `except:`; chain the cause for debugging.
    raise RuntimeError('Failed to load checkpoint at {}'.format(checkpoint)) from e

# Load the synthesizer once at startup so requests reuse the same session.
synth = Synthesizer()
modified_hp = hparams.parse(args.hparams)
synth.load(checkpoint_path, modified_hp)


class Res:
    """Serves the static HTML page."""

    def on_get(self, req, res):
        res.body = html_body
        res.content_type = "text/html"


class Syn:
    """Handles synthesis requests for the 'text' query parameter."""

    def on_get(self, req, res):
        if not req.params.get('text'):
            raise falcon.HTTPBadRequest()
        log('Synthesize {}'.format(p(req.params.get('text'))))
parser = argparse.ArgumentParser()
parser.add_argument('--checkpoint', default='pretrained/', help='Path to model checkpoint')
parser.add_argument(
    '--hparams', default='',
    help='Hyperparameter overrides as a comma-separated list of name=value pairs')
parser.add_argument('--port', default=9000, help='Port of Http service')
parser.add_argument('--host', default="localhost", help='Host of Http service')
parser.add_argument(
    '--name',
    help='Name of logging directory if the two models were trained together.')
args = parser.parse_args()

# Load the synthesizer once at startup so every request reuses it.
synth = Synthesizer()
modified_hp = hparams.parse(args.hparams)
synth.load(args.checkpoint, modified_hp)


class Res:
    """Serves the static HTML page."""

    def on_get(self, req, res):
        res.body = html_body
        res.content_type = "text/html"


class Syn:
    """Synthesizes the 'text' query parameter and returns raw audio bytes."""

    def on_get(self, req, res):
        text = req.params.get('text')
        if not text:
            raise falcon.HTTPBadRequest()
        res.data = synth.eval(p(text))
from tacotron.hparams_emmm import hparams, hparams_debug_string
from tacotron.infolog import log
from tacotron.synthesizer import Synthesizer
from tqdm import tqdm
from pypinyin import pinyin, Style

# Fixed checkpoint and output locations for this demo script.
checkpoint_path = os.path.join('taco_model2','tacotron_model.ckpt-100000')
output_dir = os.path.join('taco_output','org')
eval_dir = output_dir
log_dir = os.path.join(output_dir, 'logs-eval')

#Create output path if it doesn't exist
os.makedirs(eval_dir, exist_ok=True)
os.makedirs(log_dir, exist_ok=True)
os.makedirs(os.path.join(log_dir, 'wavs'), exist_ok=True)
os.makedirs(os.path.join(log_dir, 'plots'), exist_ok=True)

log(hparams_debug_string())
# Load the Tacotron synthesizer once at module import time.
synth = Synthesizer()
synth.load(checkpoint_path, hparams)

from asr_model.model_vgg_ctc import SpeechModel
import os
import platform as plat
# NOTE (translated): the GPU-memory allocation below really seems to have no
# effect and it's unclear how to use it properly.
# Deploying both models at the same time is always a bit troublesome — could
# not pin this one to the CPU either; left for someone more capable to fix.
import keras.backend.tensorflow_backend as KTF
config = tf.ConfigProto()
# Grow GPU memory on demand instead of grabbing it all up front.
config.gpu_options.allow_growth=True
session = tf.Session(config=config)
KTF.set_session(session)
def run_synthesis(args, checkpoint_path, output_dir, hparams):
    """Synthesize mel spectrograms for the whole training metadata file.

    Runs in GTA (ground-truth-aligned) or natural mode; in natural mode the
    VAE code of selected latent dimensions can be swept over mu +/- k*sigma
    (args.modify_vae_dim, comma-separated dim indices).

    Returns the path to the written `map.txt` mapping file.
    """
    GTA = (args.GTA == 'True')
    if GTA:
        synth_dir = os.path.join(output_dir, 'gta')
        log_dir = os.path.join(output_dir, 'logs-gta')
        # Create output path if it doesn't exist
        os.makedirs(synth_dir, exist_ok=True)
        os.makedirs(log_dir, exist_ok=True)
    else:
        synth_dir = os.path.join(output_dir, 'natural')
        log_dir = os.path.join(output_dir, 'logs-natural')
        if args.modify_vae_dim is not None:
            synth_dir += '-modify'
            log_dir += '-modify'
        # Parse "1,5,7" -> [1, 5, 7]; None when no dims were requested.
        modify_vae_dim = [int(dim) for dim in args.modify_vae_dim.split(',')
                          ] if args.modify_vae_dim else None
        # Create output path if it doesn't exist
        os.makedirs(synth_dir, exist_ok=True)
        os.makedirs(log_dir, exist_ok=True)

    metadata_filename = os.path.join(args.input_dir, 'train.txt')
    log(hparams_debug_string())
    synth = Synthesizer()
    # VAE code mode: 'auto' for GTA / plain synthesis, 'modify' when sweeping
    # individual latent dimensions.
    if GTA or args.modify_vae_dim is None:
        synth.load(checkpoint_path, hparams, gta=GTA, vae_code_mode='auto')
    else:
        synth.load(checkpoint_path, hparams, gta=GTA, vae_code_mode='modify')

    with open(metadata_filename, encoding='utf-8') as f:
        # metadata columns: assumes m[0]=audio file, m[1]=mel file,
        # m[4]=mel frame count, m[5]=text — TODO confirm against train.txt.
        metadata = [line.strip().split('|') for line in f]
        frame_shift_ms = hparams.hop_size / hparams.sample_rate
        hours = sum([int(x[4]) for x in metadata]) * frame_shift_ms / (3600)
        log('Loaded metadata for {} examples ({:.2f} hours)'.format(
            len(metadata), hours))

    # Set inputs batch wise
    metadata = [
        metadata[i:i + hparams.tacotron_synthesis_batch_size]
        for i in range(0, len(metadata), hparams.tacotron_synthesis_batch_size)
    ]

    log('Starting Synthesis')
    mel_dir = os.path.join(args.input_dir, 'mels')
    wav_dir = os.path.join(args.input_dir, 'audio')
    with open(os.path.join(synth_dir, 'map.txt'), 'w') as file:
        trange = tqdm(metadata)
        for i, meta in enumerate(trange):
            if GTA or args.modify_vae_dim is None:
                texts = [m[5] for m in meta]
                mel_filenames = [os.path.join(mel_dir, m[1]) for m in meta]
                wav_filenames = [os.path.join(wav_dir, m[0]) for m in meta]
                # Strip "mel-" prefix and ".npy" suffix to recover basenames.
                basenames = [
                    os.path.basename(m).replace('.npy', '').replace('mel-', '')
                    for m in mel_filenames
                ]
                mel_output_filenames, speaker_ids = synth.synthesize(
                    texts, basenames, synth_dir, log_dir, mel_filenames)
                log_dir = None  #save plots and wavs for the first batch only, for human inspection
                for elems in zip(wav_filenames, mel_filenames,
                                 mel_output_filenames, speaker_ids, texts):
                    file.write('|'.join([str(x) for x in elems]) + '\n')
            else:
                # Sweep each requested VAE dimension over mu + scale*sigma.
                scales = [-2, -1, 0, 1, 2]
                for dim in modify_vae_dim:
                    for scale in scales:
                        texts = [m[5] for m in meta]
                        mel_filenames = [
                            os.path.join(mel_dir, m[1]) for m in meta
                        ]
                        wav_filenames = [
                            os.path.join(wav_dir, m[0]) for m in meta
                        ]
                        # Encode the modified dim and scale into the basename.
                        basenames = [
                            'dim_{}-'.format(dim) +
                            os.path.basename(m).replace('.npy', '').replace(
                                'mel-', '') + '-mu+({}*sigma)'.format(scale)
                            for m in mel_filenames
                        ]
                        mel_output_filenames, speaker_ids = synth.synthesize(
                            texts, basenames, synth_dir, log_dir,
                            mel_filenames, dim, scale)
                        trange.set_postfix({
                            'modified_dim': dim,
                            'value': 'mu+({}*sigma)'.format(scale)
                        })
                        # Manual fractional progress-bar advance across the
                        # (dim, scale) grid; bypasses tqdm's per-item counting.
                        trange.update(1 / len(scales) / len(modify_vae_dim) *
                                      len(trange))
                        trange.refresh()
                        for elems in zip(wav_filenames, mel_filenames,
                                         mel_output_filenames, speaker_ids,
                                         texts):
                            file.write('|'.join(
                                [str(x) for x in elems + (dim, scale)]) + '\n')
                break  #synthesize spectrograms for the first batch only, for human inspection

    log('synthesized mel spectrograms at {}'.format(synth_dir))
    return os.path.join(synth_dir, 'map.txt')
# --- Falcon wiring for the single-model demo server. ---
class UIResource:
    """Serves the static HTML UI page."""

    def on_get(self, req, res):
        res.content_type = 'text/html'
        res.body = html_body


class SynthesisResource:
    """Synthesis endpoint; requires a non-empty `text` query parameter."""

    def on_get(self, req, res):
        if not req.params.get('text'):
            raise falcon.HTTPBadRequest()
        # NOTE(review): synthesize() is expected to return WAV bytes — confirm.
        res.data = synthesizer.synthesize(req.params.get('text'))
        res.content_type = 'audio/wav'


# Module-level app objects (WSGI servers import `api`).
synthesizer = Synthesizer()
api = falcon.API()
api.add_route('/synthesize', SynthesisResource())
api.add_route('/', UIResource())

if __name__ == '__main__':
    from wsgiref import simple_server
    parser = argparse.ArgumentParser()
    parser.add_argument('--checkpoint',
                        required=True,
                        help='Full path to model checkpoint')
    parser.add_argument('--port', type=int, default=9000)
    # NOTE(review): this fragment is truncated mid-argument below; the
    # remainder of the --hparams definition continues outside this chunk.
    parser.add_argument(
        '--hparams',
        default='',
        help=
def synthesize_random(args, checkpoint_path, output_dir, hparams,
                      model_suffix):
    """Synthesize a fixed random sample of test utterances with emotion /
    speaker reference mels.

    In unpaired mode: 5 texts per each of 4 emotions, with randomly drawn
    emotion and speaker references. In paired mode (args.paired): 10 texts,
    each acting as its own reference. Writes a meta.csv describing the batch
    and the synthesized mels into a timestamped directory.

    NOTE(review): the CSV path below is a hard-coded Windows user path —
    this function only runs on that machine as written.
    """
    n_emt = 4 if not (args.paired) else 1
    n_txts_per_emotion = 5 if not (args.paired) else 10
    synth_dir = os.path.join(output_dir, 'random', model_suffix, time_string())
    os.makedirs(synth_dir, exist_ok=True)

    synth = Synthesizer()
    synth.load(args, checkpoint_path, hparams)

    meta_save_path = os.path.join(synth_dir, 'meta.csv')
    df = pd.read_csv(
        r'C:\Users\t-mawhit\Documents\code\Tacotron-2\data\zo_jessa_train_test.csv'
    )
    df_train = df[df.train_test == 'train']
    df_test = df[df.train_test == 'test']

    #synthesize 20 random samples from zo and jessa, 5 in each emotion
    #change emotion
    df_test_zo = df_test[df_test.dataset == 'emt4']
    df_test_jessa = df_test[df_test.dataset == 'jessa']
    # Text source: jessa by default; with --zo, only neutral (emt_label==0)
    # zo utterances.
    df_test_use = df_test_jessa if not (
        args.zo) else df_test_zo[df_test_zo.emt_label == 0]
    # Fixed seed so repeated runs pick the same texts/references.
    np.random.seed(2)
    chosen_texts_idxs = np.random.choice(df_test_use.index,
                                         n_txts_per_emotion * n_emt,
                                         replace=False)
    df_test_use_texts_rows = df_test_use.loc[chosen_texts_idxs]
    meta = df_test_use_texts_rows.copy()
    meta['basename'] = ''

    idx = 0
    texts = []
    mel_filenames = []
    mel_ref_filenames_emt = []
    mel_ref_filenames_spk = []
    basenames = []
    basenames_refs = []
    emt_labels = []
    spk_labels = []
    for i in range(n_emt):
        # Pool of zo utterances with the target emotion i.
        df_test_zo_emt = df_test_zo[df_test_zo.emt_label == i]
        for j in range(n_txts_per_emotion):
            row = df_test_use_texts_rows.iloc[idx]
            texts.append(row.text)
            mel_filenames.append(
                os.path.join(args.input_dir, row.dataset, 'mels',
                             row.mel_filename))
            if args.paired:
                # Paired: the utterance is its own speaker + emotion reference.
                mel_ref_filenames_spk.append(
                    os.path.join(args.input_dir, row.dataset, 'mels',
                                 row.mel_filename))
                mel_ref_filenames_emt.append(
                    os.path.join(args.input_dir, row.dataset, 'mels',
                                 row.mel_filename))
            else:
                # Unpaired: random speaker reference from the text pool and a
                # random emotion reference with the target emotion.
                row_spk = df_test_use.loc[np.random.choice(df_test_use.index)]
                mel_ref_filenames_spk.append(
                    os.path.join(args.input_dir, row_spk.dataset, 'mels',
                                 row_spk.mel_filename))
                row_emt = df_test_zo_emt.loc[np.random.choice(
                    df_test_zo_emt.index)]
                mel_ref_filenames_emt.append(
                    os.path.join(args.input_dir, row_emt.dataset, 'mels',
                                 row_emt.mel_filename))

            basename = '{}'.format(row.basename.split('.')[0])
            basename_ref = 'e{}'.format(i)
            basenames.append(basename)
            basenames_refs.append(basename_ref)
            # In unpaired mode the labels come from the references; in paired
            # mode from the utterance itself. (row_emt/row_spk are only bound
            # in unpaired mode, and only evaluated there.)
            emt_label = row_emt.emt_label if not (
                args.paired) else row.emt_label
            spk_label = row_spk.spk_label if not (
                args.paired) else row.spk_label
            emt_labels.append(int(emt_label))
            spk_labels.append(int(spk_label))
            # Positional columns: assumes 8=emt_label, 9=spk_label,
            # 10=output mel filename — TODO confirm against the CSV schema.
            meta.iloc[idx, 8] = emt_label
            meta.iloc[idx, 9] = spk_label
            meta.iloc[idx, 10] = 'mel-{}_{}.npy'.format(basename, basename_ref)
            idx += 1

    meta.to_csv(meta_save_path, index=False)

    print('Starting Synthesis on {} samples'.format(len(mel_filenames)))
    synth.synthesize(texts, basenames, synth_dir, synth_dir, mel_filenames,
                     basenames_refs=basenames_refs,
                     mel_ref_filenames_emt=mel_ref_filenames_emt,
                     mel_ref_filenames_spk=mel_ref_filenames_spk,
                     emt_labels_synth=emt_labels,
                     spk_labels_synth=spk_labels)
import argparse

from tacotron.utils import makedirs, str2bool
from tacotron.synthesizer import Synthesizer


def _build_arg_parser():
    """CLI for synthesizing one utterance from a trained checkpoint."""
    cli = argparse.ArgumentParser()
    cli.add_argument('--load_path', required=True)
    cli.add_argument('--sample_path', default="samples")
    cli.add_argument('--text', required=True)
    cli.add_argument('--num_speakers', default=1, type=int)
    cli.add_argument('--speaker_id', default=0, type=int)
    cli.add_argument('--checkpoint_step', default=None, type=int)
    cli.add_argument('--is_korean', default=True, type=str2bool)
    return cli


if __name__ == "__main__":
    config = _build_arg_parser().parse_args()

    # Make sure the output directory exists before synthesis writes into it.
    makedirs(config.sample_path)

    synthesizer = Synthesizer()
    synthesizer.load(config.load_path, config.num_speakers,
                     config.checkpoint_step)

    # One text in, one audio sample out.
    audio = synthesizer.synthesize(texts=[config.text],
                                   base_path=config.sample_path,
                                   speaker_ids=[config.speaker_id],
                                   attention_trim=False,
                                   isKorean=config.is_korean)[0]
def run_eval(args, checkpoint_path, output_dir, hparams, sentences):
    """Synthesize the given sentences in eval mode.

    Optionally conditions on a reference mel (args.reference_mel) and/or
    sweeps selected VAE latent dimensions over mu +/- k*sigma
    (args.modify_vae_dim, comma-separated dim indices).

    Returns the eval output directory.
    """
    eval_dir = os.path.join(output_dir, 'eval')
    log_dir = os.path.join(output_dir, 'logs-eval')
    if args.modify_vae_dim is not None:
        eval_dir += '-modify'
        log_dir += '-modify'
    # Parse "1,5,7" -> [1, 5, 7]; None when no dims were requested.
    modify_vae_dim = [int(dim) for dim in args.modify_vae_dim.split(',')
                      ] if args.modify_vae_dim else None
    if args.model == 'Tacotron-2':
        # WaveNet will consume these mels; the dirs must agree.
        assert os.path.normpath(eval_dir) == os.path.normpath(args.mels_dir)

    # Create output path if it doesn't exist
    os.makedirs(eval_dir, exist_ok=True)
    os.makedirs(log_dir, exist_ok=True)

    log(hparams_debug_string())
    synth = Synthesizer()
    # VAE code mode: 'auto' when a reference mel is given, 'modify' when also
    # sweeping dims, 'feed' when synthesizing without a reference.
    if args.reference_mel is not None and args.modify_vae_dim is None:
        synth.load(checkpoint_path, hparams, vae_code_mode='auto')
    elif args.reference_mel is not None and args.modify_vae_dim is not None:
        synth.load(checkpoint_path, hparams, vae_code_mode='modify')
    else:
        synth.load(checkpoint_path, hparams, vae_code_mode='feed')

    # Set inputs batch wise
    sentences = [
        sentences[i:i + hparams.tacotron_synthesis_batch_size] for i in range(
            0, len(sentences), hparams.tacotron_synthesis_batch_size)
    ]

    log('Starting Synthesis')
    with open(os.path.join(eval_dir, 'map.txt'), 'w') as file:
        trange = tqdm(sentences)
        for i, texts in enumerate(trange):
            if args.modify_vae_dim is None:
                # NOTE(review): `start` is never read — leftover timing code.
                start = time.time()
                basenames = [
                    'batch_{}_sentence_{}'.format(i, j)
                    for j in range(len(texts))
                ]
                if args.reference_mel is not None:
                    # Same reference mel for every sentence in the batch.
                    mel_filenames = [
                        args.reference_mel for j in range(len(texts))
                    ]
                    mel_output_filenames, speaker_ids = synth.synthesize(
                        texts, basenames, eval_dir, log_dir, mel_filenames)
                else:
                    mel_output_filenames, speaker_ids = synth.synthesize(
                        texts, basenames, eval_dir, log_dir, None)
                log_dir = None  #save plots and wavs for the first batch only, for human inspection
                for elems in zip(texts, mel_output_filenames, speaker_ids):
                    file.write('|'.join([str(x) for x in elems]) + '\n')
            else:
                # Sweep each requested VAE dimension over mu + scale*sigma.
                scales = [-2, -1, 0, 1, 2]
                for dim in modify_vae_dim:
                    for scale in scales:
                        start = time.time()
                        basenames = [
                            'dim_{}_batch_{}_sentence_{}_mu+({}*sigma)'.format(
                                dim, i, j, scale) for j in range(len(texts))
                        ]
                        if args.reference_mel is not None:
                            mel_filenames = [
                                args.reference_mel for j in range(len(texts))
                            ]
                            mel_output_filenames, speaker_ids = synth.synthesize(
                                texts, basenames, eval_dir, log_dir,
                                mel_filenames, dim, scale)
                        else:
                            mel_output_filenames, speaker_ids = synth.synthesize(
                                texts, basenames, eval_dir, log_dir, None,
                                dim, scale)
                        trange.set_postfix({
                            'modified_dim': dim,
                            'value': 'mu+({}*sigma)'.format(scale)
                        })
                        # Manual fractional progress advance across the
                        # (dim, scale) grid for this batch.
                        trange.update(1 / len(scales) / len(modify_vae_dim))
                        trange.refresh()
                        for elems in zip(texts, mel_output_filenames,
                                         speaker_ids):
                            file.write('|'.join(
                                [str(x) for x in elems + (dim, scale)]) + '\n')
                log_dir = None  #save plots and wavs for the first batch only, for human inspection

    log('synthesized mel spectrograms at {}'.format(eval_dir))
    return eval_dir
# --- Standalone script: synthesize one text to a WAV file on disk. ---
import argparse

# CLI: every flag is optional and falls back to a demo default.
parser = argparse.ArgumentParser()
parser.add_argument('--checkpoint',
                    required=False,
                    help='Full path to model checkpoint',
                    default="tacotron/tmp/tacotron-20180906/model.ckpt")
parser.add_argument('--text',
                    required=False,
                    help='Text to synthesize',
                    default="Hello World")
parser.add_argument('--output',
                    required=False,
                    help='File path of output',
                    default="HelloWorld.wav")
args = parser.parse_args()

# argparse already returns strings here; str() calls are defensive no-ops.
checkpoint = str(args.checkpoint)
text = str(args.text)
output = str(args.output)

print("Checkpoint: " + checkpoint)
print("Text: " + text)
print("Output: " + output)
print("")

print("Loading model...")
synthesizer = Synthesizer()
synthesizer.load(checkpoint)
print("Loading model completed!")
print("")

# NOTE(review): "Sythesizing" typo below is a user-facing runtime string,
# left untouched here.
print("Sythesizing text...")
# NOTE(review): synthesize() is expected to return WAV bytes — confirm.
with open(output, 'wb') as file:
    file.write(synthesizer.synthesize(text))
print("Sythesizing text completed!")
print("")
def run_synthesis_multiple(args, checkpoint_path, output_dir, hparams,
                           model_suffix):
    """Cross-accent synthesis: for each chosen accent, sample speakers and
    texts, then synthesize every text once per accent reference.

    Per accent: n_spk_per_accent speakers, n_text_per_spk texts each; every
    text is paired with an emotion(accent)-reference mel drawn from each
    chosen accent. With args.flip_spk_emt the speaker and emotion reference
    lists are swapped. Writes wavs into a timestamped directory.
    """
    n_spk_per_accent = 2
    n_text_per_spk = 5

    synth_dir = os.path.join(output_dir, 'wavs', model_suffix, time_string())
    os.makedirs(synth_dir, exist_ok=True)

    synth = Synthesizer()
    synth.load(args, checkpoint_path, hparams)

    with open(args.train_filename, encoding='utf-8') as f:
        metadata = [line.strip().split('|') for line in f]

    if args.remove_long_samps:
        # Drop two known-long recordings plus anything over 500 mel frames.
        # NOTE: the comprehension variable `f` shadows the (closed) file
        # handle name above — harmless but worth renaming eventually.
        len_before = len(metadata)
        metadata = [f for f in metadata if not (f[10].endswith('_023.wav'))]
        metadata = [f for f in metadata if not (f[10].endswith('_021.wav'))]
        metadata = [f for f in metadata if int(f[6]) < 500]
        print("Removed Long Samples - before: {}, after: {}".format(
            len_before, len(metadata)))

    #only synthesize long samples
    metadata = [f for f in metadata if int(f[6]) > 200]

    frame_shift_ms = hparams.hop_size / hparams.sample_rate
    hours = sum([int(x[6]) for x in metadata]) * frame_shift_ms / (3600)
    print('Loaded metadata for {} examples ({:.2f} hours)'.format(
        len(metadata), hours))

    df = pd.DataFrame(metadata,
                      columns=[
                          'dataset', 'audio_filename', 'mel_filename',
                          'linear_filename', 'spk_emb_filename', 'time_steps',
                          'mel_frames', 'text', 'emt_label', 'spk_label',
                          'basename', 'sex'
                      ])

    # Accent labels are stored as strings in emt_label; indices into acc_names.
    chosen_accents = ['0', '3']
    assert (len(chosen_accents) <= 2)
    acc_names = [
        'American', 'Australian', 'Canadian', 'English', 'Indian', 'Irish',
        'NewZealand', 'NorthernIrish', 'Scottish', 'SouthAfrican', 'Welsh'
    ]

    df_acc = df[df['emt_label'].isin(chosen_accents)]
    # spk_idxs = sorted(frozenset(df_acc['spk_label'].unique()))

    texts = []
    mel_filenames = []
    mel_ref_filenames_emt = []
    mel_ref_filenames_spk = []
    basenames = []
    basenames_refs = []
    for i, acc in enumerate(chosen_accents):
        df_acc_spks = df_acc[df_acc['emt_label'] == acc]['spk_label'].unique()
        chosen_spks = np.random.choice(df_acc_spks,
                                       n_spk_per_accent,
                                       replace=False)
        for spk in chosen_spks:
            df_spk = df_acc[df_acc['spk_label'] == spk]
            idxs = np.random.choice(df_spk.index, n_text_per_spk,
                                    replace=False)
            for idx in idxs:
                # for j in range(5):
                # Synthesize this text once per accent reference.
                for acc_ref in chosen_accents:
                    texts.append(df_acc.loc[idx].text)
                    mel_filename = os.path.join(args.input_dir,
                                                df_acc.loc[idx].dataset,
                                                'mels',
                                                df_acc.loc[idx].mel_filename)
                    mel_filenames.append(mel_filename)
                    # Speaker reference is the utterance's own mel.
                    mel_ref_filenames_spk.append(mel_filename)
                    # Basename encodes source basename, accent prefix, sex.
                    basenames.append('{}_{}_{}'.format(
                        df_acc.loc[idx].basename.split('.')[0],
                        acc_names[int(acc)][:2], df_acc.loc[idx].sex))
                    # Random emotion(accent) reference from accent acc_ref.
                    df_other_acc = df_acc[df_acc['emt_label'] == acc_ref]
                    row = df_other_acc.loc[np.random.choice(
                        df_other_acc.index, 1)]
                    mel_ref_filenames_emt.append(
                        os.path.join(args.input_dir, row.dataset.iloc[0],
                                     'mels', row.mel_filename.iloc[0]))
                    # NOTE(review): int() on a single-element Series relies on
                    # pandas' scalar coercion (deprecated in newer pandas).
                    basenames_refs.append('{}'.format(
                        acc_names[int(row.emt_label)][:2]))  #,j))

    if args.flip_spk_emt:
        # Swap the roles of the speaker and emotion reference lists.
        mel_ref_filenames_emt_tmp = mel_ref_filenames_emt
        mel_ref_filenames_emt = mel_ref_filenames_spk
        mel_ref_filenames_spk = mel_ref_filenames_emt_tmp

    print('Starting Synthesis on {} samples'.format(
        len(mel_filenames) // len(chosen_accents)))
    synth.synthesize(texts, basenames, synth_dir, synth_dir, mel_filenames,
                     basenames_refs=basenames_refs,
                     mel_ref_filenames_emt=mel_ref_filenames_emt,
                     mel_ref_filenames_spk=mel_ref_filenames_spk)