def run_eval(args):
    """Synthesize every entry of `sentences` with the checkpoint given by
    --ckpt_path, or with the latest checkpoint found in the run's log dir.

    Outputs one wav (and one alignment png) per sentence under a directory
    derived from the checkpoint path.

    Raises:
        RuntimeError: if no checkpoint can be found in the log directory.
    """
    if not args.ckpt_path:
        run_name = args.name or args.model
        log_dir = os.path.join(args.base_dir,
                               'logs-%s-%s' % (run_name, args.description))
        print("Trying to restore saved checkpoints from {} ...".format(log_dir))
        ckpt = get_checkpoint_state(log_dir)
        if ckpt:
            print("Checkpoint found: {}".format(ckpt.model_checkpoint_path))
            ckpt_path = ckpt.model_checkpoint_path
        else:
            print('no model found')
            # Fix: a bare `raise` outside an active exception is itself an
            # error (RuntimeError: No active exception to re-raise); raise
            # an explicit, descriptive exception instead.
            raise RuntimeError('No checkpoint found in {}'.format(log_dir))
    else:
        ckpt_path = args.ckpt_path
    print(hparams_debug_string())
    synth = Synthesizer()
    synth.load(ckpt_path)
    base_path = get_output_base_path(ckpt_path)
    os.makedirs(base_path, exist_ok=True)
    for i, text in enumerate(sentences):
        # Strip latin letters, digits and the listed punctuation before
        # synthesis. Fix: raw string avoids the deprecated invalid escape
        # sequences (\! \% \, ...) of the original literal; the matched
        # character set is byte-identical.
        text = re.sub(r"[A-Za-z0-9\!\%\[\]\,\,\。\…\:\“\”]", "", text)
        text = text.strip()
        path = os.path.join(base_path,
                            '%d-identity-%d-%s.wav' % (i, args.identity, text))
        path_alignment = os.path.join(base_path,
                                      '%d-identity-%d.png' % (i, args.identity))
        print('Synthesizing: %s' % path)
        synth.synthesize(text, args.identity, path, path_alignment)
def run_eval(args):
    """Run evaluation synthesis for every speaker of every model directory
    matching --load_path_pattern; skips directories without a checkpoint."""
    print(hparams_debug_string())
    for model_dir in glob(args.load_path_pattern):
        # A usable model directory must contain a TF `checkpoint` index file.
        if not os.path.exists(os.path.join(model_dir, "checkpoint")):
            print(" [!] Skip non model directory: {}".format(model_dir))
            continue
        synth = Synthesizer()
        synth.load(model_dir)
        for speaker_id in range(synth.num_speakers):
            base_path = get_output_base_path(model_dir,
                                             "eval-{}".format(speaker_id))
            inputs, input_lengths = create_batch_inputs_from_texts(texts)
            n_batches = math.ceil(len(inputs) / args.batch_size)
            for batch_idx in range(n_batches):
                lo = batch_idx * args.batch_size
                hi = (batch_idx + 1) * args.batch_size
                batch_texts = texts[lo:hi]
                synth.synthesize(
                    texts=batch_texts,
                    speaker_ids=[speaker_id] * len(batch_texts),
                    tokens=inputs[lo:hi],
                    base_path="{}-{}".format(base_path, batch_idx),
                    manual_attention_mode=args.manual_attention_mode,
                    base_alignment_path=args.base_alignment_path,
                )
        synth.close()
def run_eval(args):
    """Synthesize either a single --text (optionally teacher-forced and/or
    conditioned on a reference mel) or every entry of `sentences`.

    Raises:
        NotImplementedError: when --text is given without --reference_mel
            (style-weight synthesis is not implemented yet).
    """
    print(hparams_debug_string())
    is_teacher_force = False
    mel_targets = args.mel_targets
    if args.mel_targets is not None:
        is_teacher_force = True
        mel_targets = np.load(args.mel_targets)
    synth = Synthesizer(teacher_forcing_generating=is_teacher_force)
    synth.load(args.checkpoint)
    base_path = get_output_base_path(args.checkpoint)
    for i, text in enumerate(sentences):
        if args.text is not None:
            path = '%s-eval.wav' % (base_path)
            print('Synthesizing: %s' % path)
            reference_mel = args.reference_mel
            if reference_mel is not None:
                reference_mel = np.load(args.reference_mel)
            else:
                # Fix: a bare `raise` with no active exception is itself a
                # RuntimeError; raise an explicit, descriptive one instead.
                raise NotImplementedError(
                    "TODO: add style weights when there is no reference mel")
            with open(path, 'wb') as f:
                f.write(synth.synthesize(args.text,
                                         mel_targets=mel_targets,
                                         reference_mel=reference_mel))
            break
        else:
            path = '%s-%d.wav' % (base_path, i)
            print('Synthesizing: %s' % path)
            with open(path, 'wb') as f:
                f.write(synth.synthesize(text))
def run_eval(args):
    """Batch synthesis with optional GTA, reference-mel, or attention-weight
    conditioning, depending on the flags baked into `args`."""
    synth = Synthesizer(args.use_gta, args.use_ref, args.use_att, args.model_name)
    synth.load(args.ckpt_path)
    batch_size = args.batch_size
    num_batches = len(args.batch_texts)
    use_att, use_ref, use_gta = args.use_att, args.use_ref, args.use_gta
    # GTA mode supplies (mel_inputs, mel_lengths); otherwise both stay None.
    gta_mels_lens = args.gta_inputs if args.use_gta else [None, None]
    emos = args.ref_inputs if args.use_ref else args.emo_weights  # prefer the reference mel over emotion weights
    emos_info = args.ref_names if args.use_ref else args.emo_strs
    # Normalize to parallel lists so the zip below always has one pass,
    # even when no emotion conditioning is provided at all.
    emos, emos_info = [emos, emos_info] if emos else [[None], [None]]

    def print_infos():
        # One-shot summary of the synthesis configuration.
        print(f'\nLoading checkpoint: {args.ckpt_path}')
        print('\nSynthesis Infos:\n ', end='')
        print(f'use_att={use_att}', f'use_ref={use_ref}', f'use_gta={use_gta}',
              f'num_batches={num_batches}', f'batch_size={batch_size}',
              f'num_texts={len(args.texts)}', f'emo_infos={emos_info}',
              f'model={args.model_name}', f'output_dir={args.output_dir}',
              sep='\n ')
    print_infos()
    for emo, emo_info in zip(emos, emos_info):
        print(f'\nSynthesizing with emo info: {emo_info} ...')
        for i in range(num_batches):
            batch_seq = args.batch_seqs[i]
            batch_text = args.batch_texts[i]
            # NOTE(review): if `emo_info` is None here, os.path.isfile(None)
            # raises TypeError — presumably the no-conditioning path is never
            # combined with this branch; confirm against callers.
            if os.path.isfile(emo_info):
                emo_info = os.path.splitext(
                    os.path.basename(emo_info)
                )[0]  # when mel as ref audio, only preserve the mel file name
            batch_name = [n.format(emo_info) for n in args.batch_names[i]]
            print(f' Synthesizing {i + 1}th batch with sentences:', end='')
            print('', *batch_text, sep='\n ')
            call_fn_kwargs = {
                'mel_inputs': gta_mels_lens[0],
                'mel_lengths': gta_mels_lens[1]
            }
            if emo is not None:
                # Wrap a bare conditioning tensor so indexing below is uniform,
                # then truncate each component to the current batch size.
                emo = emo if isinstance(emo, (tuple, list)) else [emo]
                emo = [x[:len(batch_text)] for x in emo]
                if args.use_ref:
                    call_fn_kwargs.update(ref_inputs=emo[0], ref_lengths=emo[1])
                elif args.use_att:
                    # Model families expect differently-named weight kwargs.
                    if args.model_name in ['sygst', 'emogst']:
                        call_fn_kwargs.update(atten_weights=emo[0])
                    elif args.model_name == 'embgst':
                        call_fn_kwargs.update(aro_weights=emo[0],
                                              val_weights=emo[1])
            synth.synthesize(batch_seq, batch_text, batch_name, **call_fn_kwargs)
def __run_eval(args):
    """Generate speech from an input image with an image-to-speech model."""
    synth = Synthesizer()
    vgg_weights = os.path.join(args.base_dir, args.vgg19_path)
    synth.load(args.checkpoint, vgg_weights)
    output_wav_dir = __get_output_wav_path(args)
    print('Synthesizing: %s' % output_wav_dir)
    synth.synthesize(args.image_path, output_wav_dir)
    print('Done testing! Check the {} folder for samples'.format(output_wav_dir))
def run_eval(args, checkpoint_path):
    """Synthesize all `sentences`, printing per-sentence wall-clock time."""
    print(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path)
    for idx, sentence in enumerate(sentences):
        t0 = time.time()
        synth.synthesize(sentence, idx, args.output_dir)
        elapsed = time.time() - t0
        print('synthesized sentence n°{} in {:.3f} sec'.format(idx + 1, elapsed))
class TextToSpeech():
    """Mandarin TTS front-end: converts text to pinyin, synthesizes it with a
    Tacotron checkpoint, and stitches text/audio segments into one clip."""

    def __init__(self):
        print(hparams_debug_string())
        self.synth = Synthesizer()
        self.synth.load('./logs-tacotron/model.ckpt-52000')

    def speech_connect(self, *speeches):
        """Concatenate a mix of text strings (synthesized to speech) and
        AudioSegments (silence-trimmed) into a single AudioSegment."""
        print('---------0----------')
        result = AudioSegment.silent(duration=100)
        for speech in speeches:
            # Idiom fix: isinstance() instead of type() == comparisons.
            if isinstance(speech, str):
                print('synthesize: ', speech)
                syllables = lazy_pinyin(speech, style=pypinyin.TONE2)
                print('---------1 ', speech, '----------')
                syllables = text2pinyin(syllables)
                text = ' '.join(syllables)
                bytewav = self.synth.synthesize(text)
                result += AudioSegment.from_file(bytewav, format='wav')
            elif isinstance(speech, AudioSegment):
                print('--------- audio----------')
                result += self.cutspeech(speech)
        return result

    def cutspeech(self, song1):
        """Return `song1` trimmed of leading/trailing silence.

        Fix: the original indexed not_silence_ranges[0] BEFORE checking for
        emptiness, raising IndexError on fully-silent audio; check first and
        fall back to the untrimmed segment.
        """
        not_silence_ranges = detect_nonsilent(song1, min_silence_len=100,
                                              silence_thresh=-32)
        if len(not_silence_ranges) == 0:
            return song1
        starti = not_silence_ranges[0][0]
        endi = not_silence_ranges[-1][1]
        return song1[starti:endi]
def run_eval(args, checkpoint_path, output_dir, hparams, sentences):
    """Batch-synthesize `sentences`, writing mels plus an eval map file,
    and return the eval directory path."""
    eval_dir = os.path.join(output_dir, 'eval')
    log_dir = os.path.join(output_dir, 'logs-eval')

    if args.model == 'Tacotron-2':
        assert os.path.normpath(eval_dir) == os.path.normpath(args.mels_dir)

    # Create every output directory up front.
    for directory in (eval_dir, log_dir,
                      os.path.join(log_dir, 'wavs'),
                      os.path.join(log_dir, 'plots')):
        os.makedirs(directory, exist_ok=True)

    log(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, hparams)

    # Chunk the sentences into fixed-size synthesis batches.
    step = hparams.synthesis_batch_size
    sentences = [sentences[i: i + step] for i in range(0, len(sentences), step)]

    log('Starting Synthesis')
    with open(os.path.join(eval_dir, 'map.txt'), 'w') as map_file:
        for batch_idx, texts in enumerate(tqdm(sentences)):
            basenames = ['batch_{}_sentence_{}'.format(batch_idx, j)
                         for j in range(len(texts))]
            mel_filenames = synth.synthesize(texts, basenames, eval_dir,
                                             log_dir, None)
            for row in zip(texts, mel_filenames):
                map_file.write('|'.join(str(x) for x in row) + '\n')
    log('synthesized mel spectrograms at {}'.format(eval_dir))
    return eval_dir
class WavGenerator(object):
    """Render an RTTTL ringtone through an organ registry and effects chain
    into a mono 16-bit 44.1 kHz wav file."""

    def __init__(self, song_id, organ_registry, rtttl, effects=None):
        # Fix: mutable default argument (effects=['none']) shared across
        # instances; use a None sentinel instead. Passing ['none'] explicitly
        # remains fully backward compatible.
        effects = ['none'] if effects is None else effects
        self.song_id = song_id
        self.registry = organ_registry
        self.effects = effects
        self.rtttl_parser = RtttlParser(rtttl)
        self.synthesizer = Synthesizer(self.rtttl_parser.interpret(),
                                       organ_registry)
        self.effects_processor = EffectsProcessor(
            self.synthesizer.synthesize(), effects)

    def save(self):
        """Process the effects chain and write the samples to a wav file
        named from an md5 of the song id, registry and effects list."""
        filename = os.path.abspath(
            os.path.join(
                os.path.dirname(__file__),
                '../storage/wave_files/' + md5(
                    str(self.song_id) + self.registry +
                    list_to_csv(self.effects)) + '.wav'))
        data = self.effects_processor.process()
        # Fix: struct.pack returns bytes on Python 3 -- accumulating into a
        # str '' raised TypeError (and a += loop is quadratic besides);
        # build the frame buffer with b''.join instead.
        wav_data = b''.join(pack('h', v) for v in data)
        # Fix: context manager guarantees the wav file is closed (and the
        # header finalized) even if writeframes raises.
        with wave.open(filename, 'w') as wav:
            # (nchannels, sampwidth, framerate, nframes, comptype, compname)
            wav.setparams((1, 2, 44100, 0, 'NONE', 'not compressed'))
            wav.writeframes(wav_data)
def run_synthesis(args, checkpoint_path):
    """Synthesize mels for every training example listed in train.txt,
    in GTA or natural (free-running) mode, and write a map file."""
    metadata_filename = os.path.join(args.input_dir, 'train.txt')
    print(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, gta=args.GTA)

    with open(metadata_filename, encoding='utf-8') as f:
        metadata = [line.strip().split('|') for line in f]
    # Frame counts (column 1) -> total audio hours, as a sanity check.
    frame_total = sum(int(x[1]) for x in metadata)
    hours = frame_total * hparams.frame_shift_ms / (3600 * 1000)
    print('Loaded metadata for {} examples ({:.2f} hours)'.format(
        len(metadata), hours))

    # GTA outputs are teacher-forced on ground truth; otherwise free-running.
    synth_dir = os.path.join(args.output_dir,
                             'gta' if args.GTA == True else 'natural')
    os.makedirs(synth_dir, exist_ok=True)

    print('starting synthesis')
    with open(os.path.join(synth_dir, 'map.txt'), 'w') as map_file:
        map_file.write('"input"|"frames"|"target_mel"|"generated_mel"\n')
        for i, meta in enumerate(tqdm(metadata)):
            text = meta[2]
            mel_filename = os.path.join(args.input_dir, meta[0])
            mel_output_filename = synth.synthesize(text, i + 1, synth_dir,
                                                   mel_filename)
            map_file.write('"{}"|"{}"|"{}"|"{}"\n'.format(
                text, meta[1], mel_filename, mel_output_filename))
    print('synthesized mel spectrograms at {}'.format(synth_dir))
class Eval:
    """Stateful TTS wrapper: keeps one Synthesizer loaded and swaps
    checkpoints only when the requested voice changes."""

    def init(self):
        # NOTE: deliberately named `init` (not __init__); callers invoke it
        # explicitly after construction.
        os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
        hparams.parse('')
        print(hparams_debug_string())
        self.voice_choice = 1  # female default
        self.base_dir = os.getcwd()
        checkpoint = os.path.join(self.base_dir, 'LJlogs-tacotron',
                                  'model.ckpt-40000')
        self.output_path = os.path.join(self.base_dir, 'static', 'audio',
                                        'output.wav')
        self.synth = Synthesizer()
        self.synth.load(checkpoint)

    def reload_checkpoint(self, voice_choice):
        """Point the synthesizer at the checkpoint for `voice_choice`."""
        if voice_choice == 1:
            parts = ('LJlogs-tacotron', 'model.ckpt-40000')
        else:
            parts = ('california-12-logs', 'model.ckpt-112000')
        checkpoint = os.path.join(self.base_dir, *parts)
        self.voice_choice = voice_choice
        print('Synthesizing: %s' % checkpoint)
        self.synth.reload(checkpoint)

    def text(self, text, voice_choice):
        """Synthesize `text` with the requested voice and return the output
        wav's basename."""
        print('voice changed to ', voice_choice)
        if self.voice_choice != voice_choice:
            self.reload_checkpoint(voice_choice)
        with open(self.output_path, 'wb') as f:
            f.write(self.synth.synthesize(text))
        return os.path.basename(self.output_path)
def run_eval(args):
    """GST evaluation: optionally teacher-forced on mel targets, optionally
    conditioned on a reference audio's mel spectrogram.

    Raises ValueError when neither a reference audio nor GST mode is set.
    """
    print(hparams_debug_string())
    mel_targets = args.mel_targets
    is_teacher_force = args.mel_targets is not None
    if is_teacher_force:
        mel_targets = np.load(args.mel_targets)
    reference_mel = None
    synth = Synthesizer(teacher_forcing_generating=is_teacher_force)
    synth.load(args.checkpoint, args.reference_audio)
    base_path = get_output_base_path(args.checkpoint)
    if args.reference_audio is not None:
        ref_wav = audio.load_wav(args.reference_audio)
        reference_mel = audio.melspectrogram(ref_wav).astype(np.float32).T
        ref_name = os.path.splitext(os.path.basename(args.reference_audio))[0]
        path = '%s_ref-%s.wav' % (base_path, ref_name)
    elif hparams.use_gst:
        print("*******************************")
        print("TODO: add style weights when there is no reference audio. Now we use random weights, " +
              "which may generate unintelligible audio sometimes.")
        print("*******************************")
        path = '%s_ref-randomWeight.wav' % (base_path)
    else:
        raise ValueError("You must set the reference audio if you don't want to use GSTs.")
    with open(path, 'wb') as f:
        print('Synthesizing: %s' % args.text)
        print('Output wav file: %s' % path)
        f.write(synth.synthesize(args.text, reference_mel=reference_mel))
def run_eval(args):
    """Synthesize each line of examples_test.txt, all conditioned on the
    mel spectrogram of one reference audio."""
    #print(hparams_debug_string())
    synth = Synthesizer(teacher_forcing_generating=False)
    synth.load(args.model, args.reference)
    base_path = get_output_base_path(args.model)
    # Guard clause: a reference audio is mandatory for this eval mode.
    if args.reference is None:
        raise ValueError("You must set the reference audio.")
    ref_wav = audio.load_wav(args.reference)
    reference_mel = audio.melspectrogram(ref_wav).astype(np.float32).T
    #path = '%s_ref-%s.wav' % (base_path, os.path.splitext(os.path.basename(args.reference))[0])
    path = 'ref-%s.wav' % (os.path.splitext(os.path.basename(args.reference))[0])
    with open('examples_test.txt', 'r') as fs:
        lines = fs.readlines()
    for i, line in enumerate(lines):
        # Last '|'-separated field of each metadata line is the text.
        args.text = line.strip().split('|')[-1]
        new_path = '%d_' % (i + 6) + path
        print('Synthesizing: %s' % args.text)
        print('Output wav file: %s' % new_path)
        with open(new_path, 'wb') as f:
            f.write(synth.synthesize(args.text, reference_mel=reference_mel))
def eval_text(texts, checkpoint, out_path):
    """Synthesize each text in `texts` to `<out_path>/<index>.wav`.

    Fix: output paths are built with os.path.join instead of hand-rolled
    '%s/%d.wav' concatenation, so the code is portable across platforms
    and tolerant of trailing separators in `out_path`.
    """
    synth = Synthesizer()
    synth.load(checkpoint)
    for i, text in enumerate(texts):
        path = os.path.join(out_path, '%d.wav' % i)
        print('%d test case, write to %s' % (i, path))
        with open(path, 'wb') as f:
            f.write(synth.synthesize(text))
def bulk_synthesize(config):
    """Synthesize a fixed set of probe sentences with every checkpoint found
    in config.log_dir, writing wavs under a timestamped output directory.

    Fixes:
      * racy ``if not exists: makedirs`` replaced by ``makedirs(exist_ok=True)``
      * no-op list comprehension around glob() removed
      * discarded ``[0]`` index on synthesize's return value removed
      * backslash line-continuation inside the summary f-string (which leaked
        source indentation into the printed message) replaced by implicit
        string concatenation
    """
    load_path = config.log_dir
    output_path = config.output_path

    checkpoint_paths = glob("{}/*.ckpt-*.data-*".format(load_path))
    # Extract the integer step from names like model.ckpt-12345.data-....
    checkpoint_steps = sorted(
        int(os.path.basename(path).split('-')[1].split('.')[0])
        for path in checkpoint_paths)

    print(f'Loaded {len(checkpoint_steps)} check point steps from step '
          f'{checkpoint_steps[0]} to step {checkpoint_steps[-1]}.')

    texts = [
        'ne kal ibam soud molen!',
        'koun se mina lod belam',
        'ne. kal. ibam. soud. molen.',
        'koun. se. mina. lod. belam.'
    ]

    output_path = os.path.join(
        os.path.join(output_path, load_path.split('/')[-1]),
        datetime.now().strftime("%m_%d_%Y %Hh%Mm%Ss"))
    print(f'Outputting audio to {output_path}')
    os.makedirs(output_path, exist_ok=True)

    synthesizer = Synthesizer()
    for checkpoint in tqdm(checkpoint_steps):
        for idx, text in enumerate(texts):
            prefix = f'{checkpoint}_{idx}'
            # A previously-built graph makes load() raise ValueError;
            # reset the default graph and retry once.
            try:
                synthesizer.load(load_path, 1, checkpoint)
            except ValueError:
                tf.reset_default_graph()
                synthesizer.load(load_path, 1, checkpoint)
            synthesizer.synthesize(texts=text,
                                   base_path=output_path,
                                   speaker_ids=[0],
                                   attention_trim=True,
                                   isKorean=False,
                                   savePlot=False,
                                   file_name_prefix=prefix)
def run_eval(args):
    """Synthesize a single --sentence to '<checkpoint-base>-_<sentence>_.wav'.

    Fix: the output file handle was opened and never closed; a `with` block
    guarantees it is flushed and closed even if synthesis raises.
    """
    print(hparams_debug_string())
    synth = Synthesizer()
    synth.load(args.checkpoint)
    base_path = get_output_base_path(args.checkpoint)
    path = '%s-_%s_.wav' % (base_path, args.sentence)
    print('Synthesizing: %s - %s' % (path, args.sentence))
    with open(path, 'wb') as file_handle:
        file_handle.write(synth.synthesize(args.sentence))
def run_eval(args):
    """Re-synthesize the reference wav given by args.ref into
    '<ref-name>-eval.wav' in the current directory."""
    print(hparams_debug_string())
    synth = Synthesizer()
    synth.load(args.checkpoint)
    base_path = get_output_base_path(args.checkpoint)
    ref_name = args.ref.split('/')[-1].replace('.wav', '')
    out_path = '{}-eval.wav'.format(ref_name)
    with open(out_path, 'wb') as out_file:
        out_file.write(synth.synthesize(args.ref))
def run_eval(args, text):
    """Synthesize `text` into a wav file named after the text itself."""
    print(hparams_debug_string())
    synth = Synthesizer()
    synth.load(args.checkpoint)
    base_path = get_output_base_path(args.checkpoint)
    out_path = '%s.wav' % (text)
    print('Synthesizing: %s' % out_path)
    with open(out_path, 'wb') as wav_file:
        wav_file.write(synth.synthesize(text))
def run_eval(args):
    """Write one wav per entry of `sentences`, named after the checkpoint."""
    synth = Synthesizer()
    synth.load(args.checkpoint)
    prefix = get_output_base_path(args.checkpoint)
    for index, sentence in enumerate(sentences):
        wav_path = '%s-%d.wav' % (prefix, index)
        print('Synthesizing: %s' % wav_path)
        with open(wav_path, 'wb') as out:
            out.write(synth.synthesize(sentence))
def run_eval(args):
    """Dump hyper-parameters, then synthesize every sentence to a wav
    named after the checkpoint."""
    print(hparams_debug_string())
    synth = Synthesizer()
    synth.load(args.checkpoint)
    out_prefix = get_output_base_path(args.checkpoint)
    for index, sentence in enumerate(sentences):
        wav_path = '%s-%d.wav' % (out_prefix, index)
        print('Synthesizing: %s' % wav_path)
        with open(wav_path, 'wb') as wav_file:
            wav_file.write(synth.synthesize(sentence))
def run_eval(args):
    """Synthesize all `sentences`, tagging filenames with the run's start time.

    Fix: str(int(time.time())) was re-evaluated inside the loop, so files
    from a single run could receive different timestamps whenever synthesis
    crossed a second boundary; the loop-invariant tag is now computed once.
    """
    print(hparams_debug_string())
    synth = Synthesizer()
    synth.load(args.checkpoint)
    base_path = get_output_base_path(args.checkpoint)
    run_tag = base_path + "-" + str(int(time.time()))
    for i, text in enumerate(sentences):
        path = '%s-%d.wav' % (run_tag, i)
        print('Synthesizing: %s' % path)
        with open(path, 'wb') as f:
            f.write(synth.synthesize(text))
def run_eval(checkpoint_path, output_dir, hparams, sentences):
    """Batch-synthesize `sentences` into output_dir (wavs/ and plots/)."""
    # Create the output tree up front.
    os.makedirs(output_dir, exist_ok=True)
    for sub in ('wavs', 'plots'):
        os.makedirs(os.path.join(output_dir, sub), exist_ok=True)

    log(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, hparams)

    # Chunk into fixed-size synthesis batches.
    step = hparams.synthesis_batch_size
    batches = [sentences[i:i + step] for i in range(0, len(sentences), step)]

    log('Starting Synthesis')
    for batch_idx, texts in enumerate(tqdm(batches)):
        basenames = ['{}_sentence_{}'.format(batch_idx, j)
                     for j in range(len(texts))]
        synth.synthesize(texts, basenames, output_dir, None)
def run_eval(ckpt_dir):
    """Synthesize all `sentences` with the latest checkpoint in ckpt_dir.

    Fix: tf.train.get_checkpoint_state returns None when the directory holds
    no checkpoint, which previously crashed with an opaque AttributeError on
    `.model_checkpoint_path`; fail with a clear ValueError instead.
    """
    print(hparams_debug_string())
    ckpt_state = tf.train.get_checkpoint_state(ckpt_dir)
    if ckpt_state is None:
        raise ValueError('No checkpoint found in {}'.format(ckpt_dir))
    checkpoint = ckpt_state.model_checkpoint_path
    synth = Synthesizer()
    synth.load(checkpoint)
    base_path = get_output_base_path(checkpoint)
    for i, text in enumerate(sentences):
        path = '%s-%03d.wav' % (base_path, i)
        print('Synthesizing: %s' % path)
        with open(path, 'wb') as f:
            f.write(synth.synthesize(text))
def run_eval(args):
    """Fetch each sentence's processed text from a local HTTP service, then
    synthesize it to a wav.

    Fixes: requests.get had no timeout (a dead service would hang forever)
    and the HTTP status was never checked (error pages would have been
    synthesized verbatim). The fetch now happens before the output file is
    opened, so a failed request no longer leaves an empty wav behind.
    """
    print(hparams_debug_string())
    synth = Synthesizer()
    synth.load(args.checkpoint)
    base_path = get_output_base_path(args.checkpoint)
    for i, text in enumerate(sentences):
        path = '%s-%03d.wav' % (base_path, i)
        print('Synthesizing: %s' % path)
        url = 'http://127.0.0.1:8080/get_sentence/' + text
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        text = response.text
        with open(path, 'wb') as f:
            f.write(synth.synthesize(text))
def run_eval(args):
    """Condition every synthesized sentence on one reference audio's mel."""
    print(hparams_debug_string())
    synth = Synthesizer()
    synth.load(args.checkpoint)
    base_path = get_output_base_path(args.checkpoint)
    # Compute the reference mel once; it is shared by all sentences.
    reference_wav = load_wav(args.reference_audio)
    reference_mel = melspectrogram(reference_wav).transpose()
    for index, sentence in enumerate(sentences):
        wav_path = '%s-%d.wav' % (base_path, index)
        print('Synthesizing: %s' % wav_path)
        with open(wav_path, 'wb') as out:
            out.write(synth.synthesize(sentence, reference_mel))
def run_eval(args):
    """Synthesize each sentence, saving the 16 kHz wav plus the raw feature
    array (.npy) alongside it."""
    print(hparams_debug_string())
    synth = Synthesizer()
    synth.load(args.checkpoint)
    base_path = get_output_base_path(args.checkpoint)
    for index, sentence in enumerate(sentences):
        wav_path = '%s-%03d.wav' % (base_path, index)
        print(' ')
        print('[{:<10}]: {}'.format('processing', wav_path))
        wav, feature = synth.synthesize(sentence)
        sf.write(wav_path, wav, 16000)
        np.save(wav_path.replace('.wav', '.npy'), feature)
def run_eval(args):
    """Synthesize all sentences into a directory named after the checkpoint.
    File indices start at 1; a trailing '。。' is appended to each text
    (extra end-of-sentence pause)."""
    print(hparams_debug_string())
    synth = Synthesizer()
    synth.load(args.checkpoint)
    base_path = get_output_base_path(args.checkpoint)
    os.makedirs(base_path, exist_ok=True)
    dir_name = os.path.basename(base_path)
    for index, sentence in enumerate(sentences, 1):
        wav_path = os.path.join(base_path, '%s-%04d.wav' % (dir_name, index))
        print('Synthesizing: %s' % wav_path)
        with open(wav_path, 'wb') as out:
            out.write(synth.synthesize(sentence + '。。'))
def run_eval(args, sentences):
    """Write a wav plus a sidecar .txt transcript for every sentence."""
    print(hparams_debug_string())
    synth = Synthesizer()
    synth.load(args.checkpoint)
    base_path = get_output_base_path(args.checkpoint)
    for index, sentence in enumerate(sentences):
        wav_path = '%s-%d.wav' % (base_path, index)
        txt_path = wav_path.replace('.wav', '.txt')
        print('Synthesizing: %s' % wav_path)
        with open(wav_path, 'wb') as wav_file:
            wav_file.write(synth.synthesize(sentence))
        with open(txt_path, 'w') as txt_file:
            txt_file.write('{}\n'.format(sentence))
def run_eval(args):
    """Synthesize all sentences and report the total elapsed time."""
    print(hparams_debug_string())
    synth = Synthesizer()
    synth.load(args.checkpoint)
    base_path = get_output_base_path(args.checkpoint)
    start = time.time()
    for index, sentence in enumerate(sentences):
        wav_path = '%s-%d.wav' % (base_path, index)
        print('Synthesizing: %s' % wav_path)
        # Synthesize and persist as a wav file.
        with open(wav_path, 'wb') as out:
            out.write(synth.synthesize(sentence))
    print('cost_time: %.2f' % (time.time() - start))
def run_eval(args):
    """Synthesize each sentence, saving both the wav and an attention
    alignment plot (.png) next to it."""
    print(hparams_debug_string())
    synth = Synthesizer()
    synth.load(args.checkpoint)
    base_path = get_output_base_path(args.checkpoint)
    for index, sentence in enumerate(sentences):
        wav_path = '%s-%d.wav' % (base_path, index)
        align_path = '%s-%d.png' % (base_path, index)
        print('Synthesizing and plotting: %s' % wav_path)
        wav, alignment = synth.synthesize(sentence)
        with open(wav_path, 'wb') as out:
            out.write(wav)
        plot.plot_alignment(alignment, align_path, info='%s' % (sentence))
def main(args):
    """Load Tacotron + MelGAN checkpoints (user-supplied or the bundled
    defaults) and synthesize args.text into args.out."""
    synthesizer = Synthesizer()
    if args.t_checkpoint and args.v_checkpoint:
        synthesizer.load(args.t_checkpoint, args.v_checkpoint)
    else:
        # Fall back to the models shipped with the project.
        default_taco = os.path.join(PROJECT_PATH,
                                    'models/upc_pau_tacotron2.pt')
        default_vocoder = os.path.join(PROJECT_PATH,
                                       'models/melgan_onapau_catotron.pt')
        synthesizer.load(default_taco, default_vocoder)
    audio = synthesizer.synthesize(args.text)
    with open(args.out, 'wb') as out:
        out.write(audio)