class Eval:
    """Synthesize speech to a fixed WAV file, switching between two checkpoints."""

    def init(self):
        """Parse default hparams and load the voice-1 checkpoint.

        NOTE(review): this method is named ``init``, not ``__init__``, so it
        is not run automatically on construction -- confirm callers invoke it.
        """
        os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
        hparams.parse('')
        print(hparams_debug_string())

        self.voice_choice = 1  # female default
        self.base_dir = os.getcwd()
        default_ckpt = os.path.join(
            self.base_dir, 'LJlogs-tacotron', 'model.ckpt-40000')
        self.output_path = os.path.join(
            self.base_dir, 'static', 'audio', 'output.wav')

        self.synth = Synthesizer()
        self.synth.load(default_ckpt)

    def reload_checkpoint(self, voice_choice):
        """Point the synthesizer at the checkpoint selected by *voice_choice*.

        ``1`` selects the LJ checkpoint; any other value selects the
        california-12 checkpoint. The choice is stored on the instance.
        """
        if voice_choice == 1:
            ckpt_parts = ('LJlogs-tacotron', 'model.ckpt-40000')
        else:
            ckpt_parts = ('california-12-logs', 'model.ckpt-112000')
        checkpoint = os.path.join(self.base_dir, *ckpt_parts)

        self.voice_choice = voice_choice
        print('Synthesizing: %s' % checkpoint)
        self.synth.reload(checkpoint)

    def text(self, text, voice_choice):
        """Synthesize *text* into ``self.output_path`` and return its basename.

        The checkpoint is reloaded first when *voice_choice* differs from the
        voice currently stored on the instance.
        """
        print('voice changed to ', voice_choice)
        if self.voice_choice != voice_choice:
            self.reload_checkpoint(voice_choice)

        with open(self.output_path, 'wb') as wav_file:
            wav_bytes = self.synth.synthesize(text)
            wav_file.write(wav_bytes)
        return os.path.basename(self.output_path)
def run_eval(args):
    """Synthesize every line of ``examples_test.txt`` using a reference audio.

    For each line, the text after the last ``|`` is synthesized conditioned on
    the mel spectrogram of ``args.reference`` and written to
    ``<n>_ref-<reference stem>.wav`` in the current directory, with ``n``
    starting at 6.

    Raises:
        ValueError: if ``args.reference`` is None (raised after the model has
            been loaded).
    """
    synth = Synthesizer(teacher_forcing_generating=False)
    synth.load(args.model, args.reference)
    base_path = get_output_base_path(args.model)  # result currently unused

    if args.reference is None:
        raise ValueError("You must set the reference audio.")

    ref_wav = audio.load_wav(args.reference)
    reference_mel = audio.melspectrogram(ref_wav).astype(np.float32).T
    reference_stem = os.path.splitext(os.path.basename(args.reference))[0]
    path = 'ref-%s.wav' % (reference_stem)

    with open('examples_test.txt', 'r') as fs:
        lines = list(fs)

    # Output numbering starts at 6, i.e. line index + 6.
    for file_no, line in enumerate(lines, start=6):
        args.text = line.strip().split('|')[-1]
        new_path = ('%d_' % file_no) + path
        print('Synthesizing: %s' % args.text)
        print('Output wav file: %s' % new_path)
        with open(new_path, 'wb') as f:
            f.write(synth.synthesize(args.text, reference_mel=reference_mel))
class TextToSpeech():
    """Concatenate synthesized text and existing audio clips into one segment."""

    def __init__(self):
        """Print the hparams and load the hard-coded Tacotron checkpoint."""
        print(hparams_debug_string())
        self.synth = Synthesizer()
        self.synth.load('./logs-tacotron/model.ckpt-52000')

    def speech_connect(self, *speeches):
        """Join *speeches* into a single ``AudioSegment``.

        Each ``str`` item is converted to pinyin and synthesized; each
        ``AudioSegment`` item is trimmed with :meth:`cutspeech`. Items of any
        other type are ignored. The result starts with 100 ms of silence.
        """
        print('---------0----------')
        result = AudioSegment.silent(duration=100)
        for speech in speeches:
            if isinstance(speech, str):
                print('synthesize: ', speech)
                syllables = lazy_pinyin(speech, style=pypinyin.TONE2)
                print('---------1 ', speech, '----------')
                syllables = text2pinyin(syllables)
                text = ' '.join(syllables)
                bytewav = self.synth.synthesize(text)
                # NOTE(review): assumes synthesize() returns something
                # AudioSegment.from_file accepts (path or file-like) -- confirm.
                result += AudioSegment.from_file(bytewav, format='wav')
            elif isinstance(speech, AudioSegment):
                print('--------- audio----------')
                result += self.cutspeech(speech)
        return result

    def cutspeech(self, song1):
        """Trim leading and trailing silence from *song1*.

        Returns *song1* unchanged when no non-silent range is detected.
        """
        not_silence_ranges = detect_nonsilent(
            song1, min_silence_len=100, silence_thresh=-32)
        # Check for "all silent" BEFORE indexing; the original indexed first
        # and raised IndexError on fully silent input.
        if not not_silence_ranges:
            return song1
        starti = not_silence_ranges[0][0]
        endi = not_silence_ranges[-1][1]
        return song1[starti:endi]
def run_eval(args):
    """Synthesize ``args.text`` once, optionally conditioned on reference audio.

    Teacher forcing is enabled when ``args.mel_targets`` is given. With a
    reference audio, its mel spectrogram is passed to the synthesizer; without
    one, synthesis proceeds only if ``hparams.use_gst`` is set.

    Raises:
        ValueError: if no reference audio is given and GSTs are disabled.
    """
    print(hparams_debug_string())

    mel_targets = args.mel_targets
    use_teacher_forcing = args.mel_targets is not None
    if use_teacher_forcing:
        # Loaded here but not passed on anywhere in this function.
        mel_targets = np.load(args.mel_targets)

    synth = Synthesizer(teacher_forcing_generating=use_teacher_forcing)
    synth.load(args.checkpoint, args.reference_audio)
    base_path = get_output_base_path(args.checkpoint)

    reference_mel = None
    if args.reference_audio is not None:
        ref_wav = audio.load_wav(args.reference_audio)
        reference_mel = audio.melspectrogram(ref_wav).astype(np.float32).T
        ref_stem = os.path.splitext(os.path.basename(args.reference_audio))[0]
        path = '%s_ref-%s.wav' % (base_path, ref_stem)
    elif hparams.use_gst:
        banner = "*******************************"
        print(banner)
        print("TODO: add style weights when there is no reference audio. Now we use random weights, "
              "which may generate unintelligible audio sometimes.")
        print(banner)
        path = '%s_ref-randomWeight.wav' % (base_path)
    else:
        raise ValueError("You must set the reference audio if you don't want to use GSTs.")

    # The output file is opened before synthesis starts, as in the original.
    with open(path, 'wb') as out_file:
        print('Synthesizing: %s' % args.text)
        print('Output wav file: %s' % path)
        out_file.write(synth.synthesize(args.text, reference_mel=reference_mel))
def play(chord):
    """Play *chord* for one second through a newly opened player stream."""
    player = Player()
    player.open_stream()
    sine_synth = Synthesizer(
        osc1_waveform=Waveform.sine,
        osc1_volume=0.4,
        use_osc2=False,
    )
    wave = sine_synth.generate_chord(chord, 1)
    player.play_wave(wave)
def write_chord(f, note_freq, sound_leveler):
    """Write a 3-second single-note sine chord at *note_freq* to *f*.

    The oscillator volume is ``sound_leveler(note_freq)``.
    """
    writer = Writer()
    volume = sound_leveler(note_freq)
    sine_synth = Synthesizer(
        osc1_waveform=Waveform.sine,
        osc1_volume=volume,
        use_osc2=False,
    )
    writer.write_wave(f, sine_synth.generate_chord([note_freq], 3.0))
def run_synthesis(args, checkpoint_path):
    """Synthesize a mel spectrogram for every entry of ``train.txt``.

    Reads ``<args.input_dir>/train.txt`` (``|``-separated: mel filename, frame
    count, text), synthesizes each text, and records the results in
    ``map.txt`` inside ``<args.output_dir>/gta`` or ``.../natural`` depending
    on ``args.GTA``.

    Args:
        args: namespace providing ``input_dir``, ``output_dir`` and ``GTA``.
        checkpoint_path: checkpoint passed to ``Synthesizer.load``.
    """
    metadata_filename = os.path.join(args.input_dir, 'train.txt')
    print(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, gta=args.GTA)

    with open(metadata_filename, encoding='utf-8') as f:
        metadata = [line.strip().split('|') for line in f]

    # Sanity check that the whole dataset was read.
    total_frames = sum(int(entry[1]) for entry in metadata)
    hours = total_frames * hparams.frame_shift_ms / (3600 * 1000)
    print('Loaded metadata for {} examples ({:.2f} hours)'.format(
        len(metadata), hours))

    # `== True` is kept deliberately: a truthy non-bool value (e.g. the
    # string 'False') must not select the GTA directory.
    if args.GTA == True:  # noqa: E712
        synth_dir = os.path.join(args.output_dir, 'gta')
    else:
        synth_dir = os.path.join(args.output_dir, 'natural')

    # Create output path if it doesn't exist
    os.makedirs(synth_dir, exist_ok=True)

    print('starting synthesis')
    # The metadata is read as UTF-8, so the map file is written as UTF-8 too;
    # the platform default encoding can fail on non-ASCII text.
    map_path = os.path.join(synth_dir, 'map.txt')
    with open(map_path, 'w', encoding='utf-8') as file:
        file.write('"input"|"frames"|"target_mel"|"generated_mel"\n')
        for i, meta in enumerate(tqdm(metadata)):
            text = meta[2]
            mel_filename = os.path.join(args.input_dir, meta[0])
            mel_output_filename = synth.synthesize(
                text, i + 1, synth_dir, mel_filename)
            file.write('"{}"|"{}"|"{}"|"{}"\n'.format(
                text, meta[1], mel_filename, mel_output_filename))
    print('synthesized mel spectrograms at {}'.format(synth_dir))
def run_eval(args, checkpoint_path, output_dir, hparams, sentences):
    """Synthesize *sentences* in batches and return the eval directory.

    Mel filenames returned by the synthesizer are written, one
    ``text|mel_filename`` pair per line, to ``<output_dir>/eval/map.txt``.
    Logs go under ``<output_dir>/logs-eval`` (``wavs`` and ``plots``).
    """
    eval_dir = os.path.join(output_dir, 'eval')
    log_dir = os.path.join(output_dir, 'logs-eval')

    if args.model == 'Tacotron-2':
        assert os.path.normpath(eval_dir) == os.path.normpath(args.mels_dir)

    # Create output paths if they don't exist
    required_dirs = (
        eval_dir,
        log_dir,
        os.path.join(log_dir, 'wavs'),
        os.path.join(log_dir, 'plots'),
    )
    for directory in required_dirs:
        os.makedirs(directory, exist_ok=True)

    log(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, hparams)

    # Set inputs batch wise
    batch_size = hparams.synthesis_batch_size
    sentences = [
        sentences[start: start + batch_size]
        for start in range(0, len(sentences), batch_size)
    ]

    log('Starting Synthesis')
    with open(os.path.join(eval_dir, 'map.txt'), 'w') as file:
        for batch_no, texts in enumerate(tqdm(sentences)):
            basenames = [
                'batch_{}_sentence_{}'.format(batch_no, sentence_no)
                for sentence_no in range(len(texts))
            ]
            mel_filenames = synth.synthesize(
                texts, basenames, eval_dir, log_dir, None)
            for pair in zip(texts, mel_filenames):
                file.write('|'.join(map(str, pair)) + '\n')

    log('synthesized mel spectrograms at {}'.format(eval_dir))
    return eval_dir
def load_model(
    model_path='/Users/sshaar/hackathon/frontend/theme/backend/models/tacotron-20180906/model.ckpt',
):
    """Create a ``Synthesizer``, load *model_path* into it and return it."""
    synthesizer = Synthesizer()
    synthesizer.load(model_path)
    return synthesizer
def playVoice(notes):
    """Play each item of *notes* as a half-second triangle wave.

    Each note is passed through ``synth(note)`` before being handed to the
    synthesizer -- presumably a note-to-frequency mapping; verify against its
    definition.
    """
    player = Player()
    player.open_stream()
    triangle_synth = Synthesizer(
        osc1_waveform=Waveform.triangle,
        osc1_volume=1.0,
        use_osc2=False,
    )
    for note in notes:
        wave = triangle_synth.generate_constant_wave(synth(note), .5)
        player.play_wave(wave)
def play_synthe(code):
    """Play a 0.1-length square wave for *code* on the module-level ``player``.

    The frequency is ``scales_freq[code - 65]``.
    """
    square_synth = Synthesizer(
        osc1_waveform=Waveform.square,
        osc1_volume=0.2,
    )
    frequency = scales_freq[code - 65]
    wave = square_synth.generate_constant_wave(frequency=frequency, length=0.1)
    player.play_wave(wave)
def worker():
    """Play the module-level ``areatone`` as a 0.14-length sine wave.

    Returns whatever ``Player.play_wave`` returns.
    """
    player = Player()
    player.open_stream()
    sine_synth = Synthesizer(
        osc1_waveform=Waveform.sine,
        osc1_volume=0.7,
        use_osc2=False,
    )
    tone = sine_synth.generate_constant_wave(areatone, 0.14)
    return player.play_wave(tone)
def eval_text(texts, checkpoint, out_path):
    """Synthesize each item of *texts* to ``<out_path>/<index>.wav``.

    Args:
        texts: iterable of input texts.
        checkpoint: checkpoint passed to ``Synthesizer.load``.
        out_path: directory prefix for the output files.
    """
    synth = Synthesizer()
    synth.load(checkpoint)
    for index, text in enumerate(texts):
        wav_path = '%s/%d.wav' % (out_path, index)
        print('%d test case, write to %s' % (index, wav_path))
        with open(wav_path, 'wb') as wav_file:
            wav_file.write(synth.synthesize(text))
def __init__(self, pre=None, post=None):
    """Store the pre/post note lists and open a player with a sine synthesizer.

    Args:
        pre: notes stored as ``self.pre``; defaults to ``["C3", "E3", "G3"]``.
        post: notes stored as ``self.post``; defaults to ``["C3", "D3", "F3"]``.
    """
    # Build the default lists per call: list literals used as default
    # arguments are shared by every instance, so mutating one would leak
    # into all others.
    self.pre = ["C3", "E3", "G3"] if pre is None else pre
    self.post = ["C3", "D3", "F3"] if post is None else post
    self.player = Player()
    self.player.open_stream()
    self.synthesizer = Synthesizer(
        osc1_waveform=Waveform.sine,
        osc1_volume=1.0,
        use_osc2=False,
    )
def run_eval(args, text):
    """Synthesize *text* with ``args.checkpoint`` and write it to ``<text>.wav``."""
    print(hparams_debug_string())
    synth = Synthesizer()
    synth.load(args.checkpoint)
    base_path = get_output_base_path(args.checkpoint)  # result currently unused

    wav_path = '%s.wav' % (text)
    print('Synthesizing: %s' % wav_path)
    with open(wav_path, 'wb') as wav_file:
        wav_file.write(synth.synthesize(text))
def __init__(self, path):
    """Initialise the ``Synthesizer`` base with *path* and create a speech client."""
    Synthesizer.__init__(self, path)

    # Currently disabled settings, kept for reference:
    # self.timeout = 10
    # self.user = environ.get('GOOGLE_USER_STT')    # TODO: do something with it
    # self.passwd = environ.get('GOOGLE_PASS_STT')  # TODO: do something with it

    self.client = speech.SpeechClient()
def __init__(self, song_id, organ_registry, rtttl, effects=None):
    """Parse *rtttl*, synthesize it and wrap the result in an effects processor.

    Args:
        song_id: stored as ``self.song_id``.
        organ_registry: stored as ``self.registry`` and passed to the
            synthesizer.
        rtttl: input handed to ``RtttlParser``.
        effects: list of effects; defaults to ``['none']``.
    """
    # Use a None sentinel instead of a list default so instances never share
    # (and accidentally mutate) the same default list object.
    if effects is None:
        effects = ['none']

    self.song_id = song_id
    self.registry = organ_registry
    self.effects = effects
    self.rtttl_parser = RtttlParser(rtttl)
    self.synthesizer = Synthesizer(
        self.rtttl_parser.interpret(), organ_registry)
    self.effects_processor = EffectsProcessor(
        self.synthesizer.synthesize(), effects)
def run_eval(args):
    """Synthesize every module-level sentence to ``<base_path>-<index>.wav``."""
    synth = Synthesizer()
    synth.load(args.checkpoint)
    base_path = get_output_base_path(args.checkpoint)

    for index, sentence in enumerate(sentences):
        wav_path = '%s-%d.wav' % (base_path, index)
        print('Synthesizing: %s' % wav_path)
        with open(wav_path, 'wb') as wav_file:
            wav_file.write(synth.synthesize(sentence))
def run_eval(args):
    """Synthesize ``args.sentence`` to ``<base_path>-_<sentence>_.wav``.

    ``base_path`` is derived from ``args.checkpoint`` via
    ``get_output_base_path``.
    """
    print(hparams_debug_string())
    synth = Synthesizer()
    synth.load(args.checkpoint)
    base_path = get_output_base_path(args.checkpoint)
    path = '%s-_%s_.wav' % (base_path, args.sentence)
    print('Synthesizing: %s - %s' % (path, args.sentence))
    # Use a context manager so the file is flushed and closed even if
    # synthesis raises; the original never closed the handle.
    with open(path, 'wb') as file_handle:
        file_handle.write(synth.synthesize(args.sentence))
def play_beat(t):
    """Play ``int(t / RATE)`` sawtooth 'A3' beats, one every ``RATE`` seconds.

    Each beat is a 0.1-length chord followed by a sleep of ``RATE - 0.1``.
    """
    player = Player()
    player.open_stream()
    saw_synth = Synthesizer(
        osc1_waveform=Waveform.sawtooth,
        osc1_volume=1.0,
        use_osc2=False,
    )
    beat_count = int(t / RATE)
    for _ in range(beat_count):
        beat = saw_synth.generate_chord(['A3'], 0.1)
        player.play_wave(beat)
        time.sleep(float(RATE - 0.1))
def run_eval(args):
    """Synthesize ``args.ref`` and write the result to ``<ref stem>-eval.wav``.

    The stem is the part of ``args.ref`` after the last ``/`` with every
    ``.wav`` occurrence removed. The file is written to the current directory.
    """
    print(hparams_debug_string())
    synth = Synthesizer()
    synth.load(args.checkpoint)
    base_path = get_output_base_path(args.checkpoint)  # result currently unused

    ref_name = args.ref.split('/')[-1]
    ref_stem = ref_name.replace('.wav', '')
    with open('{}-eval.wav'.format(ref_stem), 'wb') as wav_file:
        # NOTE(review): args.ref itself is passed to synthesize() -- confirm
        # that is the intended input.
        wav_file.write(synth.synthesize(args.ref))
def run_eval(args):
    """Synthesize each module-level sentence to a timestamped WAV file.

    Output names are ``<base_path>-<unix seconds>-<index>.wav``; the timestamp
    is taken anew for every sentence.
    """
    print(hparams_debug_string())
    synth = Synthesizer()
    synth.load(args.checkpoint)
    base_path = get_output_base_path(args.checkpoint)

    for index, sentence in enumerate(sentences):
        # Evaluated inside the loop on purpose: each file gets its own stamp.
        stamped_base = base_path + "-" + str(int(time.time()))
        wav_path = '%s-%d.wav' % (stamped_base, index)
        print('Synthesizing: %s' % wav_path)
        with open(wav_path, 'wb') as wav_file:
            wav_file.write(synth.synthesize(sentence))
def play_progression(progression):
    """Play every chord of every measure in *progression*, in order.

    Each chord is indexed as ``chord[0]`` (passed to ``generate_chord``) and
    ``chord[1]`` (its absolute value is the length) -- presumably
    ``(notes, duration)``; verify against the caller.
    """
    player = Player()
    player.open_stream()
    sine_synth = Synthesizer(
        osc1_waveform=Waveform.sine,
        osc1_volume=10.0,
        use_osc2=False,
    )
    for measure in progression:
        for chord in measure:
            notes = chord[0]
            length = abs(chord[1])
            player.play_wave(sine_synth.generate_chord(notes, length))
def test_write_wave():
    """Write a 3-second 440 Hz sine wave to ``./test_wave.wav`` without error."""
    writer = Writer()
    sine_synth = Synthesizer(
        osc1_waveform=Waveform.sine,
        osc1_volume=1.0,
        use_osc2=False,
    )
    writer.write_wave(
        "./test_wave.wav",
        sine_synth.generate_constant_wave(440.0, 3.0),
    )
    # Reaching this line means nothing above raised.
    ok_("write_wave() succeeded.")
def run_eval(args):
    """Print the hparams, then synthesize each module-level sentence.

    Output files are named ``<base_path>-<index>.wav``.
    """
    print(hparams_debug_string())
    synth = Synthesizer()
    synth.load(args.checkpoint)
    base_path = get_output_base_path(args.checkpoint)

    for index, sentence in enumerate(sentences):
        wav_path = '%s-%d.wav' % (base_path, index)
        print('Synthesizing: %s' % wav_path)
        with open(wav_path, 'wb') as wav_file:
            wav_file.write(synth.synthesize(sentence))
def test_sine_wave():
    """Check size, peak values and mean of a 1-second 440 Hz sine wave."""
    sine_synth = Synthesizer(
        osc1_waveform=Waveform.sine,
        osc1_volume=1.0,
        use_osc2=False,
        rate=RATE,
    )
    wave = sine_synth.generate_constant_wave(440.0, 1.0)

    # One second of audio must contain exactly RATE samples.
    eq_(wave.size, RATE)
    assert_almost_equal(wave.max(), 1.0, places=3)
    assert_almost_equal(wave.min(), -1.0, places=3)
    assert_almost_equal(wave.mean(), 0.0, places=3)
def run_eval(ckpt_dir):
    """Synthesize each module-level sentence with the latest checkpoint.

    The checkpoint path is read from the checkpoint state of *ckpt_dir*;
    output files are named ``<base_path>-<3-digit index>.wav``.

    Raises:
        FileNotFoundError: if *ckpt_dir* has no checkpoint state.
    """
    print(hparams_debug_string())
    # get_checkpoint_state() returns None when no checkpoint state is
    # available; fail with a clear message instead of an AttributeError.
    ckpt_state = tf.train.get_checkpoint_state(ckpt_dir)
    if ckpt_state is None:
        raise FileNotFoundError('No checkpoint state found in: %s' % ckpt_dir)
    checkpoint = ckpt_state.model_checkpoint_path

    synth = Synthesizer()
    synth.load(checkpoint)
    base_path = get_output_base_path(checkpoint)
    for i, text in enumerate(sentences):
        path = '%s-%03d.wav' % (base_path, i)
        print('Synthesizing: %s' % path)
        with open(path, 'wb') as f:
            f.write(synth.synthesize(text))
def run_eval(args):
    """Synthesize each module-level sentence conditioned on a reference mel.

    The transposed mel spectrogram of ``args.reference_audio`` is passed as
    the second argument to ``synthesize`` for every sentence. Output files are
    named ``<base_path>-<index>.wav``.
    """
    print(hparams_debug_string())
    synth = Synthesizer()
    synth.load(args.checkpoint)
    base_path = get_output_base_path(args.checkpoint)

    reference_wav = load_wav(args.reference_audio)
    reference_mel = melspectrogram(reference_wav).transpose()

    for index, sentence in enumerate(sentences):
        wav_path = '%s-%d.wav' % (base_path, index)
        print('Synthesizing: %s' % wav_path)
        with open(wav_path, 'wb') as wav_file:
            wav_file.write(synth.synthesize(sentence, reference_mel))
def init(self):
    """Parse default hparams and load the default (voice 1) checkpoint.

    Sets ``voice_choice``, ``base_dir``, ``output_path`` and ``synth`` on the
    instance.

    NOTE(review): named ``init`` rather than ``__init__``, so it is not called
    automatically on construction -- confirm callers invoke it explicitly.
    """
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
    hparams.parse('')
    print(hparams_debug_string())

    self.voice_choice = 1  # female default
    self.base_dir = os.getcwd()
    default_ckpt = os.path.join(
        self.base_dir, 'LJlogs-tacotron', 'model.ckpt-40000')
    self.output_path = os.path.join(
        self.base_dir, 'static', 'audio', 'output.wav')

    self.synth = Synthesizer()
    self.synth.load(default_ckpt)
def run_eval(args):
    """Synthesize each module-level sentence into the ``base_path`` directory.

    ``base_path`` is created if missing. Files are named
    ``<basename of base_path>-<4-digit index>.wav`` with indices starting at 1,
    and ``'。。'`` is appended to every sentence before synthesis.
    """
    print(hparams_debug_string())
    synth = Synthesizer()
    synth.load(args.checkpoint)
    base_path = get_output_base_path(args.checkpoint)
    os.makedirs(base_path, exist_ok=True)

    for number, sentence in enumerate(sentences, 1):
        wav_name = '%s-%04d.wav' % (os.path.basename(base_path), number)
        wav_path = os.path.join(base_path, wav_name)
        print('Synthesizing: %s' % wav_path)
        with open(wav_path, 'wb') as wav_file:
            wav_file.write(synth.synthesize(sentence + '。。'))
def run_eval(args):
    """Synthesize each module-level sentence, saving audio and features.

    For every sentence the synthesizer returns ``(wav, feature)``; the wav is
    written to ``<base_path>-<3-digit index>.wav`` with sample rate 16000 and
    the feature is saved next to it with ``.wav`` replaced by ``.npy``.
    """
    print(hparams_debug_string())
    synth = Synthesizer()
    synth.load(args.checkpoint)
    base_path = get_output_base_path(args.checkpoint)

    for index, sentence in enumerate(sentences):
        wav_path = '%s-%03d.wav' % (base_path, index)
        print(' ')
        print('[{:<10}]: {}'.format('processing', wav_path))

        wav, feature = synth.synthesize(sentence)
        sf.write(wav_path, wav, 16000)
        feature_path = wav_path.replace('.wav', '.npy')
        np.save(feature_path, feature)
class UIResource:
    """Falcon resource that serves the HTML page stored in ``html_body``."""

    def on_get(self, req, res):
        res.content_type = 'text/html'
        res.body = html_body


class SynthesisResource:
    """Falcon resource that returns synthesized WAV audio for ``?text=...``."""

    def on_get(self, req, res):
        requested_text = req.params.get('text')
        # A missing or empty ``text`` parameter is a client error.
        if not requested_text:
            raise falcon.HTTPBadRequest()
        res.data = synthesizer.synthesize(requested_text)
        res.content_type = 'audio/wav'


# Module-level synthesizer shared by all requests.
synthesizer = Synthesizer()

api = falcon.API()
api.add_route('/synthesize', SynthesisResource())
api.add_route('/', UIResource())


if __name__ == '__main__':
    from wsgiref import simple_server

    parser = argparse.ArgumentParser()
    parser.add_argument('--checkpoint', required=True,
                        help='Full path to model checkpoint')
    parser.add_argument('--port', type=int, default=9000)
    parser.add_argument('--hparams', default='',
                        help='Hyperparameter overrides as a comma-separated list of name=value pairs')
    args = parser.parse_args()

    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
    hparams.parse(args.hparams)
    # NOTE(review): the visible code ends here -- no checkpoint load or server
    # start appears in this chunk; confirm against the full file.