def _concatunits(self, utt, args):
    """Concatenate selected units and produce the waveform via a
    residual-excited LPC synthesis filter.

    Reads the "Unit" relation of *utt*, joins the per-unit LPC coefficient
    tracks and windowed residuals, overlap-adds the residuals into one
    excitation signal, runs the synthesis filter and stores the result as
    utt["waveform"].

    :param utt: utterance whose "Unit" relation holds selected candidates
    :param args: unused here (kept for the processor-call convention)
    :returns: the same utterance with "waveform" set
    """
    unit_rel = utt.get_relation("Unit")
    # concatenate tracks/residuals over all units:
    unit_item = unit_rel.head_item
    # deep copy so the candidate's track in the catalogue is not mutated:
    lpctrack = copy.deepcopy(
        unit_item["selected_unit"]["candidate"]["lpc-coefs"])
    residuals = window_residual(
        unit_item["selected_unit"]["candidate"]["lpc-coefs"],
        unit_item["selected_unit"]["candidate"]["residuals"])
    unit_item = unit_item.next_item
    while unit_item is not None:
        temptrack = unit_item["selected_unit"]["candidate"]["lpc-coefs"]
        # append lpccoefs to lpctrack, offsetting times by current track end:
        lpctrack.times = np.concatenate(
            (lpctrack.times, (temptrack.times + lpctrack.times[-1])))
        lpctrack.values = np.concatenate(
            (lpctrack.values, temptrack.values))
        # append windowed residuals:
        residuals.extend(
            window_residual(
                temptrack,
                unit_item["selected_unit"]["candidate"]["residuals"]))
        unit_item = unit_item.next_item
    # overlap-add residual (last window may extend past the last pitch mark):
    lastsample = int(round(lpctrack.times[-1] * SAMPLERATE)) + int(
        round(len(residuals[-1]) / 2))
    residual = np.zeros(lastsample + 1)
    for i, time in enumerate(lpctrack.times):
        centersample = int(round(time * SAMPLERATE))
        firstsample = centersample - int(len(residuals[i]) / 2)
        residual[firstsample:firstsample + len(residuals[i])] += np.array(
            residuals[i])
    # synth filter -- np.float was a deprecated alias for the builtin float
    # and was removed in NumPy 1.24, so use float directly:
    samples = synth_filter(lpctrack.times, lpctrack.values,
                           residual.astype(float), SAMPLERATE)
    # save in utterance:
    w = Waveform()
    w.samplerate = SAMPLERATE
    w.samples = samples.astype("int16")  # 16-bit samples
    w.channels = 1
    utt["waveform"] = w
    return utt
def test(testfn="samples/data_001d.wav"):
    """Visual smoke test: high-pass filter a sample file, run the simple
    detector and plot the intermediate signals and detected segments."""
    audio = Waveform(testfn)
    cleaned = zero_startends(audio.samples)
    hpf = make_hpf()
    # mfreqz(hpf)
    # show()
    filtered = filt(hpf, cleaned)
    detected = detectsimple(filtered)
    plot(cleaned)
    plot(np.abs(filtered))
    plot(detected / 1000.0)
    print(find_segs(detected))
    show()
def get_wav(self):
    """Return the speech generated by the HTS engine as a Waveform.

    Must only be called after synthesis has completed (self.donesynth).
    """
    assert self.donesynth
    nsamples = LIBHTS.HTS_Engine_get_nsamples(self.engine)
    waveform = Waveform()
    waveform.samplerate = int(
        LIBHTS.HTS_Engine_get_sampling_frequency(self.engine))
    waveform.channels = 1
    # copy the generated samples out of the engine (16-bit samples):
    waveform.samples = np.array(
        [LIBHTS.HTS_Engine_get_generated_speech(self.engine, i)
         for i in range(nsamples)],
        np.int16)
    return waveform
def from_textgrid(voice):
    """ Create aligned Utterances by synthesising to Segment level
        from the orthography and simply copying label end times into
        segment items as "end" feature.
    """
    # Setup and create necessary dirs...
    cwd = os.getcwd()
    wav_dir = os.path.join(cwd, WAV_DIR)
    uttwav_dir = os.path.join(cwd, UTTWAV_DIR)
    transcr_location = os.path.join(cwd, ETC_DIR, TRANSCR_FILE)
    textgrid_dir = os.path.join(cwd, TEXTGRID_DIR)
    aligned_utts_dir = os.path.join(cwd, ALIGNED_UTT_DIR)
    os.makedirs(aligned_utts_dir)
    # update utts from textgrids...
    transcriptions = load_transcriptions_schemefile(transcr_location)
    alignments = sl.Corpus(textgrid_dir)
    #################
    uttnames = sorted(transcriptions)
    wavfilenames = sorted(glob(os.path.join(uttwav_dir, "*")))
    for sc_utt, uttname, wavfilename in zip(alignments.utterances,
                                            uttnames,
                                            wavfilenames):
        assert sc_utt.name == uttname, "Utterance missmatch..."
        assert os.path.basename(wavfilename).startswith(
            uttname), "Utterance missmatch..."
        print("Synthesizing:", uttname)
        utt = voice.synthesize(transcriptions[uttname], 'text-to-segments')
        utt["file_id"] = uttname
        utt = transplant_segtime_info(voice, sc_utt, utt)
        # add waveform to utt:
        utt["waveform"] = Waveform(wavfilename)
        # save utt...
        ttslab.tofile(
            utt,
            os.path.join(aligned_utts_dir, ".".join([uttname, UTT_EXT])))
def make_aligned_utts(voice, transcriptions, sc_corpus, wav_dir, output_dir):
    """ Make Word level utts and complete from 3-tier TextGrids...
    """
    def copyuttfeats(u, u2):
        # Copy Word/Syllable item features present in u2 but missing in u;
        # asserts that the item names line up in both utterances.
        for relname in ["Word", "Syllable"]:
            items = u.gr(relname).as_list()
            items2 = u2.gr(relname).as_list()
            assert [i["name"] for i in items] == [i2["name"] for i2 in items2]
            for item, item2 in zip(items, items2):
                for featname in item2:
                    if featname not in item:
                        item[featname] = item2[featname]
        return u

    wavfilenames = sorted(glob(os.path.join(wav_dir, "*")))
    for sc_utt, uttname, wavfilename in zip(sc_corpus.utterances,
                                            sorted(transcriptions),
                                            wavfilenames):
        assert sc_utt.name == uttname, "Utterance missmatch..."
        assert os.path.basename(wavfilename).startswith(
            uttname), "Utterance missmatch..."
        print("Synthesizing:", uttname)
        utt = voice.synthesize(transcriptions[uttname], 'text-to-words')
        utt["file_id"] = uttname
        utt = complete_utt_from_textgrid(voice, sc_utt, utt)
        # second synthesis to segment level supplies extra item feats:
        utt2 = voice.synthesize(transcriptions[uttname], 'text-to-segments')
        try:
            utt = copyuttfeats(utt, utt2)
        except AssertionError:
            print("WARNING: could not copy item feats for %s" % utt["file_id"])
        # add waveform to utt:
        utt["waveform"] = Waveform(wavfilename)
        # save utt...
        ttslab.tofile(utt,
                      os.path.join(output_dir, ".".join([uttname, UTT_EXT])))
def make_aligned_utts(voice, transcriptions, sc_corpus, wav_dir, output_dir):
    """ Make Word level utts and complete from 3-tier TextGrids...
    """
    wavfilenames = sorted(glob(os.path.join(wav_dir, "*")))
    uttnames = sorted(transcriptions)
    for sc_utt, uttname, wavfilename in zip(sc_corpus.utterances,
                                            uttnames,
                                            wavfilenames):
        assert sc_utt.name == uttname, "Utterance missmatch..."
        assert os.path.basename(wavfilename).startswith(
            uttname), "Utterance missmatch..."
        print("Synthesizing:", uttname)
        utt = voice.synthesize(transcriptions[uttname], 'text-to-words')
        utt["file_id"] = uttname
        utt = complete_utt_from_textgrid(voice, sc_utt, utt)
        # add waveform to utt:
        utt["waveform"] = Waveform(wavfilename)
        # save utt...
        ttslab.tofile(utt,
                      os.path.join(output_dir, ".".join([uttname, UTT_EXT])))
def update_wordview(self):
    """Refresh the word-context view: prev/current/next word labels,
    pronunciation entry fields, and the original vs synthesised
    word-context spectrograms for the currently selected word.

    Fixes over the previous version:
      - sample offsets (samplerate * time) are floats; they are now cast
        to int before being used as slice bounds (float indices raise
        TypeError on modern Python/numpy).
      - specgram's xextent is the clip duration in seconds,
        len(samples) / samplerate; previously samplerate * len(samples).
    """
    u = self.corpusview.current_utt
    words = u.get_relation("SylStructure").as_list()
    word = words[self.corpusview.current_wordindex]
    uttkey = self.corpusview.worklist[self.corpusview.current_index][0]
    origwav = u["waveform"]
    synthwav = u["lindists"]["utt"]["waveform"]
    try:
        # prev_item is None at the first word; indexing None raises
        # TypeError, selecting the defaults below.
        prevword = word.prev_item
        prevwordname = prevword["name"]
        origstartsample = int(origwav.samplerate * prevword["start"])
        synthstartsample = int(synthwav.samplerate * prevword["start"])
        prevwordpronun = self.corpusview.pronuns[uttkey][self.corpusview.current_wordindex - 1]
    except TypeError:
        prevwordname = "NONE"
        origstartsample = 0
        synthstartsample = 0
        prevwordpronun = ""
    wordname = word["name"]
    wordpronun = self.corpusview.pronuns[uttkey][self.corpusview.current_wordindex]
    try:
        nextword = word.next_item
        nextwordname = nextword["name"]
        origendsample = int(origwav.samplerate * nextword["end"])
        synthendsample = int(synthwav.samplerate * nextword["end"])
        nextwordpronun = self.corpusview.pronuns[uttkey][self.corpusview.current_wordindex + 1]
    except TypeError:
        nextwordname = "NONE"
        origendsample = len(origwav.samples)
        # NOTE(review): original code also used the ORIGINAL waveform's
        # length for the synth end default -- preserved; confirm intended.
        synthendsample = len(origwav.samples)
        nextwordpronun = ""
    self.label_word1.set_label(prevwordname)
    self.label_word2.set_label(wordname)
    self.label_word3.set_label(nextwordname)
    self.entry_word1.set_text(prevwordpronun)
    self.entry_word2.set_text(wordpronun)
    self.entry_word3.set_text(nextwordpronun)
    # original-audio word context and its spectrogram:
    self.origwordcontextwav = Waveform()
    self.origwordcontextwav.samplerate = origwav.samplerate
    self.origwordcontextwav.samples = origwav.samples[origstartsample:origendsample]
    origwordcontext_specfig = Figure(dpi=72)
    origwordcontext_specplot = origwordcontext_specfig.add_subplot(111)
    origwordcontext_specplot.specgram(
        self.origwordcontextwav.samples,
        Fs=self.origwordcontextwav.samplerate,
        NFFT=128, noverlap=64,
        xextent=(0.0, len(self.origwordcontextwav.samples) / float(self.origwordcontextwav.samplerate)))
    origwordcontext_speccanvas = FigureCanvasGTK(origwordcontext_specfig)
    framecontents = self.frame_wordspecorig.get_children()
    if framecontents:
        self.frame_wordspecorig.remove(framecontents[0])
    self.frame_wordspecorig.add(origwordcontext_speccanvas)
    # synthesised-audio word context and its spectrogram:
    self.synthwordcontextwav = Waveform()
    self.synthwordcontextwav.samplerate = synthwav.samplerate
    self.synthwordcontextwav.samples = synthwav.samples[synthstartsample:synthendsample]
    synthwordcontext_specfig = Figure(dpi=72)
    synthwordcontext_specplot = synthwordcontext_specfig.add_subplot(111)
    synthwordcontext_specplot.specgram(
        self.synthwordcontextwav.samples,
        Fs=self.synthwordcontextwav.samplerate,
        NFFT=128, noverlap=64,
        xextent=(0.0, len(self.synthwordcontextwav.samples) / float(self.synthwordcontextwav.samplerate)))
    synthwordcontext_speccanvas = FigureCanvasGTK(synthwordcontext_specfig)
    framecontents = self.frame_wordspecsynth.get_children()
    if framecontents:
        self.frame_wordspecsynth.remove(framecontents[0])
    self.frame_wordspecsynth.add(synthwordcontext_speccanvas)
    self.statusbar.push(0, "Item: %s/%s (Word index: %s)" % (self.corpusview.current_index + 1, len(self.corpusview.worklist), self.corpusview.current_wordindex))
    self.table_words.show_all()
class SpeechbrowserApp(object):
    """GTK application for browsing/QA-ing an aligned speech corpus.

    Shows a spectrogram of the current utterance and of a three-word
    context (original vs synthesised audio) and lets the user edit the
    transcription, per-word pronunciations and a free-text comment.
    Corpus state lives in ``self.corpusview``, created when a worklist
    is opened from the toolbar.
    """
    def __init__(self, voice):
        # Build the UI from the Glade file shipped next to this module
        # and hook up the signal handlers defined below.
        builder = gtk.Builder()
        builder.add_from_file(os.path.join(os.path.dirname(__file__), "speechbrowser.glade"))
        builder.connect_signals({"on_window1_destroy": gtk.main_quit,
                                 "on_toolbutton_open_clicked": self.on_toolbutton_open_clicked,
                                 "on_button_playutt_clicked": self.on_button_playutt_clicked,
                                 "on_button_playwordorig_clicked": self.on_button_playwordorig_clicked,
                                 "on_button_playwordsynth_clicked": self.on_button_playwordsynth_clicked,
                                 "on_button_next_clicked": self.on_button_next_clicked,
                                 "on_button_prev_clicked": self.on_button_prev_clicked})
        # Widget handles used by the update/handler methods below:
        self.window1 = builder.get_object("window1")
        self.frame_specutt = builder.get_object("frame_specutt")
        self.button_playutt = builder.get_object("button_playutt")
        self.frame_words = builder.get_object("frame_words")
        self.entry_transcription = builder.get_object("entry_transcription")
        self.table_utt = builder.get_object("table_utt")
        self.table_words = builder.get_object("table_words")
        self.frame_wordspecorig = builder.get_object("frame_wordspecorig")
        self.frame_wordspecsynth = builder.get_object("frame_wordspecsynth")
        self.button_playwordorig = builder.get_object("button_playwordorig")
        self.button_playwordsynth = builder.get_object("button_playwordsynth")
        self.label_word1 = builder.get_object("label_word1")
        self.label_word2 = builder.get_object("label_word2")
        self.label_word3 = builder.get_object("label_word3")
        self.entry_word1 = builder.get_object("entry_word1")
        self.entry_word2 = builder.get_object("entry_word2")
        self.entry_word3 = builder.get_object("entry_word3")
        self.statusbar = builder.get_object("statusbar")
        self.entry_comment = builder.get_object("entry_comment")
        # Disabled combobox-based comment UI, kept for reference:
        # self.combobox_comment = builder.get_object("combobox_comment")
        # liststore = gtk.ListStore(gobject.TYPE_STRING)
        # self.combobox_comment.set_model(liststore)
        # self.combobox_comment.set_entry_text_column(0)
        # self.combobox_comment.append_text("transcription error")
        # self.combobox_comment.append_text("pronunciation error")
        # self.combobox_comment.append_text("noise present")
        # self.combobox_comment.append_text("no problem")
        # cell = gtk.CellRendererText()
        # self.combobox_comment.pack_start(cell, True)
        # self.combobox_comment.add_attribute(cell, 'text', 1)
        self.window1.show()
        self.voice = voice

    def update_wordview(self):
        # Refresh prev/current/next word labels, pronunciation entries
        # and the original/synthesised word-context spectrograms.
        u = self.corpusview.current_utt
        words = u.get_relation("SylStructure").as_list()
        word = words[self.corpusview.current_wordindex]
        try:
            # prev_item is None at the first word; indexing None raises
            # TypeError, which selects the defaults below.
            prevword = word.prev_item
            prevwordname = prevword["name"]
            # NOTE(review): samplerate * time is a float used as a slice
            # index below -- relies on py2/old-numpy behaviour, confirm.
            origstartsample = u["waveform"].samplerate * prevword["start"]
            synthstartsample = u["lindists"]["utt"]["waveform"].samplerate * prevword["start"]
            prevwordpronun = self.corpusview.pronuns[self.corpusview.worklist[self.corpusview.current_index][0]][self.corpusview.current_wordindex-1]
        except TypeError:
            prevwordname = "NONE"
            origstartsample = 0
            synthstartsample = 0
            prevwordpronun = ""
        wordname = word["name"]
        wordpronun = self.corpusview.pronuns[self.corpusview.worklist[self.corpusview.current_index][0]][self.corpusview.current_wordindex]
        try:
            # next_item is None at the last word; same TypeError pattern.
            nextword = word.next_item
            nextwordname = nextword["name"]
            origendsample = u["waveform"].samplerate * nextword["end"]
            synthendsample = u["lindists"]["utt"]["waveform"].samplerate * nextword["end"]
            nextwordpronun = self.corpusview.pronuns[self.corpusview.worklist[self.corpusview.current_index][0]][self.corpusview.current_wordindex+1]
        except TypeError:
            nextwordname = "NONE"
            origendsample = len(u["waveform"].samples)
            synthendsample = len(u["waveform"].samples)
            nextwordpronun = ""
        self.label_word1.set_label(prevwordname)
        self.label_word2.set_label(wordname)
        self.label_word3.set_label(nextwordname)
        self.entry_word1.set_text(prevwordpronun)
        self.entry_word2.set_text(wordpronun)
        self.entry_word3.set_text(nextwordpronun)
        # Original-audio word context and its spectrogram:
        self.origwordcontextwav = Waveform()
        self.origwordcontextwav.samplerate = u["waveform"].samplerate
        self.origwordcontextwav.samples = u["waveform"].samples[origstartsample:origendsample]
        origwordcontext_specfig = Figure(dpi=72)
        origwordcontext_specplot = origwordcontext_specfig.add_subplot(111)
        # NOTE(review): xextent here is samplerate*len(samples); the time
        # extent in seconds would be len(samples)/samplerate -- confirm.
        origwordcontext_specplot.specgram(self.origwordcontextwav.samples, Fs=self.origwordcontextwav.samplerate, NFFT=128, noverlap=64, xextent=(0.0, self.origwordcontextwav.samplerate*len(self.origwordcontextwav.samples)))
        origwordcontext_speccanvas = FigureCanvasGTK(origwordcontext_specfig)
        # Replace any previous canvas in the frame:
        framecontents = self.frame_wordspecorig.get_children()
        if framecontents:
            self.frame_wordspecorig.remove(framecontents[0])
        self.frame_wordspecorig.add(origwordcontext_speccanvas)
        # Synthesised-audio word context and its spectrogram:
        self.synthwordcontextwav = Waveform()
        self.synthwordcontextwav.samplerate = u["lindists"]["utt"]["waveform"].samplerate
        self.synthwordcontextwav.samples = u["lindists"]["utt"]["waveform"].samples[synthstartsample:synthendsample]
        synthwordcontext_specfig = Figure(dpi=72)
        synthwordcontext_specplot = synthwordcontext_specfig.add_subplot(111)
        synthwordcontext_specplot.specgram(self.synthwordcontextwav.samples, Fs=self.synthwordcontextwav.samplerate, NFFT=128, noverlap=64, xextent=(0.0, self.synthwordcontextwav.samplerate*len(self.synthwordcontextwav.samples)))
        synthwordcontext_speccanvas = FigureCanvasGTK(synthwordcontext_specfig)
        framecontents = self.frame_wordspecsynth.get_children()
        if framecontents:
            self.frame_wordspecsynth.remove(framecontents[0])
        self.frame_wordspecsynth.add(synthwordcontext_speccanvas)
        self.statusbar.push(0, "Item: %s/%s (Word index: %s)" % (self.corpusview.current_index + 1, len(self.corpusview.worklist), self.corpusview.current_wordindex))
        self.table_words.show_all()

    def savepronuns(self, wordindex):
        # Persist the three pronunciation entry fields back into the
        # corpusview, keyed on the current worklist item; entry_word1/2/3
        # hold the prev/current/next word pronunciations respectively.
        if wordindex != 0:
            self.corpusview.pronuns[self.corpusview.worklist[self.corpusview.current_index][0]][wordindex-1] = unicode(self.entry_word1.get_text(), "utf-8")
        self.corpusview.pronuns[self.corpusview.worklist[self.corpusview.current_index][0]][wordindex] = unicode(self.entry_word2.get_text(), "utf-8")
        try:
            self.corpusview.pronuns[self.corpusview.worklist[self.corpusview.current_index][0]][wordindex+1] = unicode(self.entry_word3.get_text(), "utf-8")
        except IndexError:
            # no next word at the end of the utterance
            pass

    def change_wordview(self, button):
        # Word-button handler: save pending pronunciation edits, then
        # switch the word view to the clicked word's index.
        self.savepronuns(self.corpusview.current_wordindex)
        self.corpusview.current_wordindex = button.wordindex
        self.update_wordview()

    def update_uttview(self):
        # Refresh the utterance-level view: full spectrogram,
        # transcription/comment entries and one button per word.
        utt = self.corpusview.current_utt
        origspeech_specfig = Figure(dpi=72)
        origspeech_specplot = origspeech_specfig.add_subplot(111)
        origspeech_specplot.specgram(utt["waveform"].samples, Fs=utt["waveform"].samplerate, NFFT=128, noverlap=64)
        origspeech_speccanvas = FigureCanvasGTK(origspeech_specfig)
        framecontents = self.frame_specutt.get_children()
        if framecontents:
            self.frame_specutt.remove(framecontents[0])
        self.frame_specutt.add(origspeech_speccanvas)
        self.entry_transcription.set_text(self.corpusview.transcriptions[self.corpusview.worklist[self.corpusview.current_index][0]])
        self.entry_comment.set_text(self.corpusview.comments[self.corpusview.worklist[self.corpusview.current_index][0]])
        self.buttonbox_words = gtk.HButtonBox()
        words = utt.get_relation("Word").as_list()
        for i, word in enumerate(words):
            button = gtk.Button()
            button.wordindex = i  # consumed by change_wordview
            button.connect("clicked", self.change_wordview)
            button.set_label(word["name"])
            self.buttonbox_words.pack_end(button)
        framecontents = self.frame_words.get_children()
        if framecontents:
            self.frame_words.remove(framecontents[0])
        self.frame_words.add(self.buttonbox_words)
        self.table_utt.show_all()
        self.update_wordview()

    def on_button_next_clicked(self, obj):
        # Save all edits for the current item, then advance and redraw.
        self.corpusview.transcriptions[self.corpusview.worklist[self.corpusview.current_index][0]] = unicode(self.entry_transcription.get_text(), "utf-8")
        self.corpusview.comments[self.corpusview.worklist[self.corpusview.current_index][0]] = unicode(self.entry_comment.get_text(), "utf-8")
        self.savepronuns(self.corpusview.current_wordindex)
        self.corpusview.next()
        self.update_uttview()

    def on_button_prev_clicked(self, obj):
        # Save all edits for the current item, then go back and redraw.
        self.corpusview.transcriptions[self.corpusview.worklist[self.corpusview.current_index][0]] = unicode(self.entry_transcription.get_text(), "utf-8")
        self.corpusview.comments[self.corpusview.worklist[self.corpusview.current_index][0]] = unicode(self.entry_comment.get_text(), "utf-8")
        self.savepronuns(self.corpusview.current_wordindex)
        self.corpusview.prev()
        self.update_uttview()

    def on_button_playutt_clicked(self, obj):
        # Play the full original utterance audio.
        self.corpusview.current_utt["waveform"].play()

    def on_button_playwordorig_clicked(self, obj):
        # Play the original-audio word context (set by update_wordview).
        self.origwordcontextwav.play()

    def on_button_playwordsynth_clicked(self, obj):
        # Play the synthesised-audio word context (set by update_wordview).
        self.synthwordcontextwav.play()

    def on_toolbutton_open_clicked(self, obj):
        # Prompt for a worklist file, build a CorpusView from it and
        # refresh both views.
        chooser = gtk.FileChooserDialog(title=None,
                                        action=gtk.FILE_CHOOSER_ACTION_OPEN,
                                        buttons=(gtk.STOCK_CANCEL,
                                                 gtk.RESPONSE_CANCEL,
                                                 gtk.STOCK_OPEN,
                                                 gtk.RESPONSE_OK))
        chooser.set_current_folder(os.getcwd())
        response = chooser.run()
        if response == gtk.RESPONSE_OK:
            filename = chooser.get_filename()
            worklist = loadworklist(filename)
            self.corpusview = CorpusView(worklist, self.voice)
        elif response == gtk.RESPONSE_CANCEL:
            print('Closed, no files selected')
        chooser.destroy()
        self.update_uttview()
        self.update_wordview()
def test2(testfn="samples/data_001d.wav"):
    """Load a sample waveform and print its inhalation timing info."""
    wav = Waveform(testfn)
    info = inhaletimeinfo(wav.samples)
    print(info)
class SpeechbrowserApp(object):
    """GTK application for browsing/QA-ing an aligned speech corpus.

    Variant that is configured with a phone map (``phmap``) rather than a
    voice. Shows a spectrogram of the current utterance and of a
    three-word context (original vs synthesised audio) and lets the user
    edit the transcription, per-word pronunciations and a comment.
    Corpus state lives in ``self.corpusview``, created when a worklist is
    opened from the toolbar.
    """
    def __init__(self, phmap):
        # Build the UI from the Glade file under $TTSLABDEV_ROOT and hook
        # up the signal handlers defined below.
        builder = gtk.Builder()
        builder.add_from_file(os.path.join(os.getenv("TTSLABDEV_ROOT"), "voicetools/speechbrowser", "speechbrowser.glade"))
        builder.connect_signals({"on_window1_destroy": gtk.main_quit,
                                 "on_toolbutton_open_clicked": self.on_toolbutton_open_clicked,
                                 "on_button_playutt_clicked": self.on_button_playutt_clicked,
                                 "on_button_playwordorig_clicked": self.on_button_playwordorig_clicked,
                                 "on_button_playwordsynth_clicked": self.on_button_playwordsynth_clicked,
                                 "on_button_next_clicked": self.on_button_next_clicked,
                                 "on_button_prev_clicked": self.on_button_prev_clicked})
        # Widget handles used by the update/handler methods below:
        self.window1 = builder.get_object("window1")
        self.frame_specutt = builder.get_object("frame_specutt")
        self.button_playutt = builder.get_object("button_playutt")
        self.frame_words = builder.get_object("frame_words")
        self.entry_transcription = builder.get_object("entry_transcription")
        self.table_utt = builder.get_object("table_utt")
        self.table_words = builder.get_object("table_words")
        self.frame_wordspecorig = builder.get_object("frame_wordspecorig")
        self.frame_wordspecsynth = builder.get_object("frame_wordspecsynth")
        self.button_playwordorig = builder.get_object("button_playwordorig")
        self.button_playwordsynth = builder.get_object("button_playwordsynth")
        self.label_word1 = builder.get_object("label_word1")
        self.label_word2 = builder.get_object("label_word2")
        self.label_word3 = builder.get_object("label_word3")
        self.entry_word1 = builder.get_object("entry_word1")
        self.entry_word2 = builder.get_object("entry_word2")
        self.entry_word3 = builder.get_object("entry_word3")
        self.statusbar = builder.get_object("statusbar")
        self.entry_comment = builder.get_object("entry_comment")
        # Disabled combobox-based comment UI, kept for reference:
        # self.combobox_comment = builder.get_object("combobox_comment")
        # liststore = gtk.ListStore(gobject.TYPE_STRING)
        # self.combobox_comment.set_model(liststore)
        # self.combobox_comment.set_entry_text_column(0)
        # self.combobox_comment.append_text("transcription error")
        # self.combobox_comment.append_text("pronunciation error")
        # self.combobox_comment.append_text("noise present")
        # self.combobox_comment.append_text("no problem")
        # cell = gtk.CellRendererText()
        # self.combobox_comment.pack_start(cell, True)
        # self.combobox_comment.add_attribute(cell, 'text', 1)
        self.window1.show()
        self.phmap = phmap

    def update_wordview(self):
        # Refresh prev/current/next word labels, pronunciation entries
        # and the original/synthesised word-context spectrograms.
        u = self.corpusview.current_utt
        words = u.get_relation("SylStructure").as_list()
        word = words[self.corpusview.current_wordindex]
        try:
            # prev_item is None at the first word; indexing None raises
            # TypeError, which selects the defaults below.
            prevword = word.prev_item
            prevwordname = prevword["name"]
            # NOTE(review): samplerate * time is a float used as a slice
            # index below -- relies on py2/old-numpy behaviour, confirm.
            origstartsample = u["waveform"].samplerate * prevword["start"]
            synthstartsample = u["lindists"]["utt"]["waveform"].samplerate * prevword["start"]
            prevwordpronun = self.corpusview.pronuns[self.corpusview.worklist[self.corpusview.current_index][0]][self.corpusview.current_wordindex-1]
        except TypeError:
            prevwordname = "NONE"
            origstartsample = 0
            synthstartsample = 0
            prevwordpronun = ""
        wordname = word["name"]
        wordpronun = self.corpusview.pronuns[self.corpusview.worklist[self.corpusview.current_index][0]][self.corpusview.current_wordindex]
        try:
            # next_item is None at the last word; same TypeError pattern.
            nextword = word.next_item
            nextwordname = nextword["name"]
            origendsample = u["waveform"].samplerate * nextword["end"]
            synthendsample = u["lindists"]["utt"]["waveform"].samplerate * nextword["end"]
            nextwordpronun = self.corpusview.pronuns[self.corpusview.worklist[self.corpusview.current_index][0]][self.corpusview.current_wordindex+1]
        except TypeError:
            nextwordname = "NONE"
            origendsample = len(u["waveform"].samples)
            synthendsample = len(u["waveform"].samples)
            nextwordpronun = ""
        self.label_word1.set_label(prevwordname)
        self.label_word2.set_label(wordname)
        self.label_word3.set_label(nextwordname)
        self.entry_word1.set_text(prevwordpronun)
        self.entry_word2.set_text(wordpronun)
        self.entry_word3.set_text(nextwordpronun)
        # Original-audio word context and its spectrogram:
        self.origwordcontextwav = Waveform()
        self.origwordcontextwav.samplerate = u["waveform"].samplerate
        self.origwordcontextwav.samples = u["waveform"].samples[origstartsample:origendsample]
        origwordcontext_specfig = Figure(dpi=72)
        origwordcontext_specplot = origwordcontext_specfig.add_subplot(111)
        # NOTE(review): xextent here is samplerate*len(samples); the time
        # extent in seconds would be len(samples)/samplerate -- confirm.
        origwordcontext_specplot.specgram(self.origwordcontextwav.samples, Fs=self.origwordcontextwav.samplerate, NFFT=128, noverlap=64, xextent=(0.0, self.origwordcontextwav.samplerate*len(self.origwordcontextwav.samples)))
        origwordcontext_speccanvas = FigureCanvasGTK(origwordcontext_specfig)
        # Replace any previous canvas in the frame:
        framecontents = self.frame_wordspecorig.get_children()
        if framecontents:
            self.frame_wordspecorig.remove(framecontents[0])
        self.frame_wordspecorig.add(origwordcontext_speccanvas)
        # Synthesised-audio word context and its spectrogram:
        self.synthwordcontextwav = Waveform()
        self.synthwordcontextwav.samplerate = u["lindists"]["utt"]["waveform"].samplerate
        self.synthwordcontextwav.samples = u["lindists"]["utt"]["waveform"].samples[synthstartsample:synthendsample]
        synthwordcontext_specfig = Figure(dpi=72)
        synthwordcontext_specplot = synthwordcontext_specfig.add_subplot(111)
        synthwordcontext_specplot.specgram(self.synthwordcontextwav.samples, Fs=self.synthwordcontextwav.samplerate, NFFT=128, noverlap=64, xextent=(0.0, self.synthwordcontextwav.samplerate*len(self.synthwordcontextwav.samples)))
        synthwordcontext_speccanvas = FigureCanvasGTK(synthwordcontext_specfig)
        framecontents = self.frame_wordspecsynth.get_children()
        if framecontents:
            self.frame_wordspecsynth.remove(framecontents[0])
        self.frame_wordspecsynth.add(synthwordcontext_speccanvas)
        self.statusbar.push(0, "Item: %s/%s (Word index: %s)" % (self.corpusview.current_index + 1, len(self.corpusview.worklist), self.corpusview.current_wordindex))
        self.table_words.show_all()

    def savepronuns(self, wordindex):
        # Persist the three pronunciation entry fields back into the
        # corpusview, keyed on the current worklist item; entry_word1/2/3
        # hold the prev/current/next word pronunciations respectively.
        if wordindex != 0:
            self.corpusview.pronuns[self.corpusview.worklist[self.corpusview.current_index][0]][wordindex-1] = unicode(self.entry_word1.get_text(), "utf-8")
        self.corpusview.pronuns[self.corpusview.worklist[self.corpusview.current_index][0]][wordindex] = unicode(self.entry_word2.get_text(), "utf-8")
        try:
            self.corpusview.pronuns[self.corpusview.worklist[self.corpusview.current_index][0]][wordindex+1] = unicode(self.entry_word3.get_text(), "utf-8")
        except IndexError:
            # no next word at the end of the utterance
            pass

    def change_wordview(self, button):
        # Word-button handler: save pending pronunciation edits, then
        # switch the word view to the clicked word's index.
        self.savepronuns(self.corpusview.current_wordindex)
        self.corpusview.current_wordindex = button.wordindex
        self.update_wordview()

    def update_uttview(self):
        # Refresh the utterance-level view: full spectrogram,
        # transcription/comment entries and one button per word.
        utt = self.corpusview.current_utt
        origspeech_specfig = Figure(dpi=72)
        origspeech_specplot = origspeech_specfig.add_subplot(111)
        origspeech_specplot.specgram(utt["waveform"].samples, Fs=utt["waveform"].samplerate, NFFT=128, noverlap=64)
        origspeech_speccanvas = FigureCanvasGTK(origspeech_specfig)
        framecontents = self.frame_specutt.get_children()
        if framecontents:
            self.frame_specutt.remove(framecontents[0])
        self.frame_specutt.add(origspeech_speccanvas)
        self.entry_transcription.set_text(self.corpusview.transcriptions[self.corpusview.worklist[self.corpusview.current_index][0]])
        self.entry_comment.set_text(self.corpusview.comments[self.corpusview.worklist[self.corpusview.current_index][0]])
        self.buttonbox_words = gtk.HButtonBox()
        words = utt.get_relation("Word").as_list()
        for i, word in enumerate(words):
            button = gtk.Button()
            button.wordindex = i  # consumed by change_wordview
            button.connect("clicked", self.change_wordview)
            button.set_label(word["name"])
            self.buttonbox_words.pack_end(button)
        framecontents = self.frame_words.get_children()
        if framecontents:
            self.frame_words.remove(framecontents[0])
        self.frame_words.add(self.buttonbox_words)
        self.table_utt.show_all()
        self.update_wordview()

    def on_button_next_clicked(self, obj):
        # Save all edits for the current item, then advance and redraw.
        self.corpusview.transcriptions[self.corpusview.worklist[self.corpusview.current_index][0]] = unicode(self.entry_transcription.get_text(), "utf-8")
        self.corpusview.comments[self.corpusview.worklist[self.corpusview.current_index][0]] = unicode(self.entry_comment.get_text(), "utf-8")
        self.savepronuns(self.corpusview.current_wordindex)
        self.corpusview.next()
        self.update_uttview()

    def on_button_prev_clicked(self, obj):
        # Save all edits for the current item, then go back and redraw.
        self.corpusview.transcriptions[self.corpusview.worklist[self.corpusview.current_index][0]] = unicode(self.entry_transcription.get_text(), "utf-8")
        self.corpusview.comments[self.corpusview.worklist[self.corpusview.current_index][0]] = unicode(self.entry_comment.get_text(), "utf-8")
        self.savepronuns(self.corpusview.current_wordindex)
        self.corpusview.prev()
        self.update_uttview()

    def on_button_playutt_clicked(self, obj):
        # Play the full original utterance audio.
        self.corpusview.current_utt["waveform"].play()

    def on_button_playwordorig_clicked(self, obj):
        # Play the original-audio word context (set by update_wordview).
        self.origwordcontextwav.play()

    def on_button_playwordsynth_clicked(self, obj):
        # Play the synthesised-audio word context (set by update_wordview).
        self.synthwordcontextwav.play()

    def on_toolbutton_open_clicked(self, obj):
        # Prompt for a worklist file, build a CorpusView from it and
        # refresh both views.
        chooser = gtk.FileChooserDialog(title=None,
                                        action=gtk.FILE_CHOOSER_ACTION_OPEN,
                                        buttons=(gtk.STOCK_CANCEL,
                                                 gtk.RESPONSE_CANCEL,
                                                 gtk.STOCK_OPEN,
                                                 gtk.RESPONSE_OK))
        chooser.set_current_folder(os.getcwd())
        response = chooser.run()
        if response == gtk.RESPONSE_OK:
            filename = chooser.get_filename()
            worklist = loadworklist(filename)
            self.corpusview = CorpusView(worklist, self.phmap)
        elif response == gtk.RESPONSE_CANCEL:
            print('Closed, no files selected')
        chooser.destroy()
        self.update_uttview()
        self.update_wordview()
def hts_synth(self, utt, processname):
    """Synthesise *utt* by running the external HTS engine binary.

    Writes the utterance's HTS label to a temp file, invokes the engine,
    then loads the resulting segment end times, waveform and log-F0 track
    back into the utterance.

    Fix: temp files and their descriptors are now released in a
    ``finally`` block, so they are no longer leaked when synthesis or
    parsing raises.

    :param utt: utterance carrying "hts_label" (and optional "htsparms")
    :param processname: unused here (kept for the processor-call convention)
    :returns: the same utterance with "waveform" and "f0" set
    """
    htsparms = self.engine_parms.copy()
    htsparms["-of"] = "%(tempolf0_file)s"
    if "htsparms" in utt:
        htsparms.update(utt["htsparms"])  # parm overrides for this utt...
    # build command string:
    # NOTE: executed via os.system as a shell string -- htsparms values
    # are assumed trusted (not user-supplied).
    cmds = self.hts_bin
    for k in htsparms:
        if htsparms[k]:
            if htsparms[k] is True:
                cmds += " " + k
            else:
                cmds += " " + k + " " + str(htsparms[k])
    cmds += " %(tempilab_file)s"
    fd1, tempwav_file = mkstemp(prefix="ttslab_", suffix=".wav")
    fd2, tempilab_file = mkstemp(prefix="ttslab_")
    fd3, tempolab_file = mkstemp(prefix="ttslab_")
    fd4, tempolf0_file = mkstemp(prefix="ttslab_")
    try:
        cmds = cmds % {'models_dir': self.models_dir,
                       'tempwav_file': tempwav_file,
                       'tempilab_file': tempilab_file,
                       'tempolab_file': tempolab_file,
                       'tempolf0_file': tempolf0_file}
        # print(cmds)
        with codecs.open(tempilab_file, "w", encoding="utf-8") as outfh:
            outfh.write("\n".join(utt["hts_label"]))
        os.system(cmds)
        # load seg endtimes into utt:
        with open(tempolab_file) as infh:
            lines = infh.readlines()
        segs = utt.get_relation("Segment").as_list()
        assert len(segs) == len(lines)
        for line, seg in zip(lines, segs):
            seg["end"] = hts_labels_tone.htk_int_to_float(line.split()[1])
        # load audio:
        utt["waveform"] = Waveform(tempwav_file)
        # load lf0 and convert to hertz:
        f0 = np.exp(np.fromfile(tempolf0_file, "float32"))
        # to semitones relative to 1Hz:
        # 12 * log2 (F0 / F0reference) where F0reference = 1
        f0[f0.nonzero()] = 12.0 * np.log2(f0[f0.nonzero()])
        f0t = Track()
        f0t.values = f0
        f0t.times = np.arange(len(f0), dtype=np.float64) * 0.005
        utt["f0"] = f0t
    finally:
        # always release descriptors and remove tempfiles, even when the
        # engine call or output parsing fails:
        for fd in (fd1, fd2, fd3, fd4):
            os.close(fd)
        for tmpfile in (tempwav_file, tempolab_file,
                        tempilab_file, tempolf0_file):
            os.remove(tmpfile)
    return utt