def lex_gen_ipa(locale, engine, voice, speak=False):
    """Generate an IPA transcription for the module-level ``lex_base``.

    With ``engine == 'sequitur'`` the transcription comes from
    ``sequitur_gen_ipa`` using ``SEQUITUR_MODEL``; *locale* and *voice* are
    not used on that path. For any other engine the shared ``tts`` client is
    switched to *locale* / *engine* / *voice* and asked for the IPA.

    When *speak* is true the result is additionally handed to ``say_ipa``
    with the fixed settings 'de' / 'mary' / 'dfki-pavoque-neutral-hsmm',
    regardless of which engine produced it.

    Returns the generated IPA.
    """

    if engine != 'sequitur':
        # Reconfigure the shared TTS client before asking it for the IPA.
        tts.locale = locale
        tts.engine = engine
        tts.voice = voice
        generated = tts.gen_ipa(lex_base)
    else:
        generated = sequitur_gen_ipa(SEQUITUR_MODEL, lex_base)

    if speak:
        say_ipa('de', 'mary', 'dfki-pavoque-neutral-hsmm', generated)

    return generated
else: num_ts_complete += 1 cnt = 0 for item in reversed(sorted(missing.items(), key=lambda x: x[1])): cnt += 1 if cnt > options.num_words: break if verbose: logging.info(u"Missing %4d times: %s" % (item[1], item[0])) else: logging.info(item[0].encode('utf8')) if options.generate: ipas = sequitur_gen_ipa(sequitur_model, item[0]) logging.info(u"%4d/%4d generated lex entry: %s -> %s" % (cnt, options.num_words, item[0], ipas)) lex[item[0]] = {'ipa': ipas} logging.info( "%d missing words total. %d submissions lack at least one word, %d are covered fully by the lexicon." % (len(missing), num_ts_lacking, num_ts_complete)) if options.generate: logging.info('saving lexicon...') lex.save() logging.info('saving lexicon...done.')
def lex_edit(token):
    """Run the interactive curses editor for the lexicon entry of *token*.

    Each round repaints the main screen, reads one key from ``stdscr`` and
    acts on it. The loop ends when ``q`` is pressed, which saves the lexicon.

    Keys:
        q        save the lexicon and leave
        g l k    generate IPA via TTS (de-mary, en-mary, fr-mary), speak and
                 store it
        h        generate IPA via de-espeak, store it, speak it with de-mary
        j        generate IPA via sequitur, speak it with de-mary, store it
        p o i u  speak the stored IPA (de unitsel, de hsmm, fr hsmm, en hsmm);
                 ignored while the entry has no IPA
        t        enter a different token
        e        edit the stored pronunciation as X-SAMPA

    Any ``Exception`` raised while handling a key is logged and the loop
    continues; ``KeyboardInterrupt`` and ``SystemExit`` are allowed to
    propagate so the editor can still be interrupted.
    """

    global lex, lex_token, lex_entry, lex_base

    def select_voice(locale, engine, voice):
        # Point the shared TTS client at one locale/engine/voice triple.
        tts.locale = locale
        tts.engine = engine
        tts.voice = voice

    de_mary = ('de', 'mary', 'bits3')

    # key -> voice used to both generate and speak a new pronunciation
    generate_voices = {
        ord('g'): de_mary,                               # generate de-mary
        ord('l'): ('en-US', 'mary', 'cmu-rms-hsmm'),     # generate en-mary
        ord('k'): ('fr', 'mary', 'upmc-pierre-hsmm'),    # generate fr-mary
    }

    # key -> voice used to speak the pronunciation that is already stored
    speak_voices = {
        ord('p'): de_mary,                                       # de mary unitsel
        ord('o'): ('de', 'mary', 'dfki-pavoque-neutral-hsmm'),   # de mary hsmm
        # NOTE(review): differs from the 'upmc-pierre-hsmm' voice used when
        # generating French IPA - confirm this voice name is intended.
        ord('i'): ('fr', 'mary', 'pierre-voice-hsmm'),           # fr mary hsmm
        ord('u'): ('en-US', 'mary', 'cmu-rms-hsmm'),             # en mary hsmm
    }

    lex_set_token(token)

    while True:

        try:

            lex_paint_main()

            c = stdscr.getch()

            if c == ord('q'):
                lex.save()
                break

            elif c in generate_voices:
                select_voice(*generate_voices[c])
                ipas = tts.gen_ipa(lex_base)
                tts.say_ipa(ipas)
                lex_entry['ipa'] = ipas

            # generate de-espeak
            elif c == ord('h'):
                select_voice('de', 'espeak', 'de')
                ipas = tts.gen_ipa(lex_base)
                lex_entry['ipa'] = ipas
                # espeak only produced the IPA; playback goes through mary.
                select_voice(*de_mary)
                tts.say_ipa(ipas)

            # generate de-sequitur
            elif c == ord('j'):
                ipas = sequitur_gen_ipa(SEQUITUR_MODEL, lex_base)
                select_voice(*de_mary)
                tts.say_ipa(ipas)
                lex_entry['ipa'] = ipas

            elif c in speak_voices:
                ipas = lex_entry['ipa']
                # Nothing to speak yet (applies to every speak key alike).
                if not ipas:
                    continue
                select_voice(*speak_voices[c])
                tts.say_ipa(ipas)

            # edit token
            elif c == ord('t'):
                token = misc.edit_popup(stdscr, ' Token ', '')
                lex_set_token(token)

            # edit XS
            elif c == ord('e'):
                ipas = lex_entry['ipa']
                xs = ipa2xsampa(lex_token, ipas, stress_to_vowels=False)
                xs = misc.edit_popup(stdscr, ' X-SAMPA ', xs)
                ipas = xsampa2ipa(lex_token, xs)
                lex_entry['ipa'] = ipas

        except Exception:
            # Keep the editor alive on handler errors, but do not trap
            # KeyboardInterrupt / SystemExit the way a bare except would.
            logging.error('EXCEPTION CAUGHT %s' % traceback.format_exc())
def lex_edit(token):
    """Run the interactive line-based editor for the lexicon entry of *token*.

    Each round repaints the main screen, reads one command from the
    ``lex > `` prompt (lower-cased) and acts on it. The loop ends on ``q`` or
    when the prompt hits end-of-file; both save the lexicon first.

    Commands:
        q        save the lexicon and leave
        g l k    generate IPA via TTS (de-mary, en-mary, fr-mary), speak and
                 store it
        h        generate IPA via de-espeak, store it, speak it with de-mary
        j        generate IPA via sequitur, speak it with de-mary, store it
        p o i u  speak the stored IPA (de unitsel, de hsmm, fr hsmm, en hsmm);
                 ignored while the entry has no IPA
        t        enter a different token
        e        edit the stored pronunciation as X-SAMPA

    Any ``Exception`` raised while handling a command is logged and the loop
    continues; ``KeyboardInterrupt`` and ``SystemExit`` are allowed to
    propagate so the editor can still be interrupted.
    """

    global lex, lex_token, lex_entry, lex_base

    def select_voice(locale, engine, voice):
        # Point the shared TTS client at one locale/engine/voice triple.
        tts.locale = locale
        tts.engine = engine
        tts.voice = voice

    de_mary = ('de', 'mary', 'bits3')

    # command -> voice used to both generate and speak a new pronunciation
    generate_voices = {
        'g': de_mary,                               # generate de-mary
        'l': ('en-US', 'mary', 'cmu-rms-hsmm'),     # generate en-mary
        'k': ('fr', 'mary', 'upmc-pierre-hsmm'),    # generate fr-mary
    }

    # command -> voice used to speak the pronunciation that is already stored
    speak_voices = {
        'p': de_mary,                                       # de mary unitsel
        'o': ('de', 'mary', 'dfki-pavoque-neutral-hsmm'),   # de mary hsmm
        # NOTE(review): differs from the 'upmc-pierre-hsmm' voice used when
        # generating French IPA - confirm this voice name is intended.
        'i': ('fr', 'mary', 'pierre-voice-hsmm'),           # fr mary hsmm
        'u': ('en-US', 'mary', 'cmu-rms-hsmm'),             # en mary hsmm
    }

    lex_set_token(token)

    while True:

        # Step 1: repaint and read the next command.
        try:
            lex_paint_main()
            c = raw_input('lex > ').lower()
        except EOFError:
            # stdin is exhausted (Ctrl-D / closed pipe). Every later prompt
            # would fail the same way, so looping on would never terminate;
            # treat it like 'q'.
            lex.save()
            break
        except Exception:
            logging.error('EXCEPTION CAUGHT %s' % traceback.format_exc())
            continue

        # Step 2: execute it.
        try:

            if c == 'q':
                lex.save()
                break

            elif c in generate_voices:
                select_voice(*generate_voices[c])
                ipas = tts.gen_ipa(lex_base)
                tts.say_ipa(ipas)
                lex_entry['ipa'] = ipas

            # generate de-espeak
            elif c == 'h':
                select_voice('de', 'espeak', 'de')
                ipas = tts.gen_ipa(lex_base)
                lex_entry['ipa'] = ipas
                # espeak only produced the IPA; playback goes through mary.
                select_voice(*de_mary)
                tts.say_ipa(ipas)

            # generate de-sequitur
            elif c == 'j':
                ipas = sequitur_gen_ipa(SEQUITUR_MODEL, lex_base)
                select_voice(*de_mary)
                tts.say_ipa(ipas)
                lex_entry['ipa'] = ipas

            elif c in speak_voices:
                ipas = lex_entry['ipa']
                # Nothing to speak yet (applies to every speak command alike).
                if not ipas:
                    continue
                select_voice(*speak_voices[c])
                tts.say_ipa(ipas)

            # edit token
            elif c == 't':
                # Offer the current token via readline history for recall.
                readline.add_history(lex_token.encode('utf8'))
                token = raw_input('token: ').decode('utf8')
                lex_set_token(token)

            # edit XS
            elif c == 'e':
                ipas = lex_entry['ipa']
                xs = ipa2xsampa(lex_token, ipas, stress_to_vowels=False)
                # Offer the current X-SAMPA via readline history for editing.
                readline.add_history(xs.encode('utf8'))
                xs = raw_input('X-SAMPA: ').decode('utf8')
                ipas = xsampa2ipa(lex_token, xs)
                lex_entry['ipa'] = ipas

        except Exception:
            # Keep the editor alive on handler errors, but do not trap
            # KeyboardInterrupt / SystemExit the way a bare except would.
            logging.error('EXCEPTION CAUGHT %s' % traceback.format_exc())
# Tally every prompt word the lexicon does not know yet.
# NOTE(review): `ts`, `lex` and `missing` are set up outside this excerpt.
for word in tokenize(ts['prompt']):
    if word in lex:
        continue
    missing[word] = missing.get(word, 0) + 1

# Generate a pronunciation for each missing word, highest count first.
cnt = 0
for item in reversed(sorted(missing.items(), key=lambda x: x[1])):

    lex_base = item[0]

    # Count before logging so progress reads 1..N instead of 0..N-1.
    cnt += 1

    # sequitur_gen_ipa takes the model as its first argument at every other
    # call site in this file; the bare one-argument call was missing it.
    # NOTE(review): confirm whether SEQUITUR_MODEL needs a language
    # substitution here.
    ipas = sequitur_gen_ipa(SEQUITUR_MODEL, lex_base)

    logging.info(u"%5d/%5d Adding missing word : %s [ %s ]" % (cnt, len(missing), item[0], ipas))

    lex_entry = {'ipa': ipas}
    lex[lex_base] = lex_entry

#
# dictionary export
#

dictfn2 = '%s/local/dict/lexicon.txt' % data_dir
# Tally every prompt word the lexicon does not know yet.
# NOTE(review): `ts`, `lex` and `missing` are set up outside this excerpt.
for word in tokenize(ts['prompt']):
    if word in lex:
        continue
    missing[word] = missing.get(word, 0) + 1

# Generate a pronunciation for each missing word, highest count first,
# using the sequitur model path formatted with the selected language.
cnt = 0
for item in reversed(sorted(missing.items(), key=lambda x: x[1])):

    lex_base = item[0]

    # Count before logging so progress reads 1..N instead of 0..N-1.
    cnt += 1

    ipas = sequitur_gen_ipa(SEQUITUR_MODEL % options.lang, lex_base)

    logging.info(u"%5d/%5d Adding missing word : %s [ %s ]" % (cnt, len(missing), item[0], ipas))

    lex_entry = {'ipa': ipas}
    lex[lex_base] = lex_entry

#
# dictionary export
#

dictfn2 = '%s/local/dict/lexicon.txt' % data_dir

logging.info("Exporting dictionary...")