Example #1
0
def lex_gen_ipa(locale, engine, voice, speak=False):
    """Generate an IPA transcription for the module-level ``lex_base``.

    When ``engine`` is ``'sequitur'`` the transcription is produced by
    ``sequitur_gen_ipa`` with ``SEQUITUR_MODEL``; ``locale`` and ``voice``
    are not used on that path.  For every other engine the shared ``tts``
    object is switched to ``locale`` / ``engine`` / ``voice`` and its
    ``gen_ipa`` method is called.

    If ``speak`` is true, the result is additionally passed to ``say_ipa``
    together with the fixed arguments ``'de'``, ``'mary'`` and
    ``'dfki-pavoque-neutral-hsmm'``, regardless of the generating engine.

    Returns whatever the selected generator returned.
    """
    if engine != 'sequitur':
        # The assignments stick: tts is shared module state and keeps this
        # selection after the function returns.
        tts.locale, tts.engine, tts.voice = locale, engine, voice
        ipas = tts.gen_ipa(lex_base)
    else:
        ipas = sequitur_gen_ipa(SEQUITUR_MODEL, lex_base)

    if speak:
        say_ipa('de', 'mary', 'dfki-pavoque-neutral-hsmm', ipas)

    return ipas
Example #2
0
    else:
        num_ts_complete += 1

# Walk the missing words from most to least frequent.  reversed(sorted(...))
# is kept on purpose: it also reverses the relative order of words that share
# the same count, which sorted(..., reverse=True) would not do.
by_frequency = sorted(missing.items(), key=lambda entry: entry[1])

cnt = 0  # stays 0 when nothing is missing
for cnt, (word, occurrences) in enumerate(reversed(by_frequency), 1):

    # Only the options.num_words most frequent words are reported.
    if cnt > options.num_words:
        break

    if verbose:
        logging.info(u"Missing %4d times: %s" % (occurrences, word))
    else:
        logging.info(word.encode('utf8'))

    if options.generate:
        # Create a lexicon entry from the generated transcription.
        ipas = sequitur_gen_ipa(sequitur_model, word)
        logging.info(u"%4d/%4d generated lex entry: %s -> %s" %
                     (cnt, options.num_words, word, ipas))
        lex[word] = {'ipa': ipas}

logging.info(
    "%d missing words total. %d submissions lack at least one word, %d are covered fully by the lexicon."
    % (len(missing), num_ts_lacking, num_ts_complete))

# Persist the lexicon only when entries may have been generated above.
if options.generate:
    logging.info('saving lexicon...')
    lex.save()
    logging.info('saving lexicon...done.')
Example #3
0
def lex_edit(token):
    """Run the interactive curses editor loop for one lexicon entry.

    ``token`` is made the current entry through ``lex_set_token``.  The loop
    then repaints the screen with ``lex_paint_main`` and reacts to single key
    presses read from ``stdscr``:

        q        save the lexicon and leave the editor
        g        generate IPA with de/mary, speak it, store it
        l        generate IPA with en-US/mary, speak it, store it
        k        generate IPA with fr/mary, speak it, store it
        h        generate IPA with de/espeak, store it, speak it with de/mary
        j        generate IPA with sequitur, speak it with de/mary, store it
        p o i u  speak the stored IPA (de unitsel, de hsmm, fr hsmm, en hsmm);
                 nothing happens while the stored IPA is empty
        t        prompt for another token and switch to it
        e        edit the stored IPA as X-SAMPA

    Any other key is ignored.  An error raised while handling a key is logged
    and the loop continues.  KeyboardInterrupt and SystemExit are not trapped,
    so the editor can still be interrupted.
    """

    global lex, lex_token, lex_entry, lex_base

    def use_voice(locale, engine, voice):
        # tts is shared state: the selection stays active after this call.
        tts.locale = locale
        tts.engine = engine
        tts.voice = voice

    de_mary = ('de', 'mary', 'bits3')

    # Keys that generate IPA and speak it with the very same voice.
    generate_keys = {
        ord('g'): de_mary,
        ord('l'): ('en-US', 'mary', 'cmu-rms-hsmm'),
        ord('k'): ('fr', 'mary', 'upmc-pierre-hsmm'),
    }

    # Keys that only speak the IPA already stored in the entry.
    speak_keys = {
        ord('p'): de_mary,
        ord('o'): ('de', 'mary', 'dfki-pavoque-neutral-hsmm'),
        # NOTE(review): this voice name differs from the 'upmc-pierre-hsmm'
        # voice used by the 'k' key -- confirm which one is installed.
        ord('i'): ('fr', 'mary', 'pierre-voice-hsmm'),
        ord('u'): ('en-US', 'mary', 'cmu-rms-hsmm'),
    }

    lex_set_token(token)

    while True:

        try:

            lex_paint_main()

            c = stdscr.getch()

            if c == ord('q'):
                lex.save()
                break

            elif c in generate_keys:
                use_voice(*generate_keys[c])
                ipas = tts.gen_ipa(lex_base)
                # Speak before storing: a failed playback leaves the entry
                # untouched.
                tts.say_ipa(ipas)
                lex_entry['ipa'] = ipas

            # generate de-espeak
            elif c == ord('h'):
                use_voice('de', 'espeak', 'de')
                ipas = tts.gen_ipa(lex_base)
                lex_entry['ipa'] = ipas

                # Playback goes through mary, not espeak.
                use_voice(*de_mary)
                tts.say_ipa(ipas)

            # generate de-sequitur
            elif c == ord('j'):
                ipas = sequitur_gen_ipa(SEQUITUR_MODEL, lex_base)
                use_voice(*de_mary)
                tts.say_ipa(ipas)
                lex_entry['ipa'] = ipas

            elif c in speak_keys:
                ipas = lex_entry['ipa']
                # Nothing to speak yet.  The guard now covers 'u' as well,
                # which used to send an empty transcription to the engine.
                if not ipas:
                    continue

                use_voice(*speak_keys[c])
                tts.say_ipa(ipas)

            # edit token
            elif c == ord('t'):
                token = misc.edit_popup(stdscr, ' Token ', '')
                lex_set_token(token)

            # edit XS
            elif c == ord('e'):
                xs = ipa2xsampa(lex_token, lex_entry['ipa'],
                                stress_to_vowels=False)
                xs = misc.edit_popup(stdscr, ' X-SAMPA ', xs)
                lex_entry['ipa'] = xsampa2ipa(lex_token, xs)

        except Exception:
            # Keep the editor alive on per-key failures, but let
            # KeyboardInterrupt / SystemExit propagate (a bare except would
            # swallow them and make the loop impossible to interrupt).
            logging.error('EXCEPTION CAUGHT %s' % traceback.format_exc())
Example #4
0
def lex_edit(token):
    """Run the interactive line-based editor loop for one lexicon entry.

    ``token`` is made the current entry through ``lex_set_token``.  The loop
    then calls ``lex_paint_main`` and reads one command per line at the
    ``lex > `` prompt (the input is lower-cased):

        q        save the lexicon and leave the editor
        g        generate IPA with de/mary, speak it, store it
        l        generate IPA with en-US/mary, speak it, store it
        k        generate IPA with fr/mary, speak it, store it
        h        generate IPA with de/espeak, store it, speak it with de/mary
        j        generate IPA with sequitur, speak it with de/mary, store it
        p o i u  speak the stored IPA (de unitsel, de hsmm, fr hsmm, en hsmm);
                 nothing happens while the stored IPA is empty
        t        prompt for another token and switch to it
        e        edit the stored IPA as X-SAMPA

    Any other input is ignored.  End-of-file at the command prompt is treated
    like ``q``.  An error raised while handling a command is logged and the
    loop continues.  KeyboardInterrupt and SystemExit are not trapped, so the
    editor can still be interrupted.
    """

    global lex, lex_token, lex_entry, lex_base

    def use_voice(locale, engine, voice):
        # tts is shared state: the selection stays active after this call.
        tts.locale = locale
        tts.engine = engine
        tts.voice = voice

    de_mary = ('de', 'mary', 'bits3')

    # Commands that generate IPA and speak it with the very same voice.
    generate_cmds = {
        'g': de_mary,
        'l': ('en-US', 'mary', 'cmu-rms-hsmm'),
        'k': ('fr', 'mary', 'upmc-pierre-hsmm'),
    }

    # Commands that only speak the IPA already stored in the entry.
    speak_cmds = {
        'p': de_mary,
        'o': ('de', 'mary', 'dfki-pavoque-neutral-hsmm'),
        # NOTE(review): this voice name differs from the 'upmc-pierre-hsmm'
        # voice used by the 'k' command -- confirm which one is installed.
        'i': ('fr', 'mary', 'pierre-voice-hsmm'),
        'u': ('en-US', 'mary', 'cmu-rms-hsmm'),
    }

    lex_set_token(token)

    while True:

        try:

            lex_paint_main()

            try:
                c = raw_input('lex > ').lower()
            except EOFError:
                # A closed stdin raises EOFError on every read; without this
                # the handler below would log it and spin forever.  Leave
                # through the regular quit path instead.
                c = 'q'

            if c == 'q':
                lex.save()
                break

            elif c in generate_cmds:
                use_voice(*generate_cmds[c])
                ipas = tts.gen_ipa(lex_base)
                # Speak before storing: a failed playback leaves the entry
                # untouched.
                tts.say_ipa(ipas)
                lex_entry['ipa'] = ipas

            # generate de-espeak
            elif c == 'h':
                use_voice('de', 'espeak', 'de')
                ipas = tts.gen_ipa(lex_base)
                lex_entry['ipa'] = ipas

                # Playback goes through mary, not espeak.
                use_voice(*de_mary)
                tts.say_ipa(ipas)

            # generate de-sequitur
            elif c == 'j':
                ipas = sequitur_gen_ipa(SEQUITUR_MODEL, lex_base)
                use_voice(*de_mary)
                tts.say_ipa(ipas)
                lex_entry['ipa'] = ipas

            elif c in speak_cmds:
                ipas = lex_entry['ipa']
                # Nothing to speak yet.  The guard now covers 'u' as well,
                # which used to send an empty transcription to the engine.
                if not ipas:
                    continue

                use_voice(*speak_cmds[c])
                tts.say_ipa(ipas)

            # edit token
            elif c == 't':
                # Seed the history so the current token can be recalled and
                # edited at the prompt.
                readline.add_history(lex_token.encode('utf8'))
                token = raw_input('token: ').decode('utf8')
                lex_set_token(token)

            # edit XS
            elif c == 'e':
                xs = ipa2xsampa(lex_token, lex_entry['ipa'],
                                stress_to_vowels=False)

                # Same trick: the current transcription becomes the most
                # recent history entry.
                readline.add_history(xs.encode('utf8'))
                xs = raw_input('X-SAMPA: ').decode('utf8')

                lex_entry['ipa'] = xsampa2ipa(lex_token, xs)

        except Exception:
            # Keep the editor alive on per-command failures, but let
            # KeyboardInterrupt / SystemExit propagate (a bare except would
            # swallow them and make the loop impossible to interrupt).
            logging.error('EXCEPTION CAUGHT %s' % traceback.format_exc())
Example #5
0
        for word in tokenize(ts['prompt']):

            if word in lex:
                continue

            if word in missing:
                missing[word] += 1
            else:
                missing[word] = 1

    cnt = 0
    for item in reversed(sorted(missing.items(), key=lambda x: x[1])):

        lex_base = item[0] 

        ipas = sequitur_gen_ipa (lex_base)

        logging.info ( u"%5d/%5d Adding missing word : %s [ %s ]" % (cnt, len(missing), item[0], ipas) )

        lex_entry = {'ipa': ipas}
        lex[lex_base] = lex_entry
        cnt += 1
        


#
# dictionary export
#

# Path of lexicon.txt under <data_dir>/local/dict; presumably the file the
# dictionary export writes to -- TODO confirm against the export code.
dictfn2 = '%s/local/dict/lexicon.txt' % data_dir
        for word in tokenize(ts['prompt']):

            if word in lex:
                continue

            if word in missing:
                missing[word] += 1
            else:
                missing[word] = 1

    cnt = 0
    for item in reversed(sorted(missing.items(), key=lambda x: x[1])):

        lex_base = item[0]

        ipas = sequitur_gen_ipa(SEQUITUR_MODEL % options.lang, lex_base)

        logging.info(u"%5d/%5d Adding missing word : %s [ %s ]" %
                     (cnt, len(missing), item[0], ipas))

        lex_entry = {'ipa': ipas}
        lex[lex_base] = lex_entry
        cnt += 1

#
# dictionary export
#

# Path of lexicon.txt under <data_dir>/local/dict; presumably the file the
# dictionary export writes to -- TODO confirm against the export code.
dictfn2 = '%s/local/dict/lexicon.txt' % data_dir

# Progress message logged before the export work starts.
logging.info("Exporting dictionary...")