Beispiel #1
0
    def test_tts_espeak(self):
        """Run every ESPEAK_TESTS entry through the 'espeak' engine.

        For each (voice, word, expected IPA) entry the generated IPA has to
        equal the expected one, and synthesis from both the word and the IPA
        string has to return data longer than 100.  Only the first word is
        additionally played back through say().
        """
        cfg = misc.load_config('.speechrc')
        host = cfg.get('tts', 'host')
        port = int(cfg.get('tts', 'port'))

        client = TTS(host, port)
        client.engine = 'espeak'

        for idx, (voice_id, text, expected_ipa) in enumerate(ESPEAK_TESTS):

            # the same identifier serves as locale and as voice here
            client.locale = voice_id
            client.voice = voice_id

            self.assertEqual(client.gen_ipa(text), expected_ipa)

            # synthesis from plain text
            audio = client.synthesize(text)
            logging.debug('wav len: %d bytes.' % len(audio))
            self.assertGreater(len(audio), 100)

            # synthesis from the IPA transcription
            audio = client.synthesize(expected_ipa, mode='ipa')
            logging.debug('wav len: %d bytes.' % len(audio))
            self.assertGreater(len(audio), 100)

            # playback is limited to the very first entry
            if idx == 0:
                client.say(text)
Beispiel #2
0
    def test_tts_mary(self):
        """Run every MARY_TESTS entry through the 'mary' engine.

        For each (locale, voice, word, expected IPA) entry the generated IPA
        has to equal the expected one, and synthesis from both the word and
        the IPA string has to return data longer than 100.
        """
        cfg = misc.load_config('.speechrc')
        host = cfg.get('tts', 'host')
        port = int(cfg.get('tts', 'port'))

        client = TTS(host, port)
        client.engine = 'mary'

        for locale, voice_id, text, expected_ipa in MARY_TESTS:

            client.locale = locale
            client.voice = voice_id

            self.assertEqual(client.gen_ipa(text), expected_ipa)

            # synthesis from plain text
            audio = client.synthesize(text)
            logging.debug('wav len: %d bytes.' % len(audio))
            self.assertGreater(len(audio), 100)

            # synthesis from the IPA transcription
            audio = client.synthesize(expected_ipa, mode='ipa')
            logging.debug('wav len: %d bytes.' % len(audio))
            self.assertGreater(len(audio), 100)
Beispiel #3
0
    def __init__(self, lex):
        """Keep the lexicon and create the TTS client and sequitur interface.

        lex -- lexicon object, stored as self.lex
        """
        self.lex = lex

        # TTS client used for audio output
        self.tts = TTS(
            'local', 0,
            locale='de',
            voice='bits3',
            engine='espeak',
        )

        # sequitur interface, built from SEQUITUR_MODEL
        self.si = SeqIf(SEQUITUR_MODEL)
Beispiel #4
0
    def test_tts_pico(self):
        """Run every PICO_TESTS entry through the 'pico' engine.

        For each (voice, word) entry synthesis has to return data longer
        than 100, after which the word is played back through say().
        """
        cfg = misc.load_config('.speechrc')
        host = cfg.get('tts', 'host')
        port = int(cfg.get('tts', 'port'))

        client = TTS(host, port)
        client.engine = 'pico'

        for voice_id, text in PICO_TESTS:

            # the same identifier serves as locale and as voice here
            client.locale = voice_id
            client.voice = voice_id

            audio = client.synthesize(text)
            logging.debug('wav len: %d bytes.' % len(audio))
            self.assertGreater(len(audio), 100)

            client.say(text)
Beispiel #5
0
            transcript.append(t)

#
# config: settings are read from the '.speechrc' file
#

config = misc.load_config('.speechrc')

vf_login = config.get("speech", "vf_login")
# NOTE(review): the option name is built from `lang`, while the lexicon below
# is loaded with `options.lang` -- confirm both hold the same value.
extrasdir = config.get("speech", "extrasdir_%s" % lang)

#
# TTS (for audio output), preset to locale 'de', voice 'bits3', engine 'espeak'
#

tts = TTS('local', 0, locale='de', voice='bits3', engine='espeak')

#
# load lexicon (start and end are reported through logging.info)
#

logging.info("loading lexicon...")
lex = Lexicon(file_name=options.lang)
logging.info("loading lexicon...done.")

#
# main ui loop
# (presumably driven from next_segment(), which is defined outside this excerpt)
#

next_segment()
Beispiel #6
0

class Intent(Enum):
    """Actions a recognised utterance can be mapped to."""

    HELLO = 1  # greeting
    LIGHT = 2  # requests about the lights
    RADIO = 3  # requests about the radio


# Announce start-up before the components below are constructed.
print("Initializing...")

# State flags; both start out as "off".
radio_on = False
lights_on = False
# Speech pipeline objects: recognizer, recorder, voice activity detection and
# speech output (espeak engine, voice "en").
asr = ASR(model_dir=MODELDIR)
rec = PulseRecorder(volume=VOLUME)
vad = VAD()
tts = TTS(engine="espeak", voice="en")

# utterance text -> Intent, filled through add_utt()
utt_map = {}


def add_utt(utterance, intent):
    """Register *utterance* in the module-level ``utt_map`` under *intent*.

    Registering the same utterance again replaces the earlier intent.
    """
    utt_map[utterance] = intent


# Known utterances. The "on" and "off" variants share one intent, so the
# intent alone does not say which of the two was requested.
add_utt("hello computer", Intent.HELLO)
add_utt("switch on the lights", Intent.LIGHT)
add_utt("switch off the lights", Intent.LIGHT)
add_utt("switch on the radio", Intent.RADIO)
add_utt("switch off the radio", Intent.RADIO)

# Start capturing audio once everything is registered.
rec.start_recording()
Beispiel #7
0
class LexEdit(object):
    """Interactive console editor for the IPA transcription of a lexicon token.

    Collaborators kept on the instance:

    lex -- the lexicon being edited; used with ``in``, item access,
           ``get_multi()`` and ``save()``
    tts -- TTS client used to generate IPA and to play transcriptions back
    si  -- sequitur interface used for ``g2p()`` conversion
    """

    # Keyword arguments handed to every say_ipa() call (async=True).  The
    # keyword is passed through a dict because ``async`` is a reserved word
    # from Python 3.7 on; the call itself is unchanged, and the class stays
    # parseable on both Python 2 and 3.
    _ASYNC = {'async': True}

    # (locale, voice) of the 'mary' voice used to read transcriptions back
    _READBACK_VOICE = ('de', 'dfki-pavoque-neutral-hsmm')

    # menu key -> (locale, engine, voice) passed on to lex_gen_ipa()
    _GEN_KEYS = {
        'g': ('de', 'mary', 'bits3'),
        'h': ('de', 'espeak', 'de'),
        'j': ('de', 'sequitur', 'de'),
    }

    # menu key -> (locale, voice): generate with the 'mary' engine and speak
    # the result with that same voice
    _GEN_FOREIGN_KEYS = {
        'l': ('en-US', 'cmu-rms-hsmm'),
        'k': ('fr', 'upmc-pierre-hsmm'),
    }

    # menu key -> (locale, voice): speak the current transcription ('mary')
    _SPEAK_KEYS = {
        'p': ('de', 'bits3'),
        'o': ('de', 'dfki-pavoque-neutral-hsmm'),
        'i': ('fr', 'upmc-pierre-hsmm'),
        'u': ('en-US', 'cmu-rms-hsmm'),
    }

    def __init__(self, lex):
        """Keep the lexicon and create the TTS client and sequitur interface.

        lex -- lexicon object, stored as self.lex
        """
        self.lex = lex

        # TTS (for audio output)
        self.tts = TTS('local', 0, locale='de', voice='bits3', engine='espeak')

        # sequitur interface
        self.si = SeqIf(SEQUITUR_MODEL)

    def _select_voice(self, locale, engine, voice):
        """Point the TTS client at the given locale, engine and voice."""
        self.tts.locale = locale
        self.tts.engine = engine
        self.tts.voice = voice

    def _say(self, ipas, locale, voice):
        """Speak the IPA string *ipas* with the given 'mary' voice."""
        self._select_voice(locale, 'mary', voice)
        self.tts.say_ipa(ipas, **self._ASYNC)

    @staticmethod
    def _read_line(prompt):
        """Read one console line via raw_input(), or input() where that is gone."""
        try:
            reader = raw_input
        except NameError:
            reader = input
        return reader(prompt)

    def _print_menu(self, lex_token, lex_entry, lex_gen):
        """Print the token, its IPA, the generated references and the key help."""
        print('')
        print(u"Token       : %s" % lex_token)
        print(u"IPA         : %s" % lex_entry['ipa'])
        print('')

        for engine in sorted(lex_gen):
            print(u"%-11s : %s" % (engine, lex_gen[engine]))
        print('')

        if lex_token in self.lex:
            variants = self.lex.get_multi(lex_token)
            for key in variants:
                print(u"%s [%s]" % (key, variants[key]['ipa']))

        else:
            print(u"NEW TOKEN")

        print(u"SPEAK  P:de-unitsel  O:de-hsmm                   I:fr-hsmm   U:en-hsmm")
        print(u"GEN    G:de-mary     H:de-espeak  J:de-sequitur  K:fr-mary   L:en-mary")
        print(u"       E:Edit        Q:Quit ")

    def lex_gen_ipa(self, lex_base, locale, engine, voice, speak=False):
        """Generate an IPA transcription for *lex_base*.

        lex_base -- word to transcribe
        locale   -- TTS locale (not used when engine is 'sequitur')
        engine   -- 'sequitur' for the sequitur interface, otherwise the name
                    of a TTS engine
        voice    -- TTS voice (not used when engine is 'sequitur')
        speak    -- when true, also play the result with the read-back voice

        Returns the generated IPA string.  Any failure is logged instead of
        raised; the value obtained up to that point is returned, which is
        u'' when generation itself failed.
        """
        ipas = u''
        try:

            if engine == 'sequitur':
                ipas = self.si.g2p(lex_base)

            else:
                self._select_voice(locale, engine, voice)
                ipas = self.tts.gen_ipa(lex_base)

            if speak:
                self._say(ipas, *self._READBACK_VOICE)

        except:
            # catch-all kept on purpose: a failing engine must not abort the
            # interactive session
            logging.error('EXCEPTION CAUGHT %s', traceback.format_exc())

        return ipas

    def edit(self, lex_token):
        """Interactively edit the entry for *lex_token*, then save the lexicon.

        The part of the token before the first '_' is the word that gets
        transcribed.  A token missing from the lexicon is added with a
        sequitur-generated transcription.  The prompt loop runs until 'q' is
        entered or the input reaches end-of-file; the lexicon is saved
        afterwards in both cases.
        """
        lex_base = lex_token.split('_')[0]

        if lex_token in self.lex:
            # always go through self.lex: a module-level `lex` need not exist
            lex_entry = self.lex[lex_token]

        else:
            ipas = self.lex_gen_ipa(lex_base, 'de', 'sequitur', 'de')
            lex_entry = {'ipa': ipas}
            self.lex[lex_token] = lex_entry

        ipas = lex_entry['ipa']

        # reference transcriptions shown for comparison in the menu
        lex_gen = {}
        for label, engine, voice in (('de-mary', 'mary', 'bits3'),
                                     ('de-espeak', 'espeak', 'de'),
                                     ('de-sequitur', 'sequitur', 'de')):
            lex_gen[label] = self.lex_gen_ipa(lex_base, 'de', engine, voice)

        # read the current transcription out once before prompting
        try:
            self._say(ipas, *self._READBACK_VOICE)
        except:
            logging.error('EXCEPTION CAUGHT %s', traceback.format_exc())

        while True:

            self._print_menu(lex_token, lex_entry, lex_gen)

            try:

                resp = self._read_line("Lex> ").lower()

                # quit
                if resp == 'q':
                    break

                # generate de-mary / de-espeak / de-sequitur
                elif resp in self._GEN_KEYS:
                    locale, engine, voice = self._GEN_KEYS[resp]
                    lex_entry['ipa'] = self.lex_gen_ipa(lex_base, locale, engine, voice, True)

                # generate en-mary / fr-mary
                elif resp in self._GEN_FOREIGN_KEYS:
                    locale, voice = self._GEN_FOREIGN_KEYS[resp]
                    self._select_voice(locale, 'mary', voice)

                    ipas = self.tts.gen_ipa(lex_base)
                    self.tts.say_ipa(ipas, **self._ASYNC)
                    # only stored once generation and playback went through
                    lex_entry['ipa'] = ipas

                # speak the current transcription
                elif resp in self._SPEAK_KEYS:
                    # nothing to say for an empty transcription
                    if not lex_entry['ipa']:
                        continue

                    locale, voice = self._SPEAK_KEYS[resp]
                    self._say(lex_entry['ipa'], locale, voice)

                # edit XS
                elif resp == 'e':

                    ipas = lex_entry['ipa']

                    xs = ipa2xsampa(lex_token, ipas, stress_to_vowels=False)
                    # offer the current X-SAMPA form through the input history
                    readline.add_history(xs)
                    xs = self._read_line(xs + '> ')

                    ipas = xsampa2ipa(lex_token, xs)

                    lex_entry['ipa'] = ipas

            except EOFError:
                # input is exhausted: prompting again could never succeed
                break

            except:
                # catch-all kept on purpose so the edits made so far still
                # reach save() after any failure
                logging.error('EXCEPTION CAUGHT %s', traceback.format_exc())

        self.lex.save()
        print("new lexicon saved.")
        print('')
Beispiel #8
0
# Prepare the NLP model and obtain a context object from the AI kernal.
kernal.setup_nlp_model()
ctx = kernal.create_context()
logging.debug('AI kernal initialized.')

#
# ASR: the model directory comes from the parsed options
#

asr = ASR(model_dir=options.asr_model)
logging.debug('ASR initialized.')

#
# TTS: espeak engine with the fixed voice "en"
#

tts = TTS(engine="espeak", voice="en")

#
# main loop
#

# chr(27) is ESC; ESC + "[2J" is the ANSI "erase display" sequence.
print(chr(27) + "[2J")
while True:

    #
    # record audio, run VAD
    #

    print "Please speak.",

    rec.start_recording()
Beispiel #9
0
#
# config: settings are read from the '.speechrc' file
#

config = misc.load_config('.speechrc')

wav16_dir   = config.get("speech", "wav16")
host        = config.get('tts', 'host')
# the port option is converted to int before it is handed to TTS
port        = int(config.get('tts', 'port'))

#
# TTS: client for the configured host/port, preset to locale 'de',
# voice 'bits3', engine 'espeak'
#

tts = TTS (host, port, locale='de', voice='bits3', engine='espeak')

def paint_main(stdscr, cur_ts):

    global edit_ts, prompt_tokens, prompt_token_idx

    ts = edit_ts[cur_ts]

    stdscr.clear()

    my, mx = stdscr.getmaxyx()

    for x in range(mx):
        stdscr.insstr(   0, x, ' ', curses.A_REVERSE)
        stdscr.insstr(my-2, x, ' ', curses.A_REVERSE)
        stdscr.insstr(my-1, x, ' ', curses.A_REVERSE)
Beispiel #10
0
# ASR: nnet3 engine; model location and decoder settings come from the
# kaldi_* variables set outside this excerpt.
asr = ASR(engine=ASR_ENGINE_NNET3,
          model_dir=kaldi_model_dir,
          model_name=kaldi_model,
          kaldi_beam=kaldi_beam,
          kaldi_acoustic_scale=kaldi_acoustic_scale,
          kaldi_frame_subsampling_factor=kaldi_frame_subsampling_factor)
logging.debug('ASR initialized.')

#
# TTS: every setting comes from the tts_* variables set outside this excerpt
#

tts = TTS(host_tts=tts_host,
          port_tts=tts_port,
          locale=tts_locale,
          voice=tts_voice,
          engine=tts_engine,
          speed=tts_speed,
          pitch=tts_pitch)

#
# main loop
#

# chr(27) is ESC; ESC + "[2J" is the ANSI "erase display" sequence.
print(chr(27) + "[2J")
while True:

    #
    # record audio, run VAD
    #
Beispiel #11
0
# Turn on keypad mode for the curses screen.
stdscr.keypad(1)

#
# config: settings are read from the '.speechrc' file
#

config = misc.load_config('.speechrc')

host = config.get('tts', 'host')
# the port option is converted to int before it is handed to TTS
port = int(config.get('tts', 'port'))

#
# TTS Client: configured host/port, preset to locale 'de' and voice 'bits3'
#

tts = TTS(host, port, locale='de', voice='bits3')

#
# main
#

try:

    lex_gen = {}
    lex_cur_token = 0
    lex_set_token(lex_tokens[lex_cur_token])

    while True:

        lex_paint_main()
Beispiel #12
0
# Turn on keypad mode for the curses screen.
stdscr.keypad(1)

#
# config: settings are read from the '.speechrc' file
#

config = misc.load_config('.speechrc')

host = config.get('tts', 'host')
# the port option is converted to int before it is handed to TTS
port = int(config.get('tts', 'port'))

#
# TTS Client: configured host/port, preset to locale 'de' and voice 'bits3'
#

tts = TTS(host, port, locale='de', voice='bits3')

#
# main
#

try:

    lex_gen = {}
    lex_cur_token = 0
    lex_set_token(lex_tokens[lex_cur_token])

    while True:

        lex_paint_main()
# Load the transcripts of the selected corpus.
transcripts = Transcripts(corpus_name=corpus)
print "loading transcripts...done."

#
# config
# NOTE(review): `config` is not assigned in this excerpt -- presumably it is
# loaded further up; confirm.
#

wav16_dir   = config.get("speech", "wav16")
host        = config.get('tts', 'host')
port        = int(config.get('tts', 'port'))

#
# TTS: configured host/port, preset to locale 'de' and voice 'bits3'
#

tts = TTS (host, port, locale='de', voice='bits3')

#
# count
#

# every speaker that appears in spk2gender counts as known
known = set()
for spk in spk2gender:
    known.add(spk)

# counter starts at zero; the loop that uses it lies outside this block
num_unk = 0
for cfn in transcripts:

    ts = transcripts[cfn]
    spk = ts['spk']
Beispiel #14
0
# Prepare the NLP model and obtain a context object from the AI kernal.
kernal.setup_nlp_model()
ctx  = kernal.create_context()
logging.debug ('AI kernal initialized.')

#
# ASR: the model directory comes from the parsed options
#

asr = ASR(model_dir = options.asr_model)
logging.debug ('ASR initialized.')

#
# TTS: espeak engine with the fixed voice "en"
#

tts = TTS(engine="espeak", voice="en")

#
# main loop
#

# chr(27) is ESC; ESC + "[2J" is the ANSI "erase display" sequence.
print(chr(27) + "[2J")
while True:

    #
    # record audio, run VAD
    #

    print "Please speak.",

    rec.start_recording()
# Prepare the NLP model and obtain a context object from the AI kernal.
kernal.setup_nlp_model()
ctx = kernal.create_context()
logging.debug('AI kernal initialized.')

#
# ASR: the model directory comes from the parsed options
#

asr = ASR(model_dir=options.asr_model)
logging.debug('ASR initialized.')

#
# TTS: espeak engine; the voice is taken from kernal.lang
#

tts = TTS(engine="espeak", voice=kernal.lang)

#
# main loop
#

# chr(27) is ESC; ESC + "[2J" is the ANSI "erase display" sequence.
print(chr(27) + "[2J")
while True:

    #
    # record audio, run VAD
    #

    print "Please speak.",

    rec.start_recording()
Beispiel #16
0
#!/usr/bin/env python3
from nltools.tts import TTS

# Create an espeak-backed TTS object with the "en" voice and speak a greeting.
tts = TTS(engine="espeak", voice="en")
tts.say("hello from your pi")