Exemple #1
0
class TestRawDecoder(TestCase):
    """Checks a default-constructed ``Pocketsphinx`` decoder against known results.

    A single decoder is created and run once for the whole class; every test
    only reads results from it.
    """

    @classmethod
    def setUpClass(cls):
        # unittest creates one TestCase instance per test method, so doing
        # this work in ``__init__`` would build and run a decoder once per
        # test (and at collection time). ``setUpClass`` runs it exactly once.
        cls.ps = Pocketsphinx()
        cls.ps.decode()

    def test_raw_decoder_lookup_word(self):
        """A known word yields its phone string; an unknown word yields None."""
        self.assertEqual(self.ps.lookup_word('hello'), 'HH AH L OW')
        self.assertIsNone(self.ps.lookup_word('abcdf'))

    def test_raw_decoder_hypothesis(self):
        """The hypothesis text, score and confidence match the reference values."""
        self.assertEqual(self.ps.hypothesis(), 'go forward ten meters')
        self.assertEqual(self.ps.score(), -7066)
        # Compare the float with a tolerance: exact equality on a computed
        # floating-point value is brittle across platforms and builds.
        self.assertAlmostEqual(self.ps.confidence(), 0.04042641466841839)

    def test_raw_decoder_segments(self):
        """The segment list matches the reference sequence, markers included."""
        self.assertEqual(
            self.ps.segments(),
            ['<s>', '<sil>', 'go', 'forward', 'ten', 'meters', '</s>'])

    def test_raw_decoder_best_hypothesis(self):
        """``best()`` returns the reference (text, score) pairs in order."""
        self.assertEqual(self.ps.best(), [('go forward ten meters', -28034),
                                          ('go for word ten meters', -28570),
                                          ('go forward and majors', -28670),
                                          ('go forward and meters', -28681),
                                          ('go forward and readers', -28685),
                                          ('go forward ten readers', -28688),
                                          ('go forward ten leaders', -28695),
                                          ('go forward can meters', -28695),
                                          ('go forward and leaders', -28706),
                                          ('go for work ten meters', -28722)])
class TestRawDecoder(TestCase):
    """Regression tests for the results of a default ``Pocketsphinx`` decode.

    The decoder is shared by all tests in the class: it is constructed and
    run once, and the tests below only query it.
    """

    @classmethod
    def setUpClass(cls):
        # Previously done in an overridden ``__init__``, which unittest calls
        # once per test method while collecting tests. Class-level setup
        # decodes once and keeps failures inside the normal test lifecycle.
        cls.ps = Pocketsphinx()
        cls.ps.decode()

    def test_raw_decoder_lookup_word(self):
        """Dictionary lookup: phones for a known word, None for an unknown one."""
        self.assertEqual(self.ps.lookup_word('hello'), 'HH AH L OW')
        self.assertIsNone(self.ps.lookup_word('abcdf'))

    def test_raw_decoder_hypothesis(self):
        """Hypothesis text, score and confidence agree with the reference run."""
        self.assertEqual(self.ps.hypothesis(), 'go forward ten meters')
        self.assertEqual(self.ps.score(), -7066)
        # Floats are compared approximately; bit-exact equality is fragile.
        self.assertAlmostEqual(self.ps.confidence(), 0.04042641466841839)

    def test_raw_decoder_segments(self):
        """Segments come back as the expected ordered list of labels."""
        self.assertEqual(self.ps.segments(), [
            '<s>', '<sil>', 'go', 'forward', 'ten', 'meters', '</s>'
        ])

    def test_raw_decoder_best_hypothesis(self):
        """The n-best list matches the reference (text, score) pairs."""
        self.assertEqual(self.ps.best(), [
            ('go forward ten meters', -28034),
            ('go for word ten meters', -28570),
            ('go forward and majors', -28670),
            ('go forward and meters', -28681),
            ('go forward and readers', -28685),
            ('go forward ten readers', -28688),
            ('go forward ten leaders', -28695),
            ('go forward can meters', -28695),
            ('go forward and leaders', -28706),
            ('go for work ten meters', -28722),
        ])
Exemple #3
0
class SpeechProcessor:
    """Grammar-constrained speech recognition plus text-to-speech synthesis.

    Recognition uses a ``Pocketsphinx`` decoder switched to a JSGF grammar
    search; synthesis uses a ``TextToSpeechV1`` client.
    """

    # NOTE(security): these are the values that used to be hard-coded inside
    # ``__init__``. They remain the defaults only so existing callers keep
    # working. A credential committed to source should be rotated and passed
    # in through ``tts_api_key`` instead.
    _DEFAULT_TTS_API_KEY = 'cq9_4YcCXxClw2AfgUhbokFktZ-xSRT4kcHS2akcZ05J'
    _DEFAULT_TTS_URL = 'https://stream.watsonplatform.net/text-to-speech/api'

    def __init__(self, hmm='data/spanish/CIEMPIESS_Spanish_Models_581h/Models/modelo',
                       lm='data/spanish/CIEMPIESS_Spanish_Models_581h/Models/leng.lm.bin',
                       dict='data/spanish/CIEMPIESS_Spanish_Models_581h/Models/dicc.dic',
                       grammar='data/gramatica-tp2.gram', dataPath='tmp/',
                       tts_api_key=None, tts_url=None):
        """Create the decoder, install the grammar search and set up the TTS client.

        Args:
            hmm: Passed to ``Pocketsphinx`` as its ``hmm`` option.
            lm: Passed to ``Pocketsphinx`` as its ``lm`` option.
            dict: Passed to ``Pocketsphinx`` as its ``dict`` option. The name
                shadows the builtin but is kept because callers may pass it
                by keyword.
            grammar: Path handed to ``Jsgf``; the rule ``tp2.grammar`` is
                looked up in it.
            dataPath: Directory that ``reconocer`` joins input file names onto.
            tts_api_key: API key for the TTS authenticator. ``None`` keeps the
                legacy built-in key.
            tts_url: Service URL for the TTS client. ``None`` keeps the legacy
                built-in URL.
        """
        self.data_path = dataPath
        config = {
            'hmm': hmm,
            'lm': lm,
            'dict': dict
        }

        self.ps = Pocketsphinx(**config)

        # Switch the decoder from its initial search to the JSGF grammar.
        jsgf = Jsgf(grammar)
        rule = jsgf.get_rule('tp2.grammar')
        # NOTE(review): 7.5 is presumably the language weight - confirm.
        fsg = jsgf.build_fsg(rule, self.ps.get_logmath(), 7.5)
        self.ps.set_fsg('tp2', fsg)
        self.ps.set_search('tp2')

        # Speech synthesis client.
        if tts_api_key is None:
            tts_api_key = self._DEFAULT_TTS_API_KEY
        if tts_url is None:
            tts_url = self._DEFAULT_TTS_URL
        self.tts_authenticator = IAMAuthenticator(tts_api_key)
        self.tts = TextToSpeechV1(authenticator=self.tts_authenticator)
        self.tts.set_service_url(tts_url)

    def sintetizar(self, outFileName, msg, voice='es-LA_SofiaV3Voice',
                   accept='audio/wav'):
        """Synthesize ``msg`` and write the audio to ``outFileName``.

        Nothing is requested or written when ``msg`` is empty.

        Args:
            outFileName: Path of the audio file to create (opened in ``'wb'``).
            msg: Text to synthesize.
            voice: Voice identifier forwarded to ``synthesize``.
            accept: Audio MIME type forwarded to ``synthesize``.
        """
        if not msg:
            return
        # Fetch the audio before opening the file: opening with 'wb' first
        # would truncate an existing file even when the request then fails.
        audio = self.tts.synthesize(
            msg,
            voice=voice,
            accept=accept
        ).get_result().content
        with open(outFileName, 'wb') as audio_file:
            audio_file.write(audio)

    def reconocer(self, inFileName='audio.wav'):
        """Decode ``inFileName`` (relative to ``data_path``) with the grammar search.

        Returns:
            A pair ``(self.ps.segments(), self.ps.best(count=3))``.
        """
        print(self.data_path)
        self.ps.decode(
            audio_file=os.path.join(self.data_path, inFileName),
            buffer_size=2048,
            no_search=False,
            full_utt=False
        )
        return self.ps.segments(), self.ps.best(count=3)
        return "light on"
    elif ("TURN OFF THE LIGHT" in speech):
        playwave(os.path.join(voice_path, 'beep_lo.wav'))
        print("turn off the light")
        return "light off"
    else:
        return "null"


if __name__ == "__main__":
    # Decode the sample recording found under voice_path.
    ps.decode(
        audio_file=os.path.join(voice_path, "baby.wav"),
        buffer_size=2048,
        no_search=False,
        full_utt=False,
    )

    # Keep only the first element of each of the 10 best results.
    best_result = ps.best(count=10)
    speech = [candidate[0] for candidate in best_result]

    time.sleep(3)

    # Runs until interrupted: report a match if there is one, then play the
    # beep and wait three seconds before the next round.
    while True:
        if "HI BABY" in speech:
            print("recognise right")

        playwave(os.path.join(voice_path, 'beep_hi.wav'))

        time.sleep(3)
# Segment labels of the decoded utterance.
# => ['<s>', '<sil>', 'go', 'forward', 'ten', 'meters', '</s>']
print(ps.segments())

# A heading line followed by one detailed segment per line.
detailed_segments = ps.segments(detailed=True)
print('Detailed segments:', *detailed_segments, sep='\n')  # => [
#     word, prob, start_frame, end_frame
#     ('<s>', 0, 0, 24)
#     ('<sil>', -3778, 25, 45)
#     ('go', -27, 46, 63)
#     ('forward', -38, 64, 116)
#     ('ten', -14105, 117, 152)
#     ('meters', -2152, 153, 211)
#     ('</s>', 0, 212, 260)
# ]

# Hypothesis text and the values reported for it.
print(ps.hypothesis())  # => go forward ten meters
print(ps.probability())  # => -32079
print(ps.score())  # => -7066
print(ps.confidence())  # => 0.04042641466841839

# The ten best results, one (text, score) pair per line.
n_best = ps.best(count=10)
print(*n_best, sep='\n')  # => [
#     ('go forward ten meters', -28034)
#     ('go for word ten meters', -28570)
#     ('go forward and majors', -28670)
#     ('go forward and meters', -28681)
#     ('go forward and readers', -28685)
#     ('go forward ten readers', -28688)
#     ('go forward ten leaders', -28695)
#     ('go forward can meters', -28695)
#     ('go forward and leaders', -28706)
#     ('go for work ten meters', -28722)
# ]
Exemple #6
0
class Sphinx(Thread):
    """Thread that creates a ``Pocketsphinx`` decoder and converts audio files to text.

    The decoder is built in ``run()``; ``ready`` becomes True once it exists,
    and ``to_text`` refuses to work before that.
    """

    def __init__(self):
        Thread.__init__(self)
        # Set to True at the end of run(), after the decoder is created.
        self.ready = False

    def run(self):
        """Build the decoder configuration, create the decoder and mark it ready."""
        print_important("Info! Thread sphinx started.")
        self.config = {
            'verbose': True,
            'hmm': os.path.join('s2m', 'core', 'sphinx', 'fr'),
            'lm': os.path.join('s2m', 'core', 'sphinx', 'fr.lm.dmp'),
            'dict': os.path.join('s2m', 'core', 'sphinx', 's2m.dict'),
            'jsgf': os.path.join('s2m', 'core', 'sphinx', 's2m.jsgf'),
        }
        self.pocketsphinx = Pocketsphinx(**self.config)
        self.ready = True

    def get_silence(self, duration):
        """Map a relative silence duration to its bracketed silence token.

        Args:
            duration: Silence length; ``get_segment_string`` passes it as a
                multiple of the average word length.

        Returns:
            One of ``'[veryshortsil]'``, ``'[shortsil]'``, ``'[sil]'``,
            ``'[longsil]'`` or ``'[verylongsil]'``. Each threshold is an
            exclusive upper bound.
        """
        if duration < 0.25:
            return '[veryshortsil]'
        if duration < 0.5:
            return '[shortsil]'
        if duration < 1.5:
            return '[sil]'
        if duration < 3.:
            return '[longsil]'
        return '[verylongsil]'

    def get_segment_string(self, segments):
        """Render segments as space-separated words and silence tokens.

        Consecutive ``<sil>`` segments are merged and emitted as one token in
        front of the next word; silence after the last word is never emitted.
        ``<s>`` and ``</s>`` are ignored.

        Args:
            segments: Iterable of objects exposing ``word``, ``start_frame``
                and ``end_frame``.

        Returns:
            The rendered string, or ``''`` when there is no spoken word.
        """
        segment_list = []  # words (str) interleaved with silence lengths (int)
        last_silence = 0
        spoken_duration = 0
        word_count = 0
        for segment in segments:
            if segment.word in ('<s>', '</s>'):
                continue
            frames = segment.end_frame - segment.start_frame
            if segment.word == '<sil>':
                last_silence += frames
                continue
            if last_silence > 0:
                segment_list.append(last_silence)
                last_silence = 0
            spoken_duration += frames
            segment_list.append(segment.word)
            word_count += 1
        if word_count == 0:
            return ''
        # Silences are classified relative to the average word length.
        avg_word_duration = spoken_duration / word_count
        return ' '.join(
            self.get_silence(item / avg_word_duration)
            if isinstance(item, int) else nobrackets(item)
            for item in segment_list)

    def to_text(self, filename, erase=False):
        """Decode an audio file and return its segment string and alternatives.

        Args:
            filename: Path of the audio file handed to the decoder.
            erase: When True, delete ``filename`` afterwards.

        Returns:
            ``(segment_string, nbest)``: the string built by
            ``get_segment_string`` and the list of alternative hypotheses
            (the first entry of ``best(count=10)`` is skipped). When decoding
            fails, an error is printed and ``('', [])`` is returned.

        Raises:
            EnvironmentError: If ``run()`` has not finished creating the decoder.
        """
        if not self.ready:
            raise EnvironmentError('Initialization of sphinx not finished.')
        try:
            self.pocketsphinx.decode(filename)
        except Exception:
            print("An error was raised by sphinx while decoding file '%r', parsing aborted" % (filename,))
            # Actually abort: do not read results out of a decoder whose
            # decode call just failed.
            segment_string, nbest = '', []
        else:
            segment_string = self.get_segment_string(self.pocketsphinx.seg())
            nbest = [nobrackets(w[0])
                     for w in self.pocketsphinx.best(count=10)[1:]]
        if erase:
            # Was os.remove(loc): 'loc' is undefined, so erase=True raised
            # NameError instead of deleting the decoded file.
            os.remove(filename)
        return segment_string, nbest