Code Example #1
# Imports assumed by this excerpt; `r` (a speech_recognition.Recognizer) and
# `s_dir` (the sample directory) are defined elsewhere in the source project.
import os

import speech_recognition as sr
from pocketsphinx import Decoder, Pocketsphinx


def pocket():

	ps = Pocketsphinx()


	language_directory = os.path.dirname(os.path.realpath(__file__))
	
	print(language_directory)

	acoustic_parameters_directory = os.path.join(language_directory, "acoustic-model")
	language_model_file = os.path.join(language_directory, "language-model.lm.bin")
	phoneme_dictionary_file = os.path.join(language_directory, "pronounciation-dictionary.dict")
    
	config = Decoder.default_config()
	config.set_string("-hmm", acoustic_parameters_directory)  # set the path of the hidden Markov model (HMM) parameter files
	config.set_string("-lm", language_model_file)
	config.set_string("-dict", phoneme_dictionary_file)

	decoder = Decoder(config)

	with sr.AudioFile(s_dir + "/a bad situation could become dramatically worse. /a bad situation could become dramatically worse. .wav") as source:
		audio_data = r.record(source)
	decoder.start_utt()
	decoder.process_raw(audio_data.get_raw_data(convert_rate=16000, convert_width=2), False, True)  # process_raw expects raw 16 kHz, 16-bit PCM bytes, not an AudioData object
	decoder.end_utt()

	print(decoder.hyp())

	ps.decode(
	    audio_file=os.path.join(s_dir, 'a bad situation could become dramatically worse. /a bad situation could become dramatically worse. .wav'),
	    buffer_size=2048,
	    no_search=False,
	    full_utt=False)
	print(ps.hypothesis()) # => ['<s>', '<sil>', 'go', 'forward', 'ten', 'meters', '</s>']
#pocket()
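Example #1 mixes the speech_recognition wrapper with the low-level Decoder API; as fixed above, r.record() returns an AudioData object that must be converted to raw PCM bytes before process_raw accepts it. For comparison, a minimal sketch of the same decode using only the standard-library wave module; the model directory and WAV filename below are placeholders:

import os
import wave

from pocketsphinx import Decoder

MODEL_DIR = "model"  # placeholder directory holding the model files from Example #1
config = Decoder.default_config()
config.set_string("-hmm", os.path.join(MODEL_DIR, "acoustic-model"))
config.set_string("-lm", os.path.join(MODEL_DIR, "language-model.lm.bin"))
config.set_string("-dict", os.path.join(MODEL_DIR, "pronounciation-dictionary.dict"))
decoder = Decoder(config)

with wave.open("utterance.wav", "rb") as wav:  # placeholder: 16 kHz, 16-bit mono WAV
    frames = wav.readframes(wav.getnframes())  # raw little-endian PCM bytes

decoder.start_utt()
decoder.process_raw(frames, False, True)  # full_utt=True: decode the whole utterance at once
decoder.end_utt()
if decoder.hyp() is not None:
    print(decoder.hyp().hypstr)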
Code Example #2
File: speech_recognizer.py Project: ys7914/DrinkWise
def listen(MODE):
    CORPUS = 6278
    model_path = get_model_path()
    home_path = "/home/the0s/Desktop/HCR_Python"
    print(model_path)
    print(home_path)
    DATADIR = "/usr/local/lib/python2.7/dist-packages/pocketsphinx/data"

    config = Decoder.default_config()
    config.set_string('-hmm', os.path.join(model_path, 'hub4wsj_sc_8k'))
    config.set_string('-lm', os.path.join(home_path, str(CORPUS) + '.lm.bin'))
    config.set_string('-dict', os.path.join(home_path, str(CORPUS) + '.dic'))
    config.set_string('-logfn', '/dev/null')
    decoder = Decoder(config)

    p = pyaudio.PyAudio()

    stream = p.open(format=pyaudio.paInt16,
                    channels=1,
                    rate=16000,
                    input=True,
                    frames_per_buffer=1024)
    stream.start_stream()
    in_speech_bf = False
    decoder.start_utt()
    while True:
        buf = stream.read(1024)
        if buf:
            decoder.process_raw(buf, False, False)
            if decoder.get_in_speech() != in_speech_bf:
                in_speech_bf = decoder.get_in_speech()
                if not in_speech_bf:
                    decoder.end_utt()
                    if decoder.hyp() is not None:
                        buf = decoder.hyp().hypstr.split()
                        print(buf)
                        if len(buf) > 0:
                            if MODE == 0:  #DrinkRequest
                                for item in buf:
                                    if checkRequest(item) != "NONE":
                                        output = checkRequest(item)
                                        stream.stop_stream()
                                        stream.close()
                                        return output
                            if MODE == 1:  #DrinkConfirm
                                for item in buf:
                                    if checkConfirm(item) != "NONE":
                                        output = checkConfirm(item)
                                        stream.stop_stream()
                                        stream.close()
                                        return output

                    decoder.start_utt()
        else:
            break
    decoder.end_utt()
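Example #2 shows the standard utterance-cycling protocol for live decoding: feed chunks with process_raw, watch get_in_speech() for the speech-to-silence edge, then call end_utt(), read the hypothesis, and start_utt() again. A stripped-down sketch of just that loop (stream is a PyAudio input stream as above; on_result is any callback):

def decode_utterances(decoder, stream, on_result):
    """Cycle utterances forever: detect end of speech, report the hypothesis, restart."""
    in_speech = False
    decoder.start_utt()
    while True:
        buf = stream.read(1024)
        if not buf:
            break
        decoder.process_raw(buf, False, False)
        if decoder.get_in_speech() != in_speech:  # VAD state changed
            in_speech = decoder.get_in_speech()
            if not in_speech:  # speech -> silence edge: utterance finished
                decoder.end_utt()
                if decoder.hyp() is not None:
                    on_result(decoder.hyp().hypstr)
                decoder.start_utt()  # arm the next utterance
    decoder.end_utt()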
Code Example #3
File: __main__.py Project: PyCee/vocal_commands
    def __init__(self):
        
        MODELDIR = get_model_path()
        CURR_DIR = os.path.dirname(os.path.realpath(__file__))
        KEYPHRASE_THRESH_DIR = CURR_DIR + '/keyphrases.thresh'
        
        # Create a decoder with certain model
        config = Decoder.default_config()
        config.set_string('-hmm', os.path.join(MODELDIR, 'en-us'))
        config.set_string('-dict', \
                          os.path.join(MODELDIR, 'cmudict-en-us.dict'))
        config.set_string('-kws', KEYPHRASE_THRESH_DIR)
        #config.set_string('-logfn', '/dev/null')
        decoder = Decoder(config)
        
        
        p = pyaudio.PyAudio()
        host_info = p.get_host_api_info_by_index(0)
        device_index = 3
        for i in range(host_info.get('deviceCount')):
            device_info = p.get_device_info_by_host_api_device_index(0, i)
            #print('\n\n\n\n'+str(i)+device_info.get('name') + " : " + str(device_info.get('maxInputChannels')))
            if 'USB' in device_info.get('name'):
                device_index = i
                break

        # sample keyphrases.thresh entry: fire /1e18/
            
        stream = p.open(
            format=pyaudio.paInt16,
            channels=1,
            rate=44100,
            input=True,
            frames_per_buffer=1024,
            input_device_index=device_index)
        
        stream.start_stream()
        in_speech_bf = True
        
        decoder.start_utt()
        print("Starting to listen")
        
        while True:
            buf = stream.read(1024, exception_on_overflow = False)
            decoder.process_raw(buf, False, False)
            if decoder.hyp() is not None:
                print("\nDetected: " + decoder.hyp().hypstr + "\n")
                decoder.end_utt()
                #print "Detected Move Forward, restarting search"
                decoder.start_utt()
        print("Am not listening any more")
        stream.stop_stream()
        stream.close()
        p.terminate()
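Example #3 passes a -kws keyphrase file instead of a single -keyphrase, which lets one decoder spot several phrases at once; the stray "fire /1e18/" string in the original code (kept above as a comment) is evidently a sample entry. The file lists one phrase per line followed by its detection threshold between slashes. Hypothetical keyphrases.thresh contents:

fire /1e18/
move forward /1e-20/
stop listening /1e-30/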
Code Example #4
class PocketsphinxHotWord(HotWordEngine):
    """Wake word engine using PocketSphinx.

    PocketSphinx is very general purpose but has a somewhat high error rate.
    The key advantage is to be able to specify the wake word with phonemes.
    """
    def __init__(self, key_phrase="hey mycroft", config=None, lang="en-us"):
        super().__init__(key_phrase, config, lang)
        # Hotword module imports
        from pocketsphinx import Decoder
        # Hotword module params
        self.phonemes = self.config.get("phonemes", "HH EY . M AY K R AO F T")
        self.num_phonemes = len(self.phonemes.split())
        self.threshold = self.config.get("threshold", 1e-90)
        self.sample_rate = self.listener_config.get("sample_rate", 1600)
        dict_name = self.create_dict(self.key_phrase, self.phonemes)
        config = self.create_config(dict_name, Decoder.default_config())
        self.decoder = Decoder(config)

    def create_dict(self, key_phrase, phonemes):
        (fd, file_name) = tempfile.mkstemp()
        words = key_phrase.split()
        phoneme_groups = phonemes.split('.')
        with os.fdopen(fd, 'w') as f:
            for word, phoneme in zip(words, phoneme_groups):
                f.write(word + ' ' + phoneme + '\n')
        return file_name

    def create_config(self, dict_name, config):
        """If language config doesn't exist then
        we use default language (english) config as a fallback.
        """
        model_file = join(RECOGNIZER_DIR, 'model', self.lang, 'hmm')
        if not exists(model_file):
            LOG.error(
                'PocketSphinx model not found at "{}". '.format(model_file) +
                'Falling back to en-us model'
            )
            model_file = join(RECOGNIZER_DIR, 'model', 'en-us', 'hmm')
        config.set_string('-hmm', model_file)
        config.set_string('-dict', dict_name)
        config.set_string('-keyphrase', self.key_phrase)
        config.set_float('-kws_threshold', float(self.threshold))
        config.set_float('-samprate', self.sample_rate)
        config.set_int('-nfft', 2048)
        config.set_string('-logfn', '/dev/null')
        return config

    def transcribe(self, byte_data, metrics=None):
        start = time()
        self.decoder.start_utt()
        self.decoder.process_raw(byte_data, False, False)
        self.decoder.end_utt()
        if metrics:
            metrics.timer("mycroft.stt.local.time_s", time() - start)
        return self.decoder.hyp()

    def found_wake_word(self, frame_data):
        hyp = self.transcribe(frame_data)
        return hyp and self.key_phrase in hyp.hypstr.lower()
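The create_dict helper in Example #4 turns the '.'-separated phoneme string into a temporary pronunciation dictionary, one word per line. A quick check of what it writes for the default key phrase (the spacing around the '.' separator is preserved verbatim):

words = "hey mycroft".split()
phoneme_groups = "HH EY . M AY K R AO F T".split('.')
for word, phoneme in zip(words, phoneme_groups):
    print(word + ' ' + phoneme)
# hey HH EY 
# mycroft  M AY K R AO F T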
Code Example #5
def record(listen_time):

    THRESHOLD = None
    WAVE_OUTPUT_FILENAME = "livewav.wav"

    p = pyaudio.PyAudio()
    if THRESHOLD is None:
        THRESHOLD = fetchThreshold()
        print(THRESHOLD)

    stream = p.open(format=FORMAT,
                    channels=1,
                    rate=RATE,
                    input=True,
                    frames_per_buffer=CHUNK)

    print "* recording"
    frames = []
    detected = False
    for i in range(0, int(RATE / CHUNK * listen_time)):
        data = stream.read(CHUNK)
        frames.append(data)
        score = getScore(data)
        if score < THRESHOLD:
            continue
        else:
            detected = True
    if not detected:
        print("nothing detected")
        return ""

    print "* done recording"
    # stream.stop_stream()
    stream.close()
    p.terminate()

    # write data to WAVE file
    data = b''.join(frames)  # join raw PCM chunks as bytes
    wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
    wf.setnchannels(1)
    wf.setsampwidth(p.get_sample_size(FORMAT))
    wf.setframerate(RATE)
    wf.writeframes(data)
    wf.close()
    sysdir = os.getcwd()
    wavfile = sysdir + "/livewav.wav"
    config = Decoder.default_config()
    config.set_string('-hmm', hmdir)
    config.set_string('-lm', lmdir)
    config.set_string('-dict', dictd)
    config.set_string('-logfn', '/dev/null')

    speechRec = Decoder(config)

    with open(wavfile, 'rb') as wavFile:
        speechRec.decode_raw(wavFile)
        #result = speechRec.get_hyp()

    return(speechRec.hyp().hypstr)
Code Example #6
File: local.py Project: yannickulrich/IRIS
class stt:
    def __init__(self, profile, hmm=None, dict=None, lm=None,
                 kws_threshold=None, keyphrase=None):
        self.profile = profile
        if keyphrase:
            if not dict:
                dict = fullpath('config/keyphrase.dic')
            if not lm:
                lm = fullpath('config/keyphrase.lm')
        else:
            if not dict:
                dict = fullpath('config/corpus.dic')
            if not lm:
                lm = fullpath('config/corpus.lm')

        if not hmm:
            hmm = 'share/pocketsphinx/model/en-us/en-us'

        config = Decoder.default_config()
        config.set_string('-hmm', os.path.join(SPHINX_ROOT, hmm))
        config.set_string('-dict', dict)
        config.set_string('-lm', lm)
        config.set_string('-logfn', fullpath('config/sphinx.log'))

        if keyphrase:
            config.set_string('-keyphrase', keyphrase)
        if kws_threshold:
            config.set_float('-kws_threshold', kws_threshold)

        self.decoder = Decoder(config)

        self.transcribe = self.transcribe_darwin
        self.hyp = None

    def transcribe_darwin(self, wav):
        self.decoder.start_utt()
        self.decoder.process_raw(wav, False, False)
        self.decoder.end_utt()

        self.hyp = self.decoder.hyp()
        if self.hyp:
            return self.hyp.hypstr
    
    def get_prob(self):
        if self.hyp:
            print(self.hyp.best_score)
            return self.hyp.prob

    def transcribe_linux(self, wav):
        self.decoder.start_utt()
        self.decoder.process_raw(wav, False, False)
        self.decoder.end_utt()

        result = self.decoder.get_hyp()
        if result:
            return result[0]
Code Example #7
class LocalRecognizer(object):
    def __init__(self,
                 key_phrase,
                 phonemes,
                 threshold,
                 sample_rate=16000,
                 lang="en-us"):
        self.lang = lang
        self.key_phrase = key_phrase
        self.sample_rate = sample_rate
        self.threshold = threshold
        self.phonemes = phonemes
        dict_name = self.create_dict(key_phrase, phonemes)
        self.decoder = Decoder(self.create_config(dict_name))

    def create_dict(self, key_phrase, phonemes):
        (fd, file_name) = tempfile.mkstemp()
        words = key_phrase.split()
        phoneme_groups = phonemes.split('.')
        with os.fdopen(fd, 'w') as f:
            for word, phoneme in zip(words, phoneme_groups):
                f.write(word + ' ' + phoneme + '\n')
        return file_name

    def create_config(self, dict_name):
        config = Decoder.default_config()
        config.set_string('-hmm',
                          os.path.join(BASEDIR, 'model', self.lang, 'hmm'))
        config.set_string('-dict', dict_name)
        config.set_string('-keyphrase', self.key_phrase)
        config.set_float('-kws_threshold', float(self.threshold))
        config.set_float('-samprate', self.sample_rate)
        config.set_int('-nfft', 2048)
        config.set_string('-logfn',
                          '/home/sg/mycroft-core/scripts/logs/pocket.log')
        return config

    def transcribe(self, byte_data, metrics=None):
        start = time.time()
        self.decoder.start_utt()
        self.decoder.process_raw(byte_data, False, False)
        self.decoder.end_utt()
        if metrics:
            metrics.timer("mycroft.stt.local.time_s", time.time() - start)
        return self.decoder.hyp()

    def is_recognized(self, byte_data, metrics):
        hyp = self.transcribe(byte_data, metrics)
        return hyp and self.key_phrase in hyp.hypstr.lower()

    def found_wake_word(self, frame_data):
        hyp = self.transcribe(frame_data)
        return hyp and self.key_phrase in hyp.hypstr.lower()
Code Example #8
class PocketsphinxRecognizer(LocalRecognizer):
    def __init__(self,
                 key_phrase,
                 phonemes,
                 threshold,
                 sample_rate=16000,
                 lang="en-us"):
        self.lang = str(lang)
        self.key_phrase = str(key_phrase)
        print("####key_phrase-->", key_phrase)
        self.sample_rate = sample_rate
        self.threshold = threshold
        self.phonemes = phonemes
        print("####phonemes -->", phonemes)
        dict_name = self.create_dict(key_phrase, phonemes)
        print("####dict_name --->", dict_name)
        self.decoder = Decoder(self.create_config(dict_name))

    def create_dict(self, key_phrase, phonemes):
        (fd, file_name) = tempfile.mkstemp()
        words = key_phrase.split()
        phoneme_groups = phonemes.split('.')
        with os.fdopen(fd, 'w') as f:
            for word, phoneme in zip(words, phoneme_groups):
                f.write(word + ' ' + phoneme + '\n')

        return file_name

    def create_config(self, dict_name):
        config = Decoder.default_config()
        config.set_string('-hmm', join(BASEDIR, 'model', self.lang, 'hmm'))
        config.set_string('-dict', dict_name)
        config.set_string('-keyphrase', self.key_phrase)
        config.set_float('-kws_threshold', float(self.threshold))
        config.set_float('-samprate', self.sample_rate)
        config.set_int('-nfft', 2048)
        config.set_string('-logfn',
                          '/home/sg/mycroft-core/scripts/logs/pocket.log')
        return config

    def transcribe(self, byte_data, metrics=None):
        start = time.time()
        #sr = r.recognize_sphinx()
        self.decoder.start_utt()
        self.decoder.process_raw(byte_data, False, False)
        self.decoder.end_utt()
        if metrics:
            metrics.timer("mycroft.stt.local.time_s", time.time() - start)
        #LOG.error("transcribed ---> +"+str(self.decoder.hyp()))
        return self.decoder.hyp()

    def found_wake_word(self, frame_data):
        hyp = self.transcribe(frame_data)
        #LOG.info("hyp is ---->"+hyp))
        return hyp and self.key_phrase in hyp.hypstr.lower()
Code Example #9
    def run(self):
        conf = Decoder.default_config()
        conf.set_string('-hmm', self.config.hmmPS)
        conf.set_string('-lm', self.config.lmPS)
        conf.set_string('-dict', self.config.dictPS)
        if os.path.isfile(self.config.mllrPS):
            conf.set_string('-mllr', self.config.mllrPS)
        decoder = Decoder(conf)

        p = pyaudio.PyAudio()
        stream = p.open(format=pyaudio.paInt16,
                        channels=1,
                        rate=16000,
                        input=True,
                        frames_per_buffer=1024)
        stream.start_stream()
        self.samplewith = p.get_sample_size(pyaudio.paInt16)

        in_speech_bf = True
        decoder.start_utt('')  # older pocketsphinx API: start_utt took an utterance id
        while not self._terminate:
            buf = stream.read(1024)
            if buf:
                if self.save:
                    self.liSave.append(buf)
                    self.numSave += 1
                    if self.numSave > self.maxSave:  # guard against leaving the microphone on
                        self.activeSave(self.fichWAV)
                decoder.process_raw(buf, False, False)
                if decoder.get_in_speech() != in_speech_bf:
                    in_speech_bf = decoder.get_in_speech()
                    if not in_speech_bf:
                        decoder.end_utt()
                        try:
                            if decoder.hyp().hypstr != '':
                                self.decode(decoder.hyp().hypstr)
                        except AttributeError:
                            pass
                        decoder.start_utt('')
            else:
                break
        decoder.end_utt()
Code Example #10
File: Voice.py Project: JERUKA9/lucaschess
    def run( self ):
        conf = Decoder.default_config()
        conf.set_string('-hmm', self.config.hmmPS)
        conf.set_string('-lm', self.config.lmPS)
        conf.set_string('-dict', self.config.dictPS)
        if os.path.isfile(self.config.mllrPS):
            conf.set_string('-mllr', self.config.mllrPS)
        decoder = Decoder(conf)

        p = pyaudio.PyAudio()
        stream = p.open( format=pyaudio.paInt16,
                         channels=1,
                         rate=16000,
                         input=True,
                         frames_per_buffer=1024 )
        stream.start_stream()
        self.samplewith = p.get_sample_size(pyaudio.paInt16)

        in_speech_bf = True
        decoder.start_utt('')
        while not self._terminate:
            buf = stream.read(1024)
            if buf:
                if self.save:
                    self.liSave.append(buf)
                    self.numSave += 1
                    if self.numSave > self.maxSave: # guard against leaving the microphone on
                        self.activeSave(self.fichWAV)
                decoder.process_raw(buf, False, False)
                if decoder.get_in_speech() != in_speech_bf:
                    in_speech_bf = decoder.get_in_speech()
                    if not in_speech_bf:
                        decoder.end_utt()
                        try:
                            if decoder.hyp().hypstr != '':
                                self.decode(decoder.hyp().hypstr)
                        except AttributeError:
                            pass
                        decoder.start_utt('')
            else:
                break
        decoder.end_utt()
Code Example #11
class PocketsphinxHotWord(HotWordEngine):
    def __init__(self, key_phrase="hey mycroft", config=None, lang="en-us"):
        super(PocketsphinxHotWord, self).__init__(key_phrase, config, lang)
        # Hotword module imports
        from pocketsphinx import Decoder
        # Hotword module config
        module = self.config.get("module")
        if module != "pocketsphinx":
            LOG.warning(
                str(module) + " module does not match with "
                              "Hotword class pocketsphinx")
        # Hotword module params
        self.phonemes = self.config.get("phonemes", "HH EY . M AY K R AO F T")
        self.num_phonemes = len(self.phonemes.split())
        self.threshold = self.config.get("threshold", 1e-90)
        self.sample_rate = self.listener_config.get("sample_rate", 1600)
        dict_name = self.create_dict(key_phrase, self.phonemes)
        config = self.create_config(dict_name, Decoder.default_config())
        self.decoder = Decoder(config)

    def create_dict(self, key_phrase, phonemes):
        (fd, file_name) = tempfile.mkstemp()
        words = key_phrase.split()
        phoneme_groups = phonemes.split('.')
        with os.fdopen(fd, 'w') as f:
            for word, phoneme in zip(words, phoneme_groups):
                f.write(word + ' ' + phoneme + '\n')
        return file_name

    def create_config(self, dict_name, config):
        model_file = join(RECOGNIZER_DIR, 'model', self.lang, 'hmm')
        if not exists(model_file):
            LOG.error('PocketSphinx model not found at ' + str(model_file))
        config.set_string('-hmm', model_file)
        config.set_string('-dict', dict_name)
        config.set_string('-keyphrase', self.key_phrase)
        config.set_float('-kws_threshold', float(self.threshold))
        config.set_float('-samprate', self.sample_rate)
        config.set_int('-nfft', 2048)
        config.set_string('-logfn', '/dev/null')
        return config

    def transcribe(self, byte_data, metrics=None):
        start = time.time()
        self.decoder.start_utt()
        self.decoder.process_raw(byte_data, False, False)
        self.decoder.end_utt()
        if metrics:
            metrics.timer("mycroft.stt.local.time_s", time.time() - start)
        return self.decoder.hyp()

    def found_wake_word(self, frame_data):
        hyp = self.transcribe(frame_data)
        return hyp and self.key_phrase in hyp.hypstr.lower()
Code Example #12
File: hotword_factory.py Project: lakst/mykt
class PocketsphinxHotWord(HotWordEngine):
    def __init__(self, key_phrase="hey mycroft", config=None, lang="en-us"):
        super(PocketsphinxHotWord, self).__init__(key_phrase, config, lang)
        # Hotword module imports
        from pocketsphinx import Decoder
        # Hotword module config
        module = self.config.get("module")
        if module != "pocketsphinx":
            LOG.warning(
                str(module) + " module does not match with "
                "Hotword class pocketsphinx")
        # Hotword module params
        self.phonemes = self.config.get("phonemes", "HH EY . M AY K R AO F T")
        self.num_phonemes = len(self.phonemes.split())
        self.threshold = self.config.get("threshold", 1e-90)
        self.sample_rate = self.listener_config.get("sample_rate", 1600)
        dict_name = self.create_dict(self.key_phrase, self.phonemes)
        config = self.create_config(dict_name, Decoder.default_config())
        self.decoder = Decoder(config)

    def create_dict(self, key_phrase, phonemes):
        (fd, file_name) = tempfile.mkstemp()
        words = key_phrase.split()
        phoneme_groups = phonemes.split('.')
        with os.fdopen(fd, 'w') as f:
            for word, phoneme in zip(words, phoneme_groups):
                f.write(word + ' ' + phoneme + '\n')
        return file_name

    def create_config(self, dict_name, config):
        model_file = join(RECOGNIZER_DIR, 'model', self.lang, 'hmm')
        if not exists(model_file):
            LOG.error('PocketSphinx model not found at ' + str(model_file))
        config.set_string('-hmm', model_file)
        config.set_string('-dict', dict_name)
        config.set_string('-keyphrase', self.key_phrase)
        config.set_float('-kws_threshold', float(self.threshold))
        config.set_float('-samprate', self.sample_rate)
        config.set_int('-nfft', 2048)
        config.set_string('-logfn', '/dev/null')
        return config

    def transcribe(self, byte_data, metrics=None):
        start = time.time()
        self.decoder.start_utt()
        self.decoder.process_raw(byte_data, False, False)
        self.decoder.end_utt()
        if metrics:
            metrics.timer("mycroft.stt.local.time_s", time.time() - start)
        return self.decoder.hyp()

    def found_wake_word(self, frame_data):
        hyp = self.transcribe(frame_data)
        return hyp and self.key_phrase in hyp.hypstr.lower()
Code Example #13
class PocketsphinxListener:
    """Pocketsphinx listener implementation used for comparison with Precise"""
    def __init__(self,
                 key_phrase,
                 dict_file,
                 hmm_folder,
                 threshold=1e-90,
                 chunk_size=-1):
        from pocketsphinx import Decoder
        config = Decoder.default_config()
        config.set_string('-hmm', hmm_folder)
        config.set_string('-dict', dict_file)
        config.set_string('-keyphrase', key_phrase)
        config.set_float('-kws_threshold', float(threshold))
        config.set_float('-samprate', 16000)
        config.set_int('-nfft', 2048)
        config.set_string('-logfn', '/dev/null')
        self.key_phrase = key_phrase
        self.buffer = b'\0' * pr.sample_depth * pr.buffer_samples
        self.pr = pr
        self.read_size = -1 if chunk_size == -1 else pr.sample_depth * chunk_size

        try:
            self.decoder = Decoder(config)
        except RuntimeError:
            options = dict(key_phrase=key_phrase,
                           dict_file=dict_file,
                           hmm_folder=hmm_folder,
                           threshold=threshold)
            raise RuntimeError('Invalid Pocketsphinx options: ' + str(options))

    def _transcribe(self, byte_data):
        self.decoder.start_utt()
        self.decoder.process_raw(byte_data, False, False)
        self.decoder.end_utt()
        return self.decoder.hyp()

    def found_wake_word(self, frame_data):
        hyp = self._transcribe(frame_data + b'\0' * int(2 * 16000 * 0.01))
        return bool(hyp and self.key_phrase in hyp.hypstr.lower())

    def update(self, stream: Union[BinaryIO, np.ndarray, bytes]) -> float:
        if isinstance(stream, np.ndarray):
            chunk = audio_to_buffer(stream)
        else:
            if isinstance(stream, (bytes, bytearray)):
                chunk = stream
            else:
                chunk = stream.read(self.read_size)
            if len(chunk) == 0:
                raise EOFError
        self.buffer = self.buffer[len(chunk):] + chunk
        return float(self.found_wake_word(self.buffer))
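Example #13 keeps a fixed-size rolling byte buffer (self.buffer = self.buffer[len(chunk):] + chunk) so that every update re-decodes the most recent window of audio rather than an ever-growing stream. The sliding-window idea in isolation, with a tiny window for illustration:

WINDOW = 8  # bytes; the real code uses pr.sample_depth * pr.buffer_samples
buffer = b'\0' * WINDOW
for chunk in (b'abc', b'defg', b'hi'):
    buffer = buffer[len(chunk):] + chunk  # drop the oldest bytes, append the newest
    print(buffer)
# b'\x00\x00\x00\x00\x00abc'
# b'\x00abcdefg'
# b'bcdefghi'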
Code Example #14
File: views.py Project: dustin-nguyen/MAST
def retrieve_scores(word):
    filename = word + '.wav'
    grammarname = word + '-align.jsgf'
    model_path = get_model_path()

    # Initialize the config values
    config = DefaultConfig()
    config.set_boolean('-verbose', False)
    config.set_string('-hmm', os.path.join(model_path, 'en-us'))
    config.set_boolean('-lm', False)
    config.set_string('-dict', 'phonemes.dict.txt')
    config.set_boolean('-backtrace', True)
    config.set_boolean('-bestpath', False)
    config.set_boolean('-fsgusefiller', False)

    decoder = Decoder(config)

    # Set the search to JSGF Grammar
    jsgf = Jsgf(grammarname)
    rule = jsgf.get_rule('forcing.' + word)
    decoder.set_jsgf_file('grammar', grammarname)
    decoder.set_search('grammar')

    stream = open(filename, 'rb')
    utt_started = False
    scores = []
    decoder.start_utt()

    while True:
        buf = stream.read(1024)
        if buf:
            decoder.process_raw(buf, False, False)
            in_speech = decoder.get_in_speech()
            if (in_speech and not utt_started):
                utt_started = True
            if (not in_speech and utt_started):
                decoder.end_utt()
                hyp = decoder.hyp()
                if hyp is not None:
                    print('hyp: %s' % (hyp.best_score))
                print_segments(decoder)
                scores = retrieve_segments(decoder)
                decoder.start_utt()
                utt_started = False
        else:
            break

    decoder.end_utt()
    print('scores:', scores)

    return scores
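Example #14 scores a recording against a JSGF grammar (forced alignment), loading the rule forcing.<word> from <word>-align.jsgf and resolving symbols through phonemes.dict.txt. A hypothetical hello-align.jsgf with the shape the code expects might look like this (the phoneme sequence is purely illustrative):

#JSGF V1.0;
grammar forcing;
// rule "forcing.hello": the target word spelled out as dictionary symbols
public <hello> = sil hh ah l ow sil;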
Code Example #15
def get_phonemes(file):
    # Decode streaming data
    decoder = Decoder(config)
    decoder.start_utt()
    stream = open(file, 'rb')
    while True:
        buf = stream.read(1024)
        if buf:
            decoder.process_raw(buf, False, False)
        else:
            break
    decoder.end_utt()

    # decoder.hyp() would give the full hypothesis; here only the word segments are returned
    return [seg.word for seg in decoder.seg()]
Code Example #16
class LocalRecognizer(object):
    def __init__(self, key_phrase, phonemes, threshold, sample_rate=16000,
                 lang="en-us"):
        self.lang = lang
        self.key_phrase = key_phrase
        self.sample_rate = sample_rate
        self.threshold = threshold
        self.phonemes = phonemes
        dict_name = self.create_dict(key_phrase, phonemes)
        self.decoder = Decoder(self.create_config(dict_name))

    def create_dict(self, key_phrase, phonemes):
        (fd, file_name) = tempfile.mkstemp()
        words = key_phrase.split()
        phoneme_groups = phonemes.split('.')
        with os.fdopen(fd, 'w') as f:
            for word, phoneme in zip(words, phoneme_groups):
                f.write(word + ' ' + phoneme + '\n')
        return file_name

    def create_config(self, dict_name):
        config = Decoder.default_config()
        config.set_string('-hmm', os.path.join(BASEDIR, 'model', self.lang,
                                               'hmm'))
        config.set_string('-dict', dict_name)
        config.set_string('-keyphrase', self.key_phrase)
        config.set_float('-kws_threshold', float(self.threshold))
        config.set_float('-samprate', self.sample_rate)
        config.set_int('-nfft', 2048)
        config.set_string('-logfn', '/dev/null')
        return config

    def transcribe(self, byte_data, metrics=None):
        start = time.time()
        self.decoder.start_utt()
        self.decoder.process_raw(byte_data, False, False)
        self.decoder.end_utt()
        if metrics:
            metrics.timer("mycroft.stt.local.time_s", time.time() - start)
        return self.decoder.hyp()

    def is_recognized(self, byte_data, metrics):
        hyp = self.transcribe(byte_data, metrics)
        return hyp and self.key_phrase in hyp.hypstr.lower()

    def found_wake_word(self, hypothesis):
        return hypothesis and self.key_phrase in hypothesis.hypstr.lower()
Code Example #17
class CMUSphinxRecognizer(BaseRecognizer):
    def __init__(self):
        config = Decoder.default_config()
        config.set_string('-hmm', SPHINX_HMM)
        config.set_string('-lm', SPHINX_LM)
        config.set_string('-dict', SPHINX_DICT)
        self.decoder = Decoder(config)

    def recognize(self, raw_audio):
        file_path = self.__save_file(raw_audio)
        with open(file_path, 'rb') as wav_fp:  # binary mode; decode_raw reads raw bytes
            self.decoder.decode_raw(wav_fp)
            hypothesis = self.decoder.hyp()
            return hypothesis.hypstr, hypothesis.best_score, hypothesis.prob

    @staticmethod
    def __save_file(data):
        tmp_fp = NamedTemporaryFile(delete=False)
        tmp_fp.write(data)
        tmp_fp.close()
        return tmp_fp.name
Code Example #18
    def begin_passive_listening(self):
        """Uses PocketSphinx to listen for the wakeword and call the active
           listening function
        """
        config = Decoder.default_config()
        config.set_string('-hmm', os.path.join(get_model_path(), 'en-us'))
        config.set_string('-dict',
                          os.path.join(get_model_path(), 'cmudict-en-us.dict'))
        config.set_string('-keyphrase',
                          self.config.get("general", "wake_word"))
        config.set_string('-logfn', 'nul')
        config.set_float('-kws_threshold', 1e-10)

        p = pyaudio.PyAudio()
        stream = p.open(format=pyaudio.paInt16,
                        channels=1,
                        rate=16000,
                        input=True,
                        frames_per_buffer=1024)
        stream.start_stream()

        decoder = Decoder(config)
        decoder.start_utt()

        while True:
            buf = stream.read(1024)
            decoder.process_raw(buf, False, False)
            if decoder.hyp() is not None:
                logging.debug("Wake word recognized")
                speech_input = self.active_listen()
                if (speech_input != -1 and speech_input != -2
                        and speech_input != -3):
                    for name, command in self.commands.items():
                        if speech_input in name:
                            command()
                elif speech_input == -1:
                    self.speak("Sorry, I didn't catch that.")
                decoder.end_utt()
                decoder.start_utt()
                logging.debug("Listening for wakeword again")
Code Example #19
def transcribe(decoder: pocketsphinx.Decoder,
               audio_data: bytes,
               nbest: int = 0) -> Dict[str, Any]:
    """Transcribes audio data to text."""
    # Process data as an entire utterance
    start_time = time.time()
    decoder.start_utt()
    decoder.process_raw(audio_data, False, True)
    decoder.end_utt()
    end_time = time.time()

    logger.debug(f"Decoded audio in {end_time - start_time} second(s)")

    transcription = ""
    decode_seconds = end_time - start_time
    likelihood = 0.0
    score = 0

    hyp = decoder.hyp()
    if hyp is not None:
        likelihood = decoder.get_logmath().exp(hyp.prob)
        transcription = hyp.hypstr

    result = {
        "text": transcription,
        "transcribe_seconds": decode_seconds,
        "likelihood": likelihood,
    }

    if nbest > 0:
        # Include alternative transcriptions
        result["nbest"] = {
            nb.hypstr: nb.score
            for nb in decoder.nbest()[:nbest]
        }

    return result
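A usage sketch for Example #19's transcribe helper, assuming a decoder configured as in the earlier examples and a 16 kHz, 16-bit mono WAV at a placeholder path:

import wave

with wave.open("utterance.wav", "rb") as wav:  # placeholder filename
    audio = wav.readframes(wav.getnframes())

result = transcribe(decoder, audio, nbest=3)
print(result["text"], result["likelihood"], result["transcribe_seconds"])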
Code Example #20
File: voicergn_V2.py Project: casyazmon/mars_city
def record(listen_time):

    THRESHOLD = None
    WAVE_OUTPUT_FILENAME = "livewav.wav"

    p = pyaudio.PyAudio()
    if THRESHOLD is None:
        THRESHOLD = fetchThreshold()
        print(THRESHOLD)

    stream = p.open(format=FORMAT,
                    channels=1,
                    rate=RATE,
                    input=True,
                    frames_per_buffer=CHUNK)

    print "* recording"
    frames = []
    detected=False
    for i in range(0, int(RATE / CHUNK * listen_time)):
        data = stream.read(CHUNK)
        frames.append(data)
        score = getScore(data)
        if score < THRESHOLD:
            continue
        else:
            detected = True
    if not detected:
        print("nothing detected")
        return ""

    print "* done recording"
    #stream.stop_stream()
    stream.close()
    p.terminate()

    # write data to WAVE file
    data = b''.join(frames)  # join raw PCM chunks as bytes
    wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
    wf.setnchannels(1)
    wf.setsampwidth(p.get_sample_size(FORMAT))
    wf.setframerate(RATE)
    wf.writeframes(data)
    wf.close()
    sysdir = os.getcwd()
    wavfile = sysdir+"/livewav.wav"
    config = Decoder.default_config()
    config.set_string('-hmm', hmdir)
    config.set_string('-lm', lmdir)
    config.set_string('-dict', dictd)
    config.set_string('-logfn', '/dev/null')

    speechRec = Decoder(config)


    with open(wavfile, 'rb') as wavFile:
        speechRec.decode_raw(wavFile)
        #result = speechRec.get_hyp()


    return(speechRec.hyp().hypstr)
Code Example #21
File: robot.py Project: Nandopolis/nayra_api
class TestVoice(Voice):
    # playback
    FORMAT = pyaudio.paInt16
    CHANNELS = 1
    RATE = 44100
    CHUNK = 1024
    FILE_NAME = 'aux.wav'

    # recognition
    MODELDIR = "es-ES"
    GRAMMARDIR = "gram"

    # text to speech
    TTS_RATE = 150  # renamed from RATE, which silently clobbered the 44100 Hz sample rate above
    VOLUME = 0.9

    def __init__(self, file_name='aux.wav', raspi=False):
        self.FILE_NAME = file_name
        self.audio = pyaudio.PyAudio()
        self.raspi = raspi

        self.config = Decoder.default_config()
        self.config.set_string('-hmm',
                               os.path.join(self.MODELDIR, 'acoustic-model'))
        self.config.set_string(
            '-dict',
            os.path.join(self.MODELDIR, 'pronounciation-dictionary.dict'))
        self.config.set_string('-logfn', os.devnull)
        self.decoder = Decoder(self.config)
        self.r = sr.Recognizer()
        print("adjunting...")
        with sr.Microphone() as source:
            self.r.adjust_for_ambient_noise(source)

        # tts
        self.tts = pyttsx3.init()
        self.tts.setProperty('rate', self.TTS_RATE)
        self.tts.setProperty('volume', self.VOLUME)
        self.tts.setProperty('voice', 'spanish-latin-am')

    def speak(self, phrase):
        self.tts.say(phrase)
        self.tts.runAndWait()

    def play(self, filename):
        extension = filename.split('.')[1]
        if extension == 'wav':
            wf = wave.open(filename, 'rb')
            stream = self.audio.open(format=self.audio.get_format_from_width(
                wf.getsampwidth()),
                                     channels=wf.getnchannels(),
                                     rate=wf.getframerate(),
                                     output=True)
            data = wf.readframes(self.CHUNK)

            # play
            while len(data) > 0:
                stream.write(data)
                data = wf.readframes(self.CHUNK)
            stream.stop_stream()
            stream.close()
        elif extension == 'mp3':
            playsound(filename)

    def listen(self, duration=3):
        # start recording
        if self.raspi:
            stream = self.audio.open(format=self.FORMAT,
                                     channels=1,
                                     rate=self.RATE,
                                     input_device_index=2,
                                     input=True,
                                     frames_per_buffer=self.CHUNK)
        else:
            stream = self.audio.open(format=self.FORMAT,
                                     channels=self.CHANNELS,
                                     rate=self.RATE,
                                     input_device_index=7,
                                     input=True,
                                     frames_per_buffer=self.CHUNK)

        frames = []

        for i in range(0, int(self.RATE / self.CHUNK * duration)):
            data = stream.read(self.CHUNK, exception_on_overflow=False)
            frames.append(data)

        stream.stop_stream()
        stream.close()

        wave_file = wave.open(self.FILE_NAME, 'wb')
        if self.raspi:
            wave_file.setnchannels(1)
        else:
            wave_file.setnchannels(self.CHANNELS)

        wave_file.setsampwidth(self.audio.get_sample_size(self.FORMAT))
        wave_file.setframerate(self.RATE)
        wave_file.writeframes(b''.join(frames))
        wave_file.close()

        with sr.AudioFile(self.FILE_NAME) as source:
            audio = self.r.record(source)

        raw_data = audio.get_raw_data(convert_rate=16000, convert_width=2)

        return raw_data

    def echo(self):
        self.play(self.FILE_NAME)

    def recognize(self):
        with sr.Microphone() as source:
            audio = self.r.listen(source)

        # raw_out = self.listen()
        try:
            self.decoder.start_utt()
            self.decoder.process_raw(audio.frame_data, False, True)
            self.decoder.end_utt()
            hyp = self.decoder.hyp()
            return hyp.hypstr

        except Exception:
            return None

    def loadGrammar(self, grammar):
        # delete(self.decoder)
        grammar_file = grammar + '.gram'
        c_string = os.path.join(self.GRAMMARDIR,
                                grammar_file)  #.encode('ascii')
        print(c_string)

        self.config.set_string('-jsgf', c_string)

        self.decoder.reinit(self.config)

    def close(self):
        self.audio.terminate()
コード例 #22
0
'''
Created on Dec 29, 2013


@author: Mindaugas Greibus
'''
import sys, os



from pocketsphinx import Decoder

MODELDIR = "../models"

# Create a decoder with certain model
config = Decoder.default_config()
config.set_string('-hmm', os.path.join(MODELDIR, 'hmm/lt.cd_cont_200/'))
config.set_string('-jsgf', os.path.join(MODELDIR, 'lm/robotas.gram'))
config.set_string('-dict', os.path.join(MODELDIR, 'dict/robotas.dict'))
decoder = Decoder(config)

decoder.decode_raw(open(os.path.join(MODELDIR, '../test/audio/varyk_pirmyn-16k.wav'), 'rb'))

# Retrieve hypothesis.
hypothesis = decoder.hyp()
print ('Best hypothesis: ', hypothesis.best_score, hypothesis.hypstr)
print ('Best hypothesis segments: ', [seg.word for seg in decoder.seg()])


Code Example #23
File: kws.py Project: lpdink/Jamming
class KeywordSpotting(threading.Thread):
    def __init__(self, in_fs, out_fs, mute_period_length, kws_frame_length):
        threading.Thread.__init__(self)
        # initialize configuration
        self.daemon = True
        self.exit_flag = False
        self.in_fs = in_fs
        self.out_fs = out_fs
        self.mute_period_frames_count = int(in_fs * mute_period_length)
        self.kws_frames_count = int(in_fs * kws_frame_length)
        model_path = get_model_path()
        config = Decoder.default_config()
        config.set_string('-hmm', os.path.join(model_path, 'en-us'))  # acoustic model path
        # config.set_string('-lm',"./tests/7567.lm")
        config.set_string('-dict',
                          os.path.join(model_path,
                                       'cmudict-en-us.dict'))  # dictionary path
        config.set_string('-keyphrase', 'alexa')
        config.set_float('-kws_threshold', 1e-20)
        config.set_string('-logfn', './logs/tmp')  # send INFO logging elsewhere
        self.decoder = Decoder(config)
        self.decoder.start_utt()

        self.start()

    def run(self):
        while not self.exit_flag:
            # 1. Read a fixed amount of data from the input pool. This may block
            #    until enough data is available in the pool.
            processed_input_frames = global_var.processed_input_pool.get(
                self.kws_frames_count)

            # 2. If keyword spotting detects the keyword in this segment, resample
            #    the data, pad it, and store it in the keyword pool.
            if self._kws(processed_input_frames):
                global_var.keyword_pool.put(
                    self._padding(
                        Resampler.resampling(processed_input_frames,
                                             self.in_fs, self.out_fs), 0,
                        self.mute_period_frames_count))

    def stop(self):
        self.exit_flag = True
        self.join()

    def _kws(self, frames):
        buf = frames.tobytes()
        if buf:
            self.decoder.process_raw(buf, False, False)
            if self.decoder.hyp() is not None:
                print([(seg.word, seg.prob, seg.start_frame, seg.end_frame)
                       for seg in self.decoder.seg()])
                print("Detected keyphrase, restarting search")
                self.decoder.end_utt()
                self.decoder.start_utt()
                return True
        return False

    def _padding(self, frames, padding_value, padding_num):
        res = np.pad(frames, (0, padding_num),
                     'constant',
                     constant_values=(padding_value, padding_value))
        return res
Code Example #24
class SphinxWrapper(object):
    '''
Audio is fed to the decoder through the `process_raw(...)` method, which also
updates the VAD status (whether voice was found in the signal).
Before audio is fed, the decoder must be instructed that a new utterance is expected.
When the VAD reports that the speech segment has ended, `stopListening(...)` must be
called; only then can the hypothesis of what was said be requested via
`calculateHypothesis(...)`.
    '''

    #MODELDIR = "../models"
    #MODELDIR = "/home/as/src/speech/sphinx/lt-pocketsphinx-tutorial/impl/models"
    MODELDIR = "../../lt-pocketsphinx-tutorial/impl/models"

    decoder = None
    config = None
    previousVadState = 0
    currentVadState = 0

    def __init__(self):
        '''
        Constructor
        '''

    def prepareDecoder(self, pGramma):
        '''
        Entry point where sphinx decoder is initialized or grammar updated
        '''
        if self.decoder is None:
            self.config = self.createConfig(pGramma)
            self.decoder = Decoder(self.config)
        else:
            self.updateGrammar(pGramma)  # fixed: updateGrammar takes only the grammar name

    def createConfig(self,pGramma):
        '''
        Create configuration with acoustic model path, grammar and dictionary
        '''
        print ("[createConfig]+++")
        config = Decoder.default_config()
        config.set_string('-hmm', os.path.join(self.MODELDIR, 'hmm/lt.cd_cont_200/'))
        config.set_string('-fsg', os.path.join("../resource/", pGramma+'.fsg'))
        #config.set_string('-jsgf', os.path.join("../resource/", pGramma+'.gram'))
        config.set_string('-dict', os.path.join("../resource/", 'service.dict'))
        print ("[createConfig]---")
        return config

    def updateGrammar(self,pGramma):
        '''
        Update decoder language model from fsg file
        '''
        print ("[updateGrammar]+++" + pGramma)
        logmath = self.decoder.get_logmath()
        fsg = sphinxbase.FsgModel(os.path.join("../resource/", pGramma + '.fsg'), logmath, 7.5)
        self.decoder.set_fsg("default", fsg)
        self.decoder.set_search("default")
        print ("[updateGrammar]---")

    def startListening(self):
        """
        Instruct decoder that new utterace should be expected
        """
        self.decoder.start_utt(None)


    def stopListening(self):
        """
        Instruct decoder that new utterace should is not expected any more
        """
        self.decoder.end_utt()


    def process_raw(self, data):
        """
        Feed decoder with raw audio data. After data is updating refresh VAD state
        """
        #print("process_raw...\n")
        self.decoder.process_raw(data, False, False)
        self.previousVadState = self.currentVadState
        self.currentVadState = self.decoder.get_vad_state()
        #print("process_raw", self.currentVadState and True, self.previousVadState and True)

    def calculateHypothesis(self):
        return self.decoder.hyp()

    def calculateVadState(self):
        return self.decoder.get_vad_state()  # call the method rather than returning it

    def isVoiceStarted(self):
        '''
        silence -> speech transition,
        '''
        return self.currentVadState and not self.previousVadState

    def isVoiceEnded(self):
        '''
        speech -> silence transition,
        '''
        return not self.currentVadState and self.previousVadState
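The SphinxWrapper docstring above prescribes a driving protocol: arm a new utterance, feed audio until the VAD reports the speech-to-silence transition, then stop listening and ask for the hypothesis. A sketch of a driver loop under those assumptions (audio_chunks is any iterable of raw PCM byte chunks):

def drive(wrapper, audio_chunks):
    """Run the SphinxWrapper protocol over a stream of raw audio chunks."""
    wrapper.startListening()  # arm a new utterance
    for chunk in audio_chunks:
        wrapper.process_raw(chunk)  # feeds the decoder and refreshes the VAD state
        if wrapper.isVoiceEnded():  # speech -> silence transition
            wrapper.stopListening()
            hyp = wrapper.calculateHypothesis()
            if hyp is not None:
                print(hyp.hypstr)
            wrapper.startListening()  # arm the next utterance
    wrapper.stopListening()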
Code Example #25
class PocketSphinxAsr(Asr):
    NAME = 'Pocketsphinx Asr'
    DEPENDENCIES = {
        'system': ['swig', 'libpulse-dev'],
        'pip': ['pocketsphinx==0.1.15']
    }

    LANGUAGE_PACK = {
        f'{constants.GITHUB_URL}/cmusphinx-models/blob/master/%lang%/%lang%.tar',
        f'{constants.GITHUB_URL}/cmusphinx-models/blob/master/%lang%/%lang%.lm.bin',
        f'{constants.GITHUB_URL}/cmusphinx-models/blob/master/%lang%/cmudict-%lang%.dict'
    }

    def __init__(self):
        super().__init__()
        self._capableOfArbitraryCapture = True
        self._isOnlineASR = False
        self._decoder: Optional[Decoder] = None
        self._config = None

    def onStart(self):
        super().onStart()

        if not self.checkLanguage():
            self.downloadLanguage()

        self._config = Decoder.default_config()
        self._config.set_string(
            '-hmm',
            f'{self.Commons.rootDir()}/venv/lib/python3.7/site-packages/pocketsphinx/model/{self.LanguageManager.activeLanguageAndCountryCode.lower()}'
        )
        self._config.set_string(
            '-lm',
            f'{self.Commons.rootDir()}/venv/lib/python3.7/site-packages/pocketsphinx/model/{self.LanguageManager.activeLanguageAndCountryCode.lower()}.lm.bin'
        )
        self._config.set_string(
            '-dict',
            f'{self.Commons.rootDir()}/venv/lib/python3.7/site-packages/pocketsphinx/model/cmudict-{self.LanguageManager.activeLanguageAndCountryCode.lower()}.dict'
        )
        self._decoder = Decoder(self._config)

    def checkLanguage(self) -> bool:
        if not Path(
                self.Commons.rootDir(),
                f'venv/lib/python3.7/site-packages/pocketsphinx/model/{self.LanguageManager.activeLanguageAndCountryCode.lower()}'
        ).exists():
            self.logInfo('Missing language model')
            return False

        return True

    def timeout(self):
        super().timeout()
        try:
            self._decoder.end_utt()
        except:
            # If this fails we don't care, at least we tried to close the utterance
            pass

    def downloadLanguage(self, forceLang: str = '') -> bool:
        lang = forceLang or self.LanguageManager.activeLanguageAndCountryCode
        self.logInfo(f'Downloading language model for "{lang}"')

        venv = Path(self.Commons.rootDir(),
                    'venv/lib/python3.7/site-packages/pocketsphinx/')
        for url in self.LANGUAGE_PACK:
            url = url.replace('%lang%', lang.lower())
            filename = Path(url).name
            download = Path(venv, 'model', filename)
            result = self.Commons.downloadFile(url=f'{url}?raw=true',
                                               dest=str(download))
            if not result:
                if forceLang:
                    return False
                else:
                    # TODO be universal
                    self.downloadLanguage(forceLang='en-US')
            else:
                if download.suffix == '.tar':
                    dest = Path(venv, 'model', lang.lower())

                    if dest.exists():
                        shutil.rmtree(dest)

                    tar = tarfile.open(str(download))
                    tar.extractall(str(dest))

                    download.unlink()

        self.logInfo('Downloaded and installed')
        return True

    def decodeStream(self, session: DialogSession) -> Optional[ASRResult]:
        super().decodeStream(session)

        result = None
        counter = 0
        with Stopwatch() as processingTime:
            with Recorder(self._timeout, session.user,
                          session.deviceUid) as recorder:
                self.ASRManager.addRecorder(session.deviceUid, recorder)
                self._recorder = recorder
                self._decoder.start_utt()
                inSpeech = False
                for chunk in recorder:
                    if self._timeout.isSet():
                        break

                    self._decoder.process_raw(chunk, False, False)
                    hypothesis = self._decoder.hyp()
                    if hypothesis:
                        counter += 1
                        if counter == 10:
                            self.partialTextCaptured(session,
                                                     hypothesis.hypstr,
                                                     hypothesis.prob,
                                                     processingTime.time)
                            counter = 0
                    if self._decoder.get_in_speech() != inSpeech:
                        inSpeech = self._decoder.get_in_speech()
                        if not inSpeech:
                            self._decoder.end_utt()
                            result = self._decoder.hyp() if self._decoder.hyp(
                            ) else None
                            break

                self.end()

        return ASRResult(
            text=result.hypstr.strip(),
            session=session,
            likelihood=self._decoder.hyp().prob,
            processingTime=processingTime.time) if result else None
Code Example #26
class SphinxWrapper(object):
    '''
Audio is fed to the decoder through the `process_raw(...)` method, which also
updates the VAD status (whether voice was found in the signal).
Before audio is fed, the decoder must be instructed that a new utterance is expected.
When the VAD reports that the speech segment has ended, `stopListening(...)` must be
called; only then can the hypothesis of what was said be requested via
`calculateHypothesis(...)`.
    '''

    #MODELDIR = "../models"
    #MODELDIR = "/home/as/src/speech/sphinx/lt-pocketsphinx-tutorial/impl/models"
    MODELDIR = "../../lt-pocketsphinx-tutorial/impl/models"

    decoder = None
    config = None
    previousVadState = 0
    currentVadState = 0

    def __init__(self):
        '''
        Constructor
        '''

    def prepareDecoder(self, pGramma):
        '''
        Entry point where sphinx decoder is initialized or grammar updated
        '''
        if self.decoder is None:
            self.config = self.createConfig(pGramma)
            self.decoder = Decoder(self.config)
        else:
            self.updateGrammar(pGramma)  # fixed: updateGrammar takes only the grammar name

    def createConfig(self, pGramma):
        '''
        Create configuration with acoustic model path, grammar and dictionary
        '''
        print("[createConfig]+++")
        config = Decoder.default_config()
        config.set_string('-hmm',
                          os.path.join(self.MODELDIR, 'hmm/lt.cd_cont_200/'))
        config.set_string('-fsg', os.path.join("../resource/",
                                               pGramma + '.fsg'))
        #config.set_string('-jsgf', os.path.join("../resource/", pGramma+'.gram'))
        config.set_string('-dict', os.path.join("../resource/",
                                                'service.dict'))
        print("[createConfig]---")
        return config

    def updateGrammar(self, pGramma):
        '''
        Update decoder language model from fsg file
        '''
        print("[updateGrammar]+++" + pGramma)
        logmath = self.decoder.get_logmath()
        fsg = sphinxbase.FsgModel(
            os.path.join("../resource/", pGramma + '.fsg'), logmath, 7.5)
        self.decoder.set_fsg("default", fsg)
        self.decoder.set_search("default")
        print("[updateGrammar]---")

    def startListening(self):
        """
        Instruct the decoder that a new utterance is expected
        """
        self.decoder.start_utt(None)

    def stopListening(self):
        """
        Instruct the decoder that no further utterance audio is expected
        """
        self.decoder.end_utt()

    def process_raw(self, data):
        """
        Feed the decoder raw audio data, then refresh the VAD state
        """
        #print("process_raw...\n")
        self.decoder.process_raw(data, False, False)
        self.previousVadState = self.currentVadState
        self.currentVadState = self.decoder.get_vad_state()
        #print("process_raw", self.currentVadState and True, self.previousVadState and True)

    def calculateHypothesis(self):
        return self.decoder.hyp()

    def calculateVadState(self):
        return self.decoder.get_vad_state()  # call the method rather than returning it

    def isVoiceStarted(self):
        '''
        silence -> speech transition,
        '''
        return self.currentVadState and not self.previousVadState

    def isVoiceEnded(self):
        '''
        speech -> silence transition,
        '''
        return not self.currentVadState and self.previousVadState
Code Example #27
    def run_decoder(self, stream):
        # Process audio chunk by chunk. On keyword detected process/restart
        decoder = Decoder(self.config)
        #decoder.set_search('keywords')
        decoder.start_utt()

        last_decode_str = None
        last_decode_time = perf_counter()
        # https://stackoverflow.com/a/47371315/8903959
        while True:
            buf = stream.read(1024)
            if buf:
                decoder.process_raw(buf, False, False)
            else:
                break

            _time_check = datetime.now().replace(second=0, microsecond=0)
            if _time_check in self.callback_time_dict:
                stream.stop_stream()
                decoder.end_utt()
                self.callback_time_dict[_time_check]()
                # Wait until the next minute
                time.sleep(60)
                stream.start_stream()
                print("Listening again\r")
                decoder.start_utt()
                just_restarted = True

            if decoder.hyp() is not None:
                if last_decode_str == decoder.hyp().hypstr:
                    reset_max = 5
                    if perf_counter() - last_decode_time > reset_max:
                        print(
                            f"No kwrds in the last {reset_max}s, resetting\r")
                        decoder.end_utt()
                        decoder.start_utt()
                    continue

                else:
                    last_decode_str = decoder.hyp().hypstr
                    last_decode_time = perf_counter()
                    print(decoder.hyp().hypstr + "\r")

                just_restarted = False
                split_words = decoder.hyp().hypstr.lower().split()
                for i in range(len(split_words)):
                    together = " ".join(split_words[i:])
                    if together in self.callbacks_dict:
                        stream.stop_stream()
                        decoder.end_utt()
                        callback = self.callbacks_dict[together]
                        #print([(seg.word, seg.prob) for seg in decoder.seg()])
                        print(f"\n{callback.__name__}")
                        try:
                            callback(decoder.hyp().hypstr)
                        except Exception as e:
                            print(e)
                        stream.start_stream()
                        print("Listening again\r")
                        decoder.start_utt()
                        just_restarted = True
                        break

                if not just_restarted and len(decoder.hyp().hypstr) > 25:
                    print("No keyword, restarting search\r")
                    decoder.end_utt()
                    decoder.start_utt()
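
run_decoder assumes self.config was prepared beforehand; the commented-out set_search('keywords') call suggests a keyword-spotting setup. A minimal sketch of building such a config with pocketsphinx's -kws option, where keywords.list is a hypothetical file holding one phrase and detection threshold per line (e.g. "hey computer /1e-20/"):

# Sketch only: file names and the threshold format are illustrative.
import os
from pocketsphinx import Decoder, get_model_path

model_path = get_model_path()
config = Decoder.default_config()
config.set_string('-hmm', os.path.join(model_path, 'en-us'))
config.set_string('-dict', os.path.join(model_path, 'cmudict-en-us.dict'))
config.set_string('-kws', 'keywords.list')  # phrases to spot, with thresholds
config.set_string('-logfn', os.devnull)
# run_decoder would then receive this object as self.config.
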
Code example #28
class PocketSphinxASR(ASR):
	NAME = 'Pocketsphinx ASR'
	DEPENDENCIES = {
		'system': [
			'swig',
			'libpulse-dev'
		],
		'pip'   : [
			'pocketsphinx==0.1.15'
		]
	}

	LANGUAGE_PACKS = {
		'en': [
			f'{constants.GITHUB_URL}/cmusphinx-models/blob/master/en-us/en-us.tar',
			f'{constants.GITHUB_URL}/cmusphinx-models/blob/master/en-us/en-us.lm.bin',
			f'{constants.GITHUB_URL}/cmusphinx-models/blob/master/en-us/cmudict-en-us.dict'
		],
		'fr': [
			f'{constants.GITHUB_URL}/cmusphinx-models/blob/master/fr-fr/fr-fr.tar',
			f'{constants.GITHUB_URL}/cmusphinx-models/blob/master/fr-fr/fr-fr.lm.bin',
			f'{constants.GITHUB_URL}/cmusphinx-models/blob/master/fr-fr/cmudict-fr-fr.dict'
		],
		'de': [
			f'{constants.GITHUB_URL}/cmusphinx-models/blob/master/de-de/de-de.tar',
			f'{constants.GITHUB_URL}/cmusphinx-models/blob/master/de-de/de-de.lm.bin',
			f'{constants.GITHUB_URL}/cmusphinx-models/blob/master/de-de/cmudict-de-de.dict'
		]
	}


	def __init__(self):
		super().__init__()
		self._capableOfArbitraryCapture = True
		self._isOnlineASR = False
		self._decoder: Optional[Decoder] = None
		self._config = None


	def onStart(self):
		super().onStart()

		if not self.checkLanguage():
			self.downloadLanguage()

		self._config = Decoder.default_config()
		self._config.set_string('-hmm', f'{self.Commons.rootDir()}/venv/lib/python3.7/site-packages/pocketsphinx/model/{self.LanguageManager.activeLanguageAndCountryCode.lower()}')
		self._config.set_string('-lm', f'{self.Commons.rootDir()}/venv/lib/python3.7/site-packages/pocketsphinx/model/{self.LanguageManager.activeLanguageAndCountryCode.lower()}.lm.bin')
		self._config.set_string('-dict', f'{self.Commons.rootDir()}/venv/lib/python3.7/site-packages/pocketsphinx/model/cmudict-{self.LanguageManager.activeLanguageAndCountryCode.lower()}.dict')
		self._decoder = Decoder(self._config)


	def checkLanguage(self) -> bool:
		if not Path(self.Commons.rootDir(), f'venv/lib/python3.7/site-packages/pocketsphinx/model/{self.LanguageManager.activeLanguageAndCountryCode.lower()}').exists():
			self.logInfo('Missing language model')
			return False

		return True


	def timeout(self):
		super().timeout()
		try:
			self._decoder.end_utt()
		except Exception:
			# If this fails we don't care, at least we tried to close the utterance
			pass


	def downloadLanguage(self) -> bool:
		self.logInfo(f'Downloading language model for "{self.LanguageManager.activeLanguage}"')

		venv = Path(self.Commons.rootDir(), 'venv/lib/python3.7/site-packages/pocketsphinx/')
		for url in self.LANGUAGE_PACKS[self.LanguageManager.activeLanguage]:
			filename = Path(url).name
			download = Path(venv, 'model', filename)
			self.Commons.downloadFile(url=f'{url}?raw=true', dest=str(download))

			if download.suffix == '.tar':
				dest = Path(venv, 'model', self.LanguageManager.activeLanguageAndCountryCode.lower())

				if dest.exists():
					shutil.rmtree(dest)

				with tarfile.open(str(download)) as tar:
					tar.extractall(str(dest))

				download.unlink()

		self.logInfo('Downloaded and installed')
		return True


	def decodeStream(self, session: DialogSession) -> Optional[ASRResult]:
		super().decodeStream(session)

		result = None
		with Stopwatch() as processingTime:
			with Recorder(self._timeout) as recorder:
				self.ASRManager.addRecorder(session.siteId, recorder)
				self._decoder.start_utt()
				inSpeech = False
				for chunk in recorder:
					if self._timeout.isSet():
						break

					self._decoder.process_raw(chunk, False, False)
					if self._decoder.get_in_speech() != inSpeech:
						inSpeech = self._decoder.get_in_speech()
						if not inSpeech:
							self._decoder.end_utt()
							result = self._decoder.hyp()  # already None when nothing was recognized
							break

				self.end(recorder, session)

		return ASRResult(
			text=result.hypstr.strip(),
			session=session,
			likelihood=result.prob,
			processingTime=processingTime.time
		) if result else None
Code example #29
class VoiceIOHandler(JarvisIOHandler):
	
	def __init__(self):
		JarvisIOHandler.__init__(self)
		hmm = '/usr/local/share/pocketsphinx/model/en-us/en-us'
		dic ='/usr/local/share/pocketsphinx/model/en-us/cmudict-en-us.dict'
		lm ='/usr/local/share/pocketsphinx/model/en-us/en-us.lm.bin'

		config = Decoder.default_config()
		config.set_string('-hmm',hmm)
		config.set_string('-lm',lm)
		config.set_string('-dict',dic)
		config.set_string('-logfn','/dev/null')

		self.decoder = Decoder(config)

		self.microphone = pyaudio.PyAudio()

		pyvona_config = open('configs/pyvona.txt')
		pvcfg = pyvona_config.readlines()
		pyvona_config.close()
		self.voice = pyvona.create_voice(pvcfg[0].strip(),pvcfg[1].strip())
		self.voice.region = 'us-west'
		self.voice.voice_name='Brian'
		self.voice.sentence_break = 200

		googleSTT_config = open('configs/GoogleSTT.txt')
		self.key = googleSTT_config.readlines()[0].strip()
		googleSTT_config.close()
		self.recognizer = sr.Recognizer()
		with sr.Microphone() as source:
			self.recognizer.adjust_for_ambient_noise(source)

	def waitForInput(self):
		if self._isLowPower:
			utt = ''
			stream = self.microphone.open(format=pyaudio.paInt16, channels=1, rate=16000, input=True, frames_per_buffer=1024)
			stream.start_stream()
			in_speech_bf = True
			self.decoder.start_utt()
			while True:
				buf = stream.read(1024)
				if buf:
					self.decoder.process_raw(buf, False, False)
					if self.decoder.get_in_speech() != in_speech_bf:
						in_speech_bf = self.decoder.get_in_speech()
						if not in_speech_bf:
							self.decoder.end_utt()
							try:
								if  self.decoder.hyp().hypstr != '':
									utt = self.decoder.hyp().hypstr
									break
							except AttributeError:
								pass
							self.decoder.start_utt()
			stream.stop_stream()
			stream.close()
			print(utt)
			return utt.lower().strip()
		
		else:
			with sr.Microphone() as source:
				print('Listening')
				audio = self.recognizer.listen(source)

			print('Recognizing...')
			try:
				rec = self.recognizer.recognize_google(audio,key=self.key).lower().strip()
				print(rec)
				return rec
			except sr.UnknownValueError:
				print("Google Speech Recognition could not understand audio")
				return 'CNU'
			except sr.RequestError as e:
				print("Could not request results from Google Speech Recognition service; {0}".format(e))
				return 'CNC'
			

	def output(self,text_to_output):
		self.voice.speak(text_to_output)
Code example #30
def recognition_worker(audio_file,
                       queue, event, max_no_speech=120, debug=False,
                       hmm='/usr/local/share/pocketsphinx/model/en-us/en-us',
                       lm='/usr/local/share/pocketsphinx/model/en-us/en-us.lm.bin',
                       cmudict='/usr/local/share/pocketsphinx/model/en-us/cmudict-en-us.dict'):
    '''
    Read audio from `audio_file` and feed it to pocketsphinx.
    Put recognized text in `queue`. Shut down if `event` is set.
    If no speech is detected for `max_no_speech` seconds, set
    `event` and quit.
    '''
    from pocketsphinx import Decoder
    config = Decoder.default_config()
    config.set_string('-hmm', hmm)
    config.set_string('-lm', lm)
    config.set_string('-dict', cmudict)
    if not debug:
        config.set_string('-logfn', '/dev/null')
    decoder = Decoder(config)
    in_speech_bf = True
    no_speech_timer = None
    now_in_speech = False
    decoder.start_utt()
    try:
        with open(audio_file, 'rb') as f:
            f.read(44)  # skip the canonical 44-byte RIFF/WAVE header
            # TODO: Probably should sanity check the audio format...
            while not event.is_set():
                buf = f.read(1024)
                if buf:
                    decoder.process_raw(buf, False, False)
                    now_in_speech = decoder.get_in_speech()
                    if debug and now_in_speech:
                        print('Found speech', file=sys.stderr)
                    if now_in_speech != in_speech_bf:
                        in_speech_bf = now_in_speech
                        if not in_speech_bf:
                            if debug:
                                print('Processing speech', file=sys.stderr)
                            # No speech, but there was speech before, so, process.
                            decoder.end_utt()
                            try:
                                speech = decoder.hyp().hypstr
                                if speech != '':
                                    if debug:
                                        print('Speech: ' + speech, file=sys.stderr)
                                    queue.put_nowait(speech)
                            except AttributeError:
                                pass
                            decoder.start_utt()
                        else:
                            # Got some speech, reset timer.
                            no_speech_timer = None
                else:
                    if debug:
                        print('No audio', file=sys.stderr)
                    # Wait a bit...
                    event.wait(0.1)
                if not now_in_speech:
                    if no_speech_timer is None:
                        no_speech_timer = datetime.datetime.now()
                    elif (datetime.datetime.now() - no_speech_timer).total_seconds() > max_no_speech:
                        if debug:
                            print('No speech, timing out', file=sys.stderr)
                        event.set()
    except KeyboardInterrupt:
        pass
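
Because recognition_worker blocks until it times out or `event` is set, it is naturally run in its own process. A minimal sketch of a driver matching the function's signature ('speech.wav' is a placeholder for a 16 kHz mono RIFF capture):

# Sketch only: the audio file name is hypothetical.
import multiprocessing
from queue import Empty

q = multiprocessing.Queue()
stop = multiprocessing.Event()
worker = multiprocessing.Process(target=recognition_worker,
                                 args=('speech.wav', q, stop),
                                 kwargs={'debug': True})
worker.start()
while not stop.is_set():
    try:
        print('Recognized:', q.get(timeout=1))
    except Empty:
        continue
worker.join()
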
Code example #31
class TestVoice(Voice):
    # playback
    FORMAT = pyaudio.paInt16
    CHANNELS = 1
    RATE = 44100
    CHUNK = 1024
    FILE_NAME = 'aux.wav'

    # recognition
    MODELDIR = "es-ES"
    GRAMMARDIR = "gram"

    # text to speech
    TTS_RATE = 150  # renamed from RATE: 44100 above is the audio sample rate
    VOLUME = 0.9

    def __init__(self, file_name='aux.wav', raspi=False, local=True):

        ## load environment

        self.FILE_NAME = file_name
        self.audio = pyaudio.PyAudio()
        self.raspi = raspi

        self.local = local

        self.config = Decoder.default_config()
        self.config.set_string('-hmm',
                               os.path.join(self.MODELDIR, 'acoustic-model'))
        self.config.set_string(
            '-dict',
            os.path.join(self.MODELDIR, 'pronounciation-dictionary.dict'))
        self.config.set_string('-logfn', os.devnull)
        self.decoder = Decoder(self.config)
        self.r = sr.Recognizer()
        print("adjunting...")
        with sr.Microphone() as source:
            self.r.adjust_for_ambient_noise(source)

        # tts
        if self.local:
            self.tts = pyttsx3.init()
            self.tts.setProperty('rate', self.TTS_RATE)
            self.tts.setProperty('volume', self.VOLUME)
            self.tts.setProperty('voice', 'spanish-latin-am')
        else:
            # Instantiates a client
            self.tts_client = texttospeech.TextToSpeechClient()
            # Build the voice request, select the language code ("en-US") and the ssml
            # voice gender ("neutral")
            self.tts_voice = texttospeech.types.VoiceSelectionParams(
                language_code='es-ES',
                ssml_gender=texttospeech.enums.SsmlVoiceGender.FEMALE)

            # Select the type of audio file you want returned
            self.tts_audio_config = texttospeech.types.AudioConfig(
                audio_encoding=texttospeech.enums.AudioEncoding.MP3)

    def speak(self, phrase):
        print('decir: ' + phrase)
        if self.local:
            self.tts.say(phrase)
            self.tts.runAndWait()
        else:
            # Set the text input to be synthesized
            synthesis_input = texttospeech.types.SynthesisInput(text=phrase)
            # Perform the text-to-speech request on the text input with the selected
            # voice parameters and audio file type
            response = self.tts_client.synthesize_speech(
                synthesis_input, self.tts_voice, self.tts_audio_config)
            audio_file = 'tts.mp3'
            # The response's audio_content is binary.
            with open(audio_file, 'wb') as out:
                out.write(response.audio_content)
            print('reproducir voz sintetica')
            command = '/usr/bin/mpg321 ' + audio_file
            print(command)
            os.system(command)

    def play(self, filename):
        print('reproduciendo archivo: ' + filename)
        extension = filename.split('.')[-1]
        if extension == 'wav':
            wf = wave.open(filename, 'rb')
            stream = self.audio.open(format=self.audio.get_format_from_width(
                wf.getsampwidth()),
                                     channels=wf.getnchannels(),
                                     rate=wf.getframerate(),
                                     output=True)
            data = wf.readframes(self.CHUNK)

            # play
            while len(data) > 0:
                stream.write(data)
                data = wf.readframes(self.CHUNK)
            stream.stop_stream()
            stream.close()
        elif extension == 'mp3':
            command = '/usr/bin/mpg321 ' + filename
            print(command)
            os.system(command)

    def listen(self, duration=3):
        # start recording
        if self.raspi:
            stream = self.audio.open(format=self.FORMAT,
                                     channels=1,
                                     rate=self.RATE,
                                     input_device_index=2,
                                     input=True,
                                     frames_per_buffer=self.CHUNK)
        else:
            stream = self.audio.open(format=self.FORMAT,
                                     channels=self.CHANNELS,
                                     rate=self.RATE,
                                     input_device_index=7,
                                     input=True,
                                     frames_per_buffer=self.CHUNK)

        frames = []

        for i in range(0, int(self.RATE / self.CHUNK * duration)):
            data = stream.read(self.CHUNK, exception_on_overflow=False)
            frames.append(data)

        stream.stop_stream()
        stream.close()

        wave_file = wave.open(self.FILE_NAME, 'wb')
        if self.raspi:
            wave_file.setnchannels(1)
        else:
            wave_file.setnchannels(self.CHANNELS)

        wave_file.setsampwidth(self.audio.get_sample_size(self.FORMAT))
        wave_file.setframerate(self.RATE)
        wave_file.writeframes(b''.join(frames))
        wave_file.close()

        with sr.AudioFile(self.FILE_NAME) as source:
            audio = self.r.record(source)

        raw_data = audio.get_raw_data(convert_rate=16000, convert_width=2)

        return raw_data

    def echo(self):
        self.play(self.FILE_NAME)

    def recognize(self):
        with sr.Microphone() as source:
            audio = self.r.listen(source)

        # raw_out = self.listen()
        try:
            self.decoder.start_utt()
            # resample to the 16 kHz, 16-bit mono format the model expects
            raw_data = audio.get_raw_data(convert_rate=16000, convert_width=2)
            self.decoder.process_raw(raw_data, False, True)
            self.decoder.end_utt()
            hyp = self.decoder.hyp()
            return hyp.hypstr

        except Exception:
            return None

    def loadGrammar(self, grammar):
        # delete(self.decoder)
        grammar_file = grammar + '.gram'
        c_string = os.path.join(self.GRAMMARDIR,
                                grammar_file)  #.encode('ascii')
        print(c_string)

        self.config.set_string('-jsgf', c_string)

        self.decoder.reinit(self.config)

    def close(self):
        self.audio.terminate()
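
loadGrammar expects a JSGF file named <grammar>.gram inside GRAMMARDIR. A minimal sketch of writing and loading one; the grammar name, rule, and vocabulary below are illustrative, and every word used must also exist in the pronunciation dictionary:

# Sketch only: the grammar content is a hypothetical example.
import os

with open(os.path.join('gram', 'bebidas.gram'), 'w') as f:
    f.write('#JSGF V1.0;\n'
            'grammar bebidas;\n'
            'public <pedido> = (dame | quiero) (agua | cafe | cerveza);\n')

voice = TestVoice(raspi=False, local=True)
voice.loadGrammar('bebidas')  # reinitializes the decoder with -jsgf
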
Code example #32
class VoiceService(object):
    audio_device = None
    buffer_size = 2048
    sampling_rate = 16000

    def __init__(self):
        config = get_decoder_config()
        self.decoder = Decoder(config)

        self.speech = pyttsx3.init()

        self.audio = sphinxbase.Ad(self.audio_device, self.sampling_rate)
        self.buffer = bytearray(self.buffer_size)

        self.default_search = self.decoder.get_search()
        self.in_speech = False
        self.max_history = 100
        self.phrases = []
        self.prompts = {}

        self.next_prompt_id = 1

        self.current_prompt = None
        self.prompt_queue = queue.Queue()

    def create_prompt(self,
                      message=None,
                      message_url=None,
                      search="enable",
                      timeout=15):
        """
        Create a new prompt and add it to the queue.

        Currently, only one type of prompt is supported. We play a message,
        then wait for someone to say a specific word (the search word) within
        the allotted amount of time.

        The status of the prompt can be retrieved by calling get_prompt with
        the appropriate id.

        timeout: prompt timeout in seconds, expected to be either None or numeric.
        """
        if timeout is not None:
            # Be forgiving of caller who may have passed timeout as a string.
            timeout = float(timeout)

        prompt = {
            "created_time": time.time(),
            "detected": False,
            "detected_time": None,
            "id": self.get_next_prompt_id(),
            "message": message,
            "message_url": message_url,
            "search": search,
            "search_started": False,
            "search_started_time": None,
            "played": False,
            "played_time": None,
            "timeout": timeout,
            "timed_out": False
        }
        self.prompts[str(prompt['id'])] = prompt
        self.prompt_queue.put(prompt)
        return prompt

    def get_next_prompt_id(self):
        """
        Get a unique ID for a prompt.
        """
        tmp = self.next_prompt_id
        self.next_prompt_id += 1
        return tmp

    def get_phrases(self):
        """
        Get the history of detected phrases.
        """
        return self.phrases

    def get_prompt(self, prompt_id):
        """
        Get information about a prompt.
        """
        return self.prompts[str(prompt_id)]

    def get_status(self):
        """
        Get the system status.
        """
        status = {
            "current_prompt": self.current_prompt,
            "in_speech": self.decoder.get_in_speech(),
            "queue_length": self.prompt_queue.qsize(),
            "search": self.decoder.get_search()
        }
        return status

    def play_prompt(self, prompt):
        prompt['played_time'] = time.time()

        if prompt.get("message_url", None) is not None:
            cmd = ["mplayer", "-ao", "pulse", prompt['message_url']]
            subprocess.call(cmd)
        elif prompt.get("message", None) is not None:
            self.speech.say(prompt['message'])
            self.speech.runAndWait()

        prompt['played'] = True

    def process_hypothesis(self, hypothesis):
        print("SPEECH {}".format(hypothesis.hypstr))

        phrase = {
            "search": self.decoder.get_search(),
            "time": time.time(),
            "text": hypothesis.hypstr
        }
        self.phrases.append(phrase)
        del self.phrases[:-self.max_history]

    def run_next_prompt(self):
        if self.prompt_queue.empty():
            self.create_prompt(None, search="paradrop", timeout=None)

        self.current_prompt = self.prompt_queue.get_nowait()
        self.decoder.set_search(self.current_prompt['search'])

        self.audio.stop_recording()
        self.play_prompt(self.current_prompt)
        self.audio.start_recording()

        self.current_prompt['search_started_time'] = time.time()
        self.current_prompt['search_started'] = True

    def detect_timeout(self):
        """
        Check if the current prompt has timed out.
        """
        if self.current_prompt is None:
            # No active prompt to timeout.
            return False

        if self.decoder.get_in_speech():
            # Defer timeout if decoder reports that speech is in progress.  A
            # person may be speaking the target phrase currently.
            return False

        if self.current_prompt['timeout'] is None:
            # If timeout is None, then only timeout when there is another item
            # in the queue.
            return not self.prompt_queue.empty()
        else:
            diff = time.time() - self.current_prompt['search_started_time']
            return diff >= self.current_prompt['timeout']

    def run(self):
        self.decoder.set_keyphrase("activate", "activate")
        self.decoder.set_keyphrase("allow", "allow")
        self.decoder.set_keyphrase("enable", "enable")
        self.decoder.set_keyphrase("paradrop", "para drop")

        self.audio.start_recording()
        while True:
            if self.current_prompt is None:
                self.run_next_prompt()
                self.decoder.start_utt()

            self.audio.readinto(self.buffer)
            self.decoder.process_raw(self.buffer, False, False)

            if self.in_speech and not self.decoder.get_in_speech():
                self.decoder.end_utt()

                hypothesis = self.decoder.hyp()
                if hypothesis is not None:
                    self.process_hypothesis(hypothesis)
                    self.current_prompt['detected'] = True
                    self.current_prompt['detected_time'] = time.time()
                    self.current_prompt = None
                else:
                    self.decoder.start_utt()

            if self.detect_timeout():
                self.decoder.end_utt()
                self.current_prompt['timed_out'] = True
                self.current_prompt = None

            self.in_speech = self.decoder.get_in_speech()
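
Putting the pieces together, a minimal sketch of serving a single spoken prompt with the class above (the message text is illustrative; run() blocks on the default audio device):

# Sketch only.
service = VoiceService()
service.create_prompt(message='Say enable to continue',
                      search='enable', timeout=15)
service.run()
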
Code example #33
class Wrapper():
    def __init__(self, **kwargs):
        signal.signal(signal.SIGINT, self.stop)

        model_path = get_model_path()

        kwargs = {
            x: os.path.expandvars(kwargs[x])
            if type(kwargs[x]) is str else kwargs[x]
            for x in kwargs
        }

        nodename = kwargs.pop('nodename')
        grammar_file = kwargs.pop('grammar_file', None)
        grammar_rule = kwargs.pop('grammar_rule', None)
        grammar_name = kwargs.pop('grammar_name', None)

        kwargs.pop('esiaf_input_topic')

        if kwargs.get('dic') is not None and kwargs.get('dict') is None:
            kwargs['dict'] = kwargs.pop('dic')

        if kwargs.get('hmm') is None:
            kwargs['hmm'] = os.path.join(model_path, 'en-us')

        if kwargs.get('lm') is None:
            kwargs['lm'] = os.path.join(model_path, 'en-us.lm.bin')

        if kwargs.get('dict') is None and kwargs.get('dic') is None:
            kwargs['dict'] = os.path.join(model_path, 'cmudict-en-us.dict')

        if kwargs.pop('verbose', False) is False:
            if sys.platform.startswith('win'):
                kwargs['logfn'] = 'nul'
            else:
                kwargs['logfn'] = '/dev/null'

        config = Decoder.default_config()

        print(kwargs)

        for key, value in kwargs.items():
            if isinstance(value, bool):
                config.set_boolean('-{}'.format(key), value)
            elif isinstance(value, int):
                config.set_int('-{}'.format(key), value)
            elif isinstance(value, float):
                config.set_float('-{}'.format(key), value)
            elif isinstance(value, str):
                config.set_string('-{}'.format(key), value)

        self.decoder = Decoder(config)

        if grammar_file and grammar_rule and grammar_name:
            jsgf = Jsgf(grammar_file)
            rule = jsgf.get_rule(grammar_name + '.' + grammar_rule)
            fsg = jsgf.build_fsg(rule, self.decoder.get_logmath(), 7.5)
            self.decoder.set_fsg(grammar_name, fsg)
            self.decoder.set_search(grammar_name)

        self.start = None
        self.finish = None

        self.speech_publisher = rospy.Publisher(nodename + '/' + 'SpeechRec',
                                                SpeechInfo,
                                                queue_size=10)

    def stop(self, *args, **kwargs):
        raise StopIteration

    def hypothesis(self):
        hyp = self.decoder.hyp()
        if hyp:
            return hyp.hypstr
        else:
            return ''

    def vad_finished_callback(self):
        self.decoder.end_utt()
        result = ''
        if self.decoder.hyp():
            result = self.hypothesis()
        rospy.loginfo('understood: \'' + str(result) + '\'')

        hypo = SpeechHypothesis()
        hypo.recognizedSpeech = result
        hypo.probability = 1.0

        time = RecordingTimeStamps()
        time.start = self.start
        time.finish = self.finish

        speechInfo = SpeechInfo()
        speechInfo.hypotheses = [hypo]
        speechInfo.duration = time

        self.speech_publisher.publish(speechInfo)

        self.start = None
        self.finish = None

    def add_audio_data(self, audio_data, recording_timestamps):
        _recording_timestamps = RecordingTimeStamps()
        msg_from_string(_recording_timestamps, recording_timestamps)
        rospy.loginfo('got audio!')
        if not self.start:
            self.start = _recording_timestamps.start
            self.decoder.start_utt()
        self.finish = _recording_timestamps.finish
        raw = audio_data.tobytes()  # avoid shadowing the built-in bytearray
        self.decoder.process_raw(raw, False, False)
Code example #34
'''
Created on Dec 29, 2013


@author: Mindaugas Greibus
'''
import sys, os

from pocketsphinx import Decoder

MODELDIR = "../models"

# Create a decoder with certain model
config = Decoder.default_config()
config.set_string('-hmm', os.path.join(MODELDIR, 'hmm/lt.cd_cont_200/'))
config.set_string('-jsgf', os.path.join(MODELDIR, 'lm/robotas.gram'))
config.set_string('-dict', os.path.join(MODELDIR, 'dict/robotas.dict'))
decoder = Decoder(config)

decoder.decode_raw(
    open(os.path.join(MODELDIR, '../test/audio/varyk_pirmyn-16k.wav'), 'rb'))

# Retrieve hypothesis.
hypothesis = decoder.hyp()
print('Best hypothesis: ', hypothesis.best_score, hypothesis.hypstr)
print('Best hypothesis segments: ', [seg.word for seg in decoder.seg()])
Code example #35
class PocketsphinxEngine(WakeWordEnginePlugin):
    _config = {
        'phonemes': 'HH EY . M AY K R AO F T',
        'threshold': '1e-90',
        'wake_word_length': 1.2
    }

    # Padding of silence when feeding to pocketsphinx
    SILENCE_SEC = 0.01
    url = 'https://github.com/MatthewScholefield/pocketsphinx-models/raw/master/{lang}.tar.gz'

    def __init__(self, rt, on_activation: Callable):
        super().__init__(rt, on_activation)
        lang = rt.config['lang']
        self.hmm_folder = join(rt.paths.user_config, 'models', lang)
        self.rate, self.width = self.rec_config['sample_rate'], self.rec_config['sample_width']
        self.padding = b'\0' * int(self.rate * self.width * self.SILENCE_SEC)
        self.buffer = b''

        download_extract_tar(self.url.format(lang=lang), self.hmm_folder)

        config = Decoder.default_config()
        config.set_string('-hmm', self.hmm_folder)
        config.set_string('-dict', self._create_dict(self.wake_word, self.config['phonemes']))
        config.set_string('-keyphrase', self.wake_word)
        config.set_float('-kws_threshold', float(self.config['threshold']))
        config.set_float('-samprate', self.rate)
        config.set_int('-nfft', 2048)
        config.set_string('-logfn', '/dev/null')
        self.ps = Decoder(config)

    @staticmethod
    def _create_dict(key_phrase, phonemes):
        fd, file_name = tempfile.mkstemp()
        with os.fdopen(fd, 'w') as f:
            f.write(key_phrase + ' ' + phonemes.replace(' . ', ' '))
        return file_name

    def _transcribe(self, raw_audio):
        self.ps.start_utt()
        self.ps.process_raw(raw_audio, False, False)
        self.ps.end_utt()
        return self.ps.hyp()

    def startup(self):
        self.buffer = b'\0' * int(self.width * self.rate * self.config['wake_word_length'])

    def shutdown(self):
        self.buffer = b''

    def pause_listening(self):
        pass

    def continue_listening(self):
        pass

    def update(self, raw_audio: bytes):
        self.buffer = self.buffer[len(raw_audio):] + raw_audio

        transcription = self._transcribe(self.buffer + self.padding)
        if transcription and self.wake_word in transcription.hypstr.lower():
            self.on_activation()
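
For reference, _create_dict produces a one-line, CMU-style pronunciation dictionary: the wake word followed by its phoneme sequence, with the ' . ' syllable separator stripped by the replace() call. Assuming a wake word of "hey mycroft" (the actual value comes from the plugin configuration), the generated temporary file would contain a single line like:

hey mycroft HH EY M AY K R AO F T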