Beispiel #1
0
    def __init__(self, profile, hmm=None, dict=None, lm=None,
                 kws_threshold=None, keyphrase=None):
        """Build a pocketsphinx decoder, either for keyphrase spotting
        (when ``keyphrase`` is given) or for full corpus recognition,
        filling in default model paths for anything not supplied."""
        self.profile = profile

        # Keyphrase mode and corpus mode use differently named default
        # dictionary/LM files; explicit arguments always win.
        basename = 'keyphrase' if keyphrase else 'corpus'
        dict = dict or fullpath('config/%s.dic' % basename)
        lm = lm or fullpath('config/%s.lm' % basename)
        hmm = hmm or 'share/pocketsphinx/model/en-us/en-us'

        config = Decoder.default_config()
        config.set_string('-hmm', os.path.join(SPHINX_ROOT, hmm))
        config.set_string('-dict', dict)
        config.set_string('-lm', lm)
        config.set_string('-logfn', fullpath('config/sphinx.log'))
        if keyphrase:
            config.set_string('-keyphrase', keyphrase)
        if kws_threshold:
            config.set_float('-kws_threshold', kws_threshold)

        self.decoder = Decoder(config)

        self.transcribe = self.transcribe_darwin
        self.hyp = None
Beispiel #2
0
def record(listen_time):
    """Record ``listen_time`` seconds from the microphone, save it to
    livewav.wav and return the decoded text, or "" when no audio scored
    above the detection threshold.

    Fixes over the previous revision: consistent 4-space indentation
    (the old body mixed tabs and spaces — a SyntaxError under Python 3),
    ``is None`` identity test, single-argument ``print(...)`` calls that
    parse on both Python 2 and 3, and floor division so the range()
    bound stays an integer on Python 3.
    """
    THRESHOLD = None
    WAVE_OUTPUT_FILENAME = "livewav.wav"

    p = pyaudio.PyAudio()
    if THRESHOLD is None:
        THRESHOLD = fetchThreshold()
        print(THRESHOLD)

    stream = p.open(format=FORMAT,
                    channels=1,
                    rate=RATE,
                    input=True,
                    frames_per_buffer=CHUNK)

    print("* recording")
    frames = []
    detected = False
    # // keeps integer semantics on both Python 2 and 3
    for i in range(0, RATE // CHUNK * listen_time):
        data = stream.read(CHUNK)
        frames.append(data)
        score = getScore(data)
        if score >= THRESHOLD:
            detected = True
    if not detected:
        print("nothing detected")
        return ""

    print("* done recording")
    # stream.stop_stream()
    stream.close()
    p.terminate()

    # write data to WAVE file
    data = ''.join(frames)
    wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
    wf.setnchannels(1)
    wf.setsampwidth(p.get_sample_size(FORMAT))
    wf.setframerate(RATE)
    wf.writeframes(data)
    wf.close()
    sysdir = os.getcwd()
    wavfile = sysdir + "/livewav.wav"

    speechRec = Decoder(hmm=hmdir, lm=lmdir, dict=dictd)
    with open(wavfile, 'rb') as wavFile:
        speechRec.decode_raw(wavFile)
        result = speechRec.get_hyp()

    return result[0]
 def __init__(self, key_phrase="hey mycroft", config=None, lang="en-us"):
     """Initialize the pocketsphinx wake-word engine.

     Builds a one-off phonetic dictionary for the key phrase and creates
     a Decoder configured for keyword spotting.
     """
     super(PocketsphinxHotWord, self).__init__(key_phrase, config, lang)
     # Hotword module imports
     from pocketsphinx import Decoder
     # Hotword module params
     self.phonemes = self.config.get("phonemes", "HH EY . M AY K R AO F T")
     self.num_phonemes = len(self.phonemes.split())
     # keyword-spotting threshold taken from config
     self.threshold = self.config.get("threshold", 1e-90)
     # NOTE(review): default 1600 looks like a typo for 16000 — confirm
     self.sample_rate = self.listener_config.get("sample_rate", 1600)
     dict_name = self.create_dict(self.key_phrase, self.phonemes)
     config = self.create_config(dict_name, Decoder.default_config())
     self.decoder = Decoder(config)
Beispiel #4
0
def best_sphinx_speech_result(pyaudio, wav_name, profile):
  """Decode ``wav_name`` with pocketsphinx and return the best hypothesis.

  On first use, compiles a dictionary and language model from the word
  list in ``profile["words"]`` (via the project's ``compile`` helper).

  Fixes: the ``global`` declaration now precedes any use of
  ``have_sphinx_dictionary`` (using the name before its ``global``
  statement is a SyntaxError in Python 3); ``in`` replaces the removed
  ``dict.has_key``; and the wav file is opened with ``open`` in a
  context manager instead of the Python-2-only ``file()`` so the handle
  is always closed.
  """
  global have_sphinx_dictionary
  if not have_sphinx_dictionary:
    if "words" not in profile:
      raise KeyError("Pass the possible words in in profile")
    compile("sentences.txt", "dictionary.dic", "language_model.lm", profile["words"])
    have_sphinx_dictionary = True

  speechRec = Decoder(
    hmm  = "/usr/local/share/pocketsphinx/model/hmm/en_US/hub4wsj_sc_8k",
    lm   = "language_model.lm",
    dict = "dictionary.dic"
  )
  with open(wav_name, 'rb') as wav_file:
    speechRec.decode_raw(wav_file)
    results = speechRec.get_hyp()
  return results[0]
 def prepareDecoder(self, pGramma):
     """Create the sphinx decoder on first call; on later calls only
     swap in the grammar named by ``pGramma``."""
     if self.decoder is not None:
         self.updateGrammar(self.decoder, pGramma)
     else:
         self.config = self.createConfig(pGramma)
         self.decoder = Decoder(self.config)
 def createConfig(self,pGramma):
     """Build a Decoder config with the LIEPA acoustic model, the FSG
     grammar named by ``pGramma`` and the service dictionary."""
     print ("[createConfig]+++")
     resource_dir = "../resource/"
     cfg = Decoder.default_config()
     cfg.set_string('-hmm', os.path.join(self.MODELDIR, 'hmm/liepa.cd_semi_200/'))
     cfg.set_string('-fsg', os.path.join(resource_dir, pGramma + '.fsg'))
     cfg.set_string('-dict', os.path.join(resource_dir, 'service.dict'))
     print ("[createConfig]---")
     return cfg
 def __init__(self, key_phrase, phonemes, threshold, sample_rate=16000,
              lang="en-us"):
     """Create a keyword spotter for ``key_phrase``.

     ``phonemes`` is the dot-separated pronunciation used to build a
     temporary dictionary for the decoder.
     """
     self.lang = lang
     self.key_phrase = key_phrase
     self.sample_rate = sample_rate
     self.threshold = threshold
     self.phonemes = phonemes
     self.decoder = Decoder(
         self.create_config(self.create_dict(key_phrase, phonemes)))
 def __init__(self, key_phrase="hey mycroft", config=None, lang="en-us"):
     """Initialize the pocketsphinx hotword engine and its Decoder."""
     super(PocketsphinxHotWord, self).__init__(key_phrase, config, lang)
     # Hotword module imports
     from pocketsphinx import Decoder
     # Hotword module config
     module = self.config.get("module")
     if module != "pocketsphinx":
         LOG.warning(
             str(module) + " module does not match with "
                           "Hotword class pocketsphinx")
     # Hotword module params
     self.phonemes = self.config.get("phonemes", "HH EY . M AY K R AO F T")
     self.num_phonemes = len(self.phonemes.split())
     self.threshold = self.config.get("threshold", 1e-90)
     # NOTE(review): default 1600 looks like a typo for 16000 — confirm
     self.sample_rate = self.listener_config.get("sample_rate", 1600)
     dict_name = self.create_dict(key_phrase, self.phonemes)
     config = self.create_config(dict_name, Decoder.default_config())
     self.decoder = Decoder(config)
class PocketsphinxHotWord(HotWordEngine):
    """Wake word engine using PocketSphinx.

    PocketSphinx is very general purpose but has a somewhat high error rate.
    The key advantage is to be able to specify the wake word with phonemes.
    """

    def __init__(self, key_phrase="hey mycroft", config=None, lang="en-us"):
        super().__init__(key_phrase, config, lang)
        # Imported lazily inside the constructor, as in the original.
        from pocketsphinx import Decoder
        # Hotword module params
        self.phonemes = self.config.get("phonemes", "HH EY . M AY K R AO F T")
        self.num_phonemes = len(self.phonemes.split())
        self.threshold = self.config.get("threshold", 1e-90)
        self.sample_rate = self.listener_config.get("sample_rate", 1600)
        dict_file = self.create_dict(self.key_phrase, self.phonemes)
        self.decoder = Decoder(
            self.create_config(dict_file, Decoder.default_config()))

    def create_dict(self, key_phrase, phonemes):
        """Write a one-off word->phoneme dictionary; return its path."""
        fd, path = tempfile.mkstemp()
        entries = zip(key_phrase.split(), phonemes.split('.'))
        with os.fdopen(fd, 'w') as dict_file:
            dict_file.writelines(
                word + ' ' + pron + '\n' for word, pron in entries)
        return path

    def create_config(self, dict_name, config):
        """If language config doesn't exist then
        we use default language (english) config as a fallback.
        """
        model_file = join(RECOGNIZER_DIR, 'model', self.lang, 'hmm')
        if not exists(model_file):
            LOG.error(
                'PocketSphinx model not found at "{}". '.format(model_file) +
                'Falling back to en-us model')
            model_file = join(RECOGNIZER_DIR, 'model', 'en-us', 'hmm')
        config.set_string('-hmm', model_file)
        config.set_string('-dict', dict_name)
        config.set_string('-keyphrase', self.key_phrase)
        config.set_float('-kws_threshold', float(self.threshold))
        config.set_float('-samprate', self.sample_rate)
        config.set_int('-nfft', 2048)
        config.set_string('-logfn', '/dev/null')
        return config

    def transcribe(self, byte_data, metrics=None):
        """Decode one utterance; return the pocketsphinx hypothesis."""
        start = time()
        self.decoder.start_utt()
        self.decoder.process_raw(byte_data, False, False)
        self.decoder.end_utt()
        if metrics:
            metrics.timer("mycroft.stt.local.time_s", time() - start)
        return self.decoder.hyp()

    def found_wake_word(self, frame_data):
        """Truthy when the key phrase appears in the decoded audio."""
        hyp = self.transcribe(frame_data)
        return hyp and self.key_phrase in hyp.hypstr.lower()
 def create_config(self, dict_name):
     """Assemble the keyword-spotting Decoder configuration."""
     cfg = Decoder.default_config()
     # Same options as before, applied via a small dispatch table.
     for setter, key, value in [
         (cfg.set_string, '-hmm', join(BASEDIR, 'model', self.lang, 'hmm')),
         (cfg.set_string, '-dict', dict_name),
         (cfg.set_string, '-keyphrase', self.key_phrase),
         (cfg.set_float, '-kws_threshold', float(self.threshold)),
         (cfg.set_float, '-samprate', self.sample_rate),
         (cfg.set_int, '-nfft', 2048),
         (cfg.set_string, '-logfn', '/dev/null'),
     ]:
         setter(key, value)
     return cfg
Beispiel #11
0
 def create_config(self, dict_name):
     """Build the keyword-spotting Decoder configuration.

     Fix: cast ``self.threshold`` with ``float()`` before ``set_float``,
     consistent with every other create_config implementation in this
     file and safe when the threshold arrives from config as a string.
     """
     config = Decoder.default_config()
     config.set_string('-hmm', join(BASEDIR, 'model', self.lang, 'hmm'))
     config.set_string('-dict', dict_name)
     config.set_string('-keyphrase', self.key_phrase)
     config.set_float('-kws_threshold', float(self.threshold))
     config.set_float('-samprate', self.sample_rate)
     config.set_int('-nfft', 2048)
     config.set_string('-logfn', '/dev/null')
     return config
Beispiel #12
0
    def __init__(self):
        """Set up the decoder, TTS engine, audio source and prompt state."""
        config = get_decoder_config()
        self.decoder = Decoder(config)

        # Local text-to-speech engine
        self.speech = pyttsx3.init()

        # Microphone source and reusable read buffer
        # (audio_device / sampling_rate / buffer_size are presumably class
        # attributes defined outside this view — confirm)
        self.audio = sphinxbase.Ad(self.audio_device, self.sampling_rate)
        self.buffer = bytearray(self.buffer_size)

        # Remember the decoder's initial search mode so it can be restored
        self.default_search = self.decoder.get_search()
        self.in_speech = False
        self.max_history = 100
        self.phrases = []
        self.prompts = {}

        # Monotonically increasing id handed to queued prompts
        self.next_prompt_id = 1

        self.current_prompt = None
        self.prompt_queue = queue.Queue()
Beispiel #13
0
 def create_config(self, dict_name):
     """Return a Decoder config for spotting ``self.key_phrase`` with the
     bundled en-us acoustic model."""
     cfg = Decoder.default_config()
     cfg.set_string('-hmm', os.path.join(MODELDIR, 'en-us'))
     cfg.set_string('-dict', dict_name)
     cfg.set_string('-keyphrase', self.key_phrase)
     cfg.set_float('-kws_threshold', float(self.threshold))
     cfg.set_float('-samprate', self.sample_rate)
     cfg.set_int('-nfft', 2048)
     # keep pocketsphinx quiet
     cfg.set_string('-logfn', '/dev/null')
     return cfg
    def __init__(self, rt, on_activation: Callable):
        """Download the language's acoustic model if needed and build the
        pocketsphinx keyword-spotting decoder."""
        super().__init__(rt, on_activation)
        lang = rt.config['lang']
        self.hmm_folder = join(rt.paths.user_config, 'models', lang)
        self.rate, self.width = self.rec_config['sample_rate'], self.rec_config['sample_width']
        # SILENCE_SEC worth of zero bytes (presumably appended to audio so
        # the decoder flushes a decision — confirm at the call site)
        self.padding = b'\0' * int(self.rate * self.width * self.SILENCE_SEC)
        self.buffer = b''

        # Fetch the acoustic model archive for this language
        # (TODO confirm download_extract_tar skips already-extracted files)
        download_extract_tar(self.url.format(lang=lang), self.hmm_folder)

        config = Decoder.default_config()
        config.set_string('-hmm', self.hmm_folder)
        config.set_string('-dict', self._create_dict(self.wake_word, self.config['phonemes']))
        config.set_string('-keyphrase', self.wake_word)
        config.set_float('-kws_threshold', float(self.config['threshold']))
        config.set_float('-samprate', self.rate)
        config.set_int('-nfft', 2048)
        config.set_string('-logfn', '/dev/null')
        self.ps = Decoder(config)
    def onStart(self):
        """Make sure the language pack is available, then build the
        pocketsphinx decoder for the active language."""
        super().onStart()

        if not self.checkLanguage():
            self.downloadLanguage()

        # All model files live inside the pocketsphinx package in the venv.
        model_dir = f'{self.Commons.rootDir()}/venv/lib/python3.7/site-packages/pocketsphinx/model'
        lang = self.LanguageManager.activeLanguageAndCountryCode.lower()

        self._config = Decoder.default_config()
        self._config.set_string('-hmm', f'{model_dir}/{lang}')
        self._config.set_string('-lm', f'{model_dir}/{lang}.lm.bin')
        self._config.set_string('-dict', f'{model_dir}/cmudict-{lang}.dict')
        self._decoder = Decoder(self._config)
Beispiel #16
0
 def create_config(self, dict_name):
     """Decoder config for keyword spotting; pocketsphinx output goes to
     the mycroft scripts log instead of being discarded."""
     log_path = '/home/sg/mycroft-core/scripts/logs/pocket.log'
     cfg = Decoder.default_config()
     cfg.set_string('-hmm', join(BASEDIR, 'model', self.lang, 'hmm'))
     cfg.set_string('-dict', dict_name)
     cfg.set_string('-keyphrase', self.key_phrase)
     cfg.set_float('-kws_threshold', float(self.threshold))
     cfg.set_float('-samprate', self.sample_rate)
     cfg.set_int('-nfft', 2048)
     cfg.set_string('-logfn', log_path)
     return cfg
Beispiel #17
0
    def __init__(self, file_name='aux.wav', raspi=False, local=True):
        """Set up audio capture, the pocketsphinx decoder and a TTS backend.

        local=True uses the offline pyttsx3 engine; otherwise the Google
        Cloud Text-to-Speech client is configured.
        """
        ## load environment

        self.FILE_NAME = file_name
        self.audio = pyaudio.PyAudio()
        self.raspi = raspi

        self.local = local

        self.config = Decoder.default_config()
        self.config.set_string('-hmm',
                               os.path.join(self.MODELDIR, 'acoustic-model'))
        self.config.set_string(
            '-dict',
            os.path.join(self.MODELDIR, 'pronounciation-dictionary.dict'))
        # silence pocketsphinx's own logging
        self.config.set_string('-logfn', os.devnull)
        self.decoder = Decoder(self.config)
        self.r = sr.Recognizer()
        print("adjunting...")
        # calibrate the recognizer's energy threshold to ambient noise
        with sr.Microphone() as source:
            self.r.adjust_for_ambient_noise(source)

        # tts
        if self.local:
            self.tts = pyttsx3.init()
            self.tts.setProperty('rate', self.RATE)
            self.tts.setProperty('volume', self.VOLUME)
            self.tts.setProperty('voice', 'spanish-latin-am')
        else:
            # Instantiates a client
            self.tts_client = texttospeech.TextToSpeechClient()
            # Build the voice request, select the language code ("en-US") and the ssml
            # voice gender ("neutral")
            self.tts_voice = texttospeech.types.VoiceSelectionParams(
                language_code='es-ES',
                ssml_gender=texttospeech.enums.SsmlVoiceGender.FEMALE)

            # Select the type of audio file you want returned
            self.tts_audio_config = texttospeech.types.AudioConfig(
                audio_encoding=texttospeech.enums.AudioEncoding.MP3)
Beispiel #18
0
class LocalRecognizer(object):
    """Pocketsphinx-based local keyword spotter for a single key phrase."""

    def __init__(self,
                 key_phrase,
                 phonemes,
                 threshold,
                 sample_rate=16000,
                 lang="en-us"):
        self.lang = lang
        self.key_phrase = key_phrase
        self.sample_rate = sample_rate
        self.threshold = threshold
        self.phonemes = phonemes
        dict_name = self.create_dict(key_phrase, phonemes)
        self.decoder = Decoder(self.create_config(dict_name))

    def create_dict(self, key_phrase, phonemes):
        """Write a temporary word->phoneme dictionary; return its path."""
        (fd, file_name) = tempfile.mkstemp()
        words = key_phrase.split()
        phoneme_groups = phonemes.split('.')
        with os.fdopen(fd, 'w') as f:
            for word, phoneme in zip(words, phoneme_groups):
                f.write(word + ' ' + phoneme + '\n')
        return file_name

    def create_config(self, dict_name):
        """Build the keyword-spotting decoder configuration."""
        config = Decoder.default_config()
        config.set_string('-hmm',
                          os.path.join(BASEDIR, 'model', self.lang, 'hmm'))
        config.set_string('-dict', dict_name)
        config.set_string('-keyphrase', self.key_phrase)
        config.set_float('-kws_threshold', float(self.threshold))
        config.set_float('-samprate', self.sample_rate)
        config.set_int('-nfft', 2048)
        config.set_string('-logfn',
                          '/home/sg/mycroft-core/scripts/logs/pocket.log')
        return config

    def transcribe(self, byte_data, metrics=None):
        """Decode one utterance; return the pocketsphinx hypothesis."""
        start = time.time()
        self.decoder.start_utt()
        self.decoder.process_raw(byte_data, False, False)
        self.decoder.end_utt()
        if metrics:
            metrics.timer("mycroft.stt.local.time_s", time.time() - start)
        return self.decoder.hyp()

    def is_recognized(self, byte_data, metrics):
        hyp = self.transcribe(byte_data, metrics)
        return hyp and self.key_phrase in hyp.hypstr.lower()

    def found_wake_word(self, frame_data):
        # Fix: the previous revision called transcribe() twice in a row,
        # decoding the same audio twice and discarding the first result.
        hyp = self.transcribe(frame_data)
        return hyp and self.key_phrase in hyp.hypstr.lower()
 def createConfig(self,pGramma):
     """Create configuration with acoustic model path, grammar and
     dictionary."""
     print ("[createConfig]+++")
     resource_dir = "../resource/"
     cfg = Decoder.default_config()
     cfg.set_string('-hmm', os.path.join(self.MODELDIR, 'hmm/lt.cd_cont_200/'))
     cfg.set_string('-fsg', os.path.join(resource_dir, pGramma + '.fsg'))
     cfg.set_string('-dict', os.path.join(resource_dir, 'service.dict'))
     print ("[createConfig]---")
     return cfg
class CMUSphinxRecognizer(BaseRecognizer):
    """Speech recognizer backed by a local pocketsphinx Decoder."""

    def __init__(self):
        config = Decoder.default_config()
        config.set_string('-hmm', SPHINX_HMM)
        config.set_string('-lm', SPHINX_LM)
        config.set_string('-dict', SPHINX_DICT)
        self.decoder = Decoder(config)

    def recognize(self, raw_audio):
        """Decode ``raw_audio`` (bytes); return (text, best_score, prob).

        Fix: reopen the temp file in binary mode ('rb') — it contains raw
        audio, and text mode ('r') would attempt character decoding of
        arbitrary bytes under Python 3.
        """
        file_path = self.__save_file(raw_audio)
        with open(file_path, 'rb') as wav_fp:
            self.decoder.decode_raw(wav_fp)
            hypothesis = self.decoder.hyp()
            return hypothesis.hypstr, hypothesis.best_score, hypothesis.prob

    @staticmethod
    def __save_file(data):
        """Persist ``data`` to a temp file kept after close; return its path."""
        tmp_fp = NamedTemporaryFile(delete=False)
        tmp_fp.write(data)
        tmp_fp.close()
        return tmp_fp.name
Beispiel #21
0
class PocketsphinxRecognizer(LocalRecognizer):
    """Keyword spotter for ``key_phrase`` with verbose debug prints."""

    def __init__(self,
                 key_phrase,
                 phonemes,
                 threshold,
                 sample_rate=16000,
                 lang="en-us"):
        # str() coercion — presumably guards against non-str config values
        # (confirm against the caller)
        self.lang = str(lang)
        self.key_phrase = str(key_phrase)
        print("####key_phrase-->", key_phrase)
        self.sample_rate = sample_rate
        self.threshold = threshold
        self.phonemes = phonemes
        print("####phonemes -->", phonemes)
        dict_name = self.create_dict(key_phrase, phonemes)
        print("####dict_name --->", dict_name)
        self.decoder = Decoder(self.create_config(dict_name))

    def create_dict(self, key_phrase, phonemes):
        """Write a temporary word->phoneme dictionary; return its path."""
        (fd, file_name) = tempfile.mkstemp()
        words = key_phrase.split()
        phoneme_groups = phonemes.split('.')
        with os.fdopen(fd, 'w') as f:
            for word, phoneme in zip(words, phoneme_groups):
                f.write(word + ' ' + phoneme + '\n')

        return file_name

    def create_config(self, dict_name):
        """Build the keyword-spotting decoder configuration."""
        config = Decoder.default_config()
        config.set_string('-hmm', join(BASEDIR, 'model', self.lang, 'hmm'))
        config.set_string('-dict', dict_name)
        config.set_string('-keyphrase', self.key_phrase)
        config.set_float('-kws_threshold', float(self.threshold))
        config.set_float('-samprate', self.sample_rate)
        config.set_int('-nfft', 2048)
        config.set_string('-logfn',
                          '/home/sg/mycroft-core/scripts/logs/pocket.log')
        return config

    def transcribe(self, byte_data, metrics=None):
        """Decode one utterance; return the pocketsphinx hypothesis."""
        start = time.time()
        #sr = r.recognize_sphinx()
        self.decoder.start_utt()
        self.decoder.process_raw(byte_data, False, False)
        self.decoder.end_utt()
        if metrics:
            metrics.timer("mycroft.stt.local.time_s", time.time() - start)
        #LOG.error("transcribed ---> +"+str(self.decoder.hyp()))
        return self.decoder.hyp()

    def found_wake_word(self, frame_data):
        """Truthy when the key phrase appears in the decoded audio."""
        hyp = self.transcribe(frame_data)
        #LOG.info("hyp is ---->"+hyp))
        return hyp and self.key_phrase in hyp.hypstr.lower()
class PocketsphinxHotWord(HotWordEngine):
    """Wake word engine using pocketsphinx keyword spotting."""

    def __init__(self, key_phrase="hey mycroft", config=None, lang="en-us"):
        super(PocketsphinxHotWord, self).__init__(key_phrase, config, lang)
        # Hotword module imports
        from pocketsphinx import Decoder
        # Hotword module config
        module = self.config.get("module")
        if module != "pocketsphinx":
            LOG.warning(
                str(module) + " module does not match with "
                              "Hotword class pocketsphinx")
        # Hotword module params
        self.phonemes = self.config.get("phonemes", "HH EY . M AY K R AO F T")
        self.num_phonemes = len(self.phonemes.split())
        self.threshold = self.config.get("threshold", 1e-90)
        # NOTE(review): default 1600 looks like a typo for 16000 — confirm
        self.sample_rate = self.listener_config.get("sample_rate", 1600)
        dict_name = self.create_dict(key_phrase, self.phonemes)
        config = self.create_config(dict_name, Decoder.default_config())
        self.decoder = Decoder(config)

    def create_dict(self, key_phrase, phonemes):
        """Write a temporary word->phoneme dictionary; return its path."""
        (fd, file_name) = tempfile.mkstemp()
        words = key_phrase.split()
        phoneme_groups = phonemes.split('.')
        with os.fdopen(fd, 'w') as f:
            for word, phoneme in zip(words, phoneme_groups):
                f.write(word + ' ' + phoneme + '\n')
        return file_name

    def create_config(self, dict_name, config):
        """Populate ``config`` for keyword spotting; logs an error (but
        still proceeds) when the language model directory is missing."""
        model_file = join(RECOGNIZER_DIR, 'model', self.lang, 'hmm')
        if not exists(model_file):
            LOG.error('PocketSphinx model not found at ' + str(model_file))
        config.set_string('-hmm', model_file)
        config.set_string('-dict', dict_name)
        config.set_string('-keyphrase', self.key_phrase)
        config.set_float('-kws_threshold', float(self.threshold))
        config.set_float('-samprate', self.sample_rate)
        config.set_int('-nfft', 2048)
        config.set_string('-logfn', '/dev/null')
        return config

    def transcribe(self, byte_data, metrics=None):
        """Decode one utterance; return the pocketsphinx hypothesis."""
        start = time.time()
        self.decoder.start_utt()
        self.decoder.process_raw(byte_data, False, False)
        self.decoder.end_utt()
        if metrics:
            metrics.timer("mycroft.stt.local.time_s", time.time() - start)
        return self.decoder.hyp()

    def found_wake_word(self, frame_data):
        """Truthy when the key phrase appears in the decoded audio."""
        hyp = self.transcribe(frame_data)
        return hyp and self.key_phrase in hyp.hypstr.lower()
Beispiel #23
0
def record(listen_time):
    """Record ``listen_time`` seconds from the microphone, save it to
    livewav.wav and return the decoded hypothesis string ("" when no
    audio scored above the detection threshold).

    Fixes: print statements parenthesized (single-argument print(...)
    behaves identically on Python 2 and parses on Python 3), ``is None``
    identity test, and floor division so the range() bound stays an
    integer under Python 3.
    """
    THRESHOLD = None
    WAVE_OUTPUT_FILENAME = "livewav.wav"

    p = pyaudio.PyAudio()
    if THRESHOLD is None:
        THRESHOLD = fetchThreshold()
        print(THRESHOLD)

    stream = p.open(format=FORMAT,
                    channels=1,
                    rate=RATE,
                    input=True,
                    frames_per_buffer=CHUNK)

    print("* recording")
    frames = []
    detected = False
    for i in range(0, RATE // CHUNK * listen_time):
        data = stream.read(CHUNK)
        frames.append(data)
        score = getScore(data)
        if score >= THRESHOLD:
            detected = True
    if not detected:
        print("nothing detected")
        return ""

    print("* done recording")
    # stream.stop_stream()
    stream.close()
    p.terminate()

    # write data to WAVE file
    data = ''.join(frames)
    wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
    wf.setnchannels(1)
    wf.setsampwidth(p.get_sample_size(FORMAT))
    wf.setframerate(RATE)
    wf.writeframes(data)
    wf.close()
    sysdir = os.getcwd()
    wavfile = sysdir + "/livewav.wav"
    config = Decoder.default_config()
    config.set_string('-hmm', hmdir)
    config.set_string('-lm', lmdir)
    config.set_string('-dict', dictd)
    config.set_string('-logfn', '/dev/null')

    speechRec = Decoder(config)

    with open(wavfile, 'rb') as wavFile:
        speechRec.decode_raw(wavFile)

    return speechRec.hyp().hypstr
Beispiel #24
0
class PocketsphinxHotWord(HotWordEngine):
    """Wake word engine using pocketsphinx keyword spotting."""

    def __init__(self, key_phrase="hey mycroft", config=None, lang="en-us"):
        super(PocketsphinxHotWord, self).__init__(key_phrase, config, lang)
        # Hotword module imports
        from pocketsphinx import Decoder
        # Hotword module config
        module = self.config.get("module")
        if module != "pocketsphinx":
            LOG.warning(
                str(module) + " module does not match with "
                "Hotword class pocketsphinx")
        # Hotword module params
        self.phonemes = self.config.get("phonemes", "HH EY . M AY K R AO F T")
        self.num_phonemes = len(self.phonemes.split())
        self.threshold = self.config.get("threshold", 1e-90)
        # NOTE(review): default 1600 looks like a typo for 16000 — confirm
        self.sample_rate = self.listener_config.get("sample_rate", 1600)
        dict_name = self.create_dict(self.key_phrase, self.phonemes)
        config = self.create_config(dict_name, Decoder.default_config())
        self.decoder = Decoder(config)

    def create_dict(self, key_phrase, phonemes):
        """Write a temporary word->phoneme dictionary; return its path."""
        (fd, file_name) = tempfile.mkstemp()
        words = key_phrase.split()
        phoneme_groups = phonemes.split('.')
        with os.fdopen(fd, 'w') as f:
            for word, phoneme in zip(words, phoneme_groups):
                f.write(word + ' ' + phoneme + '\n')
        return file_name

    def create_config(self, dict_name, config):
        """Populate ``config`` for keyword spotting; logs an error (but
        still proceeds) when the language model directory is missing."""
        model_file = join(RECOGNIZER_DIR, 'model', self.lang, 'hmm')
        if not exists(model_file):
            LOG.error('PocketSphinx model not found at ' + str(model_file))
        config.set_string('-hmm', model_file)
        config.set_string('-dict', dict_name)
        config.set_string('-keyphrase', self.key_phrase)
        config.set_float('-kws_threshold', float(self.threshold))
        config.set_float('-samprate', self.sample_rate)
        config.set_int('-nfft', 2048)
        config.set_string('-logfn', '/dev/null')
        return config

    def transcribe(self, byte_data, metrics=None):
        """Decode one utterance; return the pocketsphinx hypothesis."""
        start = time.time()
        self.decoder.start_utt()
        self.decoder.process_raw(byte_data, False, False)
        self.decoder.end_utt()
        if metrics:
            metrics.timer("mycroft.stt.local.time_s", time.time() - start)
        return self.decoder.hyp()

    def found_wake_word(self, frame_data):
        """Truthy when the key phrase appears in the decoded audio."""
        hyp = self.transcribe(frame_data)
        return hyp and self.key_phrase in hyp.hypstr.lower()
class PocketsphinxListener:
    """Pocketsphinx listener implementation used for comparison with Precise"""
    def __init__(self,
                 key_phrase,
                 dict_file,
                 hmm_folder,
                 threshold=1e-90,
                 chunk_size=-1):
        from pocketsphinx import Decoder
        config = Decoder.default_config()
        config.set_string('-hmm', hmm_folder)
        config.set_string('-dict', dict_file)
        config.set_string('-keyphrase', key_phrase)
        config.set_float('-kws_threshold', float(threshold))
        config.set_float('-samprate', 16000)
        config.set_int('-nfft', 2048)
        config.set_string('-logfn', '/dev/null')
        self.key_phrase = key_phrase
        # rolling audio window, pre-filled with silence
        self.buffer = b'\0' * pr.sample_depth * pr.buffer_samples
        self.pr = pr
        # -1 is passed straight to stream.read (read everything available)
        self.read_size = -1 if chunk_size == -1 else pr.sample_depth * chunk_size

        try:
            self.decoder = Decoder(config)
        except RuntimeError:
            # re-raise with the offending options for easier debugging
            options = dict(key_phrase=key_phrase,
                           dict_file=dict_file,
                           hmm_folder=hmm_folder,
                           threshold=threshold)
            raise RuntimeError('Invalid Pocketsphinx options: ' + str(options))

    def _transcribe(self, byte_data):
        """Decode one utterance; return the pocketsphinx hypothesis."""
        self.decoder.start_utt()
        self.decoder.process_raw(byte_data, False, False)
        self.decoder.end_utt()
        return self.decoder.hyp()

    def found_wake_word(self, frame_data):
        """True when the key phrase is heard in ``frame_data``.

        A short run of zero bytes (10 ms of 16-bit 16 kHz silence) is
        appended — presumably so the decoder finalizes its hypothesis.
        """
        hyp = self._transcribe(frame_data + b'\0' * int(2 * 16000 * 0.01))
        return bool(hyp and self.key_phrase in hyp.hypstr.lower())

    def update(self, stream: Union[BinaryIO, np.ndarray, bytes]) -> float:
        """Slide the rolling buffer forward by one chunk and return
        1.0/0.0 for wake-word detected in the current window."""
        if isinstance(stream, np.ndarray):
            chunk = audio_to_buffer(stream)
        else:
            if isinstance(stream, (bytes, bytearray)):
                chunk = stream
            else:
                chunk = stream.read(self.read_size)
            if len(chunk) == 0:
                raise EOFError
        self.buffer = self.buffer[len(chunk):] + chunk
        return float(self.found_wake_word(self.buffer))
Beispiel #26
0
    def __init__(self, file_name='aux.wav', raspi=False):
        """Set up audio capture, the pocketsphinx decoder, the
        speech_recognition recognizer and the local pyttsx3 TTS engine."""
        self.FILE_NAME = file_name
        self.audio = pyaudio.PyAudio()
        self.raspi = raspi

        self.config = Decoder.default_config()
        self.config.set_string('-hmm',
                               os.path.join(self.MODELDIR, 'acoustic-model'))
        self.config.set_string(
            '-dict',
            os.path.join(self.MODELDIR, 'pronounciation-dictionary.dict'))
        # silence pocketsphinx's own logging
        self.config.set_string('-logfn', os.devnull)
        self.decoder = Decoder(self.config)
        self.r = sr.Recognizer()
        print("adjunting...")
        # calibrate the recognizer's energy threshold to ambient noise
        with sr.Microphone() as source:
            self.r.adjust_for_ambient_noise(source)

        # tts
        self.tts = pyttsx3.init()
        self.tts.setProperty('rate', self.RATE)
        self.tts.setProperty('volume', self.VOLUME)
        self.tts.setProperty('voice', 'spanish-latin-am')
Beispiel #27
0
    def __init__(self, in_fs, out_fs, mute_period_length, kws_frame_length):
        """Configure and start a daemon thread that spots the keyword
        'alexa' with pocketsphinx."""
        threading.Thread.__init__(self)
        # initialize configuration
        self.daemon = True
        self.exit_flag = False
        self.in_fs = in_fs
        self.out_fs = out_fs
        self.mute_period_frames_count = int(in_fs * mute_period_length)
        self.kws_frames_count = int(in_fs * kws_frame_length)
        model_path = get_model_path()
        config = Decoder.default_config()
        config.set_string('-hmm', os.path.join(model_path, 'en-us'))  # acoustic model path
        # config.set_string('-lm',"./tests/7567.lm")
        config.set_string('-dict',
                          os.path.join(model_path,
                                       'cmudict-en-us.dict'))  # dictionary path
        config.set_string('-keyphrase', 'alexa')
        config.set_float('-kws_threshold', 1e-20)
        config.set_string('-logfn', './logs/tmp')  # send INFO log output elsewhere
        self.decoder = Decoder(config)
        self.decoder.start_utt()

        self.start()
Beispiel #28
0
    def __init__(self):
        """Create the AI helper, the sphinx decoder and the microphone
        input stream, then announce readiness."""
        print ("[__init__]+++")

        # Create a decoder with certain model
        self.ai = Artificialintelligence()
        self.config = self.createConfig("code")
        self.decoder = Decoder(self.config)
        print ("[__init__] created decoder")

        print ("[__init__]---")

        audio = pyaudio.PyAudio()
        self.stream = audio.open(format=self.FORMAT,
                                 channels=self.CHANNELS,
                                 rate=self.RATE,
                                 input=True,
                                 frames_per_buffer=self.CHUNK)
        # Indicate listening for next utterance
        print ("READY....")
Beispiel #29
0
 def createConfig(self, pGramma):
     """Create configuration with acoustic model path, grammar and
     dictionary."""
     print("[createConfig]+++")
     resource_dir = "../resource/"
     cfg = Decoder.default_config()
     cfg.set_string('-hmm',
                    os.path.join(self.MODELDIR, 'hmm/lt.cd_cont_200/'))
     cfg.set_string('-fsg', os.path.join(resource_dir, pGramma + '.fsg'))
     cfg.set_string('-dict', os.path.join(resource_dir, 'service.dict'))
     print("[createConfig]---")
     return cfg
Beispiel #30
0
def init():
	"""Initialise the module-level decoder, PyAudio handle and recognizer."""
	global decoder, p, r

	# Point a default pocketsphinx config at the model paths from settings.
	config = DefaultConfig()
	for option, value in (
			('-hmm', settings.POCKET_HMM_ACOUSTIC_MODEL),
			('-lm', settings.POCKET_LANGUAGE_MODEL),
			('-dict', settings.POCKET_DICTIONARY)):
		config.set_string(option, value)

	# Streaming decoder plus the audio subsystem handle.
	decoder = Decoder(config)
	p = pyaudio.PyAudio()

	# speech_recognition backend used alongside pocketsphinx.
	r = speech_recognition.Recognizer()
def transcribe(decoder: pocketsphinx.Decoder,
               audio_data: bytes,
               nbest: int = 0) -> Dict[str, Any]:
    """Transcribes audio data to text."""
    # Decode the whole buffer as a single utterance, timing the call.
    start_time = time.time()
    decoder.start_utt()
    decoder.process_raw(audio_data, False, True)
    decoder.end_utt()
    end_time = time.time()

    logger.debug(f"Decoded audio in {end_time - start_time} second(s)")

    # hyp() is None when nothing was recognized; fall back to empty text.
    hyp = decoder.hyp()
    if hyp is None:
        transcription, likelihood = "", 0.0
    else:
        transcription = hyp.hypstr
        likelihood = decoder.get_logmath().exp(hyp.prob)

    result = {
        "text": transcription,
        "transcribe_seconds": end_time - start_time,
        "likelihood": likelihood,
    }

    if nbest > 0:
        # Include alternative transcriptions
        result["nbest"] = {
            nb.hypstr: nb.score for nb in decoder.nbest()[:nbest]
        }

    return result
class LocalRecognizer(object):
    def __init__(self, key_phrase, phonemes, threshold, sample_rate=16000,
                 lang="en-us"):
        self.lang = lang
        self.key_phrase = key_phrase
        self.sample_rate = sample_rate
        self.threshold = threshold
        self.phonemes = phonemes
        dict_name = self.create_dict(key_phrase, phonemes)
        self.decoder = Decoder(self.create_config(dict_name))

    def create_dict(self, key_phrase, phonemes):
        (fd, file_name) = tempfile.mkstemp()
        words = key_phrase.split()
        phoneme_groups = phonemes.split('.')
        with os.fdopen(fd, 'w') as f:
            for word, phoneme in zip(words, phoneme_groups):
                f.write(word + ' ' + phoneme + '\n')
        return file_name

    def create_config(self, dict_name):
        config = Decoder.default_config()
        config.set_string('-hmm', os.path.join(BASEDIR, 'model', self.lang,
                                               'hmm'))
        config.set_string('-dict', dict_name)
        config.set_string('-keyphrase', self.key_phrase)
        config.set_float('-kws_threshold', float(self.threshold))
        config.set_float('-samprate', self.sample_rate)
        config.set_int('-nfft', 2048)
        config.set_string('-logfn', '/dev/null')
        return config

    def transcribe(self, byte_data, metrics=None):
        start = time.time()
        self.decoder.start_utt()
        self.decoder.process_raw(byte_data, False, False)
        self.decoder.end_utt()
        if metrics:
            metrics.timer("mycroft.stt.local.time_s", time.time() - start)
        return self.decoder.hyp()

    def is_recognized(self, byte_data, metrics):
        hyp = self.transcribe(byte_data, metrics)
        return hyp and self.key_phrase in hyp.hypstr.lower()

    def found_wake_word(self, hypothesis):
        return hypothesis and self.key_phrase in hypothesis.hypstr.lower()
Beispiel #33
0
def init():
    """Create the module-level decoder, PyAudio handle and recognizer."""
    global decoder, p, r

    # Default config using the bundled en-us model and the keyword list
    # from settings; sphinx logging is redirected to the configured file.
    config = DefaultConfig()
    config.set_string('-logfn', settings.POCKETSPHINX_LOG)
    model_path = get_model_path()
    config.set_string('-hmm', os.path.join(model_path, 'en-us'))
    config.set_string('-dict', os.path.join(model_path, 'cmudict-en-us.dict'))
    config.set_string('-kws', settings.KEYPHRASES)

    # Streaming decoder and audio subsystem handle.
    decoder = Decoder(config)
    p = pyaudio.PyAudio()

    r = speech_recognition.Recognizer()
Beispiel #34
0
def get_decoder_config():
    """
    Get a populated configuration object for the pocketsphinx Decoder.

    Every model file is resolved relative to the model directory reported
    by get_model_path().
    """
    model_dir = get_model_path()

    # Option name -> path relative to the model directory.
    model_files = {
        "-dict": "cmudict-en-us.dict",
        "-fdict": "en-us/noisedict",
        "-featparams": "en-us/feat.params",
        "-hmm": "en-us",
        "-lm": "en-us.lm.bin",
        "-mdef": "en-us/mdef",
        "-mean": "en-us/means",
        "-sendump": "en-us/sendump",
        "-tmat": "en-us/transition_matrices",
        "-var": "en-us/variances",
    }

    config = Decoder.default_config()
    for option, rel_path in model_files.items():
        config.set_string(option, os.path.join(model_dir, rel_path))

    return config
    def __init__(self, config=None):
        """
        Initialise the decoder.

        :param config: optional pocketsphinx Config. When None, a fresh
            default config is created per instance. (The original default
            ``config=Decoder.default_config()`` was evaluated once at
            definition time, so every instance silently shared and mutated
            the same Config object.)
        :raises ConfigError: when more than one search argument is set.
        """
        if config is None:
            config = Decoder.default_config()
        assert isinstance(config, Config)

        search_args_set = search_arguments_set(config)

        if len(search_args_set) == 0:
            # Use the language model by default if nothing else is set
            set_lm_path(config)
        elif len(search_args_set) > 1:
            raise ConfigError(
                "more than one search argument was set in the Config "
                "object")

        # Set the required config paths if they aren't already set
        if not (config.get_string("-hmm") and config.get_string("-dict")):
            set_hmm_and_dict_paths(config)

        # Callbacks are registered later by the caller.
        self._speech_start_callback = None
        self._hypothesis_callback = None
        self._utterance_state = self._UTT_ENDED

        super(PocketSphinx, self).__init__(config)
def load_decoder(myid, model_config, out):
    """
    Build a pocketsphinx Decoder from the first section of *model_config*.

    :param myid: worker id, used only to name the decoder log file.
    :param model_config: configparser-style mapping whose first section
        holds 'hmm', 'dict' and 'lm' file paths.
    :param out: prefix for the decoder log file ('{out}_{myid}.log').
    :returns: a configured pocketsphinx Decoder.

    Exits the process via sys.exit with a distinct negative code when any
    model file is missing (kept from the original CLI behaviour).
    """
    model_name = model_config.sections()[0]
    hmm = model_config[model_name]['hmm']
    # Renamed from 'dict' so the builtin is not shadowed.
    dict_path = model_config[model_name]['dict']
    lm = model_config[model_name]['lm']
    logfn = '{}_{}.log'.format(out, myid)

    # Fail fast, with one exit code per missing artefact.
    if not os.path.exists(hmm):
        print('ERROR: {} does not exist'.format(hmm))
        sys.exit(-2)
    if not os.path.exists(lm):
        print('ERROR: {} does not exist'.format(lm))
        sys.exit(-4)
    if not os.path.exists(dict_path):
        print('ERROR: {} does not exist'.format(dict_path))
        sys.exit(-5)

    pocketsphinx_config = DefaultConfig()
    pocketsphinx_config.set_string('-hmm', hmm)
    pocketsphinx_config.set_string('-lm', lm)
    pocketsphinx_config.set_string('-dict', dict_path)
    pocketsphinx_config.set_string('-logfn', logfn)
    return Decoder(pocketsphinx_config)
class PocketSphinxASR(ASR):
	"""Offline ASR module backed by CMU pocketsphinx.

	Downloads per-language model packs (acoustic model tarball, language
	model binary and pronunciation dictionary) and decodes microphone audio
	streamed through a Recorder.
	"""

	NAME = 'Pocketsphinx ASR'
	# Packages required before this ASR can run.
	DEPENDENCIES = {
		'system': [
			'swig',
			'libpulse-dev'
		],
		'pip'   : [
			'pocketsphinx==0.1.15'
		]
	}

	# Per-language downloads: .tar = acoustic model, .lm.bin = language
	# model, .dict = pronunciation dictionary.
	LANGUAGE_PACKS = {
		'en': [
			f'{constants.GITHUB_URL}/cmusphinx-models/blob/master/en-us/en-us.tar',
			f'{constants.GITHUB_URL}/cmusphinx-models/blob/master/en-us/en-us.lm.bin',
			f'{constants.GITHUB_URL}/cmusphinx-models/blob/master/en-us/cmudict-en-us.dict'
		],
		'fr': [
			f'{constants.GITHUB_URL}/cmusphinx-models/blob/master/fr-fr/fr-fr.tar',
			f'{constants.GITHUB_URL}/cmusphinx-models/blob/master/fr-fr/fr-fr.lm.bin',
			f'{constants.GITHUB_URL}/cmusphinx-models/blob/master/fr-fr/cmudict-fr-fr.dict'
		],
		'de': [
			f'{constants.GITHUB_URL}/cmusphinx-models/blob/master/de-de/de-de.tar',
			f'{constants.GITHUB_URL}/cmusphinx-models/blob/master/de-de/de-de.lm.bin',
			f'{constants.GITHUB_URL}/cmusphinx-models/blob/master/de-de/cmudict-de-de.dict'
		]
	}


	def __init__(self):
		"""Declare capabilities; the decoder itself is built in onStart()."""
		super().__init__()
		self._capableOfArbitraryCapture = True
		self._isOnlineASR = False
		self._decoder: Optional[Decoder] = None
		self._config = None


	def onStart(self):
		"""Download the active language's models if missing and build the decoder."""
		super().onStart()

		if not self.checkLanguage():
			self.downloadLanguage()

		# NOTE(review): the python3.7 site-packages path is hardcoded —
		# this breaks under any other Python version; confirm intended.
		self._config = Decoder.default_config()
		self._config.set_string('-hmm', f'{self.Commons.rootDir()}/venv/lib/python3.7/site-packages/pocketsphinx/model/{self.LanguageManager.activeLanguageAndCountryCode.lower()}')
		self._config.set_string('-lm', f'{self.Commons.rootDir()}/venv/lib/python3.7/site-packages/pocketsphinx/model/{self.LanguageManager.activeLanguageAndCountryCode.lower()}.lm.bin')
		self._config.set_string('-dict', f'{self.Commons.rootDir()}/venv/lib/python3.7/site-packages/pocketsphinx/model/cmudict-{self.LanguageManager.activeLanguageAndCountryCode.lower()}.dict')
		self._decoder = Decoder(self._config)


	def checkLanguage(self) -> bool:
		"""Return True when the acoustic-model directory for the active language exists."""
		if not Path(self.Commons.rootDir(), f'venv/lib/python3.7/site-packages/pocketsphinx/model/{self.LanguageManager.activeLanguageAndCountryCode.lower()}').exists():
			self.logInfo('Missing language model')
			return False

		return True


	def timeout(self):
		"""On session timeout, make sure the current utterance is closed."""
		super().timeout()
		try:
			self._decoder.end_utt()
		except:
			# If this fails we don't care, at least we tried to close the utterance
			pass


	def downloadLanguage(self) -> bool:
		"""Fetch the language pack files into the pocketsphinx model directory.

		Tarballs (acoustic models) are extracted into a directory named after
		the language code, replacing any previous extraction, then deleted.
		"""
		self.logInfo(f'Downloading language model for "{self.LanguageManager.activeLanguage}"')

		venv = Path(self.Commons.rootDir(), 'venv/lib/python3.7/site-packages/pocketsphinx/')
		for url in self.LANGUAGE_PACKS[self.LanguageManager.activeLanguage]:
			filename = Path(url).name
			download = Path(venv, 'model', filename)
			self.Commons.downloadFile(url=f'{url}?raw=true', dest=str(download))

			if download.suffix == '.tar':
				dest = Path(venv, 'model', self.LanguageManager.activeLanguageAndCountryCode.lower())

				# Replace any previously extracted acoustic model.
				if dest.exists():
					shutil.rmtree(dest)

				tar = tarfile.open(str(download))
				tar.extractall(str(dest))

				download.unlink()

		self.logInfo('Downloaded and installed')
		return True


	def decodeStream(self, session: DialogSession) -> Optional[ASRResult]:
		"""Decode microphone audio for *session* until speech ends or timeout.

		Feeds recorder chunks to the decoder and closes the utterance on the
		speech→silence transition. Returns None when nothing was recognized.
		"""
		super().decodeStream(session)

		result = None
		with Stopwatch() as processingTime:
			with Recorder(self._timeout) as recorder:
				self.ASRManager.addRecorder(session.siteId, recorder)
				self._decoder.start_utt()
				inSpeech = False
				for chunk in recorder:
					if self._timeout.isSet():
						break

					self._decoder.process_raw(chunk, False, False)
					# Track in-speech transitions; stop at speech -> silence.
					if self._decoder.get_in_speech() != inSpeech:
						inSpeech = self._decoder.get_in_speech()
						if not inSpeech:
							self._decoder.end_utt()
							result = self._decoder.hyp() if self._decoder.hyp() else None
							break

				self.end(recorder, session)

		return ASRResult(
			text=result.hypstr.strip(),
			session=session,
			likelihood=self._decoder.hyp().prob,
			processingTime=processingTime.time
		) if result else None
import os
from os import path
from pocketsphinx import pocketsphinx
from pocketsphinx import Decoder
import speech_recognition as sr
from time import sleep

MODELDIR = "BIOMEC_DICTIONARY"  # folder holding the acoustic model, LM and dictionary

# Build a decoder over the BIOMEC model (4177.* language model/dictionary);
# sphinx logging is discarded via os.devnull.
config = Decoder.default_config()
config.set_string('-hmm', path.join(MODELDIR, 'acoustic-model'))
config.set_string('-lm', path.join(MODELDIR, '4177.lm'))
config.set_string('-dict', path.join(MODELDIR, '4177.dict'))
config.set_string("-logfn", os.devnull)
decoder = Decoder(config)

commands = ['DOWN', 'GO', 'LEFT', 'RIGHT', 'STOP', 'UP']


def getCommand(phrase, commands=commands):
    """Return the first entry of *commands* occurring as a substring of *phrase*.

    Returns None when no command is found. The default binds the
    module-level command list at definition time, as before.
    """
    for command in commands:
        if command in phrase:
            return command
    return None


# Tune the speech_recognition recognizer for short command phrases.
r = sr.Recognizer()
r.energy_threshold = 1000  # minimum audio energy to consider for recording
r.pause_threshold = 0.25  # seconds of non-speaking audio before a phrase is considered complete
r.phrase_threshold = 0.15  # minimum seconds of speaking audio before we consider it a phrase
r.non_speaking_duration = 0.25  # seconds of non-speaking audio to keep on both sides of the recording
with sr.Microphone() as source:
Beispiel #39
0
def recognition_worker(audio_file,
                       queue, event, max_no_speech=120, debug=False,
                       hmm='/usr/local/share/pocketsphinx/model/en-us/en-us',
                       lm='/usr/local/share/pocketsphinx/model/en-us/en-us.lm.bin',
                       cmudict='/usr/local/share/pocketsphinx/model/en-us/cmudict-en-us.dict'):
    '''
    Read audio from `audio_file` and feed it to pocketsphinx.
    Put recognized text in `queue`. Shut down if `event` is set.
    If no speech is detected for `max_no_speech` seconds, set
    `event` and quit.
    '''
    from pocketsphinx import Decoder
    config = Decoder.default_config()
    config.set_string('-hmm', hmm)
    config.set_string('-lm', lm)
    config.set_string('-dict', cmudict)
    if not debug:
        # Silence pocketsphinx's INFO spam unless debugging.
        config.set_string('-logfn', '/dev/null')
    decoder = Decoder(config)
    # Tracks the decoder's previous in-speech flag so transitions can be
    # detected; starts True so the first silence closes no utterance.
    in_speech_bf = True
    no_speech_timer = None
    now_in_speech = False
    decoder.start_utt()
    try:
        with open(audio_file, 'rb') as f:
            f.read(40) # read RIFF header
            # NOTE(review): a canonical WAV header is 44 bytes; confirm 40
            # is intended for this audio source.
            # TODO: Probably should sanity check the audio format...
            while not event.is_set():
                buf = f.read(1024)
                if buf:
                    decoder.process_raw(buf, False, False)
                    now_in_speech = decoder.get_in_speech()
                    if debug and now_in_speech:
                        print('Found speech', file=sys.stderr)
                    # Act only on in-speech transitions.
                    if now_in_speech != in_speech_bf:
                        in_speech_bf = now_in_speech
                        if not in_speech_bf:
                            if debug:
                                print('Processing speech', file=sys.stderr)
                            # No speech, but there was speech before, so, process.
                            decoder.end_utt()
                            try:
                                speech = decoder.hyp().hypstr
                                if speech != '':
                                    if debug:
                                        print('Speech: ' + speech, file=sys.stderr)
                                    queue.put_nowait(speech)
                            except AttributeError:
                                # hyp() returns None when nothing was recognized.
                                pass
                            decoder.start_utt()
                        else:
                            # Got some speech, reset timer.
                            no_speech_timer = None
                else:
                    if debug:
                        print('No audio', file=sys.stderr)
                    # Wait a bit...
                    event.wait(0.1)
                # Signal the consumer to shut down after a long silence.
                if not now_in_speech:
                    if no_speech_timer is None:
                        no_speech_timer = datetime.datetime.now()
                    elif (datetime.datetime.now() - no_speech_timer).total_seconds() > max_no_speech:
                        if debug:
                            print('No speech, timing out', file=sys.stderr)
                        event.set()
    except KeyboardInterrupt:
        pass
 def __init__(self):
     """Build a pocketsphinx decoder from the module-level model paths."""
     cfg = Decoder.default_config()
     for option, value in (('-hmm', SPHINX_HMM),
                           ('-lm', SPHINX_LM),
                           ('-dict', SPHINX_DICT)):
         cfg.set_string(option, value)
     self.decoder = Decoder(cfg)
Beispiel #41
0
'''
Created on Dec 29, 2013

@author: Mindaugas Greibus
'''

from os import path
from pocketsphinx import Decoder


#from sphinxbase import *
#MODELDIR = "../models"
MODELDIR = "/home/as/src/speech/sphinx/lt-pocketsphinx-tutorial/impl/models"

# Create a decoder with certain model
# (Lithuanian acoustic model, JSGF grammar and matching dictionary).
config = Decoder.default_config()
config.set_string('-hmm', path.join(MODELDIR, 'hmm/lt.cd_cont_200/'))
config.set_string('-jsgf', path.join(MODELDIR, 'lm/robotas.gram'))
config.set_string('-dict', path.join(MODELDIR, 'dict/robotas.dict'))
decoder = Decoder(config)

# Decode one test WAV file in a single call.
# NOTE(review): the file handle is never closed explicitly.
decoder.decode_raw(open(path.join(MODELDIR, '../test/audio/varyk_pirmyn-16k.wav'), 'rb'))

# Retrieve hypothesis.
hypothesis = decoder.hyp()
print ('Best hypothesis: ', hypothesis.best_score, hypothesis.hypstr)
#print 'Best hypothesis segments: ', [seg.word for seg in decoder.seg()]

from os import path
import pyaudio


# Audio capture settings: 16 kHz mono, 16-bit samples, 4096-frame chunks.
CHUNK = 4096
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 16000



MODELDIR = "../models"
#MODELDIR = "/home/as/src/speech/sphinx/lt-pocketsphinx-tutorial/impl/models"

# Create a decoder with certain model
# (Lithuanian acoustic model, JSGF grammar and matching dictionary).
config = Decoder.default_config()
config.set_string('-hmm', path.join(MODELDIR, 'hmm/lt.cd_cont_200/'))
config.set_string('-jsgf', path.join(MODELDIR, 'lm/robotas.gram'))
config.set_string('-dict', path.join(MODELDIR, 'dict/robotas.dict'))
decoder = Decoder(config)


# Open a blocking microphone capture stream with the settings above.
p = pyaudio.PyAudio()

stream = p.open(format=FORMAT,
                channels=CHANNELS,
                rate=RATE,
                input=True,
                frames_per_buffer=CHUNK)
#Indicate listening for next utterance
print ("READY....")
Beispiel #43
0
def decodepassive():
	"""Decode the passive-listening WAV file and return the top hypothesis.

	Fixes vs. original: consistent indentation (the original mixed tabs and
	spaces, a TabError under Python 3) and the ``with`` target no longer
	shadows the module-level ``passivewav`` path.
	"""
	speechRec = Decoder(hmm=hmdir, lm=lmdir, dict=dictd)
	with open(passivewav, 'rb') as wav_file:
		speechRec.decode_raw(wav_file)
		result = speechRec.get_hyp()
	return result[0]
Beispiel #44
0
from os import environ, path

from sphinxbase import Config
from pocketsphinx import Decoder

MODELDIR = "pocketsphinx/model"
DATADIR = "pocketsphinx/test/data"

# Create a decoder with certain model (bundled en-us acoustic model,
# language model and dictionary).
config = Decoder.default_config()
config.set_string('-hmm', path.join(MODELDIR, 'en-us/en-us'))
config.set_string('-lm', path.join(MODELDIR, 'en-us/en-us.lm.bin'))
config.set_string('-dict', path.join(MODELDIR, 'en-us/cmudict-en-us.dict'))
# Fix vs. original: the decoder was constructed twice and the first
# instance discarded; build it once.
decoder = Decoder(config)

# Decode streaming data, closing the file when done (the original left
# the handle open).
decoder.start_utt()
with open(path.join(DATADIR, 'goforward.raw'), 'rb') as stream:
    while True:
        buf = stream.read(1024)
        if not buf:
            break
        decoder.process_raw(buf, False, False)
decoder.end_utt()
print ('Best hypothesis segments: ', [seg.word for seg in decoder.seg()])
Beispiel #45
0
class stt:
    """Pocketsphinx-based speech-to-text wrapper.

    Fix vs. original: ``print self.hyp.best_score`` was Python-2 print
    syntax — a SyntaxError under Python 3 — and is now a ``print()`` call.
    """

    def __init__(self, profile, hmm=None, dict=None, lm=None,
                 kws_threshold=None, keyphrase=None):
        """Configure and build the decoder.

        :param profile: opaque profile object stored on the instance.
        :param hmm: acoustic model path, joined onto SPHINX_ROOT.
        :param dict: pronunciation dictionary path (parameter name kept for
            backward compatibility, although it shadows the builtin).
        :param lm: language model path.
        :param kws_threshold: optional keyword-spotting threshold.
        :param keyphrase: optional keyphrase; selects the keyphrase
            dictionary/LM defaults instead of the corpus ones.
        """
        self.profile = profile
        # Choose default dictionary/LM depending on keyphrase mode.
        if keyphrase:
            if not dict:
                dict = fullpath('config/keyphrase.dic')
            if not lm:
                lm = fullpath('config/keyphrase.lm')
        else:
            if not dict:
                dict = fullpath('config/corpus.dic')
            if not lm:
                lm = fullpath('config/corpus.lm')

        if not hmm:
            hmm = 'share/pocketsphinx/model/en-us/en-us'

        config = Decoder.default_config()
        config.set_string('-hmm', os.path.join(SPHINX_ROOT, hmm))
        config.set_string('-dict', dict)
        config.set_string('-lm', lm)
        config.set_string('-logfn', fullpath('config/sphinx.log'))

        if keyphrase:
            config.set_string('-keyphrase', keyphrase)
        if kws_threshold:
            config.set_float('-kws_threshold', kws_threshold)

        self.decoder = Decoder(config)

        # NOTE(review): always binds the darwin variant — confirm platform
        # selection is handled by the caller.
        self.transcribe = self.transcribe_darwin
        self.hyp = None

    def transcribe_darwin(self, wav):
        """Decode raw audio *wav*; return the hypothesis string or None."""
        self.decoder.start_utt()
        self.decoder.process_raw(wav, False, False)
        self.decoder.end_utt()

        self.hyp = self.decoder.hyp()
        if self.hyp:
            return self.hyp.hypstr

    def get_prob(self):
        """Return the probability of the last hypothesis (None if absent),
        printing its best score as a side effect."""
        if self.hyp:
            print(self.hyp.best_score)  # was a Python-2 print statement
            return self.hyp.prob

    def transcribe_linux(self, wav):
        """Decode raw audio *wav* using the tuple-returning get_hyp() API."""
        self.decoder.start_utt()
        self.decoder.process_raw(wav, False, False)
        self.decoder.end_utt()

        result = self.decoder.get_hyp()
        if result:
            return result[0]
Beispiel #46
0
    def run( self ):
        """Worker loop: build a decoder from self.config, open the microphone
        and decode utterances until self._terminate, dispatching each
        non-empty hypothesis to self.decode()."""
        conf = Decoder.default_config()
        conf.set_string('-hmm', self.config.hmmPS)
        conf.set_string('-lm', self.config.lmPS)
        conf.set_string('-dict', self.config.dictPS)
        # MLLR speaker adaptation is optional — only applied when the file exists.
        if os.path.isfile(self.config.mllrPS):
            conf.set_string('-mllr', self.config.mllrPS)
        decoder = Decoder(conf)

        # Blocking 16 kHz mono capture stream.
        p = pyaudio.PyAudio()
        stream = p.open( format=pyaudio.paInt16,
                         channels=1,
                         rate=16000,
                         input=True,
                         frames_per_buffer=1024 )
        stream.start_stream()
        self.samplewith = p.get_sample_size(pyaudio.paInt16)

        # NOTE(review): start_utt('') takes an argument only in old
        # pocketsphinx bindings; newer ones use start_utt() — confirm version.
        in_speech_bf = True
        decoder.start_utt('')
        while not self._terminate:
            buf = stream.read(1024)
            if buf:
                if self.save:
                    self.liSave.append(buf)
                    self.numSave += 1
                    if self.numSave > self.maxSave: # guard against leaving the microphone recording forever
                        self.activeSave(self.fichWAV)
                decoder.process_raw(buf, False, False)
                # Act on in-speech transitions; close the utterance on speech -> silence.
                if decoder.get_in_speech() != in_speech_bf:
                    in_speech_bf = decoder.get_in_speech()
                    if not in_speech_bf:
                        decoder.end_utt()
                        try:
                            if decoder.hyp().hypstr != '':
                                self.decode(decoder.hyp().hypstr)
                        except AttributeError:
                            # hyp() returns None when nothing was recognized.
                            pass
                        decoder.start_utt('')
            else:
                break
        decoder.end_utt()
Beispiel #47
0
def record(THRESHOLD=None):
    """Record up to LISTEN_TIME seconds of audio, stop early when the rolling
    score average falls below the threshold, save the audio to livewav.wav
    and return pocketsphinx's top hypothesis string.

    Fixes vs. original: consistent indentation (the original mixed tabs and
    spaces — a TabError under Python 3), print() calls, floor division for
    the chunk count (``/`` yields a float that range() rejects), and byte
    frames joined with ``b''``.

    :param THRESHOLD: silence threshold; fetched via fetchThreshold() when None.
    """
    FORMAT = pyaudio.paInt16
    LISTEN_TIME = 4
    WAVE_OUTPUT_FILENAME = "livewav.wav"

    p = pyaudio.PyAudio()
    if THRESHOLD is None:
        THRESHOLD = fetchThreshold()
        print(THRESHOLD)

    stream = p.open(format=FORMAT,
                    channels=1,
                    rate=RATE,
                    input=True,
                    frames_per_buffer=CHUNK)

    print("* recording")
    frames = []
    # Rolling window of recent scores, seeded above threshold so recording
    # doesn't stop before any audio has been scored.
    lastN = [THRESHOLD * 1.2 for _ in range(30)]
    for _ in range(0, RATE // CHUNK * LISTEN_TIME):
        data = stream.read(CHUNK)
        frames.append(data)
        lastN.pop(0)
        lastN.append(getScore(data))
        average = sum(lastN) / float(len(lastN))
        # Stop early once the rolling average drops well below threshold.
        if average < THRESHOLD * 0.8:
            break

    print("* done recording")
    stream.close()
    p.terminate()

    # write data to WAVE file
    data = b''.join(frames)
    wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
    wf.setnchannels(1)
    wf.setsampwidth(p.get_sample_size(FORMAT))
    wf.setframerate(RATE)
    wf.writeframes(data)
    wf.close()

    wavfile = os.path.join(os.getcwd(), "livewav.wav")

    # Decode the freshly written file and return the top hypothesis.
    speechRec = Decoder(hmm=hmdir, lm=lmdir, dict=dictd)
    with open(wavfile, 'rb') as wavFile:
        speechRec.decode_raw(wavFile)
        result = speechRec.get_hyp()

    return result[0]
Beispiel #48
0
class ContinuousPocketsphinx(object):
    '''
    Continuous Lithuanian speech recognizer driving a simple spoken dialog.

    Fix vs. original: speak() no longer crashes with UnboundLocalError when
    called with text=None ("out" was only assigned inside the if-branch but
    printed unconditionally).
    '''
    # Audio capture settings: 16 kHz mono, 16-bit samples, 4096-frame chunks.
    CHUNK = 4096
    FORMAT = pyaudio.paInt16
    CHANNELS = 1
    RATE = 16000
    #MODELDIR = "../models"
    MODELDIR = "/home/mgreibus/src/speech/sphinx/lt-pocketsphinx-tutorial/impl/models"

    decoder = None
    stream = None
    config = None
    ai = None



    def __init__(self):
        '''
        Constructor: wires up the AI helper, the decoder built from the
        "code" grammar and the microphone input stream.
        '''
        print ("[__init__]+++")

        # Create a decoder with certain model
        self.ai = Artificialintelligence()
        self.config = self.createConfig("code")
        self.decoder = Decoder(self.config)
        print ("[__init__] created decoder")

        print ("[__init__]---")

        p = pyaudio.PyAudio()

        self.stream = p.open(format=self.FORMAT,
                channels=self.CHANNELS,
                rate=self.RATE,
                input=True,
                frames_per_buffer=self.CHUNK)
        #Indicate listening for next utterance
        print ("READY....")

    def updateGrammar(self,pDecoder, pGramma):
        '''
        Update decoder language model from fsg file
        '''
        print ("[updateGrammar]+++" + pGramma)
        logmath = pDecoder.get_logmath()
        fsg = sphinxbase.FsgModel(os.path.join("../resource/", pGramma+'.fsg'), logmath, 7.5)
        pDecoder.set_fsg("default",fsg)
        pDecoder.set_search("default")
        print ("[updateGrammar]---")


    def createConfig(self,pGramma):
        '''
        Create configuration with acoustic model path, grammar and dictionary.
        '''
        print ("[createConfig]+++")
        config = Decoder.default_config()
        config.set_string('-hmm', os.path.join(self.MODELDIR, 'hmm/liepa.cd_semi_200/'))
        config.set_string('-fsg', os.path.join("../resource/", pGramma+'.fsg'))
        config.set_string('-dict', os.path.join("../resource/", 'service.dict'))
        print ("[createConfig]---")
        return config

    def speak(self,text):
        '''
        Speak *text* through the external TTS binary, logging what was said.
        '''
        print("Speak: ", text)
        # FIX: "out" was unbound when text is None, crashing the final print.
        out = None
        if text is not None:
            aProcess = subprocess.Popen(['/home/mgreibus/bin/tark-win-lt', text], stderr=subprocess.STDOUT)
            out = aProcess.communicate()[0]
            time.sleep (0.100)
        print("ended Speak: ", out)


    def said(self, aiContext, text):
        '''
        Feed recognized *text* to the AI and speak its response.
        '''
        print ("[said]+++", text)
        aiContext = self.ai.said(text, aiContext)
        print ('AI response: ',  aiContext.state, aiContext.response)
        self.speak(aiContext.response)
        # NOTE(review): recursing with the same arguments can loop forever
        # if interactiveStep never flips — confirm intended.
        if aiContext.interactiveStep is False :
            self.said(aiContext, text)
        print ("[said]---")
        return aiContext

    def recognized(self, pStream, pDecoder, aiContext):
        '''
        Close the current utterance, dispatch the hypothesis to the AI and
        re-arm the decoder/stream for the next utterance.
        '''
        print ("[recognized]+++")
        pStream.stop_stream()
        pDecoder.end_utt()
        # Retrieve hypothesis.
        hypothesis = pDecoder.hyp()
        if hypothesis is not None:
            # NOTE(review): hyp().uttid and hypstr.decode() exist only in old
            # pocketsphinx bindings — confirm version.
            print ('Best hypothesis: ', hypothesis.uttid, hypothesis.best_score, hypothesis.hypstr)
            self.said(aiContext, hypothesis.hypstr.decode('utf-8'))
            if aiContext.state in aiContext.GRAM:
                self.updateGrammar(pDecoder, aiContext.GRAM[aiContext.state])
        elif (time.time() - aiContext.stateStarted) > 10:
            # Nothing heard for a while: repeat the prompt.
            self.speak(aiContext.response)
            aiContext.stateStarted = time.time()
        print ("Time: ", (time.time() - aiContext.stateStarted))

        print("AI response ", aiContext.response)
        time.sleep (0.100)
        #Indicate listening for next utterance
        pStream.start_stream()
        pDecoder.start_utt(None)
        print ("READY....")
        return aiContext

    def run(self):
        '''
        Executor: stream microphone audio into the decoder and trigger
        recognition on each speech -> silence transition.
        '''
        print("* start recording")
        self.decoder.start_utt(None)
        cur_vad_state = 0
        aiContext = self.ai.createContext()
        self.said(aiContext, None)
        while True:
            data = self.stream.read(self.CHUNK)
            time.sleep (0.100)
            self.decoder.process_raw(data, False, False)
            vad_state = self.decoder.get_vad_state()
            if vad_state and not cur_vad_state:
                #silence -> speech transition,
                #let user know that we heard
                print("Listening...\n")
            if not vad_state and cur_vad_state:
                #speech -> silence transition,
                #time to start new utterance
                aiContext = self.recognized(self.stream,self.decoder, aiContext)
                if aiContext.state == aiContext.STATE_THANKS:
                    break
            cur_vad_state = vad_state
class SphinxWrapper(object):
    '''
For audio stream feeding is used `process_raw(...)` method. It also updates vad status: if voice found in signal.
Before signal is fed to decoder, it should be instructed that new utterance is expected.
When Vad says that speech segment ended it should be called `stopListening(...)`, only then we could request hypothesis what was said. `calculateHypothesis(...)`

    Fixes vs. original: calculateVadState() now calls get_vad_state()
    instead of returning the bound method, and prepareDecoder() calls
    updateGrammar() with the right number of arguments.
    '''

    #MODELDIR = "../models"
    #MODELDIR = "/home/as/src/speech/sphinx/lt-pocketsphinx-tutorial/impl/models"
    MODELDIR = "../../lt-pocketsphinx-tutorial/impl/models"

    decoder = None
    config = None
    previousVadState = 0
    currentVadState = 0

    def __init__(self):
        '''
        Constructor
        '''

    def prepareDecoder(self, pGramma):
        '''
        Entry point where sphinx decoder is initialized or grammar updated
        '''
        if self.decoder is None:
            self.config = self.createConfig(pGramma)
            self.decoder = Decoder(self.config)
        else:
            # FIX: updateGrammar takes only the grammar name; the original
            # passed the decoder as well, raising TypeError.
            self.updateGrammar(pGramma)

    def createConfig(self,pGramma):
        '''
        Create configuration with acoustic model path, grammar and dictionary
        '''
        print ("[createConfig]+++")
        config = Decoder.default_config()
        config.set_string('-hmm', os.path.join(self.MODELDIR, 'hmm/lt.cd_cont_200/'))
        config.set_string('-fsg', os.path.join("../resource/", pGramma+'.fsg'))
        config.set_string('-dict', os.path.join("../resource/", 'service.dict'))
        print ("[createConfig]---")
        return config

    def updateGrammar(self,pGramma):
        '''
        Update decoder language model from fsg file
        '''
        print ("[updateGrammar]+++" + pGramma)
        logmath = self.decoder.get_logmath()
        fsg = sphinxbase.FsgModel(os.path.join("../resource/", pGramma+'.fsg'), logmath, 7.5)
        self.decoder.set_fsg("default",fsg)
        self.decoder.set_search("default")
        print ("[updateGrammar]---")

    def startListening(self):
        """
        Instruct decoder that new utterance should be expected.
        NOTE(review): start_utt(None) is the old bindings' signature;
        newer pocketsphinx uses start_utt() — confirm version.
        """
        self.decoder.start_utt(None)


    def stopListening(self):
        """
        Instruct decoder that an utterance is not expected any more
        """
        self.decoder.end_utt()


    def process_raw(self, data):
        """
        Feed decoder with raw audio data, then refresh the VAD state pair
        used for transition detection.
        """
        self.decoder.process_raw(data, False, False)
        self.previousVadState = self.currentVadState
        self.currentVadState = self.decoder.get_vad_state()

    def calculateHypothesis(self):
        '''Return the decoder's current hypothesis (None when nothing heard).'''
        return self.decoder.hyp()

    def calculateVadState(self):
        '''Return the decoder's current VAD state.'''
        # FIX: the original returned the bound method (missing parentheses).
        return self.decoder.get_vad_state()

    def isVoiceStarted(self):
        '''
        silence -> speech transition,
        '''
        return self.currentVadState and not self.previousVadState

    def isVoiceEnded(self):
        '''
        speech -> silence transition,
        '''
        return not self.currentVadState and self.previousVadState
Beispiel #50
0
class VoiceService(object):
    """Voice prompt/response service driven by a pocketsphinx decoder.

    Queues "prompts" (a spoken/played message plus a keyword search), plays
    each one, then listens for the keyword within an optional timeout.  All
    work happens on the single thread running run().
    """

    # Audio capture settings; audio_device=None selects the default device.
    audio_device = None
    buffer_size = 2048
    sampling_rate = 16000

    def __init__(self):
        config = get_decoder_config()
        self.decoder = Decoder(config)

        # Text-to-speech engine, used when a prompt has a plain message
        # instead of a message_url.
        self.speech = pyttsx3.init()

        # Microphone capture device and the reusable read buffer.
        self.audio = sphinxbase.Ad(self.audio_device, self.sampling_rate)
        self.buffer = bytearray(self.buffer_size)

        # Remember the decoder's initial search mode (not switched back
        # anywhere in this class, but kept for reference).
        self.default_search = self.decoder.get_search()
        self.in_speech = False
        self.max_history = 100
        self.phrases = []
        self.prompts = {}

        self.next_prompt_id = 1

        self.current_prompt = None
        self.prompt_queue = queue.Queue()

    def create_prompt(self,
                      message=None,
                      message_url=None,
                      search="enable",
                      timeout=15):
        """
        Create a new prompt and add it to the queue.

        Currently, only one type of prompt is supported. We play a message,
        then wait for someone to say a specific word (the search word) within
        the alloted amount of time.

        The status of the prompt can be retrieved by calling get_prompt with
        the appropriate id.

        timeout: prompt timeout in seconds, expected to be either None or numeric.
        """
        if timeout is not None:
            # Be forgiving of caller who may have passed timeout as a string.
            timeout = float(timeout)

        # The prompt dict doubles as the externally visible status record.
        prompt = {
            "created_time": time.time(),
            "detected": False,
            "detected_time": None,
            "id": self.get_next_prompt_id(),
            "message": message,
            "message_url": message_url,
            "search": search,
            "search_started": False,
            "search_started_time": None,
            "played": False,
            "played_time": None,
            "timeout": timeout,
            "timed_out": False
        }
        # Indexed by string id so lookups work with ids parsed from URLs.
        self.prompts[str(prompt['id'])] = prompt
        self.prompt_queue.put(prompt)
        return prompt

    def get_next_prompt_id(self):
        """
        Get a unique ID for a prompt.
        """
        tmp = self.next_prompt_id
        self.next_prompt_id += 1
        return tmp

    def get_phrases(self):
        """
        Get the history of detected phrases.
        """
        return self.phrases

    def get_prompt(self, prompt_id):
        """
        Get information about a prompt.

        Raises KeyError if the id is unknown.
        """
        return self.prompts[str(prompt_id)]

    def get_status(self):
        """
        Get the system status.
        """
        status = {
            "current_prompt": self.current_prompt,
            "in_speech": self.decoder.get_in_speech(),
            "queue_length": self.prompt_queue.qsize(),
            "search": self.decoder.get_search()
        }
        return status

    def play_prompt(self, prompt):
        # Play the prompt's audio: prefer an audio URL (via mplayer),
        # otherwise speak the text message through the TTS engine.
        prompt['played_time'] = time.time()

        if prompt.get("message_url", None) is not None:
            cmd = ["mplayer", "-ao", "pulse", prompt['message_url']]
            subprocess.call(cmd)
        elif prompt.get("message", None) is not None:
            self.speech.say(prompt['message'])
            self.speech.runAndWait()

        prompt['played'] = True

    def process_hypothesis(self, hypothesis):
        # Record a recognised phrase in the bounded history list.
        print("SPEECH {}".format(hypothesis.hypstr))

        phrase = {
            "search": self.decoder.get_search(),
            "time": time.time(),
            "text": hypothesis.hypstr
        }
        self.phrases.append(phrase)
        # Keep only the most recent max_history entries.
        del self.phrases[:-self.max_history]

    def run_next_prompt(self):
        # If nothing is queued, fall back to an idle prompt that listens
        # indefinitely for the wake word.
        if self.prompt_queue.empty():
            self.create_prompt(None, search="paradrop", timeout=None)

        self.current_prompt = self.prompt_queue.get_nowait()
        self.decoder.set_search(self.current_prompt['search'])

        # Pause capture while the prompt plays (presumably so the
        # microphone does not pick up the prompt audio — confirm).
        self.audio.stop_recording()
        self.play_prompt(self.current_prompt)
        self.audio.start_recording()

        self.current_prompt['search_started_time'] = time.time()
        self.current_prompt['search_started'] = True

    def detect_timeout(self):
        """
        Check if the current prompt has timed out.
        """
        if self.current_prompt is None:
            # No active prompt to timeout.
            return False

        if self.decoder.get_in_speech():
            # Defer timeout if decoder reports that speech is in progress.  A
            # person may be speaking the target phrase currently.
            return False

        if self.current_prompt['timeout'] is None:
            # If timeout is None, then only timeout when there is another item
            # in the queue.
            return not self.prompt_queue.empty()
        else:
            diff = time.time() - self.current_prompt['search_started_time']
            return diff >= self.current_prompt['timeout']

    def run(self):
        """Main loop: play prompts and decode microphone audio forever.

        Registers the known keyphrases, then alternates between playing
        the current prompt and feeding captured audio to the decoder,
        finishing an utterance whenever speech stops.
        """
        self.decoder.set_keyphrase("activate", "activate")
        self.decoder.set_keyphrase("allow", "allow")
        self.decoder.set_keyphrase("enable", "enable")
        self.decoder.set_keyphrase("paradrop", "para drop")

        self.audio.start_recording()
        while True:
            if self.current_prompt is None:
                self.run_next_prompt()
                self.decoder.start_utt()

            self.audio.readinto(self.buffer)
            self.decoder.process_raw(self.buffer, False, False)

            # Speech just ended: close the utterance and check the result.
            if self.in_speech and not self.decoder.get_in_speech():
                self.decoder.end_utt()

                hypothesis = self.decoder.hyp()
                if hypothesis is not None:
                    self.process_hypothesis(hypothesis)
                    self.current_prompt['detected'] = True
                    self.current_prompt['detected_time'] = time.time()
                    self.current_prompt = None
                else:
                    # Nothing recognised; keep listening in a new utterance.
                    self.decoder.start_utt()

            if self.detect_timeout():
                self.decoder.end_utt()
                self.current_prompt['timed_out'] = True
                self.current_prompt = None

            self.in_speech = self.decoder.get_in_speech()
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# ====================================================================


from os import environ, path

try:
    # itertools.izip exists only on Python 2; alias the builtin zip on
    # Python 3 so the module stays importable (the name is kept for
    # backward compatibility, it is not otherwise used here).
    from itertools import izip
except ImportError:
    izip = zip

from sphinxbase import Config
from pocketsphinx import Decoder

# Quick manual smoke test of Config getters/setters used during
# development; converted from Python-2 print statements, which are a
# SyntaxError on Python 3.

MODELDIR = "../../../model"

config = Decoder.default_config()

intval = 256
floatval = 8000.0
stringval = "~/pocketsphinx"
boolval = True

# Read the default sample rate, overwrite it, then read it back.
s = config.get_float("-samprate")
print("Float: ", floatval, " ", s)
config.set_float("-samprate", floatval)
s = config.get_float("-samprate")
print("Float: ", floatval, " ", s)

s = config.get_int("-nfft")
print("Int:", intval, " ", s)
Beispiel #52
0
import os
from pocketsphinx import DefaultConfig, Decoder, get_model_path, get_data_path

model_path = get_model_path()
data_path = 'C:/project/accent/accent-poc/src/Audio/'

# Configure a decoder against the bundled US-English models.
config = DefaultConfig()
for option, filename in (('-hmm', 'en-us'),
                         ('-lm', 'en-us.lm.bin'),
                         ('-dict', 'cmudict-en-us.dict')):
    config.set_string(option, os.path.join(model_path, filename))
decoder = Decoder(config)

# Feed the wav file through the decoder in fixed-size chunks.
chunk = bytearray(1024)
with open(os.path.join(data_path, 'speaker2.wav'), 'rb') as wav_file:
    decoder.start_utt()
    while wav_file.readinto(chunk):
        decoder.process_raw(chunk, False, False)
    decoder.end_utt()
print('Best hypothesis segments:', [seg.word for seg in decoder.seg()])
    p.terminate()

    # write data to WAVE file
    data = ''.join(all)
    wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
    wf.setnchannels(CHANNELS)
    wf.setsampwidth(p.get_sample_size(FORMAT))
    wf.setframerate(RATE)
    wf.writeframes(data)
    wf.close()


if __name__ == "__main__":
    hmdir = "/usr/share/pocketsphinx/model/hmm/en_US/hub4wsj_sc_8k"
    lmdir = "/usr/share/pocketsphinx/model/lm/en_US/hub4.5000.DMP"
    dictd = "/usr/share/pocketsphinx/model/lm/en_US/cmu07a.dic"
    record()
    wavfile = "/home/shridhar/pocketsphinxtest/livewav.wav"

    speechRec = Decoder(hmm=hmdir, lm=lmdir, dict=dictd)
    wavFile = file(wavfile, 'rb')
    speechRec.decode_raw(wavFile)
    result = speechRec.get_hyp()

    print "Recognised text from the converted video file"

    print "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%"

    print result[0]
    print "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%"
'''
Created on Dec 29, 2013


@author: Mindaugas Greibus
'''
import sys, os

from pocketsphinx import Decoder

MODELDIR = "../models"

# Create a decoder with a Lithuanian acoustic model and JSGF grammar.
config = Decoder.default_config()
config.set_string('-hmm', os.path.join(MODELDIR, 'hmm/lt.cd_cont_200/'))
config.set_string('-jsgf', os.path.join(MODELDIR, 'lm/robotas.gram'))
config.set_string('-dict', os.path.join(MODELDIR, 'dict/robotas.dict'))
decoder = Decoder(config)

# Bug fix: the original passed an open file object to decode_raw and
# never closed it; the context manager guarantees the handle is released.
with open(os.path.join(MODELDIR, '../test/audio/varyk_pirmyn-16k.wav'),
          'rb') as audio_file:
    decoder.decode_raw(audio_file)

# Retrieve hypothesis.
hypothesis = decoder.hyp()
print('Best hypothesis: ', hypothesis.best_score, hypothesis.hypstr)
print('Best hypothesis segments: ', [seg.word for seg in decoder.seg()])
'''
Created on Dec 29, 2013


@author: Mindaugas Greibus
'''
import sys, os



from pocketsphinx import Decoder

MODELDIR = "../models"

# Create a decoder with a Lithuanian acoustic model and JSGF grammar.
config = Decoder.default_config()
config.set_string('-hmm', os.path.join(MODELDIR, 'hmm/lt.cd_cont_200/'))
config.set_string('-jsgf', os.path.join(MODELDIR, 'lm/robotas.gram'))
config.set_string('-dict', os.path.join(MODELDIR, 'dict/robotas.dict'))
decoder = Decoder(config)

# Bug fix: the original leaked the open wav file handle; use a context
# manager so it is closed after decoding.
with open(os.path.join(MODELDIR, '../test/audio/varyk_pirmyn-16k.wav'),
          'rb') as audio_file:
    decoder.decode_raw(audio_file)

# Retrieve hypothesis.
hypothesis = decoder.hyp()
print ('Best hypothesis: ', hypothesis.best_score, hypothesis.hypstr)
print ('Best hypothesis segments: ', [seg.word for seg in decoder.seg()])