Example #1
0
    def process_file(self, audiofile):
        """
        Decode an audio file as one utterance and return the recognized text.

        The file is read in 1024-byte chunks and fed to a new ``Decoder``
        built from ``self.config``. The hypothesis, its scores and the
        per-segment scores are printed for diagnostics.

        :param audiofile: path of the audio file to decode
        :return: the hypothesis string, or None when the decoder produced
                 no hypothesis
        """
        with open(audiofile, 'rb') as stream:
            decoder = Decoder(self.config)
            decoder.start_utt()
            while True:
                buf = stream.read(1024)
                if not buf:
                    break
                decoder.process_raw(buf, False, False)
            decoder.end_utt()

        hyp = decoder.hyp()
        # Single-argument print calls behave the same on Python 2 and 3.
        print("Hyp: %s" % (hyp,))
        if hyp is None:
            return None

        print("Hyp Score %s" % ((hyp.prob, hyp.best_score),))
        average_score = 0
        seg_count = 0
        for seg in decoder.seg():
            if seg.word != "<sil>":
                seg_count += 1
                average_score += seg.ascore
                print(seg.word, seg.ascore, seg.lscore)

        print("hyp: %s" % (hyp.hypstr,))
        # An utterance made only of silence has no scored segment; skip the
        # average instead of dividing by zero.
        if seg_count:
            print(average_score / seg_count)
        return hyp.hypstr
Example #2
0
 def build_decoder(self):
     """Build a Decoder from the model files located under ``self.MODEL_DIR``.

     Each model option is pointed at a path below ``self.MODEL_DIR`` and the
     decoder log is sent to the null device ("NUL" on win32, "/dev/null"
     elsewhere).

     :return: a Decoder constructed from that configuration
     """
     # (option, path relative to MODEL_DIR), applied in this order.
     model_options = (
         ("-dict", "cmudict-en-us.dict"),
         ("-fdict", "en-us/noisedict"),
         ("-featparams", "en-us/feat.params"),
         ("-tmat", "en-us/transition_matrices"),
         ("-hmm", "en-us"),
         ("-lm", "en-us.lm.bin"),
         ("-mdef", "en-us/mdef"),
         ("-mean", "en-us/means"),
         ("-sendump", "en-us/sendump"),
         ("-var", "en-us/variances"),
     )

     decoder_config = Decoder.default_config()
     for option, relative_path in model_options:
         decoder_config.set_string(
             option, os.path.join(self.MODEL_DIR, relative_path)
         )

     log_sink = "NUL" if sys.platform == "win32" else "/dev/null"
     decoder_config.set_string("-logfn", log_sink)
     return Decoder(decoder_config)
Example #3
0
def create_decoder():
    """Build a Decoder from the ``zero_ru_cont_8k_v3`` model directory.

    The decoder runs in language-model mode with noise removal disabled,
    an 8000 Hz sample rate and logging sent to ``os.devnull``.

    :return: the configured Decoder
    """
    model_root = os.path.join(root(), 'pocketsphinx', 'zero_ru_cont_8k_v3')

    # Alternative acoustic models shipped alongside:
    # 'zero_ru.cd_cont_4000' and 'zero_ru.cd_ptm_4000'.
    acoustic_model = os.path.join(model_root, 'zero_ru.cd_semi_4000')
    # 'ru.dic' is the alternative dictionary.
    dictionary_path = os.path.join(model_root, 'ru.dic.orig')
    language_model = os.path.join(model_root, 'ru.lm.orig')

    # Keyword list for the keyword-spotting mode, which is currently
    # disabled ('-keyphrase' / '-kws_threshold' / '-kws' are not set).
    kws = os.path.join(model_root, 'keywords.mini')

    settings = Decoder.default_config()
    settings.set_string('-hmm', acoustic_model)
    settings.set_string("-lm", language_model)
    settings.set_string('-dict', dictionary_path)
    settings.set_boolean('-remove_noise', False)
    settings.set_float('-samprate', 8000)
    settings.set_string('-logfn', os.devnull)

    return Decoder(settings)
Example #4
0
def audio2phoneme(audio_file):
    """Decode a wave file and return its segments with timings.

    The duration is taken from the wave header; the file content is then
    streamed to a Decoder built from the module-level ``config``. Segment
    frame indices are made relative to the first segment and scaled to
    the duration of the file.

    :param audio_file: path of the wave file to decode
    :return: list of ``(segment_word, start, end)`` tuples, where start and
             end are expressed in the unit of ``frames / framerate`` of the
             wave header (seconds)
    """
    wave_read = wave.open(audio_file, 'rb')
    try:
        length = wave_read.getnframes() / wave_read.getframerate()
    finally:
        # Release the handle even when a header query fails.
        wave_read.close()

    # Decode streaming data.
    decoder = Decoder(config)

    buf = bytearray(1024)
    with open(audio_file, 'rb') as f:
        decoder.start_utt()
        while True:
            n_read = f.readinto(buf)
            if not n_read:
                break
            # A short (typically final) read leaves stale bytes from the
            # previous chunk at the end of the reused buffer; only pass on
            # what was actually read.
            chunk = buf if n_read == len(buf) else buf[:n_read]
            decoder.process_raw(chunk, False, False)
        decoder.end_utt()

    nframes = decoder.n_frames()

    phonemes = []
    offset = None
    for seg in decoder.seg():
        if offset is None:
            # Timings are reported relative to the first segment.
            offset = seg.start_frame
        start_frame = seg.start_frame - offset
        end_frame = seg.end_frame - offset
        phonemes.append((seg.word, start_frame / nframes * length,
                         end_frame / nframes * length))

    return phonemes
Example #5
0
    def setup(self):
        """Create the key-phrase detector and store it in ``self._detector``.

        The key phrase and its threshold come from ``self._tconfig``.
        Decoder logging is discarded unless the 'alexapi' logger is at
        DEBUG level.
        """
        detector_config = Decoder.default_config()

        # Acoustic model and dictionary from the model path.
        detector_config.set_string(
            '-hmm', os.path.join(get_model_path(), 'en-us'))
        detector_config.set_string(
            '-dict', os.path.join(get_model_path(), 'cmudict-en-us.dict'))

        # Key phrase to spot and its detection threshold.
        detector_config.set_string('-keyphrase', self._tconfig['phrase'])
        detector_config.set_float(
            '-kws_threshold', float(self._tconfig['threshold']))

        # The decoder log is very verbose; only keep it when debugging.
        debugging = (
            logging.getLogger('alexapi').getEffectiveLevel() == logging.DEBUG)
        if not debugging:
            sink = 'nul' if platform.system() == 'Windows' else '/dev/null'
            detector_config.set_string('-logfn', sink)

        self._detector = Decoder(detector_config)
Example #6
0
    def speech_recog(self, model):
        """Recognize speech from ``self.stream_in`` using the given model.

        Audio chunks from the stream generator are fed to a new Decoder
        until the accumulated hypothesis text is longer than one character
        or the stream is exhausted.

        :param model: base name of the model; ``MODELDIR + model + '.lm'``
                      and ``MODELDIR + model + '.dict'`` are loaded
        :return: the recognized text, possibly empty
        """
        # Create a decoder with certain model
        config = Decoder.default_config()
        config.set_string('-hmm',
                          '/usr/local/share/pocketsphinx/model/en-us/en-us')
        config.set_int('-ds', 2)
        config.set_int('-topn', 3)
        config.set_int('-maxwpf', 5)
        config.set_string('-lm', MODELDIR + model + '.lm')
        config.set_string('-dict', MODELDIR + model + '.dict')
        decoder = Decoder(config)

        decoder.start_utt()
        recog_text = ''

        with self.stream_in as stream:
            for content in stream.generator():
                decoder.process_raw(content, False, False)
                # Query the hypothesis once per chunk.
                hyp = decoder.hyp()
                if hyp is None or not hyp.hypstr:
                    continue
                recog_text += hyp.hypstr
                if len(recog_text) > 1:
                    decoder.end_utt()
                    logging.info("recog text: %s", recog_text)
                    return recog_text

        # The stream ran out before enough text was recognized: still close
        # the utterance that was started above.
        decoder.end_utt()
        return recog_text
Example #7
0
    def setup(self):
        """Create the keyword-list decoder and store it in ``self._decoder``.

        The acoustic model and dictionary names come from ``self._tconfig``;
        the hotwords are read from '/opt/AlexaPi/src/keyphrase.list'.
        Decoder logging is discarded unless the 'alexapi' logger is at
        DEBUG level.
        """
        decoder_config = Decoder.default_config()

        # Acoustic model and dictionary selected by the trigger config.
        model_dir = os.path.join(get_model_path(), self._tconfig['language'])
        decoder_config.set_string('-hmm', model_dir)
        dictionary = os.path.join(get_model_path(),
                                  self._tconfig['dictionary'])
        decoder_config.set_string('-dict', dictionary)

        # Multiple hotwords: a keyword list file is used instead of a single
        # '-keyphrase' / '-kws_threshold' pair.
        decoder_config.set_string('-kws', '/opt/AlexaPi/src/keyphrase.list')

        # The decoder log is very verbose; only keep it when debugging.
        log_level = logging.getLogger('alexapi').getEffectiveLevel()
        if log_level != logging.DEBUG:
            decoder_config.set_string('-logfn', '/dev/null')

        self._decoder = Decoder(decoder_config)
Example #8
0
    def create_decoder():
        """Build a language-model Decoder from the pocketsphinx data directory.

        Every location defaults to a file below the data directory (itself
        defaulting to 'pocketsphinx' next to this file) and can be
        overridden through the matching POCKETSPHINX_* environment variable.

        :return: the configured Decoder
        """
        here = os.path.dirname(os.path.realpath(__file__))
        data_dir = os.getenv('POCKETSPHINX_DATA',
                             os.path.join(here, 'pocketsphinx'))

        def from_env(variable, default_name):
            # Environment override, else the file inside the data directory.
            return os.getenv(variable, os.path.join(data_dir, default_name))

        acoustic_model = from_env('POCKETSPHINX_HMM', 'tdt_sc_8k')
        dictionary = from_env('POCKETSPHINX_DIC', 'keywords.dic')
        # Keyword list; only relevant if '-kws' is enabled (currently not).
        kws = from_env('POCKETSPHINX_KWS', 'keywords.kws')
        language_model = from_env('POCKETSPHINX_LM', 'keywords.lm')
        log_path = from_env('POCKETSPHINX_LOG', 'log')

        decoder_config = Decoder.default_config()
        decoder_config.set_string('-hmm', acoustic_model)
        decoder_config.set_string('-lm', language_model)
        decoder_config.set_string('-dict', dictionary)
        # '-samprate' is left at its default; set it (with set_float on
        # ubuntu) if the rate is not 16000.
        decoder_config.set_int('-nfft', 512)
        decoder_config.set_string('-logfn', log_path)

        return Decoder(decoder_config)
Example #9
0
    def __init__(self, phrase, threshold, device_index=0):
        """Set up a key-phrase decoder and a PyAudio instance.

        :param phrase: key phrase to detect
        :param threshold: detection threshold, converted with ``float()``
        :param device_index: stored as the audio device number
        """
        self._pa = None
        self._decoder = None
        self._phrase = phrase
        self._device_no = device_index
        self._threshold = float(threshold)

        message = 'Phrase: ' + phrase + ' Threshold: ' + str(threshold)
        logging.info(message)

        keyword_config = Decoder.default_config()

        # Acoustic model and dictionary from the keyword model path.
        keyword_config.set_string(
            '-hmm', os.path.join(get_model_path_keyword(), 'en-us'))
        keyword_config.set_string(
            '-dict',
            os.path.join(get_model_path_keyword(), 'cmudict-en-us.dict'))

        # Key phrase, its threshold, and no decoder log output.
        keyword_config.set_string('-keyphrase', self._phrase)
        keyword_config.set_float('-kws_threshold', self._threshold)
        keyword_config.set_string('-logfn', '/dev/null')

        self._decoder = Decoder(keyword_config)
        self._pa = pyaudio.PyAudio()
Example #10
0
    def __init__(self, keyword, sensitivity):
        """Create a key-phrase decoder and start its first utterance.

        :param keyword: key phrase to detect; 'snowboy' is spelled
                        'snow boy' for the decoder
        :param sensitivity: exponent of the threshold, which is set to
                            ``10 ** -sensitivity``
        """
        if keyword == 'snowboy':
            key_phrase = 'snow boy'
        else:
            key_phrase = keyword
        threshold = 10 ** -sensitivity

        kws_config = Decoder.default_config()
        kws_config.set_string('-logfn', '/dev/null')
        kws_config.set_string('-hmm', os.path.join(get_model_path(), 'en-us'))
        kws_config.set_string(
            '-dict', os.path.join(get_model_path(), 'cmudict-en-us.dict'))
        kws_config.set_string('-keyphrase', key_phrase)
        kws_config.set_float('-kws_threshold', threshold)

        self._decoder = Decoder(kws_config)
        self._decoder.start_utt()
Example #11
0
    def start_listening(self):
        """Stream microphone audio into the decoder and collect phrases.

        Audio is read from the input device ``self.input_source_index`` in
        1024-frame buffers while ``self.is_running`` is true. Each time the
        decoder reports a speech-to-silence transition, the utterance is
        closed and its non-empty hypothesis is appended to
        ``self.all_speech_data``. Reading is suspended while ``self.paused``
        is true.
        """
        import time  # local import: only needed for the pause wait below

        config = Decoder.default_config()
        config.set_string('-hmm', path.join(self.model_dir, self.hmm))
        config.set_string('-lm', path.join(self.model_dir, self.lm))
        config.set_string('-dict', path.join(self.model_dir, self.dictionary))
        config.set_string('-logfn', self.logfn)

        # This takes a while
        decoder = Decoder(config)

        p = pyaudio.PyAudio()
        stream = None
        try:
            print(self.input_source_index)
            stream = p.open(format=pyaudio.paInt16,
                            channels=1,
                            rate=16000,
                            input=True,
                            input_device_index=self.input_source_index,
                            frames_per_buffer=1024)

            stream.start_stream()

            in_speech_bf = False
            decoder.start_utt()

            self.wait_to_resume_lock.acquire()

            while self.is_running:
                while self.paused:
                    # Yield the CPU instead of spinning at 100% while paused.
                    time.sleep(0.01)
                buf = stream.read(1024, exception_on_overflow=False)
                if not buf:
                    break
                decoder.process_raw(buf, False, False)

                in_speech = decoder.get_in_speech()
                if in_speech == in_speech_bf:
                    continue
                in_speech_bf = in_speech
                if in_speech_bf:
                    # Silence -> speech: keep feeding the same utterance.
                    continue

                # Speech -> silence: the utterance is complete.
                decoder.end_utt()
                hyp = decoder.hyp()
                # hyp() is None when nothing was recognized.
                if hyp is not None and hyp.hypstr:
                    self.all_speech_data.append(hyp.hypstr)
                decoder.start_utt()
            decoder.end_utt()
        finally:
            # Release the audio device on every exit path.
            if stream is not None:
                stream.stop_stream()
                stream.close()
            p.terminate()
Example #12
0
    def __init__(self):
        """Create a language-model decoder from the files in the model path.

        Based on
        https://github.com/cmusphinx/pocketsphinx-python/blob/master/example.py
        """
        decoder_config = Decoder.default_config()
        decoder_config.set_string('-logfn', '/dev/null')

        acoustic_model = os.path.join(get_model_path(), 'en-us')
        decoder_config.set_string('-hmm', acoustic_model)

        language_model = os.path.join(get_model_path(), 'en-us.lm.bin')
        decoder_config.set_string('-lm', language_model)

        dictionary = os.path.join(get_model_path(), 'cmudict-en-us.dict')
        decoder_config.set_string('-dict', dictionary)

        self._decoder = Decoder(decoder_config)
 def configure(self):
     """Create the key-phrase decoder and store it in ``self.decoder``.

     The model is read from ``BASEDIR/model/<self.lang>``; the key phrase
     and sample rate come from ``self.key_phrase`` and
     ``self.sample_rate``.
     """
     language_dir = ('model', self.lang)

     kws_config = Decoder.default_config()
     kws_config.set_string(
         '-hmm', os.path.join(BASEDIR, *(language_dir + ('hmm',))))
     kws_config.set_string(
         '-dict',
         os.path.join(BASEDIR, *(language_dir + ('mycroft-en-us.dict',))))
     kws_config.set_string('-keyphrase', self.key_phrase)
     kws_config.set_float('-kws_threshold', 1e-45)
     kws_config.set_float('-samprate', self.sample_rate)
     kws_config.set_int('-nfft', 2048)
     kws_config.set_string('-logfn', '/dev/null')

     self.decoder = Decoder(kws_config)
Example #14
0
def main():
    """Continuously recognize speech from the default microphone.

    A Decoder is built from the HMM, LM and DIC files of the ``model``
    directory located one level above this file. Audio is read in
    ``BUFFER``-frame chunks; a dot is printed while speech is detected and
    the hypothesis is printed each time speech turns into silence.
    """
    script_dir = os.path.dirname(os.path.abspath(__file__))
    model_dir = os.path.join(script_dir, '..', 'model')

    config = Decoder.default_config()
    config.set_string('-hmm', os.path.join(model_dir, HMM))
    config.set_string('-lm', os.path.join(model_dir, LM))
    config.set_string('-dict', os.path.join(model_dir, DIC))
    config.set_string('-logfn', '/dev/null')
    decoder = Decoder(config)

    p = pyaudio.PyAudio()
    stream = None
    try:
        stream = p.open(format=pyaudio.paInt16,
                        channels=1,
                        rate=16000,
                        input=True,
                        frames_per_buffer=BUFFER)
        stream.start_stream()
        in_speech_bf = True
        decoder.start_utt()
        while True:
            buf = stream.read(BUFFER)
            if not buf:
                break
            decoder.process_raw(buf, False, False)

            in_speech = decoder.get_in_speech()
            if in_speech:
                sys.stdout.write('.')
                sys.stdout.flush()
            if in_speech == in_speech_bf:
                continue

            in_speech_bf = in_speech
            if in_speech_bf:
                # Silence -> speech: keep feeding the same utterance.
                continue

            # Speech -> silence: the utterance is complete.
            decoder.end_utt()
            hyp = decoder.hyp()
            # hyp() is None when nothing was recognized; test for that
            # explicitly rather than swallowing AttributeError.
            if hyp is not None and hyp.hypstr != '':
                print('You said:', hyp.hypstr)
            decoder.start_utt()

        decoder.end_utt()
        hyp = decoder.hyp()
        # Avoid dereferencing a missing hypothesis on the way out.
        print('An Error occured:', hyp.hypstr if hyp is not None else None)
    finally:
        # Release the audio device on every exit path (including Ctrl-C).
        if stream is not None:
            stream.stop_stream()
            stream.close()
        p.terminate()
Example #15
0
 def process_stream(self, stream, callback):
     """
     Continuously process an audio stream and trigger the callback
     whenever text is detected.

     The stream is read in 1024-byte chunks. As soon as the decoder has a
     hypothesis, the utterance is closed, the hypothesis text is passed to
     ``callback`` and a new utterance is started. Processing stops when
     the stream returns no more data.

     :param stream: object with a ``read(size)`` method returning audio data
     :param callback: callable invoked with each recognized string
     """
     decoder = Decoder(self.config)
     decoder.start_utt()
     while True:
         buf = stream.read(1024)
         if not buf:
             # End of stream: without this check the loop would spin
             # forever on empty reads.
             break
         decoder.process_raw(buf, False, False)
         partial = decoder.hyp()
         if partial is None or partial.hypstr is None:
             continue
         decoder.end_utt()
         # Re-query after end_utt(), as the original did, and guard
         # against the hypothesis having disappeared.
         final = decoder.hyp()
         if final is not None:
             callback(final.hypstr)
         decoder.start_utt()
     decoder.end_utt()
Example #16
0
    def setup_pocketsphinx(self) -> None:
        """Create ``self.decoder`` from 'resources/model' and start an utterance.

        Also resets ``self.prev_buf_is_speech`` to False.
        """
        self.logger.info("Setting up PocketSphinx.")
        self.MODELDIR = "resources/model"

        # (option, file or directory name inside MODELDIR)
        model_options = (
            ('-hmm', 'es-es'),
            ('-lm', 'es-es.lm'),
            ('-dict', 'es.dict'),
        )
        ps_config = Decoder.default_config()
        for option, name in model_options:
            ps_config.set_string(option, os.path.join(self.MODELDIR, name))
        ps_config.set_string('-logfn', '/dev/null')

        self.decoder = Decoder(ps_config)
        self.prev_buf_is_speech = False
        self.decoder.start_utt()

        self.logger.info("Done setting up PocketSphinx.")
Example #17
0
def init():
    """Initialise the module-level ``decoder``, ``p`` and ``r`` objects.

    ``decoder`` is configured from ``settings`` and switched to a
    key-phrase search named 'wakeup' on ``settings.WAKE_UP_WORD``; ``p`` is
    a PyAudio instance and ``r`` a speech_recognition Recognizer.
    """
    global decoder, p, r

    # Decoder configuration from the project settings.
    ps_config = Decoder.default_config()
    ps_config.set_string('-logfn', settings.POCKETSPHINX_LOG)
    ps_config.set_string('-hmm', settings.ACOUSTIC_MODEL)
    ps_config.set_string('-lm', settings.LANGUAGE_MODEL)
    ps_config.set_string('-dict', settings.POCKET_DICT)

    # Decoder listening for the wake-up word.
    decoder = Decoder(ps_config)
    decoder.set_keyphrase('wakeup', settings.WAKE_UP_WORD)
    decoder.set_search('wakeup')

    p = pyaudio.PyAudio()
    r = speech_recognition.Recognizer()
Example #18
0
    def start_recognizer(self):
        """Create the decoder, start an utterance and wait for audio messages.

        The decoder is built from ``self.dict``, ``self.class_lm`` and
        ``self.hmm``. ``self.process_audio`` is subscribed to the
        "recognizer/audio_ready" topic, then the call blocks in
        ``rospy.spin()``.
        """
        ps_config = Decoder.default_config()
        rospy.loginfo("Done initializing pocketsphinx")

        # Apply the provided parameters, in this order.
        for option, value in (('-dict', self.dict),
                              ('-lm', self.class_lm),
                              ('-hmm', self.hmm)):
            ps_config.set_string(option, value)
        self.decoder = Decoder(ps_config)

        # Open the utterance that incoming audio will be fed into.
        self.decoder.start_utt()
        rospy.loginfo("Decoder started successfully")

        # Hand control to ROS: audio notifications drive process_audio.
        rospy.Subscriber("recognizer/audio_ready", Bool, self.process_audio)
        rospy.spin()
Example #19
0
    def create_decoder():
        """Build a keyword-list Decoder from the pocketsphinx data directory.

        Every location defaults to a file below the data directory (itself
        defaulting to 'pocketsphinx-data' next to this file) and can be
        overridden through the matching POCKETSPHINX_* environment variable.

        :return: the configured Decoder
        """
        from pocketsphinx.pocketsphinx import Decoder

        here = os.path.dirname(os.path.realpath(__file__))
        data_dir = os.getenv('POCKETSPHINX_DATA',
                             os.path.join(here, 'pocketsphinx-data'))

        def from_env(variable, default_name):
            # Environment override, else the file inside the data directory.
            return os.getenv(variable, os.path.join(data_dir, default_name))

        acoustic_model = from_env('POCKETSPHINX_HMM', 'hmm')
        dictionary = from_env('POCKETSPHINX_DIC', 'dictionary.txt')
        keyword_list = from_env('POCKETSPHINX_KWS', 'keywords.txt')

        decoder_config = Decoder.default_config()
        decoder_config.set_string('-hmm', acoustic_model)
        decoder_config.set_string('-dict', dictionary)
        decoder_config.set_string('-kws', keyword_list)
        # '-samprate' is left at its default; set it (with set_float on
        # ubuntu) if the rate is not 16000.
        decoder_config.set_int('-nfft', 512)
        decoder_config.set_float('-vad_threshold', 2.7)
        decoder_config.set_string('-logfn', os.devnull)

        return Decoder(decoder_config)
Example #20
0
    def __init__(self, keyword, sensitivity):
        """
        Create a key-phrase decoder and start its first utterance.

        :param keyword: keyword to be detected.
        :param sensitivity: value passed to the decoder as the keyword
            spotting threshold.
        """
        model_dir = os.path.join(get_model_path(), 'en-us')
        dictionary = os.path.join(get_model_path(), 'cmudict-en-us.dict')

        kws_config = Decoder.default_config()
        # Discard the decoder log.
        kws_config.set_string('-logfn', '/dev/null')
        # Acoustic model and pronunciation dictionary.
        kws_config.set_string('-hmm', model_dir)
        kws_config.set_string('-dict', dictionary)
        # Keyword spotting parameters.
        kws_config.set_string('-keyphrase', keyword)
        kws_config.set_float('-kws_threshold', sensitivity)

        self._decoder = Decoder(kws_config)
        self._decoder.start_utt()
        def load_models(pipe, config, models):
            """Internal worker that loads the language model into a decoder.

            Note:
                Some languages take a long time to load. English is by far
                the fastest language to be loaded as a model.

            Arguments:
                pipe (:obj: socket): The response pipe to send to the parent process
                config: The pocketsphinx configuration that receives the model paths
                models (dict): The language and nltk models developed by the parent process

            Returns: (Decoder)
                The STT decoder object and the nltk model, or None (after
                reporting the error on the pipe) when a model is invalid

            """
            language_model = models["language_model"]
            nltk_model = models["nltk_model"]

            # Both models are always checked before deciding.
            validity = (language_model.is_valid_model(),
                        nltk_model.is_valid_model())
            if False in validity:
                l_log.error("The language model %s is invalid!" %
                            str(language_model.name))
                send_error(pipe, "Failed loading language model!")
                return

            # Load the model configurations into pocketsphinx
            for option, attribute in (('-hmm', 'hmm'),
                                      ('-lm', 'lm'),
                                      ('-dict', 'dict')):
                config.set_string(option,
                                  str(getattr(language_model, attribute)))
            new_decoder = Decoder(config)

            # Send a success message to the client
            send_json(pipe, {"success": True})

            l_log.debug("Set the language model to %s" %
                        str(language_model.name))

            # Return the new decoder and nltk model
            return new_decoder, nltk_model
Example #22
0
    def __init__(self, device_index=0, model_path=None):
        """Set up a JSGF grammar decoder and a PyAudio instance.

        :param device_index: stored as the audio device number
        :param model_path: directory containing the ``self.HMM``,
                           ``self.DIC`` and ``self.GRAMMAR`` entries
        """
        self._pa = None
        self._decoder = None
        self._model_path = model_path
        self._device_no = device_index

        logging.info('Grammar file:' + os.path.join(model_path, self.GRAMMAR))

        grammar_config = Decoder.default_config()
        # (option, entry inside model_path), applied in this order.
        for option, entry in (('-hmm', self.HMM),
                              ('-dict', self.DIC),
                              ('-jsgf', self.GRAMMAR)):
            grammar_config.set_string(option, os.path.join(model_path, entry))
        grammar_config.set_string('-logfn', '/dev/null')

        self._decoder = Decoder(grammar_config)
        self._pa = pyaudio.PyAudio()
Example #23
0
    def __init__(self, engine_type, keyword, sensitivity):
        """Initialize the base engine and a key-phrase decoder.

        :param engine_type: type of the engine.
        :param keyword: keyword being used for detection.
        :param sensitivity: sensitivity passed to the engine; also used as
            the decoder's keyword spotting threshold.
        """
        super().__init__(engine_type, keyword, sensitivity)

        model_dir = os.path.join(get_model_path(), 'en-us')
        dictionary = os.path.join(get_model_path(), 'cmudict-en-us.dict')

        kws_config = Decoder.default_config()
        # Discard the decoder log.
        kws_config.set_string('-logfn', '/dev/null')
        # Acoustic model and pronunciation dictionary.
        kws_config.set_string('-hmm', model_dir)
        kws_config.set_string('-dict', dictionary)
        # Keyword spotting parameters.
        kws_config.set_string('-keyphrase', keyword)
        kws_config.set_float('-kws_threshold', sensitivity)

        self._decoder = Decoder(kws_config)
        self._decoder.start_utt()
Example #24
0
    def __init__(self):
        """Create a grammar-driven decoder plus recognizer and microphone.

        The decoder is built from the model under 'speech/'. The rule
        'gr.rule' of 'speech/gr.gram' is compiled to an FSG, written to
        'gr.fsg' and activated as the search named 'gr'.
        """
        self.MODELDIR = 'speech/'
        self.wav_name = 'media/temp.wav'
        self.raw_name = 'media/temp.raw'

        decoder_config = Decoder.default_config()
        for option, name in (('-hmm', 'ru_ru/'), ('-dict', 'ru.dic')):
            decoder_config.set_string(option, self.MODELDIR + name)
        self.decoder = Decoder(decoder_config)

        # Compile the grammar rule into a finite-state grammar.
        grammar = Jsgf(self.MODELDIR + 'gr.gram')
        grammar_fsg = grammar.build_fsg(grammar.get_rule('gr.rule'),
                                        self.decoder.get_logmath(),
                                        7.5)
        grammar_fsg.writefile('gr.fsg')

        # Register the grammar and make it the active search.
        self.decoder.set_fsg('gr', grammar_fsg)
        self.decoder.set_search('gr')

        self.rec = Recognizer()
        self.mic = Microphone()
        def recognize_phonemes(segments_path, phonemes_result_path):

            # Create a decoder with certain model
            config = Decoder.default_config()
            config.set_string('-hmm', join(model_dir, decoder_hmm))
            config.set_string('-allphone', join(model_dir, decoder_allphone))
            config.set_string('-dict', join(model_dir, decoder_dict))
            config.set_float('-lw', decoder_lw)
            config.set_float('-pip', decoder_pip)
            config.set_float('-beam', decoder_beam)
            config.set_float('-pbeam', decoder_pbeam)
            config.set_boolean('-mmap', decoder_mmap)
            hyps = []
            segs = []
            self.decoder = Decoder(config)
            with open(segments_path, 'rb') as stream:
                in_speech_buffer = False
                self.decoder.start_utt()
                while True:
                    buf = stream.read(decoder_stream_buf_size)
                    if buf:
                        self.decoder.process_raw(buf, False, False)
                        if self.decoder.get_in_speech() != in_speech_buffer:
                            in_speech_buffer = self.decoder.get_in_speech()
                            if not in_speech_buffer:
                                hyp_result, segment = _get_decoder_results()
                                segs += segment

                                hyps.append(hyp_result)
                                self.decoder.start_utt()
                    else:
                        if in_speech_buffer:
                            hyp_result, segment = _get_decoder_results()
                            segs += segment

                            hyps.append(hyp_result)
                        break
            phonemes_dict = dict(hypotheses=hyps, segment_info=segs)
            phonemes_result = DecoderOutputSchema().dumps(phonemes_dict)
            with open(phonemes_result_path, 'w') as f:
                f.write(phonemes_result)
Example #26
0
 def __init__(self, gui):
     """Create the thread and its grammar-based decoder.

     The acoustic model is ``settings.sphinx_acoustic_model_dir`` when that
     setting is non-empty, otherwise the 'en-us' model of the model path.
     The lexicon, grammar and log file locations come from ``settings``.

     :param gui: passed to ``QThread.__init__``
     """
     QThread.__init__(self, gui)

     custom_model_dir = settings.sphinx_acoustic_model_dir
     if custom_model_dir != '':
         # custom acoustic model
         acoustic_model_directory = custom_model_dir
     else:
         # default acoustic model
         acoustic_model_directory = path.join(get_model_path(), 'en-us')

     sphinx_config = Decoder.default_config()
     # acoustic model
     sphinx_config.set_string('-hmm', acoustic_model_directory)
     # lexicon pronunciation
     sphinx_config.set_string('-dict', settings.prepared_lexicon_file)
     # language model from grammar
     sphinx_config.set_string('-jsgf', settings.prepared_grammar_file)
     log_file = settings.outputFileName(sphinx_decoder_log_file_base_name,
                                        ext='log')
     sphinx_config.set_string('-logfn', log_file)

     self.listen = False
     self.decoder = Decoder(sphinx_config)
     self.audio = None
     self.device = None
Example #27
0
    def speech_recog(self, model):
        """Read from ``self.stream_in`` until the decoder recognizes some text.

        Chunks of ``CHUNK_SIZE`` are fed to a new Decoder until the
        hypothesis text is non-empty. Reads that fail with an input
        overflow are skipped; any other IOError is re-raised.

        :param model: base name of the model; ``MODELDIR + model + '.lm'``
                      and ``MODELDIR + model + '.dict'`` are loaded
        :return: the recognized text
        """
        # Create a decoder with certain model
        config = Decoder.default_config()
        config.set_string('-hmm',
                          '/usr/local/share/pocketsphinx/model/en-us/en-us')
        config.set_int('-ds', 2)
        config.set_int('-topn', 3)
        config.set_int('-maxwpf', 5)
        config.set_string('-lm', MODELDIR + model + '.lm')
        config.set_string('-dict', MODELDIR + model + '.dict')
        decoder = Decoder(config)

        decoder.start_utt()
        recog_text = ''

        while len(recog_text) < 1:
            try:
                buf = self.stream_in.read(CHUNK_SIZE)
            except IOError as ex:
                # ``ex.args[1]`` is the portable spelling of the
                # Python-2-only ``ex[1]``.
                if ex.args[1] != pyaudio.paInputOverflowed:
                    raise
                logging.info("white noise")
                continue

            logging.info("actual voice")
            decoder.process_raw(buf, False, False)

            # hyp() is None until something is recognized; test for that
            # explicitly rather than swallowing AttributeError, which would
            # also hide unrelated bugs.
            hyp = decoder.hyp()
            if hyp is not None and hyp.hypstr:
                recog_text += hyp.hypstr
                # Single-argument form prints the same on Python 2 and 3.
                print("text: " + hyp.hypstr)

        decoder.end_utt()

        logging.info("recog text: " + recog_text)
        return recog_text
Example #28
0
def init():
    """Initialise the module-level ``decoder``, ``p`` and ``r`` objects.

    ``decoder`` uses the 'en-US' model files under ``settings.MODEL_DIR``,
    logs to 'passive-listen.log' in ``settings.LOGS_DIR`` and is switched
    to a key-phrase search named 'wakeup' on ``settings.WAKE_UP_WORD``;
    ``p`` is a PyAudio instance and ``r`` a speech_recognition Recognizer.
    """
    global decoder, p, r

    # Decoder configuration: log file first, then the model files.
    ps_config = Decoder.default_config()
    log_file = os.path.join(settings.LOGS_DIR, 'passive-listen.log')
    ps_config.set_string('-logfn', log_file)
    for option, model_file in (
            ('-hmm', 'en-US/acoustic-model'),
            ('-lm', 'en-US/language-model.lm.bin'),
            ('-dict', 'en-US/pronounciation-dictionary.dict')):
        ps_config.set_string(option,
                             os.path.join(settings.MODEL_DIR, model_file))

    # Decoder listening for the wake-up word.
    decoder = Decoder(ps_config)
    decoder.set_keyphrase('wakeup', settings.WAKE_UP_WORD)
    decoder.set_search('wakeup')

    p = pyaudio.PyAudio()
    r = speech_recognition.Recognizer()
Example #29
0
    def _create_decoder(config) -> Decoder:
        """Build a Decoder in the mode selected by ``config``.

        * ``config.lm`` set: language-model decoding.
        * exactly one hotword: key-phrase spotting with ``config.threshold``.
        * otherwise: keyword-list spotting; the list is written to
          'keywords.mini' in the temporary directory, one
          ``<word> /<threshold>/`` line per hotword.

        :param config: object providing ``hmm``, ``dict``, ``remove_noise``,
                       ``sample_rate``, ``lm``, ``hotwords`` and
                       ``threshold``
        :return: the configured Decoder
        """
        decoder_config = Decoder.default_config()
        decoder_config.set_string('-hmm', config.hmm)
        decoder_config.set_string('-dict', config.dict)
        decoder_config.set_boolean('-remove_noise', config.remove_noise)
        decoder_config.set_float('-samprate', config.sample_rate)
        decoder_config.set_string('-logfn', devnull)

        if config.lm is not None:
            decoder_config.set_string("-lm", config.lm)
        elif len(config.hotwords) == 1:
            decoder_config.set_string('-keyphrase', config.hotwords[0])
            decoder_config.set_float('-kws_threshold', config.threshold)
        else:
            import os
            from tempfile import gettempdir
            path = os.path.join(gettempdir(), 'keywords.mini')
            # The context manager flushes and closes the file before the
            # decoder reads it; the handle used to be left open.
            with open(path, 'w') as f:
                f.writelines('{} /{}/\n'.format(w, config.threshold)
                             for w in config.hotwords)
            decoder_config.set_string('-kws', path)

        return Decoder(decoder_config)
Example #30
0
    def get_decoder():
        """Build a keyword-list Decoder from the ``model`` directory next to this file.

        If the Decoder cannot be constructed, a hint about the '-samprate'
        setter is printed and the exception is re-raised.

        :return: the configured Decoder
        """
        from pocketsphinx.pocketsphinx import Decoder

        here = os.path.dirname(os.path.realpath(__file__))

        kws_config = Decoder.default_config()
        # (option, path relative to this file), applied in this order.
        for option, relative_path in (('-hmm', 'model/hmm/en'),
                                      ('-dict', 'model/respeaker.dic'),
                                      ('-kws', 'model/keywords.txt')):
            kws_config.set_string(option, os.path.join(here, relative_path))
        # A single '-keyphrase' with '-kws_threshold' is the alternative to
        # the keyword list above.
        kws_config.set_int('-samprate', SAMPLE_RATE)
        kws_config.set_int('-nfft', 2048)
        kws_config.set_string('-logfn', os.devnull)

        try:
            return Decoder(kws_config)
        except Exception as e:
            print(
                "Maybe replace config.set_int('-samprate', SAMPLE_RATE) with config.set_float('-samprate', SAMPLE_RATE)"
            )
            raise e