Example #1
0
    def parse(self, raw_audio, grammar=None):
        """Decode one complete utterance of raw audio with PocketSphinx.

        The acoustic model, language model and dictionary are read from the
        ``pocketsphinx`` directory located next to this module.

        Args:
            raw_audio: Audio buffer handed to ``Decoder.process_raw`` as a
                single full utterance.
            grammar: Optional path of a JSGF grammar file, resolved relative
                to this module's directory.  When ``None`` the ``en-us.lm``
                language model is used instead.

        Returns:
            The hypothesis string, or ``None`` when the decoder produced no
            hypothesis.

        Raises:
            IOError: If ``grammar`` is given but the resolved file is missing.
        """
        root = os.path.dirname(os.path.normpath(__file__))

        # Validate the grammar first so a bad argument fails before any
        # decoder state is built.
        grammar_file = None
        if grammar is not None:
            grammar_file = os.path.join(root, grammar)
            if not os.path.isfile(grammar_file):
                raise IOError("missing grammar file")

        model_dir = os.path.join(root, "pocketsphinx")
        config = pocketsphinx.Decoder.default_config()
        config.set_string("-hmm", os.path.join(model_dir, "en-us"))
        config.set_string("-dict", os.path.join(model_dir, "cmudict.dict"))
        config.set_string("-logfn", os.devnull)

        if grammar_file is not None:
            # Hand over the resolved path that was just checked; the raw
            # argument is only valid when the working directory is ``root``.
            config.set_string("-jsgf", grammar_file)
        else:
            config.set_string("-lm", os.path.join(model_dir, "en-us.lm"))

        decoder = pocketsphinx.Decoder(config)
        decoder.start_utt()
        decoder.process_raw(raw_audio, False, True)
        decoder.end_utt()

        hypothesis = decoder.hyp()
        return None if hypothesis is None else hypothesis.hypstr
Example #2
0
def recognition(keyphrase_function, key_phrase):
    """Listen on the microphone until ``key_phrase`` is spotted, then call back.

    The audio stream is stopped and closed before ``keyphrase_function`` is
    invoked, and the PyAudio instance is always released, including when the
    stream yields no data or an error is raised.

    :param keyphrase_function: callable invoked (without arguments) once the
        key phrase has been recognized
    :param key_phrase: a string for the key phrase
    :return: ``True`` after a detection; ``None`` if the stream returned no
        data before anything was detected
    """
    on_raspi = os.uname()[1] == 'raspberrypi'

    # Create an input stream with pyaudio - if on raspi use index 1 for
    # google voice hat mic
    stream_options = dict(format=pyaudio.paInt16,
                          channels=1,
                          rate=16000,
                          input=True,
                          frames_per_buffer=1024)
    if on_raspi:
        stream_options['input_device_index'] = 1

    # Start a pyaudio instance
    p = pyaudio.PyAudio()
    stream = None
    detected = False
    try:
        stream = p.open(**stream_options)
        print('stream started on rpi' if on_raspi else 'stream started')

        # Start the stream
        stream.start_stream()

        modeldir = "data/files/sphinx/models"

        # Create a decoder with certain model
        config = pocketsphinx.Decoder.default_config()
        # Use the mobile voice model (en-us-ptm) for performance constrained
        # systems
        acoustic_model = 'en-us/en-us-ptm' if on_raspi else 'en-us/en-us'
        config.set_string('-hmm', os.path.join(modeldir, acoustic_model))
        config.set_string('-dict',
                          os.path.join(modeldir, 'en-us/cmudict-en-us.dict'))
        config.set_string('-keyphrase', key_phrase)
        config.set_string('-logfn', 'data/files/sphinx.log')
        config.set_float('-kws_threshold', 1)

        # Process audio chunk by chunk until the key phrase is detected
        decoder = pocketsphinx.Decoder(config)
        decoder.start_utt()
        while True:
            # Read 1024 samples from the buffer
            buf = stream.read(1024, exception_on_overflow=False)
            if not buf:
                break
            decoder.process_raw(buf, False, False)
            # If the hypothesis is not none, the key phrase was recognized
            if decoder.hyp() is not None:
                decoder.end_utt()
                detected = True
                break
    finally:
        # Release the audio device on every exit path
        if stream is not None:
            stream.stop_stream()
            stream.close()
        p.terminate()

    if detected:
        keyphrase_function()  # Call back, after recording has been closed
        return True
Example #3
0
 def stream_decode(self, raw):
     """Stream-decode an audio file and collect word segments in ``self.segs``.

     ``self.segs`` is reset and then filled with
     ``[word, start_frame / 100, end_frame / 100]`` entries (presumably
     seconds at 100 frames per second -- confirm against the decoder
     configuration).

     Args:
         raw: Path of the audio file.  A path ending in ``.wav`` is mapped
             to its ``.raw`` sibling, which is created with
             ``self.convert2raw`` when it does not exist yet.
     """
     if raw.endswith('.wav'):
         raw_path = raw.replace('.wav', '.raw')
         if not os.path.isfile(raw_path):
             logging.debug('converting %s to raw', raw)
             self.convert2raw(raw)
         # Always decode the raw file, also when it was converted earlier;
         # otherwise the WAV container itself would be fed to the decoder.
         raw = raw_path

     self.segs = []
     decoder = ps.Decoder(self.config)

     def collect_segments():
         # Append the segments of the current utterance to self.segs
         for seg in decoder.seg():
             self.segs.append(
                 [seg.word, seg.start_frame / 100, seg.end_frame / 100])

     in_speech_bf = False
     with open(raw, 'rb') as stream:
         decoder.start_utt()
         while True:
             buf = stream.read(1024)
             if not buf:
                 # the last buffered stream
                 collect_segments()
                 break
             decoder.process_raw(buf, False, False)
             in_speech = decoder.get_in_speech()
             if in_speech != in_speech_bf:
                 in_speech_bf = in_speech
                 if not in_speech_bf:
                     # Speech just ended: close the utterance, harvest its
                     # segments and open the next one.
                     decoder.end_utt()
                     collect_segments()
                     decoder.start_utt()
     decoder.end_utt()
Example #4
0
    def __init__(self, language="en-US"):
        """Load the PocketSphinx models for ``language`` and build a decoder.

        The models are looked up in ``pocketsphinx-data/<language>`` next to
        this module.  The resulting decoder is stored in ``self.decoder``.

        Keyword Arguments:
            language {str} -- name of the language data directory
                (default: {"en-US"})

        Raises:
            RequestError -- A required model directory or file is missing.
        """
        assert isinstance(language, str), "``language`` must be a string"

        language_directory = os.path.join(
            os.path.dirname(os.path.realpath(__file__)), "pocketsphinx-data",
            language)

        # The messages are built from adjacent literals; a backslash line
        # continuation inside the literal would embed the indentation.
        if not os.path.isdir(language_directory):
            raise RequestError(
                "missing PocketSphinx language data directory: "
                "\"{0}\"".format(language_directory))

        acoustic_parameters_directory = os.path.join(language_directory,
                                                     "acoustic-model")

        if not os.path.isdir(acoustic_parameters_directory):
            raise RequestError(
                "missing PocketSphinx language model parameters directory: "
                "\"{0}\"".format(acoustic_parameters_directory))

        language_model_file = os.path.join(language_directory,
                                           "language-model.lm.bin")

        if not os.path.isfile(language_model_file):
            raise RequestError(
                "missing PocketSphinx language model file: "
                "\"{0}\"".format(language_model_file))

        phoneme_dictionary_file = os.path.join(
            language_directory, "pronounciation-dictionary.dict")

        if not os.path.isfile(phoneme_dictionary_file):
            raise RequestError(
                "missing PocketSphinx phoneme dictionary file: "
                "\"{0}\"".format(phoneme_dictionary_file))

        # create decoder object
        config = pocketsphinx.Decoder.default_config()

        # set the path of the hidden Markov model (HMM) parameter files
        config.set_string("-hmm", acoustic_parameters_directory)

        config.set_string("-lm", language_model_file)
        config.set_string("-dict", phoneme_dictionary_file)

        # disable logging (logging causes unwanted output in terminal)
        config.set_string("-logfn", os.devnull)

        self.decoder = pocketsphinx.Decoder(config)
    def recognize_sphinx(self, audio_data, language="en-US", show_all=False):
        """
        Transcribes ``audio_data`` (an ``AudioData`` instance) with CMU Sphinx.

        ``language`` is an IETF language tag such as ``"en-US"`` or ``"en-GB"`` and selects the model directory under ``pocketsphinx-data``; it defaults to US English. Out of the box, only ``en-US`` is supported. See `Notes on using PocketSphinx <https://github.com/Uberi/speech_recognition/blob/master/reference/pocketsphinx.rst>`__ (also shipped as ``reference/pocketsphinx.rst``) for installing other languages.

        When ``show_all`` is false (the default) the most likely transcription is returned; otherwise the ``pocketsphinx.pocketsphinx.Decoder`` object used for the recognition is returned.

        Raises ``speech_recognition.UnknownValueError`` if the speech is unintelligible, and ``speech_recognition.RequestError`` if there are any issues with the Sphinx installation.
        """
        assert isinstance(audio_data, AudioData), "`audio_data` must be audio data"
        assert isinstance(language, str), "`language` must be a string"

        # the PocketSphinx bindings are imported lazily so a missing installation surfaces as a RequestError
        try:
            from pocketsphinx import pocketsphinx
            from sphinxbase import sphinxbase
        except ImportError:
            raise RequestError("missing PocketSphinx module: ensure that PocketSphinx is set up correctly.")

        # locate the model files that belong to the requested language
        data_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "pocketsphinx-data", language)
        hmm_dir = os.path.join(data_dir, "acoustic-model")
        lm_path = os.path.join(data_dir, "language-model.lm.bin")
        dict_path = os.path.join(data_dir, "pronounciation-dictionary.dict")

        # checked in this order; the first missing entry is reported
        requirements = (
            (os.path.isdir, data_dir, "missing PocketSphinx language data directory: \"{0}\""),
            (os.path.isdir, hmm_dir, "missing PocketSphinx language model parameters directory: \"{0}\""),
            (os.path.isfile, lm_path, "missing PocketSphinx language model file: \"{0}\""),
            (os.path.isfile, dict_path, "missing PocketSphinx phoneme dictionary file: \"{0}\""),
        )
        for is_present, location, message in requirements:
            if not is_present(location):
                raise RequestError(message.format(location))

        # build the decoder
        config = pocketsphinx.Decoder.default_config()
        settings = (
            ("-hmm", hmm_dir),  # hidden Markov model (HMM) parameter files
            ("-lm", lm_path),
            ("-dict", dict_path),
            ("-logfn", os.devnull),  # logging would otherwise produce unwanted terminal output
        )
        for option, value in settings:
            config.set_string(option, value)
        decoder = pocketsphinx.Decoder(config)

        # the included language models require audio to be 16-bit mono 16 kHz in little-endian format
        samples = audio_data.get_raw_data(convert_rate=16000, convert_width=2)

        # run recognition over the whole buffer as one utterance (no_search = False, full_utt = True)
        decoder.start_utt()
        decoder.process_raw(samples, False, True)
        decoder.end_utt()

        if show_all:
            return decoder

        best = decoder.hyp()
        if best is None:
            raise UnknownValueError()  # no transcriptions available
        return best.hypstr
Example #6
0
def get_decoder(libdir=None, modeldir=None, lang='en-us'):
    """Create a decoder with the requested language model.

    :param libdir: optional base directory; its ``model`` sub-directory is
        used when ``modeldir`` is not given
    :param modeldir: optional directory holding the model files; falls back
        to ``<libdir>/model`` and then to ``MODELDIR``
    :param lang: model name; selects ``<lang>``, ``<lang>.lm.bin`` and
        ``cmudict-<lang>.dict`` inside the model directory
    :return: a ``ps.Decoder`` configured with those files
    """
    if not modeldir:
        modeldir = os.path.join(libdir, 'model') if libdir else MODELDIR

    config = ps.Decoder.default_config()
    config.set_string('-hmm', os.path.join(modeldir, lang))
    config.set_string('-lm', os.path.join(modeldir, lang + '.lm.bin'))
    config.set_string('-dict',
                      os.path.join(modeldir, 'cmudict-' + lang + '.dict'))
    return ps.Decoder(config)
def start_keyphrase_recognition(keyphrase_function, key_phrase):
    """ Listens on the microphone for a specific key phrase. Every time the
        key phrase is recognized, keyphrase_function is called.

        This function blocks: it does not start a thread itself, so run it in
        one if the caller must stay responsive. keyphrase_function is called
        inline, so key phrase detection is paused until it returns. Audio that
        overflows the input buffer in the meantime is dropped rather than
        raising.

    :param keyphrase_function: function that is called when the phrase is recognized
    :param key_phrase: a string for the key phrase
    """
    modeldir = "files/sphinx/models"

    # Create a decoder with certain model
    config = pocketsphinx.Decoder.default_config()
    # Use the mobile voice model (en-us-ptm) for performance constrained systems
    config.set_string('-hmm', os.path.join(modeldir, 'en-us/en-us-ptm'))
    config.set_string('-dict',
                      os.path.join(modeldir, 'en-us/cmudict-en-us.dict'))
    config.set_string('-keyphrase', key_phrase)
    config.set_string('-logfn', 'files/sphinx.log')
    config.set_float('-kws_threshold', 1)

    # Start a pyaudio instance
    p = pyaudio.PyAudio()
    stream = None
    try:
        # Create an input stream with pyaudio
        stream = p.open(format=pyaudio.paInt16,
                        channels=1,
                        rate=16000,
                        input=True,
                        frames_per_buffer=1024)
        # Start the stream
        stream.start_stream()

        # Process audio chunk by chunk. On keyword detected perform action
        # and restart search
        decoder = pocketsphinx.Decoder(config)
        decoder.start_utt()
        while True:
            # Read 1024 samples; tolerate overflow caused by a slow callback
            buf = stream.read(1024, exception_on_overflow=False)
            if not buf:
                break
            decoder.process_raw(buf, False, False)
            # If the hypothesis is not none, the key phrase was recognized
            if decoder.hyp() is not None:
                keyphrase_function()
                # Stop and reinitialize the decoder
                decoder.end_utt()
                decoder.start_utt()
    finally:
        # Release the audio device on every exit path
        if stream is not None:
            stream.stop_stream()
            stream.close()
        p.terminate()
Example #8
0
 def build_decoder(self):
     """Create the keyphrase-spotting decoder and open its first utterance.

     Model files are read from ``files/sphinx`` next to this module. The
     decoder is stored in ``self.decoder`` with an utterance already
     started, so callers can feed audio straight away.
     """
     model_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'files/sphinx')
     file_name = 'alexa'
     dict_file = '{}.dict'.format(file_name)
     ps_config = pocketsphinx.Decoder.default_config()
     ps_config.set_string('-hmm', os.path.join(model_path, 'acoustic-model'))
     ps_config.set_string('-dict', os.path.join(model_path, dict_file))
     ps_config.set_string('-keyphrase', "JARVIS")
     ps_config.set_float('-kws_threshold', 1e-10)
     # os.devnull instead of a literal '/dev/null' keeps this portable
     ps_config.set_string('-logfn', os.devnull)

     self.decoder = pocketsphinx.Decoder(ps_config)
     self.decoder.start_utt()
Example #9
0
    def start_recognizer(self):
        """Create ``self.decoder`` and, if configured, load a JSGF rule into it.

        The decoder configuration is assembled from ``self._hmm``,
        ``self._dict``, ``self._kws``, ``self._keyphrase`` and
        ``self._threshold``; unset (``None``) values are skipped so the
        PocketSphinx defaults apply.

        When ``self._gram``, ``self._grammar`` and ``self._rule`` are all set,
        the rule ``<grammar>.<rule>`` is compiled to an FSG, written to
        ``<gram>.fsg``, selected as the active search, and an utterance is
        started. If the rule name is not a string, ``self._rule`` is reset to
        ``None``.
        """
        config = pocketsphinx.Decoder.default_config()

        # Setup decoder config
        if self._hmm is None:
            rospy.logwarn("Using default hmm")
        else:
            rospy.loginfo("hmm file: %s", self._hmm)
            config.set_string('-hmm', self._hmm)

        if self._dict is None:
            rospy.logwarn("Using default dict")
        else:
            rospy.loginfo("Dict file: %s", self._dict)
            config.set_string('-dict', self._dict)

        config.set_string('-dither', "no")
        # feat.params is looked up inside the acoustic model directory, so it
        # can only be set when a model path was supplied.
        if self._hmm is not None:
            config.set_string('-featparams', os.path.join(self._hmm, "feat.params"))

        if self._kws is not None:
            config.set_string('-kws', self._kws)
            # Read the same attributes the guards check.
            if self._keyphrase is not None:
                config.set_string('-keyphrase', self._keyphrase)
            if self._threshold is not None:
                config.set_float('-kws_threshold', self._threshold)

        # Set required configuration for decoder
        self.decoder = pocketsphinx.Decoder(config)

        if not (self._gram and self._grammar and self._rule):
            return

        jsgf = Jsgf(self._gram)
        self.get_list_of_public_jsgf_rules(self._gram)

        if not isinstance(self._rule, str):
            rospy.logerr("LOAD FAILED: rule name must be string")
            self._rule = None
            return

        rule = jsgf.get_rule(self._grammar + '.' + self._rule)
        if rule is None:
            rospy.logwarn("LOAD FAILED: No rule <"+ self._rule + "> in grammar <" + self._grammar + ">")
            return

        rospy.logwarn("LOAD: Rule <"+ self._rule + "> from grammar <" + self._grammar + ">")
        fsg = jsgf.build_fsg(rule, self.decoder.get_logmath(), 7.5)
        fsg.writefile(self._gram + '.fsg')
        self.decoder.set_fsg(self._gram, fsg)
        self.decoder.set_search(self._gram)
        # Start processing input audio
        self.decoder.start_utt()
        rospy.loginfo("Decoder is successfully started")
Example #10
0
 def pocketsphinx_init(self):
     """Build the PocketSphinx decoder used as speech-to-text engine.

     Model files are read from the directory given by
     ``self.args["pocketsphinx_dir"]``; the decoder is stored in
     ``self.sphinx_decoder``.
     """
     base_dir = self.args["pocketsphinx_dir"]
     options = (
         ("-hmm", os.path.join(base_dir, "acoustic-model")),
         ("-lm", os.path.join(base_dir, "language-model.lm.bin")),
         ("-dict", os.path.join(base_dir, "pronounciation-dictionary.dict")),
         # send decoder log output to the null device
         ("-logfn", os.devnull),
     )

     config = pocketsphinx.Decoder.default_config()
     for option, value in options:
         config.set_string(option, value)

     self.sphinx_decoder = pocketsphinx.Decoder(config)
     self.log("Pocketsphinx init done")
Example #11
0
            def decodeSpeech(decoder_config, audio_file):
                """Record audio, then stream-decode ``audio_file`` with pocketsphinx.

                Partial hypotheses are printed while decoding, a dot is
                written to stdout for every chunk detected as speech, and the
                result of each utterance is printed when speech ends.

                Args:
                    decoder_config: configuration passed to ``ps.Decoder``.
                    audio_file: path of the audio file to decode.

                Returns:
                    The decoder hypothesis object after the last chunk, or
                    ``None`` when there is no hypothesis.
                """
                speechRec = ps.Decoder(decoder_config)

                # `record` is taken from the enclosing scope.
                # NOTE(review): it is executed through the shell -- confirm it
                # can never contain untrusted input.
                subprocess.call(record, shell=True)

                in_speech_bf = True
                with open(audio_file, 'rb') as stream:
                    speechRec.start_utt()

                    while True:
                        buf = stream.read(1024)
                        if not buf:
                            break

                        speechRec.process_raw(buf, False, False)

                        partial = speechRec.hyp()
                        if partial is not None and partial.hypstr != '':
                            print("**************PARTIAL decoding reult: %s" % partial.hypstr)

                        in_speech = speechRec.get_in_speech()
                        if in_speech:
                            sys.stdout.write('.')
                            sys.stdout.flush()

                        if in_speech != in_speech_bf:
                            in_speech_bf = in_speech
                            if not in_speech_bf:
                                # Speech just ended: close the utterance,
                                # report it and open the next one.
                                speechRec.end_utt()
                                final = speechRec.hyp()
                                if final is not None and final.hypstr != '':
                                    print('Stream decoding result: %s' % final.hypstr)
                                speechRec.start_utt()

                result = speechRec.hyp()
                speechRec.end_utt()
                return result
Example #12
0
    def __init__(self,
                 language="en-US",
                 language_directory=None,
                 acoustic_parameters_directory=None,
                 language_model_file=None,
                 phoneme_dictionary_file=None):
        """Locate the PocketSphinx model files and create the decoder.

        Every path argument is optional. Missing ones are derived from the
        lower-cased ``language``: the language directory defaults to
        ``recognizer/model/<language>`` one level above this module's
        directory, the acoustic model to its ``hmm`` sub-directory, and the
        language model and dictionary to ``<language>.lm`` (falling back to
        ``<language>.lm.bin``) and ``<language>.dict`` inside it.

        Raises:
            RequestError: if a required directory or file does not exist.
        """
        super(PS_Recognizer, self).__init__()
        language = language.lower()

        # language data directory
        if not language_directory:
            language_directory = join(
                dirname(dirname(__file__)), "recognizer/model", language)
        if not isdir(language_directory):
            message = "missing PocketSphinx language data directory: \"{}\""
            raise RequestError(message.format(language_directory))

        # acoustic model (HMM) directory
        if not acoustic_parameters_directory:
            acoustic_parameters_directory = join(language_directory, "hmm")
        if not isdir(acoustic_parameters_directory):
            message = ("missing PocketSphinx language model parameters "
                       "directory: \"{}\"")
            raise RequestError(message.format(acoustic_parameters_directory))

        # language model: try the given/derived name first, then "<name>.bin"
        if not language_model_file:
            language_model_file = join(language_directory, language + ".lm")
        if not isfile(language_model_file):
            language_model_file = language_model_file + ".bin"
            if not isfile(language_model_file):
                message = "missing PocketSphinx language model file: \"{}\""
                raise RequestError(message.format(language_model_file))

        # pronunciation dictionary
        if not phoneme_dictionary_file:
            phoneme_dictionary_file = join(language_directory,
                                           language + ".dict")
        if not isfile(phoneme_dictionary_file):
            message = "missing PocketSphinx phoneme dictionary file: \"{}\""
            raise RequestError(message.format(phoneme_dictionary_file))

        # create decoder object
        config = pocketsphinx.Decoder.default_config()
        for option, value in (("-hmm", acoustic_parameters_directory),
                              ("-lm", language_model_file),
                              ("-dict", phoneme_dictionary_file),
                              ("-logfn", os.devnull)):
            config.set_string(option, value)
        self.decoder = pocketsphinx.Decoder(config)
        self.lang = language
Example #13
0
 def __init__(self):
     """Load the interpreter, open the microphone and start wake-word listening.

     Sets ``self.interpreter``, ``self.stream``, ``self.config`` and
     ``self.decoder``, then calls ``self.listen_for_wake()``.
     """
     # NOTE(review): `self` is passed explicitly on top of the implicit
     # binding -- confirm the base class really expects that argument.
     super().__init__(self)
     self.interpreter = Interpreter.load(settings.RASA_MODEL_DIR)

     # NOTE(review): `output_device_index` is given although only input is
     # enabled -- confirm `input_device_index` was not intended.
     stream_options = {
         'format': paInt16,
         'channels': 1,
         'rate': 16000,
         'input': True,
         'frames_per_buffer': 1024,
         'output_device_index': 0,
     }
     self.stream = PyAudio().open(**stream_options)

     # Keyphrase-spotting configuration for the wake phrase
     sphinx_config = pocketsphinx.Decoder.default_config()
     self.config = sphinx_config
     model_dir = settings.SPHINX_MODEL_DIR
     sphinx_config.set_string('-hmm', path.join(model_dir, 'en-us/en-us'))
     sphinx_config.set_string(
         '-dict', path.join(model_dir, 'en-us/cmudict-en-us.dict'))
     sphinx_config.set_string('-keyphrase', settings.WAKE_PHRASE)
     sphinx_config.set_float('-kws_threshold', 1e+20)
     sphinx_config.set_string('-logfn', 'text.log')

     self.decoder = pocketsphinx.Decoder(sphinx_config)
     self.listen_for_wake()
Example #14
0
def recognition(keyphrase_function, key_phrase, loop):
    """Spot ``key_phrase`` in the audio stream and call ``keyphrase_function``.

    Audio is read from a ``stream`` object that is not created here; it has
    to be available from the surrounding scope.

    :param keyphrase_function: callable invoked (without arguments) on every
        detection
    :param key_phrase: a string for the key phrase
    :param loop: when true, keep listening after each detection; otherwise
        return after the first one
    :return: ``True`` after a detection when ``loop`` is false; ``None`` when
        the stream returns no data
    """
    modeldir = "data/files/sphinx/models"

    # Decoder configuration
    config = pocketsphinx.Decoder.default_config()
    # The mobile voice model (en-us-ptm) is used on performance constrained
    # systems
    if os.uname()[1] == 'raspberrypi':
        acoustic_model = 'en-us/en-us-ptm'
    else:
        acoustic_model = 'en-us/en-us'
    config.set_string('-hmm', os.path.join(modeldir, acoustic_model))
    config.set_string('-dict',
                      os.path.join(modeldir, 'en-us/cmudict-en-us.dict'))
    config.set_string('-keyphrase', key_phrase)
    config.set_string('-logfn', 'data/files/sphinx.log')
    config.set_float('-kws_threshold', 1)

    # Feed the decoder chunk by chunk until the stream runs dry
    decoder = pocketsphinx.Decoder(config)
    decoder.start_utt()
    while True:
        chunk = stream.read(1024, exception_on_overflow=False)
        if not chunk:
            break
        decoder.process_raw(chunk, False, False)

        # No hypothesis yet means the key phrase has not been heard
        if decoder.hyp() is None:
            continue

        decoder.end_utt()
        keyphrase_function()
        if not loop:
            return True
        # Looping: open a fresh utterance and keep listening
        decoder.start_utt()
Example #15
0
    def start_keyphrase_detection(self, keyphrase_function, key_phrase):
        """Listen on the microphone and report every spotted keyword.

        The keywords come from the ``keylist`` file passed as ``-kws``.
        This method blocks until the stream returns no data; the audio
        resources are released on every exit path.

        Args:
            keyphrase_function: callable invoked with the hypothesis string
                of each detection.
            key_phrase: currently unused (the ``-keyphrase`` option is
                disabled in favour of the keyword list).
        """
        modeldir = "models"

        config = pocketsphinx.Decoder.default_config()
        config.set_string('-hmm', os.path.join(modeldir, 'en-us/en-us-ptm'))
        config.set_string('-dict', os.path.join(modeldir, 'en-us/cmudict-en-us.dict'))
        config.set_string('-kws', 'keylist')
        config.set_string('-logfn', './log')
        config.set_float('-kws_threshold', 1e10)

        # Start a pyaudio instance
        p = pyaudio.PyAudio()
        stream = None
        try:
            # Create an input stream with pyaudio
            stream = p.open(format=pyaudio.paInt16, channels=1, rate=16000, input=True, frames_per_buffer=1024)

            # Start the stream
            stream.start_stream()

            # Process audio chunk by chunk. On keyword detected perform
            # action and restart search
            decoder = pocketsphinx.Decoder(config)
            decoder.start_utt()

            print('start listening...')

            while True:
                # Tolerate overflow caused by a slow callback
                buf = stream.read(1024, exception_on_overflow=False)
                if not buf:
                    break
                # Streaming chunk: search enabled (no_search=False) and not a
                # complete utterance (full_utt=False)
                decoder.process_raw(buf, False, False)
                hyp = decoder.hyp()
                if hyp is not None:
                    keyphrase_function(hyp.hypstr)
                    # Stop and reinitialize the decoder
                    decoder.end_utt()
                    decoder.start_utt()
        finally:
            if stream is not None:
                stream.stop_stream()
                stream.close()
            p.terminate()
Example #16
0
    def start(self):
        """Listen on the microphone and print every detection of "hey emma".

        The decoder is configured from ``self.hmm_directory``,
        ``self.dictionary_file`` and ``self.language_model_file``. This
        method blocks until the stream returns no data; the audio resources
        are released on every exit path.
        """
        # Create a decoder with certain model
        config = pocketsphinx.Decoder.default_config()
        config.set_string('-hmm', self.hmm_directory)
        config.set_string('-dict', self.dictionary_file)
        config.set_string("-logfn", os.devnull)
        decoder = pocketsphinx.Decoder(config)

        # Register both searches, then activate keyphrase spotting
        decoder.set_lm_file("lm", self.language_model_file)
        decoder.set_keyphrase("kws", "hey emma")
        decoder.set_search("kws")

        p = pyaudio.PyAudio()
        stream = None
        try:
            stream = p.open(format=FORMAT,
                            channels=1,
                            rate=RATE,
                            input=True,
                            output=True,
                            frames_per_buffer=BUFFER_SIZE)
            stream.start_stream()

            # Process audio chunk by chunk. On keyword detected perform
            # action and restart search
            decoder.start_utt()
            while True:
                buf = stream.read(BUFFER_SIZE)
                if not buf:
                    break
                decoder.process_raw(buf, False, False)
                hypothesis = decoder.hyp()
                if hypothesis is not None:
                    print(hypothesis.hypstr)
                    # Pause recording while the detection is handled
                    stream.stop_stream()
                    decoder.end_utt()
                    # self.houndClient.query()
                    stream.start_stream()
                    decoder.start_utt()
        finally:
            if stream is not None:
                stream.stop_stream()
                stream.close()
            p.terminate()
Example #17
0
    def recognize_sphinx(self, audio_data, language="en-US", keyword_entries=None, grammar=None, show_all=False):
        """
        Performs speech recognition on ``audio_data`` (an ``AudioData`` instance), using CMU Sphinx.
        The recognition language is determined by ``language``, an RFC5646 language tag like ``"en-US"`` or ``"en-GB"``, defaulting to US English. Out of the box, only ``en-US`` is supported. See `Notes on using `PocketSphinx <https://github.com/Uberi/speech_recognition/blob/master/reference/pocketsphinx.rst>`__ for information about installing other languages. This document is also included under ``reference/pocketsphinx.rst``. The ``language`` parameter can also be a tuple of filesystem paths, of the form ``(acoustic_parameters_directory, language_model_file, phoneme_dictionary_file)`` - this allows you to load arbitrary Sphinx models.
        If specified, the keywords to search for are determined by ``keyword_entries``, an iterable of tuples of the form ``(keyword, sensitivity)``, where ``keyword`` is a phrase, and ``sensitivity`` is how sensitive to this phrase the recognizer should be, on a scale of 0 (very insensitive, more false negatives) to 1 (very sensitive, more false positives) inclusive. If not specified or ``None``, no keywords are used and Sphinx will simply transcribe whatever words it recognizes. Specifying ``keyword_entries`` is more accurate than just looking for those same keywords in non-keyword-based transcriptions, because Sphinx knows specifically what sounds to look for.
        Sphinx can also handle FSG or JSGF grammars. The parameter ``grammar`` expects a path to the grammar file. Note that if a JSGF grammar is passed, an FSG grammar will be created at the same location to speed up execution in the next run. If ``keyword_entries`` are passed, content of ``grammar`` will be ignored.
        Returns the most likely transcription if ``show_all`` is false (the default). Otherwise, returns the Sphinx ``pocketsphinx.pocketsphinx.Decoder`` object resulting from the recognition.
        Raises a ``speech_recognition.UnknownValueError`` exception if the speech is unintelligible. Raises a ``speech_recognition.RequestError`` exception if there are any issues with the Sphinx installation.
        """
        assert isinstance(audio_data, AudioData), "``audio_data`` must be audio data"
        assert isinstance(language, str) or (isinstance(language, tuple) and len(language) == 3), "``language`` must be a string or 3-tuple of Sphinx data file paths of the form ``(acoustic_parameters, language_model, phoneme_dictionary)``"
        assert keyword_entries is None or all(isinstance(keyword, (type(""), type(u""))) and 0 <= sensitivity <= 1 for keyword, sensitivity in keyword_entries), "``keyword_entries`` must be ``None`` or a list of pairs of strings and numbers between 0 and 1"

        # import the PocketSphinx speech recognition module
        try:
            from pocketsphinx import pocketsphinx, Jsgf, FsgModel

        except ImportError:
            raise RequestError("missing PocketSphinx module: ensure that PocketSphinx is set up correctly.")
        except ValueError:
            raise RequestError("bad PocketSphinx installation; try reinstalling PocketSphinx version 0.0.9 or better.")
        if not hasattr(pocketsphinx, "Decoder") or not hasattr(pocketsphinx.Decoder, "default_config"):
            raise RequestError("outdated PocketSphinx installation; ensure you have PocketSphinx version 0.0.9 or better.")

        if isinstance(language, str):  # directory containing language data
            language_directory = os.path.join(os.path.dirname(os.path.realpath(__file__)), "pocketsphinx-data", language)
            if not os.path.isdir(language_directory):
                raise RequestError("missing PocketSphinx language data directory: \"{}\"".format(language_directory))
            acoustic_parameters_directory = os.path.join(language_directory, "acoustic-model")
            language_model_file = os.path.join(language_directory, "language-model.lm.bin")
            phoneme_dictionary_file = os.path.join(language_directory, "pronounciation-dictionary.dict")
        else:  # 3-tuple of Sphinx data file paths
            acoustic_parameters_directory, language_model_file, phoneme_dictionary_file = language
        if not os.path.isdir(acoustic_parameters_directory):
            raise RequestError("missing PocketSphinx language model parameters directory: \"{}\"".format(acoustic_parameters_directory))
        if not os.path.isfile(language_model_file):
            raise RequestError("missing PocketSphinx language model file: \"{}\"".format(language_model_file))
        if not os.path.isfile(phoneme_dictionary_file):
            raise RequestError("missing PocketSphinx phoneme dictionary file: \"{}\"".format(phoneme_dictionary_file))

        # create decoder object
        config = pocketsphinx.Decoder.default_config()
        config.set_string("-hmm", acoustic_parameters_directory)  # set the path of the hidden Markov model (HMM) parameter files
        config.set_string("-lm", language_model_file)
        config.set_string("-dict", phoneme_dictionary_file)
        config.set_string("-logfn", os.devnull)  # disable logging (logging causes unwanted output in terminal)
        decoder = pocketsphinx.Decoder(config)

        # obtain audio data
        raw_data = audio_data.get_raw_data(convert_rate=16000, convert_width=2)  # the included language models require audio to be 16-bit mono 16 kHz in little-endian format

        # obtain recognition results
        if keyword_entries is not None:  # explicitly specified set of keywords
            with PortableNamedTemporaryFile("w") as f:
                # generate a keywords file - Sphinx documentation recommendeds sensitivities between 1e-50 and 1e-5
                f.writelines("{} /1e{}/\n".format(keyword, 100 * sensitivity - 110) for keyword, sensitivity in keyword_entries)
                f.flush()

                # perform the speech recognition with the keywords file (this is inside the context manager so the file isn;t deleted until we're done)
                decoder.set_kws("keywords", f.name)
                decoder.set_search("keywords")
                decoder.start_utt()  # begin utterance processing
                decoder.process_raw(raw_data, False, True)  # process audio data with recognition enabled (no_search = False), as a full utterance (full_utt = True)
                decoder.end_utt()  # stop utterance processing
        elif grammar is not None:  # a path to a FSG or JSGF grammar
            if not os.path.exists(grammar):
                raise ValueError("Grammar '{0}' does not exist.".format(grammar))
            grammar_path = os.path.abspath(os.path.dirname(grammar))
            grammar_name = os.path.splitext(os.path.basename(grammar))[0]
            fsg_path = "{0}/{1}.fsg".format(grammar_path, grammar_name)
            if not os.path.exists(fsg_path):  # create FSG grammar if not available
                jsgf = Jsgf(grammar)
                rule = jsgf.get_rule("{0}.{0}".format(grammar_name))
                fsg = jsgf.build_fsg(rule, decoder.get_logmath(), 7.5)
                fsg.writefile(fsg_path)
            else:
                fsg = FsgModel(fsg_path, decoder.get_logmath(), 7.5)
            decoder.set_fsg(grammar_name, fsg)
            decoder.set_search(grammar_name)
            decoder.start_utt()
            decoder.process_raw(raw_data, False, True)  # process audio data with recognition enabled (no_search = False), as a full utterance (full_utt = True)
            decoder.end_utt()  # stop utterance processing
        else:  # no keywords, perform freeform recognition
            decoder.start_utt()  # begin utterance processing
            decoder.process_raw(raw_data, False, True)  # process audio data with recognition enabled (no_search = False), as a full utterance (full_utt = True)
            decoder.end_utt()  # stop utterance processing

        if show_all: return decoder

        # return results
        hypothesis = decoder.hyp()
        if hypothesis is not None: return hypothesis.hypstr
        raise UnknownValueError()  # no transcriptions available
Example #18
0
    def __init__(self, *args, **kwargs):
        """
        Initiates a PocketSphinx keyword-spotting decoder.

        The keywords are read from the profile (default ``['Naomi']``),
        upper-cased and compiled into a vocabulary. A thresholds file with
        one line per keyword is written to the path reported by
        sphinxvocab.get_thresholds_path() and handed to the decoder through
        the ``-kws`` option. The decoder is stored in ``self._ps``.

        Arguments:
            *args, **kwargs -- passed through to plugin.STTPlugin.__init__

        Raises:
            RuntimeError -- if the hmm_dir from the profile is unset or
                            does not exist
        """
        plugin.STTPlugin.__init__(self, *args, **kwargs)

        self._vocabulary_name = "keywords"
        keywords = profile.get(['keyword'], ['Naomi'])
        if isinstance(keywords, str):
            # a single keyword may be configured as a bare string
            keywords = [keywords]
        keywords = [keyword.upper() for keyword in keywords]
        self._vocabulary_phrases = keywords
        self._logger.info(
            "Adding vocabulary {} containing phrases {}".format(
                self._vocabulary_name,
                self._vocabulary_phrases
            )
        )

        vocabulary_path = self.compile_vocabulary(
            sphinxvocab.compile_vocabulary
        )

        dict_path = sphinxvocab.get_dictionary_path(vocabulary_path)
        lm_path = sphinxvocab.get_languagemodel_path(vocabulary_path)
        thresholds_path = sphinxvocab.get_thresholds_path(vocabulary_path)
        msg = " ".join([
            "Creating thresholds file '{}'",
            "See README.md for more information."
        ]).format(thresholds_path)
        print(msg)
        with open(thresholds_path, 'w') as f:
            for keyword in keywords:
                threshold = profile.get(
                    ['Pocketsphinx_KWS', 'thresholds', keyword],
                    80
                )
                # The exponent is always written with an explicit sign:
                # negative values already carry their "-", everything else
                # gets a "+" prepended.
                if threshold < 0:
                    f.write("{}\t/1e{}/\n".format(keyword, threshold))
                else:
                    f.write("{}\t/1e+{}/\n".format(keyword, threshold))
        hmm_dir = profile.get(['pocketsphinx', 'hmm_dir'])
        # Perform some checks on the hmm_dir so that we can display more
        # meaningful error messages if necessary. An unset hmm_dir (None)
        # is reported the same way as a missing directory instead of
        # crashing inside os.path.exists().
        if hmm_dir is None or not os.path.exists(hmm_dir):
            msg = " ".join([
                "hmm_dir '{}' does not exist! Please make sure that you",
                "have set the correct hmm_dir in your profile."
            ]).format(hmm_dir)
            self._logger.error(msg)
            raise RuntimeError(msg)
        # Lets check if all required files are there. Refer to:
        # http://cmusphinx.sourceforge.net/wiki/acousticmodelformat
        # for details
        missing_hmm_files = []
        for fname in ('mdef', 'feat.params', 'means', 'noisedict',
                      'transition_matrices', 'variances'):
            if not os.path.exists(os.path.join(hmm_dir, fname)):
                missing_hmm_files.append(fname)
        mixweights = os.path.exists(os.path.join(hmm_dir, 'mixture_weights'))
        sendump = os.path.exists(os.path.join(hmm_dir, 'sendump'))
        if not mixweights and not sendump:
            # We only need mixture_weights OR sendump
            missing_hmm_files.append('mixture_weights or sendump')
        if missing_hmm_files:
            # Placeholders must be "{}" because the message is filled in
            # with str.format(); "%s" would be logged literally.
            self._logger.warning(
                " ".join([
                    "hmm_dir '{}' is missing files: {}.",
                    "Please make sure that you have set the correct",
                    "hmm_dir in your profile."
                ]).format(hmm_dir, ', '.join(missing_hmm_files))
            )
        # Reserve a log file name for the decoder; delete=False keeps the
        # file on disk after the handle is closed.
        with tempfile.NamedTemporaryFile(
            prefix='psdecoder_',
            suffix='.log',
            delete=False
        ) as f:
            self._logfile = f.name
            self._logger.info('Pocketsphinx log file: {}'.format(self._logfile))
        # Pocketsphinx v5
        config = pocketsphinx.Decoder.default_config()
        config.set_string('-hmm', hmm_dir)
        config.set_string('-kws', thresholds_path)
        config.set_string('-lm', lm_path)
        config.set_string('-dict', dict_path)
        config.set_string('-logfn', self._logfile)
        self._ps = pocketsphinx.Decoder(config)
Example #19
0
# Build one PocketSphinx decoder per public rule found in the JSGF grammar
# file. ``model_dir`` is expected to be defined before this point (it is not
# set in this part of the script).
grammar_path = model_dir + '/grammars'
config = pocketsphinx.Decoder.default_config()
config.set_string('-hmm', model_dir + '/accoustic-model')
config.set_string('-lm', model_dir + '/language-model.bin')
config.set_string('-dict', model_dir + '/pronounciation-dictionary.dict')
config.set_string("-logfn", os.devnull)  # send decoder log output to the null device
jsgf = Jsgf(grammar_path)

# Decoders are collected in the order their rules appear in the grammar file.
grammar_decoders = []
# Captures the rule name from lines of the form "public <name> =".
pattern = re.compile('public <(.*?)> =')

# The grammar file is read a second time as plain text to discover the names
# of its public rules.
with open(grammar_path, 'rt') as in_file:
    # NOTE(review): linenum is never used below.
    for linenum, line in enumerate(in_file):
        grammar_key = pattern.findall(line)
        if grammar_key != []:
            # Every rule gets its own decoder built from the shared config.
            decoder = pocketsphinx.Decoder(config)
            # Rules are looked up as "structure.<name>".
            # NOTE(review): the string has no "{}" fields, so .format() looks
            # like a no-op here - confirm and consider dropping it.
            ruleGrammar = jsgf.get_rule(
                ('structure.' + grammar_key[0]).format(grammar_path))
            # 7.5 is the third argument of build_fsg (presumably the language
            # weight - TODO confirm against the PocketSphinx API).
            fsgNext = jsgf.build_fsg(ruleGrammar, decoder.get_logmath(), 7.5)
            # Register the FSG under the rule name and make it the active search.
            decoder.set_fsg(grammar_key[0], fsgNext)
            decoder.set_search(grammar_key[0])
            grammar_decoders.append(decoder)


class Text2Speech:

    CHANNEL = 'text2speech'
    CHANNEL_TYPE = 'brain'

    @staticmethod
    def id():
    def setup_decoder(audio_file, keyword_entries):
        """
        Create a PocketSphinx decoder for the "en-US" model and resolve the
        path of the audio file that is going to be decoded.

        Arguments:
            audio_file -- name of a .wav file; it is joined onto ``../data/``
                          relative to the current working directory
            keyword_entries -- ``None`` or a list of ``(keyword, sensitivity)``
                               pairs with ``0 <= sensitivity <= 1``; the value
                               is only validated here, not used

        Returns:
            a tuple ``(audio_data_path, decoder)``

        Raises:
            speech_recognition.RequestError -- if the file is not a .wav
                file, PocketSphinx cannot be imported, or any of the
                language data files is missing
        """
        language = "en-US"

        # splitext() looks only at the final extension, so names with several
        # dots ("take.2.wav") are accepted and names without any dot are
        # rejected cleanly instead of raising IndexError.
        audio_file_type = os.path.splitext(audio_file)[1].lower()
        if audio_file_type != '.wav':
            raise speech_recognition.RequestError("file type must be .wav")

        data_dir = os.path.join(os.getcwd(), '../data/')
        # NOTE(review): hard-coded install location of the speech_recognition
        # package; it only matches a system Python 2.7 on macOS.
        speech_recognition_directory = '/Library/Python/2.7/site-packages/speech_recognition/'
        audio_data_path = os.path.join(data_dir, audio_file)

        assert keyword_entries is None or all(
            isinstance(keyword, (type(""), type(u""))) and 0 <= sensitivity <= 1 for keyword, sensitivity in
            keyword_entries), "``keyword_entries`` must be ``None`` or a list of pairs of strings and numbers " \
                              "between 0 and 1"
        # import the PocketSphinx speech recognition module
        try:
            from pocketsphinx import pocketsphinx
        except ImportError:
            raise speech_recognition.RequestError(
                "missing PocketSphinx module: ensure that PocketSphinx is set up correctly."
            )
        except ValueError:
            raise speech_recognition.RequestError(
                "bad PocketSphinx installation detected; make sure you have PocketSphinx version 0.0.9 or better."
            )

        # locate the language data and verify each piece before handing it
        # to the decoder, so that a missing file gives a precise error
        language_directory = os.path.join(
            os.path.dirname(speech_recognition_directory), "pocketsphinx-data",
            language)
        if not os.path.isdir(language_directory):
            raise speech_recognition.RequestError(
                "missing PocketSphinx language data directory: \"{}\"".format(
                    language_directory))
        acoustic_parameters_directory = os.path.join(language_directory,
                                                     "acoustic-model")
        if not os.path.isdir(acoustic_parameters_directory):
            raise speech_recognition.RequestError(
                "missing PocketSphinx language model parameters directory: \"{}\""
                .format(acoustic_parameters_directory))
        language_model_file = os.path.join(language_directory,
                                           "language-model.lm.bin")
        if not os.path.isfile(language_model_file):
            raise speech_recognition.RequestError(
                "missing PocketSphinx language model file: \"{}\"".format(
                    language_model_file))
        phoneme_dictionary_file = os.path.join(
            language_directory, "pronounciation-dictionary.dict")
        if not os.path.isfile(phoneme_dictionary_file):
            raise speech_recognition.RequestError(
                "missing PocketSphinx phoneme dictionary file: \"{}\"".format(
                    phoneme_dictionary_file))

        # create decoder object
        config = pocketsphinx.Decoder.default_config()
        # set the path of the hidden Markov model (HMM) parameter files
        config.set_string("-hmm", acoustic_parameters_directory)
        config.set_string("-lm", language_model_file)
        config.set_string("-dict", phoneme_dictionary_file)
        # disable logging (logging causes unwanted output in terminal)
        config.set_string("-logfn", os.devnull)
        decoder = pocketsphinx.Decoder(config)

        return audio_data_path, decoder
Example #21
0
    def __init__(self, *args, **kwargs):
        """
        Initiates the pocketsphinx instance.

        The vocabulary is compiled through self.compile_vocabulary() and the
        path of the Hidden Markov Model (HMM) directory is read from the
        profile (``pocketsphinx`` -> ``hmm_dir``). The decoder is stored in
        ``self._decoder`` and writes its log to the temporary file named by
        ``self._logfile``.

        Arguments:
            *args, **kwargs -- passed through to plugin.STTPlugin.__init__

        Raises:
            ImportError -- if pocketsphinx is not installed
            RuntimeError -- if the hmm_dir from the profile is unset or
                            does not exist
        """
        plugin.STTPlugin.__init__(self, *args, **kwargs)

        if not pocketsphinx_available:
            raise ImportError("Pocketsphinx not installed!")

        vocabulary_path = self.compile_vocabulary(
            sphinxvocab.compile_vocabulary
        )

        lm_path = sphinxvocab.get_languagemodel_path(vocabulary_path)
        dict_path = sphinxvocab.get_dictionary_path(vocabulary_path)
        hmm_dir = profile.get(['pocketsphinx', 'hmm_dir'])

        self._logger.debug(
            "Initializing PocketSphinx Decoder with hmm_dir '{}'".format(
                hmm_dir
            )
        )
        # Perform some checks on the hmm_dir so that we can display more
        # meaningful error messages if necessary. An unset hmm_dir (None)
        # is reported the same way as a missing directory instead of
        # crashing inside os.path.exists().
        if hmm_dir is None or not os.path.exists(hmm_dir):
            msg = " ".join([
                "hmm_dir '{}' does not exist! Please make sure that you",
                "have set the correct hmm_dir in your profile."
            ]).format(hmm_dir)
            self._logger.error(msg)
            raise RuntimeError(msg)
        # Lets check if all required files are there. Refer to:
        # http://cmusphinx.sourceforge.net/wiki/acousticmodelformat
        # for details
        missing_hmm_files = []
        for fname in ('mdef', 'feat.params', 'means', 'noisedict',
                      'transition_matrices', 'variances'):
            if not os.path.exists(os.path.join(hmm_dir, fname)):
                missing_hmm_files.append(fname)
        mixweights = os.path.exists(os.path.join(hmm_dir, 'mixture_weights'))
        sendump = os.path.exists(os.path.join(hmm_dir, 'sendump'))
        if not mixweights and not sendump:
            # We only need mixture_weights OR sendump
            missing_hmm_files.append('mixture_weights or sendump')
        if missing_hmm_files:
            # Placeholders must be "{}" because the message is filled in
            # with str.format(); "%s" would be logged literally.
            self._logger.warning(
                " ".join([
                    "hmm_dir '{}' is missing files: {}.",
                    "Please make sure that you have set the correct",
                    "hmm_dir in your profile."
                ]).format(hmm_dir, ', '.join(missing_hmm_files))
            )
        # Only newer pocketsphinx bindings expose Decoder.default_config();
        # its presence selects the construction style used below.
        self._pocketsphinx_v5 = hasattr(pocketsphinx.Decoder, 'default_config')

        # Reserve a log file name for the decoder; delete=False keeps the
        # file on disk after the handle is closed.
        with tempfile.NamedTemporaryFile(prefix='psdecoder_',
                                         suffix='.log', delete=False) as f:
            self._logfile = f.name

        if self._pocketsphinx_v5:
            # Pocketsphinx v5
            config = pocketsphinx.Decoder.default_config()
            config.set_string('-hmm', hmm_dir)
            config.set_string('-lm', lm_path)
            config.set_string('-dict', dict_path)
            config.set_string('-logfn', self._logfile)
            self._decoder = pocketsphinx.Decoder(config)
        else:
            # Pocketsphinx v4 or sooner
            self._decoder = pocketsphinx.Decoder(
                hmm=hmm_dir,
                logfn=self._logfile,
                lm=lm_path,
                dict=dict_path
            )
Example #22
0
import cv2
import csv

# CSV file whose first column holds the user names (see the loading loop below).
FILENAME = 'users.csv'
# NOTE(review): MODELDIR is not referenced anywhere in this part of the script.
MODELDIR = "../../../model"

# Configure the PocketSphinx decoder. All model paths are absolute and
# specific to the original author's machine.
config = ps.Decoder.default_config()
config.set_string(
    '-hmm',
    '/home/anna/diplom/test_speech/zero_ru_cont_8k_v3/zero_ru.cd_cont_4000/')
config.set_string('-dict', '/home/anna/diplom/comb_1/speech/vocabular.dict')
config.set_string('-jsgf', '/home/anna/diplom/comb_1/speech/sp.jsgf')
config.set_string('-logfn', '/dev/null')
# The n-gram language model is disabled; the JSGF grammar set above is used instead.
#config.set_string('-lm', '/home/anna/diplom/test_speech/zero_ru_cont_8k_v3/ru.lm')
config.set_int('-nfft', 512)
config.set_float('-samprate', 8000.0)
decoder = ps.Decoder(config)

# Load the dlib models used for face processing; the .dat files are looked up
# relative to the current working directory.
sp = dlib.shape_predictor('shape_predictor_68_face_landmarks.dat')
facerec = dlib.face_recognition_model_v1(
    'dlib_face_recognition_resnet_model_v1.dat')
detector = dlib.get_frontal_face_detector()

# Map each stored face descriptor to the name of the user it belongs to.
users = {}
with open(FILENAME, "r", newline="") as file:
    reader = csv.reader(file)
    for row in reader:
        # The first column is the user name; an empty row raises IndexError here.
        name = row[0]
        # The descriptor of each user is stored in "<name>.pickle".
        # NOTE(review): pickle.load can execute arbitrary code from the file;
        # only load descriptor files from a trusted source.
        with open(name + '.pickle', 'rb') as f:
            face_descriptor = pickle.load(f)
            # assumes the unpickled descriptor is hashable - TODO confirm
            users[face_descriptor] = name
Example #23
0
    def prepare_sphinx2(self, language="en-US", keyword_entries=None):
        """
        Create a PocketSphinx decoder and store it in ``self.decoder``.

        ``language`` is either the name of a language data directory below
        ``pocketsphinx-data`` (next to this file) or a 3-tuple of Sphinx data
        file paths ``(acoustic_parameters, language_model, phoneme_dictionary)``.

        ``keyword_entries`` is ``None`` or a list of ``(keyword, sensitivity)``
        pairs with ``0 <= sensitivity <= 1``. When it is given, the pairs are
        written to ``sphinx.txt`` in the current working directory and the
        decoder is switched to a keyword search over that file. When it is
        ``None`` no keyword search is configured and the decoder keeps the
        search it was created with.

        Raises ``sr.RequestError`` if any of the language data files is
        missing. Returns ``None``.
        """
        assert isinstance(language, str) or (
            isinstance(language, tuple) and len(language) == 3
        ), "``language`` must be a string or 3-tuple of Sphinx data file paths of the form ``(acoustic_parameters, language_model, phoneme_dictionary)``"
        assert keyword_entries is None or all(
            isinstance(keyword, (type(""),
                                 type(u""))) and 0 <= sensitivity <= 1
            for keyword, sensitivity in keyword_entries
        ), "``keyword_entries`` must be ``None`` or a list of pairs of strings and numbers between 0 and 1"

        if isinstance(language, str):  # directory containing language data
            language_directory = os.path.join(
                os.path.dirname(os.path.realpath(__file__)),
                "pocketsphinx-data", language)
            if not os.path.isdir(language_directory):
                raise sr.RequestError(
                    "missing PocketSphinx language data directory: \"{}\"".
                    format(language_directory))
            acoustic_parameters_directory = os.path.join(
                language_directory, "acoustic-model")
            language_model_file = os.path.join(language_directory,
                                               "language-model.lm.bin")
            phoneme_dictionary_file = os.path.join(
                language_directory, "pronounciation-dictionary.dict")
        else:  # 3-tuple of Sphinx data file paths
            acoustic_parameters_directory, language_model_file, phoneme_dictionary_file = language
        if not os.path.isdir(acoustic_parameters_directory):
            raise sr.RequestError(
                "missing PocketSphinx language model parameters directory: \"{}\""
                .format(acoustic_parameters_directory))
        if not os.path.isfile(language_model_file):
            raise sr.RequestError(
                "missing PocketSphinx language model file: \"{}\"".format(
                    language_model_file))
        if not os.path.isfile(phoneme_dictionary_file):
            raise sr.RequestError(
                "missing PocketSphinx phoneme dictionary file: \"{}\"".format(
                    phoneme_dictionary_file))

        # create decoder object
        config = pocketsphinx.Decoder.default_config()
        config.set_string(
            "-hmm", acoustic_parameters_directory
        )  # set the path of the hidden Markov model (HMM) parameter files
        config.set_string("-lm", language_model_file)
        config.set_string("-dict", phoneme_dictionary_file)
        config.set_string(
            "-logfn", os.devnull
        )  # disable logging (logging causes unwanted output in terminal)
        self.decoder = pocketsphinx.Decoder(config)

        # ``None`` is an accepted value (and the default), so the keyword
        # search is only set up when entries were actually supplied;
        # iterating ``None`` would raise TypeError.
        if keyword_entries is not None:
            with open("sphinx.txt", "w") as f:
                # generate a keywords file, one "<keyword> /<sensitivity>/" line per entry
                # NOTE(review): the sensitivity is written unscaled, while the
                # Sphinx documentation recommends thresholds between 1e-50
                # and 1e-5 - confirm this is intended
                f.writelines("{} /{}/\n".format(keyword, sensitivity)
                             for keyword, sensitivity in keyword_entries)

            # the keywords file is a regular file that is left on disk, so
            # it is still there when the decoder reads it
            self.decoder.set_kws("keywords", "sphinx.txt")
            self.decoder.set_search("keywords")