    def __init__(self):
        self.__logger = Logger()

        # exception classes grouped by the subsystem that raises them
        self._request_exceptions = [requests.ConnectionError, requests.HTTPError,
                                    requests.TooManyRedirects, requests.Timeout,
                                    requests.RequestException, requests.ConnectTimeout,
                                    requests.ReadTimeout]

        self._system_errors = [KeyError, AttributeError, IndexError,
                               ZeroDivisionError, SystemError, ValueError,
                               AssertionError]

        self._file_errors = [FileExistsError, FileNotFoundError]

        self._database_errors = [sqlite3.Error, sqlite3.DataError,
                                 sqlite3.ProgrammingError, sqlite3.DatabaseError,
                                 sqlite3.NotSupportedError, sqlite3.IntegrityError,
                                 sqlite3.InterfaceError, sqlite3.InternalError,
                                 sqlite3.OperationalError]

        self._speech_recognizer_errors = [sr.RequestError, sr.UnknownValueError,
                                          sr.WaitTimeoutError]

        self.__logger.info('ExceptionsHandler was successfully initialized.', __name__)
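
    # Hedged illustration, not part of the original handler: one way these category lists
    # might be used to map a caught exception to a coarse category name for logging.
    def _classify_exception(self, error):
        for category, error_types in [('request', self._request_exceptions),
                                      ('database', self._database_errors),
                                      ('speech_recognition', self._speech_recognizer_errors),
                                      ('file', self._file_errors),
                                      ('system', self._system_errors)]:
            if type(error) in error_types:
                return category
        return 'unknown'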
Example #2
def recognize_ibm(self, audio_data, username, password, language="en-US", show_all=False,
                  url="https://stream.watsonplatform.net/speech-to-text/api"):
    """
    Performs speech recognition on ``audio_data`` (an ``AudioData`` instance), using the IBM Speech to Text API.

    The IBM Speech to Text username and password are specified by ``username`` and ``password``, respectively. Unfortunately, these are not available without `signing up for an account <https://console.ng.bluemix.net/registration/>`__. Once logged into the Bluemix console, follow the instructions for `creating an IBM Watson service instance <https://www.ibm.com/watson/developercloud/doc/getting_started/gs-credentials.shtml>`__, where the Watson service is "Speech To Text". IBM Speech to Text usernames are strings of the form XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX, while passwords are mixed-case alphanumeric strings.

    The recognition language is determined by ``language``, an RFC5646 language tag with a dialect like ``"en-US"`` (US English) or ``"zh-CN"`` (Mandarin Chinese), defaulting to US English. The supported language values are listed under the ``model`` parameter of the `audio recognition API documentation <https://www.ibm.com/watson/developercloud/speech-to-text/api/v1/#sessionless_methods>`__, in the form ``LANGUAGE_BroadbandModel``, where ``LANGUAGE`` is the language value.

    Returns the most likely transcription if ``show_all`` is false (the default). Otherwise, returns the `raw API response <https://www.ibm.com/watson/developercloud/speech-to-text/api/v1/#sessionless_methods>`__ as a JSON dictionary.

    Raises a ``speech_recognition.UnknownValueError`` exception if the speech is unintelligible. Raises a ``speech_recognition.RequestError`` exception if the speech recognition operation failed, if the key isn't valid, or if there is no internet connection.
    """
    assert isinstance(audio_data, sr.AudioData), "Data must be audio data"
    assert isinstance(username, str), "``username`` must be a string"
    assert isinstance(password, str), "``password`` must be a string"

    flac_data = audio_data.get_flac_data(
        convert_rate=None if audio_data.sample_rate >= 16000 else 16000,  # audio samples should be at least 16 kHz
        convert_width=None if audio_data.sample_width >= 2 else 2  # audio samples should be at least 16-bit
    )
    url = "{}/v1/recognize?{}".format(url, urlencode({
        "profanity_filter": "false",
        "continuous": "true",
        "model": "{}_BroadbandModel".format(language),
    }))
    request = Request(url, data=flac_data, headers={
        "Content-Type": "audio/x-flac",
        "X-Watson-Learning-Opt-Out": "true",  # prevent requests from being logged, for improved privacy
    })
    authorization_value = base64.standard_b64encode("{}:{}".format(username, password).encode("utf-8")).decode(
        "utf-8")
    request.add_header("Authorization", "Basic {}".format(authorization_value))
    try:
        response = urlopen(request, timeout=self.operation_timeout)
    except HTTPError as e:
        raise sr.RequestError("recognition request failed: {}".format(e.reason))
    except URLError as e:
        raise sr.RequestError("recognition connection failed: {}".format(e.reason))
    response_text = response.read().decode("utf-8")
    result = json.loads(response_text)

    # return results
    if show_all: return result
    if "results" not in result or len(result["results"]) < 1 or "alternatives" not in result["results"][0]:
        raise sr.UnknownValueError()

    transcription = []
    for utterance in result["results"]:
        if "alternatives" not in utterance: raise sr.UnknownValueError()
        for hypothesis in utterance["alternatives"]:
            if "transcript" in hypothesis:
                transcription.append(hypothesis["transcript"])
    return "\n".join(transcription)
Example #3
def recognize_deepspeech(audio_data, cmdline):
    """
    Author: Misha Jiline (https://github.com/mjiline)
    """
    assert isinstance(audio_data, sr.AudioData), "Data must be audio data"
    assert isinstance(cmdline, str), "``cmdline`` must be a string"

    try:
        import tempfile
        import subprocess
        from subprocess import PIPE
    except ImportError:
        raise sr.RequestError("missing tempfile/subprocess modules")

    raw_data = audio_data.get_wav_data(convert_rate=16000, convert_width=2)

    with tempfile.NamedTemporaryFile(suffix='.wav') as fp:
        fp.write(raw_data)
        fp.seek(0)
        transcript = subprocess.run("exec %s --audio %s" % (cmdline, fp.name),
                                    shell=True,
                                    stdout=PIPE,
                                    stderr=PIPE).stdout
        transcript = transcript.decode('utf-8')

    return transcript, {}
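
# Hedged usage sketch for recognize_deepspeech. The command line and audio file are illustrative
# only; any DeepSpeech-compatible CLI that accepts an ``--audio`` flag should work the same way.
r = sr.Recognizer()
with sr.AudioFile("sample.wav") as source:  # hypothetical recording
    audio = r.record(source)
text, _ = recognize_deepspeech(
    audio,
    "deepspeech --model models.pbmm --scorer models.scorer")  # placeholder model paths
print(text)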
Example #4
    def test_listen_raises_connection_error(self):
        with patch.object(self.listener, 'transcribe') as transcribe:
            transcribe.side_effect = sr.RequestError()

            with self.assertRaises(Exception) as context:
                self.listener.listen()

            self.assertIn('Error connecting to API', str(context.exception))
Example #5
def recognize_amazon(audio_data,
                     bot_name,
                     bot_alias,
                     user_id,
                     content_type="audio/l16; rate=16000; channels=1",
                     access_key_id=None,
                     secret_access_key=None,
                     region=None):
    """
    Performs speech recognition on ``audio_data`` (an ``AudioData`` instance).

    If ``access_key_id`` or ``secret_access_key`` is not set, boto3 falls back to its standard
    credential resolution order, described at
    http://boto3.readthedocs.io/en/latest/guide/configuration.html#configuring-credentials

    Author: Patrick Artounian (https://github.com/partounian)
    Source: https://github.com/Uberi/speech_recognition/pull/331
    """
    assert isinstance(audio_data, sr.AudioData), "Data must be audio data"
    assert isinstance(bot_name, str), "``bot_name`` must be a string"
    assert isinstance(bot_alias, str), "``bot_alias`` must be a string"
    assert isinstance(user_id, str), "``user_id`` must be a string"
    assert isinstance(content_type, str), "``content_type`` must be a string"
    assert access_key_id is None or isinstance(
        access_key_id, str), "``access_key_id`` must be ``None`` or a string"
    assert secret_access_key is None or isinstance(
        secret_access_key, str), "``secret_access_key`` must be ``None`` or a string"
    assert region is None or isinstance(
        region, str), "``region`` must be ``None`` or a string"

    try:
        import boto3
    except ImportError:
        raise sr.RequestError(
            "missing boto3 module: ensure that boto3 is set up correctly.")

    client = boto3.client('lex-runtime',
                          aws_access_key_id=access_key_id,
                          aws_secret_access_key=secret_access_key,
                          region_name=region)

    raw_data = audio_data.get_raw_data(convert_rate=16000, convert_width=2)

    accept = "text/plain; charset=utf-8"
    response = client.post_content(botName=bot_name,
                                   botAlias=bot_alias,
                                   userId=user_id,
                                   contentType=content_type,
                                   accept=accept,
                                   inputStream=raw_data)

    if not response["inputTranscript"]:
        raise sr.UnknownValueError()

    return response["inputTranscript"], response
def say_answer(recog):
    """Retrieve the text for a spoken answer"""
    try:
        with sr.Microphone() as source:
            print("Say your answer\n")
            audio = recog.listen(source, phrase_time_limit=20)
        return recog.recognize_google(audio)
    except sr.UnknownValueError:
        print('Speech recognition could not understand audio')
        raise
    except sr.RequestError as err:
        print('Could not request results from the speech recognition '
              'service; {0}'.format(err))
        raise
Example #7
    def recognize(self, connected):
        if connected:
            r = sr.Recognizer()

            with sr.Microphone() as source:
                print("I'm listening...")
                audio = r.listen(source)

            try:
                self.data = r.recognize_google(audio)
                print("You said: " + self.data)
            except sr.UnknownValueError:
                self.data = ""
                print("I cannot understand you clearly.")
            except sr.RequestError as e:
                raise sr.RequestError("Error: no connection ({})".format(e))
        else:
            self.data = input("Input: ")

        if self.data:
            return self.data
        else:
            print("Test")
    def setup_decoder(audio_file, keyword_entries):

        language = "en-US"

        audio_file_type = os.path.splitext(audio_file)[1].lower()

        if audio_file_type == '.wav':
            curr_dir = os.getcwd()
            data_dir = os.path.join(curr_dir, '../data/')
            speech_recognition_directory = '/Library/Python/2.7/site-packages/speech_recognition/'
            audio_data_path = os.path.join(data_dir, audio_file)
        else:
            raise speech_recognition.RequestError("file type must be .wav")

        assert isinstance(language, str), "``language`` must be a string"
        assert keyword_entries is None or all(
            isinstance(keyword, (type(""), type(u""))) and 0 <= sensitivity <= 1 for keyword, sensitivity in
            keyword_entries), "``keyword_entries`` must be ``None`` or a list of pairs of strings and numbers " \
                              "between 0 and 1"
        # import the PocketSphinx speech recognition module
        try:
            from pocketsphinx import pocketsphinx
        except ImportError:
            raise speech_recognition.RequestError(
                "missing PocketSphinx module: ensure that PocketSphinx is set up correctly."
            )
        except ValueError:
            raise speech_recognition.RequestError(
                "bad PocketSphinx installation detected; make sure you have PocketSphinx version 0.0.9 or better."
            )

        language_directory = os.path.join(
            os.path.dirname(speech_recognition_directory), "pocketsphinx-data",
            language)
        if not os.path.isdir(language_directory):
            raise speech_recognition.RequestError(
                "missing PocketSphinx language data directory: \"{}\"".format(
                    language_directory))
        acoustic_parameters_directory = os.path.join(language_directory,
                                                     "acoustic-model")
        if not os.path.isdir(acoustic_parameters_directory):
            raise speech_recognition.RequestError(
                "missing PocketSphinx language model parameters directory: \"{}\""
                .format(acoustic_parameters_directory))
        language_model_file = os.path.join(language_directory,
                                           "language-model.lm.bin")
        if not os.path.isfile(language_model_file):
            raise speech_recognition.RequestError(
                "missing PocketSphinx language model file: \"{}\"".format(
                    language_model_file))
        phoneme_dictionary_file = os.path.join(
            language_directory, "pronounciation-dictionary.dict")
        if not os.path.isfile(phoneme_dictionary_file):
            raise speech_recognition.RequestError(
                "missing PocketSphinx phoneme dictionary file: \"{}\"".format(
                    phoneme_dictionary_file))

        # create decoder object
        config = pocketsphinx.Decoder.default_config()
        # set the path of the hidden Markov model (HMM) parameter files
        config.set_string("-hmm", acoustic_parameters_directory)
        config.set_string("-lm", language_model_file)
        config.set_string("-dict", phoneme_dictionary_file)
        # disable logging (logging causes unwanted output in terminal)
        config.set_string("-logfn", os.devnull)
        decoder = pocketsphinx.Decoder(config)

        return audio_data_path, decoder
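
    # Hedged usage sketch: drive the decoder returned above over the prepared WAV file
    # (requires ``import wave``). The file name and keyword list are placeholders.
    #
    #     audio_path, decoder = setup_decoder("command.wav", [("hello", 1e-20)])
    #     decoder.start_utt()
    #     with wave.open(audio_path, "rb") as wav_file:
    #         decoder.process_raw(wav_file.readframes(wav_file.getnframes()), False, True)
    #     decoder.end_utt()
    #     hypothesis = decoder.hyp()
    #     print(hypothesis.hypstr if hypothesis is not None else "(no speech recognized)")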
    def prepare_sphinx2(self, language="en-US", keyword_entries=None):
        assert isinstance(language, str) or (
            isinstance(language, tuple) and len(language) == 3
        ), "``language`` must be a string or 3-tuple of Sphinx data file paths of the form ``(acoustic_parameters, language_model, phoneme_dictionary)``"
        assert keyword_entries is None or all(
            isinstance(keyword, (type(""),
                                 type(u""))) and 0 <= sensitivity <= 1
            for keyword, sensitivity in keyword_entries
        ), "``keyword_entries`` must be ``None`` or a list of pairs of strings and numbers between 0 and 1"

        if isinstance(language, str):  # directory containing language data
            language_directory = os.path.join(
                os.path.dirname(os.path.realpath(__file__)),
                "pocketsphinx-data", language)
            if not os.path.isdir(language_directory):
                raise sr.RequestError(
                    "missing PocketSphinx language data directory: \"{}\"".
                    format(language_directory))
            acoustic_parameters_directory = os.path.join(
                language_directory, "acoustic-model")
            language_model_file = os.path.join(language_directory,
                                               "language-model.lm.bin")
            phoneme_dictionary_file = os.path.join(
                language_directory, "pronounciation-dictionary.dict")
        else:  # 3-tuple of Sphinx data file paths
            acoustic_parameters_directory, language_model_file, phoneme_dictionary_file = language
        if not os.path.isdir(acoustic_parameters_directory):
            raise sr.RequestError(
                "missing PocketSphinx language model parameters directory: \"{}\""
                .format(acoustic_parameters_directory))
        if not os.path.isfile(language_model_file):
            raise sr.RequestError(
                "missing PocketSphinx language model file: \"{}\"".format(
                    language_model_file))
        if not os.path.isfile(phoneme_dictionary_file):
            raise sr.RequestError(
                "missing PocketSphinx phoneme dictionary file: \"{}\"".format(
                    phoneme_dictionary_file))

        # create decoder object
        config = pocketsphinx.Decoder.default_config()
        config.set_string(
            "-hmm", acoustic_parameters_directory
        )  # set the path of the hidden Markov model (HMM) parameter files
        config.set_string("-lm", language_model_file)
        config.set_string("-dict", phoneme_dictionary_file)
        config.set_string(
            "-logfn", os.devnull
        )  # disable logging (logging causes unwanted output in terminal)
        self.decoder = pocketsphinx.Decoder(config)

        with open("sphinx.txt", "w") as f:
            # generate a keywords file - Sphinx documentation recommendeds sensitivities between 1e-50 and 1e-5
            f.writelines("{} /{}/\n".format(keyword, sensitivity)
                         for keyword, sensitivity in keyword_entries)

        # perform the speech recognition with the keywords file (this is inside the context manager so the file isn;t deleted until we're done)
        self.decoder.set_kws("keywords", "sphinx.txt")
        self.decoder.set_search("keywords")

        return
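
    # Hedged usage sketch (assuming this method lives on a Recognizer subclass such as the
    # MyRecognizer class below): prepare the keyword-spotting decoder, then feed it raw audio.
    #
    #     rec = MyRecognizer()
    #     rec.prepare_sphinx2(keyword_entries=[("forward", 1e-20), ("stop", 1e-10)])
    #     with sr.AudioFile("command.wav") as source:  # placeholder recording
    #         audio = rec.record(source)
    #     rec.decoder.start_utt()
    #     rec.decoder.process_raw(audio.get_raw_data(convert_rate=16000, convert_width=2), False, True)
    #     rec.decoder.end_utt()
    #     hyp = rec.decoder.hyp()
    #     print(hyp.hypstr if hyp is not None else "(no keyword spotted)")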
import os

try:
    import speech_recognition as sr  #@UnusedImport  # check if the package is installed
except ImportError:
    print("No speech_recognition installed on the system. Trying to use the fallback...")
    import resources.lib.speech_recognition as sr  #@Reimport  # if not, use the bundled one

# import the PocketSphinx speech recognition module
try:
    from pocketsphinx import pocketsphinx

except ImportError:
    raise sr.RequestError(
        "missing PocketSphinx module: ensure that PocketSphinx is set up correctly."
    )
except ValueError:
    raise sr.RequestError(
        "bad PocketSphinx installation; try reinstalling PocketSphinx version 0.0.9 or better."
    )
if not hasattr(pocketsphinx, "Decoder") or not hasattr(pocketsphinx.Decoder,
                                                       "default_config"):
    raise sr.RequestError(
        "outdated PocketSphinx installation; ensure you have PocketSphinx version 0.0.9 or better."
    )


class MyRecognizer(sr.Recognizer):
    def prepare_sphinx2(self, language="en-US", keyword_entries=None):
        assert isinstance(language, str) or (
            isinstance(language, tuple) and len(language) == 3
Example #11
def recognize_ibm(audio_data,
                  username,
                  password,
                  language="en-US",
                  show_all=False):
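    # Module-level variant of the Recognizer.recognize_ibm method shown earlier: it builds the
    # same IBM Speech to Text request, but with no operation timeout, an unlimited
    # inactivity_timeout, and word timestamps enabled.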
    assert isinstance(audio_data, sr.AudioData), "Data must be audio data"
    assert isinstance(username, str), "``username`` must be a string"
    assert isinstance(password, str), "``password`` must be a string"

    flac_data = audio_data.get_flac_data(
        convert_rate=None if audio_data.sample_rate >= 16000 else 16000,  # audio samples should be at least 16 kHz
        convert_width=None if audio_data.sample_width >= 2 else 2  # audio samples should be at least 16-bit
    )
    url = "https://stream.watsonplatform.net/speech-to-text/api/v1/recognize?{}".format(urlencode({
        "profanity_filter": "false",
        "model": "{}_BroadbandModel".format(language),
        "inactivity_timeout": -1,  # don't stop recognizing when the audio stream activity stops
        "timestamps": "true"
    }))
    request = Request(url, data=flac_data, headers={
        "Content-Type": "audio/x-flac",
        "X-Watson-Learning-Opt-Out": "true",  # prevent requests from being logged, for improved privacy
    })
    authorization_value = base64.standard_b64encode("{}:{}".format(username, password).encode("utf-8")).decode("utf-8")
    request.add_header("Authorization", "Basic {}".format(authorization_value))
    try:
        response = urlopen(request, timeout=None)
    except HTTPError as e:
        raise sr.RequestError("recognition request failed: {}".format(e.reason))
    except URLError as e:
        raise sr.RequestError("recognition connection failed: {}".format(e.reason))
    response_text = response.read().decode("utf-8")
    result = json.loads(response_text)

    # return results
    if show_all:
        return result
    if "results" not in result or len(
            result["results"]
    ) < 1 or "alternatives" not in result["results"][0]:
        raise sr.UnknownValueError()

    transcription = []
    for utterance in result["results"]:
        if "alternatives" not in utterance:
            raise sr.UnknownValueError()
        for hypothesis in utterance["alternatives"]:
            if "transcript" in hypothesis:
                transcription.append(hypothesis["transcript"])
    return "\n".join(transcription)