コード例 #1
0
def recognize_ibm(self, audio_data, username, password, language="en-US", show_all=False,
                  url="https://stream.watsonplatform.net/speech-to-text/api"):
    """Transcribe ``audio_data`` (an ``AudioData`` instance) with the IBM Speech to Text API.

    ``username`` and ``password`` are the Bluemix service credentials; ``language`` is an
    RFC5646 tag such as ``"en-US"`` or ``"zh-CN"`` that selects the
    ``<language>_BroadbandModel`` recognition model; ``url`` is the service endpoint.

    Returns the best transcription as a string, or the raw JSON response when
    ``show_all`` is true.  Raises ``speech_recognition.UnknownValueError`` when no
    speech could be recognized, and ``speech_recognition.RequestError`` on HTTP or
    connection failures (including invalid credentials or no internet connection).
    """
    assert isinstance(audio_data, sr.AudioData), "Data must be audio data"
    assert isinstance(username, str), "``username`` must be a string"
    assert isinstance(password, str), "``password`` must be a string"

    # The service wants at least 16 kHz / 16-bit samples; upconvert only when needed.
    rate = None if audio_data.sample_rate >= 16000 else 16000
    width = None if audio_data.sample_width >= 2 else 2
    flac_data = audio_data.get_flac_data(convert_rate=rate, convert_width=width)

    query = urlencode({
        "profanity_filter": "false",
        "continuous": "true",
        "model": "{}_BroadbandModel".format(language),
    })
    request = Request("{}/v1/recognize?{}".format(url, query), data=flac_data, headers={
        "Content-Type": "audio/x-flac",
        "X-Watson-Learning-Opt-Out": "true",  # prevent requests from being logged, for improved privacy
    })
    credentials = "{}:{}".format(username, password).encode("utf-8")
    authorization_value = base64.standard_b64encode(credentials).decode("utf-8")
    request.add_header("Authorization", "Basic {}".format(authorization_value))

    try:
        response = urlopen(request, timeout=self.operation_timeout)
    except HTTPError as e:
        raise sr.RequestError("recognition request failed: {}".format(e.reason))
    except URLError as e:
        raise sr.RequestError("recognition connection failed: {}".format(e.reason))
    result = json.loads(response.read().decode("utf-8"))

    # return results
    if show_all:
        return result
    if "results" not in result or len(result["results"]) < 1 or "alternatives" not in result["results"][0]:
        raise sr.UnknownValueError()

    pieces = []
    for utterance in result["results"]:
        if "alternatives" not in utterance:
            raise sr.UnknownValueError()
        pieces.extend(hypothesis["transcript"]
                      for hypothesis in utterance["alternatives"]
                      if "transcript" in hypothesis)
    return "\n".join(pieces)
コード例 #2
0
ファイル: transcribe.py プロジェクト: lspitzley/ASR_benchmark
def google_post(speech_filepath):
    """Build a transcription from a previously saved Google Speech Recognition response.

    The raw API response is expected in a JSON file next to ``speech_filepath``
    (same basename, ``.json`` extension).

    Returns the transcript of the most confident hypothesis, or ``None`` when the
    response could not be interpreted (``sr.UnknownValueError``) or the service could
    not be reached (``sr.RequestError``) — both cases are reported on stdout.
    """
    try:
        # Load the raw API response that was saved alongside the audio file.
        with open(os.path.splitext(speech_filepath)[0] + '.json') as json_data:
            actual_result = json.load(json_data)

        if not isinstance(actual_result, dict) or len(actual_result.get("alternative", [])) == 0: raise sr.UnknownValueError()

        # BUGFIX: "confidence" must be looked up in the first alternative (a dict);
        # the original tested membership in the *list* of alternatives, which is
        # always False, so the best-confidence branch was dead code.
        if "confidence" in actual_result["alternative"][0]:
            # return alternative with highest confidence score (alternatives that
            # lack a confidence value sort as 0)
            best_hypothesis = max(actual_result["alternative"], key=lambda alternative: alternative.get("confidence", 0))
        else:
            # when there is no confidence available, we arbitrarily choose the first hypothesis.
            best_hypothesis = actual_result["alternative"][0]
        if "transcript" not in best_hypothesis: raise sr.UnknownValueError()
        transcription = best_hypothesis["transcript"]

        print("Google Speech Recognition transcription is: " + transcription)

        return transcription
    except sr.UnknownValueError:
        print("Google Speech Recognition could not understand audio")
    except sr.RequestError as e:
        print("Could not request results from Google Speech Recognition service; {0}".format(e))
        # NOTE: the original also set a dead local flag here; removed as it was
        # never read (assigning a module-level flag would need a `global` statement).
コード例 #3
0
    def __init__(self):
        """Collect the exception classes this handler can classify, grouped by domain."""
        self.__logger = Logger()

        # Exception *classes* are stored directly.  The original instantiated every
        # exception just to call type() on it, which is wasteful and allowed
        # duplicates (requests.TooManyRedirects and sr.RequestError were listed
        # twice); the duplicates are removed here.
        self._request_exceptions = [requests.ConnectionError, requests.HTTPError,
                                    requests.TooManyRedirects, requests.Timeout,
                                    requests.RequestException, requests.ConnectTimeout,
                                    requests.ReadTimeout]

        self._system_errors = [KeyError, AttributeError, IndexError,
                               ZeroDivisionError, SystemError, ValueError,
                               AssertionError]

        self._file_errors = [FileExistsError, FileNotFoundError]

        self._database_errors = [sqlite3.Error, sqlite3.DataError,
                                 sqlite3.ProgrammingError, sqlite3.DatabaseError,
                                 sqlite3.NotSupportedError, sqlite3.IntegrityError,
                                 sqlite3.InterfaceError, sqlite3.InternalError,
                                 sqlite3.OperationalError]

        self._speech_recognizer_errors = [sr.RequestError, sr.UnknownValueError,
                                          sr.WaitTimeoutError]

        self.__logger.info('ExceptionsHandler was successfully initialized.', __name__)
コード例 #4
0
    def test_listen_raises_value_error(self):
        """listen() should surface an unrecognized utterance as a ValueError."""
        with patch.object(self.listener, 'transcribe') as fake_transcribe:
            fake_transcribe.side_effect = sr.UnknownValueError()

            with self.assertRaises(ValueError) as raised:
                self.listener.listen()

        # The error message must tell the user what went wrong.
        self.assertIn('Could not translate speech', str(raised.exception))
コード例 #5
0
 def recognize_mycroft(self,
                       audio,
                       key=None,
                       language=None,
                       show_all=False):
     """Pop and return the next queued transcription.

     Raises ``speech_recognition.UnknownValueError`` when no transcriptions remain;
     the ``audio``/``key``/``language``/``show_all`` arguments are accepted only for
     API compatibility and are ignored.
     """
     if not self.transcriptions:
         raise speech_recognition.UnknownValueError()
     return self.transcriptions.pop(0)
コード例 #6
0
ファイル: transcribe.py プロジェクト: mjiline/ASR_benchmark
def recognize_amazon(audio_data,
                     bot_name,
                     bot_alias,
                     user_id,
                     content_type="audio/l16; rate=16000; channels=1",
                     access_key_id=None,
                     secret_access_key=None,
                     region=None):
    """Transcribe ``audio_data`` (an ``AudioData`` instance) through an Amazon Lex bot.

    When ``access_key_id`` or ``secret_access_key`` is omitted, boto3 falls back to
    its standard credential lookup chain:
    http://boto3.readthedocs.io/en/latest/guide/configuration.html#configuring-credentials

    Author: Patrick Artounian (https://github.com/partounian)
    Source: https://github.com/Uberi/speech_recognition/pull/331

    Returns a ``(transcript, raw_response)`` tuple.  Raises
    ``speech_recognition.UnknownValueError`` when Lex produced no transcript and
    ``speech_recognition.RequestError`` when boto3 is not installed.
    """
    assert isinstance(audio_data, sr.AudioData), "Data must be audio data"
    assert isinstance(bot_name, str), "``bot_name`` must be a string"
    assert isinstance(bot_alias, str), "``bot_alias`` must be a string"
    assert isinstance(user_id, str), "``user_id`` must be a string"
    assert isinstance(content_type, str), "``content_type`` must be a string"
    assert access_key_id is None or isinstance(access_key_id, str), "``access_key_id`` must be a string"
    assert secret_access_key is None or isinstance(secret_access_key, str), "``secret_access_key`` must be a string"
    assert region is None or isinstance(region, str), "``region`` must be a string"

    try:
        import boto3
    except ImportError:
        raise sr.RequestError(
            "missing boto3 module: ensure that boto3 is set up correctly.")

    lex = boto3.client('lex-runtime',
                       aws_access_key_id=access_key_id,
                       aws_secret_access_key=secret_access_key,
                       region_name=region)

    # Lex expects 16 kHz, 16-bit mono PCM for the default content type.
    pcm_data = audio_data.get_raw_data(convert_rate=16000, convert_width=2)

    response = lex.post_content(botName=bot_name,
                                botAlias=bot_alias,
                                userId=user_id,
                                contentType=content_type,
                                accept="text/plain; charset=utf-8",
                                inputStream=pcm_data)

    if not response["inputTranscript"]:
        raise sr.UnknownValueError()

    return response["inputTranscript"], response
コード例 #7
0
def speechToText(sound):
    """Transcribe an audio source with Google Speech Recognition and return the text.

    Re-raises ``sr.UnknownValueError`` with a friendlier message when the audio is
    unintelligible, and ``FileNotFoundError`` when the source file is missing.
    """
    try:
        recognizer = sr.Recognizer()
        with sound as source:
            recorded = recognizer.record(source)
        return recognizer.recognize_google(recorded)
    except sr.UnknownValueError:
        raise sr.UnknownValueError("Sound file clarity too low, try again.")
    except FileNotFoundError:
        raise FileNotFoundError("Check your file source and try again!")
コード例 #8
0
def command():
    """Listen on the microphone and return the recognized command string.

    Falls back to typed input when Google Speech Recognition cannot understand
    the audio.
    """
    c = sr.Recognizer()  # speech recognizer
    with sr.Microphone() as source:
        c.pause_threshold = 2  # wait 2s of silence before treating the phrase as complete
        audio = c.listen(source)  # capture audio from the microphone
        try:
            query = c.recognize_google(audio, language='en')
            print("Tony Kien: " + query)
        # BUGFIX: the original `except sr.UnknownValueError():` tried to catch an
        # exception *instance*, which raises a TypeError at handling time; an
        # except clause must name the exception class itself.
        except sr.UnknownValueError:  # speech was unintelligible
            print("Please repeat or typing the command")
            query = str(input('Your Order Is: '))
        return query
コード例 #9
0
 def test_could_not_parse_text(self):
     """A transcription failure should still trigger the os.system notification."""
     with mock.patch('speech_recognition.Microphone') as mocked_mic:
         mocked_mic.return_value.__enter__ = lambda x: time.sleep(.5)
         wav_path = os.path.join(os.getcwd(), "audio-files/harvard.wav")
         audio_file = sr.AudioFile(wav_path)
         os.system = mock.MagicMock()
         listener = Listener()
         # Record real audio once, then feed it back from a mocked listen().
         with audio_file as source:
             recorded = listener.recognizer.record(source)
         listener.recognizer.listen = mock.MagicMock(return_value=recorded)
         listener.transcribe = mock.Mock(side_effect=sr.UnknownValueError("test"))
         # Schedule a stop so the listen loop terminates after one second.
         stopper = Timer(1.0, listener.stop)
         stopper.start()
         listener.listen()
         assert os.system.called
コード例 #10
0
ファイル: transcribe.py プロジェクト: lspitzley/ASR_benchmark
def transcribe(speech_filepath, asr_system, settings, save_transcription=True):
    '''
    Transcribe one speech file with the ASR system named by ``asr_system``.

    Returns:
     - transcription: string corresponding the transcription obtained from the ASR API or existing transcription file.
     - transcription_skipped: Boolean indicating if the speech file was sent to the ASR API.
    '''
    transcription_json = ''
    transcription_filepath_base = '.'.join(speech_filepath.split('.')[:-1]) + '_'  + asr_system
    transcription_filepath_text = transcription_filepath_base  + '.txt'
    transcription_filepath_json = transcription_filepath_base  + '.json'

    # If there already exists a transcription file,  we may skip it depending on the user settings.
    if os.path.isfile(transcription_filepath_text):
        existing_transcription = codecs.open(transcription_filepath_text, 'r', settings.get('general','predicted_transcription_encoding')).read()
        is_transcription_file_empty = len(existing_transcription.strip()) == 0
        if not is_transcription_file_empty and not settings.getboolean('general','overwrite_non_empty_transcriptions'):
            print('Skipped speech file {0} because the file {1} already exists and is not empty.'.format(speech_filepath,transcription_filepath_text))
            print('Change the setting `overwrite_non_empty_transcriptions` to True if you want to overwrite existing transcriptions')
            transcription_skipped = True
            return existing_transcription, transcription_skipped
        if is_transcription_file_empty and not settings.getboolean('general','overwrite_empty_transcriptions'):
            print('Skipped speech file {0} because the file {1} already exists and is empty.'.format(speech_filepath,transcription_filepath_text))
            print('Change the setting `overwrite_empty_transcriptions` to True if you want to overwrite existing transcriptions')
            transcription_skipped = True
            return existing_transcription, transcription_skipped

    # use the audio file as the audio source
    r = sr.Recognizer()
    with sr.AudioFile(speech_filepath) as source:
        audio = r.record(source)  # read the entire audio file

    transcription = ''
    asr_could_not_be_reached = False
    asr_timestamp_started = time.time()
    speech_language = settings.get('general','speech_language')
    if asr_system == 'google':
        # recognize speech using Google Speech Recognition
        try:
            # for testing purposes, we're just using the default API key
            # to use another API key, use `r.recognize_google(audio, key="GOOGLE_SPEECH_RECOGNITION_API_KEY")`
            # instead of `r.recognize_google(audio)`
            response = r.recognize_google(audio, show_all=True, language=speech_language)
            transcription_json = response

            actual_result = response
            if not isinstance(actual_result, dict) or len(actual_result.get("alternative", [])) == 0: raise sr.UnknownValueError()

            # BUGFIX: "confidence" must be looked up in the first alternative (a
            # dict); the original tested membership in the *list* of alternatives,
            # which is always False, so the best-confidence branch was dead code.
            if "confidence" in actual_result["alternative"][0]:
                # return alternative with highest confidence score (alternatives
                # that lack a confidence value sort as 0)
                best_hypothesis = max(actual_result["alternative"], key=lambda alternative: alternative.get("confidence", 0))
            else:
                # when there is no confidence available, we arbitrarily choose the first hypothesis.
                best_hypothesis = actual_result["alternative"][0]
            if "transcript" not in best_hypothesis: raise sr.UnknownValueError()
            transcription = best_hypothesis["transcript"]

            print("Google Speech Recognition transcription is: " + transcription)
        except sr.UnknownValueError:
            print("Google Speech Recognition could not understand audio")
        except sr.RequestError as e:
            print("Could not request results from Google Speech Recognition service; {0}".format(e))
            asr_could_not_be_reached = True

    elif asr_system == 'google_post':
        # use an already-downloaded Google response saved next to the audio file
        transcription = google_post(speech_filepath)

    elif asr_system == 'googlecloud':
        # recognize speech using Google Cloud Speech
        GOOGLE_CLOUD_SPEECH_CREDENTIALS_filepath = settings.get('credentials','google_cloud_speech_credentials_filepath')
        GOOGLE_CLOUD_SPEECH_CREDENTIALS = codecs.open(GOOGLE_CLOUD_SPEECH_CREDENTIALS_filepath, 'r', 'UTF-8').read()
        try:
            response = r.recognize_google_cloud(audio, credentials_json=GOOGLE_CLOUD_SPEECH_CREDENTIALS, show_all=True, language=speech_language)
            transcription_json = response
            if "results" not in response or len(response["results"]) == 0: raise sr.UnknownValueError()
            transcript = ""
            for result in response["results"]:
                transcript += result["alternatives"][0]["transcript"].strip() + " "

            transcription = transcript

        except sr.UnknownValueError:
            print("Google Cloud Speech could not understand audio")
        except sr.RequestError as e:
            print("Could not request results from Google Cloud Speech service; {0}".format(e))
            asr_could_not_be_reached = True

    # recognize speech using Wit.ai
    elif asr_system == 'wit':
        WIT_AI_KEY = settings.get('credentials','wit_ai_key')
        print("Calling the Wit.ai API")
        try:
            response = r.recognize_wit(audio, key=WIT_AI_KEY, show_all=True)
            transcription_json = response

            if "_text" not in response or response["_text"] is None: raise sr.UnknownValueError()
            transcription = response["_text"]

        except sr.UnknownValueError:
            print("Wit.ai could not understand audio")
        except sr.RequestError as e:
            print("Could not request results from Wit.ai service; {0}".format(e))
            asr_could_not_be_reached = True

    # recognize speech using Microsoft Bing Voice Recognition
    elif asr_system == 'microsoft':
        BING_KEY = settings.get('credentials','bing_key')
        print('Calling the Microsoft Bing Voice Recognition API')
        try:
            response =  r.recognize_bing(audio, key=BING_KEY, show_all=True, language=speech_language)
            transcription_json = response
            if "RecognitionStatus" not in response or response["RecognitionStatus"] != "Success" or "DisplayText" not in response:
                raise sr.UnknownValueError()
            transcription = response["DisplayText"]

        except sr.UnknownValueError:
            print("Microsoft Bing Voice Recognition could not understand audio")
        except sr.RequestError as e:
            print("Could not request results from Microsoft Bing Voice Recognition service; {0}".format(e))
            asr_could_not_be_reached = True

    elif asr_system == 'houndify':
        # recognize speech using Houndify
        HOUNDIFY_CLIENT_ID = settings.get('credentials','houndify_client_id')
        HOUNDIFY_CLIENT_KEY = settings.get('credentials','houndify_client_key')

        print("Calling the Houndify API")
        try:
            response = r.recognize_houndify(audio, client_id=HOUNDIFY_CLIENT_ID, client_key=HOUNDIFY_CLIENT_KEY, show_all=True)
            transcription_json = response

            if "Disambiguation" not in response or response["Disambiguation"] is None:
                raise sr.UnknownValueError()

            transcription = response['Disambiguation']['ChoiceData'][0]['Transcription']

        except sr.UnknownValueError:
            print("Houndify could not understand audio")
        except sr.RequestError as e:
            print("Could not request results from Houndify service; {0}".format(e))
            asr_could_not_be_reached = True

    # recognize speech using IBM Speech to Text
    elif asr_system == 'ibm':
        IBM_USERNAME = settings.get('credentials','ibm_username')
        IBM_PASSWORD = settings.get('credentials','ibm_password')
        try:
            response = r.recognize_ibm(audio, username=IBM_USERNAME, password=IBM_PASSWORD, show_all=True, language=speech_language)
            transcription_json = response

            if "results" not in response or len(response["results"]) < 1 or "alternatives" not in response["results"][0]:
                raise sr.UnknownValueError()

            transcription = []
            for utterance in response["results"]:
                if "alternatives" not in utterance: raise sr.UnknownValueError()
                for hypothesis in utterance["alternatives"]:
                    if "transcript" in hypothesis:
                        transcription.append(hypothesis["transcript"])
            transcription = "\n".join(transcription)
            transcription = transcription.strip()

        except sr.UnknownValueError:
            print("IBM Speech to Text could not understand audio")
        except sr.RequestError as e:
            print("Could not request results from IBM Speech to Text service; {0}".format(e))
            asr_could_not_be_reached = True

    # custom IBM to use already downloaded
    elif asr_system == 'ibm_post':
        IBM_USERNAME = settings.get('credentials','ibm_username')
        IBM_PASSWORD = settings.get('credentials','ibm_password')
        try:
            transcription_json = ''
            with open(os.path.splitext(speech_filepath)[0] + '.json') as json_data:
                transcription_json = json.load(json_data)
            response = transcription_json

            if "results" not in response or len(response["results"]) < 1 or "alternatives" not in response["results"][0]:
                raise sr.UnknownValueError()

            transcription = []
            for utterance in response["results"]:
                if "alternatives" not in utterance: raise sr.UnknownValueError()
                for hypothesis in utterance["alternatives"]:
                    if "transcript" in hypothesis:
                        transcription.append(hypothesis["transcript"])
            transcription = "\n".join(transcription)
            transcription = transcription.strip()
            # IBM marks filled pauses with %HESITATION; strip them from the output
            transcription = re.sub('%HESITATION', '', transcription)

        except sr.UnknownValueError:
            print("IBM Speech to Text could not understand audio")
        except sr.RequestError as e:
            print("Could not request results from IBM Speech to Text service; {0}".format(e))
            asr_could_not_be_reached = True

    elif asr_system == 'speechmatics':
        # recognize speech using Speechmatics Speech Recognition
        speechmatics_id = settings.get('credentials','speechmatics_id')
        speechmatics_token = settings.get('credentials','speechmatics_token')
        print('speech_filepath: {0}'.format(speech_filepath))
        transcription, transcription_json = asr_speechmatics.transcribe_speechmatics(speechmatics_id,speechmatics_token,speech_filepath,speech_language)
        try:
            print('Speechmatics  transcription is: {0}'.format(transcription))
        except:
            print('Speechmatics encountered some issue')
            asr_could_not_be_reached = True

    elif asr_system == 'amazon':
        try:
            bot_name = settings.get('credentials','amazon_bot_name')
            bot_alias = settings.get('credentials','amazon_bot_alias')
            user_id = settings.get('credentials','amazon_user_id')
            transcription,transcription_json = recognize_amazon(audio, bot_name, bot_alias, user_id,
                     content_type="audio/l16; rate=16000; channels=1", access_key_id=settings.get('credentials','amazon_access_key_id'),
                     secret_access_key=settings.get('credentials','amazon_secret_access_key'), region=settings.get('credentials','amazon_region'))
        except sr.UnknownValueError:
            # BUGFIX: repaired the broken user-facing message ("Amazon not process ...")
            print("Amazon could not process the speech transcription request")

    else: raise ValueError("Invalid asr_system. asr_system = {0}".format(asr_system))

    asr_timestamp_ended = time.time()
    asr_time_elapsed = asr_timestamp_ended - asr_timestamp_started
    print('asr_time_elapsed: {0:.3f} seconds'.format(asr_time_elapsed))

    if save_transcription:
        codecs.open(transcription_filepath_text,'w', settings.get('general','predicted_transcription_encoding')).write(transcription)

    print('transcription: {0}'.format(transcription))
    results = {}
    results['transcription'] = transcription
    results['transcription_json'] = transcription_json
    results['asr_time_elapsed'] = asr_time_elapsed
    results['asr_timestamp_ended'] = asr_timestamp_ended
    results['asr_timestamp_started'] = asr_timestamp_started

    json.dump(results, codecs.open(transcription_filepath_json, 'w', settings.get('general','predicted_transcription_encoding')), indent = 4, sort_keys=True)

    transcription_skipped = False
    return transcription, transcription_skipped
コード例 #11
0
import speech_recognition 

# Build recognizer object 
recognizer = speech_recognition.Recognizer()

# Keep listening forever, restarting the recognizer whenever audio is unintelligible.
while True:

    try:
        # Set up input and declare mic
        with speech_recognition.Microphone() as mic:
            recognizer.adjust_for_ambient_noise(mic, duration=.2)

            # define audio and input
            audio = recognizer.listen(mic)

            # convert audio to text
            text = recognizer.recognize_google(audio)
            text = text.lower()
            print(f"Recognized {text}")

    # BUGFIX: `except speech_recognition.UnknownValueError():` tried to catch an
    # exception *instance*, which raises a TypeError when the handler is evaluated;
    # an except clause must reference the exception class itself.
    except speech_recognition.UnknownValueError:
        # If audio can't be picked up, start over with a fresh recognizer
        recognizer = speech_recognition.Recognizer()
        continue
コード例 #12
0
def alarm(source):
    """Ask the user for an alarm time over ``source`` and persist it to the Alarms table.

    Retries (recursively) when the spoken numbers cannot be understood or the
    recognition service cannot be reached.
    """
    conn = sqlite3.connect('/home/pi/Oracle/Oracle')
    cursor = conn.cursor()

    speak("Hour is sir ?")

    with source as source:
        audio = r.listen(source)
        try:
            hour = r.recognize_google(audio)

            speak("And minute is ?")
            audio = r.listen(source)
            minute = r.recognize_google(audio)
            # int() raises ValueError when the recognized text is not a number
            hour = int(hour)
            minute = int(minute)

            speak(f"You asked me to set an alarm for {hour} {minute} sir")
            cursor.execute("INSERT INTO Alarms (hour, minute) VALUES (?, ?)",
                           (hour, minute))
            conn.commit()

        # BUGFIX: the original `except A or B or C():` clause only ever caught
        # ValueError (`or` returns its first truthy operand); multiple exception
        # types must be given as a tuple.
        except (ValueError, sr.UnknownValueError):
            speak("There was an error sir, i am gonna ask you to repeat it")
            # BUGFIX: the retries called alarm() without the required ``source``
            # argument, which raised a TypeError instead of retrying.
            alarm(source)
        except sr.RequestError as e:
            print(
                "Could not request results from Google Speech Recognition service; {0}"
                .format(e))
            alarm(source)

    conn.close()
コード例 #13
0
ファイル: ASRClient.py プロジェクト: ShakedH/CFV_Functions
def recognize_ibm(audio_data,
                  username,
                  password,
                  language="en-US",
                  show_all=False):
    """Send ``audio_data`` (an ``AudioData`` instance) to the IBM Speech to Text API.

    ``language`` selects the ``<language>_BroadbandModel`` recognition model.
    Returns the raw JSON response when ``show_all`` is true, otherwise the
    newline-joined transcripts of all recognized utterances.  Raises
    ``sr.UnknownValueError`` when nothing was recognized and ``sr.RequestError``
    on HTTP or connection failures.
    """
    assert isinstance(audio_data, sr.AudioData), "Data must be audio data"
    assert isinstance(username, str), "``username`` must be a string"
    assert isinstance(password, str), "``password`` must be a string"

    # The service expects at least 16 kHz, 16-bit samples; upconvert only if needed.
    flac_data = audio_data.get_flac_data(
        convert_rate=None if audio_data.sample_rate >= 16000 else 16000,
        convert_width=None if audio_data.sample_width >= 2 else 2)

    query = urlencode({
        "profanity_filter": "false",
        "model": "{}_BroadbandModel".format(language),
        "inactivity_timeout": -1,  # don't stop recognizing when the audio stream activity stops
        "timestamps": "true"
    })
    request = Request(
        "https://stream.watsonplatform.net/speech-to-text/api/v1/recognize?{}".format(query),
        data=flac_data,
        headers={
            "Content-Type": "audio/x-flac",
            "X-Watson-Learning-Opt-Out": "true",  # prevent requests from being logged, for improved privacy
        })
    credentials = "{}:{}".format(username, password).encode("utf-8")
    request.add_header("Authorization", "Basic {}".format(
        base64.standard_b64encode(credentials).decode("utf-8")))

    try:
        response = urlopen(request, timeout=None)
    except HTTPError as e:
        raise sr.RequestError("recognition request failed: {}".format(e.reason))
    except URLError as e:
        raise sr.RequestError("recognition connection failed: {}".format(e.reason))
    result = json.loads(response.read().decode("utf-8"))

    # return results
    if show_all:
        return result
    if "results" not in result or len(result["results"]) < 1 or "alternatives" not in result["results"][0]:
        raise sr.UnknownValueError()

    transcripts = []
    for utterance in result["results"]:
        if "alternatives" not in utterance:
            raise sr.UnknownValueError()
        transcripts.extend(hypothesis["transcript"]
                           for hypothesis in utterance["alternatives"]
                           if "transcript" in hypothesis)
    return "\n".join(transcripts)
コード例 #14
0
# Porta que custumo utilizar: /dev/ttyACM0 ou /dev/ttyACM1

import speech_recognition as sr
from pyfirmata import Arduino, util
from assintent import get_microphone, p2s
from time import sleep

placa = Arduino("/dev/ttyACM0")  # the board usually shows up on /dev/ttyACM0 or /dev/ttyACM1
led = placa.digital[4]

while True:
    try:
        # BUGFIX: the recognition call must be inside the try block — recognition
        # errors are raised by get_microphone(), so with the call outside the try
        # the UnknownValueError handler could never fire.
        voz = get_microphone().lower()
        if voz == "ligar led":
            p2s("ligando led...")
            led.write(1)
        if voz == "desligar led":
            p2s("desligando led... ")
            led.write(0)
    # BUGFIX: `except sr.UnknownValueError():` tried to catch an exception
    # *instance* (a TypeError at handling time); name the exception class instead.
    except sr.UnknownValueError:
        p2s("Não entendi mestre?")
コード例 #15
0
def recognize():
    """Continuously listen on the microphone and steer the rover by voice.

    Calibrates against ambient noise once, then loops: grabs up to two seconds of
    audio, recognizes it with Google's engine, and dispatches to the matching drive
    command.  The loop ends on ESC/Q in the OpenCV window or a keyboard interrupt.
    """
    # set a threshold to triggering noise relative to ambient noise
    with m as source:
        r.adjust_for_ambient_noise(source)

    # initial console output
    print("[VOICE] Set minimum energy threshold to", r.energy_threshold)
    print("[VOICE] Say some things!")

    while True:
        # stop on ESC or Q in the OpenCV window
        pressed = cv2.waitKey(1)
        if pressed in (27, ord('q'), ord('Q')):
            break

        # a two-second phrase limit keeps evaluation snappy
        # (no command takes longer than two seconds)
        with m as source:
            # add Snowboy config to listen function for hotword detection
            audio = r.listen(source, phrase_time_limit=2)

        try:
            # Google's engine is used instead of local Sphinx for its much
            # higher accuracy (add keyword_entries=keywords for Sphinx)
            value = r.recognize_google(audio).lower()
            print("[VOICE] [DEBUG] You said:", value)

            # "stop" is checked first so the rover halts when in doubt
            if "stop" in value:
                stop_driving_recognized()
            elif "start" in value:
                drive_forward_recognized()
            elif "move" in value:
                if "left" in value:
                    turn_left_recognized()
                elif "right" in value:
                    turn_right_recognized()
                else:
                    raise sr.UnknownValueError("[VOICE] [ERROR] " +
                                               "Direction not recognized.")
            else:
                raise sr.UnknownValueError("[VOICE] [ERROR] " +
                                           "Input not recognized.")

        # raised when the input was not empty but could not be recognized
        except sr.UnknownValueError:
            print("[VOICE] [ERROR] Unknown Value!")

        except sr.RequestError as e:
            print("[VOICE] [ERROR] Speech Recognition Engine - " +
                  "Request not fulfilled; {0}".format(e))

        except KeyboardInterrupt:
            break