def speech_recognizer_from_user_config(user_config: helper.Read_Only_Dict) -> helper.Read_Only_Dict:
    """Build a SpeechRecognizer (plus its audio-stream plumbing) from *user_config*.

    Returns a Read_Only_Dict holding the recognizer and the audio-stream
    objects produced by audio_config_from_user_config().
    """
    audio_data = audio_config_from_user_config(user_config)
    speech_config = speech_config_from_user_config(user_config)

    recognizer_kwargs = {
        "speech_config": speech_config,
        "audio_config": audio_data["audio_config"],
    }
    languages = user_config["language_ID_languages"]
    if languages is not None:
        # Language identification: candidate languages are ";"-separated.
        recognizer_kwargs["auto_detect_source_language_config"] = (
            speechsdk.AutoDetectSourceLanguageConfig(languages.split(";")))
    speech_recognizer = speechsdk.SpeechRecognizer(**recognizer_kwargs)

    if user_config["phrase_list"] is not None:
        # Bias recognition toward the caller-supplied phrase list.
        grammar = speechsdk.PhraseListGrammar.from_recognizer(recognizer=speech_recognizer)
        grammar.addPhrase(user_config["phrase_list"])

    return helper.Read_Only_Dict({
        "speech_recognizer": speech_recognizer,
        "audio_stream_format": audio_data["audio_stream_format"],
        "pull_input_audio_stream_callback": audio_data["pull_input_audio_stream_callback"],
        "pull_input_audio_stream": audio_data["pull_input_audio_stream"],
    })
def user_config_to_speech_recognizer(
        speech_config: speechsdk.SpeechConfig,
        audio_config: speechsdk.AudioConfig,
        user_config: READ_ONLY_DICT) -> speechsdk.SpeechRecognizer:
    """Create a SpeechRecognizer from pre-built configs plus user options.

    When user_config['languages'] is set (comma-separated codes), automatic
    language identification is enabled. When user_config['phrases'] is set,
    it is added as a phrase-list hint.
    """
    recognizer_kwargs = {
        "speech_config": speech_config,
        "audio_config": audio_config,
    }
    languages = user_config['languages']
    if languages is not None:
        # Note: Continuous language identification is supported only in C#, C++, and Python.
        # See:
        # https://docs.microsoft.com/azure/cognitive-services/speech-service/how-to-automatic-language-detection?pivots=programming-language-cpp#language-identification-with-speech-to-text
        recognizer_kwargs["auto_detect_source_language_config"] = (
            speechsdk.AutoDetectSourceLanguageConfig(languages.split(',')))
    speech_recognizer = speechsdk.SpeechRecognizer(**recognizer_kwargs)

    if user_config['phrases'] is not None:
        grammar = speechsdk.PhraseListGrammar.from_recognizer(recognizer=speech_recognizer)
        grammar.addPhrase(user_config['phrases'])
    return speech_recognizer
def stt_process(self, audio_file):
    """Speech-to-text: recognize *audio_file* once and return the text.

    Returns the recognized text on success; implicitly returns None when
    nothing was recognized or recognition was canceled (errors are printed).
    """
    self.speech_config = speechsdk.SpeechConfig(subscription=speech_key, region=service_region)
    self.audio_config = speechsdk.audio.AudioConfig(filename=audio_file)
    self.speech_recognizer = speechsdk.SpeechRecognizer(
        speech_config=self.speech_config, audio_config=self.audio_config)

    outcome = self.speech_recognizer.recognize_once()
    reason = outcome.reason
    if reason == speechsdk.ResultReason.RecognizedSpeech:
        return str(outcome.text)
    if reason == speechsdk.ResultReason.NoMatch:
        print("No speech could be recognized: {}".format(outcome.no_match_details))
    elif reason == speechsdk.ResultReason.Canceled:
        details = outcome.cancellation_details
        print("Speech Recognition canceled: {}".format(details.reason))
        if details.reason == speechsdk.CancellationReason.Error:
            print("Error details: {}".format(details.error_details))
def speech_recognize_once_from_file():
    """performs one-shot speech recognition with input from an audio file"""
    speech_config = speechsdk.SpeechConfig(subscription=speech_key, region=service_region)
    audio_config = speechsdk.audio.AudioConfig(filename=weatherfilename)
    # File-based recognizer with an explicit recognition language (German).
    speech_recognizer = speechsdk.SpeechRecognizer(
        speech_config=speech_config, language="de-DE", audio_config=audio_config)
    # recognize_once() returns after one utterance (terminated by trailing
    # silence, or at most ~15 s of audio) -- suitable only for single
    # commands/queries; use start_continuous_recognition() for long input.
    result = speech_recognizer.recognize_once()
    reason = result.reason
    if reason == speechsdk.ResultReason.RecognizedSpeech:
        print("Recognized: {}".format(result.text))
    elif reason == speechsdk.ResultReason.NoMatch:
        print("No speech could be recognized: {}".format(result.no_match_details))
    elif reason == speechsdk.ResultReason.Canceled:
        details = result.cancellation_details
        print("Speech Recognition canceled: {}".format(details.reason))
        if details.reason == speechsdk.CancellationReason.Error:
            print("Error details: {}".format(details.error_details))
def speech_recognize_once_from_mic():
    """performs one-shot speech recognition from the default microphone"""
    speech_config = speechsdk.SpeechConfig(subscription=speech_key, region=service_region)
    # No audio_config: the default microphone is used; default language "en-us".
    recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config)
    # Single-utterance recognition: returns on trailing silence or after
    # ~15 s of audio; use start_continuous_recognition() for longer input.
    result = recognizer.recognize_once()
    reason = result.reason
    if reason == speechsdk.ResultReason.RecognizedSpeech:
        print("Recognized: {}".format(result.text))
    elif reason == speechsdk.ResultReason.NoMatch:
        print("No speech could be recognized")
    elif reason == speechsdk.ResultReason.Canceled:
        details = result.cancellation_details
        print("Speech Recognition canceled: {}".format(details.reason))
        if details.reason == speechsdk.CancellationReason.Error:
            print("Error details: {}".format(details.error_details))
def speech_recognize_async_from_file():
    """performs one-shot speech recognition asynchronously with input from an audio file"""
    speech_config = speechsdk.SpeechConfig(subscription=speech_key, region=service_region)
    audio_config = speechsdk.audio.AudioConfig(filename=weatherfilename)
    # File-based recognizer; default language is "en-us".
    recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config,
                                            audio_config=audio_config)
    # recognize_once_async() does not block, so other work can run while
    # recognition proceeds. It still stops after the first utterance has been
    # recognized; use continuous recognition for long-running input.
    result_future = recognizer.recognize_once_async()
    print('recognition is running....')
    # Other tasks can be performed here...
    # .get() blocks until the recognition result is available.
    result = result_future.get()
    reason = result.reason
    if reason == speechsdk.ResultReason.RecognizedSpeech:
        print("Recognized: {}".format(result.text))
    elif reason == speechsdk.ResultReason.NoMatch:
        print("No speech could be recognized: {}".format(result.no_match_details))
    elif reason == speechsdk.ResultReason.Canceled:
        details = result.cancellation_details
        print("Speech Recognition canceled: {}".format(details.reason))
        if details.reason == speechsdk.CancellationReason.Error:
            print("Error details: {}".format(details.error_details))
def speech_recognize_once_from_file_with_customized_model():
    """performs one-shot speech recognition with input from an audio file,
    specifying a custom model"""
    speech_config = speechsdk.SpeechConfig(subscription=speech_key, region=service_region)
    # Source language plus the endpoint ID of the customized (CRIS) model --
    # replace both placeholders with your own values.
    source_language_config = speechsdk.languageconfig.SourceLanguageConfig(
        "zh-CN", "YourEndpointId")
    audio_config = speechsdk.audio.AudioConfig(filename=weatherfilename)
    recognizer = speechsdk.SpeechRecognizer(
        speech_config=speech_config,
        source_language_config=source_language_config,
        audio_config=audio_config)
    # Single-utterance recognition (silence-terminated or ~15 s max); use
    # start_continuous_recognition() for multi-utterance transcription.
    result = recognizer.recognize_once()
    reason = result.reason
    if reason == speechsdk.ResultReason.RecognizedSpeech:
        print("Recognized: {}".format(result.text))
    elif reason == speechsdk.ResultReason.NoMatch:
        print("No speech could be recognized: {}".format(result.no_match_details))
    elif reason == speechsdk.ResultReason.Canceled:
        details = result.cancellation_details
        print("Speech Recognition canceled: {}".format(details.reason))
        if details.reason == speechsdk.CancellationReason.Error:
            print("Error details: {}".format(details.error_details))
def listen(self):
    """Recognize one utterance from self.audioSource and store the text in
    self.__recognized; recognition failures are printed, not stored."""
    print("Say something...")
    self._audio_config = speechsdk.audio.AudioConfig(filename=self.audioSource)
    self._speech_recognizer = speechsdk.SpeechRecognizer(
        speech_config=self._speech_config, audio_config=self._audio_config)
    # Single-utterance recognition: returns on end-of-utterance silence or
    # after ~15 s of audio; use start_continuous_recognition() for more.
    result = self._speech_recognizer.recognize_once()
    reason = result.reason
    if reason == speechsdk.ResultReason.RecognizedSpeech:
        self.__recognized = result.text
    elif reason == speechsdk.ResultReason.NoMatch:
        print("No speech could be recognized: {}".format(result.no_match_details))
    elif reason == speechsdk.ResultReason.Canceled:
        details = result.cancellation_details
        print("Speech Recognition canceled: {}".format(details.reason))
        if details.reason == speechsdk.CancellationReason.Error:
            print("Error details: {}".format(details.error_details))
def stt():
    """One-shot speech-to-text from the default microphone.

    Returns the recognized text on success, a no-match description when
    nothing was recognized, or the error details when recognition was
    canceled with an error; "" otherwise.
    """
    # SECURITY NOTE(review): subscription key is hard-coded in source; move
    # it to an environment variable or secrets store.
    speech_key, service_region = "11381bd8eeb946cfb90eb86d5933c9ad", "northcentralus"
    speech_config = speechsdk.SpeechConfig(subscription=speech_key, region=service_region)
    # Creates a recognizer with the given settings
    speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config)
    result = speech_recognizer.recognize_once()
    response = ""
    # Checks result
    if result.reason == speechsdk.ResultReason.RecognizedSpeech:
        response = result.text
        print("Recognized: {}".format(result.text))
    elif result.reason == speechsdk.ResultReason.NoMatch:
        response = result.no_match_details
        print("No speech could be recognized: {}".format(result.no_match_details))
    elif result.reason == speechsdk.ResultReason.Canceled:
        cancellation_details = result.cancellation_details
        print("Speech Recognition canceled: {}".format(cancellation_details.reason))
        if cancellation_details.reason == speechsdk.CancellationReason.Error:
            print("Error details: {}".format(cancellation_details.error_details))
            # Fix: the original read `cancellation_details.reason.error_details`,
            # which raises AttributeError -- `reason` is a CancellationReason
            # enum; `error_details` lives on cancellation_details itself.
            response = cancellation_details.error_details
    return response
def voice_pipeine():
    """Starts the voice processing pipeline.

    Records audio on a background thread, runs one-shot speech recognition,
    validates the recognized text and, on success, notifies the server.
    Sets can_sst_start when finished so the caller can start the next cycle.
    """
    import time  # local import so this fix is self-contained

    global can_sst_start, can_validate
    _thread.start_new_thread(record, (4, ))
    speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config)
    print("Speach to text conversion started")
    result = speech_recognizer.recognize_once()
    if result.reason == speechsdk.ResultReason.RecognizedSpeech:
        print("Recognized: {}".format(result.text))
        # Fix: the original busy-waited (`while not can_validate: pass`),
        # pegging a CPU core until the flag flipped; sleep between polls.
        while not can_validate:
            time.sleep(0.05)
        validated, emp_id = helpers.validate(result.text)
        if validated:
            requests.get(url=SERVER_URL + 'alternate/' + emp_id + '/' + 'BUILDING_IN/',
                         timeout=5)
    elif result.reason == speechsdk.ResultReason.NoMatch:
        print("No speech could be recognized: {}".format(result.no_match_details))
    elif result.reason == speechsdk.ResultReason.Canceled:
        cancellation_details = result.cancellation_details
        print("Speech Recognition canceled: {}".format(cancellation_details.reason))
        if cancellation_details.reason == speechsdk.CancellationReason.Error:
            print("Error details: {}".format(cancellation_details.error_details))
    can_sst_start = True
def get_audio(time_limit=None, isstop=False):
    """One-shot speech-to-text from the default microphone.

    Returns the recognized text, or the string '0' on any failure.
    Note: *time_limit* and *isstop* are currently unused; they are kept so
    existing callers keep working.
    """
    # SECURITY NOTE(review): subscription key is hard-coded in source; move
    # it to an environment variable or secrets store.
    speech_key, service_region = "a133a3b377524aeda3338a66d761d520", "southeastasia"
    speech_config = speechsdk.SpeechConfig(subscription=speech_key, region=service_region)
    speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config)
    print("Say something...")
    try:
        result = speech_recognizer.recognize_once()
        if result.reason == speechsdk.ResultReason.RecognizedSpeech:
            print("You: {}".format(result.text))
            return result.text
        elif result.reason == speechsdk.ResultReason.NoMatch:
            print("No speech could be recognized: {}".format(result.no_match_details))
            return '0'
        elif result.reason == speechsdk.ResultReason.Canceled:
            cancellation_details = result.cancellation_details
            print("Speech Recognition canceled: {}".format(cancellation_details.reason))
            if cancellation_details.reason == speechsdk.CancellationReason.Error:
                print("Error details: {}".format(cancellation_details.error_details))
            return '0'
    except Exception:
        # Fix: was a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt; narrowed to Exception. The best-effort
        # '0'-on-failure contract is preserved.
        return '0'
def get_text(filepath):
    """Transcribe the Japanese audio file at *filepath* via continuous
    recognition and return the accumulated text, one segment per line
    (each segment is preceded by a newline)."""
    speech_config = speechsdk.SpeechConfig(subscription=SUBSCRIPTION, region=REGION)
    speech_config.speech_recognition_language = 'ja-JP'
    audio_input = speechsdk.AudioConfig(filename=filepath)
    speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config,
                                                   audio_config=audio_input)
    finished = False
    segments = []

    def on_stop(evt):
        # Stop recognition and release the polling loop below.
        print('CLOSING on {}'.format(evt))
        speech_recognizer.stop_continuous_recognition()
        nonlocal finished
        finished = True

    def on_recognized(evt):
        # NOTE(review): the recognized text is scraped from the event's
        # string form with a regex rather than read from evt.result.text --
        # presumably intentional; verify against the SDK's event repr.
        match = re.search(r'\".+?\"', str(evt))
        segments.append(match.group(0).strip('"'))

    speech_recognizer.recognized.connect(on_recognized)
    speech_recognizer.session_stopped.connect(on_stop)
    speech_recognizer.canceled.connect(on_stop)
    speech_recognizer.start_continuous_recognition()
    while not finished:
        time.sleep(.5)
    speech_recognizer.stop_continuous_recognition()
    return ''.join('\n' + segment for segment in segments)
def speechListener():
    """Continuously listen for speech, dispatching each recognized phrase to
    the printResponse callback; runs until Ctrl-C, then stops the session."""
    import time  # local import so this fix is self-contained

    # setup my credentials for speech config
    speech_config = speechsdk.SpeechConfig(subscription=speech_key, region=service_region)
    # initialize speech recognition using those credentials
    speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config)
    # initialize LED to off
    ledCode = 'X'
    initfuture = speech_recognizer.start_continuous_recognition_async()
    print(initfuture)
    # Connect callbacks to the events fired by the speech recognizer
    speech_recognizer.recognized.connect(printResponse)
    try:
        # Fix: the original loop body was the truncated statement `if(seri)`,
        # a SyntaxError. Sleep-poll to keep the session alive while the
        # recognizer's callbacks fire in the background.
        while True:
            time.sleep(0.5)
    except KeyboardInterrupt:
        print("keyboard interupt")
        speech_recognizer.stop_continuous_recognition_async()
        print("stopped session")
def recognize(self, audio_path, settings, result_callback):
    """Transcribe *audio_path* with Azure continuous recognition.

    Each recognized segment's text is passed to *result_callback*; when the
    session stops, self.END_VAL is sent as a terminator. Returns immediately
    after starting recognition -- results arrive via the callbacks.
    """
    if not settings['key']:
        # No credentials: report the error and terminate the result stream.
        result_callback("ERROR: No API key provided")
        result_callback(self.END_VAL)
        return
    speech_config = speechsdk.SpeechConfig(subscription=settings['key'],
                                           region=settings['region'])
    audio_input = speechsdk.AudioConfig(filename=audio_path)
    speech_recognizer = speechsdk.SpeechRecognizer(
        speech_config=speech_config,
        audio_config=audio_input,
        language=settings['language'])

    def end_callback(evt):
        # Stop the session, then signal completion to the caller.
        nonlocal audio_input
        nonlocal speech_recognizer
        speech_recognizer.stop_continuous_recognition()
        result_callback(self.END_VAL)
        # fix thread not releasing audio file
        # (dropping the nonlocal references presumably lets the SDK close
        # the input file -- NOTE(review): confirm this is still needed)
        speech_recognizer = None
        audio_input = None

    speech_recognizer.recognized.connect(
        lambda evt: result_callback(evt.result.text))
    # NOTE(review): only session_stopped triggers end_callback; a canceled
    # session (e.g. auth error) never sends END_VAL -- confirm intended.
    speech_recognizer.session_stopped.connect(end_callback)
    speech_recognizer.start_continuous_recognition()
def pronunciation_assessment_from_microphone():
    """Interactive pronunciation assessment from the default microphone.

    Repeatedly reads a reference text from stdin, scores one utterance of
    the user reading it aloud, and prints accuracy / pronunciation /
    completeness / fluency plus per-word details. Loops until EOF on stdin.

    Note: pronunciation assessment is region-limited (westus, eastasia,
    centralindia when this was written) and currently en-US only.
    """
    config = speechsdk.SpeechConfig(subscription=speech_key, region=service_region)
    # Grading: 0-100 scale, phoneme granularity, miscue detection enabled.
    pronunciation_config = speechsdk.PronunciationAssessmentConfig(
        reference_text="",
        grading_system=speechsdk.PronunciationAssessmentGradingSystem.HundredMark,
        granularity=speechsdk.PronunciationAssessmentGranularity.Phoneme,
        enable_miscue=True)
    recognizer = speechsdk.SpeechRecognizer(speech_config=config)

    while True:
        print('Enter reference text you want to assess, or enter empty text to exit.')
        print('> ')
        try:
            reference_text = input()
        except EOFError:
            break
        # NOTE(review): despite the prompt, empty input does not exit here --
        # only EOF breaks the loop. Confirm whether that is intended.
        pronunciation_config.reference_text = reference_text
        pronunciation_config.apply_to(recognizer)
        print('Read out "{}" for pronunciation assessment ...'.format(reference_text))
        # recognize_once() evaluates a single utterance; use
        # start_continuous_recognition() for multi-utterance evaluation.
        result = recognizer.recognize_once_async().get()
        if result.reason == speechsdk.ResultReason.RecognizedSpeech:
            print('Recognized: {}'.format(result.text))
            print(' Pronunciation Assessment Result:')
            pronunciation_result = speechsdk.PronunciationAssessmentResult(result)
            print(' Accuracy score: {}, Pronunciation score: {}, Completeness score : {}, FluencyScore: {}'.format(
                pronunciation_result.accuracy_score,
                pronunciation_result.pronunciation_score,
                pronunciation_result.completeness_score,
                pronunciation_result.fluency_score))
            print(' Word-level details:')
            for idx, word in enumerate(pronunciation_result.words):
                print(' {}: word: {}, accuracy score: {}, error type: {};'.format(
                    idx + 1, word.word, word.accuracy_score, word.error_type))
        elif result.reason == speechsdk.ResultReason.NoMatch:
            print("No speech could be recognized")
        elif result.reason == speechsdk.ResultReason.Canceled:
            cancellation_details = result.cancellation_details
            print("Speech Recognition canceled: {}".format(cancellation_details.reason))
            if cancellation_details.reason == speechsdk.CancellationReason.Error:
                print("Error details: {}".format(cancellation_details.error_details))
def createBlockData(path):
    """Continuously recognize the audio file at *path*, collecting recognized
    blocks via the onRecognized callback; returns the block dict once the
    session ends. Relies on module-level speechConfig and the
    onSpeechStart / onSpeechStop / onRecognized handlers."""
    print("here")
    block_dict = {'blockArray': [], 'done': False}
    audio_cfg = speechsdk.audio.AudioConfig(filename=path)
    recognizer = speechsdk.SpeechRecognizer(speech_config=speechConfig,
                                            audio_config=audio_cfg)
    recognizer.recognizing.connect(
        lambda evt: print('Recongizing: {}'.format(evt)))
    recognizer.recognized.connect(
        lambda evt: onRecognized(evt, block_dict))
    recognizer.session_started.connect(onSpeechStart)
    # Both normal session end and cancellation finalize the block dict.
    recognizer.session_stopped.connect(
        lambda evt: onSpeechStop(evt, recognizer, block_dict))
    recognizer.canceled.connect(
        lambda evt: onSpeechStop(evt, recognizer, block_dict))
    recognizer.start_continuous_recognition_async()
    # Poll until a terminal callback marks the dict done.
    while not block_dict['done']:
        print("WAIT", len(block_dict['blockArray']))
        time.sleep(0.5)
    return block_dict
def speechInit():
    """Create and return an Italian-language SpeechRecognizer (UK South).

    NOTE(review): 'AZURE-KEY' is a placeholder -- supply a real subscription
    key, preferably from configuration rather than source.
    """
    key, region = 'AZURE-KEY', 'uksouth'
    config = sp.SpeechConfig(subscription=key, region=region)
    config.speech_recognition_language = 'it-IT'
    return sp.SpeechRecognizer(speech_config=config)
def speech_recognize_once_from_file(filepath):
    """Recognize one utterance from the audio file *filepath* and return the
    transcription.

    Failures are printed; note the text of the (possibly failed) result is
    returned regardless, so the transcription may be an empty string.
    """
    speech_config = speechsdk.SpeechConfig(subscription=speech_key, region=service_region)
    audio_config = speechsdk.audio.AudioConfig(filename=filepath)
    # Default language is en-US
    recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config,
                                            audio_config=audio_config)
    result = recognizer.recognize_once()
    transcription = result.text
    if result.reason == speechsdk.ResultReason.NoMatch:
        print("No speech could be recognized: {}".format(result.no_match_details))
    elif result.reason == speechsdk.ResultReason.Canceled:
        details = result.cancellation_details
        print("Speech Recognition canceled: {}".format(details.reason))
        if details.reason == speechsdk.CancellationReason.Error:
            print("Error details: {}".format(details.error_details))
    return transcription
def __init__(self):
    """Set up a default-microphone SpeechRecognizer on this instance."""
    # SECURITY NOTE(review): subscription key is hard-coded in source; move
    # it to an environment variable or secrets store.
    speech_key, service_region = "e0b89f207fe74a91b11e0249661e0c23", "canadacentral"
    config = speechsdk.SpeechConfig(subscription=speech_key, region=service_region)
    # No audio_config: the default microphone is used.
    self.speech_recognizer = speechsdk.SpeechRecognizer(speech_config=config)
def speech_continuous_recognition_async_from_mic():
    """performs continuous speech recognition from the default microphone"""
    speech_config = speechsdk.SpeechConfig(subscription=speech_key, region=service_region)
    # https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/how-to-select-audio-input-devices
    audio_config = speechsdk.audio.AudioConfig(use_default_microphone=True)
    # The default language is "en-us".
    recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config,
                                            audio_config=audio_config)
    finished = False

    def stop_cb(evt):
        """callback that stops continuous recognition upon receiving an event `evt`"""
        print('CLOSING on {}'.format(evt))
        recognizer.stop_continuous_recognition()
        nonlocal finished
        finished = True

    # Interim hypotheses print as they form; a final result marks a paragraph.
    recognizer.recognizing.connect(lambda evt: print('{}'.format(evt.result.text)))
    recognizer.recognized.connect(lambda evt: print('{} \n[NEW PARAGRAPH]'.format(evt.result.text)))
    recognizer.session_started.connect(lambda evt: print('SESSION STARTED: {}'.format(evt)))
    recognizer.session_stopped.connect(lambda evt: print('SESSION STOPPED {}'.format(evt)))
    recognizer.canceled.connect(lambda evt: print('CANCELED {}'.format(evt)))
    # Stop on either session-stopped or canceled.
    recognizer.session_stopped.connect(stop_cb)
    recognizer.canceled.connect(stop_cb)
    recognizer.start_continuous_recognition()
    while not finished:
        time.sleep(.5)
def speech_from_file():
    """Recognize speech from a user-named .wav file and return the result.

    Prompts until an existing file is named; returns 0 if the user gives up
    (empty input), otherwise the SDK recognition result.
    """
    speech_config = speechsdk.SpeechConfig(host=service_host)
    audio_filename = input("input the name of the .wav file: ")
    audio_filename = audio_filename + ".wav"
    # Fix: the original used C-style `while(!...)`, which is a SyntaxError
    # in Python; the correct operator is `not`.
    while not os.path.isfile(audio_filename):
        print("invalid file name")
        audio_filename = input("try another filename or hit enter to exit:")
        if audio_filename == "":
            return 0
        audio_filename = audio_filename + ".wav"
    audio_input = speechsdk.audio.AudioConfig(filename=audio_filename)
    speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config,
                                                   audio_config=audio_input)
    print("Processing audio file...")
    result = speech_recognizer.recognize_once()
    if result.reason == speechsdk.ResultReason.RecognizedSpeech:
        print("Recognized by Azure: {}".format(result.text))
    elif result.reason == speechsdk.ResultReason.NoMatch:
        print("No speech could be recognized: {}".format(result.no_match_details))
    elif result.reason == speechsdk.ResultReason.Canceled:
        cancellation_details = result.cancellation_details
        print("Speech Recognition canceled: {}".format(cancellation_details.reason))
        if cancellation_details.reason == speechsdk.CancellationReason.Error:
            print("Error details: {}".format(cancellation_details.error_details))
    return result
def speech_to_text(self, audio_file_path):
    """Call the Azure STT API on *audio_file_path* and return the result.

    Returns the SDK result on successful recognition. Raises RuntimeError
    when nothing was recognized or recognition was canceled; any other
    reason falls through (implicit None) after logging.
    """
    audio_input = speech.audio.AudioConfig(filename=audio_file_path)
    LOGGER.info("calling azure stt API for file: %s", audio_file_path)
    recognizer = speech.SpeechRecognizer(
        speech_config=self.speech_config,
        language=self.language,
        audio_config=audio_input,
    )
    LOGGER.info("Recognizing first result...")
    result = recognizer.recognize_once()
    reason = result.reason
    if reason == speech.ResultReason.RecognizedSpeech:
        LOGGER.info("Recognized: %s", result.text)
        return result
    if reason == speech.ResultReason.NoMatch:
        raise RuntimeError(
            "No speech could be recognized: {}".format(result.no_match_details))
    if reason == speech.ResultReason.Canceled:
        raise RuntimeError(
            "Speech Recognition canceled: {}".format(result.cancellation_details.reason))
    # Reached only for result reasons not handled above.
    LOGGER.info("done..")
def speech_recognize_once_from_file_with_custom_endpoint_parameters():
    """performs one-shot speech recognition with input from an audio file,
    specifying an endpoint with custom parameters"""
    initial_silence_timeout_ms = 15 * 1e3
    # The custom initial-silence timeout is carried in the endpoint URL.
    template = "wss://{}.stt.speech.microsoft.com/speech/recognition" \
               "/conversation/cognitiveservices/v1?initialSilenceTimeoutMs={:d}"
    speech_config = speechsdk.SpeechConfig(
        subscription=speech_key,
        endpoint=template.format(service_region, int(initial_silence_timeout_ms)))
    print("Using endpoint", speech_config.get_property(speechsdk.PropertyId.SpeechServiceConnection_Endpoint))
    audio_config = speechsdk.audio.AudioConfig(filename=weatherfilename)
    # File-based recognizer; the default language is "en-us".
    recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config,
                                            audio_config=audio_config)
    # Single-utterance recognition (silence-terminated or ~15 s max); use
    # start_continuous_recognition() for multi-utterance transcription.
    result = recognizer.recognize_once()
    reason = result.reason
    if reason == speechsdk.ResultReason.RecognizedSpeech:
        print("Recognized: {}".format(result.text))
    elif reason == speechsdk.ResultReason.NoMatch:
        print("No speech could be recognized: {}".format(result.no_match_details))
    elif reason == speechsdk.ResultReason.Canceled:
        details = result.cancellation_details
        print("Speech Recognition canceled: {}".format(details.reason))
        if details.reason == speechsdk.CancellationReason.Error:
            print("Error details: {}".format(details.error_details))
def chat():
    """Voice chat loop: recognize an utterance, reply through the bot with
    gTTS-generated audio. Saying "Bye." speaks a farewell and exits the
    process via sys.exit().
    """
    while True:
        speech_recognizer = speechsdk.SpeechRecognizer(
            speech_config=speech_config)
        result = speech_recognizer.recognize_once()
        # Fix: the original wrote
        #   except result.reason == speechsdk.ResultReason.NoMatch:
        # which is not valid exception handling -- an `except` clause needs
        # an exception class, not a boolean, and would raise TypeError the
        # moment any exception fired. NoMatch is a result *state*, so test
        # it explicitly in the normal flow.
        if result.reason == speechsdk.ResultReason.NoMatch:
            print("No speech could be recognized: {}".format(
                result.no_match_details))
            continue
        text = format(result.text)
        if text == 'Bye.':
            welcometext = 'okay bye have a nice meal'
            language = 'en'
            myobj = gTTS(text=welcometext, lang=language, slow=False)
            myobj.save("welcome.mp3")
            os.system("mpg321 welcome.mp3")
            time.sleep(1)
            sys.exit()
        mytext = mybot.respond(text)
        language = 'en'
        myobj = gTTS(text=mytext, lang=language, slow=False)
        myobj.save("welcome.mp3")
        os.system("mpg321 welcome.mp3")
        time.sleep(1)
def speech_recognize_continuous_from_file():
    """performs continuous speech recognition with input from an audio file"""
    speech_config = speechsdk.SpeechConfig(subscription=speech_key, region=service_region)
    audio_config = speechsdk.audio.AudioConfig(filename=weatherfilename)
    recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config,
                                            audio_config=audio_config)
    finished = False

    def stop_cb(evt: speechsdk.SessionEventArgs):
        """callback that signals to stop continuous recognition upon receiving an event `evt`"""
        print('CLOSING on {}'.format(evt))
        nonlocal finished
        finished = True

    # Trace every recognizer event to stdout.
    recognizer.recognizing.connect(lambda evt: print('RECOGNIZING: {}'.format(evt)))
    recognizer.recognized.connect(lambda evt: print('RECOGNIZED: {}'.format(evt)))
    recognizer.session_started.connect(lambda evt: print('SESSION STARTED: {}'.format(evt)))
    recognizer.session_stopped.connect(lambda evt: print('SESSION STOPPED {}'.format(evt)))
    recognizer.canceled.connect(lambda evt: print('CANCELED {}'.format(evt)))
    # Stop on either session-stopped or canceled.
    recognizer.session_stopped.connect(stop_cb)
    recognizer.canceled.connect(stop_cb)
    recognizer.start_continuous_recognition()
    while not finished:
        time.sleep(.5)
    recognizer.stop_continuous_recognition()
def speech_to_text():
    """One-shot microphone speech-to-text; returns the recognized text
    (which may be empty when recognition failed -- failures are printed)."""
    # SECURITY NOTE(review): subscription key is hard-coded in source; move
    # it to an environment variable or secrets store.
    speech_config = speechsdk.SpeechConfig(
        subscription="6f24f130fc824d7bab3de9e16526903e", region="westus")
    recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config)
    print("Say something...")
    # recognize_once() returns at the first recognized utterance; use
    # start_continuous_recognition() for long-running input, or
    # recognize_once_async() for a non-blocking call.
    result = recognizer.recognize_once()
    reason = result.reason
    if reason == speechsdk.ResultReason.RecognizedSpeech:
        print("Recognized: {}".format(result.text))
    elif reason == speechsdk.ResultReason.NoMatch:
        print("No speech could be recognized: {}".format(result.no_match_details))
    elif reason == speechsdk.ResultReason.Canceled:
        details = result.cancellation_details
        print("Speech Recognition canceled: {}".format(details.reason))
        if details.reason == speechsdk.CancellationReason.Error:
            print("Error details: {}".format(details.error_details))
    return result.text
def speech_recognize_with_auto_language_detection_UsingCustomizedModel():
    """Recognize speech from an audio file with automatic language detection,
    using a customized model endpoint for one of the candidate languages.

    Prints the recognized text and detected language on success, or the
    failure details otherwise.
    """
    speech_config = speechsdk.SpeechConfig(subscription=speech_key, region=service_region)
    audio_config = speechsdk.audio.AudioConfig(filename=weatherfilename)

    # Candidate languages in BCP-47 format (e.g. fr-FR). See
    # https://docs.microsoft.com/azure/cognitive-services/speech-service/language-support
    # for all supported languages.
    en_language_config = speechsdk.languageconfig.SourceLanguageConfig("en-US")
    # fr-FR is backed by a customized model: replace "myendpointId" with your
    # own CRIS endpoint ID.
    fr_language_config = speechsdk.languageconfig.SourceLanguageConfig("fr-FR", "myendpointId")

    # Build the auto-detection configuration from the source language configs.
    auto_detect_source_language_config = speechsdk.languageconfig.AutoDetectSourceLanguageConfig(
        sourceLanguageConfigs=[en_language_config, fr_language_config])
    speech_recognizer = speechsdk.SpeechRecognizer(
        speech_config=speech_config,
        auto_detect_source_language_config=auto_detect_source_language_config,
        audio_config=audio_config)
    result = speech_recognizer.recognize_once()

    # Check the result.
    if result.reason == speechsdk.ResultReason.RecognizedSpeech:
        auto_detect_source_language_result = speechsdk.AutoDetectSourceLanguageResult(result)
        print("Recognized: {} in language {}".format(
            result.text, auto_detect_source_language_result.language))
    elif result.reason == speechsdk.ResultReason.NoMatch:
        print("No speech could be recognized")
    elif result.reason == speechsdk.ResultReason.Canceled:
        cancellation_details = result.cancellation_details
        print("Speech Recognition canceled: {}".format(cancellation_details.reason))
        if cancellation_details.reason == speechsdk.CancellationReason.Error:
            # Reassembled: this statement's string literal was split across two
            # source lines, which is a syntax error in Python.
            print("Error details: {}".format(cancellation_details.error_details))
def speech_recognize_keyword_from_microphone():
    """Perform keyword-triggered speech recognition using the default microphone."""
    speech_config = speechsdk.SpeechConfig(subscription=speech_key, region=service_region)

    # Keyword recognition model and the phrase it triggers on; update both to
    # match your own model file.
    model = speechsdk.KeywordRecognitionModel("YourKeywordRecognitionModelFile.table")
    keyword = "YourKeyword"

    recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config)
    finished = False

    def on_stop(evt):
        """Signal the wait loop below to exit."""
        print('CLOSING on {}'.format(evt))
        nonlocal finished
        finished = True

    def on_recognizing(evt):
        """Log intermediate keyword/speech hypotheses."""
        reason = evt.result.reason
        if reason == speechsdk.ResultReason.RecognizingKeyword:
            print('RECOGNIZING KEYWORD: {}'.format(evt))
        elif reason == speechsdk.ResultReason.RecognizingSpeech:
            print('RECOGNIZING: {}'.format(evt))

    def on_recognized(evt):
        """Log final keyword/speech results and no-match outcomes."""
        reason = evt.result.reason
        if reason == speechsdk.ResultReason.RecognizedKeyword:
            print('RECOGNIZED KEYWORD: {}'.format(evt))
        elif reason == speechsdk.ResultReason.RecognizedSpeech:
            print('RECOGNIZED: {}'.format(evt))
        elif reason == speechsdk.ResultReason.NoMatch:
            print('NOMATCH: {}'.format(evt))

    # Wire up event logging.
    recognizer.recognizing.connect(on_recognizing)
    recognizer.recognized.connect(on_recognized)
    recognizer.session_started.connect(
        lambda evt: print('SESSION STARTED: {}'.format(evt)))
    recognizer.session_stopped.connect(
        lambda evt: print('SESSION STOPPED {}'.format(evt)))
    recognizer.canceled.connect(
        lambda evt: print('CANCELED {}'.format(evt)))
    # Exit the wait loop on either session-stopped or canceled.
    recognizer.session_stopped.connect(on_stop)
    recognizer.canceled.connect(on_stop)

    recognizer.start_keyword_recognition(model)
    print('Say something starting with "{}" followed by whatever you want...'.
          format(keyword))
    while not finished:
        time.sleep(.5)
    recognizer.stop_keyword_recognition()
def transcribe(audio):
    """Transcribe a single utterance from the audio file at path *audio*.

    Returns the recognized text on success; on no-match or cancellation the
    failure details are written to stderr and None is returned implicitly.
    Relies on a module-level ``speech_config``.
    """
    audio_input = speechsdk.audio.AudioConfig(filename=audio)
    recognizer = speechsdk.SpeechRecognizer(
        speech_config=speech_config, audio_config=audio_input)

    # recognize_once() returns after a single utterance, ended by trailing
    # silence or a maximum of 15 seconds of processed audio — suitable only
    # for single-shot command/query recognition. Use
    # start_continuous_recognition() for long-running multi-utterance input.
    result = recognizer.recognize_once()

    reason = result.reason
    if reason == speechsdk.ResultReason.RecognizedSpeech:
        return result.text
    if reason == speechsdk.ResultReason.NoMatch:
        print("No speech could be recognized: {}".format(result.no_match_details),
              file=sys.stderr)
    elif reason == speechsdk.ResultReason.Canceled:
        details = result.cancellation_details
        print("Speech Recognition canceled: {}".format(details.reason),
              file=sys.stderr)
        if details.reason == speechsdk.CancellationReason.Error:
            print("Error details: {}".format(details.error_details),
                  file=sys.stderr)
def speech_recognize_once_with_auto_language_detection_from_mic():
    """One-shot speech recognition from the default microphone with automatic
    source-language detection."""
    speech_config = speechsdk.SpeechConfig(subscription=speech_key, region=service_region)

    # Candidate source languages the service may choose between.
    detect_config = speechsdk.languageconfig.AutoDetectSourceLanguageConfig(
        languages=["de-DE", "en-US"])
    recognizer = speechsdk.SpeechRecognizer(
        speech_config=speech_config,
        auto_detect_source_language_config=detect_config)
    result = recognizer.recognize_once()

    # Report the outcome, including the detected language on success.
    reason = result.reason
    if reason == speechsdk.ResultReason.RecognizedSpeech:
        language_result = speechsdk.AutoDetectSourceLanguageResult(result)
        print("Recognized: {} in language {}".format(
            result.text, language_result.language))
    elif reason == speechsdk.ResultReason.NoMatch:
        print("No speech could be recognized")
    elif reason == speechsdk.ResultReason.Canceled:
        cancellation = result.cancellation_details
        print("Speech Recognition canceled: {}".format(
            cancellation.reason))
        if cancellation.reason == speechsdk.CancellationReason.Error:
            print("Error details: {}".format(
                cancellation.error_details))