def Translate(targetLanguage):
    translation = ''

    # Variant 1: translate speech captured from the default microphone
    audio_config = speech_sdk.AudioConfig(use_default_microphone=True)
    translator = speech_sdk.translation.TranslationRecognizer(
        translation_config, audio_config)
    print("Speak now...")
    result = translator.recognize_once_async().get()
    print('Translating "{}"'.format(result.text))
    translation = result.translations[targetLanguage]
    print(translation)

    # Variant 2: translate speech from an audio file
    audioFile = 'station.wav'
    playsound(audioFile)
    audio_config = speech_sdk.AudioConfig(filename=audioFile)
    translator = speech_sdk.translation.TranslationRecognizer(
        translation_config, audio_config)
    print("Getting speech from file...")
    result = translator.recognize_once_async().get()
    print('Translating "{}"'.format(result.text))
    translation = result.translations[targetLanguage]
    print(translation)

    # Synthesize the translation in a voice that matches the target language
    voices = {"fr": "fr-FR-Julie", "es": "es-ES-Laura", "hi": "hi-IN-Kalpana"}
    speech_config.speech_synthesis_voice_name = voices.get(targetLanguage)
    speech_synthesizer = speech_sdk.SpeechSynthesizer(speech_config)
    speak = speech_synthesizer.speak_text_async(translation).get()
    if speak.reason != speech_sdk.ResultReason.SynthesizingAudioCompleted:
        print(speak.reason)
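# The snippet above assumes module-level `speech_sdk`, `translation_config`,
# and `speech_config` objects that are not shown. A minimal sketch of that
# setup follows; the key/region values are placeholders and the exact target
# languages are assumptions, not part of the original source.
import azure.cognitiveservices.speech as speech_sdk

ai_key = '<your-speech-key>'         # placeholder
ai_region = '<your-service-region>'  # placeholder

# Translation config: which language to recognize and which to translate into
translation_config = speech_sdk.translation.SpeechTranslationConfig(
    subscription=ai_key, region=ai_region)
translation_config.speech_recognition_language = 'en-US'
translation_config.add_target_language('fr')
translation_config.add_target_language('es')
translation_config.add_target_language('hi')

# Plain speech config, used to synthesize the translated text
speech_config = speech_sdk.SpeechConfig(subscription=ai_key, region=ai_region)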
def transcribe_audio_file_path(self, audio_file_path):
    # For now supports wav, not mp3
    # https://stackoverflow.com/questions/51614216/what-audio-formats-are-supported-by-azure-cognitive-services-speech-service-ss?rq=1
    audio_config = speechsdk.AudioConfig(use_default_microphone=False,
                                         filename=audio_file_path)
    speech_recognizer = speechsdk.SpeechRecognizer(
        speech_config=self.speech_config, audio_config=audio_config)
    transcript = ""
    transcription_status = TranscriptionStatus.success
    try:
        result = speech_recognizer.recognize_once()
        if result.reason == speechsdk.ResultReason.RecognizedSpeech:
            transcript = result.text
        elif result.reason == speechsdk.ResultReason.NoMatch:
            transcription_status = TranscriptionStatus.transcription_error
        elif result.reason == speechsdk.ResultReason.Canceled:
            transcription_status = TranscriptionStatus.unknown_error
    except Exception:
        print("Unknown transcription error:", sys.exc_info())
        transcription_status = TranscriptionStatus.unknown_error
    return transcript, transcription_status
def from_file():
    speech_config = speechsdk.SpeechConfig(
        subscription="<paste-your-subscription-key>",
        region="canadacentral")
    audio_input = speechsdk.AudioConfig(
        filename='sound/audio_verification_challenge_35460110d06c257a9.9555557505-5083.wav'
    )
    speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config,
                                                   audio_config=audio_input)
    result = speech_recognizer.recognize_once_async().get()
    if result.reason == speechsdk.ResultReason.RecognizedSpeech:
        print("Recognized: {}".format(result.text))
        return result.text
    elif result.reason == speechsdk.ResultReason.NoMatch:
        print("No speech could be recognized: {}".format(
            result.no_match_details))
    elif result.reason == speechsdk.ResultReason.Canceled:
        cancellation_details = result.cancellation_details
        print("Speech Recognition canceled: {}".format(
            cancellation_details.reason))
        if cancellation_details.reason == speechsdk.CancellationReason.Error:
            print("Error details: {}".format(
                cancellation_details.error_details))
def TranscribeCommand():
    command = ''

    # Configure speech recognition from the default microphone
    # audio_config = speech_sdk.AudioConfig(use_default_microphone=True)
    # speech_recognizer = speech_sdk.SpeechRecognizer(speech_config, audio_config)
    # print('Speak now...')

    # Configure speech recognition from an audio file
    audioFile = 'time.wav'
    playsound(audioFile)
    audio_config = speech_sdk.AudioConfig(filename=audioFile)
    speech_recognizer = speech_sdk.SpeechRecognizer(speech_config, audio_config)

    # Process speech input
    speech = speech_recognizer.recognize_once_async().get()
    if speech.reason == speech_sdk.ResultReason.RecognizedSpeech:
        command = speech.text
        print(command)
    else:
        print(speech.reason)
        if speech.reason == speech_sdk.ResultReason.Canceled:
            cancellation = speech.cancellation_details
            print(cancellation.reason)
            print(cancellation.error_details)

    # Return the command
    return command
def TranscribeCommand():
    command = 'stop.'

    # Configure speech recognition: Spanish speech from the default microphone
    audio_config = speech_sdk.AudioConfig(use_default_microphone=True)
    source_language_config = speech_sdk.languageconfig.SourceLanguageConfig(
        "es-ES")
    speech_recognizer = speech_sdk.SpeechRecognizer(
        speech_config=speech_config,
        audio_config=audio_config,
        source_language_config=source_language_config)

    # Process speech input
    print('Say "stop" to end...')
    speech = speech_recognizer.recognize_once_async().get()
    if speech.reason == speech_sdk.ResultReason.RecognizedSpeech:
        command = speech.text
        print(command)
        print(Translate(command))
    else:
        print(speech.reason)
        if speech.reason == speech_sdk.ResultReason.Canceled:
            cancellation = speech.cancellation_details
            print(cancellation.reason)
            print(cancellation.error_details)

    # Return the translated command
    return Translate(command)
def process_recorded_data(file_q: Queue) -> None:
    # Load server configs
    configs = {}
    with open(config_file_path, 'r') as config_file:
        c = yaml.safe_load(config_file)
        configs['key'] = c['azure_subscription_key']
        configs['endpoint'] = c['azure_endpoint']
        configs['region'] = c['azure_region']

    speech_config = speechsdk.SpeechConfig(region=configs['region'],
                                           subscription=configs['key'])
    source_language_config = speechsdk.languageconfig.SourceLanguageConfig(
        "zh-CN", configs['endpoint'])

    counter = Counter({"可以": 0, "吗": 0})
    while True:
        new_file_path = file_q.get()

        # Send the recording to the Microsoft speech service
        audio_input = speechsdk.AudioConfig(filename=new_file_path)
        speech_recognizer = speechsdk.SpeechRecognizer(
            speech_config=speech_config,
            source_language_config=source_language_config,
            audio_config=audio_input)
        result = speech_recognizer.recognize_once()
        if result.reason != speechsdk.ResultReason.RecognizedSpeech:
            print(result)
        print(result.text)

        # Count keyword occurrences ("可以" and "吗")
        for k in counter:
            if k in result.text:
                counter[k] += 1
        print(f'keyima counter {min(counter.values())}')

        # Remove the processed file
        Path(new_file_path).unlink()
def audio_config_from_user_config(
        user_config: helper.Read_Only_Dict) -> helper.Read_Only_Dict:
    if user_config["input_file"] is None:
        # Match the file branch's return shape so callers can always
        # index the same keys
        return helper.Read_Only_Dict({
            "audio_config": speechsdk.AudioConfig(use_default_microphone=True),
            "audio_stream_format": None,
            "pull_input_audio_stream_callback": None,
            "pull_input_audio_stream": None,
        })
    else:
        audio_stream_format = None
        if not user_config["use_compressed_audio"]:
            # Read the WAV header to describe the raw PCM stream
            reader = wave.open(user_config["input_file"], mode="rb")
            audio_stream_format = speechsdk.audio.AudioStreamFormat(
                samples_per_second=reader.getframerate(),
                bits_per_sample=reader.getsampwidth() * 8,
                channels=reader.getnchannels())
            reader.close()
        else:
            audio_stream_format = speechsdk.audio.AudioStreamFormat(
                compressed_stream_format=user_config["compressed_audio_format"])
        callback = helper.BinaryFileReaderCallback(
            filename=user_config["input_file"])
        stream = speechsdk.audio.PullAudioInputStream(
            pull_stream_callback=callback, stream_format=audio_stream_format)
        # We return the BinaryFileReaderCallback, AudioStreamFormat, and
        # PullAudioInputStream because we need to keep them in scope until
        # they are actually used.
        return helper.Read_Only_Dict({
            "audio_config": speechsdk.audio.AudioConfig(stream=stream),
            "audio_stream_format": audio_stream_format,
            "pull_input_audio_stream_callback": callback,
            "pull_input_audio_stream": stream,
        })
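# The snippet above pulls `BinaryFileReaderCallback` from a `helper` module
# that is not shown. A plausible implementation, modeled on the
# PullAudioInputStreamCallback pattern in the Azure Speech SDK samples:
import azure.cognitiveservices.speech as speechsdk

class BinaryFileReaderCallback(speechsdk.audio.PullAudioInputStreamCallback):
    def __init__(self, filename: str):
        super().__init__()
        self._file_h = open(filename, "rb")

    def read(self, buffer: memoryview) -> int:
        # Fill the SDK-provided buffer and report how many bytes were written;
        # returning 0 signals end of stream
        size = buffer.nbytes
        frames = self._file_h.read(size)
        buffer[:len(frames)] = frames
        return len(frames)

    def close(self) -> None:
        self._file_h.close()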
def recognize(self, audio_path, settings, result_callback):
    if not settings['key']:
        result_callback("ERROR: No API key provided")
        result_callback(self.END_VAL)
        return

    speech_config = speechsdk.SpeechConfig(subscription=settings['key'],
                                           region=settings['region'])
    audio_input = speechsdk.AudioConfig(filename=audio_path)
    speech_recognizer = speechsdk.SpeechRecognizer(
        speech_config=speech_config,
        audio_config=audio_input,
        language=settings['language'])

    def end_callback(evt):
        nonlocal audio_input
        nonlocal speech_recognizer
        speech_recognizer.stop_continuous_recognition()
        result_callback(self.END_VAL)
        # Drop the references so the recognizer thread releases the audio file
        speech_recognizer = None
        audio_input = None

    speech_recognizer.recognized.connect(
        lambda evt: result_callback(evt.result.text))
    speech_recognizer.session_stopped.connect(end_callback)
    speech_recognizer.start_continuous_recognition()
def get_text(filepath):
    speech_config = speechsdk.SpeechConfig(subscription=SUBSCRIPTION,
                                           region=REGION)
    speech_config.speech_recognition_language = 'ja-JP'
    audio_input = speechsdk.AudioConfig(filename=filepath)
    speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config,
                                                   audio_config=audio_input)
    done = False
    text = ''

    def stop_cb(evt):
        print('CLOSING on {}'.format(evt))
        speech_recognizer.stop_continuous_recognition()
        nonlocal done
        done = True

    def append_text(evt):
        # Read the recognized text directly from the event instead of
        # regex-scraping its string representation
        nonlocal text
        text = text + '\n' + evt.result.text

    speech_recognizer.recognized.connect(append_text)
    speech_recognizer.session_stopped.connect(stop_cb)
    speech_recognizer.canceled.connect(stop_cb)
    speech_recognizer.start_continuous_recognition()
    while not done:
        time.sleep(.5)
    speech_recognizer.stop_continuous_recognition()
    return text
def recognize(audio_filename):
    audio_input = speechsdk.AudioConfig(filename=audio_filename)
    # Creates a recognizer with the given settings
    speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config,
                                                   audio_config=audio_input)
    print("Recognizing first result...")

    # Starts speech recognition, and returns after a single utterance is
    # recognized. The end of a single utterance is determined by listening
    # for silence at the end or until a maximum of 15 seconds of audio is
    # processed. The task returns the recognition text as result.
    # Note: Since recognize_once() returns only a single utterance, it is
    # suitable only for single-shot recognition like command or query.
    # For long-running multi-utterance recognition, use
    # start_continuous_recognition() instead.
    result = speech_recognizer.recognize_once()

    # Check the result
    if result.reason == speechsdk.ResultReason.RecognizedSpeech:
        print("Recognized:")
        ans = result.text
    elif result.reason == speechsdk.ResultReason.NoMatch:
        ans = "No speech could be recognized: {}".format(
            result.no_match_details)
    elif result.reason == speechsdk.ResultReason.Canceled:
        cancellation_details = result.cancellation_details
        ans = "Speech Recognition canceled: {}".format(
            cancellation_details.reason)
        if cancellation_details.reason == speechsdk.CancellationReason.Error:
            ans += "\nError details: {}".format(
                cancellation_details.error_details)
    return ans
def text_from_voice(file):
    if file[-4:] != ".wav":  # sanity check
        return {"msg": "file type error", "status": "ERROR"}
    speech_config = speechsdk.SpeechConfig(
        subscription=api_keys["microsoft-speech"]["key"],
        region=api_keys["microsoft-speech"]["region"])
    audio_input = speechsdk.AudioConfig(filename=file)
    speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config,
                                                   audio_config=audio_input)
    result = speech_recognizer.recognize_once_async().get()

    data = {"msg": "", "status": "ERROR"}
    if result.reason == speechsdk.ResultReason.RecognizedSpeech:
        data["msg"] = result.text
        data["status"] = "OK"
    elif result.reason == speechsdk.ResultReason.NoMatch:
        data["msg"] = "No speech could be recognized: {}".format(
            result.no_match_details)
        data["status"] = "FAILED"
    elif result.reason == speechsdk.ResultReason.Canceled:
        cancellation_details = result.cancellation_details
        data["msg"] = "Speech Recognition canceled: {}".format(
            cancellation_details.reason)
        data["status"] = "CANCELED"
        if cancellation_details.reason == speechsdk.CancellationReason.Error:
            data["msg"] = "Error details: {}".format(
                cancellation_details.error_details)
            data["status"] = "ERROR"
    return data
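# A short usage sketch for the helper above; the file name is an example and
# the status strings come from the function itself:
response = text_from_voice("sample.wav")
if response["status"] == "OK":
    print(response["msg"])
else:
    print("recognition failed:", response["msg"])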
def recognize_continuous(show: str, file_name):
    # Use a file as the speech-to-text input
    audio_input = speechsdk.AudioConfig(filename=f"data/{file_name}")
    # Point the config at a locally hosted speech container
    speech_config = speechsdk.SpeechConfig(host="ws://localhost:5000")
    # Request word-level timing info
    speech_config.request_word_level_timestamps()
    # State of the job; set to True once recognition has finished
    done = False
    # Instantiate the speech recognizer
    speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config,
                                                   audio_config=audio_input)

    def stop_cb(evt) -> None:
        """
        Callback to stop continuous recognition when an event is received.
        :param evt: event
        """
        print('CLOSING on {}'.format(evt))
        speech_recognizer.stop_continuous_recognition()
        nonlocal done
        done = True

    def formatted_json(evt):
        res = evt.result.json
        return f'this is the result: {res}'

    # Signal for events containing intermediate recognition results
    if show == "all":
        speech_recognizer.recognizing.connect(
            lambda evt: print('RECOGNIZING: {}'.format(evt)))
    # Signal for events containing final recognition results (indicating a
    # successful recognition attempt).
    # Original snippet from the Azure examples:
    # speech_recognizer.recognized.connect(lambda evt: print('RECOGNIZED: {}'.format(evt)))
    # Test variant, to see all the JSON fields:
    speech_recognizer.recognized.connect(
        lambda evt: print('RECOGNIZED: {}'.format(formatted_json(evt))))
    # Signal for events indicating the start of a recognition session (operation)
    speech_recognizer.session_started.connect(
        lambda evt: print('SESSION STARTED: {}'.format(evt)))
    # Signal for events indicating the end of a recognition session (operation)
    speech_recognizer.session_stopped.connect(
        lambda evt: print('SESSION STOPPED {}'.format(evt)))
    # Signal for events containing canceled recognition results (indicating a
    # recognition attempt that was canceled as a result of a direct
    # cancellation request or, alternatively, a transport or protocol failure)
    speech_recognizer.canceled.connect(
        lambda evt: print('CANCELED {}'.format(evt)))

    speech_recognizer.session_stopped.connect(stop_cb)
    speech_recognizer.canceled.connect(stop_cb)

    # Start the actual transcription job
    speech_recognizer.start_continuous_recognition()
    while not done:
        time.sleep(.5)
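# Since the snippet above requests word-level timestamps, the JSON payload in
# `evt.result.json` carries per-word offsets. A minimal sketch of pulling them
# out; the field names follow the service's detailed-output schema, while the
# handler name is ours:
import json

def print_word_timings(evt):
    payload = json.loads(evt.result.json)
    # Offsets and durations are expressed in 100-nanosecond ticks
    for word in payload.get("NBest", [{}])[0].get("Words", []):
        start_s = word["Offset"] / 10_000_000
        dur_s = word["Duration"] / 10_000_000
        print(f'{word["Word"]}: {start_s:.2f}s (+{dur_s:.2f}s)')

# speech_recognizer.recognized.connect(print_word_timings)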
def from_file(file_path, key):
    speech_config = speechsdk.SpeechConfig(subscription=key, region="eastus")
    audio_input = speechsdk.AudioConfig(filename=file_path)
    speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config,
                                                   audio_config=audio_input)
    result = speech_recognizer.recognize_once_async().get()
    return result.text.split(" ")
async def transcribe_file(speechFileFullPath, lang, fileName, writer):
    """Transcribe the given audio file."""
    result = None
    speech_config = speechsdk.SpeechConfig(subscription=Subscription,
                                           region=Region)
    speech_config.endpoint_id = Endpoint_id
    speech_config.output_format = speechsdk.OutputFormat.Detailed
    audio_input = speechsdk.AudioConfig(filename=speechFileFullPath)
    speech_config.set_profanity(profanity_option=speechsdk.ProfanityOption.Raw)
    # speech_config.speech_recognition_language = lang
    recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config,
                                            audio_config=audio_input)
    try:
        result = recognizer.recognize_once_async().get()

        # The detailed result arrives as a JSON string in the result
        # properties; parse it with json.loads (not eval) to read the
        # top hypothesis confidence
        confidence = 0
        for key in result.properties.keys():
            try:
                temp = json.loads(result.properties[key])
            except (ValueError, TypeError):
                continue
            if isinstance(temp, dict) and "NBest" in temp:
                confidence = temp["NBest"][0]["Confidence"]

        if result.reason == speechsdk.ResultReason.RecognizedSpeech:
            recognition_config.recognitionResults.append({
                "Filename": fileName,
                "Culture/Accent": lang,
                "Text": result.text,
                "Confidence": confidence
            })
        elif result.reason == speechsdk.ResultReason.NoMatch:
            print("No speech could be recognized: {}".format(
                result.no_match_details))
        elif result.reason == speechsdk.ResultReason.Canceled:
            cancellation_details = result.cancellation_details
            print("Speech Recognition canceled: {}".format(
                cancellation_details.reason))
            if cancellation_details.reason == speechsdk.CancellationReason.Error:
                print("Error details: {}".format(
                    cancellation_details.error_details))
    except Exception as e:
        print("Error in Transcribe Function : " + str(e))
        return None
def from_file():
    speech_config = speechsdk.SpeechConfig(
        subscription="<paste-your-subscription-key>",
        region="<paste-your-region>")
    audio_input = speechsdk.AudioConfig(filename="your_file_name.wav")
    speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config,
                                                   audio_config=audio_input)
    result = speech_recognizer.recognize_once_async().get()
    print(result.text)
def transcribe_azure(file_path):
    """Performs one-shot speech recognition with input from an audio file."""
    audio_input = speechsdk.AudioConfig(filename=file_path)
    speech_recognizer = speechsdk.SpeechRecognizer(
        speech_config=AZURE_SPEECH_CONFIG,
        audio_config=audio_input,
        language='pt-BR')
    result = speech_recognizer.recognize_once_async().get()
    return result.text
def from_file(file):
    speech_config = speechsdk.SpeechConfig(
        subscription="<paste-your-subscription-key>",
        region="eastus")
    audio_input = speechsdk.AudioConfig(filename=file)
    speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config,
                                                   audio_config=audio_input)
    result = speech_recognizer.recognize_once_async().get()
    return result.text
def A2T(wav_file):
    speech_config = speechsdk.SpeechConfig(subscription="<API_KEY>",
                                           region="eastus")
    # Dictation mode must be set before the recognizer is created,
    # or it has no effect
    speech_config.enable_dictation()
    audio_input = speechsdk.AudioConfig(filename=wav_file)
    speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config,
                                                   audio_config=audio_input)
    done = False

    def stop_cb(evt):
        print('CLOSING on {}'.format(evt))
        speech_recognizer.stop_continuous_recognition()
        nonlocal done
        done = True

    all_results = []

    def handle_final_result(evt):
        all_results.append(evt.result.text)

    speech_recognizer.recognized.connect(handle_final_result)
    speech_recognizer.recognizing.connect(
        lambda evt: print('RECOGNIZING: {}'.format(evt)))
    speech_recognizer.recognized.connect(
        lambda evt: print('RECOGNIZED: {}'.format(evt)))
    speech_recognizer.session_started.connect(
        lambda evt: print('SESSION STARTED: {}'.format(evt)))
    speech_recognizer.session_stopped.connect(
        lambda evt: print('SESSION STOPPED {}'.format(evt)))
    speech_recognizer.canceled.connect(
        lambda evt: print('CANCELED {}'.format(evt)))
    speech_recognizer.session_stopped.connect(stop_cb)
    speech_recognizer.canceled.connect(stop_cb)

    speech_recognizer.start_continuous_recognition()
    while not done:
        time.sleep(.5)

    # Join the per-utterance results into a single transcript
    return " ".join(all_results)
def audiototext(self, audio_filename):
    # Creates an audio configuration that points to an audio file
    audio_input = speechsdk.AudioConfig(filename=audio_filename)
    # Creates a recognizer with the given settings
    speech_recognizer = speechsdk.SpeechRecognizer(
        speech_config=self.speech_config, audio_config=audio_input)
    print("[Info] Processing Audio to Text")
    translated_text = self.recognizespeech(speech_recognizer)
    return translated_text
def transcribe_audio_file_path(self, audio_file_path):
    # For now supports wav, not mp3
    # https://stackoverflow.com/questions/51614216/what-audio-formats-are-supported-by-azure-cognitive-services-speech-service-ss?rq=1
    audio_config = speechsdk.AudioConfig(use_default_microphone=False,
                                         filename=audio_file_path)
    speech_recognizer = speechsdk.SpeechRecognizer(
        speech_config=self.speech_config, audio_config=audio_config)
    done = False
    transcript = ""
    cancellation_details = None

    def stop_cb(evt):
        """callback that stops continuous recognition upon receiving an
        event `evt`"""
        print("CLOSING on {}".format(evt))
        speech_recognizer.stop_continuous_recognition()
        nonlocal done
        done = True

    def return_transcript(evt):
        """recognition is continuous, that is every sentence gets recognized
        separately. We want to concatenate all the sentences and return the
        full transcript"""
        nonlocal transcript
        transcript += " "
        transcript += evt.result.text

    def return_cancellation_details(evt):
        """return cancellation details"""
        nonlocal cancellation_details
        cancellation_details = evt.result.cancellation_details.error_details

    # Connect callbacks to the events fired by the speech recognizer
    speech_recognizer.recognized.connect(return_transcript)
    speech_recognizer.canceled.connect(return_cancellation_details)
    # stop continuous recognition on either session stopped or canceled events
    speech_recognizer.session_stopped.connect(stop_cb)
    speech_recognizer.canceled.connect(stop_cb)

    # Start continuous speech recognition
    speech_recognizer.start_continuous_recognition()
    while not done:
        time.sleep(0.5)

    if cancellation_details:
        raise exceptions.Canceled("Azure Speech cancellation error: " +
                                  cancellation_details)
    if transcript == "":
        raise exceptions.BlankTranscript(
            "Azure Speech returned blank transcript")
    return transcript
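# A minimal usage sketch for the method above. The `Transcriber` wrapper class
# is hypothetical (the original only shows the method), and
# `exceptions.Canceled` / `exceptions.BlankTranscript` come from the
# project's own exceptions module:
transcriber = Transcriber()  # hypothetical class holding self.speech_config
try:
    transcript = transcriber.transcribe_audio_file_path("meeting.wav")
    print(transcript)
except exceptions.Canceled as err:
    print("Canceled:", err)
except exceptions.BlankTranscript:
    print("No speech found in the file")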
def speech_from_file(file):
    if file[-4:] != ".wav":
        print("error")
        return
        # return JsonResponse({"error": "file type error"})
    speech_config = speechsdk.SpeechConfig(
        subscription=api_keys["microsoft-speech"]["key"],
        region=api_keys["microsoft-speech"]["region"])
    audio_input = speechsdk.AudioConfig(filename=file)
    speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config,
                                                   audio_config=audio_input)
    result = speech_recognizer.recognize_once_async().get()
    # return JsonResponse(result.text)
    print(result.text)
    return result.text
def snip_transcribe(output_list,
                    filename,
                    output_folder=output_folder,
                    speech_key=speech_key,
                    service_region=service_region):
    speech_config = speechsdk.SpeechConfig(subscription=speech_key,
                                           region=service_region)
    # Note the parentheses: without them the method is merely referenced,
    # not called, and dictation mode is never enabled
    speech_config.enable_dictation()

    def recognized_cb(evt):
        if evt.result.reason == speechsdk.ResultReason.RecognizedSpeech:
            # Append the recognized text to the current snippet
            output_list[ind]['text'] = output_list[ind]['text'] + \
                str(evt.result.text)
            print(evt.result.text)

    for ind, diag in enumerate(output_list):
        t1 = diag['start_time']
        t2 = diag['end_time']
        newAudio = AudioSegment.from_wav(filename)
        chunk = newAudio[t1 * 1000:t2 * 1000]
        filename_out = output_folder + f"snippet_{diag['sequence_id']}.wav"
        # Export the snippet to a wav file in the current path
        chunk.export(filename_out, format="wav")
        done = False

        def stop_cb(evt):
            """callback that signals to stop continuous recognition upon
            receiving an event `evt`"""
            print('CLOSING on {}'.format(evt))
            nonlocal done
            done = True

        audio_input = speechsdk.AudioConfig(filename=filename_out)
        speech_recognizer = speechsdk.SpeechRecognizer(
            speech_config=speech_config, audio_config=audio_input)
        output_list[ind]['snippet_path'] = filename_out

        speech_recognizer.recognized.connect(recognized_cb)
        speech_recognizer.session_stopped.connect(stop_cb)
        speech_recognizer.canceled.connect(stop_cb)

        # Start continuous speech recognition
        speech_recognizer.start_continuous_recognition()
        while not done:
            time.sleep(.5)
        speech_recognizer.stop_continuous_recognition()
    return output_list
def prepare_speech_recognizer(args):
    hint = "See https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/get-started-speech-to-text"
    subscription = read_key("speech-subscription", hint=hint)
    print("ASR Language: %s" % (args.lang, ))
    speech_config = speechsdk.SpeechConfig(
        subscription=subscription,
        region=args.region,
        speech_recognition_language=args.lang,
    )
    input_stream = get_input_stream(args.port)
    audio_config = speechsdk.AudioConfig(stream=input_stream)
    speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config,
                                                   audio_config=audio_config)
    return speech_recognizer
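# `get_input_stream` is not shown above. One plausible shape, assuming the
# port carries raw 16 kHz / 16-bit mono PCM (the SDK's default input format),
# is a PushAudioInputStream fed from a socket in a background thread; the
# function body and wire format here are assumptions:
import socket
import threading
import azure.cognitiveservices.speech as speechsdk

def get_input_stream(port: int) -> speechsdk.audio.PushAudioInputStream:
    stream = speechsdk.audio.PushAudioInputStream()

    def pump():
        server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        server.bind(("localhost", port))
        server.listen(1)
        conn, _ = server.accept()
        while True:
            data = conn.recv(4096)
            if not data:
                break
            stream.write(data)  # forward raw PCM bytes to the recognizer
        stream.close()

    threading.Thread(target=pump, daemon=True).start()
    return stream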
def from_file(base='', filename='default'):
    speech_config = speechsdk.SpeechConfig(
        subscription="<paste-your-subscription-key>",
        region="centralus")
    wav_path = base + filename  # path to the wav file
    blob_file_name = filename.split('.')[0] + '.txt'  # txt file name
    txt_path = base + blob_file_name  # path to the txt file
    audio_input = speechsdk.AudioConfig(filename=wav_path)
    speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config,
                                                   audio_config=audio_input)
    result = speech_recognizer.recognize_once_async().get()
    with open(txt_path, 'w', encoding="UTF-8") as fp:
        fp.write(result.text)
    # Back up both files to blob storage
    block_blob_service.create_blob_from_path(container_name, filename,
                                             wav_path)
    block_blob_service.create_blob_from_path(container_name, blob_file_name,
                                             txt_path)
def recognize_audio(output,
                    speech_key,
                    service_region,
                    language,
                    filename,
                    recognize_time=100):
    """
    Transcribes text from wav-format audio data.
    ---------------------------
    Parameters
    output: str
        Text transcribed from the audio (accumulated recursively)
    speech_key: str
        Azure Speech SDK key
    service_region: str
        Azure Speech SDK region name
    language: str
        Language to use for speech recognition
    filename: str
        Path to the audio file
    recognize_time: int
        Time in seconds to spend on recognition; about 100 seconds is
        enough for a 180-second audio file
    """
    # Azure Speech Config
    speech_config = speechsdk.SpeechConfig(subscription=speech_key,
                                           region=service_region)
    speech_config.enable_dictation()

    # Language setting ('日本語' = Japanese, '英語' = English)
    if language == '日本語':
        speech_config.speech_recognition_language = "ja-JP"
    elif language == '英語':
        speech_config.speech_recognition_language = "en-US"

    # Recognizing
    audio_input = speechsdk.AudioConfig(filename=filename)
    speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config,
                                                   audio_config=audio_input)

    def recognized(evt):
        nonlocal output
        output += evt.result.text

    speech_recognizer.recognized.connect(recognized)
    speech_recognizer.start_continuous_recognition()
    time.sleep(recognize_time)
    # Stop recognition before returning so the session is cleaned up
    speech_recognizer.stop_continuous_recognition()
    return output
def get_text_from_input(input_audio_filename, speech_config):
    # Creates an audio configuration that points to an audio file.
    # Replace with your own audio filename.
    audio_input = speechsdk.AudioConfig(filename=input_audio_filename)
    # Creates a recognizer with the given settings
    speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config,
                                                   audio_config=audio_input)
    print("Recognizing first result...")
    # Starts speech recognition, and returns after a single utterance is
    # recognized. The end of a single utterance is determined by listening
    # for silence at the end or until a maximum of 15 seconds of audio is
    # processed. The task returns the recognition text as result.
    # Note: Since recognize_once() returns only a single utterance, it is
    # suitable only for single-shot recognition like command or query.
    # For long-running multi-utterance recognition, use
    # start_continuous_recognition() instead.
    result = speech_recognizer.recognize_once()
    return result.text
def speech_to_text(audio_file):
    print(audio_file)
    audio_input = speechsdk.AudioConfig(filename=audio_file)
    speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config,
                                                   audio_config=audio_input)
    result = speech_recognizer.recognize_once_async().get()
    if result.reason == speechsdk.ResultReason.RecognizedSpeech:
        print("Recognized: {}".format(result.text))
        return result.text
    elif result.reason == speechsdk.ResultReason.NoMatch:
        print(f"No speech could be recognized: {result.no_match_details}")
        return "ERROR_Rec"
    elif result.reason == speechsdk.ResultReason.Canceled:
        cancellation_details = result.cancellation_details
        print(f"Speech Recognition canceled: {cancellation_details.reason}")
        if cancellation_details.reason == speechsdk.CancellationReason.Error:
            print(f"Error details: {cancellation_details.error_details}")
        return "ERROR_Canc"
    return
async def handle_post(request):
    reader = await request.multipart()
    field = await reader.next()
    assert field.name == 'audio'
    filename = field.filename
    size = 0
    with open(os.path.join('.', filename), 'wb') as f:
        while True:
            chunk = await field.read_chunk()
            if not chunk:
                break
            size += len(chunk)
            f.write(chunk)

    # Convert the upload to a mono wav file that the Speech SDK can read
    wavefile = filename + '.wav'
    if os.path.exists(wavefile):
        os.remove(wavefile)
    ff = FFmpeg(inputs={filename: None}, outputs={wavefile: '-ac 1'})
    ff.run()

    speech_key, service_region = 'yourkey', 'regionsuchaswestus'
    speech_config = speechsdk.SpeechConfig(subscription=speech_key,
                                           region=service_region)
    audio_config = speechsdk.AudioConfig(filename=wavefile)
    speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config,
                                                   audio_config=audio_config)
    result = speech_recognizer.recognize_once()

    res = 'No response'
    if result.reason == speechsdk.ResultReason.RecognizedSpeech:
        res = 'Recognized: {}'.format(result.text)
    elif result.reason == speechsdk.ResultReason.NoMatch:
        res = 'No speech could be recognized: {}'.format(
            result.no_match_details)
    elif result.reason == speechsdk.ResultReason.Canceled:
        cancellation_details = result.cancellation_details
        res = 'Speech Recognition canceled: {}'.format(
            cancellation_details.reason)
        # if cancellation_details.reason == speechsdk.CancellationReason.Error:
        #     print("Error details: {}".format(cancellation_details.error_details))
    return web.Response(text=res)
def from_file():
    f = open('output.txt', 'w')
    d = open('result.txt', 'w')
    speech_config = speechsdk.SpeechConfig(
        subscription="<paste-your-subscription-key>",
        region="eastus")
    audio_input = speechsdk.AudioConfig(filename="test.wav")
    speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config,
                                                   audio_config=audio_input)
    result = speech_recognizer.recognize_once_async().get()
    d.write(result.text)
    d.close()
    returnedStuff = grammar_check.model_call(result.text)
    for i in returnedStuff:
        print(i + "\n")
        f.write(i)
    f.close()
def __init__(self, speech_key: str, service_region: str,
             recognition_language: str, mic_id: str, dict_mode_active: bool,
             callbackClass):
    self.speech_recognizer = None
    self.callbackClass = callbackClass
    self.conversation = {}

    # Set up the Azure speech recognizer
    if mic_id is not None:
        audio_config = speechsdk.AudioConfig(device_name=mic_id)
    else:
        audio_config = None
    speech_config = speechsdk.SpeechConfig(subscription=speech_key,
                                           region=service_region)
    speech_config.speech_recognition_language = recognition_language
    if dict_mode_active:
        speech_config.enable_dictation()
        print("Dictation Mode enabled.")
    self.speech_recognizer = speechsdk.SpeechRecognizer(
        speech_config=speech_config, audio_config=audio_config)

    # Connect callbacks to the events fired by the speech recognizer
    self.speech_recognizer.session_started.connect(
        lambda evt: print('SESSION STARTED: {}'.format(evt)))
    self.speech_recognizer.session_stopped.connect(
        lambda evt: print('SESSION STOPPED {}'.format(evt)))
    # Fires for every partial utterance, even before recognition is finished
    self.speech_recognizer.recognizing.connect(self.cb_recogonizing)
    self.speech_recognizer.recognized.connect(self.cb_recognized)
    self.speech_recognizer.canceled.connect(self.cb_cancelled)
    # Stop continuous recognition on either session-stopped or canceled events
    self.speech_recognizer.session_stopped.connect(self.stop_cb)
    self.speech_recognizer.canceled.connect(self.stop_cb)

    # Improve recognition accuracy with custom phrase lists
    self.update_grammar_list(self.speech_recognizer)
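# `update_grammar_list` is referenced above but not shown. A plausible sketch
# using the SDK's phrase-list API; the phrases themselves are made-up examples:
def update_grammar_list(self, recognizer):
    phrase_list = speechsdk.PhraseListGrammar.from_recognizer(recognizer)
    # Bias recognition toward domain vocabulary the model might otherwise miss
    for phrase in ["Azure Kinect", "Speech SDK"]:
        phrase_list.addPhrase(phrase)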