def call_runner(limit_cnt=1):
    """Stream Korean microphone audio to the Speech API and return the
    movie-dict list built by listen_print_loop.

    Args:
        limit_cnt: forwarded to listen_print_loop to cap how many
            utterances are processed.

    Returns:
        The list of movie dicts produced from the recognized speech.
    """
    # See http://g.co/cloud/speech/docs/languages for supported languages.
    language_code = "ko-KR"  # a BCP-47 language tag
    client = speech.SpeechClient()
    recognition_config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code,
    )
    streaming_config = speech.StreamingRecognitionConfig(
        config=recognition_config, interim_results=True
    )

    with MicrophoneStream(RATE, CHUNK) as stream:
        request_stream = (
            speech.StreamingRecognizeRequest(audio_content=chunk)
            for chunk in stream.generator()
        )
        responses = client.streaming_recognize(streaming_config, request_stream)
        # Hand the transcription responses off for processing.
        movie_dict_list = listen_print_loop(responses, stream, limit_cnt)
        print("====", movie_dict_list)
        return movie_dict_list
def main():
    """Greet the user (Korean), open the microphone, and run one call to
    listen_print_loop per file found in the err_sound directory."""
    language_code = 'ko-KR'
    print('AI 스피커가 동작하는데 시간이 걸립니다. 잠시만 기다려 주세요.')

    client = speech.SpeechClient()
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code)
    streaming_config = speech.StreamingRecognitionConfig(
        config=config, interim_results=True)

    playsound('../../open_sound/open.wav')
    print('원하시는 것을 말씀 해주세요.')

    with MicrophoneStream(RATE, CHUNK) as stream:
        request_stream = (
            speech.StreamingRecognizeRequest(audio_content=chunk)
            for chunk in stream.generator()
        )
        responses = client.streaming_recognize(streaming_config, request_stream)
        # One processing pass per error-sound file name.
        path = r'C:\Users\leewo\hci_test\err_sound'
        for sound in os.listdir(path):
            listen_print_loop(responses, sound)
def download_audio_and_transcribe(self, recording_url: str) -> str:
    """Download a recording over HTTP and stream it through the Speech API.

    Args:
        recording_url: URL of the 8 kHz LINEAR16 recording to fetch.

    Returns:
        The transcript of the last alternative seen, or "" if the API
        produced no results.
    """
    transcript_text: str = ""
    self.connect(destination="speech")
    http_response = requests.get(url=recording_url, stream=True)
    request_stream = (
        speech.StreamingRecognizeRequest(audio_content=chunk)
        for chunk in http_response.iter_content()
    )
    recognition_config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=8000,
        language_code="en-US",
    )
    stream_config = speech.StreamingRecognitionConfig(config=recognition_config)
    responses = self.speech_client.streaming_recognize(
        config=stream_config, requests=request_stream,
    )
    for response in responses:
        # Once the transcription has settled, the first result carries
        # is_final; later results cover subsequent audio portions.
        for result in response.results:
            # Alternatives are ordered most- to least-likely; the last one
            # written wins, matching the original behavior.
            for alternative in result.alternatives:
                transcript_text = u"{}".format(alternative.transcript)
    return transcript_text
def stt():
    """Recognize a single English utterance from the microphone and
    return the final text produced by listen_print_loop."""
    # See http://g.co/cloud/speech/docs/languages for supported languages.
    language_code = "en-US"  # a BCP-47 language tag
    # language_code = 'ko-KR'  # a BCP-47 language tag

    client = speech.SpeechClient()
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code,
    )
    # single_utterance: stop streaming once one spoken utterance ends.
    streaming_config = speech.StreamingRecognitionConfig(
        config=config, interim_results=True, single_utterance=True)

    with MicrophoneStream(RATE, CHUNK) as stream:
        request_stream = (
            speech.StreamingRecognizeRequest(audio_content=chunk)
            for chunk in stream.generator()
        )
        responses = client.streaming_recognize(streaming_config, request_stream)
        # Return the final recognized text.
        return listen_print_loop(responses)
def transcribe(transcript, q):
    """Stream microphone audio to the Speech API and forward the response
    stream to listen_print_loop together with the shared transcript and
    queue objects.

    Args:
        transcript: shared transcript object updated by listen_print_loop.
        q: queue used by listen_print_loop to publish results.
    """
    # See http://g.co/cloud/speech/docs/languages for supported languages.
    language_code = "en-US"
    client = speech.SpeechClient()
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code)
    streaming_config = speech.StreamingRecognitionConfig(
        config=config, interim_results=True)

    with MicStream(RATE, CHUNK) as stream:
        request_stream = (
            speech.StreamingRecognizeRequest(audio_content=chunk)
            for chunk in stream.generator()
        )
        responses = client.streaming_recognize(streaming_config, request_stream)
        # Put the transcription responses to use.
        listen_print_loop(responses, transcript, q)
def main():
    """Japanese streaming-recognition demo: prints each string yielded by
    listen_print_loop."""
    # See http://g.co/cloud/speech/docs/languages for supported languages.
    language_code = 'ja-JP'  # a BCP-47 language tag

    import os
    os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = "speech-rec-827143ff9a4c.json"

    client = speech.SpeechClient()
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code)
    streaming_config = speech.StreamingRecognitionConfig(
        config=config, interim_results=True)

    print("start rec")
    with MicrophoneStream(RATE, CHUNK) as stream:
        request_stream = (
            speech.StreamingRecognizeRequest(audio_content=chunk)
            for chunk in stream.generator()
        )
        responses = client.streaming_recognize(streaming_config, request_stream)
        # Print each recognized utterance as it arrives.
        for recognized in listen_print_loop(responses):
            # voiceroid.say(recognized)
            print(recognized)
def main():
    """Stream Korean microphone audio and process it with
    listen_print_loop; the caller may invoke main again after it returns."""
    # See http://g.co/cloud/speech/docs/languages for supported languages.
    language_code = 'ko-KR'  # a BCP-47 language tag
    client = speech.SpeechClient()
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code)
    # single_utterance=True would make the API recognize only one spoken
    # utterance and end the stream when the speaker pauses (so the device's
    # own speaker output would not be re-recognized). It is deliberately
    # left off here; only interim_results is enabled.
    streaming_config = speech.StreamingRecognitionConfig(
        config=config,
        interim_results=True)

    with MicrophoneStream(RATE, CHUNK) as stream:
        request_stream = (
            speech.StreamingRecognizeRequest(audio_content=chunk)
            for chunk in stream.generator()
        )
        responses = client.streaming_recognize(streaming_config, request_stream)
        # listen_print_loop may return; main can then be re-run by the caller.
        listen_print_loop(responses)
        print('main: finished listen_print_loop')
def transcript_in_loop(self, stream):
    """Run streaming recognition until the stream closes, restarting the
    request on each pass (resumable streaming), and return the
    transcripts from the final pass."""
    while not stream.closed:
        sys.stdout.write(YELLOW)
        sys.stdout.write("\n" + str(STREAMING_LIMIT * stream.restart_counter) + ": NEW REQUEST\n")

        stream.audio_input = []
        request_stream = (
            speech.StreamingRecognizeRequest(audio_content=chunk)
            for chunk in stream.generator()
        )
        responses = self.client.streaming_recognize(
            self.streaming_config, request_stream)

        # Consume this pass's transcription responses.
        transcripts = self.listen_print_loop(responses, stream)

        # Book-keeping so the next pass resumes where this one ended.
        if stream.result_end_time > 0:
            stream.final_request_end_time = stream.is_final_end_time
        stream.result_end_time = 0
        stream.last_audio_input = []
        stream.last_audio_input = stream.audio_input
        stream.audio_input = []
        stream.restart_counter = stream.restart_counter + 1
        stream.new_stream = True

        if not stream.last_transcript_was_final:
            sys.stdout.write("\n")

    return transcripts
def main(lacalut):
    """Ukrainian dictation-mode streaming recognition; forwards the
    response stream and `lacalut` to listen_print_loop."""
    language_code = "uk-UA"
    client = speech.SpeechClient()

    # Mark the audio as dictation so the API can tune recognition.
    interaction_type = speech.RecognitionMetadata.InteractionType.DICTATION
    metadata = speech.RecognitionMetadata(interaction_type=interaction_type)

    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code,
        metadata=metadata,
    )
    streaming_config = speech.StreamingRecognitionConfig(
        config=config, interim_results=True)

    with MicrophoneStream(RATE, CHUNK) as stream:
        request_stream = (
            speech.StreamingRecognizeRequest(audio_content=chunk)
            for chunk in stream.generator()
        )
        responses = client.streaming_recognize(streaming_config, request_stream)
        listen_print_loop(responses, lacalut)
def main():
    """Japanese streaming recognition using an explicit service-account
    credentials file."""
    # See http://g.co/cloud/speech/docs/languages for supported languages.
    language_code = "ja-JP"  # a BCP-47 language tag
    credential_path = "./aitraining-306004-2e354d0f5ba9.json"
    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = credential_path

    client = speech.SpeechClient()
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code,
    )
    streaming_config = speech.StreamingRecognitionConfig(
        config=config, interim_results=True
    )

    with MicrophoneStream(RATE, CHUNK) as stream:
        request_stream = (
            speech.StreamingRecognizeRequest(audio_content=chunk)
            for chunk in stream.generator()
        )
        responses = client.streaming_recognize(streaming_config, request_stream)
        # Put the transcription responses to use.
        listen_print_loop(responses)
def prepare_run(self):
    """Start streaming recognition over this object's audio generator and
    launch the background thread that consumes prediction responses."""
    request_stream = (
        gspeech.StreamingRecognizeRequest(audio_content=chunk)
        for chunk in self._generator()
    )
    self.responses = self.client.streaming_recognize(
        self.streaming_config, request_stream)
    worker = threading.Thread(target=self._produce_predictions_loop)
    worker.start()
def main():
    """English streaming recognition with speaker diarization (1-5
    speakers) and automatic punctuation."""
    # See http://g.co/cloud/speech/docs/languages for supported languages.
    language_code = "en-US"  # a BCP-47 language tag

    diarization = speech.SpeakerDiarizationConfig(
        enable_speaker_diarization=True,
        min_speaker_count=1,
        max_speaker_count=5,
    )
    client = speech.SpeechClient()
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code,
        enable_automatic_punctuation=True,
        diarization_config=diarization,
    )
    streaming_config = speech.StreamingRecognitionConfig(
        config=config, interim_results=True)

    with MicrophoneStream(RATE, CHUNK) as stream:
        request_stream = (
            speech.StreamingRecognizeRequest(audio_content=chunk)
            for chunk in stream.generator()
        )
        responses = client.streaming_recognize(streaming_config, request_stream)
        # Put the transcription responses to use.
        listen_print_loop(responses)
def main():
    """English streaming recognition authenticating with a local
    service-account JSON file."""
    # See http://g.co/cloud/speech/docs/languages for supported languages.
    language_code = "en-US"  # a BCP-47 language tag

    client = speech.SpeechClient.from_service_account_json(
        "../creds/speech_google_credentials.json")
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code,
    )
    streaming_config = speech.StreamingRecognitionConfig(
        config=config, interim_results=True)

    with MicrophoneStream(RATE, CHUNK) as stream:
        request_stream = (
            speech.StreamingRecognizeRequest(audio_content=chunk)
            for chunk in stream.generator()
        )
        responses = client.streaming_recognize(streaming_config, request_stream)
        # Put the transcription responses to use.
        listen_print_loop(responses)
def code_driver(self):
    """Open the microphone and drive one streaming-recognition session,
    then hand control to this object's listen_print_loop."""
    with MicrophoneStream(self.rate, self.chunk) as stream:
        request_stream = (
            speech.StreamingRecognizeRequest(audio_content=chunk)
            for chunk in stream.generator()
        )
        self.responses = self.client.streaming_recognize(
            self.streaming_config, request_stream)
        self.listen_print_loop()
def main():
    """start bidirectional streaming from microphone input to speech API"""
    client = speech.SpeechClient()
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=SAMPLE_RATE,
        language_code="ko-KR",  # en-US
        max_alternatives=1,
    )
    streaming_config = speech.StreamingRecognitionConfig(
        config=config, interim_results=True
    )

    mic_manager = ResumableMicrophoneStream(SAMPLE_RATE, CHUNK_SIZE)
    print(mic_manager.chunk_size)
    sys.stdout.write(YELLOW)
    sys.stdout.write('\nListening, say "Quit" or "Exit" to stop.\n\n')
    sys.stdout.write("End (ms) Transcript Results/Status\n")
    sys.stdout.write("=====================================================\n")

    with mic_manager as stream:
        # Keep issuing new streaming requests until the stream closes;
        # each pass restarts recognition after the API's streaming limit.
        while not stream.closed:
            sys.stdout.write(YELLOW)
            sys.stdout.write(
                "\n" + str(STREAMING_LIMIT * stream.restart_counter) + ": NEW REQUEST\n"
            )

            stream.audio_input = []
            audio_generator = stream.generator()

            requests = (
                speech.StreamingRecognizeRequest(audio_content=content)
                for content in audio_generator
            )

            responses = client.streaming_recognize(streaming_config, requests)

            # Now, put the transcription responses to use.
            return_word = listen_print_loop(responses, stream)
            print(return_word)

            # Carry timing/audio state across so the next pass resumes
            # where this one ended.
            if stream.result_end_time > 0:
                stream.final_request_end_time = stream.is_final_end_time
            stream.result_end_time = 0
            stream.last_audio_input = []
            stream.last_audio_input = stream.audio_input
            stream.audio_input = []
            stream.restart_counter = stream.restart_counter + 1

            if not stream.last_transcript_was_final:
                sys.stdout.write("\n")
            stream.new_stream = True
        # NOTE(review): reconstructed from whitespace-mangled source; the
        # return is assumed to come after the restart loop — confirm.
        return return_word
def start(self):
    """Begin streaming recognition over this object's audio generator and
    hand the response stream to process_responses_loop."""
    client = speech.SpeechClient()
    audio_stream = self.generator()
    request_stream = (
        speech.StreamingRecognizeRequest(audio_content=chunk)
        for chunk in audio_stream
    )
    responses = client.streaming_recognize(self.streaming_config, request_stream)
    self.process_responses_loop(responses)
def start(self, auto_turn=True, reset_result=False):
    # If auto_turn is True, turn-taking is performed automatically;
    # normally leaving it True is fine.
    self.print_debug("start Google ASR")
    if reset_result == True:
        self.recognition_result = ""
    self.is_listening = False
    self.utt_start_time = None
    self.turn_start_time = time.time()
    turn_thread_flag = False
    client = speech.SpeechClient()
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=SAMPLE_RATE,
        language_code="ja-JP",
        max_alternatives=1,
    )
    # streaming_config = speech.StreamingRecognitionConfig(
    #     config=config, interim_results=True)
    # single_utterance is enabled: without it, finalizing a recognition
    # result can take up to 60 seconds.
    streaming_config = speech.StreamingRecognitionConfig(
        config=config, interim_results=True, single_utterance=True)
    mic_manager = ResumableMicrophoneStream(SAMPLE_RATE, CHUNK_SIZE)
    with mic_manager as self.stream:
        while not self.stream.closed:
            self.stream.audio_input = []
            audio_generator = self.stream.generator()
            requests = (speech.StreamingRecognizeRequest(
                audio_content=content) for content in audio_generator)
            # Start the turn-taking thread exactly once, on the first pass.
            if (turn_thread_flag == False) and (auto_turn == True):
                turn_thread_flag = True
                self.m_turn.start_turn_thread()
            responses = client.streaming_recognize(streaming_config, requests)
            # Enter the loop that consumes recognition results.
            self.listen_loop(responses)
            # Restart handling for when the streaming limit is exceeded;
            # this only runs if the stream stays connected for ~4 minutes.
            if self.stream.result_end_time > 0:
                self.stream.final_request_end_time = self.stream.is_final_end_time
            self.stream.result_end_time = 0
            self.stream.last_audio_input = []
            self.stream.last_audio_input = self.stream.audio_input
            self.stream.audio_input = []
            self.stream.restart_counter = self.stream.restart_counter + 1
            if not self.stream.last_transcript_was_final:
                sys.stdout.write("\n")
            self.stream.new_stream = True
            # NOTE(review): reconstructed from whitespace-mangled source;
            # file_num is assumed to increment once per stream pass — confirm.
            self.file_num += 1
def perform_transcription(self):
    """Main transcription loop.

    Feeds PCM audio from the packet queue to Google streaming
    recognition, forwards the responses to the SRT writer, and restarts
    the stream when the API's duration limit is reached or the service
    is temporarily unavailable. Runs until glbl.G_EXIT_FLAG is set.
    """
    while not glbl.G_EXIT_FLAG:
        glbl.main_logger.info("starting transcription iteration")
        try:
            # Generator over data supplied by PacketizedPCMReader.
            generator_obj = PCMGenerator(self.pcm_q, self.pcm_stream_state)
            audio_generator = generator_obj.get_bytes()
            # Block until the generator yields its first chunk of audio.
            for data in audio_generator:
                break

            # The transcription request stream (via a generator).
            requests = (speech.StreamingRecognizeRequest(
                audio_content=content) for content in audio_generator)

            # The transcription response stream (via a generator).
            self.responses = self.speech_client.streaming_recognize(
                self.speech_config, requests)

            # Forward responses to a queue read/handled by SRTWriter.
            self.queue_transcription_responses()

            # Control reaches here when there are no more responses:
            # (a) there is no more input, or
            # (b) streaming_recognize cannot process more than this
            #     duration — reset the stream state and start over.
            self.pcm_stream_state.on_iteration_complete()
            # BUG FIX: the continuation literals below were missing the
            # f prefix, so "{self...}" was logged verbatim.
            glbl.main_logger.info(
                f"RETRY AFTER 5MIN, "
                f"last_sent={self.pcm_stream_state.last_sub_pts}")
            lk = self.pcm_stream_state.get_last_key()
            if lk is not None:
                glbl.main_logger.info(
                    f"=====audio_pts_map[{lk}] = "
                    f"{self.pcm_stream_state.audio_pts_map[lk]}======")
        except google.api_core.exceptions.ServiceUnavailable:
            glbl.main_logger.info(
                f"ServiceUnavailable exception, "
                f"retry_after_sec={confvars.G_RETRY_DURATION_SEC_ON_SERVICE_UNAVAILABLE}")
            time.sleep(
                confvars.G_RETRY_DURATION_SEC_ON_SERVICE_UNAVAILABLE)
def run(self):
    """Open the microphone, run one recognition session via
    listen_print_loop, then signal shutdown through the buffer sentinel
    and the status flag."""
    with MicrophoneStream(RATE, CHUNK) as stream:
        self.mic = stream
        request_stream = (
            speech.StreamingRecognizeRequest(audio_content=chunk)
            for chunk in stream.generator()
        )
        responses = self.client.streaming_recognize(
            self.streaming_config, request_stream)
        # Consume the transcription responses.
        self.listen_print_loop(responses, stream)
        self._buff.put(None)
        self.status = False
def recognize_async_audio_stream(self, language_code="en-US"):
    """
    Recognize in "real-time" from microphone stream.

    Returns when a final_result is found. May be created as a thread of
    its own or it'll block until a final result is found. Stores all
    results in the `final_result_queue` queue.

    Args:
        language_code -- language to use for recognition. See `languages`
            for supported languages.
    """
    if language_code not in self.languages:
        print(
            '\"{}\" is not a supported language code. Make sure it\'s supported by Google and try adding adding it to the languages list.\n'
            .format(language_code))
        return

    # Clear all items in queue for new stream.
    self.final_result_queue.queue.clear()

    config_stream = speech.StreamingRecognitionConfig(
        config=speech.RecognitionConfig(
            encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
            sample_rate_hertz=self.microphone_handler.RATE,
            language_code=language_code,
            enable_automatic_punctuation=True,
        ),
        interim_results=True)

    self.microphone_handler.start_recording(streaming=True)
    while self.microphone_handler.streaming:
        data = self.microphone_handler.stream_generator()
        requests = (speech.StreamingRecognizeRequest(audio_content=content)
                    for content in data)
        try:
            responses = self.client.streaming_recognize(
                config_stream, requests)
            for response in responses:
                # BUG FIX: guard against responses with no results before
                # indexing; previously the IndexError was silently
                # swallowed by the bare except.
                if not response.results:
                    continue
                self.final_result_queue.put(response.results[0])
                if response.results[0].is_final:
                    # Stops more recordings than one. Doesn't halt after recording is done. (temp)
                    return
                if self.debug:
                    # Print all non final results in terminal(debug).
                    print(
                        response.results[0].alternatives[0].transcript + '\n')
        # BUG FIX: was a bare `except:`, which also swallowed SystemExit
        # and KeyboardInterrupt.
        except Exception:
            print('Failed to get response.')
def start_speech_to_text(self):
    '''
    Response to open the microphone stream and send it to the API to
    create from that transcription
    '''
    with MicrophoneStream(self.RATE, self.CHUNK) as stream:
        # stream.generator() yields chunks of raw audio data.
        request_stream = (
            speech.StreamingRecognizeRequest(audio_content=chunk)
            for chunk in stream.generator()
        )
        # streaming_recognize returns a generator of responses.
        responses = self.client.streaming_recognize(
            self.streaming_config, request_stream)
        # Build the transcription from the responses.
        self.listen_loop(responses)
def transcribe(self):
    """Generator yielding {'transcript', 'is_final'} dicts from
    continuous streaming recognition; transparently restarts when Google
    cuts the stream at its ~5-minute limit."""
    if not self.is_supported:
        return
    logger.debug("googleTranscribe.transcribe ENTER")
    while True:
        chunk_stream = self.audio_device.streamGenerator()
        request_stream = (
            speech.StreamingRecognizeRequest(audio_content=chunk)
            for chunk in chunk_stream
        )
        responses = self.client.streaming_recognize(
            requests=request_stream, config=self.streaming_config)
        try:
            for response in responses:
                if not response.results:
                    continue
                result = response.results[0]
                if not result.is_final and not speakreader.CONFIG.SHOW_INTERIM_RESULTS:
                    continue
                if not result.alternatives:
                    continue
                yield {
                    'transcript': result.alternatives[0].transcript,
                    'is_final': result.is_final,
                }
            logger.debug("googleTranscribe.transcribe EXIT")
            break
        except (exceptions.OutOfRange, exceptions.DeadlineExceeded):
            # Google Cloud limits a stream to about 5 minutes. Just loop.
            continue
def transcribe_streaming(stream_file):
    """Streams transcription of the given audio file.

    Args:
        stream_file: raw LINEAR16 audio bytes (16 kHz, en-US) sent as a
            single streaming chunk.

    Returns:
        The transcript of the last alternative received, or "" if the
        API produced no results.
    """
    from google.cloud import speech

    client = speech.SpeechClient()

    # In practice, stream should be a generator yielding chunks of audio data.
    stream = [stream_file]

    requests = (speech.StreamingRecognizeRequest(audio_content=chunk)
                for chunk in stream)

    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=16000,
        language_code="en-US",
    )

    streaming_config = speech.StreamingRecognitionConfig(config=config)

    # streaming_recognize returns a generator.
    responses = client.streaming_recognize(
        config=streaming_config,
        requests=requests,
    )

    # BUG FIX: the original returned `alternative.transcript` after the
    # loops, raising UnboundLocalError whenever no responses arrived.
    transcript = ""
    for response in responses:
        # Once the transcription has settled, the first result will contain the
        # is_final result. The other results will be for subsequent portions of
        # the audio.
        for result in response.results:
            print("Finished: {}".format(result.is_final))
            print("Stability: {}".format(result.stability))
            alternatives = result.alternatives
            # The alternatives are ordered from most likely to least.
            for alternative in alternatives:
                print("Confidence: {}".format(alternative.confidence))
                print(u"Transcript: {}".format(alternative.transcript))
                transcript = alternative.transcript

    return transcript
def socket_stream(c):
    # Serve one client connection: wrap the socket in a NetworkAudioStream,
    # spawn a reader thread to fill its buffer, and run resumable streaming
    # recognition (en-IN) until the stream closes.
    with NetworkAudioStream(SAMPLE_RATE, CHUNK_SIZE, c) as stream:
        data = c.recv(1024)  # Dummy Thread
        print('Headers', data, len(data), threading.currentThread().getName())
        client = speech.SpeechClient()
        config = speech.RecognitionConfig(
            encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
            sample_rate_hertz=SAMPLE_RATE,
            language_code='en-IN',
            max_alternatives=1)
        streaming_config = speech.StreamingRecognitionConfig(
            config=config, interim_results=True)

        # Start data receiving thread to fill the buffer
        start_new_thread(read_network_stream, (
            c,
            stream,
        ))

        while not stream.closed:
            stream.audio_input = []
            audio_generator = stream.generator()
            requests = (speech.StreamingRecognizeRequest(audio_content=content)
                        for content in audio_generator)
            responses = client.streaming_recognize(streaming_config, requests)

            # Now, put the transcription responses to use.
            listen_print_loop(responses, stream)

            # Carry timing/audio state over so the next pass resumes after
            # a stream restart.
            if stream.result_end_time > 0:
                stream.final_request_end_time = stream.is_final_end_time
            stream.result_end_time = 0
            stream.last_audio_input = []
            stream.last_audio_input = stream.audio_input
            stream.audio_input = []
            stream.restart_counter = stream.restart_counter + 1
            if not stream.last_transcript_was_final:
                sys.stdout.write('final-\n')
            stream.new_stream = True
    # NOTE(review): reconstructed from whitespace-mangled source; the socket
    # close is assumed to happen after the stream context exits — confirm.
    c.close()
def main():
    """Indian-English streaming recognition from the microphone at 8 kHz."""
    client = speech.SpeechClient()
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=8000,
        language_code="en-IN")
    streaming_config = speech.StreamingRecognitionConfig(
        config=config, interim_results=True)

    with MicrophoneStream(8000, CHUNK) as stream:
        request_stream = (
            speech.StreamingRecognizeRequest(audio_content=chunk)
            for chunk in stream.generator()
        )
        responses = client.streaming_recognize(streaming_config, request_stream)
        listen_print_loop(responses)
def main():
    """Recognize microphone speech in `in_language_code` and hand the
    responses to interpret() along with Translate and Text-to-Speech
    clients and the asyncio event loop."""
    client = speech.SpeechClient()
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=in_language_code)
    streaming_config = speech.StreamingRecognitionConfig(
        config=config, interim_results=True)

    translate_client = translate.Client()
    speech_client = texttospeech.TextToSpeechClient()
    loop = asyncio.get_event_loop()

    with MicrophoneStream(RATE, CHUNK) as stream:
        request_stream = (
            speech.StreamingRecognizeRequest(audio_content=chunk)
            for chunk in stream.generator()
        )
        responses = client.streaming_recognize(streaming_config, request_stream)
        interpret(responses, translate_client, speech_client, loop)
def main(lang):
    """start bidirectional streaming from microphone input to speech API"""
    client = speech.SpeechClient()
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=SAMPLE_RATE,
        language_code="en-US",
        max_alternatives=1,
    )
    streaming_config = speech.StreamingRecognitionConfig(
        config=config, interim_results=True)

    mic_manager = ResumableMicrophoneStream(SAMPLE_RATE, CHUNK_SIZE)
    sys.stdout.write('\nTranscribing:\n\n')
    sys.stdout.write("=====================================================\n")

    with mic_manager as stream:
        # Restart recognition on each pass until the stream closes
        # (resumable streaming past the API's duration limit).
        while not stream.closed:
            stream.audio_input = []
            audio_generator = stream.generator()
            requests = (speech.StreamingRecognizeRequest(audio_content=content)
                        for content in audio_generator)
            responses = client.streaming_recognize(requests=requests,
                                                   config=streaming_config)

            # Now, put the transcription responses to use.
            listen_print_loop(responses, stream, lang)

            # Carry timing/audio state into the next pass after a restart.
            if stream.result_end_time > 0:
                stream.final_request_end_time = stream.is_final_end_time
            stream.result_end_time = 0
            stream.last_audio_input = []
            stream.last_audio_input = stream.audio_input
            stream.audio_input = []
            stream.restart_counter = stream.restart_counter + 1
            if not stream.last_transcript_was_final:
                sys.stdout.write("\n")
            stream.new_stream = True
def listen_talk():
    """Listen continuously and respond: greet on おはよう ("good morning"),
    introduce itself when asked its name, otherwise reply via
    send_message(); replies are spoken with jtalk and signaled over the
    serial port."""
    text, client, streaming_config = gcp_talk.get_talk()
    with text as stream:
        while not stream.closed:
            stream.audio_input = []
            audio_generator = stream.generator()
            requests = (speech.StreamingRecognizeRequest(audio_content=content)
                        for content in audio_generator)
            responses = client.streaming_recognize(streaming_config, requests)

            # Now, put the transcription responses to use.
            my_added_text = gcp_talk.listen_print_loop(responses, stream)
            if my_added_text is None:
                pass
            elif "おはよう" in my_added_text:
                # "Good morning" heard: acknowledge in kind.
                ser.write('3'.encode('utf-8'))
                display("Test", "Speaking", "おはようございます")
                jtalk.jtalk("おはようございます")
            elif ("あなたの名前は" in my_added_text) or ("名前" in my_added_text):
                # Asked for its name: introduce itself.
                ser.write('3'.encode('utf-8'))
                display("Test", "Speaking", "おはようございますよろしくお願いします。")
                jtalk.jtalk("エントと申します。よろしくお願いします。")
            else:
                # Anything else: forward to the chat backend, speak the reply.
                ser.write('3'.encode('utf-8'))
                reply = send_message(my_added_text)
                display("Test", "Speaking", reply)
                jtalk.jtalk(reply)

            # Stream-restart bookkeeping (resumable streaming).
            if stream.result_end_time > 0:
                stream.final_request_end_time = stream.is_final_end_time
            stream.result_end_time = 0
            stream.last_audio_input = []
            stream.last_audio_input = stream.audio_input
            stream.audio_input = []
            stream.restart_counter = stream.restart_counter + 1
            stream.new_stream = True
            # NOTE(review): reconstructed from whitespace-mangled source;
            # this serial write is assumed to run once per loop pass — confirm.
            ser.write('2'.encode('utf-8'))
def handle_request_order(self, goal):
    """Action-server callback: listen for a spoken order over the
    microphone and report it back as a successful ReceiveTargetResult."""
    print("Request!!!")
    with MicrophoneStream(RATE, CHUNK) as stream:
        request_stream = (
            speech.StreamingRecognizeRequest(audio_content=chunk)
            for chunk in stream.generator()
        )
        responses = self.client.streaming_recognize(
            self.streaming_config, request_stream)
        # Turn the transcription responses into the spoken order.
        received_order = self.listen_print_loop(responses)

        result = ReceiveTargetResult()
        success = True
        if success:
            result.result = True
            result.data = received_order
            self.order_server.set_succeeded(result)
def run(self):
    # Consume streaming recognition responses from the microphone and
    # dispatch recognized text to the action executor on the event loop.
    # Stores the active stream in self._microphone_stream and clears it
    # (after stop_google()) before every return path.
    with MicrophoneStream(self._audio_interface, self._rate,
                          self._chunk) as stream:
        self._microphone_stream = stream
        audio_generator = stream.generator()
        requests = (speech.StreamingRecognizeRequest(audio_content=content)
                    for content in audio_generator)
        responses = self._client.streaming_recognize(
            self._streaming_config, requests)
        for response in responses:
            for result in response.results:
                my_log.info(result)
                for alternative in result.alternatives:
                    transcript = alternative.transcript
                    # Fallback confidence when the field is absent.
                    confidence = alternative.confidence if "confidence" in alternative else 0.01
                    if "is_final" in result and result.is_final:
                        # todo: does the text need to be concatenated across
                        # is_final results? Test!
                        if text_has_action(transcript, confidence):
                            stop_google()
                            self._microphone_stream = None  # must be after stop_google()
                            loop.call_soon_threadsafe(
                                call_text_execute_action, transcript,
                                confidence)
                            return
                        if any([
                                word.lower() in transcript.lower()
                                for word in settings.ABORT_WORDS
                        ]):
                            my_log.debug(
                                f"Google aborted because abort word in '{transcript}' was heard"
                            )
                            stop_google()
                            self._microphone_stream = None  # must be after stop_google()
                            return
        # for loop did not react: Not understood
        # NOTE(review): reconstructed from whitespace-mangled source; the
        # fallback dispatch is assumed to run inside the with block — confirm.
        loop.call_soon_threadsafe(call_text_execute_action, "", 1)
        self._microphone_stream = None