Example #1
def call_runner(limit_cnt=1):
    # See http://g.co/cloud/speech/docs/languages
    # for a list of supported languages.
    language_code = "ko-KR"  # a BCP-47 language tag

    client = speech.SpeechClient()
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code,
    )

    streaming_config = speech.StreamingRecognitionConfig(config=config,
                                                         interim_results=True)

    with MicrophoneStream(RATE, CHUNK) as stream:
        audio_generator = stream.generator()
        requests = (speech.StreamingRecognizeRequest(audio_content=content)
                    for content in audio_generator)

        responses = client.streaming_recognize(streaming_config, requests)
        # Now, put the transcription responses to use.
        movie_dict_list = listen_print_loop(responses, stream, limit_cnt)
    print("====", movie_dict_list)

    # call_runner()
    return movie_dict_list
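Most of the snippets on this page assume the MicrophoneStream helper and the module-level RATE and CHUNK constants from Google's streaming quickstart, which are not shown here. A minimal sketch of that helper, assuming PyAudio is installed:

import queue

import pyaudio

RATE = 16000                 # 16 kHz LINEAR16 audio
CHUNK = int(RATE / 10)       # 100 ms per buffer


class MicrophoneStream:
    """Opens a recording stream as a generator yielding audio chunks."""

    def __init__(self, rate=RATE, chunk=CHUNK):
        self._rate = rate
        self._chunk = chunk
        self._buff = queue.Queue()  # thread-safe buffer of audio data
        self.closed = True

    def __enter__(self):
        self._audio_interface = pyaudio.PyAudio()
        self._audio_stream = self._audio_interface.open(
            format=pyaudio.paInt16, channels=1, rate=self._rate,
            input=True, frames_per_buffer=self._chunk,
            stream_callback=self._fill_buffer,
        )
        self.closed = False
        return self

    def __exit__(self, type, value, traceback):
        self._audio_stream.stop_stream()
        self._audio_stream.close()
        self.closed = True
        self._buff.put(None)  # signal the generator to terminate
        self._audio_interface.terminate()

    def _fill_buffer(self, in_data, frame_count, time_info, status_flags):
        """Continuously collect data from the audio stream into the buffer."""
        self._buff.put(in_data)
        return None, pyaudio.paContinue

    def generator(self):
        while not self.closed:
            # Block until at least one chunk of data is available.
            chunk = self._buff.get()
            if chunk is None:
                return
            data = [chunk]
            # Consume whatever else is buffered before yielding.
            while True:
                try:
                    chunk = self._buff.get(block=False)
                    if chunk is None:
                        return
                    data.append(chunk)
                except queue.Empty:
                    break
            yield b"".join(data)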
Example #2
def main():
    language_code = 'ko-KR'
    print('AI 스피커가 동작하는데 시간이 걸립니다. 잠시만 기다려 주세요.')  # "Starting the AI speaker takes a while. Please wait a moment."

    client = speech.SpeechClient()

    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code)

    streaming_config = speech.StreamingRecognitionConfig(config=config,
                                                         interim_results=True)

    playsound('../../open_sound/open.wav')
    print('원하시는 것을 말씀 해주세요.')  # "Please say what you would like."

    with MicrophoneStream(RATE, CHUNK) as stream:
        audio_generator = stream.generator()
        requests = (speech.StreamingRecognizeRequest(audio_content=content)
                    for content in audio_generator)
        responses = client.streaming_recognize(streaming_config, requests)
        # Now, put the transcription responses to use.
        path = r'C:\Users\leewo\hci_test\err_sound'
        sounds = os.listdir(path)
        for sound in sounds:
            listen_print_loop(responses, sound)
Example #3
    def download_audio_and_transcribe(self, recording_url: str) -> str:
        transcription: str = ""
        self.connect(destination="speech")
        response = requests.get(url=recording_url, stream=True)

        reqs = (speech.StreamingRecognizeRequest(audio_content=chunk)
                for chunk in response.iter_content(chunk_size=4096))  # the default chunk_size of 1 byte is far too small for streaming
        config = speech.RecognitionConfig(
            encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
            sample_rate_hertz=8000,
            language_code="en-US",
        )
        streaming_config = speech.StreamingRecognitionConfig(config=config)

        responses = self.speech_client.streaming_recognize(config=streaming_config, requests=reqs,)

        for response in responses:
            # Once the transcription has settled, the first result will contain the
            # is_final result. The other results will be for subsequent portions of
            # the audio.
            for result in response.results:
                # print("Finished: {}".format(result.is_final))
                # print("Stability: {}".format(result.stability))
                alternatives = result.alternatives
                # The alternatives are ordered from most likely to least.
                for alternative in alternatives:
                    # print("Confidence: {}".format(alternative.confidence))
                    transcription = u"{}".format(alternative.transcript)

        return transcription
Example #4
def stt():
    # See http://g.co/cloud/speech/docs/languages
    # for a list of supported languages.
    language_code = "en-US"  # a BCP-47 language tag
    #language_code = 'ko-KR'  # a BCP-47 language tag

    client = speech.SpeechClient()
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code,
    )

    streaming_config = speech.StreamingRecognitionConfig(config=config,
                                                         interim_results=True,
                                                         single_utterance=True)

    with MicrophoneStream(RATE, CHUNK) as stream:
        audio_generator = stream.generator()
        requests = (speech.StreamingRecognizeRequest(audio_content=content)
                    for content in audio_generator)

        responses = client.streaming_recognize(streaming_config, requests)

        # Now, put the transcription responses to use.
        # print("here is final text:", end = "")
        return listen_print_loop(responses)  # return final text
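The listen_print_loop helper that these examples call is likewise not shown (the project-specific variants on this page take extra arguments). A simplified sketch based on Google's quickstart, which overwrites interim results in place and returns once a final "exit" or "quit" is heard:

import re
import sys


def listen_print_loop(responses):
    """Iterates through server responses, printing interim results in place."""
    num_chars_printed = 0
    for response in responses:
        if not response.results:
            continue
        # The first result holds the portion of audio currently being processed.
        result = response.results[0]
        if not result.alternatives:
            continue
        transcript = result.alternatives[0].transcript

        # Pad with spaces to overwrite a longer previous interim result.
        overwrite_chars = " " * (num_chars_printed - len(transcript))

        if not result.is_final:
            sys.stdout.write(transcript + overwrite_chars + "\r")
            sys.stdout.flush()
            num_chars_printed = len(transcript)
        else:
            print(transcript + overwrite_chars)
            if re.search(r"\b(exit|quit)\b", transcript, re.I):
                print("Exiting..")
                return transcript
            num_chars_printed = 0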
Example #5
def transcribe(transcript, q):
    # See http://g.co/cloud/speech/docs/languages
    # for a list of supported languages.
    language_code = "en-US"
    client = speech.SpeechClient()
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code)

    streaming_config = speech.StreamingRecognitionConfig(config=config,
                                                         interim_results=True)

    with MicStream(RATE, CHUNK) as stream:
        generator = stream.generator()
        requests = (speech.StreamingRecognizeRequest(audio_content=content)
                    for content in generator)

        responses = client.streaming_recognize(streaming_config, requests)

        # quit_transcript = [False]

        # while 1:
        #     transcription(responses, quit_transcript)

        #     if quit_transcript[0]:
        #         print("Exit")
        #         break

        # Now, put the transcription responses to use.
        listen_print_loop(responses, transcript, q)
Example #6
def main():
    # See http://g.co/cloud/speech/docs/languages
    # for a list of supported languages.
    language_code = 'ja-JP'  # a BCP-47 language tag
    import os
    os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = "speech-rec-827143ff9a4c.json"
    client = speech.SpeechClient()
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code)
    streaming_config = speech.StreamingRecognitionConfig(
        config=config,
        interim_results=True)

    print("start rec")
    with MicrophoneStream(RATE, CHUNK) as stream:
        audio_generator = stream.generator()
        requests = (speech.StreamingRecognizeRequest(audio_content=content)
                    for content in audio_generator)

        responses = client.streaming_recognize(streaming_config, requests)

        # Now, put the transcription responses to use.
        for s in listen_print_loop(responses):
            # voiceroid.say(s)
            print(s)
Example #7
def main():

    # See http://g.co/cloud/speech/docs/languages
    # for a list of supported languages.
    language_code = 'ko-KR'  # a BCP-47 language tag

    client = speech.SpeechClient()
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code)
    streaming_config = speech.StreamingRecognitionConfig(
        config=config,
        # Adding the single_utterance=True parameter makes the API recognize and
        # respond to a single spoken utterance only; if the speaker pauses
        # mid-speech, streaming recognition ends, so the device stops picking up
        # its own speaker output again.
        # single_utterance=True,
        # Changed it to False; with that change, only results where is_final is
        # True are returned.
        interim_results=True)


    with MicrophoneStream(RATE, CHUNK) as stream:
        audio_generator = stream.generator()
        requests = (speech.StreamingRecognizeRequest(audio_content=content)
                    for content in audio_generator)

        responses = client.streaming_recognize(streaming_config, requests)

        # so recognition can be run again even after listen_print_loop returns
        listen_print_loop(responses)
        print('main: finished listen_print_loop')
Example #8
    def transcript_in_loop(self, stream):
        while not stream.closed:
            sys.stdout.write(YELLOW)
            sys.stdout.write("\n" +
                             str(STREAMING_LIMIT * stream.restart_counter) +
                             ": NEW REQUEST\n")

            stream.audio_input = []
            audio_generator = stream.generator()

            requests = (speech.StreamingRecognizeRequest(audio_content=content)
                        for content in audio_generator)

            responses = self.client.streaming_recognize(
                self.streaming_config, requests)

            # Now, put the transcription responses to use.
            transcripts = self.listen_print_loop(responses, stream)

            if stream.result_end_time > 0:
                stream.final_request_end_time = stream.is_final_end_time
            stream.result_end_time = 0
            stream.last_audio_input = []
            stream.last_audio_input = stream.audio_input
            stream.audio_input = []
            stream.restart_counter = stream.restart_counter + 1
            stream.new_stream = True

            if not stream.last_transcript_was_final:
                sys.stdout.write("\n")

        return transcripts
Example #9
def main(lacalut):
    language_code = "uk-UA"

    client = speech.SpeechClient()

    interaction_type = speech.RecognitionMetadata.InteractionType.DICTATION

    metadata = speech.RecognitionMetadata(interaction_type=interaction_type)

    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code,
        metadata=metadata,
    )

    streaming_config = speech.StreamingRecognitionConfig(config=config,
                                                         interim_results=True)

    with MicrophoneStream(RATE, CHUNK) as stream:
        audio_generator = stream.generator()
        requests = (speech.StreamingRecognizeRequest(audio_content=content)
                    for content in audio_generator)

        responses = client.streaming_recognize(streaming_config, requests)

        listen_print_loop(responses, lacalut)
Example #10
def main():
    # See http://g.co/cloud/speech/docs/languages
    # for a list of supported languages.
    language_code = "ja-JP"  # a BCP-47 language tag

    credential_path = "./aitraining-306004-2e354d0f5ba9.json"

    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = credential_path

    client = speech.SpeechClient()
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code,
    )

    streaming_config = speech.StreamingRecognitionConfig(
        config=config, interim_results=True
    )

    with MicrophoneStream(RATE, CHUNK) as stream:
        audio_generator = stream.generator()
        requests = (
            speech.StreamingRecognizeRequest(audio_content=content)
            for content in audio_generator
        )

        responses = client.streaming_recognize(streaming_config, requests)

        # Now, put the transcription responses to use.
        listen_print_loop(responses)
Example #11
    def prepare_run(self):
        requests = (gspeech.StreamingRecognizeRequest(audio_content=content)
                    for content in self._generator())
        self.responses = self.client.streaming_recognize(
            self.streaming_config, requests)
        t = threading.Thread(target=self._produce_predictions_loop)
        t.start()
Example #12
def main():
    # See http://g.co/cloud/speech/docs/languages
    # for a list of supported languages.
    language_code = "en-US"  # a BCP-47 language tag

    Labeling = speech.SpeakerDiarizationConfig(
        enable_speaker_diarization=True,
        min_speaker_count=1,
        max_speaker_count=5,
    )

    client = speech.SpeechClient()
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code,
        enable_automatic_punctuation=True,
        diarization_config=Labeling,
    )

    streaming_config = speech.StreamingRecognitionConfig(config=config,
                                                         interim_results=True)

    with MicrophoneStream(RATE, CHUNK) as stream:
        audio_generator = stream.generator()
        requests = (speech.StreamingRecognizeRequest(audio_content=content)
                    for content in audio_generator)

        responses = client.streaming_recognize(streaming_config, requests)

        # Now, put the transcription responses to use.
        listen_print_loop(responses)
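A short sketch of how the speaker labels produced by the diarization config above can be read back, assuming `response` is a final StreamingRecognizeResponse from the stream (based on Google's diarization sample):

result = response.results[0]
# With diarization enabled, each word in the top alternative carries a speaker_tag.
for word_info in result.alternatives[0].words:
    print("word: '{}', speaker_tag: {}".format(word_info.word, word_info.speaker_tag))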
Example #13
def main():
    # See http://g.co/cloud/speech/docs/languages
    # for a list of supported languages.
    language_code = "en-US"  # a BCP-47 language tag

    client = speech.SpeechClient.from_service_account_json(
        "../creds/speech_google_credentials.json")
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code,
    )

    streaming_config = speech.StreamingRecognitionConfig(config=config,
                                                         interim_results=True)

    with MicrophoneStream(RATE, CHUNK) as stream:
        audio_generator = stream.generator()
        requests = (speech.StreamingRecognizeRequest(audio_content=content)
                    for content in audio_generator)

        responses = client.streaming_recognize(streaming_config, requests)

        # Now, put the transcription responses to use.
        listen_print_loop(responses)
Example #14
    def code_driver(self):
        with MicrophoneStream(self.rate, self.chunk) as stream:
            audio_generator = stream.generator()
            requests = (speech.StreamingRecognizeRequest(audio_content=content)
                        for content in audio_generator)

            self.responses = self.client.streaming_recognize(
                self.streaming_config, requests)
            self.listen_print_loop()
Example #15
def main():
    """start bidirectional streaming from microphone input to speech API"""

    client = speech.SpeechClient()
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=SAMPLE_RATE,
        language_code="ko-KR", #en-US
        max_alternatives=1,
    )

    streaming_config = speech.StreamingRecognitionConfig(
        config=config, interim_results=True
    )

    mic_manager = ResumableMicrophoneStream(SAMPLE_RATE, CHUNK_SIZE)
    print(mic_manager.chunk_size)
    sys.stdout.write(YELLOW)
    sys.stdout.write('\nListening, say "Quit" or "Exit" to stop.\n\n')
    sys.stdout.write("End (ms)       Transcript Results/Status\n")
    sys.stdout.write("=====================================================\n")

    with mic_manager as stream:

        while not stream.closed:
            sys.stdout.write(YELLOW)
            sys.stdout.write(
                "\n" + str(STREAMING_LIMIT * stream.restart_counter) + ": NEW REQUEST\n"
            )

            stream.audio_input = []
            audio_generator = stream.generator()

            requests = (
                speech.StreamingRecognizeRequest(audio_content=content)
                for content in audio_generator
            )

            responses = client.streaming_recognize(streaming_config, requests)

            # Now, put the transcription responses to use.
            return_word = listen_print_loop(responses, stream)
            print(return_word)

            if stream.result_end_time > 0:
                stream.final_request_end_time = stream.is_final_end_time
            stream.result_end_time = 0
            stream.last_audio_input = []
            stream.last_audio_input = stream.audio_input
            stream.audio_input = []
            stream.restart_counter = stream.restart_counter + 1

            if not stream.last_transcript_was_final:
                sys.stdout.write("\n")
            stream.new_stream = True
    
        return return_word
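The resumable-stream examples on this page (this one and #8, #17, #24, #27, #28) lean on module constants and ANSI color codes from Google's "endless streaming" sample, not shown here. Their usual values, stated as an assumption:

STREAMING_LIMIT = 240000             # 4 minutes in ms; restart before the API's ~5-minute cap
SAMPLE_RATE = 16000
CHUNK_SIZE = int(SAMPLE_RATE / 10)   # 100 ms

RED = "\033[0;31m"
GREEN = "\033[0;32m"
YELLOW = "\033[0;33m"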
Example #16
    def start(self):
        client = speech.SpeechClient()
        stream = self.generator()
        requests = (
            speech.StreamingRecognizeRequest(audio_content=content)
            for content in stream
        )
        responses = client.streaming_recognize(self.streaming_config, requests)
        self.process_responses_loop(responses)
Example #17
    def start(self, auto_turn=True, reset_result=False):
        # If auto_turn is True, turn-taking is performed automatically.
        # In most cases True is fine.
        self.print_debug("start Google ASR")
        if reset_result:
            self.recognition_result = ""
        self.is_listening = False
        self.utt_start_time = None
        self.turn_start_time = time.time()
        turn_thread_flag = False
        client = speech.SpeechClient()
        config = speech.RecognitionConfig(
            encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
            sample_rate_hertz=SAMPLE_RATE,
            language_code="ja-JP",
            max_alternatives=1,
        )
        # streaming_config = speech.StreamingRecognitionConfig(
        #     config=config, interim_results=True)
        # Turn on the single_utterance setting; without it, the recognition
        # result takes 60 seconds to be finalized.
        streaming_config = speech.StreamingRecognitionConfig(
            config=config, interim_results=True, single_utterance=True)
        mic_manager = ResumableMicrophoneStream(SAMPLE_RATE, CHUNK_SIZE)
        with mic_manager as self.stream:
            while not self.stream.closed:
                self.stream.audio_input = []
                audio_generator = self.stream.generator()
                requests = (speech.StreamingRecognizeRequest(
                    audio_content=content) for content in audio_generator)

                if not turn_thread_flag and auto_turn:
                    turn_thread_flag = True
                    self.m_turn.start_turn_thread()

                responses = client.streaming_recognize(streaming_config,
                                                       requests)

                # Start the loop that retrieves speech recognition results.
                self.listen_loop(responses)

                # Resume recognition when the stream limit is exceeded.
                # This does not run unless the stream stays connected for over four minutes.
                if self.stream.result_end_time > 0:
                    self.stream.final_request_end_time = self.stream.is_final_end_time
                self.stream.result_end_time = 0
                self.stream.last_audio_input = []
                self.stream.last_audio_input = self.stream.audio_input
                self.stream.audio_input = []
                self.stream.restart_counter = self.stream.restart_counter + 1

                if not self.stream.last_transcript_was_final:
                    sys.stdout.write("\n")
                self.stream.new_stream = True
        self.file_num += 1
Example #18
    def perform_transcription(self):

        while not glbl.G_EXIT_FLAG:

            glbl.main_logger.info(f"starting transcription iteration")

            try:
                # creating a generator using data supplied by PacketizedPCMReader
                generator_obj = PCMGenerator(self.pcm_q, self.pcm_stream_state)
                audio_generator = generator_obj.get_bytes()

                # Block until there is data in the audio_generator.
                for data in audio_generator:
                    break

                # the transcription request stream (via a generator)
                requests = (speech.StreamingRecognizeRequest(
                    audio_content=content) for content in audio_generator)

                # the transcription response stream (via a generator)
                self.responses = self.speech_client.streaming_recognize(
                    self.speech_config, requests)

                # forwarding responses to a 'q' that is read/handled by SRTWriter
                self.queue_transcription_responses()

                # control comes here when there are no more responses
                # this can happen if
                #   (a) there is no more input (or)
                #   (b) streaming_recognize cannot process more than this duration
                #       (in which case we need to set the pcm_stream_state right
                #        and begin all over again)

                self.pcm_stream_state.on_iteration_complete()

                glbl.main_logger.info(
                    "RETRY AFTER 5MIN, "
                    f"last_sent={self.pcm_stream_state.last_sub_pts}")

                lk = self.pcm_stream_state.get_last_key()
                if lk is not None:
                    glbl.main_logger.info(
                        f"=====audio_pts_map[{lk}] = "
                        f"{self.pcm_stream_state.audio_pts_map[lk]}======")

            except google.api_core.exceptions.ServiceUnavailable:

                glbl.main_logger.info(
                    "ServiceUnavailable exception, "
                    f"retry_after_sec={confvars.G_RETRY_DURATION_SEC_ON_SERVICE_UNAVAILABLE}")
                time.sleep(
                    confvars.G_RETRY_DURATION_SEC_ON_SERVICE_UNAVAILABLE)
Example #19
    def run(self):
        with MicrophoneStream(RATE, CHUNK) as stream:
            self.mic = stream
            audio_generator = stream.generator()
            requests = (speech.StreamingRecognizeRequest(audio_content=content)
                        for content in audio_generator)

            responses = self.client.streaming_recognize(
                self.streaming_config, requests)

            # Now, put the transcription responses to use.
            self.listen_print_loop(responses, stream)
        self._buff.put(None)
        self.status = False
Example #20
    def recognize_async_audio_stream(self, language_code="en-US"):
        """
        Recognize in "real-time" from microphone stream. 
        Returns when a final_result is found.
        
        May be created as a thread of its own or it'll block until a final result is found.
        
        Stores all results in the `final_result_queue` queue.

        Args:
            language_code -- language to use for recognition. See `languages` for supported languages.   
        """
        if language_code not in self.languages:
            print(
                '"{}" is not a supported language code. Make sure it\'s supported by Google and try adding it to the languages list.\n'
                .format(language_code))
            return

        # Clear all items in the queue for the new stream.
        self.final_result_queue.queue.clear()

        config_stream = speech.StreamingRecognitionConfig(
            config=speech.RecognitionConfig(
                encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
                sample_rate_hertz=self.microphone_handler.RATE,
                language_code=language_code,
                enable_automatic_punctuation=True,
            ),
            interim_results=True)

        self.microphone_handler.start_recording(streaming=True)
        while self.microphone_handler.streaming:
            data = self.microphone_handler.stream_generator()
            requests = (speech.StreamingRecognizeRequest(audio_content=content)
                        for content in data)

            try:
                responses = self.client.streaming_recognize(
                    config_stream, requests)
                for response in responses:
                    self.final_result_queue.put(response.results[0])
                    if response.results[0].is_final:
                        return  # Prevents more than one recording; doesn't halt the recording itself. (temp)
                    if self.debug:
                        # Print all non-final results in the terminal (debug).
                        print(response.results[0].alternatives[0].transcript + '\n')
            except Exception:
                print('Failed to get response.')
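A hypothetical usage sketch for the method above, assuming `recognizer` is an instance of this class (names are illustrative):

import threading

# Run recognition without blocking the caller; results land in final_result_queue.
worker = threading.Thread(
    target=recognizer.recognize_async_audio_stream,
    kwargs={"language_code": "en-US"},
    daemon=True,
)
worker.start()

result = recognizer.final_result_queue.get()  # blocks until a result arrives
if result.is_final:
    print(result.alternatives[0].transcript)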
Example #21
    def start_speech_to_text(self):
        '''
        Opens the microphone stream and sends it to the API to create a transcription from it.
        '''
        with MicrophoneStream(self.RATE, self.CHUNK) as stream:
            # In practice, stream should be a generator yielding chunks of audio data
            audio_generator = stream.generator()
            requests = (speech.StreamingRecognizeRequest(audio_content=content)
                        for content in audio_generator)

            # streaming_recognize returns a generator
            responses = self.client.streaming_recognize(
                self.streaming_config, requests)

            # create transcription from the responses
            self.listen_loop(responses)
Example #22
    def transcribe(self):
        # Generator to return transcription results

        if not self.is_supported:
            return

        logger.debug("googleTranscribe.transcribe ENTER")

        while True:
            audio_generator = self.audio_device.streamGenerator()

            requests = (speech.StreamingRecognizeRequest(
                audio_content=content, ) for content in audio_generator)

            responses = self.client.streaming_recognize(
                requests=requests, config=self.streaming_config)

            try:
                for response in responses:
                    if not response.results:
                        continue

                    result = response.results[0]

                    if not result.is_final and not speakreader.CONFIG.SHOW_INTERIM_RESULTS:
                        continue

                    if not result.alternatives:
                        continue

                    transcript = {
                        'transcript': result.alternatives[0].transcript,
                        'is_final': result.is_final,
                    }

                    yield transcript

                logger.debug("googleTranscribe.transcribe EXIT")
                break

            except exceptions.OutOfRange:
                # Google Cloud limits the stream to about 5 minutes. Just loop.
                continue
            except exceptions.DeadlineExceeded:
                # Google Cloud limits the stream to about 5 minutes. Just loop.
                continue
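A hypothetical consumer of the generator above, assuming `transcriber` is an instance of this class:

# Interim results are already filtered inside transcribe() unless the
# SHOW_INTERIM_RESULTS config flag is set, so just drain transcripts as they arrive.
for entry in transcriber.transcribe():
    marker = "FINAL" if entry['is_final'] else "interim"
    print("[{}] {}".format(marker, entry['transcript']))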
Example #23
def transcribe_streaming(stream_file):
    """Streams transcription of the given audio file."""
    import io
    from google.cloud import speech

    client = speech.SpeechClient()

    # [START speech_python_migration_streaming_request]

    # In practice, stream should be a generator yielding chunks of audio data.
    stream = [stream_file]

    requests = (speech.StreamingRecognizeRequest(audio_content=chunk)
                for chunk in stream)

    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=16000,
        language_code="en-US",
    )

    streaming_config = speech.StreamingRecognitionConfig(config=config)

    # streaming_recognize returns a generator.
    # [START speech_python_migration_streaming_response]
    responses = client.streaming_recognize(
        config=streaming_config,
        requests=requests,
    )
    # [END speech_python_migration_streaming_request]

    for response in responses:
        # Once the transcription has settled, the first result will contain the
        # is_final result. The other results will be for subsequent portions of
        # the audio.
        for result in response.results:
            print("Finished: {}".format(result.is_final))
            print("Stability: {}".format(result.stability))
            alternatives = result.alternatives
            # The alternatives are ordered from most likely to least.
            for alternative in alternatives:
                print("Confidence: {}".format(alternative.confidence))
                print(u"Transcript: {}".format(alternative.transcript))
    # [END speech_python_migration_streaming_response]
    return alternative.transcript
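As the comment in this example notes, `stream` should really be a generator yielding chunks of audio data rather than a single-element list. A minimal sketch of such a generator for a raw LINEAR16 file (path and chunk size are illustrative):

def file_chunk_generator(path, chunk_size=4096):
    """Yields successive chunks of raw audio bytes from a file."""
    with open(path, "rb") as audio_file:
        while True:
            chunk = audio_file.read(chunk_size)
            if not chunk:
                return
            yield chunk


requests = (speech.StreamingRecognizeRequest(audio_content=chunk)
            for chunk in file_chunk_generator("audio.raw"))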
Example #24
def socket_stream(c):

    with NetworkAudioStream(SAMPLE_RATE, CHUNK_SIZE, c) as stream:

        data = c.recv(1024)  # Dummy Thread
        print('Headers', data, len(data), threading.currentThread().getName())

        client = speech.SpeechClient()
        config = speech.RecognitionConfig(
            encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
            sample_rate_hertz=SAMPLE_RATE,
            language_code='en-IN',
            max_alternatives=1)
        streaming_config = speech.StreamingRecognitionConfig(
            config=config, interim_results=True)

        #Start data receiving thread to fill the buffer
        start_new_thread(read_network_stream, (
            c,
            stream,
        ))

        while not stream.closed:
            stream.audio_input = []
            audio_generator = stream.generator()

            requests = (speech.StreamingRecognizeRequest(audio_content=content)
                        for content in audio_generator)

            responses = client.streaming_recognize(streaming_config, requests)
            # Now, put the transcription responses to use.
            listen_print_loop(responses, stream)

            if stream.result_end_time > 0:
                stream.final_request_end_time = stream.is_final_end_time
            stream.result_end_time = 0
            stream.last_audio_input = []
            stream.last_audio_input = stream.audio_input
            stream.audio_input = []
            stream.restart_counter = stream.restart_counter + 1

            if not stream.last_transcript_was_final:
                sys.stdout.write('final-\n')
            stream.new_stream = True
    c.close()
Example #25
def main():

    client = speech.SpeechClient()
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=8000,
        language_code="en-IN")
    streaming_config = speech.StreamingRecognitionConfig(config=config,
                                                         interim_results=True)

    with MicrophoneStream(8000, CHUNK) as stream:
        audio_generator = stream.generator()
        requests = (speech.StreamingRecognizeRequest(audio_content=content)
                    for content in audio_generator)

        responses = client.streaming_recognize(streaming_config, requests)

        listen_print_loop(responses)
Example #26
def main():
    client = speech.SpeechClient()
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=in_language_code)
    streaming_config = speech.StreamingRecognitionConfig(config=config,
                                                         interim_results=True)
    translate_client = translate.Client()
    speech_client = texttospeech.TextToSpeechClient()
    loop = asyncio.get_event_loop()

    with MicrophoneStream(RATE, CHUNK) as stream:
        audio_generator = stream.generator()
        requests = (speech.StreamingRecognizeRequest(audio_content=content)
                    for content in audio_generator)
        responses = client.streaming_recognize(streaming_config, requests)
        interpret(responses, translate_client, speech_client, loop)
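The interpret() helper used above is not shown; a hypothetical sketch of what it might do, assuming `from google.cloud import texttospeech` at module level and an illustrative out_language_code (translate.Client is the v2 Translation client; playback via the asyncio loop is omitted):

def interpret(responses, translate_client, speech_client, loop, out_language_code="en-US"):
    for response in responses:
        if not response.results:
            continue
        result = response.results[0]
        if not result.is_final or not result.alternatives:
            continue
        text = result.alternatives[0].transcript

        # Translate the final transcript into the target language.
        translation = translate_client.translate(
            text, target_language=out_language_code.split("-")[0])["translatedText"]
        print(translation)

        # Synthesize the translation to LINEAR16 audio.
        tts_response = speech_client.synthesize_speech(
            input=texttospeech.SynthesisInput(text=translation),
            voice=texttospeech.VoiceSelectionParams(
                language_code=out_language_code,
                ssml_gender=texttospeech.SsmlVoiceGender.NEUTRAL),
            audio_config=texttospeech.AudioConfig(
                audio_encoding=texttospeech.AudioEncoding.LINEAR16),
        )
        # tts_response.audio_content now holds the synthesized audio bytes.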
Example #27
def main(lang):
    """start bidirectional streaming from microphone input to speech API"""

    client = speech.SpeechClient()
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=SAMPLE_RATE,
        language_code="en-US",
        max_alternatives=1,
    )
    streaming_config = speech.StreamingRecognitionConfig(config=config,
                                                         interim_results=True)

    mic_manager = ResumableMicrophoneStream(SAMPLE_RATE, CHUNK_SIZE)
    sys.stdout.write('\nTranscribing:\n\n')
    sys.stdout.write("=====================================================\n")

    with mic_manager as stream:

        while not stream.closed:
            stream.audio_input = []
            audio_generator = stream.generator()

            requests = (speech.StreamingRecognizeRequest(audio_content=content)
                        for content in audio_generator)

            responses = client.streaming_recognize(requests=requests,
                                                   config=streaming_config)

            # Now, put the transcription responses to use.
            listen_print_loop(responses, stream, lang)

            if stream.result_end_time > 0:
                stream.final_request_end_time = stream.is_final_end_time
            stream.result_end_time = 0
            stream.last_audio_input = []
            stream.last_audio_input = stream.audio_input
            stream.audio_input = []
            stream.restart_counter = stream.restart_counter + 1

            if not stream.last_transcript_was_final:
                sys.stdout.write("\n")
            stream.new_stream = True
Example #28
def listen_talk():
    text, client, streaming_config = gcp_talk.get_talk()
    with text as stream:

        while not stream.closed:

            stream.audio_input = []

            audio_generator = stream.generator()

            requests = (speech.StreamingRecognizeRequest(audio_content=content)
                        for content in audio_generator)

            responses = client.streaming_recognize(streaming_config, requests)

            # Now, put the transcription responses to use.
            my_added_text = gcp_talk.listen_print_loop(responses, stream)
            if my_added_text is None:
                pass
            elif "おはよう" in my_added_text:
                ser.write('3'.encode('utf-8'))
                display("Test", "Speaking", "おはようございます")
                jtalk.jtalk("おはようございます")
            elif ("あなたの名前は" in my_added_text) or ("名前" in my_added_text):
                ser.write('3'.encode('utf-8'))
                display("Test", "Speaking", "おはようございますよろしくお願いします。")
                jtalk.jtalk("エントと申します。よろしくお願いします。")
            else:
                ser.write('3'.encode('utf-8'))
                reply = send_message(my_added_text)
                display("Test", "Speaking", reply)
                jtalk.jtalk(reply)

            if stream.result_end_time > 0:
                stream.final_request_end_time = stream.is_final_end_time
            stream.result_end_time = 0
            stream.last_audio_input = []
            stream.last_audio_input = stream.audio_input
            stream.audio_input = []
            stream.restart_counter = stream.restart_counter + 1
            stream.new_stream = True
            ser.write('2'.encode('utf-8'))
Example #29
    def handle_request_order(self, goal):
        print("Request!!!")

        with MicrophoneStream(RATE, CHUNK) as stream:
            audio_generator = stream.generator()
            requests = (speech.StreamingRecognizeRequest(audio_content=content)
                        for content in audio_generator)

            responses = self.client.streaming_recognize(self.streaming_config, requests)

            # Now, put the transcription responses to use.
            received_order = self.listen_print_loop(responses)

            result = ReceiveTargetResult()
            success = True

            if success:
                result.result = True
                result.data = received_order
                self.order_server.set_succeeded(result)
Example #30
    def run(self):
        with MicrophoneStream(self._audio_interface, self._rate,
                              self._chunk) as stream:
            self._microphone_stream = stream
            audio_generator = stream.generator()
            requests = (speech.StreamingRecognizeRequest(audio_content=content)
                        for content in audio_generator)

            responses = self._client.streaming_recognize(
                self._streaming_config, requests)

            for response in responses:
                for result in response.results:
                    my_log.info(result)
                    for alternative in result.alternatives:
                        transcript = alternative.transcript
                        confidence = alternative.confidence if "confidence" in alternative else 0.01

                        if "is_final" in result and result.is_final:  # todo: Muss der text bei den is_final aneinander gehängt werden? testen!
                            if text_has_action(transcript, confidence):
                                stop_google()
                                self._microphone_stream = None  # must be after stop_google()
                                loop.call_soon_threadsafe(
                                    call_text_execute_action, transcript,
                                    confidence)
                                return

                        if any([
                                word.lower() in transcript.lower()
                                for word in settings.ABORT_WORDS
                        ]):
                            my_log.debug(
                                f"Google aborted because abort word in '{transcript}' was heard"
                            )
                            stop_google()
                            self._microphone_stream = None  # must be after stop_google()
                            return

        # for loop did not react: Not understood
        loop.call_soon_threadsafe(call_text_execute_action, "", 1)
        self._microphone_stream = None