Example 1
def call_runner(limit_cnt=1):
    # See http://g.co/cloud/speech/docs/languages
    # for a list of supported languages.
    language_code = "ko-KR"  # a BCP-47 language tag

    client = speech.SpeechClient()
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code,
    )

    streaming_config = speech.StreamingRecognitionConfig(config=config,
                                                         interim_results=True)

    with MicrophoneStream(RATE, CHUNK) as stream:
        audio_generator = stream.generator()
        requests = (speech.StreamingRecognizeRequest(audio_content=content)
                    for content in audio_generator)

        responses = client.streaming_recognize(streaming_config, requests)
        # Now, put the transcription responses to use.
        movie_dict_list = listen_print_loop(responses, stream, limit_cnt)
    print("====", movie_dict_list)

    # call_runner()
    return movie_dict_list
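
All of the microphone examples on this page assume a MicrophoneStream helper in scope, along with module-level RATE and CHUNK constants, but none of the snippets define them. A minimal sketch in the spirit of Google's published sample (pyaudio-based; the constants and the simplified single-chunk generator are assumptions):

import queue

import pyaudio

RATE = 16000            # assumed sample rate in Hz
CHUNK = int(RATE / 10)  # 100 ms of audio per buffer


class MicrophoneStream:
    """Opens a microphone stream as a generator yielding audio chunks."""

    def __init__(self, rate, chunk):
        self._rate = rate
        self._chunk = chunk
        self._buff = queue.Queue()  # thread-safe buffer of audio data
        self.closed = True

    def __enter__(self):
        self._audio_interface = pyaudio.PyAudio()
        self._audio_stream = self._audio_interface.open(
            format=pyaudio.paInt16, channels=1, rate=self._rate,
            input=True, frames_per_buffer=self._chunk,
            stream_callback=self._fill_buffer,
        )
        self.closed = False
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self._audio_stream.stop_stream()
        self._audio_stream.close()
        self.closed = True
        self._buff.put(None)  # signal the generator to terminate
        self._audio_interface.terminate()

    def _fill_buffer(self, in_data, frame_count, time_info, status_flags):
        """PyAudio callback: collect audio data into the buffer."""
        self._buff.put(in_data)
        return None, pyaudio.paContinue

    def generator(self):
        """Yield buffered audio chunks until the stream is closed."""
        while not self.closed:
            chunk = self._buff.get()
            if chunk is None:
                return
            yield chunk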
Example 2
def main():
    # See http://g.co/cloud/speech/docs/languages
    # for a list of supported languages.
    language_code = "en-US"  # a BCP-47 language tag

    client = speech.SpeechClient.from_service_account_json(
        "../creds/speech_google_credentials.json")
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code,
    )

    streaming_config = speech.StreamingRecognitionConfig(config=config,
                                                         interim_results=True)

    with MicrophoneStream(RATE, CHUNK) as stream:
        audio_generator = stream.generator()
        requests = (speech.StreamingRecognizeRequest(audio_content=content)
                    for content in audio_generator)

        responses = client.streaming_recognize(streaming_config, requests)

        # Now, put the transcription responses to use.
        listen_print_loop(responses)
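
The examples likewise rely on a listen_print_loop helper that is never shown. A minimal sketch of the basic printing form, modeled on Google's canonical sample (interim hypotheses are rewritten in place; a final "exit" or "quit" ends the loop). The variants above that take extra arguments or return a value are not covered:

import re
import sys


def listen_print_loop(responses):
    """Print interim hypotheses in place and finalize on is_final."""
    num_chars_printed = 0
    for response in responses:
        if not response.results:
            continue
        result = response.results[0]
        if not result.alternatives:
            continue
        transcript = result.alternatives[0].transcript
        # Pad with spaces to overwrite a longer previous interim line.
        overwrite_chars = " " * (num_chars_printed - len(transcript))
        if not result.is_final:
            sys.stdout.write(transcript + overwrite_chars + "\r")
            sys.stdout.flush()
            num_chars_printed = len(transcript)
        else:
            print(transcript + overwrite_chars)
            # Stop recognition when a final phrase contains a keyword.
            if re.search(r"\b(exit|quit)\b", transcript, re.I):
                print("Exiting..")
                break
            num_chars_printed = 0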
Example 3
def main():
    # See http://g.co/cloud/speech/docs/languages
    # for a list of supported languages.
    language_code = 'ja-JP'  # a BCP-47 language tag
    import os
    os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = "speech-rec-827143ff9a4c.json"
    client = speech.SpeechClient()
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code)
    streaming_config = speech.StreamingRecognitionConfig(
        config=config,
        interim_results=True)

    print("start rec")
    with MicrophoneStream(RATE, CHUNK) as stream:
        audio_generator = stream.generator()
        requests = (speech.StreamingRecognizeRequest(audio_content=content)
                    for content in audio_generator)

        responses = client.streaming_recognize(streaming_config, requests)

        # Now, put the transcription responses to use.
        for s in listen_print_loop(responses):
            # voiceroid.say(s)
            print(s)
Example 4
def transcribe(transcript, q):
    # See http://g.co/cloud/speech/docs/languages
    # for a list of supported languages.
    language_code = "en-US"
    client = speech.SpeechClient()
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code)

    streaming_config = speech.StreamingRecognitionConfig(config=config,
                                                         interim_results=True)

    with MicStream(RATE, CHUNK) as stream:
        generator = stream.generator()
        requests = (speech.StreamingRecognizeRequest(audio_content=content)
                    for content in generator)

        responses = client.streaming_recognize(streaming_config, requests)

        # quit_transcript = [False]

        # while 1:
        #     transcription(responses, quit_transcript)

        #     if quit_transcript[0]:
        #         print("Exit")
        #         break

        # Now, put the transcription responses to use.
        listen_print_loop(responses, transcript, q)
Example 5
def main():
    language_code = 'ko-KR'
    # "It takes a moment for the AI speaker to start up. Please wait."
    print('AI 스피커가 동작하는데 시간이 걸립니다. 잠시만 기다려 주세요.')

    client = speech.SpeechClient()

    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code)

    streaming_config = speech.StreamingRecognitionConfig(config=config,
                                                         interim_results=True)

    playsound('../../open_sound/open.wav')
    print('원하시는 것을 말씀 해주세요.')  # "Please say what you would like."

    with MicrophoneStream(RATE, CHUNK) as stream:
        audio_generator = stream.generator()
        requests = (speech.StreamingRecognizeRequest(audio_content=content)
                    for content in audio_generator)
        responses = client.streaming_recognize(streaming_config, requests)
        # Now, put the transcription responses to use.
        path = r'C:\Users\leewo\hci_test\err_sound'
        sounds = os.listdir(path)
        for sound in sounds:
            listen_print_loop(responses, sound)
Example 6
    def __init__(
        self,
        language: str,
        credentials: Union[None, str, dict] = None,
        sample_rate: int = 16000,
        **kwargs: Any,
    ) -> None:
        if credentials:
            if isinstance(credentials, str):
                credentials = service_account.Credentials.from_service_account_file(
                    credentials)
            elif isinstance(credentials, dict):
                credentials = service_account.Credentials.from_service_account_info(
                    credentials)
            else:
                raise ValueError(
                    "Invalid Credentials: Only dict, str, or None accepted")

        self._client = speech.SpeechClient(credentials=credentials)
        self._config = speech.StreamingRecognitionConfig(
            config=speech.RecognitionConfig(
                encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
                sample_rate_hertz=sample_rate,
                language_code=language,
                enable_automatic_punctuation=True,
            ),
            interim_results=True,
        )
        self._queue: Queue = Queue()
        self._thread: Any = None
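
The __init__ above belongs to a class whose name is not shown; it assumes `from google.oauth2 import service_account` and `from queue import Queue`. A hypothetical construction, with Transcriber as a placeholder for the real class name:

# "Transcriber" is a placeholder; the actual class name is not in the snippet.
transcriber = Transcriber(
    language="en-US",
    credentials="service_account.json",  # may also be a dict or None
    sample_rate=16000,
)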
Example 7
def main():
    # See http://g.co/cloud/speech/docs/languages
    # for a list of supported languages.
    language_code = "en-US"  # a BCP-47 language tag

    Labeling = speech.SpeakerDiarizationConfig(
        enable_speaker_diarization=True,
        min_speaker_count=1,
        max_speaker_count=5,
    )

    client = speech.SpeechClient()
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code,
        enable_automatic_punctuation=True,
        diarization_config=Labeling,
    )

    streaming_config = speech.StreamingRecognitionConfig(config=config,
                                                         interim_results=True)

    with MicrophoneStream(RATE, CHUNK) as stream:
        audio_generator = stream.generator()
        requests = (speech.StreamingRecognizeRequest(audio_content=content)
                    for content in audio_generator)

        responses = client.streaming_recognize(streaming_config, requests)

        # Now, put the transcription responses to use.
        listen_print_loop(responses)
Example 8
    def create_speech_config():

        phrases = []

        if confvars.G_PHRASES_PATH:
            with open(confvars.G_PHRASES_PATH,
                      "r",
                      encoding=confvars.G_PHRASES_ENCODING) as fp:
                for line in fp:
                    if line:
                        phrases.append(line.strip().encode(
                            'ascii', 'ignore').decode('ascii'))
        else:
            glbl.main_logger.info(
                f"Phrases file path {confvars.G_PHRASES_PATH!r} is empty.")

        glbl.main_logger.info(f"phrases as context, num={len(phrases)}")

        speech_context = speech.SpeechContext(
            phrases=phrases[:confvars.G_MAX_PHRASES])

        config = speech.RecognitionConfig(
            encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
            sample_rate_hertz=confvars.G_AUD_SAMPLING_RATE,
            enable_word_time_offsets=False,
            model='video',
            profanity_filter=True,
            enable_automatic_punctuation=True,
            speech_contexts=[speech_context],
            language_code=confvars.G_LANGUAGE_CODE)

        speech_config = speech.StreamingRecognitionConfig(config=config,
                                                          interim_results=True)

        return speech_config
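
Stripped of the confvars indirection, the phrase-hint setup above reduces to the following sketch (the phrase list, sample rate, and language are illustrative values, not the original configuration):

from google.cloud import speech

# Illustrative phrase hints; the real list is read from a file above.
speech_context = speech.SpeechContext(phrases=["Cloud Speech", "LINEAR16"])

config = speech.RecognitionConfig(
    encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
    sample_rate_hertz=16000,
    language_code="en-US",
    model='video',
    profanity_filter=True,
    enable_automatic_punctuation=True,
    speech_contexts=[speech_context],
)
speech_config = speech.StreamingRecognitionConfig(config=config,
                                                  interim_results=True)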
Example 9
    def __init__(self, audio_device):
        self.is_supported = is_supported
        if not self.is_supported:
            return

        self.audio_device = audio_device

        self.responseQueue = Queue(maxsize=100)

        self.credentials_json = speakreader.CONFIG.GOOGLE_CREDENTIALS_FILE

        self.client = speech.SpeechClient.from_service_account_json(
            self.credentials_json)

        self.recognition_config = speech.RecognitionConfig(
            encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
            sample_rate_hertz=audio_device._outputSampleRate,
            language_code="en-US",
            max_alternatives=1,
            enable_word_time_offsets=False,
            enable_automatic_punctuation=True,
            profanity_filter=bool(speakreader.CONFIG.ENABLE_CENSORSHIP),
        )

        self.streaming_config = speech.StreamingRecognitionConfig(
            config=self.recognition_config,
            interim_results=True,
        )
Example 10
    def download_audio_and_transcribe(self, recording_url: str) -> str:
        transcription: str = ""
        self.connect(destination="speech")
        response = requests.get(url=recording_url, stream=True)

        reqs = (speech.StreamingRecognizeRequest(audio_content=chunk) for chunk in response.iter_content())
        config = speech.RecognitionConfig(
            encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
            sample_rate_hertz=8000,
            language_code="en-US",
        )
        streaming_config = speech.StreamingRecognitionConfig(config=config)

        responses = self.speech_client.streaming_recognize(config=streaming_config, requests=reqs,)

        for response in responses:
            # Once the transcription has settled, the first result will contain the
            # is_final result. The other results will be for subsequent portions of
            # the audio.
            for result in response.results:
                # print("Finished: {}".format(result.is_final))
                # print("Stability: {}".format(result.stability))
                alternatives = result.alternatives
                # The alternatives are ordered from most likely to least.
                for alternative in alternatives:
                    # print("Confidence: {}".format(alternative.confidence))
                    transcription = u"{}".format(alternative.transcript)

        return transcription
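
One caveat with the snippet above: requests' iter_content() yields 1-byte chunks by default, which makes for a very chatty gRPC stream. Passing an explicit chunk size (an adjustment, not in the original) is usually preferable as a drop-in replacement for the reqs line:

# Same generator with a sensible chunk size (e.g. 4 KiB per request).
reqs = (speech.StreamingRecognizeRequest(audio_content=chunk)
        for chunk in response.iter_content(chunk_size=4096))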
Example 11
def stt():
    # See http://g.co/cloud/speech/docs/languages
    # for a list of supported languages.
    language_code = "en-US"  # a BCP-47 language tag
    #language_code = 'ko-KR'  # a BCP-47 language tag

    client = speech.SpeechClient()
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code,
    )

    streaming_config = speech.StreamingRecognitionConfig(config=config,
                                                         interim_results=True,
                                                         single_utterance=True)

    with MicrophoneStream(RATE, CHUNK) as stream:
        audio_generator = stream.generator()
        requests = (speech.StreamingRecognizeRequest(audio_content=content)
                    for content in audio_generator)

        responses = client.streaming_recognize(streaming_config, requests)

        # Now, put the transcription responses to use.
        # print("here is final text:", end = "")
        return listen_print_loop(responses)  # return final text
Example 12
def main():

    # See http://g.co/cloud/speech/docs/languages
    # for a list of supported languages.
    language_code = 'ko-KR'  # a BCP-47 language tag

    client = speech.SpeechClient()
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code)
    streaming_config = speech.StreamingRecognitionConfig(
        config=config,
        # Added the single_utterance=True parameter --> recognizes and
        # responds to a single spoken utterance only.
        # If the speaker pauses mid-stream, streaming recognition ends
        # --> the loudspeaker's own output is no longer re-recognized.
        # single_utterance=True,
        # Changed it to False; with that, only results where is_final is
        # True are returned.
        interim_results=True)


    with MicrophoneStream(RATE, CHUNK) as stream:
        audio_generator = stream.generator()
        requests = (speech.StreamingRecognizeRequest(audio_content=content)
                    for content in audio_generator)

        responses = client.streaming_recognize(streaming_config, requests)

        # So this can be run again even after listen_print_loop returns
        listen_print_loop(responses)
        print('main: finished listen_print_loop')
Example 13
def main():
    # See http://g.co/cloud/speech/docs/languages
    # for a list of supported languages.
    language_code = "ja-JP"  # a BCP-47 language tag

    credential_path = "./aitraining-306004-2e354d0f5ba9.json"

    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = credential_path

    client = speech.SpeechClient()
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code,
    )

    streaming_config = speech.StreamingRecognitionConfig(
        config=config, interim_results=True
    )

    with MicrophoneStream(RATE, CHUNK) as stream:
        audio_generator = stream.generator()
        requests = (
            speech.StreamingRecognizeRequest(audio_content=content)
            for content in audio_generator
        )

        responses = client.streaming_recognize(streaming_config, requests)

        # Now, put the transcription responses to use.
        listen_print_loop(responses)
Example 14
def main(lacalut):
    language_code = "uk-UA"

    client = speech.SpeechClient()

    interaction_type = speech.RecognitionMetadata.InteractionType.DICTATION

    metadata = speech.RecognitionMetadata(interaction_type=interaction_type)

    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code,
        metadata=metadata,
    )

    streaming_config = speech.StreamingRecognitionConfig(config=config,
                                                         interim_results=True)

    with MicrophoneStream(RATE, CHUNK) as stream:
        audio_generator = stream.generator()
        requests = (speech.StreamingRecognizeRequest(audio_content=content)
                    for content in audio_generator)

        responses = client.streaming_recognize(streaming_config, requests)

        listen_print_loop(responses, lacalut)
Example 15
    def setup(self):
        self.client = gspeech.SpeechClient()
        config = gspeech.RecognitionConfig(
            encoding=gspeech.RecognitionConfig.AudioEncoding.LINEAR16,
            sample_rate_hertz=self.rate,
            language_code=self.language,
        )
        self.streaming_config = gspeech.StreamingRecognitionConfig(
            config=config, interim_results=True)
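
The gspeech name in this snippet is presumably an import alias along these lines (an assumption; the import is not shown):

from google.cloud import speech as gspeech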
Example 16
def main():
    """start bidirectional streaming from microphone input to speech API"""

    client = speech.SpeechClient()
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=SAMPLE_RATE,
        language_code="ko-KR", #en-US
        max_alternatives=1,
    )

    streaming_config = speech.StreamingRecognitionConfig(
        config=config, interim_results=True
    )

    mic_manager = ResumableMicrophoneStream(SAMPLE_RATE, CHUNK_SIZE)
    print(mic_manager.chunk_size)
    sys.stdout.write(YELLOW)
    sys.stdout.write('\nListening, say "Quit" or "Exit" to stop.\n\n')
    sys.stdout.write("End (ms)       Transcript Results/Status\n")
    sys.stdout.write("=====================================================\n")

    with mic_manager as stream:

        while not stream.closed:
            sys.stdout.write(YELLOW)
            sys.stdout.write(
                "\n" + str(STREAMING_LIMIT * stream.restart_counter) + ": NEW REQUEST\n"
            )

            stream.audio_input = []
            audio_generator = stream.generator()

            requests = (
                speech.StreamingRecognizeRequest(audio_content=content)
                for content in audio_generator
            )

            responses = client.streaming_recognize(streaming_config, requests)

            # Now, put the transcription responses to use.
            return_word = listen_print_loop(responses, stream)
            print(return_word)

            if stream.result_end_time > 0:
                stream.final_request_end_time = stream.is_final_end_time
            stream.result_end_time = 0
            stream.last_audio_input = []
            stream.last_audio_input = stream.audio_input
            stream.audio_input = []
            stream.restart_counter = stream.restart_counter + 1

            if not stream.last_transcript_was_final:
                sys.stdout.write("\n")
            stream.new_stream = True
    
        return return_word
Example 17
    def start(self, auto_turn=True, reset_result=False):
        # If auto_turn is True, turn-taking is performed automatically.
        # In general, True is fine.
        self.print_debug("start Google ASR")
        if reset_result:
            self.recognition_result = ""
        self.is_listening = False
        self.utt_start_time = None
        self.turn_start_time = time.time()
        turn_thread_flag = False
        client = speech.SpeechClient()
        config = speech.RecognitionConfig(
            encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
            sample_rate_hertz=SAMPLE_RATE,
            language_code="ja-JP",
            max_alternatives=1,
        )
        # streaming_config = speech.StreamingRecognitionConfig(
        #     config=config, interim_results=True)
        # Turn on the single_utterance setting; if it is off, it takes
        # 60 seconds for the recognition result to be finalized.
        streaming_config = speech.StreamingRecognitionConfig(
            config=config, interim_results=True, single_utterance=True)
        mic_manager = ResumableMicrophoneStream(SAMPLE_RATE, CHUNK_SIZE)
        with mic_manager as self.stream:
            while not self.stream.closed:
                self.stream.audio_input = []
                audio_generator = self.stream.generator()
                requests = (speech.StreamingRecognizeRequest(
                    audio_content=content) for content in audio_generator)

                if not turn_thread_flag and auto_turn:
                    turn_thread_flag = True
                    self.m_turn.start_turn_thread()

                responses = client.streaming_recognize(streaming_config,
                                                       requests)

                # Start the loop that fetches recognition results.
                self.listen_loop(responses)

                # Resume recognition when the stream limit is exceeded; not
                # executed unless the stream stays connected for 4 minutes.
                if self.stream.result_end_time > 0:
                    self.stream.final_request_end_time = self.stream.is_final_end_time
                self.stream.result_end_time = 0
                self.stream.last_audio_input = []
                self.stream.last_audio_input = self.stream.audio_input
                self.stream.audio_input = []
                self.stream.restart_counter = self.stream.restart_counter + 1

                if not self.stream.last_transcript_was_final:
                    sys.stdout.write("\n")
                self.stream.new_stream = True
        self.file_num += 1
Example 18
    def set_speech(self):
        '''
        Set the variables needed for streaming_recognize
        '''
        self.client = speech.SpeechClient()

        self.config = speech.RecognitionConfig(
            encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
            sample_rate_hertz=self.RATE,
            language_code=self.language_code)

        self.streaming_config = speech.StreamingRecognitionConfig(
            config=self.config, interim_results=True)
Example 19
    def recognize_async_audio_stream(self, language_code="en-US"):
        """
        Recognize in "real-time" from microphone stream. 
        Returns when a final_result is found.
        
        May be created as a thread of its own or it'll block until a final result is found.
        
        Stores all results in the `final_result_queue` queue.

        Args:
            language_code -- language to use for recognition. See `languages` for supported languages.   
        """
        if language_code not in self.languages:
            print(
                '"{}" is not a supported language code. Make sure it is supported by Google and try adding it to the languages list.\n'
                .format(language_code))
            return

        # Clear all items in the queue for the new stream.
        self.final_result_queue.queue.clear()

        config_stream = speech.StreamingRecognitionConfig(
            config=speech.RecognitionConfig(
                encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
                sample_rate_hertz=self.microphone_handler.RATE,
                language_code=language_code,
                enable_automatic_punctuation=True,
            ),
            interim_results=True)

        self.microphone_handler.start_recording(streaming=True)
        while self.microphone_handler.streaming:
            data = self.microphone_handler.stream_generator()
            requests = (speech.StreamingRecognizeRequest(audio_content=content)
                        for content in data)

            try:
                responses = self.client.streaming_recognize(
                    config_stream, requests)
                for response in responses:
                    self.final_result_queue.put(response.results[0])
                    if response.results[0].is_final:
                        # Stop after one recording; doesn't halt once the
                        # recording is done. (temp)
                        return
                    if self.debug:
                        # Print all non-final results in the terminal.
                        print(response.results[0].alternatives[0].transcript + '\n')
            except Exception:
                print('Failed to get response.')
Example 20
    async def start_transcriptions_stream(self, call_sid: str):
        from_number = redisController.get(key=call_sid)
        await self.connect(destination="speech")
        
        config = speech.RecognitionConfig(
            encoding=speech.RecognitionConfig.AudioEncoding.MULAW,
            sample_rate_hertz=8000,
            language_code="en-US",
        )
        streaming_config = speech.StreamingRecognitionConfig(config=config, interim_results=True)

        self.stream_queue = queue.Queue()
        thread = threading.Thread(target=self.send_to_google, args=(streaming_config,from_number,))
        thread.start()
Example 21
def get_talk():
    """start bidirectional streaming from microphone input to speech API"""

    client = speech.SpeechClient()
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=SAMPLE_RATE,
        language_code='ja-JP',
        max_alternatives=1)
    streaming_config = speech.StreamingRecognitionConfig(config=config,
                                                         interim_results=True)

    mic_manager = ResumableMicrophoneStream(SAMPLE_RATE, CHUNK_SIZE)
    return mic_manager, client, streaming_config
    '''
def main():
    """start bidirectional streaming from microphone input to speech API"""

    client = speech.SpeechClient()
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=SAMPLE_RATE,
        language_code="ja-JA",
        max_alternatives=1,
    )

    streaming_config = speech.StreamingRecognitionConfig(
        config=config, interim_results=True
    )

     '''
Example 23
    def get_stream_config(self):
        cfg = speechclient.RecognitionConfig(
            dict(
                sample_rate_hertz=DEFAULT_SAMPLE_RATE,
                encoding=speechclient.RecognitionConfig.AudioEncoding.LINEAR16,
                audio_channel_count=2,
                language_code='en-US',
                model='default',
                use_enhanced=False,
            ))
        return speechclient.StreamingRecognitionConfig(
            dict(
                config=cfg,
                single_utterance=False,
                interim_results=True,
            ))
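
Since these config classes are proto-plus messages, the constructor accepts either a single mapping or plain keyword arguments, so the dict(...) wrappers above are equivalent to the keyword form:

# Keyword-argument equivalent of the mapping form above.
cfg = speechclient.RecognitionConfig(
    sample_rate_hertz=DEFAULT_SAMPLE_RATE,
    encoding=speechclient.RecognitionConfig.AudioEncoding.LINEAR16,
    audio_channel_count=2,
    language_code='en-US',
    model='default',
    use_enhanced=False,
)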
Example 24
def transcribe_streaming(stream_file):
    """Streams transcription of the given audio file."""
    import io
    from google.cloud import speech

    client = speech.SpeechClient()

    # [START speech_python_migration_streaming_request]

    # In practice, stream should be a generator yielding chunks of audio data.
    stream = [stream_file]

    requests = (speech.StreamingRecognizeRequest(audio_content=chunk)
                for chunk in stream)

    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=16000,
        language_code="en-US",
    )

    streaming_config = speech.StreamingRecognitionConfig(config=config)

    # streaming_recognize returns a generator.
    # [START speech_python_migration_streaming_response]
    responses = client.streaming_recognize(
        config=streaming_config,
        requests=requests,
    )
    # [END speech_python_migration_streaming_request]

    transcript = ""
    for response in responses:
        # Once the transcription has settled, the first result will contain the
        # is_final result. The other results will be for subsequent portions of
        # the audio.
        for result in response.results:
            print("Finished: {}".format(result.is_final))
            print("Stability: {}".format(result.stability))
            alternatives = result.alternatives
            # The alternatives are ordered from most likely to least.
            for alternative in alternatives:
                print("Confidence: {}".format(alternative.confidence))
                print(u"Transcript: {}".format(alternative.transcript))
                transcript = alternative.transcript
    # [END speech_python_migration_streaming_response]
    return transcript
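
A possible invocation of the sample above; the file name is illustrative, and the audio is assumed to be 16 kHz LINEAR16 as configured. Note the function takes the raw audio bytes, which it wraps in a single-element "stream":

import io

with io.open("audio.raw", "rb") as audio_file:
    content = audio_file.read()

print(transcribe_streaming(content))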
Example 25
def socket_stream(c):

    with NetworkAudioStream(SAMPLE_RATE, CHUNK_SIZE, c) as stream:

        data = c.recv(1024)  # Dummy Thread
        print('Headers', data, len(data), threading.currentThread().getName())

        client = speech.SpeechClient()
        config = speech.RecognitionConfig(
            encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
            sample_rate_hertz=SAMPLE_RATE,
            language_code='en-IN',
            max_alternatives=1)
        streaming_config = speech.StreamingRecognitionConfig(
            config=config, interim_results=True)

        #Start data receiving thread to fill the buffer
        start_new_thread(read_network_stream, (
            c,
            stream,
        ))

        while not stream.closed:
            stream.audio_input = []
            audio_generator = stream.generator()

            requests = (speech.StreamingRecognizeRequest(audio_content=content)
                        for content in audio_generator)

            responses = client.streaming_recognize(streaming_config, requests)
            # Now, put the transcription responses to use.
            listen_print_loop(responses, stream)

            if stream.result_end_time > 0:
                stream.final_request_end_time = stream.is_final_end_time
            stream.result_end_time = 0
            stream.last_audio_input = []
            stream.last_audio_input = stream.audio_input
            stream.audio_input = []
            stream.restart_counter = stream.restart_counter + 1

            if not stream.last_transcript_was_final:
                sys.stdout.write('final-\n')
            stream.new_stream = True
    c.close()
Example 26
def main():

    client = speech.SpeechClient()
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=8000,
        language_code="en-IN")
    streaming_config = speech.StreamingRecognitionConfig(config=config,
                                                         interim_results=True)

    with MicrophoneStream(8000, CHUNK) as stream:
        audio_generator = stream.generator()
        requests = (speech.StreamingRecognizeRequest(audio_content=content)
                    for content in audio_generator)

        responses = client.streaming_recognize(streaming_config, requests)

        listen_print_loop(responses)
Example 27
def main():
    client = speech.SpeechClient()
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=in_language_code)
    streaming_config = speech.StreamingRecognitionConfig(config=config,
                                                         interim_results=True)
    translate_client = translate.Client()
    speech_client = texttospeech.TextToSpeechClient()
    loop = asyncio.get_event_loop()

    with MicrophoneStream(RATE, CHUNK) as stream:
        audio_generator = stream.generator()
        requests = (speech.StreamingRecognizeRequest(audio_content=content)
                    for content in audio_generator)
        responses = client.streaming_recognize(streaming_config, requests)
        interpret(responses, translate_client, speech_client, loop)
Example 28
    def __init__(self):

        self.order_server = actionlib.SimpleActionServer('sst_order_received', ReceiveTargetAction, self.handle_request_order, False)
        # self.sub_detect = rospy.Subscriber('detected_object', Result, self.handle_detector_result)
        # self.server = actionlib.SimpleActionServer('qrcode_detect', ReceiveTargetAction, self.handle_request_detect, False)
        language_code = 'ko-KR'  # a BCP-47 language tag

        self.client = speech.SpeechClient()
        config = speech.RecognitionConfig(
            encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
            sample_rate_hertz=RATE,
            language_code=language_code)
        self.streaming_config = speech.StreamingRecognitionConfig(
            config=config,
            interim_results=True)

        self.order_server.start()
        rospy.loginfo('[%s] initialized...'%rospy.get_name())
Example 29
    def _process(self):
        """Handles the setup of transcription request, and retreving audio chunks in queue."""
        client = speech.SpeechClient()

        config = speech.RecognitionConfig(encoding=self.encoding,
                                          sample_rate_hertz=self.sample_rate,
                                          language_code=self.language_code)

        streaming_config = speech.StreamingRecognitionConfig(
            config=config, interim_results=True)

        # Give it a config and a generator which produces audio chunks; in
        # return you get an iterator of results.
        responses = client.streaming_recognize(streaming_config,
                                               self.generator())

        # This will block until there are no interim transcription results left.
        for response in responses:
            self.result_queue.put(self._response_to_dict(response))
Example 30
    def __init__(self):
        os.environ[
            "GOOGLE_APPLICATION_CREDENTIALS"] = "/home/mj/PycharmProjects/speech-309618-0f83735de272.json"
        self.client = speech.SpeechClient()
        self.config = speech.RecognitionConfig(
            encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
            sample_rate_hertz=SAMPLE_RATE,
            language_code="en-US",
            max_alternatives=1,
        )

        self.streaming_config = speech.StreamingRecognitionConfig(
            config=self.config, interim_results=True)

        sys.stdout.write(YELLOW)
        sys.stdout.write('\nListening, say "Quit" or "Exit" to stop.\n\n')
        sys.stdout.write("End (ms)       Transcript Results/Status\n")
        sys.stdout.write(
            "=====================================================\n")