def main():
    # See http://g.co/cloud/speech/docs/languages
    # for a list of supported languages.
    language_code = 'en-US'  # a BCP-47 language tag
    os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'C:\Hackathon\\codeoverflow-08-10e832d41721.json'
    client = speech.SpeechClient()
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code)
    streaming_config = types.StreamingRecognitionConfig(
        config=config,
        interim_results=True)

    with MicrophoneStream(RATE, CHUNK) as stream:
        audio_generator = stream.generator()
        requests = (types.StreamingRecognizeRequest(audio_content=content)
                    for content in audio_generator)

        responses = client.streaming_recognize(streaming_config, requests)

        # Now, put the transcription responses to use.
        listen_print_loop(responses)
Ejemplo n.º 2
0
    def start(self,window):
        self.window = window
        language_code = 'ko-KR'  # a BCP-47 language tag

        client = speech.SpeechClient()
        config = types.RecognitionConfig(
            encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
            sample_rate_hertz=RATE,
            language_code=language_code)
        streaming_config = types.StreamingRecognitionConfig(
            config=config,
            interim_results=True)

        #무한으로 하려면 'while True :'
        with MicrophoneStream(RATE, CHUNK) as stream:
            audio_generator = stream.generator()
            requests = (types.StreamingRecognizeRequest(audio_content=content)
                        for content in audio_generator)

            responses = client.streaming_recognize(streaming_config, requests)

            # Now, put the transcription responses to use.
            self.listen_print_loop(responses)
Ejemplo n.º 3
0
    def google_stt_streaming(self, socket_action):
        # See http://g.co/cloud/speech/docs/languages
        # for a list of supported languages.
        language_code = 'ko-KR'  # a BCP-47 language tag

        # for content in comuni.get_data(client_record):
        #     print("Type >> {}".format(type(content)))

        client = speech.SpeechClient()
        config = types.RecognitionConfig(
            encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
            sample_rate_hertz=RATE,
            language_code=language_code)
        streaming_config = types.StreamingRecognitionConfig(
            config=config, interim_results=True)
        requests = (types.StreamingRecognizeRequest(audio_content=content)
                    for content in socket_action.get_data())

        responses = client.streaming_recognize(streaming_config, requests)

        # Now, put the transcription responses to use.
        stt_text = self.listen_print_loop(responses)
        return stt_text
Ejemplo n.º 4
0
def main():
    # language_code = 'en-US'  # a BCP-47 language tag te-IN en-IN
    language_code = 'en-IN'  # a BCP-47 language tag te-IN en-IN
    credentials = service_account.Credentials. from_service_account_file('googleKeys.json')
    client = speech.SpeechClient(credentials=credentials)
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code)
    streaming_config = types.StreamingRecognitionConfig(
        config=config,
        interim_results=True)

    with MicrophoneStream(RATE, CHUNK) as stream:
        print("inside stream")
        audio_generator = stream.generator()
        requests = (types.StreamingRecognizeRequest(audio_content=content)
                    for content in audio_generator)

        responses = client.streaming_recognize(streaming_config, requests)

        # Now, put the transcription responses to use.
        listen_print_loop(responses)
Ejemplo n.º 5
0
def main():
    # See http://g.co/cloud/speech/docs/languages
    # for a list of supported languages.
    language_code = 'zh'  # a BCP-47 language tag 'zh' 'ja-JP'
    passage = passages.chinese
    passageIndex = 0
    client = speech.SpeechClient()
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code)
    streaming_config = types.StreamingRecognitionConfig(
        config=config,
        interim_results=True)

    with MicrophoneStream(RATE, CHUNK) as stream:
        audio_generator = stream.generator()
        requests = (types.StreamingRecognizeRequest(audio_content=content)
                    for content in audio_generator)

        responses = client.streaming_recognize(streaming_config, requests)
        # Now, put the transcription responses to use.
        finals = read(responses, passage, 'zh')
Ejemplo n.º 6
0
def main():
   
    language_code = 'zh-TW'
    client = speech.SpeechClient()
    config = types.RecognitionConfig(
            encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
            sample_rate_hertz=RATE,
            language_code=language_code)
    streaming_config = types.StreamingRecognitionConfig(
            config=config,
            #自己測試
            # single_utterance=True,
            interim_results=True)

    with MicrophoneStream(RATE, CHUNK) as stream:
        audio_generator = stream.generator()
        requests = (types.StreamingRecognizeRequest(audio_content=content)
                for content in audio_generator)

        responses = client.streaming_recognize(streaming_config, requests)

        # Now, put the transcription responses to use.
        listen_print_loop(responses)
Ejemplo n.º 7
0
def main(category_index):
    category_index = [val['name'] for val in category_index.values()]
    # See http://g.co/cloud/speech/docs/languages
    # for a list of supported languages.
    language_code = 'en-US'  # a BCP-47 language tag
    client = speech.SpeechClient()
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code)
    streaming_config = types.StreamingRecognitionConfig(config=config,
                                                        interim_results=True)

    print("##########Begining Stream##########")
    with MicrophoneStream(RATE, CHUNK) as stream:

        audio_generator = stream.generator()
        requests = (types.StreamingRecognizeRequest(audio_content=content)
                    for content in audio_generator)

        responses = client.streaming_recognize(streaming_config, requests)
        # Now, put the transcription responses to use.
        return listen_print_loop(responses, stream, category_index)
Ejemplo n.º 8
0
    def __init__(self, callback):

        Thread.__init__(self)
        self.callback = callback
        self.client = speech.SpeechClient()

        # See http://g.co/cloud/speech/docs/languages
        # for a list of supported languages.
        language_code = 'en-US'  # a BCP-47 language tag

        config = types.RecognitionConfig(
            encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
            sample_rate_hertz=RATE,
            language_code=language_code)

        self.streaming_config = types.StreamingRecognitionConfig(
            config=config,
            interim_results=True)


        while True:
            print("Listen again?")
            self.listen()
Ejemplo n.º 9
0
def main():
    # See http://g.co/cloud/speech/docs/languages
    # for a list of supported languages.
    language_code = 'en-US'  # a BCP-47 language tag

    client = speech.SpeechClient()
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code,
        enable_automatic_punctuation=True)
    streaming_config = types.StreamingRecognitionConfig(config=config,
                                                        interim_results=True)

    with MicrophoneStream(RATE, CHUNK) as stream:
        audio_generator = stream.generator()
        requests = (types.StreamingRecognizeRequest(audio_content=content)
                    for content in audio_generator)

        responses = client.streaming_recognize(streaming_config, requests)

        # Now, put the transcription responses to use.
        listen_print_loop(responses)
Ejemplo n.º 10
0
    def __init__(self,
                 credentials=None,
                 RATE=16000,
                 CHUNK=1024,
                 language_code='en-US'):
        self.RATE = RATE
        self.CHUNK = CHUNK
        self.language_code = language_code
        if (credentials != None):
            credentials = service_account.Credentials.from_service_account_file(
                credentials)
        self.client = speech.SpeechClient(credentials=credentials)

        #TODO: speech_contexts -> https://cloud.google.com/speech-to-text/docs/basics#phrase-hints
        self.config = types.RecognitionConfig(
            encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
            sample_rate_hertz=RATE,
            language_code=language_code,
            model='command_and_search',
            enable_automatic_punctuation=True)

        self.streaming_config = types.StreamingRecognitionConfig(
            config=self.config, interim_results=True, single_utterance=True)
Ejemplo n.º 11
0
    def __init__(self, engine, args, loop):

        Thread.__init__(self)
        self.client = speech.SpeechClient()
        self.args = args
        self.engine = engine
        self.stop_recognition = False
        self.loop = loop
        self.role = "UNKOWN"
        self.crash = False

        # See http://g.co/cloud/speech/docs/languages
        # for a list of supported languages.
        language_code = 'en-US'  # a BCP-47 language tag

        config = types.RecognitionConfig(
            encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
            sample_rate_hertz=RATE,
            language_code=language_code)

        self.streaming_config = types.StreamingRecognitionConfig(
            config=config,
            interim_results=True)
Ejemplo n.º 12
0
def main():
    procs = []
    #화면에 화자가 있고 출력할 문자열이 있는지 확인하는 Queue
    q = Queue()

    #작은 말풍선을 출력할지 큰 말풍선을 출력할지 결정하는 Queue
    bubble_q = Queue()
    #멀티 프로세싱으로 frame출력하는 프로그램 start
    proc = Process(target=visualize_frame, args=(
        q,
        bubble_q,
    ))
    procs.append(proc)
    proc.start()

    # See http://g.co/cloud/speech/docs/languages
    # for a list of supported languages.
    language_code = 'ko-KR'  # a BCP-47 language tag

    #마이크로 들어오는 speech를 google cloud platform의 speech to text api 호출하여 말풍선에 문자열 저장
    client = speech.SpeechClient()
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code)
    streaming_config = types.StreamingRecognitionConfig(config=config,
                                                        interim_results=True)

    with MicrophoneStream(RATE, CHUNK) as stream:
        audio_generator = stream.generator()
        requests = (types.StreamingRecognizeRequest(audio_content=content)
                    for content in audio_generator)

        responses = client.streaming_recognize(streaming_config, requests)

        # Now, put the transcription responses to use.
        listen_print_loop(responses, q, bubble_q)
Ejemplo n.º 13
0
def transcribe_streaming(stream_file):
    """Streams transcription of the given audio file."""
    import io
    from google.cloud import speech
    from google.cloud.speech import enums
    from google.cloud.speech import types
    client = speech.SpeechClient()

    with io.open(stream_file, 'rb') as audio_file:
        content = audio_file.read()

    # In practice, stream should be a generator yielding chunks of audio data.
    stream = [content]
    requests = (types.StreamingRecognizeRequest(audio_content=chunk)
                for chunk in stream)

    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=16000,
        language_code='es-es')
    streaming_config = types.StreamingRecognitionConfig(config=config)

    # streaming_recognize returns a generator.
    responses = client.streaming_recognize(streaming_config, requests)

    for response in responses:
        # Once the transcription has settled, the first result will contain the
        # is_final result. The other results will be for subsequent portions of
        # the audio.
        for result in response.results:
            print('Finished: {}'.format(result.is_final))
            print('Stability: {}'.format(result.stability))
            alternatives = result.alternatives
            # The alternatives are ordered from most likely to least.
            for alternative in alternatives:
                print('Confidence: {}'.format(alternative.confidence))
                print(u'Transcript: {}'.format(alternative.transcript))
Ejemplo n.º 14
0
    def stream_speach(self):
        try:
            if get_env_value('DEVICE') == 'PI':
                from lib.PiControls import PiControls
                pi = PiControls()
                pi.flash_blue()
            else:
                print("sorry! can't blink blue you don't have pi")

            print('live speech recognition started')
            print(threading.enumerate())
            # See http://g.co/cloud/speech/docs/languages
            # for a list of supported languages..
            language_code = 'en-US'  # a BCP-47 language tag
            credentials = service_account.Credentials.from_service_account_file(
                'google-cloud.json')
            client = speech.SpeechClient(credentials=credentials)
            config = types.RecognitionConfig(
                encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
                sample_rate_hertz=RATE,
                language_code=language_code)
            streaming_config = types.StreamingRecognitionConfig(
                config=config,
                interim_results=True)

            with MicrophoneStream(RATE, CHUNK) as stream:
                audio_generator = stream.generator()
                requests = (types.StreamingRecognizeRequest(audio_content=content)
                            for content in audio_generator)

                responses = client.streaming_recognize(streaming_config, requests)

                # Now, put the transcription responses to use.
                self.listen_print_loop(responses)
        except:
            print('exception occured')
            self.stream_speach()
def main():
    # See http://g.co/cloud/speech/docs/languages
    # for a list of supported languages.
    # 언어 설정코드 변경 가능
    language_code = 'ko-KR'  # a BCP-47 language tag

    # 언어 데이터셋을 Snips NLU에 넣어주기
    with io.open("./lights_dataset_train_ko.json", encoding="utf8") as f:
        sample_dataset = json.load(f)

    nlu_engine = SnipsNLUEngine(config=CONFIG_KO)
    nlu_engine = nlu_engine.fit(sample_dataset)

    print("성공")

    # 전처리 종료

    # 음성인식 시작

    client = speech.SpeechClient()
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code)
    streaming_config = types.StreamingRecognitionConfig(config=config,
                                                        interim_results=True)

    with MicrophoneStream(RATE, CHUNK) as stream:
        audio_generator = stream.generator()
        requests = (types.StreamingRecognizeRequest(audio_content=content)
                    for content in audio_generator)

        responses = client.streaming_recognize(streaming_config, requests)

        # Now, put the transcription responses to use.
        # 무한루프 시작
        listen_print_loop(responses, nlu_engine)
Ejemplo n.º 16
0
def main():
    # See http://g.co/cloud/speech/docs/languages
    # for a list of supported languages.
    # this code comes from Google Cloud's Speech to Text API!
    # Check out the links in your handout. Comments are ours.
    language_code = 'en-US'  # a BCP-47 language tag
    intro()

    #set up a client
    #make sure GCP is aware of the encoding, rate
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code)
    #our example uses streamingrecognition - most likely what you will want to use.
    #check out the simpler cases of asychronous recognition too!
    streaming_config = types.StreamingRecognitionConfig(
        config=config,
        interim_results=True)

    #this section is where the action happens:
    #a microphone stream is set up, requests are generated based on
    #how the audiofile is chunked, and they are sent to GCP using
    #the streaming_recognize() function for analysis. responses
    #contains the info you get back from the API.
    with MicrophoneStream(RATE, CHUNK) as stream:
        audio_generator = stream.generator()
        requests = (types.StreamingRecognizeRequest(audio_content=content)
                    for content in audio_generator)

        responses = client.streaming_recognize(streaming_config, requests)

        #### Save audio recording ####

        ######
        # Now, put the transcription responses to use.
        listen_print_loop(responses, stream)
Ejemplo n.º 17
0
def sub_main(profanityFilterBool):
    """
    *** Code taken from Google Cloud Speech to text documentation ***
    Turns on the profanity filter so bad words are censored and not printed
    """
    # See http://g.co/cloud/speech/docs/languages
    # for a list of supported languages.
    language_code = 'en-US'  # a BCP-47 language tag
    sp_c_cico = {"phrases": ["cico"],"boost": 20} #speech_contexts_cico
    #sp_c_kiko = {"phrases": ["Kiko"],"boost": 0}#speech_contexts_kiko
    speech_contexts = [sp_c_cico]
    client = speech.SpeechClient()
    #print(help(types.RecognitionConfig))
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code,
        enable_automatic_punctuation=True,
        speech_contexts=speech_contexts)

    streaming_config = types.StreamingRecognitionConfig(
        config=config,
        interim_results=True)

    with MicrophoneStream(RATE, CHUNK) as stream:

        audio_generator = stream.generator()
        requests = (types.StreamingRecognizeRequest(audio_content=content)
                    for content in audio_generator)

        responses = client.streaming_recognize(streaming_config, requests)
        # Now, put the transcription responses to use.
        solution = returnResponseString(responses) #solution is the result

        append_to_file("log.txt",str(solution))

    return solution
Ejemplo n.º 18
0
    def __init__(self, parent=None):
        super(VoiceRecognizer, self).__init__(parent)
        language_code = 'en-US'  # a BCP-47 language tag
        self.confirm_signal = pyqtSignal()
        self.client = speech.SpeechClient()
        self.config = types.RecognitionConfig(
            encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
            sample_rate_hertz=RATE,
            language_code=language_code,
            speech_contexts=[SPEECH_CONTEXT])
        self.streaming_config = types.StreamingRecognitionConfig(
            config=self.config, interim_results=True)

        self.home_pub = rospy.Publisher('/dvrk/console/home',
                                        Empty,
                                        latch=True,
                                        queue_size=1)  # HOME command publisher
        self.poff_pub = rospy.Publisher(
            '/dvrk/console/power_off', Empty, latch=True,
            queue_size=1)  # POWER OFF command publisher
        self.pon_pub = rospy.Publisher(
            '/dvrk/console/power_on', Empty, latch=True,
            queue_size=1)  # POWER ON command publisher
        self.tenable_pub = rospy.Publisher(
            '/dvrk/console/teleop/enable', Bool, latch=True,
            queue_size=1)  # TELEOP ENABLE command publisher
        self.tscale = rospy.Publisher(
            '/dvrk/console/teleop/scale', Float32, latch=True,
            queue_size=1)  # TELEOP SCALE command publisher
        self.tset_scale = rospy.Publisher(
            '/dvrk/console/teleop/set_scale',
            Float32,
            latch=True,
            queue_size=1)  # TELEOP SET SCALE command publisher

        self.exit = False
Ejemplo n.º 19
0
def main():
    """The main event."""
    GPIOSetup.setup()

    language_code = 'en-US'  # a BCP-47 language tag
    dir_path = os.getcwd()
    gc_key_file = 'gc_private_key.json'
    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = '{}/{}'.format(
        dir_path, gc_key_file)

    # Speech-To-Text and Text-To-Speech clients config
    tts_client = texttospeech.TextToSpeechClient()
    stt_client = speech.SpeechClient()
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code)
    streaming_config = types.StreamingRecognitionConfig(config=config,
                                                        interim_results=True)

    # Wait for an internet connection.
    wait_internet_conn()
    stream_audio(tts_client, stt_client, streaming_config)
    main()
Ejemplo n.º 20
0
def main():
    # See http://g.co/cloud/speech/docs/languages
    # for a list of supported languages.
    language_code = 'en-US'  # a BCP-47 language tag
    # language_code = 'ru-RU'  # a BCP-47 language tag
    os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = "C:\\Users\\RealityShift24\\TooLazyForPPTX-3785c34de4cc.json"
    client = speech.SpeechClient()
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code)
    streaming_config = types.StreamingRecognitionConfig(
        config=config,
        interim_results=True)

    with MicrophoneStream(RATE, CHUNK) as stream:
        audio_generator = stream.generator()
        requests = (types.StreamingRecognizeRequest(audio_content=content)
                    for content in audio_generator)

        responses = client.streaming_recognize(streaming_config, requests)

        # Now, put the transcription responses to use.
        listen_print_loop(responses)
Ejemplo n.º 21
0
def setup_and_run():
    """
    Sets up the speech reognition API calls and runs
    """
    # See http://g.co/cloud/speech/docs/languages
    # for a list of supported languages.
    language_code = 'en-US'  # a BCP-47 language tag
    # Set up the client and configuration.
    client = speech.SpeechClient()
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code)
    streaming_config = types.StreamingRecognitionConfig(config=config,
                                                        interim_results=True)
    os.system('omxplayer speech_recogniton/Twinkle-sound-effect.mp3')
    with MicrophoneStream(RATE, CHUNK) as stream:
        audio_generator = stream.generator()
        requests = (types.StreamingRecognizeRequest(audio_content=content)
                    for content in audio_generator)

        responses = client.streaming_recognize(streaming_config, requests)

        return listen_and_translate(responses)
Ejemplo n.º 22
0
def main():
    rospy.init_node('Speech_node')
    language_code = 'en-US'
    if_restart = True
    client = speech.SpeechClient()
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code,
        speech_contexts = [{"phrases":["attach","move","make","get","grab","take","pick","select","put","object","item","objects",
                                       "items","one","ones","cube","cubes","blocks","block","guys","guy","here","there","place",
                                       "location","position","yellow","green","blue","this","that","these","those","targets","target",
                                       "it", "transport"]}])
    streaming_config = types.StreamingRecognitionConfig(
        config=config,
        interim_results=True)
    while if_restart:
        with MicrophoneStream(RATE, CHUNK) as stream:
            audio_generator = stream.generator()
            requests = (types.StreamingRecognizeRequest(audio_content=content)
                        for content in audio_generator)

            responses = client.streaming_recognize(streaming_config, requests)
            if_restart = listen_print_loop(responses)
Ejemplo n.º 23
0
    def __init__(self):
        # See http://g.co/cloud/speech/docs/languages
        # for a list of supported languages.
        language_code = 'ko-KR'  # a BCP-47 language tag
        os.environ[
            "GOOGLE_APPLICATION_CREDENTIALS"] = "/home/pi/posvacpjt-251711-c6df951f8f17.json"

        client = speech.SpeechClient()
        config = types.RecognitionConfig(
            encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
            sample_rate_hertz=RATE,
            language_code=language_code)
        streaming_config = types.StreamingRecognitionConfig(
            config=config, interim_results=True)

        with MicrophoneStream(RATE, CHUNK) as stream:
            audio_generator = stream.generator()
            requests = (types.StreamingRecognizeRequest(audio_content=content)
                        for content in audio_generator)

            responses = client.streaming_recognize(streaming_config, requests)

            # Now, put the transcription responses to use.
            self.listen_print_loop(responses)
Ejemplo n.º 24
0
def main():
    rospy.init_node(name='transcription_node')

    rospy.loginfo('Registering as publisher for /mpstate/transcription')
    trans_pub = rospy.Publisher('mpstate/transcription', TranscriptionResult, queue_size=10, latch=True)

    # Some short snippets which are likely to be told to Marco.
    marco_phrases = ['hey marco', 'where is the', 'go get the']

    language_code = 'en-US'  # a BCP-47 language tag

    client = speech.SpeechClient()
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code,
        speech_contexts=[{"phrases": marco_phrases}]
        )

    streaming_config = types.StreamingRecognitionConfig(
        config=config,
        interim_results=True)

    with MicrophoneStream(RATE, CHUNK) as stream:
        audio_generator = stream.generator()
        requests = (types.StreamingRecognizeRequest(audio_content=content)
                    for content in audio_generator)

        responses = client.streaming_recognize(streaming_config, requests)

        # Now, put the transcription responses to use.
        listen_publish_loop(responses, trans_pub)

    rospy.spin()

    return
Ejemplo n.º 25
0
def main(sample_rate):
    stopFlag = False
    # See http://g.co/cloud/speech/docs/languages
    # for a list of supported languages.
    language_code = 'ko-KR'  # a BCP-47 language tag

    client = speech.SpeechClient()
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=sample_rate,
        language_code=language_code,
        max_alternatives=1)
    streaming_config = types.StreamingRecognitionConfig(config=config,
                                                        interim_results=True)

    with MicrophoneStream(sample_rate, int(sample_rate / 10)) as stream:
        audio_generator = stream.generator()
        requests = (types.StreamingRecognizeRequest(audio_content=content)
                    for content in audio_generator)

        responses = client.streaming_recognize(streaming_config, requests)

        # Now, put the transcription responses to use.
        stopFlag = listen_print_loop(responses)
def start():
    client = speech.SpeechClient()

    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=LANGUAGE)
    streaming_config = types.StreamingRecognitionConfig(config=config,
                                                        single_utterance=True,
                                                        interim_results=True)

    with MicrophoneStream(RATE, CHUNK) as stream:
        audio_generator = stream.generator()
        requests = (types.StreamingRecognizeRequest(audio_content=content)
                    for content in audio_generator)

        responses = client.streaming_recognize(streaming_config,
                                               requests,
                                               timeout=TIMEOUT)

        # Now, put the transcription responses to use.
        command = get_command(responses)
        print(command)
        return (command)
Ejemplo n.º 27
0
    def __init__(self):
        global SpeechClient, types, enums, Credentials
        from google.cloud.speech import SpeechClient, types, enums
        from google.oauth2.service_account import Credentials

        super(GoogleCloudStreamingSTT, self).__init__()
        # override language with module specific language selection
        self.language = self.config.get('lang') or self.lang
        credentials = Credentials.from_service_account_info(
            self.credential.get('json'))

        self.client = SpeechClient(credentials=credentials)
        recognition_config = types.RecognitionConfig(
            encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
            sample_rate_hertz=16000,
            language_code=self.language,
            model='command_and_search',
            max_alternatives=1,
        )
        self.streaming_config = types.StreamingRecognitionConfig(
            config=recognition_config,
            interim_results=True,
            single_utterance=True,
        )
def main():
    # See http://g.co/cloud/speech/docs/languages
    # for a list of supported languages.
    language_code = 'ko-KR'  # a BCP-47 language tag en-US
    cmd_add()
    credential = "/home/pi/Downloads/logical-carver-277605-91fe57214bc9.json"
    client = speech.SpeechClient(credentials="/home/pi/Downloads/logical-carver-277605-91fe57214bc9.json")
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code)
    streaming_config = types.StreamingRecognitionConfig(
        config=config,
        interim_results=True)

    with MicrophoneStream(RATE, CHUNK) as stream:
        audio_generator = stream.generator()
        requests = (types.StreamingRecognizeRequest(audio_content=content)
                    for content in audio_generator)

        responses = client.streaming_recognize(streaming_config, requests)
        
        # Now, put the transcription responses to use.
        listen_print_loop(responses)
Ejemplo n.º 29
0
def transcribe_streaming():
    """Streams transcription of the given audio file."""
    client = speech.SpeechClient()
    CHUNK = 1024 #measured in bytes
    FORMAT = pyaudio.paInt16
    CHANNELS = 1
    RATE = 16000 #common sampling frequency
    RECORD_SECONDS=5
    file="test.rav"
    p = pyaudio.PyAudio()
    for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
        audioStream = p.open(format=FORMAT,
                        channels=CHANNELS,
                        rate=RATE,
                        input=True,
                        frames_per_buffer=CHUNK)
        content=audioStream.read(CHUNK)
        # In practice, stream should be a generator yielding chunks of audio data.
        stream = [content]
        requests = (types.StreamingRecognizeRequest(audio_content=chunk)
                    for chunk in stream)
        config = types.RecognitionConfig(
            encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
            sample_rate_hertz=16000,
            language_code='en-US')
        streaming_config = types.StreamingRecognitionConfig(config=config)
        # streaming_recognize returns a generator.
        responses = client.streaming_recognize(streaming_config, requests)
        for response in responses:
            for result in response.results:
                print('Finished: {}'.format(result.is_final))
                print('Stability: {}'.format(result.stability))
                alternatives = result.alternatives
                for alternative in alternatives:
                    print('Confidence: {}'.format(alternative.confidence))
                    print('Transcript: {}'.format(alternative.transcript))
Ejemplo n.º 30
0
def main(projectID):
    # See http://g.co/cloud/speech/docs/languages
    # for a list of supported languages.
    language_code = 'en-US'  # a BCP-47 language tag

    client = speech.SpeechClient()
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code)
    streaming_config = types.StreamingRecognitionConfig(config=config,
                                                        interim_results=True)
    print(
        "Begin speaking to see deidentified text (stream only lasts for 65 seconds)..."
    )
    with MicrophoneStream(RATE, CHUNK) as stream:
        audio_generator = stream.generator()
        requests = (types.StreamingRecognizeRequest(audio_content=content)
                    for content in audio_generator)

        responses = client.streaming_recognize(streaming_config, requests)

        # Now, put the transcription responses to use.
        listen_print_dlp_loop(responses, projectID)