Example #1
def record():
    # See http://g.co/cloud/speech/docs/languages
    # for a list of supported languages.
    language_code = 'en-US'  # a BCP-47 language tag

    client = speech.SpeechClient()
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code)
    streaming_config = types.StreamingRecognitionConfig(
        config=config,
        interim_results=True)

    with MicrophoneStream(RATE, CHUNK) as stream:
        audio_generator = stream.generator()
        requests = (types.StreamingRecognizeRequest(audio_content=content)
                    for content in audio_generator)

        responses = client.streaming_recognize(streaming_config, requests)
        complete_response = listen_print_loop(responses)

        # Keep restarting the stream while listen_print_loop returns the
        # sentinel '-1' (no usable transcription yet).
        while complete_response == '-1':
            audio_generator = stream.generator()
            requests = (types.StreamingRecognizeRequest(audio_content=content)
                        for content in audio_generator)

            responses = client.streaming_recognize(streaming_config, requests)
            complete_response = listen_print_loop(responses)

        # Now, put the transcription responses to use.
        return complete_response
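All of the examples on this page lean on a MicrophoneStream helper and module-level RATE/CHUNK constants that the snippets themselves never define. A minimal sketch of that helper, modeled on Google's public microphone-streaming sample (PyAudio feeding a thread-safe buffer); the class each project actually uses may differ:

import queue

import pyaudio

RATE = 16000
CHUNK = int(RATE / 10)  # 100 ms of audio per buffer


class MicrophoneStream(object):
    """Opens a recording stream as a generator yielding audio chunks."""

    def __init__(self, rate, chunk):
        self._rate = rate
        self._chunk = chunk
        self._buff = queue.Queue()  # thread-safe buffer of audio data
        self.closed = True

    def __enter__(self):
        self._audio_interface = pyaudio.PyAudio()
        self._audio_stream = self._audio_interface.open(
            format=pyaudio.paInt16, channels=1, rate=self._rate,
            input=True, frames_per_buffer=self._chunk,
            # Fill the buffer asynchronously so reads never block recording.
            stream_callback=self._fill_buffer)
        self.closed = False
        return self

    def __exit__(self, type, value, traceback):
        self._audio_stream.stop_stream()
        self._audio_stream.close()
        self.closed = True
        self._buff.put(None)  # unblock the generator so it can terminate
        self._audio_interface.terminate()

    def _fill_buffer(self, in_data, frame_count, time_info, status_flags):
        """Continuously collect data from the audio stream into the buffer."""
        self._buff.put(in_data)
        return None, pyaudio.paContinue

    def generator(self):
        while not self.closed:
            # Block until there is at least one chunk of data, then consume
            # whatever else is buffered before yielding one joined blob.
            chunk = self._buff.get()
            if chunk is None:
                return
            data = [chunk]
            while True:
                try:
                    chunk = self._buff.get(block=False)
                    if chunk is None:
                        return
                    data.append(chunk)
                except queue.Empty:
                    break
            yield b''.join(data)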
Example #2
    def start_recognize(self):
        with MicrophoneStream(RATE, CHUNK) as stream:
            self.audio_generator = stream.generator()
            requests = (types.StreamingRecognizeRequest(audio_content=content)
                        for content in self.audio_generator)

            self.responses = self.client.streaming_recognize(
                self.streaming_config, requests)

            # Now, put the transcription responses to use.
            try:
                while not self.exit:
                    self.listen_print_loop(self.responses)

                    # Discard this stream and create a new one.
                    # Note: calling .cancel() doesn't immediately raise an RpcError
                    # - it only raises when the iterator's next() is requested
                    self.responses.cancel()
                    self.audio_generator = stream.generator()
                    logging.debug('Starting new stream')
                    requests = (types.StreamingRecognizeRequest(
                        audio_content=content)
                                for content in self.audio_generator)

                    self.responses = self.client.streaming_recognize(
                        self.streaming_config, requests)

                    if self.exit:
                        break

            except grpc.RpcError:
                # Raised when the interrupt handler cancels the stream.
                print("error in gRPC")
Example #3
def main_2():
    import os
    os.environ[
        "GOOGLE_APPLICATION_CREDENTIALS"] = "/home/aleks/gcp/speechtotext-281122-7d0875e1d1ca.json"
    print(os.environ['GOOGLE_APPLICATION_CREDENTIALS'])
    # See http://g.co/cloud/speech/docs/languages
    # for a list of supported languages.
    language_code = 'en-US'  # a BCP-47 language tag

    client = speech.SpeechClient()
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code)
    streaming_config = types.StreamingRecognitionConfig(config=config,
                                                        interim_results=True)

    with MicrophoneStream(RATE, CHUNK) as stream:
        #audio_generator = stream.generator()
        audio_generator = stream.generator_from_files('./data/rssd_A.wav',
                                                      './data/rssd_B.wav')

        requests_0 = (types.StreamingRecognizeRequest(
            audio_content=cis.convert_toPyAudio(content[0]).tostring())
                      for content in audio_generator)

        responses_0 = client.streaming_recognize(streaming_config, requests_0)

        # Now, put the transcription responses to use.
        listen_print_loop(responses_0)

        audio_generator = stream.generator_from_files('./data/rssd_A.wav',
                                                      './data/rssd_B.wav')
        requests_1 = (types.StreamingRecognizeRequest(
            audio_content=cis.convert_toPyAudio(content[1]).tostring())
                      for content in audio_generator)

        responses_1 = client.streaming_recognize(streaming_config, requests_1)

        # Now, put the transcription responses to use.
        listen_print_loop(responses_1)
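Example #3 depends on two project-specific helpers that are not shown: stream.generator_from_files(), which yields chunk pairs from two WAV files, and cis.convert_toPyAudio(), which converts each chunk to raw samples. As a rough, hypothetical single-file equivalent of that pipeline:

import wave

CHUNK_FRAMES = 1600  # assumption: 100 ms of 16 kHz mono 16-bit audio


def generator_from_wav(path, chunk_frames=CHUNK_FRAMES):
    """Hypothetical stand-in for generator_from_files(): yields raw LINEAR16
    chunks from a single mono WAV file instead of the microphone."""
    wf = wave.open(path, 'rb')
    try:
        assert wf.getsampwidth() == 2, 'expected 16-bit PCM'
        while True:
            frames = wf.readframes(chunk_frames)
            if not frames:
                return
            yield frames
    finally:
        wf.close()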
Example #4
def main():
    # See http://g.co/cloud/speech/docs/languages
    # for a list of supported languages.
    language_code = 'en-US'  # a BCP-47 language tag

    client = speech.SpeechClient()
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code)
    streaming_config = types.StreamingRecognitionConfig(
        config=config,
        interim_results=True)

    with MicrophoneStream(RATE, CHUNK) as stream:
        audio_generator = stream.generator()
        requests = (types.StreamingRecognizeRequest(audio_content=content)
                    for content in audio_generator)

        responses = client.streaming_recognize(streaming_config, requests)

        # Now, put the transcription responses to use.
        transcription = listen_print_loop(responses)

        # Use a separate name so the speech client is not shadowed.
        nexmo_client = nexmo.Client(
            application_id='826a404d-9c0d-49d5-ad96-63f39d07bb49',
            private_key='private.key'
        )

        response = nexmo_client.create_call({
            'to': [{'type': 'phone', 'number': '447519432230'}],
            'from': {'type': 'phone', 'number': '447418343967'},
            # 'answer_url': ['https://developer.nexmo.com/ncco/tts.json']
            'ncco': [{'action': 'talk', 'text': transcription}]
        })
Example #5
def main2():

    language_code = 'en-US'  # a BCP-47 language tag

    client = speech.SpeechClient()
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code,)
    streaming_config = types.StreamingRecognitionConfig(
        config=config,
        interim_results=True)

    with MicrophoneStream(RATE, CHUNK) as stream:
        audio_generator = stream.generator()
        requests = (types.StreamingRecognizeRequest(audio_content=content)
                    for content in audio_generator)

        responses = client.streaming_recognize(streaming_config, requests)

        # Now, put the transcription responses to use.

        print("here")

        listen_print_loop(responses)
        time.sleep(0.01)
        print("here2")
Example #6
def main():

    mode = choose_mode()
    #WORD LIST
    cues = get_word_list(mode)
    launchppt()
    # See http://g.co/cloud/speech/docs/languages
    # for a list of supported languages.
    language_code = 'en-US'  # a BCP-47 language tag

    client = speech.SpeechClient()
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code)
    streaming_config = types.StreamingRecognitionConfig(config=config,
                                                        interim_results=True)

    with MicrophoneStream(RATE, CHUNK) as stream:
        audio_generator = stream.generator()
        requests = (types.StreamingRecognizeRequest(audio_content=content)
                    for content in audio_generator)

        responses = client.streaming_recognize(streaming_config, requests)

        listen_for_cue(responses, cues, mode)
Example #7
    def run(self):
        syslog.syslog("starting: " + self.name)
        self.client = speech.SpeechClient()
        while self.running:
            stream = self.dataStream(self)
            #syslog.syslog(self.name+"for chunk in stream")
            #for chunk in stream:
            #  syslog.syslog(self.name+" chunk size:"+str(len(chunk)))
            requests = (types.StreamingRecognizeRequest(audio_content=chunk)
                        for chunk in stream)
            config = types.RecognitionConfig(
                encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
                sample_rate_hertz=44100,
                language_code='en-US')
            streaming_config = types.StreamingRecognitionConfig(
                config=config, interim_results=True)

            # streaming_recognize returns a generator.
            try:
                responses = self.client.streaming_recognize(
                    streaming_config, requests)
                for response in responses:

                    for result in response.results:
                        self.anal.queue.put(result)

            except Exception as e:
                print("%s: exception %s" % (self.name, e))
                if str(e).find("iterating requests") == -1:
                    traceback.print_exc()
                else:
                    # The request iterator ended: stop this worker.
                    self.running = False
Example #8
def bigBrother():
    # Google imports - can't have them leaking
    from google.cloud import speech
    from google.cloud.speech import enums
    from google.cloud.speech import types
    #Begin google code
    language_code = 'en-US'

    client = speech.SpeechClient()
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code)
    streaming_config = types.StreamingRecognitionConfig(config=config,
                                                        interim_results=True)

    with MicrophoneStream(RATE, CHUNK) as stream:
        audio_generator = stream.generator()
        requests = (types.StreamingRecognizeRequest(audio_content=content)
                    for content in audio_generator)
        responses = client.streaming_recognize(streaming_config, requests)
        for response in responses:
            # Act only on the final (most likely) result.
            if response.results[0].is_final:
                searchSkills(response.results[0].alternatives[0].transcript)
                return  # Bail out!
Example #9
    def startStream(self):
        def read(responses, passage):
            missed = []
            passage_index = 0
            self.passage_label = str(".\n".join(passage[passage_index:]) + ".")
            for response in responses:
                if not response.results:
                    continue
                result = response.results[0]
                if not result.alternatives:
                    continue
                if result.is_final:
                    print(result.alternatives[0].transcript)
                    comp_result = passageCheck(
                        passage[passage_index],
                        result.alternatives[0].transcript)
                    print(comp_result)
                    missed += comp_result[0]
                    if not comp_result[1]:
                        passage_index += 1
                        self.ids.repeat_button.disabled = True
                    else:
                        passage[passage_index] = " ".join(comp_result[1])
                        generatePronun(comp_result[1][0])
                        self.ids.repeat_button.disabled = False
                        missed += [comp_result[1][0]]
                        self.help_label = str(
                            "Tip: " + " ".join(getWord(
                                comp_result[1][0])))  # call dictionary lookup
                    self.input_label = result.alternatives[0].transcript
                    if passage_index < len(passage):
                        self.passage_label = str(
                            ".\n".join(passage[passage_index:]) + ".")
                if passage_index == len(passage):
                    self.passage_label = str("")
                    print("woo")
                    return missed

        language_code = 'en-US'  # a BCP-47 language tag
        client = speech.SpeechClient()
        config = types.RecognitionConfig(
            encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
            sample_rate_hertz=RATE,
            language_code=language_code)
        streaming_config = types.StreamingRecognitionConfig(
            config=config, interim_results=True)

        with MicrophoneStream(RATE, CHUNK) as stream:
            audio_generator = stream.generator()
            requests = (types.StreamingRecognizeRequest(audio_content=content)
                        for content in audio_generator)

            responses = client.streaming_recognize(streaming_config, requests)
            # Now, put the transcription responses to use.
            finals = read(responses, passages.english)
            App.get_running_app().missed_keys = finals
            print("yippee")
            return finals
Example #10
def main():
    try:
        # Supports 119 languages; a list of BCP-47 language tags can be
        # found at: https://cloud.google.com/speech-to-text/docs/languages
        language_code = 'en-US'  # a BCP-47 language tag

        client = speech.SpeechClient()
        config = types.RecognitionConfig(
            encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
            sample_rate_hertz=RATE,
            language_code=language_code)
        streaming_config = types.StreamingRecognitionConfig(
            config=config,
            interim_results=True)

        with MicrophoneStream(RATE, CHUNK) as stream:
            audio_generator = stream.generator()
            requests = (types.StreamingRecognizeRequest(audio_content=content)
                        for content in audio_generator)

            responses = client.streaming_recognize(streaming_config, requests)

            # Now, put the transcription responses to use.
            listen_print_loop(responses)
    except Exception:
        # Restart on any error. Note this recursion is unbounded; see the
        # loop-based sketch below.
        main()
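Restarting by calling main() from its own except handler grows the call stack on every failure. A hedged, loop-based alternative with the same effect (run_forever and transcribe_once are hypothetical names):

def run_forever(transcribe_once):
    """Invoke transcribe_once() (e.g. the body of main() above) repeatedly,
    restarting after errors without unbounded recursion."""
    while True:
        try:
            transcribe_once()
            return
        except Exception as err:
            print('stream error, restarting: %s' % err)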
Example #11
def get_speech_data():
    # threading.Timer(10.0, main).start()
    print("running main..")
    # See http://g.co/cloud/speech/docs/languages
    # for a list of supported languages.
    language_code = 'en-US'  # a BCP-47 language tag

    client = speech.SpeechClient()
    config = speech.types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code,
        enable_speaker_diarization=True,
        diarization_speaker_count=2)
    streaming_config = speech.types.StreamingRecognitionConfig(
        config=config, interim_results=True)

    with MicrophoneStream(RATE, CHUNK) as stream:
        audio_generator = stream.generator()
        requests = (types.StreamingRecognizeRequest(audio_content=content)
                    for content in audio_generator)

        responses = client.streaming_recognize(streaming_config, requests)

        # Now, put the transcription responses to use.
        data = listen_print_loop(responses)
        return data
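The config above enables speaker diarization, but listen_print_loop() normally only prints transcripts. To read the speaker labels, inspect the per-word speaker_tag fields on a final result; a sketch (print_speakers is a hypothetical helper):

def print_speakers(responses):
    # With enable_speaker_diarization=True, the words of a final result
    # carry a speaker_tag identifying the inferred speaker.
    for response in responses:
        for result in response.results:
            if not result.is_final or not result.alternatives:
                continue
            for word_info in result.alternatives[0].words:
                print('speaker %d: %s' % (word_info.speaker_tag,
                                          word_info.word))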
Example #12
    def run(self):
        # See http://g.co/cloud/speech/docs/languages
        # for a list of supported languages.
        language_code = 'en-US'  # a BCP-47 language tag

        client = speech.SpeechClient()
        translate_client = translate.Client()

        config = types.RecognitionConfig(
            encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
            sample_rate_hertz=RATE,
            language_code=language_code,
            enable_automatic_punctuation=True)
        streaming_config = types.StreamingRecognitionConfig(
            config=config,
            interim_results=True)

        with MicrophoneStream(RATE, CHUNK) as stream:
            while True:
                audio_generator = stream.generator()
                requests = (types.StreamingRecognizeRequest(audio_content=content)
                            for content in audio_generator)

                responses = client.streaming_recognize(streaming_config, requests)

                # Now, put the transcription responses to use.
                for sentence in listen_print_loop(responses):
                    print(sentence)
                    translation = translate_client.translate(
                            sentence,
                            target_language='zh')
                    zimu = translation['translatedText']
                    self.label.config(text=zimu)
Example #13
def main():
    # See http://g.co/cloud/speech/docs/languages
    # for a list of supported languages.
    language_code = 'en-US'  # a BCP-47 language tag

    client = speech.SpeechClient()
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code)
    streaming_config = types.StreamingRecognitionConfig(config=config,
                                                        interim_results=True)
    while True:
        with MicrophoneStream(RATE, CHUNK) as stream:
            audio_generator = stream.generator()
            requests = (types.StreamingRecognizeRequest(audio_content=content)
                        for content in audio_generator)

            responses = client.streaming_recognize(streaming_config, requests)

            # Now, put the transcription responses to use.
            listen_print_loop(responses)

            # Shut the stream down and signal the generator to terminate so
            # that the client's streaming_recognize method will not block
            # process termination.
            stream._buff.put(None)
            stream._audio_stream.stop_stream()
            stream._audio_stream.close()
            stream.closed = True
            stream._audio_interface.terminate()
Example #14
    def StreamingRecognize(self, request_iterator, context):

        print("Recieved streaming request")

        # Capture the configuration from the first incoming request, which
        # corresponds to the client's StartRecognizing call. All subsequent
        # requests carry an empty configuration.
        if self._is_first_message:
            request = next(request_iterator)
            if request.streaming_config.interim_results:
                self._is_first_message = False
                self._streaming_config = request.streaming_config
        #Here we can inject custom logic to modify audio content
        requests = (types.StreamingRecognizeRequest(
            audio_content=request.audio_content)
                    for request in request_iterator)

        responses = self._speech_client.streaming_recognize(
            self._streaming_config, requests)

        # Instead of printing the transcriptions here (e.g. via
        # self._listen_print_loop(responses)), yield them back to the caller.
        for response in responses:
            yield response

        self._is_first_message = True
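A servicer like the one above only runs once it is registered with a gRPC server. A minimal sketch of that wiring, assuming the generated Speech stubs are importable as cloud_speech_pb2_grpc (the module path varies across google-cloud-speech versions) and that the servicer class above is named SpeechProxy:

from concurrent import futures

import grpc
from google.cloud.speech_v1.proto import cloud_speech_pb2_grpc


def serve():
    server = grpc.server(futures.ThreadPoolExecutor(max_workers=4))
    # SpeechProxy is the (assumed) class containing StreamingRecognize above.
    cloud_speech_pb2_grpc.add_SpeechServicer_to_server(SpeechProxy(), server)
    server.add_insecure_port('[::]:50051')
    server.start()
    server.wait_for_termination()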
Example #15
def main():
    # See http://g.co/cloud/speech/docs/languages
    # for a list of supported languages.
    language_code = 'en-GB'  # a BCP-47 language tag

    # Prepare audio
    subprocess.Popen(shlex.split("killall -9 mpg321"),
                     stdout=subprocess.DEVNULL,
                     stderr=subprocess.DEVNULL)
    subprocess.run(["amixer", "cset", AUDIO_DEVICE_ID, str(INITIAL_VOLUME)])

    client = speech.SpeechClient()
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        model="command_and_search",
        language_code=language_code,
        max_alternatives=10,
        profanity_filter=True,
        speech_contexts=[{
            'phrases': key_phrases
        }])
    streaming_config = types.StreamingRecognitionConfig(config=config,
                                                        interim_results=True)

    with MicrophoneStream(RATE, CHUNK) as stream:
        audio_generator = stream.generator()
        requests = (types.StreamingRecognizeRequest(audio_content=content)
                    for content in audio_generator)

        responses = client.streaming_recognize(streaming_config, requests)

        # Now, put the transcription responses to use.
        listen_print_loop(responses)
Example #16
def main():
    # See http://g.co/cloud/speech/docs/languages
    # for a list of supported languages.
    # Alternatives tried: 'en-US', 'ja-JP', 'te-IN'
    language_code = 'en-IN'  # a BCP-47 language tag
    credentials = service_account.Credentials.from_service_account_file(
        'googleKeys.json')

    # client = speech.SpeechClient()  # without explicit credentials
    client = speech.SpeechClient(credentials=credentials)
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code)
    streaming_config = types.StreamingRecognitionConfig(config=config,
                                                        interim_results=False)

    with MicrophoneStream(RATE, CHUNK) as stream:
        print("inside stream")
        audio_generator = stream.generator()
        requests = (types.StreamingRecognizeRequest(audio_content=content)
                    for content in audio_generator)

        responses = client.streaming_recognize(streaming_config, requests)

        # Now, put the transcription responses to use.
        listen_print_loop(responses)
Example #17
def main():
    # See http://g.co/cloud/speech/docs/languages
    # for a list of supported languages.
    #language_code = 'ja-JP'  # a BCP-47 language tag
    language_code = 'en-CA'  # a BCP-47 language tag
    client = speech.SpeechClient()
    speech_contexts = [
        speech.types.SpeechContext(phrases=place_hint + command_hint +
                                   object_hint)
    ]

    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code,
        speech_contexts=speech_contexts)
    streaming_config = types.StreamingRecognitionConfig(config=config,
                                                        interim_results=True)

    with MicrophoneStream(RATE, CHUNK) as stream:
        audio_generator = stream.generator()
        requests = (types.StreamingRecognizeRequest(audio_content=content)
                    for content in audio_generator)

        responses = client.streaming_recognize(streaming_config, requests)

        # Now, put the transcription responses to use.
        listen_print_loop(responses)
Example #18
def main():
    global running
    # p = vlc.MediaPlayer("./output.mp3")
    # p.play()
    # time.sleep(7)
    # See http://g.co/cloud/speech/docs/languages
    # for a list of supported languages.
    language_code = 'en-US'  # a BCP-47 language tag

    client = speech.SpeechClient()
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code)
    streaming_config = types.StreamingRecognitionConfig(config=config,
                                                        single_utterance=True,
                                                        interim_results=True)

    with MicrophoneStream(RATE, CHUNK) as stream:
        while running:
            # Reset the buffer and start a fresh generator on every pass;
            # single_utterance=True ends the stream after each utterance.
            stream.reset()
            audio_generator = stream.generator()
            requests = (types.StreamingRecognizeRequest(audio_content=content)
                        for content in audio_generator)
            responses = client.streaming_recognize(streaming_config, requests)

            # Now, put the transcription responses to use.
            listen_print_loop(responses)
Example #19
def main():
    # See http://g.co/cloud/speech/docs/languages
    # for a list of supported languages.
    language_code = 'en-US'  # a BCP-47 language tag

    client = speech.SpeechClient()
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code)
    streaming_config = types.StreamingRecognitionConfig(
        config=config,
        interim_results=True)

    with MicrophoneStream(RATE, CHUNK) as stream:
        audio_generator = stream.generator()
        while True:
            requests = (types.StreamingRecognizeRequest(audio_content=content)
                        for content in audio_generator)
            responses = client.streaming_recognize(streaming_config, requests)

            try:
                listen_print_loop(responses)
                break
            except Exception as e:
                sys.stderr.write("Error, retrying: {}".format(e))
                sys.stderr.flush()
        print("ended")
        quit()
Example #20
def main():
    # See http://g.co/cloud/speech/docs/languages
    # for a list of supported languages.
    language_code = "ja-JP"  # a BCP-47 language tag
    # 英語にしたければ 'en-US'

    server_base_url = "http://0.0.0.0:8000/"
    res = requests.post(server_base_url + "init", data="please do init")
    print(json.loads(res.text)["ResultSet"])

    client = speech.SpeechClient()
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code)
    streaming_config = types.StreamingRecognitionConfig(config=config,
                                                        interim_results=True)

    with MicrophoneStream(RATE, CHUNK) as stream:
        audio_generator = stream.generator()
        requests = (types.StreamingRecognizeRequest(audio_content=content)
                    for content in audio_generator)

        responses = client.streaming_recognize(streaming_config, requests)

        # Now, put the transcription responses to use.
        listen_print_loop(responses)
Example #21
def main():
    # See http://g.co/cloud/speech/docs/languages
    # for a list of supported languages.
    language_code = 'ko-KR'  # a BCP-47 language tag

    client = speech.SpeechClient()
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code)
    streaming_config = types.StreamingRecognitionConfig(
        config=config,
        interim_results=True)
    # Character analysis
    model_path = 'data/wiki_dmpv_1000_no_taginfo_word2vec_format.bin'
    #model = w.KeyedVectors.load_word2vec_format(model_path,binary=True, unicode_errors='ignore')

    with MicrophoneStream(RATE, CHUNK) as stream:
        audio_generator = stream.generator()
        requests = (types.StreamingRecognizeRequest(audio_content=content)
                    for content in audio_generator)

        responses = client.streaming_recognize(streaming_config, requests)

        # Now, put the transcription responses to use.
        listen_print_loop(responses)
Example #22
def run_loop(phrases):
    # See http://g.co/cloud/speech/docs/languages
    # for a list of supported languages.
    language_code = 'en-US'  # a BCP-47 language tag

    client = speech.SpeechClient()
    speech_context = types.SpeechContext(phrases=phrases)
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code,
        speech_contexts=[speech_context])
    streaming_config = types.StreamingRecognitionConfig(config=config,
                                                        interim_results=True)

    with MicrophoneStream(RATE, CHUNK) as stream:
        while True:
            try:
                print "running a recognition..."
                audio_generator = stream.generator()
                requests = (types.StreamingRecognizeRequest(
                    audio_content=content) for content in audio_generator)

                responses = client.streaming_recognize(streaming_config,
                                                       requests)

                # Now, put the transcription responses to use.
                listen_print_loop(responses)
            except grpc.RpcError:
                # The stream timed out; restart the recognition loop.
                print("timeout, restarting")
Example #23
    def _start_google_stream(self):
        self._logger.info("[gstar] Start streaming to Google")
        # Configure Google speech recognition
        self._google_client = speech.SpeechClient()
        self._logger.info("[gstar] Got Google client")
        contexts = [types.SpeechContext(phrases=[])]
        config = types.RecognitionConfig(
            encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
            sample_rate_hertz=self._google_rate,
            language_code="en-US",  # BCP-47 uses a hyphen, not "en_US"
            max_alternatives=1,
            profanity_filter=False,
            speech_contexts=contexts,
            enable_word_time_offsets=False)
        self._google_recognition_config = types.StreamingRecognitionConfig(
            config=config, single_utterance=False, interim_results=False)
        self._logger.info("[gstar] Google configuration ready")
        source_audio = (types.StreamingRecognizeRequest(audio_content=content)
                        for content in self._generate_next_buffer())
        self._logger.info("[gstar] source list ready")
        self._google_response_iterator = self._google_client.streaming_recognize(
            self._google_recognition_config, source_audio)
        self._logger.info("[gstar] Streaming started!")
        # 'async' is a project-local scheduling helper (the name predates
        # Python 3.7, where async became a keyword).
        async(self._process_next_response)
Example #24
def main():
    # See http://g.co/cloud/speech/docs/languages
    # for a list of supported languages.
    language_code = 'th-TH'  # a BCP-47 language tag

    client = speech.SpeechClient()
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code)
    streaming_config = types.StreamingRecognitionConfig(config=config,
                                                        interim_results=True)

    rounds = 1
    while True:
        try:
            print('streaming loop :' + str(rounds))
            with MicrophoneStream(RATE, CHUNK) as stream:
                audio_generator = stream.generator()
                # Create request data
                requests = (types.StreamingRecognizeRequest(
                    audio_content=content) for content in audio_generator)
                # POST data to google cloud speech
                responses = client.streaming_recognize(streaming_config,
                                                       requests)
                # Now, put the transcription responses to use.
                listen_print_loop(responses)
        except Exception as err:
            print(err)
            rounds = rounds + 1
Example #25
def start_stream_transcription(*, on_update=lambda *args: None, on_exit=lambda *args: None):
    # See http://g.co/cloud/speech/docs/languages
    # for a list of supported languages.
    language_code = 'en-US'  # a BCP-47 language tag

    client = speech.SpeechClient()
    # Note: this dict uses REST-style keys and is never passed to the API;
    # see the sketch below for wiring diarization into RecognitionConfig.
    diarization_config = {
        "enableSpeakerDiarization": True,
        "minSpeakerCount": 2,
        "maxSpeakerCount": 3,
    }
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code,
        enable_automatic_punctuation=True,
        model="phone_call",
    )
    streaming_config = types.StreamingRecognitionConfig(
        config=config,
        interim_results=True)

    with MicrophoneStream(RATE, CHUNK) as stream:
        audio_generator = stream.generator()
        requests = (types.StreamingRecognizeRequest(audio_content=content)
                    for content in audio_generator)

        responses = client.streaming_recognize(streaming_config, requests)

        # Now, put the transcription responses to use.
        listen_print_loop(responses, on_update=on_update, on_exit=on_exit)
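The diarization_config dict above is never wired into the request. A sketch of how diarization is actually switched on with this client library, reusing the proto fields demonstrated in Example #11 (field availability varies by library version):

config = types.RecognitionConfig(
    encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
    sample_rate_hertz=RATE,
    language_code='en-US',
    enable_automatic_punctuation=True,
    model="phone_call",
    # Proto-style diarization fields (cf. Example #11):
    enable_speaker_diarization=True,
    diarization_speaker_count=2)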
Example #26
def startSpeechDetector(user_name, check_list, language_code, gender, address):
    speech_to_text_client = speech.SpeechClient()
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code)
    streaming_config = types.StreamingRecognitionConfig(config=config,
                                                        interim_results=True)

    text_to_speech(
        "Well done " + user_name + ", now you are all set! Just tell me the "
        "items you are carrying when you leave the house, and I'll remind "
        "you of the forgotten items. Enjoy!",
        language_code, gender)

    with MicrophoneStream(RATE, CHUNK) as stream:
        audio_generator = stream.generator()
        requests = (types.StreamingRecognizeRequest(audio_content=content)
                    for content in audio_generator)

        speech_to_text_responses = speech_to_text_client.streaming_recognize(
            streaming_config, requests)
        # Now, put the transcription responses to use.
        while True:
            location = geocoder.ip('me')
            latlng_string = str(location.latlng[0]) + ', ' + str(
                location.latlng[1])
            estimate_time = get_time_estimation(latlng_string, address)
            listen_respond_loop(speech_to_text_responses, check_list,
                                user_name, language_code, gender, address,
                                estimate_time)
            sleep(0.05)
Example #27
def main():
    rospy.init_node('Speech_node')
    language_code = 'en-US'
    if_restart = True
    client = speech.SpeechClient()
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code,
        speech_contexts=[{
            "phrases": [
                "attach", "move", "make", "get", "grab", "take", "pick",
                "select", "put", "object", "item", "objects", "items", "one",
                "ones", "cube", "cubes", "blocks", "block", "guys", "guy",
                "here", "there", "place", "location", "position", "yellow",
                "green", "blue", "this", "that", "these", "those", "targets",
                "target", "it", "transport"
            ]
        }])
    streaming_config = types.StreamingRecognitionConfig(config=config,
                                                        interim_results=True)
    while if_restart:
        with MicrophoneStream(RATE, CHUNK) as stream:
            audio_generator = stream.generator()
            requests = (types.StreamingRecognizeRequest(audio_content=content)
                        for content in audio_generator)

            responses = client.streaming_recognize(streaming_config, requests)
            if_restart = listen_print_loop(responses)
Example #28
    def process(self):
        """
        Audio stream recognition and result parsing
        """
        print("Processing audio on site " + self.site_id + "...")
        client = speech.SpeechClient()
        config = types.RecognitionConfig(
            encoding=self.encoding,
            sample_rate_hertz=self.rate,
            language_code=self.language
        )
        streaming_config = types.StreamingRecognitionConfig(
            config=config,
            interim_results=False,
            single_utterance=False)
        audio_generator = self.stream_generator()
        requests = (types.StreamingRecognizeRequest(audio_content=content)
                    for content in audio_generator)

        responses = client.streaming_recognize(streaming_config, requests)
        try:
            self.response_loop(responses)
        except Exception:
            # self.start()
            print("Audio processing stopped on site " + self.site_id)
            return
Example #29
    def decodeStream(self, session: DialogSession) -> Optional[ASRResult]:
        super().decodeStream(session)

        recorder = Recorder(self._timeout, session.user, session.siteId)
        self.ASRManager.addRecorder(session.siteId, recorder)
        self._recorder = recorder
        result = None  # stays None if the ASR request below fails
        with Stopwatch() as processingTime:
            with recorder as stream:
                audioStream = stream.audioStream()
                # noinspection PyUnresolvedReferences
                try:
                    requests = (types.StreamingRecognizeRequest(
                        audio_content=content) for content in audioStream)
                    responses = self._client.streaming_recognize(
                        self._streamingConfig, requests)
                    result = self._checkResponses(session, responses)
                except:
                    self.logWarning('Failed ASR request')
            self.end()

        return ASRResult(
            text=result[0],
            session=session,
            likelihood=result[1],
            processingTime=processingTime.time) if result else None
Example #30
def main():
    # See http://g.co/cloud/speech/docs/languages
    # for a list of supported languages.
    language_code = 'en-GB'  # a BCP-47 language tag
    credentials = service_account.Credentials.from_service_account_file(
        'creds.json')

    client = speech.SpeechClient(credentials=credentials)
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code,
        # max_alternatives=5,
        speech_contexts=[
            types.SpeechContext(phrases=[
                "poo", "f**k", "f*****g", "arse", "bollocks", "s***e",
                "innovation"
            ], )
        ])
    streaming_config = types.StreamingRecognitionConfig(config=config,
                                                        interim_results=True)

    with MicrophoneStream(RATE, CHUNK) as stream:
        audio_generator = stream.generator()
        requests = (types.StreamingRecognizeRequest(audio_content=content)
                    for content in audio_generator)

        responses = client.streaming_recognize(streaming_config, requests)

        # Now, put the transcription responses to use.
        try:
            listen_print_loop(responses)
        except exceptions.OutOfRange:
            main()