def record():
    """Stream microphone audio to Google Cloud Speech and return the transcript.

    Keeps opening fresh recognition streams on the same microphone until
    listen_print_loop() returns something other than the '-1' sentinel
    (which signals "no usable result yet, try again").

    Returns:
        The completed transcription returned by listen_print_loop().
    """
    # See http://g.co/cloud/speech/docs/languages
    # for a list of supported languages.
    language_code = 'en-US'  # a BCP-47 language tag

    client = speech.SpeechClient()
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code)
    streaming_config = types.StreamingRecognitionConfig(
        config=config, interim_results=True)

    with MicrophoneStream(RATE, CHUNK) as stream:
        # Fix: the request/response setup was written out twice (once before
        # the retry loop and once inside it); a single loop removes the
        # duplication without changing behavior.
        while True:
            audio_generator = stream.generator()
            requests = (types.StreamingRecognizeRequest(audio_content=content)
                        for content in audio_generator)
            responses = client.streaming_recognize(streaming_config, requests)
            complete_response = listen_print_loop(responses)
            if complete_response != '-1':
                break

    # Now, put the transcription responses to use.
    return complete_response
def start_recognize(self):
    """Continuously stream microphone audio to Google Cloud Speech.

    Restarts the gRPC stream after every pass of listen_print_loop() until
    self.exit is set. Assumes self.client, self.streaming_config and
    self.exit were prepared elsewhere (not visible in this block).
    """
    with MicrophoneStream(RATE, CHUNK) as stream:
        self.audio_generator = stream.generator()
        requests = (types.StreamingRecognizeRequest(audio_content=content)
                    for content in self.audio_generator)
        self.responses = self.client.streaming_recognize(
            self.streaming_config, requests)
        # Now, put the transcription responses to use.
        try:
            while not self.exit:
                self.listen_print_loop(self.responses)
                # Discard this stream and create a new one.
                # Note: calling .cancel() doesn't immediately raise an RpcError
                # - it only raises when the iterator's next() is requested
                self.responses.cancel()
                self.audio_generator = stream.generator()
                logging.debug('Starting new stream')
                requests = (types.StreamingRecognizeRequest(
                    audio_content=content) for content in self.audio_generator)
                self.responses = self.client.streaming_recognize(
                    self.streaming_config, requests)
                if self.exit:
                    break
        except grpc.RpcError:
            # This happens because of the interrupt handler.
            # NOTE(review): Python 2 print statement; "GPRC" looks like a
            # typo for "gRPC" in the message.
            print "error in GPRC"
            pass
def main_2():
    """Recognize the two channels of a stereo test recording, one at a time.

    Reads ./data/rssd_A.wav and ./data/rssd_B.wav through
    MicrophoneStream.generator_from_files(), streams channel 0 and then
    channel 1 to Google Cloud Speech, printing results via listen_print_loop().
    """
    import os
    # NOTE(review): hard-coded absolute credential path — works only on the
    # original author's machine; consider making it configurable.
    os.environ[
        "GOOGLE_APPLICATION_CREDENTIALS"] = "/home/aleks/gcp/speechtotext-281122-7d0875e1d1ca.json"
    print(os.environ['GOOGLE_APPLICATION_CREDENTIALS'])

    # See http://g.co/cloud/speech/docs/languages
    # for a list of supported languages.
    language_code = 'en-US'  # a BCP-47 language tag

    client = speech.SpeechClient()
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code)
    streaming_config = types.StreamingRecognitionConfig(config=config,
                                                        interim_results=True)

    with MicrophoneStream(RATE, CHUNK) as stream:
        # One recognition pass per stereo channel. The file-backed generator
        # is exhausted by a pass, so it is re-created for the second channel
        # (removed the previous copy-pasted duplication and a block of
        # commented-out debugging code).
        for channel in (0, 1):
            audio_generator = stream.generator_from_files(
                './data/rssd_A.wav', './data/rssd_B.wav')
            requests = (types.StreamingRecognizeRequest(
                audio_content=cis.convert_toPyAudio(
                    content[channel]).tostring())
                        for content in audio_generator)
            responses = client.streaming_recognize(streaming_config, requests)
            # Now, put the transcription responses to use.
            listen_print_loop(responses)
def main():
    """Transcribe one utterance, then speak it back over a Nexmo voice call."""
    # See http://g.co/cloud/speech/docs/languages
    # for a list of supported languages.
    language_code = 'en-US'  # a BCP-47 language tag

    client = speech.SpeechClient()
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code)
    streaming_config = types.StreamingRecognitionConfig(
        config=config, interim_results=True)

    with MicrophoneStream(RATE, CHUNK) as stream:
        audio_generator = stream.generator()
        requests = (types.StreamingRecognizeRequest(audio_content=content)
                    for content in audio_generator)
        responses = client.streaming_recognize(streaming_config, requests)

        # Now, put the transcription responses to use.
        transcription = listen_print_loop(responses)

    # Rebind `client` to the Nexmo voice client and place an outbound call
    # whose NCCO speaks the transcription aloud.
    # NOTE(review): application id, key path and both phone numbers are
    # hard-coded — consider moving them to configuration.
    client = nexmo.Client(
        application_id = '826a404d-9c0d-49d5-ad96-63f39d07bb49',
        private_key = 'private.key'
    )
    response = client.create_call({
        'to': [{'type':'phone', 'number': '447519432230'}],
        'from': {'type': 'phone', 'number': '447418343967'},
        # 'answer_url': ['https://developer.nexmo.com/ncco/tts.json']
        'ncco': [{'action':'talk', 'text':transcription}]
    })
def main2():
    """Open the microphone, run one streaming-recognition session, print results."""
    recognizer = speech.SpeechClient()
    recognition_config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code='en-US')  # a BCP-47 language tag
    stream_config = types.StreamingRecognitionConfig(
        config=recognition_config, interim_results=True)

    with MicrophoneStream(RATE, CHUNK) as mic:
        request_stream = (
            types.StreamingRecognizeRequest(audio_content=chunk)
            for chunk in mic.generator())
        response_stream = recognizer.streaming_recognize(stream_config,
                                                         request_stream)
        # Now, put the transcription responses to use.
        print("here")
        listen_print_loop(response_stream)
        time.sleep(0.01)
        print("here2")
def main():
    """Choose a game mode, launch the slideshow, then listen for spoken cue words."""
    mode = choose_mode()
    cues = get_word_list(mode)  # WORD LIST
    launchppt()

    # See http://g.co/cloud/speech/docs/languages
    # for a list of supported languages.
    recognizer = speech.SpeechClient()
    recognition_config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code='en-US')  # a BCP-47 language tag
    stream_config = types.StreamingRecognitionConfig(
        config=recognition_config, interim_results=True)

    with MicrophoneStream(RATE, CHUNK) as mic:
        request_stream = (types.StreamingRecognizeRequest(audio_content=chunk)
                          for chunk in mic.generator())
        response_stream = recognizer.streaming_recognize(stream_config,
                                                         request_stream)
        listen_for_cue(response_stream, cues, mode)
def run(self):
    """Thread body: stream chunks from self.dataStream to Google Cloud Speech.

    Every recognition result is pushed onto self.anal.queue. Runs until
    self.running is cleared; only the "iterating requests" error — which
    presumably means the input stream ended (TODO confirm) — stops the loop.
    """
    syslog.syslog("starting: " + self.name)
    self.client = speech.SpeechClient()
    while self.running:
        stream = self.dataStream(self)
        #syslog.syslog(self.name+"for chunk in stream")
        #for chunk in stream:
        #    syslog.syslog(self.name+" chunk size:"+str(len(chunk)))
        requests = (types.StreamingRecognizeRequest(audio_content=chunk)
                    for chunk in stream)
        config = types.RecognitionConfig(
            encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
            sample_rate_hertz=44100,
            language_code='en-US')
        streaming_config = types.StreamingRecognitionConfig(
            config=config, interim_results=True)
        # streaming_recognize returns a generator.
        try:
            responses = self.client.streaming_recognize(
                streaming_config, requests)
            for response in responses:
                for result in response.results:
                    self.anal.queue.put(result)
        except Exception as e:
            print("%s: exception %s" % (self.name, e))
            # Unknown errors are logged with a traceback and the loop
            # retries; the "iterating requests" error stops the thread.
            if str(e).find("iterating requests") == -1:
                traceback.print_exc()
            else:
                print("%s: %s" % (self.name, e))
                self.running = False
def bigBrother():
    """Listen on the microphone; pass the first final transcript to searchSkills."""
    # Google Imports - Cant have them leaking
    from google.cloud import speech
    from google.cloud.speech import enums
    from google.cloud.speech import types

    # Begin google code
    language_code = 'en-US'
    client = speech.SpeechClient()
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code)
    streaming_config = types.StreamingRecognitionConfig(config=config,
                                                        interim_results=True)

    with MicrophoneStream(RATE, CHUNK) as stream:
        audio_generator = stream.generator()
        requests = (types.StreamingRecognizeRequest(audio_content=content)
                    for content in audio_generator)
        responses = client.streaming_recognize(streaming_config, requests)
        for response in responses:
            # Fix: a streaming response can arrive with an empty `results`
            # list; indexing it blindly raised IndexError.
            if not response.results:
                continue
            if response.results[
                    0].is_final:  # Checks if the word is the most likely...
                searchSkills(response.results[0].alternatives[0].transcript
                             )  # check if its right
                return  # Bail out!
def startStream(self):
    """Run a read-aloud session: compare recognized speech against a passage.

    Streams microphone audio to Google Cloud Speech, walks the expected
    passage sentence by sentence, updates the UI labels/buttons as the
    reader progresses, stores missed words on the running App, and returns
    them.
    """
    def read(responses, passage):
        # Consume recognition responses, advancing through `passage`;
        # returns the accumulated missed words once the passage is done.
        missed = []
        passage_index = 0
        self.passage_label = str(".\n".join(passage[passage_index:]) + ".")
        for response in responses:
            if not response.results:
                continue
            result = response.results[0]
            if not result.alternatives:
                continue
            if result.is_final:
                print(result.alternatives[0].transcript)
                print(
                    passageCheck(passage[passage_index],
                                 result.alternatives[0].transcript))
                # comp_result[0]: words missed; comp_result[1]: words still
                # outstanding in the current sentence (empty => sentence done)
                # — presumed from usage below, TODO confirm passageCheck.
                comp_result = passageCheck(
                    passage[passage_index], result.alternatives[0].transcript)
                missed += comp_result[0]
                if not comp_result[1]:
                    # Sentence finished: advance and disable "repeat".
                    passage_index += 1
                    self.ids.repeat_button.disabled = True
                else:
                    # Sentence unfinished: keep only the remaining words,
                    # play the next word's pronunciation and show a tip.
                    passage[passage_index] = " ".join(comp_result[1])
                    generatePronun(comp_result[1][0])
                    self.ids.repeat_button.disabled = False
                    missed += [comp_result[1][0]]
                    self.help_label = str(
                        "Tip: " + " ".join(getWord(
                            comp_result[1][0])))  # call dictionary lookup
                self.input_label = result.alternatives[0].transcript
                if passage_index < len(passage):
                    self.passage_label = str(
                        ".\n".join(passage[passage_index:]) + ".")
                if passage_index == len(passage):
                    self.passage_label = str("")
                    print("woo")
                    return missed

    language_code = 'en-US'  # a BCP-47 language tag
    client = speech.SpeechClient()
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code)
    streaming_config = types.StreamingRecognitionConfig(
        config=config, interim_results=True)

    with MicrophoneStream(RATE, CHUNK) as stream:
        audio_generator = stream.generator()
        requests = (types.StreamingRecognizeRequest(audio_content=content)
                    for content in audio_generator)
        responses = client.streaming_recognize(streaming_config, requests)
        # Now, put the transcription responses to use.
        finals = read(responses, passages.english)
        App.get_running_app().missed_keys = finals
        print("yippee")
        return finals
def main():
    """Continuously stream microphone audio to Google Cloud Speech.

    Retries forever on errors. Fix: the original retried by calling main()
    recursively from a bare ``except:``, which (a) swallowed
    KeyboardInterrupt/SystemExit and (b) grew the call stack on every
    failure; a loop with ``except Exception`` keeps the retry-forever
    behavior without either problem.
    """
    # Supports 119 languages
    language_code = 'en-US'  # a BCP-47 language tag list can be found at: https://cloud.google.com/speech-to-text/docs/languages
    while True:
        try:
            client = speech.SpeechClient()
            config = types.RecognitionConfig(
                encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
                sample_rate_hertz=RATE,
                language_code=language_code)
            streaming_config = types.StreamingRecognitionConfig(
                config=config, interim_results=True)

            with MicrophoneStream(RATE, CHUNK) as stream:
                audio_generator = stream.generator()
                requests = (types.StreamingRecognizeRequest(
                    audio_content=content) for content in audio_generator)
                responses = client.streaming_recognize(streaming_config,
                                                       requests)
                # Now, put the transcription responses to use.
                listen_print_loop(responses)
            return  # completed without error
        except Exception:
            continue  # retry, mirroring the original recursive restart
def get_speech_data():
    """Run one streaming session with 2-speaker diarization and return its result.

    Returns whatever listen_print_loop() produces for this session.
    """
    # threading.Timer(10.0, main).start()
    print("running main..")
    # See http://g.co/cloud/speech/docs/languages
    # for a list of supported languages.
    language_code = 'en-US'  # a BCP-47 language tag

    client = speech.SpeechClient()
    # Consistency fix: this block mixed `speech.types.*` and bare `types.*`
    # for the same classes; use the bare `types` alias like the rest of the
    # file.
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code,
        enable_speaker_diarization=True,
        diarization_speaker_count=2)
    streaming_config = types.StreamingRecognitionConfig(
        config=config, interim_results=True)

    with MicrophoneStream(RATE, CHUNK) as stream:
        audio_generator = stream.generator()
        requests = (types.StreamingRecognizeRequest(audio_content=content)
                    for content in audio_generator)
        responses = client.streaming_recognize(streaming_config, requests)
        # Now, put the transcription responses to use.
        data = listen_print_loop(responses)
        return data
def run(self):
    """Endlessly transcribe English speech and show its Chinese translation.

    Each sentence yielded by listen_print_loop() is printed, translated to
    'zh' via the Google Translate API, and written to self.label.
    """
    # See http://g.co/cloud/speech/docs/languages
    # for a list of supported languages.
    recognizer = speech.SpeechClient()
    translator = translate.Client()
    recognition_config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code='en-US',  # a BCP-47 language tag
        enable_automatic_punctuation=True)
    stream_config = types.StreamingRecognitionConfig(
        config=recognition_config, interim_results=True)

    with MicrophoneStream(RATE, CHUNK) as mic:
        while True:
            request_stream = (
                types.StreamingRecognizeRequest(audio_content=chunk)
                for chunk in mic.generator())
            response_stream = recognizer.streaming_recognize(
                stream_config, request_stream)
            # Now, put the transcription responses to use.
            for sentence in listen_print_loop(response_stream):
                print(sentence)
                translated = translator.translate(sentence,
                                                  target_language='zh')
                self.label.config(text=translated['translatedText'])
def main():
    """Run back-to-back recognition sessions, tearing the stream down between them."""
    # See http://g.co/cloud/speech/docs/languages
    # for a list of supported languages.
    language_code = 'en-US'  # a BCP-47 language tag

    client = speech.SpeechClient()
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code)
    streaming_config = types.StreamingRecognitionConfig(config=config,
                                                        interim_results=True)

    while True:
        with MicrophoneStream(RATE, CHUNK) as stream:
            audio_generator = stream.generator()
            requests = (types.StreamingRecognizeRequest(audio_content=content)
                        for content in audio_generator)
            responses = client.streaming_recognize(streaming_config, requests)
            # Now, put the transcription responses to use.
            listen_print_loop(responses)
            # NOTE(review): the lines below reach into MicrophoneStream's
            # private attributes (_buff, _audio_stream, _audio_interface) and
            # presumably duplicate its __exit__ cleanup — confirm against the
            # class; duplicated teardown is fragile even if harmless.
            stream._buff.put(None)
            stream._audio_stream.stop_stream()
            stream._audio_stream.close()
            stream.closed = True
            # Signal the generator to terminate so that the client's
            # streaming_recognize method will not block the process termination.
            stream._audio_interface.terminate()
def StreamingRecognize(self, request_iterator, context):
    """Proxy a client's StreamingRecognize call through to Google Cloud Speech.

    The first request of a new stream carries the streaming configuration
    (sent by StartRecognizing on the client side); all later requests carry
    only audio. The captured config is reused for the forwarded call and the
    first-message flag is re-armed once the stream ends.

    Yields:
        StreamingRecognizeResponse messages from the upstream service.
    """
    print("Recieved streaming request")
    # We need to capture a configuration from the first incoming request,
    # which corresponds to call of StartRecognizing on the client side. All
    # other requests will have configuration empty.
    if self._is_first_message:  # idiom fix: was `== True`
        request = next(request_iterator)
        # NOTE(review): when interim_results is falsy the first request is
        # consumed but its config is never stored — confirm that is intended.
        if request.streaming_config.interim_results:
            self._is_first_message = False
            self._streaming_config = request.streaming_config
    # Here we can inject custom logic to modify audio content.
    requests = (types.StreamingRecognizeRequest(
        audio_content=request.audio_content) for request in request_iterator)
    responses = self._speech_client.streaming_recognize(
        self._streaming_config, requests)
    # Now, print the transcription responses to use,
    # self._listen_print_loop(responses)
    # but actually we need to yield them.
    for response in responses:
        yield response
    self._is_first_message = True
def main():
    """Kill any playing audio, set the mixer volume, then run one recognition session."""
    # See http://g.co/cloud/speech/docs/languages
    # for a list of supported languages.
    # Prepare audio
    subprocess.Popen(shlex.split("killall -9 mpg321"),
                     stdout=subprocess.DEVNULL,
                     stderr=subprocess.DEVNULL)
    subprocess.run(["amixer", "cset", AUDIO_DEVICE_ID, str(INITIAL_VOLUME)])

    recognizer = speech.SpeechClient()
    recognition_config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        model="command_and_search",
        language_code='en-GB',  # a BCP-47 language tag
        max_alternatives=10,
        profanity_filter=True,
        speech_contexts=[{'phrases': key_phrases}])
    stream_config = types.StreamingRecognitionConfig(
        config=recognition_config, interim_results=True)

    with MicrophoneStream(RATE, CHUNK) as mic:
        request_stream = (types.StreamingRecognizeRequest(audio_content=chunk)
                          for chunk in mic.generator())
        response_stream = recognizer.streaming_recognize(stream_config,
                                                         request_stream)
        # Now, put the transcription responses to use.
        listen_print_loop(response_stream)
def main():
    """Run one Indian-English recognition session using service-account credentials."""
    # See http://g.co/cloud/speech/docs/languages for supported languages.
    # Language codes tried previously: 'en-US', 'ja-JP', 'te-IN'.
    language_code = 'en-IN'  # a BCP-47 language tag te-IN en-IN

    credentials = service_account.Credentials.from_service_account_file(
        'googleKeys.json')
    recognizer = speech.SpeechClient(credentials=credentials)
    recognition_config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code)
    # Final results only — no interim hypotheses.
    stream_config = types.StreamingRecognitionConfig(
        config=recognition_config, interim_results=False)

    with MicrophoneStream(RATE, CHUNK) as mic:
        print("inside stream")
        request_stream = (types.StreamingRecognizeRequest(audio_content=chunk)
                          for chunk in mic.generator())
        response_stream = recognizer.streaming_recognize(stream_config,
                                                         request_stream)
        # Now, put the transcription responses to use.
        listen_print_loop(response_stream)
def main():
    """Recognize Canadian-English speech, biased toward known places/commands/objects."""
    # See http://g.co/cloud/speech/docs/languages for supported languages.
    recognizer = speech.SpeechClient()
    hint_phrases = place_hint + command_hint + object_hint
    contexts = [speech.types.SpeechContext(phrases=hint_phrases)]
    recognition_config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code='en-CA',  # a BCP-47 language tag
        speech_contexts=contexts)
    stream_config = types.StreamingRecognitionConfig(
        config=recognition_config, interim_results=True)

    with MicrophoneStream(RATE, CHUNK) as mic:
        request_stream = (types.StreamingRecognizeRequest(audio_content=chunk)
                          for chunk in mic.generator())
        response_stream = recognizer.streaming_recognize(stream_config,
                                                         request_stream)
        # Now, put the transcription responses to use.
        listen_print_loop(response_stream)
def main():
    """Run single-utterance recognition sessions until `running` is cleared.

    Each loop iteration resets the microphone stream and opens a fresh
    single-utterance recognition stream.
    """
    global running
    # p = vlc.MediaPlayer("./output.mp3")
    # p.play()
    # time.sleep(7)
    # See http://g.co/cloud/speech/docs/languages
    # for a list of supported languages.
    language_code = 'en-US'  # a BCP-47 language tag

    client = speech.SpeechClient()
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code)
    streaming_config = types.StreamingRecognitionConfig(
        config=config, single_utterance=True, interim_results=True)

    with MicrophoneStream(RATE, CHUNK) as stream:
        # Fix: removed a dead `audio_generator = stream.generator()` that was
        # immediately overwritten on the first loop iteration.
        while running:
            stream.reset()
            audio_generator = stream.generator()
            requests = (types.StreamingRecognizeRequest(audio_content=content)
                        for content in audio_generator)
            responses = client.streaming_recognize(streaming_config, requests)
            # Now, put the transcription responses to use.
            listen_print_loop(responses)
def main():
    """Transcribe microphone audio, retrying until one pass succeeds.

    Python 2 code (print statement, `except Exception, e` syntax).
    """
    # See http://g.co/cloud/speech/docs/languages
    # for a list of supported languages.
    language_code = 'en-US'  # a BCP-47 language tag

    client = speech.SpeechClient()
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code)
    streaming_config = types.StreamingRecognitionConfig(
        config=config, interim_results=True)

    with MicrophoneStream(RATE, CHUNK) as stream:
        # NOTE(review): the audio generator is created once and shared by
        # every retry; a failed attempt may have partially consumed it, so a
        # retry resumes from live audio rather than restarting — confirm
        # that is intended (other functions in this file re-create it).
        audio_generator = stream.generator()
        while True:
            requests = (types.StreamingRecognizeRequest(audio_content=content)
                        for content in audio_generator)
            responses = client.streaming_recognize(streaming_config, requests)
            try:
                listen_print_loop(responses)
                break  # one full pass succeeded
            except Exception, e:
                sys.stderr.write("Error, retrying: {}".format(e))
                sys.stderr.flush()
    print "ended"
    quit()
def main():
    """Initialize a local server, then stream Japanese speech to Cloud Speech.

    Bug fix: the audio request generator was bound to the name `requests`,
    which shadows the `requests` HTTP library used earlier in the function.
    Because Python treats any name assigned anywhere in a function as local
    to the whole function body, the earlier `requests.post(...)` raised
    UnboundLocalError before any streaming could happen. The generator is
    now named `audio_requests`.
    """
    # See http://g.co/cloud/speech/docs/languages
    # for a list of supported languages.
    language_code = "ja-JP"  # a BCP-47 language tag
    # Use 'en-US' here for English instead.

    server_base_url = "http://0.0.0.0:8000/"
    res = requests.post(server_base_url + "init", data="please do init")
    print(json.loads(res.text)["ResultSet"])

    client = speech.SpeechClient()
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code)
    streaming_config = types.StreamingRecognitionConfig(config=config,
                                                        interim_results=True)

    with MicrophoneStream(RATE, CHUNK) as stream:
        audio_generator = stream.generator()
        audio_requests = (
            types.StreamingRecognizeRequest(audio_content=content)
            for content in audio_generator)
        responses = client.streaming_recognize(streaming_config,
                                               audio_requests)
        # Now, put the transcription responses to use.
        listen_print_loop(responses)
def main():
    """Run one Korean streaming-recognition session and print its transcripts."""
    # See http://g.co/cloud/speech/docs/languages
    # for a list of supported languages.
    recognizer = speech.SpeechClient()
    recognition_config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code='ko-KR')  # a BCP-47 language tag
    stream_config = types.StreamingRecognitionConfig(
        config=recognition_config, interim_results=True)

    # Character analysis (word2vec model is currently not loaded).
    model_path = 'data/wiki_dmpv_1000_no_taginfo_word2vec_format.bin'
    #model = w.KeyedVectors.load_word2vec_format(model_path,binary=True, unicode_errors='ignore')

    with MicrophoneStream(RATE, CHUNK) as mic:
        request_stream = (types.StreamingRecognizeRequest(audio_content=chunk)
                          for chunk in mic.generator())
        response_stream = recognizer.streaming_recognize(stream_config,
                                                         request_stream)
        # Now, put the transcription responses to use.
        listen_print_loop(response_stream)
def run_loop(phrases):
    """Recognize speech forever, restarting after gRPC stream timeouts.

    Python 2 code (print statements). `phrases` biases recognition via a
    SpeechContext.

    NOTE(review): grpc._channel._Rendezvous is a private gRPC class that was
    renamed in newer grpc releases, so this except clause is version-fragile.
    """
    # See http://g.co/cloud/speech/docs/languages
    # for a list of supported languages.
    language_code = 'en-US'  # a BCP-47 language tag

    client = speech.SpeechClient()
    speech_context = types.SpeechContext(phrases=phrases)
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code,
        speech_contexts=[speech_context])
    streaming_config = types.StreamingRecognitionConfig(config=config,
                                                        interim_results=True)

    with MicrophoneStream(RATE, CHUNK) as stream:
        while True:
            try:
                print "running a recognition..."
                audio_generator = stream.generator()
                requests = (types.StreamingRecognizeRequest(
                    audio_content=content) for content in audio_generator)
                responses = client.streaming_recognize(streaming_config,
                                                       requests)
                # Now, put the transcription responses to use.
                listen_print_loop(responses)
            except grpc._channel._Rendezvous as e:
                # Stream hit the API's duration limit; loop to restart it.
                print "timeout, restarting"
                pass
def _start_google_stream(self): self._logger.info("[gstar] Start streaming to Google") # Configure Google speech recognition self._google_client = speech.SpeechClient() self._logger.info("[gstar] Got Google client") contexts = [types.SpeechContext(phrases=[])] config = types.RecognitionConfig( encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16, sample_rate_hertz=self._google_rate, language_code="en_US", max_alternatives=1, profanity_filter=False, speech_contexts=contexts, enable_word_time_offsets=False) self._google_recognition_config = types.StreamingRecognitionConfig( config=config, single_utterance=False, interim_results=False) self._logger.info("[gstar] Google configuration ready") source_audio = (types.StreamingRecognizeRequest(audio_content=content) for content in self._generate_next_buffer()) self._logger.info("[gstar] source list ready") self._google_response_iterator = self._google_client.streaming_recognize( self._google_recognition_config, self._generate_next_buffer()) # source_audio) self._logger.info("[gstar] Streaming started!") async (self._process_next_response)
def main():
    """Transcribe Thai speech forever, restarting the stream after any error."""
    # See http://g.co/cloud/speech/docs/languages
    # for a list of supported languages.
    recognizer = speech.SpeechClient()
    recognition_config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code='th-TH')  # a BCP-47 language tag
    stream_config = types.StreamingRecognitionConfig(
        config=recognition_config, interim_results=True)

    rounds = 1
    while True:
        try:
            print('streaming loop :' + str(rounds))
            with MicrophoneStream(RATE, CHUNK) as mic:
                # Create request data
                request_stream = (
                    types.StreamingRecognizeRequest(audio_content=chunk)
                    for chunk in mic.generator())
                # POST data to google cloud speech
                response_stream = recognizer.streaming_recognize(
                    stream_config, request_stream)
                # Now, put the transcription responses to use.
                listen_print_loop(response_stream)
        except Exception as err:
            print(err)
        rounds = rounds + 1
def start_stream_transcription(*, on_update=lambda *args: None, on_exit=lambda *args: None):
    """Transcribe microphone audio with the phone_call model, reporting via callbacks.

    Args:
        on_update: invoked by listen_print_loop with interim/final updates.
        on_exit: invoked by listen_print_loop when the session ends.
    """
    # See http://g.co/cloud/speech/docs/languages
    # for a list of supported languages.
    language_code = 'en-US'  # a BCP-47 language tag

    client = speech.SpeechClient()
    # Fix: removed an unused `diarization_config` dict (camelCase JSON keys,
    # never passed into RecognitionConfig). If speaker diarization is wanted,
    # wire it up via types.SpeakerDiarizationConfig instead.
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code,
        enable_automatic_punctuation=True,
        model="phone_call",
    )
    streaming_config = types.StreamingRecognitionConfig(
        config=config, interim_results=True)

    with MicrophoneStream(RATE, CHUNK) as stream:
        audio_generator = stream.generator()
        requests = (types.StreamingRecognizeRequest(audio_content=content)
                    for content in audio_generator)
        responses = client.streaming_recognize(streaming_config, requests)
        # Now, put the transcription responses to use.
        listen_print_loop(responses, on_update=on_update, on_exit=on_exit)
def startSpeechDetector(user_name, check_list, language_code, gender, address):
    """Listen for items the user says they are carrying; remind them of forgotten ones.

    Args:
        user_name: name spoken back to the user in the greeting.
        check_list: items to check against what the user says.
        language_code: BCP-47 tag used for recognition and speech output.
        gender: voice gender passed to text_to_speech.
        address: destination used for the travel-time estimate.
    """
    speech_to_text_client = speech.SpeechClient()
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code)
    streaming_config = types.StreamingRecognitionConfig(config=config,
                                                        interim_results=True)
    # NOTE(review): "Well Done" lacks a space before the name, and "new you
    # are all set" looks like a typo for "now you are all set" — left
    # unchanged here because it is runtime text spoken to the user.
    text_to_speech(
        "Well Done" + user_name +
        ", new you are all set!, Just tell me the items you are carrying out when you leave the house, I'll remind you the forgotten items. Enjoy!",
        language_code, gender)
    with MicrophoneStream(RATE, CHUNK) as stream:
        audio_generator = stream.generator()
        requests = (types.StreamingRecognizeRequest(audio_content=content)
                    for content in audio_generator)
        speech_to_text_responses = speech_to_text_client.streaming_recognize(
            streaming_config, requests)
        # Now, put the transcription responses to use.
        while True:
            # Re-estimate travel time from the current IP-based location on
            # every pass so reminders stay current.
            location = geocoder.ip('me')
            latlng_string = str(location.latlng[0]) + ', ' + str(
                location.latlng[1])
            estimate_time = get_time_estimation(latlng_string, address)
            listen_respond_loop(speech_to_text_responses, check_list,
                                user_name, language_code, gender, address,
                                estimate_time)
            sleep(0.05)
def main():
    """ROS node: keep running recognition sessions until listen_print_loop says stop."""
    rospy.init_node('Speech_node')
    recognizer = speech.SpeechClient()
    # Bias recognition toward the robot's command vocabulary.
    phrase_hints = [
        "attach", "move", "make", "get", "grab", "take", "pick", "select",
        "put", "object", "item", "objects", "items", "one", "ones", "cube",
        "cubes", "blocks", "block", "guys", "guy", "here", "there", "place",
        "location", "position", "yellow", "green", "blue", "this", "that",
        "these", "those", "targets", "target", "it", "transport"
    ]
    recognition_config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code='en-US',
        speech_contexts=[{"phrases": phrase_hints}])
    stream_config = types.StreamingRecognitionConfig(
        config=recognition_config, interim_results=True)

    keep_going = True
    while keep_going:
        with MicrophoneStream(RATE, CHUNK) as mic:
            request_stream = (
                types.StreamingRecognizeRequest(audio_content=chunk)
                for chunk in mic.generator())
            response_stream = recognizer.streaming_recognize(stream_config,
                                                             request_stream)
            keep_going = listen_print_loop(response_stream)
def process(self):
    """Audio stream recognition and result parsing.

    Streams self.stream_generator() audio to Google Cloud Speech and feeds
    the responses to self.response_loop(); any error ends processing for
    this site.
    """
    print("Processing audio on site " + self.site_id + "...")
    client = speech.SpeechClient()
    config = types.RecognitionConfig(
        encoding=self.encoding,
        sample_rate_hertz=self.rate,
        language_code=self.language
    )
    streaming_config = types.StreamingRecognitionConfig(
        config=config, interim_results=False, single_utterance=False)

    audio_generator = self.stream_generator()
    requests = (types.StreamingRecognizeRequest(audio_content=content)
                for content in audio_generator)
    responses = client.streaming_recognize(streaming_config, requests)
    try:
        self.response_loop(responses)
    except Exception:
        # Fix: was a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt; still best-effort, but scoped to Exception.
        # self.start()
        print("Audio processing stopped on site " + self.site_id)
        return
def decodeStream(self, session: DialogSession) -> Optional[ASRResult]:
    """Record one utterance and decode it through Google Cloud Speech.

    Returns:
        An ASRResult carrying the best transcript and its likelihood, or
        None when the ASR request failed or produced nothing.
    """
    super().decodeStream(session)

    recorder = Recorder(self._timeout, session.user, session.siteId)
    self.ASRManager.addRecorder(session.siteId, recorder)
    self._recorder = recorder

    # Fix: `result` was only assigned inside the `try`; when the request
    # failed, the final `... if result else None` raised UnboundLocalError.
    # Initialize it so the failure path cleanly returns None.
    result = None
    with Stopwatch() as processingTime:
        with recorder as stream:
            audioStream = stream.audioStream()
            # noinspection PyUnresolvedReferences
            try:
                requests = (types.StreamingRecognizeRequest(
                    audio_content=content) for content in audioStream)
                responses = self._client.streaming_recognize(
                    self._streamingConfig, requests)
                result = self._checkResponses(session, responses)
            except Exception:
                # Fix: was a bare `except:`; Exception keeps the best-effort
                # behavior without trapping SystemExit/KeyboardInterrupt.
                self.logWarning('Failed ASR request')

        self.end()

    return ASRResult(
        text=result[0],
        session=session,
        likelihood=result[1],
        processingTime=processingTime.time) if result else None
def main():
    """Stream UK-English microphone audio, hinting a profanity phrase list.

    Fixes:
    - removed the unused local `key`, which was also computed incorrectly:
      os.path.join(__file__, 'creds.json') appends to the script FILE path
      rather than its directory (os.path.dirname(__file__) was intended);
    - the restart-on-timeout path called main() recursively from the
      except clause, growing the stack on every restart; replaced with a
      loop that reopens the stream when the API's duration limit is hit.
    """
    # See http://g.co/cloud/speech/docs/languages
    # for a list of supported languages.
    language_code = 'en-GB'  # a BCP-47 language tag

    credentials = service_account.Credentials.from_service_account_file(
        'creds.json')
    client = speech.SpeechClient(credentials=credentials)
    config = types.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
        sample_rate_hertz=RATE,
        language_code=language_code,
        #maxAlternatives=5,
        speech_contexts=[
            types.SpeechContext(phrases=[
                "poo", "f**k", "f*****g", "arse", "bollocks", "s***e",
                "innovation"
            ], )
        ])
    streaming_config = types.StreamingRecognitionConfig(config=config,
                                                        interim_results=True)

    while True:
        with MicrophoneStream(RATE, CHUNK) as stream:
            audio_generator = stream.generator()
            requests = (types.StreamingRecognizeRequest(audio_content=content)
                        for content in audio_generator)
            responses = client.streaming_recognize(streaming_config, requests)
            # Now, put the transcription responses to use.
            try:
                listen_print_loop(responses)
                return  # finished cleanly
            except exceptions.OutOfRange:
                continue  # stream exceeded the API limit; restart