def request_stream(data_stream, rate, interim_results=True,
                   language_code='en-US'):
    """Yields `StreamingRecognizeRequest`s built from a recording audio stream.

    The first request yielded carries only the streaming configuration;
    every request after it carries one chunk of audio taken from
    `data_stream`.

    Args:
        data_stream: An iterable that yields raw audio data to send.
        rate: The sampling rate in hertz.
        interim_results: Whether to return intermediate results, before the
            transcription is finalized.
        language_code: The BCP-47 language tag describing the audio. Defaults
            to 'en-US', the value that used to be hard-coded here. See
            http://g.co/cloud/speech/docs/languages for supported languages.

    Yields:
        One configuration-only `StreamingRecognizeRequest`, followed by one
        `StreamingRecognizeRequest` per item of `data_stream`.
    """
    # The initial request must contain metadata about the stream, so the
    # server knows how to interpret it.
    recognition_config = cloud_speech_pb2.RecognitionConfig(
        # There are a bunch of config options you can specify. See
        # https://goo.gl/KPZn97 for the full list.
        encoding='LINEAR16',  # raw 16-bit signed LE samples
        sample_rate=rate,  # the rate in hertz
        language_code=language_code,  # a BCP-47 language tag
    )
    streaming_config = cloud_speech_pb2.StreamingRecognitionConfig(
        interim_results=interim_results,
        config=recognition_config,
    )

    yield cloud_speech_pb2.StreamingRecognizeRequest(
        streaming_config=streaming_config)

    for data in data_stream:
        # Subsequent requests can all just have the content.
        yield cloud_speech_pb2.StreamingRecognizeRequest(audio_content=data)
def _create_config_request(self):
    """Build the configuration-only `StreamingRecognizeRequest`.

    The recognition settings combine the module-level sample rate
    (`AUDIO_SAMPLE_RATE_HZ`), this object's `language_code` and the value
    returned by `self._get_speech_context()`.

    Returns:
        A `cloud_speech.StreamingRecognizeRequest` whose only populated field
        is `streaming_config`.
    """
    # Many more options exist than are set here; the full list is at
    # https://goo.gl/KPZn97.
    audio_settings = cloud_speech.RecognitionConfig(
        encoding='LINEAR16',  # raw 16-bit signed LE samples
        sample_rate=AUDIO_SAMPLE_RATE_HZ,
        # Supported languages are listed at
        # https://cloud.google.com/speech/docs/languages.
        language_code=self.language_code,  # a BCP-47 language tag
        speech_context=self._get_speech_context(),
    )

    stream_settings = cloud_speech.StreamingRecognitionConfig(
        config=audio_settings,
        # TODO(rodrigoq): find a way to handle pauses
        single_utterance=True,
    )

    config_request = cloud_speech.StreamingRecognizeRequest(
        streaming_config=stream_settings)
    return config_request