def request_stream(data_stream, rate, interim_results=True,
                   language_code='en-US'):
    """Yield the `StreamingRecognizeRequest`s for one streaming session.

    The first request yielded carries only the streaming configuration;
    every following request carries one item of `data_stream` as its
    `audio_content`.

    Args:
        data_stream: An iterable that yields raw audio data to send.
        rate: The sampling rate in hertz.
        interim_results: Whether to return intermediate results, before the
            transcription is finalized.
        language_code: A BCP-47 language tag for the audio. Defaults to
            'en-US', the value this function previously hard-coded. See
            http://g.co/cloud/speech/docs/languages for supported languages.

    Yields:
        `cloud_speech_pb2.StreamingRecognizeRequest` messages, configuration
        first and audio content afterwards.
    """
    # The initial request must contain metadata about the stream, so the
    # server knows how to interpret it.
    recognition_config = cloud_speech_pb2.RecognitionConfig(
        # There are a bunch of config options you can specify. See
        # https://goo.gl/KPZn97 for the full list.
        encoding='LINEAR16',  # raw 16-bit signed LE samples
        sample_rate=rate,  # the rate in hertz
        language_code=language_code,  # a BCP-47 language tag
    )
    streaming_config = cloud_speech_pb2.StreamingRecognitionConfig(
        interim_results=interim_results,
        config=recognition_config,
    )

    yield cloud_speech_pb2.StreamingRecognizeRequest(
        streaming_config=streaming_config)

    # Subsequent requests only need to carry the audio content.
    for data in data_stream:
        yield cloud_speech_pb2.StreamingRecognizeRequest(audio_content=data)
def main(input_uri, encoding, sample_rate, language_code='en-US'):
    """Run a synchronous recognition request and print the results.

    Args:
        input_uri: URI of the audio to recognize; passed as the `uri` of the
            `RecognitionAudio` message.
        encoding: Audio encoding, one of LINEAR16, FLAC, MULAW, AMR, AMR_WB.
        sample_rate: The sampling rate of the audio in hertz.
        language_code: A BCP-47 language tag. See
            https://g.co/cloud/speech/docs/languages for supported languages.
    """
    speech_stub = cloud_speech_pb2.SpeechStub(
        make_channel('speech.googleapis.com', 443))

    # The available config options are listed at https://goo.gl/KPZn97.
    recognition_config = cloud_speech_pb2.RecognitionConfig(
        encoding=encoding,
        sample_rate=sample_rate,
        language_code=language_code,
    )
    audio_source = cloud_speech_pb2.RecognitionAudio(uri=input_uri)

    # The method and its parameters follow the proto the grpc client lib was
    # generated from:
    # https://github.com/googleapis/googleapis/blob/master/google/cloud/speech/v1beta1/cloud_speech.proto
    sync_request = cloud_speech_pb2.SyncRecognizeRequest(
        config=recognition_config,
        audio=audio_source,
    )
    response = speech_stub.SyncRecognize(sync_request, DEADLINE_SECS)

    # Report every alternative of every result with its confidence score.
    for recognized in response.results:
        print('Result:')
        for candidate in recognized.alternatives:
            line = u' ({}): {}'.format(
                candidate.confidence, candidate.transcript)
            print(line)
def main(input_uri, encoding, sample_rate, language_code='en-US'):
    """Start an asynchronous recognition, poll until done, print the results.

    Args:
        input_uri: URI of the audio to recognize; passed as the `uri` of the
            `RecognitionAudio` message.
        encoding: Audio encoding, one of LINEAR16, FLAC, MULAW, AMR, AMR_WB.
        sample_rate: The sampling rate of the audio in hertz.
        language_code: A BCP-47 language tag. See
            https://g.co/cloud/speech/docs/languages for supported languages.
    """
    channel = make_channel('speech.googleapis.com', 443)
    speech_stub = cloud_speech_pb2.SpeechStub(channel)

    # The available config options are listed at https://goo.gl/KPZn97.
    recognition_config = cloud_speech_pb2.RecognitionConfig(
        encoding=encoding,
        sample_rate=sample_rate,
        language_code=language_code,
    )
    audio_source = cloud_speech_pb2.RecognitionAudio(uri=input_uri)

    # The method and its parameters follow the proto the grpc client lib was
    # generated from:
    # https://github.com/googleapis/googleapis/blob/master/google/cloud/speech/v1beta1/cloud_speech.proto
    async_request = cloud_speech_pb2.AsyncRecognizeRequest(
        config=recognition_config,
        audio=audio_source,
    )
    operation = speech_stub.AsyncRecognize(async_request, DEADLINE_SECS)

    # Show the long-running operation handle that was returned.
    print(operation)

    # The operation is polled through a second stub on the same channel.
    operations_stub = operations_pb2.OperationsStub(channel)
    operation_name = operation.name

    finished = False
    while not finished:
        # Pause before each poll so the server has time to make progress.
        print('Waiting for server processing...')
        time.sleep(1)
        operation = operations_stub.GetOperation(
            operations_pb2.GetOperationRequest(name=operation_name),
            DEADLINE_SECS)

        if operation.error.message:
            print('\nOperation error:\n{}'.format(operation.error))

        finished = operation.done

    # Decode the operation's packed response into the typed message.
    response = cloud_speech_pb2.AsyncRecognizeResponse()
    operation.response.Unpack(response)

    # Report every alternative of every result with its confidence score.
    for recognized in response.results:
        print('Result:')
        for candidate in recognized.alternatives:
            line = u' ({}): {}'.format(
                candidate.confidence, candidate.transcript)
            print(line)
def _create_config_request(self):
    """Build the streaming request that carries the recognition settings.

    Returns:
        A `cloud_speech.StreamingRecognizeRequest` whose `streaming_config`
        wraps a LINEAR16 `RecognitionConfig` at AUDIO_SAMPLE_RATE_HZ, using
        `self.language_code` and the result of `self._get_speech_context()`,
        with `single_utterance` enabled.
    """
    # The available config options are listed at https://goo.gl/KPZn97.
    audio_settings = cloud_speech.RecognitionConfig(
        encoding='LINEAR16',  # raw 16-bit signed LE samples
        sample_rate=AUDIO_SAMPLE_RATE_HZ,
        # Supported languages (BCP-47 tags):
        # https://cloud.google.com/speech/docs/languages.
        language_code=self.language_code,
        speech_context=self._get_speech_context(),
    )

    return cloud_speech.StreamingRecognizeRequest(
        streaming_config=cloud_speech.StreamingRecognitionConfig(
            config=audio_settings,
            # TODO(rodrigoq): find a way to handle pauses
            single_utterance=True,
        ))
from google.cloud.gapic.speech.v1beta1 import enums
from google.cloud.gapic.speech.v1beta1 import speech_api
from google.cloud.grpc.speech.v1beta1 import cloud_speech_pb2 as types

# Point this at an audio sample for your project.
INPUT_URI = 'gs://gapic-speech-v1/hello.flac'

# Set these to correspond to your audio sample.
ENCODING = enums.RecognitionConfig.AudioEncoding.FLAC
SAMPLE_RATE = 44100

# Issue a single synchronous recognition call for the sample above.
api = speech_api.SpeechApi()
config = types.RecognitionConfig(encoding=ENCODING, sample_rate=SAMPLE_RATE)
audio = types.RecognitionAudio(uri=INPUT_URI)
response = api.sync_recognize(config, audio)

# Use the call form of print: the bare `print response` statement is a
# SyntaxError on Python 3, while print(response) with a single argument
# prints the same text on Python 2 as well.
print(response)