def request_stream(data_stream, rate, interim_results=False):
    """Generate `StreamingRecognizeRequest` messages for a live audio stream.

    The first yielded request carries only the stream configuration, so the
    server knows how to interpret what follows; every later request carries
    a chunk of raw audio.

    Args:
        data_stream: A generator that yields raw audio data to send.
        rate: The sampling rate in hertz.
        interim_results: Whether to return intermediate results, before the
            transcription is finalized.
    """
    # Full list of configuration options: https://goo.gl/KPZn97
    recognition_config = cloud_speech_pb2.RecognitionConfig(
        encoding='LINEAR16',    # raw 16-bit signed LE samples
        sample_rate=rate,       # the rate in hertz
        # See http://g.co/cloud/speech/docs/languages
        # for a list of supported languages.
        language_code='en-US',  # a BCP-47 language tag
    )
    streaming_config = cloud_speech_pb2.StreamingRecognitionConfig(
        config=recognition_config,
        interim_results=interim_results,
    )

    # Metadata must be sent before any audio content.
    yield cloud_speech_pb2.StreamingRecognizeRequest(
        streaming_config=streaming_config)

    # Every subsequent request carries only audio.
    for chunk in data_stream:
        yield cloud_speech_pb2.StreamingRecognizeRequest(audio_content=chunk)
def test_long_running_recognize_exception(self, mock_create_stub):
    """A finished Operation carrying an error is surfaced via exception()."""
    # Mock out the gRPC transport.
    grpc_stub = mock.Mock()
    mock_create_stub.return_value = grpc_stub
    client = speech_client.SpeechClient()

    # Build the request messages.
    config = cloud_speech_pb2.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.FLAC,
        sample_rate_hertz=44100,
        language_code='en-US')
    audio = cloud_speech_pb2.RecognitionAudio(
        uri='gs://bucket_name/file_name.flac')

    # Have the stub return a completed operation whose payload is an error.
    error = status_pb2.Status()
    operation = operations_pb2.Operation(
        name='operations/test_long_running_recognize_exception', done=True)
    operation.error.CopyFrom(error)
    grpc_stub.LongRunningRecognize.return_value = operation

    response = client.long_running_recognize(config, audio)
    self.assertEqual(error, response.exception())
def test_recognize(self, mock_create_stub):
    """recognize() sends a well-formed request and returns the stub's reply."""
    # Mock out the gRPC transport.
    grpc_stub = mock.Mock()
    mock_create_stub.return_value = grpc_stub
    client = speech_client.SpeechClient()

    # Build the request messages.
    config = cloud_speech_pb2.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.FLAC,
        sample_rate_hertz=44100,
        language_code='en-US')
    audio = cloud_speech_pb2.RecognitionAudio(
        uri='gs://bucket_name/file_name.flac')

    # Canned response from the stub.
    expected_response = cloud_speech_pb2.RecognizeResponse()
    grpc_stub.Recognize.return_value = expected_response

    response = client.recognize(config, audio)
    self.assertEqual(expected_response, response)

    # The stub must be called exactly once: two positional arguments
    # (request plus timeout) and a single 'metadata' keyword argument.
    grpc_stub.Recognize.assert_called_once()
    args, kwargs = grpc_stub.Recognize.call_args
    self.assertEqual(len(args), 2)
    self.assertEqual(len(kwargs), 1)
    self.assertIn('metadata', kwargs)
    expected_request = cloud_speech_pb2.RecognizeRequest(
        config=config, audio=audio)
    self.assertEqual(expected_request, args[0])
def test_long_running_recognize(self, mock_create_stub):
    """long_running_recognize() unpacks a successful Operation result.

    Fixes relative to the original:
      * Protocol-buffer message constructors accept keyword arguments
        only; the positional calls to ``RecognitionConfig``,
        ``RecognitionAudio`` and ``Operation`` raised ``TypeError``
        before the client was ever exercised. (The sibling tests in
        this file already use keyword arguments.)
      * ``call_args[0]`` is the tuple of positional arguments, not the
        request message; the request is its first element.
    """
    # Mock gRPC layer
    grpc_stub = mock.Mock(spec=cloud_speech_pb2.SpeechStub)
    mock_create_stub.return_value = grpc_stub
    client = speech_client.SpeechClient()

    # Mock request (protobuf messages require keyword arguments).
    encoding = enums.RecognitionConfig.AudioEncoding.FLAC
    sample_rate_hertz = 44100
    language_code = 'en-US'
    config = cloud_speech_pb2.RecognitionConfig(
        encoding=encoding,
        sample_rate_hertz=sample_rate_hertz,
        language_code=language_code)
    uri = 'gs://bucket_name/file_name.flac'
    audio = cloud_speech_pb2.RecognitionAudio(uri=uri)

    # Mock response: a completed operation packing the expected payload.
    expected_response = cloud_speech_pb2.LongRunningRecognizeResponse()
    operation = operations_pb2.Operation(
        name='operations/test_long_running_recognize', done=True)
    operation.response.Pack(expected_response)
    grpc_stub.LongRunningRecognize.return_value = operation

    response = client.long_running_recognize(config, audio)
    self.assertEqual(expected_response, response.result())

    # Verify the outgoing request message.
    grpc_stub.LongRunningRecognize.assert_called_once()
    request = grpc_stub.LongRunningRecognize.call_args[0][0]
    self.assertEqual(config, request.config)
    self.assertEqual(audio, request.audio)
def request_stream(data_stream, rate):
    """Yield streaming-recognize requests for Korean speech recognition.

    The configuration request is sent first; all following requests carry
    raw audio chunks taken from ``data_stream``.
    """
    streaming_config = cloud_speech.StreamingRecognitionConfig(
        config=cloud_speech.RecognitionConfig(
            encoding='LINEAR16',
            sample_rate_hertz=rate,
            language_code='ko-KR',
        ),
    )

    # Configuration must precede any audio content.
    yield cloud_speech.StreamingRecognizeRequest(
        streaming_config=streaming_config)

    for chunk in data_stream:
        yield cloud_speech.StreamingRecognizeRequest(audio_content=chunk)
def _process(self):
    """Open a streaming speech API session and push responses into a queue.

    Blocks until ``self.audio`` is closed, which in turn terminates the
    streaming_recognize request.
    """
    self.client = speech_client.SpeechClient()
    self.config = cloud_speech_pb2.StreamingRecognitionConfig(
        config=cloud_speech_pb2.RecognitionConfig(
            encoding=self.encoding,
            sample_rate_hertz=self.rate,
            language_code=self.language),
        interim_results=True)

    request_iter = iter(StreamingRequest(self.audio, self.config))
    # Each response is handed off through the results queue.
    for response in self.client.streaming_recognize(request_iter):
        self.results.put(response)
def test_recognize_exception(self, mock_create_stub):
    """recognize() wraps a transport failure in ``errors.GaxError``.

    Fix relative to the original: protobuf message constructors accept
    keyword arguments only; the positional ``RecognitionConfig`` /
    ``RecognitionAudio`` calls raised ``TypeError`` before the stub was
    ever invoked. (The sibling tests in this file already use keyword
    arguments.)
    """
    # Mock gRPC layer
    grpc_stub = mock.Mock(spec=cloud_speech_pb2.SpeechStub)
    mock_create_stub.return_value = grpc_stub
    client = speech_client.SpeechClient()

    # Mock request (keyword arguments, as protobuf requires).
    encoding = enums.RecognitionConfig.AudioEncoding.FLAC
    sample_rate_hertz = 44100
    language_code = 'en-US'
    config = cloud_speech_pb2.RecognitionConfig(
        encoding=encoding,
        sample_rate_hertz=sample_rate_hertz,
        language_code=language_code)
    uri = 'gs://bucket_name/file_name.flac'
    audio = cloud_speech_pb2.RecognitionAudio(uri=uri)

    # Mock exception response
    grpc_stub.Recognize.side_effect = CustomException()

    self.assertRaises(errors.GaxError, client.recognize, config, audio)
def VoiceRecognitionG(b_voice_data):
    """Transcribe a voice message via Google Cloud Speech.

    Args:
        b_voice_data: Raw audio bytes in OGG/Opus (Telegram's default
            voice-note format, per the encoding configured below).

    Returns:
        The transcript of a recognition result, or the string
        "No data responded" when the API returns no results.
    """
    client = speech.SpeechClient()
    audio = direct_gcall.RecognitionAudio(content=b_voice_data)
    config = direct_gcall.RecognitionConfig(
        encoding=enums.RecognitionConfig.AudioEncoding.
        OGG_OPUS,  # setup default Telegram format
        sample_rate_hertz=48000,  # matches Telegram's Opus voice notes
        language_code='ru-RU',
        # NOTE(review): max_alternatives=0 looks like "server default" —
        # confirm this was not meant to request multiple alternatives.
        max_alternatives=0)
    # Recognize speech content
    response = client.recognize(config, audio)
    if (response.results):
        print(response.results)
        for result in response.results:
            rec_voice = result.alternatives[0].transcript
            # NOTE(review): returns on the first result; any further
            # results are ignored — confirm this is intentional.
            return rec_voice
    else:
        rec_voice = "No data responded"
        return rec_voice
# Ensure the destination GCS bucket exists, creating it on first run.
client = storage.Client()
bucket = client.lookup_bucket(BUCKET)
if not bucket:
    bucket = client.create_bucket(BUCKET)

# Upload the local audio file to the bucket under the same name.
print("Uploading Podcast")
blob = storage.Blob(FILE, bucket)
blob.upload_from_filename(FILE)

# Kick off an asynchronous (long-running) transcription of the upload.
sclient = speech_client.SpeechClient()
encoding = enums.RecognitionConfig.AudioEncoding.FLAC
sample_rate_hertz = 16000
language_code = 'en-US'
config = cloud_speech_pb2.RecognitionConfig(
    encoding=encoding,
    sample_rate_hertz=sample_rate_hertz,
    language_code=language_code)
uri = 'gs://%s/%s' % (BUCKET, FILE)
audio = cloud_speech_pb2.RecognitionAudio(uri=uri)
response = sclient.long_running_recognize(config, audio)


def callback(operation_future):
    # Handle result.
    # Invoked when the operation completes; serializes the transcription
    # result to a local JSON file.
    result = operation_future.result()
    with open("google-transcript.json", "w") as f:
        f.write(json_format.MessageToJson(result))
    print("Done!")


# Register the completion handler and return immediately; the transcript
# is written by the callback once recognition finishes.
response.add_done_callback(callback)
print("Running speech recognition")