Example #1
def request_stream(data_stream, rate, interim_results=True):
    """Yields `StreamingRecognizeRequest`s constructed from a recording audio
    stream.

    Args:
        data_stream: A generator that yields raw audio data to send.
        rate: The sampling rate in hertz.
        interim_results: Whether to return intermediate results, before the
            transcription is finalized.
    """
    # The initial request must contain metadata about the stream, so the
    # server knows how to interpret it.
    recognition_config = cloud_speech_pb2.RecognitionConfig(
        # There are a bunch of config options you can specify. See
        # https://goo.gl/KPZn97 for the full list.
        encoding='LINEAR16',  # raw 16-bit signed LE samples
        sample_rate=rate,  # the rate in hertz
        # See http://g.co/cloud/speech/docs/languages
        # for a list of supported languages.
        language_code='en-US',  # a BCP-47 language tag
    )
    streaming_config = cloud_speech_pb2.StreamingRecognitionConfig(
        interim_results=interim_results,
        config=recognition_config,
    )

    yield cloud_speech_pb2.StreamingRecognizeRequest(
        streaming_config=streaming_config)

    for data in data_stream:
        # Subsequent requests can all just have the content
        yield cloud_speech_pb2.StreamingRecognizeRequest(audio_content=data)
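The generator above only builds requests; a hedged usage sketch (not part of the original snippet) shows how it might be fed to the bidirectional StreamingRecognize call. It assumes cloud_speech_pb2, make_channel() and DEADLINE_SECS are available exactly as in the other examples on this page, and that audio_generator yields raw LINEAR16 chunks recorded at rate hertz.

def transcribe_streaming(audio_generator, rate=16000):
    # Sketch only: make_channel() and DEADLINE_SECS are assumptions borrowed
    # from the surrounding examples.
    service = cloud_speech_pb2.SpeechStub(
        make_channel('speech.googleapis.com', 443))

    # StreamingRecognize is a bidirectional streaming RPC, so the request
    # iterator produced by request_stream() can be passed to it directly;
    # the call returns an iterator of StreamingRecognizeResponse messages.
    responses = service.StreamingRecognize(
        request_stream(audio_generator, rate), DEADLINE_SECS)

    for response in responses:
        for result in response.results:
            for alternative in result.alternatives:
                print(u'  ({}): {}'.format(alternative.confidence,
                                           alternative.transcript))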
Example #2
    def test_async_recognize(self, mock_create_stub):
        # Mock gRPC layer
        grpc_stub = mock.Mock()
        mock_create_stub.return_value = grpc_stub

        client = speech_client.SpeechClient()

        # Mock request
        encoding = enums.RecognitionConfig.AudioEncoding.FLAC
        sample_rate = 44100
        config = cloud_speech_pb2.RecognitionConfig(encoding=encoding,
                                                    sample_rate=sample_rate)
        uri = 'gs://bucket_name/file_name.flac'
        audio = cloud_speech_pb2.RecognitionAudio(uri=uri)

        # Mock response
        expected_response = cloud_speech_pb2.AsyncRecognizeResponse()
        operation = operations_pb2.Operation(
            name='operations/test_async_recognize', done=True)
        operation.response.Pack(expected_response)
        grpc_stub.AsyncRecognize.return_value = operation

        response = client.async_recognize(config, audio)
        self.assertEqual(expected_response, response.result())

        grpc_stub.AsyncRecognize.assert_called_once()
        args, kwargs = grpc_stub.AsyncRecognize.call_args
        self.assertEqual(len(args), 2)
        self.assertEqual(len(kwargs), 1)
        self.assertIn('metadata', kwargs)
        actual_request = args[0]

        expected_request = cloud_speech_pb2.AsyncRecognizeRequest(
            config=config, audio=audio)
        self.assertEqual(expected_request, actual_request)
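The test above takes a mock_create_stub argument, which implies a mock.patch decorator intercepting stub creation; a minimal scaffold, assuming the usual GAX hook google.gax.config.create_stub as the patch target, could look like this:

import unittest

import mock


class _SpeechClientTests(unittest.TestCase):
    # Assumed patch target: GAX-generated clients normally build their gRPC
    # stubs through google.gax.config.create_stub, so patching it hands the
    # test the mock stub configured above.
    @mock.patch('google.gax.config.create_stub', spec=True)
    def test_async_recognize(self, mock_create_stub):
        ...  # body as shown in the example above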
Example #3
def main(input_uri, encoding, sample_rate, language_code='en-US'):
    service = cloud_speech_pb2.SpeechStub(
        make_channel('speech.googleapis.com', 443))

    # The method and parameters can be inferred from the proto from which the
    # grpc client lib was generated. See:
    # https://github.com/googleapis/googleapis/blob/master/google/cloud/speech/v1beta1/cloud_speech.proto
    response = service.SyncRecognize(
        cloud_speech_pb2.SyncRecognizeRequest(
            config=cloud_speech_pb2.RecognitionConfig(
                # There are a bunch of config options you can specify. See
                # https://goo.gl/KPZn97 for the full list.
                encoding=encoding,  # one of LINEAR16, FLAC, MULAW, AMR, AMR_WB
                sample_rate=sample_rate,  # the rate in hertz
                # See https://g.co/cloud/speech/docs/languages for a list of
                # supported languages.
                language_code=language_code,  # a BCP-47 language tag
            ),
            audio=cloud_speech_pb2.RecognitionAudio(uri=input_uri)),
        DEADLINE_SECS)

    # Print the recognition result alternatives and confidence scores.
    for result in response.results:
        print('Result:')
        for alternative in result.alternatives:
            print(u'  ({}): {}'.format(alternative.confidence,
                                       alternative.transcript))
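This example (and Example #6 below) calls a make_channel() helper that is not shown in the excerpt. A plausible sketch, assuming application-default credentials and the google-auth transport helpers, is:

import google.auth
import google.auth.transport.grpc
import google.auth.transport.requests

SPEECH_SCOPE = 'https://www.googleapis.com/auth/cloud-platform'


def make_channel(host, port):
    """Builds a secure gRPC channel authorized with application-default
    credentials (a sketch of the helper the examples assume)."""
    credentials, _ = google.auth.default(scopes=[SPEECH_SCOPE])
    http_request = google.auth.transport.requests.Request()
    target = '{}:{}'.format(host, port)
    return google.auth.transport.grpc.secure_authorized_channel(
        credentials, http_request, target)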
Example #4
def test_sync_recognize():
    api = speech_client.SpeechClient()
    config = types.RecognitionConfig(encoding=ENCODING,
                                     sample_rate=SAMPLE_RATE)
    audio = types.RecognitionAudio(uri=INPUT_URI)
    response = api.sync_recognize(config, audio)

    print(response)
Example #5
def test_async_recognize():
    def callback(operation_future):
        print(operation_future.result())

    api = speech_client.SpeechClient()
    config = types.RecognitionConfig(encoding=ENCODING,
                                     sample_rate=SAMPLE_RATE)
    audio = types.RecognitionAudio(uri=INPUT_URI)
    response = api.async_recognize(config, audio)
    response.add_done_callback(callback)
    print("Metadata: \n%s\n" % response.metadata())
Example #6
def main(input_uri, encoding, sample_rate, language_code='en-US'):
    channel = make_channel('speech.googleapis.com', 443)
    service = cloud_speech_pb2.SpeechStub(channel)

    # The method and parameters can be inferred from the proto from which the
    # grpc client lib was generated. See:
    # https://github.com/googleapis/googleapis/blob/master/google/cloud/speech/v1beta1/cloud_speech.proto
    operation = service.AsyncRecognize(
        cloud_speech_pb2.AsyncRecognizeRequest(
            config=cloud_speech_pb2.RecognitionConfig(
                # There are a bunch of config options you can specify. See
                # https://goo.gl/KPZn97 for the full list.
                encoding=encoding,  # one of LINEAR16, FLAC, MULAW, AMR, AMR_WB
                sample_rate=sample_rate,  # the rate in hertz
                # See https://g.co/cloud/speech/docs/languages for a list of
                # supported languages.
                language_code=language_code,  # a BCP-47 language tag
            ),
            audio=cloud_speech_pb2.RecognitionAudio(uri=input_uri)),
        DEADLINE_SECS)

    # Print the longrunning operation handle.
    print(operation)

    # Construct a long running operation endpoint.
    service = operations_pb2.OperationsStub(channel)

    name = operation.name

    while True:
        # Give the server a few seconds to process.
        print('Waiting for server processing...')
        time.sleep(1)
        operation = service.GetOperation(
            operations_pb2.GetOperationRequest(name=name), DEADLINE_SECS)

        if operation.error.message:
            print('\nOperation error:\n{}'.format(operation.error))

        if operation.done:
            break

    response = cloud_speech_pb2.AsyncRecognizeResponse()
    operation.response.Unpack(response)
    # Print the recognition result alternatives and confidence scores.
    for result in response.results:
        print('Result:')
        for alternative in result.alternatives:
            print(u'  ({}): {}'.format(alternative.confidence,
                                       alternative.transcript))
Example #7
    def test_sync_recognize_exception(self, mock_create_stub):
        # Mock gRPC layer
        grpc_stub = mock.Mock()
        mock_create_stub.return_value = grpc_stub

        client = speech_client.SpeechClient()

        # Mock request
        encoding = enums.RecognitionConfig.AudioEncoding.FLAC
        sample_rate = 44100
        config = cloud_speech_pb2.RecognitionConfig(encoding=encoding,
                                                    sample_rate=sample_rate)
        uri = 'gs://bucket_name/file_name.flac'
        audio = cloud_speech_pb2.RecognitionAudio(uri=uri)

        # Mock exception response
        grpc_stub.SyncRecognize.side_effect = CustomException()

        self.assertRaises(errors.GaxError, client.sync_recognize, config,
                          audio)
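CustomException is not defined in this excerpt; a minimal stand-in (an assumption) is just an Exception subclass, which the GAX layer is expected to surface as errors.GaxError:

class CustomException(Exception):
    """Stand-in error raised by the mocked gRPC stub (assumed definition)."""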
Example #8
    def request_transcribe_sync(self, content):
        """
        TODO: Description.
        :param content:
        :return:
        """
        service = cloud_speech_pb2.SpeechStub(
            self.make_channel('speech.googleapis.com', 443))

        # The method and parameters can be inferred from the proto from which the
        # grpc client lib was generated. See:
        # https://github.com/googleapis/googleapis/blob/master/google/cloud/speech/v1beta1/cloud_speech.proto
        response = service.SyncRecognize(
            cloud_speech_pb2.SyncRecognizeRequest(
                config=cloud_speech_pb2.RecognitionConfig(
                    # There are a bunch of config options you can specify. See
                    # https://goo.gl/KPZn97 for the full list.
                    encoding=self.encoding,  # one of LINEAR16, FLAC, MULAW, AMR, AMR_WB
                    sample_rate=self.sample_rate,  # the rate in hertz
                    # See https://g.co/cloud/speech/docs/languages for a list of
                    # supported languages.
                    language_code=self.language_code,  # a BCP-47 language tag
                ),
                audio=cloud_speech_pb2.RecognitionAudio(content=content)),
            DEADLINE_SECS)

        # Collect the recognition result alternatives and confidence scores.
        alternatives = []
        for result in response.results:
            for alternative in result.alternatives:
                alternatives.append({
                    'service_name': self.config['service_name'],
                    'confidence': alternative.confidence,
                    'transcript': alternative.transcript
                })

        return alternatives
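A sketch of how the method above might be driven; the class name, constructor and attribute values are hypothetical, and only the attributes the method actually reads are assumed:

class GoogleSpeechTranscriber(object):
    # Hypothetical owner class: request_transcribe_sync() only needs these
    # attributes plus a make_channel callable and a config dict.
    def __init__(self, make_channel, config):
        self.make_channel = make_channel
        self.config = config                # must provide config['service_name']
        self.encoding = 'LINEAR16'          # raw 16-bit signed LE samples
        self.sample_rate = 16000            # sampling rate in hertz
        self.language_code = 'en-US'        # BCP-47 language tag

    # request_transcribe_sync() from Example #8 would be defined here.

# Usage sketch: read raw audio bytes and collect the alternatives.
# transcriber = GoogleSpeechTranscriber(make_channel, {'service_name': 'google'})
# with open('audio.raw', 'rb') as audio_file:
#     print(transcriber.request_transcribe_sync(audio_file.read()))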
Example #9
    def test_async_recognize_exception(self, mock_create_stub):
        # Mock gRPC layer
        grpc_stub = mock.Mock()
        mock_create_stub.return_value = grpc_stub

        client = speech_client.SpeechClient()

        # Mock request
        encoding = enums.RecognitionConfig.AudioEncoding.FLAC
        sample_rate = 44100
        config = cloud_speech_pb2.RecognitionConfig(encoding=encoding,
                                                    sample_rate=sample_rate)
        uri = 'gs://bucket_name/file_name.flac'
        audio = cloud_speech_pb2.RecognitionAudio(uri=uri)

        # Mock exception response
        error = status_pb2.Status()
        operation = operations_pb2.Operation(
            name='operations/test_async_recognize_exception', done=True)
        operation.error.CopyFrom(error)
        grpc_stub.AsyncRecognize.return_value = operation

        response = client.async_recognize(config, audio)
        self.assertEqual(error, response.exception())