Exemple #1
0
def request_stream(data_stream, rate, interim_results=True):
    """Yields `StreamingRecognizeRequest`s constructed from a recording audio
	stream.

	Args:
		data_stream: A generator that yields raw audio data to send.
		rate: The sampling rate in hertz.
		interim_results: Whether to return intermediate results, before the
			transcription is finalized.
	"""
    # The initial request must contain metadata about the stream, so the
    # server knows how to interpret it.
    recognition_config = cloud_speech_pb2.RecognitionConfig(
        # There are a bunch of config options you can specify. See
        # https://goo.gl/KPZn97 for the full list.
        encoding='LINEAR16',  # raw 16-bit signed LE samples
        sample_rate=rate,  # the rate in hertz
        # See http://g.co/cloud/speech/docs/languages
        # for a list of supported languages.
        language_code='en-US',  # a BCP-47 language tag
    )
    streaming_config = cloud_speech_pb2.StreamingRecognitionConfig(
        interim_results=interim_results,
        config=recognition_config,
    )

    yield cloud_speech_pb2.StreamingRecognizeRequest(
        streaming_config=streaming_config)

    for data in data_stream:
        # Subsequent requests can all just have the content
        yield cloud_speech_pb2.StreamingRecognizeRequest(audio_content=data)
def main(input_uri, encoding, sample_rate, language_code='en-US'):
    service = cloud_speech_pb2.SpeechStub(
        make_channel('speech.googleapis.com', 443))

    # The method and parameters can be inferred from the proto from which the
    # grpc client lib was generated. See:
    # https://github.com/googleapis/googleapis/blob/master/google/cloud/speech/v1beta1/cloud_speech.proto
    response = service.SyncRecognize(
        cloud_speech_pb2.SyncRecognizeRequest(
            config=cloud_speech_pb2.RecognitionConfig(
                # There are a bunch of config options you can specify. See
                # https://goo.gl/KPZn97 for the full list.
                encoding=encoding,  # one of LINEAR16, FLAC, MULAW, AMR, AMR_WB
                sample_rate=sample_rate,  # the rate in hertz
                # See https://g.co/cloud/speech/docs/languages for a list of
                # supported languages.
                language_code=language_code,  # a BCP-47 language tag
            ),
            audio=cloud_speech_pb2.RecognitionAudio(uri=input_uri, )),
        DEADLINE_SECS)

    # Print the recognition result alternatives and confidence scores.
    for result in response.results:
        print('Result:')
        for alternative in result.alternatives:
            print(u'  ({}): {}'.format(alternative.confidence,
                                       alternative.transcript))
def main(input_uri, encoding, sample_rate, language_code='en-US'):
    channel = make_channel('speech.googleapis.com', 443)
    service = cloud_speech_pb2.SpeechStub(channel)

    # The method and parameters can be inferred from the proto from which the
    # grpc client lib was generated. See:
    # https://github.com/googleapis/googleapis/blob/master/google/cloud/speech/v1beta1/cloud_speech.proto
    operation = service.AsyncRecognize(
        cloud_speech_pb2.AsyncRecognizeRequest(
            config=cloud_speech_pb2.RecognitionConfig(
                # There are a bunch of config options you can specify. See
                # https://goo.gl/KPZn97 for the full list.
                encoding=encoding,  # one of LINEAR16, FLAC, MULAW, AMR, AMR_WB
                sample_rate=sample_rate,  # the rate in hertz
                # See https://g.co/cloud/speech/docs/languages for a list of
                # supported languages.
                language_code=language_code,  # a BCP-47 language tag
            ),
            audio=cloud_speech_pb2.RecognitionAudio(uri=input_uri, )),
        DEADLINE_SECS)

    # Print the longrunning operation handle.
    print(operation)

    # Construct a long running operation endpoint.
    service = operations_pb2.OperationsStub(channel)

    name = operation.name

    while True:
        # Give the server a few seconds to process.
        print('Waiting for server processing...')
        time.sleep(1)
        operation = service.GetOperation(
            operations_pb2.GetOperationRequest(name=name), DEADLINE_SECS)

        if operation.error.message:
            print('\nOperation error:\n{}'.format(operation.error))

        if operation.done:
            break

    response = cloud_speech_pb2.AsyncRecognizeResponse()
    operation.response.Unpack(response)
    # Print the recognition result alternatives and confidence scores.
    for result in response.results:
        print('Result:')
        for alternative in result.alternatives:
            print(u'  ({}): {}'.format(alternative.confidence,
                                       alternative.transcript))
    def _create_config_request(self):
        recognition_config = cloud_speech.RecognitionConfig(
            # There are a bunch of config options you can specify. See
            # https://goo.gl/KPZn97 for the full list.
            encoding='LINEAR16',  # raw 16-bit signed LE samples
            sample_rate=AUDIO_SAMPLE_RATE_HZ,
            # For a list of supported languages see:
            # https://cloud.google.com/speech/docs/languages.
            language_code=self.language_code,  # a BCP-47 language tag
            speech_context=self._get_speech_context(),
        )
        streaming_config = cloud_speech.StreamingRecognitionConfig(
            config=recognition_config,
            single_utterance=True,  # TODO(rodrigoq): find a way to handle pauses
        )

        return cloud_speech.StreamingRecognizeRequest(
            streaming_config=streaming_config)
Exemple #5
0
from google.cloud.gapic.speech.v1beta1 import enums
from google.cloud.gapic.speech.v1beta1 import speech_api
from google.cloud.grpc.speech.v1beta1 import cloud_speech_pb2 as types

# point this at an audio sample for your project
INPUT_URI = 'gs://gapic-speech-v1/hello.flac'

# set these to correspond to your audio sample
ENCODING = enums.RecognitionConfig.AudioEncoding.FLAC
SAMPLE_RATE = 44100

api = speech_api.SpeechApi()
config = types.RecognitionConfig(encoding=ENCODING, sample_rate=SAMPLE_RATE)
audio = types.RecognitionAudio(uri=INPUT_URI)
response = api.sync_recognize(config, audio)

print response