Example #1
0
    def process(self, loop):
        """
        Stream audio to the Google Cloud Speech API and dispatch results.

        Builds the recognition/streaming configuration from instance
        attributes, feeds audio chunks from ``stream_generator()`` to the
        API, and schedules ``async_callback`` (via ``global_async_worker``)
        for every response that carries at least one alternative.

        Args:
            loop: event-loop handle; unused in this body — kept for
                interface compatibility (TODO confirm with callers).
        """
        # Speech contexts bias recognition toward expected phrases.
        cap_speech_context = types.SpeechContext(**self.context)
        metadata = types.RecognitionMetadata(**self.metadata)
        client = speech.SpeechClient()
        config = types.RecognitionConfig(
            encoding=self.encoding,
            sample_rate_hertz=self.rate,
            language_code=self.language,
            speech_contexts=[cap_speech_context],
            enable_automatic_punctuation=True,
            model=self.model,
            metadata=metadata)

        streaming_config = types.StreamingRecognitionConfig(
            config=config,
            interim_results=self.interim_results,
            single_utterance=self.single_utterance)
        audio_generator = self.stream_generator()
        # A generator expression is already an iterator; no iter() needed.
        requests = (
            types.StreamingRecognizeRequest(audio_content=content)
            for content in audio_generator)

        responses = client.streaming_recognize(streaming_config, requests)
        try:
            for response in responses:
                if self.terminated:
                    break
                if not response.results:
                    continue
                result = response.results[0]
                if not result.alternatives:
                    continue
                speech_data = MessageToDict(response)
                global_async_worker.add_task(self.async_callback(speech_data))

                # debug: final results are tagged "(OK)"
                transcript = result.alternatives[0].transcript
                print('>>', transcript, "(OK)" if result.is_final else "")
        except Exception as e:
            # Broad catch is deliberate: any streaming failure (network
            # drop, API stream-duration limit, ...) restarts recognition.
            print('process excepted', e)
            self.start()
Example #2
0
 def gspeech_client(self):
     """Configure a Google Speech API client and stream microphone audio
     for transcription, handing each response batch to
     ``_listen_print_loop`` for parsing.
     """
     lang = 'en-US'
     # Phrase hints bias recognition toward the expected vocabulary.
     hint_context = types.SpeechContext(phrases=self.context)
     client = speech.SpeechClient()
     # Recognition metadata helps the service choose a processing model.
     meta = types.RecognitionMetadata()
     # Spoken device commands (alternative: VOICE_SEARCH for queries).
     meta.interaction_type = (
         enums.RecognitionMetadata.InteractionType.VOICE_COMMAND)
     # Speaker within ~3 m of the mic (NEARFIELD/MIDFIELD/FARFIELD).
     meta.microphone_distance = (
         enums.RecognitionMetadata.MicrophoneDistance.MIDFIELD)
     # Recorded on a PC or tablet (others: VEHICLE, OTHER_*_DEVICE).
     meta.recording_device_type = (
         enums.RecognitionMetadata.RecordingDeviceType.PC)
     # Plain audio recording, not the audio track of a VIDEO.
     meta.original_media_type = (
         enums.RecognitionMetadata.OriginalMediaType.AUDIO)
     recognition_config = types.RecognitionConfig(
         encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
         sample_rate_hertz=16000,
         language_code=lang,
         speech_contexts=[hint_context],
         use_enhanced=True,
         model='command_and_search',
         metadata=meta)
     stream_config = types.StreamingRecognitionConfig(
         config=recognition_config,
         single_utterance=False,
         interim_results=False)
     # Lazily wrap each audio chunk in a streaming request (Google's
     # recommended generator idiom).
     request_stream = (
         types.StreamingRecognizeRequest(audio_content=chunk)
         for chunk in self.generator())
     responses = client.streaming_recognize(stream_config, request_stream)
     self._listen_print_loop(responses)