def detect_intent_stream(project_id, session_id, audio_file_path, language_code):
    """Returns the result of detect intent with streaming audio as input.

    Using the same `session_id` between requests allows continuation of
    the conversation.

    Args:
        project_id: GCP project id owning the Dialogflow agent.
        session_id: Conversation session identifier; reuse it to continue
            a dialog across calls.
        audio_file_path: Path to a local audio file (LINEAR16, 16 kHz
            assumed — see hard-coded config below).
        language_code: BCP-47 language code of the audio, e.g. 'en-US'.
    """
    from google.cloud import dialogflow

    session_client = dialogflow.SessionsClient()

    # Note: hard coding audio_encoding and sample_rate_hertz for simplicity.
    audio_encoding = dialogflow.AudioEncoding.AUDIO_ENCODING_LINEAR_16
    sample_rate_hertz = 16000

    session_path = session_client.session_path(project_id, session_id)
    print('Session path: {}\n'.format(session_path))

    def request_generator(audio_config, audio_file_path):
        """Yield the config request followed by audio-chunk requests."""
        query_input = dialogflow.QueryInput(audio_config=audio_config)

        # The first request contains the configuration.
        yield dialogflow.StreamingDetectIntentRequest(
            session=session_path, query_input=query_input)

        # Here we are reading small chunks of audio data from a local
        # audio file. In practice these chunks should come from
        # an audio input device.
        with open(audio_file_path, 'rb') as audio_file:
            while True:
                chunk = audio_file.read(4096)
                if not chunk:
                    break
                # The later requests contains audio data.
                yield dialogflow.StreamingDetectIntentRequest(
                    input_audio=chunk)

    audio_config = dialogflow.InputAudioConfig(
        audio_encoding=audio_encoding,
        language_code=language_code,
        sample_rate_hertz=sample_rate_hertz)

    requests = request_generator(audio_config, audio_file_path)
    responses = session_client.streaming_detect_intent(requests=requests)

    print('=' * 20)

    # Track the last response explicitly: the original code relied on the
    # loop variable leaking out of the `for`, which raises NameError when
    # the stream yields no responses at all.
    response = None
    for response in responses:
        print('Intermediate transcript: "{}".'.format(
            response.recognition_result.transcript))

    if response is None:
        # Empty stream — nothing was recognized, no final result to show.
        print('No response received from streaming_detect_intent.')
        return

    # Note: The result from the last response is the final transcript along
    # with the detected content.
    query_result = response.query_result

    print('=' * 20)
    print('Query text: {}'.format(query_result.query_text))
    print('Detected intent: {} (confidence: {})\n'.format(
        query_result.intent.display_name,
        query_result.intent_detection_confidence))
    print('Fulfillment text: {}\n'.format(query_result.fulfillment_text))
def detect_intent_audio(project_id, session_id, audio_file_path, language_code):
    """Detect intent from a pre-recorded audio file.

    Reusing the same `session_id` across calls keeps the conversation
    context alive on the Dialogflow side.

    Args:
        project_id: GCP project id owning the Dialogflow agent.
        session_id: Conversation session identifier.
        audio_file_path: Path to a local audio file (LINEAR16, 16 kHz
            assumed — see hard-coded config below).
        language_code: BCP-47 language code of the audio.
    """
    from google.cloud import dialogflow

    client = dialogflow.SessionsClient()
    session = client.session_path(project_id, session_id)
    print("Session path: {}\n".format(session))

    # detect_intent takes the complete audio payload up front, so read
    # the whole file in one go.
    with open(audio_file_path, "rb") as source:
        payload = source.read()

    # Note: hard coding audio_encoding and sample_rate_hertz for simplicity.
    config = dialogflow.InputAudioConfig(
        audio_encoding=dialogflow.AudioEncoding.AUDIO_ENCODING_LINEAR_16,
        language_code=language_code,
        sample_rate_hertz=16000,
    )

    response = client.detect_intent(
        request=dialogflow.DetectIntentRequest(
            session=session,
            query_input=dialogflow.QueryInput(audio_config=config),
            input_audio=payload,
        )
    )
    result = response.query_result

    print("=" * 20)
    print("Query text: {}".format(result.query_text))
    print("Detected intent: {} (confidence: {})\n".format(
        result.intent.display_name,
        result.intent_detection_confidence,
    ))
    print("Fulfillment text: {}\n".format(result.fulfillment_text))
def runDetectIntent(self, session_id):
    """Run one streaming detect-intent round against Dialogflow using
    microphone audio captured via PyAudio.

    Audio frames are pushed into `self._buff` by a stream callback and
    consumed by a request generator. After the stream finishes, either
    hotword detection is restarted (conversation ended) or this method
    recurses to continue the dialog with the same session.

    Args:
        session_id: Dialogflow session identifier; reused on recursion so
            conversation context is preserved.
    """
    print("run detect intent")
    # Flag read by the request generator below; cleared elsewhere
    # (presumably by stopDetectIntent) to end the audio stream.
    self._isIntentDetect = True
    session_client = dialogflow.SessionsClient()
    audio_encoding = dialogflow.AudioEncoding.AUDIO_ENCODING_LINEAR_16
    sample_rate_hertz = 16000
    language_code = 'ru-RU'  # hard-coded Russian recognition
    session_id = '{}'.format(session_id)
    print("session #{}".format(session_id))
    session_path = session_client.session_path(self._project_id, session_id)
    print('Session path: {}\n'.format(session_path))

    def _audio_callback_intent(in_data, frame_count, time_info, status):
        # PyAudio stream callback: enqueue captured frames unless the
        # robot is currently playing audio (avoids capturing its own
        # output). Must always return (data, flag) to keep the stream
        # running.
        if not self._is_playing:
            self._buff.put(in_data)
        return None, pyaudio.paContinue

    try:
        num_channels = 1
        audio_format = pyaudio.paInt16
        frame_length = 4096
        audio_config = dialogflow.InputAudioConfig(
            audio_encoding=audio_encoding,
            language_code=language_code,
            sample_rate_hertz=sample_rate_hertz)
        self._pa = pyaudio.PyAudio()
        self._audio_stream = self._pa.open(
            rate=sample_rate_hertz,
            channels=num_channels,
            format=audio_format,
            input=True,
            frames_per_buffer=frame_length,
            input_device_index=self._input_device_index,
            stream_callback=_audio_callback_intent)
        self._audio_stream.start_stream()
        print("Waiting for command ...\n")

        def request_generator(audio_config):
            """Yield the config request, then audio chunks from the queue
            until a None sentinel arrives or intent detection is stopped."""
            query_input = dialogflow.QueryInput(audio_config=audio_config)
            # Request spoken audio back from Dialogflow (LINEAR16).
            output_audio_config = dialogflow.OutputAudioConfig(
                audio_encoding=dialogflow.OutputAudioEncoding.
                OUTPUT_AUDIO_ENCODING_LINEAR_16)
            # The first request contains the configuration.
            # single_utterance=True lets Dialogflow end the stream itself
            # when the speaker pauses.
            yield dialogflow.StreamingDetectIntentRequest(
                session=session_path,
                query_input=query_input,
                single_utterance=True,
                output_audio_config=output_audio_config)
            while True:
                # Blocking read; the stream callback is the producer.
                chunk = self._buff.get()
                if chunk is None:
                    # None acts as an end-of-stream sentinel.
                    print("chunk none")
                    return
                if not self._isIntentDetect:
                    # Detection was cancelled externally.
                    print("done intent")
                    return
                yield dialogflow.StreamingDetectIntentRequest(
                    input_audio=chunk)

        requests = request_generator(audio_config)
        responses = session_client.streaming_detect_intent(requests)
        print('=' * 20)
        # Default to "conversation over"; handleDialogflowResponse may
        # flip this flag to continue the dialog — TODO confirm against
        # that handler's implementation.
        self._isEndConversation = True
        for response in responses:
            self.handleDialogflowResponse(response)
        self.stopDetectIntent()
        if self._isEndConversation:
            print('end conversation')
            print("send mqtt end detectinetnt event")
            # Notify listeners over MQTT, then go back to hotword mode.
            self._mqtt.publish(self._endDetectIntentEventTopic, "1")
            self.runDetectHotword()
        else:
            # Dialog continues: play the wake chime and listen again with
            # the same session so Dialogflow keeps the context.
            self.playSound(self._wake_sound_file)
            print('conversation continue')
            self.runDetectIntent(session_id)
    except KeyboardInterrupt:
        print('stopping ...')
    finally:
        # Always release the microphone stream, even on error.
        if self._audio_stream is not None:
            self._audio_stream.stop_stream()
            self._audio_stream.close()
        # delete Porcupine last to avoid segfault in callback.
        if self._porcupine is not None:
            self._porcupine.delete()
def __init__(self):
    """Initialize the dialogflow ROS node.

    Sets up the Dialogflow session client and audio config, reads node
    parameters, creates all publishers/subscribers, and connects to the
    QT robot's audio/speech services (blocks until they are available).
    """
    rospy.init_node('dialogflow_node')
    # GCP project hosting the Dialogflow agent.
    self.project_id = "folke-jkih"
    # Fresh session id per node start; one conversation per node run.
    self.session_id = str(uuid.uuid4())
    self.language = rospy.get_param('~default_language', 'sv')
    self.disable_audio = rospy.get_param('~disable_audio', False)
    # Seconds of audio kept from before speech is detected.
    time_before_start = rospy.get_param('~time_before_start', 0.8)
    self.save_audio_requests = rospy.get_param('~save_audio_requests', True)
    self.session_client = dialogflow.SessionsClient()
    # Pin the query to a fixed location and pre-load the 'linkoping'
    # context with a long lifespan so it survives the whole session.
    self.query_params = dialogflow.QueryParameters(
        geo_location=latlng_pb2.LatLng(
            latitude=58.4106611, longitude=15.6198244),
        contexts=[
            dialogflow.Context(
                lifespan_count=100,
                name="projects/" + self.project_id + "/agent/sessions/" +
                self.session_id + "/contexts/linkoping")
        ])
    # Ring buffer of recent audio chunks; maxlen sized so the buffer
    # spans ~time_before_start seconds at the assumed chunk rate
    # (16000 Hz / 512 samples per chunk = 31.25 chunks/s) — NOTE(review):
    # a stale comment below mentions 7.8 Hz (2048-sample chunks); confirm
    # the actual chunk size used by the audio source.
    self.audio_chunk_queue = deque(
        maxlen=int(time_before_start * 31.25)
    )  # 16000/512 = 31.25,
    # Times 7.8 since the data is sent in 7.8Hz (16000 / 2048)
    # Note: hard coding audio_encoding and sample_rate_hertz for simplicity.
    audio_encoding = dialogflow.AudioEncoding.AUDIO_ENCODING_LINEAR_16
    sample_rate_hertz = 16000
    self.audio_config = dialogflow.InputAudioConfig(
        audio_encoding=audio_encoding,
        language_code=self.language,
        sample_rate_hertz=sample_rate_hertz,
        single_utterance=True)
    # Outbound topics.
    self.query_result_pub = rospy.Publisher('response', Response,
                                            queue_size=2)
    self.query_text_pub = rospy.Publisher('query_text', String,
                                          queue_size=2)
    self.transcript_pub = rospy.Publisher('transcript', String,
                                          queue_size=2)
    self.fulfillment_pub = rospy.Publisher('fulfillment_text', String,
                                           queue_size=10)
    # Latched so late subscribers immediately see the current state.
    self.is_listening_pub = rospy.Publisher('is_listening', Bool,
                                            queue_size=2, latch=True)
    self.is_waiting_for_hot_word = rospy.Publisher('waiting_for_hot_word',
                                                   Bool, queue_size=2,
                                                   latch=True)
    # Mutable node state flags.
    self.volume = 0
    self.is_talking = False
    self.is_in_dialog = False
    self.detected_wake_word = False
    self.head_visible = False
    self.waiting_for_wake_word = False
    self.cancel_stream_intent = False
    self.skip_audio = False
    # Block until the robot services exist before creating proxies.
    rospy.wait_for_service('/qt_robot/audio/play')
    self.audio_play_srv = rospy.ServiceProxy('/qt_robot/audio/play',
                                             audio_play)
    rospy.wait_for_service('/qt_robot/speech/config')
    self.speech_config_srv = rospy.ServiceProxy('/qt_robot/speech/config',
                                                speech_config)
    # Inbound topics.
    rospy.Subscriber('text', String, self.text_callback)
    rospy.Subscriber('is_talking', Bool, self.is_talking_callback)
    rospy.Subscriber('event', Event, self.event_callback)
    rospy.Subscriber('head_visible', Bool, self.head_visible_callback)
    rospy.Subscriber('detected_wake_word', String,
                     self.detected_wake_word_callback)
    rospy.Subscriber('end_of_conversation', EmptyMsg,
                     self.end_of_conversation_callback)
    if not self.disable_audio:
        rospy.Subscriber('sound', AudioData, self.audio_callback)
        rospy.Subscriber('volume', UInt16, self.volume_callback)
    # Debug/maintenance services. NOTE(review): the last two assignments
    # both bind self.list_context_sevice, so the 'list_context' proxy
    # reference is overwritten by 'clear_context' (the services themselves
    # still work); also 'sevice' looks like a typo for 'service'.
    self.list_intents_sevice = rospy.Service('list_intents', Empty,
                                             self.handle_list_intents)
    self.list_context_sevice = rospy.Service('list_context', Empty,
                                             self.handle_list_context)
    self.list_context_sevice = rospy.Service('clear_context', Empty,
                                             self.handle_clear_context)