def start_node(task_queue): engine = pyttsx3.init() try: while True: task = task_queue.get() if task is None: break else: message = None if task[0] == "overview": objects = task[1] if len(objects) == 0: message = "Nothing is over here" else: names = ", ".join(obj["classname"] for obj in objects) message = "There are %s around you, sir." % names elif task[0] == "find": target = task[1] objects = task[2] found = False for obj in objects: className, rect, m = obj["classname"], obj[ "rect"], obj["distance"] / 1000 if className in target: direction = "left" if ( rect[0] + rect[2]) / 2.0 < 320.0 else "right" message = "%s is about %.1f meters in front of your %s." % ( target, round(m, 1), direction) found = True break if not found: message = "I cannot find %s" % target elif task[0] == "location": name = task[1] message = "Sir, you're currently located at %s" % name elif task[0] == 'bing': play_audio.play('res/bing.wav') else: message = "Sir, it seems something wrong with me!" if message is not None: engine.say(message) engine.runAndWait() except KeyboardInterrupt: print("Shutdown speech worker ...") finally: # Say goodbye engine.say("Goodbye") engine.runAndWait()
def set_timer(settime):
    """Wait *settime* minutes, then buzz for 3 seconds and announce "time up".

    BUG FIX: the original `while mins != timer` counter never terminated for
    a negative or non-integer *settime*; a bounded range loop cannot spin
    forever.
    """
    for _ in range(settime):
        time.sleep(60)
    # When time is up, the buzzer will sound for three seconds.
    digitalWrite(buzzer, 1)
    time.sleep(3)
    digitalWrite(buzzer, 0)
    # When time is up, play "time up!" audio in the user's earphone.
    name = texttospeech('time up')
    pa.play(name)
import requests
from play_audio import play
from record import record
from stt_ailab import get_speech
import utils

utils.mkdir('resources')

# Speech-to-text service endpoint.
api = 'http://devopenai.topica.vn:11119/get_stt'

# Main loop: record a question, send it to the STT service, speak the answer.
while True:
    record()
    with open('resources/question.wav', 'rb') as f:
        audio = f.read()
    r = requests.post(api, files={'file': audio})
    # BUG FIX: the original wrapped `unicode(r.content)` in a bare `except:`
    # that swallowed every error. Decoding the raw bytes as UTF-8 explicitly
    # covers both the ASCII and non-ASCII cases (the old try/except fallback)
    # and also works on Python 3, where `unicode` does not exist.
    answer = r.content.decode('utf-8')
    print(u'answer: %s' % (answer,))
    get_speech(answer)
    play()
from play_audio import play

# Play the start-up sound clip.
play('res/moss.wav')
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

__author__ = 'ipetrash'


if __name__ == '__main__':
    import sys

    # Play the file given on the command line, otherwise print a usage hint.
    args = sys.argv[1:]
    if args:
        import play_audio
        play_audio.play(args[0])
    else:
        import os
        script = os.path.basename(sys.argv[0])
        print('usage: {} [-h] audio_file_name'.format(script))
def assistant():
    """One voice-assistant interaction: prompt the user, record a command,
    and dispatch to the timer / doneness-check / food-recognition feature.

    Sleeps 30 seconds at the end before the caller can prompt again.
    """
    # Play "How can I help you?" in the user's earphone.
    name = texttospeech('how can I help you')
    pa.play(name)
    # Let the user record his/her input voice.
    filename = ra.record()
    # Recognize the user's speech (IBM Bluemix NLP service).
    # speech2[0] is the recognizer confidence, speech2[1] the transcript.
    speech2 = speechtotext(filename)
    # We assume that when confidence > 0.25, the user recorded effective audio.
    # BUG FIX: the original tested `Speech2[0]` (capital S), an undefined name,
    # so every call raised NameError here.
    if speech2[0] > 0.25:
        # If the user wants to set a timer
        if "time" in speech2[1]:
            texttospeech('how many minutes')
            pa.play('output.wav')
            filename = ra.record()
            speech2 = speechtotext(filename)
            if speech2[0] > 0.25:
                # ROBUSTNESS: the transcript may not be a plain number
                # ("five minutes"); treat that like any other misunderstanding
                # instead of crashing with ValueError.
                try:
                    settime = int(speech2[1])
                except ValueError:
                    texttospeech('Sorry I do not understand')
                    pa.play('output.wav')
                else:
                    set_timer(settime)
            else:
                texttospeech('Sorry I do not understand')
                pa.play('output.wav')
        # If the user wants to check the doneness of the food
        elif 'check' in speech2[1]:
            doneness = pm.well_done()
            texttospeech(doneness)
            pa.play('output.wav')
        # If the user wants to recognize the food
        elif 'recognize' in speech2[1]:
            tmp = 'The labels for this object are'
            label_list = pm.what_is_it()
            for label in label_list:
                tmp = tmp + str(label)
            texttospeech(tmp)
            pa.play('output.wav')
        # We ignore all other kinds of input speech
        else:
            texttospeech('Sorry I do not understand')
            pa.play('output.wav')
    # The confidence value is too low; we ignore this input.
    else:
        texttospeech('Sorry I do not understand')
        pa.play('output.wav')
    time.sleep(30)
def start_node(task_queue, objdetect_tasks, nav_tasks, text2speech_tasks):
    """
    Creates an input audio stream, initializes wake word detection (Porcupine)
    object, and monitors the audio stream for occurrences of the wake word(s).
    On detection, runs one Azure single-shot speech recognition and dispatches
    the matched command onto objdetect_tasks / nav_tasks; text2speech_tasks
    receives a ('bing',) chime request. A None on task_queue shuts it down.
    """
    num_keywords = len(keyword_file_paths)
    keyword_names = \
        [os.path.basename(x).replace('.ppn', '').replace('_compressed', '').split('_')[0]
         for x in keyword_file_paths]
    print('Listening for:')
    for keyword_name, sensitivity in zip(keyword_names, sensitivities):
        print('- %s (sensitivity: %f)' % (keyword_name, sensitivity))

    porcupine = None
    pa = None
    audio_stream = None
    # BUG FIX: initialise once, before the try block. The original re-bound
    # `recorded_frames = []` inside every loop iteration, so only the final
    # frame ever reached soundfile.write(); it also left the name undefined
    # (NameError in the finally clause) if an exception fired before the
    # first iteration.
    recorded_frames = []
    try:
        porcupine = Porcupine(library_path=library_path,
                              model_file_path=model_file_path,
                              keyword_file_paths=keyword_file_paths,
                              sensitivities=sensitivities)

        pa = pyaudio.PyAudio()
        audio_stream = pa.open(rate=porcupine.sample_rate,
                               channels=1,
                               format=pyaudio.paInt16,
                               input=True,
                               frames_per_buffer=porcupine.frame_length,
                               input_device_index=input_audio_device_index)

        # SECURITY: Azure subscription key hardcoded in source — rotate it and
        # load from an environment variable / config file instead of shipping
        # the credential in the repository.
        speech_key, service_region = "b1b54e5bcd8943f0b8106e000e1298d7", "eastasia"
        speech_config = speechsdk.SpeechConfig(subscription=speech_key,
                                               region=service_region)
        # Creates a recognizer with the given settings.
        speech_recognizer = speechsdk.SpeechRecognizer(
            speech_config=speech_config)

        play_audio.play('res/moss.wav')

        while True:
            # Poll for a shutdown request without blocking the audio loop.
            try:
                task = task_queue.get_nowait()
                if task is None:
                    break
            except queue.Empty:
                pass

            pcm = audio_stream.read(porcupine.frame_length)
            pcm = struct.unpack_from("h" * porcupine.frame_length, pcm)
            if output_path is not None:
                recorded_frames.append(pcm)

            result = porcupine.process(pcm)
            if num_keywords == 1 and result:
                print('[%s] detected keyword' % str(datetime.now()))
                print("Any instruction, sir?")
                text2speech_tasks.put(('bing', ))

                # recognize_once() returns after a single utterance — ended by
                # silence or a 15-second maximum — so it is suitable only for
                # single-shot command/query recognition. For long-running
                # multi-utterance recognition use start_continuous_recognition().
                result = speech_recognizer.recognize_once()

                # Checks result.
                if result.reason == speechsdk.ResultReason.RecognizedSpeech:
                    print("Recognized: {}".format(result.text))
                    text = result.text.lower()
                    punc = '!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'
                    for c in punc:
                        text = text.replace(c, ' ')  # Remove all punctuation
                    tokens = text.split()
                    # Dispatch every command whose full keyword set appears in
                    # the utterance.
                    for i, kws in enumerate(CMD_KWS):
                        matched = 0
                        for token in tokens:
                            if token in kws:
                                matched += 1
                        if matched == len(kws):
                            if i == 0:
                                objdetect_tasks.put(('overview', ))
                            elif i == 1:
                                # Last token is assumed to be the object name.
                                objdetect_tasks.put(('find', tokens[-1]))
                            elif i == 2:
                                nav_tasks.put(('location', ))
                elif result.reason == speechsdk.ResultReason.NoMatch:
                    print("No speech could be recognized: {}".format(
                        result.no_match_details))
                elif result.reason == speechsdk.ResultReason.Canceled:
                    cancellation_details = result.cancellation_details
                    print("Speech Recognition canceled: {}".format(
                        cancellation_details.reason))
                    if cancellation_details.reason == speechsdk.CancellationReason.Error:
                        print("Error details: {}".format(
                            cancellation_details.error_details))
            elif num_keywords > 1 and result >= 0:
                # Multi-keyword detection — not used.
                pass
    except KeyboardInterrupt:
        print("Shutdown nlp worker ...")
    finally:
        if porcupine is not None:
            porcupine.delete()
        if audio_stream is not None:
            audio_stream.close()
        if pa is not None:
            pa.terminate()
        if output_path is not None and len(recorded_frames) > 0:
            recorded_audio = np.concatenate(recorded_frames,
                                            axis=0).astype(np.int16)
            soundfile.write(output_path, recorded_audio,
                            samplerate=porcupine.sample_rate, subtype='PCM_16')