class process_sentence_node():
    """ROS node that turns natural-language sentences into robot programs.

    Subscribes to ``/nl_input`` (``SentenceProgram`` messages). For each
    received sentence a program template is created and, depending on the
    database state, either grounded and stored as a robot program or stored
    as a new custom template. The ontology is exchanged with
    ``db_interface_node`` through its ``getDatabase``, ``sendDatabase`` and
    ``addObject`` services.

    NOTE(review): the previous header comment said this node listens to
    ``/averaged_markers`` from the object_detection package; this class
    creates no such subscription -- confirm whether that is still planned.
    NOTE(review): ``self.pub`` (``/sentence_output``) is created but never
    published to inside this class.
    """

    def __init__(self):
        """Initialise the ROS node, service proxies and the local database."""
        rospy.init_node('process_sentence_node', anonymous=False)
        logging.config.fileConfig(get_full_path('config', 'logging.ini'))

        # Fetch the current ontology path (read-only) before anything else.
        self._getdatsrv = rospy.ServiceProxy(
            '/db_interface_node/getDatabase', GetDatabase)
        req = GetDatabaseRequest()
        req.write = False
        res = self._getdatsrv.call(req)

        self.sub = rospy.Subscriber(
            '/nl_input', SentenceProgram, queue_size=10,
            callback=self.callback)
        self.pub = rospy.Publisher(
            '/sentence_output', SentenceProgram, queue_size=10)
        self._addsrv = rospy.ServiceProxy(
            '/db_interface_node/addObject', AddObject)
        self._senddatsrv = rospy.ServiceProxy(
            '/db_interface_node/sendDatabase', SendDatabase)

        self.db_api = DatabaseAPI()
        self.db = self.db_api.get_db()
        self.ontoC = self.db.change_onto(res.path)

        # Running suffixes used to name saved programs
        # ("ungrounded_<n>" / "grounded_<n>").
        self.UngroundedID = 1
        self.GroundedID = 1

    def callback(self, data):
        """Subscriber callback: process the sentences in ``data.data``."""
        self.process_sentences(data.data)

    def process_sentences(self, input_sentences):
        """Create (and, if applicable, ground and store) a program per sentence.

        :param input_sentences: iterable of sentences to process.
        """
        # populate the workspace
        # TODO should be replaced by input from a realsense
        self.add_object('Glue', x=0.2, y=0.4, z=0, color='black')
        self.add_object('Panel', x=0.2, y=0.1, z=0, color='red')
        self.add_object('Panel', x=0.2, y=-0.2, z=0, color='blue')
        self.add_object('Cube', x=0.1, y=0.3, z=0, id='0', color='red')
        self.add_object('Cube', x=0.1, y=0.2, z=0, id='1', color='red')
        self.add_object('Cube', x=0.2, y=0.2, z=0, id='2', color='green')

        # get current database after adding all objects
        self.get_database(write=False)

        # Just to see if objects were added to the database. The ontology is
        # reached through self.db; the class binds no bare ``db`` name.
        onto = self.db.onto
        print(onto.search(type=onto.Cube))
        print(onto.search(type=onto.Glue))
        print(onto.search(type=onto.Panel))

        for input_sentence in input_sentences:
            self.get_database(write=True)

            nl_processor = NLProcessor()
            program_template = nl_processor.process_text(input_sentence)

            print()
            print("Program Template")
            print("--------")
            print(program_template)

            # NOTE(review): branch nesting was reconstructed from a source
            # whose indentation was lost; this pairing mirrors ``act``.
            if self.db_api.get_state() == State.DEFAULT:
                self.send_database()
                self.get_database(write=True)

                robot_program = self.run_program(program_template)
                # The state is queried again after run_program.
                if self.db_api.get_state() == State.DEFAULT:
                    self.save_grounded_program(robot_program)
                    self.send_database()
                elif self.db_api.get_state() != State.LEARN_FROM_INSTRUCTIONS:
                    self.db_api.set_state(State.DEFAULT)
                    self.send_database()
            elif self.db_api.get_state() == State.LEARN_FROM_INSTRUCTIONS:
                self.save_new_template(program_template)
                self.send_database()

        # print list of programs
        self.get_database(write=False)
        for custom_template in self.db_api.get_custom_templates():
            print(custom_template.name[1:])
        for program in self.db.onto.search(type=self.db.onto.RobotProgram):
            print(program.name)

    def act(self, program_template):
        """Run the template or store it as a custom template, by state."""
        state = self.db_api.get_state()
        if state == State.DEFAULT:
            self.run_program(program_template)
        elif state == State.LEARN_FROM_INSTRUCTIONS:
            self.db_api.add_custom_template(program_template)
            self.db_api.set_state(State.DEFAULT)

    def run_program(self, program_template):
        """Evaluate ``program_template`` with a ``ProgramRunner``.

        :return: the value returned by ``ProgramRunner.evaluate``.
        """
        robot_program = ProgramRunner().evaluate(program_template)
        print()
        print("Grounded Program")
        print("--------")
        print(robot_program)
        return robot_program

    def save_new_template(self, program_template):
        """Store a custom template and reset the state to ``DEFAULT``."""
        self.db_api.add_custom_template(program_template)
        self.db_api.set_state(State.DEFAULT)

    def save_unground_program(self, program_template):
        """Save the template as ``ungrounded_<n>`` and advance the counter."""
        # TODO add parameter time to the added program
        # TODO add parameter to be done to the added program
        # TODO search ontology for last program id
        name = 'ungrounded_' + str(self.UngroundedID)
        self.UngroundedID += 1
        self.db_api.save_program(program_template, name)

    def save_grounded_program(self, ground_program):
        """Save the program as ``grounded_<n>`` and advance the counter."""
        # TODO search ontology for last program id
        # TODO link the corresponding ungrounded program in grounded one or vice versa
        # TODO add parameter time to the added program
        name = 'grounded_' + str(self.GroundedID)
        self.GroundedID += 1
        self.db_api.save_program(ground_program, name)

    def get_database(self, write):
        """Request the ontology from the database node and switch to it.

        :param write: forwarded as ``req.write``; when true the ontology is
            additionally entered via ``self.db.onto.__enter__()``.
        """
        req = GetDatabaseRequest()
        req.write = write
        res = self._getdatsrv.call(req)
        self.ontoC = self.db.change_onto(res.path)
        # NOTE(review): result is unused; the call is kept in case the search
        # has a side effect the rest of the code relies on -- confirm.
        self.ontoC.search(type=self.ontoC.Cube)
        if write:
            self.ontoC = self.db.onto.__enter__()

    def send_database(self):
        """Save the ontology next to this script and send its path."""
        req = SendDatabaseRequest()
        self.ontoC.__exit__()
        path = os.path.join(
            os.path.dirname(os.path.abspath(__file__)),
            'saved_updated_onto.owl')
        self.ontoC.save(path)
        req.path = path
        res = self._senddatsrv.call(req)
        print(res.received.msg)
        print(res.received.success)

    def add_object(self, type, x, y, z, id=None, color=None):
        """Ask the database node to add one object.

        ``type`` and ``id`` shadow builtins but are kept because callers pass
        them by keyword.

        :param type: stored in ``req.obj_class``.
        :param x: stored in ``req.x``.
        :param y: stored in ``req.y``.
        :param z: stored in ``req.z``.
        :param id: optional; only set on the request when truthy.
        :param color: optional; only set on the request when truthy.
        """
        req = AddObjectRequest()
        req.obj_class = type
        req.x = x
        req.y = y
        req.z = z
        if id:
            req.id = id
        if color:
            req.color = color
        req.action.action = DBOperation.ADD
        res = self._addsrv.call(req)
        assert isinstance(res, AddObjectResponse)
class SpeechProcessor():
    """Speech front-end combining an OPC UA state dialogue with NL commands.

    The object connects to an OPC UA server (``Client``), listens to the
    microphone through ``speech_recognition`` and reacts to keyboard keys:
    ``q`` terminates :meth:`run`, ``s`` records a sentence and processes it
    into a robot program through the database services.

    NOTE(review): the source this class was restored from had lost its
    indentation; block nesting marked with NOTE(review) below should be
    confirmed against the original file.
    """

    WRONG_INPUT_STATE = 0
    SEND_ERRORS = False  # whether to send error to OPC server or just show them in console
    USE_ACTIVE_STATE = False
    STRIP_ACCENTS = True  # whether to remove accents before comparing recognized text and possible choices
    ALLOW_INTERRUPTIONS = False  # whether the user can interrupt TTS playback
    MICROPHONE_WAIT_TIMEOUT = 5  # time to wait for any non-zero audio from mic
    LISTENING_START_TIMEOUT = 5  # time to start speaking after recognition is ran
    PHRASE_TIMEOUT = 4  # maximum length of phrase before listening is cut off
    CALIBRATION_TIME = 1  # time to spend calibrating the microphone bgr energy levels

    def __init__(self, serverAdress, gain: float = 1.0):
        """Create recognizers, the keyboard listener and the ROS interfaces.

        :param serverAdress: passed to ``Client``; make sure address and
            port are correct.
        :param gain: value used for the SoX ``vol`` effect.
        """
        self.DEBUG_mode = False
        self.client = Client(serverAdress)
        self.shouldProcessInput = True
        self.LANG = 'en'
        self.recognizer = sr.Recognizer()
        self.fast_recognizer = sr.Recognizer()
        self.fast_recognizer.non_speaking_duration = 0.05
        self.fast_recognizer.pause_threshold = 0.15
        self.recText = RawNLParser(language=self.LANG)
        self.lastRecognitionFailed = False
        self.repeatChoices = False
        self.last_key = ''
        self.keys_pressed = set()
        if self.DEBUG_mode:
            # Must stay a set: on_press() calls .add() on it.
            self.keys_pressed = {'s'}
        self.ADD_OBJECTS = 1  # 1 until the demo objects were added once
        self.kb_listener = Listener(on_press=self.on_press)
        self.kb_listener.start()
        self.play_sound("start_speech")

        # SoX Effects
        # http://sox.sourceforge.net/sox.html#EFFECTS
        #   gain
        #     -n  normalize the audio to 0dB
        #     -b  balance the audio, try to prevent clipping
        #   dither  adds noise to mask low sampling rate
        #   vol     changes volume: above 1 -> increase, below 1 -> decrease
        #   pad     adds silence to the beginning (first argument) and the
        #           end (second argument) of the audio - attempt to mask
        #           sound cut-off but extends the audio duration
        self.sox_effects = ("gain", "-n", "-b", "vol", str(gain),
                            "pad", "0", "0.5", "dither", "-a")
        SpeechCustom.MAX_SEGMENT_SIZE = 300
        Speech.MAX_SEGMENT_SIZE = 300
        # Trailing "<b>" / "<->" directive at the end of a state hint.
        self.hint_directive_re = re.compile(r"<([b-])>\s*$")

        rospy.init_node('process_sentence_node', anonymous=False)
        logging.config.fileConfig(get_full_path('config', 'logging.ini'))
        self._getdatsrv = rospy.ServiceProxy(
            '/db_interface_node/getDatabase', GetDatabase)
        req = GetDatabaseRequest()
        req.write = False
        res = self._getdatsrv.call(req)
        self.sub = rospy.Subscriber(
            '/nl_input', SentenceProgram, queue_size=10,
            callback=self.callback)
        self.pub = rospy.Publisher(
            '/sentence_output', SentenceProgram, queue_size=10)
        self._addsrv = rospy.ServiceProxy(
            '/db_interface_node/addObject', AddObject)
        self._senddatsrv = rospy.ServiceProxy(
            '/db_interface_node/sendDatabase', SendDatabase)
        self.db_api = DatabaseAPI()
        self.db = self.db_api.get_db()
        self.ontoC = self.db.change_onto(res.path)
        # Running suffixes used to name saved programs.
        self.UngroundedID = 1
        self.GroundedID = 1

    def on_press(self, key):
        """Keyboard listener callback: remember the key that was pressed."""
        self.last_key = key
        if isinstance(key, KeyCode):
            self.keys_pressed.add(key.char)
        print('{0} pressed'.format(key))

    def connect(self):
        """Connect to the OPC UA server and look up the nodes used later."""
        self.client.connect()  # connect to server
        root = self.client.nodes.root  # get the root entity
        print("Connected to server and successfully retrieved the root.")

        # Retrieve some objects and variables
        dataObj = root.get_child(["0:Objects", "4:DATA"])
        # a horse
        horse = dataObj.get_child("4:horse")
        # mic_active is the "You can now try recognizing speech" variable
        self.mic_active = horse.get_child("4:request")
        # the string where responses (errors and such) should be sent
        self.response_string = horse.get_child("4:response")
        # the state change request variable
        self.request = horse.get_child(["4:next_state_choice", "4:request"])
        # the number of the state to be changed to
        self.request_state_num = horse.get_child(
            ["4:next_state_choice", "4:state_num"])
        # the variable with next state choices
        self.next_state_possibilities = horse.get_child(
            "4:next_state_possibilities")
        # the variable with the current state
        self.actual_state_number = horse.get_child(
            ["4:actual_state", "4:number"])

        # NOTE(review): this rebinds self.sub, which __init__ set to the
        # rospy subscriber -- confirm that sharing the name is intended.
        self.sub = self.client.create_subscription(100, self)
        self.handle = self.sub.subscribe_data_change(self.mic_active)

    def _send_state(self, state_num):
        """Changes state to the specified number and sets "request" to True."""
        dt = ua.DataValue(ua.Variant(state_num, ua.VariantType.Int16))
        self.request_state_num.set_value(dt)
        self.request.set_value(True)

    def __extract_directive(self, string):
        """Split a trailing ``<b>``/``<->`` directive off a state hint.

        :return: ``(text, directive)``; ``directive`` is ``""`` when the
            hint carries none.
        """
        result = self.hint_directive_re.split(string)
        if len(result) > 1:
            return result[0], result[1]
        return result[0], ""

    def init(self, configure_deivce=False):
        """Load resource files, pick the input device and calibrate it.

        :param configure_deivce: ``True`` prompts for a device index, an
            ``int`` selects that index directly, anything else keeps the
            preset index.
        """
        self.load_files()
        print("Audio input devices on this system:\n\tindex\tname")
        default_device_idx = sd.default.device[0]
        device_list = sr.Microphone.list_microphone_names()
        for index, name in enumerate(device_list):
            if index == default_device_idx:
                d = ">\t"
                add = "(CURRENT)"
            else:
                d = "\t"
                add = ""
            print(f"{d}{index}:\t\t{name} {add}")

        # NOTE(review): hard-coded device index (the system default was
        # commented out in its favour) -- confirm this is still wanted.
        self.microphone_index = 12

        # ``type(...) is`` on purpose: bool is a subclass of int and the two
        # must be told apart here.
        if type(configure_deivce) is bool and configure_deivce:
            if INPUTIMEOUT_IMPORT_SUCCESS:
                timout = 10
                try:
                    self.microphone_index = int(
                        inputimeout(
                            prompt=f"Select device to use within {timout} seconds (enter to select the default device)\nDefault device '{default_device_idx}: {device_list[default_device_idx]}': ",
                            timeout=timout) or default_device_idx)
                except TimeoutOccurred:
                    print("Selecting the default device.")
                    # Actually select it, as the message says.
                    self.microphone_index = default_device_idx
            else:
                # Built-in input() takes its prompt positionally only.
                self.microphone_index = int(
                    input(
                        f"Select device to use (enter to select the default device)\nDefault device '{default_device_idx}: {device_list[default_device_idx]}': "
                    ) or default_device_idx)
            if self.microphone_index != default_device_idx:
                print(
                    f"Selected device with index {self.microphone_index}: {device_list[self.microphone_index]}."
                )
            else:
                print("Selected the default device.")
        elif type(configure_deivce) is int:
            self.microphone_index = configure_deivce
            print(
                f"Selected device with index {self.microphone_index}: {device_list[self.microphone_index]} from command line."
            )

        self.stream_device = [self.microphone_index, sd.default.device[1]]
        print(
            "Calibrating microphone. Make sure it is turned on and that no one is speaking."
        )
        self._notbreak = True
        with sr.Microphone(device_index=self.microphone_index) as source:
            # let the recognizer adjust the energy threshold based on the
            # surrounding noise level
            self.recognizer.adjust_for_ambient_noise(
                source, duration=self.CALIBRATION_TIME)
            self.fast_recognizer.energy_threshold = self.recognizer.energy_threshold
        print("Calibration done.")
        self.print_message("Ready.")

    def say(self, req, say=True, screen=True):
        """Output a text on the console and/or as synthesized speech.

        Example: ``self.say('tell me more.')``

        :param req: text to output.
        :param say: when true, synthesize ``req`` with gTTS, save it as
            ``say.mp3`` and play it.
        :param screen: when true, print ``req``.
        """
        if screen:
            print(req)
        if say:
            tts = gtts.gTTS(req, lang=self.LANG)
            tts.save("say.mp3")
            playsound("say.mp3")

    def listen(self):
        """Record one phrase and return the recognized text.

        :return: the recognized text, or ``""`` when nothing was heard or
            the speech was not understood.
        """
        recog_text_original = ""
        try:
            with sr.Microphone(device_index=self.microphone_index) as source:
                print("You may say Something")
                # listens for the user's input
                audio = self.recognizer.listen(
                    source,
                    timeout=self.LISTENING_START_TIMEOUT,
                    phrase_time_limit=self.PHRASE_TIMEOUT)
                print("speech heard, processing...")
                # for testing purposes, we're just using the default API key
                # to use another API key, use `self.recognizer.recognize_google(audio, key="GOOGLE_SPEECH_RECOGNITION_API_KEY")`
                recog_text_original = self.recognizer.recognize_google(
                    audio, language=self.LANG)
                print(recog_text_original)
        except sr.UnknownValueError as e:
            print(f"Did not understand: {e}")
        except sr.WaitTimeoutError:
            print("No speech")
        else:
            print(f"I recognized text: {recog_text_original}")
        # Still "" unless recognition succeeded.
        return recog_text_original

    def text_preprocessing(self, text_raw):
        """Replace synonyms in ``text_raw`` and wrap it in a message.

        :return: a ``SentenceProgram`` whose ``data`` holds the text.
        """
        recog_text = self.recText.replace_synonyms(text_raw)
        msg = SentenceProgram()
        msg.data.append(recog_text)
        # TODO: more intelligent way of catching this
        return msg

    def add_object(self, type, x, y, z, id=None, color=None):
        """Ask the database node to add one object.

        ``type`` and ``id`` shadow builtins but are kept because callers pass
        them by keyword. ``id`` and ``color`` are only set when truthy.
        """
        req = AddObjectRequest()
        req.obj_class = type
        req.x = x
        req.y = y
        req.z = z
        if id:
            req.id = id
        if color:
            req.color = color
        req.action.action = DBOperation.ADD
        res = self._addsrv.call(req)
        assert isinstance(res, AddObjectResponse)

    def get_database(self, write):
        """Request the ontology from the database node and switch to it.

        :param write: forwarded as ``req.write``; when true the ontology is
            additionally entered via ``self.db.onto.__enter__()``.
        """
        req = GetDatabaseRequest()
        req.write = write
        res = self._getdatsrv.call(req)
        self.ontoC = self.db.change_onto(res.path)
        # NOTE(review): result is unused; the call is kept in case the search
        # has a side effect the rest of the code relies on -- confirm.
        self.ontoC.search(type=self.ontoC.Cube)
        if write:
            self.ontoC = self.db.onto.__enter__()

    def send_database(self):
        """Save the ontology next to this script and send its path."""
        req = SendDatabaseRequest()
        self.ontoC.__exit__()
        path = os.path.join(
            os.path.dirname(os.path.abspath(__file__)),
            'saved_updated_onto.owl')
        self.ontoC.save(path)
        req.path = path
        res = self._senddatsrv.call(req)
        print(res.received.msg)
        print(res.received.success)

    def process_sentences(self, input_sentences):
        """Create (and, if applicable, ground and store) a program per sentence.

        The demo workspace objects are added only on the first call.

        :param input_sentences: iterable of sentences to process.
        """
        # populate the workspace
        # TODO should be replaced by input from a realsense
        if self.ADD_OBJECTS == 1:
            self.add_object('Glue', x=0.2, y=0.4, z=0, color='black')
            self.add_object('Panel', x=0.2, y=0.1, z=0, color='red')
            self.add_object('Panel', x=0.2, y=-0.2, z=0, color='blue')
            self.add_object('Cube', x=0.1, y=0.3, z=0, id='0', color='red')
            self.add_object('Cube', x=0.1, y=0.2, z=0, id='1', color='red')
            self.add_object('Cube', x=0.2, y=0.2, z=0, id='2', color='green')
            self.ADD_OBJECTS = 0

        # get current database after adding all objects
        self.get_database(write=False)

        # Just to see if objects were added to the database. The ontology is
        # reached through self.db; the class binds no bare ``db`` name.
        onto = self.db.onto
        print(onto.search(type=onto.Cube))
        print(onto.search(type=onto.Glue))
        print(onto.search(type=onto.Panel))

        for input_sentence in input_sentences:
            self.get_database(write=True)

            nl_processor = NLProcessor(language=self.LANG)
            program_template = nl_processor.process_text(input_sentence)

            print()
            print("Program Template")
            print("--------")
            print(program_template)

            # NOTE(review): branch nesting reconstructed -- confirm.
            if self.db_api.get_state() == State.DEFAULT:
                self.send_database()
                self.get_database(write=True)

                robot_program = self.run_program(program_template)
                # The state is queried again after run_program.
                if self.db_api.get_state() == State.DEFAULT:
                    self.save_grounded_program(robot_program)
                    self.send_database()
                elif self.db_api.get_state() != State.LEARN_FROM_INSTRUCTIONS:
                    self.db_api.set_state(State.DEFAULT)
                    self.send_database()
            elif self.db_api.get_state() == State.LEARN_FROM_INSTRUCTIONS:
                self.save_new_template(program_template)
                self.send_database()

        # print list of programs
        self.get_database(write=False)
        for custom_template in self.db_api.get_custom_templates():
            print(custom_template.name[1:])
        for program in self.db.onto.search(type=self.db.onto.RobotProgram):
            print(program.name)

    def save_grounded_program(self, ground_program):
        """Save the program as ``grounded_<n>`` and advance the counter."""
        # TODO search ontology for last program id
        # TODO link the corresponding ungrounded program in grounded one or vice versa
        # TODO add parameter time to the added program
        name = 'grounded_' + str(self.GroundedID)
        self.GroundedID += 1
        self.db_api.save_program(ground_program, name)

    def save_new_template(self, program_template):
        """Store a custom template and reset the state to ``DEFAULT``."""
        self.db_api.add_custom_template(program_template)
        self.db_api.set_state(State.DEFAULT)

    def run_program(self, program_template):
        """Evaluate ``program_template`` with a ``ProgramRunner``.

        :return: the value returned by ``ProgramRunner.evaluate``.
        """
        robot_program = ProgramRunner(
            language=self.LANG).evaluate(program_template)
        print()
        print("Grounded Program")
        print("--------")
        print(robot_program)
        return robot_program

    def callback(self, data):
        """Subscriber callback: process the sentences in ``data.data``."""
        self.process_sentences(data.data)

    def run(self):
        """Main loop; holds the code execution until ``q`` is pressed.

        ``s`` records and processes one sentence; otherwise, while
        ``shouldProcessInput`` is set, the OPC UA state dialogue is served.
        Local names are passed to the response templates through
        ``locals()``, so they must not be renamed.
        """
        while True:
            if 'q' in self.keys_pressed:
                self.print_message("User requested termination.")
                break
            elif 's' in self.keys_pressed:
                ui = UserInputManager(language=self.LANG)
                ui.query_state("shit on the string")
                print('detecting robot programs using ontology.')
                # NOTE(review): extent of this ``if`` reconstructed.
                if not self.DEBUG_mode:
                    self.say(self.guidance_file[self.LANG]["start_template"])
                    self.say(self.guidance_file[self.LANG]["start_specify"])
                    text = self.listen()
                if self.DEBUG_mode:
                    if self.LANG == 'cs':
                        text = "Nalep bod sem a polož kostku sem."
                    if self.LANG == 'en':
                        # alternative test sentence:
                        # "Glue a point here and Put cube to position 0 0"
                        text = "Tidy up red cube."
                    self.say(self.guidance_file[self.LANG]["debug_text"] + text)
                if text == "":
                    self.play_message(
                        self.create_response("did_not_understand", locals()),
                        block=True)
                    self.print_message(
                        self.create_response("no_speech", locals()))
                    # keys_pressed is left untouched, so the next cycle
                    # enters this branch again
                    continue
                sentence_program = self.text_preprocessing(text)
                print(sentence_program)
                self.process_sentences(sentence_program.data)
                self.keys_pressed = set()
                print('processing complete')
            elif self.shouldProcessInput:
                # set mic_active back to False -> we started processing the input
                self.mic_active.set_value(False)

                # retrieve current and possible states
                possible_states = self._decode_states(
                    self.next_state_possibilities.get_value())
                current_state = self.actual_state_number.get_value()

                # the chosen next state
                next_state = self.processInput(current_state, possible_states)
                if next_state is not None and next_state > 0:
                    try:
                        # send state change request
                        self._send_state(next_state)
                    except Exception:
                        print(
                            "There was an error while sending the chosen next state. The error message was:"
                        )
                        trace_exception()
                    else:
                        self.shouldProcessInput = False
                        self.lastRecognitionFailed = False
                else:
                    # recognition failed, attempt again on the next cycle
                    self.lastRecognitionFailed = True
                    continue

    def processInput(self, current_state, possible_states):
        """Listen for audio and pick the next state from the user's speech.

        It will be called only if user might enter an input. Local names are
        passed to the response templates through ``locals()``, so they must
        not be renamed and statement order matters.

        :return: the chosen state, ``-1`` when nothing was selected, or
            ``None`` on the early exits below.
        """
        next_state_ID = -1
        recog_text_original = ""

        [variants, possible_states, current_state_text] = self.get_variants(
            current_state, possible_states)
        if current_state_text == "":
            # no state hint, assuming passthrough state
            print("no state hint. Going to next state.")
            return possible_states[0]

        current_state_text, hint_directive = self.__extract_directive(
            current_state_text)
        if hint_directive == "-":
            # a passthrough state
            self.play_message(self.create_response("current_state", locals()),
                              display=True)
            # automatically advance to the next state
            return possible_states[0]

        # NOTE(review): nesting of this block reconstructed -- confirm.
        if not self.lastRecognitionFailed or self.repeatChoices:
            start_message = self.create_response("current_state", locals())
            if not self.lastRecognitionFailed:
                if len(possible_states) > 1:
                    start_message += self.create_response(
                        "which_choice", locals())
                elif hint_directive == "b":
                    start_message += self.create_response(
                        "yes_or_no", locals())
            if self.ALLOW_INTERRUPTIONS:
                interrupted_text = self.play_message_and_listen(
                    start_message, display=True)
                recog_text_original = interrupted_text + " "
            else:
                self.play_message(start_message, display=True)
        self.repeatChoices = False

        # speech to text from microphone
        success = False
        try:
            with sr.Microphone(device_index=self.microphone_index) as source:
                print("You may say Something")
                self.play_sound("start_speech")
                # listens for the user's input
                audio = self.recognizer.listen(
                    source,
                    timeout=self.LISTENING_START_TIMEOUT,
                    phrase_time_limit=self.PHRASE_TIMEOUT)
                self.play_sound("end_speech")
                print("speech heard, processing...")
                # for testing purposes, we're just using the default API key
                # to use another API key, use `self.recognizer.recognize_google(audio, key="GOOGLE_SPEECH_RECOGNITION_API_KEY")`
                recog_text_original += self.recognizer.recognize_google(
                    audio, language=self.LANG)
        except sr.UnknownValueError:
            self.play_message(self.create_response("did_not_understand",
                                                   locals()),
                              block=True)
        except sr.WaitTimeoutError:
            self.print_message(self.create_response("no_speech", locals()))
        else:
            self.print_message(
                self.create_response("speech_recognition_result", locals()))
            success = True

        if success or (self.ALLOW_INTERRUPTIONS and recog_text_original):
            # processing recognized text
            recog_text = self.recText.replace_synonyms(recog_text_original)
            # process recorded text using parser, tagging
            tagged_text = self.get_tagged_text(recog_text)

            # TODO: more intelligent way of catching this
            if "CHOICE_option" in tagged_text:
                self.repeatChoices = True
                return

            if len(variants) == 0:
                # TODO if nothing matching...
                print("No choices for this state. Is this an error?")
                return

            # select variant for the given actual state from possible next states
            if hint_directive == "b":
                # TODO yes is checked before no. What to do if both words?
                yes = "YES_option" in tagged_text
                no = "NO_option" in tagged_text
                if yes and no:
                    self.play_message(
                        self.create_response("makes_no_sense", locals()))
                    return
                elif yes:
                    self.print_message("")
                    return possible_states[0]
                elif no:
                    self.play_message(
                        self.create_response("no_cannot_proceed", locals()))
                    return

            variantID, next_state = self.select_variant(tagged_text, variants)
            if variantID > -1:
                selected_variant = variants[variantID]
                next_state_ID = possible_states[variantID]
                self.print_message(
                    self.create_response("selected_state_full", locals()))
                self.play_message(
                    self.create_response("selected_state", locals()))
            else:
                print(f"Tagged text: {tagged_text}")
                print(f"Variants: {variants}")
                self.play_message(
                    self.create_response("unknown_choice", locals()))
        return next_state_ID

    def create_response(self, response_id, local_variables=None, language=None):
        """Format the response template ``response_id``.

        :param response_id: key into ``self.responses[<language>]``.
        :param local_variables: mapping of names available to the template
            (callers usually pass ``locals()``). It is copied, never
            modified; ``self`` is added to the copy when missing.
        :param language: language key; defaults to ``self.LANG``.
        :return: the formatted text, or ``""`` when a ``KeyError`` occurs.
        """
        variables = dict(local_variables) if local_variables else {}
        variables.setdefault("self", self)
        lang = self.LANG if language is None else language
        response = ""
        try:
            response = self.responses[lang][response_id].format(**variables)
        except KeyError as e:
            print(f"Variable missing from locals: {e}")
            print(f"Locals: {variables}")
        return response

    def print_message(self, message, raw=True):
        """Print ``message``; also send it to the server if ``SEND_ERRORS``.

        ``raw`` is accepted but not used by this method.
        """
        print(message)
        if self.SEND_ERRORS:
            self.response_string.set_value(message)

    def play_message(self, text, display=False, block=True):
        """Plays a message.

        The message is transformed into speech using TTS. Optionally, the
        message can be output to the console.

        Parameters
        ----------
        text : str
            The text to be played as speech.
        display : bool, optional
            If true, the text is also displayed in the console (to remove
            the need for extra function calls). The *print_message* function
            is used for that. By default False
        block : bool, optional
            If true, wait (``join``) until the speech thread has finished.
            By default True
        """
        if display:
            self.print_message(text)
        self.current_speech = SpeechThread(text, self.LANG, self.sox_effects)
        self.current_speech.start()
        if block:
            self.current_speech.join()

    def play_message_and_listen(self, text, display=False):
        """Play ``text`` without blocking and listen while it is playing.

        :return: the first non-empty text recognized before the speech
            thread reports ``terminated``, else ``""``.
        """
        self.play_message(text, display=display, block=False)
        while not self.current_speech.terminated:
            recog_text = ""
            try:
                with sr.Microphone(
                        device_index=self.microphone_index) as source:
                    audio = self.fast_recognizer.listen(source,
                                                        phrase_time_limit=1)
                    recog_text = self.fast_recognizer.recognize_google(
                        audio, language=self.LANG)
            except Exception:
                # best effort: any failure just means "nothing heard yet"
                continue
            else:
                if recog_text:
                    return recog_text
        return ""

    def play_sound(self, sound_name):
        """Play ``sounds/<sound_name>.wav`` from the ``speech_vr`` package."""
        playsound(resource_filename("speech_vr", f"sounds/{sound_name}.wav"))

    def block_until_sound(self, threshold=0.1, timeout=MICROPHONE_WAIT_TIMEOUT):
        """Wait until the stream callback reports volume above ``threshold``.

        :return: ``True`` once sound was detected, ``False`` when more than
            ``timeout`` seconds elapsed first.
        """
        # waiting for stream of data from microphone
        self._notbreak = True
        start_time = time()
        with sd.Stream(callback=lambda indata, outdata, frames, time, status,
                       threshold=threshold: self.__print_sound(
                           indata, outdata, frames, time, status, threshold),
                       device=self.stream_device):
            while self._notbreak:
                if time() - start_time > timeout:
                    self.print_message(
                        self.create_response("no_audio", locals()))
                    return False
        return True

    def __print_sound(self, indata, outdata, frames, time, status, threshold):
        """Stream callback: clear ``_notbreak`` once input is loud enough."""
        volume_norm = np.linalg.norm(indata) * 10
        if volume_norm > threshold:
            self._notbreak = False

    def get_variants(self, current_state, possible_states=None):
        """Look up variant texts and the hint text for ``current_state``.

        :param current_state: id of the current state.
        :param possible_states: ids of the possible next states; replaced by
            the ``next_states_list`` entry when ``USE_ACTIVE_STATE`` is set.
        :return: ``(variants, possible_states, current_state_text)``; the
            hint text is ``""`` when the state has no hint.
        """
        if possible_states is None:
            possible_states = []
        if self.USE_ACTIVE_STATE:
            # only based on the active_state
            print("Variant list: ", self.next_states_list)
            print(f"Current state ID: {current_state}")
            possible_states = self.next_states_list[self.LANG][str(
                current_state)]
            print('Loaded possible next states:', possible_states)

        # or based on the next_state_possibilities
        variants = [
            self.variants_list[self.LANG][str(option)]
            for option in possible_states
        ]
        # text description of the current state
        hints = self.state_hints_list[self.LANG]
        state_key = str(current_state)
        current_state_text = hints[state_key] if state_key in hints else ""
        return variants, possible_states, current_state_text

    def _load_json_resource(self, file_name):
        """Load ``utils/<file_name>`` from the ``speech_vr`` package."""
        path = resource_filename("speech_vr", os.path.join("utils", file_name))
        with open(path, "r", encoding="utf-8") as f:
            return json.load(f)

    def load_files(self):
        """Load the JSON resource files used by the dialogue."""
        self.variants_list = self._load_json_resource("state_description.json")
        if self.USE_ACTIVE_STATE:
            self.next_states_list = self._load_json_resource(
                "next_states.json")
        self.state_hints_list = self._load_json_resource("state_hints.json")
        self.responses = self._load_json_resource("responses.json")
        self.synonyms_file = self._load_json_resource("synonyms.json")
        self.guidance_file = self._load_json_resource("guidance_dialogue.json")

    def select_variant(self, tagged_text, variants):
        """Find the variant mentioned in ``tagged_text``.

        :return: ``(index, variant)`` of the last matching variant, or
            ``(-1, None)`` when none matches.
        """
        sel_variant_ID = -1
        selected_variant = None
        if self.STRIP_ACCENTS:
            variants = [unidecode(var) for var in variants]
        # TODO handle multiple variants detection
        # TODO exchange for variants[variant_ID] in tagged_text - string works same, if list, if at least one in the list, gives back true
        for variant_ID, variant in enumerate(variants):
            if tagged_text.contains_pos_token(variant, '*'):
                print('Detected variant:', variant)
                selected_variant = variant
                sel_variant_ID = variant_ID
        return sel_variant_ID, selected_variant

    def get_tagged_text(self, recog_text):
        """Tokenize and POS-tag ``recog_text`` into a ``TaggedText``."""
        tagged_text = TaggedText()
        tagged_text.tokens = []
        tagged_text.tags = []
        tokens = nltk.word_tokenize(recog_text)
        for token, pos in nltk.pos_tag(tokens):
            tag = Tag()
            tag.pos = POS(value=pos)
            tagged_text.tokens.append(
                unidecode(token) if self.STRIP_ACCENTS else token)
            tagged_text.tags.append(tag)
        return tagged_text

    def datachange_notification(self, node, val, data):
        """Subscription handler for "mic_active"."""
        if not val:
            return  # the "mic_active" is False -> do nothing
        print("The mic is active!")
        self.shouldProcessInput = True

    def event_notification(self, event):
        """Subscription handler for events: just print them."""
        print("New event received: ", event)

    def _decode_states(self, next_state_values):
        """Return the state numbers contained in ``next_state_values``.

        A list whose first item is an ``int`` is returned unchanged;
        otherwise the first two bytes of each item's ``Body`` are decoded
        little-endian and only positive values are kept.
        """
        if isinstance(next_state_values[0], int):
            return next_state_values
        decoded = (int.from_bytes(ns.Body[:2], byteorder="little")
                   for ns in next_state_values)
        return [v for v in decoded if v > 0]

    def _readInput(self, timeout=0.1):
        """Return ``'q'`` or ``'s'`` if that was the last key, else ``''``.

        ``timeout`` is accepted but not used by this method.
        """
        if self.last_key == '':
            return ''
        if not isinstance(self.last_key, KeyCode):
            return ''
        char = self.last_key.char
        self.last_key = ''  # consume the key
        return char if char in ('q', 's') else ''

    def disconnect(self):
        """Cancel the subscription (if any) and disconnect the client."""
        try:
            self.sub.unsubscribe(self.handle)  # cancel subscription
            self.sub.delete()
        except AttributeError:
            pass  # not subscribed, yet
        except Exception:
            print("Error trying to unsubscribe from the mic_active variable!")
        self.client.disconnect()