def in_loaded(self, message: Any, sender: RhasspyActor) -> None:
    """Handle messages in loaded state."""
    if isinstance(message, RecognizeIntent):
        try:
            self.load_graph()

            # Assume lower case, white-space separated tokens
            text = message.text
            tokens = re.split(r"\s+", text)

            if self.profile.get("intent.fsticuffs.ignore_unknown_words", True):
                # Filter tokens
                tokens = [w for w in tokens if w in self.words]

            recognitions = recognize(
                tokens, self.graph, fuzzy=self.fuzzy, stop_words=self.stop_words
            )
            assert recognitions, "No intent recognized"

            # Use first intent
            recognition = recognitions[0]

            # Convert to JSON
            intent = recognition.asdict()
        except Exception:
            self._logger.exception("in_loaded")
            intent = empty_intent()

        intent["speech_confidence"] = message.confidence

        self.send(
            message.receiver or sender,
            IntentRecognized(intent, handle=message.handle),
        )
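For reference, a minimal standalone sketch of the rhasspynlu calls this handler relies on. The template text is a made-up example (taken from the rhasspynlu README style), not part of the actor above.

import re

import rhasspynlu

# Build a recognition graph from a sentence template
intents = rhasspynlu.parse_ini(
    """
[LightOn]
turn on [the] (living room lamp | kitchen light){name}
"""
)
graph = rhasspynlu.intents_to_graph(intents)

# Whitespace tokenization, as the handler above assumes
tokens = re.split(r"\s+", "turn on the living room lamp")
recognitions = rhasspynlu.recognize(tokens, graph, fuzzy=True)
assert recognitions and recognitions[0].intent is not None
print(recognitions[0].intent.name)  # LightOn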
def getIntent(self, text):
    """Recognize an intent from raw text using the loaded graph."""
    # recognitions = rhasspynlu.recognize("set brightness to two", graph)
    # assert recognitions[0].tokens[-1] == 2
    #
    # recognitions = rhasspynlu.recognize("set brightness to one", graph)
    # assert recognitions[0].tokens[-1] == 1
    recognitions = rhasspynlu.recognize(text, self.graph)
    return recognitions
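The commented-out lines above check that number words come back as integer tokens. A runnable version of that check, assuming a graph built from a number-range template ((0..100) syntax per Rhasspy's template language; the SetBrightness template is an assumption, not shown in the snippet above):

import rhasspynlu

intents = rhasspynlu.parse_ini(
    """
[SetBrightness]
set brightness to (0..100){brightness}
"""
)
graph = rhasspynlu.intents_to_graph(intents)

# Number words are converted to integers in the recognized tokens
recognitions = rhasspynlu.recognize("set brightness to two", graph)
assert recognitions[0].tokens[-1] == 2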
def handle_query(self, query: NluQuery):
    """Do intent recognition."""

    def intent_filter(intent_name: str) -> bool:
        """Filter out intents."""
        if query.intentFilter:
            return intent_name in query.intentFilter
        return True

    recognitions = recognize(query.input, self.graph, intent_filter=intent_filter)
    if recognitions:
        # Use first recognition only.
        recognition = recognitions[0]
        assert recognition is not None
        assert recognition.intent is not None

        self.publish(
            NluIntent(
                input=query.input,
                id=query.id,
                siteId=query.siteId,
                sessionId=query.sessionId,
                intent=Intent(
                    intentName=recognition.intent.name,
                    confidenceScore=recognition.intent.confidence,
                ),
                slots=[
                    Slot(
                        entity=e.entity,
                        slotName=e.entity,
                        confidence=1,
                        value=e.value,
                        raw_value=e.raw_value,
                        range=SlotRange(start=e.raw_start, end=e.raw_end),
                    )
                    for e in recognition.entities
                ],
            ),
            intentName=recognition.intent.name,
        )
    else:
        # Not recognized
        self.publish(
            NluIntentNotRecognized(
                input=query.input,
                id=query.id,
                siteId=query.siteId,
                sessionId=query.sessionId,
            )
        )
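A sketch of how a caller might exercise the intentFilter path; `handler` and the id values are placeholders, not part of the class above. When intentFilter is set, only the named intents may match; an empty filter allows everything.

query = NluQuery(
    input="what time is it",
    siteId="default",
    sessionId="session-1",
    intentFilter=["GetTime", "GetDate"],  # all other intents are rejected
)
handler.handle_query(query)  # publishes NluIntent or NluIntentNotRecognized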
async def handle_query(
    self, query: NluQuery
) -> typing.AsyncIterable[
    typing.Union[
        NluIntentParsed,
        typing.Tuple[NluIntent, TopicArgs],
        NluIntentNotRecognized,
        NluError,
    ]
]:
    """Do intent recognition."""
    original_input = query.input

    try:
        if not self.intent_graph and self.graph_path and self.graph_path.is_file():
            # Load graph from file
            _LOGGER.debug("Loading %s", self.graph_path)
            with open(self.graph_path, mode="rb") as graph_file:
                self.intent_graph = rhasspynlu.gzip_pickle_to_graph(graph_file)

        if self.intent_graph:

            def intent_filter(intent_name: str) -> bool:
                """Filter out intents."""
                if query.intent_filter:
                    return intent_name in query.intent_filter
                return True

            # Replace digits with words
            if self.replace_numbers:
                # Have to assume whitespace tokenization
                words = rhasspynlu.replace_numbers(query.input.split(), self.language)
                query.input = " ".join(words)

            input_text = query.input

            # Fix casing for output event
            if self.word_transform:
                input_text = self.word_transform(input_text)

            if self.failure_token and (self.failure_token in query.input.split()):
                # Failure token was found in input
                recognitions = []
            else:
                # Pass in raw query input so raw values will be correct
                recognitions = recognize(
                    query.input,
                    self.intent_graph,
                    intent_filter=intent_filter,
                    word_transform=self.word_transform,
                    fuzzy=self.fuzzy,
                    extra_converters=self.extra_converters,
                )
        else:
            _LOGGER.error("No intent graph loaded")
            recognitions = []

        if NluHermesMqtt.is_success(recognitions):
            # Use first recognition only.
            recognition = recognitions[0]
            assert recognition is not None
            assert recognition.intent is not None

            intent = Intent(
                intent_name=recognition.intent.name,
                confidence_score=recognition.intent.confidence,
            )
            slots = [
                Slot(
                    entity=(e.source or e.entity),
                    slot_name=e.entity,
                    confidence=1.0,
                    value=e.value_dict,
                    raw_value=e.raw_value,
                    range=SlotRange(
                        start=e.start,
                        end=e.end,
                        raw_start=e.raw_start,
                        raw_end=e.raw_end,
                    ),
                )
                for e in recognition.entities
            ]

            if query.custom_entities:
                # Copy user-defined entities
                for entity_name, entity_value in query.custom_entities.items():
                    slots.append(
                        Slot(
                            entity=entity_name,
                            confidence=1.0,
                            value={"value": entity_value},
                        )
                    )

            # intentParsed
            yield NluIntentParsed(
                input=recognition.text,
                id=query.id,
                site_id=query.site_id,
                session_id=query.session_id,
                intent=intent,
                slots=slots,
            )

            # intent
            yield (
                NluIntent(
                    input=recognition.text,
                    id=query.id,
                    site_id=query.site_id,
                    session_id=query.session_id,
                    intent=intent,
                    slots=slots,
                    asr_tokens=[NluIntent.make_asr_tokens(recognition.tokens)],
                    asr_confidence=query.asr_confidence,
                    raw_input=original_input,
                    wakeword_id=query.wakeword_id,
                    lang=(query.lang or self.lang),
                    custom_data=query.custom_data,
                ),
                {"intent_name": recognition.intent.name},
            )
        else:
            # Not recognized
            yield NluIntentNotRecognized(
                input=query.input,
                id=query.id,
                site_id=query.site_id,
                session_id=query.session_id,
                custom_data=query.custom_data,
            )
    except Exception as e:
        _LOGGER.exception("handle_query")
        yield NluError(
            site_id=query.site_id,
            session_id=query.session_id,
            error=str(e),
            context=original_input,
        )
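A sketch of consuming the async generator above. `nlu` stands in for the NluHermesMqtt instance (an assumption, it is not shown in this file); note that the NluIntent case arrives as a (message, topic_args) tuple while the other results are plain messages.

import asyncio

async def main() -> None:
    query = NluQuery(input="turn on the light", site_id="default")
    async for result in nlu.handle_query(query):
        if isinstance(result, tuple):
            # (NluIntent, TopicArgs) pair
            message, topic_args = result
        else:
            # NluIntentParsed, NluIntentNotRecognized, or NluError
            message = result
        print(type(message).__name__)

asyncio.run(main())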
async def recognize(args: argparse.Namespace, core: Voice2JsonCore) -> None:
    """Recognize intent from sentence(s)."""
    import networkx as nx
    import rhasspynlu

    from .train import WordCasing

    # Make sure profile has been trained
    assert core.check_trained(), "Not trained"

    # Load settings
    language_code = pydash.get(core.profile, "language.code", "en-US")
    word_casing = WordCasing(
        pydash.get(core.profile, "training.word-casing", "ignore").lower()
    )
    intent_graph_path = core.ppath("training.intent-graph", "intent.pickle.gz")
    converters_dir = core.ppath("training.converters-directory", "converters")
    stop_words_path = core.ppath("intent-recognition.stop-words", "stop_words.txt")
    fuzzy = pydash.get(core.profile, "intent-recognition.fuzzy", True)

    # Load stop words
    stop_words: typing.Optional[typing.Set[str]] = None
    if stop_words_path and stop_words_path.is_file():
        stop_words = set()
        with open(stop_words_path, "r") as stop_words_file:
            for line in stop_words_file:
                line = line.strip()
                if line:
                    stop_words.add(line)

    # Load converters
    extra_converters: typing.Optional[typing.Dict[str, typing.Any]] = {}
    if converters_dir:
        extra_converters = load_converters(converters_dir)

    # Case transformation for input words
    word_transform = None
    if word_casing == WordCasing.UPPER:
        word_transform = str.upper
    elif word_casing == WordCasing.LOWER:
        word_transform = str.lower

    if args.sentence:
        sentences = args.sentence
    else:
        if os.isatty(sys.stdin.fileno()):
            print("Reading sentences from stdin", file=sys.stderr)

        sentences = sys.stdin

    # Whitelist function for intents
    if args.intent_filter:
        args.intent_filter = set(args.intent_filter)

    def intent_filter(intent_name: str) -> bool:
        """Filter out intents."""
        if args.intent_filter:
            return intent_name in args.intent_filter
        return True

    # Load intent graph
    _LOGGER.debug("Loading %s", intent_graph_path)
    with gzip.GzipFile(intent_graph_path, mode="rb") as graph_gzip:
        intent_graph = nx.readwrite.gpickle.read_gpickle(graph_gzip)

    # Process sentences
    try:
        for sentence in sentences:
            if args.text_input:
                # Input is plain text
                text = sentence
                sentence_object = {"text": text}
            else:
                # Input is JSON
                sentence_object = json.loads(sentence)
                text = sentence_object.get(args.transcription_property, "")

            # Tokenize
            text = text.strip()
            tokens = text.split()

            if args.replace_numbers:
                tokens = list(
                    rhasspynlu.replace_numbers(tokens, language=language_code)
                )

            # Recognize intent
            recognitions = rhasspynlu.recognize(
                tokens,
                intent_graph,
                fuzzy=fuzzy,
                stop_words=stop_words,
                word_transform=word_transform,
                extra_converters=extra_converters,
                intent_filter=intent_filter,
            )

            if recognitions:
                # Use first recognition
                recognition = recognitions[0]
            else:
                # Recognition failure
                recognition = rhasspynlu.intent.Recognition.empty()

            result = dataclasses.asdict(recognition)

            # Add slots
            result["slots"] = {e.entity: e.value for e in recognition.entities}

            # Merge with input object
            for key, value in result.items():
                if (key not in sentence_object) or (value is not None):
                    sentence_object[key] = value

            if not sentence_object["text"]:
                sentence_object["text"] = text

            # Keep text from transcription
            sentence_object["raw_text"] = text

            if args.perplexity:
                # Compute perplexity of input text for one or more language
                # models (stored in FST binary format).
                perplexity = {}
                for lm_fst_path in args.perplexity:
                    try:
                        perplexity[lm_fst_path] = rhasspynlu.arpa_lm.get_perplexity(
                            text, lm_fst_path, debug=args.debug
                        )
                    except Exception:
                        _LOGGER.exception(lm_fst_path)

                sentence_object["perplexity"] = perplexity

            print_json(sentence_object)
    except KeyboardInterrupt:
        pass
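For orientation, a condensed, standalone sketch of the per-sentence pipeline above, outside the CLI wrapper. The graph path and input sentence are placeholders; the calls themselves mirror the function above.

import gzip

import networkx as nx
import rhasspynlu

# Load the gzipped, pickled intent graph produced by training
with gzip.GzipFile("intent.pickle.gz", mode="rb") as graph_gzip:
    intent_graph = nx.readwrite.gpickle.read_gpickle(graph_gzip)

# Whitespace tokenization, then graph-based recognition
recognitions = rhasspynlu.recognize("turn on the light".split(), intent_graph)
recognition = (
    recognitions[0] if recognitions else rhasspynlu.intent.Recognition.empty()
)

# Slots as a simple entity -> value mapping, as in the merged output above
print({e.entity: e.value for e in recognition.entities})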
def recognize(
    text: str,
    engine: SnipsNLUEngine,
    slots_dict: typing.Optional[typing.Dict[str, typing.List[str]]] = None,
    slot_graphs: typing.Optional[typing.Dict[str, nx.DiGraph]] = None,
    **parse_args,
) -> typing.List[Recognition]:
    """Recognize intent using Snips NLU."""
    result = engine.parse(text, **parse_args)
    intent_name = result.get("intent", {}).get("intentName")

    if not intent_name:
        # Recognition failure
        return []

    slots_dict = slots_dict or {}
    slot_graphs = slot_graphs or {}

    recognition = Recognition(
        text=text, raw_text=text, intent=Intent(name=intent_name, confidence=1.0)
    )

    # Replace Snips slot values with Rhasspy slot values (substituted)
    for slot in result.get("slots", []):
        slot_name = slot.get("slotName")
        slot_value_dict = slot.get("value", {})
        slot_value = slot_value_dict.get("value")

        entity = Entity(
            entity=slot_name,
            source=slot.get("entity", ""),
            value=slot_value,
            raw_value=slot.get("rawValue", slot_value),
            start=slot["range"]["start"],
            end=slot["range"]["end"],
        )
        recognition.entities.append(entity)

        if (not slot_name) or (not slot_value):
            continue

        slot_graph = slot_graphs.get(slot_name)
        if not slot_graph and (slot_name in slots_dict):
            # Convert slot values to graph
            slot_graph = rhasspynlu.sentences_to_graph(
                {
                    slot_name: [
                        rhasspynlu.jsgf.Sentence.parse(slot_line)
                        for slot_line in slots_dict[slot_name]
                        if slot_line.strip()
                    ]
                }
            )
            slot_graphs[slot_name] = slot_graph

        entity.tokens = slot_value.split()
        entity.raw_tokens = list(entity.tokens)

        if slot_graph:
            # Pass Snips value through graph
            slot_recognitions = rhasspynlu.recognize(entity.tokens, slot_graph)
            if slot_recognitions:
                # Pull out substituted value and replace in Rhasspy entity
                new_slot_value = slot_recognitions[0].text
                entity.value = new_slot_value
                entity.tokens = new_slot_value.split()

    return [recognition]
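The slot-substitution trick above in isolation, as a sketch: a Snips slot value is passed through a rhasspynlu graph so the substituted form comes back as the recognition text. The "(living room lamp):lamp_1" substitution syntax is assumed from Rhasspy's template language; the slot name and values are placeholders.

import rhasspynlu

# One-slot graph whose only sentence substitutes the spoken phrase
slot_graph = rhasspynlu.sentences_to_graph(
    {"device": [rhasspynlu.jsgf.Sentence.parse("(living room lamp):lamp_1")]}
)

# Recognizing the raw phrase yields the substituted value as the text
slot_recognitions = rhasspynlu.recognize("living room lamp".split(), slot_graph)
if slot_recognitions:
    print(slot_recognitions[0].text)  # expected: lamp_1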