def do_graph_to_fst(intent_graph, targets):
    """Compile an intent graph (JSON file) into an FST and write it to targets[0].

    Args:
        intent_graph: path to the intent graph JSON file.
        targets: doit-style target list; the compiled FST is written to targets[0].
    """
    with open(intent_graph, "r") as graph_file:
        json_graph = json.load(graph_file)

    graph_fst = graph_to_fst(json_to_graph(json_graph))

    # Build input/output symbol tables from the graph FST's symbol maps
    isymbols = fst.SymbolTable()
    for sym, num in graph_fst.input_symbols.items():
        isymbols.add_symbol(sym, num)

    osymbols = fst.SymbolTable()
    for sym, num in graph_fst.output_symbols.items():
        osymbols.add_symbol(sym, num)

    # Compile the text FST, keeping both symbol tables attached
    compiler = fst.Compiler(
        isymbols=isymbols,
        osymbols=osymbols,
        keep_isymbols=True,
        keep_osymbols=True,
    )
    compiler.write(graph_fst.intent_fst)

    # Persist the compiled FST
    compiler.compile().write(str(targets[0]))
def load_graph(self):
    """Lazily load the intent graph from JSON, collecting its vocabulary and stop words.

    Does nothing if the graph has already been loaded.
    """
    if self.graph is not None:
        return  # already loaded

    graph_path = self.profile.read_path(
        self.profile.get("intent.fsticuffs.intent_graph", "intent.json")
    )
    with open(graph_path, "r") as graph_file:
        json_graph = json.load(graph_file)

    self.graph = json_to_graph(json_graph)

    # Vocabulary = every "word" attribute attached to a graph node
    self.words = {
        node_data["word"]
        for _, node_data in self.graph.nodes(data=True)
        if "word" in node_data
    }

    # Optional stop-words file (one word per line, blanks ignored)
    stop_words_path = self.profile.read_path("stop_words.txt")
    if os.path.exists(stop_words_path):
        self._logger.debug("Using stop words at %s", stop_words_path)
        with open(stop_words_path, "r") as stop_words_file:
            self.stop_words = {
                word
                for word in (line.strip() for line in stop_words_file)
                if len(word) > 0
            }
def to_training_sentences(self, from_state: str) -> None:
    """Transition to training_sentences state.

    Runs profile training via doit (with sys.argv temporarily rewritten so the
    doit database lands in the profile directory), then hands the resulting
    intent graph to the intent trainer. On any failure, falls back to the
    "ready" state and notifies the training receiver.
    """
    original_argv = sys.argv
    try:
        # Point doit's database at the profile directory for this run
        sys.argv = [
            sys.argv[0],
            "--db-file",
            str(self.profile.write_path(".doit.db")),
        ]

        code, errors = train_profile(Path(self.profile.read_path()), self.profile)
        if code != 0:
            raise Exception("\n".join(errors))

        self.transition("training_intent")

        # Load the freshly-trained intent graph and forward it for intent training
        intent_graph_path = self.profile.read_path(
            self.profile.get("intent.fsticuffs.intent_graph", "intent.json")
        )
        with open(intent_graph_path, "r") as graph_file:
            graph_dict = json.load(graph_file)

        self.send(
            self.intent_trainer,
            TrainIntent(rhasspynlu.json_to_graph(graph_dict)),
        )
    except Exception as e:
        # Training failed: go back to ready and report the error
        self.transition("ready")
        self.send(self.training_receiver, ProfileTrainingFailed(str(e)))
    finally:
        # Always restore the original command line
        sys.argv = original_argv
def recognize(args: argparse.Namespace):
    """Do intent recognition from query text.

    Queries come from --query arguments if given, otherwise from stdin.
    Each recognition result is printed as one line of JSON on stdout.
    """
    try:
        # Normalize file arguments to Paths
        args.examples = Path(args.examples)
        args.intent_graph = Path(args.intent_graph)

        _LOGGER.debug("Loading intent graph from %s", str(args.intent_graph))
        with open(args.intent_graph, "r") as intent_graph_file:
            intent_graph = rhasspynlu.json_to_graph(json.load(intent_graph_file))

        _LOGGER.debug("Loading examples from %s", str(args.examples))
        with open(args.examples, "r") as examples_file:
            examples = json.load(examples_file)

        _LOGGER.debug("Processing sentences")
        word_transform = get_word_transform(args.word_casing)

        # Choose query source: command line or stdin
        if args.query:
            query_source = args.query
        else:
            if os.isatty(sys.stdin.fileno()):
                print("Reading queries from stdin...", file=sys.stderr)
            query_source = sys.stdin

        for raw_sentence in query_source:
            # Normalize whitespace and casing before matching
            sentence = word_transform(raw_sentence.strip())

            recognitions = fuzzywuzzy_recognize(sentence, intent_graph, examples)

            # First hit wins; otherwise emit an empty recognition
            recognition = recognitions[0] if recognitions else Recognition.empty()

            # One JSON object per line
            json.dump(recognition.asdict(), sys.stdout)
            print("")
            sys.stdout.flush()
    except KeyboardInterrupt:
        pass
def poll_graph(seconds: float, graph_path: str, hermes: NluHermesMqtt):
    """Watch graph file for changes and reload.

    Polls the file's mtime every `seconds` seconds; when it changes, the graph
    is re-read from JSON and swapped into the Hermes handler. Errors are logged
    and polling continues.
    """
    prev_mtime: typing.Optional[int] = None
    while True:
        time.sleep(seconds)
        try:
            mtime = os.stat(graph_path).st_mtime_ns
            if prev_mtime is None:
                # First observation: just record the baseline
                prev_mtime = mtime
            elif mtime != prev_mtime:
                # File changed on disk: reload and swap in the new graph
                _LOGGER.debug("Re-loading graph from %s", graph_path)
                with open(graph_path, "r") as graph_file:
                    hermes.graph = json_to_graph(json.load(graph_file))
                prev_mtime = mtime
        except Exception:
            _LOGGER.exception("poll_graph")
def main():
    """Main method.

    Parses command-line arguments, loads the intent graph, optionally starts a
    background polling thread to reload it, then runs the MQTT client loop.
    """
    parser = argparse.ArgumentParser(prog="rhasspynlu_hermes")
    parser.add_argument("--graph", required=True, help="Path to rhasspy graph JSON file")
    parser.add_argument(
        "--reload",
        type=float,
        default=None,
        help="Poll graph JSON file for given number of seconds and automatically reload when changed",
    )
    parser.add_argument("--host", default="localhost", help="MQTT host (default: localhost)")
    parser.add_argument("--port", type=int, default=1883, help="MQTT port (default: 1883)")
    parser.add_argument(
        "--siteId",
        action="append",
        help="Hermes siteId(s) to listen for (default: all)",
    )
    parser.add_argument("--debug", action="store_true", help="Print DEBUG messages to the console")
    args = parser.parse_args()

    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)
    _LOGGER.debug(args)

    try:
        # Load graph
        _LOGGER.debug("Loading graph from %s", args.graph)
        with open(args.graph, "r") as graph_file:
            graph = json_to_graph(json.load(graph_file))

        # Set up MQTT handler
        client = mqtt.Client()
        hermes = NluHermesMqtt(client, graph, siteIds=args.siteId)

        if args.reload:
            # Background thread that reloads the graph when the file changes
            threading.Thread(
                target=poll_graph,
                args=(args.reload, args.graph, hermes),
                daemon=True,
            ).start()

        def on_disconnect(client, userdata, flags, rc):
            try:
                # Automatically reconnect
                _LOGGER.info("Disconnected. Trying to reconnect...")
                client.reconnect()
            except Exception:
                logging.exception("on_disconnect")

        # Wire up callbacks and run forever
        client.on_connect = hermes.on_connect
        client.on_disconnect = on_disconnect
        client.on_message = hermes.on_message

        _LOGGER.debug("Connecting to %s:%s", args.host, args.port)
        client.connect(args.host, args.port)
        client.loop_forever()
    except KeyboardInterrupt:
        pass
    finally:
        _LOGGER.debug("Shutting down")
def train(args: argparse.Namespace):
    """Generate HCLG.fst from intent graph.

    Resolves model/dictionary/language-model paths (falling back to defaults
    next to the model directory), loads the intent graph from a file or stdin,
    gathers base pronunciations and optional frequent words, then delegates to
    kaldi_train.
    """
    # Resolve paths, defaulting relative to the model directory
    args.model_dir = Path(args.model_dir)
    args.graph_dir = Path(args.graph_dir) if args.graph_dir else args.model_dir / "graph"
    args.dictionary = (
        Path(args.dictionary) if args.dictionary else args.model_dir.parent / "dictionary.txt"
    )
    args.language_model = (
        Path(args.language_model)
        if args.language_model
        else args.model_dir.parent / "language_model.txt"
    )
    args.g2p_model = (
        Path(args.g2p_model) if args.g2p_model else args.model_dir.parent / "g2p.fst"
    )
    args.base_dictionary = (
        [Path(p) for p in args.base_dictionary]
        if args.base_dictionary
        else [args.model_dir.parent / "base_dictionary.txt"]
    )

    graph: typing.Optional[nx.DiGraph] = None
    if args.intent_graph:
        # Load graph from file
        args.intent_graph = Path(args.intent_graph)
        _LOGGER.debug("Loading intent graph from %s", args.intent_graph)
        with open(args.intent_graph, "r") as graph_file:
            graph_dict = json.load(graph_file)
        graph = rhasspynlu.json_to_graph(graph_dict)
    else:
        # Load graph from stdin
        if os.isatty(sys.stdin.fileno()):
            print("Reading intent graph from stdin...", file=sys.stderr)
        graph = rhasspynlu.json_to_graph(json.load(sys.stdin))

    assert graph is not None

    # Merge pronunciations from every base dictionary that exists
    pronunciations: PronunciationsType = {}
    for dict_path in args.base_dictionary:
        if os.path.exists(dict_path):
            _LOGGER.debug("Loading dictionary %s", str(dict_path))
            with open(dict_path, "r") as dict_file:
                rhasspynlu.g2p.read_pronunciations(dict_file, pronunciations)

    # Optional frequent-words list (one word per line, blanks ignored)
    frequent_words: typing.Optional[typing.Set[str]] = None
    frequent_words_path = args.model_dir.parent / "frequent_words.txt"
    if frequent_words_path.is_file():
        with open(frequent_words_path, "r") as frequent_words_file:
            frequent_words = {
                word
                for word in (line.strip() for line in frequent_words_file)
                if word
            }

    kaldi_train(
        graph,
        pronunciations,
        args.model_dir,
        args.graph_dir,
        dictionary_word_transform=get_word_transform(args.dictionary_casing),
        dictionary=args.dictionary,
        language_model=args.language_model,
        language_model_type=args.language_model_type,
        g2p_model=args.g2p_model,
        g2p_word_transform=get_word_transform(args.g2p_casing),
        allow_unknown_words=args.unknown_words,
        frequent_words=frequent_words,
    )