# Example 1
    def do_graph_to_fst(intent_graph, targets):
        """Compile the JSON intent graph into a binary FST written to targets[0]."""
        with open(intent_graph, "r") as graph_file:
            nx_graph = json_to_graph(json.load(graph_file))

        fst_info = graph_to_fst(nx_graph)

        # Build input/output symbol tables from the graph's symbol maps
        input_table = fst.SymbolTable()
        for sym, num in fst_info.input_symbols.items():
            input_table.add_symbol(sym, num)

        output_table = fst.SymbolTable()
        for sym, num in fst_info.output_symbols.items():
            output_table.add_symbol(sym, num)

        # Feed the FST text description to the compiler
        compiler = fst.Compiler(
            isymbols=input_table,
            osymbols=output_table,
            keep_isymbols=True,
            keep_osymbols=True,
        )
        compiler.write(fst_info.intent_fst)

        # Compile and save the binary FST to the target path
        compiler.compile().write(str(targets[0]))
# Example 2
    def load_graph(self):
        """Load intent graph from JSON file (no-op if already loaded)."""
        if self.graph is not None:
            return

        graph_path = self.profile.read_path(
            self.profile.get("intent.fsticuffs.intent_graph", "intent.json")
        )

        with open(graph_path, "r") as graph_file:
            self.graph = json_to_graph(json.load(graph_file))

        # Collect the vocabulary attached to the graph's nodes
        self.words = {
            data["word"]
            for _, data in self.graph.nodes(data=True)
            if "word" in data
        }

        # Load stop words, if the profile provides a stop_words.txt
        stop_words_path = self.profile.read_path("stop_words.txt")
        if os.path.exists(stop_words_path):
            self._logger.debug("Using stop words at %s", stop_words_path)
            with open(stop_words_path, "r") as stop_words_file:
                stripped = (line.strip() for line in stop_words_file)
                self.stop_words = {word for word in stripped if word}
# Example 3
    def to_training_sentences(self, from_state: str) -> None:
        """Transition to training_sentences state.

        Runs profile training via doit (train_profile), then loads the freshly
        generated intent graph and forwards it to the intent trainer actor.
        On any failure, transitions back to "ready" and notifies the training
        receiver with the error message.
        """
        # Use doit to train
        # doit reads its options from sys.argv, so temporarily replace it to
        # point the doit database at the profile directory.
        saved_argv = sys.argv
        try:
            # Store doit database in profile directory
            sys.argv = [
                sys.argv[0],
                "--db-file",
                str(self.profile.write_path(".doit.db")),
            ]

            code, errors = train_profile(Path(self.profile.read_path()), self.profile)
            if code != 0:
                # Non-zero exit code: surface accumulated error lines as one message
                raise Exception("\n".join(errors))

            self.transition("training_intent")

            # Path to the intent graph produced by training
            # (profile setting "intent.fsticuffs.intent_graph", default "intent.json")
            intent_graph_path = self.profile.read_path(
                self.profile.get("intent.fsticuffs.intent_graph", "intent.json")
            )

            with open(intent_graph_path, "r") as graph_file:
                json_graph = json.load(graph_file)
                intent_graph = rhasspynlu.json_to_graph(json_graph)
                self.send(self.intent_trainer, TrainIntent(intent_graph))
        except Exception as e:
            # Training failed: go back to ready and report the error
            self.transition("ready")
            self.send(self.training_receiver, ProfileTrainingFailed(str(e)))
        finally:
            # Restore sys.argv
            sys.argv = saved_argv
# Example 4
def recognize(args: argparse.Namespace):
    """Do intent recognition from query text."""
    try:
        # Convert to Paths
        args.examples = Path(args.examples)
        args.intent_graph = Path(args.intent_graph)

        # Load intent graph and fuzzy-matching examples from JSON
        _LOGGER.debug("Loading intent graph from %s", str(args.intent_graph))
        with open(args.intent_graph, "r") as intent_graph_file:
            intent_graph = rhasspynlu.json_to_graph(json.load(intent_graph_file))

        _LOGGER.debug("Loading examples from %s", str(args.examples))
        with open(args.examples, "r") as examples_file:
            examples = json.load(examples_file)

        _LOGGER.debug("Processing sentences")
        word_transform = get_word_transform(args.word_casing)

        # Queries come either from --query arguments or from stdin
        if args.query:
            sentences = args.query
        else:
            if os.isatty(sys.stdin.fileno()):
                print("Reading queries from stdin...", file=sys.stderr)
            sentences = sys.stdin

        for raw_sentence in sentences:
            # Normalize whitespace and casing before matching
            sentence = word_transform(raw_sentence.strip())

            # Do recognition; fall back to an empty result if nothing matched
            recognitions = fuzzywuzzy_recognize(sentence, intent_graph, examples)
            recognition = recognitions[0] if recognitions else Recognition.empty()

            # Emit one line of JSON per query
            json.dump(recognition.asdict(), sys.stdout)
            print("")
            sys.stdout.flush()

    except KeyboardInterrupt:
        pass
def poll_graph(seconds: float, graph_path: str, hermes: NluHermesMqtt):
    """Watch graph file for changes and reload."""
    last_mtime: typing.Optional[int] = None

    while True:
        time.sleep(seconds)
        try:
            mtime = os.stat(graph_path).st_mtime_ns

            # Reload only when we have a previous timestamp and it changed;
            # the first iteration merely records the baseline.
            if (last_mtime is not None) and (mtime != last_mtime):
                _LOGGER.debug("Re-loading graph from %s", graph_path)
                with open(graph_path, "r") as graph_file:
                    # Set in Hermes object
                    hermes.graph = json_to_graph(json.load(graph_file))

            last_mtime = mtime
        except Exception:
            # Best-effort polling: log and keep watching
            _LOGGER.exception("poll_graph")
def main():
    """Main method: parse CLI args, load the intent graph, and serve NLU over MQTT."""
    parser = argparse.ArgumentParser(prog="rhasspynlu_hermes")
    parser.add_argument("--graph",
                        required=True,
                        help="Path to rhasspy graph JSON file")
    parser.add_argument(
        "--reload",
        type=float,
        default=None,
        help=
        "Poll graph JSON file for given number of seconds and automatically reload when changed",
    )
    parser.add_argument("--host",
                        default="localhost",
                        help="MQTT host (default: localhost)")
    parser.add_argument("--port",
                        type=int,
                        default=1883,
                        help="MQTT port (default: 1883)")
    parser.add_argument(
        "--siteId",
        action="append",
        help="Hermes siteId(s) to listen for (default: all)",
    )
    parser.add_argument("--debug",
                        action="store_true",
                        help="Print DEBUG messages to the console")
    args = parser.parse_args()

    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)

    _LOGGER.debug(args)

    try:
        # Load graph
        _LOGGER.debug("Loading graph from %s", args.graph)
        with open(args.graph, "r") as graph_file:
            graph = json_to_graph(json.load(graph_file))

        # Listen for messages
        client = mqtt.Client()
        hermes = NluHermesMqtt(client, graph, siteIds=args.siteId)

        if args.reload:
            # Start polling thread to reload the graph file when it changes
            threading.Thread(target=poll_graph,
                             args=(args.reload, args.graph, hermes),
                             daemon=True).start()

        # BUG FIX: paho-mqtt 1.x calls on_disconnect(client, userdata, rc) with
        # three arguments; the previous four-parameter signature
        # (client, userdata, flags, rc) raised TypeError inside paho, so the
        # automatic reconnect never ran.
        def on_disconnect(client, userdata, rc):
            try:
                # Automatically reconnect
                _LOGGER.info("Disconnected. Trying to reconnect...")
                client.reconnect()
            except Exception:
                logging.exception("on_disconnect")

        # Connect
        client.on_connect = hermes.on_connect
        client.on_disconnect = on_disconnect
        client.on_message = hermes.on_message

        _LOGGER.debug("Connecting to %s:%s", args.host, args.port)
        client.connect(args.host, args.port)

        client.loop_forever()
    except KeyboardInterrupt:
        pass
    finally:
        _LOGGER.debug("Shutting down")
# Example 7
def _resolve_path(value, default: Path) -> Path:
    """Return Path(value) when value is set, otherwise the given default path."""
    return Path(value) if value else default


def train(args: argparse.Namespace):
    """Generate HCLG.fst from intent graph.

    Resolves model/graph/dictionary paths from args (falling back to defaults
    relative to the model directory), loads the intent graph from a file or
    stdin, loads base pronunciation dictionaries and optional frequent words,
    then invokes Kaldi training.
    """
    # Convert to Paths, filling in defaults relative to the model directory
    args.model_dir = Path(args.model_dir)
    args.graph_dir = _resolve_path(args.graph_dir, args.model_dir / "graph")
    args.dictionary = _resolve_path(
        args.dictionary, args.model_dir.parent / "dictionary.txt"
    )
    args.language_model = _resolve_path(
        args.language_model, args.model_dir.parent / "language_model.txt"
    )
    args.g2p_model = _resolve_path(args.g2p_model, args.model_dir.parent / "g2p.fst")

    if args.base_dictionary:
        args.base_dictionary = [Path(p) for p in args.base_dictionary]
    else:
        args.base_dictionary = [args.model_dir.parent / "base_dictionary.txt"]

    graph: typing.Optional[nx.DiGraph] = None
    if args.intent_graph:
        # Load graph from file
        args.intent_graph = Path(args.intent_graph)

        _LOGGER.debug("Loading intent graph from %s", args.intent_graph)
        with open(args.intent_graph, "r") as graph_file:
            graph_dict = json.load(graph_file)
            graph = rhasspynlu.json_to_graph(graph_dict)
    else:
        # Load graph from stdin
        if os.isatty(sys.stdin.fileno()):
            print("Reading intent graph from stdin...", file=sys.stderr)

        graph_dict = json.load(sys.stdin)
        graph = rhasspynlu.json_to_graph(graph_dict)

    assert graph is not None

    # Load base dictionaries (missing files are skipped silently)
    pronunciations: PronunciationsType = {}
    for dict_path in args.base_dictionary:
        if os.path.exists(dict_path):
            _LOGGER.debug("Loading dictionary %s", str(dict_path))
            with open(dict_path, "r") as dict_file:
                rhasspynlu.g2p.read_pronunciations(dict_file, pronunciations)

    # Load frequent words (optional; None means "no frequent-word filtering")
    frequent_words: typing.Optional[typing.Set[str]] = None
    frequent_words_path = args.model_dir.parent / "frequent_words.txt"
    if frequent_words_path.is_file():
        with open(frequent_words_path, "r") as frequent_words_file:
            frequent_words = {
                word
                for word in (line.strip() for line in frequent_words_file)
                if word
            }

    kaldi_train(
        graph,
        pronunciations,
        args.model_dir,
        args.graph_dir,
        dictionary_word_transform=get_word_transform(args.dictionary_casing),
        dictionary=args.dictionary,
        language_model=args.language_model,
        language_model_type=args.language_model_type,
        g2p_model=args.g2p_model,
        g2p_word_transform=get_word_transform(args.g2p_casing),
        allow_unknown_words=args.unknown_words,
        frequent_words=frequent_words,
    )