コード例 #1
0
def api_intents():
    """Return JSON with information about intents."""
    assert core is not None

    sentences_ini = Path(
        core.profile.read_path(core.profile.get("speech_to_text.sentences_ini"))
    )

    sentences_dir = Path(
        core.profile.read_path(core.profile.get("speech_to_text.sentences_dir"))
    )

    # Load all .ini files and parse
    ini_paths: List[Path] = get_ini_paths(sentences_ini, sentences_dir)
    intents: Dict[str, Any] = get_all_intents(ini_paths)

    def add_type(item, item_dict: Dict[str, Any]):
        """Add item_type to expression dictionary."""
        item_dict["item_type"] = type(item).__name__
        if hasattr(item, "items"):
            # Group, alternative, etc.
            for sub_item, sub_item_dict in zip(item.items, item_dict["items"]):
                add_type(sub_item, sub_item_dict)
        elif hasattr(item, "rule_body"):
            # Rule
            add_type(item.rule_body, item_dict["rule_body"])

    # Convert to dictionary
    intents_dict = {}
    for intent_name, intent_sentences in intents.items():
        sentence_dicts = []
        for sentence in intent_sentences:
            sentence_dict = attr.asdict(sentence)

            # Add item_type field
            add_type(sentence, sentence_dict)
            sentence_dicts.append(sentence_dict)

        intents_dict[intent_name] = sentence_dicts

    # Convert to JSON
    return jsonify(intents_dict)
コード例 #2
0
ファイル: __init__.py プロジェクト: canadaycrew/rhasspy
def train_profile(profile_dir: Path,
                  profile: Profile) -> Tuple[int, List[str]]:

    # Compact
    def ppath(query, default=None, write=False):
        return utils_ppath(profile, profile_dir, query, default, write=write)

    language = profile.get("language", "")

    # Inputs
    stt_system = profile.get("speech_to_text.system")
    stt_prefix = f"speech_to_text.{stt_system}"

    # intent_whitelist = ppath("training.intent-whitelist", "intent_whitelist")
    sentences_ini = ppath("speech_to_text.sentences_ini", "sentences.ini")
    sentences_dir = ppath("speech_to_text.sentences_dir", "sentences.dir")
    base_dictionary = ppath(f"{stt_prefix}.base_dictionary",
                            "base_dictionary.txt")
    base_language_model = ppath(f"{stt_prefix}.base_language_model",
                                "base_language_model.txt")
    base_language_model_weight = float(
        profile.get(f"{stt_prefix}.mix_weight", 0))
    g2p_model = ppath(f"{stt_prefix}.g2p_model", "g2p.fst")
    acoustic_model_type = stt_system

    # Pocketsphinx
    acoustic_model = ppath(f"{stt_prefix}.acoustic_model", "acoustic_model")

    # Kaldi
    kaldi_dir = Path(
        os.path.expandvars(profile.get(f"{stt_prefix}.kaldi_dir",
                                       "/opt/kaldi")))
    kaldi_graph_dir = acoustic_model / profile.get(f"{stt_prefix}.graph",
                                                   "graph")

    if acoustic_model_type == "kaldi":
        # Kaldi acoustic models are inside model directory
        acoustic_model = ppath(f"{stt_prefix}.model_dir", "model")
    else:
        _LOGGER.warning("Unsupported acoustic model type: %s",
                        acoustic_model_type)

    # ignore/upper/lower
    word_casing = profile.get("speech_to_text.dictionary_casing",
                              "ignore").lower()

    # default/ignore/upper/lower
    g2p_word_casing = profile.get("speech_to_text.g2p_casing",
                                  word_casing).lower()

    # all/first
    dict_merge_rule = profile.get("speech_to_text.dictionary_merge_rule",
                                  "all").lower()

    # Outputs
    dictionary = ppath(f"{stt_prefix}.dictionary",
                       "dictionary.txt",
                       write=True)
    custom_words = ppath(f"{stt_prefix}.custom_words",
                         "custom_words.txt",
                         write=True)
    language_model = ppath(f"{stt_prefix}.language_model",
                           "language_model.txt",
                           write=True)
    base_language_model_fst = ppath(f"{stt_prefix}.base_language_model_fst",
                                    "base_language_model.fst",
                                    write=True)
    intent_graph = ppath("intent.fsticiffs.intent_graph",
                         "intent.json",
                         write=True)
    intent_fst = ppath("intent.fsticiffs.intent_fst", "intent.fst", write=True)
    vocab = ppath(f"{stt_prefix}.vocabulary", "vocab.txt", write=True)
    unknown_words = ppath(f"{stt_prefix}.unknown_words",
                          "unknown_words.txt",
                          write=True)
    grammar_dir = ppath("speech_to_text.grammars_dir", "grammars", write=True)
    fsts_dir = ppath("speech_to_text.fsts_dir", "fsts", write=True)
    slots_dir = ppath("speech_to_text.slots_dir", "slots", write=True)

    # -----------------------------------------------------------------------------

    # Create cache directories
    for dir_path in [grammar_dir, fsts_dir]:
        dir_path.mkdir(parents=True, exist_ok=True)

    # -----------------------------------------------------------------------------

    ini_paths: List[Path] = get_ini_paths(sentences_ini, sentences_dir)

    # Join ini files into a single combined file and parse
    _LOGGER.debug("Parsing ini file(s): %s", [str(p) for p in ini_paths])

    try:
        intents = get_all_intents(ini_paths)
    except Exception:
        _LOGGER.exception("Failed to parse %s", ini_paths)
        return (1, ["Failed to parse sentences"])

    # -----------------------------------------------------------------------------

    def get_slot_names(item):
        """Yield referenced slot names."""
        if isinstance(item, jsgf.SlotReference):
            yield item.slot_name
        elif isinstance(item, jsgf.Sequence):
            for sub_item in item.items:
                for slot_name in get_slot_names(sub_item):
                    yield slot_name
        elif isinstance(item, jsgf.Rule):
            for slot_name in get_slot_names(item.rule_body):
                yield slot_name

    def number_transform(word):
        """Automatically transform numbers"""
        if not isinstance(word, jsgf.Word):
            # Skip anything besides words
            return

        try:
            n = int(word.text)

            # 75 -> (seventy five):75
            number_text = num2words(n, lang=language).replace("-", " ").strip()
            assert number_text, f"Empty num2words result for {n}"
            number_words = number_text.split()

            if len(number_words) == 1:
                # Easy case, single word
                word.text = number_text
                word.substitution = str(n)
            else:
                # Hard case, split into mutliple Words
                return jsgf.Sequence(
                    text=number_text,
                    type=jsgf.SequenceType.GROUP,
                    substitution=str(n),
                    items=[jsgf.Word(w) for w in number_words],
                )
        except ValueError:
            # Not a number
            pass

    def do_intents_to_graph(intents, slot_names, targets):
        sentences, replacements = ini_jsgf.split_rules(intents)

        # Load slot values
        for slot_name in slot_names:
            slot_path = slots_dir / slot_name
            assert slot_path.is_file(), f"Missing slot file at {slot_path}"

            # Parse each non-empty line as a JSGF sentence
            slot_values = []
            with open(slot_path, "r") as slot_file:
                for line in slot_file:
                    line = line.strip()
                    if line:
                        sentence = jsgf.Sentence.parse(line)
                        slot_values.append(sentence)

            # Replace $slot with sentences
            replacements[f"${slot_name}"] = slot_values

        if profile.get("intent.replace_numbers", True):
            # Replace numbers in parsed sentences
            for intent_sentences in sentences.values():
                for sentence in intent_sentences:
                    jsgf.walk_expression(sentence, number_transform,
                                         replacements)

        # Convert to directed graph
        graph = intents_to_graph(intents, replacements)

        # Write graph to JSON file
        json_graph = graph_to_json(graph)
        with open(targets[0], "w") as graph_file:
            json.dump(json_graph, graph_file)

    def task_ini_graph():
        """sentences.ini -> intent.json"""
        slot_names = set()
        for intent_name in intents:
            for item in intents[intent_name]:
                for slot_name in get_slot_names(item):
                    slot_names.add(slot_name)

        # Add slot files as dependencies
        deps = [(slots_dir / slot_name) for slot_name in slot_names]

        # Add profile itself as a dependency
        profile_json_path = profile_dir / "profile.json"
        if profile_json_path.is_file():
            deps.append(profile_json_path)

        return {
            "file_dep": ini_paths + deps,
            "targets": [intent_graph],
            "actions": [(do_intents_to_graph, [intents, slot_names])],
        }

    # -----------------------------------------------------------------------------

    def do_graph_to_fst(intent_graph, targets):
        with open(intent_graph, "r") as graph_file:
            json_graph = json.load(graph_file)

        graph = json_to_graph(json_graph)
        graph_fst = graph_to_fst(graph)

        # Create symbol tables
        isymbols = fst.SymbolTable()
        for symbol, number in graph_fst.input_symbols.items():
            isymbols.add_symbol(symbol, number)

        osymbols = fst.SymbolTable()
        for symbol, number in graph_fst.output_symbols.items():
            osymbols.add_symbol(symbol, number)

        # Compile FST
        compiler = fst.Compiler(isymbols=isymbols,
                                osymbols=osymbols,
                                keep_isymbols=True,
                                keep_osymbols=True)

        compiler.write(graph_fst.intent_fst)
        compiled_fst = compiler.compile()

        # Write to file
        compiled_fst.write(str(targets[0]))

    def task_intent_fst():
        """intent.json -> intent.fst"""
        return {
            "file_dep": [intent_graph],
            "targets": [intent_fst],
            "actions": [(do_graph_to_fst, [intent_graph])],
        }

    # -----------------------------------------------------------------------------

    @create_after(executed="intent_fst")
    def task_language_model():
        """Creates an ARPA language model from intent.fst."""

        if base_language_model_weight > 0:
            yield {
                "name": "base_lm_to_fst",
                "file_dep": [base_language_model],
                "targets": [base_language_model_fst],
                "actions": ["ngramread --ARPA %(dependencies)s %(targets)s"],
            }

        # FST -> n-gram counts
        intent_counts = str(intent_fst) + ".counts"
        yield {
            "name": "intent_counts",
            "file_dep": [intent_fst],
            "targets": [intent_counts],
            "actions": ["ngramcount %(dependencies)s %(targets)s"],
        }

        # n-gram counts -> model
        intent_model = str(intent_fst) + ".model"
        yield {
            "name": "intent_model",
            "file_dep": [intent_counts],
            "targets": [intent_model],
            "actions": ["ngrammake %(dependencies)s %(targets)s"],
        }

        if base_language_model_weight > 0:
            merged_model = Path(str(intent_model) + ".merge")

            # merge
            yield {
                "name":
                "lm_merge",
                "file_dep": [base_language_model_fst, intent_model],
                "targets": [merged_model],
                "actions": [
                    f"ngrammerge --alpha={base_language_model_weight} %(dependencies)s %(targets)s"
                ],
            }

            intent_model = merged_model

        # model -> ARPA
        yield {
            "name": "intent_arpa",
            "file_dep": [intent_model],
            "targets": [language_model],
            "actions": ["ngramprint --ARPA %(dependencies)s > %(targets)s"],
        }

    # -----------------------------------------------------------------------------

    def do_vocab(targets):
        with open(targets[0], "w") as vocab_file:
            input_symbols = fst.Fst.read(str(intent_fst)).input_symbols()
            for i in range(input_symbols.num_symbols()):
                # Critical that we use get_nth_key here when input symbols
                # numbering is discontiguous.
                key = input_symbols.get_nth_key(i)
                symbol = input_symbols.find(key).decode().strip()
                if symbol and not (symbol.startswith("__")
                                   or symbol.startswith("<")):
                    print(symbol, file=vocab_file)

            if base_language_model_weight > 0:
                # Add all words from base dictionary
                with open(base_dictionary, "r") as dict_file:
                    for word in read_dict(dict_file):
                        print(word, file=vocab_file)

    @create_after(executed="language_model")
    def task_vocab():
        """Writes all vocabulary words to a file from intent.fst."""
        return {
            "file_dep": [intent_fst],
            "targets": [vocab],
            "actions": [do_vocab]
        }

    # -----------------------------------------------------------------------------

    def do_dict(dictionary_paths: Iterable[Path], targets):
        with open(targets[0], "w") as dictionary_file:
            if unknown_words.exists():
                unknown_words.unlink()

            dictionary_format = FORMAT_CMU
            if acoustic_model_type == "julius":
                dictionary_format = FORMAT_JULIUS

            make_dict(
                vocab,
                dictionary_paths,
                dictionary_file,
                unknown_path=unknown_words,
                dictionary_format=dictionary_format,
                merge_rule=dict_merge_rule,
                upper=(word_casing == "upper"),
                lower=(word_casing == "lower"),
            )

            if unknown_words.exists() and g2p_model.exists():
                # Generate single pronunciation guesses
                _LOGGER.debug("Guessing pronunciations for unknown word(s)")

                g2p_output = subprocess.check_output(
                    [
                        "phonetisaurus-apply",
                        "--model",
                        str(g2p_model),
                        "--word_list",
                        str(unknown_words),
                        "--nbest",
                        "1",
                    ],
                    universal_newlines=True,
                )

                g2p_transform = lambda w: w
                if g2p_word_casing == "upper":
                    g2p_transform = lambda w: w.upper()
                elif g2p_word_casing == "lower":
                    g2p_transform = lambda w: w.lower()

                # Append to dictionary and custom words
                with open(custom_words, "a") as words_file:
                    with open(unknown_words, "w") as unknown_words_file:
                        for line in g2p_output.splitlines():
                            line = line.strip()
                            word, phonemes = re.split(r"\s+", line, maxsplit=1)
                            word = g2p_transform(word)
                            print(word, phonemes, file=dictionary_file)
                            print(word, phonemes, file=words_file)
                            print(word, phonemes, file=unknown_words_file)

    @create_after(executed="vocab")
    def task_vocab_dict():
        """Creates custom pronunciation dictionary based on desired vocabulary."""
        dictionary_paths = [base_dictionary]
        if custom_words.exists():
            # Custom dictionary goes first so that the "first" dictionary merge
            # rule will choose pronunciations from it.
            dictionary_paths.insert(0, custom_words)

        # Exclude dictionaries that don't exist
        dictionary_paths = [p for p in dictionary_paths if p.exists()]

        return {
            "file_dep": [vocab] + dictionary_paths,
            "targets": [dictionary],
            "actions": [(do_dict, [dictionary_paths])],
        }

    # -----------------------------------------------------------------------------

    @create_after(executed="vocab_dict")
    def task_kaldi_train():
        """Creates HCLG.fst for a Kaldi nnet3 or gmm model."""
        if acoustic_model_type == "kaldi":
            return {
                "file_dep": [dictionary, language_model],
                "targets": [kaldi_graph_dir / "HCLG.fst"],
                "actions": [[
                    "bash",
                    str(acoustic_model / "train.sh"),
                    str(kaldi_dir),
                    str(acoustic_model),
                    str(dictionary),
                    str(language_model),
                ]],
            }

    # -----------------------------------------------------------------------------

    errors = []

    class MyReporter(ConsoleReporter):
        def add_failure(self, task, exception):
            super().add_failure(task, exception)
            errors.append(f"{task}: {exception}")

        def runtime_error(self, msg):
            super().runtime_error(msg)
            errors.append(msg)

    DOIT_CONFIG = {"action_string_formatting": "old", "reporter": MyReporter}

    # Monkey patch inspect to make doit work inside Pyinstaller.
    # It grabs the line numbers of functions probably for debugging reasons, but
    # PyInstaller doesn't seem to keep that information around.
    #
    # This better thing to do would be to create a custom TaskLoader.
    import inspect

    inspect.getsourcelines = lambda obj: [0, 0]

    # Run doit main
    result = DoitMain(ModuleTaskLoader(locals())).run(sys.argv[1:])
    return (result, errors)