Example #1
    def __init__(self, rhasspyConfig="./intents/serindaCommands.ini"):
        # Read the intents ini file, closing the handle when done
        with open(rhasspyConfig, "r") as f:
            self.intentFile = f.read()

        # Load and parse
        self.intents = rhasspynlu.parse_ini(self.intentFile)

        self.graph = rhasspynlu.intents_to_graph(self.intents)
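
For context, the graph built here is what rhasspynlu.recognize consumes. A minimal standalone sketch (the intent and sentence are made up for illustration):

import rhasspynlu

intents = rhasspynlu.parse_ini("""
[ChangeLightColor]
set the light to (red | green | blue){color}
""")
graph = rhasspynlu.intents_to_graph(intents)

# Match a spoken sentence against the graph
recognitions = rhasspynlu.recognize("set the light to green", graph)
if recognitions:
    print(recognitions[0].intent.name, recognitions[0].entities)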
Example #2
import io
import logging
from pathlib import Path
from typing import Any, Dict, List

import rhasspynlu

_LOGGER = logging.getLogger(__name__)


def get_all_intents(ini_paths: List[Path]) -> Dict[str, Any]:
    """Get intents from all .ini files in profile."""
    try:
        with io.StringIO() as combined_ini_file:
            for ini_path in ini_paths:
                combined_ini_file.write(ini_path.read_text())
                print("", file=combined_ini_file)

            return rhasspynlu.parse_ini(combined_ini_file.getvalue())
    except Exception:
        _LOGGER.exception("Failed to parse %s", ini_paths)

    return {}
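
A hypothetical call, assuming the profile keeps its sentence files in a single directory:

ini_paths = sorted(Path("profile").glob("*.ini"))  # hypothetical location
intents = get_all_intents(ini_paths)
print(sorted(intents))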
Example #3
    def setUp(self):
        self.siteId = str(uuid.uuid4())
        self.sessionId = str(uuid.uuid4())

        ini_text = """
        [SetLightColor]
        set the (bedroom | living room){name} light to (red | green | blue){color}
        """

        self.graph = intents_to_graph(parse_ini(ini_text))
        self.client = MagicMock()
        self.hermes = NluHermesMqtt(self.client,
                                    self.graph,
                                    siteIds=[self.siteId])
Example #4
    def setUp(self):
        self.site_id = str(uuid.uuid4())
        self.session_id = str(uuid.uuid4())

        ini_text = """
        [SetLightColor]
        set the (bedroom | living room){name} light to (red | green | blue){color}

        [GetTime]
        what time is it
        """

        self.graph = intents_to_graph(parse_ini(ini_text))
        self.examples = rhasspyfuzzywuzzy.train(self.graph)
        self.client = MagicMock()
        self.hermes = NluHermesMqtt(
            self.client,
            self.graph,
            examples=self.examples,
            confidence_threshold=1.0,
            site_ids=[self.site_id],
        )
Example #5
async def train_profile(profile_dir: Path,
                        profile: typing.Dict[str, typing.Any]) -> None:
    """Re-generate speech/intent artifacts for profile."""

    # Compact
    def ppath(query, default=None):
        return utils_ppath(profile, profile_dir, query, default)

    language_code = pydash.get(profile, "language.code", "en-US")

    sentences_ini = ppath("training.sentences-file", "sentences.ini")
    slots_dir = ppath("training.slots-directory", "slots")
    slot_programs = ppath("training.slot-programs-directory", "slot_programs")

    # Profile files that are split into parts and gzipped
    large_paths = [
        Path(p) for p in pydash.get(profile, "training.large-files", [])
    ]

    # -------------------
    # Speech to text
    # -------------------
    base_dictionary = ppath("training.base-dictionary", "base_dictionary.txt")
    custom_words = ppath("training.custom-words-file", "custom_words.txt")
    custom_words_action = PronunciationAction(
        pydash.get(profile, "training.custom-words-action", "append"))
    sounds_like = ppath("training.sounds-like-file", "sounds_like.txt")
    sounds_like_action = PronunciationAction(
        pydash.get(profile, "training.sounds-like-action", "append"))

    acoustic_model = ppath("training.acoustic-model", "acoustic_model")
    acoustic_model_type = AcousticModelType(
        pydash.get(profile, "training.acoustic-model-type",
                   AcousticModelType.DUMMY))

    # Replace numbers with words
    replace_numbers = bool(
        pydash.get(profile, "training.replace-numbers", True))

    # ignore/upper/lower
    word_casing = pydash.get(profile, "training.word-casing",
                             WordCasing.IGNORE)

    # Large pre-built language model
    base_language_model_fst = ppath("training.base-language-model-fst",
                                    "base_language_model.fst")
    base_language_model_weight = float(
        pydash.get(profile, "training.base-language-model-weight", 0))

    # -------------------
    # Grapheme to phoneme
    # -------------------
    g2p_model = ppath("training.grapheme-to-phoneme-model", "g2p.fst")
    g2p_corpus = ppath("training.grapheme-to-phoneme-corpus", "g2p.corpus")

    # default/ignore/upper/lower
    g2p_word_casing = pydash.get(profile, "training.g2p-word-casing",
                                 word_casing)

    # -------
    # Outputs
    # -------
    dictionary_path = ppath("training.dictionary", "dictionary.txt")
    language_model_path = ppath("training.language-model",
                                "language_model.txt")
    language_model_fst_path = ppath("training.language-model-fst",
                                    "language_model.fst")
    mixed_language_model_fst_path = ppath("training.mixed-language-model-fst",
                                          "mixed_language_model.fst")
    intent_graph_path = ppath("training.intent-graph", "intent.pickle.gz")
    vocab_path = ppath("training.vocabulary-file", "vocab.txt")
    unknown_words_path = ppath("training.unknown-words-file",
                               "unknown_words.txt")

    async def run(command: typing.List[str], **kwargs):
        """Run a command asynchronously."""
        process = await asyncio.create_subprocess_exec(*command, **kwargs)
        await process.wait()
        assert process.returncode == 0, "Command failed"

    # -------------------------------------------------------------------------
    # 1. Reassemble large files
    # -------------------------------------------------------------------------

    for target_path in large_paths:
        gzip_path = Path(str(target_path) + ".gz")
        part_paths = sorted(
            list(gzip_path.parent.glob(f"{gzip_path.name}.part-*")))
        if part_paths:
            # Concatenate the parts together
            cat_command = ["cat"] + [str(p) for p in part_paths]
            _LOGGER.debug(cat_command)

            with open(gzip_path, "wb") as gzip_file:
                await run(cat_command, stdout=gzip_file)

        if gzip_path.is_file():
            # Unzip single file
            unzip_command = ["gunzip", "-f", "--stdout", str(gzip_path)]
            _LOGGER.debug(unzip_command)

            with open(target_path, "wb") as target_file:
                await run(unzip_command, stdout=target_file)

            # Delete zip file
            gzip_path.unlink()

        # Delete unneeded .gz-part files
        for part_path in part_paths:
            part_path.unlink()

    # -------------------------------------------------------------------------
    # 2. Generate intent graph
    # -------------------------------------------------------------------------

    # Parse JSGF sentences
    _LOGGER.debug("Parsing %s", sentences_ini)
    intents = rhasspynlu.parse_ini(sentences_ini)

    # Split into sentences and rule/slot replacements
    sentences, replacements = rhasspynlu.ini_jsgf.split_rules(intents)

    word_transform = None
    if word_casing == WordCasing.UPPER:
        word_transform = str.upper
    elif word_casing == WordCasing.LOWER:
        word_transform = str.lower

    word_visitor: typing.Optional[typing.Callable[[Expression], typing.Union[
        bool, Expression]]] = None

    if word_transform:
        # Apply transformation to words

        def transform_visitor(word: Expression):
            if isinstance(word, Word):
                assert word_transform
                new_text = word_transform(word.text)

                # Preserve case by using original text as substitution
                if (word.substitution is None) and (new_text != word.text):
                    word.substitution = word.text

                word.text = new_text

            return word

        word_visitor = transform_visitor

    # Apply case/number transforms
    if word_visitor or replace_numbers:
        for intent_sentences in sentences.values():
            for sentence in intent_sentences:
                if replace_numbers:
                    # Replace number ranges with slot references
                    # type: ignore
                    rhasspynlu.jsgf.walk_expression(
                        sentence, rhasspynlu.number_range_transform,
                        replacements)

                if word_visitor:
                    # Do case transformation
                    # type: ignore
                    rhasspynlu.jsgf.walk_expression(sentence, word_visitor,
                                                    replacements)

    # Load slot values
    slot_replacements = rhasspynlu.get_slot_replacements(
        intents,
        slots_dirs=[slots_dir],
        slot_programs_dirs=[slot_programs],
        slot_visitor=word_visitor,
    )

    # Merge with existing replacements
    for slot_key, slot_values in slot_replacements.items():
        replacements[slot_key] = slot_values

    if replace_numbers:
        # Do single number transformations
        for intent_sentences in sentences.values():
            for sentence in intent_sentences:
                rhasspynlu.jsgf.walk_expression(
                    sentence,
                    lambda w: rhasspynlu.number_transform(w, language_code),
                    replacements,
                )

    # Convert to directed graph
    intent_graph = rhasspynlu.sentences_to_graph(sentences,
                                                 replacements=replacements)

    # Convert to gzipped pickle
    intent_graph_path.parent.mkdir(exist_ok=True)
    with open(intent_graph_path, mode="wb") as intent_graph_file:
        rhasspynlu.graph_to_gzip_pickle(intent_graph, intent_graph_file)

    _LOGGER.debug("Wrote intent graph to %s", intent_graph_path)

    g2p_word_transform = None
    if g2p_word_casing == WordCasing.UPPER:
        g2p_word_transform = str.upper
    elif g2p_word_casing == WordCasing.LOWER:
        g2p_word_transform = str.lower

    # Load phonetic dictionaries
    pronunciations: PronunciationsType = {}
    if acoustic_model_type in [
            AcousticModelType.POCKETSPHINX,
            AcousticModelType.KALDI,
            AcousticModelType.JULIUS,
    ]:
        pronunciations, _ = load_pronunciations(
            base_dictionary=base_dictionary,
            custom_words=custom_words,
            custom_words_action=custom_words_action,
            sounds_like=sounds_like,
            sounds_like_action=sounds_like_action,
            g2p_corpus=g2p_corpus,
        )

    # -------------------------------------------------------------------------
    # Speech to Text Training
    # -------------------------------------------------------------------------

    if acoustic_model_type == AcousticModelType.POCKETSPHINX:
        # Pocketsphinx
        import rhasspyasr_pocketsphinx

        rhasspyasr_pocketsphinx.train(
            intent_graph,
            dictionary_path,
            language_model_path,
            pronunciations,
            dictionary_word_transform=word_transform,
            g2p_model=g2p_model,
            g2p_word_transform=g2p_word_transform,
            missing_words_path=unknown_words_path,
            vocab_path=vocab_path,
            language_model_fst=language_model_fst_path,
            base_language_model_fst=base_language_model_fst,
            base_language_model_weight=base_language_model_weight,
            mixed_language_model_fst=mixed_language_model_fst_path,
        )
    elif acoustic_model_type == AcousticModelType.KALDI:
        # Kaldi
        import rhasspyasr_kaldi
        from rhasspyasr_kaldi.train import LanguageModelType

        graph_dir = ppath("training.kaldi.graph-directory") or (
            acoustic_model / "graph")

        # Type of language model to generate
        language_model_type = LanguageModelType(
            pydash.get(profile, "training.kaldi.language-model-type", "arpa"))

        rhasspyasr_kaldi.train(
            intent_graph,
            pronunciations,
            acoustic_model,
            graph_dir,
            dictionary_path,
            language_model_path,
            language_model_type=language_model_type,
            dictionary_word_transform=word_transform,
            g2p_model=g2p_model,
            g2p_word_transform=g2p_word_transform,
            missing_words_path=unknown_words_path,
            vocab_path=vocab_path,
            language_model_fst=language_model_fst_path,
            base_language_model_fst=base_language_model_fst,
            base_language_model_weight=base_language_model_weight,
            mixed_language_model_fst=mixed_language_model_fst_path,
        )
    elif acoustic_model_type == AcousticModelType.DEEPSPEECH:
        # DeepSpeech
        import rhasspyasr_deepspeech

        trie_path = ppath("training.deepspeech.trie", "trie")
        alphabet_path = ppath("training.deepspeech.alphabet",
                              "model/alphabet.txt")

        rhasspyasr_deepspeech.train(
            intent_graph,
            language_model_path,
            trie_path,
            alphabet_path,
            vocab_path=vocab_path,
            language_model_fst=language_model_fst_path,
            base_language_model_fst=base_language_model_fst,
            base_language_model_weight=base_language_model_weight,
            mixed_language_model_fst=mixed_language_model_fst_path,
        )
    elif acoustic_model_type == AcousticModelType.JULIUS:
        # Julius
        from .julius import train as train_julius

        train_julius(
            intent_graph,
            dictionary_path,
            language_model_path,
            pronunciations,
            dictionary_word_transform=word_transform,
            silence_words={"<s>", "</s>"},
            g2p_model=g2p_model,
            g2p_word_transform=g2p_word_transform,
            missing_words_path=unknown_words_path,
            vocab_path=vocab_path,
            language_model_fst=language_model_fst_path,
            base_language_model_fst=base_language_model_fst,
            base_language_model_weight=base_language_model_weight,
            mixed_language_model_fst=mixed_language_model_fst_path,
        )
    else:
        _LOGGER.warning("Not training speech to text system (%s)",
                        acoustic_model_type)
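
Downstream services read the intent graph back from the gzipped pickle written above. A minimal sketch, assuming rhasspynlu exposes gzip_pickle_to_graph as the counterpart of graph_to_gzip_pickle (verify against your rhasspynlu version):

import rhasspynlu

# Path corresponds to the training.intent-graph default used above
with open("intent.pickle.gz", "rb") as graph_file:
    intent_graph = rhasspynlu.gzip_pickle_to_graph(graph_file)

recognitions = rhasspynlu.recognize("what time is it", intent_graph)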
Example #6
def train(
    sentences_dict: typing.Dict[str, str],
    language: str,
    slots_dict: typing.Optional[typing.Dict[str, typing.List[str]]] = None,
    engine_path: typing.Optional[typing.Union[str, Path]] = None,
    dataset_path: typing.Optional[typing.Union[str, Path]] = None,
) -> SnipsNLUEngine:
    """Generate Snips YAML dataset from Rhasspy sentences/slots."""
    slots_dict = slots_dict or {}

    _LOGGER.debug("Creating Snips engine for language %s", language)
    engine = SnipsNLUEngine(config=DEFAULT_CONFIGS[language])

    # Parse JSGF sentences
    _LOGGER.debug("Parsing sentences")
    with io.StringIO() as ini_file:
        # Join as single ini file
        for lines in sentences_dict.values():
            print(lines, file=ini_file)
            print("", file=ini_file)

        intents = rhasspynlu.parse_ini(ini_file.getvalue())

    # Split into sentences and rule/slot replacements
    sentences, replacements = rhasspynlu.ini_jsgf.split_rules(intents)

    for intent_sentences in sentences.values():
        for sentence in intent_sentences:
            rhasspynlu.jsgf.walk_expression(sentence,
                                            rhasspynlu.number_range_transform,
                                            replacements)

    # Convert to directed graph *without* expanding slots
    # (e.g., $rhasspy/number)
    _LOGGER.debug("Converting to intent graph")
    intent_graph = rhasspynlu.sentences_to_graph(sentences,
                                                 replacements=replacements,
                                                 expand_slots=False)

    # Get start/end nodes for graph
    start_node, end_node = rhasspynlu.jsgf_graph.get_start_end_nodes(
        intent_graph)
    assert (start_node
            is not None) and (end_node
                              is not None), "Missing start/end node(s)"

    if dataset_path:
        # Use user file
        dataset_file = open(dataset_path, "w+")
    else:
        # Use temporary file
        dataset_file = typing.cast(
            typing.TextIO, tempfile.NamedTemporaryFile(suffix=".yml",
                                                       mode="w+"))
        dataset_path = dataset_file.name

    with dataset_file:
        _LOGGER.debug("Writing YAML dataset to %s", dataset_path)

        # Walk first layer of edges with intents
        for _, intent_node, edge_data in intent_graph.edges(start_node,
                                                            data=True):
            intent_name: str = edge_data["olabel"][9:]

            # New intent
            print("---", file=dataset_file)
            print("type: intent", file=dataset_file)
            print("name:", quote(intent_name), file=dataset_file)
            print("utterances:", file=dataset_file)

            # Get all paths through the graph (utterances)
            used_utterances: typing.Set[str] = set()
            paths = nx.all_simple_paths(intent_graph, intent_node, end_node)
            for path in paths:
                utterance = []
                entity_name = None
                slot_name = None
                slot_value = None

                # Walk utterance edges
                for from_node, to_node in rhasspynlu.utils.pairwise(path):
                    edge_data = intent_graph.edges[(from_node, to_node)]
                    ilabel = edge_data.get("ilabel")
                    olabel = edge_data.get("olabel")
                    if olabel:
                        if olabel.startswith("__begin__"):
                            slot_name = olabel[9:]
                            entity_name = None
                            slot_value = ""
                        elif olabel.startswith("__end__"):
                            if entity_name == "rhasspy/number":
                                # Transform to Snips number
                                entity_name = "snips/number"
                            elif not entity_name:
                                # Collect actual value
                                assert (
                                    slot_name and slot_value
                                ), f"No slot name or value (name={slot_name}, value={slot_value})"

                                entity_name = slot_name
                                slot_values = slots_dict.get(slot_name)
                                if not slot_values:
                                    slot_values = []
                                    slots_dict[slot_name] = slot_values

                                slot_values.append(slot_value.strip())

                            # Reference slot/entity (values will be added later)
                            utterance.append(f"[{slot_name}:{entity_name}]")

                            # Reset current slot/entity
                            entity_name = None
                            slot_name = None
                            slot_value = None
                        elif olabel.startswith("__source__"):
                            # Use Rhasspy slot name as entity
                            entity_name = olabel[10:]

                    if ilabel:
                        # Add to current slot/entity value
                        if slot_name and (not entity_name):
                            slot_value += ilabel + " "
                        else:
                            # Add directly to utterance
                            utterance.append(ilabel)
                    elif (olabel and (not olabel.startswith("__"))
                          and slot_name and (not slot_value)
                          and (not entity_name)):
                        slot_value += olabel + " "

                if utterance:
                    utterance_str = " ".join(utterance)
                    if utterance_str not in used_utterances:
                        # Write utterance
                        print("  -", quote(utterance_str), file=dataset_file)
                        used_utterances.add(utterance_str)

            print("", file=dataset_file)

        # Write entities
        for slot_name, values in slots_dict.items():
            if slot_name.startswith("$"):
                # Remove arguments and $
                slot_name = slot_name.split(",")[0][1:]

            # Skip numbers
            if slot_name in {"rhasspy/number"}:
                # Should have been converted already to snips/number
                continue

            # Keep only unique values
            values_set = set(values)

            print("---", file=dataset_file)
            print("type: entity", file=dataset_file)
            print("name:", quote(slot_name), file=dataset_file)
            print("values:", file=dataset_file)

            slot_graph = rhasspynlu.sentences_to_graph({
                slot_name: [
                    rhasspynlu.jsgf.Sentence.parse(value)
                    for value in values_set
                ]
            })

            start_node, end_node = rhasspynlu.jsgf_graph.get_start_end_nodes(
                slot_graph)
            n_data = slot_graph.nodes(data=True)
            for path in nx.all_simple_paths(slot_graph, start_node, end_node):
                words = []
                for node in path:
                    node_data = n_data[node]
                    word = node_data.get("word")
                    if word:
                        words.append(word)

                if words:
                    print("  -", quote(" ".join(words)), file=dataset_file)

            print("", file=dataset_file)

        # ------------
        # Train engine
        # ------------

        if engine_path:
            # Delete existing engine
            engine_path = Path(engine_path)
            engine_path.parent.mkdir(exist_ok=True)

            if engine_path.is_dir():
                # Snips will fail if the directory exists
                _LOGGER.debug("Removing existing engine at %s", engine_path)
                shutil.rmtree(engine_path)
            elif engine_path.is_file():
                _LOGGER.debug("Removing unexpected file at %s", engine_path)
                engine_path.unlink()

        _LOGGER.debug("Training engine")
        dataset_file.seek(0)
        dataset = Dataset.from_yaml_files(language, [dataset_file])
        engine = engine.fit(dataset)

    if engine_path:
        # Save engine
        engine.persist(engine_path)
        _LOGGER.debug("Engine saved to %s", engine_path)

    return engine
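
The returned SnipsNLUEngine can then be queried with its standard parse() method. A minimal sketch using a made-up sentence set:

engine = train(
    {"light": "[SetLightColor]\nset the light to (red | green | blue){color}"},
    language="en",
)

result = engine.parse("set the light to blue")
print(result["intent"]["intentName"], result["slots"])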
Example #7
def train_profile(profile_dir: Path,
                  profile: Profile) -> Tuple[int, List[str]]:

    # Compact
    def ppath(query, default=None, write=False):
        return utils_ppath(profile, profile_dir, query, default, write=write)

    language = profile.get("language", "")

    # Inputs
    stt_system = profile.get("speech_to_text.system")
    stt_prefix = f"speech_to_text.{stt_system}"

    # intent_whitelist = ppath("training.intent-whitelist", "intent_whitelist")
    sentences_ini = ppath("speech_to_text.sentences_ini", "sentences.ini")
    sentences_dir = ppath("speech_to_text.sentences_dir", "sentences.dir")
    base_dictionary = ppath(f"{stt_prefix}.base_dictionary",
                            "base_dictionary.txt")
    base_language_model = ppath(f"{stt_prefix}.base_language_model",
                                "base_language_model.txt")
    base_language_model_weight = float(
        profile.get(f"{stt_prefix}.mix_weight", 0))
    g2p_model = ppath(f"{stt_prefix}.g2p_model", "g2p.fst")
    acoustic_model_type = stt_system

    if acoustic_model_type == "pocketsphinx":
        acoustic_model = ppath(f"{stt_prefix}.acoustic_model",
                               "acoustic_model")
        kaldi_dir = None
    elif acoustic_model_type == "kaldi":
        kaldi_dir = Path(
            os.path.expandvars(
                profile.get(f"{stt_prefix}.kaldi_dir", "/opt/kaldi")))
        acoustic_model = ppath(f"{stt_prefix}.model_dir", "model")
    else:
        assert False, f"Unknown acoustic model type: {acoustic_model_type}"

    # ignore/upper/lower
    word_casing = profile.get("speech_to_text.dictionary_casing",
                              "ignore").lower()

    # default/ignore/upper/lower
    g2p_word_casing = profile.get("speech_to_text.g2p_casing",
                                  word_casing).lower()

    # all/first
    dict_merge_rule = profile.get("speech_to_text.dictionary_merge_rule",
                                  "all").lower()

    # Kaldi
    kaldi_graph_dir = acoustic_model / profile.get(f"{stt_prefix}.graph",
                                                   "graph")

    # Outputs
    dictionary = ppath(f"{stt_prefix}.dictionary",
                       "dictionary.txt",
                       write=True)
    custom_words = ppath(f"{stt_prefix}.custom_words",
                         "custom_words.txt",
                         write=True)
    language_model = ppath(f"{stt_prefix}.language_model",
                           "language_model.txt",
                           write=True)
    base_language_model_fst = ppath(f"{stt_prefix}.base_language_model_fst",
                                    "base_language_model.fst",
                                    write=True)
    intent_graph = ppath("intent.fsticiffs.intent_graph",
                         "intent.json",
                         write=True)
    intent_fst = ppath("intent.fsticiffs.intent_fst", "intent.fst", write=True)
    vocab = ppath(f"{stt_prefix}.vocabulary", "vocab.txt", write=True)
    unknown_words = ppath(f"{stt_prefix}.unknown_words",
                          "unknown_words.txt",
                          write=True)
    grammar_dir = ppath("speech_to_text.grammars_dir", "grammars", write=True)
    fsts_dir = ppath("speech_to_text.fsts_dir", "fsts", write=True)
    slots_dir = ppath("speech_to_text.slots_dir", "slots", write=True)

    # -----------------------------------------------------------------------------

    # Create cache directories
    for dir_path in [grammar_dir, fsts_dir]:
        dir_path.mkdir(parents=True, exist_ok=True)

    # -----------------------------------------------------------------------------

    ini_paths: List[Path] = []
    if sentences_ini.is_file():
        ini_paths = [sentences_ini]

    # Add .ini files from intents directory
    if sentences_dir.is_dir():
        for ini_path in sentences_dir.rglob("*.ini"):
            ini_paths.append(ini_path)

    # Join ini files into a single combined file and parse
    _LOGGER.debug("Parsing ini file(s): %s", [str(p) for p in ini_paths])
    with io.StringIO() as combined_ini_file:
        for ini_path in ini_paths:
            combined_ini_file.write(ini_path.read_text())
            print("", file=combined_ini_file)

        intents = parse_ini(combined_ini_file.getvalue())

    # -----------------------------------------------------------------------------

    def get_slot_names(item):
        """Yield referenced slot names."""
        if isinstance(item, jsgf.SlotReference):
            yield item.slot_name
        elif isinstance(item, jsgf.Sequence):
            for sub_item in item.items:
                for slot_name in get_slot_names(sub_item):
                    yield slot_name
        elif isinstance(item, jsgf.Rule):
            for slot_name in get_slot_names(item.rule_body):
                yield slot_name

    def number_transform(word):
        """Automatically transform numbers"""
        if not isinstance(word, jsgf.Word):
            # Skip anything besides words
            return

        try:
            n = int(word.text)

            # 75 -> (seventy five):75
            number_text = num2words(n, lang=language).replace("-", " ").strip()
            assert number_text, f"Empty num2words result for {n}"
            number_words = number_text.split()

            if len(number_words) == 1:
                # Easy case, single word
                word.text = number_text
                word.substitution = str(n)
            else:
                # Hard case, split into multiple Words
                return jsgf.Sequence(
                    text=number_text,
                    type=jsgf.SequenceType.GROUP,
                    substitution=str(n),
                    items=[jsgf.Word(w) for w in number_words],
                )
        except ValueError:
            # Not a number
            pass

    def do_intents_to_graph(intents, slot_names, targets):
        sentences, replacements = ini_jsgf.split_rules(intents)

        # Load slot values
        for slot_name in slot_names:
            slot_path = slots_dir / slot_name
            assert slot_path.is_file(), f"Missing slot file at {slot_path}"

            # Parse each non-empty line as a JSGF sentence
            slot_values = []
            with open(slot_path, "r") as slot_file:
                for line in slot_file:
                    line = line.strip()
                    if line:
                        sentence = jsgf.Sentence.parse(line)
                        slot_values.append(sentence)

            # Replace $slot with sentences
            replacements[f"${slot_name}"] = slot_values

        if profile.get("intent.replace_numbers", True):
            # Replace numbers in parsed sentences
            for intent_sentences in sentences.values():
                for sentence in intent_sentences:
                    jsgf.walk_expression(sentence, number_transform,
                                         replacements)

        # Convert to directed graph
        graph = intents_to_graph(intents, replacements)

        # Write graph to JSON file
        json_graph = graph_to_json(graph)
        with open(targets[0], "w") as graph_file:
            json.dump(json_graph, graph_file)

    def task_ini_graph():
        """sentences.ini -> intent.json"""
        slot_names = set()
        for intent_name in intents:
            for item in intents[intent_name]:
                for slot_name in get_slot_names(item):
                    slot_names.add(slot_name)

        # Add slot files as dependencies
        deps = [(slots_dir / slot_name) for slot_name in slot_names]

        # Add profile itself as a dependency
        profile_json_path = profile_dir / "profile.json"
        if profile_json_path.is_file():
            deps.append(profile_json_path)

        return {
            "file_dep": ini_paths + deps,
            "targets": [intent_graph],
            "actions": [(do_intents_to_graph, [intents, slot_names])],
        }

    # -----------------------------------------------------------------------------

    def do_graph_to_fst(intent_graph, targets):
        with open(intent_graph, "r") as graph_file:
            json_graph = json.load(graph_file)

        graph = json_to_graph(json_graph)
        graph_fst = graph_to_fst(graph)

        # Create symbol tables
        isymbols = fst.SymbolTable()
        for symbol, number in graph_fst.input_symbols.items():
            isymbols.add_symbol(symbol, number)

        osymbols = fst.SymbolTable()
        for symbol, number in graph_fst.output_symbols.items():
            osymbols.add_symbol(symbol, number)

        # Compile FST
        compiler = fst.Compiler(isymbols=isymbols,
                                osymbols=osymbols,
                                keep_isymbols=True,
                                keep_osymbols=True)

        compiler.write(graph_fst.intent_fst)
        compiled_fst = compiler.compile()

        # Write to file
        compiled_fst.write(str(targets[0]))

    def task_intent_fst():
        """intent.json -> intent.fst"""
        return {
            "file_dep": [intent_graph],
            "targets": [intent_fst],
            "actions": [(do_graph_to_fst, [intent_graph])],
        }

    # -----------------------------------------------------------------------------

    @create_after(executed="intent_fst")
    def task_language_model():
        """Creates an ARPA language model from intent.fst."""

        if base_language_model_weight > 0:
            yield {
                "name": "base_lm_to_fst",
                "file_dep": [base_language_model],
                "targets": [base_language_model_fst],
                "actions": ["ngramread --ARPA %(dependencies)s %(targets)s"],
            }

        # FST -> n-gram counts
        intent_counts = str(intent_fst) + ".counts"
        yield {
            "name": "intent_counts",
            "file_dep": [intent_fst],
            "targets": [intent_counts],
            "actions": ["ngramcount %(dependencies)s %(targets)s"],
        }

        # n-gram counts -> model
        intent_model = str(intent_fst) + ".model"
        yield {
            "name": "intent_model",
            "file_dep": [intent_counts],
            "targets": [intent_model],
            "actions": ["ngrammake %(dependencies)s %(targets)s"],
        }

        if base_language_model_weight > 0:
            merged_model = Path(str(intent_model) + ".merge")

            # merge
            yield {
                "name":
                "lm_merge",
                "file_dep": [base_language_model_fst, intent_model],
                "targets": [merged_model],
                "actions": [
                    f"ngrammerge --alpha={base_language_model_weight} %(dependencies)s %(targets)s"
                ],
            }

            intent_model = merged_model

        # model -> ARPA
        yield {
            "name": "intent_arpa",
            "file_dep": [intent_model],
            "targets": [language_model],
            "actions": ["ngramprint --ARPA %(dependencies)s > %(targets)s"],
        }

    # -----------------------------------------------------------------------------

    def do_vocab(targets):
        with open(targets[0], "w") as vocab_file:
            input_symbols = fst.Fst.read(str(intent_fst)).input_symbols()
            for i in range(input_symbols.num_symbols()):
                # Critical that we use get_nth_key here when input symbols
                # numbering is discontiguous.
                key = input_symbols.get_nth_key(i)
                symbol = input_symbols.find(key).decode().strip()
                if symbol and not (symbol.startswith("__")
                                   or symbol.startswith("<")):
                    print(symbol, file=vocab_file)

            if base_language_model_weight > 0:
                # Add all words from base dictionary
                with open(base_dictionary, "r") as dict_file:
                    for word in read_dict(dict_file):
                        print(word, file=vocab_file)

    @create_after(executed="language_model")
    def task_vocab():
        """Writes all vocabulary words to a file from intent.fst."""
        return {
            "file_dep": [intent_fst],
            "targets": [vocab],
            "actions": [do_vocab]
        }

    # -----------------------------------------------------------------------------

    def do_dict(dictionary_paths: Iterable[Path], targets):
        with open(targets[0], "w") as dictionary_file:
            if unknown_words.exists():
                unknown_words.unlink()

            dictionary_format = FORMAT_CMU
            if acoustic_model_type == "julius":
                dictionary_format = FORMAT_JULIUS

            make_dict(
                vocab,
                dictionary_paths,
                dictionary_file,
                unknown_path=unknown_words,
                dictionary_format=dictionary_format,
                merge_rule=dict_merge_rule,
                upper=(word_casing == "upper"),
                lower=(word_casing == "lower"),
            )

            if unknown_words.exists() and g2p_model.exists():
                # Generate single pronunciation guesses
                _LOGGER.debug("Guessing pronunciations for unknown word(s)")

                g2p_output = subprocess.check_output(
                    [
                        "phonetisaurus-apply",
                        "--model",
                        str(g2p_model),
                        "--word_list",
                        str(unknown_words),
                        "--nbest",
                        "1",
                    ],
                    universal_newlines=True,
                )

                g2p_transform = lambda w: w
                if g2p_word_casing == "upper":
                    g2p_transform = lambda w: w.upper()
                elif g2p_word_casing == "lower":
                    g2p_transform = lambda w: w.lower()

                # Append to dictionary and custom words
                with open(custom_words, "a") as words_file:
                    with open(unknown_words, "w") as unknown_words_file:
                        for line in g2p_output.splitlines():
                            line = line.strip()
                            word, phonemes = re.split(r"\s+", line, maxsplit=1)
                            word = g2p_transform(word)
                            print(word, phonemes, file=dictionary_file)
                            print(word, phonemes, file=words_file)
                            print(word, phonemes, file=unknown_words_file)

    @create_after(executed="vocab")
    def task_vocab_dict():
        """Creates custom pronunciation dictionary based on desired vocabulary."""
        dictionary_paths = [base_dictionary]
        if custom_words.exists():
            # Custom dictionary goes first so that the "first" dictionary merge
            # rule will choose pronunciations from it.
            dictionary_paths.insert(0, custom_words)

        # Exclude dictionaries that don't exist
        dictionary_paths = [p for p in dictionary_paths if p.exists()]

        return {
            "file_dep": [vocab] + dictionary_paths,
            "targets": [dictionary],
            "actions": [(do_dict, [dictionary_paths])],
        }

    # -----------------------------------------------------------------------------

    @create_after(executed="vocab_dict")
    def task_kaldi_train():
        """Creates HCLG.fst for a Kaldi nnet3 or gmm model."""
        if acoustic_model_type == "kaldi":
            return {
                "file_dep": [dictionary, language_model],
                "targets": [kaldi_graph_dir / "HCLG.fst"],
                "actions": [[
                    "bash",
                    str(acoustic_model / "train.sh"),
                    str(kaldi_dir),
                    str(acoustic_model),
                    str(dictionary),
                    str(language_model),
                ]],
            }

    # -----------------------------------------------------------------------------

    errors = []

    class MyReporter(ConsoleReporter):
        def add_failure(self, task, exception):
            super().add_failure(task, exception)
            errors.append(f"{task}: {exception}")

        def runtime_error(self, msg):
            super().runtime_error(msg)
            errors.append(msg)

    DOIT_CONFIG = {"action_string_formatting": "old", "reporter": MyReporter}

    # Monkey patch inspect to make doit work inside Pyinstaller.
    # It grabs the line numbers of functions probably for debugging reasons, but
    # PyInstaller doesn't seem to keep that information around.
    #
    # A better approach would be to create a custom TaskLoader.
    import inspect

    inspect.getsourcelines = lambda obj: [0, 0]

    # Run doit main
    result = DoitMain(ModuleTaskLoader(locals())).run(sys.argv[1:])
    return (result, errors)
Example #8
def sentences_to_graph(
    sentences_dict: typing.Dict[str, str],
    slots_dirs: typing.Optional[typing.List[Path]] = None,
    slot_programs_dirs: typing.Optional[typing.List[Path]] = None,
    replace_numbers: bool = True,
    language: str = "en",
    word_transform: typing.Optional[typing.Callable[[str], str]] = None,
    add_intent_weights: bool = True,
) -> typing.Tuple[nx.DiGraph, typing.Dict[str, typing.Any]]:
    """Transform sentences to an intent graph and slot replacements."""
    slots_dirs = slots_dirs or []
    slot_programs_dirs = slot_programs_dirs or []

    # Parse sentences and convert to graph
    with io.StringIO() as ini_file:
        # Join as single ini file
        for lines in sentences_dict.values():
            print(lines, file=ini_file)
            print("", file=ini_file)

        # Parse JSGF sentences
        intents = rhasspynlu.parse_ini(ini_file.getvalue())

    # Split into sentences and rule/slot replacements
    sentences, replacements = rhasspynlu.ini_jsgf.split_rules(intents)

    word_visitor: typing.Optional[typing.Callable[[Expression], typing.Union[
        bool, Expression]]] = None

    if word_transform:
        # Apply transformation to words

        def transform_visitor(word: Expression):
            if isinstance(word, Word):
                assert word_transform
                new_text = word_transform(word.text)

                # Preserve case by using original text as substitution
                if (word.substitution is None) and (new_text != word.text):
                    word.substitution = word.text

                word.text = new_text

            return word

        word_visitor = transform_visitor

    # Apply case/number transforms
    if word_visitor or replace_numbers:
        for intent_sentences in sentences.values():
            for sentence in intent_sentences:
                if replace_numbers:
                    # Replace number ranges with slot references
                    # type: ignore
                    rhasspynlu.jsgf.walk_expression(
                        sentence, rhasspynlu.number_range_transform,
                        replacements)

                if word_visitor:
                    # Do case transformation
                    # type: ignore
                    rhasspynlu.jsgf.walk_expression(sentence, word_visitor,
                                                    replacements)

    # Load slot values
    slot_replacements = rhasspynlu.get_slot_replacements(
        intents,
        slots_dirs=slots_dirs,
        slot_programs_dirs=slot_programs_dirs,
        slot_visitor=word_visitor,
    )

    # Merge with existing replacements
    for slot_key, slot_values in slot_replacements.items():
        replacements[slot_key] = slot_values

    if replace_numbers:
        # Do single number transformations
        for intent_sentences in sentences.values():
            for sentence in intent_sentences:
                rhasspynlu.jsgf.walk_expression(
                    sentence,
                    lambda w: rhasspynlu.number_transform(w, language),
                    replacements,
                )

    # Convert to directed graph
    intent_graph = rhasspynlu.sentences_to_graph(
        sentences,
        replacements=replacements,
        add_intent_weights=add_intent_weights)

    return intent_graph, slot_replacements
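
A hypothetical call to the wrapper above; the dictionary keys are only labels, and the values are plain sentences.ini text:

graph, slot_replacements = sentences_to_graph(
    {
        "main": "[GetTime]\nwhat time is it\n\n[GetTemperature]\nhow (hot | cold) is it"
    },
    word_transform=str.lower,
)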
Example #9
    def make_summary(targets):
        """Writes summary CSV."""
        with open(targets[0], "w") as out_file:
            writer = csv.DictWriter(
                out_file,
                fieldnames=[
                    "dataset",
                    "profile",
                    "training_seconds",
                    "transcription_accuracy",
                    "intent_entity_accuracy",
                    "average_transcription_speedup",
                    "average_recognize_seconds",
                    "num_wavs",
                    "num_sentences",
                ],
            )

            writer.writeheader()

            for p in _PROFILES:
                sentences_ini = p.out_profile_dir / "sentences.ini"
                slots_dir = p.out_profile_dir / "slots"
                report_json = p.results_dir / "report.json"
                train_results = p.results_dir / "train-profile.txt"

                with open(report_json, "r") as report_file:
                    report = json.load(report_file)

                # Get training time
                training_time = ""
                with open(train_results, "r") as training_file:
                    for line in training_file:
                        line = line.strip().lower()
                        if line.startswith("training completed in"):
                            training_time = "{0:.02f}".format(
                                float(line.split()[3]))

                # Get sentence count
                sentence_count = 0

                with open(sentences_ini, "r") as sentences_file:
                    intents = rhasspynlu.parse_ini(sentences_file)

                sentences, replacements = rhasspynlu.ini_jsgf.split_rules(
                    intents)

                if slots_dir.is_dir():
                    slot_replacements = rhasspynlu.slots.get_slot_replacements(
                        intents, slots_dirs=[slots_dir])

                    # Merge with existing replacements
                    for slot_key, slot_values in slot_replacements.items():
                        replacements[slot_key] = slot_values

                # Calculate number of possible sentences per intent
                intent_counts = rhasspynlu.ini_jsgf.get_intent_counts(
                    sentences, replacements, exclude_slots=False)

                sentence_count = sum(intent_counts.values())

                # Calculate average recognition time
                recognize_seconds = []
                for actual_value in report["actual"].values():
                    recognize_seconds.append(actual_value["recognize_seconds"])

                # Write CSV row
                writer.writerow({
                    "dataset":
                    p.dataset,
                    "profile":
                    p.profile,
                    "training_seconds":
                    training_time,
                    "transcription_accuracy":
                    "{0:.02f}".format(report["transcription_accuracy"]),
                    "intent_entity_accuracy":
                    "{0:.02f}".format(report["intent_entity_accuracy"]),
                    "average_transcription_speedup":
                    "{0:.02f}".format(report["average_transcription_speedup"]),
                    "num_wavs":
                    report["num_wavs"],
                    "num_sentences":
                    sentence_count,
                    "average_recognize_seconds":
                    sum(recognize_seconds) / len(recognize_seconds),
                })
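
make_summary takes its output path from doit's targets, so it is presumably registered as a task like the ones in Example #7. A hypothetical wiring:

    def task_summary():
        """report.json files -> summary.csv."""
        return {
            "file_dep": [p.results_dir / "report.json" for p in _PROFILES],
            "targets": ["summary.csv"],  # hypothetical output path
            "actions": [make_summary],
        }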