Example #1
    def pronounce(self,
                  words: List[str],
                  n: int = 5) -> Dict[str, Dict[str, Any]]:
        """Look up or guess word pronunciation(s)"""
        assert n > 0, "No pronunciations requested"
        assert len(words) > 0, "No words to look up"

        self._logger.debug("Getting pronunciations for %s", words)

        # Load base and custom dictionaries
        base_dictionary_path = self.profile.read_path(
            self.profile.get(
                f"speech_to_text.{self.speech_system}.base_dictionary"))

        # Load base dictionary once
        if (self.base_dict is None) and os.path.exists(base_dictionary_path):
            self.base_dict = defaultdict(list)
            with open(base_dictionary_path, "r") as dictionary_file:
                read_dict(dictionary_file, self.base_dict)

        custom_path = self.profile.read_path(
            self.profile.get(
                f"speech_to_text.{self.speech_system}.custom_words"))

        word_dict: Dict[str, List[str]] = defaultdict(list)
        if os.path.exists(custom_path):
            with open(custom_path, "r") as dictionary_file:
                read_dict(dictionary_file, word_dict)

        # Mix in base dictionary
        if self.base_dict:
            for word, prons in self.base_dict.items():
                word_dict[word].extend(prons)
        else:
            self._logger.warning("Missing base dictionary")

        pronunciations = self._lookup_words(words, word_dict, n)

        # Get phonemes from eSpeak
        for word in words:
            espeak_command = ["espeak", "-q", "-x"]  # -q: no audio, -x: print phoneme mnemonics

            voice = self._get_voice()
            if voice is not None:
                espeak_command.extend(["-v", voice])

            espeak_command.append(word)

            self._logger.debug(repr(espeak_command))
            espeak_str = subprocess.check_output(
                espeak_command).decode().strip()
            pronunciations[word]["phonemes"] = espeak_str

        return pronunciations
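
For reference, the eSpeak phoneme lookup above can be run on its own. The following is a minimal, hypothetical sketch (not from the source): it assumes the espeak binary is installed, and the word and voice are chosen purely for illustration.

import subprocess

# -q: no audio output, -x: write phoneme mnemonics to stdout, -v: voice selection
espeak_command = ["espeak", "-q", "-x", "-v", "en-us", "hello"]
phonemes = subprocess.check_output(espeak_command).decode().strip()
print(phonemes)  # exact phoneme string depends on the eSpeak version and voice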
Example #2
async def api_custom_words():
    """Read or write custom word dictionary for a profile"""
    assert core is not None
    speech_system = core.profile.get("speech_to_text.system", "pocketsphinx")

    # Temporary fix: move the old kaldi/custom_words.txt to the configured kaldi custom words path
    old_kaldi_words_path = Path(
        core.profile.read_path("kaldi/custom_words.txt"))
    if old_kaldi_words_path.is_file():
        new_kaldi_words_path = Path(
            core.profile.write_path(
                core.profile.get("speech_to_text.kaldi.custom_words",
                                 "custom_words.txt")))

        if (new_kaldi_words_path != old_kaldi_words_path
                and not new_kaldi_words_path.is_file()):
            logger.warning("Moving %s to %s", str(old_kaldi_words_path),
                           str(new_kaldi_words_path))
            shutil.move(old_kaldi_words_path, new_kaldi_words_path)

    if request.method == "POST":
        custom_words_path = Path(
            core.profile.write_path(
                core.profile.get(
                    f"speech_to_text.{speech_system}.custom_words",
                    "custom_words.txt")))

        # Update custom words
        lines_written = 0
        with open(custom_words_path, "w") as custom_words_file:
            data = await request.data
            lines = data.decode().splitlines()
            for line in lines:
                line = line.strip()
                if not line:
                    continue

                print(line, file=custom_words_file)
                lines_written += 1

            return f"Wrote {lines_written} line(s) to {custom_words_path}"

    custom_words_path = Path(
        core.profile.read_path(
            core.profile.get(f"speech_to_text.{speech_system}.custom_words",
                             "custom_words.txt")))

    # Return custom_words
    if prefers_json():
        if not custom_words_path.is_file():
            return jsonify({})  # no custom_words yet

        with open(custom_words_path, "r") as words_file:
            return jsonify(read_dict(words_file))
    else:
        if not custom_words_path.is_file():
            return ""  # no custom_words yet

        # Return file contents
        return await send_file(custom_words_path)
Example #3
    def load_decoder(self) -> None:
        """Loads speech decoder if not cached."""
        if self.decoder is None:
            import pocketsphinx

            # Load decoder settings (use speech-to-text configuration as a fallback)
            hmm_path = self.profile.read_path(
                self.profile.get("wake.pocketsphinx.acoustic_model", None)
                or self.profile.get("speech_to_text.pocketsphinx.acoustic_model")
            )

            dict_path = self.profile.read_path(
                self.profile.get("wake.pocketsphinx.dictionary", None)
                or self.profile.get("speech_to_text.pocketsphinx.dictionary")
            )

            self.threshold = float(
                self.profile.get("wake.pocketsphinx.threshold", 1e-40)
            )
            self.keyphrase = self.profile.get("wake.pocketsphinx.keyphrase", "")
            assert len(self.keyphrase) > 0, "No wake keyphrase"

            # Verify that keyphrase words are in dictionary
            keyphrase_words = re.split(r"\s+", self.keyphrase)
            with open(dict_path, "r") as dict_file:
                word_dict = read_dict(dict_file)

            dict_upper = self.profile.get("speech_to_text.dictionary_upper", False)
            for word in keyphrase_words:
                if dict_upper:
                    word = word.upper()
                else:
                    word = word.lower()

                if word not in word_dict:
                    self._logger.warning("%s not in dictionary", word)

            self._logger.debug(
                "Loading wake decoder with hmm=%s, dict=%s", hmm_path, dict_path
            )

            decoder_config = pocketsphinx.Decoder.default_config()
            decoder_config.set_string("-hmm", hmm_path)
            decoder_config.set_string("-dict", dict_path)
            decoder_config.set_string("-keyphrase", self.keyphrase)
            decoder_config.set_string("-logfn", "/dev/null")
            decoder_config.set_float("-kws_threshold", self.threshold)

            mllr_path = self.profile.read_path(
                self.profile.get("wake.pocketsphinx.mllr_matrix")
            )

            if os.path.exists(mllr_path):
                self._logger.debug(
                    "Using tuned MLLR matrix for acoustic model: %s", mllr_path
                )
                decoder_config.set_string("-mllr", mllr_path)

            self.decoder = pocketsphinx.Decoder(decoder_config)
            self.decoder_started = False
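
For context, a decoder loaded this way is typically fed raw 16 kHz, 16-bit mono audio in chunks and polled for a keyphrase hypothesis. The helper below is a hypothetical sketch rather than part of the source; the function name, chunk size, and raw-audio file are assumptions.

def detect_keyphrase(decoder, audio_path: str) -> bool:
    # Illustrative helper (assumed, not from the source): scan a raw audio
    # file with a keyphrase-configured pocketsphinx decoder.
    decoder.start_utt()
    try:
        with open(audio_path, "rb") as audio_file:
            while True:
                chunk = audio_file.read(2048)
                if not chunk:
                    break

                decoder.process_raw(chunk, False, False)
                if decoder.hyp() is not None:
                    return True
    finally:
        decoder.end_utt()

    return False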
Example #4
    def do_vocab(targets):
        with open(targets[0], "w") as vocab_file:
            input_symbols = fst.Fst.read(str(intent_fst)).input_symbols()
            for i in range(input_symbols.num_symbols()):
                # Use get_nth_key in case symbol numbering is discontiguous
                key = input_symbols.get_nth_key(i)
                symbol = input_symbols.find(key).decode().strip()
                if symbol and not (symbol.startswith("__") or symbol.startswith("<")):
                    print(symbol, file=vocab_file)

            if base_language_model_weight > 0:
                # Add all words from base dictionary
                with open(base_dictionary, "r") as dict_file:
                    for word in read_dict(dict_file):
                        print(word, file=vocab_file)
Example #5
async def api_custom_words():
    """Read or write custom word dictionary for a profile"""
    assert core is not None
    speech_system = core.profile.get("speech_to_text.system", "pocketsphinx")

    if request.method == "POST":
        custom_words_path = Path(
            core.profile.write_path(
                core.profile.get(
                    f"speech_to_text.{speech_system}.custom_words", "custom_words.txt"
                )
            )
        )

        # Update custom words
        lines_written = 0
        with open(custom_words_path, "w") as custom_words_file:
            data = await request.data
            lines = data.decode().splitlines()
            for line in lines:
                line = line.strip()
                if not line:
                    continue

                print(line, file=custom_words_file)
                lines_written += 1

            return "Wrote %s line(s) to %s" % (lines_written, custom_words_path)

    custom_words_path = Path(
        core.profile.read_path(
            core.profile.get(
                f"speech_to_text.{speech_system}.custom_words", "custom_words.txt"
            )
        )
    )

    # Return custom_words
    if prefers_json():
        if not custom_words_path.is_file():
            return jsonify({})  # no custom_words yet

        with open(custom_words_path, "r") as words_file:
            return jsonify(read_dict(words_file))
    else:
        if not custom_words_path.is_file():
            return ""  # no custom_words yet

        # Return file contents
        return await send_file(custom_words_path)
Example #6
    def do_vocab(targets):
        with open(targets[0], "w") as vocab_file:
            input_symbols = fst.Fst.read(str(intent_fst)).input_symbols()
            for i in range(input_symbols.num_symbols()):
                # Critical that we use get_nth_key here when input symbols
                # numbering is discontiguous.
                key = input_symbols.get_nth_key(i)
                symbol = input_symbols.find(key).decode().strip()
                if symbol and not (symbol.startswith("__") or symbol.startswith("<")):
                    print(symbol, file=vocab_file)

            if base_language_model_weight > 0:
                # Add all words from base dictionary
                with open(base_dictionary, "r") as dict_file:
                    for word in read_dict(dict_file):
                        print(word, file=vocab_file)
Example #7
    def do_vocab(targets):
        with open(targets[0], "w") as vocab_file:
            input_symbols = fst.Fst.read(str(intent_fst)).input_symbols()
            for i in range(input_symbols.num_symbols()):
                # Critical that we use get_nth_key here when input symbols
                # numbering is discontiguous.
                key = input_symbols.get_nth_key(i)
                symbol = input_symbols.find(key).decode().strip()
                if symbol and not (symbol.startswith("__")
                                   or symbol.startswith("<")):
                    print(symbol, file=vocab_file)

            if base_language_model_weight > 0:
                # Add all words from base dictionary
                with open(base_dictionary, "r") as dict_file:
                    for word in read_dict(dict_file):
                        print(word, file=vocab_file)

            if profile.get("wake.system", "dummy") == "pocketsphinx":
                # Add words from Pocketsphinx wake keyphrase
                keyphrase = profile.get("wake.pocketsphinx.keyphrase", "")
                if keyphrase:
                    for word in re.split(r"\s+", keyphrase):
                        print(word, file=vocab_file)
Example #8
def make_dict(
    vocab_path: Path,
    dictionary_paths: Iterable[Path],
    dictionary_file: TextIO,
    unknown_path: Optional[Path] = None,
    upper: bool = False,
    lower: bool = False,
    no_number: bool = False,
    dictionary_format: str = FORMAT_CMU,
    silence_words: Set[str] = set(["<s>", "</s>"]),
    merge_rule: str = "all",
) -> List[str]:
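    """Write a pronunciation dictionary for the words listed in vocab_path.

    Pronunciations are read from dictionary_paths and written to
    dictionary_file in CMU or Julius format. When merge_rule is "first",
    only the first pronunciation found for a word is kept. Words with no
    known pronunciation are returned (and written to unknown_path, if given).
    """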
    transform = lambda w: w
    if upper:
        transform = lambda w: w.upper()
        logger.debug("Forcing upper-case")
    elif lower:
        transform = lambda w: w.lower()
        logger.debug("Forcing lower-case")

    is_julius = dictionary_format == FORMAT_JULIUS

    # Read dictionaries
    word_dict: Dict[str, List[str]] = {}
    for dict_path in dictionary_paths:
        if os.path.exists(dict_path):
            logger.debug(f"Loading dictionary from {dict_path}")
            with open(dict_path, "r") as dict_file:
                read_dict(
                    dict_file,
                    word_dict,
                    transform=transform,
                    silence_words=silence_words,
                )

    # Resolve vocabulary
    words_needed: Set[str] = set()
    with open(vocab_path, "r") as vocab_file:
        for word in vocab_file:
            word = word.strip()
            if not word:
                continue

            word = transform(word)
            words_needed.add(word)

    logger.debug(f"Loaded {len(words_needed)} word(s) from {vocab_path}")

    # Add silence words
    words_needed.update(silence_words)

    # Write output dictionary
    merge_first = merge_rule == "first"
    words_in_dict: Set[str] = set()
    unknown_words: List[str] = []

    for word in sorted(words_needed):
        if (word not in word_dict) and (word not in silence_words):
            unknown_words.append(word)
            continue

        for i, pronounce in enumerate(word_dict.get(word, [])):
            if merge_first and (word in words_in_dict):
                # Only use first pronunciation when merge_rule is "first"
                continue

            if is_julius:
                # Julius format
                # word [word] P1 P2 P3
                print(word, f"[{word}]", pronounce, file=dictionary_file)
            else:
                # CMU format
                # word P1 P2 P3
                # word(N) P1 P2 P3
                if (i < 1) or no_number:
                    print(word, pronounce, file=dictionary_file)
                else:
                    print(f"{word}({i + 1})", pronounce, file=dictionary_file)

            words_in_dict.add(word)

    # -------------------------------------------------------------------------

    if unknown_words:
        logger.warning(f"{len(unknown_words)} word(s) are unknown")
        logger.warning(",".join(unknown_words))

        # Write unknown words
        if unknown_path:
            with open(unknown_path, "w") as unknown_file:
                for word in unknown_words:
                    print(word, file=unknown_file)

            logger.debug(f"Wrote unknown words to {unknown_path}")

    return unknown_words
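
A minimal usage sketch for make_dict, assuming a vocabulary file and dictionary files already exist on disk; every file name here is illustrative rather than taken from the source.

from pathlib import Path

with open("dictionary.txt", "w") as dictionary_file:
    unknown = make_dict(
        vocab_path=Path("vocab.txt"),
        dictionary_paths=[Path("base_dictionary.txt"), Path("custom_words.txt")],
        dictionary_file=dictionary_file,
        unknown_path=Path("unknown_words.txt"),
        lower=True,
    )

if unknown:
    print(f"{len(unknown)} word(s) still need pronunciations: {unknown}")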