Code example #1
    def __init__(
        self,
        profile_name: str,
        system_profiles_dir: str,
        user_profiles_dir: str,
        actor_system: Optional[ActorSystem] = None,
    ) -> None:

        self._logger = logging.getLogger(self.__class__.__name__)
        self.profiles_dirs: List[str] = [
            user_profiles_dir, system_profiles_dir
        ]
        self.profile_name = profile_name
        self.actor_system = actor_system

        self.profile = Profile(self.profile_name, system_profiles_dir,
                               user_profiles_dir)
        self._logger.debug("Loaded profile from %s", self.profile.json_path)
        self._logger.debug("Profile files will be written to %s",
                           self.profile.write_path())

        self.defaults = Profile.load_defaults(system_profiles_dir)

        self.loop = asyncio.get_event_loop()
        self._session: Optional[
            aiohttp.ClientSession] = aiohttp.ClientSession()
        self.dialogue_manager: Optional[RhasspyActor] = None

        self.download_status: typing.List[str] = []
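
A minimal construction sketch for reference (directory values are assumptions, not from the source):

    # Hypothetical usage; profile and directory names are illustrative only.
    core = RhasspyCore(
        profile_name="en",
        system_profiles_dir="profiles",
        user_profiles_dir=os.path.expanduser("~/.config/rhasspy/profiles"),
    )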
Code example #2
File: app.py  Project: robocon2011/rhasspy
def start_rhasspy() -> None:
    global core

    default_settings = Profile.load_defaults(profiles_dirs)

    # Get name of profile
    profile_name = (
        args.profile
        or os.environ.get("RHASSPY_PROFILE", None)
        or pydash.get(default_settings, "rhasspy.default_profile", "en")
    )

    # Load core
    core = RhasspyCore(profile_name, profiles_dirs)

    # Set environment variables
    os.environ["RHASSPY_BASE_DIR"] = os.getcwd()
    os.environ["RHASSPY_PROFILE"] = core.profile.name
    os.environ["RHASSPY_PROFILE_DIR"] = core.profile.write_dir()

    # Add profile settings from the command line
    extra_settings = {}
    for key, value in args.set:
        try:
            value = json.loads(value)
        except json.JSONDecodeError:
            # Not valid JSON; keep the raw string value
            pass

        logger.debug("Profile: %s=%s", key, value)
        extra_settings[key] = value
        core.profile.set(key, value)

    core.start()
    logger.info("Started")
Code example #3
File: app.py  Project: dony71/rhasspy
def start_rhasspy() -> None:
    global core

    default_settings = Profile.load_defaults(profiles_dirs)

    # Get name of profile
    profile_name = args.profile \
        or os.environ.get('RHASSPY_PROFILE', None) \
        or pydash.get(default_settings, 'rhasspy.default_profile', 'en')

    # Load core
    core = RhasspyCore(profile_name, profiles_dirs)

    # Set environment variables
    os.environ['RHASSPY_BASE_DIR'] = os.getcwd()
    os.environ['RHASSPY_PROFILE'] = core.profile.name
    os.environ['RHASSPY_PROFILE_DIR'] = core.profile.write_dir()

    # Add profile settings from the command line
    extra_settings = {}
    for key, value in args.set:
        try:
            value = json.loads(value)
        except json.JSONDecodeError:
            # Not valid JSON; keep the raw string value
            pass

        logger.debug('Profile: %s=%s', key, value)
        extra_settings[key] = value
        core.profile.set(key, value)

    core.start()
Code example #4
class RhasspyCore:
    """Core class for Rhasspy functionality."""
    def __init__(
        self,
        profile_name: str,
        system_profiles_dir: str,
        user_profiles_dir: str,
        actor_system: Optional[ActorSystem] = None,
    ) -> None:

        self._logger = logging.getLogger(self.__class__.__name__)
        self.profiles_dirs: List[str] = [
            user_profiles_dir, system_profiles_dir
        ]
        self.profile_name = profile_name
        self.actor_system = actor_system

        self.profile = Profile(self.profile_name, system_profiles_dir,
                               user_profiles_dir)
        self._logger.debug("Loaded profile from %s", self.profile.json_path)
        self._logger.debug("Profile files will be written to %s",
                           self.profile.write_path())

        self.defaults = Profile.load_defaults(system_profiles_dir)

        self.loop = asyncio.get_event_loop()
        self._session: Optional[
            aiohttp.ClientSession] = aiohttp.ClientSession()
        self.dialogue_manager: Optional[RhasspyActor] = None

        self.download_status: typing.List[str] = []

    # -------------------------------------------------------------------------

    @property
    def session(self) -> aiohttp.ClientSession:
        """Get HTTP client session."""
        assert self._session is not None
        return self._session

    # -------------------------------------------------------------------------

    async def start(
        self,
        preload: Optional[bool] = None,
        block: bool = True,
        timeout: float = 60,
        observer: Optional[RhasspyActor] = None,
    ) -> None:
        """Start Rhasspy core."""

        if self.actor_system is None:
            self.actor_system = ActorSystem()

        if preload is None:
            preload = self.profile.get("rhasspy.preload_profile", False)

        assert self.actor_system is not None
        self.dialogue_manager = self.actor_system.createActor(DialogueManager)
        with self.actor_system.private() as sys:
            await sys.async_ask(
                self.dialogue_manager,
                ConfigureEvent(
                    self.profile,
                    preload=preload,
                    ready=block,
                    transitions=False,
                    load_timeout_sec=30,
                    observer=observer,
                ),
            )

            # Block until ready
            if block:
                await sys.async_listen(timeout)

    # -------------------------------------------------------------------------

    async def get_microphones(self,
                              system: Optional[str] = None) -> Dict[Any, Any]:
        """Get available audio recording devices."""
        assert self.actor_system is not None
        with self.actor_system.private() as sys:
            result = await sys.async_ask(self.dialogue_manager,
                                         GetMicrophones(system))
            assert isinstance(result, dict), result
            return result

    async def test_microphones(self,
                               system: Optional[str] = None) -> Dict[Any, Any]:
        """Listen to all microphones and determine if they're live."""
        assert self.actor_system is not None
        with self.actor_system.private() as sys:
            result = await sys.async_ask(self.dialogue_manager,
                                         TestMicrophones(system))
            assert isinstance(result, dict), result
            return result

    async def get_speakers(self,
                           system: Optional[str] = None) -> Dict[Any, Any]:
        """Get available audio playback devices."""
        assert self.actor_system is not None
        with self.actor_system.private() as sys:
            result = await sys.async_ask(self.dialogue_manager,
                                         GetSpeakers(system))
            assert isinstance(result, dict), result
            return result

    # -------------------------------------------------------------------------

    def listen_for_wake(self) -> None:
        """Tell Rhasspy to start listening for a wake word."""
        assert self.actor_system is not None
        self.actor_system.tell(self.dialogue_manager, ListenForWakeWord())

    async def listen_for_command(
        self,
        handle: bool = True,
        timeout: Optional[float] = None,
        entity: Optional[str] = None,
        value: Optional[str] = None,
    ) -> Dict[str, Any]:
        """Block until a voice command has been spoken. Optionally handle it."""
        assert self.actor_system is not None
        with self.actor_system.private() as sys:
            entities = None
            if entity is not None:
                entities = [{"entity": entity, "value": value}]

            result = await sys.async_ask(
                self.dialogue_manager,
                ListenForCommand(handle=handle,
                                 timeout=timeout,
                                 entities=entities),
            )
            assert isinstance(result, dict), result

            return result

    async def record_command(self,
                             timeout: Optional[float] = None) -> VoiceCommand:
        """Record a single voice command."""
        assert self.actor_system is not None
        with self.actor_system.private() as sys:
            result = await sys.async_ask(self.dialogue_manager,
                                         GetVoiceCommand(timeout=timeout))
            assert isinstance(result, VoiceCommand), result
            return result

    # -------------------------------------------------------------------------

    async def transcribe_wav(self, wav_data: bytes) -> WavTranscription:
        """Transcribe text from WAV buffer."""
        assert self.actor_system is not None
        with self.actor_system.private() as sys:
            result = await sys.async_ask(self.dialogue_manager,
                                         TranscribeWav(wav_data, handle=False))
            assert isinstance(result, WavTranscription), result
            return result

    async def recognize_intent(self,
                               text: str,
                               wakeId: str = "") -> IntentRecognized:
        """Recognize an intent from text."""
        assert self.actor_system is not None
        with self.actor_system.private() as sys:
            # Fix casing
            dict_casing = self.profile.get("speech_to_text.dictionary_casing",
                                           "")
            if dict_casing == "lower":
                text = text.lower()
            elif dict_casing == "upper":
                text = text.upper()

            # Replace numbers
            if self.profile.get("intent.replace_numbers", True):
                language = self.profile.get("language", "")
                if not language:
                    language = None

                # 75 -> seventy five
                text = numbers_to_words(text, language=language)

            result = await sys.async_ask(self.dialogue_manager,
                                         RecognizeIntent(text, handle=False))
            assert isinstance(result, IntentRecognized), result

            # Add slots
            intent_slots = {}
            for ev in result.intent.get("entities", []):
                intent_slots[ev["entity"]] = ev["value"]

            result.intent["slots"] = intent_slots

            # Add wake/site ID
            result.intent["wakeId"] = wakeId
            result.intent["siteId"] = self.profile.get("mqtt.site_id",
                                                       "default")

            return result

    async def handle_intent(self, intent: Dict[str, Any]) -> IntentHandled:
        """Handle an intent."""
        assert self.actor_system is not None
        with self.actor_system.private() as sys:
            result = await sys.async_ask(self.dialogue_manager,
                                         HandleIntent(intent))
            assert isinstance(result, IntentHandled), result
            return result

    # -------------------------------------------------------------------------

    def start_recording_wav(self, buffer_name: str = "") -> None:
        """Record audio data to a named buffer."""
        assert self.actor_system is not None
        self.actor_system.tell(self.dialogue_manager,
                               StartRecordingToBuffer(buffer_name))

    async def stop_recording_wav(self, buffer_name: str = "") -> AudioData:
        """Stop recording audio data to a named buffer."""
        assert self.actor_system is not None
        with self.actor_system.private() as sys:
            result = await sys.async_ask(self.dialogue_manager,
                                         StopRecordingToBuffer(buffer_name))
            assert isinstance(result, AudioData), result
            return result

    # -------------------------------------------------------------------------

    def play_wav_data(self, wav_data: bytes) -> None:
        """Play WAV buffer through audio playback system."""
        assert self.actor_system is not None
        self.actor_system.tell(self.dialogue_manager, PlayWavData(wav_data))

    def play_wav_file(self, wav_path: str) -> None:
        """Play WAV file through audio playback system."""
        assert self.actor_system is not None
        self.actor_system.tell(self.dialogue_manager, PlayWavFile(wav_path))

    # -------------------------------------------------------------------------

    async def get_word_pronunciations(self,
                                      words: List[str],
                                      n: int = 5) -> WordPronunciations:
        """Look up or guess pronunciations for a word."""
        assert self.actor_system is not None
        with self.actor_system.private() as sys:
            result = await sys.async_ask(self.dialogue_manager,
                                         GetWordPronunciations(words, n))
            assert isinstance(result, WordPronunciations), result
            return result

    async def get_word_phonemes(self, word: str) -> WordPhonemes:
        """Get eSpeak phonemes for a word."""
        assert self.actor_system is not None
        with self.actor_system.private() as sys:
            result = await sys.async_ask(self.dialogue_manager,
                                         GetWordPhonemes(word))
            assert isinstance(result, WordPhonemes), result
            return result

    async def speak_word(self, word: str) -> WordSpoken:
        """Speak a single word."""
        assert self.actor_system is not None
        with self.actor_system.private() as sys:
            result = await sys.async_ask(self.dialogue_manager,
                                         SpeakWord(word))
            assert isinstance(result, WordSpoken), result
            return result

    async def speak_sentence(
        self,
        sentence: str,
        play: bool = True,
        language: Optional[str] = None,
        voice: Optional[str] = None,
        siteId: Optional[str] = None,
    ) -> SentenceSpoken:
        """Speak an entire sentence using text to speech system."""
        assert self.actor_system is not None
        with self.actor_system.private() as sys:
            result = await sys.async_ask(
                self.dialogue_manager,
                SpeakSentence(sentence,
                              play=play,
                              language=language,
                              voice=voice,
                              siteId=siteId),
            )
            assert isinstance(result, SentenceSpoken), result
            return result

    # -------------------------------------------------------------------------

    async def train(
        self,
        reload_actors: bool = True,
        no_cache: bool = False
    ) -> Union[ProfileTrainingComplete, ProfileTrainingFailed]:
        """Generate speech/intent artifacts for profile."""
        if no_cache:
            # Delete doit database
            db_path = Path(self.profile.write_path(".doit.db"))
            if db_path.is_file():
                db_path.unlink()

        assert self.actor_system is not None
        with self.actor_system.private() as sys:
            result = await sys.async_ask(
                self.dialogue_manager,
                TrainProfile(reload_actors=reload_actors))
            assert isinstance(
                result,
                (ProfileTrainingComplete, ProfileTrainingFailed)), result
            return result

    # -------------------------------------------------------------------------

    def mqtt_publish(self, topic: str, payload: bytes) -> None:
        """Publish a payload to an MQTT topic."""
        assert self.actor_system is not None
        with self.actor_system.private() as sys:
            sys.tell(self.dialogue_manager, MqttPublish(topic, payload))

    # -------------------------------------------------------------------------

    async def wakeup_and_wait(
            self) -> Union[WakeWordDetected, WakeWordNotDetected]:
        """Listen for a wake word to be detected or not."""
        assert self.actor_system is not None
        with self.actor_system.private() as sys:
            result = await sys.async_ask(self.dialogue_manager,
                                         ListenForWakeWord())
            assert isinstance(result,
                              (WakeWordDetected, WakeWordNotDetected)), result

            return result

    # -------------------------------------------------------------------------

    async def get_actor_states(self) -> Dict[str, str]:
        """Get the current state of each Rhasspy actor."""
        assert self.actor_system is not None
        with self.actor_system.private() as sys:
            result = await sys.async_ask(self.dialogue_manager,
                                         GetActorStates())
            assert isinstance(result, dict), result
            return result

    # -------------------------------------------------------------------------

    def send_audio_data(self, data: AudioData) -> None:
        """Send raw audio data to Rhasspy."""
        assert self.actor_system is not None
        self.actor_system.tell(self.dialogue_manager, data)

    # -------------------------------------------------------------------------

    async def shutdown(self) -> None:
        """Shut down actors."""
        # Clear environment variables
        rhasspy_vars = [v for v in os.environ if v.startswith("RHASSPY")]

        for v in rhasspy_vars:
            del os.environ[v]

        # Stop actor system
        if self.actor_system is not None:
            self.actor_system.shutdown()
            self.actor_system = None

        if self._session is not None:
            await self._session.close()
            self._session = None

    # -------------------------------------------------------------------------

    def check_profile(self) -> Dict[str, str]:
        """Return True if the profile has all necessary files downloaded."""
        output_dir = self.profile.write_path()
        missing_files: Dict[str, Any] = {}

        # Load configuration
        conditions = self.profile.get("download.conditions", {})
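        # Expected shape of "download.conditions" (hypothetical values), as
        # used here and in download_profile below:
        #   setting name -> comparison value (mini-language) -> {dest file: source file}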

        # Check conditions
        for setting_name in conditions:
            real_value = self.profile.get(setting_name, None)

            # Compare setting values
            for setting_value, files_dict in conditions[setting_name].items():
                compare_func = self._get_compare_func(setting_value)

                if compare_func(real_value):
                    # Check if file needs to be downloaded
                    for dest_name in files_dict:
                        dest_path = os.path.join(output_dir, dest_name)
                        if not os.path.exists(dest_path) or (
                                os.path.getsize(dest_path) == 0):
                            missing_files[dest_path] = (setting_name,
                                                        setting_value)

        return missing_files

    def _get_compare_func(self, value: str):
        """Use mini-language to allow for profile setting value comparison."""
        if value.startswith(">="):
            f_value = float(value[2:])
            return lambda v: v >= f_value

        if value.startswith("<="):
            f_value = float(value[2:])
            return lambda v: v <= f_value

        if value.startswith(">"):
            f_value = float(value[1:])
            return lambda v: v > f_value

        if value.startswith("<"):
            f_value = float(value[1:])
            return lambda v: v < f_value

        if value.startswith("!"):
            return lambda v: v != value

        return lambda v: str(v) == value

    def _unpack_gz(self, src_path, temp_dir):
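        """Decompress a single .gz file into temp_dir and return the new path.

        e.g. (hypothetical paths): _unpack_gz("/cache/model.txt.gz", "/tmp/t")
        returns "/tmp/t/model.txt".
        """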
        # Strip off .gz and put relative to temporary directory
        temp_file_path = os.path.join(temp_dir,
                                      os.path.split(src_path[:-3])[1])

        # Decompress single file
        with open(src_path, "rb") as src_file:
            with open(temp_file_path, "wb") as temp_file:
                temp_file.write(gzip.decompress(src_file.read()))

        return temp_file_path

    # -------------------------------------------------------------------------

    async def download_profile(self, delete=False, chunk_size=4096) -> None:
        """Download all necessary profile files from the internet and extract them."""
        self.download_status = []

        output_dir = Path(self.profile.write_path())
        download_dir = Path(
            self.profile.write_path(
                self.profile.get("download.cache_dir", "download")))

        if delete and download_dir.exists():
            self._logger.debug("Deleting download cache at %s", download_dir)
            shutil.rmtree(download_dir)

        download_dir.mkdir(parents=True, exist_ok=True)

        # Load configuration
        conditions = self.profile.get("download.conditions", {})
        all_files = self.profile.get("download.files", {})
        files_to_copy = {}
        files_to_extract: Dict[str, List[Tuple[str, str]]] = defaultdict(list)
        files_to_download: Set[str] = set()

        async def download_file(url, filename):
            try:
                status = f"Downloading {url} to {filename}"
                self.download_status.append(status)
                self._logger.debug(status)
                os.makedirs(os.path.dirname(filename), exist_ok=True)

                async with self.session.get(url) as response:
                    with open(filename, "wb") as out_file:
                        async for chunk in response.content.iter_chunked(
                                chunk_size):
                            out_file.write(chunk)

                status = f"Downloaded {filename}"
                self.download_status.append(status)
                self._logger.debug(status)
            except Exception:
                self._logger.exception(url)

                # Try to delete partially downloaded file
                try:
                    status = f"Failed to download {filename}"
                    self.download_status.append(status)
                    self._logger.debug(status)
                    os.unlink(filename)
                except Exception:
                    pass

        # Check conditions
        machine_type = platform.machine()
        download_tasks = []
        for setting_name in conditions:
            real_value = self.profile.get(setting_name, None)

            # Compare setting values
            for setting_value, files_dict in conditions[setting_name].items():
                compare_func = self._get_compare_func(setting_value)

                if compare_func(real_value):
                    # Check if file needs to be downloaded
                    for dest_name, src_name in files_dict.items():
                        dest_path = os.path.join(output_dir, dest_name)
                        if ":" in src_name:
                            # File is an archive
                            src_name, src_extract = src_name.split(":",
                                                                   maxsplit=1)
                            src_path = os.path.join(download_dir, src_name)
                            files_to_extract[src_path].append(
                                (dest_path, src_extract))
                        else:
                            # Just a regular file
                            src_path = os.path.join(download_dir, src_name)
                            files_to_copy[src_path] = dest_path

                        # Get download/cache info for file
                        src_info = all_files.get(src_name, None)
                        if src_info is None:
                            self._logger.error("No entry for download file %s",
                                               src_name)
                            continue

                        if not src_info.get("cache", True):
                            # File will be downloaded in-place
                            files_to_copy.pop(src_path, None)
                            src_path = dest_path

                        # Check if file is already in cache
                        if os.path.exists(src_path) and (
                                os.path.getsize(src_path) > 0):
                            self._logger.debug("Using cached %s for %s",
                                               src_path, dest_name)
                        else:
                            # File needs to be downloaded
                            src_url = src_info.get("url", None)
                            if src_url is None:
                                # Try with machine type
                                if machine_type in src_info:
                                    src_url = src_info[machine_type]["url"]
                                else:
                                    self._logger.error(
                                        "No entry for download file %s with machine type %s",
                                        src_name,
                                        machine_type,
                                    )
                                    continue

                            # Schedule file for download
                            if src_url not in files_to_download:
                                download_tasks.append(
                                    self.loop.create_task(
                                        download_file(src_url, src_path)))
                                files_to_download.add(src_url)

        # Wait for downloads to complete
        await asyncio.gather(*download_tasks)

        # Copy files
        for src_path, dest_path in files_to_copy.items():
            # Remove existing file/directory
            if os.path.isdir(dest_path):
                self._logger.debug("Removing %s", dest_path)
                shutil.rmtree(dest_path)
            elif os.path.isfile(dest_path):
                self._logger.debug("Removing %s", dest_path)
                os.unlink(dest_path)

            # Create necessary directories
            os.makedirs(os.path.dirname(dest_path), exist_ok=True)

            # Copy file/directory as is
            status = f"Copying {src_path} to {dest_path}"
            self.download_status.append(status)
            self._logger.debug(status)
            if os.path.isdir(src_path):
                shutil.copytree(src_path, dest_path)
            else:
                shutil.copy2(src_path, dest_path)

        # Extract/install files
        unpack_extensions = [
            ext for fmt in shutil.get_unpack_formats() for ext in fmt[1]
        ]

        for src_path, extract_paths in files_to_extract.items():
            # Check if the file extension will be understood by shutil.unpack_archive
            known_format = False
            for ext in unpack_extensions:
                if src_path.endswith(ext):
                    known_format = True

            def unpack_default(temp_dir):
                return shutil.unpack_archive(src_path, temp_dir)

            def unpack_gz(temp_dir):
                return self._unpack_gz(src_path, temp_dir)

            unpack = unpack_default

            if not known_format:
                # Handle special archives
                if src_path.endswith(".gz"):
                    # Single file compressed with gzip
                    unpack = unpack_gz
                else:
                    # Very bad situation
                    self._logger.warning(
                        "Unknown archive extension %s. This is probably going to fail.",
                        src_path,
                    )

            # Cached file is an archive. Unpack first.
            with tempfile.TemporaryDirectory() as temp_dir:
                unpack(temp_dir)

                for dest_path, src_extract in extract_paths:
                    src_exclude: Dict[str, List[str]] = {}
                    if "!" in src_extract:
                        extract_parts = src_extract.split("!")
                        src_extract = extract_parts[0]
                        src_exclude = defaultdict(list)
                        for exclude_path in extract_parts[1:]:
                            exclude_path = os.path.join(temp_dir, exclude_path)
                            exclude_dir, exclude_name = os.path.split(
                                exclude_path)
                            src_exclude[exclude_dir].append(exclude_name)

                    # Remove existing file/directory
                    if os.path.isdir(dest_path):
                        self._logger.debug("Removing %s", dest_path)
                        shutil.rmtree(dest_path)
                    elif os.path.isfile(dest_path):
                        self._logger.debug("Removing %s", dest_path)
                        os.unlink(dest_path)

                    # Create necessary directories
                    os.makedirs(os.path.dirname(dest_path), exist_ok=True)

                    if src_extract.endswith(":"):
                        # Unpack .gz inside archive
                        # Use a separate name so the outer loop's src_path is not clobbered
                        gz_path = os.path.join(temp_dir, src_extract[:-1])
                        extract_path = self._unpack_gz(gz_path, temp_dir)
                    else:
                        # Regular file
                        extract_path = os.path.join(temp_dir, src_extract)

                    # Copy specific file/directory
                    status = f"Copying {extract_path} to {dest_path}"
                    self.download_status.append(status)
                    self._logger.debug(status)
                    if os.path.isdir(extract_path):
                        if src_exclude:
                            # Ignore some files
                            # pylint: disable=W0640
                            shutil.copytree(
                                extract_path,
                                dest_path,
                                ignore=lambda d, fs: src_exclude[d],
                            )
                        else:
                            # Copy everything
                            shutil.copytree(extract_path, dest_path)
                    else:
                        shutil.copy2(extract_path, dest_path)

    # -------------------------------------------------------------------------

    async def get_problems(self) -> Dict[str, Any]:
        """Return a dictionary with problems from each actor."""
        assert self.actor_system is not None
        with self.actor_system.private() as sys:
            result = await sys.async_ask(self.dialogue_manager, GetProblems())
            assert isinstance(result, Problems), result
            return result.problems
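
A minimal end-to-end sketch of the class above (hypothetical usage, not from the source; assumes the same imports, and that WavTranscription exposes the transcription as .text):

    async def demo() -> None:
        core = RhasspyCore("en", "profiles",
                           os.path.expanduser("~/.config/rhasspy/profiles"))
        await core.start(preload=False)
        try:
            with open("command.wav", "rb") as wav_file:
                transcription = await core.transcribe_wav(wav_file.read())
            recognized = await core.recognize_intent(transcription.text)
            await core.handle_intent(recognized.intent)
        finally:
            await core.shutdown()

    asyncio.get_event_loop().run_until_complete(demo())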
Code example #5
File: __init__.py  Project: patchedsoul/rhasspy
async def main() -> None:
    global mic_stdin_running, mic_stdin_thread

    # Parse command-line arguments
    parser = argparse.ArgumentParser(description="Rhasspy")
    parser.add_argument("--profile",
                        "-p",
                        required=True,
                        type=str,
                        help="Name of profile to use")
    parser.add_argument(
        "--system-profiles",
        type=str,
        help="Directory with base profile files (read only)",
        default=os.path.join(os.getcwd(), "profiles"),
    )
    parser.add_argument(
        "--user-profiles",
        type=str,
        help="Directory with user profile files (read/write)",
        default=os.path.expanduser("~/.config/rhasspy/profiles"),
    )
    parser.add_argument(
        "--set",
        "-s",
        nargs=2,
        action="append",
        help="Set a profile setting value",
        default=[],
    )
    parser.add_argument("--debug",
                        action="store_true",
                        help="Print DEBUG log to console")
    parser.add_argument(
        "--no-check",
        action="store_true",
        help="Don't check profile for necessary files",
    )

    sub_parsers = parser.add_subparsers(dest="command")
    sub_parsers.required = True

    # info
    info_parser = sub_parsers.add_parser("info", help="Profile information")
    info_parser.add_argument("--defaults",
                             action="store_true",
                             help="Only print default settings")

    sentences_parser = sub_parsers.add_parser(
        "sentences", help="Print profile sentences.ini")

    # validate
    # validate_parser = sub_parsers.add_parser(
    #     "validate", help="Validate profile against schema"
    # )

    # wav2text
    wav2text_parser = sub_parsers.add_parser(
        "wav2text", help="WAV file to text transcription")
    wav2text_parser.add_argument("wav_files",
                                 nargs="*",
                                 help="Paths to WAV files")

    # text2intent
    text2intent_parser = sub_parsers.add_parser("text2intent",
                                                help="Text parsed to intent")
    text2intent_parser.add_argument("sentences",
                                    nargs="*",
                                    help="Sentences to parse")
    text2intent_parser.add_argument("--handle",
                                    action="store_true",
                                    help="Pass result to intent handler")

    # wav2intent
    wav2intent_parser = sub_parsers.add_parser(
        "wav2intent", help="WAV file to parsed intent")
    wav2intent_parser.add_argument("wav_files",
                                   nargs="*",
                                   help="Paths to WAV files")
    wav2intent_parser.add_argument("--handle",
                                   action="store_true",
                                   help="Pass result to intent handler")

    # train
    train_parser = sub_parsers.add_parser("train", help="Re-train profile")

    # record
    # record_parser = sub_parsers.add_parser('record', help='Record test phrases for profile')
    # record_parser.add_argument('--directory', help='Directory to write WAV files and intent JSON files')

    # record-wake
    # record_wake_parser = sub_parsers.add_parser('record-wake', help='Record wake word examples for profile')
    # record_wake_parser.add_argument('--directory', help='Directory to write WAV files')
    # record_wake_parser.add_argument('--negative', action='store_true', help='Record negative examples (not the wake word)')

    # tune
    # tune_parser = sub_parsers.add_parser('tune', help='Tune speech acoustic model for profile')
    # tune_parser.add_argument('--directory', help='Directory with WAV files and intent JSON files')

    # tune-wake
    # tune_wake_parser = sub_parsers.add_parser('tune-wake', help='Tune wake acoustic model for profile')
    # tune_wake_parser.add_argument('--directory', help='Directory with WAV files')

    # test
    # test_parser = sub_parsers.add_parser('test', help='Test speech/intent recognizers for profile')
    # test_parser.add_argument('directory', help='Directory with WAV files and intent JSON files')

    # test-wake
    # test_wake_parser = sub_parsers.add_parser(
    #     "test-wake", help="Test wake word examples for profile"
    # )
    # test_wake_parser.add_argument("directory", help="Directory with WAV files")
    # test_wake_parser.add_argument(
    #     "--threads", type=int, default=4, help="Number of threads to use"
    # )
    # test_wake_parser.add_argument(
    #     "--system", type=str, default=None, help="Override wake word system"
    # )

    # mic2wav
    mic2wav_parser = sub_parsers.add_parser("mic2wav",
                                            help="Voice command to WAV data")
    mic2wav_parser.add_argument(
        "--timeout",
        type=float,
        default=None,
        help="Maximum number of seconds to record (default=profile)",
    )

    # mic2text
    mic2text_parser = sub_parsers.add_parser(
        "mic2text", help="Voice command to text transcription")
    mic2text_parser.add_argument(
        "--timeout",
        type=float,
        default=None,
        help="Maximum number of seconds to record (default=profile)",
    )

    # mic2intent
    mic2intent_parser = sub_parsers.add_parser(
        "mic2intent", help="Voice command to parsed intent")
    mic2intent_parser.add_argument("--stdin",
                                   action="store_true",
                                   help="Read audio data from stdin")
    mic2intent_parser.add_argument("--handle",
                                   action="store_true",
                                   help="Pass result to intent handler")
    mic2intent_parser.add_argument(
        "--timeout",
        type=float,
        default=None,
        help="Maximum number of seconds to record (default=profile)",
    )

    # word2phonemes
    word2phonemes_parser = sub_parsers.add_parser(
        "word2phonemes", help="Get pronunciation(s) for word(s)")
    word2phonemes_parser.add_argument("words",
                                      nargs="*",
                                      help="Word(s) to pronounce")
    word2phonemes_parser.add_argument("-n",
                                      type=int,
                                      default=1,
                                      help="Maximum number of pronunciations")

    # word2wav
    word2wav_parser = sub_parsers.add_parser("word2wav", help="Pronounce word")
    word2wav_parser.add_argument("word", help="Word to pronounce")

    # wav2mqtt
    wav2mqtt_parser = sub_parsers.add_parser("wav2mqtt",
                                             help="Push WAV file(s) to MQTT")
    wav2mqtt_parser.add_argument("wav_files",
                                 nargs="*",
                                 help="Paths to WAV files")
    wav2mqtt_parser.add_argument(
        "--frames",
        type=int,
        default=480,
        help="WAV frames per MQTT message (default=0 for all)",
    )
    wav2mqtt_parser.add_argument("--site-id",
                                 type=str,
                                 default="default",
                                 help="Hermes siteId (default=default)")
    wav2mqtt_parser.add_argument(
        "--silence-before",
        type=float,
        default=0,
        help="Seconds of silence to add before each WAV",
    )
    wav2mqtt_parser.add_argument(
        "--silence-after",
        type=float,
        default=0,
        help="Seconds of silence to add after each WAV",
    )
    wav2mqtt_parser.add_argument(
        "--pause",
        type=float,
        default=0.01,
        help="Seconds to wait before sending next chunk (default=0.01)",
    )

    # text2wav
    text2wav_parser = sub_parsers.add_parser(
        "text2wav", help="Output WAV file using text to speech system")
    text2wav_parser.add_argument("sentence", help="Sentence to speak")

    # text2speech
    text2speech_parser = sub_parsers.add_parser(
        "text2speech", help="Speak sentences using text to speech system")
    text2speech_parser.add_argument("sentences",
                                    nargs="*",
                                    help="Sentences to speak")

    # sleep
    sleep_parser = sub_parsers.add_parser("sleep", help="Wait for wake word")

    # download
    download_parser = sub_parsers.add_parser("download",
                                             help="Download profile files")
    download_parser.add_argument(
        "--delete",
        action="store_true",
        help="Clear download cache before downloading")

    # check
    check_parser = sub_parsers.add_parser(
        "check", help="Check downloaded profile files")

    # -------------------------------------------------------------------------

    args = parser.parse_args()

    if args.debug:
        logging.root.setLevel(logging.DEBUG)

    profiles_dirs = [args.system_profiles, args.user_profiles]
    logger.debug(profiles_dirs)

    default_settings = Profile.load_defaults(args.system_profiles)

    # Create rhasspy core
    from rhasspy.core import RhasspyCore

    core = RhasspyCore(args.profile, args.system_profiles, args.user_profiles)

    # Add profile settings from the command line
    extra_settings = {}
    for key, value in args.set:
        try:
            value = json.loads(value)
        except json.JSONDecodeError:
            # Not valid JSON; keep the raw string value
            pass

        logger.debug("Profile: %s=%s", key, value)
        extra_settings[key] = value
        core.profile.set(key, value)

    # Handle command
    if args.command == "info":
        if args.defaults:
            # Print default settings
            json.dump(core.defaults, sys.stdout, indent=4)
        else:
            # Print profile settings
            json.dump(core.profile.json, sys.stdout, indent=4)
    # elif args.command == "validate":
    #     from cerberus import Validator

    #     schema_path = os.path.join(os.path.dirname(__file__), "profile_schema.json")
    #     with open(schema_path, "r") as schema_file:
    #         v = Validator(json.load(schema_file))
    #         if v.validate(core.profile.json):
    #             print("VALID")
    #         else:
    #             print("INVALID")
    #             for err in v._errors:
    #                 print(err)
    elif args.command == "sentences":
        sentences_path = core.profile.read_path(
            core.profile.get("speech_to_text.sentences_ini", "sentences.ini"))

        with open(sentences_path, "r") as sentences_file:
            sys.stdout.write(sentences_file.read())
    else:
        # Patch profile
        profile = core.profile
        profile.set("rhasspy.listen_on_start", False)
        profile.set("rhasspy.preload_profile", False)

        if args.command == "wav2mqtt":
            profile.set("mqtt.enabled", True)
        elif args.command in ["mic2intent"] and args.stdin:
            profile.set("microphone.system", "stdin")
            profile.set("microphone.stdin.auto_start", False)
            mic_stdin_running = True
        elif args.command == "text2wav":
            profile.set("sounds.system", "dummy")

        # Set environment variables
        os.environ["RHASSPY_BASE_DIR"] = os.getcwd()
        os.environ["RHASSPY_PROFILE"] = core.profile.name
        os.environ["RHASSPY_PROFILE_DIR"] = core.profile.write_dir()

        # Execute command
        command_funcs = {
            "wav2text": wav2text,
            "text2intent": text2intent,
            "wav2intent": wav2intent,
            "train": train_profile,
            # 'record': record,
            # 'record-wake': record_wake,
            # 'tune': tune,
            # 'tune-wake': tune_wake,
            # 'test': test,
            # "test-wake": test_wake,
            "mic2text": mic2text,
            "mic2intent": mic2intent,
            "mic2wav": mic2wav,
            "word2phonemes": word2phonemes,
            "word2wav": word2wav,
            "wav2mqtt": wav2mqtt,
            "text2wav": text2wav,
            "text2speech": text2speech,
            "sleep": sleep,
            "download": download,
            "check": check,
        }

        if not args.command in ["test-wake"]:
            # Automatically start core
            await core.start()

        if not args.no_check and (args.command not in ["check", "download"]):
            # Verify that profile has necessary files
            missing_files = core.check_profile()
            if missing_files:
                logger.critical(
                    "Missing required files for %s: %s. Please run the download command and try again.",
                    profile.name,
                    list(missing_files),
                )
                sys.exit(1)

        if mic_stdin_running:
            logger.debug("Reading audio data from stdin")
            mic_stdin_thread = threading.Thread(target=read_audio_stdin,
                                                args=(core, ),
                                                daemon=True)
            mic_stdin_thread.start()

        # Run command
        try:
            await command_funcs[args.command](core, profile, args)

            if mic_stdin_thread is not None:
                mic_stdin_running = False
                mic_stdin_thread.join()
        finally:
            await core.shutdown()
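
For orientation, hypothetical invocations of this CLI (the module entry-point name is an assumption):

    # python -m rhasspy -p en train
    # python -m rhasspy -p en --set mqtt.enabled true wav2mqtt command.wav
    # python -m rhasspy -p en --debug mic2text --timeout 10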
Code example #6
File: __init__.py  Project: canadaycrew/rhasspy
def train_profile(profile_dir: Path,
                  profile: Profile) -> Tuple[int, List[str]]:

    # Compact
    def ppath(query, default=None, write=False):
        return utils_ppath(profile, profile_dir, query, default, write=write)

    language = profile.get("language", "")

    # Inputs
    stt_system = profile.get("speech_to_text.system")
    stt_prefix = f"speech_to_text.{stt_system}"

    # intent_whitelist = ppath("training.intent-whitelist", "intent_whitelist")
    sentences_ini = ppath("speech_to_text.sentences_ini", "sentences.ini")
    sentences_dir = ppath("speech_to_text.sentences_dir", "sentences.dir")
    base_dictionary = ppath(f"{stt_prefix}.base_dictionary",
                            "base_dictionary.txt")
    base_language_model = ppath(f"{stt_prefix}.base_language_model",
                                "base_language_model.txt")
    base_language_model_weight = float(
        profile.get(f"{stt_prefix}.mix_weight", 0))
    g2p_model = ppath(f"{stt_prefix}.g2p_model", "g2p.fst")
    acoustic_model_type = stt_system

    # Pocketsphinx
    acoustic_model = ppath(f"{stt_prefix}.acoustic_model", "acoustic_model")

    # Kaldi
    kaldi_dir = Path(
        os.path.expandvars(profile.get(f"{stt_prefix}.kaldi_dir",
                                       "/opt/kaldi")))
    kaldi_graph_dir = acoustic_model / profile.get(f"{stt_prefix}.graph",
                                                   "graph")

    if acoustic_model_type == "kaldi":
        # Kaldi acoustic models are inside model directory
        acoustic_model = ppath(f"{stt_prefix}.model_dir", "model")
    else:
        _LOGGER.warning("Unsupported acoustic model type: %s",
                        acoustic_model_type)

    # ignore/upper/lower
    word_casing = profile.get("speech_to_text.dictionary_casing",
                              "ignore").lower()

    # default/ignore/upper/lower
    g2p_word_casing = profile.get("speech_to_text.g2p_casing",
                                  word_casing).lower()

    # all/first
    dict_merge_rule = profile.get("speech_to_text.dictionary_merge_rule",
                                  "all").lower()

    # Outputs
    dictionary = ppath(f"{stt_prefix}.dictionary",
                       "dictionary.txt",
                       write=True)
    custom_words = ppath(f"{stt_prefix}.custom_words",
                         "custom_words.txt",
                         write=True)
    language_model = ppath(f"{stt_prefix}.language_model",
                           "language_model.txt",
                           write=True)
    base_language_model_fst = ppath(f"{stt_prefix}.base_language_model_fst",
                                    "base_language_model.fst",
                                    write=True)
    intent_graph = ppath("intent.fsticiffs.intent_graph",
                         "intent.json",
                         write=True)
    intent_fst = ppath("intent.fsticiffs.intent_fst", "intent.fst", write=True)
    vocab = ppath(f"{stt_prefix}.vocabulary", "vocab.txt", write=True)
    unknown_words = ppath(f"{stt_prefix}.unknown_words",
                          "unknown_words.txt",
                          write=True)
    grammar_dir = ppath("speech_to_text.grammars_dir", "grammars", write=True)
    fsts_dir = ppath("speech_to_text.fsts_dir", "fsts", write=True)
    slots_dir = ppath("speech_to_text.slots_dir", "slots", write=True)

    # -----------------------------------------------------------------------------

    # Create cache directories
    for dir_path in [grammar_dir, fsts_dir]:
        dir_path.mkdir(parents=True, exist_ok=True)

    # -----------------------------------------------------------------------------

    ini_paths: List[Path] = get_ini_paths(sentences_ini, sentences_dir)

    # Join ini files into a single combined file and parse
    _LOGGER.debug("Parsing ini file(s): %s", [str(p) for p in ini_paths])

    try:
        intents = get_all_intents(ini_paths)
    except Exception:
        _LOGGER.exception("Failed to parse %s", ini_paths)
        return (1, ["Failed to parse sentences"])

    # -----------------------------------------------------------------------------

    def get_slot_names(item):
        """Yield referenced slot names."""
        if isinstance(item, jsgf.SlotReference):
            yield item.slot_name
        elif isinstance(item, jsgf.Sequence):
            for sub_item in item.items:
                for slot_name in get_slot_names(sub_item):
                    yield slot_name
        elif isinstance(item, jsgf.Rule):
            for slot_name in get_slot_names(item.rule_body):
                yield slot_name

    def number_transform(word):
        """Automatically transform numbers"""
        if not isinstance(word, jsgf.Word):
            # Skip anything besides words
            return

        try:
            n = int(word.text)

            # 75 -> (seventy five):75
            number_text = num2words(n, lang=language).replace("-", " ").strip()
            assert number_text, f"Empty num2words result for {n}"
            number_words = number_text.split()

            if len(number_words) == 1:
                # Easy case, single word
                word.text = number_text
                word.substitution = str(n)
            else:
                # Hard case, split into multiple Words
                return jsgf.Sequence(
                    text=number_text,
                    type=jsgf.SequenceType.GROUP,
                    substitution=str(n),
                    items=[jsgf.Word(w) for w in number_words],
                )
        except ValueError:
            # Not a number
            pass

    def do_intents_to_graph(intents, slot_names, targets):
        sentences, replacements = ini_jsgf.split_rules(intents)

        # Load slot values
        for slot_name in slot_names:
            slot_path = slots_dir / slot_name
            assert slot_path.is_file(), f"Missing slot file at {slot_path}"

            # Parse each non-empty line as a JSGF sentence
            slot_values = []
            with open(slot_path, "r") as slot_file:
                for line in slot_file:
                    line = line.strip()
                    if line:
                        sentence = jsgf.Sentence.parse(line)
                        slot_values.append(sentence)

            # Replace $slot with sentences
            replacements[f"${slot_name}"] = slot_values

        if profile.get("intent.replace_numbers", True):
            # Replace numbers in parsed sentences
            for intent_sentences in sentences.values():
                for sentence in intent_sentences:
                    jsgf.walk_expression(sentence, number_transform,
                                         replacements)

        # Convert to directed graph
        graph = intents_to_graph(intents, replacements)

        # Write graph to JSON file
        json_graph = graph_to_json(graph)
        with open(targets[0], "w") as graph_file:
            json.dump(json_graph, graph_file)

    def task_ini_graph():
        """sentences.ini -> intent.json"""
        slot_names = set()
        for intent_name in intents:
            for item in intents[intent_name]:
                for slot_name in get_slot_names(item):
                    slot_names.add(slot_name)

        # Add slot files as dependencies
        deps = [(slots_dir / slot_name) for slot_name in slot_names]

        # Add profile itself as a dependency
        profile_json_path = profile_dir / "profile.json"
        if profile_json_path.is_file():
            deps.append(profile_json_path)

        return {
            "file_dep": ini_paths + deps,
            "targets": [intent_graph],
            "actions": [(do_intents_to_graph, [intents, slot_names])],
        }

    # -----------------------------------------------------------------------------

    def do_graph_to_fst(intent_graph, targets):
        with open(intent_graph, "r") as graph_file:
            json_graph = json.load(graph_file)

        graph = json_to_graph(json_graph)
        graph_fst = graph_to_fst(graph)

        # Create symbol tables
        isymbols = fst.SymbolTable()
        for symbol, number in graph_fst.input_symbols.items():
            isymbols.add_symbol(symbol, number)

        osymbols = fst.SymbolTable()
        for symbol, number in graph_fst.output_symbols.items():
            osymbols.add_symbol(symbol, number)

        # Compile FST
        compiler = fst.Compiler(isymbols=isymbols,
                                osymbols=osymbols,
                                keep_isymbols=True,
                                keep_osymbols=True)

        compiler.write(graph_fst.intent_fst)
        compiled_fst = compiler.compile()

        # Write to file
        compiled_fst.write(str(targets[0]))

    def task_intent_fst():
        """intent.json -> intent.fst"""
        return {
            "file_dep": [intent_graph],
            "targets": [intent_fst],
            "actions": [(do_graph_to_fst, [intent_graph])],
        }

    # -----------------------------------------------------------------------------

    @create_after(executed="intent_fst")
    def task_language_model():
        """Creates an ARPA language model from intent.fst."""

        if base_language_model_weight > 0:
            yield {
                "name": "base_lm_to_fst",
                "file_dep": [base_language_model],
                "targets": [base_language_model_fst],
                "actions": ["ngramread --ARPA %(dependencies)s %(targets)s"],
            }

        # FST -> n-gram counts
        intent_counts = str(intent_fst) + ".counts"
        yield {
            "name": "intent_counts",
            "file_dep": [intent_fst],
            "targets": [intent_counts],
            "actions": ["ngramcount %(dependencies)s %(targets)s"],
        }

        # n-gram counts -> model
        intent_model = str(intent_fst) + ".model"
        yield {
            "name": "intent_model",
            "file_dep": [intent_counts],
            "targets": [intent_model],
            "actions": ["ngrammake %(dependencies)s %(targets)s"],
        }

        if base_language_model_weight > 0:
            merged_model = Path(str(intent_model) + ".merge")

            # merge
            yield {
                "name":
                "lm_merge",
                "file_dep": [base_language_model_fst, intent_model],
                "targets": [merged_model],
                "actions": [
                    f"ngrammerge --alpha={base_language_model_weight} %(dependencies)s %(targets)s"
                ],
            }

            intent_model = merged_model

        # model -> ARPA
        yield {
            "name": "intent_arpa",
            "file_dep": [intent_model],
            "targets": [language_model],
            "actions": ["ngramprint --ARPA %(dependencies)s > %(targets)s"],
        }

    # -----------------------------------------------------------------------------

    def do_vocab(targets):
        with open(targets[0], "w") as vocab_file:
            input_symbols = fst.Fst.read(str(intent_fst)).input_symbols()
            for i in range(input_symbols.num_symbols()):
                # Critical that we use get_nth_key here when input symbols
                # numbering is discontiguous.
                key = input_symbols.get_nth_key(i)
                symbol = input_symbols.find(key).decode().strip()
                if symbol and not (symbol.startswith("__")
                                   or symbol.startswith("<")):
                    print(symbol, file=vocab_file)

            if base_language_model_weight > 0:
                # Add all words from base dictionary
                with open(base_dictionary, "r") as dict_file:
                    for word in read_dict(dict_file):
                        print(word, file=vocab_file)

    @create_after(executed="language_model")
    def task_vocab():
        """Writes all vocabulary words to a file from intent.fst."""
        return {
            "file_dep": [intent_fst],
            "targets": [vocab],
            "actions": [do_vocab]
        }

    # -----------------------------------------------------------------------------

    def do_dict(dictionary_paths: Iterable[Path], targets):
        with open(targets[0], "w") as dictionary_file:
            if unknown_words.exists():
                unknown_words.unlink()

            dictionary_format = FORMAT_CMU
            if acoustic_model_type == "julius":
                dictionary_format = FORMAT_JULIUS

            make_dict(
                vocab,
                dictionary_paths,
                dictionary_file,
                unknown_path=unknown_words,
                dictionary_format=dictionary_format,
                merge_rule=dict_merge_rule,
                upper=(word_casing == "upper"),
                lower=(word_casing == "lower"),
            )

            if unknown_words.exists() and g2p_model.exists():
                # Generate single pronunciation guesses
                _LOGGER.debug("Guessing pronunciations for unknown word(s)")

                g2p_output = subprocess.check_output(
                    [
                        "phonetisaurus-apply",
                        "--model",
                        str(g2p_model),
                        "--word_list",
                        str(unknown_words),
                        "--nbest",
                        "1",
                    ],
                    universal_newlines=True,
                )

                g2p_transform = lambda w: w
                if g2p_word_casing == "upper":
                    g2p_transform = lambda w: w.upper()
                elif g2p_word_casing == "lower":
                    g2p_transform = lambda w: w.lower()

                # Append to dictionary and custom words
                with open(custom_words, "a") as words_file:
                    with open(unknown_words, "w") as unknown_words_file:
                        for line in g2p_output.splitlines():
                            line = line.strip()
                            word, phonemes = re.split(r"\s+", line, maxsplit=1)
                            word = g2p_transform(word)
                            print(word, phonemes, file=dictionary_file)
                            print(word, phonemes, file=words_file)
                            print(word, phonemes, file=unknown_words_file)
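        # Each phonetisaurus-apply output line is a word followed by its
        # whitespace-separated phonemes, which is what the re.split above
        # expects.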

    @create_after(executed="vocab")
    def task_vocab_dict():
        """Creates custom pronunciation dictionary based on desired vocabulary."""
        dictionary_paths = [base_dictionary]
        if custom_words.exists():
            # Custom dictionary goes first so that the "first" dictionary merge
            # rule will choose pronunciations from it.
            dictionary_paths.insert(0, custom_words)

        # Exclude dictionaries that don't exist
        dictionary_paths = [p for p in dictionary_paths if p.exists()]

        return {
            "file_dep": [vocab] + dictionary_paths,
            "targets": [dictionary],
            "actions": [(do_dict, [dictionary_paths])],
        }

    # -----------------------------------------------------------------------------

    @create_after(executed="vocab_dict")
    def task_kaldi_train():
        """Creates HCLG.fst for a Kaldi nnet3 or gmm model."""
        if acoustic_model_type == "kaldi":
            return {
                "file_dep": [dictionary, language_model],
                "targets": [kaldi_graph_dir / "HCLG.fst"],
                "actions": [[
                    "bash",
                    str(acoustic_model / "train.sh"),
                    str(kaldi_dir),
                    str(acoustic_model),
                    str(dictionary),
                    str(language_model),
                ]],
            }

    # -----------------------------------------------------------------------------

    errors = []

    class MyReporter(ConsoleReporter):
        def add_failure(self, task, exception):
            super().add_failure(task, exception)
            errors.append(f"{task}: {exception}")

        def runtime_error(self, msg):
            super().runtime_error(msg)
            errors.append(msg)

    DOIT_CONFIG = {"action_string_formatting": "old", "reporter": MyReporter}

    # Monkey-patch inspect so that doit works inside PyInstaller.
    # doit grabs the source line numbers of task functions, probably for
    # debugging, but PyInstaller doesn't seem to keep that information around.
    #
    # A better approach would be to create a custom TaskLoader.
    import inspect

    inspect.getsourcelines = lambda obj: [0, 0]

    # Run doit main
    result = DoitMain(ModuleTaskLoader(locals())).run(sys.argv[1:])
    return (result, errors)
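    # Returning (exit_code, errors) lets the caller pair doit's numeric result
    # with the per-task failure messages collected by MyReporter above.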
Code example #7
File: __init__.py Project: patchedsoul/rhasspy
def train_profile(profile_dir: Path, profile: Profile) -> None:

    # Shorthand for resolving profile paths
    def ppath(query, default=None, write=False):
        return utils_ppath(profile, profile_dir, query, default, write=write)
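    # (write=True presumably resolves against the profile's writable
    # directory; every generated artifact below is declared with it.)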

    # Inputs
    stt_system = profile.get("speech_to_text.system")
    stt_prefix = f"speech_to_text.{stt_system}"

    # intent_whitelist = ppath("training.intent-whitelist", "intent_whitelist")
    sentences_ini = ppath("speech_to_text.sentences_ini", "sentences.ini")
    base_dictionary = ppath(f"{stt_prefix}.base_dictionary",
                            "base_dictionary.txt")
    base_language_model = ppath(f"{stt_prefix}.language_model",
                                "base_language_model.txt")
    base_language_model_fst = ppath(f"{stt_prefix}.base_language_model_fst",
                                    "base_language_model.fst")
    base_language_model_weight = float(
        profile.get(f"{stt_prefix}.mix_weight", 0))
    custom_words = ppath(f"{stt_prefix}.custom_words", "custom_words.txt")
    g2p_model = ppath(f"{stt_prefix}.g2p_model", "g2p.fst")
    acoustic_model_type = stt_system

    if acoustic_model_type == "pocketsphinx":
        acoustic_model = ppath(f"{stt_prefix}.acoustic-model",
                               "acoustic_model")
        kaldi_dir = None
    elif acoustic_model_type == "kaldi":
        kaldi_dir = Path(
            os.path.expandvars(
                profile.get(f"{stt_prefix}.kaldi_dir", "/opt/kaldi")))
        acoustic_model = ppath(f"{stt_prefix}.model_dir", "model")
    else:
        # Raise an exception (not assert) so the check survives python -O
        raise ValueError(f"Unknown acoustic model type: {acoustic_model_type}")

    # ignore/upper/lower
    word_casing = profile.get("speech_to_text.dictionary_casing",
                              "ignore").lower()

    # default/ignore/upper/lower
    g2p_word_casing = profile.get("speech_to_text.g2p_casing",
                                  word_casing).lower()

    # all/first
    dict_merge_rule = profile.get("speech_to_text.dictionary_merge_rule",
                                  "all").lower()

    # Kaldi
    kaldi_graph_dir = acoustic_model / profile.get(f"{stt_prefix}.graph",
                                                   "graph")

    # Outputs
    dictionary = ppath(f"{stt_prefix}.dictionary",
                       "dictionary.txt",
                       write=True)
    language_model = ppath(f"{stt_prefix}.language_model",
                           "language_model.txt",
                           write=True)
    intent_fst = ppath("intent.fsticiffs.intent_fst", "intent.fst", write=True)
    vocab = ppath(f"{stt_prefix}.vocabulary", "vocab.txt", write=True)
    unknown_words = ppath(f"{stt_prefix}.unknown_words",
                          "unknown_words.txt",
                          write=True)
    grammar_dir = ppath("speech_to_text.grammars_dir", "grammars", write=True)
    fsts_dir = ppath("speech_to_text.fsts_dir", "fsts", write=True)
    slots_dir = ppath("speech_to_text.slots_dir", "slots", write=True)
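    # Rough artifact flow of the tasks below:
    #   sentences.ini -> grammars/*.gram -> fsts/*.fst -> intent.fst
    #   -> language_model.txt + vocab.txt -> dictionary.txt
    #   -> HCLG.fst (Kaldi only)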

    # -----------------------------------------------------------------------------

    # Create cache directories
    for dir_path in [grammar_dir, fsts_dir]:
        dir_path.mkdir(parents=True, exist_ok=True)

    # -----------------------------------------------------------------------------

    # Set of used intents
    intents: Set[str] = set()
    whitelist = None

    # Default to using all intents
    intents.update(_get_intents(sentences_ini))

    # Check if intent whitelist exists
    # if intent_whitelist.exists():
    #     with open(intent_whitelist, "r") as whitelist_file:
    #         # Each line is an intent to use
    #         for line in whitelist_file:
    #             line = line.strip()
    #             if len(line) > 0:
    #                 if whitelist is None:
    #                     whitelist = []
    #                     intents.clear()

    #                 whitelist.append(line)
    #                 intents.add(line)

    # -----------------------------------------------------------------------------

    def task_grammars():
        """Transforms sentences.ini into JSGF grammars, one per intent."""
        maybe_deps = []

        # if intent_whitelist.exists():
        #     maybe_deps.append(intent_whitelist)

        def ini_to_grammars(targets):
            with open(sentences_ini, "r") as sentences_file:
                make_grammars(sentences_file, grammar_dir, whitelist=whitelist)

        return {
            "file_dep": [sentences_ini] + maybe_deps,
            "targets": [grammar_dir / f"{intent}.gram" for intent in intents],
            "actions": [ini_to_grammars],
        }
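    # For illustration (hedged sketch of the sentences.ini format): a section
    # such as
    #   [ChangeLightState]
    #   turn (on | off) the light
    # should yield grammar_dir/ChangeLightState.gram with a matching JSGF rule.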

    # -----------------------------------------------------------------------------

    def do_slots_to_fst(slot_names, targets):
        # Extra arguments for word casing
        kwargs = {}
        if word_casing == "upper":
            kwargs["upper"] = True
        elif word_casing == "lower":
            kwargs["lower"] = True

        slot_fsts = slots_to_fsts(slots_dir, slot_names=slot_names, **kwargs)
        for slot_name, slot_fst in slot_fsts.items():
            # Slot name will already have "$"
            slot_fst.write(str(fsts_dir / f"{slot_name}.fst"))

    def do_grammar_to_fsts(grammar_path: Path,
                           replace_fst_paths: Dict[str, Path], targets):
        # Load dependent FSTs
        replace_fsts = {
            replace_name: fst.Fst.read(str(replace_path))
            for replace_name, replace_path in replace_fst_paths.items()
        }

        # Extra arguments for word casing
        kwargs = {}
        if word_casing == "upper":
            kwargs["upper"] = True
        elif word_casing == "lower":
            kwargs["lower"] = True

        grammar = grammar_path.read_text()
        listener = grammar_to_fsts(grammar,
                                   replace_fsts=replace_fsts,
                                   **kwargs)
        grammar_name = listener.grammar_name

        # Write FST for each JSGF rule
        for rule_name, rule_fst in listener.fsts.items():
            fst_path = fsts_dir / f"{rule_name}.fst"
            rule_fst.write(str(fst_path))

        # Write FST for main grammar rule
        grammar_fst_path = fsts_dir / f"{grammar_name}.fst"
        assert listener.grammar_fst is not None
        listener.grammar_fst.write(str(grammar_fst_path))

    # -----------------------------------------------------------------------------

    def do_grammar_dependencies(grammar_path: Path, targets):
        grammar = grammar_path.read_text()
        grammar_deps = get_grammar_dependencies(grammar).graph
        graph_json = nx.readwrite.json_graph.node_link_data(grammar_deps)
        with open(targets[0], "w") as graph_file:
            json.dump(graph_json, graph_file)

    @create_after(executed="grammars")
    def task_grammar_dependencies():
        """Creates grammar dependency graphs from JSGF grammars and relevant slots."""

        for intent in intents:
            grammar_path = grammar_dir / f"{intent}.gram"
            yield {
                "name": intent + "_dependencies",
                "file_dep": [grammar_path],
                "targets": [str(grammar_path) + ".json"],
                "actions": [(do_grammar_dependencies, [grammar_path])],
            }

    # -----------------------------------------------------------------------------

    @create_after(executed="grammar_dependencies")
    def task_grammar_fsts():
        """Creates grammar FSTs from JSGF grammars and relevant slots."""
        used_slots: Set[str] = set()

        for intent in intents:
            grammar_path = grammar_dir / f"{intent}.gram"
            grammar_dep_path = str(grammar_path) + ".json"

            # Load dependency graph
            with open(grammar_dep_path, "r") as graph_file:
                graph_data = json.load(graph_file)
                grammar_deps = nx.readwrite.json_graph.node_link_graph(
                    graph_data)

            rule_names: Set[str] = set()
            replace_fst_paths: Dict[str, Path] = {}

            # Process dependencies
            for node, data in grammar_deps.nodes(data=True):
                node_type = data["type"]

                if node_type == "slot":
                    # Strip "$"
                    slot_name = node[1:]
                    used_slots.add(slot_name)

                    # Path to slot FST
                    replace_fst_paths[node] = fsts_dir / f"{node}.fst"
                elif node_type == "remote rule":
                    # Path to rule FST
                    replace_fst_paths[node] = fsts_dir / f"{node}.fst"
                elif node_type == "local rule":
                    rule_names.add(node)

            # All rule/grammar FSTs that will be generated
            grammar_fst_paths = [
                fsts_dir / f"{rule_name}.fst" for rule_name in rule_names
            ]
            grammar_fst_paths.append(fsts_dir / f"{intent}.fst")

            yield {
                "name": intent + "_fst",
                "file_dep": [grammar_path, grammar_dep_path]
                + list(replace_fst_paths.values()),
                "targets": grammar_fst_paths,
                "actions": [
                    (do_grammar_to_fsts, [grammar_path, replace_fst_paths])
                ],
            }

        # slots -> FST
        if len(used_slots) > 0:
            yield {
                "name": "slot_fsts",
                "file_dep": [slots_dir / slot_name for slot_name in used_slots],
                "targets": [
                    fsts_dir / f"${slot_name}.fst" for slot_name in used_slots
                ],
                "actions": [(do_slots_to_fst, [used_slots])],
            }

    # -----------------------------------------------------------------------------

    def do_intent_fst(intents: Iterable[str], targets):
        intent_fsts = {
            intent: fst.Fst.read(str(fsts_dir / f"{intent}.fst"))
            for intent in intents
        }
        intent_fst = make_intent_fst(intent_fsts)
        intent_fst.write(targets[0])

    @create_after(executed="grammar_fsts")
    def task_intent_fst():
        """Merges grammar FSTs into single intent.fst."""
        return {
            "file_dep": [fsts_dir / f"{intent}.fst" for intent in intents],
            "targets": [intent_fst],
            "actions": [(do_intent_fst, [intents])],
        }
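    # make_intent_fst presumably unions the per-intent FSTs and tags each
    # branch with its intent name, so a recognized path can be traced back to
    # the intent that produced it.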

    # -----------------------------------------------------------------------------

    @create_after(executed="intent_fst")
    def task_language_model():
        """Creates an ARPA language model from intent.fst."""

        if base_language_model_weight > 0:
            yield {
                "name": "base_lm_to_fst",
                "file_dep": [base_language_model],
                "targets": [base_language_model_fst],
                "actions": ["ngramread --ARPA %(dependencies)s %(targets)s"],
            }

        # FST -> n-gram counts
        intent_counts = str(intent_fst) + ".counts"
        yield {
            "name": "intent_counts",
            "file_dep": [intent_fst],
            "targets": [intent_counts],
            "actions": ["ngramcount %(dependencies)s %(targets)s"],
        }

        # n-gram counts -> model
        intent_model = str(intent_fst) + ".model"
        yield {
            "name": "intent_model",
            "file_dep": [intent_counts],
            "targets": [intent_model],
            "actions": ["ngrammake %(dependencies)s %(targets)s"],
        }

        if base_language_model_weight > 0:
            merged_model = str(intent_model) + ".merge"

            # merge
            yield {
                "name": "lm_merge",
                "file_dep": [base_language_model_fst, intent_model],
                "targets": [merged_model],
                "actions": [
                    f"ngrammerge --alpha={base_language_model_weight} %(dependencies)s %(targets)s"
                ],
            }

            intent_model = merged_model

        # model -> ARPA
        yield {
            "name": "intent_arpa",
            "file_dep": [intent_model],
            "targets": [language_model],
            "actions": ["ngramprint --ARPA %(dependencies)s > %(targets)s"],
        }

    # -----------------------------------------------------------------------------

    def do_vocab(targets):
        with open(targets[0], "w") as vocab_file:
            input_symbols = fst.Fst.read(str(intent_fst)).input_symbols()
            for i in range(input_symbols.num_symbols()):
                # Use get_nth_key: symbol table keys may be discontiguous, so
                # the loop index cannot be used as a key directly.
                key = input_symbols.get_nth_key(i)
                symbol = input_symbols.find(key).decode().strip()
                if symbol and not (symbol.startswith("__")
                                   or symbol.startswith("<")):
                    print(symbol, file=vocab_file)

            if base_language_model_weight > 0:
                # Add all words from base dictionary
                with open(base_dictionary, "r") as dict_file:
                    for word in read_dict(dict_file):
                        print(word, file=vocab_file)

    @create_after(executed="language_model")
    def task_vocab():
        """Writes all vocabulary words to a file from intent.fst."""
        return {
            "file_dep": [intent_fst],
            "targets": [vocab],
            "actions": [do_vocab]
        }

    # -----------------------------------------------------------------------------

    def do_dict(dictionary_paths: Iterable[Path], targets):
        with open(targets[0], "w") as dictionary_file:
            if unknown_words.exists():
                unknown_words.unlink()

            dictionary_format = FORMAT_CMU
            if acoustic_model_type == "julius":
                dictionary_format = FORMAT_JULIUS

            # Extra arguments for word casing
            kwargs = {}
            if word_casing == "upper":
                kwargs["upper"] = True
            elif word_casing == "lower":
                kwargs["lower"] = True

            make_dict(
                vocab,
                dictionary_paths,
                dictionary_file,
                unknown_path=unknown_words,
                dictionary_format=dictionary_format,
                merge_rule=dict_merge_rule,
                **kwargs,
            )

            if unknown_words.exists() and g2p_model.exists():
                # Generate single pronunciation guesses
                logger.debug("Guessing pronunciations for unknown word(s)")

                g2p_output = subprocess.check_output(
                    [
                        "phonetisaurus-apply",
                        "--model",
                        str(g2p_model),
                        "--word_list",
                        str(unknown_words),
                        "--nbest",
                        "1",
                    ],
                    universal_newlines=True,
                )

                g2p_transform = lambda w: w
                if g2p_word_casing == "upper":
                    g2p_transform = lambda w: w.upper()
                elif g2p_word_casing == "lower":
                    g2p_transform = lambda w: w.lower()

                # Append to dictionary and custom words
                with open(custom_words, "a") as words_file:
                    with open(unknown_words, "w") as unknown_words_file:
                        for line in g2p_output.splitlines():
                            line = line.strip()
                            word, phonemes = re.split(r"\s+", line, maxsplit=1)
                            word = g2p_transform(word)
                            print(word, phonemes, file=dictionary_file)
                            print(word, phonemes, file=words_file)
                            print(word, phonemes, file=unknown_words_file)

    @create_after(executed="vocab")
    def task_vocab_dict():
        """Creates custom pronunciation dictionary based on desired vocabulary."""
        dictionary_paths = [base_dictionary]
        if custom_words.exists():
            # Custom dictionary goes first so that the "first" dictionary merge
            # rule will choose pronunciations from it.
            dictionary_paths.insert(0, custom_words)

        # Exclude dictionaries that don't exist
        dictionary_paths = [p for p in dictionary_paths if p.exists()]

        return {
            "file_dep": [vocab] + dictionary_paths,
            "targets": [dictionary],
            "actions": [(do_dict, [dictionary_paths])],
        }

    # -----------------------------------------------------------------------------

    @create_after(executed="vocab_dict")
    def task_kaldi_train():
        """Creates HCLG.fst for a Kaldi nnet3 or gmm model."""
        if acoustic_model_type == "kaldi":
            return {
                "file_dep": [dictionary, language_model],
                "targets": [kaldi_graph_dir / "HCLG.fst"],
                "actions": [[
                    "bash",
                    str(acoustic_model / "train.sh"),
                    str(kaldi_dir),
                    str(acoustic_model),
                    str(dictionary),
                    str(language_model),
                ]],
            }

    # -----------------------------------------------------------------------------

    DOIT_CONFIG = {"action_string_formatting": "old"}

    # Run doit main
    DoitMain(ModuleTaskLoader(locals())).run(sys.argv[1:])
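    # Usage sketch (hypothetical caller):
    #   train_profile(profile_dir, profile)  # `profile` is a loaded Profile
    # Note that this variant, unlike code example #6, discards doit's return
    # value, so the caller cannot tell whether training succeeded.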