def train_profile(profile_dir: Path, profile: Profile) -> Tuple[int, List[str]]: # Compact def ppath(query, default=None, write=False): return utils_ppath(profile, profile_dir, query, default, write=write) language = profile.get("language", "") # Inputs stt_system = profile.get("speech_to_text.system") stt_prefix = f"speech_to_text.{stt_system}" # intent_whitelist = ppath("training.intent-whitelist", "intent_whitelist") sentences_ini = ppath("speech_to_text.sentences_ini", "sentences.ini") sentences_dir = ppath("speech_to_text.sentences_dir", "sentences.dir") base_dictionary = ppath(f"{stt_prefix}.base_dictionary", "base_dictionary.txt") base_language_model = ppath(f"{stt_prefix}.base_language_model", "base_language_model.txt") base_language_model_weight = float( profile.get(f"{stt_prefix}.mix_weight", 0)) g2p_model = ppath(f"{stt_prefix}.g2p_model", "g2p.fst") acoustic_model_type = stt_system # Pocketsphinx acoustic_model = ppath(f"{stt_prefix}.acoustic_model", "acoustic_model") # Kaldi kaldi_dir = Path( os.path.expandvars(profile.get(f"{stt_prefix}.kaldi_dir", "/opt/kaldi"))) kaldi_graph_dir = acoustic_model / profile.get(f"{stt_prefix}.graph", "graph") if acoustic_model_type == "kaldi": # Kaldi acoustic models are inside model directory acoustic_model = ppath(f"{stt_prefix}.model_dir", "model") else: _LOGGER.warning("Unsupported acoustic model type: %s", acoustic_model_type) # ignore/upper/lower word_casing = profile.get("speech_to_text.dictionary_casing", "ignore").lower() # default/ignore/upper/lower g2p_word_casing = profile.get("speech_to_text.g2p_casing", word_casing).lower() # all/first dict_merge_rule = profile.get("speech_to_text.dictionary_merge_rule", "all").lower() # Outputs dictionary = ppath(f"{stt_prefix}.dictionary", "dictionary.txt", write=True) custom_words = ppath(f"{stt_prefix}.custom_words", "custom_words.txt", write=True) language_model = ppath(f"{stt_prefix}.language_model", "language_model.txt", write=True) base_language_model_fst = ppath(f"{stt_prefix}.base_language_model_fst", "base_language_model.fst", write=True) intent_graph = ppath("intent.fsticiffs.intent_graph", "intent.json", write=True) intent_fst = ppath("intent.fsticiffs.intent_fst", "intent.fst", write=True) vocab = ppath(f"{stt_prefix}.vocabulary", "vocab.txt", write=True) unknown_words = ppath(f"{stt_prefix}.unknown_words", "unknown_words.txt", write=True) grammar_dir = ppath("speech_to_text.grammars_dir", "grammars", write=True) fsts_dir = ppath("speech_to_text.fsts_dir", "fsts", write=True) slots_dir = ppath("speech_to_text.slots_dir", "slots", write=True) # ----------------------------------------------------------------------------- # Create cache directories for dir_path in [grammar_dir, fsts_dir]: dir_path.mkdir(parents=True, exist_ok=True) # ----------------------------------------------------------------------------- ini_paths: List[Path] = get_ini_paths(sentences_ini, sentences_dir) # Join ini files into a single combined file and parse _LOGGER.debug("Parsing ini file(s): %s", [str(p) for p in ini_paths]) try: intents = get_all_intents(ini_paths) except Exception: _LOGGER.exception("Failed to parse %s", ini_paths) return (1, ["Failed to parse sentences"]) # ----------------------------------------------------------------------------- def get_slot_names(item): """Yield referenced slot names.""" if isinstance(item, jsgf.SlotReference): yield item.slot_name elif isinstance(item, jsgf.Sequence): for sub_item in item.items: for slot_name in get_slot_names(sub_item): yield slot_name elif 
isinstance(item, jsgf.Rule): for slot_name in get_slot_names(item.rule_body): yield slot_name def number_transform(word): """Automatically transform numbers""" if not isinstance(word, jsgf.Word): # Skip anything besides words return try: n = int(word.text) # 75 -> (seventy five):75 number_text = num2words(n, lang=language).replace("-", " ").strip() assert number_text, f"Empty num2words result for {n}" number_words = number_text.split() if len(number_words) == 1: # Easy case, single word word.text = number_text word.substitution = str(n) else: # Hard case, split into mutliple Words return jsgf.Sequence( text=number_text, type=jsgf.SequenceType.GROUP, substitution=str(n), items=[jsgf.Word(w) for w in number_words], ) except ValueError: # Not a number pass def do_intents_to_graph(intents, slot_names, targets): sentences, replacements = ini_jsgf.split_rules(intents) # Load slot values for slot_name in slot_names: slot_path = slots_dir / slot_name assert slot_path.is_file(), f"Missing slot file at {slot_path}" # Parse each non-empty line as a JSGF sentence slot_values = [] with open(slot_path, "r") as slot_file: for line in slot_file: line = line.strip() if line: sentence = jsgf.Sentence.parse(line) slot_values.append(sentence) # Replace $slot with sentences replacements[f"${slot_name}"] = slot_values if profile.get("intent.replace_numbers", True): # Replace numbers in parsed sentences for intent_sentences in sentences.values(): for sentence in intent_sentences: jsgf.walk_expression(sentence, number_transform, replacements) # Convert to directed graph graph = intents_to_graph(intents, replacements) # Write graph to JSON file json_graph = graph_to_json(graph) with open(targets[0], "w") as graph_file: json.dump(json_graph, graph_file) def task_ini_graph(): """sentences.ini -> intent.json""" slot_names = set() for intent_name in intents: for item in intents[intent_name]: for slot_name in get_slot_names(item): slot_names.add(slot_name) # Add slot files as dependencies deps = [(slots_dir / slot_name) for slot_name in slot_names] # Add profile itself as a dependency profile_json_path = profile_dir / "profile.json" if profile_json_path.is_file(): deps.append(profile_json_path) return { "file_dep": ini_paths + deps, "targets": [intent_graph], "actions": [(do_intents_to_graph, [intents, slot_names])], } # ----------------------------------------------------------------------------- def do_graph_to_fst(intent_graph, targets): with open(intent_graph, "r") as graph_file: json_graph = json.load(graph_file) graph = json_to_graph(json_graph) graph_fst = graph_to_fst(graph) # Create symbol tables isymbols = fst.SymbolTable() for symbol, number in graph_fst.input_symbols.items(): isymbols.add_symbol(symbol, number) osymbols = fst.SymbolTable() for symbol, number in graph_fst.output_symbols.items(): osymbols.add_symbol(symbol, number) # Compile FST compiler = fst.Compiler(isymbols=isymbols, osymbols=osymbols, keep_isymbols=True, keep_osymbols=True) compiler.write(graph_fst.intent_fst) compiled_fst = compiler.compile() # Write to file compiled_fst.write(str(targets[0])) def task_intent_fst(): """intent.json -> intent.fst""" return { "file_dep": [intent_graph], "targets": [intent_fst], "actions": [(do_graph_to_fst, [intent_graph])], } # ----------------------------------------------------------------------------- @create_after(executed="intent_fst") def task_language_model(): """Creates an ARPA language model from intent.fst.""" if base_language_model_weight > 0: yield { "name": "base_lm_to_fst", "file_dep": 
[base_language_model], "targets": [base_language_model_fst], "actions": ["ngramread --ARPA %(dependencies)s %(targets)s"], } # FST -> n-gram counts intent_counts = str(intent_fst) + ".counts" yield { "name": "intent_counts", "file_dep": [intent_fst], "targets": [intent_counts], "actions": ["ngramcount %(dependencies)s %(targets)s"], } # n-gram counts -> model intent_model = str(intent_fst) + ".model" yield { "name": "intent_model", "file_dep": [intent_counts], "targets": [intent_model], "actions": ["ngrammake %(dependencies)s %(targets)s"], } if base_language_model_weight > 0: merged_model = Path(str(intent_model) + ".merge") # merge yield { "name": "lm_merge", "file_dep": [base_language_model_fst, intent_model], "targets": [merged_model], "actions": [ f"ngrammerge --alpha={base_language_model_weight} %(dependencies)s %(targets)s" ], } intent_model = merged_model # model -> ARPA yield { "name": "intent_arpa", "file_dep": [intent_model], "targets": [language_model], "actions": ["ngramprint --ARPA %(dependencies)s > %(targets)s"], } # ----------------------------------------------------------------------------- def do_vocab(targets): with open(targets[0], "w") as vocab_file: input_symbols = fst.Fst.read(str(intent_fst)).input_symbols() for i in range(input_symbols.num_symbols()): # Critical that we use get_nth_key here when input symbols # numbering is discontiguous. key = input_symbols.get_nth_key(i) symbol = input_symbols.find(key).decode().strip() if symbol and not (symbol.startswith("__") or symbol.startswith("<")): print(symbol, file=vocab_file) if base_language_model_weight > 0: # Add all words from base dictionary with open(base_dictionary, "r") as dict_file: for word in read_dict(dict_file): print(word, file=vocab_file) @create_after(executed="language_model") def task_vocab(): """Writes all vocabulary words to a file from intent.fst.""" return { "file_dep": [intent_fst], "targets": [vocab], "actions": [do_vocab] } # ----------------------------------------------------------------------------- def do_dict(dictionary_paths: Iterable[Path], targets): with open(targets[0], "w") as dictionary_file: if unknown_words.exists(): unknown_words.unlink() dictionary_format = FORMAT_CMU if acoustic_model_type == "julius": dictionary_format = FORMAT_JULIUS make_dict( vocab, dictionary_paths, dictionary_file, unknown_path=unknown_words, dictionary_format=dictionary_format, merge_rule=dict_merge_rule, upper=(word_casing == "upper"), lower=(word_casing == "lower"), ) if unknown_words.exists() and g2p_model.exists(): # Generate single pronunciation guesses _LOGGER.debug("Guessing pronunciations for unknown word(s)") g2p_output = subprocess.check_output( [ "phonetisaurus-apply", "--model", str(g2p_model), "--word_list", str(unknown_words), "--nbest", "1", ], universal_newlines=True, ) g2p_transform = lambda w: w if g2p_word_casing == "upper": g2p_transform = lambda w: w.upper() elif g2p_word_casing == "lower": g2p_transform = lambda w: w.lower() # Append to dictionary and custom words with open(custom_words, "a") as words_file: with open(unknown_words, "w") as unknown_words_file: for line in g2p_output.splitlines(): line = line.strip() word, phonemes = re.split(r"\s+", line, maxsplit=1) word = g2p_transform(word) print(word, phonemes, file=dictionary_file) print(word, phonemes, file=words_file) print(word, phonemes, file=unknown_words_file) @create_after(executed="vocab") def task_vocab_dict(): """Creates custom pronunciation dictionary based on desired vocabulary.""" dictionary_paths = 
[base_dictionary] if custom_words.exists(): # Custom dictionary goes first so that the "first" dictionary merge # rule will choose pronunciations from it. dictionary_paths.insert(0, custom_words) # Exclude dictionaries that don't exist dictionary_paths = [p for p in dictionary_paths if p.exists()] return { "file_dep": [vocab] + dictionary_paths, "targets": [dictionary], "actions": [(do_dict, [dictionary_paths])], } # ----------------------------------------------------------------------------- @create_after(executed="vocab_dict") def task_kaldi_train(): """Creates HCLG.fst for a Kaldi nnet3 or gmm model.""" if acoustic_model_type == "kaldi": return { "file_dep": [dictionary, language_model], "targets": [kaldi_graph_dir / "HCLG.fst"], "actions": [[ "bash", str(acoustic_model / "train.sh"), str(kaldi_dir), str(acoustic_model), str(dictionary), str(language_model), ]], } # ----------------------------------------------------------------------------- errors = [] class MyReporter(ConsoleReporter): def add_failure(self, task, exception): super().add_failure(task, exception) errors.append(f"{task}: {exception}") def runtime_error(self, msg): super().runtime_error(msg) errors.append(msg) DOIT_CONFIG = {"action_string_formatting": "old", "reporter": MyReporter} # Monkey-patch inspect to make doit work inside PyInstaller. # doit grabs the line numbers of functions, probably for debugging, but # PyInstaller doesn't seem to keep that information around. # # The better thing to do would be to create a custom TaskLoader. import inspect inspect.getsourcelines = lambda obj: [0, 0] # Run doit main result = DoitMain(ModuleTaskLoader(locals())).run(sys.argv[1:]) return (result, errors)
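# ---------------------------------------------------------------------------
# Usage sketch for train_profile() above (illustrative only; not called
# anywhere by Rhasspy itself). Assumptions: the profile directory paths below
# are hypothetical, and the Profile constructor signature matches its use in
# RhasspyCore (Profile(name, system_profiles_dir, user_profiles_dir)).
# train_profile() returns a (doit_exit_code, errors) tuple; a non-zero exit
# code or a non-empty error list means training failed.


def _example_train_profile(profile_name: str = "en") -> bool:
    """Train a profile directly and report success (sketch, not part of Rhasspy)."""
    system_dir = "/usr/lib/rhasspy/profiles"  # hypothetical system profile location
    user_dir = str(Path.home() / ".config" / "rhasspy" / "profiles")  # hypothetical user location

    profile = Profile(profile_name, system_dir, user_dir)
    profile_dir = Path(user_dir) / profile_name

    result, errors = train_profile(profile_dir, profile)
    for error in errors:
        _LOGGER.error("Training error: %s", error)

    return result == 0 and not errors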
class RhasspyCore: """Core class for Rhasspy functionality.""" def __init__( self, profile_name: str, system_profiles_dir: str, user_profiles_dir: str, actor_system: Optional[ActorSystem] = None, ) -> None: self._logger = logging.getLogger(self.__class__.__name__) self.profiles_dirs: List[str] = [ user_profiles_dir, system_profiles_dir ] self.profile_name = profile_name self.actor_system = actor_system self.profile = Profile(self.profile_name, system_profiles_dir, user_profiles_dir) self._logger.debug("Loaded profile from %s", self.profile.json_path) self._logger.debug("Profile files will be written to %s", self.profile.write_path()) self.defaults = Profile.load_defaults(system_profiles_dir) self.loop = asyncio.get_event_loop() self._session: Optional[ aiohttp.ClientSession] = aiohttp.ClientSession() self.dialogue_manager: Optional[RhasspyActor] = None self.download_status: typing.List[str] = [] # ------------------------------------------------------------------------- @property def session(self) -> aiohttp.ClientSession: """Get HTTP client session.""" assert self._session is not None return self._session # ------------------------------------------------------------------------- async def start( self, preload: Optional[bool] = None, block: bool = True, timeout: float = 60, observer: Optional[RhasspyActor] = None, ) -> None: """Start Rhasspy core.""" if self.actor_system is None: self.actor_system = ActorSystem() if preload is None: preload = self.profile.get("rhasspy.preload_profile", False) assert self.actor_system is not None self.dialogue_manager = self.actor_system.createActor(DialogueManager) with self.actor_system.private() as sys: await sys.async_ask( self.dialogue_manager, ConfigureEvent( self.profile, preload=preload, ready=block, transitions=False, load_timeout_sec=30, observer=observer, ), ) # Block until ready if block: await sys.async_listen(timeout) # ------------------------------------------------------------------------- async def get_microphones(self, system: Optional[str] = None) -> Dict[Any, Any]: """Get available audio recording devices.""" assert self.actor_system is not None with self.actor_system.private() as sys: result = await sys.async_ask(self.dialogue_manager, GetMicrophones(system)) assert isinstance(result, dict), result return result async def test_microphones(self, system: Optional[str] = None) -> Dict[Any, Any]: """Listen to all microphones and determine if they're live.""" assert self.actor_system is not None with self.actor_system.private() as sys: result = await sys.async_ask(self.dialogue_manager, TestMicrophones(system)) assert isinstance(result, dict), result return result async def get_speakers(self, system: Optional[str] = None) -> Dict[Any, Any]: """Get available audio playback devices.""" assert self.actor_system is not None with self.actor_system.private() as sys: result = await sys.async_ask(self.dialogue_manager, GetSpeakers(system)) assert isinstance(result, dict), result return result # ------------------------------------------------------------------------- def listen_for_wake(self) -> None: """Tell Rhasspy to start listening for a wake word.""" assert self.actor_system is not None self.actor_system.tell(self.dialogue_manager, ListenForWakeWord()) async def listen_for_command( self, handle: bool = True, timeout: Optional[float] = None, entity: Optional[str] = None, value: Optional[str] = None, ) -> Dict[str, Any]: """Block until a voice command has been spoken. 
Optionally handle it.""" assert self.actor_system is not None with self.actor_system.private() as sys: entities = None if entity is not None: entities = [{"entity": entity, "value": value}] result = await sys.async_ask( self.dialogue_manager, ListenForCommand(handle=handle, timeout=timeout, entities=entities), ) assert isinstance(result, dict), result return result async def record_command(self, timeout: Optional[float] = None) -> VoiceCommand: """Record a single voice command.""" assert self.actor_system is not None with self.actor_system.private() as sys: result = await sys.async_ask(self.dialogue_manager, GetVoiceCommand(timeout=timeout)) assert isinstance(result, VoiceCommand), result return result # ------------------------------------------------------------------------- async def transcribe_wav(self, wav_data: bytes) -> WavTranscription: """Transcribe text from WAV buffer.""" assert self.actor_system is not None with self.actor_system.private() as sys: result = await sys.async_ask(self.dialogue_manager, TranscribeWav(wav_data, handle=False)) assert isinstance(result, WavTranscription), result return result async def recognize_intent(self, text: str, wakeId: str = "") -> IntentRecognized: """Recognize an intent from text.""" assert self.actor_system is not None with self.actor_system.private() as sys: # Fix casing dict_casing = self.profile.get("speech_to_text.dictionary_casing", "") if dict_casing == "lower": text = text.lower() elif dict_casing == "upper": text = text.upper() # Replace numbers if self.profile.get("intent.replace_numbers", True): language = self.profile.get("language", "") if not language: language = None # 75 -> seventy five text = numbers_to_words(text, language=language) result = await sys.async_ask(self.dialogue_manager, RecognizeIntent(text, handle=False)) assert isinstance(result, IntentRecognized), result # Add slots intent_slots = {} for ev in result.intent.get("entities", []): intent_slots[ev["entity"]] = ev["value"] result.intent["slots"] = intent_slots # Add wake/site ID result.intent["wakeId"] = wakeId result.intent["siteId"] = self.profile.get("mqtt.site_id", "default") return result async def handle_intent(self, intent: Dict[str, Any]) -> IntentHandled: """Handle an intent.""" assert self.actor_system is not None with self.actor_system.private() as sys: result = await sys.async_ask(self.dialogue_manager, HandleIntent(intent)) assert isinstance(result, IntentHandled), result return result # ------------------------------------------------------------------------- def start_recording_wav(self, buffer_name: str = "") -> None: """Record audio data to a named buffer.""" assert self.actor_system is not None self.actor_system.tell(self.dialogue_manager, StartRecordingToBuffer(buffer_name)) async def stop_recording_wav(self, buffer_name: str = "") -> AudioData: """Stop recording audio data to a named buffer.""" assert self.actor_system is not None with self.actor_system.private() as sys: result = await sys.async_ask(self.dialogue_manager, StopRecordingToBuffer(buffer_name)) assert isinstance(result, AudioData), result return result # ------------------------------------------------------------------------- def play_wav_data(self, wav_data: bytes) -> None: """Play WAV buffer through audio playback system.""" assert self.actor_system is not None self.actor_system.tell(self.dialogue_manager, PlayWavData(wav_data)) def play_wav_file(self, wav_path: str) -> None: """Play WAV file through audio playback system.""" assert self.actor_system is not None 
self.actor_system.tell(self.dialogue_manager, PlayWavFile(wav_path)) # ------------------------------------------------------------------------- async def get_word_pronunciations(self, words: List[str], n: int = 5) -> WordPronunciations: """Look up or guess pronunciations for a word.""" assert self.actor_system is not None with self.actor_system.private() as sys: result = await sys.async_ask(self.dialogue_manager, GetWordPronunciations(words, n)) assert isinstance(result, WordPronunciations), result return result async def get_word_phonemes(self, word: str) -> WordPhonemes: """Get eSpeak phonemes for a word.""" assert self.actor_system is not None with self.actor_system.private() as sys: result = await sys.async_ask(self.dialogue_manager, GetWordPhonemes(word)) assert isinstance(result, WordPhonemes), result return result async def speak_word(self, word: str) -> WordSpoken: """Speak a single word.""" assert self.actor_system is not None with self.actor_system.private() as sys: result = await sys.async_ask(self.dialogue_manager, SpeakWord(word)) assert isinstance(result, WordSpoken), result return result async def speak_sentence( self, sentence: str, play: bool = True, language: Optional[str] = None, voice: Optional[str] = None, siteId: Optional[str] = None, ) -> SentenceSpoken: """Speak an entire sentence using text to speech system.""" assert self.actor_system is not None with self.actor_system.private() as sys: result = await sys.async_ask( self.dialogue_manager, SpeakSentence(sentence, play=play, language=language, voice=voice, siteId=siteId), ) assert isinstance(result, SentenceSpoken), result return result # ------------------------------------------------------------------------- async def train( self, reload_actors: bool = True, no_cache: bool = False ) -> Union[ProfileTrainingComplete, ProfileTrainingFailed]: """Generate speech/intent artifacts for profile.""" if no_cache: # Delete doit database db_path = Path(self.profile.write_path(".doit.db")) if db_path.is_file(): db_path.unlink() assert self.actor_system is not None with self.actor_system.private() as sys: result = await sys.async_ask( self.dialogue_manager, TrainProfile(reload_actors=reload_actors)) assert isinstance( result, (ProfileTrainingComplete, ProfileTrainingFailed)), result return result # ------------------------------------------------------------------------- def mqtt_publish(self, topic: str, payload: bytes) -> None: """Publish a payload to an MQTT topic.""" assert self.actor_system is not None with self.actor_system.private() as sys: sys.tell(self.dialogue_manager, MqttPublish(topic, payload)) # ------------------------------------------------------------------------- async def wakeup_and_wait( self) -> Union[WakeWordDetected, WakeWordNotDetected]: """Listen for a wake word to be detected or not.""" assert self.actor_system is not None with self.actor_system.private() as sys: result = await sys.async_ask(self.dialogue_manager, ListenForWakeWord()) assert isinstance(result, (WakeWordDetected, WakeWordNotDetected)), result return result # ------------------------------------------------------------------------- async def get_actor_states(self) -> Dict[str, str]: """Get the current state of each Rhasspy actor.""" assert self.actor_system is not None with self.actor_system.private() as sys: result = await sys.async_ask(self.dialogue_manager, GetActorStates()) assert isinstance(result, dict), result return result # ------------------------------------------------------------------------- def send_audio_data(self, 
data: AudioData) -> None: """Send raw audio data to Rhasspy.""" assert self.actor_system is not None self.actor_system.tell(self.dialogue_manager, data) # ------------------------------------------------------------------------- async def shutdown(self) -> None: """Shut down actors.""" # Clear environment variables rhasspy_vars = [v for v in os.environ if v.startswith("RHASSPY")] for v in rhasspy_vars: del os.environ[v] # Stop actor system if self.actor_system is not None: self.actor_system.shutdown() self.actor_system = None if self._session is not None: await self._session.close() self._session = None # ------------------------------------------------------------------------- def check_profile(self) -> Dict[str, str]: """Return True if the profile has all necessary files downloaded.""" output_dir = self.profile.write_path() missing_files: Dict[str, Any] = {} # Load configuration conditions = self.profile.get("download.conditions", {}) # Check conditions for setting_name in conditions: real_value = self.profile.get(setting_name, None) # Compare setting values for setting_value, files_dict in conditions[setting_name].items(): compare_func = self._get_compare_func(setting_value) if compare_func(real_value): # Check if file needs to be downloaded for dest_name in files_dict: dest_path = os.path.join(output_dir, dest_name) if not os.path.exists(dest_path) or ( os.path.getsize(dest_path) == 0): missing_files[dest_path] = (setting_name, setting_value) return missing_files def _get_compare_func(self, value: str): """Use mini-language to allow for profile setting value comparison.""" if value.startswith(">="): f_value = float(value[2:]) return lambda v: v >= f_value if value.startswith("<="): f_value = float(value[2:]) return lambda v: v <= f_value if value.startswith(">"): f_value = float(value[1:]) return lambda v: v > f_value if value.startswith("<"): f_value = float(value[1:]) return lambda v: v < f_value if value.startswith("!"): return lambda v: v != value return lambda v: str(v) == value def _unpack_gz(self, src_path, temp_dir): # Strip off .gz and put relative to temporary directory temp_file_path = os.path.join(temp_dir, os.path.split(src_path[:-3])[1]) # Decompress single file with open(src_path, "rb") as src_file: with open(temp_file_path, "wb") as temp_file: temp_file.write(gzip.decompress(src_file.read())) return temp_file_path # ------------------------------------------------------------------------- async def download_profile(self, delete=False, chunk_size=4096) -> None: """Download all necessary profile files from the internet and extract them.""" self.download_status = [] output_dir = Path(self.profile.write_path()) download_dir = Path( self.profile.write_path( self.profile.get("download.cache_dir", "download"))) if delete and download_dir.exists(): self._logger.debug("Deleting download cache at %s", download_dir) shutil.rmtree(download_dir) download_dir.mkdir(parents=True, exist_ok=True) # Load configuration conditions = self.profile.get("download.conditions", {}) all_files = self.profile.get("download.files", {}) files_to_copy = {} files_to_extract: Dict[str, List[Tuple[str, str]]] = defaultdict(list) files_to_download: Set[str] = set() async def download_file(url, filename): try: status = f"Downloading {url} to {filename}" self.download_status.append(status) self._logger.debug(status) os.makedirs(os.path.dirname(filename), exist_ok=True) async with self.session.get(url) as response: with open(filename, "wb") as out_file: async for chunk in response.content.iter_chunked( 
chunk_size): out_file.write(chunk) status = f"Downloaded {filename}" self.download_status.append(status) self._logger.debug(status) except Exception: self._logger.exception(url) # Try to delete partially downloaded file try: status = f"Failed to download {filename}" self.download_status.append(status) self._logger.debug(status) os.unlink(filename) except Exception: pass # Check conditions machine_type = platform.machine() download_tasks = [] for setting_name in conditions: real_value = self.profile.get(setting_name, None) # Compare setting values for setting_value, files_dict in conditions[setting_name].items(): compare_func = self._get_compare_func(setting_value) if compare_func(real_value): # Check if file needs to be downloaded for dest_name, src_name in files_dict.items(): dest_path = os.path.join(output_dir, dest_name) if ":" in src_name: # File is an archive src_name, src_extract = src_name.split(":", maxsplit=1) src_path = os.path.join(download_dir, src_name) files_to_extract[src_path].append( (dest_path, src_extract)) else: # Just a regular file src_path = os.path.join(download_dir, src_name) files_to_copy[src_path] = dest_path # Get download/cache info for file src_info = all_files.get(src_name, None) if src_info is None: self._logger.error("No entry for download file %s", src_name) continue if not src_info.get("cache", True): # File will be downloaded in-place files_to_copy.pop(src_path) src_path = dest_path # Check if file is already in cache if os.path.exists(src_path) and ( os.path.getsize(src_path) > 0): self._logger.debug("Using cached %s for %s", src_path, dest_name) else: # File needs to be downloaded src_url = src_info.get("url", None) if src_url is None: # Try with machine type if machine_type in src_info: src_url = src_info[machine_type]["url"] else: self._logger.error( "No entry for download file %s with machine type %s", src_url, machine_type, ) continue # Schedule file for download if src_url not in files_to_download: download_tasks.append( self.loop.create_task( download_file(src_url, src_path))) files_to_download.add(src_url) # Wait for downloads to complete await asyncio.gather(*download_tasks) # Copy files for src_path, dest_path in files_to_copy.items(): # Remove existing file/directory if os.path.isdir(dest_path): self._logger.debug("Removing %s", dest_path) shutil.rmtree(dest_path) elif os.path.isfile(dest_path): self._logger.debug("Removing %s", dest_path) os.unlink(dest_path) # Create necessary directories os.makedirs(os.path.dirname(dest_path), exist_ok=True) # Copy file/directory as is status = f"Copying {src_path} to {dest_path}" self.download_status.append(status) self._logger.debug(status) if os.path.isdir(src_path): shutil.copytree(src_path, dest_path) else: shutil.copy2(src_path, dest_path) # Extract/install files unpack_extensions = [ ext for fmt in shutil.get_unpack_formats() for ext in fmt[1] ] for src_path, extract_paths in files_to_extract.items(): # Check if the file extension will be understood by shutil.unpack_archive known_format = False for ext in unpack_extensions: if src_path.endswith(ext): known_format = True def unpack_default(temp_dir): return shutil.unpack_archive(src_path, temp_dir) def unpack_gz(temp_dir): return self._unpack_gz(src_path, temp_dir) unpack = unpack_default if not known_format: # Handle special archives if src_path.endswith(".gz"): # Single file compressed with gzip unpack = unpack_gz else: # Very bad situation self._logger.warning( "Unknown archive extension %s. 
This is probably going to fail.", src_path, ) # Cached file is an archive. Unpack first. with tempfile.TemporaryDirectory() as temp_dir: unpack(temp_dir) for dest_path, src_extract in extract_paths: src_exclude: Dict[str, List[str]] = {} if "!" in src_extract: extract_parts = src_extract.split("!") src_extract = extract_parts[0] src_exclude = defaultdict(list) for exclude_path in extract_parts[1:]: exclude_path = os.path.join(temp_dir, exclude_path) exclude_dir, exclude_name = os.path.split( exclude_path) src_exclude[exclude_dir].append(exclude_name) # Remove existing file/directory if os.path.isdir(dest_path): self._logger.debug("Removing %s", dest_path) shutil.rmtree(dest_path) elif os.path.isfile(dest_path): self._logger.debug("Removing %s", dest_path) os.unlink(dest_path) # Create necessary directories os.makedirs(os.path.dirname(dest_path), exist_ok=True) if src_extract.endswith(":"): # Unpack .gz inside archive src_path = os.path.join(temp_dir, src_extract[:-1]) extract_path = self._unpack_gz(src_path, temp_dir) else: # Regular file extract_path = os.path.join(temp_dir, src_extract) # Copy specific file/directory status = f"Copying {extract_path} to {dest_path}" self.download_status.append(status) self._logger.debug(status) if os.path.isdir(extract_path): if src_exclude: # Ignore some files # pylint: disable=W0640 shutil.copytree( extract_path, dest_path, ignore=lambda d, fs: src_exclude[d], ) else: # Copy everything shutil.copytree(extract_path, dest_path) else: shutil.copy2(extract_path, dest_path) # ------------------------------------------------------------------------- async def get_problems(self) -> Dict[str, Any]: """Return a dictionary with problems from each actor.""" assert self.actor_system is not None with self.actor_system.private() as sys: result = await sys.async_ask(self.dialogue_manager, GetProblems()) assert isinstance(result, Problems), result return result.problems
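# ---------------------------------------------------------------------------
# Usage sketch for RhasspyCore (illustrative only; not part of this module).
# Assumptions: the profile directories and WAV path are hypothetical, and
# WavTranscription is assumed to expose the transcribed text as a `text`
# attribute. The methods used (start, transcribe_wav, recognize_intent,
# shutdown) are the ones defined above.


async def _example_transcribe_and_recognize(wav_path: str) -> None:
    """Construct a core, transcribe a WAV file, recognize its intent, shut down."""
    core = RhasspyCore(
        "en",
        "/usr/lib/rhasspy/profiles",  # hypothetical system profiles dir
        str(Path.home() / ".config" / "rhasspy" / "profiles"),  # hypothetical user profiles dir
    )
    await core.start()
    try:
        with open(wav_path, "rb") as wav_file:
            transcription = await core.transcribe_wav(wav_file.read())

        # Assumption: WavTranscription carries the recognized text in .text
        result = await core.recognize_intent(transcription.text)
        print(result.intent)
    finally:
        await core.shutdown()


# To run the sketch:
#     asyncio.run(_example_transcribe_and_recognize("command.wav"))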
def train_profile(profile_dir: Path, profile: Profile) -> None: # Compact def ppath(query, default=None, write=False): return utils_ppath(profile, profile_dir, query, default, write=write) # Inputs stt_system = profile.get("speech_to_text.system") stt_prefix = f"speech_to_text.{stt_system}" # intent_whitelist = ppath("training.intent-whitelist", "intent_whitelist") sentences_ini = ppath("speech_to_text.sentences_ini", "sentences.ini") base_dictionary = ppath(f"{stt_prefix}.base_dictionary", "base_dictionary.txt") base_language_model = ppath(f"{stt_prefix}.language_model", "base_language_model.txt") base_language_model_fst = ppath(f"{stt_prefix}.base_language_model_fst", "base_language_model.fst") base_language_model_weight = float( profile.get(f"{stt_prefix}.mix_weight", 0)) custom_words = ppath(f"{stt_prefix}.custom_words", "custom_words.txt") g2p_model = ppath(f"{stt_prefix}.g2p_model", "g2p.fst") acoustic_model_type = stt_system if acoustic_model_type == "pocketsphinx": acoustic_model = ppath(f"{stt_prefix}.acoustic-model", "acoustic_model") kaldi_dir = None elif acoustic_model_type == "kaldi": kaldi_dir = Path( os.path.expandvars( profile.get(f"{stt_prefix}.kaldi_dir", "/opt/kaldi"))) acoustic_model = ppath(f"{stt_prefix}.model_dir", "model") else: assert False, f"Unknown acoustic model type: {acoustic_model_type}" # ignore/upper/lower word_casing = profile.get("speech_to_text.dictionary_casing", "ignore").lower() # default/ignore/upper/lower g2p_word_casing = profile.get("speech_to_text.g2p_casing", word_casing).lower() # all/first dict_merge_rule = profile.get("speech_to_text.dictionary_merge_rule", "all").lower() # Kaldi kaldi_graph_dir = acoustic_model / profile.get(f"{stt_prefix}.graph", "graph") # Outputs dictionary = ppath(f"{stt_prefix}.dictionary", "dictionary.txt", write=True) language_model = ppath(f"{stt_prefix}.language_model", "language_model.txt", write=True) intent_fst = ppath("intent.fsticiffs.intent_fst", "intent.fst", write=True) vocab = ppath(f"{stt_prefix}.vocabulary", "vocab.txt", write=True) unknown_words = ppath(f"{stt_prefix}.unknown_words", "unknown_words.txt", write=True) grammar_dir = ppath("speech_to_text.grammars_dir", "grammars", write=True) fsts_dir = ppath("speech_to_text.fsts_dir", "fsts", write=True) slots_dir = ppath("speech_to_text.slots_dir", "slots", write=True) # ----------------------------------------------------------------------------- # Create cache directories for dir_path in [grammar_dir, fsts_dir]: dir_path.mkdir(parents=True, exist_ok=True) # ----------------------------------------------------------------------------- # Set of used intents intents: Set[str] = set() whitelist = None # Default to using all intents intents.update(_get_intents(sentences_ini)) # Check if intent whitelist exists # if intent_whitelist.exists(): # with open(intent_whitelist, "r") as whitelist_file: # # Each line is an intent to use # for line in whitelist_file: # line = line.strip() # if len(line) > 0: # if whitelist is None: # whitelist = [] # intents.clear() # whitelist.append(line) # intents.add(line) # ----------------------------------------------------------------------------- def task_grammars(): """Transforms sentences.ini into JSGF grammars, one per intent.""" maybe_deps = [] # if intent_whitelist.exists(): # maybe_deps.append(intent_whitelist) def ini_to_grammars(targets): with open(sentences_ini, "r") as sentences_file: make_grammars(sentences_file, grammar_dir, whitelist=whitelist) return { "file_dep": [sentences_ini] + maybe_deps, "targets": 
[grammar_dir / f"{intent}.gram" for intent in intents], "actions": [ini_to_grammars], } # ----------------------------------------------------------------------------- def do_slots_to_fst(slot_names, targets): # Extra arguments for word casing kwargs = {} if word_casing == "upper": kwargs["upper"] = True elif word_casing == "lower": kwargs["lower"] = True slot_fsts = slots_to_fsts(slots_dir, slot_names=slot_names, **kwargs) for slot_name, slot_fst in slot_fsts.items(): # Slot name will already have "$" slot_fst.write(str(fsts_dir / f"{slot_name}.fst")) def do_grammar_to_fsts(grammar_path: Path, replace_fst_paths: Dict[str, Path], targets): # Load dependent FSTs replace_fsts = { replace_name: fst.Fst.read(str(replace_path)) for replace_name, replace_path in replace_fst_paths.items() } # Extra arguments for word casing kwargs = {} if word_casing == "upper": kwargs["upper"] = True elif word_casing == "lower": kwargs["lower"] = True grammar = grammar_path.read_text() listener = grammar_to_fsts(grammar, replace_fsts=replace_fsts, **kwargs) grammar_name = listener.grammar_name # Write FST for each JSGF rule for rule_name, rule_fst in listener.fsts.items(): fst_path = fsts_dir / f"{rule_name}.fst" rule_fst.write(str(fst_path)) # Write FST for main grammar rule grammar_fst_path = fsts_dir / f"{grammar_name}.fst" assert listener.grammar_fst is not None listener.grammar_fst.write(str(grammar_fst_path)) # ----------------------------------------------------------------------------- def do_grammar_dependencies(grammar_path: Path, targets): grammar = grammar_path.read_text() grammar_deps = get_grammar_dependencies(grammar).graph graph_json = nx.readwrite.json_graph.node_link_data(grammar_deps) with open(targets[0], "w") as graph_file: json.dump(graph_json, graph_file) @create_after(executed="grammars") def task_grammar_dependencies(): """Creates grammar dependency graphs from JSGF grammars and relevant slots.""" for intent in intents: grammar_path = grammar_dir / f"{intent}.gram" yield { "name": intent + "_dependencies", "file_dep": [grammar_path], "targets": [str(grammar_path) + ".json"], "actions": [(do_grammar_dependencies, [grammar_path])], } # ----------------------------------------------------------------------------- @create_after(executed="grammar_dependencies") def task_grammar_fsts(): """Creates grammar FSTs from JSGF grammars and relevant slots.""" used_slots: Set[str] = set() for intent in intents: grammar_path = grammar_dir / f"{intent}.gram" grammar_dep_path = str(grammar_path) + ".json" # Load dependency graph with open(grammar_dep_path, "r") as graph_file: graph_data = json.load(graph_file) grammar_deps = nx.readwrite.json_graph.node_link_graph( graph_data) rule_names: Set[str] = set() replace_fst_paths: Dict[str, Path] = {} # Process dependencies for node, data in grammar_deps.nodes(data=True): node_type = data["type"] if node_type == "slot": # Strip "$" slot_name = node[1:] used_slots.add(slot_name) # Path to slot FST replace_fst_paths[node] = fsts_dir / f"{node}.fst" elif node_type == "remote rule": # Path to rule FST replace_fst_paths[node] = fsts_dir / f"{node}.fst" elif node_type == "local rule": rule_names.add(node) # All rule/grammar FSTs that will be generated grammar_fst_paths = [ fsts_dir / f"{rule_name}.fst" for rule_name in rule_names ] grammar_fst_paths.append(fsts_dir / f"{intent}.fst") yield { "name": intent + "_fst", "file_dep": [grammar_path, grammar_dep_path] + list(replace_fst_paths.values()), "targets": grammar_fst_paths, "actions": [(do_grammar_to_fsts, 
[grammar_path, replace_fst_paths])], } # slots -> FST if len(used_slots) > 0: yield { "name": "slot_fsts", "file_dep": [slots_dir / slot_name for slot_name in used_slots], "targets": [fsts_dir / f"${slot_name}.fst" for slot_name in used_slots], "actions": [(do_slots_to_fst, [used_slots])], } # ----------------------------------------------------------------------------- def do_intent_fst(intents: Iterable[str], targets): intent_fsts = { intent: fst.Fst.read(str(fsts_dir / f"{intent}.fst")) for intent in intents } intent_fst = make_intent_fst(intent_fsts) intent_fst.write(targets[0]) @create_after(executed="grammar_fsts") def task_intent_fst(): """Merges grammar FSTs into single intent.fst.""" return { "file_dep": [fsts_dir / f"{intent}.fst" for intent in intents], "targets": [intent_fst], "actions": [(do_intent_fst, [intents])], } # ----------------------------------------------------------------------------- @create_after(executed="intent_fst") def task_language_model(): """Creates an ARPA language model from intent.fst.""" if base_language_model_weight > 0: yield { "name": "base_lm_to_fst", "file_dep": [base_language_model], "targets": [base_language_model_fst], "actions": ["ngramread --ARPA %(dependencies)s %(targets)s"], } # FST -> n-gram counts intent_counts = str(intent_fst) + ".counts" yield { "name": "intent_counts", "file_dep": [intent_fst], "targets": [intent_counts], "actions": ["ngramcount %(dependencies)s %(targets)s"], } # n-gram counts -> model intent_model = str(intent_fst) + ".model" yield { "name": "intent_model", "file_dep": [intent_counts], "targets": [intent_model], "actions": ["ngrammake %(dependencies)s %(targets)s"], } if base_language_model_weight > 0: merged_model = str(intent_model) + ".merge" # merge yield { "name": "lm_merge", "file_dep": [base_language_model_fst, intent_model], "targets": [merged_model], "actions": [ f"ngrammerge --alpha={base_language_model_weight} %(dependencies)s %(targets)s" ], } intent_model = merged_model # model -> ARPA yield { "name": "intent_arpa", "file_dep": [intent_model], "targets": [language_model], "actions": ["ngramprint --ARPA %(dependencies)s > %(targets)s"], } # ----------------------------------------------------------------------------- def do_vocab(targets): with open(targets[0], "w") as vocab_file: input_symbols = fst.Fst.read(str(intent_fst)).input_symbols() for i in range(input_symbols.num_symbols()): symbol = input_symbols.find(i).decode().strip() if not (symbol.startswith("__") or symbol.startswith("<")): print(symbol, file=vocab_file) if base_language_model_weight > 0: # Add all words from base dictionary with open(base_dictionary, "r") as dict_file: for word in read_dict(dict_file): print(word, file=vocab_file) @create_after(executed="language_model") def task_vocab(): """Writes all vocabulary words to a file from intent.fst.""" return { "file_dep": [intent_fst], "targets": [vocab], "actions": [do_vocab] } # ----------------------------------------------------------------------------- def do_dict(dictionary_paths: Iterable[Path], targets): with open(targets[0], "w") as dictionary_file: if unknown_words.exists(): unknown_words.unlink() dictionary_format = FORMAT_CMU if acoustic_model_type == "julius": dictionary_format = FORMAT_JULIUS # Extra arguments for word casing kwargs = {} if word_casing == "upper": kwargs["upper"] = True elif word_casing == "lower": kwargs["lower"] = True make_dict( vocab, dictionary_paths, dictionary_file, unknown_path=unknown_words, dictionary_format=dictionary_format, 
merge_rule=dict_merge_rule, **kwargs, ) if unknown_words.exists() and g2p_model.exists(): # Generate single pronunciation guesses logger.debug("Guessing pronunciations for unknown word(s)") g2p_output = subprocess.check_output( [ "phonetisaurus-apply", "--model", str(g2p_model), "--word_list", str(unknown_words), "--nbest", "1", ], universal_newlines=True, ) g2p_transform = lambda w: w if g2p_word_casing == "upper": g2p_transform = lambda w: w.upper() elif g2p_word_casing == "lower": g2p_transform = lambda w: w.lower() # Append to dictionary and custom words with open(custom_words, "a") as words_file: with open(unknown_words, "w") as unknown_words_file: for line in g2p_output.splitlines(): line = line.strip() word, phonemes = re.split(r"\s+", line, maxsplit=1) word = g2p_transform(word) print(word, phonemes, file=dictionary_file) print(word, phonemes, file=words_file) print(word, phonemes, file=unknown_words_file) @create_after(executed="vocab") def task_vocab_dict(): """Creates custom pronunciation dictionary based on desired vocabulary.""" dictionary_paths = [base_dictionary] if custom_words.exists(): # Custom dictionary goes first so that the "first" dictionary merge # rule will choose pronunciations from it. dictionary_paths.insert(0, custom_words) # Exclude dictionaries that don't exist dictionary_paths = [p for p in dictionary_paths if p.exists()] return { "file_dep": [vocab] + dictionary_paths, "targets": [dictionary], "actions": [(do_dict, [dictionary_paths])], } # ----------------------------------------------------------------------------- @create_after(executed="vocab_dict") def task_kaldi_train(): """Creates HCLG.fst for a Kaldi nnet3 or gmm model.""" if acoustic_model_type == "kaldi": return { "file_dep": [dictionary, language_model], "targets": [kaldi_graph_dir / "HCLG.fst"], "actions": [[ "bash", str(acoustic_model / "train.sh"), str(kaldi_dir), str(acoustic_model), str(dictionary), str(language_model), ]], } # ----------------------------------------------------------------------------- DOIT_CONFIG = {"action_string_formatting": "old"} # Run doit main DoitMain(ModuleTaskLoader(locals())).run(sys.argv[1:])
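# ---------------------------------------------------------------------------
# Minimal sketch of the doit convention used by both train_profile() variants
# above (illustrative only; the task and file names here are made up).
# Every task_* function visible to ModuleTaskLoader returns a dict with
# file_dep/targets/actions; doit re-runs a task only when a dependency is
# newer than its targets, which is what makes repeated training cheap.
# With action_string_formatting set to "old", %(dependencies)s and
# %(targets)s are substituted into shell-string actions.


def _example_doit_pipeline(work_dir: Path) -> int:
    """Declare one made-up task, load it from locals(), and run doit (sketch)."""

    def task_copy_vocab():
        """vocab.txt -> vocab_copy.txt (illustrative task, not used by Rhasspy)."""
        return {
            "file_dep": [work_dir / "vocab.txt"],
            "targets": [work_dir / "vocab_copy.txt"],
            "actions": ["cp %(dependencies)s %(targets)s"],
        }

    # Picked up by ModuleTaskLoader along with the task_* functions
    DOIT_CONFIG = {"action_string_formatting": "old"}

    return DoitMain(ModuleTaskLoader(locals())).run([])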