def get_feat_dim(self) -> int:
    """
    Calculate the feature dimension for the corpus

    Returns
    -------
    int
        Dimension of feature vectors
    """
    feature_string = self.construct_base_feature_string()
    with open(os.path.join(self.features_log_directory, "feat-to-dim.log"), "w") as log_file:
        subset_proc = subprocess.Popen(
            [
                thirdparty_binary("subset-feats"),
                "--n=1",
                feature_string,
                "ark:-",
            ],
            stderr=log_file,
            stdout=subprocess.PIPE,
        )
        dim_proc = subprocess.Popen(
            [thirdparty_binary("feat-to-dim"), "ark:-", "-"],
            stdin=subset_proc.stdout,
            stdout=subprocess.PIPE,
            stderr=log_file,
        )
        stdout, _ = dim_proc.communicate()
        # feat-to-dim prints the dimension of the single subsetted matrix
        dim = stdout.decode("utf8").strip()
    return int(dim)
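
# For reference, a minimal standalone sketch of the same subset-feats |
# feat-to-dim pipe, assuming the Kaldi binaries are on PATH and ``feats_scp``
# points at an existing feats.scp; the function name and argument are
# illustrative, not part of the original API.
def example_feat_dim_from_scp(feats_scp: str) -> int:
    import subprocess

    # Pull a single feature matrix so feat-to-dim has minimal work to do
    subset_proc = subprocess.Popen(
        ["subset-feats", "--n=1", f"scp:{feats_scp}", "ark:-"],
        stdout=subprocess.PIPE,
    )
    # feat-to-dim writes the dimension to stdout when given "-"
    stdout = subprocess.check_output(["feat-to-dim", "ark:-", "-"], stdin=subset_proc.stdout)
    subset_proc.wait()
    return int(stdout.decode("utf8").strip())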
def _run(self) -> typing.Generator[None]:
    """Run the function"""
    with open(self.log_path, "w", encoding="utf8") as log_file:
        subsample_feats_proc = subprocess.Popen(
            [
                thirdparty_binary("subsample-feats"),
                f"--n={self.ivector_options['subsample']}",
                self.feature_string,
                "ark:-",
            ],
            stdout=subprocess.PIPE,
            stderr=log_file,
            env=os.environ,
        )
        acc_stats_proc = subprocess.Popen(
            [
                thirdparty_binary("ivector-extractor-acc-stats"),
                "--num-threads=1",
                self.ie_path,
                "ark:-",
                f"ark:{self.post_path}",
                self.acc_init_path,
            ],
            stdin=subsample_feats_proc.stdout,
            stderr=log_file,
            env=os.environ,
        )
        acc_stats_proc.communicate()
        yield None
def _run(self) -> typing.Generator[None]:
    """Run the function"""
    with open(self.log_path, "w", encoding="utf8") as log_file:
        subsample_feats_proc = subprocess.Popen(
            [
                thirdparty_binary("subsample-feats"),
                f"--n={self.ivector_options['subsample']}",
                self.feature_string,
                "ark:-",
            ],
            stdout=subprocess.PIPE,
            stderr=log_file,
            env=os.environ,
        )
        gmm_global_acc_proc = subprocess.Popen(
            [
                thirdparty_binary("gmm-global-acc-stats"),
                f"--gselect=ark:{self.gselect_path}",
                self.dubm_model,
                "ark:-",
                self.acc_path,
            ],
            stderr=log_file,
            stdin=subsample_feats_proc.stdout,
            env=os.environ,
        )
        gmm_global_acc_proc.communicate()
        yield None
def _trainer_initialization(self) -> None:
    """Ivector extractor training initialization"""
    self.iteration = 1
    self.training_complete = False
    # Initialize the i-vector extractor
    log_directory = os.path.join(self.working_directory, "log")
    log_path = os.path.join(log_directory, "init.log")
    diag_ubm_path = os.path.join(self.working_directory, "final.dubm")
    full_ubm_path = os.path.join(self.working_directory, "final.ubm")
    with open(log_path, "w") as log_file:
        # Convert the diagonal UBM to a full-covariance UBM
        subprocess.call(
            [thirdparty_binary("gmm-global-to-fgmm"), diag_ubm_path, full_ubm_path],
            stderr=log_file,
        )
        subprocess.call(
            [
                thirdparty_binary("ivector-extractor-init"),
                f"--ivector-dim={self.ivector_dimension}",
                "--use-weights=false",
                full_ubm_path,
                self.ie_path,
            ],
            stderr=log_file,
        )
    # Do Gaussian selection and posterior extraction
    self.gauss_to_post()
    parse_logs(log_directory)
def compose_clg(
    in_disambig: str,
    out_disambig: str,
    context_width: int,
    central_pos: int,
    ilabels_temp: str,
    lg_path: str,
    clg_path: str,
    log_file: TextIO,
) -> None:
    """
    Compose a CLG.fst

    See Also
    --------
    :kaldi_src:`fstcomposecontext`
        Relevant Kaldi binary
    :openfst_src:`fstarcsort`
        Relevant OpenFst binary

    Parameters
    ----------
    in_disambig: str
        Path to read disambiguation symbols file
    out_disambig: str
        Path to write disambiguation symbols file
    context_width: int
        Context width of the acoustic model
    central_pos: int
        Central position of the acoustic model
    ilabels_temp: str
        Temporary file for ilabels
    lg_path: str
        Path to a LG.fst file
    clg_path: str
        Path to save CLG.fst file
    log_file: TextIO
        Log file handler to output logging info to
    """
    compose_proc = subprocess.Popen(
        [
            thirdparty_binary("fstcomposecontext"),
            f"--context-size={context_width}",
            f"--central-position={central_pos}",
            f"--read-disambig-syms={in_disambig}",
            f"--write-disambig-syms={out_disambig}",
            ilabels_temp,
            lg_path,
        ],
        stdout=subprocess.PIPE,
        stderr=log_file,
        env=os.environ,
    )
    sort_proc = subprocess.Popen(
        [thirdparty_binary("fstarcsort"), "--sort_type=ilabel", "-", clg_path],
        stdin=compose_proc.stdout,
        stderr=log_file,
        env=os.environ,
    )
    sort_proc.communicate()
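
# A hypothetical invocation sketch for compose_clg. All paths are placeholders
# (a real call uses files produced earlier in graph construction); a standard
# triphone model has context_width=3 and central_pos=1.
def example_compose_clg() -> None:
    with open("compose_clg.log", "w", encoding="utf8") as log_file:
        compose_clg(
            in_disambig="disambig.int",
            out_disambig="disambig_ilabels.int",
            context_width=3,
            central_pos=1,
            ilabels_temp="ilabels.3_1",
            lg_path="LG.fst",
            clg_path="CLG.fst",
            log_file=log_file,
        )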
def _lexicon_covering(self) -> None:
    """Builds covering grammar and lexicon FARs."""
    # Sets of labels for the covering grammar.
    with open(
        os.path.join(self.working_log_directory, "covering_grammar.log"), "w", encoding="utf8"
    ) as log_file:
        com = [
            thirdparty_binary("farcompilestrings"),
            "--fst_type=compact",
        ]
        if self.input_token_type != "utf8":
            com.append("--token_type=symbol")
            com.append(f"--symbols={self.input_token_type}")
            com.append("--unknown_symbol=<unk>")
        else:
            com.append("--token_type=utf8")
        com.extend([self.input_path, self.input_far_path])
        print(" ".join(com), file=log_file)
        subprocess.check_call(com, env=os.environ, stderr=log_file, stdout=log_file)
        com = [
            thirdparty_binary("farcompilestrings"),
            "--fst_type=compact",
            "--token_type=symbol",
            f"--symbols={self.phone_symbol_table_path}",
            self.output_path,
            self.output_far_path,
        ]
        print(" ".join(com), file=log_file)
        subprocess.check_call(com, env=os.environ, stderr=log_file, stdout=log_file)
        ilabels = _get_far_labels(self.input_far_path)
        print(ilabels, file=log_file)
        olabels = _get_far_labels(self.output_far_path)
        print(olabels, file=log_file)
    cg = pywrapfst.VectorFst()
    state = cg.add_state()
    cg.set_start(state)
    one = pywrapfst.Weight.one(cg.weight_type())
    for ilabel, olabel in itertools.product(ilabels, olabels):
        cg.add_arc(state, pywrapfst.Arc(ilabel, olabel, one, state))
    # Handles epsilons, carefully avoiding adding a useless 0:0 label.
    if self.insertions:
        for olabel in olabels:
            cg.add_arc(state, pywrapfst.Arc(0, olabel, one, state))
    if self.deletions:
        for ilabel in ilabels:
            cg.add_arc(state, pywrapfst.Arc(ilabel, 0, one, state))
    cg.set_final(state)
    assert cg.verify(), "Label acceptor is ill-formed"
    cg.write(self.cg_path)
def _run(self) -> typing.Generator[typing.Tuple[int, int]]:
    """Run the function"""
    with open(self.log_path, "a", encoding="utf8") as log_file:
        for dict_id in self.dictionaries:
            # OpenFst builds differ in the flag name for output projection
            if sys.platform == "win32":
                project_type_arg = "--project_output=true"
            else:
                project_type_arg = "--project_type=output"
            lat_path = self.lat_paths[dict_id]
            rescored_lat_path = self.rescored_lat_paths[dict_id]
            old_g_path = self.old_g_paths[dict_id]
            new_g_path = self.new_g_paths[dict_id]
            if os.path.exists(rescored_lat_path):
                continue
            project_proc = subprocess.Popen(
                [thirdparty_binary("fstproject"), project_type_arg, old_g_path],
                stdout=subprocess.PIPE,
                stderr=log_file,
                env=os.environ,
            )
            # Subtract the old LM scores from the lattices...
            lmrescore_proc = subprocess.Popen(
                [
                    thirdparty_binary("lattice-lmrescore"),
                    "--lm-scale=-1.0",
                    f"ark:{lat_path}",
                    "-",
                    "ark:-",
                ],
                stdout=subprocess.PIPE,
                stdin=project_proc.stdout,
                stderr=log_file,
                env=os.environ,
            )
            # ...then add scores from the new const-arpa language model
            lmrescore_const_proc = subprocess.Popen(
                [
                    thirdparty_binary("lattice-lmrescore-const-arpa"),
                    "--lm-scale=1.0",
                    "ark:-",
                    new_g_path,
                    f"ark:{rescored_lat_path}",
                ],
                stdin=lmrescore_proc.stdout,
                stderr=subprocess.PIPE,
                env=os.environ,
                encoding="utf8",
            )
            for line in lmrescore_const_proc.stderr:
                log_file.write(line)
                m = self.progress_pattern.match(line.strip())
                if m:
                    yield int(m.group("succeeded")), int(m.group("failed"))
            self.check_call(lmrescore_const_proc)
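
# The two-step rescoring above is the standard Kaldi recipe: rescoring with
# --lm-scale=-1.0 removes the old G.fst scores, and the const-arpa pass adds
# the new LM scores back in. A hedged standalone sketch of the same pipe, with
# placeholder paths and Kaldi/OpenFst binaries assumed on PATH:
def example_lm_rescore(lat_ark: str, old_g_fst: str, new_g_const_arpa: str, out_ark: str) -> None:
    import subprocess

    # Project the old grammar onto its output labels (words)
    project_proc = subprocess.Popen(
        ["fstproject", "--project_type=output", old_g_fst],
        stdout=subprocess.PIPE,
    )
    # Remove old LM scores by adding them with a negative scale
    subtract_proc = subprocess.Popen(
        ["lattice-lmrescore", "--lm-scale=-1.0", f"ark:{lat_ark}", "-", "ark:-"],
        stdin=project_proc.stdout,
        stdout=subprocess.PIPE,
    )
    # Add the new LM scores from the const-arpa model
    subprocess.check_call(
        ["lattice-lmrescore-const-arpa", "--lm-scale=1.0", "ark:-", new_g_const_arpa, f"ark:{out_ark}"],
        stdin=subtract_proc.stdout,
    )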
def _run(self) -> typing.Generator[int]:
    """Run the function"""
    # Estimating MLLT
    with open(self.log_path, "w", encoding="utf8") as log_file:
        for dict_id in self.dictionaries:
            ali_path = self.ali_paths[dict_id]
            feature_string = self.feature_strings[dict_id]
            macc_path = self.macc_paths[dict_id]
            post_proc = subprocess.Popen(
                [thirdparty_binary("ali-to-post"), f"ark:{ali_path}", "ark:-"],
                stdout=subprocess.PIPE,
                stderr=log_file,
                env=os.environ,
            )
            weight_proc = subprocess.Popen(
                [
                    thirdparty_binary("weight-silence-post"),
                    "0.0",
                    self.lda_options["silence_csl"],
                    self.model_path,
                    "ark:-",
                    "ark:-",
                ],
                stdin=post_proc.stdout,
                stdout=subprocess.PIPE,
                stderr=log_file,
                env=os.environ,
            )
            acc_proc = subprocess.Popen(
                [
                    thirdparty_binary("gmm-acc-mllt"),
                    f"--rand-prune={self.lda_options['random_prune']}",
                    self.model_path,
                    feature_string,
                    "ark,s,cs:-",
                    macc_path,
                ],
                stdin=weight_proc.stdout,
                stderr=subprocess.PIPE,
                encoding="utf8",
                env=os.environ,
            )
            for line in acc_proc.stderr:
                log_file.write(line)
                m = self.progress_pattern.match(line.strip())
                if m:
                    yield 1
            self.check_call(acc_proc)
def _run(self) -> typing.Generator[typing.Tuple[int, float]]:
    """Run the function"""
    with open(self.log_path, "w", encoding="utf8") as log_file:
        for dict_id in self.dictionaries:
            feature_string = self.feature_strings[dict_id]
            fst_path = self.fst_ark_paths[dict_id]
            ali_path = self.ali_paths[dict_id]
            com = [
                thirdparty_binary("gmm-align-compiled"),
                f"--transition-scale={self.align_options['transition_scale']}",
                f"--acoustic-scale={self.align_options['acoustic_scale']}",
                f"--self-loop-scale={self.align_options['self_loop_scale']}",
                f"--beam={self.align_options['beam']}",
                f"--retry-beam={self.align_options['retry_beam']}",
                "--careful=false",
                "-",
                f"ark:{fst_path}",
                feature_string,
                f"ark:{ali_path}",
                "ark,t:-",
            ]
            boost_proc = subprocess.Popen(
                [
                    thirdparty_binary("gmm-boost-silence"),
                    f"--boost={self.align_options['boost_silence']}",
                    self.align_options["optional_silence_csl"],
                    self.model_path,
                    "-",
                ],
                stderr=log_file,
                stdout=subprocess.PIPE,
                env=os.environ,
            )
            align_proc = subprocess.Popen(
                com,
                stdout=subprocess.PIPE,
                stderr=log_file,
                encoding="utf8",
                stdin=boost_proc.stdout,
                env=os.environ,
            )
            for line in align_proc.stdout:
                line = line.strip()
                utterance, log_likelihood = line.split()
                u_id = int(utterance.split("-")[-1])
                yield u_id, float(log_likelihood)
            self.check_call(align_proc)
def _run(self) -> typing.Generator[typing.Tuple[str, float, int]]:
    """Run the function"""
    with open(self.log_path, "w", encoding="utf8") as log_file:
        for dict_id in self.dictionaries:
            feature_string = self.feature_strings[dict_id]
            words_path = self.word_symbol_paths[dict_id]
            hclg_path = self.hclg_paths[dict_id]
            tmp_lat_path = self.tmp_lat_paths[dict_id]
            lat_gen_proc = subprocess.Popen(
                [
                    thirdparty_binary("gmm-latgen-faster"),
                    f"--max-active={self.decode_options['max_active']}",
                    f"--beam={self.decode_options['beam']}",
                    f"--lattice-beam={self.decode_options['lattice_beam']}",
                    f"--acoustic-scale={self.decode_options['acoustic_scale']}",
                    "--determinize-lattice=false",
                    "--allow-partial=true",
                    f"--word-symbol-table={words_path}",
                    self.model_path,
                    hclg_path,
                    feature_string,
                    f"ark:{tmp_lat_path}",
                ],
                stderr=subprocess.PIPE,
                env=os.environ,
                encoding="utf8",
            )
            for line in lat_gen_proc.stderr:
                log_file.write(line)
                m = self.progress_pattern.match(line.strip())
                if m:
                    yield m.group("utterance"), float(m.group("loglike")), int(
                        m.group("num_frames")
                    )
            self.check_call(lat_gen_proc)
def calc_cmvn(self) -> None:
    """
    Calculate CMVN statistics for speakers

    See Also
    --------
    :kaldi_src:`compute-cmvn-stats`
        Relevant Kaldi binary
    """
    self._write_feats()
    self._write_spk2utt()
    spk2utt = os.path.join(self.corpus_output_directory, "spk2utt.scp")
    feats = os.path.join(self.corpus_output_directory, "feats.scp")
    cmvn_ark = os.path.join(self.corpus_output_directory, "cmvn.ark")
    cmvn_scp = os.path.join(self.corpus_output_directory, "cmvn.scp")
    log_path = os.path.join(self.features_log_directory, "cmvn.log")
    with open(log_path, "w") as logf:
        subprocess.call(
            [
                thirdparty_binary("compute-cmvn-stats"),
                f"--spk2utt=ark:{spk2utt}",
                f"scp:{feats}",
                f"ark,scp:{cmvn_ark},{cmvn_scp}",
            ],
            stderr=logf,
            env=os.environ,
        )
    update_mapping = []
    with self.session() as session:
        for s, cmvn in load_scp(cmvn_scp).items():
            if isinstance(cmvn, list):
                cmvn = " ".join(cmvn)
            update_mapping.append({"id": int(s), "cmvn": cmvn})
        session.bulk_update_mappings(Speaker, update_mapping)
        session.commit()
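
# Downstream, the per-speaker statistics written above are consumed with
# apply-cmvn. A hedged sketch of the standard invocation, with placeholder
# paths and the Kaldi binary assumed on PATH:
def example_apply_cmvn(utt2spk: str, cmvn_scp: str, feats_scp: str, out_ark: str) -> None:
    import subprocess

    subprocess.check_call(
        [
            "apply-cmvn",
            f"--utt2spk=ark:{utt2spk}",  # map utterances to their speaker's stats
            f"scp:{cmvn_scp}",
            f"scp:{feats_scp}",
            f"ark:{out_ark}",
        ]
    )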
def _run(self) -> typing.Generator[typing.Tuple[int, int]]:
    """Run the function"""
    with open(self.log_path, "w", encoding="utf8") as log_file:
        for dict_id in self.dictionaries:
            processed_count = 0
            acc_proc = subprocess.Popen(
                [
                    thirdparty_binary("gmm-acc-stats-ali"),
                    self.model_path,
                    self.feature_strings[dict_id],
                    f"ark,s,cs:{self.ali_paths[dict_id]}",
                    self.acc_paths[dict_id],
                ],
                stderr=subprocess.PIPE,
                encoding="utf8",
                env=os.environ,
            )
            for line in acc_proc.stderr:
                log_file.write(line)
                m = self.progress_pattern.match(line.strip())
                if m:
                    now_processed = int(m.group("utterances"))
                    progress_update = now_processed - processed_count
                    processed_count = now_processed
                    yield progress_update, 0
                else:
                    m = self.done_pattern.match(line.strip())
                    if m:
                        now_processed = int(m.group("utterances"))
                        progress_update = now_processed - processed_count
                        yield progress_update, int(m.group("errors"))
            self.check_call(acc_proc)
def tree_stats_func(arguments: TreeStatsArguments) -> None:
    """
    Multiprocessing function for calculating tree stats for training

    See Also
    --------
    :meth:`.TriphoneTrainer.tree_stats`
        Main function that calls this function in parallel
    :meth:`.TriphoneTrainer.tree_stats_arguments`
        Job method for generating arguments for this function
    :kaldi_src:`acc-tree-stats`
        Relevant Kaldi binary

    Parameters
    ----------
    arguments: TreeStatsArguments
        Arguments for the function
    """
    with open(arguments.log_path, "w", encoding="utf8") as log_file:
        for dict_id in arguments.dictionaries:
            feature_string = arguments.feature_strings[dict_id]
            ali_path = arguments.ali_paths[dict_id]
            treeacc_path = arguments.treeacc_paths[dict_id]
            subprocess.call(
                [
                    thirdparty_binary("acc-tree-stats"),
                    f"--ci-phones={arguments.ci_phones}",
                    arguments.model_path,
                    feature_string,
                    f"ark:{ali_path}",
                    treeacc_path,
                ],
                stderr=log_file,
            )
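
# For reference, a single-process sketch of the same accumulation without the
# job bookkeeping. Paths are placeholders, the Kaldi binary is assumed on PATH,
# and ci_phones is a colon-separated list of context-independent phone ids
# (the "1:2:3" default here is purely illustrative):
def example_acc_tree_stats(
    model_path: str, feats_scp: str, ali_ark: str, out_acc: str, ci_phones: str = "1:2:3"
) -> None:
    import subprocess

    subprocess.check_call(
        [
            "acc-tree-stats",
            f"--ci-phones={ci_phones}",
            model_path,
            f"scp:{feats_scp}",
            f"ark:{ali_ark}",
            out_acc,
        ]
    )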
def _run(self) -> typing.Generator[typing.Tuple[int, int]]:
    """Run the function"""
    with open(self.log_path, "w", encoding="utf8") as log_file:
        for dict_id in self.dictionaries:
            ali_path = self.ali_paths[dict_id]
            new_ali_path = self.new_ali_paths[dict_id]
            convert_proc = subprocess.Popen(
                [
                    thirdparty_binary("convert-ali"),
                    self.align_model_path,
                    self.model_path,
                    self.tree_path,
                    f"ark:{ali_path}",
                    f"ark:{new_ali_path}",
                ],
                stderr=subprocess.PIPE,
                encoding="utf8",
                env=os.environ,
            )
            for line in convert_proc.stderr:
                log_file.write(line)
                m = self.progress_pattern.match(line.strip())
                if m:
                    yield int(m.group("utterances")), int(m.group("failed"))
            self.check_call(convert_proc)
def compose_g(arpa_path: str, words_path: str, g_path: str, log_file: TextIO) -> None:
    """
    Create G.fst from an ARPA formatted language model

    See Also
    --------
    :kaldi_src:`arpa2fst`
        Relevant Kaldi binary

    Parameters
    ----------
    arpa_path: str
        Path to ARPA file
    words_path: str
        Path to words symbols file
    g_path: str
        Path to output G.fst file
    log_file: TextIO
        Log file handler to output logging info to
    """
    arpafst_proc = subprocess.Popen(
        [
            thirdparty_binary("arpa2fst"),
            "--disambig-symbol=#0",
            f"--read-symbol-table={words_path}",
            arpa_path,
            g_path,
        ],
        stderr=log_file,
        stdout=log_file,
    )
    arpafst_proc.communicate()
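
# A hypothetical invocation of compose_g; the ARPA file, words.txt, and output
# path below are placeholders:
def example_compose_g() -> None:
    with open("compose_g.log", "w", encoding="utf8") as log_file:
        compose_g("lm.arpa", "words.txt", "G.fst", log_file)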
def _run(self) -> typing.Generator[typing.Tuple[int, int]]:
    """Run the function"""
    with open(self.log_path, "w", encoding="utf8") as log_file:
        for dict_id in self.dictionaries:
            ali_path = self.ali_paths[dict_id]
            feature_string = self.feature_strings[dict_id]
            acc_path = self.acc_paths[dict_id]
            ali_to_post_proc = subprocess.Popen(
                [thirdparty_binary("ali-to-post"), f"ark:{ali_path}", "ark:-"],
                stderr=log_file,
                stdout=subprocess.PIPE,
                env=os.environ,
            )
            weight_silence_post_proc = subprocess.Popen(
                [
                    thirdparty_binary("weight-silence-post"),
                    "0.0",
                    self.lda_options["silence_csl"],
                    self.model_path,
                    "ark:-",
                    "ark:-",
                ],
                stdin=ali_to_post_proc.stdout,
                stderr=log_file,
                stdout=subprocess.PIPE,
                env=os.environ,
            )
            acc_lda_post_proc = subprocess.Popen(
                [
                    thirdparty_binary("acc-lda"),
                    f"--rand-prune={self.lda_options['random_prune']}",
                    self.model_path,
                    feature_string,
                    "ark,s,cs:-",
                    acc_path,
                ],
                stdin=weight_silence_post_proc.stdout,
                stderr=subprocess.PIPE,
                encoding="utf8",
                env=os.environ,
            )
            for line in acc_lda_post_proc.stderr:
                log_file.write(line)
                m = self.progress_pattern.match(line.strip())
                if m:
                    yield int(m.group("done")), int(m.group("failed"))
            self.check_call(acc_lda_post_proc)
def _run(self) -> typing.Generator[None]:
    """Run the function"""
    # Posteriors are computed on subsampled features, so scale them up to
    # compensate for the dropped frames (e.g. scale 0.1 with subsample 5
    # gives an effective scale of 0.5)
    modified_posterior_scale = (
        self.ivector_options["posterior_scale"] * self.ivector_options["subsample"]
    )
    with open(self.log_path, "w", encoding="utf8") as log_file:
        subsample_feats_proc = subprocess.Popen(
            [
                thirdparty_binary("subsample-feats"),
                f"--n={self.ivector_options['subsample']}",
                self.feature_string,
                "ark:-",
            ],
            stdout=subprocess.PIPE,
            stderr=log_file,
            env=os.environ,
        )
        gmm_global_get_post_proc = subprocess.Popen(
            [
                thirdparty_binary("gmm-global-get-post"),
                f"--n={self.ivector_options['num_gselect']}",
                f"--min-post={self.ivector_options['min_post']}",
                self.dubm_model,
                "ark:-",
                "ark:-",
            ],
            stdout=subprocess.PIPE,
            stdin=subsample_feats_proc.stdout,
            stderr=log_file,
            env=os.environ,
        )
        scale_post_proc = subprocess.Popen(
            [
                thirdparty_binary("scale-post"),
                "ark:-",
                str(modified_posterior_scale),
                f"ark:{self.post_path}",
            ],
            stdin=gmm_global_get_post_proc.stdout,
            stderr=log_file,
            env=os.environ,
        )
        scale_post_proc.communicate()
        yield None
def _run(self) -> typing.Generator[str]:
    """Run the function"""
    with open(self.log_path, "w", encoding="utf8") as log_file:
        gmm_global_get_post_proc = subprocess.Popen(
            [
                thirdparty_binary("gmm-global-get-post"),
                f"--n={self.ivector_options['num_gselect']}",
                f"--min-post={self.ivector_options['min_post']}",
                self.dubm_path,
                self.feature_string,
                "ark:-",
            ],
            stdout=subprocess.PIPE,
            stderr=log_file,
            env=os.environ,
        )
        extract_proc = subprocess.Popen(
            [
                thirdparty_binary("ivector-extract"),
                f"--acoustic-weight={self.ivector_options['posterior_scale']}",
                "--compute-objf-change=true",
                f"--max-count={self.ivector_options['max_count']}",
                self.ie_path,
                self.feature_string,
                "ark,s,cs:-",
                f"ark,t:{self.ivectors_path}",
            ],
            stderr=subprocess.PIPE,
            encoding="utf8",
            stdin=gmm_global_get_post_proc.stdout,
            env=os.environ,
        )
        for line in extract_proc.stderr:
            log_file.write(line)
            m = self.progress_pattern.match(line.strip())
            if m:
                yield m.group("speaker")
        self.check_call(extract_proc)
def _run(self) -> typing.Generator[typing.Tuple[int, int]]:
    """Run the function"""
    with open(self.log_path, "w", encoding="utf8") as log_file:
        for dict_id in self.dictionaries:
            feature_string = self.feature_strings[dict_id]
            tmp_lat_path = self.tmp_lat_paths[dict_id]
            final_lat_path = self.final_lat_paths[dict_id]
            rescore_proc = subprocess.Popen(
                [
                    thirdparty_binary("gmm-rescore-lattice"),
                    self.model_path,
                    f"ark:{tmp_lat_path}",
                    feature_string,
                    "ark:-",
                ],
                stdout=subprocess.PIPE,
                stderr=log_file,
                env=os.environ,
            )
            determinize_proc = subprocess.Popen(
                [
                    thirdparty_binary("lattice-determinize-pruned"),
                    f"--acoustic-scale={self.fmllr_options['acoustic_scale']}",
                    f"--beam={self.fmllr_options['lattice_beam']}",
                    "ark:-",
                    f"ark:{final_lat_path}",
                ],
                stdin=rescore_proc.stdout,
                stderr=subprocess.PIPE,
                encoding="utf8",
                env=os.environ,
            )
            for line in determinize_proc.stderr:
                log_file.write(line)
                m = self.progress_pattern.match(line.strip())
                if m:
                    yield int(m.group("done")), int(m.group("errors"))
            self.check_call(determinize_proc)
def _run(self) -> typing.Generator[bool]:
    """Run the function"""
    with open(self.log_path, "w", encoding="utf8") as log_file:
        for dict_id in self.dictionaries:
            ali_path = self.ali_paths[dict_id]
            acc_path = self.acc_paths[dict_id]
            feature_string = self.feature_strings[dict_id]
            si_feature_string = self.si_feature_strings[dict_id]
            ali_to_post_proc = subprocess.Popen(
                [thirdparty_binary("ali-to-post"), f"ark:{ali_path}", "ark:-"],
                stderr=log_file,
                stdout=subprocess.PIPE,
                env=os.environ,
            )
            acc_proc = subprocess.Popen(
                [
                    thirdparty_binary("gmm-acc-stats-twofeats"),
                    self.model_path,
                    feature_string,
                    si_feature_string,
                    "ark,s,cs:-",
                    acc_path,
                ],
                stderr=subprocess.PIPE,
                encoding="utf8",
                stdin=ali_to_post_proc.stdout,
                env=os.environ,
            )
            for line in acc_proc.stderr:
                log_file.write(line)
                m = self.progress_pattern.match(line.strip())
                if m:
                    yield True
            self.check_call(acc_proc)
def _encode(self) -> None:
    """Encodes the alignments."""
    self.log_info("Encoding the alignments as FSAs")
    subprocess.check_call(
        [
            thirdparty_binary("farencode"),
            "--encode_labels",
            self.afst_path,
            self.encoder_path,
            self.far_path,
        ],
        env=os.environ,
    )
    self.log_info(
        f"Success! FAR path: {self.far_path}; encoder path: {self.encoder_path}"
    )
def _run(self) -> typing.Generator[typing.Tuple[str, float, int]]:
    """Run the function"""
    with open(self.log_path, "w", encoding="utf8") as log_file:
        for dict_id in self.dictionaries:
            feature_string = self.feature_strings[dict_id]
            lat_path = self.lat_paths[dict_id]
            word_symbol_path = self.word_symbol_paths[dict_id]
            hclg_path = self.hclg_paths[dict_id]
            if os.path.exists(lat_path):
                continue
            if (
                self.decode_options["uses_speaker_adaptation"]
                and self.decode_options["first_beam"] is not None
            ):
                beam = self.decode_options["first_beam"]
            else:
                beam = self.decode_options["beam"]
            if (
                self.decode_options["uses_speaker_adaptation"]
                and self.decode_options["first_max_active"] is not None
            ):
                max_active = self.decode_options["first_max_active"]
            else:
                max_active = self.decode_options["max_active"]
            decode_proc = subprocess.Popen(
                [
                    thirdparty_binary("gmm-latgen-faster"),
                    f"--max-active={max_active}",
                    f"--beam={beam}",
                    f"--lattice-beam={self.decode_options['lattice_beam']}",
                    "--allow-partial=true",
                    f"--word-symbol-table={word_symbol_path}",
                    f"--acoustic-scale={self.decode_options['acoustic_scale']}",
                    self.model_path,
                    hclg_path,
                    feature_string,
                    f"ark:{lat_path}",
                ],
                stderr=subprocess.PIPE,
                env=os.environ,
                encoding="utf8",
            )
            for line in decode_proc.stderr:
                log_file.write(line)
                m = self.progress_pattern.match(line.strip())
                if m:
                    yield m.group("utterance"), float(m.group("loglike")), int(
                        m.group("num_frames")
                    )
            self.check_call(decode_proc)
def tree_stats(self) -> None:
    """
    Multiprocessing function that computes stats for decision tree training.

    See Also
    --------
    :func:`~montreal_forced_aligner.acoustic_modeling.triphone.tree_stats_func`
        Multiprocessing helper function for each job
    :meth:`.TriphoneTrainer.tree_stats_arguments`
        Job method for generating arguments for the helper function
    :kaldi_src:`sum-tree-stats`
        Relevant Kaldi binary
    :kaldi_steps:`train_deltas`
        Reference Kaldi script
    :kaldi_steps:`train_lda_mllt`
        Reference Kaldi script
    :kaldi_steps:`train_sat`
        Reference Kaldi script
    """
    jobs = self.tree_stats_arguments()
    if self.use_mp:
        run_mp(tree_stats_func, jobs, self.working_log_directory)
    else:
        run_non_mp(tree_stats_func, jobs, self.working_log_directory)
    tree_accs = []
    for x in jobs:
        tree_accs.extend(x.treeacc_paths.values())
    log_path = os.path.join(self.working_log_directory, "sum_tree_acc.log")
    with open(log_path, "w", encoding="utf8") as log_file:
        subprocess.call(
            [
                thirdparty_binary("sum-tree-stats"),
                os.path.join(self.working_directory, "treeacc"),
            ]
            + tree_accs,
            stderr=log_file,
        )
    if not self.debug:
        for f in tree_accs:
            os.remove(f)
def _trainer_initialization(
    self, initial_alignment_directory: Optional[str] = None
) -> None:
    """DUBM training initialization"""
    # Initialize model from E-M in memory
    log_directory = os.path.join(self.working_directory, "log")
    if initial_alignment_directory and os.path.exists(initial_alignment_directory):
        jobs = self.align_arguments()
        for j in jobs:
            for p in j.ali_paths.values():
                shutil.copyfile(
                    p.replace(self.working_directory, initial_alignment_directory), p
                )
        shutil.copyfile(
            os.path.join(initial_alignment_directory, "final.mdl"),
            os.path.join(self.working_directory, "final.mdl"),
        )
    num_gauss_init = int(self.initial_gaussian_proportion * int(self.num_gaussians))
    log_path = os.path.join(log_directory, "gmm_init.log")
    feature_string = self.construct_base_feature_string(all_feats=True)
    self.iteration = 1
    with open(log_path, "w") as log_file:
        gmm_init_proc = subprocess.Popen(
            [
                thirdparty_binary("gmm-global-init-from-feats"),
                f"--num-threads={self.worker.num_jobs}",
                f"--num-frames={self.num_frames}",
                f"--num-gauss={self.num_gaussians}",
                f"--num-gauss-init={num_gauss_init}",
                f"--num-iters={self.num_iterations_init}",
                feature_string,
                self.model_path,
            ],
            stderr=log_file,
        )
        gmm_init_proc.communicate()
    # Store Gaussian selection indices on disk
    self.gmm_gselect()
    parse_logs(log_directory)
def _run(self) -> typing.Generator[typing.Tuple[int, int, int]]:
    """Run the function"""
    with open(self.log_path, "w") as log_file:
        feats_scp_path = self.feats_scp_path
        vad_scp_path = self.vad_scp_path
        vad_proc = subprocess.Popen(
            [
                thirdparty_binary("compute-vad"),
                f"--vad-energy-mean-scale={self.vad_options['energy_mean_scale']}",
                f"--vad-energy-threshold={self.vad_options['energy_threshold']}",
                f"scp:{feats_scp_path}",
                f"ark,t:{vad_scp_path}",
            ],
            stderr=subprocess.PIPE,
            encoding="utf8",
            env=os.environ,
        )
        for line in vad_proc.stderr:
            log_file.write(line)
            m = self.progress_pattern.match(line.strip())
            if m:
                yield int(m.group("done")), int(m.group("no_feats")), int(
                    m.group("unvoiced")
                )
        self.check_call(vad_proc)
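
# The VAD here is purely energy-based. A hedged standalone sketch with the
# usual Kaldi defaults (energy threshold 5.5, mean scale 0.5) and placeholder
# paths; the binary is assumed on PATH:
def example_compute_vad(feats_scp: str, vad_ark: str) -> None:
    import subprocess

    subprocess.check_call(
        [
            "compute-vad",
            "--vad-energy-mean-scale=0.5",
            "--vad-energy-threshold=5.5",
            f"scp:{feats_scp}",
            f"ark,t:{vad_ark}",  # text-form 0/1 decisions per frame
        ]
    )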
def acc_ivector_stats(self) -> None:
    """
    Multiprocessing function that accumulates ivector extraction stats.

    See Also
    --------
    :func:`~montreal_forced_aligner.ivector.trainer.AccIvectorStatsFunction`
        Multiprocessing helper function for each job
    :meth:`.IvectorTrainer.acc_ivector_stats_arguments`
        Job method for generating arguments for the helper function
    :kaldi_src:`ivector-extractor-sum-accs`
        Relevant Kaldi binary
    :kaldi_src:`ivector-extractor-est`
        Relevant Kaldi binary
    :kaldi_steps_sid:`train_ivector_extractor`
        Reference Kaldi script
    """
    begin = time.time()
    self.log_info("Accumulating ivector stats...")
    arguments = self.acc_ivector_stats_arguments()
    if self.use_mp:
        error_dict = {}
        return_queue = mp.Queue()
        stopped = Stopped()
        procs = []
        for i, args in enumerate(arguments):
            function = AccIvectorStatsFunction(args)
            p = KaldiProcessWorker(i, return_queue, function, stopped)
            procs.append(p)
            p.start()
        while True:
            try:
                result = return_queue.get(timeout=1)
                if stopped.stop_check():
                    continue
            except queue.Empty:
                for proc in procs:
                    if not proc.finished.stop_check():
                        break
                else:
                    break
                continue
            if isinstance(result, KaldiProcessingError):
                error_dict[result.job_name] = result
                continue
        for p in procs:
            p.join()
        if error_dict:
            for v in error_dict.values():
                raise v
    else:
        self.log_debug("Not using multiprocessing...")
        for args in arguments:
            function = AccIvectorStatsFunction(args)
            for _ in function.run():
                pass
    self.log_debug(f"Accumulating stats took {time.time() - begin}")
    log_path = os.path.join(self.working_log_directory, f"sum_acc.{self.iteration}.log")
    acc_path = os.path.join(self.working_directory, f"acc.{self.iteration}")
    with open(log_path, "w", encoding="utf8") as log_file:
        accinits = [j.acc_init_path for j in arguments]
        sum_accs_proc = subprocess.Popen(
            [thirdparty_binary("ivector-extractor-sum-accs"), "--parallel=true"]
            + accinits
            + [acc_path],
            stderr=log_file,
            env=os.environ,
        )
        sum_accs_proc.communicate()
    # Clean up
    for p in accinits:
        os.remove(p)
    # Estimate the extractor
    log_path = os.path.join(self.working_log_directory, f"update.{self.iteration}.log")
    with open(log_path, "w") as log_file:
        extractor_est_proc = subprocess.Popen(
            [
                thirdparty_binary("ivector-extractor-est"),
                f"--num-threads={len(self.jobs)}",
                f"--gaussian-min-count={self.gaussian_min_count}",
                self.ie_path,
                os.path.join(self.working_directory, f"acc.{self.iteration}"),
                self.next_ie_path,
            ],
            stderr=log_file,
            env=os.environ,
        )
        extractor_est_proc.communicate()
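
# The update step above follows the usual sum-then-estimate pattern. A hedged
# standalone sketch with placeholder paths and binaries assumed on PATH
# (ivector-extractor-sum-accs takes the merged output as its last argument):
def example_update_extractor(acc_files: list, ie_in: str, ie_out: str) -> None:
    import subprocess

    # Merge the per-job accumulators into a single stats file
    subprocess.check_call(
        ["ivector-extractor-sum-accs", "--parallel=true"] + acc_files + ["acc.sum"]
    )
    # Re-estimate the extractor from the merged stats
    subprocess.check_call(["ivector-extractor-est", "--num-threads=4", ie_in, "acc.sum", ie_out])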
def acc_global_stats(self) -> None:
    """
    Multiprocessing function that accumulates global GMM stats

    See Also
    --------
    :func:`~montreal_forced_aligner.ivector.trainer.AccGlobalStatsFunction`
        Multiprocessing helper function for each job
    :meth:`.DubmTrainer.acc_global_stats_arguments`
        Job method for generating arguments for the helper function
    :kaldi_src:`gmm-global-sum-accs`
        Relevant Kaldi binary
    :kaldi_steps:`train_diag_ubm`
        Reference Kaldi script
    """
    begin = time.time()
    self.log_info("Accumulating global stats...")
    arguments = self.acc_global_stats_arguments()
    if self.use_mp:
        error_dict = {}
        return_queue = mp.Queue()
        stopped = Stopped()
        procs = []
        for i, args in enumerate(arguments):
            function = AccGlobalStatsFunction(args)
            p = KaldiProcessWorker(i, return_queue, function, stopped)
            procs.append(p)
            p.start()
        while True:
            try:
                result = return_queue.get(timeout=1)
                if isinstance(result, Exception):
                    error_dict[getattr(result, "job_name", 0)] = result
                    continue
                if stopped.stop_check():
                    continue
            except queue.Empty:
                for proc in procs:
                    if not proc.finished.stop_check():
                        break
                else:
                    break
                continue
        for p in procs:
            p.join()
        if error_dict:
            for v in error_dict.values():
                raise v
    else:
        self.log_debug("Not using multiprocessing...")
        for args in arguments:
            function = AccGlobalStatsFunction(args)
            for _ in function.run():
                pass
    self.log_debug(f"Accumulating stats took {time.time() - begin}")

    # Don't remove low-count Gaussians until the last iteration,
    # or the stored gselect info won't be valid anymore
    if self.iteration < self.num_iterations:
        opt = "--remove-low-count-gaussians=false"
    else:
        opt = f"--remove-low-count-gaussians={self.remove_low_count_gaussians}"
    log_path = os.path.join(self.working_log_directory, f"update.{self.iteration}.log")
    with open(log_path, "w") as log_file:
        acc_files = []
        for j in arguments:
            acc_files.append(j.acc_path)
        sum_proc = subprocess.Popen(
            [thirdparty_binary("gmm-global-sum-accs"), "-"] + acc_files,
            stderr=log_file,
            stdout=subprocess.PIPE,
            env=os.environ,
        )
        gmm_global_est_proc = subprocess.Popen(
            [
                thirdparty_binary("gmm-global-est"),
                opt,
                f"--min-gaussian-weight={self.min_gaussian_weight}",
                self.model_path,
                "-",
                self.next_model_path,
            ],
            stderr=log_file,
            stdin=sum_proc.stdout,
            env=os.environ,
        )
        gmm_global_est_proc.communicate()
    # Clean up
    if not self.debug:
        for p in acc_files:
            os.remove(p)
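
# The UBM update above is a sum-then-estimate pipe: gmm-global-sum-accs merges
# the per-job accumulators onto stdout, and gmm-global-est re-estimates the
# model from them. A hedged standalone sketch with placeholder paths and
# binaries assumed on PATH:
def example_update_dubm(acc_files: list, model_in: str, model_out: str) -> None:
    import subprocess

    sum_proc = subprocess.Popen(
        ["gmm-global-sum-accs", "-"] + acc_files,  # "-" writes merged stats to stdout
        stdout=subprocess.PIPE,
    )
    # Keep low-count Gaussians on intermediate iterations so gselect stays valid
    subprocess.check_call(
        ["gmm-global-est", "--remove-low-count-gaussians=false", model_in, "-", model_out],
        stdin=sum_proc.stdout,
    )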
def _run(
    self,
) -> typing.Generator[typing.Tuple[int, List[CtmInterval], List[CtmInterval]]]:
    """Run the function"""
    db_engine = sqlalchemy.create_engine(f"sqlite:///{self.db_path}?mode=ro&nolock=1")
    with Session(db_engine) as session:
        for dict_id in self.ali_paths.keys():
            d = session.query(Dictionary).get(dict_id)
            self.position_dependent_phones = d.position_dependent_phones
            self.clitic_marker = d.clitic_marker
            self.silence_word = d.silence_word
            self.oov_word = d.oov_word
            self.optional_silence_phone = d.optional_silence_phone
            self.word_boundary_int_paths[dict_id] = d.word_boundary_int_path
            silence_words = (
                session.query(Word.word)
                .filter(Word.dictionary_id == dict_id)
                .filter(Word.word_type == WordType.silence)
            )
            self.silence_words.update(x for x, in silence_words)
            words = (
                session.query(Word.word, Pronunciation.pronunciation)
                .join(Pronunciation.word)
                .filter(Word.dictionary_id == dict_id)
                .filter(Word.word_type != WordType.silence)
            )
            self.words[dict_id] = {}
            for w, pron in words:
                if w not in self.words[dict_id]:
                    self.words[dict_id][w] = set()
                self.words[dict_id][w].add(tuple(pron.split(" ")))
        utts = (
            session.query(Utterance)
            .join(Utterance.speaker)
            .options(load_only(Utterance.id, Utterance.normalized_text, Utterance.begin))
            .filter(Speaker.job_id == self.job_name)
        )
        for utt in utts:
            self.utterance_texts[utt.id] = utt.normalized_text.split()
            self.utterance_begins[utt.id] = utt.begin
        ds = session.query(Phone.phone, Phone.mapping_id).all()
        for phone, mapping_id in ds:
            self.reversed_phone_mapping[mapping_id] = phone
    with open(self.log_path, "w", encoding="utf8") as log_file:
        for dict_id in self.ali_paths.keys():
            cur_utt = None
            intervals = []
            ali_path = self.ali_paths[dict_id]
            text_int_path = self.text_int_paths[dict_id]
            word_boundary_int_path = self.word_boundary_int_paths[dict_id]
            lin_proc = subprocess.Popen(
                [
                    thirdparty_binary("linear-to-nbest"),
                    f"ark:{ali_path}",
                    f"ark:{text_int_path}",
                    "",
                    "",
                    "ark:-",
                ],
                stdout=subprocess.PIPE,
                stderr=log_file,
                env=os.environ,
            )
            align_words_proc = subprocess.Popen(
                [
                    thirdparty_binary("lattice-align-words"),
                    word_boundary_int_path,
                    self.model_path,
                    "ark:-",
                    "ark:-",
                ],
                stdin=lin_proc.stdout,
                stdout=subprocess.PIPE,
                stderr=log_file,
                env=os.environ,
            )
            phone_proc = subprocess.Popen(
                [
                    thirdparty_binary("lattice-to-phone-lattice"),
                    self.model_path,
                    "ark:-",
                    "ark:-",
                ],
                stdout=subprocess.PIPE,
                stdin=align_words_proc.stdout,
                stderr=log_file,
                env=os.environ,
            )
            nbest_proc = subprocess.Popen(
                [
                    thirdparty_binary("nbest-to-ctm"),
                    "--print-args=false",
                    f"--frame-shift={self.frame_shift}",
                    "ark:-",
                    "-",
                ],
                stdin=phone_proc.stdout,
                stderr=log_file,
                stdout=subprocess.PIPE,
                env=os.environ,
                encoding="utf8",
            )
            for line in nbest_proc.stdout:
                line = line.strip()
                if not line:
                    continue
                try:
                    interval = process_ctm_line(line)
                except ValueError:
                    continue
                if cur_utt is None:
                    cur_utt = interval.utterance
                if cur_utt != interval.utterance:
                    word_intervals, phone_intervals = self.cleanup_intervals(
                        cur_utt, dict_id, intervals
                    )
                    yield cur_utt, word_intervals, phone_intervals
                    intervals = []
                    cur_utt = interval.utterance
                intervals.append(interval)
            self.check_call(nbest_proc)
            if intervals:
                word_intervals, phone_intervals = self.cleanup_intervals(
                    cur_utt, dict_id, intervals
                )
                yield cur_utt, word_intervals, phone_intervals
def _run(self) -> typing.Generator[typing.Tuple[int, int, str]]:
    """Run the function"""
    db_engine = sqlalchemy.create_engine(f"sqlite:///{self.db_path}?mode=ro&nolock=1")
    with open(self.log_path, "w", encoding="utf8") as log_file, Session(db_engine) as session:
        phones = session.query(Phone.phone, Phone.mapping_id)
        for phone, mapping_id in phones:
            self.reversed_phone_mapping[mapping_id] = phone
        for dict_id in self.text_int_paths.keys():
            d = session.query(Dictionary).get(dict_id)
            self.position_dependent_phones = d.position_dependent_phones
            self.clitic_marker = d.clitic_marker
            self.silence_words.add(d.silence_word)
            self.oov_word = d.oov_word
            self.optional_silence_phone = d.optional_silence_phone
            self.word_boundary_int_paths[d.id] = d.word_boundary_int_path
            self.reversed_word_mapping[d.id] = {}
            silence_words = (
                session.query(Word.word)
                .filter(Word.dictionary_id == dict_id)
                .filter(Word.word_type == WordType.silence)
            )
            self.silence_words.update(x for x, in silence_words)
            words = session.query(Word.mapping_id, Word.word).filter(
                Word.dictionary_id == dict_id
            )
            for w_id, w in words:
                self.reversed_word_mapping[d.id][w_id] = w
            current_utterance = None
            word_pronunciations = []
            text_int_path = self.text_int_paths[dict_id]
            word_boundary_path = self.word_boundary_int_paths[dict_id]
            ali_path = self.ali_paths[dict_id]
            if not os.path.exists(ali_path):
                continue
            lin_proc = subprocess.Popen(
                [
                    thirdparty_binary("linear-to-nbest"),
                    f"ark:{ali_path}",
                    f"ark:{text_int_path}",
                    "",
                    "",
                    "ark:-",
                ],
                stdout=subprocess.PIPE,
                stderr=log_file,
                env=os.environ,
            )
            align_proc = subprocess.Popen(
                [
                    thirdparty_binary("lattice-align-words"),
                    word_boundary_path,
                    self.model_path,
                    "ark:-",
                    "ark:-",
                ],
                stdin=lin_proc.stdout,
                stdout=subprocess.PIPE,
                stderr=log_file,
                env=os.environ,
            )
            prons_proc = subprocess.Popen(
                [thirdparty_binary("nbest-to-prons"), self.model_path, "ark:-", "-"],
                stdin=align_proc.stdout,
                stderr=log_file,
                encoding="utf8",
                stdout=subprocess.PIPE,
                env=os.environ,
            )
            for line in prons_proc.stdout:
                line = line.strip().split()
                utt = line[0]
                if utt != current_utterance and current_utterance is not None:
                    log_file.write(f"{current_utterance}\t{word_pronunciations}\n")
                    if self.for_g2p:
                        phones = ""
                        for x in word_pronunciations:
                            phones += x[1] + " "
                        yield dict_id, current_utterance, phones.strip()
                    else:
                        yield dict_id, self._process_pronunciations(word_pronunciations)
                    word_pronunciations = []
                current_utterance = utt
                pron = [int(x) for x in line[4:]]
                word = self.reversed_word_mapping[dict_id][int(line[3])]
                if self.for_g2p:
                    pron = " ".join(self.reversed_phone_mapping[x] for x in pron)
                else:
                    if self.position_dependent_phones:
                        pron = " ".join(
                            split_phone_position(self.reversed_phone_mapping[x])[0]
                            for x in pron
                        )
                    else:
                        pron = " ".join(self.reversed_phone_mapping[x] for x in pron)
                word_pronunciations.append((word, pron))
            if word_pronunciations:
                if self.for_g2p:
                    phones = ""
                    for x in word_pronunciations:
                        phones += x[1] + " "
                    yield dict_id, current_utterance, phones.strip()
                else:
                    yield dict_id, self._process_pronunciations(word_pronunciations)
            self.check_call(prons_proc)
def _run(self) -> typing.Generator[typing.Tuple[int, int]]:
    """Run the function"""
    db_engine = sqlalchemy.create_engine(f"sqlite:///{self.db_path}?mode=ro&nolock=1")
    with open(self.log_path, "w", encoding="utf8") as log_file, Session(db_engine) as session:
        dictionaries = (
            session.query(Dictionary)
            .join(Dictionary.speakers)
            .filter(Speaker.job_id == self.job_name)
            .distinct()
        )
        # Read the context width and central position off the tree
        tree_proc = subprocess.Popen(
            [thirdparty_binary("tree-info"), self.tree_path],
            encoding="utf8",
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        stdout, _ = tree_proc.communicate()
        context_width = 1
        central_pos = 0
        for line in stdout.split("\n"):
            text = line.strip().split(" ")
            if text[0] == "context-width":
                context_width = int(text[1])
            elif text[0] == "central-position":
                central_pos = int(text[1])
        out_disambig = os.path.join(self.working_dir, f"{self.job_name}.disambig")
        ilabels_temp = os.path.join(self.working_dir, f"{self.job_name}.ilabels")
        clg_path = os.path.join(self.working_dir, f"{self.job_name}.clg.temp")
        ha_out_disambig = os.path.join(
            self.working_dir, f"{self.job_name}.ha_out_disambig.temp"
        )
        for d in dictionaries:
            fst_ark_path = self.fst_ark_paths[d.id]
            text_path = self.text_int_paths[d.id]
            if d.use_g2p:
                import pynini
                from pynini.lib import rewrite

                from montreal_forced_aligner.g2p.generator import threshold_lattice_to_dfa

                fst = pynini.Fst.read(d.lexicon_fst_path)
                token_type = pynini.SymbolTable.read_text(d.grapheme_symbol_table_path)
                utterances = (
                    session.query(Utterance.kaldi_id, Utterance.normalized_character_text)
                    .join(Utterance.speaker)
                    .filter(Utterance.ignored == False)  # noqa
                    .filter(Utterance.normalized_character_text != "")
                    .filter(Speaker.job_id == self.job_name)
                    .filter(Speaker.dictionary_id == d.id)
                    .order_by(Utterance.kaldi_id)
                )
                with open(fst_ark_path, "wb") as fst_output_file:
                    for utt_id, full_text in utterances:
                        full_text = f"<s> {full_text} </s>"
                        lattice = rewrite.rewrite_lattice(full_text, fst, token_type)
                        lattice = threshold_lattice_to_dfa(lattice, 2.0)
                        lattice_bytes = lattice.write_to_string()
                        clg_compose_proc = subprocess.Popen(
                            [
                                thirdparty_binary("fstcomposecontext"),
                                f"--context-size={context_width}",
                                f"--central-position={central_pos}",
                                f"--read-disambig-syms={d.disambiguation_symbols_int_path}",
                                f"--write-disambig-syms={out_disambig}",
                                ilabels_temp,
                                "-",
                                "-",
                            ],
                            stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE,
                            stderr=log_file,
                            env=os.environ,
                        )
                        clg_sort_proc = subprocess.Popen(
                            [
                                thirdparty_binary("fstarcsort"),
                                "--sort_type=ilabel",
                                "-",
                                clg_path,
                            ],
                            stdin=clg_compose_proc.stdout,
                            stderr=log_file,
                            env=os.environ,
                        )
                        clg_compose_proc.stdin.write(lattice_bytes)
                        clg_compose_proc.stdin.flush()
                        clg_compose_proc.stdin.close()
                        clg_sort_proc.communicate()
                        make_h_proc = subprocess.Popen(
                            [
                                thirdparty_binary("make-h-transducer"),
                                f"--disambig-syms-out={ha_out_disambig}",
                                ilabels_temp,
                                self.tree_path,
                                self.model_path,
                            ],
                            stderr=log_file,
                            stdout=subprocess.PIPE,
                            env=os.environ,
                        )
                        hclg_compose_proc = subprocess.Popen(
                            [thirdparty_binary("fsttablecompose"), "-", clg_path, "-"],
                            stderr=log_file,
                            stdin=make_h_proc.stdout,
                            stdout=subprocess.PIPE,
                            env=os.environ,
                        )
                        hclg_determinize_proc = subprocess.Popen(
                            [thirdparty_binary("fstdeterminizestar"), "--use-log=true"],
                            stdin=hclg_compose_proc.stdout,
                            stdout=subprocess.PIPE,
                            stderr=log_file,
                            env=os.environ,
                        )
                        hclg_rmsymbols_proc = subprocess.Popen(
                            [thirdparty_binary("fstrmsymbols"), ha_out_disambig],
                            stdin=hclg_determinize_proc.stdout,
                            stdout=subprocess.PIPE,
                            stderr=log_file,
                            env=os.environ,
                        )
                        hclg_rmeps_proc = subprocess.Popen(
                            [thirdparty_binary("fstrmepslocal")],
                            stdin=hclg_rmsymbols_proc.stdout,
                            stdout=subprocess.PIPE,
                            stderr=log_file,
                            env=os.environ,
                        )
                        hclg_minimize_proc = subprocess.Popen(
                            [thirdparty_binary("fstminimizeencoded")],
                            stdin=hclg_rmeps_proc.stdout,
                            stdout=subprocess.PIPE,
                            stderr=log_file,
                            env=os.environ,
                        )
                        hclg_self_loop_proc = subprocess.Popen(
                            [
                                thirdparty_binary("add-self-loops"),
                                "--self-loop-scale=0.1",
                                "--reorder=true",
                                self.model_path,
                                "-",
                                "-",
                            ],
                            stdin=hclg_minimize_proc.stdout,
                            stdout=subprocess.PIPE,
                            stderr=log_file,
                            env=os.environ,
                        )
                        stdout, _ = hclg_self_loop_proc.communicate()
                        self.check_call(hclg_minimize_proc)
                        fst_output_file.write(utt_id.encode("utf8") + b" ")
                        fst_output_file.write(stdout)
                        yield 1, 0
            else:
                proc = subprocess.Popen(
                    [
                        thirdparty_binary("compile-train-graphs"),
                        f"--read-disambig-syms={d.disambiguation_symbols_int_path}",
                        self.tree_path,
                        self.model_path,
                        d.lexicon_fst_path,
                        f"ark:{text_path}",
                        f"ark:{fst_ark_path}",
                    ],
                    stderr=subprocess.PIPE,
                    encoding="utf8",
                    env=os.environ,
                )
                for line in proc.stderr:
                    log_file.write(line)
                    m = self.progress_pattern.match(line.strip())
                    if m:
                        yield int(m.group("succeeded")), int(m.group("failed"))
                self.check_call(proc)