def forced_align(
    self,
    name: str,
    target_corpus_key: str,
    flow: Union[str, List[str], Tuple[str], rasr.FlagDependentFlowAttribute],
    feature_scorer: Union[str, List[str], Tuple[str], rasr.FeatureScorer],
    feature_scorer_corpus_key: Optional[str] = None,
    dump_alignment: bool = False,
    **kwargs,
):
    """
    Force-align ``target_corpus_key`` with an existing feature scorer and
    register the resulting alignment bundle as a Sisyphus output under
    ``forced_alignment/``.

    :param name: suffix for the alignment job, stored as ``alignment_<name>``
    :param target_corpus_key: corpus to align
    :param flow: feature flow (name, selection path, or flow attribute)
    :param feature_scorer: feature scorer (name, selection path, or scorer object)
    :param feature_scorer_corpus_key: corpus whose scorer collection is searched;
        ``None`` leaves the selection to ``meta.select_element``'s default
    :param dump_alignment: additionally run a DumpAlignmentJob and register its bundle
    :param kwargs: forwarded to ``self.align()``
    """
    selected_feature_scorer = meta.select_element(self.feature_scorers, feature_scorer_corpus_key, feature_scorer)
    self.align(
        name=name,
        corpus=target_corpus_key,
        flow=flow,
        feature_scorer=selected_feature_scorer,
        # Bug fix: previously `kwargs=kwargs`, which handed the whole dict to
        # align() as a single keyword argument named "kwargs" instead of
        # expanding it (cf. `**kwargs` in sat_training / monophone_training).
        **kwargs,
    )

    align_job = self.jobs[target_corpus_key]["alignment_%s" % name]
    align_job.add_alias("forced_alignment/alignment_%s" % name)
    tk.register_output(
        "forced_alignment/alignment_%s.bundle" % name,
        align_job.out_alignment_bundle,
    )

    if dump_alignment:
        dump_job = mm.DumpAlignmentJob(
            crp=self.crp[target_corpus_key],
            feature_flow=meta.select_element(self.feature_flows, target_corpus_key, flow),
            original_alignment=meta.select_element(self.alignments, target_corpus_key, name),
        )
        self.jobs[target_corpus_key]["alignment_dump_%s" % name] = dump_job
        dump_job.add_alias("forced_alignment/alignment_dump_%s" % name)
        tk.register_output(
            "forced_alignment/alignment_dump_%s.bundle" % name,
            dump_job.out_alignment_bundle,
        )
def vtln_warping_mixtures(
    self,
    name: str,
    corpus_key: str,
    feature_flow_key: str,
    feature_scorer: Union[str, List[str], Tuple[str], rasr.FeatureScorer],
    alignment: Union[str, List[str], Tuple[str], rasr.FlagDependentFlowAttribute],
    splits: int,
    accs_per_split: int,
):
    """
    Score features under different VTLN warping factors, then train warped
    mixtures with the estimated per-segment warping map.

    Stores the warping map, alphas file and resulting mixtures both in
    ``self.vtln_files`` and (mixtures only) in ``self.mixtures``.

    :param name: base name for the created jobs/entries
    :param corpus_key: corpus to operate on
    :param feature_flow_key: key of the feature flow to use
    :param feature_scorer: feature scorer (name, selection path, or scorer object)
    :param alignment: alignment (name, selection path, or flow attribute)
    :param splits: number of mixture splits
    :param accs_per_split: accumulation iterations per split
    """
    flow_net = self.feature_flows[corpus_key][feature_flow_key]

    warp = vtln.ScoreFeaturesWithWarpingFactorsJob(
        crp=self.crp[corpus_key],
        feature_flow=flow_net,
        feature_scorer=meta.select_element(self.feature_scorers, corpus_key, feature_scorer),
        alignment=meta.select_element(self.alignments, corpus_key, alignment),
    )
    warp.rqmt = {"time": 24, "cpu": 1, "mem": 2}
    self.jobs[corpus_key]["vtln_warping_map_%s" % name] = warp

    train_seq = meta.TrainWarpingFactorsSequence(
        self.crp[corpus_key],
        None,
        flow_net,
        warp.warping_map,
        warp.alphas_file,
        ["accumulate"] + meta.split_and_accumulate_sequence(splits, accs_per_split),
    )

    self.mixtures[corpus_key]["vtln_warping_mix_%s" % name] = train_seq.selected_mixtures
    self.vtln_files[corpus_key][name + "_alphas_file"] = warp.alphas_file
    self.vtln_files[corpus_key][name + "_warping_map"] = warp.warping_map
    self.vtln_files[corpus_key][name + "_mixtures"] = train_seq.selected_mixtures
def single_density_mixtures(self, name: str, corpus_key: str, feature_flow_key: str, alignment: str):
    """
    Estimate single-density mixtures from the final alignment of a previous
    training run (no mixture splitting).

    :param name: name under which the mixtures are stored
    :param corpus_key: corpus to accumulate on
    :param feature_flow_key: feature flow used for accumulation
    :param alignment: name of the training run whose last (-1) alignment is used
    """
    # Select the final alignment produced by the referenced training run.
    last_alignment = meta.select_element(self.alignments, corpus_key, (corpus_key, alignment, -1))
    self.estimate_mixtures(
        name=name,
        corpus=corpus_key,
        flow=feature_flow_key,
        alignment=last_alignment,
        split_first=False,
    )
def sat_recognition(
    self,
    # NOTE(review): annotated `str` originally, but indexed as a 5-tuple below
    # (name, pron scale, lm scale, iteration, suffix) — annotation corrected.
    prev_ctm: Tuple[str, float, float, int, str],
    feature_cache: Union[str, List[str], Tuple[str], rasr.FlagDependentFlowAttribute],
    cache_regex: str,
    cmllr_mixtures: Union[str, List[str], Tuple[str], rasr.FlagDependentFlowAttribute],
    train_corpus_key: str,
    name: str,
    iters: List[int],
    lm_scales: Union[float, List[float]],
    feature_scorer_key: Tuple[str, str],
    optimize_am_lm_scale: bool,
    corpus_key: str,
    feature_flow: str,
    pronunciation_scales: Union[float, List[float]],
    search_parameters: dict,
    rtf: float,
    mem: float,
    parallelize_conversion: bool,
    lattice_to_ctm_kwargs: dict,
    **kwargs,
):
    """
    Speaker-adapted (SAT/CMLLR) recognition pass.

    Takes the CTM output of a previous recognition stage as a pseudo
    transcription, estimates per-speaker CMLLR transforms on the recognition
    corpus, extends the feature flow with those transforms, and runs a new
    recognition on the adapted features.

    :param prev_ctm: key components of the previous recognition's CTM entry
    :param feature_cache: feature cache used for CMLLR estimation
    :param cache_regex: regex passed through to estimate_cmllr
    :param cmllr_mixtures: mixtures (under train_corpus_key) used for alignment/CMLLR
    :param train_corpus_key: corpus the mixtures were trained on
    :param name: name for this recognition stage
    :param iters: mixture iterations to recognize with
    :param lm_scales: LM scale(s) to evaluate
    :param feature_scorer_key: (corpus, name) selection of the feature scorer
    :param optimize_am_lm_scale: run AM/LM scale optimization
    :param corpus_key: recognition corpus
    :param feature_flow: base feature flow name; "<flow>+cmllr" is created here
    :param pronunciation_scales: pronunciation scale(s) to evaluate
    :param search_parameters: decoder search parameters
    :param rtf: real-time-factor requirement for the search jobs
    :param mem: memory requirement for the search jobs
    :param parallelize_conversion: parallelize lattice-to-CTM conversion
    :param lattice_to_ctm_kwargs: extra kwargs for the lattice-to-CTM job
    :param kwargs: forwarded to self.recognition()
    :return:
    """
    # Reconstruct the ctm_files key under which the previous stage stored its result.
    prev_ctm_key = f"recog_{train_corpus_key}-{prev_ctm[0]}-{corpus_key}-ps{prev_ctm[1]:02.2f}-lm{prev_ctm[2]:02.2f}-iter{prev_ctm[3]:02d}{prev_ctm[4]}"
    assert prev_ctm_key in self.ctm_files[corpus_key], (
        "the previous recognition stage '%s' did not provide the required recognition: %s"
        % (prev_ctm, prev_ctm_key)
    )

    # Build a corpus whose transcriptions are the previous recognition output,
    # and a speaker segmentation for per-speaker transform estimation.
    recognized_corpus = corpus_recipes.ReplaceTranscriptionFromCtmJob(
        self.corpora[corpus_key].corpus_file,
        self.ctm_files[corpus_key][prev_ctm_key],
    )
    speaker_seq = corpus_recipes.SegmentCorpusBySpeakerJob(self.corpora[corpus_key].corpus_file)

    # Overlay: a copy of the recognition corpus setup that points at the
    # CTM-transcribed corpus file; CMLLR is estimated on this overlay.
    overlay_key = f"{corpus_key}_{name}_ps{prev_ctm[1]:02.2f}-lm{prev_ctm[2]:02.2f}-iter{prev_ctm[3]:02d}{prev_ctm[4]}_sat"
    self.add_overlay(corpus_key, overlay_key)
    self.crp[overlay_key].corpus_config = copy.deepcopy(self.crp[corpus_key].corpus_config)
    self.crp[overlay_key].corpus_config.file = recognized_corpus.output_corpus_path
    self.crp[overlay_key].segment_path = copy.deepcopy(self.crp[corpus_key].segment_path)

    self.corpora[overlay_key] = copy.deepcopy(self.corpora[corpus_key])
    self.corpora[overlay_key].corpus_file = recognized_corpus.output_corpus_path

    # Align the pseudo-transcribed corpus with the training mixtures.
    alignment = mm.AlignmentJob(
        crp=self.crp[overlay_key],
        feature_flow=self.feature_flows[overlay_key][feature_flow],
        feature_scorer=self.default_mixture_scorer(
            meta.select_element(self.mixtures, corpus_key, (train_corpus_key, cmllr_mixtures)),
        ),
    )

    self.estimate_cmllr(
        name=name,
        corpus_key=overlay_key,
        feature_cache=meta.select_element(self.feature_caches, corpus_key, feature_cache),
        feature_flow_key=feature_flow,
        cache_regex=cache_regex,
        alignment=alignment.out_alignment_path,
        mixtures=meta.select_element(self.mixtures, corpus_key, (train_corpus_key, cmllr_mixtures)),
        overlay_key=overlay_key,
    )

    # Extend the base feature flow with the estimated per-speaker transforms.
    self.feature_flows[corpus_key]["%s+cmllr" % feature_flow] = sat.add_cmllr_transform(
        feature_net=self.feature_flows[corpus_key][feature_flow],
        map_file=speaker_seq.out_cluster_map_file,
        transform_dir=self.jobs[overlay_key]["cmllr"].transforms,
    )

    with tk.block(f"{name}_recognition"):
        self.recognition(
            name=name,
            iters=iters,
            lm_scales=lm_scales,
            feature_scorer_key=feature_scorer_key,
            optimize_am_lm_scale=optimize_am_lm_scale,
            corpus_key=corpus_key,
            feature_flow=feature_flow + "+cmllr",
            pronunciation_scales=pronunciation_scales,
            search_parameters=search_parameters,
            rtf=rtf,
            mem=mem,
            parallelize_conversion=parallelize_conversion,
            lattice_to_ctm_kwargs=lattice_to_ctm_kwargs,
            **kwargs,
        )
def sat_training(
    self,
    name: str,
    corpus_key: str,
    feature_cache: Union[str, List[str], Tuple[str], rasr.FlagDependentFlowAttribute],
    feature_flow_key: str,
    cache_regex: str,
    alignment: Union[str, List[str], Tuple[str], rasr.FlagDependentFlowAttribute],
    mixtures: Union[str, List[str], Tuple[str], rasr.FlagDependentFlowAttribute],
    splits: int,
    accs_per_split: int,
    align_keep_values: Optional[dict] = None,
    **kwargs,
):
    """
    Speaker-adaptive training: estimate CMLLR transforms from the given
    alignment and mixtures, then train on the CMLLR-extended feature flow.

    Registers aliases and outputs for the last alignment and mixture jobs.

    :param name: training run name, jobs are stored as "train_<name>"
    :param corpus_key: training corpus
    :param feature_cache: feature cache for CMLLR estimation
    :param feature_flow_key: base flow name; training uses "<flow>+cmllr"
    :param cache_regex: regex passed through to estimate_cmllr
    :param alignment: alignment (name, selection path, or flow attribute)
    :param mixtures: mixtures (name, selection path, or flow attribute)
    :param splits: number of mixture splits
    :param accs_per_split: accumulation iterations per split
    :param align_keep_values: overrides for the default alignment keep values
    :param kwargs: forwarded to self.train()
    """
    selected_alignment = meta.select_element(self.alignments, corpus_key, alignment)

    self.estimate_cmllr(
        name=name,
        corpus_key=corpus_key,
        feature_cache=meta.select_element(self.feature_caches, corpus_key, feature_cache),
        feature_flow_key=feature_flow_key,
        cache_regex=cache_regex,
        alignment=selected_alignment,
        mixtures=meta.select_element(self.mixtures, corpus_key, mixtures),
    )

    action_sequence = (
        ["accumulate"]
        + meta.align_then_split_and_accumulate_sequence(splits, accs_per_split, mark_align=False)
        + ["align!"]
    )

    keep_values = dict(**self.default_align_keep_values)
    if align_keep_values is not None:
        keep_values.update(align_keep_values)

    self.train(
        name=name,
        corpus=corpus_key,
        sequence=action_sequence,
        flow="%s+cmllr" % feature_flow_key,
        initial_alignment=selected_alignment,
        align_keep_values=keep_values,
        **kwargs,
    )

    train_job = self.jobs[corpus_key]["train_{}".format(name)]
    train_job.selected_alignment_jobs[-1].add_alias("train/{}_{}_align_last".format(corpus_key, name))
    train_job.selected_mixture_jobs[-1].add_alias("train/{}_{}_mix_last".format(corpus_key, name))

    tk.register_output(
        "train/{}_{}_align_bundle_last".format(corpus_key, name),
        train_job.selected_alignment_jobs[-1].out_alignment_bundle,
    )
    tk.register_output(
        "train/{}_{}_mix_last".format(corpus_key, name),
        train_job.selected_mixture_jobs[-1].out_mixtures,
    )
def cart_and_lda(
    self,
    name: str,
    corpus_key: str,
    initial_flow_key: str,
    context_flow_key: str,
    context_size: int,
    alignment: Union[str, List[str], Tuple[str], rasr.FlagDependentFlowAttribute],
    num_dim: int,
    num_iter: int,
    eigenvalue_args: dict,
    generalized_eigenvalue_args: dict,
    **kwargs,
):
    """
    Jointly estimate a CART state tying and an LDA transform, then switch
    every corpus to the new state tying and register the derived flows.

    Side effects: adds "<flow>+context" and "<flow>+context+lda" to all
    feature flows, stores the LDA matrix and CART tree, and rewrites the
    state-tying config of every crp.

    :param name: name for the estimation job and stored artifacts
    :param corpus_key: corpus used for estimation
    :param initial_flow_key: flow for the initial statistics
    :param context_flow_key: flow to extend with temporal context
    :param context_size: total context window size (frames)
    :param alignment: alignment (name, selection path, or flow attribute)
    :param num_dim: LDA output dimensionality
    :param num_iter: number of CART/LDA iterations
    :param eigenvalue_args: args for the eigenvalue problem
    :param generalized_eigenvalue_args: args for the generalized eigenvalue problem
    :param kwargs: unused here, accepted for signature compatibility
    """
    context_key = "{}+context".format(context_flow_key)

    # Add a temporal-context window to the chosen flow in every corpus.
    for flows in self.feature_flows.values():
        flows[context_key] = lda.add_context_flow(
            feature_net=flows[context_flow_key],
            max_size=context_size,
            right=int(context_size / 2.0),
        )

    cart_lda = meta.CartAndLDA(
        original_crp=self.crp[corpus_key],
        initial_flow=self.feature_flows[corpus_key][initial_flow_key],
        context_flow=self.feature_flows[corpus_key][context_key],
        alignment=meta.select_element(self.alignments, corpus_key, alignment),
        questions=self.cart_questions,
        num_dim=num_dim,
        num_iter=num_iter,
        eigenvalue_args=eigenvalue_args,
        generalized_eigenvalue_args=generalized_eigenvalue_args,
    )
    self.jobs[corpus_key]["cart_and_lda_{}_{}".format(corpus_key, name)] = cart_lda
    self.lda_matrices[corpus_key][name] = cart_lda.last_lda_matrix
    self.cart_trees[corpus_key][name] = cart_lda.last_cart_tree

    tk.register_output(
        "{}_{}_last_num_cart_labels".format(corpus_key, name),
        cart_lda.last_num_cart_labels,
    )
    tk.register_output("{}_{}.tree.xml.gz".format(corpus_key, name), cart_lda.last_cart_tree)

    # Append the estimated LDA transform to the context flow in every corpus.
    lda_key = "{}+context+lda".format(context_flow_key)
    for flows in self.feature_flows.values():
        flows[lda_key] = features.add_linear_transform(flows[context_key], cart_lda.last_lda_matrix)

    # Every corpus now uses the freshly estimated CART state tying.
    for crp in self.crp.values():
        crp.acoustic_model_config.state_tying.type = "cart"
        crp.acoustic_model_config.state_tying.file = cart_lda.last_cart_tree

    state_tying_job = allophones.DumpStateTyingJob(self.crp[corpus_key])
    tk.register_output(
        "{}_{}_state_tying".format(corpus_key, name),
        state_tying_job.out_state_tying,
    )
def monophone_training(
    self,
    name: str,
    corpus_key: str,
    linear_alignment_args: dict,
    feature_energy_flow_key: str,
    feature_flow: Union[str, List[str], Tuple[str], rasr.FlagDependentFlowAttribute],
    align_iter: int,
    splits: int,
    accs_per_split: int,
    align_keep_values: Optional[dict] = None,
    **kwargs,
):
    """
    Monophone training: optional linear-alignment bootstrap, then iterative
    align/accumulate and split/accumulate training.

    Registers aliases and outputs for the last alignment and mixture jobs
    and dumps the resulting state tying.

    :param name: training run name, jobs are stored as "train_<name>"
    :param corpus_key: training corpus
    :param linear_alignment_args: kwargs for self.linear_alignment(); skipped if None
    :param feature_energy_flow_key: flow used for the linear alignment
    :param feature_flow: flow used for the actual training
    :param align_iter: number of initial align+accumulate iterations
    :param splits: number of mixture splits
    :param accs_per_split: accumulation iterations per split
    :param align_keep_values: overrides for the default alignment keep values
    :param kwargs: forwarded to self.train()
    """
    if linear_alignment_args is not None:
        self.linear_alignment(
            name,
            corpus_key,
            feature_energy_flow_key,
            prefix=f"{corpus_key}_",
            **linear_alignment_args,
        )

    action_sequence = meta.align_and_accumulate_sequence(align_iter, 1, mark_accumulate=False, mark_align=False)
    action_sequence += meta.split_and_accumulate_sequence(splits, accs_per_split) + ["align!"]

    keep_values = dict(**self.default_align_keep_values)
    if align_keep_values is not None:
        keep_values.update(align_keep_values)

    self.train(
        name=name,
        corpus=corpus_key,
        sequence=action_sequence,
        flow=feature_flow,
        initial_mixtures=meta.select_element(self.mixtures, corpus_key, "linear_alignment_{}".format(name)),
        align_keep_values=keep_values,
        **kwargs,
    )

    train_job = self.jobs[corpus_key]["train_{}".format(name)]
    train_job.selected_alignment_jobs[-1].add_alias("train/{}_{}_align_last".format(corpus_key, name))
    train_job.selected_mixture_jobs[-1].add_alias("train/{}_{}_mix_last".format(corpus_key, name))

    tk.register_output(
        "train/{}_{}_align_bundle_last".format(corpus_key, name),
        train_job.selected_alignment_jobs[-1].out_alignment_bundle,
    )
    tk.register_output(
        "train/{}_{}_mix_last".format(corpus_key, name),
        train_job.selected_mixture_jobs[-1].out_mixtures,
    )

    state_tying_job = allophones.DumpStateTyingJob(self.crp[corpus_key])
    tk.register_output(
        "{}_{}_state_tying".format(corpus_key, name),
        state_tying_job.out_state_tying,
    )