Example #1
    def forced_align(
        self,
        name: str,
        target_corpus_key: str,
        flow: Union[str, List[str], Tuple[str],
                    rasr.FlagDependentFlowAttribute],
        feature_scorer: Union[str, List[str], Tuple[str], rasr.FeatureScorer],
        feature_scorer_corpus_key: Optional[str] = None,
        dump_alignment: bool = False,
        **kwargs,
    ):
        """
        TODO: docstring

        :param name:
        :param target_corpus_key:
        :param flow:
        :param feature_scorer:
        :param feature_scorer_corpus_key:
        :param dump_alignment:
        :param kwargs:
        :return:
        """
        selected_feature_scorer = meta.select_element(
            self.feature_scorers, feature_scorer_corpus_key, feature_scorer)
        self.align(
            name=name,
            corpus=target_corpus_key,
            flow=flow,
            feature_scorer=selected_feature_scorer,
            **kwargs,
        )

        align_job = self.jobs[target_corpus_key]["alignment_%s" % name]
        align_job.add_alias("forced_alignment/alignment_%s" % name)
        tk.register_output(
            "forced_alignment/alignment_%s.bundle" % name,
            align_job.out_alignment_bundle,
        )

        if dump_alignment:
            dump_job = mm.DumpAlignmentJob(
                crp=self.crp[target_corpus_key],
                feature_flow=meta.select_element(self.feature_flows,
                                                 target_corpus_key, flow),
                original_alignment=meta.select_element(self.alignments,
                                                       target_corpus_key,
                                                       name),
            )
            self.jobs[target_corpus_key]["alignment_dump_%s" % name] = dump_job
            dump_job.add_alias("forced_alignment/alignment_dump_%s" % name)
            tk.register_output(
                "forced_alignment/alignment_dump_%s.bundle" % name,
                dump_job.out_alignment_bundle,
            )
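
A minimal call sketch for this method. The instance name `system`, the corpus key, the flow key, and the scorer keys are hypothetical placeholders; they have to exist in the respective registries (self.feature_flows, self.feature_scorers) of your setup:

    # assumption: `system` is an already initialized instance of this class
    system.forced_align(
        name="mono_dev",
        target_corpus_key="dev",           # corpus to force-align
        flow="mfcc+deriv",                 # key into system.feature_flows["dev"]
        feature_scorer="train_mono",       # key into system.feature_scorers["train"]
        feature_scorer_corpus_key="train", # corpus under which the scorer is stored
        dump_alignment=True,               # also dump the alignment to a bundle
    )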
Example #2
    def vtln_warping_mixtures(
        self,
        name: str,
        corpus_key: str,
        feature_flow_key: str,
        feature_scorer: Union[str, List[str], Tuple[str], rasr.FeatureScorer],
        alignment: Union[str, List[str], Tuple[str],
                         rasr.FlagDependentFlowAttribute],
        splits: int,
        accs_per_split: int,
    ):
        """
        TODO:  docstring

        :param name:
        :param corpus_key:
        :param feature_flow_key:
        :param feature_scorer:
        :param alignment:
        :param splits:
        :param accs_per_split:
        :return:
        """
        feature_flow = self.feature_flows[corpus_key][feature_flow_key]
        warp = vtln.ScoreFeaturesWithWarpingFactorsJob(
            crp=self.crp[corpus_key],
            feature_flow=feature_flow,
            feature_scorer=meta.select_element(self.feature_scorers,
                                               corpus_key, feature_scorer),
            alignment=meta.select_element(self.alignments, corpus_key,
                                          alignment),
        )
        warp.rqmt = {"time": 24, "cpu": 1, "mem": 2}
        self.jobs[corpus_key]["vtln_warping_map_%s" % name] = warp

        seq = meta.TrainWarpingFactorsSequence(
            self.crp[corpus_key],
            None,
            feature_flow,
            warp.warping_map,
            warp.alphas_file,
            ["accumulate"] +
            meta.split_and_accumulate_sequence(splits, accs_per_split),
        )
        self.mixtures[corpus_key]["vtln_warping_mix_%s" %
                                  name] = seq.selected_mixtures
        self.vtln_files[corpus_key][name + "_alphas_file"] = warp.alphas_file
        self.vtln_files[corpus_key][name + "_warping_map"] = warp.warping_map
        self.vtln_files[corpus_key][name + "_mixtures"] = seq.selected_mixtures
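
A usage sketch under the same assumptions (all keys are placeholders for entries that must already exist in the system's registries):

    # hypothetical keys; 6 splits with 2 accumulations each
    system.vtln_warping_mixtures(
        name="vtln",
        corpus_key="train",
        feature_flow_key="mfcc+context",
        feature_scorer="estimate_mixtures_tri",
        alignment="train_tri",
        splits=6,
        accs_per_split=2,
    )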
Example #3
    def single_density_mixtures(self, name: str, corpus_key: str,
                                feature_flow_key: str, alignment: str):
        """
        TODO: docstring

        :param name:
        :param corpus_key:
        :param feature_flow_key:
        :param alignment:
        :return:
        """
        self.estimate_mixtures(
            name=name,
            corpus=corpus_key,
            flow=feature_flow_key,
            alignment=meta.select_element(self.alignments, corpus_key,
                                          (corpus_key, alignment, -1)),
            split_first=False,
        )
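
A usage sketch (hypothetical keys); note that the method always selects the last (-1) alignment stored under (corpus_key, alignment):

    system.single_density_mixtures(
        name="sdm_tri",
        corpus_key="train",
        feature_flow_key="mfcc+context+lda",
        alignment="train_tri",
    )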
Example #4
    def sat_recognition(
        self,
        prev_ctm: Tuple[str, float, float, int, str],
        feature_cache: Union[str, List[str], Tuple[str],
                             rasr.FlagDependentFlowAttribute],
        cache_regex: str,
        cmllr_mixtures: Union[str, List[str], Tuple[str],
                              rasr.FlagDependentFlowAttribute],
        train_corpus_key: str,
        name: str,
        iters: List[int],
        lm_scales: Union[float, List[float]],
        feature_scorer_key: Tuple[str, str],
        optimize_am_lm_scale: bool,
        corpus_key: str,
        feature_flow: str,
        pronunciation_scales: Union[float, List[float]],
        search_parameters: dict,
        rtf: float,
        mem: float,
        parallelize_conversion: bool,
        lattice_to_ctm_kwargs: dict,
        **kwargs,
    ):
        """
        TODO: docstring

        :param prev_ctm:
        :param feature_cache:
        :param cache_regex:
        :param cmllr_mixtures:
        :param train_corpus_key:
        :param name:
        :param iters:
        :param lm_scales:
        :param feature_scorer_key:
        :param optimize_am_lm_scale:
        :param corpus_key:
        :param feature_flow:
        :param pronunciation_scales:
        :param search_parameters:
        :param rtf:
        :param mem:
        :param parallelize_conversion:
        :param lattice_to_ctm_kwargs:
        :param kwargs:
        :return:
        """
        prev_ctm_key = f"recog_{train_corpus_key}-{prev_ctm[0]}-{corpus_key}-ps{prev_ctm[1]:02.2f}-lm{prev_ctm[2]:02.2f}-iter{prev_ctm[3]:02d}{prev_ctm[4]}"
        assert prev_ctm_key in self.ctm_files[corpus_key], (
            "the previous recognition stage '%s' did not provide the required recognition: %s"
            % (prev_ctm, prev_ctm_key))
        recognized_corpus = corpus_recipes.ReplaceTranscriptionFromCtmJob(
            self.corpora[corpus_key].corpus_file,
            self.ctm_files[corpus_key][prev_ctm_key],
        )
        speaker_seq = corpus_recipes.SegmentCorpusBySpeakerJob(
            self.corpora[corpus_key].corpus_file)

        overlay_key = f"{corpus_key}_{name}_ps{prev_ctm[1]:02.2f}-lm{prev_ctm[2]:02.2f}-iter{prev_ctm[3]:02d}{prev_ctm[4]}_sat"
        self.add_overlay(corpus_key, overlay_key)
        self.crp[overlay_key].corpus_config = copy.deepcopy(
            self.crp[corpus_key].corpus_config)
        self.crp[
            overlay_key].corpus_config.file = recognized_corpus.output_corpus_path
        self.crp[overlay_key].segment_path = copy.deepcopy(
            self.crp[corpus_key].segment_path)

        self.corpora[overlay_key] = copy.deepcopy(self.corpora[corpus_key])
        self.corpora[
            overlay_key].corpus_file = recognized_corpus.output_corpus_path

        alignment = mm.AlignmentJob(
            crp=self.crp[overlay_key],
            feature_flow=self.feature_flows[overlay_key][feature_flow],
            feature_scorer=self.default_mixture_scorer(
                meta.select_element(self.mixtures, corpus_key,
                                    (train_corpus_key, cmllr_mixtures)), ),
        )

        self.estimate_cmllr(
            name=name,
            corpus_key=overlay_key,
            feature_cache=meta.select_element(self.feature_caches, corpus_key,
                                              feature_cache),
            feature_flow_key=feature_flow,
            cache_regex=cache_regex,
            alignment=alignment.out_alignment_path,
            mixtures=meta.select_element(self.mixtures, corpus_key,
                                         (train_corpus_key, cmllr_mixtures)),
            overlay_key=overlay_key,
        )
        self.feature_flows[corpus_key][
            "%s+cmllr" % feature_flow] = sat.add_cmllr_transform(
                feature_net=self.feature_flows[corpus_key][feature_flow],
                map_file=speaker_seq.out_cluster_map_file,
                transform_dir=self.jobs[overlay_key]["cmllr"].transforms,
            )

        with tk.block(f"{name}_recognition"):
            self.recognition(
                name=name,
                iters=iters,
                lm_scales=lm_scales,
                feature_scorer_key=feature_scorer_key,
                optimize_am_lm_scale=optimize_am_lm_scale,
                corpus_key=corpus_key,
                feature_flow=feature_flow + "+cmllr",
                pronunciation_scales=pronunciation_scales,
                search_parameters=search_parameters,
                rtf=rtf,
                mem=mem,
                parallelize_conversion=parallelize_conversion,
                lattice_to_ctm_kwargs=lattice_to_ctm_kwargs,
                **kwargs,
            )
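
A call sketch; every key, scale, and the previous-pass tuple below is a hypothetical placeholder and has to match results produced earlier in the pipeline:

    # prev_ctm identifies the earlier recognition pass:
    # (name, pronunciation scale, lm scale, iteration, optimization suffix)
    system.sat_recognition(
        prev_ctm=("tri", 1.0, 12.0, 10, "-optlm"),
        feature_cache="mfcc",
        cache_regex="^mfcc.*$",
        cmllr_mixtures="estimate_mixtures_tri",
        train_corpus_key="train",
        name="sat",
        iters=[8, 10],
        lm_scales=[12.0],
        feature_scorer_key=("train", "estimate_mixtures_sat"),
        optimize_am_lm_scale=True,
        corpus_key="dev",
        feature_flow="mfcc+context+lda",
        pronunciation_scales=[1.0],
        search_parameters={"beam-pruning": 15.0},
        rtf=30,
        mem=8,
        parallelize_conversion=True,
        lattice_to_ctm_kwargs={},
    )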
Example #5
    def sat_training(
        self,
        name: str,
        corpus_key: str,
        feature_cache: Union[str, List[str], Tuple[str],
                             rasr.FlagDependentFlowAttribute],
        feature_flow_key: str,
        cache_regex: str,
        alignment: Union[str, List[str], Tuple[str],
                         rasr.FlagDependentFlowAttribute],
        mixtures: Union[str, List[str], Tuple[str],
                        rasr.FlagDependentFlowAttribute],
        splits: int,
        accs_per_split: int,
        align_keep_values: Optional[dict] = None,
        **kwargs,
    ):
        """
        TODO: docstring

        :param name:
        :param corpus_key:
        :param feature_cache:
        :param feature_flow_key:
        :param cache_regex:
        :param alignment:
        :param mixtures:
        :param splits:
        :param accs_per_split:
        :param align_keep_values:
        :param kwargs:
        :return:
        """
        self.estimate_cmllr(
            name=name,
            corpus_key=corpus_key,
            feature_cache=meta.select_element(self.feature_caches, corpus_key,
                                              feature_cache),
            feature_flow_key=feature_flow_key,
            cache_regex=cache_regex,
            alignment=meta.select_element(self.alignments, corpus_key,
                                          alignment),
            mixtures=meta.select_element(self.mixtures, corpus_key, mixtures),
        )

        action_sequence = (["accumulate"] +
                           meta.align_then_split_and_accumulate_sequence(
                               splits, accs_per_split, mark_align=False) +
                           ["align!"])

        akv = dict(**self.default_align_keep_values)
        if align_keep_values is not None:
            akv.update(align_keep_values)

        self.train(
            name=name,
            corpus=corpus_key,
            sequence=action_sequence,
            flow="%s+cmllr" % feature_flow_key,
            initial_alignment=meta.select_element(self.alignments, corpus_key,
                                                  alignment),
            align_keep_values=akv,
            **kwargs,
        )
        self.jobs[corpus_key]["train_{}".format(
            name)].selected_alignment_jobs[-1].add_alias(
                "train/{}_{}_align_last".format(corpus_key, name))

        self.jobs[corpus_key]["train_{}".format(
            name)].selected_mixture_jobs[-1].add_alias(
                "train/{}_{}_mix_last".format(corpus_key, name))
        tk.register_output(
            "train/{}_{}_align_bundle_last".format(corpus_key, name),
            self.jobs[corpus_key]["train_{}".format(
                name)].selected_alignment_jobs[-1].out_alignment_bundle,
        )
        tk.register_output(
            "train/{}_{}_mix_last".format(corpus_key, name),
            self.jobs[corpus_key]["train_{}".format(
                name)].selected_mixture_jobs[-1].out_mixtures,
        )
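
A usage sketch (hypothetical keys); the "<flow>+cmllr" feature flow used for training is created inside the method:

    system.sat_training(
        name="sat",
        corpus_key="train",
        feature_cache="mfcc",
        feature_flow_key="mfcc+context+lda",
        cache_regex="^mfcc.*$",
        alignment="train_tri",
        mixtures="estimate_mixtures_tri",
        splits=6,
        accs_per_split=2,
        align_keep_values=None,  # None keeps the system's default keep values
    )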
Example #6
    def cart_and_lda(
        self,
        name: str,
        corpus_key: str,
        initial_flow_key: str,
        context_flow_key: str,
        context_size: int,
        alignment: Union[str, List[str], Tuple[str],
                         rasr.FlagDependentFlowAttribute],
        num_dim: int,
        num_iter: int,
        eigenvalue_args: dict,
        generalized_eigenvalue_args: dict,
        **kwargs,
    ):
        """
        TODO:  docstring

        :param name:
        :param corpus_key:
        :param initial_flow_key:
        :param context_flow_key:
        :param context_size:
        :param alignment:
        :param num_dim:
        :param num_iter:
        :param eigenvalue_args:
        :param generalized_eigenvalue_args:
        :param kwargs:
        :return:
        """
        for f in self.feature_flows.values():
            f["{}+context".format(context_flow_key)] = lda.add_context_flow(
                feature_net=f[context_flow_key],
                max_size=context_size,
                right=int(context_size / 2.0),
            )

        cart_lda = meta.CartAndLDA(
            original_crp=self.crp[corpus_key],
            initial_flow=self.feature_flows[corpus_key][initial_flow_key],
            context_flow=self.feature_flows[corpus_key]["{}+context".format(
                context_flow_key)],
            alignment=meta.select_element(self.alignments, corpus_key,
                                          alignment),
            questions=self.cart_questions,
            num_dim=num_dim,
            num_iter=num_iter,
            eigenvalue_args=eigenvalue_args,
            generalized_eigenvalue_args=generalized_eigenvalue_args,
        )
        self.jobs[corpus_key]["cart_and_lda_{}_{}".format(corpus_key,
                                                          name)] = cart_lda
        self.lda_matrices[corpus_key][name] = cart_lda.last_lda_matrix
        self.cart_trees[corpus_key][name] = cart_lda.last_cart_tree
        tk.register_output(
            "{}_{}_last_num_cart_labels".format(corpus_key, name),
            cart_lda.last_num_cart_labels,
        )
        tk.register_output("{}_{}.tree.xml.gz".format(corpus_key, name),
                           cart_lda.last_cart_tree)

        for f in self.feature_flows.values():
            f["{}+context+lda".format(
                context_flow_key)] = features.add_linear_transform(
                    f["{}+context".format(context_flow_key)],
                    cart_lda.last_lda_matrix)

        for crp in self.crp.values():
            crp.acoustic_model_config.state_tying.type = "cart"
            crp.acoustic_model_config.state_tying.file = cart_lda.last_cart_tree

        state_tying_job = allophones.DumpStateTyingJob(self.crp[corpus_key])
        tk.register_output(
            "{}_{}_state_tying".format(corpus_key, name),
            state_tying_job.out_state_tying,
        )
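
A usage sketch; the keys and the eigenvalue argument dictionaries are placeholders (their exact contents depend on the CartAndLDA recipe being used):

    # window of 9 frames, 48-dimensional LDA output, 2 CART/LDA iterations
    system.cart_and_lda(
        name="cart_lda",
        corpus_key="train",
        initial_flow_key="mfcc+deriv",
        context_flow_key="mfcc",
        context_size=9,
        alignment="train_mono",
        num_dim=48,
        num_iter=2,
        eigenvalue_args={},
        generalized_eigenvalue_args={},
    )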
Example #7
    def monophone_training(
        self,
        name: str,
        corpus_key: str,
        linear_alignment_args: Optional[dict],
        feature_energy_flow_key: str,
        feature_flow: Union[str, List[str], Tuple[str],
                            rasr.FlagDependentFlowAttribute],
        align_iter: int,
        splits: int,
        accs_per_split: int,
        align_keep_values: Optional[dict] = None,
        **kwargs,
    ):
        """
        TODO: docstring
        :param name:
        :param corpus_key:
        :param linear_alignment_args:
        :param feature_energy_flow_key:
        :param feature_flow:
        :param align_iter:
        :param splits:
        :param accs_per_split:
        :param align_keep_values:
        :param kwargs:
        :return:
        """
        if linear_alignment_args is not None:
            self.linear_alignment(
                name,
                corpus_key,
                feature_energy_flow_key,
                prefix=f"{corpus_key}_",
                **linear_alignment_args,
            )

        action_sequence = meta.align_and_accumulate_sequence(
            align_iter, 1, mark_accumulate=False, mark_align=False)
        action_sequence += meta.split_and_accumulate_sequence(
            splits, accs_per_split) + ["align!"]

        akv = dict(**self.default_align_keep_values)
        if align_keep_values is not None:
            akv.update(align_keep_values)

        self.train(
            name=name,
            corpus=corpus_key,
            sequence=action_sequence,
            flow=feature_flow,
            initial_mixtures=meta.select_element(
                self.mixtures, corpus_key, "linear_alignment_{}".format(name)),
            align_keep_values=akv,
            **kwargs,
        )
        self.jobs[corpus_key]["train_{}".format(
            name)].selected_alignment_jobs[-1].add_alias(
                "train/{}_{}_align_last".format(corpus_key, name))

        self.jobs[corpus_key]["train_{}".format(
            name)].selected_mixture_jobs[-1].add_alias(
                "train/{}_{}_mix_last".format(corpus_key, name))
        tk.register_output(
            "train/{}_{}_align_bundle_last".format(corpus_key, name),
            self.jobs[corpus_key]["train_{}".format(
                name)].selected_alignment_jobs[-1].out_alignment_bundle,
        )
        tk.register_output(
            "train/{}_{}_mix_last".format(corpus_key, name),
            self.jobs[corpus_key]["train_{}".format(
                name)].selected_mixture_jobs[-1].out_mixtures,
        )

        state_tying_job = allophones.DumpStateTyingJob(self.crp[corpus_key])
        tk.register_output(
            "{}_{}_state_tying".format(corpus_key, name),
            state_tying_job.out_state_tying,
        )
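
A usage sketch (hypothetical keys); linear_alignment_args is only a placeholder here, its contents are forwarded to self.linear_alignment and depend on that method's signature:

    system.monophone_training(
        name="mono",
        corpus_key="train",
        linear_alignment_args={},        # {} falls back to the defaults of self.linear_alignment
        feature_energy_flow_key="energy,mfcc+deriv",
        feature_flow="mfcc+deriv",
        align_iter=20,
        splits=10,
        accs_per_split=2,
    )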