def _trainer_initialization(self) -> None:
    """Ivector extractor training initialization"""
    self.iteration = 1
    self.training_complete = False
    working_dir = self.working_directory
    log_dir = os.path.join(working_dir, "log")
    diag_ubm_path = os.path.join(working_dir, "final.dubm")
    full_ubm_path = os.path.join(working_dir, "final.ubm")

    # Convert the diagonal UBM into a full-covariance UBM, then initialize
    # the ivector extractor from it; both commands log to the same file.
    with open(os.path.join(log_dir, "init.log"), "w") as log_file:
        for command in (
            [
                thirdparty_binary("gmm-global-to-fgmm"),
                diag_ubm_path,
                full_ubm_path,
            ],
            [
                thirdparty_binary("ivector-extractor-init"),
                f"--ivector-dim={self.ivector_dimension}",
                "--use-weights=false",
                full_ubm_path,
                self.ie_path,
            ],
        ):
            subprocess.call(command, stderr=log_file)

    # Do Gaussian selection and posterior extraction
    self.gauss_to_post()
    parse_logs(log_dir)
    def segment(self) -> None:
        """
        Performs VAD and segmentation into utterances

        Raises
        ------
        :class:`~montreal_forced_aligner.exceptions.KaldiProcessingError`
            If there were any errors in running Kaldi binaries
        """
        self.setup()
        log_directory = os.path.join(self.working_directory, "log")
        done_path = os.path.join(self.working_directory, "done")
        # A "done" marker means a previous run already finished; skip rework.
        if os.path.exists(done_path):
            self.log_info("Classification already done, skipping.")
            return
        try:
            self.compute_vad()
            self.uses_vad = True
            self.segment_vad()
            parse_logs(log_directory)
        except KaldiProcessingError as e:
            # Catch only Kaldi errors so their logs can be surfaced; any
            # other exception propagates unchanged, as before.
            import logging

            logger = logging.getLogger(self.identifier)
            log_kaldi_errors(e.error_logs, logger)
            e.update_log_file(logger)
            raise
        with open(done_path, "w"):
            pass
    def _trainer_initialization(self) -> None:
        """Speaker adapted training initialization.

        Copies the LDA matrix and any existing fMLLR transforms from the
        previous aligner's directory (computing fMLLR transforms first if
        none exist yet), then builds the tree, converts alignments, and
        compiles training graphs.
        """
        if self.initialized:
            # Already set up on a previous call; just make sure we are back
            # in speaker-adapted mode.
            self.speaker_independent = False
            self.worker.speaker_independent = False
            return
        # Reuse the LDA transform from the previous aligner if present.
        if os.path.exists(
                os.path.join(self.previous_aligner.working_directory,
                             "lda.mat")):
            shutil.copyfile(
                os.path.join(self.previous_aligner.working_directory,
                             "lda.mat"),
                os.path.join(self.working_directory, "lda.mat"),
            )
        # Probe for any existing fMLLR transform archive ("trans.*.ark") from
        # the previous aligner.  The nested for/else exits both loops as soon
        # as one archive is found; the OUTER else only runs when no job has
        # any transforms, in which case they are computed now (temporarily in
        # speaker-independent mode, then switched back below).
        for j in self.jobs:
            if not j.has_data:
                continue
            for path in j.construct_path_dictionary(
                    self.previous_aligner.working_directory, "trans",
                    "ark").values():
                if os.path.exists(path):
                    break
            else:
                continue
            break
        else:
            self.speaker_independent = True
            self.worker.speaker_independent = True
            self.calc_fmllr()
        self.speaker_independent = False
        self.worker.speaker_independent = False
        # Copy each job's transform archives into this trainer's directory,
        # keyed the same way they were keyed in the previous aligner.
        for j in self.jobs:
            if not j.has_data:
                continue
            transform_paths = j.construct_path_dictionary(
                self.previous_aligner.working_directory, "trans", "ark")
            output_paths = j.construct_path_dictionary(self.working_directory,
                                                       "trans", "ark")
            for k, path in transform_paths.items():
                shutil.copy(path, output_paths[k])
        self.tree_stats()
        # In "quick" mode the tree/model are bootstrapped from the previous
        # aligner rather than built from scratch.
        self._setup_tree(init_from_previous=self.quick,
                         initial_mix_up=self.quick)

        self.convert_alignments()

        self.compile_train_graphs()
        # The freshly initialized model becomes the model for iteration 1.
        os.rename(self.model_path, self.next_model_path)

        self.iteration = 1
        parse_logs(self.working_log_directory)
    def train_iteration(self) -> None:
        """Perform an iteration of training"""
        if os.path.exists(self.next_model_path):
            # This iteration already completed on a previous run: advance the
            # counters without re-accumulating.  Note the counter is bumped
            # *before* the mix-up check here, mirroring the state after a
            # finished iteration.
            self.iteration += 1
            if self.iteration <= self.final_gaussian_iteration:
                self.increment_gaussians()
            return
        needs_realignment = self.iteration in self.realignment_iterations
        if needs_realignment:
            self.align_iteration()
        self.acc_stats()
        parse_logs(self.working_log_directory)
        # Keep growing the Gaussian count until the final Gaussian iteration.
        if self.iteration <= self.final_gaussian_iteration:
            self.increment_gaussians()
        self.iteration += 1
    # Example #5
    # 0
    def train_iteration(self) -> None:
        """
        Run a single LDA training iteration
        """
        if os.path.exists(self.next_model_path):
            # Model for this iteration already exists (resumed run); skip.
            return
        # Run any steps scheduled for this iteration, in order: realignment
        # first, then LDA+MLLT re-estimation.
        for schedule, step in (
            (self.realignment_iterations, self.align_iteration),
            (self.mllt_iterations, self.calc_lda_mllt),
        ):
            if self.iteration in schedule:
                step()
        self.acc_stats()
        parse_logs(self.working_log_directory)
        # Keep growing the Gaussian count until the final Gaussian iteration.
        if self.iteration <= self.final_gaussian_iteration:
            self.increment_gaussians()
        self.iteration += 1
 def _trainer_initialization(
         self, initial_alignment_directory: Optional[str] = None) -> None:
     """DUBM training initialization.

     Initializes the diagonal UBM via in-memory E-M over the features,
     optionally seeding alignments and a model from a previous directory.

     Parameters
     ----------
     initial_alignment_directory: str, optional
         Directory whose alignment archives and ``final.mdl`` are copied in
         before initialization, when it exists.
     """
     log_directory = os.path.join(self.working_directory, "log")
     # Seed alignments and the model from a previous run, if provided.
     if initial_alignment_directory and os.path.exists(
             initial_alignment_directory):
         jobs = self.align_arguments()
         for j in jobs:
             for p in j.ali_paths.values():
                 shutil.copyfile(
                     p.replace(self.working_directory,
                               initial_alignment_directory), p)
         shutil.copyfile(
             os.path.join(initial_alignment_directory, "final.mdl"),
             os.path.join(self.working_directory, "final.mdl"),
         )
     # Start E-M from a reduced number of Gaussians and grow to the target.
     num_gauss_init = int(self.initial_gaussian_proportion *
                          int(self.num_gaussians))
     log_path = os.path.join(log_directory, "gmm_init.log")
     feature_string = self.construct_base_feature_string(all_feats=True)
     self.iteration = 1
     with open(log_path, "w") as log_file:
         gmm_init_proc = subprocess.Popen(
             [
                 thirdparty_binary("gmm-global-init-from-feats"),
                 f"--num-threads={self.worker.num_jobs}",
                 f"--num-frames={self.num_frames}",
                 # Dashed option names, consistent with the rest of the file;
                 # Kaldi's option parser treats '-' and '_' interchangeably.
                 f"--num-gauss={self.num_gaussians}",
                 f"--num-gauss-init={num_gauss_init}",
                 f"--num-iters={self.num_iterations_init}",
                 feature_string,
                 self.model_path,
             ],
             stderr=log_file,
         )
         gmm_init_proc.communicate()
     # Store Gaussian selection indices on disk
     self.gmm_gselect()
     parse_logs(log_directory)
    def _setup_tree(self,
                    init_from_previous=False,
                    initial_mix_up=True) -> None:
        """
        Set up the tree for the triphone model

        Raises
        ------
        :class:`~montreal_forced_aligner.exceptions.KaldiProcessingError`
            If there were any errors in running Kaldi binaries
        """
        working_dir = self.working_directory
        log_dir = self.working_log_directory
        phones_dir = self.worker.phones_dir

        tree_path = os.path.join(working_dir, "tree")
        treeacc_path = os.path.join(working_dir, "treeacc")
        questions_path = os.path.join(working_dir, "questions.int")
        questions_qst_path = os.path.join(working_dir, "questions.qst")
        topo_path = self.worker.topo_path

        def run_kaldi(binary_name, args, log_name):
            # Run one Kaldi binary, capturing stderr to its own log file.
            with open(os.path.join(log_dir, log_name), "w") as log_file:
                subprocess.call([thirdparty_binary(binary_name)] + args,
                                stderr=log_file)

        # Cluster phones into automatically derived question sets.
        run_kaldi(
            "cluster-phones",
            [treeacc_path,
             os.path.join(phones_dir, "sets.int"),
             questions_path],
            "questions.log",
        )

        # Append the hand-specified extra questions to the derived ones.
        with open(os.path.join(phones_dir, "extra_questions.int"),
                  "r") as inf, open(questions_path, "a") as outf:
            for line in inf:
                outf.write(line)

        run_kaldi(
            "compile-questions",
            [topo_path, questions_path, questions_qst_path],
            "compile_questions.log",
        )

        run_kaldi(
            "build-tree",
            ["--verbose=1",
             f"--max-leaves={self.num_leaves}",
             f"--cluster-thresh={self.cluster_threshold}",
             treeacc_path,
             os.path.join(phones_dir, "roots.int"),
             questions_qst_path,
             topo_path,
             tree_path],
            "build_tree.log",
        )

        # Initialize the model from the tree, optionally carrying over the
        # previous aligner's tree and model.
        occs_path = os.path.join(working_dir, "0.occs")
        mdl_path = self.model_path
        init_args = [
            f"--write-occs={occs_path}",
            tree_path,
            treeacc_path,
            topo_path,
            mdl_path,
        ]
        if init_from_previous:
            init_args += [
                os.path.join(self.previous_aligner.working_directory, "tree"),
                os.path.join(self.previous_aligner.working_directory,
                             "final.mdl"),
            ]
        run_kaldi("gmm-init-model", init_args, "init_model.log")

        if initial_mix_up:
            # Mix the model up (or, from a previous model, down then up) to
            # the configured initial Gaussian count.
            mixup_args = [f"--mix-up={self.initial_gaussians}"]
            if init_from_previous:
                mixup_args.append(f"--mix-down={self.initial_gaussians}")
            mixup_args += [mdl_path, occs_path, mdl_path]
            run_kaldi("gmm-mixup", mixup_args, "mixup.log")

        os.remove(treeacc_path)
        os.rename(occs_path, self.next_occs_path)
        parse_logs(log_dir)
    def create_align_model(self) -> None:
        """
        Create alignment model for speaker-adapted training that will use speaker-independent
        features in later aligning.

        See Also
        --------
        :func:`~montreal_forced_aligner.acoustic_modeling.sat.AccStatsTwoFeatsFunction`
            Multiprocessing helper function for each job
        :meth:`.SatTrainer.acc_stats_two_feats_arguments`
            Job method for generating arguments for the helper function
        :kaldi_src:`gmm-est`
            Relevant Kaldi binary
        :kaldi_src:`gmm-sum-accs`
            Relevant Kaldi binary
        :kaldi_steps:`train_sat`
            Reference Kaldi script
        """
        self.log_info(
            "Creating alignment model for speaker-independent features...")
        begin = time.time()

        arguments = self.acc_stats_two_feats_arguments()
        with tqdm.tqdm(total=self.num_current_utterances,
                       disable=getattr(self, "quiet", False)) as pbar:
            if self.use_mp:
                # Fan out stats accumulation across worker processes; results
                # (or exceptions) come back through a shared queue.
                error_dict = {}
                return_queue = mp.Queue()
                stopped = Stopped()
                procs = []
                for i, args in enumerate(arguments):
                    function = AccStatsTwoFeatsFunction(args)
                    p = KaldiProcessWorker(i, return_queue, function, stopped)
                    procs.append(p)
                    p.start()
                while True:
                    try:
                        result = return_queue.get(timeout=1)
                        if isinstance(result, Exception):
                            # Collect errors per job; re-raised after all
                            # workers have been joined below.
                            error_dict[getattr(result, "job_name", 0)] = result
                            continue
                        if stopped.stop_check():
                            # Stop was requested: drain the queue without
                            # counting progress.
                            continue
                    except Empty:
                        # No result within the timeout: exit the loop only
                        # once every worker has signalled it is finished
                        # (for/else runs when no unfinished worker is found).
                        for proc in procs:
                            if not proc.finished.stop_check():
                                break
                        else:
                            break
                        continue
                    pbar.update(1)
                for p in procs:
                    p.join()
                if error_dict:
                    for v in error_dict.values():
                        raise v
            else:
                # Single-process fallback: run each job inline.
                for args in arguments:
                    function = AccStatsTwoFeatsFunction(args)
                    for _ in function.run():
                        pbar.update(1)

        log_path = os.path.join(self.working_log_directory,
                                "align_model_est.log")
        with open(log_path, "w", encoding="utf8") as log_file:

            # Sum all per-job accumulator files and pipe the merged stats
            # straight into gmm-est to estimate the alignment model.
            acc_files = []
            for x in arguments:
                acc_files.extend(x.acc_paths.values())
            sum_proc = subprocess.Popen(
                [thirdparty_binary("gmm-sum-accs"), "-"] + acc_files,
                stderr=log_file,
                stdout=subprocess.PIPE,
                env=os.environ,
            )
            est_command = [
                thirdparty_binary("gmm-est"),
                "--remove-low-count-gaussians=false",
            ]
            if not self.quick:
                est_command.append(f"--power={self.power}")
            else:
                est_command.append(
                    f"--write-occs={os.path.join(self.working_directory, 'final.occs')}"
                )
            est_command.extend([
                self.model_path,
                "-",
                self.model_path.replace(".mdl", ".alimdl"),
            ])
            est_proc = subprocess.Popen(
                est_command,
                stdin=sum_proc.stdout,
                stderr=log_file,
                env=os.environ,
            )
            est_proc.communicate()
        parse_logs(self.working_log_directory)
        # Accumulator files are only kept around for debugging runs.
        if not self.debug:
            for f in acc_files:
                os.remove(f)
        self.log_debug(f"Alignment model creation took {time.time() - begin}")