def _trainer_initialization(self) -> None:
    """Initialize the ivector extractor prior to training."""
    self.iteration = 1
    self.training_complete = False

    log_dir = os.path.join(self.working_directory, "log")
    init_log_path = os.path.join(log_dir, "init.log")
    diag_ubm_path = os.path.join(self.working_directory, "final.dubm")
    full_ubm_path = os.path.join(self.working_directory, "final.ubm")

    with open(init_log_path, "w") as log_file:
        # Convert the diagonal UBM into a full-covariance UBM.
        subprocess.call(
            [
                thirdparty_binary("gmm-global-to-fgmm"),
                diag_ubm_path,
                full_ubm_path,
            ],
            stderr=log_file,
        )
        # Seed the ivector extractor from the full UBM.
        subprocess.call(
            [
                thirdparty_binary("ivector-extractor-init"),
                f"--ivector-dim={self.ivector_dimension}",
                "--use-weights=false",
                full_ubm_path,
                self.ie_path,
            ],
            stderr=log_file,
        )

    # Do Gaussian selection and posterior extraction for the training data.
    self.gauss_to_post()
    parse_logs(log_dir)
def segment(self) -> None:
    """
    Performs VAD and segmentation into utterances

    Raises
    ------
    :class:`~montreal_forced_aligner.exceptions.KaldiProcessingError`
        If there were any errors in running Kaldi binaries
    """
    self.setup()
    log_directory = os.path.join(self.working_directory, "log")
    done_path = os.path.join(self.working_directory, "done")
    # "done" marker means a previous run finished; skip all the work.
    if os.path.exists(done_path):
        self.log_info("Classification already done, skipping.")
        return
    try:
        self.compute_vad()
        self.uses_vad = True
        self.segment_vad()
        parse_logs(log_directory)
    except KaldiProcessingError as e:
        # Catch only Kaldi errors (other exceptions propagate untouched);
        # attach the Kaldi log output to the error before re-raising.
        import logging

        logger = logging.getLogger(self.identifier)
        log_kaldi_errors(e.error_logs, logger)
        e.update_log_file(logger)
        raise
    # Touch the marker so future runs can skip this step.
    with open(done_path, "w"):
        pass
def _trainer_initialization(self) -> None:
    """Speaker adapted training initialization"""
    # Already initialized: just ensure speaker-adapted (fMLLR) features
    # are in effect and bail out.
    if self.initialized:
        self.speaker_independent = False
        self.worker.speaker_independent = False
        return
    # Carry over the LDA transform from the previous aligner, if present.
    if os.path.exists(
            os.path.join(self.previous_aligner.working_directory, "lda.mat")):
        shutil.copyfile(
            os.path.join(self.previous_aligner.working_directory, "lda.mat"),
            os.path.join(self.working_directory, "lda.mat"),
        )
    # Look for any existing fMLLR transform ("trans") archive from the
    # previous aligner.  The nested for/else-break pattern stops at the
    # first job that has one; the outer else runs only when NO transform
    # was found for any job.
    for j in self.jobs:
        if not j.has_data:
            continue
        for path in j.construct_path_dictionary(
                self.previous_aligner.working_directory, "trans",
                "ark").values():
            if os.path.exists(path):
                break
        else:
            continue
        break
    else:
        # No transforms exist yet: compute fMLLR in a speaker-independent
        # pass first.
        self.speaker_independent = True
        self.worker.speaker_independent = True
        self.calc_fmllr()
    # From here on, training uses speaker-adapted features.
    self.speaker_independent = False
    self.worker.speaker_independent = False
    # Copy the per-job fMLLR transforms into this trainer's directory.
    for j in self.jobs:
        if not j.has_data:
            continue
        transform_paths = j.construct_path_dictionary(
            self.previous_aligner.working_directory, "trans", "ark")
        output_paths = j.construct_path_dictionary(self.working_directory,
                                                   "trans", "ark")
        for k, path in transform_paths.items():
            shutil.copy(path, output_paths[k])
    # Build the phonetic decision tree and the initial model.
    self.tree_stats()
    self._setup_tree(init_from_previous=self.quick,
                     initial_mix_up=self.quick)
    self.convert_alignments()
    self.compile_train_graphs()
    # The freshly built model becomes the input to iteration 1.
    os.rename(self.model_path, self.next_model_path)
    self.iteration = 1
    parse_logs(self.working_log_directory)
def train_iteration(self) -> None:
    """Run one training iteration (realign, accumulate, re-estimate)."""
    skip_work = os.path.exists(self.next_model_path)
    if skip_work:
        # The next model is already on disk from a previous run: advance
        # the counters (and Gaussian count) without redoing the work.
        self.iteration += 1
        if self.final_gaussian_iteration >= self.iteration:
            self.increment_gaussians()
        return
    # Realign on scheduled iterations before accumulating statistics.
    if self.iteration in self.realignment_iterations:
        self.align_iteration()
    self.acc_stats()
    parse_logs(self.working_log_directory)
    # Grow the number of Gaussians until the final mix-up iteration.
    if self.final_gaussian_iteration >= self.iteration:
        self.increment_gaussians()
    self.iteration += 1
def train_iteration(self) -> None:
    """Run a single LDA+MLLT training iteration."""
    # Next model already produced by a previous run; nothing to do.
    if os.path.exists(self.next_model_path):
        return
    # Realign and/or re-estimate the MLLT transform on scheduled iterations.
    if self.iteration in self.realignment_iterations:
        self.align_iteration()
    if self.iteration in self.mllt_iterations:
        self.calc_lda_mllt()
    self.acc_stats()
    parse_logs(self.working_log_directory)
    # Grow the number of Gaussians until the final mix-up iteration.
    if self.final_gaussian_iteration >= self.iteration:
        self.increment_gaussians()
    self.iteration += 1
def _trainer_initialization(
        self, initial_alignment_directory: Optional[str] = None) -> None:
    """Initialize DUBM training via E-M directly from features."""
    log_dir = os.path.join(self.working_directory, "log")
    # Optionally seed from alignments produced in another directory.
    if initial_alignment_directory and os.path.exists(
            initial_alignment_directory):
        for job in self.align_arguments():
            for ali_path in job.ali_paths.values():
                source_path = ali_path.replace(self.working_directory,
                                               initial_alignment_directory)
                shutil.copyfile(source_path, ali_path)
        shutil.copyfile(
            os.path.join(initial_alignment_directory, "final.mdl"),
            os.path.join(self.working_directory, "final.mdl"),
        )
    self.iteration = 1
    feature_string = self.construct_base_feature_string(all_feats=True)
    initial_gauss_count = int(self.initial_gaussian_proportion *
                              int(self.num_gaussians))
    init_command = [
        thirdparty_binary("gmm-global-init-from-feats"),
        f"--num-threads={self.worker.num_jobs}",
        f"--num-frames={self.num_frames}",
        f"--num_gauss={self.num_gaussians}",
        f"--num_gauss_init={initial_gauss_count}",
        f"--num_iters={self.num_iterations_init}",
        feature_string,
        self.model_path,
    ]
    with open(os.path.join(log_dir, "gmm_init.log"), "w") as log_file:
        # In-memory E-M initialization of the diagonal UBM.
        subprocess.Popen(init_command, stderr=log_file).communicate()
    # Store Gaussian selection indices on disk.
    self.gmm_gselect()
    parse_logs(log_dir)
def _setup_tree(self, init_from_previous=False, initial_mix_up=True) -> None:
    """
    Set up the tree for the triphone model

    Parameters
    ----------
    init_from_previous : bool
        If True, initialize the model from the previous aligner's tree
        and model rather than from scratch
    initial_mix_up : bool
        If True, mix the model up (or up-and-down) to
        ``self.initial_gaussians`` Gaussians after initialization

    Raises
    ------
    :class:`~montreal_forced_aligner.exceptions.KaldiProcessingError`
        If there were any errors in running Kaldi binaries
    """
    log_path = os.path.join(self.working_log_directory, "questions.log")
    tree_path = os.path.join(self.working_directory, "tree")
    treeacc_path = os.path.join(self.working_directory, "treeacc")
    sets_int_path = os.path.join(self.worker.phones_dir, "sets.int")
    roots_int_path = os.path.join(self.worker.phones_dir, "roots.int")
    extra_question_int_path = os.path.join(self.worker.phones_dir,
                                           "extra_questions.int")
    topo_path = self.worker.topo_path
    questions_path = os.path.join(self.working_directory, "questions.int")
    questions_qst_path = os.path.join(self.working_directory,
                                      "questions.qst")
    # Automatically cluster phones to generate question sets for the tree.
    with open(log_path, "w") as log_file:
        subprocess.call(
            [
                thirdparty_binary("cluster-phones"),
                treeacc_path,
                sets_int_path,
                questions_path,
            ],
            stderr=log_file,
        )
    # Append the hand-specified extra questions to the clustered ones.
    with open(extra_question_int_path,
              "r") as inf, open(questions_path, "a") as outf:
        for line in inf:
            outf.write(line)
    # Compile the question set into Kaldi's binary format.
    log_path = os.path.join(self.working_log_directory,
                            "compile_questions.log")
    with open(log_path, "w") as log_file:
        subprocess.call(
            [
                thirdparty_binary("compile-questions"),
                topo_path,
                questions_path,
                questions_qst_path,
            ],
            stderr=log_file,
        )
    # Build the phonetic decision tree from the accumulated tree stats.
    log_path = os.path.join(self.working_log_directory, "build_tree.log")
    with open(log_path, "w") as log_file:
        subprocess.call(
            [
                thirdparty_binary("build-tree"),
                "--verbose=1",
                f"--max-leaves={self.num_leaves}",
                f"--cluster-thresh={self.cluster_threshold}",
                treeacc_path,
                roots_int_path,
                questions_qst_path,
                topo_path,
                tree_path,
            ],
            stderr=log_file,
        )
    # Initialize the GMM model for the new tree, either from the previous
    # aligner's model or from scratch.
    log_path = os.path.join(self.working_log_directory, "init_model.log")
    occs_path = os.path.join(self.working_directory, "0.occs")
    mdl_path = self.model_path
    if init_from_previous:
        command = [
            thirdparty_binary("gmm-init-model"),
            f"--write-occs={occs_path}",
            tree_path,
            treeacc_path,
            topo_path,
            mdl_path,
            os.path.join(self.previous_aligner.working_directory, "tree"),
            os.path.join(self.previous_aligner.working_directory,
                         "final.mdl"),
        ]
    else:
        command = [
            thirdparty_binary("gmm-init-model"),
            f"--write-occs={occs_path}",
            tree_path,
            treeacc_path,
            topo_path,
            mdl_path,
        ]
    with open(log_path, "w") as log_file:
        subprocess.call(command, stderr=log_file)
    if initial_mix_up:
        # Adjust the Gaussian count to the configured starting point.
        # When initializing from a previous model, mix down as well so the
        # count matches exactly.
        if init_from_previous:
            command = [
                thirdparty_binary("gmm-mixup"),
                f"--mix-up={self.initial_gaussians}",
                f"--mix-down={self.initial_gaussians}",
                mdl_path,
                occs_path,
                mdl_path,
            ]
        else:
            command = [
                thirdparty_binary("gmm-mixup"),
                f"--mix-up={self.initial_gaussians}",
                mdl_path,
                occs_path,
                mdl_path,
            ]
        log_path = os.path.join(self.working_log_directory, "mixup.log")
        with open(log_path, "w") as log_file:
            subprocess.call(command, stderr=log_file)
    # Tree accumulators are no longer needed; promote the occupancy counts.
    os.remove(treeacc_path)
    os.rename(occs_path, self.next_occs_path)
    parse_logs(self.working_log_directory)
def create_align_model(self) -> None:
    """
    Create alignment model for speaker-adapted training that will use
    speaker-independent features in later aligning.

    See Also
    --------
    :func:`~montreal_forced_aligner.acoustic_modeling.sat.AccStatsTwoFeatsFunction`
        Multiprocessing helper function for each job
    :meth:`.SatTrainer.acc_stats_two_feats_arguments`
        Job method for generating arguments for the helper function
    :kaldi_src:`gmm-est`
        Relevant Kaldi binary
    :kaldi_src:`gmm-sum-accs`
        Relevant Kaldi binary
    :kaldi_steps:`train_sat`
        Reference Kaldi script
    """
    self.log_info(
        "Creating alignment model for speaker-independent features...")
    begin = time.time()
    arguments = self.acc_stats_two_feats_arguments()
    with tqdm.tqdm(total=self.num_current_utterances,
                   disable=getattr(self, "quiet", False)) as pbar:
        if self.use_mp:
            # Fan the accumulation out to one worker process per job.
            error_dict = {}
            return_queue = mp.Queue()
            stopped = Stopped()
            procs = []
            for i, args in enumerate(arguments):
                function = AccStatsTwoFeatsFunction(args)
                p = KaldiProcessWorker(i, return_queue, function, stopped)
                procs.append(p)
                p.start()
            while True:
                try:
                    result = return_queue.get(timeout=1)
                    if isinstance(result, Exception):
                        # Collect worker errors; re-raised after joining.
                        error_dict[getattr(result, "job_name", 0)] = result
                        continue
                    if stopped.stop_check():
                        continue
                except Empty:
                    # Queue drained: exit the loop only once every worker
                    # has flagged itself finished (for/else-break pattern).
                    for proc in procs:
                        if not proc.finished.stop_check():
                            break
                    else:
                        break
                    continue
                pbar.update(1)
            for p in procs:
                p.join()
            if error_dict:
                for v in error_dict.values():
                    raise v
        else:
            # Serial fallback: run each job's accumulation inline.
            for args in arguments:
                function = AccStatsTwoFeatsFunction(args)
                for _ in function.run():
                    pbar.update(1)
    log_path = os.path.join(self.working_log_directory,
                            "align_model_est.log")
    with open(log_path, "w", encoding="utf8") as log_file:
        acc_files = []
        for x in arguments:
            acc_files.extend(x.acc_paths.values())
        # Pipe the summed accumulators straight into model estimation.
        sum_proc = subprocess.Popen(
            [thirdparty_binary("gmm-sum-accs"), "-"] + acc_files,
            stderr=log_file,
            stdout=subprocess.PIPE,
            env=os.environ,
        )
        est_command = [
            thirdparty_binary("gmm-est"),
            "--remove-low-count-gaussians=false",
        ]
        if not self.quick:
            est_command.append(f"--power={self.power}")
        else:
            est_command.append(
                f"--write-occs={os.path.join(self.working_directory, 'final.occs')}"
            )
        # Estimate into a separate ".alimdl" file, leaving the
        # speaker-adapted model untouched.
        est_command.extend([
            self.model_path,
            "-",
            self.model_path.replace(".mdl", ".alimdl"),
        ])
        est_proc = subprocess.Popen(
            est_command,
            stdin=sum_proc.stdout,
            stderr=log_file,
            env=os.environ,
        )
        est_proc.communicate()
    parse_logs(self.working_log_directory)
    if not self.debug:
        # Accumulator files can be large; clean up unless debugging.
        for f in acc_files:
            os.remove(f)
    self.log_debug(f"Alignment model creation took {time.time() - begin}")