def adapt(self) -> None:
    """
    Run the adaptation

    Calls :meth:`setup`, then returns early if a ``done`` marker file exists
    in the working directory.  Otherwise any existing ``final.mdl`` /
    ``final.alimdl`` in the working directory is renamed to its
    ``unadapted`` counterpart, alignments are generated, :meth:`train_map`
    is run, the model is exported and the resulting model files are copied
    into the alignment directory.

    On any exception an empty ``dirty`` marker file is written to the
    working directory before the exception is re-raised; on success an
    empty ``done`` marker file is written.
    """
    self.setup()
    work_dir = self.working_directory
    dirty_marker = os.path.join(work_dir, "dirty")
    done_marker = os.path.join(work_dir, "done")
    if os.path.exists(done_marker):
        self.log_info("Adaptation already done, skipping.")
        return

    self.log_info("Generating initial alignments...")
    # Move existing final models aside under "unadapted" names
    for model_name in ("final.mdl", "final.alimdl"):
        model_path = os.path.join(work_dir, model_name)
        if os.path.exists(model_path):
            renamed = model_name.replace("final", "unadapted")
            os.rename(model_path, os.path.join(work_dir, renamed))
    self.align()

    os.makedirs(self.align_directory, exist_ok=True)
    try:
        self.log_info("Adapting pretrained model...")
        self.train_map()
        self.export_model(
            os.path.join(self.working_log_directory, "acoustic_model.zip")
        )
        source_dir = self.working_directory
        target_dir = self.align_directory
        # These files are always copied; a missing one raises
        for required in ("final.mdl", "final.occs", "tree"):
            shutil.copyfile(
                os.path.join(source_dir, required),
                os.path.join(target_dir, required),
            )
        # These files are copied only when present
        for optional in ("final.alimdl", "lda.mat"):
            optional_source = os.path.join(source_dir, optional)
            if os.path.exists(optional_source):
                shutil.copyfile(
                    optional_source, os.path.join(target_dir, optional)
                )
        self.adaptation_done = True
    except Exception as err:
        # Leave a marker so the failed run can be detected later
        with open(dirty_marker, "w"):
            pass
        if isinstance(err, KaldiProcessingError):
            import logging

            kaldi_logger = logging.getLogger(self.identifier)
            log_kaldi_errors(err.error_logs, kaldi_logger)
            err.update_log_file(kaldi_logger)
        raise
    with open(done_marker, "w"):
        pass
def segment(self) -> None:
    """
    Performs VAD and segmentation into utterances

    Calls :meth:`setup` first.  If a ``done`` marker file already exists in
    the working directory the method logs that and returns without doing
    any work; otherwise the marker is written after a successful run.

    Raises
    ------
    :class:`~montreal_forced_aligner.exceptions.KaldiProcessingError`
        If there were any errors in running Kaldi binaries
    """
    self.setup()
    log_directory = os.path.join(self.working_directory, "log")
    done_path = os.path.join(self.working_directory, "done")
    if os.path.exists(done_path):
        # The message names this step (segmentation); it previously said
        # "Classification", which did not match what this method does.
        self.log_info("Segmentation already done, skipping.")
        return
    try:
        self.compute_vad()
        self.uses_vad = True
        self.segment_vad()
        parse_logs(log_directory)
    except Exception as e:
        if isinstance(e, KaldiProcessingError):
            import logging

            logger = logging.getLogger(self.identifier)
            log_kaldi_errors(e.error_logs, logger)
            e.update_log_file(logger)
        raise
    with open(done_path, "w"):
        pass
def align(self) -> None:
    """
    Multiprocessing function that aligns based on the current model.

    Returns early if a ``done`` marker file exists in the working directory;
    the marker is written once alignment and diagnostics have finished.

    See Also
    --------
    :class:`~montreal_forced_aligner.alignment.multiprocessing.AlignFunction`
        Multiprocessing helper function for each job
    :meth:`.AlignMixin.align_arguments`
        Job method for generating arguments for the helper function
    :kaldi_steps:`align_si`
        Reference Kaldi script
    :kaldi_steps:`align_fmllr`
        Reference Kaldi script
    """
    done_marker = os.path.join(self.working_directory, "done")
    if os.path.exists(done_marker):
        self.log_debug(f"Skipping {self.current_aligner.identifier} alignments")
        return
    try:
        self.current_acoustic_model.export_model(self.working_directory)
        # First pass: speaker-independent alignment
        self.speaker_independent = True
        self.compile_train_graphs()
        self.align_utterances()
        features = self.current_acoustic_model.meta["features"]
        if features["uses_speaker_adaptation"]:
            # Collect every transform path that is not on disk yet
            absent_transforms = [
                trans_path
                for job_args in self.calc_fmllr_arguments()
                for trans_path in job_args.trans_paths.values()
                if not os.path.exists(trans_path)
            ]
            if absent_transforms:
                assert self.alignment_model_path.endswith(".alimdl")
                self.calc_fmllr()
            # Second pass: speaker-adapted alignment
            self.speaker_independent = False
            assert self.alignment_model_path.endswith(".mdl")
            self.align_utterances()
        if self.current_subset:
            scope = f"{self.current_subset} utterances"
        else:
            scope = "the full corpus"
        self.log_debug(
            f"Analyzing alignment diagnostics for {self.current_aligner.identifier} on {scope}"
        )
        self.compile_information()
        with open(done_marker, "w"):
            pass
    except Exception as err:
        if not isinstance(err, KaldiProcessingError):
            raise
        import logging

        kaldi_logger = logging.getLogger(self.identifier)
        log_kaldi_errors(err.error_logs, kaldi_logger)
        err.update_log_file(kaldi_logger)
        raise
def setup(self) -> None: """Setup segmentation""" self.check_previous_run() log_dir = os.path.join(self.working_directory, "log") os.makedirs(log_dir, exist_ok=True) try: self.load_corpus() except Exception as e: if isinstance(e, KaldiProcessingError): import logging logger = logging.getLogger(self.identifier) log_kaldi_errors(e.error_logs, logger) e.update_log_file(logger) raise
def setup(self) -> None: """Setup ivector extractor training""" if self.initialized: return self.check_previous_run() try: self.load_corpus() except Exception as e: if isinstance(e, KaldiProcessingError): import logging logger = logging.getLogger(self.identifier) log_kaldi_errors(e.error_logs, logger) e.update_log_file(logger) raise self.initialized = True
def initialize_training(self) -> None:
    """
    Initialize training

    Resets the subset to the full corpus when the requested subset is at
    least as large as the worker's utterance count, runs the trainer-specific
    initialization, and resets the iteration counter and Gaussian count.

    If the trainer is already initialized, ``training_complete`` is set when
    a ``done`` marker file exists and nothing else is done.  Otherwise a
    working directory containing a ``dirty`` marker is removed and the
    working log directory is (re)created.

    If the trainer-specific initialization raises, an empty ``dirty`` marker
    file is written to the working directory and the exception is re-raised.
    """
    started = time.time()
    dirty_marker = os.path.join(self.working_directory, "dirty")
    done_marker = os.path.join(self.working_directory, "done")
    self.log_info(f"Initializing training for {self.identifier}...")
    if self.subset and self.subset >= self.worker.num_utterances:
        self.log_warning(
            "Subset specified is larger than the dataset, using full corpus for this training block."
        )
        self.subset = 0
        self.worker.current_subset = 0
    try:
        self._trainer_initialization()
    except Exception as err:
        with open(dirty_marker, "w"):
            pass
        if isinstance(err, KaldiProcessingError):
            kaldi_logger = logging.getLogger(self.identifier)
            log_kaldi_errors(err.error_logs, kaldi_logger)
            err.update_log_file(kaldi_logger)
        raise

    self.iteration = 1
    self.worker.current_trainer = self
    self.compute_calculated_properties()
    self.current_gaussians = self.initial_gaussians

    if not self.initialized:
        # if there was an error, let's redo from scratch
        # NOTE(review): this cleanup runs after _trainer_initialization()
        # has already executed - confirm that ordering is intended.
        if os.path.exists(dirty_marker):
            shutil.rmtree(self.working_directory)
        os.makedirs(self.working_log_directory, exist_ok=True)
        self.log_info("Initialization complete!")
        self.log_debug(
            f"Initialization for {self.identifier} took {time.time() - started} seconds"
        )
        return

    self.log_info(
        f"{self.identifier} training already initialized, skipping initialization."
    )
    if os.path.exists(done_marker):
        self.training_complete = True
def setup(self) -> None: """Setup for acoustic model training""" if self.initialized: return self.check_previous_run() try: self.load_corpus() self.write_training_information() for config in self.training_configs.values(): if isinstance(config, str): continue config.non_silence_phones = self.non_silence_phones except Exception as e: if isinstance(e, KaldiProcessingError): import logging logger = logging.getLogger(self.identifier) log_kaldi_errors(e.error_logs, logger) e.update_log_file(logger) raise self.initialized = True
def finalize_training(self) -> None:
    """
    Finalize training and create a speaker independent model for initial alignment

    After the parent class has finalized training, asserts that
    ``alignment_model_path`` ends with ``final.alimdl`` and that the file
    exists on disk.

    Raises
    ------
    :class:`~montreal_forced_aligner.exceptions.KaldiProcessingError`
        If there were any errors in running Kaldi binaries
    """
    try:
        self.create_align_model()
        self.uses_speaker_adaptation = True
        super().finalize_training()
        # Sanity checks on the alignment model produced above
        assert self.alignment_model_path.endswith("final.alimdl")
        assert os.path.exists(self.alignment_model_path)
    except Exception as err:
        if not isinstance(err, KaldiProcessingError):
            raise
        import logging

        kaldi_logger = logging.getLogger(self.identifier)
        log_kaldi_errors(err.error_logs, kaldi_logger)
        err.update_log_file(kaldi_logger)
        raise
def train(self) -> None:
    """
    Train the model

    Initializes training, returns immediately if training is already
    complete, and otherwise runs ``num_iterations`` training iterations
    followed by finalization.  An empty ``done`` marker file is written to
    the working directory on success; an empty ``dirty`` marker file is
    written if any step raises.

    Raises
    ------
    :class:`~montreal_forced_aligner.exceptions.KaldiProcessingError`
        If there were any errors in running Kaldi binaries
    """
    done_marker = os.path.join(self.working_directory, "done")
    dirty_marker = os.path.join(self.working_directory, "dirty")
    os.makedirs(self.working_log_directory, exist_ok=True)
    try:
        self.initialize_training()
        if self.training_complete:
            return
        started = time.time()
        for current in range(1, self.num_iterations + 1):
            self.log_info(
                f"{self.identifier} - Iteration {current} of {self.num_iterations}"
            )
            self.iteration = current
            self.train_iteration()
        self.finalize_training()
    except Exception as err:
        with open(dirty_marker, "w"):
            pass
        if isinstance(err, KaldiProcessingError):
            kaldi_logger = logging.getLogger(self.identifier)
            log_kaldi_errors(err.error_logs, kaldi_logger)
            err.update_log_file(kaldi_logger)
        raise
    else:
        # Only reached when training actually ran to completion
        with open(done_marker, "w"):
            pass
        self.log_info("Training complete!")
        self.log_debug(f"Training took {time.time() - started} seconds")