def __init__(
    self,
    crp,
    feature_flow,
    feature_scorer,
    denominator_path,
    alignment_options=None,
    short_pauses=None,
    use_gpu=False,
    rtf=40,
    mem=4,  # TODO check requirements
    extra_config=None,
    extra_post_config=None,
):
    """
    Prepare config, flow, executable, outputs and requirements of the accuracy lattice job.

    All arguments except ``self`` are passed on by keyword to ``create_config`` and
    ``create_flow``; how they are interpreted there is not visible from this method.

    :param crp: common parameter object; ``lattice_processor_exe``, ``concurrent``
        and ``corpus_duration`` are read from it here
    :param feature_flow: only forwarded
    :param feature_scorer: only forwarded
    :param denominator_path: only forwarded
    :param alignment_options: only forwarded
    :param short_pauses: only forwarded
    :param use_gpu: stored on the job; a truthy value requests one GPU
    :param rtf: multiplier on ``crp.corpus_duration`` for the time requirement
    :param mem: value used for ``rqmt["mem"]``
    :param extra_config: only forwarded
    :param extra_post_config: only forwarded
    """
    self.set_vis_name("Accuracy Lattice")

    # Snapshot of the call arguments, taken before any other local is bound.
    job_args = dict(locals())
    del job_args["self"]

    self.config, self.post_config = self.create_config(**job_args)
    self.alignment_flow = self.create_flow(**job_args)
    self.exe = self.select_exe(crp.lattice_processor_exe, "lattice-processor")
    self.concurrent = crp.concurrent
    self.use_gpu = use_gpu

    # Outputs are registered once per task id, numbered 1..crp.concurrent.
    task_ids = range(1, crp.concurrent + 1)

    self.log_file = self.log_file_output_path("create-accuracy", crp, True)

    self.single_lattice_caches = {
        idx: self.output_path("accuracy.%d" % idx, cached=True) for idx in task_ids
    }
    self.lattice_bundle = self.output_path("accuracy.bundle", cached=True)
    self.lattice_path = util.MultiOutputPath(
        self,
        "accuracy.$(TASK)",
        self.single_lattice_caches,
        cached=True,
    )

    self.single_segmentwise_alignment_caches = {
        idx: self.output_path("segmentwise-alignment.%d" % idx, cached=True)
        for idx in task_ids
    }
    self.segmentwise_alignment_bundle = self.output_path(
        "segmentwise-alignment.bundle", cached=True
    )
    self.segmentwise_alignment_path = util.MultiOutputPath(
        self,
        "segmentwise-alignment.$(TASK)",
        self.single_segmentwise_alignment_caches,
        cached=True,
    )

    # Time scales with corpus duration per task and never drops below 0.5.
    time_rqmt = max(crp.corpus_duration * rtf / crp.concurrent, 0.5)
    gpu_rqmt = 1 if self.use_gpu else 0
    self.rqmt = {"time": time_rqmt, "cpu": 2, "gpu": gpu_rqmt, "mem": mem}
def __init__(self, crp, feature_flow, label_scorer, alignment_options, word_boundaries=False,
             align_node_options={}, use_gpu=False, rtf=1.0, rasr_exe=None,
             extra_config=None, extra_post_config=None):
    """
    Set up config, flow, executable, output paths and requirements of the label alignment job.

    All arguments except ``self`` are passed on by keyword to
    ``LabelAlignmentJob.create_config`` and ``LabelAlignmentJob.create_flow``.

    :param recipe.rasr.csp.CommonSprintParameters crp: ``concurrent``, ``corpus_duration`` and
        (if ``rasr_exe`` is None) ``acoustic_model_trainer_exe`` are read here
    :param feature_flow:
    :param recipe.rasr.LabelScorer label_scorer: must not be None
    :param dict[str] alignment_options:
    :param bool word_boundaries: if truthy, word boundary cache outputs are registered as well
    :param dict[str] align_node_options: copied before use, so the caller's dict (and the
        shared default) is never handed out directly
    :param bool use_gpu: a truthy value requests one GPU
    :param float rtf: multiplier on ``crp.corpus_duration`` for the time requirement
    :param rasr_exe: executable passed to ``select_exe``; ``crp.acoustic_model_trainer_exe``
        is used when None
    :param extra_config:
    :param extra_post_config:
    """
    assert label_scorer is not None, 'need label scorer for label aligner'

    # The signature default is a single dict object shared by every call. Work on a
    # private copy so nothing downstream can modify that shared object.
    # NOTE(review): the default itself is left as `{}` on purpose so the signature
    # seen by callers and tooling stays exactly as before.
    if align_node_options is not None:
        align_node_options = dict(align_node_options)

    self.set_vis_name('Alignment')

    # Snapshot of the call arguments (including the copied align_node_options).
    kwargs = locals()
    del kwargs['self']

    self.config, self.post_config = LabelAlignmentJob.create_config(**kwargs)
    self.alignment_flow = LabelAlignmentJob.create_flow(**kwargs)
    self.concurrent = crp.concurrent
    if rasr_exe is None:
        rasr_exe = crp.acoustic_model_trainer_exe
    self.exe = self.select_exe(rasr_exe, 'acoustic-model-trainer')
    self.use_gpu = use_gpu
    self.word_boundaries = word_boundaries

    self.out_log_file = self.log_file_output_path('alignment', crp, True)
    # One alignment cache per task id, numbered 1..self.concurrent.
    self.out_single_alignment_caches = {
        i: self.output_path('alignment.cache.%d' % i, cached=True)
        for i in range(1, self.concurrent + 1)
    }
    self.out_alignment_path = util.MultiOutputPath(
        self, 'alignment.cache.$(TASK)', self.out_single_alignment_caches, cached=True)
    self.out_alignment_bundle = self.output_path('alignment.cache.bundle', cached=True)

    if self.word_boundaries:
        self.single_word_boundary_caches = {
            i: self.output_path('word_boundary.cache.%d' % i, cached=True)
            for i in range(1, self.concurrent + 1)
        }
        self.word_boundary_path = util.MultiOutputPath(
            self, 'word_boundary.cache.$(TASK)', self.single_word_boundary_caches, cached=True)
        self.word_boundary_bundle = self.output_path('word_boundary.cache.bundle', cached=True)

    # Time scales with corpus duration per task and never drops below 0.5.
    self.rqmt = {
        'time': max(rtf * crp.corpus_duration / crp.concurrent, .5),
        'cpu': 1,
        'gpu': 1 if self.use_gpu else 0,
        'mem': 2,
    }
def __init__(
    self,
    crp,
    feature_flow,
    label_tree,
    label_scorer,
    search_parameters=None,
    lm_lookahead=True,
    lookahead_options=None,
    eval_single_best=True,
    eval_best_in_lattice=True,
    use_gpu=False,
    rtf=2,
    mem=8,
    hard_rqmt=False,
    extra_config=None,
    extra_post_config=None,
    sprint_exe=None,  # allows an executable other than the default setting
    lm_gc_job=None,
    lm_gc_job_local=False,
    lm_gc_job_mem=2,
    lm_gc_job_default_search=False,
):  # TODO set this to true later
    """
    Set up config, executable, lattice outputs and requirements of the label synchronized search.

    All arguments except ``self`` are passed on by keyword to
    ``LabelSyncSearchJob.create_config``; arguments not listed below are only forwarded.

    :param crp: common parameter object; ``concurrent``, ``corpus_duration`` and
        (if ``sprint_exe`` is None) ``flf_tool_exe`` are read here
    :param feature_flow: forwarded and additionally stored as ``self.feature_flow``
    :param use_gpu: stored on the job; a truthy value requests one GPU
    :param rtf: multiplier on ``crp.corpus_duration`` for the time requirement
    :param mem: value used for ``rqmt["mem"]``
    :param hard_rqmt: stored as ``self.hard_rqmt``
    :param sprint_exe: executable passed to ``select_exe``; ``crp.flf_tool_exe`` is used
        when None
    """
    self.set_vis_name("Label Synchronized Search")

    # Snapshot of the call arguments, taken before any other local is bound.
    job_args = dict(locals())
    del job_args["self"]

    self.config, self.post_config = LabelSyncSearchJob.create_config(**job_args)
    self.feature_flow = feature_flow

    chosen_exe = crp.flf_tool_exe if sprint_exe is None else sprint_exe
    self.exe = self.select_exe(chosen_exe, "flf-tool")
    self.concurrent = crp.concurrent
    self.use_gpu = use_gpu

    self.out_log_file = self.log_file_output_path("search", crp, True)

    # One lattice cache per task id, numbered 1..crp.concurrent.
    self.out_single_lattice_caches = {
        idx: self.output_path("lattice.cache.%d" % idx, cached=True)
        for idx in range(1, crp.concurrent + 1)
    }
    self.out_lattice_bundle = self.output_path("lattice.bundle", cached=True)
    self.out_lattice_path = util.MultiOutputPath(
        self,
        "lattice.cache.$(TASK)",
        self.out_single_lattice_caches,
        cached=True,
    )

    # Time scales with corpus duration per task and never drops below 4.5.
    time_rqmt = max(crp.corpus_duration * rtf / crp.concurrent, 4.5)
    gpu_rqmt = 1 if self.use_gpu else 0
    self.rqmt = {"time": time_rqmt, "cpu": 3, "gpu": gpu_rqmt, "mem": mem}

    # no automatic resume with doubled rqmt
    self.hard_rqmt = hard_rqmt
def __init__(
    self,
    crp,
    feature_flow,
    feature_scorer,
    search_parameters=None,
    lm_lookahead=True,
    lookahead_options=None,
    use_gpu=False,
    rtf=30,
    mem=4,
    model_combination_config=None,
    model_combination_post_config=None,
    extra_config=None,
    extra_post_config=None,
):
    """
    Set up config, executable, lattice outputs and requirements of the raw denominator job.

    All arguments except ``self`` are passed on by keyword to ``create_config``;
    arguments not listed below are only forwarded.

    :param crp: common parameter object; ``speech_recognizer_exe``, ``concurrent``
        and ``corpus_duration`` are read here
    :param feature_flow: forwarded and additionally stored as ``self.feature_flow``
    :param rasr.FeatureScorer feature_scorer: checked with an ``isinstance`` assertion
    :param use_gpu: stored on the job; a truthy value requests one GPU
    :param rtf: multiplier on ``crp.corpus_duration`` for the time requirement
    :param mem: value used for ``rqmt["mem"]``
    """
    assert isinstance(feature_scorer, rasr.FeatureScorer)

    self.set_vis_name("Raw Denominator Lattice")

    # Snapshot of the call arguments, taken before any other local is bound.
    job_args = dict(locals())
    del job_args["self"]

    self.config, self.post_config = self.create_config(**job_args)
    self.feature_flow = feature_flow
    self.exe = self.select_exe(crp.speech_recognizer_exe, "speech-recognizer")
    self.concurrent = crp.concurrent
    self.use_gpu = use_gpu

    self.log_file = self.log_file_output_path("create-raw-denominator", crp, True)

    # One lattice cache per task id, numbered 1..crp.concurrent.
    task_ids = range(1, crp.concurrent + 1)
    self.single_lattice_caches = {
        idx: self.output_path("raw-denominator.%d" % idx, cached=True)
        for idx in task_ids
    }
    self.lattice_bundle = self.output_path("raw-denominator.bundle", cached=True)
    self.lattice_path = util.MultiOutputPath(
        self,
        "raw-denominator.$(TASK)",
        self.single_lattice_caches,
        cached=True,
    )

    # Time scales with corpus duration per task and never drops below 0.5.
    time_rqmt = max(crp.corpus_duration * rtf / crp.concurrent, 0.5)
    gpu_rqmt = 1 if self.use_gpu else 0
    self.rqmt = {"time": time_rqmt, "cpu": 2, "gpu": gpu_rqmt, "mem": mem}
def __init__(
    self,
    crp,
    feature_flow,
    feature_scorer,
    lattice_cache,
    *,
    global_scale=1.0,
    confidence_threshold=0.75,
    weight_scale=1.0,
    ref_alignment_path=None,
    use_gpu=False,
    rtf=0.5,
    extra_config=None,
    extra_post_config=None,
):
    """
    Set up config, flow, executable, outputs and requirements of the confidence-based alignment.

    All arguments except ``self`` are passed on by keyword to
    ``ConfidenceBasedAlignmentJob.create_config`` and
    ``ConfidenceBasedAlignmentJob.create_flow``; arguments not listed below are only
    forwarded.

    :param crp: common parameter object; ``acoustic_model_trainer_exe``, ``concurrent``
        and ``corpus_duration`` are read here
    :param rasr.FeatureScorer feature_scorer: checked with an ``isinstance`` assertion,
        forwarded and stored as ``self.feature_scorer``
    :param use_gpu: stored on the job; a truthy value requests one GPU
    :param rtf: multiplier on ``crp.corpus_duration`` for the time requirement
    """
    assert isinstance(feature_scorer, rasr.FeatureScorer)

    self.set_vis_name("Confidence-based alignment")

    # Snapshot of the call arguments, taken before any other local is bound.
    job_args = dict(locals())
    del job_args["self"]

    self.config, self.post_config = ConfidenceBasedAlignmentJob.create_config(**job_args)
    self.alignment_flow = ConfidenceBasedAlignmentJob.create_flow(**job_args)
    self.concurrent = crp.concurrent
    self.exe = self.select_exe(crp.acoustic_model_trainer_exe, "acoustic-model-trainer")
    self.feature_scorer = feature_scorer
    self.use_gpu = use_gpu

    self.out_log_file = self.log_file_output_path("alignment", crp, True)

    # One alignment cache per task id, numbered 1..self.concurrent.
    self.out_single_alignment_caches = {
        task_id: self.output_path("alignment.cache.%d" % task_id, cached=True)
        for task_id in range(1, self.concurrent + 1)
    }
    self.out_alignment_path = util.MultiOutputPath(
        self,
        "alignment.cache.$(TASK)",
        self.out_single_alignment_caches,
        cached=True,
    )
    self.out_alignment_bundle = self.output_path("alignment.cache.bundle", cached=True)

    # Time scales with corpus duration per task and never drops below 0.5.
    time_rqmt = max(rtf * crp.corpus_duration / crp.concurrent, 0.5)
    gpu_rqmt = 1 if self.use_gpu else 0
    self.rqmt = {"time": time_rqmt, "cpu": 1, "gpu": gpu_rqmt, "mem": 2}
def __init__(
    self,
    crp,
    feature_energy_flow,
    minimum_segment_length=0,
    maximum_segment_length=6000,
    iterations=1,
    penalty=0,
    minimum_speech_proportion=0.7,
    save_alignment=False,
    keep_accumulators=False,
    extra_merge_args=None,
    extra_config=None,
    extra_post_config=None,
):
    """
    Set up the linear alignment job on top of its base class.

    The call arguments are passed by keyword to ``LinearAlignmentJob.merge_args`` (whose
    result initialises the base class), ``LinearAlignmentJob.create_config`` and
    ``LinearAlignmentJob.create_flow``; arguments not listed below are only forwarded.

    :param crp: common parameter object; ``acoustic_model_trainer_exe``, ``concurrent``
        and ``corpus_duration`` are read here
    :param save_alignment: stored on the job; if truthy, alignment cache outputs are
        registered
    :param keep_accumulators: stored on the job
    """
    self.set_vis_name("Linear Alignment")

    # Snapshot of the call arguments, taken before any other local is bound.
    # NOTE(review): since zero-argument super() is used below, this snapshot presumably
    # also carries the implicit `__class__` entry, exactly as before - confirm that
    # merge_args / create_config / create_flow tolerate it.
    job_args = dict(locals())
    del job_args["self"]

    super().__init__(**LinearAlignmentJob.merge_args(**job_args))

    self.config, self.post_config = LinearAlignmentJob.create_config(**job_args)
    self.linear_alignment_flow = LinearAlignmentJob.create_flow(**job_args)
    self.exe = self.select_exe(crp.acoustic_model_trainer_exe, "acoustic-model-trainer")
    self.concurrent = crp.concurrent
    self.save_alignment = save_alignment
    self.keep_accumulators = keep_accumulators

    self.out_log_file = self.log_file_output_path("accumulate", crp, True)

    if save_alignment:
        # One alignment cache per task id, numbered 1..self.concurrent.
        self.single_alignment_caches = {
            task_id: self.output_path("alignment.cache.%d" % task_id, cached=True)
            for task_id in range(1, self.concurrent + 1)
        }
        self.out_alignment_path = util.MultiOutputPath(
            self,
            "alignment.cache.$(TASK)",
            self.single_alignment_caches,
            cached=True,
        )
        self.out_alignment_bundle = self.output_path("alignment.cache.bundle")

    # Time scales with corpus duration per task and never drops below 0.5.
    time_rqmt = max(crp.corpus_duration / (20.0 * self.concurrent), 0.5)
    self.accumulate_rqmt = {"time": time_rqmt, "cpu": 1, "mem": 1}
def __init__(
    self,
    crp,
    feature_flow,
    feature_scorer,
    alignment_options=None,
    use_gpu=False,
    rtf=10.0,
    extra_config=None,
    extra_post_config=None,
):
    """
    Set up config, flow, executable, lattice outputs and requirements of the numerator job.

    All arguments except ``self`` are passed on by keyword to
    ``NumeratorLatticeJob.create_config`` and ``NumeratorLatticeJob.create_flow``;
    arguments not listed below are only forwarded.

    :param crp: common parameter object; ``acoustic_model_trainer_exe``, ``concurrent``
        and ``corpus_duration`` are read here
    :param rasr.FeatureScorer feature_scorer: checked with an ``isinstance`` assertion,
        forwarded and stored as ``self.feature_scorer``
    :param use_gpu: stored on the job; a truthy value requests one GPU
    :param rtf: multiplier on ``crp.corpus_duration`` for the time requirement
    """
    assert isinstance(feature_scorer, rasr.FeatureScorer)

    self.set_vis_name("NumeratorLattice")

    # Snapshot of the call arguments, taken before any other local is bound.
    job_args = dict(locals())
    del job_args["self"]

    self.config, self.post_config = NumeratorLatticeJob.create_config(**job_args)
    self.alignment_flow = NumeratorLatticeJob.create_flow(**job_args)
    self.exe = self.select_exe(crp.acoustic_model_trainer_exe, "acoustic-model-trainer")
    self.concurrent = crp.concurrent
    self.feature_scorer = feature_scorer
    self.use_gpu = use_gpu

    self.log_file = self.log_file_output_path("create-numerator", crp, True)

    # One lattice cache per task id, numbered 1..self.concurrent.
    task_ids = range(1, self.concurrent + 1)
    self.single_lattice_caches = {
        task_id: self.output_path("numerator.%d" % task_id, cached=True)
        for task_id in task_ids
    }
    self.lattice_path = util.MultiOutputPath(
        self,
        "numerator.$(TASK)",
        self.single_lattice_caches,
        cached=True,
    )
    self.lattice_bundle = self.output_path("numerator.bundle", cached=True)

    # Time scales with corpus duration per task and never drops below 0.5.
    time_rqmt = max(rtf * crp.corpus_duration / crp.concurrent, 0.5)
    gpu_rqmt = 1 if self.use_gpu else 0
    self.rqmt = {"time": time_rqmt, "cpu": 2, "gpu": gpu_rqmt, "mem": 2}
def __init__(
    self,
    crp,
    t_matrix,
    ubm,
    features,
    alignment,
    allophones,
    dim,
    allophones_to_ignore,
    length_norm=True,
    rqmt=None,
):
    """
    Store the inputs of the i-vector extraction and register one ``ivec`` output per task.

    :param crp: (CommonRasrParameters) needed for concurrency
    :param t_matrix: (HDF5File) IVectorTrainingJob.t_matrix, contains learned ubm and JFA
    :param ubm: (Path) to UBM trained with ubm.TrainWarpingFactorsSequence
    :param features: system.feature_caches['corpus']['mfcc'].hidden_paths;
        one feature.cache file per i-vector, good features are mfcc, plp
    :param alignment: system.alignments['corpus'][''].alternatives['task_dependent'].hidden_paths;
        one alignment.cache file per i-vector
    :param allophones: system.allophone_files['base']
    :param dim: (int) dimension of the i-vec, usually between 50-400
    :param allophones_to_ignore: list(string) ['laughs', 'noise', 'sil', 'inaudible', 'spn']
    :param length_norm: (bool) normalize i-vector to unit length
    :param rqmt: requirement dict; if falsy,
        ``{"time": 1, "cpu": 1, "gpu": 0, "mem": 1}`` is used
    """
    # Job inputs, kept as plain attributes.
    self.crp = crp
    self.concurrent = crp.concurrent
    self.t_matrix = t_matrix
    self.ubm = ubm
    self.features = features
    self.alignment = alignment
    self.allophones = allophones
    self.allophones_to_ignore = allophones_to_ignore
    self.dim = dim
    self.length_norm = length_norm

    self.rqmt = rqmt or {"time": 1, "cpu": 1, "gpu": 0, "mem": 1}

    # One i-vector cache per task id, numbered 1..self.concurrent.
    self.single_ivec_caches = {
        task_id: self.output_path("ivec.%d" % task_id, cached=True)
        for task_id in range(1, self.concurrent + 1)
    }
    self.ivec_path = util.MultiOutputPath(
        self,
        "ivec.$(TASK)",
        self.single_ivec_caches,
        cached=True,
    )
def __init__(
    self,
    crp,
    lattice_path,
    pruning_threshold=100,
    phone_coverage=0,
    nonword_phones="[*",
    max_arcs_per_second=50000,
    max_arcs_per_segment=1000000,
    output_format="flf",
    pronunciation_scale=None,
    extra_config=None,
    extra_post_config=None,
):
    """
    Set up config, executable, lattice outputs and requirements of the lattice pruning job.

    All arguments except ``self`` are passed on by keyword to ``create_config``; apart
    from ``crp`` none of them is used directly in this method.

    :param crp: common parameter object; ``flf_tool_exe``, ``concurrent`` and
        ``corpus_duration`` are read here
    """
    self.set_vis_name("Lattice Pruning")

    # Snapshot of the call arguments, taken before any other local is bound.
    job_args = dict(locals())
    del job_args["self"]

    self.config, self.post_config = self.create_config(**job_args)
    self.exe = self.select_exe(crp.flf_tool_exe, "flf-tool")
    self.concurrent = crp.concurrent

    self.out_log_file = self.log_file_output_path("pruning", crp, True)

    # One pruned lattice cache per task id, numbered 1..crp.concurrent.
    self.out_single_lattice_caches = {
        idx: self.output_path("pruned_lattice.cache.%d" % idx, cached=True)
        for idx in range(1, crp.concurrent + 1)
    }
    self.out_lattice_bundle = self.output_path("pruned_lattice.bundle", cached=True)
    self.out_lattice_path = util.MultiOutputPath(
        self,
        "pruned_lattice.cache.$(TASK)",
        self.out_single_lattice_caches,
        cached=True,
    )

    # Time scales with corpus duration per task and never drops below 0.5.
    time_rqmt = max(crp.corpus_duration * 0.2 / crp.concurrent, 0.5)
    self.rqmt = {"time": time_rqmt, "cpu": 1, "gpu": 0, "mem": 2.0}
def __init__(
    self,
    crp,
    ubm,
    features,
    alignment,
    allophones,
    dim,
    allophones_to_ignore,
    iter=10,
    rqmt=None,
):
    """
    Store the inputs of the i-vector training and register its outputs.

    :param crp: (CommonRasrParameters) needed for concurrency
    :param ubm: (Path) to UBM trained with ubm.TrainWarpingFactorsSequence
    :param features: system.feature_caches['corpus']['mfcc'].hidden_paths;
        one feature.cache file per i-vector, good features are mfcc, plp
    :param alignment: system.alignments['corpus'][''].alternatives['task_dependent'].hidden_paths;
        one alignment.cache file per i-vector
    :param allophones: system.allophone_files['base']
    :param dim: (int) dimension of the i-vec, usually between 50-400
    :param allophones_to_ignore: list(string) ['laughs', 'noise', 'sil', 'inaudible', 'spn']
    :param iter: (int) number of em iterations during ivector training
    :param rqmt: requirement dict; if falsy,
        ``{"time": 1, "cpu": 1, "gpu": 0, "mem": 1}`` is used
    """
    # Job inputs, kept as plain attributes.
    self.crp = crp
    self.concurrent = crp.concurrent
    self.ubm = ubm
    self.features = features
    self.alignment = alignment
    self.allophones = allophones
    self.allophones_to_ignore = allophones_to_ignore
    self.dim = dim
    self.iter = iter

    self.rqmt = rqmt or {"time": 1, "cpu": 1, "gpu": 0, "mem": 1}

    # One accumulator cache per task id, numbered 1..self.concurrent.
    self.single_accu_caches = {
        task_id: self.output_path("accu.%d" % task_id, cached=True)
        for task_id in range(1, self.concurrent + 1)
    }
    self.accu_path = util.MultiOutputPath(
        self,
        "accu.$(TASK)",
        self.single_accu_caches,
        cached=True,
    )
    self.t_matrix = self.output_path("t.matrix")
def __init__(
    self,
    crp,
    raw_denominator_path,
    numerator_path,
    use_gpu=False,
    rtf=1,
    mem=4,  # TODO check requirements
    search_options=None,
    extra_config=None,
    extra_post_config=None,
):
    """
    Set up config, executable, lattice outputs and requirements of the denominator job.

    All arguments except ``self`` are passed on by keyword to ``create_config``;
    arguments not listed below are only forwarded.

    :param crp: common parameter object; ``lattice_processor_exe``, ``concurrent``
        and ``corpus_duration`` are read here
    :param use_gpu: stored on the job; a truthy value requests one GPU
    :param rtf: multiplier on ``crp.corpus_duration`` for the time requirement
    :param mem: value used for ``rqmt["mem"]``
    """
    self.set_vis_name("Denominator Lattice")

    # Snapshot of the call arguments, taken before any other local is bound.
    job_args = dict(locals())
    del job_args["self"]

    self.config, self.post_config = self.create_config(**job_args)
    self.exe = self.select_exe(crp.lattice_processor_exe, "lattice-processor")
    self.concurrent = crp.concurrent
    self.use_gpu = use_gpu

    self.log_file = self.log_file_output_path("create-denominator", crp, True)

    # One lattice cache per task id, numbered 1..crp.concurrent.
    task_ids = range(1, crp.concurrent + 1)
    self.single_lattice_caches = {
        idx: self.output_path("denominator.%d" % idx, cached=True) for idx in task_ids
    }
    self.lattice_bundle = self.output_path("denominator.bundle", cached=True)
    self.lattice_path = util.MultiOutputPath(
        self,
        "denominator.$(TASK)",
        self.single_lattice_caches,
        cached=True,
    )

    # Time scales with corpus duration per task and never drops below 0.5.
    time_rqmt = max(crp.corpus_duration * rtf / crp.concurrent, 0.5)
    gpu_rqmt = 1 if self.use_gpu else 0
    self.rqmt = {"time": time_rqmt, "cpu": 2, "gpu": gpu_rqmt, "mem": mem}
def __init__(
    self,
    crp,
    lattice_path,
    lm_scale,
    pron_scale=1.0,
    write_cn=False,
    extra_config=None,
    extra_post_config=None,
):
    """
    Set up config, executable, outputs and requirements of the CN decoding job.

    All arguments except ``self`` are passed on by keyword to ``create_config``;
    arguments not listed below are only forwarded.

    :param crp: common parameter object; ``flf_tool_exe``, ``concurrent`` and
        ``corpus_duration`` are read here
    :param write_cn: stored on the job; if truthy, the confusion lattice bundle and the
        multi-output lattice path are registered in addition to the per-task caches
    """
    self.set_vis_name("CN decoding")

    # Snapshot of the call arguments, taken before any other local is bound.
    job_args = dict(locals())
    del job_args["self"]

    self.config, self.post_config = self.create_config(**job_args)
    self.exe = self.select_exe(crp.flf_tool_exe, "flf-tool")
    self.concurrent = crp.concurrent
    self.write_cn = write_cn

    self.out_log_file = self.log_file_output_path("cn_decoding", crp, True)

    # Per-task caches are registered regardless of write_cn, numbered 1..crp.concurrent.
    self.out_single_lattice_caches = {
        idx: self.output_path("confusion_lattice.cache.%d" % idx, cached=True)
        for idx in range(1, crp.concurrent + 1)
    }
    self.out_ctm_file = self.output_path("lattice.ctm")

    if self.write_cn:
        self.out_lattice_bundle = self.output_path("confusion_lattice.bundle", cached=True)
        self.out_lattice_path = util.MultiOutputPath(
            self,
            "confusion_lattice.cache.$(TASK)",
            self.out_single_lattice_caches,
            cached=True,
        )

    # Time scales with corpus duration per task and never drops below 0.5.
    time_rqmt = max(crp.corpus_duration * 0.2 / crp.concurrent, 0.5)
    self.rqmt = {"time": time_rqmt, "cpu": 1, "gpu": 0, "mem": 2.0}
def __init__(
    self,
    crp,
    feature_flow,
    original_alignment,
    extra_config=None,
    extra_post_config=None,
):
    """
    Set up config, flow, executable, outputs and requirements of the dump alignment job.

    All arguments except ``self`` are passed on by keyword to
    ``DumpAlignmentJob.create_config`` and ``DumpAlignmentJob.create_flow``; apart from
    ``crp`` none of them is used directly in this method.

    :param crp: common parameter object; ``acoustic_model_trainer_exe``, ``concurrent``
        and ``corpus_duration`` are read here
    """
    self.set_vis_name("Dump Alignment")

    # Snapshot of the call arguments, taken before any other local is bound.
    job_args = dict(locals())
    del job_args["self"]

    self.config, self.post_config = DumpAlignmentJob.create_config(**job_args)
    self.dump_flow = DumpAlignmentJob.create_flow(**job_args)
    self.exe = self.select_exe(crp.acoustic_model_trainer_exe, "acoustic-model-trainer")
    self.concurrent = crp.concurrent

    self.out_log_file = self.log_file_output_path("dump", crp, True)

    # One alignment cache per task id, numbered 1..self.concurrent.
    self.out_single_alignment_caches = {
        task_id: self.output_path("alignment.cache.%d" % task_id, cached=True)
        for task_id in range(1, self.concurrent + 1)
    }
    self.out_alignment_path = util.MultiOutputPath(
        self,
        "alignment.cache.$(TASK)",
        self.out_single_alignment_caches,
        cached=True,
    )
    self.out_alignment_bundle = self.output_path("alignment.cache.bundle", cached=True)

    # Time scales with corpus duration per task and never drops below 0.5.
    time_rqmt = max(crp.corpus_duration / (50.0 * crp.concurrent), 0.5)
    self.rqmt = {"time": time_rqmt, "cpu": 1, "mem": 1}
def __init__(
    self,
    crp,
    timestamp_flow,
    *,
    samples_flow=None,
    min_length=0.5,
    timestamp_port="features",
    extract_concurrent=4,
    rtf=0.1,
    mem=2.0,
    extra_dump_config=None,
    extra_dump_post_config=None,
    extra_convert_config=None,
    extra_convert_post_config=None,
):
    """
    Set up the dump and convert configs/flows, the ``tone.cache`` outputs and the requirements.

    All arguments except ``self`` are passed on by keyword to ``create_dump_config``,
    ``create_dump_flow``, ``create_convert_config`` and ``create_convert_flow``;
    arguments not listed below are only forwarded.

    :param crp: common parameter object; ``feature_extraction_exe``, ``concurrent``
        and ``corpus_duration`` are read here
    :param min_length: stored as ``self.min_length``
    :param extract_concurrent: stored on the job; used as divisor for the time and as
        ``cpu`` value of ``extract_pitch_rqmt``
    :param rtf: multiplier on ``crp.corpus_duration`` for all three time requirements
    :param mem: ``mem`` value of all three requirement dicts
    """
    # Snapshot of the call arguments, taken before any other local is bound.
    job_args = dict(locals())
    del job_args["self"]

    self.min_length = min_length
    self.extract_concurrent = extract_concurrent

    self.dump_config, self.dump_post_config = self.create_dump_config(**job_args)
    self.dump_flow = self.create_dump_flow(**job_args)
    self.convert_config, self.convert_post_config = self.create_convert_config(**job_args)
    self.convert_flow = self.create_convert_flow(**job_args)

    # Fall back to the default executable only when crp does not provide one.
    if crp.feature_extraction_exe is not None:
        self.exe = crp.feature_extraction_exe
    else:
        self.exe = self.default_exe("feature-extraction")
    self.concurrent = crp.concurrent

    self.out_dump_log_file = self.log_file_output_path("dump", crp, True)
    self.out_convert_log_file = self.log_file_output_path("convert", crp, True)

    # One feature cache per task id, numbered 1..crp.concurrent.
    self.out_single_feature_caches = {
        idx: self.output_path("tone.cache.%d" % idx, cached=True)
        for idx in range(1, crp.concurrent + 1)
    }
    self.out_feature_bundle = self.output_path("tone.cache.bundle", cached=True)
    self.out_feature_path = util.MultiOutputPath(
        self,
        "tone.cache.$(TASK)",
        self.out_single_feature_caches,
        cached=True,
    )

    # Each time requirement scales with corpus duration and never drops below 0.5.
    self.dump_rqmt = {
        "time": max(crp.corpus_duration * rtf / crp.concurrent, 0.5),
        "cpu": 1,
        "mem": mem,
    }
    self.extract_pitch_rqmt = {
        "time": max(crp.corpus_duration * rtf / self.extract_concurrent, 0.5),
        "cpu": extract_concurrent,
        "mem": mem,
    }
    self.convert_rqmt = {
        "time": max(crp.corpus_duration * rtf / crp.concurrent, 0.5),
        "cpu": 1,
        "mem": mem,
    }
def __init__(
    self,
    crp,
    feature_flow,
    feature_scorer,
    alignment_options=None,
    word_boundaries=False,
    use_gpu=False,
    rtf=1.0,
    extra_config=None,
    extra_post_config=None,
):
    """
    Set up config, flow, executable, outputs and requirements of the alignment job.

    All arguments except ``self`` are passed on by keyword to
    ``AlignmentJob.create_config`` and ``AlignmentJob.create_flow``.

    :param rasr.crp.CommonRasrParameters crp: ``acoustic_model_trainer_exe``,
        ``concurrent`` and ``corpus_duration`` are read here
    :param feature_flow:
    :param rasr.FeatureScorer feature_scorer: checked with an ``isinstance`` assertion
        and stored as ``self.feature_scorer``
    :param dict[str] alignment_options:
    :param bool word_boundaries: if truthy, word boundary cache outputs are registered
        as well
    :param bool use_gpu: a truthy value requests one GPU
    :param float rtf: multiplier on ``crp.corpus_duration`` for the time requirement
    :param extra_config:
    :param extra_post_config:
    """
    assert isinstance(feature_scorer, rasr.FeatureScorer)

    self.set_vis_name("Alignment")

    # Snapshot of the call arguments, taken before any other local is bound.
    job_args = dict(locals())
    del job_args["self"]

    self.config, self.post_config = AlignmentJob.create_config(**job_args)
    self.alignment_flow = AlignmentJob.create_flow(**job_args)
    self.concurrent = crp.concurrent
    self.exe = self.select_exe(crp.acoustic_model_trainer_exe, "acoustic-model-trainer")
    self.feature_scorer = feature_scorer
    self.use_gpu = use_gpu
    self.word_boundaries = word_boundaries

    self.out_log_file = self.log_file_output_path("alignment", crp, True)

    # One alignment cache per task id, numbered 1..self.concurrent.
    self.out_single_alignment_caches = {
        task_id: self.output_path("alignment.cache.%d" % task_id, cached=True)
        for task_id in range(1, self.concurrent + 1)
    }
    self.out_alignment_path = util.MultiOutputPath(
        self,
        "alignment.cache.$(TASK)",
        self.out_single_alignment_caches,
        cached=True,
    )
    self.out_alignment_bundle = self.output_path("alignment.cache.bundle", cached=True)

    if self.word_boundaries:
        self.out_single_word_boundary_caches = {
            task_id: self.output_path("word_boundary.cache.%d" % task_id, cached=True)
            for task_id in range(1, self.concurrent + 1)
        }
        self.out_word_boundary_path = util.MultiOutputPath(
            self,
            "word_boundary.cache.$(TASK)",
            self.out_single_word_boundary_caches,
            cached=True,
        )
        self.out_word_boundary_bundle = self.output_path(
            "word_boundary.cache.bundle", cached=True
        )

    # Time scales with corpus duration per task and never drops below 0.5.
    time_rqmt = max(rtf * crp.corpus_duration / crp.concurrent, 0.5)
    gpu_rqmt = 1 if self.use_gpu else 0
    self.rqmt = {"time": time_rqmt, "cpu": 1, "gpu": gpu_rqmt, "mem": 2}