def create_config(cls, crp, eval_recordings, eval_lm, extra_config, extra_post_config):
    config = rasr.RasrConfig()
    post_config = rasr.RasrConfig()

    config._update(crp.log_config)
    post_config._update(crp.log_post_config)

    config.costa.statistics.corpus = crp.corpus_config
    post_config.costa.statistics.corpus = crp.corpus_post_config

    config.costa.statistics.evaluate_recordings = eval_recordings
    if eval_recordings:
        config.costa.statistics.feature_extraction.file = "audio.flow"

    config.costa.lexical_statistics = crp.lexicon_config is not None
    config.costa.statistics.lexicon = crp.lexicon_config
    post_config.costa.statistics.lexicon = crp.lexicon_post_config

    config.costa.lm_statistics = (
        crp.language_model_config is not None
        and crp.lexicon_config is not None
        and eval_lm
    )
    config.costa.statistics.lm = crp.language_model_config
    post_config.costa.statistics.lm = crp.language_model_post_config

    if extra_config is not None:
        config._update(extra_config)
    if extra_post_config is not None:
        post_config._update(extra_post_config)  # update post_config here, not config

    return config, post_config

def build_crp(
    self,
    am_args,
    corpus_object,
    concurrent,
    segment_path,
    lexicon_args,
    cart_tree_path=None,
    allophone_file=None,
    lm_args=None,
):
    """
    Constructs and returns a CommonRasrParameters from the given settings and files.
    """
    crp = rasr.CommonRasrParameters()
    rasr.crp_add_default_output(crp)
    crp.acoustic_model_config = am.acoustic_model_config(**am_args)
    rasr.crp_set_corpus(crp, corpus_object)
    crp.concurrent = concurrent
    crp.segment_path = segment_path

    crp.lexicon_config = rasr.RasrConfig()
    crp.lexicon_config.file = lexicon_args["filename"]
    crp.lexicon_config.normalize_pronunciation = lexicon_args["normalize_pronunciation"]

    if "add_from_lexicon" in lexicon_args:
        crp.acoustic_model_config.allophones.add_from_lexicon = lexicon_args["add_from_lexicon"]
    if "add_all" in lexicon_args:
        crp.acoustic_model_config.allophones.add_all = lexicon_args["add_all"]

    if cart_tree_path is not None:
        crp.acoustic_model_config.state_tying.type = "cart"
        crp.acoustic_model_config.state_tying.file = cart_tree_path

    if lm_args is not None:
        crp.language_model_config = rasr.RasrConfig()
        crp.language_model_config.type = lm_args["type"]
        crp.language_model_config.file = lm_args["filename"]
        crp.language_model_config.scale = lm_args["scale"]

    if allophone_file is not None:
        crp.acoustic_model_config.allophones.add_from_file = allophone_file

    self.crp = crp

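# Illustrative usage sketch, not part of the original recipe: all paths and
# values below are hypothetical, `builder` stands for an instance of the
# surrounding class and `corpus_object` for a CorpusObject as consumed by
# rasr.crp_set_corpus().
builder.build_crp(
    am_args={"state_tying": "monophone", "states_per_phone": 3},
    corpus_object=corpus_object,
    concurrent=20,
    segment_path=tk.Path("/path/to/segments"),
    lexicon_args={
        "filename": tk.Path("/path/to/lexicon.xml.gz"),
        "normalize_pronunciation": False,
    },
    lm_args={"type": "ARPA", "filename": tk.Path("/path/to/lm.gz"), "scale": 10.5},
)
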
def lattice_combined_recognition(self):
    for epoch in self.epochs:
        lattice_bundles = []
        epoch_name = "{}_epoch.{}".format(self.name, epoch)
        print("{}: Adding recognition for epoch {}".format(self.name, epoch_name))
        for key, segment in self.single_segments.items():
            eval_corpus = copy.deepcopy(self.corpus)
            eval_corpus.out_segment_path = segment
            eval_corpus.concurrent = 1
            bundled_flow = self.recog_args["feature_flow"]
            bundled_flow.flags["cache_mode"] = "bundle"
            scorer_name = "{}_{}".format(epoch_name, key)
            returnn_scorer = rasr.ReturnnScorer(
                feature_dimension=self.scorer_args["feature_dimension"],
                output_dimension=self.scorer_args["output_dimension"],
                prior_mixtures=self.scorer_args["prior_mixtures"],
                model=self.models[key][epoch],
                prior_scale=self.scorer_args["prior_scale"],
                prior_file=None,
            )
            eval_corpus.language_model_config.scale = self.recog_args["lm_scale"]
            model_combination_config = rasr.RasrConfig()
            model_combination_config.pronunciation_scale = self.recog_args["pronunciation_scale"]
            rec = recog.AdvancedTreeSearchJob(
                crp=eval_corpus,
                feature_flow=bundled_flow,
                feature_scorer=returnn_scorer,
                model_combination_config=model_combination_config,
            )
            rec.keep_value(self.recognition_keep_value)
            rec.set_vis_name("Recog %s" % scorer_name)
            self.jobs["recog_%s" % scorer_name] = rec
            lattice_bundles.append(rec.out_lattice_bundle)

        m = MergeFilesJob(lattice_bundles)
        self.jobs["lat2ctm_%s" % epoch_name] = lat2ctm = recog.LatticeToCtmJob(
            crp=self.corpus, lattice_cache=m.out_file, parallelize=False)
        self.ctm_files["recog_%s" % epoch_name] = lat2ctm.out_ctm_file

        kwargs = copy.deepcopy(self.wer_scorer_args)
        kwargs["hyp"] = lat2ctm.out_ctm_file
        scorer = self.wer_scorer(**kwargs)
        self.jobs["scorer_%s" % epoch_name] = scorer
        self.scorers[epoch_name] = scorer
        tk.register_output("recog_%s.reports" % epoch_name, scorer.report_dir)
        self.add_input(scorer.report_dir)

def create_config(cls, crp, feature_flow, alignment_options, word_boundaries,
                  label_scorer, align_node_options, extra_config, extra_post_config,
                  **kwargs):
    """
    :param rasr.crp.CommonRasrParameters crp:
    :param feature_flow:
    :param dict[str] alignment_options:
    :param bool word_boundaries:
    :param recipe.rasr.LabelScorer label_scorer:
    :param dict[str] align_node_options:
    :param extra_config:
    :param extra_post_config:
    :return: config, post_config
    :rtype: (rasr.RasrConfig, rasr.RasrConfig)
    """
    alignment_flow = cls.create_flow(feature_flow)
    align_node = 'speech-label-alignment'
    assert label_scorer is not None, 'need label scorer for label aligner'

    # acoustic model + lexicon for the flow nodes
    mapping = {
        'corpus': 'acoustic-model-trainer.corpus',
        'lexicon': [],
        'acoustic_model': [],
    }
    for node in alignment_flow.get_node_names_by_filter(align_node):
        mapping['lexicon'].append(
            'acoustic-model-trainer.aligning-feature-extractor.feature-extraction.%s.model-combination.lexicon' % node)
        mapping['acoustic_model'].append(
            'acoustic-model-trainer.aligning-feature-extractor.feature-extraction.%s.model-combination.acoustic-model' % node)
    config, post_config = rasr.build_config_from_mapping(crp, mapping, parallelize=True)

    # alignment options for the flow nodes
    alignopt = {}
    if alignment_options is not None:
        alignopt.update(alignment_options)
    for node in alignment_flow.get_node_names_by_filter(align_node):
        node_config = config.acoustic_model_trainer.aligning_feature_extractor.feature_extraction[node]
        # alignment node options
        for k, v in align_node_options.items():
            node_config[k] = v
        # aligner search options
        node_config.aligner = rasr.RasrConfig()
        for k, v in alignopt.items():
            node_config.aligner[k] = v
        # scorer
        label_scorer.apply_config('label-scorer', node_config, node_config)

    alignment_flow.apply_config(
        'acoustic-model-trainer.aligning-feature-extractor.feature-extraction',
        config, post_config)

    config.action = 'dry'
    config.acoustic_model_trainer.aligning_feature_extractor.feature_extraction.file = 'alignment.flow'
    post_config['*'].allow_overwrite = True

    config._update(extra_config)
    post_config._update(extra_post_config)
    return config, post_config

def acoustic_model_config(
    state_tying="monophone",
    states_per_phone=3,
    state_repetitions=1,
    across_word_model=True,
    early_recombination=False,
    tdp_scale=1.0,
    tdp_transition=(3.0, 0.0, 3.0, 2.0),
    tdp_silence=(0.0, 3.0, "infinity", 6.0),
    tying_type="global",
    nonword_phones="",
    tdp_nonword=(0.0, 3.0, "infinity", 6.0),
):
    config = rasr.RasrConfig()

    config.state_tying.type = state_tying
    config.allophones.add_from_lexicon = True
    config.allophones.add_all = False

    config.hmm.states_per_phone = states_per_phone
    config.hmm.state_repetitions = state_repetitions
    config.hmm.across_word_model = across_word_model
    config.hmm.early_recombination = early_recombination

    config.tdp.scale = tdp_scale

    config.tdp["*"].loop = tdp_transition[0]
    config.tdp["*"].forward = tdp_transition[1]
    config.tdp["*"].skip = tdp_transition[2]
    config.tdp["*"].exit = tdp_transition[3]

    config.tdp.silence.loop = tdp_silence[0]
    config.tdp.silence.forward = tdp_silence[1]
    config.tdp.silence.skip = tdp_silence[2]
    config.tdp.silence.exit = tdp_silence[3]

    config.tdp["entry-m1"].loop = "infinity"
    config.tdp["entry-m2"].loop = "infinity"

    if tying_type == "global-and-nonword":
        config.tdp.tying_type = "global-and-nonword"
        config.tdp.nonword_phones = nonword_phones
        for nw in [0, 1]:
            k = "nonword-%d" % nw
            config.tdp[k].loop = tdp_nonword[0]
            config.tdp[k].forward = tdp_nonword[1]
            config.tdp[k].skip = tdp_nonword[2]
            config.tdp[k].exit = tdp_nonword[3]

    return config

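# Illustrative usage sketch, not part of the original recipe: build a CART-tied
# acoustic model config with a lowered TDP scale (values are hypothetical) and
# attach it to a CommonRasrParameters instance.
am_config = acoustic_model_config(state_tying="cart", tdp_scale=0.5)
crp = rasr.CommonRasrParameters()
crp.acoustic_model_config = am_config
# the CART file itself is set separately, e.g.
# crp.acoustic_model_config.state_tying.file = cart_tree_path
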
def _init_lexicon(self, corpus_key: str, filename: Path,
                  normalize_pronunciation: bool, **kwargs):
    """
    Set the lexicon config for the given corpus.

    :param corpus_key: corpus identifier in self.crp
    :param filename: path to the bliss lexicon file
    :param normalize_pronunciation: whether to normalize pronunciation weights
    :param kwargs: unused
    """
    self.crp[corpus_key].lexicon_config = rasr.RasrConfig()
    self.crp[corpus_key].lexicon_config.file = filename
    self.crp[corpus_key].lexicon_config.normalize_pronunciation = normalize_pronunciation

def _init_lm(self, corpus_key: str, filename: Path, type: str, scale: float, **kwargs):
    """
    Set the language model config for the given corpus.

    :param corpus_key: corpus identifier in self.crp
    :param filename: path to the language model file
    :param type: RASR LM type, e.g. "ARPA"
    :param scale: language model scale
    :param kwargs: unused
    """
    self.crp[corpus_key].language_model_config = rasr.RasrConfig()
    self.crp[corpus_key].language_model_config.type = type
    self.crp[corpus_key].language_model_config.file = filename
    self.crp[corpus_key].language_model_config.scale = scale

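# Illustrative usage sketch (hypothetical corpus key and paths; `system` stands
# for an instance of the surrounding class): initialize lexicon and LM before
# building recognition configs.
system._init_lexicon(
    "dev-other", tk.Path("/path/to/lexicon.xml.gz"), normalize_pronunciation=False
)
system._init_lm("dev-other", tk.Path("/path/to/4gram.lm.gz"), type="ARPA", scale=10.5)
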
def get_tf_flow(
    checkpoint_path: Union[Path, returnn.Checkpoint],
    tf_graph_path: Path,
    returnn_op_path: Path,
    forward_output_layer: str = "output",
    tf_fwd_input_name: str = "tf-fwd-input",
):
    """
    Create flow network and config for the tf-fwd node

    :param Path checkpoint_path: RETURNN model checkpoint which should be loaded
    :param Path tf_graph_path: compiled tf graph for the model
    :param Path returnn_op_path: path to native lstm library
    :param str forward_output_layer: name of the layer whose output is used
    :param str tf_fwd_input_name: tf flow node input name, see add_tf_flow_base_flow()
    :rtype: FlowNetwork
    """
    input_name = tf_fwd_input_name

    tf_flow = rasr.FlowNetwork()
    tf_flow.add_input(input_name)
    tf_flow.add_output("features")
    tf_flow.add_param("id")
    tf_fwd = tf_flow.add_node("tensorflow-forward", "tf-fwd", {"id": "$(id)"})
    tf_flow.link(f"network:{input_name}", tf_fwd + ":input")
    tf_flow.link(tf_fwd + ":log-posteriors", "network:features")

    tf_flow.config = rasr.RasrConfig()
    tf_flow.config[tf_fwd].input_map.info_0.param_name = "input"
    tf_flow.config[tf_fwd].input_map.info_0.tensor_name = "extern_data/placeholders/data/data"
    tf_flow.config[tf_fwd].input_map.info_0.seq_length_tensor_name = (
        "extern_data/placeholders/data/data_dim0_size"
    )

    tf_flow.config[tf_fwd].output_map.info_0.param_name = "log-posteriors"
    tf_flow.config[tf_fwd].output_map.info_0.tensor_name = (
        f"{forward_output_layer}/output_batch_major"
    )

    tf_flow.config[tf_fwd].loader.type = "meta"
    tf_flow.config[tf_fwd].loader.meta_graph_file = tf_graph_path
    tf_flow.config[tf_fwd].loader.saved_model_file = checkpoint_path
    tf_flow.config[tf_fwd].loader.required_libraries = returnn_op_path

    return tf_flow

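# Illustrative usage sketch (hypothetical paths): wire the tf-fwd node for a
# compiled RETURNN graph; the resulting network can then be interconnected with
# a base feature flow, as done in make_tf_feature_flow() below.
tf_flow = get_tf_flow(
    checkpoint_path=tk.Path("/path/to/epoch.080.index"),
    tf_graph_path=tk.Path("/path/to/graph.meta"),
    returnn_op_path=tk.Path("/path/to/NativeLstm2.so"),
    forward_output_layer="output",
)
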
def store_allophones(self, source_corpus, target_corpus="base", **kwargs):
    """
    Dump allophones into a file.

    :param str source_corpus:
    :param str target_corpus:
    :param kwargs:
    :return:
    """
    self.jobs[target_corpus]["allophones"] = lexicon.StoreAllophonesJob(
        self.crp[source_corpus], **kwargs)
    # noinspection PyUnresolvedReferences
    self.allophone_files[target_corpus] = self.jobs[target_corpus]["allophones"].out_allophone_file
    if self.crp[target_corpus].acoustic_model_post_config is None:
        self.crp[target_corpus].acoustic_model_post_config = rasr.RasrConfig()
    self.crp[target_corpus].acoustic_model_post_config.allophones.add_from_file = (
        self.allophone_files[target_corpus]
    )

def recognized_warping_factor_flow(
    feature_net,
    alphas_file,
    mixtures,
    filterbank_node="filterbank",
    amplitude_spectrum_node="amplitude-spectrum",
    omega=0.875,
):
    assert filterbank_node in feature_net.nodes
    assert feature_net.nodes[filterbank_node]["filter"] == "signal-filterbank"
    assert amplitude_spectrum_node in feature_net.nodes

    # copy original net
    net = rasr.FlowNetwork(name=feature_net.name)
    mapping = net.add_net(feature_net)
    net.interconnect_inputs(feature_net, mapping)
    net.interconnect_outputs(feature_net, mapping)

    # remove output for features
    original_feature_outputs = net.get_output_links("features")
    net.unlink(to_name="%s:%s" % (net.name, "features"))

    warped_net, broken_links = feature_net.subnet_from_node(filterbank_node)
    warped_mapping = net.add_net(warped_net)
    net.interconnect_outputs(warped_net, warped_mapping)
    for l in broken_links:
        net.link(mapping[l[0]], warped_mapping[l[1]])

    fbnode = net.nodes[warped_mapping[filterbank_node]]
    fbnode["warping-function"] = "nest(linear-2($input(alpha), %s), %s)" % (
        omega,
        fbnode["warping-function"],
    )

    # energy
    energy = net.add_node("generic-vector-f32-norm", "energy", {"value": 1})
    net.link(mapping[amplitude_spectrum_node], energy)

    convert_energy_to_vector = net.add_node(
        "generic-convert-f32-to-vector-f32", "convert-energy-to-vector"
    )
    net.link(energy, convert_energy_to_vector)

    energy_normalization = net.add_node(
        "signal-normalization",
        "energy-normalization",
        {"type": "divide-by-mean", "length": "infinite", "right": "infinite"},
    )
    net.link(convert_energy_to_vector, energy_normalization)

    convert_energy_to_scalar = net.add_node(
        "generic-convert-vector-f32-to-f32", "convert-energy-vector-to-scalar"
    )
    net.link(energy_normalization, convert_energy_to_scalar)

    energy_sync = net.add_node("generic-synchronization", "energy-sync")
    net.link(convert_energy_to_scalar, energy_sync)
    net.link(original_feature_outputs.pop(), "%s:target" % energy_sync)

    rec = net.add_node(
        "signal-bayes-classification",
        "warping-factor-recognizer",
        {"class-label-file": alphas_file},
    )
    net.link(rec, "%s:alpha" % warped_mapping[filterbank_node])
    net.link(energy_sync, "%s:feature-score-weight" % rec)
    net.link("%s:target" % energy_sync, rec)

    net.config = rasr.RasrConfig()
    net.config[rec].likelihood_function.file = mixtures
    net.config[rec].likelihood_function.feature_scorer_type = "SIMD-diagonal-maximum"

    return net

def nn_align(
    self,
    name,
    corpus_key,
    flow,
    tf_checkpoint,
    pronunciation_scale,
    alignment_options=None,
    parallelize_conversion=False,
    prefix="",
    **kwargs,
):
    """
    :param str name:
    :param str corpus_key:
    :param str|list[str]|tuple[str]|rasr.FlagDependentFlowAttribute flow:
    :param Checkpoint tf_checkpoint:
    :param float pronunciation_scale:
    :param dict|None alignment_options:
    :param bool parallelize_conversion:
    :param str prefix:
    :param kwargs:
    :return:
    """
    # self.crp[corpus_key].language_model_config.scale = lm_scale
    # self.crp[corpus_key].acoustic_model_config.tdp["*"].skip = 0
    # self.crp[corpus_key].acoustic_model_config.tdp.silence.skip = 0

    model_combination_config = rasr.RasrConfig()
    model_combination_config.pronunciation_scale = pronunciation_scale

    # label tree #
    label_unit = kwargs.pop('label_unit', None)
    assert label_unit, 'label_unit not given'
    label_tree_args = kwargs.pop('label_tree_args', {})
    # label_tree = rasr_experimental.LabelTree(label_unit, **label_tree_args)

    scorer_type = kwargs.pop('label_scorer_type', None)
    assert scorer_type, 'label_scorer_type not given'
    label_scorer_args = kwargs.pop('label_scorer_args', {})

    # add vocab file
    from i6_experiments.users.rossenbach.rasr.vocabulary import GenerateLabelFileFromStateTying
    label_scorer_args['labelFile'] = GenerateLabelFileFromStateTying(
        self.state_tying, add_eow=True).out_label_file
    label_scorer_args['priorFile'] = self.estimate_nn_prior(
        self.train_corpora[0], feature_flow=flow, tf_checkpoint=tf_checkpoint, **kwargs)

    am_scale = label_scorer_args.get('scale', 1.0)

    tf_graph = self.make_model_graph(self.returnn_config)
    feature_flow = self.make_tf_feature_flow(
        self.feature_flows[corpus_key][flow], tf_graph, tf_checkpoint, **kwargs)
    label_scorer = rasr_experimental.LabelScorer(scorer_type, **label_scorer_args)

    extra_config = rasr.RasrConfig()
    if pronunciation_scale > 0:
        extra_config.flf_lattice_tool.network.recognizer.pronunciation_scale = pronunciation_scale

    # fixed CTC settings:
    extra_config.acoustic_model_trainer.aligning_feature_extractor.feature_extraction.alignment.allow_label_loop = True

    if alignment_options is None:
        alignment_options = {
            'label-pruning': 10,
            'label-pruning-limit': 10000,
        }

    # label alignment
    align_args = {
        'crp': self.crp[corpus_key],
        'use_gpu': kwargs.get('use_gpu', True),
        'feature_flow': feature_flow,
        'label_scorer': label_scorer,
        'alignment_options': alignment_options,  # aligner search options
        'extra_config': extra_config,
    }
    align_job = LabelAlignmentJob(**align_args)
    # align_job.rqmt.update(job_rqmt)
    alignment = rasr.FlagDependentFlowAttribute(
        'cache_mode',
        {
            'task_dependent': align_job.out_alignment_path,
            'bundle': align_job.out_alignment_bundle,
        })
    self.alignments[corpus_key][name] = [alignment]
    if kwargs.get('register_output', False):
        tk.register_output('%s_%s' % (corpus_key, name), align_job.out_alignment_bundle)
    return name

def create_config(cls, crp, feature_flow, label_tree, label_scorer, search_parameters=None, lm_lookahead=True, lookahead_options=None, eval_single_best=True, eval_best_in_lattice=True, extra_config=None, extra_post_config=None, sprint_exe=None, lm_gc_job=None, lm_gc_job_local=False, lm_gc_job_mem=16, lm_gc_job_default_search=False, **kwargs): # optional individual lm-image and global-cache job # if lm_gc_job is None: lm_gc_job = LabelSyncSearchLmImageAndGlobalCacheJob( crp, label_tree, label_scorer, extra_config, extra_post_config, mem=lm_gc_job_mem, local_job=lm_gc_job_local, sprint_exe=sprint_exe, default_search=lm_gc_job_default_search, ) # get config from csp # config, post_config = rasr.build_config_from_mapping( crp, { "corpus": "flf-lattice-tool.corpus", "lexicon": "flf-lattice-tool.lexicon", "acoustic_model": "flf-lattice-tool.network.recognizer.acoustic-model", "language_model": "flf-lattice-tool.network.recognizer.lm", }, parallelize=True, ) # acoustic model maybe used for allophones and state-tying, but no mixture is needed # # skip conventional AM or load it without GMM # if crp.acoustic_model_config is None: config.flf_lattice_tool.network.recognizer.use_acoustic_model = False else: config.flf_lattice_tool.network.recognizer.use_mixture = False # feature flow # config.flf_lattice_tool.network.recognizer.feature_extraction.file = ( "feature.flow") feature_flow.apply_config( "flf-lattice-tool.network.recognizer.feature-extraction", config, post_config, ) # label tree and optional lexicon overwrite # label_tree.apply_config( "flf-lattice-tool.network.recognizer.recognizer.label-tree", config, post_config, ) if label_tree.lexicon_config is not None: config["flf-lattice-tool.lexicon"]._update( label_tree.lexicon_config) # label scorer # label_scorer.apply_config( "flf-lattice-tool.network.recognizer.label-scorer", config, post_config) # search settings # search_config = rasr.RasrConfig() if search_parameters is not None: for key in search_parameters.keys(): search_config[key] = search_parameters[key] config.flf_lattice_tool.network.recognizer.recognizer._update( search_config) # lookahead settings # config.flf_lattice_tool.network.recognizer.recognizer.lm_lookahead._value = ( lm_lookahead) if lm_lookahead: lookahead_config = rasr.RasrConfig() if lookahead_options is not None: for key in lookahead_options.keys(): lookahead_config[key] = lookahead_options[key] config.flf_lattice_tool.network.recognizer.recognizer.lm_lookahead._update( lookahead_config) # flf network # config.flf_lattice_tool.network.initial_nodes = "segment" config.flf_lattice_tool.network.segment.type = "speech-segment" config.flf_lattice_tool.network.segment.links = ( "1->recognizer:1 0->archive-writer:1 0->evaluator:1") config.flf_lattice_tool.network.recognizer.type = "recognizer" config.flf_lattice_tool.network.recognizer.search_type = "label-sync-search" config.flf_lattice_tool.network.recognizer.apply_non_word_closure_filter = False config.flf_lattice_tool.network.recognizer.add_confidence_score = False config.flf_lattice_tool.network.recognizer.apply_posterior_pruning = False if label_scorer.config.label_unit == "hmm": config.flf_lattice_tool.network.recognizer.links = "expand" config.flf_lattice_tool.network.expand.type = "expand-transits" config.flf_lattice_tool.network.expand.links = "evaluator archive-writer" else: config.flf_lattice_tool.network.recognizer.links = ( "evaluator archive-writer") config.flf_lattice_tool.network.evaluator.type = "evaluator" config.flf_lattice_tool.network.evaluator.links = 
"sink:0" config.flf_lattice_tool.network.evaluator.word_errors = True config.flf_lattice_tool.network.evaluator.single_best = eval_single_best config.flf_lattice_tool.network.evaluator.best_in_lattice = eval_best_in_lattice config.flf_lattice_tool.network.evaluator.edit_distance.format = "bliss" config.flf_lattice_tool.network.evaluator.edit_distance.allow_broken_words = ( False) config.flf_lattice_tool.network.archive_writer.type = "archive-writer" config.flf_lattice_tool.network.archive_writer.links = "sink:1" config.flf_lattice_tool.network.archive_writer.format = "flf" config.flf_lattice_tool.network.archive_writer.path = "lattice.cache.$(TASK)" post_config.flf_lattice_tool.network.archive_writer.info = True config.flf_lattice_tool.network.sink.type = "sink" post_config.flf_lattice_tool.network.sink.warn_on_empty_lattice = True post_config.flf_lattice_tool.network.sink.error_on_empty_lattice = False post_config["*"].output_channel.unbuffered = True # update parameters # config._update(extra_config) post_config._update(extra_post_config) # image and cache # arpa_lms = LabelSyncSearchLmImageAndGlobalCacheJob.find_arpa_lms( config) assert (len(arpa_lms) == lm_gc_job.num_images ), "mismatch between image-cache config and recognition config" for i, lm_config in enumerate(arpa_lms): lm_config.image = lm_gc_job.lm_images[i + 1] if post_config.flf_lattice_tool.global_cache._get("file") is None: post_config.flf_lattice_tool.global_cache.read_only = True post_config.flf_lattice_tool.global_cache.file = lm_gc_job.global_cache return config, post_config
def get_monophone_args(
    feature_flow: str = "mfcc+deriv+norm",
    *,
    train_align_iter: int = 75,
    allow_zero_weights: bool = False,
    zero_weights_in: str = "extra_config",
):
    linear_alignment_args = {
        "minimum_segment_length": 0,
        "maximum_segment_length": 6000,
        "iterations": 5,
        "penalty": 0,
        "minimum_speech_proportion": 0.7,
        "save_alignment": False,
        "keep_accumulators": False,
        "extra_merge_args": None,
        "extra_config": None,
        "extra_post_config": None,
    }

    monophone_training_args = {
        "name": "mono",
        "feature_flow": feature_flow,
        "feature_energy_flow_key": f"energy,{feature_flow}",
        "align_iter": train_align_iter,
        "splits": 10,
        "accs_per_split": 2,
    }

    monophone_recognition_args = {
        # GmmSystem.recognition() args:
        "iters": [8, 10],
        "lm_scales": [10.5],
        "optimize_am_lm_scale": True,
        # meta.System.recog() args:
        "feature_flow": feature_flow,
        "pronunciation_scales": [6.0],
        "lm_lookahead": True,
        "lookahead_options": None,
        "create_lattice": True,
        "eval_single_best": True,
        "eval_best_in_lattice": True,
        "search_parameters": {
            "beam-pruning": 18.0,
            "beam-pruning-limit": 100000,
            "word-end-pruning": 0.75,
            "word-end-pruning-limit": 15000,
        },
        "parallelize_conversion": False,
        "lattice_to_ctm_kwargs": {
            "fill_empty_segments": False,
            "best_path_algo": "bellman-ford",
        },
        "rtf": 50,
        "mem": 8,
        "use_gpu": False,
    }

    monophone_test_recognition_args = None
    # {
    #     "optimize_am_lm_scale": False,
    #     "pronunciation_scales": [1.0],
    #     "lm_scales": [11.0],
    # }

    if allow_zero_weights:
        allow_zero_weights_extra_config = rasr.RasrConfig()
        allow_zero_weights_extra_config.allow_zero_weights = True

        monophone_training_args["align_extra_args"] = {
            zero_weights_in: allow_zero_weights_extra_config
        }
        monophone_training_args["accumulate_extra_args"] = {
            zero_weights_in: allow_zero_weights_extra_config
        }
        monophone_training_args["split_extra_args"] = {
            zero_weights_in: allow_zero_weights_extra_config
        }
        monophone_recognition_args[zero_weights_in] = allow_zero_weights_extra_config

    sdm_args = {
        "name": "sdm.mono",
        "alignment": "train_mono",
        "feature_flow_key": feature_flow,
    }

    return rasr_util.GmmMonophoneArgs(
        linear_alignment_args=linear_alignment_args,
        training_args=monophone_training_args,
        recognition_args=monophone_recognition_args,
        test_recognition_args=monophone_test_recognition_args,
        sdm_args=sdm_args,
    )

def get_triphone_args(
    name: str = "tri",
    initial_alignment: str = "mono",
    feature_flow: str = "mfcc+context+lda",
    allow_zero_weights: bool = False,
    zero_weights_in: str = "extra_config",
):
    triphone_training_args = {
        "name": name,
        "initial_alignment": f"train_{initial_alignment}",
        "feature_flow": feature_flow,
        "splits": 10,
        "accs_per_split": 2,
        "align_extra_rqmt": {"mem": 8},
        "accumulate_extra_rqmt": {"mem": 8},
        "split_extra_rqmt": {"mem": 8},
    }

    triphone_recognition_args = {
        "iters": [8, 10],
        "feature_flow": feature_flow,
        "pronunciation_scales": [6.0],
        "lm_scales": [24.9],
        "lm_lookahead": True,
        "lookahead_options": None,
        "create_lattice": True,
        "eval_single_best": True,
        "eval_best_in_lattice": True,
        "search_parameters": {
            "beam-pruning": 12.0,
            "beam-pruning-limit": 100000,
            "word-end-pruning": 0.5,
            "word-end-pruning-limit": 15000,
        },
        "lattice_to_ctm_kwargs": {
            "fill_empty_segments": False,
            "best_path_algo": "bellman-ford",
        },
        "optimize_am_lm_scale": True,
        "rtf": 50,
        "mem": 8,
        "parallelize_conversion": True,
    }

    if allow_zero_weights:
        allow_zero_weights_extra_config = rasr.RasrConfig()
        allow_zero_weights_extra_config.allow_zero_weights = True

        triphone_training_args["align_extra_args"] = {
            zero_weights_in: allow_zero_weights_extra_config
        }
        triphone_training_args["accumulate_extra_args"] = {
            zero_weights_in: allow_zero_weights_extra_config
        }
        triphone_training_args["split_extra_args"] = {
            zero_weights_in: allow_zero_weights_extra_config
        }
        triphone_recognition_args[zero_weights_in] = allow_zero_weights_extra_config

    sdm_args = {
        "name": f"sdm.{name}",
        "alignment": f"train_{name}",
        "feature_flow_key": feature_flow,
    }

    return rasr_util.GmmTriphoneArgs(
        training_args=triphone_training_args,
        recognition_args=triphone_recognition_args,
        sdm_args=sdm_args,
    )

def get_vtln_args(
    name: str = "vtln",
    feature_flow: str = "mfcc+context+lda",
    initial_alignment_key: str = "tri",
    allow_zero_weights: bool = False,
    zero_weights_in: str = "extra_config",
):
    vtln_training_args = {
        "feature_flow": {
            "name": f"uncached_{feature_flow}",
            "lda_matrix_key": "mono",
            "base_flow_key": f"uncached_{feature_flow.split('+')[0]}",
            "context_size": 9,
        },
        "warp_mix": {
            "name": "tri",
            "alignment": f"train_{initial_alignment_key}",
            "feature_scorer": "estimate_mixtures_sdm.tri",
            "splits": 8,
            "accs_per_split": 2,
        },
        "train": {
            "name": name,
            "initial_alignment_key": f"train_{initial_alignment_key}",
            "splits": 10,
            "accs_per_split": 2,
            "feature_flow": f"{feature_flow}+vtln",
            "accumulate_extra_rqmt": {"mem": 8},
            "align_extra_rqmt": {"mem": 8},
            "split_extra_rqmt": {"mem": 8},
        },
    }

    vtln_recognition_args = {
        "iters": [8, 10],
        "feature_flow": f"uncached_{feature_flow}+vtln",
        "pronunciation_scales": [6.0],
        "lm_scales": [22.4],
        "lm_lookahead": True,
        "lookahead_options": None,
        "create_lattice": True,
        "eval_single_best": True,
        "eval_best_in_lattice": True,
        "search_parameters": {
            "beam-pruning": 12.0,
            "beam-pruning-limit": 100000,
            "word-end-pruning": 0.5,
            "word-end-pruning-limit": 15000,
        },
        "lattice_to_ctm_kwargs": {
            "fill_empty_segments": False,
            "best_path_algo": "bellman-ford",
        },
        "optimize_am_lm_scale": True,
        "rtf": 50,
        "mem": 8,
        "parallelize_conversion": True,
    }

    if allow_zero_weights:
        allow_zero_weights_extra_config = rasr.RasrConfig()
        allow_zero_weights_extra_config.allow_zero_weights = True

        vtln_training_args["train"]["align_extra_args"] = {
            zero_weights_in: allow_zero_weights_extra_config
        }
        vtln_training_args["train"]["accumulate_extra_args"] = {
            zero_weights_in: allow_zero_weights_extra_config
        }
        vtln_training_args["train"]["split_extra_args"] = {
            zero_weights_in: allow_zero_weights_extra_config
        }
        vtln_recognition_args[zero_weights_in] = allow_zero_weights_extra_config

    sdm_args = {
        "name": f"sdm.{name}",
        "alignment": f"train_{name}",
        "feature_flow_key": f"{feature_flow}+vtln",
    }

    return rasr_util.GmmVtlnArgs(
        training_args=vtln_training_args,
        recognition_args=vtln_recognition_args,
        sdm_args=sdm_args,
    )

def samples_with_silence_normalization_flow(
    audio_format="wav", dc_detection=True, dc_params=None, silence_params=None
):
    _dc_params = {
        "min-dc-length": 0.01,
        "max-dc-increment": 0.9,
        "min-non-dc-segment-length": 0.021,
    }
    _silence_params = {
        "absolute-silence-threshold": 250,
        "discard-unsure-segments": True,
        "min-surrounding-silence": 0.1,
        "fill-up-silence": True,
        "silence-ratio": 0.25,
        "silence-threshold": 0.05,
    }
    if dc_params is not None:
        _dc_params.update(dc_params)
    if silence_params is not None:
        _silence_params.update(silence_params)

    net = rasr.FlowNetwork()
    net.add_output("samples")
    net.add_param(["input-file", "start-time", "end-time", "track"])

    samples = net.add_node(
        "audio-input-file-" + audio_format,
        "samples",
        {
            "file": "$(input-file)",
            "start-time": "$(start-time)",
            "end-time": "$(end-time)",
        },
    )
    demultiplex = net.add_node(
        "generic-vector-s16-demultiplex", "demultiplex", track="$(track)"
    )
    net.link(samples, demultiplex)

    convert = net.add_node("generic-convert-vector-s16-to-vector-f32", "convert")
    net.link(demultiplex, convert)

    sil_norm = net.add_node("signal-silence-normalization", "silence-normalization")
    net.link(convert, sil_norm)

    warp_time = net.add_node("warp-time", "warp-time", {"start-time": "$(start-time)"})
    if dc_detection:
        dc_detection = net.add_node("signal-dc-detection", "dc-detection", _dc_params)
        net.link(sil_norm, dc_detection)
        net.link(dc_detection, warp_time)
    else:
        net.link(sil_norm, warp_time)
    net.link(warp_time, "network:samples")

    net.config = rasr.RasrConfig()
    for k, v in _silence_params.items():  # iterate over key/value pairs
        net.config[sil_norm][k] = v

    return net

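# Illustrative usage sketch: override individual silence parameters (values are
# hypothetical); unspecified keys keep their defaults from _silence_params.
net = samples_with_silence_normalization_flow(
    audio_format="wav",
    dc_detection=False,
    silence_params={"fill-up-silence": False, "silence-threshold": 0.1},
)
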
def recog(
    self,
    name,
    corpus,
    flow,
    feature_scorer,
    pronunciation_scale,
    lm_scale,
    parallelize_conversion=False,
    lattice_to_ctm_kwargs=None,
    prefix="",
    **kwargs,
):
    """
    :param str name:
    :param str corpus:
    :param str|list[str]|tuple[str]|rasr.FlagDependentFlowAttribute flow:
    :param str|list[str]|tuple[str]|rasr.FeatureScorer feature_scorer:
    :param float pronunciation_scale:
    :param float lm_scale:
    :param bool parallelize_conversion:
    :param dict lattice_to_ctm_kwargs:
    :param str prefix:
    :param kwargs:
    :return:
    """
    if lattice_to_ctm_kwargs is None:
        lattice_to_ctm_kwargs = {}

    self.crp[corpus].language_model_config.scale = lm_scale
    model_combination_config = rasr.RasrConfig()
    model_combination_config.pronunciation_scale = pronunciation_scale

    rec = recog.AdvancedTreeSearchJob(
        crp=self.crp[corpus],
        feature_flow=select_element(self.feature_flows, corpus, flow),
        feature_scorer=select_element(self.feature_scorers, corpus, feature_scorer),
        model_combination_config=model_combination_config,
        **kwargs,
    )
    rec.set_vis_name("Recog %s%s" % (prefix, name))
    rec.add_alias("%srecog_%s" % (prefix, name))
    self.jobs[corpus]["recog_%s" % name] = rec

    self.jobs[corpus]["lat2ctm_%s" % name] = lat2ctm = recog.LatticeToCtmJob(
        crp=self.crp[corpus],
        lattice_cache=rec.out_lattice_bundle,
        parallelize=parallelize_conversion,
        **lattice_to_ctm_kwargs,
    )
    self.ctm_files[corpus]["recog_%s" % name] = lat2ctm.out_ctm_file

    kwargs = copy.deepcopy(self.scorer_args[corpus])
    kwargs[self.scorer_hyp_arg[corpus]] = lat2ctm.out_ctm_file
    scorer = self.scorers[corpus](**kwargs)
    self.jobs[corpus]["scorer_%s" % name] = scorer
    tk.register_output("%srecog_%s.reports" % (prefix, name), scorer.out_report_dir)

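# Illustrative usage sketch (hypothetical names; `system` is an instance of the
# surrounding class): flow and feature scorer are resolved via select_element,
# so stored string keys can be passed directly.
system.recog(
    name="mono-iter10",
    corpus="dev-other",
    flow="mfcc+deriv+norm",
    feature_scorer="estimate_mixtures_sdm.mono",
    pronunciation_scale=6.0,
    lm_scale=10.5,
)
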
def recog(
    self,
    name,
    corpus_key,
    flow,
    tf_checkpoint,
    pronunciation_scale,
    lm_scale,
    lm_lookahead,
    lookahead_options=None,
    parallelize_conversion=False,
    lattice_to_ctm_kwargs=None,
    prefix="",
    **kwargs,
):
    """
    :param str name:
    :param str corpus_key:
    :param str|list[str]|tuple[str]|rasr.FlagDependentFlowAttribute flow:
    :param Checkpoint tf_checkpoint:
    :param float pronunciation_scale:
    :param float lm_scale:
    :param bool lm_lookahead:
    :param dict|None lookahead_options:
    :param bool parallelize_conversion:
    :param dict|None lattice_to_ctm_kwargs:
    :param str prefix:
    :param kwargs:
    :return:
    """
    if lattice_to_ctm_kwargs is None:
        lattice_to_ctm_kwargs = {}

    self.crp[corpus_key].language_model_config.scale = lm_scale
    self.crp[corpus_key].acoustic_model_config.tdp["*"].skip = 0
    self.crp[corpus_key].acoustic_model_config.tdp.silence.skip = 0

    model_combination_config = rasr.RasrConfig()
    model_combination_config.pronunciation_scale = pronunciation_scale

    # label tree #
    label_unit = kwargs.pop('label_unit', None)
    assert label_unit, 'label_unit not given'
    label_tree_args = kwargs.pop('label_tree_args', {})
    label_tree = rasr_experimental.LabelTree(label_unit, **label_tree_args)

    scorer_type = kwargs.pop('label_scorer_type', None)
    assert scorer_type, 'label_scorer_type not given'
    label_scorer_args = kwargs.pop('label_scorer_args', {})

    # add vocab file
    from i6_experiments.users.rossenbach.rasr.vocabulary import GenerateLabelFileFromStateTying
    label_scorer_args['labelFile'] = GenerateLabelFileFromStateTying(
        self.state_tying, add_eow=True).out_label_file
    label_scorer_args['priorFile'] = self.estimate_nn_prior(
        self.train_corpora[0], feature_flow=flow, tf_checkpoint=tf_checkpoint, **kwargs)

    am_scale = label_scorer_args.get('scale', 1.0)

    tf_graph = self.make_model_graph(self.returnn_config)
    feature_flow = self.make_tf_feature_flow(
        self.feature_flows[corpus_key][flow], tf_graph, tf_checkpoint, **kwargs)
    label_scorer = rasr_experimental.LabelScorer(scorer_type, **label_scorer_args)

    extra_config = rasr.RasrConfig()
    if pronunciation_scale > 0:
        extra_config.flf_lattice_tool.network.recognizer.pronunciation_scale = pronunciation_scale
    if lm_lookahead:
        assert lookahead_options is not None
        # we want to alter this now
        lookahead_options = copy.deepcopy(lookahead_options)
        if lookahead_options.get("scale", None) is None:
            lookahead_options["scale"] = lm_scale

    # fixed CTC settings:
    extra_config.flf_lattice_tool.network.recognizer.recognizer.allow_label_loop = True
    extra_config.flf_lattice_tool.network.recognizer.recognizer.allow_blank_label = True
    extra_config.flf_lattice_tool.network.recognizer.recognizer.allow_label_recombination = True
    extra_config.flf_lattice_tool.network.recognizer.recognizer.allow_word_end_recombination = True

    rec = LabelSyncSearchJob(
        crp=self.crp[corpus_key],
        feature_flow=feature_flow,
        label_scorer=label_scorer,
        label_tree=label_tree,
        lm_lookahead=lm_lookahead,
        lookahead_options=lookahead_options,
        extra_config=extra_config,
        **kwargs,
    )
    rec.set_vis_name("Recog %s%s" % (prefix, name))
    rec.add_alias("%srecog_%s" % (prefix, name))
    self.jobs[corpus_key]["recog_%s" % name] = rec

    self.jobs[corpus_key]["lat2ctm_%s" % name] = lat2ctm = recog.LatticeToCtmJob(
        crp=self.crp[corpus_key],
        lattice_cache=rec.out_lattice_bundle,
        parallelize=parallelize_conversion,
        **lattice_to_ctm_kwargs,
    )
    self.ctm_files[corpus_key]["recog_%s" % name] = lat2ctm.out_ctm_file

    kwargs = copy.deepcopy(self.scorer_args[corpus_key])
    kwargs[self.scorer_hyp_arg[corpus_key]] = lat2ctm.out_ctm_file
    scorer = self.scorers[corpus_key](**kwargs)
    self.jobs[corpus_key]["scorer_%s" % name] = scorer
    tk.register_output("%srecog_%s.reports" % (prefix, name), scorer.out_report_dir)

def create_config(
    cls,
    crp,
    feature_flow,
    feature_scorer,
    alignment_options,
    extra_config,
    extra_post_config,
    **kwargs,
):
    alignment_flow = cls.create_flow(feature_flow)

    alignopt = {
        "increase-pruning-until-no-score-difference": True,
        "min-acoustic-pruning": 500,
        "max-acoustic-pruning": 10000,
        "acoustic-pruning-increment-factor": 2,
    }
    if alignment_options is not None:
        alignopt.update(alignment_options)

    mapping = {
        "corpus": "acoustic-model-trainer.corpus",
        "lexicon": [],
        "acoustic_model": [],
    }

    # acoustic model + lexicon for the flow nodes
    for node in alignment_flow.get_node_names_by_filter("speech-alignment"):
        mapping["lexicon"].append(
            "acoustic-model-trainer.aligning-feature-extractor.feature-extraction.%s.model-combination.lexicon"
            % node)
        mapping["acoustic_model"].append(
            "acoustic-model-trainer.aligning-feature-extractor.feature-extraction.%s.model-combination.acoustic-model"
            % node)

    config, post_config = rasr.build_config_from_mapping(crp, mapping, parallelize=True)

    # alignment options for the flow nodes
    for node in alignment_flow.get_node_names_by_filter("speech-alignment"):
        node_config = config.acoustic_model_trainer.aligning_feature_extractor.feature_extraction[node]
        node_config.aligner = rasr.RasrConfig()
        for k, v in alignopt.items():
            node_config.aligner[k] = v
        feature_scorer.apply_config(
            "model-combination.acoustic-model.mixture-set", node_config, node_config)
        node_config.store_lattices = True
        node_config.lattice_archive.path = "numerator.$(TASK)"

    alignment_flow.apply_config(
        "acoustic-model-trainer.aligning-feature-extractor.feature-extraction",
        config,
        post_config,
    )

    config.action = "dry"
    config.acoustic_model_trainer.aligning_feature_extractor.feature_extraction.file = "alignment.flow"
    post_config["*"].allow_overwrite = True

    config._update(extra_config)
    post_config._update(extra_post_config)
    return config, post_config

def create_config(
    cls,
    crp,
    feature_flow,
    feature_scorer,
    alignment_options,
    word_boundaries,
    extra_config,
    extra_post_config,
    **kwargs,
):
    """
    :param rasr.crp.CommonRasrParameters crp:
    :param feature_flow:
    :param rasr.FeatureScorer feature_scorer:
    :param dict[str] alignment_options:
    :param bool word_boundaries:
    :param extra_config:
    :param extra_post_config:
    :return: config, post_config
    :rtype: (rasr.RasrConfig, rasr.RasrConfig)
    """
    alignment_flow = cls.create_flow(feature_flow)

    # TODO: think about mode
    alignopt = {
        "increase-pruning-until-no-score-difference": True,
        "min-acoustic-pruning": 500,
        "max-acoustic-pruning": 4000,
        "acoustic-pruning-increment-factor": 2,
    }
    if alignment_options is not None:
        alignopt.update(alignment_options)

    mapping = {
        "corpus": "acoustic-model-trainer.corpus",
        "lexicon": [],
        "acoustic_model": [],
    }

    # acoustic model + lexicon for the flow nodes
    for node in alignment_flow.get_node_names_by_filter("speech-alignment"):
        mapping["lexicon"].append(
            "acoustic-model-trainer.aligning-feature-extractor.feature-extraction.%s.model-combination.lexicon"
            % node)
        mapping["acoustic_model"].append(
            "acoustic-model-trainer.aligning-feature-extractor.feature-extraction.%s.model-combination.acoustic-model"
            % node)

    config, post_config = rasr.build_config_from_mapping(crp, mapping, parallelize=True)

    # alignment options for the flow nodes
    for node in alignment_flow.get_node_names_by_filter("speech-alignment"):
        node_config = config.acoustic_model_trainer.aligning_feature_extractor.feature_extraction[node]
        node_config.aligner = rasr.RasrConfig()
        for k, v in alignopt.items():
            node_config.aligner[k] = v
        feature_scorer.apply_config(
            "model-combination.acoustic-model.mixture-set", node_config, node_config)
        if word_boundaries:
            node_config.store_lattices = True
            node_config.lattice_archive.path = "word_boundary.cache.$(TASK)"

    alignment_flow.apply_config(
        "acoustic-model-trainer.aligning-feature-extractor.feature-extraction",
        config,
        post_config,
    )

    config.action = "dry"
    config.acoustic_model_trainer.aligning_feature_extractor.feature_extraction.file = "alignment.flow"
    post_config["*"].allow_overwrite = True

    config._update(extra_config)
    post_config._update(extra_post_config)
    return config, post_config

def get_vtln_sat_args(
    name: str = "vtln+sat",
    feature_flow: str = "mfcc+context+lda+vtln",
    initial_mixture: str = "estimate_mixtures_sdm.vtln",
    initial_alignment: str = "vtln",
    allow_zero_weights: bool = False,
    zero_weights_in: str = "extra_config",
):
    feature_base_cache = feature_flow.split("+")[0]

    vtln_sat_training_args = {
        "name": name,
        "mixtures": initial_mixture,
        "alignment": f"train_{initial_alignment}",
        "feature_cache": feature_flow,
        "feature_flow_key": feature_flow,
        "cache_regex": "^.*\\+vtln$",
        "splits": 10,
        "accs_per_split": 2,
        "accumulate_extra_rqmt": {"mem": 8},
        "align_extra_rqmt": {"mem": 8},
        "split_extra_rqmt": {"mem": 8},
    }

    vtln_sat_recognition_args = {
        # (name, pron_scale, lm_scale, iteration, optlm suffix)
        "prev_ctm": ("vtln", 6.0, 22.4, 10, "-optlm"),
        "feature_cache": feature_base_cache,
        "cache_regex": f"^{feature_base_cache}.*$",
        "cmllr_mixtures": initial_mixture,
        "iters": [8, 10],
        "feature_flow": f"uncached_{feature_flow}",
        "pronunciation_scales": [6.0],
        "lm_scales": [30.0],
        "lm_lookahead": True,
        "lookahead_options": None,
        "create_lattice": True,
        "eval_single_best": True,
        "eval_best_in_lattice": True,
        "search_parameters": {
            "beam-pruning": 12.0,
            "beam-pruning-limit": 100000,
            "word-end-pruning": 0.5,
            "word-end-pruning-limit": 15000,
        },
        "lattice_to_ctm_kwargs": {
            "fill_empty_segments": False,
            "best_path_algo": "bellman-ford",
        },
        "optimize_am_lm_scale": True,
        "rtf": 50,
        "mem": 8,
        "parallelize_conversion": True,
    }

    if allow_zero_weights:
        allow_zero_weights_extra_config = rasr.RasrConfig()
        allow_zero_weights_extra_config.allow_zero_weights = True

        vtln_sat_training_args["align_extra_args"] = {
            zero_weights_in: allow_zero_weights_extra_config
        }
        vtln_sat_training_args["accumulate_extra_args"] = {
            zero_weights_in: allow_zero_weights_extra_config
        }
        vtln_sat_training_args["split_extra_args"] = {
            zero_weights_in: allow_zero_weights_extra_config
        }
        vtln_sat_recognition_args[zero_weights_in] = allow_zero_weights_extra_config

    sdm_args = {
        "name": f"sdm.{name}",
        "alignment": f"train_{name}",
        "feature_flow_key": f"{feature_flow}+cmllr",
    }

    return rasr_util.GmmVtlnSatArgs(
        training_args=vtln_sat_training_args,
        recognition_args=vtln_sat_recognition_args,
        sdm_args=sdm_args,
    )

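# Illustrative sketch (the exact consumer depends on the pipeline): the *_args
# helpers above are typically collected in training order and handed to a GMM
# system setup.
mono_args = get_monophone_args()
tri_args = get_triphone_args()
vtln_args = get_vtln_args()
vtln_sat_args = get_vtln_sat_args()
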
def create_config(
    cls,
    crp,
    feature_flow,
    feature_scorer,
    search_parameters,
    lm_lookahead,
    lookahead_options,
    mem,
    model_combination_config,
    model_combination_post_config,
    extra_config,
    extra_post_config,
    **kwargs,
):
    lm_gc = recognition.AdvancedTreeSearchLmImageAndGlobalCacheJob(
        crp, feature_scorer, extra_config, extra_post_config)
    lm_gc.rqmt["mem"] = mem

    if search_parameters is None:
        search_parameters = {}
    default_search_parameters = {
        "beam-pruning": 15,
        "beam-pruning-limit": 100000,
        "word-end-pruning": 0.5,
        "word-end-pruning-limit": 10000,
    }
    default_search_parameters.update(search_parameters)
    search_parameters = default_search_parameters

    la_opts = {
        "history_limit": 1,
        "tree_cutoff": 30,
        "minimum_representation": 1,
        "cache_low": 2000,
        "cache_high": 3000,
        "laziness": 15,
    }
    if lookahead_options is not None:
        la_opts.update(lookahead_options)

    config, post_config = rasr.build_config_from_mapping(
        crp,
        {
            "corpus": "speech-recognizer.corpus",
            "lexicon": "speech-recognizer.model-combination.lexicon",
            "acoustic_model": "speech-recognizer.model-combination.acoustic-model",
            "language_model": "speech-recognizer.model-combination.lm",
        },
        parallelize=True,
    )

    # Parameters for Speech::Recognizer
    config.speech_recognizer.search_type = "advanced-tree-search"

    # Parameters for Speech::DataSource or Sparse::DataSource
    config.speech_recognizer.feature_extraction.file = "feature.flow"
    feature_flow.apply_config("speech-recognizer.feature-extraction", config, post_config)

    # Parameters for Am::ClassicAcousticModel
    feature_scorer.apply_config(
        "speech-recognizer.model-combination.acoustic-model.mixture-set",
        config,
        post_config,
    )

    # Parameters for Speech::ModelCombination (besides AM and LM parameters)
    config.speech_recognizer.model_combination.pronunciation_scale = 3.0
    config.speech_recognizer.model_combination._update(model_combination_config)
    post_config.speech_recognizer.model_combination._update(model_combination_post_config)

    # Search parameters
    config.speech_recognizer.recognizer.create_lattice = True
    config.speech_recognizer.store_lattices = True
    config.speech_recognizer.recognizer.beam_pruning = search_parameters["beam-pruning"]
    config.speech_recognizer.recognizer.beam_pruning_limit = search_parameters["beam-pruning-limit"]
    config.speech_recognizer.recognizer.word_end_pruning = search_parameters["word-end-pruning"]
    config.speech_recognizer.recognizer.word_end_pruning_limit = search_parameters["word-end-pruning-limit"]
    config.speech_recognizer.recognizer.lm_lookahead = rasr.RasrConfig()
    config.speech_recognizer.recognizer.lm_lookahead._value = lm_lookahead
    config.speech_recognizer.recognizer.optimize_lattice = "simple"
    if lm_lookahead:
        config.speech_recognizer.recognizer.lm_lookahead_laziness = la_opts["laziness"]
        config.speech_recognizer.recognizer.lm_lookahead.history_limit = la_opts["history_limit"]
        config.speech_recognizer.recognizer.lm_lookahead.tree_cutoff = la_opts["tree_cutoff"]
        config.speech_recognizer.recognizer.lm_lookahead.minimum_representation = la_opts["minimum_representation"]
        post_config.speech_recognizer.recognizer.lm_lookahead.cache_size_low = la_opts["cache_low"]
        post_config.speech_recognizer.recognizer.lm_lookahead.cache_size_high = la_opts["cache_high"]

    post_config.speech_recognizer.global_cache.read_only = True
    post_config.speech_recognizer.global_cache.file = lm_gc.out_global_cache
    post_config.speech_recognizer.model_combination.lm.image = lm_gc.lm_image

    # Lattice writer options
    config.speech_recognizer.lattice_archive.path = "raw-denominator.$(TASK)"
    post_config.speech_recognizer.lattice_archive.info = True

    config._update(extra_config)
    post_config._update(extra_post_config)

    return config, post_config

def make_tf_feature_flow(self, feature_flow, tf_graph, tf_checkpoint, **kwargs):
    """
    :param feature_flow:
    :param Path tf_graph:
    :param Checkpoint tf_checkpoint:
    :param kwargs:
    :return:
    """
    # tf flow (model scoring done in tf flow node) #
    tf_flow = rasr.FlowNetwork()
    tf_flow.add_input("input-features")
    tf_flow.add_output("features")
    tf_flow.add_param("id")
    tf_fwd = tf_flow.add_node("tensorflow-forward", "tf-fwd", {"id": "$(id)"})
    tf_flow.link("network:input-features", tf_fwd + ":features")
    tf_flow.link(tf_fwd + ":log-posteriors", "network:features")

    tf_flow.config = rasr.RasrConfig()
    tf_flow.config[tf_fwd].input_map.info_0.param_name = "features"
    tf_flow.config[tf_fwd].input_map.info_0.tensor_name = "extern_data/placeholders/data/data"
    tf_flow.config[tf_fwd].input_map.info_0.seq_length_tensor_name = (
        "extern_data/placeholders/data/data_dim0_size")

    tf_flow.config[tf_fwd].output_map.info_0.param_name = "log-posteriors"
    tf_flow.config[tf_fwd].output_map.info_0.tensor_name = kwargs.get(
        "output_tensor_name", "output/output_batch_major")

    from sisyphus.delayed_ops import DelayedFunction
    tf_flow.config[tf_fwd].loader.type = "meta"
    tf_flow.config[tf_fwd].loader.meta_graph_file = tf_graph
    # tf_flow.config[tf_fwd].loader.saved_model_file = tf_checkpoint.get_delayed_checkpoint_path()
    tf_flow.config[tf_fwd].loader.saved_model_file = tf_checkpoint

    # TODO: HACK
    from i6_core.returnn.compile import CompileNativeOpJob
    # DO NOT USE BLAS ON I6, THIS WILL SLOW DOWN RECOGNITION ON OPTERON MACHINES BY FACTOR 4
    native_op = CompileNativeOpJob(
        "NativeLstm2",
        returnn_python_exe=self.recognition_args.compile_exec
        or self.defalt_training_args['returnn_python_exe'],
        returnn_root=self.defalt_training_args['returnn_root'],
        # blas_lib=tk.Path(gs.BLAS_LIB, hash_overwrite="BLAS_LIB")).out_op,
        blas_lib=self.recognition_args.blas_lib,
        search_numpy_blas=False).out_op
    tf_flow.config[tf_fwd].loader.required_libraries = native_op

    # interconnect flows #
    tf_feature_flow = rasr.FlowNetwork()
    base_mapping = tf_feature_flow.add_net(feature_flow)
    tf_mapping = tf_feature_flow.add_net(tf_flow)
    tf_feature_flow.interconnect_inputs(feature_flow, base_mapping)
    tf_feature_flow.interconnect(
        feature_flow,
        base_mapping,
        tf_flow,
        tf_mapping,
        {"features": "input-features"},
    )

    if kwargs.get("append", False):
        concat = tf_feature_flow.add_node(
            "generic-vector-f32-concat",
            "concat",
            attr={"timestamp-port": "features"},
        )
        tf_feature_flow.link(
            tf_mapping[tf_flow.get_output_links("features").pop()],
            concat + ":tf")
        tf_feature_flow.link(
            base_mapping[feature_flow.get_output_links("features").pop()],
            concat + ":features",
        )
        tf_feature_flow.add_output("features")
        tf_feature_flow.link(concat, "network:features")
    else:
        tf_feature_flow.interconnect_outputs(tf_flow, tf_mapping)

    # keep the same cache_mode flags as the base feature net
    tf_feature_flow.add_flags(feature_flow.flags)

    return tf_feature_flow

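# Illustrative usage sketch (hypothetical attributes; `system` is an instance of
# the surrounding class): wrap a stored base feature flow with the tf-fwd
# scoring node for a given graph and checkpoint.
tf_feature_flow = system.make_tf_feature_flow(
    system.feature_flows["dev-other"]["gt"],
    tf_graph=tf_graph,
    tf_checkpoint=tf_checkpoint,
)
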