def normalize_features(feature_net, length="infinite", right="infinite", norm_type="mean-and-variance"):
    """
    Wrap a feature flow so that its 'features' output passes through a signal-normalization node.

    :param rasr.FlowNetwork feature_net: the unnormalized flow network, must have an output named 'features'
    :param int|str length: length of the normalization window in frames (or 'infinite')
    :param int|str right: number of frames right of the current position in the normalization window
        (can also be 'infinite')
    :param str norm_type: type of normalization, possible values are 'level', 'mean', 'mean-and-variance',
        'mean-and-variance-1D', 'divide-by-mean', 'mean-norm'
    :return: new network embedding feature_net with a signal-normalization node before the output
    :rtype: rasr.FlowNetwork
    """
    net = rasr.FlowNetwork()
    net.add_output("features")

    node_map = net.add_net(feature_net)
    net.interconnect_inputs(feature_net, node_map)

    # length/right are stringified so that ints and 'infinite' are handled alike
    norm_attrs = {"length": str(length), "right": str(right), "type": norm_type}
    norm_node = net.add_node("signal-normalization", "normalization", norm_attrs)

    unnormalized = node_map[feature_net.get_output_links("features").pop()]
    net.link(unnormalized, norm_node)
    net.link(norm_node, "network:features")
    return net
def create_dump_flow(cls, crp, samples_flow, **kwargs):
    """
    Build a flow that writes the samples to a wav file and passes them on as 'features'.

    The samples are converted from f32 to s16, written to ``dump/$(id).wav`` and converted
    back to f32 before reaching the network output.

    :param cls: first positional argument, not used in the body
    :param crp: only its ``audio_format`` is read, and only when samples_flow is None
    :param samples_flow: flow with a 'samples' output; None selects default_samples_flow(crp.audio_format)
    :param kwargs: accepted but not used
    :return: the assembled flow network
    """
    if samples_flow is None:
        samples_flow = default_samples_flow(crp.audio_format)

    net = rasr.FlowNetwork()
    net.add_param("id")
    net.add_output("features")

    mapping = net.add_net(samples_flow)
    net.interconnect_inputs(samples_flow, mapping)
    samples_out = mapping[samples_flow.get_output_links("samples").pop()]

    to_s16 = net.add_node("generic-convert-vector-f32-to-vector-s16", "convert-back")
    net.link(samples_out, to_s16)

    wav_writer = net.add_node("audio-output-file-wav", "write", {"file": "dump/$(id).wav"})
    net.link(to_s16, wav_writer)

    to_f32 = net.add_node("generic-convert-vector-s16-to-vector-f32", "convert-again")
    net.link(wav_writer, to_f32)

    net.link(to_f32, "network:features")
    return net
def sync_features(feature_net, target_net, feature_output="features", target_output="features"):
    """
    Join two flows through a 'signal-repeating-frame-prediction' node.

    :param rasr.FlowNetwork feature_net: flow whose `feature_output` port feeds the node's default input
    :param rasr.FlowNetwork target_net: flow whose `target_output` port feeds the node's 'target' port
    :param str feature_output: output port of feature_net to take the stream from
    :param str target_output: output port of target_net to take the stream from
    :return: network containing both flows, with a single 'features' output taken from the sync node
    :rtype: rasr.FlowNetwork
    """
    net = rasr.FlowNetwork()

    feat_map = net.add_net(feature_net)
    tgt_map = net.add_net(target_net)
    net.interconnect_inputs(feature_net, feat_map)
    net.interconnect_inputs(target_net, tgt_map)

    sync_node = net.add_node("signal-repeating-frame-prediction", "sync")

    feature_src = feat_map[feature_net.get_output_links(feature_output).pop()]
    net.link(feature_src, sync_node)

    target_src = tgt_map[target_net.get_output_links(target_output).pop()]
    net.link(target_src, sync_node + ":target")

    net.add_output("features")
    net.link(sync_node, "network:features")
    return net
def label_features_with_map_flow(feature_net, map_file, map_key="$(id)", default_output=1.0):
    """
    Copy a feature flow and add a 'labels' output fed by a 'generic-coprus-key-map' node.

    :param feature_net: base flow; its inputs and outputs are carried over to the new network
    :param map_file: value for the node's 'map-file' attribute
    :param str map_key: value for the node's 'key' attribute; if it has the form '$(name)',
        'name' is registered as a network parameter
    :param default_output: value for the node's 'default-output' attribute (formatted with '%s')
    :return: the extended flow network
    """
    # copy original net
    net = rasr.FlowNetwork(name=feature_net.name)
    node_map = net.add_net(feature_net)
    net.interconnect_inputs(feature_net, node_map)
    net.interconnect_outputs(feature_net, node_map)

    is_param_reference = map_key.startswith("$(") and map_key.endswith(")")
    if is_param_reference:
        # strip the surrounding "$(" and ")" to get the bare parameter name
        net.add_param(map_key[2:-1])

    net.add_output("labels")
    key_map_attrs = {
        "key": map_key,
        "map-file": map_file,
        "default-output": "%s" % default_output,
        "start-time": "$(start-time)",
        "end-time": "$(end-time)",
    }
    key_map_node = net.add_node("generic-coprus-key-map", "warping-factor", key_map_attrs)
    net.link(key_map_node, "network:labels")
    return net
def sync_energy_features(feature_net, energy_net):
    """
    Combine a feature flow and an energy flow and expose a synchronized 'energy' output.

    The energy stream goes through a 'generic-synchronization' node whose 'target' port is
    fed by the 'features' output of feature_net. All outputs of feature_net are kept.

    :param feature_net: flow that must have an output named 'features'
    :param energy_net: flow that must have an output named 'energy' or 'features';
        'energy' is preferred when both exist
    :return: the combined flow network
    """
    assert "features" in feature_net.outputs
    assert "energy" in energy_net.outputs or "features" in energy_net.outputs
    if "energy" in energy_net.outputs:
        energy_port = "energy"
    else:
        energy_port = "features"

    net = rasr.FlowNetwork()

    feat_map = net.add_net(feature_net)
    net.interconnect_inputs(feature_net, feat_map)
    net.interconnect_outputs(feature_net, feat_map)

    energy_map = net.add_net(energy_net)
    net.interconnect_inputs(energy_net, energy_map)

    sync_node = net.add_node("generic-synchronization", "energy-synchronization")

    feature_src = feat_map[feature_net.get_output_links("features").pop()]
    net.link(feature_src, sync_node + ":target")

    energy_src = energy_map[energy_net.get_output_links(energy_port).pop()]
    net.link(energy_src, sync_node)

    net.add_output("energy")
    net.link(sync_node, "network:energy")
    return net
def add_context_flow(
    feature_net,
    max_size=9,
    right=4,
    margin_condition="present-not-empty",
    expand_timestamp=False,
):
    """
    Append a 'signal-vector-f32-sequence-concatenation' node to the 'features' output of a flow.

    :param feature_net: flow with an output named 'features'
    :param max_size: value for the node's 'max-size' attribute
    :param right: value for the node's 'right' attribute
    :param margin_condition: value for the node's 'margin-condition' attribute
    :param expand_timestamp: value for the node's 'expand-timestamp' attribute
    :return: new network whose 'features' output is taken from the context-window node
    """
    net = rasr.FlowNetwork()
    net.add_output("features")

    node_map = net.add_net(feature_net)
    net.interconnect_inputs(feature_net, node_map)

    window_attrs = {
        "max-size": max_size,
        "right": right,
        "margin-condition": margin_condition,
        "expand-timestamp": expand_timestamp,
    }
    window_node = net.add_node(
        "signal-vector-f32-sequence-concatenation", "context-window", window_attrs
    )

    feature_src = node_map[feature_net.get_output_links("features").pop()]
    net.link(feature_src, window_node)
    net.link(window_node, "network:features")
    return net
def add_tf_flow_to_base_flow(
    base_flow: rasr.FlowNetwork,
    tf_flow: rasr.FlowNetwork,
    tf_fwd_input_name: str = "tf-fwd-input",
):
    """
    Chain a tf-fwd flow behind a regular feature flow.

    The single output of base_flow is connected to the input `tf_fwd_input_name` of tf_flow;
    the inputs of base_flow and the outputs of tf_flow become those of the result.

    :param FlowNetwork base_flow: feature flow, must have exactly one output
    :param FlowNetwork tf_flow: flow containing the tf-fwd node
    :param str tf_fwd_input_name: name of the tf_flow input to connect to, see: get_tf_flow()
    :rtype: FlowNetwork
    """
    # the tf-fwd input is hard coded to a single port, so only one base output is supported
    assert len(base_flow.outputs) == 1, "Not implemented otherwise"
    sole_output = list(base_flow.outputs)[0]
    port_connections = {sole_output: tf_fwd_input_name}

    combined = rasr.FlowNetwork()
    base_map = combined.add_net(base_flow)
    tf_map = combined.add_net(tf_flow)

    combined.interconnect_inputs(base_flow, base_map)
    combined.interconnect(base_flow, base_map, tf_flow, tf_map, port_connections)
    combined.interconnect_outputs(tf_flow, tf_map)
    return combined
def basic_cache_flow(cache_files):
    """
    Build a flow that reads features from one or more caches.

    Each cache file gets its own 'generic-cache' node. With more than one cache the streams
    are joined by a 'generic-vector-f32-concat' node, otherwise the single cache feeds the
    'features' output directly.

    :param cache_files: a single cache file or a list of them; anything that is not a list
        (including a tuple) is treated as one cache file
    :return: flow network with parameter 'id' and output 'features'
    :raises IndexError: if cache_files is an empty list
    """
    # isinstance instead of an exact type comparison, so list subclasses count as lists too
    if not isinstance(cache_files, list):
        cache_files = [cache_files]

    net = rasr.FlowNetwork()
    net.add_param("id")
    net.add_output("features")

    num_caches = len(cache_files)
    caches = []
    # _numerate is defined elsewhere; presumably it yields one string suffix per cache — TODO confirm
    for suffix, cache_file in zip(_numerate(num_caches), cache_files):
        node_name = "cache" + suffix
        cache_attrs = {
            "id": "$(id)",
            "path": rasr.NamedFlowAttribute(node_name, cache_file),
        }
        caches.append(net.add_node("generic-cache", node_name, cache_attrs))

    if len(caches) > 1:
        concat = net.add_node("generic-vector-f32-concat", "concat")
        for num, cache in enumerate(caches):
            # address the ports through the name add_node returned, not a hard-coded "concat"
            net.link(cache, "%s:in%d" % (concat, num))
        net.link(concat, "network:features")
    else:
        net.link(caches[0], "network:features")

    return net
def select_features(feature_net, select_range):
    """
    Append a 'generic-vector-f32-select' node to the 'features' output of a flow.

    :param feature_net: flow with an output named 'features'
    :param select_range: value for the node's 'select' attribute
    :return: new network whose 'features' output is taken from the select node
    """
    net = rasr.FlowNetwork()
    net.add_output("features")

    node_map = net.add_net(feature_net)
    net.interconnect_inputs(feature_net, node_map)

    select_node = net.add_node(
        "generic-vector-f32-select", "select", {"select": select_range}
    )

    feature_src = node_map[feature_net.get_output_links("features").pop()]
    net.link(feature_src, select_node)
    net.link(select_node, "network:features")
    return net
def add_linear_transform(feature_net, matrix_path):
    """
    Append a 'signal-matrix-multiplication-f32' node to the 'features' output of a flow.

    :param feature_net: flow with an output named 'features'
    :param matrix_path: value for the node's 'file' attribute
    :return: new network whose 'features' output is taken from the linear-transform node
    """
    net = rasr.FlowNetwork()
    net.add_output("features")

    node_map = net.add_net(feature_net)
    net.interconnect_inputs(feature_net, node_map)

    transform_node = net.add_node(
        "signal-matrix-multiplication-f32", "linear-transform", {"file": matrix_path}
    )

    feature_src = node_map[feature_net.get_output_links("features").pop()]
    net.link(feature_src, transform_node)
    net.link(transform_node, "network:features")
    return net
def make_first_feature_energy(feature_net):
    """
    Copy a feature flow and add an 'energy' output taken from port 0 of a split of 'features'.

    The 'features' stream goes through a 'generic-vector-f32-split' node; its port 0 is
    converted with 'generic-convert-vector-f32-to-f32' and exposed as 'energy'.
    All inputs and outputs of feature_net are kept.

    :param feature_net: flow with an output named 'features'
    :return: the extended flow network
    """
    net = rasr.FlowNetwork()
    node_map = net.add_net(feature_net)
    net.interconnect_inputs(feature_net, node_map)
    net.interconnect_outputs(feature_net, node_map)
    net.add_output("energy")

    splitter = net.add_node("generic-vector-f32-split", "split")
    feature_src = node_map[feature_net.get_output_links("features").pop()]
    net.link(feature_src, splitter)

    to_scalar = net.add_node("generic-convert-vector-f32-to-f32", "convert")
    net.link(splitter + ":0", to_scalar)
    net.link(to_scalar, "network:energy")
    return net
def create_convert_flow(cls, crp, timestamp_flow, timestamp_port, **kwargs):
    """
    Build a flow that reads dumped text features, aligns them to a timestamp stream and caches them.

    Pipeline: text reader (``dump/$(id).xml.gz``) -> timestamp-copy -> signal-normalization
    -> signal-repeating-frame-prediction -> generic-cache (``tone.cache.$(TASK)``) -> 'features'.
    Both the timestamp-copy and the frame-prediction node take the timestamp stream on 'target'.

    :param cls: first positional argument, not used in the body
    :param crp: not used in the body
    :param timestamp_flow: flow providing the timestamp stream
    :param timestamp_port: name of the output port of timestamp_flow to take the stream from
    :param kwargs: accepted but not used
    :return: the assembled flow network
    """
    net = rasr.FlowNetwork()
    net.add_param("id")
    net.add_param("start-time")
    net.add_output("features")

    reader_attrs = {"offset": "$(start-time)", "file": "dump/$(id).xml.gz"}
    reader = net.add_node("generic-vector-f32-text-input", "reader", reader_attrs)

    ts_map = net.add_net(timestamp_flow)
    ts_source = ts_map[timestamp_flow.get_output_links(timestamp_port).pop()]

    ts_copy = net.add_node("timestamp-copy", "synchronization", {"ignore-errors": True})
    net.link(ts_source, ts_copy + ":target")
    net.link(reader, ts_copy)

    norm_attrs = {"type": "mean-and-variance", "length": "infinite", "right": "infinite"}
    normalization = net.add_node("signal-normalization", "normalization", norm_attrs)
    net.link(ts_copy, normalization)

    frame_repeat = net.add_node("signal-repeating-frame-prediction", "feature-sync")
    net.link(ts_source, frame_repeat + ":target")
    net.link(normalization, frame_repeat)

    cache_attrs = {"path": "tone.cache.$(TASK)", "id": "$(id)"}
    out_cache = net.add_node("generic-cache", "out-cache", cache_attrs)
    net.link(frame_repeat, out_cache)

    net.link(out_cache, "network:features")
    return net
def energy_flow(
    without_samples=False,
    samples_options=None,
    fft_options=None,
    normalization_type="divide-by-mean",
):
    """
    Build a flow computing a scalar energy stream from the amplitude spectrum.

    The amplitude spectrum goes through a 'generic-vector-f32-norm' node, is converted to a
    vector, optionally normalized, converted back to a scalar and linked to 'network:energy'.

    :param bool without_samples: if True the network gets a 'samples' input and no samples flow
        is embedded; otherwise samples_flow() is embedded and connected to the fft flow
    :param dict|None samples_options: keyword arguments for samples_flow(); None means no options
    :param dict|None fft_options: keyword arguments for fft_flow(); None means no options
    :param str|None normalization_type: 'type' of the signal-normalization node; None skips
        the normalization node
    :return: the assembled flow network
    """
    # None instead of shared mutable dict defaults
    if samples_options is None:
        samples_options = {}
    if fft_options is None:
        fft_options = {}

    net = rasr.FlowNetwork()

    if without_samples:
        net.add_input("samples")
        fft_net = fft_flow(**fft_options)
        fft_mapping = net.add_net(fft_net)
        net.interconnect_inputs(fft_net, fft_mapping)
    else:
        samples_net = samples_flow(**samples_options)
        samples_mapping = net.add_net(samples_net)
        fft_net = fft_flow(**fft_options)
        fft_mapping = net.add_net(fft_net)
        net.interconnect(samples_net, samples_mapping, fft_net, fft_mapping)

    energy = net.add_node("generic-vector-f32-norm", "energy", {"value": 1})
    spectrum_src = fft_mapping[fft_net.get_output_links("amplitude-spectrum").pop()]
    net.link(spectrum_src, energy)

    convert_energy_to_vector = net.add_node(
        "generic-convert-f32-to-vector-f32", "convert-energy-to-vector"
    )
    net.link(energy, convert_energy_to_vector)

    convert_energy_to_scalar = net.add_node(
        "generic-convert-vector-f32-to-f32", "convert-energy-vector-to-scalar"
    )

    if normalization_type is not None:
        energy_normalization = net.add_node(
            "signal-normalization",
            "energy-normalization",
            {"type": normalization_type, "length": "infinite", "right": "infinite"},
        )
        net.link(convert_energy_to_vector, energy_normalization)
        net.link(energy_normalization, convert_energy_to_scalar)
    else:
        net.link(convert_energy_to_vector, convert_energy_to_scalar)

    net.link(convert_energy_to_scalar, "network:energy")
    return net
def get_tf_flow(
    checkpoint_path: Union[Path, returnn.Checkpoint],
    tf_graph_path: Path,
    returnn_op_path: Path,
    forward_output_layer: str = "output",
    tf_fwd_input_name: str = "tf-fwd-input",
):
    """
    Create the flow network and the attached config for a 'tensorflow-forward' node.

    :param Path checkpoint_path: RETURNN model checkpoint which should be loaded
    :param Path tf_graph_path: compiled tf graph for the model
    :param Path returnn_op_path: path to native lstm library
    :param str forward_output_layer: name of layer whose output is used
    :param str tf_fwd_input_name: name of the network input feeding the tf-fwd node,
        see: add_tf_flow_to_base_flow()
    :rtype: FlowNetwork
    """
    tf_flow = rasr.FlowNetwork()
    tf_flow.add_input(tf_fwd_input_name)
    tf_flow.add_output("features")
    tf_flow.add_param("id")

    tf_fwd = tf_flow.add_node("tensorflow-forward", "tf-fwd", {"id": "$(id)"})
    tf_flow.link(f"network:{tf_fwd_input_name}", tf_fwd + ":input")
    tf_flow.link(tf_fwd + ":log-posteriors", "network:features")

    tf_flow.config = rasr.RasrConfig()
    fwd_config = tf_flow.config[tf_fwd]

    # input tensor mapping
    fwd_config.input_map.info_0.param_name = "input"
    fwd_config.input_map.info_0.tensor_name = "extern_data/placeholders/data/data"
    fwd_config.input_map.info_0.seq_length_tensor_name = (
        "extern_data/placeholders/data/data_dim0_size"
    )

    # output tensor mapping
    fwd_config.output_map.info_0.param_name = "log-posteriors"
    fwd_config.output_map.info_0.tensor_name = f"{forward_output_layer}/output_batch_major"

    # graph / checkpoint loading
    fwd_config.loader.type = "meta"
    fwd_config.loader.meta_graph_file = tf_graph_path
    fwd_config.loader.saved_model_file = checkpoint_path
    fwd_config.loader.required_libraries = returnn_op_path

    return tf_flow
def add_derivatives(feature_net, derivatives=1):
    """
    Concatenate regression-based derivatives to the 'features' output of a flow.

    The features pass through a 'signal-delay' node (window of 5, 2 to the right) whose
    ports -2..2 feed a 'signal-regression' node of order 1 and, for derivatives == 2, a
    second one of order 2. The original features and the regression outputs are joined by
    a 'generic-vector-f32-concat' node.

    :param feature_net: flow with an output named 'features'
    :param int derivatives: 0, 1 or 2; with 0 feature_net is returned unchanged
    :return: feature_net itself for derivatives == 0, otherwise a new flow network
    """
    assert derivatives in [0, 1, 2]
    if derivatives == 0:
        return feature_net

    with_second_order = derivatives == 2
    window_ports = range(-2, 3)

    net = rasr.FlowNetwork()
    net.add_output("features")
    node_map = net.add_net(feature_net)
    net.interconnect_inputs(feature_net, node_map)

    delay_attrs = {"max-size": 5, "right": 2, "margin-condition": "present-not-empty"}
    delay = net.add_node("signal-delay", "delay", delay_attrs)
    net.link(node_map[feature_net.get_output_links("features").pop()], delay)

    delta = net.add_node("signal-regression", "delta", {"order": 1, "timestamp-port": 0})
    for port in window_ports:
        net.link("%s:%d" % (delay, port), "%s:%d" % (delta, port))

    if with_second_order:
        deltadelta = net.add_node(
            "signal-regression", "deltadelta", {"order": 2, "timestamp-port": 0}
        )
        for port in window_ports:
            net.link("%s:%d" % (delay, port), "%s:%d" % (deltadelta, port))

    concat = net.add_node("generic-vector-f32-concat", "concat")
    net.link(node_map[feature_net.get_output_links("features").pop()], "%s:in-1" % concat)
    net.link(delta, "%s:in-2" % concat)
    if with_second_order:
        net.link(deltadelta, "%s:in-3" % concat)

    net.link(concat, "network:features")
    return net
def feature_extraction_cache_flow(feature_net, port_name_mapping, one_dimensional_outputs=None):
    """
    Put a 'generic-cache' node behind selected output ports of a feature flow.

    Each cache writes to ``<name>.cache.$(TASK)``. Ports listed in one_dimensional_outputs
    additionally get a 'generic-convert-f32-to-vector-f32' node behind the cache. With more
    than one port the results are concatenated before reaching the 'features' output.

    :param rasr.FlowNetwork feature_net: feature flow to extract features from
    :param dict[str,str] port_name_mapping: maps output ports to names of the cache files
    :param set[str]|None one_dimensional_outputs: output ports that return one-dimensional
        features (e.g. energy)
    :rtype: rasr.FlowNetwork
    """
    scalar_ports = set() if one_dimensional_outputs is None else one_dimensional_outputs

    net = rasr.FlowNetwork()
    net.add_output("features")
    net.add_param("id")
    net.add_param("TASK")

    node_map = net.add_net(feature_net)

    stream_ends = []
    for port, name in port_name_mapping.items():
        cache_attrs = {"id": "$(id)", "path": name + ".cache.$(TASK)"}
        cache_node = net.add_node("generic-cache", "feature-cache-" + name, cache_attrs)
        for src in feature_net.get_output_links(port):
            net.link(node_map[src], cache_node)

        if port not in scalar_ports:
            stream_ends.append(cache_node)
            continue

        to_vector = net.add_node("generic-convert-f32-to-vector-f32", "convert-" + name)
        net.link(cache_node, to_vector)
        stream_ends.append(to_vector)

    if len(stream_ends) > 1:
        concat = net.add_node("generic-vector-f32-concat", "concat")
        for num, end_node in enumerate(stream_ends):
            net.link(end_node, "%s:in%d" % (concat, num))
        net.link(concat, "network:features")
    else:
        net.link(stream_ends[0], "network:features")

    return net
def raw_audio_flow(audio_format="wav"):
    """
    Build a flow that reads an audio file and passes the raw input node output to 'out'.

    :param str audio_format: passed to get_input_node_type() to select the
        'audio-input-file-*' node type
    :return: flow network with parameters 'input-file', 'start-time', 'end-time' and output 'out'
    """
    net = rasr.FlowNetwork()
    net.add_output("out")
    net.add_param(["input-file", "start-time", "end-time"])

    node_type = "audio-input-file-" + get_input_node_type(audio_format)
    reader_attrs = {
        "file": "$(input-file)",
        "start-time": "$(start-time)",
        "end-time": "$(end-time)",
    }
    reader = net.add_node(node_type, "samples", reader_attrs)

    net.link(reader, "network:out")
    return net
def concat_features_with_ivec(feature_net, ivec_path):
    """
    Generate a new flow-network with i-vectors repeated and concatenated to the original feature stream.

    The i-vectors are read from a 'generic-cache' node, repeated by a
    'signal-repeating-frame-prediction' node that takes the features on 'target', and joined
    with the features by a 'generic-vector-f32-concat' node.

    :param feature_net: original flow-network, must have an output named 'features'
    :param ivec_path: ivec_path from IVectorExtractionJob
    :return: the extended flow network with output 'features'
    """
    # copy original net
    net = rasr.FlowNetwork(name=feature_net.name)
    net.add_param(["id", "start-time", "end-time"])
    net.add_output("features")
    mapping = net.add_net(feature_net)
    net.interconnect_inputs(feature_net, mapping)

    # Node names reported by feature_net must be translated through the add_net mapping,
    # since that is what names the copied nodes inside `net`.
    feature_sources = [mapping[src] for src in feature_net.get_output_links("features")]

    # load ivec cache and repeat
    fc = net.add_node("generic-cache", "feature-cache-ivec", {"id": "$(id)", "path": ivec_path})
    sync = net.add_node("signal-repeating-frame-prediction", "sync")
    net.link(fc, sync)
    for src in feature_sources:
        net.link(src, "%s:%s" % (sync, "target"))

    # concat original feature output with repeated ivecs
    concat = net.add_node(
        "generic-vector-f32-concat",
        "concatenation",
        {"check-same-length": True, "timestamp-port": "feature-1"},
    )
    for src in feature_sources:
        net.link(src, "%s:%s" % (concat, "feature-1"))
    net.link(sync, "%s:%s" % (concat, "feature-2"))

    net.link(concat, "network:features")
    return net
def label_features_with_map_flow(feature_net, map_file, map_key="$(id)", default_output=0.0):
    """
    Augment a feature net with a 'labels' output produced by a 'generic-coprus-key-map' node.

    :param feature_net: base feature-net; its inputs and outputs are carried over
    :param map_file: value for the node's 'map-file' attribute
    :param str map_key: value for the node's 'key' attribute; if it has the form '$(name)',
        'name' is registered as a network parameter
    :param default_output: value for the node's 'default-output' attribute (formatted with '%s')
    :return: the augmented flow network
    """
    # copy original net
    net = rasr.FlowNetwork(name=feature_net.name)
    net.add_param(["id", "start-time", "end-time"])
    node_map = net.add_net(feature_net)
    net.interconnect_inputs(feature_net, node_map)
    net.interconnect_outputs(feature_net, node_map)

    references_param = map_key.startswith("$(") and map_key.endswith(")")
    if references_param:
        # strip the surrounding "$(" and ")" to get the bare parameter name
        net.add_param(map_key[2:-1])

    net.add_output("labels")
    key_map_attrs = {
        "key": map_key,
        "map-file": map_file,
        "default-output": "%s" % default_output,
        "start-time": "$(start-time)",
        "end-time": "$(end-time)",
    }
    key_map_node = net.add_node("generic-coprus-key-map", "warping-factor", key_map_attrs)
    net.link(key_map_node, "network:labels")
    return net
def warp_filterbank_with_map_flow(
    feature_net,
    map_file,
    map_key="$(id)",
    default_output=1.0,
    omega=0.875,
    node_name="filterbank",
):
    """
    Copy a feature flow and warp its filterbank with a factor read from a key map.

    The filterbank's existing 'warping-function' is nested inside
    ``linear-2($input(alpha), omega)``; the 'alpha' port of the filterbank is fed by a
    'generic-coprus-key-map' node.

    :param feature_net: flow containing a node `node_name` whose filter is 'signal-filterbank'
    :param map_file: value for the key-map node's 'map-file' attribute
    :param map_key: value for the key-map node's 'key' attribute
    :param default_output: value for the key-map node's 'default-output' attribute
    :param omega: second argument of the linear-2 warping function
    :param str node_name: name of the filterbank node inside feature_net
    :return: the warped flow network
    """
    assert node_name in feature_net.nodes
    assert feature_net.nodes[node_name]["filter"] == "signal-filterbank"

    # copy original net
    net = rasr.FlowNetwork()
    node_map = net.add_net(feature_net)
    net.interconnect_inputs(feature_net, node_map)
    net.interconnect_outputs(feature_net, node_map)

    filterbank = net.nodes[node_map[node_name]]
    previous_warping = filterbank["warping-function"]
    filterbank["warping-function"] = "nest(linear-2($input(alpha), %s), %s)" % (
        omega,
        previous_warping,
    )

    key_map_attrs = {
        "key": map_key,
        "map-file": map_file,
        "default-output": "%s" % default_output,
        "start-time": "$(start-time)",
        "end-time": "$(end-time)",
    }
    key_map_node = net.add_node("generic-coprus-key-map", "warping-factor", key_map_attrs)
    net.link(key_map_node, "%s:alpha" % node_map[node_name])
    return net
def add_static_warping_to_filterbank_flow(
    feature_net,
    alpha_name="warping-alpha",
    omega_name="warping-omega",
    node_name="filterbank",
):
    """
    Copy a feature flow and warp its filterbank with factors taken from network parameters.

    The filterbank's existing 'warping-function' is nested inside
    ``linear-2($(alpha_name), $(omega_name))``; both names are registered as parameters.

    :param feature_net: flow containing a node `node_name` whose filter is 'signal-filterbank'
    :param str alpha_name: name of the network parameter used as first linear-2 argument
    :param str omega_name: name of the network parameter used as second linear-2 argument
    :param str node_name: name of the filterbank node inside feature_net
    :return: the warped flow network
    """
    assert node_name in feature_net.nodes
    assert feature_net.nodes[node_name]["filter"] == "signal-filterbank"

    # copy original net
    net = rasr.FlowNetwork(name=feature_net.name)
    node_map = net.add_net(feature_net)
    net.interconnect_inputs(feature_net, node_map)
    net.interconnect_outputs(feature_net, node_map)
    net.add_param([alpha_name, omega_name])

    filterbank = net.nodes[node_map[node_name]]
    previous_warping = filterbank["warping-function"]
    filterbank["warping-function"] = "nest(linear-2($(%s), $(%s)), %s)" % (
        alpha_name,
        omega_name,
        previous_warping,
    )
    return net
def cepstrum_flow(normalize=True, outputs=16, add_epsilon=False, epsilon=1.175494e-38):
    """
    Build a flow: log (optionally log-plus) -> cosine transform -> optional mean normalization.

    :param bool normalize: append a 'signal-normalization' node of type 'mean'
    :param int outputs: 'nr-outputs' of the 'signal-cosine-transform' node
    :param bool add_epsilon: use 'generic-vector-f32-log-plus' with `epsilon` as 'value'
        instead of 'generic-vector-f32-log'
    :param float epsilon: value passed (as string) to the log-plus node
    :return: flow network with input 'in' and output 'out'
    """
    net = rasr.FlowNetwork()
    net.add_input("in")
    net.add_output("out")

    if not add_epsilon:
        log_node = net.add_node("generic-vector-f32-log", "nonlinear")
    else:
        log_node = net.add_node(
            "generic-vector-f32-log-plus", "nonlinear", {"value": str(epsilon)}
        )

    cosine_node = net.add_node("signal-cosine-transform", "cepstrum", {"nr-outputs": outputs})
    net.link("network:in", log_node)
    net.link(log_node, cosine_node)

    if not normalize:
        net.link(cosine_node, "network:out")
        return net

    norm_attrs = {"length": "infinite", "right": "infinite", "type": "mean"}
    norm_node = net.add_node("signal-normalization", "normalization", norm_attrs)
    net.link(cosine_node, norm_node)
    net.link(norm_node, "network:out")
    return net
def fft_flow(preemphasis=1.0, window_type="hamming", window_shift=0.01, window_length=0.025):
    """
    Build a flow: preemphasis -> window -> real FFT -> amplitude spectrum.

    :param preemphasis: 'alpha' of the 'signal-preemphasis' node
    :param str window_type: 'type' of the 'signal-window' node
    :param window_shift: 'shift' of the 'signal-window' node
    :param window_length: 'length' of the 'signal-window' node, also used as
        'maximum-input-size' of the FFT node
    :return: flow network with input 'samples' and output 'amplitude-spectrum'
    """
    net = rasr.FlowNetwork()
    net.add_input("samples")
    net.add_output("amplitude-spectrum")

    preemphasis_node = net.add_node("signal-preemphasis", "preemphasis", alpha=preemphasis)

    window_attrs = {"type": window_type, "shift": window_shift, "length": window_length}
    window_node = net.add_node("signal-window", "window", window_attrs)

    fft_node = net.add_node(
        "signal-real-fast-fourier-transform", "fft", {"maximum-input-size": window_length}
    )
    spectrum_node = net.add_node(
        "signal-vector-alternating-complex-f32-amplitude", "amplitude-spectrum"
    )

    net.link("network:samples", preemphasis_node)
    net.link(preemphasis_node, window_node)
    net.link(window_node, fft_node)
    net.link(fft_node, spectrum_node)
    net.link(spectrum_node, "network:amplitude-spectrum")
    return net
def external_file_feature_flow(flow_file):
    """
    Wrap an external flow file as a single node and expose its 'out' port as 'features'.

    The network parameters 'input-file', 'start-time', 'end-time', 'track' and 'id' are
    forwarded to the node.

    :param flow_file: used as the filter of the wrapping node
    :return: flow network with output 'features'
    """
    net = rasr.FlowNetwork()
    for param in ("input-file", "start-time", "end-time", "track", "id"):
        net.add_param(param)
    net.add_output("features")

    forwarded_attrs = {
        "input-file": "$(input-file)",
        "start-time": "$(start-time)",
        "end-time": "$(end-time)",
        "track": "$(track)",
        "id": "$(id)",
        "ignore-unknown-parameters": "true",
    }
    base_extraction = net.add_node(flow_file, "base-feature-extraction", forwarded_attrs)

    net.link(base_extraction + ":out", "network:features")
    return net
def samples_flow(
    audio_format="wav",
    dc_detection=True,
    dc_params={
        "min-dc-length": 0.01,
        "max-dc-increment": 0.9,
        "min-non-dc-segment-length": 0.021,
    },
    input_options=None,
    scale_input=None,
):
    """
    Create a flow to read samples from audio files, convert them to f32 and apply optional dc-detection.

    The input node type is chosen by get_input_node_type(audio_format). According to the
    original documentation, files without a native input node are opened with the ffmpeg
    flow node; please check if scaling is needed. Native input formats are:

    - wav
    - nist
    - flac
    - mpeg (mp3)
    - gsm
    - htk
    - phondat
    - oss

    For more information see: https://www-i6.informatik.rwth-aachen.de/rwth-asr/manual/index.php/Audio_Nodes

    :param str audio_format: the input audio format
    :param bool dc_detection: enable dc-detection node
    :param dict dc_params: optional dc-detection node parameters
    :param dict input_options: additional options for the input node
    :param int|float|None scale_input: scale the waveform samples, this might be needed to scale ogg inputs by 2**15
        to support feature flows designed for 16-bit wav inputs
    :return: flow network with output 'samples'
    """
    net = rasr.FlowNetwork()
    net.add_output("samples")
    net.add_param(["input-file", "start-time", "end-time", "track"])

    reader_attrs = {
        "file": "$(input-file)",
        "start-time": "$(start-time)",
        "end-time": "$(end-time)",
    }
    if input_options is not None:
        reader_attrs.update(**input_options)

    input_node_type = get_input_node_type(audio_format)
    reader = net.add_node("audio-input-file-" + input_node_type, "samples", reader_attrs)

    if input_node_type != "ffmpeg":
        # native readers: select the track, then convert s16 -> f32
        demultiplex = net.add_node(
            "generic-vector-s16-demultiplex", "demultiplex", track="$(track)"
        )
        net.link(reader, demultiplex)
        to_f32 = net.add_node("generic-convert-vector-s16-to-vector-f32", "convert")
        net.link(demultiplex, to_f32)
        stream = to_f32
    else:
        # the ffmpeg node output is used as-is
        stream = reader

    if scale_input:
        scaler = net.add_node(
            "generic-vector-f32-multiplication", "scale", value=str(scale_input)
        )
        net.link(stream, scaler)
        stream = scaler

    if dc_detection:
        dc_node = net.add_node("signal-dc-detection", "dc-detection", dc_params)
        net.link(stream, dc_node)
        net.link(dc_node, "network:samples")
    else:
        net.link(stream, "network:samples")

    return net
def returnn_rasr_training(
    self,
    name,
    returnn_config,
    nn_train_args,
    train_corpus_key,
    cv_corpus_key,
):
    """
    Create a ReturnnRasrTrainingJob for the given train/cv corpora and register its output alias.

    :param name: passed on to self._add_output_alias_for_train_job
    :param returnn_config: must be a returnn.ReturnnConfig
    :param nn_train_args: extra keyword arguments forwarded to returnn.ReturnnRasrTrainingJob
    :param train_corpus_key: key into self.train_input_data
    :param cv_corpus_key: key into self.cv_input_data
    :return: the created training job
    :raises NotImplementedError: for unsupported types of features or alignments
    """
    train_data = self.train_input_data[train_corpus_key]
    dev_data = self.cv_input_data[cv_corpus_key]

    train_crp = train_data.get_crp()
    dev_crp = dev_data.get_crp()

    # only the train-side flow/features/alignments are used below, so both sides must agree
    assert train_data.feature_flow == dev_data.feature_flow
    assert train_data.features == dev_data.features
    assert train_data.alignments == dev_data.alignments

    if train_data.feature_flow is not None:
        # an explicit feature flow takes precedence over cached features
        feature_flow = train_data.feature_flow
    else:
        # no flow given: build a cache flow from the stored features
        if isinstance(train_data.features, rasr.FlagDependentFlowAttribute):
            feature_path = train_data.features
        elif isinstance(train_data.features, (MultiPath, MultiOutputPath)):
            feature_path = rasr.FlagDependentFlowAttribute(
                "cache_mode",
                {
                    "task_dependent": train_data.features,
                },
            )
        elif isinstance(train_data.features, tk.Path):
            feature_path = rasr.FlagDependentFlowAttribute(
                "cache_mode",
                {
                    "bundle": train_data.features,
                },
            )
        else:
            raise NotImplementedError

        feature_flow = features.basic_cache_flow(feature_path)
        # NOTE(review): nesting reconstructed from a whitespace-collapsed source; this check is
        # assumed to belong to the cache-flow branch — confirm against the original file
        if isinstance(train_data.features, tk.Path):
            # a plain path was wrapped with the "bundle" key above, so select that flag
            feature_flow.flags = {"cache_mode": "bundle"}

    if isinstance(train_data.alignments, rasr.FlagDependentFlowAttribute):
        # resolve the flag-dependent attribute against a throwaway network with cache_mode "bundle"
        alignments = copy.deepcopy(train_data.alignments)
        net = rasr.FlowNetwork()
        net.flags = {"cache_mode": "bundle"}
        alignments = alignments.get(net)
    elif isinstance(train_data.alignments, (MultiPath, MultiOutputPath)):
        raise NotImplementedError
    elif isinstance(train_data.alignments, tk.Path):
        alignments = train_data.alignments
    else:
        raise NotImplementedError

    assert isinstance(returnn_config, returnn.ReturnnConfig)

    train_job = returnn.ReturnnRasrTrainingJob(
        train_crp=train_crp,
        dev_crp=dev_crp,
        feature_flow=feature_flow,
        alignment=alignments,
        returnn_config=returnn_config,
        returnn_root=self.returnn_root,
        returnn_python_exe=self.returnn_python_exe,
        **nn_train_args,
    )
    self._add_output_alias_for_train_job(
        train_job=train_job,
        train_corpus_key=train_corpus_key,
        cv_corpus_key=cv_corpus_key,
        name=name,
    )

    return train_job
def recognized_warping_factor_flow(
    feature_net,
    alphas_file,
    mixtures,
    filterbank_node="filterbank",
    amplitude_spectrum_node="amplitude-spectrum",
    omega=0.875,
):
    """
    Build a flow whose filterbank is warped by a factor recognized from the unwarped features.

    The original flow is copied and its 'features' output is detached. A second copy of the
    sub-network starting at the filterbank node is added, with its warping function nested
    inside ``linear-2($input(alpha), omega)``. The 'alpha' port of that filterbank is fed by a
    'signal-bayes-classification' node, which receives the original features and a
    normalized energy stream as 'feature-score-weight'.

    :param feature_net: flow containing `filterbank_node` (filter 'signal-filterbank') and
        `amplitude_spectrum_node`
    :param alphas_file: value for the recognizer's 'class-label-file' attribute
    :param mixtures: set as the likelihood function file in the recognizer config
    :param str filterbank_node: name of the filterbank node inside feature_net
    :param str amplitude_spectrum_node: name of the node the energy is computed from
    :param omega: second argument of the linear-2 warping function
    :return: the assembled flow network with an attached RasrConfig
    """
    assert filterbank_node in feature_net.nodes
    assert feature_net.nodes[filterbank_node]["filter"] == "signal-filterbank"
    assert amplitude_spectrum_node in feature_net.nodes

    # copy original net
    net = rasr.FlowNetwork(name=feature_net.name)
    mapping = net.add_net(feature_net)
    net.interconnect_inputs(feature_net, mapping)
    net.interconnect_outputs(feature_net, mapping)

    # remove output for features
    # (the sources are remembered first; they later serve as the sync target)
    original_feature_outputs = net.get_output_links("features")
    net.unlink(to_name="%s:%s" % (net.name, "features"))

    # presumably subnet_from_node returns the part of the flow from the filterbank on, plus the
    # links that were cut by the extraction — confirm against FlowNetwork.subnet_from_node
    warped_net, broken_links = feature_net.subnet_from_node(filterbank_node)
    warped_mapping = net.add_net(warped_net)
    net.interconnect_outputs(warped_net, warped_mapping)

    # reconnect the cut links: source in the original copy, target in the warped copy
    for l in broken_links:
        net.link(mapping[l[0]], warped_mapping[l[1]])

    # wrap the existing warping function of the warped filterbank
    fbnode = net.nodes[warped_mapping[filterbank_node]]
    fbnode["warping-function"] = "nest(linear-2($input(alpha), %s), %s)" % (
        omega,
        fbnode["warping-function"],
    )

    # energy
    energy = net.add_node("generic-vector-f32-norm", "energy", {"value": 1})
    net.link(mapping[amplitude_spectrum_node], energy)

    convert_energy_to_vector = net.add_node(
        "generic-convert-f32-to-vector-f32", "convert-energy-to-vector"
    )
    net.link(energy, convert_energy_to_vector)

    energy_normalization = net.add_node(
        "signal-normalization",
        "energy-normalization",
        {"type": "divide-by-mean", "length": "infinite", "right": "infinite"},
    )
    net.link(convert_energy_to_vector, energy_normalization)

    convert_energy_to_scalar = net.add_node(
        "generic-convert-vector-f32-to-f32", "convert-energy-vector-to-scalar"
    )
    net.link(energy_normalization, convert_energy_to_scalar)

    # the energy stream is synchronized with the original (unwarped) features as target
    energy_sync = net.add_node("generic-synchronization", "energy-sync")
    net.link(convert_energy_to_scalar, energy_sync)
    net.link(original_feature_outputs.pop(), "%s:target" % energy_sync)

    rec = net.add_node(
        "signal-bayes-classification",
        "warping-factor-recognizer",
        {"class-label-file": alphas_file},
    )
    # the recognizer output drives the 'alpha' port of the warped filterbank
    net.link(rec, "%s:alpha" % warped_mapping[filterbank_node])
    net.link(energy_sync, "%s:feature-score-weight" % rec)
    # the 'target' port of the sync node is used as the source of the recognizer's default input
    net.link("%s:target" % energy_sync, rec)

    net.config = rasr.RasrConfig()
    net.config[rec].likelihood_function.file = mixtures
    net.config[rec].likelihood_function.feature_scorer_type = "SIMD-diagonal-maximum"

    return net
def samples_with_silence_normalization_flow(
    audio_format="wav", dc_detection=True, dc_params=None, silence_params=None
):
    """
    Create a samples flow with silence normalization and optional dc-detection.

    Pipeline: audio input -> demultiplex -> s16-to-f32 -> silence-normalization
    -> (dc-detection) -> warp-time -> 'samples'. The silence parameters are written to the
    attached RasrConfig under the silence-normalization node.

    :param str audio_format: suffix of the 'audio-input-file-*' node type
    :param bool dc_detection: insert a 'signal-dc-detection' node behind the silence normalization
    :param dict|None dc_params: overrides merged into the default dc-detection parameters
    :param dict|None silence_params: overrides merged into the default silence-normalization parameters
    :return: flow network with output 'samples'
    """
    _dc_params = {
        "min-dc-length": 0.01,
        "max-dc-increment": 0.9,
        "min-non-dc-segment-length": 0.021,
    }
    _silence_params = {
        "absolute-silence-threshold": 250,
        "discard-unsure-segments": True,
        "min-surrounding-silence": 0.1,
        "fill-up-silence": True,
        "silence-ratio": 0.25,
        "silence-threshold": 0.05,
    }

    if dc_params is not None:
        _dc_params.update(dc_params)
    if silence_params is not None:
        _silence_params.update(silence_params)

    net = rasr.FlowNetwork()
    net.add_output("samples")
    net.add_param(["input-file", "start-time", "end-time", "track"])

    samples = net.add_node(
        "audio-input-file-" + audio_format,
        "samples",
        {
            "file": "$(input-file)",
            "start-time": "$(start-time)",
            "end-time": "$(end-time)",
        },
    )

    demultiplex = net.add_node(
        "generic-vector-s16-demultiplex", "demultiplex", track="$(track)"
    )
    net.link(samples, demultiplex)

    convert = net.add_node("generic-convert-vector-s16-to-vector-f32", "convert")
    net.link(demultiplex, convert)

    sil_norm = net.add_node("signal-silence-normalization", "silence-normalization")
    net.link(convert, sil_norm)

    warp_time = net.add_node("warp-time", "warp-time", {"start-time": "$(start-time)"})
    if dc_detection:
        dc_node = net.add_node("signal-dc-detection", "dc-detection", _dc_params)
        net.link(sil_norm, dc_node)
        net.link(dc_node, warp_time)
    else:
        net.link(sil_norm, warp_time)

    net.link(warp_time, "network:samples")

    net.config = rasr.RasrConfig()
    # Iterate over key/value pairs: looping over the dict itself yields only the keys, and
    # unpacking a key string into two names raises ValueError.
    for key, value in _silence_params.items():
        net.config[sil_norm][key] = value

    return net
def plp_flow(
    warping_function="bark",
    num_features=20,
    sampling_rate=8000,
    filter_width=3.8,
    normalize=True,
    normalization_options=None,
    without_samples=False,
    samples_options=None,
    fft_options=None,
):
    """
    Build a PLP-style feature flow on top of the fft flow.

    Pipeline: power spectrum -> filterbank -> first/last filter duplicated ->
    equal-loudness preemphasis -> intensity-loudness law -> cosine transform ->
    autoregression -> cepstrum -> optional normalization -> 'network:features'.

    :param str warping_function: 'warping-function' of the filterbank node
    :param int num_features: number of cepstral outputs; also determines the filter spacing
    :param sampling_rate: frequency the filter spacing is derived from
    :param float filter_width: 'filter-width' of the filterbank node
    :param bool normalize: append a 'signal-normalization' node
    :param dict|None normalization_options: overrides for the normalization node attributes
    :param bool without_samples: if True the network gets a 'samples' input instead of an
        embedded samples flow
    :param dict|None samples_options: keyword arguments for samples_flow()
    :param dict|None fft_options: keyword arguments for fft_flow()
    :return: the assembled flow network
    """
    normalization_options = {} if normalization_options is None else normalization_options
    samples_options = {} if samples_options is None else samples_options
    fft_options = {} if fft_options is None else fft_options

    net = rasr.FlowNetwork()

    if without_samples:
        net.add_input("samples")
    else:
        samples_net = samples_flow(**samples_options)
        samples_mapping = net.add_net(samples_net)

    fft_net = fft_flow(**fft_options)
    fft_mapping = net.add_net(fft_net)

    if without_samples:
        net.interconnect_inputs(fft_net, fft_mapping)
    else:
        net.interconnect(samples_net, samples_mapping, fft_net, fft_mapping)

    power_spectrum = net.add_node("generic-vector-f32-power", "power-spectrum", {"value": 2})
    amplitude_src = fft_mapping[fft_net.get_output_links("amplitude-spectrum").pop()]
    net.link(amplitude_src, power_spectrum)

    scaled_rate = sampling_rate / 600
    bark = 6 * log(scaled_rate + sqrt(scaled_rate ** 2 + 1))
    # For IncludeBoundary
    # Number of filters = floor((maximal-frequency - filter-width) / spacing + 1)
    # => spacing = (max-width) / num-1
    spacing = (bark - filter_width) / (num_features - 1)

    filterbank_attrs = {
        "warping-function": warping_function,
        "filter-width": filter_width,
        "spacing": spacing,
        "type": "trapeze",
        "boundary": "include-boundary",
    }
    filterbank = net.add_node("signal-filterbank", "filterbank", filterbank_attrs)
    net.link(power_spectrum, filterbank)

    head_split = net.add_node("generic-vector-f32-split", "split-filterbank")
    net.link(filterbank, head_split)

    tail_split = net.add_node(
        "generic-vector-f32-split", "reverse-split-filterbank", {"reverse": "true"}
    )
    net.link(filterbank, tail_split)

    padded_filterbank = net.add_node("generic-vector-f32-concat", "copy-first-last-filterbank")
    net.link(head_split + ":0", padded_filterbank + ":first")
    net.link(filterbank, padded_filterbank + ":middle")
    net.link(tail_split + ":0", padded_filterbank + ":last")

    loudness_attrs = {
        "f": "nest(nest(disc-to-cont, invert(bark)), equal-loudness-preemphasis)",
        "operation": "multiplies",
    }
    equal_loudness = net.add_node(
        "signal-vector-f32-continuous-transform", "equal-loudness-preemphasis", loudness_attrs
    )
    net.link(padded_filterbank, equal_loudness)

    intensity_loudness = net.add_node(
        "generic-vector-f32-power", "intensity-loudness-law", {"value": "0.33"}
    )
    net.link(equal_loudness, intensity_loudness)

    autocorr_attrs = {
        "nr-outputs": num_features,
        "input-type": "N-plus-one",
        "normalize": "true",
    }
    autocorrelation = net.add_node("signal-cosine-transform", "autocorrelation", autocorr_attrs)
    net.link(intensity_loudness, autocorrelation)

    autoregression = net.add_node("signal-autocorrelation-to-autoregression", "autoregression")
    net.link(autocorrelation, autoregression)

    cepstrum = net.add_node(
        "signal-autoregression-to-cepstrum",
        "linear-prediction-cepstrum",
        {"nr-outputs": num_features},
    )
    net.link(autoregression, cepstrum)

    if not normalize:
        net.link(cepstrum, "network:features")
        return net

    norm_attrs = {
        "type": "mean-and-variance",
        "length": "infinity",
        "right": "infinity",
    }
    norm_attrs.update(normalization_options)
    normalization = net.add_node("signal-normalization", "feature-normalization", norm_attrs)
    net.link(cepstrum, normalization)
    net.link(normalization, "network:features")
    return net
def gammatone_flow(
    minfreq=100,
    maxfreq=7500,
    channels=68,
    warp_freqbreak=None,
    tempint_type="hanning",
    tempint_shift=0.01,
    tempint_length=0.025,
    flush_before_gap=True,
    do_specint=True,
    specint_type="hanning",
    specint_shift=4,
    specint_length=9,
    normalize=True,
    preemphasis=True,
    legacy_scaling=False,
    without_samples=False,
    samples_options=None,
    normalization_options=None,
):
    """
    Build a gammatone feature flow.

    Pipeline: (preemphasis) -> gammatone -> temporal integration -> (spectral integration)
    -> type conversion -> scaling -> power nonlinearity -> cosine transform
    -> (normalization -> (legacy scaling)) -> 'network:features'.

    :param minfreq: 'minfreq' of the gammatone node
    :param maxfreq: 'maxfreq' of the gammatone node
    :param channels: 'channels' of the gammatone node and 'nr-outputs' of the cosine transform
    :param warp_freqbreak: if not None, set as 'warp-freqbreak' of the gammatone node
    :param tempint_type: 'type' of the temporal integration node
    :param tempint_shift: 'shift' of the temporal integration node
    :param tempint_length: 'length' of the temporal integration node
    :param flush_before_gap: 'flush-before-gap' of the temporal integration node
    :param bool do_specint: insert the spectral integration node
    :param specint_type: 'type' of the spectral integration node
    :param specint_shift: 'shift' of the spectral integration node
    :param specint_length: 'length' of the spectral integration node
    :param bool normalize: append a 'signal-normalization' node
    :param bool preemphasis: insert a 'signal-preemphasis' node (alpha 1.00) before the gammatone node
    :param bool legacy_scaling: multiply the normalized features by 3 (only with normalize)
    :param bool without_samples: if True the network gets a 'samples' input instead of an
        embedded samples flow
    :param dict|None samples_options: keyword arguments for samples_flow(); None means no options
    :param dict|None normalization_options: overrides for the normalization node attributes;
        None means no overrides
    :return: the assembled flow network
    """
    # None instead of shared mutable dict defaults
    if samples_options is None:
        samples_options = {}
    if normalization_options is None:
        normalization_options = {}

    net = rasr.FlowNetwork()

    if without_samples:
        net.add_input("samples")
        sample_input = "network:samples"
    else:
        samples_net = samples_flow(**samples_options)
        samples_mapping = net.add_net(samples_net)
        sample_input = samples_mapping[samples_net.get_output_links("samples").pop()]

    gammatone_args = {"minfreq": minfreq, "maxfreq": maxfreq, "channels": channels}
    if warp_freqbreak is not None:
        gammatone_args["warp-freqbreak"] = warp_freqbreak
    gammatone = net.add_node("signal-gammatone", "gammatone", gammatone_args)

    if preemphasis:
        node_preemphasis = net.add_node("signal-preemphasis", "preemphasis", {"alpha": 1.00})
        net.link(sample_input, node_preemphasis)
        net.link(node_preemphasis, gammatone)
    else:
        net.link(sample_input, gammatone)

    tempint = net.add_node(
        "signal-temporalintegration",
        "temporal-integration",
        {
            "type": tempint_type,
            "shift": tempint_shift,
            "length": tempint_length,
            "flush-before-gap": flush_before_gap,
        },
    )
    net.link(gammatone, tempint)

    # track the node feeding the type conversion instead of a placeholder variable
    if do_specint:
        specint = net.add_node(
            "signal-spectralintegration",
            "spectral-integration",
            {"type": specint_type, "shift": specint_shift, "length": specint_length},
        )
        net.link(tempint, specint)
        convert_input = specint
    else:
        convert_input = tempint

    convert = net.add_node("generic-convert-vector-vector-f32-to-vector-f32", "typeconvert")
    net.link(convert_input, convert)

    scaling = net.add_node("generic-vector-f32-multiplication", "scaling", {"value": 0.00035})
    net.link(convert, scaling)

    nonlinear = net.add_node("generic-vector-f32-power", "nonlinear", {"value": 0.1})
    net.link(scaling, nonlinear)

    cos_transform = net.add_node(
        "signal-cosine-transform", "cos_transform", {"nr-outputs": channels}
    )
    net.link(nonlinear, cos_transform)

    if normalize:
        attr = {
            "type": "mean-and-variance",
            "length": "infinity",
            "right": "infinity",
        }
        attr.update(normalization_options)
        normalization = net.add_node("signal-normalization", "gt-normalization", attr)
        net.link(cos_transform, normalization)
        if legacy_scaling:
            # In legacy setups, features were multiplied with a scalar of 3
            post_norm_scaling = net.add_node(
                "generic-vector-f32-multiplication", "post-norm-scaling", {"value": 3}
            )
            net.link(normalization, post_norm_scaling)
            net.link(post_norm_scaling, "network:features")
        else:
            net.link(normalization, "network:features")
    else:
        net.link(cos_transform, "network:features")

    return net