Ejemplo n.º 1
0
def normalize_features(feature_net,
                       length="infinite",
                       right="infinite",
                       norm_type="mean-and-variance"):
    """
    Append a signal-normalization node of the specified type to a feature flow.

    :param feature_net rasr.FlowNetwork: the unnormalized flow network, must have an output named 'features'
    :param length int|str: length of the normalization window in frames (or 'infinite')
    :param right int|str: number of frames right of the current position in the normalization window (can also be 'infinite')
    :param norm_type str: type of normalization, possible values are 'level', 'mean', 'mean-and-variance', 'mean-and-variance-1D', 'divide-by-mean', 'mean-norm'
    :returns rasr.FlowNetwork: new network containing feature_net with a signal-normalization node before the 'features' output
    """
    normalized_net = rasr.FlowNetwork()
    normalized_net.add_output("features")

    node_map = normalized_net.add_net(feature_net)
    normalized_net.interconnect_inputs(feature_net, node_map)

    # the window settings are always handed to the node as strings
    norm_attrs = {
        "length": str(length),
        "right": str(right),
        "type": norm_type,
    }
    norm_node = normalized_net.add_node("signal-normalization",
                                        "normalization", norm_attrs)

    feature_source = feature_net.get_output_links("features").pop()
    normalized_net.link(node_map[feature_source], norm_node)
    normalized_net.link(norm_node, "network:features")

    return normalized_net
Ejemplo n.º 2
0
    def create_dump_flow(cls, crp, samples_flow, **kwargs):
        """
        Build a flow that writes the samples to 'dump/$(id).wav' and forwards them as 'features'.

        The samples pass through an f32-to-s16 conversion node, the wav output
        node and an s16-to-f32 conversion node, in that order.

        :param crp: only ``crp.audio_format`` is read, and only if ``samples_flow`` is None
        :param samples_flow: flow with an output named 'samples'; when None,
            ``default_samples_flow(crp.audio_format)`` is used instead
        :param kwargs: accepted but not used by this method
        :return: the newly built rasr.FlowNetwork
        """
        source_flow = samples_flow
        if source_flow is None:
            source_flow = default_samples_flow(crp.audio_format)

        dump_net = rasr.FlowNetwork()
        dump_net.add_param("id")
        dump_net.add_output("features")

        source_map = dump_net.add_net(source_flow)
        dump_net.interconnect_inputs(source_flow, source_map)

        samples_source = source_flow.get_output_links("samples").pop()
        samples_node = source_map[samples_source]

        to_s16 = dump_net.add_node("generic-convert-vector-f32-to-vector-s16",
                                   "convert-back")
        dump_net.link(samples_node, to_s16)

        wav_writer = dump_net.add_node("audio-output-file-wav", "write",
                                       {"file": "dump/$(id).wav"})
        dump_net.link(to_s16, wav_writer)

        to_f32 = dump_net.add_node("generic-convert-vector-s16-to-vector-f32",
                                   "convert-again")
        dump_net.link(wav_writer, to_f32)
        dump_net.link(to_f32, "network:features")

        return dump_net
Ejemplo n.º 3
0
def sync_features(feature_net,
                  target_net,
                  feature_output="features",
                  target_output="features"):
    """
    Combine two flows through a 'signal-repeating-frame-prediction' node.

    The chosen output of ``feature_net`` is linked to the node's default input,
    the chosen output of ``target_net`` to its 'target' port. The node's output
    becomes the 'features' output of the returned network.

    :param rasr.FlowNetwork feature_net: flow providing the data stream
    :param rasr.FlowNetwork target_net: flow linked to the 'target' port
    :param str feature_output: name of the output of feature_net to use
    :param str target_output: name of the output of target_net to use
    :return: new rasr.FlowNetwork with a single declared output 'features'
    """
    combined = rasr.FlowNetwork()

    feature_map = combined.add_net(feature_net)
    target_map = combined.add_net(target_net)

    combined.interconnect_inputs(feature_net, feature_map)
    combined.interconnect_inputs(target_net, target_map)

    sync_node = combined.add_node("signal-repeating-frame-prediction", "sync")

    feature_source = feature_net.get_output_links(feature_output).pop()
    combined.link(feature_map[feature_source], sync_node)

    target_source = target_net.get_output_links(target_output).pop()
    combined.link(target_map[target_source], sync_node + ":target")

    combined.add_output("features")
    combined.link(sync_node, "network:features")

    return combined
Ejemplo n.º 4
0
def label_features_with_map_flow(
    feature_net, map_file, map_key="$(id)", default_output=1.0
):
    """
    Augment a feature net with an additional 'labels' output fed by a corpus-key-map node.

    All inputs and outputs of ``feature_net`` are kept; the new node is not
    connected to any node of the original net.

    :param rasr.FlowNetwork feature_net: base feature net
    :param map_file: value of the node's 'map-file' attribute
    :param str map_key: value of the node's 'key' attribute; if it has the form
        '$(name)', ``name`` is declared as a parameter of the network
    :param default_output: value of the node's 'default-output' attribute (formatted with '%s')
    :return: new rasr.FlowNetwork with the outputs of feature_net plus 'labels'
    """
    # copy original net
    net = rasr.FlowNetwork(name=feature_net.name)
    # The map node below references $(start-time) and $(end-time); declare them
    # so the network does not depend on feature_net happening to provide them.
    net.add_param("start-time")
    net.add_param("end-time")
    mapping = net.add_net(feature_net)
    net.interconnect_inputs(feature_net, mapping)
    net.interconnect_outputs(feature_net, mapping)

    if map_key.startswith("$(") and map_key.endswith(")"):
        # strip the '$(' prefix and ')' suffix to obtain the parameter name
        net.add_param(map_key[2:-1])

    net.add_output("labels")
    corpus_map = net.add_node(
        "generic-coprus-key-map",
        "warping-factor",
        {
            "key": map_key,
            "map-file": map_file,
            "default-output": "%s" % default_output,
            "start-time": "$(start-time)",
            "end-time": "$(end-time)",
        },
    )
    net.link(corpus_map, "network:labels")

    return net
Ejemplo n.º 5
0
def sync_energy_features(feature_net, energy_net):
    """
    Synchronize an energy stream to a feature stream with a 'generic-synchronization' node.

    The 'features' output of ``feature_net`` is linked to the node's 'target'
    port, the energy stream to its default input. The result keeps all outputs
    of ``feature_net`` and adds an 'energy' output.

    :param rasr.FlowNetwork feature_net: flow with an output named 'features'
    :param rasr.FlowNetwork energy_net: flow with an output named 'energy' or,
        if that does not exist, 'features'
    :return: new rasr.FlowNetwork
    """
    assert "features" in feature_net.outputs
    if "energy" in energy_net.outputs:
        energy_port = "energy"
    else:
        assert "features" in energy_net.outputs
        energy_port = "features"

    combined = rasr.FlowNetwork()

    feature_map = combined.add_net(feature_net)
    combined.interconnect_inputs(feature_net, feature_map)
    combined.interconnect_outputs(feature_net, feature_map)

    energy_map = combined.add_net(energy_net)
    combined.interconnect_inputs(energy_net, energy_map)

    sync_node = combined.add_node("generic-synchronization",
                                  "energy-synchronization")

    feature_source = feature_net.get_output_links("features").pop()
    combined.link(feature_map[feature_source], sync_node + ":target")

    energy_source = energy_net.get_output_links(energy_port).pop()
    combined.link(energy_map[energy_source], sync_node)

    combined.add_output("energy")
    combined.link(sync_node, "network:energy")

    return combined
Ejemplo n.º 6
0
def add_context_flow(
    feature_net,
    max_size=9,
    right=4,
    margin_condition="present-not-empty",
    expand_timestamp=False,
):
    """
    Route the 'features' output of a flow through a 'signal-vector-f32-sequence-concatenation' node.

    :param rasr.FlowNetwork feature_net: flow with an output named 'features'
    :param max_size: value of the node's 'max-size' attribute
    :param right: value of the node's 'right' attribute
    :param margin_condition: value of the node's 'margin-condition' attribute
    :param expand_timestamp: value of the node's 'expand-timestamp' attribute
    :return: new rasr.FlowNetwork whose 'features' output is the output of the added node
    """
    windowed_net = rasr.FlowNetwork()
    windowed_net.add_output("features")

    node_map = windowed_net.add_net(feature_net)
    windowed_net.interconnect_inputs(feature_net, node_map)

    window_attrs = {
        "max-size": max_size,
        "right": right,
        "margin-condition": margin_condition,
        "expand-timestamp": expand_timestamp,
    }
    window_node = windowed_net.add_node(
        "signal-vector-f32-sequence-concatenation",
        "context-window",
        window_attrs,
    )

    feature_source = feature_net.get_output_links("features").pop()
    windowed_net.link(node_map[feature_source], window_node)
    windowed_net.link(window_node, "network:features")

    return windowed_net
Ejemplo n.º 7
0
    def add_tf_flow_to_base_flow(
        base_flow: rasr.FlowNetwork,
        tf_flow: rasr.FlowNetwork,
        tf_fwd_input_name: str = "tf-fwd-input",
    ):
        """
        Chain the tf-fwd flow behind a regular flow network.

        The single output of ``base_flow`` is connected to the input
        ``tf_fwd_input_name`` of ``tf_flow``. Inputs of the result are taken
        from ``base_flow``, outputs from ``tf_flow``.

        :param FlowNetwork base_flow: flow with exactly one output
        :param FlowNetwork tf_flow:
        :param str tf_fwd_input_name: see: get_tf_flow()
        :rtype: FlowNetwork
        """
        # see hard coded tf-fwd input
        assert len(base_flow.outputs) == 1, "Not implemented otherwise"
        output_names = list(base_flow.outputs)
        port_links = {output_names[0]: tf_fwd_input_name}

        combined = rasr.FlowNetwork()
        base_map = combined.add_net(base_flow)
        tf_map = combined.add_net(tf_flow)

        combined.interconnect_inputs(base_flow, base_map)
        combined.interconnect(base_flow, base_map, tf_flow, tf_map, port_links)
        combined.interconnect_outputs(tf_flow, tf_map)

        return combined
Ejemplo n.º 8
0
def basic_cache_flow(cache_files):
    """
    Build a flow that reads features from one or more 'generic-cache' nodes.

    With several cache files the cache outputs are joined by a
    'generic-vector-f32-concat' node (ports 'in0', 'in1', ...); with a single
    cache file the cache node feeds the 'features' output directly.

    :param cache_files: a single cache path/attribute or a list of them; any
        non-list value is treated as one cache file. An empty list is not
        supported (the final ``caches[0]`` lookup fails).
    :return: new rasr.FlowNetwork with parameter 'id' and output 'features'
    """
    # isinstance also accepts list subclasses, which the former exact type
    # comparison wrongly wrapped into a one-element list
    if not isinstance(cache_files, list):
        cache_files = [cache_files]

    net = rasr.FlowNetwork()

    net.add_param("id")
    net.add_output("features")

    caches = []
    # _numerate is defined elsewhere in this module; its items are used as
    # suffixes appended to "cache" to form the node names
    for num, cf in zip(_numerate(len(cache_files)), cache_files):
        node_name = "cache" + num
        caches.append(
            net.add_node(
                "generic-cache",
                node_name,
                {
                    "id": "$(id)",
                    "path": rasr.NamedFlowAttribute(node_name, cf),
                },
            ))

    if len(caches) > 1:
        concat = net.add_node("generic-vector-f32-concat", "concat")
        for num, cache in enumerate(caches):
            # address the ports via the name add_node returned rather than a
            # hard-coded "concat"
            net.link(cache, "%s:in%d" % (concat, num))
        net.link(concat, "network:features")
    else:
        net.link(caches[0], "network:features")

    return net
Ejemplo n.º 9
0
def select_features(feature_net, select_range):
    """
    Route the 'features' output of a flow through a 'generic-vector-f32-select' node.

    :param rasr.FlowNetwork feature_net: flow with an output named 'features'
    :param select_range: value of the node's 'select' attribute
    :return: new rasr.FlowNetwork whose 'features' output is the output of the select node
    """
    selected_net = rasr.FlowNetwork()
    selected_net.add_output("features")

    node_map = selected_net.add_net(feature_net)
    selected_net.interconnect_inputs(feature_net, node_map)

    select_attrs = {"select": select_range}
    select_node = selected_net.add_node("generic-vector-f32-select", "select",
                                        select_attrs)

    feature_source = feature_net.get_output_links("features").pop()
    selected_net.link(node_map[feature_source], select_node)
    selected_net.link(select_node, "network:features")

    return selected_net
Ejemplo n.º 10
0
def add_linear_transform(feature_net, matrix_path):
    """
    Route the 'features' output of a flow through a 'signal-matrix-multiplication-f32' node.

    :param rasr.FlowNetwork feature_net: flow with an output named 'features'
    :param matrix_path: value of the node's 'file' attribute
    :return: new rasr.FlowNetwork whose 'features' output is the output of the added node
    """
    transformed_net = rasr.FlowNetwork()
    transformed_net.add_output("features")

    node_map = transformed_net.add_net(feature_net)
    transformed_net.interconnect_inputs(feature_net, node_map)

    matrix_node = transformed_net.add_node(
        "signal-matrix-multiplication-f32",
        "linear-transform",
        {"file": matrix_path},
    )

    feature_source = feature_net.get_output_links("features").pop()
    transformed_net.link(node_map[feature_source], matrix_node)
    transformed_net.link(matrix_node, "network:features")

    return transformed_net
Ejemplo n.º 11
0
def make_first_feature_energy(feature_net):
    """
    Add an 'energy' output that carries port 0 of a split of the 'features' stream.

    The 'features' stream is fed into a 'generic-vector-f32-split' node; its
    port '0' goes through a 'generic-convert-vector-f32-to-f32' node into the
    new 'energy' output. All original inputs and outputs are kept.

    :param rasr.FlowNetwork feature_net: flow with an output named 'features'
    :return: new rasr.FlowNetwork
    """
    energy_net = rasr.FlowNetwork()
    node_map = energy_net.add_net(feature_net)
    energy_net.interconnect_inputs(feature_net, node_map)
    energy_net.interconnect_outputs(feature_net, node_map)

    energy_net.add_output("energy")

    split_node = energy_net.add_node("generic-vector-f32-split", "split")
    feature_source = feature_net.get_output_links("features").pop()
    energy_net.link(node_map[feature_source], split_node)

    scalar_node = energy_net.add_node("generic-convert-vector-f32-to-f32",
                                      "convert")
    energy_net.link(split_node + ":0", scalar_node)
    energy_net.link(scalar_node, "network:energy")

    return energy_net
Ejemplo n.º 12
0
    def create_convert_flow(cls, crp, timestamp_flow, timestamp_port,
                            **kwargs):
        """
        Build a flow that reads 'dump/$(id).xml.gz' and caches the result in 'tone.cache.$(TASK)'.

        Node chain: text input -> 'timestamp-copy' -> 'signal-normalization'
        (mean-and-variance, infinite window) -> 'signal-repeating-frame-prediction'
        -> 'generic-cache' -> output 'features'. The 'target' ports of the
        timestamp-copy and frame-prediction nodes are fed from
        ``timestamp_port`` of ``timestamp_flow``.

        :param crp: not used by this method
        :param rasr.FlowNetwork timestamp_flow: flow embedded into the result
        :param str timestamp_port: name of the output of timestamp_flow to use
        :param kwargs: accepted but not used by this method
        :return: the newly built rasr.FlowNetwork
        """
        convert_net = rasr.FlowNetwork()
        for param_name in ("id", "start-time"):
            convert_net.add_param(param_name)
        convert_net.add_output("features")

        reader_attrs = {
            "offset": "$(start-time)",
            "file": "dump/$(id).xml.gz"
        }
        reader = convert_net.add_node("generic-vector-f32-text-input",
                                      "reader", reader_attrs)

        timestamp_map = convert_net.add_net(timestamp_flow)
        timestamp_source = timestamp_flow.get_output_links(
            timestamp_port).pop()
        timestamp_node = timestamp_map[timestamp_source]

        timestamp_copy = convert_net.add_node("timestamp-copy",
                                              "synchronization",
                                              {"ignore-errors": True})
        convert_net.link(timestamp_node, timestamp_copy + ":target")
        convert_net.link(reader, timestamp_copy)

        norm_attrs = {
            "type": "mean-and-variance",
            "length": "infinite",
            "right": "infinite"
        }
        norm_node = convert_net.add_node("signal-normalization",
                                         "normalization", norm_attrs)
        convert_net.link(timestamp_copy, norm_node)

        repeat_node = convert_net.add_node("signal-repeating-frame-prediction",
                                           "feature-sync")
        convert_net.link(timestamp_node, repeat_node + ":target")
        convert_net.link(norm_node, repeat_node)

        cache_attrs = {
            "path": "tone.cache.$(TASK)",
            "id": "$(id)"
        }
        cache_node = convert_net.add_node("generic-cache", "out-cache",
                                          cache_attrs)
        convert_net.link(repeat_node, cache_node)
        convert_net.link(cache_node, "network:features")

        return convert_net
Ejemplo n.º 13
0
def energy_flow(
    without_samples=False,
    samples_options=None,
    fft_options=None,
    normalization_type="divide-by-mean",
):
    """
    Build a flow computing an energy value from the amplitude spectrum.

    Node chain after the fft flow: 'generic-vector-f32-norm' -> conversion to
    vector -> optional 'signal-normalization' (infinite window) -> conversion
    back to scalar -> output 'energy'.

    :param bool without_samples: if True the network gets an input 'samples'
        that feeds the fft flow; otherwise a samples flow is created and
        connected to the fft flow
    :param dict|None samples_options: keyword arguments for samples_flow()
        (only used if without_samples is False); None means no options
    :param dict|None fft_options: keyword arguments for fft_flow(); None means no options
    :param str|None normalization_type: 'type' of the signal-normalization
        node; None skips the normalization node
    :return: new rasr.FlowNetwork with output 'energy'
    """
    # None defaults replace the former shared mutable dict defaults
    samples_options = {} if samples_options is None else samples_options
    fft_options = {} if fft_options is None else fft_options

    net = rasr.FlowNetwork()

    if without_samples:
        net.add_input("samples")
        fft_net = fft_flow(**fft_options)
        fft_mapping = net.add_net(fft_net)
        net.interconnect_inputs(fft_net, fft_mapping)
    else:
        samples_net = samples_flow(**samples_options)
        samples_mapping = net.add_net(samples_net)
        fft_net = fft_flow(**fft_options)
        fft_mapping = net.add_net(fft_net)
        net.interconnect(samples_net, samples_mapping, fft_net, fft_mapping)

    energy = net.add_node("generic-vector-f32-norm", "energy", {"value": 1})
    net.link(fft_mapping[fft_net.get_output_links("amplitude-spectrum").pop()],
             energy)

    convert_energy_to_vector = net.add_node(
        "generic-convert-f32-to-vector-f32", "convert-energy-to-vector")
    net.link(energy, convert_energy_to_vector)

    convert_energy_to_scalar = net.add_node(
        "generic-convert-vector-f32-to-f32", "convert-energy-vector-to-scalar")
    if normalization_type is not None:
        energy_normalization = net.add_node(
            "signal-normalization",
            "energy-normalization",
            {
                "type": normalization_type,
                "length": "infinite",
                "right": "infinite"
            },
        )
        net.link(convert_energy_to_vector, energy_normalization)
        net.link(energy_normalization, convert_energy_to_scalar)
    else:
        net.link(convert_energy_to_vector, convert_energy_to_scalar)

    # Declare the output that is linked below; it was missing, so consumers
    # that look for "energy" among the network outputs could not find it.
    net.add_output("energy")
    net.link(convert_energy_to_scalar, "network:energy")

    return net
Ejemplo n.º 14
0
    def get_tf_flow(
        checkpoint_path: Union[Path, returnn.Checkpoint],
        tf_graph_path: Path,
        returnn_op_path: Path,
        forward_output_layer: str = "output",
        tf_fwd_input_name: str = "tf-fwd-input",
    ):
        """
        Build the flow network, including its config, for the tf-fwd node.

        :param Path checkpoint_path: RETURNN model checkpoint which should be loaded
        :param Path tf_graph_path: compiled tf graph for the model
        :param Path returnn_op_path: path to native lstm library
        :param str forward_output_layer: name of layer whose output is used
        :param str tf_fwd_input_name: tf flow node input name. see: add_tf_flow_to_base_flow()
        :rtype: FlowNetwork
        """
        flow = rasr.FlowNetwork()
        flow.add_input(tf_fwd_input_name)
        flow.add_output("features")
        flow.add_param("id")

        tf_fwd = flow.add_node("tensorflow-forward", "tf-fwd", {"id": "$(id)"})
        flow.link(f"network:{tf_fwd_input_name}", tf_fwd + ":input")
        flow.link(tf_fwd + ":log-posteriors", "network:features")

        flow.config = rasr.RasrConfig()

        # input tensor mapping of the forward node
        flow.config[tf_fwd].input_map.info_0.param_name = "input"
        flow.config[tf_fwd].input_map.info_0.tensor_name = (
            "extern_data/placeholders/data/data")
        flow.config[tf_fwd].input_map.info_0.seq_length_tensor_name = (
            "extern_data/placeholders/data/data_dim0_size")

        # output tensor mapping of the forward node
        flow.config[tf_fwd].output_map.info_0.param_name = "log-posteriors"
        flow.config[tf_fwd].output_map.info_0.tensor_name = (
            f"{forward_output_layer}/output_batch_major")

        # graph / checkpoint loader settings
        flow.config[tf_fwd].loader.type = "meta"
        flow.config[tf_fwd].loader.meta_graph_file = tf_graph_path
        flow.config[tf_fwd].loader.saved_model_file = checkpoint_path

        flow.config[tf_fwd].loader.required_libraries = returnn_op_path

        return flow
Ejemplo n.º 15
0
def add_derivatives(feature_net, derivatives=1):
    """
    Concatenate regression outputs ('signal-regression' nodes) to the 'features' stream.

    A 'signal-delay' node provides ports -2..2 that feed one regression node
    of order 1 and, for ``derivatives == 2``, a second one of order 2. The
    original features and the regression outputs are joined by a
    'generic-vector-f32-concat' node (ports 'in-1', 'in-2', 'in-3').

    :param rasr.FlowNetwork feature_net: flow with an output named 'features'
    :param int derivatives: 0, 1 or 2; with 0 ``feature_net`` itself is returned unchanged
    :return: ``feature_net`` (derivatives == 0) or a new rasr.FlowNetwork
    """
    assert derivatives in [0, 1, 2]
    if derivatives == 0:
        return feature_net

    net = rasr.FlowNetwork()
    net.add_output("features")
    node_map = net.add_net(feature_net)
    net.interconnect_inputs(feature_net, node_map)

    delay_attrs = {
        "max-size": 5,
        "right": 2,
        "margin-condition": "present-not-empty"
    }
    delay = net.add_node("signal-delay", "delay", delay_attrs)
    net.link(node_map[feature_net.get_output_links("features").pop()], delay)

    def add_regression(order, node_name):
        # one regression node, wired to all five ports of the delay node
        node = net.add_node("signal-regression", node_name, {
            "order": order,
            "timestamp-port": 0
        })
        for offset in range(-2, 3):
            net.link(f"{delay}:{offset}", f"{node}:{offset}")
        return node

    regressions = [add_regression(1, "delta")]
    if derivatives == 2:
        regressions.append(add_regression(2, "deltadelta"))

    concat = net.add_node("generic-vector-f32-concat", "concat")
    net.link(node_map[feature_net.get_output_links("features").pop()],
             f"{concat}:in-1")
    # regression outputs follow the original features on ports in-2, in-3
    for port_index, node in enumerate(regressions, start=2):
        net.link(node, f"{concat}:in-{port_index}")

    net.link(concat, "network:features")

    return net
Ejemplo n.º 16
0
def feature_extraction_cache_flow(feature_net,
                                  port_name_mapping,
                                  one_dimensional_outputs=None):
    """
    Wrap a feature flow so that selected output ports are written through 'generic-cache' nodes.

    Each port in ``port_name_mapping`` gets a cache node with path
    '<name>.cache.$(TASK)'. With more than one port the cache outputs are
    concatenated into the 'features' output, otherwise the single one is used
    directly.

    :param rasr.FlowNetwork feature_net: feature flow to extract features from
    :param dict[str,str] port_name_mapping: maps output ports to names of the cache files
    :param set[str]|None one_dimensional_outputs: output ports that return one-dimensional features (e.g. energy);
        their cache output is passed through a 'generic-convert-f32-to-vector-f32' node
    :rtype: rasr.FlowNetwork
    """
    scalar_ports = (set() if one_dimensional_outputs is None else
                    one_dimensional_outputs)

    net = rasr.FlowNetwork()

    net.add_output("features")
    for param_name in ("id", "TASK"):
        net.add_param(param_name)
    node_map = net.add_net(feature_net)

    cache_outputs = []
    for port, cache_name in port_name_mapping.items():
        cache_attrs = {
            "id": "$(id)",
            "path": cache_name + ".cache.$(TASK)"
        }
        cache_node = net.add_node("generic-cache",
                                  "feature-cache-" + cache_name, cache_attrs)
        for source in feature_net.get_output_links(port):
            net.link(node_map[source], cache_node)

        if port not in scalar_ports:
            cache_outputs.append(cache_node)
            continue

        to_vector = net.add_node("generic-convert-f32-to-vector-f32",
                                 "convert-" + cache_name)
        net.link(cache_node, to_vector)
        cache_outputs.append(to_vector)

    if len(cache_outputs) > 1:
        concat = net.add_node("generic-vector-f32-concat", "concat")
        for index, node in enumerate(cache_outputs):
            net.link(node, "%s:in%d" % (concat, index))
        net.link(concat, "network:features")
    else:
        net.link(cache_outputs[0], "network:features")

    return net
Ejemplo n.º 17
0
def raw_audio_flow(audio_format="wav"):
    """
    Build a flow with a single audio input file node linked to the output 'out'.

    :param str audio_format: passed to get_input_node_type() to choose the
        'audio-input-file-*' node type
    :return: new rasr.FlowNetwork with parameters 'input-file', 'start-time', 'end-time'
    """
    audio_net = rasr.FlowNetwork()

    audio_net.add_output("out")
    audio_net.add_param(["input-file", "start-time", "end-time"])

    node_type = "audio-input-file-" + get_input_node_type(audio_format)
    reader_attrs = {
        "file": "$(input-file)",
        "start-time": "$(start-time)",
        "end-time": "$(end-time)",
    }
    reader = audio_net.add_node(node_type, "samples", reader_attrs)

    audio_net.link(reader, "network:out")

    return audio_net
Ejemplo n.º 18
0
def concat_features_with_ivec(feature_net, ivec_path):
    """
    Generate a new flow-network with i-vectors repeated and concatenated to original feature stream

    The i-vector cache is read by a 'generic-cache' node and repeated by a
    'signal-repeating-frame-prediction' node whose 'target' port is fed by the
    original features. Features ('feature-1') and repeated i-vectors
    ('feature-2') are then joined by a 'generic-vector-f32-concat' node.

    :param feature_net: original flow-network, must have an output named 'features'
    :param ivec_path: ivec_path from IVectorExtractionJob
    :return: new rasr.FlowNetwork with output 'features'
    """
    # copy original net
    net = rasr.FlowNetwork(name=feature_net.name)
    net.add_param(["id", "start-time", "end-time"])
    net.add_output("features")
    mapping = net.add_net(feature_net)
    net.interconnect_inputs(feature_net, mapping)

    # load ivec cache and repeat
    fc = net.add_node("generic-cache", "feature-cache-ivec", {
        "id": "$(id)",
        "path": ivec_path
    })
    sync = net.add_node("signal-repeating-frame-prediction", "sync")
    net.link(fc, sync)
    for node in feature_net.get_output_links("features"):
        # translate the source name of the original net into the name it has
        # inside the new net instead of assuming both are identical
        net.link(mapping[node], "%s:%s" % (sync, "target"))

    # concat original feature output with repeated ivecs
    concat = net.add_node(
        "generic-vector-f32-concat",
        "concatenation",
        {
            "check-same-length": True,
            "timestamp-port": "feature-1"
        },
    )
    for node in feature_net.get_output_links("features"):
        net.link(mapping[node], "%s:%s" % (concat, "feature-1"))
    net.link(sync, "%s:%s" % (concat, "feature-2"))

    net.link(concat, "network:features")

    return net
Ejemplo n.º 19
0
def label_features_with_map_flow(feature_net,
                                 map_file,
                                 map_key="$(id)",
                                 default_output=0.0):
    """
    Augment a feature net with an output 'labels' that is fed by a corpus-key-map node.

    :param feature_net: base feature-net; all of its inputs and outputs are kept
    :param map_file: value of the node's 'map-file' attribute
    :param map_key: value of the node's 'key' attribute; for the form '$(name)'
        the parameter ``name`` is declared on the network
    :param default_output: value of the node's 'default-output' attribute
    :return: new rasr.FlowNetwork
    """
    # start from a copy of the original net
    labeled_net = rasr.FlowNetwork(name=feature_net.name)
    labeled_net.add_param(["id", "start-time", "end-time"])
    node_map = labeled_net.add_net(feature_net)
    labeled_net.interconnect_inputs(feature_net, node_map)
    labeled_net.interconnect_outputs(feature_net, node_map)

    references_param = map_key.startswith("$(") and map_key.endswith(")")
    if references_param:
        # drop the surrounding '$(' and ')'
        labeled_net.add_param(map_key[2:-1])

    labeled_net.add_output("labels")
    map_attrs = {
        "key": map_key,
        "map-file": map_file,
        "default-output": "%s" % default_output,
        "start-time": "$(start-time)",
        "end-time": "$(end-time)",
    }
    map_node = labeled_net.add_node("generic-coprus-key-map", "warping-factor",
                                    map_attrs)
    labeled_net.link(map_node, "network:labels")

    return labeled_net
Ejemplo n.º 20
0
def warp_filterbank_with_map_flow(
    feature_net,
    map_file,
    map_key="$(id)",
    default_output=1.0,
    omega=0.875,
    node_name="filterbank",
):
    """
    Nest the filterbank's warping function inside a 'linear-2' warping driven by a corpus-key-map node.

    The 'warping-function' attribute of the filterbank node is wrapped as
    'nest(linear-2($input(alpha), <omega>), <old function>)' and a
    corpus-key-map node is linked to the filterbank's 'alpha' port.

    :param rasr.FlowNetwork feature_net: flow containing a 'signal-filterbank'
        node named ``node_name`` that already has a 'warping-function' attribute
    :param map_file: value of the map node's 'map-file' attribute
    :param map_key: value of the map node's 'key' attribute
    :param default_output: value of the map node's 'default-output' attribute
    :param omega: second argument of the 'linear-2' warping function
    :param str node_name: name of the filterbank node in feature_net
    :return: new rasr.FlowNetwork with the inputs and outputs of feature_net
    """
    assert node_name in feature_net.nodes
    assert feature_net.nodes[node_name]["filter"] == "signal-filterbank"

    # start from a copy of the original net
    warped_net = rasr.FlowNetwork()
    node_map = warped_net.add_net(feature_net)
    warped_net.interconnect_inputs(feature_net, node_map)
    warped_net.interconnect_outputs(feature_net, node_map)

    filterbank_name = node_map[node_name]
    filterbank = warped_net.nodes[filterbank_name]
    previous_warping = filterbank["warping-function"]
    filterbank["warping-function"] = (
        "nest(linear-2($input(alpha), %s), %s)" % (omega, previous_warping))

    map_attrs = {
        "key": map_key,
        "map-file": map_file,
        "default-output": "%s" % default_output,
        "start-time": "$(start-time)",
        "end-time": "$(end-time)",
    }
    map_node = warped_net.add_node("generic-coprus-key-map", "warping-factor",
                                   map_attrs)
    warped_net.link(map_node, "%s:alpha" % filterbank_name)

    return warped_net
Ejemplo n.º 21
0
def add_static_warping_to_filterbank_flow(
    feature_net,
    alpha_name="warping-alpha",
    omega_name="warping-omega",
    node_name="filterbank",
):
    """
    Nest the filterbank's warping function inside a 'linear-2' warping driven by two network parameters.

    The 'warping-function' attribute of the filterbank node becomes
    'nest(linear-2($(<alpha_name>), $(<omega_name>)), <old function>)'; both
    names are declared as parameters of the returned network.

    :param rasr.FlowNetwork feature_net: flow containing a 'signal-filterbank'
        node named ``node_name`` that already has a 'warping-function' attribute
    :param str alpha_name: name of the parameter used as first 'linear-2' argument
    :param str omega_name: name of the parameter used as second 'linear-2' argument
    :param str node_name: name of the filterbank node in feature_net
    :return: new rasr.FlowNetwork with the inputs and outputs of feature_net
    """
    assert node_name in feature_net.nodes
    assert feature_net.nodes[node_name]["filter"] == "signal-filterbank"

    # start from a copy of the original net
    warped_net = rasr.FlowNetwork(name=feature_net.name)
    node_map = warped_net.add_net(feature_net)
    warped_net.interconnect_inputs(feature_net, node_map)
    warped_net.interconnect_outputs(feature_net, node_map)

    warped_net.add_param([alpha_name, omega_name])

    filterbank = warped_net.nodes[node_map[node_name]]
    previous_warping = filterbank["warping-function"]
    filterbank["warping-function"] = (
        "nest(linear-2($(%s), $(%s)), %s)" %
        (alpha_name, omega_name, previous_warping))

    return warped_net
Ejemplo n.º 22
0
def cepstrum_flow(normalize=True,
                  outputs=16,
                  add_epsilon=False,
                  epsilon=1.175494e-38):
    """
    Build a flow 'in' -> log node -> 'signal-cosine-transform' -> optional mean normalization -> 'out'.

    :param bool normalize: append a 'signal-normalization' node of type 'mean' with infinite window
    :param outputs: value of the cosine transform's 'nr-outputs' attribute
    :param bool add_epsilon: use a 'generic-vector-f32-log-plus' node (with
        'value' = epsilon) instead of 'generic-vector-f32-log'
    :param epsilon: 'value' of the log-plus node, passed as string
    :return: new rasr.FlowNetwork with input 'in' and output 'out'
    """
    net = rasr.FlowNetwork()

    net.add_input("in")
    net.add_output("out")

    if not add_epsilon:
        log_node = net.add_node("generic-vector-f32-log", "nonlinear")
    else:
        log_node = net.add_node("generic-vector-f32-log-plus", "nonlinear",
                                {"value": str(epsilon)})
    cosine_node = net.add_node("signal-cosine-transform", "cepstrum",
                               {"nr-outputs": outputs})

    net.link("network:in", log_node)
    net.link(log_node, cosine_node)

    if not normalize:
        net.link(cosine_node, "network:out")
        return net

    norm_attrs = {
        "length": "infinite",
        "right": "infinite",
        "type": "mean"
    }
    norm_node = net.add_node("signal-normalization", "normalization",
                             norm_attrs)
    net.link(cosine_node, norm_node)
    net.link(norm_node, "network:out")

    return net
Ejemplo n.º 23
0
def fft_flow(preemphasis=1.0,
             window_type="hamming",
             window_shift=0.01,
             window_length=0.025):
    """
    Build a flow 'samples' -> preemphasis -> window -> fft -> amplitude -> 'amplitude-spectrum'.

    :param preemphasis: value of the preemphasis node's 'alpha' attribute
    :param window_type: value of the window node's 'type' attribute
    :param window_shift: value of the window node's 'shift' attribute
    :param window_length: value of the window node's 'length' attribute; also
        used as 'maximum-input-size' of the fft node
    :return: new rasr.FlowNetwork with input 'samples' and output 'amplitude-spectrum'
    """
    net = rasr.FlowNetwork()

    net.add_input("samples")
    net.add_output("amplitude-spectrum")

    preemphasis_node = net.add_node("signal-preemphasis",
                                    "preemphasis",
                                    alpha=preemphasis)
    window_attrs = {
        "type": window_type,
        "shift": window_shift,
        "length": window_length
    }
    window_node = net.add_node("signal-window", "window", window_attrs)
    fft_node = net.add_node(
        "signal-real-fast-fourier-transform",
        "fft",
        {"maximum-input-size": window_length},
    )
    amplitude_node = net.add_node(
        "signal-vector-alternating-complex-f32-amplitude",
        "amplitude-spectrum")

    # wire the nodes into one chain from the network input to its output
    chain = [
        "network:samples",
        preemphasis_node,
        window_node,
        fft_node,
        amplitude_node,
        "network:amplitude-spectrum",
    ]
    for source, sink in zip(chain, chain[1:]):
        net.link(source, sink)

    return net
Ejemplo n.º 24
0
def external_file_feature_flow(flow_file):
    """
    Build a flow that consists of a single node whose filter is an external flow file.

    The network parameters 'input-file', 'start-time', 'end-time', 'track'
    and 'id' are forwarded to that node; its 'out' port becomes the 'features' output.

    :param flow_file: used as the filter of the node 'base-feature-extraction'
    :return: new rasr.FlowNetwork
    """
    wrapper_net = rasr.FlowNetwork()

    for param_name in ("input-file", "start-time", "end-time", "track", "id"):
        wrapper_net.add_param(param_name)
    wrapper_net.add_output("features")

    node_attrs = {
        "input-file": "$(input-file)",
        "start-time": "$(start-time)",
        "end-time": "$(end-time)",
        "track": "$(track)",
        "id": "$(id)",
        "ignore-unknown-parameters": "true",
    }
    extraction_node = wrapper_net.add_node(flow_file,
                                           "base-feature-extraction",
                                           node_attrs)
    wrapper_net.link(extraction_node + ":out", "network:features")
    return wrapper_net
Ejemplo n.º 25
0
def samples_flow(
    audio_format="wav",
    dc_detection=True,
    dc_params={
        "min-dc-length": 0.01,
        "max-dc-increment": 0.9,
        "min-non-dc-segment-length": 0.021,
    },
    input_options=None,
    scale_input=None,
):
    """
    Create a flow to read samples from audio files, convert it to f32 and apply optional dc-detection.

    Files that do not have a native input node will be opened with the ffmpeg flow node.
    Please check if scaling is needed.

    Native input formats are:
        - wav
        - nist
        - flac
        - mpeg (mp3)
        - gsm
        - htk
        - phondat
        - oss

    For more information see: https://www-i6.informatik.rwth-aachen.de/rwth-asr/manual/index.php/Audio_Nodes

    :param str audio_format: the input audio format
    :param bool dc_detection: enable dc-detection node
    :param dict dc_params: optional dc-detection node parameters; the dict is
        copied before it is handed to the node and is never modified here
    :param dict input_options: additional options for the input node
    :param int|float|None scale_input: scale the waveform samples,
        this might be needed to scale ogg inputs by 2**15 to support feature flows
        designed for 16-bit wav inputs; a falsy value (None, 0) disables scaling
    :return: new rasr.FlowNetwork with output 'samples'
    """
    net = rasr.FlowNetwork()

    net.add_output("samples")
    net.add_param(["input-file", "start-time", "end-time", "track"])

    input_opts = {
        "file": "$(input-file)",
        "start-time": "$(start-time)",
        "end-time": "$(end-time)",
    }

    if input_options is not None:
        input_opts.update(**input_options)

    input_node_type = get_input_node_type(audio_format)

    samples = net.add_node("audio-input-file-" + input_node_type, "samples",
                           input_opts)
    if input_node_type == "ffmpeg":
        # the ffmpeg node output is used directly, without demultiplexing or
        # s16 -> f32 conversion
        samples_out = samples
    else:
        demultiplex = net.add_node("generic-vector-s16-demultiplex",
                                   "demultiplex",
                                   track="$(track)")
        net.link(samples, demultiplex)

        convert = net.add_node("generic-convert-vector-s16-to-vector-f32",
                               "convert")
        net.link(demultiplex, convert)
        samples_out = convert

    if scale_input:
        scale = net.add_node("generic-vector-f32-multiplication",
                             "scale",
                             value=str(scale_input))
        net.link(samples_out, scale)
        pre_dc_out = scale
    else:
        pre_dc_out = samples_out

    if dc_detection:
        # Pass a copy: the default dict is shared between all calls and must
        # not become reachable (and thus mutable) through the network.
        dc_attrs = None if dc_params is None else dict(dc_params)
        dc_node = net.add_node("signal-dc-detection", "dc-detection",
                               dc_attrs)
        net.link(pre_dc_out, dc_node)
        net.link(dc_node, "network:samples")
    else:
        net.link(pre_dc_out, "network:samples")

    return net
Ejemplo n.º 26
0
    def returnn_rasr_training(
        self,
        name,
        returnn_config,
        nn_train_args,
        train_corpus_key,
        cv_corpus_key,
    ):
        """
        Create a ReturnnRasrTrainingJob for one train/cv corpus pair and register its output alias.

        Train and cv data must agree in feature flow, features and alignments.
        If the train data has no feature flow, a basic cache flow is built
        from its features.

        :param name: forwarded to ``_add_output_alias_for_train_job``
        :param returnn.ReturnnConfig returnn_config: config of the training
        :param dict nn_train_args: additional keyword arguments of the training job
        :param train_corpus_key: key into ``self.train_input_data``
        :param cv_corpus_key: key into ``self.cv_input_data``
        :return: the created returnn.ReturnnRasrTrainingJob
        :raises NotImplementedError: for feature or alignment types that are not handled
        """
        train_data = self.train_input_data[train_corpus_key]
        dev_data = self.cv_input_data[cv_corpus_key]

        train_crp = train_data.get_crp()
        dev_crp = dev_data.get_crp()

        # both data sets have to provide features and alignments the same way
        assert train_data.feature_flow == dev_data.feature_flow
        assert train_data.features == dev_data.features
        assert train_data.alignments == dev_data.alignments

        feature_flow = train_data.feature_flow
        if feature_flow is None:
            # no ready-made flow: read the features from caches instead
            train_features = train_data.features
            if isinstance(train_features, rasr.FlagDependentFlowAttribute):
                feature_path = train_features
            elif isinstance(train_features, (MultiPath, MultiOutputPath)):
                feature_path = rasr.FlagDependentFlowAttribute(
                    "cache_mode", {"task_dependent": train_features})
            elif isinstance(train_features, tk.Path):
                feature_path = rasr.FlagDependentFlowAttribute(
                    "cache_mode", {"bundle": train_features})
            else:
                raise NotImplementedError

            feature_flow = features.basic_cache_flow(feature_path)
            if isinstance(train_features, tk.Path):
                feature_flow.flags = {"cache_mode": "bundle"}

        train_alignments = train_data.alignments
        if isinstance(train_alignments, rasr.FlagDependentFlowAttribute):
            # resolve the attribute for cache mode "bundle" on a private copy
            alignment_attribute = copy.deepcopy(train_alignments)
            bundle_net = rasr.FlowNetwork()
            bundle_net.flags = {"cache_mode": "bundle"}
            alignments = alignment_attribute.get(bundle_net)
        elif isinstance(train_alignments, tk.Path) and not isinstance(
                train_alignments, (MultiPath, MultiOutputPath)):
            alignments = train_alignments
        else:
            raise NotImplementedError

        assert isinstance(returnn_config, returnn.ReturnnConfig)

        train_job = returnn.ReturnnRasrTrainingJob(
            train_crp=train_crp,
            dev_crp=dev_crp,
            feature_flow=feature_flow,
            alignment=alignments,
            returnn_config=returnn_config,
            returnn_root=self.returnn_root,
            returnn_python_exe=self.returnn_python_exe,
            **nn_train_args,
        )
        self._add_output_alias_for_train_job(
            train_job=train_job,
            train_corpus_key=train_corpus_key,
            cv_corpus_key=cv_corpus_key,
            name=name,
        )

        return train_job
Ejemplo n.º 27
0
def recognized_warping_factor_flow(
    feature_net,
    alphas_file,
    mixtures,
    filterbank_node="filterbank",
    amplitude_spectrum_node="amplitude-spectrum",
    omega=0.875,
):
    """
    Extend a feature flow with a second, warped copy of everything from the filterbank node onwards.
    The warping function of the copied filterbank is nested inside ``linear-2($input(alpha), omega)``,
    where ``alpha`` is delivered by a 'signal-bayes-classification' node that is fed with the original
    (unwarped) features and a normalized energy signal as feature-score-weight.

    :param feature_net: flow network to extend; must contain `filterbank_node` (with filter
        'signal-filterbank') and `amplitude_spectrum_node`, and have an output named 'features'
    :param alphas_file: value for the 'class-label-file' attribute of the recognizer node
    :param mixtures: assigned to `likelihood_function.file` in the recognizer node's config
    :param str filterbank_node: name of the filterbank node in `feature_net`
    :param str amplitude_spectrum_node: name of the node whose output is used to compute the energy
    :param omega: second argument of the `linear-2` warping function
    :returns: new flow network; its 'features' output is produced by the warped sub-network
    """
    source_nodes = feature_net.nodes
    assert filterbank_node in source_nodes
    assert source_nodes[filterbank_node]["filter"] == "signal-filterbank"
    assert amplitude_spectrum_node in source_nodes

    # start from a full copy of the input network
    net = rasr.FlowNetwork(name=feature_net.name)
    base_map = net.add_net(feature_net)
    net.interconnect_inputs(feature_net, base_map)
    net.interconnect_outputs(feature_net, base_map)

    # remember who produced the unwarped features, then detach them from the network output
    unwarped_feature_sources = net.get_output_links("features")
    net.unlink(to_name="%s:%s" % (net.name, "features"))

    # second copy of the part starting at the filterbank; it takes over the network outputs
    warped_net, broken_links = feature_net.subnet_from_node(filterbank_node)
    warped_map = net.add_net(warped_net)
    net.interconnect_outputs(warped_net, warped_map)

    # re-attach the links that were cut when extracting the sub-network
    for link in broken_links:
        net.link(base_map[link[0]], warped_map[link[1]])

    warped_filterbank = warped_map[filterbank_node]
    filterbank_attrs = net.nodes[warped_filterbank]
    filterbank_attrs["warping-function"] = "nest(linear-2($input(alpha), %s), %s)" % (
        omega,
        filterbank_attrs["warping-function"],
    )

    # energy: norm of the amplitude spectrum, divided by its mean, synchronized with the features
    energy = net.add_node("generic-vector-f32-norm", "energy", {"value": 1})
    net.link(base_map[amplitude_spectrum_node], energy)

    energy_chain = (
        ("generic-convert-f32-to-vector-f32", "convert-energy-to-vector"),
        (
            "signal-normalization",
            "energy-normalization",
            {"type": "divide-by-mean", "length": "infinite", "right": "infinite"},
        ),
        ("generic-convert-vector-f32-to-f32", "convert-energy-vector-to-scalar"),
        ("generic-synchronization", "energy-sync"),
    )
    previous = energy
    for node_spec in energy_chain:
        current = net.add_node(*node_spec)
        net.link(previous, current)
        previous = current
    energy_sync = previous

    net.link(unwarped_feature_sources.pop(), "%s:target" % energy_sync)

    # classifier that picks the warping factor and feeds it into the warped filterbank
    recognizer = net.add_node(
        "signal-bayes-classification",
        "warping-factor-recognizer",
        {"class-label-file": alphas_file},
    )
    net.link(recognizer, "%s:alpha" % warped_filterbank)
    net.link(energy_sync, "%s:feature-score-weight" % recognizer)
    net.link("%s:target" % energy_sync, recognizer)

    net.config = rasr.RasrConfig()
    net.config[recognizer].likelihood_function.file = mixtures
    net.config[recognizer].likelihood_function.feature_scorer_type = "SIMD-diagonal-maximum"

    return net
Ejemplo n.º 28
0
def samples_with_silence_normalization_flow(
    audio_format="wav", dc_detection=True, dc_params=None, silence_params=None
):
    """
    Create a samples flow that reads audio, selects one track, converts it to f32 and applies
    'signal-silence-normalization', optionally followed by 'signal-dc-detection', before 'warp-time'.

    :param str audio_format: suffix appended to 'audio-input-file-' to select the input node filter
    :param bool dc_detection: if true, a 'signal-dc-detection' node is inserted between the
        silence normalization and the 'warp-time' node
    :param dict|None dc_params: overrides for the default attributes of the dc-detection node
    :param dict|None silence_params: overrides for the default silence normalization settings,
        which are written to the network config under the silence-normalization node
    :returns: flow network with output 'samples' and the parameters
        'input-file', 'start-time', 'end-time' and 'track'
    """
    _dc_params = {
        "min-dc-length": 0.01,
        "max-dc-increment": 0.9,
        "min-non-dc-segment-length": 0.021,
    }
    _silence_params = {
        "absolute-silence-threshold": 250,
        "discard-unsure-segments": True,
        "min-surrounding-silence": 0.1,
        "fill-up-silence": True,
        "silence-ratio": 0.25,
        "silence-threshold": 0.05,
    }
    # user-supplied values take precedence over the defaults above
    if dc_params is not None:
        _dc_params.update(dc_params)
    if silence_params is not None:
        _silence_params.update(silence_params)

    net = rasr.FlowNetwork()

    net.add_output("samples")
    net.add_param(["input-file", "start-time", "end-time", "track"])

    samples = net.add_node(
        "audio-input-file-" + audio_format,
        "samples",
        {
            "file": "$(input-file)",
            "start-time": "$(start-time)",
            "end-time": "$(end-time)",
        },
    )

    demultiplex = net.add_node(
        "generic-vector-s16-demultiplex", "demultiplex", track="$(track)"
    )
    net.link(samples, demultiplex)

    convert = net.add_node("generic-convert-vector-s16-to-vector-f32", "convert")
    net.link(demultiplex, convert)

    sil_norm = net.add_node("signal-silence-normalization", "silence-normalization")
    net.link(convert, sil_norm)
    warp_time = net.add_node("warp-time", "warp-time", {"start-time": "$(start-time)"})
    if dc_detection:
        # separate local name so the boolean parameter is not rebound to a node name
        dc_node = net.add_node("signal-dc-detection", "dc-detection", _dc_params)
        net.link(sil_norm, dc_node)
        net.link(dc_node, warp_time)
    else:
        net.link(sil_norm, warp_time)

    net.link(warp_time, "network:samples")

    net.config = rasr.RasrConfig()
    # iterate over (key, value) pairs; iterating the dict itself yields only the key strings,
    # which cannot be unpacked into two names
    for k, v in _silence_params.items():
        net.config[sil_norm][k] = v

    return net
Ejemplo n.º 29
0
def plp_flow(
    warping_function="bark",
    num_features=20,
    sampling_rate=8000,
    filter_width=3.8,
    normalize=True,
    normalization_options=None,
    without_samples=False,
    samples_options=None,
    fft_options=None,
):
    """
    Assemble a PLP-style feature flow: FFT amplitude spectrum -> power spectrum -> trapeze
    filterbank -> equal-loudness preemphasis -> intensity-loudness power law -> cosine transform
    -> autoregression -> cepstrum, optionally followed by a 'signal-normalization' node.

    :param str warping_function: 'warping-function' attribute of the filterbank node
    :param int num_features: 'nr-outputs' of the cosine transform and the cepstrum node;
        also determines the filterbank spacing
    :param sampling_rate: used to compute the upper end of the warped frequency axis
    :param float filter_width: 'filter-width' attribute of the filterbank node
    :param bool normalize: whether to append a 'signal-normalization' node
    :param dict|None normalization_options: overrides for the normalization node attributes
    :param bool without_samples: if true, the network gets a 'samples' input instead of
        embedding the network returned by `samples_flow`
    :param dict|None samples_options: keyword arguments passed to `samples_flow`
    :param dict|None fft_options: keyword arguments passed to `fft_flow`
    :returns: flow network whose result is linked to 'network:features'
    """
    normalization_options = {} if normalization_options is None else normalization_options
    samples_options = {} if samples_options is None else samples_options
    fft_options = {} if fft_options is None else fft_options

    net = rasr.FlowNetwork()

    if without_samples:
        net.add_input("samples")
    else:
        samples_net = samples_flow(**samples_options)
        samples_mapping = net.add_net(samples_net)

    fft_net = fft_flow(**fft_options)
    fft_mapping = net.add_net(fft_net)

    if without_samples:
        net.interconnect_inputs(fft_net, fft_mapping)
    else:
        net.interconnect(samples_net, samples_mapping, fft_net, fft_mapping)

    power_spectrum = net.add_node(
        "generic-vector-f32-power", "power-spectrum", {"value": 2}
    )
    amplitude_source = fft_mapping[fft_net.get_output_links("amplitude-spectrum").pop()]
    net.link(amplitude_source, power_spectrum)

    # Value of the sampling rate on the warped axis, computed as 6 * asinh-style expression of f / 600.
    scaled_rate = sampling_rate / 600
    bark = 6 * log(scaled_rate + sqrt(scaled_rate ** 2 + 1))
    # With the include-boundary setting:
    #   number of filters = floor((maximal-frequency - filter-width) / spacing + 1)
    # solving for the spacing gives (max - width) / (num - 1)
    spacing = (bark - filter_width) / (num_features - 1)
    filterbank_attrs = {
        "warping-function": warping_function,
        "filter-width": filter_width,
        "spacing": spacing,
        "type": "trapeze",
        "boundary": "include-boundary",
    }
    filterbank = net.add_node("signal-filterbank", "filterbank", filterbank_attrs)
    net.link(power_spectrum, filterbank)

    # Two splits of the filterbank output (one with reverse=true); port 0 of each is
    # concatenated around the full filterbank output as 'first' and 'last'.
    split_filterbank = net.add_node("generic-vector-f32-split", "split-filterbank")
    net.link(filterbank, split_filterbank)

    reverse_split_filterbank = net.add_node(
        "generic-vector-f32-split", "reverse-split-filterbank", {"reverse": "true"}
    )
    net.link(filterbank, reverse_split_filterbank)

    padded_filterbank = net.add_node(
        "generic-vector-f32-concat", "copy-first-last-filterbank"
    )
    net.link(split_filterbank + ":0", padded_filterbank + ":first")
    net.link(filterbank, padded_filterbank + ":middle")
    net.link(reverse_split_filterbank + ":0", padded_filterbank + ":last")

    preemphasis_attrs = {
        "f": "nest(nest(disc-to-cont, invert(bark)), equal-loudness-preemphasis)",
        "operation": "multiplies",
    }
    equal_loudness_preemphasis = net.add_node(
        "signal-vector-f32-continuous-transform",
        "equal-loudness-preemphasis",
        preemphasis_attrs,
    )
    net.link(padded_filterbank, equal_loudness_preemphasis)

    intensity_loudness_law = net.add_node(
        "generic-vector-f32-power", "intensity-loudness-law", {"value": "0.33"}
    )
    net.link(equal_loudness_preemphasis, intensity_loudness_law)

    autocorrelation = net.add_node(
        "signal-cosine-transform",
        "autocorrelation",
        {"nr-outputs": num_features, "input-type": "N-plus-one", "normalize": "true"},
    )
    net.link(intensity_loudness_law, autocorrelation)

    autoregression = net.add_node(
        "signal-autocorrelation-to-autoregression", "autoregression"
    )
    net.link(autocorrelation, autoregression)

    linear_cepstrum = net.add_node(
        "signal-autoregression-to-cepstrum",
        "linear-prediction-cepstrum",
        {"nr-outputs": num_features},
    )
    net.link(autoregression, linear_cepstrum)

    # the last node of the chain is what gets exposed as the network's features
    final_node = linear_cepstrum
    if normalize:
        attr = {
            "type": "mean-and-variance",
            "length": "infinity",
            "right": "infinity",
        }
        attr.update(normalization_options)
        final_node = net.add_node(
            "signal-normalization", "feature-normalization", attr
        )
        net.link(linear_cepstrum, final_node)
    net.link(final_node, "network:features")

    return net
Ejemplo n.º 30
0
def gammatone_flow(
    minfreq=100,
    maxfreq=7500,
    channels=68,
    warp_freqbreak=None,
    tempint_type="hanning",
    tempint_shift=0.01,
    tempint_length=0.025,
    flush_before_gap=True,
    do_specint=True,
    specint_type="hanning",
    specint_shift=4,
    specint_length=9,
    normalize=True,
    preemphasis=True,
    legacy_scaling=False,
    without_samples=False,
    samples_options=None,
    normalization_options=None,
):
    """
    Assemble a gammatone feature flow: (preemphasis ->) gammatone filterbank -> temporal
    integration (-> spectral integration) -> type conversion -> scaling -> power
    non-linearity -> cosine transform (-> normalization (-> legacy scaling)).

    :param minfreq: 'minfreq' attribute of the gammatone node
    :param maxfreq: 'maxfreq' attribute of the gammatone node
    :param channels: 'channels' attribute of the gammatone node and 'nr-outputs' of the
        cosine transform
    :param warp_freqbreak: if not None, set as 'warp-freqbreak' attribute of the gammatone node
    :param str tempint_type: 'type' of the temporal integration node
    :param tempint_shift: 'shift' of the temporal integration node
    :param tempint_length: 'length' of the temporal integration node
    :param bool flush_before_gap: 'flush-before-gap' of the temporal integration node
    :param bool do_specint: whether to add a spectral integration node
    :param str specint_type: 'type' of the spectral integration node
    :param specint_shift: 'shift' of the spectral integration node
    :param specint_length: 'length' of the spectral integration node
    :param bool normalize: whether to append a 'signal-normalization' node
    :param bool preemphasis: whether to insert a 'signal-preemphasis' node before the gammatone node
    :param bool legacy_scaling: if true (and `normalize` is true), multiply the normalized
        features by 3, as was done in legacy setups
    :param bool without_samples: if true, the network gets a 'samples' input instead of
        embedding the network returned by `samples_flow`
    :param dict|None samples_options: keyword arguments passed to `samples_flow`
    :param dict|None normalization_options: overrides for the normalization node attributes
    :returns: flow network whose result is linked to 'network:features'
    """
    # None instead of shared mutable dict defaults
    if samples_options is None:
        samples_options = {}
    if normalization_options is None:
        normalization_options = {}

    net = rasr.FlowNetwork()

    if without_samples:
        net.add_input("samples")
        sample_input = "network:samples"
    else:
        samples_net = samples_flow(**samples_options)
        samples_mapping = net.add_net(samples_net)
        sample_input = samples_mapping[samples_net.get_output_links("samples").pop()]

    gammatone_args = {
        "minfreq": minfreq,
        "maxfreq": maxfreq,
        "channels": channels,
    }
    if warp_freqbreak is not None:
        gammatone_args["warp-freqbreak"] = warp_freqbreak
    gammatone = net.add_node("signal-gammatone", "gammatone", gammatone_args)

    if preemphasis:
        node_preemphasis = net.add_node(
            "signal-preemphasis", "preemphasis", {"alpha": 1.00}
        )
        net.link(sample_input, node_preemphasis)
        net.link(node_preemphasis, gammatone)
    else:
        net.link(sample_input, gammatone)

    tempint = net.add_node(
        "signal-temporalintegration",
        "temporal-integration",
        {
            "type": tempint_type,
            "shift": tempint_shift,
            "length": tempint_length,
            "flush-before-gap": flush_before_gap,
        },
    )
    net.link(gammatone, tempint)

    # node whose output is fed into the type conversion below
    integration_output = tempint
    if do_specint:
        integration_output = net.add_node(
            "signal-spectralintegration",
            "spectral-integration",
            {
                "type": specint_type,
                "shift": specint_shift,
                "length": specint_length,
            },
        )
        net.link(tempint, integration_output)

    convert = net.add_node(
        "generic-convert-vector-vector-f32-to-vector-f32", "typeconvert"
    )
    net.link(integration_output, convert)

    scaling = net.add_node(
        "generic-vector-f32-multiplication", "scaling", {"value": 0.00035}
    )
    net.link(convert, scaling)

    nonlinear = net.add_node("generic-vector-f32-power", "nonlinear", {"value": 0.1})
    net.link(scaling, nonlinear)

    cos_transform = net.add_node(
        "signal-cosine-transform", "cos_transform", {"nr-outputs": channels}
    )
    net.link(nonlinear, cos_transform)

    if normalize:
        attr = {
            "type": "mean-and-variance",
            "length": "infinity",
            "right": "infinity",
        }
        attr.update(normalization_options)
        normalization = net.add_node("signal-normalization", "gt-normalization", attr)
        net.link(cos_transform, normalization)

        if legacy_scaling:
            # In legacy setups, features were multiplied with a scalar of 3
            post_norm_scaling = net.add_node(
                "generic-vector-f32-multiplication", "post-norm-scaling", {"value": 3}
            )
            net.link(normalization, post_norm_scaling)
            net.link(post_norm_scaling, "network:features")
        else:
            net.link(normalization, "network:features")
    else:
        net.link(cos_transform, "network:features")

    return net