예제 #1
0
def tensors_to_rgb_images(inputs, colors, size_multiplier=1):
    """
    Render a batch of 2-dimensional tensors as RGB images using a colormap.

    The batch is rescaled with 'features.feature_scaling' using 0.0 and 1.0 as
    bounds, the scaled values are rounded to row indices of 'colors', and the
    matching color rows are gathered to form the channel dimension.
    The images are then transposed, flipped upside down and resized by
    'size_multiplier' before being returned.

    'inputs' must have rank 3 ([batch, cols, rows]) and 'colors' must have
    rank 2 with exactly 3 components per color ([color, component]).

    Based on https://gist.github.com/jimfleming/c1adfdb0f526465c99409cc143dea97b
    """
    # Validate the input batch and the colormap before doing any work
    tf.debugging.assert_rank(inputs, 3, message="tensors_to_rgb_images expects batches of 2 dimensional tensors with shape [batch, cols, rows].")
    tf.debugging.assert_rank(colors, 2, message="tensors_to_rgb_images expects a colormap of shape [color, component].")
    tf.debugging.assert_equal(tf.shape(colors)[1], 3, message="tensors_to_rgb_images expects an RGB colormap.")

    # Grayscale step: squeeze every value into [0, 1]
    lower_bound = tf.constant(0.0)
    upper_bound = tf.constant(1.0)
    grayscale = features.feature_scaling(inputs, lower_bound, upper_bound)

    # Stretch [0, 1] over the colormap rows, so 0 picks the first color and 1 the last
    top_index = tf.cast(tf.shape(colors)[0] - 1, tf.float32)
    color_indices = tf.cast(tf.math.round(grayscale * top_index), tf.int32)
    tf.debugging.assert_non_negative(color_indices, message="Negative color indices")

    # Each index is replaced by its color row, which appends the channel dimension
    rgb = tf.gather(colors, color_indices, axis=0, batch_dims=0)
    tf.debugging.assert_rank(rgb, 4, message="Gathering colors failed, output images do not have a channel dimension. Make sure the inputs have a known shape.")

    # Orientation change; the output is assumed to be headed for Tensorboard
    rgb = tf.image.flip_up_down(tf.image.transpose(rgb))

    # Scale the two spatial dimensions (axes 1 and 2) by the requested factor
    current_size = tf.cast(tf.shape(rgb)[1:3], tf.float32)
    target_size = tf.cast(size_multiplier * current_size, tf.int32)
    rgb = tf.image.resize(rgb, target_size)

    tf.debugging.assert_all_finite(rgb, message="Tensor conversion to RGB images failed, non-finite values in output")
    return rgb
예제 #2
0
def extract_features(
        signals,
        sample_rates,
        feattype,
        spec_kwargs,
        melspec_kwargs,
        mfcc_kwargs,
        db_spec_kwargs,
        feat_scale_kwargs,
        window_norm_kwargs):
    """
    Compute the features selected by 'feattype' for a batch of mono signals.

    A spectrogram is always computed first with 'audio_features.spectrograms'.
    Depending on 'feattype' it is then processed further:

    - "melspectrogram": mel-scaled with 'audio_features.melspectrograms'.
    - "logmelspectrogram": as above, followed by log(X + 1e-6).
    - "mfcc": as above, followed by 'tf.signal.mfccs_from_log_mel_spectrograms',
      keeping coefficients [coef_begin, coef_end) read from 'mfcc_kwargs'
      (defaults 1 and 13).
    - "db_spectrogram": converted with 'audio_features.power_to_db'.

    Any other value leaves the spectrogram as it is.
    If 'feat_scale_kwargs' is non-empty, 'features.feature_scaling' is applied,
    and if 'window_norm_kwargs' is non-empty, 'features.window_normalization'
    is applied after that.

    'signals' must have rank 2 ([B, N]) and every entry of 'sample_rates' must
    equal the first one. Every stage asserts that its output is finite.
    """
    tf.debugging.assert_rank(signals, 2, message="Input signals for feature extraction must be batches of mono signals without channels, i.e. of shape [B, N] where B is batch size and N number of samples.")
    # TODO: batches with different sample rates (probably not worth the effort)
    batch_rate = sample_rates[0]
    tf.debugging.assert_equal(sample_rates, [batch_rate], message="Different sample rates in a single batch not supported, all signals in the same batch should have the same sample rate.")

    X = audio_features.spectrograms(signals, batch_rate, **spec_kwargs)
    tf.debugging.assert_all_finite(X, "spectrogram failed")

    # The mel-based feature types build on each other: mel -> log-mel -> mfcc
    needs_mel = feattype in ("melspectrogram", "logmelspectrogram", "mfcc")
    needs_log = feattype in ("logmelspectrogram", "mfcc")

    if needs_mel:
        X = audio_features.melspectrograms(X, sample_rate=batch_rate, **melspec_kwargs)
        tf.debugging.assert_all_finite(X, "melspectrogram failed")

    if needs_log:
        # Small offset keeps the logarithm finite for zero-valued bins
        X = tf.math.log(X + 1e-6)
        tf.debugging.assert_all_finite(X, "logmelspectrogram failed")

    if feattype == "mfcc":
        first_coef = mfcc_kwargs.get("coef_begin", 1)
        last_coef = mfcc_kwargs.get("coef_end", 13)
        all_coefs = tf.signal.mfccs_from_log_mel_spectrograms(X)
        X = all_coefs[..., first_coef:last_coef]
        tf.debugging.assert_all_finite(X, "mfcc failed")

    if feattype == "db_spectrogram":
        X = audio_features.power_to_db(X, **db_spec_kwargs)
        tf.debugging.assert_all_finite(X, "db_spectrogram failed")

    # Optional post-processing, skipped when the kwargs are empty or None
    if feat_scale_kwargs:
        X = features.feature_scaling(X, **feat_scale_kwargs)
        tf.debugging.assert_all_finite(X, "feature scaling failed")

    if window_norm_kwargs:
        X = features.window_normalization(X, **window_norm_kwargs)
        tf.debugging.assert_all_finite(X, "window normalization failed")

    return X
예제 #3
0
 def test_feature_scaling(self):
     """
     Check 'features.feature_scaling' on random arrays of rank 1 to 4.

     For each random array and each choice of axis (None or any single axis),
     the scaled output must contain no NaNs, keep the input shape, and have
     its minimum and maximum along that axis within 1e-9 of the requested
     lower and upper bounds.
     """
     for rank in range(1, 5):
         for _ in range(300):
             # Random target interval [lower, upper] with magnitude tied to delta
             delta = np.random.uniform(1, 1e3)
             lower = np.random.uniform(-delta, delta)
             upper = lower + np.random.uniform(0, delta / 2)
             # Random shape with 2..19 elements per dimension
             shape = np.random.randint(2, 20, size=rank)
             x = np.random.normal(0, delta**2, size=shape)
             for axis in (None, *range(rank)):
                 scaled = features.feature_scaling(x, lower, upper, axis=axis)
                 y = scaled.numpy()
                 assert not np.isnan(y).any()
                 assert y.shape == x.shape
                 lower_error = np.abs(y.min(axis=axis) - lower)
                 assert lower_error.max() < 1e-9
                 upper_error = np.abs(y.max(axis=axis) - upper)
                 assert upper_error.max() < 1e-9