def tensors_to_rgb_images(inputs, colors, size_multiplier=1):
    """
    Convert a batch of 2-dimensional tensors into RGB images using a colormap.

    The values of 'inputs' are first rescaled into [0, 1] with
    features.feature_scaling, then multiplied by the last row index of 'colors'
    and rounded, which gives one colormap row index per input value.
    The image pixels are the rows of 'colors' gathered with those indices.
    Based on https://gist.github.com/jimfleming/c1adfdb0f526465c99409cc143dea97b

    inputs: rank 3 tensor, documented by the assertion below as [batch, cols, rows].
    colors: rank 2 colormap tensor of shape [color, component] with 3 components.
    size_multiplier: factor applied to both spatial dimensions before resizing.

    Returns the gathered images after transposing, flipping upside down and
    resizing with tf.image.resize.
    """
    tf.debugging.assert_rank(
        inputs,
        3,
        message="tensors_to_rgb_images expects batches of 2 dimensional tensors with shape [batch, cols, rows].")
    tf.debugging.assert_rank(
        colors,
        2,
        message="tensors_to_rgb_images expects a colormap of shape [color, component].")
    tf.debugging.assert_equal(
        tf.shape(colors)[1],
        3,
        message="tensors_to_rgb_images expects an RGB colormap.")

    # Grayscale step: squeeze all values into the closed interval [0, 1]
    grayscale = features.feature_scaling(inputs, tf.constant(0.0), tf.constant(1.0))

    # Stretch [0, 1] linearly over the valid colormap rows and round to the nearest row
    last_color_index = tf.cast(tf.shape(colors)[0] - 1, tf.float32)
    color_indices = tf.cast(tf.math.round(grayscale * last_color_index), tf.int32)
    tf.debugging.assert_non_negative(color_indices, message="Negative color indices")

    # Looking up a color row for every index appends the channel dimension
    images = tf.gather(colors, color_indices, axis=0, batch_dims=0)
    tf.debugging.assert_rank(
        images,
        4,
        message="Gathering colors failed, output images do not have a channel dimension. Make sure the inputs have a known shape.")

    # Orientation is chosen under the assumption that the images are shown in Tensorboard
    images = tf.image.flip_up_down(tf.image.transpose(images))

    # Dimensions 1 and 2 hold the rows and columns of each image
    old_size = tf.cast(tf.shape(images)[1:3], tf.float32)
    new_size = tf.cast(size_multiplier * old_size, tf.int32)
    images = tf.image.resize(images, new_size)

    tf.debugging.assert_all_finite(
        images,
        message="Tensor conversion to RGB images failed, non-finite values in output")
    return images
def extract_features(signals, sample_rates, feattype, spec_kwargs, melspec_kwargs, mfcc_kwargs, db_spec_kwargs, feat_scale_kwargs, window_norm_kwargs):
    """
    Extract features of type 'feattype' from a batch of mono signals.

    A spectrogram is always computed first. Depending on 'feattype' it is then
    turned into one of:
      "melspectrogram"     mel spectrogram,
      "logmelspectrogram"  natural logarithm of the mel spectrogram,
      "mfcc"               a slice of the MFCCs of the log mel spectrogram,
      "db_spectrogram"     spectrogram passed through audio_features.power_to_db.
    Any other value of 'feattype' leaves the spectrogram unchanged.

    signals: rank 2 tensor of shape [B, N], batch size B and N samples per signal.
    sample_rates: sample rates of the signals, all elements must be equal.
    spec_kwargs: keyword arguments for audio_features.spectrograms.
    melspec_kwargs: keyword arguments for audio_features.melspectrograms.
    mfcc_kwargs: mapping with optional keys "coef_begin" (default 1) and
        "coef_end" (default 13) that select the MFCC slice on the last axis.
    db_spec_kwargs: keyword arguments for audio_features.power_to_db.
    feat_scale_kwargs: if non-empty, keyword arguments for features.feature_scaling.
    window_norm_kwargs: if non-empty, keyword arguments for features.window_normalization.

    Returns the extracted features. Every stage is followed by a finiteness assertion.
    """
    tf.debugging.assert_rank(
        signals,
        2,
        message="Input signals for feature extraction must be batches of mono signals without channels, i.e. of shape [B, N] where B is batch size and N number of samples.")
    tf.debugging.assert_equal(
        sample_rates,
        [sample_rates[0]],
        message="Different sample rates in a single batch not supported, all signals in the same batch should have the same sample rate.")
    # TODO batches with different sample rates (probably not worth the effort)
    sample_rate = sample_rates[0]

    feats = audio_features.spectrograms(signals, sample_rate, **spec_kwargs)
    tf.debugging.assert_all_finite(feats, "spectrogram failed")

    # The mel-based feature types are cumulative: mfcc needs the log mel
    # spectrogram, which in turn needs the mel spectrogram.
    needs_mel = ("melspectrogram", "logmelspectrogram", "mfcc")
    needs_log_mel = ("logmelspectrogram", "mfcc")

    if feattype in needs_mel:
        feats = audio_features.melspectrograms(feats, sample_rate=sample_rate, **melspec_kwargs)
        tf.debugging.assert_all_finite(feats, "melspectrogram failed")

    if feattype in needs_log_mel:
        # Small offset so that zero-valued bins do not produce -inf
        feats = tf.math.log(feats + 1e-6)
        tf.debugging.assert_all_finite(feats, "logmelspectrogram failed")

    if feattype == "mfcc":
        first_coef = mfcc_kwargs.get("coef_begin", 1)
        last_coef = mfcc_kwargs.get("coef_end", 13)
        all_coefs = tf.signal.mfccs_from_log_mel_spectrograms(feats)
        feats = all_coefs[..., first_coef:last_coef]
        tf.debugging.assert_all_finite(feats, "mfcc failed")
    elif feattype in ("db_spectrogram",):
        feats = audio_features.power_to_db(feats, **db_spec_kwargs)
        tf.debugging.assert_all_finite(feats, "db_spectrogram failed")

    # Optional post-processing, skipped when the kwargs are empty or None
    if feat_scale_kwargs:
        feats = features.feature_scaling(feats, **feat_scale_kwargs)
        tf.debugging.assert_all_finite(feats, "feature scaling failed")

    if window_norm_kwargs:
        feats = features.window_normalization(feats, **window_norm_kwargs)
        tf.debugging.assert_all_finite(feats, "window normalization failed")

    return feats
def test_feature_scaling(self):
    # Randomized check of features.feature_scaling: for inputs of rank 1 to 4
    # and for every choice of axis (including None), the scaled output must be
    # NaN-free, keep the input shape, and have its minimum and maximum along
    # the axis equal to the requested bounds within 1e-9.
    tolerance = 1e-9
    for rank in range(1, 5):
        for _ in range(300):
            delta = np.random.uniform(1, 1e3)
            # Requested output range [lower, upper]
            lower = np.random.uniform(-delta, delta)
            upper = lower + np.random.uniform(0, delta / 2)
            # Each of the 'rank' dimensions gets a random length from [2, 20)
            shape = np.random.randint(2, 20, size=rank)
            x = np.random.normal(0, delta**2, size=shape)
            for axis in [None, *range(rank)]:
                y = features.feature_scaling(x, lower, upper, axis=axis).numpy()
                assert not np.isnan(y).any()
                assert y.shape == x.shape
                lower_error = np.abs(y.min(axis=axis) - lower)
                assert lower_error.max() < tolerance
                upper_error = np.abs(y.max(axis=axis) - upper)
                assert upper_error.max() < tolerance