def test_gain_pitch_same_size():
    """
    Tests that varying the gain and the pitch has no effect on the audio_data size.

    The tempo is held at 1.0 for every call so that only gain and pitch change.
    Only the first path returned by get_all_test_audio() is exercised.
    """
    tempo = 1.0
    gain_pitch_tuples = [
        (0, 0),      # no augmentation
        (8, 0),      # only gain
        (0, 400),    # only pitch
        (-6, -400),  # both gain and pitch
    ]
    audio_path = get_all_test_audio()[0]

    # The un-augmented reference does not depend on gain or pitch, so it is
    # read once here instead of once per (gain, pitch) pair.
    audio_data, samp_rate = array_from_wave(audio_path)

    for gain, pitch in gain_pitch_tuples:
        # The returned sample rate is discarded so every call is made with
        # the rate read from the file itself.
        aug_data, _ = tempo_gain_pitch_perturb(
            audio_path,
            sample_rate=samp_rate,
            tempo_range=(tempo, tempo),
            gain_range=(gain, gain),
            pitch_range=(pitch, pitch),
        )
        assert audio_data.size == aug_data.size, "data size is not the same"
def test_time_masking(logger: Logger = None):
    """
    Checks that spec_augment never masks more time rows than its settings allow.

    Each entry of `settings` is unpacked positionally into spec_augment and is
    ordered as:
        ('time_warping_para', 'frequency_masking_para', 'time_masking_para',
         'frequency_mask_num', 'time_mask_num')

    The upper bound on masked rows is time_masking_para * time_mask_num.

    Arguments:
        logger - Logger: accepted in the signature but not used by this test
    """
    settings = [
        (0, 0, 60, 0, 1),  # 1 mask with max width of 60
        (0, 0, 30, 0, 2),
        (0, 0, 20, 0, 3),
    ]
    wave_paths = get_all_test_audio()
    repetitions = 10  # mask selection is random, so each case is repeated

    for _ in range(repetitions):
        for wave_path in wave_paths:
            for setting in settings:
                mask_width, mask_count = setting[2], setting[4]
                upper_bound = mask_width * mask_count

                samples, rate = array_from_wave(wave_path)
                spectrogram = log_spectrogram_from_data(
                    samples, rate, window_size=32, step_size=16
                )
                # spec_augment is handed the transposed spectrogram as a tensor
                masked = spec_augment(torch.from_numpy(spectrogram.T), *setting)
                masked = to_numpy(masked)

                assert count_time_mask(masked) <= upper_bound
def test_datatype():
    """
    Checks that audio returned by synthetic_gaussian_noise_inject has dtype int16.

    A fixed snr level of 30 is used for both ends of the snr range.
    """
    snr = 30
    for wave_path in get_all_test_audio():
        samples, _ = array_from_wave(wave_path)
        noisy = synthetic_gaussian_noise_inject(samples, snr_range=(snr, snr))
        assert noisy.dtype == "int16"
def test_audio_feature_normalize():
    """
    Checks that feature_normalize yields features with mean 0 and standard deviation 1.

    The comparison uses the default precision of np.testing.assert_almost_equal
    and is run on the log spectrogram of every test audio file.
    """
    for wave_file in get_all_test_audio():
        normalized = feature_normalize(log_spectrogram_from_file(wave_file))
        np.testing.assert_almost_equal(normalized.mean(), 0)
        np.testing.assert_almost_equal(normalized.std(), 1)
def test_apply_spec_augment_call(logger: Logger = None):
    """
    Smoke test: apply_spec_augment can be called on every test file without errors.

    No assertion is made on the output; the test passes if nothing is raised.

    Arguments:
        logger - Logger: forwarded to apply_spec_augment so the logging path can be tested
    """
    for wave_path in get_all_test_audio():
        samples, rate = array_from_wave(wave_path)
        spectrogram = log_spectrogram_from_data(
            samples, rate, window_size=32, step_size=16
        )
        apply_spec_augment(spectrogram, logger)
def test_high_snr_value():
    """
    Checks that noise injected at an snr level of 100 leaves the audio nearly unchanged.

    The original and augmented samples are compared with np.testing.assert_allclose
    using rtol=1e-03 and an absolute tolerance derived from the pcm16 value range.
    """
    snr = 100
    # absolute tolerance is 1e-5 of the range of values in pcm16 format (2**16)
    abs_tolerance = 2**16 * 1e-5

    for wave_path in get_all_test_audio():
        samples, _ = array_from_wave(wave_path)
        noisy = synthetic_gaussian_noise_inject(samples, snr_range=(snr, snr))
        np.testing.assert_allclose(samples, noisy, rtol=1e-03, atol=abs_tolerance)
def test_no_augment():
    """
    Tests that input audio and augmented data are identical with no augmentation:
    tempo=1.0, gain=0, pitch=0.

    Every path returned by get_all_test_audio() is checked.
    """
    tempo = 1.0
    gain = 0.0
    pitch = 0.0

    for audio_path in get_all_test_audio():
        # un-augmented audio_data
        audio_data, samp_rate = array_from_wave(audio_path)
        aug_data, _ = tempo_gain_pitch_perturb(
            audio_path,
            sample_rate=samp_rate,
            tempo_range=(tempo, tempo),
            gain_range=(gain, gain),
            pitch_range=(pitch, pitch),
        )
        # np.array_equal checks shape and every element in one call and gives
        # False on a shape mismatch. The builtin all() over `a == b` iterates
        # in Python and errors out (rather than failing the assertion) when
        # the arrays differ in shape or have more than one dimension.
        # Assumes both values are NumPy arrays — TODO confirm.
        assert np.array_equal(audio_data, aug_data), "data is not the same"
def test_gain_increase_amplitude():
    """
    Tests that a gain change scales the RMS amplitude of the audio by the expected ratio:
        1) a 6 dB increase in gain corresponds to a 1.995x increase in RMS, and
        2) a 6 dB decrease corresponds to a 0.501x decrease in RMS.

    Ratio is computed as: ratio = 10**(gain/20)

    The RMS of the augmented audio is divided by the expected ratio and compared
    with the RMS of the un-augmented audio. Tempo and pitch are left unchanged
    (tempo=1.0, pitch=0.0). Every path returned by get_all_test_audio() is checked.
    """
    tempo = 1.0
    pitch = 0.0
    gain_ratio_tuples = [
        (0, 1.0),     # no augmentation
        (6, 1.995),
        (-6, 0.501),
    ]
    # NOTE: (10, 3.162) and (-10, 0.3162) were tried and fail; the cause is not
    # yet understood.

    # same up to 10^(-accuracy); per the NumPy docs assert_almost_equal checks
    # abs(desired - actual) < 1.5 * 10**(-decimal)
    accuracy = -1

    for audio_path in get_all_test_audio():
        print(f"audio_path: {audio_path}")

        # The un-augmented reference and its RMS do not depend on the gain, so
        # they are computed once per file rather than once per gain value.
        audio_data, samp_rate = array_from_wave(audio_path)
        audio_rms = audioop.rms(audio_data, 2)  # width of 2 bytes per sample

        for gain, amp_ratio in gain_ratio_tuples:
            # The returned sample rate is discarded so every call is made with
            # the rate read from the file itself.
            aug_data, _ = tempo_gain_pitch_perturb(
                audio_path,
                sample_rate=samp_rate,
                tempo_range=(tempo, tempo),
                gain_range=(gain, gain),
                pitch_range=(pitch, pitch),
            )
            # undo the expected gain so the value is comparable to audio_rms
            scaled_aug_rms = audioop.rms(aug_data, 2) / amp_ratio

            print(
                f"audio rms: {audio_rms}, scaled_aug rms: {scaled_aug_rms}, ratio:{amp_ratio}, accuracy:{10**(-accuracy)}"
            )
            np.testing.assert_almost_equal(audio_rms, scaled_aug_rms, decimal=accuracy)
def test_tempo_augment():
    """
    Verifies the size of the augmented data scaled by the tempo equals the size
    of the un-augmented data, to within a relative tolerance of 1e-1.

    Gain and pitch are left at 0 so that only the tempo changes.
    """
    # A tempo of 0 is deliberately not included: the expected value would be
    # approx(aug_data.size * 0) == approx(0), which no non-empty audio file
    # can satisfy.
    tempos = [0.5, 0.85, 1, 1.15, 2]

    for audio_path in get_all_test_audio():
        # un-augmented audio_data
        audio_data, samp_rate = array_from_wave(audio_path)

        for tempo in tempos:
            # The returned sample rate is discarded so every tempo is applied
            # with the rate read from the file, not the rate returned by the
            # previous iteration.
            aug_data, _ = tempo_gain_pitch_perturb(
                audio_path,
                sample_rate=samp_rate,
                tempo_range=(tempo, tempo),
                gain_range=(0, 0),
                pitch_range=(0, 0),
            )
            print(
                f"audio_data size: {audio_data.size}, aug_data: {aug_data.size}, tempo: {tempo}"
            )
            assert audio_data.size == pytest.approx(aug_data.size * tempo, rel=1e-1)