def test_gain_pitch_same_size():
    """
    Checks that changing the gain and the pitch has no effect on the audio_data size.

    The tempo is held at 1.0 and every (gain, pitch) pair is applied to the first
    test audio file; the augmented array must have the same size as the original.
    """
    fixed_tempo = 1.0
    # each entry is a (gain, pitch) pair
    perturbations = (
        (0, 0),  # no augmentation
        (8, 0),  # gain only
        (0, 400),  # pitch only
        (-6, -400),  # gain and pitch together
    )
    first_audio = get_all_test_audio()[0]  # a single audio path is used here

    for gain_value, pitch_value in perturbations:
        # the un-augmented reference is reloaded for every pair
        original, rate = array_from_wave(first_audio)
        perturbed, rate = tempo_gain_pitch_perturb(
            first_audio,
            sample_rate=rate,
            tempo_range=(fixed_tempo, fixed_tempo),
            gain_range=(gain_value, gain_value),
            pitch_range=(pitch_value, pitch_value),
        )

        assert original.size == perturbed.size, "data size is not the same"
def test_time_masking(logger: Logger = None):
    """
    Checks that the number of time-masked rows does not exceed the maximum allowed.

    Each parameter tuple is ordered as:
    ('time_warping_para', 'frequency_masking_para', 'time_masking_para',
    'frequency_mask_num', 'time_mask_num')

    Arguments:
        logger - Logger: accepted in the signature but not used by this test
    """
    repetitions = 10  # repeated because mask selection is random
    spec_augment_params = (
        (0, 0, 60, 0, 1),  # 1 mask with max width of 60
        (0, 0, 30, 0, 2),
        (0, 0, 20, 0, 3),
    )
    wav_paths = get_all_test_audio()

    for _ in range(repetitions):
        for wav_path in wav_paths:
            for params in spec_augment_params:
                signal, rate = array_from_wave(wav_path)
                spectrogram = log_spectrogram_from_data(
                    signal, rate, window_size=32, step_size=16)
                # the spectrogram is transposed before being handed to spec_augment
                masked = to_numpy(
                    spec_augment(torch.from_numpy(spectrogram.T), *params))
                masked_rows = count_time_mask(masked)

                # upper bound: time mask width times the number of time masks
                _, _, mask_width, _, mask_count = params
                masked_row_limit = mask_width * mask_count

                assert masked_rows <= masked_row_limit
def test_datatype():
    """
    Checks that synthetic gaussian noise injection returns data with dtype int16
    for every test audio file.
    """
    fixed_snr = 30
    # lower and upper bound of the range are the same value
    snr_bounds = (fixed_snr, fixed_snr)
    for wav_path in get_all_test_audio():
        signal, _ = array_from_wave(wav_path)
        noisy = synthetic_gaussian_noise_inject(signal, snr_range=snr_bounds)
        assert noisy.dtype == "int16"
# Example #4
# 0
def test_audio_feature_normalize():
    """
    Checks that normalizing the log spectrogram of each test audio file yields
    features whose mean is almost 0 and whose standard deviation is almost 1.
    """
    for wav_file in get_all_test_audio():
        normalized = feature_normalize(log_spectrogram_from_file(wav_file))
        np.testing.assert_almost_equal(normalized.mean(), 0)
        np.testing.assert_almost_equal(normalized.std(), 1)
def test_apply_spec_augment_call(logger: Logger = None):
    """
    Smoke test: only verifies that apply_spec_augment can be called on the
    log spectrogram of every test audio file without errors.
    Arguments:
        logger - Logger: optional logger that is passed on to apply_spec_augment
    """
    for wav_path in get_all_test_audio():
        signal, rate = array_from_wave(wav_path)
        spectrogram = log_spectrogram_from_data(
            signal, rate, window_size=32, step_size=16)
        apply_spec_augment(spectrogram, logger)
def test_high_snr_value():
    """
    Checks that noise injected with an snr level of 100 leaves the audio data
    equal to the input within the tolerances below.
    """
    fixed_snr = 100
    # absolute tolerance is 1e-5 of the range of values in pcm16 format (2**16)
    abs_tolerance = 2**16 * 1e-5
    for wav_path in get_all_test_audio():
        signal, _ = array_from_wave(wav_path)
        noisy = synthetic_gaussian_noise_inject(
            signal, snr_range=(fixed_snr, fixed_snr))
        np.testing.assert_allclose(signal,
                                   noisy,
                                   rtol=1e-03,
                                   atol=abs_tolerance)
def test_no_augment():
    """
    Tests that the input audio and the augmented data are identical when no
    augmentation is requested: tempo=1.0, gain=0, pitch=0.
    """

    tempo = 1.0
    gain = 0.0
    pitch = 0.0
    audio_paths = get_all_test_audio()
    for audio_path in audio_paths:
        # un-augmented audio_data
        audio_data, samp_rate = array_from_wave(audio_path)
        aug_data, samp_rate = tempo_gain_pitch_perturb(audio_path,
                                                       sample_rate=samp_rate,
                                                       tempo_range=(tempo,
                                                                    tempo),
                                                       gain_range=(gain, gain),
                                                       pitch_range=(pitch,
                                                                    pitch))

        # np.array_equal compares shape and contents in one call, so a size
        # mismatch fails this assertion with its message instead of raising
        # from the builtin all() applied to a non-iterable / ambiguous result.
        assert np.array_equal(audio_data, aug_data), "data is not the same"
def test_gain_increase_amplitude():
    """
    Tests that the RMS of the augmented audio, divided by the expected amplitude
    ratio, matches the RMS of the un-augmented audio:
    1) a 6 dB increase in gain is paired with a ratio of 1.995, and
    2) a 6 dB decrease in gain is paired with a ratio of 0.501.
    Ratio is computed as: ratio = 10**(gain/20)
    """
    fixed_tempo = 1.0
    fixed_pitch = 0.0
    accuracy = -1  # same up to 10^(-accuracy)
    # each entry is a (gain, expected amplitude ratio) pair
    expected_ratios = (
        (0, 1.0),  # no augmentation
        (6, 1.995),
        (-6, 0.501),
    )
    # NOTE: the pairs (10, 3.162) and (-10, 0.3162) were left out by the original
    # author because they fail; the cause was not determined.

    for audio_path in get_all_test_audio():
        print(f"audio_path: {audio_path}")
        for gain_db, amp_ratio in expected_ratios:
            # the un-augmented reference is reloaded for every pair
            reference, rate = array_from_wave(audio_path)
            boosted, rate = tempo_gain_pitch_perturb(
                audio_path,
                sample_rate=rate,
                tempo_range=(fixed_tempo, fixed_tempo),
                gain_range=(gain_db, gain_db),
                pitch_range=(fixed_pitch, fixed_pitch),
            )
            # rms is computed with a sample width of 2 bytes
            audio_rms = audioop.rms(reference, 2)
            scaled_aug_rms = audioop.rms(boosted, 2) / amp_ratio
            print(
                f"audio rms: {audio_rms}, scaled_aug rms: {scaled_aug_rms}, ratio:{amp_ratio}, accuracy:{10**(-accuracy)}"
            )
            np.testing.assert_almost_equal(
                audio_rms, scaled_aug_rms, decimal=accuracy)
def test_tempo_augment():
    """
    Verifies, for every tempo in the list, that the size of the augmented data
    scaled by the tempo equals the size of the un-augmented data within a
    relative tolerance of 1e-1.
    """

    audio_paths = get_all_test_audio()
    # A tempo of 0 is not included: aug_data.size * 0 is always 0, so the size
    # relation asserted below could never hold for non-empty audio.
    tempos = [0.5, 0.85, 1, 1.15, 2]
    for audio_path in audio_paths:
        # un-augmented audio_data
        audio_data, samp_rate = array_from_wave(audio_path)
        for tempo in tempos:
            # the returned sample rate is discarded so that every tempo is run
            # with the sample rate read from the file
            aug_data, _ = tempo_gain_pitch_perturb(
                audio_path,
                sample_rate=samp_rate,
                tempo_range=(tempo, tempo),
                gain_range=(0, 0),
                pitch_range=(0, 0))

            # the check lives inside the tempo loop so that every tempo is
            # verified, not only the last one
            print(
                f"audio_data size: {audio_data.size}, aug_data: {aug_data.size}, tempo: {tempo}"
            )
            assert audio_data.size == pytest.approx(aug_data.size * tempo,
                                                    rel=1e-1)