# Example 1
def test_mask_time():
    # Verify MaskTimeGPU masks the intended time region with either the
    # channel-wise mean (mask_val=None) or a fixed value, and leaves the
    # rest of the spectrogram untouched.
    c, f, t = 2, 120, 80

    min_size = 5
    max_size = 7

    sg = AudioSpectrogram(torch.rand([c, f, t]))
    val = 10  # Use a value not in the original spectrogram
    gradient_sg = AudioSpectrogram(
        torch.linspace(0, 1, t).view(1, 1, t).repeat([c, f, 1]))
    ones = torch.ones_like(sg)

    # Test patching with mean
    # region_mask is patched so the mask deterministically covers exactly
    # the first 10 time steps.
    with patch(
            "fastaudio.augment.functional.region_mask",
            side_effect=[
                torch.BoolTensor([[1] * 10 + [0] * (t - 10)]),
            ],
    ):
        mask_with_mean = MaskTimeGPU(min_size=min_size,
                                     max_size=max_size,
                                     mask_val=None)
        # Use a gradient so we can be sure the mean will never show up outside the mask
        inp, out = apply_transform(mask_with_mean, gradient_sg)
        # Mean of the masked region, computed per channel.
        channelwise_mean = inp[..., :10].mean(dim=(-2, -1)).reshape(-1, 1, 1)
        _test_close(
            out[..., :10],
            channelwise_mean * ones[..., :10],
        )
        # The mean value must not appear anywhere outside the masked region.
        assert not (out[..., 10:]
                    == channelwise_mean).any(), out == channelwise_mean

    # Test multiple masks (and patching with value)
    # Two masks: the first 10 and the last 10 time steps.
    with patch(
            "fastaudio.augment.functional.region_mask",
            side_effect=[
                torch.BoolTensor([[1] * 10 + [0] * (t - 10),
                                  [0] * (t - 10) + [1] * 10]),
            ],
    ):
        mask_with_val = MaskTimeGPU(min_size=min_size,
                                    num_masks=2,
                                    max_size=max_size,
                                    mask_val=val)
        inp, out = apply_transform(mask_with_val, sg)
        _test_eq(
            out[..., :10],
            val * ones[..., :10],
        )
        _test_eq(
            out[..., t - 10:],
            val * ones[..., t - 10:],
        )
        # Nothing between the two masks may equal the mask value.
        matches = out[..., 10:t - 10] == val
        assert not matches.any(), matches
# Example 2
def test_resizing_signal():
    "Can use the ResizeSignal Transform"
    # One mono clip and one multi-channel clip to resize.
    mono = test_audio_tensor(seconds=10, sr=1000)
    multi = test_audio_tensor(channels=2)

    for seconds in (1, 2, 5):
        # Mono: resized duration and sample count both match the target.
        inp, out = apply_transform(ResizeSignal(seconds * 1000), mono)
        _test_eq(out.duration, seconds)
        _test_eq(out.nsamples, out.duration * inp.sr)

        # Multi-channel: the resized duration matches the target as well.
        inp, out = apply_transform(ResizeSignal(seconds * 1000), multi)
        _test_eq(out.duration, seconds)
def test_crop_time_after_padding():
    # Crop a spectrogram to a target 5 s longer than the input, padding the
    # extra frames with zeros appended at the end only.
    sg_orig = test_audio_tensor()
    a2s = AudioToSpec.from_cfg(AudioConfig.Voice())
    sg = a2s(sg_orig)
    crop_time = CropTime((sg.duration + 5) * 1000, pad_mode=AudioPadType.Zeros_After)
    inp, out = apply_transform(crop_time, sg.clone())
    # NOTE(review): this compares the *input* spectrogram's duration against
    # the raw audio tensor's duration; `out` is never checked. Possibly the
    # intended assertion is `_test_ne(out.duration, sg.duration)` — confirm.
    _test_ne(sg.duration, sg_orig.duration)
# Example 4
def test_resample_rates(audio):
    "Test and hear realistic sample rates"
    for target_sr in (2000, 4000, 8000, 22050, 44100):
        inp, out = apply_transform(Resample(target_sr), audio)
        # The output carries the requested rate; duration is preserved, so
        # the sample count scales with the new rate.
        assert out.sr == target_sr
        assert out.nsamples == inp.duration * target_sr
# Example 5
def test_resample_multi_channel(audio):
    """Resampling preserves channel count and duration."""
    # Replace the fixture clip with an explicit 3-channel tensor.
    audio = test_audio_tensor(channels=3)
    _, resampled = apply_transform(Resample(8000), audio)
    _test_eq(resampled.nsamples, resampled.duration * 8000)
    _test_eq(resampled.nchannels, 3)
    _test_eq(resampled.sr, 8000)
def test_crop_time():
    """Cropping a spectrogram to N seconds shrinks its width proportionally."""
    for seconds in (1, 2, 5):
        to_spec = AudioToSpec.from_cfg(AudioConfig.Voice())
        clip = test_audio_tensor(seconds=3)
        inp, out = apply_transform(CropTime(seconds * 1000), to_spec(clip))
        _test_eq(seconds, round(out.duration))
        # Width should scale with the cropped fraction of the duration.
        _test_close(out.width, int((seconds / inp.duration) * inp.width), eps=1.01)
def test_resize_int():
    """TfmResize with an int size yields a square spectrogram."""
    target = 224
    clip = test_audio_tensor()
    spec = AudioToSpec.from_cfg(AudioConfig.Voice())(clip)
    inp, out = apply_transform(TfmResize(target), spec)
    _test_eq(out.shape[1:], torch.Size([target, target]))
# Example 8
def test_padding_after_resize(audio):
    """Zeros_After padding grows the signal at the tail only."""
    stretched = ResizeSignal((audio.duration + 1) * 1000,
                             pad_mode=AudioPadType.Zeros_After)
    inp, out = apply_transform(stretched, audio)
    # Tail must be all zeros...
    _test_eq(out[:, -10:], torch.zeros_like(out)[:, -10:])
    # ...while the head keeps the original (non-zero) samples.
    _test_ne(out[:, 0:10], out[:, -10:])
def test_crop_time_repeat_padding():
    "Test that repeat padding works when cropping time"
    n_repeats = 3
    clip = test_audio_tensor()
    to_spec = AudioToSpec.from_cfg(AudioConfig.Voice())
    spec = to_spec(clip)
    tfm = CropTime(n_repeats * 1000 * clip.duration,
                   pad_mode=AudioPadType.Repeat)
    inp, out = apply_transform(tfm, spec)
    # Input untouched; output widened past the source spectrogram.
    _test_eq(inp.width, spec.width)
    _test_ne(spec.width, out.width)
def test_delta_channels():
    " nchannels for a spectrogram is how many channels its original audio had "
    clip = test_audio_tensor(channels=1)
    spec = AudioToSpec.from_cfg(AudioConfig.Voice())(clip)
    inp, out = apply_transform(Delta(), spec)

    # Channel count triples while the spectrogram H x W stays the same.
    _test_eq(out.nchannels, inp.nchannels * 3)
    _test_eq(out.shape[1:], inp.shape[1:])
    # First two output planes must not be identical.
    _test_ne(out[0], out[1])
# Example 11
def test_signal_cutout():
    """SignalCutoutGPU zeroes a bounded fraction of the samples."""
    n_chan, n_samp = 2, 16000
    lo_pct, hi_pct = 0.10, 0.15
    # Shift the random signal into (0.1, 1.0] so it contains no zeros.
    sig = AudioTensor(torch.rand([n_chan, n_samp]), sr=16000) * 0.9 + 0.1
    tfm = SignalCutoutGPU(p=1.0, min_cut_pct=lo_pct, max_cut_pct=hi_pct)
    inp, out = apply_transform(tfm, sig)

    _test_ne(inp.data, out.data)

    # Every zero in the output was introduced by the cutout, so the count
    # must fall inside the configured percentage bounds.
    zero_count = (out == 0).sum()
    assert lo_pct * n_samp * n_chan <= zero_count <= hi_pct * n_samp * n_chan, zero_count
def test_mask_freq():
    # NOTE(review): despite the name, this exercises MaskTime and asserts
    # along the last (time) axis — possibly a duplicate of a time-mask test;
    # confirm whether MaskFreq was intended here.
    # create a random time mask and test that it is being correctly applied
    size, start, val = [random.randint(1, 50) for i in range(3)]
    time_mask_test = MaskTime(size=size, start=start, val=val)
    audio = test_audio_tensor()
    a2s = AudioToSpec.from_cfg(AudioConfig.Voice())
    sg = a2s(audio)
    inp, out = apply_transform(time_mask_test, sg)
    # The masked time span must equal the constant mask value everywhere.
    _test_eq(
        out[:, :, start:start + size],
        val * torch.ones_like(inp)[:, :, start:start + size],
    )
# Example 13
def test_resize_signal_repeat(audio):
    """
    Repeat padding tiles the original signal end to end: each
    input-sized window of the output equals the input.
    """
    ms = audio.duration * 1000
    n_copies = 3
    tfm = ResizeSignal(ms * n_copies, pad_mode=AudioPadType.Repeat)
    inp, out = apply_transform(tfm, audio)
    for k in range(n_copies):
        lo = int(k * inp.nsamples)
        hi = int(lo + inp.nsamples)
        _test_eq(out[:, lo:hi], inp)
# Example 14
def test_delta_channels():
    " nchannels for a spectrogram is how many channels its original audio had "
    # Two input channels so planes can be compared across channels too.
    clip = test_audio_tensor(channels=2)
    spec = AudioToSpec.from_cfg(AudioConfig.Voice())(clip)
    inp, out = apply_transform(DeltaGPU(), spec)

    # Channel count triples; spatial dimensions are untouched.
    _test_eq(out.nchannels, inp.nchannels * 3)
    _test_eq(out.shape[-2:], inp.shape[-2:])
    # No output plane should duplicate another across these pairings.
    for a, b in [(0, 2), (1, 3), (0, 4), (1, 5), (2, 4), (3, 5)]:
        assert not torch.allclose(out[a], out[b])
# Example 15
def test_cropping():
    "Can use the ResizeSignal Transform"
    audio = test_audio_tensor(seconds=10, sr=1000)
    # Fix: the "Multi Channel Cropping" section below previously reused the
    # mono clip, so the multi-channel path was never exercised. Build a real
    # multi-channel tensor with the same length and rate.
    mcaudio = test_audio_tensor(seconds=10, sr=1000, channels=2)

    inp, out1000 = apply_transform(ResizeSignal(1000), audio.clone())
    inp, out2000 = apply_transform(ResizeSignal(2000), audio.clone())
    inp, out5000 = apply_transform(ResizeSignal(5000), audio.clone())

    _test_eq(out1000.duration, 1)
    _test_eq(out2000.duration, 2)
    _test_eq(out5000.duration, 5)

    # Sample counts must agree with duration at the input's sample rate.
    _test_eq(out1000.nsamples, out1000.duration * inp.sr)
    _test_eq(out2000.nsamples, out2000.duration * inp.sr)
    _test_eq(out5000.nsamples, out5000.duration * inp.sr)

    # Multi Channel Cropping
    inp, mc1000 = apply_transform(ResizeSignal(1000), mcaudio.clone())
    inp, mc2000 = apply_transform(ResizeSignal(2000), mcaudio.clone())
    inp, mc5000 = apply_transform(ResizeSignal(5000), mcaudio.clone())

    _test_eq(mc1000.duration, 1)
    _test_eq(mc2000.duration, 2)
    _test_eq(mc5000.duration, 5)
# Example 16
def test_noise_non_white(audio):
    """Adding pink noise must alter the signal."""
    inp, out = apply_transform(AddNoise(color=NoiseColor.Pink), audio)
    _test_ne(inp.data, out.data)
# Example 17
def test_resample(audio):
    """Resampling to the signal's current rate is a no-op."""
    identity = Resample(audio.sr)
    inp, out = apply_transform(identity, audio)
    assert inp.sr == out.sr
    _test_eq(inp.data, out.data)
# Example 18
def test_silence_removed(audio):
    "Add silence to a signal and test that it gets removed"
    # Removing silence can only shorten (or keep) the signal.
    tfm = RemoveSilence(threshold=20, pad_ms=20)
    before, after = apply_transform(tfm, audio)
    assert after.nsamples <= before.nsamples
# Example 19
def test_signal_cutout(audio):
    """SignalCutout with probability 1 always modifies the signal."""
    inp, out = apply_transform(SignalCutout(1), audio)
    _test_ne(inp.data, out.data)
# Example 20
def test_signal_loss(audio):
    """SignalLossGPU with probability 1 always modifies the signal."""
    inp, out = apply_transform(SignalLossGPU(1), audio)
    _test_ne(inp.data, out.data)
# Example 21
def test_change_volume(audio):
    """ChangeVolumeGPU with probability 1 always modifies the signal."""
    inp, out = apply_transform(ChangeVolumeGPU(1), audio)
    _test_ne(inp.data, out.data)
# Example 22
def test_padding_both_side_resize(audio):
    "Make sure they are padding on both sides"
    # Default pad mode splits the extra second across head and tail,
    # so both pad regions should be identical (zeros).
    tfm = ResizeSignal((audio.duration + 1) * 1000)
    inp, out = apply_transform(tfm, audio)
    _test_eq(out[:, 0:2], out[:, -2:])
# Example 23
def test_noise_white(audio):
    """White-noise injection (p=1) must alter the signal."""
    tfm = AddNoiseGPU(color=NoiseColor.White, p=1.0, min_level=0.1, max_level=0.2)
    inp, out = apply_transform(tfm, audio)
    _test_ne(inp.data, out.data)
# Example 24
def test_down_mix_mono(audio):
    "Test downmixing 1 channel has no effect"
    # An already-mono signal should pass through unchanged.
    inp, out = apply_transform(DownmixMono(), audio)
    _test_eq(inp.data, out.data)
# Example 25
def test_no_rolling(audio):
    """Shifting without roll keeps the tensor shape unchanged."""
    tfm = SignalShifter(p=1, max_pct=0.5, roll=False)
    inp, out = apply_transform(tfm, audio)
    _test_eq(inp.data.shape, out.data.shape)
# Example 26
def test_shift_max_time(audio):
    """Time-based shifting (max_time) preserves the tensor shape."""
    inp, out = apply_transform(SignalShifter(max_time=1), audio)
    _test_eq(inp.data.shape, out.data.shape)
def test_signal_shift_on_sg():
    """SignalShifter also applies to (and changes) spectrograms."""
    clip = test_audio_tensor()
    spec = AudioToSpec.from_cfg(AudioConfig.BasicSpectrogram())(clip)
    inp, out = apply_transform(SignalShifter(1, 1), spec)
    _test_ne(inp, out)
def test_sg_roll():
    """SGRoll must change the spectrogram content."""
    clip = test_audio_tensor()
    spec = AudioToSpec.from_cfg(AudioConfig.BasicSpectrogram())(clip)
    inp, out = apply_transform(SGRoll(), spec)
    _test_ne(inp, out)
# Example 29
def test_resize_same_duration(audio):
    "Asking to resize to the duration should return the audio back"
    tfm = ResizeSignal(audio.duration * 1000)
    before, after = apply_transform(tfm, audio)
    _test_eq(before, after)
# Example 30
def test_noise_non_white(audio):
    """Pink (non-white) noise path of AddNoiseGPU must alter the signal."""
    # White noise uses a different method to other noises, so test both.
    tfm = AddNoiseGPU(color=NoiseColor.Pink, p=1.0, min_level=0.1, max_level=0.2)
    inp, out = apply_transform(tfm, audio)
    _test_ne(inp.data, out.data)