Ejemplo n.º 1
0
def test_item_noise_not_applied_in_valid(audio):
    """AddNoise must change the item for split_idx=0 and leave it equal for split_idx=1."""
    noise_tfm = AddNoise(p=1.0)
    # An all-ones signal, so any change made by the transform is detectable
    reference = AudioTensor(torch.ones_like(audio), 16000)
    train_result, valid_result = [
        noise_tfm(reference.clone(), split_idx=idx) for idx in (0, 1)
    ]
    _test_ne(reference, train_result)
    _test_eq(reference, valid_result)
def test_crop_time():
    """CropTime output has the requested duration and a proportionally scaled width."""
    for seconds in (1, 2, 5):
        to_spec = AudioToSpec.from_cfg(AudioConfig.Voice())
        signal = test_audio_tensor(seconds=3)
        cropper = CropTime(seconds * 1000)
        spec_in, spec_out = apply_transform(cropper, to_spec(signal))
        _test_eq(seconds, round(spec_out.duration))
        # Width should scale with the ratio of requested to original duration
        expected_width = int((seconds / spec_in.duration) * spec_in.width)
        _test_close(spec_out.width, expected_width, eps=1.01)
def test_resize_int():
    """TfmResize given a single int yields an output whose trailing shape is (size, size)."""
    target = 224
    resizer = TfmResize(target)
    signal = test_audio_tensor()
    to_spec = AudioToSpec.from_cfg(AudioConfig.Voice())
    _, resized = apply_transform(resizer, to_spec(signal))
    _test_eq(resized.shape[1:], torch.Size([target, target]))
Ejemplo n.º 4
0
def test_padding_after_resize(audio):
    "Padding is added to the end but not the beginning"
    # Ask for one second more than the input so that padding is required
    target_ms = (audio.duration + 1) * 1000
    pad_after = ResizeSignal(target_ms, pad_mode=AudioPadType.Zeros_After)
    _, padded = apply_transform(pad_after, audio)
    tail = padded[:, -10:]
    # The last samples must be zeros
    _test_eq(tail, torch.zeros_like(padded)[:, -10:])
    # The first samples must differ from the zero-padded tail
    _test_ne(padded[:, 0:10], tail)
Ejemplo n.º 5
0
def test_mask_time():
    """Check MaskTimeGPU fills masked regions with the mean or with a fixed value.

    ``region_mask`` is replaced by a mock returning fixed boolean masks, so
    the masked positions along the last axis are known in advance.
    """
    # Spectrogram dimensions used for both test inputs
    c, f, t = 2, 120, 80

    min_size = 5
    max_size = 7

    sg = AudioSpectrogram(torch.rand([c, f, t]))
    val = 10  # Use a value not in the original spectrogram
    # Values rise linearly from 0 to 1 along the last axis, repeated for every
    # channel and row
    gradient_sg = AudioSpectrogram(
        torch.linspace(0, 1, t).view(1, 1, t).repeat([c, f, 1]))
    ones = torch.ones_like(sg)

    # Test patching with mean (mask_val=None); the mock returns a single mask
    # selecting the first 10 positions of the last axis
    with patch(
            "fastaudio.augment.functional.region_mask",
            side_effect=[
                torch.BoolTensor([[1] * 10 + [0] * (t - 10)]),
            ],
    ):
        mask_with_mean = MaskTimeGPU(min_size=min_size,
                                     max_size=max_size,
                                     mask_val=None)
        # Use a gradient so we can be sure the mean will never show up outside the mask
        inp, out = apply_transform(mask_with_mean, gradient_sg)
        # Mean of the masked input region per leading index, shaped to
        # broadcast over the last two axes
        channelwise_mean = inp[..., :10].mean(dim=(-2, -1)).reshape(-1, 1, 1)
        # The masked region must hold the mean
        _test_close(
            out[..., :10],
            channelwise_mean * ones[..., :10],
        )
        # Outside the mask, no element may equal the mean
        assert not (out[..., 10:]
                    == channelwise_mean).any(), out == channelwise_mean

    # Test multiple masks (and patching with value); the mock returns two
    # masks: the first 10 and the last 10 positions of the last axis
    with patch(
            "fastaudio.augment.functional.region_mask",
            side_effect=[
                torch.BoolTensor([[1] * 10 + [0] * (t - 10),
                                  [0] * (t - 10) + [1] * 10]),
            ],
    ):
        mask_with_val = MaskTimeGPU(min_size=min_size,
                                    num_masks=2,
                                    max_size=max_size,
                                    mask_val=val)
        inp, out = apply_transform(mask_with_val, sg)
        # Both masked regions must hold ``val``
        _test_eq(
            out[..., :10],
            val * ones[..., :10],
        )
        _test_eq(
            out[..., t - 10:],
            val * ones[..., t - 10:],
        )
        # The region between the two masks must not contain ``val``
        matches = out[..., 10:t - 10] == val
        assert not matches.any(), matches
def test_crop_time_repeat_padding():
    "Test that repeat padding works when cropping time"
    n_repeats = 3
    signal = test_audio_tensor()
    # Request n_repeats times the signal's own duration (in ms)
    repeat_crop = CropTime(n_repeats * 1000 * signal.duration,
                           pad_mode=AudioPadType.Repeat)
    to_spec = AudioToSpec.from_cfg(AudioConfig.Voice())
    spec = to_spec(signal)
    spec_in, spec_out = apply_transform(repeat_crop, spec)
    # The transform's input keeps its width; the output width must differ
    _test_eq(spec_in.width, spec.width)
    _test_ne(spec.width, spec_out.width)
def test_delta_channels():
    "Delta triples nchannels of a one-channel spectrogram and keeps the remaining shape"
    delta_tfm = Delta()
    mono = test_audio_tensor(channels=1)
    spec = AudioToSpec.from_cfg(AudioConfig.Voice())(mono)
    before, after = apply_transform(delta_tfm, spec)

    _test_eq(after.nchannels, before.nchannels * 3)
    _test_eq(after.shape[1:], before.shape[1:])
    # The first two output channels must not be identical
    _test_ne(after[0], after[1])
def test_mask_freq():
    """Apply a randomly parameterised mask and check the masked region holds ``val``.

    NOTE(review): despite the name, this test builds a ``MaskTime`` transform
    and slices the last axis; confirm whether a frequency mask was intended.
    """
    size, start, val = (random.randint(1, 50) for _ in range(3))
    masker = MaskTime(size=size, start=start, val=val)
    signal = test_audio_tensor()
    spec = AudioToSpec.from_cfg(AudioConfig.Voice())(signal)
    before, after = apply_transform(masker, spec)
    # Region of the last axis that the mask is expected to cover
    region = slice(start, start + size)
    _test_eq(
        after[:, :, region],
        val * torch.ones_like(before)[:, :, region],
    )
Ejemplo n.º 9
0
def test_delta_channels():
    "DeltaGPU triples nchannels and produces pairwise-distinct channels for two-channel input"
    delta_tfm = DeltaGPU()
    # Two channels, so more than one channel is checked explicitly
    stereo = test_audio_tensor(channels=2)
    spec = AudioToSpec.from_cfg(AudioConfig.Voice())(stereo)
    before, after = apply_transform(delta_tfm, spec)

    _test_eq(after.nchannels, before.nchannels * 3)
    _test_eq(after.shape[-2:], before.shape[-2:])
    # Pairs of output indices that must not hold (nearly) equal data
    distinct_pairs = [(0, 2), (1, 3), (0, 4), (1, 5), (2, 4), (3, 5)]
    for left, right in distinct_pairs:
        assert not torch.allclose(after[left], after[right])
Ejemplo n.º 10
0
 def test_max(self):
     """With torch.rand patched to return 1.0 then 0.0, the first 6 of 10 positions are masked."""
     # Values handed out, in order, by the patched torch.rand
     fake_draws = [
         torch.Tensor([[[[1.0]]]]),
         torch.Tensor([[[[0.0]]]]),
     ]
     expected = torch.BoolTensor([[[[1] * 6 + [0] * 4]]])
     with patch("torch.rand", side_effect=fake_draws):
         _test_eq(region_mask(1, 4, 6, 10), expected)
Ejemplo n.º 11
0
def test_resize_signal_repeat(audio):
    """
    Check pad_mode repeat: every consecutive chunk of the output that is
    as long as the input must equal the input
    """
    n_repeats = 3
    full_ms = audio.duration * 1000
    repeater = ResizeSignal(full_ms * n_repeats, pad_mode=AudioPadType.Repeat)
    original, repeated = apply_transform(repeater, audio)
    for k in range(n_repeats):
        start = int(k * original.nsamples)
        stop = int(start + original.nsamples)
        _test_eq(repeated[:, start:stop], original)
Ejemplo n.º 12
0
 def test_multiple_masks(self):
     """Two masks are produced, one per row, from the patched torch.rand values."""
     # Values handed out, in order, by the patched torch.rand
     fake_draws = [
         torch.Tensor([[1.0], [0.0]]),
         torch.Tensor([[0.0], [0.5]]),
     ]
     first_row = [1] * 6 + [0] * 4
     second_row = [0] * 3 + [1] * 4 + [0] * 3
     expected = torch.BoolTensor([first_row, second_row])
     with patch("torch.rand", side_effect=fake_draws):
         _test_eq(region_mask(2, 4, 6, 10), expected)
Ejemplo n.º 13
0
 def test_min(self):
     """With torch.rand patched to return 0.0 then 0.5, 4 positions in the middle of 10 are masked."""
     # Values handed out, in order, by the patched torch.rand; the second
     # value also covers a mask that starts in the middle
     fake_draws = [
         torch.Tensor([0.0]),
         torch.Tensor([0.5]),
     ]
     expected = torch.BoolTensor([0] * 3 + [1] * 4 + [0] * 3)
     with patch("torch.rand", side_effect=fake_draws):
         _test_eq(region_mask(1, 4, 6, 10), expected)
def test_crop_time_with_pipeline(ex_files):
    """
    Cropping the spectrogram (AudioToSpec->CropTime) and resizing the
    signal first (ResizeSignal->AudioToSpec) must give images of equal width
    """
    opener = OpenAudio(ex_files)
    crop_ms = random.randint(1000, 5000)
    db_mel_spec = SpectrogramTransformer(mel=True, to_db=True)
    # Spectrogram first, then crop in time
    spec_then_crop = Pipeline([
        opener,
        db_mel_spec(hop_length=128),
        CropTime(crop_ms),
    ])
    # Resize the raw signal first, then take the spectrogram
    resize_then_spec = Pipeline([
        opener,
        ResizeSignal(crop_ms),
        db_mel_spec(hop_length=128),
    ])
    for idx in range(4):
        _test_eq(spec_then_crop(idx).width, resize_then_spec(idx).width)
Ejemplo n.º 15
0
def test_resample_multi_channel(audio):
    """Resampling a 3-channel signal to 8000 keeps 3 channels and a consistent sample count."""
    # The ``audio`` argument is not used; a 3-channel signal is built instead
    multi = test_audio_tensor(channels=3)
    target_sr = 8000
    _, resampled = apply_transform(Resample(target_sr), multi)
    _test_eq(resampled.nsamples, resampled.duration * target_sr)
    _test_eq(resampled.nchannels, 3)
    _test_eq(resampled.sr, target_sr)
Ejemplo n.º 16
0
def test_resizing_signal():
    "Can use the ResizeSignal Transform"
    mono = test_audio_tensor(seconds=10, sr=1000)
    multi = test_audio_tensor(channels=2)

    for seconds in (1, 2, 5):
        target_ms = seconds * 1000

        # Single-channel signal: duration and sample count must agree
        src, resized = apply_transform(ResizeSignal(target_ms), mono)
        _test_eq(resized.duration, seconds)
        _test_eq(resized.nsamples, resized.duration * src.sr)

        # Two-channel signal: only the duration is checked
        _, resized_multi = apply_transform(ResizeSignal(target_ms), multi)
        _test_eq(resized_multi.duration, seconds)
Ejemplo n.º 17
0
def test_cropping():
    """ResizeSignal crops single- and multi-channel signals to the requested duration."""
    audio = test_audio_tensor(seconds=10, sr=1000)
    # A two-channel signal, so the multi-channel path is actually exercised
    mc_audio = test_audio_tensor(seconds=10, sr=1000, channels=2)

    for i in [1, 2, 5]:
        inp, out = apply_transform(ResizeSignal(i * 1000), audio.clone())

        _test_eq(out.duration, i)
        _test_eq(out.nsamples, out.duration * inp.sr)

        # Multi Channel Cropping
        mc_inp, mc = apply_transform(ResizeSignal(i * 1000), mc_audio.clone())
        _test_eq(mc.duration, i)
        # Cropping must not change the number of channels
        _test_eq(mc.nchannels, mc_inp.nchannels)
Ejemplo n.º 18
0
def test_shift():
    """_shift moves values along the last axis and fills vacated slots with 0 (offsets 4 and -2)."""
    single = torch.arange(1, 11).unsqueeze(0)    # [[1, ..., 10]]
    triple = torch.arange(1, 31).reshape(3, 10)  # rows 1-10, 11-20, 21-30
    batch = torch.stack([triple] * 4)

    _test_eq(batch.shape, torch.Size([4, 3, 10]))

    # Positive offset: values move towards the end, zeros enter at the front
    shifted_right = torch.tensor([[0, 0, 0, 0, 1, 2, 3, 4, 5, 6]])
    _test_eq(_shift(single, 4), shifted_right)

    # Negative offset: values move towards the front, zeros enter at the end
    shifted_left = torch.tensor(
        [
            [3, 4, 5, 6, 7, 8, 9, 10, 0, 0],
            [13, 14, 15, 16, 17, 18, 19, 20, 0, 0],
            [23, 24, 25, 26, 27, 28, 29, 30, 0, 0],
        ]
    )
    _test_eq(_shift(triple, -2), shifted_left)
Ejemplo n.º 19
0
 def test_shape(self):
     """For the sampled arguments, the mask shape is (first argument, last argument)."""
     cases = [
         ((1, 5, 7, 10), (1, 10)),
         ((2, 3, 7, 12), (2, 12)),
         ((4, 0, 3, 3), (4, 3)),
     ]
     for args, expected_shape in cases:
         _test_eq(region_mask(*args).shape, expected_shape)
Ejemplo n.º 20
0
def test_resample(audio):
    """Resampling to the signal's own sample rate leaves the rate and the data unchanged."""
    same_rate = Resample(audio.sr)
    before, after = apply_transform(same_rate, audio)
    assert before.sr == after.sr
    _test_eq(before.data, after.data)
Ejemplo n.º 21
0
def test_cropping():
    """ResizeSignal crops single- and multi-channel signals to 1, 2 and 5 seconds."""
    audio = test_audio_tensor(seconds=10, sr=1000)
    # A two-channel signal, so the multi-channel path is actually exercised
    mc_audio = test_audio_tensor(seconds=10, sr=1000, channels=2)

    for seconds in (1, 2, 5):
        target_ms = seconds * 1000

        inp, out = apply_transform(ResizeSignal(target_ms), audio.clone())
        _test_eq(out.duration, seconds)
        _test_eq(out.nsamples, out.duration * inp.sr)

        # Multi Channel Cropping
        mc_inp, mc = apply_transform(ResizeSignal(target_ms), mc_audio.clone())
        _test_eq(mc.duration, seconds)
        # Cropping must not change the number of channels
        _test_eq(mc.nchannels, mc_inp.nchannels)
Ejemplo n.º 22
0
def test_down_mix_mono(audio):
    "Test downmixing 1 channel has no effect"
    to_mono = DownmixMono()
    before, after = apply_transform(to_mono, audio)
    # The data must be equal before and after the transform
    _test_eq(before.data, after.data)
Ejemplo n.º 23
0
def test_no_rolling(audio):
    """SignalShifter with roll=False keeps the shape of the data."""
    shifter = SignalShifter(p=1, max_pct=0.5, roll=False)
    before, after = apply_transform(shifter, audio)
    _test_eq(before.data.shape, after.data.shape)
Ejemplo n.º 24
0
def test_shift_max_time(audio):
    """SignalShifter configured with max_time keeps the shape of the data."""
    shifter = SignalShifter(max_time=1)
    before, after = apply_transform(shifter, audio)
    _test_eq(before.data.shape, after.data.shape)
Ejemplo n.º 25
0
def test_shift_with_zero():
    """A shift of 0 must return values equal to the input."""
    values = torch.arange(1, 10)
    expected = torch.arange(1, 10)
    _test_eq(_shift(values, 0), expected)
Ejemplo n.º 26
0
def test_padding_both_side_resize(audio):
    "Make sure they are padding on both sides"
    # Ask for one second more than the input so that padding is required
    target_ms = (audio.duration + 1) * 1000
    default_pad = ResizeSignal(target_ms)
    _, padded = apply_transform(default_pad, audio)
    # The first two and the last two samples must be equal
    head, tail = padded[:, 0:2], padded[:, -2:]
    _test_eq(head, tail)
Ejemplo n.º 27
0
def test_resize_same_duration(audio):
    "Asking to resize to the duration should return the audio back"
    same_length = ResizeSignal(audio.duration * 1000)
    before, after = apply_transform(same_length, audio)
    _test_eq(before, after)