def test_crop_time_after_padding():
    """Run ``CropTime`` with ``AudioPadType.Zeros_After`` on a spectrogram.

    The crop target is ``(sg.duration + 5) * 1000``, i.e. derived from a value
    larger than the spectrogram's own duration.

    NOTE(review): the only assertion compares the spectrogram's duration with
    the duration of the audio it was built from; the transform's output is
    never inspected. Confirm whether an assertion on the output was intended.
    """
    source_audio = test_audio_tensor()
    to_spec = AudioToSpec.from_cfg(AudioConfig.Voice())
    sg = to_spec(source_audio)
    target = (sg.duration + 5) * 1000
    padded_crop = CropTime(target, pad_mode=AudioPadType.Zeros_After)
    # The result is not used below; the call only has to complete.
    apply_transform(padded_crop, sg.clone())
    _test_ne(sg.duration, source_audio.duration)
def test_crop_time():
    """Run ``CropTime`` with targets of 1000, 2000 and 5000 on a ``seconds=3`` clip.

    For each target, the rounded output duration must equal the target divided
    by 1000, and the output width must be within ``eps=1.01`` of the input
    width scaled by ``target_seconds / inp.duration``.
    """
    for target_seconds in (1, 2, 5):
        to_spec = AudioToSpec.from_cfg(AudioConfig.Voice())
        clip = test_audio_tensor(seconds=3)
        cropper = CropTime(target_seconds * 1000)
        inp, out = apply_transform(cropper, to_spec(clip))
        _test_eq(target_seconds, round(out.duration))
        scaled_width = int((target_seconds / inp.duration) * inp.width)
        _test_close(out.width, scaled_width, eps=1.01)
Beispiel #3
0
def test_basic_pipeline():
    """Check that an ``OpenAudio`` + ``AudioToSpec`` pipeline keeps ``hop_length``.

    A generated test clip is saved as a wav file inside a temporary directory
    (so nothing is left behind in the working directory and concurrent runs
    cannot clash on a shared ``./test.wav``), pushed through the pipeline, and
    the resulting item's ``hop_length`` is compared with the config value.
    """
    import os
    import tempfile

    cfg = {"mel": False, "to_db": False, "hop_length": 128, "n_fft": 400}
    with tempfile.TemporaryDirectory() as tmp_dir:
        wav_path = os.path.join(tmp_dir, "test.wav")
        test_audio_tensor().save(wav_path)
        opener = OpenAudio([wav_path])
        to_spec = AudioToSpec.from_cfg(cfg)
        pipe = Pipeline([opener, to_spec])
        # Evaluate while the directory still exists: the file is read here.
        result = pipe(0)
    assert result.hop_length == cfg["hop_length"]
def test_resize_int():
    """``TfmResize`` built from a single int must give a ``size x size`` output.

    Only the dimensions after the first one are compared.
    """
    side = 224
    resizer = TfmResize(side)
    clip = test_audio_tensor()
    to_spec = AudioToSpec.from_cfg(AudioConfig.Voice())
    inp, out = apply_transform(resizer, to_spec(clip))
    expected_shape = torch.Size([side, side])
    _test_eq(out.shape[1:], expected_shape)
Beispiel #5
0
def test_basic_pipeline():
    """Check that an ``OpenAudio`` + ``AudioToSpec`` pipeline keeps ``hop_length``.

    Uses one fixed wav file from the ``URLs.SAMPLE_SPEAKERS10`` dataset and
    compares the pipeline output's ``hop_length`` with the config value.
    """
    cfg = {
        "mel": False,
        "to_db": False,
        "hop_length": 128,
        "n_fft": 400,
    }
    dataset_root = untar_data(URLs.SAMPLE_SPEAKERS10)
    wav_file = dataset_root / "train/f0001_us_f0001_00001.wav"

    opener = OpenAudio([wav_file])
    to_spec = AudioToSpec.from_cfg(cfg)
    pipe = Pipeline([opener, to_spec])

    result = pipe(0)
    assert result.hop_length == cfg["hop_length"]
def test_crop_time_repeat_padding():
    """Test that repeat padding works when cropping time.

    The crop target is three times the clip's duration (times 1000). The
    transform's input must keep the spectrogram's width, while its output
    width must differ from it.
    """
    n_repeats = 3
    clip = test_audio_tensor()
    target = n_repeats * 1000 * clip.duration
    repeat_crop = CropTime(target, pad_mode=AudioPadType.Repeat)
    to_spec = AudioToSpec.from_cfg(AudioConfig.Voice())
    sg = to_spec(clip)
    inp, out = apply_transform(repeat_crop, sg)
    _test_eq(inp.width, sg.width)
    _test_ne(sg.width, out.width)
def test_delta_channels():
    """Check ``Delta`` output channels on a ``channels=1`` clip.

    nchannels for a spectrogram is how many channels its original audio had.
    The output must report three times the input's ``nchannels``, keep the
    input's shape after the first dimension, and have differing entries at
    index 0 and index 1.
    """
    delta_tfm = Delta()
    mono_clip = test_audio_tensor(channels=1)
    to_spec = AudioToSpec.from_cfg(AudioConfig.Voice())
    inp, out = apply_transform(delta_tfm, to_spec(mono_clip))

    _test_eq(out.nchannels, inp.nchannels * 3)
    _test_eq(out.shape[1:], inp.shape[1:])
    first, second = out[0], out[1]
    _test_ne(first, second)
Beispiel #8
0
def test_load_audio_with_basic_config():
    """Check that config values are passed through to the spectrogram.

    A generated test clip is converted using
    ``BasicSpectrogram(n_fft=2000, hop_length=155)``. The spectrogram must
    report the config's ``n_fft`` and a width of
    ``int(nsamples / hop_length) + 1``.
    """
    spec_cfg = AudioConfig.BasicSpectrogram(n_fft=2000, hop_length=155)
    to_spec = AudioToSpec.from_cfg(spec_cfg)
    clip = test_audio_tensor()
    sg = to_spec(clip)
    assert sg.n_fft == spec_cfg.n_fft
    expected_width = int(clip.nsamples / spec_cfg.hop_length) + 1
    assert sg.width == expected_width
def test_mask_freq():
    """Apply a randomly parameterised ``MaskTime`` and check the masked slice.

    ``size``, ``start`` and ``val`` are each drawn with
    ``random.randint(1, 50)``. After the transform, the slice
    ``start:start + size`` along the third axis of the output must equal
    ``val`` everywhere.

    NOTE(review): the function name says "freq", but the body builds a
    ``MaskTime`` and slices the third axis; confirm which was intended.
    """
    size, start, val = (random.randint(1, 50) for _ in range(3))
    masker = MaskTime(size=size, start=start, val=val)
    clip = test_audio_tensor()
    to_spec = AudioToSpec.from_cfg(AudioConfig.Voice())
    inp, out = apply_transform(masker, to_spec(clip))
    masked = slice(start, start + size)
    _test_eq(
        out[:, :, masked],
        val * torch.ones_like(inp)[:, :, masked],
    )
Beispiel #10
0
def test_load_audio_with_basic_config():
    """Check that config values are passed through to the spectrogram.

    One fixed wav file from the ``URLs.SAMPLE_SPEAKERS10`` dataset is opened
    and converted using ``BasicSpectrogram(n_fft=2000, hop_length=155)``. The
    spectrogram must report the config's ``n_fft`` and a width of
    ``int(nsamples / hop_length) + 1``.
    """
    dataset_root = untar_data(URLs.SAMPLE_SPEAKERS10)
    wav_file = dataset_root / "train/f0001_us_f0001_00001.wav"
    opener = OpenAudio([wav_file])
    # Open the file once and reuse the result for both the conversion and the
    # expected-width computation instead of reading it from disk twice.
    audio = opener(0)
    sg_cfg = AudioConfig.BasicSpectrogram(n_fft=2000, hop_length=155)
    a2sg = AudioToSpec.from_cfg(sg_cfg)
    sg = a2sg(audio)
    assert sg.n_fft == sg_cfg.n_fft
    assert sg.width == int(audio.nsamples / sg_cfg.hop_length) + 1
Beispiel #11
0
def test_delta_channels():
    """Check ``DeltaGPU`` output channels on a ``channels=2`` clip.

    nchannels for a spectrogram is how many channels its original audio had.
    The output must report three times the input's ``nchannels`` and keep the
    input's last two dimensions. Selected pairs of output entries must not be
    numerically close to each other.
    """
    delta_tfm = DeltaGPU()
    # Explicitly check more than one channel
    stereo_clip = test_audio_tensor(channels=2)
    to_spec = AudioToSpec.from_cfg(AudioConfig.Voice())
    inp, out = apply_transform(delta_tfm, to_spec(stereo_clip))

    _test_eq(out.nchannels, inp.nchannels * 3)
    _test_eq(out.shape[-2:], inp.shape[-2:])
    index_pairs = ((0, 2), (1, 3), (0, 4), (1, 5), (2, 4), (3, 5))
    for left, right in index_pairs:
        assert not torch.allclose(out[left], out[right])
def test_sg_roll():
    """``SGRoll`` output must differ from its input spectrogram.

    The spectrogram is built from the generated test clip with the
    ``BasicSpectrogram`` config.
    """
    roller = SGRoll()
    clip = test_audio_tensor()
    to_spec = AudioToSpec.from_cfg(AudioConfig.BasicSpectrogram())
    sg = to_spec(clip)
    inp, out = apply_transform(roller, sg)
    _test_ne(inp, out)
def test_signal_shift_on_sg():
    """``SignalShifter(1, 1)`` output must differ from its input spectrogram.

    The spectrogram is built from the generated test clip with the
    ``BasicSpectrogram`` config.
    """
    clip = test_audio_tensor()
    to_spec = AudioToSpec.from_cfg(AudioConfig.BasicSpectrogram())
    shift_tfm = SignalShifter(1, 1)
    sg = to_spec(clip)
    inp, out = apply_transform(shift_tfm, sg)
    _test_ne(inp, out)