Example #1
def test_basic_pipeline():
    cfg = {"mel": False, "to_db": False, "hop_length": 128, "n_fft": 400}
    test_audio_tensor().save("./test.wav")
    f = "./test.wav"
    oa = OpenAudio([f])
    a2s = AudioToSpec.from_cfg(cfg)
    pipe = Pipeline([oa, a2s])
    assert pipe(0).hop_length == cfg["hop_length"]
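A minimal sketch of what that two-stage Pipeline composes, step by step, using only the calls already shown in the test (nothing new is assumed beyond them):

# Hedged sketch: the same objects as in test_basic_pipeline, applied by hand.
oa = OpenAudio(["./test.wav"])  # indexing into the file list loads audio
a2s = AudioToSpec.from_cfg(
    {"mel": False, "to_db": False, "hop_length": 128, "n_fft": 400}
)
audio = oa(0)         # item 0 -> AudioTensor
sg = a2s(audio)       # AudioTensor -> spectrogram
print(sg.hop_length)  # cfg settings travel with the spectrogram: 128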
def test_crop_time_after_padding():
    audio = test_audio_tensor()
    a2s = AudioToSpec.from_cfg(AudioConfig.Voice())
    sg = a2s(audio)
    crop_time = CropTime((sg.duration + 5) * 1000, pad_mode=AudioPadType.Zeros_After)
    inp, out = apply_transform(crop_time, sg.clone())
    # Padding 5s past the original length should change the output duration
    _test_ne(out.duration, inp.duration)
def test_crop_time():
    for i in [1, 2, 5]:
        a2s = AudioToSpec.from_cfg(AudioConfig.Voice())
        audio = test_audio_tensor(seconds=3)
        crop = CropTime(i * 1000)
        inp, out = apply_transform(crop, a2s(audio))
        _test_eq(i, round(out.duration))
        _test_close(out.width, int((i / inp.duration) * inp.width), eps=1.01)
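A note on the tolerance above: spectrogram width counts STFT frames, so cropping to a fraction of the duration keeps the same fraction of columns only up to one frame of rounding; eps=1.01 allows exactly that off-by-one.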
def test_resize_int():
    # Test when size is an int
    size = 224
    resize_int = TfmResize(size)
    audio = test_audio_tensor()
    a2s = AudioToSpec.from_cfg(AudioConfig.Voice())
    sg = a2s(audio)
    inp, out = apply_transform(resize_int, sg)
    _test_eq(out.shape[1:], torch.Size([size, size]))
def test_delta_channels():
    " nchannels for a spectrogram is how many channels its original audio had "
    delta = Delta()
    audio = test_audio_tensor(channels=1)
    a2s = AudioToSpec.from_cfg(AudioConfig.Voice())
    sg = a2s(audio)
    inp, out = apply_transform(delta, sg)

    _test_eq(out.nchannels, inp.nchannels * 3)
    _test_eq(out.shape[1:], inp.shape[1:])
    _test_ne(out[0], out[1])
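Delta stacks the original spectrogram with its first- and second-order differences along the channel axis, which is why a 1-channel input comes out with 3 channels while height and width are unchanged, and why channel 0 differs from channel 1.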
Example #6
def test_load_audio_with_basic_config():
    """
    Grab a random file, test that the n_fft are passed successfully
    via config and stored in sg settings
    """
    sg_cfg = AudioConfig.BasicSpectrogram(n_fft=2000, hop_length=155)
    a2sg = AudioToSpec.from_cfg(sg_cfg)
    audio = test_audio_tensor()
    sg = a2sg(audio)
    assert sg.n_fft == sg_cfg.n_fft
    assert sg.width == int(audio.nsamples / sg_cfg.hop_length) + 1
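For concreteness, a worked instance of that width formula, assuming test_audio_tensor() defaults to 2 seconds at 16 kHz (an assumption about the helper, not something the test states):

# nsamples = 2 s * 16000 Hz = 32000 samples
# width    = 32000 // 155 + 1 = 207 STFT columns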
def test_crop_time_repeat_padding():
    "Test that repeat padding works when cropping time"
    repeat = 3
    audio = test_audio_tensor()
    # Crop to 3x the original duration so repeat padding has to kick in
    crop_repeat = CropTime(repeat * 1000 * audio.duration,
                           pad_mode=AudioPadType.Repeat)
    a2s = AudioToSpec.from_cfg(AudioConfig.Voice())
    sg = a2s(audio)
    inp, out = apply_transform(crop_repeat, sg)
    _test_eq(inp.width, sg.width)
    _test_ne(sg.width, out.width)
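With repeat padding out to 3x the duration, out.width should land at roughly 3 * inp.width; the test only asserts inequality, which is the weaker but rounding-robust check.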
Example #8
def test_saved_audiotensor_keeps_metadata():
    # This test is related to this issue: https://github.com/fastaudio/fastaudio/issues/95
    # What happens is that multiprocessing uses pickling to distribute the data
    # and the way it was done inside fastai breaks when loading the metadata
    audio_tensor = test_audio_tensor()

    with TemporaryFile("wb+") as f:
        torch.save(audio_tensor, f)
        f.seek(0, 0)  # Go back to the beginning of the file to read
        new_audio_tensor = torch.load(f)
        assert new_audio_tensor.sr == audio_tensor.sr
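Both torch.save and the multiprocessing path mentioned in the comment serialize via pickle, so the same metadata check can be made with pickle directly; a minimal sketch under that assumption:

import pickle

# Round-trip through pickle; the fix for issue #95 is what keeps .sr intact
restored = pickle.loads(pickle.dumps(audio_tensor))
assert restored.sr == audio_tensor.sr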
Example #9
def test_saved_spectrogram_keeps_metadata():
    # Same issue as the test above
    item0 = test_audio_tensor()
    DBMelSpec = SpectrogramTransformer(mel=True, to_db=True)
    a2s = DBMelSpec(f_max=20000, n_mels=137)
    sg = a2s(item0)

    with TemporaryFile("wb+") as f:
        torch.save(sg, f)
        f.seek(0, 0)  # Go back to the beginning of the file to read
        new_sg = torch.load(f)
        assert new_sg.sr == item0.sr
def test_mask_time():
    # create a random time mask and test that it is being correctly applied
    size, start, val = [random.randint(1, 50) for i in range(3)]
    time_mask_test = MaskTime(size=size, start=start, val=val)
    audio = test_audio_tensor()
    a2s = AudioToSpec.from_cfg(AudioConfig.Voice())
    sg = a2s(audio)
    inp, out = apply_transform(time_mask_test, sg)
    _test_eq(
        out[:, :, start:start + size],
        val * torch.ones_like(inp)[:, :, start:start + size],
    )
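MaskTime masks along the last (time) axis. A sketch of the analogous check for fastaudio's frequency-axis counterpart, assuming MaskFreq accepts the same size/start/val parameters (the mask then runs along dim -2):

def test_mask_freq_sketch():
    # Hypothetical mirror of the test above; MaskFreq's signature is assumed
    size, start, val = [random.randint(1, 50) for i in range(3)]
    freq_mask_test = MaskFreq(size=size, start=start, val=val)
    sg = AudioToSpec.from_cfg(AudioConfig.Voice())(test_audio_tensor())
    inp, out = apply_transform(freq_mask_test, sg)
    _test_eq(
        out[:, start:start + size, :],
        val * torch.ones_like(inp)[:, start:start + size, :],
    )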
def test_crop_time_with_pipeline():
    """
    AudioToSpec->CropTime and ResizeSignal->AudioToSpec
    will result in same size images
    """
    afn = "./test.wav"
    test_audio_tensor().save(afn)
    ex_files = [afn] * 4
    oa = OpenAudio(ex_files)
    crop_dur = random.randint(1000, 5000)
    DBMelSpec = SpectrogramTransformer(mel=True, to_db=True)
    pipe_cropsig = Pipeline([oa, DBMelSpec(hop_length=128), CropTime(crop_dur)])
    pipe_cropspec = Pipeline(
        [
            oa,
            ResizeSignal(crop_dur),
            DBMelSpec(hop_length=128),
        ]
    )
    for i in range(4):
        _test_eq(pipe_cropsig(i).width, pipe_cropspec(i).width)
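The two pipelines agree because trimming the signal to crop_dur ms before the STFT and trimming the spectrogram to crop_dur ms after it both leave the same number of hops, hence the same number of columns.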
Example #12
def test_delta_channels():
    " nchannels for a spectrogram is how many channels its original audio had "
    delta = DeltaGPU()
    # Explicitly check more than one channel
    audio = test_audio_tensor(channels=2)
    a2s = AudioToSpec.from_cfg(AudioConfig.Voice())
    sg = a2s(audio)
    inp, out = apply_transform(delta, sg)

    _test_eq(out.nchannels, inp.nchannels * 3)
    _test_eq(out.shape[-2:], inp.shape[-2:])
    for i1, i2 in [(0, 2), (1, 3), (0, 4), (1, 5), (2, 4), (3, 5)]:
        assert not torch.allclose(out[i1], out[i2])
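The index pairs assume the usual channel layout after DeltaGPU with two input channels, [orig_0, orig_1, delta_0, delta_1, delta2_0, delta2_1]: each pair compares a channel against its delta or delta-delta (or a delta against a delta-delta), and all of those should differ for a non-constant signal.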
Example #13
@pytest.fixture(scope="module")  # module scope gives the once-per-file behavior described below
def audio():
    """
    Create a test tensor to be played with. This tensor will only be created
    once at the beginning of the tests in this file.
    """
    return test_audio_tensor()
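Tests in the same file can then request the fixture by its function name; a standard pytest usage sketch (test_uses_fixture is illustrative, not from the source):

def test_uses_fixture(audio):
    # pytest injects the tensor returned by the audio fixture above
    assert audio.duration > 0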
Example #14
def test_shape_of_sin_wave_tensor():
    sr = 16000
    secs = 2
    ai = test_audio_tensor(secs, sr)
    assert ai.duration == secs
    assert ai.nsamples == secs * sr
Example #15
def test_pre_process_audio():
    d = "data_test"
    if not os.path.isdir(d):
        os.mkdir(d)
    test_audio_tensor().save(d + "/test.wav")
    preprocess_audio_folder(d)
def test_signal_shift_on_sg():
    audio = test_audio_tensor()
    a2s = AudioToSpec.from_cfg(AudioConfig.BasicSpectrogram())
    shifter = SignalShifter(1, 1)
    inp, out = apply_transform(shifter, a2s(audio))
    _test_ne(inp, out)
def test_sg_roll():
    roll = SGRoll()
    audio = test_audio_tensor()
    a2s = AudioToSpec.from_cfg(AudioConfig.BasicSpectrogram())
    inp, out = apply_transform(roll, a2s(audio))
    _test_ne(inp, out)