Beispiel #1
0
def test_saved_spectrogram_keeps_metadata():
    """Check that a spectrogram's sample rate survives a save/load round trip.

    A dB mel spectrogram is built from the sample audio tensor, written to a
    temporary file with ``torch.save``, read back with ``torch.load``, and the
    reloaded object's ``sr`` is compared against the source audio's ``sr``.
    """
    # NOTE(review): the original comment here read "Same issue as the test
    # above"; that earlier test is not part of this excerpt, so the issue it
    # refers to cannot be confirmed from here.
    audio = test_audio_tensor()
    make_transform = SpectrogramTransformer(mel=True, to_db=True)
    audio_to_spec = make_transform(f_max=20000, n_mels=137)
    spectrogram = audio_to_spec(audio)

    with TemporaryFile(mode="wb+") as buffer:
        torch.save(spectrogram, buffer)
        # Rewind to the beginning of the file so the load reads what was
        # just written.
        buffer.seek(0)
        restored = torch.load(buffer)
        assert restored.sr == audio.sr
Beispiel #2
0
def test_spectrograms_right_side_up():
    """Check that a very low tone lands in the lowest spectrogram rows.

    A one-second 5 Hz cosine sampled at 16 kHz is converted to a dB mel
    spectrogram. For every position along the last axis the row holding the
    maximum (along dim 1) is found, and the most common such row must be
    below 2.
    """
    sample_rate = 16000
    tone_hz = 5

    make_transform = SpectrogramTransformer(mel=True, to_db=True)
    audio_to_spec = make_transform(
        sample_rate=sample_rate,
        n_fft=1024,
        win_length=1024,
        hop_length=512,
        f_min=0.0,
        f_max=20000,
        pad=0,
        n_mels=137,
    )

    # One second of samples: 0, 1/sr, 2/sr, ...
    timeline = torch.arange(0, 1.0, 1.0 / sample_rate)
    waveform = 0.5 * torch.cos(2 * math.pi * tone_hz * timeline)

    # Add a leading axis of size 1 before wrapping
    # (presumably the channel axis; confirm against AudioTensor).
    audio = AudioTensor(waveform.unsqueeze(0), sample_rate)
    spectrogram = audio_to_spec(audio)

    # Row index of the peak along dim 1, then the most frequent of those rows.
    peak_rows = spectrogram.max(dim=1).indices
    dominant_row = peak_rows.mode().values.item()
    assert dominant_row < 2
def test_crop_time_with_pipeline(ex_files):
    """
    Cropping after the spectrogram transform (DBMelSpec -> CropTime) and
    resizing the signal before it (ResizeSignal -> DBMelSpec) must produce
    spectrograms of the same width.
    """
    open_audio = OpenAudio(ex_files)
    # Random target duration shared by both pipelines
    # (presumably milliseconds; confirm against CropTime/ResizeSignal).
    target_duration = random.randint(1000, 5000)
    make_transform = SpectrogramTransformer(mel=True, to_db=True)

    # Spectrogram first, then crop the spectrogram in time.
    spec_then_crop = Pipeline([
        open_audio,
        make_transform(hop_length=128),
        CropTime(target_duration),
    ])
    # Resize the raw signal first, then compute the spectrogram.
    resize_then_spec = Pipeline([
        open_audio,
        ResizeSignal(target_duration),
        make_transform(hop_length=128),
    ])

    for index in range(4):
        cropped_width = spec_then_crop(index).width
        resized_width = resize_then_spec(index).width
        _test_eq(cropped_width, resized_width)
Beispiel #4
0
def test_load_audio():
    """Check the metadata of the sample audio tensor and of its spectrograms.

    First the sample audio's own attributes are verified, then the attributes
    of a dB mel spectrogram built with explicit ``f_max``/``n_mels``, and
    finally those of a second spectrogram built with a custom ``hop_length``
    while ``n_mels`` is left at the ``MelSpectrogram`` signature default.
    """
    audio = test_audio_tensor()
    make_transform = SpectrogramTransformer(mel=True, to_db=True)
    spec = make_transform(f_max=20000, n_mels=137)(audio)

    # --- source audio ---
    # Exact type match on purpose: a Tensor subclass must not pass here.
    assert type(audio.data) is torch.Tensor
    assert audio.sr == 16000
    assert audio.nchannels == 1
    assert audio.nsamples == 32000
    assert audio.duration == 2

    # --- spectrogram with explicit f_max / n_mels ---
    assert spec.f_max == 20000
    assert spec.hop_length == 512
    assert spec.sr == audio.sr
    assert spec.mel
    assert spec.to_db
    assert spec.nchannels == 1
    assert spec.height == 137
    assert spec.n_mels == spec.height
    assert spec.width == 63

    # --- spectrogram with a custom hop length, default n_mels ---
    mel_params = inspect.signature(MelSpectrogram).parameters
    custom_hop = 345
    hop_spec = make_transform(f_max=20000, hop_length=custom_hop)(audio)
    assert hop_spec.n_mels == mel_params["n_mels"].default
    assert hop_spec.n_fft == 1024
    assert hop_spec.shape[1] == hop_spec.n_mels
    assert hop_spec.hop_length == custom_hop

    # The spectrogram and the audio should have the same duration; both are
    # computed on the fly because transforms can change their duration.
    # NOTE(review): the result of `_close` is not asserted; this only checks
    # anything if `_close` raises on mismatch. Confirm against its definition.
    _close(hop_spec.duration, audio.duration, eps=0.1)