def test_download_partial(httpserver):
    """Check which remotes end up on disk for different partial-download keys.

    The local HTTP server serves the same JSON fixture for every remote, so
    the only observable difference between scenarios is which target
    filenames exist under ``data_home`` afterwards.
    """
    data_home = "tests/resources/mir_datasets/maestro_download"
    filenames = (
        "1-maestro-v2.0.0.json",
        "2-maestro-v2.0.0.json",
        "3-maestro-v2.0.0.json",
    )

    def _reset():
        # Start every scenario from an empty download directory.
        if os.path.exists(data_home):
            shutil.rmtree(data_home)

    def _check(first, second, third):
        # Each flag says whether the corresponding file must exist.
        for name, expected in zip(filenames, (first, second, third)):
            assert os.path.exists(os.path.join(data_home, name)) == expected, name

    _reset()

    # Read the fixture inside a context manager so the handle is closed
    # instead of being leaked until garbage collection.
    with open("tests/resources/download/maestro-v2.0.0.json", "r") as fhandle:
        httpserver.serve_content(fhandle.read())

    remotes = {
        "all": download_utils.RemoteFileMetadata(
            filename="1-maestro-v2.0.0.json",
            url=httpserver.url,
            checksum="d41d8cd98f00b204e9800998ecf8427e",
            unpack_directories=["maestro-v2.0.0"],
        ),
        "midi": download_utils.RemoteFileMetadata(
            filename="2-maestro-v2.0.0.json",
            url=httpserver.url,
            checksum="d41d8cd98f00b204e9800998ecf8427e",
            unpack_directories=["maestro-v2.0.0"],
        ),
        "metadata": download_utils.RemoteFileMetadata(
            filename="3-maestro-v2.0.0.json",
            url=httpserver.url,
            checksum="d41d8cd98f00b204e9800998ecf8427e",
        ),
    }
    dataset = maestro.Dataset(data_home)
    dataset.remotes = remotes

    # No partial selection: only the "all" remote is expected on disk.
    dataset.download(None, False, False)
    _check(True, False, False)

    # Selecting "all" together with another key behaves like "all" alone.
    _reset()
    dataset.download(["all", "midi"], False, False)
    _check(True, False, False)

    _reset()
    dataset.download(["metadata", "midi"], False, False)
    _check(False, True, False)

    _reset()
    dataset.download(["metadata"], False, False)
    _check(False, False, True)
Example #2
0
def test_unpackdir(httpserver):
    """Download the ESC-50 fixture archive and check it is unpacked in place.

    The remote lists ``ESC-50-master`` in ``unpack_directories``; after the
    download the test expects that directory to be absent and its contents
    (``requirements.txt``, ``audio/...``) to sit directly under ``data_home``.
    """
    data_home = "tests/resources/sound_datasets/esc50_download"
    if os.path.exists(data_home):
        shutil.rmtree(data_home)

    # Serve the full dataset archive; the context manager closes the fixture
    # file as soon as its bytes have been read.
    with open("tests/resources/download/ESC-50-master.zip", "rb") as fhandle:
        httpserver.serve_content(fhandle.read())

    remotes = {
        "all": download_utils.RemoteFileMetadata(
            filename="ESC-50-master.zip",
            url=httpserver.url,
            checksum="91281932a1c5ff0473f103c628a53d77",
            unpack_directories=["ESC-50-master"],
        )
    }
    dataset = esc50.Dataset(data_home)
    dataset.remotes = remotes
    dataset.download(None, False, False)

    assert os.path.exists(data_home)
    assert not os.path.exists(os.path.join(data_home, "ESC-50-master"))

    assert os.path.exists(os.path.join(data_home, "requirements.txt"))
    assert os.path.exists(os.path.join(data_home, "audio/1-137-A-32.wav"))
def test_downloader_with_server_file(httpserver):
    """Run ``downloader`` against a plain (non-archive) file served over HTTP.

    Every scenario starts from a cleaned ``save_dir`` and downloads twice so
    the second call runs against an already-populated directory.
    """
    # NOTE(review): the fixture is read in text mode (no "b"); kept as-is
    # because the pinned checksum may depend on it -- confirm whether "rb"
    # is the intended mode.
    with open("tests/resources/remote.wav") as fhandle:
        httpserver.serve_content(fhandle.read())

    TEST_REMOTE = download_utils.RemoteFileMetadata(
        filename="remote.wav",
        url=httpserver.url,
        checksum="3f77d0d69dc41b3696f074ad6bf2852f",
    )

    save_dir = "tests/resources/tmp_download_test"

    # defaults, then download again
    _clean(save_dir)
    download_utils.downloader(save_dir, remotes={"b": TEST_REMOTE})
    download_utils.downloader(save_dir, remotes={"b": TEST_REMOTE})

    # cleanup=True, then download again with defaults
    _clean(save_dir)
    download_utils.downloader(save_dir, remotes={"b": TEST_REMOTE}, cleanup=True)
    download_utils.downloader(save_dir, remotes={"b": TEST_REMOTE})

    # defaults, then download again with force_overwrite=True
    _clean(save_dir)
    download_utils.downloader(save_dir, remotes={"b": TEST_REMOTE})
    download_utils.downloader(
        save_dir, remotes={"b": TEST_REMOTE}, force_overwrite=True
    )

    _clean(save_dir)
def test_downloader_with_server_tar(httpserver):
    """Run ``downloader`` against a ``.tar.gz`` archive served over HTTP.

    Every scenario starts from a cleaned ``save_dir`` and downloads twice so
    the second call runs against an already-populated directory.
    """
    # Close the fixture file deterministically instead of leaking the handle.
    with open("tests/resources/remote.tar.gz", "rb") as fhandle:
        httpserver.serve_content(fhandle.read())

    TEST_REMOTE = download_utils.RemoteFileMetadata(
        filename="remote.tar.gz",
        url=httpserver.url,
        checksum="9042f5eebdcd0b94aa7a3c9bf12dc51d",
    )

    save_dir = "tests/resources/_tmp_test_download_utils"

    # defaults, then download again
    _clean(save_dir)
    download_utils.downloader(save_dir, remotes={"b": TEST_REMOTE})
    download_utils.downloader(save_dir, remotes={"b": TEST_REMOTE})

    # cleanup=True, then download again with defaults
    _clean(save_dir)
    download_utils.downloader(save_dir, remotes={"b": TEST_REMOTE}, cleanup=True)
    download_utils.downloader(save_dir, remotes={"b": TEST_REMOTE})

    # defaults, then download again with force_overwrite=True
    _clean(save_dir)
    download_utils.downloader(save_dir, remotes={"b": TEST_REMOTE})
    download_utils.downloader(
        save_dir, remotes={"b": TEST_REMOTE}, force_overwrite=True
    )

    _clean(save_dir)
Example #5
0
def test_download(httpserver):
    """Download the Orchset fixture archive and re-download it with each flag combination.

    The first download is checked in detail (unpacked layout and one track's
    files); the later scenarios only verify that repeated downloads with
    different ``force_overwrite`` / ``cleanup`` flags run without raising.
    """
    data_home = "tests/resources/mir_datasets/orchset_download"
    if os.path.exists(data_home):
        shutil.rmtree(data_home)

    # Close the fixture file deterministically instead of leaking the handle.
    with open("tests/resources/download/Orchset_dataset_0.zip", "rb") as fhandle:
        httpserver.serve_content(fhandle.read())

    remotes = {
        "all": download_utils.RemoteFileMetadata(
            filename="Orchset_dataset_0.zip",
            url=httpserver.url,
            checksum="4794bc3514f7e8d1727f0d975d6d1ee2",
            unpack_directories=["Orchset"],
        )
    }
    dataset = orchset.Dataset(data_home)
    dataset.remotes = remotes
    dataset.download(None, False, False)

    # The "Orchset" directory must have been unpacked into data_home itself.
    assert os.path.exists(data_home)
    assert not os.path.exists(os.path.join(data_home, "Orchset"))

    assert os.path.exists(os.path.join(data_home, "README.txt"))
    assert os.path.exists(
        os.path.join(data_home, "Orchset - Predominant Melodic Instruments.csv")
    )
    track = dataset.track("Beethoven-S3-I-ex1")
    assert os.path.exists(track.audio_path_mono)
    assert os.path.exists(track.audio_path_stereo)
    assert os.path.exists(track.melody_path)

    # test downloading again
    dataset.download(None, False, False)

    if os.path.exists(data_home):
        shutil.rmtree(data_home)

    # test downloading twice with cleanup
    dataset.download(None, False, True)
    dataset.download(None, False, False)

    if os.path.exists(data_home):
        shutil.rmtree(data_home)

    # test downloading twice with force overwrite
    dataset.download(None, False, False)
    dataset.download(None, True, False)

    if os.path.exists(data_home):
        shutil.rmtree(data_home)

    # test downloading twice with force overwrite and cleanup
    dataset.download(None, False, True)
    dataset.download(None, True, False)

    if os.path.exists(data_home):
        shutil.rmtree(data_home)
Example #6
0
def test_download_from_remote_raises_IOError(httpserver, tmpdir):
    """Expect ``IOError`` from ``download_from_remote`` when the server answers 404."""
    httpserver.serve_content("File not found!", 404)

    unreachable_remote = download_utils.RemoteFileMetadata(
        filename="remote.wav",
        url=httpserver.url,
        checksum="1234",
    )
    destination = str(tmpdir)

    with pytest.raises(IOError):
        download_utils.download_from_remote(unreachable_remote, destination, False)
def test_download(httpserver):
    """Download the Groove MIDI fixture archive and re-download it with each flag combination.

    The first download is checked in detail (unpacked layout and one track's
    files); the later scenarios only verify that repeated downloads with
    different ``force_overwrite`` / ``cleanup`` flags run without raising.
    """
    data_home = "tests/resources/mir_datasets/groove_midi_download"
    if os.path.exists(data_home):
        shutil.rmtree(data_home)

    # Close the fixture file deterministically instead of leaking the handle.
    with open("tests/resources/download/groove-v1-0.0.zip", "rb") as fhandle:
        httpserver.serve_content(fhandle.read())

    remotes = {
        "all": download_utils.RemoteFileMetadata(
            filename="groove-v1-0.0.zip",
            url=httpserver.url,
            checksum="97a9a888d2a65cc87bb26e74df08b011",
            unpack_directories=["groove"],
        )
    }
    dataset = groove_midi.Dataset(data_home)
    dataset.remotes = remotes
    dataset.download(None, False, False)

    # The "groove" directory must have been unpacked into data_home itself.
    assert os.path.exists(data_home)
    assert not os.path.exists(os.path.join(data_home, "groove"))

    assert os.path.exists(os.path.join(data_home, "info.csv"))
    track = dataset.track("drummer1/eval_session/1")
    assert os.path.exists(track.midi_path)
    assert os.path.exists(track.audio_path)

    # test downloading again
    dataset.download(None, False, False)

    if os.path.exists(data_home):
        shutil.rmtree(data_home)

    # test downloading twice with cleanup
    dataset.download(None, False, True)
    dataset.download(None, False, False)

    if os.path.exists(data_home):
        shutil.rmtree(data_home)

    # test downloading twice with force overwrite
    dataset.download(None, False, False)
    dataset.download(None, True, False)

    if os.path.exists(data_home):
        shutil.rmtree(data_home)

    # test downloading twice with force overwrite and cleanup
    dataset.download(None, False, True)
    dataset.download(None, True, False)

    if os.path.exists(data_home):
        shutil.rmtree(data_home)
Example #8
0
def test_download_from_remote(httpserver, tmpdir):
    """Download a single served file and check that the returned path exists."""
    # NOTE(review): the fixture is read in text mode (no "b"); kept as-is
    # because the pinned checksum may depend on it -- confirm.
    with open("tests/resources/remote.wav") as fhandle:
        httpserver.serve_content(fhandle.read())

    TEST_REMOTE = download_utils.RemoteFileMetadata(
        filename="remote.wav",
        url=httpserver.url,
        checksum="3f77d0d69dc41b3696f074ad6bf2852f",
    )

    download_path = download_utils.download_from_remote(TEST_REMOTE, str(tmpdir), False)
    # Previously the result was stored but never inspected, so the test
    # asserted nothing beyond "did not raise".
    assert os.path.exists(download_path)
Example #9
0
def test_download_from_remote_destdir(httpserver, tmpdir):
    """Check that ``destination_dir`` is inserted between the save dir and the filename."""
    # NOTE(review): the fixture is read in text mode (no "b"); kept as-is
    # because the pinned checksum may depend on it -- confirm.
    with open("tests/resources/remote.wav") as fhandle:
        httpserver.serve_content(fhandle.read())

    TEST_REMOTE = download_utils.RemoteFileMetadata(
        filename="remote.wav",
        url=httpserver.url,
        checksum="3f77d0d69dc41b3696f074ad6bf2852f",
        destination_dir="subfolder",
    )

    download_path = download_utils.download_from_remote(TEST_REMOTE, str(tmpdir), False)
    expected_download_path = os.path.join(str(tmpdir), "subfolder", "remote.wav")
    assert expected_download_path == download_path
Example #10
0
def test_download_multipart_zip(mocker, mock_download_from_remote, mock_unzip):
    """A list of remotes under one key is fetched part by part, then unzipped once.

    Both the per-part download and the unzip step are mocked; the test only
    checks the calls ``downloader`` makes to them.
    """
    save_dir = "tests/resources"

    # Create empty placeholder files for the two archive parts.
    for part_path in ("tests/resources/foo.zip", "tests/resources/foo.z01"):
        Path(part_path).touch()

    zip_part = download_utils.RemoteFileMetadata(
        filename="foo.zip", url="a", checksum="1234"
    )
    z01_part = download_utils.RemoteFileMetadata(
        filename="foo.z01", url="b", checksum="2345"
    )
    multipart_zip_remote = {"foo": [zip_part, z01_part]}

    download_utils.downloader(
        save_dir, multipart_zip_remote, force_overwrite=False, cleanup=True
    )

    expected_calls = [
        mocker.call(zip_part, save_dir, False),
        mocker.call(z01_part, save_dir, False),
    ]
    mock_download_from_remote.assert_has_calls(expected_calls)
    mock_unzip.assert_called_once_with("tests/resources/foo_single.zip", cleanup=True)
def test_downloader_with_server_zip(httpserver):
    """Run ``downloader`` against a ``.zip`` archive served over HTTP.

    Every scenario starts from a cleaned ``save_dir`` and downloads twice so
    the second call runs against an already-populated directory.
    """
    # Close the fixture file deterministically instead of leaking the handle.
    with open("tests/resources/remote.zip", "rb") as fhandle:
        httpserver.serve_content(fhandle.read())

    TEST_REMOTE = download_utils.RemoteFileMetadata(
        filename="remote.zip",
        url=httpserver.url,
        checksum="7a31ccfa28bfa3fb112d16c96e9d9a89",
    )

    save_dir = "tests/resources/_tmp_test_download_utils"

    # defaults, then download again
    _clean(save_dir)
    download_utils.downloader(save_dir, remotes={"b": TEST_REMOTE})
    download_utils.downloader(save_dir, remotes={"b": TEST_REMOTE})

    # cleanup=True, then download again with defaults
    _clean(save_dir)
    download_utils.downloader(save_dir, remotes={"b": TEST_REMOTE}, cleanup=True)
    download_utils.downloader(save_dir, remotes={"b": TEST_REMOTE})

    # defaults, then download again with force_overwrite=True
    _clean(save_dir)
    download_utils.downloader(save_dir, remotes={"b": TEST_REMOTE})
    download_utils.downloader(
        save_dir, remotes={"b": TEST_REMOTE}, force_overwrite=True
    )

    # cleanup=True, then download again with force_overwrite=True
    _clean(save_dir)
    download_utils.downloader(save_dir, remotes={"b": TEST_REMOTE}, cleanup=True)
    download_utils.downloader(
        save_dir, remotes={"b": TEST_REMOTE}, force_overwrite=True
    )

    _clean(save_dir)
def test_downloader(mocker, mock_path):
    """Check how ``downloader`` dispatches remotes to the zip / tar / plain-file helpers.

    The three helpers are replaced by mocks, so only the dispatch (which
    helper is called, and with which arguments) is verified. ``mock_path`` is
    requested as a fixture and not referenced in the body.
    """
    mock_zip = mocker.patch.object(download_utils, "download_zip_file")
    mock_tar = mocker.patch.object(download_utils, "download_tar_file")
    mock_download_from_remote = mocker.patch.object(
        download_utils, "download_from_remote"
    )

    zip_remote = download_utils.RemoteFileMetadata(
        filename="remote.zip", url="a", checksum="1234"
    )
    tar_remote = download_utils.RemoteFileMetadata(
        filename="remote.tar.gz", url="a", checksum="1234"
    )
    file_remote = download_utils.RemoteFileMetadata(
        filename="remote.txt", url="a", checksum="1234"
    )

    # Zip only
    download_utils.downloader("a", remotes={"b": zip_remote})
    mock_zip.assert_called_once_with(zip_remote, "a", False, False)
    mocker.resetall()

    # tar only
    download_utils.downloader("a", remotes={"b": tar_remote})
    mock_tar.assert_called_once_with(tar_remote, "a", False, False)
    mocker.resetall()

    # file only
    download_utils.downloader("a", remotes={"b": file_remote})
    mock_download_from_remote.assert_called_once_with(file_remote, "a", False)
    mocker.resetall()

    # zip and tar
    download_utils.downloader("a", remotes={"b": zip_remote, "c": tar_remote})
    mock_zip.assert_called_once_with(zip_remote, "a", False, False)
    mock_tar.assert_called_once_with(tar_remote, "a", False, False)
    mocker.resetall()

    # zip and file
    download_utils.downloader("a", remotes={"b": zip_remote, "c": file_remote})
    mock_zip.assert_called_once_with(zip_remote, "a", False, False)
    mock_download_from_remote.assert_called_once_with(file_remote, "a", False)
    mocker.resetall()

    # tar and file
    download_utils.downloader("a", remotes={"b": tar_remote, "c": file_remote})
    mock_tar.assert_called_once_with(tar_remote, "a", False, False)
    mock_download_from_remote.assert_called_once_with(file_remote, "a", False)
    mocker.resetall()

    # zip and tar and file
    download_utils.downloader(
        "a", remotes={"b": zip_remote, "c": tar_remote, "d": file_remote}
    )
    mock_zip.assert_called_once_with(zip_remote, "a", False, False)
    mock_download_from_remote.assert_called_once_with(file_remote, "a", False)
    mock_tar.assert_called_once_with(tar_remote, "a", False, False)
    mocker.resetall()

    # test partial download
    download_utils.downloader(
        "a",
        remotes={"b": zip_remote, "c": tar_remote, "d": file_remote},
        partial_download=["b", "d"],
    )
    mock_zip.assert_called_once_with(zip_remote, "a", False, False)
    mock_download_from_remote.assert_called_once_with(file_remote, "a", False)
    # The tar remote ("c") was not selected, so its helper must stay untouched;
    # without this check the scenario would also pass if everything were
    # downloaded.
    mock_tar.assert_not_called()
    mocker.resetall()

    # test bad type partial download
    with pytest.raises(ValueError):
        download_utils.downloader(
            "a",
            remotes={"b": zip_remote, "c": tar_remote, "d": file_remote},
            partial_download="b",
        )

    # test partial download with a key that is not in remotes
    with pytest.raises(ValueError):
        download_utils.downloader(
            "a",
            remotes={"b": zip_remote, "c": tar_remote, "d": file_remote},
            partial_download=["d", "e"],
        )

    # test info message
    download_utils.downloader("a", info_message="I am a message!")
    mocker.resetall()

    # test download twice - defaults
    download_utils.downloader(
        "a", remotes={"b": zip_remote, "c": tar_remote, "d": file_remote}
    )
    download_utils.downloader(
        "a", remotes={"b": zip_remote, "c": tar_remote, "d": file_remote}
    )

    # test download twice - cleanup=True
    download_utils.downloader(
        "a",
        remotes={"b": zip_remote, "c": tar_remote, "d": file_remote},
        cleanup=True,
    )
    download_utils.downloader(
        "a", remotes={"b": zip_remote, "c": tar_remote, "d": file_remote}
    )
Example #13
0
  author = {Piczak, Karol J.},
  booktitle = {Proceedings of the 23rd {Annual ACM Conference} on {Multimedia}},
  date = {2015-10-13},
  url = {http://dl.acm.org/citation.cfm?doid=2733373.2806390},
  doi = {10.1145/2733373.2806390},
  location = {{Brisbane, Australia}},
  isbn = {978-1-4503-3459-4},
  publisher = {{ACM Press}},
  pages = {1015--1018}
}
"""
# Single remote covering the whole dataset archive.
# NOTE(review): the URL targets the `master` branch archive, which can change
# upstream, so the pinned checksum may go stale -- confirm it is current.
REMOTES = {
    "all": download_utils.RemoteFileMetadata(
        filename="ESC-50-master.zip",
        url="https://github.com/karoldvl/ESC-50/archive/master.zip",
        checksum="70aba3bada37d2674b8f6cd5afd5f065",
        unpack_directories=["ESC-50-master"],
    ),
}

# License string for the dataset.
LICENSE_INFO = (
    "Creative Commons Attribution-NonCommercial 3.0 Unported (CC BY-NC 3.0)"
)


class Clip(core.Clip):
    """ESC-50 Clip class

    Args:
        clip_id (str): id of the clip

    Attributes:
        tags (soundata.annotation.Tags): tag (label) of the clip + confidence. In ESC-50 every clip has one tag.
Example #14
0
# BibTeX record (an @inproceedings entry) stored as this module's citation string.
BIBTEX = """
@inproceedings{Mesaros:DCASE:18,
    Address = {Surrey, UK},
    Author = {Mesaros, A. and Heittola, T. and Virtanen, T.},
    Booktitle = {Proceedings of the Detection and Classification of Acoustic
                 Scenes and Events 2018 Workshop (DCASE2018)},
    Month = {November},
    Pages = {9--13},
    Title = {A multi-device dataset for urban acoustic scene classification},
    Year = {2018}}
"""
REMOTES = {
    "development.audio.1":
    download_utils.RemoteFileMetadata(
        filename="TAU-urban-acoustic-scenes-2019-development.audio.1.zip",
        url=
        "https://zenodo.org/record/2589280/files/TAU-urban-acoustic-scenes-2019-development.audio.1.zip?download=1",
        checksum="aca4ebfd9ed03d5f747d6ba8c24bc728",
    ),
    "development.audio.2":
    download_utils.RemoteFileMetadata(
        filename="TAU-urban-acoustic-scenes-2019-development.audio.2.zip",
        url=
        "https://zenodo.org/record/2589280/files/TAU-urban-acoustic-scenes-2019-development.audio.2.zip?download=1",
        checksum="c4f170408ce77c8c70c532bf268d7be0",
    ),
    "development.audio.3":
    download_utils.RemoteFileMetadata(
        filename="TAU-urban-acoustic-scenes-2019-development.audio.3.zip",
        url=
        "https://zenodo.org/record/2589280/files/TAU-urban-acoustic-scenes-2019-development.audio.3.zip?download=1",
        checksum="c7214a07211f10f3250290d05e72c37e",
Example #15
0
  title={EigenScape: A database of spatial acoustic scene recordings},
  author={Green, Marc Ciufo and Murphy, Damian},
  journal={Applied Sciences},
  volume={7},
  number={11},
  pages={1204},
  year={2017},
  publisher={Multidisciplinary Digital Publishing Institute}
}
"""

REMOTES = {
    "Beach":
    download_utils.RemoteFileMetadata(
        filename="Beach.zip",
        url=
        "https://webfiles.york.ac.uk/INFODATA/eaeaac50-483e-408f-a391-01b02d4ff9c4/Beach.zip",
        checksum="3f03a527291b5aedffb436fe4f12774b",
    ),
    "BusyStreet":
    download_utils.RemoteFileMetadata(
        filename="BusyStreet.zip",
        url=
        "https://webfiles.york.ac.uk/INFODATA/eaeaac50-483e-408f-a391-01b02d4ff9c4/BusyStreet.zip",
        checksum="7561ef835b4f43c5f0fa637a9747bc41",
    ),
    "Park":
    download_utils.RemoteFileMetadata(
        filename="Park.zip",
        url=
        "https://webfiles.york.ac.uk/INFODATA/eaeaac50-483e-408f-a391-01b02d4ff9c4/Park.zip",
        checksum="081417d792a31c7cdada5442227c796c",
Example #16
0
# BibTeX record (an @inproceedings entry) stored as this module's citation string.
BIBTEX = """
@inproceedings{Salamon:UrbanSound:ACMMM:14,
	Address = {Orlando, FL, USA},
	Author = {Salamon, J. and Jacoby, C. and Bello, J. P.},
	Booktitle = {22nd {ACM} International Conference on Multimedia (ACM-MM'14)},
	Month = {Nov.},
	Pages = {1041--1044},
	Title = {A Dataset and Taxonomy for Urban Sound Research},
	Year = {2014}}
"""
# Single remote covering the whole dataset archive.
REMOTES = {
    "all": download_utils.RemoteFileMetadata(
        filename="UrbanSound8K.tar.gz",
        url="https://zenodo.org/record/1203745/files/UrbanSound8K.tar.gz?download=1",
        checksum="9aa69802bbf37fb986f71ec1483a196e",
        unpack_directories=["UrbanSound8K"],
    ),
}

# License string for the dataset.
LICENSE_INFO = (
    "Creative Commons Attribution Non Commercial 4.0 International"
)


class Clip(core.Clip):
    """urbansound8k Clip class

    Args:
        clip_id (str): id of the clip

    Attributes:
Example #17
0
  title = {{ESC}: {Dataset} for {Environmental Sound Classification}},
  author = {Piczak, Karol J.},
  booktitle = {Proceedings of the 23rd {Annual ACM Conference} on {Multimedia}},
  date = {2015-10-13},
  url = {http://dl.acm.org/citation.cfm?doid=2733373.2806390},
  doi = {10.1145/2733373.2806390},
  location = {{Brisbane, Australia}},
  isbn = {978-1-4503-3459-4},
  publisher = {{ACM Press}},
  pages = {1015--1018}
}
"""
# Single remote covering the whole dataset archive.
# NOTE(review): the URL targets the `master` branch archive, which can change
# upstream, so the pinned checksum may go stale -- confirm it is current.
REMOTES = {
    "all": download_utils.RemoteFileMetadata(
        filename="ESC-50-master.zip",
        url="https://github.com/karoldvl/ESC-50/archive/master.zip",
        checksum="7771e4b9d86d0945acce719c7a59305a",
        unpack_directories=["ESC-50-master"],
    )
}

# License string for the dataset.
LICENSE_INFO = "Creative Commons Attribution-NonCommercial 3.0 Unported (CC BY-NC 3.0)"


class Clip(core.Clip):
    """ESC-50 Clip class

    Args:
        clip_id (str): id of the clip

    Attributes:
        audio (np.ndarray, float): path to the audio file
Example #18
0
@article{green2017eigenscape,
  title={EigenScape: A database of spatial acoustic scene recordings},
  author={Green, Marc Ciufo and Murphy, Damian},
  journal={Applied Sciences},
  volume={7},
  number={11},
  pages={1204},
  year={2017},
  publisher={Multidisciplinary Digital Publishing Institute}
}
"""

REMOTES = {
    "Beach": download_utils.RemoteFileMetadata(
        filename="Beach.zip",
        url="https://zenodo.org/record/1284156/files/Beach.zip?download=1",
        checksum="3dd3920c3a5e56f534760fa2dac86359",
    ),
    "BusyStreet": download_utils.RemoteFileMetadata(
        filename="BusyStreet.zip",
        url="https://zenodo.org/record/1284156/files/BusyStreet.zip?download=1",
        checksum="532b45f5d941d66506c42321a3e062ab",
    ),
    "Park": download_utils.RemoteFileMetadata(
        filename="Park.zip",
        url="https://zenodo.org/record/1284156/files/Park.zip?download=1",
        checksum="1268c7d8057529672d3cc17bec8ae302",
    ),
    "PedestrianZone": download_utils.RemoteFileMetadata(
        filename="PedestrianZone.zip",
        url="https://zenodo.org/record/1284156/files/PedestrianZone.zip?download=1",
Example #19
0
# BibTeX record (an @inproceedings entry) stored as this module's citation string.
BIBTEX = """
@inproceedings{politis2020dataset,
    author = "Politis, Archontis and Adavanne, Sharath and Virtanen, Tuomas",
    title = "A Dataset of Reverberant Spatial Sound Scenes with Moving Sources for Sound Event Localization and Detection",
    year = "2020",
    booktitle = "Proceedings of the Workshop on Detection and Classification of Acoustic Scenes and Events (DCASE2020)",
    month = "November",
}
"""

REMOTES = {
    "foa_dev": [
        download_utils.RemoteFileMetadata(
            filename="foa_dev.zip",
            url="http://zenodo.org/record/4064792/files/foa_dev.zip?download=1",
            checksum="6aad48e7346884b3929245e7553fd97d",
        ),
        download_utils.RemoteFileMetadata(
            filename="foa_dev.z01",
            url="http://zenodo.org/record/4064792/files/foa_dev.z01?download=1",
            checksum="86acab46854a57f5ba3e5b80a19c01b5",
        ),
        download_utils.RemoteFileMetadata(
            filename="foa_dev.z02",
            url="http://zenodo.org/record/4064792/files/foa_dev.z02?download=1",
            checksum="363c8c159be003271c05a71a57b2ced4",
        ),
    ],
    "mic_dev": [
        download_utils.RemoteFileMetadata(
Example #20
0
    title={FSD50K: an Open Dataset of Human-Labeled Sound Events}, 
    author={Eduardo Fonseca and Xavier Favory and Jordi Pons and Frederic Font and Xavier Serra},
    year={2020},
    eprint={2010.00475},
    archivePrefix={arXiv},
    primaryClass={cs.SD}
}
"""

# a dictionary key that has a list of RemoteFileMetadata implies a multi-part zip
# and will be processed as such using the zip subprocess (see soundata.download_utils)
REMOTES = {
    "FSD50K.dev_audio": [
        download_utils.RemoteFileMetadata(
            filename="FSD50K.dev_audio.zip",
            url=
            "https://zenodo.org/record/4060432/files/FSD50K.dev_audio.zip?download=1",
            checksum="c480d119b8f7a7e32fdb58f3ea4d6c5a",
        ),
        download_utils.RemoteFileMetadata(
            filename="FSD50K.dev_audio.z01",
            url=
            "https://zenodo.org/record/4060432/files/FSD50K.dev_audio.z01?download=1",
            checksum="faa7cf4cc076fc34a44a479a5ed862a3",
        ),
        download_utils.RemoteFileMetadata(
            filename="FSD50K.dev_audio.z02",
            url=
            "https://zenodo.org/record/4060432/files/FSD50K.dev_audio.z02?download=1",
            checksum="8f9b66153e68571164fb1315d00bc7bc",
        ),
        download_utils.RemoteFileMetadata(
Example #21
0
        "535242cf1094d95d086fc574874e9ddf",
    ),  # the base zip should be the last file
]

# (filename, md5 checksum) pairs for the "labelled" archive parts.
# NOTE(review): the base ``.zip`` is listed first here -- confirm this is the
# order the multipart download code expects.
audio_files = [
    ("labelled.zip", "bdd46cc5e9187e97c37989b3b73e786e"),
    ("labelled.z01", "98477daca861c6950cc8b620cecc286d"),
    ("labelled.z02", "873b26cfe25bb3084e39e5af0dfebcad"),
    ("labelled.z03", "71322a5c3ba33badfdd8e25c9ebf559a"),
    ("labelled.z04", "a6d087babea1f797af99b81cb7c7ea4a"),
]

# One RemoteFileMetadata per (key, filename, md5) entry of ``meta_files``.
meta_remotes = {
    remote_key: download_utils.RemoteFileMetadata(
        filename=file_name,
        url=f"https://zenodo.org/record/5645825/files/{file_name}?download=1",
        checksum=md5_sum,  # the md5 checksum
        destination_dir=None,  # relative path for where to unzip the data, or None
    )
    for remote_key, file_name, md5_sum in meta_files
}

# put as list for multipart zip
audio_remotes = {
    "audio": [
        download_utils.RemoteFileMetadata(
            filename=f,
            url=f"https://zenodo.org/record/5645825/files/{f}?download=1",
            checksum=m,  # -- the md5 checksum
            destination_dir=None,  # -- relative path for where to unzip the data, or None
        )
        for f, m in audio_files
Example #22
0
# BibTeX record (an @inproceedings entry) stored as this module's citation string.
BIBTEX = """
@inproceedings{adavanne2019multi,
  title={A Multi-room Reverberant Dataset for Sound Event Localization and Detection},
  author={Adavanne, Sharath and Politis, Archontis and Virtanen, Tuomas},
  booktitle={Workshop on Detection and Classification of Acoustic Scenes and Events},
  pages={10--14},
  year={2019}
}
"""

REMOTES = {
    "foa_dev": [
        download_utils.RemoteFileMetadata(
            filename="foa_dev.z01",
            url="https://zenodo.org/record/2599196/files/foa_dev.z01?download=1",
            checksum="bd5b18a47a3ed96e80069baa6b221a5a",
        ),
        download_utils.RemoteFileMetadata(
            filename="foa_dev.z02",
            url="https://zenodo.org/record/2599196/files/foa_dev.z02?download=1",
            checksum="5194ebf43ae095190ed78691ec9889b1",
        ),
        download_utils.RemoteFileMetadata(
            filename="foa_dev.zip",
            url="https://zenodo.org/record/2599196/files/foa_dev.zip?download=1",
            checksum="2154ad0d9e1e45bfc933b39591b49206",
        ),
    ],
    "mic_dev": [
        download_utils.RemoteFileMetadata(
Example #23
0
@article{article-minimal,
  author = "L[eslie] B. Lamport",
  title = "The Gnats and Gnus Document Preparation System",
  journal = "G-Animal's Journal",
  year = "1986"
}
"""

# -- REMOTES is a dictionary listing every file that needs to be downloaded.
# -- Use descriptive keys (e.g. "annotations", "audio").
# -- If the data can be partially downloaded, set destination_dir correctly
# -- so that the downloaded files follow the expected directory structure.
REMOTES = {
    "remote_data": download_utils.RemoteFileMetadata(
        filename="a_zip_file.zip",
        url="http://website/hosting/the/zipfile.zip",
        checksum="00000000000000000000000000000000",  # -- the md5 checksum
        destination_dir="path/to/unzip",  # -- relative path for where to unzip the data, or None
    ),
}

# -- Include any information that should be printed when downloading.
# -- Remove this variable if you don't need to print anything during download.
DOWNLOAD_INFO = """
Include any information you want to be printed when dataset.download() is called.
These can be instructions for how to download the dataset (e.g. request access on zenodo),
caveats about the download, etc
"""

# -- Include the dataset's license information
LICENSE_INFO = """
The dataset's license information goes here.
Example #24
0

# BibTeX record (a @misc entry) stored as this module's citation string.
BIBTEX = """
@misc{fonseca2019learning,
      title={Learning Sound Event Classifiers from Web Audio with Noisy Labels},
      author={Eduardo Fonseca and Manoj Plakal and Daniel P. W. Ellis and Frederic Font and Xavier Favory and Xavier Serra},
      year={2019},
      eprint={1901.01189},
      archivePrefix={arXiv},
      primaryClass={cs.SD}
}
"""
REMOTES = {
    "audio_train": download_utils.RemoteFileMetadata(
        filename="FSDnoisy18k.audio_train.zip",
        url="https://zenodo.org/record/2529934/files/FSDnoisy18k.audio_train.zip?download=1",
        checksum="34dc1d34ca44622af5bf439ceb6f0d55",
    ),
    "audio_test": download_utils.RemoteFileMetadata(
        filename="FSDnoisy18k.audio_test.zip",
        url="https://zenodo.org/record/2529934/files/FSDnoisy18k.audio_test.zip?download=1",
        checksum="1ac73d70b4ef3f81900d98c261a832de",
    ),
    "docs": download_utils.RemoteFileMetadata(
        filename="FSDnoisy18k.doc.zip",
        url="https://zenodo.org/record/2529934/files/FSDnoisy18k.doc.zip?download=1",
        checksum="093a1ca185ec341ca4eac14215e7f96b",
    ),
    "metadata": download_utils.RemoteFileMetadata(
        filename="FSDnoisy18k.meta.zip",
        url="https://zenodo.org/record/2529934/files/FSDnoisy18k.meta.zip?download=1",
Example #25
0
# BibTeX record (an @inproceedings entry) stored as this module's citation string.
BIBTEX = """
@inproceedings{Salamon:Scaper:WASPAA:17,
	Address = {New Paltz, NY, USA},
	Author = {Salamon, J. and MacConnell, D. and Cartwright, M. and Li, P. and Bello, J.~P.},
	Booktitle = {IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)},
	Month = {Oct.},
	Pages = {344--348},
	Title = {Scaper: A Library for Soundscape Synthesis and Augmentation},
	Year = {2017}}
"""
# Remote files for this dataset: the only entry, "all", points at the
# URBAN-SED_v2.0.0 tar.gz archive hosted on Zenodo.
REMOTES = {
    "all": download_utils.RemoteFileMetadata(
        filename="URBAN-SED_v2.0.0.tar.gz",
        url="https://zenodo.org/record/1324404/files/URBAN-SED_v2.0.0.tar.gz?download=1",
        checksum="a2d24a2148ece7c021fcc079ee87c2dc",
        unpack_directories=["URBAN-SED_v2.0.0"],
    ),
}

# Name of the license the dataset is distributed under.
LICENSE_INFO = "Creative Commons Attribution 4.0 International"


class Clip(core.Clip):
    """URBAN-SED Clip class

    Args:
        clip_id (str): id of the clip

    Attributes:
Example #26
0
    author = "Politis, Archontis and Adavanne, Sharath and Krause, Daniel and Deleforge, Antoine and Srivastava, Prerak and Virtanen, Tuomas",
    title = "A Dataset of Dynamic Reverberant Sound Scenes with Directional Interferers for Sound Event Localization and Detection",
    year = "2021",
    journal = "arXiv preprint arXiv:2106.06999",
    eprint = "2106.06999",
    archiveprefix = "arXiv",
    primaryclass = "eess.AS",
    url = "https://arxiv.org/abs/2106.06999"
}
"""

REMOTES = {
    "foa_dev": [
        download_utils.RemoteFileMetadata(
            filename="foa_dev.zip",
            url="http://zenodo.org/record/5476980/files/foa_dev.zip?download=1",
            checksum="80648b5f64b1b4a824084560f1334f54",
        ),
        download_utils.RemoteFileMetadata(
            filename="foa_dev.z01",
            url="http://zenodo.org/record/5476980/files/foa_dev.z01?download=1",
            checksum="270a94dc5cd183ea6532c5a3f6e9036c",
        ),
    ],
    "mic_dev": [
        download_utils.RemoteFileMetadata(
            filename="mic_dev.zip",
            url="http://zenodo.org/record/5476980/files/mic_dev.zip?download=1",
            checksum="a5131297547431160b732a3481626c2d",
        ),
        download_utils.RemoteFileMetadata(
Example #27
0
    Address = {Munich, Germany},
    Author = {Mesaros, A. and Heittola, T. and Diment, A. and Elizalde, B. and
              Shah, A. and Vincent, E. and Raj, B. and Virtanen, T.},
    Booktitle = {Proceedings of the Detection and Classification of Acoustic
                 Scenes and Events 2017 Workshop (DCASE2017)},
    Month = {November},
    Pages = {85--92},
    Title = {{DCASE} 2017 Challenge Setup: Tasks, Datasets and Baseline
             System},
    Year = {2017}}
"""
REMOTES = {
    "development.audio.1":
    download_utils.RemoteFileMetadata(
        filename="TUT-sound-events-2017-development.audio.1.zip",
        url=("https://zenodo.org/record/814831/files/TUT-sound-events-2017-"
             "development.audio.1.zip?download=1"),
        checksum="6f1cd31592b8240a14be3ee513db6a23",
    ),
    "development.audio.2":
    download_utils.RemoteFileMetadata(
        filename="TUT-sound-events-2017-development.audio.2.zip",
        url=("https://zenodo.org/record/814831/files/TUT-sound-events-2017-"
             "development.audio.2.zip?download=1"),
        checksum="adcff03341b84dc8d35f035b93c1efa0",
    ),
    "development.doc":
    download_utils.RemoteFileMetadata(
        filename="TUT-sound-events-2017-development.doc.zip",
        url=("https://zenodo.org/record/814831/files/TUT-sound-events-2017-"
             "development.doc.zip?download=1"),
        checksum="aa6024e70f5bff3fe15d962b01753e23",
Example #28
0
  title        = {{STARSS22: Sony-TAu Realistic Spatial Soundscapes 
                   2022 dataset}},
  month        = mar,
  year         = 2022,
  publisher    = {Zenodo},
  version      = {1.0.0},
  doi          = {10.5281/zenodo.6387880},
  url          = {https://doi.org/10.5281/zenodo.6387880}
}
"""

REMOTES = {
    "foa_dev":
    download_utils.RemoteFileMetadata(
        filename="foa_dev.zip",
        url="https://zenodo.org/record/6387880/files/foa_dev.zip?download=1",
        checksum="165dd033b262dc11a8853635c1def59b",
    ),
    "mic_dev":
    download_utils.RemoteFileMetadata(
        filename="mic_dev.zip",
        url="https://zenodo.org/record/6387880/files/mic_dev.zip?download=1",
        checksum="46b55d0be507afa986cd29120e42b188",
    ),
    "metadata_dev":
    download_utils.RemoteFileMetadata(
        filename="metadata_dev.zip",
        url=
        "https://zenodo.org/record/6387880/files/metadata_dev.zip?download=1",
        checksum="b460e17e0848c49f03f238afb89fa87e",
    ),
Example #29
0
from soundata import download_utils, jams_utils, core, annotations, io

# BibTeX entry for "An open-access database of 3D microphone array recordings"
# (Lee and Johnson, Audio Engineering Society Convention 147, 2019).
BIBTEX = """
@inproceedings{lee2019open,
  title={An open-access database of 3D microphone array recordings},
  author={Lee, Hyunkook and Johnson, Dale},
  booktitle={Audio Engineering Society Convention 147},
  year={2019},
  organization={Audio Engineering Society}
}
"""

REMOTES = {
    "ImpulseResponses": download_utils.RemoteFileMetadata(
        filename="03 3D-MARCo Impulse Responses.zip",
        url="https://zenodo.org/record/3477602/files/03%203D-MARCo%20Impulse%20Responses.zip?download=1",
        checksum="d328425ee2d1e847e225d78b676cd81e",
    ),
    "Quartet": download_utils.RemoteFileMetadata(
        filename="04 3D-MARCo Samples_Quartet.zip",
        url="https://zenodo.org/record/3477602/files/04%203D-MARCo%20Samples_Quartet.zip?download=1",
        checksum="cce3442ae5a11ea869412c2e6a4cadcd",
    ),
    "Trio": download_utils.RemoteFileMetadata(
        filename="05 3D-MARCo Samples_Trio.zip",
        url="https://zenodo.org/record/3477602/files/05%203D-MARCo%20Samples_Trio.zip?download=1",
        checksum="48262496ecb6a32843e4b69393eeeec1",
    ),
    "Organ": download_utils.RemoteFileMetadata(
        filename="06 3D-MARCo Samples_Organ.zip",
        url="https://zenodo.org/record/3477602/files/06%203D-MARCo%20Samples_Organ.zip?download=1",
def test_download(httpserver):
    """Download two remotes from a local HTTP server and check what lands on disk.

    The zipped index file is served first and the test asserts that the zip
    exists under ``data_home`` after ``dataset.download()``. The ``tar.bz2``
    feature archive is served next and the test asserts that
    ``acousticbrainz-mediaeval-train/01/<id>.json`` exists after the second
    download.

    Args:
        httpserver: fixture providing ``serve_content`` and ``url``; it serves
            the fixture files so no real network access is needed.
    """
    data_home = "tests/resources/mir_datasets/acousticbrainz_genre_download"

    # Start from a clean directory so leftovers from an earlier run cannot
    # satisfy the existence assertions below.
    if os.path.exists(data_home):
        shutil.rmtree(data_home)

    try:
        # Read the fixture inside a context manager so the file handle is
        # closed deterministically instead of leaking until garbage collection.
        with open(
            "tests/resources/download/acousticbrainz_genre_index.json.zip",
            "rb",
        ) as fhandle:
            httpserver.serve_content(fhandle.read())

        remotes = {
            "index": download_utils.RemoteFileMetadata(
                filename="acousticbrainz_genre_index.json.zip",
                url=httpserver.url,
                checksum="b32a663449c1da55de424d845521eb79",
            )
        }

        dataset = acousticbrainz_genre.Dataset(data_home)
        dataset.remotes = remotes
        dataset.download()

        assert os.path.exists(data_home)
        assert os.path.exists(
            os.path.join(data_home, "acousticbrainz_genre_index.json.zip")
        )

        # Swap the served payload for the feature archive and download again
        # with a remotes table that only lists that archive.
        with open(
            "tests/resources/download/acousticbrainz-mediaeval-features-train-01.tar.bz2",
            "rb",
        ) as fhandle:
            httpserver.serve_content(fhandle.read())

        remotes = {
            "train-01": download_utils.RemoteFileMetadata(
                filename="acousticbrainz-mediaeval-features-train-01.tar.bz2",
                url=httpserver.url,
                checksum="eb155784e1d4de0f35aa23ded4d34849",
                destination_dir="acousticbrainz-mediaeval-train",
                unpack_directories=["acousticbrainz-mediaeval-train"],
            )
        }

        dataset.remotes = remotes
        dataset.download()

        train_dir = os.path.join(data_home, "acousticbrainz-mediaeval-train")
        assert os.path.exists(data_home)
        assert os.path.exists(train_dir)
        assert os.path.exists(os.path.join(train_dir, "01"))
        assert os.path.exists(
            os.path.join(
                train_dir,
                "01",
                "01a0a332-d340-4806-a88b-cb60a05355c0.json",
            )
        )
    finally:
        # Always remove the download directory, even when an assertion or the
        # download itself fails, so a failed run does not pollute later runs.
        if os.path.exists(data_home):
            shutil.rmtree(data_home)