Example #1
0
def download(data_home=None,
             partial_download=None,
             force_overwrite=False,
             cleanup=True):
    """Download the GuitarSet dataset.

    Args:
        data_home (str):
            Local path where the dataset lives.
            When `None`, the default directory `~/mir_datasets` is used.
        partial_download (list):
            Subset of the remotes to fetch. May contain any of:
             * `'annotations'` the annotation files
             * `'audio_hex_original'` original 6 channel wave file from hexaphonic pickup
             * `'audio_hex_debleeded'` hex wave files with interference removal applied
             * `'audio_mic'` monophonic recording from reference microphone
             * `'audio_mix'` monophonic mixture of original 6 channel file
            When `None`, everything is downloaded.
        force_overwrite (bool):
            If True, overwrite any previously downloaded data.
        cleanup (bool):
            If True, delete the zip/tar archives after extraction.
    """
    save_dir = data_home
    if save_dir is None:
        save_dir = utils.get_default_dataset_path(DATASET_DIR)

    download_utils.downloader(
        save_dir,
        remotes=REMOTES,
        partial_download=partial_download,
        force_overwrite=force_overwrite,
        info_message=None,
        cleanup=cleanup,
    )
Example #2
0
def download(data_home=None, force_overwrite=False):
    """DALI is not available for downloading directly.
    This function prints a helper message to download DALI
    through zenodo.org.

    Args:
        data_home (str): Local path where the dataset is stored.
            If `None`, looks for the data in the default directory, `~/mir_datasets`
        force_overwrite (bool): Whether to overwrite the existing downloaded data
    """

    if data_home is None:
        data_home = utils.get_default_dataset_path(DATASET_DIR)

    # NOTE(review): the original passed force_overwrite into .format() where no
    # {force_overwrite} placeholder exists (silently ignored) and never forwarded
    # it to the downloader; it is now forwarded so the parameter has effect.
    info_message = """
        To download this dataset, visit:
        https://zenodo.org/record/2577915 and request access.

        Once downloaded, unzip the file DALI_v1.0.zip
        and place the result in:
        {save_path}

        Use the function dali_code.get_audio you can find at:
        https://github.com/gabolsgabs/DALI for getting the audio and place them at:
        {audio_path}
    """.format(
        save_path=os.path.join(data_home, 'annotations'),
        audio_path=os.path.join(data_home, 'audio'),
    )

    download_utils.downloader(data_home,
                              remotes=REMOTES,
                              info_message=info_message,
                              force_overwrite=force_overwrite)
Example #3
0
def download(
    data_home=None, partial_download=None, force_overwrite=False, cleanup=True
):
    """Download the TinySOL dataset.

    Args:
        data_home (str):
            Local path where the dataset lives.
            When `None`, the default directory `~/mir_datasets` is used.
        partial_download (list):
            Subset of the remotes to fetch. May contain any of:
            * `'annotations'` the annotation files
            * `'audio'` the audio files
            When `None`, everything is downloaded.
        force_overwrite (bool):
            If True, overwrite any previously downloaded data.
        cleanup (bool):
            If True, delete the zip/tar archives after extraction.

    """
    save_dir = data_home
    if save_dir is None:
        save_dir = utils.get_default_dataset_path(DATASET_DIR)

    download_utils.downloader(
        save_dir,
        remotes=REMOTES,
        partial_download=partial_download,
        force_overwrite=force_overwrite,
        info_message=None,
        cleanup=cleanup,
    )
Example #4
0
    def download(self, partial_download=None, force_overwrite=False, cleanup=False):
        """Download the dataset to ``self.data_home``.

        Args:
            partial_download (list or None):
                Keys of remotes to fetch; all remotes when None.
            force_overwrite (bool):
                If True, re-download and overwrite existing files.
            cleanup (bool):
                If True, delete zip/tar archives after extraction.

        Raises:
            ValueError: if invalid keys are passed to partial_download
            IOError: if a downloaded file's checksum is different from expected

        """
        download_utils.downloader(
            self.data_home,
            remotes=self.remotes,
            partial_download=partial_download,
            force_overwrite=force_overwrite,
            cleanup=cleanup,
        )

        # The downloaded JSON metadata contains bare "nan" tokens, which are
        # not valid JSON; rewrite them as "null" in place.
        meta_dir = os.path.join(self.data_home, "meta")
        self._find_replace(meta_dir, ": nan", ": null", "*.json")
Example #5
0
    def download(self, partial_download=None, force_overwrite=False, cleanup=False):
        """Download the dataset to ``self.data_home``.

        Args:
            partial_download (list or None):
                Keys of remotes to fetch; all remotes when None.
            force_overwrite (bool):
                If True, re-download and overwrite existing files.
            cleanup (bool):
                If True, delete zip/tar archives after extraction.

        Raises:
            ValueError: if invalid keys are passed to partial_download
            IOError: if a downloaded file's checksum is different from expected

        """
        # in MAESTRO "metadata" is contained in "midi" is contained in "all",
        # so collapse the request to the single largest remote needed.
        if partial_download is None:
            partial_download = ["all"]
        elif "all" in partial_download:
            partial_download = ["all"]
        elif "midi" in partial_download:
            partial_download = ["midi"]

        download_utils.downloader(
            self.data_home,
            remotes=self.remotes,
            partial_download=partial_download,
            force_overwrite=force_overwrite,
            cleanup=cleanup,
        )
Example #6
0
def download(data_home=None, force_overwrite=False):
    """Download the Beatles Dataset (annotations).
    The audio files are not provided due to the copyright.

    Args:
        data_home (str): Local path where the dataset lives.
            When `None`, the default directory `~/mir_datasets` is used.
        force_overwrite (bool): If True, overwrite any existing downloaded data.

    """

    # use the default location: ~/mir_datasets/Beatles
    save_dir = data_home
    if save_dir is None:
        save_dir = utils.get_default_dataset_path(DATASET_DIR)

    # Instructions for the user on how to lay out the (non-downloadable) audio.
    download_message = """
        Unfortunately the audio files of the Beatles dataset are not available
        for download. If you have the Beatles dataset, place the contents into
        a folder called Beatles with the following structure:
            > Beatles/
                > annotations/
                > audio/
        and copy the Beatles folder to {}
    """.format(save_dir)

    download_utils.downloader(
        save_dir,
        tar_downloads=[ANNOTATIONS_REMOTE],
        info_message=download_message,
        force_overwrite=force_overwrite,
    )
Example #7
0
def _download(save_dir, remotes, partial_download, info_message,
              force_overwrite, cleanup):
    """Download the dataset into ``save_dir`` and repair its JSON metadata.

    Args:
        save_dir (str):
            The directory to download the data
        remotes (dict or None):
            A dictionary of RemoteFileMetadata tuples of data in zip format.
            If None, there is no data to download
        partial_download (list or None):
            Keys of ``remotes`` to fetch; all data when None.
        info_message (str or None):
            A string of info to print when this function is called.
            If None, no string is printed.
        force_overwrite (bool):
            If True, existing files are overwritten by the downloaded files.
        cleanup (bool):
            If True, delete the zip/tar archives after extraction.
    """
    download_utils.downloader(
        save_dir,
        remotes=remotes,
        partial_download=partial_download,
        force_overwrite=force_overwrite,
        cleanup=cleanup,
    )
    # The shipped JSON uses bare "nan" tokens (invalid JSON); rewrite as "null".
    meta_dir = os.path.join(save_dir, "meta")
    find_replace(meta_dir, ": nan", ": null", "*.json")
Example #8
0
def download(data_home=None, force_overwrite=False):
    """Download ORCHSET Dataset.

    Args:
        data_home (str): Local path where the dataset is stored.
            If `None`, looks for the data in the default directory, `~/mir_datasets`
        force_overwrite (bool): Whether to overwrite the existing downloaded data

    """
    if data_home is None:
        data_home = utils.get_default_dataset_path(DATASET_DIR)

    download_utils.downloader(data_home,
                              zip_downloads=[REMOTE],
                              force_overwrite=force_overwrite)

    # files get downloaded to a folder called Orchset - move everything up a level
    duplicated_orchset_dir = os.path.join(data_home, 'Orchset')
    orchset_files = glob.glob(os.path.join(duplicated_orchset_dir, '*'))

    for fpath in orchset_files:
        shutil.move(fpath, data_home)

    if os.path.exists(duplicated_orchset_dir):
        # NOTE(review): was os.removedirs, which also prunes empty *parent*
        # directories upward from the leaf; shutil.rmtree removes only the
        # leftover Orchset folder, matching the other dataset loaders.
        shutil.rmtree(duplicated_orchset_dir)
Example #9
0
def download(data_home=None, force_overwrite=False):
    """Download iKala Dataset. However, iKala dataset is not available for
    download anymore. This function prints a helper message to organize
    pre-downloaded iKala dataset.

    Args:
        data_home (str): Local path where the dataset lives.
            When `None`, the default directory `~/mir_datasets` is used.
        force_overwrite (bool): If True, overwrite any existing files with
            the downloaded ones.
    """
    save_dir = data_home
    if save_dir is None:
        save_dir = utils.get_default_dataset_path(DATASET_DIR)

    # Instructions for organizing the (no longer downloadable) dataset.
    download_message = """
        Unfortunately the iKala dataset is not available for download.
        If you have the iKala dataset, place the contents into a folder called
        {ikala_dir} with the following structure:
            > {ikala_dir}/
                > Lyrics/
                > PitchLabel/
                > Wavfile/
        and copy the {ikala_dir} folder to {save_path}
    """.format(ikala_dir=DATASET_DIR, save_path=save_dir)

    download_utils.downloader(
        save_dir,
        file_downloads=[ID_MAPPING_REMOTE],
        info_message=download_message,
        force_overwrite=force_overwrite,
    )
Example #10
0
def download(data_home=None):
    """MedleyDB is not available for downloading directly.
    This function prints a helper message to download MedleyDB
    through zenodo.org.

    Args:
        data_home (str):
            Local path where the dataset lives.
            When `None`, the default directory `~/mir_datasets` is used.
    """

    save_dir = data_home
    if save_dir is None:
        save_dir = utils.get_default_dataset_path(DATASET_DIR)

    # No remotes to fetch; only print instructions for the manual download.
    info_message = """
        To download this dataset, visit:
        https://zenodo.org/record/2628782#.XKZdABNKh24
        and request access.

        Once downloaded, unzip the file MedleyDB-Melody.zip
        and copy the result to:
        {data_home}
    """.format(data_home=save_dir)

    download_utils.downloader(save_dir, info_message=info_message)
Example #11
0
    def download(self, partial_download=None, force_overwrite=False, cleanup=False):
        """Download data to `save_dir` and optionally print a message.

        Args:
            partial_download (list or None):
                Keys of remotes to fetch; all remotes when None.
            force_overwrite (bool):
                If True, re-download and overwrite existing files.
            cleanup (bool):
                If True, delete zip/tar archives after extraction.

        Raises:
            ValueError: if invalid keys are passed to partial_download
            IOError: if a downloaded file's checksum is different from expected

        """
        download_utils.downloader(
            self.data_home,
            remotes=self.remotes,
            partial_download=partial_download,
            force_overwrite=force_overwrite,
            info_message=self._download_info,
            cleanup=cleanup,
        )
Example #12
0
def download(data_home=None, force_overwrite=False, cleanup=True):
    """Download SALAMI Dataset (annotations).
    The audio files are not provided.

    Args:
        data_home (str):
            Local path where the dataset lives.
            When `None`, the default directory `~/mir_datasets` is used.
        force_overwrite (bool):
            If True, overwrite any existing downloaded data.
        cleanup (bool):
            If True, delete the zip/tar archives after extraction.

    """
    save_dir = data_home
    if save_dir is None:
        save_dir = utils.get_default_dataset_path(DATASET_DIR)

    # Instructions for the user on how to lay out the (non-downloadable) audio.
    info_message = """
        Unfortunately the audio files of the Salami dataset are not available
        for download. If you have the Salami dataset, place the contents into a
        folder called Salami with the following structure:
            > Salami/
                > salami-data-public-hierarchy-corrections/
                > audio/
        and copy the Salami folder to {}
    """.format(save_dir)

    download_utils.downloader(
        save_dir,
        remotes=REMOTES,
        force_overwrite=force_overwrite,
        info_message=info_message,
        cleanup=cleanup,
    )
Example #13
0
def download(data_home=None, force_overwrite=False):
    """Download SALAMI Dataset (annotations).
    The audio files are not provided.

    Args:
        data_home (str): Local path where the dataset lives.
            When `None`, the default directory `~/mir_datasets` is used.

        force_overwrite (bool): If True, overwrite any existing downloaded data.

    """
    save_dir = data_home
    if save_dir is None:
        save_dir = utils.get_default_dataset_path(DATASET_DIR)

    # Instructions for the user on how to lay out the (non-downloadable) audio.
    info_message = """
        Unfortunately the audio files of the Salami dataset are not available
        for download. If you have the Salami dataset, place the contents into a
        folder called Salami with the following structure:
            > Salami/
                > salami-data-public-master/
                > audio/
        and copy the Salami folder to {}
    """.format(save_dir)

    download_utils.downloader(
        save_dir,
        zip_downloads=[ANNOTATIONS_REMOTE],
        force_overwrite=force_overwrite,
        info_message=info_message,
    )
Example #14
0
def download(data_home=None,
             force_overwrite=False,
             cleanup=True,
             partial_download=None):
    """Download the giantsteps_key Dataset (annotations).
    The audio files are not provided due to copyright issues.

    Args:
        data_home (str):
            Local path where the dataset lives.
            When `None`, the default directory `~/mir_datasets` is used.
        force_overwrite (bool):
            If True, overwrite any existing downloaded data.
        cleanup (bool):
            If True, delete the zip/tar archives after extraction.
        partial_download (list of str):
            Elements can be 'audio', 'metadata' and/or 'keys'.
    """

    # use the default location: ~/mir_datasets/giantsteps_key
    save_dir = data_home
    if save_dir is None:
        save_dir = utils.get_default_dataset_path(DATASET_DIR)

    # No extra instructions to show for this dataset.
    download_message = ""

    download_utils.downloader(
        save_dir,
        remotes=REMOTES,
        partial_download=partial_download,
        force_overwrite=force_overwrite,
        info_message=download_message,
        cleanup=cleanup,
    )
Example #15
0
def download(data_home=None, force_overwrite=False, cleanup=True):
    """Download Groove MIDI.

    Args:
        data_home (str):
            Local path where the dataset lives.
            When `None`, the default directory `~/mir_datasets` is used.
        force_overwrite (bool):
            If True, overwrite any existing downloaded data.
        cleanup (bool):
            If True, delete the zip/tar archives after extraction.

    """
    save_dir = data_home
    if save_dir is None:
        save_dir = utils.get_default_dataset_path(DATASET_DIR)

    download_utils.downloader(
        save_dir,
        remotes=REMOTES,
        force_overwrite=force_overwrite,
        info_message=None,
        cleanup=cleanup,
    )

    # files get downloaded to a folder called groove - move everything up a level
    groove_dir = os.path.join(save_dir, 'groove')
    for fpath in glob.glob(os.path.join(groove_dir, '*')):
        shutil.move(fpath, save_dir)

    if os.path.exists(groove_dir):
        shutil.rmtree(groove_dir)
Example #16
0
def download(data_home=None, force_overwrite=False):
    """Download the RWC-Popular annotations and metadata.

    The audio files are not available for download; an info message with
    instructions for organizing pre-obtained audio is printed instead.

    Args:
        data_home (str): Local path where the dataset is stored.
            If `None`, looks for the data in the default directory, `~/mir_datasets`
        force_overwrite (bool): Whether to overwrite the existing downloaded data
    """

    if data_home is None:
        data_home = utils.get_default_dataset_path(DATASET_DIR)

    # Instructions shown to the user for laying out the non-downloadable audio.
    info_message = """
        Unfortunately the audio files of the RWC-Popular dataset are not available
        for download. If you have the RWC-Popular dataset, place the contents into a
        folder called RWC-Popular with the following structure:
            > RWC-Popular/
                > annotations/
                > audio/rwc-p-m0i with i in [1 .. 7]
                > metadata-master/
        and copy the RWC-Popular folder to {}
    """.format(data_home)

    download_utils.downloader(
        data_home,
        zip_downloads=[
            METADATA_REMOTE,
            ANNOTATIONS_REMOTE_1,
            ANNOTATIONS_REMOTE_2,
            ANNOTATIONS_REMOTE_3,
            ANNOTATIONS_REMOTE_4,
        ],
        info_message=info_message,
        force_overwrite=force_overwrite,
    )
Example #17
0
    def download(self,
                 partial_download=None,
                 force_overwrite=False,
                 cleanup=False):
        """Download the dataset

        Args:
            partial_download (list or None):
                A list of keys of remotes to partially download.
                If None, all data is downloaded
            force_overwrite (bool):
                If True, existing files are overwritten by the downloaded files.
            cleanup (bool):
                Whether to delete any zip/tar files after extracting.

        Raises:
            ValueError: if invalid keys are passed to partial_download
            IOError: if a downloaded file's checksum is different from expected

        """
        # in MAESTRO "metadata" is contained in "midi" is contained in "all"
        # so the request collapses to the single largest remote needed.
        if partial_download is None or "all" in partial_download:
            partial_download = ["all"]
        elif "midi" in partial_download:
            partial_download = ["midi"]

        download_utils.downloader(
            self.data_home,
            remotes=self.remotes,
            partial_download=partial_download,
            force_overwrite=force_overwrite,
            cleanup=cleanup,
        )

        # files get downloaded to a folder called maestro-v2.0.0
        # move everything up a level
        maestro_dir = os.path.join(self.data_home, "maestro-v2.0.0")
        # If the archive folder is absent, either nothing was downloaded or
        # the files were already flattened on a previous run: nothing to move.
        if not os.path.exists(maestro_dir):
            logging.info(
                "Maestro data not downloaded, because it probably already exists on your computer. "
                +
                "Run .validate() to check, or rerun with force_overwrite=True to delete any "
                + "existing files and download from scratch")
            return
        maestro_files = glob.glob(os.path.join(maestro_dir, "*"))

        for fpath in maestro_files:
            target_path = os.path.join(self.data_home, os.path.basename(fpath))
            # Never clobber an existing file when flattening; skip and tell the user.
            if os.path.exists(target_path):
                logging.info(
                    "{} already exists. Run with force_overwrite=True to download from scratch"
                    .format(target_path))
                continue
            shutil.move(fpath, self.data_home)

        # Remove the now-emptied (or skipped-content) archive folder.
        if os.path.exists(maestro_dir):
            shutil.rmtree(maestro_dir)
Example #18
0
 def index(self):
     """Return the loaded JSON index, downloading the index file if missing."""
     if self.remote_index is not None:
         module_dir = os.path.dirname(os.path.realpath(__file__))
         indexes_dir = os.path.join(module_dir, "datasets/indexes")
         index_path = os.path.join(indexes_dir, self.index_file)
         # Fetch the remote index only when it is not already on disk.
         if not os.path.isfile(index_path):
             download_utils.downloader(indexes_dir,
                                       remotes=self.remote_index)
     return load_json_index(self.index_file)
Example #19
0
def download(data_home=None):
    """Download the GTZAN-Genre dataset.

    Args:
        data_home (str): Local path where the dataset lives.
            When `None`, the default directory `~/mir_datasets` is used.
    """
    save_dir = data_home
    if save_dir is None:
        save_dir = utils.get_default_dataset_path(DATASET_DIR)

    download_utils.downloader(save_dir, tar_downloads=[DATASET_REMOTE])
Example #20
0
    def download(self,
                 partial_download=None,
                 force_overwrite=False,
                 cleanup=False):
        """Download the dataset

        Args:
            partial_download (list or None):
                A list of keys of remotes to partially download.
                If None, all data is downloaded
            force_overwrite (bool):
                If True, existing files are overwritten by the downloaded files.
            cleanup (bool):
                Whether to delete any zip/tar files after extracting.

        Raises:
            ValueError: if invalid keys are passed to partial_download
            IOError: if a downloaded file's checksum is different from expected

        """
        download_utils.downloader(
            self.data_home,
            partial_download=partial_download,
            remotes=self.remotes,
            info_message=None,
            force_overwrite=force_overwrite,
            cleanup=cleanup,
        )

        # files get downloaded to a folder called groove - move everything up a level
        groove_dir = os.path.join(self.data_home, "groove")
        # If the archive folder is absent, either nothing was downloaded or the
        # files were already flattened on a previous run: nothing to move.
        if not os.path.exists(groove_dir):
            logging.info(
                "Groove MIDI data not downloaded, because it probably already exists on your computer. "
                +
                "Run .validate() to check, or rerun with force_overwrite=True to delete any "
                + "existing files and download from scratch")
            return

        groove_files = glob.glob(os.path.join(groove_dir, "*"))

        for fpath in groove_files:
            target_path = os.path.join(self.data_home, os.path.basename(fpath))
            # Never clobber an existing file when flattening; skip and tell the user.
            if os.path.exists(target_path):
                logging.info(
                    "{} already exists. Run with force_overwrite=True to download from scratch"
                    .format(target_path))
                continue
            shutil.move(fpath, self.data_home)

        # Remove the now-emptied (or skipped-content) archive folder.
        if os.path.exists(groove_dir):
            shutil.rmtree(groove_dir)
Example #21
0
def download(
    data_home=None, partial_download=None, force_overwrite=False, cleanup=True
):
    """Download the MAESTRO dataset.

    Args:
        data_home (str):
            Local path where the dataset lives.
            When `None`, the default directory `~/mir_datasets` is used.
        partial_download (list):
            Subset of the remotes to fetch. May contain any of:
                * 'all': audio, midi and metadata
                * 'midi': midi and metadata only
                * 'metadata': metadata only
            When `None`, everything is downloaded.
        force_overwrite (bool):
            If True, overwrite any existing downloaded data.
        cleanup (bool):
            If True, delete the zip/tar archives after extraction.

    """
    save_dir = data_home
    if save_dir is None:
        save_dir = utils.get_default_dataset_path(DATASET_DIR)

    # in MAESTRO "metadata" is contained in "midi" is contained in "all",
    # so collapse the request to the single largest remote needed.
    if partial_download is None or 'all' in partial_download:
        partial_download = ['all']
    elif 'midi' in partial_download:
        partial_download = ['midi']

    download_utils.downloader(
        save_dir,
        remotes=REMOTES,
        partial_download=partial_download,
        force_overwrite=force_overwrite,
        cleanup=cleanup,
    )

    # files get downloaded to a folder called maestro-v2.0.0
    # move everything up a level
    maestro_dir = os.path.join(save_dir, 'maestro-v2.0.0')
    for fpath in glob.glob(os.path.join(maestro_dir, '*')):
        shutil.move(fpath, save_dir)

    if os.path.exists(maestro_dir):
        shutil.rmtree(maestro_dir)
Example #22
0
def _download(save_dir, remotes, partial_download, info_message,
              force_overwrite, cleanup):
    """Download the MAESTRO data and flatten the extracted folder.

    Args:
        save_dir (str):
            The directory to download the data
        remotes (dict or None):
            A dictionary of RemoteFileMetadata tuples of data in zip format.
            If None, there is no data to download
        partial_download (list or None):
            Subset of the remotes to fetch. May contain any of:
                * 'all': audio, midi and metadata
                * 'midi': midi and metadata only
                * 'metadata': metadata only
            When None, everything is downloaded.
        info_message (str or None):
            A string of info to print when this function is called.
            If None, no string is printed.
        force_overwrite (bool):
            If True, existing files are overwritten by the downloaded files.
        cleanup (bool):
            If True, delete the zip/tar archives after extraction.

    """
    # in MAESTRO "metadata" is contained in "midi" is contained in "all",
    # so collapse the request to the single largest remote needed.
    if partial_download is None or "all" in partial_download:
        partial_download = ["all"]
    elif "midi" in partial_download:
        partial_download = ["midi"]

    download_utils.downloader(
        save_dir,
        remotes=remotes,
        partial_download=partial_download,
        force_overwrite=force_overwrite,
        cleanup=cleanup,
    )

    # files get downloaded to a folder called maestro-v2.0.0
    # move everything up a level
    maestro_dir = os.path.join(save_dir, "maestro-v2.0.0")
    for fpath in glob.glob(os.path.join(maestro_dir, "*")):
        shutil.move(fpath, save_dir)

    if os.path.exists(maestro_dir):
        shutil.rmtree(maestro_dir)
Example #23
0
def download(data_home=None,
             partial_download=None,
             force_overwrite=False,
             cleanup=True):
    """Download the RWC Popular (annotations and metadata).
    The audio files are not provided due to copyright issues.

    Args:
        data_home (str):
            Local path where the dataset lives.
            When `None`, the default directory `~/mir_datasets` is used.
        partial_download (list):
            Subset of the remotes to fetch. May contain any of:
             * `'annotations_beat'` the beat annotation files
             * `'annotations_sections'` the sections annotation files
             * `'annotations_chords'` the chords annotation files
             * `'annotations_vocal_act'` the vocal activity annotation files
             * `'metadata'` the metadata files
             When `None`, everything is downloaded.
        force_overwrite (bool):
            If True, overwrite any existing downloaded data.
        cleanup (bool):
            If True, delete the zip/tar archives after extraction.

    """

    save_dir = data_home
    if save_dir is None:
        save_dir = utils.get_default_dataset_path(DATASET_DIR)

    # Instructions for the user on how to lay out the (non-downloadable) audio.
    info_message = """
        Unfortunately the audio files of the RWC-Popular dataset are not available
        for download. If you have the RWC-Popular dataset, place the contents into a
        folder called RWC-Popular with the following structure:
            > RWC-Popular/
                > annotations/
                > audio/rwc-p-m0i with i in [1 .. 7]
                > metadata-master/
        and copy the RWC-Popular folder to {}
    """.format(save_dir)

    download_utils.downloader(
        save_dir,
        remotes=REMOTES,
        partial_download=partial_download,
        force_overwrite=force_overwrite,
        info_message=info_message,
        cleanup=cleanup,
    )
Example #24
0
def download(data_home=None):
    """Download Groove MIDI.

    Args:
        data_home (str): Local path where the dataset lives.
            When `None`, the default directory `~/mir_datasets` is used.
    """
    save_dir = data_home
    if save_dir is None:
        save_dir = utils.get_default_dataset_path(DATASET_DIR)

    download_utils.downloader(save_dir,
                              zip_downloads=[AUDIO_MIDI_REMOTE],
                              cleanup=True)

    # The archive extracts to 'groove'; rename to the canonical folder name.
    extracted = os.path.join(save_dir, 'groove')
    canonical = os.path.join(save_dir, 'Groove MIDI')
    os.rename(extracted, canonical)
Example #25
0
def download(data_home=None):
    """Download TinySOL.

    Args:
        data_home (str): Local path where the dataset lives.
            When `None`, the default directory `~/mir_datasets` is used.
    """
    save_dir = data_home
    if save_dir is None:
        save_dir = utils.get_default_dataset_path(DATASET_DIR)

    download_utils.downloader(
        save_dir,
        file_downloads=[ANNOTATION_REMOTE],
        tar_downloads=[AUDIO_REMOTE],
        cleanup=True,
    )
Example #26
0
def download(data_home=None):
    """Download GuitarSet.

    Args:
        data_home (str): Local path where the dataset lives.
            When `None`, the default directory `~/mir_datasets` is used.
    """
    save_dir = data_home
    if save_dir is None:
        save_dir = utils.get_default_dataset_path(DATASET_DIR)

    # Every GuitarSet remote ships as a zip archive.
    remote_zips = [
        ANNOTATION_REMOTE,
        AUDIO_HEX_CLN_REMOTE,
        AUDIO_HEX_REMOTE,
        AUDIO_MIC_REMOTE,
        AUDIO_MIX_REMOTE,
    ]
    download_utils.downloader(save_dir, zip_downloads=remote_zips, cleanup=True)
Example #27
0
def download(data_home=None, force_overwrite=False, cleanup=True):
    """Download the GTZAN-Genre dataset.

    Args:
        data_home (str):
            Local path where the dataset lives.
            When `None`, the default directory `~/mir_datasets` is used.
        force_overwrite (bool):
            If True, overwrite any existing downloaded data.
        cleanup (bool):
            If True, delete the zip/tar archives after extraction.
    """
    save_dir = data_home
    if save_dir is None:
        save_dir = utils.get_default_dataset_path(DATASET_DIR)

    download_utils.downloader(
        save_dir,
        remotes=REMOTES,
        force_overwrite=force_overwrite,
        info_message=None,
        cleanup=cleanup,
    )
Example #28
0
def _download(save_dir, remotes, partial_download, info_message,
              force_overwrite, cleanup):
    """Download the dataset.

    Args:
        save_dir (str):
            The directory to download the data
        remotes (dict or None):
            A dictionary of RemoteFileMetadata tuples of data in zip format.
            If None, there is no data to download
        partial_download (list or None):
            A list of keys to partially download the remote objects of the download dict.
            If None, all data is downloaded
        info_message (str or None):
            A string of info to print when this function is called.
            If None, no string is printed.
        force_overwrite (bool):
            If True, existing files are overwritten by the downloaded files.
        cleanup (bool):
            Whether to delete the zip/tar file after extracting.

    """
    download_utils.downloader(
        save_dir,
        remotes=remotes,
        # Fix: forward partial_download and info_message — previously these
        # parameters were accepted and documented but silently ignored
        # (info_message was hard-coded to None, partial_download was dropped).
        partial_download=partial_download,
        info_message=info_message,
        force_overwrite=force_overwrite,
        cleanup=cleanup,
    )

    # Archives extract into a subfolder called "groove" - move everything
    # up a level so files live directly under save_dir.
    groove_dir = os.path.join(save_dir, "groove")
    groove_files = glob.glob(os.path.join(groove_dir, "*"))

    for fpath in groove_files:
        shutil.move(fpath, save_dir)

    # Remove the now-empty intermediate directory (guarded: it may not exist
    # if nothing was downloaded, e.g. remotes is None).
    if os.path.exists(groove_dir):
        shutil.rmtree(groove_dir)
Example #29
0
def download(data_home=None, force_overwrite=False, cleanup=True,
             partial_download=None):
    """Download the giantsteps_tempo Dataset (annotations).
    The audio files are not provided due to copyright issues.

    Args:
        data_home (str):
            Local path where the dataset is stored.
            If `None`, looks for the data in the default directory, `~/mir_datasets`
        force_overwrite (bool):
            Whether to overwrite the existing downloaded data
        cleanup (bool):
            Whether to delete the zip/tar file after extracting.
        partial_download (list of str or None):
            List of remote keys to download; arguments can be 'audio',
            'metadata' or/and 'tempos'. If `None`, all data is downloaded.
    """
    # Fix: the docstring documented `partial_download` but the signature did
    # not accept it. Added as a trailing keyword argument with default None,
    # so existing positional callers are unaffected.

    # use the default location: ~/mir_datasets/giantsteps_tempo
    if data_home is None:
        data_home = utils.get_default_dataset_path(DATASET_DIR)

    download_message = """
            Unfortunately the audio files of the Giant Steps Tempo dataset are not available
            for download. If you have the Giant Steps audio dataset, place the contents into
            a folder called GiantSteps_tempo with the following structure:
                > GiantSteps_tempo/
                    > giantsteps-tempo-dataset-0b7d47ba8cae59d3535a02e3db69e2cf6d0af5bb/
                    > audio/
            and copy the folder to {}
        """.format(data_home)

    download_utils.downloader(
        data_home,
        remotes=REMOTES,
        partial_download=partial_download,
        info_message=download_message,
        force_overwrite=force_overwrite,
        cleanup=cleanup,
    )
Example #30
0
    def download(self,
                 partial_download=None,
                 force_overwrite=False,
                 cleanup=False):
        """Download the dataset

        Args:
            partial_download (list or None):
                A list of keys of remotes to partially download.
                If None, all data is downloaded
            force_overwrite (bool):
                If True, existing files are overwritten by the downloaded files.
                By default False.
            cleanup (bool):
                Whether to delete any zip/tar files after extracting.

        Raises:
            ValueError: if invalid keys are passed to partial_download
            IOError: if a downloaded file's checksum is different from expected

        """
        # NOTE(review): `partial_download` is accepted and documented but the
        # loop below always iterates ALL of self.remotes — confirm whether
        # partial downloads are expected to work here.
        if not os.path.exists(self.data_home):
            os.makedirs(self.data_home)
        # Pre-create the train/validation target directories so downloaded
        # folders can be moved into them below.
        train = "acousticbrainz-mediaeval-train"
        train_dir = os.path.join(self.data_home, train)
        if not os.path.isdir(train_dir):
            os.mkdir(train_dir)
        validate = "acousticbrainz-mediaeval-validation"
        validate_dir = os.path.join(self.data_home, validate)
        if not os.path.isdir(validate_dir):
            os.mkdir(validate_dir)

        # Download each remote archive, then relocate its contents.
        for key, remote in self.remotes.items():
            # Skip remotes whose target directory already exists, unless
            # the caller forces a re-download. Keys look like
            # "<fold>-<first_dir>" where fold is "train" or "validation".
            file_downloaded = False
            if not force_overwrite:
                fold, first_dir = key.split("-")
                first_dir_path = os.path.join(
                    train_dir if fold == "train" else validate_dir, first_dir)
                if os.path.isdir(first_dir_path):
                    file_downloaded = True
                    logging.info(
                        "File " + remote.filename +
                        " downloaded. Skip download (force_overwrite=False).")
            if not file_downloaded:
                # Force overwrite at the downloader level since we already
                # decided above whether this remote needs (re-)downloading.
                # NOTE(review): the original comment suggested a retry on a
                # "typical error", but no retry logic is implemented here.
                download_utils.downloader(
                    self.data_home,
                    remotes={key: remote},
                    partial_download=None,
                    info_message=None,
                    force_overwrite=True,
                    cleanup=cleanup,
                )
            # Archives extract under data_home/temp/<fold-name>; move each
            # extracted directory into its final train/validation location.
            source_dir = os.path.join(self.data_home, "temp",
                                      train if "train" in key else validate)
            target_dir = train_dir if "train" in key else validate_dir
            dir_names = os.listdir(source_dir)
            for dir_name in dir_names:
                shutil.move(
                    os.path.join(source_dir, dir_name),
                    os.path.join(target_dir, dir_name),
                )