def download(data_home=None, partial_download=None, force_overwrite=False, cleanup=True):
    """Download GuitarSet.

    Args:
        data_home (str): Local path where the dataset is stored.
            If `None`, looks for the data in the default directory, `~/mir_datasets`
        partial_download (list): Remote keys to download. Any of:
            * `'annotations'` the annotation files
            * `'audio_hex_original'` original 6 channel wave file from hexaphonic pickup
            * `'audio_hex_debleeded'` hex wave files with interference removal applied
            * `'audio_mic'` monophonic recording from reference microphone
            * `'audio_mix'` monophonic mixture of original 6 channel file
            If `None`, all data is downloaded.
        force_overwrite (bool): Whether to overwrite the existing downloaded data
        cleanup (bool): Whether to delete the zip/tar file after extracting.
    """
    # Fall back to the default dataset location when none is given.
    save_path = (
        utils.get_default_dataset_path(DATASET_DIR) if data_home is None else data_home
    )
    download_utils.downloader(
        save_path,
        remotes=REMOTES,
        partial_download=partial_download,
        info_message=None,
        force_overwrite=force_overwrite,
        cleanup=cleanup,
    )
def download(data_home=None, force_overwrite=False):
    """DALI is not available for downloading directly.
    This function prints a helper message to download DALI through zenodo.org.

    Args:
        data_home (str): Local path where the dataset is stored.
            If `None`, looks for the data in the default directory, `~/mir_datasets`
        force_overwrite (bool): Whether to overwrite the existing downloaded data
    """
    if data_home is None:
        data_home = utils.get_default_dataset_path(DATASET_DIR)

    info_message = """
        To download this dataset, visit:
        https://zenodo.org/record/2577915 and request access.
        Once downloaded, unzip the file DALI_v1.0.zip
        and place the result in:
        {save_path}

        Use the function dali_code.get_audio you can find at:
        https://github.com/gabolsgabs/DALI for getting the audio and place them at:
        {audio_path}
    """.format(
        # NOTE: a stray `force_overwrite=` kwarg was previously passed to
        # .format() here; the template never referenced it, so it was removed.
        save_path=os.path.join(data_home, 'annotations'),
        audio_path=os.path.join(data_home, 'audio'),
    )

    # Forward force_overwrite: previously it was accepted but never passed on,
    # so the flag was silently ignored.
    download_utils.downloader(
        data_home,
        remotes=REMOTES,
        info_message=info_message,
        force_overwrite=force_overwrite,
    )
def download(
    data_home=None, partial_download=None, force_overwrite=False, cleanup=True
):
    """Download TinySOL.

    Args:
        data_home (str): Local path where the dataset is stored.
            If `None`, looks for the data in the default directory, `~/mir_datasets`
        partial_download (list): Remote keys to download. Any of:
            * `'annotations'` the annotation files
            * `'audio'` the audio files
            If `None`, all data is downloaded.
        force_overwrite (bool): Whether to overwrite the existing downloaded data
        cleanup (bool): Whether to delete the zip/tar file after extracting.
    """
    # Resolve the storage directory, defaulting to ~/mir_datasets/<DATASET_DIR>.
    target_dir = (
        utils.get_default_dataset_path(DATASET_DIR) if data_home is None else data_home
    )
    download_utils.downloader(
        target_dir,
        remotes=REMOTES,
        partial_download=partial_download,
        info_message=None,
        force_overwrite=force_overwrite,
        cleanup=cleanup,
    )
def download(self, partial_download=None, force_overwrite=False, cleanup=False):
    """Download the dataset

    Args:
        partial_download (list or None):
            A list of keys of remotes to partially download.
            If None, all data is downloaded
        force_overwrite (bool):
            If True, existing files are overwritten by the downloaded files.
        cleanup (bool):
            Whether to delete any zip/tar files after extracting.

    Raises:
        ValueError: if invalid keys are passed to partial_download
        IOError: if a downloaded file's checksum is different from expected
    """
    download_utils.downloader(
        self.data_home,
        remotes=self.remotes,
        partial_download=partial_download,
        force_overwrite=force_overwrite,
        cleanup=cleanup,
    )
    # The downloaded JSON metadata contains bare `nan` tokens, which are not
    # valid JSON; rewrite them as `null` in place.
    meta_dir = os.path.join(self.data_home, "meta")
    self._find_replace(meta_dir, ": nan", ": null", "*.json")
def download(self, partial_download=None, force_overwrite=False, cleanup=False):
    """Download the dataset

    Args:
        partial_download (list or None):
            A list of keys of remotes to partially download.
            If None, all data is downloaded
        force_overwrite (bool):
            If True, existing files are overwritten by the downloaded files.
        cleanup (bool):
            Whether to delete any zip/tar files after extracting.

    Raises:
        ValueError: if invalid keys are passed to partial_download
        IOError: if a downloaded file's checksum is different from expected
    """
    # MAESTRO remotes nest: "all" contains "midi" contains "metadata".
    # Collapse the request to the single largest remote that covers it.
    requested = partial_download
    if requested is None or "all" in requested:
        partial_download = ["all"]
    elif "midi" in requested:
        partial_download = ["midi"]

    download_utils.downloader(
        self.data_home,
        remotes=self.remotes,
        partial_download=partial_download,
        force_overwrite=force_overwrite,
        cleanup=cleanup,
    )
def download(data_home=None, force_overwrite=False):
    """Download the Beatles Dataset (annotations).
    The audio files are not provided due to the copyright.

    Args:
        data_home (str): Local path where the dataset is stored.
            If `None`, looks for the data in the default directory, `~/mir_datasets`
        force_overwrite (bool): Whether to overwrite the existing downloaded data
    """
    # use the default location: ~/mir_datasets/Beatles
    if data_home is None:
        data_home = utils.get_default_dataset_path(DATASET_DIR)

    download_message = """
        Unfortunately the audio files of the Beatles dataset are not available
        for download. If you have the Beatles dataset, place the contents into
        a folder called Beatles with the following structure:
            > Beatles/
                > annotations/
                > audio/
        and copy the Beatles folder to {}
    """.format(data_home)

    download_utils.downloader(
        data_home,
        tar_downloads=[ANNOTATIONS_REMOTE],
        info_message=download_message,
        force_overwrite=force_overwrite,
    )
def _download(save_dir, remotes, partial_download, info_message, force_overwrite, cleanup):
    """Download the dataset.

    Args:
        save_dir (str):
            The directory to download the data
        remotes (dict or None):
            A dictionary of RemoteFileMetadata tuples of data in zip format.
            If None, there is no data to download
        partial_download (list or None):
            A list of keys to partially download the remote objects of the
            download dict. If None, all data is downloaded
        info_message (str or None):
            A string of info to print when this function is called.
            If None, no string is printed.
        force_overwrite (bool):
            If True, existing files are overwritten by the downloaded files.
        cleanup (bool):
            Whether to delete the zip/tar file after extracting.
    """
    download_utils.downloader(
        save_dir,
        remotes=remotes,
        partial_download=partial_download,
        # Forward info_message: it was previously accepted but dropped, so the
        # documented "string of info to print" never appeared.
        info_message=info_message,
        force_overwrite=force_overwrite,
        cleanup=cleanup,
    )

    # removing nans from JSON files (bare `nan` is not valid JSON)
    find_replace(os.path.join(save_dir, "meta"), ": nan", ": null", "*.json")
def download(data_home=None, force_overwrite=False):
    """Download ORCHSET Dataset.

    Args:
        data_home (str): Local path where the dataset is stored.
            If `None`, looks for the data in the default directory, `~/mir_datasets`
        force_overwrite (bool): Whether to overwrite the existing downloaded data
    """
    if data_home is None:
        data_home = utils.get_default_dataset_path(DATASET_DIR)

    download_utils.downloader(
        data_home, zip_downloads=[REMOTE], force_overwrite=force_overwrite
    )

    # files get downloaded to a folder called Orchset - move everything up a level
    duplicated_orchset_dir = os.path.join(data_home, 'Orchset')
    orchset_files = glob.glob(os.path.join(duplicated_orchset_dir, '*'))
    for fpath in orchset_files:
        shutil.move(fpath, data_home)

    if os.path.exists(duplicated_orchset_dir):
        # Use os.rmdir instead of os.removedirs: removedirs would also try to
        # prune every empty ancestor directory (including data_home itself),
        # while only the now-empty duplicated leaf folder should be removed.
        os.rmdir(duplicated_orchset_dir)
def download(data_home=None, force_overwrite=False):
    """Download iKala Dataset. However, iKala dataset is not available for
    download anymore. This function prints a helper message to organize
    pre-downloaded iKala dataset.

    Args:
        data_home (str): Local path where the dataset is stored.
            If `None`, looks for the data in the default directory, `~/mir_datasets`
        force_overwrite (bool):
            If True, existing files are overwritten by the downloaded files.
    """
    if data_home is None:
        data_home = utils.get_default_dataset_path(DATASET_DIR)

    # Only the id-mapping file is downloadable; the rest must be organized
    # manually per the message below.
    download_message = """
        Unfortunately the iKala dataset is not available for download.
        If you have the iKala dataset, place the contents into a folder called
        {ikala_dir} with the following structure:
            > {ikala_dir}/
                > Lyrics/
                > PitchLabel/
                > Wavfile/
        and copy the {ikala_dir} folder to {save_path}
    """.format(ikala_dir=DATASET_DIR, save_path=data_home)

    download_utils.downloader(
        data_home,
        file_downloads=[ID_MAPPING_REMOTE],
        info_message=download_message,
        force_overwrite=force_overwrite,
    )
def download(data_home=None):
    """MedleyDB is not available for downloading directly.
    This function prints a helper message to download MedleyDB
    through zenodo.org.

    Args:
        data_home (str): Local path where the dataset is stored.
            If `None`, looks for the data in the default directory, `~/mir_datasets`
    """
    if data_home is None:
        data_home = utils.get_default_dataset_path(DATASET_DIR)

    # No remotes here: the downloader is only used to print the access message.
    info_message = """
        To download this dataset, visit:
        https://zenodo.org/record/2628782#.XKZdABNKh24
        and request access.
        Once downloaded, unzip the file MedleyDB-Melody.zip
        and copy the result to:
        {data_home}
    """.format(data_home=data_home)

    download_utils.downloader(data_home, info_message=info_message)
def download(self, partial_download=None, force_overwrite=False, cleanup=False):
    """Download data to `save_dir` and optionally print a message.

    Args:
        partial_download (list or None):
            A list of keys of remotes to partially download.
            If None, all data is downloaded
        force_overwrite (bool):
            If True, existing files are overwritten by the downloaded files.
        cleanup (bool):
            Whether to delete any zip/tar files after extracting.

    Raises:
        ValueError: if invalid keys are passed to partial_download
        IOError: if a downloaded file's checksum is different from expected
    """
    # Delegate entirely to the shared downloader, including this dataset's
    # custom info message (if any).
    download_utils.downloader(
        self.data_home,
        remotes=self.remotes,
        partial_download=partial_download,
        info_message=self._download_info,
        force_overwrite=force_overwrite,
        cleanup=cleanup,
    )
def download(data_home=None, force_overwrite=False, cleanup=True):
    """Download SALAMI Dataset (annotations).
    The audio files are not provided.

    Args:
        data_home (str): Local path where the dataset is stored.
            If `None`, looks for the data in the default directory, `~/mir_datasets`
        force_overwrite (bool): Whether to overwrite the existing downloaded data
        cleanup (bool): Whether to delete the zip/tar file after extracting.
    """
    if data_home is None:
        data_home = utils.get_default_dataset_path(DATASET_DIR)

    info_message = """
        Unfortunately the audio files of the Salami dataset are not available
        for download. If you have the Salami dataset, place the contents into a
        folder called Salami with the following structure:
            > Salami/
                > salami-data-public-hierarchy-corrections/
                > audio/
        and copy the Salami folder to {}
    """.format(data_home)

    download_utils.downloader(
        data_home,
        remotes=REMOTES,
        info_message=info_message,
        force_overwrite=force_overwrite,
        cleanup=cleanup,
    )
def download(data_home=None, force_overwrite=False):
    """Download SALAMI Dataset (annotations).
    The audio files are not provided.

    Args:
        data_home (str): Local path where the dataset is stored.
            If `None`, looks for the data in the default directory, `~/mir_datasets`
        force_overwrite (bool): whether to overwrite the existing downloaded data
    """
    if data_home is None:
        data_home = utils.get_default_dataset_path(DATASET_DIR)

    info_message = """
        Unfortunately the audio files of the Salami dataset are not available
        for download. If you have the Salami dataset, place the contents into a
        folder called Salami with the following structure:
            > Salami/
                > salami-data-public-master/
                > audio/
        and copy the Salami folder to {}
    """.format(data_home)

    download_utils.downloader(
        data_home,
        zip_downloads=[ANNOTATIONS_REMOTE],
        info_message=info_message,
        force_overwrite=force_overwrite,
    )
def download(data_home=None, force_overwrite=False, cleanup=True, partial_download=None):
    """Download the giantsteps_key Dataset (annotations).
    The audio files are not provided due to copyright issues.

    Args:
        data_home (str): Local path where the dataset is stored.
            If `None`, looks for the data in the default directory, `~/mir_datasets`
        force_overwrite (bool): Whether to overwrite the existing downloaded data
        cleanup (bool): Whether to delete the zip/tar file after extracting.
        partial_download (list of str): Remote keys to download; any of
            'audio', 'metadata', or 'keys'.
    """
    # default location: ~/mir_datasets/giantsteps_key
    if data_home is None:
        data_home = utils.get_default_dataset_path(DATASET_DIR)

    download_utils.downloader(
        data_home,
        remotes=REMOTES,
        partial_download=partial_download,
        # No extra instructions are needed for this dataset.
        info_message="",
        force_overwrite=force_overwrite,
        cleanup=cleanup,
    )
def download(data_home=None, force_overwrite=False, cleanup=True):
    """Download Groove MIDI.

    Args:
        data_home (str): Local path where the dataset is stored.
            If `None`, looks for the data in the default directory, `~/mir_datasets`
        force_overwrite (bool): Whether to overwrite the existing downloaded data
        cleanup (bool): Whether to delete the zip/tar file after extracting.
    """
    if data_home is None:
        data_home = utils.get_default_dataset_path(DATASET_DIR)

    download_utils.downloader(
        data_home,
        remotes=REMOTES,
        info_message=None,
        force_overwrite=force_overwrite,
        cleanup=cleanup,
    )

    # The archive extracts into a nested 'groove' folder; flatten it so the
    # contents live directly under data_home.
    groove_dir = os.path.join(data_home, 'groove')
    for extracted in glob.glob(os.path.join(groove_dir, '*')):
        shutil.move(extracted, data_home)
    if os.path.exists(groove_dir):
        shutil.rmtree(groove_dir)
def download(data_home=None, force_overwrite=False):
    """Download the RWC-Popular annotations and metadata.
    The audio files are not available for download.

    Args:
        data_home (str): Local path where the dataset is stored.
            If `None`, looks for the data in the default directory, `~/mir_datasets`
        force_overwrite (bool): Whether to overwrite the existing downloaded data
    """
    if data_home is None:
        data_home = utils.get_default_dataset_path(DATASET_DIR)

    info_message = """
        Unfortunately the audio files of the RWC-Popular dataset are not available
        for download. If you have the RWC-Popular dataset, place the contents into a
        folder called RWC-Popular with the following structure:
            > RWC-Popular/
                > annotations/
                > audio/rwc-p-m0i with i in [1 .. 7]
                > metadata-master/
        and copy the RWC-Popular folder to {}
    """.format(data_home)

    # All annotation archives plus the metadata archive.
    remote_zips = [
        METADATA_REMOTE,
        ANNOTATIONS_REMOTE_1,
        ANNOTATIONS_REMOTE_2,
        ANNOTATIONS_REMOTE_3,
        ANNOTATIONS_REMOTE_4,
    ]
    download_utils.downloader(
        data_home,
        zip_downloads=remote_zips,
        info_message=info_message,
        force_overwrite=force_overwrite,
    )
def download(self, partial_download=None, force_overwrite=False, cleanup=False): """Download the dataset Args: partial_download (list or None): A list of keys of remotes to partially download. If None, all data is downloaded force_overwrite (bool): If True, existing files are overwritten by the downloaded files. cleanup (bool): Whether to delete any zip/tar files after extracting. Raises: ValueError: if invalid keys are passed to partial_download IOError: if a downloaded file's checksum is different from expected """ # in MAESTRO "metadata" is contained in "midi" is contained in "all" if partial_download is None or "all" in partial_download: partial_download = ["all"] elif "midi" in partial_download: partial_download = ["midi"] download_utils.downloader( self.data_home, remotes=self.remotes, partial_download=partial_download, force_overwrite=force_overwrite, cleanup=cleanup, ) # files get downloaded to a folder called maestro-v2.0.0 # move everything up a level maestro_dir = os.path.join(self.data_home, "maestro-v2.0.0") if not os.path.exists(maestro_dir): logging.info( "Maestro data not downloaded, because it probably already exists on your computer. " + "Run .validate() to check, or rerun with force_overwrite=True to delete any " + "existing files and download from scratch") return maestro_files = glob.glob(os.path.join(maestro_dir, "*")) for fpath in maestro_files: target_path = os.path.join(self.data_home, os.path.basename(fpath)) if os.path.exists(target_path): logging.info( "{} already exists. Run with force_overwrite=True to download from scratch" .format(target_path)) continue shutil.move(fpath, self.data_home) if os.path.exists(maestro_dir): shutil.rmtree(maestro_dir)
def index(self):
    """Load the dataset index, first fetching the remote index file if it is
    not already present on disk."""
    if self.remote_index is not None:
        module_dir = os.path.dirname(os.path.realpath(__file__))
        indexes_dir = os.path.join(module_dir, "datasets/indexes")
        # Download the index only when the local copy is missing.
        if not os.path.isfile(os.path.join(indexes_dir, self.index_file)):
            download_utils.downloader(indexes_dir, remotes=self.remote_index)
    return load_json_index(self.index_file)
def download(data_home=None):
    """Download the GTZAN-Genre dataset.

    Args:
        data_home (str): Local path where the dataset is stored.
            If `None`, looks for the data in the default directory, `~/mir_datasets`
    """
    target_dir = (
        utils.get_default_dataset_path(DATASET_DIR) if data_home is None else data_home
    )
    download_utils.downloader(target_dir, tar_downloads=[DATASET_REMOTE])
def download(self, partial_download=None, force_overwrite=False, cleanup=False):
    """Download the dataset

    Args:
        partial_download (list or None):
            A list of keys of remotes to partially download.
            If None, all data is downloaded
        force_overwrite (bool):
            If True, existing files are overwritten by the downloaded files.
        cleanup (bool):
            Whether to delete any zip/tar files after extracting.

    Raises:
        ValueError: if invalid keys are passed to partial_download
        IOError: if a downloaded file's checksum is different from expected
    """
    download_utils.downloader(
        self.data_home,
        partial_download=partial_download,
        remotes=self.remotes,
        info_message=None,
        force_overwrite=force_overwrite,
        cleanup=cleanup,
    )

    # files get downloaded to a folder called groove - move everything up a level
    groove_dir = os.path.join(self.data_home, "groove")
    if not os.path.exists(groove_dir):
        # nothing was extracted, so there is nothing to flatten
        logging.info(
            "Groove MIDI data not downloaded, because it probably already exists on your computer. "
            + "Run .validate() to check, or rerun with force_overwrite=True to delete any "
            + "existing files and download from scratch")
        return
    groove_files = glob.glob(os.path.join(groove_dir, "*"))
    for fpath in groove_files:
        target_path = os.path.join(self.data_home, os.path.basename(fpath))
        if os.path.exists(target_path):
            # never clobber pre-existing files when flattening
            logging.info(
                "{} already exists. Run with force_overwrite=True to download from scratch"
                .format(target_path))
            continue
        shutil.move(fpath, self.data_home)
    if os.path.exists(groove_dir):
        # remove the now-(mostly-)empty nested folder
        shutil.rmtree(groove_dir)
def download(
    data_home=None, partial_download=None, force_overwrite=False, cleanup=True
):
    """Download the dataset.

    Args:
        data_home (str): Local path where the dataset is stored.
            If `None`, looks for the data in the default directory, `~/mir_datasets`
        force_overwrite (bool): Whether to overwrite the existing downloaded data
        partial_download (list): List indicating what to partially download.
            The list can include any of:
                * 'all': audio, midi and metadata
                * 'midi': midi and metadata only
                * 'metadata': metadata only
            If `None`, all data is downloaded.
        cleanup (bool): Whether to delete the zip/tar file after extracting.
    """
    if data_home is None:
        data_home = utils.get_default_dataset_path(DATASET_DIR)

    # in MAESTRO "metadata" is contained in "midi" is contained in "all",
    # so collapse the request to the single largest remote that covers it
    if partial_download is None or 'all' in partial_download:
        partial_download = ['all']
    elif 'midi' in partial_download:
        partial_download = ['midi']

    download_utils.downloader(
        data_home,
        remotes=REMOTES,
        partial_download=partial_download,
        force_overwrite=force_overwrite,
        cleanup=cleanup,
    )

    # files get downloaded to a folder called maestro-v2.0.0
    # move everything up a level
    maestro_dir = os.path.join(data_home, 'maestro-v2.0.0')
    maestro_files = glob.glob(os.path.join(maestro_dir, '*'))
    for fpath in maestro_files:
        shutil.move(fpath, data_home)
    if os.path.exists(maestro_dir):
        # remove the now-empty nested folder
        shutil.rmtree(maestro_dir)
def _download(save_dir, remotes, partial_download, info_message, force_overwrite, cleanup): """Download the dataset. Args: save_dir (str): The directory to download the data remotes (dict or None): A dictionary of RemoteFileMetadata tuples of data in zip format. If None, there is no data to download partial_download (list or None): List indicating what to partially download. The list can include any of: * 'all': audio, midi and metadata * 'midi': midi and metadata only * 'metadata': metadata only If None, all data is downloaded info_message (str or None): A string of info to print when this function is called. If None, no string is printed. force_overwrite (bool): If True, existing files are overwritten by the downloaded files. cleanup (bool): Whether to delete the zip/tar file after extracting. """ # in MAESTRO "metadata" is contained in "midi" is contained in "all" if partial_download is None or "all" in partial_download: partial_download = ["all"] elif "midi" in partial_download: partial_download = ["midi"] download_utils.downloader( save_dir, remotes=remotes, partial_download=partial_download, force_overwrite=force_overwrite, cleanup=cleanup, ) # files get downloaded to a folder called maestro-v2.0.0 # move everything up a level maestro_dir = os.path.join(save_dir, "maestro-v2.0.0") maestro_files = glob.glob(os.path.join(maestro_dir, "*")) for fpath in maestro_files: shutil.move(fpath, save_dir) if os.path.exists(maestro_dir): shutil.rmtree(maestro_dir)
def download(data_home=None, partial_download=None, force_overwrite=False, cleanup=True):
    """Download the RWC Popular (annotations and metadata).
    The audio files are not provided due to copyright issues.

    Args:
        data_home (str): Local path where the dataset is stored.
            If `None`, looks for the data in the default directory, `~/mir_datasets`
        partial_download (list): Remote keys to download. Any of:
            * `'annotations_beat'` the beat annotation files
            * `'annotations_sections'` the sections annotation files
            * `'annotations_chords'` the chords annotation files
            * `'annotations_vocal_act'` the vocal activity annotation files
            * `'metadata'` the metadata files
            If `None`, all data is downloaded.
        force_overwrite (bool): Whether to overwrite the existing downloaded data
        cleanup (bool): Whether to delete the zip/tar file after extracting.
    """
    if data_home is None:
        data_home = utils.get_default_dataset_path(DATASET_DIR)

    info_message = """
        Unfortunately the audio files of the RWC-Popular dataset are not available
        for download. If you have the RWC-Popular dataset, place the contents into a
        folder called RWC-Popular with the following structure:
            > RWC-Popular/
                > annotations/
                > audio/rwc-p-m0i with i in [1 .. 7]
                > metadata-master/
        and copy the RWC-Popular folder to {}
    """.format(data_home)

    download_utils.downloader(
        data_home,
        remotes=REMOTES,
        partial_download=partial_download,
        info_message=info_message,
        force_overwrite=force_overwrite,
        cleanup=cleanup,
    )
def download(data_home=None):
    """Download Groove MIDI.

    Args:
        data_home (str): Local path where the dataset is stored.
            If `None`, looks for the data in the default directory, `~/mir_datasets`
    """
    if data_home is None:
        data_home = utils.get_default_dataset_path(DATASET_DIR)

    download_utils.downloader(data_home, zip_downloads=[AUDIO_MIDI_REMOTE], cleanup=True)

    # The archive extracts to 'groove'; rename it to the canonical folder name.
    extracted_dir = os.path.join(data_home, 'groove')
    final_dir = os.path.join(data_home, 'Groove MIDI')
    os.rename(extracted_dir, final_dir)
def download(data_home=None):
    """Download TinySOL.

    Args:
        data_home (str): Local path where the dataset is stored.
            If `None`, looks for the data in the default directory, `~/mir_datasets`
    """
    target_dir = (
        utils.get_default_dataset_path(DATASET_DIR) if data_home is None else data_home
    )
    # Audio ships as a tarball; the annotation file is downloaded as-is.
    download_utils.downloader(
        target_dir,
        tar_downloads=[AUDIO_REMOTE],
        file_downloads=[ANNOTATION_REMOTE],
        cleanup=True,
    )
def download(data_home=None):
    """Download GuitarSet.

    Args:
        data_home (str): Local path where the dataset is stored.
            If `None`, looks for the data in the default directory, `~/mir_datasets`
    """
    if data_home is None:
        data_home = utils.get_default_dataset_path(DATASET_DIR)

    # Annotations plus the four audio variants (debleeded hex, raw hex,
    # reference mic, and mix) each ship as a separate zip.
    remote_zips = [
        ANNOTATION_REMOTE,
        AUDIO_HEX_CLN_REMOTE,
        AUDIO_HEX_REMOTE,
        AUDIO_MIC_REMOTE,
        AUDIO_MIX_REMOTE,
    ]
    download_utils.downloader(data_home, zip_downloads=remote_zips, cleanup=True)
def download(data_home=None, force_overwrite=False, cleanup=True):
    """Download the GTZAN-Genre dataset.

    Args:
        data_home (str): Local path where the dataset is stored.
            If `None`, looks for the data in the default directory, `~/mir_datasets`
        force_overwrite (bool): Whether to overwrite the existing downloaded data
        cleanup (bool): Whether to delete the zip/tar file after extracting.
    """
    target_dir = (
        utils.get_default_dataset_path(DATASET_DIR) if data_home is None else data_home
    )
    download_utils.downloader(
        target_dir,
        remotes=REMOTES,
        info_message=None,
        force_overwrite=force_overwrite,
        cleanup=cleanup,
    )
def _download(save_dir, remotes, partial_download, info_message, force_overwrite, cleanup):
    """Download the dataset.

    Args:
        save_dir (str):
            The directory to download the data
        remotes (dict or None):
            A dictionary of RemoteFileMetadata tuples of data in zip format.
            If None, there is no data to download
        partial_download (list or None):
            A list of keys to partially download the remote objects of the
            download dict. If None, all data is downloaded
        info_message (str or None):
            A string of info to print when this function is called.
            If None, no string is printed.
        force_overwrite (bool):
            If True, existing files are overwritten by the downloaded files.
        cleanup (bool):
            Whether to delete the zip/tar file after extracting.
    """
    download_utils.downloader(
        save_dir,
        remotes=remotes,
        # Forward partial_download and info_message: both were previously
        # accepted by this function but silently dropped (partial_download was
        # never passed, and info_message was hardcoded to None).
        partial_download=partial_download,
        info_message=info_message,
        force_overwrite=force_overwrite,
        cleanup=cleanup,
    )

    # files get downloaded to a folder called groove - move everything up a level
    groove_dir = os.path.join(save_dir, "groove")
    groove_files = glob.glob(os.path.join(groove_dir, "*"))
    for fpath in groove_files:
        shutil.move(fpath, save_dir)
    if os.path.exists(groove_dir):
        shutil.rmtree(groove_dir)
def download(data_home=None, force_overwrite=False, cleanup=True, partial_download=None):
    """Download the giantsteps_tempo Dataset (annotations).
    The audio files are not provided due to copyright issues.

    Args:
        data_home (str): Local path where the dataset is stored.
            If `None`, looks for the data in the default directory, `~/mir_datasets`
        force_overwrite (bool): Whether to overwrite the existing downloaded data
        cleanup (bool): Whether to delete the zip/tar file after extracting.
        partial_download (list of str): Remote keys to download; any of
            'audio', 'metadata', or 'tempos'. If `None`, all data is downloaded.
    """
    # The docstring previously advertised partial_download but the signature
    # lacked it; it is now an actual (backward-compatible, trailing) parameter,
    # matching the giantsteps_key sibling loader.

    # use the default location: ~/mir_datasets/giantsteps_tempo
    if data_home is None:
        data_home = utils.get_default_dataset_path(DATASET_DIR)

    download_message = """
        Unfortunately the audio files of the Giant Steps Tempo dataset are not available
        for download. If you have the Giant Steps audio dataset, place the contents into
        a folder called GiantSteps_tempo with the following structure:
            > GiantSteps_tempo/
                > giantsteps-tempo-dataset-0b7d47ba8cae59d3535a02e3db69e2cf6d0af5bb/
                > audio/
        and copy the folder to {}
    """.format(data_home)

    download_utils.downloader(
        data_home,
        remotes=REMOTES,
        partial_download=partial_download,
        info_message=download_message,
        force_overwrite=force_overwrite,
        cleanup=cleanup,
    )
def download(self, partial_download=None, force_overwrite=False, cleanup=False):
    """Download the dataset

    Args:
        partial_download (list or None):
            A list of keys of remotes to partially download.
            If None, all data is downloaded
        force_overwrite (bool):
            If True, existing files are overwritten by the downloaded files.
            By default False.
        cleanup (bool):
            Whether to delete any zip/tar files after extracting.

    Raises:
        ValueError: if invalid keys are passed to partial_download
        IOError: if a downloaded file's checksum is different from expected
    """
    if not os.path.exists(self.data_home):
        os.makedirs(self.data_home)
    # Create the train/validation target directories if they don't exist
    train = "acousticbrainz-mediaeval-train"
    train_dir = os.path.join(self.data_home, train)
    if not os.path.isdir(train_dir):
        os.mkdir(train_dir)
    validate = "acousticbrainz-mediaeval-validation"
    validate_dir = os.path.join(self.data_home, validate)
    if not os.path.isdir(validate_dir):
        os.mkdir(validate_dir)

    # download each remote individually
    for key, remote in self.remotes.items():
        # skip remotes whose target directory already exists, unless
        # force_overwrite was requested
        file_downloaded = False
        if not force_overwrite:
            # remote keys look like "<fold>-<first_dir>", e.g. "train-01"
            # — TODO confirm against the remotes definition
            fold, first_dir = key.split("-")
            first_dir_path = os.path.join(
                train_dir if fold == "train" else validate_dir, first_dir)
            if os.path.isdir(first_dir_path):
                file_downloaded = True
                logging.info(
                    "File " + remote.filename
                    + " downloaded. Skip download (force_overwrite=False).")
        if not file_downloaded:
            # download this single remote (force_overwrite=True here because
            # the skip decision was already made above)
            download_utils.downloader(
                self.data_home,
                remotes={key: remote},
                partial_download=None,
                info_message=None,
                force_overwrite=True,
                cleanup=cleanup,
            )
            # move extracted contents from the temporary directory to the
            # final train/validation directory
            source_dir = os.path.join(self.data_home, "temp",
                                      train if "train" in key else validate)
            target_dir = train_dir if "train" in key else validate_dir
            dir_names = os.listdir(source_dir)
            for dir_name in dir_names:
                shutil.move(
                    os.path.join(source_dir, dir_name),
                    os.path.join(target_dir, dir_name),
                )