def __init__(self, track_id, data_home=None):
    """Create a Salami Track by id, resolving paths and metadata.

    Args:
        track_id (str): track id of the track to load
        data_home (str): Local path where the dataset is stored.
            If `None`, uses the default mir_datasets directory.

    Raises:
        ValueError: if `track_id` is not in the dataset index
    """
    if track_id not in INDEX:
        raise ValueError('{} is not a valid track ID in Salami'.format(track_id))
    self.track_id = track_id
    if data_home is None:
        data_home = utils.get_default_dataset_path(DATASET_DIR)
    self._data_home = data_home
    self._track_paths = INDEX[track_id]

    # refresh the module-level metadata cache when it targets another data_home
    if METADATA is None or METADATA['data_home'] != data_home:
        _reload_metadata(data_home)

    metadata_keys = (
        'source',
        'annotator_1_id',
        'annotator_2_id',
        'duration_sec',
        'title',
        'artist',
        'annotator_1_time',
        'annotator_2_time',
        'class',
        'genre',
    )
    if METADATA is not None and track_id in METADATA:
        self._track_metadata = METADATA[track_id]
    else:
        # annotations with missing metadata: every field defaults to None
        self._track_metadata = {key: None for key in metadata_keys}

    self.audio_path = os.path.join(self._data_home, self._track_paths['audio'][0])
    # expose each metadata field as an attribute; 'class' is a reserved word,
    # so it is published under the name `broad_genre` instead
    for key in metadata_keys:
        if key != 'class':
            setattr(self, key, self._track_metadata[key])
    self.broad_genre = self._track_metadata['class']
def __init__(self, track_id, data_home=None):
    """Create an RWC-Popular Track by id, resolving paths and metadata.

    Args:
        track_id (str): track id of the track to load
        data_home (str): Local path where the dataset is stored.
            If `None`, uses the default mir_datasets directory.

    Raises:
        ValueError: if `track_id` is not in the dataset index
    """
    if track_id not in INDEX:
        raise ValueError(
            '{} is not a valid track ID in RWC-Popular'.format(track_id)
        )
    self.track_id = track_id
    if data_home is None:
        data_home = utils.get_default_dataset_path(DATASET_DIR)
    self._data_home = data_home
    self._track_paths = INDEX[track_id]

    # refresh the module-level metadata cache when it targets another data_home
    if METADATA is None or METADATA['data_home'] != data_home:
        _reload_metadata(data_home)

    metadata_keys = (
        'piece_number',
        'suffix',
        'track_number',
        'title',
        'artist',
        'singer_information',
        'duration_sec',
        'tempo',
        'instruments',
        'drum_information',
    )
    if METADATA is not None and track_id in METADATA:
        self._track_metadata = METADATA[track_id]
    else:
        # annotations with missing metadata: every field defaults to None
        self._track_metadata = {key: None for key in metadata_keys}

    self.audio_path = os.path.join(self._data_home, self._track_paths['audio'][0])
    # every metadata field maps 1:1 onto an attribute of the same name
    for key in metadata_keys:
        setattr(self, key, self._track_metadata[key])
def __init__(self, track_id, data_home=None):
    """Create a Groove MIDI Track by id, resolving paths and metadata.

    Args:
        track_id (str): track id of the track to load
        data_home (str): Local path where the dataset is stored.
            If `None`, uses the default mir_datasets directory.

    Raises:
        ValueError: if `track_id` is not in the dataset index
    """
    if track_id not in DATA.index:
        raise ValueError(
            '{} is not a valid track ID in Groove MIDI'.format(track_id)
        )
    self.track_id = track_id
    if data_home is None:
        data_home = utils.get_default_dataset_path(DATASET_DIR)
    self._data_home = data_home
    self._track_paths = DATA.index[track_id]

    metadata_keys = (
        "drummer",
        "session",
        "style",
        "tempo",
        "beat_type",
        "time_signature",
        "midi_filename",
        "audio_filename",
        "duration",
        "split",
    )
    metadata = DATA.metadata(data_home)
    if metadata is not None and track_id in metadata:
        self._track_metadata = metadata[track_id]
    else:
        # no metadata available for this track: every field defaults to None
        self._track_metadata = {key: None for key in metadata_keys}
    # every metadata field maps 1:1 onto an attribute of the same name
    for key in metadata_keys:
        setattr(self, key, self._track_metadata[key])

    self.midi_path = os.path.join(self._data_home, self._track_paths["midi"][0])
    # audio may be absent for some tracks; none_path_join presumably yields
    # None when a path component is None -- confirm against utils
    self.audio_path = utils.none_path_join(
        [self._data_home, self._track_paths["audio"][0]]
    )
def load(data_home=None):
    """Load the GTZAN-Genre dataset.

    Args:
        data_home (str): Local path where GTZAN-Genre is stored.
            If `None`, looks for the data in the default directory,
            `~/mir_datasets`

    Returns:
        (dict): {`track_id`: track data}
    """
    if data_home is None:
        data_home = utils.get_default_dataset_path(DATASET_DIR)
    return {
        track_id: Track(track_id, data_home=data_home)
        for track_id in DATA.index
    }
def download(
    data_home=None, partial_download=None, force_overwrite=False, cleanup=True
):
    """Download the RWC Jazz (annotations and metadata).
    The audio files are not provided due to copyright issues.

    Args:
        data_home (str):
            Local path where the dataset is stored.
            If `None`, looks for the data in the default directory, `~/mir_datasets`
        force_overwrite (bool):
            Whether to overwrite the existing downloaded data
        partial_download (list):
            List indicating what to partially download. The list can include any of:
                * `'annotations_beat'` the beat annotation files
                * `'annotations_sections'` the sections annotation files
                * `'metadata'` the metadata files
            If `None`, all data is downloaded.
        cleanup (bool):
            Whether to delete the zip/tar file after extracting.
    """
    if data_home is None:
        data_home = utils.get_default_dataset_path(DATASET_DIR)

    # the audio itself cannot be redistributed; tell the user where to put
    # a locally-obtained copy so the loader can find it
    info_message = """
        Unfortunately the audio files of the RWC-Jazz dataset are not available
        for download. If you have the RWC-Jazz dataset, place the contents into a
        folder called RWC-Jazz with the following structure:
            > RWC-Jazz/
                > annotations/
                > audio/rwc-j-m0i with i in [1 .. 4]
                > metadata-master/
        and copy the RWC-Jazz folder to {}
    """.format(
        data_home
    )

    # fetch only the downloadable remotes (annotations + metadata)
    download_utils.downloader(
        data_home,
        remotes=REMOTES,
        partial_download=partial_download,
        info_message=info_message,
        force_overwrite=force_overwrite,
        cleanup=cleanup,
    )
def __init__(self, track_id, data_home=None):
    """Create a Beatles Track by id, resolving audio path and title.

    Args:
        track_id (str): track id of the track to load
        data_home (str): Local path where the dataset is stored.
            If `None`, uses the default mir_datasets directory.

    Raises:
        ValueError: if `track_id` is not in the dataset index
    """
    if track_id not in INDEX:
        raise ValueError('{} is not a valid track ID in Beatles'.format(track_id))
    self.track_id = track_id
    if data_home is None:
        data_home = utils.get_default_dataset_path(DATASET_DIR)
    self._data_home = data_home
    self._track_paths = INDEX[track_id]
    self.audio_path = os.path.join(self._data_home, self._track_paths['audio'][0])
    # the track title is encoded in the sections annotation filename
    sections_filename = os.path.basename(self._track_paths['sections'][0])
    self.title = sections_filename.split('.')[0]
def validate(data_home=None, silence=False):
    """Validate that a local copy of this dataset is consistent.

    Args:
        data_home (str): Local path where the dataset is stored.
            If `None`, looks for the data in the default directory,
            `~/mir_datasets`

    Returns:
        missing_files (list): File paths in the dataset index but missing locally
        invalid_checksums (list): File paths that exist locally but whose
            checksum differs from the reference
    """
    if data_home is None:
        data_home = utils.get_default_dataset_path(DATASET_DIR)
    return utils.validator(DATA.index, data_home, silence=silence)
def validate(data_home=None, silence=False):
    """Validate that the stored dataset matches the reference index.

    Args:
        data_home (str): Local path where the dataset is stored.
            If `None`, looks for the data in the default directory,
            `~/mir_datasets`

    Returns:
        missing_files (list): File paths in the dataset index but missing locally
        invalid_checksums (list): File paths that exist locally but whose
            checksum differs from the reference
    """
    if data_home is None:
        data_home = utils.get_default_dataset_path(DATASET_DIR)
    return utils.validator(INDEX, data_home, silence=silence)
def load(data_home=None):
    """Load the giantsteps_key dataset.

    Args:
        data_home (str): Local path where the dataset is stored.
            If `None`, looks for the data in the default directory,
            `~/mir_datasets`

    Returns:
        (dict): {`track_id`: track data}
    """
    if data_home is None:
        data_home = utils.get_default_dataset_path(DATASET_DIR)
    return {
        track_id: Track(track_id, data_home=data_home)
        for track_id in track_ids()
    }
def __init__(self, track_id, data_home=None):
    """Create a GTZAN-Genre Track by id, deriving its genre label.

    Args:
        track_id (str): track id of the track to load
        data_home (str): Local path where the dataset is stored.
            If `None`, uses the default mir_datasets directory.

    Raises:
        ValueError: if `track_id` is not in the dataset index
    """
    if track_id not in DATA.index:
        raise ValueError(
            "{} is not a valid track ID in GTZAN-Genre".format(track_id)
        )
    self.track_id = track_id
    if data_home is None:
        data_home = utils.get_default_dataset_path(DATASET_DIR)
    self._data_home = data_home
    self._track_paths = DATA.index[track_id]
    # the genre is the filename prefix before the first dot
    genre_label = track_id.split(".")[0]
    # normalize the one label whose spelling differs from the canonical form
    self.genre = 'hip-hop' if genre_label == 'hiphop' else genre_label
    self.audio_path = os.path.join(self._data_home, self._track_paths["audio"][0])
def __init__(self, track_id, data_home=None):
    """Create an RWC-Classical Track by id, resolving paths and metadata.

    Args:
        track_id (str): track id of the track to load
        data_home (str): Local path where the dataset is stored.
            If `None`, uses the default mir_datasets directory.

    Raises:
        ValueError: if `track_id` is not in the dataset index
    """
    if track_id not in DATA.index:
        raise ValueError(
            '{} is not a valid track ID in RWC-Classical'.format(track_id)
        )
    self.track_id = track_id
    if data_home is None:
        data_home = utils.get_default_dataset_path(DATASET_DIR)
    self._data_home = data_home
    self._track_paths = DATA.index[track_id]
    self.sections_path = os.path.join(
        self._data_home, self._track_paths['sections'][0]
    )
    self.beats_path = os.path.join(self._data_home, self._track_paths['beats'][0])

    metadata_keys = (
        'piece_number',
        'suffix',
        'track_number',
        'title',
        'composer',
        'artist',
        'duration',
        'category',
    )
    metadata = DATA.metadata(data_home)
    if metadata is not None and track_id in metadata:
        self._track_metadata = metadata[track_id]
    else:
        # no metadata available for this track: every field defaults to None
        self._track_metadata = {key: None for key in metadata_keys}

    self.audio_path = os.path.join(self._data_home, self._track_paths['audio'][0])
    # every metadata field maps 1:1 onto an attribute of the same name
    for key in metadata_keys:
        setattr(self, key, self._track_metadata[key])
def __init__(self, track_id, data_home=None):
    """Create an RWC-Genre Track by id, resolving paths and metadata.

    Args:
        track_id (str): track id of the track to load
        data_home (str): Local path where the dataset is stored.
            If `None`, uses the default mir_datasets directory.

    Raises:
        ValueError: if `track_id` is not in the dataset index
    """
    if track_id not in INDEX:
        raise ValueError(
            '{} is not a valid track ID in RWC-Genre'.format(track_id)
        )
    self.track_id = track_id
    if data_home is None:
        data_home = utils.get_default_dataset_path(DATASET_DIR)
    self._data_home = data_home
    self._track_paths = INDEX[track_id]

    # refresh the module-level metadata cache when it targets another data_home
    if METADATA is None or METADATA['data_home'] != data_home:
        _reload_metadata(data_home)

    metadata_keys = (
        'piece_number',
        'suffix',
        'track_number',
        'category',
        'sub_category',
        'title',
        'composer',
        'artist',
        'duration_sec',
    )
    if METADATA is not None and track_id in METADATA:
        self._track_metadata = METADATA[track_id]
    else:
        # no metadata available for this track: every field defaults to None
        self._track_metadata = {key: None for key in metadata_keys}

    self.audio_path = os.path.join(self._data_home, self._track_paths['audio'][0])
    # every metadata field maps 1:1 onto an attribute of the same name
    for key in metadata_keys:
        setattr(self, key, self._track_metadata[key])
def load(data_home=None, silence_validator=False):
    """Load the RWC-Classical dataset, validating local files first.

    Args:
        data_home (str): Local path where the dataset is stored.
            If `None`, looks for the data in the default directory,
            `~/mir_datasets`
        silence_validator (bool): if True, suppress validator output

    Returns:
        (dict): {`track_id`: track data}
    """
    if data_home is None:
        data_home = utils.get_default_dataset_path(DATASET_DIR)
    # check local files against the index before constructing tracks
    validate(data_home, silence=silence_validator)
    return {
        track_id: Track(track_id, data_home=data_home)
        for track_id in track_ids()
    }
def load(data_home=None):
    """Load the iKala dataset.

    Args:
        data_home (str): Local path where the dataset is stored.
            If `None`, looks for the data in the default directory,
            `~/mir_datasets`

    Returns:
        (dict): {`track_id`: track data}
    """
    if data_home is None:
        data_home = utils.get_default_dataset_path(DATASET_DIR)
    return {
        track_id: Track(track_id, data_home=data_home)
        for track_id in INDEX
    }
def __init__(self, track_id, data_home=None):
    """Create a Salami Track by id, resolving paths and metadata.

    Args:
        track_id (str): track id of the track to load
        data_home (str): Local path where the dataset is stored.
            If `None`, uses the default mir_datasets directory.

    Raises:
        ValueError: if `track_id` is not in the dataset index
    """
    if track_id not in DATA.index:
        raise ValueError('{} is not a valid track ID in Salami'.format(track_id))
    self.track_id = track_id
    if data_home is None:
        data_home = utils.get_default_dataset_path(DATASET_DIR)
    self._data_home = data_home
    self._track_paths = DATA.index[track_id]

    metadata_keys = (
        'source',
        'annotator_1_id',
        'annotator_2_id',
        'duration',
        'title',
        'artist',
        'annotator_1_time',
        'annotator_2_time',
        'class',
        'genre',
    )
    metadata = DATA.metadata(data_home)
    if metadata is not None and track_id in metadata:
        self._track_metadata = metadata[track_id]
    else:
        # annotations with missing metadata: every field defaults to None
        self._track_metadata = {key: None for key in metadata_keys}

    self.audio_path = os.path.join(self._data_home, self._track_paths['audio'][0])
    # expose each metadata field as an attribute; 'class' is a reserved word,
    # so it is published under the name `broad_genre` instead
    for key in metadata_keys:
        if key != 'class':
            setattr(self, key, self._track_metadata[key])
    self.broad_genre = self._track_metadata['class']
def __init__(self, track_id, data_home=None):
    """Create a MedleyDB-Melody Track by id, resolving paths and metadata.

    Args:
        track_id (str): track id of the track to load
        data_home (str): Local path where the dataset is stored.
            If `None`, uses the default mir_datasets directory.

    Raises:
        ValueError: if `track_id` is not in the dataset index
    """
    if track_id not in DATA.index:
        raise ValueError(
            '{} is not a valid track ID in MedleyDB-Melody'.format(track_id)
        )
    self.track_id = track_id
    if data_home is None:
        data_home = utils.get_default_dataset_path(DATASET_DIR)
    self._data_home = data_home
    self._track_paths = DATA.index[track_id]
    # three melody annotation definitions, one path attribute each
    for level in (1, 2, 3):
        key = 'melody{}'.format(level)
        setattr(
            self,
            '{}_path'.format(key),
            os.path.join(self._data_home, self._track_paths[key][0]),
        )

    metadata_keys = (
        'artist',
        'title',
        'genre',
        'is_excerpt',
        'is_instrumental',
        'n_sources',
    )
    metadata = DATA.metadata(data_home)
    if metadata is not None and track_id in metadata:
        self._track_metadata = metadata[track_id]
    else:
        # no metadata available for this track: every field defaults to None
        self._track_metadata = {key: None for key in metadata_keys}

    self.audio_path = os.path.join(self._data_home, self._track_paths['audio'][0])
    # every metadata field maps 1:1 onto an attribute of the same name
    for key in metadata_keys:
        setattr(self, key, self._track_metadata[key])
def __init__(self, track_id, data_home=None):
    """Create a giantsteps_key Track by id, resolving paths and title.

    Args:
        track_id (str): track id of the track to load
        data_home (str): Local path where the dataset is stored.
            If `None`, uses the default mir_datasets directory.

    Raises:
        ValueError: if `track_id` is not in the dataset index
    """
    if track_id not in DATA.index:
        raise ValueError(
            '{} is not a valid track ID in giantsteps_key'.format(track_id)
        )
    self.track_id = track_id
    if data_home is None:
        data_home = utils.get_default_dataset_path(DATASET_DIR)
    self._data_home = data_home
    self._track_paths = DATA.index[track_id]
    self.audio_path = os.path.join(self._data_home, self._track_paths['audio'][0])
    self.keys_path = os.path.join(self._data_home, self._track_paths['key'][0])
    # metadata may be missing for some tracks
    self.metadata_path = (
        os.path.join(self._data_home, self._track_paths['meta'][0])
        if self._track_paths['meta'][0] is not None
        else None
    )
    # FIX: derive the title portably. The previous
    # `audio_path.replace(".mp3", '').split('/')[-1]` broke on Windows,
    # where os.path.join uses backslashes.
    self.title = os.path.splitext(os.path.basename(self.audio_path))[0]
def download(data_home=None):
    """Download GuitarSet.

    Args:
        data_home (str): Local path where the dataset is stored.
            If `None`, looks for the data in the default directory,
            `~/mir_datasets`
    """
    if data_home is None:
        data_home = utils.get_default_dataset_path(DATASET_DIR)
    # annotations first, then the four audio variants; order preserved
    remotes_in_order = [
        ANNOTATION_REMOTE,
        AUDIO_HEX_CLN_REMOTE,
        AUDIO_HEX_REMOTE,
        AUDIO_MIC_REMOTE,
        AUDIO_MIX_REMOTE,
    ]
    download_utils.downloader(
        data_home,
        zip_downloads=remotes_in_order,
        cleanup=True,
    )
def download(data_home=None, force_overwrite=False):
    """Download the ORCHSET Dataset.

    Args:
        data_home (str): Local path where the dataset is stored.
            If `None`, looks for the data in the default directory,
            `~/mir_datasets`
        force_overwrite (bool): whether to overwrite the existing
            downloaded data
    """
    if data_home is None:
        data_home = utils.get_default_dataset_path(DATASET_DIR)

    download_utils.downloader(
        data_home,
        zip_downloads=[REMOTE],
        force_overwrite=force_overwrite,
    )

    # the archive extracts into a nested Orchset/ folder;
    # flatten it by moving everything up one level
    nested_dir = os.path.join(data_home, 'Orchset')
    for entry in glob.glob(os.path.join(nested_dir, '*')):
        shutil.move(entry, data_home)
    os.removedirs(nested_dir)
def __init__(self, track_id, data_home=None):
    """Create a giantsteps_tempo Track by id, resolving paths and title.

    Args:
        track_id (str): track id of the track to load
        data_home (str): Local path where the dataset is stored.
            If `None`, uses the default mir_datasets directory.

    Raises:
        ValueError: if `track_id` is not in the dataset index
    """
    if track_id not in DATA.index:
        raise ValueError(
            '{} is not a valid track ID in giantsteps_tempo'.format(track_id)
        )
    self.track_id = track_id
    if data_home is None:
        data_home = utils.get_default_dataset_path(DATASET_DIR)
    self._data_home = data_home
    self._track_paths = DATA.index[track_id]
    self.audio_path = os.path.join(self._data_home, self._track_paths['audio'][0])
    self.annotation_v1_path = os.path.join(
        self._data_home, self._track_paths['annotation_v1'][0]
    )
    self.annotation_v2_path = os.path.join(
        self._data_home, self._track_paths['annotation_v2'][0]
    )
    # FIX: derive the title portably. The previous
    # `audio_path.replace(".mp3", '').split('/')[-1].split('.')[0]`
    # broke on Windows, where os.path.join uses backslashes.
    self.title = os.path.basename(self.audio_path).split('.')[0]
def __init__(self, track_id, data_home=None):
    """Create an RWC-Jazz Track by id, resolving paths and metadata.

    Args:
        track_id (str): track id of the track to load
        data_home (str): Local path where the dataset is stored.
            If `None`, uses the default mir_datasets directory.

    Raises:
        ValueError: if `track_id` is not in the dataset index
    """
    if track_id not in DATA.index:
        raise ValueError(
            '{} is not a valid track ID in RWC-Jazz'.format(track_id)
        )
    self.track_id = track_id
    if data_home is None:
        data_home = utils.get_default_dataset_path(DATASET_DIR)
    self._data_home = data_home
    self._track_paths = DATA.index[track_id]

    metadata_keys = (
        'piece_number',
        'suffix',
        'track_number',
        'title',
        'artist',
        'duration',
        'variation',
        'instruments',
    )
    metadata = DATA.metadata(data_home)
    if metadata is not None and track_id in metadata:
        self._track_metadata = metadata[track_id]
    else:
        # no metadata available for this track: every field defaults to None
        self._track_metadata = {key: None for key in metadata_keys}

    self.audio_path = os.path.join(self._data_home, self._track_paths['audio'][0])
    # every metadata field maps 1:1 onto an attribute of the same name
    for key in metadata_keys:
        setattr(self, key, self._track_metadata[key])
def __init__(self, track_id, data_home=None):
    """Create an iKala Track by id, resolving paths and the singer id.

    Args:
        track_id (str): track id of the track to load
        data_home (str): Local path where the dataset is stored.
            If `None`, uses the default mir_datasets directory.

    Raises:
        ValueError: if `track_id` is not in the dataset index
    """
    if track_id not in INDEX:
        raise ValueError('{} is not a valid track ID in iKala'.format(track_id))

    self.track_id = track_id
    if data_home is None:
        data_home = utils.get_default_dataset_path(DATASET_DIR)

    # FIX: resolve the data_home default BEFORE comparing against the
    # metadata cache. Previously the check ran with data_home=None, so
    # METADATA['data_home'] != None was always true and the metadata was
    # reloaded from disk on every Track construction.
    if METADATA is None or METADATA['data_home'] != data_home:
        _reload_metadata(data_home)

    self._data_home = data_home
    self._track_paths = INDEX[track_id]
    self.audio_path = os.path.join(self._data_home, self._track_paths['audio'][0])

    # track ids have the form "<song_id>_<section>"
    self.song_id = track_id.split('_')[0]
    self.section = track_id.split('_')[1]

    if METADATA is not None and self.song_id in METADATA:
        self.singer_id = METADATA[self.song_id]
    else:
        self.singer_id = None
def download(data_home=None, force_overwrite=False, cleanup=True):
    """Download the GTZAN-Genre dataset.

    Args:
        data_home (str): Local path where the dataset is stored.
            If `None`, looks for the data in the default directory,
            `~/mir_datasets`
        force_overwrite (bool): Whether to overwrite the existing
            downloaded data
        cleanup (bool): Whether to delete the zip/tar file after extracting.
    """
    if data_home is None:
        data_home = utils.get_default_dataset_path(DATASET_DIR)
    # all remotes are freely downloadable, so no info message is needed
    downloader_kwargs = dict(
        remotes=REMOTES,
        info_message=None,
        force_overwrite=force_overwrite,
        cleanup=cleanup,
    )
    download_utils.downloader(data_home, **downloader_kwargs)
def _load_metadata(data_home):
    """Load the iKala singer-id mapping from id_mapping.txt.

    Args:
        data_home (str): Local path where the dataset is stored.
            If `None`, uses the default mir_datasets directory.

    Returns:
        (dict or None): {song_id: singer_id} plus a 'data_home' entry,
            or None when the metadata file is missing locally.
    """
    if data_home is None:
        data_home = utils.get_default_dataset_path(DATASET_DIR)

    id_map_path = os.path.join(data_home, 'id_mapping.txt')
    if not os.path.exists(id_map_path):
        logging.info(
            'Metadata file {} not found.'.format(id_map_path)
            + 'You can download the metadata file for ikala by running ikala.download'
        )
        return None

    with open(id_map_path, 'r') as fhandle:
        rows = csv.reader(fhandle, delimiter='\t')
        # columns are (singer, song_id); skip the header row(s)
        singer_map = {row[1]: row[0] for row in rows if row[0] != 'singer'}

    # record which data_home this mapping was loaded from so callers can
    # detect a stale cache
    singer_map['data_home'] = data_home
    return singer_map
def __init__(self, track_id, data_home=None):
    """Create a MedleyDB-Melody Track by id, resolving paths and metadata.

    Args:
        track_id (str): track id of the track to load
        data_home (str): Local path where the dataset is stored.
            If `None`, uses the default mir_datasets directory.

    Raises:
        ValueError: if `track_id` is not in the dataset index
    """
    if track_id not in INDEX:
        raise ValueError(
            '{} is not a valid track ID in MedleyDB-Melody'.format(track_id)
        )
    self.track_id = track_id
    if data_home is None:
        data_home = utils.get_default_dataset_path(DATASET_DIR)
    self._data_home = data_home
    self._track_paths = INDEX[track_id]

    # refresh the module-level metadata cache when it targets another data_home
    if METADATA is None or METADATA['data_home'] != data_home:
        _reload_metadata(data_home)

    metadata_keys = (
        'artist',
        'title',
        'genre',
        'is_excerpt',
        'is_instrumental',
        'n_sources',
    )
    if METADATA is not None and track_id in METADATA:
        self._track_metadata = METADATA[track_id]
    else:
        # no metadata available for this track: every field defaults to None
        self._track_metadata = {key: None for key in metadata_keys}

    self.audio_path = os.path.join(self._data_home, self._track_paths['audio'][0])
    # every metadata field maps 1:1 onto an attribute of the same name
    for key in metadata_keys:
        setattr(self, key, self._track_metadata[key])
def download(data_home=None):
    """MedleyDB is not available for downloading directly.
    This function prints a helper message to download MedleyDB
    through zenodo.org.

    Args:
        data_home (str):
            Local path where the dataset is stored.
            If `None`, looks for the data in the default directory, `~/mir_datasets`
    """
    if data_home is None:
        data_home = utils.get_default_dataset_path(DATASET_DIR)

    # dataset access must be requested on zenodo; we can only point the
    # user at the right place
    info_message = """
        To download this dataset, visit:
        https://zenodo.org/record/2628782#.XKZdABNKh24
        and request access.
        Once downloaded, unzip the file MedleyDB-Melody.zip
        and copy the result to:
        {data_home}
    """.format(data_home=data_home)

    # no remotes are passed: the downloader only prints the info message
    download_utils.downloader(info_message=info_message)
def download(data_home=None, force_overwrite=False):
    """Download the RWC-Classical annotations and metadata.

    The audio files are not provided due to copyright issues.

    Args:
        data_home (str): Local path where the dataset is stored.
            If `None`, looks for the data in the default directory,
            `~/mir_datasets`
        force_overwrite (bool): Whether to overwrite the existing
            downloaded data
    """
    if data_home is None:
        data_home = utils.get_default_dataset_path(DATASET_DIR)

    # FIX: the message previously said "RWC-Jazz dataset", but this is the
    # RWC-Classical downloader.
    info_message = """
        Unfortunately the audio files of the RWC-Classical dataset are not available
        for download. If you have the RWC-Classical dataset, place the contents into a
        folder called RWC-Classical with the following structure:
            > RWC-Classical/
                > annotations/
                > audio/rwc-c-m0i with i in [1 .. 6]
                > metadata-master/
        and copy the RWC-Classical folder to {}
    """.format(
        data_home
    )

    # fetch only the downloadable remotes (annotations + metadata)
    download_utils.downloader(
        data_home,
        zip_downloads=[METADATA_REMOTE, ANNOTATIONS_REMOTE_1, ANNOTATIONS_REMOTE_2],
        info_message=info_message,
        force_overwrite=force_overwrite,
    )
def download(data_home=None, force_overwrite=False, cleanup=True): """Download the giantsteps_tempo Dataset (annotations). The audio files are not provided due to copyright issues. Args: data_home (str): Local path where the dataset is stored. If `None`, looks for the data in the default directory, `~/mir_datasets` force_overwrite (bool): Whether to overwrite the existing downloaded data cleanup (bool): Whether to delete the zip/tar file after extracting. partial_download(list of str) arguments can be 'audio' 'metadata' or/and 'tempos' """ # use the default location: ~/mir_datasets/giantsteps_tempo if data_home is None: data_home = utils.get_default_dataset_path(DATASET_DIR) download_message = """ Unfortunately the audio files of the Giant Steps Tempo dataset are not available for download. If you have the Giant Steps audio dataset, place the contents into a folder called GiantSteps_tempo with the following structure: > GiantSteps_tempo/ > giantsteps-tempo-dataset-0b7d47ba8cae59d3535a02e3db69e2cf6d0af5bb/ > audio/ and copy the folder to {} """.format(data_home) download_utils.downloader( data_home, remotes=REMOTES, info_message=download_message, force_overwrite=force_overwrite, cleanup=cleanup, )
def download(data_home=None, force_overwrite=False, cleanup=True): """Download the Beatles Dataset (annotations). The audio files are not provided due to copyright issues. Args: data_home (str): Local path where the dataset is stored. If `None`, looks for the data in the default directory, `~/mir_datasets` force_overwrite (bool): Whether to overwrite the existing downloaded data cleanup (bool): Whether to delete the zip/tar file after extracting. """ # use the default location: ~/mir_datasets/Beatles if data_home is None: data_home = utils.get_default_dataset_path(DATASET_DIR) download_message = """ Unfortunately the audio files of the Beatles dataset are not available for download. If you have the Beatles dataset, place the contents into a folder called Beatles with the following structure: > Beatles/ > annotations/ > audio/ and copy the Beatles folder to {} """.format(data_home) download_utils.downloader( data_home, remotes=REMOTES, info_message=download_message, force_overwrite=force_overwrite, cleanup=cleanup, )
def __init__(self, track_id, data_home=None):
    """Create an iKala Track by id, resolving paths and the singer id.

    Args:
        track_id (str): track id of the track to load
        data_home (str): Local path where the dataset is stored.
            If `None`, uses the default mir_datasets directory.

    Raises:
        ValueError: if `track_id` is not in the dataset index
    """
    if track_id not in DATA.index:
        raise ValueError('{} is not a valid track ID in iKala'.format(track_id))
    self.track_id = track_id
    if data_home is None:
        data_home = utils.get_default_dataset_path(DATASET_DIR)
    metadata = DATA.metadata(data_home)
    self._data_home = data_home
    self._track_paths = DATA.index[track_id]

    self.f0_path = os.path.join(self._data_home, self._track_paths['pitch'][0])
    self.lyrics_path = os.path.join(self._data_home, self._track_paths['lyrics'][0])
    self.audio_path = os.path.join(self._data_home, self._track_paths['audio'][0])

    # track ids have the form "<song_id>_<section>"
    id_parts = track_id.split('_')
    self.song_id = id_parts[0]
    self.section = id_parts[1]

    # singer id comes from the metadata mapping when available
    if metadata is None:
        self.singer_id = None
    else:
        self.singer_id = metadata.get(self.song_id)