def get_bttimbre(h5):
    """
    Get beat-aligned timbre from a song file of the Million Song Dataset

    INPUT:
       h5          - filename or open h5 file
    RETURN:
       bttimbre    - beat-aligned timbre, one beat per column
                     or None if something went wrong (e.g. no beats)

    When a filename is given, the file is opened here and is always closed
    again before returning, even if reading one of the fields raises.
    An already-open h5 file passed by the caller is left open.
    """
    # Only close the handle if this function is the one that opened it.
    opened_here = isinstance(h5, str)
    if opened_here:
        h5 = GETTERS.open_h5_file_read(h5)
    try:
        timbre = GETTERS.get_segments_timbre(h5)
        segstarts = GETTERS.get_segments_start(h5)
        btstarts = GETTERS.get_beats_start(h5)
        duration = GETTERS.get_duration(h5)
    finally:
        if opened_here:
            h5.close()
    # Get the series of starts for segments and beats as flat 1-D arrays.
    # NOTE: the flatten may be unnecessary; for track 'TR0002Q11C3FA8332D'
    # the shapes were already segstarts.shape = (708,) and
    # btstarts.shape = (304,).
    segstarts = np.array(segstarts).flatten()
    btstarts = np.array(btstarts).flatten()
    # Aligned features (no renormalization); align_feats' result, including
    # None, is handed back to the caller unchanged.
    return align_feats(timbre.T, segstarts, btstarts, duration)
def _read_song_fields(getters, source):
    """Read the fields of a song record from `source` using `getters`.

    `getters` is a module exposing the get_* accessor functions used below;
    `source` is whatever those accessors take as their single argument.
    The tuple is ordered to match the positional arguments passed to
    song_record by iterate_folder_songs_extracted.
    """
    return (
        getters.get_track_id(source),
        getters.get_artist_name(source),
        getters.get_title(source),
        getters.get_segments_timbre(source),
        getters.get_sections_start(source),
        getters.get_sections_confidence(source),
        getters.get_segments_start(source),
        getters.get_duration(source),
    )


def iterate_folder_songs_extracted(root_path, filename_re):
    """Iterate over a collection of HDF5 database files, each one containing
    structured data, presumably a song. Yields a processed record of song
    data.

    Arguments:
    root_path -- File path to the root of the file collection.
    filename_re -- Regular expression to match HDF5 files

    Files with extension '.h5' are read through hdf5_getters and files with
    extension '.analysis' through json_anal_getters. Any other extension
    raises Exception("unrecognized file type: ...").
    """
    for filepath in iterate_folder_songs(root_path, filename_re):
        # Extract the important data from the full song record
        filename = os.path.basename(filepath)
        ext = os.path.splitext(filename)[1]
        if ext == '.h5':
            song = hdf5_getters.open_h5_file_read(filepath)
            try:
                fields = _read_song_fields(hdf5_getters, song)
            finally:
                # Close the handle even if one of the getters raises.
                song.close()
        elif ext == '.analysis':
            # These getters take the file path directly; no handle to close.
            fields = _read_song_fields(json_anal_getters, filepath)
        else:
            raise Exception("unrecognized file type: {0}".format(filename))
        # Combine into a song record
        yield song_record(*fields)