def get_btchromas_loudness(h5):
    """
    Similar to btchroma, but adds the loudness back.
    We use the segments_loudness_max.
    There is no max value constraint and no renormalization is applied
    (original author's note: simply no negative values).
    INPUT:
       h5          - filename or open h5 file
    RETURN:
       btchromas   - whatever align_feats returns for the loudness-scaled
                     chromas, which may be None
    """
    # Only close the file if we opened it ourselves; a caller-provided
    # handle stays open.
    opened_here = isinstance(h5, str)
    if opened_here:
        h5 = GETTERS.open_h5_file_read(h5)
    try:
        chromas = GETTERS.get_segments_pitches(h5)
        segstarts = GETTERS.get_segments_start(h5)
        btstarts = GETTERS.get_beats_start(h5)
        duration = GETTERS.get_duration(h5)
        loudnessmax = GETTERS.get_segments_loudness_max(h5)
    finally:
        # Guarantees the handle is released even when a getter raises.
        if opened_here:
            h5.close()
    # get the series of starts for segments and beats
    segstarts = np.array(segstarts).flatten()
    btstarts = np.array(btstarts).flatten()
    # add back loudness (idB is defined elsewhere in this file;
    # presumably the inverse of dB -- confirm against its definition)
    chromas = chromas.T * idB(loudnessmax)
    # aligned features; a None result from align_feats is passed through
    # unchanged (no renormalization)
    return align_feats(chromas, segstarts, btstarts, duration)
def get_bttimbre(h5):
    """
    Get beat-aligned timbre from a song file of the Million Song Dataset
    INPUT:
       h5          - filename or open h5 file
    RETURN:
       bttimbre    - beat-aligned timbre, one beat per column
                     or None if something went wrong (e.g. no beats),
                     i.e. whenever align_feats returns None
    """
    # Only close the file if we opened it ourselves; a caller-provided
    # handle stays open.
    opened_here = isinstance(h5, str)
    if opened_here:
        h5 = GETTERS.open_h5_file_read(h5)
    try:
        timbre = GETTERS.get_segments_timbre(h5)
        segstarts = GETTERS.get_segments_start(h5)
        btstarts = GETTERS.get_beats_start(h5)
        duration = GETTERS.get_duration(h5)
    finally:
        # Guarantees the handle is released even when a getter raises.
        if opened_here:
            h5.close()
    # get the series of starts for segments and beats
    # NOTE: flattening may be unnecessary. For track
    # 'TR0002Q11C3FA8332D' the original author observed
    # segstarts.shape = (708,) and btstarts.shape = (304,).
    segstarts = np.array(segstarts).flatten()
    btstarts = np.array(btstarts).flatten()
    # aligned features; a None result from align_feats is passed through
    # unchanged (no renormalization)
    return align_feats(timbre.T, segstarts, btstarts, duration)
def get_btloudnessmax(h5):
    """
    Get beat-aligned loudness max from a song file of the Million Song Dataset
    INPUT:
       h5             - filename or open h5 file
    RETURN:
       btloudnessmax  - beat-aligned loudness max, one beat per column
                        or None if something went wrong (e.g. no beats),
                        i.e. whenever align_feats returns None
    """
    # Only close the file if we opened it ourselves; a caller-provided
    # handle stays open.
    opened_here = isinstance(h5, str)
    if opened_here:
        h5 = GETTERS.open_h5_file_read(h5)
    try:
        loudnessmax = GETTERS.get_segments_loudness_max(h5)
        segstarts = GETTERS.get_segments_start(h5)
        btstarts = GETTERS.get_beats_start(h5)
        duration = GETTERS.get_duration(h5)
    finally:
        # Guarantees the handle is released even when a getter raises.
        if opened_here:
            h5.close()
    # get the series of starts for segments and beats
    # NOTE: flattening may be unnecessary. For track
    # 'TR0002Q11C3FA8332D' the original author observed
    # segstarts.shape = (708,) and btstarts.shape = (304,).
    segstarts = np.array(segstarts).flatten()
    btstarts = np.array(btstarts).flatten()
    # reverse dB before aligning (idB is defined elsewhere in this file)
    loudnessmax = idB(loudnessmax)
    # aligned features; the loudness vector is passed as a single-row
    # 2-D array
    btloudnessmax = align_feats(loudnessmax.reshape(1, loudnessmax.shape[0]),
                                segstarts, btstarts, duration)
    if btloudnessmax is None:
        return None
    # set it back to dB; the 1e-10 offset presumably keeps dB() away from
    # an exact zero input -- confirm against dB's definition
    btloudnessmax = dB(btloudnessmax + 1e-10)
    # done (no renormalization)
    return btloudnessmax
def get_btchromas(h5):
    """
    Get beat-aligned chroma from a song file of the Million Song Dataset
    INPUT:
       h5          - filename or open h5 file
    RETURN:
       btchromas   - beat-aligned chromas, one beat per column,
                     each column rescaled so that its max is 1
                     (columns whose max is 0 are left unscaled),
                     or None if something went wrong (e.g. no beats),
                     i.e. whenever align_feats returns None
    """
    # Only close the file if we opened it ourselves; a caller-provided
    # handle stays open.
    opened_here = isinstance(h5, str)
    if opened_here:
        h5 = GETTERS.open_h5_file_read(h5)
    try:
        chromas = GETTERS.get_segments_pitches(h5)
        segstarts = GETTERS.get_segments_start(h5)
        btstarts = GETTERS.get_beats_start(h5)
        duration = GETTERS.get_duration(h5)
    finally:
        # Guarantees the handle is released even when a getter raises.
        if opened_here:
            h5.close()
    # get the series of starts for segments and beats
    # NOTE: flattening may be unnecessary. For track
    # 'TR0002Q11C3FA8332D' the original author observed
    # segstarts.shape = (708,) and btstarts.shape = (304,).
    segstarts = np.array(segstarts).flatten()
    btstarts = np.array(btstarts).flatten()
    # aligned features
    btchroma = align_feats(chromas.T, segstarts, btstarts, duration)
    if btchroma is None:
        return None
    # Renormalize. Each column max is 1.
    maxs = btchroma.max(axis=0)
    # Replace zero maxima with 1 so the division below leaves those
    # columns untouched instead of dividing by zero.
    maxs[maxs == 0] = 1.
    btchroma = (btchroma / maxs)
    # done
    return btchroma
def iterate_folder_songs_extracted(root_path, filename_re):
    """Iterate over a collection of song files and yield one processed
    record per file.

    Files ending in '.h5' are read through hdf5_getters (the file is
    opened for reading and always closed again, even if a getter raises);
    files ending in '.analysis' are read through json_anal_getters, which
    take the file path directly.

    Arguments:
    root_path -- File path to the root of the file collection.
    filename_re -- Regular expression to match the song files; passed
                   through to iterate_folder_songs.

    Yields:
    The result of song_record(...) built from the track id, artist name,
    title, segment timbre, section starts, section confidences, segment
    starts and duration of each file.

    Raises:
    Exception -- if a file has an extension other than '.h5' or
                 '.analysis'.
    """
    for filepath in iterate_folder_songs(root_path, filename_re):
        filename = os.path.basename(filepath)
        ext = os.path.splitext(filename)[1]

        # Pick the getter module and what it reads from. Both modules
        # expose the same getter names, so extraction below is shared.
        if ext == '.h5':
            getters = hdf5_getters
            song = hdf5_getters.open_h5_file_read(filepath)
            source = song
        elif ext == '.analysis':
            getters = json_anal_getters
            song = None  # nothing to close for the JSON path
            source = filepath
        else:
            raise Exception("unrecognized file type: {0}".format(filename))

        # Extract the important data from the full song record
        try:
            track_id = getters.get_track_id(source)
            artist = getters.get_artist_name(source)
            title = getters.get_title(source)
            timbre = getters.get_segments_timbre(source)
            sections_start = getters.get_sections_start(source)
            sections_conf = getters.get_sections_confidence(source)
            segments_start = getters.get_segments_start(source)
            song_end = getters.get_duration(source)
        finally:
            # Release the HDF5 handle even when a getter fails.
            if song is not None:
                song.close()

        # Combine into a song record
        yield song_record(track_id, artist, title, timbre, sections_start,
                          sections_conf, segments_start, song_end)