def get_btchromas_loudness(h5):
    """
    Similar to btchroma, but adds the loudness back.
    We use the segments_loudness_max.
    There is no max value constraint, simply no negative values.
    INPUT:
       h5          - filename or open h5 file
    RETURN:
       btchroma    - whatever align_feats returns for the loudness-scaled
                     chromas, which may be None (the None is passed through
                     to the caller unchanged)
    """
    # A filename means we own the file handle and must close it ourselves;
    # an already-open h5 object belongs to the caller and is left open.
    opened_here = isinstance(h5, str)
    if opened_here:
        h5 = GETTERS.open_h5_file_read(h5)
    try:
        chromas = GETTERS.get_segments_pitches(h5)
        segstarts = GETTERS.get_segments_start(h5)
        btstarts = GETTERS.get_beats_start(h5)
        duration = GETTERS.get_duration(h5)
        loudnessmax = GETTERS.get_segments_loudness_max(h5)
    finally:
        # close even if one of the getters raised, so the handle never leaks
        if opened_here:
            h5.close()
    # get the series of starts for segments and beats as flat arrays
    segstarts = np.array(segstarts).flatten()
    btstarts = np.array(btstarts).flatten()
    # add back loudness: scale the transposed chromas by idB(loudnessmax)
    # (idB is defined elsewhere; presumably the inverse of dB -- TODO confirm)
    chromas = chromas.T * idB(loudnessmax)
    # aligned features, returned as is (no renormalization); may be None
    return align_feats(chromas, segstarts, btstarts, duration)
def get_btloudnessmax(h5):
    """
    Get beat-aligned loudness max from a song file of the Million Song Dataset
    INPUT:
       h5             - filename or open h5 file
    RETURN:
       btloudnessmax  - beat-aligned loudness max, one beat per column
                        or None if something went wrong (e.g. no beats)
    """
    # A filename means we own the file handle and must close it ourselves;
    # an already-open h5 object belongs to the caller and is left open.
    opened_here = isinstance(h5, str)
    if opened_here:
        h5 = GETTERS.open_h5_file_read(h5)
    try:
        loudnessmax = GETTERS.get_segments_loudness_max(h5)
        segstarts = GETTERS.get_segments_start(h5)
        btstarts = GETTERS.get_beats_start(h5)
        duration = GETTERS.get_duration(h5)
    finally:
        # close even if one of the getters raised, so the handle never leaks
        if opened_here:
            h5.close()
    # get the series of starts for segments and beats
    # NOTE: MAYBE USELESS?
    # result for track: 'TR0002Q11C3FA8332D'
    #    segstarts.shape = (708,)
    #    btstarts.shape = (304,)
    segstarts = np.array(segstarts).flatten()
    btstarts = np.array(btstarts).flatten()
    # reverse dB before aligning
    loudnessmax = idB(loudnessmax)
    # aligned features; align_feats is given a single-row 2-D array
    btloudnessmax = align_feats(
        loudnessmax.reshape(1, loudnessmax.shape[0]),
        segstarts,
        btstarts,
        duration,
    )
    if btloudnessmax is None:
        return None
    # set it back to dB; the 1e-10 offset presumably guards against
    # a zero input to dB -- TODO confirm
    # done (no renormalization)
    return dB(btloudnessmax + 1e-10)