def compute_features(audio, y_harmonic):
    """Extracts the MFCC, HPCP, Tonnetz, Constant-Q and Tempogram features.

    Every feature matrix is transposed before it is returned.

    Parameters
    ----------
    audio: np.array(N)
        Audio samples of the given input.
    y_harmonic: np.array(N)
        Harmonic part of the audio signal, in samples.

    Returns
    -------
    mfcc: np.array(N, msaf.Anal.mfcc_coeff)
        Mel-frequency Cepstral Coefficients.
    hpcp: np.array(N, 12)
        Pitch Class Profiles.
    tonnetz: np.array(N, 6)
        Tonal Centroid features.
    cqt: np.array(N, msaf.Anal.cqt_bins)
        Constant-Q log-scale features.
    tempogram: np.array(N, 192)
        Tempogram features.
    """
    logging.info("Computing Spectrogram...")
    mel_spec = librosa.feature.melspectrogram(
        audio,
        sr=msaf.Anal.sample_rate,
        n_fft=msaf.Anal.frame_size,
        hop_length=msaf.Anal.hop_size,
        n_mels=msaf.Anal.n_mels,
    )

    logging.info("Computing Constant-Q...")
    # Complex CQT -> power -> log amplitude relative to the maximum.
    cqt_complex = librosa.cqt(
        audio,
        sr=msaf.Anal.sample_rate,
        hop_length=msaf.Anal.hop_size,
        n_bins=msaf.Anal.cqt_bins,
        real=False,
    )
    cqt_power = np.abs(cqt_complex) ** 2
    cqt = librosa.logamplitude(cqt_power, ref_power=np.max).T

    logging.info("Computing MFCCs...")
    log_mel_spec = librosa.logamplitude(mel_spec, ref_power=np.max)
    mfcc_by_coeff = librosa.feature.mfcc(
        S=log_mel_spec, n_mfcc=msaf.Anal.mfcc_coeff
    )
    mfcc = mfcc_by_coeff.T

    logging.info("Computing HPCPs...")
    # The chroma is computed on the harmonic component only.
    chroma = librosa.feature.chroma_cqt(
        y=y_harmonic,
        sr=msaf.Anal.sample_rate,
        hop_length=msaf.Anal.hop_size,
        n_octaves=msaf.Anal.n_octaves,
        fmin=msaf.Anal.f_min,
    )
    hpcp = chroma.T

    logging.info("Computing Tonnetz...")
    tonnetz = utils.chroma_to_tonnetz(hpcp)

    logging.info("Computing Tempogram...")
    tempogram_raw = librosa.feature.tempogram(
        audio,
        sr=msaf.Anal.sample_rate,
        hop_length=msaf.Anal.hop_size,
        win_length=192,
    )
    tempogram = tempogram_raw.T

    return mfcc, hpcp, tonnetz, cqt, tempogram
def compute_features(audio, y_harmonic):
    """Extracts the MFCC, HPCP, Tonnetz and Constant-Q features.

    Every feature matrix is transposed before it is returned.

    Parameters
    ----------
    audio: np.array(N)
        Audio samples of the given input.
    y_harmonic: np.array(N)
        Harmonic part of the audio signal, in samples.

    Returns
    -------
    mfcc: np.array(N, msaf.Anal.mfcc_coeff)
        Mel-frequency Cepstral Coefficients.
    hpcp: np.array(N, 12)
        Pitch Class Profiles.
    tonnetz: np.array(N, 6)
        Tonal Centroid features.
    cqt: np.array(N, msaf.Anal.cqt_bins)
        Constant-Q log-scale features.
    """
    logging.info("Computing Spectrogram...")
    mel_kwargs = {
        "sr": msaf.Anal.sample_rate,
        "n_fft": msaf.Anal.frame_size,
        "hop_length": msaf.Anal.hop_size,
        "n_mels": msaf.Anal.n_mels,
    }
    spectrogram = librosa.feature.melspectrogram(audio, **mel_kwargs)

    logging.info("Computing Constant-Q...")
    cqt_kwargs = {
        "sr": msaf.Anal.sample_rate,
        "hop_length": msaf.Anal.hop_size,
        "n_bins": msaf.Anal.cqt_bins,
        "real": False,
    }
    # Squared magnitude of the complex CQT, then log-scaled against its max.
    squared_magnitude = np.abs(librosa.cqt(audio, **cqt_kwargs)) ** 2
    cqt = librosa.logamplitude(squared_magnitude, ref_power=np.max).T

    logging.info("Computing MFCCs...")
    log_spectrogram = librosa.logamplitude(spectrogram, ref_power=np.max)
    mfcc = librosa.feature.mfcc(
        S=log_spectrogram, n_mfcc=msaf.Anal.mfcc_coeff
    ).T

    logging.info("Computing HPCPs...")
    chroma_kwargs = {
        "y": y_harmonic,
        "sr": msaf.Anal.sample_rate,
        "hop_length": msaf.Anal.hop_size,
        "n_octaves": msaf.Anal.n_octaves,
        "fmin": msaf.Anal.f_min,
    }
    hpcp = librosa.feature.chroma_cqt(**chroma_kwargs).T

    logging.info("Computing Tonnetz...")
    tonnetz = utils.chroma_to_tonnetz(hpcp)

    return mfcc, hpcp, tonnetz, cqt
def compute_features(audio, y_harmonic, sr=22050):
    """Computes the MFCC, HPCP and Tonnetz features.

    Every feature matrix is transposed before it is returned.

    Parameters
    ----------
    audio: np.array(N)
        Audio samples of the given input.
    y_harmonic: np.array(N)
        Harmonic part of the audio signal, in samples.
    sr: int
        Sampling rate handed to librosa for both the mel spectrogram and the
        chroma computation. Defaults to 22050, the value that used to be
        hard-coded in both calls, so existing callers are unaffected.

    Returns
    -------
    mfcc: np.array(N, msaf.Anal.mfcc_coeff)
        Mel-frequency Cepstral Coefficients.
    hpcp: np.array(N, 12)
        Pitch Class Profiles.
    tonnetz: np.array(N, 6)
        Tonal Centroid features.
    """
    logging.info("Computing Spectrogram...")
    S = librosa.feature.melspectrogram(audio,
                                       sr=sr,
                                       n_fft=msaf.Anal.frame_size,
                                       hop_length=msaf.Anal.hop_size,
                                       n_mels=msaf.Anal.n_mels)

    logging.info("Computing MFCCs...")
    # MFCCs are derived from the log-amplitude mel spectrogram.
    log_S = librosa.logamplitude(S, ref_power=np.max)
    mfcc = librosa.feature.mfcc(S=log_S, n_mfcc=msaf.Anal.mfcc_coeff).T

    logging.info("Computing HPCPs...")
    # The chroma is computed on the harmonic component only; the same `sr`
    # is used as for the spectrogram so both features share one rate.
    hpcp = librosa.feature.chroma_cqt(y=y_harmonic,
                                      sr=sr,
                                      hop_length=msaf.Anal.hop_size).T

    logging.info("Computing Tonnetz...")
    tonnetz = utils.chroma_to_tonnetz(hpcp)

    return mfcc, hpcp, tonnetz
def compute_features(audio, beats=None):
    """Computes the MFCC, HPCP and Tonnetz features of an audio signal.

    When a set of beats is given the features are beat-synchronous and the
    log messages are prefixed accordingly.

    Parameters
    ----------
    audio:
        Audio signal handed to the MFCC and HPCP extractors.
    beats:
        Optional set of beats forwarded to both extractors (default: None).

    Returns
    -------
    mfcc:
        Output of the MFCC extractor.
    hpcp:
        Output of the HPCP extractor.
    tonnetz:
        Tonal centroids derived from ``hpcp``.
    """
    prefix = "Beat-synchronous " if beats is not None else ""

    def build_extractor(algorithm):
        # Both extractors share every setting except the wrapped algorithm.
        return STFTFeature(msaf.Anal.frame_size, msaf.Anal.hop_size,
                           msaf.Anal.window_type, algorithm,
                           msaf.Anal.sample_rate, beats)

    mfcc_extractor = build_extractor(
        ES.MFCC(numberCoefficients=msaf.Anal.mfcc_coeff))
    hpcp_extractor = build_extractor(ES.HPCP())

    logging.info("Computing %sMFCCs..." % prefix)
    mfcc = mfcc_extractor.compute_features(audio)

    logging.info("Computing %sHPCPs..." % prefix)
    hpcp = hpcp_extractor.compute_features(audio)

    logging.info("Computing %sTonnetz..." % prefix)
    tonnetz = utils.chroma_to_tonnetz(hpcp)

    return mfcc, hpcp, tonnetz
def compute_features(audio, y_harmonic):
    """Extracts the MFCC, HPCP, Tonnetz, Constant-Q and Gammatone features.

    Every feature matrix is transposed before it is returned.

    Parameters
    ----------
    audio: np.array(N)
        Audio samples of the given input.
    y_harmonic: np.array(N)
        Harmonic part of the audio signal, in samples.

    Returns
    -------
    mfcc: np.array(N, msaf.Anal.mfcc_coeff)
        Mel-frequency Cepstral Coefficients.
    hpcp: np.array(N, 12)
        Pitch Class Profiles.
    tonnetz: np.array(N, 6)
        Tonal Centroid features.
    cqt: np.array(N, msaf.Anal.cqt_bins)
        Constant-Q log-scale features.
    gmt: np.array(N, msaf.Anal.mfcc_coeff+6)
        Gammatone features: the gammatone cepstral coefficients and the
        gammatone contrast stacked horizontally.
    """
    logging.info("Computing Spectrogram...")
    mel_spec = librosa.feature.melspectrogram(
        audio,
        sr=msaf.Anal.sample_rate,
        n_fft=msaf.Anal.frame_size,
        hop_length=msaf.Anal.hop_size,
        n_mels=msaf.Anal.n_mels,
    )

    logging.info("Computing Constant-Q...")
    cqt_values = librosa.cqt(
        audio,
        sr=msaf.Anal.sample_rate,
        hop_length=msaf.Anal.hop_size,
        n_bins=msaf.Anal.cqt_bins,
    )
    cqt = librosa.logamplitude(cqt_values ** 2, ref_power=np.max).T

    logging.info("Computing MFCCs...")
    log_mel_spec = librosa.logamplitude(mel_spec, ref_power=np.max)
    mfcc = librosa.feature.mfcc(
        S=log_mel_spec, n_mfcc=msaf.Anal.mfcc_coeff
    ).T

    logging.info("Computing HPCPs...")
    # The chroma is computed on the harmonic component only.
    hpcp = librosa.feature.chroma_cqt(
        y=y_harmonic,
        sr=msaf.Anal.sample_rate,
        hop_length=msaf.Anal.hop_size,
        n_octaves=msaf.Anal.n_octaves,
        n_chroma=12,
        fmin=msaf.Anal.f_min,
    ).T

    logging.info("Computing Tonnetz...")
    tonnetz = utils.chroma_to_tonnetz(hpcp)

    # Mi: Extracting Gammatone features
    logging.info("Computing gammatone features...")
    # Settings shared by both gammatone extractors.
    gammatone_kwargs = dict(
        sr=msaf.Anal.sample_rate,
        nfft=msaf.Anal.frame_size * 2,
        hop_length=msaf.Anal.hop_size,
        nfilters=64,
        f_min=50,
        f_max=msaf.Anal.sample_rate / 2,
        log=False,
    )
    gcc = librosa.feature.gammatone_cepstral_coeffecients(
        audio, nCoeff=msaf.Anal.mfcc_coeff, **gammatone_kwargs
    ).T
    gc = librosa.feature.gammatone_contrast(
        audio, n_bands=6, quantile=0.02, **gammatone_kwargs
    ).T
    gmt = np.hstack((gcc, gc))

    return mfcc, hpcp, tonnetz, cqt, gmt