def hamming(win_len, mode='symmetric'):
    """Return a Hamming window of ``win_len`` samples.

    Parameters
    ----------
    win_len : int
        Number of samples in the window.
    mode : {'symmetric', 'periodic'}
        'symmetric' requests ``fftbins=False`` from ``get_window``;
        'periodic' requests ``fftbins=True``.

    Returns
    -------
    The array produced by ``get_window('hamm', win_len, fftbins=...)``.

    Raises
    ------
    ValueError
        If ``mode`` is neither 'symmetric' nor 'periodic'.
    """
    if mode == 'symmetric':
        periodic = False
    elif mode == 'periodic':
        periodic = True
    else:
        # A bare `raise` outside an `except` block only produces
        # "No active exception to re-raise"; name the bad argument instead.
        raise ValueError('Window mode can not be {}'.format(mode))
    return get_window('hamm', win_len, fftbins=periodic)
def hanning(win_len, mode='symmetric'):
    """Return a Hann window of ``win_len`` samples.

    Parameters
    ----------
    win_len : int
        Number of samples in the returned window.
    mode : {'symmetric', 'periodic'}
        'symmetric' builds a symmetric window of ``win_len + 2`` samples and
        strips its first and last sample; 'periodic' requests
        ``fftbins=True`` from ``get_window`` with exactly ``win_len`` samples.

    Returns
    -------
    The window as returned by ``get_window`` (sliced in symmetric mode).

    Raises
    ------
    ValueError
        If ``mode`` is neither 'symmetric' nor 'periodic'.
    """
    if mode == 'symmetric':
        # Drop the zero at the front and the zero at the back.
        return get_window('hann', win_len + 2, fftbins=False)[1:-1]
    if mode == 'periodic':
        return get_window('hann', win_len, fftbins=True)
    # A bare `raise` outside an `except` block only produces
    # "No active exception to re-raise"; name the bad argument instead.
    raise ValueError('Window mode can not be {}'.format(mode))
def get_chromagram(y, sr, chroma):
    """
    Compute a chromagram of ``y`` with the extractor named in ``chroma``.

    Parameters
    ----------
    y :
        audio signal, forwarded unchanged to the selected extractor
    sr :
        accepted but not read here; the sampling rate handed to the
        extractor is the one parsed out of ``chroma``
    chroma : str
        descriptor passed to ``get_parameters_chroma``
        (chroma-samplerate-framesize-overlap)

    Returns
    -------
    whatever the selected extractor returns, or None when the parsed chroma
    type is not one of 'nnls', 'cqt', 'cens' or 'stft'
    """
    params = get_parameters_chroma(chroma)
    kind = params["chroma"]

    if kind == 'nnls':
        return get_nnls(y, params["sr"], params["fr"], params["off"])

    if kind not in ('cqt', 'cens', 'stft'):
        return None

    # The remaining extractors share the same window and framing arguments.
    window = get_window('blackmanharris', params["fr"])
    common = dict(
        y=y,
        sr=params["sr"],
        hop_length=params["off"],
        norm=None,
        window=window,
        n_chroma=12,
    )

    if kind == 'cqt':
        # One octave fewer when the parsed sampling rate is 5525.
        octaves = 4 if params["sr"] == 5525 else 5
        return chroma_cqt(C=None, fmin=110, n_octaves=octaves,
                          bins_per_octave=36, **common)

    if kind == 'cens':
        return chroma_cens(C=None, fmin=110, n_octaves=5,
                           bins_per_octave=36, **common)

    return chroma_stft(**common)
def stft_from_frames(frames, window='hann', dtype=np.complex64):
    """
    Variation of the librosa.core.stft function that computes the
    short-time Fourier transform from frames instead of from the signal.

    See http://librosa.github.io/librosa/_modules/librosa/core/spectrum.html#stft

    Parameters
    ----------
    frames : 2-D array
        Axis 0 runs over the samples of a frame, axis 1 over the frames.
        The frame length ``frames.shape[0]`` is used both as the window
        length and as the FFT size.
    window :
        Window specification forwarded to ``filters.get_window``.
    dtype :
        dtype of the returned matrix.

    Returns
    -------
    stft_matrix : array of shape ``(1 + n_fft // 2, frames.shape[1])``
    """
    n_fft = frames.shape[0]
    n_frames = frames.shape[1]

    # Column vector so that the window broadcasts across frames.
    fft_window = filters.get_window(window, n_fft, fftbins=True).reshape((-1, 1))

    # Pre-allocate the STFT matrix
    n_bins = int(1 + n_fft // 2)
    stft_matrix = np.empty((n_bins, n_frames), dtype=dtype, order='F')

    # How many columns can we fit within MAX_MEM_BLOCK?  Always process at
    # least one: a zero step would make range() raise for very long frames.
    n_columns = int(util.MAX_MEM_BLOCK / (n_bins * stft_matrix.itemsize))
    n_columns = max(n_columns, 1)

    for bl_s in range(0, n_frames, n_columns):
        bl_t = min(bl_s + n_columns, n_frames)
        # Full FFT truncated to the non-negative bins, conjugated to match
        # phase from DPWE code.
        stft_matrix[:, bl_s:bl_t] = fft.fft(fft_window * frames[:, bl_s:bl_t],
                                            axis=0)[:n_bins].conj()

    return stft_matrix
def __init__(
        self,
        n_fft: int = 512,
        n_mels: int = 80,
        sample_rate: int = 16000,
        hop_length: int = 200,
        f_max=8000,  # default
        f_min=0,  # default
        power=2.0,  # default
        win_length=None,
        window='hann',  # default
        center=True,
        pad_mode='reflect',  # default
        norm=None,  # default for pytorch
        htk=True  # default for pytorch
):
    """Store the STFT settings and precompute the mel basis and FFT window.

    ``n_fft``, ``sample_rate``, ``hop_length``, ``power`` and ``pad_mode``
    are stored under the same names.  ``n_mels``, ``f_min``, ``f_max``,
    ``norm`` and ``htk`` are only used to build ``self.mel_basis`` through
    ``filters.mel``.  ``window`` selects the window stored, as a column
    vector, in ``self.fft_window``.

    NOTE: ``win_length`` and ``center`` are accepted but not read here; the
    window length is always ``n_fft``.
    """
    # Framing parameters
    self.sample_rate = sample_rate
    self.n_fft = n_fft
    self.win_length = n_fft
    self.hop_length = hop_length
    self.pad_mode = pad_mode
    self.power = power

    # Mel filter bank
    mel_options = dict(
        sr=sample_rate,
        n_fft=n_fft,
        n_mels=n_mels,
        fmin=f_min,
        fmax=f_max,
        norm=norm,
        htk=htk,
    )
    self.mel_basis = filters.mel(**mel_options)

    # Analysis window, reshaped to a column so it can be broadcast
    analysis_window = get_window(window, self.win_length, fftbins=True)
    self.fft_window = analysis_window.reshape((-1, 1))
def __init__(self, length, stride=None, amplitude=1.):
    """Store the frame settings and build a Hann window of ``length`` samples.

    Parameters
    ----------
    length : int
        Window length; stored as ``self.l`` and passed to ``get_window``.
    stride : number, optional
        Stored as ``int(stride)``.  Defaults to ``length // 2`` when omitted.
    amplitude : float
        Stored unchanged as ``self.amplitude``.
    """
    self.l = length
    # Resolve the default before converting: int(None) raises TypeError,
    # which made the default ``stride=None`` impossible to use.
    self.stride = length // 2 if stride is None else int(stride)
    self.amplitude = amplitude
    self.last_frame = None
    self.w = get_window('hann', self.l, True)
def get_mfcc(self, sig_frm):
    """Compute MFCCs for one frame and record its dB power spectrum.

    The frame is scaled by 1/32768 (presumably 16-bit PCM input -- TODO
    confirm with the caller), multiplied by a Hamming window spanning the
    whole frame and transformed with an FFT of the frame's own length.  The
    resulting power spectrum is reduced to 40 mel bands and then to 13 MFCCs.

    A second power spectrum is taken from the reflect-padded frame with a
    512-point FFT, converted to dB and handed to ``self.spec_tape_add``.

    Parameters
    ----------
    sig_frm : array
        Samples of a single frame (assumed 1-D; it is indexed as
        ``sig_frm[:, None]`` after validation).

    Returns
    -------
    The output of ``librosa.feature.mfcc`` with ``n_mfcc=13``.
    """
    sig_frm = sig_frm / 32768.0
    window = 'hamming'
    win_length = sig_frm.shape[0]

    # --- MFCC path: FFT size equals the frame length -------------------
    n_fft = win_length
    fft_window = get_window(window, win_length, fftbins=True)
    fft_window = util.pad_center(fft_window, n_fft)
    fft_window = fft_window.reshape((-1, 1))

    util.valid_audio(sig_frm)
    sig_frm = sig_frm[:, None]

    # Keep only the non-negative frequency bins of the full FFT.
    n_bins = int(1 + n_fft // 2)
    stft = fft.fft(fft_window * sig_frm, axis=0)[:n_bins].conj()
    powspec = np.abs(stft) ** 2

    melspec = librosa.feature.melspectrogram(S=powspec,
                                             hop_length=win_length,
                                             n_fft=n_fft,
                                             n_mels=40)
    # power_to_db replaces the removed librosa.logamplitude alias; this
    # function already depends on power_to_db for the spectrum below.
    mfcc = librosa.feature.mfcc(S=librosa.power_to_db(melspec), n_mfcc=13)

    # --- Spectrum path: 512-point FFT of the reflect-padded frame ------
    n_fft = 512
    fft_window = get_window(window, win_length, fftbins=True)
    fft_window = util.pad_center(fft_window, n_fft)
    fft_window = fft_window.reshape((-1, 1))

    y = np.pad(sig_frm[:, 0], int(n_fft // 2), mode='reflect')
    # Only the first frame of the padded signal is kept.
    pad_frame = librosa.util.frame(y,
                                   frame_length=n_fft,
                                   hop_length=win_length * 2)[:, 0][:, None]

    n_bins = int(1 + n_fft // 2)
    stft = fft.fft(fft_window * pad_frame, axis=0)[:n_bins].conj()
    powspec = np.abs(stft) ** 2

    spec = librosa.power_to_db(powspec)
    self.spec_tape_add(spec)

    return mfcc
def smooth(self, feat, win_len_smooth=4):
    '''
    Smooth ``feat`` with a unit-sum Hann kernel and L2-normalise the result.

    The kernel has ``win_len_smooth + 2`` taps and is applied as a single-row
    2-D convolution (``mode='same'``, zero fill at the borders); the output
    is then normalised with ``norm=2`` along axis 0.

    This code is similar to the one used on librosa for smoothing cens:
    https://librosa.github.io/librosa/generated/librosa.feature.chroma_cens.html
    '''
    kernel = filters.get_window('hann', win_len_smooth + 2, fftbins=False)
    # Scale to unit sum, then lift to a single-row 2-D kernel.
    kernel = np.atleast_2d(kernel / np.sum(kernel))
    smoothed = scipy.signal.convolve2d(feat, kernel,
                                       mode='same', boundary='fill')
    return util.normalize(smoothed, norm=2, axis=0)
def __init__(self, input_shape, epochs=20, batch_size=32):
    """Record the training settings and prepare the preprocessing state.

    Parameters
    ----------
    input_shape : sequence
        Stored as-is; ``input_shape[1]`` divided by the number of
        preprocessing frames (10) gives ``self.frameSize``.
    epochs : int
        Stored as ``self.epochs``.
    batch_size : int
        Stored as ``self.batch_size``.
    """
    self.input_shape = input_shape
    self.epochs, self.batch_size = epochs, batch_size

    # 512-sample Hann window
    window_size = 512
    self.Nx = window_size
    self.w = get_window(window='hann', Nx=window_size)

    # Each input is split into a fixed number of preprocessing frames.
    frame_count = 10
    self.numPreprocessFrames = frame_count
    self.frameSize = input_shape[1] / frame_count

    self.inputData = []
    # Placeholder value; no model is built in the constructor.
    self.model = 0
def _init_wrapper(self, config):
    """Copy the audio settings from ``config`` and precompute derived values.

    Reads ``sample_rate``, ``preemphasis``, ``n_fft``, ``frame_shift_ms``,
    ``frame_length_ms`` and ``num_mels`` from ``config``.

    Derived attributes:
      * ``num_freq``   -- ``n_fft // 2 + 1``
      * ``hop_length`` -- frame shift converted from milliseconds to samples
      * ``win_length`` -- frame length converted from milliseconds to samples
      * ``_mel_basis`` -- mel filter bank from ``librosa.filters.mel``
      * ``fft_window`` -- float32 Hann window of ``win_length`` samples,
        shaped ``(1, win_length)``
    """
    self._config = config
    self.sample_rate = config.sample_rate
    self._preemphasis = config.preemphasis
    self.n_fft = config.n_fft
    # Floor division keeps the bin count an integer; true division would
    # turn it into a float on Python 3.
    self.num_freq = self.n_fft // 2 + 1
    self.hop_length = int(config.frame_shift_ms / 1000 * self.sample_rate)
    self.win_length = int(config.frame_length_ms / 1000 * self.sample_rate)
    self.num_mels = config.num_mels
    # Keyword arguments work with both older librosa releases and the ones
    # that made these parameters keyword-only.
    self._mel_basis = librosa.filters.mel(sr=self.sample_rate,
                                          n_fft=self.n_fft,
                                          n_mels=self.num_mels)
    hann = filters.get_window('hann', self.win_length, fftbins=True)
    self.fft_window = hann.reshape((1, -1)).astype(np.float32)
def frames_stft(y_frames, n_fft=2048, win_length=None, window='hann', dtype=np.complex64):
    """
    Adapted from librosa for frame input.

    NOTE: not centered anymore.

    Parameters
    ----------
    y_frames : 2-D array
        Axis 0 runs over the samples of a frame, axis 1 over the frames.
        It is multiplied element-wise by a window of ``n_fft`` samples.
    n_fft : int
        FFT size; the window is padded to this length.
    win_length : int, optional
        Window length before padding.  Defaults to ``n_fft``.
    window :
        Window specification forwarded to ``get_window``.
    dtype :
        dtype of the returned matrix.

    Returns
    -------
    stft_matrix : array of shape ``(1 + n_fft // 2, y_frames.shape[1])``
    """
    # By default, use the entire frame
    if win_length is None:
        win_length = n_fft

    fft_window = get_window(window, win_length, fftbins=True)

    # Pad the window out to n_fft size
    fft_window = util.pad_center(fft_window, n_fft)

    # Reshape so that the window can be broadcast
    fft_window = fft_window.reshape((-1, 1))

    # Pre-allocate the STFT matrix
    n_bins = int(1 + n_fft // 2)
    n_frames = y_frames.shape[1]
    stft_matrix = np.empty((n_bins, n_frames), dtype=dtype, order='F')

    # How many columns can we fit within MAX_MEM_BLOCK?  Always process at
    # least one: a zero step would make range() raise for very large n_fft.
    n_columns = int(util.MAX_MEM_BLOCK / float(n_bins * stft_matrix.itemsize))
    n_columns = max(n_columns, 1)

    for bl_s in range(0, n_frames, n_columns):
        bl_t = min(bl_s + n_columns, n_frames)
        # Full FFT truncated to the non-negative frequency bins.
        stft_matrix[:, bl_s:bl_t] = fft.fft(fft_window * y_frames[:, bl_s:bl_t],
                                            axis=0)[:n_bins]

    return stft_matrix
import librosa
import numpy as np
import librosa.util as util
from librosa.filters import get_window

# Scratch script: load an audio file, cut it into frames, and print the
# symmetric and periodic forms of a 10-sample Hamming window before padding
# the periodic one out to the FFT size.

audio_path = "../AudioData/audio/D4_750.wav"
noise_path = "../AudioData/noise/Pink Noise.wav"

# Read the audio file
y, sr = librosa.load(audio_path)

# Split the audio into frames
win_len = n_fft = 200
hop_length = 80

# Pad the time series so that frames are centered
y = np.pad(y, int(n_fft // 2), mode='reflect')

# Window the time series.
y_frames = util.frame(y, frame_length=n_fft, hop_length=hop_length, axis=0)

# Get the window coefficients (symmetric form first, for comparison)
fft_window = get_window('hamm', 10, fftbins=False)
# fft_window = fft_window[1:-1]
print(fft_window)

# Same window in periodic form; this is the one used below
fft_window = get_window('hamm', 10, fftbins=True)
print(fft_window)

# Pad the window out to n_fft size
fft_window = util.pad_center(fft_window, n_fft)

# Reshape so that the window can be broadcast
fft_window = fft_window.reshape((-1, 1))
frame_size = config['frame_size'] frame_step = config['frame_step'] n_fft = config['n_fft'] n_mels = config['mfcc_bank_cnt'] fmin = config['fmin'] fmax = config['fmax'] dtype = config.get('dtype', "int") high_prec = config.get('use_high_prec', False) or dtype == "fix32_scal" use_power = False rad4 = round(math.log(n_fft // 2, 4)) == math.log(n_fft // 2, 4) ndct = config.get('n_dct', False) from librosa.filters import get_window from librosa import util librosa_fft_window = get_window("hann", frame_size, fftbins=True) # Pad the window out to n_fft size librosa_fft_window = util.pad_center(librosa_fft_window, n_fft) stft = librosa.core.spectrum.stft(data, n_fft, frame_step, frame_size, center=False, pad_mode="constant") spect = np.abs(stft)**(1 if not use_power else 2) mel_basis = librosa.filters.mel(samplerate, n_fft, n_mels, fmin, fmax) mel_spect = np.dot(mel_basis, spect) logmel = power_to_db(mel_spect, top_db=None) mfcc = scipy.fftpack.dct(logmel, axis=0, type=2, norm=None) with open("ground_truth.h", "w") as f: