Ejemplo n.º 1
0
def hamming(win_len, mode='symmetric'):
    """Return a Hamming window with ``win_len`` samples.

    Parameters
    ----------
    win_len : int
        Number of samples in the returned window.
    mode : {'symmetric', 'periodic'}
        ``'symmetric'`` requests the window with ``fftbins=False``;
        ``'periodic'`` requests it with ``fftbins=True``.

    Returns
    -------
    The window produced by ``get_window('hamm', win_len, fftbins=...)``.

    Raises
    ------
    ValueError
        If ``mode`` is neither ``'symmetric'`` nor ``'periodic'``.
    """
    if mode not in ('symmetric', 'periodic'):
        # A bare ``raise`` with no active exception only produces
        # "RuntimeError: No active exception to re-raise"; raise an explicit
        # error that carries the message instead of printing it.
        raise ValueError('Window mode can not be {}'.format(mode))
    return get_window('hamm', win_len, fftbins=(mode == 'periodic'))
Ejemplo n.º 2
0
def hanning(win_len, mode='symmetric'):
    """Return a Hann window with ``win_len`` samples.

    Parameters
    ----------
    win_len : int
        Number of samples in the returned window.
    mode : {'symmetric', 'periodic'}
        ``'symmetric'`` builds a symmetric window two samples longer and
        drops its first and last sample; ``'periodic'`` requests the window
        with ``fftbins=True``.

    Returns
    -------
    The window of length ``win_len``.

    Raises
    ------
    ValueError
        If ``mode`` is neither ``'symmetric'`` nor ``'periodic'``.
    """
    if mode == 'symmetric':
        padded = get_window('hann', win_len + 2, fftbins=False)
        # Drop the two zeros at the front and back of the symmetric window.
        return padded[1:-1]
    if mode == 'periodic':
        return get_window('hann', win_len, fftbins=True)
    # A bare ``raise`` with no active exception only produces
    # "RuntimeError: No active exception to re-raise"; raise an explicit
    # error that carries the message instead of printing it.
    raise ValueError('Window mode can not be {}'.format(mode))
Ejemplo n.º 3
0
def get_chromagram(y, sr, chroma):
	"""
		Compute a chromagram with the extractor named in ``chroma``.

		Parameters
		----------
		y :
			audio, handed unchanged to the selected extractor

		sr :
			target sampling rate; this argument is not read here, the "sr"
			entry returned by get_parameters_chroma is used instead

		chroma : str
			chroma-samplerate-framesize-overlap descriptor, parsed by
			get_parameters_chroma


		Returns
		-------
		The result of the selected extractor ('nnls', 'cqt', 'cens' or
		'stft'), or None when the parsed chroma name is none of those.
	"""
	params = get_parameters_chroma(chroma)
	kind = params["chroma"]

	if kind == 'nnls':
		return get_nnls(y, params["sr"], params["fr"], params["off"])

	if kind not in ('cqt', 'cens', 'stft'):
		# Unknown extractor name: nothing is computed.
		return None

	# The three remaining extractors all take a Blackman-Harris window.
	win = get_window('blackmanharris', params["fr"])

	if kind == 'stft':
		return chroma_stft(y=y, sr=params["sr"], hop_length=params["off"],
		                   norm=None, window=win, n_chroma=12)

	# 'cqt' and 'cens' share every argument except the octave count, which
	# drops to 4 only for 'cqt' at a sampling rate of 5525.
	if kind == 'cqt':
		extractor = chroma_cqt
		octaves = 4 if params["sr"] == 5525 else 5
	else:
		extractor = chroma_cens
		octaves = 5

	return extractor(y=y, sr=params["sr"],
	                 C=None,
	                 hop_length=params["off"],
	                 norm=None,
	                 window=win,
	                 fmin=110,
	                 n_chroma=12,
	                 n_octaves=octaves,
	                 bins_per_octave=36)
Ejemplo n.º 4
0
def stft_from_frames(frames, window='hann', dtype=np.complex64):
    """
    Variation of the librosa.core.stft function
    that computes the short-time Fourier transform from frames instead of
    from the signal.

    See http://librosa.github.io/librosa/_modules/librosa/core/spectrum.html#stft

    Parameters
    ----------
    frames : 2-D array
        One frame per column. ``frames.shape[0]`` is used both as the window
        length and as the FFT size.
    window :
        Window specification forwarded to ``filters.get_window``.
    dtype :
        dtype of the returned matrix.

    Returns
    -------
    Array of shape ``(1 + frames.shape[0] // 2, frames.shape[1])`` holding
    the conjugated FFT of every windowed frame.
    """

    win_length = frames.shape[0]
    n_fft = win_length

    # Column vector so that the window broadcasts across all frames.
    fft_window = filters.get_window(window, win_length,
                                    fftbins=True).reshape((-1, 1))

    # Pre-allocate the STFT matrix
    stft_matrix = np.empty((int(1 + n_fft // 2), frames.shape[1]),
                           dtype=dtype,
                           order='F')

    # How many columns fit within MAX_MEM_BLOCK?  Clamp to at least one:
    # for very long frames the quotient truncates to 0, and a zero step
    # makes range() raise ValueError.
    n_columns = max(1, int(util.MAX_MEM_BLOCK /
                           (stft_matrix.shape[0] * stft_matrix.itemsize)))

    for bl_s in range(0, stft_matrix.shape[1], n_columns):
        bl_t = min(bl_s + n_columns, stft_matrix.shape[1])

        # Full FFT truncated to the first 1 + n_fft // 2 bins, then
        # conjugated (kept from the librosa original, which notes this
        # matches the phase of the DPWE code).
        stft_matrix[:,
                    bl_s:bl_t] = fft.fft(fft_window * frames[:, bl_s:bl_t],
                                         axis=0)[:stft_matrix.shape[0]].conj()

    return stft_matrix
Ejemplo n.º 5
0
 def __init__(
         self,
         n_fft: int = 512,
         n_mels: int = 80,
         sample_rate: int = 16000,
         hop_length: int = 200,
         f_max=8000,  # default
         f_min=0,  # default
         power=2.0,  # default
         win_length=None,
         window='hann',  # default
         center=True,
         pad_mode='reflect',  # default
         norm=None,  # default for pytorch
         htk=True  # default for pytorch
 ):
     """Store the STFT settings and precompute the mel filterbank and window.

     NOTE(review): ``win_length`` and ``center`` are accepted but never read
     in this constructor; the window length is always set to ``n_fft``.
     """
     # Plain settings kept on the instance.
     self.sample_rate = sample_rate
     self.n_fft = n_fft
     self.hop_length = hop_length
     self.win_length = n_fft
     self.power = power
     self.pad_mode = pad_mode

     # Mel filterbank, built once from the constructor arguments.
     mel_options = dict(
         sr=sample_rate,
         n_fft=n_fft,
         n_mels=n_mels,
         fmin=f_min,
         fmax=f_max,
         norm=norm,
         htk=htk,
     )
     self.mel_basis = filters.mel(**mel_options)

     # Analysis window stored as a column vector.
     window_1d = get_window(window, self.win_length, fftbins=True)
     self.fft_window = window_1d.reshape((-1, 1))
    def __init__(self, length, stride=None, amplitude=1.):
        """Store the frame settings and build the Hann window.

        Parameters
        ----------
        length :
            Frame length; also the length of the Hann window kept in
            ``self.w``.
        stride :
            Step between frames, converted with ``int()``. When ``None``
            it defaults to ``length // 2``.
        amplitude :
            Stored unchanged as ``self.amplitude``.
        """
        self.l = length
        # Check for None before converting: int(None) raises TypeError,
        # which would make the default argument unusable.
        if stride is None:
            self.stride = length // 2
        else:
            self.stride = int(stride)

        self.amplitude = amplitude
        self.last_frame = None

        self.w = get_window('hann', self.l, True)
    def get_mfcc(self, sig_frm):
        """Compute MFCCs for one frame and record that frame's dB spectrum.

        Two spectra are computed from ``sig_frm``: the first uses an FFT
        size equal to the frame length and feeds the mel/MFCC computation;
        the second uses a 512-point FFT of the reflect-padded frame and is
        passed to ``self.spec_tape_add``.

        Returns the result of ``librosa.feature.mfcc(..., n_mfcc=13)``.
        """
        # Scale by 1/32768 (presumably maps int16 PCM to [-1, 1) -- TODO
        # confirm with the caller).
        sig_frm = sig_frm / 32768.0
        window = 'hamming'
        win_length = sig_frm.shape[0]
        hop_length = win_length
        # NOTE(review): `center` is assigned but not read in this method.
        center = True
        # First pass: the FFT size equals the frame length.
        n_fft = win_length
        fft_window = get_window(window, win_length, fftbins=True)
        fft_window = util.pad_center(fft_window, n_fft)
        # Column vector so the window multiplies the (n, 1) frame below.
        fft_window = fft_window.reshape((-1, 1))
        util.valid_audio(sig_frm)
        sig_frm = sig_frm[:, None]
        # Allocated only so that its row count can slice the FFT output.
        stft_matrix = np.empty((int(1 + n_fft // 2), 1),
                               dtype=np.complex64,
                               order='F')
        # Keep the first 1 + n_fft // 2 bins and conjugate them.
        stft = fft.fft(fft_window * sig_frm,
                       axis=0)[:stft_matrix.shape[0]].conj()
        powspec = np.abs(stft)**2
        melspec = librosa.feature.melspectrogram(S=powspec,
                                                 hop_length=hop_length,
                                                 n_fft=n_fft,
                                                 n_mels=40)
        # NOTE(review): librosa.logamplitude exists only in old librosa
        # releases (later replaced by power_to_db) -- confirm the pinned
        # librosa version.
        mfcc = librosa.feature.mfcc(S=librosa.logamplitude(melspec), n_mfcc=13)

        # Second pass: 512-point spectrum of the same frame for the tape.
        n_fft = 512
        fft_window = get_window(window, win_length, fftbins=True)
        # The window keeps the frame length and is padded out to n_fft.
        fft_window = util.pad_center(fft_window, n_fft)
        fft_window = fft_window.reshape((-1, 1))
        # Reflect-pad the frame by n_fft // 2 samples on both sides.
        y = np.pad(sig_frm[:, 0], int(n_fft // 2), mode='reflect')
        # Only the first n_fft-sample frame is kept, as a column vector.
        pad_frame = librosa.util.frame(y,
                                       frame_length=n_fft,
                                       hop_length=win_length * 2)[:, 0][:,
                                                                        None]
        # Again allocated only for its row count.
        stft_matrix = np.empty((int(1 + n_fft // 2), 1),
                               dtype=np.complex64,
                               order='F')
        stft = fft.fft(fft_window * pad_frame,
                       axis=0)[:stft_matrix.shape[0]].conj()
        powspec = np.abs(stft)**2
        # power_to_db is looked up by name on the librosa module at call time.
        power_to_db = getattr(librosa, 'power_to_db')
        spec = power_to_db(powspec)
        self.spec_tape_add(spec)
        return mfcc
Ejemplo n.º 8
0
    def smooth(self, feat, win_len_smooth=4):
        """Smooth ``feat`` along its second axis and L2-normalise each column.

        A symmetric Hann window of ``win_len_smooth + 2`` samples, scaled to
        unit sum, is convolved with ``feat`` as a single-row 2-D kernel
        (zero-filled borders, output the same shape as ``feat``).

        This code is similar to the one used on librosa for smoothing cens:
        https://librosa.github.io/librosa/generated/librosa.feature.chroma_cens.html
        """
        kernel = filters.get_window('hann', win_len_smooth + 2, fftbins=False)
        # Unit-sum kernel, shaped (1, n) so it only mixes along axis 1.
        kernel = np.atleast_2d(kernel / np.sum(kernel))

        smoothed = scipy.signal.convolve2d(feat,
                                           kernel,
                                           mode='same',
                                           boundary='fill')
        return util.normalize(smoothed, norm=2, axis=0)
Ejemplo n.º 9
0
 def __init__(self, input_shape, epochs=20, batch_size=32):
     """Store the training settings and set up the preprocessing state.

     ``input_shape[1]`` is divided (true division) by the fixed number of
     preprocess frames to obtain ``frameSize``.
     """
     # Training settings.
     self.input_shape = input_shape
     self.epochs, self.batch_size = epochs, batch_size

     # 512-sample Hann window.
     self.Nx = 512
     self.w = get_window(window='hann', Nx=self.Nx)

     # Framing of the network input.
     self.numPreprocessFrames = 10
     self.frameSize = input_shape[1] / self.numPreprocessFrames

     # Model construction (Model_build) is currently disabled; ``model``
     # starts out as the placeholder 0 and ``inputData`` as an empty list.
     self.model = 0
     self.inputData = []
Ejemplo n.º 10
0
    def _init_wrapper(self, config):
        """Copy the audio settings from ``config`` and precompute the mel
        filterbank and the Hann analysis window.

        Reads ``sample_rate``, ``preemphasis``, ``n_fft``, ``frame_shift_ms``,
        ``frame_length_ms`` and ``num_mels`` from ``config``.
        """
        self._config = config
        self.sample_rate = config.sample_rate
        self._preemphasis = config.preemphasis
        self.n_fft = config.n_fft
        # Floor division keeps the bin count an int; true division yields a
        # float (e.g. 257.0) under Python 3.
        self.num_freq = self.n_fft // 2 + 1
        # Frame shift / length are given in milliseconds; convert to samples.
        self.hop_length = int(config.frame_shift_ms / 1000 * self.sample_rate)
        self.win_length = int(config.frame_length_ms / 1000 * self.sample_rate)
        self.num_mels = config.num_mels

        # Keyword arguments work on old and new librosa alike; recent
        # releases no longer accept sr / n_fft positionally.
        self._mel_basis = librosa.filters.mel(sr=self.sample_rate,
                                              n_fft=self.n_fft,
                                              n_mels=self.num_mels)

        # Row vector (1, win_length) in float32.
        self.fft_window = filters.get_window('hann',
                                             self.win_length,
                                             fftbins=True).reshape(
                                                 (1, -1)).astype(np.float32)
Ejemplo n.º 11
0
def frames_stft(y_frames,
                n_fft=2048,
                win_length=None,
                window='hann',
                dtype=np.complex64):
    """
    Adapted from librosa for frame input. NOTE: not centered anymore.

    Parameters
    ----------
    y_frames : 2-D array
        One frame per column; it is multiplied by an ``(n_fft, 1)`` window,
        so it is expected to have ``n_fft`` rows.
    n_fft : int
        FFT size; the window is padded out to this length.
    win_length : int or None
        Window length; defaults to ``n_fft`` when ``None``.
    window :
        Window specification forwarded to ``get_window``.
    dtype :
        dtype of the returned matrix.

    Returns
    -------
    Array of shape ``(1 + n_fft // 2, y_frames.shape[1])`` holding the FFT
    of every windowed frame.
    """
    # By default, use the entire frame
    if win_length is None:
        win_length = n_fft

    fft_window = get_window(window, win_length, fftbins=True)

    # Pad the window out to n_fft size.  ``size`` is passed by keyword
    # because recent librosa releases no longer accept it positionally.
    fft_window = util.pad_center(fft_window, size=n_fft)

    # Reshape so that the window can be broadcast
    fft_window = fft_window.reshape((-1, 1))

    # Pre-allocate the STFT matrix
    n_bins = int(1 + n_fft // 2)
    stft_matrix = np.empty((n_bins, y_frames.shape[1]),
                           dtype=dtype,
                           order='F')

    # How many columns fit within MAX_MEM_BLOCK?  Clamp to at least one:
    # for very large n_fft the quotient truncates to 0, and a zero step
    # makes range() raise ValueError.
    n_columns = max(1, int(util.MAX_MEM_BLOCK /
                           float(n_bins * stft_matrix.itemsize)))

    for bl_s in range(0, stft_matrix.shape[1], n_columns):
        bl_t = min(bl_s + n_columns, stft_matrix.shape[1])

        # Full FFT per column block, truncated to the first n_bins rows.
        stft_matrix[:,
                    bl_s:bl_t] = fft.fft(fft_window * y_frames[:, bl_s:bl_t],
                                         axis=0)[:n_bins]

    return stft_matrix
Ejemplo n.º 12
0
import librosa
import numpy as np
import librosa.util as util
from librosa.filters import get_window

# Demo script: load an audio file, cut it into frames, and print the
# symmetric and periodic Hamming windows before padding one to n_fft.
audio_path = "../AudioData/audio/D4_750.wav"
# NOTE(review): noise_path is defined but not used in the visible script.
noise_path = "../AudioData/noise/Pink Noise.wav"
# Read the audio file
y, sr = librosa.load(audio_path)

# Split the audio into frames
win_len = n_fft = 200
hop_length = 80
# Pad the time series so that frames are centered
y = np.pad(y, int(n_fft // 2), mode='reflect')
# Window the time series.
y_frames = util.frame(y, frame_length=n_fft, hop_length=hop_length, axis=0)

# Get the window coefficients: first the symmetric 10-sample window ...
fft_window = get_window('hamm', 10, fftbins=False)
# fft_window = fft_window[1:-1]
print(fft_window)
# ... then the periodic one, which replaces it and is the one kept below.
fft_window = get_window('hamm', 10, fftbins=True)
print(fft_window)
# Pad the window out to n_fft size
fft_window = util.pad_center(fft_window, n_fft)
# Reshape so that the window can be broadcast
fft_window = fft_window.reshape((-1, 1))

#
# Read the feature-extraction settings from `config`.
frame_size = config['frame_size']
frame_step = config['frame_step']
n_fft = config['n_fft']
n_mels = config['mfcc_bank_cnt']
fmin = config['fmin']
fmax = config['fmax']
dtype = config.get('dtype', "int")
# High precision is enabled explicitly or implied by the "fix32_scal" dtype.
high_prec = config.get('use_high_prec', False) or dtype == "fix32_scal"
use_power = False
# True when log4(n_fft // 2) is a whole number, i.e. n_fft // 2 is an exact
# power of 4 (presumably selects a radix-4 FFT -- usage not visible here).
rad4 = round(math.log(n_fft // 2, 4)) == math.log(n_fft // 2, 4)
ndct = config.get('n_dct', False)

from librosa.filters import get_window
from librosa import util
# NOTE(review): this window is built and padded but is not passed to the
# stft call below in the visible part of the script.
librosa_fft_window = get_window("hann", frame_size, fftbins=True)
# Pad the window out to n_fft size
librosa_fft_window = util.pad_center(librosa_fft_window, n_fft)

# Arguments are positional; in librosa's stft signature these positions are
# n_fft, hop_length and win_length -- confirm against the pinned version.
stft = librosa.core.spectrum.stft(data,
                                  n_fft,
                                  frame_step,
                                  frame_size,
                                  center=False,
                                  pad_mode="constant")
# Magnitude spectrum, or power spectrum when use_power is set.
spect = np.abs(stft)**(1 if not use_power else 2)
mel_basis = librosa.filters.mel(samplerate, n_fft, n_mels, fmin, fmax)
# Project the spectrum onto the mel filterbank.
mel_spect = np.dot(mel_basis, spect)
logmel = power_to_db(mel_spect, top_db=None)
# Unnormalised type-2 DCT along axis 0 gives the MFCCs.
mfcc = scipy.fftpack.dct(logmel, axis=0, type=2, norm=None)
with open("ground_truth.h", "w") as f: