def zpFFTsizeExpt(x, fs):
    """
    Compare dftAnal magnitude spectra for three window-size / FFT-size settings.

    Inputs:
        x (numpy array) = input signal (2*M samples long)
        fs (float) = sampling frequency in Hz (not used in the computation)
    Output:
        The function returns a tuple (mX1_80, mX2_80, mX3_80)
        mX1_80 (numpy array): The first 80 samples of the magnitude spectrum output of dftAnal for Case-1
        mX2_80 (numpy array): The first 80 samples of the magnitude spectrum output of dftAnal for Case-2
        mX3_80 (numpy array): The first 80 samples of the magnitude spectrum output of dftAnal for Case-3
    """
    # Floor division keeps M an integer so it is usable as a slice bound and
    # as a window length (true division yields a float on Python 3).
    M = len(x) // 2
    xseg = x[:M]
    w1 = get_window('hamming', M)
    w2 = get_window('hamming', 2 * M)

    # NOTE: the shape of a window depends on its length, so the M-sample
    # window is generated on its own instead of slicing w2.

    # Case-1: input xseg (M samples), window w1 (M samples), FFT size M
    mX1, _ = dftAnal(xseg, w1, M)

    # Case-2: input x (2*M samples), window w2 (2*M samples), FFT size 2*M
    mX2, _ = dftAnal(x[:2 * M], w2, 2 * M)

    # Case-3: input xseg (M samples), window w1 (M samples), FFT size 2*M
    # (the FFT size exceeds the frame length by M samples)
    mX3, _ = dftAnal(xseg, w1, 2 * M)

    return (mX1[:80], mX2[:80], mX3[:80])
def zpFFTsizeExpt(x, fs):
    """
    Compute and plot dftAnal magnitude spectra for three window/FFT-size settings.

    Inputs:
        x (numpy array) = input signal (2*M samples long)
        fs (float) = sampling frequency in Hz (not used in the computation)
    Output:
        The function returns a tuple (mX1_80, mX2_80, mX3_80)
        mX1_80 (numpy array): The first 80 samples of the magnitude spectrum output of dftAnal for Case-1
        mX2_80 (numpy array): The first 80 samples of the magnitude spectrum output of dftAnal for Case-2
        mX3_80 (numpy array): The first 80 samples of the magnitude spectrum output of dftAnal for Case-3
    Side effect:
        Plots the three spectra with matplotlib and calls plt.show().
    """
    # Floor division: M must be an integer to slice x and to size the window.
    M = len(x) // 2
    xseg = x[:M]
    w1 = get_window('hamming', M)
    w2 = get_window('hamming', 2 * M)

    (xM1, pX1) = dftAnal(xseg, w1, M)        # Case-1: half signal, FFT size M
    (xM2, pX2) = dftAnal(x, w2, len(x))      # Case-2: full signal, FFT size len(x)
    (xM3, pX3) = dftAnal(xseg, w1, len(x))   # Case-3: half signal, FFT size len(x)

    # Case-1 is drawn at every second x position (first 40 bins only) so it
    # lines up with the two curves computed with the larger FFT size.
    plt.plot(np.arange(0, 80, 2), xM1[:40], color="red")
    plt.plot(xM2[:80], color="blue")
    plt.plot(xM3[:80], color="green")
    plt.show()

    return (xM1[:80], xM2[:80], xM3[:80])
def zpFFTsizeExpt(x, fs):
    """
    Compute and plot dftAnal magnitude spectra for three window/FFT-size settings.

    Inputs:
        x (numpy array) = input signal (2*M samples long)
        fs (float) = sampling frequency in Hz (not used in the computation)
    Output:
        The function returns a tuple (mX1_80, mX2_80, mX3_80)
        mX1_80 (numpy array): The first 80 samples of the magnitude spectrum output of dftAnal for Case-1
        mX2_80 (numpy array): The first 80 samples of the magnitude spectrum output of dftAnal for Case-2
        mX3_80 (numpy array): The first 80 samples of the magnitude spectrum output of dftAnal for Case-3
    Side effect:
        Plots the three truncated spectra with matplotlib and calls plt.show().
    """
    # Floor division: M must be an integer to slice x and to size the window.
    M = len(x) // 2
    xseg = x[:M]
    w1 = get_window('hamming', M)
    w2 = get_window('hamming', 2 * M)

    mX1 = dftAnal(xseg, w1, M)[0]        # Case-1: half signal, FFT size M
    mX2 = dftAnal(x, w2, 2 * M)[0]       # Case-2: full signal, FFT size 2*M
    mX3 = dftAnal(xseg, w1, 2 * M)[0]    # Case-3: half signal, FFT size 2*M

    mX1_80 = mX1[:80]
    mX2_80 = mX2[:80]
    mX3_80 = mX3[:80]

    # Each curve carries its own name (all three used to be labelled "mX1")
    # and the legend is drawn so the labels are actually visible.
    plt.plot(mX1_80, label="mX1(half/half)", color="red")
    plt.plot(mX2_80, label="mX2(full/full)", color="blue")
    plt.plot(mX3_80, label="mX3(half/full)", color="green")
    plt.legend()
    plt.show()

    return mX1_80, mX2_80, mX3_80
def zpFFTsizeExpt(x, fs):
    """
    Compare dftAnal magnitude spectra for three window-size / FFT-size settings.

    Inputs:
        x (numpy array) = input signal (2*M = 512 samples long)
        fs (float) = sampling frequency in Hz (not used in the computation)
    Output:
        The function returns a tuple (mX1_80, mX2_80, mX3_80)
        mX1_80 (numpy array): The first 80 samples of the magnitude spectrum output of dftAnal for Case-1
        mX2_80 (numpy array): The first 80 samples of the magnitude spectrum output of dftAnal for Case-2
        mX3_80 (numpy array): The first 80 samples of the magnitude spectrum output of dftAnal for Case-3
    """
    # Floor division: M must be an integer to slice x and to size the window.
    M = len(x) // 2
    xseg = x[:M]
    w1 = get_window('hamming', M)
    w2 = get_window('hamming', 2 * M)

    # FFT sizes are derived from M (256 and 512 for the documented 512-sample
    # input) instead of being hard-coded.
    mX1, _ = dftAnal(xseg, w1, M)            # Case-1: half signal, FFT size M
    mX2, _ = dftAnal(x[:2 * M], w2, 2 * M)   # Case-2: full signal, FFT size 2*M
    mX3, _ = dftAnal(xseg, w1, 2 * M)        # Case-3: half signal, FFT size 2*M

    return (mX1[:80], mX2[:80], mX3[:80])
def stftMorph(x1, x2, fs, w1, N1, w2, N2, H1, smoothf, balancef):
    """
    Morph of two sounds using the STFT
    x1, x2: input sounds, fs: sampling rate (not used in the computation)
    w1, w2: analysis windows, N1, N2: FFT sizes, H1: hop size for sound 1
    smoothf: smooth factor of sound 2, bigger than 0 to max of 1, where 1 is no smoothing,
    balancef: balance between the 2 sounds, from 0 to 1, where 0 is sound 1 and 1 is sound 2
    returns y: output sound
    raises ValueError when smoothf, balancef or H1 is out of range
    """
    # ----- argument checks -----
    if N2/2*smoothf < 3:
        raise ValueError("Smooth factor too small")
    if smoothf > 1:
        raise ValueError("Smooth factor above 1")
    if balancef > 1 or balancef < 0:
        raise ValueError("Balance factor outside range")
    if H1 <= 0:
        raise ValueError("Hop size (H1) smaller or equal to 0")

    # ----- framing of sound 1 -----
    win1_len = w1.size
    tail1, odd1 = divmod(win1_len, 2)      # floor half of the window
    head1 = tail1 + odd1                   # rounded-up half of the window
    n_frames = int(x1.size/H1)             # frame count is taken before padding
    x1 = np.append(np.append(np.zeros(tail1), x1), np.zeros(head1))
    w1 = w1 / sum(w1)                      # unit-sum analysis window

    # ----- framing of sound 2 (hop chosen so both sounds yield n_frames) -----
    # NOTE(review): w2 is not normalised the way w1 is - confirm this is intended.
    win2_len = w2.size
    tail2, odd2 = divmod(win2_len, 2)
    head2 = tail2 + odd2
    hop2 = int(x2.size/n_frames)
    x2 = np.append(np.append(np.zeros(tail2), x2), np.zeros(head2))

    out = np.zeros(x1.size)
    pos1, pos2 = head1, head2              # frame centres in the padded sounds
    for _ in range(n_frames):
        span1 = slice(pos1 - head1, pos1 + tail1)
        # ----- analysis -----
        mX1, pX1 = DFT.dftAnal(x1[span1], w1, N1)
        mX2, pX2 = DFT.dftAnal(x2[pos2 - head2:pos2 + tail2], w2, N2)
        # ----- transformation -----
        # floor the spectrum at -200, decimate it, then bring it back to the
        # length of mX1 so the two spectra can be mixed
        reduced = resample(np.maximum(-200, mX2), int(mX2.size*smoothf))
        mX2 = resample(reduced, mX1.size)
        mY = balancef * mX2 + (1-balancef) * mX1
        # ----- synthesis (overlap-add, phases of sound 1) -----
        out[span1] += H1*DFT.dftSynth(mY, pX1, win1_len)
        pos1 += H1
        pos2 += hop2

    # drop the padding added before the first and after the last sample
    out = np.delete(out, slice(0, tail1))
    out = np.delete(out, slice(out.size - head1, out.size))
    return out
def suppressFreqDFTmodel(x, fs, N):
    """
    Resynthesise a frame with and without its components at or below 70 Hz.

    Inputs:
        x (numpy array) = input signal of length M (odd)
        fs (float) = sampling frequency (Hz)
        N (positive integer) = FFT size
    Outputs:
        The function returns a tuple (y, yfilt)
        y (numpy array) = Output of the dftSynth() without filtering (M samples long)
        yfilt (numpy array) = Output of the dftSynth() with filtering (M samples long)
    """
    M = len(x)
    w = get_window('hamming', M)
    outputScaleFactor = sum(w)

    mX, pX = dftAnal(x, w, N)

    # Work on a copy so the unfiltered spectrum stays intact.
    suppressed = np.array(mX)
    last_bin = int(np.ceil(N * 70.0 / fs))      # first bin at or above 70 Hz
    suppressed[:last_bin + 1] = -120            # -120 dB from DC up to that bin

    y = outputScaleFactor * dftSynth(mX, pX, M)
    yfilt = outputScaleFactor * dftSynth(suppressed, pX, M)
    return (y, yfilt)
def suppressFreqDFTmodel(x, fs, N):
    """
    Resynthesise a frame with and without its components at or below 70 Hz.

    Inputs:
        x (numpy array) = input signal of length M (odd)
        fs (float) = sampling frequency (Hz)
        N (positive integer) = FFT size
    Outputs:
        The function returns a tuple (y, yfilt)
        y (numpy array) = Output of the dftSynth() without filtering (M samples long)
        yfilt (numpy array) = Output of the dftSynth() with filtering (M samples long)
    """
    M = len(x)
    w = get_window("hamming", M)
    outputScaleFactor = sum(w)

    # magnitude and phase spectrum of the windowed frame
    mX, pX = dftAnal(x, w, N)

    # output signal without filtering
    y = dftSynth(mX, pX, M) * outputScaleFactor

    # index of the first bin strictly above 70 Hz (bin spacing is fs / N)
    bin_number = int(70.0 / (fs / float(N))) + 1

    # Suppress every bin up to and including that one. A slice assignment
    # replaces the per-element loop and cannot run past the end of mX.
    # mX is modified in place, which is safe because y is already computed.
    mX[:bin_number + 1] = -120

    # time signal after filtering
    yfilt = dftSynth(mX, pX, M) * outputScaleFactor
    return (y, yfilt)
def suppressFreqDFTmodel(x, fs, N):
    """
    Resynthesise a frame with and without its components at or below 70 Hz.

    Inputs:
        x (numpy array) = input signal of length M (odd)
        fs (float) = sampling frequency (Hz)
        N (positive integer) = FFT size
    Outputs:
        The function returns a tuple (y, yfilt)
        y (numpy array) = Output of the dftSynth() without filtering (M samples long)
        yfilt (numpy array) = Output of the dftSynth() with filtering (M samples long)
    """
    import math  # kept function-local, as in the original block

    M = len(x)
    w = get_window("hamming", M)
    outputScaleFactor = sum(w)   # computed once and reused for both outputs

    # compute the dft of the sound fragment
    mX, pX = dftAnal(x, w, N)

    # unfiltered resynthesis
    y = dftSynth(mX, pX, w.size) * outputScaleFactor

    # math.ceil returns a float on Python 2, so cast before slicing
    bin70 = int(math.ceil(70.0 * N / fs))

    # mX is modified in place, which is safe because y is already computed
    mX[:bin70 + 1] = -120
    yfilt = dftSynth(mX, pX, w.size) * outputScaleFactor
    return y, yfilt
def stftAnal(x, w, N, H):
    """
    Analysis of a sound using the short-time Fourier transform
    x: input array sound, w: analysis window, N: FFT size, H: hop size
    returns xmX, xpX: magnitude and phase spectra, one row per analysed frame
             (two empty arrays when the padded sound is too short for one frame)
    raises ValueError if H is zero or negative
    """
    if H <= 0:
        raise ValueError("Hop size (H) smaller or equal to 0")

    M = w.size                          # size of analysis window
    hM1 = (M + 1) // 2                  # half analysis window size by rounding
    hM2 = M // 2                        # half analysis window size by floor
    x = np.append(np.zeros(hM2), x)     # add zeros at beginning to center first window at sample 0
    x = np.append(x, np.zeros(hM2))     # add zeros at the end to analyze last sample
    pin = hM1                           # sound pointer in middle of analysis window
    pend = x.size - hM1                 # last sample to start a frame
    w = w / sum(w)                      # normalize analysis window

    # Frames are collected in lists and converted once at the end; stacking
    # inside the loop would copy every previous frame on each iteration.
    frame_mags = []
    frame_phases = []
    while pin <= pend:
        x1 = x[pin - hM1:pin + hM2]     # select one frame of input sound
        mX, pX = DFT.dftAnal(x1, w, N)  # compute dft
        frame_mags.append(mX)
        frame_phases.append(pX)
        pin += H                        # advance sound pointer

    return np.array(frame_mags), np.array(frame_phases)
def stftFiltering(x, fs, w, N, H, filter):
    """
    Apply a filter to a sound by using the STFT
    x: input sound, fs: sampling rate (not used in the computation),
    w: analysis window, N: FFT size, H: hop size
    filter: magnitude response of filter with frequency-magnitude pairs (in dB);
            it is added to the magnitude spectrum of every frame
    returns y: output sound
    raises ValueError if H is zero or negative
    """
    # Without this check H == 0 would never advance the frame pointer.
    if H <= 0:
        raise ValueError("Hop size (H) smaller or equal to 0")

    M = w.size                          # size of analysis window
    hM1 = (M + 1) // 2                  # half analysis window size by rounding
    hM2 = M // 2                        # half analysis window size by floor
    x = np.append(np.zeros(hM2), x)     # add zeros at beginning to center first window at sample 0
    x = np.append(x, np.zeros(hM1))     # add zeros at the end to analyze last sample
    pin = hM1                           # sound pointer in middle of analysis window
    pend = x.size - hM1                 # last sample to start a frame
    w = w / sum(w)                      # normalize analysis window
    y = np.zeros(x.size)                # output array

    while pin <= pend:
        # -----analysis-----
        x1 = x[pin - hM1:pin + hM2]     # select one frame of input sound
        mX, pX = DFT.dftAnal(x1, w, N)  # compute dft
        # -----transformation-----
        mY = mX + filter                # filter input magnitude spectrum
        # -----synthesis-----
        y1 = DFT.dftSynth(mY, pX, M)    # compute idft
        y[pin - hM1:pin + hM2] += H * y1  # overlap-add to generate output sound
        pin += H                        # advance sound pointer

    y = np.delete(y, range(hM2))                    # remove the zeros added at the beginning
    y = np.delete(y, range(y.size - hM1, y.size))   # remove the zeros added at the end
    return y
def minFreqEstErr(inputFile, f):
    """
    Find the smallest window of size 100*k + 1 whose frequency estimate is within 0.05 Hz of f.

    Inputs:
        inputFile (string) = wav file including the path
        f (float) = frequency of the sinusoid present in the input audio signal (Hz)
    Output:
        fEst (float) = Estimated frequency of the sinusoid (Hz)
        M (int) = Window size
        N (int) = FFT size
    Raises:
        ValueError if the window grows beyond the length of the signal
        before the estimate gets within tolerance.
    """
    # analysis parameters:
    window = 'blackman'
    t = -40
    max_error = 0.05                    # Hz

    fs, x = UF.wavread(inputFile)
    x_half = len(x) // 2                # frames are centred on the middle of the signal

    k = 1
    while True:
        M = 100 * k + 1
        if M > len(x):
            raise ValueError("Window size exceeds the length of the input signal")
        start = x_half - M // 2
        W = get_window(window, M)
        N = int(2 ** np.ceil(np.log2(M)))           # smallest power of two >= M
        mX, pX = DFT.dftAnal(x[start:start + M], W, N)
        ploc = UF.peakDetection(mX, t)
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)
        if len(iploc) > 0:
            # Use a single peak (the strongest) so the estimate is the scalar
            # the docstring promises, whatever number of peaks is detected.
            strongest = int(np.argmax(ipmag))
            fEst = float(iploc[strongest] * fs / N)
            if abs(f - fEst) <= max_error:
                return (fEst, M, N)
        k += 1
def stftAnal(x, w, N, H):
    """
    Short-time Fourier transform analysis of a sound
    x: input array sound, w: analysis window, N: FFT size, H: hop size
    returns xmX, xpX: magnitude and phase spectra, one row per frame
    raises ValueError if H is zero or negative
    """
    if H <= 0:
        raise ValueError("Hop size (H) smaller or equal to 0")

    win_len = w.size
    half_floor, is_odd = divmod(win_len, 2)     # floor half of the window
    half_ceil = half_floor + is_odd             # rounded-up half of the window

    # the same amount of zero padding (floor half) goes on both ends
    x = np.append(np.append(np.zeros(half_floor), x), np.zeros(half_floor))
    w = w / sum(w)                              # unit-sum analysis window

    centre = half_ceil                          # centre of the first frame
    last_centre = x.size - half_ceil            # last admissible frame centre
    magnitudes, phases = [], []
    while centre <= last_centre:
        frame = x[centre - half_ceil:centre + half_floor]
        mX, pX = DFT.dftAnal(frame, w, N)
        magnitudes.append(np.array(mX))
        phases.append(np.array(pX))
        centre += H

    return np.array(magnitudes), np.array(phases)
def suppressFreqDFTmodel(x, fs, N):
    """
    Resynthesise a frame with and without its components at or below 70 Hz.

    Inputs:
        x (numpy array) = input signal of length M (odd)
        fs (float) = sampling frequency (Hz)
        N (positive integer) = FFT size
    Outputs:
        The function returns a tuple (y, yfilt)
        y (numpy array) = Output of the dftSynth() without filtering (M samples long)
        yfilt (numpy array) = Output of the dftSynth() with filtering (M samples long)
    """
    M = len(x)
    w = get_window('hamming', M)
    outputScaleFactor = sum(w)

    (mX, pX) = dftAnal(x, w, N)

    # First bin at or above 70 Hz. The float literal keeps the division exact
    # even when fs and N are both integers.
    seventy_bin_number = int(math.ceil(70.0 * N / fs))

    # Suppress bins 0..seventy_bin_number inclusive, so the bin just above
    # 70 Hz is removed as well (the loop used to stop one bin short).
    mXfilt = mX.copy()
    mXfilt[:seventy_bin_number + 1] = -120

    y = dftSynth(mX, pX, w.size) * outputScaleFactor
    yfilt = dftSynth(mXfilt, pX, w.size) * outputScaleFactor
    return (y, yfilt)
def suppressFreqDFTmodel(x, fs, N):
    """
    Resynthesise a frame with and without its components at or below 70 Hz.

    Inputs:
        x (numpy array) = input signal of length M (odd)
        fs (float) = sampling frequency (Hz)
        N (positive integer) = FFT size
    Outputs:
        The function returns a tuple (y, yfilt)
        y (numpy array) = Output of the dftSynth() without filtering (M samples long)
        yfilt (numpy array) = Output of the dftSynth() with filtering (M samples long)
    """
    M = len(x)
    w = get_window('hamming', M)
    outputScaleFactor = sum(w)

    mx, px = dftAnal(x, w, N)

    # np.ceil returns a float, which is not a valid slice bound, so cast it.
    # ceil (not floor) makes the slice include the bin just above 70 Hz.
    cutoff_bin = int(np.ceil(70.0 * N / fs))
    mx2 = mx.copy()
    mx2[:cutoff_bin + 1] = -120

    yone = dftSynth(mx, px, M) * outputScaleFactor
    ytwo = dftSynth(mx2, px, M) * outputScaleFactor
    return yone, ytwo
def stft(x, w, N, H):
    """
    Short-time Fourier transform analysis followed by resynthesis of a sound
    x: input sound, w: analysis window, N: FFT size, H: hop size
    returns y: output sound
    raises ValueError if H is zero or negative
    """
    if H <= 0:
        raise ValueError("Hop size (H) smaller or equal to 0")

    win_len = w.size
    half_floor, is_odd = divmod(win_len, 2)     # floor half of the window
    half_ceil = half_floor + is_odd             # rounded-up half of the window

    # pad so the first frame is centred on sample 0 and the last sample is covered
    x = np.append(np.append(np.zeros(half_floor), x), np.zeros(half_ceil))
    w = w / sum(w)                              # unit-sum analysis window
    out = np.zeros(x.size)

    centre = half_ceil                          # centre of the first frame
    last_centre = x.size - half_ceil            # last admissible frame centre
    while centre <= last_centre:
        span = slice(centre - half_ceil, centre + half_floor)
        mX, pX = DFT.dftAnal(x[span], w, N)     # analysis
        frame = DFT.dftSynth(mX, pX, win_len)   # synthesis
        out[span] += H*frame                    # overlap-add
        centre += H

    # strip the padding added before the first and after the last sample
    out = np.delete(out, slice(0, half_floor))
    out = np.delete(out, slice(out.size - half_ceil, out.size))
    return out
def suppressFreqDFTmodel(x, fs, N):
    """
    Resynthesise a frame with and without its components at or below 70 Hz.

    Inputs:
        x (numpy array) = input signal of length M (odd)
        fs (float) = sampling frequency (Hz)
        N (positive integer) = FFT size
    Outputs:
        The function returns a tuple (y, yfilt)
        y (numpy array) = Output of the dftSynth() without filtering (M samples long)
        yfilt (numpy array) = Output of the dftSynth() with filtering (M samples long)
    """
    M = len(x)
    w = get_window('hamming', M)    # smoothing window
    outputScaleFactor = sum(w)

    mX, pX = dftAnal(x, w, N)

    # Spectrum bins are spaced fs / N apart (N is the FFT size, not the window
    # length M), so the number of bins strictly below 70 Hz is ceil(70 * N / fs).
    belowAxis = int(np.ceil(70.0 * N / fs))
    mXX = mX.copy()
    mXX[:belowAxis + 1] = -120      # also covers the first bin at or above 70 Hz

    # synthesis
    y = dftSynth(mX, pX, w.size) * outputScaleFactor
    yfilt = dftSynth(mXX, pX, w.size) * outputScaleFactor
    return (y, yfilt)
def f0Twm(x, fs, w, N, H, t, minf0, maxf0, f0et):
    """
    Fundamental frequency detection using twm algorithm
    x: input sound, fs: sampling rate, w: analysis window,
    N: FFT size (minimum 512), H: hop size, t: threshold in negative dB,
    minf0: minimum f0 frequency in Hz, maxf0: maximum f0 frequency in Hz,
    f0et: error threshold in the f0 detection (ex: 5),
    returns f0: array with one fundamental-frequency value per analysed frame
    """
    hN = N // 2                         # size of positive spectrum (integer on Python 3 too)
    hM1 = (w.size + 1) // 2             # half analysis window size by rounding
    hM2 = w.size // 2                   # half analysis window size by floor
    x = np.append(np.zeros(hM2), x)     # add zeros at beginning to center first window at sample 0
    x = np.append(x, np.zeros(hM1))     # add zeros at the end to analyze last sample
    pin = hM1                           # sound pointer in middle of analysis window
    pend = x.size - hM1                 # last sample to start a frame
    w = w / sum(w)                      # normalize analysis window

    # Values are gathered in a list and converted once; appending to an array
    # inside the loop would copy the whole track on every frame.
    f0_track = []
    f0stable = 0
    while pin < pend:
        x1 = x[pin - hM1:pin + hM2]                             # select frame
        mX, pX = DFT.dftAnal(x1, w, N)                          # compute dft
        ploc = UF.peakDetection(mX, hN, t)                      # detect peak locations
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)     # refine peak values
        ipfreq = fs * iploc / N                                 # peak locations in Hz
        f0t = UF.f0DetectionTwm(ipfreq, ipmag, f0et, minf0, maxf0, f0stable)  # find f0
        if ((f0stable == 0) & (f0t > 0)) \
                or ((f0stable > 0) & (np.abs(f0stable - f0t) < f0stable / 5.0)):
            f0stable = f0t              # consider a stable f0 if it is close to the previous one
        else:
            f0stable = 0
        f0_track.append(f0t)
        pin += H                        # advance sound pointer

    return np.array(f0_track, dtype=float)
def check_k(fr1, fr2, fs, k, window):
    """
    Check that a window of size 100*k + 1 estimates every test frequency within 0.05 Hz
    fr1, fr2: the frequencies fr1, fr1+1, ... below fr2 are tested (Hz),
    fs: sampling rate, k: window size multiplier, window: window type name
    returns 0 as soon as one frequency is off by more than 0.05 Hz, 3 otherwise
    prints one progress line per tested frequency
    """
    # Everything that does not depend on the test frequency is built once.
    t = np.arange(441000.0)             # time axis of the synthetic sinusoid (samples)
    M = 100 * k + 1                     # window size
    N = 1
    while N < M:                        # smallest power of two that is >= M
        N *= 2
    w = get_window(window, M)

    # Floor division keeps the slice bounds integer on Python 3.
    l_h = len(t) // 2 - M // 2 + 1      # first sample of the analysed chunk
    h_h = l_h + M                       # one past its last sample

    for fr in fr1 + np.arange(fr2 - fr1):
        x = np.sin(2.0 * np.pi * fr * t / fs)
        (mX, pX) = DFT.dftAnal(x[l_h:h_h], w, N)
        p_loc = UF.peakDetection(mX, -40)
        p_int = UF.peakInterp(mX, pX, p_loc)
        peak = p_int[0] * (fs / float(N))   # interpolated peak locations in Hz
        error = abs(peak[0] - fr)
        # %-formatting produces the same text as the former print statements
        # and is valid on both Python 2 and Python 3.
        if error > 0.05:
            print("fr:  %s  error:  %s" % (fr, error))
            return 0
        print("fr:  %s  checked" % (fr,))
    return 3
def harmonicModel(x, fs, w, N, t, nH, minf0, maxf0, f0et):
    """
    Analysis/synthesis of a sound using the sinusoidal harmonic model
    x: input sound, fs: sampling rate, w: analysis window,
    N: FFT size (minimum 512), t: threshold in negative dB,
    nH: maximum number of harmonics, minf0: minimum f0 frequency in Hz,
    maxf0: maximum f0 frequency in Hz,
    f0et: error threshold in the f0 detection (ex: 5),
    returns y: output array sound
    """
    hM1 = (w.size + 1) // 2             # half analysis window size by rounding
    hM2 = w.size // 2                   # half analysis window size by floor
    x = np.append(np.zeros(hM2), x)     # add zeros at beginning to center first window at sample 0
    x = np.append(x, np.zeros(hM1))     # add zeros at the end to analyze last sample
    Ns = 512                            # FFT size for synthesis (even)
    # Floor division: H and hNs are used as slice bounds and window lengths,
    # so they must stay integers on Python 3.
    H = Ns // 4                         # hop size used for analysis and synthesis
    hNs = Ns // 2                       # half of synthesis FFT size
    pin = max(hNs, hM1)                 # sound pointer in middle of analysis window
    pend = x.size - max(hNs, hM1)       # last sample to start a frame
    yh = np.zeros(Ns)                   # output sound frame
    y = np.zeros(x.size)                # output array
    w = w / sum(w)                      # normalize analysis window
    sw = np.zeros(Ns)                   # synthesis window
    ow = triang(2 * H)                  # overlapping window
    sw[hNs - H:hNs + H] = ow
    bh = blackmanharris(Ns)             # synthesis window
    bh = bh / sum(bh)                   # normalize synthesis window
    sw[hNs - H:hNs + H] = sw[hNs - H:hNs + H] / bh[hNs - H:hNs + H]  # window for overlap-add
    hfreqp = []                         # harmonic frequencies of the previous frame
    f0t = 0
    f0stable = 0
    while pin < pend:
        # -----analysis-----
        x1 = x[pin - hM1:pin + hM2]                             # select frame
        mX, pX = DFT.dftAnal(x1, w, N)                          # compute dft
        ploc = UF.peakDetection(mX, t)                          # detect peak locations
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)     # refine peak values
        ipfreq = fs * iploc / N                                 # peak locations in Hz
        f0t = UF.f0Twm(ipfreq, ipmag, f0et, minf0, maxf0, f0stable)  # find f0
        if ((f0stable == 0) & (f0t > 0)) \
                or ((f0stable > 0) & (np.abs(f0stable - f0t) < f0stable / 5.0)):
            f0stable = f0t              # consider a stable f0 if it is close to the previous one
        else:
            f0stable = 0
        hfreq, hmag, hphase = harmonicDetection(ipfreq, ipmag, ipphase, f0t, nH, hfreqp, fs)  # find harmonics
        hfreqp = hfreq
        # -----synthesis-----
        Yh = UF.genSpecSines(hfreq, hmag, hphase, Ns, fs)       # generate spec sines
        fftbuffer = np.real(ifft(Yh))                           # inverse FFT
        yh[:hNs - 1] = fftbuffer[hNs + 1:]                      # undo zero-phase window
        yh[hNs - 1:] = fftbuffer[:hNs + 1]
        y[pin - hNs:pin + hNs] += sw * yh                       # overlap-add
        pin += H                                                # advance sound pointer

    y = np.delete(y, range(hM2))                    # remove the zeros added at the beginning
    y = np.delete(y, range(y.size - hM1, y.size))   # remove the zeros added at the end
    return y
def sprModel(x, fs, w, N, t):
    """
    Analysis/synthesis of a sound using the sinusoidal plus residual model, one frame at a time
    x: input sound, fs: sampling rate, w: analysis window,
    N: FFT size (minimum 512), t: threshold in negative dB,
    returns y: output sound, ys: sinusoidal component, xr: residual component
    """
    hM1 = (w.size + 1) // 2             # half analysis window size by rounding
    hM2 = w.size // 2                   # half analysis window size by floor
    Ns = 512                            # FFT size for synthesis (even)
    # Floor division: H and hNs are used as slice bounds and window lengths,
    # so they must stay integers on Python 3.
    H = Ns // 4                         # hop size used for analysis and synthesis
    hNs = Ns // 2                       # half of synthesis FFT size
    pin = max(hNs, hM1)                 # sound pointer in middle of analysis window
    pend = x.size - max(hNs, hM1)       # last sample to start a frame
    ysw = np.zeros(Ns)                  # sinusoidal output frame
    xrw = np.zeros(Ns)                  # residual output frame
    ys = np.zeros(x.size)               # sinusoidal output array
    xr = np.zeros(x.size)               # residual output array
    w = w / sum(w)                      # normalize analysis window
    sw = np.zeros(Ns)                   # synthesis window
    ow = triang(2 * H)                  # overlapping window
    sw[hNs - H:hNs + H] = ow
    bh = blackmanharris(Ns)             # synthesis window
    bh = bh / sum(bh)                   # normalize synthesis window
    wr = bh                             # window for residual
    sw[hNs - H:hNs + H] = sw[hNs - H:hNs + H] / bh[hNs - H:hNs + H]
    while pin < pend:
        # -----analysis-----
        x1 = x[pin - hM1:pin + hM2]                             # select frame
        mX, pX = DFT.dftAnal(x1, w, N)                          # compute dft
        ploc = UF.peakDetection(mX, t)                          # find peaks
        # (a second, identical peakInterp call that only re-assigned the same
        # three names was dropped)
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)     # refine peak values
        ipfreq = fs * iploc / float(N)                          # convert peak locations to Hertz
        # NOTE(review): when hM1 <= hNs the first frame gives ri == -1 and an
        # empty slice below - confirm callers always pass a window longer than Ns.
        ri = pin - hNs - 1                                      # input sound pointer for residual analysis
        xw2 = x[ri:ri + Ns] * wr                                # window the input sound
        fftbuffer = np.zeros(Ns)                                # reset buffer
        fftbuffer[:hNs] = xw2[hNs:]                             # zero-phase window in fftbuffer
        fftbuffer[hNs:] = xw2[:hNs]
        X2 = fft(fftbuffer)                                     # compute FFT for residual analysis
        # -----synthesis-----
        Ys = UF.genSpecSines(ipfreq, ipmag, ipphase, Ns, fs)    # spectrum of sinusoidal component
        Xr = X2 - Ys                                            # residual complex spectrum
        fftbuffer = np.real(ifft(Ys))                           # inverse FFT of sinusoidal spectrum
        ysw[:hNs - 1] = fftbuffer[hNs + 1:]                     # undo zero-phase window
        ysw[hNs - 1:] = fftbuffer[:hNs + 1]
        fftbuffer = np.real(ifft(Xr))                           # inverse FFT of residual spectrum
        xrw[:hNs - 1] = fftbuffer[hNs + 1:]                     # undo zero-phase window
        xrw[hNs - 1:] = fftbuffer[:hNs + 1]
        ys[ri:ri + Ns] += sw * ysw                              # overlap-add for sines
        xr[ri:ri + Ns] += sw * xrw                              # overlap-add for residual
        pin += H                                                # advance sound pointer

    y = ys + xr                         # sum of sinusoidal and residual components
    return y, ys, xr
def peak(m, n):
    """
    Estimate the frequency of the first spectral peak of an m-sample frame
    m: window size, n: FFT size
    reads x, fs, window and t from the enclosing scope (not defined in this block)
    returns fest: estimated frequency in Hz, ploc: detected peak locations,
            mX, pX: magnitude and phase spectra of the frame
    """
    # The frame is centred on sample fs/2. All bounds are integers because
    # float slice indices are rejected by numpy.
    center = int(fs * 0.5)
    x1 = x[center - m // 2:center + (m + 1) // 2]
    w = get_window(window, m)
    mX, pX = DFT.dftAnal(x1, w, n)
    ploc = UF.peakDetection(mX, t)
    iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)
    fest = fs * iploc[0] / n            # first interpolated peak, bins -> Hz
    return fest, ploc, mX, pX
def run_one_estimate(x, fs, M, window=DEFAULT_WINDOW, t=DEFAULT_THRESHOLD):
    """
    Run one peak-based frequency estimate on the M samples around the middle of x
    x: input sound, fs: sampling rate, M: window size,
    window: window type, t: peak detection threshold
    returns (mX, pX, ploc, iploc, ipmag, ipphase, fEst, N), where N is the
    FFT size given by min_power_2(M) and fEst holds the peak locations in Hz
    """
    # the frame starts int(M / 2) samples before the centre and is M samples long
    first = int(len(x) / 2) - int(M / 2)
    N = min_power_2(M)
    frame = x[first:first + M]

    mX, pX = DFT.dftAnal(frame, get_window(window, M), N)
    ploc = UF.peakDetection(mX, t)
    interpolated = UF.peakInterp(mX, pX, ploc)
    iploc, ipmag, ipphase = interpolated

    fEst = iploc * fs / N               # bin positions -> Hz
    return (mX, pX, ploc, iploc, ipmag, ipphase, fEst, N)
def suppressFreqDFTmodel(x, fs, N):
    """
    Resynthesise a frame with and without its components at or below 70 Hz.

    Inputs:
        x (numpy array) = input signal of length M (odd)
        fs (float) = sampling frequency (Hz)
        N (positive integer) = FFT size
    Outputs:
        The function returns a tuple (y, yfilt)
        y (numpy array) = Output of the dftSynth() without filtering (M samples long)
        yfilt (numpy array) = Output of the dftSynth() with filtering (M samples long)
    """
    M = len(x)
    w = get_window("hamming", M)
    outputScaleFactor = sum(w)

    # The whole input is the analysis frame (the window is as long as x), so
    # the former "fragment outside the sound" check could never fire.
    mX, pX = dftAnal(x, w, N)

    # compute the inverse dft of the spectrum
    y = dftSynth(mX, pX, w.size) * outputScaleFactor

    # Now suppress every bucket representing 70 Hz or less, including the
    # bucket just above 70 Hz.
    #
    # Buckets are fs / N apart, so 70 Hz falls at position 70 * N / fs; ceil
    # gives the first bucket at or above that position. np.ceil returns a
    # float, which is not a valid slice bound, hence the int() cast.
    cutoff = int(np.ceil(N * 70.0 / fs))

    # mX is modified in place, which is safe because y is already computed.
    mX[:cutoff + 1] = -120

    # compute the inverse dft of the filtered spectrum
    yfilt = dftSynth(mX, pX, w.size) * outputScaleFactor
    return (y, yfilt)
def harmonicModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope=0.01, minSineDur=0.02):
    """
    Analysis of a sound using the sinusoidal harmonic model
    x: input sound; fs: sampling rate, w: analysis window; N: FFT size (minimum 512);
    H: hop size; t: threshold in negative dB,
    nH: maximum number of harmonics; minf0: minimum f0 frequency in Hz,
    maxf0: maximum f0 frequency in Hz; f0et: error threshold in the f0 detection (ex: 5),
    harmDevSlope: slope of harmonic deviation; minSineDur: minimum length of harmonics
    returns xhfreq, xhmag, xhphase: harmonic frequencies, magnitudes and phases (one row per frame)
    raises ValueError if minSineDur is negative
    """
    if minSineDur < 0:
        raise ValueError("Minimum duration of sine tracks smaller than 0")

    hM1 = (w.size + 1) // 2             # half analysis window size by rounding
    hM2 = w.size // 2                   # half analysis window size by floor
    x = np.append(np.zeros(hM2), x)     # add zeros at beginning to center first window at sample 0
    x = np.append(x, np.zeros(hM2))     # add zeros at the end to analyze last sample
    pin = hM1                           # sound pointer in middle of analysis window
    pend = x.size - hM1                 # last sample to start a frame
    w = w / sum(w)                      # normalize analysis window
    hfreqp = []                         # harmonic frequencies of previous frame
    f0t = 0                             # f0 track
    f0stable = 0                        # f0 stable

    # Per-frame results are collected in lists and converted once at the end;
    # stacking inside the loop would copy every previous frame on each pass.
    frame_freqs, frame_mags, frame_phases = [], [], []
    while pin <= pend:
        x1 = x[pin - hM1:pin + hM2]                             # select frame
        mX, pX = DFT.dftAnal(x1, w, N)                          # compute dft
        ploc = UF.peakDetection(mX, t)                          # detect peak locations
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)     # refine peak values
        ipfreq = fs * iploc / N                                 # convert locations to Hz
        f0t = UF.f0Twm(ipfreq, ipmag, f0et, minf0, maxf0, f0stable)  # find f0
        if ((f0stable == 0) & (f0t > 0)) or ((f0stable > 0) & (np.abs(f0stable - f0t) < f0stable / 5.0)):
            f0stable = f0t              # consider a stable f0 if it is close to the previous one
        else:
            f0stable = 0
        hfreq, hmag, hphase = harmonicDetection(
            ipfreq, ipmag, ipphase, f0t, nH, hfreqp, fs, harmDevSlope
        )                               # find harmonics
        hfreqp = hfreq
        frame_freqs.append(hfreq)
        frame_mags.append(hmag)
        frame_phases.append(hphase)
        pin += H                        # advance sound pointer

    xhfreq = np.array(frame_freqs)
    xhmag = np.array(frame_mags)
    xhphase = np.array(frame_phases)
    xhfreq = SM.cleaningSineTracks(xhfreq, round(fs * minSineDur / H))  # delete tracks shorter than minSineDur
    return xhfreq, xhmag, xhphase
def minFreqEstErr(inputFile, f):
    """
    Find the smallest window of size 100*k + 1 whose frequency estimate is within 0.05 Hz of f.

    Inputs:
        inputFile (string) = wav file including the path
        f (float) = frequency of the sinusoid present in the input audio signal (Hz)
    Output:
        fEst (float) = Estimated frequency of the sinusoid (Hz)
        M (int) = Window size
        N (int) = FFT size
    Raises:
        ValueError if the window no longer fits inside the signal before
        the estimate gets within tolerance.
    """
    # analysis parameters:
    window = 'blackman'
    t = -40

    # read the file
    fs, s = UF.wavread(inputFile)

    # Frames are centred on sample 0.5 * fs. The centre is made an integer
    # because float slice indices are rejected by numpy.
    center = int(0.5 * fs)

    k = 1
    while True:
        # window size for this iteration (always odd)
        M = 100 * k + 1
        half = M // 2
        if center - half < 0 or center + half + 1 > len(s):
            raise ValueError("Window does not fit inside the input signal")

        # FFT size: next power of two above M
        exponent = int(np.log2(M)) + 1
        FFT_size = 2 ** exponent
        df = float(fs) / FFT_size                   # bin spacing in Hz

        # M samples around the centre
        s_sliced = s[center - half:center + half + 1]
        w = get_window(window, M)
        mX, pX = DFT.dftAnal(s_sliced, w, FFT_size)

        # detect and interpolate the peaks
        peak_locations = UF.peakDetection(mX, t)
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, peak_locations)

        # a frame without any peak is treated as "not accurate enough yet"
        if len(iploc) > 0:
            fEst = iploc[0] * df
            if abs(fEst - f) <= 0.05:
                return (fEst, M, FFT_size)
        k += 1
def stftMorph(x1, x2, fs, w1, N1, w2, N2, H1, smoothf, balancef):
    """
    Morph of two sounds using the STFT
    x1, x2: input sounds, fs: sampling rate (not used by the computation)
    w1, w2: analysis windows, N1, N2: FFT sizes, H1: hop size
    smoothf: smooth factor of sound 2, bigger than 0 to max of 1, where 1 is no smoothing,
    balancef: balance between the 2 sounds, from 0 to 1, where 0 is sound 1 and 1 is sound 2
    returns y: output sound
    """
    M1 = w1.size  # size of analysis window
    hM1_1 = (M1 + 1) // 2  # half analysis window size by rounding
    hM1_2 = M1 // 2  # half analysis window size by floor
    L = int(x1.size / H1)  # number of frames for x1
    x1 = np.append(np.zeros(hM1_2), x1)  # add zeros at beginning to center first window at sample 0
    x1 = np.append(x1, np.zeros(hM1_1))  # add zeros at the end to analyze last sample
    pin1 = hM1_1  # sound pointer in middle of analysis window
    w1 = w1 / sum(w1)  # normalize analysis window
    M2 = w2.size  # size of analysis window
    hM2_1 = (M2 + 1) // 2  # half analysis window size by rounding
    hM2_2 = M2 // 2  # half analysis window size by floor
    H2 = int(x2.size / L)  # hop size for second sound, so both sounds yield L frames
    x2 = np.append(np.zeros(hM2_2), x2)  # add zeros at beginning to center first window at sample 0
    x2 = np.append(x2, np.zeros(hM2_1))  # add zeros at the end to analyze last sample
    pin2 = hM2_1  # sound pointer in middle of analysis window
    y = np.zeros(x1.size)  # output array
    for _ in range(L):
        # -----analysis-----
        mX1, pX1 = DFT.dftAnal(x1[pin1 - hM1_1:pin1 + hM1_2], w1, N1)  # compute dft
        mX2, pX2 = DFT.dftAnal(x2[pin2 - hM2_1:pin2 + hM2_2], w2, N2)  # compute dft
        # -----transformation-----
        # resample() needs an integer sample count
        smoothSize = int(mX2.size * smoothf)
        mX2smooth = resample(np.maximum(-200, mX2), smoothSize)  # smooth spectrum of second sound
        # bring the smoothed envelope back to the size of mX1 so the two spectra can be mixed
        mX2 = resample(mX2smooth, mX1.size)
        mY = balancef * mX2 + (1 - balancef) * mX1  # generate output spectrum
        # -----synthesis-----
        y[pin1 - hM1_1:pin1 + hM1_2] += H1 * DFT.dftSynth(mY, pX1, M1)  # overlap-add
        pin1 += H1  # advance sound pointer
        pin2 += H2  # advance sound pointer
    y = np.delete(y, range(hM1_2))  # drop the zeros added in front of x1
    y = np.delete(y, range(y.size - hM1_1, y.size))  # drop the zeros added after x1
    return y
def sineModelMultiRes(x, fs, wList, NList, t, BList):
    """
    Analysis/synthesis of a sound using the multi-resolution sinusoidal model, without sine tracking
    x: input array sound, fs: sampling rate,
    wList: analysis windows (one per band), NList: FFT sizes (one per band),
    t: threshold in negative dB,
    BList: (Bmin, Bmax) frequency limits in Hz per band; a peak is kept when Bmin <= freq < Bmax
    returns y: output array sound (sum of the resynthesised bands)
    """
    # -----synthesis params init-----
    Ns = 512  # FFT size for synthesis (even)
    H = Ns // 4  # hop size; integer division keeps the slice bounds below integral
    hNs = Ns // 2  # half of synthesis FFT size
    yw = np.zeros(Ns)  # output sound frame
    y = np.zeros(x.size)  # output array shared by all bands
    sw = np.zeros(Ns)  # synthesis window
    ow = triang(2 * H)  # triangular window
    sw[hNs - H:hNs + H] = ow  # add triangular window
    bh = blackmanharris(Ns)  # blackmanharris window
    bh = bh / sum(bh)  # normalized blackmanharris window
    sw[hNs - H:hNs + H] = sw[hNs - H:hNs + H] / bh[hNs - H:hNs + H]  # normalized synthesis window

    for w, N, band in zip(wList, NList, BList):
        # -----analysis params init-----
        Bmin, Bmax = band[0], band[1]
        hM1 = (w.size + 1) // 2  # half analysis window size by rounding
        hM2 = w.size // 2  # half analysis window size by floor
        pin = max(hNs, hM1)  # sound pointer in middle of analysis window
        pend = x.size - max(hNs, hM1)  # last sample to start a frame
        w = w / sum(w)  # normalize analysis window
        while pin < pend:  # while input sound pointer is within sound
            # -----analysis-----
            x1 = x[pin - hM1:pin + hM2]  # select frame
            mX, pX = DFT.dftAnal(x1, w, N)  # compute dft
            ploc = UF.peakDetection(mX, t)  # detect locations of peaks
            iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)  # refine peak values by interpolation
            ipfreq = fs * iploc / float(N)  # convert peak locations to Hertz
            inBand = np.logical_and(ipfreq >= Bmin, ipfreq < Bmax)  # one mask for all three arrays
            ipmag = ipmag[inBand]
            ipphase = ipphase[inBand]
            ipfreq = ipfreq[inBand]
            # -----synthesis-----
            Y = UF.genSpecSines(ipfreq, ipmag, ipphase, Ns, fs)  # generate sines in the spectrum
            fftbuffer = np.real(ifft(Y))  # compute inverse FFT
            yw[:hNs - 1] = fftbuffer[hNs + 1:]  # undo zero-phase window
            yw[hNs - 1:] = fftbuffer[:hNs + 1]
            y[pin - hNs:pin + hNs] += sw * yw  # overlap-add and apply a synthesis window
            pin += H  # advance sound pointer
    return y
def sineModelAnal(x, fs, w, N, H, t, maxnSines = 100, minSineDur=.01, freqDevOffset=20, freqDevSlope=0.01):
    """
    Analysis of a sound using the sinusoidal model with sine tracking
    x: input array sound, w: analysis window, N: size of complex spectrum, H: hop-size, t: threshold in negative dB
    maxnSines: maximum number of sines per frame, minSineDur: minimum duration of sines in seconds
    freqDevOffset: minimum frequency deviation at 0Hz, freqDevSlope: slope increase of minimum frequency deviation
    returns xtfreq, xtmag, xtphase: frequencies, magnitudes and phases of sinusoidal tracks,
            each a 2-D array with one row per frame and maxnSines columns
    raises ValueError if minSineDur is negative or if the padded input holds no analysis frame
    """
    if minSineDur < 0:  # a negative minimum duration is rejected up front
        raise ValueError("Minimum duration of sine tracks smaller than 0")

    def padRow(values):
        # zero-pad the (already truncated) track values to a fixed-width row
        row = np.zeros(maxnSines)
        row[:values.size] = values
        return row

    hM1 = (w.size + 1) // 2  # half analysis window size by rounding
    hM2 = w.size // 2  # half analysis window size by floor
    x = np.append(np.zeros(hM2), x)  # add zeros at beginning to center first window at sample 0
    x = np.append(x, np.zeros(hM2))  # add zeros at the end to analyze last sample
    pin = hM1  # sound pointer in middle of analysis window
    pend = x.size - hM1  # last sample to start a frame
    w = w / sum(w)  # normalize analysis window
    tfreq = np.array([])  # tracks carried over from the previous frame
    freqRows, magRows, phaseRows = [], [], []  # stacked once after the loop
    while pin < pend:  # while input sound pointer is within sound
        x1 = x[pin - hM1:pin + hM2]  # select frame
        mX, pX = DFT.dftAnal(x1, w, N)  # compute dft
        ploc = UF.peakDetection(mX, t)  # detect locations of peaks
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)  # refine peak values by interpolation
        ipfreq = fs * iploc / float(N)  # convert peak locations to Hertz
        # perform sinusoidal tracking by adding peaks to trajectories
        tfreq, tmag, tphase = sineTracking(ipfreq, ipmag, ipphase, tfreq, freqDevOffset, freqDevSlope)
        # limit number of tracks to maxnSines; the truncated tfreq also feeds the next frame's tracking
        tfreq = np.resize(tfreq, min(maxnSines, tfreq.size))
        tmag = np.resize(tmag, min(maxnSines, tmag.size))
        tphase = np.resize(tphase, min(maxnSines, tphase.size))
        freqRows.append(padRow(tfreq))
        magRows.append(padRow(tmag))
        phaseRows.append(padRow(tphase))
        pin += H

    if not freqRows:
        raise ValueError("Input sound is too short to hold one analysis frame")
    xtfreq = np.vstack(freqRows)
    xtmag = np.vstack(magRows)
    xtphase = np.vstack(phaseRows)
    # delete sine tracks shorter than minSineDur
    xtfreq = cleaningSineTracks(xtfreq, round(fs * minSineDur / H))
    return xtfreq, xtmag, xtphase
def minFreqEstErr(inputFile, f):
    """
    Inputs:
        inputFile (string) = wav file including the path
        f (float) = frequency of the sinusoid present in the input audio signal (Hz)
    Output:
        fEst (float) = Estimated frequency of the sinusoid (Hz)
        M (int) = Window size
        N (int) = FFT size

    Tries window sizes M = 100*k + 1 for k = 1, 2, ... and returns the first
    estimate that is less than 0.05 Hz away from f. A frame in which no peak
    is found is skipped and the next window size is tried.
    """
    # analysis parameters:
    window = 'blackman'
    t = -40
    fs, x = UF.wavread(inputFile)  # read once; the file does not change between iterations
    k = 0
    while True:
        k += 1
        M = 100 * k + 1
        N = nextPow2(M)
        w = get_window(window, M)
        # M samples starting half a window before 0.5 s; bounds are cast to int
        # because slice indices must be integers
        start = int(0.5 * fs - M / 2.0)
        x1 = x[start:start + M]
        mX, pX = DFT.dftAnal(x1, w, N)
        ploc = UF.peakDetection(mX, t)
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)
        fEst = iploc * fs / N
        if len(fEst) >= 1 and abs(f - fEst[0]) < 0.05:
            return (fEst[0], M, N)
def test():
    """
    Search for the smallest k such that a blackman window of M = 100*k + 1
    samples estimates every integer frequency in [100, 8000) Hz with an error
    of at most 0.05 Hz, printing each error and finally fEst, k, M and N.
    Sines come from generateSine(f); the sampling rate is fixed at 44100 Hz.
    """
    window = 'blackman'
    t = -40
    fs = 44100
    center = int(.5 * fs)  # integer sample index; slice bounds must be integers
    k = 1
    matched = False
    while True:
        M = (100 * k) + 1
        N = int(pow(2, np.ceil(np.log2(M))))  # next power of two >= M
        w = get_window(window, M)
        half = M // 2
        for f in np.arange(100, 8000):
            x = generateSine(f)
            x1 = x[center - half:center + half + 1]  # M samples centred at 0.5 s
            mX, pX = DFT.dftAnal(x1, w, N)
            ploc = UF.peakDetection(mX, t)
            (iploc, ipmag, ipphase) = UF.peakInterp(mX, pX, ploc)
            # all detected peak locations are summed before converting to Hz
            fEst = (fs * float(np.sum(iploc))) / float(N)
            esterror = np.abs(fEst - f)
            print(esterror)
            if esterror > 0.05:
                matched = False  # this window size fails; try a longer one
                break
            matched = True
        if matched:
            break
        k += 1
    print(fEst)
    print(k)
    print(M)
    print(N)
def suppressFreqDFTmodel(x, fs, N):
    """
    Inputs:
        x (numpy array) = input signal of length M (odd)
        fs (float) = sampling frequency (Hz)
        N (positive integer) = FFT size
    Outputs:
        The function should return a tuple (y, yfilt)
        y (numpy array) = Output of the dftSynth() without filtering (M samples long)
        yfilt (numpy array) = Output of the dftSynth() with filtering (M samples long)
    The first few lines of the code have been written for you, do not modify it.
    """
    M = len(x)
    w = get_window('hamming', M)
    outputScaleFactor = sum(w)
    (mX, pX) = dftAnal(x, w, N)
    # Number of bins to silence: every bin up to and including the first one at
    # or above 70 Hz. Cast to int because slice bounds must be integers; 70.0
    # forces true division even when fs is an int.
    binCount = int(np.ceil(70.0 * N / fs)) + 1
    filtMX = mX.copy()
    filtMX[:binCount] = -120
    y = dftSynth(mX, pX, w.size) * outputScaleFactor
    yfilt = dftSynth(filtMX, pX, w.size) * outputScaleFactor
    return (y, yfilt)
def suppressFreqDFTmodel(x, fs, N):
    """
    Inputs:
        x (numpy array) = input signal of length M (odd)
        fs (float) = sampling frequency (Hz)
        N (positive integer) = FFT size
    Outputs:
        The function should return a tuple (y, yfilt)
        y (numpy array) = Output of the dftSynth() without filtering (M samples long)
        yfilt (numpy array) = Output of the dftSynth() with filtering (M samples long)
    The first few lines of the code have been written for you, do not modify it.
    """
    M = len(x)  # length of x
    w = get_window('hamming', M)  # window
    outputScaleFactor = sum(w)  # undoes the window normalisation after synthesis
    mX, pX = dftAnal(x, w, N)  # magnitude (in dB) and phase spectra
    y = dftSynth(mX, pX, w.size) * outputScaleFactor  # resynthesis without filtering
    # index of the first bin at or above 70 Hz; int() because slice bounds must be integers
    binsToFilter = int(np.ceil(70.0 * N / fs))
    mX[:binsToFilter + 1] = -120  # bins 0..binsToFilter are set to -120 dB (slice end is exclusive)
    yfilt = dftSynth(mX, pX, w.size) * outputScaleFactor  # resynthesis with the modified magnitudes
    return (y, yfilt)
def sineModel(x, fs, w, N, t):
    """
    Analysis/synthesis of a sound using the sinusoidal model, without sine tracking
    x: input array sound, fs: sampling rate, w: analysis window,
    N: size of complex spectrum, t: threshold in negative dB
    returns y: output array sound (same length as x)
    """
    hM1 = (w.size + 1) // 2  # half analysis window size by rounding
    hM2 = w.size // 2  # half analysis window size by floor
    Ns = 512  # FFT size for synthesis (even)
    H = Ns // 4  # hop size used for analysis and synthesis
    hNs = Ns // 2  # half of synthesis FFT size
    pin = max(hNs, hM1)  # sound pointer in middle of analysis window
    pend = x.size - max(hNs, hM1)  # last sample to start a frame
    yw = np.zeros(Ns)  # output sound frame
    y = np.zeros(x.size)  # output array
    w = w / sum(w)  # normalize analysis window
    sw = np.zeros(Ns)  # synthesis window
    ow = triang(2 * H)  # triangular window
    sw[hNs - H:hNs + H] = ow  # add triangular window
    bh = blackmanharris(Ns)  # blackmanharris window
    bh = bh / sum(bh)  # normalized blackmanharris window
    sw[hNs - H:hNs + H] = sw[hNs - H:hNs + H] / bh[hNs - H:hNs + H]  # normalized synthesis window
    while pin < pend:  # while input sound pointer is within sound
        # -----analysis-----
        x1 = x[pin - hM1:pin + hM2]  # select frame
        mX, pX = DFT.dftAnal(x1, w, N)  # compute dft
        ploc = UF.peakDetection(mX, t)  # detect locations of peaks
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)  # refine peak values by interpolation
        ipfreq = fs * iploc / float(N)  # convert peak locations to Hertz
        # -----synthesis-----
        Y = UF.genSpecSines(ipfreq, ipmag, ipphase, Ns, fs)  # generate sines in the spectrum
        fftbuffer = np.real(ifft(Y))  # compute inverse FFT
        yw[:hNs - 1] = fftbuffer[hNs + 1:]  # undo zero-phase window
        yw[hNs - 1:] = fftbuffer[:hNs + 1]
        y[pin - hNs:pin + hNs] += sw * yw  # overlap-add and apply a synthesis window
        pin += H  # advance sound pointer
    return y
def minFreqEstErr(inputFile, f):
    """
    Inputs:
        inputFile (string) = wav file including the path
        f (float) = frequency of the sinusoid present in the input audio signal (Hz)
    Output:
        fEst (float) = Estimated frequency of the sinusoid (Hz)
        M (int) = Window size
        N (int) = FFT size

    Window sizes M = 100*k + 1 (k = 1, 2, ...) are tried until the estimation
    error drops below 0.05 Hz. The error of every attempt and the final result
    are printed.
    """
    # analysis parameters:
    window = 'blackman'
    t = -40
    fs, x = UF.wavread(inputFile)
    center = int(0.5 * fs)  # integer sample index; slice bounds must be integers
    k = 1
    while True:
        M = 100 * k + 1
        N = int(2 ** (math.floor(np.log2(M)) + 1))  # next power of two above M
        w = get_window(window, M)
        half = (M + 1) // 2
        x1 = x[center - half:center + half - 1]  # M samples (M is odd)
        mX, pX = DFT.dftAnal(x1, w, N)
        ploc = UF.peakDetection(mX, t)
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)
        ipfreq = fs * iploc / float(N)
        fEst = ipfreq[0]
        fEstError = abs(fEst - f)
        print("fEstError {0:.3f}".format(fEstError))
        if fEstError < 0.05:
            break
        k += 1
    print("fEst {}, M {}, N {}, frequency estimation error {:.3f}".format(fEst, M, N, fEstError))
    return fEst, M, N
def stftFiltering(x, fs, w, N, H, filter):
    """
    Apply a filter to a sound by using the STFT
    x: input sound, fs: sampling rate (not used by the computation),
    w: analysis window, N: FFT size, H: hop size
    filter: magnitude response of the filter in dB, added to the magnitude spectrum of every frame
    returns y: output sound
    raises ValueError if H is zero or negative
    """
    if H <= 0:  # a non-positive hop would never advance the frame pointer
        raise ValueError("Hop size (H) smaller or equal to 0")

    M = w.size  # size of analysis window
    hM1 = (M + 1) // 2  # half analysis window size by rounding
    hM2 = M // 2  # half analysis window size by floor
    x = np.append(np.zeros(hM2), x)  # add zeros at beginning to center first window at sample 0
    x = np.append(x, np.zeros(hM1))  # add zeros at the end to analyze last sample
    pin = hM1  # sound pointer in middle of analysis window
    pend = x.size - hM1  # last sample to start a frame
    w = w / sum(w)  # normalize analysis window
    y = np.zeros(x.size)  # output array
    while pin <= pend:  # while sound pointer is not past the last frame start
        # -----analysis-----
        x1 = x[pin - hM1:pin + hM2]  # select one frame of input sound
        mX, pX = DFT.dftAnal(x1, w, N)  # compute dft
        # ------transformation-----
        mY = mX + filter  # filter input magnitude spectrum
        # -----synthesis-----
        y1 = DFT.dftSynth(mY, pX, M)  # compute idft
        y[pin - hM1:pin + hM2] += H * y1  # overlap-add to generate output sound
        pin += H  # advance sound pointer
    y = np.delete(y, range(hM2))  # drop the zeros added in front of the input
    y = np.delete(y, range(y.size - hM1, y.size))  # drop the zeros added after the input
    return y
def stft(x, w, N, H):
    """
    Short-time Fourier transform analysis followed by resynthesis
    x: input sound, w: analysis window, N: FFT size, H: hop size
    returns y: output sound
    raises ValueError when the hop size is zero or negative
    """
    if H <= 0:
        raise ValueError("Hop size (H) smaller or equal to 0")

    winSize = w.size
    halfUp = (winSize + 1) // 2  # half window, rounded up
    halfDown = winSize // 2  # half window, rounded down
    # zeros in front centre the first window on sample 0; zeros behind let the last sample be analysed
    padded = np.append(np.append(np.zeros(halfDown), x), np.zeros(halfUp))
    normWin = w / sum(w)  # unit-sum analysis window
    out = np.zeros(padded.size)
    lastCenter = padded.size - halfUp  # last position at which a frame may be centred
    center = halfUp
    while center <= lastCenter:
        lo, hi = center - halfUp, center + halfDown
        # analysis of the current frame
        mX, pX = DFT.dftAnal(padded[lo:hi], normWin, N)
        # synthesis and overlap-add
        out[lo:hi] += H * DFT.dftSynth(mX, pX, winSize)
        center += H
    out = np.delete(out, range(halfDown))  # remove the leading padding
    return np.delete(out, range(out.size - halfUp, out.size))  # remove the trailing padding
def suppressFreqDFTmodel(x, fs, N):
    """
    Resynthesise x through the DFT model twice: once untouched and once with
    the lowest bins of the magnitude spectrum forced to -120 dB.

    Inputs:
        x (numpy array) = input signal of length M (odd)
        fs (float) = sampling frequency (Hz)
        N (positive integer) = FFT size
    Outputs:
        tuple (y, yfilt)
        y (numpy array) = Output of the dftSynth() without filtering (M samples long)
        yfilt (numpy array) = Output of the dftSynth() with filtering (M samples long)
    """
    cutoffHz = 70.0
    floorDb = -120  # dB value written into the suppressed bins

    M = len(x)
    w = get_window('hamming', M)
    outputScaleFactor = sum(w)

    mX, pX = dftAnal(x, w, N)
    # bin index for the cutoff as reported by the project helper
    lastBin = asu.calcBinValue(fs, N, cutoffHz)
    mXfilt = mX.copy()
    mXfilt[:(lastBin + 1)] = floorDb

    # inverse DFT of each spectrum, rescaled by the window sum
    plain = dftSynth(mX, pX, w.size) * outputScaleFactor
    filtered = dftSynth(mXfilt, pX, w.size) * outputScaleFactor
    return (plain, filtered)
def suppressFreqDFTmodel(x, fs, N):
    """
    Inputs:
        x (numpy array) = input signal of length M (odd)
        fs (float) = sampling frequency (Hz)
        N (positive integer) = FFT size
    Outputs:
        The function should return a tuple (y, yfilt)
        y (numpy array) = Output of the dftSynth() without filtering (M samples long)
        yfilt (numpy array) = Output of the dftSynth() with filtering (M samples long)
    The first few lines of the code have been written for you, do not modify it.
    """
    M = len(x)
    w = get_window('hamming', M)
    outputScaleFactor = sum(w)

    mX, pX = dftAnal(x, w, N)
    y = dftSynth(mX, pX, w.size) * outputScaleFactor

    # First bin at or above 70 Hz. The float literal forces true division, so
    # an integer fs cannot floor the quotient before ceil() is applied.
    binw70 = int(np.ceil(70.0 * N / fs))

    # applying the filter: bins 0..binw70 are set to -120 dB
    mF = mX.copy()
    mF[:binw70 + 1] = -120.0

    yfilt = dftSynth(mF, pX, w.size) * outputScaleFactor
    return y, yfilt
def time2Freq(x, fs, w, N, pinFirst, hopSizeMelodia, t, frameOffset=300):
    '''
    Makes the fourier transform, peak thresholding and interpolation for one window.

    x: input sound, fs: sampling rate, w: analysis window, N: FFT size,
    pinFirst: sample position of the first frame, hopSizeMelodia: hop size in samples,
    t: peak detection threshold,
    frameOffset: number of hops after pinFirst at which the frame is taken (default 300)
    returns mX and the interpolated iploc, ipmag, ipphase
    '''
    ###################
    ## prepare params
    hM1 = (w.size + 1) // 2  # half analysis window size by rounding
    hM2 = w.size // 2  # half analysis window size by floor
    x = np.append(np.zeros(hM2), x)  # add zeros at beginning to center first window at sample 0
    x = np.append(x, np.zeros(hM2))  # add zeros at the end to analyze last sample
    pin = pinFirst + frameOffset * hopSizeMelodia  # centre of the analysed frame

    ########################
    # process one window
    print("at time {}".format(pin / fs))
    x1 = x[pin - hM1:pin + hM2]  # select frame
    mX, pX = DFT.dftAnal(x1, w, N)  # compute dft
    ploc = UF.peakDetection(mX, t)  # detect peak locations
    iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)  # refine peak values

    # optional
    visualize(N, mX, pin, fs, ploc, iploc, ipmag, ipphase)

    return mX, iploc, ipmag, ipphase
def minFreqEstErr(inputFile, f):
    """
    Inputs:
        inputFile (string) = wav file including the path
        f (float) = frequency of the sinusoid present in the input audio signal (Hz)
    Output:
        fEst (float) = Estimated frequency of the sinusoid (Hz)
        M (int) = Window size
        N (int) = FFT size

    Window sizes M = 100*k + 1 are tried for k = 1..99 and the search stops at
    the first estimate within 0.05 Hz of f. If none qualifies, the values of
    the last attempt are returned.
    """
    # analysis parameters:
    window = 'blackman'
    t = -40
    (fs, x) = UF.wavread(inputFile)  # read in the inputFile
    Ns = 2 ** np.arange(24)  # list of possible FFT sizes
    error = 0.05  # allowable frequency error in Hz
    mid = x.size // 2  # integer centre sample; slice bounds must be integers
    for k in range(1, 100):
        M = 100 * k + 1
        w = get_window(window, M)  # get the window
        hM1 = (M + 1) // 2  # half analysis window size by rounding
        hM2 = M // 2  # half analysis window size by floor
        fftbuffer = x[mid - hM2:mid + hM1]  # M samples around the centre of the file
        N = int(Ns[np.where(Ns > M)[0][0]])  # smallest power of two larger than M, as a plain int
        (mX, pX) = DFT.dftAnal(fftbuffer, w, N)  # calculate the dft
        ploc = UF.peakDetection(mX, t)  # get peak locations
        (iploc, ipmag, ipphase) = UF.peakInterp(mX, pX, ploc)  # parabolic interpolation of the peaks
        fEst = fs * iploc[0] / N
        if abs(fEst - f) <= error:
            break
    return (fEst, M, N)
def suppressFreqDFTmodel(x, fs, N):
    """
    Resynthesise x through the DFT model with and without suppression of the
    spectrum below 70 Hz.

    Inputs:
        x (numpy array) = input signal of length M (odd)
        fs (float) = sampling frequency (Hz)
        N (positive integer) = FFT size
    Outputs:
        tuple (y, yfilt)
        y (numpy array) = Output of the dftSynth() without filtering (M samples long)
        yfilt (numpy array) = Output of the dftSynth() with filtering (M samples long)
    """
    M = len(x)
    w = get_window('hamming', M)
    outputScaleFactor = sum(w)

    # magnitude and phase spectra, then the unaltered resynthesis
    mX, pX = dftAnal(x, w, N)
    y = dftSynth(mX, pX, w.size) * outputScaleFactor

    # width of one bin in Hz, taking the last bin of mX as fs/2
    binHz = (0.5 * fs) / (mX.size - 1)
    cutBin = int(np.ceil(70 / binHz))  # first bin at or above 70 Hz

    # bins 0..cutBin go to -120 dB before the second resynthesis
    mX[:cutBin + 1] = -120
    yfilt = dftSynth(mX, pX, w.size) * outputScaleFactor
    return y, yfilt
def minFreqEstErr(inputFile, f):
    """
    Inputs:
        inputFile (string) = wav file including the path
        f (float) = frequency of the sinusoid present in the input audio signal (Hz)
    Output:
        fEst (float) = Estimated frequency of the sinusoid (Hz)
        M (int) = Window size
        N (int) = FFT size

    Window sizes M = 100*k + 1 (k = 1, 2, ...) are tried on a fragment taken
    around the middle of the file until the estimate is within 0.05 Hz of f.
    """
    t = -40
    window = 'blackman'
    fs, x = UF.wavread(inputFile)
    center = int(0.5 * x.size)  # integer centre sample
    estimationError = 1000  # larger than the tolerance so the loop runs at least once
    iterM = 1
    while estimationError > 0.05:
        M = iterM * 100 + 1
        # Exactly M samples: integer arithmetic keeps the fragment the same
        # length as the window (true division of the odd M yields M + 1 samples).
        start = center - M // 2
        fragment = x[start:start + M]
        w = get_window(window, M, False)
        N = int(np.power(2, np.ceil(np.log2(M))))  # next power of two >= M
        mX, pX = DFT.dftAnal(fragment, w, N)
        ploc = UF.peakDetection(mX, t)
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)
        locBinToHz = iploc[0] * fs / N  # first interpolated peak, in Hz
        estimationError = np.abs(f - locBinToHz)
        iterM = iterM + 1
    return locBinToHz, int(M), int(N)
# Script fragment: plots a hamming-windowed frame of x together with its
# magnitude and phase spectra, then analyses a second frame with N = 1024.
# NOTE(review): x, fs, the first value of N and the DF module are defined
# outside the visible part of this script.
start = int(.81 * fs)  # frame starts 0.81 s into the sound
x1 = x[start:start + N]  # N samples
plt.figure(1, figsize=(9.5, 6))
plt.subplot(321)
# windowed frame against time in seconds
plt.plot(np.arange(start, (start + N), 1.0) / fs, x1 * np.hamming(N), 'b', lw=1.5)
plt.axis([ start / fs, (start + N) / fs, min(x1 * np.hamming(N)), max(x1 * np.hamming(N)) ])
plt.title('x1, M = 128')  # NOTE(review): the title presumes N is 128 here - confirm
mX, pX = DF.dftAnal(x1, np.hamming(N), N)
plt.subplot(323)
# magnitude spectrum with the frequency axis scaled to 0..fs/2
plt.plot((fs / 2.0) * np.arange(mX.size) / float(mX.size), mX, 'r', lw=1.5)
plt.axis([0, fs / 2.0, -90, max(mX)])
plt.title('mX1')
plt.subplot(325)
# phase spectrum on the same frequency axis
plt.plot((fs / 2.0) * np.arange(mX.size) / float(mX.size), pX, 'c', lw=1.5)
plt.axis([0, fs / 2.0, min(pX), max(pX)])
plt.title('pX1')
# second analysis: same start position, 1024-sample frame and window
N = 1024
start = int(.81 * fs)
x2 = x[start:start + N]
mX, pX = DF.dftAnal(x2, np.hamming(N), N)
# Script fragment: analyses one frame of sine-440-490.wav, plots its phase
# spectrum with the interpolated peaks, then loads vibraphone-C6.wav and sets
# the parameters for a second analysis (continued outside the visible part).
import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import hamming, triang, blackmanharris
import sys, os, functools, time
# make the project's model modules importable relative to this file's location
sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), '../../../software/models/'))
import dftModel as DFT
import utilFunctions as UF
(fs, x) = UF.wavread('../../../sounds/sine-440-490.wav')
w = np.hamming(3529)  # analysis window
N = 32768  # FFT size
hN = N/2  # half the FFT size (not used in the visible part)
t = -20  # peak detection threshold
pin = 4850  # first sample of the analysed frame
x1 = x[pin:pin+w.size]  # one window-length frame
mX1, pX1 = DFT.dftAnal(x1, w, N)
ploc = UF.peakDetection(mX1, t)
pmag = mX1[ploc]  # magnitudes at the detected peak bins (not used in the visible part)
iploc, ipmag, ipphase = UF.peakInterp(mX1, pX1, ploc)
plt.figure(1, figsize=(9, 6))
plt.subplot(311)
# phase spectrum against frequency in Hz
plt.plot(fs*np.arange(pX1.size)/float(N), pX1, 'c', lw=1.5)
# interpolated peak phases drawn as crosses
plt.plot(fs * iploc / N, ipphase, marker='x', color='b', alpha=1, linestyle='', markeredgewidth=1.5)
plt.axis([200, 1000, 50, 200])
plt.title('pX + peaks (sine-440-490.wav)')
# second sound: fs, x, w and N are rebound for the next analysis
(fs, x) = UF.wavread('../../../sounds/vibraphone-C6.wav')
w = np.blackman(401)
N = 1024
# Script fragment: plots a 4096-sample excerpt of x and an impulse response,
# their magnitude spectra shifted so each maximum sits at 0, and starts the
# analysis of their convolution.
# NOTE(review): x and the DF module are defined outside the visible part; the
# plot title suggests x comes from ocean.wav - confirm.
(fs, x2) = UF.wavread('../../../sounds/impulse-response.wav')
x1 = x[40000:44096]  # 4096 samples
N = 4096
plt.figure(1, figsize=(9.5, 7))
plt.subplot(3, 2, 1)
plt.title('x1 (ocean.wav)')
plt.plot(x1, 'b')
plt.axis([0, N, min(x1), max(x1)])
plt.subplot(3, 2, 2)
plt.title('x2 (impulse-response.wav)')
plt.plot(x2, 'b')
plt.axis([0, N, min(x2), max(x2)])
# all-ones window of length N
mX1, pX1 = DF.dftAnal(x1, np.ones(N), N)
mX1 = mX1 - max(mX1)  # shift so the maximum is 0
plt.subplot(3, 2, 3)
plt.title('mX1')
plt.plot(mX1, 'r')
plt.axis([0, N / 2, -70, 0])
mX2, pX2 = DF.dftAnal(x2, np.ones(N), N)
mX2 = mX2 - max(mX2)  # shift so the maximum is 0
plt.subplot(3, 2, 4)
plt.title('mX2')
plt.plot(mX2, 'r')
plt.axis([0, N / 2, -70, 0])
# time-domain convolution; only its first N samples are analysed
y = np.convolve(x1, x2)
mY, pY = DF.dftAnal(y[0:N], np.ones(N), N)
def hprModel(x, fs, w, N, t, nH, minf0, maxf0, f0et):
    """
    Analysis/synthesis of a sound using the harmonic plus residual model
    x: input sound, fs: sampling rate, w: analysis window,
    N: FFT size (minimum 512), t: threshold in negative dB,
    nH: maximum number of harmonics, minf0: minimum f0 frequency in Hz,
    maxf0: maximum f0 frequency in Hz,
    f0et: error threshold in the f0 detection (ex: 5)
    returns y: output sound, yh: harmonic component, xr: residual component
    """
    hM1 = (w.size + 1) // 2  # half analysis window size by rounding
    hM2 = w.size // 2  # half analysis window size by floor
    Ns = 512  # FFT size for synthesis (even)
    H = Ns // 4  # hop size; integer division keeps the slice bounds below integral
    hNs = Ns // 2  # half of synthesis FFT size
    # The residual frame starts at pin - hNs - 1, so the first frame must be
    # centred at hNs + 1 or later to keep that start index non-negative.
    pin = max(hNs + 1, hM1)
    pend = x.size - max(hNs, hM1)  # last sample to start a frame
    yhw = np.zeros(Ns)  # harmonic output frame
    xrw = np.zeros(Ns)  # residual output frame
    yh = np.zeros(x.size)  # harmonic output array
    xr = np.zeros(x.size)  # residual output array
    w = w / sum(w)  # normalize analysis window
    sw = np.zeros(Ns)
    ow = triang(2 * H)  # overlapping window
    sw[hNs - H:hNs + H] = ow
    bh = blackmanharris(Ns)  # synthesis window
    bh = bh / sum(bh)  # normalize synthesis window
    wr = bh  # window for residual
    sw[hNs - H:hNs + H] = sw[hNs - H:hNs + H] / bh[hNs - H:hNs + H]
    hfreqp = []  # harmonic frequencies of the previous frame
    f0stable = 0  # last f0 accepted as stable (0 means none)
    while pin < pend:
        # -----analysis-----
        x1 = x[pin - hM1:pin + hM2]  # select frame
        mX, pX = DFT.dftAnal(x1, w, N)  # compute dft
        ploc = UF.peakDetection(mX, t)  # find peaks
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)  # refine peak values
        ipfreq = fs * iploc / N  # convert locations to Hz
        f0t = UF.f0Twm(ipfreq, ipmag, f0et, minf0, maxf0, f0stable)  # find f0
        # f0 is stable when it starts a new track or stays within 20% of the previous stable value
        startsTrack = (f0stable == 0) and (f0t > 0)
        continuesTrack = (f0stable > 0) and (np.abs(f0stable - f0t) < f0stable / 5.0)
        f0stable = f0t if (startsTrack or continuesTrack) else 0
        hfreq, hmag, hphase = HM.harmonicDetection(ipfreq, ipmag, ipphase, f0t, nH, hfreqp, fs)  # find harmonics
        hfreqp = hfreq
        ri = pin - hNs - 1  # input sound pointer for residual analysis
        xw2 = x[ri:ri + Ns] * wr  # window the input sound
        fftbuffer = np.zeros(Ns)
        fftbuffer[:hNs] = xw2[hNs:]  # zero-phase window in fftbuffer
        fftbuffer[hNs:] = xw2[:hNs]
        X2 = fft(fftbuffer)  # FFT of input signal for residual analysis
        # -----synthesis-----
        Yh = UF.genSpecSines(hfreq, hmag, hphase, Ns, fs)  # generate sines
        Xr = X2 - Yh  # residual complex spectrum
        fftbuffer = np.real(ifft(Yh))  # inverse FFT of harmonic spectrum
        yhw[:hNs - 1] = fftbuffer[hNs + 1:]  # undo zero-phase window
        yhw[hNs - 1:] = fftbuffer[:hNs + 1]
        fftbuffer = np.real(ifft(Xr))  # inverse FFT of residual spectrum
        xrw[:hNs - 1] = fftbuffer[hNs + 1:]  # undo zero-phase window
        xrw[hNs - 1:] = fftbuffer[:hNs + 1]
        yh[ri:ri + Ns] += sw * yhw  # overlap-add for sines
        xr[ri:ri + Ns] += sw * xrw  # overlap-add for residual
        pin += H  # advance sound pointer
    y = yh + xr  # sum of harmonic and residual components
    return y, yh, xr
def sineModelMultiRes(x, fs, w1, w2, w3, N1, N2, N3, t, B1, B2, B3):
    """
    Multi-resolution sinusoidal analysis/synthesis, without sine tracking.
    Three analysis passes are run, each with its own window and FFT size, and
    each keeping only the peaks whose bin falls inside its band.
    x: input array sound, fs: sampling rate,
    w1, w2, w3: analysis windows, N1, N2, N3: sizes of the complex spectra
    (also used as synthesis FFT size of the corresponding pass),
    t: threshold in negative dB,
    B1, B2, B3: upper frequency limits (Hz) of the three bands
    returns the sum of the three resynthesised bands
    """
    import dftModel as DFT
    import utilFunctions as UF  # sms-tool https://github.com/MTG/sms-tools
    import numpy as np

    # first and last admissible peak bin of every band
    firstBins = [0, np.floor(B1 * N2 / fs), np.floor(B2 * N3 / fs)]
    lastBins = [np.ceil(B1 * N1 / fs), np.ceil(B2 * N2 / fs), np.ceil(B3 * N3 / fs)]

    total = np.zeros(len(x))  # accumulated output of all bands
    for win, fftSize, loBin, hiBin in zip([w1, w2, w3], [N1, N2, N3], firstBins, lastBins):
        hM1 = (win.size + 1) // 2  # half window, rounded up
        hM2 = win.size // 2  # half window, rounded down
        hop = fftSize // 4  # hop size of this pass
        half = fftSize // 2  # half of the synthesis FFT size
        margin = max(half, hM1)
        pin = margin  # frame centre
        pend = x.size - margin  # frames must be centred before this sample
        frame = np.zeros(fftSize)  # synthesised frame
        band = np.zeros(x.size)  # output of this pass
        win = win / sum(win)  # unit-sum analysis window
        # synthesis window: triangular window divided by the normalised blackman-harris
        bh = blackmanharris(fftSize)
        bh = bh / sum(bh)
        synthWin = np.zeros(fftSize)
        synthWin[half - hop:half + hop] = triang(2 * hop)
        synthWin[half - hop:half + hop] = synthWin[half - hop:half + hop] / bh[half - hop:half + hop]
        while pin < pend:
            # -----analysis-----
            mX, pX = DFT.dftAnal(x[pin - hM1:pin + hM2], win, fftSize)
            ploc = UF.peakDetection(mX, t)
            ploc = ploc[(ploc >= loBin) & (ploc <= hiBin)]  # keep only the peaks of this band
            iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)
            ipfreq = fs * iploc / float(fftSize)
            # -----synthesis-----
            Y = UF.genSpecSines(ipfreq, ipmag, ipphase, fftSize, fs)
            fftbuffer = np.real(ifft(Y))
            frame[:half - 1] = fftbuffer[half + 1:]  # undo zero-phase window
            frame[half - 1:] = fftbuffer[:half + 1]
            band[pin - half:pin + half] += synthWin * frame  # overlap-add
            pin += hop
        total = total + band
    return total
def sprModel(x, fs, w, N, t):
    """
    Sinusoidal plus residual analysis/synthesis of a sound, one frame at a time.

    x: input sound, fs: sampling rate, w: analysis window,
    N: FFT size (minimum 512), t: threshold in negative dB,
    returns y: output sound, ys: sinusoidal component, xr: residual component
    """
    before = int(math.floor((w.size + 1) / 2))  # frame samples taken before the pointer
    after = int(math.floor(w.size / 2))         # frame samples taken from the pointer on
    Ns = 512                                    # FFT size for synthesis (even)
    hop = Ns // 4                               # hop size for analysis and synthesis
    mid = Ns // 2
    centre = max(mid, before)                   # sound pointer of the first frame
    stop = x.size - max(mid, before)            # frames start strictly before this sample
    ys = np.zeros(x.size)                       # sinusoidal output
    xr = np.zeros(x.size)                       # residual output
    w = w / sum(w)                              # normalize analysis window

    bh = blackmanharris(Ns)
    bh = bh / sum(bh)                           # normalized window, also used for the residual
    core = slice(mid - hop, mid + hop)
    sw = np.zeros(Ns)
    sw[core] = triang(2 * hop) / bh[core]       # overlap window over the synthesis window

    while centre < stop:
        #-----analysis-----
        frame = x[centre - before:centre + after]
        mX, pX = DFT.dftAnal(frame, w, N)
        ploc = UF.peakDetection(mX, t)
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)
        ipfreq = fs * iploc / float(N)          # peak locations in Hertz
        ri = centre - mid - 1                   # input sound pointer for residual analysis
        xw2 = x[ri:ri + Ns] * bh                # window the input sound
        X2 = fft(np.roll(xw2, mid))             # zero-phase windowing, then FFT
        #-----synthesis-----
        Ys = UF.genSpecSines(ipfreq, ipmag, ipphase, Ns, fs)
        Xr = X2 - Ys                            # residual complex spectrum
        ysw = np.roll(np.real(ifft(Ys)), mid - 1)  # inverse FFT, undo zero-phase window
        xrw = np.roll(np.real(ifft(Xr)), mid - 1)
        ys[ri:ri + Ns] += sw * ysw              # overlap-add for sines
        xr[ri:ri + Ns] += sw * xrw              # overlap-add for residual
        centre += hop

    y = ys + xr                                 # sum of sinusoidal and residual components
    return y, ys, xr
import time, os, sys
import math
sys.path.append(
    os.path.join(
        os.path.dirname(os.path.realpath(__file__)),
        '../../../software/models/'))
import dftModel as DFT
import utilFunctions as UF

# Plot one frame of sine-440.wav and its magnitude spectrum.
(fs, x) = UF.wavread('../../../sounds/sine-440.wav')
M = 400                      # frame / window length in samples
x1 = x[2000:2000 + M]        # frame taken 2000 samples into the sound
N = 2048                     # FFT size
hM = int(M / 2.0)
w = np.hamming(M)
mX, pX = DFT.dftAnal(x1, w, N)
# Build the frequency axis from the spectrum actually returned, so that it
# always has exactly as many points as mX regardless of how many bins
# dftAnal returns.
freqaxis = fs * np.arange(mX.size) / float(N)
taxis = np.arange(N) / float(fs)

plt.figure(1, figsize=(9.5, 7))

# top panel: the time-domain frame
plt.subplot(3, 1, 1)
plt.plot(np.arange(M) / float(fs), x1, 'b', lw=1.5)
plt.axis([0, (M - 1) / float(fs), min(x1) - .1, max(x1) + .1])
plt.title('x (sine-440.wav)')

# middle panel: magnitude spectrum, shown up to fs / 10
plt.subplot(3, 1, 2)
plt.plot(freqaxis, mX, 'r', lw=1.5)
plt.axis([0, fs / 10, -80, max(mX) + 1])
plt.title('mX')
def spsModel(x, fs, w, N, t, stocf):
    """
    Analysis/synthesis of a sound using the sinusoidal plus stochastic model.

    x: input sound, fs: sampling rate, w: analysis window,
    N: FFT size (minimum 512), t: threshold in negative dB,
    stocf: decimation factor of mag spectrum for stochastic analysis
    returns y: output sound, ys: sinusoidal component, yst: stochastic component
    """
    hM1 = int(math.floor((w.size + 1) / 2))  # half analysis window size by rounding
    hM2 = int(math.floor(w.size / 2))        # half analysis window size by floor
    Ns = 512                                 # FFT size for synthesis (even)
    H = Ns // 4                              # hop size used for analysis and synthesis
    hNs = Ns // 2
    pin = max(hNs, hM1)                      # sound pointer in middle of analysis window
    pend = x.size - max(hNs, hM1)            # last sample to start a frame
    ysw = np.zeros(Ns)                       # sinusoidal output frame
    ystw = np.zeros(Ns)                      # stochastic output frame
    ys = np.zeros(x.size)                    # sinusoidal output array
    yst = np.zeros(x.size)                   # stochastic output array
    w = w / sum(w)                           # normalize analysis window
    sw = np.zeros(Ns)
    ow = triang(2 * H)                       # overlapping window
    sw[hNs - H:hNs + H] = ow
    bh = blackmanharris(Ns)                  # synthesis window
    bh = bh / sum(bh)                        # normalize synthesis window
    wr = bh                                  # window for residual
    sw[hNs - H:hNs + H] = sw[hNs - H:hNs + H] / bh[hNs - H:hNs + H]
    sws = H * hanning(Ns) / 2                # synthesis window for stochastic

    while pin < pend:
        #-----analysis-----
        x1 = x[pin - hM1:pin + hM2]                          # select frame
        mX, pX = DFT.dftAnal(x1, w, N)                       # compute dft
        ploc = UF.peakDetection(mX, t)                       # find peaks
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)  # refine peak values
        ipfreq = fs * iploc / float(N)                       # convert peak locations to Hertz
        ri = pin - hNs - 1                                   # input sound pointer for residual analysis
        xw2 = x[ri:ri + Ns] * wr                             # window the input sound
        fftbuffer = np.zeros(Ns)
        fftbuffer[:hNs] = xw2[hNs:]                          # zero-phase window in fftbuffer
        fftbuffer[hNs:] = xw2[:hNs]
        X2 = fft(fftbuffer)                                  # FFT for residual analysis
        #-----synthesis-----
        Ys = UF.genSpecSines(ipfreq, ipmag, ipphase, Ns, fs)  # spectrum of sinusoidal component
        Xr = X2 - Ys                                         # residual complex spectrum
        mXr = 20 * np.log10(abs(Xr[:hNs]))                   # magnitude spectrum of residual
        # resample needs an integer number of output samples; clamp at
        # -200 dB first so that log10(0) = -Inf never reaches it
        mXrenv = resample(np.maximum(-200, mXr), int(mXr.size * stocf))
        stocEnv = resample(mXrenv, hNs)                      # interpolate to original size
        pYst = 2 * np.pi * np.random.rand(hNs)               # random phase values
        Yst = np.zeros(Ns, dtype=complex)
        Yst[:hNs] = 10**(stocEnv / 20) * np.exp(1j * pYst)   # positive frequencies
        Yst[hNs + 1:] = 10**(stocEnv[:0:-1] / 20) * np.exp(
            -1j * pYst[:0:-1])                               # negative frequencies

        fftbuffer = np.real(ifft(Ys))                        # inverse FFT of sinusoidal spectrum
        ysw[:hNs - 1] = fftbuffer[hNs + 1:]                  # undo zero-phase window
        ysw[hNs - 1:] = fftbuffer[:hNs + 1]

        fftbuffer = np.real(ifft(Yst))                       # inverse FFT of stochastic spectrum
        ystw[:hNs - 1] = fftbuffer[hNs + 1:]                 # undo zero-phase window
        ystw[hNs - 1:] = fftbuffer[:hNs + 1]

        ys[ri:ri + Ns] += sw * ysw                           # overlap-add for sines
        yst[ri:ri + Ns] += sws * ystw                        # overlap-add for stochastic
        pin += H                                             # advance sound pointer

    y = ys + yst                                             # sum of sinusoidal and stochastic components
    return y, ys, yst
x (numpy array) = input signal of length M (odd) fs (float) = sampling frequency (Hz) N (positive integer) = FFT size Outputs: The function should return a tuple (y, yfilt) y = Output of the dftSynth() without filtering (M samples long) yfilt = Output of the dftSynth() with filtering (M samples long) The first few lines of the code have been written for you, do not modify it. """ M = len(x) w = get_window('hamming', M) outputScaleFactor = sum(w) ## Your code here # compute the dft mX, pX = dftAnal(x, w, N) # compute the inverse dft y = dftSynth(mX, pX, w.size)*sum(w) # DFT Filtering # mX = np.absolute(20 * log10(mX)) cutoff_index = np.floor(70*N/fs) mX[:cutoff_index] = -120 yfilt = dftSynth(mX, pX, w.size)*sum(w) return (y, yfilt) ''' Example import numpy as np import matplotlib.pyplot as plt from A3utils import genSine from A3Part4 import suppressFreqDFTmodel
def sineModelAnal(x, fs, w, N, H, t, maxnSines=100, minSineDur=.01, freqDevOffset=20, freqDevSlope=0.01):
    """
    Sinusoidal analysis of a sound with sine tracking.

    x: input array sound, w: analysis window, N: size of complex spectrum,
    H: hop-size, t: threshold in negative dB
    maxnSines: maximum number of sines per frame,
    minSineDur: minimum duration of sines in seconds
    freqDevOffset: minimum frequency deviation at 0Hz,
    freqDevSlope: slope increase of minimum frequency deviation
    returns xtfreq, xtmag, xtphase: frequencies, magnitudes and phases of sinusoidal tracks
    raises ValueError when minSineDur is negative
    """
    if minSineDur < 0:
        raise ValueError("Minimum duration of sine tracks smaller than 0")

    def fixed_width(track):
        # one output row: the track values followed by zeros up to maxnSines
        row = np.zeros(maxnSines)
        row[:track.size] = track
        return row

    half_up = int(math.floor((w.size + 1) / 2))   # half analysis window size by rounding
    half_down = int(math.floor(w.size / 2))       # half analysis window size by floor
    # zeros in front centre the first window at sample 0, zeros behind
    # allow the last samples to be analysed
    x = np.append(np.append(np.zeros(half_down), x), np.zeros(half_down))
    pos = half_up                                 # sound pointer in middle of analysis window
    last = x.size - half_up                       # frames start strictly before this sample
    w = w / sum(w)                                # normalize analysis window
    tfreq = np.array([])
    first_frame = True

    while pos < last:
        frame = x[pos - half_up:pos + half_down]
        mX, pX = DFT.dftAnal(frame, w, N)
        ploc = UF.peakDetection(mX, t)
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)
        ipfreq = fs * iploc / float(N)            # peak locations in Hertz
        # sinusoidal tracking: attach peaks to the running trajectories
        tfreq, tmag, tphase = sineTracking(ipfreq, ipmag, ipphase, tfreq,
                                           freqDevOffset, freqDevSlope)
        # limit number of tracks to maxnSines
        tfreq = np.resize(tfreq, min(maxnSines, tfreq.size))
        tmag = np.resize(tmag, min(maxnSines, tmag.size))
        tphase = np.resize(tphase, min(maxnSines, tphase.size))

        row_freq = fixed_width(tfreq)
        row_mag = fixed_width(tmag)
        row_phase = fixed_width(tphase)
        if first_frame:
            # first frame initializes the output sine tracks
            xtfreq, xtmag, xtphase = row_freq, row_mag, row_phase
            first_frame = False
        else:
            # later frames are stacked as new rows
            xtfreq = np.vstack((xtfreq, row_freq))
            xtmag = np.vstack((xtmag, row_mag))
            xtphase = np.vstack((xtphase, row_phase))
        pos += H

    # delete sine tracks shorter than minSineDur
    xtfreq = cleaningSineTracks(xtfreq, round(fs * minSineDur / H))
    return xtfreq, xtmag, xtphase
def stftMorph(x1, x2, fs, w1, N1, w2, N2, H1, smoothf, balancef):
    """
    Morph of two sounds using the STFT.

    x1, x2: input sounds, fs: sampling rate
    w1, w2: analysis windows, N1, N2: FFT sizes, H1: hop size
    smoothf: smooth factor of sound 2, bigger than 0 to max of 1, where 1 is no smoothing,
    balancef: balance between the 2 sounds, from 0 to 1, where 0 is sound 1 and 1 is sound 2
    returns y: output sound
    raises ValueError when smoothf, balancef or H1 is out of range
    """
    if N2 / 2 * smoothf < 3:
        raise ValueError("Smooth factor too small")
    if smoothf > 1:
        raise ValueError("Smooth factor above 1")
    if balancef < 0 or balancef > 1:
        raise ValueError("Balance factor outside range")
    if H1 <= 0:
        raise ValueError("Hop size (H1) smaller or equal to 0")

    # ----- sound 1: frame geometry and zero padding -----
    M1 = w1.size
    up1 = int(math.floor((M1 + 1) / 2))      # half window size by rounding
    down1 = int(math.floor(M1 / 2))          # half window size by floor
    nframes = int(x1.size / H1)              # number of frames, from the unpadded x1
    x1 = np.append(np.append(np.zeros(down1), x1), np.zeros(up1))
    w1 = w1 / sum(w1)                        # normalize analysis window

    # ----- sound 2: hop size chosen so both sounds yield nframes frames -----
    M2 = w2.size
    up2 = int(math.floor((M2 + 1) / 2))
    down2 = int(math.floor(M2 / 2))
    H2 = int(x2.size / nframes)
    x2 = np.append(np.append(np.zeros(down2), x2), np.zeros(up2))

    y = np.zeros(x1.size)
    for k in range(nframes):
        c1 = up1 + k * H1                    # frame centre in padded x1
        c2 = up2 + k * H2                    # frame centre in padded x2
        #-----analysis-----
        mX1, pX1 = DFT.dftAnal(x1[c1 - up1:c1 + down1], w1, N1)
        mX2, pX2 = DFT.dftAnal(x2[c2 - up2:c2 + down2], w2, N2)
        #-----transformation-----
        # smooth the spectrum of sound 2 by decimating, then bring it back
        # to the size of the spectrum of sound 1
        smoothed = resample(np.maximum(-200, mX2), int(mX2.size * smoothf))
        mX2 = resample(smoothed, mX1.size)
        mY = balancef * mX2 + (1 - balancef) * mX1
        #-----synthesis-----
        y[c1 - up1:c1 + down1] += H1 * DFT.dftSynth(mY, pX1, M1)  # overlap-add

    # drop the zero padding that was added around x1
    return y[down1:y.size - up1].copy()
def sineModel_MultiRes(x, fs, w1, w2, w3, N1, N2, N3, t, B1, B2, B3):
    """
    MultiResolution analysis/synthesis of a sound using the sinusoidal model,
    without sine tracking.

    Every frame is analysed with the three (window, FFT size) pairs, all
    centred on the same sample. Peaks are then merged by frequency band:
    [0, B1] from analysis 1, (B1, B2] from analysis 2, (B2, B3] from analysis 3.

    x: input array sound, fs: sampling rate,
    [w1,w2,w3]: 3 analysis windows, [N1,N2,N3]: 3 sizes of complex spectrum,
    t: threshold in negative dB, [B1,B2,B3]: 3 frequency bands (upper limits in Hertz)
    returns y: output array sound
    """
    h1M1 = int(math.floor((w1.size + 1) / 2))  # half analysis window 1 size by rounding
    h1M2 = int(math.floor(w1.size / 2))        # half analysis window 1 size by floor
    h2M1 = int(math.floor((w2.size + 1) / 2))  # half analysis window 2 size by rounding
    h2M2 = int(math.floor(w2.size / 2))        # half analysis window 2 size by floor
    h3M1 = int(math.floor((w3.size + 1) / 2))  # half analysis window 3 size by rounding
    h3M2 = int(math.floor(w3.size / 2))        # half analysis window 3 size by floor
    Ns = 512                                   # FFT size for synthesis (even)
    # Floor division keeps H and hNs integers on Python 3 as well; they are
    # used as slice bounds and as a window length below.
    H = Ns // 4                                # hop size used for analysis and synthesis
    hNs = Ns // 2                              # half of synthesis FFT size
    pin = max(hNs, h1M1, h2M1, h3M1)           # sound pointer in middle of biggest analysis window
    pend = x.size - pin                        # last sample to start a frame
    yw = np.zeros(Ns)                          # output sound frame
    y = np.zeros(x.size)                       # output array
    w1 = w1 / sum(w1)                          # normalize analysis window 1
    w2 = w2 / sum(w2)                          # normalize analysis window 2
    w3 = w3 / sum(w3)                          # normalize analysis window 3
    sw = np.zeros(Ns)                          # synthesis window
    ow = triang(2 * H)                         # triangular window
    sw[hNs - H:hNs + H] = ow
    bh = blackmanharris(Ns)                    # blackmanharris window
    bh = bh / sum(bh)                          # normalized blackmanharris window
    sw[hNs - H:hNs + H] = sw[hNs - H:hNs + H] / bh[hNs - H:hNs + H]  # normalized synthesis window

    while pin < pend:
        #-----analysis-----
        # same frame position, three different window sizes centred on pin
        x1 = x[pin - h1M1:pin + h1M2]
        x2 = x[pin - h2M1:pin + h2M2]
        x3 = x[pin - h3M1:pin + h3M2]
        mX1, pX1 = DFT.dftAnal(x1, w1, N1)
        mX2, pX2 = DFT.dftAnal(x2, w2, N2)
        mX3, pX3 = DFT.dftAnal(x3, w3, N3)
        ploc1 = UF.peakDetection(mX1, t)
        ploc2 = UF.peakDetection(mX2, t)
        ploc3 = UF.peakDetection(mX3, t)
        iploc1, ipmag1, ipphase1 = UF.peakInterp(mX1, pX1, ploc1)
        iploc2, ipmag2, ipphase2 = UF.peakInterp(mX2, pX2, ploc2)
        iploc3, ipmag3, ipphase3 = UF.peakInterp(mX3, pX3, ploc3)
        ipfreq1 = fs * iploc1 / float(N1)      # peak locations in Hertz
        ipfreq2 = fs * iploc2 / float(N2)
        ipfreq3 = fs * iploc3 / float(N3)

        # keep from each analysis only the peaks inside its own band
        band1 = (ipfreq1 >= 0) & (ipfreq1 <= B1)
        band2 = (ipfreq2 > B1) & (ipfreq2 <= B2)
        band3 = (ipfreq3 > B2) & (ipfreq3 <= B3)
        finalfreq = np.concatenate((ipfreq1[band1], ipfreq2[band2], ipfreq3[band3]))
        finalmag = np.concatenate((ipmag1[band1], ipmag2[band2], ipmag3[band3]))
        finalphase = np.concatenate((ipphase1[band1], ipphase2[band2], ipphase3[band3]))

        #-----synthesis-----
        Y = UF.genSpecSines(finalfreq, finalmag, finalphase, Ns, fs)  # generate sines in the spectrum
        fftbuffer = np.real(ifft(Y))           # compute inverse FFT
        yw[:hNs - 1] = fftbuffer[hNs + 1:]     # undo zero-phase window
        yw[hNs - 1:] = fftbuffer[:hNs + 1]
        y[pin - hNs:pin + hNs] += sw * yw      # overlap-add and apply a synthesis window
        pin += H                               # advance sound pointer
    return y
def hpsModel(x, fs, w, N, t, nH, minf0, maxf0, f0et, stocf):
    """
    Analysis/synthesis of a sound using the harmonic plus stochastic model,
    one frame at a time, no harmonic tracking.

    x: input sound; fs: sampling rate, w: analysis window; N: FFT size (minimum 512),
    t: threshold in negative dB, nH: maximum number of harmonics,
    minf0: minimum f0 frequency in Hz; maxf0: maximum f0 frequency in Hz,
    f0et: error threshold in the f0 detection (ex: 5);
    stocf: decimation factor of mag spectrum for stochastic analysis
    returns y: output sound, yh: harmonic component, yst: stochastic component
    """
    hM1 = int(math.floor((w.size + 1) / 2))  # half analysis window size by rounding
    hM2 = int(math.floor(w.size / 2))        # half analysis window size by floor
    Ns = 512                                 # FFT size for synthesis (even)
    # Floor division keeps H and hNs integers on Python 3 as well; they are
    # used as slice bounds and as a window length below.
    H = Ns // 4                              # hop size used for analysis and synthesis
    hNs = Ns // 2
    pin = max(hNs, hM1)                      # sound pointer in middle of analysis window
    pend = x.size - max(hNs, hM1)            # last sample to start a frame
    yhw = np.zeros(Ns)                       # harmonic output frame
    ystw = np.zeros(Ns)                      # stochastic output frame
    yh = np.zeros(x.size)                    # harmonic output array
    yst = np.zeros(x.size)                   # stochastic output array
    w = w / sum(w)                           # normalize analysis window
    sw = np.zeros(Ns)
    ow = triang(2 * H)                       # overlapping window
    sw[hNs - H:hNs + H] = ow
    bh = blackmanharris(Ns)                  # synthesis window
    bh = bh / sum(bh)                        # normalize synthesis window
    wr = bh                                  # window for residual
    sw[hNs - H:hNs + H] = sw[hNs - H:hNs + H] / bh[hNs - H:hNs + H]  # synthesis window for harmonic component
    sws = H * hanning(Ns) / 2                # synthesis window for stochastic
    hfreqp = []                              # harmonic frequencies of previous frame
    f0stable = 0

    while pin < pend:
        #-----analysis-----
        x1 = x[pin - hM1:pin + hM2]                          # select frame
        mX, pX = DFT.dftAnal(x1, w, N)                       # compute dft
        ploc = UF.peakDetection(mX, t)                       # find peaks
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)  # refine peak values
        ipfreq = fs * iploc / N                              # convert peak locations to Hz
        f0t = UF.f0Twm(ipfreq, ipmag, f0et, minf0, maxf0, f0stable)  # find f0
        if ((f0stable == 0) & (f0t > 0)) \
                or ((f0stable > 0) & (np.abs(f0stable - f0t) < f0stable / 5.0)):
            f0stable = f0t                   # stable f0: it is close to the previous one
        else:
            f0stable = 0
        hfreq, hmag, hphase = HM.harmonicDetection(ipfreq, ipmag, ipphase, f0t,
                                                   nH, hfreqp, fs)  # find harmonics
        hfreqp = hfreq
        ri = pin - hNs - 1                                   # input sound pointer for residual analysis
        xw2 = x[ri:ri + Ns] * wr                             # window the input sound
        fftbuffer = np.zeros(Ns)
        fftbuffer[:hNs] = xw2[hNs:]                          # zero-phase window in fftbuffer
        fftbuffer[hNs:] = xw2[:hNs]
        X2 = fft(fftbuffer)                                  # FFT for residual analysis
        #-----synthesis-----
        Yh = UF.genSpecSines(hfreq, hmag, hphase, Ns, fs)    # spectrum of harmonic component
        Xr = X2 - Yh                                         # residual complex spectrum
        mXr = 20 * np.log10(abs(Xr[:hNs]))                   # magnitude spectrum of residual
        # resample needs an integer number of output samples; clamp at
        # -200 dB first so that log10(0) = -Inf never reaches it
        mXrenv = resample(np.maximum(-200, mXr), int(mXr.size * stocf))
        stocEnv = resample(mXrenv, hNs)                      # interpolate to original size
        pYst = 2 * np.pi * np.random.rand(hNs)               # random phase values
        Yst = np.zeros(Ns, dtype=complex)
        Yst[:hNs] = 10**(stocEnv / 20) * np.exp(1j * pYst)   # positive frequencies
        Yst[hNs + 1:] = 10**(stocEnv[:0:-1] / 20) * np.exp(
            -1j * pYst[:0:-1])                               # negative frequencies

        fftbuffer = np.real(ifft(Yh))                        # inverse FFT of harmonic spectrum
        yhw[:hNs - 1] = fftbuffer[hNs + 1:]                  # undo zero-phase window
        yhw[hNs - 1:] = fftbuffer[:hNs + 1]

        fftbuffer = np.real(ifft(Yst))                       # inverse FFT of stochastic spectrum
        ystw[:hNs - 1] = fftbuffer[hNs + 1:]                 # undo zero-phase window
        ystw[hNs - 1:] = fftbuffer[:hNs + 1]

        yh[ri:ri + Ns] += sw * yhw                           # overlap-add for sines
        yst[ri:ri + Ns] += sws * ystw                        # overlap-add for stochastic
        pin += H                                             # advance sound pointer

    y = yh + yst                                             # sum of harmonic and stochastic components
    return y, yh, yst
def harmonicModel(x, fs, w, N, t, nH, minf0, maxf0, f0et):
    """
    Analysis/synthesis of a sound using the sinusoidal harmonic model.

    x: input sound, fs: sampling rate, w: analysis window,
    N: FFT size (minimum 512), t: threshold in negative dB,
    nH: maximum number of harmonics, minf0: minimum f0 frequency in Hz,
    maxf0: maximum f0 frequency in Hz,
    f0et: error threshold in the f0 detection (ex: 5),
    returns y: output array sound
    """
    hM1 = int(math.floor((w.size + 1) / 2))  # half analysis window size by rounding
    hM2 = int(math.floor(w.size / 2))        # half analysis window size by floor
    x = np.append(np.zeros(hM2), x)          # zeros at beginning to center first window at sample 0
    x = np.append(x, np.zeros(hM1))          # zeros at the end to analyze last sample
    Ns = 512                                 # FFT size for synthesis (even)
    # Floor division keeps H and hNs integers on Python 3 as well; they are
    # used as slice bounds and as a window length below.
    H = Ns // 4                              # hop size used for analysis and synthesis
    hNs = Ns // 2
    pin = max(hNs, hM1)                      # sound pointer in middle of analysis window
    pend = x.size - max(hNs, hM1)            # last sample to start a frame
    yh = np.zeros(Ns)                        # output sound frame
    y = np.zeros(x.size)                     # output array
    w = w / sum(w)                           # normalize analysis window
    sw = np.zeros(Ns)                        # synthesis window
    ow = triang(2 * H)                       # overlapping window
    sw[hNs - H:hNs + H] = ow
    bh = blackmanharris(Ns)                  # synthesis window
    bh = bh / sum(bh)                        # normalize synthesis window
    sw[hNs - H:hNs + H] = sw[hNs - H:hNs + H] / bh[hNs - H:hNs + H]  # window for overlap-add
    hfreqp = []                              # harmonic frequencies of previous frame
    f0stable = 0

    while pin < pend:
        #-----analysis-----
        x1 = x[pin - hM1:pin + hM2]                          # select frame
        mX, pX = DFT.dftAnal(x1, w, N)                       # compute dft
        ploc = UF.peakDetection(mX, t)                       # detect peak locations
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)  # refine peak values
        ipfreq = fs * iploc / N                              # convert peak locations to Hz
        f0t = UF.f0Twm(ipfreq, ipmag, f0et, minf0, maxf0, f0stable)  # find f0
        if ((f0stable == 0) & (f0t > 0)) \
                or ((f0stable > 0) & (np.abs(f0stable - f0t) < f0stable / 5.0)):
            f0stable = f0t                   # stable f0: it is close to the previous one
        else:
            f0stable = 0
        hfreq, hmag, hphase = harmonicDetection(ipfreq, ipmag, ipphase, f0t,
                                                nH, hfreqp, fs)  # find harmonics
        hfreqp = hfreq
        #-----synthesis-----
        Yh = UF.genSpecSines(hfreq, hmag, hphase, Ns, fs)    # generate spec sines
        fftbuffer = np.real(ifft(Yh))                        # inverse FFT
        yh[:hNs - 1] = fftbuffer[hNs + 1:]                   # undo zero-phase window
        yh[hNs - 1:] = fftbuffer[:hNs + 1]
        y[pin - hNs:pin + hNs] += sw * yh                    # overlap-add
        pin += H                                             # advance sound pointer

    y = np.delete(y, range(hM2))                     # remove the zeros added at the beginning
    y = np.delete(y, range(y.size - hM1, y.size))    # remove the zeros added at the end
    return y
def harmonicModelAnal_2(x, fs, w, N, hopSizeMelodia, pinFirst, pend, t, nH, f0Series, harmDevSlope=0.01, minSineDur=.02):
    """
    Analysis of a sound using the sinusoidal harmonic model, taking the
    fundamental frequency of each frame from f0Series instead of detecting it.

    x: input sound; fs: sampling rate, w: analysis window; N: FFT size (minimum 512)
    hopSizeMelodia: number of samples the sound pointer advances per frame
        (one f0Series value is consumed per frame)
    pinFirst: position of the sound pointer for the first frame, as an index
        into x after zeros have been added at its beginning
    pend: last position of the sound pointer at which a frame is analysed
    t: threshold in negative dB, nH: maximum number of harmonics
    f0Series: sequence of f0 values, one per frame; values below 50 are replaced by 0
    harmDevSlope: slope of harmonic deviation; minSineDur: minimum length of harmonics
    returns xhfreq, xhmag, xhphase: harmonic frequencies, magnitudes and phases
    raises ValueError when minSineDur is negative
    """
    if minSineDur < 0:
        raise ValueError("Minimum duration of sine tracks smaller than 0")

    hM1 = int(math.floor((w.size + 1) / 2))  # half analysis window size by rounding
    hM2 = int(math.floor(w.size / 2))        # half analysis window size by floor
    x = np.append(np.zeros(hM2), x)          # zeros at beginning to center first window at sample 0
    x = np.append(x, np.zeros(hM2))          # zeros at the end to analyze last sample
    pin = pinFirst                           # sound pointer, supplied by the caller
    w = w / sum(w)                           # normalize analysis window
    hfreqp = []                              # harmonic frequencies of previous frame
    idxF0Series = 0

    while pin <= pend and idxF0Series < len(f0Series):
        logger.debug("at time {}".format(pin / fs))
        x1 = x[pin - hM1:pin + hM2]                          # select frame
        mX, pX = DFT.dftAnal(x1, w, N)                       # compute dft
        ploc = UF.peakDetection(mX, t)                       # detect peak locations
        if len(ploc) == 0:
            # call form of print: valid on Python 2 and Python 3 alike
            print("no peaks detected at time {} with threshold {}".format(
                pin / fs, t))
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)  # refine peak values
        ipfreq = fs * iploc / N                              # convert locations to Hz

        # f0 comes from the supplied series rather than from f0 detection
        f0t = f0Series[idxF0Series]
        if f0t < 50:
            f0t = 0

        # find harmonics
        hfreq, hmag, hphase = harmonicDetection(ipfreq, ipmag, ipphase, f0t,
                                                nH, hfreqp, fs, harmDevSlope)
        hfreqp = hfreq

        if pin == pinFirst:                                  # first frame
            xhfreq = np.array([hfreq])
            xhmag = np.array([hmag])
            xhphase = np.array([hphase])
        else:                                                # next frames
            xhfreq = np.vstack((xhfreq, np.array([hfreq])))
            xhmag = np.vstack((xhmag, np.array([hmag])))
            xhphase = np.vstack((xhphase, np.array([hphase])))

        # advance sound pointer and f0 index together
        pin += hopSizeMelodia
        idxF0Series += 1

    # delete tracks shorter than minSineDur
    xhfreq = SM.cleaningSineTracks(
        xhfreq, round(fs * minSineDur / hopSizeMelodia))
    return xhfreq, xhmag, xhphase
sys.path.append(
    os.path.join(
        os.path.dirname(os.path.realpath(__file__)),
        '../../software/models/'))
import utilFunctions as UF
import dftModel as DFT
import matplotlib.pyplot as plt

# Morph the spectrum of one frame of soprano-E4.wav into one frame of rain.wav.
(fs, x1) = UF.wavread('../../sounds/rain.wav')
(fs, x2) = UF.wavread('../../sounds/soprano-E4.wav')
M = N = 512
w = get_window('hanning', M)
x1w = x1[10000:10000 + M] * w
x2w = x2[10000:10000 + M] * w
mX1, pX1 = DFT.dftAnal(x1w, w, N)
mX2, pX2 = DFT.dftAnal(x2w, w, N)
smoothf = .2
# resample takes an integer number of output samples, so both sizes are
# converted explicitly instead of being passed as floats
mX2smooth1 = resample(np.maximum(-200, mX2), int(mX2.size * smoothf))
mX2smooth2 = resample(mX2smooth1, N // 2 + 1)
balancef = .7
mY = balancef * mX2smooth2 + (1 - balancef) * mX1
y = DFT.dftSynth(mY, pX1, N)
# plt.plot(mX1)
# plt.plot(mX2)
# plt.plot(mX2smooth2)
# plt.plot(mY)
# Compare the spectrum of one violin frame under a rectangular and a
# blackman window, both normalized so that the maximum sits at 0 dB.
(fs, x) = UF.wavread('../../../sounds/violin-B3.wav')
N = 1024
pin = 5000
w = np.ones(801)                       # rectangular window
hM1 = int(math.floor((w.size + 1) / 2))
hM2 = int(math.floor(w.size / 2))
x1 = x[pin - hM1:pin + hM2]            # frame centred around pin

plt.figure(1, figsize=(9.5, 5))

# top panel: the time-domain frame
plt.subplot(3, 1, 1)
plt.plot(np.arange(-hM1, hM2), x1, lw=1.5)
plt.axis([-hM1, hM2, min(x1), max(x1)])
plt.title('x (violin-B3.wav)')

# middle panel: rectangular window
mX, pX = DF.dftAnal(x1, w, N)
mX = mX - max(mX)
plt.subplot(3, 1, 2)
plt.plot(np.arange(mX.size), mX, 'r', lw=1.5)
plt.axis([0, N / 4, -70, 0])
plt.title('mX (rectangular window)')

# bottom panel: blackman window
w = np.blackman(801)
mX, pX = DF.dftAnal(x1, w, N)
mX = mX - max(mX)
plt.subplot(3, 1, 3)
plt.plot(np.arange(mX.size), mX, 'r', lw=1.5)
plt.axis([0, N / 4, -70, 0])
from scipy.signal import get_window, resample
from scipy.fftpack import fft
import sys, os, math
sys.path.append(
    os.path.join(
        os.path.dirname(os.path.realpath(__file__)),
        '../../software/models/'))
import utilFunctions as UF
import dftModel as DFT

# Attenuate a 30-bin band of one oboe frame's magnitude spectrum with a
# hamming-shaped dip and resynthesise the frame.
fs, x = UF.wavread('../../sounds/oboe-A4.wav')
M = N = 512
w = get_window('hanning', M)
xw = x[10000:10000 + M] * w
filter = get_window('hamming', 30) * -60.0   # dip values, added to the band below
mX, pX = DFT.dftAnal(xw, w, N)
centerbin = 40
band = slice(centerbin - 15, centerbin + 15)  # the 30 bins around centerbin
mY = np.copy(mX)
mY[band] = mX[band] + filter
y = DFT.dftSynth(mY, pX, N) * sum(w)

import matplotlib.pyplot as plt

# figure 1: windowed input frame against the filtered resynthesis
plt.figure()
plt.plot(xw)
plt.plot(y)

# figure 2: the filter shape itself
plt.figure()
plt.plot(filter)