def minFreqEstErr(inputFile, f):
    """
    Estimate the frequency of the sinusoid in inputFile, growing the analysis
    window as M = 100*k + 1 (k = 1, 2, ...) until the estimate is within
    0.05 Hz of f.

    Inputs:
            inputFile (string) = wav file including the path
            f (float) = frequency of the sinusoid present in the input audio signal (Hz)
    Output:
            fEst (float) = Estimated frequency of the sinusoid (Hz)
            M (int) = Window size
            N (int) = FFT size
    """
    # analysis parameters:
    window = 'blackman'
    t = -40

    fs, s = wavread(inputFile)
    center = int(fs) // 2  # sample index fs/2, i.e. 0.5 s into the signal
    N = 128
    k = 0
    while True:
        k += 1
        M = 100 * k + 1
        while N < M:  # keep the FFT size a power of two that holds the window
            N *= 2
        half = M // 2
        frame = s[center - half:center + half + 1]  # M samples around center
        w = get_window(window, M)
        mX, pX = dftAnal(frame, w, N)
        ploc = UF.peakDetection(mX, t)
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)
        if len(iploc) == 0:
            # nothing above the threshold for this window: try a longer one
            continue
        fEst = float(iploc[0]) * fs / N  # interpolated bin -> Hz
        if abs(f - fEst) <= 0.05:
            return fEst, M, N
def minFreqEstErr(inputFile, f):
    """
    Estimate the frequency of the sinusoid in inputFile with a window
    M = 100*k + 1, increasing k until the estimate is less than 0.05 Hz
    away from f.

    Inputs:
            inputFile (string) = wav file including the path
            f (float) = frequency of the sinusoid present in the input audio signal (Hz)
    Output:
            fEst (float) = Estimated frequency of the sinusoid (Hz)
            M (int) = Window size
            N (int) = FFT size
    """
    # analysis parameters:
    window = 'blackman'
    t = -40

    (fs, x) = UF.wavread(inputFile)
    center = int(.5 * fs)
    # The search starts at the window closest to one period of f
    # (fs / f samples); k is clamped to 1 so M is never the degenerate 1.
    k = max(1, int(np.floor(fs / f)) // 100)
    while True:
        M = 100 * k + 1
        Ns = int(2 ** (np.ceil(np.log2(M))))  # power-of-two FFT size >= M
        w = get_window(window, M)
        # integer bounds: M samples around the sample at fs/2
        x1 = x[center - M // 2:center + (M + 1) // 2]
        mX, pX = DFT.dftAnal(x1, w, Ns)
        ploc = UF.peakDetection(mX, t)
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)
        k += 1
        if len(iploc) == 0:
            continue  # no peak above the threshold: try the next window
        fEst = fs * iploc[0] / float(Ns)
        if abs(fEst - f) < 0.05:
            return float(fEst), M, Ns
def minFreqEstErr(inputFile, f):
    """
    Try window sizes M = 101, 201, ..., 4901 and stop at the first one whose
    frequency estimate is less than 0.05 Hz away from f. If none qualifies,
    the values from the last window tried are returned.

    Inputs:
            inputFile (string) = wav file including the path
            f (float) = frequency of the sinusoid present in the input audio signal (Hz)
    Output:
            fEst (float) = Estimated frequency of the sinusoid (Hz)
            M (int) = Window size
            N (int) = FFT size
    """
    # analysis parameters:
    window = 'blackman'
    t = -40

    fs, x = UF.wavread(inputFile)
    for M in range(101, 4902, 100):
        N = 1 << int(np.ceil(np.log2(M)))  # power-of-two FFT size >= M
        first = int(.5 * fs - ((M - 1) / 2))
        last = int(.5 * fs + ((M + 1) / 2))
        mX, pX = DFT.dftAnal(x[first:last], get_window(window, M), N)
        candidates = UF.peakDetection(mX, t)
        refined = UF.peakInterp(mX, pX, candidates)
        fEst = (refined[0][0] / N) * fs  # first interpolated peak, in Hz
        if abs(fEst - f) < 0.05:
            break
    return fEst, M, N
def minFreqEstErr(inputFile, f):
    """
    Try window sizes M = 100*k + 1 for k = 1..99 and return the first
    estimate that is within 0.05 Hz of f.

    Inputs:
            inputFile (string) = wav file including the path
            f (float) = frequency of the sinusoid present in the input audio signal (Hz)
    Output:
            fEst (float) = Estimated frequency of the sinusoid (Hz)
            M (int) = Window size
            N (int) = FFT size
            None is returned when no window in the searched range qualifies.
    """
    # analysis parameters:
    window = 'blackman'
    t = -40

    fs, x = UF.wavread(inputFile)
    # Frame starts 0.5 s into the file; derived from the file's own sample
    # rate rather than an assumed 44100 Hz.
    startidx = int(fs * 0.5)
    for k in range(1, 100):
        M = 100 * k + 1
        N = 1 << M.bit_length()  # smallest power of two greater than M
        w = get_window(window, M)
        mX, pX = DFT.dftAnal(x[startidx:startidx + M], w, N)
        ploc = UF.peakDetection(mX, t)
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)
        if len(iploc) == 0:
            continue  # no peak above the threshold for this window
        freq = iploc[0] * fs / N
        if abs(freq - f) <= 0.05:
            return (freq, M, N)
    return None
def analysis(x, fs, w, N, t):
    """Extracted from sineModel. Perform windowed analysis on audio frame.

    One frame of w.size samples is taken around the middle of x, transformed
    with DFT.dftAnal, and its spectral peaks above threshold t are detected
    and refined by interpolation.

    Returns iploc, ipmag, ipphase (interpolated peak locations, magnitudes
    and phases) and ipfreq (the peak locations converted to Hertz).
    """
    window_len = w.size
    before = (window_len + 1) // 2  # half window, rounded up
    after = window_len // 2         # half window, rounded down
    middle = (len(x) + 1) // 2      # sound pointer in the middle of the data

    # -----analysis-----
    frame = x[middle - before:middle + after]
    mX, pX = DFT.dftAnal(frame, w, N)
    peaks = UF.peakDetection(mX, t)
    iploc, ipmag, ipphase = UF.peakInterp(mX, pX, peaks)
    return iploc, ipmag, ipphase, fs * iploc / float(N)
def minFreqEstErr(inputFile, f):
    """
    Try window sizes M = 100*i + 1 for i = 1..24 and stop at the first one
    whose frequency estimate is less than 0.05 Hz away from f. If none
    qualifies, the values from the last window tried are returned.

    Inputs:
            inputFile (string) = wav file including the path
            f (float) = frequency of the sinusoid present in the input audio signal (Hz)
    Output:
            fEst (float) = Estimated frequency of the sinusoid (Hz);
                           None if no peak was ever detected
            M (int) = Window size
            N (int) = FFT size
    """
    # analysis parameters:
    window = 'blackman'
    t = -40

    (fs, x) = UF.wavread(inputFile)
    center = int(0.5 * fs)  # slice bounds must be integers
    fEst = None
    for i in range(1, 25):
        M = 100 * i + 1
        hM = M // 2
        N = 1 << (M - 1).bit_length()  # smallest power of two >= M
        w = get_window(window, M)
        x1 = x[center - hM - 1:center + hM]
        mX, pX = DFT.dftAnal(x1, w, N)
        ploc = UF.peakDetection(mX, t)
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)
        # explicit length test: the truth value of an array is ambiguous
        if len(iploc) > 0:
            fEst = fs * iploc[0] / float(N)
            if abs(f - fEst) < 0.05:
                break
    return fEst, M, N
def f0Twm(x, fs, w, N, H, t, minf0, maxf0, f0et):
    """
    Fundamental frequency detection using the twm algorithm
    x: input sound, fs: sampling rate, w: analysis window,
    N: FFT size (minimum 512), H: samples the sound pointer advances per frame,
    t: threshold in negative dB,
    minf0: minimum f0 frequency in Hz, maxf0: maximum f0 frequency in Hz,
    f0et: error threshold in the f0 detection (ex: 5),
    returns f0: fundamental frequency, one value per analysed frame
    """
    positive_bins = N/2                 # size of positive spectrum
    lead = (w.size + 1) // 2            # half analysis window, rounded up
    trail = w.size // 2                 # half analysis window, rounded down
    # zero-pad so the first window is centered at sample 0 and the last
    # sample can still be analysed
    x = np.append(np.zeros(trail), x)
    x = np.append(x, np.zeros(lead))
    pos = lead                          # sound pointer in middle of window
    stop = x.size - lead                # last sample to start a frame
    fftbuffer = np.zeros(N)             # allocated as before; not read below
    w = w / sum(w)                      # normalize analysis window
    track = []
    stable = 0
    while pos < stop:
        frame = x[pos - lead:pos + trail]
        mX, pX = DFT.dftAnal(frame, w, N)
        peaks = UF.peakDetection(mX, positive_bins, t)
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, peaks)
        ipfreq = fs * iploc/N
        candidate = UF.f0DetectionTwm(ipfreq, ipmag, f0et, minf0, maxf0, stable)
        # a candidate is stable when it starts a track or stays within 20%
        # of the previous stable value
        starts = stable == 0 and candidate > 0
        follows = stable > 0 and np.abs(stable - candidate) < stable/5.0
        stable = candidate if (starts or follows) else 0
        track = np.append(track, candidate)
        pos += H                        # advance sound pointer
    return track
def minFreqEstErr(inputFile, f):
    """
    Grow the analysis window as M = 100*k + 1 (k = 1, 2, ...) around the
    middle of the signal until the estimate is within 0.05 Hz of f.

    Inputs:
            inputFile (string) = wav file including the path
            f (float) = frequency of the sinusoid present in the input audio signal (Hz)
    Output:
            fEst (float) = Estimated frequency of the sinusoid (Hz)
                           (returned as computed from the interpolated peak
                           locations, i.e. array-valued)
            M (int) = Window size
            N (int) = FFT size
    """
    # analysis parameters:
    window = 'blackman'
    t = -40

    fs, x = UF.wavread(inputFile)
    midpoint = len(x) // 2
    k = 0
    while True:
        k += 1
        M = 100 * k + 1
        begin = midpoint - M // 2
        win = get_window(window, M)
        N = 2 ** int(np.ceil(np.log2(M)))  # power-of-two FFT size >= M
        mX, pX = DFT.dftAnal(x[begin:begin + M], win, N)
        peaks = UF.peakDetection(mX, t)
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, peaks)
        fEst = iploc * fs / N
        if not (np.abs(f - fEst) > 0.05):  # Hz
            break
    return (fEst, M, N)
def minFreqEstErr(inputFile='sine-440.wav', f=440):
    """
    Grow the analysis window as M = 100*k + 1 until the frequency estimate
    is less than 0.05 Hz away from f.

    Inputs:
            inputFile (string) = wav file including the path
            f (float) = frequency of the sinusoid present in the input audio signal (Hz)
    Output:
            fEst (float) = Estimated frequency of the sinusoid (Hz)
            M (int) = Window size
            N (int) = FFT size
    """
    window = 'blackman'
    t = -40
    (fs, x) = UF.wavread(inputFile)
    start = int(.5 * fs)  # slice bounds must be integers
    # NOTE(review): the search starts at k = 11 (M = 1101), so shorter
    # windows are never tried — confirm this is intended for inputs other
    # than the default file.
    k = 10
    N = 2
    while True:
        k += 1
        M = 100 * k + 1
        while N < M:  # power-of-two FFT size that holds the window
            N = N * 2
        w = get_window(window, M)
        x1 = x[start:start + M]
        mX, pX = DFT.dftAnal(x1, w, N)
        ploc = UF.peakDetection(mX, t)
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)
        if len(iploc) == 0:
            continue  # no peak above the threshold: try the next window
        fEst = fs * iploc[0] / float(N)
        if abs(fEst - f) < 0.05:
            break
    return (fEst, M, N)
def check_k(fr1, fr2, fs, k, window):
    """
    Check that a window of M = 100*k + 1 samples estimates every test
    frequency to within 0.05 Hz.

    For each frequency fr1, fr1 + 1, ... (fr2 - fr1 values in total) a
    441000-sample sinusoid is synthesized at sampling rate fs, M samples
    around its middle are analysed, and the first interpolated peak is
    compared with the true frequency. Progress is printed per frequency.

    fr1, fr2: first frequency and end of the frequency range (Hz)
    fs: sampling rate, k: window-size multiplier, window: window type name
    returns 0 at the first frequency whose error exceeds 0.05 Hz,
            3 when all frequencies pass
    """
    # Everything that does not depend on the frequency is computed once.
    M = 100 * k + 1
    N = 1
    while N < M:  # smallest power of two >= M
        N *= 2
    w = get_window(window, M)
    n = np.arange(441000.0)
    lo = len(n) // 2 - M // 2 + 1
    hi = lo + M

    for fr in fr1 + np.arange(fr2 - fr1):
        x = np.sin(2.0 * np.pi * fr * n / fs)
        (mX, pX) = DFT.dftAnal(x[lo:hi], w, N)
        p_loc = UF.peakDetection(mX, -40)
        p_int = UF.peakInterp(mX, pX, p_loc)
        peak = p_int[0] * (fs / float(N))
        p = peak[0]
        err = abs(p - fr)
        # Single pre-formatted string: prints the same text under both the
        # Python 2 print statement and the Python 3 print function.
        if err > 0.05:
            print("fr:  %s  error:  %s" % (fr, err))
            return 0
        print("fr:  %s  checked" % (fr,))
    return 3
def minFreqEstErr(inputFile, f):
    """
    Try window sizes M = 100*k + 1 for k = 1..19 and return the first
    estimate that is less than 0.05 Hz away from f.

    Inputs:
            inputFile (string) = wav file including the path
            f (float) = frequency of the sinusoid present in the input audio signal (Hz)
    Output:
            fEst (float) = Estimated frequency of the sinusoid (Hz)
            M (int) = Window size
            N (int) = FFT size
            None is returned when no window in the searched range qualifies.
    """
    # analysis parameters:
    window = 'blackman'
    t = -40

    (fs, x) = UF.wavread(inputFile)
    center = int(.5 * fs)
    for k in range(1, 20):
        M = 100 * k + 1
        hM = M // 2
        # Pure-integer power of two >= M, so N is always a plain int.
        N = 1 << (M - 1).bit_length()
        w = get_window(window, M)
        x1 = x[center - hM - 1:center + hM]
        mX, pX = DFT.dftAnal(x1, w, N)
        ploc = UF.peakDetection(mX, t)
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)
        if len(iploc) == 0:
            continue  # no peak above the threshold for this window
        fEst = fs * iploc[0] / float(N)
        if abs(f - fEst) < 0.05:
            return fEst, M, N
    return None
def minFreqEstErr(inputFile, f):
    """
    Try window sizes M = 100*k + 1 for k = 1..99 and return the first
    estimate that is less than 0.05 Hz away from f.

    Inputs:
            inputFile (string) = wav file including the path
            f (float) = frequency of the sinusoid present in the input audio signal (Hz)
    Output:
            fEst (float) = Estimated frequency of the sinusoid (Hz)
            M (int) = Window size
            N (int) = FFT size
            None is returned when no window in the searched range qualifies.
    """
    # analysis parameters:
    window = 'blackman'
    t = -40

    fs, x = UF.wavread(inputFile)
    xCenter = int(0.5 * fs)
    for k in range(1, 100):
        M = 100 * k + 1
        w = get_window(window, M)
        N = int(pow(2, np.ceil(np.log2(M))))  # power-of-two FFT size >= M
        # floor division keeps the slice bounds integral on Python 3
        x2 = x[xCenter - M // 2:xCenter + M // 2 + 1]
        mX, pX = DFT.dftAnal(x2, w, N)
        ploc = UF.peakDetection(mX, t)
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)
        if len(iploc) == 0:
            continue  # no peak above the threshold for this window
        # Compare a single peak so the test is a scalar even when several
        # peaks are detected.
        fEst = fs * iploc[0] / float(N)
        if np.abs(fEst - f) < 0.05:
            return fEst, M, N
    return None
def harmonicModel(x, fs, w, N, t, nH, minf0, maxf0, f0et):
    """
    Analysis/synthesis of a sound using the sinusoidal harmonic model
    x: input sound, fs: sampling rate, w: analysis window,
    N: FFT size (minimum 512), t: threshold in negative dB,
    nH: maximum number of harmonics, minf0: minimum f0 frequency in Hz,
    maxf0: maximum f0 frequency in Hz,
    f0et: error threshold in the f0 detection (ex: 5),
    returns y: output array sound
    """
    hM1 = (w.size + 1) // 2                  # half analysis window size by rounding
    hM2 = w.size // 2                        # half analysis window size by floor
    x = np.append(np.zeros(hM2), x)          # add zeros at beginning to center first window at sample 0
    x = np.append(x, np.zeros(hM1))          # add zeros at the end to analyze last sample
    Ns = 512                                 # FFT size for synthesis (even)
    # Floor division: these values index arrays, so they must stay integers
    # under Python 3 as well.
    H = Ns // 4                              # hop size used for analysis and synthesis
    hNs = Ns // 2
    pin = max(hNs, hM1)                      # init sound pointer in middle of analysis window
    pend = x.size - max(hNs, hM1)            # last sample to start a frame
    yh = np.zeros(Ns)                        # output sound frame
    y = np.zeros(x.size)                     # output array
    w = w / sum(w)                           # normalize analysis window
    sw = np.zeros(Ns)                        # synthesis window
    ow = triang(2 * H)                       # overlapping window
    sw[hNs - H:hNs + H] = ow
    bh = blackmanharris(Ns)                  # synthesis window
    bh = bh / sum(bh)                        # normalize synthesis window
    sw[hNs - H:hNs + H] = sw[hNs - H:hNs + H] / bh[hNs - H:hNs + H]  # window for overlap-add
    hfreqp = []                              # harmonic frequencies of the previous frame
    f0stable = 0
    while pin < pend:
        # -----analysis-----
        x1 = x[pin - hM1:pin + hM2]                            # select frame
        mX, pX = DFT.dftAnal(x1, w, N)                         # compute dft
        ploc = UF.peakDetection(mX, t)                         # detect peak locations
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)    # refine peak values
        ipfreq = fs * iploc / N
        f0t = UF.f0Twm(ipfreq, ipmag, f0et, minf0, maxf0, f0stable)  # find f0
        if ((f0stable == 0) & (f0t > 0)) \
                or ((f0stable > 0) & (np.abs(f0stable - f0t) < f0stable / 5.0)):
            f0stable = f0t                   # consider a stable f0 if it is close to the previous one
        else:
            f0stable = 0
        hfreq, hmag, hphase = harmonicDetection(ipfreq, ipmag, ipphase, f0t, nH, hfreqp, fs)  # find harmonics
        hfreqp = hfreq
        # -----synthesis-----
        Yh = UF.genSpecSines(hfreq, hmag, hphase, Ns, fs)      # generate spec sines
        fftbuffer = np.real(ifft(Yh))                          # inverse FFT
        yh[:hNs - 1] = fftbuffer[hNs + 1:]                     # undo zero-phase window
        yh[hNs - 1:] = fftbuffer[:hNs + 1]
        y[pin - hNs:pin + hNs] += sw * yh                      # overlap-add
        pin += H                                               # advance sound pointer
    y = np.delete(y, range(hM2))                   # drop the hM2 zeros that were prepended
    y = np.delete(y, range(y.size - hM1, y.size))  # drop the hM1 zeros that were appended
    return y
def harmonicModel(x, fs, w, N, t, nH, minf0, maxf0, f0et):
    """
    Analysis/synthesis of a sound using the sinusoidal harmonic model
    x: input sound, fs: sampling rate, w: analysis window,
    N: FFT size (minimum 512), t: threshold in negative dB,
    nH: maximum number of harmonics, minf0: minimum f0 frequency in Hz,
    maxf0: maximum f0 frequency in Hz,
    f0et: error threshold in the f0 detection (ex: 5),
    returns y: output array sound
    """
    hM1 = (w.size + 1) // 2                  # half analysis window size by rounding
    hM2 = w.size // 2                        # half analysis window size by floor
    x = np.append(np.zeros(hM2), x)          # add zeros at beginning to center first window at sample 0
    x = np.append(x, np.zeros(hM1))          # add zeros at the end to analyze last sample
    Ns = 512                                 # FFT size for synthesis (even)
    # Sizes are kept integral at the source with floor division, so no
    # per-use int() casts are needed and pin stays a valid slice index.
    H = Ns // 4                              # hop size used for analysis and synthesis
    hNs = Ns // 2
    pin = max(hNs, hM1)                      # init sound pointer in middle of analysis window
    pend = x.size - max(hNs, hM1)            # last sample to start a frame
    yh = np.zeros(Ns)                        # output sound frame
    y = np.zeros(x.size)                     # output array
    w = w / sum(w)                           # normalize analysis window
    sw = np.zeros(Ns)                        # synthesis window
    ow = triang(2 * H)                       # overlapping window
    sw[hNs - H:hNs + H] = ow                 # the window array itself is assigned; int() cannot convert it
    bh = blackmanharris(Ns)                  # synthesis window
    bh = bh / sum(bh)                        # normalize synthesis window
    sw[hNs - H:hNs + H] = sw[hNs - H:hNs + H] / bh[hNs - H:hNs + H]  # window for overlap-add
    hfreqp = []                              # harmonic frequencies of the previous frame
    f0stable = 0
    while pin < pend:
        # -----analysis-----
        x1 = x[pin - hM1:pin + hM2]                            # select frame
        mX, pX = DFT.dftAnal(x1, w, N)                         # compute dft
        ploc = UF.peakDetection(mX, t)                         # detect peak locations
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)    # refine peak values
        ipfreq = fs * iploc / N
        f0t = UF.f0Twm(ipfreq, ipmag, f0et, minf0, maxf0, f0stable)  # find f0
        if ((f0stable == 0) & (f0t > 0)) \
                or ((f0stable > 0) & (np.abs(f0stable - f0t) < f0stable / 5.0)):
            f0stable = f0t                   # consider a stable f0 if it is close to the previous one
        else:
            f0stable = 0
        hfreq, hmag, hphase = harmonicDetection(ipfreq, ipmag, ipphase, f0t, nH, hfreqp, fs)  # find harmonics
        hfreqp = hfreq
        # -----synthesis-----
        Yh = UF.genSpecSines(hfreq, hmag, hphase, Ns, fs)      # generate spec sines
        fftbuffer = np.real(ifft(Yh))                          # inverse FFT
        yh[:hNs - 1] = fftbuffer[hNs + 1:]                     # undo zero-phase window
        yh[hNs - 1:] = fftbuffer[:hNs + 1]
        y[pin - hNs:pin + hNs] += sw * yh                      # overlap-add
        pin += H                                               # advance sound pointer
    y = np.delete(y, range(hM2))                   # drop the hM2 zeros that were prepended
    y = np.delete(y, range(y.size - hM1, y.size))  # drop the hM1 zeros that were appended
    return y
def sprModel(x, fs, w, N, t):
    """
    Analysis/synthesis of a sound using the sinusoidal plus residual model, one frame at a time
    x: input sound, fs: sampling rate, w: analysis window,
    N: FFT size (minimum 512), t: threshold in negative dB,
    returns y: output sound, ys: sinusoidal component, xr: residual component
    """
    hM1 = (w.size + 1) // 2                  # half analysis window size by rounding
    hM2 = w.size // 2                        # half analysis window size by floor
    Ns = 512                                 # FFT size for synthesis (even)
    # Floor division: these values index arrays, so they must stay integers
    # under Python 3 as well.
    H = Ns // 4                              # hop size used for analysis and synthesis
    hNs = Ns // 2
    pin = max(hNs, hM1)                      # initialize sound pointer in middle of analysis window
    pend = x.size - max(hNs, hM1)            # last sample to start a frame
    ysw = np.zeros(Ns)                       # sinusoidal output frame
    xrw = np.zeros(Ns)                       # residual output frame
    ys = np.zeros(x.size)                    # sinusoidal output array
    xr = np.zeros(x.size)                    # residual output array
    w = w / sum(w)                           # normalize analysis window
    sw = np.zeros(Ns)
    ow = triang(2 * H)                       # overlapping window
    sw[hNs - H:hNs + H] = ow
    bh = blackmanharris(Ns)                  # synthesis window
    bh = bh / sum(bh)                        # normalize synthesis window
    wr = bh                                  # window for residual
    sw[hNs - H:hNs + H] = sw[hNs - H:hNs + H] / bh[hNs - H:hNs + H]
    while pin < pend:
        # -----analysis-----
        x1 = x[pin - hM1:pin + hM2]                            # select frame
        mX, pX = DFT.dftAnal(x1, w, N)                         # compute dft
        ploc = UF.peakDetection(mX, t)                         # find peaks
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)    # refine peak values (once)
        ipfreq = fs * iploc / float(N)                         # convert peak locations to Hertz
        # NOTE(review): when pin == hNs this is -1 and the slice below is
        # shorter than Ns — confirm callers always use hM1 > hNs.
        ri = pin - hNs - 1                                     # input sound pointer for residual analysis
        xw2 = x[ri:ri + Ns] * wr                               # window the input sound
        fftbuffer = np.zeros(Ns)                               # reset buffer
        fftbuffer[:hNs] = xw2[hNs:]                            # zero-phase window in fftbuffer
        fftbuffer[hNs:] = xw2[:hNs]
        X2 = fft(fftbuffer)                                    # compute FFT for residual analysis
        # -----synthesis-----
        Ys = UF.genSpecSines(ipfreq, ipmag, ipphase, Ns, fs)   # generate spec of sinusoidal component
        Xr = X2 - Ys                                           # get the residual complex spectrum
        fftbuffer = np.real(ifft(Ys))                          # inverse FFT of sinusoidal spectrum
        ysw[:hNs - 1] = fftbuffer[hNs + 1:]                    # undo zero-phase window
        ysw[hNs - 1:] = fftbuffer[:hNs + 1]
        fftbuffer = np.real(ifft(Xr))                          # inverse FFT of residual spectrum
        xrw[:hNs - 1] = fftbuffer[hNs + 1:]                    # undo zero-phase window
        xrw[hNs - 1:] = fftbuffer[:hNs + 1]
        ys[ri:ri + Ns] += sw * ysw                             # overlap-add for sines
        xr[ri:ri + Ns] += sw * xrw                             # overlap-add for residual
        pin += H                                               # advance sound pointer
    y = ys + xr                                                # sum of sinusoidal and residual components
    return y, ys, xr
def peak(m, n):
    """
    Analyse one frame of m samples around the sample at fs/2 with an FFT of
    size n and estimate the frequency of its first spectral peak.

    Reads x, fs, window and t from the enclosing scope.

    returns fest: frequency of the first interpolated peak (Hz),
            ploc: detected peak locations, mX, pX: magnitude and phase spectra
    """
    hfs = int(fs * 0.5)                       # slice bounds must be integers
    x1 = x[hfs - m // 2:hfs + (m + 1) // 2]   # m samples around hfs
    w = get_window(window, m)
    mX, pX = DFT.dftAnal(x1, w, n)
    ploc = UF.peakDetection(mX, t)
    iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)
    fest = fs * iploc[0] / n
    return fest, ploc, mX, pX
def minFreqEstErr(inputFile, f):
    """
    Grow the analysis window as M = 100*k + 1 (k = 1, 2, ...) until the
    frequency estimate is less than 0.05 Hz away from f.

    Inputs:
            inputFile (string) = wav file including the path
            f (float) = frequency of the sinusoid present in the input audio signal (Hz)
    Output:
            fEst (float) = Estimated frequency of the sinusoid (Hz)
            M (int) = Window size
            N (int) = FFT size
    """
    # analysis parameters:
    window = 'blackman'
    t = -40

    fs, x = UF.wavread(inputFile)
    MIN_ERROR = 0.05
    est_error = 1.0  # any value >= MIN_ERROR, so the loop runs at least once
    k = 1
    while est_error >= MIN_ERROR:
        M = 100 * k + 1
        N = 2 ** int(np.ceil(np.log2(M)))  # power-of-two FFT size >= M
        # M samples around the sample at fs/2
        x1 = x[int(0.5 * fs - M // 2):int(0.5 * fs) + (M + 1) // 2]
        w = get_window(window, M)
        mX, pX = DFT.dftAnal(x1, w, N)
        ploc = UF.peakDetection(mX, t)
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)
        fEst = iploc[0] * fs / N           # first interpolated peak, in Hz
        est_error = np.abs(fEst - f)
        k += 1
    # Return the estimate itself, as documented, not the residual error.
    return fEst, M, N
def run_one_estimate(x, fs, M, window=DEFAULT_WINDOW, t=DEFAULT_THRESHOLD):
    """
    Analyse a single frame of M samples taken around the middle of x.

    x: input sound, fs: sampling rate, M: window size,
    window: window type name, t: peak-detection threshold
    returns (mX, pX, ploc, iploc, ipmag, ipphase, fEst, N): the spectra, the
    detected and interpolated peaks, the peak locations scaled by fs / N,
    and the FFT size obtained from min_power_2(M)
    """
    first = int(len(x) / 2) - int(M / 2)
    N = min_power_2(M)
    mX, pX = DFT.dftAnal(x[first:first + M], get_window(window, M), N)
    ploc = UF.peakDetection(mX, t)
    iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)
    return (mX, pX, ploc, iploc, ipmag, ipphase, iploc * fs / N, N)
def f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et):
    """
    Fundamental frequency detection of a sound using twm algorithm
    x: input sound; fs: sampling rate; w: analysis window;
    N: FFT size; H: hop size; t: threshold in negative dB,
    minf0: minimum f0 frequency in Hz, maxf0: maximum f0 frequency in Hz,
    f0et: error threshold in the f0 detection (ex: 5),
    returns f0: fundamental frequency, one value per analysed frame
    raises ValueError if minf0 < 0, maxf0 >= 10000 or H <= 0
    """
    if minf0 < 0:
        raise ValueError(
            "Minumum fundamental frequency (minf0) smaller than 0")
    if maxf0 >= 10000:
        raise ValueError(
            "Maximum fundamental frequency (maxf0) bigger than 10000Hz")
    if H <= 0:
        raise ValueError("Hop size (H) smaller or equal to 0")

    hN = N / 2                          # size of positive spectrum (not read below)
    lead = (w.size + 1) // 2            # half analysis window, rounded up
    trail = w.size // 2                 # half analysis window, rounded down
    # zero-pad so the first window is centered at sample 0 and the last
    # sample can still be analysed
    x = np.append(np.zeros(trail), x)
    x = np.append(x, np.zeros(lead))
    pos = lead                          # sound pointer in middle of window
    stop = x.size - lead                # last sample to start a frame
    fftbuffer = np.zeros(N)             # allocated as before; not read below
    w = w / sum(w)                      # normalize analysis window
    track = []                          # f0 output
    stable = 0                          # last stable f0 (0 = none)
    while pos < stop:
        frame = x[pos - lead:pos + trail]
        mX, pX = DFT.dftAnal(frame, w, N)
        peaks = UF.peakDetection(mX, t)
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, peaks)
        ipfreq = fs * iploc / N         # convert locations to Hz
        candidate = UF.f0Twm(ipfreq, ipmag, f0et, minf0, maxf0, stable)
        # a candidate is stable when it starts a track or stays within 20%
        # of the previous stable value
        starts = stable == 0 and candidate > 0
        follows = stable > 0 and np.abs(stable - candidate) < stable / 5.0
        stable = candidate if (starts or follows) else 0
        track = np.append(track, candidate)
        pos += H                        # advance sound pointer
    return track
def minFreqEstErr(inputFile, f):
    """
    Grow the analysis window as M = 100*k + 1 (k = 1, 2, ...) until the
    frequency estimate is within 0.05 Hz of f.

    Inputs:
            inputFile (string) = wav file including the path
            f (float) = frequency of the sinusoid present in the input audio signal (Hz)
    Output:
            fEst (float) = Estimated frequency of the sinusoid (Hz)
            M (int) = Window size
            N (int) = FFT size
    """
    # analysis parameters:
    window = 'blackman'
    t = -40

    # read the file
    fs, s = UF.wavread(inputFile)
    center = int(0.5 * fs)  # slice bounds must be integers
    k = 0
    # The loop always runs at least once, so M and FFT_size are defined
    # for every f.
    while True:
        k += 1
        # window size for this iteration
        M = 100 * k + 1
        # FFT size: smallest power of two greater than M
        FFT_size = 1 << M.bit_length()
        df = float(fs) / FFT_size  # Hz per FFT bin
        # slice M samples around the sample at fs/2
        s_sliced = s[center - M // 2:center + M // 2 + 1]
        w = get_window(window, M)
        mX, pX = DFT.dftAnal(s_sliced, w, FFT_size)
        # detect and refine the peaks
        peak_locations = UF.peakDetection(mX, t)
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, peak_locations)
        if len(iploc) == 0:
            continue  # no peak above the threshold: try the next window
        fEst = iploc[0] * df
        if not abs(fEst - f) > 0.05:
            return (fEst, M, FFT_size)
def harmonicModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope=0.01, minSineDur=0.02):
    """
    Analysis of a sound using the sinusoidal harmonic model
    x: input sound; fs: sampling rate, w: analysis window; N: FFT size (minimum 512);
    H: hop size; t: threshold in negative dB,
    nH: maximum number of harmonics; minf0: minimum f0 frequency in Hz,
    maxf0: maximum f0 frequency in Hz; f0et: error threshold in the f0 detection (ex: 5),
    harmDevSlope: slope of harmonic deviation; minSineDur: minimum length of harmonics
    returns xhfreq, xhmag, xhphase: harmonic frequencies, magnitudes and phases
    raises ValueError if minSineDur < 0
    """
    if minSineDur < 0:
        raise ValueError("Minimum duration of sine tracks smaller than 0")

    hN = N / 2                          # size of positive spectrum (not read below)
    lead = (w.size + 1) // 2            # half analysis window, rounded up
    trail = w.size // 2                 # half analysis window, rounded down
    x = np.append(np.zeros(trail), x)   # zeros at beginning to center first window at sample 0
    x = np.append(x, np.zeros(trail))   # zeros at the end to analyze last sample
    pos = lead                          # sound pointer in middle of window
    stop = x.size - lead                # last sample to start a frame
    fftbuffer = np.zeros(N)             # allocated as before; not read below
    w = w / sum(w)                      # normalize analysis window
    previous = []                       # harmonic frequencies of previous frame
    stable = 0                          # last stable f0 (0 = none)
    while pos <= stop:
        frame = x[pos - lead:pos + trail]
        mX, pX = DFT.dftAnal(frame, w, N)
        peaks = UF.peakDetection(mX, t)
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, peaks)
        ipfreq = fs * iploc / N         # convert locations to Hz
        candidate = UF.f0Twm(ipfreq, ipmag, f0et, minf0, maxf0, stable)
        # a candidate is stable when it starts a track or stays within 20%
        # of the previous stable value
        starts = stable == 0 and candidate > 0
        follows = stable > 0 and np.abs(stable - candidate) < stable / 5.0
        stable = candidate if (starts or follows) else 0
        hfreq, hmag, hphase = harmonicDetection(
            ipfreq, ipmag, ipphase, candidate, nH, previous, fs, harmDevSlope
        )
        previous = hfreq
        rows = (np.array([hfreq]), np.array([hmag]), np.array([hphase]))
        if pos == lead:
            # first frame starts the output arrays
            xhfreq, xhmag, xhphase = rows
        else:
            # later frames are stacked underneath
            xhfreq = np.vstack((xhfreq, rows[0]))
            xhmag = np.vstack((xhmag, rows[1]))
            xhphase = np.vstack((xhphase, rows[2]))
        pos += H                        # advance sound pointer
    # delete tracks shorter than minSineDur
    xhfreq = SM.cleaningSineTracks(xhfreq, round(fs * minSineDur / H))
    return xhfreq, xhmag, xhphase
def sineModelMultiRes(x, fs, wList, NList, t, BList):
    """
    Analysis/synthesis of a sound using the sinusoidal model, without sine tracking,
    with a separate analysis window and FFT size per frequency band
    x: input array sound, fs: sampling rate,
    wList: analysis windows, NList: FFT sizes, one entry per band,
    t: threshold in negative dB,
    BList: (Bmin, Bmax) limits per band in Hz; peaks with Bmin <= freq < Bmax are kept
    returns y: output array sound (sum of the sinusoids synthesized for every band)
    """
    # -----synthesis params init-----
    Ns = 512                                 # FFT size for synthesis (even)
    # Floor division: these values index arrays, so they must stay integers
    # under Python 3 as well.
    H = Ns // 4                              # hop size used for analysis and synthesis
    hNs = Ns // 2                            # half of synthesis FFT size
    yw = np.zeros(Ns)                        # output sound frame
    y = np.zeros(x.size)                     # output array
    sw = np.zeros(Ns)                        # synthesis window
    ow = triang(2 * H)                       # triangular window
    sw[hNs - H:hNs + H] = ow                 # add triangular window
    bh = blackmanharris(Ns)                  # blackmanharris window
    bh = bh / sum(bh)                        # normalized blackmanharris window
    sw[hNs - H:hNs + H] = sw[hNs - H:hNs + H] / bh[hNs - H:hNs + H]  # normalized synthesis window

    # One full analysis/synthesis pass per band, however many are supplied.
    for w, N, band in zip(wList, NList, BList):
        # -----analysis params init-----
        Bmin = band[0]
        Bmax = band[1]
        hM1 = (w.size + 1) // 2              # half analysis window size by rounding
        hM2 = w.size // 2                    # half analysis window size by floor
        pin = max(hNs, hM1)                  # init sound pointer in middle of analysis window
        pend = x.size - max(hNs, hM1)        # last sample to start a frame
        w = w / sum(w)                       # normalize analysis window
        while pin < pend:                    # while input sound pointer is within sound
            # -----analysis-----
            x1 = x[pin - hM1:pin + hM2]                          # select frame
            mX, pX = DFT.dftAnal(x1, w, N)                       # compute dft
            ploc = UF.peakDetection(mX, t)                       # detect locations of peaks
            iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)  # refine peak values by interpolation
            ipfreq = fs * iploc / float(N)                       # convert peak locations to Hertz
            in_band = np.logical_and(ipfreq >= Bmin, ipfreq < Bmax)  # computed once, applied to all three
            ipmag = ipmag[in_band]
            ipphase = ipphase[in_band]
            ipfreq = ipfreq[in_band]
            # -----synthesis-----
            Y = UF.genSpecSines(ipfreq, ipmag, ipphase, Ns, fs)  # generate sines in the spectrum
            fftbuffer = np.real(ifft(Y))                         # compute inverse FFT
            yw[:hNs - 1] = fftbuffer[hNs + 1:]                   # undo zero-phase window
            yw[hNs - 1:] = fftbuffer[:hNs + 1]
            y[pin - hNs:pin + hNs] += sw * yw                    # overlap-add and apply a synthesis window
            pin += H                                             # advance sound pointer
    return y
def minFreqEstErr(inputFile, f):
    """
    Grow the analysis window as M = 100*k + 1 (k = 1, 2, ...) until the
    frequency of the strongest detected peak is within 0.05 Hz of f.

    Inputs:
            inputFile (string) = wav file including the path
            f (float) = frequency of the sinusoid present in the input audio signal (Hz)
    Output:
            fEst (float) = Estimated frequency of the sinusoid (Hz)
            M (int) = Window size
            N (int) = FFT size
    """
    # analysis parameters:
    window = 'blackman'
    t = -40  # -40dB magnitude threshold for peak picking

    (fs, x) = UF.wavread(inputFile)
    k = 0
    while True:
        k += 1
        M = 100 * k + 1
        hM2 = M // 2 + 1
        hM1 = M // 2
        # single frame of M samples around the sample at fs/2
        x1 = x[int(.5 * fs - hM2):int(.5 * fs + hM1)]
        w = get_window(window, M)
        N = max(2, 1 << (M - 1).bit_length())  # smallest power of two >= M
        mX, pX = DFT.dftAnal(x1, w, N)         # magnitude and phase spectrum
        ploc = UF.peakDetection(mX, t)         # peak locations
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)
        if len(iploc) == 0:
            continue  # no peak above the threshold: try the next window
        ipfreq = iploc * fs / float(N)         # frequency values of peaks
        # Pick one peak (the largest magnitude) so the comparison and the
        # float() conversion are well defined even with several peaks.
        fEst = ipfreq[np.argmax(ipmag)]
        if abs(fEst - f) <= 0.05:
            break
    return float(fEst), int(M), int(N)
def proc_frame(self, frame):
    """
    Append new samples to the internal buffer and analyse every frame that
    is now complete.

    For each frame the magnitude and phase spectra are stored, the
    fundamental frequency is estimated with UF.f0Twm, appended to
    self.fundamentals and written to self.fundamentals_file together with
    the current time. Afterwards the stored histories are limited to
    self.MAX_BUF entries and the sample buffer to self.fs samples.

    frame: array of new input samples
    """
    self.frames = np.append(self.frames, frame)
    pend = self.frames.size - self.hM1
    f0stable = 0  # stability tracking restarts on every call
    while self.pin < pend:
        # select frame
        x1 = self.frames[self.pin - self.hM1:self.pin + self.hM2]
        # compute dft
        mX, pX = DFT.dftAnal(x1, self.w, self.N)
        if self.pin == self.hM1:
            self.magnitudes = mX
            self.phases = pX
        else:
            self.magnitudes = np.vstack((self.magnitudes, mX))
            # the phase history receives the phase spectrum, not mX
            self.phases = np.vstack((self.phases, pX))
        # detect peak locations
        ploc = UF.peakDetection(mX, self.t)
        # refine peak values
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)
        # convert locations to Hz
        ipfreq = self.fs * iploc / self.N
        # find f0
        f0t = UF.f0Twm(ipfreq, ipmag, self.f0et,
                       self.minf0, self.maxf0, f0stable)
        if ((f0stable == 0) & (f0t > 0)) \
                or ((f0stable > 0) & (np.abs(f0stable - f0t) < f0stable / 5.0)):
            # consider a stable f0 if it is close to the previous one
            f0stable = f0t
        else:
            f0stable = 0
        self.fundamentals = np.append(self.fundamentals, f0t)
        self.fundamentals_file.write('%f\t%f\n' % (self.cur_time, f0t))
        self.pin += self.H
        self.cur_time += 1.0 * self.H / self.fs
    # keep only the most recent MAX_BUF analysis results
    if self.fundamentals.shape[0] > self.MAX_BUF:
        self.fundamentals = self.fundamentals[-self.MAX_BUF:]
        self.magnitudes = self.magnitudes[-self.MAX_BUF:]
        self.phases = self.phases[-self.MAX_BUF:]
    # keep only the last fs samples and shift the pointer by the amount dropped
    if self.frames.shape[0] > self.fs:
        self.pin -= self.frames.shape[0] - self.fs
        self.frames = self.frames[-self.fs:]
def sineModelAnal(x, fs, w, N, H, t, maxnSines=100, minSineDur=.01, freqDevOffset=20, freqDevSlope=0.01):
    """
    Analysis of a sound using the sinusoidal model with sine tracking
    x: input array sound, fs: sampling rate, w: analysis window, N: size of complex spectrum,
    H: hop-size, t: threshold in negative dB
    maxnSines: maximum number of sines per frame, minSineDur: minimum duration of sines in seconds
    freqDevOffset: minimum frequency deviation at 0Hz, freqDevSlope: slope increase of minimum frequency deviation
    returns xtfreq, xtmag, xtphase: frequencies, magnitudes and phases of sinusoidal tracks;
            each analysed frame contributes one row of maxnSines values (unused slots are 0)
    raises ValueError if minSineDur is negative
    """
    if minSineDur < 0:
        raise ValueError("Minimum duration of sine tracks smaller than 0")

    def padded_row(values):
        """Copy values into the start of a zero-filled array of length maxnSines."""
        row = np.zeros(maxnSines)
        row[:values.size] = values
        return row

    hM1 = (w.size + 1) // 2                                  # half analysis window size by rounding
    hM2 = w.size // 2                                        # half analysis window size by floor
    x = np.append(np.zeros(hM2), x)                          # add zeros at beginning to center first window at sample 0
    x = np.append(x, np.zeros(hM2))                          # add zeros at the end to analyze last sample
    pin = hM1                                                # initialize sound pointer in middle of analysis window
    pend = x.size - hM1                                      # last sample to start a frame
    w = w / sum(w)                                           # normalize analysis window
    tfreq = np.array([])
    while pin < pend:                                        # while input sound pointer is within sound
        x1 = x[pin - hM1:pin + hM2]                          # select frame
        mX, pX = DFT.dftAnal(x1, w, N)                       # compute dft
        ploc = UF.peakDetection(mX, t)                       # detect locations of peaks
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)  # refine peak values by interpolation
        ipfreq = fs * iploc / float(N)                       # convert peak locations to Hertz
        # perform sinusoidal tracking by adding peaks to trajectories
        tfreq, tmag, tphase = sineTracking(ipfreq, ipmag, ipphase, tfreq, freqDevOffset, freqDevSlope)
        # limit number of tracks to maxnSines (tfreq is fed back into the next frame's tracking)
        tfreq = np.resize(tfreq, min(maxnSines, tfreq.size))
        tmag = np.resize(tmag, min(maxnSines, tmag.size))
        tphase = np.resize(tphase, min(maxnSines, tphase.size))
        jtfreq = padded_row(tfreq)
        jtmag = padded_row(tmag)
        jtphase = padded_row(tphase)
        if pin == hM1:                                       # first frame initializes the output sine tracks
            xtfreq = jtfreq
            xtmag = jtmag
            xtphase = jtphase
        else:                                                # rest of frames append values to sine tracks
            xtfreq = np.vstack((xtfreq, jtfreq))
            xtmag = np.vstack((xtmag, jtmag))
            xtphase = np.vstack((xtphase, jtphase))
        pin += H
    # delete sine tracks shorter than minSineDur
    xtfreq = cleaningSineTracks(xtfreq, round(fs * minSineDur / H))
    return xtfreq, xtmag, xtphase
def sineModelAnal(x, fs, w, N, H, t, maxnSines=100, minSineDur=.01, freqDevOffset=20, freqDevSlope=0.01):
    """
    Analysis of a sound using the sinusoidal model with sine tracking
    x: input array sound, fs: sampling rate, w: analysis window, N: size of complex spectrum,
    H: hop-size, t: threshold in negative dB
    maxnSines: maximum number of sines per frame, minSineDur: minimum duration of sines in seconds
    freqDevOffset: minimum frequency deviation at 0Hz, freqDevSlope: slope increase of minimum frequency deviation
    returns xtfreq, xtmag, xtphase: frequencies, magnitudes and phases of sinusoidal tracks;
            each analysed frame contributes one row of maxnSines values (unused slots are 0)
    raises ValueError if minSineDur is negative
    """
    if minSineDur < 0:
        raise ValueError("Minimum duration of sine tracks smaller than 0")

    def padded_row(values):
        """Copy values into the start of a zero-filled array of length maxnSines."""
        row = np.zeros(maxnSines)
        row[:values.size] = values
        return row

    hM1 = (w.size + 1) // 2                                  # half analysis window size by rounding
    hM2 = w.size // 2                                        # half analysis window size by floor
    x = np.append(np.zeros(hM2), x)                          # add zeros at beginning to center first window at sample 0
    x = np.append(x, np.zeros(hM2))                          # add zeros at the end to analyze last sample
    pin = hM1                                                # initialize sound pointer in middle of analysis window
    pend = x.size - hM1                                      # last sample to start a frame
    w = w / sum(w)                                           # normalize analysis window
    tfreq = np.array([])
    while pin < pend:                                        # while input sound pointer is within sound
        x1 = x[pin - hM1:pin + hM2]                          # select frame
        mX, pX = DFT.dftAnal(x1, w, N)                       # compute dft
        ploc = UF.peakDetection(mX, t)                       # detect locations of peaks
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)  # refine peak values by interpolation
        ipfreq = fs * iploc / float(N)                       # convert peak locations to Hertz
        # perform sinusoidal tracking by adding peaks to trajectories
        tfreq, tmag, tphase = sineTracking(ipfreq, ipmag, ipphase, tfreq, freqDevOffset, freqDevSlope)
        # limit number of tracks to maxnSines (tfreq is fed back into the next frame's tracking)
        tfreq = np.resize(tfreq, min(maxnSines, tfreq.size))
        tmag = np.resize(tmag, min(maxnSines, tmag.size))
        tphase = np.resize(tphase, min(maxnSines, tphase.size))
        jtfreq = padded_row(tfreq)
        jtmag = padded_row(tmag)
        jtphase = padded_row(tphase)
        if pin == hM1:                                       # first frame initializes the output sine tracks
            xtfreq = jtfreq
            xtmag = jtmag
            xtphase = jtphase
        else:                                                # rest of frames append values to sine tracks
            xtfreq = np.vstack((xtfreq, jtfreq))
            xtmag = np.vstack((xtmag, jtmag))
            xtphase = np.vstack((xtphase, jtphase))
        pin += H
    # delete sine tracks shorter than minSineDur
    xtfreq = cleaningSineTracks(xtfreq, round(fs * minSineDur / H))
    return xtfreq, xtmag, xtphase
def harmonicModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope=0.01, minSineDur=.02):
    """
    Analysis of a sound using the sinusoidal harmonic model
    x: input sound; fs: sampling rate, w: analysis window; N: FFT size (minimum 512); H: hop size;
    t: threshold in negative dB, nH: maximum number of harmonics; minf0: minimum f0 frequency in Hz,
    maxf0: maximum f0 frequency in Hz; f0et: error threshold in the f0 detection (ex: 5),
    harmDevSlope: slope of harmonic deviation; minSineDur: minimum length of harmonics
    returns xhfreq, xhmag, xhphase: harmonic frequencies, magnitudes and phases (one row per frame)
    raises ValueError if minSineDur is negative
    """
    if minSineDur < 0:
        raise ValueError("Minimum duration of sine tracks smaller than 0")
    hM1 = (w.size + 1) // 2                                  # half analysis window size by rounding
    hM2 = w.size // 2                                        # half analysis window size by floor
    x = np.append(np.zeros(hM2), x)                          # add zeros at beginning to center first window at sample 0
    x = np.append(x, np.zeros(hM2))                          # add zeros at the end to analyze last sample
    pin = hM1                                                # init sound pointer in middle of anal window
    pend = x.size - hM1                                      # last sample to start a frame
    w = w / sum(w)                                           # normalize analysis window
    hfreqp = []                                              # harmonic frequencies of previous frame
    f0stable = 0                                             # last f0 considered stable (0 = none)
    while pin <= pend:
        x1 = x[pin - hM1:pin + hM2]                          # select frame
        mX, pX = DFT.dftAnal(x1, w, N)                       # compute dft
        ploc = UF.peakDetection(mX, t)                       # detect peak locations
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)  # refine peak values
        ipfreq = fs * iploc / N                              # convert locations to Hz
        f0t = UF.f0Twm(ipfreq, ipmag, f0et, minf0, maxf0, f0stable)  # find f0
        if (((f0stable == 0) & (f0t > 0))
                or ((f0stable > 0) & (np.abs(f0stable - f0t) < f0stable / 5.0))):
            f0stable = f0t                                   # consider a stable f0 if it is close to the previous one
        else:
            f0stable = 0
        hfreq, hmag, hphase = harmonicDetection(ipfreq, ipmag, ipphase, f0t,
                                                nH, hfreqp, fs, harmDevSlope)  # find harmonics
        hfreqp = hfreq
        if pin == hM1:                                       # first frame
            xhfreq = np.array([hfreq])
            xhmag = np.array([hmag])
            xhphase = np.array([hphase])
        else:                                                # next frames
            xhfreq = np.vstack((xhfreq, np.array([hfreq])))
            xhmag = np.vstack((xhmag, np.array([hmag])))
            xhphase = np.vstack((xhphase, np.array([hphase])))
        pin += H                                             # advance sound pointer
    xhfreq = SM.cleaningSineTracks(xhfreq, round(fs * minSineDur / H))  # delete tracks shorter than minSineDur
    return xhfreq, xhmag, xhphase
def minFreqEstErr(inputFile, f):
    """
    Find the smallest window size M = 100*k + 1 (k = 1, 2, ...) for which the first
    interpolated spectral peak lies less than 0.05 Hz away from f.

    Each candidate frame is taken around the 0.5 second mark, windowed with a blackman
    window and analysed with an FFT of size nextPow2(M). A window that yields no peak
    is treated as a miss and the next k is tried.

    Inputs:
            inputFile (string) = wav file including the path
            f (float) = frequency of the sinusoid present in the input audio signal (Hz)
    Output:
            fEst (float) = Estimated frequency of the sinusoid (Hz)
            M (int) = Window size
            N (int) = FFT size
    """
    # analysis parameters:
    window = 'blackman'
    t = -40

    fs, x = UF.wavread(inputFile)  # read once; the signal does not change between window sizes
    k = 0
    while True:
        k += 1
        M = 100 * k + 1
        N = nextPow2(M)
        w = get_window(window, M)
        # slice bounds must be integers; int() truncates the half-sample offsets
        start = int(0.5 * fs - M / 2.0)
        stop = int(0.5 * fs + M / 2.0)
        x1 = x[start:stop]
        mX, pX = DFT.dftAnal(x1, w, N)
        ploc = UF.peakDetection(mX, t)
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)
        fEst = iploc * fs / N
        if len(fEst) >= 1 and abs(f - fEst[0]) < 0.05:
            return (fEst[0], M, N)
def sineModelMultiRes(x, fs, w, N, t, B):
    """
    Analysis/synthesis of a sound using the sinusoidal model, without sine tracking
    x: input array sound, fs: sampling rate, w: array of analysis windows,
    N: array of sizes of complex spectrum (one per window), t: threshold in negative dB,
    B: array of frequency bands, one (lo, hi) pair in Hz per window
    returns y: output array sound

    Every frame is analysed once per window; peaks whose frequency falls outside that
    window's band [lo, hi) are not removed but have their magnitude, phase and
    frequency multiplied by zero before synthesis.
    """
    hM1 = [(_w.size + 1) // 2 for _w in w]                   # half analysis window(s) size by rounding
    hM2 = [_w.size // 2 for _w in w]                         # half analysis window(s) size by floor
    Ns = 512                                                 # FFT size for synthesis (even)
    H = Ns // 4                                              # hop size; floor division keeps it usable as an index
    hNs = Ns // 2                                            # half of synthesis FFT size
    pin = max(hNs, max(hM1))                                 # init sound pointer in middle of anal window
    pend = x.size - max(hNs, max(hM1))                       # last sample to start a frame
    yw = np.zeros(Ns)                                        # initialize output sound frame
    y = np.zeros(x.size)                                     # initialize output array
    w = [_w / sum(_w) for _w in w]                           # normalize analysis window(s)
    sw = np.zeros(Ns)                                        # initialize synthesis window
    ow = triang(2 * H)                                       # triangular window
    sw[hNs - H:hNs + H] = ow                                 # add triangular window
    bh = blackmanharris(Ns)                                  # blackmanharris window
    bh = bh / sum(bh)                                        # normalized blackmanharris window
    sw[hNs - H:hNs + H] = sw[hNs - H:hNs + H] / bh[hNs - H:hNs + H]  # normalized synthesis window
    while pin < pend:                                        # while input sound pointer is within sound
        # -----analysis-----
        ipmag = ipphase = ipfreq = np.array([])              # np.append below rebinds each name separately
        for i in range(len(w)):
            _hM1, _hM2, _w, _N, _B = (hM1[i], hM2[i], w[i], N[i], B[i])
            x1 = x[pin - _hM1:pin + _hM2]                    # select frame
            mX, pX = DFT.dftAnal(x1, _w, _N)                 # compute dft
            ploc = UF.peakDetection(mX, t)                   # detect locations of peaks
            iploc, _ipmag, _ipphase = UF.peakInterp(mX, pX, ploc)  # refine peak values by interpolation
            _ipfreq = fs * iploc / float(_N)                 # convert peak locations to Hertz
            lo, hi = (_B[0], _B[1])                          # low/high from band tuples [..(lo, hi)..]
            mask = (_ipfreq >= lo) & (_ipfreq < hi)          # mask for in-band components
            ipmag = np.append(ipmag, _ipmag * mask)          # mask and append components
            ipphase = np.append(ipphase, _ipphase * mask)
            ipfreq = np.append(ipfreq, _ipfreq * mask)
        # -----synthesis-----
        Y = UF.genSpecSines(ipfreq, ipmag, ipphase, Ns, fs)  # generate sines in the spectrum
        fftbuffer = np.real(ifft(Y))                         # compute inverse FFT
        yw[:hNs - 1] = fftbuffer[hNs + 1:]                   # undo zero-phase window
        yw[hNs - 1:] = fftbuffer[:hNs + 1]
        y[pin - hNs:pin + hNs] += sw * yw                    # overlap-add and apply a synthesis window
        pin += H                                             # advance sound pointer
    return y
def test():
    """
    Search for the smallest window size M = 100*k + 1 whose frequency estimate is within
    0.05 Hz for every integer test frequency from 100 to 7999 Hz.

    For each k, a sine is generated with generateSine(f) for each frequency in turn and
    analysed around the 0.5 second mark (fs is fixed at 44100). The estimation error is
    printed for every frequency tried; the first frequency whose error exceeds 0.05 Hz
    rejects that k. When a k passes for all frequencies, the last estimate, k, M and N
    are printed. Nothing is returned.
    """
    window = 'blackman'
    t = -40
    fs = 44100
    k = 1
    matched = False
    while True:
        M = (100 * k) + 1
        N = int(pow(2, np.ceil(np.log2(M))))
        w = get_window(window, M)
        # integer slice bounds for the M samples around 0.5 s
        start = int(.5 * fs - M // 2)
        stop = int(.5 * fs + M // 2 + 1)
        for f in np.arange(100, 8000):
            x = generateSine(f)
            x1 = x[start:stop]
            mX, pX = DFT.dftAnal(x1, w, N)
            ploc = UF.peakDetection(mX, t)
            (iploc, ipmag, ipphase) = UF.peakInterp(mX, pX, ploc)
            fEst = (fs * float(np.sum(iploc))) / float(N)
            esterror = np.abs(fEst - f)
            print(esterror)
            if esterror > 0.05:
                matched = False
                break
            matched = True
        if matched:
            break
        k += 1
    print(fEst)
    print(k)
    print(M)
    print(N)
def minFreqEstErr(inputFile, f):
    """
    Find the smallest window size M = 100*k + 1 whose peak frequency estimate is less
    than 0.05 Hz away from f.

    Frames of M samples centered on the sample at 0.5 seconds are analysed with a
    blackman window and an FFT size of next_power_of_2(M). k runs from 1 up to (but not
    including) (x.size - 1) // 100. If no k qualifies, the estimate 0.0 is returned
    together with the last M and N tried.

    Inputs:
            inputFile (string) = wav file including the path
            f (float) = frequency of the sinusoid present in the input audio signal (Hz)
    Output:
            fEst (float) = Estimated frequency of the sinusoid (Hz)
            M (int) = Window size
            N (int) = FFT size
    """
    # analysis parameters:
    window = 'blackman'
    t = -40
    thresholdForError = 0.05

    fs, x = UF.wavread(inputFile)
    timeStamp = 0.5                        # time (s) around which the frame is centered
    sampleAtTimeStamp = int(timeStamp * fs)  # sample index at that time
    # defaults returned when the loop below never runs or never matches
    M = 100 * 1 + 1
    N = 100 * 1 + 1
    fEst = 0
    for k in range(1, (x.size - 1) // 100):
        M = 100 * k + 1
        half = M // 2                      # floor division keeps the slice bounds integral
        x1 = x[sampleAtTimeStamp - half:sampleAtTimeStamp + half + 1]
        w = get_window(window, M)
        N = next_power_of_2(M)
        mX, pX = DFT.dftAnal(x1, w, N)
        ploc = UF.peakDetection(mX, t)
        iploc, _, _ = UF.peakInterp(mX, pX, ploc)
        peakInHz = iploc * fs / float(N)
        if abs(peakInHz - f) < thresholdForError:
            return float(peakInHz), M, N
    return float(fEst), int(M), int(N)
def sineModel(x, fs, w, N, t):
    """
    Analysis/synthesis of a sound using the sinusoidal model, without sine tracking
    x: input array sound, fs: sampling rate, w: analysis window, N: size of complex spectrum,
    t: threshold in negative dB
    returns y: output array sound
    """
    hM1 = (w.size + 1) // 2                                  # half analysis window size by rounding
    hM2 = w.size // 2                                        # half analysis window size by floor
    Ns = 512                                                 # FFT size for synthesis (even)
    H = Ns // 4                                              # hop size; floor division keeps it usable as an index
    hNs = Ns // 2                                            # half of synthesis FFT size
    pin = max(hNs, hM1)                                      # init sound pointer in middle of anal window
    pend = x.size - max(hNs, hM1)                            # last sample to start a frame
    yw = np.zeros(Ns)                                        # initialize output sound frame
    y = np.zeros(x.size)                                     # initialize output array
    w = w / sum(w)                                           # normalize analysis window
    sw = np.zeros(Ns)                                        # initialize synthesis window
    ow = triang(2 * H)                                       # triangular window
    sw[hNs - H:hNs + H] = ow                                 # add triangular window
    bh = blackmanharris(Ns)                                  # blackmanharris window
    bh = bh / sum(bh)                                        # normalized blackmanharris window
    sw[hNs - H:hNs + H] = sw[hNs - H:hNs + H] / bh[hNs - H:hNs + H]  # normalized synthesis window
    while pin < pend:                                        # while input sound pointer is within sound
        # -----analysis-----
        x1 = x[pin - hM1:pin + hM2]                          # select frame
        mX, pX = DFT.dftAnal(x1, w, N)                       # compute dft
        ploc = UF.peakDetection(mX, t)                       # detect locations of peaks
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)  # refine peak values by interpolation
        ipfreq = fs * iploc / float(N)                       # convert peak locations to Hertz
        # -----synthesis-----
        Y = UF.genSpecSines(ipfreq, ipmag, ipphase, Ns, fs)  # generate sines in the spectrum
        fftbuffer = np.real(ifft(Y))                         # compute inverse FFT
        yw[:hNs - 1] = fftbuffer[hNs + 1:]                   # undo zero-phase window
        yw[hNs - 1:] = fftbuffer[:hNs + 1]
        y[pin - hNs:pin + hNs] += sw * yw                    # overlap-add and apply a synthesis window
        pin += H                                             # advance sound pointer
    return y
def sineModelAnal(x, fs, w, N, H, t, maxnSines=100, minSineDur=.01, freqDevOffset=20, freqDevSlope=0.01):
    """
    Analysis of a sound using the sinusoidal model
    x: input array sound, fs: sampling rate, w: analysis window, N: size of complex spectrum,
    H: hop-size, t: threshold in negative dB
    maxnSines: maximum number of sines per frame
    minSineDur: minimum duration of sines in seconds
    freqDevOffset: minimum frequency deviation at 0Hz
    freqDevSlope: slope increase of minimum frequency deviation
    returns xtfreq, xtmag, xtphase: frequencies, magnitudes and phases of sinusoids;
            each analysed frame contributes one row of maxnSines values (unused slots are 0)
    """
    def padded_row(values):
        """Copy values into the start of a zero-filled array of length maxnSines."""
        row = np.zeros(maxnSines)
        row[:values.size] = values
        return row

    hN = N // 2                                              # size of positive spectrum (integer bin count)
    hM1 = (w.size + 1) // 2                                  # half analysis window size by rounding
    hM2 = w.size // 2                                        # half analysis window size by floor
    x = np.append(np.zeros(hM2), x)                          # add zeros at beginning to center first window at sample 0
    x = np.append(x, np.zeros(hM2))                          # add zeros at the end to analyze last sample
    pin = hM1                                                # initialize sound pointer in middle of analysis window
    pend = x.size - hM1                                      # last sample to start a frame
    w = w / sum(w)                                           # normalize analysis window
    tfreq = np.array([])
    while pin < pend:                                        # while input sound pointer is within sound
        x1 = x[pin - hM1:pin + hM2]                          # select frame
        mX, pX = DFT.dftAnal(x1, w, N)                       # compute dft
        ploc = UF.peakDetection(mX, hN, t)                   # detect locations of peaks
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)  # refine peak values by interpolation
        ipfreq = fs * iploc / float(N)                       # convert peak locations to Hertz
        tfreq, tmag, tphase = UF.sineTracking(ipfreq, ipmag, ipphase, tfreq, freqDevOffset, freqDevSlope)
        # limit number of tracks to maxnSines (tfreq is fed back into the next frame's tracking)
        tfreq = np.resize(tfreq, min(maxnSines, tfreq.size))
        tmag = np.resize(tmag, min(maxnSines, tmag.size))
        tphase = np.resize(tphase, min(maxnSines, tphase.size))
        jtfreq = padded_row(tfreq)
        jtmag = padded_row(tmag)
        jtphase = padded_row(tphase)
        if pin == hM1:                                       # first frame initializes the outputs
            xtfreq = jtfreq
            xtmag = jtmag
            xtphase = jtphase
        else:                                                # later frames are stacked as new rows
            xtfreq = np.vstack((xtfreq, jtfreq))
            xtmag = np.vstack((xtmag, jtmag))
            xtphase = np.vstack((xtphase, jtphase))
        pin += H
    xtfreq = UF.cleaningSineTracks(xtfreq, round(fs * minSineDur / H))
    return xtfreq, xtmag, xtphase
def f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et):
    """
    Fundamental frequency detection of a sound using twm algorithm
    x: input sound; fs: sampling rate; w: analysis window; N: FFT size; H: hop size;
    t: threshold in negative dB, minf0: minimum f0 frequency in Hz,
    maxf0: maximum f0 frequency in Hz, f0et: error threshold in the f0 detection (ex: 5),
    returns f0: fundamental frequency, one value per analysed frame
    raises ValueError if minf0 < 0, maxf0 >= 10000 or H <= 0
    """
    if minf0 < 0:                                            # raise exception if minf0 is smaller than 0
        raise ValueError("Minumum fundamental frequency (minf0) smaller than 0")
    if maxf0 >= 10000:                                       # raise exception if maxf0 is 10000 Hz or more
        raise ValueError("Maximum fundamental frequency (maxf0) bigger than 10000Hz")
    if H <= 0:                                               # raise error if hop size 0 or negative
        raise ValueError("Hop size (H) smaller or equal to 0")
    hM1 = (w.size + 1) // 2                                  # half analysis window size by rounding
    hM2 = w.size // 2                                        # half analysis window size by floor
    x = np.append(np.zeros(hM2), x)                          # add zeros at beginning to center first window at sample 0
    x = np.append(x, np.zeros(hM1))                          # add zeros at the end to analyze last sample
    pin = hM1                                                # init sound pointer in middle of anal window
    pend = x.size - hM1                                      # last sample to start a frame
    w = w / sum(w)                                           # normalize analysis window
    f0 = []                                                  # initialize f0 output
    f0stable = 0                                             # last f0 considered stable (0 = none)
    while pin < pend:
        x1 = x[pin - hM1:pin + hM2]                          # select frame
        mX, pX = DFT.dftAnal(x1, w, N)                       # compute dft
        ploc = UF.peakDetection(mX, t)                       # detect peak locations
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)  # refine peak values
        ipfreq = fs * iploc / N                              # convert locations to Hz
        f0t = f0Twm(ipfreq, ipmag, f0et, minf0, maxf0, f0stable)  # find f0
        if (((f0stable == 0) & (f0t > 0))
                or ((f0stable > 0) & (np.abs(f0stable - f0t) < f0stable / 5.0))):
            f0stable = f0t                                   # consider a stable f0 if it is close to the previous one
        else:
            f0stable = 0
        f0 = np.append(f0, f0t)                              # add f0 to output array
        pin += H                                             # advance sound pointer
    return f0
def minFreqEstErr(inputFile, f):
    """
    Estimate the frequency of a sinusoid from one frame around the 0.5 second mark.

    The window size is fixed at M = 100*21 + 1 and the FFT size N is the smallest power
    of two >= M. The estimate is derived from the sum of all interpolated peak
    locations, and the absolute estimation error is printed.

    Inputs:
            inputFile (string) = wav file including the path
            f (float) = frequency of the sinusoid present in the input audio signal (Hz)
    Output:
            fEst (float) = Estimated frequency of the sinusoid (Hz)
            M (int) = Window size
            N (int) = FFT size
    """
    # analysis parameters:
    window = 'blackman'
    t = -40

    (fs, x) = UF.wavread(inputFile)
    # NOTE(review): k is hard-coded instead of being searched for; confirm this is intended.
    k = 21
    M = (100 * k) + 1
    N = int(pow(2, np.ceil(np.log2(M))))
    w = get_window(window, M)
    # integer slice bounds for the M samples around 0.5 s
    start = int(.5 * fs - M // 2)
    stop = int(.5 * fs + M // 2 + 1)
    x1 = x[start:stop]
    mX, pX = DFT.dftAnal(x1, w, N)
    ploc = UF.peakDetection(mX, t)
    (iploc, ipmag, ipphase) = UF.peakInterp(mX, pX, ploc)
    fEst = (fs * float(np.sum(iploc))) / float(N)
    esterror = np.abs(fEst - f)
    print(esterror)
    return (fEst, M, N)
def minFreqEstErr(inputFile, f):
    """
    Estimate the frequency of a sinusoid with a window size chosen by check_k.

    k is increased from 1 while check_k(100, 2000, fs, k, window) returns less than 2;
    the window size is then M = 100*k + 1 and the FFT size N the smallest power of two
    >= M. The analysed frame is centered on the middle sample of the file. Progress
    values (fs, each rejected k, M, N, signal length, frame length) are printed.
    The argument f is not used by this implementation.

    Inputs:
            inputFile (string) = wav file including the path
            f (float) = frequency of the sinusoid present in the input audio signal (Hz)
    Output:
            fEst (float) = Estimated frequency of the sinusoid (Hz)
            M (int) = Window size
            N (int) = FFT size
    """
    # analysis parameters:
    window = 'blackman'
    t = -40

    (fs, x) = UF.wavread(inputFile)
    print(fs)
    k = 1
    while check_k(100, 2000, fs, k, window) < 2:
        print(k)
        k += 1
    M = 100 * k + 1
    print("M:  %s" % (M,))
    N = 1
    while N < M:                      # smallest power of two that is >= M
        N *= 2
    print("N:  %s" % (N,))
    print("fs:  %s" % (fs,))
    print("length:  %s" % (len(x),))
    center = len(x) // 2              # floor division keeps the slice bounds integral
    h = M // 2
    x_cnk = x[center - h:center + h + 1]
    print("chunk:  %s" % (len(x_cnk),))
    w = get_window(window, M)
    (mX, pX) = DFT.dftAnal(x_cnk, w, N)
    p_loc = UF.peakDetection(mX, t)
    p_int = UF.peakInterp(mX, pX, p_loc)
    peak = p_int[0] * (fs / float(N))  # interpolated peak locations converted to Hz
    return (peak[0], M, N)
def minFreqEstErr(inputFile, f):
    """
    Find the smallest window size M = 100*k + 1, starting at k = 2, whose first
    interpolated peak lies less than 0.05 Hz away from f.

    Frames are taken around the 0.5 second mark, windowed with a blackman window and
    analysed with an FFT size of smallest_power_of_2_greater_than(M). The search has no
    upper bound on k.

    Inputs:
            inputFile (string) = wav file including the path
            f (float) = frequency of the sinusoid present in the input audio signal (Hz)
    Output:
            fEst (float) = Estimated frequency of the sinusoid (Hz)
            M (int) = Window size
            N (int) = FFT size
    """
    # analysis parameters:
    window = 'blackman'
    t = -40

    (fs, x) = UF.wavread(inputFile)
    # NOTE(review): the search skips k = 1 (M = 101); confirm this is intended.
    k = 2
    while True:
        M = 100 * k + 1
        N = smallest_power_of_2_greater_than(M)
        # integer slice bounds for the M samples around 0.5 s
        start = int(fs * 0.5 - M // 2)
        stop = int(fs * 0.5 + M // 2 + 1)
        x1 = x[start:stop]
        w = get_window(window, M)
        mX, pX = DFT.dftAnal(x1, w, N)
        ploc = UF.peakDetection(mX, t)
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)
        fsin = iploc[0] * fs / float(N)
        if abs(fsin - f) < 0.05:
            return fsin, M, N
        k = k + 1
def minFreqEstErr(inputFile, f):
    """
    Among the window sizes M = 100*k + 1 for k = 5..24, pick the one whose peak
    frequency estimate is closest to f.

    Each frame starts at the 0.5 second mark and is M samples long. The FFT size is the
    first power of two from 2**8 to 2**12 that exceeds M (2**12 if none does). Every
    candidate and the final choice are printed. On equal errors the later (larger)
    window wins.

    Inputs:
            inputFile (string) = wav file including the path
            f (float) = frequency of the sinusoid present in the input audio signal (Hz)
    Output:
            fEst = Estimated frequency of the sinusoid (Hz), as computed from the
                   interpolated peak locations
            M (int) = Window size
            N (int) = FFT size
    """
    # analysis parameters:
    window = 'blackman'
    t = -40

    (fs, x) = UF.wavread(inputFile)
    first_k = 5
    for k in range(first_k, 25):
        M = k * 100 + 1
        exponent = 8
        while exponent < 12 and 2 ** exponent <= M:
            exponent += 1
        N = 2 ** exponent
        w = get_window(window, M)
        frame = x[int(0.5 * fs):int(0.5 * fs + M)]
        mX, pX = DFT.dftAnal(frame, w, N)
        ploc = UF.peakDetection(mX, t)
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)
        candidate = fs * iploc / float(N)
        deviation = abs(f - candidate)
        print(M, N, candidate, deviation)
        if k == first_k:
            smallest_deviation = deviation     # seed the running minimum with the first candidate
        if smallest_deviation >= deviation:
            fEst, best_M, best_N = candidate, M, N
            smallest_deviation = deviation
    print(fEst, best_M, best_N)
    return (fEst, best_M, best_N)
def minFreqEstErr(inputFile, f):
    """
    Search window sizes 101, 201, 301, ... for the first one whose peak frequency
    estimate is less than 0.05 Hz away from f.

    Frames are centered on the middle sample of the file, windowed with a blackman
    window and analysed with an FFT size of nextPow(M). The search stops when the
    window size reaches the length of the signal.

    Inputs:
            inputFile (string) = wav file including the path
            f (float) = frequency of the sinusoid present in the input audio signal (Hz)
    Output:
            (freq, M, N) = last frequency estimate (Hz), window size and FFT size; when
            no window qualified, M is the first size that was no longer tried
    """
    # analysis parameters:
    window = 'blackman'
    t = -40
    tolerance = 0.05

    (fs, x) = UF.wavread(inputFile)
    middle = x.size // 2

    size = 101
    fft_size = 0
    estimate = 0
    while size < x.size:
        half_span = size // 2
        taper = get_window(window, size)
        segment = x[middle - half_span:middle + half_span + 1]
        fft_size = nextPow(size)
        mX, pX = DFT.dftAnal(segment, taper, fft_size)
        ploc = UF.peakDetection(mX, t)
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)
        estimate = float(iploc) * float(fs) / fft_size
        if abs(f - estimate) < tolerance:
            return (estimate, size, fft_size)
        size += 100
    return (estimate, size, fft_size)
def dftAnal(p, w, N, B):
    """
    Analyse one frame of the enclosing-scope signal x and return the interpolated
    spectral peaks that fall inside one frequency band.

    Relies on names that are not parameters: x (signal), fs (sampling rate), t (peak
    threshold) and Bs (list of band upper edges that must contain B).

    p: sample position the frame is centered on, w: analysis window (normalized here),
    N: FFT size, B: upper edge of the band in Hz; the lower edge is the entry that
       precedes B in Bs, or bin 1 when B is the first entry
    returns ipfreq, ipmag, ipphase: frequencies (Hz), magnitudes and phases of the peaks
    """
    hM1 = (w.size + 1) // 2                                  # half analysis window size by rounding
    hM2 = w.size // 2                                        # half analysis window size by floor
    x1 = x[p - hM1:p + hM2]                                  # select frame
    rw = w / sum(w)                                          # normalize analysis window
    mX, pX = DFT.dftAnal(x1, rw, N)
    upperIndex = Bs.index(B)
    if upperIndex > 0:
        lower_bin = int(np.ceil(float(Bs[upperIndex - 1]) * N / fs))
    else:
        lower_bin = 1
    upper_bin = int(np.ceil(float(B) * N / fs))
    ploc = UF.peakDetection(mX, t)
    # keep only the peaks whose bin lies in (lower_bin, upper_bin]
    ploc = ploc[np.logical_and(ploc > lower_bin, ploc <= upper_bin)]
    iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)
    ipfreq = fs * iploc / float(N)                           # convert peak locations to Hertz
    return (ipfreq, ipmag, ipphase)
def dftAnalMultiRes(p, w, N, B):
    """
    Analyse one frame of the enclosing-scope signal x and return the interpolated
    spectral peaks that fall inside one frequency band.

    Relies on names that are not parameters: x (signal), fs (sampling rate), t (peak
    threshold) and Bs (list of band upper edges that must contain B).

    p: sample position the frame is centered on, w: analysis window (normalized here),
    N: FFT size, B: upper edge of the band in Hz; the lower edge is the entry that
       precedes B in Bs, or bin 1 when B is the first entry
    returns ipfreq, ipmag, ipphase: frequencies (Hz), magnitudes and phases of the peaks
    """
    hM1 = (w.size + 1) // 2                                  # half analysis window size by rounding
    hM2 = w.size // 2                                        # half analysis window size by floor
    x1 = x[p - hM1:p + hM2]                                  # select frame around the requested position
    rw = w / sum(w)                                          # normalize analysis window
    mX, pX = DFT.dftAnal(x1, rw, N)                          # compute dft with the normalized window
    upper_index = Bs.index(B)
    if upper_index > 0:
        lower_bin = int(np.ceil(float(Bs[upper_index - 1]) * N / fs))
    else:
        lower_bin = 1
    upper_bin = int(np.ceil(float(B) * N / fs))
    ploc = UF.peakDetection(mX, t)                           # detect locations of peaks
    ploc = ploc[np.logical_and(ploc > lower_bin, ploc <= upper_bin)]  # choose the peaks in band
    iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)      # refine peak values by interpolation
    ipfreq = fs * iploc / float(N)                           # convert peak locations to Hertz
    return (ipfreq, ipmag, ipphase)
def minFreqEstErr(inputFile, f):
    """
    Find the smallest window size M = 100*k + 1 (k = 1, 2, ...) whose first
    interpolated peak lies less than 0.05 Hz away from f.

    Frames of M samples around the 0.5 second mark are windowed with a blackman window
    and analysed with an FFT size N that is the smallest power of two greater than M.
    The error of every attempt and a final summary are printed. The search has no upper
    bound on k.

    Inputs:
            inputFile (string) = wav file including the path
            f (float) = frequency of the sinusoid present in the input audio signal (Hz)
    Output:
            fEst (float) = Estimated frequency of the sinusoid (Hz)
            M (int) = Window size
            N (int) = FFT size
    """
    # analysis parameters:
    window = 'blackman'
    t = -40

    fs, x = UF.wavread(inputFile)
    k = 1
    while True:
        M = 100 * k + 1
        N = int(2 ** (math.floor(np.log2(M)) + 1))
        w = get_window(window, M)
        half = (M + 1) // 2                                  # M is odd, so this is exact
        # integer slice bounds; the frame holds exactly M samples
        x1 = x[int(0.5 * fs - half):int(0.5 * fs + half - 1)]
        mX, pX = DFT.dftAnal(x1, w, N)
        ploc = UF.peakDetection(mX, t)
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)
        ipfreq = fs * iploc / float(N)
        fEst = ipfreq[0]
        fEstError = abs(fEst - f)
        print("fEstError {0:.3f}".format(fEstError))
        if fEstError < 0.05:
            break
        k += 1
    print("fEst {}, M {}, N {}, frequency estimation error {:.3f}".format(fEst, M, N, fEstError))
    return fEst, M, N
def minFreqEstErr(inputFile, f):
    """
    Find the smallest window size M = 100*k + 1 (k = 1, 2, ...) whose first
    interpolated peak lies within 0.05 Hz of f.

    Frames are centered on the middle sample of the file, windowed with a blackman
    window and analysed with an FFT size N that is the smallest power of two >= M.
    The search also stops once the previously tried M exceeds the signal length.

    Inputs:
            inputFile (string) = wav file including the path
            f (float) = frequency of the sinusoid present in the input audio signal (Hz)
    Output:
            fEst (float) = Estimated frequency of the sinusoid (Hz)
            M (int) = Window size
            N (int) = FFT size
    Raises:
            ValueError if the file contains no samples
    """
    # analysis parameters:
    window = 'blackman'
    tol = -40

    (fs, x) = UF.wavread(inputFile)
    nMax = len(x)
    if nMax < 1:
        raise ValueError("input file contains no samples")
    nMid = nMax // 2
    k = 0
    M = 1
    fEst = -1.0
    while M <= nMax and abs(f - fEst) > 0.05:
        k += 1
        M = 100 * k + 1
        half = M // 2                                        # integer half width for the slice bounds
        xSubSet = x[nMid - half:nMid + half + 1]
        w = get_window(window, M)
        N = np.ceil(np.log(float(M)) / np.log(2))
        N = int(2 ** N)
        (mX, pX) = DFT.dftAnal(xSubSet, w, N)
        ploc = UF.peakDetection(mX, tol)
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)
        # keep a scalar so the loop condition never depends on array truthiness
        fEst = (iploc * fs / N)[0]
    return (fEst, M, N)
def sineModel(x, fs, w, N, t):
    """
    Analysis/synthesis of a sound using the sinusoidal model
    x: input array sound, fs: sampling rate, w: analysis window, N: size of complex spectrum,
    t: threshold in negative dB
    returns y: output array sound
    """
    hN = N // 2                                              # size of positive spectrum (integer bin count)
    hM1 = (w.size + 1) // 2                                  # half analysis window size by rounding
    hM2 = w.size // 2                                        # half analysis window size by floor
    Ns = 512                                                 # FFT size for synthesis (even)
    H = Ns // 4                                              # hop size; floor division keeps it usable as an index
    hNs = Ns // 2                                            # half of synthesis FFT size
    pin = max(hNs, hM1)                                      # init sound pointer in middle of anal window
    pend = x.size - max(hNs, hM1)                            # last sample to start a frame
    yw = np.zeros(Ns)                                        # initialize output sound frame
    y = np.zeros(x.size)                                     # initialize output array
    w = w / sum(w)                                           # normalize analysis window
    sw = np.zeros(Ns)                                        # initialize synthesis window
    ow = triang(2 * H)                                       # triangular window
    sw[hNs - H:hNs + H] = ow                                 # add triangular window
    bh = blackmanharris(Ns)                                  # blackmanharris window
    bh = bh / sum(bh)                                        # normalized blackmanharris window
    sw[hNs - H:hNs + H] = sw[hNs - H:hNs + H] / bh[hNs - H:hNs + H]  # normalized synthesis window
    while pin < pend:                                        # while input sound pointer is within sound
        # -----analysis-----
        x1 = x[pin - hM1:pin + hM2]                          # select frame
        mX, pX = DFT.dftAnal(x1, w, N)                       # compute dft
        ploc = UF.peakDetection(mX, hN, t)                   # detect locations of peaks
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)  # refine peak values by interpolation
        # Peak frequency in Hz is fs * bin / N with the analysis FFT size N.
        # NOTE(review): assumes UF.genSpecSines takes frequencies in Hz (it is passed fs) - confirm.
        ipfreq = fs * iploc / float(N)
        # -----synthesis-----
        Y = UF.genSpecSines(ipfreq, ipmag, ipphase, Ns, fs)  # generate sines in the spectrum
        fftbuffer = np.real(ifft(Y))                         # compute inverse FFT
        yw[:hNs - 1] = fftbuffer[hNs + 1:]                   # undo zero-phase window
        yw[hNs - 1:] = fftbuffer[:hNs + 1]
        y[pin - hNs:pin + hNs] += sw * yw                    # overlap-add and apply a synthesis window
        pin += H                                             # advance sound pointer
    return y
def findk(fr, fs, window, startk):
    """
    Search, from k = 1 upwards, for a window size M = 100*k + 1 whose first interpolated
    peak is within 0.05 Hz of fr on a synthetic sine of 441000 samples.

    The frame of M samples is taken around the middle of the signal and analysed with an
    FFT size equal to the smallest power of two >= M and a -40 dB peak threshold.

    fr: frequency of the test sine in Hz, fs: sampling rate, window: window name for
    get_window, startk: accepted but not used (the search always starts at k = 1)
    returns (k, err): k has already been advanced by one past the window that met the
            criterion; err is the absolute estimation error of that window
    """
    # the test signal does not depend on k, so it is generated once
    n = np.arange(441000.0)
    x = np.sin(2.0 * np.pi * fr * n / fs)
    middle = len(x) // 2
    k = 1
    p = 0
    while abs(p - fr) > 0.05:
        M = 100 * k + 1
        N = 1
        while N < M:                                         # smallest power of two that is >= M
            N *= 2
        l_h = middle - M // 2 + 1                            # integer frame start
        h_h = l_h + M
        x_cnk = x[l_h:h_h]
        w = get_window(window, M)
        (mX, pX) = DFT.dftAnal(x_cnk, w, N)
        p_loc = UF.peakDetection(mX, -40)
        p_int = UF.peakInterp(mX, pX, p_loc)
        peak = p_int[0] * (fs / float(N))                    # interpolated peak locations in Hz
        p = peak[0]
        k += 1
    return k, abs(p - fr)
def minFreqEstErr(inputFile, f):
    """
    Find the smallest window size M = 100*k + 1 for k = 1..99 whose first interpolated
    peak lies within 0.05 Hz of f.

    Frames are centered on the middle sample of the file, windowed with a blackman
    window and analysed with the smallest power-of-two FFT size greater than M. If no
    k qualifies, the values of the last attempt (k = 99) are returned.

    Inputs:
            inputFile (string) = wav file including the path
            f (float) = frequency of the sinusoid present in the input audio signal (Hz)
    Output:
            fEst (float) = Estimated frequency of the sinusoid (Hz)
            M (int) = Window size
            N (int) = FFT size
    """
    # analysis parameters:
    window = 'blackman'
    t = -40

    (fs, x) = UF.wavread(inputFile)                          # read in the inputFile
    Ns = 2 ** np.arange(24)                                  # list of possible FFT sizes
    error = 0.05                                             # allowable frequency error in Hz
    middle = x.size // 2                                     # floor division keeps the slice bounds integral
    for k in range(1, 100):
        M = 100 * k + 1
        w = get_window(window, M)
        hM1 = (M + 1) // 2                                   # half analysis window size by rounding
        hM2 = M // 2                                         # half analysis window size by floor
        fftbuffer = x[middle - hM2:middle + hM1]             # frame of M samples around the middle
        N = Ns[np.where(Ns > M)[0][0]]                       # smallest listed size larger than M
        (mX, pX) = DFT.dftAnal(fftbuffer, w, N)
        ploc = UF.peakDetection(mX, t)                       # peak locations
        (iploc, ipmag, ipphase) = UF.peakInterp(mX, pX, ploc)  # interpolated peak values
        fEst = fs * iploc[0] / N
        if abs(fEst - f) <= error:
            break
    return (fEst, M, N)
def minFreqEstErr(inputFile, f):
    """
    Find the smallest window size M = 100*k + 1 (k = 1, 2, ...) whose first
    interpolated peak lies within 0.05 Hz of f.

    Frames of M samples around the middle of the file are windowed with a blackman
    window (requested from get_window with its third argument set to False) and
    analysed with an FFT size N that is the smallest power of two >= M. The search has
    no upper bound on k.

    Inputs:
            inputFile (string) = wav file including the path
            f (float) = frequency of the sinusoid present in the input audio signal (Hz)
    Output:
            fEst (float) = Estimated frequency of the sinusoid (Hz)
            M (int) = Window size
            N (int) = FFT size
    """
    # analysis parameters:
    window = 'blackman'
    t = -40

    fs, x = UF.wavread(inputFile)
    center = 0.5 * x.size
    estimationError = 1000
    iterM = 1
    while estimationError > 0.05:
        M = iterM * 100 + 1
        # Floor division keeps the frame at exactly M samples; with true division the
        # two int() truncations would yield M + 1 samples.
        half = M // 2
        fragment = x[int(center - half):int(center + half + 1)]
        w = get_window(window, M, False)
        N = int(np.power(2, np.ceil(np.log2(M))))
        mX, pX = DFT.dftAnal(fragment, w, N)
        ploc = UF.peakDetection(mX, t)
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)
        locBinToHz = iploc[0] * fs / N
        estimationError = np.abs(f - locBinToHz)
        iterM = iterM + 1
    return locBinToHz, int(M), int(N)
def time2Freq(x, fs, w, N, pinFirst, hopSizeMelodia, t):
    '''
    Computes the fourier transform, peak thresholding and peak interpolation for one window.

    The input is zero-padded by half a window on both sides and the analysed frame is
    centered at sample pinFirst + 300 * hopSizeMelodia of the padded signal. The frame
    time is printed and the result is handed to visualize().

    x: input sound, fs: sampling rate, w: analysis window, N: FFT size,
    pinFirst: sample offset of the first frame, hopSizeMelodia: hop size in samples,
    t: peak threshold in negative dB
    returns mX, iploc, ipmag, ipphase: magnitude spectrum of the frame and the
            interpolated peak locations, magnitudes and phases
    '''
    hM1 = (w.size + 1) // 2                                  # half analysis window size by rounding
    hM2 = w.size // 2                                        # half analysis window size by floor
    x = np.append(np.zeros(hM2), x)                          # add zeros at beginning to center first window at sample 0
    x = np.append(x, np.zeros(hM2))                          # add zeros at the end to analyze last sample
    pin = pinFirst + 300 * hopSizeMelodia                    # frame 300 hops after the first position

    # process one window
    print("at time {}".format(pin / fs))
    x1 = x[pin - hM1:pin + hM2]                              # select frame
    mX, pX = DFT.dftAnal(x1, w, N)                           # compute dft
    ploc = UF.peakDetection(mX, t)                           # detect peak locations
    iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)      # refine peak values

    # optional
    visualize(N, mX, pin, fs, ploc, iploc, ipmag, ipphase)
    return mX, iploc, ipmag, ipphase
def sineModelMultiRes(x, fs, multi_w, multi_N, t, multi_B):
    """
    Analysis/synthesis of a sound using the sinusoidal model with one analysis
    resolution per frequency band, without sine tracking.

    x: input array sound, fs: sampling rate,
    multi_w: analysis windows, one per band,
    multi_N: sizes of the complex spectrum, one per band,
    t: threshold in negative dB,
    multi_B: upper frequency limit in Hz of each band; band i keeps the peaks
             with multi_B[i-1] <= f < multi_B[i] (band 0 has no lower limit)
    returns y: output array sound
    """
    bands = range(len(multi_B))  # to iterate over bands
    multi_w_size = np.array([multi_w[i].size for i in bands])
    multi_hM1 = np.floor((multi_w_size + 1) / 2.0).astype(int)  # half analysis window size by rounding
    multi_hM2 = np.floor(multi_w_size / 2.0).astype(int)        # half analysis window size by floor

    Ns = 512       # FFT size for synthesis (even)
    # Floor division keeps H and hNs usable as sizes and slice bounds on
    # Python 3 as well as Python 2.
    H = Ns // 4    # hop size used for analysis and synthesis
    hNs = Ns // 2  # half of synthesis FFT size

    multi_pin = np.maximum(hNs, multi_hM1)  # init sound pointers in middle of analysis windows
    multi_pend = x.size - multi_pin         # last samples to start a frame
    yw_combined = np.zeros(Ns)              # output sound frame
    y_combined = np.zeros(x.size)           # output array
    multi_w = [multi_w[i] / sum(multi_w[i]) for i in bands]  # normalize analysis windows

    sw = np.zeros(Ns)                  # synthesis window
    ow = triang(2 * H)                 # triangular window
    sw[hNs - H:hNs + H] = ow           # add triangular window
    bh = blackmanharris(Ns)            # blackmanharris window
    bh = bh / sum(bh)                  # normalized blackmanharris window
    sw[hNs - H:hNs + H] = sw[hNs - H:hNs + H] / bh[hNs - H:hNs + H]  # normalized synthesis window

    while (multi_pin < multi_pend).all():  # while every band pointer is within the sound
        #-----analysis-----
        multi_ipfreq = []
        multi_ipmag = []
        multi_ipphase = []
        for i in bands:
            frame = x[(multi_pin[i] - multi_hM1[i]):(multi_pin[i] + multi_hM2[i])]  # select frame
            mXi, pXi = DFT.dftAnal(frame, multi_w[i], multi_N[i])
            ploci = UF.peakDetection(mXi, t)                           # detect locations of peaks
            iploci, ipmagi, ipphasei = UF.peakInterp(mXi, pXi, ploci)  # refine peak values by interpolation
            multi_ipfreq.append(fs * iploci / float(multi_N[i]))       # convert peak locations to Hertz
            multi_ipmag.append(ipmagi)
            multi_ipphase.append(ipphasei)

        # Keep, from each band's analysis, only the peaks inside that band.
        # One pass with lists replaces the original count-then-fill double loop.
        freqs = []
        mags = []
        phases = []
        for i in bands:
            for p in range(len(multi_ipfreq[i])):
                f = multi_ipfreq[i][p]
                if (i == 0 or f >= multi_B[i - 1]) and f < multi_B[i]:
                    freqs.append(f)
                    mags.append(multi_ipmag[i][p])
                    phases.append(multi_ipphase[i][p])
        ipfreq_combined = np.array(freqs, dtype=float)
        ipmag_combined = np.array(mags, dtype=float)
        ipphase_combined = np.array(phases, dtype=float)

        #-----synthesis-----
        Y_combined = UF.genSpecSines(ipfreq_combined, ipmag_combined,
                                     ipphase_combined, Ns, fs)  # generate sines in the spectrum
        fftbuffer_combined = np.real(ifft(Y_combined))          # compute inverse FFT
        yw_combined[:hNs - 1] = fftbuffer_combined[hNs + 1:]    # undo zero-phase window
        yw_combined[hNs - 1:] = fftbuffer_combined[:hNs + 1]
        # overlap-add and apply a synthesis window; band 0's pointer is the output position
        y_combined[multi_pin[0] - hNs:multi_pin[0] + hNs] += sw * yw_combined
        multi_pin += H  # advance all sound pointers

    return y_combined
def hprModel(x, fs, w, N, t, nH, minf0, maxf0, f0et):
    """
    Analysis/synthesis of a sound using the harmonic plus residual model
    x: input sound, fs: sampling rate, w: analysis window,
    N: FFT size (minimum 512), t: threshold in negative dB,
    nH: maximum number of harmonics, minf0: minimum f0 frequency in Hz,
    maxf0: maximum f0 frequency in Hz,
    f0et: error threshold in the f0 detection (ex: 5)
    returns y: output sound, yh: harmonic component, xr: residual component
    """
    hM1 = (w.size + 1) // 2  # half analysis window size by rounding
    hM2 = w.size // 2        # half analysis window size by floor
    Ns = 512                 # FFT size for synthesis (even)
    # Floor division keeps H and hNs usable as sizes and slice bounds on
    # Python 3 as well as Python 2.
    H = Ns // 4              # hop size used for analysis and synthesis
    hNs = Ns // 2
    pin = max(hNs, hM1)      # initialize sound pointer in middle of analysis window
    pend = x.size - pin      # last sample to start a frame

    yhw = np.zeros(Ns)       # harmonic output sound frame
    xrw = np.zeros(Ns)       # residual output sound frame
    yh = np.zeros(x.size)    # harmonic output array
    xr = np.zeros(x.size)    # residual output array

    w = w / sum(w)           # normalize analysis window
    sw = np.zeros(Ns)
    ow = triang(2 * H)       # overlapping window
    sw[hNs - H:hNs + H] = ow
    bh = blackmanharris(Ns)  # synthesis window
    bh = bh / sum(bh)        # normalize synthesis window
    wr = bh                  # window for residual
    sw[hNs - H:hNs + H] = sw[hNs - H:hNs + H] / bh[hNs - H:hNs + H]

    hfreqp = []              # harmonic frequencies of the previous frame
    f0stable = 0
    while pin < pend:
        #-----analysis-----
        x1 = x[pin - hM1:pin + hM2]                          # select frame
        mX, pX = DFT.dftAnal(x1, w, N)                       # compute dft
        ploc = UF.peakDetection(mX, t)                       # find peaks
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)  # refine peak values
        ipfreq = fs * iploc / N                              # convert locations to Hz
        f0t = UF.f0Twm(ipfreq, ipmag, f0et, minf0, maxf0, f0stable)  # find f0
        # consider f0 stable if it is new, or within 20% of the previous stable one
        if ((f0stable == 0) & (f0t > 0)) \
                or ((f0stable > 0) & (np.abs(f0stable - f0t) < f0stable / 5.0)):
            f0stable = f0t
        else:
            f0stable = 0
        hfreq, hmag, hphase = HM.harmonicDetection(ipfreq, ipmag, ipphase,
                                                   f0t, nH, hfreqp, fs)  # find harmonics
        hfreqp = hfreq

        ri = pin - hNs - 1            # input sound pointer for residual analysis
        xw2 = x[ri:ri + Ns] * wr      # window the input sound
        fftbuffer = np.zeros(Ns)
        fftbuffer[:hNs] = xw2[hNs:]   # zero-phase window in fftbuffer
        fftbuffer[hNs:] = xw2[:hNs]
        X2 = fft(fftbuffer)           # FFT of input signal for residual analysis

        #-----synthesis-----
        Yh = UF.genSpecSines(hfreq, hmag, hphase, Ns, fs)  # generate sines
        Xr = X2 - Yh                                       # residual complex spectrum

        fftbuffer = np.real(ifft(Yh))      # inverse FFT of harmonic spectrum
        yhw[:hNs - 1] = fftbuffer[hNs + 1:]  # undo zero-phase window
        yhw[hNs - 1:] = fftbuffer[:hNs + 1]

        fftbuffer = np.real(ifft(Xr))      # inverse FFT of residual spectrum
        xrw[:hNs - 1] = fftbuffer[hNs + 1:]  # undo zero-phase window
        xrw[hNs - 1:] = fftbuffer[:hNs + 1]

        yh[ri:ri + Ns] += sw * yhw    # overlap-add for sines
        xr[ri:ri + Ns] += sw * xrw    # overlap-add for residual
        pin += H                      # advance sound pointer

    y = yh + xr  # sum of harmonic and residual components
    return y, yh, xr
# Script: plot the magnitude spectrum of one frame of sine-440.wav together
# with its interpolated spectral peaks.
import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import get_window
import sys, os
# Make the project's software/models directory importable (path is built
# relative to sys.path[0]).
sys.path.append(
    os.path.join(os.path.dirname(os.path.dirname(sys.path[0])), 'software',
                 'models'))
import dftModel as DFT
import utilFunctions as UF

fs, x = UF.wavread('../../sounds/sine-440.wav')
M = 501      # analysis window size
N = 512 * 4  # FFT size
t = -20      # peak detection threshold
w = get_window('hamming', M)
# Frame of M samples starting at sample int(.8 * fs)
x1 = x[int(.8 * fs):int(.8 * fs + M)]
mX, pX = DFT.dftAnal(x1, w, N)
ploc = UF.peakDetection(mX, t)
# Refined peak locations and magnitudes; 'iphase' (interpolated phase) is not
# used further in this script.
iploc, ipmag, iphase = UF.peakInterp(mX, pX, ploc)
pmag = mX[ploc]  # magnitudes at the raw peak bins (not used below)

# Frequency axis in Hz for the plotted spectrum
freqaxis = fs * np.arange(N / 2 + 1) / float(N)
plt.plot(freqaxis, mX)
# Mark the interpolated peaks with crosses and no connecting line
plt.plot(fs * iploc / float(N), ipmag, marker='x', linestyle='')
plt.show()
# Script fragment: analyse one frame of oboe-A4.wav, resynthesize its spectral
# peaks with genSpecSines and start a four-row figure.
# NOTE(review): np, plt, sys, os, fftshift, ifft and blackmanharris are used
# here but not imported in this fragment; presumably imported earlier.
sys.path.append(
    os.path.join(os.path.dirname(os.path.realpath(__file__)),
                 "../../../software/models/"))
import dftModel as DFT
import utilFunctions as UF

(fs, x) = UF.wavread("../../../sounds/oboe-A4.wav")
M = 601    # analysis window size
w = np.blackman(M)
N = 1024   # analysis FFT size
# NOTE(review): hN and hNs are only integers under Python 2 integer division;
# hNs is used as a slice bound below. Confirm the target interpreter.
hN = N / 2
Ns = 512   # synthesis FFT size
hNs = Ns / 2
pin = 5000  # first sample of the analysed frame
t = -70     # peak detection threshold
x1 = x[pin : pin + w.size]
mX, pX = DFT.dftAnal(x1, w, N)
ploc = UF.peakDetection(mX, t)
iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)
freqs = iploc * fs / N  # peak locations converted from bins to Hz
Y = UF.genSpecSines(freqs, ipmag, ipphase, Ns, fs)
mY = 20 * np.log10(abs(Y[:hNs]))    # magnitude in dB of the first hNs bins
pY = np.unwrap(np.angle(Y[:hNs]))   # unwrapped phase of the first hNs bins
# Inverse FFT of the synthesized spectrum, scaled by the sum of a
# Blackman-Harris window of size Ns
y = fftshift(ifft(Y)) * sum(blackmanharris(Ns))

plt.figure(1, figsize=(9, 6))
plt.subplot(4, 1, 1)
# Plot the input frame on a sample axis running from -M/2 to M/2
plt.plot(np.arange(-M / 2, M / 2), x1, "b", lw=1.5)
plt.axis([-M / 2, M / 2, min(x1), max(x1)])
plt.title("x (oboe-A4.wav), M = 601")
plt.subplot(4, 1, 2)
def sineModelAnalEnhanced(
        inputFile='../../sounds/sines-440-602-transient.wav'):
    """
    Track the frequencies of two sinusoids frame by frame, keeping only peaks
    whose phase spectrum is flat around the peak, and plot the tracks over the
    spectrogram.

    Input:
        inputFile (string): wav file including the path
    Output:
        tStamps: A Kx1 numpy array of time stamps at which the frequency
                 components were estimated
        tfreq: A Kx2 numpy array of frequency values, one column per component
               (a frame without exactly two selected peaks contributes
               [0.0, 0.0])
    """
    phaseDevThres = 1e-2  # Allowed deviation in phase
    M = 2047              # window size
    N = 4096              # FFT size
    t = -80               # threshold in negative dB
    H = 128               # hop-size
    window = 'blackman'   # window type

    fs, x = UF.wavread(inputFile)  # Read input file
    w = get_window(window, M)      # Get the window
    hM1 = (w.size + 1) // 2        # half analysis window size by rounding
    hM2 = w.size // 2              # half analysis window size by floor
    x = np.append(np.zeros(hM2), x)  # add zeros at beginning to center first window at sample 0
    x = np.append(x, np.zeros(hM2))  # add zeros at the end to analyze last sample
    pin = hM1                      # initialize sound pointer in middle of analysis window
    pend = x.size - hM1            # last sample to start a frame
    tStamps = np.arange(pin, pend, H) / float(fs)  # Generate time stamps
    w = w / sum(w)                 # normalize analysis window

    # Collect one row per frame and stack once at the end: the result is
    # always two-dimensional, and repeated vstack copies are avoided.
    frameFreqs = []
    while pin < pend:  # while input sound pointer is within sound
        x1 = x[pin - hM1:pin + hM2]        # select frame
        mX, pX = SM.DFT.dftAnal(x1, w, N)  # compute dft
        ploc = UF.peakDetection(mX, t)     # detect locations of peaks

        ###### CODE DIFFERENT FROM sineModelAnal() #########
        # Phase based mainlobe tracking
        plocSelMask = np.zeros(len(ploc))
        for pindex, p in enumerate(ploc):
            if p > 2 and p < (len(pX) - 2):
                # Select the peak if the phase spectrum around it is flat
                if selectFlatPhasePeak(pX, p, phaseDevThres):
                    plocSelMask[pindex] = 1
            else:
                # Peaks at either end of the spectrum are not processed;
                # they are kept without the flatness test
                plocSelMask[pindex] = 1
        plocSel = ploc[plocSelMask.nonzero()[0]]  # Select the ones chosen

        if len(plocSel) != 2:
            # Ignoring frames that don't return two selected peaks
            ipfreq = [0.0, 0.0]
        else:
            # Only selected peaks are refined by interpolation
            iploc, ipmag, ipphase = UF.peakInterp(mX, pX, plocSel)
            ipfreq = fs * iploc / float(N)  # convert peak locations to Hertz
        ###### CODE DIFFERENT FROM sineModelAnal() #########

        frameFreqs.append(ipfreq)
        pin += H

    tfreq = np.vstack(frameFreqs) if frameFreqs else np.empty((0, 2))

    # Plot the estimated frequency tracks
    mX, pX = stft.stftAnal(x, w, N, H)
    maxplotfreq = 1500.0
    binFreq = fs * np.arange(N * maxplotfreq / fs) / N
    numFrames = int(mX[:, 0].size)
    frmTime = H * np.arange(numFrames) / float(fs)
    # Slice bounds must be integers: truncate the bin count explicitly
    lastBin = int(N * maxplotfreq / fs + 1)
    plt.pcolormesh(frmTime, binFreq, np.transpose(mX[:, :lastBin]),
                   cmap='hot_r')
    plt.plot(tStamps, tfreq[:, 0], color='y', linewidth=2.0)
    plt.plot(tStamps, tfreq[:, 1], color='c', linewidth=2.0)
    plt.legend(('Estimated f1', 'Estimated f2'))
    plt.xlabel('Time (s)')
    plt.ylabel('Frequency (Hz)')
    plt.autoscale(tight=True)
    return tStamps, tfreq
def sineModelMultiRes(x, fs, Ns, W, M, N, B, T):
    """
    Analysis/synthesis of a sound using the multi-resolution sinusoidal model,
    without sine tracking
    x: input array sound, fs: sampling frequency, Ns: FFT size for synthesis,
    W: array of analysis window types, M: array of analysis windows sizes,
    N: array of sizes of complex spectrums,
    B: array of frequency bands separators (ascending order of frequency,
       number of bands == B.size + 1),
    T: array of peak detection thresholds in negative dB.
    returns y: output array sound
    raises ValueError: if W, N, B and T do not have compatible sizes
    """
    nResolutions = W.size
    if (nResolutions != N.size) or (nResolutions != B.size + 1) or (nResolutions != T.size):
        raise ValueError('Parameters W,N,B,T shall have compatible sizes')

    # Floor division keeps sizes and slice bounds integral on Python 2 and 3.
    H = Ns // 4                # Hop size used for analysis and synthesis
    hNs = Ns // 2              # half of synthesis FFT size
    yw = np.zeros(Ns)          # initialize output sound frame
    y = np.zeros(x.size)       # initialize output array
    sw = np.zeros(Ns)          # initialize synthesis window
    ow = triang(2 * H)         # triangular window
    sw[hNs - H:hNs + H] = ow   # add triangular window
    bh = blackmanharris(Ns)    # blackmanharris window
    bh = bh / sum(bh)          # normalized blackmanharris window
    sw[hNs - H:hNs + H] = sw[hNs - H:hNs + H] / bh[hNs - H:hNs + H]  # normalized synthesis window

    # Integer half window sizes, stored in lists so they can be indexed and
    # reused (a map object can only be consumed once on Python 3).
    HM1 = [int((m + 1) // 2) for m in M]  # half analysis windows sizes by rounding
    HM2 = [int(m // 2) for m in M]        # half analysis windows sizes by floor
    pin = max(hNs, max(HM1))              # init sound pointer in the middle of largest window
    pend = x.size - pin                   # last sample to start a frame

    # The normalized analysis windows do not depend on the frame: build once.
    windows = []
    for k in range(nResolutions):
        wk = get_window(W[k], M[k])
        windows.append(wk / sum(wk))

    # Peaks from different resolutions closer than this are merged. The
    # threshold uses the FFT size of the last resolution, as the original did.
    freqDiffThreshold = (fs * 6) / float(N[nResolutions - 1])

    while pin < pend:  # while input sound pointer is within sound
        combinedIPFreq = []
        combinedIPMag = []
        combinedIPhase = []
        windowSizeAttribution = []

        #-----multi-resolution spectrum calculation-----
        for k in range(nResolutions):
            n = N[k]
            #-----analysis-----
            x1 = x[pin - HM1[k]:pin + HM2[k]]          # select frame
            mX, pX = DFT.dftAnal(x1, windows[k], n)    # compute dft
            ploc = UF.peakDetection(mX, T[k])          # detect locations of peaks
            iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)  # refine peak values by interpolation
            ipfreq = fs * iploc / float(n)             # convert peak locations to Hertz

            bandLow = 0.0 if k == 0 else B[k - 1]      # first band starts from zero
            bandHigh = fs / 2.0 if k == B.size else B[k]  # last band ends at fs/2

            # Pick the peaks inside the assigned frequency band
            for p in range(ipfreq.size):
                f = ipfreq[p]
                if bandLow <= f and f < bandHigh:
                    combinedIPFreq.append(f)
                    combinedIPMag.append(ipmag[p])
                    combinedIPhase.append(ipphase[p])
                    windowSizeAttribution.append(M[k])

        # Smooth out "double-reported" peaks close to the division
        # frequencies: adjacent peaks from different window sizes that are
        # closer than the threshold are replaced by their average.
        smoothedIPFreq = []
        smoothedIPMag = []
        smoothedIPhase = []
        nPeaks = len(combinedIPFreq)
        i = 0
        while i < nPeaks - 1:
            fa = combinedIPFreq[i]
            fb = combinedIPFreq[i + 1]
            fromDifferentWindows = windowSizeAttribution[i] != windowSizeAttribution[i + 1]
            if abs(fa - fb) < freqDiffThreshold and fromDifferentWindows:
                smoothedIPFreq.append((fa + fb) / 2.0)
                smoothedIPMag.append((combinedIPMag[i] + combinedIPMag[i + 1]) / 2.0)
                smoothedIPhase.append((combinedIPhase[i] + combinedIPhase[i + 1]) / 2.0)
                i += 2
            else:
                smoothedIPFreq.append(fa)
                smoothedIPMag.append(combinedIPMag[i])
                smoothedIPhase.append(combinedIPhase[i])
                i += 1
        # Add the last peak only if it exists and was not consumed by a merge
        # (the unconditional append duplicated it, and failed with no peaks).
        if i == nPeaks - 1:
            smoothedIPFreq.append(combinedIPFreq[i])
            smoothedIPMag.append(combinedIPMag[i])
            smoothedIPhase.append(combinedIPhase[i])

        #-----synthesis-----
        Y = UF.genSpecSines(np.array(smoothedIPFreq, dtype=float),
                            np.array(smoothedIPMag, dtype=float),
                            np.array(smoothedIPhase, dtype=float),
                            Ns, fs)              # generate sines in the spectrum
        fftbuffer = np.real(ifft(Y))             # compute inverse FFT
        yw[:hNs - 1] = fftbuffer[hNs + 1:]       # undo zero-phase window
        yw[hNs - 1:] = fftbuffer[:hNs + 1]
        y[pin - hNs:pin + hNs] += sw * yw        # overlap-add and apply a synthesis window
        pin += H                                 # advance sound pointer

    return y
# Script fragment: plot the phase spectrum of one frame of sine-440+490.wav
# with its interpolated peaks, then load vibraphone-C6.wav for a second
# analysis.
# NOTE(review): np is used below but not imported in this fragment;
# presumably imported earlier.
import matplotlib.pyplot as plt
from scipy.signal import hamming, triang, blackmanharris
import sys, os, functools, time
sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)),
                             '../../../software/models/'))
import dftModel as DFT
import utilFunctions as UF

(fs, x) = UF.wavread('../../../sounds/sine-440+490.wav')
w = np.hamming(3529)
# NOTE(review): 16084*2 is not a power of two; possibly 16384 was intended.
N = 16084*2
hN = N/2
t = -20     # peak detection threshold
pin = 4850  # first sample of the analysed frame
x1 = x[pin:pin+w.size]
mX1, pX1 = DFT.dftAnal(x1, w, N)
# NOTE(review): peakDetection is called with three arguments here (mX, hN, t),
# unlike the two-argument calls elsewhere in this file; confirm which version
# of utilFunctions this script targets.
ploc = UF.peakDetection(mX1, hN, t)
pmag = mX1[ploc]  # magnitudes at the raw peak bins
iploc, ipmag, ipphase = UF.peakInterp(mX1, pX1, ploc)

plt.figure(1, figsize=(9, 6))
plt.subplot(311)
# Phase spectrum against frequency in Hz
plt.plot(fs*np.arange(0,N/2)/float(N), pX1, 'c', lw=1.5)
# Interpolated peak phases marked with crosses
plt.plot(fs * iploc / N, ipphase, marker='x', color='b', alpha=1,
         linestyle='', markeredgewidth=1.5)
plt.axis([200, 1000, 50, 180])
plt.title('pX + peaks (sine-440+490.wav)')

# Parameters for the second sound; fs, x, w, N and hN are rebound here
(fs, x) = UF.wavread('../../../sounds/vibraphone-C6.wav')
w = np.blackman(401)
N = 1024
hN = N/2
def hpsModel(x, fs, w, N, t, nH, minf0, maxf0, f0et, stocf):
    """
    Analysis/synthesis of a sound using the harmonic plus stochastic model,
    one frame at a time, no harmonic tracking
    x: input sound; fs: sampling rate, w: analysis window;
    N: FFT size (minimum 512), t: threshold in negative dB,
    nH: maximum number of harmonics, minf0: minimum f0 frequency in Hz;
    maxf0: maximum f0 frequency in Hz,
    f0et: error threshold in the f0 detection (ex: 5);
    stocf: decimation factor of mag spectrum for stochastic analysis
    returns y: output sound, yh: harmonic component, yst: stochastic component
    """
    hM1 = (w.size + 1) // 2  # half analysis window size by rounding
    hM2 = w.size // 2        # half analysis window size by floor
    Ns = 512                 # FFT size for synthesis (even)
    # Floor division keeps H and hNs usable as sizes and slice bounds on
    # Python 3 as well as Python 2.
    H = Ns // 4              # Hop size used for analysis and synthesis
    hNs = Ns // 2
    pin = max(hNs, hM1)      # initialize sound pointer in middle of analysis window
    pend = x.size - pin      # last sample to start a frame

    yhw = np.zeros(Ns)       # harmonic output sound frame
    ystw = np.zeros(Ns)      # stochastic output sound frame
    yh = np.zeros(x.size)    # harmonic output array
    yst = np.zeros(x.size)   # stochastic output array

    w = w / sum(w)           # normalize analysis window
    sw = np.zeros(Ns)
    ow = triang(2*H)         # overlapping window
    sw[hNs-H:hNs+H] = ow
    bh = blackmanharris(Ns)  # synthesis window
    bh = bh / sum(bh)        # normalize synthesis window
    wr = bh                  # window for residual
    sw[hNs-H:hNs+H] = sw[hNs-H:hNs+H] / bh[hNs-H:hNs+H]  # synthesis window for harmonic component
    sws = H*hanning(Ns)/2    # synthesis window for stochastic

    hfreqp = []              # harmonic frequencies of the previous frame
    f0stable = 0
    while pin<pend:
        #-----analysis-----
        x1 = x[pin-hM1:pin+hM2]                              # select frame
        mX, pX = DFT.dftAnal(x1, w, N)                       # compute dft
        ploc = UF.peakDetection(mX, t)                       # find peaks
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)  # refine peak values
        ipfreq = fs * iploc/N                                # convert peak locations to Hz
        f0t = UF.f0Twm(ipfreq, ipmag, f0et, minf0, maxf0, f0stable)  # find f0
        # consider f0 stable if it is new, or within 20% of the previous stable one
        if ((f0stable==0)&(f0t>0)) \
                or ((f0stable>0)&(np.abs(f0stable-f0t)<f0stable/5.0)):
            f0stable = f0t
        else:
            f0stable = 0
        hfreq, hmag, hphase = HM.harmonicDetection(ipfreq, ipmag, ipphase,
                                                   f0t, nH, hfreqp, fs)  # find harmonics
        hfreqp = hfreq

        ri = pin-hNs-1               # input sound pointer for residual analysis
        xw2 = x[ri:ri+Ns]*wr         # window the input sound
        fftbuffer = np.zeros(Ns)
        fftbuffer[:hNs] = xw2[hNs:]  # zero-phase window in fftbuffer
        fftbuffer[hNs:] = xw2[:hNs]
        X2 = fft(fftbuffer)          # compute FFT for residual analysis

        #-----synthesis-----
        Yh = UF.genSpecSines(hfreq, hmag, hphase, Ns, fs)  # spec sines of harmonic component
        Xr = X2-Yh                                         # residual complex spectrum
        mXr = 20 * np.log10(abs(Xr[:hNs]))                 # magnitude spectrum of residual
        # Decimate the magnitude spectrum and avoid -Inf; resample needs an
        # integer number of output samples, so truncate explicitly.
        mXrenv = resample(np.maximum(-200, mXr), int(mXr.size*stocf))
        stocEnv = resample(mXrenv, hNs)                    # interpolate to original size
        pYst = 2*np.pi*np.random.rand(hNs)                 # generate phase random values
        Yst = np.zeros(Ns, dtype = complex)
        Yst[:hNs] = 10**(stocEnv/20) * np.exp(1j*pYst)                   # generate positive freq.
        Yst[hNs+1:] = 10**(stocEnv[:0:-1]/20) * np.exp(-1j*pYst[:0:-1])  # generate negative freq.

        fftbuffer = np.real(ifft(Yh))     # inverse FFT of harmonic spectrum
        yhw[:hNs-1] = fftbuffer[hNs+1:]   # undo zero-phase window
        yhw[hNs-1:] = fftbuffer[:hNs+1]

        fftbuffer = np.real(ifft(Yst))    # inverse FFT of stochastic spectrum
        ystw[:hNs-1] = fftbuffer[hNs+1:]  # undo zero-phase window
        ystw[hNs-1:] = fftbuffer[:hNs+1]

        yh[ri:ri+Ns] += sw*yhw            # overlap-add for sines
        yst[ri:ri+Ns] += sws*ystw         # overlap-add for stochastic
        pin += H                          # advance sound pointer

    y = yh+yst  # sum of harmonic and stochastic components
    return y, yh, yst
def hpsModel(x, fs, w, N, t, nH, minf0, maxf0, f0et, stocf):
    """
    Analysis/synthesis of a sound using the harmonic plus stochastic model,
    one frame at a time, no harmonic tracking
    x: input sound; fs: sampling rate, w: analysis window;
    N: FFT size (minimum 512), t: threshold in negative dB,
    nH: maximum number of harmonics, minf0: minimum f0 frequency in Hz;
    maxf0: maximum f0 frequency in Hz,
    f0et: error threshold in the f0 detection (ex: 5);
    stocf: decimation factor of mag spectrum for stochastic analysis
    returns y: output sound, yh: harmonic component, yst: stochastic component
    """
    hM1 = (w.size + 1) // 2  # half analysis window size by rounding
    hM2 = w.size // 2        # half analysis window size by floor
    Ns = 512                 # FFT size for synthesis (even)
    # Floor division keeps H and hNs usable as sizes and slice bounds on
    # Python 3 as well as Python 2.
    H = Ns // 4              # Hop size used for analysis and synthesis
    hNs = Ns // 2
    pin = max(hNs, hM1)      # initialize sound pointer in middle of analysis window
    pend = x.size - pin      # last sample to start a frame

    yhw = np.zeros(Ns)       # harmonic output sound frame
    ystw = np.zeros(Ns)      # stochastic output sound frame
    yh = np.zeros(x.size)    # harmonic output array
    yst = np.zeros(x.size)   # stochastic output array

    w = w / sum(w)           # normalize analysis window
    sw = np.zeros(Ns)
    ow = triang(2 * H)       # overlapping window
    sw[hNs - H:hNs + H] = ow
    bh = blackmanharris(Ns)  # synthesis window
    bh = bh / sum(bh)        # normalize synthesis window
    wr = bh                  # window for residual
    # synthesis window for harmonic component
    sw[hNs - H:hNs + H] = sw[hNs - H:hNs + H] / bh[hNs - H:hNs + H]
    sws = H * hanning(Ns) / 2  # synthesis window for stochastic

    hfreqp = []              # harmonic frequencies of the previous frame
    f0stable = 0
    while pin < pend:
        #-----analysis-----
        x1 = x[pin - hM1:pin + hM2]                          # select frame
        mX, pX = DFT.dftAnal(x1, w, N)                       # compute dft
        ploc = UF.peakDetection(mX, t)                       # find peaks
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)  # refine peak values
        ipfreq = fs * iploc / N                              # convert peak locations to Hz
        f0t = UF.f0Twm(ipfreq, ipmag, f0et, minf0, maxf0, f0stable)  # find f0
        # consider f0 stable if it is new, or within 20% of the previous stable one
        if ((f0stable == 0) & (f0t > 0)) \
                or ((f0stable > 0) & (np.abs(f0stable - f0t) < f0stable / 5.0)):
            f0stable = f0t
        else:
            f0stable = 0
        hfreq, hmag, hphase = HM.harmonicDetection(ipfreq, ipmag, ipphase,
                                                   f0t, nH, hfreqp, fs)  # find harmonics
        hfreqp = hfreq

        ri = pin - hNs - 1            # input sound pointer for residual analysis
        xw2 = x[ri:ri + Ns] * wr      # window the input sound
        fftbuffer = np.zeros(Ns)
        fftbuffer[:hNs] = xw2[hNs:]   # zero-phase window in fftbuffer
        fftbuffer[hNs:] = xw2[:hNs]
        X2 = fft(fftbuffer)           # compute FFT for residual analysis

        #-----synthesis-----
        Yh = UF.genSpecSines(hfreq, hmag, hphase, Ns, fs)  # spec sines of harmonic component
        Xr = X2 - Yh                                       # residual complex spectrum
        mXr = 20 * np.log10(abs(Xr[:hNs]))                 # magnitude spectrum of residual
        # Decimate the magnitude spectrum and avoid -Inf; resample needs an
        # integer number of output samples, so truncate explicitly.
        mXrenv = resample(np.maximum(-200, mXr), int(mXr.size * stocf))
        stocEnv = resample(mXrenv, hNs)                    # interpolate to original size
        pYst = 2 * np.pi * np.random.rand(hNs)             # generate phase random values
        Yst = np.zeros(Ns, dtype=complex)
        Yst[:hNs] = 10**(stocEnv / 20) * np.exp(1j * pYst)  # generate positive freq.
        Yst[hNs + 1:] = 10**(stocEnv[:0:-1] / 20) * np.exp(
            -1j * pYst[:0:-1])                              # generate negative freq.

        fftbuffer = np.real(ifft(Yh))         # inverse FFT of harmonic spectrum
        yhw[:hNs - 1] = fftbuffer[hNs + 1:]   # undo zero-phase window
        yhw[hNs - 1:] = fftbuffer[:hNs + 1]

        fftbuffer = np.real(ifft(Yst))        # inverse FFT of stochastic spectrum
        ystw[:hNs - 1] = fftbuffer[hNs + 1:]  # undo zero-phase window
        ystw[hNs - 1:] = fftbuffer[:hNs + 1]

        yh[ri:ri + Ns] += sw * yhw            # overlap-add for sines
        yst[ri:ri + Ns] += sws * ystw         # overlap-add for stochastic
        pin += H                              # advance sound pointer

    y = yh + yst  # sum of harmonic and stochastic components
    return y, yh, yst
def sineModelAnalEnhanced(inputFile= '../../sounds/sines-440-602-transient.wav'):
    """
    Track the frequencies of two sinusoids frame by frame, keeping only peaks
    whose phase spectrum is flat around the peak, and plot the tracks over the
    spectrogram.

    Input:
        inputFile (string): wav file including the path
    Output:
        tStamps: A Kx1 numpy array of time stamps at which the frequency
                 components were estimated
        tfreq: A Kx2 numpy array of frequency values, one column per component
               (a frame without exactly two selected peaks contributes
               [0.0, 0.0])
    """
    phaseDevThres = 1e-2  # Allowed deviation in phase
    M = 2047              # window size
    N = 4096              # FFT size
    t = -80               # threshold in negative dB
    H = 128               # hop-size
    window='blackman'     # window type

    fs, x = UF.wavread(inputFile)  # Read input file
    w = get_window(window, M)      # Get the window
    hM1 = (w.size+1)//2            # half analysis window size by rounding
    hM2 = w.size//2                # half analysis window size by floor
    x = np.append(np.zeros(hM2),x)  # add zeros at beginning to center first window at sample 0
    x = np.append(x,np.zeros(hM2))  # add zeros at the end to analyze last sample
    pin = hM1                      # initialize sound pointer in middle of analysis window
    pend = x.size - hM1            # last sample to start a frame
    tStamps = np.arange(pin,pend,H)/float(fs)  # Generate time stamps
    w = w / sum(w)                 # normalize analysis window

    # Collect one row per frame and stack once at the end: the result is
    # always two-dimensional, and repeated vstack copies are avoided.
    trackRows = []
    while pin<pend:  # while input sound pointer is within sound
        x1 = x[pin-hM1:pin+hM2]            # select frame
        mX, pX = SM.DFT.dftAnal(x1, w, N)  # compute dft
        ploc = UF.peakDetection(mX, t)     # detect locations of peaks

        ###### CODE DIFFERENT FROM sineModelAnal() #########
        # Phase based mainlobe tracking
        plocSelMask = np.zeros(len(ploc))
        for pindex, p in enumerate(ploc):
            if p > 2 and p < (len(pX) - 2):
                # Select the peak if the phase spectrum around it is flat
                if selectFlatPhasePeak(pX, p, phaseDevThres):
                    plocSelMask[pindex] = 1
            else:
                # Peaks at either end of the spectrum are not processed;
                # they are kept without the flatness test
                plocSelMask[pindex] = 1
        plocSel = ploc[plocSelMask.nonzero()[0]]  # Select the ones chosen

        if len(plocSel) != 2:
            # Ignoring frames that don't return two selected peaks
            ipfreq = [0.0, 0.0]
        else:
            # Only selected peaks are refined by interpolation
            iploc, ipmag, ipphase = UF.peakInterp(mX, pX, plocSel)
            ipfreq = fs*iploc/float(N)  # convert peak locations to Hertz
        ###### CODE DIFFERENT FROM sineModelAnal() #########

        trackRows.append(ipfreq)
        pin += H

    tfreq = np.vstack(trackRows) if trackRows else np.empty((0, 2))

    # Plot the estimated frequency tracks
    mX, pX = stft.stftAnal(x, fs, w, N, H)
    maxplotfreq = 1500.0
    binFreq = fs*np.arange(N*maxplotfreq/fs)/N
    numFrames = int(mX[:,0].size)
    frmTime = H*np.arange(numFrames)/float(fs)
    # Slice bounds must be integers: truncate the bin count explicitly
    lastBin = int(N*maxplotfreq/fs+1)
    plt.pcolormesh(frmTime, binFreq, np.transpose(mX[:,:lastBin]), cmap='hot_r')
    plt.plot(tStamps,tfreq[:,0], color = 'y', linewidth=2.0)
    plt.plot(tStamps,tfreq[:,1], color = 'c', linewidth=2.0)
    plt.legend(('Estimated f1', 'Estimated f2'))
    plt.xlabel('Time (s)')
    plt.ylabel('Frequency (Hz)')
    plt.autoscale(tight=True)
    return tStamps, tfreq