def sineModel(x, fs, w, N, t):
    """Analysis/synthesis of a sound using the sinusoidal model.

    Every frame is windowed with w, transformed with an N-point FFT, its
    spectral peaks are detected and interpolated (PP module), the peaks are
    rendered into a 512-point spectrum (GS module) and the inverse FFT of
    that spectrum is overlap-added into the output.

    x: input array sound
    fs: sampling rate (accepted for interface symmetry; not read here)
    w: analysis window
    N: size of complex spectrum (FFT size)
    t: threshold in negative dB
    returns y: output array sound, same length as x (all zeros when x is
    too short to hold a single frame)
    """
    # All sizes below are used as slice bounds, so they are computed with
    # floor division to stay integers under true-division semantics too.
    hN = N // 2                        # size of positive spectrum
    hM1 = (w.size + 1) // 2            # half analysis window size by rounding
    hM2 = w.size // 2                  # half analysis window size by floor
    Ns = 512                           # FFT size for synthesis (even)
    H = Ns // 4                        # hop size used for analysis and synthesis
    hNs = Ns // 2                      # half of synthesis FFT size
    pin = max(hNs, hM1)                # sound pointer in middle of analysis window
    pend = x.size - max(hNs, hM1)      # last sample to start a frame
    yw = np.zeros(Ns)                  # output sound frame
    y = np.zeros(x.size)               # output array
    w = w / sum(w)                     # normalize analysis window

    # Synthesis window: triangular overlap window divided by the normalized
    # Blackman-Harris window over the central 2*H samples, zero elsewhere.
    bh = blackmanharris(Ns)
    bh = bh / sum(bh)
    center = slice(hNs - H, hNs + H)
    sw = np.zeros(Ns)
    sw[center] = triang(2 * H) / bh[center]

    while pin < pend:
        # -----analysis-----
        xw = x[pin - hM1:pin + hM2] * w          # window the input sound
        fftbuffer = np.zeros(N)
        fftbuffer[:hM1] = xw[hM2:]               # zero-phase window in fftbuffer
        fftbuffer[N - hM2:] = xw[:hM2]
        X = fft(fftbuffer)
        mX = 20 * np.log10(abs(X[:hN]))          # magnitude spectrum (dB), positive freqs
        ploc = PP.peakDetection(mX, hN, t)       # detect locations of peaks
        pX = np.unwrap(np.angle(X[:hN]))         # unwrapped phase, positive freqs
        iploc, ipmag, ipphase = PP.peakInterp(mX, pX, ploc)  # refine peak values

        # -----synthesis-----
        plocs = iploc * Ns / float(N)            # adapt peak locations to synthesis FFT size
        Y = GS.genSpecSines(plocs, ipmag, ipphase, Ns)
        fftbuffer = np.real(ifft(Y))
        yw[:hNs - 1] = fftbuffer[hNs + 1:]       # undo zero-phase window
        yw[hNs - 1:] = fftbuffer[:hNs + 1]
        y[pin - hNs:pin + hNs] += sw * yw        # overlap-add with the synthesis window
        pin += H                                 # advance sound pointer
    return y
def hpsAnalysis(x, fs, w, wr, pin, N, hN, Ns, hNs, hM, nH, t, f0et, minf0, maxf0, maxhd, stocf):
    """Analyze one frame of a sound with the harmonic plus stochastic model.

    x: input sound, fs: sampling rate
    w: analysis window; the analysis frame is x[pin-hM:pin+hM-1], so w must
       hold 2*hM-1 samples
    wr: window for the residual analysis; multiplied with an Ns-sample frame
    pin: sound pointer of the frame
    N: FFT size, hN: size of positive spectrum
    Ns: FFT size for synthesis, hNs: half of Ns
    hM: half analysis window size
    nH: maximum number of harmonics
    t: threshold in negative dB
    f0et, minf0, maxf0: forwarded unchanged to fd.f0DetectionTwm
    maxhd: max. relative deviation in harmonic detection
    stocf: decimation factor of mag spectrum for stochastic analysis
    returns f0, hloc, hmag, mXrenv where hloc holds harmonic locations scaled
    to the Ns-point spectrum (0 where no peak was accepted), hmag holds the
    harmonic magnitudes (-100 where no peak was accepted) and mXrenv is the
    decimated residual magnitude envelope in dB
    raises ValueError when the residual frame does not fit inside x
    """
    # These values are used as slice bounds; coerce so callers that computed
    # them with true division still work.
    pin, hN, hNs, hM = int(pin), int(hN), int(hNs), int(hM)

    ri = pin - hNs - 1                           # input sound pointer for residual analysis
    if ri < 0 or ri + Ns > x.size:
        # A negative start would wrap around and yield a wrong-sized frame.
        raise ValueError("residual frame x[%d:%d] is outside the input sound" % (ri, ri + Ns))

    # -----harmonic analysis-----
    xw = x[pin - hM:pin + hM - 1] * w            # window the input sound
    fftbuffer = np.zeros(N)
    fftbuffer[:hM] = xw[hM - 1:]                 # zero-phase window in fftbuffer
    fftbuffer[N - hM + 1:] = xw[:hM - 1]
    X = fft(fftbuffer)
    mX = 20 * np.log10(abs(X[:hN]))              # magnitude spectrum (dB), positive freqs
    ploc = PP.peakDetection(mX, hN, t)
    pX = np.unwrap(np.angle(X[:hN]))             # unwrapped phase, positive freqs
    iploc, ipmag, ipphase = PP.peakInterp(mX, pX, ploc)          # refine peak values
    f0 = fd.f0DetectionTwm(iploc, ipmag, N, fs, f0et, minf0, maxf0)  # find f0

    hloc = np.zeros(nH)                          # harmonic locations
    hmag = np.zeros(nH) - 100                    # harmonic magnitudes
    hphase = np.zeros(nH)                        # harmonic phases
    hf = (f0 > 0) * (f0 * np.arange(1, nH + 1))  # ideal harmonic frequencies
    peak_freqs = iploc / float(N) * fs           # peak locations in Hz (loop invariant)
    nyquist = fs / 2.0
    hi = 0
    while f0 > 0 and hi < nH and hf[hi] < nyquist:
        dist = abs(peak_freqs - hf[hi])
        pei = np.argmin(dist)                    # closest peak
        # Accept the peak only if no earlier harmonic already took it and
        # it lies within the relative deviation maxhd of the ideal harmonic.
        if not np.any(hloc[:hi] == iploc[pei]) and dist[pei] < maxhd * hf[hi]:
            hloc[hi] = iploc[pei]
            hmag[hi] = ipmag[pei]
            hphase[hi] = ipphase[pei]
        hi += 1                                  # always advance, accepted or not
    hloc = (hloc != 0) * (hloc * Ns / float(N))  # synth. locs

    # -----residual analysis-----
    xw2 = x[ri:ri + Ns] * wr                     # window the input sound
    fftbuffer = np.zeros(Ns)
    fftbuffer[:hNs] = xw2[hNs:]                  # zero-phase window in fftbuffer
    fftbuffer[hNs:] = xw2[:hNs]
    X2 = fft(fftbuffer)
    Xh = GS.genSpecSines(hloc, hmag, hphase, Ns)  # generate sines
    Xr = X2 - Xh                                 # residual complex spectrum
    mXr = 20 * np.log10(abs(Xr[:hNs]))           # magnitude spectrum of residual
    # Clip at -200 dB to avoid -Inf, then decimate; resample needs an
    # integer number of output samples.
    mXrenv = resample(np.maximum(-200, mXr), int(mXr.size * stocf))
    return f0, hloc, hmag, mXrenv
def hpsModelSpectrogramPlot(x, fs, w, N, t, nH, minf0, maxf0, f0et, maxhd, stocf, maxFreq):
    """Plot spectrograms from a harmonic plus stochastic analysis of a sound.

    Three stacked plots are drawn and shown: the magnitude spectrogram of x
    with the detected harmonic peaks, the residual spectrogram, and the
    stochastic approximation of the residual.

    x: input sound, fs: sampling rate, w: analysis window
    N: FFT size, t: threshold in negative dB
    nH: maximum number of harmonics
    minf0, maxf0, f0et: forwarded unchanged to fd.f0DetectionTwm
    maxhd: max. relative deviation in harmonic detection
    stocf: decimation factor of mag spectrum for stochastic analysis
    maxFreq: highest frequency (Hz) shown in the plots
    returns XSpec: magnitude spectrogram of x in dB, one column per frame
    raises ValueError when x is too short to hold a single frame
    """
    # Sizes are slice bounds: keep them integers with floor division.
    hN = N // 2                        # size of positive spectrum
    hM1 = (w.size + 1) // 2            # half analysis window size by rounding
    hM2 = w.size // 2                  # half analysis window size by floor
    Ns = 512                           # FFT size for residual analysis (even)
    H = Ns // 4                        # hop size
    hNs = Ns // 2
    # The residual frame starts at pin-hNs-1; starting at hNs+1 keeps that
    # index from going negative (a negative slice start would wrap around).
    pin = max(hNs + 1, hM1)
    pend = x.size - max(hNs, hM1)      # last sample to start a frame
    w = w / sum(w)                     # normalize analysis window
    wr = blackmanharris(Ns)            # window for residual
    wr = wr / sum(wr)

    lastBin = N * maxFreq / float(fs)            # (fractional) bin of maxFreq, N-point FFT
    nBins = min(int(lastBin), hN)
    binFreq = np.arange(nBins) * float(fs) / N   # the bin frequencies
    lastBinYr = Ns * maxFreq / float(fs)         # same limit for the Ns-point FFT
    nBinsYr = min(int(lastBinYr), hNs)
    binFreqYr = np.arange(nBinsYr) * float(fs) / Ns
    nyquist = fs / 2.0

    frmTime = []
    xCols, yrCols, ysCols, peakRows = [], [], [], []
    while pin < pend:
        frmTime.append(pin / float(fs))
        xw = x[pin - hM1:pin + hM2] * w          # window the input sound
        fftbuffer = np.zeros(N)
        fftbuffer[:hM1] = xw[hM2:]               # zero-phase window in fftbuffer
        fftbuffer[N - hM2:] = xw[:hM2]
        X = fft(fftbuffer)
        mX = 20 * np.log10(abs(X[:hN]))          # magnitude spectrum (dB), positive freqs
        ploc = PP.peakDetection(mX, hN, t)
        pX = np.unwrap(np.angle(X[:hN]))         # unwrapped phase, positive freqs
        iploc, ipmag, ipphase = PP.peakInterp(mX, pX, ploc)          # refine peak values
        f0 = fd.f0DetectionTwm(iploc, ipmag, N, fs, f0et, minf0, maxf0)  # find f0

        hloc = np.zeros(nH)                      # harmonic locations
        hmag = np.zeros(nH) - 100                # harmonic magnitudes
        hphase = np.zeros(nH)                    # harmonic phases
        hf = (f0 > 0) * (f0 * np.arange(1, nH + 1))  # ideal harmonic frequencies
        peak_freqs = iploc / float(N) * fs
        hi = 0
        while f0 > 0 and hi < nH and hf[hi] < nyquist:
            dist = abs(peak_freqs - hf[hi])
            pei = np.argmin(dist)                # closest peak
            if not np.any(hloc[:hi] == iploc[pei]) and dist[pei] < maxhd * hf[hi]:
                hloc[hi] = iploc[pei]
                hmag[hi] = ipmag[pei]
                hphase[hi] = ipphase[pei]
            hi += 1

        # Store this frame's spectrum column and its harmonic peaks as
        # (time, frequency) rows; stacked once after the loop.
        xCols.append(mX[:nBins])
        final_peaks = hloc[(hloc > 0) & (hloc <= lastBin)]
        parray = np.zeros([final_peaks.size, 2])
        parray[:, 0] = pin / float(fs)
        parray[:, 1] = final_peaks * float(fs) / N
        peakRows.append(parray)

        hloc[:hi] = (hloc[:hi] != 0) * (hloc[:hi] * Ns / float(N))  # synth. locs
        ri = pin - hNs - 1                       # input sound pointer for residual analysis
        xr = x[ri:ri + Ns] * wr                  # window the input sound
        fftbuffer = np.zeros(Ns)
        fftbuffer[:hNs] = xr[hNs:]               # zero-phase window in fftbuffer
        fftbuffer[hNs:] = xr[:hNs]
        Xr = fft(fftbuffer)
        # NOTE(review): hloc is trimmed to hi entries while hmag/hphase keep
        # nH entries, as in the original call - confirm genSpecSines sizes
        # its loop from the first argument.
        Yh = GS.genSpecSines(hloc[:hi], hmag, hphase, Ns)
        Yr = Xr - Yh                             # residual complex spectrum
        mYr = 20 * np.log10(abs(Yr[:hNs]))
        # Clip at -200 dB to avoid -Inf; resample needs an integer count.
        mYrenv = resample(np.maximum(-200, mYr), int(mYr.size * stocf))
        mYs = resample(mYrenv, hNs)              # interpolate back to hNs bins
        yrCols.append(mYr[:nBinsYr])
        ysCols.append(mYs[:nBinsYr])
        pin += H

    if not frmTime:
        raise ValueError("input sound is too short to analyze a single frame")

    frmTime = np.array(frmTime)                  # frame times in seconds
    XSpec = np.array(xCols).T                    # bins x frames
    YrSpec = np.array(yrCols).T
    YsSpec = np.array(ysCols).T
    specPeaks = np.vstack(peakRows)

    plt.figure(1)
    plt.subplot(3, 1, 1)
    plt.pcolormesh(frmTime, binFreq, XSpec)
    plt.scatter(specPeaks[:, 0] + (0.5 * H / float(fs)), specPeaks[:, 1], s=10, marker='x')
    plt.autoscale(tight=True)
    plt.title('X spectrogram + peaks')

    plt.subplot(3, 1, 2)
    plt.pcolormesh(frmTime, binFreqYr, YrSpec)
    plt.autoscale(tight=True)
    plt.title('X residual spectrogram')

    plt.subplot(3, 1, 3)
    plt.pcolormesh(frmTime, binFreqYr, YsSpec)
    plt.autoscale(tight=True)
    plt.title('X residual stochastic approx. spectrogram')
    plt.show()
    # The original returned the undefined name YSpec; XSpec is the stored
    # spectrogram of x.
    return XSpec
def spsModel(x, fs, w, N, t, stocf):
    """Analysis/synthesis of a sound using the sinusoidal plus residual model.

    The sinusoidal component is resynthesized from the interpolated spectral
    peaks; the residual (input spectrum minus sinusoids) is replaced by a
    stochastic approximation: its decimated magnitude envelope with random
    phases, attenuated below 500 Hz.

    x: input sound, fs: sampling rate, w: analysis window
    N: FFT size (minimum 512), t: threshold in negative dB
    stocf: decimation factor of mag spectrum for stochastic analysis
    returns y, ys, yst: output sound, sinusoidal component, stochastic
    approximation of the residual (all zeros when x is too short to hold a
    single frame)
    """
    # Sizes are slice bounds: keep them integers with floor division.
    hN = N // 2                        # size of positive spectrum
    hM1 = (w.size + 1) // 2            # half analysis window size by rounding
    hM2 = w.size // 2                  # half analysis window size by floor
    Ns = 512                           # FFT size for synthesis (even)
    H = Ns // 4                        # hop size used for analysis and synthesis
    hNs = Ns // 2
    # The residual frame starts at pin-hNs-1; starting at hNs+1 keeps that
    # index from going negative (a negative slice start would wrap around).
    pin = max(hNs + 1, hM1)
    pend = x.size - max(hNs, hM1)      # last sample to start a frame
    ysw = np.zeros(Ns)                 # sinusoidal output frame
    ystw = np.zeros(Ns)                # stochastic output frame
    ys = np.zeros(x.size)              # sinusoidal output array
    yst = np.zeros(x.size)             # stochastic output array
    w = w / sum(w)                     # normalize analysis window

    bh = blackmanharris(Ns)
    bh = bh / sum(bh)
    wr = bh                            # window for residual
    center = slice(hNs - H, hNs + H)
    sw = np.zeros(Ns)                  # synthesis window
    sw[center] = triang(2 * H) / bh[center]

    # Loop-invariant high-pass ramp: quadratic fade-in over the bins below
    # 500 Hz. fc must be an integer because it is used as a slice bound.
    fc = min(1 + int(round(500.0 / fs * Ns)), hNs)
    hpf = (np.arange(fc) / float(max(fc - 1, 1))) ** 2

    while pin < pend:
        # -----analysis-----
        xw = x[pin - hM1:pin + hM2] * w          # window the input sound
        fftbuffer = np.zeros(N)
        fftbuffer[:hM1] = xw[hM2:]               # zero-phase window in fftbuffer
        fftbuffer[N - hM2:] = xw[:hM2]
        X = fft(fftbuffer)
        mX = 20 * np.log10(abs(X[:hN]))          # magnitude spectrum (dB), positive freqs
        ploc = PP.peakDetection(mX, hN, t)
        pX = np.unwrap(np.angle(X[:hN]))         # unwrapped phase, positive freqs
        iploc, ipmag, ipphase = PP.peakInterp(mX, pX, ploc)  # refine peak values
        iploc = (iploc != 0) * (iploc * Ns / float(N))       # synth. locs

        ri = pin - hNs - 1                       # input sound pointer for residual analysis
        xr = x[ri:ri + Ns] * wr                  # window the input sound
        fftbuffer = np.zeros(Ns)
        fftbuffer[:hNs] = xr[hNs:]               # zero-phase window in fftbuffer
        fftbuffer[hNs:] = xr[:hNs]
        Xr = fft(fftbuffer)

        # -----synthesis-----
        Ys = GS.genSpecSines(iploc, ipmag, ipphase, Ns)  # spectrum of sinusoidal component
        Yr = Xr - Ys                             # residual complex spectrum
        mYr = 20 * np.log10(abs(Yr[:hNs]))       # magnitude spectrum of residual
        # Clip at -200 dB to avoid -Inf; resample needs an integer count.
        mYrenv = resample(np.maximum(-200, mYr), int(mYr.size * stocf))
        mYst = resample(mYrenv, hNs)             # interpolate to original size
        mYst = 10 ** (mYst / 20)                 # dB to linear magnitude
        mYst[:fc] *= hpf                         # high pass filter the stochastic component
        pYst = 2 * np.pi * np.random.rand(hNs)   # random phases
        Yst = np.zeros(Ns, dtype=complex)
        Yst[:hNs] = mYst * np.exp(1j * pYst)                       # positive freqs
        Yst[hNs + 1:] = mYst[:0:-1] * np.exp(-1j * pYst[:0:-1])    # negative freqs (conjugate mirror)

        fftbuffer = np.real(ifft(Ys))            # inverse FFT of sinusoidal spectrum
        ysw[:hNs - 1] = fftbuffer[hNs + 1:]      # undo zero-phase window
        ysw[hNs - 1:] = fftbuffer[:hNs + 1]
        fftbuffer = np.real(ifft(Yst))           # inverse FFT of stochastic spectrum
        ystw[:hNs - 1] = fftbuffer[hNs + 1:]     # undo zero-phase window
        ystw[hNs - 1:] = fftbuffer[:hNs + 1]

        ys[ri:ri + Ns] += sw * ysw               # overlap-add for sines
        yst[ri:ri + Ns] += sw * ystw             # overlap-add for stochastic
        pin += H                                 # advance sound pointer

    y = ys + yst                                 # sum of both components
    return y, ys, yst
def hprModelFrame(x, fs, w, N, t, nH, minf0, maxf0, f0et, maxhd):
    """Harmonic plus residual analysis/synthesis of the first frame of a sound.

    Only one frame is processed, positioned at the first valid sound
    pointer. The intermediate spectra are returned together with the
    synthesized frame.

    x: input sound, fs: sampling rate, w: analysis window
    N: FFT size, t: threshold in negative dB
    nH: maximum number of harmonics
    minf0, maxf0, f0et: forwarded unchanged to fd.f0DetectionTwm
    maxhd: max. relative deviation in harmonic detection
    returns mX, pX: magnitude (dB) and unwrapped phase of the input frame
            hlocN: harmonic locations in bins of the N-point spectrum
            hmag, hphase: harmonic magnitudes and phases
            mYh, pYh: magnitude/phase of the synthesized harmonic spectrum
            mXr, pXr: magnitude/phase of the Ns-point input spectrum
            mYr, pYr: magnitude/phase of the residual spectrum
            yh, yr, y: harmonic, residual and summed output arrays
    """
    # Sizes are slice bounds: keep them integers with floor division.
    hN = N // 2                        # size of positive spectrum
    hM1 = (w.size + 1) // 2            # half analysis window size by rounding
    hM2 = w.size // 2                  # half analysis window size by floor
    Ns = 512                           # FFT size for synthesis (even)
    H = Ns // 4                        # hop size (sets the synthesis window width)
    hNs = Ns // 2
    # The residual frame starts at pin-hNs-1; starting at hNs+1 keeps that
    # index from going negative (a negative slice start would wrap around).
    pin = max(hNs + 1, hM1)
    yhw = np.zeros(Ns)                 # harmonic output frame
    yrw = np.zeros(Ns)                 # residual output frame
    yh = np.zeros(x.size)              # harmonic output array
    yr = np.zeros(x.size)              # residual output array
    w = w / sum(w)                     # normalize analysis window

    bh = blackmanharris(Ns)
    bh = bh / sum(bh)
    wr = bh                            # window for residual
    center = slice(hNs - H, hNs + H)
    sw = np.zeros(Ns)                  # synthesis window
    sw[center] = triang(2 * H) / bh[center]

    # -----analysis-----
    xw = x[pin - hM1:pin + hM2] * w              # window the input sound
    fftbuffer = np.zeros(N)
    fftbuffer[:hM1] = xw[hM2:]                   # zero-phase window in fftbuffer
    fftbuffer[N - hM2:] = xw[:hM2]
    X = fft(fftbuffer)
    mX = 20 * np.log10(abs(X[:hN]))              # magnitude spectrum (dB), positive freqs
    ploc = PP.peakDetection(mX, hN, t)
    pX = np.unwrap(np.angle(X[:hN]))             # unwrapped phase, positive freqs
    iploc, ipmag, ipphase = PP.peakInterp(mX, pX, ploc)          # refine peak values
    f0 = fd.f0DetectionTwm(iploc, ipmag, N, fs, f0et, minf0, maxf0)  # find f0

    hloc = np.zeros(nH)                          # harmonic locations
    hmag = np.zeros(nH) - 100                    # harmonic magnitudes
    hphase = np.zeros(nH)                        # harmonic phases
    hf = (f0 > 0) * (f0 * np.arange(1, nH + 1))  # ideal harmonic frequencies
    peak_freqs = iploc / float(N) * fs
    nyquist = fs / 2.0
    hi = 0
    while f0 > 0 and hi < nH and hf[hi] < nyquist:
        dist = abs(peak_freqs - hf[hi])
        pei = np.argmin(dist)                    # closest peak
        if not np.any(hloc[:hi] == iploc[pei]) and dist[pei] < maxhd * hf[hi]:
            hloc[hi] = iploc[pei]
            hmag[hi] = ipmag[pei]
            hphase[hi] = ipphase[pei]
        hi += 1

    # Copy before rescaling: a plain assignment would alias hloc and the
    # returned N-size locations would silently become Ns-size ones.
    hlocN = hloc.copy()
    hloc[:hi] = (hloc[:hi] != 0) * (hloc[:hi] * Ns / float(N))   # synth. locs

    ri = pin - hNs - 1                           # input sound pointer for residual analysis
    xr = x[ri:ri + Ns] * wr                      # window the input sound
    fftbuffer = np.zeros(Ns)
    fftbuffer[:hNs] = xr[hNs:]                   # zero-phase window in fftbuffer
    fftbuffer[hNs:] = xr[:hNs]
    Xr = fft(fftbuffer)

    # -----synthesis-----
    # NOTE(review): hloc is trimmed to hi entries while hmag/hphase keep nH
    # entries, as in the original call - confirm genSpecSines sizes its loop
    # from the first argument.
    Yh = GS.genSpecSines(hloc[:hi], hmag, hphase, Ns)
    mYh = 20 * np.log10(abs(Yh[:hNs]))
    pYh = np.unwrap(np.angle(Yh[:hNs]))
    Yr = Xr - Yh                                 # residual complex spectrum
    mXr = 20 * np.log10(abs(Xr[:hNs]))
    pXr = np.unwrap(np.angle(Xr[:hNs]))
    mYr = 20 * np.log10(abs(Yr[:hNs]))
    pYr = np.unwrap(np.angle(Yr[:hNs]))

    fftbuffer = np.real(ifft(Yh))                # inverse FFT of harmonic spectrum
    yhw[:hNs - 1] = fftbuffer[hNs + 1:]          # undo zero-phase window
    yhw[hNs - 1:] = fftbuffer[:hNs + 1]
    fftbuffer = np.real(ifft(Yr))                # inverse FFT of residual spectrum
    yrw[:hNs - 1] = fftbuffer[hNs + 1:]          # undo zero-phase window
    yrw[hNs - 1:] = fftbuffer[:hNs + 1]

    yh[ri:ri + Ns] += sw * yhw                   # overlap-add for sines
    yr[ri:ri + Ns] += sw * yrw                   # overlap-add for residual
    y = yh + yr                                  # sum of harmonic and residual components
    return mX, pX, hlocN, hmag, hphase, mYh, pYh, mXr, pXr, mYr, pYr, yh, yr, y
def harmonicModel(x, fs, w, N, t, nH, minf0, maxf0, f0et, maxhd, maxnpeaksTwm=10):
    """Analysis/synthesis of a sound using the sinusoidal harmonic model.

    x: input sound, fs: sampling rate, w: analysis window
    N: FFT size (minimum 512), t: threshold in negative dB
    nH: maximum number of harmonics
    minf0: minimum f0 frequency in Hz, maxf0: maximum f0 frequency in Hz
    f0et: error threshold in the f0 detection (ex: 5)
    maxhd: max. relative deviation in harmonic detection (ex: .2)
    maxnpeaksTwm: maximum number of peaks used for F0 detection
    returns y: output array sound holding the harmonic component (all zeros
    when x is too short to hold a single frame)
    """
    # Sizes are slice bounds: keep them integers with floor division.
    hN = N // 2                        # size of positive spectrum
    hM1 = (w.size + 1) // 2            # half analysis window size by rounding
    hM2 = w.size // 2                  # half analysis window size by floor
    Ns = 512                           # FFT size for synthesis (even)
    H = Ns // 4                        # hop size used for analysis and synthesis
    hNs = Ns // 2
    pin = max(hNs, hM1)                # sound pointer in middle of analysis window
    pend = x.size - max(hNs, hM1)      # last sample to start a frame
    yh = np.zeros(Ns)                  # output sound frame
    y = np.zeros(x.size)               # output array
    w = w / sum(w)                     # normalize analysis window

    bh = blackmanharris(Ns)
    bh = bh / sum(bh)
    center = slice(hNs - H, hNs + H)
    sw = np.zeros(Ns)                  # synthesis window
    sw[center] = triang(2 * H) / bh[center]
    nyquist = fs / 2.0

    while pin < pend:
        # -----analysis-----
        xw = x[pin - hM1:pin + hM2] * w          # window the input sound
        fftbuffer = np.zeros(N)
        fftbuffer[:hM1] = xw[hM2:]               # zero-phase window in fftbuffer
        fftbuffer[N - hM2:] = xw[:hM2]
        X = fft(fftbuffer)
        mX = 20 * np.log10(abs(X[:hN]))          # magnitude spectrum (dB), positive freqs
        ploc = PP.peakDetection(mX, hN, t)       # detect peak locations
        pX = np.unwrap(np.angle(X[:hN]))         # unwrapped phase, positive freqs
        iploc, ipmag, ipphase = PP.peakInterp(mX, pX, ploc)  # refine peak values
        f0 = fd.f0DetectionTwm(iploc, ipmag, N, fs, f0et, minf0, maxf0, maxnpeaksTwm)  # find f0

        hloc = np.zeros(nH)                      # harmonic locations
        hmag = np.zeros(nH) - 100                # harmonic magnitudes
        hphase = np.zeros(nH)                    # harmonic phases
        hf = (f0 > 0) * (f0 * np.arange(1, nH + 1))  # ideal harmonic frequencies
        peak_freqs = iploc / float(N) * fs
        hi = 0
        while f0 > 0 and hi < nH and hf[hi] < nyquist:
            dist = abs(peak_freqs - hf[hi])
            pei = np.argmin(dist)                # closest peak
            # Accept the peak only if no earlier harmonic already took it
            # and it is within the relative deviation maxhd.
            if not np.any(hloc[:hi] == iploc[pei]) and dist[pei] < maxhd * hf[hi]:
                hloc[hi] = iploc[pei]
                hmag[hi] = ipmag[pei]
                hphase[hi] = ipphase[pei]
            hi += 1
        hloc = (hloc != 0) * (hloc * Ns / float(N))  # synth. locs

        # -----synthesis-----
        Yh = GS.genSpecSines(hloc, hmag, hphase, Ns)  # generate spec sines
        fftbuffer = np.real(ifft(Yh))            # inverse FFT
        yh[:hNs - 1] = fftbuffer[hNs + 1:]       # undo zero-phase window
        yh[hNs - 1:] = fftbuffer[:hNs + 1]
        y[pin - hNs:pin + hNs] += sw * yh        # overlap-add
        pin += H                                 # advance sound pointer
    return y
def hpsModelParams(x,fs,w,N,t,nH,minf0,maxf0,f0et,maxhd,stocf,timemapping,fscale,timbremapping) :
    """Analysis/synthesis of a sound using the harmonic plus stochastic model.

    x: input sound, fs: sampling rate, w: analysis window (odd size)
    N: FFT size (minimum 512), t: threshold in negative dB
    nH: maximum number of harmonics
    minf0: minimum f0 frequency in Hz, maxf0: maximum f0 frequency in Hz
    f0et: error threshold in the f0 detection (ex: 5)
    maxhd: max. relative deviation in harmonic detection (ex: .2)
    stocf: decimation factor of mag spectrum for stochastic analysis
    timemapping, fscale, timbremapping: accepted but not read; the
        transformation stage of this function is empty
    returns y, yh, ys: output sound, harmonic component, stochastic
    component (all zeros when x is too short to hold a single frame)
    """
    # Sizes are slice bounds: keep them integers with floor division.
    hN = N // 2                        # size of positive spectrum
    hM = (w.size + 1) // 2             # half analysis window size
    Ns = 512                           # FFT size for synthesis (even)
    H = Ns // 4                        # hop size used for analysis and synthesis
    hNs = Ns // 2
    # The residual frame starts at pin-hNs-1; starting at hNs+1 keeps that
    # index from going negative (a negative slice start would wrap around).
    pin = max(hNs + 1, hM)
    pend = x.size - max(hNs, hM)       # last sample to start a frame
    yhw = np.zeros(Ns)                 # harmonic output frame
    ysw = np.zeros(Ns)                 # stochastic output frame
    yh = np.zeros(x.size)              # harmonic output array
    ys = np.zeros(x.size)              # stochastic output array
    w = w / sum(w)                     # normalize analysis window

    bh = blackmanharris(Ns)
    bh = bh / sum(bh)
    wr = bh                            # window for residual
    center = slice(hNs - H, hNs + H)
    sw = np.zeros(Ns)                  # synthesis window for sines
    sw[center] = triang(2 * H) / bh[center]
    sws = H * hanning(Ns) / 2          # synthesis window for stochastic

    lastyhloc = np.zeros(nH)                     # previous synthesis harmonic locations
    yhphase = 2 * np.pi * np.random.rand(nH)     # synthesis harmonic phases (random start)
    nyquist = fs / 2.0
    # Loop-invariant high-pass ramp: quadratic fade-in over the bins below
    # 500 Hz. fc must be an integer because it is used as a slice bound.
    fc = min(1 + int(round(500.0 / fs * Ns)), hNs)
    hpf = (np.arange(fc) / float(max(fc - 1, 1))) ** 2

    while pin < pend:
        # -----analysis-----
        xw = x[pin - hM:pin + hM - 1] * w        # window the input sound
        fftbuffer = np.zeros(N)
        fftbuffer[:hM] = xw[hM - 1:]             # zero-phase window in fftbuffer
        fftbuffer[N - hM + 1:] = xw[:hM - 1]
        X = fft(fftbuffer)
        mX = 20 * np.log10(abs(X[:hN]))          # magnitude spectrum (dB), positive freqs
        ploc = PP.peakDetection(mX, hN, t)
        pX = np.unwrap(np.angle(X[:hN]))         # unwrapped phase, positive freqs
        iploc, ipmag, ipphase = PP.peakInterp(mX, pX, ploc)          # refine peak values
        f0 = fd.f0DetectionTwm(iploc, ipmag, N, fs, f0et, minf0, maxf0)  # find f0

        hloc = np.zeros(nH)                      # harmonic locations
        hmag = np.zeros(nH) - 100                # harmonic magnitudes
        hphase = np.zeros(nH)                    # harmonic phases
        hf = (f0 > 0) * (f0 * np.arange(1, nH + 1))  # ideal harmonic frequencies
        peak_freqs = iploc / float(N) * fs
        hi = 0
        while f0 > 0 and hi < nH and hf[hi] < nyquist:
            dist = abs(peak_freqs - hf[hi])
            pei = np.argmin(dist)                # closest peak
            if not np.any(hloc[:hi] == iploc[pei]) and dist[pei] < maxhd * hf[hi]:
                hloc[hi] = iploc[pei]
                hmag[hi] = ipmag[pei]
                hphase[hi] = ipphase[pei]
            hi += 1
        hloc = (hloc != 0) * (hloc * Ns / float(N))  # synth. locs

        ri = pin - hNs - 1                       # input sound pointer for residual analysis
        xw2 = x[ri:ri + Ns] * wr                 # window the input sound
        fftbuffer = np.zeros(Ns)
        fftbuffer[:hNs] = xw2[hNs:]              # zero-phase window in fftbuffer
        fftbuffer[hNs:] = xw2[:hNs]
        X2 = fft(fftbuffer)
        Xh = GS.genSpecSines(hloc, hmag, hphase, Ns)  # generate sines
        Xr = X2 - Xh                             # residual complex spectrum
        mXr = 20 * np.log10(abs(Xr[:hNs]))       # magnitude spectrum of residual
        # Clip at -200 dB to avoid -Inf; resample needs an integer count.
        mXrenv = resample(np.maximum(-200, mXr), int(mXr.size * stocf))

        # -----synthesis data-----
        yhloc = hloc                             # synthesis harmonic locs
        yhmag = hmag                             # synthesis harmonic amplitudes
        mYrenv = mXrenv                          # synthesis residual envelope
        yf0 = f0

        # -----transformations-----
        # (none implemented; analysis data is synthesized unchanged)

        # -----synthesis-----
        # Harmonic phases are propagated from the mean of the previous and
        # current locations instead of reusing the analysis phases.
        yhphase += 2 * np.pi * (lastyhloc + yhloc) / 2 / Ns * H
        lastyhloc = yhloc
        Yh = GS.genSpecSines(yhloc, yhmag, yhphase, Ns)  # generate spec sines

        mYs = resample(mYrenv, hNs)              # interpolate to original size
        mYs = 10 ** (mYs / 20)                   # dB to linear magnitude
        if f0 > 0:
            mYs *= np.cos(np.pi * np.arange(0, hNs) / Ns * fs / yf0) ** 2  # filter residual
        mYs[:fc] *= hpf                          # HPF
        pYs = 2 * np.pi * np.random.rand(hNs)    # random phases
        Ys = np.zeros(Ns, dtype=complex)
        Ys[:hNs] = mYs * np.exp(1j * pYs)                        # positive freqs
        Ys[hNs + 1:] = mYs[:0:-1] * np.exp(-1j * pYs[:0:-1])     # negative freqs (conjugate mirror)

        fftbuffer = np.real(ifft(Yh))
        yhw[:hNs - 1] = fftbuffer[hNs + 1:]      # sines in time domain using IFFT
        yhw[hNs - 1:] = fftbuffer[:hNs + 1]
        fftbuffer = np.real(ifft(Ys))
        ysw[:hNs - 1] = fftbuffer[hNs + 1:]      # stochastic in time domain using IFFT
        ysw[hNs - 1:] = fftbuffer[:hNs + 1]

        yh[ri:ri + Ns] += sw * yhw               # overlap-add for sines
        ys[ri:ri + Ns] += sws * ysw              # overlap-add for stoch
        pin += H                                 # advance sound pointer

    y = yh + ys
    return y, yh, ys
def sineModelPlot(x, fs, w, N, H, t, minFreq, maxFreq):
    """Plot the magnitude spectrogram of a sound with its spectral peaks.

    Each frame is windowed and transformed with an N-point FFT; the dB
    magnitudes between minFreq and maxFreq are collected into a spectrogram
    and the interpolated peak locations in that range are scattered on top.
    No sound is synthesized.

    x: input array sound, fs: sampling rate, w: analysis window
    N: FFT size, H: hop size (positive), t: threshold in negative dB
    minFreq, maxFreq: frequency range (Hz) shown in the plot
    returns YSpec: the magnitude spectrogram in dB (only bins inside the
    requested range are stored), one column per frame
    raises ValueError if H is not positive or x is too short for one frame
    """
    hop = int(H)
    if hop <= 0:
        raise ValueError("hop size H must be a positive integer")
    # Sizes are slice bounds: keep them integers with floor division.
    hN = N // 2                        # size of positive spectrum
    hM1 = (w.size + 1) // 2            # ceil of half analysis window size
    hM2 = w.size // 2                  # floor of half analysis window size
    pin = hM1                          # sound pointer in middle of analysis window
    pend = x.size - max(hM1, hop)      # last sample to start a frame
    w = w / sum(w)                     # normalize analysis window

    # Fractional bin limits select the peaks; integer limits slice the
    # spectrum and label the frequency axis.
    firstBin = N * minFreq / float(fs)
    lastBin = N * maxFreq / float(fs)
    firstIdx = max(int(firstBin), 0)
    lastIdx = min(int(lastBin), hN)
    binFreq = np.arange(firstIdx, lastIdx) * float(fs) / N   # the bin frequencies

    frmTime = []
    specCols, peakRows = [], []
    while pin < pend:
        frmTime.append(pin / float(fs))
        xw = x[pin - hM1:pin + hM2] * w          # window the input sound
        fftbuffer = np.zeros(N)
        fftbuffer[:hM1] = xw[hM2:]               # zero-phase window in fftbuffer
        fftbuffer[N - hM2:] = xw[:hM2]
        X = fft(fftbuffer)
        mX = 20 * np.log10(abs(X[:hN]))          # magnitude spectrum (dB), positive freqs
        pX = np.unwrap(np.angle(X[:hN]))         # unwrapped phase, positive freqs
        ploc = PP.peakDetection(mX, hN, t)       # detect locations of peaks
        iploc, ipmag, ipphase = PP.peakInterp(mX, pX, ploc)  # refine peak values

        # Store this frame's column and its peaks as (time, frequency)
        # rows; everything is stacked once after the loop.
        specCols.append(mX[firstIdx:lastIdx])
        final_peaks = iploc[(iploc >= firstBin) & (iploc < lastBin)]
        parray = np.zeros([final_peaks.size, 2])
        parray[:, 0] = pin / float(fs)
        parray[:, 1] = final_peaks * float(fs) / N
        peakRows.append(parray)
        pin += hop

    if not frmTime:
        raise ValueError("input sound is too short to analyze a single frame")

    frmTime = np.array(frmTime)                  # frame times in seconds
    YSpec = np.array(specCols).T                 # bins x frames
    specPeaks = np.vstack(peakRows)

    # plt.hold(True) was dropped: holding is the default behaviour and the
    # function no longer exists in Matplotlib 3.0+.
    plt.pcolormesh(frmTime, binFreq, YSpec)
    plt.scatter(specPeaks[:, 0] + (0.5 * hop / float(fs)), specPeaks[:, 1], s=10, marker="x")
    plt.xlabel("Time(s)")
    plt.ylabel("Frequency(Hz)")
    plt.autoscale(tight=True)
    plt.show()
    return YSpec
def sps(x, fs, w, N, t, maxnS, stocf):
    """Analysis/synthesis of a sound using the sinusoidal plus stochastic
    model, animating every frame of the process in a matplotlib figure.

    x: input sound, fs: sampling rate, w: analysis window (odd size),
    N: FFT size (minimum 512), t: threshold in negative dB,
    maxnS: maximum number of sinusoids,
    stocf: decimation factor of mag spectrum for stochastic analysis
    returns y: output sound, yh: harmonic component, ys: stochastic component

    A figure is created and redrawn on every frame (timeline, input and
    output signals, spectra and peak trajectories).
    """
    input_title = "Input Signal (x)"
    output_title = "Output Signal (yh)"
    original_title = "Original spectrum (mX, iploc, ipmag, f0, hloc, hmag)"
    residual_title = "Harmonic plus residual spectrum (mXh, mXr, mX2)"

    freq_range = 10000                              # fs/2 by default
    # Sizes are computed with floor division so they stay integers (they are
    # used as array sizes and slice bounds) whatever the division semantics.
    hN = N // 2                                     # size of positive spectrum
    hM = (w.size + 1) // 2                          # half analysis window size
    Ns = 256                                        # FFT size for synthesis (even)
    H = Ns // 4                                     # hop size used for analysis and synthesis
    hNs = Ns // 2
    zoom = 80 * H                                   # samples shown in the zoomed signal views
    pin = max(hNs, hM)                              # sound pointer in middle of analysis window
    pend = x.size - max(hNs, hM)                    # last sample to start a frame
    yhw = np.zeros(Ns)                              # output sine sound frame
    ysw = np.zeros(Ns)                              # output residual sound frame
    yh = np.zeros(x.size)                           # output sine component
    ys = np.zeros(x.size)                           # output residual component
    w = w / sum(w)                                  # normalize analysis window
    sw = np.zeros(Ns)
    ow = triang(2 * H)                              # overlapping window
    sw[hNs - H:hNs + H] = ow
    bh = blackmanharris(Ns)                         # synthesis window
    bh = bh / sum(bh)                               # normalize synthesis window
    wr = bh                                         # window for residual
    sw[hNs - H:hNs + H] = sw[hNs - H:hNs + H] / bh[hNs - H:hNs + H]
    sws = H * hanning(Ns) / 2                       # synthesis window for stochastic
    lastysloc = np.zeros(maxnS)                     # synthesis harmonic locations
    ysphase = 2 * np.pi * np.random.rand(maxnS)     # synthesis harmonic phases
    fridx = 0                                       # frame pointer
    isInitFrame = True                              # True for frames equivalent to initial frame (for synth part)
    lastnS = 0                                      # no sinusoids tracked before the first frame

    def label_spectrum_axes(ax, title):
        # Title, labels and limits shared by the two spectrum plots.
        ax.set_title(title, size=9, fontweight='bold')
        ax.set_xlabel("Frequency (Hz)", size=8)
        ax.set_ylabel("Amplitude (dB)", size=8)
        ax.set_xlim(0, freq_range)
        ax.set_ylim(-100, 0)

    def reset_signal_axes(ax, title, start):
        # Clear a signal plot and show the zoom window that begins at `start` samples.
        ax.cla()
        ax.set_xlim(start / fs, ((80.0 * H) + start) / fs)
        ax.set_ylim(x.min(), x.max())
        ax.set_title(title, size=9, fontweight='bold')
        ax.locator_params(axis='y', nbins=5)

    #-----initialize plots-----
    clip_in = 0.0                                   # samples to clip input/output signal
    clip_spec = 0.0                                 # number of frames to clip spectrogram
    freq = np.arange(0, freq_range, float(fs) / N)  # frequency axis in Hz (exact bin spacing)
    freq = freq[:freq.size - 1]
    duration = x.size / float(fs)
    time = np.arange(x.size) / float(fs)            # time axis in seconds, one value per sample of x
    n_frame = 0
    n_bins = freq.size
    n_cols = pend // H
    specgram = np.ones((n_bins, n_cols)) * -200     # initialize spectrogram
    prev_peaks_loc = np.zeros(maxnS)                # harmonic trajectories

    plt.figure(figsize=(10.5, 7.1), dpi=100)
    ax0 = plt.subplot2grid((8, 6), (0, 0), colspan=6)
    ax0.set_position([0.04, 0.955, 0.92, 0.015])
    ax0.set_title("timeline", size=7, fontweight='bold')
    ax0.yaxis.set_ticks([])                         # no y axis ticks
    ax0.xaxis.set_ticks([0, duration])
    ax0.set_xticklabels(['0 s', '%.2f' % duration + ' s'])
    ax0.set_xlim(0, duration)
    ax0.plot(time, np.zeros(x.size), lw=1.5)
    plt.tick_params(axis='both', labelsize=8)
    rect_zoom = patches.Rectangle((0, -2**7), width=(80.0*H)/fs, height=2**15, color='black', alpha=0.2)
    ax0.add_patch(rect_zoom)

    ax1 = plt.subplot2grid((8, 6), (1, 0), colspan=6)
    ax1.set_position([0.04, 0.87, 0.92, 0.05])
    ax1.set_title(input_title, size=9, fontweight='bold')
    ax1.locator_params(axis='y', nbins=5)
    ax1.set_xlim(0, (80.0*H)/fs)
    ax1.set_ylim(x.min(), x.max())
    plt.tick_params(axis='both', labelsize=8)
    plt.setp(ax1.get_xticklabels(), visible=False)
    ax1.plot(time[:zoom], x[:zoom], 'b')

    ax2 = plt.subplot2grid((8, 6), (2, 0), colspan=6, sharex=ax1, sharey=ax1)
    ax2.set_position([0.04, 0.79, 0.92, 0.05])
    ax2.set_title(output_title, size=9, fontweight='bold')
    ax2.set_xlim(0, (80.0*H)/fs)
    ax2.set_ylim(x.min(), x.max())
    plt.tick_params(axis='both', labelsize=8)

    ax3 = plt.subplot2grid((8, 6), (3, 0), rowspan=2, colspan=3)
    ax3.set_position([0.06, 0.52, 0.42, 0.21])
    label_spectrum_axes(ax3, original_title)
    plt.tick_params(axis='both', labelsize=8)

    ax4 = plt.subplot2grid((8, 6), (3, 4), rowspan=2, colspan=3, sharex=ax3, sharey=ax3)
    ax4.set_position([0.55, 0.52, 0.42, 0.21])
    label_spectrum_axes(ax4, residual_title)
    plt.tick_params(axis='both', labelsize=8)

    ax5 = plt.subplot2grid((8, 6), (7, 1), rowspan=2, colspan=4)
    ax5.set_position([0.05, 0.03, 0.92, 0.42])
    ax5.set_title("Peak tracking", size=9, fontweight='bold')
    ax5.imshow(specgram, interpolation='nearest', extent=(0, n_cols, 0, freq_range), aspect='auto', cmap='jet', vmin=-100, vmax=-20)
    ax5.set_ylabel("Frequency (Hz)", size=8)
    ax5.set_xlim(0, 80)
    ax5.set_ylim(0, freq_range)
    ax5.ticklabel_format(axis='y', scilimits=(-2, 2))   # use scientific limits above 1e2
    plt.tick_params(axis='both', labelsize=8)

    while pin < pend:
        if fridx == 0 or lastnS == 0:               # no tracked sinusoids: frame is equivalent to initial frame
            isInitFrame = True

        #-----analysis-----
        xw = x[pin - hM:pin + hM - 1] * w           # window the input sound
        fftbuffer = np.zeros(N)                     # reset buffer
        fftbuffer[:hM] = xw[hM - 1:]                # zero-phase window in fftbuffer
        fftbuffer[N - hM + 1:] = xw[:hM - 1]
        X = fft(fftbuffer)                          # compute FFT
        mX = 20 * np.log10(abs(X[:hN]))             # magnitude spectrum of positive frequencies
        ploc = PP.peakDetection(mX, hN, t)
        pX = np.unwrap(np.angle(X[:hN]))            # unwrapped phase spect. of positive freq.
        iploc, ipmag, ipphase = PP.peakInterp(mX, pX, ploc)   # refine peak values

        I = np.argsort(ipmag)[::-1]                 # peak order by descending magnitude
        smag = ipmag[I]                             # peak magnitudes in that order
        nS = min(maxnS, np.where(smag > t)[0].size) # get peaks above threshold
        sloc = iploc[I[:nS]]
        sphase = ipphase[I[:nS]]
        if isInitFrame:                             # nothing to track from: match the frame against itself
            lastnS = nS
            lastsloc = sloc
        peaks_loc = np.float32(sloc) / N * fs
        sloc = (sloc != 0) * (sloc * Ns / N)        # peak locations for synthesis
        lastidx = np.zeros(nS, dtype=int)
        for i in range(0, nS):                      # find closest peak to create trajectories
            lastidx[i] = np.argmin(abs(sloc[i] - lastsloc[:lastnS]))

        ri = pin - hNs                              # input sound pointer for residual analysis
        xw2 = x[ri:ri + Ns] * wr                    # window the input sound
        fftbuffer = np.zeros(Ns)                    # reset buffer
        fftbuffer[:hNs] = xw2[hNs:]                 # zero-phase window in fftbuffer
        fftbuffer[hNs:] = xw2[:hNs]
        X2 = fft(fftbuffer)                         # compute FFT for residual analysis
        Xh = GS.genSpecSines(sloc, smag, sphase, Ns)   # generate sines
        Xr = X2 - Xh                                # get the residual complex spectrum
        mXr = 20 * np.log10(abs(Xr[:hNs]))          # magnitude spectrum of residual
        # decimate the magnitude spectrum and avoid -Inf; resample needs an integer count
        mXrenv = resample(np.maximum(-200, mXr), int(mXr.size * stocf))

        #-----synthesis data-----
        ysloc = sloc                                # synthesis harmonics locs
        ysmag = smag[:nS]                           # synthesis harmonic amplitudes
        mYrenv = mXrenv                             # synthesis residual envelope

        #-----transformations-----

        #-----synthesis-----
        if isInitFrame:                             # variables are initialized like for the first frame
            lastysloc = np.zeros(maxnS)             # synthesis harmonic locations
            ysphase = 2 * np.pi * np.random.rand(maxnS)   # synthesis harmonic phases
            lastysphase = ysphase                   # phase for first frame
        if nS > lastnS:                             # initialize peaks that start
            lastysphase = np.concatenate((lastysphase, np.zeros(nS - lastnS)))
            lastysloc = np.concatenate((lastysloc, np.zeros(nS - lastnS)))
        ysphase = lastysphase[lastidx] + 2 * np.pi * (lastysloc[lastidx] + ysloc) / 2 / Ns * H   # propagate phases
        lastysloc = ysloc
        lastysphase = ysphase
        lastnS = nS                                 # update last frame data
        lastsloc = sloc                             # update last frame data

        Yh = GS.genSpecSines(ysloc, ysmag, ysphase, Ns)   # generate spec sines
        mYs = resample(mYrenv, hNs)                 # interpolate to original size
        pYs = 2 * np.pi * np.random.rand(hNs)       # generate phase random values
        Ys = np.zeros(Ns, dtype=complex)
        Ys[:hNs] = 10**(mYs / 20) * np.exp(1j * pYs)                     # generate positive freq.
        Ys[hNs + 1:] = 10**(mYs[:0:-1] / 20) * np.exp(-1j * pYs[:0:-1])  # generate negative freq.

        fftbuffer = np.real(ifft(Yh))
        yhw[:hNs - 1] = fftbuffer[hNs + 1:]         # sines in time domain using IFFT
        yhw[hNs - 1:] = fftbuffer[:hNs + 1]
        fftbuffer = np.real(ifft(Ys))
        ysw[:hNs - 1] = fftbuffer[hNs + 1:]         # stochastic in time domain using IFFT
        ysw[hNs - 1:] = fftbuffer[:hNs + 1]
        yh[ri:ri + Ns] += sw * yhw                  # overlap-add for sines
        ys[ri:ri + Ns] += sws * ysw                 # overlap-add for stoch

        #-----plotting-------
        if pin > ax1.get_xlim()[1] * fs - (5.0 * H):   # scroll the views when the frame nears their right edge
            clip_in = float(pin) - 50.0 * H
            clip_spec = pin / float(H) - 50.0
            scroll_end = int(clip_in) + zoom        # integer sample bound for the slices below
            rect_zoom.remove()
            rect_zoom = patches.Rectangle((clip_in/fs, -2**7), width=(80.0*H)/fs, height=2**15, color='black', alpha=0.2)
            ax0.add_patch(rect_zoom)
            reset_signal_axes(ax1, input_title, clip_in)
            plt.setp(ax1.get_xticklabels(), visible=False)
            ax1.plot(time[:scroll_end], x[:scroll_end], 'b')
            reset_signal_axes(ax2, output_title, clip_in)
            ax2.plot(time[:ri], yh[:ri], 'b')
            ax5.set_xlim(clip_spec, clip_spec + 80)

        ax3.cla()
        label_spectrum_axes(ax3, original_title)
        ax4.cla()
        label_spectrum_axes(ax4, residual_title)

        rect = patches.Rectangle((np.float32(pin-hM)/fs, -2**7), width=np.float32(w.size)/fs, height=2**15, color='blue', alpha=0.5)
        ax1.add_patch(rect)                         # mark the analysed samples
        ax3.plot(freq, mX[:n_bins], 'b')            # plot spectrum
        ax3.fill_between(freq, -200, mX[:n_bins], facecolor='blue', alpha=0.3)
        ax3.plot(np.float32(iploc[:n_bins])/N*fs, ipmag[:n_bins], 'rx', ms=3)   # plot interpolated peak locations
        ax5.imshow(specgram, interpolation='nearest', extent=(0, n_cols, 0, freq_range), aspect='auto', cmap='jet', vmin=-100, vmax=-20)
        ax3.plot(peaks_loc, smag[:nS], 'o', ms=3, mfc='yellow')   # plot harmonics

        if isInitFrame:
            # lastidx refers to this frame's own peaks here, and the previous
            # frame may have had none: restart the trajectories from zero, as
            # on the very first frame, instead of indexing stale data.
            prev_peaks_loc = np.zeros(maxnS)
        for i in range(0, nS):
            ax5.plot([n_frame-0.5, n_frame+0.5], [prev_peaks_loc[lastidx[i]], peaks_loc[i]], '-og', ms=2.5, mfc='yellow', lw=1.3)
        prev_peaks_loc = peaks_loc

        mX2 = 20 * np.log10(abs(X2[:hNs]))          # magnitude spectrum of positive frequencies
        mX2 = resample(np.maximum(-200, mX2), hN)
        ax4.plot(freq[:n_bins], mX2[:n_bins], 'b', alpha=0.3)
        ax4.fill_between(freq[:n_bins], -200, mX2[:n_bins], facecolor='blue', alpha=0.1)

        mXh = 20 * np.log10(abs(Xh[:hNs]))          # magnitude spectrum of positive frequencies
        mXh = resample(np.maximum(-200, mXh), hN)
        ax4.plot(freq[:n_bins], mXh[:n_bins], 'g')
        ax4.fill_between(freq[:n_bins], -200, mXh[:n_bins], facecolor='green', alpha=0.4)

        mXr = resample(np.maximum(-200, mXr), hN)
        ax4.plot(freq[:n_bins], mXr[:n_bins], 'r', alpha=0.3)
        ax4.fill_between(freq[:n_bins], -200, mXr[:n_bins], facecolor='red', alpha=0.1)

        rect2 = patches.Rectangle((np.float32(ri)/fs, -2**7), width=np.float32(Ns)/fs, height=2**15, color='green', alpha=0.3)
        reset_signal_axes(ax2, output_title, clip_in)
        ax2.add_patch(rect2)                        # mark the synthesized samples
        ax2.plot(time[:ri + Ns], yh[:ri + Ns], 'b')
        plt.draw()
        rect2.remove()
        rect.remove()

        n_frame += 1
        pin += H                                    # advance sound pointer
        fridx += 1                                  # advance frame pointer
        isInitFrame = False                         # meaningful only for the current frame

    y = yh + ys
    return y, yh, ys
def spsTimescale(x, fs, w, N, t, maxnS, stocf):
    """Analysis/synthesis of a sound using the sinusoidal plus stochastic
    model, reading the input through a built-in time mapping.

    For every output frame the input position to analyse is obtained by
    interpolating a fixed mapping between input and output time, so the
    output is a time-warped version of the input.

    x: input sound, fs: sampling rate, w: analysis window (odd size),
    N: FFT size (minimum 512), t: threshold in negative dB,
    maxnS: maximum number of sinusoids,
    stocf: decimation factor of mag spectrum for stochastic analysis
    returns y: output sound, yh: harmonic component, ys: stochastic component
    """
    # Sizes are computed with floor division so they stay integers (they are
    # used as array sizes and slice bounds) whatever the division semantics.
    hN = N // 2                                     # size of positive spectrum
    hM = (w.size + 1) // 2                          # half analysis window size
    Ns = 512                                        # FFT size for synthesis (even)
    H = Ns // 4                                     # hop size used for analysis and synthesis
    hNs = Ns // 2
    margin = max(hNs, hM)                           # samples needed on each side of a frame centre

    tm = np.arange(0.01, 0.94, 0.01)
    in_time = np.concatenate((np.array([0]), tm + 0.05 * np.sin(8.6 * np.pi * tm), np.array([1])))   # input time --> keep end value
    out_time = np.concatenate((np.array([0]), tm, np.array([1])))   # output time
    timemapping = np.asarray((in_time, out_time))
    timemapping = timemapping * x.size / fs         # scale the mapping by the duration of x
    outsoundlength = int(round(timemapping[1, -1] * fs))   # length of output sound (integer array size)
    pend = outsoundlength - margin                  # last sample to start a frame

    yhw = np.zeros(Ns)                              # output sine sound frame
    ysw = np.zeros(Ns)                              # output residual sound frame
    yh = np.zeros(outsoundlength)                   # output sine component
    ys = np.zeros(outsoundlength)                   # output residual component
    w = w / sum(w)                                  # normalize analysis window
    sw = np.zeros(Ns)
    ow = triang(2 * H)                              # overlapping window
    sw[hNs - H:hNs + H] = ow
    bh = blackmanharris(Ns)                         # synthesis window
    bh = bh / sum(bh)                               # normalize synthesis window
    wr = bh                                         # window for residual
    sw[hNs - H:hNs + H] = sw[hNs - H:hNs + H] / bh[hNs - H:hNs + H]
    sws = H * hanning(Ns) / 2                       # synthesis window for stochastic
    lastysloc = np.zeros(maxnS)                     # synthesis harmonic locations
    ysphase = 2 * np.pi * np.random.rand(maxnS)     # synthesis harmonic phases
    minpin = margin                                 # first input position that fits a frame
    maxpin = x.size - margin                        # last input position that fits a frame
    fridx = 0                                       # frame pointer
    isInitFrame = True                              # True for frames equivalent to initial frame (for synth part)
    lastnS = 0                                      # no sinusoids tracked before the first frame

    pout = margin                                   # output sound pointer
    while pout < pend:
        if fridx == 0 or lastnS == 0:               # no tracked sinusoids: frame is equivalent to initial frame
            isInitFrame = True

        # input position mapped from the output time, as an integer sample
        # index clamped to the positions where a whole frame fits
        pin = int(round(np.interp(float(pout) / fs, timemapping[1, :], timemapping[0, :]) * fs))
        pin = max(minpin, pin)
        pin = min(maxpin, pin)

        #-----analysis-----
        xw = x[pin - hM:pin + hM - 1] * w           # window the input sound
        fftbuffer = np.zeros(N)                     # reset buffer
        fftbuffer[:hM] = xw[hM - 1:]                # zero-phase window in fftbuffer
        fftbuffer[N - hM + 1:] = xw[:hM - 1]
        X = fft(fftbuffer)                          # compute FFT
        mX = 20 * np.log10(abs(X[:hN]))             # magnitude spectrum of positive frequencies
        ploc = PP.peakDetection(mX, hN, t)
        pX = np.unwrap(np.angle(X[:hN]))            # unwrapped phase spect. of positive freq.
        iploc, ipmag, ipphase = PP.peakInterp(mX, pX, ploc)   # refine peak values

        I = np.argsort(ipmag)[::-1]                 # peak order by descending magnitude
        smag = ipmag[I]                             # peak magnitudes in that order
        nS = min(maxnS, np.where(smag > t)[0].size) # get peaks above threshold
        sloc = iploc[I[:nS]]
        sphase = ipphase[I[:nS]]
        if isInitFrame:                             # nothing to track from: match the frame against itself
            lastnS = nS
            lastsloc = sloc
        sloc = (sloc != 0) * (sloc * Ns / N)        # peak locations for synthesis
        lastidx = np.zeros(nS, dtype=int)
        for i in range(0, nS):                      # find closest peak to create trajectories
            lastidx[i] = np.argmin(abs(sloc[i] - lastsloc[:lastnS]))

        # NOTE(review): when hM <= hNs the smallest pin is hNs, which makes ri
        # equal to -1 and the slice below empty; confirm callers always use a
        # window longer than Ns.
        ri = pin - hNs - 1                          # input sound pointer for residual analysis
        xw2 = x[ri:ri + Ns] * wr                    # window the input sound
        fftbuffer = np.zeros(Ns)                    # reset buffer
        fftbuffer[:hNs] = xw2[hNs:]                 # zero-phase window in fftbuffer
        fftbuffer[hNs:] = xw2[:hNs]
        X2 = fft(fftbuffer)                         # compute FFT for residual analysis
        Xh = GS.genSpecSines(sloc, smag, sphase, Ns)   # generate sines
        Xr = X2 - Xh                                # get the residual complex spectrum
        mXr = 20 * np.log10(abs(Xr[:hNs]))          # magnitude spectrum of residual
        # decimate the magnitude spectrum and avoid -Inf; resample needs an integer count
        mXrenv = resample(np.maximum(-200, mXr), int(mXr.size * stocf))

        #-----synthesis data-----
        ysloc = sloc                                # synthesis harmonics locs
        ysmag = smag[:nS]                           # synthesis harmonic amplitudes
        mYrenv = mXrenv                             # synthesis residual envelope

        #-----transformations-----

        #-----synthesis-----
        if isInitFrame:                             # variables are initialized like for the first frame
            lastysloc = np.zeros(maxnS)             # synthesis harmonic locations
            ysphase = 2 * np.pi * np.random.rand(maxnS)   # synthesis harmonic phases
            lastysphase = ysphase                   # phase for first frame
        if nS > lastnS:                             # initialize peaks that start
            lastysphase = np.concatenate((lastysphase, np.zeros(nS - lastnS)))
            lastysloc = np.concatenate((lastysloc, np.zeros(nS - lastnS)))
        ysphase = lastysphase[lastidx] + 2 * np.pi * (lastysloc[lastidx] + ysloc) / 2 / Ns * H   # propagate phases
        lastysloc = ysloc
        lastysphase = ysphase
        lastnS = nS                                 # update last frame data
        lastsloc = sloc                             # update last frame data

        Yh = GS.genSpecSines(ysloc, ysmag, ysphase, Ns)   # generate spec sines
        mYs = resample(mYrenv, hNs)                 # interpolate to original size
        pYs = 2 * np.pi * np.random.rand(hNs)       # generate phase random values
        Ys = np.zeros(Ns, dtype=complex)
        Ys[:hNs] = 10**(mYs / 20) * np.exp(1j * pYs)                     # generate positive freq.
        Ys[hNs + 1:] = 10**(mYs[:0:-1] / 20) * np.exp(-1j * pYs[:0:-1])  # generate negative freq.

        fftbuffer = np.real(ifft(Yh))
        yhw[:hNs - 1] = fftbuffer[hNs + 1:]         # sines in time domain using IFFT
        yhw[hNs - 1:] = fftbuffer[:hNs + 1]
        fftbuffer = np.real(ifft(Ys))
        ysw[:hNs - 1] = fftbuffer[hNs + 1:]         # stochastic in time domain using IFFT
        ysw[hNs - 1:] = fftbuffer[:hNs + 1]

        ro = pout - hNs                             # output sound pointer for overlap
        yh[ro:ro + Ns] += sw * yhw                  # overlap-add for sines
        ys[ro:ro + Ns] += sws * ysw                 # overlap-add for stochastic

        pout += H                                   # advance sound pointer
        fridx += 1                                  # advance frame pointer
        isInitFrame = False                         # meaningful only for the current frame

    y = yh + ys
    return y, yh, ys
def run(self):
    """Analysis/synthesis of a sound using the harmonic plus stochastic
    model, optionally animating the process, and emit the result.

    All parameters are read from attributes of self:
    x: input sound, fs: sampling rate, w: analysis window (odd size),
    N: FFT size (minimum 512), t: threshold in negative dB,
    nH: maximum number of harmonics, minf0: minimum f0 frequency in Hz,
    maxf0: maximim f0 frequency in Hz,
    f0et: error threshold in the f0 detection (ex: 5),
    maxhd: max. relative deviation in harmonic detection (ex: .2)
    stocf: decimation factor of mag spectrum for stochastic analysis
    plot: draw the figures when true, process: also redraw after each
    drawing step, step: a frame is drawn when its index is a multiple of
    step, nFrameStart: index of the first frame that is drawn

    Nothing is returned: the signal "hpsDone(object, object, object, int)" is
    emitted with y: output sound, yh: harmonic component,
    ys: stochastic component and fs.
    """
    # initialize variables
    x = self.x
    fs = self.fs
    w = self.w
    N = self.N
    t = self.t
    nH = self.nH
    minf0 = self.minf0
    maxf0 = self.maxf0
    f0et = self.f0et
    maxhd = self.maxhd
    stocf = self.stocf
    plot = self.plot
    process = self.process
    step = self.step
    nFrameStart = self.nFrameStart

    input_title = "Input Signal (x)"
    output_title = "Output Signal (yh)"
    original_title = "Original spectrum (mX, iploc, ipmag, f0, hloc, hmag)"
    residual_title = "Harmonic plus residual spectrum (mXh, mXr, mX2)"

    freq_range = 10000                              # fs/2 by default
    # Sizes are computed with floor division so they stay integers (they are
    # used as array sizes and slice bounds) whatever the division semantics.
    hN = N // 2                                     # size of positive spectrum
    hM = (w.size + 1) // 2                          # half analysis window size
    Ns = 512                                        # FFT size for synthesis (even)
    H = Ns // 4                                     # hop size used for analysis and synthesis
    hNs = Ns // 2
    zoom = 80 * H                                   # samples shown in the zoomed signal views
    pin = max(hNs, hM)                              # sound pointer in middle of analysis window
    pend = x.size - max(hNs, hM)                    # last sample to start a frame
    yhw = np.zeros(Ns)                              # output sound frame
    ysw = np.zeros(Ns)                              # output sound frame
    yh = np.zeros(x.size)                           # output array
    ys = np.zeros(x.size)                           # output array
    w = w / sum(w)                                  # normalize analysis window
    sw = np.zeros(Ns)
    ow = triang(2 * H)                              # overlapping window
    sw[hNs - H:hNs + H] = ow
    bh = blackmanharris(Ns)                         # synthesis window
    bh = bh / sum(bh)                               # normalize synthesis window
    wr = bh                                         # window for residual
    sw[hNs - H:hNs + H] = sw[hNs - H:hNs + H] / bh[hNs - H:hNs + H]
    sws = H * hanning(Ns) / 2                       # synthesis window for stochastic
    lastyhloc = np.zeros(nH)                        # synthesis harmonic locations
    yhphase = 2 * np.pi * np.random.rand(nH)        # synthesis harmonic phases
    n_frame = 0                                     # number of frames counter

    def label_spectrum_axes(ax, title):
        # Title, labels and limits shared by the two spectrum plots.
        ax.set_title(title, size=9, fontweight='bold')
        ax.set_xlabel("Frequency (Hz)", size=8)
        ax.set_ylabel("Amplitude (dB)", size=8)
        ax.set_xlim(0, freq_range)
        ax.set_ylim(-100, 0)

    def reset_signal_axes(ax, title, start):
        # Clear a signal plot and show the zoom window that begins at `start` samples.
        ax.cla()
        ax.set_xlim(start / fs, ((80.0 * H) + start) / fs)
        ax.set_ylim(x.min(), x.max())
        ax.set_title(title, size=9, fontweight='bold')
        ax.locator_params(axis='y', nbins=5)

    def redraw_step():
        # Redraw after an intermediate drawing step when requested.
        if process:
            plt.draw()

    if plot:
        #-----initialize plots-----
        plt.ion()                                   # activate interactive mode
        clip_in = 0.0                               # samples to clip input/output signal
        clip_spec = 0.0                             # number of frames to clip spectrogram
        freq = np.arange(0, freq_range, float(fs) / N)   # frequency axis in Hz (exact bin spacing)
        freq = freq[:freq.size - 1]
        duration = x.size / float(fs)
        time = np.arange(x.size) / float(fs)        # time axis in seconds, one value per sample of x
        n_bins = freq.size                          # number of total bins in the freq_range
        n_cols = pend // H
        specgram = np.ones((n_bins, n_cols)) * -200 # initialize spectrogram
        prev_harmonics = np.zeros(nH - 1)           # previous harmonics to create harmonic trajectories
        prev_f0 = 0                                 # previous f0 to create f0 trajectory

        plt.figure(figsize=(10.5, 7.1), dpi=100)
        ax0 = plt.subplot2grid((8, 6), (0, 0), colspan=6)
        ax0.set_position([0.04, 0.955, 0.92, 0.015])
        ax0.set_title("timeline", size=7, fontweight='bold')
        ax0.yaxis.set_ticks([])                     # no y axis ticks
        ax0.xaxis.set_ticks([0, duration])          # set only two ticks in the limits of the plot
        ax0.set_xticklabels(['0 s', '%.2f' % duration + ' s'])
        ax0.set_xlim(0, duration)
        ax0.plot(time, np.zeros(x.size), lw=1.5)
        plt.tick_params(axis='both', labelsize=8)
        rect_zoom = patches.Rectangle((0, -2**7), width=(80.0*H)/fs, height=2**15, color='black', alpha=0.2)
        ax0.add_patch(rect_zoom)

        ax1 = plt.subplot2grid((8, 6), (1, 0), colspan=6)
        ax1.set_position([0.04, 0.87, 0.92, 0.05])
        ax1.set_title(input_title, size=9, fontweight='bold')
        ax1.locator_params(axis='y', nbins=5)
        ax1.set_xlim(0, (80.0*H)/fs)
        ax1.set_ylim(x.min(), x.max())
        plt.tick_params(axis='both', labelsize=8)
        plt.setp(ax1.get_xticklabels(), visible=False)
        ax1.plot(time[:zoom], x[:zoom], 'b')

        ax2 = plt.subplot2grid((8, 6), (2, 0), colspan=6, sharex=ax1, sharey=ax1)
        ax2.set_position([0.04, 0.79, 0.92, 0.05])
        ax2.set_title(output_title, size=9, fontweight='bold')
        ax2.set_xlim(0, (80.0*H)/fs)
        ax2.set_ylim(x.min(), x.max())
        plt.tick_params(axis='both', labelsize=8)

        ax3 = plt.subplot2grid((8, 6), (3, 0), rowspan=2, colspan=3)
        ax3.set_position([0.06, 0.52, 0.42, 0.21])
        label_spectrum_axes(ax3, original_title)
        plt.tick_params(axis='both', labelsize=8)

        ax4 = plt.subplot2grid((8, 6), (3, 4), rowspan=2, colspan=3, sharex=ax3, sharey=ax3)
        ax4.set_position([0.55, 0.52, 0.42, 0.21])
        label_spectrum_axes(ax4, residual_title)
        plt.tick_params(axis='both', labelsize=8)

        ax5 = plt.subplot2grid((8, 6), (7, 1), rowspan=2, colspan=4)
        ax5.set_position([0.05, 0.03, 0.92, 0.42])
        ax5.set_title("Peak tracking", size=9, fontweight='bold')
        ax5.imshow(specgram, interpolation='nearest', extent=(0, n_cols, 0, freq_range), aspect='auto', cmap='jet', vmin=-100, vmax=-20)
        ax5.set_ylabel("Frequency (Hz)", size=8)
        ax5.set_xlim(0, 80)
        ax5.set_ylim(0, freq_range)
        ax5.ticklabel_format(axis='y', scilimits=(-2, 2))   # use scientific limits above 1e2
        plt.tick_params(axis='both', labelsize=8)

    while pin < pend:
        #-----analysis-----
        xw = x[pin - hM:pin + hM - 1] * w           # window the input sound
        fftbuffer = np.zeros(N)                     # reset buffer
        fftbuffer[:hM] = xw[hM - 1:]                # zero-phase window in fftbuffer
        fftbuffer[N - hM + 1:] = xw[:hM - 1]
        X = fft(fftbuffer)                          # compute FFT
        mX = 20 * np.log10(abs(X[:hN]))             # magnitude spectrum of positive frequencies
        ploc = PP.peakDetection(mX, hN, t)
        pX = np.unwrap(np.angle(X[:hN]))            # unwrapped phase spect. of positive freq.
        iploc, ipmag, ipphase = PP.peakInterp(mX, pX, ploc)   # refine peak values
        if plot:
            specgram[:, n_frame] = mX[n_bins - 1::-1]

        f0 = fd.f0DetectionTwm(iploc, ipmag, N, fs, f0et, minf0, maxf0)   # find f0
        hloc = np.zeros(nH)                         # harmonic locations
        hmag = np.zeros(nH) - 100                   # harmonic magnitudes
        hphase = np.zeros(nH)                       # harmonic phases
        hf = (f0 > 0) * (f0 * np.arange(1, nH + 1)) # harmonic frequencies
        peak_freqs = iploc / N * fs                 # peak frequencies in Hz, the same for every harmonic
        hi = 0                                      # harmonic index
        while f0 > 0 and hi < nH and hf[hi] < fs / 2.0:   # find harmonic peaks
            deviations = abs(peak_freqs - hf[hi])
            pei = np.argmin(deviations)             # closest peak
            dev = deviations[pei]
            if (hi == 0 or not any(hloc[:hi] == iploc[pei])) and dev < maxhd * hf[hi]:
                hloc[hi] = iploc[pei]               # harmonic locations
                hmag[hi] = ipmag[pei]               # harmonic magnitudes
                hphase[hi] = ipphase[pei]           # harmonic phases
            hi += 1                                 # increase harmonic index
        harmonics = np.float32(hloc) / N * fs
        hloc = (hloc != 0) * (hloc * Ns / N)        # synth. locs

        # NOTE(review): when hM <= hNs the first frame has pin == hNs, which
        # makes ri equal to -1 and the slice below empty; confirm callers
        # always use a window longer than Ns.
        ri = pin - hNs - 1                          # input sound pointer for residual analysis
        xw2 = x[ri:ri + Ns] * wr                    # window the input sound
        fftbuffer = np.zeros(Ns)                    # reset buffer
        fftbuffer[:hNs] = xw2[hNs:]                 # zero-phase window in fftbuffer
        fftbuffer[hNs:] = xw2[:hNs]
        X2 = fft(fftbuffer)                         # compute FFT for residual analysis
        Xh = GS.genSpecSines(hloc, hmag, hphase, Ns)   # generate sines
        Xr = X2 - Xh                                # get the residual complex spectrum
        mXr = 20 * np.log10(abs(Xr[:hNs]))          # magnitude spectrum of residual
        # decimate the magnitude spectrum and avoid -Inf; resample needs an integer count
        mXrenv = resample(np.maximum(-200, mXr), int(mXr.size * stocf))

        #-----synthesis data-----
        yhloc = hloc                                # synthesis harmonics locs
        yhmag = hmag                                # synthesis harmonic amplitudes
        mYrenv = mXrenv                             # synthesis residual envelope
        yf0 = f0

        #-----transformations-----

        #-----synthesis-----
        yhphase += 2 * np.pi * (lastyhloc + yhloc) / 2 / Ns * H   # propagate phases
        lastyhloc = yhloc
        Yh = GS.genSpecSines(yhloc, yhmag, yhphase, Ns)   # generate spec sines
        mYs = resample(mYrenv, hNs)                 # interpolate to original size
        mYs = 10**(mYs / 20)                        # dB to linear magnitude
        if f0 > 0:
            mYs *= np.cos(np.pi * np.arange(0, hNs) / Ns * fs / yf0)**2   # filter residual
        # NOTE(review): the high-pass below is assumed to apply to every
        # frame, not only to the frames where an f0 was found.
        fc = 1 + int(round(500.0 / fs * Ns))        # 500 Hz, as an integer bin so it can slice
        mYs[:fc] *= (np.arange(fc) / float(fc - 1))**2   # HPF
        pYs = 2 * np.pi * np.random.rand(hNs)       # generate phase random values
        Ys = np.zeros(Ns, dtype=complex)
        Ys[:hNs] = mYs * np.exp(1j * pYs)                     # generate positive freq.
        Ys[hNs + 1:] = mYs[:0:-1] * np.exp(-1j * pYs[:0:-1])  # generate negative freq.

        fftbuffer = np.real(ifft(Yh))
        yhw[:hNs - 1] = fftbuffer[hNs + 1:]         # sines in time domain using IFFT
        yhw[hNs - 1:] = fftbuffer[:hNs + 1]
        fftbuffer = np.real(ifft(Ys))
        ysw[:hNs - 1] = fftbuffer[hNs + 1:]         # stochastic in time domain using IFFT
        ysw[hNs - 1:] = fftbuffer[:hNs + 1]
        yh[ri:ri + Ns] += sw * yhw                  # overlap-add for sines
        ys[ri:ri + Ns] += sws * ysw                 # overlap-add for stoch

        #-----plotting-------
        if plot and n_frame >= nFrameStart and (n_frame % step == 0 or (pin + H) > pend):
            # clear the signal views only if not enough space to plot
            if pin > ax1.get_xlim()[1] * fs - (5.0 * H):
                clip_in = float(pin) - 50.0 * H
                clip_spec = pin / float(H) - 50.0
                scroll_end = int(clip_in) + zoom    # integer sample bound for the slices below
                rect_zoom.remove()
                rect_zoom = patches.Rectangle((clip_in/fs, -2**7), width=(80.0*H)/fs, height=2**15, color='black', alpha=0.2)
                ax0.add_patch(rect_zoom)
                reset_signal_axes(ax1, input_title, clip_in)
                plt.setp(ax1.get_xticklabels(), visible=False)
                ax1.plot(time[:scroll_end], x[:scroll_end], 'b')
                reset_signal_axes(ax2, output_title, clip_in)
                ax2.plot(time[:ri], yh[:ri], 'b')
                ax5.set_xlim(clip_spec, clip_spec + 80)

            ax3.cla()
            label_spectrum_axes(ax3, original_title)
            ax4.cla()
            label_spectrum_axes(ax4, residual_title)

            # plot all the information of the current sample
            rect = patches.Rectangle((np.float32(pin-hM)/fs, -2**7), width=np.float32(w.size)/fs, height=2**15, color='blue', alpha=0.5)
            ax1.add_patch(rect)
            redraw_step()

            ax3.plot(freq, mX[:n_bins], 'b')        # plot spectrum
            ax3.fill_between(freq, -200, mX[:n_bins], facecolor='blue', alpha=0.3)
            redraw_step()

            ax3.plot(np.float32(iploc[:n_bins])/N*fs, ipmag[:n_bins], 'rx', ms=3)   # plot interpolated peak locations
            redraw_step()

            ax5.imshow(specgram, interpolation='nearest', extent=(0, n_cols, 0, freq_range), aspect='auto', cmap='jet', vmin=-100, vmax=-20)
            redraw_step()

            if f0 > 0:                              # plot f0
                loc = np.where(peak_freqs == f0)[0]
                if loc.size == 0:
                    loc = np.argmin(np.abs(peak_freqs - f0))   # closest peak location
                ax3.plot(f0, ipmag[loc], 'go', ms=4)   # plot in spectrum
                if prev_f0 != 0 and f0 != 0:        # plot in spectrogram
                    ax5.plot([n_frame-0.5, n_frame+0.5], [prev_f0, f0], '-or', ms=3, mfc='green', lw=1.6)
                elif prev_f0 == 0 and f0 != 0:      # initialize new line of f0's
                    ax5.plot(n_frame+0.5, f0, 'or', ms=3, mfc='green')
                redraw_step()

            # NOTE(review): the previous f0 and harmonics are assumed to be
            # saved on every drawn frame, including the ones without f0.
            if step == 1:
                prev_f0 = f0                        # save prev. f0 only if we are not rewinding plots

            if f0 > 0:
                ax3.plot(harmonics[1:], hmag[1:], 'o', ms=3, mfc='yellow')   # plot harmonics
                for i in range(1, nH - 1):
                    if prev_harmonics[i] != 0 and harmonics[i] != 0:
                        ax5.plot([n_frame-0.5, n_frame+0.5], [prev_harmonics[i], harmonics[i]], '-og', ms=2.5, mfc='yellow', lw=1.3)
                    elif prev_harmonics[i] == 0 and harmonics[i] != 0:   # initialize new line of harmonics
                        ax5.plot(n_frame+0.5, harmonics[i], 'og', ms=2.5, mfc='yellow')
                redraw_step()

            if step == 1:
                prev_harmonics = harmonics          # save prev. harmonics only if we are not rewinding plots

            mX2 = 20 * np.log10(abs(X2[:hNs]))      # magnitude spectrum of positive frequencies
            mX2 = resample(np.maximum(-200, mX2), hN)
            ax4.plot(freq[:n_bins], mX2[:n_bins], 'b', alpha=0.3)
            ax4.fill_between(freq[:n_bins], -200, mX2[:n_bins], facecolor='blue', alpha=0.1)
            redraw_step()

            mXh = 20 * np.log10(abs(Xh[:hNs]))      # magnitude spectrum of positive frequencies
            mXh = resample(np.maximum(-200, mXh), hN)
            ax4.plot(freq[:n_bins], mXh[:n_bins], 'g')
            ax4.fill_between(freq[:n_bins], -200, mXh[:n_bins], facecolor='green', alpha=0.4)
            redraw_step()

            mXr = resample(np.maximum(-200, mXr), hN)
            ax4.plot(freq[:n_bins], mXr[:n_bins], 'r', alpha=0.3)
            ax4.fill_between(freq[:n_bins], -200, mXr[:n_bins], facecolor='red', alpha=0.1)
            redraw_step()

            rect2 = patches.Rectangle((np.float32(ri)/fs, -2**7), width=np.float32(Ns)/fs, height=2**15, color='green', alpha=0.3)
            reset_signal_axes(ax2, output_title, clip_in)
            ax2.add_patch(rect2)                    # mark the synthesized samples
            ax2.plot(time[:ri + Ns], yh[:ri + Ns], 'b')
            plt.draw()
            rect2.remove()
            rect.remove()

        n_frame += 1                                # increment number of frames analyzed
        pin += H                                    # advance sound pointer

    y = yh + ys                                     # output sound: it was emitted without ever being computed
    self.emit(SIGNAL("hpsDone(object, object, object, int)"), y, yh, ys, fs)
def hpsModel(x, fs, w, N, t, nH, minf0, maxf0, f0et, maxhd, stocf, maxnpeaksTwm=10):
    """Analysis/synthesis of a sound using the harmonic plus stochastic model.

    Parameters
    ----------
    x : input sound (1-D array)
    fs : sampling rate
    w : analysis window
    N : FFT size (minimum 512)
    t : peak detection threshold in negative dB
    nH : maximum number of harmonics
    minf0, maxf0 : minimum / maximum f0 frequency in Hz
    f0et : error threshold in the f0 detection (ex: 5)
    maxhd : max. relative deviation in harmonic detection (ex: .2)
    stocf : decimation factor of the magnitude spectrum for stochastic analysis
    maxnpeaksTwm : maximum number of peaks used for f0 detection

    Returns
    -------
    y : output sound (yh + yst)
    yh : harmonic component
    yst : stochastic component
    """
    # Floor division keeps every size an int so it can be used as a slice
    # index under both Python 2 and Python 3.
    hN = N // 2                          # size of positive spectrum
    hM1 = (w.size + 1) // 2              # half analysis window size by rounding
    hM2 = w.size // 2                    # half analysis window size by floor
    Ns = 512                             # FFT size for synthesis (even)
    H = Ns // 4                          # hop size used for analysis and synthesis
    hNs = Ns // 2                        # half of synthesis FFT size
    # The residual frame starts at pin-hNs-1.  Starting at pin == hNs would give
    # index -1, i.e. an empty slice, so the pointer starts at least at hNs+1.
    pin = max(hNs + 1, hM1)              # sound pointer in middle of analysis window
    pend = x.size - max(hNs, hM1)        # last sample to start a frame
    yhw = np.zeros(Ns)                   # harmonic output frame
    ystw = np.zeros(Ns)                  # stochastic output frame
    yh = np.zeros(x.size)                # harmonic output array
    yst = np.zeros(x.size)               # stochastic output array
    w = w / sum(w)                       # normalize analysis window
    sw = np.zeros(Ns)
    sw[hNs - H:hNs + H] = triang(2 * H)  # overlapping window
    bh = blackmanharris(Ns)              # synthesis window
    bh = bh / sum(bh)                    # normalize synthesis window
    wr = bh                              # window for residual
    sw[hNs - H:hNs + H] = sw[hNs - H:hNs + H] / bh[hNs - H:hNs + H]

    while pin < pend:
        # -----analysis-----
        xw = x[pin - hM1:pin + hM2] * w              # window the input sound
        fftbuffer = np.zeros(N)
        fftbuffer[:hM1] = xw[hM2:]                   # zero-phase window in fftbuffer
        fftbuffer[N - hM2:] = xw[:hM2]
        X = fft(fftbuffer)
        mX = 20 * np.log10(abs(X[:hN]))              # magnitude spectrum of positive frequencies
        ploc = PP.peakDetection(mX, hN, t)
        pX = np.unwrap(np.angle(X[:hN]))             # unwrapped phase of positive frequencies
        iploc, ipmag, ipphase = PP.peakInterp(mX, pX, ploc)   # refine peak values
        f0 = fd.f0DetectionTwm(iploc, ipmag, N, fs, f0et, minf0, maxf0, maxnpeaksTwm)

        hloc = np.zeros(nH)                          # harmonic locations
        hmag = np.zeros(nH) - 100                    # harmonic magnitudes
        hphase = np.zeros(nH)                        # harmonic phases
        hf = (f0 > 0) * (f0 * np.arange(1, nH + 1))  # ideal harmonic frequencies
        peakFreq = iploc / float(N) * fs             # peak frequencies in Hz
        hi = 0                                       # harmonic index
        while f0 > 0 and hi < nH and hf[hi] < fs / 2.0:   # find harmonic peaks
            dist = abs(peakFreq - hf[hi])
            pei = np.argmin(dist)                    # closest peak
            dev = dist[pei]
            # accept the peak only if it was not already used and is close enough
            if (hi == 0 or not any(hloc[:hi] == iploc[pei])) and dev < maxhd * hf[hi]:
                hloc[hi] = iploc[pei]
                hmag[hi] = ipmag[pei]
                hphase[hi] = ipphase[pei]
            hi += 1
        hloc[:hi] = (hloc[:hi] != 0) * (hloc[:hi] * Ns / N)   # synthesis locations

        ri = pin - hNs - 1                           # input sound pointer for residual analysis
        xr = x[ri:ri + Ns] * wr                      # window the input sound
        fftbuffer = np.zeros(Ns)
        fftbuffer[:hNs] = xr[hNs:]                   # zero-phase window in fftbuffer
        fftbuffer[hNs:] = xr[:hNs]
        Xr = fft(fftbuffer)                          # FFT for residual analysis

        # -----synthesis-----
        Yh = GS.genSpecSines(hloc[:hi], hmag, hphase, Ns)     # spectrum of harmonic component
        Yr = Xr - Yh                                 # residual complex spectrum
        mYr = 20 * np.log10(abs(Yr[:hNs]))           # magnitude spectrum of residual
        # decimate the magnitude spectrum and avoid -Inf; resample needs an int count
        mYrenv = resample(np.maximum(-200, mYr), int(mYr.size * stocf))
        mYst = resample(mYrenv, hNs)                 # interpolate to original size
        mYst = 10 ** (mYst / 20)                     # dB to linear magnitude
        fc = 1 + int(round(500.0 / fs * Ns))         # 500 Hz to bin location
        mYst[:fc] *= (np.arange(fc) / float(fc - 1)) ** 2     # high pass filter the stochastic component
        pYst = 2 * np.pi * np.random.rand(hNs)       # random phases
        Yst = np.zeros(Ns, dtype=complex)
        Yst[:hNs] = mYst * np.exp(1j * pYst)         # positive frequencies
        Yst[hNs + 1:] = mYst[:0:-1] * np.exp(-1j * pYst[:0:-1])   # negative frequencies

        fftbuffer = np.real(ifft(Yh))                # inverse FFT of harmonic spectrum
        yhw[:hNs - 1] = fftbuffer[hNs + 1:]          # undo zero-phase window
        yhw[hNs - 1:] = fftbuffer[:hNs + 1]
        fftbuffer = np.real(ifft(Yst))               # inverse FFT of stochastic spectrum
        ystw[:hNs - 1] = fftbuffer[hNs + 1:]         # undo zero-phase window
        ystw[hNs - 1:] = fftbuffer[:hNs + 1]

        yh[ri:ri + Ns] += sw * yhw                   # overlap-add for sines
        yst[ri:ri + Ns] += sw * ystw                 # overlap-add for stochastic part
        pin += H                                     # advance sound pointer

    y = yh + yst                                     # sum of harmonic and stochastic components
    return y, yh, yst
def hprModel(x, fs, w, N, t, nH, minf0, maxf0, f0et, maxhd):
    """Analysis/synthesis of a sound using the harmonic plus residual model.

    Parameters
    ----------
    x : input sound (1-D array)
    fs : sampling rate
    w : analysis window (originally required to be odd sized; even sizes
        are handled the same way as in the sibling models)
    N : FFT size (minimum 512)
    t : peak detection threshold in negative dB
    nH : maximum number of harmonics
    minf0, maxf0 : minimum / maximum f0 frequency in Hz
    f0et : error threshold in the f0 detection (ex: 5)
    maxhd : max. relative deviation in harmonic detection (ex: .2)

    Returns
    -------
    y : output sound (yh + yr)
    yh : harmonic component
    yr : residual component
    """
    # Floor division keeps every size an int so it can be used as a slice
    # index under both Python 2 and Python 3.
    hN = N // 2                          # size of positive spectrum
    hM1 = (w.size + 1) // 2              # half analysis window size by rounding
    hM2 = w.size // 2                    # half analysis window size by floor
    Ns = 512                             # FFT size for synthesis (even)
    H = Ns // 4                          # hop size used for analysis and synthesis
    hNs = Ns // 2                        # half of synthesis FFT size
    # The residual frame starts at pin-hNs-1.  Starting at pin == hNs would give
    # index -1, i.e. an empty slice, so the pointer starts at least at hNs+1.
    pin = max(hNs + 1, hM1)              # sound pointer in middle of analysis window
    pend = x.size - max(hNs, hM1)        # last sample to start a frame
    yhw = np.zeros(Ns)                   # harmonic output frame
    yrw = np.zeros(Ns)                   # residual output frame
    yh = np.zeros(x.size)                # harmonic output array
    yr = np.zeros(x.size)                # residual output array
    w = w / sum(w)                       # normalize analysis window
    sw = np.zeros(Ns)
    sw[hNs - H:hNs + H] = triang(2 * H)  # overlapping window
    bh = blackmanharris(Ns)              # synthesis window
    bh = bh / sum(bh)                    # normalize synthesis window
    wr = bh                              # window for residual
    sw[hNs - H:hNs + H] = sw[hNs - H:hNs + H] / bh[hNs - H:hNs + H]

    while pin < pend:
        # -----analysis-----
        # For an odd window hM1 == hM and hM2 == hM-1, matching the original indexing.
        xw = x[pin - hM1:pin + hM2] * w              # window the input sound
        fftbuffer = np.zeros(N)
        fftbuffer[:hM1] = xw[hM2:]                   # zero-phase window in fftbuffer
        fftbuffer[N - hM2:] = xw[:hM2]
        X = fft(fftbuffer)
        mX = 20 * np.log10(abs(X[:hN]))              # magnitude spectrum of positive frequencies
        ploc = PP.peakDetection(mX, hN, t)
        pX = np.unwrap(np.angle(X[:hN]))             # unwrapped phase of positive frequencies
        iploc, ipmag, ipphase = PP.peakInterp(mX, pX, ploc)   # refine peak values
        f0 = fd.f0DetectionTwm(iploc, ipmag, N, fs, f0et, minf0, maxf0)

        hloc = np.zeros(nH)                          # harmonic locations
        hmag = np.zeros(nH) - 100                    # harmonic magnitudes
        hphase = np.zeros(nH)                        # harmonic phases
        hf = (f0 > 0) * (f0 * np.arange(1, nH + 1))  # ideal harmonic frequencies
        peakFreq = iploc / float(N) * fs             # peak frequencies in Hz
        hi = 0                                       # harmonic index
        while f0 > 0 and hi < nH and hf[hi] < fs / 2.0:   # find harmonic peaks
            dist = abs(peakFreq - hf[hi])
            pei = np.argmin(dist)                    # closest peak
            dev = dist[pei]
            # accept the peak only if it was not already used and is close enough
            if (hi == 0 or not any(hloc[:hi] == iploc[pei])) and dev < maxhd * hf[hi]:
                hloc[hi] = iploc[pei]
                hmag[hi] = ipmag[pei]
                hphase[hi] = ipphase[pei]
            hi += 1
        hloc[:hi] = (hloc[:hi] != 0) * (hloc[:hi] * Ns / N)   # synthesis locations

        ri = pin - hNs - 1                           # input sound pointer for residual analysis
        xr = x[ri:ri + Ns] * wr                      # window the input sound
        fftbuffer = np.zeros(Ns)
        fftbuffer[:hNs] = xr[hNs:]                   # zero-phase window in fftbuffer
        fftbuffer[hNs:] = xr[:hNs]
        Xr = fft(fftbuffer)                          # FFT for residual analysis

        # -----synthesis-----
        Yh = GS.genSpecSines(hloc[:hi], hmag, hphase, Ns)     # generate spec sines
        Yr = Xr - Yh                                 # residual complex spectrum

        fftbuffer = np.real(ifft(Yh))                # inverse FFT of harmonic spectrum
        yhw[:hNs - 1] = fftbuffer[hNs + 1:]          # undo zero-phase window
        yhw[hNs - 1:] = fftbuffer[:hNs + 1]
        fftbuffer = np.real(ifft(Yr))                # residual in time domain using inverse FFT
        yrw[:hNs - 1] = fftbuffer[hNs + 1:]
        yrw[hNs - 1:] = fftbuffer[:hNs + 1]

        yh[ri:ri + Ns] += sw * yhw                   # overlap-add for sines
        yr[ri:ri + Ns] += sw * yrw                   # overlap-add for residual
        pin += H                                     # advance sound pointer

    y = yh + yr
    return y, yh, yr
def harmonicModelPlot(x, fs, w, N, t, nH, minf0, maxf0, f0et, maxhd, maxFreq):
    """Plot the magnitude spectrogram of a sound with its detected harmonics.

    Each frame is analysed with an FFT, its peaks are detected and
    interpolated, f0 is estimated and the harmonic peaks are selected.  The
    magnitude spectra (up to maxFreq) are drawn with pcolormesh and the
    harmonic peaks are drawn on top as 'x' markers.

    Parameters
    ----------
    x : input sound (1-D array)
    fs : sampling rate
    w : analysis window
    N : FFT size
    t : peak detection threshold in negative dB
    nH : maximum number of harmonics
    minf0, maxf0 : minimum / maximum f0 frequency in Hz
    f0et : error threshold in the f0 detection
    maxhd : max. relative deviation in harmonic detection
    maxFreq : highest frequency (Hz) shown in the plot

    Returns
    -------
    YSpec : 2-D array (bins x frames) of magnitude spectra in dB.  If the
        input is too short to hold a single frame, an empty (bins x 0) array
        is returned and nothing is plotted.
    """
    # Floor division keeps every size an int so it can be used as a slice
    # index under both Python 2 and Python 3.
    hN = N // 2                          # size of positive spectrum
    hM1 = (w.size + 1) // 2              # half analysis window size by rounding
    hM2 = w.size // 2                    # half analysis window size by floor
    Ns = 4000                            # frame spacing reference size (even)
    H = Ns // 4                          # hop size used for analysis
    hNs = Ns // 2
    pin = max(hNs, hM1)                  # sound pointer in middle of analysis window
    pend = x.size - max(hNs, hM1)        # last sample to start a frame
    w = w / sum(w)                       # normalize analysis window

    maxBin = N * maxFreq / float(fs)     # fractional bin of maxFreq, used to filter peaks
    lastBin = min(int(maxBin), hN)       # integer number of bins that are displayed
    binFreq = np.arange(lastBin) * float(fs) / N     # the bin frequencies

    frmTime = []                         # time of each analysed frame
    specCols = []                        # one magnitude spectrum per frame
    peakRows = []                        # (time, frequency) rows of harmonic peaks per frame
    while pin < pend:
        frmTime.append(pin / float(fs))
        xw = x[pin - hM1:pin + hM2] * w              # window the input sound
        fftbuffer = np.zeros(N)
        fftbuffer[:hM1] = xw[hM2:]                   # zero-phase window in fftbuffer
        fftbuffer[N - hM2:] = xw[:hM2]
        X = fft(fftbuffer)
        mX = 20 * np.log10(abs(X[:hN]))              # magnitude spectrum of positive frequencies
        ploc = PP.peakDetection(mX, hN, t)           # detect peak locations
        pX = np.unwrap(np.angle(X[:hN]))             # unwrapped phase of positive frequencies
        iploc, ipmag, ipphase = PP.peakInterp(mX, pX, ploc)   # refine peak values
        f0 = fd.f0DetectionTwm(iploc, ipmag, N, fs, f0et, minf0, maxf0)

        hloc = np.zeros(nH)                          # harmonic locations
        hmag = np.zeros(nH) - 100                    # harmonic magnitudes
        hf = (f0 > 0) * (f0 * np.arange(1, nH + 1))  # ideal harmonic frequencies
        peakFreq = iploc / float(N) * fs             # peak frequencies in Hz
        hi = 0                                       # harmonic index
        while f0 > 0 and hi < nH and hf[hi] < fs / 2.0:   # find harmonic peaks
            dist = abs(peakFreq - hf[hi])
            pei = np.argmin(dist)                    # closest peak
            dev = dist[pei]
            # accept the peak only if it was not already used and is close enough
            if (hi == 0 or not any(hloc[:hi] == iploc[pei])) and dev < maxhd * hf[hi]:
                hloc[hi] = iploc[pei]
                hmag[hi] = ipmag[pei]
            hi += 1

        # accumulate the spectrum and the harmonic peaks that fall in the plot range
        specCols.append(mX[:lastBin])
        final_peaks = hloc[(hloc > 0) & (hloc <= maxBin)]
        parray = np.zeros([final_peaks.size, 2])
        parray[:, 0] = pin / float(fs)
        parray[:, 1] = final_peaks * float(fs) / N
        peakRows.append(parray)
        pin += H

    if not specCols:                     # input too short: nothing to plot
        return np.zeros((lastBin, 0))

    YSpec = np.column_stack(specCols)
    specPeaks = np.concatenate(peakRows, axis=0)
    frmTime = np.array(frmTime)          # the time at the centre of the frames

    if hasattr(plt, "hold"):             # pyplot.hold was removed in Matplotlib 3.0
        plt.hold(True)
    plt.pcolormesh(frmTime, binFreq, YSpec)
    plt.scatter(specPeaks[:, 0] + (0.5 * H / float(fs)), specPeaks[:, 1], s=10, marker='x')
    plt.xlabel('Time(s)')
    plt.ylabel('Frequency(Hz)')
    plt.autoscale(tight=True)
    plt.show()
    return YSpec
def hps(x, fs, w, N, t, nH, minf0, maxf0, f0et, maxhd, stocf, maxnpeaksTwm=10):
    """Harmonic plus stochastic analysis/synthesis, prepared for transformations.

    The active transformation is a pitch transposition by a fixed factor
    (fscale = 0.5) with timbre preservation; other transformations are kept
    as commented-out alternatives inside the loop.

    Parameters
    ----------
    x : input sound (1-D array)
    fs : sampling rate
    w : analysis window
    N : FFT size (minimum 512)
    t : peak detection threshold in negative dB
    nH : maximum number of harmonics
    minf0, maxf0 : minimum / maximum f0 frequency in Hz
    f0et : error threshold in the f0 detection (ex: 5)
    maxhd : max. relative deviation in harmonic detection (ex: .2)
    stocf : decimation factor of the magnitude spectrum for stochastic analysis
    maxnpeaksTwm : maximum number of peaks used for f0 detection.
        NOTE(review): accepted but not forwarded to fd.f0DetectionTwm here.

    Returns
    -------
    y : output sound (yh + ys)
    yh : harmonic component
    ys : stochastic component
    """
    # Floor division keeps every size an int so it can be used as a slice
    # index under both Python 2 and Python 3.
    hN = N // 2                          # size of positive spectrum
    hM1 = (w.size + 1) // 2              # half analysis window size by rounding
    hM2 = w.size // 2                    # half analysis window size by floor
    Ns = 512                             # FFT size for synthesis
    H = Ns // 4                          # hop size used for analysis and synthesis
    hNs = Ns // 2                        # half of FFT size for synthesis
    # The residual frame starts at pin-hNs-1.  Starting at pin == hNs would give
    # index -1, i.e. an empty slice, so the pointer starts at least at hNs+1.
    pin = max(hNs + 1, hM1)              # sound pointer in middle of analysis window
    pend = x.size - max(hNs, hM1)        # last sample to start a frame
    yhw = np.zeros(Ns)                   # harmonic output frame
    ysw = np.zeros(Ns)                   # stochastic output frame
    yh = np.zeros(x.size)                # harmonic output array
    ys = np.zeros(x.size)                # stochastic output array
    w = w / sum(w)                       # normalize analysis window
    sw = np.zeros(Ns)
    sw[hNs - H:hNs + H] = triang(2 * H)  # overlapping window
    bh = blackmanharris(Ns)              # synthesis window
    bh = bh / sum(bh)                    # normalize synthesis window
    wr = bh                              # window for residual
    sw[hNs - H:hNs + H] = sw[hNs - H:hNs + H] / bh[hNs - H:hNs + H]
    sws = H * hanning(Ns) / 2            # synthesis window for stochastic
    lastyhloc = np.zeros(nH)             # synthesis harmonic locations of previous frame
    yhphase = 2 * np.pi * np.random.rand(nH)     # synthesis harmonic phases

    while pin < pend:
        # -----analysis-----
        xw = x[pin - hM1:pin + hM2] * w              # window the input sound
        fftbuffer = np.zeros(N)
        fftbuffer[:hM1] = xw[hM2:]                   # zero-phase window in fftbuffer
        fftbuffer[N - hM2:] = xw[:hM2]
        X = fft(fftbuffer)
        mX = 20 * np.log10(abs(X[:hN]))              # magnitude spectrum of positive frequencies
        ploc = PP.peakDetection(mX, hN, t)           # detect spectral peaks
        pX = np.unwrap(np.angle(X[:hN]))             # unwrapped phase of positive frequencies
        iploc, ipmag, ipphase = PP.peakInterp(mX, pX, ploc)   # refine peak values
        f0 = fd.f0DetectionTwm(iploc, ipmag, N, fs, f0et, minf0, maxf0)

        hloc = np.zeros(nH)                          # harmonic locations
        hmag = np.zeros(nH) - 100                    # harmonic magnitudes
        hphase = np.zeros(nH)                        # harmonic phases
        hf = (f0 > 0) * (f0 * np.arange(1, nH + 1))  # ideal harmonic frequencies
        peakFreq = iploc / float(N) * fs             # peak frequencies in Hz
        hi = 0                                       # harmonic index
        while f0 > 0 and hi < nH and hf[hi] < fs / 2.0:   # find harmonic peaks
            dist = abs(peakFreq - hf[hi])
            pei = np.argmin(dist)                    # closest peak
            dev = dist[pei]
            # accept the peak only if it was not already used and is close enough
            if (hi == 0 or not any(hloc[:hi] == iploc[pei])) and dev < maxhd * hf[hi]:
                hloc[hi] = iploc[pei]
                hmag[hi] = ipmag[pei]
                hphase[hi] = ipphase[pei]
            hi += 1
        hloc[:hi] = (hloc[:hi] != 0) * (hloc[:hi] * Ns / N)   # synthesis locations

        ri = pin - hNs - 1                           # input sound pointer for residual analysis
        xw2 = x[ri:ri + Ns] * wr                     # window the input sound
        fftbuffer = np.zeros(Ns)
        fftbuffer[:hNs] = xw2[hNs:]                  # zero-phase window in fftbuffer
        fftbuffer[hNs:] = xw2[:hNs]
        X2 = fft(fftbuffer)                          # FFT for residual analysis
        Xh = GS.genSpecSines(hloc, hmag, hphase, Ns) # generate sines
        Xr = X2 - Xh                                 # residual complex spectrum
        mXr = 20 * np.log10(abs(Xr[:hNs]))           # magnitude spectrum of residual
        # decimate the magnitude spectrum and avoid -Inf; resample needs an int count
        mXrenv = resample(np.maximum(-200, mXr), int(mXr.size * stocf))

        # -----synthesis data-----
        yhloc = hloc                                 # synthesis harmonic locations
        yhmag = hmag                                 # synthesis harmonic amplitudes
        mYrenv = mXrenv                              # synthesis residual envelope
        yf0 = f0                                     # synthesis fundamental frequency

        # ------transformations----
        # -----clarinet effect, only odd harmonics-----
        # yhloc[1::2] = 0                            # set even harmonic to 0 magnitude
        # -----pitch discretization to temperate scale-----
        # if f0>0:
        #     nst = round(12*np.log2(f0/55))         # closest semitone
        #     discpitch = 55*2**(nst/12)             # discretized pitch
        #     fscale = discpitch/f0                  # pitch transposition factor
        #     yhloc = yhloc*fscale                   # all harmonics corrected to discretized pitch

        # -----pitch transposition with timbre preservation-----
        fscale = 0.5                                 # scale factor for pitch transposition
        ind_valid = np.where(yhloc != 0)[0]          # harmonics with non-zero frequency
        if f0 > 0:
            # peak locations / magnitudes used to interpolate the spectral envelope
            x_vals = np.append(np.append(0, yhloc[ind_valid]), hNs)
            y_vals = np.append(np.append(yhmag[0], yhmag[ind_valid]), yhmag[-1])
            specEnvelope = interp1d(x_vals, y_vals, kind="linear", bounds_error=False, fill_value=-100)
            yhloc = yhloc * fscale
            yhmag[ind_valid] = specEnvelope(yhloc[ind_valid])

        # ----- pitch transposition, vibrato and tremolo with timbre preservation -----
        # vtf = 5.0                                  # vibrato-tremolo frequency in Hz
        # vd = 50                                    # vibrato depth in cents
        # td = 3                                     # tremolo depth in dB
        # fscale = 1                                 # scale factor for pitch transposition
        # modf = np.sin(2.0*np.pi*vtf*pin/fs)        # modulation factor for vibrato and tremolo (scaled later)
        # sfscale = fscale*(2.0**(vd/1200.0*modf))   # effective scale factor including vibrato
        # idx = np.where(yhloc!=0)[0]                # harmonics with non-zero frequency
        # if (f0>0):
        #     x_vals = np.append(np.append(0, yhloc[idx]),hNs)
        #     y_vals = np.append(np.append(yhmag[0], yhmag[idx]),yhmag[-1])
        #     specEnvelope = interp1d(x_vals, y_vals, kind = 'linear',bounds_error=False, fill_value=-100)
        #     yhloc = yhloc*sfscale
        #     yhmag[idx] = specEnvelope(yhloc[idx])
        #     yhmag[idx] = yhmag[idx] + td*modf      # tremolo

        # -----synthesis-----
        yhphase += 2 * np.pi * (lastyhloc + yhloc) / 2 / Ns * H   # propagate phases
        lastyhloc = yhloc
        Yh = GS.genSpecSines(yhloc, yhmag, yhphase, Ns)           # generate spec sines
        mYs = resample(mYrenv, hNs)                  # interpolate to original size
        mYs = 10 ** (mYs / 20)                       # dB to linear magnitude
        if f0 > 0:
            mYs *= np.cos(np.pi * np.arange(0, hNs) / Ns * fs / yf0) ** 2   # filter residual
        fc = 1 + int(round(500.0 / fs * Ns))         # 500 Hz to bin location
        mYs[:fc] *= (np.arange(fc) / float(fc - 1)) ** 2          # high pass filter
        pYs = 2 * np.pi * np.random.rand(hNs)        # random phases
        Ys = np.zeros(Ns, dtype=complex)
        Ys[:hNs] = mYs * np.exp(1j * pYs)            # positive frequencies
        Ys[hNs + 1:] = mYs[:0:-1] * np.exp(-1j * pYs[:0:-1])      # negative frequencies

        fftbuffer = np.real(ifft(Yh))                # inverse FFT of harmonic spectrum
        yhw[:hNs - 1] = fftbuffer[hNs + 1:]          # undo zero-phase window
        yhw[hNs - 1:] = fftbuffer[:hNs + 1]
        fftbuffer = np.real(ifft(Ys))                # inverse FFT of stochastic approximation spectrum
        ysw[:hNs - 1] = fftbuffer[hNs + 1:]          # undo zero-phase window
        ysw[hNs - 1:] = fftbuffer[:hNs + 1]

        yh[ri:ri + Ns] += sw * yhw                   # overlap-add for sines
        ys[ri:ri + Ns] += sws * ysw                  # overlap-add for stochastic part
        pin += H                                     # advance sound pointer

    y = yh + ys                                      # sum harmonic and stochastic components
    return y, yh, ys
def hpsModel(x, fs, w, N, t, nH, minf0, maxf0, f0et, maxhd, stocf):
    """Harmonic plus stochastic analysis/synthesis with step-by-step plots.

    Every stage of each frame (input frame, windowed frame, zero-phase
    buffer, spectrum with peaks, f0, harmonics, reconstructed frame and the
    growing harmonic output) is drawn on a Matplotlib figure.

    Parameters
    ----------
    x : input sound; it is converted to float32 and divided by 2**15
    fs : sampling rate
    w : analysis window (odd size)
    N : FFT size (minimum 512)
    t : peak detection threshold in negative dB
    nH : maximum number of harmonics
    minf0, maxf0 : minimum / maximum f0 frequency in Hz
    f0et : error threshold in the f0 detection (ex: 5)
    maxhd : max. relative deviation in harmonic detection (ex: .2)
    stocf : decimation factor of the magnitude spectrum for stochastic analysis

    Returns
    -------
    y : output sound (yh + ys)
    yh : harmonic component
    ys : stochastic component
    """
    x = np.float32(x) / (2**15)                      # normalize input signal

    # -----figure layout-----
    plt.figure(figsize=(10.5, 6.5), dpi=100)
    ax1 = plt.subplot2grid((4, 6), (0, 1), colspan=4)
    ax1.set_position([0.10, 0.77, 0.8, 0.16])
    ax1.set_xlim(0, 10000)
    ax1.set_ylim(x.min(), x.max())
    ax1.set_title("Input Signal")
    plt.setp(ax1.get_xticklabels(), visible=False)

    ax2 = plt.subplot2grid((4, 6), (1, 1), colspan=4, sharex=ax1, sharey=ax1)
    ax2.set_position([0.10, 0.55, 0.8, 0.16])
    ax2.set_xlim(0, 10000)
    ax2.set_ylim(x.min(), x.max())
    ax2.set_title("Output Signal")

    ax3 = plt.subplot2grid((4, 6), (2, 0), rowspan=2, colspan=2)
    ax3.set_position([0.05, 0.08, 0.35, 0.35])
    ax3.set_title("Frame")
    ax3.set_xlim(0, w.size)

    ax5 = plt.subplot2grid((4, 6), (2, 3), rowspan=2, colspan=4)
    ax5.set_position([0.47, 0.08, 0.5, 0.35])
    ax5.set_title("Spectrum")
    ax5.set_xlabel("Frequency (Hz)")
    ax5.set_ylabel("Amplitude (dB)")
    ax5.set_xlim(0, fs / 2)

    frameLen = w.size

    def showFrame(title, data, fmt=None):
        # Redraw the frame axes (ax3) with a single curve.
        ax3.cla()
        ax3.set_title(title)
        if fmt is None:
            ax3.plot(data)
        else:
            ax3.plot(data, fmt)
        ax3.set_xlim(0, frameLen)
        ax3.ticklabel_format(scilimits=(-3, 3))      # use scientific limits above 1e3
        plt.draw()

    # Floor division keeps every size an int so it can be used as a slice
    # index under both Python 2 and Python 3.
    hN = N // 2                          # size of positive spectrum
    hM1 = (w.size + 1) // 2              # half analysis window size by rounding
    hM2 = w.size // 2                    # half analysis window size by floor
    Ns = 512                             # FFT size for synthesis (even)
    H = Ns // 4                          # hop size used for analysis and synthesis
    hNs = Ns // 2
    # The residual frame starts at pin-hNs-1.  Starting at pin == hNs would give
    # index -1, i.e. an empty slice, so the pointer starts at least at hNs+1.
    pin = max(hNs + 1, hM1)              # sound pointer in middle of analysis window
    pend = x.size - max(hNs, hM1)        # last sample to start a frame
    yhw = np.zeros(Ns)                   # harmonic output frame
    ysw = np.zeros(Ns)                   # stochastic output frame
    yh = np.zeros(x.size)                # harmonic output array
    ys = np.zeros(x.size)                # stochastic output array
    w = w / sum(w)                       # normalize analysis window
    sw = np.zeros(Ns)
    sw[hNs - H:hNs + H] = triang(2 * H)  # overlapping window
    bh = blackmanharris(Ns)              # synthesis window
    bh = bh / sum(bh)                    # normalize synthesis window
    wr = bh                              # window for residual
    sw[hNs - H:hNs + H] = sw[hNs - H:hNs + H] / bh[hNs - H:hNs + H]
    sws = H * hanning(Ns) / 2            # synthesis window for stochastic
    # Exact centre frequency of every positive-spectrum bin; always hN long,
    # so it matches mX regardless of how fs and N divide.
    freq = np.arange(hN) * float(fs) / N

    ax1.plot(x[:10000])
    plt.draw()

    while pin < pend:
        # highlight the analysed frame on the input signal
        rect = patches.Rectangle((pin - hM1, -2**7), width=w.size, height=2**15,
                                 color='red', alpha=0.3)
        ax1.add_patch(rect)
        plt.draw()
        rect.remove()

        # -----analysis-----
        xw = x[pin - hM1:pin + hM2] * w              # window the input sound
        fftbuffer = np.zeros(N)
        fftbuffer[:hM1] = xw[hM2:]                   # zero-phase window in fftbuffer
        fftbuffer[N - hM2:] = xw[:hM2]
        X = fft(fftbuffer)

        showFrame("Frame", x[pin - hM1:pin + hM2])
        ax3.set_ylim(ax3.get_ylim())                 # freeze limits before overlaying the window
        ax3.plot(w, 'r')
        plt.draw()
        showFrame("Windowed Frame", xw, 'b')
        showFrame("Windowed Frame zero-phase", fftbuffer, 'b')

        mX = 20 * np.log10(abs(X[:hN]))              # magnitude spectrum of positive frequencies
        ploc = PP.peakDetection(mX, hN, t)
        pX = np.unwrap(np.angle(X[:hN]))             # unwrapped phase of positive frequencies
        iploc, ipmag, ipphase = PP.peakInterp(mX, pX, ploc)   # refine peak values
        peakFreq = iploc / float(N) * fs             # peak frequencies in Hz

        ax5.cla()
        ax5.set_title("Spectrum")
        ax5.set_xlabel("Frequency (Hz)")
        ax5.set_ylabel("Amplitude (dB)")
        ax5.set_xlim(0, fs / 2)
        ax5.plot(freq, mX, 'b')
        ax5.set_ylim(ax5.get_ylim())                 # freeze limits before filling
        ax5.fill_between(freq, ax5.get_ylim()[0], mX, facecolor='blue', alpha=0.3)
        plt.draw()
        ax5.plot(np.float32(iploc) / N * fs, ipmag, 'ro', ms=4, alpha=0.4)
        plt.draw()

        f0 = fd.f0DetectionTwm(iploc, ipmag, N, fs, f0et, minf0, maxf0)   # find f0
        if f0 > 0:
            loc = np.argmin(np.abs(peakFreq - f0))   # peak at (or closest to) f0
            ax5.plot(f0, ipmag[loc], 'go', ms=4, alpha=1)
            plt.draw()

        hloc = np.zeros(nH)                          # harmonic locations
        hmag = np.zeros(nH) - 100                    # harmonic magnitudes
        hphase = np.zeros(nH)                        # harmonic phases
        hf = (f0 > 0) * (f0 * np.arange(1, nH + 1))  # ideal harmonic frequencies
        hi = 0                                       # harmonic index
        while f0 > 0 and hi < nH and hf[hi] < fs / 2.0:   # find harmonic peaks
            dist = abs(peakFreq - hf[hi])
            pei = np.argmin(dist)                    # closest peak
            dev = dist[pei]
            # accept the peak only if it was not already used and is close enough
            if (hi == 0 or not any(hloc[:hi] == iploc[pei])) and dev < maxhd * hf[hi]:
                hloc[hi] = iploc[pei]
                hmag[hi] = ipmag[pei]
                hphase[hi] = ipphase[pei]
            hi += 1
        ax5.plot(np.float32(hloc) / N * fs, hmag, 'yo', ms=4, alpha=0.7)
        plt.draw()
        hloc = (hloc != 0) * (hloc * Ns / N)         # synthesis locations

        ri = pin - hNs - 1                           # input sound pointer for residual analysis
        xw2 = x[ri:ri + Ns] * wr                     # window the input sound
        fftbuffer = np.zeros(Ns)
        fftbuffer[:hNs] = xw2[hNs:]                  # zero-phase window in fftbuffer
        fftbuffer[hNs:] = xw2[:hNs]
        X2 = fft(fftbuffer)                          # FFT for residual analysis

        # -----synthesis-----
        Yh = GS.genSpecSines(hloc, hmag, hphase, Ns) # generate spec sines
        Xr = X2 - Yh                                 # residual complex spectrum
        mXr = 20 * np.log10(abs(Xr[:hNs]))           # magnitude spectrum of residual
        # decimate the magnitude spectrum; resample needs an int count
        mXrenv = resample(np.maximum(-200, mXr), int(mXr.size * stocf))
        mYs = resample(mXrenv, hNs)                  # interpolate to original size
        pYs = 2 * np.pi * np.random.rand(hNs)        # random phases
        Ys = np.zeros(Ns, dtype=complex)
        Ys[:hNs] = 10 ** (mYs / 20) * np.exp(1j * pYs)                    # positive frequencies
        Ys[hNs + 1:] = 10 ** (mYs[:0:-1] / 20) * np.exp(-1j * pYs[:0:-1])  # negative frequencies

        fftbuffer = np.real(ifft(Yh))                # inverse FFT of harmonic spectrum
        showFrame("Reconstructed Frame", fftbuffer, 'g')
        yhw[:hNs - 1] = fftbuffer[hNs + 1:]          # undo zero-phase window
        yhw[hNs - 1:] = fftbuffer[:hNs + 1]
        showFrame("Reconstructed Frame", yhw, 'g')

        fftbuffer = np.real(ifft(Ys))                # stochastic part in time domain
        ysw[:hNs - 1] = fftbuffer[hNs + 1:]
        ysw[hNs - 1:] = fftbuffer[:hNs + 1]

        yh[ri:ri + Ns] += sw * yhw                   # overlap-add for sines
        ys[ri:ri + Ns] += sws * ysw                  # overlap-add for stochastic
        pin += H                                     # advance sound pointer

        showFrame("Reconstructed Frame", sw * yhw, 'g')
        # the output rectangle is positioned with the already advanced pointer
        rect2 = patches.Rectangle((pin - hM1, -2**7), width=Ns, height=2**15,
                                  color='green', alpha=0.3)
        ax2.cla()
        ax2.set_xlim(0, 10000)
        ax2.set_ylim(x.min(), x.max())
        ax2.set_title("Output Signal")
        ax2.add_patch(rect2)
        ax2.plot(yh, 'b')
        plt.draw()
        rect2.remove()

    y = yh + ys
    return y, yh, ys
def sprModel(x, fs, w, N, t):
    """Analysis/synthesis of a sound using the sinusoidal plus residual model.

    Parameters
    ----------
    x : input sound (1-D array)
    fs : sampling rate
    w : analysis window
    N : FFT size (minimum 512)
    t : peak detection threshold in negative dB

    Returns
    -------
    y : output sound (ys + yr)
    ys : sinusoidal component
    yr : residual component
    """
    # Floor division keeps every size an int so it can be used as a slice
    # index under both Python 2 and Python 3.
    hN = N // 2                          # size of positive spectrum
    hM1 = (w.size + 1) // 2              # half analysis window size by rounding
    hM2 = w.size // 2                    # half analysis window size by floor
    Ns = 512                             # FFT size for synthesis (even)
    H = Ns // 4                          # hop size used for analysis and synthesis
    hNs = Ns // 2                        # half of synthesis FFT size
    # The residual frame starts at pin-hNs-1.  Starting at pin == hNs would give
    # index -1, i.e. an empty slice, so the pointer starts at least at hNs+1.
    pin = max(hNs + 1, hM1)              # sound pointer in middle of analysis window
    pend = x.size - max(hNs, hM1)        # last sample to start a frame
    ysw = np.zeros(Ns)                   # sinusoidal output frame
    yrw = np.zeros(Ns)                   # residual output frame
    ys = np.zeros(x.size)                # sinusoidal output array
    yr = np.zeros(x.size)                # residual output array
    w = w / sum(w)                       # normalize analysis window
    sw = np.zeros(Ns)
    sw[hNs - H:hNs + H] = triang(2 * H)  # overlapping window
    bh = blackmanharris(Ns)              # synthesis window
    bh = bh / sum(bh)                    # normalize synthesis window
    wr = bh                              # window for residual
    sw[hNs - H:hNs + H] = sw[hNs - H:hNs + H] / bh[hNs - H:hNs + H]

    while pin < pend:
        # -----analysis-----
        xw = x[pin - hM1:pin + hM2] * w              # window the input sound
        fftbuffer = np.zeros(N)
        fftbuffer[:hM1] = xw[hM2:]                   # zero-phase window in fftbuffer
        fftbuffer[N - hM2:] = xw[:hM2]
        X = fft(fftbuffer)
        mX = 20 * np.log10(abs(X[:hN]))              # magnitude spectrum of positive frequencies
        ploc = PP.peakDetection(mX, hN, t)
        pX = np.unwrap(np.angle(X[:hN]))             # unwrapped phase of positive frequencies
        iploc, ipmag, ipphase = PP.peakInterp(mX, pX, ploc)   # refine peak values
        iploc = (iploc != 0) * (iploc * Ns / N)      # synthesis locations

        ri = pin - hNs - 1                           # input sound pointer for residual analysis
        xr = x[ri:ri + Ns] * wr                      # window the input sound
        fftbuffer = np.zeros(Ns)
        fftbuffer[:hNs] = xr[hNs:]                   # zero-phase window in fftbuffer
        fftbuffer[hNs:] = xr[:hNs]
        Xr = fft(fftbuffer)                          # FFT for residual analysis

        # -----synthesis-----
        Ys = GS.genSpecSines(iploc, ipmag, ipphase, Ns)   # spectrum of sinusoidal component
        Yr = Xr - Ys                                 # residual complex spectrum

        fftbuffer = np.real(ifft(Ys))                # inverse FFT of sinusoidal spectrum
        ysw[:hNs - 1] = fftbuffer[hNs + 1:]          # undo zero-phase window
        ysw[hNs - 1:] = fftbuffer[:hNs + 1]
        fftbuffer = np.real(ifft(Yr))                # inverse FFT of residual spectrum
        yrw[:hNs - 1] = fftbuffer[hNs + 1:]          # undo zero-phase window
        yrw[hNs - 1:] = fftbuffer[:hNs + 1]

        ys[ri:ri + Ns] += sw * ysw                   # overlap-add for sines
        yr[ri:ri + Ns] += sw * yrw                   # overlap-add for residual
        pin += H                                     # advance sound pointer

    y = ys + yr                                      # sum of sinusoidal and residual components
    return y, ys, yr