Esempio n. 1
0
def sineModel(x, fs, w, N, t):
  """Analysis/synthesis of a sound using the sinusoidal model.

  x: input array sound
  fs: sampling rate (not referenced by this function; kept for the interface)
  w: analysis window (its size must not exceed N)
  N: FFT size used for the analysis
  t: peak-detection threshold in negative dB
  returns y: output array sound, same size as x

  The synthesis FFT size is fixed at 512 samples with a hop of 128 samples.
  If x is too short to hold a single frame, an all-zero array is returned.
  """

  # Integer (floor) division throughout: these values are used as slice
  # bounds and array sizes, which must be ints under true division.
  hN = N // 2                                             # size of positive spectrum
  hM1 = (w.size + 1) // 2                                 # half analysis window size by rounding
  hM2 = w.size // 2                                       # half analysis window size by floor
  Ns = 512                                                # FFT size for synthesis (even)
  H = Ns // 4                                             # hop size used for analysis and synthesis
  hNs = Ns // 2                                           # half of synthesis FFT size
  pin = max(hNs, hM1)                                     # sound pointer in middle of analysis window
  pend = x.size - max(hNs, hM1)                           # last sample to start a frame
  yw = np.zeros(Ns)                                       # output sound frame
  y = np.zeros(x.size)                                    # output array
  w = w / np.sum(w)                                       # normalize analysis window

  # Synthesis window: a triangular overlap window divided by the
  # (normalized) blackmanharris window, defined on the central 2*H samples.
  sw = np.zeros(Ns)
  bh = blackmanharris(Ns)
  bh = bh / np.sum(bh)
  sw[hNs-H:hNs+H] = triang(2 * H) / bh[hNs-H:hNs+H]

  while pin < pend:                                       # while input sound pointer is within sound
    #-----analysis-----
    xw = x[pin-hM1:pin+hM2] * w                           # window the input sound
    fftbuffer = np.zeros(N)                               # fresh buffer for every frame
    fftbuffer[:hM1] = xw[hM2:]                            # zero-phase window in fftbuffer
    fftbuffer[N-hM2:] = xw[:hM2]
    X = fft(fftbuffer)                                    # compute FFT
    mX = 20 * np.log10(abs(X[:hN]))                       # magnitude spectrum of positive frequencies
    pX = np.unwrap(np.angle(X[:hN]))                      # unwrapped phase spect. of positive freq.
    ploc = PP.peakDetection(mX, hN, t)                    # detect locations of peaks
    iploc, ipmag, ipphase = PP.peakInterp(mX, pX, ploc)   # refine peak values by interpolation

    #-----synthesis-----
    plocs = iploc * Ns / float(N)                         # adapt peak locations to size of synthesis FFT
    Y = GS.genSpecSines(plocs, ipmag, ipphase, Ns)        # generate sines in the spectrum
    fftbuffer = np.real(ifft(Y))                          # compute inverse FFT
    yw[:hNs-1] = fftbuffer[hNs+1:]                        # undo zero-phase window
    yw[hNs-1:] = fftbuffer[:hNs+1]
    y[pin-hNs:pin+hNs] += sw * yw                         # overlap-add and apply a synthesis window
    pin += H                                              # advance sound pointer

  return y
Esempio n. 2
0
def hpsAnalysis(x, fs, w, wr, pin, N, hN, Ns, hNs, hM, nH, t, f0et, minf0, maxf0, maxhd, stocf):
  """Harmonic plus stochastic analysis of one frame of x centred around pin.

  x: input sound, fs: sampling rate
  w: analysis window; it multiplies x[pin-hM:pin+hM-1], i.e. 2*hM-1 samples
  wr: window for the residual analysis; it multiplies Ns samples of x
  pin: sound pointer of the frame; pin-hNs-1 is used as a start index and
       must not be negative
  N, hN: analysis FFT size and size of its positive spectrum
  Ns, hNs: synthesis FFT size and half of it
  hM: half analysis window size
  nH: maximum number of harmonics, t: peak threshold in negative dB
  f0et, minf0, maxf0: passed on to fd.f0DetectionTwm
  maxhd: max. relative deviation in harmonic detection
  stocf: decimation factor of the residual magnitude spectrum
  returns f0, hloc (harmonic locations scaled to the Ns-size spectrum,
  0 where no harmonic was found), hmag (harmonic magnitudes, -100 where no
  harmonic was found), mXrenv (decimated residual magnitude spectrum)
  """
  # These values are used as indices/sizes; accept integral floats from
  # callers that computed them with true division (e.g. N/2).
  pin, N, hN, Ns, hNs, hM, nH = (int(v) for v in (pin, N, hN, Ns, hNs, hM, nH))

  xw = x[pin-hM:pin+hM-1] * w                                  # window the input sound
  fftbuffer = np.zeros(N)                                      # reset buffer
  fftbuffer[:hM] = xw[hM-1:]                                   # zero-phase window in fftbuffer
  fftbuffer[N-hM+1:] = xw[:hM-1]
  X = fft(fftbuffer)                                           # compute FFT
  mX = 20 * np.log10(abs(X[:hN]))                              # magnitude spectrum of positive frequencies
  pX = np.unwrap(np.angle(X[:hN]))                             # unwrapped phase spect. of positive freq.
  ploc = PP.peakDetection(mX, hN, t)                           # detect peak locations
  iploc, ipmag, ipphase = PP.peakInterp(mX, pX, ploc)          # refine peak values

  f0 = fd.f0DetectionTwm(iploc, ipmag, N, fs, f0et, minf0, maxf0)  # find f0
  hloc = np.zeros(nH)                                          # initialize harmonic locations
  hmag = np.zeros(nH) - 100                                    # initialize harmonic magnitudes
  hphase = np.zeros(nH)                                        # initialize harmonic phases
  hf = (f0 > 0) * (f0 * np.arange(1, nH+1))                    # initialize harmonic frequencies
  hi = 0                                                       # initialize harmonic index

  peakFreqs = iploc / float(N) * fs                            # peak frequencies in Hz (same for every harmonic)
  nyquist = fs / 2.0                                           # float, so odd integer fs is not floored
  while f0 > 0 and hi < nH and hf[hi] < nyquist:               # find harmonic peaks
    dist = np.abs(peakFreqs - hf[hi])
    pei = np.argmin(dist)                                      # closest peak
    dev = dist[pei]                                            # its deviation from the ideal harmonic
    # accept the peak only if it was not already taken by a lower harmonic
    # and if it is close enough to the expected harmonic frequency
    if not np.any(hloc[:hi] == iploc[pei]) and dev < maxhd * hf[hi]:
      hloc[hi] = iploc[pei]                                    # harmonic locations
      hmag[hi] = ipmag[pei]                                    # harmonic magnitudes
      hphase[hi] = ipphase[pei]                                # harmonic phases
    hi += 1                                                    # increase harmonic index

  hloc = (hloc != 0) * (hloc * Ns / float(N))                  # synth. locs
  ri = pin - hNs - 1                                           # input sound pointer for residual analysis
  xw2 = x[ri:ri+Ns] * wr                                       # window the input sound
  fftbuffer = np.zeros(Ns)                                     # reset buffer
  fftbuffer[:hNs] = xw2[hNs:]                                  # zero-phase window in fftbuffer
  fftbuffer[hNs:] = xw2[:hNs]
  X2 = fft(fftbuffer)                                          # compute FFT for residual analysis

  Xh = GS.genSpecSines(hloc, hmag, hphase, Ns)                 # generate sines
  Xr = X2 - Xh                                                 # get the residual complex spectrum
  mXr = 20 * np.log10(abs(Xr[:hNs]))                           # magnitude spectrum of residual
  # decimate the magnitude spectrum and avoid -Inf; the number of output
  # samples must be an integer, so the product is truncated
  mXrenv = resample(np.maximum(-200, mXr), int(mXr.size * stocf))

  return f0, hloc, hmag, mXrenv
Esempio n. 3
0
def stftPeaksModel(x, fs, w, N, H, t):
  """Analysis/synthesis of a sound using the spectral peaks.

  x: input array sound
  fs: sampling rate (not referenced by this function; kept for the interface)
  w: analysis window (its size must not exceed N)
  N: FFT size, H: hop size (positive)
  t: threshold in negative dB
  returns y: output array sound, same size as x

  Raises ValueError if H is not positive, because the frame pointer would
  never advance. If x is too short to hold a single frame, an all-zero
  array is returned.
  """
  if H <= 0:
    raise ValueError("hop size H must be positive")

  hN = N // 2                                             # size of positive spectrum (int: used as slice bound)
  hM1 = (w.size + 1) // 2                                 # half analysis window size by rounding
  hM2 = w.size // 2                                       # half analysis window size by floor
  pin = hM1                                               # sound pointer in middle of analysis window
  pend = x.size - hM1                                     # last sample to start a frame
  yw = np.zeros(w.size)                                   # output sound frame
  y = np.zeros(x.size)                                    # output array
  w = w / np.sum(w)                                       # normalize analysis window

  while pin < pend:
    #-----analysis-----
    xw = x[pin-hM1:pin+hM2] * w                           # window the input sound
    fftbuffer = np.zeros(N)                               # clean fft buffer
    fftbuffer[:hM1] = xw[hM2:]                            # zero-phase window in fftbuffer
    fftbuffer[N-hM2:] = xw[:hM2]
    X = fft(fftbuffer)                                    # compute FFT
    mX = 20 * np.log10(abs(X[:hN]))                       # magnitude spectrum of positive frequencies
    pX = np.unwrap(np.angle(X[:hN]))                      # unwrapped phase spect. of positive freq.
    ploc = PP.peakDetection(mX, hN, t)                    # detect all peaks above a threshold
    pmag = mX[ploc]                                       # magnitude of the peaks
    pphase = pX[ploc]                                     # phase of the peaks

    #-----synthesis-----
    # Only the peak bins are kept; every other bin of the spectrum is zero.
    linMag = 10 ** (pmag / 20)                            # dB to linear magnitude
    Y = np.zeros(N, dtype=complex)
    Y[ploc] = linMag * np.exp(1j * pphase)                # generate positive freq.
    Y[N-ploc] = linMag * np.exp(-1j * pphase)             # generate neg. freq. (complex conjugate)
    fftbuffer = np.real(ifft(Y))                          # inverse FFT
    yw[:hM2] = fftbuffer[N-hM2:]                          # undo zero-phase window
    yw[hM2:] = fftbuffer[:hM1]
    y[pin-hM1:pin+hM2] += H * yw                          # overlap-add
    pin += H                                              # advance sound pointer

  return y
def hpsModelSpectrogramPlot(x, fs, w, N, t, nH, minf0, maxf0, f0et, maxhd, stocf, maxFreq):
    """Plot the spectrograms produced by a harmonic plus stochastic analysis.

    x: input sound, fs: sampling rate, w: analysis window (size <= N)
    N: analysis FFT size, t: peak threshold in negative dB
    nH: maximum number of harmonics
    minf0, maxf0, f0et: passed on to fd.f0DetectionTwm
    maxhd: max. relative deviation in harmonic detection
    stocf: decimation factor of the residual magnitude spectrum
    maxFreq: highest frequency (Hz) shown in the plots

    Three subplots are drawn and shown: the magnitude spectrogram of x with
    the detected harmonics marked, the residual spectrogram, and the
    stochastic approximation of the residual.

    returns XSpec: magnitude spectrogram of x up to maxFreq, one column per
    frame. (The original code returned an undefined name.)

    Raises ValueError if x is too short to hold a single analysis frame.
    """
    # Integer (floor) division: these values are slice bounds / array sizes.
    hN = N // 2                                               # size of positive spectrum
    hM1 = (w.size + 1) // 2                                   # half analysis window size by rounding
    hM2 = w.size // 2                                         # half analysis window size by floor
    Ns = 512                                                  # FFT size for the residual analysis (even)
    H = Ns // 4                                               # hop size
    hNs = Ns // 2
    # The residual frame starts at pin-hNs-1, which must not be negative
    # (a negative start would wrap around the end of x). For windows longer
    # than Ns this is the same start as max(hNs, hM1).
    pin = max(hNs + 1, hM1)
    pend = x.size - max(hNs, hM1)                             # last sample to start a frame
    w = w / np.sum(w)                                         # normalize analysis window
    wr = blackmanharris(Ns)                                   # window for residual
    wr = wr / np.sum(wr)

    maxLoc = N * maxFreq / float(fs)                          # fractional bin position of maxFreq
    lastBin = int(maxLoc)                                     # number of analysis bins kept
    binFreq = np.arange(lastBin) * float(fs) / N              # the bin frequencies
    lastBinYr = int(Ns * maxFreq / float(fs))                 # number of residual bins kept
    binFreqYr = np.arange(lastBinYr) * float(fs) / Ns         # the bin frequencies

    frmTime = []                                              # time of each frame
    specFrames = []                                           # one magnitude spectrum per frame
    resFrames = []                                            # one residual spectrum per frame
    stocFrames = []                                           # one stochastic approximation per frame
    peakRows = []                                             # (time, frequency) of the harmonics per frame

    while pin < pend:                                         # while sound pointer is smaller than last sample
        frameTime = pin / float(fs)
        frmTime.append(frameTime)
        xw = x[pin-hM1:pin+hM2] * w                           # window the input sound
        fftbuffer = np.zeros(N)                               # reset buffer
        fftbuffer[:hM1] = xw[hM2:]                            # zero-phase window in fftbuffer
        fftbuffer[N-hM2:] = xw[:hM2]
        X = fft(fftbuffer)                                    # compute FFT
        mX = 20 * np.log10(abs(X[:hN]))                       # magnitude spectrum of positive frequencies
        pX = np.unwrap(np.angle(X[:hN]))                      # unwrapped phase spect. of positive freq.
        ploc = PP.peakDetection(mX, hN, t)                    # detect peak locations
        iploc, ipmag, ipphase = PP.peakInterp(mX, pX, ploc)   # refine peak values

        f0 = fd.f0DetectionTwm(iploc, ipmag, N, fs, f0et, minf0, maxf0)  # find f0
        hloc = np.zeros(nH)                                   # initialize harmonic locations
        hmag = np.zeros(nH) - 100                             # initialize harmonic magnitudes
        hphase = np.zeros(nH)                                 # initialize harmonic phases
        hf = (f0 > 0) * (f0 * np.arange(1, nH+1))             # initialize harmonic frequencies
        hi = 0                                                # initialize harmonic index

        peakFreqs = iploc / float(N) * fs                     # peak frequencies in Hz
        while f0 > 0 and hi < nH and hf[hi] < fs / 2.0:       # find harmonic peaks
            dist = np.abs(peakFreqs - hf[hi])
            pei = np.argmin(dist)                             # closest peak
            dev = dist[pei]
            # accept the peak only if no lower harmonic already took it
            # and it is close enough to the expected frequency
            if not np.any(hloc[:hi] == iploc[pei]) and dev < maxhd * hf[hi]:
                hloc[hi] = iploc[pei]                         # harmonic locations
                hmag[hi] = ipmag[pei]                         # harmonic magnitudes
                hphase[hi] = ipphase[pei]                     # harmonic phases
            hi += 1                                           # increase harmonic index

        # Accumulate the spectrum and the harmonics that fall inside the plot.
        specFrames.append(mX[:lastBin])
        shown = hloc[(hloc > 0) & (hloc <= maxLoc)]
        peakRows.append(np.column_stack((np.full(shown.size, frameTime),
                                         shown * float(fs) / N)))

        hloc[:hi] = (hloc[:hi] != 0) * (hloc[:hi] * Ns / float(N))  # synth. locs
        ri = pin - hNs - 1                                    # input sound pointer for residual analysis
        xr = x[ri:ri+Ns] * wr                                 # window the input sound
        fftbuffer = np.zeros(Ns)                              # reset buffer
        fftbuffer[:hNs] = xr[hNs:]                            # zero-phase window in fftbuffer
        fftbuffer[hNs:] = xr[:hNs]
        Xr = fft(fftbuffer)                                   # compute FFT for residual analysis
        Yh = GS.genSpecSines(hloc[:hi], hmag, hphase, Ns)     # generate spec sines of harmonic component
        Yr = Xr - Yh                                          # get the residual complex spectrum
        mYr = 20 * np.log10(abs(Yr[:hNs]))
        # decimate the magnitude spectrum and avoid -Inf; the number of
        # output samples must be an integer, so the product is truncated
        mYrenv = resample(np.maximum(-200, mYr), int(mYr.size * stocf))
        mYs = resample(mYrenv, hNs)                           # interpolate back to hNs bins
        resFrames.append(mYr[:lastBinYr])
        stocFrames.append(mYs[:lastBinYr])
        pin += H

    if not frmTime:
        raise ValueError("input sound is too short to hold a single analysis frame")

    frmTime = np.array(frmTime)
    XSpec = np.transpose(np.array(specFrames))                # rows: bins, columns: frames
    YrSpec = np.transpose(np.array(resFrames))
    YsSpec = np.transpose(np.array(stocFrames))
    specPeaks = np.vstack(peakRows)

    plt.figure(1)
    plt.subplot(3,1,1)
    plt.pcolormesh(frmTime,binFreq,XSpec)
    plt.scatter(specPeaks[:,0]+(0.5*H/float(fs)), specPeaks[:,1], s=10, marker='x')
    plt.autoscale(tight=True)
    plt.title('X spectrogram + peaks')

    plt.subplot(3,1,2)
    plt.pcolormesh(frmTime,binFreqYr,YrSpec)
    plt.autoscale(tight=True)
    plt.title('X residual spectrogram')

    plt.subplot(3,1,3)
    plt.pcolormesh(frmTime,binFreqYr,YsSpec)
    plt.autoscale(tight=True)
    plt.title('X residual stochastic approx. spectrogram')

    plt.show()
    return XSpec
Esempio n. 5
0
def spsModel(x, fs, w, N, t, stocf):
  """Analysis/synthesis of a sound using the sinusoidal plus stochastic model.

  x: input sound, fs: sampling rate
  w: analysis window (its size must not exceed N)
  N: FFT size, t: threshold in negative dB
  stocf: decimation factor of mag spectrum for stochastic analysis
  returns y: output sound (ys + yst), ys: sinusoidal component,
  yst: stochastic component

  The stochastic component uses random phases (np.random.rand), so yst and
  y differ between calls. The synthesis FFT size is fixed at 512 samples
  with a hop of 128 samples. If x is too short to hold a single frame, the
  three outputs are all-zero arrays.
  """
  # Integer (floor) division: these values are slice bounds / array sizes.
  hN = N // 2                                                   # size of positive spectrum
  hM1 = (w.size + 1) // 2                                       # half analysis window size by rounding
  hM2 = w.size // 2                                             # half analysis window size by floor
  Ns = 512                                                      # FFT size for synthesis (even)
  H = Ns // 4                                                   # hop size used for analysis and synthesis
  hNs = Ns // 2
  # The residual frame starts at pin-hNs-1, which must not be negative
  # (a negative start would wrap around the end of x). For windows longer
  # than Ns this is the same start as max(hNs, hM1).
  pin = max(hNs + 1, hM1)
  pend = x.size - max(hNs, hM1)                                 # last sample to start a frame
  ysw = np.zeros(Ns)                                            # sinusoidal output frame
  ystw = np.zeros(Ns)                                           # stochastic output frame
  ys = np.zeros(x.size)                                         # sinusoidal output array
  yst = np.zeros(x.size)                                        # stochastic output array
  w = w / np.sum(w)                                             # normalize analysis window
  bh = blackmanharris(Ns)                                       # synthesis window
  bh = bh / np.sum(bh)                                          # normalize synthesis window
  wr = bh                                                       # window for residual
  sw = np.zeros(Ns)
  sw[hNs-H:hNs+H] = triang(2 * H) / bh[hNs-H:hNs+H]             # overlapping window / synthesis window

  # 500 Hz to bin location, rounded half up and kept as an int because it is
  # used as a slice bound. The ramp is the same for every frame.
  fc = 1 + int(500.0 / fs * Ns + 0.5)
  hpRamp = (np.arange(fc) / float(fc - 1)) ** 2

  while pin < pend:
    #-----analysis-----
    xw = x[pin-hM1:pin+hM2] * w                                  # window the input sound
    fftbuffer = np.zeros(N)                                      # reset buffer
    fftbuffer[:hM1] = xw[hM2:]                                   # zero-phase window in fftbuffer
    fftbuffer[N-hM2:] = xw[:hM2]
    X = fft(fftbuffer)                                           # compute FFT
    mX = 20 * np.log10(abs(X[:hN]))                              # magnitude spectrum of positive frequencies
    pX = np.unwrap(np.angle(X[:hN]))                             # unwrapped phase spect. of positive freq.
    ploc = PP.peakDetection(mX, hN, t)                           # detect peak locations
    iploc, ipmag, ipphase = PP.peakInterp(mX, pX, ploc)          # refine peak values

    iploc = (iploc != 0) * (iploc * Ns / float(N))               # synth. locs
    ri = pin - hNs - 1                                           # input sound pointer for residual analysis
    xr = x[ri:ri+Ns] * wr                                        # window the input sound
    fftbuffer = np.zeros(Ns)                                     # reset buffer
    fftbuffer[:hNs] = xr[hNs:]                                   # zero-phase window in fftbuffer
    fftbuffer[hNs:] = xr[:hNs]
    Xr = fft(fftbuffer)                                          # compute FFT for residual analysis

    #-----synthesis-----
    Ys = GS.genSpecSines(iploc, ipmag, ipphase, Ns)              # generate spec of sinusoidal component
    Yr = Xr - Ys                                                 # get the residual complex spectrum
    mYr = 20 * np.log10(abs(Yr[:hNs]))                           # magnitude spectrum of residual
    # decimate the magnitude spectrum and avoid -Inf; the number of output
    # samples must be an integer, so the product is truncated
    mYrenv = resample(np.maximum(-200, mYr), int(mYr.size * stocf))
    mYst = resample(mYrenv, hNs)                                 # interpolate to original size
    mYst = 10 ** (mYst / 20)                                     # dB to linear magnitude
    mYst[:fc] *= hpRamp                                          # high pass filter the stochastic component
    pYst = 2 * np.pi * np.random.rand(hNs)                       # generate phase random values
    Yst = np.zeros(Ns, dtype=complex)
    Yst[:hNs] = mYst * np.exp(1j * pYst)                         # generate positive freq.
    Yst[hNs+1:] = mYst[:0:-1] * np.exp(-1j * pYst[:0:-1])        # generate negative freq.

    fftbuffer = np.real(ifft(Ys))                                # inverse FFT of sinusoidal spectrum
    ysw[:hNs-1] = fftbuffer[hNs+1:]                              # undo zero-phase window
    ysw[hNs-1:] = fftbuffer[:hNs+1]

    fftbuffer = np.real(ifft(Yst))                               # inverse FFT of stochastic spectrum
    ystw[:hNs-1] = fftbuffer[hNs+1:]                             # undo zero-phase window
    ystw[hNs-1:] = fftbuffer[:hNs+1]

    ys[ri:ri+Ns] += sw * ysw                                     # overlap-add for sines
    yst[ri:ri+Ns] += sw * ystw                                   # overlap-add for stochastic
    pin += H                                                     # advance sound pointer

  y = ys + yst                                                   # sum of sinusoidal and stochastic components
  return y, ys, yst
Esempio n. 6
0
def hprModelFrame(x, fs, w, N, t, nH, minf0, maxf0, f0et, maxhd):
  """Harmonic plus residual analysis/synthesis of the first frame of x.

  x: input sound, fs: sampling rate
  w: analysis window (its size must not exceed N)
  N: FFT size, t: threshold in negative dB
  nH: maximum number of harmonics
  minf0, maxf0, f0et: passed on to fd.f0DetectionTwm
  maxhd: max. relative deviation in harmonic detection

  returns, in this order:
    mX, pX: magnitude (dB) and unwrapped phase of the analysis spectrum
    hlocN: harmonic locations in bins of the N-size analysis spectrum
    hmag, hphase: harmonic magnitudes and phases
    mYh, pYh: magnitude and phase of the synthesized harmonic spectrum
    mXr, pXr: magnitude and phase of the spectrum of the residual-analysis frame
    mYr, pYr: magnitude and phase of the residual spectrum
    yh, yr: harmonic and residual output arrays (same size as x)
    y: yh + yr
  """
  # Integer (floor) division: these values are slice bounds / array sizes.
  hN = N // 2                                                   # size of positive spectrum
  hM1 = (w.size + 1) // 2                                       # half analysis window size by rounding
  hM2 = w.size // 2                                             # half analysis window size by floor
  Ns = 512                                                      # FFT size for synthesis (even)
  H = Ns // 4                                                   # hop size used for the synthesis window
  hNs = Ns // 2
  # The residual frame starts at pin-hNs-1, which must not be negative
  # (a negative start would wrap around the end of x). For windows longer
  # than Ns this is the same position as max(hNs, hM1).
  pin = max(hNs + 1, hM1)
  yhw = np.zeros(Ns)                                            # harmonic output frame
  yrw = np.zeros(Ns)                                            # residual output frame
  yh = np.zeros(x.size)                                         # harmonic output array
  yr = np.zeros(x.size)                                         # residual output array
  w = w / np.sum(w)                                             # normalize analysis window
  bh = blackmanharris(Ns)                                       # synthesis window
  bh = bh / np.sum(bh)                                          # normalize synthesis window
  wr = bh                                                       # window for residual
  sw = np.zeros(Ns)
  sw[hNs-H:hNs+H] = triang(2 * H) / bh[hNs-H:hNs+H]             # overlapping window / synthesis window

  #-----analysis-----
  xw = x[pin-hM1:pin+hM2] * w                                  # window the input sound
  fftbuffer = np.zeros(N)                                      # reset buffer
  fftbuffer[:hM1] = xw[hM2:]                                   # zero-phase window in fftbuffer
  fftbuffer[N-hM2:] = xw[:hM2]
  X = fft(fftbuffer)                                           # compute FFT
  mX = 20 * np.log10(abs(X[:hN]))                              # magnitude spectrum of positive frequencies
  pX = np.unwrap(np.angle(X[:hN]))                             # unwrapped phase spect. of positive freq.
  ploc = PP.peakDetection(mX, hN, t)                           # detect peak locations
  iploc, ipmag, ipphase = PP.peakInterp(mX, pX, ploc)          # refine peak values

  f0 = fd.f0DetectionTwm(iploc, ipmag, N, fs, f0et, minf0, maxf0)  # find f0
  hloc = np.zeros(nH)                                          # initialize harmonic locations
  hmag = np.zeros(nH) - 100                                    # initialize harmonic magnitudes
  hphase = np.zeros(nH)                                        # initialize harmonic phases
  hf = (f0 > 0) * (f0 * np.arange(1, nH+1))                    # initialize harmonic frequencies
  hi = 0                                                       # initialize harmonic index

  peakFreqs = iploc / float(N) * fs                            # peak frequencies in Hz
  while f0 > 0 and hi < nH and hf[hi] < fs / 2.0:              # find harmonic peaks
    dist = np.abs(peakFreqs - hf[hi])
    pei = np.argmin(dist)                                      # closest peak
    dev = dist[pei]
    # accept the peak only if no lower harmonic already took it
    # and it is close enough to the expected frequency
    if not np.any(hloc[:hi] == iploc[pei]) and dev < maxhd * hf[hi]:
      hloc[hi] = iploc[pei]                                    # harmonic locations
      hmag[hi] = ipmag[pei]                                    # harmonic magnitudes
      hphase[hi] = ipphase[pei]                                # harmonic phases
    hi += 1                                                    # increase harmonic index

  # Copy before rescaling: hloc is modified in place on the next line, so a
  # plain assignment would make hlocN carry the Ns-scaled values as well.
  hlocN = hloc.copy()
  hloc[:hi] = (hloc[:hi] != 0) * (hloc[:hi] * Ns / float(N))   # synth. locs
  ri = pin - hNs - 1                                           # input sound pointer for residual analysis
  xr = x[ri:ri+Ns] * wr                                        # window the input sound
  fftbuffer = np.zeros(Ns)                                     # reset buffer
  fftbuffer[:hNs] = xr[hNs:]                                   # zero-phase window in fftbuffer
  fftbuffer[hNs:] = xr[:hNs]
  Xr = fft(fftbuffer)                                          # compute FFT for residual analysis

  #-----synthesis-----
  Yh = GS.genSpecSines(hloc[:hi], hmag, hphase, Ns)            # generate spec sines of harmonic component
  mYh = 20 * np.log10(abs(Yh[:hNs]))
  pYh = np.unwrap(np.angle(Yh[:hNs]))
  Yr = Xr - Yh                                                 # get the residual complex spectrum
  mXr = 20 * np.log10(abs(Xr[:hNs]))
  pXr = np.unwrap(np.angle(Xr[:hNs]))
  mYr = 20 * np.log10(abs(Yr[:hNs]))
  pYr = np.unwrap(np.angle(Yr[:hNs]))

  fftbuffer = np.real(ifft(Yh))                                # inverse FFT of harmonic spectrum
  yhw[:hNs-1] = fftbuffer[hNs+1:]                              # undo zero-phase window
  yhw[hNs-1:] = fftbuffer[:hNs+1]

  fftbuffer = np.real(ifft(Yr))                                # inverse FFT of residual spectrum
  yrw[:hNs-1] = fftbuffer[hNs+1:]                              # undo zero-phase window
  yrw[hNs-1:] = fftbuffer[:hNs+1]

  yh[ri:ri+Ns] += sw * yhw                                     # overlap-add for sines
  yr[ri:ri+Ns] += sw * yrw                                     # overlap-add for residual

  y = yh + yr                                                  # sum of harmonic and residual components
  return mX, pX, hlocN, hmag, hphase, mYh, pYh, mXr, pXr, mYr, pYr, yh, yr, y
Esempio n. 7
0
def harmonicModel(x, fs, w, N, t, nH, minf0, maxf0, f0et, maxhd, maxnpeaksTwm=10):
  """Analysis/synthesis of a sound using the sinusoidal harmonic model.

  x: input sound, fs: sampling rate
  w: analysis window (its size must not exceed N)
  N: FFT size (minimum 512), t: threshold in negative dB
  nH: maximum number of harmonics
  minf0: minimum f0 frequency in Hz, maxf0: maximum f0 frequency in Hz
  f0et: error threshold in the f0 detection (ex: 5)
  maxhd: max. relative deviation in harmonic detection (ex: .2)
  maxnpeaksTwm: maximum number of peaks used for F0 detection
  returns y: output array sound (harmonic component only), same size as x

  The synthesis FFT size is fixed at 512 samples with a hop of 128 samples.
  If x is too short to hold a single frame, an all-zero array is returned.
  """
  # Integer (floor) division: these values are slice bounds / array sizes.
  hN = N // 2                                                   # size of positive spectrum
  hM1 = (w.size + 1) // 2                                       # half analysis window size by rounding
  hM2 = w.size // 2                                             # half analysis window size by floor
  Ns = 512                                                      # FFT size for synthesis (even)
  H = Ns // 4                                                   # hop size used for analysis and synthesis
  hNs = Ns // 2
  pin = max(hNs, hM1)                                           # sound pointer in middle of analysis window
  pend = x.size - max(hNs, hM1)                                 # last sample to start a frame
  yh = np.zeros(Ns)                                             # output sound frame
  y = np.zeros(x.size)                                          # output array
  w = w / np.sum(w)                                             # normalize analysis window
  bh = blackmanharris(Ns)                                       # synthesis window
  bh = bh / np.sum(bh)                                          # normalize synthesis window
  sw = np.zeros(Ns)                                             # overlapping window / synthesis window
  sw[hNs-H:hNs+H] = triang(2 * H) / bh[hNs-H:hNs+H]

  while pin < pend:
    #-----analysis-----
    xw = x[pin-hM1:pin+hM2] * w                                  # window the input sound
    fftbuffer = np.zeros(N)                                      # reset buffer
    fftbuffer[:hM1] = xw[hM2:]                                   # zero-phase window in fftbuffer
    fftbuffer[N-hM2:] = xw[:hM2]
    X = fft(fftbuffer)                                           # compute FFT
    mX = 20 * np.log10(abs(X[:hN]))                              # magnitude spectrum of positive frequencies
    pX = np.unwrap(np.angle(X[:hN]))                             # unwrapped phase spect. of positive freq.
    ploc = PP.peakDetection(mX, hN, t)                           # detect peak locations
    iploc, ipmag, ipphase = PP.peakInterp(mX, pX, ploc)          # refine peak values

    f0 = fd.f0DetectionTwm(iploc, ipmag, N, fs, f0et, minf0, maxf0, maxnpeaksTwm)  # find f0
    hloc = np.zeros(nH)                                          # initialize harmonic locations
    hmag = np.zeros(nH) - 100                                    # initialize harmonic magnitudes
    hphase = np.zeros(nH)                                        # initialize harmonic phases
    hf = (f0 > 0) * (f0 * np.arange(1, nH+1))                    # initialize harmonic frequencies
    hi = 0                                                       # initialize harmonic index

    peakFreqs = iploc / float(N) * fs                            # peak frequencies in Hz
    while f0 > 0 and hi < nH and hf[hi] < fs / 2.0:              # find harmonic peaks
      dist = np.abs(peakFreqs - hf[hi])
      pei = np.argmin(dist)                                      # closest peak
      dev = dist[pei]
      # accept the peak only if no lower harmonic already took it
      # and it is close enough to the expected frequency
      if not np.any(hloc[:hi] == iploc[pei]) and dev < maxhd * hf[hi]:
        hloc[hi] = iploc[pei]                                    # harmonic locations
        hmag[hi] = ipmag[pei]                                    # harmonic magnitudes
        hphase[hi] = ipphase[pei]                                # harmonic phases
      hi += 1                                                    # increase harmonic index
    hloc = (hloc != 0) * (hloc * Ns / float(N))                  # synth. locs

    #-----synthesis-----
    Yh = GS.genSpecSines(hloc, hmag, hphase, Ns)                 # generate spec sines
    fftbuffer = np.real(ifft(Yh))                                # inverse FFT
    yh[:hNs-1] = fftbuffer[hNs+1:]                               # undo zero-phase window
    yh[hNs-1:] = fftbuffer[:hNs+1]
    y[pin-hNs:pin+hNs] += sw * yh                                # overlap-add
    pin += H                                                     # advance sound pointer

  return y
Esempio n. 8
0
def sineModelPlot(x, fs, w, N, H, t, minFreq, maxFreq):
    """Plot the magnitude spectrogram of a sound with its spectral peaks overlaid.

    x: input array sound, fs: sampling rate, w: analysis window, N: FFT size,
    H: hop size, t: peak detection threshold in negative dB,
    minFreq, maxFreq: lower/upper limit (Hz) of the frequency range to show.
    Returns YSpec: dB magnitude spectra restricted to the shown bins, one
    column per analysis frame (shape: bins x frames). When the sound is too
    short to hold a single frame, an empty (bins x 0) array is returned and
    nothing is plotted.
    """
    hN = N // 2  # size of positive spectrum (integer: it is used as a slice bound)
    hM1 = (w.size + 1) // 2  # ceil of half analysis window size
    hM2 = w.size // 2  # floor of half analysis window size
    pin = hM1  # initialize sound pointer in middle of analysis window
    pend = x.size - max(hM1, H)  # last sample to start a frame
    w = w / sum(w)  # normalize analysis window

    # Bin range to display: truncated to integers (they index mX) and clamped
    # to the positive spectrum so the frequency axis always matches the data.
    firstBin = max(0, int(N * minFreq / float(fs)))
    lastBin = min(hN, int(N * maxFreq / float(fs)))
    binFreq = np.arange(firstBin, lastBin) * float(fs) / N  # the bin frequencies

    frmTime = []  # time (s) at the centre of every frame
    frames = []  # one dB magnitude slice per frame
    peakRows = []  # one (time, frequency) array of peaks per frame

    while pin < pend:  # while sound pointer is smaller than last sample
        frmTime.append(pin / float(fs))
        xw = x[pin - hM1 : pin + hM2] * w  # window the input sound
        fftbuffer = np.zeros(N)  # clean fft buffer
        fftbuffer[:hM1] = xw[hM2:]  # zero-phase window in fftbuffer
        fftbuffer[N - hM2 :] = xw[:hM2]
        X = fft(fftbuffer)  # compute FFT
        mX = 20 * np.log10(abs(X[:hN]))  # magnitude spectrum of positive frequencies in dB
        pX = np.unwrap(np.angle(X[:hN]))  # unwrapped phase spectrum of positive frequencies
        ploc = PP.peakDetection(mX, hN, t)  # detect locations of peaks
        iploc, ipmag, ipphase = PP.peakInterp(mX, pX, ploc)  # refine peak values by interpolation

        frames.append(mX[firstBin:lastBin])
        inRange = (iploc >= firstBin) & (iploc < lastBin)  # peaks inside the shown range
        final_peaks = iploc[inRange]
        parray = np.zeros([final_peaks.size, 2])
        parray[:, 0] = pin / float(fs)  # peak time (s)
        parray[:, 1] = final_peaks * float(fs) / N  # peak frequency (Hz)
        peakRows.append(parray)
        pin += H

    if not frames:  # no complete frame fits in the sound: nothing to plot
        return np.zeros((binFreq.size, 0))

    # Assemble once after the loop instead of growing the arrays frame by frame.
    YSpec = np.transpose(np.array(frames))
    specPeaks = np.concatenate(peakRows, axis=0)
    frmTime = np.array(frmTime)

    # plt.hold(True) is not called: holding is the default behaviour and the
    # function no longer exists in Matplotlib >= 3.0.
    plt.pcolormesh(frmTime, binFreq, YSpec)
    plt.scatter(specPeaks[:, 0] + (0.5 * H / float(fs)), specPeaks[:, 1], s=10, marker="x")
    plt.xlabel("Time(s)")
    plt.ylabel("Frequency(Hz)")
    plt.autoscale(tight=True)
    plt.show()
    return YSpec
Esempio n. 9
0
def hps(x, fs, w, N, t, nH, minf0, maxf0, f0et, maxhd, stocf, maxnpeaksTwm=10):
    """Analysis/synthesis of a sound using the harmonic plus stochastic model.

    Between analysis and synthesis the harmonics are transposed by a fixed
    factor (fscale = 0.5) and their magnitudes are re-read from a linear
    envelope through the analysed harmonic peaks (timbre preservation).

    x: input sound, fs: sampling rate, w: analysis window,
    N: FFT size (minimum 512), t: threshold in negative dB,
    nH: maximum number of harmonics, minf0: minimum f0 frequency in Hz,
    maxf0: maximum f0 frequency in Hz,
    f0et: error threshold in the f0 detection (ex: 5),
    maxhd: max. relative deviation in harmonic detection (ex: .2),
    stocf: decimation factor of mag spectrum for stochastic analysis,
    maxnpeaksTwm: maximum number of peaks used for F0 detection.
    Returns (y, yh, ys): output sound, harmonic component, stochastic component.
    """
    # NOTE(review): maxnpeaksTwm is accepted but never forwarded to
    # fd.f0DetectionTwm -- confirm against that function's signature.

    # Sizes are computed with floor division: they are used as array lengths
    # and slice bounds, which must be integers.
    hN = N // 2  # size of positive spectrum
    hM1 = (w.size + 1) // 2  # half analysis window size by rounding
    hM2 = w.size // 2  # half analysis window size by floor
    Ns = 512  # FFT size for synthesis
    H = Ns // 4  # hop size used for analysis and synthesis
    hNs = Ns // 2  # half of FFT size for synthesis
    # The residual frame starts at pin - hNs - 1, so the first pointer must be
    # at least hNs + 1; otherwise that start index is -1 and the slice is empty.
    pin = max(hNs + 1, hM1)  # initialize sound pointer in middle of analysis window
    pend = x.size - max(hNs, hM1)  # last sample to start a frame
    yhw = np.zeros(Ns)  # initialize output sound frame
    ysw = np.zeros(Ns)  # initialize output sound frame
    yh = np.zeros(x.size)  # initialize output array
    ys = np.zeros(x.size)  # initialize output array
    w = w / sum(w)  # normalize analysis window
    sw = np.zeros(Ns)
    ow = triang(2 * H)  # overlapping window
    sw[hNs - H : hNs + H] = ow
    bh = blackmanharris(Ns)  # synthesis window
    bh = bh / sum(bh)  # normalize synthesis window
    wr = bh  # window for residual
    sw[hNs - H : hNs + H] = sw[hNs - H : hNs + H] / bh[hNs - H : hNs + H]
    sws = H * hanning(Ns) / 2  # synthesis window for stochastic
    lastyhloc = np.zeros(nH)  # initialize synthesis harmonic locations
    yhphase = 2 * np.pi * np.random.rand(nH)  # initialize synthesis harmonic phases

    # High-pass ramp applied to the stochastic magnitudes below ~500 Hz
    # (frame independent, so it is built once).
    fc = 1 + int(round(500.0 / fs * Ns))  # 500 Hz
    hpf = (np.arange(0, fc) / float(fc - 1)) ** 2

    while pin < pend:
        # -----analysis-----
        xw = x[pin - hM1 : pin + hM2] * w  # window the input sound
        fftbuffer = np.zeros(N)  # reset buffer
        fftbuffer[:hM1] = xw[hM2:]  # zero-phase window in fftbuffer
        fftbuffer[N - hM2 :] = xw[:hM2]
        X = fft(fftbuffer)  # compute FFT
        mX = 20 * np.log10(abs(X[:hN]))  # magnitude spectrum of positive frequencies
        ploc = PP.peakDetection(mX, hN, t)  # detect spectral peaks
        pX = np.unwrap(np.angle(X[:hN]))  # unwrapped phase spect. of positive freq.
        iploc, ipmag, ipphase = PP.peakInterp(mX, pX, ploc)  # refine peak values
        f0 = fd.f0DetectionTwm(iploc, ipmag, N, fs, f0et, minf0, maxf0)  # find f0
        hloc = np.zeros(nH)  # initialize harmonic locations
        hmag = np.zeros(nH) - 100  # initialize harmonic magnitudes
        hphase = np.zeros(nH)  # initialize harmonic phases
        hf = (f0 > 0) * (f0 * np.arange(1, nH + 1))  # initialize harmonic frequencies
        hi = 0  # initialize harmonic index
        while f0 > 0 and hi < nH and hf[hi] < fs / 2.0:  # find harmonic peaks
            devs = abs(iploc / N * fs - hf[hi])  # distance (Hz) of every peak to this harmonic
            pei = np.argmin(devs)  # closest peak
            dev = devs[pei]
            # accept the peak if it is not already assigned to a lower harmonic
            # and it lies within the allowed relative deviation
            if (hi == 0 or not any(hloc[:hi] == iploc[pei])) and dev < maxhd * hf[hi]:
                hloc[hi] = iploc[pei]  # harmonic locations
                hmag[hi] = ipmag[pei]  # harmonic magnitudes
                hphase[hi] = ipphase[pei]  # harmonic phases
            hi += 1  # increase harmonic index

        hloc[:hi] = (hloc[:hi] != 0) * (hloc[:hi] * Ns / N)  # synth. locs
        ri = pin - hNs - 1  # input sound pointer for residual analysis
        xw2 = x[ri : ri + Ns] * wr  # window the input sound
        fftbuffer = np.zeros(Ns)  # reset buffer
        fftbuffer[:hNs] = xw2[hNs:]  # zero-phase window in fftbuffer
        fftbuffer[hNs:] = xw2[:hNs]
        X2 = fft(fftbuffer)  # compute FFT for residual analysis
        Xh = GS.genSpecSines(hloc, hmag, hphase, Ns)  # generate sines
        Xr = X2 - Xh  # get the residual complex spectrum
        mXr = 20 * np.log10(abs(Xr[:hNs]))  # magnitude spectrum of residual
        # decimate the magnitude spectrum and avoid -Inf (sample count must be an int)
        mXrenv = resample(np.maximum(-200, mXr), int(mXr.size * stocf))

        # -----synthesis data-----
        yhloc = hloc  # synthesis harmonics locs
        yhmag = hmag  # synthesis harmonic amplitudes
        mYrenv = mXrenv  # synthesis residual envelope
        yf0 = f0  # synthesis fundamental frequency
        # ------transformations----
        # -----clarinet effect, only odd harmonics-----
        # yhloc[1::2] = 0                                            # set even harmonic to 0 magnitude

        # -----pitch discretization to tempered scale-----
        # if f0>0:
        #  nst = round(12*np.log2(f0/55))                            # closest semitone
        #  discpitch = 55*2**(nst/12)                                # discretized pitch
        #  fscale = discpitch/f0                                     # pitch transposition factor
        #  yhloc = yhloc*fscale                                      # all harmonic corrected to discretized pitch

        # -----pitch transposition with timbre preservation -----

        fscale = 0.5  # scale factor for pitch transposition
        ind_valid = np.where(yhloc != 0)[0]  # using only those harmonic indices which have non zero frequency values
        if f0 > 0:
            x_vals = np.append(
                np.append(0, yhloc[ind_valid]), hNs
            )  # values of peak locations to be considered for interpolation
            y_vals = np.append(
                np.append(yhmag[0], yhmag[ind_valid]), yhmag[-1]
            )  # values of peak magnitudes to be considered for interpolation
            specEnvelope = interp1d(x_vals, y_vals, kind="linear", bounds_error=False, fill_value=-100)
            yhloc = yhloc * fscale
            yhmag[ind_valid] = specEnvelope(yhloc[ind_valid])

        # ----- Pitch transposition, Vibrato and tremolo with timbre preservation -----
        # vtf = 5.0;                                                  # vibrato-tremolo frequency in Hz
        # vd  = 50;                                                   # vibrato depth in cents
        # td  = 3;                                                    # tremolo depth in dB
        # fscale = 1                                                  # scale factor for pitch transposition
        # modf = np.sin(2.0*np.pi*vtf*pin/fs)                         # modulation factor for both vibrato and tremolo (which has to be scaled later)
        # sfscale = fscale*(2.0**(vd/1200.0*modf))                    # effective scale factor together with vibrato effect
        # idx = np.where(yhloc!=0)[0]                                 # using only those harmonic indices which have non zero frequency values
        # if (f0>0):
        #     x_vals = np.append(np.append(0, yhloc[idx]),hNs)        # values of peak locations to be considered for interpolation
        #     y_vals = np.append(np.append(yhmag[0], yhmag[idx]),yhmag[-1])     # values of peak magnitudes to be considered for interpolation
        #     specEnvelope = interp1d(x_vals, y_vals, kind = 'linear',bounds_error=False, fill_value=-100)
        #     yhloc = yhloc*sfscale
        #     yhmag[idx] = specEnvelope(yhloc[idx])
        #     yhmag[idx] = yhmag[idx] + td*modf                       # tremolo

        # -----synthesis-----
        yhphase += 2 * np.pi * (lastyhloc + yhloc) / 2 / Ns * H  # propagate phases
        lastyhloc = yhloc
        Yh = GS.genSpecSines(yhloc, yhmag, yhphase, Ns)  # generate spec sines
        mYs = resample(mYrenv, hNs)  # interpolate to original size
        mYs = 10 ** (mYs / 20)  # dB to linear magnitude
        if f0 > 0:
            mYs *= np.cos(np.pi * np.arange(0, hNs) / Ns * fs / yf0) ** 2  # filter residual
        mYs[:fc] *= hpf  # HPF
        pYs = 2 * np.pi * np.random.rand(hNs)  # generate phase random values

        Ys = np.zeros(Ns, dtype=complex)
        Ys[:hNs] = mYs * np.exp(1j * pYs)  # generate positive freq.
        Ys[hNs + 1 :] = mYs[:0:-1] * np.exp(-1j * pYs[:0:-1])  # generate negative freq.

        fftbuffer = np.real(ifft(Yh))  # inverse FFT of harmonic spectrum
        yhw[: hNs - 1] = fftbuffer[hNs + 1 :]  # undo zero-phase window
        yhw[hNs - 1 :] = fftbuffer[: hNs + 1]

        fftbuffer = np.real(ifft(Ys))  # inverse FFT of stochastic approximation spectrum
        ysw[: hNs - 1] = fftbuffer[hNs + 1 :]  # undo zero-phase window
        ysw[hNs - 1 :] = fftbuffer[: hNs + 1]

        yh[ri : ri + Ns] += sw * yhw  # overlap-add for sines
        ys[ri : ri + Ns] += sws * ysw  # overlap-add for stoch
        pin += H  # advance sound pointer

    y = yh + ys  # sum harmonic and stochastic components
    return y, yh, ys
Esempio n. 10
0
def hpsModelParams(x,fs,w,N,t,nH,minf0,maxf0,f0et,maxhd,stocf,timemapping,fscale,timbremapping) :
  """Analysis/synthesis of a sound using the harmonic plus stochastic model.

  x: input sound, fs: sampling rate, w: analysis window (odd size),
  N: FFT size (minimum 512), t: threshold in negative dB,
  nH: maximum number of harmonics, minf0: minimum f0 frequency in Hz,
  maxf0: maximum f0 frequency in Hz,
  f0et: error threshold in the f0 detection (ex: 5),
  maxhd: max. relative deviation in harmonic detection (ex: .2),
  stocf: decimation factor of mag spectrum for stochastic analysis,
  timemapping: mapping between input and output time (sec),
  fscale: not described in the original header,
  timbremapping: mapping between input and output frequency (Hz).
  timemapping, fscale and timbremapping are accepted but not used by this
  implementation: the transformations section is empty, so the analysed
  data is resynthesized unchanged.
  Returns (y, yh, ys): output sound, harmonic component, stochastic component.
  """
  # Sizes are computed with floor division: they are used as array lengths
  # and slice bounds, which must be integers.
  hN = N//2                                                     # size of positive spectrum
  hM = (w.size+1)//2                                            # half analysis window size
  Ns = 512                                                      # FFT size for synthesis (even)
  H = Ns//4                                                     # Hop size used for analysis and synthesis
  hNs = Ns//2                                                   # half of synthesis FFT size
  # The residual frame starts at pin-hNs-1, so the first pointer must be at
  # least hNs+1; otherwise that start index is -1 and the slice is empty.
  pin = max(hNs+1, hM)                                          # initialize sound pointer in middle of analysis window
  pend = x.size - max(hNs, hM)                                  # last sample to start a frame
  yhw = np.zeros(Ns)                                            # initialize output sound frame
  ysw = np.zeros(Ns)                                            # initialize output sound frame
  yh = np.zeros(x.size)                                         # initialize output array
  ys = np.zeros(x.size)                                         # initialize output array
  w = w / sum(w)                                                # normalize analysis window
  sw = np.zeros(Ns)
  ow = triang(2*H)                                              # overlapping window
  sw[hNs-H:hNs+H] = ow
  bh = blackmanharris(Ns)                                       # synthesis window
  bh = bh / sum(bh)                                             # normalize synthesis window
  wr = bh                                                       # window for residual
  sw[hNs-H:hNs+H] = sw[hNs-H:hNs+H] / bh[hNs-H:hNs+H]
  sws = H*hanning(Ns)/2                                         # synthesis window for stochastic
  lastyhloc = np.zeros(nH)                                      # initialize synthesis harmonic locations
  yhphase = 2*np.pi * np.random.rand(nH)                        # initialize synthesis harmonic phases

  # High-pass ramp applied to the stochastic magnitudes below ~500 Hz
  # (frame independent, so it is built once).
  fc = 1+int(round(500.0/fs*Ns))                                # 500 Hz
  hpf = (np.arange(0, fc)/float(fc-1))**2

  while pin<pend:

    #-----analysis-----
    xw = x[pin-hM:pin+hM-1] * w                                  # window the input sound
    fftbuffer = np.zeros(N)                                      # reset buffer
    fftbuffer[:hM] = xw[hM-1:]                                   # zero-phase window in fftbuffer
    fftbuffer[N-hM+1:] = xw[:hM-1]
    X = fft(fftbuffer)                                           # compute FFT
    mX = 20 * np.log10( abs(X[:hN]) )                            # magnitude spectrum of positive frequencies
    ploc = PP.peakDetection(mX, hN, t)                           # detect spectral peaks
    pX = np.unwrap( np.angle(X[:hN]) )                           # unwrapped phase spect. of positive freq.
    iploc, ipmag, ipphase = PP.peakInterp(mX, pX, ploc)          # refine peak values

    f0 = fd.f0DetectionTwm(iploc, ipmag, N, fs, f0et, minf0, maxf0)  # find f0
    hloc = np.zeros(nH)                                          # initialize harmonic locations
    hmag = np.zeros(nH)-100                                      # initialize harmonic magnitudes
    hphase = np.zeros(nH)                                        # initialize harmonic phases
    hf = (f0>0) * (f0*np.arange(1, nH+1))                        # initialize harmonic frequencies
    hi = 0                                                       # initialize harmonic index

    while f0>0 and hi<nH and hf[hi]<fs/2.0 :                     # find harmonic peaks
      devs = abs(iploc/N*fs - hf[hi])                            # distance (Hz) of every peak to this harmonic
      pei = np.argmin(devs)                                      # closest peak
      dev = devs[pei]
      # accept the peak if it is not already assigned to a lower harmonic
      # and it lies within the allowed relative deviation
      if ( hi==0 or not any(hloc[:hi]==iploc[pei]) ) and dev<maxhd*hf[hi] :
        hloc[hi] = iploc[pei]                                    # harmonic locations
        hmag[hi] = ipmag[pei]                                    # harmonic magnitudes
        hphase[hi] = ipphase[pei]                                # harmonic phases
      hi += 1                                                    # increase harmonic index

    hloc = (hloc!=0) * (hloc*Ns/N)                               # synth. locs
    ri = pin-hNs-1                                               # input sound pointer for residual analysis
    xw2 = x[ri:ri+Ns]*wr                                         # window the input sound
    fftbuffer = np.zeros(Ns)                                     # reset buffer
    fftbuffer[:hNs] = xw2[hNs:]                                  # zero-phase window in fftbuffer
    fftbuffer[hNs:] = xw2[:hNs]
    X2 = fft(fftbuffer)                                          # compute FFT for residual analysis

    Xh = GS.genSpecSines(hloc, hmag, hphase, Ns)                 # generate sines
    Xr = X2-Xh                                                   # get the residual complex spectrum
    mXr = 20 * np.log10( abs(Xr[:hNs]) )                         # magnitude spectrum of residual
    # decimate the magnitude spectrum and avoid -Inf (sample count must be an int)
    mXrenv = resample(np.maximum(-200, mXr), int(mXr.size*stocf))

    #-----synthesis data-----
    yhloc = hloc                                                 # synthesis harmonics locs
    yhmag = hmag                                                 # synthesis harmonic amplitudes
    mYrenv = mXrenv                                              # synthesis residual envelope
    yf0 = f0                                                     # synthesis fundamental frequency

    #-----transformations-----

    #-----synthesis-----
    yhphase += 2*np.pi * (lastyhloc+yhloc)/2/Ns*H                # propagate phases
    lastyhloc = yhloc

    Yh = GS.genSpecSines(yhloc, yhmag, yhphase, Ns)              # generate spec sines
    mYs = resample(mYrenv, hNs)                                  # interpolate to original size
    mYs = 10**(mYs/20)                                           # dB to linear magnitude
    if f0>0:
      mYs *= np.cos(np.pi*np.arange(0, hNs)/Ns*fs/yf0)**2        # filter residual

    mYs[:fc] *= hpf                                              # HPF
    pYs = 2*np.pi * np.random.rand(hNs)                          # generate phase random values

    Ys = np.zeros(Ns, dtype = complex)
    Ys[:hNs] = mYs * np.exp(1j*pYs)                              # generate positive freq.
    Ys[hNs+1:] = mYs[:0:-1] * np.exp(-1j*pYs[:0:-1])             # generate negative freq.

    fftbuffer = np.real( ifft(Yh) )                              # sines in time domain using IFFT
    yhw[:hNs-1] = fftbuffer[hNs+1:]                              # undo zero-phase window
    yhw[hNs-1:] = fftbuffer[:hNs+1]

    fftbuffer = np.real( ifft(Ys) )                              # stochastic in time domain using IFFT
    ysw[:hNs-1] = fftbuffer[hNs+1:]                              # undo zero-phase window
    ysw[hNs-1:] = fftbuffer[:hNs+1]

    yh[ri:ri+Ns] += sw*yhw                                       # overlap-add for sines
    ys[ri:ri+Ns] += sws*ysw                                      # overlap-add for stoch
    pin += H                                                     # advance sound pointer

  y = yh+ys                                                      # sum harmonic and stochastic components
  return y, yh, ys
Esempio n. 11
0
def spsTimescale(x, fs, w, N, t, maxnS, stocf) :
  """Time-scale a sound using the sinusoidal plus stochastic model.

  The output is synthesized at a constant hop while the analysis position in
  the input is read from a time mapping that is built inside the function
  (a sinusoidally warped input time against a linear output time, both
  scaled to the duration of x).

  x: input sound, fs: sampling rate, w: analysis window (odd size),
  N: FFT size (minimum 512), t: threshold in negative dB,
  maxnS: maximum number of sinusoids,
  stocf: decimation factor of mag spectrum for stochastic analysis.
  Returns (y, yh, ys): output sound, sinusoidal component, stochastic component.
  """
  # Sizes are computed with floor division: they are used as array lengths
  # and slice bounds, which must be integers.
  hN = N//2                                                     # size of positive spectrum
  hM = (w.size+1)//2                                            # half analysis window size
  Ns = 512                                                      # FFT size for synthesis (even)
  H = Ns//4                                                     # Hop size used for analysis and synthesis
  hNs = Ns//2                                                   # half of synthesis FFT size
  tm = np.arange(0.01, 0.94, 0.01)
  in_time = np.concatenate( (np.array([0]), tm+0.05*np.sin(8.6*np.pi*tm), np.array([1])) ) # input time --> keep end value
  out_time = np.concatenate( (np.array([0]),                 tm           , np.array([1])) ) # output time
  timemapping = np.asarray( (in_time, out_time) )
  # timemapping = np.array( [[0, 1], [0, 2]] )                  # input time (sec), output time (sec)
  timemapping = timemapping * x.size/fs
  outsoundlength = int(round(timemapping[1, -1]*fs))            # length of output sound (integer array size)
  pend = outsoundlength - max(hNs, hM)                          # last sample to start a frame
  yhw = np.zeros(Ns)                                            # initialize output sine sound frame
  ysw = np.zeros(Ns)                                            # initialize output residual sound frame
  yh = np.zeros(outsoundlength)                                 # initialize output sine component
  ys = np.zeros(outsoundlength)                                 # initialize output residual component
  w = w / sum(w)                                                # normalize analysis window
  sw = np.zeros(Ns)
  ow = triang(2*H)                                              # overlapping window
  sw[hNs-H:hNs+H] = ow
  bh = blackmanharris(Ns)                                       # synthesis window
  bh = bh / sum(bh)                                             # normalize synthesis window
  wr = bh                                                       # window for residual
  sw[hNs-H:hNs+H] = sw[hNs-H:hNs+H] / bh[hNs-H:hNs+H]
  sws = H*hanning(Ns)/2                                         # synthesis window for stochastic

  # The residual frame starts at pin-hNs-1, so the input pointer must be at
  # least hNs+1; otherwise that start index is -1 and the slice is empty.
  minpin = max(hNs+1, hM)
  maxpin = x.size - max(hNs,hM)
  fridx = 0                                                     # frame pointer
  lastnS = 0                                                    # number of sinusoids in the previous frame
  pout = max(hNs, hM)                                           # output sound pointer

  while pout<pend:

    # A frame is treated like the initial one when it is the first frame or
    # when the previous frame had no sinusoids to continue from.
    isInitFrame = fridx==0 or lastnS==0

    # input position mapped from the current output time, kept as an integer index
    pin = int(round(np.interp(float(pout)/fs, timemapping[1,:], timemapping[0,:]) * fs))
    pin = max(minpin, pin)
    pin = min(maxpin, pin)

    #-----analysis-----
    xw = x[pin-hM:pin+hM-1] * w                                  # window the input sound
    fftbuffer = np.zeros(N)                                      # reset buffer
    fftbuffer[:hM] = xw[hM-1:]                                   # zero-phase window in fftbuffer
    fftbuffer[N-hM+1:] = xw[:hM-1]
    X = fft(fftbuffer)                                           # compute FFT
    mX = 20 * np.log10( abs(X[:hN]) )                            # magnitude spectrum of positive frequencies
    ploc = PP.peakDetection(mX, hN, t)                           # detect spectral peaks
    pX = np.unwrap( np.angle(X[:hN]) )                           # unwrapped phase spect. of positive freq.
    iploc, ipmag, ipphase = PP.peakInterp(mX, pX, ploc)          # refine peak values

    smag = np.sort(ipmag)[::-1]                                  # sort peaks by magnitude in descending order
    I = np.argsort(ipmag)[::-1]

    nS = min(maxnS, np.where(smag>t)[0].size)                    # get peaks above threshold
    smag = smag[:nS]                                             # keep one magnitude per selected sinusoid
    sloc = iploc[I[:nS]]
    sphase = ipphase[I[:nS]]
    sloc = (sloc!=0) * (sloc*Ns/N)                               # peak locations for synthesis

    if isInitFrame :                                             # update last frame data
      # stored after rescaling so both sides of the matching below are
      # expressed in synthesis bins
      lastnS = nS
      lastsloc = sloc

    lastidx = np.zeros(nS, dtype = int)
    for i in range(0, nS) :                                      # find closest peak to create trajectories
      lastidx[i] = np.argmin(abs(sloc[i] - lastsloc[:lastnS]))

    ri = pin-hNs-1                                               # input sound pointer for residual analysis
    xw2 = x[ri:ri+Ns]*wr                                         # window the input sound
    fftbuffer = np.zeros(Ns)                                     # reset buffer
    fftbuffer[:hNs] = xw2[hNs:]                                  # zero-phase window in fftbuffer
    fftbuffer[hNs:] = xw2[:hNs]
    X2 = fft(fftbuffer)                                          # compute FFT for residual analysis

    Xh = GS.genSpecSines(sloc, smag, sphase, Ns)                 # generate sines
    Xr = X2-Xh                                                   # get the residual complex spectrum
    mXr = 20 * np.log10( abs(Xr[:hNs]) )                         # magnitude spectrum of residual
    # decimate the magnitude spectrum and avoid -Inf (sample count must be an int)
    mXrenv = resample(np.maximum(-200, mXr), int(mXr.size*stocf))

    #-----synthesis data-----
    ysloc = sloc                                                 # synthesis sinusoid locs
    ysmag = smag                                                 # synthesis sinusoid amplitudes
    mYrenv = mXrenv                                              # synthesis residual envelope

    #-----transformations-----

    #-----synthesis-----

    if isInitFrame :
      # Variables need to be initialized like for the first frame
      lastysloc = np.zeros(maxnS)                                # initialize synthesis sinusoid locations
      lastysphase = 2*np.pi * np.random.rand(maxnS)              # random phases for the first frame

    if nS>lastnS :                                               # initialize peaks that start
      lastysphase = np.concatenate((lastysphase, np.zeros(nS-lastnS)))
      lastysloc = np.concatenate((lastysloc, np.zeros(nS-lastnS)))

    ysphase = lastysphase[lastidx] + 2*np.pi*(lastysloc[lastidx]+ysloc)/2/Ns*H # propagate phases

    lastysloc = ysloc                                            # update last frame data
    lastysphase = ysphase
    lastnS = nS
    lastsloc = sloc

    Yh = GS.genSpecSines(ysloc, ysmag, ysphase, Ns)              # generate spec sines
    mYs = resample(mYrenv, hNs)                                  # interpolate to original size
    pYs = 2*np.pi*np.random.rand(hNs)                            # generate phase random values

    Ys = np.zeros(Ns, dtype = complex)
    Ys[:hNs] = 10**(mYs/20) * np.exp(1j*pYs)                     # generate positive freq.
    Ys[hNs+1:] = 10**(mYs[:0:-1]/20) * np.exp(-1j*pYs[:0:-1])    # generate negative freq.

    fftbuffer = np.real( ifft(Yh) )                              # sines in time domain using IFFT
    yhw[:hNs-1] = fftbuffer[hNs+1:]                              # undo zero-phase window
    yhw[hNs-1:] = fftbuffer[:hNs+1]

    fftbuffer = np.real( ifft(Ys) )                              # stochastic in time domain using IFFT
    ysw[:hNs-1] = fftbuffer[hNs+1:]                              # undo zero-phase window
    ysw[hNs-1:] = fftbuffer[:hNs+1]

    ro = pout-hNs                                                # output sound pointer for overlap
    yh[ro:ro+Ns] += sw*yhw                                       # overlap-add for sines
    ys[ro:ro+Ns] += sws*ysw                                      # overlap-add for stochastic
    pout += H                                                    # advance sound pointer
    fridx += 1                                                   # advance frame pointer

  y = yh+ys                                                      # sum sinusoidal and stochastic components
  return y, yh, ys
Esempio n. 12
0
def sps(x, fs, w, N, t, maxnS, stocf) :
  """Analysis/synthesis of a sound using the sinusoidal plus stochastic model.

  The sound is processed frame by frame (synthesis FFT size Ns = 256, hop
  H = Ns/4).  For every frame the spectral peaks are detected, up to maxnS
  peaks above the threshold are kept as sinusoids, their spectrum is
  subtracted from a blackman-harris windowed spectrum of the same frame to
  obtain a residual, and the residual magnitude is decimated to an envelope
  that is resynthesised with random phases.  Both parts are overlap-added
  into the outputs.  Every frame is also drawn into a matplotlib figure
  (timeline, input/output signals, spectra and peak trajectories).

  x: input sound, fs: sampling rate, w: analysis window (odd size),
  N: FFT size (minimum 512), t: threshold in negative dB,
  maxnS: maximum number of sinusoids,
  stocf: decimation factor of mag spectrum for stochastic analysis
  returns y: output sound (yh+ys), yh: sinusoidal component, ys: stochastic component

  NOTE(review): index arithmetic such as N/2, Ns/4 and pend/H is used directly
  for slicing and array shapes, so it relies on `/` yielding integers
  (Python 2 semantics with integer operands) -- confirm before porting.
  """

  freq_range = 10000 # fs/2 by default
  hN = N/2                                                      # size of positive spectrum
  hM = (w.size+1)/2                                             # half analysis window size
  Ns = 256                                                      # FFT size for synthesis (even)
  H = Ns/4                                                      # Hop size used for analysis and synthesis
  hNs = Ns/2                                                    # half of synthesis FFT size
  pin = max(hNs, hM)                                            # initialize sound pointer in middle of analysis window
  pend = x.size - max(hNs, hM)                                  # last sample to start a frame
  fftbuffer = np.zeros(N)                                       # initialize buffer for FFT
  yhw = np.zeros(Ns)                                            # initialize output sine sound frame
  ysw = np.zeros(Ns)                                            # initialize output residual sound frame
  yh = np.zeros(x.size)                                         # initialize output sine component
  ys = np.zeros(x.size)                                         # initialize output residual component
  w = w / sum(w)                                                # normalize analysis window
  sw = np.zeros(Ns)                                             # synthesis window for sines (non-zero on 2*H centre samples)
  ow = triang(2*H)                                              # overlapping window
  sw[hNs-H:hNs+H] = ow
  bh = blackmanharris(Ns)                                       # synthesis window
  bh = bh / sum(bh)                                             # normalize synthesis window
  wr = bh                                                       # window for residual
  sw[hNs-H:hNs+H] = sw[hNs-H:hNs+H] / bh[hNs-H:hNs+H]           # triangular window divided by blackman-harris
  sws = H*hanning(Ns)/2                                         # synthesis window for stochastic
  lastysloc = np.zeros(maxnS)                                   # initialize synthesis harmonic locations
  ysphase = 2*np.pi * np.random.rand(maxnS)                     # initialize synthesis harmonic phases
  fridx = 0                                                     # frame pointer
  isInitFrame = True                                            # True for frames equivalent to initial frame (for synth part)
  lastnS = 0                                                    # number of sinusoids in the previous frame

  #-----initialize plots-----

  clip_in = 0.0                                                 # samples to clip input/output signal
  clip_spec = 0.0                                               # number of frames to clip spectrogram
  freq = np.arange(0, freq_range, fs/N)                         # frequency axis in Hz
  freq = freq[:freq.size-1]
  time = np.arange(0, np.float32(x.size)/fs, 1.0/fs)            # time axis in seconds
  n_frame = 0
  n_bins = freq.size
  specgram = np.ones((n_bins, pend/H)) * -200                   # initialize spectrogram
  prev_peaks_loc = np.zeros(maxnS)                              # peak frequencies (Hz) of the previous frame, for trajectories


  fig = plt.figure(figsize = (10.5, 7.1), dpi = 100)
  ax0 = plt.subplot2grid((8,6), (0, 0), colspan = 6)
  ax0.set_position([0.04, 0.955, 0.92, 0.015])
  ax0.set_title("timeline", size = 7, fontweight = 'bold')
  ax0.yaxis.set_ticks([])                           # no y axis ticks
  ax0.xaxis.set_ticks([0, np.float32(x.size)/fs])
  ax0.set_xticklabels(['0 s',  '%.2f' % (np.float32(x.size)/fs) + ' s'])
  ax0.set_xlim(0, np.float32(x.size)/fs)
  ax0.plot(time, np.zeros(x.size), lw = 1.5)
  plt.tick_params(axis = 'both', labelsize = 8)
  rect_zoom = patches.Rectangle((0, -2**7), width = (80.0*H)/fs, height = 2**15, color = 'black', alpha = 0.2)
  ax0.add_patch(rect_zoom)

  ax1 = plt.subplot2grid((8, 6), (1, 0), colspan = 6)
  ax1.set_position([0.04, 0.87, 0.92, 0.05])
  ax1.set_title("Input Signal (x)", size = 9, fontweight = 'bold')
  ax1.locator_params(axis = 'y', nbins = 5)
  ax1.set_xlim(0, (80.0*H)/fs)
  ax1.set_ylim(x.min(), x.max())
  plt.tick_params(axis = 'both', labelsize = 8)
  plt.setp(ax1.get_xticklabels(), visible = False)
  ax1.plot(time[:80*H], x[:80*H], 'b')

  ax2 = plt.subplot2grid((8, 6), (2, 0), colspan = 6, sharex = ax1, sharey = ax1)
  ax2.set_position([0.04, 0.79, 0.92, 0.05])
  ax2.set_title("Output Signal (yh)", size = 9, fontweight = 'bold')
  ax2.set_xlim(0, (80.0*H)/fs)
  ax2.set_ylim(x.min(), x.max())
  plt.tick_params(axis = 'both', labelsize = 8)

  ax3 = plt.subplot2grid((8, 6), (3, 0), rowspan = 2, colspan = 3)
  ax3.set_position([0.06, 0.52, 0.42, 0.21])
  ax3.set_title("Original spectrum (mX, iploc, ipmag, f0, hloc, hmag)", size = 9, fontweight = 'bold')
  ax3.set_xlabel("Frequency (Hz)", size = 8)
  ax3.set_ylabel("Amplitude (dB)", size = 8)
  ax3.set_xlim(0, freq_range)
  ax3.set_ylim(-100, 0)
  plt.tick_params(axis = 'both', labelsize = 8)

  ax4 = plt.subplot2grid((8, 6), (3, 4), rowspan = 2, colspan = 3, sharex = ax3, sharey = ax3)
  ax4.set_position([0.55, 0.52, 0.42, 0.21])
  ax4.set_title("Harmonic plus residual spectrum (mXh, mXr, mX2)", size = 9, fontweight = 'bold')
  ax4.set_xlabel("Frequency (Hz)", size = 8)
  ax4.set_ylabel("Amplitude (dB)", size = 8)
  ax4.set_xlim(0, freq_range)
  ax4.set_ylim(-100, 0)
  plt.tick_params(axis = 'both', labelsize = 8)

  ax5 = plt.subplot2grid((8, 6), (7, 1), rowspan = 2, colspan = 4)
  ax5.set_position([0.05, 0.03, 0.92, 0.42])
  ax5.set_title("Peak tracking", size = 9, fontweight = 'bold')
  ax5.imshow(specgram, interpolation = 'nearest', extent = (0, pend/H, 0, freq_range), aspect = 'auto', cmap = 'jet', vmin = -100, vmax = -20)
  ax5.set_ylabel("Frequency (Hz)", size = 8)
  ax5.set_xlim(0, 80)
  ax5.set_ylim(0, freq_range)
  ax5.ticklabel_format(axis = 'y', scilimits = (-2, 2))    # use scientific limits above 1e2
  plt.tick_params(axis = 'both', labelsize = 8)

  while pin<pend:

    if fridx==0 or lastnS==0 :     # whenever lastnS is zero implies frame is equivalent to initial frame
      isInitFrame = True

  #-----analysis-----
    xw = x[pin-hM:pin+hM-1] * w                                  # window the input sound
    fftbuffer = np.zeros(N)                                      # reset buffer
    fftbuffer[:hM] = xw[hM-1:]                                   # zero-phase window in fftbuffer
    fftbuffer[N-hM+1:] = xw[:hM-1]
    X = fft(fftbuffer)                                           # compute FFT
    mX = 20 * np.log10( abs(X[:hN]) )                            # magnitude spectrum of positive frequencies
    ploc = PP.peakDetection(mX, hN, t)
    pX = np.unwrap( np.angle(X[:hN]) )                           # unwrapped phase spect. of positive freq.
    iploc, ipmag, ipphase = PP.peakInterp(mX, pX, ploc)            # refine peak values

    # Store this frame's spectrum (highest bin first, matching imshow's top-down
    # rows) so the "Peak tracking" panel shows the spectrogram and not only its
    # initial -200 dB fill.
    specgram[:, n_frame] = mX[n_bins-1::-1]

    smag = np.sort(ipmag)[::-1]                                  # sort peaks by magnitude in descending order
    I = np.argsort(ipmag)[::-1]

    nS = min(maxnS, np.where(smag>t)[0].size)                    # get peaks above threshold
    sloc = iploc[I[:nS]]
    sphase = ipphase[I[:nS]]
    if isInitFrame :                                             # update last frame data
      lastnS = nS
      lastsloc = sloc
      lastsmag = smag
      lastsphase = sphase

    peaks_loc = np.float32(sloc)/N*fs                            # peak locations in Hz (for plotting)
    if isInitFrame :
      # On init frames the "last frame" is the current frame itself, so lastidx
      # (computed below) indexes into the current peaks.  Restart the plotted
      # trajectories from the current peaks too; otherwise a frame with peaks
      # that follows a frame without peaks would index an empty prev_peaks_loc.
      prev_peaks_loc = peaks_loc
    sloc = (sloc!=0) * (sloc*Ns/N)                               # peak locations for synthesis
    lastidx = np.zeros(nS, dtype = int)
    for i in range(0, nS) :  # find closest peak to create trajectories
      idx = np.argmin(abs(sloc[i] - lastsloc[:lastnS]))
      lastidx[i] = idx

    ri = pin-hNs                                                 # input sound pointer for residual analysis
    xw2 = x[ri:ri+Ns]*wr                                         # window the input sound
    fftbuffer = np.zeros(Ns)                                     # reset buffer
    fftbuffer[:hNs] = xw2[hNs:]                                  # zero-phase window in fftbuffer
    fftbuffer[hNs:] = xw2[:hNs]
    X2 = fft(fftbuffer)                                          # compute FFT for residual analysis

    Xh = GS.genSpecSines(sloc, smag, sphase, Ns)                    # generate sines
    Xr = X2-Xh                                                   # get the residual complex spectrum
    mXr = 20 * np.log10( abs(Xr[:hNs]) )                         # magnitude spectrum of residual
    mXrenv = resample(np.maximum(-200, mXr), mXr.size*stocf)     # decimate the magnitude spectrum and avoid -Inf

  #-----synthesis data-----
    ysloc = sloc                                                 # synthesis harmonics locs
    ysmag = smag[:nS]                                            # synthesis harmonic amplitudes
    mYrenv = mXrenv                                              # synthesis residual envelope

  #-----transformations-----

  #-----synthesis-----

    if isInitFrame :
      # Variables need to be initialized like for the first frame
      lastysloc = np.zeros(maxnS)                     # initialize synthesis harmonic locations
      ysphase = 2*np.pi * np.random.rand(maxnS)       # initialize synthesis harmonic phases

      lastysphase = ysphase                           # phase for first frame

    if nS>lastnS :                                    # initialize peaks that start
      lastysphase = np.concatenate((lastysphase, np.zeros(nS-lastnS)))
      lastysloc = np.concatenate((lastysloc, np.zeros(nS-lastnS)))

    ysphase = lastysphase[lastidx] + 2*np.pi*(lastysloc[lastidx]+ysloc)/2/Ns*H # propagate phases

    lastysloc = ysloc
    lastysphase = ysphase
    lastnS = nS                                       # update last frame data
    lastsloc = sloc                                   # update last frame data
    lastsmag = smag                                   # update last frame data
    lastsphase = sphase                               # update last frame data

    Yh = GS.genSpecSines(ysloc, ysmag, ysphase, Ns)      # generate spec sines
    mYs = resample(mYrenv, hNs)                       # interpolate to original size
    pYs = 2*np.pi*np.random.rand(hNs)                 # generate phase random values

    Ys = np.zeros(Ns, dtype = complex)
    Ys[:hNs] = 10**(mYs/20) * np.exp(1j*pYs)                   # generate positive freq.
    Ys[hNs+1:] = 10**(mYs[:0:-1]/20) * np.exp(-1j*pYs[:0:-1])  # generate negative freq.

    fftbuffer = np.real( ifft(Yh) )
    yhw[:hNs-1] = fftbuffer[hNs+1:]                   # sines in time domain using IFFT
    yhw[hNs-1:] = fftbuffer[:hNs+1]

    fftbuffer = np.real( ifft(Ys) )
    ysw[:hNs-1] = fftbuffer[hNs+1:]                   # stochastic in time domain using IFFT
    ysw[hNs-1:] = fftbuffer[:hNs+1]

    yh[ri:ri+Ns] += sw*yhw                            # overlap-add for sines
    ys[ri:ri+Ns] += sws*ysw                           # overlap-add for stoch

    #-----plotting-------

    # scroll the signal/spectrogram views when the analysis position gets
    # within 5 hops of the right edge of the input-signal axis
    if pin > ax1.get_xlim()[1]*fs - (5.0*H) :
      clip_in = np.float32(pin) - 50.0*H
      clip_spec = pin/H - 50.0
      rect_zoom.remove()
      rect_zoom = patches.Rectangle((clip_in/fs, -2**7), width = (80.0*H)/fs, height = 2**15, color = 'black', alpha = 0.2)
      ax0.add_patch(rect_zoom)

      ax1.cla()
      ax1.set_xlim(clip_in/fs, ((80.0*H)+clip_in)/fs)
      ax1.set_ylim(x.min(), x.max())
      ax1.set_title("Input Signal (x)", size = 9, fontweight = 'bold')
      ax1.locator_params(axis = 'y', nbins = 5)
      plt.setp(ax1.get_xticklabels(), visible = False)
      ax1.plot(time[:clip_in+80*H], x[:clip_in+80*H], 'b')

      ax2.cla()
      ax2.set_xlim(clip_in/fs, ((80.0*H)+clip_in)/fs)
      ax2.set_ylim(x.min(), x.max())
      ax2.set_title("Output Signal (yh)", size = 9, fontweight = 'bold')
      ax2.locator_params(axis = 'y', nbins = 5)
      ax2.plot(time[:ri], yh[:ri], 'b')

      ax5.set_xlim(clip_spec, clip_spec+80)

    # the two spectrum panels are redrawn from scratch on every frame
    ax3.cla()
    ax3.set_title("Original spectrum (mX, iploc, ipmag, f0, hloc, hmag)", size = 9, fontweight = 'bold')
    ax3.set_xlabel("Frequency (Hz)", size = 8)
    ax3.set_ylabel("Amplitude (dB)", size = 8)
    ax3.set_xlim(0, freq_range)
    ax3.set_ylim(-100, 0)

    ax4.cla()
    ax4.set_title("Harmonic plus residual spectrum (mXh, mXr, mX2)", size = 9, fontweight = 'bold')
    ax4.set_xlabel("Frequency (Hz)", size = 8)
    ax4.set_ylabel("Amplitude (dB)", size = 8)
    ax4.set_xlim(0, freq_range)
    ax4.set_ylim(-100, 0)

    # highlight the analysis window position on the input signal
    rect = patches.Rectangle((np.float32(pin-hM)/fs, -2**7), width = np.float32(w.size)/fs, height = 2**15, color = 'blue', alpha = 0.5)
    ax1.add_patch(rect)

    # plot the sample

    ax3.plot(freq, mX[:n_bins], 'b')                                              # plot spectrum
    ax3.fill_between(freq, -200, mX[:n_bins], facecolor = 'blue', alpha = 0.3)
    ax3.plot(np.float32(iploc[:n_bins])/N*fs, ipmag[:n_bins], 'rx', ms = 3)       # plot interpolated peak locations

    ax5.imshow(specgram, interpolation = 'nearest', extent = (0, pend/H, 0, freq_range), aspect = 'auto', cmap = 'jet', vmin = -100, vmax = -20)

    ax3.plot(peaks_loc, smag[:nS], 'o', ms = 3, mfc = 'yellow') # plot harmonics
    for i in range(0, nS):
      # connect each peak with the previous-frame peak it was matched to
      ax5.plot([n_frame-0.5, n_frame+0.5], [prev_peaks_loc[lastidx[i]], peaks_loc[i]], '-og', ms = 2.5, mfc = 'yellow', lw = 1.3)

    prev_peaks_loc = peaks_loc


    mX2 = 20 * np.log10( abs(X2[:hNs]) )                         # magnitude spectrum of positive frequencies
    mX2 = resample(np.maximum(-200, mX2), hN)                    # resample to hN bins to share the freq axis
    ax4.plot(freq[:n_bins], mX2[:n_bins], 'b', alpha = 0.3)
    ax4.fill_between(freq[:n_bins], -200, mX2[:n_bins], facecolor = 'blue', alpha = 0.1)

    mXh = 20 * np.log10( abs(Xh[:hNs]) )                         # magnitude spectrum of positive frequencies
    mXh = resample(np.maximum(-200, mXh), hN)
    ax4.plot(freq[:n_bins], mXh[:n_bins], 'g')
    ax4.fill_between(freq[:n_bins], -200, mXh[:n_bins], facecolor = 'green', alpha = 0.4)

    mXr = resample(np.maximum(-200, mXr), hN)
    ax4.plot(freq[:n_bins], mXr[:n_bins], 'r', alpha = 0.3)
    ax4.fill_between(freq[:n_bins], -200, mXr[:n_bins], facecolor = 'red', alpha = 0.1)

    # highlight the synthesis frame position on the output signal
    rect2 = patches.Rectangle((np.float32(ri)/fs, -2**7), width = np.float32(Ns)/fs, height = 2**15, color = 'green', alpha = 0.3)
    ax2.cla()
    ax2.set_xlim(clip_in/fs, ((80.0*H)+clip_in)/fs)
    ax2.set_ylim(x.min(), x.max())
    ax2.set_title("Output Signal (yh)", size = 9, fontweight = 'bold')
    ax2.locator_params(axis = 'y', nbins = 5)
    ax2.add_patch(rect2)
    ax2.plot(time[:ri+Ns], yh[:ri+Ns], 'b')
    plt.draw()
    rect2.remove()                                    # frame markers are only valid for the current frame
    rect.remove()

    n_frame += 1
    pin += H                                          # advance sound pointer
    fridx += 1                                        # advance frame pointer
    isInitFrame = False                               # variable meaningful only for current frame,
                                                      # therefore False at each frame
  y = yh+ys
  return y, yh, ys
Esempio n. 13
0
  def run(self):
    """Analysis/synthesis of a sound using the harmonic plus stochastic model.

    All parameters are read from attributes of self:
    x: input sound, fs: sampling rate, w: analysis window (odd size),
    N: FFT size (minimum 512), t: threshold in negative dB,
    nH: maximum number of harmonics, minf0: minimum f0 frequency in Hz,
    maxf0: maximum f0 frequency in Hz,
    f0et: error threshold in the f0 detection (ex: 5),
    maxhd: max. relative deviation in harmonic detection (ex: .2)
    stocf: decimation factor of mag spectrum for stochastic analysis
    plot: when true, the frames are drawn into a matplotlib figure
    process: when true, plt.draw() is called after every plotting step
    step: only frames with n_frame % step == 0 (and the last frame) are plotted
    nFrameStart: first frame number that is plotted

    Nothing is returned; when all frames are processed the signal
    "hpsDone(object, object, object, int)" is emitted with
    y: output sound (yh+ys), yh: harmonic component, ys: stochastic component
    and fs.

    NOTE(review): index arithmetic such as N/2, Ns/4 and pend/H is used
    directly for slicing and array shapes, so it relies on `/` yielding
    integers (Python 2 semantics with integer operands) -- confirm before porting.
    """

    # initialize variables
    x = self.x
    fs = self.fs
    w = self.w
    N = self.N
    t = self.t
    nH = self.nH
    minf0 = self.minf0
    maxf0 = self.maxf0
    f0et = self.f0et
    maxhd = self.maxhd
    stocf = self.stocf
    plot = self.plot
    process = self.process
    step = self.step
    nFrameStart = self.nFrameStart


    freq_range = 10000                                            # fs/2 by default

    hN = N/2                                                      # size of positive spectrum
    hM = (w.size+1)/2                                             # half analysis window size
    Ns = 512                                                      # FFT size for synthesis (even)
    H = Ns/4                                                      # Hop size used for analysis and synthesis
    hNs = Ns/2                                                    # half of synthesis FFT size
    pin = max(hNs, hM)                                            # initialize sound pointer in middle of analysis window
    pend = x.size - max(hNs, hM)                                  # last sample to start a frame
    fftbuffer = np.zeros(N)                                       # initialize buffer for FFT
    yhw = np.zeros(Ns)                                            # initialize output sound frame
    ysw = np.zeros(Ns)                                            # initialize output sound frame
    yh = np.zeros(x.size)                                         # initialize output array
    ys = np.zeros(x.size)                                         # initialize output array
    w = w / sum(w)                                                # normalize analysis window
    sw = np.zeros(Ns)                                             # synthesis window for sines (non-zero on 2*H centre samples)
    ow = triang(2*H)                                              # overlapping window
    sw[hNs-H:hNs+H] = ow
    bh = blackmanharris(Ns)                                       # synthesis window
    bh = bh / sum(bh)                                             # normalize synthesis window
    wr = bh                                                       # window for residual
    sw[hNs-H:hNs+H] = sw[hNs-H:hNs+H] / bh[hNs-H:hNs+H]           # triangular window divided by blackman-harris
    sws = H*hanning(Ns)/2                                         # synthesis window for stochastic
    lastyhloc = np.zeros(nH)                                      # initialize synthesis harmonic locations
    yhphase = 2*np.pi * np.random.rand(nH)                        # initialize synthesis harmonic phases

    n_frame = 0                                                   # initialize number of frames counter

    if plot:

    #-----initialize plots-----

      plt.ion()                                                   # activate interactive mode
      clip_in = 0.0                                               # samples to clip input/output signal
      clip_spec = 0.0                                             # number of frames to clip spectrogram
      freq = np.arange(0, freq_range, fs/N)                       # frequency axis in Hz
      freq = freq[:freq.size-1]
      time = np.arange(0, np.float32(x.size)/fs, 1.0/fs)          # time axis in seconds
      n_bins = freq.size                                          # number of total bins in the freq_range
      specgram = np.ones((n_bins, pend/H)) * -200                 # initialize spectrogram
      prev_harmonics = np.zeros(nH-1)                             # previous harmonics to create harmonic trajectories
      prev_f0 = 0                                                 # previous f0 to create f0 trajectory

      fig = plt.figure(figsize = (10.5, 7.1), dpi = 100)
      ax0 = plt.subplot2grid((8,6), (0, 0), colspan = 6)
      ax0.set_position([0.04, 0.955, 0.92, 0.015])
      ax0.set_title("timeline", size = 7, fontweight = 'bold')
      ax0.yaxis.set_ticks([])                                     # no y axis ticks
      ax0.xaxis.set_ticks([0, np.float32(x.size)/fs])             # set only two ticks in the limits of the plot
      ax0.set_xticklabels(['0 s',  '%.2f' % (np.float32(x.size)/fs) + ' s'])
      ax0.set_xlim(0, np.float32(x.size)/fs)
      ax0.plot(time, np.zeros(x.size), lw = 1.5)
      plt.tick_params(axis = 'both', labelsize = 8)
      rect_zoom = patches.Rectangle((0, -2**7), width = (80.0*H)/fs, height = 2**15, color = 'black', alpha = 0.2)
      ax0.add_patch(rect_zoom)

      ax1 = plt.subplot2grid((8, 6), (1, 0), colspan = 6)
      ax1.set_position([0.04, 0.87, 0.92, 0.05])
      ax1.set_title("Input Signal (x)", size = 9, fontweight = 'bold')
      ax1.locator_params(axis = 'y', nbins = 5)
      ax1.set_xlim(0, (80.0*H)/fs)
      ax1.set_ylim(x.min(), x.max())
      plt.tick_params(axis = 'both', labelsize = 8)
      plt.setp(ax1.get_xticklabels(), visible = False)
      ax1.plot(time[:80*H], x[:80*H], 'b')

      ax2 = plt.subplot2grid((8, 6), (2, 0), colspan = 6, sharex = ax1, sharey = ax1)
      ax2.set_position([0.04, 0.79, 0.92, 0.05])
      ax2.set_title("Output Signal (yh)", size = 9, fontweight = 'bold')
      ax2.set_xlim(0, (80.0*H)/fs)
      ax2.set_ylim(x.min(), x.max())
      plt.tick_params(axis = 'both', labelsize = 8)

      ax3 = plt.subplot2grid((8, 6), (3, 0), rowspan = 2, colspan = 3)
      ax3.set_position([0.06, 0.52, 0.42, 0.21])
      ax3.set_title("Original spectrum (mX, iploc, ipmag, f0, hloc, hmag)", size = 9, fontweight = 'bold')
      ax3.set_xlabel("Frequency (Hz)", size = 8)
      ax3.set_ylabel("Amplitude (dB)", size = 8)
      ax3.set_xlim(0, freq_range)
      ax3.set_ylim(-100, 0)
      plt.tick_params(axis = 'both', labelsize = 8)

      ax4 = plt.subplot2grid((8, 6), (3, 4), rowspan = 2, colspan = 3, sharex = ax3, sharey = ax3)
      ax4.set_position([0.55, 0.52, 0.42, 0.21])
      ax4.set_title("Harmonic plus residual spectrum (mXh, mXr, mX2)", size = 9, fontweight = 'bold')
      ax4.set_xlabel("Frequency (Hz)", size = 8)
      ax4.set_ylabel("Amplitude (dB)", size = 8)
      ax4.set_xlim(0, freq_range)
      ax4.set_ylim(-100, 0)
      plt.tick_params(axis = 'both', labelsize = 8)

      ax5 = plt.subplot2grid((8, 6), (7, 1), rowspan = 2, colspan = 4)
      ax5.set_position([0.05, 0.03, 0.92, 0.42])
      ax5.set_title("Peak tracking", size = 9, fontweight = 'bold')
      ax5.imshow(specgram, interpolation = 'nearest', extent = (0, pend/H, 0, freq_range), aspect = 'auto', cmap = 'jet', vmin = -100, vmax = -20)
      ax5.set_ylabel("Frequency (Hz)", size = 8)
      ax5.set_xlim(0, 80)
      ax5.set_ylim(0, freq_range)
      ax5.ticklabel_format(axis = 'y', scilimits = (-2, 2))    # use scientific limits above 1e2
      plt.tick_params(axis = 'both', labelsize = 8)

    while pin<pend:

    #-----analysis-----

      xw = x[pin-hM:pin+hM-1] * w                                  # window the input sound

      fftbuffer = np.zeros(N)                                      # reset buffer
      fftbuffer[:hM] = xw[hM-1:]                                   # zero-phase window in fftbuffer
      fftbuffer[N-hM+1:] = xw[:hM-1]

      X = fft(fftbuffer)                                           # compute FFT
      mX = 20 * np.log10( abs(X[:hN]) )                            # magnitude spectrum of positive frequencies
      ploc = PP.peakDetection(mX, hN, t)
      pX = np.unwrap( np.angle(X[:hN]) )                           # unwrapped phase spect. of positive freq.
      iploc, ipmag, ipphase = PP.peakInterp(mX, pX, ploc)       # refine peak values

      if plot: specgram[:, n_frame] = mX[n_bins-1::-1]             # store spectrum, highest bin first
      f0 = fd.f0DetectionTwm(iploc, ipmag, N, fs, f0et, minf0, maxf0)  # find f0

      hloc = np.zeros(nH)                                          # initialize harmonic locations
      hmag = np.zeros(nH)-100                                      # initialize harmonic magnitudes
      hphase = np.zeros(nH)                                        # initialize harmonic phases
      hf = (f0>0) * (f0*np.arange(1, nH+1))                        # initialize harmonic frequencies
      hi = 0                                                       # initialize harmonic index
      npeaks = ploc.size                                           # number of peaks found

      while f0>0 and hi<nH and hf[hi]<fs/2 :                       # find harmonic peaks
        dev = min(abs(iploc/N*fs - hf[hi]))
        pei = np.argmin(abs(iploc/N*fs - hf[hi]))                  # closest peak
        # accept the peak only if no lower harmonic already uses it and it is
        # within the allowed relative deviation from the ideal harmonic
        if ( hi==0 or not any(hloc[:hi]==iploc[pei]) ) and dev<maxhd*hf[hi] :
          hloc[hi] = iploc[pei]                                    # harmonic locations
          hmag[hi] = ipmag[pei]                                    # harmonic magnitudes
          hphase[hi] = ipphase[pei]                                # harmonic phases
        hi += 1                                                    # increase harmonic index

      harmonics = np.float32(hloc)/N*fs                            # harmonic frequencies in Hz (for plotting)
      hloc = (hloc!=0) * (hloc*Ns/N)                               # synth. locs

      ri = pin-hNs-1                                               # input sound pointer for residual analysis
      xw2 = x[ri:ri+Ns]*wr                                         # window the input sound
      fftbuffer = np.zeros(Ns)                                     # reset buffer
      fftbuffer[:hNs] = xw2[hNs:]                                  # zero-phase window in fftbuffer
      fftbuffer[hNs:] = xw2[:hNs]
      X2 = fft(fftbuffer)                                          # compute FFT for residual analysis

      Xh = GS.genSpecSines(hloc, hmag, hphase, Ns)                 # generate sines
      Xr = X2-Xh                                                   # get the residual complex spectrum
      mXr = 20 * np.log10( abs(Xr[:hNs]) )                         # magnitude spectrum of residual
      mXrenv = resample(np.maximum(-200, mXr), mXr.size*stocf)     # decimate the magnitude spectrum and avoid -Inf

    #-----synthesis data-----

      yhloc = hloc                                                 # synthesis harmonics locs
      yhmag = hmag                                                 # synthesis harmonic amplitudes
      mYrenv = mXrenv                                              # synthesis residual envelope
      yf0 = f0

    #-----transformations-----

    #-----synthesis-----

      yhphase += 2*np.pi * (lastyhloc+yhloc)/2/Ns*H                # propagate phases
      lastyhloc = yhloc

      Yh = GS.genSpecSines(yhloc, yhmag, yhphase, Ns)              # generate spec sines
      mYs = resample(mYrenv, hNs)                                  # interpolate to original size
      mYs = 10**(mYs/20)                                           # dB to linear magnitude
      if f0>0:
          mYs *= np.cos(np.pi*np.arange(0, hNs)/Ns*fs/yf0)**2      # filter residual

      fc = 1+round(500.0/fs*Ns)                                    # 500 Hz
      mYs[:fc] *= (np.arange(0, fc)/(fc-1))**2                     # HPF
      pYs = 2*np.pi * np.random.rand(hNs)                          # generate phase random values

      Ys = np.zeros(Ns, dtype = complex)
      Ys[:hNs] = mYs * np.exp(1j*pYs)                              # generate positive freq.
      Ys[hNs+1:] = mYs[:0:-1] * np.exp(-1j*pYs[:0:-1])             # generate negative freq.

      fftbuffer = np.real( ifft(Yh) )

      yhw[:hNs-1] = fftbuffer[hNs+1:]                              # sines in time domain using IFFT
      yhw[hNs-1:] = fftbuffer[:hNs+1]

      fftbuffer = np.real( ifft(Ys) )
      ysw[:hNs-1] = fftbuffer[hNs+1:]                              # stochastic in time domain using IFFT
      ysw[hNs-1:] = fftbuffer[:hNs+1]

      yh[ri:ri+Ns] += sw*yhw                                       # overlap-add for sines
      ys[ri:ri+Ns] += sws*ysw                                      # overlap-add for stoch

      #-----plotting-------

      # plot only from frame nFrameStart on, every `step` frames, and always the last frame
      if plot and n_frame>=nFrameStart and (n_frame%step == 0 or (pin+H)>pend):

      # clear all plots

        # clear only if not enough space to plot
        if pin > ax1.get_xlim()[1]*fs - (5.0*H) :
          clip_in = np.float32(pin) - 50.0*H
          clip_spec = pin/H - 50.0
          rect_zoom.remove()
          rect_zoom = patches.Rectangle((clip_in/fs, -2**7), width = (80.0*H)/fs, height = 2**15, color = 'black', alpha = 0.2)
          ax0.add_patch(rect_zoom)

          ax1.cla()
          ax1.set_xlim(clip_in/fs, ((80.0*H)+clip_in)/fs)
          ax1.set_ylim(x.min(), x.max())
          ax1.set_title("Input Signal (x)", size = 9, fontweight = 'bold')
          ax1.locator_params(axis = 'y', nbins = 5)
          plt.setp(ax1.get_xticklabels(), visible = False)
          ax1.plot(time[:clip_in+80*H], x[:clip_in+80*H], 'b')

          ax2.cla()
          ax2.set_xlim(clip_in/fs, ((80.0*H)+clip_in)/fs)
          ax2.set_ylim(x.min(), x.max())
          ax2.set_title("Output Signal (yh)", size = 9, fontweight = 'bold')
          ax2.locator_params(axis = 'y', nbins = 5)
          ax2.plot(time[:ri], yh[:ri], 'b')

          ax5.set_xlim(clip_spec, clip_spec+80)

        ax3.cla()
        ax3.set_title("Original spectrum (mX, iploc, ipmag, f0, hloc, hmag)", size = 9, fontweight = 'bold')
        ax3.set_xlabel("Frequency (Hz)", size = 8)
        ax3.set_ylabel("Amplitude (dB)", size = 8)
        ax3.set_xlim(0, freq_range)
        ax3.set_ylim(-100, 0)

        ax4.cla()
        ax4.set_title("Harmonic plus residual spectrum (mXh, mXr, mX2)", size = 9, fontweight = 'bold')
        ax4.set_xlabel("Frequency (Hz)", size = 8)
        ax4.set_ylabel("Amplitude (dB)", size = 8)
        ax4.set_xlim(0, freq_range)
        ax4.set_ylim(-100, 0)

      # plot all the information of the current sample

        rect = patches.Rectangle((np.float32(pin-hM)/fs, -2**7), width = np.float32(w.size)/fs, height = 2**15, color = 'blue', alpha = 0.5)
        ax1.add_patch(rect)
        if process: plt.draw()

        ax3.plot(freq, mX[:n_bins], 'b')                                              # plot spectrum
        ax3.fill_between(freq, -200, mX[:n_bins], facecolor = 'blue', alpha = 0.3)
        if process: plt.draw()
        ax3.plot(np.float32(iploc[:n_bins])/N*fs, ipmag[:n_bins], 'rx', ms = 3)       # plot interpolated peak locations
        if process: plt.draw()

        ax5.imshow(specgram, interpolation = 'nearest', extent = (0, pend/H, 0, freq_range), aspect = 'auto', cmap = 'jet', vmin = -100, vmax = -20)
        if process: plt.draw()

        if f0 > 0:                                                    # plot f0
          loc = np.where(iploc/N*fs == f0)[0]
          if loc.size == 0: loc = np.argmin(np.abs(iploc/N*fs-f0))    # closest peak location
          ax3.plot(f0, ipmag[loc], 'go', ms = 4)                      # plot in spectrum
          if prev_f0 != 0 and f0 != 0:                                # plot in spectrogram
            ax5.plot([n_frame-0.5, n_frame+0.5], [prev_f0, f0], '-or', ms = 3, mfc = 'green', lw = 1.6)
          elif prev_f0 == 0 and f0 != 0:                              # initialize new line of f0's
            ax5.plot(n_frame+0.5, f0, 'or', ms = 3, mfc = 'green')
          if process: plt.draw()

        if step == 1: prev_f0 = f0                # save prev. f0 only if we are not rewinding plots

        if f0 > 0: ax3.plot(harmonics[1:], hmag[1:], 'o', ms = 3, mfc = 'yellow')   # plot harmonics
        for i in range(1, nH-1):
          if prev_harmonics[i] != 0 and harmonics[i] != 0:
            ax5.plot([n_frame-0.5, n_frame+0.5], [prev_harmonics[i], harmonics[i]], '-og', ms = 2.5, mfc = 'yellow', lw = 1.3)
          elif prev_harmonics[i] == 0 and harmonics[i] != 0:          # initialize new line of harmonics
            ax5.plot(n_frame+0.5, harmonics[i], 'og', ms = 2.5, mfc = 'yellow')

        if process: plt.draw()

        if step == 1: prev_harmonics = harmonics                     # save prev. harmonics only if we are not rewinding plots

        mX2 = 20 * np.log10( abs(X2[:hNs]) )                         # magnitude spectrum of positive frequencies
        mX2 = resample(np.maximum(-200, mX2), hN)                    # resample to hN bins to share the freq axis
        ax4.plot(freq[:n_bins], mX2[:n_bins], 'b', alpha = 0.3)
        ax4.fill_between(freq[:n_bins], -200, mX2[:n_bins], facecolor = 'blue', alpha = 0.1)
        if process: plt.draw()

        mXh = 20 * np.log10( abs(Xh[:hNs]) )                         # magnitude spectrum of positive frequencies
        mXh = resample(np.maximum(-200, mXh), hN)
        ax4.plot(freq[:n_bins], mXh[:n_bins], 'g')
        ax4.fill_between(freq[:n_bins], -200, mXh[:n_bins], facecolor = 'green', alpha = 0.4)
        if process: plt.draw()

        mXr = resample(np.maximum(-200, mXr), hN)
        ax4.plot(freq[:n_bins], mXr[:n_bins], 'r', alpha = 0.3)
        ax4.fill_between(freq[:n_bins], -200, mXr[:n_bins], facecolor = 'red', alpha = 0.1)
        if process: plt.draw()

        rect2 = patches.Rectangle((np.float32(ri)/fs, -2**7), width = np.float32(Ns)/fs, height = 2**15, color = 'green', alpha = 0.3)
        ax2.cla()
        ax2.set_xlim(clip_in/fs, ((80.0*H)+clip_in)/fs)
        ax2.set_ylim(x.min(), x.max())
        ax2.set_title("Output Signal (yh)", size = 9, fontweight = 'bold')
        ax2.locator_params(axis = 'y', nbins = 5)
        ax2.add_patch(rect2)
        ax2.plot(time[:ri+Ns], yh[:ri+Ns], 'b')
        plt.draw()
        rect2.remove()                                             # frame markers are only valid for the current frame
        rect.remove()

      n_frame += 1                                                 # increment number of frames analyzed
      pin += H                                                     # advance sound pointer


    # The emitted signal carries the full output, which was never computed in
    # this method (y was an undefined name): build it from its two components.
    y = yh+ys
    self.emit(SIGNAL("hpsDone(object, object, object, int)"), y, yh, ys, fs)
Esempio n. 14
0
def hpsModel(x, fs, w, N, t, nH, minf0, maxf0, f0et, maxhd, stocf, maxnpeaksTwm=10):
  """Analysis/synthesis of a sound using the harmonic plus stochastic model.

  x: input sound, fs: sampling rate, w: analysis window,
  N: FFT size (minimum 512), t: threshold in negative dB,
  nH: maximum number of harmonics, minf0: minimum f0 frequency in Hz,
  maxf0: maximum f0 frequency in Hz,
  f0et: error threshold in the f0 detection (ex: 5),
  maxhd: max. relative deviation in harmonic detection (ex: .2),
  stocf: decimation factor of mag spectrum for stochastic analysis,
  maxnpeaksTwm: maximum number of peaks used for F0 detection
  returns y: output sound, yh: harmonic component, yst: stochastic component
  """

  # Floor division keeps every size/index an int under both Python 2 and 3.
  hN = N//2                                                     # size of positive spectrum
  hM1 = (w.size+1)//2                                           # half analysis window size by rounding
  hM2 = w.size//2                                               # half analysis window size by floor
  Ns = 512                                                      # FFT size for synthesis (even)
  H = Ns//4                                                     # Hop size used for analysis and synthesis
  hNs = Ns//2                                                   # half of synthesis FFT size
  # The residual frame starts at pin-hNs-1, so pin must be at least hNs+1:
  # with pin == hNs the slice would start at -1 and come back empty.
  # For windows longer than Ns this is the same start position as before.
  pin = max(hNs+1, hM1)                                         # initialize sound pointer in middle of analysis window
  pend = x.size - max(hNs, hM1)                                 # last sample to start a frame
  yhw = np.zeros(Ns)                                            # harmonic output frame
  ystw = np.zeros(Ns)                                           # stochastic output frame
  yh = np.zeros(x.size)                                         # harmonic output array
  yst = np.zeros(x.size)                                        # stochastic output array
  w = w / sum(w)                                                # normalize analysis window
  sw = np.zeros(Ns)
  ow = triang(2*H)                                              # overlapping window
  sw[hNs-H:hNs+H] = ow
  bh = blackmanharris(Ns)                                       # synthesis window
  bh = bh / sum(bh)                                             # normalize synthesis window
  wr = bh                                                       # window for residual
  sw[hNs-H:hNs+H] = sw[hNs-H:hNs+H] / bh[hNs-H:hNs+H]

  # High-pass ramp applied to the stochastic magnitudes (loop invariant).
  fc = int(1+round(500.0/fs*Ns))                                # 500 Hz to bin location
  hpRamp = (np.arange(fc)/float(fc-1))**2                       # float division on purpose (Python 2 safe)

  while pin<pend:
  #-----analysis-----
    xw = x[pin-hM1:pin+hM2] * w                                  # window the input sound
    fftbuffer = np.zeros(N)                                      # reset buffer
    fftbuffer[:hM1] = xw[hM2:]                                   # zero-phase window in fftbuffer
    fftbuffer[N-hM2:] = xw[:hM2]
    X = fft(fftbuffer)                                           # compute FFT
    mX = 20 * np.log10(abs(X[:hN]))                              # magnitude spectrum of positive frequencies
    ploc = PP.peakDetection(mX, hN, t)
    pX = np.unwrap(np.angle(X[:hN]))                             # unwrapped phase spect. of positive freq.
    iploc, ipmag, ipphase = PP.peakInterp(mX, pX, ploc)          # refine peak values

    f0 = fd.f0DetectionTwm(iploc, ipmag, N, fs, f0et, minf0, maxf0, maxnpeaksTwm)  # find f0
    hloc = np.zeros(nH)                                          # initialize harmonic locations
    hmag = np.zeros(nH)-100                                      # initialize harmonic magnitudes
    hphase = np.zeros(nH)                                        # initialize harmonic phases
    hf = (f0>0)*(f0*np.arange(1, nH+1))                          # initialize harmonic frequencies
    hi = 0                                                       # initialize harmonic index
    pfreq = iploc/N*fs                                           # peak frequencies in Hz

    while f0>0 and hi<nH and hf[hi]<fs/2 :                       # find harmonic peaks
      fdist = abs(pfreq - hf[hi])
      pei = np.argmin(fdist)                                     # closest peak
      dev = fdist[pei]
      # accept the peak only if no lower harmonic already claimed it and it is close enough
      if ( hi==0 or not any(hloc[:hi]==iploc[pei]) ) and dev<maxhd*hf[hi] :
        hloc[hi] = iploc[pei]                                    # harmonic locations
        hmag[hi] = ipmag[pei]                                    # harmonic magnitudes
        hphase[hi] = ipphase[pei]                                # harmonic phases
      hi += 1                                                    # increase harmonic index

    hloc[:hi] = (hloc[:hi]!=0) * (hloc[:hi]*Ns/N)                # synth. locs
    ri = pin-hNs-1                                               # input sound pointer for residual analysis
    xr = x[ri:ri+Ns]*wr                                          # window the input sound
    fftbuffer = np.zeros(Ns)                                     # reset buffer
    fftbuffer[:hNs] = xr[hNs:]                                   # zero-phase window in fftbuffer
    fftbuffer[hNs:] = xr[:hNs]
    Xr = fft(fftbuffer)                                          # compute FFT for residual analysis

  #-----synthesis-----
    Yh = GS.genSpecSines(hloc[:hi], hmag, hphase, Ns)            # generate spec sines of harmonic component
    Yr = Xr-Yh                                                   # get the residual complex spectrum
    mYr = 20 * np.log10(abs(Yr[:hNs]) )                          # magnitude spectrum of residual
    # resample needs an integer sample count; stocf is a fractional factor
    mYrenv = resample(np.maximum(-200, mYr), int(mYr.size*stocf))  # decimate the magnitude spectrum and avoid -Inf
    mYst = resample(mYrenv, hNs)                                 # interpolate to original size
    mYst = 10**(mYst/20)                                         # dB to linear magnitude
    mYst[:fc] *= hpRamp                                          # high pass filter the stochastic component
    pYst = 2*np.pi*np.random.rand(hNs)                           # generate phase random values
    Yst = np.zeros(Ns, dtype = complex)
    Yst[:hNs] = mYst * np.exp(1j*pYst)                           # generate positive freq.
    Yst[hNs+1:] = mYst[:0:-1] * np.exp(-1j*pYst[:0:-1])          # generate negative freq.

    fftbuffer = np.real(ifft(Yh))                                # inverse FFT of harmonic spectrum
    yhw[:hNs-1] = fftbuffer[hNs+1:]                              # undo zero-phase window
    yhw[hNs-1:] = fftbuffer[:hNs+1]

    fftbuffer = np.real(ifft(Yst))                               # inverse FFT of stochastic spectrum
    ystw[:hNs-1] = fftbuffer[hNs+1:]                             # undo zero-phase window
    ystw[hNs-1:] = fftbuffer[:hNs+1]

    yh[ri:ri+Ns] += sw*yhw                                       # overlap-add for sines
    yst[ri:ri+Ns] += sw*ystw                                     # overlap-add for stochastic
    pin += H                                                     # advance sound pointer

  y = yh+yst                                                     # sum of harmonic and stochastic components
  return y, yh, yst
Esempio n. 15
0
def hprModel(x, fs, w, N, t, nH, minf0, maxf0, f0et, maxhd):
  """Analysis/synthesis of a sound using the harmonic plus residual model.

  x: input sound, fs: sampling rate,
  w: analysis window (odd sizes behave exactly as before; even sizes are
     now split into rounded/floored halves instead of failing),
  N: FFT size (minimum 512), t: threshold in negative dB,
  nH: maximum number of harmonics, minf0: minimum f0 frequency in Hz,
  maxf0: maximum f0 frequency in Hz,
  f0et: error threshold in the f0 detection (ex: 5),
  maxhd: max. relative deviation in harmonic detection (ex: .2)
  returns y: output sound, yh: harmonic component, yr: residual component
  """

  # Floor division keeps every size/index an int under both Python 2 and 3.
  hN = N//2                                                     # size of positive spectrum
  hM1 = (w.size+1)//2                                           # half analysis window size by rounding
  hM2 = w.size//2                                               # half analysis window size by floor
  Ns = 512                                                      # FFT size for synthesis (even)
  H = Ns//4                                                     # Hop size used for analysis and synthesis
  hNs = Ns//2                                                   # half of synthesis FFT size
  # The residual frame starts at pin-hNs-1, so pin must be at least hNs+1:
  # with pin == hNs the slice would start at -1 and come back empty.
  # For windows longer than Ns this is the same start position as before.
  pin = max(hNs+1, hM1)                                         # initialize sound pointer in middle of analysis window
  pend = x.size - max(hNs, hM1)                                 # last sample to start a frame
  yhw = np.zeros(Ns)                                            # harmonic output frame
  yrw = np.zeros(Ns)                                            # residual output frame
  yh = np.zeros(x.size)                                         # harmonic output array
  yr = np.zeros(x.size)                                         # residual output array
  w = w / sum(w)                                                # normalize analysis window
  sw = np.zeros(Ns)
  ow = triang(2*H)                                              # overlapping window
  sw[hNs-H:hNs+H] = ow
  bh = blackmanharris(Ns)                                       # synthesis window
  bh = bh / sum(bh)                                             # normalize synthesis window
  wr = bh                                                       # window for residual
  sw[hNs-H:hNs+H] = sw[hNs-H:hNs+H] / bh[hNs-H:hNs+H]

  while pin<pend:

  #-----analysis-----
    xw = x[pin-hM1:pin+hM2] * w                                  # window the input sound
    fftbuffer = np.zeros(N)                                      # reset buffer
    fftbuffer[:hM1] = xw[hM2:]                                   # zero-phase window in fftbuffer
    fftbuffer[N-hM2:] = xw[:hM2]
    X = fft(fftbuffer)                                           # compute FFT
    mX = 20 * np.log10( abs(X[:hN]) )                            # magnitude spectrum of positive frequencies
    ploc = PP.peakDetection(mX, hN, t)
    pX = np.unwrap( np.angle(X[:hN]) )                           # unwrapped phase spect. of positive freq.
    iploc, ipmag, ipphase = PP.peakInterp(mX, pX, ploc)          # refine peak values

    f0 = fd.f0DetectionTwm(iploc, ipmag, N, fs, f0et, minf0, maxf0)  # find f0
    hloc = np.zeros(nH)                                          # initialize harmonic locations
    hmag = np.zeros(nH)-100                                      # initialize harmonic magnitudes
    hphase = np.zeros(nH)                                        # initialize harmonic phases
    hf = (f0>0)*(f0*np.arange(1, nH+1))                          # initialize harmonic frequencies
    hi = 0                                                       # initialize harmonic index
    pfreq = iploc/N*fs                                           # peak frequencies in Hz

    while f0>0 and hi<nH and hf[hi]<fs/2 :                       # find harmonic peaks
      fdist = abs(pfreq - hf[hi])
      pei = np.argmin(fdist)                                     # closest peak
      dev = fdist[pei]
      # accept the peak only if no lower harmonic already claimed it and it is close enough
      if ( hi==0 or not any(hloc[:hi]==iploc[pei]) ) and dev<maxhd*hf[hi] :
        hloc[hi] = iploc[pei]                                    # harmonic locations
        hmag[hi] = ipmag[pei]                                    # harmonic magnitudes
        hphase[hi] = ipphase[pei]                                # harmonic phases
      hi += 1                                                    # increase harmonic index

    hloc[:hi] = (hloc[:hi]!=0) * (hloc[:hi]*Ns/N)                # synth. locs
    ri = pin-hNs-1                                               # input sound pointer for residual analysis
    xr = x[ri:ri+Ns]*wr                                          # window the input sound
    fftbuffer = np.zeros(Ns)                                     # reset buffer
    fftbuffer[:hNs] = xr[hNs:]                                   # zero-phase window in fftbuffer
    fftbuffer[hNs:] = xr[:hNs]
    Xr = fft(fftbuffer)                                          # compute FFT for residual analysis

  #-----synthesis-----
    Yh = GS.genSpecSines(hloc[:hi], hmag, hphase, Ns)            # generate spec sines
    Yr = Xr-Yh                                                   # get the residual complex spectrum

    fftbuffer = np.real( ifft(Yh) )                              # inverse FFT of harmonic spectrum
    yhw[:hNs-1] = fftbuffer[hNs+1:]                              # undo zero-phase window
    yhw[hNs-1:] = fftbuffer[:hNs+1]

    fftbuffer = np.real( ifft(Yr) )                              # residual in time domain using inverse FFT
    yrw[:hNs-1] = fftbuffer[hNs+1:]                              # undo zero-phase window
    yrw[hNs-1:] = fftbuffer[:hNs+1]

    yh[ri:ri+Ns] += sw*yhw                                       # overlap-add for sines
    yr[ri:ri+Ns] += sw*yrw                                       # overlap-add for residual
    pin += H                                                     # advance sound pointer

  y = yh+yr                                                      # sum of harmonic and residual components
  return y, yh, yr
def harmonicModelPlot(x, fs, w, N, t, nH, minf0, maxf0, f0et, maxhd, maxFreq):
    """Plot a spectrogram of x with the detected harmonic peaks overlaid.

    x: input sound, fs: sampling rate, w: analysis window,
    N: FFT size, t: peak-detection threshold in negative dB,
    nH: maximum number of harmonics, minf0/maxf0: f0 search range in Hz,
    f0et: error threshold in the f0 detection,
    maxhd: max. relative deviation in harmonic detection,
    maxFreq: highest frequency (Hz) shown in the plot.
    Returns YSpec: the dB magnitude spectra of all frames, one column per
    frame, truncated to the bins below maxFreq.
    Raises ValueError if x is too short to hold a single analysis frame
    (previously this surfaced as an unbound-variable error).
    """
    # Floor division keeps every size/index an int under both Python 2 and 3.
    hN = N//2                                                     # size of positive spectrum
    hM1 = (w.size+1)//2                                           # half analysis window size by rounding
    hM2 = w.size//2                                               # half analysis window size by floor
    Ns = 4000                                                     # only used to derive the hop size and margins
    H = Ns//4                                                     # hop size between analysis frames
    hNs = Ns//2
    pin = max(hNs, hM1)                                           # initialize sound pointer in middle of analysis window
    pend = x.size - max(hNs, hM1)                                 # last sample to start a frame
    w = w / sum(w)                                                # normalize analysis window
    maxBin = N*maxFreq/float(fs)                                  # (fractional) bin position of maxFreq
    # Integer row count for slicing; capped at hN so binFreq always matches
    # the number of spectrum rows actually available.
    lastBin = min(int(maxBin), hN)
    binFreq = np.arange(lastBin)*float(fs)/N                      # the bin frequencies

    frmTime = []                                                  # time at the centre of each frame
    specCols = []                                                 # one truncated magnitude spectrum per frame
    peakRows = []                                                 # per-frame (time, frequency) rows of harmonic peaks

    while pin<pend:                                               # while sound pointer is smaller than last sample
        frmTime.append(pin/float(fs))
        xw = x[pin-hM1:pin+hM2] * w                               # window the input sound
        fftbuffer = np.zeros(N)                                   # reset buffer
        fftbuffer[:hM1] = xw[hM2:]                                # zero-phase window in fftbuffer
        fftbuffer[N-hM2:] = xw[:hM2]
        X = fft(fftbuffer)                                        # compute FFT
        mX = 20 * np.log10( abs(X[:hN]) )                         # magnitude spectrum of positive frequencies
        ploc = PP.peakDetection(mX, hN, t)                        # detect peak locations
        pX = np.unwrap( np.angle(X[:hN]) )                        # unwrapped phase spect. of positive freq.
        iploc, ipmag, ipphase = PP.peakInterp(mX, pX, ploc)       # refine peak values

        f0 = fd.f0DetectionTwm(iploc, ipmag, N, fs, f0et, minf0, maxf0)  # find f0
        hloc = np.zeros(nH)                                       # initialize harmonic locations
        hf = (f0>0)*(f0*np.arange(1, nH+1))                       # initialize harmonic frequencies
        hi = 0                                                    # initialize harmonic index
        pfreq = iploc/N*fs                                        # peak frequencies in Hz
        while f0>0 and hi<nH and hf[hi]<fs/2 :                    # find harmonic peaks
            fdist = abs(pfreq - hf[hi])
            pei = np.argmin(fdist)                                # closest peak
            dev = fdist[pei]
            # accept the peak only if no lower harmonic already claimed it and it is close enough
            if ( hi==0 or not any(hloc[:hi]==iploc[pei]) ) and dev<maxhd*hf[hi] :
                hloc[hi] = iploc[pei]                             # harmonic locations
            hi += 1                                               # increase harmonic index

        specCols.append(mX[:lastBin])                             # accumulate the STFT column
        shown = hloc[(hloc>0) & (hloc<=maxBin)]                   # harmonics found and inside the plotted range
        parray = np.zeros([shown.size, 2])
        parray[:,0] = pin/float(fs)
        parray[:,1] = shown*float(fs)/N
        peakRows.append(parray)
        pin += H

    if not specCols:
        raise ValueError("input sound is too short for a single analysis frame")

    frmTime = np.array(frmTime)                                   # the time at the centre of the frames
    YSpec = np.column_stack(specCols)                             # rows: bins, columns: frames
    specPeaks = np.vstack(peakRows)

    if hasattr(plt, 'hold'):                                      # pyplot.hold no longer exists in newer matplotlib
        plt.hold(True)
    plt.pcolormesh(frmTime,binFreq,YSpec)
    plt.scatter(specPeaks[:,0]+(0.5*H/float(fs)), specPeaks[:,1], s=10, marker='x')
    plt.xlabel('Time(s)')
    plt.ylabel('Frequency(Hz)')
    plt.autoscale(tight=True)
    plt.show()
    return YSpec
Esempio n. 17
0
def hpsModel(x, fs, w, N, t, nH, minf0, maxf0, f0et, maxhd, stocf) :
  """Animated analysis/synthesis of a sound using the harmonic plus stochastic model.

  Every intermediate step of each frame (input frame, windowed frame,
  spectrum, detected peaks, f0, harmonics, reconstructed frame, output
  signal) is drawn on a matplotlib figure while the sound is processed.

  x: input sound (divided by 2**15 on entry), fs: sampling rate,
  w: analysis window,
  N: FFT size (minimum 512), t: threshold in negative dB,
  nH: maximum number of harmonics, minf0: minimum f0 frequency in Hz,
  maxf0: maximum f0 frequency in Hz,
  f0et: error threshold in the f0 detection (ex: 5),
  maxhd: max. relative deviation in harmonic detection (ex: .2)
  stocf: decimation factor of mag spectrum for stochastic analysis
  returns y: output sound, yh: harmonic component, ys: stochastic component
  """

  x = np.float32(x) / (2**15)                                   # normalize input signal

  plt.figure(figsize = (10.5, 6.5), dpi = 100)
  ax1 = plt.subplot2grid((4, 6), (0, 1), colspan = 4)
  ax1.set_position([0.10, 0.77, 0.8, 0.16])
  ax1.set_xlim(0, 10000)
  ax1.set_ylim(x.min(), x.max())
  ax1.set_title("Input Signal")
  plt.setp(ax1.get_xticklabels(), visible = False)

  ax2 = plt.subplot2grid((4, 6), (1, 1), colspan = 4, sharex = ax1, sharey = ax1)
  ax2.set_position([0.10, 0.55, 0.8, 0.16])
  ax2.set_xlim(0, 10000)
  ax2.set_ylim(x.min(), x.max())
  ax2.set_title("Output Signal")

  ax3 = plt.subplot2grid((4, 6), (2, 0), rowspan = 2, colspan = 2)
  ax3.set_position([0.05, 0.08, 0.35, 0.35])
  ax3.set_title("Frame")
  ax3.set_xlim(0, w.size)

  ax5 = plt.subplot2grid((4, 6), (2, 3), rowspan = 2, colspan = 4)
  ax5.set_position([0.47, 0.08, 0.5, 0.35])
  ax5.set_title("Spectrum")
  ax5.set_xlabel("Frequency (Hz)")
  ax5.set_ylabel("Amplitude (dB)")
  ax5.set_xlim(0, fs/2)

  frameLen = w.size                                             # x-range of the frame plot

  def showFrame(title, data, *fmt):
    # Redraw the frame axes (ax3) from scratch with a single curve.
    ax3.cla()
    ax3.set_title(title)
    ax3.plot(data, *fmt)
    ax3.set_xlim(0, frameLen)
    ax3.ticklabel_format(scilimits = (-3,3))                     # use scientific limits above 1e3
    plt.draw()

  # Floor division keeps every size/index an int under both Python 2 and 3.
  hN = N//2                                                     # size of positive spectrum
  hM1 = (w.size+1)//2                                           # half analysis window size by rounding
  hM2 = w.size//2                                               # half analysis window size by floor
  Ns = 512                                                      # FFT size for synthesis (even)
  H = Ns//4                                                     # Hop size used for analysis and synthesis
  hNs = Ns//2                                                   # half of synthesis FFT size
  # The residual frame starts at pin-hNs-1, so pin must be at least hNs+1:
  # with pin == hNs the slice would start at -1 and come back empty.
  # For windows longer than Ns this is the same start position as before.
  pin = max(hNs+1, hM1)                                         # initialize sound pointer in middle of analysis window
  pend = x.size - max(hNs, hM1)                                 # last sample to start a frame
  yhw = np.zeros(Ns)                                            # harmonic output frame
  ysw = np.zeros(Ns)                                            # stochastic output frame
  yh = np.zeros(x.size)                                         # harmonic output array
  ys = np.zeros(x.size)                                         # stochastic output array
  w = w / sum(w)                                                # normalize analysis window
  sw = np.zeros(Ns)
  ow = triang(2*H)                                              # overlapping window
  sw[hNs-H:hNs+H] = ow
  bh = blackmanharris(Ns)                                       # synthesis window
  bh = bh / sum(bh)                                             # normalize synthesis window
  wr = bh                                                       # window for residual
  sw[hNs-H:hNs+H] = sw[hNs-H:hNs+H] / bh[hNs-H:hNs+H]
  sws = H*hanning(Ns)/2                                         # synthesis window for stochastic
  np.random.rand(nH)                                            # result unused; draw kept so seeded runs give the same random phases as before
  # One frequency per bin of mX. The old arange(0, fs/2, fs/N) relied on
  # Python 2 integer division for both its step and its length.
  freq = np.arange(hN) * float(fs) / N                          # frequency axis in Hz

  ax1.plot(x[:10000])
  plt.draw()

  while pin<pend:

    # flash a marker over the portion of the input being analysed
    rect = patches.Rectangle((pin-hM1, -2**7), width = w.size, height = 2**15, color = 'red', alpha = 0.3)
    ax1.add_patch(rect)
    plt.draw()
    rect.remove()

  #-----analysis-----
    xw = x[pin-hM1:pin+hM2] * w                                  # window the input sound
    fftbuffer = np.zeros(N)                                      # reset buffer
    fftbuffer[:hM1] = xw[hM2:]                                   # zero-phase window in fftbuffer
    fftbuffer[N-hM2:] = xw[:hM2]

    showFrame("Frame", x[pin-hM1:pin+hM2])
    ax3.set_ylim(ax3.get_ylim())                                 # freeze y-limits before overlaying the window
    ax3.plot(w, 'r')
    plt.draw()

    showFrame("Windowed Frame", xw, 'b')
    showFrame("Windowed Frame zero-phase", fftbuffer, 'b')

    X = fft(fftbuffer)                                           # compute FFT
    mX = 20 * np.log10( abs(X[:hN]) )                            # magnitude spectrum of positive frequencies
    ploc = PP.peakDetection(mX, hN, t)
    pX = np.unwrap( np.angle(X[:hN]) )                           # unwrapped phase spect. of positive freq.
    iploc, ipmag, ipphase = PP.peakInterp(mX, pX, ploc)          # refine peak values
    pfreq = iploc/N*fs                                           # peak frequencies in Hz

    ax5.cla()
    ax5.set_title("Spectrum")
    ax5.set_xlabel("Frequency (Hz)")
    ax5.set_ylabel("Amplitude (dB)")
    ax5.set_xlim(0, fs/2)
    ax5.plot(freq, mX, 'b')
    ax5.set_ylim(ax5.get_ylim())                                 # freeze y-limits before filling
    ax5.fill_between(freq, ax5.get_ylim()[0], mX, facecolor = 'blue', alpha = 0.3)
    plt.draw()
    ax5.plot(np.float32(iploc)/N*fs, ipmag, 'ro', ms = 4, alpha = 0.4)
    plt.draw()

    f0 = fd.f0DetectionTwm(iploc, ipmag, N, fs, f0et, minf0, maxf0)  # find f0

    if f0 > 0:
      # argmin picks the exact match when one exists, else the closest peak
      loc = np.argmin(np.abs(pfreq-f0))
      ax5.plot(f0, ipmag[loc], 'go', ms = 4, alpha = 1)
      plt.draw()

    hloc = np.zeros(nH)                                          # initialize harmonic locations
    hmag = np.zeros(nH)-100                                      # initialize harmonic magnitudes
    hphase = np.zeros(nH)                                        # initialize harmonic phases
    hf = (f0>0)*(f0*np.arange(1, nH+1))                          # initialize harmonic frequencies
    hi = 0                                                       # initialize harmonic index

    while f0>0 and hi<nH and hf[hi]<fs/2 :                       # find harmonic peaks
      fdist = abs(pfreq - hf[hi])
      pei = np.argmin(fdist)                                     # closest peak
      dev = fdist[pei]
      # accept the peak only if no lower harmonic already claimed it and it is close enough
      if ( hi==0 or not any(hloc[:hi]==iploc[pei]) ) and dev<maxhd*hf[hi] :
        hloc[hi] = iploc[pei]                                    # harmonic locations
        hmag[hi] = ipmag[pei]                                    # harmonic magnitudes
        hphase[hi] = ipphase[pei]                                # harmonic phases
      hi += 1                                                    # increase harmonic index

    ax5.plot(np.float32(hloc)/N*fs, hmag, 'yo', ms = 4, alpha = 0.7)
    plt.draw()
    hloc = (hloc!=0) * (hloc*Ns/N)                               # synth. locs

    ri = pin-hNs-1                                               # input sound pointer for residual analysis
    xw2 = x[ri:ri+Ns]*wr                                         # window the input sound
    fftbuffer = np.zeros(Ns)                                     # reset buffer
    fftbuffer[:hNs] = xw2[hNs:]                                  # zero-phase window in fftbuffer
    fftbuffer[hNs:] = xw2[:hNs]
    X2 = fft(fftbuffer)                                          # compute FFT for residual analysis

  #-----synthesis-----
    Yh = GS.genSpecSines(hloc, hmag, hphase, Ns)                 # generate spec sines
    Xr = X2-Yh                                                   # get the residual complex spectrum
    mXr = 20 * np.log10( abs(Xr[:hNs]) )                         # magnitude spectrum of residual
    # resample needs an integer sample count; stocf is a fractional factor
    mXrenv = resample(np.maximum(-200, mXr), int(mXr.size*stocf))  # decimate the magnitude spectrum

    mYs = resample(mXrenv, hNs)                                  # interpolate to original size
    pYs = 2*np.pi*np.random.rand(hNs)                            # generate phase random values

    Ys = np.zeros(Ns, dtype = complex)
    Ys[:hNs] = 10**(mYs/20) * np.exp(1j*pYs)                     # generate positive freq.
    Ys[hNs+1:] = 10**(mYs[:0:-1]/20) * np.exp(-1j*pYs[:0:-1])    # generate negative freq.

    fftbuffer = np.real( ifft(Yh) )                              # inverse FFT
    showFrame("Reconstructed Frame", fftbuffer, 'g')

    yhw[:hNs-1] = fftbuffer[hNs+1:]                              # undo zero-phase window
    yhw[hNs-1:] = fftbuffer[:hNs+1]
    showFrame("Reconstructed Frame", yhw, 'g')

    fftbuffer = np.real( ifft(Ys) )                              # stochastic component in time domain using inverse FFT
    ysw[:hNs-1] = fftbuffer[hNs+1:]                              # undo zero-phase window
    ysw[hNs-1:] = fftbuffer[:hNs+1]

    yh[ri:ri+Ns] += sw*yhw                                       # overlap-add for sines
    ys[ri:ri+Ns] += sws*ysw                                      # overlap-add for stochastic
    pin += H                                                     # advance sound pointer

    showFrame("Reconstructed Frame", sw*yhw, 'g')

    # note: pin has already been advanced, so the marker is drawn at the next frame position
    rect2 = patches.Rectangle((pin-hM1, -2**7), width = Ns, height = 2**15, color = 'green', alpha = 0.3)
    ax2.cla()
    ax2.set_xlim(0, 10000)
    ax2.set_ylim(x.min(), x.max())
    ax2.set_title("Output Signal")
    ax2.add_patch(rect2)
    ax2.plot(yh, 'b')
    plt.draw()
    rect2.remove()

  y = yh+ys                                                      # sum of harmonic and stochastic components
  return y, yh, ys
Esempio n. 18
0
def sprModel(x, fs, w, N, t):
  """Analysis/synthesis of a sound using the sinusoidal plus residual model.

  x: input sound (numpy array), fs: sampling rate (not used by this routine),
  w: analysis window (numpy array), N: FFT size (minimum 512),
  t: threshold in negative dB.
  Returns (y, ys, yr): output sound, sinusoidal component, residual component,
  each an array with the same number of samples as x.
  """

  # Floor division keeps every size/offset an int, so they stay valid slice
  # indices under Python 3 as well as Python 2.
  hN = N // 2                                                   # size of positive spectrum
  hM1 = (w.size + 1) // 2                                       # half analysis window size by rounding
  hM2 = w.size // 2                                             # half analysis window size by floor
  Ns = 512                                                      # FFT size for synthesis (even)
  H = Ns // 4                                                   # hop size used for analysis and synthesis
  hNs = Ns // 2                                                 # half of synthesis FFT size
  # The residual frame starts at ri = pin-hNs-1, so pin must be at least hNs+1;
  # otherwise ri would be -1 and x[ri:ri+Ns] would be an empty slice.
  pin = max(hNs + 1, hM1)                                       # initialize sound pointer in middle of analysis window
  pend = x.size - max(hNs, hM1)                                 # last sample to start a frame
  ysw = np.zeros(Ns)                                            # sinusoidal output frame
  yrw = np.zeros(Ns)                                            # residual output frame
  ys = np.zeros(x.size)                                         # sinusoidal output array
  yr = np.zeros(x.size)                                         # residual output array
  w = w / sum(w)                                                # normalize analysis window
  bh = blackmanharris(Ns)                                       # synthesis window
  bh = bh / sum(bh)                                             # normalize synthesis window
  wr = bh                                                       # window for residual
  # Overlap-add window: a triangular window over the central 2*H samples,
  # divided by the blackman-harris window already applied to each frame.
  sw = np.zeros(Ns)
  sw[hNs-H:hNs+H] = triang(2*H) / bh[hNs-H:hNs+H]

  while pin < pend:
    #-----analysis-----
    xw = x[pin-hM1:pin+hM2] * w                                 # window the input sound
    fftbuffer = np.zeros(N)                                     # reset buffer
    fftbuffer[:hM1] = xw[hM2:]                                  # zero-phase window in fftbuffer
    fftbuffer[N-hM2:] = xw[:hM2]
    X = fft(fftbuffer)                                          # compute FFT
    mX = 20 * np.log10(abs(X[:hN]))                             # magnitude spectrum (dB) of positive frequencies
    ploc = PP.peakDetection(mX, hN, t)                          # peak detection on the magnitude spectrum
    pX = np.unwrap(np.angle(X[:hN]))                            # unwrapped phase spectrum of positive frequencies
    iploc, ipmag, ipphase = PP.peakInterp(mX, pX, ploc)         # refine peak values

    # Rescale peak locations from the N-point analysis grid to the Ns-point
    # synthesis grid; locations equal to 0 stay 0.
    iploc = (iploc != 0) * (iploc*Ns/N)
    ri = pin - hNs - 1                                          # input sound pointer for residual analysis
    xr = x[ri:ri+Ns] * wr                                       # window the input sound
    fftbuffer = np.zeros(Ns)                                    # reset buffer
    fftbuffer[:hNs] = xr[hNs:]                                  # zero-phase window in fftbuffer
    fftbuffer[hNs:] = xr[:hNs]
    Xr = fft(fftbuffer)                                         # compute FFT for residual analysis

    #-----synthesis-----
    Ys = GS.genSpecSines(iploc, ipmag, ipphase, Ns)             # generate spectrum of sinusoidal component
    Yr = Xr - Ys                                                # residual complex spectrum

    fftbuffer = np.real(ifft(Ys))                               # inverse FFT of sinusoidal spectrum
    ysw[:hNs-1] = fftbuffer[hNs+1:]                             # undo zero-phase window
    ysw[hNs-1:] = fftbuffer[:hNs+1]

    fftbuffer = np.real(ifft(Yr))                               # inverse FFT of residual spectrum
    yrw[:hNs-1] = fftbuffer[hNs+1:]                             # undo zero-phase window
    yrw[hNs-1:] = fftbuffer[:hNs+1]

    ys[ri:ri+Ns] += sw * ysw                                    # overlap-add for sines
    yr[ri:ri+Ns] += sw * yrw                                    # overlap-add for residual
    pin += H                                                    # advance sound pointer

  y = ys + yr                                                   # sum of sinusoidal and residual components
  return y, ys, yr