def hpsModel(x, fs, w, N, t, nH, minf0, maxf0, f0et, stocf): """ Analysis/synthesis of a sound using the harmonic plus stochastic model, one frame at a time, no harmonic tracking x: input sound; fs: sampling rate, w: analysis window; N: FFT size (minimum 512), t: threshold in negative dB, nH: maximum number of harmonics, minf0: minimum f0 frequency in Hz; maxf0: maximim f0 frequency in Hz, f0et: error threshold in the f0 detection (ex: 5); stocf: decimation factor of mag spectrum for stochastic analysis returns y: output sound, yh: harmonic component, yst: stochastic component """ hN = N / 2 # size of positive spectrum hM1 = int(math.floor( (w.size + 1) / 2)) # half analysis window size by rounding hM2 = int(math.floor(w.size / 2)) # half analysis window size by floor Ns = 512 # FFT size for synthesis (even) H = Ns / 4 # Hop size used for analysis and synthesis hNs = Ns / 2 pin = max(hNs, hM1) # initialize sound pointer in middle of analysis window pend = x.size - max(hNs, hM1) # last sample to start a frame fftbuffer = np.zeros(N) # initialize buffer for FFT yhw = np.zeros(Ns) # initialize output sound frame ystw = np.zeros(Ns) # initialize output sound frame yh = np.zeros(x.size) # initialize output array yst = np.zeros(x.size) # initialize output array w = w / sum(w) # normalize analysis window sw = np.zeros(Ns) ow = triang(2 * H) # overlapping window sw[hNs - H:hNs + H] = ow bh = blackmanharris(Ns) # synthesis window bh = bh / sum(bh) # normalize synthesis window wr = bh # window for residual sw[hNs - H:hNs + H] = sw[hNs - H:hNs + H] / bh[hNs - H:hNs + H] # synthesis window for harmonic component sws = H * hanning(Ns) / 2 # synthesis window for stochastic hfreqp = [] f0t = 0 f0stable = 0 while pin < pend: #-----analysis----- x1 = x[pin - hM1:pin + hM2] # select frame mX, pX = DFT.dftAnal(x1, w, N) # compute dft ploc = UF.peakDetection(mX, t) # find peaks iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc) # refine peak values ipfreq = fs * iploc / N # convert peak locations to Hz f0t = UF.f0Twm(ipfreq, ipmag, f0et, minf0, maxf0, f0stable) # find f0 if ((f0stable==0)&(f0t>0)) \ or ((f0stable>0)&(np.abs(f0stable-f0t)<f0stable/5.0)): f0stable = f0t # consider a stable f0 if it is close to the previous one else: f0stable = 0 hfreq, hmag, hphase = HM.harmonicDetection(ipfreq, ipmag, ipphase, f0t, nH, hfreqp, fs) # find harmonics hfreqp = hfreq ri = pin - hNs - 1 # input sound pointer for residual analysis xw2 = x[ri:ri + Ns] * wr # window the input sound fftbuffer = np.zeros(Ns) # reset buffer fftbuffer[:hNs] = xw2[hNs:] # zero-phase window in fftbuffer fftbuffer[hNs:] = xw2[:hNs] X2 = fft(fftbuffer) # compute FFT for residual analysis #-----synthesis----- Yh = UF.genSpecSines(hfreq, hmag, hphase, Ns, fs) # generate spec sines of harmonic component Xr = X2 - Yh # get the residual complex spectrum mXr = 20 * np.log10(abs(Xr[:hNs])) # magnitude spectrum of residual mXrenv = resample( np.maximum(-200, mXr), mXr.size * stocf) # decimate the magnitude spectrum and avoid -Inf stocEnv = resample(mXrenv, hNs) # interpolate to original size pYst = 2 * np.pi * np.random.rand(hNs) # generate phase random values Yst = np.zeros(Ns, dtype=complex) Yst[:hNs] = 10**(stocEnv / 20) * np.exp( 1j * pYst) # generate positive freq. Yst[hNs + 1:] = 10**(stocEnv[:0:-1] / 20) * np.exp( -1j * pYst[:0:-1]) # generate negative freq. fftbuffer = np.zeros(Ns) fftbuffer = np.real(ifft(Yh)) # inverse FFT of harmonic spectrum yhw[:hNs - 1] = fftbuffer[hNs + 1:] # undo zero-phase window yhw[hNs - 1:] = fftbuffer[:hNs + 1] fftbuffer = np.zeros(Ns) fftbuffer = np.real(ifft(Yst)) # inverse FFT of stochastic spectrum ystw[:hNs - 1] = fftbuffer[hNs + 1:] # undo zero-phase window ystw[hNs - 1:] = fftbuffer[:hNs + 1] yh[ri:ri + Ns] += sw * yhw # overlap-add for sines yst[ri:ri + Ns] += sws * ystw # overlap-add for stochastic pin += H # advance sound pointer y = yh + yst # sum of harmonic and stochastic components return y, yh, yst
def hprModel(x, fs, w, N, t, nH, minf0, maxf0, f0et): """ Analysis/synthesis of a sound using the harmonic plus residual model x: input sound, fs: sampling rate, w: analysis window, N: FFT size (minimum 512), t: threshold in negative dB, nH: maximum number of harmonics, minf0: minimum f0 frequency in Hz, maxf0: maximim f0 frequency in Hz, f0et: error threshold in the f0 detection (ex: 5), maxhd: max. relative deviation in harmonic detection (ex: .2) returns y: output sound, yh: harmonic component, xr: residual component """ hN = N/2 # size of positive spectrum hM1 = int(math.floor((w.size+1)/2)) # half analysis window size by rounding hM2 = int(math.floor(w.size/2)) # half analysis window size by floor Ns = 512 # FFT size for synthesis (even) H = Ns/4 # Hop size used for analysis and synthesis hNs = Ns/2 pin = max(hNs, hM1) # initialize sound pointer in middle of analysis window pend = x.size - max(hNs, hM1) # last sample to start a frame fftbuffer = np.zeros(N) # initialize buffer for FFT yhw = np.zeros(Ns) # initialize output sound frame xrw = np.zeros(Ns) # initialize output sound frame yh = np.zeros(x.size) # initialize output array xr = np.zeros(x.size) # initialize output array w = w / sum(w) # normalize analysis window sw = np.zeros(Ns) ow = triang(2*H) # overlapping window sw[hNs-H:hNs+H] = ow bh = blackmanharris(Ns) # synthesis window bh = bh / sum(bh) # normalize synthesis window wr = bh # window for residual sw[hNs-H:hNs+H] = sw[hNs-H:hNs+H] / bh[hNs-H:hNs+H] hfreqp = [] f0t = 0 f0stable = 0 while pin<pend: #-----analysis----- x1 = x[pin-hM1:pin+hM2] # select frame mX, pX = DFT.dftAnal(x1, w, N) # compute dft ploc = UF.peakDetection(mX, t) # find peaks iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc) # refine peak values ipfreq = fs * iploc/N # convert locations to Hz f0t = UF.f0Twm(ipfreq, ipmag, f0et, minf0, maxf0, f0stable) # find f0 if ((f0stable==0)&(f0t>0)) \ or ((f0stable>0)&(np.abs(f0stable-f0t)<f0stable/5.0)): f0stable = f0t # consider a stable f0 if it is close to the previous one else: f0stable = 0 hfreq, hmag, hphase = HM.harmonicDetection(ipfreq, ipmag, ipphase, f0t, nH, hfreqp, fs) # find harmonics hfreqp = hfreq ri = pin-hNs-1 # input sound pointer for residual analysis xw2 = x[ri:ri+Ns]*wr # window the input sound fftbuffer = np.zeros(Ns) # reset buffer fftbuffer[:hNs] = xw2[hNs:] # zero-phase window in fftbuffer fftbuffer[hNs:] = xw2[:hNs] X2 = fft(fftbuffer) # compute FFT of input signal for residual analysis #-----synthesis----- Yh = UF.genSpecSines(hfreq, hmag, hphase, Ns, fs) # generate sines Xr = X2-Yh # get the residual complex spectrum fftbuffer = np.zeros(Ns) fftbuffer = np.real(ifft(Yh)) # inverse FFT of harmonic spectrum yhw[:hNs-1] = fftbuffer[hNs+1:] # undo zero-phase window yhw[hNs-1:] = fftbuffer[:hNs+1] fftbuffer = np.zeros(Ns) fftbuffer = np.real(ifft(Xr)) # inverse FFT of residual spectrum xrw[:hNs-1] = fftbuffer[hNs+1:] # undo zero-phase window xrw[hNs-1:] = fftbuffer[:hNs+1] yh[ri:ri+Ns] += sw*yhw # overlap-add for sines xr[ri:ri+Ns] += sw*xrw # overlap-add for residual pin += H # advance sound pointer y = yh+xr # sum of harmonic and residual components return y, yh, xr
def hpsModel(x, fs, w, N, t, nH, minf0, maxf0, f0et, stocf): """ Analysis/synthesis of a sound using the harmonic plus stochastic model, one frame at a time, no harmonic tracking x: input sound; fs: sampling rate, w: analysis window; N: FFT size (minimum 512), t: threshold in negative dB, nH: maximum number of harmonics, minf0: minimum f0 frequency in Hz; maxf0: maximim f0 frequency in Hz, f0et: error threshold in the f0 detection (ex: 5); stocf: decimation factor of mag spectrum for stochastic analysis returns y: output sound, yh: harmonic component, yst: stochastic component """ hN = N/2 # size of positive spectrum hM1 = int(math.floor((w.size+1)/2)) # half analysis window size by rounding hM2 = int(math.floor(w.size/2)) # half analysis window size by floor Ns = 512 # FFT size for synthesis (even) H = Ns/4 # Hop size used for analysis and synthesis hNs = Ns/2 pin = max(hNs, hM1) # initialize sound pointer in middle of analysis window pend = x.size - max(hNs, hM1) # last sample to start a frame fftbuffer = np.zeros(N) # initialize buffer for FFT yhw = np.zeros(Ns) # initialize output sound frame ystw = np.zeros(Ns) # initialize output sound frame yh = np.zeros(x.size) # initialize output array yst = np.zeros(x.size) # initialize output array w = w / sum(w) # normalize analysis window sw = np.zeros(Ns) ow = triang(2*H) # overlapping window sw[hNs-H:hNs+H] = ow bh = blackmanharris(Ns) # synthesis window bh = bh / sum(bh) # normalize synthesis window wr = bh # window for residual sw[hNs-H:hNs+H] = sw[hNs-H:hNs+H] / bh[hNs-H:hNs+H] # synthesis window for harmonic component sws = H*hanning(Ns)/2 # synthesis window for stochastic hfreqp = [] f0t = 0 f0stable = 0 while pin<pend: #-----analysis----- x1 = x[pin-hM1:pin+hM2] # select frame mX, pX = DFT.dftAnal(x1, w, N) # compute dft ploc = UF.peakDetection(mX, t) # find peaks iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc) # refine peak values ipfreq = fs * iploc/N # convert peak locations to Hz f0t = UF.f0Twm(ipfreq, ipmag, f0et, minf0, maxf0, f0stable) # find f0 if ((f0stable==0)&(f0t>0)) \ or ((f0stable>0)&(np.abs(f0stable-f0t)<f0stable/5.0)): f0stable = f0t # consider a stable f0 if it is close to the previous one else: f0stable = 0 hfreq, hmag, hphase = HM.harmonicDetection(ipfreq, ipmag, ipphase, f0t, nH, hfreqp, fs) # find harmonics hfreqp = hfreq ri = pin-hNs-1 # input sound pointer for residual analysis xw2 = x[ri:ri+Ns]*wr # window the input sound fftbuffer = np.zeros(Ns) # reset buffer fftbuffer[:hNs] = xw2[hNs:] # zero-phase window in fftbuffer fftbuffer[hNs:] = xw2[:hNs] X2 = fft(fftbuffer) # compute FFT for residual analysis #-----synthesis----- Yh = UF.genSpecSines(hfreq, hmag, hphase, Ns, fs) # generate spec sines of harmonic component Xr = X2-Yh # get the residual complex spectrum mXr = 20 * np.log10(abs(Xr[:hNs])) # magnitude spectrum of residual mXrenv = resample(np.maximum(-200, mXr), mXr.size*stocf) # decimate the magnitude spectrum and avoid -Inf stocEnv = resample(mXrenv, hNs) # interpolate to original size pYst = 2*np.pi*np.random.rand(hNs) # generate phase random values Yst = np.zeros(Ns, dtype = complex) Yst[:hNs] = 10**(stocEnv/20) * np.exp(1j*pYst) # generate positive freq. Yst[hNs+1:] = 10**(stocEnv[:0:-1]/20) * np.exp(-1j*pYst[:0:-1]) # generate negative freq. fftbuffer = np.zeros(Ns) fftbuffer = np.real(ifft(Yh)) # inverse FFT of harmonic spectrum yhw[:hNs-1] = fftbuffer[hNs+1:] # undo zero-phase window yhw[hNs-1:] = fftbuffer[:hNs+1] fftbuffer = np.zeros(Ns) fftbuffer = np.real(ifft(Yst)) # inverse FFT of stochastic spectrum ystw[:hNs-1] = fftbuffer[hNs+1:] # undo zero-phase window ystw[hNs-1:] = fftbuffer[:hNs+1] yh[ri:ri+Ns] += sw*yhw # overlap-add for sines yst[ri:ri+Ns] += sws*ystw # overlap-add for stochastic pin += H # advance sound pointer y = yh+yst # sum of harmonic and stochastic components return y, yh, yst
def hprModel(x, fs, w, N, t, nH, minf0, maxf0, f0et): """ Analysis/synthesis of a sound using the harmonic plus residual model x: input sound, fs: sampling rate, w: analysis window, N: FFT size (minimum 512), t: threshold in negative dB, nH: maximum number of harmonics, minf0: minimum f0 frequency in Hz, maxf0: maximim f0 frequency in Hz, f0et: error threshold in the f0 detection (ex: 5), maxhd: max. relative deviation in harmonic detection (ex: .2) returns y: output sound, yh: harmonic component, xr: residual component """ hN = N / 2 # size of positive spectrum hM1 = int(math.floor( (w.size + 1) / 2)) # half analysis window size by rounding hM2 = int(math.floor(w.size / 2)) # half analysis window size by floor Ns = 512 # FFT size for synthesis (even) H = Ns / 4 # Hop size used for analysis and synthesis hNs = Ns / 2 pin = max(hNs, hM1) # initialize sound pointer in middle of analysis window pend = x.size - max(hNs, hM1) # last sample to start a frame fftbuffer = np.zeros(N) # initialize buffer for FFT yhw = np.zeros(Ns) # initialize output sound frame xrw = np.zeros(Ns) # initialize output sound frame yh = np.zeros(x.size) # initialize output array xr = np.zeros(x.size) # initialize output array w = w / sum(w) # normalize analysis window sw = np.zeros(Ns) ow = triang(2 * H) # overlapping window sw[hNs - H:hNs + H] = ow bh = blackmanharris(Ns) # synthesis window bh = bh / sum(bh) # normalize synthesis window wr = bh # window for residual sw[hNs - H:hNs + H] = sw[hNs - H:hNs + H] / bh[hNs - H:hNs + H] hfreqp = [] f0t = 0 f0stable = 0 while pin < pend: #-----analysis----- x1 = x[pin - hM1:pin + hM2] # select frame mX, pX = DFT.dftAnal(x1, w, N) # compute dft ploc = UF.peakDetection(mX, t) # find peaks iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc) # refine peak values ipfreq = fs * iploc / N # convert locations to Hz f0t = UF.f0Twm(ipfreq, ipmag, f0et, minf0, maxf0, f0stable) # find f0 if ((f0stable==0)&(f0t>0)) \ or ((f0stable>0)&(np.abs(f0stable-f0t)<f0stable/5.0)): f0stable = f0t # consider a stable f0 if it is close to the previous one else: f0stable = 0 hfreq, hmag, hphase = HM.harmonicDetection(ipfreq, ipmag, ipphase, f0t, nH, hfreqp, fs) # find harmonics hfreqp = hfreq ri = pin - hNs - 1 # input sound pointer for residual analysis xw2 = x[ri:ri + Ns] * wr # window the input sound fftbuffer = np.zeros(Ns) # reset buffer fftbuffer[:hNs] = xw2[hNs:] # zero-phase window in fftbuffer fftbuffer[hNs:] = xw2[:hNs] X2 = fft( fftbuffer) # compute FFT of input signal for residual analysis #-----synthesis----- Yh = UF.genSpecSines(hfreq, hmag, hphase, Ns, fs) # generate sines Xr = X2 - Yh # get the residual complex spectrum fftbuffer = np.zeros(Ns) fftbuffer = np.real(ifft(Yh)) # inverse FFT of harmonic spectrum yhw[:hNs - 1] = fftbuffer[hNs + 1:] # undo zero-phase window yhw[hNs - 1:] = fftbuffer[:hNs + 1] fftbuffer = np.zeros(Ns) fftbuffer = np.real(ifft(Xr)) # inverse FFT of residual spectrum xrw[:hNs - 1] = fftbuffer[hNs + 1:] # undo zero-phase window xrw[hNs - 1:] = fftbuffer[:hNs + 1] yh[ri:ri + Ns] += sw * yhw # overlap-add for sines xr[ri:ri + Ns] += sw * xrw # overlap-add for residual pin += H # advance sound pointer y = yh + xr # sum of harmonic and residual components return y, yh, xr
f0et = 5 maxnpeaksTwm = 5 minSineDur = .1 harmDevSlope = 0.01 Ns = 512 H = Ns//4 x1 = x[pos-hM1:pos+hM2] x2 = x[pos-Ns//2-1:pos+Ns//2-1] mX, pX = DFT.dftAnal(x1, w, N) ploc = UF.peakDetection(mX, t) iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc) ipfreq = fs*iploc/N f0 = UF.f0Twm(ipfreq, ipmag, f0et, minf0, maxf0) hfreqp = [] hfreq, hmag, hphase = HM.harmonicDetection(ipfreq, ipmag, ipphase, f0, nH, hfreqp, fs, harmDevSlope) Yh = UF.genSpecSines(hfreq, hmag, hphase, Ns, fs) mYh = 20 * np.log10(abs(Yh[:Ns//2])) pYh = np.unwrap(np.angle(Yh[:Ns//2])) bh=blackmanharris(Ns) X2 = fft(fftshift(x2*bh/sum(bh))) Xr = X2-Yh mXr = 20 * np.log10(abs(Xr[:Ns//2])) pXr = np.unwrap(np.angle(Xr[:Ns//2])) xrw = np.real(fftshift(ifft(Xr))) * H * 2 yhw = np.real(fftshift(ifft(Yh))) * H * 2 maxplotfreq = 8000.0 plt.figure(1, figsize=(9, 7)) plt.subplot(3,2,1)
f0et = 5 nH = 60 harmDevSlope = .001 stocf = .4 # stochastic approx. factor w = get_window('blackman', M) hM1 = int(math.floor((M+1)/2)) hM2 = int(math.floor(M/2)) x1 = x[pin-hM1:pin+hM2] mX, pX = DFT.dftAnal(x1, w, N) ploc = UF.peakDetection(mX, t) iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc) ipfreq = fs*iploc/N f0 = UF.f0Twm(ipfreq, ipmag, f0et, minf0, maxf0, 0) hfreq, hmag, hphase = HM.harmonicDetection(ipfreq, ipmag, ipphase, f0, nH, [], fs, harmDevSlope) Ns = 512 hNs = 256 Yh = UF.genSpecSines(hfreq, hmag, hphase, Ns, fs) wr = get_window('blackmanharris', Ns) xw2 = x[pin-hNs-1:pin+hNs-1] * wr / sum(wr) fftbuffer = np.zeros(Ns) fftbuffer[:hNs] = xw2[hNs:] fftbuffer[hNs:] = xw2[:hNs] X2 = fft(fftbuffer) Xr = X2 - Yh mXr = 20 * np.log10(abs(Xr[:hNs])) # converting to a DB scale mXrenv = resample(np.maximum(-200, mXr), mXr.size*stocf)
def synthesis(self, harmonicsOutputFilename = None, residualOutputFilename = None): if len(self.pitch) == 0: print 'please do getMelody at first, then do saveMelody.' return if len(self.pitch) < len(self.mX): print 'please make sure that the pitch track belongs to the loaded audio, and they are equal length.' return if len(self.pitch) > len(self.mX): print 'pitch track has more frames than audio file, we will cut frames surplus in pitch track.' self.pitch = self.pitch[:len(self.mX)] if harmonicsOutputFilename == None: harmonicsOutputFilename = self.inputFilename[:-4] + '-harmonics.wav' if residualOutputFilename == None: residualOutputFilename = self.inputFilename[:-4] + '-residual.wav' #----- synthesis code----- H = self.hopSize M = self.frameSize N = 2*self.frameSize fs = self.fs t = -60 # threshold peak detection devRatio = 10 nH = 15 x = self.audio winAnalysis = 'hann' w = get_window(winAnalysis, M) hM1 = int(math.floor((w.size+1)/2)) # half analysis window size by rounding hM2 = int(math.floor(w.size/2)) # half analysis window size by floor Ns = 4*H # FFT size for synthesis (even) hNs = Ns/2 startApp = max(hNs, hM1) # init sound pointer in middle of anal window pin = startApp pend = x.size - startApp # last sample to start a frame x = np.append(np.zeros(startApp),x) # add zeros at beginning to center first window at sample 0 x = np.append(x,np.zeros(startApp)) # add zeros at the end to analyze last sample fftbuffer = np.zeros(N) # initialize buffer for FFT yh = np.zeros(Ns) # initialize output sound frame yho = np.zeros(x.size) # initialize output array harmonics xr = np.zeros(Ns) # initialize output sound frame xro = np.zeros(x.size) # initialize output array residual w = w / sum(w) # normalize analysis window sw = np.zeros(Ns) # initialize synthesis window ow = triang(2*H) # overlapping window sw[hNs-H:hNs+H] = ow bh = blackmanharris(Ns) # synthesis window bh = bh / sum(bh) # normalize synthesis window sw[hNs-H:hNs+H] = sw[hNs-H:hNs+H] / bh[hNs-H:hNs+H] # window for overlap-add hfreqp = [] f0t = 0 f0stable = 0 cnt = 0 print 'synthesizing ... ...' while pin<pend: #-----analysis----- x1 = x[pin-hM1:pin+hM2] # select frame x2 = x[pin-hNs-1:pin+hNs-1] mX, pX = DFT.dftAnal(x1, w, N) # compute dft ploc = UF.peakDetection(mX, t) # detect peak locations iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc) # refine peak values ipfreq = fs * iploc/N f0t = self.pitch[cnt] if ((f0stable==0)&(f0t>0)) \ or ((f0stable>0)&(np.abs(f0stable-f0t)<f0stable/5.0)): f0stable = f0t # consider a stable f0 if it is close to the previous one else: f0stable = 0 hfreq, hmag, hphase = HM.harmonicDetection(ipfreq, ipmag, ipphase, f0t, nH, hfreqp, fs, devRatio) # find harmonics hfreqp = hfreq #-----synthesis----- #-----harmonics----- Yh = UF.genSpecSines(hfreq, hmag, hphase, Ns, fs) # generate spec sines fftbuffer = np.real(ifft(Yh)) # inverse FFT yh[:hNs-1] = fftbuffer[hNs+1:] # undo zero-phase window yh[hNs-1:] = fftbuffer[:hNs+1] yho[pin-hNs:pin+hNs] += sw*yh # overlap-add #-----residual----- X2 = fft(fftshift(x2*bh)) Xr = X2 - Yh fftbuffer = np.real(ifft(Xr)) # inverse FFT xr[:hNs-1] = fftbuffer[hNs+1:] # undo zero-phase window xr[hNs-1:] = fftbuffer[:hNs+1] xro[pin-hNs:pin+hNs] += sw*xr # overlap-add pin += H # advance sound pointer cnt+=1 yho = np.delete(yho, range(startApp)) # delete half of first window which was added in stftAnal yho = np.delete(yho, range(yho.size-startApp, yho.size)) # add zeros at the end to analyze last sample xro = np.delete(xro, range(startApp)) # delete half of first window which was added in stftAnal xro = np.delete(xro, range(xro.size-startApp, xro.size)) # add zeros at the end to analyze last sample UF.wavwrite(yho, fs, harmonicsOutputFilename) UF.wavwrite(xro, fs, residualOutputFilename) print('synthesis done, harmonics file is saved at :' + harmonicsOutputFilename + '\n' + 'residual file is saved at :' + residualOutputFilename + '\n') self.yho = yho self.xro = xro return (yho, xro)
def loudnessHarmonics (fileName, dumpFile = False): eps = np.finfo(np.float).eps path2SmsTools = '../../sms-tools-master' path2Models = os.path.join(path2SmsTools, 'software/models') sys.path.append(path2Models) import utilFunctions as UF import harmonicModel as HM import dftModel as DFT # Computing predominant melody H = 128 M = 2048 fs = 44100 guessUnvoiced = True MELODIA = ess.PredominantMelody(guessUnvoiced=guessUnvoiced, frameSize=M, hopSize=H) audio = ess.MonoLoader(filename = fileName, sampleRate=fs) () audioEL = ess.EqualLoudness() (audio) pitch = MELODIA(audioEL)[0] # Computing loudness including harmonics LOUDNESS = ess.Loudness() winAnalysis = 'hann' t = -80 harmLoudness = [] ## Synthesis nH = 15 f0et = 5 x = audioEL w = get_window(winAnalysis, M) hM1 = int(math.floor(w.size+1)/2) hM2 = int(math.floor(w.size/2)) Ns = 4*H hNs = Ns/2 startApp = max(hNs, hM1) pin = startApp pend = x.size - startApp x = np.append(np.zeros(startApp), x) x = np.append(x, np.zeros(startApp)) N = 2 * M fftbuffer = np.zeros(N) yh = np.zeros(Ns) y = np.zeros(x.size) w = w / sum(w) sw = np.zeros(Ns) ow = triang(2 * H) sw[hNs-H:hNs+H] = ow bh = blackmanharris(Ns) bh = bh / sum(bh) sw[hNs-H:hNs+H] = sw[hNs-H:hNs+H] / bh[hNs-H:hNs+H] hfreqp = [] f0t = 0 f0stable = 0 cnt = 0 while pin < pend: x1 = x[pin-hM1:pin+hM2] mX, pX = DFT.dftAnal(x1, w, N) ploc = UF.peakDetection(mX, t) iploc, ipmag, ipphase = UF. peakInterp(mX, pX, ploc) ipfreq = fs * iploc/N f0t = pitch[cnt] if ((f0stable == 0) & (f0t>0) or ((f0stable>0) & (np.abs(f0stable-f0t)<f0stable/5.0))): f0stable = f0t else: f0stable = 0 hfreq, hmag, hphase = HM.harmonicDetection(ipfreq, ipmag, ipphase, f0t, nH, hfreqp, fs) hfreqp = hfreq Yh = UF.genSpecSines(hfreq, hmag, hphase, Ns, fs) fftbuffer = np.real(ifft(Yh)) yh[:hNs-1] = fftbuffer[hNs+1:] yh[hNs-1:] = fftbuffer[:hNs+1] yh_frame = sw*yh y[pin-hNs:pin+hNs] += yh_frame pin += H cnt+=1 harmLoudness.append(LOUDNESS(yh_frame.tolist())) harmLoudness = np.array(harmLoudness) timeStamps = np.arange(harmLoudness.size) * H / float(fs) # Plotting # plt.plot(timeStamps, harmLoudness, color = 'b', linewidth=1) # plt.xlabel('Time (s)') # plt.ylabel('Amplitude') # plt.show() loudnessData = np.column_stack((timeStamps, harmLoudness)) # Dumping a csv file if dumpFile: np.savetxt(fileName[:-4] + '-loudnessHarmonics.csv', loudnessData, delimiter=',') return loudnessData