def f0Detection(stftMx, stftPx, fs, N, t, minf0, maxf0, f0et):
    """
    Fundamental frequency detection over precomputed spectra using the twm algorithm
    stftMx: magnitude spectra, one frame per row; stftPx: phase spectra, one frame per row;
    fs: sampling rate; N: FFT size; t: threshold in negative dB,
    minf0: minimum f0 frequency in Hz, maxf0: maximum f0 frequency in Hz,
    f0et: error threshold in the f0 detection (ex: 5),
    returns f0: numpy array holding the f0 value found for every frame
    raises ValueError if minf0 < 0 or maxf0 >= 10000
    """
    if minf0 < 0:  # reject a negative lower bound
        raise ValueError("Minumum fundamental frequency (minf0) smaller than 0")
    if maxf0 >= 10000:  # fixed 10 kHz ceiling (not derived from fs)
        raise ValueError("Maximum fundamental frequency (maxf0) bigger than 10000Hz")

    f0_track = []  # collected per-frame estimates (converted to an array once)
    f0stable = 0  # last accepted f0, 0 while no stable f0 is being tracked
    for i in range(np.shape(stftMx)[0]):
        mX = stftMx[i, :]
        pX = stftPx[i, :]
        ploc = pdc.peakDetection(mX, t)  # detect peak locations
        iploc, ipmag, ipphase = pdc.peakInterp(mX, pX, ploc)  # refine peak values
        ipfreq = fs * iploc / N  # convert bin locations to Hz
        f0t = UF.f0Twm(ipfreq, ipmag, f0et, minf0, maxf0, f0stable)  # find f0
        # keep f0t as the stable value when it starts a track or stays within
        # 20% of the previous stable value; otherwise drop the track
        starts_track = f0stable == 0 and f0t > 0
        stays_close = f0stable > 0 and np.abs(f0stable - f0t) < f0stable / 5.0
        f0stable = f0t if (starts_track or stays_close) else 0
        f0_track.append(f0t)
    # always hand back a flat float array, even when there were no frames
    return np.array(f0_track, dtype=float).ravel()
def harmonicModel(x, fs, w, N, t, nH, minf0, maxf0, f0et):
    """
    Analysis/synthesis of a sound using the sinusoidal harmonic model
    x: input sound, fs: sampling rate, w: analysis window,
    N: FFT size (minimum 512), t: threshold in negative dB,
    nH: maximum number of harmonics, minf0: minimum f0 frequency in Hz,
    maxf0: maximum f0 frequency in Hz,
    f0et: error threshold in the f0 detection (ex: 5),
    returns y: output array sound (same length as x)
    """
    hM1 = (w.size + 1) // 2  # half analysis window size by rounding
    hM2 = w.size // 2  # half analysis window size by floor
    x = np.append(np.zeros(hM2), x)  # add zeros at beginning to center first window at sample 0
    x = np.append(x, np.zeros(hM1))  # add zeros at the end to analyze last sample
    Ns = 512  # FFT size for synthesis (even)
    # floor division keeps these integers so they can be used as slice indices
    H = Ns // 4  # hop size used for analysis and synthesis
    hNs = Ns // 2
    pin = max(hNs, hM1)  # init sound pointer in middle of analysis window
    pend = x.size - max(hNs, hM1)  # last sample to start a frame
    yh = np.zeros(Ns)  # output sound frame
    y = np.zeros(x.size)  # output array
    w = w / sum(w)  # normalize analysis window
    sw = np.zeros(Ns)  # synthesis window
    ow = triang(2 * H)  # overlapping window
    sw[hNs - H:hNs + H] = ow
    bh = blackmanharris(Ns)
    bh = bh / sum(bh)  # normalized blackman-harris window
    # triangular window divided by the blackman-harris window, for overlap-add
    sw[hNs - H:hNs + H] = sw[hNs - H:hNs + H] / bh[hNs - H:hNs + H]
    hfreqp = []  # harmonic frequencies of the previous frame
    f0stable = 0
    while pin < pend:
        #-----analysis-----
        x1 = x[pin - hM1:pin + hM2]  # select frame
        mX, pX = DFT.dftAnal(x1, w, N)  # compute dft
        ploc = UF.peakDetection(mX, t)  # detect peak locations
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)  # refine peak values
        ipfreq = fs * iploc / N  # convert locations to Hz
        f0t = UF.f0Twm(ipfreq, ipmag, f0et, minf0, maxf0, f0stable)  # find f0
        if (f0stable == 0 and f0t > 0) or \
                (f0stable > 0 and np.abs(f0stable - f0t) < f0stable / 5.0):
            f0stable = f0t  # consider a stable f0 if it is close to the previous one
        else:
            f0stable = 0
        hfreq, hmag, hphase = harmonicDetection(ipfreq, ipmag, ipphase, f0t, nH, hfreqp, fs)  # find harmonics
        hfreqp = hfreq
        #-----synthesis-----
        Yh = UF.genSpecSines(hfreq, hmag, hphase, Ns, fs)  # generate spec sines
        fftbuffer = np.real(ifft(Yh))  # inverse FFT
        yh[:hNs - 1] = fftbuffer[hNs + 1:]  # undo zero-phase window
        yh[hNs - 1:] = fftbuffer[:hNs + 1]
        y[pin - hNs:pin + hNs] += sw * yh  # overlap-add
        pin += H  # advance sound pointer
    y = np.delete(y, range(hM2))  # drop the zeros that were prepended to x
    y = np.delete(y, range(y.size - hM1, y.size))  # drop the zeros that were appended to x
    return y
def harmonicModel(x, fs, w, N, t, nH, minf0, maxf0, f0et):
    """
    Analysis/synthesis of a sound using the sinusoidal harmonic model
    x: input sound, fs: sampling rate, w: analysis window,
    N: FFT size (minimum 512), t: threshold in negative dB,
    nH: maximum number of harmonics, minf0: minimum f0 frequency in Hz,
    maxf0: maximum f0 frequency in Hz,
    f0et: error threshold in the f0 detection (ex: 5),
    returns y: output array sound (same length as x)
    """
    hM1 = (w.size + 1) // 2  # half analysis window size by rounding
    hM2 = w.size // 2  # half analysis window size by floor
    x = np.append(np.zeros(hM2), x)  # add zeros at beginning to center first window at sample 0
    x = np.append(x, np.zeros(hM1))  # add zeros at the end to analyze last sample
    Ns = 512  # FFT size for synthesis (even)
    # integer hop/half sizes: every later slice bound is then an int, so no
    # per-use int() casts are needed (and int() must never be applied to ow)
    H = Ns // 4  # hop size used for analysis and synthesis
    hNs = Ns // 2
    pin = max(hNs, hM1)  # init sound pointer in middle of analysis window
    pend = x.size - max(hNs, hM1)  # last sample to start a frame
    yh = np.zeros(Ns)  # output sound frame
    y = np.zeros(x.size)  # output array
    w = w / sum(w)  # normalize analysis window
    lo, hi = hNs - H, hNs + H  # span of the overlapping window inside sw
    sw = np.zeros(Ns)  # synthesis window
    sw[lo:hi] = triang(2 * H)  # overlapping window
    bh = blackmanharris(Ns)
    bh = bh / sum(bh)  # normalized blackman-harris window
    sw[lo:hi] = sw[lo:hi] / bh[lo:hi]  # window for overlap-add
    hfreqp = []  # harmonic frequencies of the previous frame
    f0stable = 0
    while pin < pend:
        #-----analysis-----
        x1 = x[pin - hM1:pin + hM2]  # select frame
        mX, pX = DFT.dftAnal(x1, w, N)  # compute dft
        ploc = UF.peakDetection(mX, t)  # detect peak locations
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)  # refine peak values
        ipfreq = fs * iploc / N  # convert locations to Hz
        f0t = UF.f0Twm(ipfreq, ipmag, f0et, minf0, maxf0, f0stable)  # find f0
        if (f0stable == 0 and f0t > 0) or \
                (f0stable > 0 and np.abs(f0stable - f0t) < f0stable / 5.0):
            f0stable = f0t  # consider a stable f0 if it is close to the previous one
        else:
            f0stable = 0
        hfreq, hmag, hphase = harmonicDetection(ipfreq, ipmag, ipphase, f0t, nH, hfreqp, fs)  # find harmonics
        hfreqp = hfreq
        #-----synthesis-----
        Yh = UF.genSpecSines(hfreq, hmag, hphase, Ns, fs)  # generate spec sines
        fftbuffer = np.real(ifft(Yh))  # inverse FFT
        yh[:hNs - 1] = fftbuffer[hNs + 1:]  # undo zero-phase window
        yh[hNs - 1:] = fftbuffer[:hNs + 1]
        y[pin - hNs:pin + hNs] += sw * yh  # overlap-add
        pin += H  # advance sound pointer
    y = np.delete(y, range(hM2))  # drop the zeros that were prepended to x
    y = np.delete(y, range(y.size - hM1, y.size))  # drop the zeros that were appended to x
    return y
def f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et):
    """
    Fundamental frequency detection of a sound using twm algorithm
    x: input sound; fs: sampling rate; w: analysis window;
    N: FFT size; H: hop size; t: threshold in negative dB,
    minf0: minimum f0 frequency in Hz, maxf0: maximum f0 frequency in Hz,
    f0et: error threshold in the f0 detection (ex: 5),
    returns f0: numpy array holding the f0 value found for every frame
    raises ValueError if minf0 < 0, maxf0 >= 10000 or H <= 0
    """
    if minf0 < 0:  # reject a negative lower bound
        raise ValueError(
            "Minumum fundamental frequency (minf0) smaller than 0")
    if maxf0 >= 10000:  # fixed 10 kHz ceiling (not derived from fs)
        raise ValueError(
            "Maximum fundamental frequency (maxf0) bigger than 10000Hz")
    if H <= 0:  # a non-positive hop would never advance the frame pointer
        raise ValueError("Hop size (H) smaller or equal to 0")

    hM1 = (w.size + 1) // 2  # half analysis window size by rounding
    hM2 = w.size // 2  # half analysis window size by floor
    x = np.append(np.zeros(hM2), x)  # add zeros at beginning to center first window at sample 0
    x = np.append(x, np.zeros(hM1))  # add zeros at the end to analyze last sample
    pin = hM1  # init sound pointer in middle of analysis window
    pend = x.size - hM1  # last sample to start a frame
    w = w / sum(w)  # normalize analysis window
    f0_track = []  # collected per-frame estimates (converted to an array once)
    f0stable = 0  # last accepted f0, 0 while no stable f0 is being tracked
    while pin < pend:
        x1 = x[pin - hM1:pin + hM2]  # select frame
        mX, pX = DFT.dftAnal(x1, w, N)  # compute dft
        ploc = UF.peakDetection(mX, t)  # detect peak locations
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)  # refine peak values
        ipfreq = fs * iploc / N  # convert locations to Hz
        f0t = UF.f0Twm(ipfreq, ipmag, f0et, minf0, maxf0, f0stable)  # find f0
        if (f0stable == 0 and f0t > 0) or \
                (f0stable > 0 and np.abs(f0stable - f0t) < f0stable / 5.0):
            f0stable = f0t  # consider a stable f0 if it is close to the previous one
        else:
            f0stable = 0
        f0_track.append(f0t)  # add f0 to output
        pin += H  # advance sound pointer
    # always hand back a flat float array, even when no frame was analyzed
    return np.array(f0_track, dtype=float).ravel()
def harmonicModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope=0.01, minSineDur=0.02):
    """
    Analysis of a sound using the sinusoidal harmonic model
    x: input sound; fs: sampling rate, w: analysis window; N: FFT size (minimum 512);
    H: hop size; t: threshold in negative dB,
    nH: maximum number of harmonics; minf0: minimum f0 frequency in Hz,
    maxf0: maximum f0 frequency in Hz; f0et: error threshold in the f0 detection (ex: 5),
    harmDevSlope: slope of harmonic deviation; minSineDur: minimum length of harmonics
    returns xhfreq, xhmag, xhphase: harmonic frequencies, magnitudes and phases
    (one row per analyzed frame)
    raises ValueError if minSineDur < 0, if H <= 0, or if no frame can be analyzed
    """
    if minSineDur < 0:  # raise exception if minSineDur is smaller than 0
        raise ValueError("Minimum duration of sine tracks smaller than 0")
    if H <= 0:  # a non-positive hop would never advance the frame pointer
        raise ValueError("Hop size (H) smaller or equal to 0")

    hM1 = (w.size + 1) // 2  # half analysis window size by rounding
    hM2 = w.size // 2  # half analysis window size by floor
    x = np.append(np.zeros(hM2), x)  # add zeros at beginning to center first window at sample 0
    x = np.append(x, np.zeros(hM2))  # add zeros at the end to analyze last sample
    pin = hM1  # init sound pointer in middle of analysis window
    pend = x.size - hM1  # last sample to start a frame
    w = w / sum(w)  # normalize analysis window
    hfreqp = []  # harmonic frequencies of previous frame
    f0stable = 0  # last accepted f0, 0 while no stable f0 is being tracked
    # per-frame results are collected in lists and stacked once after the loop
    freq_frames, mag_frames, phase_frames = [], [], []
    while pin <= pend:
        x1 = x[pin - hM1:pin + hM2]  # select frame
        mX, pX = DFT.dftAnal(x1, w, N)  # compute dft
        ploc = UF.peakDetection(mX, t)  # detect peak locations
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)  # refine peak values
        ipfreq = fs * iploc / N  # convert locations to Hz
        f0t = UF.f0Twm(ipfreq, ipmag, f0et, minf0, maxf0, f0stable)  # find f0
        if (f0stable == 0 and f0t > 0) or \
                (f0stable > 0 and np.abs(f0stable - f0t) < f0stable / 5.0):
            f0stable = f0t  # consider a stable f0 if it is close to the previous one
        else:
            f0stable = 0
        hfreq, hmag, hphase = harmonicDetection(
            ipfreq, ipmag, ipphase, f0t, nH, hfreqp, fs, harmDevSlope)  # find harmonics
        hfreqp = hfreq
        freq_frames.append(hfreq)
        mag_frames.append(hmag)
        phase_frames.append(hphase)
        pin += H  # advance sound pointer
    if not freq_frames:  # nothing was analyzed, so there is nothing to stack
        raise ValueError("Input sound too short to analyze a single frame")
    xhfreq = np.vstack(freq_frames)
    xhmag = np.vstack(mag_frames)
    xhphase = np.vstack(phase_frames)
    xhfreq = SM.cleaningSineTracks(xhfreq, round(fs * minSineDur / H))  # delete tracks shorter than minSineDur
    return xhfreq, xhmag, xhphase
def proc_frame(self, frame):
    """
    Append new samples to the internal buffer and analyze every hop that now fits
    frame: array of new input samples, appended to self.frames
    For each analyzed position the magnitude and phase spectra are stacked into
    self.magnitudes / self.phases, the f0 estimate is appended to
    self.fundamentals and a "time<TAB>f0" line is written to self.fundamentals_file.
    The f0 stability state is local to this call, so it restarts at 0 on every call.
    """
    self.frames = np.append(self.frames, frame)
    pend = self.frames.size - self.hM1  # last sample to start a frame
    f0stable = 0  # last accepted f0, 0 while no stable f0 is being tracked
    while self.pin < pend:
        # select frame
        x1 = self.frames[self.pin - self.hM1:self.pin + self.hM2]
        # compute dft
        mX, pX = DFT.dftAnal(x1, self.w, self.N)
        if self.pin == self.hM1:
            self.magnitudes = mX
            self.phases = pX
        else:
            self.magnitudes = np.vstack((self.magnitudes, mX))
            # stack the phase spectrum here (not the magnitudes again)
            self.phases = np.vstack((self.phases, pX))
        # detect peak locations
        ploc = UF.peakDetection(mX, self.t)
        # refine peak values
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)
        # convert locations to Hz
        ipfreq = self.fs * iploc / self.N
        # find f0
        f0t = UF.f0Twm(ipfreq, ipmag, self.f0et,
                       self.minf0, self.maxf0, f0stable)
        if (f0stable == 0 and f0t > 0) or \
                (f0stable > 0 and np.abs(f0stable - f0t) < f0stable / 5.0):
            # consider a stable f0 if it is close to the previous one
            f0stable = f0t
        else:
            f0stable = 0
        self.fundamentals = np.append(self.fundamentals, f0t)
        self.fundamentals_file.write('%f\t%f\n' % (self.cur_time, f0t))
        self.pin += self.H
        self.cur_time += 1.0 * self.H / self.fs
    # keep only the most recent MAX_BUF analysis results
    if self.fundamentals.shape[0] > self.MAX_BUF:
        self.fundamentals = self.fundamentals[-self.MAX_BUF:]
        self.magnitudes = self.magnitudes[-self.MAX_BUF:]
        self.phases = self.phases[-self.MAX_BUF:]
    # keep at most self.fs buffered samples and shift the frame pointer by
    # the number of samples dropped from the front
    if self.frames.shape[0] > self.fs:
        self.pin -= self.frames.shape[0] - self.fs
        self.frames = self.frames[-self.fs:]
def harmonicModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope=0.01, minSineDur=.02):
    """
    Analysis of a sound using the sinusoidal harmonic model
    x: input sound; fs: sampling rate, w: analysis window; N: FFT size (minimum 512);
    H: hop size; t: threshold in negative dB,
    nH: maximum number of harmonics; minf0: minimum f0 frequency in Hz,
    maxf0: maximum f0 frequency in Hz; f0et: error threshold in the f0 detection (ex: 5),
    harmDevSlope: slope of harmonic deviation; minSineDur: minimum length of harmonics
    returns xhfreq, xhmag, xhphase: harmonic frequencies, magnitudes and phases
    (one row per analyzed frame)
    raises ValueError if minSineDur < 0, if H <= 0, or if no frame can be analyzed
    """
    if minSineDur < 0:  # raise exception if minSineDur is smaller than 0
        raise ValueError("Minimum duration of sine tracks smaller than 0")
    if H <= 0:  # with a non-positive hop the analysis loop would never end
        raise ValueError("Hop size (H) smaller or equal to 0")

    hM1 = (w.size + 1) // 2  # half analysis window size by rounding
    hM2 = w.size // 2  # half analysis window size by floor
    x = np.append(np.zeros(hM2), x)  # add zeros at beginning to center first window at sample 0
    x = np.append(x, np.zeros(hM2))  # add zeros at the end to analyze last sample
    pin = hM1  # init sound pointer in middle of analysis window
    pend = x.size - hM1  # last sample to start a frame
    w = w / sum(w)  # normalize analysis window
    hfreqp = []  # harmonic frequencies of previous frame
    f0stable = 0  # last accepted f0, 0 while no stable f0 is being tracked
    rows_freq = []  # harmonic frequencies, one entry per frame
    rows_mag = []  # harmonic magnitudes, one entry per frame
    rows_phase = []  # harmonic phases, one entry per frame
    while pin <= pend:
        x1 = x[pin - hM1:pin + hM2]  # select frame
        mX, pX = DFT.dftAnal(x1, w, N)  # compute dft
        ploc = UF.peakDetection(mX, t)  # detect peak locations
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)  # refine peak values
        ipfreq = fs * iploc / N  # convert locations to Hz
        f0t = UF.f0Twm(ipfreq, ipmag, f0et, minf0, maxf0, f0stable)  # find f0
        starts_track = f0stable == 0 and f0t > 0
        stays_close = f0stable > 0 and np.abs(f0stable - f0t) < f0stable / 5.0
        # consider a stable f0 if it is close to the previous one
        f0stable = f0t if (starts_track or stays_close) else 0
        hfreq, hmag, hphase = harmonicDetection(
            ipfreq, ipmag, ipphase, f0t, nH, hfreqp, fs, harmDevSlope)  # find harmonics
        hfreqp = hfreq
        rows_freq.append(hfreq)
        rows_mag.append(hmag)
        rows_phase.append(hphase)
        pin += H  # advance sound pointer
    if not rows_freq:  # nothing was analyzed, so there is nothing to stack
        raise ValueError("Input sound too short to analyze a single frame")
    # stack once instead of re-allocating the result arrays on every frame
    xhfreq = np.vstack(rows_freq)
    xhmag = np.vstack(rows_mag)
    xhphase = np.vstack(rows_phase)
    xhfreq = SM.cleaningSineTracks(xhfreq, round(fs * minSineDur / H))  # delete tracks shorter than minSineDur
    return xhfreq, xhmag, xhphase
def f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et):
    """
    Fundamental frequency detection of a sound using twm algorithm
    x: input sound; fs: sampling rate; w: analysis window;
    N: FFT size; H: hop size; t: threshold in negative dB,
    minf0: minimum f0 frequency in Hz, maxf0: maximum f0 frequency in Hz,
    f0et: error threshold in the f0 detection (ex: 5),
    returns f0: numpy array holding the f0 value found for every frame
    raises ValueError if minf0 < 0, maxf0 >= 10000 or H <= 0
    """
    if minf0 < 0:  # reject a negative lower bound
        raise ValueError("Minumum fundamental frequency (minf0) smaller than 0")
    if maxf0 >= 10000:  # fixed 10 kHz ceiling (not derived from fs)
        raise ValueError("Maximum fundamental frequency (maxf0) bigger than 10000Hz")
    if H <= 0:  # raise error if hop size 0 or negative
        raise ValueError("Hop size (H) smaller or equal to 0")

    hM1 = (w.size + 1) // 2  # half analysis window size by rounding
    hM2 = w.size // 2  # half analysis window size by floor
    x = np.append(np.zeros(hM2), x)  # add zeros at beginning to center first window at sample 0
    x = np.append(x, np.zeros(hM1))  # add zeros at the end to analyze last sample
    pin = hM1  # init sound pointer in middle of analysis window
    pend = x.size - hM1  # last sample to start a frame
    w = w / sum(w)  # normalize analysis window
    estimates = []  # per-frame f0 values, turned into an array at the end
    f0stable = 0  # last accepted f0, 0 while no stable f0 is being tracked
    while pin < pend:
        x1 = x[pin - hM1:pin + hM2]  # select frame
        mX, pX = DFT.dftAnal(x1, w, N)  # compute dft
        ploc = UF.peakDetection(mX, t)  # detect peak locations
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)  # refine peak values
        ipfreq = fs * iploc / N  # convert locations to Hz
        f0t = UF.f0Twm(ipfreq, ipmag, f0et, minf0, maxf0, f0stable)  # find f0
        starts_track = f0stable == 0 and f0t > 0
        stays_close = f0stable > 0 and np.abs(f0stable - f0t) < f0stable / 5.0
        # consider a stable f0 if it is close to the previous one
        f0stable = f0t if (starts_track or stays_close) else 0
        estimates.append(f0t)  # add f0 to output
        pin += H  # advance sound pointer
    # one conversion at the end; also yields an (empty) array when no frame fits
    return np.array(estimates, dtype=float).ravel()
def hpsModel(x, fs, w, N, t, nH, minf0, maxf0, f0et, stocf):
    """
    Analysis/synthesis of a sound using the harmonic plus stochastic model, one frame at a time, no harmonic tracking
    x: input sound; fs: sampling rate, w: analysis window; N: FFT size (minimum 512), t: threshold in negative dB,
    nH: maximum number of harmonics, minf0: minimum f0 frequency in Hz; maxf0: maximum f0 frequency in Hz,
    f0et: error threshold in the f0 detection (ex: 5); stocf: decimation factor of mag spectrum for stochastic analysis
    returns y: output sound, yh: harmonic component, yst: stochastic component
    """
    hM1 = (w.size + 1) // 2  # half analysis window size by rounding
    hM2 = w.size // 2  # half analysis window size by floor
    Ns = 512  # FFT size for synthesis (even)
    # floor division keeps these integers so they can be used as slice indices
    H = Ns // 4  # hop size used for analysis and synthesis
    hNs = Ns // 2
    # the residual frame starts at pin-hNs-1, so pin must be at least hNs+1;
    # starting at hNs would make that start index -1 and the slice empty
    pin = max(hNs + 1, hM1)  # initialize sound pointer in middle of analysis window
    pend = x.size - max(hNs, hM1)  # last sample to start a frame
    yhw = np.zeros(Ns)  # harmonic output sound frame
    ystw = np.zeros(Ns)  # stochastic output sound frame
    yh = np.zeros(x.size)  # harmonic output array
    yst = np.zeros(x.size)  # stochastic output array
    w = w / sum(w)  # normalize analysis window
    sw = np.zeros(Ns)
    ow = triang(2 * H)  # overlapping window
    sw[hNs - H:hNs + H] = ow
    bh = blackmanharris(Ns)
    bh = bh / sum(bh)  # normalized blackman-harris window
    wr = bh  # window for residual
    sw[hNs - H:hNs + H] = sw[hNs - H:hNs + H] / bh[hNs - H:hNs + H]  # synthesis window for harmonic component
    sws = H * hanning(Ns) / 2  # synthesis window for stochastic
    hfreqp = []  # harmonic frequencies of the previous frame
    f0stable = 0
    while pin < pend:
        #-----analysis-----
        x1 = x[pin - hM1:pin + hM2]  # select frame
        mX, pX = DFT.dftAnal(x1, w, N)  # compute dft
        ploc = UF.peakDetection(mX, t)  # find peaks
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)  # refine peak values
        ipfreq = fs * iploc / N  # convert peak locations to Hz
        f0t = UF.f0Twm(ipfreq, ipmag, f0et, minf0, maxf0, f0stable)  # find f0
        if (f0stable == 0 and f0t > 0) or \
                (f0stable > 0 and np.abs(f0stable - f0t) < f0stable / 5.0):
            f0stable = f0t  # consider a stable f0 if it is close to the previous one
        else:
            f0stable = 0
        hfreq, hmag, hphase = HM.harmonicDetection(ipfreq, ipmag, ipphase, f0t, nH, hfreqp, fs)  # find harmonics
        hfreqp = hfreq
        ri = pin - hNs - 1  # input sound pointer for residual analysis
        xw2 = x[ri:ri + Ns] * wr  # window the input sound
        fftbuffer = np.zeros(Ns)  # reset buffer
        fftbuffer[:hNs] = xw2[hNs:]  # zero-phase window in fftbuffer
        fftbuffer[hNs:] = xw2[:hNs]
        X2 = fft(fftbuffer)  # compute FFT for residual analysis
        #-----synthesis-----
        Yh = UF.genSpecSines(hfreq, hmag, hphase, Ns, fs)  # generate spec sines of harmonic component
        Xr = X2 - Yh  # get the residual complex spectrum
        mXr = 20 * np.log10(abs(Xr[:hNs]))  # magnitude spectrum of residual
        # decimate the magnitude spectrum and avoid -Inf; resample needs an
        # integer number of output samples
        mXrenv = resample(np.maximum(-200, mXr), int(mXr.size * stocf))
        stocEnv = resample(mXrenv, hNs)  # interpolate to original size
        pYst = 2 * np.pi * np.random.rand(hNs)  # generate phase random values
        Yst = np.zeros(Ns, dtype=complex)
        Yst[:hNs] = 10 ** (stocEnv / 20) * np.exp(1j * pYst)  # generate positive freq.
        Yst[hNs + 1:] = 10 ** (stocEnv[:0:-1] / 20) * np.exp(-1j * pYst[:0:-1])  # generate negative freq.
        fftbuffer = np.real(ifft(Yh))  # inverse FFT of harmonic spectrum
        yhw[:hNs - 1] = fftbuffer[hNs + 1:]  # undo zero-phase window
        yhw[hNs - 1:] = fftbuffer[:hNs + 1]
        fftbuffer = np.real(ifft(Yst))  # inverse FFT of stochastic spectrum
        ystw[:hNs - 1] = fftbuffer[hNs + 1:]  # undo zero-phase window
        ystw[hNs - 1:] = fftbuffer[:hNs + 1]
        yh[ri:ri + Ns] += sw * yhw  # overlap-add for sines
        yst[ri:ri + Ns] += sws * ystw  # overlap-add for stochastic
        pin += H  # advance sound pointer
    y = yh + yst  # sum of harmonic and stochastic components
    return y, yh, yst
def hpsModel(x, fs, w, N, t, nH, minf0, maxf0, f0et, stocf):
    """
    Analysis/synthesis of a sound using the harmonic plus stochastic model, one frame at a time, no harmonic tracking
    x: input sound; fs: sampling rate, w: analysis window; N: FFT size (minimum 512), t: threshold in negative dB,
    nH: maximum number of harmonics, minf0: minimum f0 frequency in Hz; maxf0: maximum f0 frequency in Hz,
    f0et: error threshold in the f0 detection (ex: 5); stocf: decimation factor of mag spectrum for stochastic analysis
    returns y: output sound, yh: harmonic component, yst: stochastic component
    """
    hM1 = (w.size + 1) // 2  # half analysis window size by rounding
    hM2 = w.size // 2  # half analysis window size by floor
    Ns = 512  # FFT size for synthesis (even)
    # integer hop/half sizes, required for slicing and for the window length
    H = Ns // 4  # hop size used for analysis and synthesis
    hNs = Ns // 2
    # the residual frame starts at pin-hNs-1, so pin must be at least hNs+1;
    # starting at hNs would make that start index -1 and the slice empty
    pin = max(hNs + 1, hM1)  # initialize sound pointer in middle of analysis window
    pend = x.size - max(hNs, hM1)  # last sample to start a frame
    yhw = np.zeros(Ns)  # harmonic output sound frame
    ystw = np.zeros(Ns)  # stochastic output sound frame
    yh = np.zeros(x.size)  # harmonic output array
    yst = np.zeros(x.size)  # stochastic output array
    w = w / sum(w)  # normalize analysis window
    lo, hi = hNs - H, hNs + H  # span of the overlapping window inside sw
    sw = np.zeros(Ns)
    sw[lo:hi] = triang(2 * H)  # overlapping window
    bh = blackmanharris(Ns)
    bh = bh / sum(bh)  # normalized blackman-harris window
    wr = bh  # window for residual
    sw[lo:hi] = sw[lo:hi] / bh[lo:hi]  # synthesis window for harmonic component
    sws = H * hanning(Ns) / 2  # synthesis window for stochastic
    hfreqp = []  # harmonic frequencies of the previous frame
    f0stable = 0
    while pin < pend:
        #-----analysis-----
        x1 = x[pin - hM1:pin + hM2]  # select frame
        mX, pX = DFT.dftAnal(x1, w, N)  # compute dft
        ploc = UF.peakDetection(mX, t)  # find peaks
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)  # refine peak values
        ipfreq = fs * iploc / N  # convert peak locations to Hz
        f0t = UF.f0Twm(ipfreq, ipmag, f0et, minf0, maxf0, f0stable)  # find f0
        starts_track = f0stable == 0 and f0t > 0
        stays_close = f0stable > 0 and np.abs(f0stable - f0t) < f0stable / 5.0
        # consider a stable f0 if it is close to the previous one
        f0stable = f0t if (starts_track or stays_close) else 0
        hfreq, hmag, hphase = HM.harmonicDetection(ipfreq, ipmag, ipphase, f0t, nH, hfreqp, fs)  # find harmonics
        hfreqp = hfreq
        ri = pin - hNs - 1  # input sound pointer for residual analysis
        xw2 = x[ri:ri + Ns] * wr  # window the input sound
        fftbuffer = np.zeros(Ns)  # reset buffer
        fftbuffer[:hNs] = xw2[hNs:]  # zero-phase window in fftbuffer
        fftbuffer[hNs:] = xw2[:hNs]
        X2 = fft(fftbuffer)  # compute FFT for residual analysis
        #-----synthesis-----
        Yh = UF.genSpecSines(hfreq, hmag, hphase, Ns, fs)  # generate spec sines of harmonic component
        Xr = X2 - Yh  # get the residual complex spectrum
        mXr = 20 * np.log10(abs(Xr[:hNs]))  # magnitude spectrum of residual
        # decimate the magnitude spectrum and avoid -Inf; resample needs an
        # integer number of output samples
        mXrenv = resample(np.maximum(-200, mXr), int(mXr.size * stocf))
        stocEnv = resample(mXrenv, hNs)  # interpolate to original size
        pYst = 2 * np.pi * np.random.rand(hNs)  # generate phase random values
        Yst = np.zeros(Ns, dtype=complex)
        Yst[:hNs] = 10 ** (stocEnv / 20) * np.exp(1j * pYst)  # generate positive freq.
        Yst[hNs + 1:] = 10 ** (stocEnv[:0:-1] / 20) * np.exp(-1j * pYst[:0:-1])  # generate negative freq.
        fftbuffer = np.real(ifft(Yh))  # inverse FFT of harmonic spectrum
        yhw[:hNs - 1] = fftbuffer[hNs + 1:]  # undo zero-phase window
        yhw[hNs - 1:] = fftbuffer[:hNs + 1]
        fftbuffer = np.real(ifft(Yst))  # inverse FFT of stochastic spectrum
        ystw[:hNs - 1] = fftbuffer[hNs + 1:]  # undo zero-phase window
        ystw[hNs - 1:] = fftbuffer[:hNs + 1]
        yh[ri:ri + Ns] += sw * yhw  # overlap-add for sines
        yst[ri:ri + Ns] += sws * ystw  # overlap-add for stochastic
        pin += H  # advance sound pointer
    y = yh + yst  # sum of harmonic and stochastic components
    return y, yh, yst
def hprModel(x, fs, w, N, t, nH, minf0, maxf0, f0et):
    """
    Analysis/synthesis of a sound using the harmonic plus residual model
    x: input sound, fs: sampling rate, w: analysis window,
    N: FFT size (minimum 512), t: threshold in negative dB,
    nH: maximum number of harmonics, minf0: minimum f0 frequency in Hz,
    maxf0: maximum f0 frequency in Hz,
    f0et: error threshold in the f0 detection (ex: 5),
    returns y: output sound, yh: harmonic component, xr: residual component
    """
    hM1 = (w.size + 1) // 2  # half analysis window size by rounding
    hM2 = w.size // 2  # half analysis window size by floor
    Ns = 512  # FFT size for synthesis (even)
    # floor division keeps these integers so they can be used as slice indices
    H = Ns // 4  # hop size used for analysis and synthesis
    hNs = Ns // 2
    # the residual frame starts at pin-hNs-1, so pin must be at least hNs+1;
    # starting at hNs would make that start index -1 and the slice empty
    pin = max(hNs + 1, hM1)  # initialize sound pointer in middle of analysis window
    pend = x.size - max(hNs, hM1)  # last sample to start a frame
    yhw = np.zeros(Ns)  # harmonic output sound frame
    xrw = np.zeros(Ns)  # residual output sound frame
    yh = np.zeros(x.size)  # harmonic output array
    xr = np.zeros(x.size)  # residual output array
    w = w / sum(w)  # normalize analysis window
    sw = np.zeros(Ns)
    ow = triang(2 * H)  # overlapping window
    sw[hNs - H:hNs + H] = ow
    bh = blackmanharris(Ns)
    bh = bh / sum(bh)  # normalized blackman-harris window
    wr = bh  # window for residual
    sw[hNs - H:hNs + H] = sw[hNs - H:hNs + H] / bh[hNs - H:hNs + H]  # window for overlap-add
    hfreqp = []  # harmonic frequencies of the previous frame
    f0stable = 0
    while pin < pend:
        #-----analysis-----
        x1 = x[pin - hM1:pin + hM2]  # select frame
        mX, pX = DFT.dftAnal(x1, w, N)  # compute dft
        ploc = UF.peakDetection(mX, t)  # find peaks
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)  # refine peak values
        ipfreq = fs * iploc / N  # convert locations to Hz
        f0t = UF.f0Twm(ipfreq, ipmag, f0et, minf0, maxf0, f0stable)  # find f0
        if (f0stable == 0 and f0t > 0) or \
                (f0stable > 0 and np.abs(f0stable - f0t) < f0stable / 5.0):
            f0stable = f0t  # consider a stable f0 if it is close to the previous one
        else:
            f0stable = 0
        hfreq, hmag, hphase = HM.harmonicDetection(ipfreq, ipmag, ipphase, f0t, nH, hfreqp, fs)  # find harmonics
        hfreqp = hfreq
        ri = pin - hNs - 1  # input sound pointer for residual analysis
        xw2 = x[ri:ri + Ns] * wr  # window the input sound
        fftbuffer = np.zeros(Ns)  # reset buffer
        fftbuffer[:hNs] = xw2[hNs:]  # zero-phase window in fftbuffer
        fftbuffer[hNs:] = xw2[:hNs]
        X2 = fft(fftbuffer)  # compute FFT of input signal for residual analysis
        #-----synthesis-----
        Yh = UF.genSpecSines(hfreq, hmag, hphase, Ns, fs)  # generate sines
        Xr = X2 - Yh  # get the residual complex spectrum
        fftbuffer = np.real(ifft(Yh))  # inverse FFT of harmonic spectrum
        yhw[:hNs - 1] = fftbuffer[hNs + 1:]  # undo zero-phase window
        yhw[hNs - 1:] = fftbuffer[:hNs + 1]
        fftbuffer = np.real(ifft(Xr))  # inverse FFT of residual spectrum
        xrw[:hNs - 1] = fftbuffer[hNs + 1:]  # undo zero-phase window
        xrw[hNs - 1:] = fftbuffer[:hNs + 1]
        yh[ri:ri + Ns] += sw * yhw  # overlap-add for sines
        xr[ri:ri + Ns] += sw * xrw  # overlap-add for residual
        pin += H  # advance sound pointer
    y = yh + xr  # sum of harmonic and residual components
    return y, yh, xr
def hprModel(x, fs, w, N, t, nH, minf0, maxf0, f0et):
    """
    Analysis/synthesis of a sound using the harmonic plus residual model
    x: input sound, fs: sampling rate, w: analysis window,
    N: FFT size (minimum 512), t: threshold in negative dB,
    nH: maximum number of harmonics, minf0: minimum f0 frequency in Hz,
    maxf0: maximum f0 frequency in Hz,
    f0et: error threshold in the f0 detection (ex: 5),
    returns y: output sound, yh: harmonic component, xr: residual component
    """
    hM1 = (w.size + 1) // 2  # half analysis window size by rounding
    hM2 = w.size // 2  # half analysis window size by floor
    Ns = 512  # FFT size for synthesis (even)
    # integer hop/half sizes, required for slicing and for the window length
    H = Ns // 4  # hop size used for analysis and synthesis
    hNs = Ns // 2
    # the residual frame starts at pin-hNs-1, so pin must be at least hNs+1;
    # starting at hNs would make that start index -1 and the slice empty
    pin = max(hNs + 1, hM1)  # initialize sound pointer in middle of analysis window
    pend = x.size - max(hNs, hM1)  # last sample to start a frame
    yhw = np.zeros(Ns)  # harmonic output sound frame
    xrw = np.zeros(Ns)  # residual output sound frame
    yh = np.zeros(x.size)  # harmonic output array
    xr = np.zeros(x.size)  # residual output array
    w = w / sum(w)  # normalize analysis window
    lo, hi = hNs - H, hNs + H  # span of the overlapping window inside sw
    sw = np.zeros(Ns)
    sw[lo:hi] = triang(2 * H)  # overlapping window
    bh = blackmanharris(Ns)
    bh = bh / sum(bh)  # normalized blackman-harris window
    wr = bh  # window for residual
    sw[lo:hi] = sw[lo:hi] / bh[lo:hi]  # window for overlap-add
    hfreqp = []  # harmonic frequencies of the previous frame
    f0stable = 0
    while pin < pend:
        #-----analysis-----
        x1 = x[pin - hM1:pin + hM2]  # select frame
        mX, pX = DFT.dftAnal(x1, w, N)  # compute dft
        ploc = UF.peakDetection(mX, t)  # find peaks
        iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)  # refine peak values
        ipfreq = fs * iploc / N  # convert locations to Hz
        f0t = UF.f0Twm(ipfreq, ipmag, f0et, minf0, maxf0, f0stable)  # find f0
        starts_track = f0stable == 0 and f0t > 0
        stays_close = f0stable > 0 and np.abs(f0stable - f0t) < f0stable / 5.0
        # consider a stable f0 if it is close to the previous one
        f0stable = f0t if (starts_track or stays_close) else 0
        hfreq, hmag, hphase = HM.harmonicDetection(ipfreq, ipmag, ipphase, f0t, nH, hfreqp, fs)  # find harmonics
        hfreqp = hfreq
        ri = pin - hNs - 1  # input sound pointer for residual analysis
        xw2 = x[ri:ri + Ns] * wr  # window the input sound
        fftbuffer = np.zeros(Ns)  # reset buffer
        fftbuffer[:hNs] = xw2[hNs:]  # zero-phase window in fftbuffer
        fftbuffer[hNs:] = xw2[:hNs]
        X2 = fft(fftbuffer)  # compute FFT of input signal for residual analysis
        #-----synthesis-----
        Yh = UF.genSpecSines(hfreq, hmag, hphase, Ns, fs)  # generate sines
        Xr = X2 - Yh  # get the residual complex spectrum
        fftbuffer = np.real(ifft(Yh))  # inverse FFT of harmonic spectrum
        yhw[:hNs - 1] = fftbuffer[hNs + 1:]  # undo zero-phase window
        yhw[hNs - 1:] = fftbuffer[:hNs + 1]
        fftbuffer = np.real(ifft(Xr))  # inverse FFT of residual spectrum
        xrw[:hNs - 1] = fftbuffer[hNs + 1:]  # undo zero-phase window
        xrw[hNs - 1:] = fftbuffer[:hNs + 1]
        yh[ri:ri + Ns] += sw * yhw  # overlap-add for sines
        xr[ri:ri + Ns] += sw * xrw  # overlap-add for residual
        pin += H  # advance sound pointer
    y = yh + xr  # sum of harmonic and residual components
    return y, yh, xr
# f0 search range in Hz, f0 error threshold and harmonic-analysis settings
minf0 = 420
maxf0 = 460
f0et = 5
maxnpeaksTwm = 5
minSineDur = 0.1
harmDevSlope = 0.01
# synthesis FFT size and hop size
Ns = 512
H = Ns // 4

# analysis frame (window-sized) and synthesis-sized frame, both taken around pos
x1 = x[pos - hM1:pos + hM2]
x2 = x[pos - Ns // 2 - 1:pos + Ns // 2 - 1]

# spectrum of the analysis frame, its peaks and the f0 found from them
mX, pX = DFT.dftAnal(x1, w, N)
ploc = UF.peakDetection(mX, t)
iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)
ipfreq = fs * iploc / N
f0 = UF.f0Twm(ipfreq, ipmag, f0et, minf0, maxf0)

# harmonics of f0 (no previous-frame harmonics) and their synthesized spectrum
hfreqp = []
hfreq, hmag, hphase = HM.harmonicDetection(
    ipfreq, ipmag, ipphase, f0, nH, hfreqp, fs, harmDevSlope)
Yh = UF.genSpecSines(hfreq, hmag, hphase, Ns, fs)
mYh = 20 * np.log10(abs(Yh[:Ns // 2]))  # magnitude in dB, positive half
pYh = np.unwrap(np.angle(Yh[:Ns // 2]))  # unwrapped phase, positive half

# spectrum of the synthesis-sized frame, windowed with a normalized blackman-harris
bh = blackmanharris(Ns)
X2 = fft(fftshift(x2 * bh / sum(bh)))

# residual spectrum: input spectrum minus the harmonic spectrum
Xr = X2 - Yh
mXr = 20 * np.log10(abs(Xr[:Ns // 2]))
pXr = np.unwrap(np.angle(Xr[:Ns // 2]))

# time-domain residual and harmonic frames, scaled by 2 * H
xrw = np.real(fftshift(ifft(Xr))) * H * 2
yhw = np.real(fftshift(ifft(Yh))) * H * 2
maxplotfreq = 8000.0
# upper f0 bound in Hz, f0 error threshold and harmonic-analysis settings
maxf0 = 500
f0et = 5
nH = 60
harmDevSlope = 0.001
stocf = 0.4  # stochastic approx. factor

# blackman analysis window of M samples and its two half sizes
w = get_window('blackman', M)
hM1 = int(math.floor((M + 1) / 2))
hM2 = int(math.floor(M / 2))

# spectrum of the frame around pin, its peaks and the f0 found from them
x1 = x[pin - hM1:pin + hM2]
mX, pX = DFT.dftAnal(x1, w, N)
ploc = UF.peakDetection(mX, t)
iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)
ipfreq = fs * iploc / N
f0 = UF.f0Twm(ipfreq, ipmag, f0et, minf0, maxf0, 0)
hfreq, hmag, hphase = HM.harmonicDetection(
    ipfreq, ipmag, ipphase, f0, nH, [], fs, harmDevSlope)

# synthesized harmonic spectrum of size Ns
Ns = 512
hNs = 256
Yh = UF.genSpecSines(hfreq, hmag, hphase, Ns, fs)

# Ns-sample frame around pin, windowed with a normalized blackman-harris
wr = get_window('blackmanharris', Ns)
xw2 = x[pin - hNs - 1:pin + hNs - 1] * wr / sum(wr)
# swap the two halves of the windowed frame (zero-phase buffer) before the FFT
fftbuffer = np.zeros(Ns)
fftbuffer[:hNs] = xw2[hNs:]
fftbuffer[hNs:] = xw2[:hNs]
X2 = fft(fftbuffer)

# residual spectrum and its magnitude
Xr = X2 - Yh
mXr = 20 * np.log10(abs(Xr[:hNs]))  # converting to a dB scale
harmDevSlope = 0.001
# alternative value tried earlier: stocf = 0.4
stocf = 0.1

# blackman analysis window of M samples and its two half sizes
w = get_window('blackman', M)
# zero-phase windowing is essential here, for correct subtraction
hM1 = int(math.floor((M + 1) / 2))
hM2 = int(math.floor(M / 2))

# spectrum of the frame around pin and its interpolated peaks
x1 = x[pin - hM1:pin + hM2]
mX, pX = DFT.dftAnal(x1, w, N)
ploc = UF.peakDetection(mX, t)
iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)
ipfreq = fs * iploc / N  # convert to Hz
# find best candidate for f0
f0 = UF.f0Twm(ipfreq, ipmag, f0et, minf0, maxf0, 0)
hfreq, hmag, hphase = HM.harmonicDetection(
    ipfreq, ipmag, ipphase, f0, nH, [], fs, harmDevSlope)

Ns = 512
hNs = 256  # Ns / 2
# uses blackman harris lobe; Yh is the complete spectrum of the harmonic components
Yh = UF.genSpecSines(hfreq, hmag, hphase, Ns, fs)

# must use same window and size as harmonic spectrum
wr = get_window('blackmanharris', Ns)
xw2 = x[pin - hNs - 1:pin + hNs - 1] * wr / sum(wr)
fftbuffer = np.zeros(Ns)
fftbuffer[:hNs] = xw2[hNs:]  # zero-phase windowing
fftbuffer[hNs:] = xw2[:hNs]
X2 = fft(fftbuffer)
# f0 search range in Hz, f0 error threshold and harmonic-analysis settings
minf0 = 300
maxf0 = 500
f0et = 5
nH = 60
harmDevSlope = .001

# blackman analysis window of M samples and its two half sizes
w = get_window("blackman", M)
hM1 = int(math.floor((M + 1) / 2))
hM2 = int(math.floor(M / 2))

# spectrum of the frame around pin and its interpolated peaks
x1 = x[pin - hM1:pin + hM2]
mX, pX = DFT.dftAnal(x1, w, N)
ploc = UF.peakDetection(mX, t)
iploc, ipmag, ipphase = UF.peakInterp(mX, pX, ploc)
ipfreq = fs * iploc / N
# use 2-way mismatch to find the best f0 candidates
f0 = UF.f0Twm(ipfreq, ipmag, f0et, minf0, maxf0, 0)
hfreq, hmag, hphase = HM.harmonicDetection(
    ipfreq, ipmag, ipphase, f0, nH, [], fs, harmDevSlope)

# synthesize the harmonics we identified
Ns = 512
hNs = 256
# Yh is the complete spectrum for the harmonic component
Yh = UF.genSpecSines(hfreq, hmag, hphase, Ns, fs)

# NOTE(review): switched from 'blackman' to 'blackmanharris'. The input frame is
# presumably compared against Yh later, and genSpecSines presumably synthesizes
# Blackman-Harris lobes, so both spectra need the same window - confirm.
wr = get_window('blackmanharris', Ns)
# only 512 samples around the pointer
xw2 = x[pin - hNs - 1:pin + hNs - 1] * wr / sum(wr)
# center everything around zero (zero-phase buffer)
fftbuffer = np.zeros(Ns)
fftbuffer[:hNs] = xw2[hNs:]