def stft(x, w, N, H):
    """
    Analysis/synthesis of a sound using the short-time Fourier transform.

    Every frame is analysed with dftModel.dft_anal, resynthesised with
    dftModel.dft_synth and overlap-added, scaled by the hop size, into the
    output buffer.

    x: input sound, w: analysis window, N: FFT size, H: hop size
    returns y: output sound (the zero padding placed around x is removed)
    raises ValueError: if H is zero or negative
    """
    if H <= 0:
        raise ValueError("Hop size (H) smaller or equal to 0")

    win_size = w.size
    lead = (win_size + 1) // 2   # frame samples before the pointer (rounded-up half)
    trail = win_size // 2        # frame samples from the pointer onwards (floored half)

    # zeros in front centre the first window on sample 0; zeros behind let
    # the last samples of the input be analysed as well
    padded = np.append(np.append(np.zeros(trail), x), np.zeros(lead))
    last_pos = padded.size - lead            # last position at which a frame may start
    norm_window = w / sum(w)                 # window normalised to unit sum
    out = np.zeros(padded.size)

    pos = lead                               # pointer sits in the middle of the first window
    while pos <= last_pos:
        lo, hi = pos - lead, pos + trail
        # -----analysis-----
        mag, phase = dftModel.dft_anal(padded[lo:hi], norm_window, N)
        # -----synthesis-----
        frame = dftModel.dft_synth(mag, phase, win_size)
        out[lo:hi] += H * frame              # overlap-add
        pos += H

    # strip the padding: `trail` zeros at the front, `lead` zeros at the back
    return out[trail:out.size - lead].copy()
def stft_anal(x, w, N, H):
    """
    Analysis of a sound using the short-time Fourier transform.

    x: input array sound, w: analysis window, N: FFT size, H: hop size
    returns xmX, xpX: magnitude and phase spectra, one row per analysed frame.
        Both are None when x is too short for a single frame to fit.
    raises ValueError: if H is zero or negative
    """
    if H <= 0:  # a non-positive hop would never advance the sound pointer
        raise ValueError("Hop size (H) smaller or equal to 0")

    M = w.size                         # size of analysis window
    hM1 = (M + 1) // 2                 # half analysis window size by rounding
    hM2 = M // 2                       # half analysis window size by floor
    x = np.append(np.zeros(hM2), x)    # zeros at beginning to center first window at sample 0
    x = np.append(x, np.zeros(hM2))    # zeros at the end to analyze last sample
    pin = hM1                          # sound pointer in middle of analysis window
    pend = x.size - hM1                # last sample to start a frame
    w = w / sum(w)                     # normalize analysis window

    # Collect the per-frame spectra and stack them once at the end; stacking
    # inside the loop would re-copy every earlier frame on each iteration.
    mag_frames = []
    phase_frames = []
    while pin <= pend:
        x1 = x[pin - hM1:pin + hM2]            # select one frame of input sound
        mX, pX = dftModel.dft_anal(x1, w, N)   # compute dft
        mag_frames.append(mX)
        phase_frames.append(pX)
        pin += H                               # advance sound pointer

    if not mag_frames:  # no frame fitted: keep the historical (None, None) result
        return None, None
    return np.vstack(mag_frames), np.vstack(phase_frames)
def sine_model_anal(x, fs, w, N, H, t, maxnSines=100, minSineDur=.01, freqDevOffset=20, freqDevSlope=0.01):
    """
    Analysis of a sound using the sinusoidal model with sine tracking.

    x: input array sound, fs: sampling rate, w: analysis window,
    N: size of complex spectrum, H: hop-size, t: threshold in negative dB
    maxnSines: maximum number of sines per frame,
    minSineDur: minimum duration of sines in seconds
    freqDevOffset: minimum frequency deviation at 0Hz,
    freqDevSlope: slope increase of minimum frequency deviation
    returns xtfreq, xtmag, xtphase: frequencies, magnitudes and phases of
        sinusoidal tracks, each of shape (number of frames, maxnSines) with
        unused slots left at zero. When x is too short for any frame the three
        arrays have shape (0, maxnSines).
    raises ValueError: if minSineDur is negative or H is zero or negative
    """
    if minSineDur < 0:
        raise ValueError("Minimum duration of sine tracks smaller than 0")
    if H <= 0:  # a non-positive hop would never advance the sound pointer
        raise ValueError("Hop size (H) smaller or equal to 0")

    hM1 = (w.size + 1) // 2            # half analysis window size by rounding
    hM2 = w.size // 2                  # half analysis window size by floor
    x = np.append(np.zeros(hM2), x)    # zeros at beginning to center first window at sample 0
    x = np.append(x, np.zeros(hM2))    # zeros at the end to analyze last sample
    pin = hM1                          # sound pointer in middle of analysis window
    pend = x.size - hM1                # last sample to start a frame
    w = w / sum(w)                     # normalize analysis window

    # Per-frame rows are gathered in lists and stacked once after the loop, so
    # the result is 2-D for any frame count and earlier frames are not re-copied.
    freq_rows, mag_rows, phase_rows = [], [], []
    tfreq = np.array([])               # tracks of the previous frame (none yet)
    while pin < pend:
        x1 = x[pin - hM1:pin + hM2]                                     # select frame
        mX, pX = dftModel.dft_anal(x1, w, N)                            # compute dft
        ploc = utilFunctions.peakDetection(mX, t)                       # detect locations of peaks
        iploc, ipmag, ipphase = utilFunctions.peakInterp(mX, pX, ploc)  # refine peaks by interpolation
        ipfreq = fs * iploc / float(N)                                  # convert peak locations to Hertz
        # perform sinusoidal tracking by adding peaks to trajectories
        tfreq, tmag, tphase = sine_tracking(ipfreq, ipmag, ipphase, tfreq, freqDevOffset, freqDevSlope)
        # limit number of tracks to maxnSines; the truncated tfreq is also
        # what the next frame's tracking receives as its previous tracks
        tfreq = tfreq[:maxnSines]
        tmag = tmag[:maxnSines]
        tphase = tphase[:maxnSines]
        for rows, values in ((freq_rows, tfreq), (mag_rows, tmag), (phase_rows, tphase)):
            row = np.zeros(maxnSines)  # fixed-width row, zero where no track exists
            row[:values.size] = values
            rows.append(row)
        pin += H                       # advance sound pointer

    if not freq_rows:
        # Input too short for a single frame: nothing to track or clean.
        return (np.zeros((0, maxnSines)),
                np.zeros((0, maxnSines)),
                np.zeros((0, maxnSines)))

    xtfreq = np.vstack(freq_rows)
    xtmag = np.vstack(mag_rows)
    xtphase = np.vstack(phase_rows)
    # delete sine tracks shorter than minSineDur (expressed in frames)
    xtfreq = clean_sine_tracks(xtfreq, round(fs * minSineDur / H))
    return xtfreq, xtmag, xtphase
def harmonic_model_anal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope=0.01, minSineDur=0.02):
    """
    Analysis of a sound using the sinusoidal harmonic model.

    x: input sound; fs: sampling rate, w: analysis window;
    N: FFT size (minimum 512); H: hop size; t: threshold in negative dB,
    nH: maximum number of harmonics; minf0: minimum f0 frequency in Hz,
    maxf0: maximum f0 frequency in Hz;
    f0et: error threshold in the f0 detection (ex: 5),
    harmDevSlope: slope of harmonic deviation;
    minSineDur: minimum length of harmonics
    returns xhfreq, xhmag, xhphase: harmonic frequencies, magnitudes and
        phases, one row per analysed frame. When x is too short for any frame
        the three arrays are empty with shape (0, nH).
    raises ValueError: if minSineDur is negative or H is zero or negative
    """
    if minSineDur < 0:
        raise ValueError("Minimum duration of sine tracks smaller than 0")
    if H <= 0:  # a non-positive hop would never advance the sound pointer
        raise ValueError("Hop size (H) smaller or equal to 0")

    hM1 = (w.size + 1) // 2            # half analysis window size by rounding
    hM2 = w.size // 2                  # half analysis window size by floor
    x = np.append(np.zeros(hM2), x)    # zeros at beginning to center first window at sample 0
    x = np.append(x, np.zeros(hM2))    # zeros at the end to analyze last sample
    pin = hM1                          # sound pointer in middle of analysis window
    pend = x.size - hM1                # last sample to start a frame
    w = w / sum(w)                     # normalize analysis window
    hfreqp = []                        # harmonic frequencies of previous frame
    f0stable = 0                       # 0 means "no stable f0 yet"

    # Per-frame rows are gathered in lists and stacked once after the loop,
    # which avoids re-copying every earlier frame on each iteration.
    freq_rows, mag_rows, phase_rows = [], [], []
    while pin <= pend:
        x1 = x[pin - hM1:pin + hM2]                                     # select frame
        mX, pX = dftModel.dft_anal(x1, w, N)                            # compute dft
        ploc = utilFunctions.peakDetection(mX, t)                       # detect peak locations
        iploc, ipmag, ipphase = utilFunctions.peakInterp(mX, pX, ploc)  # refine peak values
        ipfreq = fs * iploc / N                                         # convert locations to Hz
        f0t = utilFunctions.f0Twm(ipfreq, ipmag, f0et, minf0, maxf0, f0stable)  # find f0
        # f0 counts as stable when it is the first positive estimate, or when
        # it stays within a fifth of the previous stable value
        if ((f0stable == 0) and (f0t > 0)) or \
                ((f0stable > 0) and (np.abs(f0stable - f0t) < f0stable / 5.0)):
            f0stable = f0t
        else:
            f0stable = 0
        hfreq, hmag, hphase = harmonic_detection(
            ipfreq, ipmag, ipphase, f0t, nH, hfreqp, fs, harmDevSlope
        )  # find harmonics
        hfreqp = hfreq
        freq_rows.append(hfreq)
        mag_rows.append(hmag)
        phase_rows.append(hphase)
        pin += H                       # advance sound pointer

    if not freq_rows:
        # Input too short for a single frame: nothing to detect or clean.
        # NOTE(review): the width nH assumes harmonic_detection yields nH
        # values per frame - confirm against its definition.
        return np.zeros((0, nH)), np.zeros((0, nH)), np.zeros((0, nH))

    xhfreq = np.vstack(freq_rows)
    xhmag = np.vstack(mag_rows)
    xhphase = np.vstack(phase_rows)
    # delete tracks shorter than minSineDur (expressed in frames)
    xhfreq = sineModel.clean_sine_tracks(xhfreq, round(fs * minSineDur / H))
    return xhfreq, xhmag, xhphase
def f0_detection(x, fs, w, N, H, t, minf0, maxf0, f0et):
    """
    Fundamental frequency detection of a sound using twm algorithm.

    x: input sound; fs: sampling rate; w: analysis window; N: FFT size;
    H: hop size; t: threshold in negative dB,
    minf0: minimum f0 frequency in Hz, maxf0: maximum f0 frequency in Hz,
    f0et: error threshold in the f0 detection (ex: 5),
    returns f0: float array with one fundamental frequency estimate per
        analysed frame (empty when x is too short for any frame)
    raises ValueError: if minf0 is negative, maxf0 is 10000 or more, or H is
        zero or negative
    """
    if minf0 < 0:
        raise ValueError("Minumum fundamental frequency (minf0) smaller than 0")
    if maxf0 >= 10000:  # fixed 10 kHz ceiling; fs is not consulted here
        raise ValueError("Maximum fundamental frequency (maxf0) bigger than 10000Hz")
    if H <= 0:  # a non-positive hop would never advance the sound pointer
        raise ValueError("Hop size (H) smaller or equal to 0")

    hM1 = (w.size + 1) // 2            # half analysis window size by rounding
    hM2 = w.size // 2                  # half analysis window size by floor
    x = np.append(np.zeros(hM2), x)    # zeros at beginning to center first window at sample 0
    x = np.append(x, np.zeros(hM1))    # zeros at the end to analyze last sample
    pin = hM1                          # sound pointer in middle of analysis window
    pend = x.size - hM1                # last sample to start a frame
    w = w / sum(w)                     # normalize analysis window
    f0stable = 0                       # 0 means "no stable f0 yet"

    # Estimates are gathered in a list and converted once at the end; growing
    # an array with np.append would copy all earlier values on every frame.
    f0_frames = []
    while pin < pend:
        x1 = x[pin - hM1:pin + hM2]                                     # select frame
        mX, pX = dftModel.dft_anal(x1, w, N)                            # compute dft
        ploc = utilFunctions.peakDetection(mX, t)                       # detect peak locations
        iploc, ipmag, ipphase = utilFunctions.peakInterp(mX, pX, ploc)  # refine peak values
        ipfreq = fs * iploc / N                                         # convert locations to Hz
        f0t = utilFunctions.f0Twm(ipfreq, ipmag, f0et, minf0, maxf0, f0stable)  # find f0
        # f0 counts as stable when it is the first positive estimate, or when
        # it stays within a fifth of the previous stable value
        if ((f0stable == 0) and (f0t > 0)) or \
                ((f0stable > 0) and (np.abs(f0stable - f0t) < f0stable / 5.0)):
            f0stable = f0t
        else:
            f0stable = 0
        f0_frames.append(f0t)          # add f0 to output
        pin += H                       # advance sound pointer

    # Always hand back a float ndarray, including when no frame was analysed.
    return np.array(f0_frames, dtype=float)