# Assumed imports, following the usual sms-tools conventions; the exact module
# paths depend on where the sms-tools 'software/models' directory lives.
import os
import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import get_window
import utilFunctions as UF
import hprModel as HPR
import stft as STFT


def main(inputFile, window='blackman', M=601, N=1024, t=-100, minSineDur=0.1,
         nH=100, minf0=350, maxf0=700, f0et=5, harmDevSlope=0.01):
    # size of fft used in synthesis
    Ns = 512

    # hop size (has to be 1/4 of Ns)
    H = 128

    # read input sound
    (fs, x) = UF.wavread(inputFile)

    # compute analysis window
    w = get_window(window, M)

    # find harmonics and residual
    hfreq, hmag, hphase, xr = HPR.hprModelAnal(x, fs, w, N, H, t, minSineDur, nH, minf0, maxf0, f0et, harmDevSlope)

    # compute spectrogram of residual
    mXr, pXr = STFT.stftAnal(xr, fs, w, N, H)

    # synthesize hpr model
    y, yh = HPR.hprModelSynth(hfreq, hmag, hphase, xr, Ns, H, fs)

    # output sound files (monophonic with sampling rate of 44100)
    outputFileSines = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_hprModel_sines.wav'
    outputFileResidual = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_hprModel_residual.wav'
    outputFile = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_hprModel.wav'

    # write sound files for harmonics, residual, and the sum
    UF.wavwrite(yh, fs, outputFileSines)
    UF.wavwrite(xr, fs, outputFileResidual)
    UF.wavwrite(y, fs, outputFile)
def main(inputFile='../../sounds/sax-phrase-short.wav', window='blackman', M=601, N=1024, t=-100,
         minSineDur=0.1, nH=100, minf0=350, maxf0=700, f0et=5, harmDevSlope=0.01):
    """
    Perform analysis/synthesis using the harmonic plus residual model
    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size; N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks
    nH: maximum number of harmonics; minf0: minimum fundamental frequency in sound
    maxf0: maximum fundamental frequency in sound; f0et: maximum error accepted in f0 detection algorithm
    harmDevSlope: allowed deviation of harmonic tracks, higher harmonics have higher allowed deviation
    """

    # size of fft used in synthesis
    Ns = 512

    # hop size (has to be 1/4 of Ns)
    H = 128

    # read input sound
    (fs, x) = UF.wavread(inputFile)

    # compute analysis window
    w = get_window(window, M)

    # find harmonics and residual
    hfreq, hmag, hphase, xr = HPR.hprModelAnal(x, fs, w, N, H, t, minSineDur, nH, minf0, maxf0, f0et, harmDevSlope)

    # compute spectrogram of residual
    mXr, pXr = STFT.stftAnal(xr, fs, w, N, H)

    # synthesize hpr model
    y, yh = HPR.hprModelSynth(hfreq, hmag, hphase, xr, Ns, H, fs)

    # output sound files (monophonic with sampling rate of 44100)
    outputFileSines = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_hprModel_sines.wav'
    outputFileResidual = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_hprModel_residual.wav'
    outputFile = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_hprModel.wav'

    # write sound files for harmonics, residual, and the sum
    UF.wavwrite(yh, fs, outputFileSines)
    UF.wavwrite(xr, fs, outputFileResidual)
    UF.wavwrite(y, fs, outputFile)

    return x, fs, mXr, hfreq, y
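
# Hypothetical usage of the main() variant above (all values are assumptions for
# illustration): run the HPR analysis/synthesis and overlay the returned harmonic
# tracks on a simple time/frequency plot.
x, fs, mXr, hfreq, y = main('../../sounds/sax-phrase-short.wav')

H = 128                                          # must match the hop size used inside main()
numFrames = hfreq.shape[0]
frmTime = H * np.arange(numFrames) / float(fs)   # frame times in seconds
harms = np.where(hfreq > 0, hfreq, np.nan)       # hide inactive harmonics
plt.plot(frmTime, harms, color='k', alpha=0.7)
plt.xlabel('time (s)')
plt.ylabel('frequency (Hz)')
plt.show()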
def makeSound(hfreq, hmag, hphase, xr, Ns, H, fs, cutPoint, padValue=0):
    """
    Constructs a sound from the first (cutPoint + padValue) frames of the
    harmonic tracks plus the residual; padValue is taken as an explicit
    argument here and defaults to 0.
    """
    # truncate the harmonic tracks at the (padded) cut point
    cutPoint = cutPoint + padValue
    hfreq = np.resize(hfreq, (cutPoint, hfreq.shape[1]))
    hmag = np.resize(hmag, (cutPoint, hmag.shape[1]))
    hphase = np.resize(hphase, (cutPoint, hphase.shape[1]))

    # resynthesize harmonics plus residual
    y, yh = HPR.hprModelSynth(hfreq, hmag, hphase, xr, Ns, H, fs)
    return y
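
# Hypothetical use of makeSound (all names and values are assumptions for illustration):
# resynthesize only the first 0.5 s of the harmonic content, with hfreq, hmag, hphase,
# xr and fs taken from HPR.hprModelAnal as in the functions above.
Ns = 512
H = 128
cutTime = 0.5                               # seconds of harmonic content to keep
cutPoint = int(round(cutTime * fs / H))     # convert seconds to analysis frames
yCut = makeSound(hfreq, hmag, hphase, xr, Ns, H, fs, cutPoint)
UF.wavwrite(yCut, fs, 'output_sounds/cut_hprModel.wav')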
def main(inputFile='../../sounds/sax-phrase-short.wav', window='blackman', M=601, N=1024, t=-100,
         minSineDur=0.1, nH=100, minf0=350, maxf0=700, f0et=5, harmDevSlope=0.01):
    """
    Perform analysis/synthesis using the harmonic plus residual model
    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size; N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks
    nH: maximum number of harmonics; minf0: minimum fundamental frequency in sound
    maxf0: maximum fundamental frequency in sound; f0et: maximum error accepted in f0 detection algorithm
    harmDevSlope: allowed deviation of harmonic tracks, higher harmonics have higher allowed deviation
    """

    # size of fft used in synthesis
    Ns = 512

    # hop size (has to be 1/4 of Ns)
    H = 128

    # read input sound
    (fs, x) = UF.wavread(inputFile)

    # compute analysis window
    w = get_window(window, M)

    # find harmonics and residual
    hfreq, hmag, hphase, xr = HPR.hprModelAnal(x, fs, w, N, H, t, minSineDur, nH, minf0, maxf0, f0et, harmDevSlope)

    # compute spectrogram of residual
    mXr, pXr = STFT.stftAnal(xr, w, N, H)

    # synthesize hpr model
    y, yh = HPR.hprModelSynth(hfreq, hmag, hphase, xr, Ns, H, fs)

    # output sound files (monophonic with sampling rate of 44100)
    outputFileSines = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_hprModel_sines.wav'
    outputFileResidual = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_hprModel_residual.wav'
    outputFile = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_hprModel.wav'

    # write sound files for harmonics, residual, and the sum
    UF.wavwrite(yh, fs, outputFileSines)
    UF.wavwrite(xr, fs, outputFileResidual)
    UF.wavwrite(y, fs, outputFile)

    # create figure to plot
    plt.figure(figsize=(12, 9))

    # frequency range to plot
    maxplotfreq = 5000.0

    # plot the input sound
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / float(fs), x)
    plt.axis([0, x.size / float(fs), min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # plot the magnitude spectrogram of residual
    plt.subplot(3, 1, 2)
    maxplotbin = int(N * maxplotfreq / fs)
    numFrames = int(mXr[:, 0].size)
    frmTime = H * np.arange(numFrames) / float(fs)
    binFreq = np.arange(maxplotbin + 1) * float(fs) / N
    plt.pcolormesh(frmTime, binFreq, np.transpose(mXr[:, :maxplotbin + 1]))
    plt.autoscale(tight=True)

    # plot harmonic frequencies on residual spectrogram
    if (hfreq.shape[1] > 0):
        harms = hfreq * np.less(hfreq, maxplotfreq)
        harms[harms == 0] = np.nan
        numFrames = int(harms[:, 0].size)
        frmTime = H * np.arange(numFrames) / float(fs)
        plt.plot(frmTime, harms, color='k', ms=3, alpha=1)
        plt.xlabel('time(s)')
        plt.ylabel('frequency(Hz)')
        plt.autoscale(tight=True)
        plt.title('harmonics + residual spectrogram')

    # plot the output sound
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()
    plt.ion()
    plt.show()
# Frequency-transformation example (fragment). The names inputFile, window, M, N, t,
# minSineDur, nH, minf0, maxf0, f0et and harmDevSlope are assumed to be defined earlier,
# and HT is assumed to be the sms-tools harmonicTransformations module.
Ns = 512
H = 128

(fs, x) = UF.wavread(inputFile)
w = get_window(window, M)

# harmonic plus residual analysis
hfreq, hmag, hphase, xr = HPR.hprModelAnal(x, fs, w, N, H, t, minSineDur, nH, minf0, maxf0, f0et, harmDevSlope)
mXr, pXr = STFT.stftAnal(xr, w, N, H)

# frequency scaling and stretching envelopes, given as time-value pairs
freqScaling = np.array([0, 1.5, 1, 1.5])
freqStretching = np.array([0, 1.1, 1, 1.1])
timbrePreservation = 1

# scale and stretch the harmonic frequencies, preserving the spectral envelope
hfreqt, hmagt = HT.harmonicFreqScaling(hfreq, hmag, freqScaling, freqStretching, timbrePreservation, fs)

# resynthesize the transformed harmonics plus the original residual
y, yh = HPR.hprModelSynth(hfreqt, hmagt, np.array([]), xr, Ns, H, fs)
UF.wavwrite(y, fs, "hpr-freq-transformation.wav")

# plot the input sound and the residual spectrogram
plt.figure(figsize=(12, 9))
maxplotfreq = 15000.0

plt.subplot(4, 1, 1)
plt.plot(np.arange(x.size) / float(fs), x)
plt.axis([0, x.size / float(fs), min(x), max(x)])
plt.title("x (flute-A4.wav)")

plt.subplot(4, 1, 2)
maxplotbin = int(N * maxplotfreq / fs)
numFrames = int(mXr[:, 0].size)
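
# Illustrative variation (all values are assumptions): freqScaling and freqStretching
# are time-value pairs that HT.harmonicFreqScaling interpolates over the frames of the
# sound, with times taken relative to the last time in the array. The envelope below
# glides from no transposition at the start to an octave up at the end, with no stretching.
freqScaling = np.array([0, 1.0, 1, 2.0])        # [time, factor, time, factor]
freqStretching = np.array([0, 1.0, 1, 1.0])     # factor 1 everywhere: no stretching
timbrePreservation = 1
hfreqt, hmagt = HT.harmonicFreqScaling(hfreq, hmag, freqScaling, freqStretching, timbrePreservation, fs)
yGliss, yhGliss = HPR.hprModelSynth(hfreqt, hmagt, np.array([]), xr, Ns, H, fs)
UF.wavwrite(yGliss, fs, "hpr-freq-glissando.wav")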
# The functions below assume that numpy (np), scipy.signal.windows (windows),
# scipy.interpolate (interpolate), the sms-tools-style hprModelAnal / hprModelSynth /
# sineModelSynth functions and a true-envelope module (fe) are available in the module
# namespace; the exact import paths depend on the surrounding project.

def pitch_shift_te(audio_inp, params, factor, choice_recon, params_ceps):
    """
    Shifts the pitch by the scalar factor given as the input.
    Performs interpolation by using the true envelope of the spectrum.
    Also returns the sound with or without the original residual added.

    Parameters
    ----------
    audio_inp : np.array
        Numpy array containing the audio signal, in the time domain
    params : dict
        Parameter dictionary for the sine model, containing the following keys
            - fs : integer
                Sampling rate of the audio
            - W : integer
                Window size (in samples)
            - N : integer
                FFT size (power of 2)
            - H : integer
                Hop size
            - t : float
                Threshold for sinusoidal detection in dB
            - maxnSines : integer
                Number of sinusoids to detect
    factor : float
        Shift factor for the pitch. New pitch = factor * (old pitch)
    choice_recon : 0 or 1
        If 0, returns only the sinusoidal reconstruction
        If 1, adds the original residual as well to the sinusoids
    params_ceps : dict
        Parameter dictionary for the true envelope estimation, containing the following keys
            - thresh : float
                Threshold (in dB) for the true envelope estimation
            - ceps_coeffs : integer
                Number of cepstral coefficients to keep in the true envelope estimation
            - num_iters : integer
                Upper bound on the number of iterations (if no convergence)

    Returns
    -------
    audio_transformed : np.array
        The transformed signal in the time domain
    residue : np.array
        Residual of the original signal
    """

    fs = params['fs']
    W = params['W']
    N = params['N']
    H = params['H']
    t = params['t']
    maxnSines = params['maxnSines']

    thresh = params_ceps['thresh']
    ceps_coeffs = params_ceps['ceps_coeffs']
    num_iters = params_ceps['num_iters']

    w = windows.hann(W)

    # harmonic plus residual analysis
    F, M, P, R = hprModelAnal(x=audio_inp, fs=fs, w=w, N=N, H=H, t=t, nH=maxnSines, minSineDur=0.1,
                              minf0=10, maxf0=1000, f0et=5, harmDevSlope=0.01)

    # shift the harmonic frequencies
    scaled_F = factor * F

    # copy so the analysed magnitudes are not overwritten in place
    new_M = np.copy(M)
    for i in range(F.shape[0]):
        # Performing the envelope interpolation framewise (normalized log, i.e. dividing the magnitude by 20)
        f = interpolate.interp1d(F[i, :], M[i, :] / 20, kind='linear', fill_value=-5, bounds_error=False)
        # Frequency bins
        fbins = np.linspace(0, fs / 2, N)
        finp = f(fbins)
        specenv, _, _ = fe.calc_true_envelope_spectral(finp, N, thresh, ceps_coeffs, num_iters)
        # Once the spectral envelope is obtained, define an interpolating function based on it
        # fp = interpolate.interp1d(np.linspace(0, fs/2, N), np.pad(specenv[0:N//2], [0, N//2], mode='constant', constant_values=(0, -5)), kind='linear', fill_value=-5, bounds_error=False)
        fp = interpolate.interp1d(fbins[:N // 2 + 1], specenv[:N // 2 + 1], kind='linear',
                                  fill_value='extrapolate', bounds_error=False)
        # sample the envelope at the shifted harmonic frequencies
        new_M[i, :] = 20 * fp(scaled_F[i, :])

    if choice_recon == 0:
        audio_transformed = sineModelSynth(scaled_F, new_M, np.empty([0, 0]), W, H, fs)
    else:
        audio_transformed = hprModelSynth(scaled_F, new_M, np.empty([0, 0]), R, W, H, fs)[0]

    return audio_transformed, R
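
# Hypothetical call to pitch_shift_te: shift a synthetic harmonic tone up a fifth
# (factor 1.5) while reshaping the magnitudes with the true envelope. All parameter
# values below are illustrative assumptions, not values prescribed by the function.
fs = 44100
tvec = np.arange(int(1.0 * fs)) / fs
audio_inp = 0.5 * np.sin(2 * np.pi * 440 * tvec) + 0.2 * np.sin(2 * np.pi * 880 * tvec)

params = {'fs': fs, 'W': 1001, 'N': 2048, 'H': 256, 't': -90, 'maxnSines': 100}
params_ceps = {'thresh': 0.1, 'ceps_coeffs': 60, 'num_iters': 100}
shifted, residue = pitch_shift_te(audio_inp, params, factor=1.5, choice_recon=1, params_ceps=params_ceps)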
def pitch_shifting(audio_inp, params, factor, choice, choice_recon):
    """
    Shifts the pitch by the scalar factor given as the input.
    Depending on the choice, performs interpolation to preserve the timbre when shifting the pitch.
    Also returns the sound with or without the original residual added.

    Parameters
    ----------
    audio_inp : np.array
        Numpy array containing the audio signal, in the time domain
    params : dict
        Parameter dictionary for the sine model, containing the following keys
            - fs : Sampling rate of the audio
            - W : Window size (in samples)
            - N : FFT size (power of 2)
            - H : Hop size
            - t : Threshold for sinusoidal detection in dB
            - maxnSines : Number of sinusoids to detect
    factor : float
        Shift factor for the pitch. New pitch = factor * (old pitch)
    choice : 0 or 1
        If 0, simply shifts the pitch without amplitude interpolation
        If 1, performs amplitude interpolation framewise to preserve timbre
    choice_recon : 0 or 1
        If 0, returns only the sinusoidal reconstruction
        If 1, adds the original residual as well to the sinusoids

    Returns
    -------
    audio_transformed : np.array
        The transformed signal in the time domain
    residue : np.array
        The residual of the signal
    """

    fs = params['fs']
    W = params['W']
    N = params['N']
    H = params['H']
    t = params['t']
    maxnSines = params['maxnSines']

    w = windows.hann(W)

    # harmonic plus residual analysis
    F, M, P, R = hprModelAnal(x=audio_inp, fs=fs, w=w, N=N, H=H, t=t, nH=maxnSines, minSineDur=0.02,
                              minf0=10, maxf0=400, f0et=5, harmDevSlope=0.01)

    # shift the harmonic frequencies
    scaled_F = factor * F

    if choice == 0:
        # keep the original magnitudes (timbre follows the pitch shift)
        new_M = M
    else:
        # resample the magnitude envelope at the shifted frequencies to preserve timbre
        # (copy so the analysed magnitudes are not overwritten in place)
        new_M = np.copy(M)
        for i in range(F.shape[0]):
            # Performing the envelope interpolation framewise
            f = interpolate.interp1d(F[i, :], M[i, :], kind='linear', fill_value=-100, bounds_error=False)
            new_M[i, :] = f(scaled_F[i, :])

    if choice_recon == 0:
        audio_transformed = sineModelSynth(scaled_F, new_M, np.empty([0, 0]), W, H, fs)
    else:
        audio_transformed = hprModelSynth(scaled_F, new_M, np.empty([0, 0]), R, W, H, fs)[0]

    return audio_transformed, R
def pitch_shifting_harmonic(audio_inp, params, params_ceps, factor, choice, choice_recon, f0):
    """
    Shifts the pitch by the scalar factor given as the input, but assumes the sound is harmonic
    and hence uses only the amplitudes sampled at multiples of the fundamental frequency.
    Note: will only perform well for harmonic/sustained sounds.
    Depending on the choice, performs interpolation to preserve the timbre when shifting the pitch.
    Also returns the sound with or without the original residual added.

    Parameters
    ----------
    audio_inp : np.array
        Numpy array containing the audio signal, in the time domain
    params : dict
        Parameter dictionary for the sine model, containing the following keys
            - fs : Sampling rate of the audio
            - W : Window size (in samples)
            - N : FFT size (power of 2)
            - H : Hop size
            - t : Threshold for sinusoidal detection in dB
            - maxnSines : Number of sinusoids to detect
    params_ceps : dict
        Parameter dictionary for the true envelope estimation (thresh, ceps_coeffs, num_iters)
    factor : float
        Shift factor for the pitch. New pitch = factor * (old pitch)
    choice : 0, 1 or 2
        If 0, simply shifts the pitch without amplitude interpolation
        If 1, performs amplitude interpolation framewise to preserve timbre
        If 2, uses the true envelope of the amplitude spectrum to sample the points from
    choice_recon : 0 or 1
        If 0, returns only the sinusoidal reconstruction
        If 1, adds the original residual as well to the sinusoids
    f0 : Hz
        The fundamental frequency of the note

    Returns
    -------
    audio_transformed : np.array
        The transformed signal in the time domain
    """

    fs = params['fs']
    W = params['W']
    N = params['N']
    H = params['H']
    t = params['t']
    maxnSines = params['maxnSines']

    thresh = params_ceps['thresh']
    ceps_coeffs = params_ceps['ceps_coeffs']
    num_iters = params_ceps['num_iters']

    w = windows.hann(W)

    # harmonic plus residual analysis
    F, M, P, R = hprModelAnal(x=audio_inp, fs=fs, w=w, N=N, H=H, t=t, nH=maxnSines, minSineDur=0.02,
                              minf0=10, maxf0=1000, f0et=5, harmDevSlope=0.01)

    # replace the analysed track frequencies by exact harmonics of the given f0
    new_F = np.zeros_like(F)
    for i in range(F.shape[1]):
        new_F[:, i] = (i + 1) * f0

    # shift the harmonic frequencies
    scaled_F = factor * new_F

    if choice == 0:
        # keep the original magnitudes
        new_M = M
    elif choice == 1:
        # resample the magnitude envelope at the shifted frequencies
        # (copy so the analysed magnitudes are not overwritten in place)
        new_M = np.copy(M)
        for i in range(F.shape[0]):
            # Performing the envelope interpolation framewise
            f = interpolate.interp1d(F[i, :], M[i, :], kind='linear', fill_value=-100, bounds_error=False)
            new_M[i, :] = f(scaled_F[i, :])
    else:
        # sample the true envelope of the spectrum at the shifted frequencies
        new_M = np.copy(M)
        for i in range(F.shape[0]):
            # Performing the envelope interpolation framewise (normalized log, i.e. dividing the magnitude by 20)
            f = interpolate.interp1d(F[i, :], M[i, :] / 20, kind='linear', fill_value=-5, bounds_error=False)
            # Frequency bins
            fbins = np.linspace(0, fs / 2, 2 * N)
            finp = f(fbins)
            specenv, _, _ = fe.calc_true_envelope_spectral(finp, N, thresh, ceps_coeffs, num_iters)
            # Once the spectral envelope is obtained, define an interpolating function based on it
            fp = interpolate.interp1d(fbins[:N // 2 + 1], specenv[:N // 2 + 1], kind='linear',
                                      fill_value='extrapolate', bounds_error=False)
            new_M[i, :] = 20 * fp(scaled_F[i, :])

    if choice_recon == 0:
        audio_transformed = sineModelSynth(scaled_F, new_M, np.empty([0, 0]), W, H, fs)
    else:
        audio_transformed = hprModelSynth(scaled_F, new_M, np.empty([0, 0]), R, W, H, fs)[0]

    return audio_transformed
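
# Hypothetical call to pitch_shifting_harmonic: transpose a sustained A4 (f0 = 440 Hz)
# up a whole tone while sampling the true spectral envelope (choice=2). All values are
# illustrative assumptions; audio_inp here is a synthetic harmonic tone.
fs = 44100
tvec = np.arange(int(1.0 * fs)) / fs
audio_inp = 0.5 * np.sin(2 * np.pi * 440 * tvec) + 0.2 * np.sin(2 * np.pi * 880 * tvec)

params = {'fs': fs, 'W': 1001, 'N': 2048, 'H': 256, 't': -90, 'maxnSines': 100}
params_ceps = {'thresh': 0.1, 'ceps_coeffs': 60, 'num_iters': 100}
shifted = pitch_shifting_harmonic(audio_inp, params, params_ceps, factor=2 ** (2 / 12.0),
                                  choice=2, choice_recon=1, f0=440.0)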
def makeY(hfreq, hmag, hphase, xr, Ns, H, fs):
    # resynthesize the harmonic plus residual model and return the summed signal
    y, yh = HPR.hprModelSynth(hfreq, hmag, hphase, xr, Ns, H, fs)
    return y