import os
import random

import numpy as np
import matplotlib.pyplot as plt
import soundfile as sf
import essentia.standard as ess
from scipy import interpolate
from scipy.signal import get_window, windows

# sms-tools modules (assuming the sms-tools 'models' directory is on the path)
import utilFunctions as UF
import hprModel as HPR
import stft as STFT
import harmonicTransformations as HT
from utilFunctions import norm_fact
from hprModel import hprModelAnal, hprModelSynth
from sineModel import sineModelSynth
from stft import stftSynth
# 'fe' below refers to this repository's true-envelope/LPC helper module;
# its import path is not shown in these snippets.


def main(inputFile, window='blackman', M=601, N=1024, t=-100, minSineDur=0.1,
         nH=100, minf0=350, maxf0=700, f0et=5, harmDevSlope=0.01):
    # size of fft used in synthesis
    Ns = 512
    # hop size (has to be 1/4 of Ns)
    H = 128
    # read input sound
    (fs, x) = UF.wavread(inputFile)
    # compute analysis window
    w = get_window(window, M)
    # find harmonics and residual
    hfreq, hmag, hphase, xr = HPR.hprModelAnal(x, fs, w, N, H, t, minSineDur, nH, minf0, maxf0, f0et, harmDevSlope)
    # compute spectrogram of residual
    mXr, pXr = STFT.stftAnal(xr, w, N, H)
    # synthesize hpr model
    y, yh = HPR.hprModelSynth(hfreq, hmag, hphase, xr, Ns, H, fs)
    # output sound files (monophonic with sampling rate of 44100)
    outputFileSines = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_hprModel_sines.wav'
    outputFileResidual = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_hprModel_residual.wav'
    outputFile = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_hprModel.wav'
    # write sound files for the harmonics, the residual, and their sum
    UF.wavwrite(yh, fs, outputFileSines)
    UF.wavwrite(xr, fs, outputFileResidual)
    UF.wavwrite(y, fs, outputFile)
def construct(inputFile, window='blackman', M=601, N=1024, t=-100, minSineDur=0.1,
              nH=100, minf0=350, maxf0=700, f0et=5, harmDevSlope=0.01):
    """ Analyzes a wav file with the HPR model and returns the analysis arrays """
    Ns = 512
    H = 128
    # read the input sound with pysoundfile and normalize it to float32
    (x, fs) = sf.read(inputFile)
    x = np.float32(x) / norm_fact[x.dtype.name]
    # compute analysis window
    w = get_window(window, M)
    # find harmonics and residual
    hfreq, hmag, hphase, xr = HPR.hprModelAnal(x, fs, w, N, H, t, minSineDur, nH, minf0, maxf0, f0et, harmDevSlope)
    return hfreq, hmag, hphase, xr, fs, x
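
# A minimal usage sketch for construct(), assuming a hypothetical input file
# 'note.wav' and that the modules imported above are available.
def example_construct():
    hfreq, hmag, hphase, xr, fs, x = construct('note.wav')
    # hfreq/hmag have one row per analysis frame and one column per harmonic track
    print(hfreq.shape, hmag.shape)
    # xr is the time-domain residual, roughly as long as the input signal
    print(len(xr), len(x), fs)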
def main(inputFile='../../sounds/sax-phrase-short.wav', window='blackman', M=601, N=1024,
         t=-100, minSineDur=0.1, nH=100, minf0=350, maxf0=700, f0et=5, harmDevSlope=0.01):
    """
    Perform analysis/synthesis using the harmonic plus residual model
    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size; N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks
    nH: maximum number of harmonics; minf0: minimum fundamental frequency in sound
    maxf0: maximum fundamental frequency in sound; f0et: maximum error accepted in f0 detection algorithm
    harmDevSlope: allowed deviation of harmonic tracks, higher harmonics have higher allowed deviation
    """
    # size of fft used in synthesis
    Ns = 512
    # hop size (has to be 1/4 of Ns)
    H = 128
    # read input sound
    (fs, x) = UF.wavread(inputFile)
    # compute analysis window
    w = get_window(window, M)
    # find harmonics and residual
    hfreq, hmag, hphase, xr = HPR.hprModelAnal(x, fs, w, N, H, t, minSineDur, nH, minf0, maxf0, f0et, harmDevSlope)
    # compute spectrogram of residual
    mXr, pXr = STFT.stftAnal(xr, w, N, H)
    # synthesize hpr model
    y, yh = HPR.hprModelSynth(hfreq, hmag, hphase, xr, Ns, H, fs)
    # output sound files (monophonic with sampling rate of 44100)
    outputFileSines = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_hprModel_sines.wav'
    outputFileResidual = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_hprModel_residual.wav'
    outputFile = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_hprModel.wav'
    # write sound files for the harmonics, the residual, and their sum
    UF.wavwrite(yh, fs, outputFileSines)
    UF.wavwrite(xr, fs, outputFileResidual)
    UF.wavwrite(y, fs, outputFile)
    return x, fs, mXr, hfreq, y
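
# The script form in which this function is typically driven: a sketch following
# the usual sms-tools convention, assuming the default input path's directory
# layout and an existing output_sounds/ directory.
if __name__ == "__main__":
    x, fs, mXr, hfreq, y = main()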
def cc_calc(audio_inp, params, params_ceps):
    """
    Calculates the framewise cepstral coefficients for the true envelope of the audio file.

    Parameters
    ----------
    audio_inp : np.array
        Numpy array containing the audio signal, in the time domain
    params : dict
        Parameter dictionary for the sine model containing the following keys
            - fs : Sampling rate of the audio
            - W : Window size (in samples)
            - N : FFT size (power of 2)
            - H : Hop size
            - t : Threshold for sinusoidal detection in dB
            - maxnSines : Number of sinusoids to detect
    params_ceps : dict
        Parameter dictionary for the true envelope estimation containing the following keys
            - thresh : Threshold (in dB) for the true envelope estimation
            - ceps_coeffs : Number of cepstral coefficients to keep in the true envelope estimation
            - num_iters : Upper bound on the number of iterations (if no convergence)

    Returns
    -------
    CC : np.array
        Matrix of framewise cepstral coefficients, one row of N coefficients per analysis frame
    """
    fs = params['fs']
    W = params['W']
    N = params['N']
    H = params['H']
    t = params['t']
    maxnSines = params['maxnSines']
    thresh = params_ceps['thresh']
    ceps_coeffs = params_ceps['ceps_coeffs']
    num_iters = params_ceps['num_iters']

    w = windows.hann(W)
    F, M, P, R = hprModelAnal(x=audio_inp, fs=fs, w=w, N=N, H=H, t=t, nH=maxnSines,
                              minSineDur=0.02, minf0=10, maxf0=1000, f0et=5, harmDevSlope=0.01)

    # Cepstral Coefficients Calculation
    CC = np.zeros((F.shape[0], N))
    for i in range(F.shape[0]):
        # Performing the envelope interpolation framewise (normalized log: dividing the magnitude by 20)
        f = interpolate.interp1d((F[i, :] / fs) * N, M[i, :] / 20, kind='linear',
                                 fill_value=-6, bounds_error=False)
        # Frequency bins
        fbins = np.arange(N)
        finp = f(fbins)
        # Build an even-symmetric spectrum so the IFFT yields real cepstral coefficients
        zp = np.concatenate((finp[0:N // 2], np.array([0]), np.flip(finp[1:N // 2])))
        specenv, _, _ = fe.calc_true_envelope_spectral(zp, N, thresh, ceps_coeffs, num_iters)
        CC[i, :] = np.real(np.fft.ifft(specenv))

    return CC
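
# A usage sketch for cc_calc() with hypothetical parameter values; suitable
# window/FFT/threshold settings depend on the material being analyzed.
def example_cc_calc(audio, fs):
    params = {'fs': fs, 'W': 1024, 'N': 2048, 'H': 256, 't': -120, 'maxnSines': 150}
    params_ceps = {'thresh': 0.1, 'ceps_coeffs': 60, 'num_iters': 100}
    CC = cc_calc(audio, params, params_ceps)
    # one row of N cepstral coefficients per analysis frame
    print(CC.shape)
    return CC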
def main(inputFile='../../sounds/sax-phrase-short.wav', window='blackman', M=601, N=1024,
         t=-100, minSineDur=0.1, nH=100, minf0=350, maxf0=700, f0et=5, harmDevSlope=0.01):
    """
    Perform analysis/synthesis using the harmonic plus residual model
    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size; N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks
    nH: maximum number of harmonics; minf0: minimum fundamental frequency in sound
    maxf0: maximum fundamental frequency in sound; f0et: maximum error accepted in f0 detection algorithm
    harmDevSlope: allowed deviation of harmonic tracks, higher harmonics have higher allowed deviation
    """
    # size of fft used in synthesis
    Ns = 512
    # hop size (has to be 1/4 of Ns)
    H = 128
    # read input sound
    (fs, x) = UF.wavread(inputFile)
    # compute analysis window
    w = get_window(window, M)
    # find harmonics and residual
    hfreq, hmag, hphase, xr = HPR.hprModelAnal(x, fs, w, N, H, t, minSineDur, nH, minf0, maxf0, f0et, harmDevSlope)
    # compute spectrogram of residual
    mXr, pXr = STFT.stftAnal(xr, w, N, H)
    # synthesize hpr model
    y, yh = HPR.hprModelSynth(hfreq, hmag, hphase, xr, Ns, H, fs)
    # output sound files (monophonic with sampling rate of 44100)
    outputFileSines = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_hprModel_sines.wav'
    outputFileResidual = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_hprModel_residual.wav'
    outputFile = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_hprModel.wav'
    # write sound files for the harmonics, the residual, and their sum
    UF.wavwrite(yh, fs, outputFileSines)
    UF.wavwrite(xr, fs, outputFileResidual)
    UF.wavwrite(y, fs, outputFile)

    # create figure to plot
    plt.figure(figsize=(12, 9))

    # frequency range to plot
    maxplotfreq = 5000.0

    # plot the input sound
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / float(fs), x)
    plt.axis([0, x.size / float(fs), min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # plot the magnitude spectrogram of the residual
    plt.subplot(3, 1, 2)
    maxplotbin = int(N * maxplotfreq / fs)
    numFrames = int(mXr[:, 0].size)
    frmTime = H * np.arange(numFrames) / float(fs)
    binFreq = np.arange(maxplotbin + 1) * float(fs) / N
    plt.pcolormesh(frmTime, binFreq, np.transpose(mXr[:, :maxplotbin + 1]))
    plt.autoscale(tight=True)

    # plot harmonic frequencies on the residual spectrogram
    if (hfreq.shape[1] > 0):
        harms = hfreq * np.less(hfreq, maxplotfreq)
        harms[harms == 0] = np.nan
        numFrames = int(harms[:, 0].size)
        frmTime = H * np.arange(numFrames) / float(fs)
        plt.plot(frmTime, harms, color='k', ms=3, alpha=1)
        plt.xlabel('time(s)')
        plt.ylabel('frequency(Hz)')
        plt.autoscale(tight=True)
        plt.title('harmonics + residual spectrogram')

    # plot the output sound
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()
    plt.ion()
    plt.show()
window = "blackman" M = 801 N = 2048 t = -90 minSineDur = 0.1 nH = 40 minf0 = 350 maxf0 = 700 f0et = 8 harmDevSlope = 0.1 Ns = 512 H = 128 (fs, x) = UF.wavread(inputFile) w = get_window(window, M) hfreq, hmag, hphase, xr = HPR.hprModelAnal(x, fs, w, N, H, t, minSineDur, nH, minf0, maxf0, f0et, harmDevSlope) mXr, pXr = STFT.stftAnal(xr, w, N, H) freqScaling = np.array([0, 1.5, 1, 1.5]) freqStretching = np.array([0, 1.1, 1, 1.1]) timbrePreservation = 1 hfreqt, hmagt = HT.harmonicFreqScaling(hfreq, hmag, freqScaling, freqStretching, timbrePreservation, fs) y, yh = HPR.hprModelSynth(hfreqt, hmagt, np.array([]), xr, Ns, H, fs) UF.wavwrite(y, fs, "hpr-freq-transformation.wav") plt.figure(figsize=(12, 9))
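
# Note on the envelope format used above: freqScaling and freqStretching are flat
# arrays of (time, value) pairs that harmonicFreqScaling() interpolates over the
# duration of the sound, so a constant pair like [0, 1.5, 1, 1.5] applies a fixed
# factor. A time-varying sketch (hypothetical values):
freqScaling = np.array([0, 0.8, 0.5, 1.0, 1, 1.2])  # glide from 0.8x up to 1.2x
freqStretching = np.array([0, 1.0, 1, 1.05])         # growing inharmonicity towards the end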
def pitch_shift_te(audio_inp, params, factor, choice_recon, params_ceps):
    """
    Shifts the pitch by the scalar factor given as the input. Performs interpolation by using
    the true envelope of the spectrum. Also returns the sound with or without the original
    residue added.

    Parameters
    ----------
    audio_inp : np.array
        Numpy array containing the audio signal, in the time domain
    params : dict
        Parameter dictionary for the sine model containing the following keys
            - fs : integer
                Sampling rate of the audio
            - W : integer
                Window size (in samples)
            - N : integer
                FFT size (power of 2)
            - H : integer
                Hop size
            - t : float
                Threshold for sinusoidal detection in dB
            - maxnSines : integer
                Number of sinusoids to detect
    factor : float
        Shift factor for the pitch. New pitch = factor * (old pitch)
    choice_recon : 0 or 1
        If 0, returns only the sinusoidal reconstruction
        If 1, adds the original residue as well to the sinusoidal
    params_ceps : dict
        Parameter dictionary for the true envelope estimation containing the following keys
            - thresh : float
                Threshold (in dB) for the true envelope estimation
            - ceps_coeffs : integer
                Number of cepstral coefficients to keep in the true envelope estimation
            - num_iters : integer
                Upper bound on the number of iterations (if no convergence)

    Returns
    -------
    audio_transformed : np.array
        Returns the transformed signal in the time domain
    R : np.array
        Residue of the original signal
    """
    fs = params['fs']
    W = params['W']
    N = params['N']
    H = params['H']
    t = params['t']
    maxnSines = params['maxnSines']
    thresh = params_ceps['thresh']
    ceps_coeffs = params_ceps['ceps_coeffs']
    num_iters = params_ceps['num_iters']

    w = windows.hann(W)
    F, M, P, R = hprModelAnal(x=audio_inp, fs=fs, w=w, N=N, H=H, t=t, nH=maxnSines,
                              minSineDur=0.1, minf0=10, maxf0=1000, f0et=5, harmDevSlope=0.01)

    scaled_F = factor * F
    # copy M so the interpolated magnitudes do not overwrite the analysis matrix
    new_M = np.copy(M)
    for i in range(F.shape[0]):
        # Performing the envelope interpolation framewise (normalized log: dividing the magnitude by 20)
        f = interpolate.interp1d(F[i, :], M[i, :] / 20, kind='linear', fill_value=-5, bounds_error=False)
        # Frequency bins
        fbins = np.linspace(0, fs / 2, N)
        finp = f(fbins)
        specenv, _, _ = fe.calc_true_envelope_spectral(finp, N, thresh, ceps_coeffs, num_iters)
        # Once the spectral envelope is obtained, define an interpolating function based on it
        fp = interpolate.interp1d(fbins[:N // 2 + 1], specenv[:N // 2 + 1], kind='linear',
                                  fill_value='extrapolate', bounds_error=False)
        new_M[i, :] = 20 * fp(scaled_F[i, :])

    if (choice_recon == 0):
        audio_transformed = sineModelSynth(scaled_F, new_M, np.empty([0, 0]), W, H, fs)
    else:
        audio_transformed = hprModelSynth(scaled_F, new_M, np.empty([0, 0]), R, W, H, fs)[0]

    return audio_transformed, R
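
# A usage sketch for pitch_shift_te(), shifting a note up a whole tone
# (factor 2**(2/12)); the parameter values are hypothetical.
def example_pitch_shift_te(audio, fs):
    params = {'fs': fs, 'W': 1024, 'N': 2048, 'H': 256, 't': -120, 'maxnSines': 150}
    params_ceps = {'thresh': 0.1, 'ceps_coeffs': 80, 'num_iters': 100}
    shifted, residue = pitch_shift_te(audio, params, factor=2 ** (2 / 12),
                                      choice_recon=1, params_ceps=params_ceps)
    return shifted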
def sustain_sound_gen(audio_inp, params, params_ceps, f0, rwl, alpha):
    """
    Re-synthesizes the input audio using a random walk starting from the middle frame of the audio.

    Parameters
    ----------
    audio_inp : np.array
        Numpy array containing the audio signal, in the time domain
    params : dict
        Parameter dictionary for the sine model containing the following keys
            - fs : integer
                Sampling rate of the audio
            - W : integer
                Window size (in samples)
            - N : integer
                FFT size (power of 2)
            - H : integer
                Hop size
            - t : float
                Threshold for sinusoidal detection in dB
            - maxnSines : integer
                Number of sinusoids to detect
    params_ceps : dict
        Parameter dictionary for the true envelope estimation containing the following keys
            - thresh : float
                Threshold (in dB) for the true envelope estimation
            - ceps_coeffs : integer
                Number of cepstral coefficients to keep in the true envelope estimation
            - num_iters : integer
                Upper bound on the number of iterations (if no convergence)
    f0 : float
        Fundamental frequency (or pitch) of the note
    rwl : integer
        Number of hops to consider around the middle frame
    alpha : float (0 < alpha < 1)
        Closeness to the current frame (for continuity of the spectral frames during reconstruction)

    Returns
    -------
    audio_transformed : np.array
        Returns the transformed signal in the time domain
    """
    fs = params['fs']
    W = params['W']
    N = params['N']
    H = params['H']
    t = params['t']
    maxnSines = params['maxnSines']
    thresh = params_ceps['thresh']
    ceps_coeffs = params_ceps['ceps_coeffs']
    num_iters = params_ceps['num_iters']

    w = windows.hann(W)
    F, M, P, R = hprModelAnal(x=audio_inp, fs=fs, w=w, N=N, H=H, t=t, nH=maxnSines,
                              minSineDur=0.02, minf0=10, maxf0=1000, f0et=5, harmDevSlope=0.01)

    # Define the frequency matrix as integer multiples of the fundamental
    new_F = np.zeros_like(F)
    for i in range(F.shape[1]):
        new_F[:, i] = (i + 1) * f0
    # copy M so the reconstruction does not overwrite the analysis matrix
    new_M = np.copy(M)

    # Initial parameters for the random walk: start at the middle frame
    midpoint = F.shape[0] // 2
    current_frame = midpoint
    f = interpolate.interp1d(F[current_frame, :], M[current_frame, :] / 20, kind='linear',
                             fill_value=-5, bounds_error=False)
    # Frequency bins
    fbins = np.linspace(0, fs / 2, N)
    finp = f(fbins)
    specenv_at, _, _ = fe.calc_true_envelope_spectral(finp, N, thresh, ceps_coeffs, num_iters)

    # Reconstruct the magnitude array frame by frame from the random walk around the middle frame
    for i in range(M.shape[0]):
        # Updating the current frame as per a random walk update, clamped to the valid frame range
        current_frame = current_frame + random.choice([-rwl, rwl])
        if (current_frame >= M.shape[0] - 1):
            current_frame = M.shape[0] - 1
        if (current_frame <= 0):
            current_frame = 0
        f = interpolate.interp1d(F[current_frame, :], M[current_frame, :] / 20, kind='linear',
                                 fill_value=-5, bounds_error=False)
        finp = f(fbins)
        specenv_new, _, _ = fe.calc_true_envelope_spectral(finp, N, thresh, ceps_coeffs, num_iters)
        # Once the current and new envelopes are obtained, interpolate to obtain the intermediate envelope.
        # The closer alpha is to 1, the less the envelope changes from its current value
        specenv_at = alpha * specenv_at + (1 - alpha) * specenv_new
        # Define an interpolating function based on the spectral envelope
        fp = interpolate.interp1d(fbins[:N // 2 + 1], specenv_at[:N // 2 + 1], kind='linear',
                                  fill_value='extrapolate', bounds_error=False)
        new_M[i, :] = 20 * fp(new_F[i, :])

    # Reconstruction of the sound, ignoring the residual
    audio_transformed = sineModelSynth(new_F, new_M, np.empty([0, 0]), W, H, fs)

    return audio_transformed
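
# A usage sketch for sustain_sound_gen(): "sustaining" a note by re-synthesizing
# it from a random walk over its analysis frames (hypothetical values).
def example_sustain(audio, fs, f0=440.0):
    params = {'fs': fs, 'W': 1024, 'N': 2048, 'H': 256, 't': -120, 'maxnSines': 150}
    params_ceps = {'thresh': 0.1, 'ceps_coeffs': 80, 'num_iters': 100}
    # rwl = 1 keeps consecutive frames close; alpha near 1 changes the envelope slowly
    return sustain_sound_gen(audio, params, params_ceps, f0, rwl=1, alpha=0.9)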
def morph_samepitch_lsf(audio_inp1, audio_inp2, alpha, f0, params, params_ceps):
    """
    Timbre morphing between two sounds of the same pitch by linearly interpolating the LSF
    representation of the true envelope (obtained from its LPC/cepstral representation).

    Parameters
    ----------
    audio_inp1 : np.array
        Numpy array containing the first audio signal, in the time domain
    audio_inp2 : np.array
        Numpy array containing the second audio signal, in the time domain
    alpha : float
        Interpolation factor (0 <= alpha <= 1), alpha*audio1 + (1 - alpha)*audio2
    f0 : float
        Fundamental frequency (to reconstruct harmonics)
    params : dict
        Parameter dictionary for the sine model containing the following keys
            - fs : integer
                Sampling rate of the audio
            - W : integer
                Window size (in samples)
            - N : integer
                FFT size (power of 2)
            - H : integer
                Hop size
            - t : float
                Threshold for sinusoidal detection in dB
            - maxnSines : integer
                Number of sinusoids to detect
    params_ceps : dict
        Parameter dictionary for the true envelope estimation containing the following keys
            - thresh : float
                Threshold (in dB) for the true envelope estimation
            - ceps_coeffs : integer
                Number of cepstral coefficients to keep in the true envelope estimation
            - num_iters : integer
                Upper bound on the number of iterations (if no convergence)

    Returns
    -------
    audio_morphed : np.array
        Returns the morphed audio in the time domain
    """
    fs = params['fs']
    W = params['W']
    N = params['N']
    H = params['H']
    t = params['t']
    maxnSines = params['maxnSines']
    thresh = params_ceps['thresh']
    ceps_coeffs = params_ceps['ceps_coeffs']
    num_iters = params_ceps['num_iters']

    w = windows.hann(W)
    F1, M1, _, _ = hprModelAnal(x=audio_inp1, fs=fs, w=w, N=N, H=H, t=t, nH=maxnSines,
                                minSineDur=0.02, minf0=10, maxf0=400, f0et=5, harmDevSlope=0.01)
    F2, M2, _, _ = hprModelAnal(x=audio_inp2, fs=fs, w=w, N=N, H=H, t=t, nH=maxnSines,
                                minSineDur=0.02, minf0=10, maxf0=400, f0et=5, harmDevSlope=0.01)

    # Defining the frequency matrix as multiples of the harmonics (use the shorter of the two analyses)
    new_F = np.zeros_like(F1 if F1.shape[0] < F2.shape[0] else F2)
    for i in range(new_F.shape[1]):
        new_F[:, i] = (i + 1) * f0

    # Defining the magnitude matrix
    new_M = np.zeros_like(M1 if M1.shape[0] < M2.shape[0] else M2)
    for i in range(new_M.shape[0]):
        # Performing the envelope interpolation framewise (normalized log: dividing the magnitude by 20)
        f1 = interpolate.interp1d(F1[i, :], M1[i, :] / 20, kind='linear', fill_value=-100, bounds_error=False)
        f2 = interpolate.interp1d(F2[i, :], M2[i, :] / 20, kind='linear', fill_value=-100, bounds_error=False)
        # Frequency bins
        fbins = np.linspace(0, fs / 2, N)
        finp1 = f1(fbins)
        finp2 = f2(fbins)
        specenv1, _, _ = fe.calc_true_envelope_spectral(finp1, N, thresh, ceps_coeffs, num_iters)
        specenv2, _, _ = fe.calc_true_envelope_spectral(finp2, N, thresh, ceps_coeffs, num_iters)
        # Obtain the cepstral representation of the true envelopes
        cc_te_1 = np.real(np.fft.ifft(specenv1))
        cc_te_2 = np.real(np.fft.ifft(specenv2))
        # Define the number of LPC (LSF) coefficients to keep.
        # Cannot keep all, as precision error causes the coefficients to blow up
        L = 60
        # Obtaining the LPC representation from the cepstral representation
        lpc_cc_te_1 = fe.cc_to_lpc(cc_te_1, L)
        lpc_cc_te_2 = fe.cc_to_lpc(cc_te_2, L)
        # Obtain the LSF representation from the LPC
        lsf_lpc_cc_te_1 = fe.lpc_to_lsf(lpc_cc_te_1)
        lsf_lpc_cc_te_2 = fe.lpc_to_lsf(lpc_cc_te_2)
        # Interpolate the LSFs and convert the result back to LPC
        lsf_interp = alpha * lsf_lpc_cc_te_1 + (1 - alpha) * lsf_lpc_cc_te_2
        lpc_interp = fe.lsf_to_lpc(lsf_interp)
        # Reconvert the LPCs to cepstral coefficients
        cc_interp = fe.lpc_to_cc(lpc_interp, L + 1, L)
        # Pad with zeros (done to reduce the number of computations)
        cc_interp = np.pad(cc_interp, [0, N - len(cc_interp)], mode='constant', constant_values=(0, 0))
        # Flip and append the array to give a real frequency signal to the fft input
        cc_interp = np.concatenate((cc_interp[:N // 2], np.flip(cc_interp[1:N // 2 + 1])))
        # Interpolate the zeroth coefficient separately (it represents the gain/power of the signals)
        cc_interp[0] = alpha * cc_te_1[0] + (1 - alpha) * cc_te_2[0]
        specenv = np.real(np.fft.fft(cc_interp))
        fp = interpolate.interp1d(fbins[:N // 2 + 1], specenv[:N // 2 + 1], kind='linear',
                                  fill_value=-10, bounds_error=False)
        new_M[i, :] = 20 * fp(new_F[i, :])

    audio_morphed = sineModelSynth(new_F, new_M, np.empty([0, 0]), W, H, fs)
    return audio_morphed
def morph_samepitch_cc(audio_inp1, audio_inp2, alpha, f0, params, params_ceps):
    """
    Timbre morphing between two sounds of the same pitch by linearly interpolating the
    cepstral representation of the true envelope.

    Parameters
    ----------
    audio_inp1 : np.array
        Numpy array containing the first audio signal, in the time domain
    audio_inp2 : np.array
        Numpy array containing the second audio signal, in the time domain
    alpha : float
        Interpolation factor (0 <= alpha <= 1), alpha*audio1 + (1 - alpha)*audio2
    f0 : float
        Fundamental frequency (to reconstruct harmonics)
    params : dict
        Parameter dictionary for the sine model containing the following keys
            - fs : integer
                Sampling rate of the audio
            - W : integer
                Window size (in samples)
            - N : integer
                FFT size (power of 2)
            - H : integer
                Hop size
            - t : float
                Threshold for sinusoidal detection in dB
            - maxnSines : integer
                Number of sinusoids to detect
    params_ceps : dict
        Parameter dictionary for the true envelope estimation containing the following keys
            - thresh : float
                Threshold (in dB) for the true envelope estimation
            - ceps_coeffs : integer
                Number of cepstral coefficients to keep in the true envelope estimation
            - num_iters : integer
                Upper bound on the number of iterations (if no convergence)

    Returns
    -------
    audio_morphed : np.array
        Returns the morphed audio in the time domain
    """
    fs = params['fs']
    W = params['W']
    N = params['N']
    H = params['H']
    t = params['t']
    maxnSines = params['maxnSines']
    thresh = params_ceps['thresh']
    ceps_coeffs = params_ceps['ceps_coeffs']
    num_iters = params_ceps['num_iters']

    w = windows.hann(W)
    F1, M1, _, _ = hprModelAnal(x=audio_inp1, fs=fs, w=w, N=N, H=H, t=t, nH=maxnSines,
                                minSineDur=0.02, minf0=10, maxf0=400, f0et=5, harmDevSlope=0.01)
    F2, M2, _, _ = hprModelAnal(x=audio_inp2, fs=fs, w=w, N=N, H=H, t=t, nH=maxnSines,
                                minSineDur=0.02, minf0=10, maxf0=400, f0et=5, harmDevSlope=0.01)

    # Defining the frequency matrix as multiples of the harmonics (use the shorter of the two analyses)
    new_F = np.zeros_like(F1 if F1.shape[0] < F2.shape[0] else F2)
    for i in range(new_F.shape[1]):
        new_F[:, i] = (i + 1) * f0

    # Defining the magnitude matrix
    new_M = np.zeros_like(M1 if M1.shape[0] < M2.shape[0] else M2)
    for i in range(new_M.shape[0]):
        # Performing the envelope interpolation framewise (normalized log: dividing the magnitude by 20)
        f1 = interpolate.interp1d(F1[i, :], M1[i, :] / 20, kind='linear', fill_value=-100, bounds_error=False)
        f2 = interpolate.interp1d(F2[i, :], M2[i, :] / 20, kind='linear', fill_value=-100, bounds_error=False)
        # Frequency bins
        fbins = np.linspace(0, fs / 2, N)
        finp1 = f1(fbins)
        finp2 = f2(fbins)
        specenv1, _, _ = fe.calc_true_envelope_spectral(finp1, N, thresh, ceps_coeffs, num_iters)
        specenv2, _, _ = fe.calc_true_envelope_spectral(finp2, N, thresh, ceps_coeffs, num_iters)
        # Obtain the cepstral representation of the true envelopes
        cc_te_1 = np.real(np.fft.ifft(specenv1))
        cc_te_2 = np.real(np.fft.ifft(specenv2))
        # Linearly interpolate the cepstral coefficients, and reconstruct the true envelope from that
        cc_interp = alpha * cc_te_1 + (1 - alpha) * cc_te_2
        specenv = np.real(np.fft.fft(cc_interp))
        fp = interpolate.interp1d(fbins[:N // 2 + 1], specenv[:N // 2 + 1], kind='linear',
                                  fill_value=-10, bounds_error=False)
        new_M[i, :] = 20 * fp(new_F[i, :])

    audio_morphed = sineModelSynth(new_F, new_M, np.empty([0, 0]), W, H, fs)
    return audio_morphed
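
# A usage sketch exercising both morphing variants on two same-pitch recordings
# (hypothetical parameter values); alpha = 0.5 sits halfway between the two timbres.
def example_morph(audio1, audio2, fs, f0=261.6):
    params = {'fs': fs, 'W': 1024, 'N': 2048, 'H': 256, 't': -120, 'maxnSines': 150}
    params_ceps = {'thresh': 0.1, 'ceps_coeffs': 80, 'num_iters': 100}
    morph_lsf = morph_samepitch_lsf(audio1, audio2, 0.5, f0, params, params_ceps)
    morph_cc = morph_samepitch_cc(audio1, audio2, 0.5, f0, params, params_ceps)
    return morph_lsf, morph_cc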
def residue_lpc(audio_inp, params, lpc_order):
    """
    Obtains the LPC representation of the residual spectrum (LPC envelope), and then generates
    the residual by IFFT'ing this representation with random phase.

    Parameters
    ----------
    audio_inp : np.array
        Numpy array containing the audio signal, in the time domain
    params : dict
        Parameter dictionary for the sine model containing the following keys
            - fs : Sampling rate of the audio
            - W : Window size (in samples)
            - N : FFT size (power of 2)
            - H : Hop size
            - t : Threshold for sinusoidal detection in dB
            - maxnSines : Number of sinusoids to detect
    lpc_order : integer
        Number of coefficients in the LPC representation

    Returns
    -------
    res_transformed : np.array
        Returns the transformed residue (LPC envelope approximation) in the time domain
    """
    fs = params['fs']
    W = params['W']
    N = params['N']
    H = params['H']
    t = params['t']
    maxnSines = params['maxnSines']

    w = windows.hann(W)
    F, M, P, R = hprModelAnal(x=audio_inp, fs=fs, w=w, N=N, H=H, t=t, nH=maxnSines,
                              minSineDur=0.02, minf0=10, maxf0=400, f0et=5, harmDevSlope=0.01)
    # sinusoidal reconstruction of the harmonic part (not used further here)
    harmonics_recon = sineModelSynth(tfreq=F, tmag=M, tphase=P, N=W, H=H, fs=fs)

    # Initializing an empty list to store the residual spectral approximations (LPC)
    xmX = []
    # Normalize the residue before analysis (throws a numpy zero error otherwise)
    nf = np.max(np.abs(R))
    R = R / nf
    for frame in ess.FrameGenerator(R.astype('float32'), W, H):
        inp = np.pad(frame, [0, N - W], mode='constant', constant_values=(0, 0))
        env_frame = fe.lpc_envelope(inp, lpc_order, fs, len(inp) // 2 + 1)
        xmX.append(env_frame)
    xmX = np.array(xmX)
    # Random phase spectrogram
    XpX = 2 * np.pi * np.random.rand(xmX.shape[0], xmX.shape[1])

    # Obtain the audio from the above representation and undo the analysis normalization
    res_transformed = stftSynth(xmX, XpX, W, H) * nf
    # Re-normalize the residual so that it lies in the [-1, 1] range
    res_transformed = (res_transformed / np.max(np.abs(res_transformed)))

    return res_transformed
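
# A usage sketch for residue_lpc(), approximating the residual of a note with a
# 30th-order LPC envelope re-synthesized with random phase (hypothetical values).
def example_residue_lpc(audio, fs):
    params = {'fs': fs, 'W': 1024, 'N': 2048, 'H': 256, 't': -120, 'maxnSines': 150}
    return residue_lpc(audio, params, lpc_order=30)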
def pitch_shifting(audio_inp, params, factor, choice, choice_recon):
    """
    Shifts the pitch by the scalar factor given as the input. Depending on the choice, performs
    interpolation to preserve the timbre when shifting the pitch. Also returns the sound with
    or without the original residue added.

    Parameters
    ----------
    audio_inp : np.array
        Numpy array containing the audio signal, in the time domain
    params : dict
        Parameter dictionary for the sine model containing the following keys
            - fs : Sampling rate of the audio
            - W : Window size (in samples)
            - N : FFT size (power of 2)
            - H : Hop size
            - t : Threshold for sinusoidal detection in dB
            - maxnSines : Number of sinusoids to detect
    factor : float
        Shift factor for the pitch. New pitch = factor * (old pitch)
    choice : 0 or 1
        If 0, simply shifts the pitch without amplitude interpolation
        If 1, performs amplitude interpolation framewise to preserve timbre
    choice_recon : 0 or 1
        If 0, returns only the sinusoidal reconstruction
        If 1, adds the original residue as well to the sinusoidal

    Returns
    -------
    audio_transformed : np.array
        Returns the transformed signal in the time domain
    R : np.array
        The residue of the signal
    """
    fs = params['fs']
    W = params['W']
    N = params['N']
    H = params['H']
    t = params['t']
    maxnSines = params['maxnSines']

    w = windows.hann(W)
    F, M, P, R = hprModelAnal(x=audio_inp, fs=fs, w=w, N=N, H=H, t=t, nH=maxnSines,
                              minSineDur=0.02, minf0=10, maxf0=400, f0et=5, harmDevSlope=0.01)

    scaled_F = factor * F
    if (choice == 0):
        new_M = M
    else:
        # copy M so the framewise interpolation does not overwrite the analysis matrix
        new_M = np.copy(M)
        for i in range(F.shape[0]):
            # Performing the envelope interpolation framewise
            f = interpolate.interp1d(F[i, :], M[i, :], kind='linear', fill_value=-100, bounds_error=False)
            new_M[i, :] = f(scaled_F[i, :])

    if (choice_recon == 0):
        audio_transformed = sineModelSynth(scaled_F, new_M, np.empty([0, 0]), W, H, fs)
    else:
        audio_transformed = hprModelSynth(scaled_F, new_M, np.empty([0, 0]), R, W, H, fs)[0]

    return audio_transformed, R
def pitch_shifting_harmonic(audio_inp, params, params_ceps, factor, choice, choice_recon, f0):
    """
    Shifts the pitch by the scalar factor given as the input, assuming the sound is harmonic
    and hence using only the amplitudes sampled at multiples of the fundamental frequency.
    Note : will only perform well for harmonic/sustained sounds.
    Depending on the choice, performs interpolation to preserve the timbre when shifting the
    pitch. Also returns the sound with or without the original residue added.

    Parameters
    ----------
    audio_inp : np.array
        Numpy array containing the audio signal, in the time domain
    params : dict
        Parameter dictionary for the sine model containing the following keys
            - fs : Sampling rate of the audio
            - W : Window size (in samples)
            - N : FFT size (power of 2)
            - H : Hop size
            - t : Threshold for sinusoidal detection in dB
            - maxnSines : Number of sinusoids to detect
    params_ceps : dict
        Parameter dictionary for the true envelope estimation containing the following keys
            - thresh : Threshold (in dB) for the true envelope estimation
            - ceps_coeffs : Number of cepstral coefficients to keep in the true envelope estimation
            - num_iters : Upper bound on the number of iterations (if no convergence)
    factor : float
        Shift factor for the pitch. New pitch = factor * (old pitch)
    choice : 0, 1 or 2
        If 0, simply shifts the pitch without amplitude interpolation
        If 1, performs amplitude interpolation framewise to preserve timbre
        If 2, uses the true envelope of the amplitude spectrum to sample the points from
    choice_recon : 0 or 1
        If 0, returns only the sinusoidal reconstruction
        If 1, adds the original residue as well to the sinusoidal
    f0 : Hz
        The fundamental frequency of the note

    Returns
    -------
    audio_transformed : np.array
        Returns the transformed signal in the time domain
    """
    fs = params['fs']
    W = params['W']
    N = params['N']
    H = params['H']
    t = params['t']
    maxnSines = params['maxnSines']
    thresh = params_ceps['thresh']
    ceps_coeffs = params_ceps['ceps_coeffs']
    num_iters = params_ceps['num_iters']

    w = windows.hann(W)
    F, M, P, R = hprModelAnal(x=audio_inp, fs=fs, w=w, N=N, H=H, t=t, nH=maxnSines,
                              minSineDur=0.02, minf0=10, maxf0=1000, f0et=5, harmDevSlope=0.01)

    # Define the frequency matrix as integer multiples of the fundamental
    new_F = np.zeros_like(F)
    for i in range(F.shape[1]):
        new_F[:, i] = (i + 1) * f0
    scaled_F = factor * new_F

    if (choice == 0):
        new_M = M
    elif (choice == 1):
        # copy M so the framewise interpolation does not overwrite the analysis matrix
        new_M = np.copy(M)
        for i in range(F.shape[0]):
            # Performing the envelope interpolation framewise
            f = interpolate.interp1d(F[i, :], M[i, :], kind='linear', fill_value=-100, bounds_error=False)
            new_M[i, :] = f(scaled_F[i, :])
    else:
        new_M = np.copy(M)
        for i in range(F.shape[0]):
            # Performing the envelope interpolation framewise (normalized log: dividing the magnitude by 20)
            f = interpolate.interp1d(F[i, :], M[i, :] / 20, kind='linear', fill_value=-5, bounds_error=False)
            # Frequency bins
            fbins = np.linspace(0, fs / 2, 2 * N)
            finp = f(fbins)
            specenv, _, _ = fe.calc_true_envelope_spectral(finp, N, thresh, ceps_coeffs, num_iters)
            # Once the spectral envelope is obtained, define an interpolating function based on it
            fp = interpolate.interp1d(fbins[:N // 2 + 1], specenv[:N // 2 + 1], kind='linear',
                                      fill_value='extrapolate', bounds_error=False)
            new_M[i, :] = 20 * fp(scaled_F[i, :])

    if (choice_recon == 0):
        audio_transformed = sineModelSynth(scaled_F, new_M, np.empty([0, 0]), W, H, fs)
    else:
        audio_transformed = hprModelSynth(scaled_F, new_M, np.empty([0, 0]), R, W, H, fs)[0]

    return audio_transformed
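
# A usage sketch contrasting the two pitch shifters above, shifting a note up a
# fifth (factor 1.5); the parameter values are hypothetical.
def example_pitch_shifting(audio, fs, f0=261.6):
    params = {'fs': fs, 'W': 1024, 'N': 2048, 'H': 256, 't': -120, 'maxnSines': 150}
    params_ceps = {'thresh': 0.1, 'ceps_coeffs': 80, 'num_iters': 100}
    # generic shifter: interpolates the raw harmonic magnitudes framewise
    shifted, residue = pitch_shifting(audio, params, factor=1.5, choice=1, choice_recon=1)
    # harmonic shifter: resamples the true envelope at exact multiples of f0
    shifted_h = pitch_shifting_harmonic(audio, params, params_ceps, factor=1.5,
                                        choice=2, choice_recon=1, f0=f0)
    return shifted, shifted_h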