Example #1
# imports assumed from the sms-tools package layout (shared by the examples below)
import os
import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import get_window
import utilFunctions as UF
import hprModel as HPR
import stft as STFT

def main(inputFile, window='blackman', M=601, N=1024, t=-100,
	minSineDur=0.1, nH=100, minf0=350, maxf0=700, f0et=5, harmDevSlope=0.01):
	
	# size of fft used in synthesis
	Ns = 512

	# hop size (has to be 1/4 of Ns)
	H = 128

	# read input sound
	(fs, x) = UF.wavread(inputFile)

	# compute analysis window
	w = get_window(window, M)

	# find harmonics and residual
	hfreq, hmag, hphase, xr = HPR.hprModelAnal(x, fs, w, N, H, t, minSineDur, nH, minf0, maxf0, f0et, harmDevSlope)
	  
	# compute spectrogram of residual
	mXr, pXr = STFT.stftAnal(xr, fs, w, N, H)
	  
	# synthesize hpr model
	y, yh = HPR.hprModelSynth(hfreq, hmag, hphase, xr, Ns, H, fs)

	# output sound file (monophonic with sampling rate of 44100)
	outputFileSines = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_hprModel_sines.wav'
	outputFileResidual = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_hprModel_residual.wav'
	outputFile = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_hprModel.wav'

	# write sounds files for harmonics, residual, and the sum
	UF.wavwrite(yh, fs, outputFileSines)
	UF.wavwrite(xr, fs, outputFileResidual)
	UF.wavwrite(y, fs, outputFile)
Example #2
def main(inputFile='../../sounds/sax-phrase-short.wav', window='blackman', M=601, N=1024, t=-100,
	minSineDur=0.1, nH=100, minf0=350, maxf0=700, f0et=5, harmDevSlope=0.01):
	"""
	Perform analysis/synthesis using the harmonic plus residual model
	inputFile: input sound file (monophonic with sampling rate of 44100)
	window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)	
	M: analysis window size; N: fft size (power of two, greater than or equal to M)
	t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks
	nH: maximum number of harmonics; minf0: minimum fundamental frequency in the sound
	maxf0: maximum fundamental frequency in the sound; f0et: maximum error accepted in the f0 detection algorithm
	harmDevSlope: allowed deviation of harmonic tracks; higher harmonics have a higher allowed deviation
	"""

	# size of fft used in synthesis
	Ns = 512

	# hop size (has to be 1/4 of Ns)
	H = 128

	# read input sound
	(fs, x) = UF.wavread(inputFile)

	# compute analysis window
	w = get_window(window, M)

	# find harmonics and residual
	hfreq, hmag, hphase, xr = HPR.hprModelAnal(x, fs, w, N, H, t, minSineDur, nH, minf0, maxf0, f0et, harmDevSlope)
	  
	# compute spectrogram of residual
	mXr, pXr = STFT.stftAnal(xr, fs, w, N, H)
	  
	# synthesize hpr model
	y, yh = HPR.hprModelSynth(hfreq, hmag, hphase, xr, Ns, H, fs)

	# output sound file (monophonic with sampling rate of 44100)
	outputFileSines = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_hprModel_sines.wav'
	outputFileResidual = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_hprModel_residual.wav'
	outputFile = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_hprModel.wav'

	# write sounds files for harmonics, residual, and the sum
	UF.wavwrite(yh, fs, outputFileSines)
	UF.wavwrite(xr, fs, outputFileResidual)
	UF.wavwrite(y, fs, outputFile)
	return x, fs, mXr,hfreq, y
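A minimal usage sketch for the function above, assuming the sms-tools imports shown in Example #1 and an existing output_sounds/ directory (the function writes three wav files into it):

# usage sketch: parameters are the function defaults; the returned arrays
# can be inspected or plotted as in the later examples
x, fs, mXr, hfreq, y = main(inputFile='../../sounds/sax-phrase-short.wav')
print(fs)            # sampling rate read from the input file
print(hfreq.shape)   # one row of harmonic-frequency tracks per analysis frame
print(mXr.shape)     # magnitude spectrogram of the residual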
Example #3
def makeSound(
    hfreq,
    hmag,
    hphase,
    xr,
    Ns,
    H,
    fs,
    cutPoint,
):
    """
	constructs a sound
	"""
    cutPoint = cutPoint + padValue
    hfreq = np.resize(hfreq, (cutPoint, hfreq.shape[1]))
    hmag = np.resize(hmag, (cutPoint, hmag.shape[1]))
    hphase = np.resize(hphase, (cutPoint, hphase.shape[1]))

    y, yh = HPR.hprModelSynth(hfreq, hmag, hphase, xr, Ns, H, fs)

    return y
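makeSound expects the outputs of HPR.hprModelAnal and truncates the harmonic tracks to cutPoint frames before resynthesis. A hedged sketch of how it might be called, reusing an analysis like the one in Example #2 (padValue is assumed to be a module-level constant in the original code; 0 leaves cutPoint unchanged):

# sketch only: hfreq, hmag, hphase, xr come from HPR.hprModelAnal as in the
# examples above; padValue is assumed to be defined at module level
padValue = 0
ySeg = makeSound(hfreq, hmag, hphase, xr, Ns=512, H=128, fs=fs, cutPoint=200)
UF.wavwrite(ySeg, fs, 'output_sounds/hprModel_segment.wav')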
Example #4
def main(inputFile='../../sounds/sax-phrase-short.wav', window='blackman', M=601, N=1024, t=-100,
	minSineDur=0.1, nH=100, minf0=350, maxf0=700, f0et=5, harmDevSlope=0.01):
	"""
	Perform analysis/synthesis using the harmonic plus residual model
	inputFile: input sound file (monophonic with sampling rate of 44100)
	window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
	M: analysis window size; N: fft size (power of two, greater than or equal to M)
	t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks
	nH: maximum number of harmonics; minf0: minimum fundamental frequency in the sound
	maxf0: maximum fundamental frequency in the sound; f0et: maximum error accepted in the f0 detection algorithm
	harmDevSlope: allowed deviation of harmonic tracks; higher harmonics have a higher allowed deviation
	"""

	# size of fft used in synthesis
	Ns = 512

	# hop size (has to be 1/4 of Ns)
	H = 128

	# read input sound
	(fs, x) = UF.wavread(inputFile)

	# compute analysis window
	w = get_window(window, M)

	# find harmonics and residual
	hfreq, hmag, hphase, xr = HPR.hprModelAnal(x, fs, w, N, H, t, minSineDur, nH, minf0, maxf0, f0et, harmDevSlope)

	# compute spectrogram of residual
	mXr, pXr = STFT.stftAnal(xr, w, N, H)
	  
	# synthesize hpr model
	y, yh = HPR.hprModelSynth(hfreq, hmag, hphase, xr, Ns, H, fs)

	# output sound file (monophonic with sampling rate of 44100)
	outputFileSines = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_hprModel_sines.wav'
	outputFileResidual = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_hprModel_residual.wav'
	outputFile = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_hprModel.wav'

	# write sounds files for harmonics, residual, and the sum
	UF.wavwrite(yh, fs, outputFileSines)
	UF.wavwrite(xr, fs, outputFileResidual)
	UF.wavwrite(y, fs, outputFile)

	# create figure to plot
	plt.figure(figsize=(12, 9))

	# frequency range to plot
	maxplotfreq = 5000.0

	# plot the input sound
	plt.subplot(3,1,1)
	plt.plot(np.arange(x.size)/float(fs), x)
	plt.axis([0, x.size/float(fs), min(x), max(x)])
	plt.ylabel('amplitude')
	plt.xlabel('time (sec)')
	plt.title('input sound: x')

	# plot the magnitude spectrogram of residual
	plt.subplot(3,1,2)
	maxplotbin = int(N*maxplotfreq/fs)
	numFrames = int(mXr[:,0].size)
	frmTime = H*np.arange(numFrames)/float(fs)
	binFreq = np.arange(maxplotbin+1)*float(fs)/N
	plt.pcolormesh(frmTime, binFreq, np.transpose(mXr[:,:maxplotbin+1]))
	plt.autoscale(tight=True)

	# plot harmonic frequencies on residual spectrogram
	if (hfreq.shape[1] > 0):
		harms = hfreq*np.less(hfreq,maxplotfreq)
		harms[harms==0] = np.nan
		numFrames = int(harms[:,0].size)
		frmTime = H*np.arange(numFrames)/float(fs)
		plt.plot(frmTime, harms, color='k', ms=3, alpha=1)
		plt.xlabel('time(s)')
		plt.ylabel('frequency(Hz)')
		plt.autoscale(tight=True)
		plt.title('harmonics + residual spectrogram')

	# plot the output sound
	plt.subplot(3,1,3)
	plt.plot(np.arange(y.size)/float(fs), y)
	plt.axis([0, y.size/float(fs), min(y), max(y)])
	plt.ylabel('amplitude')
	plt.xlabel('time (sec)')
	plt.title('output sound: y')

	plt.tight_layout()
	plt.ion()
	plt.show()
Example #5
def main(inputFile='../../sounds/sax-phrase-short.wav', window='blackman', M=601, N=1024, t=-100, 
	minSineDur=0.1, nH=100, minf0=350, maxf0=700, f0et=5, harmDevSlope=0.01):
	"""
	Perform analysis/synthesis using the harmonic plus residual model
	inputFile: input sound file (monophonic with sampling rate of 44100)
	window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)	
	M: analysis window size; N: fft size (power of two, greater than or equal to M)
	t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks
	nH: maximum number of harmonics; minf0: minimum fundamental frequency in the sound
	maxf0: maximum fundamental frequency in the sound; f0et: maximum error accepted in the f0 detection algorithm
	harmDevSlope: allowed deviation of harmonic tracks; higher harmonics have a higher allowed deviation
	"""

	# size of fft used in synthesis
	Ns = 512

	# hop size (has to be 1/4 of Ns)
	H = 128

	# read input sound
	(fs, x) = UF.wavread(inputFile)

	# compute analysis window
	w = get_window(window, M)

	# find harmonics and residual
	hfreq, hmag, hphase, xr = HPR.hprModelAnal(x, fs, w, N, H, t, minSineDur, nH, minf0, maxf0, f0et, harmDevSlope)
	  
	# compute spectrogram of residual
	mXr, pXr = STFT.stftAnal(xr, w, N, H)
	  
	# synthesize hpr model
	y, yh = HPR.hprModelSynth(hfreq, hmag, hphase, xr, Ns, H, fs)

	# output sound file (monophonic with sampling rate of 44100)
	outputFileSines = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_hprModel_sines.wav'
	outputFileResidual = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_hprModel_residual.wav'
	outputFile = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_hprModel.wav'

	# write sounds files for harmonics, residual, and the sum
	UF.wavwrite(yh, fs, outputFileSines)
	UF.wavwrite(xr, fs, outputFileResidual)
	UF.wavwrite(y, fs, outputFile)

	# create figure to plot
	plt.figure(figsize=(12, 9))

	# frequency range to plot
	maxplotfreq = 5000.0

	# plot the input sound
	plt.subplot(3,1,1)
	plt.plot(np.arange(x.size)/float(fs), x)
	plt.axis([0, x.size/float(fs), min(x), max(x)])
	plt.ylabel('amplitude')
	plt.xlabel('time (sec)')
	plt.title('input sound: x')

	# plot the magnitude spectrogram of residual
	plt.subplot(3,1,2)
	maxplotbin = int(N*maxplotfreq/fs)
	numFrames = int(mXr[:,0].size)
	frmTime = H*np.arange(numFrames)/float(fs)                       
	binFreq = np.arange(maxplotbin+1)*float(fs)/N                         
	plt.pcolormesh(frmTime, binFreq, np.transpose(mXr[:,:maxplotbin+1]))
	plt.autoscale(tight=True)

	# plot harmonic frequencies on residual spectrogram
	if (hfreq.shape[1] > 0):
		harms = hfreq*np.less(hfreq,maxplotfreq)
		harms[harms==0] = np.nan
		numFrames = int(harms[:,0].size)
		frmTime = H*np.arange(numFrames)/float(fs) 
		plt.plot(frmTime, harms, color='k', ms=3, alpha=1)
		plt.xlabel('time(s)')
		plt.ylabel('frequency(Hz)')
		plt.autoscale(tight=True)
		plt.title('harmonics + residual spectrogram')

	# plot the output sound
	plt.subplot(3,1,3)
	plt.plot(np.arange(y.size)/float(fs), y)
	plt.axis([0, y.size/float(fs), min(y), max(y)])
	plt.ylabel('amplitude')
	plt.xlabel('time (sec)')
	plt.title('output sound: y')

	plt.tight_layout()
	plt.show()
Example #6
# note: this snippet assumes the sms-tools imports and the analysis parameters
# (inputFile, window, M, N, t, minSineDur, nH, minf0, maxf0, f0et, harmDevSlope)
# defined as in the examples above
Ns = 512
H = 128

(fs, x) = UF.wavread(inputFile)
w = get_window(window, M)
hfreq, hmag, hphase, xr = HPR.hprModelAnal(x, fs, w, N, H, t, minSineDur, nH, minf0, maxf0, f0et, harmDevSlope)

mXr, pXr = STFT.stftAnal(xr, w, N, H)

freqScaling = np.array([0, 1.5, 1, 1.5])
freqStretching = np.array([0, 1.1, 1, 1.1])
timbrePreservation = 1

hfreqt, hmagt = HT.harmonicFreqScaling(hfreq, hmag, freqScaling, freqStretching, timbrePreservation, fs)

y, yh = HPR.hprModelSynth(hfreqt, hmagt, np.array([]), xr, Ns, H, fs)

UF.wavwrite(y, fs, "hpr-freq-transformation.wav")

plt.figure(figsize=(12, 9))

maxplotfreq = 15000.0

plt.subplot(4, 1, 1)
plt.plot(np.arange(x.size) / float(fs), x)
plt.axis([0, x.size / float(fs), min(x), max(x)])
plt.title("x (flute-A4.wav)")

plt.subplot(4, 1, 2)
maxplotbin = int(N * maxplotfreq / fs)
numFrames = int(mXr[:, 0].size)
Example #7
# note: near-duplicate of the previous snippet; the same assumptions apply,
# including Ns and H from Example #6
(fs, x) = UF.wavread(inputFile)
w = get_window(window, M)
hfreq, hmag, hphase, xr = HPR.hprModelAnal(x, fs, w, N, H, t, minSineDur, nH,
                                           minf0, maxf0, f0et, harmDevSlope)

mXr, pXr = STFT.stftAnal(xr, w, N, H)

freqScaling = np.array([0, 1.5, 1, 1.5])
freqStretching = np.array([0, 1.1, 1, 1.1])
timbrePreservation = 1

hfreqt, hmagt = HT.harmonicFreqScaling(hfreq, hmag, freqScaling,
                                       freqStretching, timbrePreservation, fs)

y, yh = HPR.hprModelSynth(hfreqt, hmagt, np.array([]), xr, Ns, H, fs)

UF.wavwrite(y, fs, 'hpr-freq-transformation.wav')

plt.figure(figsize=(12, 9))

maxplotfreq = 15000.0

plt.subplot(4, 1, 1)
plt.plot(np.arange(x.size) / float(fs), x)
plt.axis([0, x.size / float(fs), min(x), max(x)])
plt.title('x (flute-A4.wav)')

plt.subplot(4, 1, 2)
maxplotbin = int(N * maxplotfreq / fs)
numFrames = int(mXr[:, 0].size)
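The freqScaling and freqStretching arrays above are flat sequences of time, factor pairs that HT.harmonicFreqScaling interpolates across the frames of the harmonic tracks, so [0, 1.5, 1, 1.5] applies a constant scaling of 1.5 from start to end. As a hedged variation, reusing the analysis outputs, freqStretching, timbrePreservation and the Ns and H values from the snippets above, a glide from no transposition up to an octave could look like this:

# sketch only: a time-varying scaling envelope; 1.0 at the start, 2.0 (one
# octave up) at the end, interpolated over the duration of the sound
freqScaling = np.array([0, 1.0, 1, 2.0])
hfreqt, hmagt = HT.harmonicFreqScaling(hfreq, hmag, freqScaling,
                                       freqStretching, timbrePreservation, fs)
y, yh = HPR.hprModelSynth(hfreqt, hmagt, np.array([]), xr, Ns, H, fs)
UF.wavwrite(y, fs, 'hpr-freq-glide.wav')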
Example #8
def pitch_shift_te(audio_inp, params, factor, choice_recon, params_ceps):
	"""
	Shifts the pitch by the scalar factor given as the input.

	Performs interpolation by using the True Envelope of the Spectra. Also returns sound with or without the original residue added.

	Parameters
	----------
	audio_inp : np.array
		Numpy array containing the audio signal, in the time domain 
	params : dict
		Parameter dictionary for the sine model, containing the following keys
			- fs : integer
				Sampling rate of the audio
			- W : integer
				Window size (in samples)
			- N : integer
				FFT size (power of two)
			- H : integer
				Hop size
			- t : float
				Threshold for sinusoidal detection in dB
			- maxnSines : integer
				Number of sinusoids to detect
	factor : float
		Shift factor for the pitch. New pitch = f * (old pitch)
	choice_recon : 0 or 1
		If 0, returns only the sinusoidal reconstruction
		If 1, adds the original residue as well to the sinusoidal
	params_ceps : dict
		Parameter Dictionary for the true envelope estimation containing the following keys
			- thresh : float
				Threshold (in dB) for the true envelope estimation
			- ceps_coeffs : integer
				Number of cepstral coefficients to keep in the true envelope estimation
			- num_iters : integer
				Upper bound on the number of iterations (if no convergence)
				
	Returns
	-------
	audio_transformed : np.array
		Returns the transformed signal in the time domain
	residue : np.array
		Residue of the original signal
	"""

	fs = params['fs']
	W = params['W']
	N = params['N']
	H = params['H']
	t = params['t']
	maxnSines = params['maxnSines']
	thresh = params_ceps['thresh']
	ceps_coeffs = params_ceps['ceps_coeffs']
	num_iters = params_ceps['num_iters']


	w = windows.hann(W)

	F,M,P,R = hprModelAnal(x = audio_inp, fs = fs, w = w, N = N, H = H, t = t, nH = maxnSines, minSineDur = 0.1, minf0 = 10, maxf0 = 1000, f0et = 5, harmDevSlope = 0.01)

	scaled_F = factor*F
	
	# copy so the original magnitude array is not modified in place
	new_M = np.copy(M)
	for i in range(F.shape[0]):
		# Performing the envelope interpolation framewise(normalized log(dividing the magnitude by 20))
		f = interpolate.interp1d(F[i,:],M[i,:]/20,kind = 'linear',fill_value = -5, bounds_error=False)
		# Frequency bins
		fbins = np.linspace(0,fs/2,N)
		finp = f(fbins)
		specenv,_,_ = fe.calc_true_envelope_spectral(finp,N,thresh,ceps_coeffs,num_iters)
		# Now, once the spectral envelope is obtained, define an interpolating function based on the spectral envelope
		# fp = interpolate.interp1d(np.linspace(0,fs/2,N),np.pad(specenv[0:N//2],[0,N//2],mode = 'constant',constant_values=(0, -5)),kind = 'linear',fill_value = -5, bounds_error=False)
		fp = interpolate.interp1d(fbins[:N//2 + 1],specenv[:N//2 + 1],kind = 'linear',fill_value = 'extrapolate', bounds_error=False)
		new_M[i,:] = 20*fp(scaled_F[i,:])

	if(choice_recon == 0):
		audio_transformed = sineModelSynth(scaled_F, new_M, np.empty([0,0]), W, H, fs)
	else:
		audio_transformed = hprModelSynth(scaled_F, new_M, np.empty([0,0]), R, W, H, fs)[0]

	return audio_transformed,R
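A hedged example of how the two parameter dictionaries described in the docstring might be filled in; the values are illustrative only, audio_inp is assumed to be a mono signal loaded at params['fs'], and the sms-tools functions plus the project-specific fe module are assumed to be imported:

# sketch only: illustrative parameter values
params = {'fs': 44100, 'W': 1024, 'N': 2048, 'H': 256,
          't': -90, 'maxnSines': 100}
params_ceps = {'thresh': 0.1, 'ceps_coeffs': 60, 'num_iters': 100}
# shift up by one equal-tempered semitone and add back the original residue
y_shift, residue = pitch_shift_te(audio_inp, params, factor=2 ** (1.0 / 12),
                                  choice_recon=1, params_ceps=params_ceps)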
Example #9
def pitch_shifting(audio_inp, params, factor, choice, choice_recon):
	"""
	Shifts the pitch by the scalar factor given as the input.

	Depending on the choice, performs interpolation to preserve the timbre when shifting the pitch. Also returns sound with or without the original residue added.

	Parameters
	----------
	audio_inp : np.array
		Numpy array containing the audio signal, in the time domain 
	params : dict
		Parameter dictionary for the sine model, containing the following keys
			- fs : Sampling rate of the audio
			- W : Window size (in samples)
			- N : FFT size (power of two)
			- H : Hop size
			- t : Threshold for sinusoidal detection in dB
			- maxnSines : Number of sinusoids to detect
	factor : float
		Shift factor for the pitch. New pitch = f * (old pitch)
	choice : 0 or 1
		If 0, simply shifts the pitch without amplitude interpolation
		If 1, performs amplitude interpolation framewise to preserve timbre
	choice_recon : 0 or 1
		If 0, returns only the sinusoidal reconstruction
		If 1, adds the original residue as well to the sinusoidal
		
	Returns
	-------
	audio_transformed : np.array
		Returns the transformed signal in the time domain
	Residue : np.array
		The residue of the signal
	"""

	fs = params['fs']
	W = params['W']
	N = params['N']
	H = params['H']
	t = params['t']
	maxnSines = params['maxnSines']

	w = windows.hann(W)

	F,M,P,R = hprModelAnal(x = audio_inp, fs = fs, w = w, N = N, H = H, t = t, nH = maxnSines, minSineDur = 0.02, minf0 = 10, maxf0 = 400, f0et = 5, harmDevSlope = 0.01)

	scaled_F = factor*F

	if(choice == 0):
		new_M = M
	else:
		new_M = np.copy(M)  # copy so M is not modified in place
		for i in range(F.shape[0]):
			# Performing the envelope interpolation framewise
			f = interpolate.interp1d(F[i,:],M[i,:],kind = 'linear',fill_value = -100, bounds_error=False)
			new_M[i,:] = f(scaled_F[i,:])

	if(choice_recon == 0):
		audio_transformed = sineModelSynth(scaled_F, new_M, np.empty([0,0]), W, H, fs)
	else:
		audio_transformed = hprModelSynth(scaled_F, new_M, np.empty([0,0]), R, W, H, fs)[0]

	return audio_transformed,R
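Using the same illustrative dictionaries as in the previous example, the two flags can be combined to compare a plain shift with a timbre-preserving one:

# sketch only: choice=0 moves the partial amplitudes along with their
# frequencies, choice=1 resamples the original frame-wise envelope at the
# new frequencies so the timbre is preserved; choice_recon=1 adds the residue
y_plain, _ = pitch_shifting(audio_inp, params, factor=0.75,
                            choice=0, choice_recon=0)
y_timbre, _ = pitch_shifting(audio_inp, params, factor=0.75,
                             choice=1, choice_recon=1)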
Example #10
def pitch_shifting_harmonic(audio_inp, params, params_ceps, factor, choice, choice_recon, f0):
	"""
	Shifts the pitch by the scalar factor given as the input. But, assumes the sound is harmonic and hence uses only the amplitudes sampled at multiples of the fundamental frequency.
	Note : Will only perform well for harmonic/sustained sounds.
	Depending on the choice, performs interpolation to preserve the timbre when shifting the pitch. Also returns sound with or without the original residue added.

	Parameters
	----------
	audio_inp : np.array
		Numpy array containing the audio signal, in the time domain 
	params : dict
		Parameter dictionary for the sine model, containing the following keys
			- fs : Sampling rate of the audio
			- W : Window size (in samples)
			- N : FFT size (power of two)
			- H : Hop size
			- t : Threshold for sinusoidal detection in dB
			- maxnSines : Number of sinusoids to detect
	factor : float
		Shift factor for the pitch. New pitch = f * (old pitch)
	choice : 0,1,2
		If 0, simply shifts the pitch without amplitude interpolation
		If 1, performs amplitude interpolation framewise to preserve timbre
		If 2, uses the True envelope of the amplitude spectrum to sample the points from
	choice_recon : 0 or 1
		If 0, returns only the sinusoidal reconstruction
		If 1, adds the original residue as well to the sinusoidal
	f0 : float
		The fundamental frequency of the note, in Hz
		
	Returns
	-------
	audio_transformed : np.array
		Returns the transformed signal in the time domain
	"""

	fs = params['fs']
	W = params['W']
	N = params['N']
	H = params['H']
	t = params['t']
	maxnSines = params['maxnSines']
	thresh = params_ceps['thresh']
	ceps_coeffs = params_ceps['ceps_coeffs']
	num_iters = params_ceps['num_iters']

	w = windows.hann(W)

	F,M,P,R = hprModelAnal(x = audio_inp, fs = fs, w = w, N = N, H = H, t = t, nH = maxnSines, minSineDur = 0.02, minf0 = 10, maxf0 = 1000, f0et = 5, harmDevSlope = 0.01)
	
	new_F = np.zeros_like(F)
	for i in range(F.shape[1]):
		new_F[:,i] = (i+1)*f0

	scaled_F = factor*new_F

	if(choice == 0):
		new_M = M
	elif(choice == 1):
		new_M = np.copy(M)  # copy so M is not modified in place
		for i in range(F.shape[0]):
			# Performing the envelope interpolation framewise
			f = interpolate.interp1d(F[i,:],M[i,:],kind = 'linear',fill_value = -100, bounds_error=False)
			new_M[i,:] = f(scaled_F[i,:])
	else:
		new_M = np.copy(M)  # copy so M is not modified in place
		for i in range(F.shape[0]):
			# Performing the envelope interpolation framewise(normalized log(dividing the magnitude by 20))
			f = interpolate.interp1d(F[i,:],M[i,:]/20,kind = 'linear',fill_value = -5, bounds_error=False)
			# Frequency bins
			fbins = np.linspace(0,fs/2,2*N)
			finp = f(fbins)
			specenv,_,_ = fe.calc_true_envelope_spectral(finp,N,thresh,ceps_coeffs,num_iters)
			# Now, once the spectral envelope is obtained, define an interpolating function based on the spectral envelope
			fp = interpolate.interp1d(fbins[:N//2 + 1],specenv[:N//2 + 1],kind = 'linear',fill_value = 'extrapolate', bounds_error=False)
			new_M[i,:] = 20*fp(scaled_F[i,:])

	if(choice_recon == 0):
		audio_transformed = sineModelSynth(scaled_F, new_M, np.empty([0,0]), W, H, fs)
	else:
		audio_transformed = hprModelSynth(scaled_F, new_M, np.empty([0,0]), R, W, H, fs)[0]

	return audio_transformed
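Because the harmonic frequencies are rebuilt as integer multiples of the supplied f0, this variant is only meant for sustained, clearly pitched notes. A hedged call, again with the illustrative dictionaries from the earlier examples:

# sketch only: choice=2 samples the magnitudes from the true-envelope
# estimate of each frame's spectrum before shifting
y_shift = pitch_shifting_harmonic(audio_inp, params, params_ceps,
                                  factor=1.5, choice=2, choice_recon=0,
                                  f0=220.0)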
Example #11
def makeY(hfreq, hmag, hphase, xr, Ns, H, fs):
    y, yh = HPR.hprModelSynth(hfreq, hmag, hphase, xr, Ns, H, fs)
    return y