Beispiel #1
0
def timeStretchAudio(inputAudio, outputAudio, outputDuration, writeOutput=1):
    """
    Time-scale a sound file using sinusoidal-model analysis and synthesis.

    inputAudio: path of the sound file to read
    outputAudio: path of the sound file to write (only used when writeOutput == 1)
    outputDuration: output time, in seconds, paired with the 0.4 s input anchor
                    of the time-scaling map
    writeOutput: 1 writes the synthesized sound to outputAudio and returns None;
                 any other value returns (y, fs, nChannel) without writing

    Only the first channel of a multi-channel input is processed, so the
    synthesized signal y is always mono. nChannel is the channel count of the
    input file.
    """

    # read everything we need up front so the input file can be closed right away
    originalWav = Sndfile(inputAudio, 'r')
    try:
        x = originalWav.read_frames(originalWav.nframes)
        fs = originalWav.samplerate
        nChannel = originalWav.channels
        fileFormat = originalWav.format
    finally:
        originalWav.close()

    # multi-channel frames are laid out as (nframes, nchannels): keep the first
    # channel (x[0] would select the first frame instead)
    if nChannel > 1:
        x = x[:, 0]

    # sinusoidal analysis parameters
    w = np.hamming(801)
    N = 2048
    t = -90
    minSineDur = .005
    maxnSines = 150
    freqDevOffset = 20
    freqDevSlope = 0.02

    # synthesis FFT size and hop size; the hop must stay an integer
    Ns = 512
    H = Ns // 4

    tfreq, tmag, tphase = SM.sineModelAnal(x, fs, w, N, H, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope)

    # NOTE(review): the input anchor is hard-coded to 0.4 s, so the output only
    # lasts outputDuration seconds when the input itself lasts 0.4 s -- confirm
    # whether the measured input duration was intended here.
    timeScale = np.array([0, 0, .4, outputDuration])

    ytfreq, ytmag = trans.sineTimeScaling(tfreq, tmag, timeScale)
    y = SM.sineModelSynth(ytfreq, ytmag, np.array([]), Ns, H, fs)

    if writeOutput == 1:
        # y is mono, so the output file is declared with a single channel
        outputWav = Sndfile(outputAudio, 'w', fileFormat, 1, fs)
        try:
            outputWav.write_frames(y)
        finally:
            outputWav.close()
    else:
        return y, fs, nChannel
Beispiel #2
0
def estimate(inputFile='a7q2-harmonic.wav',
             window='blackman',
             M=2101,
             N=4096,
             t=-90,
             minSineDur=0.1,
             nH=50,
             minf0=100,
             maxf0=200,
             f0et=5,
             harmDevSlope=0.01):
    """
    Run harmonic analysis/synthesis on a sound and report two quality figures.

    inputFile: input sound file
    window: analysis window type; M: analysis window size; N: fft size
    t: magnitude threshold of spectral peaks; minSineDur: minimum duration of tracks
    nH: maximum number of harmonics; minf0, maxf0: fundamental frequency range
    f0et: maximum error accepted in f0 detection
    harmDevSlope: allowed deviation of harmonic tracks
    returns diff: sum of absolute sample differences between the input and the
                  resynthesized sound over their common length;
            std: standard deviation of the detected f0 values
    """

    # size of fft used in synthesis and hop size
    Ns = 512
    H = 128

    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)
    hfreq, hmag, hphase = HM.harmonicModelAnal(x, fs, w, N, H, t, nH, minf0,
                                               maxf0, f0et, harmDevSlope,
                                               minSineDur)
    f0 = HM.f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et)
    y = SM.sineModelSynth(hfreq, hmag, hphase, Ns, H, fs)

    # compare only the part both signals have in common
    size = min(x.size, y.size)
    diff = np.sum(np.abs(x[:size] - y[:size]))
    std = np.std(f0)

    # single-argument print call: same output on Python 2 and Python 3
    print("diff:{0} & std:{1}, M={2} N={3} t={4} minSineDur={5} nH={6} min/max={7}/{8} f0et={9} harmDevSlope={10}"
          .format(diff, std, M, N, t, minSineDur, nH, minf0, maxf0, f0et, harmDevSlope))

    return diff, std
def transformation_synthesis(inputFile, fs, hfreq, hmag, freqScaling = np.array([0, 2.0, 1, .3]),
        freqStretching = np.array([0, 1, 1, 1.5]), timbrePreservation = 1,
        timeScaling = np.array([0, .0, .671, .671, 1.978, 1.978+1.0])):
    """
    Transform the harmonic analysis values and synthesize, write and plot the sound
    inputFile: name of input file (used to build the output file name)
    fs: sampling rate of input file
    hfreq, hmag: harmonic frequencies and magnitudes
    freqScaling: frequency scaling factors, in time-value pairs
    freqStretching: frequency stretching factors, in time-value pairs
    timbrePreservation: 1 preserves original timbre, 0 it does not
    timeScaling: time scaling factors, in time-value pairs
    """

    # size of fft used in synthesis
    Ns = 512

    # hop size (has to be 1/4 of Ns)
    H = 128

    # frequency scaling of the harmonics
    yhfreq, yhmag = HT.harmonicFreqScaling(hfreq, hmag, freqScaling, freqStretching, timbrePreservation, fs)

    # time scale the sound
    yhfreq, yhmag = ST.sineTimeScaling(yhfreq, yhmag, timeScaling)

    # synthesis (empty phase array: phases are not supplied)
    y = SM.sineModelSynth(yhfreq, yhmag, np.array([]), Ns, H, fs)

    # write output sound
    outputFile = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_harmonicModelTransformation.wav'
    UF.wavwrite(y, fs, outputFile)

    # --------- plotting --------------------

    # create figure to plot
    plt.figure(figsize=(12, 6))

    # frequency range to plot
    maxplotfreq = 15000.0

    # plot the transformed harmonic frequencies; skipped when there are no
    # tracks at all, since indexing column 0 would raise an IndexError
    if yhfreq.shape[1] > 0:
        plt.subplot(2,1,1)
        tracks = yhfreq*np.less(yhfreq, maxplotfreq)
        tracks[tracks<=0] = np.nan
        numFrames = int(tracks[:,0].size)
        frmTime = H*np.arange(numFrames)/float(fs)
        plt.plot(frmTime, tracks, color='k')
        plt.title('transformed harmonic tracks')
        plt.autoscale(tight=True)

    # plot the output sound
    plt.subplot(2,1,2)
    plt.plot(np.arange(y.size)/float(fs), y)
    plt.axis([0, y.size/float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()
    plt.show()
def transformation_synthesis(inputFile, fs, tfreq, tmag, freqScaling = np.array([0, 2.0, 1, .3]),
        timeScaling = np.array([0, .0, .671, .671, 1.978, 1.978+1.0])):
    """
    Apply frequency and time scaling to sinusoidal tracks, then synthesize,
    write and plot the result.
    inputFile: name of input file (its base name is reused for the output file)
    fs: sampling rate of input file
    tfreq, tmag: sinusoidal frequencies and magnitudes
    freqScaling: frequency scaling factors, in time-value pairs
    timeScaling: time scaling factors, in time-value pairs
    """

    # synthesis fft size and hop size (the hop has to be 1/4 of Ns)
    Ns = 512
    H = 128

    # scale the track frequencies first, then stretch the tracks in time
    scaledFreq = ST.sineFreqScaling(tfreq, freqScaling)
    ytfreq, ytmag = ST.sineTimeScaling(scaledFreq, tmag, timeScaling)

    # synthesize without phases
    noPhases = np.array([])
    y = SM.sineModelSynth(ytfreq, ytmag, noPhases, Ns, H, fs)

    # write the output sound next to the other results
    stem = os.path.basename(inputFile)[:-4]
    UF.wavwrite(y, fs, 'output_sounds/' + stem + '_sineModelTransformation.wav')

    plt.figure(figsize=(12, 6))

    # upper limit of the plotted frequency range
    maxplotfreq = 15000.0

    # upper panel: transformed tracks, only when at least one track exists
    if ytfreq.shape[1] > 0:
        plt.subplot(2, 1, 1)
        visible = ytfreq * np.less(ytfreq, maxplotfreq)
        visible[visible <= 0] = np.nan     # hide silent / out-of-range points
        frameTimes = H * np.arange(visible.shape[0]) / float(fs)
        plt.plot(frameTimes, visible)
        plt.title('transformed sinusoidal tracks')
        plt.autoscale(tight=True)

    # lower panel: output waveform
    duration = y.size / float(fs)
    plt.subplot(2, 1, 2)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, duration, min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()
    plt.show()
def sms_synth_to_file(output_filename, tfreq, tmag, tphase, Fs):
    """
        Synthesize audio from sinusoidal frequencies, magnitudes and phases
        and write it to output_filename.
        The synthesis FFT size and hop size are taken from SMS.Ns and SMS.H.
        Returns y: the synthesized audio as a float32 array
    """
    synthesized = SM.sineModelSynth(tfreq, tmag, tphase, SMS.Ns, SMS.H, Fs)
    y = np.asarray(synthesized, dtype='float32')
    librosa.output.write_wav(output_filename, y, Fs)
    return y
Beispiel #6
0
def hprModelSynth(hfreq, hmag, hphase, xr, N, H, fs):
    """
    Synthesis of a sound from harmonic tracks plus a residual signal
    hfreq, hmag, hphase: frequencies, magnitudes and phases passed to the sinusoidal synthesis
    xr: residual signal added to the synthesized harmonics
    N: synthesis FFT size; H: hop size, fs: sampling rate
    returns y: output sound, yh: harmonic component
    """

    # synthesize the harmonic component
    yh = SM.sineModelSynth(hfreq, hmag, hphase, N, H, fs)

    # both signals are truncated to the shorter one before being summed
    common = min(yh.size, xr.size)
    y = yh[:common] + xr[:common]
    return y, yh
Beispiel #7
0
def hprModelSynth(hfreq, hmag, hphase, xr, N, H, fs):
    """
    Build the output sound as synthesized harmonics plus residual
    hfreq, hmag, hphase: frequencies, magnitudes and phases given to the sinusoidal synthesis
    xr: residual signal
    N: synthesis FFT size; H: hop size, fs: sampling rate
    returns y: output sound, yh: harmonic component
    """

    yh = SM.sineModelSynth(hfreq, hmag, hphase, N, H, fs)   # harmonic component
    length = min(yh.size, xr.size)                          # shared length of both parts
    harmonicPart = yh[:length]
    residualPart = xr[:length]
    return harmonicPart + residualPart, yh
Beispiel #8
0
def hpsModelSynth(hfreq, hmag, hphase, stocEnv, N, H, fs):
    """
    Synthesis of a sound using the harmonic plus stochastic model
    hfreq, hmag, hphase: harmonic frequencies, magnitudes and phases; stocEnv: stochastic envelope
    N: synthesis FFT size; H: hop size, fs: sampling rate
    returns y: output sound, yh: harmonic component, yst: stochastic component
    """

    # harmonic component
    yh = SM.sineModelSynth(hfreq, hmag, hphase, N, H, fs)

    # stochastic component; its last argument is fixed to 2*H, independent of N
    yst = STM.stochasticModelSynth(stocEnv, H, H*2)

    # sum the two components over their common length
    common = min(yh.size, yst.size)
    y = yh[:common] + yst[:common]
    return y, yh, yst
Beispiel #9
0
def spsModelSynth(tfreq, tmag, tphase, stocEnv, N, H, fs):
    """
    Synthesis of a sound using the sinusoidal plus stochastic model
    tfreq, tmag, tphase: sinusoidal frequencies, amplitudes and phases; stocEnv: stochastic envelope
    N: synthesis FFT size; H: hop size, fs: sampling rate
    returns y: output sound, ys: sinusoidal component, yst: stochastic component
    """

    # sinusoidal component
    ys = SM.sineModelSynth(tfreq, tmag, tphase, N, H, fs)

    # stochastic component; its last argument is fixed to 2*H, independent of N
    yst = STM.stochasticModelSynth(stocEnv, H, H*2)

    # sum the two components over their common length
    common = min(ys.size, yst.size)
    y = ys[:common] + yst[:common]
    return y, ys, yst
Beispiel #10
0
def hpsModelSynth(hfreq, hmag, hphase, stocEnv, N, H, fs):
    """
    Combine synthesized harmonics with a synthesized stochastic component
    hfreq, hmag, hphase: harmonic frequencies, magnitudes and phases; stocEnv: stochastic envelope
    N: synthesis FFT size; H: hop size, fs: sampling rate
    returns y: output sound, yh: harmonic component, yst: stochastic component
    """

    yh = SM.sineModelSynth(hfreq, hmag, hphase, N, H, fs)   # harmonic component
    yst = STM.stochasticModelSynth(stocEnv, H, H*2)         # stochastic component (last argument is 2*H)
    length = min(yh.size, yst.size)                         # shared length of both parts
    harmonicPart = yh[:length]
    stochasticPart = yst[:length]
    return harmonicPart + stochasticPart, yh, yst
Beispiel #11
0
def sprModelSynth(tfreq, tmag, tphase, xr, N, H, fs):
    """
    Synthesis of a sound using the sinusoidal plus residual model
    tfreq, tmag, tphase: sinusoidal frequencies, amplitudes and phases; xr: residual signal
    N: synthesis FFT size; H: hop size, fs: sampling rate
    returns y: output sound, ys: sinusoidal component
    """

    # sinusoidal component
    ys = SM.sineModelSynth(tfreq, tmag, tphase, N, H, fs)

    # sum sinusoids and residual over their common length
    common = min(ys.size, xr.size)
    y = ys[:common] + xr[:common]
    return y, ys
Beispiel #12
0
def main(inputFile='../../sounds/vignesh.wav',
         window='blackman',
         M=1201,
         N=2048,
         t=-90,
         minSineDur=0.1,
         nH=100,
         minf0=130,
         maxf0=300,
         f0et=7,
         harmDevSlope=0.01):
    """
    Harmonic-model analysis of a sound followed by resynthesis to a wav file
    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size; N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks
    nH: maximum number of harmonics; minf0: minimum fundamental frequency in sound
    maxf0: maximum fundamental frequency in sound; f0et: maximum error accepted in f0 detection algorithm
    harmDevSlope: allowed deviation of harmonic tracks, higher harmonics could have higher allowed deviation
    returns x: input sound; fs: sampling rate; hfreq: harmonic frequencies; y: synthesized sound
    """

    # synthesis fft size and hop size (the hop has to be 1/4 of Ns)
    Ns, H = 512, 128

    # load the sound and build the analysis window
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)

    # harmonic analysis followed by resynthesis with the analysed phases
    hfreq, hmag, hphase = HM.harmonicModelAnal(
        x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope, minSineDur)
    y = SM.sineModelSynth(hfreq, hmag, hphase, Ns, H, fs)

    # store the result under the input's base name (last 4 characters dropped)
    stem = os.path.basename(inputFile)[:-4]
    UF.wavwrite(y, fs, 'output_sounds/' + stem + '_harmonicModel.wav')
    return x, fs, hfreq, y
Beispiel #13
0
def spsModelSynth(tfreq, tmag, tphase, stocEnv, N, H, fs):
    """
    Synthesis of a sound using the sinusoidal plus stochastic model
    tfreq, tmag, tphase: sinusoidal frequencies, amplitudes and phases; stocEnv: stochastic envelope
    N: synthesis FFT size (also passed to the stochastic synthesis); H: hop size, fs: sampling rate
    returns y: output sound, ys: sinusoidal component, yst: stochastic component
    """

    # sinusoidal component
    ys = SM.sineModelSynth(tfreq, tmag, tphase, N, H, fs)

    # stochastic component, synthesized with N rather than 2*H
    yst = STM.stochasticModelSynth(stocEnv, H, N)

    # sum the two components over their common length
    common = min(ys.size, yst.size)
    y = ys[:common] + yst[:common]
    return y, ys, yst
def resynthesize(hfreq, hmag, hphase, fs, hopSizeMelodia, URIOutputFile):
    '''
    Synthesize the harmonics and write the result to a sound file
    hfreq, hmag, hphase: harmonic frequencies, magnitudes and phases
    fs: sampling rate
    hopSizeMelodia: hop size used for the synthesis
    URIOutputFile: path of the sound file to write
    returns y: the synthesized sound
    '''
    # the synthesis fft size is tied to the hop size (4 hops per fft)
    Ns = 4 * hopSizeMelodia

    y = SM.sineModelSynth(hfreq, hmag, hphase, Ns, hopSizeMelodia, fs)

    # write the sound resulting from harmonic analysis
    UF.wavwrite(y, fs, URIOutputFile)

    # single-argument print call: same output on Python 2 and Python 3
    print('written file ' + URIOutputFile)

    return y
Beispiel #15
0
def main(inputFile='../../sounds/bendir.wav',
         window='hamming',
         M=2001,
         N=2048,
         t=-80,
         minSineDur=0.02,
         maxnSines=150,
         freqDevOffset=10,
         freqDevSlope=0.001):
    """
    Sinusoidal-model analysis of a sound followed by resynthesis to a wav file
    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size; N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks
    maxnSines: maximum number of parallel sinusoids
    freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0
    freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation
    returns x: input sound; fs: sampling rate; tfreq: sinusoidal frequencies; y: synthesized sound
    """

    # synthesis fft size and hop size (the hop has to be 1/4 of Ns)
    Ns, H = 512, 128

    # load the sound and build the analysis window
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)

    # sinusoidal analysis followed by resynthesis with the analysed phases
    tfreq, tmag, tphase = SM.sineModelAnal(
        x, fs, w, N, H, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope)
    y = SM.sineModelSynth(tfreq, tmag, tphase, Ns, H, fs)

    # store the result under the input's base name (last 4 characters dropped)
    stem = os.path.basename(inputFile)[:-4]
    UF.wavwrite(y, fs, 'output_sounds/' + stem + '_sineModel.wav')
    return x, fs, tfreq, y
def harmonic_magnitudes_to_audio(hfreqs, magns, phases, options):
    '''
    Synthesize the audio of a contour from its per-frame harmonics and
    collect the per-frame spectra.

    Params:

    hfreqs - harmonic frequencies of the contour, one entry per frame
    magns - harmonic magnitudes of the contour, one entry per frame
    phases - harmonic phases of the contour, one entry per frame
    options - object providing Fs (sampling rate) and windowsizeInSamples

    return:

    out_audio_contour - audio of the harmonics for the contour, as produced
                        by SM.sineModelSynth
    spectrogram contour - the 'spectrum' entries gathered in the pool
    '''

    pool = Pool()

    run_sine_model_synth = SineModelSynth(hopSize=512, sampleRate=options.Fs)
    run_ifft = IFFT(size=options.windowsizeInSamples)
    run_overl = OverlapAdd(frameSize=options.windowsizeInSamples,
                           hopSize=512,
                           gain=1. / options.windowsizeInSamples)

    for hfreq, hmag, hphase in zip(hfreqs, magns, phases):
        # only the spectrum is kept: the returned audio comes from the
        # SM.sineModelSynth call below, so the per-frame audio is not accumulated
        spectrum, _ = harmonics_to_audio(hfreq, hmag, hphase,
                                         run_sine_model_synth,
                                         run_ifft, run_overl)
        pool.add('spectrum', spectrum)

    # NOTE(review): the sampling rate is hard-coded to 44100 here while the
    # objects above use options.Fs -- confirm which one is intended.
    out_audio_contour = SM.sineModelSynth(hfreqs, magns, phases, 512, 128,
                                          44100)

    return out_audio_contour, pool['spectrum']
Beispiel #17
0
def main(inputFile='../../sounds/bendir.wav', window='hamming', M=2001, N=2048, t=-80, minSineDur=0.02,
         maxnSines=150, freqDevOffset=10, freqDevSlope=0.001):
    """
    Analyze a sound with the sinusoidal model, resynthesize it and write the result
    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size; N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks
    maxnSines: maximum number of parallel sinusoids
    freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0
    freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation
    returns x: input sound; fs: sampling rate; tfreq: sinusoidal frequencies; y: synthesized sound
    """

    # hop size has to be 1/4 of the synthesis fft size
    Ns = 512
    H = 128

    # input sound and analysis window
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)

    # analysis
    analysisResult = SM.sineModelAnal(x, fs, w, N, H, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope)
    tfreq, tmag, tphase = analysisResult

    # synthesis from the sinusoidal representation
    y = SM.sineModelSynth(tfreq, tmag, tphase, Ns, H, fs)

    # write the synthesized sound under the input's base name
    baseName = os.path.basename(inputFile)[:-4]
    outputFile = 'output_sounds/' + baseName + '_sineModel.wav'
    UF.wavwrite(y, fs, outputFile)

    return x, fs, tfreq, y
def main(inputFile='../../sounds/vignesh.wav', window='blackman', M=1201, N=2048, t=-90,
         minSineDur=0.1, nH=100, minf0=130, maxf0=300, f0et=7, harmDevSlope=0.01):
    """
    Analyze a sound with the harmonic model, resynthesize it and write the result
    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size; N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks
    nH: maximum number of harmonics; minf0: minimum fundamental frequency in sound
    maxf0: maximum fundamental frequency in sound; f0et: maximum error accepted in f0 detection algorithm
    harmDevSlope: allowed deviation of harmonic tracks, higher harmonics could have higher allowed deviation
    returns x: input sound; fs: sampling rate; hfreq: harmonic frequencies; y: synthesized sound
    """

    # hop size has to be 1/4 of the synthesis fft size
    Ns = 512
    H = 128

    # input sound and analysis window
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)

    # harmonic detection
    analysisResult = HM.harmonicModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope, minSineDur)
    hfreq, hmag, hphase = analysisResult

    # synthesis of the harmonics
    y = SM.sineModelSynth(hfreq, hmag, hphase, Ns, H, fs)

    # write the synthesized sound under the input's base name
    baseName = os.path.basename(inputFile)[:-4]
    outputFile = 'output_sounds/' + baseName + '_harmonicModel.wav'
    UF.wavwrite(y, fs, outputFile)

    return x, fs, hfreq, y
def analysis(inputFile='../../sounds/vignesh.wav', window='blackman', M=1201, N=2048, t=-90,
        minSineDur=0.1, nH=100, minf0=130, maxf0=300, f0et=7, harmDevSlope=0.01):
    """
    Analyze a sound with the harmonic model, write the resynthesis and plot the results
    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size
    N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks
    minSineDur: minimum duration of sinusoidal tracks
    nH: maximum number of harmonics
    minf0: minimum fundamental frequency in sound
    maxf0: maximum fundamental frequency in sound
    f0et: maximum error accepted in f0 detection algorithm
    harmDevSlope: allowed deviation of harmonic tracks, higher harmonics have higher allowed deviation
    returns inputFile: input file name; fs: sampling rate of input file,
            hfreq, hmag: harmonic frequencies and magnitudes
    """

    def plotWaveform(row, signal, title):
        # draw one time-domain signal in the given row of the 3-row figure
        plt.subplot(3, 1, row)
        plt.plot(np.arange(signal.size)/float(fs), signal)
        plt.axis([0, signal.size/float(fs), min(signal), max(signal)])
        plt.ylabel('amplitude')
        plt.xlabel('time (sec)')
        plt.title(title)

    # synthesis fft size and hop size (the hop has to be 1/4 of Ns)
    Ns, H = 512, 128

    # input sound and analysis window
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)

    # magnitude and phase spectrogram of the input sound
    mX, pX = STFT.stftAnal(x, fs, w, N, H)

    # harmonic model of the whole sound
    hfreq, hmag, hphase = HM.harmonicModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope, minSineDur)

    # resynthesis without the original phases
    y = SM.sineModelSynth(hfreq, hmag, np.array([]), Ns, H, fs)

    # write the resynthesized sound under the input's base name
    baseName = os.path.basename(inputFile)[:-4]
    UF.wavwrite(y, fs, 'output_sounds/' + baseName + '_harmonicModel.wav')

    # --------- plotting --------------------

    plt.figure(figsize=(12, 9))

    # upper limit of the plotted frequency range
    maxplotfreq = 5000.0

    # row 1: input sound
    plotWaveform(1, x, 'input sound: x')

    # row 2: magnitude spectrogram up to maxplotfreq
    plt.subplot(3, 1, 2)
    lastBin = int(N*maxplotfreq/fs)
    frameCount = mX[:, 0].size
    frmTime = H*np.arange(frameCount)/float(fs)
    binFreq = np.arange(lastBin + 1)*float(fs)/N
    plt.pcolormesh(frmTime, binFreq, mX[:, :lastBin + 1].T)
    plt.autoscale(tight=True)

    # harmonic tracks drawn on top of the spectrogram
    visible = hfreq*np.less(hfreq, maxplotfreq)
    visible[visible <= 0] = np.nan     # hide silent / out-of-range points
    plt.plot(frmTime, visible, color='k')
    plt.title('magnitude spectrogram + harmonic tracks')
    plt.autoscale(tight=True)

    # row 3: output sound
    plotWaveform(3, y, 'output sound: y')

    plt.tight_layout()
    plt.show(block=False)

    return inputFile, fs, hfreq, hmag
# read the input sound
(fs, x) = UF.wavread('../../../sounds/mridangam.wav')

# sinusoidal analysis parameters
w = np.hamming(801)
N = 2048
t = -90
minSineDur = .005
maxnSines = 150
freqDevOffset = 20
freqDevSlope = 0.02

# synthesis fft size and hop size; floor division keeps the hop an integer
# even when true division is in effect
Ns = 512
H = Ns//4

# spectrogram and sinusoidal tracks of the input sound
mX, pX = STFT.stftAnal(x, fs, w, N, H)
tfreq, tmag, tphase = SM.sineModelAnal(x, fs, w, N, H, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope)

# time scaling map, in time-value pairs
timeScale = np.array([.01, .0, .03, .03, .335, .4, .355, .42, .671, .8, .691, .82, .858, 1.2, .878, 1.22, 1.185, 1.6, 1.205, 1.62, 1.497, 2.0, 1.517, 2.02, 1.686, 2.4, 1.706, 2.42, 1.978, 2.8])

# time-scale the tracks, resynthesize without phases and analyze the result
ytfreq, ytmag = SMT.sineTimeScaling(tfreq, tmag, timeScale)
y = SM.sineModelSynth(ytfreq, ytmag, np.array([]), Ns, H, fs)
mY, pY = STFT.stftAnal(y, fs, w, N, H)

plt.figure(1, figsize=(12, 9))
maxplotfreq = 4000.0

# first panel: input waveform
plt.subplot(4,1,1)
plt.plot(np.arange(x.size)/float(fs), x, 'b')
plt.axis([0,x.size/float(fs),min(x),max(x)])
plt.title('x (mridangam.wav)')

# second panel: sinusoidal tracks of the input below maxplotfreq
plt.subplot(4,1,2)
numFrames = int(tfreq[:,0].size)
frmTime = H*np.arange(numFrames)/float(fs)
tracks = tfreq*np.less(tfreq, maxplotfreq)
tracks[tracks<=0] = np.nan
plt.plot(frmTime, tracks, color='k', lw=1)
def analysis(inputFile='../../sounds/vignesh.wav', window='blackman', M=1201, N=2048, t=-90,
        minSineDur=0.1, nH=100, minf0=130, maxf0=300, f0et=7, harmDevSlope=0.01):
    """
    Harmonic-model analysis of a sound, with resynthesis to a wav file and plots
    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size
    N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks
    minSineDur: minimum duration of sinusoidal tracks
    nH: maximum number of harmonics
    minf0: minimum fundamental frequency in sound
    maxf0: maximum fundamental frequency in sound
    f0et: maximum error accepted in f0 detection algorithm
    harmDevSlope: allowed deviation of harmonic tracks, higher harmonics have higher allowed deviation
    returns inputFile: input file name; fs: sampling rate of input file,
            hfreq, hmag: harmonic frequencies and magnitudes
    """

    def plotWaveform(row, signal, title):
        # draw one time-domain signal in the given row of the 3-row figure
        plt.subplot(3, 1, row)
        plt.plot(np.arange(signal.size)/float(fs), signal)
        plt.axis([0, signal.size/float(fs), min(signal), max(signal)])
        plt.ylabel('amplitude')
        plt.xlabel('time (sec)')
        plt.title(title)

    # synthesis fft size and hop size (the hop has to be 1/4 of Ns)
    Ns, H = 512, 128

    # input sound and analysis window
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)

    # harmonic model of the whole sound
    hfreq, hmag, hphase = HM.harmonicModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope, minSineDur)

    # resynthesis without the original phases
    y = SM.sineModelSynth(hfreq, hmag, np.array([]), Ns, H, fs)

    # write the resynthesized sound under the input's base name
    baseName = os.path.basename(inputFile)[:-4]
    UF.wavwrite(y, fs, 'output_sounds/' + baseName + '_harmonicModel.wav')

    plt.figure(figsize=(12, 9))

    # upper limit of the plotted frequency range
    maxplotfreq = 5000.0

    # row 1: input sound
    plotWaveform(1, x, 'input sound: x')

    # row 2: harmonic tracks, only when at least one track exists
    if hfreq.shape[1] > 0:
        plt.subplot(3, 1, 2)
        visible = np.copy(hfreq)           # copy: hfreq itself is returned untouched
        frameTimes = H*np.arange(visible.shape[0])/float(fs)
        visible[visible <= 0] = np.nan     # hide points without a harmonic
        plt.plot(frameTimes, visible)
        plt.axis([0, x.size/float(fs), 0, maxplotfreq])
        plt.title('frequencies of harmonic tracks')

    # row 3: output sound
    plotWaveform(3, y, 'output sound: y')

    plt.tight_layout()
    plt.show(block=False)

    return inputFile, fs, hfreq, hmag
def analysis(inputFile='../../sounds/mridangam.wav', window='hamming', M=801, N=2048, t=-90,
        minSineDur=0.01, maxnSines=150, freqDevOffset=20, freqDevSlope=0.02):
    """
    Sine-model analysis of a sound, with resynthesis to a wav file and plots
    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size; N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks
    maxnSines: maximum number of parallel sinusoids
    freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0
    freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation
    returns inputFile: input file name; fs: sampling rate of input file,
            tfreq, tmag: sinusoidal frequencies and magnitudes
    """

    def plotWaveform(row, signal, title):
        # draw one time-domain signal in the given row of the 3-row figure
        plt.subplot(3, 1, row)
        plt.plot(np.arange(signal.size)/float(fs), signal)
        plt.axis([0, signal.size/float(fs), min(signal), max(signal)])
        plt.ylabel('amplitude')
        plt.xlabel('time (sec)')
        plt.title(title)

    # synthesis fft size and hop size (the hop has to be 1/4 of Ns)
    Ns, H = 512, 128

    # input sound and analysis window
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)

    # sine model of the whole sound
    tfreq, tmag, tphase = SM.sineModelAnal(x, fs, w, N, H, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope)

    # resynthesis without the original phases
    y = SM.sineModelSynth(tfreq, tmag, np.array([]), Ns, H, fs)

    # write the resynthesized sound under the input's base name
    baseName = os.path.basename(inputFile)[:-4]
    UF.wavwrite(y, fs, 'output_sounds/' + baseName + '_sineModel.wav')

    plt.figure(figsize=(12, 9))

    # upper limit of the plotted frequency range
    maxplotfreq = 5000.0

    # row 1: input sound
    plotWaveform(1, x, 'input sound: x')

    # row 2: sinusoidal tracks, only when at least one track exists
    if tfreq.shape[1] > 0:
        plt.subplot(3, 1, 2)
        visible = tfreq*np.less(tfreq, maxplotfreq)
        visible[visible <= 0] = np.nan     # hide silent / out-of-range points
        frameTimes = H*np.arange(visible.shape[0])/float(fs)
        plt.plot(frameTimes, visible)
        plt.axis([0, x.size/float(fs), 0, maxplotfreq])
        plt.title('frequencies of sinusoidal tracks')

    # row 3: output sound
    plotWaveform(3, y, 'output sound: y')

    plt.tight_layout()
    plt.show(block=False)

    return inputFile, fs, tfreq, tmag
def analysis(inputFile='../../sounds/vignesh.wav',
             window='blackman',
             M=1201,
             N=2048,
             t=-90,
             minSineDur=0.1,
             nH=100,
             minf0=130,
             maxf0=300,
             f0et=7,
             harmDevSlope=0.01):
    """
    Run the harmonic model on a sound, write the resynthesis and show three plots
    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size
    N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks
    minSineDur: minimum duration of sinusoidal tracks
    nH: maximum number of harmonics
    minf0: minimum fundamental frequency in sound
    maxf0: maximum fundamental frequency in sound
    f0et: maximum error accepted in f0 detection algorithm
    harmDevSlope: allowed deviation of harmonic tracks, higher harmonics have higher allowed deviation
    returns inputFile: input file name; fs: sampling rate of input file,
            hfreq, hmag: harmonic frequencies and magnitudes
    """

    # hop size has to be 1/4 of the synthesis fft size
    Ns = 512
    H = 128

    # input sound and analysis window
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)

    # harmonic model of the whole sound
    analysisResult = HM.harmonicModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0,
                                          f0et, harmDevSlope, minSineDur)
    hfreq, hmag, hphase = analysisResult

    # resynthesis without the original phases
    noPhases = np.array([])
    y = SM.sineModelSynth(hfreq, hmag, noPhases, Ns, H, fs)

    # write the resynthesized sound under the input's base name
    baseName = os.path.basename(inputFile)[:-4]
    outputFile = 'output_sounds/' + baseName + '_harmonicModel.wav'
    UF.wavwrite(y, fs, outputFile)

    plt.figure(figsize=(12, 9))

    # upper limit of the plotted frequency range
    maxplotfreq = 5000.0

    # row 1: input sound
    inputDuration = x.size / float(fs)
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / float(fs), x)
    plt.axis([0, inputDuration, min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # row 2: harmonic tracks, only when at least one track exists
    if hfreq.shape[1] > 0:
        plt.subplot(3, 1, 2)
        visible = np.copy(hfreq)           # copy: hfreq itself is returned untouched
        frameTimes = H * np.arange(visible.shape[0]) / float(fs)
        visible[visible <= 0] = np.nan     # hide points without a harmonic
        plt.plot(frameTimes, visible)
        plt.axis([0, x.size / float(fs), 0, maxplotfreq])
        plt.title('frequencies of harmonic tracks')

    # row 3: output sound
    outputDuration = y.size / float(fs)
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, outputDuration, min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()
    plt.show(block=False)

    return inputFile, fs, hfreq, hmag
Beispiel #24
0
def morph_samepitch_lsf(audio_inp1, audio_inp2, alpha, f0, params, params_ceps):
	"""
	Timbre morphing between two sounds of the same pitch by linearly interpolating the LSF
	representation of their true envelopes (reached via the cepstral and LPC representations).

	For every frame the function estimates a true envelope for each sound, converts it
	cepstrum -> LPC -> LSF, blends the two LSF vectors with weight ``alpha``, converts the blend
	back to a spectral envelope and samples that envelope at the integer multiples of ``f0``.
	The resulting frequency/magnitude tracks are synthesized with ``sineModelSynth``.

	Parameters
	----------
	audio_inp1 : np.array
		Numpy array containing the first audio signal, in the time domain
	audio_inp2 : np.array
		Numpy array containing the second audio signal, in the time domain
	alpha : float
		Interpolation factor (0 <= alpha <= 1); the blend is alpha*sound1 + (1 - alpha)*sound2
	f0 : float
		Fundamental frequency (used to place the reconstructed harmonics)
	params : dict
		Parameter dictionary for the sine model, containing the following keys
			- fs : integer
				Sampling rate of the audio
			- W : integer
				Window size (also passed as the synthesis size to ``sineModelSynth``)
			- N : integer
				FFT size (multiple of 2)
			- H : integer
				Hop size
			- t : float
				Threshold for sinusoidal detection in dB
			- maxnSines : integer
				Number of sinusoids to detect (passed as ``nH``)
	params_ceps : dict
		Parameter dictionary for the true envelope estimation, containing the following keys
			- thresh : float
				Threshold (in dB) for the true envelope estimation
			- ceps_coeffs : integer
				Number of cepstral coefficients to keep in the true envelope estimation
			- num_iters : integer
				Upper bound on number of iterations (if no convergence)

	Returns
	-------
	audio_morphed : np.array
		The morphed audio in the time domain
	"""

	# Unpack the analysis and envelope-estimation parameters
	fs = params['fs']
	W = params['W']
	N = params['N']
	H = params['H']
	t = params['t']
	maxnSines = params['maxnSines']
	thresh = params_ceps['thresh']
	ceps_coeffs = params_ceps['ceps_coeffs']
	num_iters = params_ceps['num_iters']

	w = windows.hann(W)

	# Only the first two outputs (frequency and magnitude tracks) of each analysis are used
	F1,M1,_,_ = hprModelAnal(x = audio_inp1, fs = fs, w = w, N = N, H = H, t = t, nH = maxnSines, minSineDur = 0.02, minf0 = 10, maxf0 = 400, f0et = 5, harmDevSlope = 0.01)
	F2,M2,_,_ = hprModelAnal(x = audio_inp2, fs = fs, w = w, N = N, H = H, t = t, nH = maxnSines, minSineDur = 0.02, minf0 = 10, maxf0 = 400, f0et = 5, harmDevSlope = 0.01)

	# Frequency matrix shaped like the analysis with fewer rows; column i holds (i+1)*f0
	new_F= np.zeros_like(F1 if F1.shape[0] < F2.shape[0] else F2)
	for i in range(new_F.shape[1]):
		new_F[:,i] = (i+1)*f0

	# Magnitude matrix, again shaped like the analysis with fewer rows
	new_M = np.zeros_like(M1 if M1.shape[0] < M2.shape[0] else M2)

	# Row i of both analyses is processed together, so only the common rows are visited
	for i in range(new_M.shape[0]):
		# Interpolate each frame's magnitudes (divided by 20) over frequency;
		# points outside the analysed frequency range take the fill value -100
		f1 = interpolate.interp1d(F1[i,:],M1[i,:]/20,kind = 'linear',fill_value = -100, bounds_error=False)
		f2 = interpolate.interp1d(F2[i,:],M2[i,:]/20,kind = 'linear',fill_value = -100, bounds_error=False)
		# N frequency points from 0 to fs/2 (loop-invariant, recomputed every frame)
		fbins = np.linspace(0,fs/2,N)
		finp1 = f1(fbins)
		finp2 = f2(fbins)
		# Only the first return value of the true-envelope estimator is used
		specenv1,_,_ = fe.calc_true_envelope_spectral(finp1,N,thresh,ceps_coeffs,num_iters)
		specenv2,_,_ = fe.calc_true_envelope_spectral(finp2,N,thresh,ceps_coeffs,num_iters)

		# Obtain the cepstral representation of the true envelopes
		cc_te_1 = np.real(np.fft.ifft(specenv1))
		cc_te_2 = np.real(np.fft.ifft(specenv2))

		# Number of LPC (LSF) coefficients to keep.
		# Cannot keep all, as precision error causes the coefficients to blow up
		L = 60
		# Obtain the LPC representation from the cepstral representation
		lpc_cc_te_1 = fe.cc_to_lpc(cc_te_1,L)
		lpc_cc_te_2 = fe.cc_to_lpc(cc_te_2,L)

		# Obtain the LSF representation from the LPC
		lsf_lpc_cc_te_1 = fe.lpc_to_lsf(lpc_cc_te_1)
		lsf_lpc_cc_te_2 = fe.lpc_to_lsf(lpc_cc_te_2)

		# Interpolate the LSF and convert the LSF back to LPC
		lsf_interp = alpha*lsf_lpc_cc_te_1 + (1 - alpha)*lsf_lpc_cc_te_2
		lpc_interp = fe.lsf_to_lpc(lsf_interp)

		# Convert the LPCs back to cepstral coefficients
		cc_interp = fe.lpc_to_cc(lpc_interp,L + 1 ,L)
		# Zero-pad the cepstrum up to length N
		cc_interp = np.pad(cc_interp,[0 , N - len(cc_interp)],mode = 'constant',constant_values=(0, 0))

		# Mirror the first half so the cepstrum is symmetric and its FFT is a real spectrum
		cc_interp = np.concatenate((cc_interp[:N//2],np.flip(cc_interp[1:N//2 + 1])))

		# The zeroth coefficient is interpolated separately (it represents the gain/power of the signals)
		cc_interp[0] = alpha*cc_te_1[0] + (1 - alpha)*cc_te_2[0]

		specenv = np.real(np.fft.fft(cc_interp))

		# Sample the morphed envelope at the harmonic frequencies; frequencies outside
		# fbins[:N//2 + 1] take the fill value -10 (scaled by 20 below)
		fp = interpolate.interp1d(fbins[:N//2 + 1],specenv[:N//2 + 1],kind = 'linear',fill_value = -10, bounds_error=False)
		new_M[i,:] = 20*fp(new_F[i,:])

	# An empty phase array is passed to the synthesizer
	audio_morphed = sineModelSynth(new_F, new_M, np.empty([0,0]), W, H, fs)

	return audio_morphed
def transformation_synthesis(inputFile,
                             fs,
                             tfreq,
                             tmag,
                             freqScaling=np.array([0, 2.0, 1, .3]),
                             timeScaling=np.array(
                                 [0, .0, .671, .671, 1.978, 1.978 + 1.0])):
    """
    Apply frequency and time scaling to sinusoidal tracks, synthesize, save and plot.

    inputFile: name of the input file (only its base name is used to build the output name)
    fs: sampling rate of the input file
    tfreq, tmag: sinusoidal frequencies and magnitudes
    freqScaling: frequency scaling factors, in time-value pairs
    timeScaling: time scaling factors, in time-value pairs

    The synthesized sound is written to
    'output_sounds/<input base name>_sineModelTransformation.wav' and a two-panel
    figure is shown without blocking. Nothing is returned.
    """
    synth_fft_size = 512  # size of fft used in synthesis
    hop = 128  # hop size (has to be 1/4 of the synthesis fft size)

    # scale the frequencies first, then stretch the scaled tracks in time
    freq_scaled = ST.sineFreqScaling(tfreq, freqScaling)
    ytfreq, ytmag = ST.sineTimeScaling(freq_scaled, tmag, timeScaling)

    # synthesize with an empty phase array
    y = SM.sineModelSynth(ytfreq, ytmag, np.array([]), synth_fft_size, hop, fs)

    # write the output sound next to the other generated sounds
    stem = os.path.basename(inputFile)[:-4]
    UF.wavwrite(y, fs, 'output_sounds/' + stem + '_sineModelTransformation.wav')

    plt.figure(figsize=(12, 6))
    maxplotfreq = 15000.0  # frequency range to plot

    # upper panel: transformed tracks, only drawn when there is at least one track
    if ytfreq.shape[1] > 0:
        plt.subplot(2, 1, 1)
        visible = np.copy(ytfreq)
        visible = visible * np.less(visible, maxplotfreq)
        visible[visible <= 0] = np.nan  # NaN entries are not drawn
        frame_times = hop * np.arange(visible.shape[0]) / float(fs)
        plt.plot(frame_times, visible)
        plt.title('transformed sinusoidal tracks')
        plt.autoscale(tight=True)

    # lower panel: the output waveform
    plt.subplot(2, 1, 2)
    sample_times = np.arange(y.size) / float(fs)
    plt.plot(sample_times, y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()
    plt.show(block=False)
def main(inputFile='../../sounds/bendir.wav', window='hamming', M=2001, N=2048, t=-80, minSineDur=0.02,
         maxnSines=150, freqDevOffset=10, freqDevSlope=0.001):
    """
    Run sinusoidal-model analysis and synthesis on a sound file, then save and plot the result.

    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size
    N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks
    minSineDur: minimum duration of sinusoidal tracks
    maxnSines: maximum number of parallel sinusoids
    freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0
    freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation

    The synthesized sound is written to 'output_sounds/<input base name>_sineModel.wav'
    and a three-panel figure is shown without blocking. Nothing is returned.
    """
    Ns = 512  # size of fft used in synthesis
    H = 128   # hop size (has to be 1/4 of Ns)

    # read the sound, analyze it with the chosen window, and resynthesize
    fs, x = UF.wavread(inputFile)
    tfreq, tmag, tphase = SM.sineModelAnal(x, fs, get_window(window, M), N, H, t,
                                           maxnSines, minSineDur, freqDevOffset, freqDevSlope)
    y = SM.sineModelSynth(tfreq, tmag, tphase, Ns, H, fs)

    # write the synthesized sound
    stem = os.path.basename(inputFile)[:-4]
    UF.wavwrite(y, fs, 'output_sounds/' + stem + '_sineModel.wav')

    def plot_waveform(row, samples, caption):
        # draw one waveform panel of the 3x1 grid, with time in seconds on the x axis
        plt.subplot(3, 1, row)
        plt.plot(np.arange(samples.size) / float(fs), samples)
        plt.axis([0, samples.size / float(fs), min(samples), max(samples)])
        plt.ylabel('amplitude')
        plt.xlabel('time (sec)')
        plt.title(caption)

    plt.figure(figsize=(12, 9))
    maxplotfreq = 5000.0  # frequency range to plot

    plot_waveform(1, x, 'input sound: x')

    # middle panel: sinusoidal tracks (panel is created even when there are no tracks)
    plt.subplot(3, 1, 2)
    if tfreq.shape[1] > 0:
        frame_times = H * np.arange(tfreq.shape[0]) / float(fs)
        tfreq[tfreq <= 0] = np.nan  # in-place: NaN entries are not drawn
        plt.plot(frame_times, tfreq)
        plt.axis([0, x.size / float(fs), 0, maxplotfreq])
        plt.title('frequencies of sinusoidal tracks')

    plot_waveform(3, y, 'output sound: y')

    plt.tight_layout()
    plt.show(block=False)
Beispiel #27
0
# Harmonic-model analysis and synthesis of vignesh.wav, followed by plots of the
# input sound and of the detected harmonic tracks.
(fs, x) = UF.wavread('../../../sounds/vignesh.wav')
w = np.blackman(1201)
N = 2048
t = -90
nH = 100
minf0 = 130
maxf0 = 300
f0et = 7
Ns = 512
# Floor division keeps the hop size an int (128) under true division as well;
# `Ns / 4` would yield the float 128.0 on Python 3.
H = Ns // 4
minSineDur = .1
harmDevSlope = 0.01
hfreq, hmag, hphase = HM.harmonicModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0,
                                           f0et, harmDevSlope, minSineDur)
y = SM.sineModelSynth(hfreq, hmag, hphase, Ns, H, fs)

# time (in seconds) of each analysis frame
numFrames = int(hfreq[:, 0].size)
frmTime = H * np.arange(numFrames) / float(fs)

plt.figure(1, figsize=(9, 7))

# top panel: input waveform
plt.subplot(3, 1, 1)
plt.plot(np.arange(x.size) / float(fs), x, 'b')
plt.axis([0, x.size / float(fs), min(x), max(x)])
plt.title('x (vignesh.wav)')

# middle panel: harmonic tracks
plt.subplot(3, 1, 2)
# NOTE: yhfreq is another name for the same array as hfreq (no copy is made),
# so the assignment below also writes NaN into hfreq, which is what gets plotted.
yhfreq = hfreq
yhfreq[hfreq == 0] = np.nan
plt.plot(frmTime, hfreq, lw=1.2)
def main(
    inputFile="../../sounds/bendir.wav",
    window="hamming",
    M=2001,
    N=2048,
    t=-80,
    minSineDur=0.02,
    maxnSines=150,
    freqDevOffset=10,
    freqDevSlope=0.001,
    stocf=0.2,
):
    """
    Analyze and resynthesize a sound with the sinusoidal plus stochastic model.

    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size
    N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks
    minSineDur: minimum duration of sinusoidal tracks
    maxnSines: maximum number of parallel sinusoids
    freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0
    freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation
    stocf: decimation factor used for the stochastic approximation

    Three files are written under 'output_sounds/', named after the input base name:
    '<name>_spsModel_sines.wav', '<name>_spsModel_stochastic.wav' and '<name>_spsModel.wav'.
    A three-panel figure is then shown with a blocking plt.show(). Nothing is returned.
    """

    # size of fft used in synthesis
    Ns = 512

    # hop size (has to be 1/4 of Ns)
    H = 128

    # --------- computation -----------------

    # read input sound
    (fs, x) = UF.wavread(inputFile)

    # compute analysis window
    w = get_window(window, M)

    # perform sinusoidal analysis
    tfreq, tmag, tphase = SM.sineModelAnal(x, fs, w, N, H, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope)

    # subtract sinusoids from original sound
    xr = UF.sineSubtraction(x, Ns, H, tfreq, tmag, tphase, fs)

    # compute stochastic model of residual
    mYst = STM.stochasticModelAnal(xr, H, stocf)

    # synthesize sinusoids
    ys = SM.sineModelSynth(tfreq, tmag, tphase, Ns, H, fs)

    # synthesize stochastic component
    yst = STM.stochasticModelSynth(mYst, H)

    # sum sinusoids and stochastic, truncated to the shorter of the two signals
    common = min(yst.size, ys.size)
    y = yst[:common] + ys[:common]

    # output sound files (monophonic with sampling rate of 44100)
    stem = os.path.basename(inputFile)[:-4]
    outputFileSines = "output_sounds/" + stem + "_spsModel_sines.wav"
    outputFileStochastic = "output_sounds/" + stem + "_spsModel_stochastic.wav"
    outputFile = "output_sounds/" + stem + "_spsModel.wav"

    # write sound files for sinusoidal, stochastic, and the sum
    UF.wavwrite(ys, fs, outputFileSines)
    UF.wavwrite(yst, fs, outputFileStochastic)
    UF.wavwrite(y, fs, outputFile)

    # --------- plotting --------------------

    plt.figure(figsize=(12, 9))

    # frequency range to plot
    maxplotfreq = 10000.0

    # plot the input sound
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / float(fs), x)
    plt.axis([0, x.size / float(fs), min(x), max(x)])
    plt.ylabel("amplitude")
    plt.xlabel("time (sec)")
    plt.title("input sound: x")

    # plot the stochastic envelope up to maxplotfreq
    plt.subplot(3, 1, 2)
    numFrames = int(mYst[:, 0].size)
    sizeEnv = int(mYst[0, :].size)
    frmTime = H * np.arange(numFrames) / float(fs)
    binFreq = (0.5 * fs) * np.arange(sizeEnv * maxplotfreq / (0.5 * fs)) / sizeEnv
    # slice bounds must be integers: a float bound raises TypeError, so truncate first
    lastBin = int(sizeEnv * maxplotfreq / (0.5 * fs))
    plt.pcolormesh(frmTime, binFreq, np.transpose(mYst[:, : lastBin + 1]))
    plt.autoscale(tight=True)

    # plot sinusoidal frequencies on top of stochastic component
    sines = tfreq * np.less(tfreq, maxplotfreq)
    sines[sines == 0] = np.nan  # NaN entries are not drawn
    numFrames = int(sines[:, 0].size)
    frmTime = H * np.arange(numFrames) / float(fs)
    plt.plot(frmTime, sines, color="k", ms=3, alpha=1)
    plt.xlabel("time(s)")
    plt.ylabel("Frequency(Hz)")
    plt.autoscale(tight=True)
    plt.title("sinusoidal + stochastic spectrogram")

    # plot the output sound
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel("amplitude")
    plt.xlabel("time (sec)")
    plt.title("output sound: y")

    plt.tight_layout()
    plt.show()
Beispiel #29
0
def pitch_shift_te(audio_inp, params, factor, choice_recon, params_ceps):
    """
    Shift the pitch of a sound by a scalar factor, re-sampling magnitudes from the true envelope.

    The harmonic frequencies are multiplied by ``factor``; for each frame the magnitudes of
    the shifted harmonics are read off the true envelope estimated from the original harmonics.
    The sound can be returned with or without the original residue added.

    Parameters
    ----------
    audio_inp : np.array
        Numpy array containing the audio signal, in the time domain
    params : dict
        Parameter dictionary for the sine model, containing the following keys
            - fs : integer
                Sampling rate of the audio
            - W : integer
                Window size (also passed as the synthesis size)
            - N : integer
                FFT size (multiple of 2)
            - H : integer
                Hop size
            - t : float
                Threshold for sinusoidal detection in dB
            - maxnSines : integer
                Number of sinusoids to detect (passed as ``nH``)
    factor : float
        Shift factor for the pitch. New pitch = factor * (old pitch)
    choice_recon : 0 or 1
        If 0, returns only the sinusoidal reconstruction;
        any other value adds the original residue to the sinusoidal part
    params_ceps : dict
        Parameter dictionary for the true envelope estimation, containing the following keys
            - thresh : float
                Threshold (in dB) for the true envelope estimation
            - ceps_coeffs : integer
                Number of cepstral coefficients to keep in the true envelope estimation
            - num_iters : integer
                Upper bound on number of iterations (if no convergence)

    Returns
    -------
    audio_transformed : np.array
        The transformed signal in the time domain
    residue : np.array
        Residue of the original signal, as returned by the analysis
    """
    fs, W, N, H = params['fs'], params['W'], params['N'], params['H']
    t, maxnSines = params['t'], params['maxnSines']
    thresh = params_ceps['thresh']
    ceps_coeffs = params_ceps['ceps_coeffs']
    num_iters = params_ceps['num_iters']

    F, M, _, R = hprModelAnal(x=audio_inp, fs=fs, w=windows.hann(W), N=N, H=H, t=t,
                              nH=maxnSines, minSineDur=0.1, minf0=10, maxf0=1000,
                              f0et=5, harmDevSlope=0.01)

    scaled_F = factor * F

    # N frequency points from 0 to fs/2, shared by every frame
    fbins = np.linspace(0, fs / 2, N)
    half = N // 2 + 1

    # new_M is the same array as M (not a copy); each row is read before it is overwritten
    new_M = M
    for frame in range(F.shape[0]):
        # envelope input: the frame's magnitudes (divided by 20) interpolated over frequency,
        # with -5 outside the analysed frequency range
        coarse = interpolate.interp1d(F[frame, :], M[frame, :] / 20, kind='linear',
                                      fill_value=-5, bounds_error=False)
        specenv, _, _ = fe.calc_true_envelope_spectral(coarse(fbins), N, thresh,
                                                       ceps_coeffs, num_iters)
        # read the envelope at the shifted harmonic frequencies (extrapolating if needed)
        envelope_at = interpolate.interp1d(fbins[:half], specenv[:half], kind='linear',
                                           fill_value='extrapolate', bounds_error=False)
        new_M[frame, :] = 20 * envelope_at(scaled_F[frame, :])

    no_phase = np.empty([0, 0])
    if choice_recon == 0:
        audio_transformed = sineModelSynth(scaled_F, new_M, no_phase, W, H, fs)
    else:
        audio_transformed = hprModelSynth(scaled_F, new_M, no_phase, R, W, H, fs)[0]

    return audio_transformed, R
Beispiel #30
0
# Script: sinusoidal analysis of a sound followed by synthesis WITHOUT the analysed
# phases (an empty phase array is passed), then a plot of input against output.

# analysis parameters, passed positionally to SM.sineModelAnal below
window = 'hamming'
M = 1001
N = 2048
t = -100
minSineDur = 0.01
maxnSines = 150
freqDevOffset = 30
freqDevSlope = 0.02

# synthesis fft size and hop size
Ns = 512
H = 128

# NOTE(review): inputFile is not defined in this snippet; it must be set before this line
fs, x = UF.wavread(inputFile)

w = get_window(window, M)

tfreq, tmag, tphase = SM.sineModelAnal(x, fs, w, N, H, t, maxnSines,
                                       minSineDur, freqDevOffset, freqDevSlope)

# alternative kept for reference: synthesis using the analysed phases
# y = SM.sineModelSynth(tfreq, tmag, tphase, Ns, H, fs)
y = SM.sineModelSynth(tfreq, tmag, np.array([]), Ns, H,
                      fs)  # demonstration of recreated phases

UF.wavwrite(y, fs, 'test2.wav')

import matplotlib.pyplot as plt

# overlay the input and the resynthesized signal on one axis (blocking show)
plt.plot(x)
plt.plot(y)
plt.show()
Beispiel #31
0
# Test of the subtraction of sines: analyse bendir.wav with the sinusoidal model,
# subtract the sinusoids, and plot the residual spectrogram with the tracks on top.
if __name__ == '__main__':
  # the sound path is resolved relative to this file's directory
  (fs, x) = UF.wavread(os.path.join(os.path.dirname(os.path.realpath(__file__)), '../../sounds/bendir.wav'))
  w = np.hamming(2001)
  N = 2048
  H = 128
  t = -100
  minSineDur = .02
  maxnSines = 200
  freqDevOffset = 10
  freqDevSlope = 0.001
  tfreq, tmag, tphase = SM.sineModelAnal(x, fs, w, N, H, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope)
  # residual: input with the analysed sinusoids subtracted
  xr = UF.sineSubtraction(x, N, H, tfreq, tmag, tphase, fs)
  # STFT of the residual with window and fft size 2*H and hop H.
  # NOTE(review): `hamming` is used as a bare name here (np.hamming is used above);
  # confirm it is imported elsewhere in the file.
  mXr, pXr = STFT.stftAnal(xr, fs, hamming(H*2), H*2, H)
  Ns = 512
  ys = SM.sineModelSynth(tfreq, tmag, tphase, Ns, H, fs)

  # residual spectrogram: frame times in seconds against H frequency bins of width fs/(2*H)
  plt.figure(1, figsize=(9.5, 7))
  numFrames = int(mXr[:,0].size)
  frmTime = H*np.arange(numFrames)/float(fs)
  binFreq = np.arange(H)*float(fs)/(H*2)
  plt.pcolormesh(frmTime, binFreq, np.transpose(mXr))
  plt.autoscale(tight=True)

  # overlay the sinusoidal tracks; zeros are replaced in place by NaN so they are not drawn
  tfreq[tfreq==0] = np.nan
  numFrames = int(tfreq[:,0].size)
  frmTime = H*np.arange(numFrames)/float(fs)
  plt.plot(frmTime, tfreq, color='k', ms=3, alpha=1)
  plt.xlabel('Time(s)')
  plt.ylabel('Frequency(Hz)')
  plt.autoscale(tight=True)
# finish and save the current figure
plt.tight_layout()
plt.savefig("cello-phrase-spectrogram.png")

# compute the F0 and the harmonics
# NOTE(review): x, fs, w, N, H and mX are not defined in this fragment; they are
# expected to exist at module level before this point.
t = -97
minf0 = 310
maxf0 = 450
f0et = 4
nH = 70
harmDevSlope = 0.01
Ns = H * 4
minSineDur = 0.3
hfreq, hmag, hphase = HM.harmonicModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope, minSineDur)
# copy of the harmonic frequencies with every column except the first zeroed,
# so that only the first track (the F0) is synthesized
hfreqt = copy.copy(hfreq)
hfreqt[:, 1:] = 0
yf0 = 4 * SM.sineModelSynth(hfreqt, hmag, hphase, Ns, H, fs)
yh = SM.sineModelSynth(hfreq, hmag, hphase, Ns, H, fs)
UF.wavwrite(yf0, fs, "cello-phrase-f0.wav")
UF.wavwrite(yh, fs, "cello-phrase-harmonics.wav")

# plot the F0 on top of the spectrogram
plt.figure(3, figsize=(16, 4.5))
maxplotfreq = 5000.0
harms = hfreq * np.less(hfreq, maxplotfreq)
# rows whose first harmonic is 0 are set to NaN entirely, so they are not drawn
harms[harms[:, 0] == 0] = np.nan
numFrames = int(mX[:, 0].size)
frmTime = H * np.arange(numFrames) / float(fs)
binFreq = fs * np.arange(N * maxplotfreq / fs) / N
# slice bounds must be integers: a float bound raises TypeError, so truncate first
lastBin = int(N * maxplotfreq / fs)
plt.pcolormesh(frmTime, binFreq, np.transpose(mX[:, : lastBin + 1]))
plt.plot(frmTime, harms[:, 0], linewidth=3, color="0")
plt.xlabel("time (sec)")
Beispiel #33
0
def exploreSineModel(inputFile='../sms-tools/sounds/multisines.wav'):
    """
    Analyze a sound with a fixed set of sinusoidal-model parameters, resynthesize it,
    save the result and plot input, tracks and output.

    Input:
            inputFile (string) = wav file including the path
    Output:
            True, once the (blocking) figure has been shown

    The synthesized sound is written to '<input base name>_sineModel.wav' in the
    current working directory.
    """
    # analysis settings
    window = 'hamming'      # window type
    M = 3001                # window size in samples
    N = 4096                # FFT size
    t = -80                 # threshold
    minSineDur = 0.02       # minimum duration of a sinusoid
    maxnSines = 15          # maximum number of sinusoids at any time frame
    freqDevOffset = 10      # minimum frequency deviation at 0Hz
    freqDevSlope = 0.001    # slope increase of minimum frequency deviation
    # synthesis settings
    Ns = 512                # size of fft used in synthesis
    H = 128                 # hop size (has to be 1/4 of Ns)

    # read the sound, analyze it with the sinusoidal model, and resynthesize
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)
    tfreq, tmag, tphase = SM.sineModelAnal(x, fs, w, N, H, t, maxnSines,
                                           minSineDur, freqDevOffset, freqDevSlope)
    y = SM.sineModelSynth(tfreq, tmag, tphase, Ns, H, fs)

    # write the synthesized sound
    UF.wavwrite(y, fs, os.path.basename(inputFile)[:-4] + '_sineModel.wav')

    rate = float(fs)

    def show_signal(row, samples, heading):
        # one waveform panel of the 3x1 grid, time axis in seconds
        plt.subplot(3, 1, row)
        plt.plot(np.arange(samples.size) / rate, samples)
        plt.axis([0, samples.size / rate, min(samples), max(samples)])
        plt.ylabel('amplitude')
        plt.xlabel('time (sec)')
        plt.title(heading)

    plt.figure(figsize=(12, 9))
    maxplotfreq = 5000.0  # frequency range to plot

    show_signal(1, x, 'input sound: x')

    # sinusoidal tracks (the panel is created even when there are none)
    plt.subplot(3, 1, 2)
    if tfreq.shape[1] > 0:
        frame_times = H * np.arange(tfreq.shape[0]) / rate
        tfreq[tfreq <= 0] = np.nan  # in-place: NaN entries are not drawn
        plt.plot(frame_times, tfreq)
        plt.axis([0, x.size / rate, 0, maxplotfreq])
        plt.title('frequencies of sinusoidal tracks')

    show_signal(3, y, 'output sound: y')

    plt.tight_layout()
    plt.show()
    return True
Beispiel #34
0
def morph_samepitch_cc(audio_inp1, audio_inp2, alpha, f0, params, params_ceps):
    """
    Morph the timbre of two same-pitch sounds by linearly interpolating the cepstral
    representation of their true envelopes.

    Parameters
    ----------
    audio_inp1 : np.array
        Numpy array containing the first audio signal, in the time domain
    audio_inp2 : np.array
        Numpy array containing the second audio signal, in the time domain
    alpha : float
        Interpolation factor (0 <= alpha <= 1); the blend is alpha*sound1 + (1 - alpha)*sound2
    f0 : float
        Fundamental frequency (used to place the reconstructed harmonics)
    params : dict
        Parameter dictionary for the sine model, containing the following keys
            - fs : integer
                Sampling rate of the audio
            - W : integer
                Window size (also passed as the synthesis size)
            - N : integer
                FFT size (multiple of 2)
            - H : integer
                Hop size
            - t : float
                Threshold for sinusoidal detection in dB
            - maxnSines : integer
                Number of sinusoids to detect (passed as ``nH``)
    params_ceps : dict
        Parameter dictionary for the true envelope estimation, containing the following keys
            - thresh : float
                Threshold (in dB) for the true envelope estimation
            - ceps_coeffs : integer
                Number of cepstral coefficients to keep in the true envelope estimation
            - num_iters : integer
                Upper bound on number of iterations (if no convergence)

    Returns
    -------
    audio_morphed : np.array
        The morphed audio in the time domain
    """
    fs, W, N, H = params['fs'], params['W'], params['N'], params['H']
    t, maxnSines = params['t'], params['maxnSines']
    thresh = params_ceps['thresh']
    ceps_coeffs = params_ceps['ceps_coeffs']
    num_iters = params_ceps['num_iters']

    w = windows.hann(W)

    def analyse(signal):
        # harmonic analysis; only the frequency and magnitude tracks are kept
        freqs, mags, _, _ = hprModelAnal(x=signal, fs=fs, w=w, N=N, H=H, t=t, nH=maxnSines,
                                         minSineDur=0.02, minf0=10, maxf0=400, f0et=5,
                                         harmDevSlope=0.01)
        return freqs, mags

    F1, M1 = analyse(audio_inp1)
    F2, M2 = analyse(audio_inp2)

    # output matrices are shaped like the analysis with fewer rows
    new_F = np.zeros_like(F1 if F1.shape[0] < F2.shape[0] else F2)
    new_M = np.zeros_like(M1 if M1.shape[0] < M2.shape[0] else M2)

    # column k of the frequency matrix holds the (k+1)-th multiple of f0
    for k in range(new_F.shape[1]):
        new_F[:, k] = (k + 1) * f0

    # N frequency points from 0 to fs/2, shared by every frame
    fbins = np.linspace(0, fs / 2, N)
    half = N // 2 + 1

    def true_envelope(freq_row, mag_row):
        # interpolate the frame's magnitudes (divided by 20) over frequency, using -100
        # outside the analysed range, and feed that to the true-envelope estimator
        coarse = interpolate.interp1d(freq_row, mag_row / 20, kind='linear',
                                      fill_value=-100, bounds_error=False)
        envelope, _, _ = fe.calc_true_envelope_spectral(coarse(fbins), N, thresh,
                                                        ceps_coeffs, num_iters)
        return envelope

    for frame in range(new_M.shape[0]):
        # build both interpolators before evaluating either one
        coarse1 = interpolate.interp1d(F1[frame, :], M1[frame, :] / 20, kind='linear',
                                       fill_value=-100, bounds_error=False)
        coarse2 = interpolate.interp1d(F2[frame, :], M2[frame, :] / 20, kind='linear',
                                       fill_value=-100, bounds_error=False)
        finp1 = coarse1(fbins)
        finp2 = coarse2(fbins)
        specenv1, _, _ = fe.calc_true_envelope_spectral(finp1, N, thresh, ceps_coeffs, num_iters)
        specenv2, _, _ = fe.calc_true_envelope_spectral(finp2, N, thresh, ceps_coeffs, num_iters)

        # cepstral representation of each true envelope
        cc_te_1 = np.real(np.fft.ifft(specenv1))
        cc_te_2 = np.real(np.fft.ifft(specenv2))

        # blend the cepstra and go back to a spectral envelope
        blended = alpha * cc_te_1 + (1 - alpha) * cc_te_2
        specenv = np.real(np.fft.fft(blended))

        # sample the blended envelope at the harmonics; -10 outside fbins[:half]
        envelope_at = interpolate.interp1d(fbins[:half], specenv[:half], kind='linear',
                                           fill_value=-10, bounds_error=False)
        new_M[frame, :] = 20 * envelope_at(new_F[frame, :])

    # synthesize with an empty phase array
    return sineModelSynth(new_F, new_M, np.empty([0, 0]), W, H, fs)
Beispiel #35
0
def main(inputFile='../../sounds/bendir.wav',
         window='hamming',
         M=2001,
         N=2048,
         t=-80,
         minSineDur=0.02,
         maxnSines=150,
         freqDevOffset=10,
         freqDevSlope=0.001):
    """
    Perform analysis/synthesis using the sinusoidal model, then save and plot the result.

    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size
    N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks
    minSineDur: minimum duration of sinusoidal tracks
    maxnSines: maximum number of parallel sinusoids
    freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0
    freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation

    Writes 'output_sounds/<input base name>_sineModel.wav' and shows a three-panel
    figure without blocking. Nothing is returned.
    """
    Ns, H = 512, 128  # synthesis fft size and hop size (hop has to be 1/4 of Ns)

    fs, x = UF.wavread(inputFile)

    def seconds(sample_positions):
        # convert sample positions (scalar or array) to seconds
        return sample_positions / float(fs)

    # analysis with the requested window, then synthesis from the tracks
    analysis_window = get_window(window, M)
    tfreq, tmag, tphase = SM.sineModelAnal(x, fs, analysis_window, N, H, t,
                                           maxnSines, minSineDur,
                                           freqDevOffset, freqDevSlope)
    y = SM.sineModelSynth(tfreq, tmag, tphase, Ns, H, fs)

    # write the synthesized sound
    base = os.path.basename(inputFile)[:-4]
    outputFile = 'output_sounds/' + base + '_sineModel.wav'
    UF.wavwrite(y, fs, outputFile)

    plt.figure(figsize=(12, 9))
    maxplotfreq = 5000.0  # frequency range to plot

    # panel 1: input waveform
    plt.subplot(3, 1, 1)
    plt.plot(seconds(np.arange(x.size)), x)
    plt.axis([0, seconds(x.size), min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # panel 2: sinusoidal tracks, drawn only when at least one track exists
    plt.subplot(3, 1, 2)
    has_tracks = tfreq.shape[1] > 0
    if has_tracks:
        frame_times = seconds(H * np.arange(tfreq.shape[0]))
        tfreq[tfreq <= 0] = np.nan  # in-place: NaN entries are not drawn
        plt.plot(frame_times, tfreq)
        plt.axis([0, seconds(x.size), 0, maxplotfreq])
        plt.title('frequencies of sinusoidal tracks')

    # panel 3: output waveform
    plt.subplot(3, 1, 3)
    plt.plot(seconds(np.arange(y.size)), y)
    plt.axis([0, seconds(y.size), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()
    plt.show(block=False)
Beispiel #36
0
def residue_lpc(audio_inp, params,lpc_order):
    """
    Approximate the residual of a sound by its per-frame LPC envelope and resynthesize it
    with random phase.

    The residual returned by the harmonic-plus-residual analysis is peak-normalized, an LPC
    envelope is computed for every frame, and the envelopes are inverted with ``stftSynth``
    using uniformly random phases. The output is scaled so its largest absolute value is 1.

    Parameters
    ----------
    audio_inp : np.array
        Numpy array containing the audio signal, in the time domain
    params : dict
        Parameter dictionary for the sine model, containing the following keys
            - fs : Sampling rate of the audio
            - W : Window size (also used as frame size)
            - N : FFT size (multiple of 2); each frame is zero-padded to this length
            - H : Hop size
            - t : Threshold for sinusoidal detection in dB
            - maxnSines : Number of sinusoids to detect (passed as ``nH``)
    lpc_order : integer
        Number of coefficients in the LPC representation

    Returns
    -------
    res_transformed : np.array
        The transformed residue (LPC envelope approximation) in the time domain
    """
    fs, W, N, H = params['fs'], params['W'], params['N'], params['H']
    t, maxnSines = params['t'], params['maxnSines']

    w = windows.hann(W)

    F, M, P, R = hprModelAnal(x=audio_inp, fs=fs, w=w, N=N, H=H, t=t, nH=maxnSines,
                              minSineDur=0.02, minf0=10, maxf0=400, f0et=5,
                              harmDevSlope=0.01)
    # harmonic reconstruction; the result is not used further down
    harmonics_recon = sineModelSynth(tfreq=F, tmag=M, tphase=P, N=W, H=H, fs=fs)

    # peak-normalize the residue before analysis (original note: throws a np zero error otherwise)
    nf = np.max(np.abs(R))
    R = R / nf

    # one LPC envelope per frame, each frame zero-padded from W to N samples first
    envelopes = []
    for frame in ess.FrameGenerator(R.astype('float32'), W, H):
        padded = np.pad(frame, [0, N - W], mode='constant', constant_values=(0, 0))
        envelopes.append(fe.lpc_envelope(padded, lpc_order, fs, len(padded) // 2 + 1))
    xmX = np.array(envelopes)

    # uniformly random phases in [0, 2*pi), one per envelope bin
    n_frames, n_bins = xmX.shape[0], xmX.shape[1]
    XpX = 2 * np.pi * np.random.rand(n_frames, n_bins)

    # back to the time domain, undoing the peak normalization applied above
    res_transformed = stftSynth(xmX, XpX, W, H) * nf

    # rescale so that the largest absolute sample is 1
    peak = np.max(np.abs(res_transformed))
    res_transformed = (res_transformed / peak)

    return res_transformed
Beispiel #37
0
def recon_samples_ls(matrix_ceps_coeffs,
                     midi_pitch,
                     params,
                     f_ref=440,
                     choice_f=0):
    """
    Synthesize audio from a sequence of cepstral frames.

    For every frame the cepstrum is turned into a spectral envelope (real part
    of the FFT of the symmetrically extended cepstrum), the envelope is sampled
    at the harmonics of the frame's pitch, and the resulting frequency and
    magnitude tracks are passed to ``sineModelSynth``.

    Note : The input should be in log dB (log|X|); the sampled envelope is
    multiplied by 20 before synthesis.

    Inputs
    ------
    matrix_ceps_coeffs : np.ndarray
        Matrix whose columns are the (sequential) cepstral frames.
    midi_pitch : scalar or sequence of numbers
        Pitch of each frame (one entry per column of ``matrix_ceps_coeffs``).
        Any scalar (Python int/float or numpy scalar) is used as the pitch of
        every frame.
    params : dict
        Parameter dictionary containing the following keys
            - fs : sampling rate of the audio
            - W : window size, forwarded to ``sineModelSynth``
            - N : FFT size used to build the envelope
            - H : hop size
            - nH : number of harmonics to synthesize
    f_ref : float
        Reference frequency for MIDI note 69 (440 Hz by default).
    choice_f : 0 or 1 (0 by default)
        If 0, ``midi_pitch`` holds MIDI note numbers and is converted to Hz.
        Otherwise ``midi_pitch`` is used directly as a frequency in Hz.

    Returns
    -------
    audio_recon
        Whatever ``sineModelSynth`` returns for the constructed tracks.
    """

    fs = params['fs']
    W = params['W']
    N = params['N']
    H = params['H']
    nH = params['nH']

    num_frames = matrix_ceps_coeffs.shape[1]

    # Accept lists as well as arrays, and broadcast *any* scalar (not only a
    # plain Python int) to one pitch value per frame.
    pitch = np.asarray(midi_pitch, dtype=float)
    if pitch.ndim == 0:
        pitch = np.full(num_frames, float(pitch))

    if choice_f == 0:
        # Convert MIDI to Hz
        f0 = f_ref * (2**((pitch - 69) / 12.0))
    else:
        f0 = pitch

    # Row j holds the first nH harmonics of f0[j]: F[j, i] = (i + 1) * f0[j]
    F = np.outer(f0[:num_frames], np.arange(1, nH + 1))
    M = np.zeros((num_frames, nH))

    # Bin indices on which every envelope is defined (loop invariant)
    bin_index = np.arange(N)

    # Sample the frequencies from the envelope at each instant
    for i in range(num_frames):
        ceps_current = matrix_ceps_coeffs[:, i]

        # Zero-pad to N, then mirror the first half so that the FFT input is
        # symmetric and its transform is (numerically) real.
        cc_real = np.pad(ceps_current, [0, N - len(ceps_current)],
                         mode='constant',
                         constant_values=(0, 0))
        cc_real = np.concatenate(
            (cc_real[:N // 2], np.flip(cc_real[1:N // 2 + 1])))
        cc_real[0] = ceps_current[0]

        # Obtain the envelope from the cepstrum
        specenv = np.real(np.fft.fft(cc_real))
        fp = interpolate.interp1d(bin_index,
                                  specenv,
                                  kind='linear',
                                  fill_value='extrapolate',
                                  bounds_error=False)

        # Harmonic frequencies are mapped to fractional bin positions f / fs * N
        M[i, :] = 20 * fp((F[i, :] / fs) * N)

    audio_recon = sineModelSynth(F, M, np.empty([0, 0]), W, H, fs)

    return audio_recon
Beispiel #38
0
def pitch_shifting(audio_inp, params, factor,choice,choice_recon):
    """
    Scale the frequencies found by ``hprModelAnal`` by ``factor`` and resynthesize.

    Parameters
    ----------
    audio_inp : np.array
        Audio signal in the time domain, passed to ``hprModelAnal`` as ``x``.
    params : dict
        Analysis parameters; the keys read here are
            - fs : sampling rate of the audio
            - W : window size (length of the Hann window, also used for synthesis)
            - N : FFT size
            - H : hop size
            - t : threshold for sinusoidal detection in dB
            - maxnSines : passed to ``hprModelAnal`` as ``nH``
    factor : float
        Multiplier applied to every analysed frequency.
    choice : 0 or other
        If 0, the analysed magnitudes are reused unchanged.
        Otherwise each frame's magnitudes are linearly interpolated at the
        scaled frequencies (-100 outside the analysed frequency range).
    choice_recon : 0 or other
        If 0, synthesizes with ``sineModelSynth`` (sinusoids only).
        Otherwise uses the first output of ``hprModelSynth``, which is also
        given the residue.

    Returns
    -------
    audio_transformed : np.array
        The resynthesized signal.
    Residue : np.array
        The fourth output of ``hprModelAnal``.
    """

    sample_rate = params['fs']
    win_len = params['W']
    fft_len = params['N']
    hop = params['H']
    peak_thresh = params['t']
    n_tracks = params['maxnSines']

    hann_win = windows.hann(win_len)

    F, M, P, R = hprModelAnal(
        x=audio_inp, fs=sample_rate, w=hann_win, N=fft_len, H=hop,
        t=peak_thresh, nH=n_tracks, minSineDur=0.02, minf0=10, maxf0=400,
        f0et=5, harmDevSlope=0.01)

    scaled_F = factor * F

    if choice != 0:
        # Resample every frame's magnitudes at the shifted frequencies.
        # The rows of M are overwritten in place, as in the original code.
        for row in range(F.shape[0]):
            envelope = interpolate.interp1d(
                F[row, :], M[row, :],
                kind='linear', fill_value=-100, bounds_error=False)
            M[row, :] = envelope(scaled_F[row, :])

    no_phase = np.empty([0, 0])
    if choice_recon == 0:
        audio_transformed = sineModelSynth(scaled_F, M, no_phase, win_len, hop, sample_rate)
    else:
        audio_transformed = hprModelSynth(scaled_F, M, no_phase, R, win_len, hop, sample_rate)[0]

    return audio_transformed, R
def main(inputFile='../../sounds/bendir.wav', window='hamming', M=2001, N=2048, t=-80,
         minSineDur=0.02, maxnSines=150, freqDevOffset=10, freqDevSlope=0.001):
    """
    Analyse a sound with the sinusoidal-plus-residual model, write the
    sinusoidal, residual and summed sounds to ``output_sounds/`` and plot them.

    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size
    N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks
    minSineDur: minimum duration of sinusoidal tracks
    maxnSines: maximum number of parallel sinusoids
    freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0
    freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation
    """

    synth_fft_size = 512  # size of fft used in synthesis
    hop = 128             # hop size (has to be 1/4 of the synthesis fft size)

    # --------- computation -----------------

    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)

    # sinusoidal analysis, then remove the sinusoids from the input
    tfreq, tmag, tphase = SM.sineModelAnal(x, fs, w, N, hop, t, maxnSines, minSineDur,
                                           freqDevOffset, freqDevSlope)
    xr = UF.sineSubtraction(x, N, hop, tfreq, tmag, tphase, fs)

    # spectrogram of the residual (only the magnitudes are plotted below)
    mXr, pXr = STFT.stftAnal(xr, fs, w, N, hop)

    # resynthesize the sinusoids and add the residual over the common length
    ys = SM.sineModelSynth(tfreq, tmag, tphase, synth_fft_size, hop, fs)
    common = min(xr.size, ys.size)
    y = xr[:common] + ys[:common]

    # output sound files: sinusoidal part, residual part and their sum
    out_prefix = 'output_sounds/' + os.path.basename(inputFile)[:-4]
    UF.wavwrite(ys, fs, out_prefix + '_sprModel_sines.wav')
    UF.wavwrite(xr, fs, out_prefix + '_sprModel_residual.wav')
    UF.wavwrite(y, fs, out_prefix + '_sprModel.wav')

    # --------- plotting --------------------

    plt.figure(figsize=(12, 9))
    maxplotfreq = 5000.0  # frequency range to plot
    rate = float(fs)

    # input sound
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / rate, x)
    plt.axis([0, x.size / rate, min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # magnitude spectrogram of the residual, up to maxplotfreq
    plt.subplot(3, 1, 2)
    top_bin = int(N * maxplotfreq / fs)
    frame_count = int(mXr[:, 0].size)
    frame_times = hop * np.arange(frame_count) / rate
    bin_freqs = np.arange(top_bin + 1) * rate / N
    plt.pcolormesh(frame_times, bin_freqs, np.transpose(mXr[:, :top_bin + 1]))
    plt.autoscale(tight=True)

    # sinusoidal tracks on top; values at or above maxplotfreq and
    # non-positive values become NaN so they are not drawn
    tracks = tfreq * np.less(tfreq, maxplotfreq)
    tracks[tracks <= 0] = np.nan
    plt.plot(frame_times, tracks, color='k')
    plt.title('sinusoidal tracks + residual spectrogram')
    plt.autoscale(tight=True)

    # output sound
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / rate, y)
    plt.axis([0, y.size / rate, min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()
    plt.show()
def exploreSineModel(inputFile='multiSines.wav'):
    """
    Analyse and resynthesize a sound with the sinusoidal model, write the
    result next to the working directory and plot input, tracks and output.

    Input:
            inputFile (string) = wav file including the path
    Output:
            return True
    """
    # fixed analysis / synthesis settings
    window = 'hamming'    # window type
    M = 2001              # window size in samples
    N = 2048              # FFT size
    t = -80               # threshold
    minSineDur = 0.02     # minimum duration of a sinusoid
    maxnSines = 150       # maximum number of sinusoids at any time frame
    freqDevOffset = 10    # minimum frequency deviation at 0Hz
    freqDevSlope = 0.001  # slope increase of minimum frequency deviation
    Ns = 512              # size of fft used in synthesis
    H = 128               # hop size (has to be 1/4 of Ns)

    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)

    # sinusoidal analysis followed by synthesis from the tracks
    tfreq, tmag, tphase = SM.sineModelAnal(
        x, fs, w, N, H, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope)
    y = SM.sineModelSynth(tfreq, tmag, tphase, Ns, H, fs)

    # the output name drops the directory and the last four characters
    # (the extension) of the input name
    stem = os.path.basename(inputFile)[:-4]
    UF.wavwrite(y, fs, stem + '_sineModel.wav')

    plt.figure(figsize=(12, 9))
    maxplotfreq = 5000.0  # frequency range to plot
    rate = float(fs)
    input_duration = x.size / rate

    # input sound
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / rate, x)
    plt.axis([0, input_duration, min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # sinusoidal frequencies (skipped when there are no track columns)
    plt.subplot(3, 1, 2)
    if tfreq.shape[1] > 0:
        frame_times = H * np.arange(tfreq.shape[0]) / rate
        # non-positive frequencies are replaced by NaN (in place) so they are not drawn
        tfreq[tfreq <= 0] = np.nan
        plt.plot(frame_times, tfreq)
        plt.axis([0, input_duration, 0, maxplotfreq])
        plt.title('frequencies of sinusoidal tracks')

    # output sound
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / rate, y)
    plt.axis([0, y.size / rate, min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()
    plt.show()
    return True

# Harmonic analysis / synthesis of vignesh.wav followed by plots of the input
# and of the harmonic tracks.
(fs, x) = UF.wavread("../../../sounds/vignesh.wav")

# analysis parameters
w = np.blackman(1201)
N = 2048
t = -90
nH = 100
minf0 = 130
maxf0 = 300
f0et = 7
Ns = 512
# Floor division keeps the hop size an integer on Python 3 as well
# (Ns / 4 would be the float 128.0 there).
H = Ns // 4
minSineDur = 0.1
harmDevSlope = 0.01

hfreq, hmag, hphase = HM.harmonicModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope, minSineDur)
y = SM.sineModelSynth(hfreq, hmag, hphase, Ns, H, fs)

# time stamp (in seconds) of every analysis frame
numFrames = int(hfreq[:, 0].size)
frmTime = H * np.arange(numFrames) / float(fs)

plt.figure(1, figsize=(9, 7))

# input waveform
plt.subplot(3, 1, 1)
plt.plot(np.arange(x.size) / float(fs), x, "b")
plt.axis([0, x.size / float(fs), min(x), max(x)])
plt.title("x (vignesh.wav)")

# harmonic tracks
plt.subplot(3, 1, 2)
# NOTE: yhfreq is the same array object as hfreq, not a copy, so the NaN
# assignment below also changes hfreq. The plot call relies on that: it draws
# hfreq and expects the zero entries to have been blanked out.
yhfreq = hfreq
yhfreq[hfreq == 0] = np.nan
plt.plot(frmTime, hfreq, lw=1.2)
Beispiel #42
0
def pitch_shifting_harmonic(audio_inp, params, params_ceps, factor,choice,choice_recon,f0):
    """
    Resynthesize a sound on a harmonic grid built from ``f0`` and scaled by ``factor``.

    The synthesis frequencies are ``factor * (k * f0)`` for k = 1..(number of
    analysed tracks); the analysed frequencies from ``hprModelAnal`` are only
    used as the support of the magnitude interpolation.
    Note : Will only perform well for harmonic/sustained sounds.

    Parameters
    ----------
    audio_inp : np.array
        Audio signal in the time domain, passed to ``hprModelAnal`` as ``x``.
    params : dict
        Analysis parameters; the keys read here are
            - fs : sampling rate of the audio
            - W : window size (length of the Hann window, also used for synthesis)
            - N : FFT size
            - H : hop size
            - t : threshold for sinusoidal detection in dB
            - maxnSines : passed to ``hprModelAnal`` as ``nH``
    params_ceps : dict
        Settings forwarded to ``fe.calc_true_envelope_spectral``; the keys read
        here are ``thresh``, ``ceps_coeffs`` and ``num_iters``.
    factor : float
        Multiplier applied to the harmonic grid.
    choice : 0, 1 or other
        If 0, the analysed magnitudes are reused unchanged.
        If 1, each frame's magnitudes are linearly interpolated at the scaled
        frequencies (-100 outside the analysed frequency range).
        Otherwise the magnitudes are sampled from the envelope returned by
        ``fe.calc_true_envelope_spectral`` for that frame.
    choice_recon : 0 or other
        If 0, synthesizes with ``sineModelSynth`` (sinusoids only).
        Otherwise uses the first output of ``hprModelSynth``, which is also
        given the residue.
    f0 : Hz
        The fundamental frequency of the note.

    Returns
    -------
    audio_transformed : np.array
        The resynthesized signal.
    """

    sample_rate = params['fs']
    win_len = params['W']
    fft_len = params['N']
    hop = params['H']
    peak_thresh = params['t']
    n_tracks = params['maxnSines']
    env_thresh = params_ceps['thresh']
    n_ceps = params_ceps['ceps_coeffs']
    max_iters = params_ceps['num_iters']

    hann_win = windows.hann(win_len)

    F, M, P, R = hprModelAnal(
        x=audio_inp, fs=sample_rate, w=hann_win, N=fft_len, H=hop,
        t=peak_thresh, nH=n_tracks, minSineDur=0.02, minf0=10, maxf0=1000,
        f0et=5, harmDevSlope=0.01)

    # Column k-1 of the grid holds the k-th multiple of f0
    harmonic_grid = np.zeros_like(F)
    for k in range(1, F.shape[1] + 1):
        harmonic_grid[:, k - 1] = k * f0

    scaled_F = factor * harmonic_grid
    n_frames = F.shape[0]

    # In both interpolating modes the rows of M are overwritten in place.
    if choice == 0:
        pass
    elif choice == 1:
        for row in range(n_frames):
            envelope = interpolate.interp1d(
                F[row, :], M[row, :],
                kind='linear', fill_value=-100, bounds_error=False)
            M[row, :] = envelope(scaled_F[row, :])
    else:
        # Frequency axis on which the coarse envelope is evaluated
        fbins = np.linspace(0, sample_rate / 2, 2 * fft_len)
        half = fft_len // 2 + 1
        for row in range(n_frames):
            # Magnitudes are divided by 20 before the envelope estimation and
            # multiplied by 20 again after sampling.
            coarse = interpolate.interp1d(
                F[row, :], M[row, :] / 20,
                kind='linear', fill_value=-5, bounds_error=False)
            specenv, _, _ = fe.calc_true_envelope_spectral(
                coarse(fbins), fft_len, env_thresh, n_ceps, max_iters)
            sampler = interpolate.interp1d(
                fbins[:half], specenv[:half],
                kind='linear', fill_value='extrapolate', bounds_error=False)
            M[row, :] = 20 * sampler(scaled_F[row, :])

    no_phase = np.empty([0, 0])
    if choice_recon == 0:
        audio_transformed = sineModelSynth(scaled_F, M, no_phase, win_len, hop, sample_rate)
    else:
        audio_transformed = hprModelSynth(scaled_F, M, no_phase, R, win_len, hop, sample_rate)[0]

    return audio_transformed
Beispiel #43
0
def sustain_sound_gen(audio_inp, params, params_ceps, f0, rwl, alpha):
    """
    Re-synthesizes the input audio using a random walk over the analysis
    frames, starting from the middle frame of the audio.

    For every output frame the walk moves ``rwl`` frames forwards or backwards
    (clamped to the valid frame range), the spectral envelope of the frame it
    lands on is estimated, blended with the running envelope, and sampled at
    the multiples of ``f0``.

    Parameters
    ----------
    audio_inp : np.array
        Audio signal in the time domain, passed to ``hprModelAnal`` as ``x``.
    params : dict
        Analysis parameters; the keys read here are
            - fs : sampling rate of the audio
            - W : window size (length of the Hann window, also used for synthesis)
            - N : FFT size
            - H : hop size
            - t : threshold for sinusoidal detection in dB
            - maxnSines : passed to ``hprModelAnal`` as ``nH``
    params_ceps : dict
        Settings forwarded to ``fe.calc_true_envelope_spectral``; the keys read
        here are ``thresh``, ``ceps_coeffs`` and ``num_iters``.
    f0 : float
        Fundamental frequency (or pitch) of the note.
    rwl : integer
        Step size, in frames, of the random walk.
    alpha : float (0 < alpha < 1)
        Weight of the running envelope in the blend; the closer to 1, the less
        the envelope changes from one output frame to the next.

    Returns
    -------
    audio_transformed : np.array
        The resynthesized signal (sinusoids only, the residual is ignored).
    """

    fs = params['fs']
    W = params['W']
    N = params['N']
    H = params['H']
    t = params['t']
    maxnSines = params['maxnSines']
    thresh = params_ceps['thresh']
    ceps_coeffs = params_ceps['ceps_coeffs']
    num_iters = params_ceps['num_iters']

    w = windows.hann(W)

    F, M, P, R = hprModelAnal(
        x=audio_inp, fs=fs, w=w, N=N, H=H, t=t, nH=maxnSines,
        minSineDur=0.02, minf0=10, maxf0=1000, f0et=5, harmDevSlope=0.01)

    # Column i of the synthesis grid holds the (i+1)-th multiple of f0
    new_F = np.zeros_like(F)
    for i in range(F.shape[1]):
        new_F[:, i] = (i + 1) * f0

    # The output magnitudes need their own array: the walk keeps reading
    # frames from M, including frames at indices that have already been
    # written as output, so writing into M itself would feed synthesized
    # values back in as if they were analysis data.
    new_M = np.zeros_like(M)

    num_frames = M.shape[0]
    last_frame = num_frames - 1

    # Frequency axis on which the coarse envelopes are evaluated (loop invariant)
    fbins = np.linspace(0, fs / 2, N)
    half = N // 2 + 1

    def frame_envelope(frame_index):
        # Envelope estimate of one analysis frame; magnitudes are divided by 20
        # before the estimation and multiplied by 20 again after sampling.
        coarse = interpolate.interp1d(
            F[frame_index, :], M[frame_index, :] / 20,
            kind='linear', fill_value=-5, bounds_error=False)
        env, _, _ = fe.calc_true_envelope_spectral(
            coarse(fbins), N, thresh, ceps_coeffs, num_iters)
        return env

    # Initial state of the random walk: the middle frame
    current_frame = num_frames // 2
    specenv_at = frame_envelope(current_frame)

    for i in range(num_frames):
        # Random-walk update, clamped to [0, last_frame]
        current_frame = current_frame + random.choice([-rwl, rwl])
        current_frame = max(0, min(current_frame, last_frame))

        specenv_new = frame_envelope(current_frame)

        # Blend the running envelope with the envelope of the visited frame
        specenv_at = alpha * specenv_at + (1 - alpha) * specenv_new

        # Sample the blended envelope at the harmonics of this output frame
        fp = interpolate.interp1d(
            fbins[:half], specenv_at[:half],
            kind='linear', fill_value='extrapolate', bounds_error=False)
        new_M[i, :] = 20 * fp(new_F[i, :])

    # Reconstruction of the sound ignoring the residual
    audio_transformed = sineModelSynth(new_F, new_M, np.empty([0, 0]), W, H, fs)

    return audio_transformed
    os.path.join(os.path.dirname(os.path.realpath(__file__)),
                 '../../../sounds/bendir.wav'))
# Sinusoidal analysis / synthesis of the first 50000 samples of the input,
# followed by plots of the excerpt and of the sinusoidal tracks.
x1 = x[0:50000]

# analysis parameters
w = np.blackman(2001)
N = 2048
t = -90
minSineDur = .01
maxnSines = 150
freqDevOffset = 20
freqDevSlope = 0.02
Ns = 512
# The hop size is a quarter of the synthesis FFT size. Floor division keeps it
# an integer on Python 3 as well. An earlier "H = 500" was overwritten here
# before it was ever used and has been dropped.
H = Ns // 4

tfreq, tmag, tphase = SM.sineModelAnal(x1, fs, w, N, H, t, maxnSines,
                                       minSineDur, freqDevOffset, freqDevSlope)
y = SM.sineModelSynth(tfreq, tmag, tphase, Ns, H, fs)

# time stamp (in seconds) of every analysis frame
numFrames = int(tfreq[:, 0].size)
frmTime = H * np.arange(numFrames) / float(fs)
maxplotfreq = 3000.0

plt.figure(1, figsize=(9, 7))

# input excerpt
plt.subplot(3, 1, 1)
plt.plot(np.arange(x1.size) / float(fs), x1, 'b', lw=1.5)
plt.axis([0, x1.size / float(fs), min(x1), max(x1)])
plt.title('x (bendir.wav)')

# sinusoidal tracks: values at or above maxplotfreq and non-positive values
# become NaN so they are not drawn
plt.subplot(3, 1, 2)
tracks = tfreq * np.less(tfreq, maxplotfreq)
tracks[tracks <= 0] = np.nan
def sineModelMultiRes(inputFile="../../sounds/orchestra.wav",
                    windows=(signal.blackman(4095), signal.hamming(2047), np.hamming(1023)),
                    Ns=(4096, 2048, 1024),
                    Bs=(1000, 5000, 22050),
                    t=-80, minSineDur=0.02,
                    maxnSines=150, freqDevOffset=10, freqDevSlope=0.001, PlotIt=True):
    """
    Analyse a sound with ``sineModelMultiResAnal``, resynthesize it, report the
    summed absolute difference to the input, write the result and optionally
    plot it.

    inputFile: input sound file
    windows, Ns, Bs: per-band settings forwarded unchanged to
        ``sineModelMultiResAnal`` (presumably analysis windows, FFT sizes and
        band limits in Hz - confirm against that function)
    t: magnitude threshold of spectral peaks
    minSineDur: minimum duration of sinusoidal tracks
    maxnSines: maximum number of parallel sinusoids
    freqDevOffset, freqDevSlope: frequency deviation settings of the tracking
    PlotIt: when false, return right after writing the output file

    Returns None.
    """

    sN = 512
    # hop size is a quarter of the synthesis FFT size; floor division keeps it
    # an integer on Python 3 as well
    H = sN // 4
    (fs, x) = UF.wavread(inputFile)

    tfreq, tmag, tphase = sineModelMultiResAnal(x, fs, windows, Ns, Bs, H, t,
                            minSineDur, maxnSines, freqDevOffset, freqDevSlope)
    y = SM.sineModelSynth(tfreq, tmag, tphase, sN, H, fs)

    # summed absolute difference between x and y over their common length
    diffLength = min([x.size, y.size])
    diff = np.abs(x[:diffLength] - y[:diffLength])
    print("diff {0}".format(np.sum(diff)))

    # the output name drops the directory and the last four characters
    # (the extension) of the input name
    outputFile = os.path.basename(inputFile)[:-4] + '_sineModelMulti.wav'
    UF.wavwrite(y, fs, outputFile)

    if not PlotIt:
        return

    plt.figure(figsize=(12, 9))

    maxplotfreq = 10000.0

    # plot the input sound
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size)/float(fs), x)
    plt.axis([0, x.size/float(fs), min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # plot the sinusoidal frequencies (skipped when there are no track columns)
    plt.subplot(3, 1, 2)
    if tfreq.shape[1] > 0:
        numFrames = tfreq.shape[0]
        frmTime = H*np.arange(numFrames)/float(fs)
        # non-positive frequencies are replaced by NaN (in place) so they are not drawn
        tfreq[tfreq <= 0] = np.nan
        plt.ylabel('frequency (Hz)')
        plt.xlabel('time (sec)')
        plt.plot(frmTime, tfreq)
        plt.axis([0, x.size/float(fs), 0, maxplotfreq])
        plt.title('frequencies of sinusoidal tracks')

    # plot the output sound
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size)/float(fs), y)
    plt.axis([0, y.size/float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()
    plt.show()
# Sinusoidal time scaling of mridangam.wav: analyse, remap the time axis with
# SMT.sineTimeScaling, resynthesize, and plot the input and its tracks.
(fs, x) = UF.wavread('../../../sounds/mridangam.wav')

# analysis parameters
w = np.hamming(801)
N = 2048
t = -90
minSineDur = .005
maxnSines = 150
freqDevOffset = 20
freqDevSlope = 0.02
Ns = 512
# Floor division keeps the hop size an integer on Python 3 as well
# (Ns/4 would be the float 128.0 there).
H = Ns // 4

mX, pX = STFT.stftAnal(x, w, N, H)
tfreq, tmag, tphase = SM.sineModelAnal(x, fs, w, N, H, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope)

# time mapping handed to SMT.sineTimeScaling
# NOTE(review): looks like alternating (input time, output time) pairs in
# seconds - confirm against sineTimeScaling
timeScale = np.array([.01, .0, .03, .03, .335, .4, .355, .42, .671, .8, .691, .82, .858, 1.2, .878, 1.22, 1.185, 1.6, 1.205, 1.62, 1.497, 2.0, 1.517, 2.02, 1.686, 2.4, 1.706, 2.42, 1.978, 2.8])
ytfreq, ytmag = SMT.sineTimeScaling(tfreq, tmag, timeScale)

# synthesis without phases (an empty array is passed as the phase argument)
y = SM.sineModelSynth(ytfreq, ytmag, np.array([]), Ns, H, fs)
mY, pY = STFT.stftAnal(y, w, N, H)

plt.figure(1, figsize=(12, 9))
maxplotfreq = 4000.0

# input waveform
plt.subplot(4,1,1)
plt.plot(np.arange(x.size)/float(fs), x, 'b')
plt.axis([0,x.size/float(fs),min(x),max(x)])
plt.title('x (mridangam.wav)')

# sinusoidal tracks of the input: values at or above maxplotfreq and
# non-positive values become NaN so they are not drawn
plt.subplot(4,1,2)
numFrames = int(tfreq[:,0].size)
frmTime = H*np.arange(numFrames)/float(fs)
tracks = tfreq*np.less(tfreq, maxplotfreq)
tracks[tracks<=0] = np.nan
plt.plot(frmTime, tracks, color='k', lw=1)
def main(
    inputFile="../../sounds/sax-phrase.wav",
    window="blackman",
    M=601,
    N=1024,
    t=-100,
    minSineDur=0.1,
    nH=100,
    minf0=350,
    maxf0=700,
    f0et=5,
    harmDevSlope=0.01,
):
    """
    Analyse a sound with the harmonic-plus-residual model, write the harmonic,
    residual and summed sounds to ``output_sounds/`` and plot them.

    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size
    N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks
    minSineDur: minimum duration of sinusoidal tracks
    nH: maximum number of harmonics
    minf0: minimum fundamental frequency in sound
    maxf0: maximum fundamental frequency in sound
    f0et: maximum error accepted in f0 detection algorithm
    harmDevSlope: allowed deviation of harmonic tracks, higher harmonics have higher allowed deviation
    """

    synth_fft_size = 512  # size of fft used in synthesis
    hop = 128             # hop size (has to be 1/4 of the synthesis fft size)

    # --------- computation -----------------

    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)

    # harmonic analysis, then remove the harmonics from the input
    hfreq, hmag, hphase = HM.harmonicModelAnal(
        x, fs, w, N, hop, t, nH, minf0, maxf0, f0et, harmDevSlope, minSineDur)
    xr = UF.sineSubtraction(x, synth_fft_size, hop, hfreq, hmag, hphase, fs)

    # spectrogram of the residual (only the magnitudes are plotted below)
    mXr, pXr = STFT.stftAnal(xr, fs, w, N, hop)

    # resynthesize the harmonics and add the residual over the common length
    yh = SM.sineModelSynth(hfreq, hmag, hphase, synth_fft_size, hop, fs)
    common = min(xr.size, yh.size)
    y = xr[:common] + yh[:common]

    # output sound files: harmonic part, residual part and their sum
    out_prefix = "output_sounds/" + os.path.basename(inputFile)[:-4]
    UF.wavwrite(yh, fs, out_prefix + "_hprModel_sines.wav")
    UF.wavwrite(xr, fs, out_prefix + "_hprModel_residual.wav")
    UF.wavwrite(y, fs, out_prefix + "_hprModel.wav")

    # --------- plotting --------------------

    plt.figure(figsize=(12, 9))
    maxplotfreq = 5000.0  # frequency range to plot
    rate = float(fs)

    # input sound
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / rate, x)
    plt.axis([0, x.size / rate, min(x), max(x)])
    plt.ylabel("amplitude")
    plt.xlabel("time (sec)")
    plt.title("input sound: x")

    # magnitude spectrogram of the residual, up to maxplotfreq
    plt.subplot(3, 1, 2)
    top_bin = int(N * maxplotfreq / fs)
    spec_frames = int(mXr[:, 0].size)
    spec_times = hop * np.arange(spec_frames) / rate
    bin_freqs = np.arange(top_bin + 1) * rate / N
    plt.pcolormesh(spec_times, bin_freqs, np.transpose(mXr[:, : top_bin + 1]))
    plt.autoscale(tight=True)

    # harmonic tracks on top, with their own time axis; values at or above
    # maxplotfreq and zeros become NaN so they are not drawn
    harms = hfreq * np.less(hfreq, maxplotfreq)
    harms[harms == 0] = np.nan
    harm_frames = int(harms[:, 0].size)
    harm_times = hop * np.arange(harm_frames) / rate
    plt.plot(harm_times, harms, color="k", ms=3, alpha=1)
    plt.xlabel("time(s)")
    plt.ylabel("frequency(Hz)")
    plt.autoscale(tight=True)
    plt.title("harmonics + residual spectrogram")

    # output sound
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / rate, y)
    plt.axis([0, y.size / rate, min(y), max(y)])
    plt.ylabel("amplitude")
    plt.xlabel("time (sec)")
    plt.title("output sound: y")

    plt.tight_layout()
    plt.show()
def analysis(inputFile='../../sounds/mridangam.wav',
             window='hamming',
             M=801,
             N=2048,
             t=-90,
             minSineDur=0.01,
             maxnSines=150,
             freqDevOffset=20,
             freqDevSlope=0.02):
    """
    Analyze a sound with the sine model, write a phase-less resynthesis to
    ``output_sounds/`` and show input, tracks and output in a non-blocking plot.

    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size; N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks
    maxnSines: maximum number of parallel sinusoids
    freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0
    freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation
    returns inputFile: input file name; fs: sampling rate of input file,
            tfreq, tmag: sinusoidal frequencies and magnitudes
    """

    synth_fft_size = 512  # size of fft used in synthesis
    hop = 128             # hop size (has to be 1/4 of the synthesis fft size)

    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)

    # sine model of the whole sound
    tfreq, tmag, tphase = SM.sineModelAnal(
        x, fs, w, N, hop, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope)

    # synthesis without the original phases (an empty array is passed instead)
    y = SM.sineModelSynth(tfreq, tmag, np.array([]), synth_fft_size, hop, fs)

    # the output name drops the directory and the last four characters
    # (the extension) of the input name
    stem = os.path.basename(inputFile)[:-4]
    UF.wavwrite(y, fs, 'output_sounds/' + stem + '_sineModel.wav')

    plt.figure(figsize=(12, 9))
    maxplotfreq = 5000.0  # frequency range to plot
    rate = float(fs)
    input_duration = x.size / rate

    # input sound
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / rate, x)
    plt.axis([0, input_duration, min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # sinusoidal frequencies; drawn from a copy so that the returned tfreq
    # is left untouched
    if tfreq.shape[1] > 0:
        plt.subplot(3, 1, 2)
        tracks = np.copy(tfreq)
        tracks = tracks * np.less(tracks, maxplotfreq)
        tracks[tracks <= 0] = np.nan
        frame_times = hop * np.arange(int(tracks[:, 0].size)) / rate
        plt.plot(frame_times, tracks)
        plt.axis([0, input_duration, 0, maxplotfreq])
        plt.title('frequencies of sinusoidal tracks')

    # output sound
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / rate, y)
    plt.axis([0, y.size / rate, min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()
    plt.show(block=False)

    return inputFile, fs, tfreq, tmag
Beispiel #49
0
def exploreSineModel(inputFile='../../sounds/multiSines.wav'):
    """
    Analyze a sound with the sinusoidal model, resynthesize it and report the SNR.

    The sound is read from inputFile, analyzed with SM.sineModelAnal and
    resynthesized with SM.sineModelSynth using the parameters hard-coded
    below. Two wav files are written to the current working directory:
    '<name>_sineModel.wav' (the synthesis) and '<name>_sineModel_error.wav'
    (input minus synthesis). The SNR is printed and a three-panel figure is
    shown (input sound, sinusoidal tracks, output sound with the absolute
    error overlaid). plt.show() is called without block=False.

    Input:
            inputFile (string) = wav file including the path
    Output:
            return True
    """
    window = 'blackmanharris'       # Window type
    M = 3529                        # Window size in samples
    N = 4096                        # FFT size
    t = -50                         # Threshold
    minSineDur = 0.01               # minimum duration of a sinusoid
    maxnSines = 15                  # Maximum number of sinusoids at any time frame
    freqDevOffset = 10              # minimum frequency deviation at 0Hz
    freqDevSlope = 0.001            # slope increase of minimum frequency deviation
    Ns = 512                        # size of fft used in synthesis
    # hop size (has to be 1/4 of Ns); floor division keeps it an integer,
    # since it is used as a sample count ('/' would give a float on Python 3)
    H = Ns // 4

    fs, x = UF.wavread(inputFile)   # read input sound
    w = get_window(window, M)       # compute analysis window

    # analyze the sound with the sinusoidal model
    tfreq, tmag, tphase = SM.sineModelAnal(x, fs, w, N, H, t, maxnSines,
                                           minSineDur, freqDevOffset,
                                           freqDevSlope)

    # synthesize the output sound from the sinusoidal representation
    y = SM.sineModelSynth(tfreq, tmag, tphase, Ns, H, fs)

    # output file names are derived from the input name without its extension
    baseName = os.path.splitext(os.path.basename(inputFile))[0]
    outputFile = baseName + '_sineModel.wav'
    errorFile = baseName + '_sineModel_error.wav'

    # write the synthesized sound obtained from the sinusoidal synthesis
    UF.wavwrite(y, fs, outputFile)

    # SNR calculation: compare only the samples both signals have, so the
    # subtraction cannot fail if the synthesis is longer than the input
    size = min(x.size, y.size)
    x1 = x[:size]
    error = x1 - y[:size]
    e_signal = calculate_energy(x1)
    e_error = calculate_energy(error)
    snr = calculate_snr(e_signal, e_error)
    print("SNR {}".format(snr))
    UF.wavwrite(error, fs, errorFile)

    # create figure to show plots
    plt.figure(figsize=(12, 9))

    # frequency range to plot
    maxplotfreq = 5000.0

    # plot the input sound
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / float(fs), x)
    plt.axis([0, x.size / float(fs), min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # plot the sinusoidal frequencies
    plt.subplot(3, 1, 2)
    if tfreq.shape[1] > 0:
        numFrames = tfreq.shape[0]
        frmTime = H * np.arange(numFrames) / float(fs)
        # work on a copy so the analysis result is not overwritten;
        # non-positive entries become NaN so they are not drawn
        tracks = np.copy(tfreq)
        tracks[tracks <= 0] = np.nan
        plt.plot(frmTime, tracks)
        plt.axis([0, x.size / float(fs), 0, maxplotfreq])
        plt.title('frequencies of sinusoidal tracks')

    # plot the output sound with the absolute error on top of it
    plt.subplot(3, 1, 3)
    yTime = np.arange(y.size) / float(fs)
    plt.plot(yTime, y)
    plt.plot(yTime[:size], abs(error))  # error
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()
    plt.show()
    return True
Beispiel #50
0
        zp = np.pad(ceps_current, [0, params['N'] - len(ceps_current)],
                    mode='constant',
                    constant_values=(0, 0))
        zp = np.concatenate(
            (zp[:params['N'] // 2], np.flip(zp[1:params['N'] // 2 + 1])))
        zp[0] = ceps_current[0]

        # Obtain the Envelope from the cepstrum
        specenv = np.real(np.fft.fft(zp))
        fbins = np.linspace(0, params['fs'], params['N'])
        fp = interpolate.interp1d(np.arange(params['N']),
                                  specenv,
                                  kind='linear',
                                  fill_value='extrapolate',
                                  bounds_error=False)
        new_M[j, :] = 20 * fp((new_F[j, :] / params['fs']) * params['N'])

        # zp = np.pad(frame,[0,params['N'] - len(frame)],mode = 'constant',constant_values=(0, 0))
        # zp = np.concatenate((zp[:params['N']//2],np.flip(zp[1:params['N']//2 + 1])))
        # specenv = np.real(np.fft.fft(zp))
        # # print(fbins[:params['N']//2 + 1])
        # # print(specenv[:params['N']//2 + 1])
        # fp = interpolate.interp1d(np.arange(params['N']//2),specenv[:params['N']//2],kind = 'linear',fill_value = 'extrapolate', bounds_error=False)
        # new_M[j,:] = 20*fp((new_F[j,:]/params['fs'])*params['N'])

    arecon = sineModelSynth(new_F, new_M, np.empty([0, 0]), params['W'],
                            params['H'], params['fs'])
    write(filename=dir_dump + str(k) + '_recon_param.wav',
          rate=params['fs'],
          data=arecon.astype('float32'))
		yhfreq[l,ind_valid] = yhfreq[l,ind_valid] * freqScaling[l]
	
	return yhfreq, yhmag

if __name__ == '__main__':
	# Demo script: harmonic analysis of a sound, frequency scaling followed by
	# time scaling of the harmonic tracks, then synthesis and playback.
	(fs, x) = UF.wavread(os.path.join(os.path.dirname(os.path.realpath(__file__)), '../../sounds/soprano-E4.wav'))
	w = np.blackman(801)
	N = 1024
	t = -90
	nH = 100
	minf0 = 250
	maxf0 = 400
	f0et = 8
	minSineDur = .1
	harmDevSlope = 0.01
	Ns = 512
	# floor division keeps the hop size an integer ('/' gives a float on Python 3)
	H = Ns // 4
	hfreq, hmag, hphase = HM.harmonicModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope, minSineDur)
	freqScaling = np.array([0, 3, 1, .5])
	freqStretching = np.array([])
	timbrePreservation = 1
	hfreqt, hmagt = harmonicFreqScaling(hfreq, hmag, freqScaling, freqStretching, timbrePreservation, fs)
	timeScaling = np.array([0, 0, 1, .5, 2, 4])
	# time-scale the frequency-scaled tracks; passing the unscaled hfreq/hmag
	# here would silently throw away the result of harmonicFreqScaling
	hfreqt, hmagt = ST.sineTimeScaling(hfreqt, hmagt, timeScaling)
	# synthesize without phases (empty phase array) and play the result
	yh = SM.sineModelSynth(hfreqt, hmagt, np.array([]), Ns, H, fs)
	UF.play(yh, fs)