Beispiel #1
0
def transformation_synthesis(inputFile1,
                             fs,
                             hfreq1,
                             hmag1,
                             stocEnv1,
                             inputFile2,
                             hfreq2,
                             hmag2,
                             stocEnv2,
                             hfreqIntp=np.array([0, 0, .1, 0, .9, 1, 1, 1]),
                             hmagIntp=np.array([0, 0, .1, 0, .9, 1, 1, 1]),
                             stocIntp=np.array([0, 0, .1, 0, .9, 1, 1, 1])):
    """
	Transform the analysis values returned by the analysis function and synthesize the sound
	inputFile1: name of input file 1
	fs: sampling rate of input file	1
	hfreq1, hmag1, stocEnv1: hps representation of sound 1
	inputFile2: name of input file 2
	hfreq2, hmag2, stocEnv2: hps representation of sound 2
	hfreqIntp: interpolation factor between the harmonic frequencies of the two sounds, 0 is sound 1 and 1 is sound 2 (time,value pairs)
	hmagIntp: interpolation factor between the harmonic magnitudes of the two sounds, 0 is sound 1 and 1 is sound 2  (time,value pairs)
	stocIntp: interpolation factor between the stochastic representation of the two sounds, 0 is sound 1 and 1 is sound 2  (time,value pairs)
	"""

    # size of fft used in synthesis
    Ns = 512
    # hop size (has to be 1/4 of Ns)
    H = 128

    # morph the two sounds
    yhfreq, yhmag, ystocEnv = HPST.hpsMorph(hfreq1, hmag1, stocEnv1, hfreq2,
                                            hmag2, stocEnv2, hfreqIntp,
                                            hmagIntp, stocIntp)

    # synthesis
    y, yh, yst = HPS.hpsModelSynth(yhfreq, yhmag, np.array([]), ystocEnv, Ns,
                                   H, fs)

    # write output sound
    outputFile = 'output_sounds/' + os.path.basename(
        inputFile1)[:-4] + '_hpsMorph.wav'
    UF.wavwrite(y, fs, outputFile)
def main(inputFile='../../sounds/sax-phrase-short.wav', window='blackman', M=601, N=1024, t=-100, 
	minSineDur=0.1, nH=100, minf0=350, maxf0=700, f0et=5, harmDevSlope=0.01, stocf=0.1):
	"""
	inputFile: input sound file (monophonic with sampling rate of 44100)
	window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)	
	M: analysis window size; N: fft size (power of two, bigger or equal than M)
	t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks
	nH: maximum number of harmonics; minf0: minimum fundamental frequency in sound
	maxf0: maximum fundamental frequency in sound; f0et: maximum error accepted in f0 detection algorithm                                                                                            
	harmDevSlope: allowed deviation of harmonic tracks, higher harmonics have higher allowed deviation
	stocf: decimation factor used for the stochastic approximation
	"""

	# size of fft used in synthesis
	Ns = 512

	# hop size (has to be 1/4 of Ns)
	H = 128

	# read input sound
	(fs, x) = UF.wavread(inputFile)

	# compute analysis window
	w = get_window(window, M)

	# compute the harmonic plus stochastic model of the whole sound
	hfreq, hmag, hphase, stocEnv = HPS.hpsModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope, minSineDur, Ns, stocf)
		
	# synthesize a sound from the harmonic plus stochastic representation
	y, yh, yst = HPS.hpsModelSynth(hfreq, hmag, hphase, stocEnv, Ns, H, fs)

	# output sound file (monophonic with sampling rate of 44100)
	outputFileSines = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_hpsModel_sines.wav'
	outputFileStochastic = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_hpsModel_stochastic.wav'
	outputFile = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_hpsModel.wav'

	# write sounds files for harmonics, stochastic, and the sum
	UF.wavwrite(yh, fs, outputFileSines)
	UF.wavwrite(yst, fs, outputFileStochastic)
	UF.wavwrite(y, fs, outputFile)
	return x, fs, hfreq, stocEnv, y
Beispiel #3
0
def analysis(inputFile='../../sounds/sax-phrase-short.wav',
             window='blackman',
             M=601,
             N=1024,
             t=-100,
             minSineDur=0.1,
             nH=100,
             minf0=350,
             maxf0=700,
             f0et=5,
             harmDevSlope=0.01,
             stocf=0.1):
    """
	Analyze a sound with the harmonic plus stochastic model
	inputFile: input sound file (monophonic with sampling rate of 44100)
	window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)	
	M: analysis window size 
	N: fft size (power of two, bigger or equal than M)
	t: magnitude threshold of spectral peaks 
	minSineDur: minimum duration of sinusoidal tracks
	nH: maximum number of harmonics
	minf0: minimum fundamental frequency in sound
	maxf0: maximum fundamental frequency in sound
	f0et: maximum error accepted in f0 detection algorithm                                                                                            
	harmDevSlope: allowed deviation of harmonic tracks, higher harmonics have higher allowed deviation
	stocf: decimation factor used for the stochastic approximation
	returns inputFile: input file name; fs: sampling rate of input file,
	        hfreq, hmag: harmonic frequencies, magnitude; mYst: stochastic residual
	"""

    # size of fft used in synthesis
    Ns = 512

    # hop size (has to be 1/4 of Ns)
    H = 128

    # read input sound
    (fs, x) = UF.wavread(inputFile)

    # compute analysis window
    w = get_window(window, M)

    # compute the harmonic plus stochastic model of the whole sound
    hfreq, hmag, hphase, mYst = HPS.hpsModelAnal(x, fs, w, N, H, t, nH, minf0,
                                                 maxf0, f0et, harmDevSlope,
                                                 minSineDur, Ns, stocf)

    # synthesize the harmonic plus stochastic model without original phases
    y, yh, yst = HPS.hpsModelSynth(hfreq, hmag, np.array([]), mYst, Ns, H, fs)

    # write output sound
    outputFile = 'output_sounds/' + os.path.basename(
        inputFile)[:-4] + '_hpsModel.wav'
    UF.wavwrite(y, fs, outputFile)

    # create figure to plot
    plt.figure(figsize=(12, 9))

    # frequency range to plot
    maxplotfreq = 15000.0

    # plot the input sound
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / float(fs), x)
    plt.axis([0, x.size / float(fs), min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # plot spectrogram stochastic compoment
    plt.subplot(3, 1, 2)
    numFrames = int(mYst[:, 0].size)
    sizeEnv = int(mYst[0, :].size)
    frmTime = H * np.arange(numFrames) / float(fs)
    binFreq = (.5 * fs) * np.arange(sizeEnv * maxplotfreq /
                                    (.5 * fs)) / sizeEnv
    plt.pcolormesh(
        frmTime, binFreq,
        np.transpose(mYst[:, :int(sizeEnv * maxplotfreq / (.5 * fs)) + 1]))
    plt.autoscale(tight=True)

    # plot harmonic on top of stochastic spectrogram
    if (hfreq.shape[1] > 0):
        harms = hfreq * np.less(hfreq, maxplotfreq)
        harms[harms == 0] = np.nan
        numFrames = int(harms[:, 0].size)
        frmTime = H * np.arange(numFrames) / float(fs)
        plt.plot(frmTime, harms, color='k', ms=3, alpha=1)
        plt.xlabel('time (sec)')
        plt.ylabel('frequency (Hz)')
        plt.autoscale(tight=True)
        plt.title('harmonics + stochastic spectrogram')

    # plot the output sound
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()
    plt.show(block=False)

    return inputFile, fs, hfreq, hmag, mYst
Beispiel #4
0
def transformation_synthesis(
    inputFile,
    fs,
    hfreq,
    hmag,
    mYst,
    freqScaling=np.array([0, 1.2, 2.01, 1.2, 2.679, .7, 3.146, .7]),
    freqStretching=np.array([0, 1, 2.01, 1, 2.679, 1.5, 3.146, 1.5]),
    timbrePreservation=1,
    timeScaling=np.array([0, 0, 2.138, 2.138 - 1.0, 3.146, 3.146])):
    """
	transform the analysis values returned by the analysis function and synthesize the sound
	inputFile: name of input file
	fs: sampling rate of input file	
	hfreq, hmag: harmonic frequencies and magnitudes
	mYst: stochastic residual
	freqScaling: frequency scaling factors, in time-value pairs (value of 1 no scaling)
	freqStretching: frequency stretching factors, in time-value pairs (value of 1 no stretching)
	timbrePreservation: 1 preserves original timbre, 0 it does not
	timeScaling: time scaling factors, in time-value pairs
	"""

    # size of fft used in synthesis
    Ns = 512

    # hop size (has to be 1/4 of Ns)
    H = 128

    # frequency scaling of the harmonics
    hfreqt, hmagt = HT.harmonicFreqScaling(hfreq, hmag, freqScaling,
                                           freqStretching, timbrePreservation,
                                           fs)

    # time scaling the sound
    yhfreq, yhmag, ystocEnv = HPST.hpsTimeScale(hfreqt, hmagt, mYst,
                                                timeScaling)

    # synthesis from the trasformed hps representation
    y, yh, yst = HPS.hpsModelSynth(yhfreq, yhmag, np.array([]), ystocEnv, Ns,
                                   H, fs)

    # write output sound
    outputFile = 'output_sounds/' + os.path.basename(
        inputFile)[:-4] + '_hpsModelTransformation.wav'
    UF.wavwrite(y, fs, outputFile)

    # create figure to plot
    plt.figure(figsize=(12, 6))

    # frequency range to plot
    maxplotfreq = 15000.0

    # plot spectrogram of transformed stochastic compoment
    plt.subplot(2, 1, 1)
    numFrames = int(ystocEnv[:, 0].size)
    sizeEnv = int(ystocEnv[0, :].size)
    frmTime = H * np.arange(numFrames) / float(fs)
    binFreq = (.5 * fs) * np.arange(sizeEnv * maxplotfreq /
                                    (.5 * fs)) / sizeEnv
    plt.pcolormesh(
        frmTime, binFreq,
        np.transpose(ystocEnv[:, :int(sizeEnv * maxplotfreq / (.5 * fs)) + 1]))
    plt.autoscale(tight=True)

    # plot transformed harmonic on top of stochastic spectrogram
    if (yhfreq.shape[1] > 0):
        harms = yhfreq * np.less(yhfreq, maxplotfreq)
        harms[harms == 0] = np.nan
        numFrames = int(harms[:, 0].size)
        frmTime = H * np.arange(numFrames) / float(fs)
        plt.plot(frmTime, harms, color='k', ms=3, alpha=1)
        plt.xlabel('time (sec)')
        plt.ylabel('frequency (Hz)')
        plt.autoscale(tight=True)
        plt.title('harmonics + stochastic spectrogram')

    # plot the output sound
    plt.subplot(2, 1, 2)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()
    plt.show()
def analysis(inputFile='../../sounds/sax-phrase-short.wav', window='blackman', M=601, N=1024, t=-100, 
	minSineDur=0.1, nH=100, minf0=350, maxf0=700, f0et=5, harmDevSlope=0.01, stocf=0.1):
	"""
	Analyze a sound with the harmonic plus stochastic model
	inputFile: input sound file (monophonic with sampling rate of 44100)
	window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)	
	M: analysis window size 
	N: fft size (power of two, bigger or equal than M)
	t: magnitude threshold of spectral peaks 
	minSineDur: minimum duration of sinusoidal tracks
	nH: maximum number of harmonics
	minf0: minimum fundamental frequency in sound
	maxf0: maximum fundamental frequency in sound
	f0et: maximum error accepted in f0 detection algorithm                                                                                            
	harmDevSlope: allowed deviation of harmonic tracks, higher harmonics have higher allowed deviation
	stocf: decimation factor used for the stochastic approximation
	returns inputFile: input file name; fs: sampling rate of input file,
	        hfreq, hmag: harmonic frequencies, magnitude; mYst: stochastic residual
	"""

	# size of fft used in synthesis
	Ns = 512

	# hop size (has to be 1/4 of Ns)
	H = 128

	# read input sound
	(fs, x) = UF.wavread(inputFile)

	# compute analysis window
	w = get_window(window, M)

	# compute the harmonic plus stochastic model of the whole sound
	hfreq, hmag, hphase, mYst = HPS.hpsModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope, minSineDur, Ns, stocf)

	# synthesize the harmonic plus stochastic model without original phases
	y, yh, yst = HPS.hpsModelSynth(hfreq, hmag, np.array([]), mYst, Ns, H, fs)

	# write output sound 
	outputFile = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_hpsModel.wav'
	UF.wavwrite(y,fs, outputFile)

	# create figure to plot
	plt.figure(figsize=(12, 9))

	# frequency range to plot
	maxplotfreq = 15000.0

	# plot the input sound
	plt.subplot(3,1,1)
	plt.plot(np.arange(x.size)/float(fs), x)
	plt.axis([0, x.size/float(fs), min(x), max(x)])
	plt.ylabel('amplitude')
	plt.xlabel('time (sec)')
	plt.title('input sound: x')

	# plot spectrogram stochastic compoment
	plt.subplot(3,1,2)
	numFrames = int(mYst[:,0].size)
	sizeEnv = int(mYst[0,:].size)
	frmTime = H*np.arange(numFrames)/float(fs)
	binFreq = (.5*fs)*np.arange(sizeEnv*maxplotfreq/(.5*fs))/sizeEnv                      
	plt.pcolormesh(frmTime, binFreq, np.transpose(mYst[:,:sizeEnv*maxplotfreq/(.5*fs)+1]))
	plt.autoscale(tight=True)

	# plot harmonic on top of stochastic spectrogram
	if (hfreq.shape[1] > 0):
		harms = hfreq*np.less(hfreq,maxplotfreq)
		harms[harms==0] = np.nan
		numFrames = int(harms[:,0].size)
		frmTime = H*np.arange(numFrames)/float(fs) 
		plt.plot(frmTime, harms, color='k', ms=3, alpha=1)
		plt.xlabel('time (sec)')
		plt.ylabel('frequency (Hz)')
		plt.autoscale(tight=True)
		plt.title('harmonics + stochastic spectrogram')

	# plot the output sound
	plt.subplot(3,1,3)
	plt.plot(np.arange(y.size)/float(fs), y)
	plt.axis([0, y.size/float(fs), min(y), max(y)])
	plt.ylabel('amplitude')
	plt.xlabel('time (sec)')
	plt.title('output sound: y')

	plt.tight_layout()
	plt.show(block=False)

	return inputFile, fs, hfreq, hmag, mYst
def transformation_synthesis(inputFile, fs, hfreq, hmag, mYst, freqScaling = np.array([0, 1.2, 2.01, 1.2, 2.679, .7, 3.146, .7]), 
	freqStretching = np.array([0, 1, 2.01, 1, 2.679, 1.5, 3.146, 1.5]), timbrePreservation = 1, 
	timeScaling = np.array([0, 0, 2.138, 2.138-1.0, 3.146, 3.146])):
	"""
	transform the analysis values returned by the analysis function and synthesize the sound
	inputFile: name of input file
	fs: sampling rate of input file	
	hfreq, hmag: harmonic frequencies and magnitudes
	mYst: stochastic residual
	freqScaling: frequency scaling factors, in time-value pairs (value of 1 no scaling)
	freqStretching: frequency stretching factors, in time-value pairs (value of 1 no stretching)
	timbrePreservation: 1 preserves original timbre, 0 it does not
	timeScaling: time scaling factors, in time-value pairs
	"""
	
	# size of fft used in synthesis
	Ns = 512

	# hop size (has to be 1/4 of Ns)
	H = 128
	
	# frequency scaling of the harmonics 
	hfreqt, hmagt = HT.harmonicFreqScaling(hfreq, hmag, freqScaling, freqStretching, timbrePreservation, fs)

	# time scaling the sound
	yhfreq, yhmag, ystocEnv = HPST.hpsTimeScale(hfreqt, hmagt, mYst, timeScaling)

	# synthesis from the trasformed hps representation 
	y, yh, yst = HPS.hpsModelSynth(yhfreq, yhmag, np.array([]), ystocEnv, Ns, H, fs)

	# write output sound 
	outputFile = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_hpsModelTransformation.wav'
	UF.wavwrite(y,fs, outputFile)

	# create figure to plot
	plt.figure(figsize=(12, 6))

	# frequency range to plot
	maxplotfreq = 15000.0

	# plot spectrogram of transformed stochastic compoment
	plt.subplot(2,1,1)
	numFrames = int(ystocEnv[:,0].size)
	sizeEnv = int(ystocEnv[0,:].size)
	frmTime = H*np.arange(numFrames)/float(fs)
	binFreq = (.5*fs)*np.arange(sizeEnv*maxplotfreq/(.5*fs))/sizeEnv                      
	plt.pcolormesh(frmTime, binFreq, np.transpose(ystocEnv[:,:sizeEnv*maxplotfreq/(.5*fs)+1]))
	plt.autoscale(tight=True)

	# plot transformed harmonic on top of stochastic spectrogram
	if (yhfreq.shape[1] > 0):
		harms = yhfreq*np.less(yhfreq,maxplotfreq)
		harms[harms==0] = np.nan
		numFrames = int(harms[:,0].size)
		frmTime = H*np.arange(numFrames)/float(fs) 
		plt.plot(frmTime, harms, color='k', ms=3, alpha=1)
		plt.xlabel('time (sec)')
		plt.ylabel('frequency (Hz)')
		plt.autoscale(tight=True)
		plt.title('harmonics + stochastic spectrogram')

	# plot the output sound
	plt.subplot(2,1,2)
	plt.plot(np.arange(y.size)/float(fs), y)
	plt.axis([0, y.size/float(fs), min(y), max(y)])
	plt.ylabel('amplitude')
	plt.xlabel('time (sec)')
	plt.title('output sound: y')

	plt.tight_layout()
	plt.show()
frmTime = H * np.arange(numFrames) / float(fs)
binFreq = fs * np.arange(N * maxplotfreq / fs) / N
plt.pcolormesh(frmTime, binFreq, np.transpose(mXr[:, : N * maxplotfreq / fs + 1]))
plt.xlabel("time (sec)")
plt.ylabel("frequency (Hz)")
plt.title("spectrogram of residual")
plt.autoscale(tight=True)
plt.tight_layout()
plt.savefig("cello-phrase-residual.png")

# compute the harmonic plus stochastic model
stocf = 0.5
hfreq, hmag, hphase, mYst = HPS.hpsModelAnal(
    x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope, minSineDur, Ns, stocf
)
y, yh, yst = HPS.hpsModelSynth(hfreq, hmag, np.array([]), mYst, Ns, H, fs)
UF.wavwrite(yst, fs, "cello-phrase-stochastic.wav")
UF.wavwrite(yh, fs, "cello-phrase-harmonic.wav")
UF.wavwrite(y, fs, "cello-phrase-synthesis.wav")

# plot the spectrogram of the stochastic component
plt.figure(6, figsize=(16, 4.5))
maxplotfreq = 5000.0
numFrames = int(mYst[:, 0].size)
Nst = 2 * int(mYst[0, :].size)
lastbin = int(Nst * maxplotfreq / fs)
maxplotfreq = fs * (lastbin - 1) / Nst
frmTime = H * np.arange(numFrames) / float(fs)
binFreq = fs * np.arange(lastbin) / Nst
plt.pcolormesh(frmTime, binFreq, np.transpose(mYst[:, :lastbin]))
plt.xlabel("time (sec)")
Ns = 512
H = 128

(fs1, x1) = UF.wavread(inputFile1)
(fs2, x2) = UF.wavread(inputFile2)
w1 = get_window(window1, M1)
w2 = get_window(window2, M2)
hfreq1, hmag1, hphase1, stocEnv1 = HPS.hpsModelAnal(x1, fs1, w1, N1, H, t1, nH, minf01, maxf01, f0et1, harmDevSlope1, minSineDur1, Ns, stocf)
hfreq2, hmag2, hphase2, stocEnv2 = HPS.hpsModelAnal(x2, fs2, w2, N2, H, t2, nH, minf02, maxf02, f0et2, harmDevSlope2, minSineDur2, Ns, stocf)

hfreqIntp = np.array([0, 0, .1, 0, .9, 1, 1, 1])
hmagIntp = np.array([0, 0, .1, 0, .9, 1, 1, 1])
stocIntp = np.array([0, 0, .1, 0, .9, 1, 1, 1])
yhfreq, yhmag, ystocEnv = HPST.hpsMorph(hfreq1, hmag1, stocEnv1, hfreq2, hmag2, stocEnv2, hfreqIntp, hmagIntp, stocIntp)

y, yh, yst = HPS.hpsModelSynth(yhfreq, yhmag, np.array([]), ystocEnv, Ns, H, fs1)

UF.wavwrite(y,fs1, 'hps-morph-total.wav')

plt.figure(figsize=(12, 9))

# frequency range to plot
maxplotfreq = 15000.0

# plot spectrogram stochastic component of sound 1
plt.subplot(3,1,1)
numFrames = int(stocEnv1[:,0].size)
sizeEnv = int(stocEnv1[0,:].size)
frmTime = H*np.arange(numFrames)/float(fs1)
binFreq = (.5*fs1)*np.arange(sizeEnv*maxplotfreq/(.5*fs1))/sizeEnv                      
plt.pcolormesh(frmTime, binFreq, np.transpose(stocEnv1[:,:int(sizeEnv*maxplotfreq/(.5*fs1)+1)]))
def transformation_synthesis(inputFile1,
                             fs,
                             hfreq1,
                             hmag1,
                             stocEnv1,
                             inputFile2,
                             hfreq2,
                             hmag2,
                             stocEnv2,
                             hfreqIntp=np.array([0, 0, .1, 0, .9, 1, 1, 1]),
                             hmagIntp=np.array([0, 0, .1, 0, .9, 1, 1, 1]),
                             stocIntp=np.array([0, 0, .1, 0, .9, 1, 1, 1])):
    """
	Transform the analysis values returned by the analysis function and synthesize the sound
	inputFile1: name of input file 1
	fs: sampling rate of input file	1
	hfreq1, hmag1, stocEnv1: hps representation of sound 1
	inputFile2: name of input file 2
	hfreq2, hmag2, stocEnv2: hps representation of sound 2
	hfreqIntp: interpolation factor between the harmonic frequencies of the two sounds, 0 is sound 1 and 1 is sound 2 (time,value pairs)
	hmagIntp: interpolation factor between the harmonic magnitudes of the two sounds, 0 is sound 1 and 1 is sound 2  (time,value pairs)
	stocIntp: interpolation factor between the stochastic representation of the two sounds, 0 is sound 1 and 1 is sound 2  (time,value pairs)
	"""

    # size of fft used in synthesis
    Ns = 512
    # hop size (has to be 1/4 of Ns)
    H = 128

    # morph the two sounds
    yhfreq, yhmag, ystocEnv = HPST.hpsMorph(hfreq1, hmag1, stocEnv1, hfreq2,
                                            hmag2, stocEnv2, hfreqIntp,
                                            hmagIntp, stocIntp)

    # synthesis
    y, yh, yst = HPS.hpsModelSynth(yhfreq, yhmag, np.array([]), ystocEnv, Ns,
                                   H, fs)

    # write output sound
    outputFile = 'output_sounds/' + os.path.basename(
        inputFile1)[:-4] + '_hpsMorph.wav'
    UF.wavwrite(y, fs, outputFile)

    # create figure to plot
    plt.figure(figsize=(12, 9))

    # frequency range to plot
    maxplotfreq = 15000.0

    # plot spectrogram of transformed stochastic compoment
    plt.subplot(2, 1, 1)
    numFrames = int(ystocEnv[:, 0].size)
    sizeEnv = int(ystocEnv[0, :].size)
    frmTime = H * np.arange(numFrames) / float(fs)
    binFreq = (.5 * fs) * np.arange(sizeEnv * maxplotfreq /
                                    (.5 * fs)) / sizeEnv
    plt.pcolormesh(
        frmTime, binFreq,
        np.transpose(ystocEnv[:, :int(sizeEnv * maxplotfreq / (.5 * fs)) + 1]))
    plt.autoscale(tight=True)

    # plot transformed harmonic on top of stochastic spectrogram
    if (yhfreq.shape[1] > 0):
        harms = np.copy(yhfreq)
        harms = harms * np.less(harms, maxplotfreq)
        harms[harms == 0] = np.nan
        numFrames = int(harms[:, 0].size)
        frmTime = H * np.arange(numFrames) / float(fs)
        plt.plot(frmTime, harms, color='k', ms=3, alpha=1)
        plt.xlabel('time (sec)')
        plt.ylabel('frequency (Hz)')
        plt.autoscale(tight=True)
        plt.title('harmonics + stochastic spectrogram')

    # plot the output sound
    plt.subplot(2, 1, 2)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()
    plt.show()
def transformation_synthesis(inputFile1, fs, hfreq1, hmag1, stocEnv1, inputFile2, hfreq2, hmag2, stocEnv2,
	hfreqIntp = np.array([0, 0, .1, 0, .9, 1, 1, 1]), hmagIntp = np.array([0, 0, .1, 0, .9, 1, 1, 1]), stocIntp = np.array([0, 0, .1, 0, .9, 1, 1, 1])):
	"""
	Transform the analysis values returned by the analysis function and synthesize the sound
	inputFile1: name of input file 1
	fs: sampling rate of input file	1
	hfreq1, hmag1, stocEnv1: hps representation of sound 1
	inputFile2: name of input file 2
	hfreq2, hmag2, stocEnv2: hps representation of sound 2
	hfreqIntp: interpolation factor between the harmonic frequencies of the two sounds, 0 is sound 1 and 1 is sound 2 (time,value pairs)
	hmagIntp: interpolation factor between the harmonic magnitudes of the two sounds, 0 is sound 1 and 1 is sound 2  (time,value pairs)
	stocIntp: interpolation factor between the stochastic representation of the two sounds, 0 is sound 1 and 1 is sound 2  (time,value pairs)
	"""
	
	# size of fft used in synthesis
	Ns = 512
	# hop size (has to be 1/4 of Ns)
	H = 128

	# morph the two sounds
	yhfreq, yhmag, ystocEnv = HPST.hpsMorph(hfreq1, hmag1, stocEnv1, hfreq2, hmag2, stocEnv2, hfreqIntp, hmagIntp, stocIntp)

	# synthesis 
	y, yh, yst = HPS.hpsModelSynth(yhfreq, yhmag, np.array([]), ystocEnv, Ns, H, fs)

	# write output sound 
	outputFile = 'output_sounds/' + os.path.basename(inputFile1)[:-4] + '_hpsMorph.wav'
	UF.wavwrite(y, fs, outputFile)

	# create figure to plot
	plt.figure(figsize=(12, 9))

	# frequency range to plot
	maxplotfreq = 15000.0

	# plot spectrogram of transformed stochastic compoment
	plt.subplot(2,1,1)
	numFrames = int(ystocEnv[:,0].size)
	sizeEnv = int(ystocEnv[0,:].size)
	frmTime = H*np.arange(numFrames)/float(fs)
	binFreq = (.5*fs)*np.arange(sizeEnv*maxplotfreq/(.5*fs))/sizeEnv                      
	plt.pcolormesh(frmTime, binFreq, np.transpose(ystocEnv[:,:sizeEnv*maxplotfreq/(.5*fs)+1]))
	plt.autoscale(tight=True)

	# plot transformed harmonic on top of stochastic spectrogram
	if (yhfreq.shape[1] > 0):
		harms = np.copy(yhfreq)
		harms = harms*np.less(harms,maxplotfreq)
		harms[harms==0] = np.nan
		numFrames = int(harms[:,0].size)
		frmTime = H*np.arange(numFrames)/float(fs) 
		plt.plot(frmTime, harms, color='k', ms=3, alpha=1)
		plt.xlabel('time (sec)')
		plt.ylabel('frequency (Hz)')
		plt.autoscale(tight=True)
		plt.title('harmonics + stochastic spectrogram')

	# plot the output sound
	plt.subplot(2,1,2)
	plt.plot(np.arange(y.size)/float(fs), y)
	plt.axis([0, y.size/float(fs), min(y), max(y)])
	plt.ylabel('amplitude')
	plt.xlabel('time (sec)')
	plt.title('output sound: y')

	plt.tight_layout()
	plt.show(block=False)
Beispiel #11
0
w = np.blackman(601)
N = 1024
t = -100
nH = 100
minf0 = 350
maxf0 = 700
f0et = 5
minSineDur = .1
harmDevSlope = 0.01
Ns = 512
H = Ns / 4
stocf = .2
hfreq, hmag, hphase, mYst = HPS.hpsModelAnal(x, fs, w, N, H, t, nH, minf0,
                                             maxf0, f0et, harmDevSlope,
                                             minSineDur, Ns, stocf)
y, yh, yst = HPS.hpsModelSynth(hfreq, hmag, hphase, mYst, Ns, H, fs)

maxplotfreq = 10000.0
plt.figure(1, figsize=(9, 7))

plt.subplot(311)
plt.plot(np.arange(x.size) / float(fs), x, 'b')
plt.autoscale(tight=True)
plt.title('x (sax-phrase-short.wav)')

plt.subplot(312)
numFrames = int(mYst[:, 0].size)
sizeEnv = int(mYst[0, :].size)
frmTime = H * np.arange(numFrames) / float(fs)
binFreq = (.5 * fs) * np.arange(sizeEnv * maxplotfreq / (.5 * fs)) / sizeEnv
plt.pcolormesh(frmTime, binFreq,
def transformation_synthesis_stereo(inputFile, fs, hfreq, hmag, mYst, freqScaling = np.array([0, 1.2, 2.01, 1.2, 2.679, .7, 3.146, .7]), 
    freqStretching = np.array([0, 1, 2.01, 1, 2.679, 1.5, 3.146, 1.5]), timbrePreservation = 1, 
    timeScaling = np.array([0, 0, 2.138, 2.138-1.0, 3.146, 3.146]),
    inputSound=[]):
    """
    transform the analysis values returned by the analysis function and synthesize the sound
    inputFile: name of input file
    fs: sampling rate of input file    
    hfreq, hmag: harmonic frequencies and magnitudes
    mYst: stochastic residual
    freqScaling: tuple (for L and R channels) of arrays of frequency scaling factors, in time-value pairs (value of 1 no scaling)
    freqStretching: tuple (for L and R channels) of arrays of frequency stretching factors, in time-value pairs (value of 1 no stretching)
    timbrePreservation: tuple (for L and R channels) of arrays of 1 preserves original timbre, 0 it does not
    timeScaling: tuple (for L and R channels) of arrays of time scaling factors, in time-value pairs
    inputSound: original sound, used for auto-attenuation of the output sound
    """
    
    # size of fft used in synthesis
    Ns = 512

    # hop size (has to be 1/4 of Ns)
    H = 128
    
    # frequency scaling of the harmonics 
    hfreqtLeft , hmagtLeft  = HT.harmonicFreqScaling(hfreq, hmag, freqScaling[0], freqStretching[0], timbrePreservation[0], fs)
    hfreqtRight, hmagtRight = HT.harmonicFreqScaling(hfreq, hmag, freqScaling[1], freqStretching[1], timbrePreservation[1], fs)

    # time scaling the sound
    yhfreqLeft , yhmagLeft , ystocEnvLeft  = HPST.hpsTimeScale(hfreqtLeft , hmagtLeft , mYst, timeScaling[0])
    yhfreqRight, yhmagRight, ystocEnvRight = HPST.hpsTimeScale(hfreqtRight, hmagtRight, mYst, timeScaling[1])

    # synthesis from the trasformed hps representation 
    yLeft,  yhLeft,  ystLeft  = HPS.hpsModelSynth(yhfreqLeft , yhmagLeft , np.array([]), ystocEnvLeft , Ns, H, fs)
    yRight, yhRight, ystRight = HPS.hpsModelSynth(yhfreqRight, yhmagRight, np.array([]), ystocEnvRight, Ns, H, fs)

    # write output sound
    outputFile = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_hpsModelTransformation.wav'
    UF.wavwriteStereo(yLeft, yRight, fs, outputFile, inputSound)
    
    # create figure to plot
    plt.figure(figsize=(12, 12))
    
    # frequency range to plot
    maxplotfreq = 15000.0
     
    def plotTransformedSignals(channelName,subplot1,subplot2,yhfreq,ystocEnv,y):
        # plot spectrogram of transformed stochastic compoment
        plt.subplot(subplot1)
        numFrames = int(ystocEnv[:,0].size)
        sizeEnv = int(ystocEnv[0,:].size)
        frmTime = H*np.arange(numFrames)/float(fs)
        binFreq = (.5*fs)*np.arange(sizeEnv*maxplotfreq/(.5*fs))/sizeEnv                      
        plt.pcolormesh(frmTime, binFreq, np.transpose(ystocEnv[:,:sizeEnv*maxplotfreq/(.5*fs)+1]))
        plt.autoscale(tight=True)
    
        # plot transformed harmonic on top of stochastic spectrogram
        if (yhfreq.shape[1] > 0):
            harms = yhfreq*np.less(yhfreq,maxplotfreq)
            harms[harms==0] = np.nan
            numFrames = int(harms[:,0].size)
            frmTime = H*np.arange(numFrames)/float(fs) 
            plt.plot(frmTime, harms, color='k', ms=3, alpha=1)
            plt.xlabel('time (sec)')
            plt.ylabel('frequency (Hz)')
            plt.autoscale(tight=True)
            plt.title('harmonics + stochastic spectrogram (' + channelName + ')')

        # plot the output sound
        plt.subplot(subplot2)
        plt.plot(np.arange(y.size)/float(fs), y)
        plt.axis([0, y.size/float(fs), min(y), max(y)])
        plt.ylabel('amplitude')
        plt.xlabel('time (sec)')
        plt.title('output sound: y (' + channelName + ')')

    plotTransformedSignals('Left' , 411, 413, yhfreqLeft,  ystocEnvLeft,  yLeft)
    plotTransformedSignals('Right', 412, 414, yhfreqRight, ystocEnvRight, yRight)

    plt.tight_layout()
    plt.show()
Beispiel #13
0
sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), '../software/models/'))

import hpsModel as HPS
import utilFunctions as UF

if __name__ == '__main__':
    (fs, x) = UF.wavread(os.path.join(os.path.dirname(os.path.realpath(__file__)), '../sounds/sax-phrase.wav'))
    w = np.blackman(601)
    N = 1024
    t = -100
    nH = 100
    minf0 = 350
    maxf0 = 700
    f0et = 5
    maxnpeaksTwm = 5
    minSineDur = .1
    harmDevSlope = 0.01
    Ns = 512
    H = Ns/4
    stocf = .2
    hfreq, hmag, hphase, mYst = HPS.hpsModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope, minSineDur, Ns, stocf)
    y, yh, yst = HPS.hpsModelSynth(hfreq, hmag, hphase, mYst, Ns, H, fs)

    # UF.play(y, fs)
    # UF.play(yh, fs)
    # UF.play(yst, fs)
    UF.wavwrite(y,fs,'sax-phrase-total-synthesis.wav')
    UF.wavwrite(yh,fs,'sax-phrase-harmonic-component.wav')
    UF.wavwrite(yst,fs,'sax-phrase-stochastic-component.wav')
def main(
    inputFile="../../sounds/sax-phrase-short.wav",
    window="blackman",
    M=601,
    N=1024,
    t=-100,
    minSineDur=0.1,
    nH=100,
    minf0=350,
    maxf0=700,
    f0et=5,
    harmDevSlope=0.01,
    stocf=0.1,
):
    """
	inputFile: input sound file (monophonic with sampling rate of 44100)
	window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)	
	M: analysis window size; N: fft size (power of two, bigger or equal than M)
	t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks
	nH: maximum number of harmonics; minf0: minimum fundamental frequency in sound
	maxf0: maximum fundamental frequency in sound; f0et: maximum error accepted in f0 detection algorithm                                                                                            
	harmDevSlope: allowed deviation of harmonic tracks, higher harmonics have higher allowed deviation
	stocf: decimation factor used for the stochastic approximation
	"""

    # size of fft used in synthesis
    Ns = 512

    # hop size (has to be 1/4 of Ns)
    H = 128

    # read input sound
    (fs, x) = UF.wavread(inputFile)

    # compute analysis window
    w = get_window(window, M)

    # compute the harmonic plus stochastic model of the whole sound
    hfreq, hmag, hphase, stocEnv = HPS.hpsModelAnal(
        x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope, minSineDur, Ns, stocf
    )

    # synthesize a sound from the harmonic plus stochastic representation
    y, yh, yst = HPS.hpsModelSynth(hfreq, hmag, hphase, stocEnv, Ns, H, fs)

    # output sound file (monophonic with sampling rate of 44100)
    outputFileSines = "output_sounds/" + os.path.basename(inputFile)[:-4] + "_hpsModel_sines.wav"
    outputFileStochastic = "output_sounds/" + os.path.basename(inputFile)[:-4] + "_hpsModel_stochastic.wav"
    outputFile = "output_sounds/" + os.path.basename(inputFile)[:-4] + "_hpsModel.wav"

    # write sounds files for harmonics, stochastic, and the sum
    UF.wavwrite(yh, fs, outputFileSines)
    UF.wavwrite(yst, fs, outputFileStochastic)
    UF.wavwrite(y, fs, outputFile)

    # create figure to plot
    plt.figure(figsize=(12, 9))

    # frequency range to plot
    maxplotfreq = 15000.0

    # plot the input sound
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / float(fs), x)
    plt.axis([0, x.size / float(fs), min(x), max(x)])
    plt.ylabel("amplitude")
    plt.xlabel("time (sec)")
    plt.title("input sound: x")

    # plot spectrogram stochastic component
    plt.subplot(3, 1, 2)
    numFrames = int(stocEnv[:, 0].size)
    sizeEnv = int(stocEnv[0, :].size)
    frmTime = H * np.arange(numFrames) / float(fs)
    binFreq = (0.5 * fs) * np.arange(sizeEnv * maxplotfreq / (0.5 * fs)) / sizeEnv
    plt.pcolormesh(frmTime, binFreq, np.transpose(stocEnv[:, : sizeEnv * maxplotfreq / (0.5 * fs) + 1]))
    plt.autoscale(tight=True)

    # plot harmonic on top of stochastic spectrogram
    if hfreq.shape[1] > 0:
        harms = hfreq * np.less(hfreq, maxplotfreq)
        harms[harms == 0] = np.nan
        numFrames = harms.shape[0]
        frmTime = H * np.arange(numFrames) / float(fs)
        plt.plot(frmTime, harms, color="k", ms=3, alpha=1)
        plt.xlabel("time (sec)")
        plt.ylabel("frequency (Hz)")
        plt.autoscale(tight=True)
        plt.title("harmonics + stochastic spectrogram")

        # plot the output sound
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel("amplitude")
    plt.xlabel("time (sec)")
    plt.title("output sound: y")

    plt.tight_layout()
    plt.show(block=False)