def main(inputFile=demo_sound_path('ocean.wav'), H=256, N=512, stocf=.1,
         interactive=True, plotFile=False):
    """
    inputFile: input sound file (monophonic with sampling rate of 44100)
    H: hop size, N: fft size
    stocf: decimation factor used for the stochastic approximation (bigger than 0, maximum 1)
    """

    # read input sound
    (fs, x) = audio.read_wav(inputFile)

    # compute stochastic model
    stocEnv = stochastic.from_audio(x, H, N, stocf)

    # synthesize sound from stochastic model
    y = stochastic.to_audio(stocEnv, H, N)

    outputFile = 'output_sounds/' + strip_file(inputFile) + '_stochasticModel.wav'

    # write output sound
    audio.write_wav(y, fs, outputFile)

    # create figure to plot
    plt.figure(figsize=(12, 9))

    # plot the input sound
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / float(fs), x)
    plt.axis([0, x.size / float(fs), min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # plot stochastic representation
    plt.subplot(3, 1, 2)
    numFrames = int(stocEnv.shape[0])
    frmTime = H * np.arange(numFrames) / float(fs)
    binFreq = np.arange(stocf * (N / 2 + 1)) * float(fs) / (stocf * N)
    plt.pcolormesh(frmTime, binFreq, np.transpose(stocEnv))
    plt.autoscale(tight=True)
    plt.xlabel('time (sec)')
    plt.ylabel('frequency (Hz)')
    plt.title('stochastic approximation')

    # plot the output sound
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')

    plt.tight_layout()

    if interactive:
        plt.show()
    if plotFile:
        plt.savefig('output_plots/%s_stochastic_model.png' % files.strip_file(inputFile))
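
# Usage sketch (illustrative values, not canonical settings): a smaller stocf
# keeps fewer envelope coefficients, giving a coarser stochastic approximation.
if __name__ == '__main__':
    main(inputFile=demo_sound_path('ocean.wav'), H=256, N=512, stocf=0.05)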
plt.figure(1, figsize=(9, 7))

plt.subplot(411)
plt.plot(np.arange(x.size) / float(fs), x, 'b')
plt.title('x (ocean.wav)')
plt.axis([0, x.size / float(fs), min(x), max(x)])

plt.subplot(412)
numFrames = int(mX.shape[0])
frmTime = H * np.arange(numFrames) / float(fs)
binFreq = np.arange(mX.shape[1]) * float(fs) / N
plt.pcolormesh(frmTime, binFreq, np.transpose(mX))
plt.title('mX; M=512, N=512, H=256')
plt.autoscale(tight=True)

plt.subplot(413)
numFrames = int(mYst.shape[0])
frmTime = H * np.arange(numFrames) / float(fs)
binFreq = np.arange(stocf * mX.shape[1]) * float(fs) / (stocf * N)
plt.pcolormesh(frmTime, binFreq, np.transpose(mYst))
plt.title('mY (stochastic approximation); stocf=.1')
plt.autoscale(tight=True)

plt.subplot(414)
plt.plot(np.arange(y.size) / float(fs), y, 'b')
plt.title('y')
plt.axis([0, y.size / float(fs), min(y), max(y)])

plt.tight_layout()
plt.savefig('stochasticModelAnalSynth.png')
audio.write_wav(y, fs, 'ocean-synthesis.wav')
def transformation_synthesis(inputFile, fs, tfreq, tmag,
                             freqScaling=np.array([0, 2.0, 1, .3]),
                             timeScaling=np.array([0, .0, .671, .671, 1.978, 1.978 + 1.0]),
                             interactive=True, plotFile=False):
    """
    Transform the analysis values returned by the analysis function and synthesize the sound
    inputFile: name of input file; fs: sampling rate of input file
    tfreq, tmag: sinusoidal frequencies and magnitudes
    freqScaling: frequency scaling factors, in time-value pairs
    timeScaling: time scaling factors, in time-value pairs
    """

    # size of fft used in synthesis
    Ns = 512

    # hop size (has to be 1/4 of Ns)
    H = 128

    # frequency scaling of the sinusoidal tracks
    ytfreq = sine.scale_frequencies(tfreq, freqScaling)

    # time scale the sinusoidal tracks
    ytfreq, ytmag = sine.scale_time(ytfreq, tmag, timeScaling)

    # synthesis
    y = sine.to_audio(ytfreq, ytmag, np.array([]), Ns, H, fs)

    # write output sound
    outputFile = 'output_sounds/' + strip_file(inputFile) + '_sineModelTransformation.wav'
    audio.write_wav(y, fs, outputFile)

    # create figure to plot
    plt.figure(figsize=(12, 6))

    # frequency range to plot
    maxplotfreq = 15000.0

    # plot the transformed sinusoidal frequencies
    if (ytfreq.shape[1] > 0):
        plt.subplot(2, 1, 1)
        tracks = np.copy(ytfreq)
        tracks = tracks * np.less(tracks, maxplotfreq)
        tracks[tracks <= 0] = np.nan
        numFrames = int(tracks.shape[0])
        frmTime = H * np.arange(numFrames) / float(fs)
        plt.plot(frmTime, tracks)
        plt.title('transformed sinusoidal tracks')
        plt.autoscale(tight=True)

    # plot the output sound
    plt.subplot(2, 1, 2)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()

    if interactive:
        plt.show()
    if plotFile:
        plt.savefig('output_plots/%s_sine_transformation_synthesis.png' % files.strip_file(inputFile))
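
# Usage sketch: chain this with the sine-model analysis() defined elsewhere in
# this collection, which returns exactly the values consumed here. The default
# freqScaling/timeScaling arrays are example envelopes, not required settings.
if __name__ == '__main__':
    inputFile, fs, tfreq, tmag = analysis()
    transformation_synthesis(inputFile, fs, tfreq, tmag)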
def main(inputFile=demo_sound_path('rain.wav'), stocf=0.1,
         timeScaling=np.array([0, 0, 1, 2]),
         interactive=True, plotFile=False):
    """
    Perform a time scaling using the stochastic model
    inputFile: name of input sound file
    stocf: decimation factor used for the stochastic approximation
    timeScaling: time scaling factors, in time-value pairs
    """

    # hop size
    H = 128

    # read input sound
    (fs, x) = audio.read_wav(inputFile)

    # perform stochastic analysis
    mYst = stochastic.from_audio(x, H, H * 2, stocf)

    # perform time scaling of stochastic representation
    ystocEnv = stochastic.scale_time(mYst, timeScaling)

    # synthesize output sound
    y = stochastic.to_audio(ystocEnv, H, H * 2)

    # write output sound
    outputFile = 'output_sounds/' + strip_file(inputFile) + '_stochasticModelTransformation.wav'
    audio.write_wav(y, fs, outputFile)

    # create figure to plot
    plt.figure(figsize=(12, 9))

    # plot the input sound
    plt.subplot(4, 1, 1)
    plt.plot(np.arange(x.size) / float(fs), x)
    plt.axis([0, x.size / float(fs), min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # plot stochastic representation
    plt.subplot(4, 1, 2)
    numFrames = int(mYst.shape[0])
    frmTime = H * np.arange(numFrames) / float(fs)
    binFreq = np.arange(stocf * H) * float(fs) / (stocf * 2 * H)
    plt.pcolormesh(frmTime, binFreq, np.transpose(mYst))
    plt.autoscale(tight=True)
    plt.xlabel('time (sec)')
    plt.ylabel('frequency (Hz)')
    plt.title('stochastic approximation')

    # plot modified stochastic representation
    plt.subplot(4, 1, 3)
    numFrames = int(ystocEnv.shape[0])
    frmTime = H * np.arange(numFrames) / float(fs)
    binFreq = np.arange(stocf * H) * float(fs) / (stocf * 2 * H)
    plt.pcolormesh(frmTime, binFreq, np.transpose(ystocEnv))
    plt.autoscale(tight=True)
    plt.xlabel('time (sec)')
    plt.ylabel('frequency (Hz)')
    plt.title('modified stochastic approximation')

    # plot the output sound
    plt.subplot(4, 1, 4)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')

    plt.tight_layout()

    if interactive:
        plt.show()
    if plotFile:
        plt.savefig('output_plots/%s_stochastic_transformation.png' % files.strip_file(inputFile))
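
# Usage sketch (illustrative values): the time-value pairs map input positions
# to output positions, so [0, 0, 1, 2] roughly doubles the duration.
if __name__ == '__main__':
    main(inputFile=demo_sound_path('rain.wav'), stocf=0.1,
         timeScaling=np.array([0, 0, 1, 2]))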
def main(inputFile=demo_sound_path('bendir.wav'), window='hamming', M=2001, N=2048,
         t=-80, minSineDur=0.02, maxnSines=150, freqDevOffset=10, freqDevSlope=0.001,
         interactive=True, plotFile=False):
    """
    Perform analysis/synthesis using the sinusoidal model
    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size; N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks
    maxnSines: maximum number of parallel sinusoids
    freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0
    freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation
    """

    # size of fft used in synthesis
    Ns = 512

    # hop size (has to be 1/4 of Ns)
    H = 128

    # read input sound
    fs, x = audio.read_wav(inputFile)

    # compute analysis window
    w = get_window(window, M)

    # analyze the sound with the sinusoidal model
    tfreq, tmag, tphase = sine.from_audio(x, fs, w, N, H, t, maxnSines, minSineDur,
                                          freqDevOffset, freqDevSlope)

    # synthesize the output sound from the sinusoidal representation
    y = sine.to_audio(tfreq, tmag, tphase, Ns, H, fs)

    # output sound file name
    outputFile = 'output_sounds/' + strip_file(inputFile) + '_sineModel.wav'

    # write the synthesized sound obtained from the sinusoidal synthesis
    audio.write_wav(y, fs, outputFile)

    # create figure to show plots
    plt.figure(figsize=(12, 9))

    # frequency range to plot
    maxplotfreq = 5000.0

    # plot the input sound
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / float(fs), x)
    plt.axis([0, x.size / float(fs), min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # plot the sinusoidal frequencies
    plt.subplot(3, 1, 2)
    if (tfreq.shape[1] > 0):
        numFrames = tfreq.shape[0]
        frmTime = H * np.arange(numFrames) / float(fs)
        tfreq[tfreq <= 0] = np.nan
        plt.plot(frmTime, tfreq)
        plt.axis([0, x.size / float(fs), 0, maxplotfreq])
        plt.title('frequencies of sinusoidal tracks')

    # plot the output sound
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()

    if interactive:
        plt.show()
    if plotFile:
        plt.savefig('output_plots/%s_sine_model.png' % files.strip_file(inputFile))
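
# Usage sketch (illustrative settings): a lower threshold t admits more spectral
# peaks, while a longer minSineDur discards short spurious tracks.
if __name__ == '__main__':
    main(inputFile=demo_sound_path('bendir.wav'), t=-90, minSineDur=0.05)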
def main(inputFile1=demo_sound_path('ocean.wav'), inputFile2=demo_sound_path('speech-male.wav'),
         window1='hamming', window2='hamming', M1=1024, M2=1024, N1=1024, N2=1024,
         H1=256, smoothf=.5, balancef=0.2, interactive=True, plotFile=False):
    """
    Perform a morph between two sounds
    inputFile1: name of input sound file to be used as source
    inputFile2: name of input sound file to be used as filter
    window1 and window2: windows for both files
    M1 and M2: window sizes for both files
    N1 and N2: fft sizes for both sounds
    H1: hop size for sound 1 (the one for sound 2 is computed automatically)
    smoothf: smoothing factor to be applied to magnitude spectrum of sound 2 before morphing
    balancef: balance factor between both sounds, 0 is sound 1 and 1 is sound 2
    """

    # read input sounds
    (fs, x1) = audio.read_wav(inputFile1)
    (fs, x2) = audio.read_wav(inputFile2)

    # compute analysis windows
    w1 = get_window(window1, M1)
    w2 = get_window(window2, M2)

    # perform morphing
    y = stft.morph(x1, x2, fs, w1, N1, w2, N2, H1, smoothf, balancef)

    # compute the magnitude and phase spectrogram of input sound (for plotting)
    mX1, pX1 = stft.from_audio(x1, w1, N1, H1)

    # compute the magnitude and phase spectrogram of output sound (for plotting)
    mY, pY = stft.from_audio(y, w1, N1, H1)

    # write output sound
    outputFile = 'output_sounds/' + os.path.basename(inputFile1)[:-4] + '_stftMorph.wav'
    audio.write_wav(y, fs, outputFile)

    # create figure to plot
    plt.figure(figsize=(12, 9))

    # frequency range to plot
    maxplotfreq = 10000.0

    # plot sound 1
    plt.subplot(4, 1, 1)
    plt.plot(np.arange(x1.size) / float(fs), x1)
    plt.axis([0, x1.size / float(fs), min(x1), max(x1)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # plot magnitude spectrogram of sound 1
    plt.subplot(4, 1, 2)
    numFrames = int(mX1.shape[0])
    frmTime = H1 * np.arange(numFrames) / float(fs)
    binFreq = fs * np.arange(N1 * maxplotfreq / fs) / N1
    plt.pcolormesh(frmTime, binFreq, np.transpose(mX1[:, :int(N1 * maxplotfreq / fs + 1)]))
    plt.xlabel('time (sec)')
    plt.ylabel('frequency (Hz)')
    plt.title('magnitude spectrogram of x')
    plt.autoscale(tight=True)

    # plot magnitude spectrogram of morphed sound
    plt.subplot(4, 1, 3)
    numFrames = int(mY.shape[0])
    frmTime = H1 * np.arange(numFrames) / float(fs)
    binFreq = fs * np.arange(N1 * maxplotfreq / fs) / N1
    plt.pcolormesh(frmTime, binFreq, np.transpose(mY[:, :int(N1 * maxplotfreq / fs + 1)]))
    plt.xlabel('time (sec)')
    plt.ylabel('frequency (Hz)')
    plt.title('magnitude spectrogram of y')
    plt.autoscale(tight=True)

    # plot the morphed sound
    plt.subplot(4, 1, 4)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()

    if interactive:
        plt.show()
    if plotFile:
        plt.savefig('output_plots/%s_%s_stft_morph.png' %
                    (files.strip_file(inputFile1), files.strip_file(inputFile2)))
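
# Usage sketch (illustrative settings): balancef closer to 1 pushes the morph
# toward the spectral shape of inputFile2.
if __name__ == '__main__':
    main(smoothf=0.5, balancef=0.7)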
harmDevSlope = 0.01

hfreq, hmag, hphase = harmonic.from_audio(x, fs, w, N, H, t, nH, minf0, maxf0,
                                          f0et, harmDevSlope, minSineDur)
y = sine.to_audio(hfreq, hmag, hphase, Ns, H, fs)

numFrames = int(hfreq.shape[0])
frmTime = H * np.arange(numFrames) / float(fs)

plt.figure(1, figsize=(9, 7))

plt.subplot(3, 1, 1)
plt.plot(np.arange(x.size) / float(fs), x, 'b')
plt.axis([0, x.size / float(fs), min(x), max(x)])
plt.title('x (vignesh.wav)')

plt.subplot(3, 1, 2)
yhfreq = np.copy(hfreq)  # copy so masking the zero frames does not modify hfreq
yhfreq[hfreq == 0] = np.nan
plt.plot(frmTime, yhfreq, lw=1.2)
plt.axis([0, y.size / float(fs), 0, 8000])
plt.title('f_h, harmonic frequencies')

plt.subplot(3, 1, 3)
plt.plot(np.arange(y.size) / float(fs), y, 'b')
plt.axis([0, y.size / float(fs), min(y), max(y)])
plt.title('yh')

plt.tight_layout()
audio.write_wav(y, fs, 'vignesh-harmonic-synthesis.wav')
plt.savefig('harmonicModel-analysis-synthesis.png')
def transformation_synthesis(inputFile, fs, hfreq, hmag, mYst,
                             freqScaling=np.array([0, 1.2, 2.01, 1.2, 2.679, .7, 3.146, .7]),
                             freqStretching=np.array([0, 1, 2.01, 1, 2.679, 1.5, 3.146, 1.5]),
                             timbrePreservation=1,
                             timeScaling=np.array([0, 0, 2.138, 2.138 - 1.0, 3.146, 3.146]),
                             interactive=True, plotFile=False):
    """
    Transform the analysis values returned by the analysis function and synthesize the sound
    inputFile: name of input file
    fs: sampling rate of input file
    hfreq, hmag: harmonic frequencies and magnitudes
    mYst: stochastic residual
    freqScaling: frequency scaling factors, in time-value pairs (value of 1 no scaling)
    freqStretching: frequency stretching factors, in time-value pairs (value of 1 no stretching)
    timbrePreservation: 1 preserves original timbre, 0 it does not
    timeScaling: time scaling factors, in time-value pairs
    """

    # size of fft used in synthesis
    Ns = 512

    # hop size (has to be 1/4 of Ns)
    H = 128

    # frequency scaling of the harmonics
    hfreqt, hmagt = harmonic.scale_frequencies(hfreq, hmag, freqScaling, freqStretching,
                                               timbrePreservation, fs)

    # time scaling the sound
    yhfreq, yhmag, ystocEnv = hps.scale_time(hfreqt, hmagt, mYst, timeScaling)

    # synthesis from the transformed hps representation
    y, yh, yst = hps.to_audio(yhfreq, yhmag, np.array([]), ystocEnv, Ns, H, fs)

    # write output sound
    outputFile = 'output_sounds/' + strip_file(inputFile) + '_hpsModelTransformation.wav'
    audio.write_wav(y, fs, outputFile)

    # create figure to plot
    plt.figure(figsize=(12, 6))

    # frequency range to plot
    maxplotfreq = 15000.0

    # plot spectrogram of transformed stochastic component
    plt.subplot(2, 1, 1)
    numFrames = int(ystocEnv.shape[0])
    sizeEnv = int(ystocEnv.shape[1])
    frmTime = H * np.arange(numFrames) / float(fs)
    binFreq = (.5 * fs) * np.arange(sizeEnv * maxplotfreq / (.5 * fs)) / sizeEnv
    plt.pcolormesh(frmTime, binFreq,
                   np.transpose(ystocEnv[:, :int(sizeEnv * maxplotfreq / (.5 * fs) + 1)]))
    plt.autoscale(tight=True)

    # plot transformed harmonics on top of stochastic spectrogram
    if (yhfreq.shape[1] > 0):
        harms = yhfreq * np.less(yhfreq, maxplotfreq)
        harms[harms == 0] = np.nan
        numFrames = int(harms.shape[0])
        frmTime = H * np.arange(numFrames) / float(fs)
        plt.plot(frmTime, harms, color='k', ms=3, alpha=1)
        plt.xlabel('time (sec)')
        plt.ylabel('frequency (Hz)')
        plt.autoscale(tight=True)
        plt.title('harmonics + stochastic spectrogram')

    # plot the output sound
    plt.subplot(2, 1, 2)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()

    if interactive:
        plt.show()
    if plotFile:
        plt.savefig('output_plots/%s_hps_transformation_synthesis.png' % files.strip_file(inputFile))
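
# Usage sketch: pair this with the hps-model analysis() defined elsewhere in
# this collection, which returns the harmonic and stochastic data used here.
if __name__ == '__main__':
    inputFile, fs, hfreq, hmag, mYst = analysis()
    transformation_synthesis(inputFile, fs, hfreq, hmag, mYst)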
plt.figure(1, figsize=(9.5, 7))

plt.subplot(411)
plt.plot(np.arange(x.size) / float(fs), x, 'b')
plt.title('x (piano.wav)')
plt.axis([0, x.size / float(fs), min(x), max(x)])

plt.subplot(412)
numFrames = int(mX.shape[0])
frmTime = H * np.arange(numFrames) / float(fs)
binFreq = np.arange(mX.shape[1]) * float(fs) / N
plt.pcolormesh(frmTime, binFreq, np.transpose(mX))
plt.title('mX, M=1024, N=1024, H=512')
plt.autoscale(tight=True)

plt.subplot(413)
numFrames = int(pX.shape[0])
frmTime = H * np.arange(numFrames) / float(fs)
binFreq = np.arange(pX.shape[1]) * float(fs) / N
plt.pcolormesh(frmTime, binFreq, np.diff(np.transpose(pX), axis=0))
plt.title('pX derivative, M=1024, N=1024, H=512')
plt.autoscale(tight=True)

plt.subplot(414)
plt.plot(np.arange(y.size) / float(fs), y, 'b')
plt.axis([0, y.size / float(fs), min(y), max(y)])
plt.title('y')

plt.tight_layout()
plt.savefig('stft-system.png')
audio.write_wav(y, fs, 'piano-stft.wav')
def main(inputFile=demo_sound_path('piano.wav'), window='hamming', M=1024, N=1024, H=512,
         interactive=True, plotFile=False):
    """
    Analysis/synthesis using the STFT
    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (choice of rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size
    N: fft size (power of two, bigger or equal than M)
    H: hop size (at most 1/2 of analysis window size to have good overlap-add)
    """

    # read input sound (monophonic with sampling rate of 44100)
    fs, x = audio.read_wav(inputFile)

    # compute analysis window
    w = get_window(window, M)

    # compute the magnitude and phase spectrogram
    mX, pX = stft.from_audio(x, w, N, H)

    # perform the inverse stft
    y = stft.to_audio(mX, pX, M, H)

    # output sound file (monophonic with sampling rate of 44100)
    outputFile = 'output_sounds/' + strip_file(inputFile) + '_stft.wav'

    # write the sound resulting from the inverse stft
    audio.write_wav(y, fs, outputFile)

    # create figure to plot
    plt.figure(figsize=(12, 9))

    # frequency range to plot
    maxplotfreq = 5000.0

    # plot the input sound
    plt.subplot(4, 1, 1)
    plt.plot(np.arange(x.size) / float(fs), x)
    plt.axis([0, x.size / float(fs), min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # plot magnitude spectrogram
    plt.subplot(4, 1, 2)
    numFrames = int(mX.shape[0])
    frmTime = H * np.arange(numFrames) / float(fs)
    binFreq = fs * np.arange(N * maxplotfreq / fs) / N
    plt.pcolormesh(frmTime, binFreq, np.transpose(mX[:, :int(N * maxplotfreq / fs + 1)]))
    plt.xlabel('time (sec)')
    plt.ylabel('frequency (Hz)')
    plt.title('magnitude spectrogram')
    plt.autoscale(tight=True)

    # plot the phase spectrogram
    plt.subplot(4, 1, 3)
    numFrames = int(pX.shape[0])
    frmTime = H * np.arange(numFrames) / float(fs)
    binFreq = fs * np.arange(N * maxplotfreq / fs) / N
    plt.pcolormesh(frmTime, binFreq,
                   np.transpose(np.diff(pX[:, :int(N * maxplotfreq / fs + 1)], axis=1)))
    plt.xlabel('time (sec)')
    plt.ylabel('frequency (Hz)')
    plt.title('phase spectrogram (derivative)')
    plt.autoscale(tight=True)

    # plot the output sound
    plt.subplot(4, 1, 4)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()

    if interactive:
        plt.show()
    if plotFile:
        plt.savefig('output_plots/%s_stft_model.png' % files.strip_file(inputFile))
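
# Usage sketch (illustrative settings): H = M // 4 gives 75% overlap, well
# within the at-most-half-window hop suggested in the docstring.
if __name__ == '__main__':
    main(window='hamming', M=1024, N=1024, H=256)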
plt.pcolormesh(frmTime, binFreq,
               np.transpose(np.diff(pX[:, :int(N * maxplotfreq / fs + 1)], axis=1)))
plt.autoscale(tight=True)
harms = hfreq * np.less(hfreq, maxplotfreq)
harms[harms == 0] = np.nan
numFrames = int(harms.shape[0])
frmTime = H * np.arange(numFrames) / float(fs)
plt.plot(frmTime, harms, color='k', ms=3, alpha=1)
plt.autoscale(tight=True)
plt.title('pX + harmonics')

plt.subplot(223)
numFrames = int(mXr.shape[0])
frmTime = H * np.arange(numFrames) / float(fs)
binFreq = fs * np.arange(Ns * maxplotfreq / fs) / Ns
plt.pcolormesh(frmTime, binFreq, np.transpose(mXr[:, :int(Ns * maxplotfreq / fs + 1)]))
plt.autoscale(tight=True)
plt.title('mXr')

plt.subplot(224)
numFrames = int(pXr.shape[0])
frmTime = H * np.arange(numFrames) / float(fs)
binFreq = fs * np.arange(Ns * maxplotfreq / fs) / Ns
plt.pcolormesh(frmTime, binFreq,
               np.transpose(np.diff(pXr[:, :int(Ns * maxplotfreq / fs + 1)], axis=1)))
plt.autoscale(tight=True)
plt.title('pXr')

plt.tight_layout()
plt.savefig('hprModelAnal-flute.png')
audio.write_wav(5 * xr, fs, 'flute-residual.wav')
filt[startBin:startBin + nBins] = bandpass
y = stft.filter(x, fs, w, N, H, filt)
mX, pX = stft.from_audio(x, w, N, H)
mY, pY = stft.from_audio(y, w, N, H)

plt.figure(1, figsize=(12, 9))

plt.subplot(311)
numFrames = int(mX.shape[0])
frmTime = H * np.arange(numFrames) / float(fs)
binFreq = np.arange(mX.shape[1]) * float(fs) / N
plt.pcolormesh(frmTime, binFreq, np.transpose(mX))
plt.title('mX (orchestra.wav)')
plt.autoscale(tight=True)

plt.subplot(312)
plt.plot(fs * np.arange(mX.shape[1]) / float(N), filt, 'k', lw=1.3)
plt.axis([0, fs / 2, -60, 7])
plt.title('filter shape')

plt.subplot(313)
numFrames = int(mY.shape[0])
frmTime = H * np.arange(numFrames) / float(fs)
binFreq = np.arange(mY.shape[1]) * float(fs) / N
plt.pcolormesh(frmTime, binFreq, np.transpose(mY))
plt.title('mY')
plt.autoscale(tight=True)

plt.tight_layout()
audio.write_wav(y, fs, 'orchestra-stft-filtering.wav')
plt.savefig('stftFiltering-orchestra.png')
hfreq, hmag, hphase, xr = hpr.from_audio(x, fs, w, N, H, t, minSineDur, nH,
                                         minf0, maxf0, f0et, harmDevSlope)
mXr, pXr = stft.from_audio(xr, w, N, H)

freqScaling = np.array([0, 1.5, 1, 1.5])
freqStretching = np.array([0, 1.1, 1, 1.1])
timbrePreservation = 1
hfreqt, hmagt = harmonic.scale_frequencies(hfreq, hmag, freqScaling, freqStretching,
                                           timbrePreservation, fs)
y, yh = hpr.to_audio(hfreqt, hmagt, np.array([]), xr, Ns, H, fs)
audio.write_wav(y, fs, 'hpr-freq-transformation.wav')

plt.figure(figsize=(12, 9))
maxplotfreq = 15000.0

plt.subplot(4, 1, 1)
plt.plot(np.arange(x.size) / float(fs), x)
plt.axis([0, x.size / float(fs), min(x), max(x)])
plt.title('x (flute-A4.wav)')

plt.subplot(4, 1, 2)
maxplotbin = int(N * maxplotfreq / fs)
numFrames = int(mXr.shape[0])
frmTime = H * np.arange(numFrames) / float(fs)
binFreq = np.arange(maxplotbin + 1) * float(fs) / N
contours_bins, contours_saliences, contours_start_times, duration = run_pitch_contours(
    pool['allframes_salience_peaks_bins'],
    pool['allframes_salience_peaks_saliences'])
pitch, confidence = run_pitch_contours_melody(contours_bins, contours_saliences,
                                              contours_start_times, duration)
yf0 = synth.synthesize_sinusoid(pitch, .6, hopSize, sampleRate)

plt.figure(1, figsize=(9, 6))

# x is assumed to hold the input samples loaded earlier; the signal is not
# named `audio` here so it does not shadow the audio utility module used below
mX, pX = stft.from_audio(x, hamming(frameSize), frameSize, hopSize)
maxplotfreq = 3000.0
numFrames = int(mX.shape[0])
frmTime = hopSize * np.arange(numFrames) / float(sampleRate)
binFreq = sampleRate * np.arange(frameSize * maxplotfreq / sampleRate) / frameSize
plt.pcolormesh(frmTime, binFreq,
               np.transpose(mX[:, :int(frameSize * maxplotfreq / sampleRate + 1)]))
plt.autoscale(tight=True)

offset = .5 * frameSize / sampleRate
time = hopSize * np.arange(pitch.size) / float(sampleRate)
pitch[pitch == 0] = np.nan
plt.plot(time, pitch, color='k', linewidth=2)
plt.title('mX + prominent melody (carnatic.wav)')

plt.tight_layout()
plt.savefig('predominantmelody-2.png')
audio.write_wav(yf0, sampleRate, 'predominantmelody-2.wav')
def main(inputFile=demo_sound_path('bendir.wav'), window='hamming', M=2001, N=2048,
         t=-80, minSineDur=0.02, maxnSines=150, freqDevOffset=10, freqDevSlope=0.001,
         stocf=0.2, interactive=True, plotFile=False):
    """
    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size; N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks
    maxnSines: maximum number of parallel sinusoids
    freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0
    freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation
    stocf: decimation factor used for the stochastic approximation
    """

    # size of fft used in synthesis
    Ns = 512

    # hop size (has to be 1/4 of Ns)
    H = 128

    # read input sound
    (fs, x) = audio.read_wav(inputFile)

    # compute analysis window
    w = get_window(window, M)

    # perform sinusoidal+stochastic analysis
    tfreq, tmag, tphase, stocEnv = sps.from_audio(x, fs, w, N, H, t, minSineDur, maxnSines,
                                                  freqDevOffset, freqDevSlope, stocf)

    # synthesize sinusoidal+stochastic model
    y, ys, yst = sps.to_audio(tfreq, tmag, tphase, stocEnv, Ns, H, fs)

    # output sound file (monophonic with sampling rate of 44100)
    baseFileName = strip_file(inputFile)
    outputFileSines, outputFileStochastic, outputFile = [
        'output_sounds/%s_spsModel%s.wav' % (baseFileName, i)
        for i in ('_sines', '_stochastic', '')
    ]

    # write sound files for the sinusoidal component, the stochastic component, and the sum
    audio.write_wav(ys, fs, outputFileSines)
    audio.write_wav(yst, fs, outputFileStochastic)
    audio.write_wav(y, fs, outputFile)

    # create figure to plot
    plt.figure(figsize=(12, 9))

    # frequency range to plot
    maxplotfreq = 10000.0

    # plot the input sound
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / float(fs), x)
    plt.axis([0, x.size / float(fs), min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    plt.subplot(3, 1, 2)
    numFrames = int(stocEnv.shape[0])
    sizeEnv = int(stocEnv.shape[1])
    frmTime = H * np.arange(numFrames) / float(fs)
    binFreq = (.5 * fs) * np.arange(sizeEnv * maxplotfreq / (.5 * fs)) / sizeEnv
    plt.pcolormesh(frmTime, binFreq,
                   np.transpose(stocEnv[:, :int(sizeEnv * maxplotfreq / (.5 * fs) + 1)]))
    plt.autoscale(tight=True)

    # plot sinusoidal frequencies on top of stochastic component
    if (tfreq.shape[1] > 0):
        sines = tfreq * np.less(tfreq, maxplotfreq)
        sines[sines == 0] = np.nan
        numFrames = int(sines.shape[0])
        frmTime = H * np.arange(numFrames) / float(fs)
        plt.plot(frmTime, sines, color='k', ms=3, alpha=1)
        plt.xlabel('time (sec)')
        plt.ylabel('frequency (Hz)')
        plt.autoscale(tight=True)
        plt.title('sinusoidal + stochastic spectrogram')

    # plot the output sound
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()

    if interactive:
        plt.show()
    if plotFile:
        plt.savefig('output_plots/%s_sps_model.png' % files.strip_file(inputFile))
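
# Usage sketch (illustrative settings): raising stocf keeps more of the
# stochastic envelope, at the cost of a larger representation.
if __name__ == '__main__':
    main(inputFile=demo_sound_path('bendir.wav'), stocf=0.4)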
if __name__ == '__main__':
    (fs, x) = audio.read_wav('../../../sounds/bendir.wav')
    plt.figure(1, figsize=(9, 7))
    N = 2048
    H = 256
    w = hamming(2048)
    mX, pX = stft.from_audio(x, w, N, H)
    maxplotfreq = 2000.0
    frmTime = H * np.arange(mX.shape[0]) / float(fs)
    binFreq = fs * np.arange(N * maxplotfreq / fs) / N
    plt.pcolormesh(frmTime, binFreq, np.transpose(mX[:, :int(N * maxplotfreq / fs + 1)]))

    N = 2048
    minf0 = 130
    maxf0 = 300
    H = 256
    f0 = f0Yin(x, N, H, minf0, maxf0)
    yf0 = synth.synthesize_sinusoid(f0, .8, H, fs)

    frmTime = H * np.arange(f0.size) / float(fs)
    plt.plot(frmTime, f0, linewidth=2, color='k')
    plt.autoscale(tight=True)
    plt.title('mX + f0 (bendir.wav), YIN: N=2048, H=256')
    plt.tight_layout()
    plt.savefig('f0Yin.png')
    audio.write_wav(yf0, fs, 'f0Yin.wav')
                                                  harmDevSlope1, minSineDur1, Ns, stocf)
hfreq2, hmag2, hphase2, stocEnv2 = hps.from_audio(x2, fs2, w2, N2, H, t2, nH,
                                                  minf02, maxf02, f0et2,
                                                  harmDevSlope2, minSineDur2, Ns, stocf)

hfreqIntp = np.array([0, .5, 1, .5])
hmagIntp = np.array([0, .5, 1, .5])
stocIntp = np.array([0, .5, 1, .5])
yhfreq, yhmag, ystocEnv = hps.morph(hfreq1, hmag1, stocEnv1, hfreq2, hmag2, stocEnv2,
                                    hfreqIntp, hmagIntp, stocIntp)
y, yh, yst = hps.to_audio(yhfreq, yhmag, np.array([]), ystocEnv, Ns, H, fs1)
audio.write_wav(y, fs1, 'hps-morph.wav')

plt.figure(figsize=(12, 9))
frame = 200

plt.subplot(2, 3, 1)
plt.vlines(hfreq1[frame, :], -100, hmag1[frame, :], lw=1.5, color='b')
plt.axis([0, 5000, -80, -15])
plt.title('x1: harmonics')

plt.subplot(2, 3, 2)
plt.vlines(hfreq2[frame, :], -100, hmag2[frame, :], lw=1.5, color='r')
plt.axis([0, 5000, -80, -15])
plt.title('x2: harmonics')

plt.subplot(2, 3, 3)
def main(inputFile=demo_sound_path('vignesh.wav'), window='blackman', M=1201, N=2048,
         t=-90, minSineDur=0.1, nH=100, minf0=130, maxf0=300, f0et=7, harmDevSlope=0.01,
         interactive=True, plotFile=False):
    """
    Analysis and synthesis using the harmonic model
    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size; N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks
    nH: maximum number of harmonics; minf0: minimum fundamental frequency in sound
    maxf0: maximum fundamental frequency in sound; f0et: maximum error accepted in f0 detection algorithm
    harmDevSlope: allowed deviation of harmonic tracks, higher harmonics could have higher allowed deviation
    """

    # size of fft used in synthesis
    Ns = 512

    # hop size (has to be 1/4 of Ns)
    H = 128

    # read input sound
    (fs, x) = audio.read_wav(inputFile)

    # compute analysis window
    w = get_window(window, M)

    # detect harmonics of input sound
    hfreq, hmag, hphase = harmonic.from_audio(x, fs, w, N, H, t, nH, minf0, maxf0,
                                              f0et, harmDevSlope, minSineDur)

    # synthesize the harmonics
    y = sine.to_audio(hfreq, hmag, hphase, Ns, H, fs)

    # output sound file (monophonic with sampling rate of 44100)
    outputFile = 'output_sounds/' + files.strip_file(inputFile) + '_harmonicModel.wav'

    # write the sound resulting from harmonic analysis
    audio.write_wav(y, fs, outputFile)

    # create figure to show plots
    plt.figure(figsize=(12, 9))

    # frequency range to plot
    maxplotfreq = 5000.0

    # plot the input sound
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / float(fs), x)
    plt.axis([0, x.size / float(fs), min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # plot the harmonic frequencies
    plt.subplot(3, 1, 2)
    if (hfreq.shape[1] > 0):
        numFrames = hfreq.shape[0]
        frmTime = H * np.arange(numFrames) / float(fs)
        hfreq[hfreq <= 0] = np.nan
        plt.plot(frmTime, hfreq)
        plt.axis([0, x.size / float(fs), 0, maxplotfreq])
        plt.title('frequencies of harmonic tracks')

    # plot the output sound
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()

    if interactive:
        plt.show()
    if plotFile:
        plt.savefig('output_plots/%s_harmonic_model.png' % files.strip_file(inputFile))
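
# Usage sketch (illustrative settings): minf0/maxf0 should bracket the f0 of
# the analyzed sound; 130-300 Hz suits the default vignesh.wav vocal phrase.
if __name__ == '__main__':
    main(inputFile=demo_sound_path('vignesh.wav'), minf0=130, maxf0=300)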
def analysis(inputFile=demo_sound_path('sax-phrase-short.wav'), window='blackman', M=601,
             N=1024, t=-100, minSineDur=0.1, nH=100, minf0=350, maxf0=700, f0et=5,
             harmDevSlope=0.01, stocf=0.1, interactive=True, plotFile=False):
    """
    Analyze a sound with the harmonic plus stochastic model
    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size
    N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks
    minSineDur: minimum duration of sinusoidal tracks
    nH: maximum number of harmonics
    minf0: minimum fundamental frequency in sound
    maxf0: maximum fundamental frequency in sound
    f0et: maximum error accepted in f0 detection algorithm
    harmDevSlope: allowed deviation of harmonic tracks, higher harmonics have higher allowed deviation
    stocf: decimation factor used for the stochastic approximation
    returns inputFile: input file name; fs: sampling rate of input file,
            hfreq, hmag: harmonic frequencies, magnitude; mYst: stochastic residual
    """

    # size of fft used in synthesis
    Ns = 512

    # hop size (has to be 1/4 of Ns)
    H = 128

    # read input sound
    (fs, x) = audio.read_wav(inputFile)

    # compute analysis window
    w = get_window(window, M)

    # compute the harmonic plus stochastic model of the whole sound
    hfreq, hmag, hphase, mYst = hps.from_audio(x, fs, w, N, H, t, nH, minf0, maxf0,
                                               f0et, harmDevSlope, minSineDur, Ns, stocf)

    # synthesize the harmonic plus stochastic model without original phases
    y, yh, yst = hps.to_audio(hfreq, hmag, np.array([]), mYst, Ns, H, fs)

    # write output sound
    outputFile = 'output_sounds/' + strip_file(inputFile) + '_hpsModel.wav'
    audio.write_wav(y, fs, outputFile)

    # create figure to plot
    plt.figure(figsize=(12, 9))

    # frequency range to plot
    maxplotfreq = 15000.0

    # plot the input sound
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / float(fs), x)
    plt.axis([0, x.size / float(fs), min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # plot spectrogram of stochastic component
    plt.subplot(3, 1, 2)
    numFrames = int(mYst.shape[0])
    sizeEnv = int(mYst.shape[1])
    frmTime = H * np.arange(numFrames) / float(fs)
    binFreq = (.5 * fs) * np.arange(sizeEnv * maxplotfreq / (.5 * fs)) / sizeEnv
    plt.pcolormesh(frmTime, binFreq,
                   np.transpose(mYst[:, :int(sizeEnv * maxplotfreq / (.5 * fs) + 1)]))
    plt.autoscale(tight=True)

    # plot harmonics on top of stochastic spectrogram
    if (hfreq.shape[1] > 0):
        harms = hfreq * np.less(hfreq, maxplotfreq)
        harms[harms == 0] = np.nan
        numFrames = int(harms.shape[0])
        frmTime = H * np.arange(numFrames) / float(fs)
        plt.plot(frmTime, harms, color='k', ms=3, alpha=1)
        plt.xlabel('time (sec)')
        plt.ylabel('frequency (Hz)')
        plt.autoscale(tight=True)
        plt.title('harmonics + stochastic spectrogram')

    # plot the output sound
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()

    if interactive:
        plt.show(block=False)
    if plotFile:
        plt.savefig('output_plots/%s_hps_transformation_analysis.png' % files.strip_file(inputFile))

    return inputFile, fs, hfreq, hmag, mYst
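
# A sketch of running the analysis alone with a coarser stochastic envelope
# (stocf here is illustrative; the returned values can feed transformation_synthesis):
if __name__ == '__main__':
    analysis(stocf=0.2, interactive=False, plotFile=True)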
                                                  harmDevSlope1, minSineDur1, Ns, stocf)
hfreq2, hmag2, hphase2, stocEnv2 = hps.from_audio(x2, fs2, w2, N2, H, t2, nH,
                                                  minf02, maxf02, f0et2,
                                                  harmDevSlope2, minSineDur2, Ns, stocf)

hfreqIntp = np.array([0, 0, .1, 0, .9, 1, 1, 1])
hmagIntp = np.array([0, 0, .1, 0, .9, 1, 1, 1])
stocIntp = np.array([0, 0, .1, 0, .9, 1, 1, 1])
yhfreq, yhmag, ystocEnv = hps.morph(hfreq1, hmag1, stocEnv1, hfreq2, hmag2, stocEnv2,
                                    hfreqIntp, hmagIntp, stocIntp)
y, yh, yst = hps.to_audio(yhfreq, yhmag, np.array([]), ystocEnv, Ns, H, fs1)
audio.write_wav(y, fs1, 'hps-morph-total.wav')

plt.figure(figsize=(12, 9))

# frequency range to plot
maxplotfreq = 15000.0

# plot spectrogram of stochastic component of sound 1
plt.subplot(3, 1, 1)
numFrames = int(stocEnv1.shape[0])
sizeEnv = int(stocEnv1.shape[1])
frmTime = H * np.arange(numFrames) / float(fs1)
binFreq = (.5 * fs1) * np.arange(sizeEnv * maxplotfreq / (.5 * fs1)) / sizeEnv
plt.pcolormesh(frmTime, binFreq,
               np.transpose(stocEnv1[:, :int(sizeEnv * maxplotfreq / (.5 * fs1) + 1)]))
def main(inputFile=demo_sound_path('sax-phrase-short.wav'), window='blackman', M=601,
         N=1024, t=-100, minSineDur=0.1, nH=100, minf0=350, maxf0=700, f0et=5,
         harmDevSlope=0.01, interactive=True, plotFile=False):
    """
    Perform analysis/synthesis using the harmonic plus residual model
    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size; N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks
    nH: maximum number of harmonics; minf0: minimum fundamental frequency in sound
    maxf0: maximum fundamental frequency in sound; f0et: maximum error accepted in f0 detection algorithm
    harmDevSlope: allowed deviation of harmonic tracks, higher harmonics have higher allowed deviation
    """

    # size of fft used in synthesis
    Ns = 512

    # hop size (has to be 1/4 of Ns)
    H = 128

    # read input sound
    (fs, x) = audio.read_wav(inputFile)

    # compute analysis window
    w = get_window(window, M)

    # find harmonics and residual
    hfreq, hmag, hphase, xr = hpr.from_audio(x, fs, w, N, H, t, minSineDur, nH,
                                             minf0, maxf0, f0et, harmDevSlope)

    # compute spectrogram of residual
    mXr, pXr = stft.from_audio(xr, w, N, H)

    # synthesize hpr model
    y, yh = hpr.to_audio(hfreq, hmag, hphase, xr, Ns, H, fs)

    # output sound file (monophonic with sampling rate of 44100)
    baseFileName = files.strip_file(inputFile)
    outputFileSines, outputFileResidual, outputFile = [
        'output_sounds/%s_hprModel%s.wav' % (baseFileName, i)
        for i in ('_sines', '_residual', '')
    ]

    # write sound files for the harmonics, the residual, and the sum
    audio.write_wav(yh, fs, outputFileSines)
    audio.write_wav(xr, fs, outputFileResidual)
    audio.write_wav(y, fs, outputFile)

    # create figure to plot
    plt.figure(figsize=(12, 9))

    # frequency range to plot
    maxplotfreq = 5000.0

    # plot the input sound
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / float(fs), x)
    plt.axis([0, x.size / float(fs), min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # plot the magnitude spectrogram of residual
    plt.subplot(3, 1, 2)
    maxplotbin = int(N * maxplotfreq / fs)
    numFrames = int(mXr.shape[0])
    frmTime = H * np.arange(numFrames) / float(fs)
    binFreq = np.arange(maxplotbin + 1) * float(fs) / N
    plt.pcolormesh(frmTime, binFreq, np.transpose(mXr[:, :maxplotbin + 1]))
    plt.autoscale(tight=True)

    # plot harmonic frequencies on residual spectrogram
    if (hfreq.shape[1] > 0):
        harms = hfreq * np.less(hfreq, maxplotfreq)
        harms[harms == 0] = np.nan
        numFrames = int(harms.shape[0])
        frmTime = H * np.arange(numFrames) / float(fs)
        plt.plot(frmTime, harms, color='k', ms=3, alpha=1)
        plt.xlabel('time (sec)')
        plt.ylabel('frequency (Hz)')
        plt.autoscale(tight=True)
        plt.title('harmonics + residual spectrogram')

    # plot the output sound
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()

    if interactive:
        plt.show()
    if plotFile:
        plt.savefig('output_plots/%s_hpr_model.png' % files.strip_file(inputFile))
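
# Usage sketch (illustrative settings): minf0/maxf0 should bracket the f0 range
# of the phrase; 350-700 Hz suits the default sax phrase.
if __name__ == '__main__':
    main(inputFile=demo_sound_path('sax-phrase-short.wav'), minf0=350, maxf0=700)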
numFrames = int(mX.shape[0])
frmTime = H * np.arange(numFrames) / float(fs)
binFreq = np.arange(maxplotbin + 1) * float(fs) / N
plt.pcolormesh(frmTime, binFreq, np.transpose(mX[:, :maxplotbin + 1]))
plt.autoscale(tight=True)

plt.subplot(4, 1, 3)
numFrames = int(ytfreq.shape[0])
frmTime = H * np.arange(numFrames) / float(fs)
tracks = ytfreq * np.less(ytfreq, maxplotfreq)
tracks[tracks <= 0] = np.nan
plt.plot(frmTime, tracks, color='k', lw=1)
plt.autoscale(tight=True)
plt.title('mY + time-scaled sine frequencies')
maxplotbin = int(N * maxplotfreq / fs)
numFrames = int(mY.shape[0])
frmTime = H * np.arange(numFrames) / float(fs)
binFreq = np.arange(maxplotbin + 1) * float(fs) / N
plt.pcolormesh(frmTime, binFreq, np.transpose(mY[:, :maxplotbin + 1]))
plt.autoscale(tight=True)

plt.subplot(4, 1, 4)
plt.plot(np.arange(y.size) / float(fs), y, 'b')
plt.axis([0, y.size / float(fs), min(y), max(y)])
plt.title('y')

plt.tight_layout()
audio.write_wav(y, fs, 'mridangam-sineModelTimeScale.wav')
plt.savefig('sineModelTimeScale-mridangam.png')
def transformation_synthesis(inputFile1, fs, hfreq1, hmag1, stocEnv1,
                             inputFile2, hfreq2, hmag2, stocEnv2,
                             hfreqIntp=np.array([0, 0, .1, 0, .9, 1, 1, 1]),
                             hmagIntp=np.array([0, 0, .1, 0, .9, 1, 1, 1]),
                             stocIntp=np.array([0, 0, .1, 0, .9, 1, 1, 1]),
                             interactive=True, plotFile=False):
    """
    Transform the analysis values returned by the analysis function and synthesize the sound
    inputFile1: name of input file 1
    fs: sampling rate of input file 1
    hfreq1, hmag1, stocEnv1: hps representation of sound 1
    inputFile2: name of input file 2
    hfreq2, hmag2, stocEnv2: hps representation of sound 2
    hfreqIntp: interpolation factor between the harmonic frequencies of the two sounds,
               0 is sound 1 and 1 is sound 2 (time,value pairs)
    hmagIntp: interpolation factor between the harmonic magnitudes of the two sounds,
              0 is sound 1 and 1 is sound 2 (time,value pairs)
    stocIntp: interpolation factor between the stochastic representation of the two sounds,
              0 is sound 1 and 1 is sound 2 (time,value pairs)
    """

    # size of fft used in synthesis
    Ns = 512

    # hop size (has to be 1/4 of Ns)
    H = 128

    # morph the two sounds
    yhfreq, yhmag, ystocEnv = hps.morph(hfreq1, hmag1, stocEnv1, hfreq2, hmag2, stocEnv2,
                                        hfreqIntp, hmagIntp, stocIntp)

    # synthesis
    y, yh, yst = hps.to_audio(yhfreq, yhmag, np.array([]), ystocEnv, Ns, H, fs)

    # write output sound
    outputFile = 'output_sounds/' + os.path.basename(inputFile1)[:-4] + '_hpsMorph.wav'
    audio.write_wav(y, fs, outputFile)

    # create figure to plot
    plt.figure(figsize=(12, 9))

    # frequency range to plot
    maxplotfreq = 15000.0

    # plot spectrogram of transformed stochastic component
    plt.subplot(2, 1, 1)
    numFrames = int(ystocEnv.shape[0])
    sizeEnv = int(ystocEnv.shape[1])
    frmTime = H * np.arange(numFrames) / float(fs)
    binFreq = (.5 * fs) * np.arange(sizeEnv * maxplotfreq / (.5 * fs)) / sizeEnv
    plt.pcolormesh(frmTime, binFreq,
                   np.transpose(ystocEnv[:, :int(sizeEnv * maxplotfreq / (.5 * fs) + 1)]))
    plt.autoscale(tight=True)

    # plot transformed harmonics on top of stochastic spectrogram
    if (yhfreq.shape[1] > 0):
        harms = np.copy(yhfreq)
        harms = harms * np.less(harms, maxplotfreq)
        harms[harms == 0] = np.nan
        numFrames = int(harms.shape[0])
        frmTime = H * np.arange(numFrames) / float(fs)
        plt.plot(frmTime, harms, color='k', ms=3, alpha=1)
        plt.xlabel('time (sec)')
        plt.ylabel('frequency (Hz)')
        plt.autoscale(tight=True)
        plt.title('harmonics + stochastic spectrogram')

    # plot the output sound
    plt.subplot(2, 1, 2)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()

    if interactive:
        plt.show()
    if plotFile:
        plt.savefig('output_plots/%s_%s_hps_morph_synthesis.png' %
                    (files.strip_file(inputFile1), files.strip_file(inputFile2)))
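
# Usage sketch: this expects hps analyses of two sounds; assuming a companion
# analysis() that returns the hps data of both (not shown here), the call is:
#   inputFile1, fs, hfreq1, hmag1, stocEnv1, inputFile2, hfreq2, hmag2, stocEnv2 = analysis()
#   transformation_synthesis(inputFile1, fs, hfreq1, hmag1, stocEnv1,
#                            inputFile2, hfreq2, hmag2, stocEnv2)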
H = Ns // 4  # hop size must be an integer (1/4 of Ns)
tfreq, tmag, tphase = sine.from_audio(x1, fs, w, N, H, t, maxnSines, minSineDur,
                                      freqDevOffset, freqDevSlope)
y = sine.to_audio(tfreq, tmag, tphase, Ns, H, fs)

numFrames = int(tfreq.shape[0])
frmTime = H * np.arange(numFrames) / float(fs)
maxplotfreq = 3000.0

plt.figure(1, figsize=(9, 7))

plt.subplot(3, 1, 1)
plt.plot(np.arange(x1.size) / float(fs), x1, 'b', lw=1.5)
plt.axis([0, x1.size / float(fs), min(x1), max(x1)])
plt.title('x (bendir.wav)')

plt.subplot(3, 1, 2)
tracks = tfreq * np.less(tfreq, maxplotfreq)
tracks[tracks <= 0] = np.nan
plt.plot(frmTime, tracks, color='k', lw=1.5)
plt.autoscale(tight=True)
plt.title('f_t, sine frequencies')

plt.subplot(3, 1, 3)
plt.plot(np.arange(y.size) / float(fs), y, 'b', lw=1.5)
plt.axis([0, y.size / float(fs), min(y), max(y)])
plt.title('y')

plt.tight_layout()
audio.write_wav(y, fs, 'bendir-sine-synthesis.wav')
plt.savefig('sineModel-anal-synth.png')
N = 2048
t = -90
minf0 = 100
maxf0 = 300
f0et = 1
maxnpeaksTwm = 4
H = 128
x1 = x[int(1.5 * fs):int(1.8 * fs)]  # slice indices must be integers

plt.figure(1, figsize=(9, 7))
mX, pX = stft.from_audio(x, w, N, H)
f0 = harmonic.find_fundamental_freq(x, fs, w, N, H, t, minf0, maxf0, f0et)
f0 = peaks.clean_sinusoid_track(f0, 5)
yf0 = synth.synthesize_sinusoid(f0, .8, H, fs)
f0[f0 == 0] = np.nan

maxplotfreq = 800.0
numFrames = int(mX.shape[0])
frmTime = H * np.arange(numFrames) / float(fs)
binFreq = fs * np.arange(N * maxplotfreq / fs) / N
plt.pcolormesh(frmTime, binFreq, np.transpose(mX[:, :int(N * maxplotfreq / fs + 1)]))
plt.autoscale(tight=True)

plt.plot(frmTime, f0, linewidth=2, color='k')
plt.autoscale(tight=True)
plt.title('mX + f0 (piano.wav), TWM')

plt.tight_layout()
plt.savefig('f0Twm-piano.png')
audio.write_wav(yf0, fs, 'f0Twm-piano.wav')
def main(inputFile=demo_sound_path('bendir.wav'), window='hamming', M=2001, N=2048,
         t=-80, minSineDur=0.02, maxnSines=150, freqDevOffset=10, freqDevSlope=0.001,
         interactive=True, plotFile=False):
    """
    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size
    N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks
    minSineDur: minimum duration of sinusoidal tracks
    maxnSines: maximum number of parallel sinusoids
    freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0
    freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation
    """

    # size of fft used in synthesis
    Ns = 512

    # hop size (has to be 1/4 of Ns)
    H = 128

    # read input sound
    (fs, x) = audio.read_wav(inputFile)

    # compute analysis window
    w = get_window(window, M)

    # perform sinusoidal plus residual analysis
    tfreq, tmag, tphase, xr = spr.from_audio(x, fs, w, N, H, t, minSineDur, maxnSines,
                                             freqDevOffset, freqDevSlope)

    # compute spectrogram of residual
    mXr, pXr = stft.from_audio(xr, w, N, H)

    # sum sinusoids and residual
    y, ys = spr.to_audio(tfreq, tmag, tphase, xr, Ns, H, fs)

    # output sound file (monophonic with sampling rate of 44100)
    baseFileName = strip_file(inputFile)
    outputFileSines, outputFileResidual, outputFile = [
        'output_sounds/%s_sprModel%s.wav' % (baseFileName, i)
        for i in ('_sines', '_residual', '')
    ]

    # write sound files for the sinusoidal component, the residual, and the sum
    audio.write_wav(ys, fs, outputFileSines)
    audio.write_wav(xr, fs, outputFileResidual)
    audio.write_wav(y, fs, outputFile)

    # create figure to show plots
    plt.figure(figsize=(12, 9))

    # frequency range to plot
    maxplotfreq = 5000.0

    # plot the input sound
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / float(fs), x)
    plt.axis([0, x.size / float(fs), min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # plot the magnitude spectrogram of residual
    plt.subplot(3, 1, 2)
    maxplotbin = int(N * maxplotfreq / fs)
    numFrames = int(mXr.shape[0])
    frmTime = H * np.arange(numFrames) / float(fs)
    binFreq = np.arange(maxplotbin + 1) * float(fs) / N
    plt.pcolormesh(frmTime, binFreq, np.transpose(mXr[:, :maxplotbin + 1]))
    plt.autoscale(tight=True)

    # plot the sinusoidal frequencies on top of the residual spectrogram
    if (tfreq.shape[1] > 0):
        tracks = tfreq * np.less(tfreq, maxplotfreq)
        tracks[tracks <= 0] = np.nan
        plt.plot(frmTime, tracks, color='k')
        plt.title('sinusoidal tracks + residual spectrogram')
        plt.autoscale(tight=True)

    # plot the output sound
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()

    if interactive:
        plt.show()
    if plotFile:
        plt.savefig('output_plots/%s_spr_model.png' % files.strip_file(inputFile))
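
# Usage sketch (illustrative settings): freqDevOffset and freqDevSlope control
# how far a track may drift between frames before it is broken.
if __name__ == '__main__':
    main(inputFile=demo_sound_path('bendir.wav'), freqDevOffset=20, freqDevSlope=0.01)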
harmDevSlope = 0.01
stocf = 0.1
Ns = 512
H = 128

(fs, x) = audio.read_wav(inputFile)
w = get_window(window, M)
hfreq, hmag, hphase, mYst = hps.from_audio(x, fs, w, N, H, t, nH, minf0, maxf0,
                                           f0et, harmDevSlope, minSineDur, Ns, stocf)
timeScaling = np.array([0, 0, 2.138, 2.138 - 1.5, 3.146, 3.146])
yhfreq, yhmag, ystocEnv = hps.scale_time(hfreq, hmag, mYst, timeScaling)
y, yh, yst = hps.to_audio(yhfreq, yhmag, np.array([]), ystocEnv, Ns, H, fs)
audio.write_wav(y, fs, 'hps-transformation.wav')

plt.figure(figsize=(12, 9))
maxplotfreq = 14900.0

# plot the input sound
plt.subplot(4, 1, 1)
plt.plot(np.arange(x.size) / float(fs), x)
plt.axis([0, x.size / float(fs), min(x), max(x)])
plt.title('x (sax-phrase-short.wav)')

# plot spectrogram of stochastic component
plt.subplot(4, 1, 2)
numFrames = int(mYst.shape[0])
sizeEnv = int(mYst.shape[1])
plt.plot(np.arange(x.size) / float(fs), x, 'b')
plt.autoscale(tight=True)
plt.title('x (sax-phrase-short.wav)')

plt.subplot(312)
numFrames = int(mYst.shape[0])
sizeEnv = int(mYst.shape[1])
frmTime = H * np.arange(numFrames) / float(fs)
binFreq = (.5 * fs) * np.arange(sizeEnv * maxplotfreq / (.5 * fs)) / sizeEnv
plt.pcolormesh(frmTime, binFreq,
               np.transpose(mYst[:, :int(sizeEnv * maxplotfreq / (.5 * fs) + 1)]))
harms = hfreq * np.less(hfreq, maxplotfreq)
harms[harms == 0] = np.nan
numFrames = int(harms.shape[0])
frmTime = H * np.arange(numFrames) / float(fs)
plt.plot(frmTime, harms, color='k', ms=3, alpha=1)
plt.autoscale(tight=True)
plt.title('harmonics + stochastic')

plt.subplot(313)
plt.plot(np.arange(y.size) / float(fs), y, 'b')
plt.autoscale(tight=True)
plt.title('y')

plt.tight_layout()
plt.savefig('hpsModel-sax-phrase.png')
audio.write_wav(y, fs, 'sax-phrase-hps-synthesis.wav')
audio.write_wav(yh, fs, 'sax-phrase-harmonic.wav')
audio.write_wav(yst, fs, 'sax-phrase-stochastic.wav')
def analysis(inputFile=demo_sound_path('mridangam.wav'), window='hamming', M=801, N=2048,
             t=-90, minSineDur=0.01, maxnSines=150, freqDevOffset=20, freqDevSlope=0.02,
             interactive=True, plotFile=False):
    """
    Analyze a sound with the sine model
    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size; N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks
    maxnSines: maximum number of parallel sinusoids
    freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0
    freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation
    returns inputFile: input file name; fs: sampling rate of input file,
            tfreq, tmag: sinusoidal frequencies and magnitudes
    """

    # size of fft used in synthesis
    Ns = 512

    # hop size (has to be 1/4 of Ns)
    H = 128

    # read input sound
    (fs, x) = audio.read_wav(inputFile)

    # compute analysis window
    w = get_window(window, M)

    # compute the sine model of the whole sound
    tfreq, tmag, tphase = sine.from_audio(x, fs, w, N, H, t, maxnSines, minSineDur,
                                          freqDevOffset, freqDevSlope)

    # synthesize the sines without original phases
    y = sine.to_audio(tfreq, tmag, np.array([]), Ns, H, fs)

    # output sound file (monophonic with sampling rate of 44100)
    outputFile = 'output_sounds/' + strip_file(inputFile) + '_sineModel.wav'

    # write the sound resulting from the sinusoidal synthesis
    audio.write_wav(y, fs, outputFile)

    # create figure to show plots
    plt.figure(figsize=(12, 9))

    # frequency range to plot
    maxplotfreq = 5000.0

    # plot the input sound
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / float(fs), x)
    plt.axis([0, x.size / float(fs), min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # plot the sinusoidal frequencies
    if (tfreq.shape[1] > 0):
        plt.subplot(3, 1, 2)
        tracks = np.copy(tfreq)
        tracks = tracks * np.less(tracks, maxplotfreq)
        tracks[tracks <= 0] = np.nan
        numFrames = int(tracks.shape[0])
        frmTime = H * np.arange(numFrames) / float(fs)
        plt.plot(frmTime, tracks)
        plt.axis([0, x.size / float(fs), 0, maxplotfreq])
        plt.title('frequencies of sinusoidal tracks')

    # plot the output sound
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()

    if interactive:
        plt.show(block=False)
    if plotFile:
        plt.savefig('output_plots/%s_sine_transformation_analysis.png' % files.strip_file(inputFile))

    return inputFile, fs, tfreq, tmag
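
# Usage sketch: run the analysis alone; its return values match the inputs of
# transformation_synthesis() in this module (maxnSines here is illustrative):
if __name__ == '__main__':
    analysis(maxnSines=100, interactive=False)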
frmTime = H1 * np.arange(numFrames) / float(fs)
binFreq = fs * np.arange(N1 * maxplotfreq / fs) / N1
plt.pcolormesh(frmTime, binFreq, np.transpose(mX[:, :int(N1 * maxplotfreq / fs + 1)]))
plt.title('mX (orchestra.wav)')
plt.autoscale(tight=True)

plt.subplot(312)
numFrames = int(mX2.shape[0])
frmTime = H1 * np.arange(numFrames) / float(fs)
N = 2 * mX2.shape[1]
binFreq = fs * np.arange(N * maxplotfreq / fs) / N
plt.pcolormesh(frmTime, binFreq, np.transpose(mX2[:, :int(N * maxplotfreq / fs + 1)]))
plt.title('mX2 (speech-male.wav)')
plt.autoscale(tight=True)

plt.subplot(313)
numFrames = int(mY.shape[0])
frmTime = H1 * np.arange(numFrames) / float(fs)
binFreq = fs * np.arange(N1 * maxplotfreq / fs) / N1
plt.pcolormesh(frmTime, binFreq, np.transpose(mY[:, :int(N1 * maxplotfreq / fs + 1)]))
plt.title('mY')
plt.autoscale(tight=True)

plt.tight_layout()
audio.write_wav(y, fs, 'orchestra-speech-stftMorph.wav')
plt.savefig('stftMorph-orchestra.png')