def main(inputFile=demo_sound_path('ocean.wav'), H=256, N=512, stocf=.1, interactive=True, plotFile=False):
    """
    Run stochastic-model analysis/synthesis on a sound and plot the results.

    inputFile: input sound file (monophonic with sampling rate of 44100)
    H: hop size
    N: fft size
    stocf: decimation factor used for the stochastic approximation
        (bigger than 0, maximum 1)
    interactive: show the figure in a window
    plotFile: save the figure under output_plots/
    """
    # analysis: read the sound and compute its stochastic envelope
    fs, x = audio.read_wav(inputFile)
    stocEnv = stochastic.from_audio(x, H, N, stocf)

    # synthesis: regenerate audio from the envelope and write it to disk
    y = stochastic.to_audio(stocEnv, H, N)
    outputFile = 'output_sounds/' + strip_file(inputFile) + '_stochasticModel.wav'
    audio.write_wav(y, fs, outputFile)

    plt.figure(figsize=(12, 9))

    # input waveform
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / float(fs), x)
    plt.axis([0, x.size / float(fs), min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # stochastic envelope shown as a spectrogram-like image
    plt.subplot(3, 1, 2)
    frame_count = int(stocEnv.shape[0])
    frame_times = H * np.arange(frame_count) / float(fs)
    bin_freqs = np.arange(stocf * (N / 2 + 1)) * float(fs) / (stocf * N)
    plt.pcolormesh(frame_times, bin_freqs, np.transpose(stocEnv))
    plt.autoscale(tight=True)
    plt.xlabel('time (sec)')
    plt.ylabel('frequency (Hz)')
    plt.title('stochastic approximation')

    # resynthesized waveform
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')

    plt.tight_layout()
    if interactive:
        plt.show()
    if plotFile:
        plt.savefig('output_plots/%s_stochastic_model.png' % files.strip_file(inputFile))
def analysis(inputFile1=demo_sound_path('violin-B3.wav'), window1='blackman', M1=1001, N1=1024, t1=-100,
             minSineDur1=0.05, nH=60, minf01=200, maxf01=300, f0et1=10, harmDevSlope1=0.01, stocf=0.1,
             inputFile2=demo_sound_path('soprano-E4.wav'), window2='blackman', M2=901, N2=1024, t2=-100,
             minSineDur2=0.05, minf02=250, maxf02=500, f0et2=10, harmDevSlope2=0.01,
             interactive=True, plotFile=False):
    """
    Analyze two sounds with the harmonic plus stochastic model

    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size
    N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks
    minSineDur: minimum duration of sinusoidal tracks
    nH: maximum number of harmonics
    minf0: minimum fundamental frequency in sound
    maxf0: maximum fundamental frequency in sound
    f0et: maximum error accepted in f0 detection algorithm
    harmDevSlope: allowed deviation of harmonic tracks, higher harmonics have higher allowed deviation
    stocf: decimation factor used for the stochastic approximation
    returns inputFile: input file name; fs: sampling rate of input file,
            hfreq, hmag: harmonic frequencies, magnitude; stocEnv: stochastic residual
    """

    # size of fft used in synthesis
    Ns = 512

    # hop size (has to be 1/4 of Ns)
    H = 128

    # read input sounds
    (fs1, x1) = audio.read_wav(inputFile1)
    (fs2, x2) = audio.read_wav(inputFile2)

    # compute analysis windows
    w1 = get_window(window1, M1)
    w2 = get_window(window2, M2)

    # compute the harmonic plus stochastic models
    hfreq1, hmag1, hphase1, stocEnv1 = hps.from_audio(x1, fs1, w1, N1, H, t1, nH, minf01, maxf01,
                                                      f0et1, harmDevSlope1, minSineDur1, Ns, stocf)
    hfreq2, hmag2, hphase2, stocEnv2 = hps.from_audio(x2, fs2, w2, N2, H, t2, nH, minf02, maxf02,
                                                      f0et2, harmDevSlope2, minSineDur2, Ns, stocf)

    # create figure to plot
    plt.figure(figsize=(12, 9))

    # frequency range to plot
    maxplotfreq = 15000.0

    # plot spectrogram stochastic component of sound 1
    plt.subplot(2, 1, 1)
    numFrames = int(stocEnv1.shape[0])
    sizeEnv = int(stocEnv1.shape[1])
    frmTime = H * np.arange(numFrames) / float(fs1)
    # FIX: slice bounds must be integers on Python 3; the old code passed the float
    # expression sizeEnv * maxplotfreq / (.5 * fs1) + 1 directly as a slice index
    # (TypeError). Computing the bin count once also keeps binFreq and the sliced
    # envelope the same length for pcolormesh.
    maxplotbin = int(sizeEnv * maxplotfreq / (.5 * fs1))
    binFreq = (.5 * fs1) * np.arange(maxplotbin + 1) / sizeEnv
    plt.pcolormesh(frmTime, binFreq, np.transpose(stocEnv1[:, :maxplotbin + 1]))
    plt.autoscale(tight=True)

    # plot harmonic on top of stochastic spectrogram of sound 1
    if (hfreq1.shape[1] > 0):
        harms = np.copy(hfreq1)
        harms = harms * np.less(harms, maxplotfreq)
        harms[harms == 0] = np.nan
        numFrames = int(harms.shape[0])
        frmTime = H * np.arange(numFrames) / float(fs1)
        plt.plot(frmTime, harms, color='k', ms=3, alpha=1)
        plt.xlabel('time (sec)')
        plt.ylabel('frequency (Hz)')
        plt.autoscale(tight=True)
        plt.title('harmonics + stochastic spectrogram of sound 1')

    # plot spectrogram stochastic component of sound 2
    plt.subplot(2, 1, 2)
    numFrames = int(stocEnv2.shape[0])
    sizeEnv = int(stocEnv2.shape[1])
    frmTime = H * np.arange(numFrames) / float(fs2)
    # FIX: same integer-slice fix as for sound 1
    maxplotbin = int(sizeEnv * maxplotfreq / (.5 * fs2))
    binFreq = (.5 * fs2) * np.arange(maxplotbin + 1) / sizeEnv
    plt.pcolormesh(frmTime, binFreq, np.transpose(stocEnv2[:, :maxplotbin + 1]))
    plt.autoscale(tight=True)

    # plot harmonic on top of stochastic spectrogram of sound 2
    if (hfreq2.shape[1] > 0):
        harms = np.copy(hfreq2)
        harms = harms * np.less(harms, maxplotfreq)
        harms[harms == 0] = np.nan
        numFrames = int(harms.shape[0])
        frmTime = H * np.arange(numFrames) / float(fs2)
        plt.plot(frmTime, harms, color='k', ms=3, alpha=1)
        plt.xlabel('time (sec)')
        plt.ylabel('frequency (Hz)')
        plt.autoscale(tight=True)
        plt.title('harmonics + stochastic spectrogram of sound 2')

    plt.tight_layout()
    if interactive:
        plt.show(block=False)
    if plotFile:
        plt.savefig('output_plots/%s_%s_hps_morph_analysis.png' %
                    (files.strip_file(inputFile1), files.strip_file(inputFile2)))

    # NOTE: fs2 is intentionally not returned, matching the original interface.
    return inputFile1, fs1, hfreq1, hmag1, stocEnv1, inputFile2, hfreq2, hmag2, stocEnv2
def main(inputFile1=demo_sound_path('ocean.wav'), inputFile2=demo_sound_path('speech-male.wav'),
         window1='hamming', window2='hamming', M1=1024, M2=1024, N1=1024, N2=1024, H1=256,
         smoothf=.5, balancef=0.2, interactive=True, plotFile=False):
    """
    Function to perform a morph between two sounds

    inputFile1: name of input sound file to be used as source
    inputFile2: name of input sound file to be used as filter
    window1 and window2: windows for both files
    M1 and M2: window sizes for both files
    N1 and N2: fft sizes for both sounds
    H1: hop size for sound 1 (the one for sound 2 is computed automatically)
    smoothf: smoothing factor to be applyed to magnitude spectrum of sound 2 before morphing
    balancef: balance factor between booth sounds, 0 is sound 1 and 1 is sound 2
    """

    # read input sounds
    (fs, x1) = audio.read_wav(inputFile1)
    (fs, x2) = audio.read_wav(inputFile2)

    # compute analysis windows
    w1 = get_window(window1, M1)
    w2 = get_window(window2, M2)

    # perform morphing
    y = stft.morph(x1, x2, fs, w1, N1, w2, N2, H1, smoothf, balancef)

    # compute the magnitude and phase spectrogram of input sound (for plotting)
    mX1, pX1 = stft.from_audio(x1, w1, N1, H1)

    # compute the magnitude and phase spectrogram of output sound (for plotting)
    mY, pY = stft.from_audio(y, w1, N1, H1)

    # write output sound
    outputFile = 'output_sounds/' + os.path.basename(inputFile1)[:-4] + '_stftMorph.wav'
    audio.write_wav(y, fs, outputFile)

    # create figure to plot
    plt.figure(figsize=(12, 9))

    # frequency range to plot
    maxplotfreq = 10000.0
    # FIX: the spectrogram column bound was a float expression used directly as a
    # slice index (TypeError on Python 3); compute an integer bin count once and
    # reuse it so binFreq matches the sliced spectrogram width.
    maxplotbin = int(N1 * maxplotfreq / fs)

    # plot sound 1
    plt.subplot(4, 1, 1)
    plt.plot(np.arange(x1.size) / float(fs), x1)
    plt.axis([0, x1.size / float(fs), min(x1), max(x1)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # plot magnitude spectrogram of sound 1
    plt.subplot(4, 1, 2)
    numFrames = int(mX1.shape[0])
    frmTime = H1 * np.arange(numFrames) / float(fs)
    binFreq = fs * np.arange(maxplotbin + 1) / N1
    plt.pcolormesh(frmTime, binFreq, np.transpose(mX1[:, :maxplotbin + 1]))
    plt.xlabel('time (sec)')
    plt.ylabel('frequency (Hz)')
    plt.title('magnitude spectrogram of x')
    plt.autoscale(tight=True)

    # plot magnitude spectrogram of morphed sound
    plt.subplot(4, 1, 3)
    numFrames = int(mY.shape[0])
    frmTime = H1 * np.arange(numFrames) / float(fs)
    binFreq = fs * np.arange(maxplotbin + 1) / N1
    plt.pcolormesh(frmTime, binFreq, np.transpose(mY[:, :maxplotbin + 1]))
    plt.xlabel('time (sec)')
    plt.ylabel('frequency (Hz)')
    plt.title('magnitude spectrogram of y')
    plt.autoscale(tight=True)

    # plot the morphed sound
    plt.subplot(4, 1, 4)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()
    if interactive:
        plt.show()
    if plotFile:
        plt.savefig('output_plots/%s_%s_stft_morph.png' %
                    (files.strip_file(inputFile1), files.strip_file(inputFile2)))
def main(inputFile=demo_sound_path('sax-phrase-short.wav'), window='blackman', M=601, N=1024, t=-100,
         minSineDur=0.1, nH=100, minf0=350, maxf0=700, f0et=5, harmDevSlope=0.01,
         interactive=True, plotFile=False):
    """
    Perform analysis/synthesis using the harmonic plus residual model

    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size; N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks
    nH: maximum number of harmonics; minf0: minimum fundamental frequency in sound
    maxf0: maximum fundamental frequency in sound; f0et: maximum error accepted in f0 detection algorithm
    harmDevSlope: allowed deviation of harmonic tracks, higher harmonics have higher allowed deviation
    """
    # synthesis FFT size and its mandatory quarter hop
    Ns = 512
    H = 128

    # load the sound and build the analysis window
    fs, x = audio.read_wav(inputFile)
    win = get_window(window, M)

    # harmonic analysis plus residual extraction
    hfreq, hmag, hphase, xr = hpr.from_audio(x, fs, win, N, H, t, minSineDur, nH,
                                             minf0, maxf0, f0et, harmDevSlope)

    # residual spectrogram (only used for plotting)
    mXr, pXr = stft.from_audio(xr, win, N, H)

    # resynthesize harmonics plus residual
    y, yh = hpr.to_audio(hfreq, hmag, hphase, xr, Ns, H, fs)

    # write the harmonic, residual and summed outputs
    base_name = files.strip_file(inputFile)
    outputFileSines = 'output_sounds/%s_hprModel_sines.wav' % base_name
    outputFileResidual = 'output_sounds/%s_hprModel_residual.wav' % base_name
    outputFile = 'output_sounds/%s_hprModel.wav' % base_name
    audio.write_wav(yh, fs, outputFileSines)
    audio.write_wav(xr, fs, outputFileResidual)
    audio.write_wav(y, fs, outputFile)

    plt.figure(figsize=(12, 9))

    # frequency range to plot
    maxplotfreq = 5000.0

    # input waveform
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / float(fs), x)
    plt.axis([0, x.size / float(fs), min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # residual magnitude spectrogram
    plt.subplot(3, 1, 2)
    maxplotbin = int(N * maxplotfreq / fs)
    n_frames = int(mXr.shape[0])
    frame_times = H * np.arange(n_frames) / float(fs)
    bin_freqs = np.arange(maxplotbin + 1) * float(fs) / N
    plt.pcolormesh(frame_times, bin_freqs, np.transpose(mXr[:, :maxplotbin + 1]))
    plt.autoscale(tight=True)

    # overlay the harmonic tracks on the residual spectrogram
    if hfreq.shape[1] > 0:
        harms = hfreq * np.less(hfreq, maxplotfreq)
        harms[harms == 0] = np.nan
        n_frames = int(harms.shape[0])
        frame_times = H * np.arange(n_frames) / float(fs)
        plt.plot(frame_times, harms, color='k', ms=3, alpha=1)
        plt.xlabel('time(s)')
        plt.ylabel('frequency(Hz)')
        plt.autoscale(tight=True)
        plt.title('harmonics + residual spectrogram')

    # output waveform
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()
    if interactive:
        plt.show()
    if plotFile:
        plt.savefig('output_plots/%s_hpr_model.png' % files.strip_file(inputFile))
def initUI(self):
    """Build the Tk widgets for the stochastic-model GUI."""

    def make_param_entry(label_text, row, entry_padx, default):
        # label + small centered entry on the same grid row, pre-filled with default
        Label(self.parent, text=label_text).grid(row=row, column=0, sticky=W, padx=5, pady=(10, 2))
        entry = Entry(self.parent, justify=CENTER)
        entry["width"] = 5
        entry.grid(row=row, column=0, sticky=W, padx=(entry_padx, 5), pady=(10, 2))
        entry.delete(0, END)
        entry.insert(0, default)
        return entry

    # input-file chooser: label, path textbox, browse button and preview button
    Label(self.parent, text="Input file (.wav, mono and 44100 sampling rate):").grid(
        row=0, column=0, sticky=W, padx=5, pady=(10, 2))
    self.filelocation = Entry(self.parent)
    self.filelocation.focus_set()
    self.filelocation["width"] = 25
    self.filelocation.grid(row=1, column=0, sticky=W, padx=10)
    self.filelocation.delete(0, END)
    self.filelocation.insert(0, 'sounds/ocean.wav')

    self.open_file = Button(self.parent, text="Browse...", command=self.browse_file)  # see: def browse_file(self)
    self.open_file.grid(row=1, column=0, sticky=W, padx=(220, 6))  # next to the path textbox

    self.preview = Button(self.parent, text=">",
                          command=lambda: audio.play_wav(self.filelocation.get()),
                          bg="gray30", fg="white")
    self.preview.grid(row=1, column=0, sticky=W, padx=(306, 6))

    ## STOCHASTIC MODEL parameters
    self.H = make_param_entry("Hop size (H):", 2, 90, "256")
    self.N = make_param_entry("FFT size (N):", 3, 90, "512")
    self.stocf = make_param_entry("Decimation factor (bigger than 0, max of 1):", 4, 285, "0.1")

    # button that runs the model
    self.compute = Button(self.parent, text="Compute", command=self.compute_model,
                          bg="dark red", fg="white")
    self.compute.grid(row=5, column=0, padx=5, pady=(10, 2), sticky=W)

    # button that plays the synthesized output
    Label(self.parent, text="Stochastic:").grid(row=6, column=0, sticky=W, padx=5, pady=(10, 15))
    self.output = Button(self.parent, text=">",
                         command=lambda: audio.play_wav('output_sounds/' + strip_file(
                             self.filelocation.get()) + '_stochasticModel.wav'),
                         bg="gray30", fg="white")
    self.output.grid(row=6, column=0, padx=(80, 5), pady=(10, 15), sticky=W)

    # options for the file-open dialog
    self.file_opt = {
        'defaultextension': '.wav',
        'filetypes': [('All files', '.*'), ('Wav files', '.wav')],
        'initialdir': 'sounds/',
        'title': 'Open a mono audio file .wav with sample frequency 44100 Hz',
    }
def transformation_synthesis(inputFile, fs, tfreq, tmag,
                             freqScaling=np.array([0, 2.0, 1, .3]),
                             timeScaling=np.array([0, .0, .671, .671, 1.978, 1.978 + 1.0]),
                             interactive=True, plotFile=False):
    """
    Transform the analysis values returned by the analysis function and synthesize the sound

    inputFile: name of input file
    fs: sampling rate of input file
    tfreq, tmag: sinusoidal frequencies and magnitudes
    freqScaling: frequency scaling factors, in time-value pairs
    timeScaling: time scaling factors, in time-value pairs
    """
    # synthesis FFT size and its mandatory quarter hop
    Ns = 512
    H = 128

    # apply frequency scaling, then time scaling, to the sinusoidal tracks
    ytfreq = sine.scale_frequencies(tfreq, freqScaling)
    ytfreq, ytmag = sine.scale_time(ytfreq, tmag, timeScaling)

    # synthesize the transformed tracks and write the result
    y = sine.to_audio(ytfreq, ytmag, np.array([]), Ns, H, fs)
    outputFile = 'output_sounds/' + strip_file(inputFile) + '_sineModelTransformation.wav'
    audio.write_wav(y, fs, outputFile)

    plt.figure(figsize=(12, 6))

    # frequency range to plot
    maxplotfreq = 15000.0

    # transformed sinusoidal tracks (skipped when there are none)
    if ytfreq.shape[1] > 0:
        plt.subplot(2, 1, 1)
        tracks = np.copy(ytfreq)
        tracks = tracks * np.less(tracks, maxplotfreq)
        tracks[tracks <= 0] = np.nan
        n_frames = int(tracks.shape[0])
        frame_times = H * np.arange(n_frames) / float(fs)
        plt.plot(frame_times, tracks)
        plt.title('transformed sinusoidal tracks')
        plt.autoscale(tight=True)

    # output waveform
    plt.subplot(2, 1, 2)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()
    if interactive:
        plt.show()
    if plotFile:
        plt.savefig('output_plots/%s_sine_transformation_synthesis.png' % files.strip_file(inputFile))
def initUI(self):
    """Build the Tk widgets for the SPS (sines plus stochastic) model GUI:
    a file chooser with preview, one labeled entry per analysis parameter,
    a Compute button, and play buttons for the three output sounds."""
    choose_label = "Input file (.wav, mono and 44100 sampling rate):"
    Label(self.parent, text=choose_label).grid(row=0, column=0, sticky=W, padx=5, pady=(10, 2))

    # TEXTBOX TO PRINT PATH OF THE SOUND FILE
    self.filelocation = Entry(self.parent)
    self.filelocation.focus_set()
    self.filelocation["width"] = 25
    self.filelocation.grid(row=1, column=0, sticky=W, padx=10)
    self.filelocation.delete(0, END)
    self.filelocation.insert(0, 'sounds/bendir.wav')

    # BUTTON TO BROWSE SOUND FILE
    self.open_file = Button(
        self.parent, text="Browse...",
        command=self.browse_file)  # see: def browse_file(self)
    self.open_file.grid(row=1, column=0, sticky=W, padx=(220, 6))  # put it beside the filelocation textbox

    # BUTTON TO PREVIEW SOUND FILE
    self.preview = Button(
        self.parent, text=">",
        command=lambda: audio.play_wav(self.filelocation.get()),
        bg="gray30", fg="white")
    self.preview.grid(row=1, column=0, sticky=W, padx=(306, 6))

    ## SPS MODEL

    # ANALYSIS WINDOW TYPE
    wtype_label = "Window type:"
    Label(self.parent, text=wtype_label).grid(row=2, column=0, sticky=W, padx=5, pady=(10, 2))
    self.w_type = StringVar()
    self.w_type.set("hamming")  # initial value
    window_option = OptionMenu(self.parent, self.w_type, "rectangular", "hanning", "hamming",
                               "blackman", "blackmanharris")
    window_option.grid(row=2, column=0, sticky=W, padx=(95, 5), pady=(10, 2))

    # WINDOW SIZE
    M_label = "Window size (M):"
    Label(self.parent, text=M_label).grid(row=3, column=0, sticky=W, padx=5, pady=(10, 2))
    self.M = Entry(self.parent, justify=CENTER)
    self.M["width"] = 5
    self.M.grid(row=3, column=0, sticky=W, padx=(115, 5), pady=(10, 2))
    self.M.delete(0, END)
    self.M.insert(0, "2001")

    # FFT SIZE
    N_label = "FFT size (N) (power of two bigger than M):"
    Label(self.parent, text=N_label).grid(row=4, column=0, sticky=W, padx=5, pady=(10, 2))
    self.N = Entry(self.parent, justify=CENTER)
    self.N["width"] = 5
    self.N.grid(row=4, column=0, sticky=W, padx=(270, 5), pady=(10, 2))
    self.N.delete(0, END)
    self.N.insert(0, "2048")

    # THRESHOLD MAGNITUDE
    t_label = "Magnitude threshold (t) (in dB):"
    Label(self.parent, text=t_label).grid(row=5, column=0, sticky=W, padx=5, pady=(10, 2))
    self.t = Entry(self.parent, justify=CENTER)
    self.t["width"] = 5
    self.t.grid(row=5, column=0, sticky=W, padx=(205, 5), pady=(10, 2))
    self.t.delete(0, END)
    self.t.insert(0, "-80")

    # MIN DURATION SINUSOIDAL TRACKS
    minSineDur_label = "Minimum duration of sinusoidal tracks:"
    Label(self.parent, text=minSineDur_label).grid(row=6, column=0, sticky=W, padx=5, pady=(10, 2))
    self.minSineDur = Entry(self.parent, justify=CENTER)
    self.minSineDur["width"] = 5
    self.minSineDur.grid(row=6, column=0, sticky=W, padx=(250, 5), pady=(10, 2))
    self.minSineDur.delete(0, END)
    self.minSineDur.insert(0, "0.02")

    # MAX NUMBER PARALLEL SINUSOIDS
    maxnSines_label = "Maximum number of parallel sinusoids:"
    Label(self.parent, text=maxnSines_label).grid(row=7, column=0, sticky=W, padx=5, pady=(10, 2))
    self.maxnSines = Entry(self.parent, justify=CENTER)
    self.maxnSines["width"] = 5
    self.maxnSines.grid(row=7, column=0, sticky=W, padx=(250, 5), pady=(10, 2))
    self.maxnSines.delete(0, END)
    self.maxnSines.insert(0, "150")

    # FREQUENCY DEVIATION ALLOWED
    freqDevOffset_label = "Max frequency deviation in sinusoidal tracks (at freq 0):"
    Label(self.parent, text=freqDevOffset_label).grid(row=8, column=0, sticky=W, padx=5, pady=(10, 2))
    self.freqDevOffset = Entry(self.parent, justify=CENTER)
    self.freqDevOffset["width"] = 5
    self.freqDevOffset.grid(row=8, column=0, sticky=W, padx=(350, 5), pady=(10, 2))
    self.freqDevOffset.delete(0, END)
    self.freqDevOffset.insert(0, "10")

    # SLOPE OF THE FREQ DEVIATION
    freqDevSlope_label = "Slope of the frequency deviation (as function of freq):"
    Label(self.parent, text=freqDevSlope_label).grid(row=9, column=0, sticky=W, padx=5, pady=(10, 2))
    self.freqDevSlope = Entry(self.parent, justify=CENTER)
    self.freqDevSlope["width"] = 5
    self.freqDevSlope.grid(row=9, column=0, sticky=W, padx=(340, 5), pady=(10, 2))
    self.freqDevSlope.delete(0, END)
    self.freqDevSlope.insert(0, "0.001")

    # DECIMATION FACTOR
    stocf_label = "Stochastic approximation factor:"
    Label(self.parent, text=stocf_label).grid(row=10, column=0, sticky=W, padx=5, pady=(10, 2))
    self.stocf = Entry(self.parent, justify=CENTER)
    self.stocf["width"] = 5
    self.stocf.grid(row=10, column=0, sticky=W, padx=(210, 5), pady=(10, 2))
    self.stocf.delete(0, END)
    self.stocf.insert(0, "0.2")

    # BUTTON TO COMPUTE EVERYTHING
    self.compute = Button(self.parent, text="Compute", command=self.compute_model,
                          bg="dark red", fg="white")
    self.compute.grid(row=11, column=0, padx=5, pady=(10, 2), sticky=W)

    # BUTTON TO PLAY SINE OUTPUT
    output_label = "Sinusoidal:"
    Label(self.parent, text=output_label).grid(row=12, column=0, sticky=W, padx=5, pady=(10, 0))
    self.output = Button(
        self.parent, text=">",
        command=lambda: audio.play_wav('output_sounds/' + strip_file(
            self.filelocation.get()) + '_spsModel_sines.wav'),
        bg="gray30", fg="white")
    self.output.grid(row=12, column=0, padx=(80, 5), pady=(10, 0), sticky=W)

    # BUTTON TO PLAY STOCHASTIC OUTPUT
    # NOTE(review): grid rows jump from 12 to 22 here — presumably to leave room
    # for other widgets; confirm against the rest of the GUI layout.
    # Also, self.output is rebound for each play button, so only the last button
    # remains reachable through the attribute (the widgets themselves all stay alive).
    output_label = "Stochastic:"
    Label(self.parent, text=output_label).grid(row=22, column=0, sticky=W, padx=5, pady=(5, 0))
    self.output = Button(
        self.parent, text=">",
        command=lambda: audio.play_wav('output_sounds/' + strip_file(
            self.filelocation.get()) + '_spsModel_stochastic.wav'),
        bg="gray30", fg="white")
    self.output.grid(row=22, column=0, padx=(80, 5), pady=(5, 0), sticky=W)

    # BUTTON TO PLAY OUTPUT
    output_label = "Output:"
    Label(self.parent, text=output_label).grid(row=23, column=0, sticky=W, padx=5, pady=(5, 15))
    self.output = Button(
        self.parent, text=">",
        command=lambda: audio.play_wav('output_sounds/' + strip_file(
            self.filelocation.get()) + '_spsModel.wav'),
        bg="gray30", fg="white")
    self.output.grid(row=23, column=0, padx=(80, 5), pady=(5, 15), sticky=W)

    # define options for opening file
    self.file_opt = options = {}
    options['defaultextension'] = '.wav'
    options['filetypes'] = [('All files', '.*'), ('Wav files', '.wav')]
    options['initialdir'] = 'sounds/'
    options['title'] = 'Open a mono audio file .wav with sample frequency 44100 Hz'
def initUI(self):
    """Build the Tk widgets for the sine-model transformations GUI:
    a file chooser with preview, compact labeled entries for the analysis
    parameters, an Analysis/Synthesis button, transformation-factor entries,
    and an Apply Transformation button with play buttons for both outputs."""
    choose_label = "inputFile:"
    Label(self.parent, text=choose_label).grid(row=0, column=0, sticky=W, padx=5, pady=(10, 2))

    # TEXTBOX TO PRINT PATH OF THE SOUND FILE
    self.filelocation = Entry(self.parent)
    self.filelocation.focus_set()
    self.filelocation["width"] = 32
    self.filelocation.grid(row=0, column=0, sticky=W, padx=(70, 5), pady=(10, 2))
    self.filelocation.delete(0, END)
    self.filelocation.insert(0, 'sounds/mridangam.wav')

    # BUTTON TO BROWSE SOUND FILE
    open_file = Button(
        self.parent, text="...",
        command=self.browse_file)  # see: def browse_file(self)
    open_file.grid(row=0, column=0, sticky=W, padx=(340, 6), pady=(10, 2))  # put it beside the filelocation textbox

    # BUTTON TO PREVIEW SOUND FILE
    preview = Button(
        self.parent, text=">",
        command=lambda: audio.play_wav(self.filelocation.get()),
        bg="gray30", fg="white")
    preview.grid(row=0, column=0, sticky=W, padx=(385, 6), pady=(10, 2))

    ## SINE TRANSFORMATIONS ANALYSIS

    # ANALYSIS WINDOW TYPE
    wtype_label = "window:"
    Label(self.parent, text=wtype_label).grid(row=1, column=0, sticky=W, padx=5, pady=(10, 2))
    self.w_type = StringVar()
    self.w_type.set("hamming")  # initial value
    window_option = OptionMenu(self.parent, self.w_type, "rectangular", "hanning", "hamming",
                               "blackman", "blackmanharris")
    window_option.grid(row=1, column=0, sticky=W, padx=(65, 5), pady=(10, 2))

    # WINDOW SIZE
    M_label = "M:"
    Label(self.parent, text=M_label).grid(row=1, column=0, sticky=W, padx=(180, 5), pady=(10, 2))
    self.M = Entry(self.parent, justify=CENTER)
    self.M["width"] = 5
    self.M.grid(row=1, column=0, sticky=W, padx=(200, 5), pady=(10, 2))
    self.M.delete(0, END)
    self.M.insert(0, "801")

    # FFT SIZE
    N_label = "N:"
    Label(self.parent, text=N_label).grid(row=1, column=0, sticky=W, padx=(255, 5), pady=(10, 2))
    self.N = Entry(self.parent, justify=CENTER)
    self.N["width"] = 5
    self.N.grid(row=1, column=0, sticky=W, padx=(275, 5), pady=(10, 2))
    self.N.delete(0, END)
    self.N.insert(0, "2048")

    # THRESHOLD MAGNITUDE
    t_label = "t:"
    Label(self.parent, text=t_label).grid(row=1, column=0, sticky=W, padx=(330, 5), pady=(10, 2))
    self.t = Entry(self.parent, justify=CENTER)
    self.t["width"] = 5
    self.t.grid(row=1, column=0, sticky=W, padx=(348, 5), pady=(10, 2))
    self.t.delete(0, END)
    self.t.insert(0, "-90")

    # MIN DURATION SINUSOIDAL TRACKS
    minSineDur_label = "minSineDur:"
    Label(self.parent, text=minSineDur_label).grid(row=2, column=0, sticky=W, padx=(5, 5), pady=(10, 2))
    self.minSineDur = Entry(self.parent, justify=CENTER)
    self.minSineDur["width"] = 5
    self.minSineDur.grid(row=2, column=0, sticky=W, padx=(87, 5), pady=(10, 2))
    self.minSineDur.delete(0, END)
    self.minSineDur.insert(0, "0.01")

    # MAX NUMBER OF SINES
    maxnSines_label = "maxnSines:"
    Label(self.parent, text=maxnSines_label).grid(row=2, column=0, sticky=W, padx=(145, 5), pady=(10, 2))
    self.maxnSines = Entry(self.parent, justify=CENTER)
    self.maxnSines["width"] = 5
    self.maxnSines.grid(row=2, column=0, sticky=W, padx=(220, 5), pady=(10, 2))
    self.maxnSines.delete(0, END)
    self.maxnSines.insert(0, "150")

    # FREQUENCY DEVIATION ALLOWED
    freqDevOffset_label = "freqDevOffset:"
    Label(self.parent, text=freqDevOffset_label).grid(row=2, column=0, sticky=W, padx=(280, 5), pady=(10, 2))
    self.freqDevOffset = Entry(self.parent, justify=CENTER)
    self.freqDevOffset["width"] = 5
    self.freqDevOffset.grid(row=2, column=0, sticky=W, padx=(372, 5), pady=(10, 2))
    self.freqDevOffset.delete(0, END)
    self.freqDevOffset.insert(0, "20")

    # SLOPE OF THE FREQUENCY DEVIATION
    freqDevSlope_label = "freqDevSlope:"
    Label(self.parent, text=freqDevSlope_label).grid(row=3, column=0, sticky=W, padx=(5, 5), pady=(10, 2))
    self.freqDevSlope = Entry(self.parent, justify=CENTER)
    self.freqDevSlope["width"] = 5
    self.freqDevSlope.grid(row=3, column=0, sticky=W, padx=(98, 5), pady=(10, 2))
    self.freqDevSlope.delete(0, END)
    self.freqDevSlope.insert(0, "0.02")

    # BUTTON TO DO THE ANALYSIS OF THE SOUND
    self.compute = Button(self.parent, text="Analysis/Synthesis", command=self.analysis,
                          bg="dark red", fg="white")
    self.compute.grid(row=4, column=0, padx=5, pady=(10, 5), sticky=W)

    # BUTTON TO PLAY ANALYSIS/SYNTHESIS OUTPUT
    self.output = Button(
        self.parent, text=">",
        command=lambda: audio.play_wav('output_sounds/' + strip_file(
            self.filelocation.get()) + '_sineModel.wav'),
        bg="gray30", fg="white")
    self.output.grid(row=4, column=0, padx=(145, 5), pady=(10, 5), sticky=W)

    ###
    # SEPARATION LINE
    Frame(self.parent, height=1, width=50, bg="black").grid(row=5, pady=5, sticky=W + E)
    ###

    # FREQUENCY SCALING FACTORS
    freqScaling_label = "Frequency scaling factors (time, value pairs):"
    Label(self.parent, text=freqScaling_label).grid(row=6, column=0, sticky=W, padx=5, pady=(5, 2))
    self.freqScaling = Entry(self.parent, justify=CENTER)
    self.freqScaling["width"] = 35
    self.freqScaling.grid(row=7, column=0, sticky=W + E, padx=5, pady=(0, 2))
    self.freqScaling.delete(0, END)
    self.freqScaling.insert(0, "[0, 2.0, 1, .3]")

    # TIME SCALING FACTORS
    timeScaling_label = "Time scaling factors (in time, value pairs):"
    Label(self.parent, text=timeScaling_label).grid(row=8, column=0, sticky=W, padx=5, pady=(5, 2))
    self.timeScaling = Entry(self.parent, justify=CENTER)
    self.timeScaling["width"] = 35
    self.timeScaling.grid(row=9, column=0, sticky=W + E, padx=5, pady=(0, 2))
    self.timeScaling.delete(0, END)
    self.timeScaling.insert(0, "[0, .0, .671, .671, 1.978, 1.978+1.0]")

    # BUTTON TO DO THE SYNTHESIS
    # NOTE(review): self.compute is rebound here, so the attribute ends up
    # pointing at this second button; the Analysis/Synthesis button stays alive
    # through the Tk widget tree.
    self.compute = Button(self.parent, text="Apply Transformation",
                          command=self.transformation_synthesis, bg="dark green", fg="white")
    self.compute.grid(row=13, column=0, padx=5, pady=(10, 15), sticky=W)

    # BUTTON TO PLAY TRANSFORMATION SYNTHESIS OUTPUT
    self.transf_output = Button(
        self.parent, text=">",
        command=lambda: audio.play_wav('output_sounds/' + strip_file(
            self.filelocation.get()) + '_sineModelTransformation.wav'),
        bg="gray30", fg="white")
    self.transf_output.grid(row=13, column=0, padx=(165, 5), pady=(10, 15), sticky=W)

    # define options for opening file
    self.file_opt = options = {}
    options['defaultextension'] = '.wav'
    options['filetypes'] = [('All files', '.*'), ('Wav files', '.wav')]
    options['initialdir'] = 'sounds/'
    options['title'] = 'Open a mono audio file .wav with sample frequency 44100 Hz'
def analysis(inputFile=demo_sound_path('sax-phrase-short.wav'), window='blackman', M=601, N=1024, t=-100,
             minSineDur=0.1, nH=100, minf0=350, maxf0=700, f0et=5, harmDevSlope=0.01, stocf=0.1,
             interactive=True, plotFile=False):
    """
    Analyze a sound with the harmonic plus stochastic model

    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size
    N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks
    minSineDur: minimum duration of sinusoidal tracks
    nH: maximum number of harmonics
    minf0: minimum fundamental frequency in sound
    maxf0: maximum fundamental frequency in sound
    f0et: maximum error accepted in f0 detection algorithm
    harmDevSlope: allowed deviation of harmonic tracks, higher harmonics have higher allowed deviation
    stocf: decimation factor used for the stochastic approximation
    returns inputFile: input file name; fs: sampling rate of input file,
            hfreq, hmag: harmonic frequencies, magnitude; mYst: stochastic residual
    """

    # size of fft used in synthesis
    Ns = 512

    # hop size (has to be 1/4 of Ns)
    H = 128

    # read input sound
    (fs, x) = audio.read_wav(inputFile)

    # compute analysis window
    w = get_window(window, M)

    # compute the harmonic plus stochastic model of the whole sound
    hfreq, hmag, hphase, mYst = hps.from_audio(x, fs, w, N, H, t, nH, minf0, maxf0, f0et,
                                               harmDevSlope, minSineDur, Ns, stocf)

    # synthesize the harmonic plus stochastic model without original phases
    y, yh, yst = hps.to_audio(hfreq, hmag, np.array([]), mYst, Ns, H, fs)

    # write output sound
    outputFile = 'output_sounds/' + strip_file(inputFile) + '_hpsModel.wav'
    audio.write_wav(y, fs, outputFile)

    # create figure to plot
    plt.figure(figsize=(12, 9))

    # frequency range to plot
    maxplotfreq = 15000.0

    # plot the input sound
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / float(fs), x)
    plt.axis([0, x.size / float(fs), min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # plot spectrogram stochastic compoment
    plt.subplot(3, 1, 2)
    numFrames = int(mYst.shape[0])
    sizeEnv = int(mYst.shape[1])
    frmTime = H * np.arange(numFrames) / float(fs)
    # FIX: slice bounds must be integers on Python 3; the old code passed the float
    # expression sizeEnv * maxplotfreq / (.5 * fs) + 1 directly as a slice index
    # (TypeError). Computing the bin count once also keeps binFreq and the sliced
    # envelope the same length for pcolormesh.
    maxplotbin = int(sizeEnv * maxplotfreq / (.5 * fs))
    binFreq = (.5 * fs) * np.arange(maxplotbin + 1) / sizeEnv
    plt.pcolormesh(frmTime, binFreq, np.transpose(mYst[:, :maxplotbin + 1]))
    plt.autoscale(tight=True)

    # plot harmonic on top of stochastic spectrogram
    if (hfreq.shape[1] > 0):
        harms = hfreq * np.less(hfreq, maxplotfreq)
        harms[harms == 0] = np.nan
        numFrames = int(harms.shape[0])
        frmTime = H * np.arange(numFrames) / float(fs)
        plt.plot(frmTime, harms, color='k', ms=3, alpha=1)
        plt.xlabel('time (sec)')
        plt.ylabel('frequency (Hz)')
        plt.autoscale(tight=True)
        plt.title('harmonics + stochastic spectrogram')

    # plot the output sound
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()
    if interactive:
        plt.show(block=False)
    if plotFile:
        plt.savefig('output_plots/%s_hps_transformation_analysis.png' % files.strip_file(inputFile))

    return inputFile, fs, hfreq, hmag, mYst
def transformation_synthesis(inputFile, fs, hfreq, hmag, mYst,
                             freqScaling=np.array([0, 1.2, 2.01, 1.2, 2.679, .7, 3.146, .7]),
                             freqStretching=np.array([0, 1, 2.01, 1, 2.679, 1.5, 3.146, 1.5]),
                             timbrePreservation=1,
                             timeScaling=np.array([0, 0, 2.138, 2.138 - 1.0, 3.146, 3.146]),
                             interactive=True, plotFile=False):
    """
    transform the analysis values returned by the analysis function and synthesize the sound
    inputFile: name of input file
    fs: sampling rate of input file
    hfreq, hmag: harmonic frequencies and magnitudes
    mYst: stochastic residual
    freqScaling: frequency scaling factors, in time-value pairs (value of 1 no scaling)
    freqStretching: frequency stretching factors, in time-value pairs (value of 1 no stretching)
    timbrePreservation: 1 preserves original timbre, 0 it does not
    timeScaling: time scaling factors, in time-value pairs
    """

    # size of fft used in synthesis
    Ns = 512

    # hop size (has to be 1/4 of Ns)
    H = 128

    # frequency scaling of the harmonics
    hfreqt, hmagt = harmonic.scale_frequencies(hfreq, hmag, freqScaling, freqStretching,
                                               timbrePreservation, fs)

    # time scaling the sound
    yhfreq, yhmag, ystocEnv = hps.scale_time(hfreqt, hmagt, mYst, timeScaling)

    # synthesis from the transformed hps representation
    y, yh, yst = hps.to_audio(yhfreq, yhmag, np.array([]), ystocEnv, Ns, H, fs)

    # write output sound
    outputFile = 'output_sounds/' + strip_file(inputFile) + '_hpsModelTransformation.wav'
    audio.write_wav(y, fs, outputFile)

    # create figure to plot
    plt.figure(figsize=(12, 6))

    # frequency range to plot
    maxplotfreq = 15000.0

    # plot spectrogram of the transformed stochastic component
    plt.subplot(2, 1, 1)
    numFrames = int(ystocEnv.shape[0])
    sizeEnv = int(ystocEnv.shape[1])
    frmTime = H * np.arange(numFrames) / float(fs)
    # BUG FIX: the bin count must be an int — a float slice index raises
    # TypeError on Python 3; using the same count for binFreq keeps the
    # pcolormesh coordinates and data dimensions consistent
    numBins = int(sizeEnv * maxplotfreq / (.5 * fs)) + 1
    binFreq = (.5 * fs) * np.arange(numBins) / sizeEnv
    plt.pcolormesh(frmTime, binFreq, np.transpose(ystocEnv[:, :numBins]))
    plt.autoscale(tight=True)

    # plot transformed harmonic tracks on top of the stochastic spectrogram
    if (yhfreq.shape[1] > 0):
        harms = yhfreq * np.less(yhfreq, maxplotfreq)  # zero out harmonics above the plot range
        harms[harms == 0] = np.nan                     # NaN values are not drawn by plot
        numFrames = int(harms.shape[0])
        frmTime = H * np.arange(numFrames) / float(fs)
        plt.plot(frmTime, harms, color='k', ms=3, alpha=1)
        plt.xlabel('time (sec)')
        plt.ylabel('frequency (Hz)')
        plt.autoscale(tight=True)
        plt.title('harmonics + stochastic spectrogram')

    # plot the output sound
    plt.subplot(2, 1, 2)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()

    if interactive:
        plt.show()
    if plotFile:
        plt.savefig('output_plots/%s_hps_transformation_synthesis.png' % files.strip_file(inputFile))
def main(inputFile=demo_sound_path('bendir.wav'), window='hamming', M=2001, N=2048, t=-80, minSineDur=0.02,
         maxnSines=150, freqDevOffset=10, freqDevSlope=0.001, interactive=True, plotFile=False):
    """
    Analysis/synthesis of a sound with the sinusoidal plus residual model
    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size
    N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks
    minSineDur: minimum duration of sinusoidal tracks
    maxnSines: maximum number of parallel sinusoids
    freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0
    freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation
    """

    # synthesis FFT size and hop size (hop must be a quarter of Ns)
    Ns = 512
    H = 128

    # load the input signal and build the analysis window
    fs, x = audio.read_wav(inputFile)
    analysisWindow = get_window(window, M)

    # sinusoidal plus residual analysis
    trkFreq, trkMag, trkPhase, residual = spr.from_audio(
        x, fs, analysisWindow, N, H, t, minSineDur, maxnSines, freqDevOffset, freqDevSlope)

    # spectrogram of the residual signal
    residMag, residPhase = stft.from_audio(residual, analysisWindow, N, H)

    # re-synthesize: sum of sinusoids plus residual
    y, ys = spr.to_audio(trkFreq, trkMag, trkPhase, residual, Ns, H, fs)

    # build the three output file names from a common template
    stem = strip_file(inputFile)
    nameTemplate = 'output_sounds/%s_sprModel%s.wav'
    outputFileSines = nameTemplate % (stem, '_sines')
    outputFileResidual = nameTemplate % (stem, '_residual')
    outputFile = nameTemplate % (stem, '')

    # store sinusoidal, residual and summed signals
    audio.write_wav(ys, fs, outputFileSines)
    audio.write_wav(residual, fs, outputFileResidual)
    audio.write_wav(y, fs, outputFile)

    # figure with three stacked panels
    plt.figure(figsize=(12, 9))

    # only plot frequencies below this limit
    maxplotfreq = 5000.0

    # panel 1: input waveform
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / float(fs), x)
    plt.axis([0, x.size / float(fs), min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # panel 2: magnitude spectrogram of the residual
    plt.subplot(3, 1, 2)
    maxplotbin = int(N * maxplotfreq / fs)
    frameCount = int(residMag.shape[0])
    frameTimes = H * np.arange(frameCount) / float(fs)
    binFreqs = np.arange(maxplotbin + 1) * float(fs) / N
    plt.pcolormesh(frameTimes, binFreqs, np.transpose(residMag[:, :maxplotbin + 1]))
    plt.autoscale(tight=True)

    # overlay the sinusoidal tracks on the residual spectrogram
    if trkFreq.shape[1] > 0:
        tracks = trkFreq * np.less(trkFreq, maxplotfreq)
        tracks[tracks <= 0] = np.nan
        plt.plot(frameTimes, tracks, color='k')
        plt.title('sinusoidal tracks + residual spectrogram')
        plt.autoscale(tight=True)

    # panel 3: output waveform
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()

    if interactive:
        plt.show()
    if plotFile:
        plt.savefig('output_plots/%s_spr_model.png' % files.strip_file(inputFile))
def main(inputFile=demo_sound_path('vignesh.wav'), window='blackman', M=1201, N=2048, t=-90, minSineDur=0.1,
         nH=100, minf0=130, maxf0=300, f0et=7, harmDevSlope=0.01, interactive=True, plotFile=False):
    """
    Analysis and synthesis using the harmonic model
    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size; N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks
    nH: maximum number of harmonics; minf0: minimum fundamental frequency in sound
    maxf0: maximum fundamental frequency in sound; f0et: maximum error accepted in f0 detection algorithm
    harmDevSlope: allowed deviation of harmonic tracks, higher harmonics could have higher allowed deviation
    """

    # synthesis FFT size; the hop must be a quarter of it
    Ns = 512
    H = 128

    # load the input signal and build the analysis window
    (fs, x) = audio.read_wav(inputFile)
    win = get_window(window, M)

    # extract the harmonic content of the input
    hfreq, hmag, hphase = harmonic.from_audio(x, fs, win, N, H, t, nH, minf0, maxf0,
                                              f0et, harmDevSlope, minSineDur)

    # additive re-synthesis of the detected harmonics
    y = sine.to_audio(hfreq, hmag, hphase, Ns, H, fs)

    # save the harmonic reconstruction (monophonic, 44100 Hz)
    outputFile = 'output_sounds/' + files.strip_file(
        inputFile) + '_harmonicModel.wav'
    audio.write_wav(y, fs, outputFile)

    # three stacked panels
    plt.figure(figsize=(12, 9))

    # upper frequency limit for the harmonic plot
    maxplotfreq = 5000.0

    # panel 1: input waveform
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / float(fs), x)
    plt.axis([0, x.size / float(fs), min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # panel 2: frequencies of the harmonic tracks
    plt.subplot(3, 1, 2)
    if hfreq.shape[1] > 0:
        frameCount = hfreq.shape[0]
        frameTimes = H * np.arange(frameCount) / float(fs)
        hfreq[hfreq <= 0] = np.nan  # hide inactive track values
        plt.plot(frameTimes, hfreq)
        plt.axis([0, x.size / float(fs), 0, maxplotfreq])
        plt.title('frequencies of harmonic tracks')

    # panel 3: output waveform
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()

    if interactive:
        plt.show()
    if plotFile:
        plt.savefig('output_plots/%s_harmonic_model.png' % files.strip_file(inputFile))
def main(inputFile=demo_sound_path('piano.wav'), window='hamming', M=1024, N=1024, H=512,
         interactive=True, plotFile=False):
    """
    analysis/synthesis using the STFT
    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (choice of rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size
    N: fft size (power of two, bigger or equal than M)
    H: hop size (at least 1/2 of analysis window size to have good overlap-add)
    """

    # read input sound (monophonic with sampling rate of 44100)
    fs, x = audio.read_wav(inputFile)

    # compute analysis window
    w = get_window(window, M)

    # compute the magnitude and phase spectrogram
    mX, pX = stft.from_audio(x, w, N, H)

    # perform the inverse stft
    y = stft.to_audio(mX, pX, M, H)

    # write the sound resulting from the inverse stft
    outputFile = 'output_sounds/' + strip_file(inputFile) + '_stft.wav'
    audio.write_wav(y, fs, outputFile)

    # create figure to plot
    plt.figure(figsize=(12, 9))

    # frequency range to plot
    maxplotfreq = 5000.0

    # plot the input sound
    plt.subplot(4, 1, 1)
    plt.plot(np.arange(x.size) / float(fs), x)
    plt.axis([0, x.size / float(fs), min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # plot magnitude spectrogram
    plt.subplot(4, 1, 2)
    numFrames = int(mX.shape[0])
    frmTime = H * np.arange(numFrames) / float(fs)
    # BUG FIX: the bin count must be an int — a float slice index raises
    # TypeError on Python 3; using the same count for binFreq keeps the
    # pcolormesh coordinates and data dimensions consistent
    numBins = int(N * maxplotfreq / fs) + 1
    binFreq = fs * np.arange(numBins) / N
    plt.pcolormesh(frmTime, binFreq, np.transpose(mX[:, :numBins]))
    plt.xlabel('time (sec)')
    plt.ylabel('frequency (Hz)')
    plt.title('magnitude spectrogram')
    plt.autoscale(tight=True)

    # plot the phase spectrogram (derivative along frequency)
    plt.subplot(4, 1, 3)
    numFrames = int(pX.shape[0])
    frmTime = H * np.arange(numFrames) / float(fs)
    # np.diff drops one column, so the matching frequency axis drops its last entry
    plt.pcolormesh(frmTime, binFreq[:-1],
                   np.transpose(np.diff(pX[:, :numBins], axis=1)))
    plt.xlabel('time (sec)')
    plt.ylabel('frequency (Hz)')
    plt.title('phase spectrogram (derivative)')
    plt.autoscale(tight=True)

    # plot the output sound
    plt.subplot(4, 1, 4)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()

    if interactive:
        plt.show()
    if plotFile:
        plt.savefig('output_plots/%s_stft_model.png' % files.strip_file(inputFile))
def transformation_synthesis(inputFile1, fs, hfreq1, hmag1, stocEnv1, inputFile2, hfreq2, hmag2, stocEnv2,
                             hfreqIntp=np.array([0, 0, .1, 0, .9, 1, 1, 1]),
                             hmagIntp=np.array([0, 0, .1, 0, .9, 1, 1, 1]),
                             stocIntp=np.array([0, 0, .1, 0, .9, 1, 1, 1]),
                             interactive=True, plotFile=False):
    """
    Transform the analysis values returned by the analysis function and synthesize the sound
    inputFile1: name of input file 1
    fs: sampling rate of input file 1
    hfreq1, hmag1, stocEnv1: hps representation of sound 1
    inputFile2: name of input file 2
    hfreq2, hmag2, stocEnv2: hps representation of sound 2
    hfreqIntp: interpolation factor between the harmonic frequencies of the two sounds, 0 is sound 1 and 1 is sound 2 (time,value pairs)
    hmagIntp: interpolation factor between the harmonic magnitudes of the two sounds, 0 is sound 1 and 1 is sound 2 (time,value pairs)
    stocIntp: interpolation factor between the stochastic representation of the two sounds, 0 is sound 1 and 1 is sound 2 (time,value pairs)
    """

    # size of fft used in synthesis
    Ns = 512

    # hop size (has to be 1/4 of Ns)
    H = 128

    # morph the two sounds
    yhfreq, yhmag, ystocEnv = hps.morph(hfreq1, hmag1, stocEnv1, hfreq2, hmag2, stocEnv2,
                                        hfreqIntp, hmagIntp, stocIntp)

    # synthesis
    y, yh, yst = hps.to_audio(yhfreq, yhmag, np.array([]), ystocEnv, Ns, H, fs)

    # write output sound
    outputFile = 'output_sounds/' + os.path.basename(
        inputFile1)[:-4] + '_hpsMorph.wav'
    audio.write_wav(y, fs, outputFile)

    # create figure to plot
    plt.figure(figsize=(12, 9))

    # frequency range to plot
    maxplotfreq = 15000.0

    # plot spectrogram of the morphed stochastic component
    plt.subplot(2, 1, 1)
    numFrames = int(ystocEnv.shape[0])
    sizeEnv = int(ystocEnv.shape[1])
    frmTime = H * np.arange(numFrames) / float(fs)
    # BUG FIX: the bin count must be an int — a float slice index raises
    # TypeError on Python 3; using the same count for binFreq keeps the
    # pcolormesh coordinates and data dimensions consistent
    numBins = int(sizeEnv * maxplotfreq / (.5 * fs)) + 1
    binFreq = (.5 * fs) * np.arange(numBins) / sizeEnv
    plt.pcolormesh(frmTime, binFreq, np.transpose(ystocEnv[:, :numBins]))
    plt.autoscale(tight=True)

    # plot morphed harmonic tracks on top of the stochastic spectrogram
    if (yhfreq.shape[1] > 0):
        harms = np.copy(yhfreq)                      # copy so the caller's array is untouched
        harms = harms * np.less(harms, maxplotfreq)  # zero out harmonics above the plot range
        harms[harms == 0] = np.nan                   # NaN values are not drawn by plot
        numFrames = int(harms.shape[0])
        frmTime = H * np.arange(numFrames) / float(fs)
        plt.plot(frmTime, harms, color='k', ms=3, alpha=1)
        plt.xlabel('time (sec)')
        plt.ylabel('frequency (Hz)')
        plt.autoscale(tight=True)
        plt.title('harmonics + stochastic spectrogram')

    # plot the output sound
    plt.subplot(2, 1, 2)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()

    if interactive:
        plt.show()
    if plotFile:
        plt.savefig(
            'output_plots/%s_%s_hps_morph_synthesis.png' %
            (files.strip_file(inputFile1), files.strip_file(inputFile2)))
def initUI(self):
    """
    Build the Tkinter controls for the STFT model: file chooser with preview,
    analysis parameter entries (window type, M, N, H), a compute button, and
    an output-playback button.  Widgets are gridded into self.parent; entries
    are kept on self so compute_model can read them.
    NOTE(review): assumes Tkinter names (Label, Entry, Button, ...) and the
    helpers audio/strip_file are imported at module level — confirm.
    """
    choose_label = "Input file (.wav, mono and 44100 sampling rate):"
    Label(self.parent, text=choose_label).grid(row=0, column=0, sticky=W, padx=5, pady=(10, 2))

    # TEXTBOX TO PRINT PATH OF THE SOUND FILE
    self.filelocation = Entry(self.parent)
    self.filelocation.focus_set()
    self.filelocation["width"] = 25
    self.filelocation.grid(row=1, column=0, sticky=W, padx=10)
    self.filelocation.delete(0, END)
    self.filelocation.insert(0, 'sounds/piano.wav')  # default demo sound

    # BUTTON TO BROWSE SOUND FILE
    self.open_file = Button(
        self.parent, text="Browse...",
        command=self.browse_file)  # see: def browse_file(self)
    self.open_file.grid(row=1, column=0, sticky=W, padx=(220, 6))  # put it beside the filelocation textbox

    # BUTTON TO PREVIEW SOUND FILE
    self.preview = Button(
        self.parent, text=">",
        command=lambda: audio.play_wav(self.filelocation.get()),
        bg="gray30", fg="white")
    self.preview.grid(row=1, column=0, sticky=W, padx=(306, 6))

    ## STFT

    # ANALYSIS WINDOW TYPE
    wtype_label = "Window type:"
    Label(self.parent, text=wtype_label).grid(row=2, column=0, sticky=W, padx=5, pady=(10, 2))
    self.w_type = StringVar()
    self.w_type.set("hamming")  # initial value
    window_option = OptionMenu(self.parent, self.w_type, "rectangular", "hanning", "hamming",
                               "blackman", "blackmanharris")
    window_option.grid(row=2, column=0, sticky=W, padx=(95, 5), pady=(10, 2))

    # WINDOW SIZE
    M_label = "Window size (M):"
    Label(self.parent, text=M_label).grid(row=3, column=0, sticky=W, padx=5, pady=(10, 2))
    self.M = Entry(self.parent, justify=CENTER)
    self.M["width"] = 5
    self.M.grid(row=3, column=0, sticky=W, padx=(115, 5), pady=(10, 2))
    self.M.delete(0, END)
    self.M.insert(0, "1024")

    # FFT SIZE
    N_label = "FFT size (N) (power of two bigger than M):"
    Label(self.parent, text=N_label).grid(row=4, column=0, sticky=W, padx=5, pady=(10, 2))
    self.N = Entry(self.parent, justify=CENTER)
    self.N["width"] = 5
    self.N.grid(row=4, column=0, sticky=W, padx=(270, 5), pady=(10, 2))
    self.N.delete(0, END)
    self.N.insert(0, "1024")

    # HOP SIZE
    H_label = "Hop size (H):"
    Label(self.parent, text=H_label).grid(row=5, column=0, sticky=W, padx=5, pady=(10, 2))
    self.H = Entry(self.parent, justify=CENTER)
    self.H["width"] = 5
    self.H.grid(row=5, column=0, sticky=W, padx=(95, 5), pady=(10, 2))
    self.H.delete(0, END)
    self.H.insert(0, "512")

    # BUTTON TO COMPUTE EVERYTHING
    self.compute = Button(self.parent, text="Compute", command=self.compute_model, bg="dark red", fg="white")
    self.compute.grid(row=6, column=0, padx=5, pady=(10, 2), sticky=W)

    # BUTTON TO PLAY OUTPUT
    output_label = "Output:"
    Label(self.parent, text=output_label).grid(row=7, column=0, sticky=W, padx=5, pady=(10, 15))
    self.output = Button(
        self.parent, text=">",
        command=lambda: audio.play_wav('output_sounds/' + strip_file(
            self.filelocation.get()) + '_stft.wav'),
        bg="gray30", fg="white")
    self.output.grid(row=7, column=0, padx=(60, 5), pady=(10, 15), sticky=W)

    # define options for opening file
    # self.file_opt and options alias the same dict used by the file dialog
    self.file_opt = options = {}
    options['defaultextension'] = '.wav'
    options['filetypes'] = [('All files', '.*'), ('Wav files', '.wav')]
    options['initialdir'] = 'sounds/'
    options[
        'title'] = 'Open a mono audio file .wav with sample frequency 44100 Hz'
def main(inputFile=demo_sound_path('bendir.wav'), window='hamming', M=2001, N=2048, t=-80, minSineDur=0.02,
         maxnSines=150, freqDevOffset=10, freqDevSlope=0.001, interactive=True, plotFile=False):
    """
    Perform analysis/synthesis using the sinusoidal model
    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size; N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks
    maxnSines: maximum number of parallel sinusoids
    freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0
    freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation
    """

    # synthesis FFT size and hop size (hop must be a quarter of Ns)
    Ns = 512
    H = 128

    # load the input signal and build the analysis window
    fs, x = audio.read_wav(inputFile)
    win = get_window(window, M)

    # sinusoidal analysis of the input
    trkFreq, trkMag, trkPhase = sine.from_audio(x, fs, win, N, H, t, maxnSines,
                                                minSineDur, freqDevOffset, freqDevSlope)

    # re-synthesize from the sinusoidal representation
    y = sine.to_audio(trkFreq, trkMag, trkPhase, Ns, H, fs)

    # save the re-synthesized signal
    outputFile = 'output_sounds/' + strip_file(inputFile) + '_sineModel.wav'
    audio.write_wav(y, fs, outputFile)

    # three stacked panels
    plt.figure(figsize=(12, 9))

    # upper frequency limit for the track plot
    maxplotfreq = 5000.0

    # panel 1: input waveform
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / float(fs), x)
    plt.axis([0, x.size / float(fs), min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # panel 2: frequencies of the sinusoidal tracks
    plt.subplot(3, 1, 2)
    if trkFreq.shape[1] > 0:
        frameCount = trkFreq.shape[0]
        frameTimes = H * np.arange(frameCount) / float(fs)
        trkFreq[trkFreq <= 0] = np.nan  # hide inactive track values
        plt.plot(frameTimes, trkFreq)
        plt.axis([0, x.size / float(fs), 0, maxplotfreq])
        plt.title('frequencies of sinusoidal tracks')

    # panel 3: output waveform
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()

    if interactive:
        plt.show()
    if plotFile:
        plt.savefig('output_plots/%s_sine_model.png' % files.strip_file(inputFile))
def initUI(self):
    """
    Build the Tkinter controls for harmonic-model transformations: file chooser
    with preview, harmonic analysis parameters (window, M, N, t, minSineDur,
    nH, minf0, maxf0, f0et, harmDevSlope), an analysis/synthesis button, and the
    transformation inputs (frequency scaling/stretching, timbre preservation,
    time scaling) with a synthesis button.  Widgets are gridded into
    self.parent; entries are kept on self so the callbacks can read them.
    NOTE(review): assumes Tkinter names (Label, Entry, Button, ...) and the
    helpers audio/strip_file are imported at module level — confirm.
    """
    choose_label = "inputFile:"
    Label(self.parent, text=choose_label).grid(row=0, column=0, sticky=W, padx=5, pady=(10, 2))

    # TEXTBOX TO PRINT PATH OF THE SOUND FILE
    self.filelocation = Entry(self.parent)
    self.filelocation.focus_set()
    self.filelocation["width"] = 32
    self.filelocation.grid(row=0, column=0, sticky=W, padx=(70, 5), pady=(10, 2))
    self.filelocation.delete(0, END)
    self.filelocation.insert(0, 'sounds/vignesh.wav')  # default demo sound

    # BUTTON TO BROWSE SOUND FILE
    open_file = Button(
        self.parent, text="...",
        command=self.browse_file)  # see: def browse_file(self)
    open_file.grid(row=0, column=0, sticky=W, padx=(340, 6), pady=(10, 2))  # put it beside the filelocation textbox

    # BUTTON TO PREVIEW SOUND FILE
    preview = Button(
        self.parent, text=">",
        command=lambda: audio.play_wav(self.filelocation.get()),
        bg="gray30", fg="white")
    preview.grid(row=0, column=0, sticky=W, padx=(385, 6), pady=(10, 2))

    ## HARMONIC TRANSFORMATIONS ANALYSIS

    # ANALYSIS WINDOW TYPE
    wtype_label = "window:"
    Label(self.parent, text=wtype_label).grid(row=1, column=0, sticky=W, padx=5, pady=(10, 2))
    self.w_type = StringVar()
    self.w_type.set("blackman")  # initial value
    window_option = OptionMenu(self.parent, self.w_type, "rectangular", "hanning", "hamming",
                               "blackman", "blackmanharris")
    window_option.grid(row=1, column=0, sticky=W, padx=(65, 5), pady=(10, 2))

    # WINDOW SIZE
    M_label = "M:"
    Label(self.parent, text=M_label).grid(row=1, column=0, sticky=W, padx=(180, 5), pady=(10, 2))
    self.M = Entry(self.parent, justify=CENTER)
    self.M["width"] = 5
    self.M.grid(row=1, column=0, sticky=W, padx=(200, 5), pady=(10, 2))
    self.M.delete(0, END)
    self.M.insert(0, "1201")

    # FFT SIZE
    N_label = "N:"
    Label(self.parent, text=N_label).grid(row=1, column=0, sticky=W, padx=(255, 5), pady=(10, 2))
    self.N = Entry(self.parent, justify=CENTER)
    self.N["width"] = 5
    self.N.grid(row=1, column=0, sticky=W, padx=(275, 5), pady=(10, 2))
    self.N.delete(0, END)
    self.N.insert(0, "2048")

    # THRESHOLD MAGNITUDE
    t_label = "t:"
    Label(self.parent, text=t_label).grid(row=1, column=0, sticky=W, padx=(330, 5), pady=(10, 2))
    self.t = Entry(self.parent, justify=CENTER)
    self.t["width"] = 5
    self.t.grid(row=1, column=0, sticky=W, padx=(348, 5), pady=(10, 2))
    self.t.delete(0, END)
    self.t.insert(0, "-90")

    # MIN DURATION SINUSOIDAL TRACKS
    minSineDur_label = "minSineDur:"
    Label(self.parent, text=minSineDur_label).grid(row=2, column=0, sticky=W, padx=(5, 5), pady=(10, 2))
    self.minSineDur = Entry(self.parent, justify=CENTER)
    self.minSineDur["width"] = 5
    self.minSineDur.grid(row=2, column=0, sticky=W, padx=(87, 5), pady=(10, 2))
    self.minSineDur.delete(0, END)
    self.minSineDur.insert(0, "0.1")

    # MAX NUMBER OF HARMONICS
    nH_label = "nH:"
    Label(self.parent, text=nH_label).grid(row=2, column=0, sticky=W, padx=(145, 5), pady=(10, 2))
    self.nH = Entry(self.parent, justify=CENTER)
    self.nH["width"] = 5
    self.nH.grid(row=2, column=0, sticky=W, padx=(172, 5), pady=(10, 2))
    self.nH.delete(0, END)
    self.nH.insert(0, "100")

    # MIN FUNDAMENTAL FREQUENCY
    minf0_label = "minf0:"
    Label(self.parent, text=minf0_label).grid(row=2, column=0, sticky=W, padx=(227, 5), pady=(10, 2))
    self.minf0 = Entry(self.parent, justify=CENTER)
    self.minf0["width"] = 5
    self.minf0.grid(row=2, column=0, sticky=W, padx=(275, 5), pady=(10, 2))
    self.minf0.delete(0, END)
    self.minf0.insert(0, "130")

    # MAX FUNDAMENTAL FREQUENCY
    maxf0_label = "maxf0:"
    Label(self.parent, text=maxf0_label).grid(row=2, column=0, sticky=W, padx=(330, 5), pady=(10, 2))
    self.maxf0 = Entry(self.parent, justify=CENTER)
    self.maxf0["width"] = 5
    self.maxf0.grid(row=2, column=0, sticky=W, padx=(380, 5), pady=(10, 2))
    self.maxf0.delete(0, END)
    self.maxf0.insert(0, "300")

    # MAX ERROR ACCEPTED
    f0et_label = "f0et:"
    Label(self.parent, text=f0et_label).grid(row=3, column=0, sticky=W, padx=5, pady=(10, 2))
    self.f0et = Entry(self.parent, justify=CENTER)
    self.f0et["width"] = 3
    self.f0et.grid(row=3, column=0, sticky=W, padx=(42, 5), pady=(10, 2))
    self.f0et.delete(0, END)
    self.f0et.insert(0, "7")

    # ALLOWED DEVIATION OF HARMONIC TRACKS
    harmDevSlope_label = "harmDevSlope:"
    Label(self.parent, text=harmDevSlope_label).grid(row=3, column=0, sticky=W, padx=(90, 5), pady=(10, 2))
    self.harmDevSlope = Entry(self.parent, justify=CENTER)
    self.harmDevSlope["width"] = 5
    self.harmDevSlope.grid(row=3, column=0, sticky=W, padx=(190, 5), pady=(10, 2))
    self.harmDevSlope.delete(0, END)
    self.harmDevSlope.insert(0, "0.01")

    # BUTTON TO DO THE ANALYSIS OF THE SOUND
    self.compute = Button(self.parent, text="Analysis/Synthesis", command=self.analysis,
                          bg="dark red", fg="white")
    self.compute.grid(row=4, column=0, padx=5, pady=(10, 5), sticky=W)

    # BUTTON TO PLAY ANALYSIS/SYNTHESIS OUTPUT
    self.output = Button(
        self.parent, text=">",
        command=lambda: audio.play_wav('output_sounds/' + strip_file(
            self.filelocation.get()) + '_harmonicModel.wav'),
        bg="gray30", fg="white")
    self.output.grid(row=4, column=0, padx=(145, 5), pady=(10, 5), sticky=W)

    ###
    # SEPARATION LINE
    Frame(self.parent, height=1, width=50, bg="black").grid(row=5, pady=5, sticky=W + E)
    ###

    # FREQUENCY SCALING FACTORS
    freqScaling_label = "Frequency scaling factors (time, value pairs):"
    Label(self.parent, text=freqScaling_label).grid(row=6, column=0, sticky=W, padx=5, pady=(5, 2))
    self.freqScaling = Entry(self.parent, justify=CENTER)
    self.freqScaling["width"] = 35
    self.freqScaling.grid(row=7, column=0, sticky=W + E, padx=5, pady=(0, 2))
    self.freqScaling.delete(0, END)
    self.freqScaling.insert(0, "[0, 2.0, 1, 0.3]")

    # FREQUENCY STRETCHING FACTORS
    freqStretching_label = "Frequency stretching factors (time, value pairs):"
    Label(self.parent, text=freqStretching_label).grid(row=8, column=0, sticky=W, padx=5, pady=(5, 2))
    self.freqStretching = Entry(self.parent, justify=CENTER)
    self.freqStretching["width"] = 35
    self.freqStretching.grid(row=9, column=0, sticky=W + E, padx=5, pady=(0, 2))
    self.freqStretching.delete(0, END)
    self.freqStretching.insert(0, "[0, 1, 1, 1.5]")

    # TIMBRE PRESERVATION
    timbrePreservation_label = "Timbre preservation (1 preserves original timbre, 0 it does not):"
    Label(self.parent, text=timbrePreservation_label).grid(row=10, column=0, sticky=W, padx=5, pady=(5, 2))
    self.timbrePreservation = Entry(self.parent, justify=CENTER)
    self.timbrePreservation["width"] = 2
    self.timbrePreservation.grid(row=10, column=0, sticky=W + E, padx=(395, 5), pady=(5, 2))
    self.timbrePreservation.delete(0, END)
    self.timbrePreservation.insert(0, "1")

    # TIME SCALING FACTORS
    timeScaling_label = "Time scaling factors (time, value pairs):"
    Label(self.parent, text=timeScaling_label).grid(row=11, column=0, sticky=W, padx=5, pady=(5, 2))
    self.timeScaling = Entry(self.parent, justify=CENTER)
    self.timeScaling["width"] = 35
    self.timeScaling.grid(row=12, column=0, sticky=W + E, padx=5, pady=(0, 2))
    self.timeScaling.delete(0, END)
    self.timeScaling.insert(0, "[0, 0, 0.671, 0.671, 1.978, 1.978+1.0]")

    # BUTTON TO DO THE SYNTHESIS
    self.compute = Button(self.parent, text="Apply Transformation", command=self.transformation_synthesis,
                          bg="dark green", fg="white")
    self.compute.grid(row=13, column=0, padx=5, pady=(10, 15), sticky=W)

    # BUTTON TO PLAY TRANSFORMATION SYNTHESIS OUTPUT
    self.transf_output = Button(
        self.parent, text=">",
        command=lambda: audio.play_wav('output_sounds/' + strip_file(
            self.filelocation.get()) + '_harmonicModelTransformation.wav'),
        bg="gray30", fg="white")
    self.transf_output.grid(row=13, column=0, padx=(165, 5), pady=(10, 15), sticky=W)

    # define options for opening file
    # self.file_opt and options alias the same dict used by the file dialog
    self.file_opt = options = {}
    options['defaultextension'] = '.wav'
    options['filetypes'] = [('All files', '.*'), ('Wav files', '.wav')]
    options['initialdir'] = 'sounds/'
    options[
        'title'] = 'Open a mono audio file .wav with sample frequency 44100 Hz'
def initUI(self):
    """
    Build the Tkinter controls for stochastic-model transformations: file
    chooser with preview, the stochastic decimation factor (stocf), the time
    scaling envelope, a synthesis button, and an output-playback button.
    Widgets are gridded into self.parent; entries are kept on self so the
    transformation_synthesis callback can read them.
    NOTE(review): assumes Tkinter names (Label, Entry, Button, ...) and the
    helpers audio/strip_file are imported at module level — confirm.
    """
    choose_label = "inputFile:"
    Label(self.parent, text=choose_label).grid(row=0, column=0, sticky=W, padx=5, pady=(10, 2))

    # TEXTBOX TO PRINT PATH OF THE SOUND FILE
    self.filelocation = Entry(self.parent)
    self.filelocation.focus_set()
    self.filelocation["width"] = 25
    self.filelocation.grid(row=0, column=0, sticky=W, padx=(70, 5), pady=(10, 2))
    self.filelocation.delete(0, END)
    self.filelocation.insert(0, 'sounds/rain.wav')  # default demo sound

    # BUTTON TO BROWSE SOUND FILE
    open_file = Button(
        self.parent, text="...",
        command=self.browse_file)  # see: def browse_file(self)
    open_file.grid(row=0, column=0, sticky=W, padx=(280, 6), pady=(10, 2))  # put it beside the filelocation textbox

    # BUTTON TO PREVIEW SOUND FILE
    preview = Button(
        self.parent, text=">",
        command=lambda: audio.play_wav(self.filelocation.get()),
        bg="gray30", fg="white")
    preview.grid(row=0, column=0, sticky=W, padx=(325, 6), pady=(10, 2))

    ## STOCHASTIC TRANSFORMATIONS ANALYSIS

    # DECIMATION FACTOR
    stocf_label = "stocf:"
    Label(self.parent, text=stocf_label).grid(row=1, column=0, sticky=W, padx=(5, 5), pady=(10, 2))
    self.stocf = Entry(self.parent, justify=CENTER)
    self.stocf["width"] = 5
    self.stocf.grid(row=1, column=0, sticky=W, padx=(47, 5), pady=(10, 2))
    self.stocf.delete(0, END)
    self.stocf.insert(0, "0.1")

    # TIME SCALING FACTORS
    timeScaling_label = "Time scaling factors (time, value pairs):"
    Label(self.parent, text=timeScaling_label).grid(row=2, column=0, sticky=W, padx=5, pady=(5, 2))
    self.timeScaling = Entry(self.parent, justify=CENTER)
    self.timeScaling["width"] = 35
    self.timeScaling.grid(row=3, column=0, sticky=W + E, padx=5, pady=(0, 2))
    self.timeScaling.delete(0, END)
    self.timeScaling.insert(0, "[0, 0, 1, 2]")

    # BUTTON TO DO THE SYNTHESIS
    self.compute = Button(self.parent, text="Apply Transformation", command=self.transformation_synthesis,
                          bg="dark green", fg="white")
    self.compute.grid(row=13, column=0, padx=5, pady=(10, 15), sticky=W)

    # BUTTON TO PLAY TRANSFORMATION SYNTHESIS OUTPUT
    self.transf_output = Button(
        self.parent, text=">",
        command=lambda: audio.play_wav('output_sounds/' + strip_file(self.filelocation.get(
        )) + '_stochasticModelTransformation.wav'),
        bg="gray30", fg="white")
    self.transf_output.grid(row=13, column=0, padx=(165, 5), pady=(10, 15), sticky=W)

    # define options for opening file
    # self.file_opt and options alias the same dict used by the file dialog
    self.file_opt = options = {}
    options['defaultextension'] = '.wav'
    options['filetypes'] = [('All files', '.*'), ('Wav files', '.wav')]
    options['initialdir'] = 'sounds/'
    options[
        'title'] = 'Open a mono audio file .wav with sample frequency 44100 Hz'
def main(inputFile=demo_sound_path('piano.wav'), window='blackman', M=511, N=1024, time=.2,
         interactive=True, plotFile=False):
    """
    Analysis/synthesis of one frame of a sound using the DFT model
    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (choice of rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size (odd integer value)
    N: fft size (power of two, bigger or equal than than M)
    time: time to start analysis (in seconds)
    """

    # load the sound and build the analysis window
    fs, x = audio.read_wav(inputFile)
    win = get_window(window, M)

    # locate the analysis frame; refuse positions outside the signal
    startSample = int(time * fs)
    if startSample + M >= x.size or startSample < 0:
        raise ValueError("Time outside sound boundaries")
    segment = x[startSample:startSample + M]

    # forward DFT of the fragment, then inverse DFT (undo window normalization)
    mX, pX = dft.from_audio(segment, win, N)
    y = dft.to_audio(mX, pX, win.size) * sum(win)

    # four stacked panels
    plt.figure(figsize=(12, 9))

    # panel 1: the selected input fragment
    plt.subplot(4, 1, 1)
    plt.plot(time + np.arange(M) / float(fs), segment)
    plt.axis([time, time + M / float(fs), min(segment), max(segment)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # panel 2: magnitude spectrum
    plt.subplot(4, 1, 2)
    plt.plot(float(fs) * np.arange(mX.size) / float(N), mX, 'r')
    plt.axis([0, fs / 2.0, min(mX), max(mX)])
    plt.title('magnitude spectrum: mX')
    plt.ylabel('amplitude (dB)')
    plt.xlabel('frequency (Hz)')

    # panel 3: phase spectrum
    plt.subplot(4, 1, 3)
    plt.plot(float(fs) * np.arange(pX.size) / float(N), pX, 'c')
    plt.axis([0, fs / 2.0, min(pX), max(pX)])
    plt.title('phase spectrum: pX')
    plt.ylabel('phase (radians)')
    plt.xlabel('frequency (Hz)')

    # panel 4: fragment re-synthesized with the inverse DFT
    plt.subplot(4, 1, 4)
    plt.plot(time + np.arange(M) / float(fs), y)
    plt.axis([time, time + M / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()

    if interactive:
        plt.show()
    if plotFile:
        plt.savefig('output_plots/%s_dft_model.png' % files.strip_file(inputFile))
def main(inputFile=demo_sound_path('rain.wav'), stocf=0.1, timeScaling=np.array([0, 0, 1, 2]), interactive=True, plotFile=False):
    """
    Perform a time scaling of a sound using the stochastic model.

    inputFile: name of input sound file
    stocf: decimation factor used for the stochastic approximation
    timeScaling: time scaling factors, in time-value pairs
    """
    # hop size (the analysis/synthesis fft size is 2*H)
    H = 128

    fs, x = audio.read_wav(inputFile)

    # stochastic analysis, time scaling of the envelopes, then resynthesis
    mYst = stochastic.from_audio(x, H, H * 2, stocf)
    ystocEnv = stochastic.scale_time(mYst, timeScaling)
    y = stochastic.to_audio(ystocEnv, H, H * 2)

    # write the transformed sound
    outputFile = 'output_sounds/' + strip_file(inputFile) + '_stochasticModelTransformation.wav'
    audio.write_wav(y, fs, outputFile)

    plt.figure(figsize=(12, 9))

    def draw_waveform(position, signal, heading):
        # time-domain plot of a whole signal in one of the four subplot rows
        plt.subplot(4, 1, position)
        plt.plot(np.arange(signal.size) / float(fs), signal)
        plt.axis([0, signal.size / float(fs), min(signal), max(signal)])
        plt.ylabel('amplitude')
        plt.xlabel('time (sec)')
        if heading is not None:
            plt.title(heading)

    def draw_envelope(position, env, heading):
        # spectrogram-style plot of a stochastic envelope sequence
        plt.subplot(4, 1, position)
        frames = int(env.shape[0])
        frameTimes = H * np.arange(frames) / float(fs)
        freqs = np.arange(stocf * H) * float(fs) / (stocf * 2 * H)
        plt.pcolormesh(frameTimes, freqs, np.transpose(env))
        plt.autoscale(tight=True)
        plt.xlabel('time (sec)')
        plt.ylabel('frequency (Hz)')
        plt.title(heading)

    draw_waveform(1, x, 'input sound: x')
    draw_envelope(2, mYst, 'stochastic approximation')
    draw_envelope(3, ystocEnv, 'modified stochastic approximation')
    draw_waveform(4, y, None)  # the output subplot carries no title, as before

    plt.tight_layout()

    if interactive:
        plt.show()
    if plotFile:
        plt.savefig('output_plots/%s_stochastic_transformation.png' % files.strip_file(inputFile))
def initUI(self):
    """Build the Tk widget layout for the harmonic-plus-stochastic model GUI.

    Creates the file chooser row, the analysis-parameter entry fields
    (window type/size, FFT size, threshold, sinusoid/harmonic settings,
    stochastic factor), the Compute button, and three playback buttons
    for the '_hpsModel_sines', '_hpsModel_stochastic' and '_hpsModel'
    output files. Also prepares `self.file_opt` for the open-file dialog.
    """
    choose_label = "Input file (.wav, mono and 44100 sampling rate):"
    Label(self.parent, text=choose_label).grid(row=0, column=0, sticky=W, padx=5, pady=(10, 2))

    # TEXTBOX TO PRINT PATH OF THE SOUND FILE
    self.filelocation = Entry(self.parent)
    self.filelocation.focus_set()
    self.filelocation["width"] = 25
    self.filelocation.grid(row=1, column=0, sticky=W, padx=10)
    self.filelocation.delete(0, END)
    self.filelocation.insert(0, 'sounds/sax-phrase-short.wav')  # default demo sound

    # BUTTON TO BROWSE SOUND FILE
    self.open_file = Button(
        self.parent, text="Browse...", command=self.browse_file)  # see: def browse_file(self)
    self.open_file.grid(row=1, column=0, sticky=W, padx=(220, 6))  # put it beside the filelocation textbox

    # BUTTON TO PREVIEW SOUND FILE
    self.preview = Button(
        self.parent, text=">", command=lambda: audio.play_wav(self.filelocation.get()), bg="gray30", fg="white")
    self.preview.grid(row=1, column=0, sticky=W, padx=(306, 6))

    ## HARMONIC MODEL

    # ANALYSIS WINDOW TYPE
    wtype_label = "Window type:"
    Label(self.parent, text=wtype_label).grid(row=2, column=0, sticky=W, padx=5, pady=(10, 2))
    self.w_type = StringVar()
    self.w_type.set("blackman")  # initial value
    window_option = OptionMenu(self.parent, self.w_type, "rectangular", "hanning", "hamming", "blackman",
                               "blackmanharris")
    window_option.grid(row=2, column=0, sticky=W, padx=(95, 5), pady=(10, 2))

    # WINDOW SIZE (note: rows jump from 2 to 4 — row 3 is unused)
    M_label = "Window size (M):"
    Label(self.parent, text=M_label).grid(row=4, column=0, sticky=W, padx=5, pady=(10, 2))
    self.M = Entry(self.parent, justify=CENTER)
    self.M["width"] = 5
    self.M.grid(row=4, column=0, sticky=W, padx=(115, 5), pady=(10, 2))
    self.M.delete(0, END)
    self.M.insert(0, "601")

    # FFT SIZE
    N_label = "FFT size (N) (power of two bigger than M):"
    Label(self.parent, text=N_label).grid(row=5, column=0, sticky=W, padx=5, pady=(10, 2))
    self.N = Entry(self.parent, justify=CENTER)
    self.N["width"] = 5
    self.N.grid(row=5, column=0, sticky=W, padx=(270, 5), pady=(10, 2))
    self.N.delete(0, END)
    self.N.insert(0, "1024")

    # THRESHOLD MAGNITUDE
    t_label = "Magnitude threshold (t) (in dB):"
    Label(self.parent, text=t_label).grid(row=6, column=0, sticky=W, padx=5, pady=(10, 2))
    self.t = Entry(self.parent, justify=CENTER)
    self.t["width"] = 5
    self.t.grid(row=6, column=0, sticky=W, padx=(205, 5), pady=(10, 2))
    self.t.delete(0, END)
    self.t.insert(0, "-100")

    # MIN DURATION SINUSOIDAL TRACKS
    minSineDur_label = "Minimum duration of sinusoidal tracks:"
    Label(self.parent, text=minSineDur_label).grid(row=7, column=0, sticky=W, padx=5, pady=(10, 2))
    self.minSineDur = Entry(self.parent, justify=CENTER)
    self.minSineDur["width"] = 5
    self.minSineDur.grid(row=7, column=0, sticky=W, padx=(250, 5), pady=(10, 2))
    self.minSineDur.delete(0, END)
    self.minSineDur.insert(0, "0.1")

    # MAX NUMBER OF HARMONICS
    nH_label = "Maximum number of harmonics:"
    Label(self.parent, text=nH_label).grid(row=8, column=0, sticky=W, padx=5, pady=(10, 2))
    self.nH = Entry(self.parent, justify=CENTER)
    self.nH["width"] = 5
    self.nH.grid(row=8, column=0, sticky=W, padx=(215, 5), pady=(10, 2))
    self.nH.delete(0, END)
    self.nH.insert(0, "100")

    # MIN FUNDAMENTAL FREQUENCY
    minf0_label = "Minimum fundamental frequency:"
    Label(self.parent, text=minf0_label).grid(row=9, column=0, sticky=W, padx=5, pady=(10, 2))
    self.minf0 = Entry(self.parent, justify=CENTER)
    self.minf0["width"] = 5
    self.minf0.grid(row=9, column=0, sticky=W, padx=(220, 5), pady=(10, 2))
    self.minf0.delete(0, END)
    self.minf0.insert(0, "350")

    # MAX FUNDAMENTAL FREQUENCY
    maxf0_label = "Maximum fundamental frequency:"
    Label(self.parent, text=maxf0_label).grid(row=10, column=0, sticky=W, padx=5, pady=(10, 2))
    self.maxf0 = Entry(self.parent, justify=CENTER)
    self.maxf0["width"] = 5
    self.maxf0.grid(row=10, column=0, sticky=W, padx=(220, 5), pady=(10, 2))
    self.maxf0.delete(0, END)
    self.maxf0.insert(0, "700")

    # MAX ERROR ACCEPTED
    f0et_label = "Maximum error in f0 detection algorithm:"
    Label(self.parent, text=f0et_label).grid(row=11, column=0, sticky=W, padx=5, pady=(10, 2))
    self.f0et = Entry(self.parent, justify=CENTER)
    self.f0et["width"] = 5
    self.f0et.grid(row=11, column=0, sticky=W, padx=(265, 5), pady=(10, 2))
    self.f0et.delete(0, END)
    self.f0et.insert(0, "5")

    # ALLOWED DEVIATION OF HARMONIC TRACKS
    harmDevSlope_label = "Max frequency deviation in harmonic tracks:"
    Label(self.parent, text=harmDevSlope_label).grid(row=12, column=0, sticky=W, padx=5, pady=(10, 2))
    self.harmDevSlope = Entry(self.parent, justify=CENTER)
    self.harmDevSlope["width"] = 5
    self.harmDevSlope.grid(row=12, column=0, sticky=W, padx=(285, 5), pady=(10, 2))
    self.harmDevSlope.delete(0, END)
    self.harmDevSlope.insert(0, "0.01")

    # DECIMATION FACTOR
    stocf_label = "Stochastic approximation factor:"
    Label(self.parent, text=stocf_label).grid(row=13, column=0, sticky=W, padx=5, pady=(10, 2))
    self.stocf = Entry(self.parent, justify=CENTER)
    self.stocf["width"] = 5
    self.stocf.grid(row=13, column=0, sticky=W, padx=(210, 5), pady=(10, 2))
    self.stocf.delete(0, END)
    self.stocf.insert(0, "0.2")

    # BUTTON TO COMPUTE EVERYTHING
    self.compute = Button(self.parent, text="Compute", command=self.compute_model, bg="dark red", fg="white")
    self.compute.grid(row=14, column=0, padx=5, pady=(10, 2), sticky=W)

    # NOTE(review): self.output is rebound for each of the three playback
    # buttons below; only the last Button object stays referenced, though all
    # three widgets remain displayed. Confirm nothing else reads self.output.

    # BUTTON TO PLAY SINE OUTPUT
    output_label = "Sinusoidal:"
    Label(self.parent, text=output_label).grid(row=15, column=0, sticky=W, padx=5, pady=(10, 0))
    self.output = Button(
        self.parent, text=">", command=lambda: audio.play_wav('output_sounds/' + strip_file(
            self.filelocation.get()) + '_hpsModel_sines.wav'), bg="gray30", fg="white")
    self.output.grid(row=15, column=0, padx=(80, 5), pady=(10, 0), sticky=W)

    # BUTTON TO PLAY STOCHASTIC OUTPUT
    output_label = "Stochastic:"
    Label(self.parent, text=output_label).grid(row=16, column=0, sticky=W, padx=5, pady=(5, 0))
    self.output = Button(
        self.parent, text=">", command=lambda: audio.play_wav('output_sounds/' + strip_file(
            self.filelocation.get()) + '_hpsModel_stochastic.wav'), bg="gray30", fg="white")
    self.output.grid(row=16, column=0, padx=(80, 5), pady=(5, 0), sticky=W)

    # BUTTON TO PLAY OUTPUT
    output_label = "Output:"
    Label(self.parent, text=output_label).grid(row=17, column=0, sticky=W, padx=5, pady=(5, 15))
    self.output = Button(
        self.parent, text=">", command=lambda: audio.play_wav('output_sounds/' + strip_file(
            self.filelocation.get()) + '_hpsModel.wav'), bg="gray30", fg="white")
    self.output.grid(row=17, column=0, padx=(80, 5), pady=(5, 15), sticky=W)

    # define options for opening file
    self.file_opt = options = {}
    options['defaultextension'] = '.wav'
    options['filetypes'] = [('All files', '.*'), ('Wav files', '.wav')]
    options['initialdir'] = 'sounds/'
    options[
        'title'] = 'Open a mono audio file .wav with sample frequency 44100 Hz'
def analysis(inputFile=demo_sound_path('mridangam.wav'), window='hamming', M=801, N=2048, t=-90, minSineDur=0.01,
             maxnSines=150, freqDevOffset=20, freqDevSlope=0.02, interactive=True, plotFile=False):
    """
    Analyze a sound with the sine model
    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size; N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks
    maxnSines: maximum number of parallel sinusoids
    freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0
    freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation
    returns inputFile: input file name; fs: sampling rate of input file,
            tfreq, tmag: sinusoidal frequencies and magnitudes
    """
    Ns = 512  # size of fft used in synthesis
    H = 128   # hop size (has to be 1/4 of Ns)

    fs, x = audio.read_wav(inputFile)
    w = get_window(window, M)

    # sinusoidal analysis of the whole sound
    tfreq, tmag, tphase = sine.from_audio(x, fs, w, N, H, t, maxnSines, minSineDur, freqDevOffset, freqDevSlope)

    # resynthesize the sinusoids, discarding the original phases
    y = sine.to_audio(tfreq, tmag, np.array([]), Ns, H, fs)

    # write the resynthesized sound next to the input file's name
    outputFile = 'output_sounds/' + strip_file(inputFile) + '_sineModel.wav'
    audio.write_wav(y, fs, outputFile)

    plt.figure(figsize=(12, 9))
    maxplotfreq = 5000.0  # upper frequency limit of the track plot
    duration = x.size / float(fs)

    # input waveform
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / float(fs), x)
    plt.axis([0, duration, min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # sinusoidal tracks (only when any were detected)
    if tfreq.shape[1] > 0:
        plt.subplot(3, 1, 2)
        # keep only frequencies inside (0, maxplotfreq); NaN gaps break the lines elsewhere
        visible = np.where((tfreq > 0) & (tfreq < maxplotfreq), tfreq, np.nan)
        frameTimes = H * np.arange(int(visible.shape[0])) / float(fs)
        plt.plot(frameTimes, visible)
        plt.axis([0, duration, 0, maxplotfreq])
        plt.title('frequencies of sinusoidal tracks')

    # output waveform
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()
    if interactive:
        # non-blocking so the caller (a transformation GUI flow) can continue
        plt.show(block=False)
    if plotFile:
        plt.savefig('output_plots/%s_sine_transformation_analysis.png' % files.strip_file(inputFile))

    return inputFile, fs, tfreq, tmag
def main(inputFile=demo_sound_path('bendir.wav'), window='hamming', M=2001, N=2048, t=-80, minSineDur=0.02,
         maxnSines=150, freqDevOffset=10, freqDevSlope=0.001, stocf=0.2, interactive=True, plotFile=False):
    """
    Analyze and resynthesize a sound with the sinusoidal plus stochastic (SPS) model,
    writing the sinusoidal, stochastic, and summed outputs and plotting the result.

    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size; N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks
    maxnSines: maximum number of parallel sinusoids
    freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0
    freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation
    stocf: decimation factor used for the stochastic approximation
    """
    # size of fft used in synthesis
    Ns = 512
    # hop size (has to be 1/4 of Ns)
    H = 128
    # read input sound
    (fs, x) = audio.read_wav(inputFile)
    # compute analysis window
    w = get_window(window, M)
    # perform sinusoidal+stochastic analysis
    tfreq, tmag, tphase, stocEnv = sps.from_audio(x, fs, w, N, H, t, minSineDur, maxnSines, freqDevOffset,
                                                  freqDevSlope, stocf)
    # synthesize sinusoidal+stochastic model
    y, ys, yst = sps.to_audio(tfreq, tmag, tphase, stocEnv, Ns, H, fs)
    # output sound file (monophonic with sampling rate of 44100)
    baseFileName = strip_file(inputFile)
    outputFileSines, outputFileStochastic, outputFile = [
        'output_sounds/%s_spsModel%s.wav' % (baseFileName, i) for i in ('_sines', '_stochastic', '')
    ]
    # write sounds files for sinusoidal, residual, and the sum
    audio.write_wav(ys, fs, outputFileSines)
    audio.write_wav(yst, fs, outputFileStochastic)
    audio.write_wav(y, fs, outputFile)

    # create figure to plot
    plt.figure(figsize=(12, 9))
    # frequency range to plot
    maxplotfreq = 10000.0

    # plot the input sound
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / float(fs), x)
    plt.axis([0, x.size / float(fs), min(x), max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # plot spectrogram of the stochastic component
    plt.subplot(3, 1, 2)
    numFrames = int(stocEnv.shape[0])
    sizeEnv = int(stocEnv.shape[1])
    frmTime = H * np.arange(numFrames) / float(fs)
    # BUGFIX: the slice bound must be an int — the original float expression
    # raises TypeError on Python 3 ("slice indices must be integers").
    numBins = int(sizeEnv * maxplotfreq / (.5 * fs)) + 1  # envelope bins below maxplotfreq
    binFreq = (.5 * fs) * np.arange(sizeEnv * maxplotfreq / (.5 * fs)) / sizeEnv
    plt.pcolormesh(frmTime, binFreq, np.transpose(stocEnv[:, :numBins]))
    plt.autoscale(tight=True)

    # plot sinusoidal frequencies on top of stochastic component
    if (tfreq.shape[1] > 0):
        sines = tfreq * np.less(tfreq, maxplotfreq)
        sines[sines == 0] = np.nan  # NaN hides inactive/out-of-range track points
        numFrames = int(sines.shape[0])
        frmTime = H * np.arange(numFrames) / float(fs)
        plt.plot(frmTime, sines, color='k', ms=3, alpha=1)
        plt.xlabel('time(s)')
        plt.ylabel('Frequency(Hz)')
        plt.autoscale(tight=True)
        plt.title('sinusoidal + stochastic spectrogram')

    # plot the output sound
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), min(y), max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()
    if interactive:
        plt.show()
    if plotFile:
        plt.savefig('output_plots/%s_sps_model.png' % files.strip_file(inputFile))