def estimateF0(inputFile='../../sounds/cello-double-2.wav'):
    """
    Estimate the fundamental frequency (f0) of an audio signal, plot the f0
    contour on top of the spectrogram, and synthesize a sinusoid that follows
    the contour (written to 'synthF0Contour.wav').

    Input:
        inputFile (string): wav file including the path
    Output:
        f0 (numpy array): array of the estimated fundamental frequency (f0) values
    """
    ### Change these analysis parameter values
    window = "blackman"
    M = 4401
    N = 8192
    f0et = 7
    t = -90.0
    minf0 = 140
    maxf0 = 210

    ### Do not modify the code below
    H = 256                                # fix hop size
    fs, x = UF.wavread(inputFile)          # reading inputFile
    w = get_window(window, M)              # obtaining analysis window
    f0 = HM.f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et)  # estimating F0

    # Zero the contour outside the 0.5 s .. 4.0 s region of interest.
    # np.floor/np.ceil return floats, which are not valid slice indices on
    # Python 3 / NumPy >= 1.12, so cast explicitly to int.
    startFrame = int(np.floor(0.5 * fs / H))
    endFrame = int(np.ceil(4.0 * fs / H))
    f0[:startFrame] = 0
    f0[endFrame:] = 0

    y = UF.sinewaveSynth(f0, 0.8, H, fs)   # synthesize a sinusoid tracking f0
    UF.wavwrite(y, fs, 'synthF0Contour.wav')

    ## Code for plotting the f0 contour on top of the spectrogram
    maxplotfreq = 500.0                    # frequency range to plot
    fontSize = 16
    plot = 1                               # plot = 1 plots the f0 contour, otherwise saves it to a file.

    fig = plt.figure()
    ax = fig.add_subplot(111)

    # Same analysis parameters as above; stftAnal takes (x, w, N, H) —
    # fs is not part of its signature (consistent with the other calls here).
    mX, pX = stft.stftAnal(x, w, N, H)
    mX = np.transpose(mX[:, :int(N * (maxplotfreq / fs)) + 1])

    timeStamps = np.arange(mX.shape[1]) * H / float(fs)
    binFreqs = np.arange(mX.shape[0]) * fs / float(N)

    plt.pcolormesh(timeStamps, binFreqs, mX)
    plt.plot(timeStamps, f0, color='k', linewidth=1.5)
    # Blue vertical lines mark the 0.5 s and 4.0 s boundaries of the region.
    plt.plot([0.5, 0.5], [0, maxplotfreq], color='b', linewidth=1.5)
    plt.plot([4.0, 4.0], [0, maxplotfreq], color='b', linewidth=1.5)

    plt.autoscale(tight=True)
    plt.ylabel('Frequency (Hz)', fontsize=fontSize)
    plt.xlabel('Time (s)', fontsize=fontSize)
    plt.legend(('f0',))

    xLim = ax.get_xlim()
    yLim = ax.get_ylim()
    ax.set_aspect((xLim[1] - xLim[0]) / (2.0 * (yLim[1] - yLim[0])))

    if plot == 1:  # save the plot too!
        plt.autoscale(tight=True)
        plt.show()
    else:
        fig.tight_layout()
        fig.savefig('f0_over_Spectrogram.png', dpi=150, bbox_inches='tight')

    return f0
def estimateF0(inputFile='../../sounds/cello-double-2.wav'):
    """
    Estimate the fundamental frequency (f0) of an audio signal, plot the f0
    contour on top of the spectrogram, and synthesize a sinusoid that follows
    the contour (written to 'synthF0Contour.wav').

    Input:
        inputFile (string): wav file including the path
    Output:
        f0 (numpy array): array of the estimated fundamental frequency (f0) values
    """
    ### Change these analysis parameter values marked as XX
    window = 'blackman'
    M = 4001
    N = 4096
    f0et = 11
    t = -80
    minf0 = 130
    maxf0 = 210

    ### Do not modify the code below
    H = 256                                # fix hop size
    fs, x = UF.wavread(inputFile)          # reading inputFile
    w = get_window(window, M)              # obtaining analysis window

    ### Method 1
    f0 = HM.f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et)  # estimating F0

    # Zero the contour outside the 0.5 s .. 4.0 s region of interest.
    # np.floor/np.ceil return floats, which are not valid slice indices on
    # Python 3 / NumPy >= 1.12, so cast explicitly to int.
    startFrame = int(np.floor(0.5 * fs / H))
    endFrame = int(np.ceil(4.0 * fs / H))
    f0[:startFrame] = 0
    f0[endFrame:] = 0

    y = UF.sinewaveSynth(f0, 0.8, H, fs)   # synthesize a sinusoid tracking f0
    UF.wavwrite(y, fs, 'synthF0Contour.wav')

    ## Code for plotting the f0 contour on top of the spectrogram
    maxplotfreq = 500.0                    # frequency range to plot
    fontSize = 16
    plot = 1

    fig = plt.figure()
    ax = fig.add_subplot(111)

    mX, pX = stft.stftAnal(x, w, N, H)     # using same params as used for analysis
    mX = np.transpose(mX[:, :int(N * (maxplotfreq / fs)) + 1])

    timeStamps = np.arange(mX.shape[1]) * H / float(fs)
    binFreqs = np.arange(mX.shape[0]) * fs / float(N)

    plt.pcolormesh(timeStamps, binFreqs, mX)
    plt.plot(timeStamps, f0, color='k', linewidth=1.5)
    # Blue vertical lines mark the 0.5 s and 4.0 s boundaries of the region.
    plt.plot([0.5, 0.5], [0, maxplotfreq], color='b', linewidth=1.5)
    plt.plot([4.0, 4.0], [0, maxplotfreq], color='b', linewidth=1.5)

    plt.autoscale(tight=True)
    plt.ylabel('Frequency (Hz)', fontsize=fontSize)
    plt.xlabel('Time (s)', fontsize=fontSize)
    plt.legend(('f0',))

    xLim = ax.get_xlim()
    yLim = ax.get_ylim()
    ax.set_aspect((xLim[1] - xLim[0]) / (2.0 * (yLim[1] - yLim[0])))

    if plot == 1:  # save the plot too!
        plt.autoscale(tight=True)
        plt.show()
    else:
        fig.tight_layout()
        fig.savefig('f0_over_Spectrogram.png', dpi=150, bbox_inches='tight')

    return f0
# Estimate and plot the f0 contour of piano.wav with the TWM algorithm,
# then synthesize it as a sinusoid and save plot + audio.
(fs, x) = UF.wavread('../../../sounds/piano.wav')
w = np.blackman(1501)          # analysis window
N = 2048                       # FFT size
t = -90                        # peak-picking threshold (dB)
minf0 = 100
maxf0 = 300
f0et = 1                       # f0 error threshold
maxnpeaksTwm = 4
H = 128                        # hop size
x1 = x[int(1.5 * fs):int(1.8 * fs)]

plt.figure(1, figsize=(9, 7))
mX, pX = STFT.stftAnal(x, w, N, H)
f0 = HM.f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et)
f0 = UF.cleaningTrack(f0, 5)   # remove short spurious f0 track segments
yf0 = UF.sinewaveSynth(f0, .8, H, fs)
f0[f0 == 0] = np.nan           # gaps plot as breaks instead of dropping to 0 Hz

maxplotfreq = 800.0
numFrames = int(mX[:, 0].size)
frmTime = H * np.arange(numFrames) / float(fs)
# Use one shared integer bin count for both the frequency axis and the
# spectrogram slice: the original arange(N*maxplotfreq/fs) produced one
# fewer bin than the [:int(N*maxplotfreq/fs+1)] slice kept, so pcolormesh
# received mismatched axis/data dimensions (and float arange is deprecated).
numBins = int(N * maxplotfreq / fs) + 1
binFreq = fs * np.arange(numBins) / N
plt.pcolormesh(frmTime, binFreq, np.transpose(mX[:, :numBins]))
plt.autoscale(tight=True)

plt.plot(frmTime, f0, linewidth=2, color='k')
plt.autoscale(tight=True)
plt.title('mX + f0 (piano.wav), TWM')

plt.tight_layout()
plt.savefig('f0Twm-piano.png')
UF.wavwrite(yf0, fs, 'f0Twm-piano.wav')
# Estimate and plot the f0 contour of piano.wav with the TWM algorithm,
# then synthesize it as a sinusoid and save the plot.
(fs, x) = UF.wavread('../../../sounds/piano.wav')
w = np.blackman(1501)          # analysis window
N = 2048                       # FFT size
t = -90                        # peak-picking threshold (dB)
minf0 = 100
maxf0 = 300
f0et = 1                       # f0 error threshold
maxnpeaksTwm = 4
H = 128                        # hop size
x1 = x[int(1.5 * fs):int(1.8 * fs)]

plt.figure(1, figsize=(9, 7))
mX, pX = STFT.stftAnal(x, w, N, H)
f0 = HM.f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et)
f0 = UF.cleaningTrack(f0, 5)   # remove short spurious f0 track segments
yf0 = UF.sinewaveSynth(f0, .8, H, fs)
f0[f0 == 0] = np.nan           # gaps plot as breaks instead of dropping to 0 Hz

maxplotfreq = 800.0
numFrames = int(mX[:, 0].size)
frmTime = H * np.arange(numFrames) / float(fs)
# Use one shared integer bin count for both the frequency axis and the
# spectrogram slice: the original arange(N*maxplotfreq/fs) produced one
# fewer bin than the [:int(N*maxplotfreq/fs+1)] slice kept, so pcolormesh
# received mismatched axis/data dimensions (and float arange is deprecated).
numBins = int(N * maxplotfreq / fs) + 1
binFreq = fs * np.arange(numBins) / N
plt.pcolormesh(frmTime, binFreq, np.transpose(mX[:, :numBins]))
plt.autoscale(tight=True)

plt.plot(frmTime, f0, linewidth=2, color='k')
plt.autoscale(tight=True)
plt.title('mX + f0 (piano.wav), TWM')

plt.tight_layout()
plt.savefig('f0Twm-piano.png')
# Predominant-melody extraction: salience function -> salience peaks ->
# pitch contours -> melody pitch; then synthesize the pitch track and
# plot it over the STFT magnitude spectrogram.
salience = run_pitch_salience_function(peak_frequencies, peak_magnitudes)
salience_peaks_bins, salience_peaks_saliences = run_pitch_salience_function_peaks(salience)
pool.add('allframes_salience_peaks_bins', salience_peaks_bins)
pool.add('allframes_salience_peaks_saliences', salience_peaks_saliences)

contours_bins, contours_saliences, contours_start_times, duration = run_pitch_contours(
    pool['allframes_salience_peaks_bins'],
    pool['allframes_salience_peaks_saliences'])
pitch, confidence = run_pitch_contours_melody(contours_bins,
                                              contours_saliences,
                                              contours_start_times,
                                              duration)
yf0 = UF.sinewaveSynth(pitch, .6, hopSize, sampleRate)

figure(1, figsize=(9, 6))

# stftAnal takes (x, w, N, H) — the extra sampleRate argument is not part
# of its signature (consistent with the other calls in this file).
mX, pX = STFT.stftAnal(audio, hamming(frameSize), frameSize, hopSize)
maxplotfreq = 3000.0
numFrames = int(mX[:, 0].size)
frmTime = hopSize * arange(numFrames) / float(sampleRate)
# Integer bin count shared by the frequency axis and the spectrogram slice:
# the original used a float slice index (TypeError on Python 3) and a float
# arange stop, with an off-by-one between axis and data.
numBins = int(frameSize * maxplotfreq / sampleRate) + 1
binFreq = sampleRate * arange(numBins) / frameSize
plt.pcolormesh(frmTime, binFreq, np.transpose(mX[:, :numBins]))
plt.autoscale(tight=True)

offset = .5 * frameSize / sampleRate       # window-centering offset (seconds)
time = hopSize * arange(size(pitch)) / float(sampleRate)
pitch[pitch == 0] = nan                    # gaps plot as breaks, not 0 Hz
plot(time, pitch, color='k', linewidth=2)
# Predominant-melody extraction: salience function -> salience peaks ->
# pitch contours -> melody pitch; then synthesize the pitch track and
# plot the STFT magnitude spectrogram.
salience = run_pitch_salience_function(peak_frequencies, peak_magnitudes)
salience_peaks_bins, salience_peaks_saliences = run_pitch_salience_function_peaks(
    salience)
pool.add('allframes_salience_peaks_bins', salience_peaks_bins)
pool.add('allframes_salience_peaks_saliences', salience_peaks_saliences)

contours_bins, contours_saliences, contours_start_times, duration = run_pitch_contours(
    pool['allframes_salience_peaks_bins'],
    pool['allframes_salience_peaks_saliences'])
pitch, confidence = run_pitch_contours_melody(contours_bins,
                                              contours_saliences,
                                              contours_start_times,
                                              duration)
yf0 = UF.sinewaveSynth(pitch, .6, hopSize, sampleRate)

figure(1, figsize=(9, 6))

mX, pX = STFT.stftAnal(audio, hamming(frameSize), frameSize, hopSize)
maxplotfreq = 3000.0
numFrames = int(mX[:, 0].size)
frmTime = hopSize * arange(numFrames) / float(sampleRate)
# Integer bin count shared by the frequency axis and the spectrogram slice:
# the original used a float slice index (TypeError on Python 3) and a float
# arange stop, with an off-by-one between axis and data.
numBins = int(frameSize * maxplotfreq / sampleRate) + 1
binFreq = sampleRate * arange(numBins) / frameSize
plt.pcolormesh(frmTime, binFreq, np.transpose(mX[:, :numBins]))
plt.autoscale(tight=True)

offset = .5 * frameSize / sampleRate       # window-centering offset (seconds)
time = hopSize * arange(size(pitch)) / float(sampleRate)
pitch[pitch == 0] = nan                    # gaps plot as breaks, not 0 Hz