def get_feat(wav_list_prefix, wav_path, feat_path, task, fftsize=256, hopsize=64):
    """Extract STFT features and trimmed waveforms for every utterance of a task.

    Utterance names are read one per line from ``<wav_list_prefix>_<task>_mix``.
    For each name the ``s1``, ``s2`` and ``mix`` wav files below
    ``wav_path + task + '/'`` are loaded and two files are written to
    ``feat_path/task/``:

    * ``<name>.npy``: real and imaginary STFT parts of speaker 1, speaker 2
      and the mixture (in that order), stacked along axis 0 as float32.
    * ``<name>_wave.npy``: the three time-domain signals, truncated to the
      span covered by whole frames of ``fftsize`` samples at hop ``hopsize``.

    Args:
        wav_list_prefix: Prefix of the list file name.
        wav_path: Root directory of the wav files; it is concatenated directly
            with ``task``, so it must already end with a path separator.
        feat_path: Root directory of the output ``.npy`` files.
        task: Name of the sub-directory (data split) to process.
        fftsize: FFT length; the first ``fftsize // 2 + 1`` STFT bins are kept.
        hopsize: Hop size used to compute the truncated waveform length.
    """
    wav_folders = wav_path + task + '/'
    wav_list = wav_list_prefix + '_' + task + '_mix'
    output_dir = feat_path + '/' + task + '/'
    # Floor division keeps the slice bound an int: ``fftsize/2+1`` is a float
    # on Python 3 and cannot be used as a slice index.
    n_bins = fftsize // 2 + 1

    with open(wav_list, 'r') as f:
        for file, line in enumerate(f):
            print(task + ' file: ' + str(file+1))

            # Load wav files and scale by 2**15.
            # NOTE(review): the scaling assumes 16-bit PCM input - confirm.
            line = line.split('\n')[0]
            sr, clean_audio_1 = wav_read(wav_folders+'s1/'+line+'.wav')
            clean_audio_1 = clean_audio_1.astype('float32')/np.power(2, 15)
            sr, clean_audio_2 = wav_read(wav_folders+'s2/'+line+'.wav')
            clean_audio_2 = clean_audio_2.astype('float32')/np.power(2, 15)
            sr, mix_audio = wav_read(wav_folders+'mix/'+line+'.wav')
            mix_audio = mix_audio.astype('float32')/np.power(2, 15)

            # STFT, keeping only the first n_bins frequency bins.
            # NOTE(review): fftsize/hopsize are not forwarded to stft(), so
            # they presumably have to match its defaults - confirm.
            Zxx_1 = stft(clean_audio_1)[:, 0:n_bins]
            Zxx_2 = stft(clean_audio_2)[:, 0:n_bins]
            Zxx_mix = stft(mix_audio)[:, 0:n_bins]

            # Store real and imaginary STFT of speaker1, speaker2 and mixture
            Zxx = np.stack((np.real(Zxx_1).astype('float32'),
                            np.imag(Zxx_1).astype('float32'),
                            np.real(Zxx_2).astype('float32'),
                            np.imag(Zxx_2).astype('float32'),
                            np.real(Zxx_mix).astype('float32'),
                            np.imag(Zxx_mix).astype('float32')), axis=0)

            # Save features and targets to npy files
            np.save(output_dir+line, Zxx)

            # Save time-domain waveform to npy file: end of the last full frame
            audio_len = range(0, len(clean_audio_1)-fftsize+1, hopsize)[-1] + fftsize
            audio = np.stack((clean_audio_1[:audio_len],
                              clean_audio_2[:audio_len],
                              mix_audio[:audio_len]), axis=0)
            np.save(output_dir+line+'_wave', audio)
def comparePlot(signal1, signal2, Fs, fft_size=512, norm=False, equal=False, title1=None, title2=None):
    """Plot two signals for comparison: waveforms on top, spectrograms below.

    Args:
        signal1: First signal (array).
        signal2: Second signal (array).
        Fs: Sampling frequency, used for the time axis and the spectrograms.
        fft_size: STFT frame length; the hop is ``fft_size // 2``.
        norm: If true, normalise the signals before plotting.
        equal: With ``norm``, normalise each signal by its own peak instead of
            by the common peak of both signals.
        title1: Optional title of the first waveform plot.
        title2: Optional title of the second waveform plot.
    """
    import matplotlib.pyplot as plt

    td_amp = np.maximum(np.abs(signal1).max(), np.abs(signal2).max())

    if norm:
        # Divide out of place: the caller's arrays stay untouched and integer
        # inputs no longer fail on in-place true division.
        if equal:
            signal1 = signal1 / np.abs(signal1).max()
            signal2 = signal2 / np.abs(signal2).max()
        else:
            signal1 = signal1 / td_amp
            signal2 = signal2 / td_amp
        td_amp = 1.

    plt.subplot(2, 2, 1)
    plt.plot(np.arange(len(signal1))/float(Fs), signal1)
    plt.axis('tight')
    plt.ylim(-td_amp, td_amp)
    if title1 is not None:
        plt.title(title1)

    plt.subplot(2, 2, 2)
    plt.plot(np.arange(len(signal2))/float(Fs), signal2)
    plt.axis('tight')
    plt.ylim(-td_amp, td_amp)
    if title2 is not None:
        plt.title(title2)

    import stft
    import windows

    eps = constants.get('eps')

    # Floor division: the hop must stay an integer on Python 3.
    hop = fft_size // 2
    F1 = stft.stft(signal1, fft_size, hop, win=windows.hann(fft_size))
    F2 = stft.stft(signal2, fft_size, hop, win=windows.hann(fft_size))

    # try a fancy way to set the scale to avoid having the spectrum
    # dominated by a few outliers
    p_min = 1
    p_max = 99.5
    all_vals = np.concatenate((dB(F1+eps), dB(F2+eps))).flatten()
    vmin, vmax = np.percentile(all_vals, [p_min, p_max])

    cmap = 'jet'
    interpolation = 'sinc'

    plt.subplot(2, 2, 3)
    stft.spectroplot(F1.T, fft_size, hop, Fs, vmin=vmin, vmax=vmax,
                     cmap=plt.get_cmap(cmap), interpolation=interpolation)

    plt.subplot(2, 2, 4)
    stft.spectroplot(F2.T, fft_size, hop, Fs, vmin=vmin, vmax=vmax,
                     cmap=plt.get_cmap(cmap), interpolation=interpolation)
def comparePlot(signal1, signal2, Fs, fft_size=512, norm=False, equal=False, title1=None, title2=None):
    """Plot two signals for comparison: waveforms on top, spectrograms below.

    Args:
        signal1: First signal (array).
        signal2: Second signal (array).
        Fs: Sampling frequency, used for the time axis and the spectrograms.
        fft_size: STFT frame length; the hop is ``fft_size // 2``.
        norm: If true, normalise the signals before plotting.
        equal: With ``norm``, normalise each signal by its own peak instead of
            by the common peak of both signals.
        title1: Optional title of the first waveform plot.
        title2: Optional title of the second waveform plot.
    """
    import matplotlib.pyplot as plt

    td_amp = np.maximum(np.abs(signal1).max(), np.abs(signal2).max())

    if norm:
        # Divide out of place: the caller's arrays stay untouched and integer
        # inputs no longer fail on in-place true division.
        if equal:
            signal1 = signal1 / np.abs(signal1).max()
            signal2 = signal2 / np.abs(signal2).max()
        else:
            signal1 = signal1 / td_amp
            signal2 = signal2 / td_amp
        td_amp = 1.

    plt.subplot(2, 2, 1)
    plt.plot(np.arange(len(signal1))/float(Fs), signal1)
    plt.axis('tight')
    plt.ylim(-td_amp, td_amp)
    if title1 is not None:
        plt.title(title1)

    plt.subplot(2, 2, 2)
    plt.plot(np.arange(len(signal2))/float(Fs), signal2)
    plt.axis('tight')
    plt.ylim(-td_amp, td_amp)
    if title2 is not None:
        plt.title(title2)

    from constants import eps

    import stft
    import windows

    # Floor division: the hop must stay an integer on Python 3.
    hop = fft_size // 2
    F1 = stft.stft(signal1, fft_size, hop, win=windows.hann(fft_size))
    F2 = stft.stft(signal2, fft_size, hop, win=windows.hann(fft_size))

    # try a fancy way to set the scale to avoid having the spectrum
    # dominated by a few outliers
    p_min = 1
    p_max = 99.5
    all_vals = np.concatenate((dB(F1+eps), dB(F2+eps))).flatten()
    vmin, vmax = np.percentile(all_vals, [p_min, p_max])

    cmap = 'jet'
    interpolation = 'sinc'

    plt.subplot(2, 2, 3)
    stft.spectroplot(F1.T, fft_size, hop, Fs, vmin=vmin, vmax=vmax,
                     cmap=plt.get_cmap(cmap), interpolation=interpolation)

    plt.subplot(2, 2, 4)
    stft.spectroplot(F2.T, fft_size, hop, Fs, vmin=vmin, vmax=vmax,
                     cmap=plt.get_cmap(cmap), interpolation=interpolation)
def computeSNR(inputFile, window, M, N, H):
    """Return the SNR (in dB) between a sound and its ``stft.stft`` output.

    Args:
        inputFile (str): wav file name including the path.
        window (str): analysis window type (rectangular, triangular, hanning,
            hamming, blackman, blackmanharris).
        M (int): analysis window length (odd positive integer).
        N (int): fft size (power of two, > M).
        H (int): hop size for the stft computation.

    Returns:
        tuple: ``(SNR1, SNR2)``. SNR1 covers the whole signal, SNR2 only
        ``x[M:-M]``. ``eps`` is added to the energy ratio before the log.
    """
    _, signal_in = UF.wavread(inputFile)
    analysis_win = get_window(window, M, False)
    signal_out = stft.stft(signal_in, analysis_win, N, H)

    def _snr_db(reference, estimate):
        # Signal energy over error energy, in decibels.
        signal_energy = np.sum((abs(reference)**2))
        error_energy = np.sum((abs(estimate - reference)**2))
        return 10*np.log10(signal_energy / error_energy + eps)

    whole = _snr_db(signal_in, signal_out)
    trimmed = _snr_db(signal_in[M:-M], signal_out[M:-M])
    return whole, trimmed
def computeSNR(inputFile, window, M, N, H):
    """Return the SNR (in dB) between a sound and its ``stft.stft`` output.

    Args:
        inputFile (str): wav file name including the path.
        window (str): analysis window type (rectangular, triangular, hanning,
            hamming, blackman, blackmanharris).
        M (int): analysis window length (odd positive integer).
        N (int): fft size (power of two, > M).
        H (int): hop size for the stft computation.

    Returns:
        tuple: ``(SNR1, SNR2)`` as floats. SNR1 covers the whole signal, SNR2
        only ``x[M:-M]``. ``eps`` is added to the energy ratio before the log.
    """
    # The input sound and the analysis window have to be created here; the
    # previous body used them without ever defining them.
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)
    xs = stft.stft(x, w, N, H)

    # SNR over the complete signal
    E1 = np.sum(abs(x)**2)
    En = np.sum(abs(x - xs)**2)
    srn = 10 * np.log10(E1 / En + eps)

    # SNR without the first and last M samples
    xt = x[M:-M]
    xts = xs[M:-M]
    E1t = np.sum(abs(xt)**2)
    Ent = np.sum(abs(xt - xts)**2)
    srn2 = 10 * np.log10(E1t / Ent + eps)

    return srn, srn2
def __fdndlp(self, data):
    """Frequency-domain variance-normalized delayed linear prediction.

    This is the core part of the WPE method. The variance-normalized linear
    prediction algorithm is run on each frequency bin separately. Both the
    input and the output signals are in the time domain.

    Args:
        data: A 2-dimensional numpy array with shape=(channels, samples)

    Returns:
        A 2-dimensional numpy array with shape=(output_channels, samples)
    """
    # Peak-normalise the input before transforming it.
    normalized = data / np.abs(data).max()
    spectrum = stft.stft(
        normalized, frame_size=self.frame_size, overlap=self.overlap)
    self.freq_num = spectrum.shape[-1]

    # Start from a copy of the first out_num channels and overwrite every
    # frequency bin with its linear-prediction result.
    derived_spectrum = spectrum[0:self.out_num].copy()
    for bin_index in range(self.freq_num):
        bin_frames = spectrum[:, :, bin_index].T
        derived_spectrum[:, :, bin_index] = self.__ndlp(bin_frames).T

    derived = stft.istft(
        derived_spectrum, frame_size=self.frame_size, overlap=self.overlap)
    # Peak-normalise the output as well.
    return derived / np.abs(derived).max()
def computeSNR(inputFile, window, M, N, H):
    """Return the SNR (in dB) between a sound and its ``stft.stft`` output.

    Args:
        inputFile (str): wav file name including the path.
        window (str): analysis window type (rectangular, triangular, hanning,
            hamming, blackman, blackmanharris).
        M (int): analysis window length (odd positive integer).
        N (int): fft size (power of two, > M).
        H (int): hop size for the stft computation.

    Returns:
        tuple: ``(SNR1, SNR2)``. SNR1 covers the whole signal, SNR2 only the
        samples ``M`` to ``x.size - M``.
    """
    fs, x = UF.wavread(inputFile)

    # fftbins=True for an even M, fftbins=False for an odd M.
    w = get_window(window, M, fftbins=(M % 2 == 0))
    y = stft.stft(x, w, N, H)

    # SNR1: energy of the input over the energy of the difference
    residual = x - y
    ratio_full = float(np.sum(x**2) / np.sum(residual**2))
    SNR1 = 10 * np.log10(ratio_full)

    # SNR2: same ratio without the first and last M samples
    inner = slice(M, x.size - M)
    ratio_inner = float(np.sum(x[inner]**2) / np.sum((y[inner] - x[inner])**2))
    SNR2 = 10 * np.log10(ratio_inner)

    return (SNR1, SNR2)
def computeSNR(inputFile, window, M, N, H):
    """Return the SNR (in dB) between a sound and its ``stft.stft`` output.

    Args:
        inputFile (str): wav file name including the path.
        window (str): analysis window type (rectangular, triangular, hanning,
            hamming, blackman, blackmanharris).
        M (int): analysis window length (odd positive integer).
        N (int): fft size (power of two, > M).
        H (int): hop size for the stft computation.

    Returns:
        tuple: ``(SNR1, SNR2)``. SNR1 covers the whole signal, SNR2 only
        ``x[M:-M]``.
    """
    # read the input sound and build the analysis window
    _, samples = UF.wavread(inputFile)
    analysis_win = get_window(window, M)
    rebuilt = stft.stft(samples, analysis_win, N, H)

    def _snr_db(reference, estimate):
        # Energy of the reference over the energy of the difference, in dB.
        return 10 * np.log10(
            np.sum(reference**2) / np.sum((reference - estimate)**2))

    whole = _snr_db(samples, rebuilt)
    trimmed = _snr_db(samples[M:-M], rebuilt[M:-M])
    return (whole, trimmed)
def __init__(self, file_name):
    """Load a SAC file, add generated wavelets and build its fingerprint.

    The first trace returned by ``GetFileData`` gets the generated wave added
    at samples 3000-3599 and 9000-9599, is detrended, transformed with
    ``stft.stft`` and then passed through the wavelet / regularisation /
    trimming steps before ``GetFingerPoint`` is called.

    Args:
        file_name: Path of the SAC file, forwarded to ``GetFileData``.
    """
    from pysac import SacStreamIO
    import stft
    import numpy as np
    import scipy.signal as ssg

    self.hash = []
    self.wlWinN = 60
    self.wlLagN = 6
    # fqWinN, fqLagN and fqRspN are the arguments handed to stft.stft below.
    self.fqWinN = 60
    self.fqLagN = 10
    self.fqRspN = 32
    self.wlRspN = 32
    self.max900 = 0
    self.wl_x_level = 3

    # sac file read; the parameter is ``file_name`` (the previous body read
    # an undefined ``fileName`` and raised NameError).
    self.sac_st = self.GetFileData(file_name)

    nize = GenData([1, 600])
    nzdt = nize.GenWave()
    for temp_data in self.sac_st[0:1]:
        # Add the generated wave at two fixed offsets of the trace.
        temp_data[3000:3000 + 600] = temp_data[3000:3000 + 600] + nzdt[0]
        temp_data[9000:9000 + 600] = temp_data[9000:9000 + 600] + nzdt[0]
        self.sac_data = ssg.detrend(temp_data)

    # calculate stft magnitude
    fqData = stft.stft(self.sac_data, self.fqWinN, self.fqLagN, self.fqRspN)
    fqData = np.abs(fqData)

    wlDataX = self.WaveLetX(fqData, level=3)
    self.wlData = self.WaveLetAndRegular(wlDataX, level=3)
    self.wlData = self.RegularY(self.wlData)
    self.wlData = self.TrimData(self.wlData)
    self.GetFingerPoint(2)
def computeSNR(inputFile, window, M, N, H):
    """Return the SNR (in dB) between a sound and its ``stft.stft`` output.

    Args:
        inputFile (str): wav file name including the path.
        window (str): analysis window type (rectangular, triangular, hanning,
            hamming, blackman, blackmanharris).
        M (int): analysis window length (odd positive integer).
        N (int): fft size (power of two, > M).
        H (int): hop size for the stft computation.

    Returns:
        tuple: ``(SNR1, SNR2)``. SNR1 covers the whole signal, SNR2 only the
        samples ``M`` to ``x.size - M``.
    """
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)
    y = stft.stft(x, fs, w, N, H)
    residual = np.array(x - y)

    def _energy(values):
        # Sum of squared magnitudes.
        return np.sum(abs(values)**2)

    # Region without the first and last M samples.
    inner = slice(M, x.size - M)

    snr_whole = 10 * np.log10(_energy(x) / _energy(residual))
    snr_inner = 10 * np.log10(_energy(x[inner]) / _energy(residual[inner]))
    return (snr_whole, snr_inner)
def computeSNR(inputFile, window, M, N, H):
    """Return the SNR (in dB) between a sound and its ``stft.stft`` output.

    Args:
        inputFile (str): wav file name including the path.
        window (str): analysis window type (rectangular, triangular, hanning,
            hamming, blackman, blackmanharris).
        M (int): analysis window length (odd positive integer).
        N (int): fft size (power of two, > M).
        H (int): hop size for the stft computation.

    Returns:
        tuple: ``(SNR1, SNR2)``. SNR1 covers the whole signal, SNR2 only the
        signal without its first and last ``M`` samples.
    """
    (fs, x) = UF.wavread(inputFile)      # get wav file from inputFile
    w = get_window(window, M, False)     # get window

    # Apply STFT analysis and reconstruction
    y = stft.stft(x, w, N, H)

    # SNR over the whole signal. np.sum reduces in native code; the builtin
    # sum() iterates over the array sample by sample in Python.
    noiseYX = abs(y - x)
    noiseYX_E = np.sum(noiseYX**2)
    signal_E = np.sum(x**2)
    SNR_YX = 10*np.log10(signal_E / noiseYX_E)

    # SNR over the segment without the first and last M samples
    x_seg = x[M:x.size-M]
    y_seg = y[M:y.size-M]
    noiseYXseg = abs(y_seg - x_seg)
    noiseYXseg_E = np.sum(noiseYXseg**2)
    signal_seg_E = np.sum(x_seg**2)
    SNR_YXseg = 10*np.log10(signal_seg_E/noiseYXseg_E)

    return (SNR_YX, SNR_YXseg)
def computeSNR(inputFile, window, M, N, H):
    """Return the SNR (in dB) between a sound and its ``stft.stft`` output.

    Args:
        inputFile (str): wav file name including the path.
        window (str): analysis window type (rectangular, triangular, hanning,
            hamming, blackman, blackmanharris).
        M (int): analysis window length (odd positive integer).
        N (int): fft size (power of two, > M).
        H (int): hop size for the stft computation.

    Returns:
        tuple: ``(SNR1, SNR2)``. SNR1 covers the whole signal, SNR2 only the
        signal without its first and last ``M`` samples.
    """
    def energy(X, k1, k2):
        # Sum of squares of X[k1:k2].
        X2 = np.power(X, 2)
        return np.sum(X2[k1:k2])

    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)
    xsyn = stft.stft(x, fs, w, N, H)
    noise = np.subtract(xsyn, x)

    Esignal1 = energy(x, 0, len(x))
    Enoise1 = energy(noise, 0, len(noise))
    SNR1 = 10*np.log10(Esignal1/Enoise1)

    # Drop exactly M samples at each end; the previous bounds M+1 and
    # len-M-1 discarded one sample too many on both sides.
    Esignal2 = energy(x, M, len(x)-M)
    Enoise2 = energy(noise, M, len(noise)-M)
    SNR2 = 10*np.log10(Esignal2/Enoise2)

    return SNR1, SNR2
def computeSNR(inputFile, window, M, N, H):
    """Return the SNR (in dB) between a sound and its ``stft.stft`` output.

    Args:
        inputFile (str): wav file name including the path.
        window (str): analysis window type (rectangular, triangular, hanning,
            hamming, blackman, blackmanharris).
        M (int): analysis window length (odd positive integer).
        N (int): fft size (power of two, > M).
        H (int): hop size for the stft computation.

    Returns:
        tuple: ``(SNR1, SNR2)``. SNR1 covers the whole signal, SNR2 only
        ``x[M:-M]``.
    """
    _, x = UF.wavread(inputFile)
    analysis_win = get_window(window, M)
    y = stft.stft(x, analysis_win, N, H)
    residual = x - y

    def _ratio_db(signal_part, noise_part):
        # Energy ratio of the two parts, in decibels.
        return 10 * np.log10(np.sum(signal_part**2) / np.sum(noise_part**2))

    # Region without the first and last M samples.
    inner = slice(M, -M)
    return _ratio_db(x, residual), _ratio_db(x[inner], residual[inner])
def computeSNR(inputFile, window, M, N, H):
    """Return the SNR (in dB) between a sound and its ``stft`` output.

    Args:
        inputFile (str): wav file name including the path.
        window (str): analysis window type (rectangular, triangular, hanning,
            hamming, blackman, blackmanharris).
        M (int): analysis window length (odd positive integer).
        N (int): fft size (power of two, > M).
        H (int): hop size for the stft computation.

    Returns:
        tuple: ``(SNR1, SNR2)``. SNR1 covers the whole signal, SNR2 only the
        signal without its first and last ``M`` samples.
    """
    x = UF.wavread(inputFile)[1]
    w = get_window(window, M)
    # NOTE(review): 1.0 is passed where sibling callers pass the sampling
    # rate - presumably the value is not used by stft; confirm.
    xSynth = stft(x, 1.0, w, N, H)

    # np.sum reduces in native code; the builtin sum() iterates over the
    # array sample by sample in Python.
    eSignal1 = np.sum(x**2)
    eNoise1 = np.sum((x-xSynth)**2)
    SNR1 = 10.0*np.log10(eSignal1/eNoise1)

    x2 = x[M:len(x)-M]
    xSynth2 = xSynth[M:len(xSynth)-M]
    eSignal2 = np.sum(x2**2)
    eNoise2 = np.sum((x2-xSynth2)**2)
    SNR2 = 10.0*np.log10(eSignal2/eNoise2)

    return (SNR1, SNR2)
def __init__(self, file_name):
    """Read a SAC file and build its fingerprint, printing progress messages.

    The detrended trace is transformed with ``stft.stft``, its magnitude is
    passed through ``WaveLetX``, ``RegularY`` and ``TrimData``, and
    ``GetFingerPoint`` is called last.

    Args:
        file_name: Path of the SAC file, handed to ``SacStreamIO``.
    """
    from pysac import SacStreamIO
    import stft
    import numpy as np

    self.hash = []
    self.wlWinN, self.wlLagN = 100, 10
    # fqWinN, fqLagN and fqRspN are the arguments handed to stft.stft below.
    self.fqWinN, self.fqLagN = 100, 50
    self.fqRspN, self.wlRspN = 64, 64
    self.max900 = 0
    self.wl_x_level = 3

    # Read and detrend the SAC trace.
    stream = SacStreamIO(file_name)
    stream.DataDetrend()
    self.sac_delta = stream.delta
    self.sac_data = stream.yVect
    print("Sac File Read Finished!")

    # STFT magnitude
    magnitude = np.abs(
        stft.stft(self.sac_data, self.fqWinN, self.fqLagN, self.fqRspN))
    print("STFT Trans Finished!")

    self.wlData = self.WaveLetX(magnitude, level=3)
    print("Wavelet X trans Finished!")
    # The WaveLetAndRegular step is not run here; its message is still printed.
    print("Wavelet Trans Finished!")

    self.wlData = self.RegularY(self.wlData)
    print("Regular Finished!")

    self.wlData = self.TrimData(self.wlData)
    print("Bit Trans Finished!")

    self.GetFingerPoint(2)
    print("Hash Trans Finished!")
def computeSNR(inputFile, window, M, N, H):
    """Return the SNR between a sound and its ``stft.stft`` output.

    Both values are computed by the helper ``computeSNR_``.

    Args:
        inputFile (str): wav file name including the path.
        window (str): analysis window type (rectangular, triangular, hanning,
            hamming, blackman, blackmanharris).
        M (int): analysis window length (odd positive integer).
        N (int): fft size (power of two, > M).
        H (int): hop size for the stft computation.

    Returns:
        tuple: ``(SNR1, SNR2)``. SNR1 covers the whole signal, SNR2 only
        ``x[M:-M]``.
    """
    _, x = UF.wavread(inputFile)

    # stft analysis and synthesis
    analysis_win = get_window(window, M)
    y = stft.stft(x, analysis_win, N, H)

    # difference between input and output
    residual = (x - y)

    # Region without the first and last M samples.
    inner = slice(M, -M)
    whole_snr = computeSNR_(x, residual)
    inner_snr = computeSNR_(x[inner], residual[inner])
    return whole_snr, inner_snr
def computeSNR(inputFile, window, M, N, H):
    """Return the SNR (in dB) between a sound and its ``stft.stft`` output.

    Args:
        inputFile (str): wav file name including the path.
        window (str): analysis window type (rectangular, triangular, hanning,
            hamming, blackman, blackmanharris).
        M (int): analysis window length (odd positive integer).
        N (int): fft size (power of two, > M).
        H (int): hop size for the stft computation.

    Returns:
        tuple: ``(SNR1, SNR2)``. SNR1 covers the whole signal; for SNR2 the
        first and last ``M`` samples of input and output are set to zero.
    """
    analysis_win = get_window(window, M)
    fs, x = UF.wavread(inputFile)
    y = stft.stft(x, analysis_win, N, H)

    def _zero_edges(samples):
        # Copy of ``samples`` with the first and last M samples set to zero.
        blanked = samples.copy()
        blanked[:M] = 0.0
        blanked[-M:] = 0.0
        return blanked

    def _energy(samples):
        return np.sum(np.square(samples))

    x_inner = _zero_edges(x)
    y_inner = _zero_edges(y)

    snr_whole = 10.0 * np.log10(_energy(x) / _energy(y - x))
    snr_inner = 10.0 * np.log10(_energy(x_inner) / _energy(y_inner - x_inner))
    return (snr_whole, snr_inner)
def computeSNR(inputFile, window, M, N, H):
    """Return the SNR (in dB) between a sound and its ``stft.stft`` output.

    Args:
        inputFile (str): wav file name including the path.
        window (str): analysis window type (rectangular, triangular, hanning,
            hamming, blackman, blackmanharris).
        M (int): analysis window length (odd positive integer).
        N (int): fft size (power of two, > M).
        H (int): hop size for the stft computation.

    Returns:
        tuple: ``(SNR1, SNR2)``. SNR1 covers the whole signal, SNR2 only
        ``x[M:-M]``.
    """
    # read from the file
    FS, x = UF.wavread(inputFile)
    w = get_window(window, M)

    # do a stft computation
    y = stft.stft(x, FS, w, N, H)
    diff = y - x

    # SNR over the complete signal. The signal term is the energy of the
    # input x; the previous body used the energy of the output y.
    energy_signal = (x**2).sum()
    energy_noise = (diff**2).sum()
    SNR1 = 10 * np.log10(energy_signal/energy_noise)

    # SNR over the sliced signal
    energy_signal_sliced = (x[M:-M]**2).sum()
    energy_noise_sliced = (diff[M:-M]**2).sum()
    SNR2 = 10 * np.log10(energy_signal_sliced/energy_noise_sliced)

    return (SNR1, SNR2)
def computeSNR(inputFile, window, M, N, H):
    """Return the SNR (in dB) between a sound and its ``stft.stft`` output.

    Args:
        inputFile (str): wav file name including the path.
        window (str): analysis window type (rectangular, triangular, hanning,
            hamming, blackman, blackmanharris).
        M (int): analysis window length (odd positive integer).
        N (int): fft size (power of two, > M).
        H (int): hop size for the stft computation.

    Returns:
        tuple: ``(SNR1, SNR2)``. SNR1 covers the whole signal, SNR2 only
        ``x[M:-M]`` (with ``M`` as used internally, see below).
    """
    # NOTE(review): an odd M is silently reduced to M - 1, so both the window
    # length and the trimmed region differ from what the caller asked for.
    # Kept as is; confirm whether this is intended.
    if M % 2:
        M = M - 1

    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)
    y = stft.stft(x, w, N, H)

    noise = x - y
    # The signal term is the energy of the input x; the previous body used
    # the energy of the output y.
    snr1 = 10*np.log10(energy(x) / energy(noise))
    snr2 = 10*np.log10(energy(x[M:-M]) / energy(noise[M:-M]))
    return snr1, snr2
def computeSNR(inputFile, window, M, N, H):
    """Return the SNR (in dB) between a sound and its ``stft.stft`` output.

    Args:
        inputFile (str): wav file name including the path.
        window (str): analysis window type (rectangular, triangular, hanning,
            hamming, blackman, blackmanharris).
        M (int): analysis window length (odd positive integer).
        N (int): fft size (power of two, > M).
        H (int): hop size for the stft computation.

    Returns:
        tuple: ``(SNR1, SNR2)``. SNR1 covers the whole signal, SNR2 only the
        signal without its first and last ``M`` samples.
    """
    w = get_window(window, M)  # get the window
    (fs, x) = UF.wavread(inputFile)

    # y: output sound, compared over the first x.size samples
    y = stft.stft(x, fs, w, N, H)

    x_real = np.real(x)
    # The noise is the sample-wise difference. The previous loop subtracted
    # the absolute values (|x| - |y|), which hides sign errors in the output.
    noise = x_real - np.real(y[:x.size])

    # Trimmed region starts at sample M; the previous ``i > M`` test skipped
    # one extra sample at the start.
    inner = slice(M, x.size - M)

    SNR1 = 10 * np.log10(np.sum(x_real**2) / np.sum(noise**2))
    SNR2 = 10 * np.log10(np.sum(x_real[inner]**2) / np.sum(noise[inner]**2))
    return SNR1, SNR2
def spectrum(signal, Fs, N):
    """Plot the spectrogram of ``signal``.

    Args:
        signal: Signal to analyse.
        Fs: Sampling frequency, forwarded to ``stft.spectroplot``.
        N: STFT frame length; a Hann window of that length and a hop of
            ``N // 2`` are used.
    """
    import stft
    import windows

    # Floor division: the hop must stay an integer on Python 3.
    hop = N // 2
    F = stft.stft(signal, N, hop, win=windows.hann(N))
    stft.spectroplot(F.T, N, hop, Fs)
def computeSNR(inputFile, window, M, N, H):
    """Return the SNR (in dB) between a sound and its ``stft.stft`` output.

    Args:
        inputFile (str): wav file name including the path.
        window (str): analysis window type (rectangular, triangular, hanning,
            hamming, blackman, blackmanharris).
        M (int): analysis window length (odd positive integer).
        N (int): fft size (power of two, > M).
        H (int): hop size for the stft computation.

    Returns:
        tuple: ``(SNR1, SNR2)``. SNR1 covers the whole signal (``eps`` is
        added to both energies), SNR2 only ``x[M:-M]`` (without ``eps``).
    """
    fs, x = UF.wavread(inputFile)

    # fftbins=True for an even M, fftbins=False for an odd M.
    analysis_win = get_window(window, M, fftbins=(M % 2 == 0))
    y = stft.stft(x, analysis_win, N, H)
    residual = y - x

    def _energy(samples):
        return np.sum(np.square(samples))

    whole_signal = _energy(x) + eps
    whole_noise = _energy(residual) + eps
    snr_whole = 10 * np.log10(whole_signal / whole_noise)

    snr_inner = 10 * np.log10(_energy(x[M:-M]) / _energy(residual[M:-M]))
    return (snr_whole, snr_inner)
def computeSNR(inputFile, window, M, N, H):
    """Return the SNR (in dB) between a sound and its ``stft.stft`` output.

    Energies are computed by the helper ``energy``.

    Args:
        inputFile (str): wav file name including the path.
        window (str): analysis window type (rectangular, triangular, hanning,
            hamming, blackman, blackmanharris).
        M (int): analysis window length (odd positive integer).
        N (int): fft size (power of two, > M).
        H (int): hop size for the stft computation.

    Returns:
        tuple: ``(SNR1, SNR2)``. SNR1 covers the whole signal, SNR2 only
        ``x[M:-M]``.
    """
    fs, x = UF.wavread(inputFile)
    analysis_win = get_window(window, M)
    rebuilt = stft.stft(x, fs, analysis_win, N, H)
    residual = x - rebuilt

    # Region without the first and last M samples.
    inner = slice(M, -M)

    whole_db = 10 * np.log10(energy(x) / energy(residual))
    inner_db = 10 * np.log10(energy(x[inner]) / energy(residual[inner]))
    return whole_db, inner_db
def computeSNR(inputFile, window, M, N, H):
    """Return the SNR (in dB) between a sound and its ``stft.stft`` output.

    Args:
        inputFile (str): wav file name including the path.
        window (str): analysis window type (rectangular, triangular, hanning,
            hamming, blackman, blackmanharris).
        M (int): analysis window length (odd positive integer).
        N (int): fft size (power of two, > M).
        H (int): hop size for the stft computation.

    Returns:
        tuple: ``(SNR1, SNR2)`` as floats. SNR1 covers the whole signal, SNR2
        only ``x[M:-M]``.
    """
    (fs, x) = UF.wavread(inputFile)
    w = get_window(window, M)
    y = stft.stft(x, w, N, H)

    # The noise is the sample-wise difference between input and output. The
    # previous body used |E(y) - E(x)|, a difference of energies, which is
    # not the energy of the error signal.
    noise = x - y

    # SNR1: whole signal
    SNR1 = 10 * np.log10(np.sum(abs(x)**2) / np.sum(abs(noise)**2))

    # SNR2: drop M samples at both ends (the end offset was a hard-coded 6)
    xshort = x[M:-M]
    noiseshort = noise[M:-M]
    SNR2 = 10 * np.log10(np.sum(abs(xshort)**2) / np.sum(abs(noiseshort)**2))

    # The constant +5.0 / +200.0 offsets formerly added here were removed;
    # they falsified the reported values.
    return (SNR1, SNR2)
def find_peaks(self, d, sr):
    """Find the local peaks in the spectrogram as basis for fingerprints.

    :params:
      d - np.array of float
        Input waveform as 1D vector

      sr - int
        Sampling rate of d (not used)

    :returns:
      pklist - list of (int, int)
        Ordered list of landmark peaks found in STFT. First value of
        each pair is the time index (in STFT frames, i.e., units of
        n_hop/sr secs), second is the FFT bin (in units of sr/n_fft
        Hz).
    """
    if len(d) == 0:
        return []

    # masking envelope decay constant
    a_dec = (1 - 0.01 * (self.density * np.sqrt(self.n_hop / 352.8) / 35)) ** (1 / OVERSAMP)

    # Magnitude spectrogram
    analysis_win = np.hanning(self.n_fft + 2)[1:-1]
    sgram = np.abs(stft.stft(d, n_fft=self.n_fft,
                             hop_length=self.n_hop,
                             window=analysis_win))
    sgrammax = np.max(sgram)
    if sgrammax > 0.0:
        # Log magnitude, floored at 1e-6 of the maximum, with the mean removed.
        sgram = np.log(np.maximum(sgram, sgrammax / 1e6))
        sgram = sgram - np.mean(sgram)
    else:
        # The sgram is identically zero, i.e., the input signal was
        # identically zero. Not good, but let it through for now.
        print("find_peaks: Warning: input signal is identically zero.")

    # No high-pass onset-emphasis filtering is applied in this variant, so
    # every bin of the spectrogram is kept.

    # Keep only local maxima in the spectrum that appear above an online,
    # decaying threshold
    peaks = self._decaying_threshold_fwd_prune(sgram, a_dec)
    # Prune again working backwards in time, to remove small peaks that are
    # closely followed by a large peak
    peaks = self._decaying_threshold_bwd_prune_peaks(sgram, peaks, a_dec)

    # Collect the surviving peaks, frame by frame.
    n_frames = np.shape(sgram)[1]
    return [(col, bin_)
            for col in range(n_frames)
            for bin_ in np.nonzero(peaks[:, col])[0]]
def itakura_saito(x1, x2, sigma2_n, stft_L=128, stft_hop=128):
    """Return the median Itakura-Saito measure between two signals.

    The power spectrograms of both signals are compared frame by frame. Only
    frames whose mean power exceeds ``2 * stft_L**2 * sigma2_n`` in at least
    one of the two signals are used.

    Args:
        x1: First signal.
        x2: Second signal.
        sigma2_n: Noise variance used in the frame-activity threshold.
        stft_L: STFT frame length.
        stft_hop: STFT hop size.

    Returns:
        Median over the selected frames of ``mean(R - log(R) - 1)``, where
        ``R`` is the ratio of the two power spectrograms.

    Raises:
        ValueError: If the two spectrograms do not have the same shape.
    """
    P1 = np.abs(stft(x1, stft_L, stft_hop))**2
    P2 = np.abs(stft(x2, stft_L, stft_hop))**2

    # Validate before any element-wise work: combining the activity masks of
    # spectrograms with different frame counts fails inside numpy first, so
    # the check was ineffective where it used to sit.
    if P1.shape[0] != P2.shape[0] or P1.shape[1] != P2.shape[1]:
        raise ValueError("Error: Itakura-Saito requires both array to have same length")

    # Frame-activity masks: a frame counts if either signal is active.
    VAD1 = P1.mean(axis=1) > 2*stft_L**2*sigma2_n
    VAD2 = P2.mean(axis=1) > 2*stft_L**2*sigma2_n
    VAD = np.logical_or(VAD1, VAD2)

    R = P1[VAD, :]/P2[VAD, :]
    IS = (R - np.log(R) - 1.).mean(axis=1)

    return np.median(IS)
def illustrate_match(self, analyzer, ht, filename):
    """Plot the query and matching fingerprints over a spectrogram.

    The figure is saved as a PNG with a random name under ``./src/static/``.

    Args:
        analyzer: Analyzer providing ``target_sr``, ``n_fft``, ``n_hop`` and
            ``wavfile2hashes``.
        ht: Hash table to match against; ``ht.names`` supplies the title.
        filename: Path of the query audio file.

    Returns:
        The match results returned by ``self.match_hashes`` (sorted by the
        negated third field when ``self.sort_by_time`` is set).
    """
    # Make the spectrogram
    d, sr = audio_read.audio_read(filename, sr=analyzer.target_sr, channels=1)
    sgram = np.abs(
        stft.stft(d,
                  n_fft=analyzer.n_fft,
                  hop_length=analyzer.n_hop,
                  window=np.hanning(analyzer.n_fft + 2)[1:-1]))
    sgram = 20.0 * np.log10(np.maximum(sgram, np.max(sgram) / 1e6))
    sgram = sgram - np.mean(sgram)

    # High-pass filter onset emphasis
    # [:-1,] discards top bin (nyquist) of sgram so bins fit in 8 bits
    if self.illustrate_hpf:
        HPF_POLE = 0.98
        sgram = np.array([
            scipy.signal.lfilter([1, -1], [1, -HPF_POLE], s_row)
            for s_row in sgram
        ])[:-1, ]
    sgram = sgram - np.max(sgram)
    librosa.display.specshow(sgram,
                             sr=sr,
                             hop_length=analyzer.n_hop,
                             y_axis='linear',
                             x_axis='time',
                             cmap='gray_r',
                             vmin=-80.0,
                             vmax=0)

    # Run query, get back the hashes for match zero
    q_hashes = analyzer.wavfile2hashes(filename)
    results, matchhashes = self.match_hashes(ht, q_hashes, hashesfor=0)
    if self.sort_by_time:
        results = sorted(results, key=lambda x: -x[2])

    # Convert the hashes to landmarks
    lms = audfprint_analyze.hashes2landmarks(q_hashes)
    mlms = audfprint_analyze.hashes2landmarks(matchhashes)

    # Overplot on the spectrogram: query in green, matches in red
    plt.plot(
        np.array([[x[0], x[0] + x[3]] for x in lms]).T,
        np.array([[x[1], x[2]] for x in lms]).T, '.-g')
    plt.plot(
        np.array([[x[0], x[0] + x[3]] for x in mlms]).T,
        np.array([[x[1], x[2]] for x in mlms]).T, '.-r')

    # Add title; skipped when nothing matched, since there is no name to
    # show and indexing results[0] would raise IndexError.
    if len(results) > 0:
        plt.title("Matched as " +
                  ht.names[results[0][0]].split("/")[1].split(".")[0])

    # Save the figure. The keyword is ``bbox_inches`` (it was misspelled).
    plt.savefig("./src/static/sgram" + uuid.uuid4().hex + ".png",
                bbox_inches="tight")

    return results
def test_consistency(self):
    """Check that the peak bin of a 440 Hz sine stays put across STFT frames.

    The index of the largest value in the first ``framesz // 2`` bins may
    change by at most one between consecutive frames.
    """
    # numpy.linspace / numpy.pi replace the scipy.linspace / scipy.pi
    # aliases, which newer SciPy releases no longer provide.
    x = sin(numpy.linspace(0, 1, 44100) * 2 * numpy.pi * 440)
    framesz = 1024
    X = stft(x, framesz)

    # Floor division: a float slice bound raises TypeError on Python 3.
    half = framesz // 2
    indices = [numpy.argmax(X[i][:half]) for i in range(len(X))]

    previous = indices[0]
    for val in indices[1:]:
        self.assertTrue(abs(abs(val)-abs(previous)) <= 1)
        previous = val
def itakura_saito(x1, x2, sigma2_n, stft_L=128, stft_hop=128):
    """Return the median Itakura-Saito measure between two signals.

    The power spectrograms of both signals are compared frame by frame. Only
    frames whose mean power exceeds ``2 * stft_L**2 * sigma2_n`` in at least
    one of the two signals are used.

    Args:
        x1: First signal.
        x2: Second signal.
        sigma2_n: Noise variance used in the frame-activity threshold.
        stft_L: STFT frame length.
        stft_hop: STFT hop size.

    Returns:
        Median over the selected frames of ``mean(R - log(R) - 1)``, where
        ``R`` is the ratio of the two power spectrograms.

    Raises:
        ValueError: If the two spectrograms do not have the same shape.
    """
    P1 = np.abs(stft(x1, stft_L, stft_hop))**2
    P2 = np.abs(stft(x2, stft_L, stft_hop))**2

    # Validate before any element-wise work: combining the activity masks of
    # spectrograms with different frame counts fails inside numpy first, so
    # the check was ineffective where it used to sit.
    if P1.shape[0] != P2.shape[0] or P1.shape[1] != P2.shape[1]:
        raise ValueError(
            "Error: Itakura-Saito requires both array to have same length")

    # Frame-activity masks: a frame counts if either signal is active.
    threshold = 2 * stft_L**2 * sigma2_n
    VAD1 = P1.mean(axis=1) > threshold
    VAD2 = P2.mean(axis=1) > threshold
    VAD = np.logical_or(VAD1, VAD2)

    R = P1[VAD, :] / P2[VAD, :]
    IS = (R - np.log(R) - 1.).mean(axis=1)

    return np.median(IS)
def get_stft(x, wsize=512, tstep=256, sigma=None):
    """Return the STFT of a signal, loading it from a wav file if needed.

    Args:
        x: Either a signal array or the path of a wav file, which is loaded
            as a mono, normalised ``Signal``.
        wsize: Window size passed to ``stft.stft``.
        tstep: Time step passed to ``stft.stft``.
        sigma: If not None, white Gaussian noise scaled by ``sigma`` is added
            to the signal first.

    Returns:
        The output of ``stft.stft`` with singleton dimensions removed.
    """
    if isinstance(x, str):
        sig = Signal(x, mono=True, normalize=True)
        x = sig.data

    if sigma is not None:
        # Add the noise out of place: ``x += ...`` modified the caller's
        # array and fails on integer arrays.
        x = x + sigma * np.random.randn(*x.shape)

    return np.squeeze(stft.stft(x, wsize, tstep))
def find_peaks(self, d, sr):
    """Find the local peaks in the spectrogram as basis for fingerprints.

    :params:
      d - np.array of float
        Input waveform as 1D vector

      sr - int
        Sampling rate of d (not used)

    :returns:
      pklist - list of (int, int)
        Ordered list of landmark peaks found in STFT. First value of
        each pair is the time index (in STFT frames, i.e., units of
        n_hop/sr secs), second is the FFT bin (in units of sr/n_fft
        Hz).
    """
    if len(d) == 0:
        return []

    # masking envelope decay constant
    a_dec = (1 - 0.01 * (self.density * np.sqrt(self.n_hop / 352.8) / 35)) ** (1 / OVERSAMP)

    # Magnitude spectrogram
    analysis_win = np.hanning(self.n_fft + 2)[1:-1]
    sgram = np.abs(stft.stft(d, n_fft=self.n_fft,
                             hop_length=self.n_hop,
                             window=analysis_win))
    sgrammax = np.max(sgram)
    if sgrammax > 0.0:
        # Log magnitude, floored at 1e-6 of the maximum, with the mean removed.
        sgram = np.log(np.maximum(sgram, sgrammax / 1e6))
        sgram = sgram - np.mean(sgram)
    else:
        # The sgram is identically zero, i.e., the input signal was
        # identically zero. Not good, but let it through for now.
        print("find_peaks: Warning: input signal is identically zero.")

    # High-pass filter onset emphasis, applied to each row.
    # [:-1,] discards top bin (nyquist) of sgram so bins fit in 8 bits
    hpf_b = [1, -1]
    hpf_a = [1, -HPF_POLE ** (1 / OVERSAMP)]
    filtered_rows = [scipy.signal.lfilter(hpf_b, hpf_a, s_row)
                     for s_row in sgram]
    sgram = np.array(filtered_rows)[:-1, ]

    # Keep only local maxima in the spectrum that appear above an online,
    # decaying threshold
    peaks = self._decaying_threshold_fwd_prune(sgram, a_dec)
    # Prune again working backwards in time, to remove small peaks that are
    # closely followed by a large peak
    peaks = self._decaying_threshold_bwd_prune_peaks(sgram, peaks, a_dec)

    # Collect the surviving peaks, frame by frame.
    n_frames = np.shape(sgram)[1]
    return [(col, bin_)
            for col in range(n_frames)
            for bin_ in np.nonzero(peaks[:, col])[0]]
def calc_sp(audio, fft_size, hop_size, window):
    """Return the complex STFT of ``audio`` cast to complex64.

    Args:
        audio: Input signal, passed to ``stft.stft`` as ``x``.
        fft_size: Passed to ``stft.stft`` as ``window_size``.
        hop_size: Hop size between frames.
        window: Analysis window.

    Returns:
        The ``mode='complex'`` output of ``stft.stft`` as ``np.complex64``.
    """
    complex_spectrum = stft.stft(
        x=audio,
        window_size=fft_size,
        hop_size=hop_size,
        window=window,
        mode='complex',
    )
    return complex_spectrum.astype(np.complex64)
def read_and_nmf(input_file):
    """Read an audio file and factorise its magnitude STFT with NMF.

    :param input_file: file to be read
    :return: w (components from nmf)
    """
    (rate, data) = read(input_file)

    # Work in floating point before mixing the channels: adding two channels
    # in the file's integer sample type can overflow before the division.
    samples = np.asarray(data, dtype=np.float64)
    if samples.ndim == 1:
        # Mono file: there is no second channel to average with.
        bee_data = samples
    else:
        bee_data = (samples[:, 0] + samples[:, 1]) / 2.0

    # Scale into [-1, 1] when the signal exceeds that range.
    peak = np.abs(bee_data).max()
    if peak > 1:
        bee_data /= float(peak)

    # NOTE(review): the module-level ``fs`` is passed to the STFT, not the
    # ``rate`` read from the file - confirm that this is intended.
    X = transform.stft(bee_data, fs, framesz, hop)
    M = abs(X)
    w, h = nmf.factorize(M, pc=NUM_COMPONENTS, iterations=ITERATIONS)
    return w
def create_audio_spectrogram(audio, window_size, window_overwrap_rate, clipping_threshold):
    """Turn an audio object into a spectrogram feature.

    Args:
        audio: Object exposing ``to_numpy()``.
        window_size: STFT window size.
        window_overwrap_rate: Factor applied to ``window_size`` to get the
            stride (truncated to an int).
        clipping_threshold: Threshold passed to ``stft.to_feature``.

    Returns:
        The result of ``stft.to_feature`` applied to the STFT of the signal.
    """
    samples = audio.to_numpy()

    # Stride between consecutive frames
    hop = int(window_size * window_overwrap_rate)

    # Short time Fourier transform followed by the feature conversion
    transformed = stft.stft(samples, window_size, hop)
    return stft.to_feature(transformed, clipping_threshold)
def pvoc(x, sr, factor, Hs=512, window=signal.hann(1024, sym=False), phase_lock=False):
    """Time-scale ``x`` by ``factor`` using STFT phase propagation.

    Analysis frames are taken at positions interpolated between the first and
    last sample of the input, their phases are advanced for a fixed synthesis
    hop ``Hs``, and the result is resynthesised with ``stft.istft``.

    Args:
        x: Input signal (1-D array).
        sr: Sampling rate, forwarded to ``stft.stft``.
        factor: Ratio of output length to input length.
        Hs: Synthesis hop size in samples.
        window: Analysis/synthesis window; its length sets the frame size.
        phase_lock: If true, every region returned by ``get_peaks`` takes the
            phase rotation of its peak bin.

    Returns:
        The output signal as returned by ``stft.istft``.
    """
    in_size = x.shape[0]
    win_len = window.shape[0]
    win_len_half = int(np.round(win_len / 2))
    out_size = int(np.ceil(factor * in_size))

    # Map synthesis frame positions back to analysis positions by linear
    # interpolation between the first and last sample.
    anchor_points = np.array([[0, 0], [in_size - 1, out_size - 1]])
    syn_positions = np.arange(0, out_size + win_len_half, Hs)
    an_positions = np.round(
        np.interp(syn_positions, anchor_points[:, 1], anchor_points[:, 0]))
    an_hops = np.concatenate(([0], an_positions[1:] - an_positions[:-1]))

    # The output buffer formerly preallocated here was never used (``y`` is
    # assigned from istft below), so the allocation was dropped.

    # Zero-pad half a window in front and a window plus one hop behind.
    x = np.concatenate((np.zeros(
        (win_len_half)), x, np.zeros((win_len + int(an_hops[1])))))

    X = stft.stft(x, sr, an_positions, window, win_len)
    Y = np.zeros_like(X)
    Y[:, 0] = X[:, 0]  # assuming columns are frames

    k = np.arange(win_len_half + 1).T
    omega = 2 * np.pi * k / win_len

    # NOTE(review): looks like leftover debug output - confirm before removing.
    print(an_hops[1])
    print(an_hops[-1])

    for i in range(1, X.shape[1]):
        # Deviation of the measured phase advance from the expected one,
        # wrapped by whole multiples of 2*pi.
        dphi = omega * an_hops[i]
        current_phase = np.angle(X[:, i])
        prev_phase = np.angle(X[:, i - 1])
        phase_inc = current_phase - prev_phase - dphi
        phase_inc = phase_inc - 2 * np.pi * np.round(phase_inc / (2 * np.pi))

        # Per-sample frequency estimate, advanced over the synthesis hop.
        ipa_sample = omega + phase_inc / an_hops[i]
        ipa_hop = ipa_sample * Hs
        syn_phase = np.angle(Y[:, i - 1])

        if not phase_lock:
            theta = syn_phase + ipa_hop - current_phase
        else:
            p, v = get_peaks(np.abs(X[:, i]))
            theta = np.zeros_like(Y[:, i])
            for j in range(len(p)):
                theta[v[j]:v[j + 1]] = syn_phase[p[j]] + ipa_hop[
                    p[j]] - current_phase[p[j]]

        phasor = np.exp(1j * theta)
        Y[:, i] = phasor * X[:, i]

    y = stft.istft(Y, Hs, window)
    return y
def test():
    """Load ``../golf_D.wav`` and display the magnitude of its STFT.

    A 1024-point ``hanning`` window is used with a hop of one eighth of the
    window. Only the first ``fftLen // 2 + 1`` columns of the STFT output are
    kept before plotting with ``imshow_sox``.
    """
    wavfile = "../golf_D.wav"
    data, fs = wavread(wavfile)

    ### STFT
    fftLen = 1024
    win = hanning(fftLen)
    # Floor division keeps the hop and the slice bound integral; `/` yields a
    # float on Python 3, which is not a valid slice index.
    step = fftLen // 8
    spectrogram = abs(stft(data, win, step)[:, :fftLen // 2 + 1]).T

    ### Display
    fig = pl.figure()
    fig.patch.set_alpha(0.)
    imshow_sox(spectrogram)
    pl.tight_layout()
    pl.show()
def computeSNR(inputFile, window, M, N, H):
    """
    Input:
            inputFile (string): wav file name including the path
            window (string): analysis window type (choice of rectangular, triangular, hanning, hamming,
                    blackman, blackmanharris)
            M (integer): analysis window length (odd positive integer)
            N (integer): fft size (power of two, > M)
            H (integer): hop size for the stft computation
    Output:
            The function should return a python tuple of both the SNR values (SNR1, SNR2)
            SNR1 and SNR2 are floats.
            (0, 0) is returned, after printing the offending lengths, when the
            resynthesized signal does not have the same length as the input.
    """
    # read input sound (monophonic with sampling rate of 44100)
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)
    y = stft.stft(x, w, N, H)

    # `!=` and the parenthesized print are valid on both Python 2 and 3.
    if len(x) != len(y):
        print(' x ' + str(len(x)) + ' y ' + str(len(y)))
        return 0, 0

    # SNR1: energies over the whole signal
    Ex = np.sum(x ** 2)
    Enoise = np.sum(np.abs(x - y) ** 2)
    SNR1 = 10 * np.log10(Ex / Enoise)

    # SNR2: same ratio, ignoring the first and last M samples
    xp = x[M:-M]
    yp = y[M:-M]
    if len(xp) != len(yp):
        print(' x ' + str(len(x)) + ' xp ' + str(len(xp)) + ' yp ' + str(len(yp)))
        return 0, 0
    Exp = np.sum(xp ** 2)
    Enoisep = np.sum(np.abs(xp - yp) ** 2)
    SNR2 = 10 * np.log10(Exp / Enoisep)

    return SNR1, SNR2
def test():
    """Load ``./golf_D.wav`` and display the magnitude of its STFT.

    A 1024-point ``hanning`` window is used with a hop of one eighth of the
    window. Only the first ``fftLen // 2 + 1`` columns of the STFT output are
    kept before plotting with ``imshow_sox``.
    """
    wavfile = "./golf_D.wav"
    # NOTE(review): an earlier revision unpacked three values (data, fs, enc)
    # from wavread -- confirm which wavread implementation is in scope.
    data, fs = wavread(wavfile)

    ### STFT
    fftLen = 1024
    win = hanning(fftLen)
    # Floor division keeps the hop and the slice bound integral; `/` yields a
    # float on Python 3, which is not a valid slice index.
    step = fftLen // 8
    spectrogram = abs(stft(data, win, step)[:, :fftLen // 2 + 1]).T

    ### Display
    fig = pl.figure()
    fig.patch.set_alpha(0.)
    imshow_sox(spectrogram)
    pl.tight_layout()
    pl.show()
def computeSNR(inputFile, window, M, N, H):
    """
    Input:
            inputFile (string): wav file name including the path
            window (string): analysis window type (choice of rectangular, triangular, hanning, hamming,
                    blackman, blackmanharris)
            M (integer): analysis window length (odd positive integer)
            N (integer): fft size (power of two, > M)
            H (integer): hop size for the stft computation
    Output:
            The function should return a python tuple of both the SNR values (SNR1, SNR2)
            SNR1 and SNR2 are floats.

    The noise is the sample-wise difference between the input and the signal
    returned by ``stft.stft``; each SNR is
    ``10 * log10(signal energy / noise energy)``. SNR2 ignores the first and
    last M samples of both signals.
    """
    (fs, x) = UF.wavread(inputFile)
    w = get_window(window, M)
    y = stft.stft(x, w, N, H)

    # Noise energy is the energy of the difference signal, not the difference
    # of the two energies: the latter is zero whenever the energies merely
    # happen to match, regardless of how different the waveforms are.
    noise = x - y
    SNR1 = 10 * np.log10(np.sum(np.abs(x) ** 2) / np.sum(np.abs(noise) ** 2))

    xsub = x[M:-M]
    noise_sub = noise[M:-M]
    SNR2 = 10 * np.log10(np.sum(np.abs(xsub) ** 2) / np.sum(np.abs(noise_sub) ** 2))

    return (SNR1, SNR2)
def predict_channel(audio):
    """Estimate vocal, bass, drum and other stems from ``audio`` with ``model``.

    The input is resampled from 44100 to 22050, transformed with ``stft``, and
    for every spectrogram frame a zero-padded context of ``n_frames`` magnitude
    frames is fed to ``model``. The four output slices of width ``n_bins`` are
    read as vocals, drums, bass and other (in that order), normalized so they
    sum to one, applied to the complex spectrogram, inverted with ``istft`` and
    resampled back to 44100.

    Returns the tuple ``(vocal_est, bass_est, drums_est, other_est)``, each
    trimmed to the number of rows of ``audio``.
    """
    length = np.shape(audio)[0]
    downsampled = resample(audio, 44100, 22050)
    M = stft(downsampled.reshape(-1, 1), hop_size, win_size, fft_size)
    magnitude = np.abs(M).T
    spec_frames, n_bins = magnitude.shape

    # Zero-pad in time so every frame has a full context window around it.
    pad_size = int((n_frames - 1) / 2)
    padding = np.zeros((pad_size, n_bins))
    padded = np.concatenate((padding, magnitude, padding))

    # Overlapping context windows as a strided view: window i starts at row i.
    row_stride, col_stride = padded.strides
    contexts = as_strided(
        padded, (spec_frames, n_frames, n_bins),
        (row_stride, row_stride, col_stride))
    contexts = contexts[:, np.newaxis, :, :]

    mask_shape = M.T.shape
    vocals = np.zeros(mask_shape)
    bass = np.zeros(mask_shape)
    drums = np.zeros(mask_shape)
    other = np.zeros(mask_shape)

    for frame in range(spec_frames):
        batch = torch.from_numpy(
            contexts[frame].astype(np.float32)[np.newaxis, :, :, :])
        if torch.cuda.is_available():
            batch = batch.cuda()
        prediction = model(Variable(batch)).cpu().data.numpy()[0]
        vocals[frame, :] = prediction[:n_bins]
        drums[frame, :] = prediction[n_bins:2 * n_bins]
        bass[frame, :] = prediction[2 * n_bins:3 * n_bins]
        other[frame, :] = prediction[3 * n_bins:4 * n_bins]

    all_masks = vocals + bass + drums + other

    def reconstruct(mask):
        # Normalize the mask, apply it to the mixture and go back to 44100.
        masked = M * (mask / all_masks).T
        return resample(istft(masked, hop_size, win_size, 22050),
                        22050, 44100, 0)[:length, :]

    return (reconstruct(vocals), reconstruct(bass),
            reconstruct(drums), reconstruct(other))
def computeSNR(inputFile, window, M, N, H):
    """
    Input:
            inputFile (string): input sound file (monophonic with sampling rate of 44100)
            window (string): analysis window type (choice of rectangular, triangular, hanning, hamming,
                    blackman, blackmanharris)
            M (integer): analysis window length (odd positive integer)
            N (integer): fft size (power of two, such that N > M)
            H (integer): hop size for the stft computation
    Output:
            A tuple (x, x_ret): the input signal and the signal returned by
            stft.stft, trimmed symmetrically when len(x) is not a multiple of H.
            NOTE(review): despite the name, the SNR values are not computed
            here; both signals are returned and plotted for inspection.
    Raises:
            ValueError: if N < M or N is not a power of two.
    """
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)

    # `N < M is True` is a chained comparison (`N < M and M is True`) that can
    # never be true, so the size check was silently skipped.
    if N < M:
        raise ValueError("'N' should be greather than 'M'")
    if np.log2(N) % 1 != 0:
        raise ValueError("Input not power of 2")

    x_ret = stft.stft(x, w, N, H)

    rest_x_H = x.shape[0] % H
    if rest_x_H != 0:
        delta_end = int((H - rest_x_H) // 2)
        print(delta_end)
        # A zero trim must be skipped: x_ret[0:-0] is an empty array.
        if delta_end > 0:
            x_ret = x_ret[delta_end:-delta_end]

    print(x.shape, x_ret.shape, H, x.shape[0] % H)

    # Input on top, reconstruction below.
    plt.figure(figsize=(8, 6), dpi=120)
    plt.subplot(2, 1, 1)
    plt.plot(x)
    plt.subplot(2, 1, 2)
    plt.plot(x_ret)

    return (x, x_ret)
def computeSNR(inputFile, window, M, N, H):
    """
    Input:
            inputFile (string): wav file name including the path
            window (string): analysis window type (choice of rectangular, triangular, hanning, hamming,
                    blackman, blackmanharris)
            M (integer): analysis window length (odd positive integer)
            N (integer): fft size (power of two, > M)
            H (integer): hop size for the stft computation
    Output:
            The function should return a python tuple of both the SNR values (SNR1, SNR2)
            SNR1 and SNR2 are floats.

    The residual ``x - stft.stft(...)`` is treated as noise. SNR1 uses the
    whole signals, SNR2 the slices ``[M:-M]``; energies and ratios are
    delegated to the helpers ``E`` and ``SNR``.
    """
    fs, x = UF.wavread(inputFile)
    analysis_window = get_window(window, M)
    residual = x - stft.stft(x, fs, analysis_window, N, H)

    whole = SNR(E(x), E(residual))
    interior = slice(M, -M)
    trimmed = SNR(E(x[interior]), E(residual[interior]))
    return (whole, trimmed)
def computeSNR(inputFile, window, M, N, H):
    """
    Input:
            inputFile (string): wav file name including the path
            window (string): analysis window type (choice of rectangular, triangular, hanning, hamming,
                    blackman, blackmanharris)
            M (integer): analysis window length (odd positive integer)
            N (integer): fft size (power of two, > M)
            H (integer): hop size for the stft computation
    Output:
            The function should return a python tuple of both the SNR values (SNR1, SNR2)
            SNR1 and SNR2 are floats.

    Each value is ``10 * log10(sum(signal^2) / sum((signal - output)^2))``;
    the second one skips the first and last M samples of both signals.
    """
    analysis_window = get_window(window, M)
    fs, x = UF.wavread(inputFile)
    y = STFT.stft(x, fs, analysis_window, N, H)

    def snr_db(reference, estimate):
        # Ratio of reference energy to residual energy, in decibels.
        residual = reference - estimate
        return 10 * np.log10(
            np.sum(np.square(reference)) / np.sum(np.square(residual)))

    full_band = snr_db(x, y)
    trimmed = snr_db(x[M:len(x) - M], y[M:len(y) - M])
    return (full_band, trimmed)
def computeSNR(inputFile, window, M, N, H):
    """
    Input:
            inputFile (string): wav file name including the path
            window (string): analysis window type (choice of rectangular, triangular, hanning, hamming,
                    blackman, blackmanharris)
            M (integer): analysis window length (odd positive integer)
            N (integer): fft size (power of two, > M)
            H (integer): hop size for the stft computation
    Output:
            The function should return a python tuple of both the SNR values (SNR1, SNR2)
            SNR1 and SNR2 are floats.

    Both values compare the energy of the input with the energy of its
    difference from the stft.stft output; the second is restricted to the
    index range ``M .. len(x) - M`` of both signals.
    """
    fs, x = UF.wavread(inputFile)
    analysis_window = get_window(window, M)
    y = stft.stft(x, fs, analysis_window, N, H)

    def energy(samples):
        return np.sum(np.power(np.abs(samples), 2))

    # Both slices are bounded by len(x), exactly as the stop index is shared.
    stop = len(x) - M
    ratios = []
    for reference, estimate in ((x, y), (x[M:stop], y[M:stop])):
        residual = reference - estimate
        ratios.append(10 * np.log10(energy(reference) / energy(residual)))
    return tuple(ratios)
def compute_dynamic_features(filename):
    '''Compute dynamic features given Mel filterbank features.

    Every channel (column) of the Mel feature matrix is transformed with
    stft.stft using settings.FFT_SIZE and settings.OVERLAP; the transposed
    results are stacked into a complex array of shape
    (nChannels, settings.FFT_SIZE // 2 + 1, timeSize) and saved.

    Argument :
      filename: filename of the file containing Mel filterbank features
                located in settings.DIR_MEL_FEATURES
                (without path, without npy extension)
    Returns:
      0 if success
      The output file is located in settings.DIR_DYNAMIC_FEATURES.
    '''
    melFeatures = numpy.load(settings.DIR_MEL_FEATURES + filename + '.npy')
    # Unpacking also enforces that the feature matrix is two-dimensional.
    _nPoints, nChannels = melFeatures.shape
    if nChannels != 26:
        # Parenthesized form is valid on both Python 2 and Python 3.
        print("Warning : 26 channels expected")

    # Number of STFT frames, taken from the first channel.
    timeSize = stft.stft_time_size(melFeatures[:, 0], settings.FFT_SIZE, settings.OVERLAP)
    dynamicFeatures = numpy.zeros(
        (nChannels, settings.FFT_SIZE // 2 + 1, timeSize), dtype=complex)
    for i in range(nChannels):
        dynamicFeatures[i, :, :] = stft.stft(
            melFeatures[:, i], settings.FFT_SIZE, settings.OVERLAP).T

    numpy.save(settings.DIR_DYNAMIC_FEATURES + filename + '.npy', dynamicFeatures)
    return 0
def computeSNR(inputFile, window, M, N, H):
    """
    Input:
            inputFile (string): wav file name including the path
            window (string): analysis window type (choice of rectangular, triangular, hanning, hamming,
                    blackman, blackmanharris)
            M (integer): analysis window length (odd positive integer)
            N (integer): fft size (power of two, > M)
            H (integer): hop size for the stft computation
    Output:
            The function should return a python tuple of both the SNR values (SNR1, SNR2)
            SNR1 and SNR2 are floats.

    Energies are computed as dot products of a signal with itself; the second
    value uses only the samples in ``[M:-M]``.
    """
    fs, x = UF.wavread(inputFile)
    analysis_window = get_window(window, M)
    y = stft.stft(x, fs, analysis_window, N, H)

    def energy(samples):
        return float(np.dot(samples, samples))

    residual = x - y
    SNR1 = 10 * np.log10(energy(x) / energy(residual))

    x_mid = x[M:-M]
    residual_mid = x_mid - y[M:-M]
    SNR2 = 10 * np.log10(energy(x_mid) / energy(residual_mid))

    return SNR1, SNR2
import matplotlib.pyplot as plt
import numpy as np
from stft import stft
from read import read

# Plot the magnitude spectrogram (in dB) of a mono recording.
fname = "doubleBass.wav"
(srate, data) = read(fname, "mono")

N = 1024
hop = N//2
win = "hann"
X = stft(data, N, hop, win)
X = np.abs(X)

# magnitude to decibel conversion
X = 20*np.log10(X)

# Show the first N // 2 rows only. Floor division is required here: `N/2` is a
# float on Python 3 and cannot be used as a slice bound.
plt.imshow(X[:N // 2, :], interpolation='nearest', aspect='auto', origin='lower')
plt.colorbar()
plt.title(str(fname) + ", N = " + str(N) + ", hop = N//2, win = hann")
plt.show()
# remove a bit of signal at the end and time-align all signals.
# the delays were visually measured by plotting the signals
# np.ceil returns a float; slice bounds must be integers, so convert once here.
n_lim = int(np.ceil(len(input_mic) - t_cut*Fs))
input_clean = signal1[:n_lim]
input_mic = input_mic[105:n_lim+105]
output_mvdr = output_mvdr[31:n_lim+31]
output_maxsinr = output_maxsinr[31:n_lim+31]

# save all files for listening test
# NOTE(review): `output_mvdr` is written to output_maxsinr.wav and
# `output_maxsinr` to output_rake-maxsinr.wav -- confirm the naming is intended.
wavfile.write('output_samples/input_mic.wav', Fs, input_mic)
wavfile.write('output_samples/output_maxsinr.wav', Fs, output_mvdr)
wavfile.write('output_samples/output_rake-maxsinr.wav', Fs, output_maxsinr)

# compute time-frequency planes
F0 = stft(input_clean, fft_size, fft_hop, win=analysis_window, zp_back=fft_zp)
F1 = stft(input_mic, fft_size, fft_hop, win=analysis_window, zp_back=fft_zp)
F2 = stft(output_mvdr, fft_size, fft_hop, win=analysis_window, zp_back=fft_zp)
F3 = stft(output_maxsinr, fft_size, fft_hop, win=analysis_window, zp_back=fft_zp)

# (not so) fancy way to set the scale to avoid having the spectrum
# dominated by a few outliers
p_min = 7
p_max = 100
def process(self, FD=False):
    """Apply the beamformer to ``self.signals`` and return the output signal.

    Parameters:
        FD: when exactly ``True``, process in the STFT domain with
            ``self.weights``; otherwise convolve each signal with the matching
            entry of ``self.filters`` in the time domain.

    Returns:
        The sum over channels of the weighted (STFT branch) or filtered
        (time-domain branch) signals.

    Raises:
        NameError: if there is no signal, or if neither weights nor filters
            are available.
    """
    if self.signals is None or len(self.signals) == 0:
        raise NameError('No signal to beamform')

    if FD is True:
        # STFT processing
        if self.weights is None and self.filters is not None:
            self.weightsFromFilters()
        elif self.weights is None and self.filters is None:
            raise NameError('Beamforming weights or filters need to be computed first.')

        # create window function, zero-padded in front and at the back
        win = np.concatenate((np.zeros(self.zpf),
                              windows.hann(self.L),
                              np.zeros(self.zpb)))

        # do real STFT of first signal
        tfd_sig = stft.stft(self.signals[0],
                            self.L,
                            self.hop,
                            zp_back=self.zpb,
                            zp_front=self.zpf,
                            transform=np.fft.rfft,
                            win=win) * np.conj(self.weights[0])
        # accumulate the remaining channels (`range` works on Python 2 and 3)
        for i in range(1, self.M):
            tfd_sig += stft.stft(self.signals[i],
                                 self.L,
                                 self.hop,
                                 zp_back=self.zpb,
                                 zp_front=self.zpf,
                                 transform=np.fft.rfft,
                                 win=win) * np.conj(self.weights[i])

        # now reconstruct the signal
        output = stft.istft(
            tfd_sig,
            self.L,
            self.hop,
            zp_back=self.zpb,
            zp_front=self.zpf,
            transform=np.fft.irfft)

        # remove the zero padding from output signal
        # Compare by value: `is 0` is an identity test that is False for e.g.
        # NumPy integers, and output[zpf:-0] would then be empty.
        if self.zpb == 0:
            output = output[self.zpf:]
        else:
            output = output[self.zpf:-self.zpb]

    else:
        # TD processing
        if self.weights is not None and self.filters is None:
            self.filtersFromWeights()
        elif self.weights is None and self.filters is None:
            raise NameError('Beamforming weights or filters need to be computed first.')

        from scipy.signal import fftconvolve

        # filter the first signal, then add the other filtered channels
        output = fftconvolve(self.filters[0], self.signals[0])
        for i in range(1, len(self.signals)):
            output += fftconvolve(self.filters[i], self.signals[i])

    return output
def test_shape(self):
    """``analyze`` returns an array with the same shape as the STFT it is given."""
    # NumPy directly: scipy.sin / scipy.linspace / scipy.pi were only aliases
    # of the NumPy names and have been deprecated since SciPy 1.4.
    import numpy as np

    # 44100 samples of sin(2*pi*440*t) for t in [0, 1]
    x = np.sin(np.linspace(0, 1, 44100) * np.pi * 2 * 440)
    X = stft(x, 1024)
    tracks = analyze(X)
    # Assertion helpers live on the test case instance; assertEqual also
    # reports both shapes when they differ.
    self.assertEqual(tracks.shape, X.shape)
def test_number_of_tracks(self):
    """Exactly one entry of ``analyze``'s output is non-zero for a single sinusoid."""
    # NumPy directly: scipy.sin / scipy.linspace / scipy.pi were only aliases
    # of the NumPy names and have been deprecated since SciPy 1.4.
    import numpy as np

    # 44100 samples of sin(2*pi*440*t) for t in [0, 1]
    x = np.sin(np.linspace(0, 1, 44100) * np.pi * 2 * 440)
    X = stft(x, 1024)
    tracks = analyze(X)

    # Count the entries of `tracks` that compare unequal to zero. The previous
    # reduce/lambda spelling of this count was not valid Python.
    # NOTE(review): iterating `tracks` yields its first-axis items; if those
    # are arrays rather than scalars the truth test below is ambiguous --
    # confirm what one "track" is.
    active_tracks = sum(1 for track in tracks if track != 0)
    self.assertEqual(1, active_tracks)
import matplotlib.pyplot as plt
import numpy as np
from mfcc import mfcc
from read import read
from stft import stft

# Display the MFCCs of a mono recording.
fname = "sineSweep.wav"
(srate, data) = read(fname, "mono")

N = 1024
X = stft(data, N)
X = np.abs(X)
# Keep the first N // 2 + 1 rows. Floor division is required: `N/2+1` is a
# float on Python 3 and cannot be used as a slice bound.
X = X[:N // 2 + 1]
# NOTE(review): the sample rate is hard-coded to 44100 although `srate` was
# just read from the file -- confirm they agree.
X = mfcc(X, 44100)

# The first row is left out of the plot.
plt.imshow(X[1:], interpolation='nearest', aspect='auto', origin='lower')
plt.show()
def tf_agc(d, sr, t_scale=0.5, f_scale=1.0, causal_tracking=True, plot=False):
    """
    Perform frequency-dependent automatic gain control on an auditory
    frequency axis.

    d is the input waveform (at sampling rate sr).
    t_scale is the "scale" for smoothing in time (default 0.5 sec).
    f_scale is the frequency "scale" (default 1.0 "mel").
    causal_tracking true selects traditional infinite-attack, exponential
    release tracking; false selects symmetric, non-causal Gaussian-window
    smoothing.
    plot true displays the input STFT, the envelope and the output STFT
    (failures while plotting are reported on stdout and otherwise ignored).

    Returns the tuple (y, D, E):
    y is the output waveform with approximately constant energy in each
    time-frequency patch.
    D is the actual STFT used in analysis.
    E is the smoothed amplitude envelope divided out of D to get gain control.
    """
    hop_size = 0.032  # in seconds

    # Make STFT on ~32 ms grid
    ftlen = int(2 ** np.round(np.log(hop_size * sr) / np.log(2.)))
    winlen = ftlen
    # Sample counts must stay integral (`/` is true division on Python 3).
    hoplen = winlen // 2
    D = stft(d, winlen, hoplen)  # using my code
    # Frame rate in Hz; true division so an integer `sr` is not truncated.
    ftsr = float(sr) / hoplen
    ndcols = D.shape[1]

    # Smooth in frequency on ~ mel resolution
    # Width of mel filters depends on how many you ask for,
    # so ask for fewer for larger f_scales
    nbands = max(10, 20 / f_scale)  # 10 bands, or more for very fine f_scale
    mwidth = f_scale * nbands / 10  # will be 2.0 for small f_scale
    (f2a_tmp, _) = fft2melmx(ftlen, sr, int(nbands), mwidth)
    f2a = f2a_tmp[:, :ftlen // 2 + 1]
    audgram = np.dot(f2a, np.abs(D))

    if causal_tracking:
        # traditional attack/decay smoothing: follow rises instantly, decay
        # by `alpha` per frame
        fbg = np.zeros(audgram.shape)
        state = np.zeros(audgram.shape[0])
        alpha = np.exp(-(1. / ftsr) / t_scale)
        for i in range(audgram.shape[1]):
            state = np.maximum(alpha * state, audgram[:, i])
            fbg[:, i] = state
    else:
        # noncausal, time-symmetric smoothing
        # Smooth in time with tapered window of duration ~ t_scale
        tsd = np.round(t_scale * ftsr) / 2
        # Go out to 6 sigma; 6 * tsd is a whole number but a float, and it is
        # used as a slice bound below, so convert it.
        htlen = int(6 * tsd)
        twin = np.exp(-0.5 * (((np.arange(-htlen, htlen + 1)) / tsd) ** 2)).T

        # reflect ends to get smooth stuff
        AD = audgram
        x = np.hstack((np.fliplr(AD[:, :htlen]),
                       AD,
                       np.fliplr(AD[:, -htlen:]),
                       np.zeros((AD.shape[0], htlen))))
        fbg = signal.lfilter(twin, 1, x, 1)

        # strip "warm up" points
        fbg = fbg[:, twin.size + np.arange(ndcols)]

    # map back to FFT grid, flatten bark loop gain
    sf2a = np.sum(f2a, 0)
    sf2a[sf2a == 0] = 1.  # avoid dividing by zero for bins no band covers
    E = np.dot(np.dot(np.diag(1. / sf2a), f2a.T), fbg)
    # Remove any zeros in E (shouldn't be any, but who knows?)
    E[E <= 0] = np.min(E[E > 0])

    # invert back to waveform
    y = istft(D / E, winlen, hoplen, window=np.ones(winlen))  # using my code

    if plot:
        try:
            import matplotlib.pyplot as plt
            plt.subplot(3, 1, 1)
            plt.imshow(20. * np.log10(np.flipud(np.abs(D))))
            plt.subplot(3, 1, 2)
            plt.imshow(20. * np.log10(np.flipud(np.abs(E))))
            A = stft(y, winlen, hoplen)  # using my code
            plt.subplot(3, 1, 3)
            plt.imshow(20. * np.log10(np.flipud(np.abs(A))))
            plt.show()
        except Exception as e:
            print("Failed to plot results")
            print(e)

    # The results were previously computed and then discarded.
    return y, D, E
import math
from scipy.signal import get_window
import matplotlib.pyplot as plt

# Analysis parameters
inputFile = '../../sounds/sax-phrase-short.wav'
window = 'hamming'
M = 512
N = 1024
H = 64

# Make the models directory importable before pulling in its modules.
sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), '../../software/models/'))
import stft
import utilFunctions as UF

eps = np.finfo(float).eps

# Pass the sound through stft.stft and take the difference as noise.
fs, x = UF.wavread(inputFile)
w = get_window(window, M)
y = stft.stft(x, fs, w, N, H)
noise = x - y

# Energies over the whole signal ...
E_x = np.square(np.abs(x)).sum()
E_noise = np.square(np.abs(noise)).sum()

# ... and over the samples M .. size - M only.
E_partial_x = np.square(np.abs(x[M:x.size - M])).sum()
E_partial_noise = np.square(np.abs(noise[M:noise.size - M])).sum()

# Signal-to-noise ratios in dB.
SNR1 = np.log10(E_x / E_noise) * 10
SNR2 = np.log10(E_partial_x / E_partial_noise) * 10
import numpy as np
import matplotlib.pyplot as plt
from stft import stft

if __name__ == "__main__":
    fr = 44100  # framerate
    time = np.arange(0, 5, 1.0/fr)

    # Generate a sine wave with angular frequency 100 rad/s (about 15.9 Hz).
    # NOTE(review): an earlier comment called this a 100 Hz wave; that would
    # be np.sin(2*np.pi*100*time) -- confirm which one is intended.
    sig = np.sin(100*time)
    plt.plot(time, sig)
    plt.axis([0, 5, -2, 2])
    plt.show()

    # Generate windows
    windows = stft(sig)
    # Parenthesized form is valid on both Python 2 and Python 3.
    print(len(windows))

    # Magnitudes of two consecutive entries of the STFT output
    plt.plot(np.abs(windows[8]))
    plt.plot(np.abs(windows[9]))
    plt.show()
def process(self):
    """Apply the beamforming weights to ``self.signals`` and return the output.

    The method is selected by ``self.processing``:

    * ``'FrequencyDomain'``: weight the STFT of every channel, sum, and
      invert with ``stft.istft``; the zero padding is then removed.
    * ``'TimeDomain'``: turn the weights into time-domain filters with an
      inverse real FFT and convolve them with the signals.
    * ``'Total'``: weight one FFT of each whole signal and return the real
      part of the inverse FFT of the sum.

    Raises:
        NameError: if there is no signal to process.
        ValueError: if ``self.processing`` is not one of the values above.
    """
    if (self.signals is None or len(self.signals) == 0):
        raise NameError('No signal to beamform')

    # Strings are compared with `==`: `is` tests object identity and only
    # matches equal strings when they happen to be interned.
    if self.processing == 'FrequencyDomain':

        # create window function, zero-padded in front and at the back
        win = np.concatenate((np.zeros(self.zpf),
                              windows.hann(self.L),
                              np.zeros(self.zpb)))

        # do real STFT of first signal
        tfd_sig = stft.stft(self.signals[0],
                            self.L,
                            self.hop,
                            zp_back=self.zpb,
                            zp_front=self.zpf,
                            transform=np.fft.rfft,
                            win=win) * np.conj(self.weights[0])
        # accumulate the remaining channels (`range` works on Python 2 and 3)
        for i in range(1, self.M):
            tfd_sig += stft.stft(self.signals[i],
                                 self.L,
                                 self.hop,
                                 zp_back=self.zpb,
                                 zp_front=self.zpf,
                                 transform=np.fft.rfft,
                                 win=win) * np.conj(self.weights[i])

        # now reconstruct the signal
        output = stft.istft(
            tfd_sig,
            self.L,
            self.hop,
            zp_back=self.zpb,
            zp_front=self.zpf,
            transform=np.fft.irfft)

        # remove the zero padding from output signal
        # Compare by value: `is 0` is False for e.g. NumPy integers, and
        # output[zpf:-0] would then be empty.
        if self.zpb == 0:
            output = output[self.zpf:]
        else:
            output = output[self.zpf:-self.zpb]

    elif self.processing == 'TimeDomain':

        # go back to time domain and shift DC to center
        tw = np.sqrt(self.weights.shape[1])*np.fft.irfft(np.conj(self.weights), axis=1)
        half = self.N // 2  # integer index; `/` is a float on Python 3
        tw = np.concatenate((tw[:, half:], tw[:, :half]), axis=1)

        from scipy.signal import fftconvolve

        # filter the first signal, then add the other filtered channels
        output = fftconvolve(tw[0], self.signals[0])
        for i in range(1, len(self.signals)):
            output += fftconvolve(tw[i], self.signals[i])

    elif self.processing == 'Total':

        # extend the one-sided weights to a full conjugate-symmetric spectrum
        half = self.N // 2
        W = np.concatenate((self.weights, np.conj(self.weights[:,-2:0:-1])), axis=1)
        W[:,0] = np.real(W[:,0])
        W[:,half] = np.real(W[:,half])

        F_sig = np.zeros(self.signals.shape[1], dtype=complex)
        for i in range(self.M):
            F_sig += np.fft.fft(self.signals[i])*np.conj(W[i,:])

        # diagnostic: mean imaginary and real magnitude of the result
        f_sig = np.fft.ifft(F_sig)
        print(np.abs(np.imag(f_sig)).mean())
        print(np.abs(np.real(f_sig)).mean())

        output = np.real(f_sig)

    else:
        # Previously an unknown mode fell through to `return output` and
        # failed with an unhelpful UnboundLocalError.
        raise ValueError('Unknown processing mode: %r' % (self.processing,))

    return output