def no_window(nfft, D, transform, axis=0):
    """Measure forward and backward DFT errors without any window.

    The input signal ``x`` and the reference spectrum ``X_numpy`` are
    module-level names defined outside this function.

    Parameters
    ----------
    nfft :
        Forwarded as the first positional argument of ``pra.transform.DFT``.
    D :
        Forwarded as the second positional argument of ``pra.transform.DFT``.
        When it equals 1, only column 0 of ``x`` / ``X_numpy`` is used.
    transform :
        Forwarded as the ``transform`` keyword of ``pra.transform.DFT``.
    axis :
        Forwarded as the ``axis`` keyword of ``pra.transform.DFT``.  For
        ``D != 1`` and a non-zero ``axis`` the data are transposed first.

    Returns
    -------
    tuple
        ``(err_fwd, err_bwd)``: ``pra.dB`` of the norm of the difference
        between the reference and the computed spectrum, and ``pra.dB`` of
        the norm of the difference between the input and its resynthesis.
    """
    # Select the data layout matching the requested configuration.
    if D == 1:
        signal, reference = x[:, 0], X_numpy[:, 0]
    elif axis == 0:
        signal, reference = x, X_numpy
    else:
        signal, reference = x.T, X_numpy.T

    # make object
    dft = pra.transform.DFT(nfft, D, transform=transform, axis=axis)

    # forward
    spectrum = dft.analysis(signal)
    err_fwd = pra.dB(np.linalg.norm(reference - spectrum))

    # backward (called without arguments, exactly as in the original)
    reconstructed = dft.synthesis()
    err_bwd = pra.dB(np.linalg.norm(signal - reconstructed))

    return err_fwd, err_bwd
def plot_spectrogram(F, title):
    """Show ``pra.dB(F.T)`` as an image and decorate the axes object ``ax``.

    ``Fs``, ``vmin``, ``vmax``, ``cmap``, ``interpolation`` and ``ax`` are
    module-level names defined outside this function.

    Parameters
    ----------
    F :
        Array whose transpose is converted with ``pra.dB`` and displayed.
    title :
        Title set on ``ax``.
    """
    magnitude_db = pra.dB(F.T)

    # Horizontal axis spans [0, 1]; vertical axis spans [0, Fs / 2].
    image_extent = [0, 1, 0, Fs / 2]
    colormap = plt.get_cmap(cmap)

    plt.imshow(
        magnitude_db,
        extent=image_extent,
        vmin=vmin,
        vmax=vmax,
        origin="lower",
        cmap=colormap,
        interpolation=interpolation,
    )

    # Only the title is kept; labels are blanked and the axis frame hidden.
    ax.set_title(title)
    ax.set_ylabel("")
    ax.set_xlabel("")
    ax.set_aspect("auto")
    ax.axis("off")
def no_overlap(D):
    """Analyse and resynthesise ``x`` with hop equal to the block size.

    ``x``, ``block_size``, ``analysis`` and ``synthesis`` are module-level
    names defined outside this function.

    Parameters
    ----------
    D :
        Number of columns of ``x`` to use; ``D == 1`` selects column 0 as a
        one-dimensional array.

    Returns
    -------
    ``pra.dB`` of the largest absolute difference between the input and the
    resynthesised signal.
    """
    x_local = x[:, 0] if D == 1 else x[:, :D]

    # Frames do not overlap: the hop is a full block.
    hop = block_size

    # analysis
    X = analysis(x_local, L=block_size, hop=hop)

    # synthesis
    x_r = synthesis(X, L=block_size, hop=hop)

    worst_error = np.max(np.abs(x_local - x_r))
    return pra.dB(worst_error)
def half_overlap(D):
    """Analyse and resynthesise ``x`` with a hop of half a block.

    A Hann window from ``pra.hann`` is applied at analysis; synthesis is
    called without a window.  ``x``, ``block_size``, ``analysis`` and
    ``synthesis`` are module-level names defined outside this function.

    Parameters
    ----------
    D :
        Number of columns of ``x`` to use; ``D == 1`` selects column 0 as a
        one-dimensional array.

    Returns
    -------
    ``pra.dB`` of the largest absolute difference between the input and the
    output, the output being read ``hop`` samples later than the input.
    """
    x_local = x[:, 0] if D == 1 else x[:, :D]
    hop = block_size // 2

    # analysis
    analysis_win = pra.hann(block_size)
    X = analysis(x_local, L=block_size, hop=hop, win=analysis_win)

    # synthesis
    x_r = synthesis(X, L=block_size, hop=hop)

    # Compare input sample k with output sample k + hop.
    mismatch = x_local[:-hop] - x_r[hop:]
    return pra.dB(np.max(np.abs(mismatch)))
def hop_one_sample(D):
    """Analyse and resynthesise ``x`` with a hop of a single sample.

    A Hann analysis window is used, and the synthesis window is obtained
    from ``pra.transform.compute_synthesis_window``.  ``x``, ``block_size``,
    ``analysis`` and ``synthesis`` are module-level names defined outside
    this function.

    Parameters
    ----------
    D :
        Number of columns of ``x`` to use; ``D == 1`` selects column 0 as a
        one-dimensional array.

    Returns
    -------
    ``pra.dB`` of the largest absolute difference between the input and the
    output, the output being read ``block_size - hop`` samples later.
    """
    x_local = x[:, 0] if D == 1 else x[:, :D]
    hop = 1

    # analysis
    analysis_win = pra.hann(block_size)
    X = analysis(x_local, L=block_size, hop=hop, win=analysis_win)

    # synthesis
    synthesis_win = pra.transform.compute_synthesis_window(analysis_win, hop)
    x_r = synthesis(X, L=block_size, hop=hop, win=synthesis_win)

    # Offset between the input and the output samples being compared.
    lag = block_size - hop
    mismatch = x_local[:-lag] - x_r[lag:]
    return pra.dB(np.max(np.abs(mismatch)))
def append_one_sample(D):
    """Round-trip a signal that is one sample short of a whole number of hops.

    ``x`` is truncated to ``n_frames * hop - 1`` rows before the half-overlap
    analysis (Hann window) and the window-less synthesis.  ``x``,
    ``block_size``, ``analysis`` and ``synthesis`` are module-level names
    defined outside this function.

    Parameters
    ----------
    D :
        Number of columns of the truncated signal to use; ``D == 1`` selects
        column 0 as a one-dimensional array.

    Returns
    -------
    ``pra.dB`` of the largest absolute difference between the input and the
    output; the output is read ``block_size - hop`` samples later and its
    last sample is dropped.
    """
    hop = block_size // 2
    n_frames = x.shape[0] // hop

    # Keep one sample fewer than an integer number of hops.
    truncated = x[:n_frames * hop - 1, :]
    x_local = truncated[:, 0] if D == 1 else truncated[:, :D]

    # analysis
    analysis_win = pra.hann(block_size)
    X = analysis(x_local, L=block_size, hop=hop, win=analysis_win)

    # synthesis
    x_r = synthesis(X, L=block_size, hop=hop)

    lag = block_size - hop
    mismatch = x_local[:-lag] - x_r[lag:-1]
    return pra.dB(np.max(np.abs(mismatch)))
# STFT parameters used for the spectrogram below.
fft_size = 512  # fft size for analysis
fft_hop = 128  # hop between analysis frame
fft_zp = 512  # zero padding
analysis_window = pra.hann(fft_size)

print("Sweeping echo measure for ISM is :")
for n in range(M):
    # Only the first microphone gets a figure in this part of the loop.
    if n == 0:
        S = stft.analysis(
            room.rir[n][0],
            fft_size,
            fft_hop,
            win=analysis_window,
            zp_back=fft_zp,
        )

        f, (ax1, ax2) = plt.subplots(nrows=2, ncols=1)

        # Top panel: spectrogram of the impulse response, in dB.
        ax1.imshow(
            pra.dB(S.T),
            extent=[0, len(room.rir[n][0]), 0, fs / 2],
            vmin=-100,
            vmax=0,
            origin="lower",
            cmap="jet",
        )
        ax1.set_title("RIR for Mic location " + str(n) + " without random ISM")
        ax1.set_ylabel("Frequency")
        ax1.set_aspect("auto")

        # Bottom panel: plot RIR
        ax2.plot(room.rir[n][0])
        ax2.set_xlabel("Num samples")
        ax2.set_ylabel("Amplitude")
from pyroomacoustics.directivities import cardioid_func
from pyroomacoustics.doa import spher2cart

# Angular grids, converted from degrees to radians.
azimuth = np.radians(np.linspace(start=0, stop=360, num=361, endpoint=True))
# NOTE(review): num=180 with endpoint=True does not give a 1-degree step
# (the azimuth grid uses 361 points for 0..360) — confirm whether 181 was
# intended before changing it, later code may rely on the current length.
colatitude = np.radians(np.linspace(start=0, stop=180, num=180, endpoint=True))
lower_gain = -40

""" 2D """
# get cartesian coordinates
cart = spher2cart(azimuth=azimuth)
direction = spher2cart(azimuth=225, degrees=True)

# compute response
resp = cardioid_func(x=cart, direction=direction, coef=0.5, magnitude=True)
resp_db = dB(np.array(resp))

# plot
plt.figure()
plt.polar(azimuth, resp_db)
plt.ylim([lower_gain, 0])
ax = plt.gca()
ax.yaxis.set_ticks(np.arange(start=lower_gain, stop=5, step=10))
plt.tight_layout()

""" 3D """
# get cartesian coordinates
spher_coord = all_combinations(azimuth, colatitude)
cart = spher2cart(
    azimuth=spher_coord[:, 0],
    colatitude=spher_coord[:, 1],
)
direction = spher2cart(azimuth=0, colatitude=45, degrees=True)
) # process the signal output = mics.process() # save to output file out_RakePerceptual = pra.normalize(pra.highpass(output, Fs)) wavfile.write( path + "/output_samples/output_RakePerceptual.wav", Fs, out_RakePerceptual ) """ Plot all the spectrogram """ dSNR = pra.dB(room1.direct_snr(mics.center[:, 0], source=0), power=True) print("The direct SNR for good source is " + str(dSNR)) # remove a bit of signal at the end n_lim = int(np.ceil(len(input_mic) - t_cut * Fs)) input_clean = signal1[:n_lim] input_mic = input_mic[:n_lim] out_DirectMVDR = out_DirectMVDR[:n_lim] out_RakeMVDR = out_RakeMVDR[:n_lim] out_DirectPerceptual = out_DirectPerceptual[:n_lim] out_RakePerceptual = out_RakePerceptual[:n_lim] # compute time-frequency planes F0 = stft.analysis(input_clean, fft_size, fft_hop, win=analysis_window, zp_back=fft_zp) F1 = stft.analysis(input_mic, fft_size, fft_hop, win=analysis_window, zp_back=fft_zp)
# NOTE(review): the next four statements use `self`, so they are presumably
# the tail of a test method whose header lies outside this chunk — confirm
# their indentation against the enclosing definition.
h_hat = pra.experimental.wiener_deconvolve(
    y, x, length=h_len, noise_variance=sigma_noise**2
)
rmse = np.sqrt(np.linalg.norm(h_hat - h)**2 / h_len)
print('rmse=', rmse, '(tol=', tol, ')')
self.assertTrue(rmse < tol)


if __name__ == '__main__':
    # Manual comparison of the naive and the Wiener deconvolution.

    import matplotlib.pyplot as plt

    h = h_hann
    y, sigma_noise = generate_signals(SNR, x, h, noise)

    # Naive deconvolution: residual of the re-convolved signal and
    # mean squared error of the estimated filter.
    h_hat1 = pra.experimental.deconvolve(y, x, length=h_len)
    res1 = np.linalg.norm(y - fftconvolve(x, h_hat1))**2 / y.shape[0]
    mse1 = np.linalg.norm(h_hat1 - h)**2 / h_len

    # Wiener deconvolution: same two metrics.
    h_hat2 = pra.experimental.wiener_deconvolve(
        y, x, length=h_len, noise_variance=sigma_noise**2, let_n_points=15
    )
    res2 = np.linalg.norm(y - fftconvolve(x, h_hat2))**2 / y.shape[0]
    mse2 = np.linalg.norm(h_hat2 - h)**2 / h_len

    print('MSE naive: rmse=', np.sqrt(mse1), ' res=', pra.dB(res1, power=True))
    # Fixed: this line used to print res1 (the naive residual) for Wiener.
    print('MSE Wiener: rmse=', np.sqrt(mse2), ' res=', pra.dB(res2, power=True))

    plt.plot(h)
    plt.plot(h_hat1)
    plt.plot(h_hat2)
    plt.legend(['Original', 'Naive', 'Wiener'])
    plt.show()
if __name__ == "__main__":
    # Manual comparison of the naive and the Wiener deconvolution.

    import matplotlib.pyplot as plt

    h = h_hann
    y, sigma_noise = generate_signals(SNR, x, h, noise)

    # Naive deconvolution: residual of the re-convolved signal and
    # mean squared error of the estimated filter.
    h_hat1 = pra.experimental.deconvolve(y, x, length=h_len)
    res1 = np.linalg.norm(y - fftconvolve(x, h_hat1))**2 / y.shape[0]
    mse1 = np.linalg.norm(h_hat1 - h)**2 / h_len

    # Wiener deconvolution: same two metrics.
    h_hat2 = pra.experimental.wiener_deconvolve(
        y, x, length=h_len, noise_variance=sigma_noise**2, let_n_points=15
    )
    res2 = np.linalg.norm(y - fftconvolve(x, h_hat2))**2 / y.shape[0]
    mse2 = np.linalg.norm(h_hat2 - h)**2 / h_len

    print("MSE naive: rmse=", np.sqrt(mse1), " res=", pra.dB(res1, power=True))
    # Fixed: this line used to print res1 (the naive residual) for Wiener.
    print("MSE Wiener: rmse=", np.sqrt(mse2), " res=", pra.dB(res2, power=True))

    plt.plot(h)
    plt.plot(h_hat1)
    plt.plot(h_hat2)
    plt.legend(["Original", "Naive", "Wiener"])
    plt.show()
mics.rakePerceptualFilters(good_sources[:max_order_design+1], bad_sources[:max_order_design+1], sigma2_n*np.eye(mics.Lg*mics.M), delay=delay) # process the signal output = mics.process() # save to output file out_RakePerceptual = pra.normalize(pra.highpass(output, Fs)) wavfile.write('output_samples/output_RakePerceptual.wav', Fs, out_RakePerceptual) ''' Plot all the spectrogram ''' dSNR = pra.dB(room1.dSNR(mics.center[:,0], source=0), power=True) print 'The direct SNR for good source is ' + str(dSNR) # remove a bit of signal at the end n_lim = np.ceil(len(input_mic) - t_cut*Fs) input_clean = signal1[:n_lim] input_mic = input_mic[:n_lim] out_DirectMVDR = out_DirectMVDR[:n_lim] out_RakeMVDR = out_RakeMVDR[:n_lim] out_DirectPerceptual = out_DirectPerceptual[:n_lim] out_RakePerceptual = out_RakePerceptual[:n_lim] # compute time-frequency planes F0 = pra.stft(input_clean, fft_size, fft_hop, win=analysis_window,
import numpy as np
import matplotlib.pyplot as plt
import pyroomacoustics as pra

beamformer_names = ['Rake MaxSINR', 'Rake Perceptual', 'Rake MVDR']

SINR = np.load('data/SINR_data.npy')
# uncomment the following line to use the simulated data used in the paper
#SINR = np.load('data/SINR_data_Lg30ms_d20ms_SNR10_N10000_20141015.npy')

max_K, n_bf, n_monte_carlo = SINR.shape

# Median, 5th and 95th percentiles over the last (Monte-Carlo) axis.
SINR_med = np.array(np.percentile(SINR, [50, 5, 95], axis=-1))

# SINR is a power ratio, so convert with power=True (10*log10); the default
# amplitude convention would double the reported gain.
SINR_gain_5sources = (pra.dB(SINR_med[0, 5, :], power=True)
                      - pra.dB(SINR_med[0, 0, :], power=True))

# Parenthesised single-argument prints behave the same on Python 2 and 3.
print('SNR gain of using 5 sources instead of one:')
print('Rake MaxSINR: %.2f dB' % SINR_gain_5sources[0])
print('Rake Perceptual: %.2f dB' % SINR_gain_5sources[1])
print('Rake MVDR: %.2f dB' % SINR_gain_5sources[2])

#---------------------------------------------------------------------
# Export the SNR figure
#---------------------------------------------------------------------

plt.figure(figsize=(4, 3))

newmap = plt.get_cmap('gist_heat')
ax1 = plt.gca()
# Axes.set_color_cycle no longer exists in current Matplotlib;
# set_prop_cycle(color=...) is its replacement.
ax1.set_prop_cycle(
    color=[newmap(k) for k in np.linspace(0.25, 0.9, len(beamformer_names))])

from itertools import cycle
bad_sources, sigma2_n * np.eye(mics.Lg * mics.M), delay=delay) # process the signal output = mics.process() # save to output file out_RakePerceptual = pra.normalize(pra.highpass(output, Fs)) wavfile.write('output_samples/output_RakePerceptual.wav', Fs, out_RakePerceptual) ''' Plot all the spectrogram ''' dSNR = pra.dB(room1.dSNR(mics.center[:, 0], source=0), power=True) print 'The direct SNR for good source is ' + str(dSNR) # remove a bit of signal at the end n_lim = np.ceil(len(input_mic) - t_cut * Fs) input_clean = signal1[:n_lim] input_mic = input_mic[:n_lim] out_DirectMVDR = out_DirectMVDR[:n_lim] out_RakeMVDR = out_RakeMVDR[:n_lim] out_DirectPerceptual = out_DirectPerceptual[:n_lim] out_RakePerceptual = out_RakePerceptual[:n_lim] # compute time-frequency planes F0 = pra.stft(input_clean, fft_size, fft_hop,
import numpy as np
import matplotlib.pyplot as plt
import pyroomacoustics as pra

beamformer_names = ['Rake MaxSINR', 'Rake Perceptual', 'Rake MVDR']

SINR = np.load('data/SINR_data.npy')
# The next line overrides the data loaded above with the simulated data used
# in the paper; comment it out to plot data/SINR_data.npy instead.
SINR = np.load('data/SINR_data_Lg30ms_d20ms_SNR10_N10000_20141015.npy')

max_K, n_bf, n_monte_carlo = SINR.shape

# Median and quartiles over the last (Monte-Carlo) axis.
SINR_med = np.array(np.percentile(SINR, [50, 25, 75], axis=-1))

SINR_gain_5sources = pra.dB(SINR_med[0, 5, :], power=True) - pra.dB(
    SINR_med[0, 0, :], power=True)

# Parenthesised single-argument prints behave the same on Python 2 and 3.
print('SNR gain of using 5 sources instead of one:')
print('Rake MaxSINR: %.2f dB' % SINR_gain_5sources[0])
print('Rake Perceptual: %.2f dB' % SINR_gain_5sources[1])
print('Rake MVDR: %.2f dB' % SINR_gain_5sources[2])

#---------------------------------------------------------------------
# Export the SNR figure
#---------------------------------------------------------------------

plt.figure(figsize=(4, 3))

newmap = plt.get_cmap('gist_heat')
ax1 = plt.gca()
# Axes.set_color_cycle no longer exists in current Matplotlib;
# set_prop_cycle(color=...) is its replacement.
ax1.set_prop_cycle(
    color=[newmap(k) for k in np.linspace(0.25, 0.9, len(beamformer_names))])
import numpy as np
import matplotlib.pyplot as plt
import pyroomacoustics as pra

beamformer_names = ['Rake MaxSINR', 'Rake Perceptual', 'Rake MVDR']

SINR = np.load('data/SINR_data.npy')
# The next line overrides the data loaded above with the simulated data used
# in the paper; comment it out to plot data/SINR_data.npy instead.
SINR = np.load('data/SINR_data_Lg30ms_d20ms_SNR10_N10000_20141015.npy')

max_K, n_bf, n_monte_carlo = SINR.shape

# Median and quartiles over the last (Monte-Carlo) axis.
SINR_med = np.array(np.percentile(SINR, [50, 25, 75], axis=-1))

SINR_gain_5sources = (pra.dB(SINR_med[0, 5, :], power=True)
                      - pra.dB(SINR_med[0, 0, :], power=True))

# Parenthesised single-argument prints behave the same on Python 2 and 3.
print('SNR gain of using 5 sources instead of one:')
print('Rake MaxSINR: %.2f dB' % SINR_gain_5sources[0])
print('Rake Perceptual: %.2f dB' % SINR_gain_5sources[1])
print('Rake MVDR: %.2f dB' % SINR_gain_5sources[2])

#---------------------------------------------------------------------
# Export the SNR figure
#---------------------------------------------------------------------

plt.figure(figsize=(4, 3))

newmap = plt.get_cmap('gist_heat')
ax1 = plt.gca()
# Axes.set_color_cycle no longer exists in current Matplotlib;
# set_prop_cycle(color=...) is its replacement.
ax1.set_prop_cycle(
    color=[newmap(k) for k in np.linspace(0.25, 0.9, len(beamformer_names))])

from itertools import cycle