def no_overlap(D):
    """Round-trip ``x`` through an STFT whose frames do not overlap.

    Reads the module-level signal ``x`` and frame length ``block_size``.

    Parameters
    ----------
    D : int
        Number of channels of ``x`` to use. ``D == 1`` selects column 0 as a
        1-D signal; any other value selects the first ``D`` columns as a
        2-D array.

    Returns
    -------
    The largest absolute difference between the input and the signal
    re-synthesized from its STFT, passed through ``pra.dB``.
    """
    # a single channel is handed over as a 1-D signal, several as 2-D
    signal = x[:, 0] if D == 1 else x[:, :D]

    # consecutive frames are adjacent: the hop equals the frame length
    frame_shift = block_size

    # forward transform, then immediate inverse with the same parameters
    spectrum = stft.analysis(signal, L=block_size, hop=frame_shift)
    reconstructed = stft.synthesis(spectrum, L=block_size, hop=frame_shift)

    worst_error = np.max(np.abs(signal - reconstructed))
    return pra.dB(worst_error)
def half_overlap(D):
    """Round-trip ``x`` through a Hann-windowed STFT with a half-frame hop.

    Reads the module-level signal ``x`` and frame length ``block_size``.
    The analysis uses a Hann window; the synthesis is called without a
    window.

    Parameters
    ----------
    D : int
        Number of channels of ``x`` to use. ``D == 1`` selects column 0 as a
        1-D signal; any other value selects the first ``D`` columns as a
        2-D array.

    Returns
    -------
    The largest absolute difference between the compared segments of the
    input and of the re-synthesized signal, passed through ``pra.dB``.
    """
    # a single channel is handed over as a 1-D signal, several as 2-D
    signal = x[:, 0] if D == 1 else x[:, :D]

    frame_shift = block_size // 2
    window = pra.hann(block_size)

    # forward transform with the Hann window, inverse without a window
    spectrum = stft.analysis(
        signal, L=block_size, hop=frame_shift, win=window
    )
    reconstructed = stft.synthesis(spectrum, L=block_size, hop=frame_shift)

    # The comparison skips the first (block_size - hop) samples of the
    # reconstruction and the last (block_size - hop) samples of the input.
    overlap = block_size - frame_shift
    deviation = signal[:-overlap] - reconstructed[overlap:]

    return pra.dB(np.max(np.abs(deviation)))
def hop_one_sample(D):
    """Round-trip ``x`` through a Hann-windowed STFT advancing one sample per frame.

    Reads the module-level signal ``x`` and frame length ``block_size``.
    The synthesis window is derived from the analysis window with
    ``pra.transform.stft.compute_synthesis_window``.

    Parameters
    ----------
    D : int
        Number of channels of ``x`` to use. ``D == 1`` selects column 0 as a
        1-D signal; any other value selects the first ``D`` columns as a
        2-D array.

    Returns
    -------
    The largest absolute difference between the compared segments of the
    input and of the re-synthesized signal, passed through ``pra.dB``.
    """
    # a single channel is handed over as a 1-D signal, several as 2-D
    signal = x[:, 0] if D == 1 else x[:, :D]

    frame_shift = 1

    # matched pair of windows for the forward and the inverse transform
    window_fwd = pra.hann(block_size)
    window_inv = pra.transform.stft.compute_synthesis_window(
        window_fwd, frame_shift
    )

    spectrum = stft.analysis(
        signal, L=block_size, hop=frame_shift, win=window_fwd
    )
    reconstructed = stft.synthesis(
        spectrum, L=block_size, hop=frame_shift, win=window_inv
    )

    # The comparison skips the first (block_size - hop) samples of the
    # reconstruction and the last (block_size - hop) samples of the input.
    overlap = block_size - frame_shift
    deviation = signal[:-overlap] - reconstructed[overlap:]

    return pra.dB(np.max(np.abs(deviation)))
def append_one_sample(D):
    """Round-trip a signal that is one sample short of a whole number of hops.

    Reads the module-level signal ``x`` and frame length ``block_size``.
    ``x`` is cut to ``n_frames * hop - 1`` rows before the transform, and
    the last sample of the reconstruction is left out of the comparison.
    NOTE(review): the name suggests the transform pads the missing sample;
    confirm against the STFT implementation.

    Parameters
    ----------
    D : int
        Number of channels to use. ``D == 1`` selects column 0 as a 1-D
        signal; any other value selects the first ``D`` columns as a 2-D
        array.

    Returns
    -------
    The largest absolute difference between the compared segments of the
    input and of the re-synthesized signal, passed through ``pra.dB``.
    """
    frame_shift = block_size // 2

    # keep one sample less than the largest whole multiple of the hop
    total_samples = x.shape[0]
    whole_frames = total_samples // frame_shift
    trimmed = x[:whole_frames * frame_shift - 1, :]

    # a single channel is handed over as a 1-D signal, several as 2-D
    signal = trimmed[:, 0] if D == 1 else trimmed[:, :D]

    # forward transform with a Hann window, inverse without a window
    window = pra.hann(block_size)
    spectrum = stft.analysis(
        signal, L=block_size, hop=frame_shift, win=window
    )
    reconstructed = stft.synthesis(spectrum, L=block_size, hop=frame_shift)

    # The comparison skips the first (block_size - hop) samples and the
    # final sample of the reconstruction, and the last (block_size - hop)
    # samples of the input.
    overlap = block_size - frame_shift
    deviation = signal[:-overlap] - reconstructed[overlap:-1]

    return pra.dB(np.max(np.abs(deviation)))
if args.algo in dereverb_algos: # for dereverberation algorithms we use the anechoic reference signal fn_ref = DATA_DIR / rooms[args.room]["anechoic_filenames"][REF_MIC] else: fn_ref = DATA_DIR / rooms[args.room]["src_filenames"][REF_MIC] fs, ref = wavfile.read(fn_ref) ref = ref.astype(np.float64) / 2**15 # STFT parameters hop = args.block // 2 win_a = pra.hamming(args.block) win_s = pra.transform.stft.compute_synthesis_window(win_a, hop) # STFT X = stft.analysis(mix, args.block, hop, win=win_a) t1 = time.perf_counter() # Separation if args.algo == "fastmnmf": Y = algorithms[args.algo](X, n_iter=30) elif args.algo in dereverb_algos: if args.p is None: Y = algorithms[args.algo]( X, n_iter=15 * args.mics, n_taps=3, n_delays=2, n_components=1, proj_back=True,
room.add_microphone_array(pra.MicrophoneArray(mic_loc, fs)) room.compute_rir() #plot spectrograms to check for sweeping echoes fft_size = 512 # fft size for analysis fft_hop = 128 # hop between analysis frame fft_zp = 512 # zero padding analysis_window = pra.hann(fft_size) print("Sweeping echo measure for ISM is :") for n in range(M): if n == 0: S = stft.analysis(room.rir[n][0], fft_size, fft_hop, win=analysis_window, zp_back=fft_zp) f, (ax1, ax2) = plt.subplots(2,1) ax1.imshow( pra.dB(S.T), extent=[0, len(room.rir[n][0]), 0, fs / 2], vmin=-100, vmax=0, origin="lower", cmap="jet" ) ax1.set_title("RIR for Mic location " + str(n) + " without random ISM") ax1.set_ylabel("Frequency") ax1.set_aspect("auto")
# direct SNR of source 0 in room1, evaluated at the first column of
# mics.center and converted with pra.dB(..., power=True)
dSNR = pra.dB(room1.direct_snr(mics.center[:, 0], source=0), power=True)
print(f"The direct SNR for good source is {dSNR!s}")

# Trim the tail of every signal: keep only the first n_lim samples, where
# n_lim is the length of input_mic minus t_cut * Fs, rounded up.
n_lim = int(np.ceil(len(input_mic) - t_cut * Fs))
input_clean = signal1[:n_lim]
(
    input_mic,
    out_DirectMVDR,
    out_RakeMVDR,
    out_DirectPerceptual,
    out_RakePerceptual,
) = [
    sig[:n_lim]
    for sig in (
        input_mic,
        out_DirectMVDR,
        out_RakeMVDR,
        out_DirectPerceptual,
        out_RakePerceptual,
    )
]

# STFT of each trimmed signal, all with the same analysis parameters
F0, F1, F2, F3, F4, F5 = [
    stft.analysis(sig, fft_size, fft_hop, win=analysis_window, zp_back=fft_zp)
    for sig in (
        input_clean,
        input_mic,
        out_DirectMVDR,
        out_RakeMVDR,
        out_DirectPerceptual,
        out_RakePerceptual,
    )
]

# scale selection: a simple way to keep a few outliers from dominating
# the displayed spectrum
p_min = 7
def process(args, config):
    """Run one separation experiment and evaluate several scale-restoration methods.

    The mixture is loaded, white Gaussian noise is added at the configured
    SNR, the mixture is transformed with an STFT and separated with the
    selected algorithm. The separated signals are then evaluated three
    ways: without any rescaling, after projection back, and after minimum
    distortion for every pair ``(p, q)`` with ``q >= p`` on the configured
    grid.

    Parameters
    ----------
    args : tuple
        ``(n_channels, room_id, bss_algo)``. ``bss_algo`` is a key of
        ``config["bss_algorithms"]``; the algorithm that is actually run is
        that entry's ``"name"``.
    config : dict
        Experiment configuration. The keys read here are
        ``"bss_algorithms"``, ``"ref_mic"``, ``"metadata_fn"``, ``"snr"``,
        ``"stft"`` (``"nfft"``, ``"hop"``, ``"window"``), ``"si_metric"``
        and ``"minimum_distortion"`` (``"p_list"``, ``"kwargs"``).

    Returns
    -------
    list of dict
        The first element is ``{"bss_runtime": {"bss_algo", "runtime"}}``.
        Every following element is one evaluation record with the keys
        ``room_id``, ``n_channels``, ``bss_algo``, ``proj_algo``,
        ``runtime``, ``sdr``, ``sir``, ``p``, ``q`` and ``n_iter``.

    Raises
    ------
    ValueError
        If ``config["stft"]["window"]`` is anything other than
        ``"hamming"``.
    """
    n_channels, room_id, bss_algo = args

    # `bss_algo` is the configuration key; the entry's "name" selects the
    # implementation and decides whether it is a dereverberation algorithm
    algo_config = config["bss_algorithms"][bss_algo]
    bss_algo_name = algo_config["name"]
    is_dereverb = bss_algo_name in dereverb_algos

    if mkl_available:
        mkl.set_num_threads_local(1)

    ref_mic = config["ref_mic"]
    metadata_fn = Path(config["metadata_fn"])
    dataset_dir = metadata_fn.parent

    with open(metadata_fn, "r") as f:
        metadata = json.load(f)

    rooms = metadata[f"{n_channels}_channels"]
    room = rooms[room_id]

    # the mixtures
    fn_mix = dataset_dir / room["mix_filename"]
    _, mix = load_audio(fn_mix)

    # add white Gaussian noise scaled to reach the configured SNR (in dB)
    # relative to the standard deviation of the mixture
    sigma_src = np.std(mix)
    sigma_n = sigma_src * 10 ** (-config["snr"] / 20)
    mix += np.random.randn(*mix.shape) * sigma_n

    # the reference: dereverberation algorithms are evaluated against the
    # anechoic signal, all others against the source image
    ref_key = "anechoic_filenames" if is_dereverb else "src_filenames"
    fn_ref = dataset_dir / room[ref_key][ref_mic]
    _, ref = load_audio(fn_ref)

    # STFT parameters
    nfft = config["stft"]["nfft"]
    hop = config["stft"]["hop"]
    if config["stft"]["window"] != "hamming":
        raise ValueError("Window not implemented")
    win_a = pra.hamming(nfft)
    win_s = pra.transform.stft.compute_synthesis_window(win_a, hop)

    # STFT
    X = stft.analysis(mix, nfft, hop, win=win_a)

    # Separation
    bss_kwargs = algo_config["kwargs"]
    n_iter = algo_config["n_iter_per_channel"] * n_channels
    separate = bss_algorithms[bss_algo_name]

    # `runtime_bss` first holds the start timestamp and is turned into the
    # elapsed time once the separation is done
    runtime_bss = time.perf_counter()
    if bss_algo_name == "fastmnmf":
        Y = separate(X, n_iter=n_iter, **bss_kwargs)
    elif is_dereverb:
        Y, Y_pb, runtime_pb = separate(
            X, n_iter=n_iter, proj_back_both=True, **bss_kwargs
        )
        # move the start time forward so that the time spent on projection
        # back inside the algorithm is not counted as separation time
        runtime_bss += runtime_pb
    else:
        Y = separate(X, n_iter=n_iter, proj_back=False, **bss_kwargs)
    runtime_bss = time.perf_counter() - runtime_bss

    results = [{"bss_runtime": {"bss_algo": bss_algo, "runtime": runtime_bss}}]

    # template record; a copy is appended after every evaluation
    t = {
        "room_id": room_id,
        "n_channels": n_channels,
        "bss_algo": bss_algo,
        "proj_algo": None,
        "runtime": 0.0,
        "sdr": None,
        "sir": None,
        "p": None,
        "q": None,
        "n_iter": 1,
    }

    # Evaluation of raw signal
    t["proj_algo"] = "None"
    _, sdr, sir, _ = reconstruct_evaluate(
        ref, Y, nfft, hop, win=win_s, si_metric=config["si_metric"]
    )
    t["sdr"], t["sir"] = sdr.tolist(), sir.tolist()
    results.append(t.copy())

    # projection back
    t["proj_algo"] = "projection_back"
    # Test the algorithm *name*, as in the separation step above: the
    # dereverberation branch is the one that produced `Y_pb` and
    # `runtime_pb`, and the config key may differ from the name.
    if is_dereverb:
        Z = Y_pb
    else:
        runtime_pb = time.perf_counter()
        Z = bss_scale.projection_back(Y, X[:, :, ref_mic])
        runtime_pb = time.perf_counter() - runtime_pb
    _, sdr, sir, _ = reconstruct_evaluate(
        ref, Z, nfft, hop, win=win_s, si_metric=config["si_metric"]
    )
    t["sdr"], t["sir"] = sdr.tolist(), sir.tolist()
    t["runtime"] = runtime_pb
    results.append(t.copy())

    # minimum distortion, evaluated for every pair (p, q) with q >= p
    lo, hi, n_steps = config["minimum_distortion"]["p_list"]
    p_vals = np.linspace(lo, hi, n_steps)
    kwargs = config["minimum_distortion"]["kwargs"]

    for ip, p in enumerate(p_vals):
        for q in p_vals[ip:]:
            t["p"], t["q"], t["proj_algo"] = (
                f"{p:.1f}",
                f"{q:.1f}",
                "minimum_distortion",
            )

            runtime_md = time.perf_counter()
            Z, t["n_iter"] = bss_scale.minimum_distortion(
                Y, X[:, :, ref_mic], p=p, q=q, **kwargs
            )
            runtime_md = time.perf_counter() - runtime_md

            _, sdr, sir, _ = reconstruct_evaluate(
                ref, Z, nfft, hop, win=win_s, si_metric=config["si_metric"]
            )
            t["sdr"] = sdr.tolist()
            t["sir"] = sir.tolist()
            t["runtime"] = runtime_md
            results.append(t.copy())

    return results