def createroom(mic_p, mic_d, sour_p, sour_d, callback_mix, roomdim, absorption, max_order, n_mics, angle):
    """Simulate two sources in a shoebox room, separate them and save the audio.

    The function builds a ``pra.ShoeBox`` room, places two target sources at
    fixed positions and a semi-circular microphone array, runs the simulation
    through ``callback_mix``, applies the BSS algorithm selected by
    ``algo_choices[0]`` and evaluates the result with ``bss_eval_sources``.
    The first channel of the mixture is written to ``birdmix.wav`` and each
    separated signal to ``birdsep<k>.wav``. SDR and SIR are printed.

    Parameters
    ----------
    mic_p, mic_d:
        Forwarded to ``semi_circle_layout`` as its first and third arguments
        (array centre and distance, judging by the call site).
    sour_p, sour_d:
        Not used by this function.
    callback_mix:
        Mixing callback handed to ``room.simulate``.
    roomdim, absorption, max_order:
        Forwarded to ``pra.ShoeBox``.
    n_mics:
        Number of microphones; must be at least 2 (the number of targets).
    angle:
        Arc angle of the microphone layout; also used to derive its rotation.

    Notes
    -----
    Relies on names defined outside this function: ``amBird``, ``saBird``,
    ``algo_choices`` and, for some algorithms, ``init`` / ``init_choices``.
    """
    np.random.seed(10)

    # STFT configuration: 4096-sample Hann frames, half-frame hop.
    frame_len = 4096
    hop = frame_len // 2
    analysis_win = pra.hann(frame_len)
    synthesis_win = pra.transform.compute_synthesis_window(analysis_win, hop)

    # OGIVE settings
    ogive_step = 0.1
    ogive_mode = "switching"
    ogive_n_iter = 2000

    # Mixing levels handed to the mixing callback (dB).
    sir_db = 10
    snr_db = 60

    # Separation settings
    algo = algo_choices[0]
    save = True
    n_iter = 60
    dist = "gauss"  # "gauss" or "laplace"

    n_sources = 2
    n_sources_target = 2
    assert n_sources_target <= n_mics, "More sources than microphones is not supported"

    source_std = np.ones(n_sources_target)

    # Geometry: microphones on a semi-circle, targets at fixed coordinates.
    layout_rotation = np.pi - angle / 2
    mic_locs = semi_circle_layout(mic_p, angle, mic_d, n_mics, rot=layout_rotation)
    target_locs = np.transpose([[7, 10, 6], [9, 16, 6]])
    # There are no interferers in this variant; the call is kept as in the
    # original even though its result is not used.
    random_layout([14, 0, 6], n_sources - n_sources_target, offset=[5, 20, 3], seed=1)
    source_locs = target_locs

    signals = wav_read_center([amBird, saBird], seed=123)

    room = pra.ShoeBox(
        roomdim,
        fs=44100,
        absorption=absorption,
        max_order=max_order,
        air_absorption=True,
        humidity=50,
    )
    for position, signal in zip(source_locs.T, signals):
        room.add_source(position, signal=signal)
    room.add_microphone_array(pra.MicrophoneArray(mic_locs, fs=room.fs))

    separate_recordings = room.simulate(
        callback_mix=callback_mix,
        callback_mix_kwargs={
            "snr": snr_db,
            "sir": sir_db,
            "n_src": n_sources,
            "n_tgt": n_sources_target,
            "src_std": source_std,
            "ref_mic": 0,
        },
        return_premix=True,
    )
    mics_signals = room.mic_array.signals
    print("Simulation done.")

    # Reference signals; padded with noise when there are fewer references
    # than microphones.
    ref = np.moveaxis(separate_recordings, 1, 2)
    n_missing = n_mics - ref.shape[0]
    if n_missing > 0:
        padding = np.random.randn(n_missing, ref.shape[1], ref.shape[2])
        ref = np.concatenate((ref, padding), axis=0)

    on_iteration = None  # no convergence monitoring

    # STFT of the microphone signals, shape (n_frames, n_freq, n_mics)
    X_all = pra.transform.analysis(mics_signals.T, frame_len, hop, win=analysis_win)
    X_mics = X_all.astype(np.complex128)[:, :, :n_mics]

    # Each entry is evaluated lazily so that only the selected algorithm's
    # arguments are looked up.
    runners = {
        "auxiva": lambda: overiva(
            X_mics, n_iter=n_iter, proj_back=True, model=dist, callback=on_iteration
        ),
        "auxiva_pca": lambda: auxiva_pca(
            X_mics,
            n_src=n_sources_target,
            n_iter=n_iter,
            proj_back=True,
            model=dist,
            callback=on_iteration,
        ),
        "overiva": lambda: overiva(
            X_mics,
            n_src=n_sources_target,
            n_iter=n_iter,
            proj_back=True,
            model=dist,
            init_eig=(init == init_choices[1]),
            callback=on_iteration,
        ),
        "ilrma": lambda: pra.bss.ilrma(
            X_mics, n_iter=n_iter, n_components=2, proj_back=True, callback=on_iteration
        ),
        "ogive": lambda: ogive(
            X_mics,
            n_iter=ogive_n_iter,
            step_size=ogive_step,
            update=ogive_mode,
            proj_back=True,
            model=dist,
            init_eig=(init == init_choices[1]),
            callback=on_iteration,
        ),
        "ogive_matlab": lambda: ogive_matlab_wrapper(
            X_mics,
            n_iter=ogive_n_iter,
            step_size=ogive_step,
            update=ogive_mode,
            proj_back=True,
            init_eig=(init == init_choices[1]),
            callback=on_iteration,
        ),
    }

    t_start = time.perf_counter()  # timing kept from the original, unused
    if algo not in runners:
        raise ValueError("No such algorithm {}".format(algo))
    Y = runners[algo]()
    t_end = time.perf_counter()

    # Back to the time domain
    if Y.shape[2] == 1:
        y = pra.transform.synthesis(Y[:, :, 0], frame_len, hop, win=synthesis_win)[:, None]
    else:
        y = pra.transform.synthesis(Y, frame_len, hop, win=synthesis_win)
    y = y.astype(np.float64)

    # Outputs that are uniformly zero get a tiny amount of noise so that the
    # evaluation below can still be computed.
    for col in range(y.shape[1]):
        if np.sum(np.abs(y[:, col])) < 1e-10:
            y[:, col] = np.random.randn(y.shape[0]) * 1e-10

    # Conventional BSS: order the outputs by decreasing power.
    if algo != "blinkiva":
        by_power = np.argsort(np.std(y, axis=0))[::-1]
        y = y[:, by_power]

    # Evaluate against the references at microphone 0, skipping the first hop
    # of the synthesised signal.
    n_eval = np.minimum(y.shape[0] - hop, ref.shape[1])
    sdr, sir, sar, perm = bss_eval_sources(
        ref[:n_sources_target, :n_eval, 0],
        y[hop:n_eval + hop, :n_sources_target].T,
    )

    # Reorder the separated signals to match the references.
    y_hat = y[:, perm]
    print("SDR:", sdr)
    print("SIR:", sir)

    # Save the mixture (first channel) and the separated signals.
    if save:
        from scipy.io import wavfile

        mixture_int16 = pra.normalize(mics_signals, bits=16).astype(np.int16).T
        wavfile.write("birdmix.wav", room.fs, mixture_int16[:, 0])
        for idx, separated in enumerate(y_hat.T):
            wavfile.write(
                "birdsep{}.wav".format(idx + 1),
                room.fs,
                pra.normalize(separated, bits=16).astype(np.int16).T,
            )
n_sources_target, rot=0.743 * np.pi) # interferer_locs = grid_layout([3., 5.5], n_sources - n_sources_target, offset=[6.5, 1., 1.7]) interferer_locs = random_layout([3.0, 5.5, 1.5], n_sources - n_sources_target, offset=[6.5, 1.0, 0.5], seed=1) source_locs = np.concatenate((target_locs, interferer_locs), axis=1) # Prepare the signals wav_files = sampling(1, n_sources, f"{samples_dir}/metadata.json", gender_balanced=True, seed=8)[0] signals = wav_read_center(wav_files, seed=123) # Place the blinkies regularly in the room (2D plane) blinky_locs = gm_layout(n_blinkies, target_locs - np.c_[[0.0, 0.0, 0.4]], std=[0.4, 0.4, 0.05], seed=987) all_locs = np.concatenate((mic_locs, blinky_locs), axis=1) # Create the room itself room = pra.ShoeBox(room_dim, fs=fs, absorption=absorption, max_order=max_order)
def one_loop(args):
    """Run every configured BSS algorithm on one simulated room.

    Parameters
    ----------
    args: tuple
        ``(n_targets, n_interferers, n_mics, sinr, wav_files, room_seed, seed)``.

    Returns
    -------
    list of dict
        One entry per algorithm that was run, holding the configuration, the
        SDR/SIR trajectories (``"sdr"``, ``"sir"``), ``"runtime"`` (time spent
        in the algorithm minus the evaluation callback) and ``"eval_time"``.
        An empty list is returned for the underdetermined case
        (``n_mics < n_targets``).

    Notes
    -----
    Reads the module-level ``parameters`` dictionary. All imports are done
    inside the function. When an algorithm raises, ``nan`` is appended to its
    SDR/SIR lists and the partial result is appended to
    ``error_<pid>.json`` in ``parameters["_results_dir"]``.
    """
    global parameters

    import json
    import time

    import numpy

    np = numpy
    import pyroomacoustics

    pra = pyroomacoustics
    import os
    import sys

    sys.path.append(parameters["base_dir"])

    from auxiva_pca import auxiva_pca, pca_separation
    from five import five
    from ive import ogive
    from overiva import overiva
    from pyroomacoustics.bss.common import projection_back
    from room_builder import callback_noise_mixer, random_room_builder

    # import samples helper routine
    from get_data import samples_dir

    sys.path.append(os.path.join(parameters["base_dir"], samples_dir))
    from generate_samples import wav_read_center

    n_targets, n_interferers, n_mics, sinr, wav_files, room_seed, seed = args

    # this is the underdetermined case. We don't do that.
    if n_mics < n_targets:
        return []

    # set MKL to only use one thread if present
    try:
        import mkl

        mkl.set_num_threads(1)
    except ImportError:
        pass

    # set the RNG seed
    rng_state = np.random.get_state()
    np.random.seed(seed)

    # STFT parameters
    framesize = parameters["stft_params"]["framesize"]
    hop = parameters["stft_params"]["hop"]
    # FIX: the original tested for "hann" here and then built a Hamming
    # window, so the two window types were swapped.
    if parameters["stft_params"]["window"] == "hamming":
        win_a = pra.hamming(framesize)
    else:  # default is Hann
        win_a = pra.hann(framesize)
    win_s = pra.transform.compute_synthesis_window(win_a, hop)

    # Simulation parameters
    sources_var = np.ones(n_targets)

    # total number of sources
    n_sources = n_targets + n_interferers

    # Read the signals
    wav_files = [os.path.join(parameters["base_dir"], fn) for fn in wav_files]
    signals = wav_read_center(wav_files[:n_sources], seed=123)

    # Get a random room
    room, rt60 = random_room_builder(
        signals, n_mics, seed=room_seed, **parameters["room_params"]
    )
    premix = room.simulate(return_premix=True)

    # mix the signal
    n_samples = premix.shape[2]
    mix = callback_noise_mixer(
        premix,
        sinr=sinr,
        diffuse_ratio=parameters["sinr_diffuse_ratio"],
        n_src=n_sources,
        n_tgt=n_targets,
        tgt_std=np.sqrt(sources_var),
        ref_mic=parameters["ref_mic"],
    )

    # sum up the background
    # shape (n_mics, n_samples)
    background = np.sum(premix[n_targets:n_sources, :, :], axis=0)

    # shape (n_targets+1, n_samples, n_mics)
    ref = np.zeros(
        (n_targets + 1, premix.shape[2], premix.shape[1]), dtype=premix.dtype
    )
    ref[:n_targets, :, :] = premix[:n_targets, :, :].swapaxes(1, 2)
    ref[n_targets, :, :] = background.T

    # scratch buffer filled by the evaluation callback
    synth = np.zeros_like(ref)

    # START BSS
    ###########

    # shape: (n_frames, n_freq, n_mics)
    X_all = pra.transform.analysis(mix.T, framesize, hop, win=win_a)
    X_mics = X_all[:, :, :n_mics]

    # convergence monitoring callback
    def convergence_callback(
        Y, X, n_targets, SDR, SIR, eval_time, ref, framesize, win_s, algo_name
    ):
        """Evaluate SDR/SIR of ``Y`` and append them to ``SDR`` / ``SIR``."""
        t_in = time.perf_counter()

        # projection back onto the first microphone
        z = projection_back(Y, X[:, :, 0])
        Y = Y * np.conj(z[None, :, :])

        from mir_eval.separation import bss_eval_sources

        if Y.shape[2] == 1:
            y = pra.transform.synthesis(Y[:, :, 0], framesize, hop, win=win_s)[
                :, None
            ]
        else:
            y = pra.transform.synthesis(Y, framesize, hop, win=win_s)

        # algorithms not listed as overdetermined get their outputs sorted
        # by decreasing power
        if algo_name not in parameters["overdet_algos"]:
            new_ord = np.argsort(np.std(y, axis=0))[::-1]
            y = y[:, new_ord]

        m = np.minimum(y.shape[0] - hop, ref.shape[1])

        # the extra (background) slot is filled with the first output
        synth[:n_targets, :m, 0] = y[hop:m + hop, :n_targets].T
        synth[n_targets, :m, 0] = y[hop:m + hop, 0]

        sdr, sir, sar, perm = bss_eval_sources(
            ref[:n_targets + 1, :m, 0], synth[:, :m, 0]
        )
        SDR.append(sdr[:n_targets].tolist())
        SIR.append(sir[:n_targets].tolist())

        t_out = time.perf_counter()
        eval_time.append(t_out - t_in)

    # store results in a list, one entry per algorithm
    results = []

    # compute the initial values of SDR/SIR
    init_sdr = []
    init_sir = []
    convergence_callback(
        X_mics, X_mics, n_targets, init_sdr, init_sir, [], ref, framesize, win_s, "init"
    )

    for full_name, params in parameters["algorithm_kwargs"].items():

        name = params["algo"]
        kwargs = params["kwargs"]

        if name == "auxiva_pca" and n_targets == 1:
            # PCA doesn't work for single source scenario
            continue
        elif name in ["ogive", "five"] and n_targets != 1:
            # OGIVE is only for single target
            continue

        results.append(
            {
                "algorithm": full_name,
                "n_targets": n_targets,
                "n_interferers": n_interferers,
                "n_mics": n_mics,
                "rt60": rt60,
                "sinr": sinr,
                "seed": seed,
                "sdr": [],
                "sir": [],  # to store the result
                "runtime": np.nan,
                "eval_time": np.nan,
                "n_samples": n_samples,
            }
        )

        # this is used to keep track of time spent in the evaluation callback
        eval_time = []

        def cb(Y):
            convergence_callback(
                Y,
                X_mics,
                n_targets,
                results[-1]["sdr"],
                results[-1]["sir"],
                eval_time,
                ref,
                framesize,
                win_s,
                name,
            )

        # avoid one computation by using the initial values of sdr/sir
        results[-1]["sdr"].append(init_sdr[0])
        results[-1]["sir"].append(init_sir[0])

        try:
            t_start = time.perf_counter()

            if name == "auxiva":
                # this calls full IVA when `n_src` is not provided
                Y = overiva(X_mics, callback=cb, **kwargs)
            elif name == "auxiva_pca":
                Y = auxiva_pca(
                    X_mics, n_src=n_targets, callback=cb, proj_back=False, **kwargs
                )
            elif name == "overiva":
                Y = overiva(
                    X_mics, n_src=n_targets, callback=cb, proj_back=False, **kwargs
                )
            elif name == "overiva2":
                Y = overiva(
                    X_mics, n_src=n_targets, callback=cb, proj_back=False, **kwargs
                )
            elif name == "five":
                Y = five(X_mics, callback=cb, proj_back=False, **kwargs)
            elif name == "ilrma":
                Y = pra.bss.ilrma(X_mics, callback=cb, proj_back=False, **kwargs)
            elif name == "ogive":
                Y = ogive(X_mics, callback=cb, proj_back=False, **kwargs)
            elif name == "pca":
                Y = pca_separation(X_mics, n_src=n_targets)
            else:
                # unknown algorithm: the entry keeps only the initial values
                continue

            t_finish = time.perf_counter()

            # The last evaluation
            convergence_callback(
                Y,
                X_mics,
                n_targets,
                results[-1]["sdr"],
                results[-1]["sir"],
                [],
                ref,
                framesize,
                win_s,
                name,
            )

            results[-1]["eval_time"] = np.sum(eval_time)
            results[-1]["runtime"] = t_finish - t_start - results[-1]["eval_time"]

        except Exception:
            # FIX: was a bare ``except:``, which also swallowed
            # KeyboardInterrupt / SystemExit and carried on with the loop.
            pid = os.getpid()

            # report last sdr/sir as np.nan
            results[-1]["sdr"].append(np.nan)
            results[-1]["sir"].append(np.nan)

            # now write the problem to file
            fn_err = os.path.join(
                parameters["_results_dir"], "error_{}.json".format(pid)
            )
            with open(fn_err, "a") as f:
                f.write(json.dumps(results[-1], indent=4))

            # skip to next iteration
            continue

    # restore RNG former state
    np.random.set_state(rng_state)

    return results
def createroom(amBird, saBird, noises, mic_p, mic_d, sour_p, sour_d, callback_mix, roomdim, absorption, max_order, n_mics, angle):
    """Simulate two targets plus three interferers, separate them, save the audio.

    The function builds a ``pra.ShoeBox`` room with two target sources at
    fixed positions and three interferers placed by ``random_layout``,
    rescales the per-source recordings to the configured SINR, runs the BSS
    algorithm selected by ``algo_choices[0]`` on the resulting mixture and
    evaluates it with ``bss_eval_sources``. SDR and SIR are printed and
    appended to the outer ``SDRList`` / ``SIRList``. ``birdmix.wav`` and
    ``birdsep<k>.wav`` are written.

    Parameters
    ----------
    amBird, saBird:
        Entries for the two target sources, passed to ``wav_read_center``.
    noises:
        Sequence whose first three items are used as interferers.
    mic_p, mic_d:
        Forwarded to ``semi_circle_layout`` as its first and third arguments.
    sour_p, sour_d, callback_mix:
        Not used by this function.
    roomdim, absorption, max_order:
        Forwarded to ``pra.ShoeBox``.
    n_mics:
        Number of microphones; must be at least 2 (the number of targets).
    angle:
        Arc angle of the microphone layout; also used to derive its rotation.

    Notes
    -----
    ``birdmix.wav`` is written from ``room.mic_array.signals``, not from the
    rescaled mixture that is actually separated. The noise level derived from
    ``snr`` only enters the interferer scaling; no noise is added.
    """
    np.random.seed(10)

    # STFT configuration: 4096-sample Hann frames, half-frame hop.
    frame_len = 4096
    hop = frame_len // 2
    analysis_win = pra.hann(frame_len)
    synthesis_win = pra.transform.compute_synthesis_window(analysis_win, hop)

    # OGIVE settings
    ogive_step = 0.1
    ogive_mode = "switching"
    ogive_n_iter = 2000

    # Separation settings
    algo = algo_choices[0]
    save = True
    n_iter = 60
    dist = "gauss"  # "gauss" or "laplace"

    # Levels in dB
    snr = 60
    sinr = 10

    n_sources_target = 2
    n_interferers = 3
    n_sources = n_sources_target + n_interferers
    assert n_sources_target <= n_mics, "More sources than microphones is not supported"

    # Geometry: microphones on a semi-circle, targets fixed, interferers random.
    layout_rotation = np.pi - angle / 2
    mic_locs = semi_circle_layout(mic_p, angle, mic_d, n_mics, rot=layout_rotation)
    target_locs = np.transpose([[7, 10, 6], [9, 16, 6]])
    interferer_locs = random_layout([16, 2, 6], 3, offset=[5, 18, 3], seed=1)
    source_locs = np.concatenate((target_locs, interferer_locs), axis=1)

    # Audio input: targets first, then the three noise signals.
    signals = wav_read_center(
        [amBird, saBird, noises[0], noises[1], noises[2]], seed=123
    )

    room = pra.ShoeBox(
        roomdim,
        fs=44100,
        absorption=absorption,
        max_order=max_order,
        air_absorption=True,
        humidity=50,
    )
    for position, signal in zip(source_locs.T, signals):
        room.add_source(position, signal=signal)
    room.add_microphone_array(pra.MicrophoneArray(mic_locs, fs=room.fs))

    # Per-source recordings, indexed (source, microphone, sample).
    premix = room.simulate(return_premix=True)

    # Normalise every source to unit standard deviation at the reference mic.
    ref_mic = 0
    premix /= np.std(premix[:, ref_mic, :], axis=1)[:, None, None]

    # Scale the targets to their (unit) variance.
    sources_var = np.ones(n_sources_target)
    premix[:n_sources_target, :, :] *= np.sqrt(sources_var[:, None, None])

    # Noise level implied by the SNR, then the interferer level that gives
    # the requested SINR.
    total_target_var = np.sum(sources_var)
    sigma_n = np.sqrt(10 ** (-snr / 10) * total_target_var)
    interference_var = np.maximum(0, 10 ** (-sinr / 10) * total_target_var - sigma_n ** 2)
    sigma_i = np.sqrt(interference_var / (n_sources - n_sources_target))
    premix[n_sources_target:, :, :] *= sigma_i

    background = np.sum(premix[n_sources_target:, :, :], axis=0)

    # Mix down the recorded signals
    mix = np.sum(premix, axis=0)
    mics_signals = room.mic_array.signals
    print("Simulation done.")

    # References: one slot per target plus one for the summed interferers,
    # laid out as (source, sample, microphone).
    ref = np.zeros(
        (n_sources_target + 1, premix.shape[2], premix.shape[1]), dtype=premix.dtype
    )
    ref[:n_sources_target, :, :] = premix[:n_sources_target, :, :].swapaxes(1, 2)
    ref[n_sources_target, :, :] = background.T

    on_iteration = None  # no convergence monitoring

    # STFT of the mixture
    X_all = pra.transform.analysis(mix.T, frame_len, hop, win=analysis_win)
    X_mics = X_all.astype(np.complex128)[:, :, :n_mics]

    # Each entry is evaluated lazily so that only the selected algorithm's
    # arguments are looked up.
    runners = {
        "auxiva": lambda: overiva(
            X_mics, n_iter=n_iter, proj_back=True, model=dist, callback=on_iteration
        ),
        "auxiva_pca": lambda: auxiva_pca(
            X_mics,
            n_src=n_sources_target,
            n_iter=n_iter,
            proj_back=True,
            model=dist,
            callback=on_iteration,
        ),
        "overiva": lambda: overiva(
            X_mics,
            n_src=n_sources_target,
            n_iter=n_iter,
            proj_back=True,
            model=dist,
            init_eig=(init == init_choices[1]),
            callback=on_iteration,
        ),
        "ilrma": lambda: pra.bss.ilrma(
            X_mics, n_iter=n_iter, n_components=2, proj_back=True, callback=on_iteration
        ),
        "ogive": lambda: ogive(
            X_mics,
            n_iter=ogive_n_iter,
            step_size=ogive_step,
            update=ogive_mode,
            proj_back=True,
            model=dist,
            init_eig=(init == init_choices[1]),
            callback=on_iteration,
        ),
        "ogive_matlab": lambda: ogive_matlab_wrapper(
            X_mics,
            n_iter=ogive_n_iter,
            step_size=ogive_step,
            update=ogive_mode,
            proj_back=True,
            init_eig=(init == init_choices[1]),
            callback=on_iteration,
        ),
    }
    if algo not in runners:
        raise ValueError("No such algorithm {}".format(algo))
    Y = runners[algo]()

    # Back to the time domain
    if Y.shape[2] == 1:
        y = pra.transform.synthesis(Y[:, :, 0], frame_len, hop, win=synthesis_win)[:, None]
    else:
        y = pra.transform.synthesis(Y, frame_len, hop, win=synthesis_win)
    y = y.astype(np.float64)

    # Outputs that are uniformly zero get a tiny amount of noise so that the
    # evaluation below can still be computed.
    for col in range(y.shape[1]):
        if np.sum(np.abs(y[:, col])) < 1e-10:
            y[:, col] = np.random.randn(y.shape[0]) * 1e-10

    # Conventional BSS: order the outputs by decreasing power.
    if algo != "blinkiva":
        by_power = np.argsort(np.std(y, axis=0))[::-1]
        y = y[:, by_power]

    # Evaluate against the target references at microphone 0, skipping the
    # first hop of the synthesised signal.
    n_eval = np.minimum(y.shape[0] - hop, ref.shape[1])
    sdr, sir, sar, perm = bss_eval_sources(
        ref[:n_sources_target, :n_eval, 0],
        y[hop:n_eval + hop, :n_sources_target].T,
    )

    # Reorder the separated signals to match the references.
    y_hat = y[:, perm]

    SDRList.append(sdr)
    SIRList.append(sir)
    print("SDR:", sdr)
    print("SIR:", sir)

    # Save the mixture (first channel) and the separated signals.
    if save:
        from scipy.io import wavfile

        mixture_int16 = pra.normalize(mics_signals, bits=16).astype(np.int16).T
        wavfile.write("birdmix.wav", room.fs, mixture_int16[:, 0])
        for idx, separated in enumerate(y_hat.T):
            wavfile.write(
                "birdsep{}.wav".format(idx + 1),
                room.fs,
                pra.normalize(separated, bits=16).astype(np.int16).T,
            )
def random_room_builder(
    wav_files: List[str],
    n_mics: int,
    mic_delta: Optional[float] = None,
    fs: float = 16000,
    t60_interval: Tuple[float, float] = (0.150, 0.500),
    room_width_interval: Tuple[float, float] = (6, 10),
    room_height_interval: Tuple[float, float] = (2.8, 4.5),
    source_zone_height: Tuple[float, float] = (1.0, 2.0),
    guard_zone_width: float = 0.5,
    seed: Optional[int] = None,
):
    """
    This function creates a random room within some parameters.

    The microphone array is circular with the distance between neighboring
    elements set to the maximal distance avoiding spatial aliasing.

    Parameters
    ----------
    wav_files: list
        One entry per source. The list is passed as-is to
        ``wav_read_center`` to obtain the source signals and is also stored
        in the returned parameters.
    n_mics: int
        The number of microphones in the microphone array
    mic_delta: float, optional
        The distance between neighboring microphones in the array
    fs: float, optional
        The sampling frequency for the simulation
    t60_interval: (float, float), optional
        An interval where to pick the reverberation time
    room_width_interval: (float, float), optional
        An interval where to pick the room horizontal length/width
    room_height_interval: (float, float), optional
        An interval where to pick the room vertical length
    source_zone_height: (float, float), optional
        The vertical interval where sources and microphones are allowed
    guard_zone_width: float
        The minimum distance between a vertical wall and a source/microphone
    seed: int, optional
        When given, the numpy global RNG is seeded with this value for the
        duration of the call and its previous state is restored before
        returning.

    Returns
    -------
    ShoeBox object
        A randomly generated room according to the provided parameters
    dict
        The parameters of the generated room: ``"shoebox_params"``,
        ``"mics"``, ``"sources"`` and ``"t60"`` (the T60 measured on the
        simulated impulse responses, averaged over all of them)
    """

    # save current numpy RNG state and set a known seed
    if seed is not None:
        rng_state = np.random.get_state()
        np.random.seed(seed)

    n_sources = len(wav_files)

    # sanity checks
    assert source_zone_height[0] > 0
    assert source_zone_height[1] < room_height_interval[0]
    assert source_zone_height[0] <= source_zone_height[1]
    assert 2 * guard_zone_width < room_width_interval[1] - room_width_interval[0]

    def random_location(
        room_dim, n, ref_point=None, min_distance=None, max_distance=None
    ):
        """ Helper function to pick a location in the room """

        width = room_dim[0] - 2 * guard_zone_width
        width_intercept = guard_zone_width

        depth = room_dim[1] - 2 * guard_zone_width
        depth_intercept = guard_zone_width

        height = np.diff(source_zone_height)[0]
        height_intercept = source_zone_height[0]

        locs = rand(3, n)
        locs[0, :] = locs[0, :] * width + width_intercept
        locs[1, :] = locs[1, :] * depth + depth_intercept
        locs[2, :] = locs[2, :] * height + height_intercept

        if ref_point is not None:
            # Check the distance condition with respect to the reference point
            d = np.linalg.norm(locs - ref_point, axis=0)

            if min_distance is not None and max_distance is not None:
                redo = np.where(np.logical_or(d < min_distance, max_distance < d))[0]
            elif min_distance is not None:
                redo = np.where(d < min_distance)[0]
            elif max_distance is not None:
                redo = np.where(d > max_distance)[0]
            else:
                redo = []

            # Recursively call this function on the locations to redraw
            if len(redo) > 0:
                locs[:, redo] = random_location(
                    room_dim,
                    len(redo),
                    ref_point=ref_point,
                    min_distance=min_distance,
                    max_distance=max_distance,
                )

        return locs

    c = pra.constants.get("c")

    # Create the room
    # Sometimes the room dimension and required T60 are not compatible, then
    # we just try again with new random values
    while True:
        try:
            room_dim = np.array(
                [
                    rand() * np.diff(room_width_interval)[0] + room_width_interval[0],
                    rand() * np.diff(room_width_interval)[0] + room_width_interval[0],
                    rand() * np.diff(room_height_interval)[0] + room_height_interval[0],
                ]
            )
            t60 = rand() * np.diff(t60_interval)[0] + t60_interval[0]
            reflection, max_order = inv_sabine(t60, room_dim, c)
            break
        except ValueError:
            pass

    # Create the room based on the random parameters
    room = pra.ShoeBox(room_dim, fs=fs, absorption=1 - reflection, max_order=max_order)

    # The critical distance
    # https://en.wikipedia.org/wiki/Critical_distance
    d_critical = 0.057 * np.sqrt(np.prod(room_dim) / t60)

    # default inter-mic distance is set according to nyquist criterion
    # i.e. 1/2 wavelength corresponding to fs / 2 at given speed of sound
    if mic_delta is None:
        mic_delta = 0.5 * (c / (0.5 * fs))

    # the microphone array is uniformly circular with the distance between
    # neighboring elements of mic_delta
    mic_center = random_location(room_dim, 1)
    mic_rotation = rand() * 2 * np.pi
    mic_radius = 0.5 * mic_delta / np.sin(np.pi / n_mics)
    mic_array = pra.MicrophoneArray(
        np.vstack(
            (
                pra.circular_2D_array(
                    mic_center[:2, 0], n_mics, mic_rotation, mic_radius
                ),
                mic_center[2, 0] * np.ones(n_mics),
            )
        ),
        room.fs,
    )
    room.add_microphone_array(mic_array)

    # Now we will get the sources at random.
    # Every source is placed between one critical distance and one critical
    # distance plus one meter away from the array center.
    source_locs = random_location(
        room_dim,
        n_sources,
        ref_point=mic_center,
        min_distance=d_critical,
        max_distance=d_critical + 1,
    )

    source_signals = wav_read_center(wav_files, seed=123)

    for s, signal in enumerate(source_signals):
        room.add_source(source_locs[:, s], signal=signal)

    # pre-compute the impulse responses
    room.compute_rir()

    # average the t60 of all the RIRs
    # FIX: the original measured ``room.rir[0][0]`` on every iteration, so the
    # "average" was the T60 of a single impulse response.
    t60_actual = np.mean(
        [
            pra.experimental.measure_rt60(rir, room.fs)
            for mic_list in room.rir
            for rir in mic_list
        ]
    )

    # save the parameters
    room_params = {
        "shoebox_params": {
            "room_dim": room_dim.tolist(),
            "fs": fs,
            "absorption": 1 - reflection,
            "max_order": max_order,
        },
        "mics": mic_array.R.T.tolist(),
        "sources": {"locs": source_locs.T.tolist(), "signals": wav_files},
        "t60": t60_actual.tolist(),
    }

    # restore numpy RNG former state
    if seed is not None:
        np.random.set_state(rng_state)

    return room, room_params
def one_loop(args):
    """Run every configured BSS algorithm on one simulated blinky scenario.

    Parameters
    ----------
    args: tuple
        ``(n_targets, n_mics, rt60, sinr, wav_files, seed)``. ``rt60`` is
        used as a key into ``parameters['rt60_list']``.

    Returns
    -------
    list of dict
        One entry per item of ``parameters['algorithm_kwargs']``, holding the
        configuration and the SDR/SIR values collected by the evaluation
        callback. An empty list is returned for the underdetermined case
        (``n_mics < n_targets``).

    Notes
    -----
    Reads the module-level ``parameters`` dictionary. All imports are done
    inside the function.
    """
    import numpy

    np = numpy
    import pyroomacoustics

    pra = pyroomacoustics

    import sys

    sys.path.append(parameters['base_dir'])

    from routines import semi_circle_layout, random_layout, gm_layout, grid_layout
    from blinkiva import blinkiva
    from blinkiva_gauss import blinkiva_gauss
    from generate_samples import wav_read_center

    n_targets, n_mics, rt60, sinr, wav_files, seed = args

    # this is the underdetermined case. We don't do that.
    if n_mics < n_targets:
        return []

    # set MKL to only use one thread if present
    try:
        import mkl

        mkl.set_num_threads(1)
    except ImportError:
        pass

    # set the RNG seed
    rng_state = np.random.get_state()
    np.random.seed(seed)

    # STFT parameters
    framesize = parameters['stft_params']['framesize']
    win_a = pra.hann(framesize)
    win_s = pra.transform.compute_synthesis_window(win_a, framesize // 2)

    # Simulation parameters
    n_repeat = parameters['n_repeat']
    fs = parameters['fs']
    snr = parameters['snr']

    n_interferers = parameters['n_interferers']
    n_blinkies = parameters['n_blinkies']
    ref_mic = parameters['ref_mic']
    room_dim = np.array(parameters['room_dim'])

    sources_var = np.ones(n_targets)
    sources_var[0] = parameters['weak_source_var']

    # total number of sources
    n_sources = n_interferers + n_targets

    # Geometry of the room and location of sources and microphones
    interferer_locs = random_layout(
        [3., 5.5, 1.5], n_interferers, offset=[6.5, 1., 0.5], seed=1
    )

    target_locs = semi_circle_layout(
        [4.1, 3.755, 1.2],
        np.pi / 1.5, 2.,  # 120 degrees arc, 2 meters away
        n_targets,
        rot=0.743 * np.pi,
    )

    source_locs = np.concatenate((target_locs, interferer_locs), axis=1)

    if parameters['blinky_geometry'] == 'gm':
        # Normally distributed in the vicinity of each source
        blinky_locs = gm_layout(
            n_blinkies,
            target_locs - np.c_[[0., 0., 0.5]],
            std=[0.4, 0.4, 0.05],
            seed=987,
        )

    elif parameters['blinky_geometry'] == 'grid':
        # Placed on a regular grid, with a little bit of noise added
        blinky_locs = grid_layout(
            [3., 5.5], n_blinkies, offset=[1., 1., 0.7], seed=987,
        )

    else:
        # default is semi-circular
        blinky_locs = semi_circle_layout(
            [4.1, 3.755, 1.1],
            np.pi, 3.5,
            n_blinkies,
            rot=0.743 * np.pi - np.pi / 4,
            seed=987,
        )

    mic_locs = np.vstack((
        pra.circular_2D_array([4.1, 3.76], n_mics, np.pi / 2, 0.02),
        1.2 * np.ones((1, n_mics)),
    ))
    # microphones first, blinkies after: later code slices on n_mics
    all_locs = np.concatenate((mic_locs, blinky_locs), axis=1)

    signals = wav_read_center(wav_files, seed=123)

    # Create the room itself
    room = pra.ShoeBox(
        room_dim,
        fs=fs,
        absorption=parameters['rt60_list'][rt60]['absorption'],
        max_order=parameters['rt60_list'][rt60]['max_order'],
    )

    # Place all the sound sources (the last n_sources signals are used)
    for sig, loc in zip(signals[-n_sources:, :], source_locs.T):
        room.add_source(loc, signal=sig)

    # FIX: the message used 'doesn''t', which Python reads as two adjacent
    # literals and renders as "doesnt".
    assert len(room.sources) == n_sources, (
        "Number of signals ({}) doesn't match number of sources ({})".format(
            signals.shape[0], n_sources
        )
    )

    # Place the microphone array
    room.add_microphone_array(pra.MicrophoneArray(all_locs, fs=room.fs))

    # compute RIRs
    room.compute_rir()

    # Run the simulation
    premix = room.simulate(return_premix=True)

    # Normalize the signals so that they all have unit
    # variance at the reference microphone
    p_mic_ref = np.std(premix[:, ref_mic, :], axis=1)
    premix /= p_mic_ref[:, None, None]

    # scale to pre-defined variance
    premix[:n_targets, :, :] *= np.sqrt(sources_var[:, None, None])

    # compute noise variance
    sigma_n = np.sqrt(10 ** (- snr / 10) * np.sum(sources_var))

    # now compute the power of interference signal needed to achieve desired SINR
    # max(..., 1) only avoids a divide-by-zero warning when there are no
    # interferers; in that case the slice scaled below is empty anyway.
    sigma_i = np.sqrt(
        np.maximum(0, 10 ** (- sinr / 10) * np.sum(sources_var) - sigma_n ** 2)
        / max(n_interferers, 1)
    )
    premix[n_targets:, :, :] *= sigma_i

    # Mix down the recorded signals
    mix = np.sum(premix, axis=0) + sigma_n * np.random.randn(*premix.shape[1:])

    ref = np.moveaxis(premix, 1, 2)

    # START BSS
    ###########

    # pre-emphasis on blinky signals
    if parameters['use_pre_emphasis']:
        mix[n_mics:, :-1] = np.diff(mix[n_mics:, :], axis=1)
        mix[n_mics:, -1] = 0.

    # shape: (n_frames, n_freq, n_mics)
    X_all = pra.transform.analysis(
        mix.T,
        framesize, framesize // 2,
        win=win_a,
    )
    X_mics = X_all[:, :, :n_mics]
    U_blinky = np.sum(np.abs(X_all[:, :, n_mics:]) ** 2, axis=1)  # shape: (n_frames, n_blinkies)

    # convergence monitoring callback
    def convergence_callback(Y, n_targets, SDR, SIR, ref, framesize, win_s, algo_name):
        """Evaluate SDR/SIR of ``Y`` and append them to ``SDR`` / ``SIR``."""
        from mir_eval.separation import bss_eval_sources

        y = pra.transform.synthesis(Y, framesize, framesize // 2, win=win_s)

        # outputs of non-blinkiva algorithms are sorted by decreasing power
        if not algo_name.startswith('blinkiva'):
            new_ord = np.argsort(np.std(y, axis=0))[::-1]
            y = y[:, new_ord]

        m = np.minimum(y.shape[0] - framesize // 2, ref.shape[1])
        sdr, sir, sar, perm = bss_eval_sources(
            ref[:n_targets, :m, 0],
            y[framesize // 2:m + framesize // 2, :n_targets].T,
        )
        SDR.append(sdr.tolist())
        SIR.append(sir.tolist())

    # store results in a list, one entry per algorithm
    results = []

    for name, kwargs in parameters['algorithm_kwargs'].items():

        results.append({
            'algorithm': name,
            'n_targets': n_targets,
            'n_mics': n_mics,
            'rt60': rt60,
            'sinr': sinr,
            'seed': seed,
            'sdr': [], 'sir': [],  # to store the result
        })

        if parameters['monitor_convergence']:
            cb = lambda Y: convergence_callback(
                Y, n_targets,
                results[-1]['sdr'], results[-1]['sir'],
                ref, framesize, win_s, name,
            )
        else:
            cb = None
            # In that case, we still want to capture the initial values of SDR/SIR
            convergence_callback(
                X_mics, n_targets,
                results[-1]['sdr'], results[-1]['sir'],
                ref, framesize, win_s, name,
            )

        if name == 'auxiva':
            # Run AuxIVA
            Y = pra.bss.auxiva(
                X_mics,
                callback=cb,
                **kwargs,
            )

        elif name == 'blinkiva':
            # Run BlinkIVA
            Y = blinkiva(
                X_mics, U_blinky, n_src=n_targets,
                callback=cb,
                **kwargs,
            )

        elif name == 'blinkiva-gauss':
            # Run BlinkIVA (Gauss variant)
            Y = blinkiva_gauss(
                X_mics, U_blinky, n_src=n_targets,
                callback=cb,
                **kwargs,
            )

        else:
            continue

        # The last evaluation
        convergence_callback(
            Y, n_targets,
            results[-1]['sdr'], results[-1]['sir'],
            ref, framesize, win_s, name,
        )

    # restore RNG former state
    np.random.set_state(rng_state)

    return results
def line_createroom(Bird1, Bird2, Bird3, callback_mix):
    """Simulate three sources on a line, separate them and return the audio.

    A 20 x 20 x 10 ``pra.ShoeBox`` room is created with four microphones and
    three sources, both placed with ``semi_line_layout``. The simulation is
    run through ``callback_mix``, the BSS algorithm selected by
    ``algo_choices[0]`` is applied and the result is evaluated with
    ``bss_eval_sources``.

    Parameters
    ----------
    Bird1, Bird2, Bird3:
        Entries for the three sources, passed to ``wav_read_center``.
    callback_mix:
        Mixing callback handed to ``room.simulate``.

    Returns
    -------
    sdr, sir:
        Values returned by ``bss_eval_sources`` for the three targets.
    mixdata:
        The microphone signals normalised to 16 bits, as ``int16``,
        transposed.
    separationdata: list
        One 16-bit ``int16`` array per separated signal, ordered by the
        permutation returned by ``bss_eval_sources``.

    Notes
    -----
    Relies on names defined outside this function: ``algo_choices`` and, for
    some algorithms, ``init`` / ``init_choices``. The unreachable
    ``wavefile(...)`` calls that followed ``return`` in the original were
    removed, together with locals that were never read.
    """
    # Room and layout settings
    roomdim = np.array([20, 20, 10])
    max_order = 17
    absorption = 0.9
    mic_p = [13, 10, 3.5]  # microphone array centre
    mic_d = 0.015  # microphone spacing
    sour_p = [7, 10, 6]  # source line centre
    sour_d = 5  # source spacing
    n_mics = 4
    n_sources = 3
    mic_rot = np.pi / 2
    bird_rot = np.pi / 2

    np.random.seed(10)

    # STFT parameters
    framesize = 4096
    hop = framesize // 2
    win_a = pra.hann(framesize)
    win_s = pra.transform.compute_synthesis_window(win_a, hop)

    # OGIVE parameters
    ogive_mu = 0.1
    ogive_update = "switching"
    ogive_iter = 2000

    # Mixing levels handed to the mixing callback (dB)
    sir_db = 10
    snr_db = 60

    algo = algo_choices[0]
    n_iter = 60
    dist = "gauss"  # or "laplace"

    n_sources_target = 3
    assert n_sources_target <= n_mics, "More sources than microphones is not supported"

    source_std = np.ones(n_sources_target)

    # Microphone and source positions
    mic_locs = semi_line_layout(mic_p, mic_rot, mic_d, n_mics)
    source_locs = semi_line_layout(sour_p, bird_rot, sour_d, n_sources)
    # Manual offsets on two entries of the first row (currently zero, i.e.
    # the positions are left unchanged).
    source_locs[0][0] += 0
    source_locs[0][2] += 0

    signals = wav_read_center([Bird1, Bird2, Bird3], seed=123)

    room = pra.ShoeBox(
        roomdim,
        fs=44100,
        absorption=absorption,
        max_order=max_order,
        air_absorption=True,
        humidity=50,
    )
    for sig, loc in zip(signals, source_locs.T):
        room.add_source(loc, signal=sig)
    room.add_microphone_array(pra.MicrophoneArray(mic_locs, fs=room.fs))

    callback_mix_kwargs = {
        "snr": snr_db,
        "sir": sir_db,
        "n_src": n_sources,
        "n_tgt": n_sources_target,
        "src_std": source_std,
        "ref_mic": 0,
    }

    # Run the simulation
    separate_recordings = room.simulate(
        callback_mix=callback_mix,
        callback_mix_kwargs=callback_mix_kwargs,
        return_premix=True,
    )
    mics_signals = room.mic_array.signals
    print("line Simulation done.")

    # Reference signals; padded with noise when there are fewer references
    # than microphones.
    ref = np.moveaxis(separate_recordings, 1, 2)
    if ref.shape[0] < n_mics:
        ref = np.concatenate(
            (ref, np.random.randn(n_mics - ref.shape[0], ref.shape[1], ref.shape[2])),
            axis=0,
        )

    convergence_callback = None

    X_all = pra.transform.analysis(mics_signals.T, framesize, hop, win=win_a).astype(
        np.complex128
    )
    X_mics = X_all[:, :, :n_mics]

    # Run BSS
    if algo == "auxiva":
        Y = overiva(
            X_mics,
            n_iter=n_iter,
            proj_back=True,
            model=dist,
            callback=convergence_callback,
        )
    elif algo == "auxiva_pca":
        Y = auxiva_pca(
            X_mics,
            n_src=n_sources_target,
            n_iter=n_iter,
            proj_back=True,
            model=dist,
            callback=convergence_callback,
        )
    elif algo == "overiva":
        Y = overiva(
            X_mics,
            n_src=n_sources_target,
            n_iter=n_iter,
            proj_back=True,
            model=dist,
            init_eig=(init == init_choices[1]),
            callback=convergence_callback,
        )
    elif algo == "ilrma":
        Y = pra.bss.ilrma(
            X_mics,
            n_iter=n_iter,
            n_components=2,
            proj_back=True,
            callback=convergence_callback,
        )
    elif algo == "ogive":
        Y = ogive(
            X_mics,
            n_iter=ogive_iter,
            step_size=ogive_mu,
            update=ogive_update,
            proj_back=True,
            model=dist,
            init_eig=(init == init_choices[1]),
            callback=convergence_callback,
        )
    elif algo == "ogive_matlab":
        Y = ogive_matlab_wrapper(
            X_mics,
            n_iter=ogive_iter,
            step_size=ogive_mu,
            update=ogive_update,
            proj_back=True,
            init_eig=(init == init_choices[1]),
            callback=convergence_callback,
        )
    else:
        raise ValueError("No such algorithm {}".format(algo))

    # Run iSTFT
    if Y.shape[2] == 1:
        y = pra.transform.synthesis(Y[:, :, 0], framesize, hop, win=win_s)[:, None]
    else:
        y = pra.transform.synthesis(Y, framesize, hop, win=win_s)
    y = y.astype(np.float64)

    # If some of the output are uniformly zero, just add a bit of noise to compare
    for k in range(y.shape[1]):
        if np.sum(np.abs(y[:, k])) < 1e-10:
            y[:, k] = np.random.randn(y.shape[0]) * 1e-10

    # For conventional methods of BSS, reorder the signals by decreasing power
    if algo != "blinkiva":
        new_ord = np.argsort(np.std(y, axis=0))[::-1]
        y = y[:, new_ord]

    # Evaluate against the references at microphone 0, skipping the first hop
    # of the synthesised signal.
    m = np.minimum(y.shape[0] - hop, ref.shape[1])
    sdr, sir, sar, perm = bss_eval_sources(
        ref[:n_sources_target, :m, 0],
        y[hop:m + hop, :n_sources_target].T,
    )

    # reorder the vector of reconstructed signals
    y_hat = y[:, perm]

    mixdata = pra.normalize(mics_signals, bits=16).astype(np.int16).T
    separationdata = [
        pra.normalize(sig, bits=16).astype(np.int16).T for sig in y_hat.T
    ]
    print("sdr", sdr)
    return sdr, sir, mixdata, separationdata