def simulateSound(room_dim, R_loc, source_locations, source_audios, rt60, materials=None, max_order=None):
    """Simulate a reverberant array recording plus an anechoic reference.

    Two shoebox rooms of size ``room_dim`` are simulated with the same
    sources:

    * a reverberant room recorded by ``generate_mic_array(R_MIC, N_MIC, R_loc)``;
    * a fully absorbing room (``pra.Material(1.0)``, ``max_order=0``) recorded
      by ``generate_mic_array(0, 1, R_loc)``.

    :param room_dim: room dimensions handed to ``pra.ShoeBox``
    :param R_loc: location forwarded to ``generate_mic_array``
    :param source_locations: one position per source
    :param source_audios: per-source signals; every entry is zero-padded at
        the end, IN PLACE, to the length of the longest one
    :param rt60: target reverberation time, only used when ``materials`` is
        None; if ``pra.inverse_sabine`` rejects it with ``ValueError`` the
        value 1 is used instead
    :param materials: optional ``(ceiling, east, west, north, south, floor)``
    :param max_order: image-source order, only used when ``materials`` is
        given (the rt60 path uses the order returned by ``inverse_sabine``)
    :return: ``(mixed, premix_w_reverb, premix, R)`` -- the array signals of
        the reverberant room, its per-source premix, the per-source premix of
        the absorbing room, and the microphone array ``R``.  The original
        author's note gives the shapes as (C, L), (N, C, L), (N, C, L) --
        TODO confirm against pyroomacoustics.
    """
    if materials is None:
        # No explicit materials: derive a uniform absorption from rt60.
        try:
            e_absorption, max_order_rt60 = pra.inverse_sabine(rt60, room_dim)
        except ValueError:
            e_absorption, max_order_rt60 = pra.inverse_sabine(1, room_dim)
        room = pra.ShoeBox(room_dim, fs=fs, materials=pra.Material(e_absorption),
                           max_order=max_order_rt60)
    else:
        ceiling, east, west, north, south, floor = materials
        surfaces = pra.make_materials(
            ceiling=ceiling,
            floor=floor,
            east=east,
            west=west,
            north=north,
            south=south,
        )
        room = pra.ShoeBox(room_dim, fs=fs, materials=surfaces, max_order=max_order)

    R = generate_mic_array(R_MIC, N_MIC, R_loc)
    room.add_microphone_array(pra.MicrophoneArray(R, room.fs))

    # Bring every signal to a common length (zero padding at the end).
    longest = max(len(audio) for audio in source_audios)
    for idx, audio in enumerate(source_audios):
        source_audios[idx] = np.pad(audio, (0, longest - len(audio)), 'constant')

    for idx, location in enumerate(source_locations):
        room.add_source(location, signal=source_audios[idx], delay=0)

    room.image_source_model()
    premix_w_reverb = room.simulate(return_premix=True)
    mixed = room.mic_array.signals

    # Ground truth: same sources, fully absorbing walls, no reflections.
    room_gt = pra.ShoeBox(room_dim, fs=fs, materials=pra.Material(1.0), max_order=0)
    R_gt = generate_mic_array(0, 1, R_loc)
    room_gt.add_microphone_array(pra.MicrophoneArray(R_gt, room.fs))
    for idx, location in enumerate(source_locations):
        room_gt.add_source(location, signal=source_audios[idx], delay=0)
    room_gt.compute_rir()
    room_gt.image_source_model()
    premix = room_gt.simulate(return_premix=True)

    return (mixed, premix_w_reverb, premix, R)
def get_rir(audio_signal, fs, rt60=0.2, room_dim=[60, 60, 10], room_source=[30, 30, 4.5], mic_pos=[30, 10, 7], T=19, D=0.01, S=35):
    """Return the impulse response between one source and one microphone.

    :param audio_signal: signal attached to the source
    :param fs: sampling frequency of the room
    :param rt60: reverberation time fed to ``pra.inverse_sabine``
    :param room_dim: shoebox dimensions
    :param room_source: source position
    :param mic_pos: position of the single microphone
    :param T, D, S: inputs of the speed polynomial below; it looks like an
        empirical sound-speed-in-water formula (temperature, depth,
        salinity) -- TODO confirm the intended units
    :return: ``room.rir[0][0]``, the RIR of microphone 0 / source 0
    """
    import numpy as np
    import pyroomacoustics as pra

    # Propagation speed used to invert Sabine's formula.
    c = (1449.2 + 4.6 * T - 0.055 * T**2 + 0.0029 * T**3
         + (1.34 - 0.01 * T) * (S - 35) + 0.016 * D)

    # NOTE(review): ``c`` is only passed to inverse_sabine; the ShoeBox below
    # is built without it. The Sabine order is also discarded in favour of
    # the fixed max_order=3. Confirm both are intended.
    e_absorption, max_order = pra.inverse_sabine(rt60, room_dim, c=c)

    wall_material = pra.Material(e_absorption)
    room = pra.ShoeBox(
        room_dim,
        fs=fs,
        materials=wall_material,
        ray_tracing=False,
        max_order=3,
        air_absorption=False,
    )
    room.add_source(room_source, signal=audio_signal, delay=1.0)

    # One column per microphone; there is a single one here.
    mic_locs = np.c_[mic_pos, ]
    room.add_microphone_array(mic_locs)

    room.compute_rir()
    return room.rir[0][0]
def simroom(room_dim, src_loc, mic_locs):
    """Reverberate the sample guitar clip and return three microphone peaks.

    Parses ``--method/-m`` from the process command line, simulates the clip
    ``examples/samples/guitar_16k.wav`` in a shoebox room with a 0.3 s target
    RT60, writes the array recording to
    ``examples/samples/guitar_16k_reverb_<method>.wav`` and returns the
    maximum sample of microphones 0, 1 and 2.

    :param room_dim: shoebox dimensions
    :param src_loc: source position
    :param mic_locs: microphone positions (at least three microphones are
        indexed by the return statement)
    :return: ``np.array`` with the three per-microphone maxima
    """
    cli = argparse.ArgumentParser(
        description="Simulates and adds reverberation to a dry sound sample. Saves it into `./examples/samples`."
    )
    cli.add_argument(
        "--method",
        "-m",
        choices=methods,
        default=methods[0],
        help="Simulation method to use",
    )
    cli_args = cli.parse_args()

    # Desired reverberation time in seconds.
    rt60_tgt = 0.3

    # Mono source signal; its sampling rate becomes the room's rate.
    fs, dry_signal = wavfile.read("examples/samples/guitar_16k.wav")

    # Invert Sabine's formula to get the ISM parameters.
    e_absorption, max_order = pra.inverse_sabine(rt60_tgt, room_dim)

    room = pra.ShoeBox(
        room_dim,
        fs=fs,
        materials=pra.Material(e_absorption),
        max_order=max_order,
    )
    room.add_source(src_loc, signal=dry_signal, delay=0.5)
    room.add_microphone_array(mic_locs)

    # simulate() also builds the RIRs.
    room.simulate()

    out_name = "examples/samples/guitar_16k_reverb_{}.wav".format(cli_args.method)
    room.mic_array.to_wav(out_name, norm=True, bitdepth=np.int16)

    """
    detect_peaks(room.mic_array.signals[0, :], mph=0, mpd=1000, threshold=10, show=True)
    detect_peaks(room.mic_array.signals[1, :], mph=0, mpd=1000, threshold=10, show=True)
    detect_peaks(room.mic_array.signals[2, :], mph=0, mpd=1000, threshold=10, show=True)
    print(max(room.mic_array.signals[0, :]))
    print(max(room.mic_array.signals[1, :]))
    print(max(room.mic_array.signals[2, :]))
    """

    peaks = [max(room.mic_array.signals[mic, :]) for mic in range(3)]
    return np.array(peaks)
def room_simulate(num_mic, mic_array, room_type):
    """Build a shoebox room with a microphone array and candidate source positions.

    :param num_mic: number of microphones in the array
    :param mic_array: array geometry: ``'circle'``, ``'ellipse'`` or ``'linear'``
    :param room_type: key of the built-in room table
        (``'star_3'``, ``'room_819'``, ``'room_409'``)
    :return: ``(room, source_positions)`` where ``room`` is the
        ``pra.ShoeBox`` with the array added and ``source_positions`` holds
        12 positions on a 1 m circle around the array centre, one row each
    :raises KeyError: if ``room_type`` is not in the room table
    :raises ValueError: if ``mic_array`` is not a supported geometry
    """
    room_list = {
        'star_3': [8.3, 3.4, 2.5],
        'room_819': [7.9, 7.0, 2.7],
        'room_409': [7.0, 4.2, 2.7]
    }
    room_dim = room_list[room_type]

    # Fail early with a clear message instead of an unbound local later on.
    supported_arrays = ('circle', 'ellipse', 'linear')
    if mic_array not in supported_arrays:
        raise ValueError(
            'unsupported mic_array {!r}; expected one of {}'.format(mic_array, supported_arrays))

    dim_x, dim_y, dim_z = room_dim[0], room_dim[1], room_dim[2]
    sr = 16000
    rt60 = 0.3
    e_absorption, max_order = pra.inverse_sabine(rt60, [dim_x, dim_y, dim_z])
    print(e_absorption, max_order)

    num_direction = 12
    mic_radius = 0.04    # radius of the circular array
    mic_x_radius = 0.0637  # semi-axes of the elliptic array
    mic_y_radius = 0.0484
    mic_lin = 0.04       # spacing of the linear array

    room = pra.ShoeBox(room_dim,
                       fs=sr,
                       materials=pra.Material(e_absorption),
                       max_order=max_order)

    # Array centre: middle of the floor plan at a height of 0.69.
    mic_center = np.array([dim_x / 2, dim_y / 2, 0.69])
    thetas = np.arange(num_mic) / num_mic * 2 * np.pi
    theta_source = np.arange(num_direction) / num_direction * 2 * np.pi

    if mic_array == 'circle':
        center_to_mic = np.stack(
            [np.cos(thetas), np.sin(thetas), np.zeros_like(thetas)], 0) * mic_radius
    elif mic_array == 'ellipse':
        center_to_mic = np.stack([
            mic_x_radius * np.cos(thetas),
            mic_y_radius * np.sin(thetas),
            np.zeros_like(thetas)
        ], 0)
    else:  # 'linear', centred on the array centre along x
        linear = np.arange(num_mic) * mic_lin
        linear = linear - np.max(linear) / 2
        center_to_mic = np.stack(
            [linear, np.zeros_like(linear), np.zeros_like(linear)], 0)

    # One column per microphone.
    mic_positions = mic_center[:, None] + center_to_mic
    room.add_microphone_array(mic_positions)

    # Sources sit on a horizontal circle at the array height; one row each.
    far_field_distance = 1
    center_to_source = np.stack([
        np.cos(theta_source),
        np.sin(theta_source),
        np.zeros_like(theta_source)
    ], -1) * far_field_distance
    source_positions = mic_center[None, :] + center_to_source

    return room, source_positions
def make_room(room_size, source_location, mic_array_location, rt60, sample_rate=16000):
    """Create a shoebox room matching ``rt60`` with one array and one source.

    :param room_size: shoebox dimensions
    :param source_location: position of the (signal-less) source
    :param mic_array_location: microphone positions passed to
        ``add_microphone_array``
    :param rt60: target reverberation time for ``pra.inverse_sabine``
    :param sample_rate: sampling frequency of the room
    :return: the configured ``pra.ShoeBox``
    """
    # Sabine inversion yields both the wall absorption and the ISM order.
    absorption_coeff, reflection_order = pra.inverse_sabine(rt60, room_size)
    wall_material = pra.Material(absorption_coeff)
    shoebox = pra.ShoeBox(
        room_size,
        fs=sample_rate,
        materials=wall_material,
        max_order=reflection_order,
    )
    shoebox.add_microphone_array(mic_array_location)
    shoebox.add_source(source_location)
    return shoebox
def create_room(room_size, mics_loc, s1_loc, s2_loc, fs, T60=None):
    """Create a two-source shoebox room and compute its impulse responses.

    :param room_size: shoebox dimensions
    :param mics_loc: microphone positions, one microphone per row (the
        array is transposed before being handed to ``pra.MicrophoneArray``)
    :param s1_loc: position of the first source
    :param s2_loc: position of the second source
    :param fs: sampling frequency
    :param T60: target reverberation time; when None the room is built with
        ``max_order=0`` and no absorption argument
    :return: the ``ShoeBox`` after ``compute_rir()``
    """
    if T60 is None:
        room = pra.room.ShoeBox(room_size, fs=fs, t0=0, max_order=0)
    else:
        absorption, max_order = pra.inverse_sabine(T60, room_size)
        room = pra.room.ShoeBox(
            room_size,
            fs=fs,
            t0=0,
            absorption=absorption,
            max_order=max_order,
        )

    for source_position in (s1_loc, s2_loc):
        room.add_source(source_position)

    mic_columns = np.array(mics_loc).T
    room.add_microphone_array(pra.MicrophoneArray(mic_columns, fs))

    room.compute_rir()
    return room
def func(cat1, cat2, cat2fn):
    """Render random two-source mixtures of ESC-50 clips to wav files.

    For 100 random clip pairs (one clip from ``cat1``, one from ``cat2``),
    the two clips are trimmed to the shorter length and simulated at 5 random
    source placements each in a 10 x 10 x 3 shoebox (target RT60 0.3 s) with
    a fixed 7-microphone array. Each result is written under
    ``outputs/combined/``.

    :param cat1: category key of the first source
    :param cat2: category key of the second source
    :param cat2fn: mapping from category key to a sequence of file names
        located in ``inputs/ESC-50-master/audio/``
    """
    rt60_tgt = 0.3          # seconds
    room_dim = [10, 10, 3]  # meters

    # Invert Sabine's formula to get the ISM parameters.
    e_absorption, max_order = pra.inverse_sabine(rt60_tgt, room_dim)

    # One column per microphone.
    mic_locs = np.c_[
        [4.9, 4, 1],
        [5.1, 4.1, 1],
        [5.1, 3.9, 1],
        [5, 4, 1.2],
        [5, 4, 0.8],
        [4.9, 3.9, 1.2],
        [5.1, 4.1, 0.8],
    ]

    for _ in range(100):
        fn1 = cat2fn[cat1][np.random.randint(0, len(cat2fn[cat1]))]
        fn2 = cat2fn[cat2][np.random.randint(0, len(cat2fn[cat2]))]

        # The sampling rate of the second file is the one used for the room.
        _, audio1 = wavfile.read("inputs/ESC-50-master/audio/" + fn1)
        fs, audio2 = wavfile.read("inputs/ESC-50-master/audio/" + fn2)

        common_len = min(audio1.shape[0], audio2.shape[0])
        audio1 = audio1[:common_len]
        audio2 = audio2[:common_len]

        for _ in range(5):
            room = pra.ShoeBox(
                room_dim,
                fs=fs,
                materials=pra.Material(e_absorption),
                max_order=max_order,
            )

            # Six offsets in [-1, 1): three per source, around (5, 5, 1.5).
            values = np.random.random(6) * 2.0 - 1.0
            loc1 = [values[0] + 5, values[1] + 5, values[2] + 1.5]
            loc2 = [values[3] + 5, values[4] + 5, values[5] + 1.5]

            room.add_source(loc1, signal=audio1, delay=0.)
            room.add_source(loc2, signal=audio2, delay=0.)
            room.add_microphone_array(mic_locs)
            room.simulate()

            filename = fn1 + " " + fn2 + " " + str(loc1) + str(loc2) + ".wav"
            room.mic_array.to_wav(
                f"outputs/combined/" + filename,
                norm=True,
                bitdepth=np.int16,
            )
def test_rt60_theory_single_band():
    """Check ``Room.rt60_theory`` against the stand-alone RT60 formulas.

    A shoebox room is built from the Sabine inversion of a 0.3 s target and
    its theoretical RT60 (Sabine and Eyring) must agree, within ``eps``,
    with ``pra.rt60_sabine`` / ``pra.rt60_eyring`` evaluated on the
    module-level surface ``S`` and volume ``V``.
    """
    # The desired reverberation time of the room
    rt60_tgt = 0.3  # seconds

    # We invert Sabine's formula to obtain the parameters for the ISM simulator
    e_absorption, max_order = pra.inverse_sabine(rt60_tgt, room_dim)

    # Create the room
    room = pra.ShoeBox(
        room_dim, fs=fs, materials=pra.Material(e_absorption), max_order=max_order
    )

    # Compare magnitudes: a signed difference would let any large negative
    # error slip through the ``< eps`` check.
    rt60_sabine = pra.rt60_sabine(S, V, e_absorption, 0.0, room.c)
    assert abs(rt60_sabine - room.rt60_theory(formula="sabine")) < eps

    rt60_eyring = pra.rt60_eyring(S, V, e_absorption, 0.0, room.c)
    assert abs(rt60_eyring - room.rt60_theory(formula="eyring")) < eps
def build_room(dimensions, source, mic_array, rt60, fs, max_order_user):
    """
    Build the simulated shoebox room and compute its impulse responses.

    :param dimensions: length, width and height of the room
    :param source: x, y, z location of the sound source
    :param mic_array: x, y, z vectors of all the microphones
    :param float rt60: reverberation time of the room
    :param int fs: sampling frequency used for the generation of signals
    :param int max_order_user: maximum order of the simulated reflections;
        0 means "use the order obtained from Sabine's formula"
    :return: the pyroomacoustics room and the fractional delay to compensate
    """
    # Sabine inversion gives the wall absorption and a default ISM order.
    e_absorption, sabine_order = pra.inverse_sabine(rt60, dimensions)

    # A non-zero user order overrides the Sabine-derived one.
    reflection_order = max_order_user if max_order_user != 0 else sabine_order

    room = pra.ShoeBox(
        p=dimensions,
        fs=fs,
        absorption=e_absorption,
        max_order=reflection_order,
    )

    # Microphone array and sound source.
    room.add_microphone_array(pra.MicrophoneArray(mic_array, fs))
    room.add_source(source)

    # Room impulse response at each microphone, for each source.
    room.image_source_model()
    room.compute_rir()

    # Half the fractional-delay filter length configured in pyroomacoustics.
    global_delay = pra.constants.get("frac_delay_length") // 2

    return room, global_delay
# the sampling frequency should match that of the room fs, audio1 = wavfile.read("inputs/sitar.wav") fs, audio2 = wavfile.read("inputs/piano.wav") audio1 = audio1[1000000:2000000, :1] audio1 = audio1.reshape(audio1.shape[0]) audio1 = audio1 / 5 audio2 = audio2[1000000:2000000, :1] audio2 = audio2.reshape(audio1.shape[0]) print(audio1.shape) print(audio2.shape) # We invert Sabine's formula to obtain the parameters for the ISM simulator e_absorption, max_order = pra.inverse_sabine(rt60_tgt, room_dim) # Create the room room = pra.ShoeBox(room_dim, fs=fs, materials=pra.Material(e_absorption), max_order=max_order) # place the source in the room room.add_source([7, 5, 1], signal=audio1, delay=0.) room.add_source([2, 3, 1], signal=audio2, delay=0.) # define the locations of the microphones mic_locs = np.c_[ # [4.9, 4, 1], # [5.1, 4.1, 1],
def create_rooms(self,N_mutations_per_roomtype,roomtype=0):
    """Generate randomised rooms of one room type and append them to ``self.rooms``.

    Each generated room gets random dimensions and surface materials, one
    randomly placed microphone, a log-sweep source 0.1 next to the microphone
    along y, and a noise source playing a random file from
    ``../../data/audio/``.

    :param N_mutations_per_roomtype: number of rooms to generate
    :param roomtype: index/key into ``self.room_specs``; the selected spec
        must provide ``min_room_dim``, ``max_room_dim``, ``wall_materials``,
        ``floor_material`` and ``ceiling_material``
    :return: None (rooms are appended to ``self.rooms``)
    """
    roomtype = self.room_specs[roomtype]

    for _ in range(N_mutations_per_roomtype):
        # Random room dimensions with 0.01 resolution. random.randint needs
        # integer bounds: float specs (e.g. 2.5 * 100 -> 250.0, or
        # 2.55 * 100 -> 254.999...) are rounded to the nearest integer.
        room_x, room_y, room_z = (
            random.randint(
                int(round(roomtype["min_room_dim"][axis] * 100)),
                int(round(roomtype["max_room_dim"][axis] * 100)),
            ) / 100
            for axis in range(3)
        )
        room_dim = [room_x, room_y, room_z]

        # Random materials. Note: shuffle reorders the chosen list of the
        # room spec in place.
        wall_materials = random.choice(roomtype["wall_materials"])
        random.shuffle(wall_materials)
        floor_material = random.choice(roomtype["floor_material"])
        ceiling_material = random.choice(roomtype["ceiling_material"])
        m = pra.make_materials(
            ceiling=ceiling_material,
            floor=floor_material,
            east=wall_materials[0],
            west=wall_materials[1],
            north=wall_materials[2],
            south=wall_materials[3],
        )

        # Create the room. Only the ISM order is taken from the Sabine
        # inversion; the absorption comes from the materials above.
        rt60 = 0.5  # seconds
        _, max_order = pra.inverse_sabine(rt60, room_dim)
        room = pra.ShoeBox(room_dim, fs=self.sr, materials=m, max_order=max_order,
                           air_absorption=True, ray_tracing=False)

        # Random microphone position.
        mic_x = random.randint(0, int(room_x * 100)) / 100
        mic_y = random.randint(0, int(room_y * 100)) / 100
        mic_z = random.randint(0, int(room_z * 100)) / 100
        mic_pos = [mic_x, mic_y, mic_z]
        room.add_microphone_array(np.array([mic_pos]).T)

        # Sweep source: 0.1 beside the microphone along +y if that point is
        # inside the room, otherwise along -y.
        sg = signal_generator(sr=self.sr)
        logsweep = sg.logsweep(w1=100, w2=30000, T=0.3)
        if room.is_inside([mic_pos[0], mic_pos[1] + 0.1, mic_pos[2]]):
            sweep_sourcepos = [mic_pos[0], mic_pos[1] + 0.1, mic_pos[2]]
        else:
            sweep_sourcepos = [mic_pos[0], mic_pos[1] - 0.1, mic_pos[2]]
        # Normalise the sweep to half of the int16 full scale.
        scaled = np.int16(logsweep.signal / np.max(np.abs(logsweep.signal)) * 32767 * 0.5)
        room.add_source(sweep_sourcepos, signal=scaled, delay=0.02)

        # Noise source: random file, random position.
        fs, audio = wavfile.read("../../data/audio/" + random.choice(os.listdir("../../data/audio/")))
        noise_x = random.randint(0, int(room_x * 100)) / 100
        noise_y = random.randint(0, int(room_y * 100)) / 100
        noise_z = random.randint(0, int(room_z * 100)) / 100
        noise_pos = [noise_x, noise_y, noise_z]

        # Linear interpolation of the noise to 96 kHz.
        # NOTE(review): the target rate is hard-coded while the room runs at
        # self.sr -- confirm self.sr is 96000.
        duration = audio.shape[0] / fs
        time_old = np.linspace(0, duration, audio.shape[0])
        time_new = np.linspace(0, duration, int(audio.shape[0] * 96000 / fs))
        interpolator = interpolate.interp1d(time_old, audio.T)
        audio = interpolator(time_new).T.astype('int16')
        # Only the first channel is used as the noise signal.
        room.add_source(noise_pos, signal=audio.T[0], delay=0.0)

        # Store the room.
        self.rooms.append(room)
def sim(src_loc, mic_locs, noise=False):
    """Simulate the guitar clip at four microphones and locate each peak.

    Parses ``--method/-m`` from the command line, simulates
    ``examples/samples/guitar_16k.wav`` in a 10 x 10 x 3.5 shoebox room
    (target RT60 0.166 s), writes the array recording to
    ``examples/samples/guitar_16k_reverb_<method>.wav``, shows the peak plots
    of the first four microphones and prints / returns the index of the
    maximum sample of each.

    :param src_loc: source position
    :param mic_locs: microphone positions (at least four microphones are
        indexed)
    :param noise: when true, Gaussian noise with standard deviation
        ``max(signal) / 100`` is added to each signal before peak picking
    :return: ``(fs, i1, i2, i3, i4)`` -- sampling rate and the first index of
        the maximum of each of the four signals
    """
    cli = argparse.ArgumentParser(
        description="Simulates and adds reverberation to a dry sound sample. Saves it into `./examples/samples`."
    )
    cli.add_argument(
        "--method",
        "-m",
        choices=methods,
        default=methods[0],
        help="Simulation method to use",
    )
    args = cli.parse_args()

    # Desired reverberation time (seconds) and room dimensions (meters).
    # The original value was 0.3 s.
    rt60_tgt = 0.166
    room_dim = [10, 10, 3.5]

    # Mono source signal; its sampling rate becomes the room's rate.
    fs, audio = wavfile.read("examples/samples/guitar_16k.wav")

    # Invert Sabine's formula to get the ISM parameters.
    e_absorption, max_order = pra.inverse_sabine(rt60_tgt, room_dim)

    # Only "ism" and "hybrid" create a room.
    if args.method == "ism":
        room = pra.ShoeBox(room_dim,
                           fs=fs,
                           materials=pra.Material(e_absorption),
                           max_order=max_order)
    elif args.method == "hybrid":
        room = pra.ShoeBox(
            room_dim,
            fs=fs,
            materials=pra.Material(e_absorption),
            max_order=3,
            ray_tracing=True,
            air_absorption=True,
        )

    room.add_source(src_loc, signal=audio, delay=0.5)
    room.add_microphone_array(mic_locs)

    # simulate() also builds the RIRs.
    room.simulate()

    room.mic_array.to_wav(
        "examples/samples/guitar_16k_reverb_{}.wav".format(args.method),
        norm=True,
        bitdepth=np.int16,
    )

    print(fs)

    mic_indices = range(4)
    for mic in mic_indices:
        detect_peaks(room.mic_array.signals[mic, :],
                     mph=0,
                     mpd=1000,
                     threshold=10,
                     show=True)

    sigs = [room.mic_array.signals[mic, :] for mic in mic_indices]

    div = 100  # 10 is where things start to break down
    if noise:
        sigs = [s + np.random.normal(0, max(s) / div, s.shape) for s in sigs]

    """
    for sig in sigs:
        detect_peaks(sig, mph=0, mpd=1000, threshold=10, show=True)
    """

    # Peak value of every signal and the first sample index where it occurs.
    peak_values = [max(s) for s in sigs]
    peak_indices = [np.where(s == peak)[0][0] for s, peak in zip(sigs, peak_values)]

    print("SRC's: " + str(src_loc))
    print(*peak_values)
    print(*peak_indices)

    return (fs, *peak_indices)
def cache_rirs(self, ):
    """Load the cached RIRs from ``self.rir_file`` or generate and save them.

    After the call ``self.rir_A`` and ``self.rir_B`` hold the impulse
    responses for sources in region A and region B, and ``self.nrir`` their
    count. Freshly generated arrays have shape
    ``(nrir, self.samples, self.nmic)`` and dtype float32; each RIR is
    truncated or zero-padded to ``self.samples``.
    """
    if os.path.isfile(self.rir_file):
        cached = load_numpy_from_mat(self.rir_file)
        self.rir_A = cached['rir_A']  # shape = (nrir, samples, nmic)
        self.rir_B = cached['rir_B']  # shape = (nrir, samples, nmic)
        self.nrir = self.rir_A.shape[0]
        print('Loaded', self.nrir, 'RIRs from', self.rir_file)
        return

    # No cache file yet: pre-calculate <nrir> RIRs per region.
    self.nrir = 500
    print('Generating', self.nrir, 'RIRs ...')

    # Shoebox room: 6 x 4 x 2.5 m with an RT60 of 250 ms.
    rt60 = 0.250
    room_dim = np.asarray([6.0, 4.0, 2.5])
    # Invert Sabine's formula to obtain the parameters for the ISM simulator.
    absorption, max_order = pra.inverse_sabine(rt60, room_dim)
    room = pra.ShoeBox(
        room_dim,
        fs=self.fs,
        materials=pra.Material(absorption),
        max_order=max_order,
    )

    # Place the array around its centre.
    array_center = np.asarray([2.5, 1.5, 0.8])
    room.add_microphone_array(self.micpos.T + array_center[:, np.newaxis])

    # Sources alternate A, B, A, B, ...; the regions differ only in x.
    region_x_ranges = ((1.0, 2.0), (3.0, 4.0))
    for _ in range(self.nrir):
        for x_low, x_high in region_x_ranges:
            x = np.random.uniform(x_low, x_high)
            y = np.random.uniform(2.0, 3.0)
            z = np.random.uniform(1.5, 2.0)
            room.add_source([x, y, z], signal=0, delay=0)

    # Compute all RIRs and report how long it took.
    t0 = time.time()
    room.compute_rir()
    t1 = time.time()
    print('Generated', self.nrir, 'RIRs in', t1 - t0, 'seconds')

    rir_shape = (self.nrir, self.samples, self.nmic)
    self.rir_A = np.zeros(rir_shape, dtype=np.float32)
    self.rir_B = np.zeros(rir_shape, dtype=np.float32)

    # Copy each RIR into its slot, clipped to <samples>.
    for r in range(self.nrir):
        for m in range(self.nmic):
            for target, source_idx in ((self.rir_A, r * 2 + 0), (self.rir_B, r * 2 + 1)):
                h = room.rir[m][source_idx]
                n = min(self.samples, h.size)
                target[r, :n, m] = h[:n]

    save_numpy_to_mat(self.rir_file, {
        'rir_A': self.rir_A,
        'rir_B': self.rir_B,
    })
def mix_convolutive(S: np.ndarray, sim: dict,
                    data_set: dict) -> Tuple[np.ndarray, np.ndarray, dict]:
    """Convolutively mix the source signals in a simulated shoebox room.

    Each source is simulated on its own (all other sources muted) so that
    the per-source recordings are available separately; the mixture is
    their sum.

    :param S: source signals, one row per source
    :param sim: simulation settings; ``sim['env_options']`` must provide
        ``rt60``, ``room_dim``, ``sigma2_awgn``, ``micro_locations`` (one
        column per microphone) and ``source_locations`` (one row per
        source); ``sim['microphones']`` optionally sets the number of
        microphones (default: number of sources)
    :param data_set: must provide the sampling rate under ``'fs'``
    :return: ``(filtered, mixed, info)`` -- the stacked per-source array
        recordings, their sum over sources, and ``{'room_object': room}``
    :raises ValueError: if fewer microphone or source locations are given
        than required
    """
    # Get parameters
    opts = sim['env_options']
    N = S.shape[0]  # number of sources
    M = sim.get('microphones', N)  # number of microphones

    # Validate the geometry before building the room.
    micro_locs = opts['micro_locations']
    if micro_locs.shape[1] < M:
        # Locations are stored column-wise, so the count is shape[1].
        raise ValueError(
            '{} microphones required, but only {} microphone locations specified'
            .format(M, micro_locs.shape[1]))

    source_locs = opts['source_locations']
    if source_locs.shape[0] < N:
        raise ValueError(
            '{} sources required, but only {} source locations specified'.
            format(N, source_locs.shape[0]))

    # We invert Sabine's formula to obtain the parameters for the ISM simulator
    e_absorption, max_order = pra.inverse_sabine(opts['rt60'],
                                                 opts['room_dim'])

    # Create room
    room = pra.ShoeBox(opts['room_dim'],
                       fs=data_set['fs'],
                       materials=pra.Material(e_absorption),
                       max_order=max_order,
                       sigma2_awgn=opts['sigma2_awgn'])

    # Select as many microphones as needed
    R = micro_locs[:, :M]
    room.add_microphone_array(pra.MicrophoneArray(R, room.fs))

    # At first we add empty sources in order to record each source
    # separately for SDR/SIR computation later.
    for sig, loc in zip(S, source_locs):
        room.add_source(loc, signal=np.zeros_like(sig))

    # Make separate recordings
    filtered = []
    for source, s in zip(room.sources, S):
        # Set only one of the signals
        source.signal[:] = s

        # Simulate & record the signal
        room.simulate()
        filtered.append(room.mic_array.signals)

        # Unset that source's signal (for next iterations)
        source.signal[:] = 0
    filtered = np.array(filtered)

    # Now the mixed signal is just the sum over sources
    mixed = np.sum(filtered, axis=0)

    return filtered, mixed, {'room_object': room}
def make_noisy(args, thread_id, num_make_utts):
    """Simulate far-field noisy utterances in random rooms and write them to disk.

    Repeatedly draws a random shoebox room, microphone-array position, target
    direction and interferer position, then renders ``args.nutt_per_room``
    utterances in it: the mixture (target + interference + optional diffuse
    noise), the direct-path target and the reverberant target reference
    (optionally passed through ``args.targ_bf``).

    :param args: configuration namespace; provides the room / T60 / position
        ranges, ``sample_rate``, ``num_mic``, ``mic_pos``, the ``save_mix`` /
        ``save_reverb`` / ``save_clean`` switches, SIR / SNR / gain ranges,
        the beamformer handles and ``out_path``
    :param thread_id: worker id embedded in utterance ids and file names
    :param num_make_utts: number of utterances after which the function returns
    :return: ``(noisy_scp_list, noisy_utt2spk, noisy_text_dict, mix2info)``

    NOTE(review): the mixture room is used unconditionally further down
    (``room_mix.simulate``), so ``args.save_mix`` effectively has to be true.
    NOTE(review): ``iSNR`` is only assigned when diffuse noise is available
    but is always used for the file name and the returned info.
    NOTE(review): ``num_bin`` and ``callback_mix`` are not defined in this
    function; they must exist at module level.
    """
    spe_utt_ids, noise_utt_ids, diffuse_utt_ids, text_dict, utt2spk_dict, utt2data_dict = load_data(args)
    audio_parser = AudioParser()

    spe_utt_size = len(spe_utt_ids) if spe_utt_ids is not None else 0
    noise_utt_size = len(noise_utt_ids) if noise_utt_ids is not None else 0
    diffuse_utt_size = len(diffuse_utt_ids) if diffuse_utt_ids is not None else 0

    noisy_scp_list = []
    noisy_utt2spk = []
    noisy_text_dict = []
    mix2info = []
    num_utts = 0

    all_angle = 360.0
    Targ_Ang_Num = args.num_targ_ang
    Targ_Ang_Resolution = all_angle / Targ_Ang_Num if Targ_Ang_Num > 0 else 0.0

    save_mix = args.save_mix
    save_reverb = args.save_reverb
    save_clean = args.save_clean

    while True:
        ## Draw a random room
        room_x = random.uniform(args.min_room_length, args.max_room_length)
        room_y = random.uniform(args.min_room_weidth, args.max_room_weidth)
        room_z = random.uniform(args.min_room_height, args.max_room_height)
        room_dim = [room_x, room_y, room_z]

        ## Create the rooms
        T60 = random.uniform(args.min_T60, args.max_T60)
        absorption, max_order = pra.inverse_sabine(T60, room_dim)
        if save_mix:
            room_mix = pra.ShoeBox(room_dim, fs = args.sample_rate, materials=pra.Material(absorption), max_order=max_order, sigma2_awgn = None)
        else:
            room_mix = None

        if save_reverb:
            room_ref = pra.ShoeBox(room_dim, fs = args.sample_rate, materials=pra.Material(absorption), max_order=max_order, sigma2_awgn = None)
        else:
            # Fix: this branch used to reset room_mix, which discarded the
            # mixture room and left room_ref unbound.
            room_ref = None

        if save_clean:
            # Almost fully absorbing walls for the direct-path signal.
            room_dir = pra.ShoeBox(room_dim, fs = args.sample_rate, materials=pra.Material(0.99999), max_order=max_order, sigma2_awgn = None)
        else:
            room_dir = None

        ## Draw the position of the microphone array
        mic_x = random.uniform(args.min_mic_x, room_x - args.min_mic_x)
        mic_y = random.uniform(args.min_mic_y, room_y - args.min_mic_y)
        mic_z = random.uniform(args.min_mic_z, max(min(room_z - args.min_mic_z, 2.0), args.min_mic_z + 0.5))

        ## Compute the position of the microphones (all at height mic_z)
        mic_xyz = []
        for m in range(args.num_mic):
            mic_pos = args.mic_pos[m]
            x = mic_x + mic_pos[0]
            y = mic_y + mic_pos[1]
            z = mic_z
            mic_xyz.append([x, y, z])
        mic_xyz = np.array(mic_xyz) # ( num_mic, 3 )
        mic_xyz = mic_xyz.T         # ( 3, num_mic )

        ## Add microphone array
        mic_array = pra.MicrophoneArray(mic_xyz, args.sample_rate)
        if room_mix is not None:
            room_mix = room_mix.add_microphone_array(mic_array)
        if room_ref is not None:
            room_ref = room_ref.add_microphone_array(mic_array)
        if room_dir is not None:
            room_dir = room_dir.add_microphone_array(mic_array)

        ##print("room = [%.2f %.2f %.2f], micro = [%.2f %.2f %.2f]" % (room_x, room_y, room_z, mic_x, mic_y, mic_z))

        ## Draw the target source until it lies at least 0.5 from the x/y walls
        target_source = None
        while True:
            if args.num_targ_ang <= 0.0:
                targ_ang = random.randint( 0, int(all_angle) )
            else:
                targ_ang = int(random.randint(0, Targ_Ang_Num - 1) * Targ_Ang_Resolution)

            targ_theta = np.pi * targ_ang / 180.0
            targ_dist = random.uniform(args.min_targ_distance, args.max_targ_distance)

            targ_x = mic_x + np.cos(targ_theta) * targ_dist
            targ_y = mic_y + np.sin(targ_theta) * targ_dist
            targ_z = mic_z

            target_source = [targ_x, targ_y, targ_z]

            if (targ_x < (room_x - 0.5) and targ_x > 0.5) and (targ_y < (room_y - 0.5) and targ_y > 0.5):
                break

        # NOTE(review): target_source is never None here, so this guard
        # cannot trigger; kept unchanged.
        if target_source is None and not room_mix.is_inside(target_source):
            continue

        ##print("room = [%.2f %.2f %.2f], target_source = [%.2f %.2f %.2f]" % (room_x, room_y, room_z, target_source[0], target_source[1], target_source[2]))
        ##print("targ_ang = %d, targ_dist %.2f" % (targ_ang, targ_dist))

        # A linear array cannot tell front from back: fold angles to [0, 180].
        targ_tdoa = targ_ang
        if args.is_linear_mic and targ_tdoa > 180:
            targ_tdoa = 360.0 - targ_tdoa

        ## Draw interference sources
        # NOTE(review): min(..., 1) always yields 1 interferer -- confirm.
        num_interf = min(random.randint(1, args.max_num_interf), 1)
        interf_angs = []
        interf_dists = []
        interf_source = []
        while True:
            interf_ang = random.randint(0, int(all_angle))
            interf_tdoa = interf_ang
            if args.is_linear_mic and interf_tdoa > 180:
                interf_tdoa = 360.0 - interf_tdoa

            # Keep the interferer at least minAD degrees away from the target.
            if np.abs(targ_tdoa - interf_tdoa) < args.minAD:
                continue

            interf_theta = np.pi * interf_ang / 180.0
            interf_dist = random.uniform(args.min_interf_distance, args.max_interf_distance)

            interf_x = mic_x + np.cos(interf_theta) * interf_dist
            interf_y = mic_y + np.sin(interf_theta) * interf_dist
            interf_z = mic_z

            ainterf_source = [interf_x, interf_y, interf_z]

            if (interf_x < (room_x - 0.5) and interf_x > 0.5) and (interf_y < (room_y - 0.5) and interf_y > 0.5):
                interf_angs.append(interf_ang)
                interf_dists.append(interf_dist)
                interf_source.append(ainterf_source)

            if len(interf_source) >= num_interf:
                break

        ##print("interf_ang = %d, interf_dist %.2f, num_interf = %d" % (interf_ang, interf_dist, len(interf_source)))

        for sim in range(args.nutt_per_room):
            # Reuse the rooms: drop the sources of the previous utterance.
            if room_mix is not None:
                room_mix.sources = []
            if room_ref is not None:
                room_ref.sources = []
            if room_dir is not None:
                room_dir.sources = []

            ## Pick a speech utterance of at least one second that is not silent
            while True:
                spe_idx = random.randint(0, spe_utt_size - 1)
                spe_key, spe_path = spe_utt_ids[spe_idx]

                spe_wav = audio_parser.WaveData(spe_path, sample_rate = args.sample_rate)
                if spe_wav is None or spe_wav.shape[0] < args.sample_rate:
                    continue
                spe_wav = np.squeeze(spe_wav)
                if np.mean(np.abs(spe_wav)) > 0:
                    break

            spe_length = spe_wav.shape[0]
            spe_wav = pra.normalize(spe_wav)
            spe_wav = pra.highpass(spe_wav, args.sample_rate, 50)

            if room_mix is not None and room_mix.is_inside(target_source):
                room_mix = room_mix.add_source(target_source, signal = spe_wav, delay = 0)
            else:
                print("target_source not in room_mix")
                continue

            if room_ref is not None and room_ref.is_inside(target_source):
                room_ref = room_ref.add_source(target_source, signal = spe_wav, delay = 0)
            else:
                print("target_source not in room_ref")

            if room_dir is not None and room_dir.is_inside(target_source):
                room_dir = room_dir.add_source(target_source, signal = spe_wav, delay = 0)
            else:
                print("target_source not in room_dir")

            if room_mix is not None and len(room_mix.sources) < 1:
                print("target_source not in room_mix")
                break
            if room_ref is not None and len(room_ref.sources) < 1:
                print("target_source not in room_ref")
                break
            if room_dir is not None and len(room_dir.sources) < 1:
                print("target_source not in room_dir")
                break

            ## Add interference to the mixture room
            for it in range(0, num_interf):
                while True:
                    inf_idx = random.randint(0, noise_utt_size - 1)
                    inf_path = noise_utt_ids[inf_idx]
                    inf_wav = audio_parser.WaveData(inf_path, sample_rate = args.sample_rate)
                    if inf_wav is None or inf_wav.shape[0] < args.sample_rate:
                        continue
                    inf_wav = np.squeeze(inf_wav)
                    if np.mean(np.abs(inf_wav)) > 0:
                        break

                inf_length = inf_wav.shape[0]
                inf_wav = pra.normalize(inf_wav)
                inf_wav = pra.highpass(inf_wav, args.sample_rate, 50)

                # Repeat the interference until it covers the speech, then trim.
                while(inf_length < spe_length):
                    inf_wav = np.concatenate((inf_wav, inf_wav), axis = 0)
                    inf_length = inf_wav.shape[0]
                inf_wav = inf_wav[:spe_length]

                if room_mix is not None and room_mix.is_inside(interf_source[it]):
                    room_mix = room_mix.add_source(interf_source[it], signal = inf_wav, delay = 0)
                else:
                    print("interf_source not in room_mix")
                    continue

            if room_mix is not None and len(room_mix.sources) < 1:
                break

            ## Make the far-field mixture audio
            iSIR = random.uniform(args.lowSIR, args.upSIR)
            room_mix.simulate(callback_mix = callback_mix, callback_mix_kwargs = {'snr': 30, 'sir': iSIR, 'n_src': num_interf + 1, 'n_tgt': 1, 'ref_mic': 0})
            mix_wav = room_mix.mic_array.signals.T  # transposed; unpacked below as (mix_length, num_channel)
            mix_length, num_channel = mix_wav.shape

            ## Read diffuse noise
            if diffuse_utt_ids is not None:
                while True:
                    diff_idx = random.randint(0, diffuse_utt_size - 1)
                    diff_path = diffuse_utt_ids[diff_idx]
                    diff_wav = audio_parser.WaveData(diff_path, sample_rate = args.sample_rate, id_channel = list(range(0, num_channel)))
                    if diff_wav is None or diff_wav.shape[0] < args.sample_rate:
                        continue
                    if np.mean(np.abs(diff_wav)) > 0:
                        break

                dif_length, num_channel = diff_wav.shape
                '''
                for i in range(int(num_channel / 2)):
                    ch_wav = diff_wav[:, i]
                    diff_wav[:, i] = diff_wav[:, num_channel - i -1]
                    diff_wav[:, num_channel - i -1] = ch_wav
                '''
                ## Add diffuse noise into mix
                while( dif_length < mix_length ):
                    diff_wav = np.concatenate((diff_wav, diff_wav), axis = 0)
                    dif_length = diff_wav.shape[0]
                diff_wav = diff_wav[0:mix_length, :]

                iSNR = random.uniform(args.lowSNR, args.upSNR)
                mix_wav = audio_parser.MixWave(mix_wav, diff_wav, snr = iSNR)

            ## Adapt gain of mixture audio by given gain
            gain = random.uniform(args.lowGain, args.upGain)
            scale = gain / np.max(np.abs(mix_wav))

            mix_wav = mix_wav * scale
            mix_wav = mix_wav * 32767.0
            mix_wav = mix_wav.astype(np.int16)

            if room_dir is not None:
                ## Simulate the direct-path signal (first microphone only)
                room_dir.simulate()
                dir_wav = room_dir.mic_array.signals[0,:].T
                dir_wav = dir_wav * scale
                dir_wav = dir_wav * 32767.0
                dir_wav = dir_wav.astype(np.int16)
            else:
                dir_wav = None

            if room_ref is not None:
                ## Simulate the clean far-field signal used as reference for metrics
                room_ref.simulate()
                ref_wav = room_ref.mic_array.signals
                ref_wav = ref_wav * scale
            else:
                ref_wav = None

            if ref_wav is not None:
                if args.targ_bf is not None:
                    # Beamform the reference towards the target direction.
                    num_block = 1
                    ref_wav = ref_wav[np.newaxis, :, :]                     # [ num_block, num_channel, spe_length ]
                    ref_wav = torch.FloatTensor(ref_wav)                    # [ num_block, num_channel, spe_length ]
                    ref_wav = ref_wav.view(num_block * num_channel, 1, -1)  # [ num_block * num_channel, 1, spe_length ]

                    input_audio = ref_wav.to(args.device)                   # (num_block * num_channel, 1, spe_length)
                    mFFT = args.convstft(input_audio)                       # (num_block * num_channel, num_bin * 2, num_frame)

                    num_frame = mFFT.size(2)
                    mFFT = mFFT.view(num_block, num_channel, num_bin * 2, -1)  #( num_block, num_channel, num_bin * 2, num_frame)
                    mFFT_r = mFFT[:, :, :num_bin, :]                           #( num_block, num_channel, num_bin, num_frame)
                    mFFT_i = mFFT[:, :, num_bin:, :]                           #( num_block, num_channel, num_bin, num_frame)

                    mFFT_r = mFFT_r.permute([0, 3, 2, 1]).contiguous()         #( num_block, num_frame, num_bin, num_channel)
                    mFFT_i = mFFT_i.permute([0, 3, 2, 1]).contiguous()         #( num_block, num_frame, num_bin, num_channel)

                    mFFT_r = mFFT_r.view(num_block * num_frame, num_bin, num_channel)  # ( num_block * num_frame, num_bin, num_channel)
                    mFFT_i = mFFT_i.view(num_block * num_frame, num_bin, num_channel)  # ( num_block * num_frame, num_bin, num_channel)

                    mFFT = torch.cat([torch.unsqueeze(mFFT_r, 1), torch.unsqueeze(mFFT_i, 1)], dim = 1)  # ( num_block * num_frame, 2, num_bin, num_channel )

                    # Map the target angle to a beam index of resolution
                    # bf_direction_resolution.
                    targ_tdoa = targ_ang
                    if num_channel == 2 or args.is_linear_mic:
                        if targ_tdoa > 180:
                            targ_tdoa = 360.0 - targ_tdoa
                    bf_beam = targ_tdoa / args.bf_direction_resolution + 0.5
                    bf_beam = int(bf_beam) % args.num_beam
                    print("tdoa = %d, beam = %d" % (targ_ang, bf_beam))

                    rFFT = args.targ_bf(mFFT, bf_beam)                          # (num_block * num_frame, 2, num_bin, 1)
                    rFFT = rFFT[:, :, :, 0].view([num_block, -1, 2, num_bin])   # (num_block, num_frame, 2, num_bin)
                    rFFT = rFFT.permute([0, 2, 3, 1]).contiguous()              # ( num_block, 2, num_bin, num_frame )
                    est_fft = torch.cat([rFFT[:,0], rFFT[:,1]], 1)              # ( num_block, num_bin * 2, num_frame )
                    ref_wav = args.convistft(est_fft)                           # ( num_block, 1, num_sample)
                    ref_wav = torch.squeeze(ref_wav, 1)                         # ( num_block, num_sample)
                    ref_wav = ref_wav[0, :]                                     # ( num_sample)
                    ref_wav = ref_wav.data.cpu().numpy()                        # ( num_sample)
                else:
                    ref_wav = ref_wav[0, :]                                     # ( num_sample)

                ref_wav = ref_wav * 32767.0
                ref_wav = ref_wav.astype(np.int16)
            else:
                ref_wav = None

            ## Align mix_wav, ref_wav and dir_wav to the shortest available one.
            # Fix: ref_wav / dir_wav may be None and must not be dereferenced.
            nsample = min(wav.shape[0] for wav in (mix_wav, ref_wav, dir_wav) if wav is not None)
            mix_wav = mix_wav[:nsample]
            if ref_wav is not None:
                ref_wav = ref_wav[:nsample]
            if dir_wav is not None:
                dir_wav = dir_wav[:nsample]

            num_utts += 1

            _, spe_name, _ = file_parse.getFileInfo(spe_path)

            # Output directory: <out_path>/wav[/<data_id>][/<spk_id>]/wav
            out_path = os.path.join(args.out_path, 'wav')
            if not os.path.exists(out_path):
                os.makedirs(out_path)

            if utt2data_dict is not None:
                data_key, data_id = utt2data_dict[spe_idx]
                out_path = os.path.join(out_path, data_id)
                if not os.path.exists(out_path):
                    os.makedirs(out_path)
            else:
                data_id = 'data01'

            if utt2spk_dict is not None:
                spk_key, spk_id = utt2spk_dict[spe_idx]
                out_path = os.path.join(out_path, spk_id)
                if not os.path.exists(out_path):
                    os.makedirs(out_path)
            else:
                spk_id = 'spk01'

            out_path = os.path.join(out_path, 'wav')
            if not os.path.exists(out_path):
                os.makedirs(out_path)

            # Strip separators so that ids can be split on '_' later.
            spe_key = spe_key.replace('_', '').replace('-', '').replace('.', '')
            spk_id = spk_id.replace('_', '').replace('-', '').replace('.', '')

            #utt_id = spk_id + "_" + spe_key + "%02d%07d" % (thread_id, num_utts)
            utt_id = spk_id + "_" + "%02d%07d" % (thread_id, num_utts)

            if mix_wav is not None:
                ## Write the mixture audio
                filename = "%s_id%02d%07d_Doa%d_SIR%.1f_SNR%.1f" % (spe_key, thread_id, num_utts, targ_ang, iSIR, iSNR)
                mix_path = os.path.join(out_path, '%s.wav' % (filename) )
                audio_parser.WriteWave(mix_path, mix_wav, args.sample_rate)
            else:
                mix_path = None

            if dir_wav is not None:
                filename = "%s_id%02d%07d_Doa%d_DS" % (spe_key, thread_id, num_utts, targ_ang)
                ds_path = os.path.join(out_path, '%s.wav' % (filename) )
                audio_parser.WriteWave(ds_path, dir_wav, args.sample_rate)
            else:
                ds_path = None

            if ref_wav is not None:
                filename = "%s_id%02d%07d_Doa%d_Ref" % (spe_key, thread_id, num_utts, targ_ang)
                ref_path = os.path.join(out_path, '%s.wav' % (filename) )
                audio_parser.WriteWave(ref_path, ref_wav, args.sample_rate)
            else:
                ref_path = None

            if text_dict is not None:
                text_key, text_value = text_dict[spe_idx]
            else:
                text_value = ' '

            noisy_scp_list.append((utt_id, mix_path, ds_path, ref_path, targ_ang, targ_dist, iSIR, iSNR, scale))
            noisy_utt2spk.append(spk_id)
            noisy_text_dict.append(text_value)

            info = (utt_id, spe_key, mix_path, ds_path, ref_path, targ_ang, targ_dist, interf_angs, interf_dists, iSIR, iSNR, scale)
            mix2info.append(info)
            print("%d / %d: %s" % (num_utts, num_make_utts, mix_path))

            if num_utts >= num_make_utts:
                return noisy_scp_list, noisy_utt2spk, noisy_text_dict, mix2info
# Script: set up a reverberant shoebox room with two CMU ARCTIC speech
# sources and define the positions of a two-microphone array.
import wave

import numpy as np
import pyroomacoustics as pra
from sound_source_separation import wave_writer, wave_loader

# The desired reverberation time and dimensions of the room
reverberation_time60 = 1.5  # seconds
room_dim = [7., 8., 9.]  # meters

# get room material parameter to achieve the desired reverberation time
# (inverse_sabine returns the wall absorption and the image-source order)
e_absorption, max_order = pra.inverse_sabine(reverberation_time60, room_dim)
# NOTE(review): fs is fixed to 16000 here -- confirm it matches the wav files
room = pra.ShoeBox(room_dim, fs=16000, materials=pra.Material(e_absorption), max_order=max_order)

# Both wav files are opened together and closed when the block exits.
with wave.open("./CMU_ARCTIC/cmu_us_aew_arctic/wav/arctic_a0001.wav") as wav_speech_1, \
        wave.open("./CMU_ARCTIC/cmu_us_aew_arctic/wav/arctic_a0005.wav") as wav_speech_2:
    # place sources in the room
    speech_data_1 = wave_loader.load_to_mono_array(wav_speech_1)
    room.add_source([2.5, 4.90, 1.76], signal=speech_data_1)

    # the second source is added with delay=3.
    speech_data_2 = wave_loader.load_to_mono_array(wav_speech_2)
    room.add_source([4.5, 1.24, 8.76], signal=speech_data_2, delay=3.)

# place the microphone array in the room
# (np.c_ stacks each position as one column)
mic_locs = np.c_[[6.3, 4.87, 1.2],  # mic 1
                 [6.3, 4.93, 1.2],  # mic 2
                 ]