Beispiel #1
0
def simulateSound(room_dim, R_loc, source_locations, source_audios, rt60, materials=None, max_order=None):
    """Simulate a reverberant multi-microphone recording plus a dry reference.

    Relies on the module-level names ``fs``, ``R_MIC``, ``N_MIC`` and
    ``generate_mic_array`` being defined elsewhere in this file.

    :param room_dim: shoebox room dimensions, passed to ``pra.ShoeBox``
    :param R_loc: location handed to ``generate_mic_array`` for the array
    :param source_locations: one location per source
    :param source_audios: sequence of 1-D numpy arrays, one per source; shorter
        signals are zero-padded at the end to the length of the longest one.
        The caller's sequence is NOT modified (padding is done on copies).
    :param rt60: target reverberation time, only used when ``materials`` is None
    :param materials: optional 6-tuple ``(ceiling, east, west, north, south, floor)``
        forwarded to ``pra.make_materials``
    :param max_order: image-source order, only used when ``materials`` is given
    :return: ``(mixed, premix_w_reverb, premix, R)`` where ``mixed`` is the
        microphone array signal of the reverberant room, ``premix_w_reverb`` and
        ``premix`` are the ``simulate(return_premix=True)`` results of the
        reverberant and of the reference room, and ``R`` is the microphone
        geometry returned by ``generate_mic_array``.
        (Original author's note on shapes: mixed (C, L), premixes (N, C, L).)
    """
    if materials is not None:
        (ceiling, east, west, north, south, floor) = materials
        room = pra.ShoeBox(
            room_dim,
            fs=fs,
            materials=pra.make_materials(
                ceiling=ceiling,
                floor=floor,
                east=east,
                west=west,
                north=north,
                south=south,
            ),
            max_order=max_order,
        )
    else:
        try:
            e_absorption, max_order_rt60 = pra.inverse_sabine(rt60, room_dim)
        except ValueError:
            # The requested rt60 could not be inverted for this room:
            # fall back to an rt60 of 1.
            e_absorption, max_order_rt60 = pra.inverse_sabine(1, room_dim)
        room = pra.ShoeBox(room_dim, fs=fs, materials=pra.Material(e_absorption), max_order=max_order_rt60)

    R = generate_mic_array(R_MIC, N_MIC, R_loc)
    room.add_microphone_array(pra.MicrophoneArray(R, room.fs))

    # Zero-pad every source to the longest one. Work on a new list so the
    # caller's ``source_audios`` is left untouched.
    length = max(len(audio) for audio in source_audios)
    padded_audios = [
        np.pad(audio, (0, length - len(audio)), 'constant')
        for audio in source_audios
    ]

    for i, location in enumerate(source_locations):
        room.add_source(location, signal=padded_audios[i], delay=0)

    room.image_source_model()
    premix_w_reverb = room.simulate(return_premix=True)
    mixed = room.mic_array.signals

    # Reference ("ground truth") room: fully absorbing walls and no reflections.
    room_gt = pra.ShoeBox(room_dim, fs=fs, materials=pra.Material(1.0), max_order=0)
    # NOTE(review): generate_mic_array(0, 1, R_loc) presumably yields a single
    # microphone at R_loc rather than the full array - confirm against the helper.
    R_gt = generate_mic_array(0, 1, R_loc)
    room_gt.add_microphone_array(pra.MicrophoneArray(R_gt, room.fs))

    for i, location in enumerate(source_locations):
        room_gt.add_source(location, signal=padded_audios[i], delay=0)
    room_gt.compute_rir()

    room_gt.image_source_model()
    premix = room_gt.simulate(return_premix=True)

    return (mixed, premix_w_reverb, premix, R)
Beispiel #2
0
def get_rir(audio_signal,
            fs,
            rt60=0.2,
            room_dim=[60, 60, 10],
            room_source=[30, 30, 4.5],
            mic_pos=[30, 10, 7],
            T=19,
            D=0.01,
            S=35):
    """Return the room impulse response between one source and one microphone.

    The wall absorption is derived from ``rt60`` with Sabine's formula, using a
    sound speed computed from ``T``, ``S`` and ``D`` with Medwin's simple
    sea-water equation (temperature in deg C, salinity in parts per thousand,
    depth in metres in that equation).

    The list defaults are only read, never mutated, inside this function.

    :param audio_signal: signal attached to the source (the RIR itself does
        not depend on it)
    :param fs: sampling frequency of the room
    :param rt60: target reverberation time handed to ``pra.inverse_sabine``
    :param room_dim: shoebox dimensions
    :param room_source: source position
    :param mic_pos: microphone position
    :param T: temperature term of the sound-speed formula
    :param D: depth term of the sound-speed formula
    :param S: salinity term of the sound-speed formula
    :return: ``room.rir[0][0]``, the RIR of microphone 0 for source 0
    """
    import pyroomacoustics as pra
    import numpy as np
    # Medwin (1975): c = 1449.2 + 4.6 T - 0.055 T^2 + 0.00029 T^3
    #                    + (1.34 - 0.01 T)(S - 35) + 0.016 z
    # The cubic coefficient is 0.00029 (it was previously typed as 0.0029,
    # which overestimates c by roughly 18 m/s at T = 19).
    c = (1449.2 + 4.6 * T - 0.055 * T**2 + 0.00029 * T**3
         + (1.34 - 0.01 * T) * (S - 35) + 0.016 * D)
    # Only the absorption is taken from the inversion; the reflection order
    # is fixed to 3 below.
    e_absorption, _ = pra.inverse_sabine(rt60, room_dim, c=c)
    # NOTE(review): c is only given to inverse_sabine; the ShoeBox below is
    # built without it - confirm whether the room should use this speed too.
    room = pra.ShoeBox(room_dim,
                       fs=fs,
                       materials=pra.Material(e_absorption),
                       ray_tracing=False,
                       max_order=3,
                       air_absorption=False)
    room.add_source(room_source, signal=audio_signal, delay=1.0)
    # One column per microphone; here a single microphone.
    mic_locs = np.c_[mic_pos,  # mic 1
                     ]
    room.add_microphone_array(mic_locs)
    room.compute_rir()
    rir = room.rir[0][0]
    return rir
Beispiel #3
0
def simroom(room_dim, src_loc, mic_locs):
    """Reverberate the guitar sample in a shoebox room and return channel peaks.

    Parses ``--method/-m`` from the command line (choices come from the
    module-level ``methods``); the chosen method is only used in the name of
    the written wav file. The room is built for a target RT60 of 0.3 s.

    :param room_dim: shoebox dimensions in meters
    :param src_loc: position of the single source
    :param mic_locs: microphone positions handed to ``add_microphone_array``
    :return: numpy array holding the maximum sample of channels 0, 1 and 2
    """
    cli = argparse.ArgumentParser(
        description=
        "Simulates and adds reverberation to a dry sound sample. Saves it into `./examples/samples`."
    )
    cli.add_argument("--method",
                     "-m",
                     choices=methods,
                     default=methods[0],
                     help="Simulation method to use")
    chosen_method = cli.parse_args().method

    # Target reverberation time in seconds.
    target_rt60 = 0.3

    # Mono source signal; its sampling rate becomes the room's sampling rate.
    fs, audio = wavfile.read("examples/samples/guitar_16k.wav")

    # Invert Sabine's formula to get absorption and reflection order.
    absorption, reflection_order = pra.inverse_sabine(target_rt60, room_dim)
    wall_material = pra.Material(absorption)

    room = pra.ShoeBox(room_dim,
                       fs=fs,
                       materials=wall_material,
                       max_order=reflection_order)
    room.add_source(src_loc, signal=audio, delay=0.5)
    room.add_microphone_array(mic_locs)

    # simulate() also computes the RIRs.
    room.simulate()

    out_path = "examples/samples/guitar_16k_reverb_{}.wav".format(chosen_method)
    room.mic_array.to_wav(out_path, norm=True, bitdepth=np.int16)

    # Peak value of each of the first three microphone channels.
    recorded = room.mic_array.signals
    return np.array([max(recorded[channel, :]) for channel in range(3)])
Beispiel #4
0
def room_simulate(num_mic, mic_array, room_type):
    """Build a shoebox room with a microphone array and candidate source spots.

    The array is centred in the x/y middle of the room at height 0.69 and the
    12 candidate source positions lie on a circle of radius 1 around that
    centre, at the same height.

    :param num_mic: number of microphones in the array
    :param mic_array: array layout, one of ``'circle'``, ``'ellipse'``, ``'linear'``
    :param room_type: key selecting the room dimensions
        (``'star_3'``, ``'room_819'`` or ``'room_409'``)
    :return: ``(room, source_positions)``; ``source_positions`` has one row
        per direction (shape ``(12, 3)``)
    :raises ValueError: if ``mic_array`` is not a supported layout
    :raises KeyError: if ``room_type`` is unknown
    """
    # Reject unknown layouts up front; previously this surfaced much later as
    # an unbound-local error on ``center_to_mic``.
    supported_layouts = ('circle', 'ellipse', 'linear')
    if mic_array not in supported_layouts:
        raise ValueError(
            "unknown mic_array layout {!r}; expected one of {}".format(
                mic_array, supported_layouts))

    room_list = {
        'star_3': [8.3, 3.4, 2.5],
        'room_819': [7.9, 7.0, 2.7],
        'room_409': [7.0, 4.2, 2.7]
    }
    room_dim = room_list[room_type]
    dim_x, dim_y, dim_z = room_dim[0], room_dim[1], room_dim[2]
    sr = 16000
    rt60 = 0.3
    e_absorption, max_order = pra.inverse_sabine(rt60, [dim_x, dim_y, dim_z])
    print(e_absorption, max_order)
    num_direction = 12
    mic_radius = 0.04  # radius of the circular layout
    mic_x_radius = 0.0637  # semi-axes of the elliptic layout
    mic_y_radius = 0.0484
    mic_lin = 0.04  # microphone spacing of the linear layout

    room = pra.ShoeBox(room_dim,
                       fs=sr,
                       materials=pra.Material(e_absorption),
                       max_order=max_order)

    mic_center = np.array([dim_x / 2, dim_y / 2, 0.69])
    thetas = np.arange(num_mic) / num_mic * 2 * np.pi
    theta_source = np.arange(num_direction) / num_direction * 2 * np.pi
    # center_to_mic has one column per microphone: shape (3, num_mic).
    if mic_array == 'circle':
        center_to_mic = np.stack(
            [np.cos(thetas),
             np.sin(thetas),
             np.zeros_like(thetas)], 0) * mic_radius
    elif mic_array == 'ellipse':
        center_to_mic = np.stack([
            mic_x_radius * np.cos(thetas), mic_y_radius * np.sin(thetas),
            np.zeros_like(thetas)
        ], 0)
    else:  # 'linear': microphones along x, centred on the array centre
        linear = np.arange(num_mic) * mic_lin
        linear = linear - np.max(linear) / 2
        center_to_mic = np.stack(
            [linear, np.zeros_like(linear),
             np.zeros_like(linear)], 0)
    mic_positions = mic_center[:, None] + center_to_mic
    room.add_microphone_array(mic_positions)

    far_field_distance = 1
    # One row per source direction: shape (num_direction, 3).
    center_to_source = np.stack([
        np.cos(theta_source),
        np.sin(theta_source),
        np.zeros_like(theta_source)
    ], -1) * far_field_distance
    source_positions = mic_center[None, :] + center_to_source

    return room, source_positions
Beispiel #5
0
def make_room(room_size,
              source_location,
              mic_array_location,
              rt60,
              sample_rate=16000):
    """Create a shoebox room holding one microphone array and one source.

    Absorption and reflection order are obtained by inverting Sabine's
    formula for the requested ``rt60``.

    :param room_size: shoebox dimensions
    :param source_location: position of the source (added without a signal)
    :param mic_array_location: microphone positions for ``add_microphone_array``
    :param rt60: target reverberation time
    :param sample_rate: sampling frequency of the room
    :return: the configured ``pra.ShoeBox``
    """
    absorption, reflection_order = pra.inverse_sabine(rt60, room_size)
    wall_material = pra.Material(absorption)
    shoebox = pra.ShoeBox(
        room_size,
        fs=sample_rate,
        materials=wall_material,
        max_order=reflection_order,
    )
    # Microphones first, then the source, as in the original call order.
    shoebox.add_microphone_array(mic_array_location)
    shoebox.add_source(source_location)
    return shoebox
def create_room(room_size, mics_loc, s1_loc, s2_loc, fs, T60=None):
    """Build a two-source shoebox room and compute its impulse responses.

    :param room_size: shoebox dimensions
    :param mics_loc: sequence with one coordinate entry per microphone; it is
        transposed before being handed to ``pra.MicrophoneArray``
    :param s1_loc: position of the first source
    :param s2_loc: position of the second source
    :param fs: sampling frequency of the room and of the microphone array
    :param T60: target reverberation time; when ``None`` the room is created
        with ``max_order=0`` and the library's default wall properties
    :return: the room, with ``compute_rir()`` already executed
    """
    if T60 is not None:
        absorption, max_order = pra.inverse_sabine(T60, room_size)
        # Use ``materials`` rather than the deprecated ``absorption`` keyword,
        # matching the other room builders in this file.
        room = pra.room.ShoeBox(room_size,
                                fs=fs,
                                t0=0,
                                materials=pra.Material(absorption),
                                max_order=max_order)
    else:
        room = pra.room.ShoeBox(room_size, fs=fs, t0=0, max_order=0)
    room.add_source(s1_loc)
    room.add_source(s2_loc)
    room.add_microphone_array(pra.MicrophoneArray(np.array(mics_loc).T, fs))
    room.compute_rir()
    return room
Beispiel #7
0
def func(cat1, cat2, cat2fn):
    """Write 500 reverberant two-source mixtures of ESC-50 clips to disk.

    100 times, one file is drawn at random from each of the two categories;
    both clips are cut to the shorter length and then mixed in 5 rooms with
    randomly placed sources. Every mixture is stored under
    ``outputs/combined/``.

    :param cat1: category key of the first source in ``cat2fn``
    :param cat2: category key of the second source in ``cat2fn``
    :param cat2fn: mapping from category key to a list of audio file names
    """
    target_rt60 = 0.3  # seconds
    dims = [10, 10, 3]  # meters

    # Sabine inversion gives the absorption and reflection order for the ISM.
    absorption, reflection_order = pra.inverse_sabine(target_rt60, dims)

    # Seven microphones, one per column.
    mics = np.c_[[4.9, 4, 1], [5.1, 4.1, 1], [5.1, 3.9, 1], [5, 4, 1.2],
                 [5, 4, 0.8], [4.9, 3.9, 1.2], [5.1, 4.1, 0.8]]

    audio_dir = "inputs/ESC-50-master/audio/"
    base_offsets = (5, 5, 1.5)

    for _pair in range(100):
        pick1 = np.random.randint(0, len(cat2fn[cat1]))
        pick2 = np.random.randint(0, len(cat2fn[cat2]))
        fn1 = cat2fn[cat1][pick1]
        fn2 = cat2fn[cat2][pick2]

        # The sampling rate of the second file is the one used for the room.
        fs, audio1 = wavfile.read(audio_dir + fn1)
        fs, audio2 = wavfile.read(audio_dir + fn2)

        # Trim both clips to the shorter one.
        common_len = min(audio1.shape[0], audio2.shape[0])
        audio1, audio2 = audio1[:common_len], audio2[:common_len]

        for _placement in range(5):
            room = pra.ShoeBox(dims,
                               fs=fs,
                               materials=pra.Material(absorption),
                               max_order=reflection_order)

            # Six uniform draws in [-1, 1): three per source position.
            jitter = np.random.random(6) * 2.0 - 1.0
            loc1 = [jitter[k] + base for k, base in enumerate(base_offsets)]
            loc2 = [jitter[k + 3] + base for k, base in enumerate(base_offsets)]

            room.add_source(loc1, signal=audio1, delay=0.)
            room.add_source(loc2, signal=audio2, delay=0.)
            room.add_microphone_array(mics)
            room.simulate()

            filename = fn1 + " " + fn2 + " " + str(loc1) + str(loc2) + ".wav"
            room.mic_array.to_wav(
                "outputs/combined/" + filename,
                norm=True,
                bitdepth=np.int16,
            )
Beispiel #8
0
def test_rt60_theory_single_band():
    """Check ``room.rt60_theory`` against the stand-alone Sabine/Eyring helpers.

    Uses the module-level ``room_dim``, ``fs``, ``S``, ``V`` and ``eps``
    defined elsewhere in this file.
    """
    # The desired reverberation time of the room
    rt60_tgt = 0.3  # seconds

    # We invert Sabine's formula to obtain the parameters for the ISM simulator
    e_absorption, max_order = pra.inverse_sabine(rt60_tgt, room_dim)

    # Create the room
    room = pra.ShoeBox(
        room_dim, fs=fs, materials=pra.Material(e_absorption), max_order=max_order
    )

    # Compare magnitudes: without abs() any negative deviation, however
    # large, would satisfy "< eps" and the test could never fail that way.
    rt60_sabine = pra.rt60_sabine(S, V, e_absorption, 0.0, room.c)
    assert abs(rt60_sabine - room.rt60_theory(formula="sabine")) < eps

    rt60_eyring = pra.rt60_eyring(S, V, e_absorption, 0.0, room.c)
    assert abs(rt60_eyring - room.rt60_theory(formula="eyring")) < eps
Beispiel #9
0
def build_room(dimensions, source, mic_array, rt60, fs, max_order_user):
    """
    Build the simulated shoebox room and compute its impulse responses.

    :param dimensions: length, width and height of the room
    :param source: contains the x, y, z location of the sound source
    :param mic_array: contains the x, y, z vectors of all the microphones
    :param float rt60: represents the reverberation time of the room
    :param int fs: represents the sampling frequency used for the generation of signals
    :param int max_order_user: maximum order of the simulated reflections;
        ``0`` means "use the order obtained from inverting Sabine's formula"
    :return: tuple ``(room, global_delay)`` with the pyroomacoustics room and
        half of the library's ``frac_delay_length`` constant, i.e. the delay
        introduced by the fractional-delay filters that has to be compensated
    """
    # We invert Sabine's formula to obtain the parameters for the ISM simulator
    e_absorption, max_order = pra.inverse_sabine(rt60, dimensions)

    # A non-zero user value overrides the order derived from rt60
    if max_order_user != 0:
        max_order = max_order_user

    # Building a 'Shoebox' room with the provided dimensions. ``materials`` is
    # used instead of the deprecated ``absorption`` keyword, matching the
    # other room builders in this file.
    room = pra.ShoeBox(p=dimensions,
                       fs=fs,
                       materials=pra.Material(e_absorption),
                       max_order=max_order)

    # Place the Microphone Array and the Sound Source inside the room
    mics = pra.MicrophoneArray(mic_array, fs)
    room.add_microphone_array(mics)
    room.add_source(source)

    # Computing the Room Impulse Response at each microphone, for each source
    room.image_source_model()
    room.compute_rir()

    # Getting the fractional delay introduced by the simulation
    global_delay = pra.constants.get("frac_delay_length") // 2

    return room, global_delay
Beispiel #10
0
# Load the two source recordings. The sampling frequency should match that of
# the room; the value read from the second file is the one kept in ``fs``.
fs, audio1 = wavfile.read("inputs/sitar.wav")
fs, audio2 = wavfile.read("inputs/piano.wav")

# Keep samples 1000000..2000000 of the first channel as a 1-D signal and
# attenuate the first source by a factor of 5.
audio1 = audio1[1000000:2000000, :1]
audio1 = audio1.reshape(audio1.shape[0])
audio1 = audio1 / 5

# Same excerpt for the second source. The reshape uses audio2's own length
# (it previously used audio1's, which fails when the excerpts differ in length).
audio2 = audio2[1000000:2000000, :1]
audio2 = audio2.reshape(audio2.shape[0])

print(audio1.shape)
print(audio2.shape)

# We invert Sabine's formula to obtain the parameters for the ISM simulator
e_absorption, max_order = pra.inverse_sabine(rt60_tgt, room_dim)

# Create the room
room = pra.ShoeBox(room_dim,
                   fs=fs,
                   materials=pra.Material(e_absorption),
                   max_order=max_order)

# place the sources in the room
room.add_source([7, 5, 1], signal=audio1, delay=0.)
room.add_source([2, 3, 1], signal=audio2, delay=0.)

# define the locations of the microphones
mic_locs = np.c_[
    # [4.9, 4, 1],
    # [5.1, 4.1, 1],
 def create_rooms(self,N_mutations_per_roomtype,roomtype=0):
     """Create randomised rooms for one room type and append them to ``self.rooms``.

     Each room gets random dimensions and materials drawn from the room
     specification, one randomly placed microphone, a log-sweep source 0.1
     away from the microphone along y, and one noise source taken from a
     random file in ``../../data/audio/``.

     :param N_mutations_per_roomtype: number of rooms to create
     :param roomtype: index/key into ``self.room_specs``; the selected spec
         must provide ``min_room_dim``, ``max_room_dim``, ``wall_materials``,
         ``floor_material`` and ``ceiling_material``
     """
     roomtype=self.room_specs[roomtype]
     min_dim=roomtype["min_room_dim"]
     max_dim=roomtype["max_room_dim"]

     for _ in range(N_mutations_per_roomtype):
         ################################################################################################
         # random room dimensions with a resolution of 0.01; the bounds are
         # converted to int because random.randint rejects float arguments
         # on recent Python versions
         room_x=random.randint(int(round(min_dim[0]*100)),int(round(max_dim[0]*100)))/100
         room_y=random.randint(int(round(min_dim[1]*100)),int(round(max_dim[1]*100)))/100
         room_z=random.randint(int(round(min_dim[2]*100)),int(round(max_dim[2]*100)))/100
         room_dim=[room_x,room_y,room_z]
         ################################################################################################
         # random materials; shuffle a copy so the shared room specification
         # is not permuted in place from one call to the next
         wall_materials=list(random.choice(roomtype["wall_materials"]))
         random.shuffle(wall_materials)
         floor_material=random.choice(roomtype["floor_material"])
         ceiling_material=random.choice(roomtype["ceiling_material"])
         m = pra.make_materials(
             ceiling=ceiling_material,
             floor=floor_material,
             east=wall_materials[0],
             west=wall_materials[1],
             north=wall_materials[2],
             south=wall_materials[3],)
         ################################################################################################
         # create room; only the reflection order is taken from the Sabine
         # inversion, the wall properties come from the chosen materials
         rt60 = 0.5  # seconds
         _, max_order = pra.inverse_sabine(rt60, room_dim)
         room = pra.ShoeBox(room_dim, fs=self.sr, materials=m, max_order=max_order , air_absorption=True,ray_tracing=False)
         ################################################################################################
         # random mic position
         mic_x=(random.randint(0,int(room_x*100))/100)
         mic_y=(random.randint(0,int(room_y*100))/100)
         mic_z=(random.randint(0,int(room_z*100))/100)
         mic_pos=[mic_x,mic_y,mic_z]
         room.add_microphone_array(np.array([mic_pos]).T)
         ################################################################################################
         # add sweep source next to the microphone: +0.1 along y when that
         # point is inside the room, otherwise -0.1
         sg=signal_generator(sr=self.sr)
         logsweep=sg.logsweep(w1=100,w2=30000,T=0.3)
         sweep_sourcepos=[mic_pos[0],mic_pos[1]+0.1,mic_pos[2]]
         if not room.is_inside(sweep_sourcepos):
             sweep_sourcepos=[mic_pos[0],mic_pos[1]-0.1,mic_pos[2]]
         # normalise to half of the int16 full scale
         scaled = np.int16(logsweep.signal/np.max(np.abs(logsweep.signal)) * 32767*0.5)
         room.add_source(sweep_sourcepos, signal=scaled, delay=0.02)
         ################################################################################################
         # add noise source
         fs, audio = wavfile.read("../../data/audio/"+random.choice(os.listdir("../../data/audio/")))
         noise_x=(random.randint(0,int(room_x*100))/100)
         noise_y=(random.randint(0,int(room_y*100))/100)
         noise_z=(random.randint(0,int(room_z*100))/100)
         noise_pos=[noise_x,noise_y,noise_z]
         # only the first channel is used; mono files are already 1-D
         noise=audio if audio.ndim == 1 else audio[:, 0]
         # linearly interpolate to the room's sampling rate (previously a
         # hard-coded 96000, which only matches the room when self.sr is 96000)
         duration = noise.shape[0] / fs
         time_old  = np.linspace(0, duration, noise.shape[0])
         time_new  = np.linspace(0, duration, int(noise.shape[0] * self.sr / fs))
         interpolator = interpolate.interp1d(time_old, noise)
         noise = interpolator(time_new).astype('int16')

         room.add_source(noise_pos, signal=noise, delay=0.0)
         ################################################################################################
         # store room
         self.rooms.append(room)
Beispiel #12
0
def sim(src_loc, mic_locs, noise=False):
    """Simulate the guitar sample in a room and locate the peak of 4 channels.

    Parses ``--method/-m`` from the command line (choices come from the
    module-level ``methods``); ``"ism"`` and ``"hybrid"`` select how the room
    is built. The reverberant recording is written to a wav file, the peaks of
    the first four channels are plotted through ``detect_peaks`` and the index
    of each channel's maximum is returned.

    :param src_loc: position of the source
    :param mic_locs: microphone positions (at least four microphones)
    :param noise: when true, add Gaussian noise with a standard deviation of
        1/100 of the channel maximum before locating the maxima
    :return: ``(fs, i1, i2, i3, i4)``: sampling rate and, per channel, the
        index of the first sample equal to the channel maximum
    """
    cli = argparse.ArgumentParser(
        description=
        "Simulates and adds reverberation to a dry sound sample. Saves it into `./examples/samples`."
    )
    cli.add_argument("--method",
                     "-m",
                     choices=methods,
                     default=methods[0],
                     help="Simulation method to use")
    args = cli.parse_args()

    # Target reverberation time (seconds) and room dimensions (meters).
    rt60_tgt = 0.166
    room_dim = [10, 10, 3.5]

    # Mono source signal; its sampling rate is used for the room.
    fs, audio = wavfile.read("examples/samples/guitar_16k.wav")

    # Sabine inversion yields the absorption and reflection order for the ISM.
    e_absorption, max_order = pra.inverse_sabine(rt60_tgt, room_dim)

    if args.method == "ism":
        room = pra.ShoeBox(
            room_dim,
            fs=fs,
            materials=pra.Material(e_absorption),
            max_order=max_order,
        )
    elif args.method == "hybrid":
        # Low-order image sources combined with ray tracing.
        room = pra.ShoeBox(
            room_dim,
            fs=fs,
            materials=pra.Material(e_absorption),
            max_order=3,
            ray_tracing=True,
            air_absorption=True,
        )

    room.add_source(src_loc, signal=audio, delay=0.5)
    room.add_microphone_array(mic_locs)

    # simulate() also builds the RIRs.
    room.simulate()

    room.mic_array.to_wav(
        "examples/samples/guitar_16k_reverb_{}.wav".format(args.method),
        norm=True,
        bitdepth=np.int16,
    )

    print(fs)
    channels = range(4)
    for ch in channels:
        detect_peaks(room.mic_array.signals[ch, :],
                     mph=0,
                     mpd=1000,
                     threshold=10,
                     show=True)

    sigs = [room.mic_array.signals[ch, :] for ch in channels]

    div = 100  # 10 is where things start to break down
    if noise:
        sigs = [
            sig + np.random.normal(0, max(sig) / div, sig.shape)
            for sig in sigs
        ]

    # Channel maxima and the first index at which each one occurs.
    peaks = [max(sig) for sig in sigs]
    peak_indices = [
        np.where(sig == peak)[0][0] for sig, peak in zip(sigs, peaks)
    ]

    print("SRC's: " + str(src_loc))
    print(*peaks)
    print(*peak_indices)
    return (fs, *peak_indices)
Beispiel #13
0
    def cache_rirs(self, ):
        """Load the cached RIRs from ``self.rir_file`` or generate and store them.

        After the call ``self.rir_A`` and ``self.rir_B`` hold the impulse
        responses for sources in region A and region B, indexed as
        (rir, sample, microphone), and ``self.nrir`` holds their count.
        """
        if os.path.isfile(self.rir_file):
            cached = load_numpy_from_mat(self.rir_file)
            self.rir_A = cached['rir_A']  # shape = (nrir, samples, nmic)
            self.rir_B = cached['rir_B']  # shape = (nrir, samples, nmic)
            self.nrir = self.rir_A.shape[0]
            print('Loaded', self.nrir, 'RIRs from', self.rir_file)
            return

        # No cache file yet: pre-calculate a fixed number of RIRs.
        self.nrir = 500
        print('Generating', self.nrir, 'RIRs ...')

        # Shoebox with an rt60 of 0.25 s and dimensions in meters; Sabine's
        # formula is inverted to get the ISM parameters.
        rt60 = 0.250
        room_dim = np.asarray([6.0, 4.0, 2.5])
        absorption, max_order = pra.inverse_sabine(rt60, room_dim)
        room = pra.ShoeBox(room_dim,
                           fs=self.fs,
                           materials=pra.Material(absorption),
                           max_order=max_order)

        # Shift the array geometry to its centre position in the room.
        array_center = np.asarray([2.5, 1.5, 0.8])
        room.add_microphone_array(self.micpos.T + array_center[:, np.newaxis])

        # Sources alternate between region A and region B; the regions only
        # differ in their x range.
        region_x_ranges = ((1.0, 2.0), (3.0, 4.0))
        for _ in range(self.nrir):
            for x_low, x_high in region_x_ranges:
                x = np.random.uniform(x_low, x_high)
                y = np.random.uniform(2.0, 3.0)
                z = np.random.uniform(1.5, 2.0)
                room.add_source([x, y, z], signal=0, delay=0)

        # Compute all RIRs and report how long it took.
        t0 = time.time()
        room.compute_rir()
        t1 = time.time()
        print('Generated', self.nrir, 'RIRs in', t1 - t0, 'seconds')

        # Copy every RIR into a fixed-length buffer: longer responses are
        # truncated to self.samples, shorter ones stay zero-padded.
        buffer_shape = (self.nrir, self.samples, self.nmic)
        self.rir_A = np.zeros(buffer_shape, dtype=np.float32)
        self.rir_B = np.zeros(buffer_shape, dtype=np.float32)
        for r in range(self.nrir):
            for m in range(self.nmic):
                # Even source indices belong to region A, odd ones to B.
                for offset, target in ((0, self.rir_A), (1, self.rir_B)):
                    h = room.rir[m][r * 2 + offset]
                    n = min(self.samples, h.size)
                    target[r, :n, m] = h[:n]

        save_numpy_to_mat(self.rir_file, {
            'rir_A': self.rir_A,
            'rir_B': self.rir_B,
        })
Beispiel #14
0
def mix_convolutive(S: np.ndarray, sim: dict,
                    data_set: dict) -> Tuple[np.ndarray, np.ndarray, dict]:
    """Mix the sources convolutively in a simulated shoebox room.

    Each source is simulated on its own (all other sources silent) so the
    individual reverberant images are available, e.g. for SDR/SIR computation;
    the mixture is the sum of these images.

    :param S: source signals, one source per row
    :param sim: simulation settings; ``sim['env_options']`` must provide
        ``rt60``, ``room_dim``, ``sigma2_awgn``, ``micro_locations`` (one
        column per microphone) and ``source_locations`` (one row per source);
        the optional ``sim['microphones']`` sets the number of microphones
        and defaults to the number of sources
    :param data_set: must provide the sampling frequency under ``'fs'``
    :return: ``(filtered, mixed, {'room_object': room})`` where ``filtered``
        stacks the microphone signals of every single-source simulation and
        ``mixed`` is their sum over the sources
    :raises ValueError: if fewer microphone or source locations are given
        than required
    """
    opts = sim['env_options']
    N = S.shape[0]  # number of sources
    M = sim.get('microphones', N)  # number of microphones

    # Validate the geometry before the room is built, so bad input fails fast.
    micro_locs = opts['micro_locations']
    if micro_locs.shape[1] < M:
        # Report the number of columns, which is the dimension that is checked.
        raise ValueError(
            '{} microphones required, but only {} microphone locations specified'
            .format(M, micro_locs.shape[1]))

    source_locs = opts['source_locations']
    if source_locs.shape[0] < N:
        raise ValueError(
            '{} sources required, but only {} source locations specified'.
            format(N, source_locs.shape[0]))

    # We invert Sabine's formula to obtain the parameters for the ISM simulator
    e_absorption, max_order = pra.inverse_sabine(opts['rt60'],
                                                 opts['room_dim'])

    room = pra.ShoeBox(opts['room_dim'],
                       fs=data_set['fs'],
                       materials=pra.Material(e_absorption),
                       max_order=max_order,
                       sigma2_awgn=opts['sigma2_awgn'])

    # Use only as many microphones as needed
    R = micro_locs[:, :M]
    room.add_microphone_array(pra.MicrophoneArray(R, room.fs))

    # Add silent sources first, so that each source can be recorded separately
    # (following pyroomacoustics' bss_example.py)
    for sig, loc in zip(S, source_locs):
        room.add_source(loc, signal=np.zeros_like(sig))

    filtered = []
    for source, s in zip(room.sources, S):
        # Activate only this source
        source.signal[:] = s

        # Simulate & record the signal
        room.simulate()
        filtered.append(room.mic_array.signals)

        # Silence it again for the next iterations
        source.signal[:] = 0

    filtered = np.array(filtered)

    # The mixture is the sum of the separately recorded sources
    mixed = np.sum(filtered, axis=0)
    return filtered, mixed, {'room_object': room}
Beispiel #15
0
def _beamform_reference(args, ref_wav, num_channel, targ_ang):
    """Reduce the multi-channel reference signal to one channel via ``args.targ_bf``.

    The signal is transformed with ``args.convstft``, re-arranged into the
    layout passed to ``args.targ_bf``, beam-formed towards the beam index
    derived from ``targ_ang`` and transformed back with ``args.convistft``.
    The shape comments below are carried over from the original annotations.

    Args:
        args: Configuration object; reads ``device``, ``convstft``,
            ``convistft``, ``targ_bf``, ``is_linear_mic``,
            ``bf_direction_resolution`` and ``num_beam``.
        ref_wav: Array indexed as (num_channel, spe_length).
        num_channel: Number of channels in ``ref_wav``.
        targ_ang: Target direction in degrees.

    Returns:
        One-dimensional numpy array holding the beam-formed signal.
    """
    # NOTE(review): ``num_bin`` is not bound in this block; it is presumably a
    # module-level name defined elsewhere in the file -- confirm.
    num_block = 1
    ref_wav = torch.FloatTensor(ref_wav[np.newaxis, :, :])  # (num_block, num_channel, spe_length)
    ref_wav = ref_wav.view(num_block * num_channel, 1, -1)  # (num_block * num_channel, 1, spe_length)
    input_audio = ref_wav.to(args.device)

    mFFT = args.convstft(input_audio)                           # (num_block * num_channel, num_bin * 2, num_frame)
    num_frame = mFFT.size(2)
    mFFT = mFFT.view(num_block, num_channel, num_bin * 2, -1)   # (num_block, num_channel, num_bin * 2, num_frame)
    mFFT_r = mFFT[:, :, :num_bin, :]                            # (num_block, num_channel, num_bin, num_frame)
    mFFT_i = mFFT[:, :, num_bin:, :]                            # (num_block, num_channel, num_bin, num_frame)

    mFFT_r = mFFT_r.permute([0, 3, 2, 1]).contiguous()          # (num_block, num_frame, num_bin, num_channel)
    mFFT_i = mFFT_i.permute([0, 3, 2, 1]).contiguous()          # (num_block, num_frame, num_bin, num_channel)

    mFFT_r = mFFT_r.view(num_block * num_frame, num_bin, num_channel)
    mFFT_i = mFFT_i.view(num_block * num_frame, num_bin, num_channel)

    # (num_block * num_frame, 2, num_bin, num_channel)
    mFFT = torch.cat([torch.unsqueeze(mFFT_r, 1), torch.unsqueeze(mFFT_i, 1)], dim=1)

    # Map the target angle onto a beam index; two-channel / linear arrays
    # cannot tell front from back, so angles are folded into [0, 180].
    targ_tdoa = targ_ang
    if (num_channel == 2 or args.is_linear_mic) and targ_tdoa > 180:
        targ_tdoa = 360.0 - targ_tdoa
    bf_beam = targ_tdoa / args.bf_direction_resolution + 0.5
    bf_beam = int(bf_beam) % args.num_beam
    print("tdoa = %d, beam = %d" % (targ_ang, bf_beam))

    rFFT = args.targ_bf(mFFT, bf_beam)                          # (num_block * num_frame, 2, num_bin, 1)
    rFFT = rFFT[:, :, :, 0].view([num_block, -1, 2, num_bin])   # (num_block, num_frame, 2, num_bin)

    rFFT = rFFT.permute([0, 2, 3, 1]).contiguous()              # (num_block, 2, num_bin, num_frame)
    est_fft = torch.cat([rFFT[:, 0], rFFT[:, 1]], 1)            # (num_block, num_bin * 2, num_frame)
    out_wav = args.convistft(est_fft)                           # (num_block, 1, num_sample)
    out_wav = torch.squeeze(out_wav, 1)                         # (num_block, num_sample)
    return out_wav[0, :].data.cpu().numpy()                     # (num_sample)


def make_noisy(args, thread_id, num_make_utts):
    """Simulate noisy far-field utterances in randomly drawn shoebox rooms.

    For every random room one microphone array position, one target position
    and the interferer position(s) are drawn; ``args.nutt_per_room`` utterances
    are then simulated in that room.  Each utterance yields a mixture wav and,
    depending on ``args.save_clean`` / ``args.save_reverb``, a low-reflection
    ("DS") and a reverberant reference ("Ref") wav, all scaled by the same gain.

    Args:
        args: Configuration object (room/mic/source ranges, SIR/SNR/gain
            ranges, sample rate, output path, optional beam-former, ...).
        thread_id: Worker index, embedded in utterance ids and file names.
        num_make_utts: Number of utterances to generate before returning.

    Returns:
        Tuple ``(noisy_scp_list, noisy_utt2spk, noisy_text_dict, mix2info)``
        of lists with one entry per generated utterance.

    Raises:
        ValueError: If ``args.save_mix`` is false.  The output gain is derived
            from the mixture, so nothing can be produced without it (the
            previous code looped forever in that case).
    """
    if not args.save_mix:
        raise ValueError("make_noisy requires args.save_mix to be enabled: "
                         "the mixture is needed to derive the output gain")

    spe_utt_ids, noise_utt_ids, diffuse_utt_ids, text_dict, utt2spk_dict, utt2data_dict = load_data(args)

    audio_parser = AudioParser()

    spe_utt_size = len(spe_utt_ids) if spe_utt_ids is not None else 0
    noise_utt_size = len(noise_utt_ids) if noise_utt_ids is not None else 0
    diffuse_utt_size = len(diffuse_utt_ids) if diffuse_utt_ids is not None else 0

    noisy_scp_list = []
    noisy_utt2spk = []
    noisy_text_dict = []
    mix2info = []
    num_utts = 0

    all_angle = 360.0
    targ_ang_num = args.num_targ_ang
    targ_ang_resolution = all_angle / targ_ang_num if targ_ang_num > 0 else 0.0

    save_reverb = args.save_reverb
    save_clean = args.save_clean

    while True:
        ## Random a room
        room_x = random.uniform(args.min_room_length, args.max_room_length)
        room_y = random.uniform(args.min_room_weidth, args.max_room_weidth)
        room_z = random.uniform(args.min_room_height, args.max_room_height)
        room_dim = [room_x, room_y, room_z]

        ## Create the rooms (mixture is mandatory, the references are optional)
        T60 = random.uniform(args.min_T60, args.max_T60)
        absorption, max_order = pra.inverse_sabine(T60, room_dim)
        room_mix = pra.ShoeBox(room_dim, fs=args.sample_rate, materials=pra.Material(absorption),
                               max_order=max_order, sigma2_awgn=None)
        # Fixed: the disabled branch used to reset room_mix and left room_ref unbound.
        room_ref = None
        if save_reverb:
            room_ref = pra.ShoeBox(room_dim, fs=args.sample_rate, materials=pra.Material(absorption),
                                   max_order=max_order, sigma2_awgn=None)
        room_dir = None
        if save_clean:
            room_dir = pra.ShoeBox(room_dim, fs=args.sample_rate, materials=pra.Material(0.99999),
                                   max_order=max_order, sigma2_awgn=None)
        rooms = [room for room in (room_mix, room_ref, room_dir) if room is not None]

        ## Random the position of microphone array
        mic_x = random.uniform(args.min_mic_x, room_x - args.min_mic_x)
        mic_y = random.uniform(args.min_mic_y, room_y - args.min_mic_y)
        mic_z = random.uniform(args.min_mic_z, max(min(room_z - args.min_mic_z, 2.0), args.min_mic_z + 0.5))

        ## Compute the position of microphones: (num_mic, 3) -> (3, num_mic)
        mic_xyz = np.array([
            [mic_x + args.mic_pos[m][0], mic_y + args.mic_pos[m][1], mic_z]
            for m in range(args.num_mic)
        ]).T

        ## Add microphone array (the return value is deliberately ignored so
        ## the room variables can never be rebound to something else)
        mic_array = pra.MicrophoneArray(mic_xyz, args.sample_rate)
        for room in rooms:
            room.add_microphone_array(mic_array)

        ## Draw a target position at least 0.5 m away from the x/y walls
        while True:
            if args.num_targ_ang <= 0.0:
                targ_ang = random.randint(0, int(all_angle))
            else:
                targ_ang = int(random.randint(0, targ_ang_num - 1) * targ_ang_resolution)

            targ_theta = np.pi * targ_ang / 180.0
            targ_dist = random.uniform(args.min_targ_distance, args.max_targ_distance)

            targ_x = mic_x + np.cos(targ_theta) * targ_dist
            targ_y = mic_y + np.sin(targ_theta) * targ_dist
            targ_z = mic_z

            if 0.5 < targ_x < room_x - 0.5 and 0.5 < targ_y < room_y - 0.5:
                break
        target_source = [targ_x, targ_y, targ_z]

        # Fixed: the old guard combined its conditions with `and` and never
        # fired.  All rooms share room_dim, so one check covers them all.
        if not room_mix.is_inside(target_source):
            continue

        targ_tdoa = targ_ang
        if args.is_linear_mic and targ_tdoa > 180:
            targ_tdoa = 360.0 - targ_tdoa

        ## Draw interference positions for room_mix
        # NOTE(review): min(..., 1) always yields 1, so max_num_interf has no
        # effect; kept as-is because changing it alters the generated data.
        num_interf = min(random.randint(1, args.max_num_interf), 1)
        interf_angs = []
        interf_dists = []
        interf_source = []

        while True:
            interf_ang = random.randint(0, int(all_angle))
            interf_tdoa = interf_ang
            if args.is_linear_mic and interf_tdoa > 180:
                interf_tdoa = 360.0 - interf_tdoa
            # Keep interferers at least minAD degrees away from the target
            if np.abs(targ_tdoa - interf_tdoa) < args.minAD:
                continue
            interf_theta = np.pi * interf_ang / 180.0
            interf_dist = random.uniform(args.min_interf_distance, args.max_interf_distance)

            interf_x = mic_x + np.cos(interf_theta) * interf_dist
            interf_y = mic_y + np.sin(interf_theta) * interf_dist
            interf_z = mic_z

            if 0.5 < interf_x < room_x - 0.5 and 0.5 < interf_y < room_y - 0.5:
                interf_angs.append(interf_ang)
                interf_dists.append(interf_dist)
                interf_source.append([interf_x, interf_y, interf_z])

            if len(interf_source) >= num_interf:
                break

        for sim in range(args.nutt_per_room):
            # Re-use the rooms: drop the sources of the previous utterance
            for room in rooms:
                room.sources = []

            ## Pick a usable (>= 1 s, non-silent) speech utterance
            while True:
                spe_idx = random.randint(0, spe_utt_size - 1)
                spe_key, spe_path = spe_utt_ids[spe_idx]

                spe_wav = audio_parser.WaveData(spe_path, sample_rate=args.sample_rate)
                if spe_wav is None or spe_wav.shape[0] < args.sample_rate:
                    continue
                spe_wav = np.squeeze(spe_wav)
                if np.mean(np.abs(spe_wav)) > 0:
                    break

            spe_length = spe_wav.shape[0]
            spe_wav = pra.normalize(spe_wav)
            spe_wav = pra.highpass(spe_wav, args.sample_rate, 50)

            ## Add the target to every active room
            for room in rooms:
                room.add_source(target_source, signal=spe_wav, delay=0)

            ## Add interference to the mixture room only
            for it in range(num_interf):
                while True:
                    inf_idx = random.randint(0, noise_utt_size - 1)
                    inf_path = noise_utt_ids[inf_idx]

                    inf_wav = audio_parser.WaveData(inf_path, sample_rate=args.sample_rate)
                    if inf_wav is None or inf_wav.shape[0] < args.sample_rate:
                        continue
                    inf_wav = np.squeeze(inf_wav)
                    if np.mean(np.abs(inf_wav)) > 0:
                        break

                inf_wav = pra.normalize(inf_wav)
                inf_wav = pra.highpass(inf_wav, args.sample_rate, 50)

                # Repeat the noise until it covers the speech, then cut to length
                while inf_wav.shape[0] < spe_length:
                    inf_wav = np.concatenate((inf_wav, inf_wav), axis=0)
                inf_wav = inf_wav[:spe_length]

                if room_mix.is_inside(interf_source[it]):
                    room_mix.add_source(interf_source[it], signal=inf_wav, delay=0)
                else:
                    print("interf_source not in room_mix")

            ## Make the far-field mixture audio
            iSIR = random.uniform(args.lowSIR, args.upSIR)
            room_mix.simulate(callback_mix=callback_mix,
                              callback_mix_kwargs={'snr': 30, 'sir': iSIR, 'n_src': num_interf + 1,
                                                   'n_tgt': 1, 'ref_mic': 0})

            mix_wav = room_mix.mic_array.signals.T  # (nsample, nchannel)
            mix_length, num_channel = mix_wav.shape

            # Fixed: iSNR used to be unbound without diffuse noise although it
            # is formatted into the file name and stored in the metadata.
            # inf marks "no diffuse noise added".
            iSNR = float("inf")

            ## Read diffuse noise
            if diffuse_utt_ids is not None:
                while True:
                    diff_idx = random.randint(0, diffuse_utt_size - 1)
                    diff_path = diffuse_utt_ids[diff_idx]

                    diff_wav = audio_parser.WaveData(diff_path, sample_rate=args.sample_rate,
                                                     id_channel=list(range(0, num_channel)))
                    if diff_wav is None or diff_wav.shape[0] < args.sample_rate:
                        continue
                    if np.mean(np.abs(diff_wav)) > 0:
                        break

                dif_length, num_channel = diff_wav.shape

                ## Add diffuse noise into mix
                while dif_length < mix_length:
                    diff_wav = np.concatenate((diff_wav, diff_wav), axis=0)
                    dif_length = diff_wav.shape[0]
                diff_wav = diff_wav[0:mix_length, :]

                iSNR = random.uniform(args.lowSNR, args.upSNR)
                mix_wav = audio_parser.MixWave(mix_wav, diff_wav, snr=iSNR)

            ## Adapt gain of mixture audio by given gain; the same scale is
            ## applied to the reference signals below
            gain = random.uniform(args.lowGain, args.upGain)
            scale = gain / np.max(np.abs(mix_wav))
            mix_wav = mix_wav * scale
            mix_wav = mix_wav * 32767.0
            mix_wav = mix_wav.astype(np.int16)

            dir_wav = None
            if room_dir is not None:
                ## Simulate the target in the low-reflection room, first microphone only
                room_dir.simulate()
                dir_wav = room_dir.mic_array.signals[0, :].T
                dir_wav = dir_wav * scale
                dir_wav = dir_wav * 32767.0
                dir_wav = dir_wav.astype(np.int16)

            ref_wav = None
            if room_ref is not None:
                ## Simulate the clean far-field signal to make ref signal for compute metrics
                room_ref.simulate()
                ref_wav = room_ref.mic_array.signals * scale  # (num_channel, spe_length)
                if args.targ_bf is not None:
                    ref_wav = _beamform_reference(args, ref_wav, num_channel, targ_ang)
                else:
                    ref_wav = ref_wav[0, :]
                ref_wav = ref_wav * 32767.0
                ref_wav = ref_wav.astype(np.int16)

            ## Align mix_wav, ref_wav and dir_wav
            # Fixed: only signals that were actually produced take part.
            nsample = min(wav.shape[0] for wav in (mix_wav, ref_wav, dir_wav) if wav is not None)
            mix_wav = mix_wav[:nsample]
            if ref_wav is not None:
                ref_wav = ref_wav[:nsample]
            if dir_wav is not None:
                dir_wav = dir_wav[:nsample]

            num_utts += 1

            ## Resolve the output directory: <out_path>/wav[/<data_id>]/<spk_id | wav>
            out_path = os.path.join(args.out_path, 'wav')
            if utt2data_dict is not None:
                _, data_id = utt2data_dict[spe_idx]
                out_path = os.path.join(out_path, data_id)

            if utt2spk_dict is not None:
                _, spk_id = utt2spk_dict[spe_idx]
                out_path = os.path.join(out_path, spk_id)
            else:
                spk_id = 'spk01'
                out_path = os.path.join(out_path, 'wav')
            # exist_ok avoids the check-then-create race between parallel workers
            os.makedirs(out_path, exist_ok=True)

            spe_key = spe_key.replace('_', '').replace('-', '').replace('.', '')
            spk_id = spk_id.replace('_', '').replace('-', '').replace('.', '')
            utt_id = spk_id + "_" + "%02d%07d" % (thread_id, num_utts)

            ## Write the mixture audio
            filename = "%s_id%02d%07d_Doa%d_SIR%.1f_SNR%.1f" % (spe_key, thread_id, num_utts, targ_ang, iSIR, iSNR)
            mix_path = os.path.join(out_path, '%s.wav' % (filename))
            audio_parser.WriteWave(mix_path, mix_wav, args.sample_rate)

            ds_path = None
            if dir_wav is not None:
                filename = "%s_id%02d%07d_Doa%d_DS" % (spe_key, thread_id, num_utts, targ_ang)
                ds_path = os.path.join(out_path, '%s.wav' % (filename))
                audio_parser.WriteWave(ds_path, dir_wav, args.sample_rate)

            ref_path = None
            if ref_wav is not None:
                filename = "%s_id%02d%07d_Doa%d_Ref" % (spe_key, thread_id, num_utts, targ_ang)
                ref_path = os.path.join(out_path, '%s.wav' % (filename))
                audio_parser.WriteWave(ref_path, ref_wav, args.sample_rate)

            if text_dict is not None:
                _, text_value = text_dict[spe_idx]
            else:
                text_value = ' '

            noisy_scp_list.append((utt_id, mix_path, ds_path, ref_path, targ_ang, targ_dist, iSIR, iSNR, scale))
            noisy_utt2spk.append(spk_id)
            noisy_text_dict.append(text_value)

            info = (utt_id, spe_key, mix_path, ds_path, ref_path, targ_ang, targ_dist,
                    interf_angs, interf_dists, iSIR, iSNR, scale)
            mix2info.append(info)

            print("%d / %d: %s" % (num_utts, num_make_utts, mix_path))

            if num_utts >= num_make_utts:
                return noisy_scp_list, noisy_utt2spk, noisy_text_dict, mix2info
Beispiel #16
0
import wave

import numpy as np
import pyroomacoustics as pra

from sound_source_separation import wave_writer, wave_loader

# Simulation targets: reverberation time in seconds, room size in meters
reverberation_time60 = 1.5
room_dim = [7., 8., 9.]

# Derive the wall absorption and reflection order for the requested
# reverberation time, then build the shoebox room sampled at 16 kHz
e_absorption, max_order = pra.inverse_sabine(reverberation_time60, room_dim)
room = pra.ShoeBox(
    room_dim,
    fs=16000,
    materials=pra.Material(e_absorption),
    max_order=max_order,
)

with wave.open("./CMU_ARCTIC/cmu_us_aew_arctic/wav/arctic_a0001.wav") as wav_speech_1, \
        wave.open("./CMU_ARCTIC/cmu_us_aew_arctic/wav/arctic_a0005.wav") as wav_speech_2:
    # place sources in the room
    speech_data_1 = wave_loader.load_to_mono_array(wav_speech_1)
    room.add_source([2.5, 4.90, 1.76], signal=speech_data_1)

    speech_data_2 = wave_loader.load_to_mono_array(wav_speech_2)
    room.add_source([4.5, 1.24, 8.76], signal=speech_data_2, delay=3.)

    # place the microphone array in the room
    mic_locs = np.c_[[6.3, 4.87, 1.2],  # mic 1
                     [6.3, 4.93, 1.2],  # mic 2
                     ]