Example #1
 def rir(self, fname, fs=16000, rir_nsamps=4096, v=340, gpu=False):
     """
     Generate rir for current settings
     """
     if gpu:
         # self.beta: rt60
         beta = pygpurir.beta_SabineEstimation(self.size, self.beta)
         # NOTE: do not clear here
         # diff = pygpurir.att2t_SabineEstimator(15, self.beta)
         tmax = rir_nsamps / fs
         nb_img = pygpurir.t2n(tmax, self.size)
         # S x R x T
         rir = pygpurir.simulateRIR(self.size,
                                    beta,
                                    np.array(self.spos)[None, ...],
                                    np.array(self.rpos),
                                    nb_img,
                                    tmax,
                                    fs,
                                    mic_pattern="omni")
         write_wav(fname, rir[0], fs=fs)
     elif cpp_rir_available:
         # format float
         ffloat = lambda f: "{:.3f}".format(f)
         # location for each microphone
         loc_for_each_channel = [
             ",".join(map(ffloat, p)) for p in self.rpos
         ]
         beta = ",".join(map(ffloat, self.beta)) if isinstance(
             self.beta, list) else round(self.beta, 3)
         run_command(
             "rir-simulate --sound-velocity={v} --samp-frequency={sample_rate} "
             "--hp-filter=true --number-samples={rir_samples} --beta={beta} "
             "--room-topo={room_size} --receiver-location=\"{receiver_location}\" "
             "--source-location={source_location} {dump_dest}".format(
                 v=v,
                 sample_rate=fs,
                 rir_samples=rir_nsamps,
                 room_size=",".join(map(ffloat, self.size)),
                 beta=beta,
                 receiver_location=";".join(loc_for_each_channel),
                 source_location=",".join(map(ffloat, self.spos)),
                 dump_dest=fname))
     elif pyrirgen_available:
         rir = pyrirgen.generateRir(self.size,
                                    self.spos,
                                    self.rpos,
                                    soundVelocity=v,
                                    fs=fs,
                                    nDim=3,
                                    nSamples=rir_nsamps,
                                    nOrder=-1,
                                    reverbTime=self.beta,
                                    micType="omnidirectional",
                                    isHighPassFilter=True)
         if isinstance(rir, list):
             rir = np.stack(rir)
         write_wav(fname, rir, fs=fs)
     else:
         raise RuntimeError("Neither rir-simulate nor pyrirgen is available")
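For reference, the GPU branch above condenses to the following standalone sketch against the upstream gpuRIR package (the room geometry, T60, and sample rate here are made-up values; a CUDA-capable GPU is required):

import numpy as np
import gpuRIR

room_sz = [5.0, 4.0, 3.0]                # room dimensions [m] (assumed)
pos_src = np.array([[1.0, 1.0, 1.5]])    # one source
pos_rcv = np.array([[2.5, 2.0, 1.5]])    # one receiver
T60, fs = 0.5, 16000                     # reverberation time [s], sample rate [Hz]

beta = gpuRIR.beta_SabineEstimation(room_sz, T60)  # wall reflection coefficients
Tmax = gpuRIR.att2t_SabineEstimator(60, T60)       # simulate until a 60 dB decay
nb_img = gpuRIR.t2n(Tmax, room_sz)                 # image sources per dimension
rir = gpuRIR.simulateRIR(room_sz, beta, pos_src, pos_rcv, nb_img, Tmax, fs,
                         mic_pattern="omni")
print(rir.shape)  # (n_src, n_rcv, n_samples)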
Example #2
             [0, 0.3, 0.3 * 2, 0.3 * 3, 0.3 * 4, 0.3 * 5]]

# Generate RIRs
RIRs = np.zeros((0, 5))
for r in range(0, len(rooms)):
    room_sz = rooms[r]
    arr_centre = np.random.uniform(MIN_ARR2WALL,
                                   room_sz - MAX_ARR2WALL,
                                   size=[1, 3])
    # 5-mic linear array along the x axis with 4 cm spacing, centred at arr_centre
    arr = arr_centre[0, -1] * np.ones((3, 5))  # fill all rows with the z coordinate
    arr[0, 0] = arr_centre[0, 0] - 0.08
    arr[0, 1] = arr_centre[0, 0] - 0.04
    arr[0, 2] = arr_centre[0, 0]
    arr[0, 3] = arr_centre[0, 0] + 0.04
    arr[0, 4] = arr_centre[0, 0] + 0.08
    arr[1, :] = arr_centre[0, 1]
    pos_rcv = arr.T
    for rt in range(0, 6):
        T60 = rev_times[r]
        dist = distances[r]
        for ang in tqdm(range(0, 37)):
            x = np.cos(angles[ang]) * dist + arr_centre[0, 0]
            y = np.sin(angles[ang]) * dist + arr_centre[0, 1]
            pos_src = np.array([[x, y, 1.5]])
            beta = gpuRIR.beta_SabineEstimation(
                room_sz, T60[rt])  # Reflection coefficients
            nb_img = gpuRIR.t2n(
                Tdiff, room_sz)  # Number of image sources in each dimension
            _rirs = gpuRIR.simulateRIR(room_sz, beta, pos_src, pos_rcv, nb_img,
                                       Tmax, fs).reshape(5, 9600).T
            RIRs = np.concatenate((RIRs, _rirs))
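The five-element array construction above can also be written more compactly; a minimal equivalent sketch (hypothetical centre position, same 4 cm spacing):

import numpy as np

arr_centre = np.array([[2.5, 2.0, 1.5]])   # assumed array centre
pos_rcv = np.tile(arr_centre, (5, 1))      # 5 mics, all at the centre
pos_rcv[:, 0] += np.arange(-2, 3) * 0.04   # x offsets: -8, -4, 0, 4, 8 cm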
Example #3
fs = 16000.0  # Sampling frequency [Hz]

pos_src = np.random.rand(nb_src, 3) * room_sz
pos_rcv = np.random.rand(nb_rcv, 3) * room_sz

time_max = 100  # Stop the measurements once the average time exceeds this value [s]
times = np.zeros((len(T60_vec), 1))
for i in range(len(T60_vec)):
    T60 = T60_vec[i]
    start_time = time.time()

    for j in range(nb_test_per_point):
        beta = gpuRIR.beta_SabineEstimation(room_sz, T60)
        Tdiff = gpuRIR.att2t_SabineEstimator(att_diff, T60)
        Tmax = gpuRIR.att2t_SabineEstimator(att_max, T60)
        nb_img = gpuRIR.t2n(Tdiff, room_sz)
        RIRs = gpuRIR.simulateRIR(room_sz,
                                  beta,
                                  pos_src,
                                  pos_rcv,
                                  nb_img,
                                  Tmax,
                                  fs,
                                  Tdiff=Tdiff)

    times[i] = (time.time() - start_time) / nb_test_per_point

    if times[i] > time_max:
        break

print(times.transpose())
Example #4
    def simulate(self):
        """
        Get the array recording using gpuRIR to perform the acoustic simulations.
        """
        if self.T60 == 0:
            Tdiff = 0.1
            Tmax = 0.1
            nb_img = [1, 1, 1]
        else:
            Tdiff = gpuRIR.att2t_SabineEstimator(
                12, self.T60)  # Use ISM until the RIRs decay 12dB
            Tmax = gpuRIR.att2t_SabineEstimator(
                40, self.T60)  # Use diffuse model until the RIRs decay 40dB
            if self.T60 < 0.15:
                Tdiff = Tmax  # Avoid issues with too short RIRs
            nb_img = gpuRIR.t2n(Tdiff, self.room_sz)

        nb_mics = len(self.mic_pos)
        nb_traj_pts = len(self.traj_pts)
        nb_gpu_calls = min(
            int(
                np.ceil(self.fs * Tdiff * nb_mics * nb_traj_pts *
                        np.prod(nb_img) / 1e9)), nb_traj_pts)
        traj_pts_batch = np.ceil(nb_traj_pts / nb_gpu_calls *
                                 np.arange(0, nb_gpu_calls + 1)).astype(int)

        RIRs_list = [
            gpuRIR.simulateRIR(
                self.room_sz,
                self.beta,
                self.traj_pts[traj_pts_batch[0]:traj_pts_batch[1], :],
                self.mic_pos,
                nb_img,
                Tmax,
                self.fs,
                Tdiff=Tdiff,
                orV_rcv=self.array_setup.mic_orV,
                mic_pattern=self.array_setup.mic_pattern)
        ]
        for i in range(1, nb_gpu_calls):
            RIRs_list += [
                gpuRIR.simulateRIR(
                    self.room_sz,
                    self.beta,
                    self.traj_pts[traj_pts_batch[i]:traj_pts_batch[i + 1], :],
                    self.mic_pos,
                    nb_img,
                    Tmax,
                    self.fs,
                    Tdiff=Tdiff,
                    orV_rcv=self.array_setup.mic_orV,
                    mic_pattern=self.array_setup.mic_pattern)
            ]
        RIRs = np.concatenate(RIRs_list, axis=0)
        mic_signals = gpuRIR.simulateTrajectory(self.source_signal,
                                                RIRs,
                                                timestamps=self.timestamps,
                                                fs=self.fs)
        mic_signals = mic_signals[0:len(self.t), :]

        # Direct-path RIRs, used below to set the power of the omnidirectional noise
        dp_RIRs = gpuRIR.simulateRIR(self.room_sz,
                                     self.beta,
                                     self.traj_pts,
                                     self.mic_pos, [1, 1, 1],
                                     0.1,
                                     self.fs,
                                     orV_rcv=self.array_setup.mic_orV,
                                     mic_pattern=self.array_setup.mic_pattern)
        dp_signals = gpuRIR.simulateTrajectory(self.source_signal,
                                               dp_RIRs,
                                               timestamps=self.timestamps,
                                               fs=self.fs)
        ac_pow = np.mean([
            acoustic_power(dp_signals[:, i])
            for i in range(dp_signals.shape[1])
        ])
        noise = np.sqrt(
            ac_pow / 10**(self.SNR / 10)) * np.random.standard_normal(
                mic_signals.shape)
        mic_signals += noise

        # Apply the propagation delay to the VAD information if it exists
        if hasattr(self, 'source_vad'):
            vad = gpuRIR.simulateTrajectory(self.source_vad,
                                            dp_RIRs,
                                            timestamps=self.timestamps,
                                            fs=self.fs)
            self.vad = vad[0:len(self.t), :].mean(
                axis=1) > vad[0:len(self.t), :].max() * 1e-3

        return mic_signals
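A minimal standalone sketch of the simulateRIR/simulateTrajectory pairing used above, with made-up geometry and a noise burst standing in for the source signal (CUDA GPU required):

import numpy as np
import gpuRIR

fs, T60 = 16000, 0.4
room_sz = [5.0, 4.0, 3.0]
mic_pos = np.array([[2.5, 3.5, 1.5]])
# 10 trajectory points moving along the x axis
traj_pts = np.stack([np.linspace(1.0, 4.0, 10),
                     np.full(10, 2.0),
                     np.full(10, 1.5)], axis=1)
timestamps = np.linspace(0, 2, 10, endpoint=False)  # when each point is reached [s]

beta = gpuRIR.beta_SabineEstimation(room_sz, T60)
Tmax = gpuRIR.att2t_SabineEstimator(40, T60)
nb_img = gpuRIR.t2n(Tmax, room_sz)
RIRs = gpuRIR.simulateRIR(room_sz, beta, traj_pts, mic_pos, nb_img, Tmax, fs)
src = np.random.standard_normal(2 * fs)  # 2 s stand-in source signal
mic_sig = gpuRIR.simulateTrajectory(src, RIRs, timestamps=timestamps, fs=fs)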
Example #5
def generate_data(output_path='',
                  dataset='adhoc',
                  libri_path='/hdd/data/Librispeech/LibriSpeech',
                  noise_path='/hdd/data/Nonspeech'):
    assert dataset in ['adhoc', 'fixed'], "dataset can only be adhoc or fixed."

    if output_path == '':
        output_path = os.getcwd()

    data_type = ['train', 'validation', 'test']
    for i in range(len(data_type)):
        # path for config
        config_path = os.path.join(
            'configs', 'MC_Libri_' + dataset + '_' + data_type[i] + '.pkl')

        # load pickle file
        with open(config_path, 'rb') as f:
            configs = pickle.load(f)

        # sample rate is 16k Hz
        sr = 16000
        # signal length is 4 sec
        sig_len = 4

        # generate and save audio
        save_dir = os.path.join(output_path, 'MC_Libri_' + dataset,
                                data_type[i])
        if not os.path.exists(save_dir):
            os.makedirs(save_dir)

        for utt in range(len(configs)):
            this_config = configs[utt]

            # load audio files
            speakers = this_config['speech']
            noise = this_config['noise']
            spk1, _ = sf.read(os.path.join(libri_path, speakers[0]))
            spk2, _ = sf.read(os.path.join(libri_path, speakers[1]))
            noise, _ = sf.read(os.path.join(noise_path, noise))

            # calculate signal length according to overlap ratio
            overlap_ratio = this_config['overlap_ratio']
            actual_len = int(sig_len / (2 - overlap_ratio) * sr)
            overlap = int(actual_len * overlap_ratio)

            # truncate speech according to start and end indexes
            start_idx = this_config['start_idx']
            end_idx = this_config['end_idx']
            spk1 = spk1[start_idx:end_idx]
            spk2 = spk2[start_idx:end_idx]

            # rescaling spk2 energy according to relative SNR
            spk1 = spk1 / np.sqrt(np.sum(spk1**2) + 1e-8) * 1e2
            spk2 = spk2 / np.sqrt(np.sum(spk2**2) + 1e-8) * 1e2
            spk2 = spk2 * np.power(10, this_config['spk_snr'] / 20.)

            # load locations and room configs
            mic_pos = np.asarray(this_config['mic_pos'])
            spk_pos = np.asarray(this_config['spk_pos'])
            noise_pos = np.asarray(this_config['noise_pos'])
            room_size = np.asarray(this_config['room_size'])
            rt60 = this_config['RT60']

            # generate RIR
            beta = gpuRIR.beta_SabineEstimation(room_size, rt60)
            nb_img = gpuRIR.t2n(rt60, room_size)
            spk_rir = gpuRIR.simulateRIR(room_size, beta, spk_pos, mic_pos,
                                         nb_img, rt60, sr)
            noise_rir = gpuRIR.simulateRIR(room_size, beta, noise_pos, mic_pos,
                                           nb_img, rt60, sr)

            # convolve with RIR at different mic
            if dataset == 'adhoc':
                nmic = this_config['num_mic']
            else:
                nmic = 6
            for mic in range(nmic):
                spk1_echoic_sig = signal.fftconvolve(spk1, spk_rir[0][mic])
                spk2_echoic_sig = signal.fftconvolve(spk2, spk_rir[1][mic])

                # align the speakers according to overlap ratio
                actual_length = len(spk1_echoic_sig)
                total_length = actual_length * 2 - overlap
                padding = np.zeros(actual_length - overlap)
                spk1_echoic_sig = np.concatenate([spk1_echoic_sig, padding])
                spk2_echoic_sig = np.concatenate([padding, spk2_echoic_sig])
                mixture = spk1_echoic_sig + spk2_echoic_sig

                # add noise; work on a copy so the raw noise signal is not
                # overwritten across microphone iterations
                noise_seg = noise[:total_length]
                if len(noise_seg) < total_length:
                    # repeat noise if necessary
                    num_repeat = total_length // len(noise_seg)
                    res = total_length - num_repeat * len(noise_seg)
                    noise_seg = np.concatenate(
                        [np.concatenate([noise_seg] * num_repeat),
                         noise_seg[:res]])
                noise_echoic = signal.fftconvolve(noise_seg, noise_rir[0][mic])

                # rescaling noise energy w.r.t. mixture energy
                noise_echoic = noise_echoic[:total_length]
                noise_echoic = noise_echoic / np.sqrt(
                    np.sum(noise_echoic**2) + 1e-8) * np.sqrt(
                        np.sum(mixture**2) + 1e-8)
                noise_echoic = noise_echoic / np.power(
                    10, this_config['noise_snr'] / 20.)

                mixture += noise_echoic

                # save waveforms
                this_save_dir = os.path.join(save_dir,
                                             str(nmic) + 'mic',
                                             'sample' + str(utt + 1))
                if not os.path.exists(this_save_dir):
                    os.makedirs(this_save_dir)
                sf.write(
                    os.path.join(this_save_dir,
                                 'spk1_mic' + str(mic + 1) + '.wav'),
                    spk1_echoic_sig, sr)
                sf.write(
                    os.path.join(this_save_dir,
                                 'spk2_mic' + str(mic + 1) + '.wav'),
                    spk2_echoic_sig, sr)
                sf.write(
                    os.path.join(this_save_dir,
                                 'mixture_mic' + str(mic + 1) + '.wav'),
                    mixture, sr)

            # print progress
            if (utt + 1) % (len(configs) // 5) == 0:
                print(
                    "{} configuration, {} set, {:d} out of {:d} utterances generated."
                    .format(dataset, data_type[i], utt + 1, len(configs)))
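The speaker alignment above amounts to padding each utterance on opposite sides; a toy check of the scheme with random signals (assumed 50% overlap):

import numpy as np

fs, sig_len, overlap_ratio = 16000, 4, 0.5
actual_len = int(sig_len / (2 - overlap_ratio) * fs)
overlap = int(actual_len * overlap_ratio)
spk1 = np.random.standard_normal(actual_len)
spk2 = np.random.standard_normal(actual_len)
pad = np.zeros(actual_len - overlap)
mixture = np.concatenate([spk1, pad]) + np.concatenate([pad, spk2])
print(len(mixture) / fs)  # ≈ sig_len seconds in total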
Example #6
def generate_data(output_path='',
                  avoid_clipping=0,
                  dataset='adhoc',
                  libri_path='/home/yi/data/Librispeech',
                  noise_path='/home/yi/data/Nonspeech'):
    assert dataset in ['adhoc', 'fixed'], "dataset can only be adhoc or fixed."

    if output_path == '':
        output_path = os.getcwd()

    data_type = ['train', 'validation', 'test']
    for i in range(len(data_type)):
        # path for config
        config_path = os.path.join(
            'configs', 'MC_Libri_' + dataset + '_' + data_type[i] + '.pkl')

        # load pickle file
        with open(config_path, 'rb') as f:
            configs = pickle.load(f)

        # sample rate is 16k Hz
        sr = 16000
        # signal length is 4 sec
        sig_len = 4

        for utt in range(len(configs)):
            this_config = configs[utt]

            # load audio files
            speakers = this_config['speech']
            noise = this_config['noise']
            spk1, _ = sf.read(os.path.join(libri_path, speakers[0]))
            spk2, _ = sf.read(os.path.join(libri_path, speakers[1]))
            noise, _ = sf.read(os.path.join(noise_path, noise))

            # calculate signal length according to overlap ratio
            overlap_ratio = this_config['overlap_ratio']
            actual_len = int(sig_len / (2 - overlap_ratio) * sr)
            overlap = int(actual_len * overlap_ratio)

            # truncate speech according to start and end indexes
            start_idx = this_config['start_idx']
            end_idx = start_idx + actual_len
            spk1 = spk1[start_idx:end_idx]
            spk2 = spk2[start_idx:end_idx]

            # rescaling speaker and noise energy according to relative SNR
            spk1 = spk1 / np.sqrt(np.sum(spk1**2) + 1e-8) * 1e2
            spk2 = spk2 / np.sqrt(np.sum(spk2**2) + 1e-8) * 1e2
            spk2 = spk2 * np.power(10, this_config['spk_snr'] / 20.)
            # repeat noise if necessary
            noise = noise[:int(sig_len * sr)]
            if len(noise) < int(sig_len * sr):
                num_repeat = int(sig_len * sr) // len(noise)
                res = int(sig_len * sr) - num_repeat * len(noise)
                noise = np.concatenate(
                    [np.concatenate([noise] * num_repeat), noise[:res]])
            # rescale noise energy w.r.t mixture energy
            noise = noise / np.sqrt(np.sum(noise**2) + 1e-8) * np.sqrt(
                np.sum((spk1 + spk2)**2) + 1e-8)
            noise = noise / np.power(10, this_config['noise_snr'] / 20.)

            # load locations and room configs
            mic_pos = np.asarray(this_config['mic_pos'])
            spk_pos = np.asarray(this_config['spk_pos'])
            noise_pos = np.asarray(this_config['noise_pos'])
            room_size = np.asarray(this_config['room_size'])
            rt60 = this_config['RT60']
            num_mic = len(mic_pos)

            # generate RIR
            beta = gpuRIR.beta_SabineEstimation(room_size, rt60)
            nb_img = gpuRIR.t2n(rt60, room_size)
            spk_rir = gpuRIR.simulateRIR(room_size, beta, spk_pos, mic_pos,
                                         nb_img, rt60, sr)
            noise_rir = gpuRIR.simulateRIR(room_size, beta, noise_pos, mic_pos,
                                           nb_img, rt60, sr)

            # convolve with RIR at different mic
            echoic_spk1 = []
            echoic_spk2 = []
            echoic_mixture = []

            if dataset == 'adhoc':
                nmic = this_config['num_mic']
            else:
                nmic = 6
            for mic in range(nmic):
                spk1_echoic_sig = signal.fftconvolve(spk1, spk_rir[0][mic])
                spk2_echoic_sig = signal.fftconvolve(spk2, spk_rir[1][mic])
                noise_echoic_sig = signal.fftconvolve(noise, noise_rir[0][mic])

                # align the speakers according to overlap ratio
                pad_length = int((1 - overlap_ratio) * actual_len)
                padding = np.zeros(pad_length)
                spk1_echoic_sig = np.concatenate([spk1_echoic_sig, padding])
                spk2_echoic_sig = np.concatenate([padding, spk2_echoic_sig])

                # pad or truncate length to 4s if necessary
                def pad_sig(x):
                    if len(x) < sig_len * sr:
                        zeros = np.zeros(sig_len * sr - len(x))
                        return np.concatenate([x, zeros])
                    else:
                        return x[:sig_len * sr]

                spk1_echoic_sig = pad_sig(spk1_echoic_sig)
                spk2_echoic_sig = pad_sig(spk2_echoic_sig)
                noise_echoic_sig = pad_sig(noise_echoic_sig)

                # sum up for mixture
                mixture = spk1_echoic_sig + spk2_echoic_sig + noise_echoic_sig

                if avoid_clipping:
                    # avoid clipping
                    max_scale = np.max([
                        np.max(np.abs(mixture)),
                        np.max(np.abs(spk1_echoic_sig)),
                        np.max(np.abs(spk2_echoic_sig))
                    ])
                    mixture = mixture / max_scale * 0.9
                    spk1_echoic_sig = spk1_echoic_sig / max_scale * 0.9
                    spk2_echoic_sig = spk2_echoic_sig / max_scale * 0.9

                # save waveforms
                this_save_dir = os.path.join(output_path,
                                             'MC_Libri_' + dataset,
                                             data_type[i],
                                             str(num_mic) + 'mic',
                                             'sample' + str(utt + 1))
                if not os.path.exists(this_save_dir):
                    os.makedirs(this_save_dir)
                sf.write(
                    os.path.join(this_save_dir,
                                 'spk1_mic' + str(mic + 1) + '.wav'),
                    spk1_echoic_sig, sr)
                sf.write(
                    os.path.join(this_save_dir,
                                 'spk2_mic' + str(mic + 1) + '.wav'),
                    spk2_echoic_sig, sr)
                sf.write(
                    os.path.join(this_save_dir,
                                 'mixture_mic' + str(mic + 1) + '.wav'),
                    mixture, sr)

            # print progress
            if (utt + 1) % (len(configs) // 5) == 0:
                print(
                    "{} configuration, {} set, {:d} out of {:d} utterances generated."
                    .format(dataset, data_type[i], utt + 1, len(configs)))
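The SNR rescaling in both scripts follows the same two-step rule: first match the noise energy to the reference energy, then attenuate by 10^(SNR/20). A quick numerical check with hypothetical signals and a hypothetical target:

import numpy as np

rng = np.random.default_rng(0)
mixture = rng.standard_normal(64000)
noise = rng.standard_normal(64000)
target_snr_db = 10.0  # stands in for this_config['noise_snr']

noise = noise / np.sqrt(np.sum(noise**2) + 1e-8) * np.sqrt(np.sum(mixture**2) + 1e-8)
noise = noise / np.power(10, target_snr_db / 20.)
snr_db = 10 * np.log10(np.sum(mixture**2) / np.sum(noise**2))
print(round(snr_db, 1))  # ≈ 10.0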