Example 1
def generate_waveforms(wav, h_list):
    """Convolve a dry signal with each RIR and zero-pad both the wet
    signal and the RIR to the next power of two."""
    data_list = []
    target_list = []

    for h in h_list:
        y = au.convolve(wav, h)
        y_length = au.next_power_of_two(np.size(y))
        data = au.pad_to(y, y_length, 0)
        target = au.pad_to(h, y_length, 0)
        target_list.append(target)
        data_list.append(data)
    return np.array(target_list), np.array(data_list)
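
A hypothetical usage sketch for the function above; `au` is assumed to be the project's audio-utility module providing convolve, next_power_of_two and pad_to, and the inputs are mock data:

import numpy as np

wav = np.random.randn(1024)                        # mock dry signal
h_list = [np.random.randn(256) for _ in range(4)]  # mock RIRs, equal length
targets, data = generate_waveforms(wav, h_list)
# With equal-length RIRs every row pads to the same power-of-two length,
# so both arrays stack cleanly, here with shape (4, 2048).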
Example 2
def main():
    n_mfcc = 40
    model_dir = '../models'
    model = Model(model_dir)

    signal, rate = au.read_wav('../../audio/trapphus.wav')
    # Split the recording into segments that pass the energy thresholds.
    signal_segment_list = au.split_signal(signal,
                                          rate=rate,
                                          segment_length=60000,
                                          min_energy=100,
                                          max_energy=4,
                                          debug=False)
    # Zero-pad every segment to a fixed power-of-two length.
    signal_segment_list = [
        au.pad_to(segment, 2**16) for segment in signal_segment_list
    ]
    # Compute MFCCs and drop the last (partial) frame of each segment.
    mfccs = [
        au.waveform_to_mfcc(segment, rate, n_mfcc)[1][:, :-1]
        for segment in signal_segment_list
    ]
    nw_input = preprocess(mfccs)
    nw_output = model.forward(nw_input)
    # Postprocess the network output with two different settings.
    rir_list = postprocess(nw_output, 0, True)
    rir_list_2 = postprocess(nw_output, 20, True)

    plt.show()
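
`au.waveform_to_mfcc` is not shown here; a plausible plain-librosa equivalent of the `[1][:, :-1]` trimming above might look like the sketch below. The librosa call is real; that `au` wraps it this way is an assumption:

import librosa
import numpy as np

segment = np.random.randn(2**16).astype(np.float32)  # mock padded segment
mfcc = librosa.feature.mfcc(y=segment, sr=44100, n_mfcc=40)
mfcc = mfcc[:, :-1]  # drop the last (partial) frame, as in [:, :-1] above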
Example 3
    def __next__(self):
        if self.i_total == self.n_total:
            raise StopIteration

        i_produced = 0
        h_list = []
        info_list = []
        peaks_list = []
        # Reap any finished workers, take one simulated room off the queue,
        # and immediately spawn a replacement worker to keep it filled.
        self.terminate_dead_proc()
        room = self.output.get()
        new_proc = mp.Process(target=self.compute_room_proc)
        self.processes.append(new_proc)
        new_proc.start()

        for i_rir, rir in enumerate(room.rir):
            cut_rir = remove_leading_zeros(list(rir[0]))
            rir_length = len(cut_rir)
            peaks = room.peaks[i_rir]
            peaks_length = len(peaks[0])

            # Discard the whole room and retry if any RIR is too long or
            # has too few peaks.
            if (rir_length > self.h_length
                    or peaks_length < self.min_peaks_length):
                self.discarded += 1
                return self.__next__()

            h_list.append(au.pad_to(cut_rir, self.h_length, 0))
            peaks_list.append(peaks)
            info_list.append([
                room.corners, room.absorption, room.mic_array.R[:, i_rir],
                room.sources[0].position
            ])

            i_produced += 1
            # Stop all workers once the requested total has been produced.
            if self.i_total + i_produced == self.n_total:
                for process in self.processes:
                    process.terminate()
                break

        self.i_total += i_produced
        return h_list, peaks_list, info_list
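
A hypothetical consumption loop; `RirDataGenerator` and its constructor arguments are stand-ins for the surrounding class, which is not shown in this example:

# Hypothetical names: the class and its constructor signature are assumptions.
generator = RirDataGenerator(n_total=1000, h_length=2**15,
                             min_peaks_length=3)
for h_list, peaks_list, info_list in generator:
    ...  # e.g. assemble a training batch from the padded RIRs and peaks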
Example 4
def convolve_and_pad(wav, h_list):
    """Convolve a dry signal with each RIR and zero-pad each result to the
    next power of two."""
    data_list = []

    for h in h_list:
        y = au.convolve(wav, h)
        y_length = au.next_power_of_two(np.size(y))
        data = au.pad_to(y, y_length, 0)
        data_list.append(data)

    return np.array(data_list)
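
The power-of-two padding above exists because a full linear convolution of a length-m signal with a length-n RIR has length m + n - 1; rounding up to the next power of two gives uniform, FFT-friendly shapes. A quick check with plain numpy (`np.convolve` standing in for `au.convolve`, which is an assumption):

import numpy as np

wav = np.random.randn(1024)
h = np.random.randn(256)
y = np.convolve(wav, h)              # full linear convolution
assert np.size(y) == 1024 + 256 - 1  # 1279 -> next power of two is 2048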
Example 5
def generate_monoband_rir(x, y, z, mic_pos, source_pos, fs, max_order, abs_coeffs):
    absorption = rg.get_absorption_by_index(abs_coeffs, 3)
    room = pyroomacoustics.room.ShoeBox([x, y, z], fs, absorption=absorption,
                                        max_order=max_order)
    room.add_source(source_pos)
    room.add_microphone_array(pyroomacoustics.MicrophoneArray(mic_pos.T, fs=fs))
    room.compute_rir()
    rir = room.rir[0][0]
    # Strip the propagation delay (leading zeros) and pad to a fixed length.
    ind_1st_nonzero = next((i for i, v in enumerate(rir) if v), 0)
    rir = au.pad_to(rir[ind_1st_nonzero:], 2**15)
    return rir
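
A hypothetical call, assuming `rg` and `au` are the project's room-generation and audio-utility modules; `mic_pos` must be shaped (n_mics, 3), since it is transposed before being passed to `pyroomacoustics.MicrophoneArray`:

import numpy as np

mic_pos = np.array([[2.0, 3.0, 1.5]])  # one microphone, shape (1, 3)
source_pos = [4.0, 5.0, 1.5]
abs_coeffs = material_engine.random()  # per-surface coefficients, as in Example 7
rir = generate_monoband_rir(6, 9, 3, mic_pos, source_pos,
                            fs=44100, max_order=8, abs_coeffs=abs_coeffs)
print(len(rir))                        # 32768 samples after padding to 2**15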
Example 6
def pad_list_to_pow2(h_list):
    """Zero-pad every RIR in the list to the next power of two above the
    longest one, so they can be stacked into a rectangular array."""
    longest_length = len(max(h_list, key=len))
    target_length = au.next_power_of_two(longest_length)
    return [au.pad_to(h, target_length) for h in h_list]
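
`au.next_power_of_two` and `au.pad_to` are used throughout these examples but never shown; a minimal plain-numpy sketch of what they presumably do (an assumption, not the project's actual implementation):

import numpy as np

def next_power_of_two(n):
    # Smallest power of two >= n (assumed semantics).
    return 1 << (int(n) - 1).bit_length()

def pad_to(x, length, value=0):
    # Right-pad a 1-D sequence with `value` up to `length` (assumed semantics).
    x = np.asarray(x)
    return np.concatenate([x, np.full(length - len(x), value, dtype=x.dtype)])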
Example 7
def main():
    # Load the trained networks: a time-convolution net and a peaks
    # autoencoder/extractor pair.
    net_timeconv, _ = misc.load_latest('/home/felix/rirnet/timeconv/models',
                                       'net')
    net_peaks_ae, _ = misc.load_latest('/home/felix/rirnet/nanonet/models/16',
                                       'autoencoder')
    net_peaks_ext, _ = misc.load_latest('/home/felix/rirnet/nanonet/models/16',
                                        'extractor')

    # Room geometry and random microphone/source placement.
    x, y, z = 6, 9, 3
    mic_pos = rg.generate_pos_in_rect(x, y, z, 1)
    source_pos = rg.generate_pos_in_rect(x, y, z, 1)[0]
    fs_peaks = 44100
    fs_timeconv = 16384
    n_fft = 128

    sound_engine = SoundEngine('/home/felix/rirnet/audio/chamber/val',
                               fs_peaks)
    material_engine = MaterialEngine('/home/felix/rirnet/wip/materials.csv',
                                     '/home/felix/rirnet/wip/surfaces.csv')
    abs_coeffs = material_engine.random()

    # Simulate reference RIRs at both sample rates.
    multiband_rir = rg.generate_multiband_rirs(x, y, z, mic_pos, source_pos,
                                               fs_timeconv, 60, abs_coeffs)[0]
    monoband_rir = generate_monoband_rir(x, y, z, mic_pos, source_pos,
                                         fs_peaks, 8, abs_coeffs)

    # Pick a random anechoic signal and resample it for the timeconv net.
    an_sig_peaks = sound_engine.random()
    an_sig_timeconv = au.resample(an_sig_peaks, fs_peaks, fs_timeconv)

    # Reverberant signal -> negative-log-magnitude spectrogram -> net input.
    rev_sig_multi = au.convolve(multiband_rir, an_sig_timeconv)
    _, _, rev_sig_multi_spectrogram = sp.signal.stft(rev_sig_multi,
                                                     fs=fs_timeconv,
                                                     nfft=n_fft,
                                                     nperseg=n_fft)
    input_timeconv = torch.from_numpy(
        -np.log(np.abs(rev_sig_multi_spectrogram))).unsqueeze(0).float()

    rev_sig_mono = au.pad_to(au.convolve(monoband_rir, an_sig_peaks), 2**16)
    input_peaks = preprocess_peaks(rev_sig_mono, fs_peaks)

    with torch.no_grad():
        output_timeconv = net_timeconv(input_timeconv).squeeze().numpy()
        output_peaks = net_peaks_ae(net_peaks_ext(input_peaks),
                                    decode=True).squeeze().numpy()
        plt.figure()
        plt.imshow(output_timeconv)
        plt.show()

    # Attach random phase to the predicted spectrogram and invert with ISTFT.
    phase = np.exp(1j * np.random.uniform(
        low=-np.pi, high=np.pi, size=np.shape(output_timeconv)))
    _, output_timeconv = sp.signal.istft(output_timeconv * phase,
                                         fs_timeconv,
                                         nfft=n_fft,
                                         nperseg=n_fft)

    # Compare the predicted and simulated RIRs and their reverberant signals.
    plt.subplot(221)
    plt.plot(output_timeconv)
    plt.subplot(222)
    rev_output = au.convolve(output_timeconv, an_sig_timeconv)
    plt.plot(rev_output / np.max(np.abs(rev_output)))
    #plt.scatter(output_peaks[0], output_peaks[1])
    plt.subplot(223)
    plt.plot(multiband_rir)
    plt.subplot(224)
    plt.plot(rev_sig_multi / np.max(np.abs(rev_sig_multi)))
    plt.show()

    au.save_wav('synthetic.wav', rev_output, fs_timeconv, True)
    au.save_wav('tru.wav', rev_sig_multi, fs_timeconv, True)
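
The random-phase ISTFT above relies on `scipy.signal.istft` inverting an STFT taken with matching `nfft` and `nperseg`; the round trip itself is near-lossless, which a quick check with mock data confirms (scipy's API as used above):

import numpy as np
from scipy import signal

sig = np.random.randn(16384)
_, _, Z = signal.stft(sig, fs=16384, nfft=128, nperseg=128)
_, rec = signal.istft(Z, fs=16384, nfft=128, nperseg=128)
assert np.allclose(sig, rec[:len(sig)])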