def generate_waveforms(wav, h_list):
    """Convolve *wav* with every impulse response in *h_list*.

    Each convolution and its impulse response are zero-padded to the next
    power of two of the convolution's length.

    Returns:
        (targets, data) as numpy arrays — the padded impulse responses and
        the padded convolved signals, respectively.
    """
    targets = []
    waveforms = []
    for impulse_response in h_list:
        convolved = au.convolve(wav, impulse_response)
        padded_length = au.next_power_of_two(np.size(convolved))
        waveforms.append(au.pad_to(convolved, padded_length, 0))
        targets.append(au.pad_to(impulse_response, padded_length, 0))
    return np.array(targets), np.array(waveforms)
def main():
    """Run a recorded signal through the trained model and plot estimated RIRs."""
    n_mfcc = 40  # number of MFCC coefficients per frame
    model_dir = '../models'
    model = Model(model_dir)
    signal, rate = au.read_wav('../../audio/trapphus.wav')
    # Split the recording into fixed-length segments filtered by energy.
    # NOTE(review): exact min_energy/max_energy semantics live in
    # au.split_signal — confirm against that helper.
    signal_segment_list = au.split_signal(signal, rate=rate, segment_length=60000, min_energy=100, max_energy=4, debug=False)
    # Zero-pad every segment to 2**16 samples.
    signal_segment_list = [ au.pad_to(segment, 2**16) for segment in signal_segment_list ]
    # Compute MFCC features per segment; the last frame column is dropped.
    mfccs = [ au.waveform_to_mfcc(segment, rate, n_mfcc)[1][:, :-1] for segment in signal_segment_list ]
    nw_input = preprocess(mfccs)
    nw_output = model.forward(nw_input)
    # Postprocess the network output at two settings (second argument
    # presumably a threshold/offset — verify against postprocess). The
    # returned lists are unused; the True flag presumably triggers plotting
    # inside postprocess, displayed by plt.show() below.
    rir_list = postprocess(nw_output, 0, True)
    rir_list_2 = postprocess(nw_output, 20, True)
    plt.show()
def __next__(self):
    """Produce the next batch of simulated room impulse responses.

    Pulls one finished room from the worker output queue, spawns a
    replacement worker process, and converts the room's RIRs into
    fixed-length padded arrays. A room is discarded — and the whole step
    retried with a fresh room — if any RIR is longer than ``self.h_length``
    or has fewer than ``self.min_peaks_length`` detected peaks.

    Fixes over the previous version:
      * discards were implemented with unbounded recursion
        (``return self.__next__()``), which could exhaust the call stack
        after many rejected rooms; a retry loop is used instead.
      * removed an unreachable ``if interrupted:`` branch — ``interrupted``
        was assigned False immediately before the test, so the terminate/
        ``sys.exit()`` path could never execute.

    Returns:
        (h_list, peaks_list, info_list) where each info entry is
        [room.corners, room.absorption, mic position, source position].

    Raises:
        StopIteration: once n_total items have been produced.
    """
    while True:
        if self.i_total == self.n_total:
            raise StopIteration
        i_produced = 0
        h_list = []
        info_list = []
        peaks_list = []
        self.terminate_dead_proc()
        room = self.output.get()
        # Keep the worker pool full: start a replacement for the finished job.
        new_proc = mp.Process(target=self.compute_room_proc)
        self.processes.append(new_proc)
        new_proc.start()
        discard = False
        for i_rir, rir in enumerate(room.rir):
            cut_rir = remove_leading_zeros(list(rir[0]))
            peaks = room.peaks[i_rir]
            # Reject the whole room if this RIR exceeds the target length
            # or its peak list is too short (one discard per room, as before).
            if len(cut_rir) > self.h_length or len(peaks[0]) < self.min_peaks_length:
                self.discarded += 1
                discard = True
                break
            h_list.append(au.pad_to(cut_rir, self.h_length, 0))
            peaks_list.append(peaks)
            info_list.append([
                room.corners, room.absorption,
                room.mic_array.R[:, i_rir], room.sources[0].position
            ])
            i_produced += 1
            if self.i_total + i_produced == self.n_total:
                # Final item overall: stop all workers before returning.
                for process in self.processes:
                    process.terminate()
                break
        if discard:
            continue
        self.i_total += i_produced
        return h_list, peaks_list, info_list
def convolve_and_pad(wav, h_list):
    """Convolve *wav* with each filter in *h_list*, zero-padding every
    result to the next power of two of its own length.

    Returns:
        numpy array stacking the padded convolutions, one row per filter.
    """
    padded_results = []
    for impulse in h_list:
        convolved = au.convolve(wav, impulse)
        target_length = au.next_power_of_two(np.size(convolved))
        padded_results.append(au.pad_to(convolved, target_length, 0))
    return np.array(padded_results)
def generate_monoband_rir(x, y, z, mic_pos, source_pos, fs, max_order, abs_coeffs):
    """Simulate a single-band room impulse response with pyroomacoustics.

    Args:
        x, y, z: shoebox room dimensions.
        mic_pos: microphone positions, one row per microphone (transposed
            for pyroomacoustics, which expects one column per microphone).
        source_pos: position of the single sound source.
        fs: sampling rate in Hz.
        max_order: maximum image-source reflection order.
        abs_coeffs: multiband absorption coefficients; band index 3 is used.

    Returns:
        The first microphone's RIR with the leading propagation-delay zeros
        removed, zero-padded to 2**15 samples.
    """
    absorption = rg.get_absorption_by_index(abs_coeffs, 3)
    room = pyroomacoustics.room.ShoeBox([x, y, z], fs,
                                        absorption=absorption,
                                        max_order=max_order)
    room.add_source(source_pos)
    room.add_microphone_array(pyroomacoustics.MicrophoneArray(mic_pos.T, fs=fs))
    room.compute_rir()
    rir = room.rir[0][0]
    # Fix: the generator previously reused the name `x`, shadowing the room
    # dimension parameter. If the RIR is all zeros, first_nonzero is None
    # and rir[None:] keeps the full response (same as before).
    first_nonzero = next((i for i, sample in enumerate(rir) if sample), None)
    rir = au.pad_to(rir[first_nonzero:], 2**15)
    return rir
def pad_list_to_pow2(h_list):
    """Zero-pad every impulse response in *h_list* to a common length: the
    next power of two of the longest response.

    Returns:
        A new list of padded impulse responses.
    """
    longest = max(len(h) for h in h_list)
    pow2_length = au.next_power_of_two(longest)
    return [au.pad_to(h, pow2_length) for h in h_list]
def main():
    """Compare network-estimated RIRs against simulated ground truth:
    simulate a room, reverberate a test signal, run both networks, then
    plot the results and write the reverberant signals to wav files."""
    # Load the trained networks: a time-convolution net mapping reverberant
    # spectrograms to RIR spectrograms, plus a peaks autoencoder/extractor.
    net_timeconv, _ = misc.load_latest('/home/felix/rirnet/timeconv/models', 'net')
    net_peaks_ae, _ = misc.load_latest('/home/felix/rirnet/nanonet/models/16', 'autoencoder')
    net_peaks_ext, _ = misc.load_latest('/home/felix/rirnet/nanonet/models/16', 'extractor')
    # Room geometry with randomized microphone and source placement.
    x, y, z = 6, 9, 3
    mic_pos = rg.generate_pos_in_rect(x, y, z, 1)
    source_pos = rg.generate_pos_in_rect(x, y, z, 1)[0]
    fs_peaks = 44100  # sample rate for the peaks pipeline
    fs_timeconv = 16384  # sample rate for the time-conv pipeline
    n_fft = 128
    sound_engine = SoundEngine('/home/felix/rirnet/audio/chamber/val', fs_peaks)
    material_engine = MaterialEngine('/home/felix/rirnet/wip/materials.csv', '/home/felix/rirnet/wip/surfaces.csv')
    abs_coeffs = material_engine.random()
    # Reference RIRs: multiband for the time-conv net, monoband for peaks.
    multiband_rir = rg.generate_multiband_rirs(x, y, z, mic_pos, source_pos, fs_timeconv, 60, abs_coeffs)[0]
    monoband_rir = generate_monoband_rir(x, y, z, mic_pos, source_pos, fs_peaks, 8, abs_coeffs)
    an_sig_peaks = sound_engine.random()
    an_sig_timeconv = au.resample(an_sig_peaks, fs_peaks, fs_timeconv)
    # Reverberate the anechoic signal and build the network inputs.
    rev_sig_multi = au.convolve(multiband_rir, an_sig_timeconv)
    _, _, rev_sig_multi_spectrogram = sp.signal.stft(rev_sig_multi, fs=fs_timeconv, nfft=n_fft, nperseg=n_fft)
    # Negative log-magnitude spectrogram with a leading batch dimension.
    input_timeconv = torch.from_numpy(
        -np.log(np.abs(rev_sig_multi_spectrogram))).unsqueeze(0).float()
    rev_sig_mono = au.pad_to(au.convolve(monoband_rir, an_sig_peaks), 2**16)
    input_peaks = preprocess_peaks(rev_sig_mono, fs_peaks)
    with torch.no_grad():
        output_timeconv = net_timeconv(input_timeconv).squeeze().numpy()
        output_peaks = net_peaks_ae(net_peaks_ext(input_peaks), decode=True).squeeze().numpy()
    plt.figure()
    plt.imshow(output_timeconv)
    plt.show()
    # The net predicts magnitudes only; attach uniform random phase before
    # inverting the STFT back to a time-domain RIR.
    phase = np.exp(1j * np.random.uniform(
        low=-np.pi, high=np.pi, size=np.shape(output_timeconv)))
    _, output_timeconv = sp.signal.istft(output_timeconv * phase, fs_timeconv, nfft=n_fft, nperseg=n_fft)
    # Plot estimated vs. ground-truth RIRs and normalized reverberant signals.
    plt.subplot(221)
    plt.plot(output_timeconv)
    plt.subplot(222)
    rev_output = au.convolve(output_timeconv, an_sig_timeconv)
    plt.plot(rev_output / np.max(np.abs(rev_output)))
    #plt.scatter(output_peaks[0], output_peaks[1])
    plt.subplot(223)
    plt.plot(multiband_rir)
    plt.subplot(224)
    plt.plot(rev_sig_multi / np.max(np.abs(rev_sig_multi)))
    plt.show()
    au.save_wav('synthetic.wav', rev_output, fs_timeconv, True)
    au.save_wav('tru.wav', rev_sig_multi, fs_timeconv, True)