import numpy as np
import scipy as sp
import scipy.signal
import torch
import matplotlib.pyplot as plt
import librosa
from librosa.display import specshow

# Project-local modules used across the snippets below; these import paths
# are assumed from the rirnet repo layout and may need adjusting.
import rirnet.acoustic_utils as au
import rirnet.misc as misc
import rirnet.roomgen as rg
from rirnet.sound_engine import SoundEngine
from rirnet.material_engine import MaterialEngine


def main():
    net, _ = misc.load_latest('/home/eriklarsson/rirnet/timeconv/models', 'net')
    fs = 16384
    n_fft = 128

    # Build a reverberant test signal: anechoic sound convolved with a real RIR.
    sound_engine = SoundEngine('/home/eriklarsson/rirnet/audio/chamber/val', 44100)
    anechoic_signal = sound_engine.random()
    rir_real, _ = au.read_wav('/home/eriklarsson/rirnet/audio/rirs/lecture.wav', 44100)
    rir_real = rir_real[:44100 // 2]
    rev_real = au.resample(au.convolve(rir_real, anechoic_signal), 44100, fs)

    # The network maps the negative log-magnitude spectrogram of the
    # reverberant signal to a spectrogram of the RIR.
    _, _, rev_spectrogram = sp.signal.stft(rev_real, fs=fs, nfft=n_fft, nperseg=n_fft)
    net_input = torch.from_numpy(-np.log(np.abs(rev_spectrogram))).unsqueeze(0).float()
    with torch.no_grad():
        net_output = net(net_input).squeeze().numpy()

    # Invert the predicted spectrogram with random phase.
    phase = np.exp(1j * np.random.uniform(low=-np.pi, high=np.pi, size=np.shape(net_output)))
    _, rir_net = sp.signal.istft(net_output * phase, fs, nfft=n_fft, nperseg=n_fft)
    plt.imshow(net_output)
    plt.show()

    # Compare the real and the estimated RIR on held-out speech.
    rir_net = au.resample(rir_net, fs, 44100)
    anechoic_test, _ = au.read_wav('/home/eriklarsson/rirnet/audio/harvard/male.wav')
    anechoic_test = anechoic_test[250000:400000, 0]
    rev_real_test = au.convolve(rir_real, anechoic_test)
    rev_net_test = au.convolve(rir_net, anechoic_test)
    au.save_wav('real.wav', rev_real_test, 44100, True)
    au.save_wav('net.wav', rev_net_test, 44100, True)
def reconstruct_rir_conv(time, alpha):
    """Reconstruct an RIR from peak times (normalized to [-1, 1]) and
    negative-log amplitudes by convolving an impulse train with a
    Hanning-windowed sinc kernel."""
    fdl = 81  # filter length of the windowed-sinc smoothing kernel
    time = (time.astype('double') + 1) * 1024  # map [-1, 1] -> sample index
    alpha = np.exp(-alpha).astype('double')    # undo the negative-log domain
    signs = np.random.randint(0, 2, len(alpha)) * 2 - 1  # random peak polarity
    #alpha *= signs

    # Accumulate amplitudes on an impulse train, sorted by arrival time.
    inds = np.argsort(time)
    time = np.round(time[inds]).astype(int)
    alpha = alpha[inds]
    peaks = np.zeros(np.max(time) + 1)
    for n, t in enumerate(time):
        peaks[t] += alpha[n]

    # Smooth the impulse train and trim the leading (near-)silence.
    ir = au.convolve(peaks, np.hanning(fdl) * np.sinc(np.linspace(-41, 41, fdl)))
    start_ind = min(np.where(ir > 10**(-10))[0])
    return ir[start_ind:]
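# Minimal usage sketch for reconstruct_rir_conv. The inputs below are synthetic
# stand-ins for real network output, which is assumed to be peak times
# normalized to [-1, 1] and amplitudes in the negative-log domain.
def _demo_reconstruct_rir_conv():
    times = np.random.uniform(-1, 1, 128)  # normalized arrival times
    alphas = np.random.uniform(0, 5, 128)  # negative-log amplitudes
    ir = reconstruct_rir_conv(times, alphas)
    plt.plot(ir)
    plt.show()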
def convolve_and_pad(wav, h_list):
    """Convolve wav with each RIR in h_list and zero-pad every result to the
    next power of two."""
    data_list = []
    for h in h_list:
        y = au.convolve(wav, h)
        y_length = au.next_power_of_two(np.size(y))
        data_list.append(au.pad_to(y, y_length, 0))
    return np.array(data_list)
def generate_waveforms(wav, h_list):
    """Like convolve_and_pad, but also returns each RIR padded to the same
    length as its reverberant signal, for use as a training target."""
    data_list = []
    target_list = []
    for h in h_list:
        y = au.convolve(wav, h)
        y_length = au.next_power_of_two(np.size(y))
        data_list.append(au.pad_to(y, y_length, 0))
        target_list.append(au.pad_to(h, y_length, 0))
    return np.array(target_list), np.array(data_list)
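# Usage sketch for generate_waveforms with hypothetical random inputs; the
# returned arrays stack into rectangular matrices because every signal is
# padded to the same power-of-two length (here all stand-in RIRs share one
# length, so all convolutions do too).
def _demo_generate_waveforms():
    wav = np.random.randn(16384)                        # stand-in anechoic signal
    h_list = [np.random.randn(4096) for _ in range(4)]  # stand-in RIRs
    targets, data = generate_waveforms(wav, h_list)
    print(targets.shape, data.shape)  # both (4, 32768)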
def run(self):
    self.extractor.eval()
    self.autoencoder.eval()  # keep the decoder in eval mode as well
    with torch.no_grad():
        for batch_idx, (source, target) in enumerate(self.eval_loader):
            source, target = source.to(self.device), target[0].numpy()

            # Extract a latent code from the reverberant input and decode it
            # into (times, alphas) peak data.
            latent_source = self.extractor(source)
            output = self.autoencoder(latent_source, encode=False, decode=True)[0].cpu().numpy()

            # Densify the peak data, rebuild the RIRs, and render both the
            # predicted and the target reverberant signals for listening.
            filled_times_output, filled_alphas_output = misc.fill_peaks(output[0, :], output[1, :])
            filled_times_target, filled_alphas_target = misc.fill_peaks(target[0, :], target[1, :])
            output_rir = misc.reconstruct_rir(filled_times_output, filled_alphas_output)
            target_rir = misc.reconstruct_rir(filled_times_target, filled_alphas_target)
            rev_signal_output = au.convolve(self.audio_anechoic, output_rir)
            rev_signal_target = au.convolve(self.audio_anechoic, target_rir)
            au.save_wav('output.wav', rev_signal_output, self.fs, True)
            au.save_wav('target.wav', rev_signal_target, self.fs, True)
            au.play_file('output.wav')
            au.play_file('target.wav')
def generate_spectrograms(queue, args):
    """Multiprocessing worker: simulate one random room and put the magnitude
    spectrograms of its reverberant signals and RIRs on the queue."""
    x_max, y_max, z_max, n_mics, n_per_seg, max_order, fs, material_engine, sound_engine = args
    np.random.seed()  # reseed so forked workers do not share RNG state

    # Random room dimensions in [2.5, max] meters, with random mic and source
    # positions and random surface absorption.
    x, y, z = np.random.rand(3) * (np.array([x_max, y_max, z_max]) - 2.5) + 2.5
    mic_pos = rg.generate_pos_in_rect(x, y, z, n_mics)
    source_pos = rg.generate_pos_in_rect(x, y, z, 1)[0]
    absorption = material_engine.random()
    an_sig = sound_engine.random()

    rir_list = rg.generate_multiband_rirs(x, y, z, mic_pos, source_pos, fs, max_order, absorption)
    rev_sig_spectrograms = []
    rir_spectrograms = []
    for rir in rir_list:
        rev_sig = au.convolve(rir, an_sig)
        _, _, rir_spectrogram = sp.signal.stft(rir, fs=fs, nperseg=n_per_seg)
        _, _, rev_sig_spectrogram = sp.signal.stft(rev_sig, fs=fs, nperseg=n_per_seg)
        rev_sig_spectrograms.append(np.abs(rev_sig_spectrogram))
        rir_spectrograms.append(np.abs(rir_spectrogram))
    queue.put([rev_sig_spectrograms, rir_spectrograms])
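# generate_spectrograms is shaped as a multiprocessing worker. A minimal driver
# might look like the sketch below; the room limits, STFT settings, and worker
# count are assumptions, not values taken from the repo.
def _demo_generate_spectrograms(material_engine, sound_engine):
    import multiprocessing as mp
    queue = mp.Queue()
    args = (10, 10, 4,   # x_max, y_max, z_max (m)
            1,           # n_mics
            128,         # n_per_seg
            8,           # max_order
            16384,       # fs
            material_engine, sound_engine)
    workers = [mp.Process(target=generate_spectrograms, args=(queue, args))
               for _ in range(4)]
    for w in workers:
        w.start()
    batches = [queue.get() for _ in workers]  # drain the queue before joining
    for w in workers:
        w.join()
    return batches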
def main():
    net_timeconv, _ = misc.load_latest('/home/felix/rirnet/timeconv/models', 'net')
    net_peaks_ae, _ = misc.load_latest('/home/felix/rirnet/nanonet/models/16', 'autoencoder')
    net_peaks_ext, _ = misc.load_latest('/home/felix/rirnet/nanonet/models/16', 'extractor')

    # Simulate one room twice: a multiband RIR at the timeconv sample rate and
    # a monoband RIR at the peaks sample rate.
    x, y, z = 6, 9, 3
    mic_pos = rg.generate_pos_in_rect(x, y, z, 1)
    source_pos = rg.generate_pos_in_rect(x, y, z, 1)[0]
    fs_peaks = 44100
    fs_timeconv = 16384
    n_fft = 128
    sound_engine = SoundEngine('/home/felix/rirnet/audio/chamber/val', fs_peaks)
    material_engine = MaterialEngine('/home/felix/rirnet/wip/materials.csv',
                                     '/home/felix/rirnet/wip/surfaces.csv')
    abs_coeffs = material_engine.random()
    multiband_rir = rg.generate_multiband_rirs(x, y, z, mic_pos, source_pos,
                                               fs_timeconv, 60, abs_coeffs)[0]
    monoband_rir = generate_monoband_rir(x, y, z, mic_pos, source_pos, fs_peaks, 8, abs_coeffs)

    # Prepare inputs for both networks from the same anechoic signal.
    an_sig_peaks = sound_engine.random()
    an_sig_timeconv = au.resample(an_sig_peaks, fs_peaks, fs_timeconv)
    rev_sig_multi = au.convolve(multiband_rir, an_sig_timeconv)
    _, _, rev_sig_multi_spectrogram = sp.signal.stft(rev_sig_multi, fs=fs_timeconv,
                                                     nfft=n_fft, nperseg=n_fft)
    input_timeconv = torch.from_numpy(
        -np.log(np.abs(rev_sig_multi_spectrogram))).unsqueeze(0).float()
    rev_sig_mono = au.pad_to(au.convolve(monoband_rir, an_sig_peaks), 2**16)
    input_peaks = preprocess_peaks(rev_sig_mono, fs_peaks)

    with torch.no_grad():
        output_timeconv = net_timeconv(input_timeconv).squeeze().numpy()
        output_peaks = net_peaks_ae(net_peaks_ext(input_peaks), decode=True).squeeze().numpy()

    plt.figure()
    plt.imshow(output_timeconv)
    plt.show()

    # Invert the predicted spectrogram with random phase and compare against
    # the simulated ground truth.
    phase = np.exp(1j * np.random.uniform(low=-np.pi, high=np.pi,
                                          size=np.shape(output_timeconv)))
    _, output_timeconv = sp.signal.istft(output_timeconv * phase, fs_timeconv,
                                         nfft=n_fft, nperseg=n_fft)
    plt.subplot(221)
    plt.plot(output_timeconv)
    plt.subplot(222)
    rev_output = au.convolve(output_timeconv, an_sig_timeconv)
    plt.plot(rev_output / np.max(np.abs(rev_output)))
    #plt.scatter(output_peaks[0], output_peaks[1])
    plt.subplot(223)
    plt.plot(multiband_rir)
    plt.subplot(224)
    plt.plot(rev_sig_multi / np.max(np.abs(rev_sig_multi)))
    plt.show()

    au.save_wav('synthetic.wav', rev_output, fs_timeconv, True)
    au.save_wav('true.wav', rev_sig_multi, fs_timeconv, True)
def main(audio_path):
    # Simulate a shoebox room and pull out the first mic/source RIR.
    room = rg.generate(4, 10, 2, 3, 10, max_order=8)
    room.plot(mic_marker_size=30)
    room.compute_rir()
    rir = room.rir[0][0]

    # Trim leading silence and normalize.
    first_index = next((i for i, x in enumerate(rir) if x), None)
    rir = rir[first_index:] / max(abs(rir))
    t_rir = np.arange(len(rir)) / 44100.

    sound, rate = au.read_wav(audio_path)
    t_sound = np.arange(len(sound)) / 44100.

    signal = au.convolve(sound, rir)
    signal /= max(abs(signal))
    t_signal = np.arange(len(signal)) / 44100.

    # Image-source peak data: arrival times and log attenuations for the
    # sources visible from the first microphone.
    mic = room.mic_array.R.T[0]
    distances = room.sources[0].distance(mic)
    times = distances / 343.0 * room.fs
    alphas = room.sources[0].damping / (4. * np.pi * distances)
    visible = tuple(np.where(room.visibility[0][0] == 1))
    alphas = -np.log(alphas[visible])
    alphas -= min(alphas)
    times = (times[visible] - min(times[visible])) / 44100.
    right_lim = max(times)

    mfcc = librosa.feature.mfcc(y=signal, sr=44100, n_mels=40)

    eps = 0.1
    plt.figure()
    ax = plt.subplot(2, 2, 1)
    plt.plot(t_sound, sound)
    plt.title('Anechoic sound')
    plt.xlabel('Time (s)')
    ax.set_xlim(min(t_sound), right_lim)
    ax.set_ylim(-1 - eps, 1 + eps)

    ax = plt.subplot(2, 2, 2)
    plt.plot(t_rir, rir)
    plt.title('Room IRF')
    plt.xlabel('Time (s)')
    ax.set_xlim(min(t_rir), right_lim)
    ax.set_ylim(-1 - eps, 1 + eps)

    ax = plt.subplot(2, 2, 3)
    plt.plot(t_signal, signal)
    plt.title('Reverberant sound')
    plt.xlabel('Time (s)')
    ax.set_xlim(min(t_signal), right_lim)
    ax.set_ylim(-1 - eps, 1 + eps)

    ax = plt.subplot(2, 2, 4)
    plt.plot(times, alphas, '.')
    plt.title('Peaks data')
    plt.xlabel('Time (s)')
    ax.set_xlim(min(times) - 0.002, right_lim + 0.002)

    plt.figure()
    specshow(mfcc, sr=44100, x_axis='time')
    plt.title('MFCC spectrogram')
    plt.xlabel('Time (s)')
    plt.show()
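# Hypothetical entry point for the plotting script above; it is presumably run
# directly with the path to an anechoic wav file as its only argument.
if __name__ == '__main__':
    import sys
    main(sys.argv[1])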