def main():
    net, _ = misc.load_latest('/home/eriklarsson/rirnet/timeconv/models', 'net')
    fs = 16384
    n_fft = 128

    # Build a reverberant test signal: a random anechoic sound convolved with a real RIR.
    sound_engine = SoundEngine('/home/eriklarsson/rirnet/audio/chamber/val', 44100)
    anechoic_signal = sound_engine.random()
    rir_real, _ = au.read_wav('/home/eriklarsson/rirnet/audio/rirs/lecture.wav', 44100)
    rir_real = rir_real[:44100 // 2]
    rev_real = au.resample(au.convolve(rir_real, anechoic_signal), 44100, fs)

    # The network input is the negative log magnitude spectrogram.
    _, _, rev_spectrogram = sp.signal.stft(rev_real, fs=fs, nfft=n_fft, nperseg=n_fft)
    net_input = torch.from_numpy(-np.log(np.abs(rev_spectrogram))).unsqueeze(0).float()
    with torch.no_grad():
        net_output = net(net_input).squeeze().numpy()

    # Invert the predicted magnitude spectrogram using random phase.
    phase = np.exp(1j * np.random.uniform(low=-np.pi, high=np.pi, size=np.shape(net_output)))
    _, rir_net = sp.signal.istft(net_output * phase, fs, nfft=n_fft, nperseg=n_fft)

    plt.imshow(net_output)
    plt.show()

    # Compare the real and the estimated RIR on unseen anechoic speech.
    rir_net = au.resample(rir_net, fs, 44100)
    anechoic_test, _ = au.read_wav('/home/eriklarsson/rirnet/audio/harvard/male.wav')
    anechoic_test = anechoic_test[250000:400000, 0]
    rev_real_test = au.convolve(rir_real, anechoic_test)
    rev_net_test = au.convolve(rir_net, anechoic_test)
    au.save_wav('real.wav', rev_real_test, 44100, True)
    au.save_wav('net.wav', rev_net_test, 44100, True)
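Random phase tends to smear the time-domain reconstruction. A minimal sketch of an alternative, assuming librosa is available and that net_output is used directly as a magnitude spectrogram (as in the istft call above): Griffin-Lim iteratively estimates a consistent phase instead of drawing it at random.

import librosa

# Sketch: Griffin-Lim phase estimation in place of the random-phase ISTFT.
# Assumes net_output has shape (n_fft // 2 + 1, frames) and that the hop
# matches scipy's default of nperseg // 2.
rir_net = librosa.griffinlim(net_output, n_iter=32,
                             hop_length=n_fft // 2, win_length=n_fft)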
def main():
    n_mfcc = 40
    model_dir = '../models'
    model = Model(model_dir)
    signal, rate = au.read_wav('../../audio/trapphus.wav')

    signal_segment_list = au.split_signal(signal, rate=rate, segment_length=60000,
                                          min_energy=100, max_energy=4, debug=False)
    signal_segment_list = [au.pad_to(segment, 2**16) for segment in signal_segment_list]
    mfccs = [au.waveform_to_mfcc(segment, rate, n_mfcc)[1][:, :-1]
             for segment in signal_segment_list]

    nw_input = preprocess(mfccs)
    nw_output = model.forward(nw_input)
    rir_list = postprocess(nw_output, 0, True)
    rir_list_2 = postprocess(nw_output, 20, True)
    plt.show()
def __init__(self, model_dir):
    self.model_dir = model_dir
    self.extractor, _ = misc.load_latest(model_dir, 'extractor')
    self.autoencoder, _ = misc.load_latest(model_dir, 'autoencoder')
    self.extractor_args = self.extractor.args()

    use_cuda = not self.extractor_args.no_cuda and torch.cuda.is_available()
    self.device = torch.device("cuda" if use_cuda else "cpu")
    self.extractor.to(self.device)
    self.autoencoder.to(self.device)
    self.kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

    data_transform = self.extractor.data_transform()
    target_transform = self.extractor.target_transform()
    eval_db = RirnetDatabase(is_training=False, args=self.extractor_args,
                             data_transform=data_transform,
                             target_transform=target_transform)
    self.eval_loader = torch.utils.data.DataLoader(eval_db,
                                                   batch_size=self.extractor_args.batch_size,
                                                   shuffle=True, **self.kwargs)

    self.audio_anechoic, self.fs = au.read_wav('../../audio/harvard/male.wav')
def load_audio(self, audio_folder_path, fs):
    audio_list = []
    # glob already returns paths inside audio_folder_path, so they must not
    # be joined with the folder a second time.
    audio_file_path_list = glob.glob(os.path.join(audio_folder_path, '*.wav'))
    for audio_file_path in audio_file_path_list:
        audio = au.normalize(au.read_wav(audio_file_path, fs)[0])
        audio_list.append(audio)
    return audio_list
def load_wavs(audio_folder, db_setup):
    audio_list = db_setup['source_audio']
    rate = db_setup['fs']
    wav_list = []
    if audio_list == ['']:
        # No explicit file list in the setup: fall back to every wav in the
        # folder. glob returns full paths, so keep only the filenames to
        # avoid joining the folder path twice below.
        audio_list = [os.path.basename(path)
                      for path in glob.glob(os.path.join(audio_folder, '*.wav'))]
    for audio_filename in audio_list:
        wav_path = os.path.join(audio_folder, audio_filename)
        wav = au.normalize(au.read_wav(wav_path, rate)[0])
        wav_list.append(wav)
    return wav_list
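A minimal usage sketch, assuming db_setup only needs the two keys read above; the folder path and the sample rate here are hypothetical values, and the empty-string entry triggers the glob fallback.

# Hypothetical setup dict: empty source_audio entry -> load every *.wav in the folder.
db_setup = {'source_audio': [''], 'fs': 44100}
wav_list = load_wavs('../../audio/chamber/val', db_setup)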
def __init__(self, model_dir):
    sys.path.append('../../nanonet/rirnet')
    from rirnet_database import RirnetDatabase

    self.model_dir = model_dir
    self.extractor, _ = misc.load_latest(model_dir, 'extractor')
    self.autoencoder, _ = misc.load_latest(model_dir, 'autoencoder')
    self.extractor_args = self.extractor.args()

    use_cuda = not self.extractor_args.no_cuda and torch.cuda.is_available()
    self.device = torch.device("cuda" if use_cuda else "cpu")
    self.extractor.to(self.device)
    self.autoencoder.to(self.device)
    self.kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

    data_transform = transforms.Compose([
        ToNormalized('../../database/mean.npy', '../../database/std.npy')])
    target_transform = transforms.Compose([ToNegativeLog(), ToUnitNorm(), ToTensor()])

    self.extractor_args.val_db_path = '../../database/db-val.csv'
    eval_db = RirnetDatabase(is_training=False, args=self.extractor_args,
                             data_transform=data_transform,
                             target_transform=target_transform)
    self.eval_loader = torch.utils.data.DataLoader(eval_db,
                                                   batch_size=self.extractor_args.batch_size,
                                                   shuffle=True, **self.kwargs)

    self.audio_anechoic, self.fs = au.read_wav('../../audio/harvard/male.wav')
import rirnet.acoustic_utils as au
import rirnet.misc as misc
import matplotlib.pyplot as plt
import numpy as np
import torch
import scipy as sp

fs = 16384
signal, _ = au.read_wav('../../audio/livingroom/full/mario.wav', fs)

net, _ = misc.load_latest('../models', 'net')
net.to("cuda")

a = True
while a:
    # Draw a random one-second snippet from the recording. randint already
    # returns a non-negative index, so no extra clamping is needed.
    start = np.random.randint(signal.shape[0] - fs)
    snippet = signal[start:start + fs]
    output = au.split_signal(signal, rate=fs, segment_length=fs // 4,
                             min_energy=10, max_energy=20, hop_length=128,
                             debug=False)
    if len(output) > 0:
        # Assumption: stop searching once a usable segment has been found
        # (the loop body is truncated in the source).
        a = False
import rirnet.acoustic_utils as au
import librosa
import numpy as np
import matplotlib.pyplot as plt

out_path = '../audio/chamber/'
in_path = '../audio/chamber/full/full.wav'
rate = 44100

data, rate = au.read_wav(in_path, rate=rate)

# onset_detect returns frame indices; multiply by the default hop length
# (512 samples) to get sample positions.
sound_starts = librosa.onset.onset_detect(data, sr=rate, backtrack=True) * 512

for i, start in enumerate(sound_starts):
    stop = start + au.next_power_of_two(int(rate / 4))
    # Keep only segments whose tail is nearly silent, i.e. the sound has
    # decayed fully within the window.
    energy = np.sum(np.abs(data[stop - 100:stop]))
    if energy < 0.01:
        au.save_wav(out_path + 'ch_{}.wav'.format(i), data[start:stop], rate)
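The frame-to-sample conversion can also be written without the hard-coded factor; a small sketch using librosa's helper, where the hop length of 512 matches onset_detect's default:

# Equivalent conversion via librosa's helper instead of multiplying by 512.
onset_frames = librosa.onset.onset_detect(data, sr=rate, backtrack=True)
sound_starts = librosa.frames_to_samples(onset_frames, hop_length=512)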
def main(audio_path):
    room = rg.generate(4, 10, 2, 3, 10, max_order=8)
    room.plot(mic_marker_size=30)
    room.compute_rir()

    # Normalize the RIR and align it to its first non-zero sample.
    rir = room.rir[0][0]
    first_index = next((i for i, x in enumerate(rir) if x), None)
    rir = rir[first_index:] / max(abs(rir))
    t_rir = np.arange(len(rir)) / 44100.

    sound, rate = au.read_wav(audio_path)
    t_sound = np.arange(len(sound)) / 44100.

    signal = au.convolve(sound, rir)
    signal /= max(abs(signal))
    t_signal = np.arange(len(signal)) / 44100.

    # Image-source peak data: arrival times and log amplitudes of the
    # visible sources, shifted to start at zero.
    mic = room.mic_array.R.T[0]
    distances = room.sources[0].distance(mic)
    times = distances / 343.0 * room.fs
    alphas = room.sources[0].damping / (4. * np.pi * distances)
    visible = np.where(room.visibility[0][0] == 1)
    alphas = -np.log(alphas[visible])
    alphas -= min(alphas)
    times = (times[visible] - min(times[visible])) / 44100.
    right_lim = max(times)

    mfcc = librosa.feature.mfcc(y=signal, sr=44100., n_mels=40)

    eps = 0.1
    plt.figure()
    ax = plt.subplot(2, 2, 1)
    plt.plot(t_sound, sound)
    plt.title('Anechoic sound')
    plt.xlabel('Time (s)')
    ax.set_xlim(min(t_sound), right_lim)
    ax.set_ylim(-1 - eps, 1 + eps)
    ax = plt.subplot(2, 2, 2)
    plt.plot(t_rir, rir)
    plt.title('Room impulse response')
    plt.xlabel('Time (s)')
    ax.set_xlim(min(t_rir), right_lim)
    ax.set_ylim(-1 - eps, 1 + eps)
    ax = plt.subplot(2, 2, 3)
    plt.plot(t_signal, signal)
    plt.title('Reverberant sound')
    plt.xlabel('Time (s)')
    ax.set_xlim(min(t_signal), right_lim)
    ax.set_ylim(-1 - eps, 1 + eps)
    ax = plt.subplot(2, 2, 4)
    plt.plot(times, alphas, '.')
    plt.title('Peaks data')
    plt.xlabel('Time (s)')
    ax.set_xlim(min(times) - 0.002, right_lim + 0.002)

    plt.figure()
    specshow(mfcc, sr=44100, x_axis='time')
    plt.title('MFCC spectrogram')
    plt.xlabel('Time (s)')
    plt.show()