Example #1
def main():
    net, _ = misc.load_latest('/home/eriklarsson/rirnet/timeconv/models', 'net')
    
    fs = 16384
    n_fft = 128

    sound_engine = SoundEngine('/home/eriklarsson/rirnet/audio/chamber/val', 44100)
    anechoic_signal = sound_engine.random()

    rir_real, _ = au.read_wav('/home/eriklarsson/rirnet/audio/rirs/lecture.wav', 44100)
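    # keep the first 0.5 s of the measured RIR, convolve it with the dry signal, and resample to fs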
    rir_real = rir_real[:44100//2]
    rev_real = au.resample(au.convolve(rir_real, anechoic_signal), 44100, fs)
    
    _, _, rev_spectrogram = sp.signal.stft(rev_real, fs=fs, nfft=n_fft, nperseg=n_fft)
    net_input = torch.from_numpy(-np.log(np.abs(rev_spectrogram))).unsqueeze(0).float()

    with torch.no_grad():
        net_output = net(net_input).squeeze().numpy()
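    # the network output is treated as a magnitude spectrogram, so a random phase is imposed before the inverse STFT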
    phase = np.exp(1j*np.random.uniform(low = -np.pi, high = np.pi, size = np.shape(net_output)))
    _, rir_net = sp.signal.istft(net_output*phase, fs, nfft=n_fft, nperseg=n_fft)
    plt.imshow(net_output)
    plt.show()
    rir_net = au.resample(rir_net, fs, 44100)

    anechoic_test, _ = au.read_wav('/home/eriklarsson/rirnet/audio/harvard/male.wav')
    anechoic_test = anechoic_test[250000:400000,0]

    rev_real_test = au.convolve(rir_real, anechoic_test)
    rev_net_test = au.convolve(rir_net, anechoic_test)
    
    au.save_wav('real.wav', rev_real_test, 44100, True)
    au.save_wav('net.wav', rev_net_test, 44100, True)
Example #2
def main():
    n_mfcc = 40
    model_dir = '../models'
    model = Model(model_dir)

    signal, rate = au.read_wav('../../audio/trapphus.wav')
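    # split the recording into fixed-length segments, filtered by their energy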
    signal_segment_list = au.split_signal(signal,
                                          rate=rate,
                                          segment_length=60000,
                                          min_energy=100,
                                          max_energy=4,
                                          debug=False)
    signal_segment_list = [
        au.pad_to(segment, 2**16) for segment in signal_segment_list
    ]
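    # compute MFCCs per segment, dropping the last frame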
    mfccs = [
        au.waveform_to_mfcc(segment, rate, n_mfcc)[1][:, :-1]
        for segment in signal_segment_list
    ]
    nw_input = preprocess(mfccs)
    nw_output = model.forward(nw_input)
    rir_list = postprocess(nw_output, 0, True)
    rir_list_2 = postprocess(nw_output, 20, True)

    plt.show()
Example #3
    def __init__(self, model_dir):
        self.model_dir = model_dir

        self.extractor, _ = misc.load_latest(model_dir, 'extractor')
        self.autoencoder, _ = misc.load_latest(model_dir, 'autoencoder')

        self.extractor_args = self.extractor.args()

        use_cuda = not self.extractor_args.no_cuda and torch.cuda.is_available()
        self.device = torch.device("cuda" if use_cuda else "cpu")
        self.extractor.to(self.device)
        self.autoencoder.to(self.device)
        self.kwargs = {
            'num_workers': 1,
            'pin_memory': True
        } if use_cuda else {}

        data_transform = self.extractor.data_transform()
        target_transform = self.extractor.target_transform()
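        # the evaluation set below reuses the extractor's own data/target transforms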

        eval_db = RirnetDatabase(is_training=False,
                                 args=self.extractor_args,
                                 data_transform=data_transform,
                                 target_transform=target_transform)
        self.eval_loader = torch.utils.data.DataLoader(
            eval_db,
            batch_size=self.extractor_args.batch_size,
            shuffle=True,
            **self.kwargs)

        self.audio_anechoic, self.fs = au.read_wav(
            '../../audio/harvard/male.wav')
Example #4
    def load_audio(self, audio_folder_path, fs):
        audio_list = []
        # glob already returns paths prefixed with the folder, so no second join is needed
        audio_path_list = glob.glob(os.path.join(audio_folder_path, '*.wav'))
        for audio_file_path in audio_path_list:
            audio = au.normalize(au.read_wav(audio_file_path, fs)[0])
            audio_list.append(audio)
        return audio_list
Example #5
def load_wavs(audio_folder, db_setup):
    audio_list = db_setup['source_audio']
    rate = db_setup['fs']
    wav_list = []
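    # no explicit file list configured: fall back to every .wav in audio_folder
    # (base names only, since they are joined with audio_folder below)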
    if audio_list == ['']:
        audio_list = [os.path.basename(path) for path in
                      glob.glob(os.path.join(audio_folder, '*.wav'))]
    for audio_filename in audio_list:
        wav_path = os.path.join(audio_folder, audio_filename)
        wav = au.normalize(au.read_wav(wav_path, rate)[0])
        wav_list.append(wav)
    return wav_list
Example #6
    def __init__(self, model_dir):
        sys.path.append('../../nanonet/rirnet')
        from rirnet_database import RirnetDatabase
        print(sys.path)
        self.model_dir = model_dir

        self.extractor, _ = misc.load_latest(model_dir, 'extractor')
        self.autoencoder, _ = misc.load_latest(model_dir, 'autoencoder')

        self.extractor_args = self.extractor.args()

        use_cuda = not self.extractor_args.no_cuda and torch.cuda.is_available()
        self.device = torch.device("cuda" if use_cuda else "cpu")
        self.extractor.to(self.device)
        self.autoencoder.to(self.device)
        self.kwargs = {
            'num_workers': 1,
            'pin_memory': True
        } if use_cuda else {}
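        # normalize inputs with the precomputed dataset mean/std; targets are
        # mapped to negative-log, unit-norm tensors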

        data_transform = transforms.Compose([
            ToNormalized('../../database/mean.npy', '../../database/std.npy')
        ])
        target_transform = transforms.Compose(
            [ToNegativeLog(), ToUnitNorm(),
             ToTensor()])

        self.extractor_args.val_db_path = '../../database/db-val.csv'

        eval_db = RirnetDatabase(is_training=False,
                                 args=self.extractor_args,
                                 data_transform=data_transform,
                                 target_transform=target_transform)
        self.eval_loader = torch.utils.data.DataLoader(
            eval_db,
            batch_size=self.extractor_args.batch_size,
            shuffle=True,
            **self.kwargs)

        self.audio_anechoic, self.fs = au.read_wav(
            '../../audio/harvard/male.wav')
Example #7
import rirnet.acoustic_utils as au
import rirnet.misc as misc
import matplotlib.pyplot as plt
import numpy as np
import torch
import scipy as sp

fs = 16384

signal, _ = au.read_wav('../../audio/livingroom/full/mario.wav', fs)
start = np.maximum(np.random.randint(signal.shape[0] - fs), 0)
snippet = signal[start:start + fs]

net, _ = misc.load_latest('../models', 'net')
net.to("cuda")

a = True
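# repeatedly draw a random one-second snippet and split the signal into energy-filtered segments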

while a:
    start = np.maximum(np.random.randint(signal.shape[0] - fs), 0)
    snippet = signal[start:start + fs]

    output = au.split_signal(signal,
                             rate=fs,
                             segment_length=fs // 4,
                             min_energy=10,
                             max_energy=20,
                             hop_length=128,
                             debug=False)
    if len(output) > 0:
        # the rest of the loop body is cut off in the source snippet; stop once
        # a usable set of segments has been found
        break
Example #8
import rirnet.acoustic_utils as au
import librosa
import numpy as np
import matplotlib.pyplot as plt

out_path = '../audio/chamber/'
in_path = '../audio/chamber/full/full.wav'
rate = 44100
data, rate = au.read_wav(in_path, rate=rate)
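# onset frames (default hop length 512) converted to sample indices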
sound_starts = librosa.onset.onset_detect(data, sr=rate, backtrack=True) * 512
for i, start in enumerate(sound_starts):
    stop = start + au.next_power_of_two(int(rate / 4))
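    # keep the clip only if it has essentially decayed by its end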
    energy = np.sum(np.abs(data[stop - 100:stop]))
    if energy < 0.01:
        au.save_wav(out_path + 'ch_{}.wav'.format(i), data[start:stop], rate)
Example #9
def main(audio_path):
    room = rg.generate(4, 10, 2, 3, 10, max_order=8)
    room.plot(mic_marker_size=30)

    room.compute_rir()
    rir = room.rir[0][0]
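    # trim leading silence and normalize the RIR to unit peak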
    first_index = next((i for i, x in enumerate(rir) if x), None)
    rir = rir[first_index:] / max(abs(rir))
    t_rir = np.arange(len(rir)) / 44100.

    sound, rate = au.read_wav(audio_path)
    t_sound = np.arange(len(sound)) / 44100.

    signal = au.convolve(sound, rir)
    signal /= max(abs(signal))
    t_signal = np.arange(len(signal)) / 44100.

    # image-source data: arrival times and damping of the sources visible
    # from the first microphone
    mic = room.mic_array.R.T[0]
    distances = room.sources[0].distance(mic)
    times = distances / 343.0 * room.fs
    alphas = room.sources[0].damping / (4. * np.pi * distances)
    visible = np.where(room.visibility[0][0] == 1)  # renamed to avoid shadowing the builtin slice
    alphas = -np.log(alphas[visible])
    alphas -= min(alphas)
    times = (times[visible] - min(times[visible])) / 44100.
    right_lim = max(times)

    mfcc = librosa.feature.mfcc(y=signal, sr=44100., n_mels=40)

    eps = 0.1

    plt.figure()

    ax = plt.subplot(2, 2, 1)
    plt.plot(t_sound, sound)
    plt.title('Anechoic sound')
    plt.xlabel('Time (s)')
    ax.set_xlim(min(t_sound), right_lim)
    ax.set_ylim(-1 - eps, 1 + eps)

    ax = plt.subplot(2, 2, 2)
    plt.plot(t_rir, rir)
    plt.title('Room IRF')
    plt.xlabel('Time (s)')
    ax.set_xlim(min(t_rir), right_lim)
    ax.set_ylim(-1 - eps, 1 + eps)

    ax = plt.subplot(2, 2, 3)
    plt.plot(t_signal, signal)
    plt.title('Reverberant sound')
    plt.xlabel('Time (s)')
    ax.set_xlim(min(t_signal), right_lim)
    ax.set_ylim(-1 - eps, 1 + eps)

    ax = plt.subplot(2, 2, 4)
    plt.plot(times, alphas, '.')
    plt.title('Peaks data')
    plt.xlabel('Time (s)')
    ax.set_xlim(min(times) - 0.002, right_lim + 0.002)

    plt.figure()
    specshow(mfcc, sr=44100, x_axis='time')
    plt.title('MFCC spectrogram')
    plt.xlabel('Time (s)')
    plt.show()