Example #1
    def __init__(self,
                 calibration_signal,
                 step_size,
                 pb_ff,
                 nfft,
                 shift=None,
                 win=None):

        self.step_size = step_size  # for the LMS
        self.pb_ff = pb_ff  # forgetting factor for the projection back
        self.nfft = nfft
        self.nchannel = calibration_signal.shape[1]

        if shift is None:
            shift = nfft // 2

        if win is None:
            win = pra.hann(nfft)

        # Compute the fixed beamformer
        X = pra.transform.analysis(calibration_signal, nfft, shift, win=win)
        self.fixed_weights = calibration(X)[1:, :]  # remove DC

        # gsc adaptive weights
        self.adaptive_weights = np.zeros_like(self.fixed_weights)

        # projection back weights
        self.pb_den = np.ones(self.fixed_weights.shape[0],
                              dtype=self.fixed_weights.dtype)
        self.pb_num = np.ones(self.fixed_weights.shape[0],
                              dtype=np.float64)
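
The constructor above belongs to a GSC-style adaptive beamformer whose `calibration` routine and class definition live elsewhere in the source file. A runnable sketch of just the fixed-beamformer step, with random calibration data and a trivial stand-in for `calibration` (both are assumptions, not the example's actual code):

import numpy as np
import pyroomacoustics as pra

nfft = 512
shift = nfft // 2
win = pra.hann(nfft)

# hypothetical 4-channel calibration recording, shape (n_samples, n_channels)
calibration_signal = np.random.randn(16000, 4)

# STFT, shape (n_frames, nfft // 2 + 1, n_channels)
X = pra.transform.analysis(calibration_signal, nfft, shift, win=win)

# stand-in for the example's `calibration` routine: average over frames
fixed_weights = np.mean(X, axis=0)[1:, :]  # remove DC, as in the constructor
print(fixed_weights.shape)  # (nfft // 2, n_channels)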
Example #2
def with_half_overlap_no_filter(D):

    if D == 1:
        x_local = x[:, 0]
    else:
        x_local = x[:, :D]

    # parameters
    block_size = 512  # make sure the FFT size is a power of 2
    hop = block_size // 2  # half overlap
    window = pra.hann(block_size)  # the analysis window

    # Create the STFT object
    stft = pra.realtime.STFT(block_size, hop=hop, analysis_window=window, 
        channels=D, transform=transform)

    # collect the processed blocks
    processed_x = np.zeros(x_local.shape)

    # process the signals while full blocks are available
    n = 0
    while x_local.shape[0] - n > hop:

        # go to frequency domain
        stft.analysis(x_local[n:n+hop,])

        # copy processed block in the output buffer
        processed_x[n:n+hop,] = stft.synthesis()

        n += hop

    error = np.max(np.abs(x_local[:n-hop,] - processed_x[hop:n,]))

    return error
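
Example #2 is a test helper that leans on module-level names (`x`, `transform`, `pra`, `np`). A self-contained sketch of the same half-overlap round trip, with a random two-channel signal and the default FFT backend (sizes are assumptions):

import numpy as np
import pyroomacoustics as pra

block_size = 512
hop = block_size // 2
x = np.random.randn(16000, 2)

stft = pra.transform.STFT(block_size, hop=hop,
                          analysis_window=pra.hann(block_size), channels=2)

processed = np.zeros(x.shape)
n = 0
while x.shape[0] - n > hop:
    stft.analysis(x[n:n + hop, :])
    processed[n:n + hop, :] = stft.synthesis()
    n += hop

# up to the one-block latency, the output matches the input
print(np.max(np.abs(x[:n - hop, :] - processed[hop:n, :])))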
Example #3

def apply_spectral_sub(
    noisy_signal, nfft=512, db_reduc=25, lookback=12, beta=30, alpha=1
):
    """
    One-shot function to apply spectral subtraction approach.

    Parameters
    ----------
    noisy_signal : numpy array
        Real signal in time domain.
    nfft: int
        FFT size. Length of gain filter, i.e. the number of frequency bins, is
        given by ``nfft//2+1``.
    db_reduc: float
        Maximum reduction in dB for each bin.
    lookback: int
        How many frames to look back for the noise estimate.
    beta: float
        Overestimation factor to "push" the gain filter value (at each
        frequency) closer to the dB reduction specified by ``db_reduc``.
    alpha: float, optional
        Exponent factor to modify transition behavior towards the dB reduction
        specified by ``db_reduc``. Default is 1.

    Returns
    -------
    numpy array
        Enhanced/denoised signal.
    """

    from pyroomacoustics import hann
    from pyroomacoustics.transform import STFT

    hop = nfft // 2
    window = hann(nfft, flag="asymmetric", length="full")
    stft = STFT(nfft, hop=hop, analysis_window=window, streaming=True)
    scnr = SpectralSub(nfft, db_reduc, lookback, beta, alpha)

    processed_audio = np.zeros(noisy_signal.shape)
    n = 0
    while noisy_signal.shape[0] - n >= hop:
        # SCNR in frequency domain
        stft.analysis(
            noisy_signal[
                n : (n + hop),
            ]
        )
        gain_filt = scnr.compute_gain_filter(stft.X)

        # back to time domain
        processed_audio[
            n : n + hop,
        ] = stft.synthesis(gain_filt * stft.X)

        # update step
        n += hop

    return processed_audio
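
A hypothetical usage sketch for the one-shot function above. pyroomacoustics ships an equivalent as `pyroomacoustics.denoise.apply_spectral_sub`; the file names here are made up:

import numpy as np
from scipy.io import wavfile
import pyroomacoustics as pra

fs, noisy_signal = wavfile.read("noisy_speech.wav")  # hypothetical input file
denoised = pra.denoise.apply_spectral_sub(
    noisy_signal.astype(np.float64), nfft=512, db_reduc=25, lookback=12,
    beta=30, alpha=1)
wavfile.write("denoised.wav", fs, denoised.astype(np.float32))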
Example #4
    def __init__(self,
                 calibration_signal,
                 step_size,
                 pb_ff,
                 nfft,
                 ds,
                 shift=None,
                 win=None):

        self.step_size = step_size  # for the LMS
        self.pb_ff = pb_ff  # forgetting factor for the projection back
        self.nfft = nfft
        self.nchannel = calibration_signal.shape[1]
        self.ds = ds

        if shift is None:
            shift = nfft // 2

        if win is None:
            win = pra.hann(nfft)

        # Compute the fixed beamformer
        X = pra.transform.analysis(calibration_signal, nfft, shift, win=win)
        self.fixed_weights = calibration(X)[1:, :]  # remove DC
        self.norm_weights = 1. / np.linalg.norm(
            self.fixed_weights, axis=1, keepdims=True)**2

        # gsc adaptive weights
        self.adaptive_weights = np.zeros(
            (self.fixed_weights.shape[0], self.nchannel // ds),
            dtype=self.fixed_weights.dtype)
        self.adaptive_weights[:, 0] = 1.

        # projection back weights
        self.pb_den = np.ones(self.fixed_weights.shape[0],
                              dtype=self.fixed_weights.dtype)
        self.pb_num = np.ones(self.fixed_weights.shape[0],
                              dtype=np.float64)

        self.estimates = {
            'covmat':
            LeakyIntegration(
                0.9,  # forgetting factor of the running average
                lambda X: X[:, :, None] * np.conj(X[:, None, :]),  # (nfreq, nchan, nchan)
                init=np.array([
                    np.eye(self.nchannel // self.ds)
                    for i in range(self.nfft // 2)
                ]) * 1e-3,
            ),
            'xcov':
            LeakyIntegration(
                0.9,
                lambda v: v[0] * np.conj(v[1][:, None]),
            ),
        }
Example #5
def half_overlap(D):

    if D == 1:
        x_local = x[:, 0]
    else:
        x_local = x[:, :D]

    hop = block_size//2

    # analysis
    analysis_win = pra.hann(block_size)
    X = analysis(x_local, L=block_size, hop=hop, win=analysis_win)

    # synthesis
    x_r = synthesis(X, L=block_size, hop=hop)

    return pra.dB(np.max(np.abs(x_local[:-hop, ] - x_r[hop:, ])))
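
The same half-overlap reconstruction check can be run standalone with the one-shot `analysis`/`synthesis` pair (the helper above relies on module-level `x` and `block_size`). A minimal sketch with assumed sizes:

import numpy as np
import pyroomacoustics as pra

block_size = 512
hop = block_size // 2
x = np.random.randn(8000)

X = pra.transform.analysis(x, L=block_size, hop=hop, win=pra.hann(block_size))
x_r = pra.transform.synthesis(X, L=block_size, hop=hop)

# compare on the common support, one hop of transient on each side
m = min(x.shape[0], x_r.shape[0])
print(pra.dB(np.max(np.abs(x[:m - hop] - x_r[hop:m]))))  # large negative dB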
Example #6
def hop_one_sample(D):

    if D == 1:
        x_local = x[:, 0]
    else:
        x_local = x[:, :D]

    hop = 1

    # analysis
    analysis_win = pra.hann(block_size)
    X = analysis(x_local, L=block_size, hop=hop, win=analysis_win)

    # synthesis
    synthesis_win = pra.transform.compute_synthesis_window(analysis_win, hop)
    x_r = synthesis(X, L=block_size, hop=hop, win=synthesis_win)

    return pra.dB(
        np.max(np.abs(x_local[:-block_size + hop, ] -
                      x_r[block_size - hop:, ])))
Example #7
def append_one_sample(D):
    hop = block_size // 2
    n_samples = x.shape[0]
    n_frames = n_samples // hop
    x_local = x[:n_frames * hop - 1, :]

    if D == 1:
        x_local = x_local[:, 0]
    else:
        x_local = x_local[:, :D]

    # analysis
    analysis_win = pra.hann(block_size)
    X = analysis(x_local, L=block_size, hop=hop, win=analysis_win)

    # synthesis
    x_r = synthesis(X, L=block_size, hop=hop)

    return pra.dB(
        np.max(
            np.abs(x_local[:-block_size + hop, ] -
                   x_r[block_size - hop:-1, ])))
Example #8
def process_experiment_max_sinr(SIR, mic, args):

    nfft = args.nfft
    vad_guard = args.vad_guard
    if args.thresh is None:
        vad_thresh = thresh_opt[SIR]
    else:
        vad_thresh = args.thresh

    # read_in the mix signals
    fs_led, leds = wavfile.read(
        file_pattern.format('camera_leds_zero_hold', 'mix', SIR))
    fs_snd, audio = wavfile.read(
        file_pattern.format(mic_choices[mic], 'mix', SIR))
    assert fs_led == fs_snd

    # read in the ref signals
    r, noise_ref = wavfile.read(
        file_pattern.format(mic_choices[mic], 'noise_ref', SIR))
    assert r == fs_snd
    r, speech_ref = wavfile.read(file_speech_ref.format(mic_choices[mic]))
    assert r == fs_snd
    r, leds_ref = wavfile.read(file_speech_ref.format('camera_leds_zero_hold'))
    assert r == fs_snd

    # In case of objective evaluation, we do an artificial mix
    if args.synth_mix:
        audio = noise_ref + speech_ref

    # get the geometry information to get nice plots.
    mics_loc = np.array(protocol['geometry']['microphones'][mic]['reference'])
    noise_loc = protocol['geometry']['speakers']['locations'][0]
    speech_loc = protocol['geometry']['speakers']['locations'][1]

    # the directions of arrival
    theta_speech = 0
    p0 = speech_loc - mics_loc
    p1 = noise_loc - mics_loc
    theta_noise = np.arccos(np.inner(p0, p1) / la.norm(p0) / la.norm(p1))
    print('Source separation', theta_noise / np.pi * 180)

    if mic == 'pyramic':
        I = list(range(8, 16)) + list(range(24, 32)) + list(range(40, 48))  # flat part
        #I = list(range(24,32)) + list(range(40,48)) # flat part
        #I = list(range(8,16))
        #I = list(range(48))
        audio = audio[:, I]
        noise_ref = noise_ref[:, I].copy()
        speech_ref = speech_ref[:, I].copy()
        mics_positions = mics_geom['pyramic'][I].copy()
        # place in room 2-806
        mics_positions -= np.mean(mics_positions, axis=0)[None, :]
        mics_positions[:, 2] -= np.max(mics_positions[:, 2])
        mics_positions += mics_loc

    elif mic == 'olympus':
        mics_positions = mics_geom['olympus'].copy() + mics_loc

    n_samples = audio.shape[0]  # shorthand
    n_channels = audio.shape[1]

    # perform VAD
    vad_snd = leds > vad_thresh

    # Now we want to make sure no speech goes into the estimation of the noise covariance matrix.
    # For that, we remove the frames neighbouring the detected speech.
    vad_guarded = vad_snd.copy()
    if vad_guard is not None:
        for i, v in enumerate(vad_snd):
            if np.any(vad_snd[i - vad_guard:i + vad_guard]):
                vad_guarded[i] = True

    ##############################
    ## STFT and frame-level VAD ##
    ##############################

    print('STFT and stuff')
    sys.stdout.flush()

    engine = pra.realtime.STFT(nfft,
                               nfft // 2,
                               pra.hann(nfft),
                               channels=audio.shape[1])

    def analysis(x):
        engine.analysis(x)
        return np.moveaxis(engine.X, 1, 0)

    # Now compute the STFT of the microphone input
    X = analysis(audio)
    X_time = np.arange(1, X.shape[0] + 1) * (nfft / 2) / fs_snd

    X_speech = analysis(audio * vad_guarded[:, None])
    X_noise = analysis(audio * (1 - vad_guarded[:, None]))

    S_ref = analysis(speech_ref)
    N_ref = analysis(noise_ref)

    ##########################
    ## MAX SINR BEAMFORMING ##
    ##########################

    print('Max SINR beamformer computation')
    sys.stdout.flush()

    # covariance matrices from noisy signal
    Rs = np.einsum('i...j,i...k->...jk', X_speech, np.conj(X_speech))
    Rn = np.einsum('i...j,i...k->...jk', X_noise, np.conj(X_noise))

    # compute covariances with reference signals to check everything is working correctly
    #Rs = np.einsum('i...j,i...k->...jk', S_ref, np.conj(S_ref))
    #Rn = np.einsum('i...j,i...k->...jk', N_ref, np.conj(N_ref))

    # compute the MaxSINR beamformer
    w = [
        la.eigh(rs, b=rn, subset_by_index=(n_channels - 1, n_channels - 1))[1]
        for rs, rn in zip(Rs[1:], Rn[1:])
    ]
    w = np.squeeze(np.array(w))
    nw = la.norm(w, axis=1)
    w[nw > 1e-10, :] /= nw[nw > 1e-10, None]
    w = np.concatenate([np.ones((1, n_channels)), w], axis=0)

    if not args.no_norm:
        # normalize with respect to input signal
        z = compute_gain(w,
                         X_speech,
                         X_speech[:, :, 0],
                         clip_up=args.clip_gain)
        w *= z[:, None]

    ###########
    ## APPLY ##
    ###########

    print('Apply beamformer')
    sys.stdout.flush()

    # 2D beamformer
    mic_array = pra.Beamformer(mics_positions[:, :2].T,
                               fs=fs_snd,
                               N=nfft,
                               hop=nfft,
                               zpb=nfft)
    mic_array.signals = audio.T
    mic_array.weights = w.T

    out = mic_array.process()

    # Signal alignment step
    ref = np.vstack([speech_ref[:, 0], noise_ref[:, 0]])
    # Not sure why the delay is sometimes negative here... Need to check more
    delay = np.abs(
        int(pra.tdoa(out, speech_ref[:, 0].astype(np.float64), phat=True)))
    if delay > 0:
        out_trunc = out[delay:delay + ref.shape[1]]
        noise_eval = audio[:ref.shape[1], 0] - out_trunc
    else:
        out_trunc = np.concatenate(
            (np.zeros(-delay), out[:ref.shape[1] + delay]))
        noise_eval = audio[:ref.shape[1], 0] - out_trunc
    sig_eval = np.vstack([out_trunc, noise_eval])

    # We use the BSS eval toolbox
    metric = bss_eval_images(ref[:, :, None], sig_eval[:, :, None])

    # we are only interested in SDR and SIR for the speech source
    SDR_out = metric[0][0]
    SIR_out = metric[2][0]

    ##################
    ## SAVE SAMPLES ##
    ##################

    if args.save_sample is not None:

        # for informal listening tests, we need to high pass and normalize the
        # amplitude.
        upper = np.maximum(audio[:, 0].max(), out.max())
        sig_in = pra.highpass(audio[:, 0].astype(np.float64) / upper,
                              fs_snd,
                              fc=150)
        sig_out = pra.highpass(out / upper, fs_snd, fc=150)

        f1 = os.path.join(args.save_sample,
                          '{}_ch0_SIR_{}_dB.wav'.format(mic, SIR))
        wavfile.write(f1, fs_snd, sig_in)
        f2 = os.path.join(args.save_sample,
                          '{}_out_SIR_{}_dB.wav'.format(mic, SIR))
        wavfile.write(f2, fs_snd, sig_out)

    ##########
    ## PLOT ##
    ##########

    if args.plot:

        plt.figure()
        plt.plot(out_trunc)
        plt.plot(speech_ref[:, 0])
        plt.legend(['output', 'reference'])

        # time axis for plotting
        led_time = np.arange(leds.shape[0]) / fs_led + 1 / (2 * fs_led)
        audio_time = np.arange(n_samples) / fs_snd

        plt.figure()
        plt.plot(led_time, leds, 'r')
        plt.title('LED signal')

        # match the scales of VAD and light to sound before plotting
        q_vad = np.max(audio)
        q_led = np.max(audio) / np.max(leds)

        plt.figure()
        plt.plot(audio_time, audio[:, 0], 'b')
        plt.plot(led_time, leds * q_led, 'r')
        plt.plot(audio_time, vad_snd * q_vad, 'g')
        plt.plot(audio_time, vad_guarded * q_vad, 'g--')
        plt.legend(['audio', 'leds', 'VAD', 'VAD guarded'])
        plt.title('LED and audio signals')

        plt.figure()
        a_time = np.arange(audio.shape[0]) / fs_snd
        plt.plot(a_time, audio[:, 0])
        plt.plot(a_time, out_trunc)
        #plt.plot(a_time, speech_ref[:,0])
        plt.legend(['channel 0', 'beamformer output'])

        plt.figure()
        mic_array.plot_beam_response()
        plt.vlines(
            [180 + np.degrees(theta_speech), 180 - np.degrees(theta_noise)], 0,
            nfft // 2)

        room = pra.ShoeBox(protocol['geometry']['room'][:2],
                           fs=16000,
                           max_order=1)

        room.add_source(noise_loc[:2])  # noise
        room.add_source(speech_loc[:2])  # speech
        room.add_source(
            protocol['geometry']['speakers']['locations'][1][:2])  # signal

        room.add_microphone_array(mic_array)
        room.plot(img_order=1, freq=[800, 1000, 1200, 1400, 1600, 2500, 4000])

        plt.figure()
        mic_array.plot()

        plt.show()

    # Return SDR and SIR
    return SDR_out, SIR_out
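
The heart of the routine above is a per-frequency generalized eigenvalue problem: the MaxSINR weights are the dominant generalized eigenvector of the pair (Rs, Rn). A small self-contained sketch on toy covariance matrices (sizes and regularization are assumptions):

import numpy as np
from scipy import linalg as la

n_channels = 4
A = np.random.randn(n_channels, n_channels) + 1j * np.random.randn(n_channels, n_channels)
B = np.random.randn(n_channels, n_channels) + 1j * np.random.randn(n_channels, n_channels)
Rs = A @ A.conj().T                              # toy speech covariance
Rn = B @ B.conj().T + 1e-3 * np.eye(n_channels)  # toy noise covariance

# the dominant generalized eigenvector of (Rs, Rn) maximizes w^H Rs w / w^H Rn w
w = la.eigh(Rs, b=Rn, subset_by_index=(n_channels - 1, n_channels - 1))[1][:, 0]
w /= la.norm(w)

print(np.real(w.conj() @ Rs @ w) / np.real(w.conj() @ Rn @ w))  # achieved SINR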
Example #9
    # fix the randomness for repeatability
    np.random.seed(10)

    # set the source powers, the first one is half
    source_std = np.ones(n_sources_target)
    source_std[0] /= np.sqrt(2.0)

    SIR = 10  # dB
    SNR = 60  # dB, with respect to a single target source and microphone self-noise

    # STFT parameters
    framesize = 4096
    win_a = pra.hann(framesize)
    win_s = pra.transform.compute_synthesis_window(win_a, framesize // 2)

    # algorithm parameters
    n_iter = 51
    n_nmf_sub_iter = 20
    sparse_reg = 0.0

    # pre-emphasis of blinky signals
    pre_emphasis = False

    # Geometry of the room and location of sources and microphones
    room_dim = np.array([10, 7.5, 3])

    mic_locs = np.vstack((
        pra.circular_2D_array([4.1, 3.76], n_mics, np.pi / 2, 0.02),
Example #10

import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from scipy.linalg import toeplitz
from scipy.io import wavfile
from scipy.signal import resample, fftconvolve

import pyroomacoustics as pra

# Spectrogram figure properties
figsize=(15, 7)        # figure size
fft_size = 512         # fft size for analysis
fft_hop  = 8           # hop between analysis frame
fft_zp = 512           # zero padding
analysis_window = np.concatenate((pra.hann(fft_size), np.zeros(fft_zp)))
t_cut = 0.83           # length in [s] to remove at end of signal (no sound)

# Some simulation parameters
Fs = 8000
t0 = 1./(Fs*np.pi*1e-2)  # starting time function of sinc decay in RIR response
absorption = 0.90
max_order_sim = 10
sigma2_n = 5e-7

# Room 1 : Shoe box
room_dim = [4, 6]

# the good source is fixed for all 
good_source = np.array([1, 4.5])           # good source
normal_interferer = np.array([2.8, 4.3])   # interferer
Example #11
"""
Length of filter in time domain = <fft_size> / <samp_freq> * <num_taps>
"""

# the unknown filters in the frequency domain
num_bands = fft_length // 2 + 1
W = np.random.randn(num_taps, num_bands) + \
    1j * np.random.randn(num_taps, num_bands)
W /= np.linalg.norm(W, axis=0)

# create a known driving signal
x = np.random.randn(n_samples)

# take to STFT domain
window = pra.hann(fft_length)  # the analysis window
hop = fft_length//2
stft_in = pra.transform.STFT(fft_length, hop=hop,
                             analysis_window=window, channels=1)
stft_out = pra.transform.STFT(fft_length, hop=hop,
                              analysis_window=window, channels=1)

n = 0
num_blocks = 0
X_concat = np.zeros((num_bands, n_samples // hop), dtype=np.complex64)
while n_samples - n > hop:

    stft_in.analysis(x[n:n+hop,])
    X_concat[:,num_blocks] = stft_in.X

    n += hop
Example #12
def createroom(amBird, saBird, noises, mic_p, mic_d, sour_p, sour_d,
               callback_mix, roomdim, absorption, max_order, n_mics, angle):
    np.random.seed(10)
    # STFT parameters
    framesize = 4096
    win_a = pra.hann(framesize)
    win_s = pra.transform.compute_synthesis_window(win_a, framesize // 2)
    # algorithm parameters
    # param ogive
    ogive_mu = 0.1
    ogive_update = "switching"
    ogive_iter = 2000

    ######## separation params ########
    algo = algo_choices[0]
    no_cb = True
    save = True
    n_iter = 60
    dist = "gauss"  # gauss or laplace
    ######## params set ########
    fs = 44100
    snr = 60
    sinr = 10
    # absorption, max_order = 0.45, 12  # RT60 == 0.2
    # absorption,max_order=0.9,17
    n_sources = 2 + 3
    n_mics = n_mics
    n_sources_target = 2
    assert n_sources_target <= n_mics, "More sources than microphones is not supported"

    # set the source powers, the first one is half
    source_std = np.ones(n_sources_target)
    # positions
    # room size
    room_dim = roomdim
    # microphone positions
    rot = angle
    offset = np.pi - rot / 2
    mic_locs = semi_circle_layout(mic_p, rot, mic_d, n_mics,
                                  rot=offset)  # micro2
    # mic_locs = np.transpose([[13, 9.99, 3.5],[13, 10, 3.5],[13, 10.01, 3.5]])###micro3

    # target positions
    target_locs = np.transpose([[7, 10, 6], [9, 16, 6]])
    # interferer positions
    interferer_locs = random_layout([16, 2, 6],
                                    n_sources - n_sources_target,
                                    offset=[5, 18, 3],
                                    seed=1)
    source_locs = np.concatenate((target_locs, interferer_locs), axis=1)

    # audios loaded
    wav_files = [amBird, saBird, noises[0], noises[1], noises[2]]
    signals = wav_read_center(wav_files, seed=123)

    # create room
    room = pra.ShoeBox(room_dim,
                       fs=44100,
                       absorption=absorption,
                       max_order=max_order,
                       air_absorption=True,
                       humidity=50)

    # add source
    for sig, loc in zip(signals, source_locs.T):
        room.add_source(loc, signal=sig)

    # add microphone array
    room.add_microphone_array(pra.MicrophoneArray(mic_locs, fs=room.fs))

    # simulate and set the source powers
    premix = room.simulate(return_premix=True)
    n_samples = premix.shape[2]
    # Normalize the signals so that they all have unit variance at the reference microphone
    ref_mic = 0
    p_mic_ref = np.std(premix[:, ref_mic, :], axis=1)
    premix /= p_mic_ref[:, None, None]
    sources_var = np.ones(n_sources_target)
    # scale to pre-defined variance
    premix[:n_sources_target, :, :] *= np.sqrt(sources_var[:, None, None])

    # compute noise variance
    sigma_n = np.sqrt(10**(-snr / 10) * np.sum(sources_var))

    # now compute the power of interference signal needed to achieve desired SINR
    sigma_i = np.sqrt(
        np.maximum(0, 10**(-sinr / 10) * np.sum(sources_var) - sigma_n**2) /
        (n_sources - n_sources_target))
    premix[n_sources_target:, :, :] *= sigma_i
    background = np.sum(premix[n_sources_target:, :, :], axis=0)

    # Mix down the recorded signals
    mix = np.sum(premix, axis=0)
    mics_signals = room.mic_array.signals

    print("Simulation done.")

    # rt60 = room.measure_rt60()
    # print(rt60)

    # Monitor Convergence
    ref = np.zeros((n_sources_target + 1, premix.shape[2], premix.shape[1]),
                   dtype=premix.dtype)
    ref[:n_sources_target, :, :] = premix[:n_sources_target, :, :].swapaxes(
        1, 2)
    ref[n_sources_target, :, :] = background.T
    convergence_callback = None

    # START BSS

    # shape: (n_frames, n_freq, n_mics)
    X_all = pra.transform.analysis(mics_signals.T,
                                   framesize,
                                   framesize // 2,
                                   win=win_a).astype(np.complex128)
    X_mics = X_all[:, :, :n_mics]

    # Run BSS
    if algo == "auxiva":
        # Run AuxIVA
        Y = overiva(
            X_mics,
            n_iter=n_iter,
            proj_back=True,
            model=dist,
            callback=convergence_callback,
        )
    elif algo == "auxiva_pca":
        # Run AuxIVA
        Y = auxiva_pca(
            X_mics,
            n_src=n_sources_target,
            n_iter=n_iter,
            proj_back=True,
            model=dist,
            callback=convergence_callback,
        )
    elif algo == "overiva":
        # Run AuxIVA
        Y = overiva(
            X_mics,
            n_src=n_sources_target,
            n_iter=n_iter,
            proj_back=True,
            model=dist,
            init_eig=(init == init_choices[1]),
            callback=convergence_callback,
        )
    elif algo == "ilrma":
        # Run AuxIVA
        Y = pra.bss.ilrma(
            X_mics,
            n_iter=n_iter,
            n_components=2,
            proj_back=True,
            callback=convergence_callback,
        )
    elif algo == "ogive":
        # Run OGIVE
        Y = ogive(
            X_mics,
            n_iter=ogive_iter,
            step_size=ogive_mu,
            update=ogive_update,
            proj_back=True,
            model=dist,
            init_eig=(init == init_choices[1]),
            callback=convergence_callback,
        )
    elif algo == "ogive_matlab":
        # Run OGIVE
        Y = ogive_matlab_wrapper(
            X_mics,
            n_iter=ogive_iter,
            step_size=ogive_mu,
            update=ogive_update,
            proj_back=True,
            init_eig=(init == init_choices[1]),
            callback=convergence_callback,
        )
    else:
        raise ValueError("No such algorithm {}".format(algo))

    # Run iSTFT
    if Y.shape[2] == 1:
        y = pra.transform.synthesis(Y[:, :, 0],
                                    framesize,
                                    framesize // 2,
                                    win=win_s)[:, None]
        y = y.astype(np.float64)
    else:
        y = pra.transform.synthesis(Y, framesize, framesize // 2,
                                    win=win_s).astype(np.float64)

    # If some of the output are uniformly zero, just add a bit of noise to compare
    for k in range(y.shape[1]):
        if np.sum(np.abs(y[:, k])) < 1e-10:
            y[:, k] = np.random.randn(y.shape[0]) * 1e-10

    # For conventional methods of BSS, reorder the signals by decreasing power
    if algo != "blinkiva":
        new_ord = np.argsort(np.std(y, axis=0))[::-1]
        y = y[:, new_ord]

    # Compare SIR
    m = np.minimum(y.shape[0] - framesize // 2, ref.shape[1])
    sdr, sir, sar, perm = bss_eval_sources(
        ref[:n_sources_target, :m, 0],
        y[framesize // 2:m + framesize // 2, :n_sources_target].T,
    )

    # reorder the vector of reconstructed signals
    y_hat = y[:, perm]

    return pra.normalize(mics_signals,
                         bits=16).astype(np.int16).T, y_hat, sir, sdr
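
The interference scaling in `createroom` follows directly from the SNR/SINR definitions: the microphone self-noise power is set from the SNR, and whatever power budget remains for the desired SINR is split across the interferers. A worked standalone sketch of those two lines:

import numpy as np

snr, sinr = 60, 10            # dB, as in the function above
n_sources, n_sources_target = 5, 2
sources_var = np.ones(n_sources_target)

sigma_n = np.sqrt(10 ** (-snr / 10) * np.sum(sources_var))
sigma_i = np.sqrt(
    np.maximum(0, 10 ** (-sinr / 10) * np.sum(sources_var) - sigma_n ** 2)
    / (n_sources - n_sources_target))
print(sigma_n, sigma_i)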
Example #13
def with_arbitrary_overlap_synthesis_window(D,
                                            num_frames=1,
                                            fixed_memory=False,
                                            streaming=True,
                                            overlap=0.5):
    """
    D             - number of channels
    num_frames    - how many frames to process, None will process one frame at
                    a time
    fixed_memory  - whether to enforce checks for size (real-time consideration)
    streaming     - whether or not to stitch between frames
    """

    if D == 1:
        x_local = x[:, 0]
    else:
        x_local = x[:, :D]

    # parameters
    block_size = 512  # make sure the FFT size is a power of 2
    hop = int((1 - overlap) * block_size)  # hop size follows from the overlap fraction
    if not streaming:
        num_samples = (num_frames - 1) * hop + block_size
        x_local = x_local[:num_samples, ]

    analysis_window = pra.hann(block_size)
    synthesis_window = pra.realtime.compute_synthesis_window(
        analysis_window, hop)

    # Create the STFT object
    if fixed_memory:
        stft = STFT(block_size,
                    hop=hop,
                    channels=D,
                    transform=transform,
                    num_frames=num_frames,
                    analysis_window=analysis_window,
                    synthesis_window=synthesis_window,
                    streaming=streaming)
    else:
        stft = STFT(block_size,
                    hop=hop,
                    channels=D,
                    analysis_window=analysis_window,
                    synthesis_window=synthesis_window,
                    transform=transform,
                    streaming=streaming)

    # collect the processed blocks
    processed_x = np.zeros(x_local.shape)

    if streaming:

        n = 0
        hop_frames = hop * num_frames
        # process the signals while full blocks are available
        while x_local.shape[0] - n > hop_frames:
            stft.analysis(x_local[n:n + hop_frames, ])
            processed_x[n:n + hop_frames, ] = stft.synthesis()
            n += hop_frames

        error = np.max(
            np.abs(x_local[:n - block_size + hop, ] -
                   processed_x[block_size - hop:n, ]))

        if 20 * np.log10(error) > -10:
            import matplotlib.pyplot as plt
            if x_local.ndim == 1:
                plt.plot(x_local[:n - block_size + hop])
                plt.plot(processed_x[block_size - hop:n])
            else:
                plt.plot(x_local[:n - block_size + hop, 0])
                plt.plot(processed_x[block_size - hop:n, 0])
            plt.show()

    else:

        stft.analysis(x_local)
        processed_x = stft.synthesis()
        n = processed_x.shape[0]

        L = block_size - hop
        error = np.max(np.abs(x_local[L:-L, ] - processed_x[L:, ]))

        if 20 * np.log10(error) > -10:
            import matplotlib.pyplot as plt
            if x_local.ndim == 1:
                plt.plot(x_local[L:-L])
                plt.plot(processed_x[L:])
            else:
                plt.plot(x_local[L:-L, 0])
                plt.plot(processed_x[L:, 0])
            plt.show()

    return error
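
The key ingredient in Example #13 is the dual (synthesis) window: for hop sizes other than half the block, `compute_synthesis_window` returns the window that makes analysis plus overlap-add synthesis exact. A short sketch checking the constant-overlap-add property at 75% overlap (the check region is the fully overlapped part of the first block):

import numpy as np
import pyroomacoustics as pra

block_size = 512
hop = block_size // 4  # 75% overlap
analysis_window = pra.hann(block_size)
synthesis_window = pra.transform.compute_synthesis_window(analysis_window, hop)

# overlap-add the product of the two windows over four shifted frames
ola = np.zeros(block_size + 3 * hop)
for k in range(4):
    ola[k * hop:k * hop + block_size] += analysis_window * synthesis_window

print(np.allclose(ola[3 * hop:block_size], 1.0))  # True in the covered region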
Example #14
from __future__ import division, print_function
from unittest import TestCase
import numpy as np
import pyroomacoustics as pra

tol = -80  # dB
nfft = 128
D = 7
x = np.random.randn(nfft, D).astype('float32')
X_numpy = np.fft.rfft(x, axis=0).astype('complex64')
analysis_window = pra.hann(nfft)
synthesis_window = pra.hann(nfft)


def no_window(nfft, D, transform, axis=0):

    if D == 1:
        x_local = x[:, 0]
        X_local = X_numpy[:, 0]
    else:
        if axis == 0:
            x_local = x
            X_local = X_numpy
        else:
            x_local = x.T
            X_local = X_numpy.T

    # make object
    dft = pra.transform.DFT(nfft, D, transform=transform, axis=axis)

    # forward
Example #15
def run(args, parameters):
    """
    This is the core loop of the simulation
    """

    # expand arguments
    sinr, n_targets, n_interf, n_mics, dist_ratio, room_params, seed = args

    n_sources = n_targets + n_interf

    # this is the underdetermined case. We don't do that.
    if n_mics < n_targets:
        return []

    # set the RNG seed
    rng_state = np.random.get_state()
    np.random.seed(seed)

    # get all the signals
    files_absolute = [
        os.path.join(parameters["base_dir"], fn)
        for fn in room_params["wav"][:n_sources]
    ]
    source_signals = wav_read_center(files_absolute, seed=123)

    # create the room
    room = pra.ShoeBox(**room_params["room_kwargs"])
    R = np.array(room_params["mic_array"])
    room.add_microphone_array(pra.MicrophoneArray(R[:, :n_mics], room.fs))
    source_locs = np.array(room_params["sources"])
    for n in range(n_sources):
        room.add_source(source_locs[:, n], signal=source_signals[n, :])

    # compute RIRs and RT60
    room.compute_rir()
    rt60 = np.median([
        pra.experimental.measure_rt60(room.rir[0][n], fs=room.fs)
        for n in range(n_targets)
    ])

    # signals after propagation but before mixing
    # (n_sources, n_mics, n_samples)
    premix = room.simulate(return_premix=True)
    n_samples = premix.shape[-1]

    # create the mix (n_mics, n_samples)
    # this routine will also resize the signals in premix
    mix = callback_noise_mixer(premix,
                               sinr=sinr,
                               n_src=n_targets + n_interf,
                               n_tgt=n_targets,
                               **parameters["mix_params"])

    # create the reference signals
    # (n_sources + 1, n_samples)
    refs = np.zeros((n_targets + 1, n_samples))
    refs[:-1, :] = premix[:n_targets, parameters["mix_params"]["ref_mic"], :]
    refs[-1, :] = np.sum(premix[n_targets:, 0, :], axis=0)

    # STFT parameters
    framesize = parameters["stft_params"]["framesize"]
    hop = parameters["stft_params"]["hop"]
    if parameters["stft_params"]["window"] == "hann":
        win_a = pra.hamming(framesize)
    else:  # default is Hann
        win_a = pra.hann(framesize)

    # START BSS
    ###########

    # shape: (n_frames, n_freq, n_mics)
    X_all = pra.transform.analysis(mix.T, framesize, hop, win=win_a)
    X_mics = X_all[:, :, :n_mics]

    # store results in a list, one entry per algorithm
    results = []

    # compute the initial values of SDR/SIR
    init_sdr = []
    init_sir = []

    for full_name, params in parameters["algorithm_kwargs"].items():

        name = params["algo"]
        kwargs = params["kwargs"]

        if not bss.is_determined[name] and bss.is_dual_update[
                name] and n_targets == 1:
            # Overdetermined algorithms with dual updates cannot be used
            # in the single source case (they can extract at least two sources)
            continue
        elif bss.is_single_source[name] and n_targets > 1:
            # doesn't work for multi source scenario
            continue
        elif bss.is_overdetermined[name] and n_targets == n_mics:
            # don't run the overdetermined stuff in determined case
            continue

        results.append({
            "algorithm": full_name,
            "n_targets": n_targets,
            "n_interferers": n_interf,
            "n_mics": n_mics,
            "rt60": rt60,
            "dist_ratio": dist_ratio,
            "sinr": sinr,
            "seed": seed,
            "sdr": [],
            "sir": [],  # to store the result
            "cost": [],
            "runtime": np.nan,
            "eval_time": np.nan,
            "n_samples": n_samples,
        })

        # this is used to keep track of time spent in the evaluation callback
        eval_time = []

        def cb(W, Y, source_model):
            convergence_callback(
                W,
                Y,
                source_model,
                X_mics,
                n_targets,
                results[-1]["sdr"],
                results[-1]["sir"],
                results[-1]["cost"],
                eval_time,
                refs,
                parameters["mix_params"]["ref_mic"],
                parameters["stft_params"],
                name,
                not bss.is_determined[name],
            )

        if "model" not in kwargs:
            local_model = bss.default.model
        else:
            local_model = kwargs["model"]

        cb(np.eye(n_mics)[None, :, :], X_mics, local_model)

        try:
            t_start = time.perf_counter()

            bss.separate(X_mics,
                         n_src=n_targets,
                         algorithm=name,
                         callback=cb,
                         proj_back=False,
                         **kwargs)

            t_finish = time.perf_counter()

            results[-1]["eval_time"] = np.sum(eval_time)
            results[-1][
                "runtime"] = t_finish - t_start - results[-1]["eval_time"]

        except Exception:

            # get the traceback
            tb = traceback.format_exc()

            report = {
                "algorithm": name,
                "n_src": n_targets,
                "kwargs": kwargs,
                "result": results[-1],
                "tb": tb,
            }

            pid = os.getpid()
            # report last sdr/sir as np.nan
            results[-1]["sdr"].append(np.nan)
            results[-1]["sir"].append(np.nan)
            # now write the problem to file
            fn_err = os.path.join(parameters["_results_dir"],
                                  "error_{}.json".format(pid))
            with open(fn_err, "a") as f:
                f.write(json.dumps(report, indent=4))
                f.write(",\n")

            # skip to next iteration
            continue

    # restore RNG former state
    np.random.set_state(rng_state)

    return results
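
One pattern worth noting in `run` is how it isolates its randomness: the global NumPy RNG state is saved, the experiment is seeded deterministically, and the state is restored on exit so callers are unaffected. In isolation:

import numpy as np

rng_state = np.random.get_state()  # remember the caller's RNG state
np.random.seed(1234)               # deterministic draws inside the experiment
sample = np.random.randn(3)
np.random.set_state(rng_state)     # leave the global RNG as we found it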
Example #16
from unittest import TestCase
import numpy as np
from scipy.signal import fftconvolve

import pyroomacoustics as pra

# fix seed for repeatability
np.random.seed(0)

h_len = 30
x_len = 1000
SNR = 1000.0  # decibels

h_lp = np.fft.irfft(np.ones(5), n=h_len)
h_rand = np.random.randn(h_len)
h_hann = pra.hann(h_len, flag="symmetric")

x = np.random.randn(x_len)
noise = np.random.randn(x_len + h_len - 1)


def generate_signals(SNR, x, h, noise):
    """run convolution"""

    # noise standard deviation
    sigma_noise = 10**(-SNR / 20.0)

    y = fftconvolve(x, h)
    y += sigma_noise * noise

    return y, sigma_noise
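
A quick use of the helper above, reusing the module-level signals it was written against; the 20 dB SNR value is arbitrary:

y, sigma_noise = generate_signals(SNR=20.0, x=x, h=h_hann, noise=noise)
print(y.shape, sigma_noise)  # (x_len + h_len - 1,) and 10 ** (-20 / 20) = 0.1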
Example #17
from unittest import TestCase
import numpy as np
from scipy.signal import fftconvolve

import pyroomacoustics as pra

# fix seed for repeatability
np.random.seed(0)

h_len = 30
x_len = 1000
SNR = 1000.  # decibels

h_lp = np.fft.irfft(np.ones(5), n=h_len)
h_rand = np.random.randn(h_len)
h_hann = pra.hann(h_len, flag='symmetric')

x = np.random.randn(x_len)
noise = np.random.randn(x_len + h_len - 1)

def generate_signals(SNR, x, h, noise):
    ''' run convolution '''

    # noise standard deviation
    sigma_noise = 10**(-SNR / 20.)

    y = fftconvolve(x, h) 
    y += sigma_noise * noise

    return y, sigma_noise
Example #18
from __future__ import division, print_function
import numpy as np
from scipy.io import wavfile
import matplotlib.pyplot as plt
import pyroomacoustics as pra
import os

# filter to apply
h_len = 99
h = np.ones(h_len)
h /= np.linalg.norm(h)

# parameters
block_size = 512 - h_len + 1  # so that block_size + h_len - 1 = 512, a power of 2 for the FFT
hop = block_size // 2  # half overlap
window = pra.hann(block_size, flag='asymmetric',
                  length='full')  # analysis window (no synthesis window)

# open single channel audio file
fn = os.path.join(os.path.dirname(__file__), 'input_samples',
                  'singing_8000.wav')
fs, audio = wavfile.read(fn)

# Create the STFT object
stft = pra.transform.STFT(block_size,
                          hop=hop,
                          analysis_window=window,
                          channels=1,
                          streaming=True)

# set the filter and the appropriate amount of zero padding (back)
if h_len > 1:
Example #19

def one_loop(args):
    global parameters

    import time
    import numpy

    np = numpy

    import pyroomacoustics

    pra = pyroomacoustics

    import os
    import sys

    sys.path.append(parameters["base_dir"])

    from auxiva_pca import auxiva_pca, pca_separation
    from five import five
    from ive import ogive
    from overiva import overiva
    from pyroomacoustics.bss.common import projection_back
    from room_builder import callback_noise_mixer, random_room_builder

    # import samples helper routine
    from get_data import samples_dir

    sys.path.append(os.path.join(parameters['base_dir'], samples_dir))
    from generate_samples import wav_read_center

    n_targets, n_interferers, n_mics, sinr, wav_files, room_seed, seed = args

    # this is the underdetermined case. We don't do that.
    if n_mics < n_targets:
        return []

    # set MKL to only use one thread if present
    try:
        import mkl

        mkl.set_num_threads(1)
    except ImportError:
        pass

    # set the RNG seed
    rng_state = np.random.get_state()
    np.random.seed(seed)

    # STFT parameters
    framesize = parameters["stft_params"]["framesize"]
    hop = parameters["stft_params"]["hop"]
    if parameters["stft_params"]["window"] == "hann":
        win_a = pra.hamming(framesize)
    else:  # default is Hann
        win_a = pra.hann(framesize)
    win_s = pra.transform.compute_synthesis_window(win_a, hop)

    # Generate the audio signals

    # get the simulation parameters from the json file
    # Simulation parameters
    sources_var = np.ones(n_targets)

    # total number of sources
    n_sources = n_targets + n_interferers

    # Read the signals
    wav_files = [os.path.join(parameters["base_dir"], fn) for fn in wav_files]
    signals = wav_read_center(wav_files[:n_sources], seed=123)

    # Get a random room
    room, rt60 = random_room_builder(signals,
                                     n_mics,
                                     seed=room_seed,
                                     **parameters["room_params"])
    premix = room.simulate(return_premix=True)

    # mix the signal
    n_samples = premix.shape[2]
    mix = callback_noise_mixer(
        premix,
        sinr=sinr,
        diffuse_ratio=parameters["sinr_diffuse_ratio"],
        n_src=n_sources,
        n_tgt=n_targets,
        tgt_std=np.sqrt(sources_var),
        ref_mic=parameters["ref_mic"],
    )

    # sum up the background
    # shape (n_mics, n_samples)
    background = np.sum(premix[n_targets:n_sources, :, :], axis=0)

    # shape (n_targets+1, n_samples, n_mics)
    ref = np.zeros((n_targets + 1, premix.shape[2], premix.shape[1]),
                   dtype=premix.dtype)
    ref[:n_targets, :, :] = premix[:n_targets, :, :].swapaxes(1, 2)
    ref[n_targets, :, :] = background.T

    synth = np.zeros_like(ref)

    # START BSS
    ###########

    # shape: (n_frames, n_freq, n_mics)
    X_all = pra.transform.analysis(mix.T, framesize, hop, win=win_a)
    X_mics = X_all[:, :, :n_mics]

    # convergence monitoring callback
    def convergence_callback(Y, X, n_targets, SDR, SIR, eval_time, ref,
                             framesize, win_s, algo_name):
        t_in = time.perf_counter()

        # projection back
        z = projection_back(Y, X[:, :, 0])
        Y = Y * np.conj(z[None, :, :])

        from mir_eval.separation import bss_eval_sources

        if Y.shape[2] == 1:
            y = pra.transform.synthesis(Y[:, :, 0], framesize, hop,
                                        win=win_s)[:, None]
        else:
            y = pra.transform.synthesis(Y, framesize, hop, win=win_s)

        if algo_name not in parameters["overdet_algos"]:
            new_ord = np.argsort(np.std(y, axis=0))[::-1]
            y = y[:, new_ord]

        m = np.minimum(y.shape[0] - hop, ref.shape[1])

        synth[:n_targets, :m, 0] = y[hop:m + hop, :n_targets].T
        synth[n_targets, :m, 0] = y[hop:m + hop, 0]

        sdr, sir, sar, perm = bss_eval_sources(ref[:n_targets + 1, :m, 0],
                                               synth[:, :m, 0])
        SDR.append(sdr[:n_targets].tolist())
        SIR.append(sir[:n_targets].tolist())

        t_out = time.perf_counter()
        eval_time.append(t_out - t_in)

    # store results in a list, one entry per algorithm
    results = []

    # compute the initial values of SDR/SIR
    init_sdr = []
    init_sir = []

    convergence_callback(X_mics, X_mics, n_targets, init_sdr, init_sir, [],
                         ref, framesize, win_s, "init")

    for full_name, params in parameters["algorithm_kwargs"].items():

        name = params["algo"]
        kwargs = params["kwargs"]

        if name == "auxiva_pca" and n_targets == 1:
            # PCA doesn't work for single source scenario
            continue
        elif name in ["ogive", "five"] and n_targets != 1:
            # OGIVE is only for single target
            continue

        results.append({
            "algorithm": full_name,
            "n_targets": n_targets,
            "n_interferers": n_interferers,
            "n_mics": n_mics,
            "rt60": rt60,
            "sinr": sinr,
            "seed": seed,
            "sdr": [],
            "sir": [],  # to store the result
            "runtime": np.nan,
            "eval_time": np.nan,
            "n_samples": n_samples,
        })

        # this is used to keep track of time spent in the evaluation callback
        eval_time = []

        def cb(Y):
            convergence_callback(
                Y,
                X_mics,
                n_targets,
                results[-1]["sdr"],
                results[-1]["sir"],
                eval_time,
                ref,
                framesize,
                win_s,
                name,
            )

        # avoid one computation by using the initial values of sdr/sir
        results[-1]["sdr"].append(init_sdr[0])
        results[-1]["sir"].append(init_sir[0])

        try:
            t_start = time.perf_counter()

            if name == "auxiva":
                # Run AuxIVA
                # this calls full IVA when `n_src` is not provided
                Y = overiva(X_mics, callback=cb, **kwargs)

            elif name == "auxiva_pca":
                # Run AuxIVA
                Y = auxiva_pca(X_mics,
                               n_src=n_targets,
                               callback=cb,
                               proj_back=False,
                               **kwargs)

            elif name == "overiva":
                # Run BlinkIVA
                Y = overiva(X_mics,
                            n_src=n_targets,
                            callback=cb,
                            proj_back=False,
                            **kwargs)

            elif name == "overiva2":
                # Run BlinkIVA
                Y = overiva(X_mics,
                            n_src=n_targets,
                            callback=cb,
                            proj_back=False,
                            **kwargs)

            elif name == "five":
                # Run AuxIVE
                Y = five(X_mics, callback=cb, proj_back=False, **kwargs)

            elif name == "ilrma":
                # Run AuxIVA
                Y = pra.bss.ilrma(X_mics,
                                  callback=cb,
                                  proj_back=False,
                                  **kwargs)

            elif name == "ogive":
                # Run OGIVE
                Y = ogive(X_mics, callback=cb, proj_back=False, **kwargs)

            elif name == "pca":
                # Run PCA
                Y = pca_separation(X_mics, n_src=n_targets)

            else:
                continue

            t_finish = time.perf_counter()

            # The last evaluation
            convergence_callback(
                Y,
                X_mics,
                n_targets,
                results[-1]["sdr"],
                results[-1]["sir"],
                [],
                ref,
                framesize,
                win_s,
                name,
            )

            results[-1]["eval_time"] = np.sum(eval_time)
            results[-1][
                "runtime"] = t_finish - t_start - results[-1]["eval_time"]

        except Exception:
            import json

            pid = os.getpid()
            # report last sdr/sir as np.nan
            results[-1]["sdr"].append(np.nan)
            results[-1]["sir"].append(np.nan)
            # now write the problem to file
            fn_err = os.path.join(parameters["_results_dir"],
                                  "error_{}.json".format(pid))
            with open(fn_err, "a") as f:
                f.write(json.dumps(results[-1], indent=4))
            # skip to next iteration
            continue

    # restore RNG former state
    np.random.set_state(rng_state)

    return results
Example #20
def apply_iterative_wiener(noisy_signal,
                           frame_len=512,
                           lpc_order=20,
                           iterations=2,
                           alpha=0.8,
                           thresh=0.01):
    """
    One-shot function to apply iterative Wiener filtering for denoising.

    Parameters
    ----------
    noisy_signal : numpy array
        Real signal in time domain.
    frame_len : int
        Frame length in samples. 50% overlap is used with a Hann window.
    lpc_order : int
        Number of LPC coefficients to compute
    iterations : int
        How many iterations to perform in updating the Wiener filter for each
        signal frame.
    alpha : float
        Smoothing factor within [0,1] for updating noise level. Closer to `1`
        gives more weight to the previous noise level, while closer to `0`
        gives more weight to the current frame's level. Closer to `0` can track
        more rapid changes in the noise level. However, if a speech frame is
        incorrectly identified as noise, you can end up removing desired
        speech.
    thresh : float
        Threshold to distinguish between (signal+noise) and (noise) frames. A
        high value will classify more frames as noise but might remove desired
        signal!

    Returns
    -------
    numpy array
        Enhanced/denoised signal.
    """

    from pyroomacoustics import hann
    from pyroomacoustics.transform import STFT

    hop = frame_len // 2
    window = hann(frame_len, flag='asymmetric', length='full')
    stft = STFT(frame_len, hop=hop, analysis_window=window, streaming=True)
    scnr = IterativeWiener(frame_len, lpc_order, iterations, alpha, thresh)

    processed_audio = np.zeros(noisy_signal.shape)
    n = 0
    while noisy_signal.shape[0] - n >= hop:
        # SCNR in frequency domain
        stft.analysis(noisy_signal[n:(n + hop), ])
        X = scnr.compute_filtered_output(current_frame=stft.fft_in_buffer,
                                         frame_dft=stft.X)

        # back to time domain
        processed_audio[n:n + hop, ] = stft.synthesis(X)

        # update step
        n += hop

    return processed_audio
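
A hypothetical usage sketch, analogous to the spectral-subtraction one-shot above. pyroomacoustics ships an equivalent as `pyroomacoustics.denoise.apply_iterative_wiener`; the file names are made up:

import numpy as np
from scipy.io import wavfile
import pyroomacoustics as pra

fs, noisy_signal = wavfile.read("noisy_speech.wav")  # hypothetical input file
denoised = pra.denoise.apply_iterative_wiener(
    noisy_signal.astype(np.float64), frame_len=512, lpc_order=20,
    iterations=2, alpha=0.8, thresh=0.01)
wavfile.write("denoised.wav", fs, denoised.astype(np.float32))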
Example #21
    def createroom(mic_p, mic_d, sour_p, sour_d, callback_mix, roomdim,
                   absorption, max_order, n_mics, angle):
        np.random.seed(10)
        # STFT parameters
        framesize = 4096
        win_a = pra.hann(framesize)
        win_s = pra.transform.compute_synthesis_window(win_a, framesize // 2)
        # algorithm parameters
        # param ogive
        ogive_mu = 0.1
        ogive_update = "switching"
        ogive_iter = 2000
        SIR = 10  # dB
        SNR = 60  # dB, with respect to a single target source and microphone self-noise

        ######## separation params ########
        algo = algo_choices[0]
        no_cb = True
        save = True
        n_iter = 60
        dist = "gauss"  # gauss or laplace
        ######## params set ########
        fs = 44100
        n_sources = 2
        n_mics = n_mics
        n_sources_target = 2
        assert n_sources_target <= n_mics, "More sources than microphones is not supported"

        # set the source powers, the first one is half
        source_std = np.ones(n_sources_target)
        # room size
        room_dim = roomdim
        # micro position
        rot = angle
        offset = np.pi - rot / 2
        mic_locs = semi_circle_layout(mic_p, rot, mic_d, n_mics,
                                      rot=offset)  ###micro2

        # target position
        target_locs = np.transpose([[7, 10, 6], [9, 16, 6]])
        # interferer positions
        interferer_locs = random_layout([14, 0, 6],
                                        n_sources - n_sources_target,
                                        offset=[5, 20, 3],
                                        seed=1)
        source_locs = target_locs
        # audio loaded
        wav_files = [amBird, saBird]
        signals = wav_read_center(wav_files, seed=123)

        # create room
        room = pra.ShoeBox(room_dim,
                           fs=44100,
                           absorption=absorption,
                           max_order=max_order,
                           air_absorption=True,
                           humidity=50)

        # add source
        for sig, loc in zip(signals, source_locs.T):
            room.add_source(loc, signal=sig)

        # add micro
        room.add_microphone_array(pra.MicrophoneArray(mic_locs, fs=room.fs))

        callback_mix_kwargs = {
            "snr": SNR,
            "sir": SIR,
            "n_src": n_sources,
            "n_tgt": n_sources_target,
            "src_std": source_std,
            "ref_mic": 0,
        }

        # Run the simulation
        separate_recordings = room.simulate(
            callback_mix=callback_mix,
            callback_mix_kwargs=callback_mix_kwargs,
            return_premix=True,
        )
        mics_signals = room.mic_array.signals
        print("Simulation done.")

        # rt60 = room.measure_rt60()
        # print(rt60)

        # Monitor Convergence
        ref = np.moveaxis(separate_recordings, 1, 2)
        if ref.shape[0] < n_mics:
            ref = np.concatenate(
                (ref,
                 np.random.randn(n_mics - ref.shape[0], ref.shape[1],
                                 ref.shape[2])),
                axis=0,
            )

        SDR, SIR, cost_func = [], [], []
        convergence_callback = None

        # START BSS

        # shape: (n_frames, n_freq, n_mics)
        X_all = pra.transform.analysis(mics_signals.T,
                                       framesize,
                                       framesize // 2,
                                       win=win_a).astype(np.complex128)
        X_mics = X_all[:, :, :n_mics]

        tic = time.perf_counter()

        # Run BSS
        if algo == "auxiva":
            # Run AuxIVA
            Y = overiva(
                X_mics,
                n_iter=n_iter,
                proj_back=True,
                model=dist,
                callback=convergence_callback,
            )
        elif algo == "auxiva_pca":
            # Run AuxIVA
            Y = auxiva_pca(
                X_mics,
                n_src=n_sources_target,
                n_iter=n_iter,
                proj_back=True,
                model=dist,
                callback=convergence_callback,
            )
        elif algo == "overiva":
            # Run AuxIVA
            Y = overiva(
                X_mics,
                n_src=n_sources_target,
                n_iter=n_iter,
                proj_back=True,
                model=dist,
                init_eig=(init == init_choices[1]),
                callback=convergence_callback,
            )
        elif algo == "ilrma":
            # Run AuxIVA
            Y = pra.bss.ilrma(
                X_mics,
                n_iter=n_iter,
                n_components=2,
                proj_back=True,
                callback=convergence_callback,
            )
        elif algo == "ogive":
            # Run OGIVE
            Y = ogive(
                X_mics,
                n_iter=ogive_iter,
                step_size=ogive_mu,
                update=ogive_update,
                proj_back=True,
                model=dist,
                init_eig=(init == init_choices[1]),
                callback=convergence_callback,
            )
        elif algo == "ogive_matlab":
            # Run OGIVE
            Y = ogive_matlab_wrapper(
                X_mics,
                n_iter=ogive_iter,
                step_size=ogive_mu,
                update=ogive_update,
                proj_back=True,
                init_eig=(init == init_choices[1]),
                callback=convergence_callback,
            )
        else:
            raise ValueError("No such algorithm {}".format(algo))

        toc = time.perf_counter()

        # Run iSTFT
        if Y.shape[2] == 1:
            y = pra.transform.synthesis(Y[:, :, 0],
                                        framesize,
                                        framesize // 2,
                                        win=win_s)[:, None]
            y = y.astype(np.float64)
        else:
            y = pra.transform.synthesis(Y,
                                        framesize,
                                        framesize // 2,
                                        win=win_s).astype(np.float64)

        # If some of the output are uniformly zero, just add a bit of noise to compare
        for k in range(y.shape[1]):
            if np.sum(np.abs(y[:, k])) < 1e-10:
                y[:, k] = np.random.randn(y.shape[0]) * 1e-10

        # For conventional methods of BSS, reorder the signals by decreasing power
        if algo != "blinkiva":
            new_ord = np.argsort(np.std(y, axis=0))[::-1]
            y = y[:, new_ord]

        # Compare SIR
        m = np.minimum(y.shape[0] - framesize // 2, ref.shape[1])
        sdr, sir, sar, perm = bss_eval_sources(
            ref[:n_sources_target, :m, 0],
            y[framesize // 2:m + framesize // 2, :n_sources_target].T,
        )

        # reorder the vector of reconstructed signals
        y_hat = y[:, perm]
        print("SDR:", sdr)
        print("SIR:", sir)

        ######## save mix and separation ########
        if save:
            from scipy.io import wavfile
            wavfile.write(
                "birdmix.wav",
                room.fs,
                pra.normalize(mics_signals, bits=16).astype(np.int16).T[:, 0],
            )
            for i, sig in enumerate(y_hat.T):
                wavfile.write(
                    "birdsep{}.wav".format(i + 1),
                    room.fs,
                    pra.normalize(sig, bits=16).astype(np.int16).T,
                )
Example #22

from __future__ import division, print_function
import numpy as np
from scipy.io import wavfile
import matplotlib.pyplot as plt
import pyroomacoustics as pra
import os

# filter to apply
h_len = 99
h = np.ones(h_len)
h /= np.linalg.norm(h)

# parameters
block_size = 512 - h_len + 1  # so that block_size + h_len - 1 = 512, a power of 2 for the FFT
hop = block_size // 2  # half overlap
window = pra.hann(block_size, flag="asymmetric",
                  length="full")  # analysis window (no synthesis window)

# open single channel audio file
fn = os.path.join(os.path.dirname(__file__), "input_samples",
                  "singing_8000.wav")
fs, audio = wavfile.read(fn)

# Create the STFT object
stft = pra.transform.STFT(block_size,
                          hop=hop,
                          analysis_window=window,
                          channels=1,
                          streaming=True)

# set the filter and the appropriate amount of zero padding (back)
if h_len > 1:
    stft.set_filter(h, zb=h_len - 1)  # zero-pad at the back to fit the filter tail
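
# The example is cut off at this point; the lines below sketch the usual
# continuation, following the streaming pattern used by the other STFT
# examples in this file: process hop-sized blocks while they are available.
processed_audio = np.zeros(audio.shape)
n = 0
while audio.shape[0] - n > hop:
    stft.analysis(audio[n:n + hop])
    stft.process()  # apply the filter set above
    processed_audio[n:n + hop] = stft.synthesis()
    n += hop
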
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from scipy.linalg import toeplitz
from scipy.io import wavfile
from scipy.signal import resample, fftconvolve

import pyroomacoustics as pra
import TDBeamformers as tdb

# Spectrogram figure properties
figsize = (15, 7)  # figure size
fft_size = 512  # fft size for analysis
fft_hop = 8  # hop between analysis frame
fft_zp = 512  # zero padding
analysis_window = np.concatenate((pra.hann(fft_size), np.zeros(fft_zp)))
t_cut = 0.83  # length in [s] to remove at end of signal (no sound)

# Some simulation parameters
Fs = 8000
t0 = 1.0 / (Fs * np.pi * 1e-2)  # starting time, function of sinc decay in RIR response
absorption = 0.90
max_order_sim = 10
sigma2_n = 5e-7

# Room 1 : Shoe box
room_dim = [4, 6]

# the good source is fixed for all
good_source = np.array([1, 4.5])  # good source
Example #24
def with_half_overlap_with_filter(D,
                                  num_frames=1,
                                  fixed_memory=False,
                                  streaming=True):
    """
    D             - number of channels
    num_frames    - how many frames to process, None will process one frame at 
                    a time 
    fixed_memory  - whether to enforce checks for size (real-time consideration)
    streaming     - whether or not to stitch between frames
    """

    if D == 1:
        x_local = x[:, 0]
        y_local = y[:, 0]
        h_local = h[:, 0]
    else:
        x_local = x[:, :D]
        y_local = y[:, :D]
        h_local = h[:, :D]

    # parameters
    block_size = 512 - h_len + 1  # make sure the FFT size is a power of 2
    hop = block_size // 2  # half overlap
    window = pra.hann(block_size)  # the analysis window
    if not streaming:
        num_samples = (num_frames - 1) * hop + block_size
        x_local = x_local[:num_samples, ]

    # Create the STFT object
    if fixed_memory:
        stft = STFT(block_size,
                    hop=hop,
                    channels=D,
                    transform=transform,
                    num_frames=num_frames,
                    analysis_window=window,
                    streaming=streaming)
    else:
        stft = STFT(block_size,
                    hop=hop,
                    channels=D,
                    transform=transform,
                    analysis_window=window,
                    streaming=streaming)

    # setup the filter
    stft.set_filter(h_local, zb=h_len - 1)

    # collect the processed blocks
    processed_x = np.zeros(x_local.shape)

    if not streaming:

        stft.analysis(x_local)
        stft.process()
        processed_x = stft.synthesis()
        n = processed_x.shape[0]

        error = np.max(
            np.abs(y_local[block_size:n - block_size, ] -
                   processed_x[block_size:n - block_size, ]))

    else:

        n = 0
        hop_frames = hop * num_frames
        # process the signals while full blocks are available
        while x_local.shape[0] - n > hop_frames:
            stft.analysis(x_local[n:n + hop_frames, ])
            stft.process()  # apply the filter
            processed_x[n:n + hop_frames, ] = stft.synthesis()
            n += hop_frames

        error = np.max(np.abs(y_local[:n - hop, ] - processed_x[hop:n, ]))

        # if D==1:
        #     import matplotlib.pyplot as plt
        #     plt.figure()
        #     plt.plot(y_local)
        #     plt.plot(processed_x)
        #     plt.show()

    return error
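
# A minimal driver for the test above; a sketch that assumes the module-level
# fixtures (x, y, h, h_len, transform) used by the function are defined, as
# they would be in the surrounding test script.
if __name__ == "__main__":
    for D in [1, 2, 4]:
        err = with_half_overlap_with_filter(D, num_frames=4)
        print("D = {}: max reconstruction error = {:.2e}".format(D, err))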
noise_fp = os.path.join(os.path.dirname(__file__), "input_samples",
                        "doing_the_dishes.wav")
noisy_signal, signal, noise, fs = pra.create_noisy_signal(signal_fp,
                                                          snr=snr,
                                                          noise_fp=noise_fp)
wavfile.write(
    os.path.join(os.path.dirname(__file__), "output_samples",
                 "denoise_input_SpectralSub.wav"),
    fs,
    noisy_signal.astype(np.float32),
)
"""
Create STFT and SCNR objects
"""
hop = nfft // 2
window_a = pra.hann(nfft)
window_s = pra.transform.stft.compute_synthesis_window(window_a, hop)
stft = pra.transform.STFT(nfft,
                          hop=hop,
                          analysis_window=window_a,
                          synthesis_window=window_s,
                          streaming=True)

scnr = SpectralSub(nfft, db_reduc, lookback, beta, alpha)
lookback_time = hop / fs * lookback
print("Lookback : %f seconds" % lookback_time)
"""
Process as in real-time
"""
# collect the processed blocks
processed_audio = np.zeros(signal.shape)
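
# The example stops short here; below is a sketch of the streaming loop,
# mirroring the other denoising examples in this file: analyze one hop of
# samples, compute the gain filter, and synthesize the denoised block.
n = 0
while noisy_signal.shape[0] - n >= hop:
    stft.analysis(noisy_signal[n:n + hop])
    gain_filt = scnr.compute_gain_filter(stft.X)
    processed_audio[n:n + hop] = stft.synthesis(gain_filt * stft.X)
    n += hop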
Example #26
import json

# Read in the pyramic microphone locations
with open('pyramic.json') as f:
    data = json.load(f)
    array = np.array(data['pyramic']).T

# Position the array in the room
array -= array.mean(axis=1, keepdims=True)
array += np.array([[5.5, 5.3, 1.1]]).T
room.add_microphone_array(pra.MicrophoneArray(array, room.fs))

####################
# Prepare the STFT #

awin = pra.hann(nfft)
swin = pra.transform.compute_synthesis_window(awin, shift)
stft_input = pra.transform.STFT(
    nfft,
    shift,
    analysis_window=awin,
    synthesis_window=swin,
    channels=array.shape[1],
)
stft_output = pra.transform.STFT(
    nfft,
    shift,
    analysis_window=awin,
    synthesis_window=swin,
    channels=1,
)
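
# The processing loop is missing from this excerpt. Below is a hypothetical
# sketch of how the two STFT engines could be paired; the microphone signals
# `mic_signals` and the per-bin weights `w` are placeholders (assumptions),
# not part of the original example.
mic_signals = np.zeros((room.fs, array.shape[1]))  # placeholder: 1 s of silence
w = np.ones((nfft // 2 + 1, array.shape[1])) / array.shape[1]  # plain average across channels
out = np.zeros(mic_signals.shape[0])
n = 0
while mic_signals.shape[0] - n >= shift:
    stft_input.analysis(mic_signals[n:n + shift, :])
    Y = np.sum(w * stft_input.X, axis=1)  # weight-and-sum beamformer
    out[n:n + shift] = stft_output.synthesis(Y)
    n += shift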
noisy_signal, signal, noise, fs = pra.create_noisy_signal(signal_fp,
                                                          snr=snr,
                                                          noise_fp=noise_fp)
wavfile.write(
    os.path.join(os.path.dirname(__file__), 'output_samples',
                 'denoise_input_IterativeWiener.wav'), fs,
    noisy_signal.astype(np.float32))
"""
Apply approach
"""

scnr = IterativeWiener(frame_len, lpc_order, iterations, alpha, threshold)

# derived parameters
hop = frame_len // 2
win = pra.hann(frame_len, flag='asymmetric', length='full')
stft = pra.transform.STFT(frame_len,
                          hop=hop,
                          analysis_window=win,
                          streaming=True)
speech_psd = np.ones(hop + 1)  # initialize PSD
noise_psd = 0

start_time = time.time()
processed_audio = np.zeros(noisy_signal.shape)
n = 0
while noisy_signal.shape[0] - n >= hop:

    # to frequency domain, 50% overlap
    stft.analysis(noisy_signal[n:(n + hop), ])
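
    # The example is cut off here. To finish the frame, a sketch: a generic
    # Wiener gain computed from the running PSD estimates (the full example
    # updates speech_psd and noise_psd through scnr each frame; that update
    # step is omitted in this sketch).
    wiener_gain = speech_psd / (speech_psd + noise_psd + 1e-15)
    processed_audio[n:n + hop] = stft.synthesis(wiener_gain * stft.X)
    n += hop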
Example #28
    parser.add_argument('--save',
                        action='store_true',
                        help='Saves the output of the separation to wav files')
    args = parser.parse_args()

    if args.gui:
        # avoids a bug with tkinter and matplotlib
        import matplotlib
        matplotlib.use('TkAgg')

    import pyroomacoustics as pra

    ## Prepare one-shot STFT
    L = args.block
    hop = L // 2
    win_a = pra.hann(L)
    win_s = pra.transform.compute_synthesis_window(win_a, hop)

    ## Create a room with sources and mics
    # Room dimensions in meters
    room_dim = [8, 9]

    # source location
    source = np.array([1, 4.5])
    room = pra.ShoeBox(room_dim,
                       fs=16000,
                       max_order=15,
                       absorption=0.35,
                       sigma2_awgn=1e-8)

    # get signals
Example #29
def convergence_callback(
    Y,
    source_model,
    X,
    n_targets,
    SDR,
    SIR,
    cost_list,
    eval_time,
    ref_sig,
    ref_mic,
    stft_params,
    algo_name,
    algo_is_overdetermined,
):
    global id_wav
    # we will keep track of how long this routine takes
    t_in = time.perf_counter()

    # Compute the current value of the IVA cost function
    cost_list.append(bss.cost_iva(X, Y, model=source_model))

    # prepare STFT parameters
    framesize = stft_params["framesize"]
    hop = stft_params["hop"]
    if stft_params["window"] == "hamming":
        win_a = pra.hamming(framesize)
    else:  # default is Hann
        win_a = pra.hann(framesize)
    win_s = pra.transform.compute_synthesis_window(win_a, hop)

    # projection back
    Y = bss.project_back(Y, X[:, :, ref_mic])

    if Y.shape[2] == 1:
        y = pra.transform.synthesis(Y[:, :, 0], framesize, hop,
                                    win=win_s)[:, None]
    else:
        y = pra.transform.synthesis(Y, framesize, hop, win=win_s)
    y = y[framesize - hop:, :].astype(np.float64)

    if not algo_is_overdetermined:
        new_ord = np.argsort(np.std(y, axis=0))[::-1]
        y = y[:, new_ord]

    m = np.minimum(y.shape[0], ref_sig.shape[1])

    synth = np.zeros_like(ref_sig)
    # in the overdetermined case, we also take into account the background for SIR computation
    synth[:n_targets, :m] = y[:m, :n_targets].T
    if synth.shape[0] > y.shape[1]:
        # here we copy the first source to fill the channel of the background
        synth[n_targets, :m] = y[:m, 0]

    if ref_sig.shape[0] > n_targets and np.sum(np.abs(
            ref_sig[n_targets, :])) < 1e-10:
        sdr, sir, sar, perm = si_bss_eval(ref_sig[:n_targets, :m].T,
                                          synth[:-1, :m].T)
    else:
        sdr, sir, sar, perm = si_bss_eval(ref_sig[:, :m].T, synth[:, :m].T)

    SDR.append(sdr[:n_targets].tolist())
    SIR.append(sir[:n_targets].tolist())

    t_out = time.perf_counter()
    eval_time.append(t_out - t_in)
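
# A sketch of how this callback could be wired into a separation routine:
# everything except Y is bound with functools.partial, since the BSS routines
# used in these examples invoke the callback with the current estimate only.
# The names X, ref, and stft_params below are assumptions for illustration.
#
# from functools import partial
# cb = partial(
#     convergence_callback,
#     source_model="laplace", X=X, n_targets=2, SDR=[], SIR=[],
#     cost_list=[], eval_time=[], ref_sig=ref, ref_mic=0,
#     stft_params=stft_params, algo_name="auxiva",
#     algo_is_overdetermined=False,
# )
# Y = pra.bss.auxiva(X, n_iter=50, proj_back=False, callback=cb)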
    # create a noisy_signal array for each SNR value and mic
    speech_file_location = speech.meta.as_dict()['file_loc']
    noise_file_location = noise.meta.as_dict()['file_loc']
    noisy_signal = utils.modify_input_wav_multiple_mics(
        speech_file_location, noise_file_location, room_dim, max_order,
        snr_vals, mic_array, [2, 3.1, 2], [4, 2, 1.5])

    # Create our new samples for each SNR value
    noisy_single_mic = noisy_signal[:, 0, :]
    '''
    Make an STFT object (these classes are already implemented in
    pyroomacoustics, with examples showing how to use them).
    '''

    hop = fft_len // 2
    window = pra.hann(fft_len, flag='asymmetric', length='full')
    stft = pra.realtime.STFT(fft_len,
                             hop=hop,
                             analysis_window=window,
                             channels=1)
    '''
    Processing of our noisy signals contained in the noisy array.
    '''

    # collect the processed blocks for each of our noisy signals
    processed_audio_array = np.zeros(noisy_single_mic.shape)

    # we run the algorithm for each of our signals
    for i, snr in enumerate(snr_vals):

        n = 0
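
        # The example is cut off here; a sketch of the streaming continuation
        # for signal i. The actual denoising step is missing from this
        # excerpt, so each block is passed through unmodified.
        while noisy_single_mic.shape[1] - n >= hop:
            stft.analysis(noisy_single_mic[i, n:n + hop])
            processed_audio_array[i, n:n + hop] = stft.synthesis()
            n += hop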
fs = 44100

room = pra.ShoeBox(room_size, fs, materials=pra.Material(0.1), max_order=50)

room.add_source(source_loc)

room.add_microphone_array(pra.MicrophoneArray(mic_loc, fs))

room.compute_rir()

# plot spectrograms to check for sweeping echoes

fft_size = 512  # fft size for analysis
fft_hop = 128  # hop between analysis frame
fft_zp = 512  # zero padding
analysis_window = pra.hann(fft_size)

print("Sweeping echo measure for ISM is :")
for n in range(M):

    if n == 0:
        S = stft.analysis(room.rir[n][0], fft_size, fft_hop,
                          win=analysis_window, zp_back=fft_zp)

        f, (ax1, ax2) = plt.subplots(2, 1)

        ax1.imshow(
            pra.dB(S.T),
            extent=[0, len(room.rir[n][0]), 0, fs / 2],
            vmin=-100,
            vmax=0,
            origin="lower",