Beispiel #1
0
def preprocess(data, sr, params):
    """
    Check that the recording is four-channel and return its band-limited STFT.

    The audio is wrapped in a psa.Signal declared as ACN / N3D, transformed
    with psa.Stft.fromSignal and restricted to [fmin, fmax] with limit_bands.
    When params['quick_test'] is truthy only the first
    params['quick_test_file_duration'] seconds are analysed.

    :param data: np.array (num_frames, num_channels); num_channels must be 4
    :param sr:  sampling rate
    :param params: params dict. Keys read here: 'quick_test',
        'quick_test_file_duration' (quick-test mode only), 'window_size',
        'window_overlap', 'nfft', 'fmin', 'fmax' and 'plot'
    :return: psa.Stft instance
    """
    shape = np.shape(data)
    total_frames = shape[0]
    assert shape[1] == 4

    # Quick-test mode truncates the file to a fixed duration given in seconds.
    last_frame = (int(np.ceil(sr * params['quick_test_file_duration']))
                  if params['quick_test'] else total_frames)

    stft_kwargs = {
        'window_size': params['window_size'],
        'window_overlap': params['window_overlap'],
        'nfft': params['nfft'],
    }

    # psa.Signal receives the audio channel-first, hence the transpose.
    ambi_signal = psa.Signal(data[:last_frame].T, sr, 'acn', 'n3d')
    full_band_stft = psa.Stft.fromSignal(ambi_signal, **stft_kwargs)
    X = full_band_stft.limit_bands(params['fmin'], params['fmax'])

    if params['plot']:
        psa.plot_magnitude_spectrogram(X)

    return X
Beispiel #2
0
def plot_doa(x_t, title):
    """
    Estimate, plot and return the direction of arrival of an ambisonic signal.

    Relies on the module-level names fs, window_size, window_overlap and nfft
    for the sampling rate and the STFT settings.

    :param x_t: audio data handed to psa.Signal (declared as ACN / SN3D)
    :param title: title forwarded to psa.plot_doa
    :return: the result of psa.compute_DOA on the signal's STFT
    """
    ambi_signal = psa.Signal(x_t, fs, 'acn', 'sn3d')
    stft_kwargs = dict(window_size=window_size,
                       window_overlap=window_overlap,
                       nfft=nfft)
    doa_estimate = psa.compute_DOA(
        psa.Stft.fromSignal(ambi_signal, **stft_kwargs))

    psa.plot_doa(doa_estimate, title)
    plt.show()  # displays the figure before returning
    return doa_estimate
    # Fragment of a larger routine whose header is not visible here; the names
    # d, basisType, num_channels, audio_length_samples and fs come from outside.

    # First-order spherical-harmonic gains for the directions in d, rescaled by
    # sqrt(4*pi) and by per-channel factors [1, 1/sqrt(3), 1/sqrt(3), 1/sqrt(3)].
    y = masp.get_sh(1, d, basisType) * np.sqrt(4 * np.pi) * [
        1, 1. / np.sqrt(3), 1. / np.sqrt(3), 1. / np.sqrt(3)
    ]  ## ACN, SN3D

    # Gaussian noise, one row per channel/source.
    s = np.random.normal(size=(num_channels, audio_length_samples))
    # Each of the first two noise rows is turned into a column and weighted by
    # the matching row of y (presumably one row of y per source direction --
    # TODO confirm against masp.get_sh).
    ambi0 = s[0][:, np.newaxis] * y[0]
    ambi1 = s[1][:, np.newaxis] * y[1]

    # Mixture of the two encoded sources.
    ambi = ambi0 + ambi1

    # # # # # # # # # # # # # # # # # #
    # Analysis settings: r is forwarded to compute_msc / compute_msw below.
    r = 1
    window_size = 256
    window_overlap = window_size // 2

    # NOTE(review): the gains above are commented as SN3D, yet the Signal is
    # declared 'n3d' here -- confirm which normalization is intended.
    s_tot_ambi = psa.Signal(ambi.T, fs, 'acn', 'n3d')
    S_tot_ambi = psa.Stft.fromSignal(s_tot_ambi,
                                     window_size=window_size,
                                     window_overlap=window_overlap)
    # doa is not used in the visible part of this fragment.
    doa = psa.compute_DOA(S_tot_ambi)
    # ksi = S_tot_ambi.compute_ksi(r=r)
    msc = S_tot_ambi.compute_msc(r=r)
    msw = S_tot_ambi.compute_msw(r=r)
    # NOTE(review): this value of ksi is overwritten by the last statement of
    # the fragment, and np.dot is applied to the psa objects themselves rather
    # than to their .data -- looks like a leftover experiment; confirm.
    ksi = np.dot(msc, msw)
    # Element-wise products of the first three msc/msw channels; m is not used
    # in the visible part of this fragment.
    m = np.asarray([
        msc.data[0] * msw.data[0], msc.data[1] * msw.data[1],
        msc.data[2] * msw.data[2]
    ])
    # Square root of the sum of the three channel-wise products.
    A = np.sqrt(msc.data[0] * msw.data[0] + msc.data[1] * msw.data[1] +
                msc.data[2] * msw.data[2])
    # Wrap A in an Stft that reuses the t, f and sample_rate attributes of msc.
    ksi = psa.Stft(msc.t, msc.f, A, msc.sample_rate)
Beispiel #4
0
        # Fragment of a larger loop/function whose header is not visible here;
        # af, fs, audio_file_length_samples, M and irs come from outside.

        # Load the source file as mono at rate fs and keep at most
        # audio_file_length_samples samples.
        s_dir = librosa.core.load(af, sr=fs, mono=True)[0][:audio_file_length_samples]

        # Convolve the source with each of the M impulse responses to build the
        # multichannel signal, one row per channel.
        bformat = np.zeros((M, audio_file_length_samples))
        for m in range(M):
            bformat[m] = scipy.signal.fftconvolve(s_dir, irs[m])[:audio_file_length_samples]  # keep original length


        # Analysis settings: r is forwarded to compute_ita_re below.
        r=4
        window_size = 256
        window_overlap = window_size//2

        # STFT of the source signal; S_dir is not used in the visible part of
        # this fragment (presumably a reference for est_S_dir -- confirm).
        _, _, S_dir = scipy.signal.stft(s_dir, fs, nperseg=window_size, noverlap=window_overlap )


        # Wrap the convolved signal as ACN / N3D and analyse it.
        s_tot_ambi = psa.Signal(bformat, fs, 'acn', 'n3d')
        S_tot_ambi = psa.Stft.fromSignal(s_tot_ambi,
                                window_size=window_size,
                                window_overlap=window_overlap
                                )
        doa = psa.compute_DOA(S_tot_ambi)
        directivity = S_tot_ambi.compute_ita_re(r=r)

        # Diagnostic plots of the signal, its spectrogram, DOA and directivity.
        psa.plot_signal(s_tot_ambi)
        psa.plot_magnitude_spectrogram(S_tot_ambi)
        psa.plot_doa(doa)
        psa.plot_directivity(directivity)
        # psa.plot_directivity(directivity.sqrt())

        # Mask the ambisonic STFT with the square root of the directivity and
        # keep channel 0 of the result as the estimate.
        est_S_dir_ambi = S_tot_ambi.apply_mask(directivity.sqrt())
        est_S_dir = est_S_dir_ambi.data[0]
import parametric_spatial_audio_processing as psa
import soundfile as sf
import matplotlib.pyplot as plt
import numpy as np
import time

# Benchmark script: times stft.compute_ita_re against stft.compute_ksi_re on
# the first second of a recording and collects the per-call durations.

# Length of audio to analyse, in seconds. Deliberately NOT called `len`:
# rebinding the builtin at module level breaks every later len(...) call.
duration_s = 1.  # s
audio_path = "/Volumes/Dinge/DCASE2019/foa_dev/split1_ir0_ov1_1.wav"
data, sr = sf.read(audio_path)
data = data[:int(duration_s * sr)]
signal = psa.Signal(data.T, sr, ordering='acn', norm='sn3d')
stft = psa.Stft.fromSignal(signal)

# Argument passed to both estimators (presumably a neighbourhood radius --
# confirm against the psa documentation).
r = 5

# Per-call durations in seconds for compute_ita_re (t_a) and compute_ksi_re (t_b).
t_a = []
t_b = []

for i in range(1000):

    # perf_counter is monotonic and high-resolution, unlike the wall clock
    # returned by time.time(), so short intervals are measured reliably.
    start = time.perf_counter()
    a = stft.compute_ita_re(r)
    end = time.perf_counter()
    # print('a', end - start)
    t_a.append(end - start)

    start = time.perf_counter()
    b = stft.compute_ksi_re(r)
    end = time.perf_counter()
    # print('b', end - start)
    t_b.append(end - start)
# Script: load a recording, plot its waveform, spectrogram and diffuseness,
# then mask the STFT with the diffuseness estimate.

# STFT settings shared by the code below.
analysis_window_size = 512
window_overlap = analysis_window_size // 2  # half-window overlap
# Band limits; not used by the statements of this snippet.
fmin = 125
fmax = 8000
# The FFT size is the window size times fft_factor (1 here, so they are equal).
fft_factor = 1
fft_size = analysis_window_size * fft_factor

## Open the file to analyze

# file_path = '/Volumes/Dinge/ambiscaper/background/background_anechoic_pad/background_anechoic_pad.wav'
file_path = '/Volumes/Dinge/ambiscaper/testing/len2/len2.wav'
data, sr = sf.read(file_path)

## Check diffuseness

# The audio is transposed before being wrapped as an ACN / SN3D signal.
signal = psa.Signal(data.T, sr, 'acn', 'sn3d')
psa.plot_signal(signal, title='waveform')

stft = psa.Stft.fromSignal(signal,
                           window_size=analysis_window_size,
                           window_overlap=window_overlap,
                           nfft=fft_size)
psa.plot_magnitude_spectrogram(stft, title='magnitude spectrogram')

diffuseness_stft = psa.compute_diffuseness(stft)
# directivity_stft is computed but not used in the visible part of this snippet.
directivity_stft = psa.compute_directivity(stft)
psa.plot_diffuseness(diffuseness_stft, title='diffuseness')

## Apply diffuseness mask to separate background and foreground

# Masking with the diffuseness yields the background estimate.
background_stft = stft.apply_mask(diffuseness_stft)
def _local_doa_variance(doa, neighborhood_size):
    """Return a copy of *doa* holding the local variance around every bin.

    The variance of each time-frequency bin is taken over the square
    neighbourhood of side ``neighborhood_size`` centred on it, clipped at the
    borders. Channel 0 (azimuth) uses the circular variance, channel 1
    (elevation) the ordinary variance.
    """
    num_time_bins = doa.get_num_time_bins()
    num_frequency_bins = doa.get_num_frequency_bins()
    radius = neighborhood_size // 2  # loop-invariant neighbourhood radius

    doa_var = copy.deepcopy(doa)
    for n in range(num_time_bins):
        n_lo, n_hi = max(n - radius, 0), min(n + radius + 1, num_time_bins)
        for k in range(num_frequency_bins):
            k_lo, k_hi = max(k - radius, 0), min(k + radius + 1, num_frequency_bins)
            # Values are read from `doa` and written to the copy, so earlier
            # results never leak into later neighbourhoods.
            doa_var.data[0, k, n] = scipy.stats.circvar(doa.data[0, k_lo:k_hi, n_lo:n_hi].ravel())
            doa_var.data[1, k, n] = np.var(doa.data[1, k_lo:k_hi, n_lo:n_hi])
    return doa_var


def _salience_mask(template, keep):
    """Return a copy of *template* with 1. where *keep* is true and NaN elsewhere.

    *keep* is a (frequency, time) boolean array; it is applied to every channel.
    """
    mask = copy.deepcopy(template)
    mask.data[:] = np.where(keep, 1., np.nan)
    return mask


def _contiguous_regions(sorted_bins):
    """Group ascending bin indices into half-open [start, end) runs of consecutive bins."""
    regions = []
    for b in sorted_bins:
        if regions and b == regions[-1][1]:
            regions[-1][1] = b + 1  # extends the current run
        else:
            regions.append([b, b + 1])  # gap (or first bin): a new run starts
    return regions


def compute_peak_statistics(ir,
                            sample_rate,
                            ambisonics_ordering,
                            ambisonics_normalization,
                            plot=False,
                            plot_title=''):
    """
    Estimate direction-of-arrival statistics for the energetic regions of an
    impulse response.

    Steps:
      1. STFT of the signal, limited to [fmin, fmax].
      2. DOA estimate per time-frequency bin, plus its local variance over a
         3x3 neighbourhood.
      3. Two salience masks obtained with threshold_local: bins whose azimuth
         variance is below the local threshold, and bins whose energy density
         is above the local threshold.
      4. Time bins with at least one energy-salient frequency bin are grouped
         into contiguous regions; for each region the circular mean/std of the
         azimuth and the mean/std of the elevation are computed from the DOA
         masked by both masks.

    Only samples that are energy-salient AND not NaN in the masked DOA enter
    the statistics, so bins rejected by the variance mask cannot turn a whole
    region into NaN. Regions without any valid sample are omitted.
    NOTE(review): this assumes apply_mask leaves NaN where the mask is NaN --
    confirm against the psa implementation.

    Reads the module-level names analysis_window_size, window_overlap,
    fft_size, fmin, fmax and processing_window_ms, and the helpers
    round_up_to_odd and threshold_local.
    NOTE(review): processing_window_ms is not defined in the visible part of
    this file; it must exist at module level before this function is called.

    :param ir: impulse response data handed to psa.Signal
    :param sample_rate: sampling rate (converted to int)
    :param ambisonics_ordering: channel ordering handed to psa.Signal
    :param ambisonics_normalization: normalization handed to psa.Signal
    :param plot: when true, draw the diagnostic figures
    :param plot_title: prefix prepended to the figure titles
    :return: list with one entry per region:
        [estimated_location_ms, [azi_mean, azi_std], [ele_mean, ele_std]]
    """
    window_label = str(analysis_window_size)

    ## Signal
    signal = psa.Signal(ir, int(sample_rate), ambisonics_ordering, ambisonics_normalization)
    if plot:
        psa.plot_signal(signal, title=plot_title + 'IR')

    stft = psa.Stft.fromSignal(signal,
                               window_size=analysis_window_size,
                               window_overlap=window_overlap,
                               nfft=fft_size,
                               )
    stft = stft.limit_bands(fmin=fmin, fmax=fmax)

    if plot:
        psa.plot_magnitude_spectrogram(stft, title=plot_title + 'IR Magnitude Spectrogram, w=' + window_label)
        # The time-domain energy density is only needed for this figure.
        psa.plot_signal(psa.compute_energy_density(signal), 'Energy Density', y_scale='log')

    ## Raw estimates
    doa = psa.compute_DOA(stft)
    if plot:
        psa.plot_doa(doa, title=plot_title + 'DoA estimates, w=' + window_label)

    ## DOA variance and its minimum-variance salience mask
    doa_var = _local_doa_variance(doa, neighborhood_size=3)

    neighborhood_size = round_up_to_odd(doa_var.get_num_frequency_bins())
    doa_var_salience = threshold_local(doa_var.data[0, :], block_size=neighborhood_size)
    doa_var_min_salience_mask = _salience_mask(doa_var, doa_var.data[0] < doa_var_salience)

    if plot:
        psa.plot_doa(doa.apply_mask(doa_var_min_salience_mask),
                     title=plot_title + 'DOA - Minimum variance Salience Masked, w=' + window_label + ' N: ' + str(
                         neighborhood_size))

    ## Energy density and its salience mask
    energy_density_tf = psa.compute_energy_density(stft)

    neighborhood_size = round_up_to_odd(energy_density_tf.get_num_frequency_bins())
    energy_density_salience = threshold_local(energy_density_tf.data[0, :], block_size=neighborhood_size)
    energy_density_salience_mask = _salience_mask(
        energy_density_tf, energy_density_tf.data[0] > energy_density_salience)

    energy_masked_doa = doa.apply_mask(energy_density_salience_mask)
    if plot:
        psa.plot_doa(energy_masked_doa, title=plot_title+'DOA - Energy Salience Masked, w='+window_label+' N: '+str(neighborhood_size))

    # DOA restricted to bins that are both energetic and locally stable.
    masked_doa = energy_masked_doa.apply_mask(doa_var_min_salience_mask)
    if plot:
        psa.plot_doa(masked_doa, title=plot_title+'DOA - VAR MIN,  Energy Salience Masked, w='+window_label+' N: '+str(neighborhood_size))

    ### Find horizontal-contiguous bins on doa estimates
    # A time bin counts as energetic when at least one frequency bin survives the mask.
    energetic = ~np.all(np.isnan(energy_density_salience_mask.data[0]), axis=0)
    time_bins_with_energy = np.flatnonzero(energetic).tolist()

    # Compute local doa estimates on contiguous time regions
    peak_stats = []
    for idx, (region_start, region_end) in enumerate(_contiguous_regions(time_bins_with_energy)):
        n_range = range(region_start, region_end)

        # Transposed to (time, frequency) so samples are gathered time bin by time bin.
        region_energy = energy_density_salience_mask.data[0, :, region_start:region_end].T
        region_azi = masked_doa.data[0, :, region_start:region_end].T
        region_ele = masked_doa.data[1, :, region_start:region_end].T

        # Keep energy-salient bins whose masked DOA is itself defined.
        valid = ~(np.isnan(region_energy) | np.isnan(region_azi) | np.isnan(region_ele))
        if not valid.any():
            continue  # nothing left to estimate in this region

        local_azi = region_azi[valid]
        local_ele = region_ele[valid]
        # Absolute time-bin index of every retained sample.
        index_of_bins_estimated = region_start + np.nonzero(valid)[0]

        local_azi_mean = scipy.stats.circmean(local_azi, high=np.pi, low=-np.pi)
        local_azi_std = scipy.stats.circstd(local_azi, high=np.pi, low=-np.pi)
        local_ele_mean = np.mean(local_ele)
        local_ele_std = np.std(local_ele)

        if plot:
            fig = plt.figure()
            ax = fig.add_subplot(111)
            plt.suptitle(plot_title+'FREQ BIN '+str(idx)+' - bins '+str(n_range))
            plt.grid()
            plt.xlim(-np.pi, np.pi)
            plt.ylim(-np.pi/2, np.pi/2)

            plt.scatter(local_azi, local_ele, marker='o',)
            plt.scatter(local_azi_mean, local_ele_mean, c='red', s=20, marker='+')
            ax.add_patch(Ellipse(xy=(local_azi_mean, local_ele_mean), width=local_azi_std, height=local_ele_std, alpha=0.5))
            ax.add_patch(Ellipse(xy=(local_azi_mean, local_ele_mean), width=3*local_azi_std, height=3*local_ele_std, alpha=0.1))

        # Convert the mean time-bin index of the retained samples to milliseconds.
        mean_location = np.mean(index_of_bins_estimated)
        time_resolution_ms = float(processing_window_ms) / masked_doa.get_num_time_bins()
        estimated_location_ms = mean_location * time_resolution_ms

        peak_stats.append(
            [estimated_location_ms, [local_azi_mean, local_azi_std], [local_ele_mean, local_ele_std]])

    ### Return peak stats
    return peak_stats
# Script: load a background and a foreground recording and compute their STFTs.

# STFT settings shared by the code below.
analysis_window_size = 512
window_overlap = analysis_window_size // 2  # half-window overlap
# Band limits; not used by the visible statements of this snippet.
fmin = 125
fmax = 8000
# The FFT size is the window size times fft_factor (1 here, so they are equal).
fft_factor = 1
fft_size = analysis_window_size * fft_factor

## Open the noise background

# bg_path = '/Volumes/Dinge/ambiscaper/background/noise2/noise2.wav'
# bg_path = '/Volumes/Dinge/ambiscaper/background/isf/isf_acn_sn3d.wav'
bg_path = '/Volumes/Dinge/ambiscaper/background/isf/isf_bformat_plugin_filter.wav'
# bg_path = '/Volumes/Dinge/ambiscaper/background/isf/isf.wav'
bg, sr = sf.read(bg_path)
# Transpose and attenuate the background by a factor of 0.1.
bg = bg.T * 0.1
bg_signal = psa.Signal(bg, sr, 'acn', 'sn3d')
bg_stft = psa.Stft.fromSignal(bg_signal,
                              window_size=analysis_window_size,
                              window_overlap=window_overlap,
                              nfft=fft_size)
# psa.plot_signal(bg_signal,title='groundtruth background')
# psa.plot_magnitude_spectrogram(bg_stft,title='groundtruth  background')
# psa.plot_diffuseness(psa.compute_diffuseness(bg_stft),'groundtruth bg diffuseness')

# Open the foreground
fg_path = '/Volumes/Dinge/ambiscaper/background/foreground/foreground.wav'
# NOTE(review): sr is overwritten here with the foreground's rate; both files
# presumably share the same sampling rate -- confirm.
fg, sr = sf.read(fg_path)
# Transpose and amplify the foreground by a factor of 5.
fg = fg.T * 5
fg_signal = psa.Signal(fg, sr, 'acn', 'sn3d')
fg_stft = psa.Stft.fromSignal(fg_signal,
                              window_size=analysis_window_size,
Beispiel #9
0
# Script: render spherical-harmonic room impulse responses, convolve them with
# white noise and plot the waveform, spectrogram and DOA of the result.
# abs_echograms, band_centerfreqs and fs are defined outside this snippet.

sh_rirs = srs.render_rirs_sh(abs_echograms, band_centerfreqs, fs).squeeze()
# Rescale by sqrt(4*pi) and per-channel factors [1, 1/sqrt(3), 1/sqrt(3), 1/sqrt(3)].
sh_rirs = sh_rirs * np.sqrt(4 * np.pi) * [
    1, 1. / np.sqrt(3), 1. / np.sqrt(3), 1. / np.sqrt(3)
]  # SN3D norm
plt.figure()
plt.plot(sh_rirs)
plt.show()

# One second of Gaussian white noise as the source signal.
signal_len_samples = int(np.floor(1. * fs))
signal = np.random.randn(signal_len_samples)

# Convolve the source with each of the four impulse-response columns and
# truncate the result to the source length.
reverberant_signal = np.zeros((signal_len_samples, 4))
for i in range(4):
    reverberant_signal[:, i] = scipy.signal.fftconvolve(
        signal, sh_rirs[:, i].squeeze())[:signal_len_samples]
# Transposed before being wrapped as an ACN / SN3D signal.
x = psa.Signal(reverberant_signal.T, fs, 'acn', 'sn3d')
psa.plot_signal(x, title='waveform')

# STFT settings: half-window overlap, FFT size equal to the window size.
analysis_window_size = 512
window_overlap = analysis_window_size // 2
fft_size = analysis_window_size
stft = psa.Stft.fromSignal(x,
                           window_size=analysis_window_size,
                           window_overlap=window_overlap,
                           nfft=fft_size)
psa.plot_magnitude_spectrogram(stft, title='magnitude spectrogram')
doa = psa.compute_DOA(stft)
psa.plot_doa(doa, title='doa')

plt.show()