def preprocess(data, sr, params):
    """
    Check that the input has four channels, optionally crop it, and return
    its band-limited STFT.

    :param data: np.array (num_frames, num_channels)
    :param sr: sampling rate
    :param params: params dict. Keys read here: 'quick_test',
        'quick_test_file_duration' (only when 'quick_test' is truthy),
        'window_size', 'window_overlap', 'nfft', 'fmin', 'fmax', 'plot'
    :return: psa.Stft instance
    """
    data_shape = np.shape(data)
    num_frames, num_channels = data_shape[0], data_shape[1]
    # Four channels is what first order ambisonics provides.
    assert num_channels == 4

    # In quick-test mode only the first 'quick_test_file_duration' seconds
    # are analysed; otherwise the whole recording is used.
    end_frame = (int(np.ceil(sr * params['quick_test_file_duration']))
                 if params['quick_test']
                 else num_frames)

    stft_kwargs = {
        'window_size': params['window_size'],
        'window_overlap': params['window_overlap'],
        'nfft': params['nfft'],
    }

    # psa.Signal receives the transposed excerpt, i.e. channels first.
    excerpt = psa.Signal(data[:end_frame].T, sr, 'acn', 'n3d')
    full_band = psa.Stft.fromSignal(excerpt, **stft_kwargs)
    X = full_band.limit_bands(params['fmin'], params['fmax'])

    if params['plot']:
        psa.plot_magnitude_spectrogram(X)

    return X
def plot_doa(x_t, title):
    """
    Estimate the DOA of a multichannel signal, plot it and return it.

    The sampling rate (``fs``) and the STFT settings (``window_size``,
    ``window_overlap``, ``nfft``) are read from module-level variables.

    :param x_t: time-domain data handed to psa.Signal, tagged 'acn'/'sn3d'
    :param title: title passed to psa.plot_doa
    :return: the result of psa.compute_DOA on the STFT of the signal
    """
    ambi_signal = psa.Signal(x_t, fs, 'acn', 'sn3d')
    ambi_stft = psa.Stft.fromSignal(
        ambi_signal,
        window_size=window_size,
        window_overlap=window_overlap,
        nfft=nfft,
    )
    doa_estimate = psa.compute_DOA(ambi_stft)

    psa.plot_doa(doa_estimate, title)
    plt.show()  # blocks until the figure window is closed (interactive backends)
    return doa_estimate
# Encode two independent noise sources with first-order SH gains, analyse the
# mix with psa and build a "ksi" map from the msc and msw estimates.

# SH gains for the directions in `d`, rescaled channel-wise.
# ACN, SN3D
y = masp.get_sh(1, d, basisType) * np.sqrt(4 * np.pi) * [
    1,
    1. / np.sqrt(3),
    1. / np.sqrt(3),
    1. / np.sqrt(3),
]

# One row of Gaussian noise per channel/source.
s = np.random.normal(size=(num_channels, audio_length_samples))

# Only the first two noise rows are encoded, each with its own gain row.
ambi0 = s[0, :, np.newaxis] * y[0]
ambi1 = s[1, :, np.newaxis] * y[1]
ambi = ambi0 + ambi1

# # # # # # # # # # # # # # # # # #

r = 1
window_size = 256
window_overlap = window_size // 2

# NOTE(review): the gains above are commented as SN3D but the signal is tagged
# 'n3d' here -- confirm which normalisation is intended.
s_tot_ambi = psa.Signal(ambi.T, fs, 'acn', 'n3d')
S_tot_ambi = psa.Stft.fromSignal(
    s_tot_ambi,
    window_size=window_size,
    window_overlap=window_overlap,
)
doa = psa.compute_DOA(S_tot_ambi)

# ksi = S_tot_ambi.compute_ksi(r=r)
msc = S_tot_ambi.compute_msc(r=r)
msw = S_tot_ambi.compute_msw(r=r)

# NOTE(review): this value is overwritten by the psa.Stft built at the end.
ksi = np.dot(msc, msw)

# Channel-wise products of the first three channels of msc and msw.
m = np.asarray([msc.data[ch] * msw.data[ch] for ch in range(3)])

# Square root of the summed products, wrapped on the time/frequency axes of msc.
A = np.sqrt(m[0] + m[1] + m[2])
ksi = psa.Stft(msc.t, msc.f, A, msc.sample_rate)
# Load a mono source, convolve it with M impulse responses, then analyse the
# multichannel result and mask it with the square root of its directivity
# estimate.

s_dir = librosa.core.load(af, sr=fs, mono=True)[0][:audio_file_length_samples]

bformat = np.zeros((M, audio_file_length_samples))
for m in range(M):
    # keep original length
    bformat[m] = scipy.signal.fftconvolve(s_dir, irs[m])[:audio_file_length_samples]

r = 4
window_size = 256
window_overlap = window_size // 2

# Reference STFT of the dry source, with the same window/overlap as below.
_, _, S_dir = scipy.signal.stft(
    s_dir,
    fs,
    nperseg=window_size,
    noverlap=window_overlap,
)

s_tot_ambi = psa.Signal(bformat, fs, 'acn', 'n3d')
S_tot_ambi = psa.Stft.fromSignal(
    s_tot_ambi,
    window_size=window_size,
    window_overlap=window_overlap,
)

doa = psa.compute_DOA(S_tot_ambi)
directivity = S_tot_ambi.compute_ita_re(r=r)

psa.plot_signal(s_tot_ambi)
psa.plot_magnitude_spectrogram(S_tot_ambi)
psa.plot_doa(doa)
psa.plot_directivity(directivity)
# psa.plot_directivity(directivity.sqrt())

# Mask the mix with sqrt(directivity); channel 0 of the result is kept as the
# estimate of the source spectrum.
est_S_dir_ambi = S_tot_ambi.apply_mask(directivity.sqrt())
est_S_dir = est_S_dir_ambi.data[0]
import parametric_spatial_audio_processing as psa
import soundfile as sf
import matplotlib.pyplot as plt
import numpy as np
import time

# Benchmark script: time Stft.compute_ita_re against Stft.compute_ksi_re on the
# same excerpt and collect the per-call durations in t_a and t_b.

# Excerpt length in seconds. (Previously stored in a variable called `len`,
# which shadowed the builtin and would break any later len(...) call.)
duration_s = 1.  # s
audio_path = "/Volumes/Dinge/DCASE2019/foa_dev/split1_ir0_ov1_1.wav"

data, sr = sf.read(audio_path)
data = data[:int(duration_s * sr)]  # keep only the first duration_s seconds

# psa.Signal is given the transposed data, i.e. channels first.
signal = psa.Signal(data.T, sr, ordering='acn', norm='sn3d')
stft = psa.Stft.fromSignal(signal)

r = 5  # argument forwarded to both methods under test
num_runs = 1000  # number of timed repetitions per method

t_a = []  # seconds per compute_ita_re call
t_b = []  # seconds per compute_ksi_re call
for i in range(num_runs):
    # perf_counter is monotonic and high resolution, so it suits short
    # intervals better than the wall clock time.time().
    start = time.perf_counter()
    a = stft.compute_ita_re(r)
    end = time.perf_counter()
    # print('a', end - start)
    t_a.append(end - start)

    start = time.perf_counter()
    b = stft.compute_ksi_re(r)
    end = time.perf_counter()
    # print('b', end - start)
    t_b.append(end - start)
# Analyse a recording: plot waveform, spectrogram and diffuseness, then mask
# the STFT with the diffuseness estimate.

# Analysis settings
analysis_window_size = 512
window_overlap = analysis_window_size // 2
fmin = 125
fmax = 8000
fft_factor = 1
fft_size = analysis_window_size * fft_factor

## Open the file to analyze
# Alternative input:
# file_path = '/Volumes/Dinge/ambiscaper/background/background_anechoic_pad/background_anechoic_pad.wav'
file_path = '/Volumes/Dinge/ambiscaper/testing/len2/len2.wav'
data, sr = sf.read(file_path)

## Check diffuseness
signal = psa.Signal(data.T, sr, 'acn', 'sn3d')
psa.plot_signal(signal, title='waveform')

stft = psa.Stft.fromSignal(
    signal,
    window_size=analysis_window_size,
    window_overlap=window_overlap,
    nfft=fft_size,
)
psa.plot_magnitude_spectrogram(stft, title='magnitude spectrogram')

diffuseness_stft = psa.compute_diffuseness(stft)
# Computed for reference; not used again within this fragment.
directivity_stft = psa.compute_directivity(stft)
psa.plot_diffuseness(diffuseness_stft, title='diffuseness')

## Apply diffuseness mask to separate background and foreground
background_stft = stft.apply_mask(diffuseness_stft)
def compute_peak_statistics(ir, sample_rate, ambisonics_ordering, ambisonics_normalization, plot=False, plot_title = ''):
    """
    Estimate per-region direction-of-arrival statistics of an impulse response.

    Steps, in order:
      1. Wrap ``ir`` in a psa.Signal and compute its band-limited STFT.
      2. Compute DOA estimates and their local variance over a 3x3
         time/frequency neighbourhood.
      3. Build salience masks (1. where selected, NaN elsewhere) for the DOA
         variance and for the time-frequency energy density.
      4. Group the time bins that keep at least one energy-salient frequency
         bin into contiguous regions.
      5. For every region, compute circular mean/std of data channel 0
         (azimuth) and linear mean/std of data channel 1 (elevation).

    Reads the module-level names ``analysis_window_size``, ``window_overlap``,
    ``fft_size``, ``fmin``, ``fmax`` and ``processing_window_ms``, and the
    helpers ``round_up_to_odd`` and ``threshold_local`` (presumably
    skimage.filters.threshold_local -- TODO confirm).

    :param ir: impulse response data handed to psa.Signal
    :param sample_rate: sampling rate; converted with int() before use
    :param ambisonics_ordering: channel ordering argument for psa.Signal
    :param ambisonics_normalization: normalisation argument for psa.Signal
    :param plot: when truthy, the intermediate results are plotted
    :param plot_title: prefix prepended to most plot titles
    :return: list with one entry per contiguous time region, each of the form
        [estimated_location_ms, [azi_mean, azi_std], [ele_mean, ele_std]]
    """

    ## Signal
    signal = psa.Signal(ir, int(sample_rate), ambisonics_ordering, ambisonics_normalization)
    if plot:
        psa.plot_signal(signal,title=plot_title+'IR')

    stft = psa.Stft.fromSignal(signal,
                               window_size=analysis_window_size,
                               window_overlap=window_overlap,
                               nfft=fft_size,
                               )
    stft = stft.limit_bands(fmin=fmin, fmax=fmax)
    if plot:
        psa.plot_magnitude_spectrogram(stft,title=plot_title+'IR Magnitude Spectrogram, w='+str(analysis_window_size))

    ### Energy Density
    # Time-domain energy density; only used for the plot below.
    energy_density_t = psa.compute_energy_density(signal)
    if plot:
        psa.plot_signal(energy_density_t,'Energy Density', y_scale='log')

    # NOTE: a disabled experiment used to follow here. It smoothed the energy
    # density with a gaussian window, picked peaks with
    # scipy.signal.find_peaks_cwt, refined them to local maxima and mapped
    # them to STFT time bins.

    ## Raw Estimates
    doa = psa.compute_DOA(stft)
    if plot:
        psa.plot_doa(doa,title=plot_title+'DoA estimates, w='+str(analysis_window_size))

    # NOTE: computing and plotting the diffuseness is disabled.

    neighborhood_size = 3

    ## DOA variance
    # doa_var starts as a copy of doa; channels 0 and 1 are overwritten below
    # for every (k, n) visited.
    doa_var = copy.deepcopy(doa)
    for n in range(doa.get_num_time_bins()):
        for k in range(doa.get_num_frequency_bins()):

            # Placeholders; both are reassigned after the neighbourhood scan.
            local_var_azi = 0
            local_var_ele = 0
            local_azi = []
            local_ele = []
            r = int(np.floor(neighborhood_size/2)) # neighborhood radius
            for x in np.arange(n - r, n + r + 1):
                for y in np.arange(k - r, k + r + 1):
                    # Skip neighbours that fall outside the time axis...
                    if x < 0:
                        continue
                    elif x >= doa.get_num_time_bins():
                        continue
                    # ...or outside the frequency axis.
                    if y < 0:
                        continue
                    elif y >= doa.get_num_frequency_bins():
                        continue
                    local_azi.append(doa.data[0,y,x])
                    local_ele.append(doa.data[1,y,x])

            # Circular variance for channel 0, ordinary variance for channel 1.
            local_var_azi = scipy.stats.circvar(np.array(local_azi))
            local_var_ele = np.var(np.array(local_ele))
            doa_var.data[0,k,n] = local_var_azi
            doa_var.data[1,k,n] = local_var_ele

    ## DOA VAR salience
    # From here on neighborhood_size is the threshold block size (it also
    # appears in the plot titles), no longer the 3x3 window used above.
    neighborhood_size = round_up_to_odd(doa_var.get_num_frequency_bins())
    doa_var_salience = threshold_local(doa_var.data[0,:],block_size=neighborhood_size)

    # Masks hold 1. where a bin is selected and NaN elsewhere, on all channels.
    doa_var_max_salience_mask = copy.deepcopy(doa_var)
    doa_var_min_salience_mask = copy.deepcopy(doa_var)
    for k in range(doa_var.get_num_frequency_bins()):
        for n in range(doa_var.get_num_time_bins()):
            # "max" mask: azimuth variance above the local threshold.
            if doa_var.data[0, k, n] > doa_var_salience[k, n]:
                doa_var_max_salience_mask.data[:, k, n] = 1.
            else:
                doa_var_max_salience_mask.data[:, k, n] = np.nan

            # "min" mask: azimuth variance below the local threshold.
            if doa_var.data[0, k, n] < doa_var_salience[k, n]:
                doa_var_min_salience_mask.data[:, k, n] = 1.
            else:
                doa_var_min_salience_mask.data[:, k, n] = np.nan

    # MINIMUM VARIANCE DOA
    masked_doa = doa.apply_mask(doa_var_min_salience_mask)
    if plot:
        psa.plot_doa(masked_doa, title=plot_title + 'DOA - Minimum variance Salience Masked, w=' + str(analysis_window_size) + ' N: ' + str(
            neighborhood_size))

    # NOTE(review): the plot of this max-variance result is disabled and
    # masked_doa is reassigned before being read again.
    masked_doa = doa.apply_mask(doa_var_max_salience_mask)

    # NOTE: plots of the max-variance masked DOA, the raw variance maps and
    # both salience masks are disabled.

    ## Energy density
    energy_density_tf = psa.compute_energy_density(stft)

    # Energy density salience
    neighborhood_size = round_up_to_odd(energy_density_tf.get_num_frequency_bins())
    energy_density_salience = threshold_local(energy_density_tf.data[0,:],block_size=neighborhood_size)

    # 1. where the energy density exceeds its local threshold, NaN elsewhere.
    energy_density_salience_mask = copy.deepcopy(energy_density_tf)
    for k in range(energy_density_tf.get_num_frequency_bins()):
        for n in range(energy_density_tf.get_num_time_bins()):
            if energy_density_tf.data[0, k, n] > energy_density_salience[k, n]:
                energy_density_salience_mask.data[:, k, n] = 1.
            else:
                energy_density_salience_mask.data[:, k, n] = np.nan

    # NOTE: plots of the energy salience map and mask are disabled.

    # Only consumed by a disabled plot; not read again in this function.
    masked_energy = energy_density_tf.apply_mask(energy_density_salience_mask)

    masked_doa = doa.apply_mask(energy_density_salience_mask)
    if plot:
        psa.plot_doa(masked_doa, title=plot_title+'DOA - Energy Salience Masked, w='+str(analysis_window_size)+' N: '+str(neighborhood_size))

    # Final DOA used for the statistics below: the energy-salience mask followed
    # by the minimum-variance mask.
    masked_doa = doa.apply_mask(energy_density_salience_mask).apply_mask(doa_var_min_salience_mask)
    if plot:
        psa.plot_doa(masked_doa, title=plot_title+'DOA - VAR MIN, Energy Salience Masked, w='+str(analysis_window_size)+' N: '+str(neighborhood_size))

    # NOTE: a large disabled experiment used to follow here. It built a
    # diffuseness salience mask the same way as the masks above and produced
    # energy / DOA / diffuseness plots masked by it and by combinations of the
    # diffuseness and energy density masks.

    ### Find horizontal-contiguous bins on doa estimates
    # Time bins in which at least one frequency bin survived the energy mask.
    time_bins_with_energy = []
    for n in range(energy_density_salience_mask.get_num_time_bins()):
        if not np.all(np.isnan(energy_density_salience_mask.data[0,:,n])):
            time_bins_with_energy.append(n)

    # A gap between consecutive kept bins closes one region (end is exclusive)
    # and opens the next. At idx == 0 the index idx - 1 is -1, so the first bin
    # is compared with the last one: that registers the first region's start
    # and the last region's end. Sorting afterwards pairs starts with ends.
    time_region_starts = []
    time_region_ends = []
    for idx, b in enumerate(time_bins_with_energy):
        if time_bins_with_energy[idx] - time_bins_with_energy[idx - 1] != 1:
            time_region_starts.append(time_bins_with_energy[idx])
            time_region_ends.append(time_bins_with_energy[idx - 1]+1)
    time_region_starts.sort()
    time_region_ends.sort()
    assert len(time_region_starts) == len(time_region_ends)

    # Compute local doa estimates on contiguous time regions
    peak_stats = []
    for idx in range(len(time_region_starts)):
        n_range = range(time_region_starts[idx],time_region_ends[idx])
        local_azi = []
        local_ele = []
        index_of_bins_estimated = []
        for n in n_range:
            # Filter nans
            # NOTE(review): only the energy mask is tested, while masked_doa was
            # also masked by the minimum-variance mask. If apply_mask propagates
            # NaN, NaN values can enter the statistics below -- confirm.
            for k in np.arange(energy_density_salience_mask.get_num_frequency_bins()):
                if not np.isnan(energy_density_salience_mask.data[0, k, n]):
                    local_azi.append(masked_doa.data[0, k, n])
                    local_ele.append(masked_doa.data[1, k, n])
                    # One entry per kept bin, so the mean time index below is
                    # weighted by how many bins each time frame contributed.
                    index_of_bins_estimated.append(n)

        local_azi = np.asarray(local_azi)
        local_ele = np.asarray(local_ele)
        # local_dif = np.asarray(local_dif)

        # Azimuth is treated as circular on [-pi, pi]; elevation as linear.
        local_azi_mean = scipy.stats.circmean(local_azi, high=np.pi, low=-np.pi)
        local_azi_std = scipy.stats.circstd(local_azi, high=np.pi, low=-np.pi)
        local_ele_mean = np.mean(local_ele)
        local_ele_std = np.std(local_ele)

        if plot:
            fig = plt.figure()
            ax = fig.add_subplot(111)
            plt.suptitle(plot_title+'FREQ BIN '+str(idx)+' - bins '+str(n_range))
            # cmap = plt.cm.get_cmap("copper")
            plt.grid()
            plt.xlim(-np.pi,np.pi)
            plt.ylim(-np.pi/2,np.pi/2)
            plt.scatter(local_azi, local_ele, marker='o',)
            plt.scatter(local_azi_mean, local_ele_mean, c='red', s=20, marker='+')
            # Ellipses whose width/height are one and three standard deviations.
            ax.add_patch(Ellipse(xy=(local_azi_mean,local_ele_mean), width=local_azi_std,height=local_ele_std, alpha=0.5))
            ax.add_patch(Ellipse(xy=(local_azi_mean,local_ele_mean), width=3*local_azi_std,height=3*local_ele_std, alpha=0.1))

        # Convert the mean time-bin index to milliseconds, with
        # processing_window_ms spread evenly over all STFT time bins.
        mean_location = np.mean(index_of_bins_estimated)
        time_resolution_ms = float(processing_window_ms) / masked_doa.get_num_time_bins()
        estimated_location_ms = mean_location * time_resolution_ms

        peak_stats.append(
            [estimated_location_ms, [local_azi_mean, local_azi_std], [local_ele_mean, local_ele_std]])

    ### Return peak stats
    return peak_stats
analysis_window_size = 512 window_overlap = analysis_window_size // 2 fmin = 125 fmax = 8000 fft_factor = 1 fft_size = analysis_window_size * fft_factor ## Open open the noise background # bg_path = '/Volumes/Dinge/ambiscaper/background/noise2/noise2.wav' # bg_path = '/Volumes/Dinge/ambiscaper/background/isf/isf_acn_sn3d.wav' bg_path = '/Volumes/Dinge/ambiscaper/background/isf/isf_bformat_plugin_filter.wav' # bg_path = '/Volumes/Dinge/ambiscaper/background/isf/isf.wav' bg, sr = sf.read(bg_path) bg = bg.T * 0.1 bg_signal = psa.Signal(bg, sr, 'acn', 'sn3d') bg_stft = psa.Stft.fromSignal(bg_signal, window_size=analysis_window_size, window_overlap=window_overlap, nfft=fft_size) # psa.plot_signal(bg_signal,title='groundtruth background') # psa.plot_magnitude_spectrogram(bg_stft,title='groundtruth background') # psa.plot_diffuseness(psa.compute_diffuseness(bg_stft),'groundtruth bg diffuseness') # Open the foreground fg_path = '/Volumes/Dinge/ambiscaper/background/foreground/foreground.wav' fg, sr = sf.read(fg_path) fg = fg.T * 5 fg_signal = psa.Signal(fg, sr, 'acn', 'sn3d') fg_stft = psa.Stft.fromSignal(fg_signal, window_size=analysis_window_size,
# Render SH room impulse responses, reverberate one second of white noise with
# them and plot waveform, spectrogram and DOA of the result.

sh_rirs = srs.render_rirs_sh(abs_echograms, band_centerfreqs, fs).squeeze()
# SN3D norm
sh_rirs = sh_rirs * np.sqrt(4 * np.pi) * [
    1,
    1. / np.sqrt(3),
    1. / np.sqrt(3),
    1. / np.sqrt(3),
]

plt.figure()
plt.plot(sh_rirs)
plt.show()

# One second of Gaussian noise as the dry source.
signal_len_samples = int(np.floor(1. * fs))
signal = np.random.randn(signal_len_samples)

# Convolve the source with each of the four RIR channels and truncate the
# result to the source length.
reverberant_signal = np.zeros((signal_len_samples, 4))
for i in range(4):
    reverberant_signal[:, i] = scipy.signal.fftconvolve(
        signal,
        sh_rirs[:, i].squeeze(),
    )[:signal_len_samples]

# psa.Signal is given the transposed array, i.e. channels first.
x = psa.Signal(reverberant_signal.T, fs, 'acn', 'sn3d')
psa.plot_signal(x, title='waveform')

analysis_window_size = 512
window_overlap = analysis_window_size // 2
fft_size = analysis_window_size

stft = psa.Stft.fromSignal(
    x,
    window_size=analysis_window_size,
    window_overlap=window_overlap,
    nfft=fft_size,
)
psa.plot_magnitude_spectrogram(stft, title='magnitude spectrogram')

doa = psa.compute_DOA(stft)
psa.plot_doa(doa, title='doa')
plt.show()