from maad import sound, rois, features
from maad.util import power2dB, plot2D, format_features, read_audacity_annot

#%%
# First, load an audio file and compute the power spectrogram.
s, fs = sound.load('../data/cold_forest_daylight.wav')

t0 = 0
t1 = 20
f0 = 100
f1 = 10000
dB_max = 96

Sxx_power, tn, fn, ext = sound.spectrogram(s, fs, nperseg=1024,
                                           noverlap=1024 // 2,
                                           fcrop=(f0, f1), tcrop=(t0, t1))

# Convert the power spectrogram into dB and add dB_max, the maximum decibel
# range when the quantization depth is 16 bits, then display the result.
Sxx_db = power2dB(Sxx_power) + dB_max
plot2D(Sxx_db, **{'vmin': 0, 'vmax': dB_max, 'extent': ext})

#%%
# Then, relevant acoustic events are extracted directly from the power
# spectrogram based on a double thresholding technique. The result is a binary
# image called a mask. The double thresholding technique is more sophisticated
# than basic thresholding based on a single value. First, a threshold selects
# pixels with high value (i.e. high acoustic energy); these should belong to
# an acoustic event and are called seeds. From these seeds, we aggregate
# pixels connected to the seed with value higher than a second, lower
# threshold.
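# A minimal sketch of this double-thresholding step, assuming maad's
# create_mask function with its 'relative' binarization mode. The two
# threshold parameters below are illustrative and must be tuned per dataset.
im_mask = rois.create_mask(im=Sxx_db, mode_bin='relative',
                           bin_std=8, bin_per=0.5)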
In an audio signal, regions of interest are usually regions with a high
density of energy. The function find_rois_cwt allows finding regions of
interest in the signal given very simple and intuitive parameters: temporal
length and frequency limits. This segmentation can be seen as a coarse
detection process, the starting point of more advanced classification methods.

The following sound example has two main soundtypes in the foreground:

- An accelerating trill between 4.5 and 8 kHz lasting approximately 2 seconds.
- A fast descending chirp between 8 and 12 kHz lasting approximately 0.1 seconds.

"""
#%%
# Load an audio file and compute the spectrogram for visualization.
from maad import sound
from maad.rois import find_rois_cwt
from maad.util import power2dB, plot2D

s, fs = sound.load('../../data/spinetail.wav')
Sxx, tn, fn, ext = sound.spectrogram(s, fs, nperseg=1024, noverlap=512)

Sxx_db = power2dB(Sxx, db_range=100) + 100
plot2D(Sxx_db, **{'extent': ext})

#%%
# Detect the accelerating trill
# -----------------------------
# The accelerating trill is the song of a small neotropical bird,
# *Cranioleuca erythrops*. This song can be detected on the recording using
# the function find_rois_cwt, setting the frequency limits flims=(4500, 8000)
# and the temporal length of the signal tlen=2.
_ = find_rois_cwt(s, fs, flims=(4500, 8000), tlen=2, th=0,
                  display=True, figsize=(13, 6))
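#%%
# Detect the fast descending chirp
# --------------------------------
# The second soundtype described in the introduction can be detected with the
# same function. A sketch, reusing find_rois_cwt with the frequency limits and
# duration listed above (the threshold value is illustrative):
_ = find_rois_cwt(s, fs, flims=(8000, 12000), tlen=0.1, th=0,
                  display=True, figsize=(13, 6))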
""" from maad.sound import load, spectrogram from maad.features import shape_features, plot_shape, centroid_features, overlay_centroid from maad.util import read_audacity_annot, linear_scale, format_features, get_unimode, running_mean from maad.rois import overlay_rois, create_mask, select_rois, find_rois_cwt, remove_background, median_equalizer from skimage import morphology import numpy as np import pandas as pd ###=============== load audio ================= s, fs = load('./data/spinetail.wav') rois = read_audacity_annot( './data/spinetail.txt') ## annotations using Audacity ###=============== compute spectrogram ================= Sxx, tn, fn, ext = spectrogram(s, fs) Sxx = 10 * np.log10(Sxx) rois = format_features(rois, tn, fn) ###=============== from Audacity ================= ### with all labels ax, fig = overlay_rois(Sxx, ext, rois, vmin=-120, vmax=20) # Compute an visualize features shape, params = shape_features(Sxx, resolution='low', rois=rois) plot_shape(shape.mean(), params) # Compute and visualize centroids centroid = centroid_features(Sxx, rois)
print('Processing date: ', date)

# concatenate audio into a single array
s_sum = list()
for index, row in flist_day.iterrows():
    s = s_dict[row.date]['s']
    s_sum.append(s)

# crossfade and high-pass filtering
s_sum = crossfade_list(s_sum, fs)
s_sum = butter_filter(s_sum, cutoff=200, fs=fs, order=2, ftype='high')

# compute spectrogram
im, dt, df, ext = sound.spectrogram(s_sum, fs, nperseg=opt_spec['wl'],
                                    cmap='viridis',
                                    overlap=opt_spec['ovlp'],
                                    fcrop=opt_spec['fcrop'],
                                    rescale=True,
                                    db_range=opt_spec['db_range'])

# apply Gaussian smoothing
im = gaussian(im, sigma=0.5, mode='reflect')

# normalize spectrogram according to sensor model
vmin, vmax = 0.4, 0.8  # Audiomoth
im[im < vmin] = vmin
im[im > vmax] = vmax
im = (im - im.min()) / (im.max() - im.min())

# save to file
im = np.flip(im, axis=0)
key = fname_open[0:-4] + '_' + date
io.imsave(path_save + key + fmt, im)
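# The helpers crossfade_list and butter_filter are defined elsewhere in the
# project. A minimal sketch of what butter_filter might look like, built on
# scipy.signal (a hypothetical reimplementation, not the original helper):
from scipy.signal import butter, sosfiltfilt

def butter_filter(x, cutoff, fs, order=2, ftype='high'):
    # Design a Butterworth filter as second-order sections, then apply it
    # forward-backward for zero phase distortion.
    sos = butter(order, cutoff, btype=ftype, fs=fs, output='sos')
    return sosfiltfilt(sos, x)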
                                          fs,
                                          gain=G,
                                          sensibility=S,
                                          dB_threshold=3,
                                          rejectDuration=0.01,
                                          verbose=False,
                                          display=False)

"""=======================================================================
                 Computation in the frequency domain
========================================================================"""

# Compute the power spectral density (PSD): Sxx_power
Sxx_power, tn, fn, ext = sound.spectrogram(wave, fs, window='hann',
                                           nperseg=1024,
                                           noverlap=1024 // 2,
                                           verbose=False, display=False,
                                           savefig=None)

# Compute all the spectral indices and store them into a DataFrame.
# flim_low, flim_mid and flim_hi correspond to the frequency limits in Hz
# that are required to compute some indices (e.g. NDSI).
# If R_compatible is set to 'soundecology', the output is similar to that of
# the soundecology R package.
# mask_param1 and mask_param2 are two parameters used to find the regions of
# interest (ROIs). These parameters need to be adapted to the dataset in
# order to select ROIs.
df_spec_ind, df_spec_ind_per_bin = features.all_spectral_alpha_indices(
    Sxx_power, tn,
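    fn,
    # NOTE: the original call was truncated after 'tn'; the continuation
    # below is a sketch based on the parameters named in the comments above.
    # The band limits and mask values are illustrative assumptions to be
    # adapted to the dataset.
    flim_low=[0, 1500], flim_mid=[1500, 8000], flim_hi=[8000, 20000],
    gain=G, sensibility=S,
    R_compatible='soundecology',
    mask_param1=6, mask_param2=0.5,
    verbose=False, display=False)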
# Wildlife. The sensitivity of the internal microphone is -35 dBV and the
# maximal voltage converted by the analog-to-digital converter (ADC) is 2 Vpp
# (peak to peak). The gain used for the recording is a combination of
# the internal pre-amplifier of the SM4, which is 26 dB, and the adjustable
# gain, which was 16 dB. So the total gain applied to the signal is 42 dB.

# We load the sound
w, fs = sound.load('../../data/spinetail.wav')
# We convert the sound into sound pressure level (Pa)
p0 = spl.wav2pressure(wave=w, gain=42, Vadc=2, sensitivity=-35)

# We select the part of the sound with the spinetail signal
p0_sig = p0[int(5.68 * fs):int(7.48 * fs)]
# We select the part of the sound with background noise
p0_noise = p0[int(8.32 * fs):int(10.12 * fs)]

# We convert both signals into spectrograms
Sxx_power, tn, fn, ext = sound.spectrogram(p0_sig, fs)
Sxx_power_noise, tn, fn, ext = sound.spectrogram(p0_noise, fs)

# We convert both spectrograms into dB. We choose a dB range of 96 dB, which
# is the maximal range for a 16-bit signal.
Sxx_dB = util.power2dB(Sxx_power, db_range=96) + 96
Sxx_dB_noise = util.power2dB(Sxx_power_noise, db_range=96) + 96

#%%
# Before simulating the attenuation of the acoustic signature depending on
# the distance, we need to evaluate the distance at which the signal of the
# spinetail was recorded.
# First, we estimate the sound level L of the spinetail song in the recording
# by selecting the sound between 4900 and 7500 Hz.
p0_sig_4900_7500 = sound.select_bandwidth(p0_sig, fs, fcut=[4900, 7500],
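                                          # NOTE: the original call was
                                          # truncated at this point; the
                                          # filter order and type below are
                                          # illustrative assumptions.
                                          forder=10, ftype='bandpass')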
                        remove_background_along_axis, sharpness)
import numpy as np
from timeit import default_timer as timer
import matplotlib.pyplot as plt

#%%
# Load and plot the spectrogram of the original audio file
# --------------------------------------------------------
# First, we load the audio file and compute its spectrogram.
# The linear spectrogram is then transformed into dB. The dB range is 96 dB,
# which is the maximum dB range value for a 16-bit audio recording. We add
# 96 dB in order to have only positive values in the spectrogram.
s, fs = load('../../data/tropical_forest_morning.wav')
Sxx, tn, fn, ext = spectrogram(s, fs, fcrop=[0, 20000], tcrop=[0, 60])
Sxx_dB = power2dB(Sxx, db_range=96) + 96
plot2d(Sxx_dB, extent=ext, title='original',
       vmin=np.median(Sxx_dB), vmax=np.median(Sxx_dB) + 40)

print("Original sharpness : %2.3f" % sharpness(Sxx_dB))

#%%
# Test different methods to remove stationary background noise
# ------------------------------------------------------------
# Test the function "remove_background"
start = timer()
X1, noise_profile1, _ = remove_background(Sxx_dB)
elapsed_time = timer() - start
print("---- test remove_background -----")
print("duration %2.3f s" % elapsed_time)
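#%%
# Test the function "remove_background_along_axis", which is imported above
# but not exercised in this excerpt. A sketch, assuming median-based noise
# estimation along the time axis and reusing the timing pattern above:
start = timer()
X2, noise_profile2 = remove_background_along_axis(Sxx_dB, mode='median', axis=1)
elapsed_time = timer() - start
print("---- test remove_background_along_axis -----")
print("duration %2.3f s" % elapsed_time)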
s, fs = sound.load('../../data/spinetail.wav')
util.plot_wave(s, fs)

#%%
# It can be noticed that in this audio there are four consecutive songs of
# the spinetail *Cranioleuca erythrops*, each song lasting approximately two
# seconds. Let's trim the signal to zoom in on the details of the song.
s_trim = sound.trim(s, fs, 5, 8)

#%%
# Once trimmed, let's compute the envelope of the signal, and the Fourier and
# short-time Fourier transforms.
env = sound.envelope(s_trim, mode='fast', Nt=128)
pxx, fidx = sound.spectrum(s_trim, fs, nperseg=1024, method='welch')
Sxx, tn, fn, ext = sound.spectrogram(s_trim, fs, window='hann',
                                     nperseg=1024, noverlap=512)

#%%
# Finally, we can visualize the signal characteristics in the temporal and
# spectral domains.
fig, ax = plt.subplots(4, 1, figsize=(8, 10))
util.plot_wave(s_trim, fs, ax=ax[0])
util.plot_wave(env, fs, ax=ax[1])
util.plot_spectrum(pxx, fidx, ax=ax[2])
util.plot_spectrogram(Sxx, extent=ext, ax=ax[3], colorbar=False)
def batch_feature_rois_no_verb(rois_list, params_features, path_audio):
    """
    Computes features for a list of files

    Parameters
    ----------
    rois_list: list
        List of dictionaries, one per file, with keys 'fname' (audio
        filename) and 'rois' (DataFrame of detections for that file).
    params_features: dict
        Dictionary with the parameters needed to compute the features:
        'flims', 'opt_spec', and 'opt_shape_str'.
    path_audio: str
        Path to the place where the dataset of audio files is stored.

    Returns
    -------
    info_features: dict
        Dictionary with features and all the parameters used to compute them.
        Included keys: features, parameters_df, opt_shape, opt_spectro
    """
    ## TODO: when the time limits are too short, the function has problems
    # load parameters
    flims = params_features['flims']
    opt_spec = params_features['opt_spec']
    opt_shape = opt_shape_presets(params_features['opt_shape_str'])

    # load detection data
    features = []
    for idx, file in enumerate(rois_list):
        # unpack file values
        fname = file['fname']
        rois_tf = file['rois']
        #print(idx+1, '/', len(rois_list), fname)

        if rois_tf.empty:
            #print('< No detection on file >')
            features.append({'fname': fname, 'features': pd.DataFrame()})
        else:
            # load materials: sound, spectrogram
            s, fs = sound.load(path_audio + fname)
            im, dt, df, ext = sound.spectrogram(s, fs,
                                                nperseg=opt_spec['nperseg'],
                                                overlap=opt_spec['overlap'],
                                                fcrop=flims, rescale=False,
                                                db_range=opt_spec['db_range'])

            # format rois to bbox
            ts = np.arange(ext[0], ext[1], dt)
            f = np.arange(ext[2], ext[3] + df, df)
            rois_bbox = format_rois(rois_tf, ts, f, fmt='bbox')

            # roi to image blob
            im_blobs = rois_to_imblobs(np.zeros(im.shape), rois_bbox)

            # get features: shape, center frequency
            im = normalize_2d(im, 0, 1)
            bbox, params, shape = shape_features(im, im_blobs,
                                                 resolution='custom',
                                                 opt_shape=opt_shape)
            _, cent = centroid(im, im_blobs)
            cent['frequency'] = f[round(cent.y).astype(int)]  # y values to frequency

            # format rois to time-frequency
            rois_out = format_rois(bbox, ts, f, fmt='tf')

            # combine into a single df
            aux_df = pd.concat([rois_out, shape, cent.frequency], axis=1)
            # aux_df['fname'] = fname
            features.append({'fname': fname, 'features': aux_df})

    # Arrange the data into a dictionary
    info_features = {'features': features,
                     'parameters_df': params,
                     'opt_shape': opt_shape,
                     'opt_spectro': opt_spec}
    return info_features
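# Hypothetical usage sketch (the file name, ROI DataFrame and parameter
# values below are assumptions for illustration only):
#
#   params_features = {'flims': (1000, 10000),
#                      'opt_spec': {'nperseg': 512, 'overlap': 0.5,
#                                   'db_range': 80},
#                      'opt_shape_str': 'med'}
#   rois_list = [{'fname': 'rec_001.wav', 'rois': rois_df}]
#   info = batch_feature_rois_no_verb(rois_list, params_features, './audio/')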
from maad.sound import load, spectrogram
from maad.features import shape_features, plot_shape
from maad.util import format_features, read_audacity_annot, power2dB
from maad.rois import overlay_rois
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn import preprocessing

s, fs = load('../data/spinetail.wav')
rois_tf = read_audacity_annot('../data/spinetail.txt')  ## annotations using Audacity
rois_cr = rois_tf.loc[rois_tf.label == 'CRER', :]
rois_sp = rois_tf.loc[rois_tf.label == 'SP', :]

Sxx_power, ts, f, ext = spectrogram(s, fs)
Sxx_dB = power2dB(Sxx_power, db_range=96) + 96

# Visualize large vocalizations
rois_cr = format_features(rois_cr, ts, f)
overlay_rois(Sxx_dB, rois_cr, **{'extent': ext, 'vmin': 0, 'vmax': 80})

# Visualize short vocalizations
rois_sp = format_features(rois_sp, ts, f)
overlay_rois(Sxx_dB, rois_sp, **{'extent': ext, 'vmin': 0, 'vmax': 80})

# Compute and visualize features
shape_cr, params = shape_features(Sxx_dB, resolution='med', rois=rois_cr)
ax = plot_shape(shape_cr.mean(), params)
shape_sp, params = shape_features(Sxx_dB, resolution='med', rois=rois_sp)
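# The PCA and preprocessing modules imported above are not exercised in this
# excerpt. A sketch of how they could be used to compare both vocalization
# types in a reduced feature space; we assume the shape feature columns
# returned by shape_features are prefixed with 'shp'.
X = np.vstack([shape_cr.filter(regex='^shp').values,
               shape_sp.filter(regex='^shp').values])
Y = PCA(n_components=2).fit_transform(preprocessing.scale(X))

n_cr = len(shape_cr)
plt.figure()
plt.scatter(Y[:n_cr, 0], Y[:n_cr, 1], label='CRER')
plt.scatter(Y[n_cr:, 0], Y[n_cr:, 1], label='SP')
plt.xlabel('PC1')
plt.ylabel('PC2')
plt.legend()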
def compute_rois_features(s, fs, rois_tf, opt_spec, opt_shape, flims):
    """
    Computes shape and central frequency features from a signal at specified
    time-frequency limits defined by regions of interest (ROIs)

    Parameters
    ----------
    s: ndarray
        Signal to be analysed
    fs: int
        Sampling frequency of the signal
    rois_tf: pandas DataFrame
        Time-frequency limits for the analysis. Columns should have at
        least min_t, max_t, min_f, max_f. Can be computed with multiple
        detection methods, such as find_rois_cwt
    opt_spec: dictionary
        Options for the spectrogram with keys: window length 'nperseg' and
        window overlap in percentage 'overlap'
    opt_shape: dictionary
        Options for the filter bank (kbank_opt) and the number of scales (npyr)
    flims: list of 2 scalars
        Minimum and maximum boundary frequency values in Hertz

    Returns
    -------
    feature_rois: pandas DataFrame
        A dataframe with each column corresponding to a feature

    Example
    -------
    s, fs = sound.load('spinetail.wav')
    rois_tf = find_rois_cwt(s, fs, flims=(3000, 8000), tlen=2, th=0.003)
    opt_spec = {'nperseg': 512, 'overlap': 0.5}
    opt_shape = opt_shape_presets('med')
    features_rois = compute_rois_features(s, fs, rois_tf, opt_spec,
                                          opt_shape, flims)
    """
    im, dt, df, ext = sound.spectrogram(s, fs, nperseg=opt_spec['nperseg'],
                                        overlap=opt_spec['overlap'],
                                        fcrop=flims, rescale=False,
                                        db_range=100)

    # format rois to bbox
    ts = np.arange(ext[0], ext[1], dt)
    f = np.arange(ext[2], ext[3] + df, df)
    rois_bbox = format_rois(rois_tf, ts, f, fmt='bbox')

    # roi to image blob
    im_blobs = rois_to_imblobs(np.zeros(im.shape), rois_bbox)

    # get features: shape, center frequency
    im = normalize_2d(im, 0, 1)
    bbox, params, shape = shape_features(im, im_blobs, resolution='custom',
                                         opt_shape=opt_shape)
    _, cent = centroid(im, im_blobs)
    cent['frequency'] = f[round(cent.y).astype(int)]  # y values to frequency

    # format rois to time-frequency
    rois_out = format_rois(bbox, ts, f, fmt='tf')

    # combine into a single df
    rois_features = pd.concat([rois_out, shape, cent.frequency], axis=1)

    return rois_features
# sphinx_gallery_thumbnail_path = './_images/sphx_glr_compare_auto_and_manual_rois_selection.png'

import numpy as np
import pandas as pd
from maad import sound, rois, features
from maad.util import (power2dB, plot2d, format_features, read_audacity_annot,
                       overlay_rois, overlay_centroid)

#%%
# First, load an audio file and compute the power spectrogram.
s, fs = sound.load('../../data/cold_forest_daylight.wav')

dB_max = 96

Sxx_power, tn, fn, ext = sound.spectrogram(s, fs, nperseg=1024,
                                           noverlap=1024 // 2)

# Convert the power spectrogram into dB and add dB_max, the maximum decibel
# range when the quantization depth is 16 bits, then display the result.
Sxx_db = power2dB(Sxx_power) + dB_max
plot2d(Sxx_db, **{'vmin': 0, 'vmax': dB_max, 'extent': ext})

#%%
# Then, relevant acoustic events are extracted directly from the power
# spectrogram based on a double thresholding technique. The result is a binary
# image called a mask. The double thresholding technique is more sophisticated
# than basic thresholding based on a single value. First, a threshold selects
# pixels with high value (i.e. high acoustic energy); these should belong to
# an acoustic event and are called seeds. From these seeds, we aggregate
# pixels connected to the seed with value higher than the second threshold.
# These new pixels become seeds in turn, and the aggregation continues until
# no connected pixel exceeds the second threshold.
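# A minimal sketch of this double-thresholding step and of the subsequent
# ROI selection, assuming maad's create_mask and select_rois functions. The
# threshold parameters and minimum ROI size below are illustrative and must
# be tuned per dataset.
im_mask = rois.create_mask(im=Sxx_db, mode_bin='relative',
                           bin_std=8, bin_per=0.5)
im_rois, df_rois = rois.select_rois(im_mask, min_roi=25)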