def batch_find_rois(flist, params_detections, path_audio):
    """
    Find regions of interest (ROIs) in a list of audio files using
    find_rois_cwt.

    Parameters:
    ----------
        params_detections: dict
            Dictionary with the basic parameters to feed find_rois_cwt:
            'flims', 'tlen', and 'th'.
        flist : pandas DataFrame
            List of audio filenames to process. Must have a column 'fname'.
        path_audio : str
            Path to the directory where the audio files are stored.

    Returns:
    -------
        info_detections: dict
            Dictionary with the detections for each file and the parameters
            used to compute them. Keys: detections, parameters.
    """
    # load parameters
    flims = params_detections['flims']
    tlen = params_detections['tlen']
    th = params_detections['th']

    detections = list()
    for idx, fname in enumerate(flist['fname']):
        print(idx + 1, '/', len(flist), fname)
        s, fs = sound.load(path_audio + fname)
        rois = find_rois_cwt(s, fs, flims, tlen, th)
        if not rois.empty:
            # filter rois shorter than 25% of tlen
            idx_rm = (rois.max_t - rois.min_t) < tlen * 0.25
            rois.drop(index=np.where(idx_rm)[0], inplace=True)
            rois.reset_index(inplace=True, drop=True)
        # save to list
        detections.append({'fname': fname, 'rois': rois})

    info_detections = {'detections': detections,
                       'parameters': params_detections}
    return info_detections
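# Usage sketch (illustrative, not part of the original module): the file names
# and parameter values below are assumptions chosen for demonstration.
import pandas as pd

flist_demo = pd.DataFrame({'fname': ['rec_001.wav', 'rec_002.wav']})
params_demo = {'flims': (4500, 8000), 'tlen': 2, 'th': 0}
info_detections = batch_find_rois(flist_demo, params_demo, path_audio='./audio/')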
mathematical morphology tools...

Dependencies: To execute this example you will need to have installed the
scikit-image, scikit-learn and pandas Python packages.

"""
# sphinx_gallery_thumbnail_path = '../_images/sphx_glr_compare_auto_and_manual_rois_selection.png'

import numpy as np
import pandas as pd
from maad import sound, rois, features
from maad.util import power2dB, plot2D, format_features, read_audacity_annot

#%%
# First, load an audio file and compute the power spectrogram.
s, fs = sound.load('../data/cold_forest_daylight.wav')

t0 = 0
t1 = 20
f0 = 100
f1 = 10000
dB_max = 96

Sxx_power, tn, fn, ext = sound.spectrogram(s, fs, nperseg=1024, noverlap=1024 // 2,
                                           fcrop=(f0, f1), tcrop=(t0, t1))

# Convert the power spectrogram into dB and add dB_max, the maximum decibel
# range of a 16-bit recording, so that all values are positive.
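#%%
# The conversion itself (sketch; using db_range=dB_max mirrors the comment
# above, and is an assumption rather than the example's original code):
Sxx_db = power2dB(Sxx_power, db_range=dB_max) + dB_max
plot2D(Sxx_db, **{'extent': ext})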
# -*- coding: utf-8 -*-
"""
Created on Tue Nov  3 12:45:22 2020

@author: haupert
"""

from maad.sound import load, spectrogram
from maad.features import (shape_features, plot_shape, centroid_features,
                           overlay_centroid)
from maad.util import (read_audacity_annot, linear_scale, format_features,
                       get_unimode, running_mean)
from maad.rois import (overlay_rois, create_mask, select_rois, find_rois_cwt,
                       remove_background, median_equalizer)
from skimage import morphology
import numpy as np
import pandas as pd

###=============== load audio =================
s, fs = load('./data/spinetail.wav')
rois = read_audacity_annot('./data/spinetail.txt')  ## annotations using Audacity

###=============== compute spectrogram =================
Sxx, tn, fn, ext = spectrogram(s, fs)
Sxx = 10 * np.log10(Sxx)

rois = format_features(rois, tn, fn)

###=============== from Audacity =================
### with all labels
ax, fig = overlay_rois(Sxx, ext, rois, vmin=-120, vmax=20)

# Compute and visualize features
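# Sketch of the feature step (illustrative; assumes the maad API in which
# shape_features accepts a rois DataFrame and returns the feature table along
# with the 2D wavelet filter parameters; the resolution value is an assumption):
shape, params = shape_features(Sxx, resolution='med', rois=rois)
ax = plot_shape(shape.mean(), params)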
In an audio signal, regions of interest are usually regions with a high
density of energy. The function find_rois_cwt allows finding regions of
interest in the signal given very simple and intuitive parameters: temporal
length and frequency limits. This segmentation can be seen as a coarse
detection process, the starting point of more advanced classification methods.

The following sound example has two main soundtypes in the foreground:

- An accelerating trill between 4.5 and 8 kHz lasting approximately 2 seconds
- A fast descending chirp between 8 and 12 kHz lasting approximately 0.1 seconds

"""

#%% Load an audio file and compute the spectrogram for visualization.

from maad import sound
from maad.rois import find_rois_cwt
from maad.util import power2dB, plot2D

s, fs = sound.load('../../data/spinetail.wav')
Sxx, tn, fn, ext = sound.spectrogram(s, fs, nperseg=1024, noverlap=512)
Sxx_db = power2dB(Sxx, db_range=100) + 100
plot2D(Sxx_db, **{'extent': ext})

#%%
# Detect the accelerating trill
# -----------------------------
# The accelerating trill is the song of a small neotropical bird,
# Cranioleuca erythrops. This song can be detected on the recording using the
# function find_rois_cwt, setting the frequency limits flims=(4500,8000) and
# the temporal length of the signal tlen=2.
_ = find_rois_cwt(s, fs, flims=(4500, 8000), tlen=2, th=0, display=True)
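#%%
# Detect the fast descending chirp
# --------------------------------
# The same approach targets the chirp described above (sketch; the threshold
# value th is illustrative):
_ = find_rois_cwt(s, fs, flims=(8000, 12000), tlen=0.1, th=0.001, display=True)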
df_indices = pd.DataFrame()
df_indices_per_bin = pd.DataFrame()

for index, row in df.iterrows():

    # get the full filename of the corresponding row
    fullfilename = row['file']
    # Save file basename
    path, filename = os.path.split(fullfilename)
    print('\n**************************************************************')
    print(filename)

    #### Load the original sound (16 bits) and get the sampling frequency fs
    try:
        wave, fs = sound.load(filename=fullfilename, channel='left',
                              detrend=True, verbose=False)
    except Exception:
        # Drop the row if the file does not exist or is corrupted (e.g. no EOF)
        df.drop(index, inplace=True)
        continue

    """=======================================================================
    Computation in the time domain
    ======================================================================="""

    # Parameters of the audio recorder. This is not mandatory, but it allows
    # computing the sound pressure level of the audio file (dB SPL), as a
    # sound level meter would do.
    S = -35       # Sensitivity of the microphone: -35 dBV (SM4) / -18 dBV (Audiomoth)
    G = 26 + 16   # Amplification gain (26 dB (SM4 preamplifier))
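    # Sketch (assumption): with S and G defined, the equivalent continuous
    # sound level Leq could be obtained with maad's spl module, if the
    # installed version provides wav2leq with this signature.
    from maad import spl, util
    Leq = spl.wav2leq(wave, fs, gain=G, sensitivity=S, dt=1)  # Leq per second
    print('Mean Leq (dB SPL): %2.1f' % util.mean_dB(Leq))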
from maad.util import plot2d, power2dB
from maad.sound import (load, spectrogram, remove_background,
                        remove_background_morpho,
                        remove_background_along_axis, sharpness)
import numpy as np
from timeit import default_timer as timer
import matplotlib.pyplot as plt

#%%
# Load and plot the spectrogram of the original audio file
# --------------------------------------------------------
# First, we load the audio file and take its spectrogram.
# The linear spectrogram is then transformed into dB. The dB range is 96 dB,
# which is the maximum dB range value for a 16-bit audio recording. We add
# 96 dB in order to have only positive values in the spectrogram.

s, fs = load('../../data/tropical_forest_morning.wav')
Sxx, tn, fn, ext = spectrogram(s, fs, fcrop=[0, 20000], tcrop=[0, 60])
Sxx_dB = power2dB(Sxx, db_range=96) + 96
plot2d(Sxx_dB, extent=ext, title='original',
       vmin=np.median(Sxx_dB), vmax=np.median(Sxx_dB)+40)
print("Original sharpness : %2.3f" % sharpness(Sxx_dB))

#%%
# Test different methods to remove stationary background noise
# ------------------------------------------------------------
# Test the function "remove_background"
start = timer()
X1, noise_profile1, _ = remove_background(Sxx_dB)
elapsed_time = timer() - start
print("---- test remove_background -----")
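#%%
# For comparison, time another of the imported methods (sketch; this assumes
# remove_background_along_axis returns the corrected spectrogram plus the
# noise profile, and that mode='median' is a valid option):
start = timer()
X2, noise_profile2 = remove_background_along_axis(Sxx_dB, mode='median')
elapsed_time = timer() - start
print("---- test remove_background_along_axis -----")
print("elapsed time: %2.2f s" % elapsed_time)
print("sharpness : %2.3f" % sharpness(X2))
plot2d(X2, extent=ext, title='remove_background_along_axis',
       vmin=np.median(X2), vmax=np.median(X2)+40)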
def format_trainds(df, flims, wl, path_audio):
    """
    Arranges all the training data into a dictionary for easy and compact
    access.

    Parameters
    ----------
    df : pandas DataFrame
        DataFrame with information on the regions of interest to be arranged.
        Must have the columns: fname, min_t, max_t.
    flims : tuple or list
        Minimum and maximum frequency limits of the band-pass filter. This is
        used to filter out unwanted sounds and improve the manual analysis.
    wl : int or float
        Window length (in seconds) of each region of interest. While the
        regions have a specified duration, this argument makes it possible to
        enlarge the window of observation, giving a wider context to analyse
        the audio. Recommended minimum: 2 seconds.
    path_audio : str
        Path to the directory where all the raw audio files are stored.

    Returns
    -------
    train_data : dict
        A dictionary with the keys: roi_info, shape_features, label, audio,
        segments and maad_label.
    """
    print('Aligning ROIs, number of observations:', len(df))
    df['tlen'] = df.max_t - df.min_t
    audiolist = list()
    for idx, roi in df.iterrows():
        fname_wav = path_audio + roi.fname
        # define time limits by centering a window of length wl on the roi
        length = roi.max_t - roi.min_t
        tlims = ((roi.min_t + length/2) - wl/2,
                 (roi.min_t + length/2) + wl/2)
        s, fs = sound.load(fname_wav)
        s = sound.select_bandwidth(s, fs, lfc=flims[0], hfc=flims[1])
        # #normalize?
        rec_length = len(s)/fs
        # if time limits are outside the recording, pad with silence
        if tlims[1] > rec_length:  # add silence at the end
            sil_len = tlims[1] - rec_length
            silence = np.zeros(int(sil_len*fs))
            s_roi = np.concatenate([s[int(tlims[0]*fs):], silence])
        elif tlims[0] < 0:  # add silence at the beginning
            sil_len = abs(tlims[0])
            silence = np.zeros(int(sil_len*fs))
            s_roi = np.concatenate([silence, s[0:int(tlims[1]*fs)]])
        else:
            s_roi = s[int(tlims[0]*fs):int(tlims[1]*fs)]
        audiolist.append(s_roi.copy())

    ## write segments for manual annotations
    onset = (wl/2) - (df.tlen/2)
    offset = (wl/2) + (df.tlen/2)
    seg = pd.DataFrame({'onset': onset, 'offset': offset})
    seg['label'] = 'NA'

    ## assign to object and save
    train_data = dict()
    idx_features = df.columns.str.startswith('shp') | (df.columns == 'frequency')
    train_data['roi_info'] = df[['fname', 'min_t', 'max_t', 'min_f', 'max_f']]
    train_data['shape_features'] = df.loc[:, idx_features]
    train_data['label'] = seg.label
    train_data['audio'] = audiolist
    train_data['segments'] = seg[['onset', 'offset']]
    train_data['maad_label'] = df.cluster
    return train_data
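# Usage sketch (illustrative; all column values are assumptions). Note the
# 'cluster' column is required because it feeds the 'maad_label' key.
import pandas as pd

df_demo = pd.DataFrame({'fname': ['rec_001.wav'],
                        'min_t': [2.0], 'max_t': [3.5],
                        'min_f': [4500.0], 'max_f': [8000.0],
                        'cluster': [0]})
train_data = format_trainds(df_demo, flims=(4000, 9000), wl=5,
                            path_audio='./audio/')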
def batch_predict_rois(flist, tuned_clfs, params, path_audio_db='./'):
    """
    Predict the labels of ROIs in a list of audio files.

    Parameters
    ----------
    flist: pandas DataFrame
        List of audio filenames to be analysed. Column name must be 'fname'.
    tuned_clfs: dict
        Data structure with classifiers tuned by grid search or random search.
    params: dict
        Data structure with the same parameters used to train the classifiers.
        Keys to be included: 'sample_rate_wav', 'flims', 'tlen', 'th',
        'opt_spec', 'opt_shape_str'.
    path_audio_db: str, default current directory
        Path pointing to the directory where the audio files are located.
        Note that all files in flist must be in the same directory.

    Returns
    -------
    predictions: dict
        Data structure with the names of the audio files as keys. Each element
        in the dictionary has a DataFrame with predictions for every region of
        interest found. Predictions are given as probabilities for three
        different classifiers, namely Random Forest ('rf'), AdaBoost ('adb')
        and Support Vector Machines ('svm').
    """
    t_start = time.time()  # compute processing time
    # Load params and variables
    clf_svm = tuned_clfs['svm'].best_estimator_
    clf_rf = tuned_clfs['rf'].best_estimator_
    clf_adb = tuned_clfs['adb'].best_estimator_
    flims = params['flims']
    tlen = params['tlen']
    th = params['th']
    opt_spec = params['opt_spec']
    opt_shape = opt_shape_presets(params['opt_shape_str'])
    sample_rate_std = params['sample_rate_wav']

    # Batch: compute rois, features and predict through files
    predictions = dict()
    for idx, fname in enumerate(flist['fname']):
        print(idx+1, '/', len(flist), fname)
        # fname = flist['fname'][0]
        s, fs = sound.load(path_audio_db+fname)
        # Check the sampling frequency of the file
        if fs != sample_rate_std:
            print('Warning: sample rate mismatch, resampling audio file to standard',
                  sample_rate_std, 'Hz')
            s = resample(s, fs, sample_rate_std, res_type='kaiser_fast')
            fs = sample_rate_std
        rois = find_rois_cwt(s, fs, flims, tlen, th)
        if rois.empty:
            #print('< No detection on file >')
            predictions[fname] = -1
        else:
            # filter rois shorter than 25% of tlen
            idx_rm = (rois.max_t - rois.min_t) < tlen*0.25
            rois.drop(index=np.where(idx_rm)[0], inplace=True)
            rois.reset_index(inplace=True, drop=True)
            if rois.empty:
                print('< No detection on file >')
                predictions[fname] = -1
            else:
                # compute features
                rois_features = compute_rois_features(s, fs, rois, opt_spec,
                                                      opt_shape, flims)
                # predict
                X = rois_features.loc[:, rois_features.columns.str.startswith('shp')]
                #X['frequency'] = preprocessing.scale(X['frequency'])  # new! scale frequency
                pred_rf = pd.DataFrame(data=clf_rf.predict_proba(X),
                                       columns=[s + '_rf' for s in clf_rf.classes_.astype('str')])
                pred_adb = pd.DataFrame(data=clf_adb.predict_proba(X),
                                        columns=[s + '_adb' for s in clf_adb.classes_.astype('str')])
                pred_svm = pd.DataFrame(data=clf_svm.predict_proba(X),
                                        columns=[s + '_svm' for s in clf_svm.classes_.astype('str')])
                # save to variable
                pred_proba_file = pd.concat([rois, pred_rf, pred_adb, pred_svm], axis=1)
                predictions[fname] = pred_proba_file

    t_stop = time.time()  # compute processing time
    print('Batch process completed. Processing time: ',
          np.round(t_stop - t_start, 2), 's')
    return predictions
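# Sketch of how the output could be consumed (the helper name and threshold are
# illustrative): keep only ROIs where the random forest assigns a class
# probability above a given value.
def filter_confident_rois(predictions, th_proba=0.75):
    confident = dict()
    for fname, pred in predictions.items():
        if not isinstance(pred, pd.DataFrame):
            continue  # file without detections (flagged with -1)
        proba_rf = pred.loc[:, pred.columns.str.endswith('_rf')]
        confident[fname] = pred[proba_rf.max(axis=1) > th_proba]
    return confident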
def batch_feature_rois_no_verb(rois_list, params_features, path_audio):
    """
    Computes features for a list of files.

    Parameters:
    ----------
        rois_list: list
            List of dictionaries with keys 'fname' and 'rois', as returned by
            batch_find_rois.
        params_features: dict
            Dictionary with the basic parameters to compute the features:
            'flims', 'opt_spec', and 'opt_shape_str'.
        path_audio : str
            Path to the directory where the audio files are stored.

    Returns:
    -------
        info_features: dict
            Dictionary with features and all the parameters used to compute
            the features. Included keys: features, parameters_df, opt_shape,
            opt_spectro.
    """
    ## TODO: when the time limits are too short, the function has problems
    # load parameters
    flims = params_features['flims']
    opt_spec = params_features['opt_spec']
    opt_shape = opt_shape_presets(params_features['opt_shape_str'])

    # load detection data
    features = []
    for idx, file in enumerate(rois_list):
        # unpack file values
        fname = file['fname']
        rois_tf = file['rois']
        #print(idx+1, '/', len(rois_list), fname)

        if rois_tf.empty:
            #print('< No detection on file >')
            features.append({'fname': fname, 'features': pd.DataFrame()})
        else:
            # load materials: sound, spectrogram
            s, fs = sound.load(path_audio+fname)
            im, dt, df, ext = sound.spectrogram(s, fs, nperseg=opt_spec['nperseg'],
                                                overlap=opt_spec['overlap'],
                                                fcrop=flims, rescale=False,
                                                db_range=opt_spec['db_range'])
            # format rois to bbox
            ts = np.arange(ext[0], ext[1], dt)
            f = np.arange(ext[2], ext[3]+df, df)
            rois_bbox = format_rois(rois_tf, ts, f, fmt='bbox')
            # roi to image blob
            im_blobs = rois_to_imblobs(np.zeros(im.shape), rois_bbox)
            # get features: shape, centroid frequency
            im = normalize_2d(im, 0, 1)
            bbox, params, shape = shape_features(im, im_blobs, resolution='custom',
                                                 opt_shape=opt_shape)
            _, cent = centroid(im, im_blobs)
            cent['frequency'] = f[round(cent.y).astype(int)]  # y values to frequency
            # format rois back to time-frequency
            rois_out = format_rois(bbox, ts, f, fmt='tf')
            # combine into a single df
            aux_df = pd.concat([rois_out, shape, cent.frequency], axis=1)
            # aux_df['fname'] = fname
            features.append({'fname': fname, 'features': aux_df})

    # Arrange the data into a dictionary
    info_features = {'features': features,
                     'parameters_df': params,
                     'opt_shape': opt_shape,
                     'opt_spectro': opt_spec}
    return info_features
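# Sketch of how the batch functions chain together (parameter values are
# illustrative; assumes flist and path_audio as in the usage sketch above, and
# that opt_shape_presets accepts a 'med' preset):
params_features = {'flims': (4500, 8000),
                   'opt_spec': {'nperseg': 512, 'overlap': 0.5, 'db_range': 96},
                   'opt_shape_str': 'med'}
info_det = batch_find_rois(flist_demo, params_demo, path_audio='./audio/')
info_feat = batch_feature_rois_no_verb(info_det['detections'], params_features,
                                       path_audio='./audio/')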
Unsupervised learning algorithms search for structures or patterns in a
dataset without requiring labels. In the context of ecoacoustics, this
approach can be useful to draw inferences when manual labelling is
inaccessible or too expensive. For example, unsupervised learning can be used
to estimate the animal acoustic diversity [1], to combine human reasoning and
automated procedures to build reference libraries, and to find hidden
structures in soundscapes.

In this example, we will use unsupervised learning to automatically annotate
multiple sounds in an audio recording. The process follows four main steps. We
will (i) find sounds that can be delimited in time and frequency, here defined
as regions of interest (ROIs), (ii) characterize ROIs by features in the
time-frequency domain using 2D wavelets [2], (iii) use t-SNE, a dimensionality
reduction algorithm, to reduce the dimensionality of the data [3], and (iv)
automatically form homogeneous groups using DBSCAN [4]. We will use a real
audio file recorded with an omnidirectional microphone. This audio has a poor
signal-to-noise ratio, which is typical of automated audio recordings.

**Dependencies**: This example requires the Python package scikit-learn v0.24
or greater.

"""
# sphinx_gallery_thumbnail_path = './_images/sphx_glr_plot_unsupervised_sound_classification_004.png'

import numpy as np
import matplotlib.pyplot as plt
from maad import sound, features, rois
from maad.util import power2dB, plot2d, format_features, overlay_rois

#%%
# Start by loading an example audio file. We will remove low-frequency ambient
# noise with a highpass filter and then compute the spectrogram.

s, fs = sound.load('../../data/rock_savanna.wav')
s_filt = sound.select_bandwidth(s, fs, fcut=100, forder=3, ftype='highpass')

db_max = 70  # used to define the range of the spectrogram
Sxx, tn, fn, ext = sound.spectrogram(s_filt, fs, nperseg=1024, noverlap=512)
Sxx_db = power2dB(Sxx, db_range=db_max) + db_max
plot2d(Sxx_db, **{'extent': ext})

#%%
# 1. Find regions of interest
# ---------------------------
# To find regions of interest in the spectrogram, we will remove stationary
# background noise and then find isolated sounds using a double threshold
# method. Small ROIs due to noise in the signal will be removed.

Sxx_db_rmbg, _, _ = sound.remove_background(Sxx_db)
Sxx_db_smooth = sound.smooth(Sxx_db_rmbg, std=1.2)
im_mask = rois.create_mask(im=Sxx_db_smooth, mode_bin='relative',
                           bin_std=2, bin_per=0.25)  # threshold values are illustrative
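#%%
# Next step (sketch, with an illustrative minimum ROI size): extract the
# connected regions from the mask and overlay them on the spectrogram.
im_rois, df_rois = rois.select_rois(im_mask, min_roi=50)
df_rois = format_features(df_rois, tn, fn)
ax, fig = overlay_rois(Sxx_db, df_rois, **{'extent': ext})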
#%%
# Load packages and set variables.
import glob
import matplotlib.pyplot as plt
from maad import sound, util

fpath = '../../data/indices/'  # location of audio files
sample_len = 3  # length in seconds of each audio slice

#%%
# Build a long list of audio slices of length `sample_len`.
flist = glob.glob(fpath + '*.wav')
long_wav = list()
for idx, fname in enumerate(flist):
    s, fs = sound.load(fname)
    s = sound.trim(s, fs, 0, sample_len)
    long_wav.append(s)

#%%
# Combine all audio recordings, applying a crossfade, and compute the
# spectrogram of the resulting mixed audio.
long_wav = util.crossfade_list(long_wav, fs, fade_len=0.5)
Sxx, tn, fn, ext = sound.spectrogram(long_wav, fs, window='hann',
                                     nperseg=1024, noverlap=512)

#%%
# Display the spectrogram. We can see clearly the bird chorus at dawn (5-10 h) and
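# One way to render it (sketch): convert the power spectrogram to dB and plot
# with maad utilities.
Sxx_db = util.power2dB(Sxx, db_range=96) + 96
util.plot2d(Sxx_db, extent=ext, title='Mixed audio')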
@author:
"""

from maad.sound import load, spectrogram
from maad.features import shape_features, plot_shape
from maad.util import format_features, read_audacity_annot, power2dB
from maad.rois import overlay_rois
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn import preprocessing

s, fs = load('../data/spinetail.wav')
rois_tf = read_audacity_annot('../data/spinetail.txt')  ## annotations using Audacity

rois_cr = rois_tf.loc[rois_tf.label == 'CRER', ]
rois_sp = rois_tf.loc[rois_tf.label == 'SP', ]

Sxx_power, ts, f, ext = spectrogram(s, fs)
Sxx_dB = power2dB(Sxx_power, db_range=90) + 96

# Visualize large vocalizations
rois_cr = format_features(rois_cr, ts, f)
overlay_rois(Sxx_dB, rois_cr, **{'extent': ext, 'vmin': 0, 'vmax': 80})

# Visualize short vocalizations
rois_sp = format_features(rois_sp, ts, f)
overlay_rois(Sxx_dB, rois_sp, **{'extent': ext, 'vmin': 0, 'vmax': 80})
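# A possible continuation (sketch): characterize both vocalization types with
# 2D wavelet shape features and project them with PCA. This assumes a maad API
# in which shape_features accepts a rois DataFrame; the resolution value is
# illustrative.
shape_cr, params = shape_features(Sxx_dB, resolution='med', rois=rois_cr)
shape_sp, _ = shape_features(Sxx_dB, resolution='med', rois=rois_sp)

# stack the shp_* feature columns of both groups and project to 2D
X = np.vstack([shape_cr.filter(regex='^shp').values,
               shape_sp.filter(regex='^shp').values])
Y = PCA(n_components=2).fit_transform(preprocessing.scale(X))

n_cr = len(shape_cr)
plt.scatter(Y[:n_cr, 0], Y[:n_cr, 1], label='CRER')
plt.scatter(Y[n_cr:, 0], Y[n_cr:, 1], label='SP')
plt.legend()
plt.show()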