t0 = 0
t1 = 20
f0 = 100
f1 = 10000
dB_max = 96

Sxx_power, tn, fn, ext = sound.spectrogram(s, fs, nperseg=1024,
                                           noverlap=1024 // 2,
                                           fcrop=(f0, f1), tcrop=(t0, t1))

# Convert the power spectrogram into dB and add dB_max, the maximum decibel
# range when the quantization depth is 16 bits, then display the result.
Sxx_db = power2dB(Sxx_power) + dB_max
plot2D(Sxx_db, **{'vmin': 0, 'vmax': dB_max, 'extent': ext})

#%%
# Then, relevant acoustic events are extracted directly from the power
# spectrogram with a double-thresholding technique. The result is a binary
# image called a mask. Double thresholding is more sophisticated than basic
# thresholding with a single value. First, a threshold selects pixels with
# high values (i.e. high acoustic energy); these pixels, called seeds, should
# belong to an acoustic event. From the seeds, we aggregate connected pixels
# whose values exceed a second, lower threshold. The newly aggregated pixels
# become seeds in turn, and the process repeats until no remaining connected
# pixel exceeds the second threshold (a minimal sketch of this idea is given
# in the next cell).

# First we remove the stationary background in order to increase the contrast [1]
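#%%
# The double-thresholding scheme described above is essentially hysteresis
# thresholding. As a minimal illustrative sketch (this uses scikit-image, not
# the maad implementation, and both threshold values are arbitrary choices
# for demonstration):
from skimage.filters import apply_hysteresis_threshold

# Pixels above 30 dB act as seeds; connected pixels above 20 dB are
# aggregated around them; everything else is excluded from the mask.
im_mask_demo = apply_hysteresis_threshold(Sxx_db, low=20, high=30)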
import numpy as np
from timeit import default_timer as timer
import matplotlib.pyplot as plt

from maad.util import plot2d, power2dB
from maad.sound import load, spectrogram, remove_background, sharpness

#%%
# Load and plot the spectrogram of the original audio file
# --------------------------------------------------------
# First, we load the audio file and compute its spectrogram.
# The linear spectrogram is then transformed into dB. The dB range is 96 dB,
# which is the maximum dB range for a 16-bit audio recording. We add 96 dB
# so that the spectrogram contains only positive values.

s, fs = load('../../data/tropical_forest_morning.wav')
Sxx, tn, fn, ext = spectrogram(s, fs, fcrop=[0, 20000], tcrop=[0, 60])
Sxx_dB = power2dB(Sxx, db_range=96) + 96
plot2d(Sxx_dB, extent=ext, title='original',
       vmin=np.median(Sxx_dB), vmax=np.median(Sxx_dB) + 40)
print("Original sharpness : %2.3f" % sharpness(Sxx_dB))

#%%
# Test different methods to remove stationary background noise
# ------------------------------------------------------------
# Test the function "remove_background"

start = timer()
X1, noise_profile1, _ = remove_background(Sxx_dB)
elapsed_time = timer() - start
print("---- test remove_background -----")
print("duration %2.3f s" % elapsed_time)
print("sharpness : %2.3f" % sharpness(X1))
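#%%
# The same timing harness can be reused for the other background-removal
# functions in maad.sound. As a sketch, assuming median_equalizer is
# available in your scikit-maad version (it operates on the power
# spectrogram, so its output is converted to dB afterwards):
from maad.sound import median_equalizer

start = timer()
X2 = power2dB(median_equalizer(Sxx))
elapsed_time = timer() - start
print("---- test median_equalizer -----")
print("duration %2.3f s" % elapsed_time)
print("sharpness : %2.3f" % sharpness(X2))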
The following sound example has two main sound types in the foreground:

- An accelerating trill between 4.5 and 8 kHz lasting approximately 2 seconds
- A fast descending chirp between 8 and 12 kHz lasting approximately 0.1 seconds

"""
#%% Load an audio file and compute the spectrogram for visualization.

from maad import sound
from maad.rois import find_rois_cwt
from maad.util import power2dB, plot2D

s, fs = sound.load('../../data/spinetail.wav')
Sxx, tn, fn, ext = sound.spectrogram(s, fs, nperseg=1024, noverlap=512)
Sxx_db = power2dB(Sxx, db_range=100) + 100
plot2D(Sxx_db, **{'extent': ext})

#%%
# Detect the accelerating trill
# -----------------------------
# The accelerating trill is the song of a small neotropical bird,
# Cranioleuca erythrops. This song can be detected in the recording with the
# function find_rois_cwt by setting the frequency limits flims=(4500, 8000)
# and the temporal length of the signal tlen=2.

_ = find_rois_cwt(s, fs, flims=(4500, 8000), tlen=2, th=0, display=True,
                  figsize=(13, 6))
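#%%
# Detect the fast descending chirp
# --------------------------------
# The chirp described above can be located with the same function by changing
# the frequency limits and the expected temporal length. This is a sketch:
# the threshold th=0 is an illustrative choice, not a tuned value.
df_chirp = find_rois_cwt(s, fs, flims=(8000, 12000), tlen=0.1, th=0,
                         display=True, figsize=(13, 6))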
""" # sphinx_gallery_thumbnail_path = '../_images/sphx_glr_plot_nmf_and_false_color_spectrogram_003.png' import numpy as np import matplotlib.pyplot as plt from maad import sound, features from maad.util import power2dB, plot2D from skimage import transform from sklearn.preprocessing import MinMaxScaler from sklearn.decomposition import NMF #%% # First, load and audio file and compute the spectrogram. s, fs = sound.load('../data/spinetail.wav') Sxx, tn, fn, ext = sound.spectrogram(s, fs, nperseg=1024, noverlap=512) Sxx_db = power2dB(Sxx, db_range=70) Sxx_db = transform.rescale(Sxx_db, 0.5, anti_aliasing=True, multichannel=False) plot2D(Sxx_db, **{ 'figsize': (4, 10), 'extent': (tn[0], tn[-1], fn[0], fn[-1]) }) #%% # Then, compute feature with ``shape_features_raw`` to get the raw output of the # spectrogram filtered by the filterbank composed of 2D Gabor wavelets. This # raw output can be fed to the NMF algorithm to decompose the spectrogram into # elementary basis spectrograms. shape_im, params = features.shape_features_raw(Sxx_db, resolution='low') # Format the output as an array for decomposition
# which was 16 dB. So the total gain applied to the signal is 42 dB.

# We load the sound
w, fs = sound.load('../../data/spinetail.wav')

# We convert the sound into sound pressure level (Pa)
p0 = spl.wav2pressure(wave=w, gain=42, Vadc=2, sensitivity=-35)

# We select the part of the sound with the spinetail signal
p0_sig = p0[int(5.68 * fs):int(7.48 * fs)]

# We select the part of the sound with background only
p0_noise = p0[int(8.32 * fs):int(10.12 * fs)]

# We convert both signals into spectrograms
Sxx_power, tn, fn, ext = sound.spectrogram(p0_sig, fs)
Sxx_power_noise, tn, fn, ext = sound.spectrogram(p0_noise, fs)

# We convert both spectrograms into dB. We choose a dB range of 96 dB, which
# is the maximum range for a 16-bit signal.
Sxx_dB = util.power2dB(Sxx_power, db_range=96) + 96
Sxx_dB_noise = util.power2dB(Sxx_power_noise, db_range=96) + 96

#%%
# Before simulating the attenuation of the acoustic signature with distance,
# we need to estimate the distance at which the spinetail's signal was
# recorded.
# First, we estimate the sound level L of the spinetail song in the recording
# by selecting the sound between 4900 and 7500 Hz.

p0_sig_4900_7500 = sound.select_bandwidth(p0_sig, fs, fcut=[4900, 7500],
                                          forder=10, ftype='bandpass')
L = spl.pressure2leq(p0_sig_4900_7500, fs)
print('Sound Level measured : %2.2fdB SPL' % L)
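#%%
# The same recipe can be applied to the noise-only excerpt to obtain the
# background level in the same frequency band, a useful comparison point for
# the song level. This is a minimal sketch reusing the functions above.
p0_noise_4900_7500 = sound.select_bandwidth(p0_noise, fs, fcut=[4900, 7500],
                                            forder=10, ftype='bandpass')
L_noise = spl.pressure2leq(p0_noise_4900_7500, fs)
print('Background level measured : %2.2fdB SPL' % L_noise)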
""" # sphinx_gallery_thumbnail_path = './_images/sphx_glr_plot_unsupervised_sound_classification_004.png' import numpy as np import matplotlib.pyplot as plt from maad import sound, features, rois from maad.util import power2dB, plot2d, format_features, overlay_rois #%% # Start by loading an example audio file. We will remove low frequency ambient noise with a lowpass filter and then compute the spectrogram. s, fs = sound.load('../../data/rock_savanna.wav') s_filt = sound.select_bandwidth(s, fs, fcut=100, forder=3, ftype='highpass') db_max = 70 # used to define the range of the spectrogram Sxx, tn, fn, ext = sound.spectrogram(s_filt, fs, nperseg=1024, noverlap=512) Sxx_db = power2dB(Sxx, db_range=db_max) + db_max plot2d(Sxx_db, **{'extent': ext}) #%% # 1. Find regions of interest # --------------------------- # To find regions of interest in the spectrogram, we will remove stationary background noise and then find isolated sounds using a double threshold method. Small ROIs due to noise in the signal will be removed. Sxx_db_rmbg, _, _ = sound.remove_background(Sxx_db) Sxx_db_smooth = sound.smooth(Sxx_db_rmbg, std=1.2) im_mask = rois.create_mask(im=Sxx_db_smooth, mode_bin='relative', bin_std=2, bin_per=0.25) im_rois, df_rois = rois.select_rois(im_mask, min_roi=50, max_roi=None)
from maad.sound import load, spectrogram
from maad.features import shape_features
from maad.util import format_features, read_audacity_annot, power2dB, plot_shape
from maad.rois import overlay_rois
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn import preprocessing

s, fs = load('../data/spinetail.wav')
rois_tf = read_audacity_annot('../data/spinetail.txt')  # annotations created with Audacity
rois_cr = rois_tf.loc[rois_tf.label == 'CRER']
rois_sp = rois_tf.loc[rois_tf.label == 'SP']

Sxx_power, ts, f, ext = spectrogram(s, fs)
Sxx_dB = power2dB(Sxx_power, db_range=96) + 96

# Visualize large vocalizations
rois_cr = format_features(rois_cr, ts, f)
overlay_rois(Sxx_dB, rois_cr, **{'extent': ext, 'vmin': 0, 'vmax': 80})

# Visualize short vocalizations
rois_sp = format_features(rois_sp, ts, f)
overlay_rois(Sxx_dB, rois_sp, **{'extent': ext, 'vmin': 0, 'vmax': 80})

# Compute and visualize features
shape_cr, params = shape_features(Sxx_dB, resolution='med', rois=rois_cr)
ax = plot_shape(shape_cr.mean(), params)
shape_sp, params = shape_features(Sxx_dB, resolution='med', rois=rois_sp)
ax = plot_shape(shape_sp.mean(), params)
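#%%
# The PCA and preprocessing imports above point to a natural next step:
# projecting the shape features of both vocalization types onto two principal
# components to check whether they separate. A minimal sketch, assuming the
# shape-feature columns returned by shape_features are prefixed with 'shp':
import pandas as pd

X = pd.concat([shape_cr, shape_sp])
X = X.loc[:, X.columns.str.startswith('shp')]
Y = PCA(n_components=2).fit_transform(preprocessing.scale(X))

plt.figure()
plt.scatter(Y[:len(shape_cr), 0], Y[:len(shape_cr), 1], label='CRER')
plt.scatter(Y[len(shape_cr):, 0], Y[len(shape_cr):, 1], label='SP')
plt.legend()
plt.show()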