t0 = 0      # start of the crop window in s (assumed; t0 was undefined in this snippet)
t1 = 20     # end of the crop window in s
f0 = 100    # lower frequency of the crop window in Hz
f1 = 10000  # upper frequency of the crop window in Hz
dB_max = 96

Sxx_power, tn, fn, ext = sound.spectrogram(s, fs, nperseg=1024, noverlap=1024 // 2,
                                           fcrop=(f0, f1), tcrop=(t0, t1))

# Convert the power spectrogram into dB and add dB_max, the maximum decibel
# range for a 16-bit quantification, then display the result
Sxx_db = power2dB(Sxx_power) + dB_max
plot2D(Sxx_db, **{'vmin': 0, 'vmax': dB_max, 'extent': ext})

#%%
# Relevant acoustic events are then extracted directly from the power
# spectrogram with a double thresholding technique. The result is a binary
# image called a mask. Double thresholding is more sophisticated than basic
# thresholding with a single value. First, a threshold selects pixels with a
# high value (i.e. high acoustic energy), which are likely to belong to an
# acoustic event. These pixels are called seeds. From these seeds, we
# aggregate connected pixels whose value is higher than a second, lower
# threshold. The newly aggregated pixels become seeds in turn, and the
# process continues until no connected pixel with a value above the second
# threshold remains (a minimal sketch of this behaviour follows below).
#
# First we remove the stationary background in order to increase the
# contrast [1], then we convert the spectrogram into dB.
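#%%
# As an illustration, the double thresholding described above behaves like
# hysteresis thresholding. A minimal sketch using scikit-image, which is not
# part of this example; the two threshold values are arbitrary choices:
from skimage.filters import apply_hysteresis_threshold

# Seeds are pixels above 30 dB; connected pixels above 10 dB are aggregated
im_mask_sketch = apply_hysteresis_threshold(Sxx_db, low=10, high=30)
plot2D(im_mask_sketch * 1.0, **{'extent': ext})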
The following sound example has two different sound types in the foreground:

- An accelerating trill between 4.5 and 8 kHz, lasting approximately 2 seconds
- A fast descending chirp between 8 and 12 kHz, lasting approximately 0.1 seconds

"""

#%%
# Load an audio file and compute the spectrogram for visualization.

from maad import sound
from maad.rois import find_rois_cwt
from maad.util import power2dB, plot2D

s, fs = sound.load('../../data/spinetail.wav')
Sxx, tn, fn, ext = sound.spectrogram(s, fs, nperseg=1024, noverlap=512)
Sxx_db = power2dB(Sxx, db_range=100) + 100
plot2D(Sxx_db, **{'extent': ext})

#%%
# Detect the accelerating trill
# -----------------------------
# The accelerating trill is the song of a small neotropical bird,
# *Cranioleuca erythrops*. This song can be detected on the recording using
# the function ``find_rois_cwt``, setting the frequency limits to
# ``flims=(4500, 8000)`` and the temporal length of the signal to ``tlen=2``.

_ = find_rois_cwt(s, fs, flims=(4500, 8000), tlen=2, th=0, display=True,
                  figsize=(13, 6))

#%%
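# Detect the fast descending chirp
# --------------------------------
# The fast descending chirp described above can be targeted in the same way.
# A sketch: the frequency limits and the temporal length are taken directly
# from the description of the chirp, and the threshold is kept at 0 as for
# the trill.
_ = find_rois_cwt(s, fs, flims=(8000, 12000), tlen=0.1, th=0, display=True,
                  figsize=(13, 6))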
import numpy as np
import matplotlib.pyplot as plt

from maad import sound, features
from maad.util import linear2dB, plot2D

from skimage import transform
from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import NMF

#%%
# Load audio and compute a spectrogram
s, fs = sound.load('../data/spinetail.wav')
Sxx, tn, fn, ext = sound.spectrogram(s, fs, nperseg=1024, noverlap=512)

Sxx_db = linear2dB(Sxx, db_range=80)
Sxx_db = transform.rescale(Sxx_db, 0.5, anti_aliasing=True, multichannel=False)
plot2D(Sxx_db)

#%%
# Compute features with shape_features_raw to get the raw output of the
# spectrogram filtered by the filterbank composed of 2D Gabor wavelets
params, shape_im = features.shape_features_raw(Sxx_db, resolution='low')

# Format the output as an array for decomposition
X = np.array(shape_im).reshape([len(shape_im), Sxx_db.size]).transpose()

# Decompose signal using non-negative matrix factorization
Y = NMF(n_components=3, init='random', random_state=0).fit_transform(X)

# Scale the components between 0 and 1 for plotting
Y = MinMaxScaler(feature_range=(0, 1)).fit_transform(Y)
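#%%
# The three scaled components can be combined into a false-colour spectrogram
# by mapping each component to one RGB channel. A minimal sketch; the channel
# order is an arbitrary choice:
intensity = Y.reshape([Sxx_db.shape[0], Sxx_db.shape[1], 3])
plt.imshow(intensity, origin='lower', aspect='auto')
plt.title('False-colour spectrogram from the NMF components')
plt.show()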
import numpy as np
import matplotlib.pyplot as plt

from maad import sound, features
from maad.util import power2dB, plot2D

from skimage import transform
from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import NMF

#%%
# First, load an audio file and compute the spectrogram.
s, fs = sound.load('../data/spinetail.wav')
Sxx, tn, fn, ext = sound.spectrogram(s, fs, nperseg=1024, noverlap=512)

Sxx_db = power2dB(Sxx, db_range=70)
Sxx_db = transform.rescale(Sxx_db, 0.5, anti_aliasing=True, multichannel=False)
plot2D(Sxx_db, **{'figsize': (4, 10),
                  'extent': (tn[0], tn[-1], fn[0], fn[-1])})

#%%
# Then, compute features with ``shape_features_raw`` to get the raw output of
# the spectrogram filtered by the filterbank composed of 2D Gabor wavelets.
# This raw output can be fed to the NMF algorithm to decompose the spectrogram
# into elementary basis spectrograms.

shape_im, params = features.shape_features_raw(Sxx_db, resolution='low')

# Format the output as an array for decomposition
X = np.array(shape_im).reshape([len(shape_im), Sxx_db.size]).transpose()

# Decompose signal using non-negative matrix factorization
Y = NMF(n_components=3, init='random', random_state=0).fit_transform(X)
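#%%
# Each column of Y holds the activation of one NMF basis over the spectrogram
# pixels. A minimal sketch to inspect the three basis spectrograms separately;
# the scaling and subplot layout are illustrative choices:
Y = MinMaxScaler(feature_range=(0, 1)).fit_transform(Y)
fig, axes = plt.subplots(3, 1, sharex=True)
for idx, ax in enumerate(axes):
    ax.imshow(Y[:, idx].reshape(Sxx_db.shape), origin='lower', aspect='auto')
    ax.set_title('NMF component %d' % (idx + 1))
plt.show()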
import numpy as np
import matplotlib.pyplot as plt
from timeit import default_timer as timer

# Module locations assumed from recent scikit-maad releases
from maad.sound import load, spectrogram, remove_background, sharpness
from maad.util import power2dB, plot2D

#%%
# First, we load the audio file and compute its spectrogram.
# The linear spectrogram is then transformed into dB. The dB range is 96 dB,
# which is the maximum dB range for a 16-bit audio recording. We add 96 dB
# so that the spectrogram contains only positive values.
s, fs = load('../data/tropical_forest_morning.wav')
#s, fs = load('../data/cold_forest_night.wav')
Sxx, tn, fn, ext = spectrogram(s, fs, fcrop=[0, 20000], tcrop=[0, 60])
Sxx_dB = power2dB(Sxx, db_range=96) + 96

#%%
# We plot the original spectrogram.
fig, (ax0, ax1, ax2, ax3, ax4) = plt.subplots(5, 1, sharex=True)
plot2D(Sxx_dB, ax=ax0, extent=ext, title='original', xlabel=None,
       vmin=np.median(Sxx_dB), vmax=np.median(Sxx_dB) + 40)

print("Original sharpness : %2.3f" % sharpness(Sxx_dB))

#%%
# Test the function "remove_background"
start = timer()
X1, noise_profile1, _ = remove_background(Sxx_dB)
elapsed_time = timer() - start

print("---- test remove_background -----")
print("duration %2.3f s" % elapsed_time)
print("sharpness : %2.3f" % sharpness(X1))

plot2D(X1, ax=ax1, extent=ext, title='remove_background', xlabel=None,
       vmin=np.median(X1), vmax=np.median(X1) + 40)
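#%%
# For intuition, background removal is conceptually close to subtracting a
# per-frequency noise estimate from the spectrogram. A minimal numpy sketch,
# illustrative only; remove_background uses a more robust noise estimation:
noise_est = np.median(Sxx_dB, axis=1, keepdims=True)  # median level of each frequency bin over time
X_manual = Sxx_dB - noise_est
plot2D(X_manual, ax=ax2, extent=ext, title='median subtraction (sketch)', xlabel=None,
       vmin=np.median(X_manual), vmax=np.median(X_manual) + 40)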
""" import numpy as np import matplotlib.pyplot as plt from maad import sound, features, rois from maad.util import power2dB, plot2D, format_features #%% # Start by loading and audio file and compute the spectrogram. s, fs = sound.load('/Users/jsulloa/Downloads/rock_savana.wav') db_max = 70 Sxx, tn, fn, ext = sound.spectrogram(s, fs, nperseg=1024, noverlap=512) Sxx_db = power2dB(Sxx, db_range=db_max) + db_max plot2D(Sxx_db, **{'figsize': (4, 10), 'extent': ext}) #%% # 1. Find regions of interest # --------------------------- # To find regions of interest in the spectrogram, we will remove stationary background noise and then find isolated sounds using a double threshold method. Small ROIs due to noise in the signal will be removed. Sxx_db, noise_profile1, _ = rois.remove_background(Sxx_db) Sxx_db_smooth = rois.smooth(Sxx_db, std=1) im_mask = rois.create_mask(im=Sxx_db_smooth, mode_bin='relative', bin_std=5.5, bin_per=0.5) im_rois, df_rois = rois.select_rois(im_mask, min_roi=100, max_roi=None) # Format ROIs and visualize the bounding box on the audio spectrogram.