Example #1

from maad import sound
from maad.util import power2dB, plot2D

t0 = 0
t1 = 20
f0 = 100
f1 = 10000
dB_max = 96

Sxx_power, tn, fn, ext = sound.spectrogram(s,
                                           fs,
                                           nperseg=1024,
                                           noverlap=1024 // 2,
                                           fcrop=(f0, f1),
                                           tcrop=(t0, t1))

# Convert the power spectrogram into dB and add dB_max, which is the maximum
# decibel range when the quantization depth is 16 bits, then display the result
Sxx_db = power2dB(Sxx_power) + dB_max
plot2D(Sxx_db, **{'vmin': 0, 'vmax': dB_max, 'extent': ext})

#%%
# Then, relevant acoustic events are extracted directly from the power
# spectrogram with a double-thresholding technique. The result is a binary
# image called a mask. Double thresholding is more sophisticated than basic
# thresholding with a single value. First, a threshold selects pixels with
# high values (i.e. high acoustic energy); these should belong to an acoustic
# event and are called seeds. From these seeds, we aggregate connected pixels
# whose values exceed a second, lower threshold. The new pixels become seeds
# in turn, and the aggregation continues until no connected pixel remains
# above the second threshold.
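
#%%
# A minimal sketch of this masking step: in maad, ``rois.create_mask`` with
# ``mode_bin='relative'`` applies the double threshold described above. The
# ``bin_std`` and ``bin_per`` values below are illustrative, not tuned, and in
# the full workflow the mask is built after the background removal step below.

from maad import rois

im_mask = rois.create_mask(im=Sxx_db,
                           mode_bin='relative',
                           bin_std=6,
                           bin_per=0.5)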

# First we remove the stationary background in order to increase the contrast [1]

Example #2

import numpy as np

from timeit import default_timer as timer

import matplotlib.pyplot as plt

from maad.sound import load, spectrogram, remove_background, sharpness
from maad.util import power2dB, plot2d

#%%
# Load and plot the spectrogram of the original audio file
# --------------------------------------------------------
# First, we load the audio file and compute its spectrogram.
# The linear spectrogram is then transformed into dB. The dB range is 96 dB,
# which is the maximum dB range for a 16-bit audio recording. We add 96 dB
# so that the spectrogram contains only positive values.
s, fs = load('../../data/tropical_forest_morning.wav')
Sxx, tn, fn, ext = spectrogram(s, fs, fcrop=[0,20000], tcrop=[0,60])
Sxx_dB = power2dB(Sxx, db_range=96) + 96
plot2d(Sxx_dB, extent=ext, title='original',
       vmin=np.median(Sxx_dB), vmax=np.median(Sxx_dB)+40)

print ("Original sharpness : %2.3f" % sharpness(Sxx_dB))

#%%
# Test different methods to remove stationary background noise
# ------------------------------------------------------------
# Test the function "remove_background"
start = timer()
X1, noise_profile1, _ = remove_background(Sxx_dB)
elapsed_time = timer() - start
print("---- test remove_background -----")
print("duration %2.3f s" % elapsed_time)
print ("sharpness : %2.3f" % sharpness(X1))

Example #3

The following sound example has two main soundtypes in the foreground:

- An accelerating trill between 4.5 and 8 kHz lasting approximately 2 seconds
- A fast descending chirp between 8 and 12 kHz lasting approximately 0.1 seconds
"""

#%% Load an audio file and compute the spectrogram for visualization.

from maad import sound
from maad.rois import find_rois_cwt
from maad.util import power2dB, plot2D

s, fs = sound.load('../../data/spinetail.wav')
Sxx, tn, fn, ext = sound.spectrogram(s, fs, nperseg=1024, noverlap=512)
Sxx_db = power2dB(Sxx, db_range=100) + 100
plot2D(Sxx_db, **{'extent': ext})

#%%
# Detect the accelerating trill
# -----------------------------
# The accelerating trill is the song of a small neotropical bird,
# Cranioleuca erythrops. This song can be detected in the recording using the
# function ``find_rois_cwt``, setting the frequency limits to
# ``flims=(4500, 8000)`` and the temporal length of the signal to ``tlen=2``.

_ = find_rois_cwt(s,
                  fs,
                  flims=(4500, 8000),
                  tlen=2,
                  th=0,
                  display=True,
                  figsize=(13, 6))
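
#%%
# Detect the fast descending chirp
# --------------------------------
# The same approach applies to the second soundtype. This is a minimal sketch:
# ``flims=(8000, 12000)`` and ``tlen=0.1`` are assumptions derived from the
# description above, not tuned values.

_ = find_rois_cwt(s,
                  fs,
                  flims=(8000, 12000),
                  tlen=0.1,
                  th=0,
                  display=True,
                  figsize=(13, 6))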

Example #4

"""
# sphinx_gallery_thumbnail_path = '../_images/sphx_glr_plot_nmf_and_false_color_spectrogram_003.png'
import numpy as np
import matplotlib.pyplot as plt
from maad import sound, features
from maad.util import power2dB, plot2D
from skimage import transform
from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import NMF

#%%
# First, load the audio file and compute its spectrogram.
s, fs = sound.load('../data/spinetail.wav')
Sxx, tn, fn, ext = sound.spectrogram(s, fs, nperseg=1024, noverlap=512)

Sxx_db = power2dB(Sxx, db_range=70)
Sxx_db = transform.rescale(Sxx_db, 0.5, anti_aliasing=True, multichannel=False)
plot2D(Sxx_db, **{
    'figsize': (4, 10),
    'extent': (tn[0], tn[-1], fn[0], fn[-1])
})

#%%
# Then, compute features with ``shape_features_raw`` to get the raw output of the
# spectrogram filtered by the filterbank composed of 2D Gabor wavelets. This
# raw output can be fed to the NMF algorithm to decompose the spectrogram into
# elementary basis spectrograms.

shape_im, params = features.shape_features_raw(Sxx_db, resolution='low')

# Format the output as an array for decomposition
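# A minimal sketch of the decomposition, assuming ``shape_im`` is a list of 2D
# filter responses with the same shape as ``Sxx_db`` and with non-negative
# values (as NMF requires): flatten the stack into a pixels-by-filters matrix,
# factorize it with the imported NMF, and rescale the components to [0, 1] so
# they can later be mapped to colour channels of a false-colour spectrogram.
X = np.array(shape_im).reshape([len(shape_im), Sxx_db.size]).transpose()
Y = NMF(n_components=3, init='random', random_state=0).fit_transform(X)
Y = MinMaxScaler(feature_range=(0, 1)).fit_transform(Y)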

Example #5

from maad import sound, spl, util

# which was 16 dB. So the total gain applied to the signal is 42 dB.

# We load the sound
w, fs = sound.load('../../data/spinetail.wav')
# We convert the sound into sound pressure level (Pa)
p0 = spl.wav2pressure(wave=w, gain=42, Vadc=2, sensitivity=-35)
# We select part of the sound with the spinetail signal
p0_sig = p0[int(5.68 * fs):int(7.48 * fs)]
# We select part of the sound with background
p0_noise = p0[int(8.32 * fs):int(10.12 * fs)]
# We convert both signals into spectrograms
Sxx_power, tn, fn, ext = sound.spectrogram(p0_sig, fs)
Sxx_power_noise, tn, fn, ext = sound.spectrogram(p0_noise, fs)
# We convert both spectrograms into dB. We choose a dB range of 96 dB, which
# is the maximal range for a 16-bit signal.
Sxx_dB = util.power2dB(Sxx_power, db_range=96) + 96
Sxx_dB_noise = util.power2dB(Sxx_power_noise, db_range=96) + 96

#%%
# Before simulating the attenuation of the acoustic signature depending on
# the distance, we need to evaluate the distance at which the signal of the
# spinetail was recorded.
# First, we estimate the sound level L of the spinetail song in the recording
# by selecting the frequency band between 4900 and 7500 Hz.
p0_sig_4900_7500 = sound.select_bandwidth(p0_sig,
                                          fs,
                                          fcut=[4900, 7500],
                                          forder=10,
                                          ftype='bandpass')
L = spl.pressure2leq(p0_sig_4900_7500, fs)
print('Sound Level measured : %2.2fdB SPL' % L)
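
#%%
# A minimal sketch of the distance estimate using spherical spreading only,
# L(r) = L0 - 20*log10(r/r0). The source level L0 at r0 = 1 m below is a
# hypothetical value, not a measurement from the original example.
L0 = 85                         # hypothetical source level at 1 m [dB SPL]
r0 = 1                          # reference distance [m]
r = r0 * 10 ** ((L0 - L) / 20)  # distance at which level L would be measured
print('Estimated distance : %2.1f m' % r)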

Example #6

"""
# sphinx_gallery_thumbnail_path = './_images/sphx_glr_plot_unsupervised_sound_classification_004.png'
import numpy as np
import matplotlib.pyplot as plt
from maad import sound, features, rois
from maad.util import power2dB, plot2d, format_features, overlay_rois

#%%
# Start by loading an example audio file. We will remove low-frequency ambient
# noise with a highpass filter and then compute the spectrogram.

s, fs = sound.load('../../data/rock_savanna.wav')
s_filt = sound.select_bandwidth(s, fs, fcut=100, forder=3, ftype='highpass')

db_max = 70  # used to define the range of the spectrogram
Sxx, tn, fn, ext = sound.spectrogram(s_filt, fs, nperseg=1024, noverlap=512)
Sxx_db = power2dB(Sxx, db_range=db_max) + db_max
plot2d(Sxx_db, **{'extent': ext})

#%%
# 1. Find regions of interest
# ---------------------------
# To find regions of interest in the spectrogram, we will remove stationary
# background noise and then find isolated sounds using a double-threshold
# method. Small ROIs due to noise in the signal will be removed.

Sxx_db_rmbg, _, _ = sound.remove_background(Sxx_db)
Sxx_db_smooth = sound.smooth(Sxx_db_rmbg, std=1.2)
im_mask = rois.create_mask(im=Sxx_db_smooth,
                           mode_bin='relative',
                           bin_std=2,
                           bin_per=0.25)
im_rois, df_rois = rois.select_rois(im_mask, min_roi=50, max_roi=None)
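
#%%
# As a minimal sketch of the next step (assumed here, since the example stops
# after ``select_rois``), format the ROIs into time-frequency units and overlay
# them on the spectrogram with the helpers imported above.
df_rois = format_features(df_rois, tn, fn)
overlay_rois(Sxx_db, df_rois, **{'vmin': 0, 'vmax': db_max, 'extent': ext})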

Example #7

from maad.sound import load, spectrogram
from maad.features import shape_features
from maad.util import format_features, read_audacity_annot, power2dB, plot_shape
from maad.rois import overlay_rois

import numpy as np
import matplotlib.pyplot as plt

from sklearn.decomposition import PCA
from sklearn import preprocessing

s, fs = load('../data/spinetail.wav')
rois_tf = read_audacity_annot('../data/spinetail.txt')  # annotations made with Audacity
rois_cr = rois_tf.loc[rois_tf.label == 'CRER']
rois_sp = rois_tf.loc[rois_tf.label == 'SP']

Sxx_power, ts, f, ext = spectrogram(s, fs)
Sxx_dB = power2dB(Sxx_power, db_range=96) + 96

# Visualize large vocalizations
rois_cr = format_features(rois_cr, ts, f)
overlay_rois(Sxx_dB, rois_cr, **{'extent':ext, 'vmin':0, 'vmax':80})

# Visualize short vocalizations
rois_sp = format_features(rois_sp, ts, f)
overlay_rois(Sxx_dB, rois_sp, **{'extent':ext, 'vmin':0, 'vmax':80})

# Compute and visualize shape features
shape_cr, params = shape_features(Sxx_dB, resolution='med', rois=rois_cr)
ax = plot_shape(shape_cr.mean(), params)

shape_sp, params = shape_features(Sxx_dB, resolution='med', rois=rois_sp)
ax = plot_shape(shape_sp.mean(), params)
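
#%%
# A minimal sketch of a follow-up step (assumed, not part of the original
# example): standardize the shape features of both soundtypes, assuming the
# feature columns are prefixed with 'shp', and project them onto two principal
# components with the imported PCA.
import pandas as pd

X = pd.concat([shape_cr, shape_sp]).filter(regex='shp')
X_scaled = preprocessing.StandardScaler().fit_transform(X)
Y = PCA(n_components=2).fit_transform(X_scaled)

plt.figure()
plt.scatter(Y[:len(shape_cr), 0], Y[:len(shape_cr), 1], label='CRER')
plt.scatter(Y[len(shape_cr):, 0], Y[len(shape_cr):, 1], label='SP')
plt.xlabel('PC1')
plt.ylabel('PC2')
plt.legend()
plt.show()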