Code example #1
from maad import sound
from maad.util import power2dB, plot2D

# The load step is not shown in the original snippet; the file below is an
# assumed placeholder
s, fs = sound.load('../data/spinetail.wav')

t0 = 0
t1 = 20
f0 = 100
f1 = 10000
dB_max = 96

Sxx_power, tn, fn, ext = sound.spectrogram(s,
                                           fs,
                                           nperseg=1024,
                                           noverlap=1024 // 2,
                                           fcrop=(f0, f1),
                                           tcrop=(t0, t1))

# Convert the power spectrogram to dB and add dB_max, the maximum decibel
# range for a 16-bit quantization, then display the result
Sxx_db = power2dB(Sxx_power) + dB_max
plot2D(Sxx_db, **{'vmin': 0, 'vmax': dB_max, 'extent': ext})

#%%
# Then, relevant acoustic events are extracted directly from the power
# spectrogram with a double-thresholding technique. The result is a binary
# image called a mask. Double thresholding is more sophisticated than basic
# thresholding based on a single value. First, a threshold selects pixels
# with high values (i.e. high acoustic energy); they should belong to an
# acoustic event and are called seeds. From these seeds, we aggregate
# connected pixels whose value is higher than a second, lower threshold.
# These new pixels become seeds themselves, and the aggregation continues
# until no connected pixel remains with a value above the second threshold.
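#
# A minimal sketch of this idea, shown here with scikit-image's hysteresis
# thresholding (an illustration only; maad implements the technique with its
# own functions, and the threshold values below are arbitrary assumptions).

from skimage.filters import apply_hysteresis_threshold

# Pixels above ``high`` act as seeds; pixels above ``low`` are kept only when
# they are connected to a seed
im_mask_sketch = apply_hysteresis_threshold(Sxx_db, low=10, high=30)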

# First, we remove the stationary background in order to increase the
# contrast [1]. Then, we convert the spectrogram into dB.
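
#%%
# The original snippet stops here; a hedged completion of these two steps,
# assuming maad's ``median_equalizer`` is the background-removal function
# intended above:

Sxx_power_noNoise = sound.median_equalizer(Sxx_power)
Sxx_db_noNoise = power2dB(Sxx_power_noNoise)
plot2D(Sxx_db_noNoise, **{'extent': ext})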
Code example #2
The following sound example has two different sound types in the foreground:

- An accelerating trill between 4.5 and 8 kHz lasting approximately 2 seconds
- A fast descending chirp between 8 and 12 kHz lasting approximately 0.1 seconds
"""

#%% Load an audio file and compute the spectrogram for visualization.

from maad import sound
from maad.rois import find_rois_cwt
from maad.util import power2dB, plot2D

s, fs = sound.load('../../data/spinetail.wav')
Sxx, tn, fn, ext = sound.spectrogram(s, fs, nperseg=1024, noverlap=512)
Sxx_db = power2dB(Sxx, db_range=100) + 100
plot2D(Sxx_db, **{'extent': ext})

#%%
# Detect the accelerating trill
# -----------------------------
# The accelerating trill is the song of a small neotropical bird,
# *Cranioleuca erythrops*. This song can be detected in the recording using
# the function ``find_rois_cwt``, setting the frequency limits
# ``flims=(4500, 8000)`` and the temporal length of the signal ``tlen=2``.

_ = find_rois_cwt(s,
                  fs,
                  flims=(4500, 8000),
                  tlen=2,
                  th=0,
                  display=True,
                  figsize=(13, 6))
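
#%%
# The fast descending chirp described above can be targeted with the same
# function; the frequency limits and temporal length below are assumptions
# derived from that description, not parameters from the original example.

_ = find_rois_cwt(s,
                  fs,
                  flims=(8000, 12000),
                  tlen=0.1,
                  th=0,
                  display=True,
                  figsize=(13, 6))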

#%%
import numpy as np
import matplotlib.pyplot as plt
from maad import sound, features
from maad.util import linear2dB, plot2D
from skimage import transform
from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import NMF

#%%
# Load audio and compute a spectrogram
s, fs = sound.load('../data/spinetail.wav')
Sxx, tn, fn, ext = sound.spectrogram(s, fs, nperseg=1024, noverlap=512)

Sxx_db = linear2dB(Sxx, db_range=80)
Sxx_db = transform.rescale(Sxx_db, 0.5, anti_aliasing=True, multichannel=False)
plot2D(Sxx_db)

#%%
# Compute features with ``shape_features_raw`` to get the raw output of the
# spectrogram filtered by the filterbank composed of 2D Gabor wavelets

shape_im, params = features.shape_features_raw(Sxx_db, resolution='low')

# Format the output as an array for decomposition
X = np.array(shape_im).reshape([len(shape_im), Sxx_db.size]).transpose()

# Decompose signal using non-negative matrix factorization
Y = NMF(n_components=3, init='random', random_state=0).fit_transform(X)

# Rescale the NMF components to the range [0, 1] for display
Y = MinMaxScaler(feature_range=(0, 1)).fit_transform(Y)
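
#%%
# As a possible next step (not shown in the original snippet), the three
# scaled components can be folded back to the spectrogram shape and displayed
# as a false-color spectrogram, one NMF component per RGB channel.

intensity = Y.reshape(Sxx_db.shape[0], Sxx_db.shape[1], 3)
plt.figure()
plt.imshow(intensity, origin='lower', aspect='auto')
plt.title('False-color spectrogram from NMF components')
plt.show()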
Code example #4
import numpy as np
import matplotlib.pyplot as plt
from maad import sound, features
from maad.util import power2dB, plot2D
from skimage import transform
from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import NMF

#%%
# First, load an audio file and compute the spectrogram.
s, fs = sound.load('../data/spinetail.wav')
Sxx, tn, fn, ext = sound.spectrogram(s, fs, nperseg=1024, noverlap=512)

Sxx_db = power2dB(Sxx, db_range=70)
Sxx_db = transform.rescale(Sxx_db, 0.5, anti_aliasing=True, multichannel=False)
plot2D(Sxx_db, **{
    'figsize': (4, 10),
    'extent': (tn[0], tn[-1], fn[0], fn[-1])
})

#%%
# Then, compute features with ``shape_features_raw`` to get the raw output of the
# spectrogram filtered by the filterbank composed of 2D Gabor wavelets. This
# raw output can be fed to the NMF algorithm to decompose the spectrogram into
# elementary basis spectrograms.

shape_im, params = features.shape_features_raw(Sxx_db, resolution='low')

# Format the output as an array for decomposition
X = np.array(shape_im).reshape([len(shape_im), Sxx_db.size]).transpose()

# Decompose signal using non-negative matrix factorization
Y = NMF(n_components=3, init='random', random_state=0).fit_transform(X)
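
#%%
# A possible way to inspect the decomposition (not part of the original
# snippet): reshape each NMF component back to the spectrogram shape and plot
# the three elementary basis spectrograms.

intensity = Y.reshape(Sxx_db.shape[0], Sxx_db.shape[1], 3)
fig, axes = plt.subplots(3, 1, sharex=True)
for idx, ax in enumerate(axes):
    ax.imshow(intensity[..., idx], origin='lower', aspect='auto',
              interpolation='bilinear')
    ax.set_title('Basis spectrogram %d' % idx)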
Code example #5
import numpy as np
import matplotlib.pyplot as plt
from timeit import default_timer as timer
from maad.util import plot2D, power2dB
# Note: the location of these helpers varies across maad versions
from maad.sound import (load, spectrogram, remove_background,
                        median_equalizer, sharpness)

#%%
# First, we load the audio file and compute its spectrogram.
# The linear spectrogram is then converted into dB with a 96 dB range,
# which is the maximum dB range of a 16-bit audio recording. We add 96 dB
# so that the spectrogram contains only positive values.
s, fs = load('../data/tropical_forest_morning.wav')
#s, fs = load('../data/cold_forest_night.wav')
Sxx, tn, fn, ext = spectrogram(s, fs, fcrop=[0,20000], tcrop=[0,60])
Sxx_dB = power2dB(Sxx, db_range=96) + 96

#%%
# We plot the original spectrogram.
fig, (ax0, ax1, ax2, ax3, ax4) = plt.subplots(5, 1, sharex=True)
plot2D(Sxx_dB, ax=ax0, extent=ext, title='original', xlabel=None,
       vmin=np.median(Sxx_dB), vmax=np.median(Sxx_dB)+40)

print ("Original sharpness : %2.3f" % sharpness(Sxx_dB))

#%%
# Test the function "remove_background"
start = timer()
X1, noise_profile1, _ = remove_background(Sxx_dB)
elapsed_time = timer() - start
print("---- test remove_background -----")
print("duration %2.3f s" % elapsed_time)
print ("sharpness : %2.3f" % sharpness(X1))

plot2D(X1, ax=ax1, extent=ext, title='remove_background', xlabel=None,
       vmin=np.median(X1), vmax=np.median(X1)+40)
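
#%%
# The remaining axes (ax2 to ax4) are presumably meant for further denoising
# variants. A hedged continuation for one of them, using maad's
# ``median_equalizer`` (which operates on the linear spectrogram):

start = timer()
X2 = power2dB(median_equalizer(Sxx))
elapsed_time = timer() - start
print("---- test median_equalizer -----")
print("duration %2.3f s" % elapsed_time)
print("sharpness: %2.3f" % sharpness(X2))

plot2D(X2, ax=ax2, extent=ext, title='median_equalizer', xlabel=None,
       vmin=np.median(X2), vmax=np.median(X2)+40)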
Code example #6
"""

import numpy as np
import matplotlib.pyplot as plt
from maad import sound, features, rois
from maad.util import power2dB, plot2D, format_features

#%%
# Start by loading an audio file and computing the spectrogram.

s, fs = sound.load('../data/rock_savana.wav')
db_max = 70

Sxx, tn, fn, ext = sound.spectrogram(s, fs, nperseg=1024, noverlap=512)
Sxx_db = power2dB(Sxx, db_range=db_max) + db_max
plot2D(Sxx_db, **{'figsize': (4, 10), 'extent': ext})

#%%
# 1. Find regions of interest
# ---------------------------
# To find regions of interest in the spectrogram, we will remove the
# stationary background noise and then find isolated sounds using a double
# threshold method. Small ROIs due to noise in the signal will be removed.

Sxx_db, noise_profile1, _ = rois.remove_background(Sxx_db)
Sxx_db_smooth = rois.smooth(Sxx_db, std=1)
im_mask = rois.create_mask(im=Sxx_db_smooth,
                           mode_bin='relative',
                           bin_std=5.5,
                           bin_per=0.5)
im_rois, df_rois = rois.select_rois(im_mask, min_roi=100, max_roi=None)

# Format ROIs and visualize the bounding box on the audio spectrogram.
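
#%%
# The snippet ends here; a hedged completion, assuming an ``overlay_rois``
# helper in ``maad.util`` (its exact location and signature may vary across
# maad versions):

from maad.util import overlay_rois  # assumed import for this sketch

# Convert ROI pixel coordinates into time (s) and frequency (Hz)
df_rois = format_features(df_rois, tn, fn)

# Draw the ROI bounding boxes on the dB spectrogram
ax, fig = overlay_rois(Sxx_db, df_rois,
                       **{'vmin': 0, 'vmax': db_max, 'extent': ext})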