-
Notifications
You must be signed in to change notification settings - Fork 1
/
wrappers.py
347 lines (298 loc) · 11.6 KB
/
wrappers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
import librosa
import numpy as np
import scipy
from audiotools import *
def decompose_save(filepath, kernel_size=(5,17), n_fft = 4096, hop_length = 1024):
    """
    Performs Harmonic/Percussive Source Separation on an audio file by applying median filters and saves
    the harmonic part, the percussive part and the resynthesised unfiltered mix as audio files.

    The outputs are written next to the input, named after it with the extension replaced by
    "-harm.wav", "-perc.wav" and "-mix.wav".

    ARGS
        filepath: fullpath of audio file <str>
        kernel_size: tuple sized of (harmonic, percussive) filters (<int>,<int>)
        n_fft: FFT size <int>
        hop_length : hop length <int>
    """
    import os.path

    signal, sr = load_signal(filepath)
    D = librosa.stft(signal, n_fft=n_fft, hop_length=hop_length)
    # Honour the caller's kernel_size (it used to be overridden by a hard-coded (5,17)).
    H, P = librosa.decompose.hpss(D, kernel_size=kernel_size)

    # splitext copes with extensions of any length; slicing off 4 characters did not.
    basename = os.path.splitext(filepath)[0]
    outputs = (("-harm.wav", H), ("-perc.wav", P), ("-mix.wav", D))
    for suffix, spectrum in outputs:
        # The inverse transform must use the same hop as the forward one,
        # otherwise istft falls back to its own default hop.
        audio = librosa.istft(spectrum, hop_length=hop_length)
        librosa.output.write_wav(basename + suffix, audio, sr)
def decompose_into_harmonic_and_percussive(filepath, kernel_size=(7,15), n_fft = 4096, hop_length = 1024):
    """
    Performs Harmonic/Percussive Source Separation on an audio file by applying median filters and returns
    each filtered version as an audio signal.

    ARGS
        filepath: fullpath of audio file <str>
        kernel_size: tuple sized of (harmonic, percussive) filters (<int>,<int>)
        n_fft: FFT size <int>
        hop_length : hop length <int>
    RETURN
        signal_harm: harmonic enhanced audio signal, as returned by librosa.istft
        signal_perc: percussion enhanced audio signal, as returned by librosa.istft
    """
    signal, _ = load_signal(filepath)
    D = librosa.stft(signal, n_fft=n_fft, hop_length=hop_length)
    # Honour the caller's kernel_size (it used to be overridden by a hard-coded (7,15)).
    H, P = librosa.decompose.hpss(D, kernel_size=kernel_size)
    # Invert with the same hop that produced the STFT.
    signal_harm = librosa.istft(H, hop_length=hop_length)
    signal_perc = librosa.istft(P, hop_length=hop_length)
    return signal_harm, signal_perc
def get_percussive(filepath, kernel_size=(7,15), n_fft = 4096, hop_length = 1024):
    """
    Performs Harmonic/Percussive Source Separation on an audio file by applying median filters and returns
    the percussive version as an audio signal.

    ARGS
        filepath: fullpath of audio file <str>
        kernel_size: tuple sized of (harmonic, percussive) filters (<int>,<int>)
        n_fft: FFT size <int>
        hop_length : hop length <int>
    RETURN
        signal_perc: percussion enhanced audio signal, as returned by librosa.istft
    """
    signal, _ = load_signal(filepath)
    D = librosa.stft(signal, n_fft=n_fft, hop_length=hop_length)
    # Honour the caller's kernel_size (it used to be overridden by a hard-coded (7,15)).
    _, P = librosa.decompose.hpss(D, kernel_size=kernel_size)
    # Invert with the same hop that produced the STFT.
    signal_perc = librosa.istft(P, hop_length=hop_length)
    return signal_perc
def get_harmonic(filepath, kernel_size=(7,15), n_fft = 4096, hop_length = 1024):
    """
    Performs Harmonic/Percussive Source Separation on an audio file by applying median filters and returns
    the harmonic version as an audio signal.

    ARGS
        filepath: fullpath of audio file <str>
        kernel_size: tuple sized of (harmonic, percussive) filters (<int>,<int>)
        n_fft: FFT size <int>
        hop_length : hop length <int>
    RETURN
        signal_harm: harmonic enhanced audio signal, as returned by librosa.istft
    """
    signal, _ = load_signal(filepath)
    D = librosa.stft(signal, n_fft=n_fft, hop_length=hop_length)
    # Honour the caller's kernel_size (it used to be overridden by a hard-coded (7,15)).
    H, _ = librosa.decompose.hpss(D, kernel_size=kernel_size)
    # Invert with the same hop that produced the STFT.
    signal_harm = librosa.istft(H, hop_length=hop_length)
    return signal_harm
def get_chromagram(filepath, n_fft = 4096, hop_length = 1024):
    """
    Returns the chromagram of an audio file

    ARGS
        filepath: fullpath of audio file <str>
        n_fft: FFT size <int>
        hop_length : hop length <int>
    RETURN
        C: chromagram of audio signal <float numpy array>
    """
    signal, sr = load_signal(filepath)
    D = librosa.stft(signal, n_fft=n_fft, hop_length=hop_length)
    # The S argument is documented as a power spectrogram; handing over the raw
    # complex STFT produced a complex-valued chromagram.
    power = np.abs(D) ** 2
    # Forward the real sampling rate so the chroma mapping does not fall back
    # to librosa's default rate.
    C = librosa.feature.chromagram(S=power, sr=sr)
    return C
def get_enrate(signal = None, sr = None, filepath = None, downsample=100):
    """Computes the enrate of an audio signal.

    NOTE(review): the original docstring's citation was left unfinished ("as described in");
    presumably this is the "enrate" speaking-rate measure of Morgan, Fosler-Lussier and
    Mirghafori (1997) - confirm and add the reference.

    Either `signal` together with `sr`, or `filepath`, must be supplied. The input array is
    not modified.

    ARGS
        signal: audio signal <number array>
        sr: sampling rate of `signal` <int> (ignored when the signal is loaded from filepath)
        filepath: fullpath of audio file, used only when signal is None <str>
        downsample: sampling rate to downsample signal <int>
    RETURN
        enrate: proportional to speaking rate <float>
    RAISES
        ValueError: if neither signal nor filepath is given, or signal is given without sr
    """
    # A bare `import scipy` does not guarantee that the signal submodule is loaded.
    import scipy.signal

    # `signal == None` is an elementwise comparison on numpy arrays; test identity instead.
    if signal is None:
        if filepath is None:
            raise ValueError("get_enrate: either signal or filepath must be given")
        signal, sr = load_signal(filepath)
    elif sr is None:
        raise ValueError("get_enrate: sr is required when signal is given")

    # FFT data
    n_fft = downsample
    # NOTE(review): a hop of 0.8 * window is 20% overlap, whereas the windowing
    # comment below asks for > 75% overlap - confirm which one is intended.
    hop_length = int(0.8*downsample)

    # Half-wave rectify the signal waveform (np.maximum builds a new array, so the
    # caller's data is left untouched).
    rectified = np.maximum(np.asarray(signal), 0)

    # Low-pass filter. firwin takes the cutoff as a fraction of the Nyquist frequency;
    # passing nyq as well normalised the cutoff a second time.
    # NOTE(review): two taps give a very gentle filter - confirm the intended order.
    numtaps = 2
    cutoff = 16.0
    nyq = sr/2.0
    transfer_function = scipy.signal.firwin(numtaps, cutoff/nyq)
    filtered = scipy.signal.lfilter(transfer_function, 1.0, rectified)

    # Downsample to 100hz (or whatever `downsample` is)
    resampled = librosa.resample(filtered, orig_sr=sr, target_sr=downsample)

    # Hamming window 1-2 seconds with > 75% overlap.
    # get_window returns the periodic window by default, i.e. the same values as
    # hamming(downsample, sym=False).
    fft_window = scipy.signal.get_window("hamming", downsample)

    # FFT, ignore values above 16 hz
    magnitudes = np.abs(librosa.stft(resampled, n_fft=n_fft, hop_length=hop_length, window=fft_window))
    _, freq_res = get_bin_count_and_frequency_resolution(n_fft, downsample)
    lowest_fbin_idx = int(1/freq_res)
    highest_fbin_idx = int(16/freq_res)

    # Compute the spectral moment ( weight each magnitude by its bin index and sum )
    bin_weights = np.arange(lowest_fbin_idx, highest_fbin_idx)
    enrate = np.sum(magnitudes[lowest_fbin_idx:highest_fbin_idx].T * bin_weights)
    return enrate
def get_magnitudes(filepath, n_fft = 4096, hop_length = 1024):
    """Returns the magnitudes of the short-time Fourier transform of an audio file.

    The file is read through load_signal with sr=44100.

    ARGS
        filepath: fullpath of audio file <str>
        n_fft: FFT size <int>
        hop_length : hop length <int>
    RETURNS
        mags: spectrum magnitudes <numpy number array>
    """
    loaded = load_signal(filepath, sr=44100)
    audio = loaded[0]
    # Absolute value of the complex STFT gives the magnitude spectrum.
    spectrum = librosa.stft(audio, n_fft, hop_length)
    return np.abs(spectrum)
def get_fft(signal = None, filepath = None, n_fft = 4096, hop_length = 512, win_length=2048):
    """Returns the short-time Fourier transform (magnitudes and phases) of a signal or audio file

    Either `signal` or `filepath` must be supplied; the file is only loaded when no
    signal is given.

    ARGS
        signal: audio signal <number array>
        filepath: fullpath of audio file, used only when signal is None <str>
        n_fft: FFT size <int>
        hop_length : hop length <int>
        win_length: analysis window length, forwarded to librosa.stft <int>
    RETURNS
        mags: a complex-valued matrix <numpy number array>
    RAISES
        ValueError: if neither signal nor filepath is given
    """
    # `signal == None` is an elementwise comparison on numpy arrays; test identity instead.
    if signal is None:
        if filepath is None:
            raise ValueError("get_fft: either signal or filepath must be given")
        signal, _ = load_signal(filepath)
    # win_length used to be accepted but silently dropped; it is now forwarded.
    return librosa.stft(signal, n_fft=n_fft, hop_length=hop_length, win_length=win_length)
def get_mfcc(signal, n_fft = 4096, hop_length = 1024, sr=44100, n_mfcc=20, logscaled=True):
    """Computes the mel-frequency cepstral coefficients of a signal

    ARGS
        signal: audio signal <number array>
        n_fft: FFT size <int>
        hop_length : hop length <int>
        sr: sampling rate <int>
        n_mfcc: number of MFC coefficients <int>
        logscaled: log-scale the magnitudes of the spectrogram <bool>
    RETURN
        mfcc: mel-frequency cepstral coefficients <number numpy array>
    """
    S = librosa.feature.melspectrogram(y=signal, sr=sr, n_fft=n_fft, hop_length=hop_length)
    if logscaled:
        S = librosa.logamplitude(S)
    # With logscaled=False the raw mel spectrogram is used; previously that path
    # referenced an unassigned variable and raised NameError.
    mfcc = librosa.feature.mfcc(S=S, n_mfcc=n_mfcc)
    return mfcc
def get_feature_delta(feature, order=1):
    """Computes the n-th order delta of a signal by delegating to librosa.feature.delta.
    This is not the same as the plain difference Dn = Sn+1 - Sn.

    ARGS
        feature: 1d or 2d numerical array <number array>
        order: order of delta <int>
    RETURN
        delta: 1d or 2d numerical array <number array>
    """
    delta = librosa.feature.delta(feature, order=order)
    return delta
def get_centroid(mags = None, n_fft = 1024, hop_length = 512, sr = None, fbins=None):
    """Computes the centroid of a spectrum. Equivalent to the first statistical moment of the spectrum

    The centroid of each column of `mags` is the magnitude-weighted mean of `fbins`.
    Columns whose magnitudes sum to zero yield NaN.

    ARGS
        mags: spectrum magnitudes, one row per frequency bin <number array>
        n_fft: FFT size, used only to derive fbins when they are not given <int>
        hop_length : hop length (unused here) <int>
        sr: sampling rate, used only to derive fbins when they are not given <int>
        fbins: array with frequency of each frequency bin(optional) <number array>
    RETURN
        centroid: spectrum centroid of each column of mags <number array>,
                  or -1 when mags is None
    """
    if mags is None:
        # Kept as a message plus -1 return (not an exception) for existing callers.
        print('CENTROID : mags is None')
        return -1
    # Accept plain sequences as well as numpy arrays.
    mags = np.asarray(mags)
    if fbins is None:
        print('has not fbin')
        nbins, binres = get_bin_count_and_frequency_resolution(n_fft,sr)
        fbins = binres * np.arange(nbins + 1)
    else:
        fbins = np.asarray(fbins)
    centroid = fbins.dot(mags)/mags.sum(axis=0)
    return centroid
def get_spread(centroids = None, mags = None, n_fft = 1024, hop_length = 512, sr = None, fbins=None):
    """Computes the spread of a spectrum. Equivalent to the second statistical moment of the spectrum

    For every frame (column of `mags`) the magnitudes are normalised to sum to one and
    the result is sqrt(mean(p * |fbins - centroid|**2)) over the frequency bins.

    NOTE(review): the average is np.mean over bins, not a probability-weighted sum,
    so the value is the usual standard deviation divided by sqrt(number of bins).
    Kept as-is to preserve existing feature values - confirm the intent.

    ARGS
        centroids: centroid(s), one per frame; computed with get_centroid when None <number array>
        mags: spectrum magnitudes, one row per frequency bin, one column per frame <number array>
        n_fft: FFT size, used only to derive fbins when they are not given <int>
        hop_length : hop length <int>
        sr: sampling rate, used only to derive fbins when they are not given <int>
        fbins: array with frequency of each frequency bin(optional) <number array>
    RETURN
        spread: spectrum spread of each frame <number array>
    RAISES
        ValueError: if mags is None
    """
    if mags is None:
        raise ValueError("get_spread: mags is None")
    mags = np.asarray(mags)
    if fbins is None:
        nbins, binres = get_bin_count_and_frequency_resolution(n_fft,sr)
        fbins = binres * np.arange(nbins + 1)
    else:
        fbins = np.asarray(fbins)
    if centroids is None:
        # The helper is get_centroid (singular); hand it the bins already resolved
        # so both computations use the same frequency axis.
        centroids = get_centroid(mags=mags, n_fft=n_fft, hop_length=hop_length, sr=sr, fbins=fbins)
    centroids = np.asarray(centroids)

    # Only as many frames as there are centroids are processed, like the old loop.
    frames = mags[:, :len(centroids)]
    probs = frames / frames.sum(axis=0)
    # (bins, frames) matrix of distances between each bin and each frame's centroid.
    deviation = np.abs(fbins[:, np.newaxis] - centroids[np.newaxis, :])
    spread = np.sqrt(np.mean(probs * deviation ** 2, axis=0))
    return spread
def get_skewness(centroids = None, mags = None, n_fft = 1024, hop_length = 512, sr = None, fbins=None):
    """Computes the skewness of a spectrum. Equivalent to the third statistical moment of the spectrum

    For every frame (column of `mags`) the magnitudes are normalised to sum to one and
    the result is mean(p * |fbins - centroid|**3 / spread**3) over the frequency bins,
    with the spread taken from get_spread. Frames with zero spread divide by zero.

    NOTE(review): the absolute deviation makes the result non-negative, and np.mean
    (not a weighted sum) is used over bins. Kept as-is to preserve existing feature
    values - confirm whether a signed skewness was intended.

    ARGS
        centroids: centroid(s), one per frame; computed with get_centroid when None <number array>
        mags: spectrum magnitudes, one row per frequency bin, one column per frame <number array>
        n_fft: FFT size, used only to derive fbins when they are not given <int>
        hop_length : hop length <int>
        sr: sampling rate, used only to derive fbins when they are not given <int>
        fbins: array with frequency of each frequency bin(optional) <number array>
    RETURN
        skewness: spectrum skewness of each frame <number array>
    RAISES
        ValueError: if mags is None
    """
    if mags is None:
        raise ValueError("get_skewness: mags is None")
    mags = np.asarray(mags)
    if fbins is None:
        nbins, binres = get_bin_count_and_frequency_resolution(n_fft,sr)
        fbins = binres * np.arange(nbins + 1)
    else:
        fbins = np.asarray(fbins)
    if centroids is None:
        # The helper is get_centroid (singular).
        centroids = get_centroid(mags=mags, n_fft=n_fft, hop_length=hop_length, sr=sr, fbins=fbins)
    centroids = np.asarray(centroids)
    # Forward fbins so the spread is measured on the same frequency axis as here.
    stds = np.asarray(get_spread(centroids=centroids, mags=mags, n_fft=n_fft,
                                 hop_length=hop_length, sr=sr, fbins=fbins))

    # Only as many frames as there are centroids are processed, like the old loop.
    frames = mags[:, :len(centroids)]
    probs = frames / frames.sum(axis=0)
    # (bins, frames) matrix of distances between each bin and each frame's centroid.
    deviation = np.abs(fbins[:, np.newaxis] - centroids[np.newaxis, :])
    skewness = np.mean((probs * deviation ** 3) / stds ** 3, axis=0)
    return skewness
def get_kurtosis(centroids = None, mags = None, n_fft = 1024, hop_length = 512, sr = None, fbins=None):
    """Computes the kurtosis of a spectrum. Equivalent to the fourth statistical moment of the spectrum

    For every frame (column of `mags`) the magnitudes are normalised to sum to one and
    the result is mean(p * |fbins - centroid|**4 / spread**4) over the frequency bins,
    with the spread taken from get_spread. Frames with zero spread divide by zero.

    NOTE(review): np.mean (not a probability-weighted sum) is used over bins, so the
    value is scaled relative to the textbook kurtosis. Kept as-is to preserve existing
    feature values - confirm the intent.

    ARGS
        centroids: centroid(s), one per frame; computed with get_centroid when None <number array>
        mags: spectrum magnitudes, one row per frequency bin, one column per frame <number array>
        n_fft: FFT size, used only to derive fbins when they are not given <int>
        hop_length : hop length <int>
        sr: sampling rate, used only to derive fbins when they are not given <int>
        fbins: array with frequency of each frequency bin(optional) <number array>
    RETURN
        kurtosis: spectrum kurtosis of each frame <number array>
    RAISES
        ValueError: if mags is None
    """
    if mags is None:
        raise ValueError("get_kurtosis: mags is None")
    mags = np.asarray(mags)
    if fbins is None:
        nbins, binres = get_bin_count_and_frequency_resolution(n_fft,sr)
        fbins = binres * np.arange(nbins + 1)
    else:
        fbins = np.asarray(fbins)
    if centroids is None:
        # The helper is get_centroid (singular).
        centroids = get_centroid(mags=mags, n_fft=n_fft, hop_length=hop_length, sr=sr, fbins=fbins)
    centroids = np.asarray(centroids)
    # Forward fbins so the spread is measured on the same frequency axis as here.
    stds = np.asarray(get_spread(centroids=centroids, mags=mags, n_fft=n_fft,
                                 hop_length=hop_length, sr=sr, fbins=fbins))

    # Only as many frames as there are centroids are processed, like the old loop.
    frames = mags[:, :len(centroids)]
    probs = frames / frames.sum(axis=0)
    # (bins, frames) matrix of distances between each bin and each frame's centroid.
    deviation = np.abs(fbins[:, np.newaxis] - centroids[np.newaxis, :])
    kurtosis = np.mean((probs * deviation ** 4) / stds ** 4, axis=0)
    return kurtosis
def get_slope(mags = None, n_fft = 1024, hop_length = 512, sr = None, fbins = None):
    """Computes the slope of each timeframe of a spectrum.

    A straight line is fitted (scipy.stats.linregress) to the magnitudes of every
    frame as a function of the bin frequencies.

    ARGS
        mags: spectrum magnitudes, one row per frequency bin, one column per frame <number array>
        n_fft: FFT size, used only to derive fbins when they are not given <int>
        hop_length : hop length (unused here) <int>
        sr: sampling rate, used only to derive fbins when they are not given <int>
        fbins: array with frequency of each frequency bin(optional) <number array>
    RETURN
        slopes: one (slope, intercept) row per time frame <number array>
    RAISES
        ValueError: if mags is None
    """
    # A bare `import scipy` does not guarantee that the stats submodule is loaded.
    import scipy.stats

    if mags is None:
        raise ValueError("SLOPE: mags is None")
    mags = np.asarray(mags)
    if fbins is None:
        nbins, binres = get_bin_count_and_frequency_resolution(n_fft,sr)
        fbins = binres * np.arange(nbins + 1)

    slopes = []
    # Transposing iterates over frames (columns) instead of frequency bins.
    for frame in mags.T:
        slope, intercept, _, _, _ = scipy.stats.linregress(fbins, frame)
        slopes.append((slope, intercept))
    return np.array(slopes)