-
Notifications
You must be signed in to change notification settings - Fork 0
/
features.py
203 lines (144 loc) · 6.72 KB
/
features.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
import os
import sys
import timeit
import glob
import numpy as np
import scipy
from scipy.fftpack import fft
from scikits.talkbox.features import mfcc
from utils import DATA_DIR, FT_DIR, GENRE_DICT
from utils import show_feature, load_source, stft, plot_stft
#For example, a measurement with a high zero-crossing rate, i.e., the number of samples per second that cross the zero reference line, indicates that it is noisy.
def zero_crossing_rate(wavedata, window_size, sample_rate):
num_windows = int(np.ceil(len(wavedata) / window_size))
timestamps = (np.arange(0, num_windows - 1) * (window_size / float(sample_rate)))
zcr = []
for i in range(0, num_windows-1):
start = i * window_size
end = np.min([(start + window_size - 1), (len(wavedata))])
zc = 0.5 * np.mean(np.abs(np.diff(np.sign(wavedata[start:end]))))
zcr.append(zc)
return np.asarray(zcr), np.asarray(timestamps)
#The RMS method takes the square of the instantaneous voltage before averaging, then takes the square root of the average.
def root_mean_square(wavedata, window_size, sample_rate):
#original wavedata is an array of int16 [-32768:32767], square value may exceed the range
wavedata = np.int32(wavedata)
num_windows = int(np.ceil(len(wavedata)/window_size))
timestamps = timestamps = (np.arange(0, num_windows - 1) * (window_size / float(sample_rate)))
rms = []
for i in range(0, num_windows-1):
start = i * window_size
end = np.min([(start + window_size - 1), len(wavedata)])
rms_seg = np.sqrt(np.mean(wavedata[start:end]**2))
rms.append(rms_seg)
return np.asarray(rms), np.asarray(timestamps)
#spectual analysis
#
#center of gravity (balancing point of the spectrum)
#It determines the frequency area around which most of the signal energy concentrates
#gives an indication of how dark or bright a sound is
def spectual_centroid(wavedata, window_size, sample_rate):
magnitude_spectrum = stft(wavedata, window_size)
timebins, freqbins = np.shape(magnitude_spectrum)
timestamps = np.arange(0,timebins-1) * (timebins / float(sample_rate))
sc = []
for t in range(timebins-1):
power_spectrum = np.abs(magnitude_spectrum[t])**2
sc_t = np.sum(power_spectrum * np.arange(1, freqbins+1)) / np.sum(power_spectrum)
sc.append(sc_t)
sc = np.asarray(sc)
sc = np.nan_to_num(sc)
return sc, np.asarray(timestamps)
# the frequency below which some fraction, k (typically 0.85, 0.9 or 0.95 percentile), of the cumulative spectral power resides
# measure of the skewness of the spectral shape
# indication of how much energy is in the lower frequencies
# It is used to distinguish voiced from unvoiced speech or music
def spectral_rolloff(wavedata, window_size, sample_rate, k=0.85):
magnitude_spectrum = stft(wavedata, window_size)
power_spectrum = np.abs(magnitude_spectrum) ** 2
timebins, freqbins = np.shape(magnitude_spectrum)
timestamps = (np.arange(0,timebins - 1) * (timebins / float(sample_rate)))
sr = []
spectral_sum = np.sum(power_spectrum, axis=1)
for t in range(timebins-1):
sr_t = np.where(np.cumsum(power_spectrum[t,:]) >= k * spectral_sum[t])[0][0]
sr.append(sr_t)
sr = np.asarray(sr).astype(float)
sr = (sr / freqbins) * (sample_rate / 2.0)
return sr, np.asarray(timestamps)
# squared differences in frequency distribution of two successive time frames
# measures the rate of local change in the spectrum
def spectral_flux(wavedata, window_size, sample_rate):
magnitude_spectrum = stft(wavedata, window_size)
timebins, freqbins = np.shape(magnitude_spectrum)
timestamps = (np.arange(0,timebins - 1) * (timebins / float(sample_rate)))
sf = np.sqrt(np.sum(np.diff(np.abs(magnitude_spectrum))**2, axis=1)) / freqbins
return sf[1:], np.asarray(timestamps)
def write_features(feature, fn):
np.save(fn, feature)
print("Written : ", fn)
def compute_features(source, features):
"""
compute features for all the tracks
"""
for label in source.keys():
for i in range(0,100):
base_path = os.path.join(FT_DIR, "%s_%d" % (label, i))
ft = []
if 'zcr' in features:
zcr, ts = zero_crossing_rate(source[label][i]['wavedata'], 512, source[label][i]['sample_rate'])
ft.append(zcr)
if 'rms' in features:
rms, ts = root_mean_square(source[label][i]['wavedata'], 512, source[label][i]['sample_rate'])
ft.append(rms)
if 'sc' in features:
sc, ts = spectual_centroid(source[label][i]['wavedata'], 2048, source[label][i]['sample_rate'])
ft.append(sc)
if 'sr' in features:
sr, ts = spectral_rolloff(source[label][i]['wavedata'], 2048, source[label][i]['sample_rate'])
ft.append(sr)
if 'sf' in features:
sf, ts = spectral_flux(source[label][i]['wavedata'], 2048, source[label][i]['sample_rate'])
ft.append(sf)
if 'mfcc' in features:
ceps, mspec, spec = mfcc(source[label][i]['wavedata'])
ft.append(ceps)
write_features(ft, base_path)
def read_features(features):
"""
read all the features in the 'features' array and return a numpy array
currently only compute the grand mean and std
"""
start = timeit.default_timer()
x = []
y = []
for fn in glob.glob(os.path.join(FT_DIR, "*.npy")):
start = fn.rfind('/')
end = fn.rfind('.')
ext = fn[start+1:end]
genre, _= ext.split('_')
data = np.load(fn)
surface_ft = data[:-1] #5 features
ft_vec = [np.mean(ft) for ft in surface_ft] + [np.std(ft) for ft in surface_ft]
ceps = data[-1]#mfcc features
cep_len = len(ceps)
ft_vec += np.mean(ceps[int(cep_len / 10.):int(cep_len * 9 / 10.)], axis=0).tolist()
x.append(ft_vec)
y.append(GENRE_DICT[genre])
end = timeit.default_timer()
print("reading all features takes: ", (end - start))
return np.array(x), np.array(y)
def create_features(features):
import timeit
source = load_source()
start = timeit.default_timer()
compute_features(source, features)
end = timeit.default_timer()
print("save all features takes ", (end-start))
if __name__ == "__main__":
features = ['zcr', 'rms', 'sc', 'sr', 'sf', 'mfcc']
create_features(features) #will write all the features to the 'feature' directory
# x, y = read_features(features)
# print(x.shape, " : ",y.shape)
#show_feature(source['classical'][99], "zcr", ts, "classical_512_zcr")
#plot_stft(source['blues'][99]['wavedata'], source['blues'][99]['sample_rate'], 512)