# coding=utf-8
import bob
import numpy
import os
import logging
from collections import defaultdict
import utils
__author__ = 'Timo Mikkilä'
#logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)
current_dir = os.getcwd()
# base_path = ''
# snd_path = os.path.join(base_path, 'snd')
# train_sounds_path = os.path.join(snd_path, 'train')
# eval_sounds_path = os.path.join(snd_path, 'eval')
#
# analyze_path = os.path.join(base_path, 'analyze')
#
# train_features_path = os.path.join(analyze_path, 'train')
# eval_features_path = os.path.join(analyze_path, 'eval')
#
# mfcc_path = os.path.join(analyze_path, 'mfcc')
# kmeans_file = os.path.join(analyze_path, 'kmeans.hdf5')
# ubm_file = os.path.join(analyze_path, 'ubm.hdf5')
# gmm_stats_path = os.path.join(analyze_path, 'gmm_stats')
# tv_file = os.path.join(analyze_path, 'tv.hdf5')
# ivec_dir = os.path.join(analyze_path, 'ivectors')
# num_gauss = 16
#
# ubm_convergence_threshold = 1e-4
# ubm_max_iterations = 10
# Feature extraction
def feature_extraction(audio_file, wl=20, ws=10, nf=24, nceps=19, fmin=0., fmax=4000., d_w=2, pre=0.97, mel=True):
    """Extract normalized MFCC (or LFCC) features from an audio file.

    Parameters used to extract MFCC (these could also be defined in a
    separate configuration file):
      wl    -- window length in milliseconds
      ws    -- window shift in milliseconds
      nf    -- number of filter bands
      nceps -- number of cepstral coefficients
      fmin  -- minimal frequency of the filter bank
      fmax  -- maximal frequency of the filter bank
      d_w   -- delta value used to compute 1st and 2nd derivatives
      pre   -- coefficient used for the pre-emphasis
      mel   -- True extracts MFCC, False extracts LFCC

    First and second derivatives could also be added by activating the
    corresponding flags on the Ceps object.
    """
    # read the audio file
    #(rate, signal) = utils.read(audio_file)
    (rate, signal) = utils.load_wav_as_mono(audio_file)
    # extract MFCCs
    ceps = bob.ap.Ceps(rate, wl, ws, nf, nceps, fmin, fmax, d_w, pre, mel)
    # convert the signal to a float array
    signal = numpy.cast['float'](signal)
    mfcc = ceps(signal)
    # normalize the features with this helper function to reduce the
    # effect of the channel
    mfcc = utils.normalize_features(mfcc)
    return mfcc
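# A minimal usage sketch (the path below is hypothetical, not a file shipped
# with this project):
#
#   mfcc = feature_extraction('snd/train/some_bird.wav')
#   # mfcc is a 2D numpy array with one row of nceps coefficients per frame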
def recursively_extract_features(path, dest_dir, wl=20, ws=10, nf=24, nceps=19, fmin=0., fmax=4000., d_w=2, pre=0.97,
                                 mel=True):
    for f in os.listdir(path):
        f_location = os.path.join(path, f)
        if utils.is_wav_file(f_location):
            logger.debug('Extract features: ' + f_location)
            stripped_filename = utils.strip_filename(f)
            dest_file = os.path.join(dest_dir, stripped_filename)
            dest_file += '.hdf5'
            mfcc = feature_extraction(f_location, wl, ws, nf, nceps, fmin, fmax, d_w, pre, mel)
            utils.ensure_dir(dest_dir)
            bob.io.save(mfcc, dest_file)
            logger.debug('saving mfcc to ' + dest_file)
        elif os.path.isdir(f_location):
            logger.debug('Extract features: ' + f_location)
            new_path = os.path.join(path, f)
            new_dest_dir = os.path.join(dest_dir, f)
            # pass the extraction parameters through so subdirectories do not
            # silently revert to the defaults
            recursively_extract_features(new_path, new_dest_dir, wl, ws, nf, nceps, fmin, fmax, d_w, pre, mel)
        else:
            logger.info('Unknown file type: ' + str(f_location))
            continue
def recursive_load(path, file_validator, loader):
    data = []
    for f in os.listdir(path):
        f_location = os.path.join(path, f)
        if file_validator(f_location):
            logger.debug('Loading: ' + f_location)
            feature = loader(f_location)
            data.append(feature)
        elif os.path.isdir(f_location):
            logger.debug('Load dir: ' + f_location)
            new_path = os.path.join(path, f)
            data += recursive_load(new_path, file_validator, loader)
        else:
            logger.info('Unknown file type: ' + str(f_location))
            continue
    return data
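# recursive_load is the generic walker behind the recursive_load_* helpers
# below; for example recursive_load(path, utils.is_hdf5_file, bob.io.load)
# collects every HDF5 feature file under path into one flat list.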
def test_array_for_nan(array):
    for v in array:
        if isinstance(v, numpy.ndarray):
            test_array_for_nan(v)
        else:
            if numpy.isnan(v).any():
                print 'Contains NAN: ' + str(v)
            if numpy.isinf(v).any():
                print 'Contains INF: ' + str(v)
def recursive_test_for_nan(path):
    for f in os.listdir(path):
        f_location = os.path.join(path, f)
        if utils.is_hdf5_file(f_location):
            #logger.debug('Loading: ' + f_location)
            feature = bob.io.load(f_location)
            if numpy.isnan(feature).any():
                print 'Contains NAN: ' + f_location
            if numpy.isinf(feature).any():
                print 'Contains INF: ' + f_location
        elif os.path.isdir(f_location):
            #logger.debug('Load dir: ' + f_location)
            new_path = os.path.join(path, f)
            recursive_test_for_nan(new_path)
        else:
            logger.info('Unknown file type: ' + str(f_location))
            continue
def recursive_test_wavs_for_nan(path):
    for f in os.listdir(path):
        f_location = os.path.join(path, f)
        if utils.is_wav_file(f_location):
            #logger.debug('Loading: ' + f_location)
            rate, feature = utils.load_wav_as_mono(f_location)
            if numpy.isnan(feature).any():
                print 'Contains NAN: ' + f_location
            if numpy.isinf(feature).any():
                print 'Contains INF: ' + f_location
        elif os.path.isdir(f_location):
            #logger.debug('Load dir: ' + f_location)
            new_path = os.path.join(path, f)
            recursive_test_wavs_for_nan(new_path)
        else:
            logger.info('Unknown file type: ' + str(f_location))
            continue
def load_mfcc_files(files):
    features = []
    for f in files:
        if not utils.is_hdf5_file(f):
            logger.error('Wrong file format: ' + f)
            # skip files in the wrong format instead of loading them anyway
            continue
        features.append(bob.io.load(f))
    return features
def recursive_load_mfcc_files(path):
    return recursive_load(path, utils.is_hdf5_file, bob.io.load)
def train_kmeans_machine(train_features, num_gauss, dim):
    #dim = train_features.shape[1]
    kmeans_trainer = bob.trainer.KMeansTrainer()
    kmeans = bob.machine.KMeansMachine(num_gauss, dim)
    kmeans_trainer.train(kmeans, train_features)
    return kmeans
def train_ubm_gmm_with_features(kmeans, train_features, num_gauss, dim, convergence_threshold=1e-4, max_iterations=10):
    #dim = train_features.shape[1]
    # create the universal background model
    ubm = bob.machine.GMMMachine(num_gauss, dim)
    # initialize the means of the Gaussians of the GMM with the means of the k-means
    ubm.means = kmeans.means
    # update means/variances/weights at each iteration
    ubm_trainer = bob.trainer.ML_GMMTrainer(True, True, True)
    ubm_trainer.convergence_threshold = convergence_threshold
    ubm_trainer.max_iterations = max_iterations
    ubm_trainer.train(ubm, train_features)
    return ubm
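# Sketch of how the two trainers above chain together (assumed, not taken
# from a caller in this file; train_features_path, num_gauss and ubm_file
# stand in for the commented-out configuration values at the top):
#
#   train_features = numpy.vstack(recursive_load_mfcc_files(train_features_path))
#   dim = train_features.shape[1]
#   kmeans = train_kmeans_machine(train_features, num_gauss, dim)
#   ubm = train_ubm_gmm_with_features(kmeans, train_features, num_gauss, dim)
#   ubm.save(bob.io.HDF5File(ubm_file, 'w'))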
def recursive_find_all_files(path, extension):
    logger.debug('Recursively list all files from ' + path + ' with extension ' + extension)
    files = []
    for f in os.listdir(path):
        f_location = os.path.join(path, f)
        if utils.file_has_extension(f_location, extension):
            files.append(f_location)
        elif os.path.isdir(f_location):
            child_path = os.path.join(path, f)
            files += recursive_find_all_files(child_path, extension)
        else:
            logger.debug('Wrong filetype: ' + str(f_location))
            continue
    logger.debug('Found ' + str(len(files)) + ' files')
    return files
def compute_gmm_sufficient_statistics(ubm, src_dir, dest_dir):
    for f in os.listdir(src_dir):
        f_location = os.path.join(src_dir, f)
        logger.debug('compute_gmm_sufficient_statistics: ' + f_location)
        if utils.is_hdf5_file(f_location):
            stripped_filename = utils.strip_filename(f)
            dest_file = os.path.join(dest_dir, stripped_filename)
            dest_file += '.hdf5'
            feature = bob.io.load(f_location)
            gmm_stats = bob.machine.GMMStats(ubm.dim_c, ubm.dim_d)
            ubm.acc_statistics(feature, gmm_stats)
            utils.ensure_dir(dest_dir)
            gmm_stats.save(bob.io.HDF5File(dest_file, 'w'))
            logger.debug('saving gmm_stats to ' + dest_file)
        elif os.path.isdir(f_location):
            new_path = os.path.join(src_dir, f)
            new_dest_dir = os.path.join(dest_dir, f)
            compute_gmm_sufficient_statistics(ubm, new_path, new_dest_dir)
        else:
            logger.info('Unknown file type: ' + str(f_location))
            continue
def load_gmm_stats_file(filename):
    return bob.machine.GMMStats(bob.io.HDF5File(filename))
def load_gmm_machine_file(filename):
    return bob.machine.GMMMachine(bob.io.HDF5File(filename))
def recursive_load_gmm_stats(gmm_stats_path):
    return recursive_load(gmm_stats_path, utils.is_hdf5_file, load_gmm_stats_file)
def recursive_load_gmm_machines(gmm_machines_path):
    return recursive_load(gmm_machines_path, utils.is_hdf5_file, load_gmm_machine_file)
def gen_ivec_machine(gmm_train_stats, ubm, iv_dim=None, variance_threshold=1e-5, max_iterations=10, update_sigma=True):
    if iv_dim is None:
        iv_dim = len(gmm_train_stats)
    # create the i-vector machine
    ivec_machine = bob.machine.IVectorMachine(ubm, iv_dim)
    ivec_machine.variance_threshold = variance_threshold
    # train the total variability (TV) matrix
    ivec_trainer = bob.trainer.IVectorTrainer(update_sigma=update_sigma, max_iterations=max_iterations)
    ivec_trainer.train(ivec_machine, gmm_train_stats)
    return ivec_machine
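# Typical wiring for the i-vector stage (a sketch; gmm_stats_path and tv_file
# mirror the commented-out configuration above, and ubm is assumed to have
# been trained as sketched earlier):
#
#   gmm_train_stats = recursive_load_gmm_stats(gmm_stats_path)
#   ivec_machine = gen_ivec_machine(gmm_train_stats, ubm)
#   ivec_machine.save(bob.io.HDF5File(tv_file, 'w'))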
def gmm_stats_to_ivec(gmm_stats_file, ivec_file, ivec_machine):
    # load the GMM stats file
    gmm_stats = bob.machine.GMMStats(bob.io.HDF5File(gmm_stats_file))
    # extract the i-vector
    ivec = ivec_machine.forward(gmm_stats)
    # save it
    bob.io.save(ivec, ivec_file)
def extract_i_vectors(gmm_stats_path, ivec_dir, ivec_machine):
    for f in os.listdir(gmm_stats_path):
        f_location = os.path.join(gmm_stats_path, f)
        logger.debug('extract_i_vectors: ' + f_location)
        if utils.is_hdf5_file(f_location):
            stripped_filename = utils.strip_filename(f)
            dest_file = os.path.join(ivec_dir, stripped_filename)
            dest_file += '.hdf5'
            utils.ensure_dir(ivec_dir)
            gmm_stats_to_ivec(f_location, dest_file, ivec_machine)
            logger.debug('saving ivec ' + dest_file)
        elif os.path.isdir(f_location):
            new_path = os.path.join(gmm_stats_path, f)
            new_dest_dir = os.path.join(ivec_dir, f)
            extract_i_vectors(new_path, new_dest_dir, ivec_machine)
        else:
            logger.info('Unknown file type: ' + str(f_location))
            continue
def map_gmm_machine_analysis(features_path, map_gmm_dir, map_gmm_machine):
    for f in os.listdir(features_path):
        f_location = os.path.join(features_path, f)
        logger.debug('running map gmm machine for: ' + f_location)
        if utils.is_hdf5_file(f_location):
            stripped_filename = utils.strip_filename(f)
            dest_file = os.path.join(map_gmm_dir, stripped_filename)
            dest_file += '.hdf5'
            utils.ensure_dir(map_gmm_dir)
            # load the features
            feature = bob.io.load(f_location)
            logger.debug('feature type ' + str(type(feature)))
            # run the MAP GMM machine on the features
            response = map_gmm_machine(feature)
            # save the response
            bob.io.save(response, dest_file)
            logger.debug('saving map gmm response ' + dest_file)
        elif os.path.isdir(f_location):
            new_path = os.path.join(features_path, f)
            new_dest_dir = os.path.join(map_gmm_dir, f)
            map_gmm_machine_analysis(new_path, new_dest_dir, map_gmm_machine)
        else:
            logger.info('Unknown file type: ' + str(f_location))
            continue
def recursive_execute_gmms_on_machine(gmm_stats_path, dest_path, machine):
    for f in os.listdir(gmm_stats_path):
        gmm_stats_file = os.path.join(gmm_stats_path, f)
        logger.debug('recursive_execute_gmms_on_machine: ' + gmm_stats_file)
        if utils.is_hdf5_file(gmm_stats_file):
            stripped_filename = utils.strip_filename(f)
            dest_file = os.path.join(dest_path, stripped_filename)
            dest_file += '.hdf5'
            utils.ensure_dir(dest_path)
            # load the GMM stats file
            gmm_stats = bob.machine.GMMStats(bob.io.HDF5File(gmm_stats_file))
            # run the machine on the stats
            output = machine.forward(gmm_stats)
            # save the output
            bob.io.save(output, dest_file)
            logger.debug('saving output to ' + dest_file)
        elif os.path.isdir(gmm_stats_file):
            new_path = os.path.join(gmm_stats_path, f)
            new_dest_dir = os.path.join(dest_path, f)
            # recurse into the subdirectory with the same machine
            recursive_execute_gmms_on_machine(new_path, new_dest_dir, machine)
        else:
            logger.info('Unknown file type: ' + str(gmm_stats_file))
            continue
def gen_map_gmm_trainer(ubm, relevance_factor, convergence_threshold=1e-5, max_iterations=200):
    # adapt only the means (update_means=True, update_variances/weights=False)
    trainer = bob.trainer.MAP_GMMTrainer(relevance_factor, True, False, False)
    trainer.convergence_threshold = convergence_threshold
    trainer.max_iterations = max_iterations
    trainer.set_prior_gmm(ubm)
    return trainer
def enroll(features, ubm, gmm_trainer):
    # stack the features of all enrollment files of the class
    feature_set = numpy.vstack(features)
    # create a GMM from the UBM
    gmm = bob.machine.GMMMachine(ubm)
    # train the GMM
    gmm_trainer.train(gmm, feature_set)
    # return the resulting gmm
    return gmm
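# Example of enrolling one class by MAP adaptation (a sketch; the relevance
# factor 4 is a common starting point, not a value taken from this project):
#
#   gmm_trainer = gen_map_gmm_trainer(ubm, relevance_factor=4)
#   class_gmm = enroll(class_features, ubm, gmm_trainer)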
def gen_MAP_GMM_machines(ubm, all_features, gmm_trainer, dest_dir):
    for name, class_feat_files in all_features.iteritems():
        class_feats = load_mfcc_files(class_feat_files)
        class_gmm = enroll(class_feats, ubm, gmm_trainer)
        # make sure the destination directory exists, as the other writers do
        utils.ensure_dir(dest_dir)
        gmm_file = os.path.join(dest_dir, name + '.hdf5')
        class_gmm.save(bob.io.HDF5File(gmm_file, 'w'))
def gen_filedict(filelist):
    filedict = defaultdict(list)
    for f in filelist:
        name = utils.get_bird_name_from_file_name(f)
        filedict[name].append(f)
    return filedict
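# gen_filedict groups a flat file list by the bird name encoded in each file
# name, e.g. (hypothetical names):
#
#   files = recursive_find_all_files(train_features_path, '.hdf5')
#   filedict = gen_filedict(files)
#   # {'sparrow': ['.../sparrow_01.hdf5', ...], 'crow': [...], ...}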
def gen_kmeans(training_set, number_of_gaussians):
    logger.info('Generating k-means.')
    #training_set = numpy.vstack(train_features)
    feature_dimensions = training_set.shape[1]
    # create the KMeans machine
    kmeans = bob.machine.KMeansMachine(number_of_gaussians, feature_dimensions)
    # create the KMeansTrainer
    kmeans_trainer = bob.trainer.KMeansTrainer()
    # train using the KMeansTrainer
    kmeans_trainer.train(kmeans, training_set)
    return kmeans
def gen_ubm(kmeans, training_set, trainer_convergence_threshold, trainer_max_iterations):
    logger.info('Generating UBM')
    #training_set = numpy.vstack(train_features)
    [variances, weights] = kmeans.get_variances_and_weights_for_each_cluster(training_set)
    means = kmeans.means
    ubm = bob.machine.GMMMachine(kmeans.dim_c, kmeans.dim_d)
    # initialize the GMM from the k-means clustering
    ubm.means = means
    ubm.variances = variances
    ubm.weights = weights
    # train the GMM
    trainer = bob.trainer.ML_GMMTrainer()
    trainer.convergence_threshold = trainer_convergence_threshold
    trainer.max_iterations = trainer_max_iterations
    trainer.train(ubm, training_set)
    return ubm
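# End-to-end sketch of the intended pipeline, kept commented out so importing
# this module stays side-effect free. The path and parameter names mirror the
# commented-out configuration at the top of the file and are assumptions, not
# values shipped with this script:
#
# if __name__ == '__main__':
#     recursively_extract_features(train_sounds_path, train_features_path)
#     training_set = numpy.vstack(recursive_load_mfcc_files(train_features_path))
#     kmeans = gen_kmeans(training_set, num_gauss)
#     ubm = gen_ubm(kmeans, training_set, ubm_convergence_threshold,
#                   ubm_max_iterations)
#     compute_gmm_sufficient_statistics(ubm, train_features_path, gmm_stats_path)
#     gmm_train_stats = recursive_load_gmm_stats(gmm_stats_path)
#     ivec_machine = gen_ivec_machine(gmm_train_stats, ubm)
#     extract_i_vectors(gmm_stats_path, ivec_dir, ivec_machine)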