forked from bastibe/WebGL-Spectrogram
/
spectrogram.py
436 lines (351 loc) · 10.9 KB
/
spectrogram.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
import h5py
import operator
import math
import time
import numpy as np
from collections import namedtuple
from scipy import signal
from spectrum import dpss
from eegtools.io import load_edf
from helpers import grouper
# Integer ids of the four regions a spectrogram can be computed for.
LL, LP, RP, RL = 0, 1, 2, 3

# Region name -> integer id.
CHANNELS = dict(LL=LL, LP=LP, RP=RP, RL=RL)

# Region name -> ordered (first, second) electrode pairs. Each region is a
# chain of electrodes; every pair of neighbours in the chain forms one
# difference signal (see eeg_ch_spectrogram).
DIFFERENCE_PAIRS = {
    region: list(zip(chain[:-1], chain[1:]))
    for region, chain in (
        ('LL', ('fp1', 'f7', 't3', 't5', 'o1')),
        ('LP', ('fp1', 'f3', 'c3', 'p3', 'o1')),
        ('RP', ('fp2', 'f4', 'c4', 'p4', 'o2')),
        ('RL', ('fp2', 'f8', 't4', 't6', 'o2')),
    )
}

# Electrode name -> column of that electrode in the loaded data matrix.
CHANNEL_INDEX = {
    electrode: column
    for column, electrode in enumerate((
        'fp1', 'f3', 'c3', 'p3', 'o1',
        'fp2', 'f4', 'c4', 'p4', 'o2',
        'f7', 't3', 't5',
        'f8', 't4', 't6',
    ))
}

# Default length of one processing chunk, in hours (see _get_chunksize).
CHUNK_HOURS = 1.0
# Parameter bundle produced by get_eeg_spectrogram_params and consumed by the
# multitaper EEG pipeline. The typename matches the bound name so that repr()
# and pickling refer to the right class.
EEGSpecParams = namedtuple(
    'EEGSpecParams', ['filename', 'duration',
                      'chunksize', 'fs', 'data',
                      'shift', 'spec_len',
                      'nstep', 'trial_avg',
                      'nfft', 'nblocks', 'tapers',
                      'nfreqs', 'nsamples', 'findx', ])

# Parameter bundle produced by get_audio_spectrogram_params.
AudioSpecParams = namedtuple(
    'AudioSpecParams', ['chunksize', 'fs',
                        'shift', 'spec_len',
                        'nfft', 'nblocks',
                        'nfreqs', 'nsamples'])
# When the module runs under kernprof a ``profile`` decorator is expected to
# be available already; probe for it and, when the name is missing, install a
# pass-through decorator so that ``@profile`` keeps working in normal runs.
try:
    profile(lambda x: None)
except NameError:
    def noop_decorator(func):
        """Return ``func`` unchanged."""
        return func

    profile = noop_decorator
def _get_nsamples(data, fs, duration):
"""
Determine the number of samples to take
from a data source based on the duration (hrs)
specified
"""
data_len = len(data)
if duration is None:
return data_len
nsamples = min(data_len, int(fs * 60 * 60 * duration))
print 'Num samples:', nsamples
return nsamples
def _get_chunksize(nsamples, fs, nfft, num_hours=CHUNK_HOURS):
    """
    Return the length of one processing chunk in samples.

    The chunk covers `num_hours` of data at sampling rate `fs`, extended by
    ``chunksize % nfft`` samples, and is never longer than `nsamples`.
    """
    # NOTE(review): adding ``chunksize % nfft`` does not in general round up
    # to a multiple of nfft -- confirm the intended padding.
    hour_samples = int(fs * 60 * 60 * num_hours)
    padded = hour_samples + (hour_samples % nfft)
    return min(padded, nsamples)
def _get_shift(nfft, overlap):
return int(round(nfft * overlap))
def _get_nblocks(nsamples, shift, nstep):
return int(math.ceil((nsamples - shift) / nstep)) + 1
def _get_nfreqs(nfft):
return nfft / 2 + 1
def _get_nfft(shift, pad):
return int(max(_power_log(shift) + pad, shift))
def _hann(n):
return 0.54 - 0.46 * np.cos(2.0 * np.pi * np.arange(n) / (n - 1))
@profile
def _power_log(x):
    """Return 2 raised to the ceiling of the base-2 logarithm of `x`."""
    # NOTE(review): the logarithm is computed in floating point, so an exact
    # power of two could in principle land just above its exponent -- confirm
    # this is acceptable for the window sizes in use.
    exponent = math.ceil(math.log(x, 2))
    return 2 ** exponent
@profile
def _change_row_to_column(data):
    """
    Return `data` arranged as column vectors.

    A 1-d array of length n is first reshaped to (1, n); any array that
    then has exactly one row is transposed. Other 2-d arrays are returned
    unchanged.
    """
    dims = data.shape
    if len(dims) == 1:
        data = np.reshape(data, (-1, dims[0]))
    nrows, _ncols = data.shape
    return data.T if nrows == 1 else data
def _getfgrid(fs, nfft, fpass):
df = fs / nfft
f = np.arange(0, fs, df)
if f[-1] != fs:
f = np.append(f, fs)
f = f[:nfft]
if len(fpass) != 1:
findx = (np.where((f >= fpass[0]) & (f <= fpass[1])))
else:
min_index, min_value = min(
enumerate(f - fpass[2]), key=operator.itemgetter(1))
f = f[min_index]
findx = min_index
fout = f[findx]
return fout, findx[0]
@profile
def _dpsschk(tapers, N, fs):
    """
    Compute the tapers for a window of `N` samples.

    ``tapers[0]`` and ``tapers[1]`` are passed to ``dpss`` as its second and
    third arguments (presumably the time-bandwidth product and the number
    of tapers -- confirm against the ``spectrum`` package). The sequences
    returned by ``dpss`` are scaled by ``sqrt(fs)``; the second value it
    returns is discarded.
    """
    second_arg, third_arg = tapers[0], tapers[1]
    sequences, _unused = dpss(N, second_arg, third_arg)
    return np.multiply(sequences, np.sqrt(fs))
@profile
def _mtfftc(data, tapers, nfft, fs):
    """
    Calculate the FFT of `data` multiplied by each taper.

    Arguments:
    data    2-d array with one row per sample.
    tapers  2-d array with one row per sample and one column per taper.
    nfft    FFT length (the product is zero-padded or cropped to it).
    fs      sampling rate; the transform is divided by it.

    Returns the FFT along axis 0 of the tapered data.

    Raises ValueError when `data` and `tapers` do not have the same number
    of rows. Previously this was only printed and the element-wise product
    either failed later or silently broadcast.
    """
    NC, C = data.shape
    NK, K = tapers.shape
    if NK != NC:
        raise ValueError(
            'length of tapers is not compatible with length of data!!')
    # Repeat the data K times along the columns so that it lines up with
    # the K taper columns.
    # NOTE(review): for C > 1 the tiled data has C * K columns and will not
    # match the taper matrix -- only single-column data appears supported.
    data = np.tile(data, (1, K))
    data_proj = np.multiply(data, tapers)
    return np.fft.fft(data_proj, n=nfft, axis=0) / fs
def load_h5py_spectrofile(filename):
    """
    Open `filename` read-only with h5py and return ``(data, fs)``.

    ``data`` is the file's ``'data'`` entry and ``fs`` is element ``[0][0]``
    of its ``'Fs'`` entry.
    """
    # NOTE(review): the file handle is never closed here; presumably the
    # returned dataset still reads from it -- confirm before adding a close.
    h5_file = h5py.File(filename, 'r')
    samples = h5_file['data']
    sample_rate = h5_file['Fs'][0][0]
    return samples, sample_rate
def load_edf_spectrofile(filename):
    """
    Load an EDF file and return ``(data, fs)``.

    The EDF channels are reordered so that column ``CHANNEL_INDEX[name]`` of
    the returned array holds the channel labelled `name` (labels are
    matched case-insensitively). Channels whose label is not in
    ``CHANNEL_INDEX`` are ignored.

    Raises ValueError when the file lacks any channel listed in
    ``CHANNEL_INDEX``. Otherwise the array would contain ``None`` rows and
    fail later with an unrelated indexing error.
    """
    edf = load_edf(filename)
    fs = edf.sample_rate
    # convert the edf channels to match the index
    data = [None] * len(CHANNEL_INDEX)
    for edf_idx, ch in enumerate(edf.chan_lab):
        spec_idx = CHANNEL_INDEX.get(ch.lower())
        if spec_idx is not None:
            data[spec_idx] = edf.X[edf_idx, :]
    missing = sorted(
        name for name, idx in CHANNEL_INDEX.items() if data[idx] is None)
    if missing:
        raise ValueError('EDF file %s is missing channels: %s'
                         % (filename, ', '.join(missing)))
    return np.array(data).T, fs
def load_spectrofile(filename):
    """
    Load `filename` with the loader matching its extension.

    ``.eeg`` files go to the h5py loader and ``.edf`` files to the EDF
    loader. The extension is compared case-insensitively. Any other
    extension, or none at all, falls back to the h5py loader.

    Returns whatever the chosen loader returns, i.e. ``(data, fs)``.
    """
    spectrofile_map = {
        'eeg': load_h5py_spectrofile,
        'edf': load_edf_spectrofile,
    }
    # rpartition never raises: without a dot the whole name is returned,
    # which simply misses the map and selects the default loader.
    file_ext = filename.rpartition('.')[2].lower()
    loader = spectrofile_map.get(file_ext, load_h5py_spectrofile)
    return loader(filename)
def get_eeg_spectrogram_params(filename, duration, pad=0, fpass=None,
                               trial_avg=False, moving_win=None, tapers=None):
    """
    Load `filename` and derive every parameter of the EEG spectrogram.

    Arguments:
    filename    file handed to load_spectrofile.
    duration    duration handed to _get_nsamples (hours, or None).
    pad         padding argument handed to _get_nfft.
    fpass       frequency selection for _getfgrid; defaults to [0, 55].
    trial_avg   stored unchanged in the result.
    moving_win  [window, step], each multiplied by the sampling rate to get
                a sample count (presumably seconds); defaults to [4, 1].
    tapers      taper specification stored in the result; defaults to
                [3, 5].

    Returns an EEGSpecParams tuple.
    """
    data, fs = load_spectrofile(filename)

    fpass = [0, 55] if fpass is None else fpass
    moving_win = [4, 1] if moving_win is None else moving_win
    tapers = [3, 5] if tapers is None else tapers

    shift = int(round(fs * moving_win[0]))  # window length in samples
    nstep = int(round(fs * moving_win[1]))  # window advance in samples
    nfft = _get_nfft(shift, pad)
    nsamples = _get_nsamples(data, fs, duration)
    chunksize = _get_chunksize(nsamples, fs, nfft)
    sfreqs, findx = _getfgrid(fs, nfft, fpass)

    return EEGSpecParams(
        filename=filename,
        duration=duration,
        chunksize=chunksize,
        fs=fs,
        data=data,
        shift=shift,
        spec_len=int(nsamples / fs),
        nstep=nstep,
        trial_avg=trial_avg,
        nfft=nfft,
        nblocks=_get_nblocks(nsamples, shift, nstep),
        tapers=tapers,
        nfreqs=len(sfreqs),
        nsamples=nsamples,
        findx=findx)
def print_spec_params_t(spec_params):
    """
    Print a human-readable summary of `spec_params` to stdout.

    Reads the attributes filename, duration, nfft, nstep, shift, nsamples,
    nblocks, nfreqs, spec_len and fs. A duration of None (meaning "use all
    data") is printed as ``None`` instead of failing the float formatting.
    """
    duration = spec_params.duration
    if duration is None:
        duration_text = 'None'
    else:
        duration_text = '%.2f' % float(duration)
    print('spec_params: {')
    print('\tfilename %s' % spec_params.filename)
    print('\tduration %s' % duration_text)
    print('\tnfft: %d' % spec_params.nfft)
    print('\tnstep: %d' % spec_params.nstep)
    print('\tshift: %d' % spec_params.shift)
    print('\tnsamples: %d' % spec_params.nsamples)
    print('\tnblocks: %d' % spec_params.nblocks)
    print('\tnfreqs: %d' % spec_params.nfreqs)
    print('\tspec_len: %d' % spec_params.spec_len)
    print('\tfs: %d' % spec_params.fs)
    print('}')
@profile
def eeg_ch_spectrogram(ch, data, spec_params, progress_fn=None):
    """
    Compute the spectrogram of one region of the EEG.

    Arguments:
    ch           region name, a key of DIFFERENCE_PAIRS.
    data         2-d array indexed as ``data[:, column]`` with the columns
                 given by CHANNEL_INDEX.
    spec_params  parameters forwarded to multitaper_spectrogram.
    progress_fn  optional callable invoked after each electrode pair as
                 ``progress_fn(fraction_done, canvas_id=ch)``.

    Returns the mean of the multitaper spectrograms of the pairwise
    electrode differences of the region.
    """
    # TODO (joshblum): do this once in a preprocessing step.
    pairs = DIFFERENCE_PAIRS.get(ch)
    spectrograms = []
    for i, (c1, c2) in enumerate(pairs):
        # Take the difference between neighbouring electrodes. Index the
        # table directly: a ``.get`` miss would yield None, which numpy
        # treats as ``np.newaxis`` instead of failing.
        v1 = data[:, CHANNEL_INDEX[c1]]
        v2 = data[:, CHANNEL_INDEX[c2]]
        spectrograms.append(multitaper_spectrogram(v2 - v1, spec_params))
        if progress_fn:
            # float() so that progress is a real fraction, not 0 until the
            # final pair, under integer division.
            progress_fn(float(i + 1) / len(pairs), canvas_id=ch)
    # Regional average over however many pairs the region defines.
    return sum(spectrograms) / len(pairs)
@profile
def on_eeg_file_spectrogram_profile(filename, duration):
    """
    Profiling entry point: compute the 'LL' spectrogram of `filename`.

    Builds the spectrogram parameters, prints them, runs
    eeg_ch_spectrogram on the first ``nsamples`` samples, prints the
    elapsed wall-clock time of that computation and returns the
    spectrogram.
    """
    spec_params = get_eeg_spectrogram_params(filename, duration)
    print_spec_params_t(spec_params)
    samples = spec_params.data[:spec_params.nsamples]
    started = time.time()
    # The recording is processed as one single chunk; splitting it with
    # grouper(data, spec_params.chunksize, spec_params.shift) is disabled.
    chunks = [samples]
    regions = ['LL']  # CHANNELS
    for chunk in chunks:
        chunk_array = np.array(chunk)
        for region in regions:
            spec = eeg_ch_spectrogram(region, chunk_array, spec_params)
    finished = time.time()
    print('Total time: %s' % (finished - started))
    return spec
def get_audio_spectrogram_params(data, fs, duration, nfft, overlap):
    """
    Derive the parameters of an audio spectrogram.

    Arguments:
    data      audio samples; only their count is used here.
    fs        sampling rate.
    duration  duration handed to _get_nsamples (hours, or None).
    nfft      FFT length.
    overlap   factor multiplied with `nfft` to obtain ``shift``.

    Returns an AudioSpecParams tuple.
    """
    shift = _get_shift(nfft, overlap)
    nsamples = _get_nsamples(data, fs, duration)
    return AudioSpecParams(
        chunksize=_get_chunksize(nsamples, fs, nfft),
        fs=fs,
        shift=shift,
        spec_len=nsamples / fs,
        nfft=nfft,
        nblocks=_get_nblocks(nsamples, nfft, shift),
        nfreqs=_get_nfreqs(nfft),
        nsamples=nsamples)
@profile
def multitaper_spectrogram(data, spec_params):
    """
    Compute a multitaper spectrogram of `data`.

    Arguments:
    data         samples; a 1-d array is turned into a single column.
    spec_params  object providing fs, nfft, nstep (window advance), shift
                 (window length), nfreqs, findx (rows of the FFT to keep),
                 tapers and trial_avg.

    Returns an array with one row per window and one column per selected
    frequency, squeezed.
    """
    data = _change_row_to_column(data)
    fs = spec_params.fs
    nfft = spec_params.nfft
    nstep = spec_params.nstep
    shift = spec_params.shift
    nfreqs = spec_params.nfreqs
    nblocks = _get_nblocks(len(data), shift, nstep)
    if spec_params.trial_avg:
        S = np.zeros((nblocks, nfreqs, data.shape[1]))
    else:
        S = np.zeros((nblocks, nfreqs))
    taps = None
    for idx in range(nblocks):
        start = idx * nstep
        # Remove the mean of each window along the sample axis. The old
        # call passed ``type == 'constant'`` (a comparison, i.e. False)
        # positionally as the axis, so the 'constant' request was never
        # applied and scipy's default linear detrend ran instead.
        datawin = signal.detrend(
            data[start:start + shift], axis=0, type='constant')
        # Tapers depend only on the window length, so compute them once
        # and again only if a window turns out to have another length.
        if taps is None or len(taps) != len(datawin):
            taps = _dpsschk(spec_params.tapers, len(datawin), fs)
        J = _mtfftc(datawin, taps, nfft, fs)[spec_params.findx, :]
        # conj(J) * J is real-valued; take .real explicitly so the complex
        # dtype is not cast implicitly when stored into the float array.
        s = np.mean((np.conj(J) * J).real, axis=1).squeeze()
        if spec_params.trial_avg:
            # NOTE(review): `s` is already averaged over axis 1 above, so
            # this second average looks unreachable for 1-d `s` -- confirm
            # the intended multi-column behaviour before relying on it.
            s = np.mean(s, axis=1).squeeze()
        S[idx, :] = s
    return S.squeeze()
def spectrogram(data, spec_params, canvas_id=None, progress_fn=None):
    """Calculate a real spectrogram from audio data

    The audio data is cut into blocks of length `nfft` whose start advances
    by `shift` samples. Every block is multiplied by a window, transformed
    with a real FFT of length `nfft`, and the absolute spectrum divided by
    `nfft` is stored.

    Arguments:
    data         audio data as a numpy array.
    spec_params  parameters as built by get_audio_spectrogram_params; the
                 fields nfft, shift, nblocks and nfreqs are used.
    canvas_id    passed through to `progress_fn`.
    progress_fn  optional callable invoked as
                 ``progress_fn(fraction_done, canvas_id=canvas_id)``.

    Returns a float32 array of shape (nblocks, nfreqs).
    """
    nfft = spec_params.nfft
    # AudioSpecParams has no `nstep`; its `shift` is the block advance and
    # `nfft` the block length, matching how nblocks was computed.
    hop = spec_params.shift
    nblocks = spec_params.nblocks
    nfreqs = spec_params.nfreqs
    window = _hann(nfft)
    specs = np.zeros((nfreqs, nblocks), dtype=np.float32)
    for idx in range(nblocks):
        start = idx * hop
        specs[:, idx] = np.abs(np.fft.rfft(
            data[start:start + nfft] * window, n=nfft)) / nfft
        if progress_fn and idx % 10 == 0:
            # float() so that progress is a real fraction under integer
            # division.
            progress_fn(float(idx) / nblocks, canvas_id=canvas_id)
    if nblocks > 0:
        # NOTE(review): the last column is replaced by the unwindowed,
        # zero-padded tail of the data, as in the original -- confirm that
        # overwriting the last full block is intended.
        specs[:, -1] = np.abs(
            np.fft.rfft(data[nblocks * hop:], n=nfft)) / nfft
    if progress_fn:
        progress_fn(1, canvas_id=canvas_id)
    return specs.T
def main(filename, duration):
    """
    Profile the spectrogram computation for `filename` and print a summary.

    Arguments:
    filename  data file to load.
    duration  duration of data to process (hours), or None for all of it.
    """
    # Use the parameters, not the module-level ``args``, so the function
    # also works when called from other code.
    spec = on_eeg_file_spectrogram_profile(filename, duration)
    print('Spectrogram shape: %s' % str(spec.shape))
    print('Sample data: %s' % (spec[:10, :10],))
if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser(
        description='Profile spectrogram code.')
    parser.add_argument('-f', '--filename', default='/Users/joshblum/Dropbox (MIT)/MIT-EDFs/MIT-CSAIL-007.edf',
                        dest='filename', help='filename for spectrogram data.')
    # type=float: without it a duration given on the command line arrives
    # as a string and breaks the sample-count arithmetic.
    parser.add_argument('-d', '--duration', default=4.0, type=float,
                        dest='duration', help='duration of the data')
    args = parser.parse_args()
    main(args.filename, args.duration)