/
Intan_motif_matches_9_23.py
272 lines (236 loc) · 12.6 KB
/
Intan_motif_matches_9_23.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
# -*- coding: utf-8 -*-
import os
from matplotlib.pyplot import *
import quantities as pq
from zeebeez.tdt2neo import stim, import_multielectrode
from neo.core import EpochArray
from neo.io import RHDIO, NeoHdf5IO
from neosound import sound_manager
from neosound.sound_store import HDF5Store
from lasp.signal import lowpass_filter, highpass_filter, coherency
from lasp.sound import spectrogram, plot_spectrogram
from scipy.stats import zscore
from scipy.signal import correlate, argrelextrema
# faster plotting
# %matplotlib notebook
experiment_dir = "/auto/tdrive/billewood/intan data/LBlYel0923"
experiment_file = os.path.join(experiment_dir, "RHD", "LBlYe0923__160829_144329.rhd")
stimulus_file = os.path.join(experiment_dir, "pyoperant", "LBlYel0923_trialdata_20160829144324.csv")
stimulus_dir = os.path.join(experiment_dir, "Stimuli", "TutFam")
figures_dir = os.path.join(experiment_dir, "Figures")
low_power = 2000
high_power = 5000
vb_low = 5 # lowpass for smoothing vocal band
vb_stds = 1 # num of stds for vocal band threshhold
vd_stds = 1 # num of stds for vocal density threshold
vd_win = .25 # in s
onset_shift = -.3 # in s, move the onset this direction (should be negative usually or 0, to avoid missing some vocalizations)
low_freq_corr = 1000 # Hz, lower bound for frequencies to correlate in spectrogram
high_freq_corr = 10000 # in Hz, " "
normalize_mic = True # will subtract mean from mic, because our mic often has a dc leak in it.
def find_vocal_periods(vocal_band, vb_stds = 1, vd_stds = 1, vd_win = .25, fs_mic = 25000, onset_shift = 0):
"""
Given a sound pressure waveform, vocal_band, find areas with vocalizations (extended power).
Strongly recommended to bandpass the wave first (for instance, just pass 2-5 kHz).
Performs two thresholding steps (Honestly I'm not sure how helpful the second one is but that's what I'm doing for now),
each one is done based on standard deviations above mean, with the vocal density being windows (~low pass filtered)
Returns:
sound_onset, sound_offset- two vectors of the same length (hopefully!)
Arguments:
REQUIRED:
vocal_band: sound pressssure waveform, ideally band-pass filtered
OPTIONAL
vb_stds: vocal band standard deviations- power is determined by thresholding above this
vd_stds: vocal density standard deviations- power is determined by thresholding above this
vd_win: for smoothing the vocal density vector (second thresholding step)
fs_mic: 25000 is our standard
onset_shift: Many vocalizations start quietly so it's often best to just move all onsets a little back in time
TODO Not sure why I chose to window/bin the second thresholding step instead of low-passing it, should think about that more
"""
vocal_band_thresh = vb_stds * np.std(vocal_band)
# find periods of time above the threshold...
vocal_threshed = np.zeros(vocal_band.shape)
for i in range(vocal_band.shape[0]):
if vocal_band[i] > vocal_band_thresh:
vocal_threshed[i] = 1
else: vocal_threshed[i] = 0
# TODO it's pretty weird the way I'm windowing this, maybe I should just lowpass filter it?
window = np.int(np.round(vd_win * fs_mic / 2) * 2) # force an even integer
overlap = window / 2
start_time = 0
i = 0
vocal_density = np.zeros(np.int(np.floor(vocal_band.shape[0]/(window-overlap))))
while start_time + window < vocal_band.shape[0]:
vocal_density[i] = np.mean(vocal_threshed[start_time:start_time+window])
start_time = start_time + window - overlap
i += 1
# sound onsets: 1 is an onset, -1 is an offset
density_thresh = vd_stds * np.std(vocal_density)
vd_crosses = np.zeros(len(vocal_density))
for i in range(vocal_density.shape[0]):
if vocal_density[i] > density_thresh:
vd_crosses[i] = 1
sound_onsets = vd_crosses[1:len(vd_crosses)] - vd_crosses[0:len(vd_crosses)-1]
sound_onsetsb = np.zeros(len(sound_onsets)+1)
sound_onsetsb[1:len(sound_onsetsb)] = sound_onsets # just aligns for n-1, must be a better way oh well
del sound_onsets
sound_onsets = sound_onsetsb
del sound_onsetsb
# this just checks that the first sound onset wasn't obscured by the beginning of the file
for i in range(len(sound_onsets)):
if sound_onsets[i] == -1:
sound_onsets[0] = -1 # the case where sound onset happens on the first data point
print("Warning, sound appears to onset before file start, first time point being set to an onset")
if sound_onsets[i] == 1:
break
# check for missing offset (sound continues past data file)
for i in range(len(sound_onsets)):
if sound_onsets[len(sound_onsets)-i-1] == 1:
sound_onsets[len(sound_onsets)] = -1
print("Warning, sound appears to continue past file end, last time point being set to an offset")
if sound_onsets[len(sound_onsets)-i-1] == -1:
break
# convert to a list of just onsets and offsets
sound_onset = []
sound_offset = []
for i in range(len(sound_onsets)):
if sound_onsets[i] == 1:
sound_onset = np.append(sound_onset, [i])
if sound_onsets[i] == -1:
sound_offset = np.append(sound_offset, [i])
try:
len(sound_onset) == len(sound_offset)
except:
print("Number of vocal onsets and offsets not equal!")
sound_onset = sound_onset * (window - overlap) # convert to original data points
sound_onset = sound_onset + np.round(onset_shift * fs_mic)
sound_offset = sound_offset * (window - overlap) # " "
return sound_onset, sound_offset
def get_spect_corr(sound_wav, templ_timefreq, freq_index, fs_mic = 25000, spec_sample_rate = 1000, freq_spacing = 50):
"""
Calculate cross correlation between two spectrograms over only some frequencies, across time. Expects one wav file and
one spectrogram and a frequency index.
Returns:
correlation: spect_corr
Arguments:
REQUIRED:
sound_wav, templ_timefreq, freq_index
"""
t, freq, timefreq, rms = spectrogram(sound_wav, fs_mic, spec_sample_rate, freq_spacing)
timefreq = timefreq[freq_index,:]
timefreq = zscore(timefreq, axis = None)
if timefreq.shape[1] < templ_timefreq.shape[1]:
zeropad = np.zeros((timefreq.shape[0],templ_timefreq.shape[1]-timefreq.shape[1]))
timefreq = np.append(timefreq, zeropad, axis = 1)
spect_corr = np.zeros(timefreq.shape[1])# - templ_timefreq.shape[1])
# sliding time
for time_win in range(timefreq.shape[1] - templ_timefreq.shape[1]): # no overlap before or after
# correlate relative freq band to each other
freq_corr = np.zeros(len(timefreq))
for freq_i in range(len(freq_index)):
in1 = np.abs(templ_timefreq[freq_i])
in2 = np.abs(timefreq[freq_i][time_win:time_win + len(in1)])
freq_corr[freq_i] = correlate(in1,in2, mode = 'valid')
spect_corr[time_win] = np.sum(freq_corr)
return spect_corr
# plot_spectrogram(t, freq, timefreq, dBNoise=80, colorbar = False)
################################################################################################################################
###############################################################################################################################
# Get the neural data
h5_filename = os.path.join(experiment_dir,
os.path.basename(experiment_file).replace(".rhd", "_neo.h5"))
h5_exporter = NeoHdf5IO(h5_filename)
block = h5_exporter.get("/Block_0")
# Get the sound database
stimulus_h5_filename = os.path.join(experiment_dir,
os.path.basename(experiment_file).replace(".rhd", "_stimuli.h5"))
sm = sound_manager.SoundManager(HDF5Store, stimulus_h5_filename)
# This is the entire microphone recording
mic = [asig for asig in block.segments[0].analogsignalarrays if asig.name == "Board ADC"][0]
fs_mic = np.int(mic.sampling_rate)
t_mic = np.asarray(mic.times)
mic = mic.squeeze()
if normalize_mic:
mic = mic - np.mean(mic) # because our mic channel often has 1.652 v of dc leak from somewhere!
vocal_band = lowpass_filter(mic, fs_mic, low_power)
vocal_band = highpass_filter(vocal_band, fs_mic, high_power)
vocal_band = np.abs(vocal_band)
vocal_band = lowpass_filter(vocal_band, fs_mic, vb_low)
sound_onset, sound_offset = find_vocal_periods(vocal_band, vb_stds, vd_stds, vd_win, fs_mic, onset_shift)
#==============================================================================
# for i in range(len(sound_onset)):
# t, freq, timefreq, rms = spectrogram(mic[sound_onset[i]:sound_offset[i]], fs_mic, 1000, 50)
# plot_spectrogram(t, freq, timefreq, dBNoise=80, colorbar = False)
# pause(.05)
#==============================================================================
# just a temporary template, ~ 1 motif, maybe even a playback but who cares
# TODO make a template finding function
i = 17
template = mic[sound_onset[i]:sound_offset[i]]
template = template[4800:18000] #shift it, particular to this template
templ_t, templ_freq, templ_timefreq, templ_rms = spectrogram(template, fs_mic, spec_sample_rate = 1000, freq_spacing = 50)
plot_spectrogram(templ_t, templ_freq, templ_timefreq, dBNoise=80, colorbar = False)
# Make xcorr of spectrogram - if i = 17 template is present in sound
# TODO this is the part that will have to loop through for every vocal chunk
# for now it is just a bout that has three motifs in it
t, freq, timefreq, rms = spectrogram(mic[sound_onset[i]:sound_offset[i]], fs_mic, spec_sample_rate = 1000, freq_spacing = 50)
# find the frequencies of interest
freq_index = [0]
iter = 0
for i in range(len(freq)):
if (freq[i] > low_freq_corr) & (freq[i] < high_freq_corr):
freq_index = np.append(freq_index, [i])
iter += iter
freq_index = freq_index[1:len(freq_index)]
# zscore specs first, and just do freqs of interest
# this is from earlier
templ_timefreq = templ_timefreq[freq_index,:]
templ_timefreq = zscore(templ_timefreq, axis = None)
#==============================================================================
# timefreq = timefreq[freq_index,:]
# timefreq = zscore(timefreq, axis = None)
#
# del timefreq
#==============================================================================
# loop through and do crosscos on specs, freq band by freq band, time point by time point
#corr_list = list()
corr_list = []
peak_list = []
for i in range(len(sound_onset)):
sound_wav = mic[sound_onset[i]:sound_offset[i]]
spect_corr = get_spect_corr(sound_wav, templ_timefreq, freq_index, fs_mic)
corr_list.append(spect_corr)
#corr_list = np.append(corr_list, spect_corr)
peaks = argrelextrema(spect_corr, np.greater, order = 100) # seems to work ok
peak_list = np.append(peak_list, peaks)
##################################################################################################
#==============================================================================
#
# # just to check things look ok
# figure(1)
# plot_spectrogram(t, freq, timefreq, dBNoise=80, colorbar = False)
# figure(2)
# plot_spectrogram(t, freq, templ_timefreq, dBNoise=80, colorbar = False)
# figure(3)
# plot(spect_corr)
#==============================================================================
# TODO Next is to iterate through vocal_density and check spectrograms, but I'm running out of time for now
# the idea will be to calculate xcorrs of both amplitude waveform and the spectrogram (row by row). Both will be zscored.
# this should give us
#for i in range(vocal_density.shape[0]):
# if vocal_density[i] > density_thresh:
# t, freq, timefreq, rms = spectrogram(mic[i*(window-overlap):i*(window-overlap)+window], fs_mic, 1000, 50)
# plot_spectrogram(t, freq, timefreq, dBNoise=80)
# xcorr for envelope and spectrogram, then make it slide, zscore, correlate
plot(vocal_density[0:200])
i = 0
t, freq, timefreq, rms = spectrogram(mic[i*(window-overlap):50*(window-overlap)+window], fs_mic, 1000, 50)
plot_spectrogram(t, freq, timefreq, dBNoise=80)
i = 75
t, freq, timefreq, rms = spectrogram(mic[i*(window-overlap):(i+25)*(window-overlap)+window], fs_mic, 1000, 50)
plot_spectrogram(t, freq, timefreq, dBNoise=80)
t, freq, timefreq, rms = spectrogram(template, fs_mic, 1000, 50)
plot_spectrogram(t, freq, timefreq, dBNoise=80)
# mic[36600000:36750000] has a song
# t, freq, timefreq, rms = spectrogram(mic[36600000:36750000], fs_mic, 1000, 50)
# plot_spectrogram(t, freq, timefreq, dBNoise=80)