-
Notifications
You must be signed in to change notification settings - Fork 0
/
OpenData.py
267 lines (203 loc) · 11.2 KB
/
OpenData.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
# -*- coding: utf-8 -*-
"""
Call LoadEvents() to get the final processed pandas data structure.

Script to open EEG data and relevant info, previously exported to csv using MATLAB
(pymatbridge was too slow, and there is no matlab.engine in the currently owned MATLAB version, <2014).
LoadEvents() creates the events data: it loads the raw events and then passes them through
ProcessEvents(), which also produces the reaction times.
In practice this script was used to save the original .csv (from mat) to HDF,
and also to organize the events into full trials in the different conditions.
Once the HDF file has been made the data load quickly, so no separate opening script is needed.
"""
#########
import glob
import pandas as pd
import scipy.io as sio
import numpy as np
#import csv
#import itertools
#import h5py
from pandas import HDFStore
import obspy.signal.filter as filters
# Module-level configuration: input folders and the session selector.
eeg_path = '/Users/ryszardcetnarski/Desktop/Nencki/TD/Converted_data/signals/'
eeg_names = sorted(glob.glob(eeg_path + '*'))

events_path = '/Users/ryszardcetnarski/Desktop/Nencki/TD/Converted_data/events/'
events_names = sorted(glob.glob(events_path + '*'))

# Set to 'before' or 'after' to pick which of the two datasets is loaded.
bef_aft_switch = 'after'

# IMPORTANT: the events .mat files end in _TD_EVENTS.mat, while the signal
# files end in just _TD.mat.
bef_aft_dict = dict(
    before_mat='1_TD_EVENTS.mat',
    after_mat='2_TD_EVENTS.mat',
    before_hdf='td_before_database.hdf5',
    after_hdf='td_after_database.hdf5',
)
#########
def storeEEGinHDF():
    """Band-pass filter every EEG recording of the selected session and store it in HDF.

    The recordings are the entries of ``eeg_names`` whose name contains the
    signal-file suffix for ``bef_aft_switch`` (the events suffix from
    ``bef_aft_dict`` with ``"_EVENTS"`` removed). For each recording the
    ``'eegToSave'`` matrix is read from the .mat file, transposed, each column
    is passed through ``FilterData`` and the result is written to the session's
    HDF file under ``<subId>/signal/filtered_30/``. A running count of
    processed recordings is printed.

    Per the original author's note the recordings are large
    (64 electrodes x ~30 min at 500 Hz).
    """
    h_path = "/Users/ryszardcetnarski/Desktop/Nencki/TD/HDF/"
    signal_suffix = bef_aft_dict[bef_aft_switch + '_mat'].replace("_EVENTS", "")
    all_eeg_names = sorted(name for name in eeg_names if signal_suffix in name)

    # The context manager closes the store even if loading or filtering a
    # recording raises, so the HDF file is never left open.
    with HDFStore(h_path + bef_aft_dict[bef_aft_switch + '_hdf']) as store:
        for cnt, recording in enumerate(all_eeg_names, start=1):
            # Subject id = file name without directory and 4-character
            # extension, with '-' replaced by '_' for the HDF key.
            subId = recording[recording.rfind("/") + 1:-4].replace("-", "_")
            sig = pd.DataFrame(
                sio.loadmat(recording, struct_as_record=True)['eegToSave']
            ).transpose()
            # infer_objects() is the supported replacement for the removed
            # DataFrame.convert_objects() (needs pandas >= 0.21).
            store[subId + "/signal/filtered_30/"] = sig.infer_objects().apply(FilterData, axis=0)
            print(cnt)
def SAVE_ChangeDictOrder(_processedEvents):
    '''Store processed events in the session's HDF file with condition-first nesting.

    The input is nested ``accuracy -> condition`` (``data['correct']['attention']``);
    in the HDF file the keys become ``<subId>/events/<condition>/<accuracy>``.

    Parameters
    ----------
    _processedEvents : sequence of dict
        One entry per recording, each of the form
        ``{'correct': {'attention': df, 'motor': df}, 'incorrect': {...}}``.
        Entries are paired by position with the sorted events file names of
        the session selected by ``bef_aft_switch``.
    '''
    h_path = "/Users/ryszardcetnarski/Desktop/Nencki/TD/HDF/"
    # '_EVENTS' is stripped so the subject id (and hence the HDF path) matches
    # the one used for the signal files exactly.
    all_event_names = sorted(name.replace('_EVENTS', '') for name in events_names
                             if bef_aft_dict[bef_aft_switch + '_mat'] in name)

    # The context manager closes the store even when writing one entry fails.
    with HDFStore(h_path + bef_aft_dict[bef_aft_switch + '_hdf']) as store:
        for _data, recording in zip(_processedEvents, all_event_names):
            print('I')
            subId = recording[recording.rfind("/") + 1:-4].replace("-", "_")
            # Same write order as before: correct trials first, then incorrect.
            for accuracy in ('correct', 'incorrect'):
                for condition in ('attention', 'motor'):
                    key = subId + '/events/' + condition + '/' + accuracy
                    # infer_objects() is the supported replacement for the
                    # removed DataFrame.convert_objects() (needs pandas >= 0.21).
                    store[key] = _data[accuracy][condition].infer_objects()
def FilterData(channel, freqmin=2, freqmax=30, sampling_rate=500):
    """Band-pass filter a single channel with ``obspy.signal.filter.bandpass``.

    Parameters
    ----------
    channel : array-like
        Samples of one channel (one column when used with ``DataFrame.apply``).
    freqmin, freqmax : float
        Pass-band edges in Hz. Defaults (2 and 30) are the values that were
        previously hard-coded.
    sampling_rate : float
        Sampling rate in Hz, forwarded to obspy as ``df``. Defaults to 500.

    Returns
    -------
    The filtered data as returned by obspy.
    """
    # A 49-51 Hz band-stop stage used to follow here and was left disabled:
    # filters.bandstop(b_pass, freqmin=49, freqmax=51, df=500)
    return filters.bandpass(channel, freqmin=freqmin, freqmax=freqmax, df=sampling_rate)
def MakeDict():
    """Build the lookup that maps marker labels onto the unified 'Top down' labels.

    The spreadsheet holds two label columns ('EEG_prof_Grabowskiej' and
    'EEG_prof_Szelag', from different experiment iterations); both are mapped
    to the 'Top down' column. Where a label occurs in both columns, the
    mapping from the second one wins.
    """
    code_table = pd.read_excel('/Users/ryszardcetnarski/Desktop/Nencki/TD/Info/Port_codes_RC.xlsx')

    merged = {}
    for source_column in ('EEG_prof_Grabowskiej', 'EEG_prof_Szelag'):
        merged.update(zip(code_table[source_column].values,
                          code_table['Top down'].values))
    return merged
def ProcessEvents(events):
    """Find complete cue-target-response trials and split them by accuracy.

    A complete trial is three consecutive rows: a cue, the target that matches
    it (``cue_dict``) and a button press. It is *correct* when the button is
    the one expected for the target (``acc_dict``) and a *miss* when a
    different button was pressed. Anything else (e.g. a boundary event between
    the three rows) is ignored.

    Parameters
    ----------
    events : iterable of pandas.DataFrame
        One frame per recording with at least the columns ``'type'`` (event
        label, strings) and ``'latency'``. Each frame is modified in place:
        the helper columns ``type_nodir``, ``completeTrial``,
        ``accurateTrial``, ``completeAndAccurateTrial``,
        ``completeAndMissTrial`` and ``stim`` are added.

    Returns
    -------
    list of dict
        One ``{'correct': ..., 'incorrect': ...}`` entry per recording, each
        value being the result of ``GroupTrials`` for those rows.
    """
    cue_dict = {'Cue Att R': 'Target Att R',
                'Cue Att L': 'Target Att L',
                'Cue Mot R': 'Target Mot R',
                'Cue Mot L': 'Target Mot L',
                }
    acc_dict = {'Target Att R': 'Button 1 R',
                'Target Att L': 'Button 2 L',
                'Target Mot R': 'Button 1 R',
                'Target Mot L': 'Button 2 L',
                }

    processedEvents = []
    for _events in events:
        types = _events['type']

        # Event type without the trailing L/R, used later as the pivot column.
        _events['type_nodir'] = types.map(
            lambda x: x[0: x.rfind(" ")] if 'Button' not in x else 'Button')

        # Compare every row with the row that follows it.
        following = types.shift(-1)
        complete = np.array(
            [mapVals(cur, nxt, cue_dict) for cur, nxt in zip(types, following)],
            dtype=bool)
        accurate = np.array(
            [mapVals(cur, nxt, acc_dict) for cur, nxt in zip(types, following)],
            dtype=bool)
        _events['completeTrial'] = complete
        _events['accurateTrial'] = accurate

        # accurate_next[i] == accurate[i + 1]; the last row has no successor.
        accurate_next = np.roll(accurate, -1)
        accurate_next[-1:] = False
        # button_in_two[i] tells whether row i + 2 is a button press.
        is_button = np.array(['Button' in name for name in types], dtype=bool)
        button_in_two = np.roll(is_button, -2)
        button_in_two[-2:] = False

        # First row (the cue) of every correct / missed trial.
        correct_start = complete & accurate_next
        miss_start = complete & ~accurate_next & button_in_two

        # Whole-column assignment replaces the former chained
        # ``df[col].iloc[...] = True`` writes, which pandas does not guarantee
        # to reach the frame (and which do nothing under copy-on-write).
        correct_rows = _expand_trial_rows(correct_start)
        miss_rows = _expand_trial_rows(miss_start)
        _events['completeAndAccurateTrial'] = correct_rows
        _events['completeAndMissTrial'] = miss_rows

        # Labels ending in 'L' mean stimulus absent, everything else present.
        _events['stim'] = types.map(lambda x: 'absent' if x[-1] == 'L' else 'present')

        # .loc with a boolean mask replaces the removed .ix indexer; the copy
        # lets GroupTrials work on the subset without touching _events.
        correct_complete = GroupTrials(_events.loc[correct_rows].copy())
        incorrect_complete = GroupTrials(_events.loc[miss_rows].copy())
        processedEvents.append({'correct': correct_complete, 'incorrect': incorrect_complete})
    return processedEvents


def _expand_trial_rows(start_mask):
    """Return a boolean mask covering every flagged row plus the two rows after it."""
    starts = np.flatnonzero(start_mask)
    rows = np.zeros(len(start_mask), dtype=bool)
    for offset in range(3):
        rows[starts + offset] = True
    return rows
#def mySave(_data,DataName, subjectID):
# h_path = "/Users/ryszardcetnarski/Desktop/Nencki/TD/HDF/"
#
# store = HDFStore(h_path +"eeg_database2.hdf5")
#
# store[subjectID+ "/events/" + DataName] = sig
#
# hdf.put(subjectID+ "/events/" + DataName, _data.convert_objects(), format='table', data_columns=True)
# hdf.close()
#
def GroupTrials(df):
    """Reshape consecutive cue-target-response rows into one row per trial.

    Every three consecutive rows of ``df`` are treated as one trial. The frame
    is pivoted so that each trial becomes a single row whose columns are
    ``'<value> <type_nodir>'`` (e.g. ``'latency Cue Att'``), and the trials are
    then split into the attention and motor conditions.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows of complete trials with at least the columns ``'latency'``,
        ``'type_nodir'`` and ``'stim'``. It is not modified.

    Returns
    -------
    dict
        ``{'attention': DataFrame, 'motor': DataFrame}``; a condition with no
        trials is an empty DataFrame.

    Raises
    ------
    ValueError
        If the number of rows is not a multiple of 3, i.e. some trial is
        missing one of its events.
    """
    n_rows = len(df)
    if n_rows % 3 != 0:
        # Previously this only printed and then crashed on an undefined name.
        raise ValueError(
            "EVENTS MISSING !!!???: expected a multiple of 3 rows, got %d" % n_rows)

    # Trial number for every row: [0, 0, 0, 1, 1, 1, ...]. It is the pivot
    # index that ties the three rows of one trial together.
    trial_ids = np.repeat(np.arange(n_rows // 3), 3)
    # assign() works on a new frame, so the caller's data is left untouched.
    df = df.assign(newIndex=trial_ids)[['latency', 'type_nodir', 'newIndex', 'stim']]

    # No ``values`` given on purpose: both latency and stim are carried over,
    # producing two-level columns (value, type_nodir).
    df = df.pivot(index='newIndex', columns='type_nodir')
    # Flatten the nested names: ('latency', 'Cue Att') -> 'latency Cue Att'.
    df.columns = [' '.join(col).strip() for col in df.columns.values]

    # Empty defaults for the case where a condition has no trials at all
    # (for example no missed motor trials).
    att = pd.DataFrame()
    mot = pd.DataFrame()
    if 'latency Cue Att' in df:
        att = RenameColumns(df.loc[df['latency Cue Att'].notnull()].dropna(axis=1, how='all'))
    if 'latency Cue Mot' in df:
        mot = RenameColumns(df.loc[df['latency Cue Mot'].notnull()].dropna(axis=1, how='all'))

    return {'attention': att, 'motor': mot}
def RenameColumns(df):
    '''Keep the first four columns, give them short names and add reaction time.

    The first four columns are taken by position and renamed, in order, to
    ``Button``, ``Cue``, ``Target`` and ``Stim_present``. They are then
    reordered to Cue, Target, Button, Stim_present, a column
    ``RT = Button - Target`` is added, and ``Stim_present`` is recoded from
    'present'/'absent' to 1.0/0.0 (any other value becomes NaN).

    The input frame is not modified; a new DataFrame is returned.
    '''
    # .iloc is the positional replacement for the removed .ix indexer; the
    # copy keeps the assignments below from writing into the caller's frame.
    df = df.iloc[:, 0:4].copy()
    df.columns = ['Button', 'Cue', 'Target', 'Stim_present']
    df = df[['Cue', 'Target', 'Button', 'Stim_present']].copy()
    df['RT'] = df['Button'] - df['Target']
    df['Stim_present'] = df['Stim_present'].map({'present': 1.0, 'absent': 0.0})
    return df
def mapVals(curr_event, next_event, myDict):
    """Tell whether ``next_event`` is the follower that ``myDict`` lists for ``curr_event``.

    The two labels never match literally (a cue versus its target, say), so
    the dictionary supplies the expected follower for the current label. The
    caller provides ``next_event`` by shifting the label column with
    ``shift(-1)``. A ``curr_event`` that is not a key of ``myDict`` is looked
    up as ``None``.
    """
    expected_follower = myDict.get(curr_event)
    return expected_follower == next_event
def LoadEvents():
    """Read the events of every recording of the selected session and process them.

    For each events .mat file whose name contains the suffix selected by
    ``bef_aft_switch``, the ``'event'`` struct is read, its ``latency`` and
    ``type`` fields are put into a two-column DataFrame, and the type labels
    are translated with the dictionary from ``MakeDict``. The list of frames
    is then handed to ``ProcessEvents``, whose result is returned.
    """
    label_map = MakeDict()
    recordings = sorted(name for name in events_names
                        if bef_aft_dict[bef_aft_switch + '_mat'] in name)

    raw_frames = []
    for mat_file in recordings:
        first_row = sio.loadmat(mat_file, struct_as_record=True)['event'][0, :]

        # Event times.
        onsets = np.array(first_row['latency']).astype('float')
        # Event names.
        labels = np.array([entry[0][:] for entry in first_row['type']], dtype=object)

        # One row per event, columns 'latency' and 'type'.
        frame = pd.DataFrame(data=[onsets, labels]).T
        frame.columns = ['latency', 'type']
        raw_frames.append(frame.replace({"type": label_map}))

    # Reorganize from one event per row to cue / target / button per trial.
    return ProcessEvents(raw_frames)
def LoadChannels(subID):
    """Return the leading value of every ``chanlocs`` entry in one electrodes file.

    ``subID`` is the position of the file among the sorted names in
    ``events_names`` that contain '1_TD_ELECTRODES'. The returned values are
    presumably the channel labels - confirm against the MATLAB export.
    """
    electrode_files = sorted(name for name in events_names if '1_TD_ELECTRODES' in name)
    chanlocs_row = sio.loadmat(electrode_files[subID])['chanlocs'][0, :]
    return [entry[0][0] for entry in chanlocs_row]
#[#x[0][0][0]for x in mat_struct]