def make(fo):
    '''Write cnn based bad epoch xml files for every block of participants 1-48.

    Blocks without a downsampled 100 Hz .npy file or without a cnn prediction
    file (<name>_pred.npy) are skipped.

    fo      passed to e.Participant as fid2ort
    '''
    print('making artifacts, cnn bad epoch, xml files per block')
    for i in range(1, 49):
        p = e.Participant(i, fid2ort=fo)
        p.add_all_sessions()
        for s in p.sessions:
            for b in s.blocks:
                name = windower.make_name(b)
                if not os.path.isfile(path.eeg100hz + name + '.npy'):
                    continue
                if b.start_marker_missing or b.end_marker_missing:
                    # Without a reliable marker the block length has to come
                    # from the data itself. Previously this windower was
                    # always overwritten by the default one below.
                    d = load_100hz_numpy_block(name)
                    # NOTE(review): assumes Windower takes nsamples counted at
                    # 1000 Hz while d is sampled at 100 Hz, hence * 10 - confirm.
                    w = windower.Windower(b, nsamples=d.shape[1] * 10, sf=100)
                else:
                    w = windower.Windower(b, sf=100)
                pred_file = path.artifact_data_all_pp + w.name + '_pred.npy'
                if not os.path.isfile(pred_file):
                    print(pred_file, 'no prediction file present')
                    continue
                a = xml_cnn.xml_cnn(w)
                a.make_bad_epoch()
                a.bad_epochs2xml()
                a.write_bad_epoch_xml()
                print(w.name)
def handle_save_xml(self, force_save=False):
    '''Save bad epoch and bad channel annotations to xml when a save is due.

    A save is due when force_save is True or more than 60 seconds passed since
    self.last_save. Each xml file is only written when at least one of its
    annotations has the ok flag set.
    Bad epochs are not saved when annotation_type is 'channel_corrector'.
    Every bad channel that is not marked 'correct' is set to 'incorrect' and
    coloured grey, whether or not a file is written.

    force_save      save regardless of the time since the last save
    '''
    # Decide once for the whole call: writing the epochs updates
    # self.last_save, which previously suppressed the channel save that
    # follows in the same call.
    save_due = force_save == True or time.time() - self.last_save > 60

    if self.annotation_type != 'channel_corrector':
        if self.annotation_type == 'corrector':
            filename = self.save_dir_artifact + self.coder + '_' + windower.make_name(self.b) + '.xml'
        elif self.save_dir_artifact != '':
            filename = path.data + self.save_dir_artifact + self.filename.split('/')[-1]
        else:
            filename = self.filename
        if save_due and any(be.ok for be in self.bad_epochs):
            print('saving:', filename)
            print('nbad epochs:', len(self.bad_epochs))
            self.last_save = time.time()
            xml = xml_handler.xml_handler(bad_epochs=self.bad_epochs, filename=filename)
            xml.bad_epochs2xml(multiplier=10)
            xml.write()

    if self.annotation_type == 'channel_corrector':
        filename = self.save_dir_ch + self.coder + '_' + windower.make_name(self.b) + '.xml'
    else:
        filename = self.filename_channels
    save_ok = False
    for bc in self.bad_channels:
        if bc.ok:
            save_ok = True
        if bc.correct != 'correct':
            # was `bc.color == 'grey'`, a comparison without any effect
            bc.color = 'grey'
            bc.correct = 'incorrect'
    if save_ok and save_due:
        print('saving:', filename)
        print('nbad channels:', len(self.bad_channels))
        self.last_save = time.time()
        xml = xml_handler.xml_handler(bad_channels=self.bad_channels, filename=filename)
        xml.bad_channels2xml(multiplier=10)
        xml.write()
def find_cnn_xml_filename(self):
    '''Set self.filename to the bad epoch xml file of the current block.

    The corrected directory (self.save_dir_artifact, defaulting to
    path.corrected_artifact_cnn_xml) is searched first, for the own coder
    when self.enforce_coder is True and for any coder otherwise.
    Returns 0 when exactly one corrected file is found. Otherwise the cnn
    output directory is searched; a ValueError is raised when that search
    does not yield exactly one file.
    '''
    if self.save_dir_artifact == '':
        self.save_dir_artifact = path.corrected_artifact_cnn_xml
    coder = self.coder if self.enforce_coder == True else '*'
    block_name = windower.make_name(self.b)

    pattern = self.save_dir_artifact + coder + '_' + block_name + '.xml'
    corrected = glob.glob(pattern)
    if len(corrected) == 1:
        self.filename = corrected[0]
        return 0
    print('did not find a unique filename', corrected, pattern)

    # fall back on the uncorrected cnn output
    name = block_name + '.xml'
    automatic = glob.glob(path.artifact_cnn_xml + '*' + name)
    if len(automatic) != 1:
        raise ValueError('did not find a unique filename', automatic, name)
    self.filename = automatic[0]
def find_ch_cnn_xml_filename(self):
    '''Set self.filename_channels to the bad channel xml file of the current block.

    The corrected file of the current coder (in self.save_dir_ch, defaulting
    to path.corrected_ch_cnn_xml) is used when it exists, in which case 0 is
    returned. Otherwise the cnn output directory is searched; a ValueError
    is raised when that search does not yield exactly one file.
    '''
    print('search xml channel file in corrected dir...')
    if self.save_dir_ch == '':
        self.save_dir_ch = path.corrected_ch_cnn_xml
    block_name = windower.make_name(self.b)

    corrected = self.save_dir_ch + self.coder + '_' + block_name + '.xml'
    print('looking for file:', corrected)
    if os.path.isfile(corrected):
        self.filename_channels = corrected
        return 0

    print('not corrected yet, search in cnn output...')
    name = block_name + '.xml'
    matches = glob.glob(path.artifact_ch_cnn_xml + '*' + name)
    if len(matches) != 1:
        raise ValueError('did not find a unique filename', matches, name)
    self.filename_channels = matches[0]
    print(matches, self.filename_channels, 'bla')
def make_matrix(fo, add_pp_info=False, normalize_data=True, save_directory=None, make_data=True):
    '''Make artifact training numpy matrices (windowed data and info) per block.

    For each block of participants 1-48 the 100 Hz data is windowed and saved
    as <name>_data.npy, the matching info matrix as <name>_info.npy.
    Blocks are skipped when the output check file already exists, when the
    100 Hz data is absent or when the windower has no annotation file.
    When make_data is True the number of rows per block is logged to the file
    'nrows_per_block_v2' in the working directory.

    fo              passed to e.Participant as fid2ort
    add_pp_info     passed to Windower.make_info_matrix
    normalize_data  passed to windower.window_data as normalize
    save_directory  output directory, default path.artifact_training_data
    make_data       whether to window and save the eeg data (the info matrix
                    is always saved)
    '''
    print('making artifact training np matrices per block, with default 1 sec and 99 perc overlap')
    if save_directory is None:
        save_directory = path.artifact_training_data
    print('save directory:', save_directory)
    fout = open('nrows_per_block_v2', 'w') if make_data else None
    nrows = 0
    # try / finally: the log file is closed even when a block raises
    try:
        for i in range(1, 49):
            p = e.Participant(i, fid2ort=fo)
            p.add_all_sessions()
            for s in p.sessions:
                for b in s.blocks:
                    name = windower.make_name(b)
                    if os.path.isfile(path.artifact_training_data + name + '.npy'):
                        # check whether windowed data is already present
                        continue
                    if not os.path.isfile(path.eeg100hz + name + '.npy'):
                        # check whether downsampled data is present to load
                        continue
                    print(name)
                    d = load_100hz_numpy_block(name)
                    if b.start_marker_missing or b.end_marker_missing:
                        # NOTE(review): nsamples is the sample count of the
                        # 100 Hz data here; confirm the unit Windower expects.
                        w = windower.Windower(b, nsamples=d.shape[1], sf=100, window_overlap_percentage=.99)
                    else:
                        w = windower.Windower(b, sf=100, window_overlap_percentage=.99)
                    if w.fn_annotation == 0:
                        # if there is no annotation file skip
                        print('skipping:', w.name, 'NO ANNOTATION')
                        continue
                    print('processing:', w.name, w.fn_annotation)
                    w.make_info_matrix(add_pp_info=add_pp_info)
                    if make_data:
                        d = remove_channels(d)
                        d = windower.window_data(d, w.windows['sf100'], flatten=True, normalize=normalize_data)
                        rows = d.shape[0]
                        nrows += rows
                        fout.write(w.name + '\t' + str(rows) + '\n')
                        print(d.shape, w.info_matrix.shape[0])
                        # data and info must describe the same windows
                        assert d.shape[0] == w.info_matrix.shape[0]
                        np.save(save_directory + w.name + '_data', d)
                    np.save(save_directory + w.name + '_info', w.info_matrix)
        if make_data:
            fout.write('all_blocks\t' + str(nrows) + '\n')
    finally:
        if fout is not None:
            fout.close()
def make(fo):
    '''Write event info xml files per block and log the selection counts.

    For each block of participants 1-48 with 100 Hz data and a cnn prediction
    file, an xml_cnn object selects an equal number of artifact and clean
    epochs (20 for exp_type 'k', 60 for 'o', 80 for 'ifadv') and writes them.
    Per block counts and the totals are written to 'artifact_info.txt' in the
    working directory.

    fo      passed to e.Participant as fid2ort
    '''
    print('making event info xml files per block')
    nepoch_per_exp_type = {'k': 20, 'o': 60, 'ifadv': 80}
    nartifacts = 0
    nclean = 0
    # with: the log file is closed even when a block raises
    with open('artifact_info.txt', 'w') as fout:
        for i in range(1, 49):
            p = e.Participant(i, fid2ort=fo)
            p.add_all_sessions()
            for s in p.sessions:
                for b in s.blocks:
                    name = windower.make_name(b)
                    if not os.path.isfile(path.eeg100hz + name + '.npy'):
                        continue
                    if b.start_marker_missing or b.end_marker_missing:
                        # Previously this windower was always overwritten by
                        # the default one below.
                        d = load_100hz_numpy_block(name)
                        # NOTE(review): assumes Windower takes nsamples counted
                        # at 1000 Hz while d is sampled at 100 Hz - confirm.
                        w = windower.Windower(b, nsamples=d.shape[1] * 10, sf=100)
                    else:
                        w = windower.Windower(b, sf=100)
                    pred_file = path.artifact_data_all_pp + w.name + '_pred.npy'
                    if not os.path.isfile(pred_file):
                        print(pred_file, 'no prediction file present')
                        continue
                    if b.exp_type not in nepoch_per_exp_type:
                        # previously the value of the preceding block was
                        # silently reused (or a NameError was raised)
                        print('skipping:', w.name, 'unknown exp_type:', b.exp_type)
                        continue
                    nepoch = nepoch_per_exp_type[b.exp_type]
                    ii_xml = xml_cnn.xml_cnn(w, select_nartifact=nepoch, select_nclean=nepoch, cnn_model_name='cnn_5X5_2048_1')
                    ii_xml.make_index_info()
                    ii_xml.make_selection()
                    ii_xml.write()
                    nartifacts += ii_xml.nartifact_indices
                    nclean += ii_xml.nclean_indices
                    # str(): works whether the counts are stored as text or numbers
                    line = w.name + '\t' + str(ii_xml.nclean) + '\t' + str(ii_xml.nartifact) + '\n'
                    print(line)
                    fout.write(line)
        fout.write('all_blocks\t' + str(nclean) + '\t' + str(nartifacts) + '\n')
def load_data(self):
    '''Load the 100 Hz data of the current block and drop unused channels.

    The data is multiplied by 10**6 (presumably volt to microvolt - confirm).
    Rows of the channels listed in self.remove_ch are removed, after which
    self.ch_names and self.ch_index describe the remaining rows.
    '''
    self.fn_data = windower.make_name(self.b)
    self.data = utils.load_100hz_numpy_block(self.fn_data) * 1000000
    self.all_ch_names = utils.load_ch_names()
    self.remove_ch = ['VEOG', 'HEOG', 'TP10_RM', 'STI 014', 'LM', 'Fp2']

    kept = []
    for ch in self.all_ch_names:
        if ch not in self.remove_ch:
            kept.append(ch)
    self.ch_names = kept

    # row numbers of the kept channels in the complete data matrix
    self.ch_index = list(map(self.all_ch_names.index, kept))
    self.data = self.data[self.ch_index, :]
    # after selection the index refers to positions within the kept channels
    self.ch_index = list(map(kept.index, kept))
def load_blinks(block):
    '''Load the pickled blinks object that belongs to a block.

    The file is expected in the path.blinks folder, named after the block
    (windower.make_name) with extension .blinks.

    block       block object
    Returns the unpickled object, or 0 when the file does not exist.
    '''
    name = windower.make_name(block)
    fn = path.blinks + name + '.blinks'
    if not os.path.isfile(fn):
        print('File does not excist, please provide raw eeg data object', fn)
        return 0
    print('loading blinks:', fn)
    # NOTE: pickle executes arbitrary code on load; only use on files that
    # were written by this project.
    with open(fn, 'rb') as fin:
        return pickle.load(fin)
def block2eegdata(self, b):
    '''Load the 100hz eeg data of a block and store a windowed version in self.d.

    The windowing is meant to be identical to the method used in
    make_artifact_matrix_v2.py: 99 percent window overlap, channels removed
    with remove_channels, windows flattened and normalized.

    b       block object
    '''
    self.clean_up()
    self.d = load_100hz_numpy_block(windower.make_name(b))

    windower_options = {'sf': 100, 'window_overlap_percentage': .99}
    if b.start_marker_missing or b.end_marker_missing:
        # the eeg data in d has a sf 100, windower expects an sf 1000, the sf
        # parameter adjusts the start and end times of snippets, therefore the
        # nsamples needs to be multiplied by 10.
        windower_options['nsamples'] = self.d.shape[1] * 10
    w = windower.Windower(b, **windower_options)

    without_unused_channels = remove_channels(self.d)
    self.d = without_unused_channels
    self.d = windower.window_data(without_unused_channels, w.windows['sf100'], flatten=True, normalize=True)
def set_block_name(self, block, name):
    '''Sets self.name and self.block from a block object and / or a name.

    The missing one of the two is derived from the other: the name with
    windower.make_name, the block with utils.name2block (using self.fo).
    When both are provided both are stored as given; previously nothing was
    set in that case.

    block       block object or None
    name        block name or None
    Returns False when neither is provided, otherwise True.
    '''
    if name is None and block is None:
        print('Please provide name or block object.')
        return False
    if block is None:
        block = utils.name2block(name, self.fo)
    elif name is None:
        name = windower.make_name(block)
    self.block = block
    self.name = name
    return True
def load_from_xml(self, filename=''):
    '''Locate and load the bad epoch and bad channel xml files of the block.

    The bad epoch filename is resolved in this order: filename as given,
    filename inside path.artifacts, the current self.filename, (keeping the
    current names when self.filename_channels exists), a cnn xml file for the
    block when self.show_cnn_pred is set, and finally names generated from
    coder, participant id, experiment type and block id.
    Loaded annotations are stored in self.bad_epochs and self.bad_channels,
    their start and end values are appended to self.boundaries and
    self.channel_boundaries.

    filename    bad epoch xml filename, absolute or relative to path.artifacts
    '''
    if os.path.isfile(filename):
        self.filename = filename
    elif os.path.isfile(path.artifacts + filename):
        self.filename = path.artifacts + filename
    elif os.path.isfile(self.filename):
        pass
    elif os.path.isfile(self.filename_channels):
        pass
    elif self.show_cnn_pred:
        name = windower.make_name(self.b)
        fn = glob.glob(path.artifact_cnn_xml + '*' + name + '*')
        if len(fn) > 0:
            self.filename = fn[0]
    else:
        print('Auto generating filename based on block information.')
        stem = self.coder + '_pp' + str(self.pp_id) + '_exp-' + self.exp_type + '_bid-' + str(self.bid)
        self.filename = path.artifacts + stem + '.xml'
        self.filename_channels = path.bad_channels + stem + '_channels.xml'

    if os.path.isfile(self.filename):
        print('loading bad epochs with filename:', self.filename)
        xml = xml_handler.xml_handler(filename=self.filename)
        self.bad_epochs = xml.xml2bad_epochs(multiplier=0.1, remove_clean=True)
        for be in self.bad_epochs:
            if be.start is None or be.end is None:
                print(be)
            else:
                self.boundaries.append(be.start)
                self.boundaries.append(be.end)

    if self.filename != '' and self.filename_channels == '':
        # Remove the extension only. str.strip('.xml') removes any of the
        # characters . x m l from both ends and mangles names such as
        # 'max_pp1.xml' or './pp1.xml'.
        if self.filename.endswith('.xml'):
            stem = self.filename[:-len('.xml')]
        else:
            stem = self.filename
        self.filename_channels = stem + '_channels.xml'
        if not os.path.isfile(self.filename_channels):
            name = self.filename_channels.split('/')[-1]
            self.filename_channels = path.bad_channels + name
    print(self.filename_channels)

    if os.path.isfile(self.filename_channels):
        print('loading xml channels')
        xml = xml_handler.xml_handler(filename=self.filename_channels)
        temp = xml.xml2bad_channels(multiplier=0.1)
        self.bad_channels = [bc for bc in temp if bc.channel not in self.remove_ch]
        if path.artifact_ch_cnn_xml in self.filename_channels:
            # uncorrected cnn output, nothing has been checked by a coder yet
            print('loading from cnn directory, setting bc to incorrect.')
            for bc in self.bad_channels:
                bc.correct = 'incorrect'
        for bc in self.bad_channels:
            if bc.annotation == 'all':
                self.complete_bad_channel.append(bc.channel)
            if bc.start is None or bc.end is None:
                print(bc)
            else:
                self.channel_boundaries.append(bc.start)
                self.channel_boundaries.append(bc.end)
    else:
        print('filename xml channels not found.', self.filename_channels)
def __init__(self, b=None,
             cnn_ch_model_name='rep-26_perc-20_fold-1_part-70_kernel-6_model7',
             save_dir=None, bad_channels=None, filename='',
             load_predictions=True, use_adjusted_prediction=False,
             minimal_clean_duration=2000, minimal_artifact_duration=1000):
    '''Handles the xml file with bad channel info based on cnn predictions.

    b               block object, when None bad_channels and filename are used
    cnn_ch_mod...   name of the cnn model that generated the predictions
    save_dir        directory to save data, default (also when the directory
                    does not exist) path.artifact_ch_cnn_xml
    bad_chan...     a list of bad_channel objects, can be empty (default: a
                    new empty list), only used when b is None
    filename        xml filename, for loading or writing, only used when b is
                    None
    load_pred...    whether to call self.load_predictions when b is provided
    use_adj...      stored as self.use_adjusted_prediction
    minimal_...     stored as self.minimal_clean_duration and
                    self.minimal_artifact_duration
    '''
    self.b = b
    self.cnn_ch_model_name = cnn_ch_model_name
    self.ch_names = utils.load_selection_ch_names()
    self.use_adjusted_prediction = use_adjusted_prediction
    self.minimal_artifact_duration = minimal_artifact_duration
    self.minimal_clean_duration = minimal_clean_duration
    if save_dir is None:
        self.save_dir = path.artifact_ch_cnn_xml
    elif not os.path.isdir(save_dir):
        print('Could not locate:', save_dir, 'using default:', path.artifact_ch_cnn_xml)
        self.save_dir = path.artifact_ch_cnn_xml
    else:
        self.save_dir = save_dir
    if self.b is not None:
        self.w = windower.Windower(b, sf=100)
        self.name = windower.make_name(self.b)
        self.filename = make_filename(self.w, self.cnn_ch_model_name)
        if load_predictions:
            self.load_predictions()
        else:
            self.loaded = False
        if self.loaded:
            self.set_indices()
    else:
        # A fresh list per instance: a list literal as default argument would
        # be shared between all instances created without bad_channels.
        self.bad_channels = [] if bad_channels is None else bad_channels
        self.filename = filename
        self.nclean_indices = 'NA'
        self.nartifact_indices = 'NA'
    self.cnn_result = etree.Element('artifacts')
def __init__(self, b, pre=200, post=300, thres_value=60, min_dist=200,
             plot=False, marker='unk', remove_veog=True, force_create=False):
    '''Blink information of a block, loaded from file or detected in the VEOG channel.

    A stored blinks object (see load_blinks) is copied into this object when
    available, unless force_create is set. Otherwise the VEOG channel is
    extracted, blinks are found with peak detection and the result is saved.

    b               block object
    pre             n sample before peak detection (positive)
    post            n sample after peak detection (positive)
    thres_value     threshold for peak detection in mu volts
    min_dist        minimum distance between peaks
    plot            whether to plot results
    marker          not used here, self.marker is taken from b.marker
    remove_veog     stored as self.remove_veog
    force_cre..     force create new blink object, do not load from file
    '''
    stored = 0 if force_create else load_blinks(b)
    if stored == 0:
        self.name = windower.make_name(b)
        if self.extract_veog(b) == 0:
            # no veog data available, leave the object without blink info
            self.nblinks = 'NA'
            return None
        print('detecting blinks with peak detection.')
        self.st_sample, self.n_samples = b.st_sample, b.duration_sample
        self.pre, self.post = pre, post
        self.thres_value, self.min_dist = thres_value, min_dist
        self.marker = b.marker
        self.remove_veog = remove_veog
        if self.veog_loaded:
            self.find_peaks()
            self.save_blinks()
    else:
        # take over all attributes of the stored object
        self.__dict__.update(stored.__dict__)
    if plot:
        self.plot()
def __init__(self, b=None, cnn_model_name='rep-3_perc-50_fold-2_part-90',
             save_dir=None, bad_epochs=None, filename='',
             load_predictions=True, use_corrected=True):
    '''Handles the xml file with bad epoch info based on cnn predictions.

    b               block object, when None bad_epochs and filename are used
    cnn_model..     name of the cnn model that generated the predictions
    save_dir        directory to save data, default (also when the directory
                    does not exist) path.artifact_cnn_xml
    bad_epochs      a list of bad_epoch objects, can be empty (default: a new
                    empty list), only used when b is None
    filename        xml filename, for loading or writing, only used when b is
                    None
    load_pred...    whether to call self.load_predictions when b is provided
    use_cor...      whether to use the xml that is based on automatic cnn
                    annotation and manually corrected, if no such file exists
                    it will revert to the auto file (passed to make_filename)
    '''
    self.b = b
    self.cnn_model_name = cnn_model_name
    if save_dir is None:
        self.save_dir = path.artifact_cnn_xml
    elif not os.path.isdir(save_dir):
        print('Could not locate:', save_dir, 'using default:', path.artifact_cnn_xml)
        self.save_dir = path.artifact_cnn_xml
    else:
        self.save_dir = save_dir
    if self.b is not None:
        self.w = windower.Windower(b, sf=100)
        self.name = windower.make_name(self.b)
        self.filename = make_filename(self.w, self.cnn_model_name, use_corrected)
        if load_predictions:
            self.load_predictions()
        else:
            self.loaded = False
        if self.loaded:
            self.set_indices()
    else:
        # A fresh list per instance: a list literal as default argument would
        # be shared between all instances created without bad_epochs.
        self.bad_epochs = [] if bad_epochs is None else bad_epochs
        self.filename = filename
        self.nclean_indices = 'NA'
        self.nartifact_indices = 'NA'
    self.cnn_result = etree.Element('artifacts')
def load_blinks(self, offset=500):
    '''Load blink sample numbers as found with the automatic blink classification model.

    Reads <path.blinks><name>_blink-model.classification, a tab separated
    file of which the third column holds the peak sample of a blink. Peak
    samples are stored relative to self.st_sample; start and end lie offset
    samples before and after the peak. The attributes without _sample are
    the sample values divided by 1000.

    offset      number of samples before and after the peak
    Returns True on success. On any failure the blink attributes are set to
    'NA' and False is returned.
    '''
    try:
        st = self.st_sample
        fn = path.blinks + windower.make_name(self) + '_blink-model.classification'
        with open(fn) as fin:
            self.blinks_text = fin.read()
        # skip blank lines, a trailing newline should not fail the whole load
        lines = [line for line in self.blinks_text.split('\n') if line.strip()]
        self.blink_peak_sample = np.array([int(line.split('\t')[2]) - st for line in lines])
        self.nblinks = len(self.blink_peak_sample)
        self.blink_start = (self.blink_peak_sample - offset) / 1000
        self.blink_end = (self.blink_peak_sample + offset) / 1000
        self.blink_duration = self.blink_end - self.blink_start
        self.blink_start_sample = (self.blink_peak_sample - offset)
        self.blink_end_sample = (self.blink_peak_sample + offset)
        # was computed from blink_end / blink_start, which are not in samples
        self.blink_duration_sample = self.blink_end_sample - self.blink_start_sample
        return True
    except Exception:
        # Deliberately best effort: a missing or malformed file means there is
        # no blink info for this block.
        print('could not load blinks')
        self.blinks_text, self.blink_peak_sample, self.nblinks = 'NA', 'NA', 'NA'
        self.blink_start, self.blink_end = 'NA', 'NA'
        return False
def block2gt_mp(b, model_name='FIRST_TRY/rep-2_perc-50_fold-2_part-20', m=None,
                save=True, overwrite=False, identifier=''):
    '''Load the ground truth of a block (if there is one) and return it with the
    predicted class and percentage np arrays.

    b           block object corresponding to a block of eeg data of a
                specific participant
    model...    name of the cnn_model used to generate predictions, only used
                when m is None
    m           model object, if this is provided, model_name will not be used
                and the model will not be cleaned up afterwards
    save        whether to save the predictions
    overwrite   whether to overwrite the existing prediction files (if this is
                false and prediction files exist they will be loaded from disk)
    identifier  string that is prepended to the prediction filename
    Returns gt, predicted_class, predicted_perc; gt is 'NA' when there is no
    ground truth file.
    '''
    name = windower.make_name(b)
    gt_filename = path.snippet_annotation + name + '.gt_indices.npy'
    # was `if os.path.isfile:`, which is always true and made np.load fail
    # for blocks without ground truth
    if os.path.isfile(gt_filename):
        print('Loading file with ground truth:', gt_filename)
        gt = np.load(gt_filename)
    else:
        print('File:', gt_filename, 'does not excist')
        gt = 'NA'

    # m can be None at this point, fall back on model_name for the filename.
    # NOTE(review): assumes a model loaded from path.model + model_name saves
    # its predictions under the last part of model_name - confirm.
    if m is None:
        model_basename = model_name.split('/')[-1]
    else:
        model_basename = m.filename_model.split('/')[-1]
    predicted_name = path.snippet_annotation + identifier + model_basename + '_' + name

    if os.path.isfile(predicted_name + '_class.npy') and not overwrite:
        print('Loading prediction file:', predicted_name + '_class.npy', predicted_name + '_perc.npy')
        predicted_class = np.load(predicted_name + '_class.npy')
        predicted_perc = np.load(predicted_name + '_perc.npy')
    else:
        clean_up = False
        if m is None:
            data = cnn_data.cnn_data(fold=2)
            m = model_cnn.load_model(path.model + model_name, data)
            clean_up = True
        # identifier is passed on so the saved files match predicted_name
        predicted_class, predicted_perc = m.predict_block(b, identifier=identifier, save=save)
        if clean_up:
            m.clean_up()
    return gt, predicted_class, predicted_perc
def predict_block(self, b, batch_size=500, identifier='', save=True):
    '''Classify the epochs of a block with the currently loaded model.

    The eeg data of the block is loaded and windowed by the cnn_data object
    (self.data). The class prediction and the percentage prediction are
    stored on the object, optionally saved and returned.

    b           block object
    batch_size  number of epochs to classify in one go
    identifier  string to be prepended to the filename
    save        whether to save the predictions in path.snippet_annotation
    '''
    self.b = b
    self.data.block2eegdata(b)
    name = windower.make_name(self.b)
    print('testing block:', name)

    predicted_class = self.compute_prediction_class(self.data.d, batch_size)
    self.prediction_block_class = predicted_class
    # read after compute_prediction_class has run
    self.prediction_block_perc = self.prediction_perc

    model_basename = self.filename_model.split('/')[-1]
    output_name = path.snippet_annotation + identifier + model_basename + '_' + name
    if save:
        print('saving predictions to filename:', output_name)
        for suffix, values in (('_class', self.prediction_block_class),
                               ('_perc', self.prediction_block_perc)):
            np.save(output_name + suffix, values)
    return self.prediction_block_class, self.prediction_block_perc
def make_xml_all_pp(fo, minimal_duration=2000, start_index=0, exp=None):
    '''Make and write bad epoch xml files for all blocks of an experiment.

    fo              passed to Experiment.add_all_participants as fid2ort,
                    only used when exp is not provided
    minimal_dur..   passed to xml_cnn.make_bad_epochs
    start_index     index of the first block in exp.blocks to process
    exp             experiment object, created and filled when None
    Returns the list of xml_cnn objects that were written.
    '''
    if exp == None:
        exp = e.Experiment()
        exp.add_all_participants(fid2ort=fo)
        exp.add_all_sessions()

    written = []
    missing_data_blocks = []
    for index, block in enumerate(exp.blocks[start_index:]):
        print(index, block.pp_id, block.exp_type, block.bid)
        eeg_file = path.eeg100hz + windower.make_name(block) + '.npy'
        if not os.path.isfile(eeg_file):
            print(eeg_file, 'file not found.')
            missing_data_blocks.append(block)
            continue
        x = xml_cnn(block)
        if not x.loaded:
            print('No xml file for:\n', block)
            continue
        x.make_bad_epochs(minimal_duration)
        x.bad_epochs2xml()
        x.write()
        written.append(x)
    return written
def predict_block(self, b, batch_size=800, identifier='', save=True):
    '''Classify the epochs of a block per channel with the currently loaded model.

    The eeg data of the block is loaded and windowed by the cnn_data object
    (self.data). For every channel the target channel is set in the data and
    a class and percentage prediction is made; the results are stored in
    matrices with one row per epoch and one column per channel.

    b           block object
    batch_size  number of epochs to classify in one go
    identifier  string to be prepended to the filename
    save        whether to save the predictions in
                path.channel_snippet_annotation
    '''
    self.b = b
    self.data.block2eegdata(b)
    name = windower.make_name(self.b)
    print('testing block:', name)

    nrows = self.data.d.shape[0]
    data = np.reshape(self.data.d, (nrows, -1))
    result_shape = (nrows, self.nchannels)
    self.prediction_block_class = np.zeros(result_shape)
    self.prediction_block_perc = np.zeros(result_shape)

    for column in range(self.nchannels):
        target = self.chindex[column]
        print('original ch index:', column, 'transelated index:', target)
        set_target_channel(data, [target] * data.shape[0])
        self.prediction_block_class[:, column] = self.compute_prediction_class(data, batch_size)
        # second column of the percentage output of the prediction just made
        self.prediction_block_perc[:, column] = self.prediction_perc[:, 1]

    model_basename = self.filename_model.split('/')[-1]
    output_name = path.channel_snippet_annotation + identifier + model_basename + '_' + name
    if save:
        print('saving predictions to filename:', output_name)
        for suffix, values in (('_class', self.prediction_block_class),
                               ('_perc', self.prediction_block_perc)):
            np.save(output_name + suffix, values)
    return self.prediction_block_class, self.prediction_block_perc
def all_name(self):
    '''Store the names (windower.make_name) of all blocks in self.names.'''
    self.names = []
    self.names.extend(windower.make_name(block) for block in self.blocks)
def set_filename(self):
    '''Set self.name and self.filename of the eog xml file.

    The name is derived from the block (self.b) when there is one; the name
    of the session (self.s) takes precedence when there is one.
    '''
    block, session = self.b, self.s
    if block:
        self.name = windower.make_name(block)
    if session:
        self.name = session.name
    self.filename = ''.join([self.name, self.identifier, '-eog.xml'])
def load_eeg(self):
    '''Load channel names and the 100 Hz eeg data of the current block.

    The data is multiplied by 10**6 (presumably volt to microvolt - confirm)
    and the channels in self.remove_ch are removed with self.remove_channels.
    '''
    self.ch_names = utils.load_ch_names()
    self.block_name = windower.make_name(self.b)
    raw = utils.load_100hz_numpy_block(self.block_name)
    self.data = raw * 1000000
    self.remove_channels(self.remove_ch)
def make_matrix(fo, add_pp_info=False, normalize_data=True, save_directory=None,
                make_data=True, start_pp=1, overwrite=False):
    '''Make channel artifact training numpy matrices (windowed data and info) per block.

    For each block of participants start_pp-48 the 100 Hz data is windowed and
    saved as <name>_data.npy, the matching info matrix as <name>_info.npy.
    Blocks are skipped when the data file is already present in
    path.channel_artifact_training_data (unless overwrite), when the 100 Hz
    data is absent or when there is no channel annotation file.
    When make_data is True the number of rows per block is logged to
    path.bak + 'channel_nrows_per_block-p'.

    fo              passed to e.Participant as fid2ort
    add_pp_info     passed to Windower.make_channel_ca_info_matrix
    normalize_data  passed to windower.window_data as normalize
    save_directory  output directory, default
                    path.channel_artifact_training_data
    make_data       whether to window and save the eeg data (the info matrix
                    is always saved)
    start_pp        id of the first participant to process
    overwrite       whether to redo blocks that already have a data file
    '''
    print(
        'making artifact training np matrices per block, with default 1 sec and 99 perc overlap'
    )
    if save_directory is None:
        save_directory = path.channel_artifact_training_data
    print('save directory:', save_directory)
    fout = open(path.bak + 'channel_nrows_per_block-p', 'w') if make_data else None
    nrows = 0
    # try / finally: the log file is closed even when a block raises
    try:
        for i in range(start_pp, 49):
            p = e.Participant(i, fid2ort=fo)
            p.add_all_sessions()
            for s in p.sessions:
                for b in s.blocks:
                    name = windower.make_name(b)
                    if os.path.isfile(path.channel_artifact_training_data + name + '_data.npy') and not overwrite:
                        # check whether windowed data is already present
                        continue
                    if not os.path.isfile(path.eeg100hz + name + '.npy'):
                        # check whether downsampled data is present to load
                        continue
                    print(name)
                    d = load_100hz_numpy_block(name)
                    if b.start_marker_missing or b.end_marker_missing:
                        w = windower.Windower(b, nsamples=d.shape[1] * 10, sf=100, window_overlap_percentage=.99)
                    else:
                        w = windower.Windower(b, sf=100, window_overlap_percentage=.99)
                    f = windower.block2channel_fn_annotation(w.b, path.channel_artifacts_clean)
                    if f == 0:
                        # if there is no annotation file skip
                        print('skipping:', w.name, 'NO ANNOTATION')
                        continue
                    print('processing:', w.name, w.fn_annotation)
                    w.make_channel_ca_info_matrix(add_pp_info=add_pp_info)
                    if make_data:
                        d = remove_channels(d)
                        d = windower.window_data(d, w.windows['sf100'], flatten=True, normalize=normalize_data, cut_off=300)
                        rows = d.shape[0]
                        nrows += rows
                        fout.write(w.name + '\t' + str(rows) + '\n')
                        print(d.shape, w.info_matrix.shape[0])
                        # data and info must describe the same windows
                        assert d.shape[0] == w.info_matrix.shape[0]
                        # d = insert_target_channel_rows(d,nchannels=26,kernel_size=6)
                        # IMPORTANT: this was not commented when I checked the file
                        # however resulting files look like it was commented during execution
                        # insert target channel is done later in pipeline
                        # not sure what happened
                        np.save(save_directory + w.name + '_data', d)
                    np.save(save_directory + w.name + '_info', w.info_matrix)
        if make_data:
            # the label of the summary row was prefixed with path.bak before
            fout.write('all_blocks\t' + str(nrows) + '\n')
    finally:
        if fout is not None:
            fout.close()
def handle_note(self):
    '''Open the channel annotation note of the current block for editing and redraw the plot.'''
    # plt.close(self.fig)
    block_name = windower.make_name(self.b)
    block_note = notes.note(block_name, annotation_type='channels')
    block_note.edit()
    self.handle_plot(force_redraw=True)