def chunk_file_iterator(chunk_samples, fs, seiz_file):
    """
    Generate successive fixed-size chunks of a recording via ``edfread``.

    Chunk ``n`` is requested with ``rec_times`` running from
    ``n * chunk_samples / fs`` to ``(n + 1) * chunk_samples / fs``.
    Iteration stops quietly as soon as a ``ValueError`` is raised inside the
    loop (presumably by ``edfread`` once the requested start lies beyond the
    end of the file -- confirm against ``edfread``).

    :param chunk_samples: chunk length, in samples
    :param fs: sampling frequency used to turn sample counts into times
    :param seiz_file: file handed to ``edfread``
    :return: generator yielding ``(data, start_time, n, labels)`` where
        ``data`` and ``labels`` come from ``edfread``, ``start_time`` is the
        chunk's start time and ``n`` is the zero-based chunk index
    """
    chunk_no = 0
    try:
        # converted inside the try so a bad ``fs`` is swallowed like any
        # other ValueError
        rate = float(fs)
        while True:
            start_time = chunk_no * chunk_samples / rate
            stop_time = (chunk_no + 1) * chunk_samples / rate
            data, _, labels = edfread(seiz_file,
                                      rec_times=(start_time, stop_time))
            yield data, start_time, chunk_no, labels
            chunk_no += 1
    except ValueError:
        # treated as "no more chunks": simply end the generator
        pass
def chunk_file_iterator(chunk_samples, fs, seiz_file, good_channels=None):
    """
    Walk through an edf file one chunk at a time instead of loading it whole.

    Intended for long (longer than 5 minutes) edf files, to save memory.
    Each step asks ``edfread`` for the time span
    ``[n * chunk_samples / fs, (n + 1) * chunk_samples / fs]`` and the
    generator finishes silently on the first ``ValueError`` raised inside
    the loop.

    :param chunk_samples: number of samples per chunk
    :param fs: sampling frequency, used to convert samples to times
    :param seiz_file: file passed to ``edfread``
    :param good_channels: forwarded unchanged to ``edfread``
    :return: generator of ``(data, start_time, n, labels)`` tuples, ``n``
        being the zero-based chunk counter
    """
    chunk_no = 0
    try:
        # kept inside the try: a ValueError from float() ends iteration too
        rate = float(fs)
        while True:
            start_time = chunk_no * chunk_samples / rate
            span = (start_time, (chunk_no + 1) * chunk_samples / rate)
            data, _, labels = edfread(seiz_file, rec_times=span,
                                      good_channels=good_channels)
            yield data, start_time, chunk_no, labels
            chunk_no += 1
    except ValueError:
        # nothing left to read (or the read failed): stop iterating
        pass
def parallel_coherence(file_path, win_len, win_overlap, fs, nprocs, save_path):
    """
    Run ``mp_coherence`` over a recording in ten-minute chunks.

    Each chunk is read with ``edfread`` (extended by ``win_overlap / fs``
    seconds at its end), its channel labels are printed, and the chunk is
    handed to ``mp_coherence`` together with a json save path.

    Chunk 0 is written to ``save_path + ".json"``; every later chunk ``k``
    is written to ``save_path + "_k.json"`` so that chunks no longer
    overwrite each other.

    :param file_path: recording passed to ``edfread``
    :param win_len: forwarded to ``mp_coherence``
    :param win_overlap: forwarded to ``mp_coherence``; also divided by ``fs``
        to extend each chunk's end time
    :param fs: sampling frequency
    :param nprocs: forwarded to ``mp_coherence``
    :param save_path: output path prefix (without the ``.json`` extension)
    :return: None
    """
    # hard-coded chunking: 10 minute chunks
    min_per_chunk = 10
    sec_per_min = 60
    chunk_seconds = sec_per_min * min_per_chunk

    chunk_idx = 0
    while True:
        # chunk start and end times, in seconds
        start = chunk_idx * chunk_seconds
        end = (chunk_idx + 1) * chunk_seconds + float(win_overlap) / fs

        try:
            print('Extracting the ' + str(chunk_idx) + ' chunk')
            X_chunk, _, labels = edfread(file_path, rec_times=[start, end])

            print('Printing the labels:')
            # a separate loop variable: reusing the chunk counter here used
            # to corrupt the start time of the next chunk
            for channel_idx, label in enumerate(labels):
                print('\tChannel ' + str(channel_idx) + ' is ' + label)

            # chunk 0 keeps the historical file name; later chunks get a
            # suffix so they do not overwrite it
            if chunk_idx == 0:
                this_save_path = save_path + ".json"
            else:
                this_save_path = save_path + "_" + str(chunk_idx) + ".json"
            mp_coherence(X_chunk, win_len, win_overlap, fs, nprocs,
                         this_save_path)

            # if less than an entire chunk was read, this is the last one.
            # NOTE(review): this compares a sample count with a number of
            # seconds; it probably wants "* fs". Left unchanged because the
            # ValueError below already terminates the loop -- confirm.
            if X_chunk.shape[0] < chunk_seconds:
                break
        except ValueError:
            break  # the start was past the end!

        chunk_idx += 1

    return
def analyze_patient(data_path, save_path, patient_id, res_f, window_length=1.0, window_overlap=0.5, num_windows=3000, f_s=1e3, include_awake=True, include_asleep=False): # reformat window length and overlap as indices window_length = int(window_length * f_s) window_overlap = int(window_overlap * f_s) # create save path if not os.path.isdir(save_path): os.makedirs(save_path) # specify data paths print 'Specifying file paths' if not os.path.isdir(data_path): sys.exit('Error: Specified data path does not exist') p_file = os.path.join(data_path, 'patient_pickle.txt') with open(p_file,'r') as pickle_file: patient_info = pickle.load(pickle_file) # add data file names data_filenames = patient_info['seizure_data_filenames'] seizure_times = patient_info['seizure_times'] con_type = ['ictal'] * len(data_filenames) if include_awake: data_filenames += patient_info['awake_inter_filenames'] seizure_times += [None] * len(patient_info['awake_inter_filenames']) con_type += ['awake'] * len(patient_info['awake_inter_filenames']) if include_asleep: data_filenames += patient_info['asleep_inter_filenames'] seizure_times += [None] * len(patient_info['asleep_inter_filenames']) con_type += ['sleep'] * len(patient_info['asleep_inter_filenames']) data_filenames = [os.path.join(data_path,filename) for filename in data_filenames] num_files = len(data_filenames) # get data in numpy array print 'Reading data from edf files to numpy array' all_data = [] num_channels = [] i = 1 for seizure_file in data_filenames: print '\tReading ' + str(i) + ' of ' + str(num_files) i += 1 X,_,_ = edfread(seizure_file) num_channels.append(X.shape[1]) all_data.append(X) if len(set(num_channels)) == 1: num_channels = num_channels[0] gt1 = num_channels > 1 print 'There ' + 'is '*(not gt1) + 'are '*gt1 + str(num_channels) + ' channel' + 's'*gt1 else: print 'Channels: ' + str(num_channels) sys.exit('Error: There are different numbers of channels being used for different seizure files...') # get the number of parameters 
(3 energy statistics per channel) p_feat = 3 * num_channels # pre-process data -- filter parameters print 'Applying a band-pass filter to the data' band = np.array([0.1,100.]) band_norm = band / (f_s / 2.) # normalize the band filt_order = 3 # band pass filter the data b, a = signal.butter(filt_order, band_norm, 'bandpass') # design filter for j in range(num_files): all_data[j] = signal.filtfilt(b,a,all_data[j],axis=0) # filter the data # run leave-one-out cross validation testing sensitivity, latency, FP, time = loocv_testing(all_data, con_type, window_length, window_overlap, num_windows, f_s, seizure_times, p_feat, save_path) # get mean statistics m_sense = np.nanmean(sensitivity) m_latency = np.nanmean(latency) m_fpr = np.nansum(FP) / np.nansum(time) # print to results file print >> res_f, '\nPatient ' + patient_id + '\n=========================' # print the results -- aggregates and total print >> res_f, 'Mean Sensitivity: \t%.2f' %(m_sense) print >> res_f, 'Mean Latency: \t%.4f' %(m_latency) print >> res_f, 'False Positive Rate: \t%.5f (fp/Hr) \n' % m_fpr print >> res_f, 'Sensitivity: ' + str(sensitivity) print >> res_f, 'Latency: ' + str(latency) print >> res_f, 'False Positive Rate: ' + str(FP / time) return sensitivity, latency, m_fpr
while True: # get chunk start and end times start = desired_chunk_index * sec_per_min * min_per_chunk end = (desired_chunk_index + 1) * sec_per_min * min_per_chunk print '\tStart time in seconds:', start print '\tEnd time in seconds:', end try: # extract the chunk print '\t\t\tChunk ' + str(ind) + ' reading...\n', # Gotta get the data_filename right! dimensions_to_keep = choose_best_channels( patient_id, seizure=0, filename=data_filename_path) X_chunk, _, labels = edfread(data_filename_path, rec_times=[start, end], good_channels=dimensions_to_keep) # Added: if the readed chunk is too short, break. if X_chunk.shape[0] < 300 * f_s: print "Chunk is too short! Pick different chunk to analyze!" break # update file information all_files.append(X_chunk) tmp_data_filenames = data_filename_path tmp_file_type = file_type tmp_seizure_times = seizure_times print '\t\t\tInterictal chunk %d reading complete!' % (ind) break except ValueError:
def analyze_patient_raw(data_path, f_s=1e3, include_awake=True, include_asleep=False, long_interictal=False): # minutes per chunk (only for long interictal files) min_per_chunk = 15 sec_per_min = 60 # specify data paths if not os.path.isdir(data_path): sys.exit('Error: Specified data path does not exist') p_file = os.path.join(data_path, 'patient_pickle.txt') with open(p_file, 'r') as pickle_file: print("Open Pickle: {}".format(p_file) + "...\n") patient_info = pickle.load(pickle_file) # # add data file names data_filenames = patient_info['seizure_data_filenames'] seizure_times = patient_info['seizure_times'] file_type = ['ictal'] * len(data_filenames) seizure_print = [True] * len(data_filenames) # mark whether is seizure if include_awake: data_filenames += patient_info['awake_inter_filenames'] seizure_times += [None] * len(patient_info['awake_inter_filenames']) file_type += ['awake'] * len(patient_info['awake_inter_filenames']) seizure_print += [False] * len(patient_info['awake_inter_filenames']) if include_asleep: data_filenames += patient_info['asleep_inter_filenames'] seizure_times += [None] * len(patient_info['asleep_inter_filenames']) file_type += ['sleep'] * len(patient_info['asleep_inter_filenames']) seizure_print += [False] * len(patient_info['asleep_inter_filenames']) data_filenames = [ os.path.join(data_path, filename) for filename in data_filenames ] good_channels = patient_info['good_channels'] # band pass filter parameters # band = np.array([0.1, 100.]) # band_norm = band / (f_s / 2.) # normalize the band # filt_order = 3 # b, a = signal.butter(filt_order, band_norm, 'bandpass') # design filter # get data in numpy array num_channels = [] all_files = [] all_files_unfiltered = [] tmp_data_filenames = [] tmp_file_type = [] tmp_seizure_times = [] tmp_seizure_print = [] print 'Getting Data...' 
for i, seizure_file in enumerate(data_filenames): # this is for when we have inter-ictal files that are an hour long that has split it up into parts if long_interictal and not (file_type[i] is 'ictal'): print '\tThis code has not been written' else: print '\tSeizure file %d reading...' % (i + 1), # read data in X, _, labels = edfread(seizure_file) all_files_unfiltered.append(X) n, p = X.shape num_channels.append(p) # good_channels_ind = [] # labels = list(labels) # for channel in good_channels: # good_channels_ind.append(labels.index(channel)) # # filter data # print 'filtering...', # X = signal.filtfilt(b, a, X, axis=0) # filter the data all_files.append(X) # add raw data to files # update temporary stuff data_filenames = update_list(data_filenames, tmp_data_filenames) file_type = update_list(file_type, tmp_file_type) seizure_times = update_list(seizure_times, tmp_seizure_times) seizure_print = update_list(seizure_print, tmp_seizure_print) # double check that the number of channels matches across data if len(set(num_channels)) == 1: num_channels = num_channels[0] gt1 = num_channels > 1 print 'There ' + 'is ' * (not gt1) + 'are ' * gt1 + str( num_channels) + ' channel' + 's' * gt1 + "\n" else: print 'Channels: ' + str(num_channels) print 'There are inconsistent number of channels in the raw edf data' sys.exit( 'Error: There are different numbers of channels being used for different seizure files...' ) # double check that no NaN values appear in the features for X, i in enumerate(all_files): if np.any(np.isnan(X)): print 'There are NaN in raw data of file', i sys.exit('Error: Uh-oh, NaN encountered while extracting features') return all_files, data_filenames, file_type, seizure_times, seizure_print
def analyze_patient(patient_id, data_path, save_path, log_file, parameters, folds, win_len=1.0, win_overlap=0.5, num_windows=1000, f_s=1e3,include_awake=True, include_asleep=False, long_interictal=False): # minutes per chunk (only for long interictal files) min_per_chunk = 15 sec_per_min = 60 # reformat window length and overlap as indices win_len = int(win_len * f_s) win_overlap = int(win_overlap * f_s) # create save path if not os.path.isdir(save_path): os.makedirs(save_path) # specify data paths if not os.path.isdir(data_path): sys.exit('Error: Specified data path does not exist') p_file = os.path.join(data_path, 'patient_pickle.txt') with open(p_file,'r') as pickle_file: print("Open Pickle: {}".format(p_file)+"...\n") patient_info = pickle.load(pickle_file) # add data file names data_filenames = patient_info['seizure_data_filenames'] seizure_times = patient_info['seizure_times'] file_type = ['ictal'] * len(data_filenames) seizure_print = [True] * len(data_filenames) # mark whether is seizure if include_awake: data_filenames += patient_info['awake_inter_filenames'] seizure_times += [None] * len(patient_info['awake_inter_filenames']) file_type += ['awake'] * len(patient_info['awake_inter_filenames']) seizure_print += [False] * len(patient_info['awake_inter_filenames']) if include_asleep: data_filenames += patient_info['asleep_inter_filenames'] seizure_times += [None] * len(patient_info['asleep_inter_filenames']) file_type += ['sleep'] * len(patient_info['asleep_inter_filenames']) seizure_print += [False] * len(patient_info['asleep_inter_filenames']) data_filenames = [os.path.join(data_path,filename) for filename in data_filenames] good_channels = patient_info['good_channels'] # band pass filter parameters band = np.array([0.1,100.]) band_norm = band / (f_s / 2.) 
# normalize the band filt_order = 3 b, a = signal.butter(filt_order, band_norm, 'bandpass') # design filter # get data in numpy array num_channels = [] all_files = [] tmp_data_filenames = [] tmp_file_type = [] tmp_seizure_times = [] tmp_seizure_print = [] print 'Getting Data...' for i, seizure_file in enumerate(data_filenames): # check that we haven't already gotten energy statistics for this seizure file file_begin = os.path.join(save_path, os.path.splitext(os.path.basename(data_filenames[i]))[0]) # this is for when we have inter-ictal files that are an hour long that has split it up into parts if long_interictal and not (file_type[i] is 'ictal'): # get all files in the save path all_files_in_dir = [os.path.join(save_path,fn) for fn in next(os.walk(save_path))[2]] # all_files_in_dir = os.listdir(save_path) # if the energy stats exist for this file(.es represents energy statistics) inter_ictal_files = [s for s in all_files_in_dir if s.startswith(file_begin) and s.endswith(".es")] # if inter ictal (energy) files found, read them if inter_ictal_files: tmp_data_filenames.append( (i, inter_ictal_files)) tmp_file_type.append( (i, [file_type[i]] * len(inter_ictal_files)) ) tmp_seizure_times.append( (i, [seizure_times[i]] * len(inter_ictal_files)) ) tmp_seizure_print.append( (i, [seizure_print[i]] * len(inter_ictal_files)) ) # read each of the interictal files print "\tSeizure file %d is long --reading energy statistics directly..." %(i+1), for j, file_name in enumerate(inter_ictal_files): print "%d" %(j+1), X_feat, p = read_energy_file(file_name) all_files.append(X_feat) # feature vector X from all the inter_ictal_files saved to all_files num_channels.append(p) print " " else: # read the file print '\tSeizure file %d is long...' 
%(i+1) j = 0 while True: # get chunk start and end times start = j * sec_per_min * min_per_chunk end = (j+1) * sec_per_min * min_per_chunk # get the chunk try: # extract the chunk print '\t\tChunk ' + str(j+1) + ' reading...', X_chunk, _, labels = edfread(seizure_file, rec_times = [start, end]) n,p = X_chunk.shape num_channels.append(p) good_channels_ind = [] labels = list(labels) for channel in good_channels: good_channels_ind.append(labels.index(channel)) # compute feature vector print 'filtering...', X_chunk = signal.filtfilt(b,a,X_chunk,axis=0) # filter the data # get feature vectors from windows -- energy statistics print 'extracting features...', n_windows = n / (win_len - win_overlap) - 1 # evaluates to floor( n / (L - O ) - 1 since ints X_feat = np.empty((n_windows,3,p)) m = 0 for k in range(win_len, X_chunk.shape[0], win_len - win_overlap): window = X_chunk[(k-win_len):k,:] # select window f = energy_features(window) # extract energy statistics X_feat[m,:,:] = f m += 1 all_files.append(X_feat) # add feature to files # save energy statistics file print 'saving...' es_file = file_begin + "_%d.es"%(j) write_energy_file(es_file, X_feat) # print to csv if this is a desired seizure file if seizure_print[i]: filtered_file = file_begin + '_%d_filtered.csv'%(j) energy_file = file_begin + '_%d_energystats.csv'%(j) create_filtered_csv(filtered_file, X_chunk, good_channels_ind) create_energy_csv(energy_file, X_feat[:,:,good_channels_ind[0]]) # update count j += 1 # if less than an entire chunk was read, then this is the last one! if X_chunk.shape[0] < sec_per_min * min_per_chunk: break except ValueError: print "no wait, that doesn't exist!" break # the start was past the end! 
# store temporary stuff tmp_data_filenames.append( (i, [os.path.join(save_path,file_begin + "_%d.es"%(k)) for k in range(j)]) ) tmp_file_type.append( (i, [file_type[i]] * j) ) tmp_seizure_times.append( (i, [seizure_times[i]] * j) ) tmp_seizure_print.append( (i, [seizure_print[i]] * j) ) else: es_file = file_begin + ".es" if os.path.isfile(es_file): print "\tSeizure file %d --reading energy statistics directly" %(i+1) X_feat, p = read_energy_file(es_file) all_files.append(X_feat) num_channels.append(p) else: print '\tSeizure file %d reading...' %(i+1), # read data in X,_,labels = edfread(seizure_file) n,p = X.shape num_channels.append(p) good_channels_ind = [] labels = list(labels) for channel in good_channels: print "channel",channel good_channels_ind.append(labels.index(channel)) # filter data print 'filtering...', X = signal.filtfilt(b,a,X,axis=0) # filter the data # get feature vectors from windows -- energy statistics print 'extracting features...' n_windows = n / (win_len - win_overlap) - 1 # evaluates to floor( n / (L - O ) - 1 since ints X_feat = np.empty((n_windows,3,p)) k = 0 for j in range(win_len, X.shape[0], win_len - win_overlap): window = X[(j-win_len):j,:] # select window f = energy_features(window) # extract energy statistics X_feat[k,:,:] = f k += 1 all_files.append(X_feat) # add feature to files # save energy statistics file write_energy_file(es_file, X_feat) # print to csv if this is a desired seizure file if seizure_print[i]: filtered_file = file_begin + '_filtered.csv' energy_file = file_begin + '_energystats.csv' create_filtered_csv(filtered_file, X, good_channels_ind) create_energy_csv(energy_file, X_feat[:,:,good_channels_ind[0]]) # update temporary stuff data_filenames = update_list(data_filenames, tmp_data_filenames) file_type = update_list(file_type, tmp_file_type) seizure_times = update_list(seizure_times, tmp_seizure_times) seizure_print = update_list(seizure_print, tmp_seizure_print) # double check that the number of channels matches 
across data if len(set(num_channels)) == 1: num_channels = num_channels[0] gt1 = num_channels > 1 print 'There ' + 'is '*(not gt1) + 'are '*gt1 + str(num_channels) + ' channel' + 's'*gt1+"\n" else: print 'Channels: ' + str(num_channels) sys.exit('Error: There are different numbers of channels being used for different seizure files...') # double check that no NaN values appear in the features for X in all_files: if np.any(np.isnan(X)): sys.exit('Error: Uh-oh, NaN encountered while extracting features') # leave one out cross validation, update log fitnesses=loocv_testing(all_files, data_filenames, file_type, seizure_times, seizure_print, win_len, win_overlap, num_windows, f_s, save_path,parameters, folds) #update_log(log_file, patient_id, sensitivity, latency, FP, time) return fitnesses
# # # asleep_file= "/Users/TianyiZhang/Documents/EpilepsyVIP/data/TS041/DA00101Q_1-1_02oct2010_03_00_05_Sleep+.edf" # # X_asleep, _,labels_asleep = edfread(asleep_file) # # print "how many channels for asleep file?",len(labels_asleep) # # awake_file= "/Users/TianyiZhang/Documents/EpilepsyVIP/data/TS041/DA00101P_1-1_02oct2010_09_00_38_Awake+.edf" # # X_awake, _,labels_awake = edfread(awake_file) # # print "how many channels for asleep file?",len(labels_awake) file = "/Users/TianyiZhang/Documents/EpilepsyVIP/data/TS039/CA00100D_1-1+.edf" X, _, labels = edfread(file) expected = "RAH3" print labels.index(expected) """Test Filters""" # # Filter a noisy signal. # T = 0.05 # nsamples = T * fs # t = np.linspace(0, T, nsamples, endpoint=False) # a = 0.02 # f0 = 600.0 # x = 0.1 * np.sin(2 * np.pi * 1.2 * np.sqrt(t)) # x += 0.01 * np.cos(2 * np.pi * 312 * t + 0.1) # x += a * np.cos(2 * np.pi * f0 * t + .11) # x += 0.03 * np.cos(2 * np.pi * 2000 * t) # plt.figure(2)
def classif_data_collect(seiz_filenames, seizure_times, inter_filenames,
                         window_len, preictal_time, postictal_time, n_windows,
                         sliding_window=False, window_overlap=.8, fs=1000.,
                         good_channels=None, bad_channels=None,
                         rstat_bands=((1, 4), (5, 8), (9, 13), (14, 25),
                                      (25, 90), (100, 200)),
                         rstat_win_len=20000, notch_filt=True,
                         norm_whole_file=True, norm_window=False):
    '''
    Collect labelled windows from seizure and interictal recordings.

    Every file is read with ``edfread``, preprocessed as a whole
    (normalisation, notch filter, optional r-statistic band-pass) and then
    cut into windows, either on a sliding grid or at random positions.

    :param seiz_filenames: A list of the .edf filenames which contain
        epileptic data
    :param seizure_times: A list corresponding to the ictal filenames of
        tuples containing seizure start and end times (in seconds).
        For instance, if my filename list looks like
        ['seizure_a.edf','seizure_b.edf']
        then my seizure_times list might be: [(123,135),(60,300)]
    :param inter_filenames: A list of .edf filenames of interictal data
    :param good_channels: A list of the channel names to read; passed on to
        ``edfread`` and to the r-statistic preparation
    :param window_len: The length of the windows that the program takes, in
        number of samples
    :param preictal_time: The amount of time, in seconds, before a seizure,
        for which a window will still be considered as preictal for training
        purposes
    :param postictal_time: The amount of time, in seconds, after a seizure
        that the period is considered postictal
    :param n_windows: The number of windows of each class we choose to
        extract
    :param sliding_window: Do you want the data to be a sliding window?
    :param window_overlap: The maximum allowable overlap (as a fraction of
        the window length) between two windows.
    :param fs: the sampling frequency of the sample
    :param bad_channels: if you want to read in all channels, except for a
        select few
    :param rstat_bands: An iterable of pairs of two elements, representing
        the start and stop of the bands of interest. Can be NoneType if no
        rstat is desired
    :param rstat_win_len: (int) The length of the random windows extracted
        in computing the r-statistic
    :param notch_filt: (bool) A boolean value determining whether a notch
        filter is to be applied to the data
    :param norm_whole_file: (bool) A boolean value that normalizes the whole
        file, not just the window.
    :param norm_window: (bool) A boolean value that chooses to normalize
        each window, not the whole file.
    :return: dict with 'data' (a list of dicts with keys 'window', 'label',
        'fold_lab' and 'time') and 'seize_times' (the ``seizure_times``
        argument)
    :raises ValueError: when the requested random windows cannot fit in the
        shortest of the preictal / ictal / postictal durations
    '''

    # NOTE: the nested helpers below read seiz_data, seize_times, seiz_count
    # and non_seiz_count from the loops at the bottom of this function.

    def select_windows_from_interval(int_start, int_end, max_iter=10):
        '''
        Pick n_windows window end indices inside [int_start, int_end).

        :param int_start: first sample of the interval
        :param int_end: sample just past the end of the interval
        :param max_iter: number of extra random draws before falling back to
            an evenly spaced layout
        :return: window_ends, a sorted integer numpy array with the indices
            at which each window ends (inclusive)
        '''
        # Two windows overlap by at most window_overlap * window_len when
        # their ends are at least this far apart. This is the same spacing
        # the sliding-window step and the feasibility check use.
        min_spacing = window_len * (1 - window_overlap)

        # TODO: refine window logic
        for _ in range(max_iter + 1):
            window_ends = np.sort(
                np.random.randint(int_start + window_len, int_end, n_windows))
            if n_windows == 1:
                return window_ends
            if np.all(np.diff(window_ends) >= min_spacing):
                return window_ends

        # No acceptable random draw: lay the windows out by hand, back from
        # the last sample of the interval. Integer ends are required because
        # they are used as slice indices.
        step = int(np.ceil(min_spacing))
        return (int_end - 1 - step * np.arange(n_windows))[::-1]

    def add_window(end, label, fold_lab):
        '''Cut the window ending at sample ``end`` (inclusive) and store it.'''
        window = preprocess(seiz_data[end - window_len + 1:end + 1, :],
                            normaliz=norm_window, notch_filt=False)
        data_container.append({
            'window': window,
            'label': label,
            'fold_lab': fold_lab,
            'time': end / fs
        })

    def generate_windows_from_seizure(sliding_window=False):
        '''
        Given seizure data, select windows from the preictal, ictal, and
        postictal phases
        :return:
        '''
        preictal_samples = int(preictal_time * fs)
        postictal_samples = int(postictal_time * fs)
        seiz_start, seiz_end = seize_times
        ictal_samples = int((seiz_end - seiz_start) * fs)
        max_iter = 4

        if not sliding_window:
            shortest = min(preictal_samples, postictal_samples, ictal_samples)
            if not window_overlap:
                if (n_windows * window_len) > shortest:
                    raise ValueError(
                        'Nonoverlapping windows not possible to produce')
            else:
                needed = (n_windows * window_len -
                          (n_windows - 1) * window_len * window_overlap)
                if needed > shortest:
                    raise ValueError(
                        'Not possible to produce overlapping windows with certain max_proportion'
                    )

        seiz_start = int(seiz_start * fs)
        seiz_end = int(seiz_end * fs)
        # Clamp at 0: a seizure near the start of the file would give a
        # negative preictal start and negative slice indices.
        # NOTE(review): the postictal interval runs to the end of the file;
        # postictal_time is only used in the feasibility check -- confirm.
        interval_starts = (max(seiz_start - preictal_samples, 0), seiz_start,
                           seiz_end)
        interval_ends = (seiz_start, seiz_end, np.size(seiz_data, axis=0))
        labels = ['Preictal', 'Ictal', 'Postictal']
        fold_lab = 'S{}'.format(seiz_count)

        if sliding_window:
            end_times = range(window_len - 1, np.size(seiz_data, 0),
                              int(window_len * (1 - window_overlap)))
            for end in end_times:
                start = end - window_len + 1
                lab_index = interval_inclusion_index(start, end,
                                                     interval_starts,
                                                     interval_ends)
                if lab_index is None:
                    label = 'None of the Above'
                else:
                    label = labels[lab_index]
                add_window(end, label, fold_lab)
        else:
            for interval_start, interval_end, label in zip(
                    interval_starts, interval_ends, labels):
                window_ends = select_windows_from_interval(
                    interval_start, interval_end, max_iter)
                for end in window_ends:
                    add_window(end, label, fold_lab)
        return

    def generate_windows_from_nonseizure(sliding_window=False):
        '''Select 'Interictal' windows from the current interictal file.'''
        label = 'Interictal'
        fold_lab = 'NS{}'.format(non_seiz_count)
        if sliding_window:
            end_times = range(window_len, np.size(seiz_data, 0),
                              int(window_len * (1 - window_overlap)))
        else:
            max_iter = 4  # maximum extra random draws
            end_times = select_windows_from_interval(
                0, np.size(seiz_data, axis=0), max_iter)
        for end in end_times:
            add_window(end, label, fold_lab)
        return

    def preprocess(seiz_data, normaliz=False, notch_filt=True,
                   rstat_band=False, causal=True):
        '''Apply the requested normalisation / notch / r-stat band-pass.'''
        if normaliz:
            seiz_data = normalize(seiz_data)
        if notch_filt:
            seiz_data = notch(seiz_data, 56., 64., fs, mode=not causal)
        if rstat_band:
            seiz_data = rstat_processor.optimal_bandpass(seiz_data,
                                                         mode=not causal)
        return seiz_data

    ###start main code of the function
    rstat_filt = False
    if rstat_bands is not None:
        # use the caller's fs and rstat_win_len; both used to be ignored in
        # favour of hard-coded values equal to the defaults
        rstat_processor = RstatPreprocessor(inter_filenames[0],
                                            seiz_filenames[0],
                                            seizure_times=seizure_times[0],
                                            fs=fs)
        rstat_processor.prepare_rstat(rstat_bands,
                                      good_channels=good_channels,
                                      bad_channels=bad_channels,
                                      window_len=rstat_win_len,
                                      mode=0)
        rstat_filt = True

    data_container = []
    if not window_overlap:
        window_overlap = 0

    for seiz_count, (seizure_file, seize_times) in enumerate(
            zip(seiz_filenames, seizure_times)):
        seiz_data, _, _ = edfread(seizure_file,
                                  good_channels=good_channels,
                                  bad_channels=bad_channels)
        seiz_data = preprocess(seiz_data,
                               normaliz=norm_whole_file,
                               notch_filt=notch_filt,
                               rstat_band=rstat_filt)
        generate_windows_from_seizure(sliding_window)

    for non_seiz_count, nonseizure_file in enumerate(inter_filenames):
        seiz_data, _, _ = edfread(nonseizure_file,
                                  good_channels=good_channels,
                                  bad_channels=bad_channels)
        seiz_data = preprocess(seiz_data,
                               normaliz=norm_whole_file,
                               notch_filt=notch_filt,
                               rstat_band=rstat_filt)
        generate_windows_from_nonseizure(sliding_window)

    return {'data': data_container, 'seize_times': seizure_times}
os.getcwd()))) # direct to EpilepsyVIP data_path = os.path.join(to_data, 'data') # direct to data folder for i, patient_id in enumerate(patients): # update paths specific to each patient p_data_path = os.path.join(data_path, patient_id) print "---------------------------Analyzing patient ", patient_id, "----------------------------\n" # if data path does not work out if not os.path.isdir(data_path): sys.exit('Error: Specified data path does not exist') # get pickle file p_file = os.path.join(p_data_path, 'patient_pickle.txt') # open pickle file and load with open(p_file, 'r') as pickle_file: print("Open Pickle: {}".format(p_file) + "...\n") patient_info = pickle.load(pickle_file) data_filenames = patient_info['seizure_data_filenames'] seizure_times = patient_info['seizure_times'] file_type = ['ictal'] * len(data_filenames) seizure_print = [True] * len(data_filenames) # mark whether is seizure print 'Getting Data...' # read seizure file for i, seizure_file in enumerate(data_filenames): path_to_seizure = os.path.join(p_data_path, seizure_file) print path_to_seizure x, _, labels = edfread(path_to_seizure) # output x, raw iEEG signal print 'There are', x.shape[1], 'channels' selected_features = feature_selection(x) labels = clustering(selected_features)