# import packages
import matplotlib.pyplot as plt
import numpy as np
import scipy

from DCEpy.Features.BurnsStudy.eig_centrality import eig_centrality
from DCEpy.Features.GardnerStudy.edfread import edfread

# load a data file
print('Loading file...')
filename = '/home/chris/Documents/Rice/senior/EpilepsyVIP/data/RMPt2/DA00101U_1-1+.edf'  # Chris
seizure_start, seizure_end = 262, 330
fs = 1000
bad_channels = ('Events/Markers', 'EDF Annotations', 'EEG Mark1', 'EEG Mark2')
data, _, labels = edfread(filename, bad_channels=bad_channels)
data_len, nchannels = np.shape(data)
print('shape is ' + str(np.shape(data)))

# window size and increment (in samples)
window_size = int(5e3)
window_increment = 1250
window_num = len(range(window_size, data_len, window_increment))
eigs = np.empty((window_num, nchannels))  # initialize for eigenvectors
col = np.empty(window_num)                # initialize seizure labels

# find eigenvectors
i = 0
v0 = np.ones(nchannels) / np.sqrt(nchannels)
print('Getting Eigenvectors...')
for end_time in range(window_size, data_len, window_increment):
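    # --- Hypothetical loop body: the original source was truncated here. ---
    # This sketch assumes eig_centrality(window) returns the eigenvector-
    # centrality vector of a connectivity graph built from the window, and
    # that v0 is meant as a warm start for the power iteration if supported.
    window = data[end_time - window_size:end_time, :]
    eigs[i, :] = eig_centrality(window)  # assumed signature
    # label the window 1 if it ends inside the seizure, 0 otherwise
    col[i] = seizure_start * fs <= end_time <= seizure_end * fs
    i += 1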
filenames = (
    '/Users/vsp/Google Drive/MATLAB/Scattering Coeffs/DA00101U_1-1+.edf',
    '/Users/vsp/Google Drive/MATLAB/Scattering Coeffs/DA00101V_1-1+.edf',
    '/Users/vsp/Google Drive/MATLAB/Scattering Coeffs/DA00101W_1-1+.edf',
    '/Users/vsp/Google Drive/MATLAB/Scattering Coeffs/DA00101P_1-1_02oct2010_09_00_38_Awake+.edf',
)
good_channels = ['LAH1', 'LAH2', 'LPH6', 'LPH7', 'LPH9', 'LPH10', 'LPH11', 'LPH12']
seizure_starts = 262, 107, 191, 0
seizure_ends = 330, 287, 405, 0

for plot_no, (filename, seizure_start, seizure_end) in \
        enumerate(zip(filenames, seizure_starts, seizure_ends)):

    data, _, labels = edfread(filename, good_channels=good_channels)
    # data = normalize(np.random.rand(3e4, 5))
    # data, A = artif_VAR_data(N=5, n=2000, p=4, burn=50, A_type="tridiag")

    eig_seq, tim = ar_stability_window(data, order=15, n_eigs=8, w_len=5000, w_gap=1000)

    plt.subplot(1, len(filenames), plot_no + 1)
    plt.plot(tim / 1000, eig_seq)
    plt.xlabel('Time (s)')
    plt.ylabel('Top Eigenvalues')
    plt.vlines((seizure_start, seizure_end), 0.9, 1.04, 'g',
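               # --- hypothetical completion: the vlines call was truncated
               # here; dashed green seizure-boundary markers are assumed ---
               linestyles='dashed')

plt.show()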
import math

import networkx as nx
import numpy as np
from scipy import cluster, signal

# NOTE: edfread, preprocessing, build_network, rstat, and gap_stat are
# project-local modules assumed to be imported from elsewhere in DCEpy.


def burns(all_files, ictal_interval, inter_interval):
    """
    Input:
        all_files -- array of files for the patient;
            position 0 contains ictal data for band picking,
            position 1 contains interictal data for band picking
        ictal_interval -- interval [a, b] of ictal data for band picking
        inter_interval -- interval [c, d] of interictal data for band picking
            (ensure that b - a = d - c)
    Output:
        list of states and the centers of the clusters (might change this a bit)
    """
    # testing with two files of TS039
    # test_file = 'CA1353FN_1-1_small.edf'
    # [test_patient1, annotations1, labels1] = edfread.edfread(test_file)
    # inter_file = 'C:\Users\User\Documents\EpilepsySeniorDesign\Burns\CA00100D_1-1+.edf'
    # ictal_file = 'C:\Users\User\Documents\EpilepsySeniorDesign\Burns\DA00101L_1-1+.edf'
    # [inter_data, annotations1, labels1] = edfread.edfread(inter_file)
    # [ictal_data, annotations2, labels2] = edfread.edfread(ictal_file)
    # y_inter = preprocessing(inter_data)
    # y_ictal = preprocessing(ictal_data)

    # create list to hold data and sampling frequency
    all_data = []
    fs = 1000

    # load and preprocess
    for file_name in all_files:
        [data, annotations, labels] = edfread.edfread(file_name)
        all_data.append(preprocessing(data))
    print('Data is loaded and preprocessed')

    # find band for r statistic
    y_ictal = all_data[0][ictal_interval[0]:ictal_interval[1], 0:1]
    y_inter = all_data[1][inter_interval[0]:inter_interval[1], 0:1]
    bands = np.array([[1, 4], [5, 8], [9, 13], [14, 25], [25, 90],
                      [100, 200]])  # candidate bands
    band = rstat.calc_rstat(y_ictal, y_inter, fs, bands)
    print('Band selected is: ' + str(band))

    # band-pass filter in the selected band
    band_norm = [(1.0 * band[0]) / (fs / 2.0),
                 (1.0 * band[1]) / (fs / 2.0)]  # normalize the band
    filt_order = 3
    b, a = signal.butter(filt_order, band_norm, 'bandpass')  # design filter
    num_files = len(all_data)
    for j in range(num_files):
        all_data[j] = signal.filtfilt(b, a, all_data[j], axis=0)  # filter the data
    print('Done filtering')

    # list to hold eigenvectors
    evc = []
    num_exc = 0  # count windows where centrality did not converge

    # for each file given
    for file_data in all_data:

        # get data shape
        n, m = file_data.shape
        print('Data has size ' + str(file_data.shape))

        # determine edges to be used
        connections = range(m)
        weightType = 'coherence'

        # go through each window and create a coherence graph
        num_windows = int(math.floor((1.0 * n) / 1000) - 3)
        for i in range(0, num_windows):

            # get window (slice along the time axis)
            row1 = i * 1000
            row2 = row1 + 3000
            window = file_data[row1:row2, :]

            # build coherence graph
            G = build_network(window, connections, weightType)

            # get eigenvector centrality
            try:
                current_evc = nx.eigenvector_centrality(G, weight=weightType)
                current_evc_ar = np.empty(m)
                for ch in range(m):  # dictionary to array
                    current_evc_ar[ch] = current_evc[ch]
                evc.append(current_evc_ar)
            except nx.NetworkXException:
                num_exc += 1
                print("Eigenvector Centrality not found/did not converge")

    print('Finished computing eigenvector centrality')

    # convert into a numpy array
    evcs = np.array(evc)

    # choose k by gap statistic
    K = np.arange(20)
    n_tests = 10
    k, min_gap = gap_stat.gap_statistic(evcs, K, n_tests)
    print('Gap Statistic chose k=' + str(k))

    # cluster the eigenvectors
    [centroids, labels] = cluster.vq.kmeans2(evcs, k)

    return centroids, labels
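# --- Hypothetical sketch: build_network is a project-local function whose ---
# --- implementation is not shown here; this is an assumed version only.   ---
# The window loop above treats edge weights as channel-pair coherence, so a
# minimal build_network might average scipy.signal.coherence over frequency:
def build_network_sketch(window, connections, weightType, fs=1000):
    """Build a weighted graph from a (samples x channels) window."""
    G = nx.Graph()
    G.add_nodes_from(connections)
    for u in connections:
        for v in connections:
            if u < v:
                # mean magnitude-squared coherence between channels u and v
                freqs, Cxy = signal.coherence(window[:, u], window[:, v], fs=fs)
                G.add_edge(u, v, **{weightType: float(np.mean(Cxy))})
    return G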
import os
import pickle
import sys

import numpy as np
from scipy import signal

# NOTE: edfread, energy_features, collect_windows, parameter_tuning,
# learn_support, and loocv_testing are assumed to be defined elsewhere in
# the GardnerStudy package.


def create_model_file(data_path, window_length, window_overlap, f_s,
                      model_file, param_file, num_windows=500,
                      include_awake=True, include_asleep=False):

    # use pickle files
    p_file = os.path.join(data_path, 'patient_pickle.txt')
    with open(p_file, 'r') as pickle_file:
        patient_info = pickle.load(pickle_file)

    # add data file names and types
    data_filenames = patient_info['seizure_data_filenames']
    seizure_times = patient_info['seizure_times']
    file_types = ['ictal'] * len(data_filenames)
    if include_awake:
        data_filenames += patient_info['awake_inter_filenames']
        seizure_times += [None] * len(patient_info['awake_inter_filenames'])
        file_types += ['awake'] * len(patient_info['awake_inter_filenames'])
    if include_asleep:
        data_filenames += patient_info['asleep_inter_filenames']
        seizure_times += [None] * len(patient_info['asleep_inter_filenames'])
        file_types += ['sleep'] * len(patient_info['asleep_inter_filenames'])

    # attach data file names to data path
    data_filenames = [os.path.join(data_path, filename)
                      for filename in data_filenames]
    num_files = len(data_filenames)

    # get best channel to train on
    # TODO: (this will change in the future to include all channels)
    good_channels = patient_info['best_channel']

    # TODO: change this to read an edf file, then get energy statistic
    # BEFORE going to next edf file

    # read files and store in an array
    print 'Reading data from edf files to numpy array'
    all_data = []
    num_channels = []
    i = 1
    for seizure_file in data_filenames:
        print '\tReading ' + str(i) + ' of ' + str(num_files)
        i += 1
        X, _, _ = edfread(seizure_file, good_channels=good_channels)
        num_channels.append(X.shape[1])
        all_data.append(X)

    if len(set(num_channels)) == 1:
        num_channels = num_channels[0]
        gt1 = num_channels > 1
        print 'There ' + 'is ' * (not gt1) + 'are ' * gt1 \
              + str(num_channels) + ' channel' + 's' * gt1
    else:
        print 'Channels: ' + str(num_channels)
        sys.exit('Error: There are different numbers of channels being used '
                 'for different seizure files...')

    p_feat = 3  # this is the number of energy statistics

    # pre-process data -- filter parameters
    print 'Applying a band-pass filter to the data'
    band = np.array([0.1, 100.])
    band_norm = band / (f_s / 2.)  # normalize the band
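    # Worked example (added for clarity): with f_s = 1000 Hz the Nyquist
    # frequency is 500 Hz, so band = [0.1, 100.] Hz maps to the normalized
    # cutoffs [0.0002, 0.2] that signal.butter expects.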
    filt_order = 3

    # band-pass filter the data
    b, a = signal.butter(filt_order, band_norm, 'bandpass')  # design filter
    for j in range(num_files):
        all_data[j] = signal.filtfilt(b, a, all_data[j], axis=0)  # filter the data

    # get features from time series
    num_files = len(all_data)
    feat_vec = []
    print '\tExtracting features from input files...',
    i = 1
    for X in all_data:

        # print progress
        print str(i) + ', ',
        i += 1

        # initialize empty feature vector
        n = X.shape[0]
        n_windows = n / (window_length - window_overlap) - 1  # floor(n / (L - O)) - 1, since these are ints
        X_feat = np.zeros((n_windows, p_feat))  # empty feature vector
        k = 0

        # collect features from windows
        for j in range(window_length, n, window_length - window_overlap):
            window = X[(j - window_length):j, :]  # select window
            f = energy_features(window)  # extract energy statistics
            X_feat[k, :] = f
            k += 1

        # add the new feature vector
        feat_vec.append(X_feat)
    print ''  # new line

    # check for NaN
    for X in feat_vec:
        if np.any(np.isnan(X)):
            print '\tUh-oh, NaN encountered while extracting features'

    # keep windows from the non-ictal (awake/sleep) files only
    print '\tCollecting inter-ictal windows'
    inter_ictal = [feat_vec[j] for j in range(len(feat_vec))
                   if file_types[j] != 'ictal']
    X_train = collect_windows(inter_ictal, num_windows)

    # parameter tuning
    nu, gamma, C, adapt_rate, T_per = parameter_tuning(
        X_train, feat_vec, seizure_times, f_s, window_length, window_overlap)
    print 'Obtained optimal parameters'

    # run an SVM on the training data
    clf = learn_support(X_train, nu=nu, gamma=gamma)
    num_SV = clf.support_.size

    # create model file
    print 'Writing to model file'
    f = open(model_file, 'w')
    f.write('svm_type one_class\n')            # one-class SVM
    f.write('kernel_type rbf\n')               # kernel type = rbf
    f.write('gamma %.6f\n' % gamma)            # gamma
    f.write('nr_class 2\n')                    # number of classes = 2
    f.write('total_sv %d\n' % num_SV)          # total num of support vectors
    f.write('rho %.6f\n' % clf.intercept_[0])  # offset
    f.write('SV\n')                            # ready for support vectors!

    # write support vectors to model file
    for i in range(num_SV):
        f.write('%.6f ' % clf.dual_coef_[0, i])
        for j in range(p_feat):
            f.write(str(j + 1) + ':%.6f ' % clf.support_vectors_[i, j])
        f.write('\n')
    f.close()

    # write other parameters file
    f = open(param_file, 'w')
    # TODO: do not hardcode number of channels
    num_channels = 135
    f.write('adapt_rate: %d\n' % adapt_rate)
    f.write('channel: threshold: weight\n')
    for i in range(num_channels):
        weight = 1 if i == 2 else 0
        f.write("%d: %.4f: %d\n" % (i, C, weight))
    f.close()

    return
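# --- Hypothetical sketch: energy_features is defined elsewhere in the    ---
# --- project; this assumed version is for illustration only.             ---
# p_feat = 3 above suggests the three Gardner-style energy statistics per
# window: mean curve length, mean energy, and mean Teager energy.
def energy_features_sketch(window):
    x = window[:, 0]  # assumes a single-channel (samples x 1) window
    curve_length = np.mean(np.abs(np.diff(x)))       # mean curve length
    energy = np.mean(x ** 2)                         # mean energy
    teager = np.mean(x[1:-1] ** 2 - x[:-2] * x[2:])  # mean Teager energy
    return np.array([curve_length, energy, teager])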
def analyze_patient(data_path, save_path, patient_id, res_f,
                    window_length=1.0, window_overlap=0.5, num_windows=3000,
                    f_s=1e3, include_awake=True, include_asleep=False):

    # reformat window length and overlap as indices
    window_length = int(window_length * f_s)
    window_overlap = int(window_overlap * f_s)

    # create save path
    if not os.path.isdir(save_path):
        os.makedirs(save_path)

    # specify data paths
    print 'Specifying file paths'
    if not os.path.isdir(data_path):
        sys.exit('Error: Specified data path does not exist')

    p_file = os.path.join(data_path, 'patient_pickle.txt')
    with open(p_file, 'r') as pickle_file:
        patient_info = pickle.load(pickle_file)

    # add data file names
    data_filenames = patient_info['seizure_data_filenames']
    seizure_times = patient_info['seizure_times']
    con_type = ['ictal'] * len(data_filenames)
    if include_awake:
        data_filenames += patient_info['awake_inter_filenames']
        seizure_times += [None] * len(patient_info['awake_inter_filenames'])
        con_type += ['awake'] * len(patient_info['awake_inter_filenames'])
    if include_asleep:
        data_filenames += patient_info['asleep_inter_filenames']
        seizure_times += [None] * len(patient_info['asleep_inter_filenames'])
        con_type += ['sleep'] * len(patient_info['asleep_inter_filenames'])

    data_filenames = [os.path.join(data_path, filename)
                      for filename in data_filenames]
    num_files = len(data_filenames)

    # get data in numpy array
    print 'Reading data from edf files to numpy array'
    all_data = []
    num_channels = []
    i = 1
    for seizure_file in data_filenames:
        print '\tReading ' + str(i) + ' of ' + str(num_files)
        i += 1
        X, _, _ = edfread(seizure_file)
        num_channels.append(X.shape[1])
        all_data.append(X)

    if len(set(num_channels)) == 1:
        num_channels = num_channels[0]
        gt1 = num_channels > 1
        print 'There ' + 'is ' * (not gt1) + 'are ' * gt1 \
              + str(num_channels) + ' channel' + 's' * gt1
    else:
        print 'Channels: ' + str(num_channels)
        sys.exit('Error: There are different numbers of channels being used '
                 'for different seizure files...')

    # get the number of parameters (3 energy statistics per channel)
    p_feat = 3 * num_channels

    # pre-process data -- filter parameters
    print 'Applying a band-pass filter to the data'
    band = np.array([0.1, 100.])
    band_norm = band / (f_s / 2.)  # normalize the band
    filt_order = 3

    # band-pass filter the data
    b, a = signal.butter(filt_order, band_norm, 'bandpass')  # design filter
    for j in range(num_files):
        all_data[j] = signal.filtfilt(b, a, all_data[j], axis=0)  # filter the data

    # run leave-one-out cross-validation testing
    sensitivity, latency, FP, time = loocv_testing(
        all_data, con_type, window_length, window_overlap, num_windows, f_s,
        seizure_times, p_feat, save_path)

    # get mean statistics
    m_sense = np.nanmean(sensitivity)
    m_latency = np.nanmean(latency)
    m_fpr = np.nansum(FP) / np.nansum(time)

    # print to results file
    print >> res_f, '\nPatient ' + patient_id + '\n========================='

    # print the results -- aggregates and totals
    print >> res_f, 'Mean Sensitivity: \t%.2f' % m_sense
    print >> res_f, 'Mean Latency: \t%.4f' % m_latency
    print >> res_f, 'False Positive Rate: \t%.5f (fp/Hr) \n' % m_fpr
    print >> res_f, 'Sensitivity: ' + str(sensitivity)
    print >> res_f, 'Latency: ' + str(latency)
    print >> res_f, 'False Positive Rate: ' + str(FP / time)

    return sensitivity, latency, m_fpr
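# --- Hypothetical usage (paths, patient ID, and results filename are made ---
# --- up; the actual driver script is not shown in this file).             ---
if __name__ == '__main__':
    res_f = open('results.txt', 'w')
    sens, lat, fpr = analyze_patient(
        data_path='/data/TS039',     # assumed to contain patient_pickle.txt
        save_path='/results/TS039',  # created if it does not exist
        patient_id='TS039',
        res_f=res_f,
        window_length=1.0,
        window_overlap=0.5)
    res_f.close()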