def folder_loop(self, folder_name):
    """
    folder_loop(self, folder_name)

    Run the threshold sweep on every verified recording of one folder and
    hand each set of predictions to self.append_pred.

    Parameters
    ----------
    folder_name : Str, folder name (joined onto self.main_path)

    Returns
    -------
    bool, False when the verified-predictions directory is missing,
    True after all files were processed
    """
    folder_path = os.path.join(self.main_path, folder_name)
    ver_path = os.path.join(folder_path, 'verified_predictions_pantelis')

    # error check: nothing to do without verified predictions
    if not os.path.exists(ver_path):
        print('path not found, skipping:', folder_path, '.')
        return False

    # keep entries whose name contains '.csv' and strip the extension
    file_ids = [os.path.splitext(name)[0]
                for name in os.listdir(ver_path) if '.csv' in name]

    for file_id in tqdm(file_ids):  # iterate through experiments
        # load signal and verified (true) labels
        data, y_true = get_data(
            folder_path,
            file_id,
            ch_num=ch_list,
            inner_path={
                'data_path': 'filt_data',
                'pred_path': 'verified_predictions_pantelis',
            },
            load_y=True,
        )

        # feature extraction followed by normalization
        features_raw, _ = get_features_allch(data, param_list,
                                             cross_ch_param_list)
        x_data = StandardScaler().fit_transform(features_raw)

        # bounds of the true seizures
        bounds_true = find_szr_idx(y_true, np.array([0, 1]))

        self.df_cntr = 0  # restart the counter for every file

        for idx in range(len(self.thresh_array)):
            # threshold = global mean + multiplier * global std
            cutoff = np.mean(x_data) + self.thresh_array[idx] * np.std(x_data)
            # store the boolean predictions for this multiplier
            self.append_pred(x_data > cutoff, bounds_true)

    return True
def folder_loop(self, folder_name):
    """
    folder_loop(self, folder_name)

    For every verified recording in one folder, threshold each feature
    separately and add the resulting counts to self.df
    ('total', 'detected', 'false_positives').

    Parameters
    ----------
    folder_name : Str, folder name (joined onto self.main_path)

    Returns
    -------
    bool, False when the verified-predictions directory is missing,
    True after all files were processed
    """
    folder_path = os.path.join(self.main_path, folder_name)
    ver_path = os.path.join(folder_path, 'verified_predictions_pantelis')

    # error check
    if not os.path.exists(ver_path):
        print('path not found, skipping:', folder_path, '.')
        return False

    # keep entries whose name contains '.csv' and strip the extension
    file_ids = [os.path.splitext(name)[0]
                for name in os.listdir(ver_path) if '.csv' in name]

    for file_id in tqdm(file_ids):  # iterate through experiments
        # load signal and verified (true) labels
        data, y_true = get_data(
            folder_path,
            file_id,
            ch_num=ch_list,
            inner_path={
                'data_path': 'filt_data',
                'pred_path': 'verified_predictions_pantelis',
            },
            load_y=True,
        )

        # features, then normalization
        raw_features, _ = get_features_allch(data, param_list,
                                             cross_ch_param_list)
        x_data = StandardScaler().fit_transform(raw_features)

        # bounds of the true seizures
        bounds_true = find_szr_idx(y_true, np.array([0, 1]))
        if not bounds_true.shape[0] > 0:
            continue  # files without true seizures contribute nothing

        n_true = bounds_true.shape[0]
        for ii in range(len(self.feature_labels)):  # one pass per feature
            column = x_data[:, ii]
            cutoff = np.mean(column) + self.threshold * np.std(column)
            y_pred = column > cutoff

            # predicted bounds, with nearby events merged
            bounds_pred = find_szr_idx(y_pred, np.array([0, 1]))
            bounds_pred = merge_close(bounds_pred, merge_margin=5)

            # number of predicted events matching a true seizure
            detected = match_szrs(bounds_true, bounds_pred, err_margin=10)

            # update running totals for this feature
            self.df.at[ii, 'total'] += n_true
            self.df.at[ii, 'detected'] += detected
            self.df.at[ii, 'false_positives'] += bounds_pred.shape[0] - detected

    return True
# Get y data; the context manager closes the HDF5 file even if the read fails
with tables.open_file(os.path.join(main_path, 'y_data.h5'), mode='r') as f:
    y_data = f.root.data[:]
# the np.int alias was removed in NumPy 1.24; the builtin int is equivalent
y_data = y_data.astype(int)

# Get x data
with tables.open_file(os.path.join(main_path, 'x_data.h5'), mode='r') as f:
    data = f.root.data[:]
data = data[:, :, ch_num]  # get only desired channels

# Clean and filter data
data = preprocess_data(data, clean=True, filt=True)

# Get features; the labels returned here are replaced below after remapping
x_data, labels = get_features_allch(data, param_list, cross_ch_param_list)

# Normalize data
scaler = StandardScaler()
x_data = scaler.fit_transform(x_data)

# Get multiplied data and remap labels.
# NOTE(review): the product below assumes the single-channel features of
# exactly two channels sit side by side in the leading columns - confirm.
n_single = len(param_list)
n_cross = len(cross_ch_param_list)
cross_start = x_data.shape[1] - n_cross
new_data = np.multiply(x_data[:, 0:n_single], x_data[:, n_single:cross_start])
# keep every cross-channel column so the column count matches the labels
# (the previous slice kept only the last column regardless of n_cross)
x_data = np.concatenate((new_data, x_data[:, cross_start:]), axis=1)
labels = np.array([x.__name__ for x in param_list]
                  + [x.__name__ for x in cross_ch_param_list])

# split training and testing data
x_train, x_test, y_train, y_test = train_test_split(
    x_data, y_data, test_size=test_ratio, shuffle=True)
def folder_loop(self, folder_name):
    """
    folder_loop(self, folder_name)

    Score every row (parameter set) of self.df against the verified
    recordings of one folder. For each row the per-feature threshold
    predictions are combined by a weighted vote and the resulting counts are
    added to the row's 'total', 'detected' and 'false_positives' columns.

    Parameters
    ----------
    folder_name : Str, folder name (joined onto self.main_path)

    Returns
    -------
    bool, False when the verified-predictions directory is missing,
    True after all files were processed
    """
    # get file list
    folder_path = os.path.join(self.main_path, folder_name)
    ver_path = os.path.join(folder_path, 'verified_predictions_pantelis')
    if not os.path.exists(ver_path):  # error check
        print('path not found, skipping:', folder_path, '.')
        return False

    # keep entries whose name contains '.csv' and strip the extension
    filelist = [os.path.splitext(name)[0]
                for name in os.listdir(ver_path) if '.csv' in name]

    for file_id in tqdm(filelist):  # iterate through experiments
        # get data and true labels
        data, y_true = get_data(
            folder_path,
            file_id,
            ch_num=ch_list,
            inner_path={
                'data_path': 'filt_data',
                'pred_path': 'verified_predictions_pantelis',
            },
            load_y=True,
        )

        # get features and normalize them
        x_data, _ = get_features_allch(data, param_list, cross_ch_param_list)
        x_data = StandardScaler().fit_transform(x_data)

        # get bounds of true seizures
        bounds_true = find_szr_idx(y_true, np.array([0, 1]))

        # mean/std are taken over the whole feature matrix, so they do not
        # depend on the parameter row and are computed once per file
        x_mean = np.mean(x_data)
        x_std = np.std(x_data)

        for ii in range(len(self.df)):  # iterate through df rows
            row = self.df.loc[ii]

            # per-feature threshold and boolean predictions
            thresh = x_mean + np.array(row[self.thresh]) * x_std
            y_pred_array = x_data > thresh

            # weighted vote over the enabled features
            w = np.array(row[self.weights])  # get weights
            e = np.array(row[self.enabled])  # get enabled features
            y_pred = np.sum(y_pred_array * w * e, axis=1) / np.sum(w * e)
            y_pred = y_pred > 0.5  # get popular vote

            # get predicted seizure index
            bounds_pred = find_szr_idx(y_pred, np.array([0, 1]))

            detected = 0  # stays 0 when nothing was predicted
            if bounds_pred.shape[0] > 0:
                # merge seizures close together, then match against truth
                bounds_pred = merge_close(bounds_pred, merge_margin=5)
                detected = match_szrs(bounds_true, bounds_pred, err_margin=10)

            # Write with .at: the former self.df[col][ii] += ... form is
            # chained assignment, which may update a temporary copy instead
            # of self.df (always the case under pandas Copy-on-Write).
            self.df.at[ii, 'total'] += bounds_true.shape[0]  # total true
            self.df.at[ii, 'detected'] += detected  # n of detected seizures
            self.df.at[ii, 'false_positives'] += (
                bounds_pred.shape[0] - detected)  # n of false positives

    return True
def file_loop(main_path):
    """
    file_loop(main_path)

    Detect seizures in every verified recording found under main_path using
    the product of feature columns 1 and 9 thresholded at mean + 4 * std,
    print per-file and overall results, and return the overall counts.

    Parameters
    ----------
    main_path : Str, directory containing 'verified_predictions_pantelis'

    Returns
    -------
    true_total : Int, number of true seizures over all files
    total_detected : Int, number of true seizures that were matched
    total_exta : Int, number of predicted events without a match
    """
    # get data list
    ver_path = os.path.join(main_path, 'verified_predictions_pantelis')
    # keep entries whose name contains '.csv' and strip the extension
    filelist = [os.path.splitext(name)[0]
                for name in os.listdir(ver_path) if '.csv' in name]

    true_total = 0
    total_detected = 0
    total_exta = 0

    for file_id in filelist:  # loop through files
        # get data and true labels
        data, y_true = get_data(main_path, file_id, ch_num=num_channels)
        print('->', file_id, 'loaded.')

        # Clean and filter data
        data = preprocess_data(data, clean=True, filt=False)
        print('-> data pre-processed.')

        # Get features and labels
        x_data, feature_labels = get_features_allch(data, param_list,
                                                    cross_ch_param_list)
        print('-> features extracted')

        # Normalize data
        x_data = StandardScaler().fit_transform(x_data)

        # make predictions from the product of two feature columns
        xbest = x_data[:, 1] * x_data[:, 9]
        threshold = np.mean(xbest) + 4 * np.std(xbest)
        y_pred = xbest > threshold

        # get seizure bounds
        bounds_pred = find_szr_idx(y_pred, np.array([0, 1]))  # predicted
        bounds_true = find_szr_idx(y_true, np.array([0, 1]))  # true

        # Default to zero matches so a file without predictions neither
        # raises NameError nor reuses the count from the previous file.
        detected = 0
        if bounds_pred.shape[0] > 0:
            # merge seizures close together
            bounds_pred = merge_close(bounds_pred, merge_margin=5)
            # find matching seizures
            detected = match_szrs(bounds_true, bounds_pred, err_margin=10)

        extra = bounds_pred.shape[0] - detected
        print('Detected', detected, 'out of', bounds_true.shape[0], 'seizures')
        print('+', extra, 'extra \n')

        # Always accumulate, so true seizures in files where nothing was
        # predicted still count towards the total.
        true_total += bounds_true.shape[0]
        total_detected += detected
        total_exta += extra

    print('Total detected', total_detected, 'out of', true_total, 'seizures')
    print(total_exta, 'extra seizures')
    # NOTE(review): `tic` is not defined in this function; presumably a
    # module-level start timestamp set by the caller - confirm.
    print('Time elapsed = ', time.time() - tic, 'seconds.')

    return true_total, total_detected, total_exta
def folder_loop(folder_path, thresh_multiplier=5):
    """
    folder_loop(folder_path, thresh_multiplier=5)

    Threshold every feature separately (mean + thresh_multiplier * std) for
    each verified recording in folder_path and count true, detected and
    extra seizures per feature.

    Parameters
    ----------
    folder_path : Str, directory containing 'verified_predictions_pantelis'
    thresh_multiplier : Number, multiplier of the standard deviation

    Returns
    -------
    df : pandas DataFrame, one row per feature with columns 'true_total',
        'total_detected', 'total_exta' and 'Features'
    szrs : 3D Numpy Array (files, [true, detected, extra], features)

    (False, False) is returned instead when the verified-predictions
    directory does not exist.
    """
    # get file list
    ver_path = os.path.join(folder_path, 'verified_predictions_pantelis')
    if not os.path.exists(ver_path):
        # report the path that was actually checked; the previous message
        # joined onto a global `main_path` that is not an input here
        print('path not found, skipping:', folder_path, '.')
        return False, False

    # keep entries whose name contains '.csv' and strip the extension
    filelist = [os.path.splitext(name)[0]
                for name in os.listdir(ver_path) if '.csv' in name]

    # create feature labels: per-channel features first, then cross-channel
    feature_labels = []
    for n in ch_list:
        feature_labels += [x.__name__ + '_' + str(n) for x in param_list]
    feature_labels += [x.__name__ for x in cross_ch_param_list]
    feature_labels = np.array(feature_labels)

    # create dataframe
    columns = ['true_total', 'total_detected', 'total_exta']
    df = pd.DataFrame(data=np.zeros((len(feature_labels), len(columns))),
                      columns=columns, dtype=np.int64)
    df['Features'] = feature_labels

    # create seizure array
    szrs = np.zeros((len(filelist), 3, feature_labels.shape[0]))

    # total number of analyzed segments (named so it does not shadow `time`)
    n_segments = 0

    for i, file_id in enumerate(tqdm(filelist)):  # loop through experiments
        # get data and true labels
        data, y_true = get_data(
            folder_path,
            file_id,
            ch_num=ch_list,
            inner_path={
                'data_path': 'filt_data',
                'pred_path': 'verified_predictions_pantelis',
            },
            load_y=True,
        )

        ## UNCOMMENT LINE BELOW TO : Clean and filter data
        # data = preprocess_data(data, clean = True, filt = True, verbose = 0)

        # Get features and labels
        x_data, labels = get_features_allch(data, param_list,
                                            cross_ch_param_list)

        # Normalize data
        x_data = StandardScaler().fit_transform(x_data)
        n_segments += x_data.shape[0]

        # true bounds do not depend on the feature, so compute them once
        bounds_true = find_szr_idx(y_true, np.array([0, 1]))
        n_true = bounds_true.shape[0]

        for ii in range(len(feature_labels)):  # iterate through features
            # SD threshold on this feature
            column = x_data[:, ii]
            y_pred = column > (np.mean(column)
                               + thresh_multiplier * np.std(column))

            ## UNCOMMENT LINE BELOW: for running threshold
            ## y_pred = running_std_detection(x_data[:,ii] , 5, int(60/5)*120)

            # predicted seizure bounds
            bounds_pred = find_szr_idx(y_pred, np.array([0, 1]))

            # true number of seizures
            szrs[i, 0, ii] = n_true

            if bounds_pred.shape[0] > 0:
                # merge seizures close together
                bounds_pred = merge_close(bounds_pred, merge_margin=5)
                # find matching seizures
                detected = match_szrs(bounds_true, bounds_pred, err_margin=10)
                szrs[i, 1, ii] = detected  # true seizures detected
                szrs[i, 2, ii] = bounds_pred.shape[0] - detected  # extra

            # add to totals; cast so the int64 columns keep their dtype
            df.at[ii, 'true_total'] += int(szrs[i, 0, ii])
            df.at[ii, 'total_detected'] += int(szrs[i, 1, ii])
            df.at[ii, 'total_exta'] += int(szrs[i, 2, ii])

    # NOTE(review): the factor 5 presumably is the segment length in seconds
    # - confirm against the feature extraction settings.
    print(n_segments * 5 / 60, 'minutes of eeg recordings')
    return df, szrs
def get_feature_pred(self, file_id):
    """
    get_feature_pred(self, file_id)

    Predict seizure bounds for one file from a weighted vote over
    thresholded features.

    Parameters
    ----------
    file_id : Str

    Returns
    -------
    bounds_pred : Numpy Array as returned by find_szr_idx, one row per
        predicted seizure; when at least one seizure is found the bounds
        are merged and refined before being returned
    """
    # single channel features
    single_ch_features = (
        features.autocorr,
        features.line_length,
        features.rms,
        features.mad,
        features.var,
        features.std,
        features.psd,
        features.energy,
        features.get_envelope_max_diff,
    )
    # cross channel features
    cross_ch_features = (
        features.cross_corr,
        features.signal_covar,
        features.signal_abs_covar,
    )

    # load the filtered data only (no labels)
    data = get_data(self.gen_path, file_id, ch_num=ch_list,
                    inner_path={'data_path': 'filt_data'}, load_y=False)

    # extract features and normalize them
    raw_features, _ = get_features_allch(data, single_ch_features,
                                         cross_ch_features)
    x_data = StandardScaler().fit_transform(raw_features)

    # threshold vector: global mean + per-feature multiplier * global std
    thresh = np.mean(x_data) + self.thresh * np.std(x_data)

    # weighted vote restricted to the enabled features
    votes = (x_data > thresh) * self.weights * self.enabled
    score = np.sum(votes, axis=1) / np.sum(self.weights * self.enabled)
    y_pred = score > 0.5  # popular vote

    # predicted seizure index
    bounds_pred = find_szr_idx(y_pred, np.array([0, 1]))

    # nothing detected: return the empty result untouched
    if not bounds_pred.shape[0] > 0:
        return bounds_pred

    # merge seizures close together
    bounds_pred = merge_close(bounds_pred, merge_margin=5)

    # refine using the first feature whose name starts with 'line_length_0'
    name_hits = np.char.find(self.feature_names, 'line_length_0') == 0
    idx = np.where(name_hits)[0][0]
    bounds_pred = self.refine_based_on_surround(x_data[:, idx], bounds_pred)

    return bounds_pred
def folder_loop(self, folder_path):
    """
    folder_loop(self, folder_path)

    For every verified recording in one folder that contains true seizures,
    compute seizure/surround properties per feature and append them to that
    feature's csv file in self.save_folder.

    Parameters
    ----------
    folder_path : Str, to child dir (joined onto self.main_path)

    Returns
    -------
    False when the verified-predictions directory is missing; otherwise the
    function falls off the end and returns None
    """
    # get path
    exp_dir = os.path.join(self.main_path, folder_path)
    ver_path = os.path.join(exp_dir, 'verified_predictions_pantelis')
    if not os.path.exists(ver_path):
        print('path not found, skipping:', exp_dir, '.')
        return False

    # get file list: entries whose name contains '.csv', extension removed
    filelist = [os.path.splitext(name)[0]
                for name in os.listdir(ver_path) if '.csv' in name]

    for file_id in tqdm(filelist):  # loop through experiments
        # get data and true labels
        data, y_true = get_data(
            exp_dir,
            file_id,
            ch_num=ch_list,
            inner_path={
                'data_path': 'filt_data',
                'pred_path': 'verified_predictions_pantelis',
            },
            load_y=True,
        )

        # True seizure bounds are the same for every feature, so compute
        # them once; files without seizures write nothing and are skipped
        # before the feature extraction.
        bounds_true = find_szr_idx(y_true, np.array([0, 1]))
        n_szrs = bounds_true.shape[0]
        if n_szrs == 0:
            continue

        ## UNCOMMENT LINE BELOW TO : Clean and filter data
        # data = preprocess_data(data, clean = True, filt = True, verbose = 0)

        # Get features and labels
        x_data, labels = get_features_allch(data, param_list,
                                            cross_ch_param_list)

        # Normalize data
        x_data = StandardScaler().fit_transform(x_data)

        exp_id = os.path.join(folder_path, file_id)

        for ii in range(len(self.feature_labels)):  # iterate through features
            # create dataframe
            df = pd.DataFrame(data=np.zeros((0, len(self.columns))),
                              columns=self.columns, dtype=np.int64)

            # get seizure and surround properties
            szrs = GetCatalogue.get_surround(x_data[:, ii], bounds_true,
                                             self.time_bins,
                                             self.szr_properties)

            # insert experiment id, seizure start and end
            df['exp_id'] = [exp_id] * n_szrs
            df['szr_start'] = bounds_true[:, 0]
            df['szr_end'] = bounds_true[:, 1]

            # seizure properties fill every column after the third
            df.iloc[:, 3:] = szrs

            # append to this feature's csv
            df.to_csv(os.path.join(self.save_folder,
                                   self.feature_labels[ii] + '.csv'),
                      mode='a', header=False, index=False)
def folder_loop(self, folder_name):
    """
    folder_loop(self, folder_name)

    For every verified recording in one folder whose label sum exceeds 3,
    evaluate each metric in self.metrics on the normalized features and
    append the result as one row to '<metric name>.csv' in self.save_folder.

    Parameters
    ----------
    folder_name : Str, folder name (joined onto self.main_path)

    Returns
    -------
    bool, False when the verified-predictions directory is missing,
    True after all files were processed
    """
    folder_path = os.path.join(self.main_path, folder_name)
    ver_path = os.path.join(folder_path, 'verified_predictions_pantelis')

    # error check
    if not os.path.exists(ver_path):
        print('path not found, skipping:', folder_path, '.')
        return False

    # keep entries whose name contains '.csv' and strip the extension
    file_ids = [os.path.splitext(name)[0]
                for name in os.listdir(ver_path) if '.csv' in name]

    for file_id in tqdm(file_ids):  # iterate through experiments
        # load signal and verified (true) labels
        data, y_true = get_data(
            folder_path,
            file_id,
            ch_num=ch_list,
            inner_path={
                'data_path': 'filt_data',
                'pred_path': 'verified_predictions_pantelis',
            },
            load_y=True,
        )

        # only files with a label sum above 3 are evaluated
        if not np.sum(y_true) > 3:
            continue

        # features, then normalization
        raw_features, _ = get_features_allch(data, param_list,
                                             cross_ch_param_list)
        x_data = StandardScaler().fit_transform(raw_features)

        for metric_fn in self.metrics:
            # one value per feature, laid out as a single row
            row = np.array(metric_fn(x_data, y_true)).reshape(1, -1)

            # dataframe with the experiment id in the first column
            df = pd.DataFrame(data=row, columns=self.feature_labels)
            df.insert(loc=0, column='exp_id', value=file_id)

            # append to the csv named after the metric function
            csv_path = os.path.join(self.save_folder,
                                    metric_fn.__name__ + '.csv')
            df.to_csv(csv_path, mode='a', header=False, index=False)

    return True