def user_cost(y_true, y_pred):
    """
    Compute a seizure-detection cost from true and predicted labels.

    The cost is ``(1 - detected / n_true) + log10(false_positives + 1)``:
    the first term grows with the fraction of missed seizures, the second
    with the number of predicted events that match no true seizure.

    Parameters
    ----------
    y_true : 1d ndarray, bool, ground truth values
    y_pred : 1d ndarray, bool, predicted values

    Returns
    -------
    cost : float

    Notes
    -----
    When ``y_true`` contains no seizures the missed-seizure term is taken
    as 0 (nothing can be missed) instead of dividing by zero, so the cost
    reduces to the false-positive penalty.
    """
    detected = 0  # number of detected seizures (stays 0 without predictions)

    # get bounds of seizures
    bounds_true = find_szr_idx(y_true, np.array([0, 1]))  # true
    bounds_pred = find_szr_idx(y_pred, np.array([0, 1]))  # predicted

    # merging and matching only make sense when something was predicted
    if bounds_pred.shape[0] > 0:
        bounds_pred = merge_close(bounds_pred, merge_margin=5)  # merge seizures close together
        detected = match_szrs(bounds_true, bounds_pred, err_margin=10)  # find matching seizures

    # calculate cost
    n_true = bounds_true.shape[0]
    missed_ratio = 1 - (detected / n_true) if n_true > 0 else 0.0
    false_positives = bounds_pred.shape[0] - detected
    return missed_ratio + np.log10(false_positives + 1)
def folder_loop(self, folder_name):
    """
    Accumulate per-feature detection counts for one experiment folder.

    Every verified-prediction file in the folder is loaded, its features
    are extracted and standardized, and each feature column is thresholded
    at ``mean + self.threshold * std``. Counts of true, detected and
    false-positive seizures are added to the matching row of ``self.df``.
    Files without any true seizure are skipped.

    Parameters
    ----------
    folder_name : Str, folder name (relative to ``self.main_path``)

    Returns
    -------
    bool, False when the verified predictions folder is missing, else True
    """
    experiment_dir = os.path.join(self.main_path, folder_name)
    ver_path = os.path.join(self.main_path, folder_name, 'verified_predictions_pantelis')

    # error check: nothing to do without verified predictions
    if not os.path.exists(ver_path):
        print('path not found, skipping:', experiment_dir, '.')
        return False

    # keep only files with predictions and drop their extension
    file_ids = [os.path.splitext(fname)[0] for fname in os.listdir(ver_path) if '.csv' in fname]

    for file_id in tqdm(file_ids):  # iterate through experiments

        # get data and true labels
        subfolders = {'data_path': 'filt_data', 'pred_path': 'verified_predictions_pantelis'}
        data, y_true = get_data(experiment_dir, file_id, ch_num=ch_list,
                                inner_path=subfolders, load_y=True)

        # get features and normalize them
        x_data, labels = get_features_allch(data, param_list, cross_ch_param_list)
        x_data = StandardScaler().fit_transform(x_data)

        # get bounds of true seizures
        bounds_true = find_szr_idx(y_true, np.array([0, 1]))
        if bounds_true.shape[0] <= 0:
            continue  # proceed only if seizures are present

        for col in range(len(self.feature_labels)):  # iterate through features
            feature = x_data[:, col]

            # detect segments bigger than threshold
            cutoff = np.mean(feature) + self.threshold * np.std(feature)
            y_pred = feature > cutoff

            # get bounds of predicted seizures and match them to the true ones
            bounds_pred = find_szr_idx(y_pred, np.array([0, 1]))
            bounds_pred = merge_close(bounds_pred, merge_margin=5)
            detected = match_szrs(bounds_true, bounds_pred, err_margin=10)

            # update running totals for this feature
            self.df.at[col, 'total'] += bounds_true.shape[0]
            self.df.at[col, 'detected'] += detected
            self.df.at[col, 'false_positives'] += bounds_pred.shape[0] - detected

    return True
def find_threshold(x_data, y_true, ftr=8, n_loop=100):
    """
    Sweep thresholds on one feature and plot the resulting cost curve.

    For ``n_loop`` evenly spaced multipliers between 1 and 20, the feature
    is thresholded at ``mean + multiplier * std`` and the cost of the
    resulting seizure bounds is computed with ``create_cost``. The curve is
    drawn on a new matplotlib figure and the number of true seizures is
    printed.

    Parameters
    ----------
    x_data : 2d ndarray (rows = time, columns = features)
    y_true : 1d ndarray, bool, ground truth labels
    ftr : int, column of ``x_data`` to threshold. The default is 8.
    n_loop : int, number of thresholds to test. The default is 100.

    Returns
    -------
    cost_array : 1d ndarray, cost for each tested threshold
    thresh_array : 1d ndarray, tested threshold multipliers
    """
    x = x_data[:, ftr]

    cost_array = np.zeros(n_loop)
    thresh_array = np.linspace(1, 20, n_loop)  # thresholds to test

    # these do not depend on the threshold, so compute them once
    x_mean = np.mean(x)
    x_std = np.std(x)
    bounds_true = find_szr_idx(y_true, np.array([0, 1]))  # true

    for i in range(n_loop):
        y_pred = x > (x_mean + thresh_array[i] * x_std)

        # NOTE(review): the predicted bounds use [0, 2] while the true bounds
        # and the sibling functions use [0, 1] -- confirm this is intentional.
        bounds_pred = find_szr_idx(y_pred, np.array([0, 2]))  # predicted

        # merge seizures close together
        if bounds_pred.shape[0] > 1:
            bounds_pred = merge_close(bounds_pred, merge_margin=5)

        cost_array[i] = create_cost(bounds_true, bounds_pred)  # get cost

    plt.figure()
    plt.plot(thresh_array, cost_array)
    plt.ylabel('cost')
    plt.xlabel('thresh')
    print('seizures = ', bounds_true.shape[0])
    return cost_array, thresh_array
def append_pred(self, y_pred_array, bounds_true):
    """
    Adds metrics to self.df

    For every combination of ``self.weights[i]`` and ``self.feature_set[ii]``
    the per-feature predictions are combined into a weighted vote, the
    predicted seizures are matched against the true ones, and the counts are
    added to row ``self.df_cntr`` of ``self.df``. ``self.df_cntr`` is
    incremented after each combination.

    Parameters
    ----------
    y_pred_array : np array, bool (rows = time, columns = features)
    bounds_true : np.array (rows = seizures, cols= [start idx, stop idx])

    Notes
    -----
    Rows are updated with ``DataFrame.at`` rather than chained indexing
    (``df[col][row] += ...``), which is not guaranteed to write back to the
    dataframe.
    """
    for i in range(len(self.weights)):
        for ii in range(len(self.feature_set)):

            # weighted vote across the selected features
            vote_weights = self.weights[i] * self.feature_set[ii]
            y_pred = y_pred_array * self.weights[i] * self.feature_set[ii]
            y_pred = np.sum(y_pred, axis=1) / np.sum(vote_weights)  # normalize to weights and selected features
            y_pred = y_pred > 0.5  # get popular vote

            bounds_pred = find_szr_idx(y_pred, np.array([0, 1]))  # get predicted seizure index

            detected = 0  # set default detected to 0
            if bounds_pred.shape[0] > 0:
                bounds_pred = merge_close(bounds_pred, merge_margin=5)  # merge seizures close together
                detected = match_szrs(bounds_true, bounds_pred, err_margin=10)  # find matching seizures

            # get total numbers
            row = self.df_cntr
            self.df.at[row, 'total'] += bounds_true.shape[0]  # total true
            self.df.at[row, 'detected'] += detected  # n of detected seizures
            self.df.at[row, 'false_positives'] += bounds_pred.shape[0] - detected  # n of false positives
            self.df_cntr += 1  # update counter
def find_threshold_all(x_data, y_true):
    """
    Iteratively raise a threshold on one feature until the cost reaches zero.

    Starting from a multiplier of 1, feature column 1 is thresholded at
    ``mean + multiplier * std`` and the cost of the resulting seizure bounds
    is computed with ``create_cost``. After each step the multiplier is
    increased by the current cost. The search stops after 100 steps or as
    soon as the cost is exactly zero. The feature and the starting threshold
    line are drawn on a new matplotlib figure.

    Parameters
    ----------
    x_data : 2d ndarray (rows = time, columns = features)
    y_true : 1d ndarray, bool, ground truth labels

    Returns
    -------
    cost_array : 1d ndarray, cost at each step (zeros after an early stop)
    thresh_array : 1d ndarray, multiplier at each step (zeros after an early stop)

    Notes
    -----
    The multiplier update (``thresh += cost``) was missing, so every step
    re-evaluated the same threshold and the early stop could only trigger
    on the first step.
    """
    thresh = 1
    ftr = 1
    x = x_data[:, ftr]

    # these do not depend on the threshold, so compute them once
    x_mean = np.mean(x)
    x_std = np.std(x)
    bounds_true = find_szr_idx(y_true, np.array([0, 1]))  # true

    # plot the feature together with the starting threshold line
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.plot(x)
    ax.plot(np.ones(x.shape[0]) * (x_mean + thresh * x_std))

    n_loop = 100
    cost_array = np.zeros(n_loop)
    thresh_array = np.zeros(n_loop)

    for i in range(n_loop):
        thresh_array[i] = thresh
        y_pred = x > (x_mean + thresh * x_std)

        bounds_pred = find_szr_idx(y_pred, np.array([0, 1]))  # predicted

        # merge seizures close together
        if bounds_pred.shape[0] > 1:
            bounds_pred = merge_close(bounds_pred, merge_margin=5)

        cost = create_cost(bounds_true, bounds_pred)  # get cost
        cost_array[i] = cost

        if cost == 0:
            print('cost has reached zero, stopping')
            return cost_array, thresh_array

        thresh += cost  # move the threshold by the remaining cost

    return cost_array, thresh_array
def get_min_cost(feature, y_true):
    """
    Find the threshold multiplier that gives the minimum seizure cost.

    One hundred evenly spaced multipliers between 1 and 20 are tested. For
    each, the feature is thresholded at ``mean + multiplier * std`` and the
    cost of the predicted seizure bounds is computed with ``szr_cost``.

    Parameters
    ----------
    feature : 1D ndarray, extracted feature
    y_true : 1D ndarray, bool ground truth labels

    Returns
    -------
    float, threshold multiplier that gives the minimum cost (the first one
    when several multipliers share the minimum)
    """
    n_loop = 100  # loop number and separation
    thresh_array = np.linspace(1, 20, n_loop)  # thresholds to test
    cost_array = np.zeros(n_loop)

    # these do not depend on the threshold, so compute them once
    feature_mean = np.mean(feature)
    feature_std = np.std(feature)
    bounds_true = find_szr_idx(y_true, np.array([0, 1]))  # true

    for i in range(n_loop):
        y_pred = feature > (feature_mean + thresh_array[i] * feature_std)
        bounds_pred = find_szr_idx(y_pred, np.array([0, 1]))  # predicted

        # merge seizures close together
        if bounds_pred.shape[0] > 1:
            bounds_pred = merge_close(bounds_pred, merge_margin=5)

        cost_array[i] = szr_cost(bounds_true, bounds_pred)  # get cost

    return thresh_array[np.argmin(cost_array)]
def folder_loop(self, folder_name):
    """
    Accumulate detection counts for every row of self.df over one folder.

    Every verified-prediction file in the folder is loaded, its features
    are extracted and standardized, and for each row of ``self.df`` the
    per-feature threshold crossings are combined into a weighted vote. The
    counts of true, detected and false-positive seizures are added to that
    row's 'total', 'detected' and 'false_positives' columns.

    Parameters
    ----------
    folder_name : Str, folder name (relative to ``self.main_path``)

    Returns
    -------
    bool, False when the verified predictions folder is missing, else True

    Notes
    -----
    Rows are addressed by the labels ``0 .. len(self.df) - 1`` and updated
    with ``DataFrame.at`` rather than chained indexing
    (``df[col][row] += ...``), which is not guaranteed to write back to the
    dataframe.
    ``self.thresh``, ``self.weights`` and ``self.enabled`` are presumably
    lists of column labels of ``self.df`` -- confirm against the class.
    """
    folder_path = os.path.join(self.main_path, folder_name)
    ver_path = os.path.join(folder_path, 'verified_predictions_pantelis')
    if not os.path.exists(ver_path):  # error check
        print('path not found, skipping:', folder_path, '.')
        return False

    # get only files with predictions, without the csv ending
    filelist = [os.path.splitext(x)[0] for x in os.listdir(ver_path) if '.csv' in x]

    for i in tqdm(range(0, len(filelist))):  # iterate through experiments

        # get data and true labels
        data, y_true = get_data(folder_path, filelist[i], ch_num=ch_list,
                                inner_path={
                                    'data_path': 'filt_data',
                                    'pred_path': 'verified_predictions_pantelis',
                                },
                                load_y=True)

        x_data, labels = get_features_allch(data, param_list, cross_ch_param_list)  # Get features and labels
        x_data = StandardScaler().fit_transform(x_data)  # Normalize data

        bounds_true = find_szr_idx(y_true, np.array([0, 1]))  # get bounds of true seizures

        # mean/std are taken over the whole feature matrix and do not depend
        # on the dataframe row, so compute them once per file
        x_mean = np.mean(x_data)
        x_std = np.std(x_data)

        for ii in range(len(self.df)):  # iterate through df
            row = self.df.loc[ii]

            # detect segments bigger than threshold
            thresh = x_mean + np.array(row[self.thresh]) * x_std  # get threshold
            y_pred_array = x_data > thresh  # get predictions

            # find predicted seizures
            w = np.array(row[self.weights])  # get weights
            e = np.array(row[self.enabled])  # get enabled features
            y_pred = y_pred_array * w * e  # predictions based on weights and selected features
            y_pred = np.sum(y_pred, axis=1) / np.sum(w * e)  # normalize to weights and selected features
            y_pred = y_pred > 0.5  # get popular vote
            bounds_pred = find_szr_idx(y_pred, np.array([0, 1]))  # get predicted seizure index

            detected = 0  # set default detected to 0
            if bounds_pred.shape[0] > 0:
                bounds_pred = merge_close(bounds_pred, merge_margin=5)  # merge seizures close together
                detected = match_szrs(bounds_true, bounds_pred, err_margin=10)  # find matching seizures

            # get total numbers
            self.df.at[ii, 'total'] += bounds_true.shape[0]  # total true
            self.df.at[ii, 'detected'] += detected  # n of detected seizures
            self.df.at[ii, 'false_positives'] += bounds_pred.shape[0] - detected  # n of false positives

    return True
def file_loop(main_path):
    """
    Detect seizures in every verified file of a folder and report totals.

    Each file is loaded, pre-processed, converted to standardized features,
    and thresholded on the product of feature columns 1 and 9 at
    ``mean + 4 * std``. Predicted seizures are merged and matched against
    the true ones, and per-file and overall counts are printed.

    Parameters
    ----------
    main_path : Str, folder containing 'verified_predictions_pantelis'

    Returns
    -------
    true_total : int, total number of true seizures
    total_detected : total number of detected seizures
    total_exta : total number of predicted seizures matching no true seizure

    Notes
    -----
    Files without any predicted seizure still contribute their true
    seizures to ``true_total`` (with zero detected), so the detection ratio
    is not inflated by silently skipping them.
    Relies on the module-level names ``num_channels`` and ``tic`` (start
    time used for the elapsed-time message).
    """
    # get data list: only files with predictions, without the csv ending
    ver_path = os.path.join(main_path, 'verified_predictions_pantelis')
    filelist = [os.path.splitext(x)[0] for x in os.listdir(ver_path) if '.csv' in x]

    true_total = 0
    total_detected = 0
    total_exta = 0

    for file_id in filelist:  # loop through files

        # get data and true labels
        data, y_true = get_data(main_path, file_id, ch_num=num_channels)
        print('->', file_id, 'loaded.')

        # Clean and filter data
        data = preprocess_data(data, clean=True, filt=False)
        print('-> data pre-processed.')

        # Get features and labels
        x_data, feature_labels = get_features_allch(data, param_list, cross_ch_param_list)
        print('-> features extracted')

        # Normalize data
        x_data = StandardScaler().fit_transform(x_data)

        # make predictions
        xbest = x_data[:, 1] * x_data[:, 9]
        threshold = np.mean(xbest) + 4 * np.std(xbest)
        y_pred = xbest > threshold

        # get number of seizures
        bounds_pred = find_szr_idx(y_pred, np.array([0, 1]))  # predicted
        bounds_true = find_szr_idx(y_true, np.array([0, 1]))  # true

        detected = 0  # nothing can be detected without predictions
        if bounds_pred.shape[0] > 0:
            # merge seizures close together
            bounds_pred = merge_close(bounds_pred, merge_margin=5)

            # find matching seizures
            detected = match_szrs(bounds_true, bounds_pred, err_margin=10)

        extra = bounds_pred.shape[0] - detected
        print('Detected', detected, 'out of', bounds_true.shape[0], 'seizures')
        print('+', extra, 'extra \n')

        # always tally, so files without predictions count as missed seizures
        true_total += bounds_true.shape[0]
        total_detected += detected
        total_exta += extra

    print('Total detected', total_detected, 'out of', true_total, 'seizures')
    print(total_exta, 'extra seizures')
    print('Time elapsed = ', time.time() - tic, 'seconds.')
    return true_total, total_detected, total_exta
def folder_loop(folder_path, thresh_multiplier = 5):
    """
    Count seizures detected by thresholding each feature in one folder.

    Every verified-prediction file in ``folder_path`` is loaded, its
    features are extracted and standardized, and each feature column is
    thresholded at ``mean + thresh_multiplier * std``. Predicted seizures
    are merged and matched against the true ones.

    Parameters
    ----------
    folder_path : Str, folder containing 'filt_data' and
        'verified_predictions_pantelis'
    thresh_multiplier : number of standard deviations above the mean used
        as threshold. The default is 5.

    Returns
    -------
    df : pandas DataFrame, one row per feature with the summed
        'true_total', 'total_detected' and 'total_exta' counts and the
        feature name in 'Features'
    szrs : 3d ndarray (files, [true, detected, extra], features)

    ``(False, False)`` is returned when the verified predictions folder
    does not exist.

    Notes
    -----
    Relies on the module-level names ``ch_list``, ``param_list`` and
    ``cross_ch_param_list``.
    """
    # get file list
    ver_path = os.path.join(folder_path, 'verified_predictions_pantelis')
    if not os.path.exists(ver_path):
        # report the folder that was actually checked (folder_path is
        # already a full path, it is not relative to a global main_path)
        print('path not found, skipping:', folder_path, '.')
        return False, False

    # get only files with predictions, without the csv ending
    filelist = [os.path.splitext(x)[0] for x in os.listdir(ver_path) if '.csv' in x]

    # create feature labels: per-channel features first, then cross-channel ones
    feature_labels = []
    for n in ch_list:
        feature_labels += [x.__name__ + '_' + str(n) for x in param_list]
    feature_labels += [x.__name__ for x in cross_ch_param_list]
    feature_labels = np.array(feature_labels)

    # create dataframe
    columns = ['true_total', 'total_detected', 'total_exta']
    df = pd.DataFrame(data=np.zeros((len(feature_labels), len(columns))),
                      columns=columns, dtype=np.int64)
    df['Features'] = feature_labels

    # create seizure array
    szrs = np.zeros((len(filelist), 3, feature_labels.shape[0]))

    # total number of analyzed segments (named so it does not shadow `time`)
    n_segments = 0
    for i in tqdm(range(0, len(filelist))):  # loop through experiments

        # get data and true labels
        data, y_true = get_data(folder_path, filelist[i], ch_num=ch_list,
                                inner_path={'data_path': 'filt_data',
                                            'pred_path': 'verified_predictions_pantelis'},
                                load_y=True)

        ## UNCOMMENT LINE BELOW TO : Clean and filter data
        # data = preprocess_data(data, clean = True, filt = True, verbose = 0)

        # Get features and labels
        x_data, labels = get_features_allch(data, param_list, cross_ch_param_list)

        # UNCOMMENT LINES BELOW TO : get refined data (multiply channels)
        # new_data = np.multiply(x_data[:,0:len(param_list)],x_data[:,len(param_list):x_data.shape[1]-len(cross_ch_param_list)])
        # x_data = np.concatenate((new_data, x_data[:,x_data.shape[1]-1:]), axis=1)

        # Normalize data
        x_data = StandardScaler().fit_transform(x_data)
        n_segments += x_data.shape[0]

        # the true bounds do not depend on the feature, so get them once per file
        bounds_true = find_szr_idx(y_true, np.array([0, 1]))  # true

        for ii in range(len(feature_labels)):  # iterate through features
            feature = x_data[:, ii]

            # SD threshold
            # (percentile alternative: feature > np.percentile(feature, thresh_multiplier))
            y_pred = feature > (np.mean(feature) + thresh_multiplier * np.std(feature))

            ## UNCOMMENT LINE BELOW: for running threshold
            ## y_pred = running_std_detection(x_data[:,ii] , 5, int(60/5)*120)

            # get number of seizures
            bounds_pred = find_szr_idx(y_pred, np.array([0, 1]))  # predicted

            # get true number of seizures
            szrs[i, 0, ii] = bounds_true.shape[0]

            if bounds_pred.shape[0] > 0:
                # merge seizures close together
                bounds_pred = merge_close(bounds_pred, merge_margin=5)

                # find matching seizures
                detected = match_szrs(bounds_true, bounds_pred, err_margin=10)

                # get number of matching and extra seizures detected
                szrs[i, 1, ii] = detected  # number of true seizures detected
                szrs[i, 2, ii] = bounds_pred.shape[0] - detected  # number of extra seizures detected

            # get total numbers
            df.at[ii, 'true_total'] += szrs[i, 0, ii]
            df.at[ii, 'total_detected'] += szrs[i, 1, ii]
            df.at[ii, 'total_exta'] += szrs[i, 2, ii]

    print(n_segments*5/60, 'minutes of eeg recordings')
    return df, szrs
def get_feature_pred(self, file_id):
    """
    Predict seizure bounds for one file from thresholded features.

    The file's data are loaded from 'filt_data', features are extracted and
    standardized, and each feature is thresholded at
    ``mean + self.thresh * std`` (mean and std taken over the whole feature
    matrix). The per-feature votes are weighted by
    ``self.weights * self.enabled`` and a segment is predicted as seizure
    when the normalized vote exceeds 0.5. Detected seizures are merged and
    then passed to ``self.refine_based_on_surround`` together with the
    'line_length_0' feature.

    Parameters
    ----------
    file_id : Str

    Returns
    -------
    bounds_pred : 2D Numpy Array (rows = seizures, cols = start and end
        points of detected seizures)
    """
    # single channel features
    single_ch_features = (
        features.autocorr,
        features.line_length,
        features.rms,
        features.mad,
        features.var,
        features.std,
        features.psd,
        features.energy,
        features.get_envelope_max_diff,
    )

    # cross channel features
    cross_ch_features = (
        features.cross_corr,
        features.signal_covar,
        features.signal_abs_covar,
    )

    # load the data only (no labels are requested)
    data = get_data(self.gen_path, file_id, ch_num=ch_list,
                    inner_path={'data_path': 'filt_data'}, load_y=False)

    # extract features and normalize them
    x_data, _ = get_features_allch(data, single_ch_features, cross_ch_features)
    x_data = StandardScaler().fit_transform(x_data)

    # threshold vector and per-feature predictions
    cutoff = np.mean(x_data) + self.thresh * np.std(x_data)
    above_cutoff = x_data > cutoff

    # weighted popular vote across the enabled features
    votes = np.sum(above_cutoff * self.weights * self.enabled, axis=1)
    vote_fraction = votes / np.sum(self.weights * self.enabled)
    bounds_pred = find_szr_idx(vote_fraction > 0.5, np.array([0, 1]))

    # nothing to refine when no seizure was detected
    if bounds_pred.shape[0] <= 0:
        return bounds_pred

    # merge seizures close together
    bounds_pred = merge_close(bounds_pred, merge_margin=5)

    # remove seizures where line length is not higher than the preceding region
    name_starts_at = np.char.find(self.feature_names, 'line_length_0')
    idx = np.where(name_starts_at == 0)[0][0]
    return self.refine_based_on_surround(x_data[:, idx], bounds_pred)