def user_cost(y_true, y_pred):
    """
    Compute a detection cost from true and predicted labels.

    The cost is the fraction of true seizures that were not matched plus
    ``log10(false_positives + 1)``.

    Parameters
    ----------
    y_true : 1D ndarray, bool, ground truth values
    y_pred : 1D ndarray, bool, predicted values

    Returns
    -------
    cost : float
    """
    bounds_true = find_szr_idx(y_true, np.array([0, 1]))  # true bounds
    bounds_pred = find_szr_idx(y_pred, np.array([0, 1]))  # predicted bounds

    detected = 0  # number of detected seizures
    if bounds_pred.shape[0] > 0:
        # Merge and match only when something was predicted, as the sibling
        # routines in this file do.
        bounds_pred = merge_close(bounds_pred, merge_margin=5)
        detected = match_szrs(bounds_true, bounds_pred, err_margin=10)

    # With no true seizures nothing can be missed; this also avoids a
    # division by zero.
    n_true = bounds_true.shape[0]
    missed_ratio = 1 - (detected / n_true) if n_true > 0 else 0.0

    false_positives = bounds_pred.shape[0] - detected
    return missed_ratio + np.log10(false_positives + 1)
def folder_loop(self, folder_name):
    """
    Accumulate per-feature detection counts for one folder into self.df.

    Parameters
    ----------
    folder_name : Str, folder name (joined onto self.main_path)

    Returns
    -------
    bool, False when the verified-predictions folder is missing, else True
    """
    ver_path = os.path.join(self.main_path, folder_name, 'verified_predictions_pantelis')
    if not os.path.exists(ver_path):  # error check
        print('path not found, skipping:', os.path.join(self.main_path, folder_name), '.')
        return False

    # prediction files, with the extension stripped
    file_ids = [os.path.splitext(name)[0]
                for name in os.listdir(ver_path) if '.csv' in name]

    for file_id in tqdm(file_ids):  # iterate through experiments
        data, y_true = get_data(
            os.path.join(self.main_path, folder_name),
            file_id,
            ch_num=ch_list,
            inner_path={'data_path': 'filt_data',
                        'pred_path': 'verified_predictions_pantelis'},
            load_y=True,
        )

        # extract and normalize features
        x_data, _ = get_features_allch(data, param_list, cross_ch_param_list)
        x_data = StandardScaler().fit_transform(x_data)

        bounds_true = find_szr_idx(y_true, np.array([0, 1]))
        if bounds_true.shape[0] == 0:
            continue  # nothing to score without true seizures

        for col in range(len(self.feature_labels)):
            feature = x_data[:, col]
            cutoff = np.mean(feature) + self.threshold * np.std(feature)
            y_pred = feature > cutoff

            bounds_pred = find_szr_idx(y_pred, np.array([0, 1]))
            bounds_pred = merge_close(bounds_pred, merge_margin=5)
            detected = match_szrs(bounds_true, bounds_pred, err_margin=10)

            # running totals per feature row
            self.df.at[col, 'total'] += bounds_true.shape[0]
            self.df.at[col, 'detected'] += detected
            self.df.at[col, 'false_positives'] += bounds_pred.shape[0] - detected

    return True
def find_threshold(x_data, y_true, ftr=8):
    """
    Sweep a ``mean + k * std`` threshold over one feature and plot the cost.

    Parameters
    ----------
    x_data : 2D ndarray, indexed as x_data[:, ftr]
    y_true : 1D ndarray, ground truth labels
    ftr : int, column of x_data to threshold (default 8, the value that
        used to be hard-coded)

    Returns
    -------
    cost_array : 1D ndarray, cost for each tested multiplier
    thresh_array : 1D ndarray, tested multipliers (100 values from 1 to 20)
    """
    x = x_data[:, ftr]

    n_loop = 100
    thresh_array = np.linspace(1, 20, n_loop)
    cost_array = np.zeros(n_loop)

    # These do not depend on the threshold, so compute them once.
    x_mean = np.mean(x)
    x_std = np.std(x)
    bounds_true = find_szr_idx(y_true, np.array([0, 1]))  # true

    for i, thresh in enumerate(thresh_array):
        y_pred = x > (x_mean + thresh * x_std)

        # NOTE(review): the sibling routines pass [0, 1] for predictions;
        # confirm that [0, 2] is intended here.
        bounds_pred = find_szr_idx(y_pred, np.array([0, 2]))  # predicted

        # merge seizures close together
        if bounds_pred.shape[0] > 1:
            bounds_pred = merge_close(bounds_pred, merge_margin=5)

        cost_array[i] = create_cost(bounds_true, bounds_pred)

    plt.figure()
    plt.plot(thresh_array, cost_array)
    plt.ylabel('cost')
    plt.xlabel('thresh')
    print('seizures = ', bounds_true.shape[0])
    return cost_array, thresh_array
def main_func(self, file_id):
    """
    Load the predictions and raw data for one file.

    Parameters
    ----------
    file_id : String, name of the prediction csv; the matching raw file
        has the same name with an '.h5' extension

    Returns
    -------
    data : 3d Numpy Array (1D = segments, 2D = time, 3D = channel)
    idx_bounds : 2D Numpy Array (rows = seizures, cols = start and end
        points of detected seizures)
    """
    print('-> File being analyzed: ', file_id,'\n')

    # Get predictions; the second column is thresholded at 0.5
    pred_path = os.path.join(self.rawpred_path, file_id)
    bin_pred = np.loadtxt(pred_path, delimiter=',', skiprows=0)
    idx_bounds = find_szr_idx(bin_pred[:, 1] > 0.5, np.array([0, 1]))

    # Load raw data for visualization; the context manager closes the
    # file even if the read raises.
    data_path = os.path.join(self.org_rawpath, file_id.replace('.csv', '.h5'))
    with tables.open_file(data_path, mode='r') as f:
        data = f.root.data[:]

    print('>>>>', idx_bounds.shape[0], 'seizures detected')
    return data, idx_bounds
def append_func(self, ds_x, ds_y):
    '''
    Append training dataset to pytables datastore for one folder.

    Parameters
    ----------
    ds_x : appendable datastore receiving the x data
    ds_y : appendable datastore receiving the y data

    Files without any seizure bounds are skipped. self.samples_added is
    increased by the number of y samples appended.
    '''
    for file_id in self.filelist:  # loop through files
        print('extracting data from', file_id)

        # Load data; the context manager closes the file even if the
        # read raises.
        with tables.open_file(os.path.join(self.data_path, file_id + '.h5'),
                              mode='r') as f:
            data = f.root.data[:]

        # Get ground truth data
        y = np.loadtxt(os.path.join(self.verpred_path, file_id + '.csv'),
                       delimiter=',', skiprows=0)

        # find seizure segments
        idx_bounds = find_szr_idx(y, np.array([0, 1], dtype=int))
        if idx_bounds.shape[0] == 0:
            continue  # no seizures in this file

        # get x and y data
        x_data, y_data = self.get_data(data, idx_bounds)
        self.samples_added += y_data.shape[0]
        print(self.samples_added, 'samples added')

        # append x and y data to datastore
        ds_x.append(x_data)
        ds_y.append(y_data)
def main_func(main_path):
    """
    Match user-scored seizures against verified predictions and save counts.

    Reads 'organized.json', 'Extracted_seizures.csv' and every csv in
    'verified_predictions_pantelis' under main_path, then writes
    'detected.csv' there with one row per prediction file plus a final
    'Grand_sum' row.

    Parameters
    ----------
    main_path : Str, experiment directory
    """
    # dict load
    settings = lab2mat.load(os.path.join(main_path, 'organized.json'))

    # get user seizures
    df = pd.read_csv(os.path.join(main_path, 'Extracted_seizures.csv'), header=None)

    # get verified predictions file list
    ver_path = os.path.join(main_path, 'verified_predictions_pantelis')
    filelist = list(filter(lambda k: '.csv' in k, os.listdir(ver_path)))
    n_files = len(filelist)

    if len(df) != n_files:  # file check
        print(
            'Warning: length of extracted seizures does not match list of verified predictions!'
        )
    print(str(n_files) + ' files will be analyzed...')

    # One row per analyzed file plus the grand-sum row. Sizing by the file
    # list keeps the 'exp_id' insert valid even when the check above warns.
    df_save = pd.DataFrame(np.zeros((n_files + 1, 3)),
                           columns=['total', 'detected', 'false_positives'])
    df_save.insert(0, 'exp_id', filelist + ['Grand_sum'])

    for i, file_name in enumerate(tqdm(filelist)):
        # get user scored seizure index (gold standard)
        true_idx = get_szr_index(df, file_name.replace('.csv', ''))

        # load seizure index from user-curated model predictions
        rawpred = np.loadtxt(os.path.join(ver_path, file_name),
                             delimiter=',', skiprows=1)

        # NOTE(review): files without predicted seizures keep total = 0
        # even if user-scored seizures exist; confirm this is intended.
        if np.sum(rawpred) > 0:  # check if any seizures were detected
            # get index bounds of semi-manual detected seizures
            pred_bounds = find_szr_idx(rawpred, np.array([0, 1]))
            pred_bounds *= settings['win']  # convert to seconds

            # get matching seizures
            detected, non_detected_idx = get_match(true_idx, pred_bounds)

            # Write through DataFrame.at; chained `df[col].at[i] = ...`
            # may assign to a temporary copy.
            df_save.at[i, 'total'] = true_idx.shape[0]
            df_save.at[i, 'detected'] = detected
            df_save.at[i, 'false_positives'] = pred_bounds.shape[0] - detected

            if true_idx.shape[0] != detected:
                print('not all seizures were detected')
                print(file_name, get_hours(non_detected_idx))

    # Grand totals go in the last row. Index by file count rather than the
    # leaked loop variable, which is undefined for an empty file list.
    df_save.at[n_files, 'total'] = df_save['total'].sum()
    df_save.at[n_files, 'detected'] = df_save['detected'].sum()
    df_save.at[n_files, 'false_positives'] = df_save['false_positives'].sum()

    # save csv
    df_save.to_csv(os.path.join(main_path, 'detected.csv'), index=False)
    print('Metrics for seizure matching completed')
def folder_loop(self, folder_name):
    """
    Score every threshold in self.thresh_array for each file in a folder.

    For each experiment the features are extracted and normalized, then
    self.append_pred is called once per threshold entry; self.df_cntr is
    reset to 0 at the start of every file.

    Parameters
    ----------
    folder_name : Str, folder name (joined onto self.main_path)

    Returns
    -------
    bool, False when the verified-predictions folder is missing, else True
    """
    ver_path = os.path.join(self.main_path, folder_name,
                            'verified_predictions_pantelis')
    if not os.path.exists(ver_path):  # error check
        print('path not found, skipping:',
              os.path.join(self.main_path, folder_name), '.')
        return False

    # prediction files, with the extension stripped
    file_ids = [os.path.splitext(name)[0]
                for name in os.listdir(ver_path) if '.csv' in name]

    for file_id in tqdm(file_ids):  # iterate through experiments
        data, y_true = get_data(
            os.path.join(self.main_path, folder_name),
            file_id,
            ch_num=ch_list,
            inner_path={'data_path': 'filt_data',
                        'pred_path': 'verified_predictions_pantelis'},
            load_y=True,
        )

        # extract and normalize features
        x_data, _ = get_features_allch(data, param_list, cross_ch_param_list)
        x_data = StandardScaler().fit_transform(x_data)

        bounds_true = find_szr_idx(y_true, np.array([0, 1]))

        self.df_cntr = 0  # restart df_cntr
        for k in range(len(self.thresh_array)):
            # threshold relative to the mean and std of all features
            cutoff = np.mean(x_data) + self.thresh_array[k] * np.std(x_data)
            above_cutoff = x_data > cutoff
            self.append_pred(above_cutoff, bounds_true)  # add to self.df

    return True
def save_func(self):
    '''
    Save training dataset to pytables datastore for one folder.

    Creates 'x_data.h5' and 'y_data.h5' and appends the x/y data of every
    file in self.filelist that has at least one seizure bound.
    '''
    atom = tables.Float64Atom()  # declare data type

    # NOTE(review): output paths use the module-level `main_path`, not an
    # attribute of self; confirm this is intended.
    x_path = os.path.join(main_path, 'x_data.h5')
    y_path = os.path.join(main_path, 'y_data.h5')

    # Context managers close both stores even if a file fails mid-way.
    with tables.open_file(x_path, mode='w') as fsave_xdata, \
            tables.open_file(y_path, mode='w') as fsave_ytrue:

        # extendable data stores (first dimension grows on append)
        ds_x = fsave_xdata.create_earray(
            fsave_xdata.root, 'data', atom,
            [0, int(self.win * self.fs), self.chnls])
        ds_y = fsave_ytrue.create_earray(
            fsave_ytrue.root, 'data', atom, [0])

        for file_id in tqdm(self.filelist):  # loop through files
            # load data
            with tables.open_file(os.path.join(self.data_path, file_id + '.h5'),
                                  mode='r') as f:
                data = f.root.data[:]

            # Get ground truth data
            y = np.loadtxt(os.path.join(self.verpred_path, file_id + '.csv'),
                           delimiter=',', skiprows=0)

            # find seizure segments
            idx_bounds = find_szr_idx(y, np.array([0, 1], dtype=int))
            if idx_bounds.shape[0] == 0:
                continue  # no seizures in this file

            # get x and y data and append to the datastores
            x_data, y_data = self.get_data(data, idx_bounds)
            ds_x.append(x_data)
            ds_y.append(y_data)

    print('Training dataset created.')
def find_threshold_all(x_data, y_true):
    """
    Evaluate the cost of a ``mean + thresh * std`` threshold on one feature.

    Plots the feature (column 1 of x_data) together with the threshold
    line, then fills the cost and threshold arrays, stopping early when
    the cost reaches zero.

    Parameters
    ----------
    x_data : 2D ndarray, indexed as x_data[:, 1]
    y_true : 1D ndarray, ground truth labels

    Returns
    -------
    cost_array : 1D ndarray (100,), entries not reached stay 0
    thresh_array : 1D ndarray (100,), entries not reached stay 0
    """
    thresh = 1
    ftr = 1
    x = x_data[:, ftr]

    # Threshold-independent quantities, computed once.
    x_mean = np.mean(x)
    x_std = np.std(x)
    bounds_true = find_szr_idx(y_true, np.array([0, 1]))  # true

    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.plot(x)
    ax.plot(np.ones(x.shape[0]) * (x_mean + thresh * x_std))

    n_loop = 100
    cost_array = np.zeros(n_loop)
    thresh_array = np.zeros(n_loop)

    # NOTE(review): `thresh` is never updated inside this loop, so every
    # iteration evaluates the same threshold; confirm the intended update
    # rule before relying on this sweep.
    for i in range(n_loop):
        thresh_array[i] = thresh
        y_pred = x > (x_mean + thresh * x_std)

        bounds_pred = find_szr_idx(y_pred, np.array([0, 1]))  # predicted

        # merge seizures close together
        if bounds_pred.shape[0] > 1:
            bounds_pred = merge_close(bounds_pred, merge_margin=5)

        cost = create_cost(bounds_true, bounds_pred)  # get cost
        cost_array[i] = cost

        if cost == 0:
            print('cost has reached zero, stopping')
            break

    return cost_array, thresh_array
def get_min_cost(feature, y_true):
    """
    Find the threshold multiplier that gives the minimum cost.

    Tests 100 multipliers between 1 and 20; a sample is predicted
    positive when ``feature > mean(feature) + multiplier * std(feature)``.

    Parameters
    ----------
    feature : 1D ndarray, extracted feature
    y_true : 1D ndarray, bool ground truth labels

    Returns
    -------
    Float, multiplier with the lowest cost (first one on ties)
    """
    n_loop = 100  # loop number and separation
    thresh_array = np.linspace(1, 20, n_loop)  # thresholds to test
    cost_array = np.zeros(n_loop)

    # These do not depend on the threshold, so compute them once.
    feature_mean = np.mean(feature)
    feature_std = np.std(feature)
    bounds_true = find_szr_idx(y_true, np.array([0, 1]))  # true

    for i, thresh in enumerate(thresh_array):
        y_pred = feature > (feature_mean + thresh * feature_std)

        bounds_pred = find_szr_idx(y_pred, np.array([0, 1]))  # predicted

        # merge seizures close together
        if bounds_pred.shape[0] > 1:
            bounds_pred = merge_close(bounds_pred, merge_margin=5)

        cost_array[i] = szr_cost(bounds_true, bounds_pred)  # get cost

    return thresh_array[np.argmin(cost_array)]
def append_pred(self, y_pred_array, bounds_true):
    """
    Adds metrics to self.df

    For every combination of self.weights and self.feature_set a weighted
    vote over the per-feature predictions is thresholded at 0.5, and the
    resulting counts are added to row self.df_cntr of self.df. The counter
    is advanced after each combination.

    Parameters
    ----------
    y_pred_array : np array, bool (rows = time, columns = features)
    bounds_true : np.array (rows = seizures, cols= [start idx, stop idx])
    """
    for i in range(len(self.weights)):
        for ii in range(len(self.feature_set)):

            # weighted vote over the selected features, normalized by the
            # total weight of the selected features
            y_pred = y_pred_array * self.weights[i] * self.feature_set[ii]
            y_pred = np.sum(y_pred, axis=1) / np.sum(
                self.weights[i] * self.feature_set[ii])
            y_pred = y_pred > 0.5  # get popular vote

            bounds_pred = find_szr_idx(y_pred, np.array([0, 1]))

            detected = 0  # set default detected to 0
            if bounds_pred.shape[0] > 0:
                # merge seizures close together, then find matches
                bounds_pred = merge_close(bounds_pred, merge_margin=5)
                detected = match_szrs(bounds_true, bounds_pred, err_margin=10)

            # Update through DataFrame.at; chained `df[col][row] += ...`
            # may write to a temporary copy and leave self.df unchanged.
            # NOTE(review): assumes self.df has a default integer index.
            row = self.df_cntr
            self.df.at[row, 'total'] += bounds_true.shape[0]
            self.df.at[row, 'detected'] += detected
            self.df.at[row, 'false_positives'] += bounds_pred.shape[0] - detected

            self.df_cntr += 1  # update counter
def folder_loop(self, folder_name):
    """
    Score every parameter row of self.df against each file in a folder.

    Each row of self.df supplies thresholds (columns self.thresh), weights
    (columns self.weights) and enabled flags (columns self.enabled). The
    resulting counts are added to that row's 'total', 'detected' and
    'false_positives' columns.

    Parameters
    ----------
    folder_name : Str, folder name (joined onto self.main_path)

    Returns
    -------
    bool, False when the verified-predictions folder is missing, else True
    """
    # get file list
    ver_path = os.path.join(self.main_path, folder_name,
                            'verified_predictions_pantelis')
    if not os.path.exists(ver_path):  # error check
        print('path not found, skipping:',
              os.path.join(self.main_path, folder_name), '.')
        return False

    filelist = [os.path.splitext(x)[0]
                for x in os.listdir(ver_path) if '.csv' in x]

    for file_id in tqdm(filelist):  # iterate through experiments

        # get data and true labels
        data, y_true = get_data(
            os.path.join(self.main_path, folder_name),
            file_id,
            ch_num=ch_list,
            inner_path={'data_path': 'filt_data',
                        'pred_path': 'verified_predictions_pantelis'},
            load_y=True,
        )

        # get features and normalize
        x_data, labels = get_features_allch(data, param_list, cross_ch_param_list)
        x_data = StandardScaler().fit_transform(x_data)

        bounds_true = find_szr_idx(y_true, np.array([0, 1]))

        # Same for every df row, so compute once per file.
        data_mean = np.mean(x_data)
        data_std = np.std(x_data)

        for ii in range(len(self.df)):  # iterate through df

            # detect values bigger than threshold
            thresh = data_mean + np.array(self.df.loc[ii][self.thresh]) * data_std
            y_pred_array = x_data > thresh

            # weighted vote over the enabled features
            w = np.array(self.df.loc[ii][self.weights])  # get weights
            e = np.array(self.df.loc[ii][self.enabled])  # get enabled features
            y_pred = y_pred_array * w * e
            y_pred = np.sum(y_pred, axis=1) / np.sum(w * e)
            y_pred = y_pred > 0.5  # get popular vote

            bounds_pred = find_szr_idx(y_pred, np.array([0, 1]))

            detected = 0  # set default detected to 0
            if bounds_pred.shape[0] > 0:
                # merge seizures close together, then find matches
                bounds_pred = merge_close(bounds_pred, merge_margin=5)
                detected = match_szrs(bounds_true, bounds_pred, err_margin=10)

            # Update through DataFrame.at; chained `df[col][row] += ...`
            # may write to a temporary copy and leave self.df unchanged.
            self.df.at[ii, 'total'] += bounds_true.shape[0]
            self.df.at[ii, 'detected'] += detected
            self.df.at[ii, 'false_positives'] += bounds_pred.shape[0] - detected

    return True
def file_loop(main_path):
    """
    Detect seizures in every file of a folder and report the totals.

    Predictions come from thresholding the product of feature columns 1
    and 9 at mean + 4 * std. Relies on the module-level names
    `num_channels`, `param_list`, `cross_ch_param_list` and `tic`.

    Parameters
    ----------
    main_path : Str, folder containing 'verified_predictions_pantelis'

    Returns
    -------
    true_total : int, number of true seizures over all files
    total_detected : int, number of true seizures that were matched
    total_exta : int, number of predicted seizures without a match
    """
    # get data list
    ver_path = os.path.join(main_path, 'verified_predictions_pantelis')
    filelist = [os.path.splitext(x)[0]
                for x in os.listdir(ver_path) if '.csv' in x]

    true_total = 0
    total_detected = 0
    total_exta = 0

    for file_id in filelist:  # loop through files

        # get data and true labels
        data, y_true = get_data(main_path, file_id, ch_num=num_channels)
        print('->', file_id, 'loaded.')

        # Clean and filter data
        data = preprocess_data(data, clean=True, filt=False)
        print('-> data pre-processed.')

        # Get features and labels
        x_data, feature_labels = get_features_allch(data, param_list,
                                                    cross_ch_param_list)
        print('-> features extracted')

        # Normalize data
        x_data = StandardScaler().fit_transform(x_data)

        # make predictions
        xbest = x_data[:, 1] * x_data[:, 9]
        threshold = np.mean(xbest) + 4 * np.std(xbest)
        y_pred = xbest > threshold

        # get number of seizures
        bounds_pred = find_szr_idx(y_pred, np.array([0, 1]))  # predicted
        bounds_true = find_szr_idx(y_true, np.array([0, 1]))  # true

        # Count true seizures for every file, so files with no predictions
        # still contribute their (missed) seizures to the total.
        true_total += bounds_true.shape[0]

        if bounds_pred.shape[0] == 0:
            continue  # nothing predicted: no detections, no extras

        # merge seizures close together
        bounds_pred = merge_close(bounds_pred, merge_margin=5)

        # find matching seizures
        detected = match_szrs(bounds_true, bounds_pred, err_margin=10)
        extra = bounds_pred.shape[0] - detected

        print('Detected', detected, 'out of', bounds_true.shape[0], 'seizures')
        print('+', extra, 'extra \n')

        total_detected += detected
        total_exta += extra

    print('Total detected', total_detected, 'out of', true_total, 'seizures')
    print(total_exta, 'extra seizures')
    print('Time elapsed = ', time.time() - tic, 'seconds.')
    return true_total, total_detected, total_exta
def folder_loop(folder_path, thresh_multiplier = 5):
    """
    Count true, detected and extra seizures per feature for one folder.

    Each feature column is thresholded at
    ``mean + thresh_multiplier * std`` and the predicted seizures are
    matched against the true ones. Relies on the module-level names
    `ch_list`, `param_list`, `cross_ch_param_list` and `main_path`.

    Parameters
    ----------
    folder_path : Str, folder containing 'verified_predictions_pantelis'
    thresh_multiplier : number of standard deviations above the mean

    Returns
    -------
    df : pandas DataFrame, one row per feature with columns 'true_total',
        'total_detected', 'total_exta' and 'Features'
        (False when the folder is missing)
    szrs : 3D ndarray (files, [true, detected, extra], features)
        (False when the folder is missing)
    """
    # get file list
    ver_path = os.path.join(folder_path, 'verified_predictions_pantelis')
    if not os.path.exists(ver_path):
        # NOTE(review): the message joins the global `main_path`, while the
        # check above uses folder_path directly; confirm which is intended.
        print('path not found, skipping:', os.path.join(main_path, folder_path) ,'.')
        return False, False

    filelist = [os.path.splitext(x)[0]
                for x in os.listdir(ver_path) if '.csv' in x]

    # create feature labels: per-channel features, then cross-channel ones
    feature_labels = []
    for n in ch_list:
        feature_labels += [x.__name__ + '_' + str(n) for x in param_list]
    feature_labels += [x.__name__ for x in cross_ch_param_list]
    feature_labels = np.array(feature_labels)

    # create dataframe
    columns = ['true_total', 'total_detected', 'total_exta']
    df = pd.DataFrame(data=np.zeros((len(feature_labels), len(columns))),
                      columns=columns, dtype=np.int64)
    df['Features'] = feature_labels

    # create seizure array
    szrs = np.zeros((len(filelist), 3, feature_labels.shape[0]))

    # Total number of feature rows analyzed. Named so that it does not
    # shadow the `time` module.
    n_segments = 0

    for i, file_id in enumerate(tqdm(filelist)):  # loop through experiments

        # get data and true labels
        data, y_true = get_data(
            folder_path, file_id, ch_num=ch_list,
            inner_path={'data_path': 'filt_data',
                        'pred_path': 'verified_predictions_pantelis'},
            load_y=True)

        # Get features and labels
        x_data, labels = get_features_allch(data, param_list, cross_ch_param_list)

        # Normalize data
        x_data = StandardScaler().fit_transform(x_data)
        n_segments += x_data.shape[0]

        # True bounds do not depend on the feature; compute once per file.
        bounds_true = find_szr_idx(y_true, np.array([0, 1]))

        for ii in range(len(feature_labels)):  # iterate through features

            # SD threshold
            feature = x_data[:, ii]
            y_pred = feature > (np.mean(feature)
                                + thresh_multiplier * np.std(feature))

            bounds_pred = find_szr_idx(y_pred, np.array([0, 1]))  # predicted

            # get true number of seizures
            szrs[i, 0, ii] = bounds_true.shape[0]

            if bounds_pred.shape[0] > 0:
                # merge seizures close together
                bounds_pred = merge_close(bounds_pred, merge_margin=5)

                # find matching seizures
                detected = match_szrs(bounds_true, bounds_pred, err_margin=10)

                szrs[i, 1, ii] = detected  # true seizures detected
                szrs[i, 2, ii] = bounds_pred.shape[0] - detected  # extra

            # get total numbers
            df.at[ii, 'true_total'] += szrs[i, 0, ii]
            df.at[ii, 'total_detected'] += szrs[i, 1, ii]
            df.at[ii, 'total_exta'] += szrs[i, 2, ii]

    # NOTE(review): the factor 5 presumably is the window length in
    # seconds; confirm against the feature extraction settings.
    print(n_segments*5/60, 'minutes of eeg recordings')
    return df, szrs
def get_feature_pred(self, file_id):
    """
    Predict seizure bounds for one file from thresholded features.

    Features are extracted and normalized, compared against
    ``mean + self.thresh * std``, and combined into a weighted vote
    using self.weights and self.enabled. Any detected bounds are merged
    and then refined with self.refine_based_on_surround on the
    'line_length_0' feature.

    Parameters
    ----------
    file_id : Str

    Returns
    -------
    bounds_pred : 2D Numpy Array (rows = seizures, cols = start and end
        points of detected seizures)
    """
    # single channel features
    single_ch_features = (
        features.autocorr,
        features.line_length,
        features.rms,
        features.mad,
        features.var,
        features.std,
        features.psd,
        features.energy,
        features.get_envelope_max_diff,
    )

    # cross channel features
    cross_ch_features = (
        features.cross_corr,
        features.signal_covar,
        features.signal_abs_covar,
    )

    # load data (no labels)
    data = get_data(self.gen_path, file_id, ch_num=ch_list,
                    inner_path={'data_path': 'filt_data'}, load_y=False)

    # extract features and normalize
    x_data, _ = get_features_allch(data, single_ch_features, cross_ch_features)
    x_data = StandardScaler().fit_transform(x_data)

    # per-feature predictions against the threshold vector
    cutoff = np.mean(x_data) + self.thresh * np.std(x_data)
    votes = (x_data > cutoff) * self.weights * self.enabled

    # normalize to weights and selected features, then take popular vote
    vote_ratio = np.sum(votes, axis=1) / np.sum(self.weights * self.enabled)
    bounds_pred = find_szr_idx(vote_ratio > 0.5, np.array([0, 1]))

    if bounds_pred.shape[0] == 0:
        return bounds_pred  # nothing detected, nothing to refine

    # merge seizures close together
    bounds_pred = merge_close(bounds_pred, merge_margin=5)

    # refine using the first feature whose name starts with 'line_length_0'
    name_hits = np.char.find(self.feature_names, 'line_length_0') == 0
    idx = np.where(name_hits)[0][0]
    return self.refine_based_on_surround(x_data[:, idx], bounds_pred)
def folder_loop(self, folder_path):
    """
    Append seizure and surround properties of one folder to per-feature csvs.

    For every file and every feature in self.feature_labels, the true
    seizure bounds and the values from GetCatalogue.get_surround are
    appended (without header) to '<feature label>.csv' in
    self.save_folder.

    Parameters
    ----------
    folder_path : Str, to child dir

    Returns
    -------
    False when the verified-predictions folder is missing, otherwise None
    """
    exp_dir = os.path.join(self.main_path, folder_path)
    ver_path = os.path.join(self.main_path, folder_path,
                            'verified_predictions_pantelis')
    if not os.path.exists(ver_path):
        print('path not found, skipping:',
              os.path.join(self.main_path, folder_path), '.')
        return False

    # prediction files, with the extension stripped
    file_ids = [os.path.splitext(name)[0]
                for name in os.listdir(ver_path) if '.csv' in name]

    for file_id in tqdm(file_ids):  # loop through experiments
        data, y_true = get_data(
            exp_dir,
            file_id,
            ch_num=ch_list,
            inner_path={'data_path': 'filt_data',
                        'pred_path': 'verified_predictions_pantelis'},
            load_y=True,
        )

        # extract and normalize features
        x_data, _ = get_features_allch(data, param_list, cross_ch_param_list)
        x_data = StandardScaler().fit_transform(x_data)

        for col, label in enumerate(self.feature_labels):
            # empty frame with the catalogue columns
            df = pd.DataFrame(data=np.zeros((0, len(self.columns))),
                              columns=self.columns, dtype=np.int64)

            bounds_true = find_szr_idx(y_true, np.array([0, 1]))  # true
            n_szrs = bounds_true.shape[0]
            if n_szrs == 0:
                continue  # no seizures to catalogue for this file

            # seizure and surround properties for this feature
            szrs = GetCatalogue.get_surround(x_data[:, col], bounds_true,
                                             self.time_bins,
                                             self.szr_properties)

            # experiment id plus seizure start and end
            df['exp_id'] = [os.path.join(folder_path, file_id)] * n_szrs
            df['szr_start'] = bounds_true[:, 0]
            df['szr_end'] = bounds_true[:, 1]

            # remaining columns hold the seizure properties
            df.iloc[:, 3:] = szrs

            # append rows to this feature's csv
            df.to_csv(os.path.join(self.save_folder, label + '.csv'),
                      mode='a', header=False, index=False)