Esempio n. 1
0
    def folder_loop(self, folder_name):
        """
        Evaluate every threshold in self.thresh_array on one folder.

        Each csv in the folder's 'verified_predictions_pantelis' directory
        names one experiment. For every experiment the data and true labels
        are loaded, features are extracted and standardized, and the boolean
        prediction array of each threshold is handed to self.append_pred
        together with the bounds of the true seizures.

        Parameters
        ----------
        folder_name : Str, folder name (joined onto self.main_path)

        Returns
        -------
        bool, False if the verified predictions directory is missing,
        True once all experiments have been processed
        """

        exp_dir = os.path.join(self.main_path, folder_name)
        ver_path = os.path.join(exp_dir, 'verified_predictions_pantelis')

        # nothing to analyse without verified predictions
        if not os.path.exists(ver_path):
            print('path not found, skipping:', exp_dir, '.')
            return False

        # experiment ids: prediction csv names without their extension
        file_ids = [
            os.path.splitext(fname)[0] for fname in os.listdir(ver_path)
            if '.csv' in fname
        ]

        for file_id in tqdm(file_ids):  # one pass per experiment

            # load raw data together with the verified (true) labels
            sub_dirs = {
                'data_path': 'filt_data',
                'pred_path': 'verified_predictions_pantelis'
            }
            data, y_true = get_data(exp_dir,
                                    file_id,
                                    ch_num=ch_list,
                                    inner_path=sub_dirs,
                                    load_y=True)

            # feature matrix, standardized column by column
            x_data, labels = get_features_allch(data, param_list,
                                                cross_ch_param_list)
            x_data = StandardScaler().fit_transform(x_data)

            # start/stop bounds of the true seizures
            bounds_true = find_szr_idx(y_true, np.array([0, 1]))

            # restart df_cntr for every experiment
            self.df_cntr = 0
            for thresh_idx in range(len(self.thresh_array)):
                level = self.thresh_array[thresh_idx]

                # cutoff = mean + level * std, taken over the whole matrix
                cutoff = np.mean(x_data) + level * np.std(x_data)

                # add the thresholded predictions to self.df
                self.append_pred(x_data > cutoff, bounds_true)

        return True
    def folder_loop(self, folder_name):
        """
        Accumulate per-feature detection counts for one folder into self.df.

        Each csv in the folder's 'verified_predictions_pantelis' directory
        names one experiment. For every experiment that contains at least one
        true seizure, each feature column is thresholded at
        mean + self.threshold * std and the predicted seizures are compared
        with the true ones. Row ``ii`` of self.df receives the running totals
        for feature ``ii`` in the 'total', 'detected' and 'false_positives'
        columns.

        Parameters
        ----------
        folder_name : Str, folder name (joined onto self.main_path)

        Returns
        -------
        bool, False if the verified predictions directory is missing,
        True once all experiments have been processed
        """

        exp_dir = os.path.join(self.main_path, folder_name)
        ver_path = os.path.join(exp_dir, 'verified_predictions_pantelis')
        if not os.path.exists(ver_path):  # error check
            print('path not found, skipping:', exp_dir, '.')
            return False

        # get only files with predictions, without the csv ending
        filelist = [
            os.path.splitext(fname)[0] for fname in os.listdir(ver_path)
            if '.csv' in fname
        ]

        for file_id in tqdm(filelist):  # iterate through experiments

            # get data and true labels
            data, y_true = get_data(exp_dir,
                                    file_id,
                                    ch_num=ch_list,
                                    inner_path={
                                        'data_path': 'filt_data',
                                        'pred_path':
                                        'verified_predictions_pantelis'
                                    },
                                    load_y=True)

            # Get features and labels
            x_data, labels = get_features_allch(data, param_list,
                                                cross_ch_param_list)

            # Normalize data
            x_data = StandardScaler().fit_transform(x_data)

            # get bounds of true seizures
            bounds_true = find_szr_idx(y_true, np.array([0, 1]))
            n_true = bounds_true.shape[0]
            if n_true == 0:  # proceed only if seizures are present
                continue

            for ii in range(len(self.feature_labels)):  # iterate through features
                feature = x_data[:, ii]

                # detect segments bigger than threshold
                y_pred = feature > (np.mean(feature) +
                                    self.threshold * np.std(feature))

                # get bounds of predicted seizures
                bounds_pred = find_szr_idx(y_pred, np.array([0, 1]))

                # Only merge/match when something was predicted, as the other
                # folder_loop variants do; with no predictions nothing can be
                # detected and there are no false positives.
                detected = 0
                if bounds_pred.shape[0] > 0:
                    # merge seizures close together
                    bounds_pred = merge_close(bounds_pred, merge_margin=5)
                    # find matching seizures
                    detected = match_szrs(bounds_true,
                                          bounds_pred,
                                          err_margin=10)

                # get total numbers
                self.df.at[ii, 'total'] += n_true
                self.df.at[ii, 'detected'] += detected
                self.df.at[ii,
                           'false_positives'] += bounds_pred.shape[0] - detected
        return True
# Get y data
# The context manager closes the HDF5 file even if reading raises.
with tables.open_file(os.path.join(main_path, 'y_data.h5'), mode='r') as f:
    y_data = f.root.data[:]
# np.int was only an alias of the builtin int and has been removed from NumPy.
y_data = y_data.astype(int)

# Get x data
with tables.open_file(os.path.join(main_path, 'x_data.h5'), mode='r') as f:
    data = f.root.data[:]
data = data[:, :, ch_num]  # get only desired channels

# Clean and filter data
data = preprocess_data(data, clean=True, filt=True)

# Get features and labels
x_data, labels = get_features_allch(data, param_list, cross_ch_param_list)
labels = np.array(labels)  # convert to np array

# Normalize data
scaler = StandardScaler()
x_data = scaler.fit_transform(x_data)

# get multiplied data and remap labels
# NOTE(review): the slicing below assumes the feature matrix is laid out as
# [single-channel features of channel A | single-channel features of channel B
# | cross-channel features], i.e. exactly two channels - confirm against
# get_features_allch.
n_single = len(param_list)
n_cross = len(cross_ch_param_list)
n_cols = x_data.shape[1]
new_data = np.multiply(x_data[:, 0:n_single],
                       x_data[:, n_single:n_cols - n_cross])
# Keep every cross-channel column (not just the last one) so the number of
# columns matches the number of labels built below.
x_data = np.concatenate((new_data, x_data[:, n_cols - n_cross:]), axis=1)
labels = [x.__name__ for x in param_list]
labels += [x.__name__ for x in cross_ch_param_list]
labels = np.array(labels)

# split training and testing data
x_train, x_test, y_train, y_test = train_test_split(x_data,
                                                    y_data,
                                                    test_size=test_ratio,
                                                    shuffle=True)
Esempio n. 4
0
    def folder_loop(self, folder_name):
        """
        Accumulate detection counts for every row of self.df on one folder.

        Each csv in the folder's 'verified_predictions_pantelis' directory
        names one experiment. For every experiment the features are extracted
        and standardized; then, for each row of self.df, the columns named by
        self.thresh, self.weights and self.enabled supply the thresholds,
        weights and enabled flags used for a weighted vote across features.
        The resulting predicted seizures are matched against the true ones and
        the row's 'total', 'detected' and 'false_positives' are incremented.

        Parameters
        ----------
        folder_name : Str, folder name (joined onto self.main_path)

        Returns
        -------
        bool, False if the verified predictions directory is missing,
        True once all experiments have been processed
        """

        exp_dir = os.path.join(self.main_path, folder_name)
        ver_path = os.path.join(exp_dir, 'verified_predictions_pantelis')
        if not os.path.exists(ver_path):  # error check
            print('path not found, skipping:', exp_dir, '.')
            return False

        # get only files with predictions, without the csv ending
        filelist = [
            os.path.splitext(fname)[0] for fname in os.listdir(ver_path)
            if '.csv' in fname
        ]

        for file_id in tqdm(filelist):  # iterate through experiments

            # get data and true labels
            data, y_true = get_data(exp_dir,
                                    file_id,
                                    ch_num=ch_list,
                                    inner_path={
                                        'data_path': 'filt_data',
                                        'pred_path':
                                        'verified_predictions_pantelis'
                                    },
                                    load_y=True)

            # Get features and labels, then normalize
            x_data, labels = get_features_allch(data, param_list,
                                                cross_ch_param_list)
            x_data = StandardScaler().fit_transform(x_data)

            # get bounds of true seizures
            bounds_true = find_szr_idx(y_true, np.array([0, 1]))

            for ii in range(len(self.df)):  # iterate through df rows
                row = self.df.loc[ii]

                # threshold = mean + multiplier * std
                thresh = (np.mean(x_data) +
                          np.array(row[self.thresh]) * np.std(x_data))
                y_pred_array = x_data > thresh  # get predictions

                # weighted vote over the enabled features
                w = np.array(row[self.weights])  # get weights
                e = np.array(row[self.enabled])  # get enabled features
                y_pred = y_pred_array * w * e
                y_pred = np.sum(y_pred, axis=1) / np.sum(w * e)
                y_pred = y_pred > 0.5  # get popular vote
                bounds_pred = find_szr_idx(y_pred, np.array([0, 1]))

                detected = 0  # nothing can be detected without predictions
                if bounds_pred.shape[0] > 0:
                    # merge seizures close together
                    bounds_pred = merge_close(bounds_pred, merge_margin=5)
                    # find matching seizures
                    detected = match_szrs(bounds_true,
                                          bounds_pred,
                                          err_margin=10)

                # Write with .at instead of chained self.df[col][ii] += ...:
                # chained assignment may update a temporary copy and leave
                # self.df unchanged (always so under pandas Copy-on-Write).
                self.df.at[ii, 'total'] += bounds_true.shape[0]  # total true
                self.df.at[ii, 'detected'] += detected  # n of detected seizures
                self.df.at[ii, 'false_positives'] += (
                    bounds_pred.shape[0] - detected)  # n of false positives

        return True
def file_loop(main_path):
    """
    Detect seizures in every verified file of a folder and tally the results.

    For each csv in 'verified_predictions_pantelis' the data and true labels
    are loaded, pre-processed (clean only, no filtering), converted to
    standardized features, and thresholded on the product of feature columns
    1 and 9 at mean + 4 * std. Predicted seizures are merged and matched
    against the true ones.

    True seizures are counted for every file, including files in which
    nothing was predicted, so that missed seizures lower the detection rate.

    Relies on the module-level names num_channels, param_list,
    cross_ch_param_list and tic (start time used for the elapsed-time print).

    Parameters
    ----------
    main_path : Str, folder that contains 'verified_predictions_pantelis'

    Returns
    -------
    true_total : number of true seizures over all files
    total_detected : number of true seizures matched by a prediction
    total_exta : number of predicted seizures that matched no true seizure
    """

    # get data list: prediction csv names without their extension
    ver_path = os.path.join(main_path, 'verified_predictions_pantelis')
    filelist = [
        os.path.splitext(fname)[0] for fname in os.listdir(ver_path)
        if '.csv' in fname
    ]

    true_total = 0
    total_detected = 0
    total_exta = 0
    for file_id in filelist:  # loop through files

        # get data and true labels
        data, y_true = get_data(main_path, file_id, ch_num=num_channels)
        print('->', file_id, 'loaded.')

        # Clean data (filtering is disabled here)
        data = preprocess_data(data, clean=True, filt=False)
        print('-> data pre-processed.')

        # Get features and labels
        x_data, feature_labels = get_features_allch(data, param_list,
                                                    cross_ch_param_list)
        print('-> features extracted')

        # Normalize data
        x_data = StandardScaler().fit_transform(x_data)

        # make predictions from the product of two hard-coded feature columns
        xbest = x_data[:, 1] * x_data[:, 9]
        threshold = np.mean(xbest) + 4 * np.std(xbest)
        y_pred = xbest > threshold

        # get seizure bounds
        bounds_pred = find_szr_idx(y_pred, np.array([0, 1]))  # predicted
        bounds_true = find_szr_idx(y_true, np.array([0, 1]))  # true

        detected = 0  # nothing can be detected without predictions
        if bounds_pred.shape[0] > 0:
            # merge seizures close together
            bounds_pred = merge_close(bounds_pred, merge_margin=5)

            # find matching seizures
            detected = match_szrs(bounds_true, bounds_pred, err_margin=10)
        extra = bounds_pred.shape[0] - detected

        print('Detected', detected, 'out of', bounds_true.shape[0],
              'seizures')
        print('+', extra, 'extra \n')

        # tally every file so that completely missed seizures are counted
        true_total += bounds_true.shape[0]
        total_detected += detected
        total_exta += extra

    print('Total detected', total_detected, 'out of', true_total, 'seizures')
    print(total_exta, 'extra seizures')
    print('Time elapsed = ', time.time() - tic, 'seconds.')
    return true_total, total_detected, total_exta
Esempio n. 6
0
def folder_loop(folder_path, thresh_multiplier = 5):
    """
    Count true, detected and extra seizures per feature for one folder.

    Each csv in the folder's 'verified_predictions_pantelis' directory names
    one experiment. Every feature column is thresholded on its own at
    mean + thresh_multiplier * std and the predicted seizures are matched
    against the true ones.

    Relies on the module-level names ch_list, param_list and
    cross_ch_param_list.

    Parameters
    ----------
    folder_path : Str, path to the folder holding the experiment data
    thresh_multiplier : number of standard deviations above the mean used as
        the detection threshold, default 5

    Returns
    -------
    df : pandas DataFrame, one row per feature with the summed 'true_total',
        'total_detected' and 'total_exta' counts and a 'Features' column
    szrs : 3D Numpy Array (files, [true, detected, extra], features)

    (False, False) is returned instead when the verified predictions
    directory does not exist.
    """

    # get file list
    ver_path = os.path.join(folder_path, 'verified_predictions_pantelis')
    if not os.path.exists(ver_path):
        # report the folder that was actually checked; the message no longer
        # depends on a module-level main_path being defined
        print('path not found, skipping:', folder_path, '.')
        return False, False
    filelist = [
        os.path.splitext(fname)[0] for fname in os.listdir(ver_path)
        if '.csv' in fname
    ]  # only files with predictions, csv ending removed

    # create feature labels: per-channel features first, cross-channel last
    feature_labels = []
    for n in ch_list:
        feature_labels += [x.__name__ + '_' + str(n) for x in param_list]
    feature_labels += [x.__name__ for x in cross_ch_param_list]
    feature_labels = np.array(feature_labels)

    # create dataframe
    columns = ['true_total', 'total_detected', 'total_exta']
    df = pd.DataFrame(data=np.zeros((len(feature_labels), len(columns))),
                      columns=columns,
                      dtype=np.int64)
    df['Features'] = feature_labels

    # create seizure array
    szrs = np.zeros((len(filelist), 3, feature_labels.shape[0]))

    # total number of analyzed segments (named so that it does not shadow
    # the time module)
    n_segments = 0
    for i, file_id in enumerate(tqdm(filelist)):  # loop through experiments

        # get data and true labels
        data, y_true = get_data(folder_path,
                                file_id,
                                ch_num=ch_list,
                                inner_path={
                                    'data_path': 'filt_data',
                                    'pred_path':
                                    'verified_predictions_pantelis'
                                },
                                load_y=True)

        ## UNCOMMENT LINE BELOW TO : Clean and filter data
        # data = preprocess_data(data,  clean = True, filt = True, verbose = 0)

        # Get features and labels
        x_data, labels = get_features_allch(data, param_list,
                                            cross_ch_param_list)

        #  UNCOMMENT LINES BELOW TO : get refined data (multiply channels)
        # new_data = np.multiply(x_data[:,0:len(param_list)],x_data[:,len(param_list):x_data.shape[1]-len(cross_ch_param_list)])
        # x_data = np.concatenate((new_data, x_data[:,x_data.shape[1]-1:]), axis=1)

        # Normalize data
        x_data = StandardScaler().fit_transform(x_data)

        n_segments += x_data.shape[0]

        # The true seizures do not depend on the feature, so find them once
        # per file instead of once per feature.
        bounds_true = find_szr_idx(y_true, np.array([0, 1]))
        szrs[i, 0, :] = bounds_true.shape[0]

        for ii in range(len(feature_labels)):  # iterate through features
            feature = x_data[:, ii]

            # SD threshold
            y_pred = feature > (np.mean(feature) +
                                thresh_multiplier * np.std(feature))

            # Alternative: percentile threshold
            # y_pred = x_data[:,ii]> np.percentile(x_data[:,ii], thresh_multiplier)
            ## Alternative: running threshold
            ## y_pred = running_std_detection(x_data[:,ii] , 5, int(60/5)*120)

            # get predicted seizures
            bounds_pred = find_szr_idx(y_pred, np.array([0, 1]))

            if bounds_pred.shape[0] > 0:

                # merge seizures close together
                bounds_pred = merge_close(bounds_pred, merge_margin=5)

                # find matching seizures
                detected = match_szrs(bounds_true, bounds_pred, err_margin=10)

                # get number of matching and extra seizures detected
                szrs[i, 1, ii] = detected
                szrs[i, 2, ii] = bounds_pred.shape[0] - detected

            # get total numbers
            df.at[ii, 'true_total'] += szrs[i, 0, ii]
            df.at[ii, 'total_detected'] += szrs[i, 1, ii]
            df.at[ii, 'total_exta'] += szrs[i, 2, ii]

    # NOTE(review): the factor 5/60 assumes each segment lasts 5 seconds - confirm
    print(n_segments * 5 / 60, 'minutes of eeg recordings')
    return df, szrs
    def get_feature_pred(self, file_id):
        """
        Predict seizure bounds for one file from a weighted feature vote.

        The file is loaded from the 'filt_data' directory under
        self.gen_path, features are extracted and standardized, and each
        feature is thresholded using self.thresh. The boolean results are
        combined with self.weights and self.enabled into a vote; segments with
        a normalized vote above 0.5 are marked as seizure. Detected seizures
        are merged when close together and then refined with
        self.refine_based_on_surround using the 'line_length_0' feature.

        Parameters
        ----------
        file_id : Str

        Returns
        -------
        bounds_pred : 2D Numpy Array (rows = seizures, cols = start and end points of detected seizures)

        """

        # single channel features
        single_ch_features = (
            features.autocorr,
            features.line_length,
            features.rms,
            features.mad,
            features.var,
            features.std,
            features.psd,
            features.energy,
            features.get_envelope_max_diff,
        )
        # cross channel features
        cross_ch_features = (
            features.cross_corr,
            features.signal_covar,
            features.signal_abs_covar,
        )

        # load the data only (no labels are requested)
        data = get_data(self.gen_path,
                        file_id,
                        ch_num=ch_list,
                        inner_path={'data_path': 'filt_data'},
                        load_y=False)

        # feature matrix, standardized column by column
        x_data, labels = get_features_allch(data, single_ch_features,
                                            cross_ch_features)
        x_data = StandardScaler().fit_transform(x_data)

        # threshold = mean + self.thresh * std
        cutoff = np.mean(x_data) + self.thresh * np.std(x_data)
        above_cutoff = x_data > cutoff

        # weighted vote over the enabled features, normalized by total weight
        votes = above_cutoff * self.weights * self.enabled
        vote_share = np.sum(votes, axis=1) / np.sum(
            self.weights * self.enabled)
        y_pred = vote_share > 0.5  # get popular vote

        # get predicted seizure index
        bounds_pred = find_szr_idx(y_pred, np.array([0, 1]))

        # nothing to refine when no seizure was detected
        if bounds_pred.shape[0] == 0:
            return bounds_pred

        # Merge seizures close together
        bounds_pred = merge_close(bounds_pred, merge_margin=5)

        # Refine using the first feature whose name starts with 'line_length_0'
        name_match = np.char.find(self.feature_names, 'line_length_0') == 0
        idx = np.where(name_match)[0][0]
        return self.refine_based_on_surround(x_data[:, idx], bounds_pred)
    def folder_loop(self, folder_path):
        """
        Append seizure and surround properties of one folder to per-feature csv files.

        Each csv in the folder's 'verified_predictions_pantelis' directory
        names one experiment. For every experiment that contains true
        seizures, and for every feature in self.feature_labels, the values
        returned by GetCatalogue.get_surround are appended (without header) to
        '<feature label>.csv' inside self.save_folder, one row per seizure.

        Parameters
        ----------
        folder_path : Str, to child dir

        Returns
        -------
        bool, False if the verified predictions directory is missing,
        True once all experiments have been processed
        """

        # get path
        exp_dir = os.path.join(self.main_path, folder_path)
        ver_path = os.path.join(exp_dir, 'verified_predictions_pantelis')
        if not os.path.exists(ver_path):
            print('path not found, skipping:', exp_dir, '.')
            return False

        # get only files with predictions, without the csv ending
        filelist = [
            os.path.splitext(fname)[0] for fname in os.listdir(ver_path)
            if '.csv' in fname
        ]

        for file_id in tqdm(filelist):  # loop through experiments

            # get data and true labels
            data, y_true = get_data(exp_dir,
                                    file_id,
                                    ch_num=ch_list,
                                    inner_path={
                                        'data_path': 'filt_data',
                                        'pred_path':
                                        'verified_predictions_pantelis'
                                    },
                                    load_y=True)

            # The true seizure bounds are the same for every feature, so find
            # them once per file; files without seizures write nothing and
            # can be skipped before the feature extraction.
            bounds_true = find_szr_idx(y_true, np.array([0, 1]))
            if bounds_true.shape[0] == 0:
                continue

            ## UNCOMMENT LINE BELOW TO : Clean and filter data
            # data = preprocess_data(data,  clean = True, filt = True, verbose = 0)

            # Get features and labels
            x_data, labels = get_features_allch(data, param_list,
                                                cross_ch_param_list)

            # Normalize data
            x_data = StandardScaler().fit_transform(x_data)

            exp_id = os.path.join(folder_path, file_id)
            for ii in range(len(self.feature_labels)):  # iterate through features

                # create an empty dataframe with the catalogue columns
                df = pd.DataFrame(data=np.zeros((0, len(self.columns))),
                                  columns=self.columns,
                                  dtype=np.int64)

                # get seizure and surround properties
                szrs = GetCatalogue.get_surround(x_data[:, ii], bounds_true,
                                                 self.time_bins,
                                                 self.szr_properties)

                # insert experiment id, seizure start and end
                df['exp_id'] = [exp_id] * bounds_true.shape[0]
                df['szr_start'] = bounds_true[:, 0]
                df['szr_end'] = bounds_true[:, 1]

                # append seizure properties
                # NOTE(review): assumes the first three entries of self.columns
                # are exp_id, szr_start and szr_end - confirm
                df.iloc[:, 3:] = szrs

                # append to the csv of this feature
                df.to_csv(os.path.join(self.save_folder,
                                       self.feature_labels[ii] + '.csv'),
                          mode='a',
                          header=False,
                          index=False)

        # report success explicitly, like the other folder_loop variants
        return True
    def folder_loop(self, folder_name):
        """
        Append per-experiment feature metrics of one folder to csv files.

        Each csv in the folder's 'verified_predictions_pantelis' directory
        names one experiment. Experiments whose true labels sum to more than 3
        have their features extracted and standardized; every callable in
        self.metrics is then applied to the features and true labels and its
        result is appended (without header) as one row to
        '<metric name>.csv' inside self.save_folder.

        Parameters
        ----------
        folder_name : Str, folder name (joined onto self.main_path)

        Returns
        -------
        bool, False if the verified predictions directory is missing,
        True once all experiments have been processed
        """

        exp_dir = os.path.join(self.main_path, folder_name)
        ver_path = os.path.join(exp_dir, 'verified_predictions_pantelis')

        # nothing to analyse without verified predictions
        if not os.path.exists(ver_path):
            print('path not found, skipping:', exp_dir, '.')
            return False

        # experiment ids: prediction csv names without their extension
        file_ids = [
            os.path.splitext(fname)[0] for fname in os.listdir(ver_path)
            if '.csv' in fname
        ]

        for file_id in tqdm(file_ids):  # one pass per experiment

            # load raw data together with the verified (true) labels
            sub_dirs = {
                'data_path': 'filt_data',
                'pred_path': 'verified_predictions_pantelis'
            }
            data, y_true = get_data(exp_dir,
                                    file_id,
                                    ch_num=ch_list,
                                    inner_path=sub_dirs,
                                    load_y=True)

            # skip experiments unless the true labels sum to more than 3
            if not (np.sum(y_true) > 3):
                continue

            # feature matrix, standardized column by column
            x_data, labels = get_features_allch(data, param_list,
                                                cross_ch_param_list)
            x_data = StandardScaler().fit_transform(x_data)

            for metric_fn in self.metrics:
                # one value per feature for this experiment
                values = metric_fn(x_data, y_true)

                # single-row dataframe: experiment id followed by the values
                row = pd.DataFrame(data=np.array(values).reshape(1, -1),
                                   columns=self.feature_labels)
                row.insert(loc=0, column='exp_id', value=file_id)

                # append the row to the csv named after the metric
                out_file = os.path.join(self.save_folder,
                                        metric_fn.__name__ + '.csv')
                row.to_csv(out_file, mode='a', header=False, index=False)

        return True