def szr_cost(bounds_true, bounds_pred):
    """
    szr_cost(bounds_true, bounds_pred)

    Compute a scalar cost for predicted seizure bounds. The cost is
    ``a + log10(b + 1)`` where ``a`` is the missed-seizure term and ``b`` is
    the number of predicted seizures that were not matched to a true seizure.

    Parameters
    ----------
    bounds_true : 2d ndarray (rows = seizures, columns = start,stop), ground truth
    bounds_pred : 2d ndarray (rows = seizures, columns = start,stop), predicted

    Returns
    -------
    cost : float
        ``a`` is 0 when every true seizure is detected (or when there are no
        true seizures) and a flat penalty of 20 as soon as any is missed.

    """
    n_true = bounds_true.shape[0]
    n_pred = bounds_pred.shape[0]

    # find matching seizures (only attempted when something was predicted)
    detected = 0
    if n_pred > 0:
        detected = match_szrs(bounds_true, bounds_pred, err_margin=10)

    # Missed-seizure term. Without true seizures nothing can be missed, so the
    # term defaults to 0; previously `a` was left unassigned in that case and
    # the function raised UnboundLocalError.
    a = 0
    if n_true > 0:
        a = 1 - (detected / n_true)  # fraction of true seizures missed
        if 0 < a <= 1:
            a = 20  # flat penalty for missing at least one seizure

    # false positives: predictions that did not match a true seizure
    b = n_pred - detected

    # cost function
    return a + np.log10(b + 1)
def user_cost(y_true, y_pred):
    """
    user_cost(y_true, y_pred)

    Compute a scalar cost from sample-wise labels. Seizure bounds are
    extracted from both label vectors, predicted seizures are merged and
    matched against the true ones, and the cost is ``a + log10(b + 1)`` where
    ``a`` is the fraction of true seizures missed and ``b`` the number of
    unmatched predicted seizures.

    Parameters
    ----------
    y_true : 1ndarray bool, ground truth values
    y_pred : 1ndarray bool, predicted values

    Returns
    -------
    cost : float
    """

    # get bounds of seizures
    bounds_true = find_szr_idx(y_true, np.array([0, 1]))  # total true
    bounds_pred = find_szr_idx(y_pred, np.array([0, 1]))  # total predicted

    detected = 0  # number of detected seizures
    if bounds_pred.shape[0] > 0:
        # merging and matching are only meaningful when something was predicted
        bounds_pred = merge_close(bounds_pred, merge_margin=5)
        detected = match_szrs(bounds_true, bounds_pred, err_margin=10)

    # Missed ratio. With no true seizures nothing can be missed, so the term
    # is 0 instead of dividing by zero.
    n_true = bounds_true.shape[0]
    a = 1 - (detected / n_true) if n_true > 0 else 0

    b = bounds_pred.shape[0] - detected  # false positives
    return a + np.log10(b + 1)  # cost function
    def folder_loop(self, folder_name):
        """
        folder_loop(self, folder_name)

        For every verified-prediction file in the folder, threshold each
        feature column and add the resulting seizure counts to the 'total',
        'detected' and 'false_positives' columns of self.df (one row per
        feature). Files without true seizures are skipped.

        Parameters
        ----------
        folder_name : Str, folder name

        Returns
        -------
        bool
            False when the verified-predictions folder does not exist,
            True otherwise.
        """

        # get file list
        folder_path = os.path.join(self.main_path, folder_name)
        ver_path = os.path.join(folder_path, 'verified_predictions_pantelis')
        if not os.path.exists(ver_path):  # error check
            print('path not found, skipping:', folder_path, '.')
            return False

        # keep only files with predictions and remove the csv ending
        filelist = [os.path.splitext(name)[0]
                    for name in os.listdir(ver_path) if '.csv' in name]

        for file_id in tqdm(filelist):  # iterate through experiments

            # get data and true labels
            data, y_true = get_data(folder_path, file_id, ch_num=ch_list,
                                    inner_path={'data_path': 'filt_data',
                                                'pred_path': 'verified_predictions_pantelis'},
                                    load_y=True)

            # Get features (the returned labels are not used here)
            x_data, _ = get_features_allch(data, param_list, cross_ch_param_list)

            # Normalize data
            x_data = StandardScaler().fit_transform(x_data)

            # get bounds of true seizures
            bounds_true = find_szr_idx(y_true, np.array([0, 1]))
            if bounds_true.shape[0] == 0:  # proceed only if seizures are present
                continue

            for ii in range(len(self.feature_labels)):  # iterate through features
                feature = x_data[:, ii]

                # detect seizures bigger than threshold
                y_pred = feature > (np.mean(feature) + self.threshold * np.std(feature))

                # get bounds of predicted seizures
                bounds_pred = find_szr_idx(y_pred, np.array([0, 1]))

                # Merge/match only when something was predicted, as the other
                # scoring routines in this file do; otherwise nothing is detected.
                detected = 0
                if bounds_pred.shape[0] > 0:
                    bounds_pred = merge_close(bounds_pred, merge_margin=5)
                    detected = match_szrs(bounds_true, bounds_pred, err_margin=10)

                # get total numbers
                self.df.at[ii, 'total'] += bounds_true.shape[0]
                self.df.at[ii, 'detected'] += detected
                self.df.at[ii, 'false_positives'] += bounds_pred.shape[0] - detected
        return True
Beispiel #4
0
    def append_pred(self, y_pred_array, bounds_true):
        """
        Adds metrics to self.df

        For every combination of self.weights[i] and self.feature_set[ii] a
        weighted vote over the feature predictions is thresholded at 0.5, the
        resulting seizures are matched against bounds_true, and the counts
        are added to the self.df row pointed to by self.df_cntr, which is then
        advanced by one.

        Parameters
        ----------
        y_pred_array : np array, bool (rows = time, columns = features)
        bounds_true : np.array (rows = seizures, cols= [start idx, stop idx])
        """

        n_true = bounds_true.shape[0]  # total true seizures, same for all rows

        for i in range(len(self.weights)):
            for ii in range(len(self.feature_set)):

                # get predictions based on weights and selected features
                y_pred = y_pred_array * self.weights[i] * self.feature_set[ii]

                # normalize to weights and selected features
                y_pred = np.sum(y_pred, axis=1) / np.sum(
                    self.weights[i] * self.feature_set[ii])
                y_pred = y_pred > 0.5  # get popular vote

                # get predicted seizure index
                bounds_pred = find_szr_idx(y_pred, np.array([0, 1]))

                detected = 0  # set default detected to 0
                if bounds_pred.shape[0] > 0:
                    # merge seizures close together, then find matching seizures
                    bounds_pred = merge_close(bounds_pred, merge_margin=5)
                    detected = match_szrs(bounds_true, bounds_pred,
                                          err_margin=10)

                # Update the counters with .at: chained indexing
                # (self.df[col][row] += ...) may write to a temporary copy and
                # leave self.df unchanged.
                # NOTE(review): assumes self.df_cntr is a row label of self.df
                # (e.g. default integer index) - confirm.
                row = self.df_cntr
                self.df.at[row, 'total'] += n_true  # total true
                self.df.at[row, 'detected'] += detected  # n of detected seizures
                self.df.at[row, 'false_positives'] += (
                    bounds_pred.shape[0] - detected)  # n of false positives
                self.df_cntr += 1  # update counter
Beispiel #5
0
    def folder_loop(self, folder_name):
        """
        folder_loop(self, folder_name)

        For every verified-prediction file in the folder, evaluate each row
        of self.df (its threshold, weight and enabled-feature columns) as a
        weighted-vote detector and add the resulting counts to that row's
        'total', 'detected' and 'false_positives' columns.

        Parameters
        ----------
        folder_name : Str, folder name

        Returns
        -------
        bool
            False when the verified-predictions folder does not exist,
            True otherwise.
        """

        # get file list
        folder_path = os.path.join(self.main_path, folder_name)
        ver_path = os.path.join(folder_path, 'verified_predictions_pantelis')
        if not os.path.exists(ver_path):  # error check
            print('path not found, skipping:', folder_path, '.')
            return False

        # keep only files with predictions and remove the csv ending
        filelist = [os.path.splitext(name)[0]
                    for name in os.listdir(ver_path) if '.csv' in name]

        for file_id in tqdm(filelist):  # iterate through experiments

            # get data and true labels
            data, y_true = get_data(folder_path,
                                    file_id,
                                    ch_num=ch_list,
                                    inner_path={
                                        'data_path': 'filt_data',
                                        'pred_path': 'verified_predictions_pantelis'
                                    },
                                    load_y=True)

            # Get features (the returned labels are not used here)
            x_data, _ = get_features_allch(data, param_list,
                                           cross_ch_param_list)
            x_data = StandardScaler().fit_transform(x_data)  # Normalize data
            bounds_true = find_szr_idx(
                y_true, np.array([0, 1]))  # get bounds of true seizures
            n_true = bounds_true.shape[0]

            # mean/std are taken over the whole feature matrix, so they do not
            # depend on the df row and are computed once per file
            x_mean = np.mean(x_data)
            x_std = np.std(x_data)

            for ii in range(len(self.df)):  # iterate through df
                row = self.df.loc[ii]

                # detect seizures bigger than threshold
                thresh = x_mean + np.array(row[self.thresh]) * x_std
                y_pred_array = x_data > thresh  # get predictions

                # find predicted seizures
                w = np.array(row[self.weights])  # get weights
                e = np.array(row[self.enabled])  # get enabled features
                y_pred = y_pred_array * w * e  # weighted, selected features
                y_pred = np.sum(y_pred, axis=1) / np.sum(w * e)  # normalize
                y_pred = y_pred > 0.5  # get popular vote
                bounds_pred = find_szr_idx(
                    y_pred, np.array([0, 1]))  # get predicted seizure index

                detected = 0  # set default detected to 0
                if bounds_pred.shape[0] > 0:
                    # merge seizures close together, then find matching seizures
                    bounds_pred = merge_close(bounds_pred, merge_margin=5)
                    detected = match_szrs(bounds_true, bounds_pred,
                                          err_margin=10)

                # Update the counters with .at: chained indexing
                # (self.df[col][ii] += ...) may write to a temporary copy and
                # leave self.df unchanged.
                self.df.at[ii, 'total'] += n_true  # total true
                self.df.at[ii, 'detected'] += detected  # n of detected seizures
                self.df.at[ii, 'false_positives'] += (
                    bounds_pred.shape[0] - detected)  # n of false positives

        return True
def file_loop(main_path):
    """
    file_loop(main_path)

    Run the fixed two-feature threshold detector on every verified-prediction
    file under main_path, print a per-file and overall summary, and return
    the accumulated counts.

    Parameters
    ----------
    main_path : Str, folder containing 'verified_predictions_pantelis'

    Returns
    -------
    true_total : int, number of true seizures over all files
    total_detected : int, number of true seizures that were matched
    total_exta : int, number of predicted seizures without a match
    """
    tic = time.time()  # start time, used for the elapsed-time report below

    # get data list: only files with predictions, csv ending removed
    ver_path = os.path.join(main_path, 'verified_predictions_pantelis')
    filelist = [os.path.splitext(name)[0]
                for name in os.listdir(ver_path) if '.csv' in name]

    true_total = 0
    total_detected = 0
    total_exta = 0
    for file_id in filelist:  # loop through files

        # get data and true labels
        data, y_true = get_data(main_path, file_id, ch_num=num_channels)
        print('->', file_id, 'loaded.')

        # Clean and filter data
        data = preprocess_data(data, clean=True, filt=False)
        print('-> data pre-processed.')

        # Get features and labels
        x_data, feature_labels = get_features_allch(data, param_list,
                                                    cross_ch_param_list)
        print('-> features extracted')

        # Normalize data
        x_data = StandardScaler().fit_transform(x_data)

        # make predictions from the product of feature columns 1 and 9
        xbest = x_data[:, 1] * x_data[:, 9]
        threshold = np.mean(xbest) + 4 * np.std(xbest)
        y_pred = xbest > threshold

        # get number of seizures
        bounds_pred = find_szr_idx(y_pred, np.array([0, 1]))  # predicted
        bounds_true = find_szr_idx(y_true, np.array([0, 1]))  # true

        detected = 0
        if bounds_pred.shape[0] > 0:
            # merge seizures close together
            bounds_pred = merge_close(bounds_pred, merge_margin=5)

            # find matching seizures
            detected = match_szrs(bounds_true, bounds_pred, err_margin=10)

        extra = bounds_pred.shape[0] - detected
        print('Detected', detected, 'out of', bounds_true.shape[0],
              'seizures')
        print('+', extra, 'extra \n')

        # True seizures are counted for every file, including files where
        # nothing was predicted; otherwise missed seizures would drop out of
        # the total and inflate the detection ratio.
        true_total += bounds_true.shape[0]
        total_detected += detected
        total_exta += extra

    print('Total detected', total_detected, 'out of', true_total, 'seizures')
    print(total_exta, 'extra seizures')
    print('Time elapsed = ', time.time() - tic, 'seconds.')
    return true_total, total_detected, total_exta
Beispiel #7
0
def folder_loop(folder_path, thresh_multiplier=5):
    """
    folder_loop(folder_path, thresh_multiplier=5)

    Threshold every feature (mean + thresh_multiplier * SD) for each
    verified-prediction file in folder_path and count true, detected and
    extra seizures per feature.

    Parameters
    ----------
    folder_path : Str, folder containing 'verified_predictions_pantelis'
    thresh_multiplier : number of standard deviations above the mean used
        as detection threshold

    Returns
    -------
    df : pd.DataFrame, one row per feature with columns 'true_total',
        'total_detected', 'total_exta' and 'Features'
    szrs : ndarray (files, 3, features); axis 1 holds
        [true, detected, extra] seizure counts
    Returns (False, False) when the verified-predictions folder is missing.
    """

    # get file list
    ver_path = os.path.join(folder_path, 'verified_predictions_pantelis')
    if not os.path.exists(ver_path):
        # folder_path is already the full path; the message previously joined
        # it with a `main_path` that is not defined in this function
        print('path not found, skipping:', folder_path, '.')
        return False, False

    # keep only files with predictions and remove the csv ending
    filelist = [os.path.splitext(name)[0]
                for name in os.listdir(ver_path) if '.csv' in name]

    # create feature labels: per-channel features first, then cross-channel
    feature_labels = []
    for n in ch_list:
        feature_labels += [x.__name__ + '_' + str(n) for x in param_list]
    feature_labels += [x.__name__ for x in cross_ch_param_list]
    feature_labels = np.array(feature_labels)

    # create dataframe
    columns = ['true_total', 'total_detected', 'total_exta']
    df = pd.DataFrame(data=np.zeros((len(feature_labels), len(columns))),
                      columns=columns, dtype=np.int64)
    df['Features'] = feature_labels

    # create seizure array
    szrs = np.zeros((len(filelist), 3, feature_labels.shape[0]))

    # total number of feature rows analysed (named so that it does not shadow
    # the `time` module)
    n_windows = 0
    for i, file_id in enumerate(tqdm(filelist)):  # loop through experiments

        # get data and true labels
        data, y_true = get_data(folder_path, file_id, ch_num=ch_list,
                                inner_path={'data_path': 'filt_data',
                                            'pred_path': 'verified_predictions_pantelis'},
                                load_y=True)

        ## UNCOMMENT LINE BELOW TO : Clean and filter data
        # data = preprocess_data(data,  clean = True, filt = True, verbose = 0)

        # Get features (the returned labels are not used here)
        x_data, _ = get_features_allch(data, param_list, cross_ch_param_list)

        #  UNCOMMENT LINES BELOW TO : get refined data (multiply channels)
        # new_data = np.multiply(x_data[:,0:len(param_list)],x_data[:,len(param_list):x_data.shape[1]-len(cross_ch_param_list)])
        # x_data = np.concatenate((new_data, x_data[:,x_data.shape[1]-1:]), axis=1)

        # Normalize data
        x_data = StandardScaler().fit_transform(x_data)
        n_windows += x_data.shape[0]

        # true seizure bounds do not depend on the feature, compute them once
        bounds_true = find_szr_idx(y_true, np.array([0, 1]))

        for ii in range(len(feature_labels)):  # iterate through features
            feature = x_data[:, ii]

            # SD threshold
            # (alternative, percentile: feature > np.percentile(feature, thresh_multiplier))
            ## UNCOMMENT LINE BELOW: for running threshold
            ## y_pred = running_std_detection(x_data[:,ii] , 5, int(60/5)*120)
            y_pred = feature > (np.mean(feature)
                                + thresh_multiplier * np.std(feature))

            # get predicted seizure bounds
            bounds_pred = find_szr_idx(y_pred, np.array([0, 1]))

            # get true number of seizures
            szrs[i, 0, ii] = bounds_true.shape[0]

            if bounds_pred.shape[0] > 0:
                # merge seizures close together
                bounds_pred = merge_close(bounds_pred, merge_margin=5)

                # find matching seizures
                detected = match_szrs(bounds_true, bounds_pred, err_margin=10)

                # get number of matching and extra seizures detected
                szrs[i, 1, ii] = detected
                szrs[i, 2, ii] = bounds_pred.shape[0] - detected

            # get total numbers (cast so the int64 columns keep their dtype)
            df.at[ii, 'true_total'] += int(szrs[i, 0, ii])
            df.at[ii, 'total_detected'] += int(szrs[i, 1, ii])
            df.at[ii, 'total_exta'] += int(szrs[i, 2, ii])

    # NOTE(review): the factor 5 presumably is the window length in seconds - confirm
    print(n_windows*5/60, 'minutes of eeg recordings')
    return df, szrs