def user_cost(y_true, y_pred):
    """
    Compute a detection cost from true and predicted seizure labels.

    The cost is the fraction of true seizures that were missed plus
    log10(number of false positives + 1), so lower is better.

    Parameters
    ----------
    y_true : 1ndarray bool, ground truth values
    y_pred : 1ndarray bool, predicted values

    Returns
    -------
    cost : float
    """

    # get bounds of seizures
    bounds_true = find_szr_idx(y_true, np.array([0, 1]))  # true seizures
    bounds_pred = find_szr_idx(y_pred, np.array([0, 1]))  # predicted seizures
    bounds_pred = merge_close(bounds_pred,
                              merge_margin=5)  # merge seizures close together

    n_true = bounds_true.shape[0]
    n_pred = bounds_pred.shape[0]

    # matching is only meaningful when both sets are non-empty
    detected = 0  # number of detected seizures
    if n_true > 0 and n_pred > 0:
        detected = match_szrs(bounds_true, bounds_pred, err_margin=10)

    # With no true seizures nothing can be missed; this also avoids a
    # division by zero in the detected ratio.
    missed_ratio = 1 - (detected / n_true) if n_true > 0 else 0.0
    false_positives = n_pred - detected

    return missed_ratio + np.log10(false_positives + 1)  # cost function
    def folder_loop(self, folder_name):
        """
        Accumulate per-feature detection counts for one folder into self.df.

        Each file that contains at least one true seizure is thresholded
        feature by feature at mean + self.threshold * SD, and the numbers of
        true, detected and false-positive seizures are added to the row of
        self.df with the same index as the feature column. Files without
        true seizures are skipped entirely.

        Parameters
        ----------
        folder_name : Str, folder name

        Returns
        -------
        bool, False if the verified predictions folder is missing, else True
        """

        # get file list
        folder_path = os.path.join(self.main_path, folder_name)
        ver_path = os.path.join(folder_path, 'verified_predictions_pantelis')
        if not os.path.exists(ver_path):  # error check
            print('path not found, skipping:', folder_path, '.')
            return False

        # keep only files with predictions and remove the csv ending
        filelist = [os.path.splitext(f)[0]
                    for f in os.listdir(ver_path) if '.csv' in f]

        for file_id in tqdm(filelist):  # iterate through experiments

            # get data and true labels
            data, y_true = get_data(folder_path, file_id, ch_num=ch_list,
                                    inner_path={'data_path': 'filt_data',
                                                'pred_path': 'verified_predictions_pantelis'},
                                    load_y=True)

            # Get features (the returned labels are not used here)
            x_data, _ = get_features_allch(data, param_list, cross_ch_param_list)

            # Normalize data
            x_data = StandardScaler().fit_transform(x_data)

            # get bounds of true seizures
            bounds_true = find_szr_idx(y_true, np.array([0, 1]))
            if bounds_true.shape[0] == 0:  # proceed only if seizures are present
                continue

            for ii in range(len(self.feature_labels)):  # iterate through features
                feature = x_data[:, ii]

                # detect seizures bigger than threshold
                y_pred = feature > (np.mean(feature) + self.threshold * np.std(feature))

                # get bounds of predicted seizures
                bounds_pred = find_szr_idx(y_pred, np.array([0, 1]))  # total predicted

                # Only merge/match when something was predicted, as the other
                # detection loops in this project do.
                detected = 0
                if bounds_pred.shape[0] > 0:
                    bounds_pred = merge_close(bounds_pred, merge_margin=5)  # merge seizures close together
                    detected = match_szrs(bounds_true, bounds_pred, err_margin=10)  # find matching seizures

                # get total numbers
                self.df.at[ii, 'total'] += bounds_true.shape[0]
                self.df.at[ii, 'detected'] += detected
                self.df.at[ii, 'false_positives'] += bounds_pred.shape[0] - detected
        return True
def find_threshold(x_data, y_true):
    """
    Sweep thresholds on one feature and plot the resulting cost curve.

    Feature column 8 of x_data is thresholded at mean + k * SD for 100
    values of k evenly spaced between 1 and 20. The cost returned by
    create_cost is stored for every k and plotted against k.

    Parameters
    ----------
    x_data : 2D ndarray (rows = time, columns = features)
    y_true : 1D ndarray, bool ground truth labels

    Returns
    -------
    cost_array : 1D ndarray, cost for each tested threshold
    thresh_array : 1D ndarray, tested threshold multipliers
    """

    ftr = 8  # index of the feature column that is evaluated
    x = x_data[:, ftr]

    # loop-invariant statistics of the feature
    x_mean = np.mean(x)
    x_std = np.std(x)

    n_loop = 100
    thresh_array = np.linspace(1, 20, n_loop)
    cost_array = np.zeros(n_loop)

    # true seizures do not depend on the threshold, so find them once
    bounds_true = find_szr_idx(y_true, np.array([0, 1]))  # true

    for i, thresh in enumerate(thresh_array):

        y_pred = x > (x_mean + thresh * x_std)

        # NOTE(review): [0, 2] differs from the [0, 1] passed at the other
        # find_szr_idx call sites; kept as is, confirm it is intentional.
        bounds_pred = find_szr_idx(y_pred, np.array([0, 2]))  # predicted

        # merge seizures close together
        if bounds_pred.shape[0] > 1:
            bounds_pred = merge_close(bounds_pred, merge_margin=5)

        cost_array[i] = create_cost(bounds_true, bounds_pred)  # get cost

    plt.figure()
    plt.plot(thresh_array, cost_array)
    plt.ylabel('cost')
    plt.xlabel('thresh')
    print('seizures = ', bounds_true.shape[0])
    return cost_array, thresh_array
Esempio n. 4
0
    def append_pred(self, y_pred_array, bounds_true):
        """
        Adds metrics to self.df

        For every combination of a weight vector and a feature set, the
        per-feature predictions are combined into a weighted vote, converted
        to seizure bounds and compared with the true seizures. The counts are
        added to row self.df_cntr of self.df, and self.df_cntr is advanced by
        one after each combination.

        Parameters
        ----------
        y_pred_array : np array, bool (rows = time, columns = features)
        bounds_true : np.array (rows = seizures, cols= [start idx, stop idx])
        """

        for i in range(len(self.weights)):
            for ii in range(len(self.feature_set)):

                # find predicted seizures
                y_pred = y_pred_array * self.weights[i] * self.feature_set[
                    ii]  # get predictions based on weights and selected features
                y_pred = np.sum(y_pred, axis=1) / np.sum(
                    self.weights[i] * self.feature_set[ii]
                )  # normalize to weights and selected features
                y_pred = y_pred > 0.5  # get popular vote
                bounds_pred = find_szr_idx(y_pred, np.array(
                    [0, 1]))  # get predicted seizure index

                detected = 0  # set default detected to 0
                if bounds_pred.shape[0] > 0:
                    # get bounds of predicted seizures
                    bounds_pred = merge_close(
                        bounds_pred,
                        merge_margin=5)  # merge seizures close together
                    detected = match_szrs(
                        bounds_true, bounds_pred,
                        err_margin=10)  # find matching seizures

                # get total numbers
                # .at writes straight into the frame; chained indexing
                # (self.df[col][row] += ...) may update a temporary copy and
                # silently leave self.df unchanged.
                row = self.df_cntr
                self.df.at[row, 'total'] += bounds_true.shape[0]  # total true
                self.df.at[row, 'detected'] += detected  # n of detected seizures
                self.df.at[row, 'false_positives'] += (
                    bounds_pred.shape[0] - detected)  # n of false positives
                self.df_cntr += 1  # update counter
def find_threshold_all(x_data, y_true):
    """
    Iteratively raise a threshold on one feature until the cost is zero.

    Feature column 1 of x_data is thresholded at mean + thresh * SD,
    starting from thresh = 1. After every evaluation the threshold is
    increased by the cost returned by create_cost, for at most 100
    iterations or until the cost reaches exactly zero. The feature and the
    initial threshold level are also plotted on a new figure.

    Parameters
    ----------
    x_data : 2D ndarray (rows = time, columns = features)
    y_true : 1D ndarray, bool ground truth labels

    Returns
    -------
    cost_array : 1D ndarray (100,), cost per iteration; entries after an
        early stop remain zero
    thresh_array : 1D ndarray (100,), threshold multiplier per iteration;
        entries after an early stop remain zero
    """

    thresh = 1  # starting threshold multiplier
    ftr = 1  # index of the feature column that is evaluated

    x = x_data[:, ftr]

    # loop-invariant statistics of the feature
    x_mean = np.mean(x)
    x_std = np.std(x)

    # plot the feature together with the starting threshold level
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.plot(x)
    ax.plot(np.ones(x.shape[0]) * (x_mean + thresh * x_std))

    n_loop = 100
    cost_array = np.zeros(n_loop)
    thresh_array = np.zeros(n_loop)

    # true seizures do not depend on the threshold, so find them once
    bounds_true = find_szr_idx(y_true, np.array([0, 1]))  # true

    for i in range(n_loop):

        thresh_array[i] = thresh
        y_pred = x > (x_mean + thresh * x_std)

        # get predicted seizures
        bounds_pred = find_szr_idx(y_pred, np.array([0, 1]))  # predicted

        # merge seizures close together
        if bounds_pred.shape[0] > 1:
            bounds_pred = merge_close(bounds_pred, merge_margin=5)

        cost = create_cost(bounds_true, bounds_pred)  # get cost
        cost_array[i] = cost

        if cost == 0:
            print('cost has reached zero, stopping')
            break

        # Move the threshold by the current cost. Without this step every
        # iteration re-evaluated the same threshold.
        # NOTE(review): update rule assumed from the iterative search this
        # function implements - confirm the intended step.
        thresh += cost

    return cost_array, thresh_array
def get_min_cost(feature, y_true):
    """
    Find the threshold multiplier that gives the lowest cost for a feature.

    The feature is thresholded at mean + k * SD for 100 values of k evenly
    spaced between 1 and 20, and szr_cost is evaluated for each k.

    Parameters
    ----------
    feature : 1D ndarray, extracted feature
    y_true : 1D ndarray, bool ground truth labels

    Returns
    -------
    TYPE: Float, threshold value that gives minimum cost (the first one if
    several thresholds share the minimum)

    """

    n_loop = 100  # loop number and separation
    thresh_array = np.linspace(1, 20, n_loop)  # thresholds to test
    cost_array = np.zeros(n_loop)

    # quantities that do not depend on the tested threshold
    feature_mean = np.mean(feature)
    feature_std = np.std(feature)
    bounds_true = find_szr_idx(y_true, np.array([0, 1]))  # true

    for i, thresh in enumerate(thresh_array):

        y_pred = feature > (feature_mean + thresh * feature_std)

        # get predicted seizures
        bounds_pred = find_szr_idx(y_pred, np.array([0, 1]))  # predicted

        # merge seizures close together
        if bounds_pred.shape[0] > 1:
            bounds_pred = merge_close(bounds_pred, merge_margin=5)

        cost_array[i] = szr_cost(bounds_true, bounds_pred)  # get cost

    return thresh_array[np.argmin(cost_array)]
Esempio n. 7
0
    def folder_loop(self, folder_name):
        """
        Accumulate detection counts for one folder into every row of self.df.

        Each row of self.df describes one condition: the columns named in
        self.thresh, self.weights and self.enabled are read from the row and
        used to threshold the features and combine them into a weighted
        vote. The numbers of true, detected and false-positive seizures of
        every file are added to that row.

        Parameters
        ----------
        folder_name : Str, folder name

        Returns
        -------
        bool, False if the verified predictions folder is missing, else True
        """

        # get file list
        folder_path = os.path.join(self.main_path, folder_name)
        ver_path = os.path.join(folder_path, 'verified_predictions_pantelis')
        if not os.path.exists(ver_path):  # error check
            print('path not found, skipping:', folder_path, '.')
            return False

        # keep only files with predictions and remove the csv ending
        filelist = [os.path.splitext(f)[0]
                    for f in os.listdir(ver_path) if '.csv' in f]

        for file_id in tqdm(filelist):  # iterate through experiments

            # get data and true labels
            data, y_true = get_data(folder_path,
                                    file_id,
                                    ch_num=ch_list,
                                    inner_path={
                                        'data_path': 'filt_data',
                                        'pred_path': 'verified_predictions_pantelis',
                                    },
                                    load_y=True)

            # Get features (the returned labels are not used here)
            x_data, _ = get_features_allch(data, param_list,
                                           cross_ch_param_list)
            x_data = StandardScaler().fit_transform(x_data)  # Normalize data
            bounds_true = find_szr_idx(y_true, np.array(
                [0, 1]))  # get bounds of true seizures

            # statistics over the whole feature matrix, shared by all rows
            data_mean = np.mean(x_data)
            data_std = np.std(x_data)

            for ii in range(len(self.df)):  # iterate through df
                row = self.df.loc[ii]  # settings of this condition

                # detect seizures bigger than threshold
                thresh = data_mean + np.array(row[self.thresh]) * data_std
                y_pred_array = x_data > thresh  # get predictions

                # find predicted seizures
                w = np.array(row[self.weights])  # get weights
                e = np.array(row[self.enabled])  # get enabled features
                y_pred = y_pred_array * w * e  # weighted votes of selected features
                y_pred = np.sum(y_pred, axis=1) / np.sum(
                    w * e)  # normalize to weights and selected features
                y_pred = y_pred > 0.5  # get popular vote
                bounds_pred = find_szr_idx(y_pred, np.array(
                    [0, 1]))  # get predicted seizure index

                detected = 0  # set default detected to 0
                if bounds_pred.shape[0] > 0:
                    # get bounds of predicted seizures
                    bounds_pred = merge_close(
                        bounds_pred,
                        merge_margin=5)  # merge seizures close together
                    detected = match_szrs(
                        bounds_true, bounds_pred,
                        err_margin=10)  # find matching seizures

                # get total numbers
                # .at writes straight into the frame; chained indexing
                # (self.df[col][row] += ...) may update a temporary copy and
                # silently leave self.df unchanged.
                self.df.at[ii, 'total'] += bounds_true.shape[0]  # total true
                self.df.at[ii, 'detected'] += detected  # n of detected seizures
                self.df.at[ii, 'false_positives'] += (
                    bounds_pred.shape[0] - detected)  # n of false positives

        return True
def file_loop(main_path):
    """
    Run threshold-based detection on every file and report the totals.

    For each file with verified predictions the data are loaded,
    pre-processed and converted to normalized features. The product of
    feature columns 1 and 9 is thresholded at mean + 4 * SD and the
    predicted seizures are compared with the true ones.

    Parameters
    ----------
    main_path : Str, folder that contains 'verified_predictions_pantelis'

    Returns
    -------
    true_total : int, number of true seizures in all files
    total_detected : int, number of true seizures that were detected
    total_exta : int, number of predicted seizures without a true match
    """

    # Time the run locally instead of relying on a `tic` variable defined
    # somewhere outside this function.
    tic = time.time()

    # get data list
    ver_path = os.path.join(main_path, 'verified_predictions_pantelis')

    # keep only files with predictions and remove the csv ending
    filelist = [os.path.splitext(f)[0]
                for f in os.listdir(ver_path) if '.csv' in f]

    true_total = 0
    total_detected = 0
    total_exta = 0
    for file_id in filelist:  # loop through files

        # get data and true labels
        data, y_true = get_data(main_path, file_id, ch_num=num_channels)
        print('->', file_id, 'loaded.')

        # Clean data (filtering is disabled)
        data = preprocess_data(data, clean=True, filt=False)
        print('-> data pre-processed.')

        # Get features (the returned labels are not used here)
        x_data, _ = get_features_allch(data, param_list,
                                       cross_ch_param_list)
        print('-> features extracted')

        # Normalize data
        x_data = StandardScaler().fit_transform(x_data)

        # make predictions
        xbest = x_data[:, 1] * x_data[:, 9]
        threshold = np.mean(xbest) + 4 * np.std(xbest)
        y_pred = xbest > threshold

        # get seizure bounds
        bounds_pred = find_szr_idx(y_pred, np.array([0, 1]))  # predicted
        bounds_true = find_szr_idx(y_true, np.array([0, 1]))  # true

        # Count true seizures for every file. Counting them only when
        # something was predicted drops fully missed files from the
        # denominator and inflates the detection rate.
        true_total += bounds_true.shape[0]

        if bounds_pred.shape[0] > 0:

            # merge seizures close together
            bounds_pred = merge_close(bounds_pred, merge_margin=5)

            # find matching seizures
            detected = match_szrs(bounds_true, bounds_pred, err_margin=10)
            extra = bounds_pred.shape[0] - detected

            print('Detected', detected, 'out of', bounds_true.shape[0],
                  'seizures')
            print('+', extra, 'extra \n')

            total_detected += detected
            total_exta += extra

    print('Total detected', total_detected, 'out of', true_total, 'seizures')
    print(total_exta, 'extra seizures')
    print('Time elapsed = ', time.time() - tic, 'seconds.')
    return true_total, total_detected, total_exta
Esempio n. 9
0
def folder_loop(folder_path, thresh_multiplier = 5):
    """
    Evaluate every feature as a stand-alone seizure detector for one folder.

    Each feature column is thresholded at mean + thresh_multiplier * SD and
    the predicted seizures are compared with the true ones, file by file.

    Parameters
    ----------
    folder_path : Str, folder that contains 'verified_predictions_pantelis'
    thresh_multiplier : number of SDs above the mean used as threshold,
        the default is 5

    Returns
    -------
    df : pandas DataFrame with one row per feature and the summed columns
        'true_total', 'total_detected', 'total_exta', plus 'Features'
    szrs : 3D ndarray (files, [true, detected, extra], features)

    (False, False) is returned when the verified predictions folder does
    not exist.
    """

    # get file list
    ver_path = os.path.join(folder_path, 'verified_predictions_pantelis')
    if not os.path.exists(ver_path):
        # report the folder that was passed in; the message used to depend
        # on a `main_path` name that is not a parameter of this function
        print('path not found, skipping:', folder_path, '.')
        return False, False

    # keep only files with predictions and remove the csv ending
    filelist = [os.path.splitext(f)[0]
                for f in os.listdir(ver_path) if '.csv' in f]

    # create feature labels
    feature_labels = []
    for n in ch_list:
        feature_labels += [x.__name__ + '_' + str(n) for x in param_list]
    feature_labels += [x.__name__ for x in cross_ch_param_list]
    feature_labels = np.array(feature_labels)

    # create dataframe
    columns = ['true_total', 'total_detected', 'total_exta']
    df = pd.DataFrame(data=np.zeros((len(feature_labels), len(columns))),
                      columns=columns, dtype=np.int64)
    df['Features'] = feature_labels

    # create seizure array
    szrs = np.zeros((len(filelist), 3, feature_labels.shape[0]))

    # total number of analyzed feature rows (was named `time`, which
    # shadowed the module name)
    n_segments = 0
    for i, file_id in enumerate(tqdm(filelist)):  # loop through experiments

        # get data and true labels
        data, y_true = get_data(folder_path, file_id, ch_num=ch_list,
                                inner_path={'data_path': 'filt_data',
                                            'pred_path': 'verified_predictions_pantelis'},
                                load_y=True)

        ## UNCOMMENT LINE BELOW TO : Clean and filter data
        # data = preprocess_data(data,  clean = True, filt = True, verbose = 0)

        # Get features (the returned labels are not used here)
        x_data, _ = get_features_allch(data, param_list, cross_ch_param_list)

        # Normalize data
        x_data = StandardScaler().fit_transform(x_data)

        n_segments += x_data.shape[0]

        # true seizures are the same for every feature, so find them once
        bounds_true = find_szr_idx(y_true, np.array([0, 1]))  # true
        szrs[i, 0, :] = bounds_true.shape[0]

        for ii in range(len(feature_labels)):  # iterate through features
            feature = x_data[:, ii]

            # SD threshold
            y_pred = feature > (np.mean(feature) + thresh_multiplier * np.std(feature))

            ## UNCOMMENT LINE BELOW: for running threshold
            ## y_pred = running_std_detection(x_data[:,ii] , 5, int(60/5)*120)

            # get predicted seizures
            bounds_pred = find_szr_idx(y_pred, np.array([0, 1]))  # predicted

            if bounds_pred.shape[0] > 0:

                # merge seizures close together
                bounds_pred = merge_close(bounds_pred, merge_margin=5)

                # find matching seizures
                detected = match_szrs(bounds_true, bounds_pred, err_margin=10)

                # get number of matching and extra seizures detected
                szrs[i, 1, ii] = detected  # number of true seizures detected
                szrs[i, 2, ii] = bounds_pred.shape[0] - detected  # number of extra seizures detected

            # get total numbers (cast so the int64 columns keep their dtype)
            df.at[ii, 'true_total'] += int(szrs[i, 0, ii])
            df.at[ii, 'total_detected'] += int(szrs[i, 1, ii])
            df.at[ii, 'total_exta'] += int(szrs[i, 2, ii])

    # NOTE(review): the factor 5/60 presumes 5-second rows - confirm
    print(n_segments*5/60, 'minutes of eeg recordings')
    return df, szrs
    def get_feature_pred(self, file_id):
        """
        Predict seizure bounds for one file from thresholded features.

        The filtered data are loaded without labels, features are extracted
        and normalized, every feature is thresholded and the results are
        combined into a weighted vote using self.weights and self.enabled.
        Detected seizures that lie close together are merged and then passed
        through self.refine_based_on_surround together with the
        'line_length_0' feature.

        Parameters
        ----------
        file_id : Str

        Returns
        -------
        bounds_pred : 2D Numpy Array (rows = seizures, cols = start and end points of detected seizures)

        """

        # Features computed on each channel separately
        single_ch_features = (
            features.autocorr,
            features.line_length,
            features.rms,
            features.mad,
            features.var,
            features.std,
            features.psd,
            features.energy,
            features.get_envelope_max_diff,
        )
        # Features computed across channels
        cross_ch_features = (
            features.cross_corr,
            features.signal_covar,
            features.signal_abs_covar,
        )

        # Load the filtered data only (no labels are requested)
        raw = get_data(self.gen_path,
                       file_id,
                       ch_num=ch_list,
                       inner_path={'data_path': 'filt_data'},
                       load_y=False)

        # Feature extraction followed by normalization
        x_data, labels = get_features_allch(raw, single_ch_features,
                                            cross_ch_features)
        x_data = StandardScaler().fit_transform(x_data)

        # Threshold every feature, then take the weighted vote
        cutoff = np.mean(x_data) + self.thresh * np.std(x_data)
        votes = (x_data > cutoff) * self.weights * self.enabled
        vote_fraction = np.sum(votes, axis=1) / np.sum(
            self.weights * self.enabled)
        majority = vote_fraction > 0.5
        bounds_pred = find_szr_idx(majority, np.array([0, 1]))

        # Nothing detected: there is nothing to refine
        if bounds_pred.shape[0] == 0:
            return bounds_pred

        # Merge seizures close together
        bounds_pred = merge_close(bounds_pred, merge_margin=5)

        # Refine using the first feature whose name starts with 'line_length_0'
        name_matches = np.char.find(self.feature_names, 'line_length_0') == 0
        idx = np.where(name_matches)[0][0]
        return self.refine_based_on_surround(x_data[:, idx], bounds_pred)