Example #1
# import packages
import matplotlib.pyplot as plt
import numpy as np
import scipy

from DCEpy.Features.BurnsStudy.eig_centrality import eig_centrality
from DCEpy.Features.GardnerStudy.edfread import edfread

# download a data file
print('Downloading file...')
filename = '/home/chris/Documents/Rice/senior/EpilepsyVIP/data/RMPt2/DA00101U_1-1+.edf' # Chris
seizure_start, seizure_end = 262, 330
fs = 1000
bad_channels = ('Events/Markers', 'EDF Annotations', 'EEG Mark1', 'EEG Mark2')
data, _, labels = edfread(filename, bad_channels=bad_channels)
data_len,nchannels = np.shape(data)
print('shape is ' + str(np.shape(data)))

# window size
window_size = int(5e3)
window_increment = 1250
window_num = len(range(window_size, data_len, window_increment))
eigs = np.empty((window_num, nchannels))  # initialize for eigenvectors
col = np.empty(window_num)  # initialize seizure labels

# find eigenvectors
i = 0
v0 = np.ones(nchannels) / np.sqrt(nchannels)
print('Getting Eigenvectors...')
for end_time in range(window_size, data_len, window_increment):
    # NOTE: the loop body was missing from the original listing; the lines
    # below are a minimal reconstruction, and eig_centrality's signature
    # (including the use of v0 as a starting vector) is an assumption
    window = data[end_time - window_size:end_time, :]
    eigs[i, :] = eig_centrality(window, v0)  # one centrality value per channel
    col[i] = seizure_start <= end_time / fs <= seizure_end  # 1 during seizure
    i += 1

Example #2
# imports for this example; ar_stability_window comes from the surrounding
# project (module path not shown in the original listing), and the edfread
# import is assumed to match Example #1
import matplotlib.pyplot as plt

from DCEpy.Features.GardnerStudy.edfread import edfread

filenames = (
    '/Users/vsp/Google Drive/MATLAB/Scattering Coeffs/DA00101U_1-1+.edf',
    '/Users/vsp/Google Drive/MATLAB/Scattering Coeffs/DA00101V_1-1+.edf',
    '/Users/vsp/Google Drive/MATLAB/Scattering Coeffs/DA00101W_1-1+.edf',
    '/Users/vsp/Google Drive/MATLAB/Scattering Coeffs/DA00101P_1-1_02oct2010_09_00_38_Awake+.edf'
)
good_channels = [
    'LAH1', 'LAH2', 'LPH6', 'LPH7', 'LPH9', 'LPH10', 'LPH11', 'LPH12'
]
seizure_starts = 262, 107, 191, 0
seizure_ends = 330, 287, 405, 0

for plot_no,(filename, seizure_start, seizure_end) in \
        enumerate(zip(filenames,seizure_starts,seizure_ends)):
    data, _, labels = edfread(filename, good_channels=good_channels)
    # data = normalize(np.random.rand(3e4,5))
    #data, A = artif_VAR_data(N=5, n=2000, p=4, burn=50, A_type="tridiag")
    eig_seq, tim = ar_stability_window(data,
                                       order=15,
                                       n_eigs=8,
                                       w_len=5000,
                                       w_gap=1000)
    plt.subplot(1, len(filenames), plot_no + 1)
    plt.plot(tim / 1000, eig_seq)
    plt.xlabel('Time(s)')
    plt.ylabel('Top Eigenvalues')
    # the call was truncated in the original listing; the final argument and
    # closing parenthesis below are an assumption
    plt.vlines((seizure_start, seizure_end),
               0.9,
               1.04,
               'g',
               linestyles='dashed')
plt.show()

Example #3
# imports for this example; rstat, gap_stat, preprocessing, and build_network
# come from the surrounding project (module paths not shown in the original
# listing), and the edfread import is assumed to match Example #1
import math

import networkx as nx
import numpy as np
from scipy import cluster, signal

from DCEpy.Features.GardnerStudy import edfread


def burns(all_files, ictal_interval, inter_interval):
    """
    Input:
        - array of files for the patient
            0 position contains ictal data for band picking
            1 position contains inter ictal data for band picking
        - interval [a, b] for ictal data for band picking
        - interval [c, d] for inter ictal data for band picking
         (ensure that b-a = d-c)
    Output:
        - list of states and the centers of the clusters
        (might change this a bit)
    """

    # testing with two files of TS039
    #test_file = 'CA1353FN_1-1_small.edf'
    #[test_patient1, annotations1, labels1] = edfread.edfread(test_file)

    #inter_file = 'C:\Users\User\Documents\EpilepsySeniorDesign\Burns\CA00100D_1-1+.edf'
    #ictal_file = 'C:\Users\User\Documents\EpilepsySeniorDesign\Burns\DA00101L_1-1+.edf'

    #[inter_data, annotations1, labels1] = edfread.edfread(inter_file)
    #[ictal_data, annotations2, labels2] = edfread.edfread(ictal_file)

    #y_inter = preprocessing(inter_data)
    #y_ictal = preprocessing(ictal_data)

    # create list to hold data and sampling frequency
    all_data = []
    fs = 1000

    # load and preprocess
    for file in all_files:
        [data, annotations, labels] = edfread.edfread(file)
        all_data.append(preprocessing(data))
    print('Data is loaded and preprocessed')

    # Find band for r statistic
    y_ictal = all_data[0][ictal_interval[0]:ictal_interval[1], 0:1]
    y_inter = all_data[1][inter_interval[0]:inter_interval[1], 0:1]
    bands = np.array([[1, 4], [5, 8], [9, 13], [14, 25], [25, 90],
                      [100, 200]])  # possible bands
    band = rstat.calc_rstat(y_ictal, y_inter, fs, bands)
    print('Band selected is: ' + str(band))

    # band pass filter given band
    band_norm = [(1.0 * band[0]) / (fs / 2.0),
                 (1.0 * band[1]) / (fs / 2.0)]  # normalize the band
    filt_order = 3
    b, a = signal.butter(filt_order, band_norm, 'bandpass')  # design filter
    num_files = len(all_data)
    for j in range(num_files):
        all_data[j] = signal.filtfilt(b, a, all_data[j],
                                      axis=0)  # filter the data
    print('Done filtering')

    # list to hold eigenvectors, plus a counter for windows where centrality
    # fails (used in the except block below but never initialized in the
    # original)
    evc = []
    num_exc = 0

    # for each file given
    for file_data in all_data:

        # get data shape
        n, m = file_data.shape
        print('Data has size ' + str(file_data.shape))

        # determine edges to be used
        connections = range(m)
        weightType = 'coherence'

        # go through each window and create a coherence graph
        # 3000-sample (3 s) windows advanced 1000 samples (1 s) at a time
        num_windows = int(math.floor((1.0 * n) / 1000) - 3)
        for i in range(0, num_windows):

            # get window: rows index time samples, so slice over rows
            # (the original sliced columns, i.e. channels)
            start = i * 1000
            stop = start + 3000
            window = file_data[start:stop, :]

            # build coherence graph
            G = build_network(window, connections, weightType)

            # get eigenvector centrality
            try:
                current_evc = nx.eigenvector_centrality(G, weight=weightType)
                current_evc_ar = np.empty(m)  # dictionary to array
                for ch in range(m):  # `ch` instead of `i`, which shadowed the window index
                    current_evc_ar[ch] = current_evc[ch]
                evc.append(current_evc_ar)
            except nx.NetworkXException:  # narrowed from a bare except
                num_exc += 1
                print("Eigenvector centrality not found/did not converge")
    print('Finished computing eigenvector centrality')

    # convert into a numpy array
    evcs = np.array(evc)

    # choose k by gap statistic
    K = np.arange(20)
    n_tests = 10
    k, min_gap = gap_stat.gap_statistic(evcs, K, n_tests)
    print('Gap Statistic chose k=' + str(k))

    # cluster the eigenvectors
    [centroids, labels] = cluster.vq.kmeans2(evcs, k)
    return centroids, labels
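
# A minimal usage sketch of burns (file names and sample intervals are
# assumptions; the two intervals must have equal length):
# all_files = ['DA00101L_1-1+.edf', 'CA00100D_1-1+.edf']
# centroids, labels = burns(all_files, ictal_interval=(262000, 330000),
#                           inter_interval=(0, 68000))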

Example #4
# imports for this example; energy_features, collect_windows, parameter_tuning,
# learn_support, and loocv_testing come from the surrounding project (module
# paths not shown in the original listing), and the edfread import is assumed
# to match Example #1
import os
import pickle
import sys

import numpy as np
from scipy import signal

from DCEpy.Features.GardnerStudy.edfread import edfread


# the original signature used win_len/win_overlap, but the body refers to
# window_length/window_overlap throughout; the parameter names are fixed to match
def create_model_file(data_path,
                      window_length,
                      window_overlap,
                      f_s,
                      model_file,
                      param_file,
                      num_windows=500,
                      include_awake=True,
                      include_asleep=False):

    # use pickle files
    p_file = os.path.join(data_path, 'patient_pickle.txt')
    with open(p_file, 'rb') as pickle_file:
        patient_info = pickle.load(pickle_file)

    # add data file names and types
    data_filenames = patient_info['seizure_data_filenames']
    seizure_times = patient_info['seizure_times']
    file_types = ['ictal'] * len(data_filenames)

    if include_awake:
        data_filenames += patient_info['awake_inter_filenames']
        seizure_times += [None] * len(patient_info['awake_inter_filenames'])
        file_types += ['awake'] * len(patient_info['awake_inter_filenames'])

    if include_asleep:
        data_filenames += patient_info['asleep_inter_filenames']
        seizure_times += [None] * len(patient_info['asleep_inter_filenames'])
        file_types += ['sleep'] * len(patient_info['asleep_inter_filenames'])

    # attach data file names to data path
    data_filenames = [
        os.path.join(data_path, filename) for filename in data_filenames
    ]
    num_files = len(data_filenames)

    # get best channel to train on
    # TODO: (this will change in the future to include all channels)
    good_channels = patient_info['best_channel']

    # TODO: change this to read an edf file, then get energy statistic BEFORE going to next edf file
    #
    # read files and store in an array
    print('Reading data from edf files to numpy array')
    all_data = []
    num_channels = []
    i = 1
    for seizure_file in data_filenames:
        print('\tReading ' + str(i) + ' of ' + str(num_files))
        i += 1
        X, _, _ = edfread(seizure_file, good_channels=good_channels)
        num_channels.append(X.shape[1])
        all_data.append(X)

    if len(set(num_channels)) == 1:
        num_channels = num_channels[0]
        gt1 = num_channels > 1
        print('There ' + ('are ' if gt1 else 'is ') + str(num_channels) +
              ' channel' + 's' * gt1)
    else:
        print('Channels: ' + str(num_channels))
        sys.exit(
            'Error: There are different numbers of channels being used for different seizure files...'
        )

    p_feat = 3  # this is the number of energy statistics

    # pre-process data -- filter parameters
    print('Applying a band-pass filter to the data')
    band = np.array([0.1, 100.])
    band_norm = band / (f_s / 2.)  # normalize the band
    filt_order = 3

    # band pass filter the data
    b, a = signal.butter(filt_order, band_norm, 'bandpass')  # design filter
    for j in range(num_files):
        all_data[j] = signal.filtfilt(b, a, all_data[j],
                                      axis=0)  # filter the data

    # get features from time series
    num_files = len(all_data)
    feat_vec = []
    print('\tExtracting features from input files...', end='')
    i = 1
    for X in all_data:

        # print progress
        print(str(i) + ', ', end='')
        i += 1

        # initialize empty feature vector
        n = X.shape[0]
        # number of complete windows: floor(n / (L - O)) - 1
        n_windows = n // (window_length - window_overlap) - 1
        X_feat = np.zeros((n_windows, p_feat))  # empty feature vector

        k = 0
        # collect features from windows
        for j in range(window_length, n, window_length - window_overlap):
            window = X[(j - window_length):j, :]  # select window
            f = energy_features(window)  # extract energy statistics
            X_feat[k, :] = f
            k += 1

        # add the new feature vector
        feat_vec.append(X_feat)

    print('')  # new line

    # check for NaN
    for X in feat_vec:
        if np.any(np.isnan(X)):
            print('\tUh-oh, NaN encountered while extracting features')

    print('\tCollecting inter-ictal windows')
    # the original tested `feat_vec[j] is not "ictal"`, which is always true
    # for a feature array; filtering on file_types matches the stated intent
    inter_ictal = [
        feat_vec[j] for j in range(len(feat_vec)) if file_types[j] != 'ictal'
    ]
    X_train = collect_windows(inter_ictal, num_windows)

    # parameter tuning
    nu, gamma, C, adapt_rate, T_per = parameter_tuning(X_train, feat_vec,
                                                       seizure_times, f_s,
                                                       window_length,
                                                       window_overlap)
    print('Obtained optimal parameters')

    # run an SVM on the training data
    clf = learn_support(X_train, nu=nu, gamma=gamma)
    num_SV = clf.support_.size

    # create model file
    print('Writing to model file')
    f = open(model_file, 'w')
    f.write('svm_type one_class\n')  # one class SVM
    f.write('kernel_type rbf\n')  # kernel type = rbf
    f.write('gamma %.6f\n' % gamma)  # gamma
    f.write('nr_class 2\n')  # number of classes = 2
    f.write('total_sv %d\n' % num_SV)  # total num of support vectors
    f.write('rho %.6f\n' % clf.intercept_[0])  # offset
    f.write('SV\n')  # ready for support vectors!

    # write support vectors to model file
    for i in range(num_SV):
        f.write('%.6f ' % clf.dual_coef_[0, i])
        for j in range(p_feat):
            f.write(str(j + 1) + ':%.6f ' % clf.support_vectors_[i, j])
        f.write('\n')

    f.close()
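
    # For reference, the model file written above follows the libsvm one-class
    # text format; with illustrative (not real) numbers it looks like:
    #
    #   svm_type one_class
    #   kernel_type rbf
    #   gamma 0.015625
    #   nr_class 2
    #   total_sv 2
    #   rho 0.412000
    #   SV
    #   0.731000 1:0.104000 2:0.287000 3:0.456000
    #   0.269000 1:0.098000 2:0.301000 3:0.471000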

    # write other parameters file
    f = open(param_file, 'w')

    # TODO: do not hardcode number of channels
    num_channels = 135
    f.write('adapt_rate: %d\n' % adapt_rate)
    f.write('channel: threshold: weight\n')
    for i in range(num_channels):
        if i == 2:
            weight = 1
        else:
            weight = 0
        f.write("%d: %.4f: %d\n" % (i, C, weight))
    f.close()
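
    # Illustrative (not real) contents of the parameter file written above;
    # every channel shares the threshold C, and only channel 2 gets weight 1:
    #
    #   adapt_rate: 30
    #   channel: threshold: weight
    #   0: 1.2500: 0
    #   1: 1.2500: 0
    #   2: 1.2500: 1
    #   ...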

    return
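
# A minimal usage sketch of create_model_file (the data path and file names
# are assumptions; window lengths are in samples at f_s = 1000):
# create_model_file('/data/TS039', window_length=1000, window_overlap=500,
#                   f_s=1000, model_file='TS039.model',
#                   param_file='TS039.params')
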
def analyze_patient(data_path,
                    save_path,
                    patient_id,
                    res_f,
                    window_length=1.0,
                    window_overlap=0.5,
                    num_windows=3000,
                    f_s=1e3,
                    include_awake=True,
                    include_asleep=False):

    # reformat window length and overlap as indices
    window_length = int(window_length * f_s)
    window_overlap = int(window_overlap * f_s)

    # create save path
    if not os.path.isdir(save_path):
        os.makedirs(save_path)

    # specify data paths
    print('Specifying file paths')
    if not os.path.isdir(data_path):
        sys.exit('Error: Specified data path does not exist')

    p_file = os.path.join(data_path, 'patient_pickle.txt')
    with open(p_file, 'rb') as pickle_file:
        patient_info = pickle.load(pickle_file)

    # add data file names
    data_filenames = patient_info['seizure_data_filenames']
    seizure_times = patient_info['seizure_times']
    con_type = ['ictal'] * len(data_filenames)

    if include_awake:
        data_filenames += patient_info['awake_inter_filenames']
        seizure_times += [None] * len(patient_info['awake_inter_filenames'])
        con_type += ['awake'] * len(patient_info['awake_inter_filenames'])

    if include_asleep:
        data_filenames += patient_info['asleep_inter_filenames']
        seizure_times += [None] * len(patient_info['asleep_inter_filenames'])
        con_type += ['sleep'] * len(patient_info['asleep_inter_filenames'])

    data_filenames = [
        os.path.join(data_path, filename) for filename in data_filenames
    ]
    num_files = len(data_filenames)

    # get data in numpy array
    print('Reading data from edf files to numpy array')
    all_data = []
    num_channels = []
    i = 1
    for seizure_file in data_filenames:
        print('\tReading ' + str(i) + ' of ' + str(num_files))
        i += 1
        X, _, _ = edfread(seizure_file)
        num_channels.append(X.shape[1])
        all_data.append(X)

    if len(set(num_channels)) == 1:
        num_channels = num_channels[0]
        gt1 = num_channels > 1
        print('There ' + ('are ' if gt1 else 'is ') + str(num_channels) +
              ' channel' + 's' * gt1)
    else:
        print('Channels: ' + str(num_channels))
        sys.exit(
            'Error: There are different numbers of channels being used for different seizure files...'
        )

    # get the number of parameters (3 energy statistics per channel)
    p_feat = 3 * num_channels

    # pre-process data -- filter parameters
    print('Applying a band-pass filter to the data')
    band = np.array([0.1, 100.])
    band_norm = band / (f_s / 2.)  # normalize the band
    filt_order = 3

    # band pass filter the data
    b, a = signal.butter(filt_order, band_norm, 'bandpass')  # design filter
    for j in range(num_files):
        all_data[j] = signal.filtfilt(b, a, all_data[j],
                                      axis=0)  # filter the data

    # run leave-one-out cross validation testing
    sensitivity, latency, FP, time = loocv_testing(all_data, con_type,
                                                   window_length,
                                                   window_overlap, num_windows,
                                                   f_s, seizure_times, p_feat,
                                                   save_path)

    # get mean statistics
    m_sense = np.nanmean(sensitivity)
    m_latency = np.nanmean(latency)
    m_fpr = np.nansum(FP) / np.nansum(time)

    # print to results file
    print('\nPatient ' + patient_id + '\n=========================', file=res_f)

    # print the results -- aggregates and total
    print('Mean Sensitivity: \t%.2f' % m_sense, file=res_f)
    print('Mean Latency: \t%.4f' % m_latency, file=res_f)
    print('False Positive Rate: \t%.5f (fp/Hr) \n' % m_fpr, file=res_f)

    print('Sensitivity: ' + str(sensitivity), file=res_f)
    print('Latency: ' + str(latency), file=res_f)
    print('False Positive Rate: ' + str(FP / time), file=res_f)

    return sensitivity, latency, m_fpr
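
# A minimal usage sketch of analyze_patient (paths and the patient id are
# assumptions):
# with open('results.txt', 'w') as res_f:
#     sensitivity, latency, fpr = analyze_patient(
#         '/data/TS039', '/results/TS039', 'TS039', res_f)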