def chunk_file_iterator(chunk_samples, fs, seiz_file):

    n = 0
    try:
        while True:
            data, _, labels = edfread(
                seiz_file,
                rec_times=(n * chunk_samples / float(fs),
                           (n + 1) * chunk_samples / float(fs)))
            # data_is_full = np.size(data, 0) >= chunk_samples
            start_time = n * chunk_samples / float(fs)
            yield data, start_time, n, labels
            n += 1
    except ValueError:
        # edfread raises ValueError once the requested start time is past
        # the end of the recording, which ends the iteration
        pass
def chunk_file_iterator(chunk_samples, fs, seiz_file, good_channels=None):
    """
    Iterates over chunks of data. Useful for long (longer than 5 minutes)
    edf files; saves memory.
    """
    n = 0
    try:
        while True:
            data, _, labels = edfread(
                seiz_file,
                rec_times=(n * chunk_samples / float(fs),
                           (n + 1) * chunk_samples / float(fs)),
                good_channels=good_channels)
            # data_is_full = np.size(data, 0) >= chunk_samples
            start_time = n * chunk_samples / float(fs)
            yield data, start_time, n, labels
            n += 1
    except ValueError:
        pass
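
# A minimal usage sketch for the iterator above (the file path and channel
# names are hypothetical; the chunk size is chosen only for illustration).
# Each chunk arrives already trimmed by edfread, so memory stays bounded
# even for hour-long recordings.
fs = 1000.0
chunk_samples = int(5 * 60 * fs)  # 5-minute chunks
for data, start_time, n, labels in chunk_file_iterator(
        chunk_samples, fs, '/path/to/recording.edf',
        good_channels=['RAH1', 'RAH2']):
    print 'chunk %d starts at %.1f s with shape %s' % (n, start_time, data.shape)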
def parallel_coherence(file_path, win_len, win_overlap, fs, nprocs, save_path):

    # these are hard-coded parameters to
    # chunk up data into 10 min chunks
    min_per_chunk = 10
    sec_per_min = 60

    i = 0
    while True:

        # get chunk start and end times; the end is padded by the window
        # overlap (converted from samples to seconds) so coherence windows
        # can span chunk boundaries
        start = i * sec_per_min * min_per_chunk
        end = (i+1) * sec_per_min * min_per_chunk + float(win_overlap) / fs

        # get the chunk
        try:

            # extract the chunk
            print 'Extracting chunk ' + str(i)
            X_chunk, _, labels = edfread(file_path, rec_times = [start, end])

            print 'Printing the labels:'
            for j in range(len(labels)):  # j, so the chunk counter i is not clobbered
                print '\tChannel ' + str(j) + ' is ' + labels[j]

            # compute coherence for this chunk and save json file;
            # index the name so successive chunks do not overwrite each other
            this_save_path = save_path + '_%d.json' % i
            mp_coherence(X_chunk, win_len, win_overlap, fs, nprocs, this_save_path)

            # if less than an entire chunk (in samples) was read, then this is the last one!
            if X_chunk.shape[0] < sec_per_min * min_per_chunk * fs:
                break

            # advance to the next chunk
            i += 1

        except ValueError:

            break # the start was past the end!

    return
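
# A hedged usage sketch for parallel_coherence (the path, window sizes, and
# process count are placeholders, not values from the original source).
# Window length and overlap are in samples, matching the win_overlap / fs
# conversion above.
if __name__ == '__main__':
    fs = 1000.0
    win_len = int(3.0 * fs)      # 3 s windows
    win_overlap = int(1.5 * fs)  # 50% overlap
    parallel_coherence('/path/to/recording.edf', win_len, win_overlap,
                       fs, nprocs=4, save_path='/tmp/coherence_chunk')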
def analyze_patient(data_path, save_path, patient_id, res_f, window_length=1.0, window_overlap=0.5, num_windows=3000, f_s=1e3, include_awake=True, include_asleep=False):

    # reformat window length and overlap as indices
    window_length = int(window_length * f_s)
    window_overlap = int(window_overlap * f_s)

    # create save path
    if not os.path.isdir(save_path):
        os.makedirs(save_path)

    # specify data paths
    print 'Specifying file paths'
    if not os.path.isdir(data_path):
        sys.exit('Error: Specified data path does not exist')

    p_file = os.path.join(data_path, 'patient_pickle.txt')
    with open(p_file,'r') as pickle_file:
        patient_info = pickle.load(pickle_file)

    # add data file names
    data_filenames = patient_info['seizure_data_filenames']
    seizure_times = patient_info['seizure_times']
    con_type = ['ictal'] * len(data_filenames)

    if include_awake:
        data_filenames += patient_info['awake_inter_filenames']
        seizure_times += [None] * len(patient_info['awake_inter_filenames'])
        con_type += ['awake'] * len(patient_info['awake_inter_filenames'])

    if include_asleep:
        data_filenames += patient_info['asleep_inter_filenames']
        seizure_times += [None] * len(patient_info['asleep_inter_filenames'])
        con_type += ['sleep'] * len(patient_info['asleep_inter_filenames'])

    data_filenames = [os.path.join(data_path,filename) for filename in data_filenames]
    num_files = len(data_filenames)

    # get data in numpy array
    print 'Reading data from edf files to numpy array'
    all_data = []
    num_channels = []
    i = 1
    for seizure_file in data_filenames:
        print '\tReading ' + str(i) + ' of ' + str(num_files)
        i += 1
        X,_,_ = edfread(seizure_file)
        num_channels.append(X.shape[1])
        all_data.append(X)

    if len(set(num_channels)) == 1:
        num_channels = num_channels[0]
        gt1 = num_channels > 1
        print 'There ' + 'is '*(not gt1) + 'are '*gt1 + str(num_channels) + ' channel' + 's'*gt1
    else:
        print 'Channels: ' + str(num_channels)
        sys.exit('Error: There are different numbers of channels being used for different seizure files...')

    # get the number of parameters (3 energy statistics per channel)
    p_feat = 3 * num_channels

    # pre-process data -- filter parameters
    print 'Applying a band-pass filter to the data'
    band = np.array([0.1,100.])
    band_norm = band / (f_s / 2.) # normalize the band
    filt_order = 3

    # band pass filter the data
    b, a = signal.butter(filt_order, band_norm, 'bandpass') # design filter
    for j in range(num_files):
        all_data[j] = signal.filtfilt(b,a,all_data[j],axis=0) # filter the data

    # run leave-one-out cross validation testing
    sensitivity, latency, FP, time = loocv_testing(all_data, con_type, window_length, window_overlap, num_windows, f_s, seizure_times, p_feat, save_path)

    # get mean statistics
    m_sense = np.nanmean(sensitivity)
    m_latency = np.nanmean(latency)
    m_fpr = np.nansum(FP) / np.nansum(time)

    # print to results file
    print >> res_f, '\nPatient ' + patient_id + '\n========================='

    # print the results -- aggregates and total
    print >> res_f, 'Mean Sensitivity: \t%.2f' %(m_sense)
    print >> res_f, 'Mean Latency: \t%.4f' %(m_latency)
    print >> res_f, 'False Positive Rate: \t%.5f (fp/Hr) \n' % m_fpr

    print >> res_f, 'Sensitivity: ' + str(sensitivity)
    print >> res_f, 'Latency: ' + str(latency)
    print >> res_f, 'False Positive Rate: ' + str(FP / time)

    return sensitivity, latency, m_fpr
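
# A hedged driver sketch for analyze_patient (paths and patient id are
# placeholders). res_f can be any writable file object; the function writes
# its per-patient summary there via `print >> res_f`.
with open('results.txt', 'w') as res_f:
    sens, lat, fpr = analyze_patient('/path/to/data/TS039',
                                     '/path/to/save/TS039', 'TS039', res_f,
                                     window_length=1.0, window_overlap=0.5,
                                     num_windows=3000, f_s=1e3)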
        while True:
            # get chunk start and end times
            start = desired_chunk_index * sec_per_min * min_per_chunk
            end = (desired_chunk_index + 1) * sec_per_min * min_per_chunk
            print '\tStart time in seconds:', start
            print '\tEnd time in seconds:', end

            try:

                # extract the chunk
                print '\t\t\tChunk ' + str(ind) + ' reading...\n',
                # Gotta get the data_filename right!
                dimensions_to_keep = choose_best_channels(
                    patient_id, seizure=0, filename=data_filename_path)
                X_chunk, _, labels = edfread(data_filename_path,
                                             rec_times=[start, end],
                                             good_channels=dimensions_to_keep)
                # Added: if the chunk that was read is too short, break.
                if X_chunk.shape[0] < 300 * f_s:
                    print "Chunk is too short! Pick a different chunk to analyze!"
                    break

                # update file information
                all_files.append(X_chunk)
                tmp_data_filenames = data_filename_path
                tmp_file_type = file_type
                tmp_seizure_times = seizure_times
                print '\t\t\tInterictal chunk %d reading complete!' % (ind)
                break

            except ValueError:
                break  # the start was past the end!
def analyze_patient_raw(data_path,
                        f_s=1e3,
                        include_awake=True,
                        include_asleep=False,
                        long_interictal=False):

    # minutes per chunk (only for long interictal files)
    min_per_chunk = 15
    sec_per_min = 60

    # specify data paths
    if not os.path.isdir(data_path):
        sys.exit('Error: Specified data path does not exist')

    p_file = os.path.join(data_path, 'patient_pickle.txt')

    with open(p_file, 'r') as pickle_file:
        print("Open Pickle: {}".format(p_file) + "...\n")
        patient_info = pickle.load(pickle_file)

    # # add data file names
    data_filenames = patient_info['seizure_data_filenames']
    seizure_times = patient_info['seizure_times']
    file_type = ['ictal'] * len(data_filenames)
    seizure_print = [True] * len(data_filenames)  # mark whether this is a seizure file

    if include_awake:
        data_filenames += patient_info['awake_inter_filenames']
        seizure_times += [None] * len(patient_info['awake_inter_filenames'])
        file_type += ['awake'] * len(patient_info['awake_inter_filenames'])
        seizure_print += [False] * len(patient_info['awake_inter_filenames'])

    if include_asleep:
        data_filenames += patient_info['asleep_inter_filenames']
        seizure_times += [None] * len(patient_info['asleep_inter_filenames'])
        file_type += ['sleep'] * len(patient_info['asleep_inter_filenames'])
        seizure_print += [False] * len(patient_info['asleep_inter_filenames'])

    data_filenames = [
        os.path.join(data_path, filename) for filename in data_filenames
    ]
    good_channels = patient_info['good_channels']

    # band pass filter parameters
    # band = np.array([0.1, 100.])
    # band_norm = band / (f_s / 2.)  # normalize the band
    # filt_order = 3
    # b, a = signal.butter(filt_order, band_norm, 'bandpass')  # design filter

    # get data in numpy array
    num_channels = []
    all_files = []
    all_files_unfiltered = []
    tmp_data_filenames = []
    tmp_file_type = []
    tmp_seizure_times = []
    tmp_seizure_print = []

    print 'Getting Data...'
    for i, seizure_file in enumerate(data_filenames):

        # this is for when we have hour-long inter-ictal files that have been split into parts
        if long_interictal and file_type[i] != 'ictal':

            print '\tThis code has not been written'

        else:

            print '\tSeizure file %d reading...' % (i + 1),

            # read data in
            X, _, labels = edfread(seizure_file)

            all_files_unfiltered.append(X)
            n, p = X.shape
            num_channels.append(p)

            # good_channels_ind = []
            # labels = list(labels)
            # for channel in good_channels:
            #     good_channels_ind.append(labels.index(channel))

            # # filter data
            # print 'filtering...',
            # X = signal.filtfilt(b, a, X, axis=0)  # filter the data

            all_files.append(X)  # add raw data to files

    # update temporary stuff
    data_filenames = update_list(data_filenames, tmp_data_filenames)
    file_type = update_list(file_type, tmp_file_type)
    seizure_times = update_list(seizure_times, tmp_seizure_times)
    seizure_print = update_list(seizure_print, tmp_seizure_print)

    # double check that the number of channels matches across data
    if len(set(num_channels)) == 1:
        num_channels = num_channels[0]
        gt1 = num_channels > 1
        print 'There ' + 'is ' * (not gt1) + 'are ' * gt1 + str(
            num_channels) + ' channel' + 's' * gt1 + "\n"
    else:
        print 'Channels: ' + str(num_channels)
        print 'There is an inconsistent number of channels in the raw edf data'
        sys.exit(
            'Error: There are different numbers of channels being used for different seizure files...'
        )

    # double check that no NaN values appear in the features
    for i, X in enumerate(all_files):
        if np.any(np.isnan(X)):
            print 'There are NaN in raw data of file', i
            sys.exit('Error: Uh-oh, NaN encountered while extracting features')

    return all_files, data_filenames, file_type, seizure_times, seizure_print
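
# A hedged usage sketch for analyze_patient_raw (the data path is a
# placeholder). It returns the raw per-file arrays alongside the bookkeeping
# lists assembled from patient_pickle.txt.
all_files, names, types, times, printable = analyze_patient_raw(
    '/path/to/data/TS039', f_s=1e3,
    include_awake=True, include_asleep=False)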
def analyze_patient(patient_id, data_path, save_path, log_file, parameters, folds, win_len=1.0, win_overlap=0.5, num_windows=1000, f_s=1e3,include_awake=True, include_asleep=False, long_interictal=False):

    # minutes per chunk (only for long interictal files)
    min_per_chunk = 15
    sec_per_min = 60

    # reformat window length and overlap as indices
    win_len = int(win_len * f_s)
    win_overlap = int(win_overlap * f_s)

    # create save path
    if not os.path.isdir(save_path):
        os.makedirs(save_path)

    # specify data paths
    if not os.path.isdir(data_path):
        sys.exit('Error: Specified data path does not exist')

    p_file = os.path.join(data_path, 'patient_pickle.txt')


    with open(p_file,'r') as pickle_file:
        print("Open Pickle: {}".format(p_file)+"...\n")
        patient_info = pickle.load(pickle_file)

    # add data file names
    data_filenames = patient_info['seizure_data_filenames']
    seizure_times = patient_info['seizure_times']
    file_type = ['ictal'] * len(data_filenames)
    seizure_print = [True] * len(data_filenames)      # mark whether this is a seizure file

    if include_awake:
        data_filenames += patient_info['awake_inter_filenames']
        seizure_times += [None] * len(patient_info['awake_inter_filenames'])
        file_type += ['awake'] * len(patient_info['awake_inter_filenames'])
        seizure_print += [False] * len(patient_info['awake_inter_filenames'])

    if include_asleep:
        data_filenames += patient_info['asleep_inter_filenames']
        seizure_times += [None] * len(patient_info['asleep_inter_filenames'])
        file_type += ['sleep'] * len(patient_info['asleep_inter_filenames'])
        seizure_print += [False] * len(patient_info['asleep_inter_filenames'])

    data_filenames = [os.path.join(data_path,filename) for filename in data_filenames]
    good_channels = patient_info['good_channels']

    # band pass filter parameters
    band = np.array([0.1,100.])
    band_norm = band / (f_s / 2.) # normalize the band
    filt_order = 3
    b, a = signal.butter(filt_order, band_norm, 'bandpass') # design filter

    # get data in numpy array
    num_channels = []
    all_files = []
    tmp_data_filenames = []
    tmp_file_type = []
    tmp_seizure_times = []
    tmp_seizure_print = []

    print 'Getting Data...'
    for i, seizure_file in enumerate(data_filenames):

        # check that we haven't already gotten energy statistics for this seizure file
        file_begin = os.path.join(save_path, os.path.splitext(os.path.basename(data_filenames[i]))[0])

        # this is for when we have hour-long inter-ictal files that have been split into parts
        if long_interictal and file_type[i] != 'ictal':

            # get all files in the save path
            all_files_in_dir = [os.path.join(save_path,fn) for fn in next(os.walk(save_path))[2]]
            # all_files_in_dir = os.listdir(save_path)


            # if the energy stats exist for this file(.es represents energy statistics)
            inter_ictal_files = [s for s in all_files_in_dir if s.startswith(file_begin) and s.endswith(".es")]

            # if inter ictal (energy) files found, read them
            if inter_ictal_files:

                tmp_data_filenames.append( (i, inter_ictal_files))
                tmp_file_type.append( (i, [file_type[i]] * len(inter_ictal_files)) )
                tmp_seizure_times.append( (i, [seizure_times[i]] * len(inter_ictal_files)) )
                tmp_seizure_print.append( (i, [seizure_print[i]] * len(inter_ictal_files)) )

                # read each of the interictal files
                print "\tSeizure file %d is long --reading energy statistics directly..." %(i+1),
                for j, file_name in enumerate(inter_ictal_files):
                    print "%d" %(j+1),
                    X_feat, p = read_energy_file(file_name)
                    all_files.append(X_feat)        #  feature vector X from all the inter_ictal_files saved to all_files
                    num_channels.append(p)
                print " "


            else:

                # read the file
                print '\tSeizure file %d is long...' %(i+1)
                j = 0
                while True:
                    # get chunk start and end times
                    start = j * sec_per_min * min_per_chunk
                    end = (j+1) * sec_per_min * min_per_chunk

                    # get the chunk
                    try:

                        # extract the chunk
                        print '\t\tChunk ' + str(j+1) + ' reading...',
                        X_chunk, _, labels = edfread(seizure_file, rec_times = [start, end])
                        n,p = X_chunk.shape
                        num_channels.append(p)
                        good_channels_ind = []
                        labels = list(labels)
                        for channel in good_channels:
                            good_channels_ind.append(labels.index(channel))

                        # filter the chunk
                        print 'filtering...',
                        X_chunk = signal.filtfilt(b,a,X_chunk,axis=0) # filter the data

                        # get feature vectors from windows -- energy statistics
                        print 'extracting features...',
                        n_windows = n / (win_len - win_overlap) - 1 # evaluates to floor(n / (L - O)) - 1 since these are ints
                        X_feat = np.empty((n_windows,3,p))
                        m = 0
                        for k in range(win_len, X_chunk.shape[0], win_len - win_overlap):
                            window = X_chunk[(k-win_len):k,:] # select window
                            f = energy_features(window) # extract energy statistics
                            X_feat[m,:,:] = f
                            m += 1
                        all_files.append(X_feat) # add feature to files

                        # save energy statistics file
                        print 'saving...'
                        es_file = file_begin + "_%d.es"%(j)
                        write_energy_file(es_file, X_feat)

                        # print to csv if this is a desired seizure file
                        if seizure_print[i]:
                            filtered_file = file_begin + '_%d_filtered.csv'%(j)
                            energy_file = file_begin + '_%d_energystats.csv'%(j)
                            create_filtered_csv(filtered_file, X_chunk, good_channels_ind)
                            create_energy_csv(energy_file, X_feat[:,:,good_channels_ind[0]])

                        # update count
                        j += 1
                        # if less than an entire chunk (in samples) was read, then this is the last one!
                        if X_chunk.shape[0] < sec_per_min * min_per_chunk * f_s:
                            break
                    except ValueError:
                        print "no wait, that doesn't exist!"
                        break # the start was past the end!

                # store temporary stuff
                tmp_data_filenames.append( (i, [file_begin + "_%d.es"%(k) for k in range(j)]) )  # file_begin already includes save_path
                tmp_file_type.append( (i, [file_type[i]] * j) )
                tmp_seizure_times.append( (i, [seizure_times[i]] * j) )
                tmp_seizure_print.append( (i, [seizure_print[i]] * j) )

        else:
            es_file = file_begin + ".es"
            if os.path.isfile(es_file):
                print "\tSeizure file %d --reading energy statistics directly" %(i+1)
                X_feat, p = read_energy_file(es_file)
                all_files.append(X_feat)
                num_channels.append(p)
            else:
                print '\tSeizure file %d reading...' %(i+1),

                # read data in
                X,_,labels = edfread(seizure_file)
                n,p = X.shape
                num_channels.append(p)
                good_channels_ind = []
                labels = list(labels)
                for channel in good_channels:
                    print "channel",channel
                    good_channels_ind.append(labels.index(channel))

                # filter data
                print 'filtering...',
                X = signal.filtfilt(b,a,X,axis=0) # filter the data

                # get feature vectors from windows -- energy statistics
                print 'extracting features...'
                n_windows = n / (win_len - win_overlap) - 1 # evaluates to floor(n / (L - O)) - 1 since these are ints
                X_feat = np.empty((n_windows,3,p))
                k = 0
                for j in range(win_len, X.shape[0], win_len - win_overlap):
                    window = X[(j-win_len):j,:] # select window
                    f = energy_features(window) # extract energy statistics
                    X_feat[k,:,:] = f
                    k += 1
                all_files.append(X_feat) # add feature to files

                # save energy statistics file
                write_energy_file(es_file, X_feat)

                # print to csv if this is a desired seizure file
                if seizure_print[i]:
                    filtered_file = file_begin + '_filtered.csv'
                    energy_file = file_begin + '_energystats.csv'
                    create_filtered_csv(filtered_file, X, good_channels_ind)
                    create_energy_csv(energy_file, X_feat[:,:,good_channels_ind[0]])


    # update temporary stuff
    data_filenames = update_list(data_filenames, tmp_data_filenames)
    file_type = update_list(file_type, tmp_file_type)
    seizure_times = update_list(seizure_times, tmp_seizure_times)
    seizure_print = update_list(seizure_print, tmp_seizure_print)

    # double check that the number of channels matches across data
    if len(set(num_channels)) == 1:
        num_channels = num_channels[0]
        gt1 = num_channels > 1
        print 'There ' + 'is '*(not gt1) + 'are '*gt1 + str(num_channels) + ' channel' + 's'*gt1+"\n"
    else:
        print 'Channels: ' + str(num_channels)
        sys.exit('Error: There are different numbers of channels being used for different seizure files...')

    # double check that no NaN values appear in the features
    for X in all_files:
        if np.any(np.isnan(X)):
            sys.exit('Error: Uh-oh, NaN encountered while extracting features')

    # leave one out cross validation, update log
    fitnesses = loocv_testing(all_files, data_filenames, file_type, seizure_times, seizure_print, win_len, win_overlap, num_windows, f_s, save_path, parameters, folds)
    #update_log(log_file, patient_id, sensitivity, latency, FP, time)

    return fitnesses
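
# A small self-contained sketch of the window indexing used above, with a
# hypothetical stand-in for energy_features (three statistics per channel is
# an assumption; the real energy_features is defined elsewhere). Note the
# stop of n + 1 in range(), which also includes a window ending exactly at
# the last sample, so every row of X_feat gets filled.
import numpy as np

def _demo_energy_features(window):
    # hypothetical 3-statistics-per-channel features, for illustration only
    return np.vstack((window.mean(axis=0),
                      window.var(axis=0),
                      np.abs(np.diff(window, axis=0)).sum(axis=0)))

n, p = 10000, 4                      # samples, channels
win_len, win_overlap = 1000, 500     # samples (50% overlap)
X = np.random.randn(n, p)
n_windows = n / (win_len - win_overlap) - 1  # integer division in Python 2
X_feat = np.empty((n_windows, 3, p))
for m, k in enumerate(range(win_len, n + 1, win_len - win_overlap)):
    X_feat[m, :, :] = _demo_energy_features(X[(k - win_len):k, :])
assert m == n_windows - 1            # all rows filled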
#
#
# asleep_file= "/Users/TianyiZhang/Documents/EpilepsyVIP/data/TS041/DA00101Q_1-1_02oct2010_03_00_05_Sleep+.edf"
#
# X_asleep, _,labels_asleep = edfread(asleep_file)
#
# print "how many channels for asleep file?",len(labels_asleep)
#
# awake_file= "/Users/TianyiZhang/Documents/EpilepsyVIP/data/TS041/DA00101P_1-1_02oct2010_09_00_38_Awake+.edf"
#
# X_awake, _,labels_awake = edfread(awake_file)
#
# print "how many channels for asleep file?",len(labels_awake)

edf_file = "/Users/TianyiZhang/Documents/EpilepsyVIP/data/TS039/CA00100D_1-1+.edf"  # avoid shadowing the builtin `file`
X, _, labels = edfread(edf_file)
expected = "RAH3"
print labels.index(expected)
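
# A hedged variant of the lookup above: labels.index raises ValueError when
# the channel is absent, so a membership check avoids crashing on recordings
# with a different montage (same placeholder file as above).
if expected in labels:
    print 'channel %s is at index %d' % (expected, list(labels).index(expected))
else:
    print 'channel %s not found among: %s' % (expected, ', '.join(labels))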
"""Test Filters"""

# # Filter a noisy signal.
# T = 0.05
# nsamples = T * fs
# t = np.linspace(0, T, nsamples, endpoint=False)
# a = 0.02
# f0 = 600.0
# x = 0.1 * np.sin(2 * np.pi * 1.2 * np.sqrt(t))
# x += 0.01 * np.cos(2 * np.pi * 312 * t + 0.1)
# x += a * np.cos(2 * np.pi * f0 * t + .11)
# x += 0.03 * np.cos(2 * np.pi * 2000 * t)
# plt.figure(2)
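
# A runnable sketch of the band-pass filtering used throughout these
# snippets (order-3 Butterworth over 0.1-100 Hz, zero-phase via filtfilt);
# the test signal here is synthetic, for illustration only.
import numpy as np
from scipy import signal

f_s = 1000.0
t = np.arange(0, 2.0, 1.0 / f_s)
x = np.sin(2 * np.pi * 10 * t) + 0.5 * np.sin(2 * np.pi * 300 * t)  # 10 Hz + 300 Hz
band_norm = np.array([0.1, 100.]) / (f_s / 2.)  # normalize band to Nyquist
b, a = signal.butter(3, band_norm, 'bandpass')  # design filter
x_filt = signal.filtfilt(b, a, x)  # the 300 Hz component is strongly attenuated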
def classif_data_collect(seiz_filenames,
                         seizure_times,
                         inter_filenames,
                         window_len,
                         preictal_time,
                         postictal_time,
                         n_windows,
                         sliding_window=False,
                         window_overlap=.8,
                         fs=1000.,
                         good_channels=None,
                         bad_channels=None,
                         rstat_bands=((1, 4), (5, 8), (9, 13), (14, 25),
                                      (25, 90), (100, 200)),
                         rstat_win_len=20000,
                         notch_filt=True,
                         norm_whole_file=True,
                         norm_window=False):
    '''
    :param seiz_filenames:  A list of the .edf filenames which contain epileptic data
    :param seizure_times:   A list, corresponding to the ictal filenames, of
                                tuples containing seizure start and end times
                                (in seconds) for the seizure in each recording.

                            For instance, if my filename list looks like
                            ['seizure_a.edf','seizure_b.edf']

                            Then my seizure_times list might be:
                            [(123,135),(60,300)]
    :param inter_filenames: A list of .edf filenames of interictal data
    :param good_channels:   A list of the channel names that should be read;
                                all other channels are ignored
    :param window_len:      The length of the windows that the program takes,
                                in number of samples
    :param preictal_time:   The amount of time, in seconds, before a seizure, for which
                                a window will still be considered as preictal for training purposes
    :param postictal_time:  The amount of time, in seconds, after a seizure
                                that the period is considered postictal
    :param n_windows:       The number of windows of each class we choose to extract
    :param sliding_window:  (bool) If True, extract windows by sliding with a fixed
                                overlap instead of sampling window positions randomly
    :param window_overlap:  The maximum allowable overlap (as a proportion) between two windows
    :param fs:              The sampling frequency of the recording, in Hz
    :param bad_channels:    Use this to read in all channels except for a select few
    :param rstat_bands:     An iterable of pairs of two elements, representing the
                                start and stop of the bands of interest. May be
                                None if no r-statistic filtering is desired
    :param rstat_win_len:   (int) The length of the random windows
                                extracted in computing the r-statistic
    :param notch_filt:      (bool) Whether a notch filter is applied to the data
    :param norm_whole_file: (bool) If True, normalize the whole file rather than each window
    :param norm_window:     (bool) If True, normalize each window rather than the whole file

    :return: a dict with 'data' (a list of dicts with keys 'window', 'label',
                 'fold_lab', and 'time') and 'seize_times' (the seizure_times list)
    '''
    def select_windows_from_interval(int_start, int_end, max_iter=10):
        '''
        :param int_start: first sample index of the interval
        :param int_end:   last sample index of the interval
        :param max_iter:  number of random draws to attempt before falling
                              back to evenly spaced window ends
        :return: window_ends, a numpy array with indices at which the sample ends
        '''
        window_ends = np.sort(
            np.random.randint(int_start + window_len, int_end, n_windows))
        if n_windows == 1:
            return window_ends
        for i in range(max_iter):
            diffs = np.diff(window_ends)
            if all(diffs >= (window_len * window_overlap)):
                return window_ends
            #TODO: refine window logic
            window_ends = np.sort(
                np.random.randint(int_start + window_len, int_end, n_windows))

        else:
            # no random draw satisfied the overlap constraint within max_iter;
            # manually select evenly spaced window ends
            window_ends = int_end - window_overlap * window_len * np.arange(
                n_windows)
            return window_ends

    def generate_windows_from_seizure(sliding_window=False):
        '''
        Given seizure data, select random windows
        from the preictal,ictal,and postictal phases
        :return:
        '''
        preictal_samples, postictal_samples = int(preictal_time * fs), int(
            postictal_time * fs)
        seiz_start, seiz_end = seize_times

        ictal_samples = int((seiz_end - seiz_start) * fs)
        max_iter = 4

        if not sliding_window:
            if not window_overlap:
                if (n_windows*window_len) > \
                        (min(preictal_samples, postictal_samples,ictal_samples)):
                    raise ValueError(
                        'Nonoverlapping windows not possible to produce')
            else:
                if (n_windows*window_len - (n_windows-1)*window_len*window_overlap)>\
                    min(preictal_samples,postictal_samples,ictal_samples):
                    raise ValueError(
                        'Not possible to produce overlapping windows with certain max_proportion'
                    )

        seiz_start = int(seiz_start * fs)
        seiz_end = int(seiz_end * fs)

        interval_starts = (seiz_start - preictal_samples, seiz_start, seiz_end)
        interval_ends = (seiz_start, seiz_end, np.size(seiz_data, axis=0))
        labels = ['Preictal', 'Ictal', 'Postictal']

        if sliding_window:
            end_times = range(window_len - 1, np.size(seiz_data, 0),
                              int(window_len * (1 - window_overlap)))

            for end in end_times:
                start = end - window_len + 1
                lab_index = interval_inclusion_index(start, end,
                                                     interval_starts,
                                                     interval_ends)
                if lab_index is None:
                    label = 'None of the Above'
                else:
                    label = labels[lab_index]
                window = preprocess(seiz_data[start:end + 1, :],
                                    normaliz=norm_window,
                                    notch_filt=False)
                data_container.append({
                    'window': window,
                    'label': label,
                    'fold_lab': 'S{}'.format(seiz_count),
                    'time': end / fs
                })
        else:

            for interval_start, interval_end, label in zip(
                    interval_starts, interval_ends, labels):
                window_ends = select_windows_from_interval(
                    interval_start, interval_end, max_iter)
                for end in window_ends:
                    window = preprocess(seiz_data[end - window_len + 1:end +
                                                  1, :],
                                        normaliz=norm_window,
                                        notch_filt=False)
                    data_container.append({
                        'window': window,
                        'label': label,
                        'fold_lab': 'S{}'.format(seiz_count),
                        'time': end / fs
                    })
        return

    def generate_windows_from_nonseizure(sliding_window=False):
        label = 'Interictal'
        if sliding_window:
            end_times = range(window_len, np.size(seiz_data, 0),
                              int(window_len * (1 - window_overlap)))
            for end in end_times:
                window = preprocess(seiz_data[end - window_len + 1:end + 1, :],
                                    normaliz=norm_window,
                                    notch_filt=False)
                data_container.append({
                    'window': window,
                    'label': label,
                    'fold_lab': 'NS{}'.format(non_seiz_count),
                    'time': end / fs
                })

        else:
            max_iter = 4  # maximum times to pick a random list for nonoverlapping windows
            end_times = select_windows_from_interval(
                0, np.size(seiz_data, axis=0), max_iter)
            for end in end_times:
                window = preprocess(seiz_data[end - window_len + 1:end + 1, :],
                                    normaliz=norm_window,
                                    notch_filt=False)
                data_container.append({
                    'window': window,
                    'label': label,
                    'fold_lab': 'NS{}'.format(non_seiz_count),
                    'time': end / fs
                })
        return

    def preprocess(seiz_data,
                   normaliz=False,
                   notch_filt=True,
                   rstat_band=False,
                   causal=True):
        if normaliz:
            seiz_data = normalize(seiz_data)

        if notch_filt:
            seiz_data = notch(seiz_data, 56., 64., fs, mode=not causal)

        if rstat_band:
            seiz_data = rstat_processor.optimal_bandpass(seiz_data,
                                                         mode=not causal)
        return seiz_data

    ###start main code of the function
    rstat_filt = False
    if rstat_bands is not None:

        rstat_processor = RstatPreprocessor(inter_filenames[0],
                                            seiz_filenames[0],
                                            seizure_times=seizure_times[0],
                                            fs=1000.)
        rstat_processor.prepare_rstat(rstat_bands,
                                      good_channels=good_channels,
                                      bad_channels=bad_channels,
                                      window_len=20000,
                                      mode=0)

        rstat_filt = True

    data_container = []

    if not window_overlap:
        window_overlap = 0

    for seiz_count, (seizure_file, seize_times) in enumerate(
            zip(seiz_filenames, seizure_times)):
        seiz_data, _, _ = edfread(seizure_file,
                                  good_channels=good_channels,
                                  bad_channels=bad_channels)

        seiz_data = preprocess(seiz_data,
                               normaliz=norm_whole_file,
                               notch_filt=notch_filt,
                               rstat_band=rstat_filt)
        generate_windows_from_seizure(sliding_window)

    for non_seiz_count, nonseizure_file in enumerate(inter_filenames):
        seiz_data, _, _ = edfread(nonseizure_file,
                                  good_channels=good_channels,
                                  bad_channels=bad_channels)

        seiz_data = preprocess(seiz_data,
                               normaliz=norm_whole_file,
                               notch_filt=notch_filt,
                               rstat_band=rstat_filt)
        generate_windows_from_nonseizure(sliding_window)
    return {'data': data_container, 'seize_times': seizure_times}
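
# A hedged usage sketch for classif_data_collect (file names, seizure times,
# and channel names are placeholders). It collects labeled windows from one
# seizure recording and one interictal recording, then tallies the labels.
result = classif_data_collect(
    seiz_filenames=['/path/to/seizure_a.edf'],
    seizure_times=[(123, 135)],            # seconds
    inter_filenames=['/path/to/interictal_a.edf'],
    window_len=1000,                       # samples
    preictal_time=60, postictal_time=60,   # seconds
    n_windows=20, fs=1000.,
    good_channels=['RAH1', 'RAH2'],
    rstat_bands=None)                      # skip r-statistic preprocessing
counts = {}
for w in result['data']:
    counts[w['label']] = counts.get(w['label'], 0) + 1
print counts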
    to_data = os.path.dirname(os.path.dirname(os.path.dirname(
        os.getcwd())))  # direct to EpilepsyVIP
    data_path = os.path.join(to_data, 'data')  # direct to data folder

    for i, patient_id in enumerate(patients):
        # update paths specific to each patient
        p_data_path = os.path.join(data_path, patient_id)
        print "---------------------------Analyzing patient ", patient_id, "----------------------------\n"
        # if data path does not work out
        if not os.path.isdir(data_path):
            sys.exit('Error: Specified data path does not exist')
        # get pickle file
        p_file = os.path.join(p_data_path, 'patient_pickle.txt')
        # open pickle file and load
        with open(p_file, 'r') as pickle_file:
            print("Open Pickle: {}".format(p_file) + "...\n")
            patient_info = pickle.load(pickle_file)
        data_filenames = patient_info['seizure_data_filenames']
        seizure_times = patient_info['seizure_times']
        file_type = ['ictal'] * len(data_filenames)
        seizure_print = [True] * len(data_filenames)  # mark whether this is a seizure file

        print 'Getting Data...'
        # read seizure file
        for j, seizure_file in enumerate(data_filenames):  # j, since i already indexes patients above
            path_to_seizure = os.path.join(p_data_path, seizure_file)
            print path_to_seizure
            x, _, labels = edfread(path_to_seizure)
            # output x, raw iEEG signal
            print 'There are', x.shape[1], 'channels'
            selected_features = feature_selection(x)
            cluster_labels = clustering(selected_features)  # avoid clobbering the edf channel labels