Ejemplo n.º 1
0
def test_predictor(predictor_cls):
    """Cross-validate a new ``predictor_cls()`` on the "Dog_1" training data.

    Two accuracy evaluations are printed: one against the first label set
    returned by the loader, and one against the element-wise product of the
    first and second label sets.

    predictor_cls: callable taking no arguments; its return value is the
    predictor handed to XValidation.evaluate.
    """
    model = predictor_cls()
    clips_dir = Global.path_map('clips_folder')

    # Arbitrary FFT band layout.
    fft_band_means = np.linspace(0, 200, 66)
    fft_band_width = 2
    # NOTE(review): this FFT extractor is replaced by the AR extractor on the
    # very next statement, so only ARFeatures reaches the loader.
    extractor = FFTFeatures(band_means=fft_band_means,
                            band_width=fft_band_width)
    extractor = ARFeatures()

    source = DataLoader(clips_dir, extractor)
    features = source.training_data("Dog_1")
    label_sets = source.labels("Dog_1")

    plain_score = XValidation.evaluate(
        features, label_sets[0], model, evaluation=accuracy)
    print(plain_score)

    # Condition the first label set on the second for proper evaluation.
    first_labels, second_labels = label_sets[0], label_sets[1]
    conditioned = [a * b for a, b in zip(first_labels, second_labels)]
    conditioned_score = XValidation.evaluate(
        features, conditioned, model, evaluation=accuracy)
    print(conditioned_score)
    def __init__(self, feature_extractors, predictors, patient,
            data_path=Global.path_map('clips_folder')):
        """
        Store the extractors, predictors, patient and data path.

        feature_extractors: a list (its elements are not checked here)
        predictors: a list (its elements are not checked here)
        patient: a string naming the subject
        data_path: stored unchanged; the default is the value of
        Global.path_map('clips_folder') at the time this method is defined

        Raises AssertionError when an argument has the wrong type.
        """
        # basestring only exists on Python 2; fall back to str elsewhere so
        # the type check does not die with a NameError on Python 3.
        try:
            string_types = basestring
        except NameError:
            string_types = str

        assert isinstance(feature_extractors, list), \
            'feature_extractors must be a list'
        assert isinstance(predictors, list), 'predictors must be a list'
        assert isinstance(patient, string_types), 'patient must be a string'

        self._feature_extractors = feature_extractors
        self._predictors = predictors
        self._patient = patient
        self._data_path = data_path
Ejemplo n.º 3
0
def test_predictor(predictor_cls, patient_name='Dog_1'):
    """Cross-validate a predictor on one patient's data using AR features.

    Loads the training data and labels of ``patient_name``, then runs
    XValidation.evaluate with the AUC metric twice: once on the first label
    set (seizure vs not) and once on the second (early vs not). The mean,
    standard deviation and raw scores of each run are printed.

    INPUT:
    - predictor_cls: a Predictor class; it is instantiated with no arguments
    - patient_name: name of the subject to load (default 'Dog_1')
    """
    # Instantiate a predictor object from the Predictor class.
    predictor = predictor_cls()

    # Path to the data (as configured through Global).
    data_path = Global.path_map('clips_folder')

    # Only autoregressive features are active. Configurations tried here
    # before also mixed in an "FFTFeatures" entry with the args
    # band_means=np.linspace(0, 200, 66) and band_width=2.
    feature_extractor = MixFeatures([{'name': "ARFeatures", 'args': {}}])

    # Load the data.
    loader = DataLoader(data_path, feature_extractor)
    print(loader.base_dir)

    print('\npatient = %s' % patient_name)
    X_list = loader.training_data(patient_name)
    y_list = loader.labels(patient_name)

    # One cross-validation run per label set, reported in the same format.
    tasks = (
        ("\ncross validation: seizures vs not", y_list[0]),
        ("\ncross validation: early_vs_not", y_list[1]),
    )
    for heading, labels in tasks:
        print(heading)
        result = XValidation.evaluate(X_list,
                                      labels,
                                      predictor,
                                      evaluation=auc)
        print('cross-validation results: mean = %.3f, sd = %.3f, raw scores = %s'
              % (np.mean(result), np.std(result), result))
    def __init__(self,
                 patient_name,
                 base_dir=Global.path_map('clips_folder'),
                 use_cache=True,
                 max_train_segments=-1,
                 max_test_segments=-1):
        """
        Record the loading options for one patient and reset cached state.

        patient_name: name of the subject, for example Dog_1
        base_dir: directory holding the patient folders, i.e. the directory
        that contains Dog_1/, Dog_2, ..., Patient_1, Patient_2, ....
        A ValueError is raised when it is not a directory.
        use_cache: if True, loaded data is retained in memory so that later
        calls to the data loading methods return immediately. Requires a
        large amount of memory.
        max_XX_segments: maximum number of segments to load. -1 uses every
        available segment; otherwise all segments (ictal and interictal) are
        randomly subsampled without replacement.
        """
        base_dir_is_valid = os.path.isdir(base_dir)
        if not base_dir_is_valid:
            raise ValueError('%s is not a directory.' % base_dir)

        # Configuration. Once assigned these attributes are not meant to
        # change.
        self.base_dir = base_dir
        self.patient_name = patient_name  # e.g., Dog_1
        self.use_cache = use_cache
        self.max_train_segments = max_train_segments
        self.max_test_segments = max_test_segments

        # Training cache, filled when get_train_data() is called: the list of
        # training file names and the list of loaded
        # (Instance, y_seizure, y_early) entries.
        self.loaded_train_fnames = self.loaded_train_data = None
        # Test cache, filled when get_test_data() is called.
        self.loaded_test_fnames = self.loaded_test_data = None

        # Label lists of {0, 1}: type_labels marks a seizure with 1,
        # early_labels marks an early seizure with 1.
        self.type_labels = self.early_labels = None

        # Filter / resampling settings.
        # NOTE(review): presumably read by the preprocessing code elsewhere -
        # TODO confirm units and consumers.
        self.params = {
            'anti_alias_cutoff': 500.0,
            'anti_alias_width': 30.0,
            'anti_alias_attenuation': 40,
            'elec_noise_width': 3.0,
            'elec_noise_attenuation': 60.0,
            'elec_noise_cutoff': [59.0, 61.0],
            'targetrate': 500,
        }
Ejemplo n.º 5
0
    def __init__(self,
                 feature_extractors,
                 predictors,
                 patient,
                 data_path=Global.path_map('clips_folder')):
        """
        Store the extractors, predictors, patient and data path.

        feature_extractors: a list (its elements are not checked here)
        predictors: a list (its elements are not checked here)
        patient: a string naming the subject
        data_path: stored unchanged; the default is the value of
        Global.path_map('clips_folder') at the time this method is defined

        Raises AssertionError when an argument has the wrong type.
        """
        # isinstance (rather than comparing type objects) also accepts list
        # subclasses.
        assert isinstance(feature_extractors, list), \
            'feature_extractors must be a list'
        assert isinstance(predictors, list), 'predictors must be a list'
        assert isinstance(patient, str), 'patient must be a string'

        self._feature_extractors = feature_extractors
        self._predictors = predictors
        self._patient = patient
        self._data_path = data_path
    def __init__(self, feature_extractor, predictor, patient,
            data_path=Global.path_map('clips_folder')):
        """
        Store one feature extractor, one predictor, the patient and the path.

        feature_extractor: an instance of FeatureExtractBase
        predictor: an instance of PredictorBase
        patient: a string indicating a subject e.g., Dog_1
        data_path: stored unchanged; the default is the value of
        Global.path_map('clips_folder') at the time this method is defined

        Raises AssertionError when an argument has the wrong type.
        """
        # basestring only exists on Python 2; fall back to str elsewhere so
        # the type check does not die with a NameError on Python 3.
        try:
            string_types = basestring
        except NameError:
            string_types = str

        assert isinstance(feature_extractor, FeatureExtractBase)
        assert isinstance(predictor, PredictorBase)
        assert isinstance(patient, string_types)

        self._feature_extractor = feature_extractor
        self._predictor = predictor
        self._patient = patient
        self._data_path = data_path
def test_predictor(predictor_cls, patient_name='Dog_1'):
    """Cross-validate a predictor on one patient's data using AR features.

    Loads the training data and labels of ``patient_name``, then runs
    XValidation.evaluate with the AUC metric twice: once on the first label
    set (seizure vs not) and once on the second (early vs not). The mean,
    standard deviation and raw scores of each run are printed.

    Every print call takes a single argument, so the output is the same
    whether print is the Python 2 statement or the Python 3 function.

    INPUT:
    - predictor_cls: a Predictor class; it is instantiated with no arguments
    - patient_name: name of the subject to load (default 'Dog_1')
    """
    # Instantiate a predictor object from the Predictor class.
    predictor = predictor_cls()

    # Path to the data (as configured through Global).
    data_path = Global.path_map('clips_folder')

    # Only autoregressive features are active. Configurations tried here
    # before also mixed in an "FFTFeatures" entry with the args
    # band_means=np.linspace(0, 200, 66) and band_width=2.
    feature_extractor = MixFeatures([{'name': "ARFeatures", 'args': {}}])

    # Load the data.
    loader = DataLoader(data_path, feature_extractor)
    print(loader.base_dir)

    print('\npatient = %s' % patient_name)
    X_list = loader.training_data(patient_name)
    y_list = loader.labels(patient_name)

    # One cross-validation run per label set, reported in the same format.
    tasks = (
        ("\ncross validation: seizures vs not", y_list[0]),
        ("\ncross validation: early_vs_not", y_list[1]),
    )
    for heading, labels in tasks:
        print(heading)
        result = XValidation.evaluate(X_list, labels, predictor, evaluation=auc)
        print('cross-validation results: mean = %.3f, sd = %.3f, raw scores = %s'
              % (np.mean(result), np.std(result), result))
Ejemplo n.º 8
0
    def __init__(self,
                 feature_extractor,
                 predictor,
                 patient,
                 data_path=Global.path_map('clips_folder')):
        """
        Type-check the arguments and keep them on the instance.

        feature_extractor: must be a FeatureExtractBase instance
        predictor: must be a PredictorBase instance
        patient: subject name as a string, e.g., Dog_1
        data_path: kept as given

        An AssertionError is raised for an argument of the wrong type.
        """
        # Validate everything first so nothing is stored on a bad call.
        assert isinstance(feature_extractor, FeatureExtractBase)
        assert isinstance(predictor, PredictorBase)
        assert isinstance(patient, str)

        self._data_path = data_path
        self._patient = patient
        self._predictor = predictor
        self._feature_extractor = feature_extractor
    def __init__(self, data_path, patients=None):
        """
        Constructor

        Parameters:
        data_path   - / terminated path. This is the base folder
        containing e.g., Dog_1/, Dog_2/, ....
        NOTE: the argument is only checked to be an existing directory; the
        path actually stored in self.data_path is always
        Global.path_map('clips_folder').
        patients - a single patient name (e.g., 'Dog_1') or a list/tuple of
        patient names e.g., ['Dog_1', 'Patient_2', ...]. None selects
        Dog_1..Dog_4 and Patient_1..Patient_8.

        Raises ValueError if data_path is not a directory.
        """
        if not os.path.isdir(data_path):
            raise ValueError('%s is not a directory.'%data_path)

        # NOTE(review): the validated data_path argument is not the one that
        # is stored. Callers visible in this file pass other folders and rely
        # on this, so it is left unchanged - confirm before honoring the
        # argument.
        self.data_path = Global.path_map('clips_folder')

        if patients is None:
            self.patients = (["Dog_%d" % i for i in range(1, 5)] +
                             ["Patient_%d" % i for i in range(1, 9)])
        elif isinstance(patients, (list, tuple)):
            # A sequence of names is used as given instead of being nested
            # inside another list.
            self.patients = list(patients)
        else:
            # A single subject name.
            self.patients = [patients]
Ejemplo n.º 10
0
def Xval_on_single_patient(predictor_cls,
                           feature_extractor,
                           patient_name="Dog_1",
                           preprocess=True):
    """
    Single patient cross validation
    Returns 2 lists of cross validation performances

    The data folder is taken from Global.path_map('clips_folder'); it is no
    longer replaced by a machine-specific absolute path.

    :param predictor_cls: callable taking no arguments that returns the
        predictor to evaluate (e.g. a predictor class)
    :param feature_extractor: feature extractor handed to the DataLoader
    :param patient_name: name of the subject to load
    :param preprocess: forwarded to loader.blocks_for_Xvalidation
    :return: (result_seizure, result_early), the values returned by
        XValidation.evaluate for the seizure and the early labels
    """
    # Instantiate the predictor.
    predictor = predictor_cls()
    base_dir = Global.path_map('clips_folder')
    loader = DataLoader(base_dir, feature_extractor)

    X_list, y_seizure, y_early = loader.blocks_for_Xvalidation(
        patient_name, preprocess=preprocess)

    # Run cross validation once per label set, reporting both the same way.
    print(patient_name)
    tasks = (
        ("\ncross validation: seizures vs not", y_seizure),
        ("\ncross validation: early_vs_not", y_early),
    )
    results = []
    for heading, labels in tasks:
        print(heading)
        scores = XValidation.evaluate(X_list,
                                      labels,
                                      predictor,
                                      evaluation=auc)
        print('cross-validation results: mean = %.3f, sd = %.3f, raw scores = %s'
              % (np.mean(scores), np.std(scores), scores))
        results.append(scores)

    result_seizure, result_early = results
    return result_seizure, result_early
def test_predictor(predictor_cls):
    """Cross-validate a new ``predictor_cls()`` on the "Dog_1" training data.

    Prints the accuracy-based XValidation.evaluate result twice: once against
    the first label set returned by the loader, and once against the
    element-wise product of the first and second label sets.

    Every print call takes a single argument, so the output is the same
    whether print is the Python 2 statement or the Python 3 function.

    predictor_cls: callable taking no arguments; its return value is the
    predictor handed to XValidation.evaluate.
    """
    predictor = predictor_cls()

    data_path = Global.path_map('clips_folder')
    # arbitrary
    band_means = np.linspace(0, 200, 66)
    band_width = 2
    # NOTE(review): this FFT extractor is discarded by the next assignment,
    # so only ARFeatures reaches the loader. The construction is kept in case
    # it has side effects - confirm and remove.
    feature_extractor = FFTFeatures(band_means=band_means, band_width=band_width)
    feature_extractor = ARFeatures()

    loader = DataLoader(data_path, feature_extractor)
    X_list = loader.training_data("Dog_1")
    y_list = loader.labels("Dog_1")

    print(XValidation.evaluate(X_list, y_list[0], predictor, evaluation=accuracy))

    # Set the conditioned results for proper evaluation
    conditioned = [a * b for (a, b) in zip(y_list[0], y_list[1])]
    print(XValidation.evaluate(X_list, conditioned, predictor, evaluation=accuracy))
Ejemplo n.º 12
0
    def __init__(self, data_path, patients=None):
        """
        Constructor

        Parameters:
        data_path   - / terminated path. This is the base folder
        containing e.g., Dog_1/, Dog_2/, ....
        NOTE: the argument is only checked to be an existing directory; the
        path actually stored in self.data_path is always
        Global.path_map('clips_folder').
        patients - a single patient name (e.g., 'Dog_1') or a list/tuple of
        patient names e.g., ['Dog_1', 'Patient_2', ...]. None selects
        Dog_1..Dog_4 and Patient_1..Patient_8.

        Raises ValueError if data_path is not a directory.
        """
        if not os.path.isdir(data_path):
            raise ValueError('%s is not a directory.' % data_path)

        # NOTE(review): the validated data_path argument is not the one that
        # is stored. Callers visible in this file pass other folders and rely
        # on this, so it is left unchanged - confirm before honoring the
        # argument.
        self.data_path = Global.path_map('clips_folder')

        if patients is None:
            self.patients = (["Dog_%d" % i for i in range(1, 5)] +
                             ["Patient_%d" % i for i in range(1, 9)])
        elif isinstance(patients, (list, tuple)):
            # A sequence of names is used as given instead of being nested
            # inside another list.
            self.patients = list(patients)
        else:
            # A single subject name.
            self.patients = [patients]
def visualise():
    """Plot SVD components of the "Dog_1" FFT features, coloured by label.

    For every entry of the training data an SVD is computed, and the first
    two rows of its third output are plotted against each other: blue
    circles where the matching label equals 0, red circles where it equals 1.
    The figure is shown once all entries have been drawn.
    """
    clips_dir = Global.path_map('clips_folder')

    # Arbitrary FFT band layout.
    fft_band_means = np.linspace(0, 200, 66)
    fft_band_width = 2
    extractor = FFTFeatures(band_means=fft_band_means,
                            band_width=fft_band_width)

    source = DataLoader(clips_dir, extractor)
    blocks = source.training_data("Dog_1")
    first_label_set = source.labels("Dog_1")[0]

    plt.figure()
    for idx, X in enumerate(blocks):
        y_seizure = first_label_set[idx]

        # NOTE(review): the third value returned by np.linalg.svd holds the
        # right singular vectors as rows; masking it with per-entry labels
        # assumes the sizes line up - confirm this is what was intended.
        _, _, V = np.linalg.svd(X, full_matrices=True)

        label_is_zero = y_seizure == 0
        plt.plot(V[0][label_is_zero], V[1][label_is_zero], 'bo')
        label_is_one = y_seizure == 1
        plt.plot(V[0][label_is_one], V[1][label_is_one], 'ro')

    plt.show()
Ejemplo n.º 14
0
def visualise():
    """Scatter the first two SVD rows of each "Dog_1" training entry.

    Features come from an FFT extractor with an arbitrary band layout. Per
    entry, positions whose label is 0 are drawn as blue circles and positions
    whose label is 1 as red circles, all on one figure that is shown at the
    end.
    """
    root = Global.path_map('clips_folder')

    # Arbitrary FFT band layout.
    means = np.linspace(0, 200, 66)
    width = 2
    fft_extractor = FFTFeatures(band_means=means, band_width=width)

    data = DataLoader(root, fft_extractor)
    entries = data.training_data("Dog_1")
    entry_labels = data.labels("Dog_1")[0]

    plt.figure()
    for position, X in enumerate(entries):
        y_seizure = entry_labels[position]

        # Only the third SVD output is used.
        _, _, V = np.linalg.svd(X, full_matrices=True)
        row_x, row_y = V[0], V[1]

        plt.plot(row_x[y_seizure == 0], row_y[y_seizure == 0], 'bo')
        plt.plot(row_x[y_seizure == 1], row_y[y_seizure == 1], 'ro')

    plt.show()
Ejemplo n.º 15
0
import numpy as np
from seizures.submission import SubmissionFile
from seizures.prediction.SVMPredictor import SVMPredictor
from seizures.features.MixFeatures import MixFeatures
from seizures.Global import Global

# Example script: set up predictors and features for a submission file.

# The subject folder comes from Global; an absolute path
# ("/nfs/data3/kaggle_seizure/scratch/Stiched_data/Dog_1/") was used here
# before.
data_path = Global.get_subject_folder('Dog_1')

# Predictors: the SVMPredictor class itself is bound to both names.
predictor_seizure = predictor_early = SVMPredictor

# Features. The FFT band settings below are only needed by an alternative
# ARFeatures + FFTFeatures mix; the active mix is ARFeatures + PLVFeatures.
band_means = np.linspace(0, 200, 66)
band_width = 2
FFTFeatures_args = {'band_means': band_means, 'band_width': band_width}
feature_extractor = MixFeatures([
    {'name': "ARFeatures", 'args': {}},
    {'name': "PLVFeatures", 'args': {}},
])

submissionfile = SubmissionFile(data_path)

# Load training data
# Learn classifiers
# Make final file
submissionfile.generate_submission(predictor_seizure, predictor_early,
Ejemplo n.º 16
0
'''
Created on 28 Jun 2014

@author: heiko
'''
from seizures.features.RandomFeatures import RandomFeatures
from seizures.prediction.RandomPredictor import RandomPredictor
from seizures.submission.SubmissionFile import SubmissionFile
from seizures.Global import Global

if __name__ == '__main__':
    # Run the submission pipeline with random features and a random
    # predictor; the same predictor object serves as both the seizure and
    # the early model.
    predictor = RandomPredictor()
    extractor = RandomFeatures()
    predictor_seizure = predictor_early = predictor

    test_files = ["Dog_1_test_segment_1.mat"]

    data_path = Global.path_map('clips_folder')
    submission = SubmissionFile(data_path)
    submission.generate_submission(
        predictor_seizure, predictor_early, extractor,
        test_filenames=test_files)
'''
Created on 28 Jun 2014

@author: heiko
'''
from seizures.features.RandomFeatures import RandomFeatures
from seizures.prediction.RandomPredictor import RandomPredictor
from seizures.submission.SubmissionFile import SubmissionFile
from seizures.Global import Global


if __name__ == '__main__':
    # One random predictor plays both roles (seizure and early), fed by
    # random features.
    predictor = RandomPredictor()
    extractor = RandomFeatures()

    # Single test clip name handed to the submission step.
    test_files = ["Dog_1_test_segment_1.mat"]

    data_path = Global.path_map('clips_folder')
    submission = SubmissionFile(data_path)

    predictor_seizure, predictor_early = predictor, predictor
    submission.generate_submission(predictor_seizure,
                                   predictor_early,
                                   extractor,
                                   test_filenames=test_files)
Ejemplo n.º 18
0
    def generate_submission(self,
                            predictor_seizure,
                            predictor_early,
                            feature_extractor,
                            output_fname="output.csv",
                            test_filenames=None,
                            preprocess=True):
        """
        Train both predictors per patient and write one CSV of test
        predictions per patient.

        For every entry of self.patients the two predictors are fitted on
        that patient's training data, their predictions on the training data
        are printed, and one line per test file reported by the loader is
        written to a CSV file with the header "clip,seizure,early". The same
        two predictor objects are re-fitted for each patient.

        Parameters:
        predictor_seizure - Instance of PredictorBase, fixed parameters
        predictor_early   - Instance of PredictorBase, fixed parameters
        feature_extractor - Instance of FeatureExtractBase, handed to the
                            DataLoader for both training and test data
        output_fname      - Optional name used to build each result file as
                            patient + '_' + output_fname + '.csv' (so the
                            default produces names ending in
                            "output.csv.csv")
        test_filenames    - Currently has no effect: the test files are
                            always taken from loader.files after the test
                            data has been loaded
        preprocess        - Forwarded to the loader's training_data() and
                            test_data() calls

        Raises AssertionError if a predictor or the feature extractor has
        the wrong type.
        """
        # make sure given objects are valid
        assert isinstance(predictor_seizure, PredictorBase)
        assert isinstance(predictor_early, PredictorBase)
        assert isinstance(feature_extractor, FeatureExtractBase)

        # NOTE(review): neither lookup influences the output below (the
        # caller's test_filenames used to be overwritten with the first one,
        # and the per-patient list derived from it was then replaced by
        # loader.files). The calls are kept only in case they have side
        # effects - confirm and remove.
        SubmissionFile.get_submission_filenames()
        SubmissionFile.get_train_filenames()

        # NOTE(review): collected per patient but not used further in the
        # code visible here.
        all_result_lines = []

        for patient in self.patients:
            result_lines = []

            loader = DataLoader(self.data_path, feature_extractor)
            # X_train is n x d
            X_train, y_seizure, y_early = loader.training_data(
                patient, preprocess=preprocess)

            print(X_train.shape)
            print(y_seizure.shape)

            # train both models
            print("Training seizure for " + patient)
            predictor_seizure.fit(X_train, y_seizure)
            print("Training early for " + patient)
            predictor_early.fit(X_train, y_early)

            pred_seizure = predictor_seizure.predict(X_train)
            pred_early = predictor_early.predict(X_train)
            print('Results on training data')
            print('seizure\tearly\tp(seizure)\tp(early)')
            for y_1, y_2, p_1, p_2 in zip(y_seizure, y_early, pred_seizure,
                                          pred_early):
                print('%d\t%d\t%.3f\t%.3f' % (y_1, y_2, p_1, p_2))

            # now predict on all test points, using a fresh loader
            loader = DataLoader(self.data_path, feature_extractor)
            # X_test: n x d matrix
            X_test = loader.test_data(patient, preprocess=preprocess)
            test_fnames_patient = loader.files

            for ifname, fname in enumerate(test_fnames_patient):
                # X is one instance
                X = X_test[ifname, :]
                # [0] to extract probability out of the ndarray
                p_seizure = predictor_seizure.predict(X)[0]
                p_early = predictor_early.predict(X)[0]
                name = fname.split("/")[-1]
                result_lines.append(",".join(
                    [name, str(p_seizure), str(p_early)]))

            csv_fname = patient + '_' + output_fname + '.csv'
            csv_path = Global.get_child_result_folder(csv_fname)
            print("Storing results to " + csv_fname)
            # The with-block closes the file even when a write fails.
            with open(csv_path, "w") as f:
                f.write("clip,seizure,early\n")
                for line in result_lines:
                    f.write(line + '\n')

            all_result_lines.append(result_lines)
    def generate_submission(self, predictor_seizure, predictor_early,
                            feature_extractor, output_fname="output.csv",
                            test_filenames=None, preprocess=True):
        """
        Train both predictors per patient and write one CSV of test
        predictions per patient.

        For every entry of self.patients the two predictors are fitted on
        that patient's training data, their predictions on the training data
        are printed, and one line per test file reported by the loader is
        written to a CSV file with the header "clip,seizure,early". The same
        two predictor objects are re-fitted for each patient.

        Parameters:
        predictor_seizure - Instance of PredictorBase, fixed parameters
        predictor_early   - Instance of PredictorBase, fixed parameters
        feature_extractor - Instance of FeatureExtractBase, handed to the
                            DataLoader for both training and test data
        output_fname      - Optional name used to build each result file as
                            patient + '_' + output_fname + '.csv' (so the
                            default produces names ending in
                            "output.csv.csv")
        test_filenames    - Currently has no effect: the test files are
                            always taken from loader.files after the test
                            data has been loaded
        preprocess        - Forwarded to the loader's training_data() and
                            test_data() calls

        Raises AssertionError if a predictor or the feature extractor has
        the wrong type.
        """
        # make sure given objects are valid
        assert isinstance(predictor_seizure, PredictorBase)
        assert isinstance(predictor_early, PredictorBase)
        assert isinstance(feature_extractor, FeatureExtractBase)

        # NOTE(review): neither lookup influences the output below (the
        # caller's test_filenames used to be overwritten with the first one,
        # and the per-patient list derived from it was then replaced by
        # loader.files). The calls are kept only in case they have side
        # effects - confirm and remove.
        SubmissionFile.get_submission_filenames()
        SubmissionFile.get_train_filenames()

        # NOTE(review): collected per patient but not used further in the
        # code visible here.
        all_result_lines = []

        for patient in self.patients:
            result_lines = []

            loader = DataLoader(self.data_path, feature_extractor)
            # X_train is n x d
            X_train, y_seizure, y_early = loader.training_data(
                patient, preprocess=preprocess)

            print(X_train.shape)
            print(y_seizure.shape)

            # train both models
            print("Training seizure for " + patient)
            predictor_seizure.fit(X_train, y_seizure)
            print("Training early for " + patient)
            predictor_early.fit(X_train, y_early)

            pred_seizure = predictor_seizure.predict(X_train)
            pred_early = predictor_early.predict(X_train)
            print('Results on training data')
            print('seizure\tearly\tp(seizure)\tp(early)')
            for y_1, y_2, p_1, p_2 in zip(y_seizure, y_early, pred_seizure,
                                          pred_early):
                print('%d\t%d\t%.3f\t%.3f' % (y_1, y_2, p_1, p_2))

            # now predict on all test points, using a fresh loader
            loader = DataLoader(self.data_path, feature_extractor)
            # X_test: n x d matrix
            X_test = loader.test_data(patient, preprocess=preprocess)
            test_fnames_patient = loader.files

            for ifname, fname in enumerate(test_fnames_patient):
                # X is one instance
                X = X_test[ifname, :]
                # [0] to extract probability out of the ndarray
                p_seizure = predictor_seizure.predict(X)[0]
                p_early = predictor_early.predict(X)[0]
                name = fname.split("/")[-1]
                result_lines.append(",".join(
                    [name, str(p_seizure), str(p_early)]))

            csv_fname = patient + '_' + output_fname + '.csv'
            csv_path = Global.get_child_result_folder(csv_fname)
            print("Storing results to " + csv_fname)
            # The with-block closes the file even when a write fails.
            with open(csv_path, "w") as f:
                f.write("clip,seizure,early\n")
                for line in result_lines:
                    f.write(line + '\n')

            all_result_lines.append(result_lines)