def test_predictor(predictor_cls, patient_name="Dog_1"):
    """
    Cross-validate a predictor on one patient using ARFeatures.

    Two accuracy evaluations are printed: one against the first label
    list returned by the loader, and one against the element-wise
    product of the first and second label lists.

    INPUT:
    - predictor_cls: a class that is instantiated with no arguments to
      obtain the predictor
    - patient_name: name of the subject to load (defaults to "Dog_1",
      the value that used to be hard-coded)
    """
    predictor = predictor_cls()
    data_path = Global.path_map('clips_folder')

    # The FFTFeatures extractor that used to be built here was overwritten
    # on the very next line, so only the AR extractor is constructed.
    feature_extractor = ARFeatures()

    loader = DataLoader(data_path, feature_extractor)
    X_list = loader.training_data(patient_name)
    y_list = loader.labels(patient_name)

    print(XValidation.evaluate(X_list, y_list[0], predictor,
                               evaluation=accuracy))

    # Set the conditioned results for proper evaluation
    # (presumably y_list[0] is seizure and y_list[1] is early -- TODO confirm)
    conditioned = [a * b for (a, b) in zip(y_list[0], y_list[1])]
    print(XValidation.evaluate(X_list, conditioned, predictor,
                               evaluation=accuracy))
def __init__(self, feature_extractors, predictors, patient,
             data_path=Global.path_map('clips_folder')):
    """
    Store the configuration for one patient.

    feature_extractors: a list of feature extractors
    predictors: a list of predictors
    patient: a string indicating a subject
    data_path: base data folder; the default is resolved once, when the
               method is defined, via Global.path_map('clips_folder')
    """
    # isinstance() instead of comparing type objects: same check for
    # plain lists, and list subclasses are accepted as well.
    assert isinstance(feature_extractors, list)
    assert isinstance(predictors, list)
    assert isinstance(patient, basestring)

    self._feature_extractors = feature_extractors
    self._predictors = predictors
    self._patient = patient
    self._data_path = data_path
def test_predictor(predictor_cls, patient_name='Dog_1'):
    '''
    Load the data of one patient and cross-validate a predictor on it
    using ARFeatures (wrapped in MixFeatures).

    INPUT:
    - predictor_cls: a Predictor class, instantiated without arguments
    - patient_name: subject to load, 'Dog_1' by default
    '''
    predictor = predictor_cls()

    # base folder of the clips
    data_path = Global.path_map('clips_folder')

    # FFT settings; only referenced by alternative feature configurations
    # that are currently not enabled.
    band_means = np.linspace(0, 200, 66)
    band_width = 2
    FFTFeatures_args = {'band_means': band_means, 'band_width': band_width}

    feature_extractor = MixFeatures([{'name': "ARFeatures", 'args': {}}])

    loader = DataLoader(data_path, feature_extractor)
    print(loader.base_dir)
    print('\npatient = %s' % patient_name)

    X_list = loader.training_data(patient_name)
    y_list = loader.labels(patient_name)
    seizure_vs_not, early_vs_not = y_list[0], y_list[1]

    # One cross-validation run per labelling, reported in the same format.
    tasks = (
        ("\ncross validation: seizures vs not", seizure_vs_not),
        ("\ncross validation: early_vs_not", early_vs_not),
    )
    for heading, labels in tasks:
        print(heading)
        scores = XValidation.evaluate(X_list, labels, predictor,
                                      evaluation=auc)
        print('cross-validation results: mean = %.3f, sd = %.3f, raw scores = %s'
              % (np.mean(scores), np.std(scores), scores))
def __init__(self, patient_name, base_dir=Global.path_map('clips_folder'),
             use_cache=True, max_train_segments=-1, max_test_segments=-1):
    """
    Set up a loader for a single patient.

    patient_name: for example, Dog_1
    base_dir: path to the directory containing the patient folders
        (Dog_1/, Dog_2/, ..., Patient_1/, Patient_2/, ...)
    use_cache: if True, loaded data is kept in memory so that later
        loading calls return immediately; needs a lot of memory.
    max_XX_segments: maximum number of segments to load. -1 means use
        every available segment; otherwise all segments (ictal and
        interictal) are randomly subsampled without replacement.

    Raises ValueError when base_dir is not a directory.
    """
    if not os.path.isdir(base_dir):
        raise ValueError('%s is not a directory.' % base_dir)

    # Fixed configuration: assigned here and not meant to change later.
    self.base_dir = base_dir
    self.patient_name = patient_name  # e.g., Dog_1
    self.use_cache = use_cache
    self.max_train_segments = max_train_segments
    self.max_test_segments = max_test_segments

    # Training cache (filled when get_train_data() is called): the list of
    # training file names and the loaded (Instance, y_seizure, y_early).
    self.loaded_train_fnames = self.loaded_train_data = None

    # Test cache (filled when get_test_data() is called).
    self.loaded_test_fnames = self.loaded_test_data = None

    # Lists of {0, 1}: seizure indicators and early-seizure indicators.
    self.type_labels = self.early_labels = None

    self.params = dict(
        anti_alias_cutoff=500.,
        anti_alias_width=30.,
        anti_alias_attenuation=40,
        elec_noise_width=3.,
        elec_noise_attenuation=60.0,
        elec_noise_cutoff=[59., 61.],
        targetrate=500,
    )
def __init__(self, feature_extractors, predictors, patient,
             data_path=Global.path_map('clips_folder')):
    """
    Store the configuration for one patient.

    feature_extractors: a list of feature extractors
    predictors: a list of predictors
    patient: a string indicating a subject
    data_path: base data folder; the default is resolved once, when the
               method is defined, via Global.path_map('clips_folder')
    """
    # isinstance() instead of comparing type objects: same check for
    # plain lists, and list subclasses are accepted as well.
    assert isinstance(feature_extractors, list)
    assert isinstance(predictors, list)
    assert isinstance(patient, str)

    self._feature_extractors = feature_extractors
    self._predictors = predictors
    self._patient = patient
    self._data_path = data_path
def __init__(self, feature_extractor, predictor, patient,
             data_path=Global.path_map('clips_folder')):
    """
    Remember the extractor, predictor, subject and data folder.

    feature_extractor: an instance of FeatureExtractBase
    predictor: an instance of PredictorBase
    patient: a string indicating a subject e.g., Dog_1
    data_path: base data folder (default taken from Global.path_map)
    """
    # Argument checks, in the order the arguments are declared.
    assert isinstance(feature_extractor, FeatureExtractBase)
    assert isinstance(predictor, PredictorBase)
    assert isinstance(patient, basestring)

    self._feature_extractor, self._predictor = feature_extractor, predictor
    self._patient, self._data_path = patient, data_path
def test_predictor(predictor_cls, patient_name='Dog_1'):
    '''
    Load the data of one patient and run cross-validation with
    ARFeatures (wrapped in MixFeatures), printing AUC statistics for the
    "seizure vs not" and "early vs not" labellings.

    INPUT:
    - predictor_cls: a Predictor class, instantiated without arguments
    - patient_name: subject to load, 'Dog_1' by default
    '''
    predictor = predictor_cls()

    # base folder of the clips
    data_path = Global.path_map('clips_folder')

    feature_extractor = MixFeatures([{'name': "ARFeatures", 'args': {}}])

    loader = DataLoader(data_path, feature_extractor)
    # Single-argument print(...) behaves identically as a Python 2 print
    # statement and as the Python 3 print function.
    print(loader.base_dir)
    print('\npatient = %s' % patient_name)

    X_list = loader.training_data(patient_name)
    y_list = loader.labels(patient_name)
    seizure_vs_not, early_vs_not = y_list[0], y_list[1]

    # One cross-validation run per labelling, reported in the same format.
    tasks = (
        ("\ncross validation: seizures vs not", seizure_vs_not),
        ("\ncross validation: early_vs_not", early_vs_not),
    )
    for heading, labels in tasks:
        print(heading)
        result = XValidation.evaluate(X_list, labels, predictor,
                                      evaluation=auc)
        print('cross-validation results: mean = %.3f, sd = %.3f, raw scores = %s'
              % (np.mean(result), np.std(result), result))
def __init__(self, feature_extractor, predictor, patient,
             data_path=Global.path_map('clips_folder')):
    """
    Remember the extractor, predictor, subject and data folder.

    feature_extractor: an instance of FeatureExtractBase
    predictor: an instance of PredictorBase
    patient: a string indicating a subject e.g., Dog_1
    data_path: base data folder (default taken from Global.path_map)
    """
    # Argument checks, in the order the arguments are declared.
    assert isinstance(feature_extractor, FeatureExtractBase)
    assert isinstance(predictor, PredictorBase)
    assert isinstance(patient, str)

    self._feature_extractor, self._predictor = feature_extractor, predictor
    self._patient, self._data_path = patient, data_path
def __init__(self, data_path, patients=None):
    """
    Constructor

    Parameters:
    data_path - / terminated path. This is the base folder
                containing e.g., Dog_1/, Dog_2/, ....
                It is only checked to be an existing directory; see the
                review note below about which path is actually stored.
    patients  - a single patient name (e.g. 'Dog_1') or a list/tuple of
                patient names e.g., ['Dog_1', 'Patient_2', ...].
                None selects Dog_1..Dog_4 and Patient_1..Patient_8.

    Raises ValueError when data_path is not a directory.
    """
    if not os.path.isdir(data_path):
        raise ValueError('%s is not a directory.' % data_path)

    # NOTE(review): the validated data_path argument is not stored; the
    # globally configured clips folder is used instead. Kept as-is because
    # existing callers may pass a different folder and rely on this.
    self.data_path = Global.path_map('clips_folder')

    if patients is None:
        self.patients = (["Dog_%d" % i for i in range(1, 5)] +
                         ["Patient_%d" % i for i in range(1, 9)])
    elif isinstance(patients, (list, tuple)):
        # A sequence of names is used directly instead of being wrapped
        # into a nested list.
        self.patients = list(patients)
    else:
        # A single patient name, as before.
        self.patients = [patients]
def Xval_on_single_patient(predictor_cls, feature_extractor,
                           patient_name="Dog_1", preprocess=True):
    """
    Cross-validate one predictor on a single patient.

    :param predictor_cls: predictor class, instantiated without arguments
    :param feature_extractor: feature extractor handed to DataLoader
    :param patient_name: subject whose blocks are loaded
    :param preprocess: forwarded to DataLoader.blocks_for_Xvalidation
    :return: (seizure scores, early scores) as returned by
             XValidation.evaluate for the two labellings
    """
    predictor = predictor_cls()

    base_dir = Global.path_map('clips_folder')
    # NOTE(review): the mapped folder is replaced by this fixed path right
    # away -- confirm whether the override is still wanted.
    base_dir = '/nfs/data3/kaggle_seizure/clips/'

    loader = DataLoader(base_dir, feature_extractor)
    X_list, y_seizure, y_early = loader.blocks_for_Xvalidation(
        patient_name, preprocess=preprocess)

    print(patient_name)

    # Evaluate both labellings in turn, printing the same summary for each.
    collected = []
    tasks = (
        ("\ncross validation: seizures vs not", y_seizure),
        ("\ncross validation: early_vs_not", y_early),
    )
    for heading, labels in tasks:
        print(heading)
        scores = XValidation.evaluate(X_list, labels, predictor,
                                      evaluation=auc)
        print('cross-validation results: mean = %.3f, sd = %.3f, raw scores = %s'
              % (np.mean(scores), np.std(scores), scores))
        collected.append(scores)

    result_seizure, result_early = collected
    return result_seizure, result_early
def test_predictor(predictor_cls, patient_name="Dog_1"):
    """
    Cross-validate a predictor on one patient using ARFeatures.

    Two accuracy evaluations are printed: one against the first label
    list returned by the loader, and one against the element-wise
    product of the first and second label lists.

    INPUT:
    - predictor_cls: a class that is instantiated with no arguments to
      obtain the predictor
    - patient_name: name of the subject to load (defaults to "Dog_1",
      the value that used to be hard-coded)
    """
    predictor = predictor_cls()
    data_path = Global.path_map('clips_folder')

    # The FFTFeatures extractor that used to be built here was overwritten
    # on the very next line, so only the AR extractor is constructed.
    feature_extractor = ARFeatures()

    loader = DataLoader(data_path, feature_extractor)
    X_list = loader.training_data(patient_name)
    y_list = loader.labels(patient_name)

    # Single-argument print(...) behaves identically as a Python 2 print
    # statement and as the Python 3 print function.
    print(XValidation.evaluate(X_list, y_list[0], predictor,
                               evaluation=accuracy))

    # Set the conditioned results for proper evaluation
    # (presumably y_list[0] is seizure and y_list[1] is early -- TODO confirm)
    conditioned = [a * b for (a, b) in zip(y_list[0], y_list[1])]
    print(XValidation.evaluate(X_list, conditioned, predictor,
                               evaluation=accuracy))
def __init__(self, data_path, patients=None):
    """
    Constructor

    Parameters:
    data_path - / terminated path. This is the base folder
                containing e.g., Dog_1/, Dog_2/, ....
                It is only checked to be an existing directory; see the
                review note below about which path is actually stored.
    patients  - a single patient name (e.g. 'Dog_1') or a list/tuple of
                patient names e.g., ['Dog_1', 'Patient_2', ...].
                None selects Dog_1..Dog_4 and Patient_1..Patient_8.

    Raises ValueError when data_path is not a directory.
    """
    if not os.path.isdir(data_path):
        raise ValueError('%s is not a directory.' % data_path)

    # NOTE(review): the validated data_path argument is not stored; the
    # globally configured clips folder is used instead. Kept as-is because
    # existing callers may pass a different folder and rely on this.
    self.data_path = Global.path_map('clips_folder')

    if patients is None:
        self.patients = (["Dog_%d" % i for i in range(1, 5)] +
                         ["Patient_%d" % i for i in range(1, 9)])
    elif isinstance(patients, (list, tuple)):
        # A sequence of names is used directly instead of being wrapped
        # into a nested list.
        self.patients = list(patients)
    else:
        # A single patient name, as before.
        self.patients = [patients]
def visualise():
    """
    Plot FFT features of "Dog_1" after an SVD of each feature matrix.

    For every matrix in the training data, the first two rows of the
    third factor returned by np.linalg.svd are plotted against each
    other: entries whose label equals 0 as blue circles, entries whose
    label equals 1 as red circles.
    """
    data_path = Global.path_map('clips_folder')

    # arbitrary band layout
    extractor = FFTFeatures(band_means=np.linspace(0, 200, 66), band_width=2)

    loader = DataLoader(data_path, extractor)
    X_list = loader.training_data("Dog_1")
    y_list = loader.labels("Dog_1")[0]

    plt.figure()
    for idx, X in enumerate(X_list):
        labels = y_list[idx]
        V = np.linalg.svd(X, full_matrices=True)[2]
        # label value -> matplotlib format string
        for value, fmt in ((0, 'bo'), (1, 'ro')):
            plt.plot(V[0][labels == value], V[1][labels == value], fmt)
    plt.show()
import numpy as np from seizures.submission import SubmissionFile from seizures.prediction.SVMPredictor import SVMPredictor from seizures.features.MixFeatures import MixFeatures from seizures.Global import Global # Example script to generate submission file #data_path = "/nfs/data3/kaggle_seizure/scratch/Stiched_data/Dog_1/" data_path = Global.get_subject_folder('Dog_1') # Define Predictor predictor_seizure = SVMPredictor predictor_early = SVMPredictor # Define Features band_means = np.linspace(0, 200, 66) band_width = 2 FFTFeatures_args = {'band_means':band_means, 'band_width':band_width} #feature_extractor = MixFeatures([{'name':"ARFeatures",'args':{}}, # {'name':"FFTFeatures",'args':FFTFeatures_args}]) feature_extractor = MixFeatures([{'name':"ARFeatures",'args':{}}, {'name':"PLVFeatures",'args':{}}]) submissionfile = SubmissionFile(data_path) # Load training data # Learn classifiers # Make final file submissionfile.generate_submission(predictor_seizure, predictor_early,
'''
Created on 28 Jun 2014

@author: heiko
'''
from seizures.features.RandomFeatures import RandomFeatures
from seizures.prediction.RandomPredictor import RandomPredictor
from seizures.submission.SubmissionFile import SubmissionFile
from seizures.Global import Global

if __name__ == '__main__':
    # Random predictor and random features.
    predictor = RandomPredictor()
    extractor = RandomFeatures()

    test_files = ["Dog_1_test_segment_1.mat"]

    submission = SubmissionFile(Global.path_map('clips_folder'))

    # The same predictor instance serves as both the seizure model and the
    # early-seizure model.
    submission.generate_submission(predictor, predictor, extractor,
                                   test_filenames=test_files)
def generate_submission(self, predictor_seizure, predictor_early,
                        feature_extractor, output_fname="output.csv",
                        test_filenames=None, preprocess=True):
    """
    Generates a submission file for a given pair of predictors, which will
    be trained on all training data per patient/dog instance.

    For every patient in self.patients both predictors are fitted on the
    training data, their predictions on the training data are printed, and
    one CSV file with the test predictions is written.

    Parameters:
    predictor_seizure - Instance of PredictorBase, fixed parameters
    predictor_early   - Instance of PredictorBase, fixed parameters
    feature_extractor - Instance of FeatureExtractBase, to extract test features
    output_fname      - Optional filename for result submission file.
                        NOTE(review): the file written is
                        patient + '_' + output_fname + '.csv', so the
                        default yields a double ".csv" suffix -- confirm.
    test_filenames    - NOTE(review): currently ignored. The value is
                        replaced below and predictions are produced for
                        every file reported by the loader.
    preprocess        - forwarded to the loader's training_data/test_data
    """
    # make sure given objects are valid
    assert isinstance(predictor_seizure, PredictorBase)
    assert isinstance(predictor_early, PredictorBase)
    assert isinstance(feature_extractor, FeatureExtractBase)

    # Results are not used below; the calls are kept in case they validate
    # the data folders (TODO confirm whether they can be dropped).
    test_filenames = SubmissionFile.get_submission_filenames()
    train_filenames = SubmissionFile.get_train_filenames()

    all_result_lines = []
    for patient in self.patients:
        loader = DataLoader(self.data_path, feature_extractor)
        # X_train is n x d
        X_train, y_seizure, y_early = loader.training_data(
            patient, preprocess=preprocess)

        # Single-argument print(...) gives the same output under the
        # Python 2 print statement and the Python 3 print function.
        print(X_train.shape)
        print(y_seizure.shape)

        # train both models
        print("Training seizure for " + patient)
        predictor_seizure.fit(X_train, y_seizure)
        print("Training early for " + patient)
        predictor_early.fit(X_train, y_early)

        pred_seizure = predictor_seizure.predict(X_train)
        pred_early = predictor_early.predict(X_train)
        print('Results on training data')
        print('seizure\tearly\tp(seizure)\tp(early)')
        # built-in zip replaces the Python-2-only itertools.izip
        for y_1, y_2, p_1, p_2 in zip(y_seizure, y_early,
                                      pred_seizure, pred_early):
            print('%d\t%d\t%.3f\t%.3f' % (y_1, y_2, p_1, p_2))

        # now predict on all test points; the file list comes from the
        # loader (a name-based filter that used to precede this was always
        # overwritten by loader.files and has been removed)
        loader = DataLoader(self.data_path, feature_extractor)
        # X_test: n x d matrix
        X_test = loader.test_data(patient, preprocess=preprocess)
        test_fnames_patient = loader.files

        result_lines = []
        for ifname, fname in enumerate(test_fnames_patient):
            # X is one instance
            X = X_test[ifname, :]
            # [0] to extract probability out of the ndarray
            pred_seizure = predictor_seizure.predict(X)[0]
            pred_early = predictor_early.predict(X)[0]
            name = fname.split("/")[-1]
            result_lines.append(",".join(
                [name, str(pred_seizure), str(pred_early)]))

        csv_fname = patient + '_' + output_fname + '.csv'
        csv_path = Global.get_child_result_folder(csv_fname)
        print("Storing results to %s" % csv_fname)
        # context manager closes the file even if a write fails
        with open(csv_path, "w") as f:
            f.write("clip,seizure,early\n")
            for line in result_lines:
                f.write(line + '\n')

        all_result_lines.append(result_lines)
def generate_submission(self, predictor_seizure, predictor_early,
                        feature_extractor, output_fname="output.csv",
                        test_filenames=None, preprocess=True):
    """
    Generates a submission file for a given pair of predictors, which will
    be trained on all training data per patient/dog instance.

    For every patient in self.patients both predictors are fitted on the
    training data, their predictions on the training data are printed, and
    one CSV file with the test predictions is written.

    Parameters:
    predictor_seizure - Instance of PredictorBase, fixed parameters
    predictor_early   - Instance of PredictorBase, fixed parameters
    feature_extractor - Instance of FeatureExtractBase, to extract test features
    output_fname      - Optional filename for result submission file.
                        NOTE(review): the file written is
                        patient + '_' + output_fname + '.csv', so the
                        default yields a double ".csv" suffix -- confirm.
    test_filenames    - NOTE(review): currently ignored. The value is
                        replaced below and predictions are produced for
                        every file reported by the loader.
    preprocess        - forwarded to the loader's training_data/test_data
    """
    # make sure given objects are valid
    assert isinstance(predictor_seizure, PredictorBase)
    assert isinstance(predictor_early, PredictorBase)
    assert isinstance(feature_extractor, FeatureExtractBase)

    # Results are not used below; the calls are kept in case they validate
    # the data folders (TODO confirm whether they can be dropped).
    test_filenames = SubmissionFile.get_submission_filenames()
    train_filenames = SubmissionFile.get_train_filenames()

    all_result_lines = []
    for patient in self.patients:
        loader = DataLoader(self.data_path, feature_extractor)
        # X_train is n x d
        X_train, y_seizure, y_early = loader.training_data(
            patient, preprocess=preprocess)

        # Single-argument print(...) gives the same output under the
        # Python 2 print statement and the Python 3 print function.
        print(X_train.shape)
        print(y_seizure.shape)

        # train both models
        print("Training seizure for " + patient)
        predictor_seizure.fit(X_train, y_seizure)
        print("Training early for " + patient)
        predictor_early.fit(X_train, y_early)

        pred_seizure = predictor_seizure.predict(X_train)
        pred_early = predictor_early.predict(X_train)
        print('Results on training data')
        print('seizure\tearly\tp(seizure)\tp(early)')
        # built-in zip replaces the Python-2-only itertools.izip
        for y_1, y_2, p_1, p_2 in zip(y_seizure, y_early,
                                      pred_seizure, pred_early):
            print('%d\t%d\t%.3f\t%.3f' % (y_1, y_2, p_1, p_2))

        # now predict on all test points; the file list comes from the
        # loader (a name-based filter that used to precede this was always
        # overwritten by loader.files and has been removed)
        loader = DataLoader(self.data_path, feature_extractor)
        # X_test: n x d matrix
        X_test = loader.test_data(patient, preprocess=preprocess)
        test_fnames_patient = loader.files

        result_lines = []
        for ifname, fname in enumerate(test_fnames_patient):
            # X is one instance
            X = X_test[ifname, :]
            # [0] to extract probability out of the ndarray
            pred_seizure = predictor_seizure.predict(X)[0]
            pred_early = predictor_early.predict(X)[0]
            name = fname.split("/")[-1]
            result_lines.append(",".join(
                [name, str(pred_seizure), str(pred_early)]))

        csv_fname = patient + '_' + output_fname + '.csv'
        csv_path = Global.get_child_result_folder(csv_fname)
        print("Storing results to %s" % csv_fname)
        # context manager closes the file even if a write fails
        with open(csv_path, "w") as f:
            f.write("clip,seizure,early\n")
            for line in result_lines:
                f.write(line + '\n')

        all_result_lines.append(result_lines)