def test_all_combinations(features, feature_extractors, predictors):
    """
    Evaluate every (feature_extractor, predictor) pair on both CV tasks.

    features is a list [(X_seizure, y_seizure, X_early, y_early)] where each
    element in the tuple is itself a list of length = fold containing data in
    each CV fold.

    :param features: per-extractor CV data tuples (see above)
    :param feature_extractors: list of feature extractor instances, aligned
        with `features` by index
    :param predictors: list of predictor instances to evaluate
    :return: an instance of FeaturesPredictsTable
    """
    # these loops can be parallelized.
    # !! Can be improved !!
    L = []
    for i, feature_extractor in enumerate(feature_extractors):
        feature_list = []
        X_seizure, y_seizure, X_early, y_early = features[i]
        for j, predictor in enumerate(predictors):
            # fixed: Python 2 print statements -> print() function, consistent
            # with the py3 variant of this function elsewhere in the file
            print('Evaluating feat: %s + pred: %s on seizure task'
                  % (str(feature_extractor), str(predictor)))
            result_seizure = XValidation.evaluate(X_seizure, y_seizure,
                                                  predictor, evaluation=auc)
            print('Evaluating feat: %s + pred: %s on early seizure task'
                  % (str(feature_extractor), str(predictor)))
            result_early = XValidation.evaluate(X_early, y_early,
                                                predictor, evaluation=auc)
            r = {}
            r['predictor'] = predictor
            r['feature_extractor'] = feature_extractor
            # total features extracted. X_i is n x d
            r['total_features'] = X_early[0].shape[1]
            r['cv_fold'] = len(X_early)
            r['seizure_mean_auc'] = np.mean(result_seizure)
            r['seizure_std_auc'] = np.std(result_seizure)
            r['early_mean_auc'] = np.mean(result_early)
            r['early_std_auc'] = np.std(result_early)
            feature_list.append(r)
        L.append(feature_list)
    return FeaturesPredictsTable(L)
def test_predictor(predictor_cls):
    """
    Load Dog_1 data, extract AR features and cross-validate predictor_cls.

    :param predictor_cls: a Predictor class (not an instance); instantiated
        with no arguments
    """
    predictor = predictor_cls()
    data_path = Global.path_map('clips_folder')

    # fixed: the FFTFeatures extractor below was constructed and then
    # immediately overwritten by ARFeatures() — dead code, kept only as a
    # commented-out alternative configuration:
    # band_means = np.linspace(0, 200, 66)  # arbitrary
    # band_width = 2
    # feature_extractor = FFTFeatures(band_means=band_means, band_width=band_width)
    feature_extractor = ARFeatures()

    loader = DataLoader(data_path, feature_extractor)
    X_list = loader.training_data("Dog_1")
    y_list = loader.labels("Dog_1")

    # seizure-vs-not task uses the first label list
    print( XValidation.evaluate(X_list, y_list[0], predictor, evaluation=accuracy))

    # Set the conditioned results for proper evaluation
    conditioned = [a * b for (a, b) in zip(y_list[0], y_list[1])]
    print( XValidation.evaluate(X_list, conditioned, predictor, evaluation=accuracy))
def test_predictor(predictor_cls, patient_name='Dog_1'):
    '''
    Load one patient's data and run cross-validation with AR features.

    INPUT:
    - predictor_cls: a Predictor class (implement)
    - patient_name: patient identifier, e.g. 'Dog_1'
    '''
    # build a predictor instance from the supplied class
    clf = predictor_cls()

    # path to data (here path from within gatsby network)
    clips_dir = Global.path_map('clips_folder')

    # configuration for the (currently unused) FFT feature alternative
    band_means = np.linspace(0, 200, 66)
    band_width = 2
    FFTFeatures_args = {'band_means': band_means, 'band_width': band_width}
    # feature_extractor = MixFeatures([{'name':"ARFeatures",'args':{}},
    #                                  {'name':"FFTFeatures",'args':FFTFeatures_args}])
    extractor = MixFeatures([{'name': "ARFeatures", 'args': {}}])
    # feature_extractor = MixFeatures([{'name':"FFTFeatures",'args':FFTFeatures_args}])
    #feature_extractor = ARFeatures()

    # loading the data
    loader = DataLoader(clips_dir, extractor)
    print(loader.base_dir)
    print('\npatient = %s' % patient_name)
    X_list = loader.training_data(patient_name)
    y_list = loader.labels(patient_name)

    # unpack the two label tasks
    early_vs_not = y_list[1]  # [a * b for (a, b) in zip(y_list[0], y_list[1])]
    seizure_vs_not = y_list[0]

    # running cross validation over both tasks
    # conditioned = [a * b for (a, b) in zip(y_list[0], y_list[1])]
    for task, labels in (("seizures vs not", seizure_vs_not),
                         ("early_vs_not", early_vs_not)):
        print("\ncross validation: %s" % task)
        scores = XValidation.evaluate(X_list, labels, clf, evaluation=auc)
        print('cross-validation results: mean = %.3f, sd = %.3f, raw scores = %s'
              % (np.mean(scores), np.std(scores), scores))
def test_combination(self, fold=3, max_segments=-1):
    """
    Test the predictor using features given by feature_extractor on the
    data specified by the patient argument.
    Based on examples/cross_validation_test.py

    :param fold: number of cross-validation folds
    :param max_segments: maximum segments to load. -1 to use the number of
    total segments available. Otherwise, all segments (ictal and interictal)
    will be randomly subsampled without replacement.

    return: a dictionary containing error report
    """
    clf = self._predictor
    loader = DataLoader(self._data_path, self._feature_extractor)
    X_list, y_seizure, y_early = loader.blocks_for_Xvalidation(
        self._patient, fold, max_segments)

    # cross-validate the two tasks: seizure-vs-not, then early-vs-not
    scores_seizure = XValidation.evaluate(X_list, y_seizure, clf, evaluation=auc)
    scores_early = XValidation.evaluate(X_list, y_early, clf, evaluation=auc)

    # assemble the report dictionary
    report = {
        'predictor': clf,
        'feature_extractor': self._feature_extractor,
        # total features extracted. X_i is n x d
        'total_features': X_list[0].shape[1],
        'cv_fold': fold,
        'seizure_mean_auc': np.mean(scores_seizure),
        'seizure_std_auc': np.std(scores_seizure),
        'early_mean_auc': np.mean(scores_early),
        'early_std_auc': np.std(scores_early),
    }
    return report
def test_predictor(predictor_cls, patient_name='Dog_1'):
    '''
    function that loads data for Dog_1
    run crossvalidation with ARFeatures

    INPUT:
    - predictor_cls: a Predictor class (implement)
    - patient_name: patient identifier, e.g. 'Dog_1'
    '''
    # instanciating a predictor object from Predictor class
    predictor = predictor_cls()
    # path to data (here path from within gatsby network)
    data_path = Global.path_map('clips_folder')

    # creating instance of autoregressive features
    #feature_extractor = ARFeatures()
    band_means = np.linspace(0, 200, 66)
    band_width = 2
    FFTFeatures_args = {'band_means':band_means, 'band_width':band_width}
    # feature_extractor = MixFeatures([{'name':"ARFeatures",'args':{}},
    #                                  {'name':"FFTFeatures",'args':FFTFeatures_args}])
    feature_extractor = MixFeatures([{'name':"ARFeatures",'args':{}}])
    # feature_extractor = MixFeatures([{'name':"FFTFeatures",'args':FFTFeatures_args}])
    #feature_extractor = ARFeatures()

    # loading the data
    loader = DataLoader(data_path, feature_extractor)
    # fixed: Python 2 print statements -> print() function, consistent with
    # the py3 variant of this function elsewhere in the file
    print(loader.base_dir)
    print('\npatient = %s' % patient_name)
    X_list = loader.training_data(patient_name)
    y_list = loader.labels(patient_name)

    # separating the label
    early_vs_not = y_list[1]  #[a * b for (a, b) in zip(y_list[0], y_list[1])]
    seizure_vs_not = y_list[0]

    # running cross validation
    # conditioned = [a * b for (a, b) in zip(y_list[0], y_list[1])]
    print("\ncross validation: seizures vs not")
    result = XValidation.evaluate(X_list, seizure_vs_not, predictor, evaluation=auc)
    print('cross-validation results: mean = %.3f, sd = %.3f, raw scores = %s'
          % (np.mean(result), np.std(result), result))
    print("\ncross validation: early_vs_not")
    result = XValidation.evaluate(X_list, early_vs_not, predictor, evaluation=auc)
    print('cross-validation results: mean = %.3f, sd = %.3f, raw scores = %s'
          % (np.mean(result), np.std(result), result))
def test_combination(self, fold=3, max_segments=-1):
    """
    Test the predictor using features given by feature_extractor on the
    data specified by the patient argument.
    Based on examples/cross_validation_test.py

    :param fold: number of cross-validation folds
    :param max_segments: maximum segments to load. -1 to use the number of
    total segments available. Otherwise, all segments (ictal and interictal)
    will be randomly subsampled without replacement.

    return: a dictionary containing error report
    """
    predictor = self._predictor
    data_loader = DataLoader(self._data_path, self._feature_extractor)
    X_list, y_seizure, y_early = data_loader.blocks_for_Xvalidation(
        self._patient, fold, max_segments)

    # run cross validation on the seizure-vs-not task ...
    result_seizure = XValidation.evaluate(X_list, y_seizure, predictor,
                                          evaluation=auc)
    # ... and on the early-vs-not task
    result_early = XValidation.evaluate(X_list, y_early, predictor,
                                        evaluation=auc)

    # dict containing bunch of reports
    r = dict(
        predictor=predictor,
        feature_extractor=self._feature_extractor,
        # total features extracted. X_i is n x d
        total_features=X_list[0].shape[1],
        cv_fold=fold,
        seizure_mean_auc=np.mean(result_seizure),
        seizure_std_auc=np.std(result_seizure),
        early_mean_auc=np.mean(result_early),
        early_std_auc=np.std(result_early),
    )
    return r
def Xval_on_single_patient(predictor_cls, feature_extractor, patient_name="Dog_1", preprocess=True):
    """
    Single patient cross validation
    Returns 2 lists of cross validation performances

    :param predictor_cls: handle to a PredictorBase subclass; instantiated
        with no arguments
    :param feature_extractor: feature extractor instance passed to DataLoader
    :param patient_name: patient identifier, e.g. "Dog_1"
    :param preprocess: forwarded to DataLoader.blocks_for_Xvalidation
    :return: (result_seizure, result_early) — per-fold AUC score lists
    """
    # Instantiate the predictor
    predictor = predictor_cls()
    # fixed: the configured Global.path_map('clips_folder') lookup was
    # unconditionally clobbered by this hard-coded path; the dead lookup has
    # been removed. NOTE(review): consider restoring the path-map indirection
    # instead of an absolute path.
    base_dir = '/nfs/data3/kaggle_seizure/clips/'
    loader = DataLoader(base_dir, feature_extractor)
    X_list, y_seizure, y_early = loader.blocks_for_Xvalidation(
        patient_name, preprocess=preprocess)
    #X_train,y_seizure, y_early = loader.training_data(patient_name)
    #y_train = [y_seizure,y_early]
    #X_list,y_list = train_test_split(X_train,y_train)

    # running cross validation
    print(patient_name)
    print("\ncross validation: seizures vs not")
    result_seizure = XValidation.evaluate(X_list, y_seizure, predictor, evaluation=auc)
    print('cross-validation results: mean = %.3f, sd = %.3f, raw scores = %s' \
          % (np.mean(result_seizure), np.std(result_seizure), result_seizure))
    print("\ncross validation: early_vs_not")
    result_early = XValidation.evaluate(X_list, y_early, predictor, evaluation=auc)
    print('cross-validation results: mean = %.3f, sd = %.3f, raw scores = %s' \
          % (np.mean(result_early), np.std(result_early), result_early))
    return result_seizure, result_early
def test_predictor(predictor_cls):
    """
    Load Dog_1 data, extract AR features and cross-validate predictor_cls.

    :param predictor_cls: a Predictor class (not an instance); instantiated
        with no arguments
    """
    predictor = predictor_cls()
    data_path = Global.path_map('clips_folder')

    # fixed: the FFTFeatures extractor below was constructed and then
    # immediately overwritten by ARFeatures() — dead code, kept only as a
    # commented-out alternative configuration:
    # band_means = np.linspace(0, 200, 66)  # arbitrary
    # band_width = 2
    # feature_extractor = FFTFeatures(band_means=band_means, band_width=band_width)
    feature_extractor = ARFeatures()

    loader = DataLoader(data_path, feature_extractor)
    X_list = loader.training_data("Dog_1")
    y_list = loader.labels("Dog_1")

    # fixed: Python 2 print statements -> print() function, consistent with
    # the py3 variants of this function elsewhere in the file
    print(XValidation.evaluate(X_list, y_list[0], predictor, evaluation=accuracy))

    # Set the conditioned results for proper evaluation
    conditioned = [a * b for (a, b) in zip(y_list[0], y_list[1])]
    print(XValidation.evaluate(X_list, conditioned, predictor, evaluation=accuracy))
def test_predictor(predictor_cls):
    """
    Smoke-test predictor_cls on random data with a simulated 2-fold CV.

    :param predictor_cls: a Predictor class (not an instance); instantiated
        with no arguments
    """
    predictor = predictor_cls()
    N = 1000
    D = 2
    # simulate a 2-fold cross validation
    # fixed: N / 2 is a float under Python 3; numpy size arguments must be
    # integers, so use floor division
    Xs = [np.random.randn(N // 2, D), np.random.randn(N // 2, D)]
    ys = [np.random.randint(0, 2, N // 2), np.random.randint(0, 2, N // 2)]
    #X=np.random.randn(N,D)
    #y=np.random.randint(0,2,N)
    print(XValidation.evaluate(Xs, ys, predictor))
def test_all_combinations(features, feature_extractors, predictors):
    """
    Cross-validate every feature-extractor/predictor pairing on both tasks.

    features is a list [(X_seizure, y_seizure, X_early, y_early)] where each
    element in the tuple is itself a list of length = fold containing data in
    each CV fold.

    return an instance of FeaturesPredictsTable
    """
    # these loops can be parallelized.
    # !! Can be improved !!
    table_rows = []
    for i, extractor in enumerate(feature_extractors):
        X_seizure, y_seizure, X_early, y_early = features[i]
        per_extractor = []
        for j, pred in enumerate(predictors):
            print('Evaluating feat: %s + pred: %s on seizure task'
                  % (str(extractor), str(pred)))
            seizure_scores = XValidation.evaluate(X_seizure, y_seizure, pred,
                                                  evaluation=auc)
            print('Evaluating feat: %s + pred: %s on early seizure task'
                  % (str(extractor), str(pred)))
            early_scores = XValidation.evaluate(X_early, y_early, pred,
                                                evaluation=auc)
            # one report row per (extractor, predictor) pair
            per_extractor.append({
                'predictor': pred,
                'feature_extractor': extractor,
                # total features extracted. X_i is n x d
                'total_features': X_early[0].shape[1],
                'cv_fold': len(X_early),
                'seizure_mean_auc': np.mean(seizure_scores),
                'seizure_std_auc': np.std(seizure_scores),
                'early_mean_auc': np.mean(early_scores),
                'early_std_auc': np.std(early_scores),
            })
        table_rows.append(per_extractor)
    return FeaturesPredictsTable(table_rows)
def test_predictor(predictor_cls):
    """
    Cross-validate predictor_cls on randomly generated X-validation data.

    :param predictor_cls: a Predictor class (not an instance); instantiated
        with no arguments
    """
    predictor = predictor_cls()
    X_list, y_list = RandomXValidationData.get()
    # fixed: Python 2 print statement -> print() function, consistent with
    # the py3 code elsewhere in the file
    print(XValidation.evaluate(X_list, y_list, predictor))