Beispiel #1
0
    def _prepare_data(self, X, y=None, **kwargs):
        """Split the data for the current outer fold and record sample statistics.

        The outer fold's ``train_indices`` select the validation partition and
        its ``test_indices`` select the test partition. Both partitions are
        stored on the instance, and their sizes are written to
        ``self.result_object``. For classifier pipelines the class
        distributions of both partitions are recorded as well.
        """
        outer_fold = self.cross_validaton_info.outer_folds[self.outer_fold_id]
        message = "Preparing data for outer fold " + str(outer_fold.fold_nr) + "..."
        logger.info(message)

        # Carve out the validation partition (outer train indices) ...
        validation_split = PhotonDataHelper.split_data(
            X, y, kwargs, indices=outer_fold.train_indices
        )
        (
            self._validation_X,
            self._validation_y,
            self._validation_kwargs,
        ) = validation_split

        # ... and the test partition (outer test indices).
        test_split = PhotonDataHelper.split_data(
            X, y, kwargs, indices=outer_fold.test_indices
        )
        self._test_X, self._test_y, self._test_kwargs = test_split

        # Persist the sample counts on the result object.
        self.result_object.number_samples_validation = self._validation_y.shape[0]
        self.result_object.number_samples_test = self._test_y.shape[0]

        # Class distributions are only recorded for classifiers.
        if self._pipe._estimator_type != "classifier":
            return
        self.result_object.class_distribution_validation = FoldInfo.data_overview(
            self._validation_y
        )
        self.result_object.class_distribution_test = FoldInfo.data_overview(
            self._test_y
        )
Beispiel #2
0
 def setUp(self):
     """Create the pipeline, config, inner folds and optimization info for the tests.

     Loads the breast cancer dataset, splits it with a 4-fold ``KFold`` and
     stores one ``FoldInfo`` per split under ``self.outer_fold_id`` in the
     cross-validation info object.
     """
     super(InnerFoldTests, self).setUp()
     self.pipe = PhotonPipeline([
         ("StandardScaler", PipelineElement("StandardScaler")),
         ("PCA", PipelineElement("PCA")),
         ("RidgeClassifier", PipelineElement("RidgeClassifier")),
     ])
     self.config = {
         "PCA__n_components": 5,
         "RidgeClassifier__solver": "svd",
         "RidgeClassifier__random_state": 42,
     }
     self.outer_fold_id = "TestID"
     self.inner_cv = KFold(n_splits=4)
     # return_X_y has to be passed by keyword: scikit-learn made the dataset
     # loader's parameters keyword-only, so a positional ``True`` is rejected
     # by current releases.
     self.X, self.y = load_breast_cancer(return_X_y=True)
     self.cross_validation = Hyperpipe.CrossValidation(
         self.inner_cv, None, True, 0.2, True, False)
     # Inner folds are keyed by outer fold id, then by the split's index;
     # the second FoldInfo argument is the index shifted to start at 1.
     self.cross_validation.inner_folds = {
         self.outer_fold_id: {
             i: FoldInfo(i, i + 1, train, test)
             for i, (train,
                     test) in enumerate(self.inner_cv.split(self.X, self.y))
         }
     }
     self.optimization = Hyperpipe.Optimization(
         "grid_search", {}, ["accuracy", "recall", "specificity"],
         "accuracy", None)
Beispiel #3
0
 def setUp(self):
     """Set up pipeline, config, inner folds and optimization info shared by the tests."""
     super(InnerFoldTests, self).setUp()

     # Three-step pipeline; every step is named after its element.
     step_names = ('StandardScaler', 'PCA', 'RidgeClassifier')
     self.pipe = PhotonPipeline(
         [(step, PipelineElement(step)) for step in step_names])
     self.config = dict(
         PCA__n_components=5,
         RidgeClassifier__solver='svd',
         RidgeClassifier__random_state=42,
     )

     self.outer_fold_id = 'TestID'
     self.inner_cv = KFold(n_splits=4)
     self.X, self.y = load_breast_cancer(return_X_y=True)

     self.cross_validation = Hyperpipe.CrossValidation(
         self.inner_cv, None, True, 0.2, True, False, False, None)

     # One FoldInfo per KFold split, keyed by the split's index and stored
     # under the outer fold id.
     folds_by_index = {}
     splits = self.inner_cv.split(self.X, self.y)
     for idx, (train_idx, test_idx) in enumerate(splits):
         folds_by_index[idx] = FoldInfo(idx, idx + 1, train_idx, test_idx)
     self.cross_validation.inner_folds = {self.outer_fold_id: folds_by_index}

     metric_names = ['accuracy', 'recall', 'specificity']
     self.optimization = Hyperpipe.Optimization(
         'grid_search', {}, metric_names, 'accuracy', None)
Beispiel #4
0
    def _generate_inner_folds(self):
        """Generate the inner folds from the validation data and register them.

        The folds are stored on ``self.inner_folds`` and, keyed by their
        ``fold_id``, under the current outer fold id in the cross-validation
        info object.
        """
        cv_info = self.cross_validaton_info
        self.inner_folds = FoldInfo.generate_folds(
            cv_info.inner_cv,
            self._validation_X,
            self._validation_y,
            self._validation_kwargs,
        )

        folds_by_id = {}
        for fold in self.inner_folds:
            folds_by_id[fold.fold_id] = fold
        cv_info.inner_folds[self.outer_fold_id] = folds_by_id
Beispiel #5
0
 def test_no_cv_strategy_eval_final_performance_false(self):
     """No CV strategy and no final evaluation: one fold that trains on every sample."""
     folds = FoldInfo.generate_folds(
         None,
         self.X,
         self.y,
         self.kwargs,
         eval_final_performance=False,
         test_size=0.15,
     )
     # Exactly one fold is expected; although a test size is passed, all
     # subjects must end up in the training set and the test set stays empty.
     self.assertEqual(len(folds), 1)
     only_fold = folds[0]
     self.assertEqual(len(only_fold.train_indices), self.num_subjects)
     self.assertEqual(len(only_fold.test_indices), 0)
Beispiel #6
0
 def base_assertions(self, cv, nr_of_folds, eval_final_performance=True):
     """Generate folds with ``cv`` and check their number and sizes.

     Asserts that ``nr_of_folds`` folds are produced, that every fold's test
     set holds ``num_subjects / nr_of_folds`` samples and its training set
     the remaining ``nr_of_folds - 1`` shares, and that the first fold has
     ``fold_nr`` 1.

     :param cv: cross-validation strategy handed to ``FoldInfo.generate_folds``
     :param nr_of_folds: number of folds the strategy is expected to yield
     :param eval_final_performance: forwarded to ``FoldInfo.generate_folds``
     :return: the generated list of folds, for further checks by the caller
     """
     fold_list = FoldInfo.generate_folds(
         cv,
         self.X,
         self.y,
         self.kwargs,
         eval_final_performance=eval_final_performance)
     # assertEqual (instead of assertTrue on a comparison) reports both
     # values when the check fails.
     self.assertEqual(len(fold_list), nr_of_folds)

     # The expected sizes are the same for every fold, so compute them once.
     expected_test_size = self.num_subjects / nr_of_folds
     expected_train_size = (nr_of_folds - 1) * expected_test_size
     for generated_fold in fold_list:
         self.assertEqual(len(generated_fold.train_indices),
                          expected_train_size)
         self.assertEqual(len(generated_fold.test_indices),
                          expected_test_size)
     # we always start with 1 for the investigator
     self.assertEqual(fold_list[0].fold_nr, 1)
     return fold_list
Beispiel #7
0
    def setUp(self):
        """Prepare CV info, one outer fold, optimization info and a regression pipeline."""
        super(OuterFoldTests, self).setUp()

        # Cross-validation strategies: 5 shuffled inner splits, 1 outer split.
        self.fold_nr_inner_cv = 5
        self.inner_cv = ShuffleSplit(
            n_splits=self.fold_nr_inner_cv, random_state=42)
        self.outer_cv = ShuffleSplit(
            n_splits=1, test_size=0.2, random_state=42)
        cv_settings = dict(
            inner_cv=self.inner_cv,
            outer_cv=self.outer_cv,
            eval_final_performance=True,
            test_size=0.2,
            calculate_metrics_per_fold=True,
            calculate_metrics_across_folds=False,
            learning_curves=False,
            learning_curves_cut=None,
        )
        self.cv_info = Hyperpipe.CrossValidation(**cv_settings)

        # NOTE(review): load_boston was removed in scikit-learn 1.2, so this
        # fixture presumably relies on an older pinned release - confirm.
        self.X, self.y = load_boston(return_X_y=True)
        self.outer_fold_id = "TestFoldOuter1"
        # Every split is stored under the same outer fold id; the outer CV
        # is configured with n_splits=1.
        outer_folds = {}
        for train_idx, test_idx in self.outer_cv.split(self.X, self.y):
            outer_folds[self.outer_fold_id] = FoldInfo(0, 1, train_idx, test_idx)
        self.cv_info.outer_folds = outer_folds

        self.config_num = 2
        self.optimization_info = Hyperpipe.Optimization(
            optimizer_input='grid_search',
            optimizer_params={},
            metrics=['mean_absolute_error', 'mean_squared_error'],
            best_config_metric='mean_absolute_error',
            performance_constraints=None)

        scaler = PipelineElement('StandardScaler')
        pca = PipelineElement('PCA', {'n_components': [4, 7]})
        regressor = PipelineElement('DecisionTreeRegressor', random_state=42)
        self.elements = [scaler, pca, regressor]
        named_steps = []
        for element in self.elements:
            named_steps.append((element.name, element))
        self.pipe = PhotonPipeline(named_steps)
Beispiel #8
0
 def test_data_overview(self):
     """data_overview must report 10 samples for each of the group labels '0'..'9'."""
     expected_counts = {}
     for label in range(10):
         expected_counts[str(label)] = 10
     groups_as_int = self.kwargs['groups'].astype(int)
     observed_counts = FoldInfo.data_overview(groups_as_int)
     self.assertDictEqual(expected_counts, observed_counts)
Beispiel #9
0
 def test_class_distribution_info(self):
     """The count reported under key "1" must equal numpy's second unique-value count."""
     # Only the per-value counts are needed; the unique values are discarded.
     _, value_counts = np.unique(self.__y, return_counts=True)
     overview = FoldInfo.data_overview(self.__y)
     self.assertEqual(value_counts[1], overview["1"])