Example no. 1
    def test_overwrite_result_folder(self):
        """
        Test for right handling of parameter output_settings.overwrite.
        """

        def get_summary_file():
            return os.path.join(
                self.hyperpipe.output_settings.results_folder, "photon_summary.txt"
            )

        # Case 1: default
        output_settings1 = OutputSettings(
            project_folder=self.tmp_folder_path,
            save_output=True,
            overwrite_results=False,
        )
        self.setup_hyperpipe(output_settings1)
        self.hyperpipe.fit(self.__X, self.__y)
        tmp_path = get_summary_file()

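        # wait so the second run gets a distinct timestamped folder name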
        time.sleep(2)

        # again with same settings
        self.setup_hyperpipe(output_settings1)
        self.hyperpipe.fit(self.__X, self.__y)
        tmp_path2 = get_summary_file()

        # we expect a new timestamped output folder for each run
        self.assertNotEqual(tmp_path, tmp_path2)

        # Case 2: overwrite_results=True, all runs share the same folder
        output_settings2 = OutputSettings(
            project_folder=self.tmp_folder_path,
            save_output=True,
            overwrite_results=True,
        )
        self.setup_hyperpipe(output_settings2)
        self.hyperpipe.fit(self.__X, self.__y)
        tmp_path = get_summary_file()
        tmp_date = os.path.getmtime(tmp_path)

        self.setup_hyperpipe(output_settings2)
        self.hyperpipe.fit(self.__X, self.__y)
        tmp_path2 = get_summary_file()
        tmp_date2 = os.path.getmtime(tmp_path2)

        # same folder, but the summary file is overwritten by the new analysis
        self.assertEqual(tmp_path, tmp_path2)
        self.assertNotEqual(tmp_date, tmp_date2)

        # Case 3: we have a cache folder
        self.hyperpipe.cache_folder = self.cache_folder_path
        shutil.rmtree(self.cache_folder_path, ignore_errors=True)
        self.hyperpipe.fit(self.__X, self.__y)
        self.assertTrue(os.path.exists(self.cache_folder_path))
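Taken together, the three cases above reduce to a small usage pattern. A minimal sketch, assuming the same OutputSettings API as in the test (folder name illustrative):

    # overwrite_results=False (the default): every fit() writes into a fresh,
    # timestamped results folder below project_folder
    # overwrite_results=True: every fit() reuses one results folder and
    # overwrites files such as photon_summary.txt in place
    settings = OutputSettings(project_folder='./tmp/',
                              save_output=True,
                              overwrite_results=True)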
Example no. 2
    def test_neuro_hyperpipe_parallelized_batched_caching(self):

        cache_path = self.cache_folder_path

        self.hyperpipe = Hyperpipe('complex_case',
                                   inner_cv=KFold(n_splits=5),
                                   outer_cv=KFold(n_splits=3),
                                   optimizer='grid_search',
                                   cache_folder=cache_path,
                                   metrics=['mean_squared_error'],
                                   best_config_metric='mean_squared_error',
                                   output_settings=OutputSettings(
                                       project_folder=self.tmp_folder_path))

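        # parallel branch wrapping a custom transformer under the name "ResampleImages";
        # data is transformed in batches of 4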
        nb = ParallelBranch("SubjectCaching", nr_of_processes=1)
        nb += PipelineElement.create("ResampleImages",
                                     StupidAdditionTransformer(),
                                     {'voxel_size': [3, 5, 10]},
                                     batch_size=4)
        self.hyperpipe += nb

        self.hyperpipe += PipelineElement("StandardScaler", {})
        self.hyperpipe += PipelineElement("PCA", {'n_components': [3, 4]})
        self.hyperpipe += PipelineElement("SVR", {'kernel': ['rbf', 'linear']})

        self.hyperpipe.fit(self.X, self.y)

        # assert cache is empty again
        nr_of_p_files = len(
            glob.glob(os.path.join(self.hyperpipe.cache_folder, "*.p")))
        self.assertEqual(nr_of_p_files, 0)
Example no. 3
        def setUp(self):
            self.s_split = ShuffleSplit(n_splits=3,
                                        test_size=0.2,
                                        random_state=42)

            self.time_limit = 20

            settings = OutputSettings(project_folder='./tmp/')

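            # helper dict handed to the optimizer so the test can inspect SMAC internals after fitting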
            self.smac_helper = {"data": None, "initial_runs": None}

            # Scenario object
            scenario_dict = {
                "run_obj": "quality",
                "deterministic": "true",
                "wallclock_limit": self.time_limit
            }

            # DESIGN YOUR PIPELINE
            self.pipe = Hyperpipe('basic_svm_pipe',
                                  optimizer='smac',
                                  optimizer_params={
                                      'facade': SMAC4HPO,
                                      'scenario_dict': scenario_dict,
                                      'rng': 42,
                                      'smac_helper': self.smac_helper
                                  },
                                  metrics=['accuracy'],
                                  random_seed=42,
                                  best_config_metric='accuracy',
                                  inner_cv=self.s_split,
                                  verbosity=0,
                                  output_settings=settings)
Example no. 4
    def test_register_element(self):
        with self.assertRaises(ValueError):
            self.registry.register('MyCustomEstimator', 'custom_estimator.CustomEstimator', 'WrongType')

        self.registry.register('MyCustomEstimator', 'custom_estimator.CustomEstimator', 'Estimator')

        self.registry.activate()
        settings = OutputSettings(save_output=False, project_folder='./tmp/')

        # DESIGN YOUR PIPELINE
        pipe = Hyperpipe('custom_estimator_pipe',
                         optimizer='random_grid_search',
                         optimizer_params={'n_configurations': 2},
                         metrics=['accuracy', 'precision', 'recall', 'balanced_accuracy'],
                         best_config_metric='accuracy',
                         outer_cv=KFold(n_splits=2),
                         inner_cv=KFold(n_splits=2),
                         verbosity=1,
                         output_settings=settings)

        pipe += PipelineElement('MyCustomEstimator')

        pipe.fit(np.random.randn(30, 30), np.random.randint(0, 2, 30))

        self.registry.delete('MyCustomEstimator')

        os.remove(os.path.join(self.custom_folder, 'CustomElements.json'))
Example no. 5
    def test_huge_combinations(self):
        hp = Hyperpipe(
            "huge_combinations",
            metrics=["accuracy"],
            best_config_metric="accuracy",
            output_settings=OutputSettings(
                project_folder=self.tmp_folder_path),
        )

        hp += PipelineElement("PCA", hyperparameters={"n_components": [5, 10]})
        stack = Stack("ensemble")
        for i in range(20):
            stack += PipelineElement(
                "SVC",
                hyperparameters={
                    "C": FloatRange(0.001, 5),
                    "kernel": ["linear", "rbf", "sigmoid", "polynomial"],
                },
            )
        hp += stack
        hp += PipelineElement(
            "SVC", hyperparameters={"kernel": ["linear", "rbf", "sigmoid"]})
        X, y = load_breast_cancer(return_X_y=True)
        with self.assertRaises(Warning):
            hp.fit(X, y)
Example no. 6
    def test_three_levels_of_feature_importances(self):
        hyperpipe = Hyperpipe(
            "fimps",
            inner_cv=KFold(n_splits=4),
            outer_cv=KFold(n_splits=3),
            metrics=["mean_absolute_error", "mean_squared_error"],
            best_config_metric="mean_squared_error",
            output_settings=OutputSettings(
                project_folder=self.tmp_folder_path),
        )
        hyperpipe += PipelineElement("StandardScaler")
        hyperpipe += PipelineElement("DecisionTreeRegressor")
        X, y = load_boston(return_X_y=True)
        hyperpipe.fit(X, y)

        expected_nr_of_feature_importances = X.shape[1]
        self.assertEqual(
            len(hyperpipe.results.best_config_feature_importances),
            expected_nr_of_feature_importances)

        for outer_fold in hyperpipe.results.outer_folds:
            self.assertEqual(
                len(outer_fold.best_config.best_config_score.feature_importances),
                expected_nr_of_feature_importances)
            for inner_fold in outer_fold.best_config.inner_folds:
                self.assertEqual(
                    len(inner_fold.feature_importances),
                    expected_nr_of_feature_importances)
Example no. 7
    def test_save_optimum_pipe_custom_element(self):
        tmp_path = os.path.join(self.tmp_folder_path, 'optimum_pipypipe')
        settings = OutputSettings(project_folder=tmp_path,
                                  overwrite_results=True)

        my_pipe = Hyperpipe('hyperpipe',
                            optimizer='random_grid_search',
                            optimizer_params={'n_configurations': 1},
                            metrics=['accuracy', 'precision', 'recall'],
                            best_config_metric='f1_score',
                            outer_cv=KFold(n_splits=2),
                            inner_cv=KFold(n_splits=2),
                            verbosity=1,
                            output_settings=settings)
        my_pipe += PipelineElement('KerasDnnClassifier', {},
                                   epochs=1,
                                   hidden_layer_sizes=[5])
        my_pipe.fit(self.__X, self.__y)
        model_path = os.path.join(my_pipe.output_settings.results_folder,
                                  'photon_best_model.photon')
        self.assertTrue(os.path.exists(model_path))

        # check if load_optimum_pipe also works
        # check if we have the meta information recovered
        loaded_optimum_pipe = Hyperpipe.load_optimum_pipe(model_path)
        self.assertIsNotNone(loaded_optimum_pipe._meta_information)
Example no. 8
    def test_write_convenience_files(self):
        """
        Output creation testing. Only write if output_settings.save_output == True
        """
        for file in self.files:
            self.assertTrue(
                os.path.isfile(
                    os.path.join(self.output_settings.results_folder, file)))

        # check that best_config_predictions.csv has the expected number of rows
        with open(
                os.path.join(self.output_settings.results_folder,
                             'best_config_predictions.csv')) as f:
            self.assertEqual(
                sum([
                    outer_fold.number_samples_test
                    for outer_fold in self.hyperpipe.results.outer_folds
                ]),
                sum(1 for _ in f) - 1)

        shutil.rmtree(self.tmp_folder_path, ignore_errors=True)
        self.output_settings = OutputSettings(
            project_folder=self.tmp_folder_path, save_output=False)
        self.hyperpipe.fit(self.__X, self.__y)
        self.assertIsNone(self.output_settings.results_folder)
Example no. 9
    def create_hyperpipe(self):
        # this is needed here for the parallelisation
        from photonai.base import Hyperpipe, PipelineElement, OutputSettings
        from photonai.optimization import FloatRange, Categorical, IntegerRange
        from sklearn.model_selection import GroupKFold
        from sklearn.model_selection import KFold

        settings = OutputSettings(mongodb_connect_url='mongodb://localhost:27017/photon_results',
                                  project_folder=self.tmp_folder_path)
        my_pipe = Hyperpipe('permutation_test_1',
                            optimizer='grid_search',
                            metrics=['accuracy', 'precision', 'recall'],
                            best_config_metric='accuracy',
                            outer_cv=GroupKFold(n_splits=2),
                            inner_cv=KFold(n_splits=2),
                            calculate_metrics_across_folds=True,
                            eval_final_performance=True,
                            verbosity=1,
                            output_settings=settings)

        # Add transformer elements
        my_pipe += PipelineElement("StandardScaler", hyperparameters={},
                                   test_disabled=False, with_mean=True, with_std=True)

        my_pipe += PipelineElement("PCA", hyperparameters={'n_components': IntegerRange(3, 5)},
                                   test_disabled=False)

        # Add estimator
        my_pipe += PipelineElement("SVC", hyperparameters={'kernel': ['linear', 'rbf']},  # C': FloatRange(0.1, 5),
                                   gamma='scale', max_iter=1000000)

        return my_pipe
Example no. 10
    def test_register_element(self):
        with self.assertRaises(ValueError):
            self.registry.register("MyCustomEstimator",
                                   "custom_estimator.CustomEstimator",
                                   "WrongType")

        self.registry.register("MyCustomEstimator",
                               "custom_estimator.CustomEstimator", "Estimator")

        self.registry.activate()
        settings = OutputSettings(save_output=False, project_folder="./tmp/")

        # DESIGN YOUR PIPELINE
        pipe = Hyperpipe(
            "custom_estimator_pipe",
            optimizer="random_grid_search",
            optimizer_params={"n_configurations": 2},
            metrics=["accuracy", "precision", "recall", "balanced_accuracy"],
            best_config_metric="accuracy",
            outer_cv=KFold(n_splits=2),
            inner_cv=KFold(n_splits=2),
            verbosity=1,
            output_settings=settings,
        )

        pipe += PipelineElement("MyCustomEstimator")

        pipe.fit(np.random.randn(30, 30), np.random.randint(0, 2, 30))

        self.registry.delete("MyCustomEstimator")

        os.remove(os.path.join(self.custom_folder, "CustomElements.json"))
Example no. 11
    def setUp(self):
        self.time_limit = 60 * 2

        settings = OutputSettings(project_folder="./tmp/")

        self.smac_helper = {"data": None, "initial_runs": None}

        # DESIGN YOUR PIPELINE
        self.pipe = Hyperpipe(
            "basic_svm_pipe",  # the name of your pipeline
            optimizer="smac",  # which optimizer PHOTON shall use
            optimizer_params={
                "wallclock_limit": self.time_limit,
                "smac_helper": self.smac_helper,
                "run_limit": 20,
            },
            metrics=["accuracy"],
            # the performance metrics of interest
            best_config_metric="accuracy",
            inner_cv=KFold(
                n_splits=3
            ),  # test each configuration three times
            verbosity=0,
            output_settings=settings,
        )
Example no. 12
    def create_hyperpipes(
        metrics: list = None,
        inner_cv=KFold(n_splits=3, shuffle=True, random_state=42),
        outer_cv=ShuffleSplit(n_splits=1, test_size=0.2),
        plots: bool = False,
        optimizer: str = "random_grid_search",
        optimizer_params: dict = {"n_configurations": 10},
        eval_final_performance: bool = True,
        performance_constraints: list = None,
        cache_folder="./cache",
        tmp_folder="./tmp",
    ):
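        # note: the mutable defaults above (optimizer_params dict, CV splitter objects)
        # are created once and shared across calls to this helper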

        pipe = Hyperpipe(
            name="architecture_test_pipe",
            output_settings=OutputSettings(project_folder=tmp_folder,
                                           plots=plots),
            optimizer=optimizer,
            optimizer_params=optimizer_params,
            best_config_metric="accuracy",
            metrics=metrics,
            inner_cv=inner_cv,
            outer_cv=outer_cv,
            eval_final_performance=eval_final_performance,
            performance_constraints=performance_constraints,
            cache_folder=cache_folder,
            verbosity=1,
        )
        return pipe
Example no. 13
    def run_parallelized_permutation(hyperpipe_constructor,
                                     X,
                                     perm_run,
                                     y_perm,
                                     permutation_id,
                                     verbosity=-1,
                                     **kwargs):
        # Create new instance of hyperpipe and set all parameters
        perm_pipe = hyperpipe_constructor()
        perm_pipe.verbosity = verbosity
        perm_pipe.name = perm_pipe.name + '_perm_' + str(perm_run)
        perm_pipe.permutation_id = permutation_id

        po = OutputSettings(
            mongodb_connect_url=perm_pipe.output_settings.mongodb_connect_url,
            save_output=False)
        perm_pipe.output_settings = po
        perm_pipe.calculate_metrics_across_folds = False
        try:
            # Fit hyperpipe
            # use print: there is no common logger shared across the parallel processes
            print('Fitting permutation ' + str(perm_run) + ' ...')
            perm_pipe.fit(X, y_perm, **kwargs)
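            # persist only the aggregated permutation outcome; per-fold details are
            # cleared, presumably to keep the result database small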
            perm_pipe.results.computation_completed = True
            perm_pipe.results.outer_folds = list()
            perm_pipe.results.best_config = None
            perm_pipe.results.save()
            print('Finished permutation ' + str(perm_run) + ' ...')
        except Exception as e:
            if perm_pipe.results is not None:
                perm_pipe.results.permutation_failed = str(e)
                perm_pipe.results.save()
                print('Failed permutation ' + str(perm_run) + ' ...')
        return perm_run
Example no. 14
    def create_hyperpipe(self):
        self.hyperpipe = Hyperpipe('optimizer_test',
                                   output_settings=OutputSettings(project_folder='./tmp'),
                                   metrics=['accuracy'],
                                   best_config_metric='accuracy',
                                   inner_cv=KFold(n_splits=3),
                                   outer_cv=ShuffleSplit(n_splits=2),
                                   optimizer=self.optimizer_name)
Example no. 15
        def test_one_hyperpipe(learning_curves, learning_curves_cut):
            if learning_curves and learning_curves_cut is None:
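                # the default cut presumably yields relative training-set sizes 0.0, 0.2, 0.4, 0.6, 0.8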
                learning_curves_cut = FloatRange(0, 1, 'range', 0.2)
            output_settings = OutputSettings(
                project_folder=self.tmp_folder_path, save_output=False)
            test_hyperpipe = Hyperpipe(
                'test_pipe',
                learning_curves=learning_curves,
                learning_curves_cut=learning_curves_cut,
                metrics=['accuracy', 'recall', 'specificity'],
                best_config_metric='accuracy',
                inner_cv=self.inner_cv,
                output_settings=output_settings)

            self.assertEqual(test_hyperpipe.cross_validation.learning_curves,
                             learning_curves)
            if learning_curves:
                self.assertEqual(
                    test_hyperpipe.cross_validation.learning_curves_cut,
                    learning_curves_cut)
            else:
                self.assertIsNone(
                    test_hyperpipe.cross_validation.learning_curves_cut)

            test_hyperpipe += PipelineElement('StandardScaler')
            test_hyperpipe += PipelineElement('PCA', {'n_components': [1, 2]},
                                              random_state=42)
            test_hyperpipe += PipelineElement('SVC', {
                'C': [0.1],
                'kernel': ['linear']
            },
                                              random_state=42)
            test_hyperpipe.fit(self.X, self.y)
            config_results = test_hyperpipe.results_handler.results.outer_folds[
                0].tested_config_list
            config_num = len(config_results)
            for config_nr in range(config_num):
                for inner_fold_nr in range(self.inner_cv.n_splits):
                    curves = config_results[config_nr].inner_folds[
                        inner_fold_nr].learning_curves
                    if learning_curves:
                        self.assertEqual(len(curves),
                                         len(learning_curves_cut.values))
                        for learning_point_nr in range(
                                len(learning_curves_cut.values)):
                            test_metrics = list(
                                curves[learning_point_nr][1].keys())
                            train_metrics = list(
                                curves[learning_point_nr][2].keys())
                            self.assertEqual(
                                test_hyperpipe.optimization.metrics,
                                test_metrics)
                            self.assertEqual(
                                test_hyperpipe.optimization.metrics,
                                train_metrics)
                    else:
                        self.assertEqual(curves, [])
Example no. 16
    def test_metrics_and_aggregations_eval_performance_false(self):
        self.hyperpipe = Hyperpipe('test_prediction_collection',
                                   inner_cv=KFold(n_splits=self.inner_fold_nr),
                                   metrics=['mean_absolute_error', 'mean_squared_error'],
                                   eval_final_performance=False,
                                   best_config_metric='mean_absolute_error',
                                   calculate_metrics_across_folds=True,
                                   output_settings=OutputSettings(project_folder=self.tmp_folder_path))

        self.test_metrics_and_aggregations()
Example no. 17
    def setUp(self):
        """
        Set default start settings for all tests.
        """
        super(ResultsHandlerTest, self).setUp()

        self.files = [
            "best_config_predictions.csv",
            "time_monitor.csv",
            "time_monitor_pie.png",
            "photon_result_file.p",
            "photon_summary.txt",
            "photon_best_model.photon",
            "optimum_pipe_feature_importances_backmapped.npz",
            "photon_code.py",
            "optimizer_history.png",
        ]

        self.output_settings = OutputSettings(
            project_folder=self.tmp_folder_path, save_output=True)

        self.ss_pipe_element = PipelineElement("StandardScaler")
        self.pca_pipe_element = PipelineElement("PCA",
                                                {"n_components": [1, 2]},
                                                random_state=42)
        self.svc_pipe_element = PipelineElement(
            "SVC",
            {
                "C": [0.1],
                "kernel": ["linear"]
            },  # other kernel options: "rbf", "sigmoid"
            random_state=42,
        )

        self.inner_cv_object = KFold(n_splits=3)
        self.metrics = ["accuracy", "recall", "precision"]
        self.best_config_metric = "accuracy"
        self.hyperpipe = Hyperpipe(
            "god",
            inner_cv=self.inner_cv_object,
            metrics=self.metrics,
            best_config_metric=self.best_config_metric,
            outer_cv=KFold(n_splits=2),
            output_settings=self.output_settings,
            verbosity=1,
        )
        self.hyperpipe += self.ss_pipe_element
        self.hyperpipe += self.pca_pipe_element
        self.hyperpipe.add(self.svc_pipe_element)

        dataset = load_breast_cancer()
        self.__X = dataset.data
        self.__y = dataset.target

        self.hyperpipe.fit(self.__X, self.__y)
Example no. 18
    def test_branch_in_branch(self):
        """
        Test for deep Pipeline.
        """

        my_pipe = Hyperpipe(
            "basic_stacking",
            optimizer="grid_search",
            metrics=["accuracy", "precision", "recall"],
            best_config_metric="f1_score",
            outer_cv=KFold(n_splits=2),
            inner_cv=KFold(n_splits=3),
            verbosity=1,
            cache_folder="./cache/",
            output_settings=OutputSettings(project_folder="./tmp/"),
        )

        # BRANCH WITH QuantileTransformer AND DecisionTreeClassifier
        tree_qua_branch = Branch("tree_branch")
        tree_qua_branch += PipelineElement("QuantileTransformer")
        tree_qua_branch += PipelineElement(
            "DecisionTreeClassifier",
            {"min_samples_split": IntegerRange(2, 4)},
            criterion="gini",
        )

        # BRANCH WITH MinMaxScaler AND SVC
        svm_mima_branch = Branch("svm_branch")
        svm_mima_branch += PipelineElement("MinMaxScaler")
        svm_mima_branch += PipelineElement(
            "SVC",
            {
                "kernel": ["rbf", "linear"],  # Categorical(['rbf', 'linear']),
                "C": IntegerRange(0.01, 2.0),
            },
            gamma="auto",
        )

        # BRANCH WITH StandardScaler AND KNeighborsClassifier
        knn_sta_branch = Branch("neighbour_branch")
        knn_sta_branch += PipelineElement("StandardScaler")
        knn_sta_branch += PipelineElement("KNeighborsClassifier")

        # voting=True would average (mean) the predictions of all branches
        my_pipe += Stack("final_stack",
                         [tree_qua_branch, svm_mima_branch, knn_sta_branch])
        my_pipe += PipelineElement("LogisticRegression", solver="lbfgs")

        json_transformer = JsonTransformer()
        pipe_json = json_transformer.create_json(my_pipe)
        my_pipe_reload = json_transformer.from_json(pipe_json)
        pipe_json_reload = json_transformer.create_json(my_pipe_reload)
        self.assertEqual(pipe_json, pipe_json_reload)
Example no. 19
    def test_shall_continue(self):
        X, y = load_boston(return_X_y=True)

        inner_fold_length = 7
        # DESIGN YOUR PIPELINE
        my_pipe = Hyperpipe(
            name='performance_pipe',
            optimizer='random_search',
            optimizer_params={'limit_in_minutes': 2},
            metrics=['mean_squared_error'],
            best_config_metric='mean_squared_error',
            inner_cv=KFold(n_splits=inner_fold_length),
            eval_final_performance=True,
            output_settings=OutputSettings(project_folder='./tmp'),
            performance_constraints=[self.constraint_object])

        my_pipe += PipelineElement('StandardScaler')
        my_pipe += PipelineElement(
            'RandomForestRegressor',
            hyperparameters={'n_estimators': IntegerRange(5, 50)})

        # NOW TRAIN YOUR PIPELINE
        my_pipe.fit(X, y)

        # collect the per-fold validation MSEs for each tested configuration
        results = my_pipe.results.outer_folds[0].tested_config_list

        configs = []

        for i in range(len(results)):
            configs.append([
                x.validation.metrics['mean_squared_error']
                for x in results[i].inner_folds
            ])

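        # baseline threshold: best mean MSE among the first ten configurations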
        threshold = np.inf
        for val in configs[:10]:
            challenger = np.mean(val)
            if threshold > challenger:
                threshold = challenger

        originals_for_std = configs[:10]
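        # later configs should have been stopped early once their running mean exceeds threshold + std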
        for i, val in enumerate(configs[10:]):
            std = np.mean([np.std(x) for x in originals_for_std])
            for j, v in enumerate(val):

                if np.mean(val[:j + 1]) > threshold + std:
                    self.assertEqual(v, val[-1])
                    continue
                if len(val) == inner_fold_length - 1 and np.mean(
                        val) < threshold + std:
                    threshold = np.mean(val)
            if len(val) > 1:
                originals_for_std.append(val)
Example no. 20
    def setup_hyperpipe(self, output_settings=None):
        if output_settings is None:
            output_settings = OutputSettings(
                project_folder=self.tmp_folder_path)
        self.hyperpipe = Hyperpipe('god',
                                   inner_cv=self.inner_cv_object,
                                   metrics=self.metrics,
                                   best_config_metric=self.best_config_metric,
                                   output_settings=output_settings)
        self.hyperpipe += self.ss_pipe_element
        self.hyperpipe += self.pca_pipe_element
        self.hyperpipe.add(self.svc_pipe_element)
Example no. 21
    def test_branch_in_branch(self):
        """
        Test for deep Pipeline.
        """

        my_pipe = Hyperpipe(
            'basic_stacking',
            optimizer='grid_search',
            metrics=['accuracy', 'precision', 'recall'],
            best_config_metric='f1_score',
            outer_cv=KFold(n_splits=2),
            inner_cv=KFold(n_splits=3),
            verbosity=1,
            cache_folder="./cache/",
            output_settings=OutputSettings(project_folder='./tmp/'))

        # BRANCH WITH QuantileTransformer AND DecisionTreeClassifier
        tree_qua_branch = Branch('tree_branch')
        tree_qua_branch += PipelineElement('QuantileTransformer')
        tree_qua_branch += PipelineElement(
            'DecisionTreeClassifier',
            {'min_samples_split': IntegerRange(2, 4)},
            criterion='gini')

        # BRANCH WITH MinMaxScaler AND SVC
        svm_mima_branch = Branch('svm_branch')
        svm_mima_branch += PipelineElement('MinMaxScaler')
        svm_mima_branch += PipelineElement(
            'SVC',
            {
                'kernel': ['rbf', 'linear'],  # Categorical(['rbf', 'linear']),
                'C': FloatRange(0.01, 2.0)
            },
            gamma='auto')

        # BRANCH WITH StandardScaler AND KNeighborsClassifier
        knn_sta_branch = Branch('neighbour_branch')
        knn_sta_branch += PipelineElement('StandardScaler')
        knn_sta_branch += PipelineElement('KNeighborsClassifier')

        # voting=True would average (mean) the predictions of all branches
        my_pipe += Stack('final_stack',
                         [tree_qua_branch, svm_mima_branch, knn_sta_branch])
        my_pipe += PipelineElement('LogisticRegression', solver='lbfgs')

        json_transformer = JsonTransformer()
        pipe_json = json_transformer.create_json(my_pipe)
        my_pipe_reload = json_transformer.from_json(pipe_json)
        pipe_json_reload = json_transformer.create_json(my_pipe_reload)
        self.assertEqual(pipe_json, pipe_json_reload)
Example no. 22
    def test_inverse_transform(self):
        settings = OutputSettings(
            project_folder=self.tmp_folder_path, overwrite_results=True
        )

        # DESIGN YOUR PIPELINE
        pipe = Hyperpipe(
            "Limbic_System",
            optimizer="grid_search",
            metrics=["mean_absolute_error"],
            best_config_metric="mean_absolute_error",
            outer_cv=ShuffleSplit(n_splits=1, test_size=0.2),
            inner_cv=ShuffleSplit(n_splits=1, test_size=0.2),
            verbosity=2,
            cache_folder=self.cache_folder_path,
            eval_final_performance=True,
            output_settings=settings,
        )

        # PICK AN ATLAS
        atlas = PipelineElement(
            "BrainAtlas",
            rois=["Hippocampus_L", "Amygdala_L"],
            atlas_name="AAL",
            extract_mode="vec",
            batch_size=20,
        )

        # EITHER ADD A NEURO BRANCH OR THE ATLAS ITSELF
        neuro_branch = NeuroBranch("NeuroBranch")
        neuro_branch += atlas
        pipe += neuro_branch

        pipe += PipelineElement("LinearSVR")

        pipe.fit(self.X, self.y)

        # GET IMPORTANCE SCORES
        handler = ResultsHandler(pipe.results)
        importance_scores_optimum_pipe = handler.results.best_config_feature_importances

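        # back-map the importance scores into image space via the optimum pipe's inverse transform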
        manual_img, _, _ = pipe.optimum_pipe.inverse_transform(
            importance_scores_optimum_pipe, None
        )
        img = image.load_img(
            os.path.join(
                self.tmp_folder_path,
                "Limbic_System_results/optimum_pipe_feature_importances_backmapped.nii.gz",
            )
        )
        self.assertTrue(np.array_equal(manual_img.get_fdata(), img.get_fdata()))
Example no. 23
    def test_huge_combinations(self):
        hp = Hyperpipe('huge_combinations', inner_cv=KFold(n_splits=3), metrics=['accuracy'], best_config_metric='accuracy',
                       output_settings=OutputSettings(project_folder=self.tmp_folder_path))

        hp += PipelineElement("PCA", hyperparameters={'n_components': [5, 10]})
        stack = Stack('ensemble')
        for i in range(20):
            stack += PipelineElement('SVC', hyperparameters={'C': FloatRange(0.001, 5),
                                                             'kernel': ["linear", "rbf", "sigmoid", "polynomial"]})
        hp += stack
        hp += PipelineElement("SVC", hyperparameters={'kernel': ["linear", "rbf", "sigmoid"]})
        X, y = load_breast_cancer(return_X_y=True)
        with self.assertRaises(Warning):
            hp.fit(X, y)
Example no. 24
    def setUp(self):
        super(ResultHandlerAndHelperTests, self).setUp()
        self.inner_fold_nr = 10
        self.outer_fold_nr = 5

        self.y_true = np.linspace(1, 100, 100)
        self.X = self.y_true

        self.hyperpipe = Hyperpipe('test_prediction_collection',
                                   inner_cv=KFold(n_splits=self.inner_fold_nr),
                                   outer_cv=KFold(n_splits=self.outer_fold_nr),
                                   metrics=['mean_absolute_error', 'mean_squared_error'],
                                   best_config_metric='mean_absolute_error',
                                   output_settings=OutputSettings(project_folder=self.tmp_folder_path),
                                   verbosity=0)
Example no. 25
def create_hyperpipe():
    # this is needed here for the parallelisation
    from photonai.base import Hyperpipe, PipelineElement, OutputSettings
    from photonai.optimization import FloatRange, Categorical, IntegerRange
    from sklearn.model_selection import GroupKFold
    from sklearn.model_selection import KFold

    settings = OutputSettings(
        mongodb_connect_url="mongodb://trap-umbriel:27017/photon_results",
        project_folder="./tmp/",
    )
    my_pipe = Hyperpipe(
        "permutation_test_1",
        optimizer="grid_search",
        metrics=["accuracy", "precision", "recall"],
        best_config_metric="accuracy",
        outer_cv=GroupKFold(n_splits=2),
        inner_cv=KFold(n_splits=2),
        calculate_metrics_across_folds=True,
        eval_final_performance=True,
        verbosity=1,
        output_settings=settings,
    )

    # Add transformer elements
    my_pipe += PipelineElement(
        "StandardScaler",
        hyperparameters={},
        test_disabled=True,
        with_mean=True,
        with_std=True,
    )

    my_pipe += PipelineElement(
        "PCA",  # hyperparameters={'n_components': IntegerRange(5, 15)},
        test_disabled=False,
    )

    # Add estimator
    my_pipe += PipelineElement(
        "SVC",
        hyperparameters={"kernel": ["linear",
                                    "rbf"]},  # C': FloatRange(0.1, 5),
        gamma="scale",
        max_iter=1000000,
    )

    return my_pipe
Example no. 26
    def setUp(self):
        """
        Set default start settings for all tests.
        """
        super(ResultsHandlerTest, self).setUp()

        self.files = [
            'best_config_predictions.csv', 'time_monitor.csv',
            'time_monitor_pie.png', 'photon_result_file.p',
            'photon_summary.txt', 'photon_best_model.photon',
            'optimum_pipe_feature_importances_backmapped.npz',
            'photon_code.py', 'optimizer_history.png'
        ]

        self.output_settings = OutputSettings(
            project_folder=self.tmp_folder_path, save_output=True)

        self.ss_pipe_element = PipelineElement('StandardScaler')
        self.pca_pipe_element = PipelineElement('PCA',
                                                {'n_components': [1, 2]},
                                                random_state=42)
        self.svc_pipe_element = PipelineElement(
            'SVC',
            {
                'C': [0.1],
                'kernel': ['linear']
            },  # other kernel options: 'rbf', 'sigmoid'
            random_state=42)

        self.inner_cv_object = KFold(n_splits=3)
        self.metrics = ["accuracy", 'recall', 'precision']
        self.best_config_metric = "accuracy"
        self.hyperpipe = Hyperpipe('god',
                                   inner_cv=self.inner_cv_object,
                                   metrics=self.metrics,
                                   best_config_metric=self.best_config_metric,
                                   outer_cv=KFold(n_splits=2),
                                   output_settings=self.output_settings,
                                   verbosity=1)
        self.hyperpipe += self.ss_pipe_element
        self.hyperpipe += self.pca_pipe_element
        self.hyperpipe.add(self.svc_pipe_element)

        dataset = load_breast_cancer()
        self.__X = dataset.data
        self.__y = dataset.target

        self.hyperpipe.fit(self.__X, self.__y)
Example no. 27
    def test_load_from_file(self):
        X, y = load_breast_cancer(return_X_y=True)
        my_pipe = Hyperpipe(
            'load_results_file_test',
            metrics=['accuracy'],
            best_config_metric='accuracy',
            output_settings=OutputSettings(project_folder='./tmp'))
        my_pipe += PipelineElement("StandardScaler")
        my_pipe += PipelineElement("SVC")
        my_pipe.fit(X, y)

        results_file = os.path.join(my_pipe.output_settings.results_folder,
                                    "photon_result_file.p")
        my_result_handler = ResultsHandler()
        my_result_handler.load_from_file(results_file)
        self.assertIsInstance(my_result_handler.results, MDBHyperpipe)
Example no. 28
    def create_hyperpipe_no_mongo(self):
        from photonai.base import Hyperpipe, OutputSettings
        from sklearn.model_selection import KFold

        settings = OutputSettings(project_folder=self.tmp_folder_path)
        my_pipe = Hyperpipe('permutation_test_1',
                            optimizer='grid_search',
                            metrics=['accuracy', 'precision', 'recall'],
                            best_config_metric='accuracy',
                            outer_cv=KFold(n_splits=2),
                            inner_cv=KFold(n_splits=2),
                            calculate_metrics_across_folds=True,
                            eval_final_performance=True,
                            verbosity=1,
                            output_settings=settings)
        return my_pipe
Example no. 29
    def test_metrics_and_aggregations_with_outer_cv_but_eval_performance_false(
            self):
        self.hyperpipe = Hyperpipe(
            "test_prediction_collection",
            outer_cv=KFold(n_splits=self.outer_fold_nr),
            inner_cv=KFold(n_splits=self.inner_fold_nr),
            metrics=["mean_absolute_error", "mean_squared_error"],
            eval_final_performance=False,
            best_config_metric="mean_absolute_error",
            calculate_metrics_per_fold=True,
            calculate_metrics_across_folds=True,
            output_settings=OutputSettings(
                project_folder=self.tmp_folder_path),
        )

        self.test_metrics_and_aggregations()
Example no. 30
    def test_class_switch(self):
        """
        Test for Pipeline with data.
        """

        X, y = load_breast_cancer(return_X_y=True)

        my_pipe = Hyperpipe(
            'basic_switch_pipe',
            optimizer='random_grid_search',
            optimizer_params={'n_configurations': 15},
            metrics=['accuracy', 'precision', 'recall'],
            best_config_metric='accuracy',
            outer_cv=KFold(n_splits=3),
            inner_cv=KFold(n_splits=5),
            verbosity=1,
            output_settings=OutputSettings(project_folder='./tmp/'))

        # Transformer Switch
        my_pipe += Switch('TransformerSwitch', [
            PipelineElement('StandardScaler'),
            PipelineElement('PCA', test_disabled=True)
        ])

        # Estimator Switch
        svm = PipelineElement('SVC',
                              hyperparameters={'kernel': ['rbf', 'linear']})

        tree = PipelineElement('DecisionTreeClassifier',
                               hyperparameters={
                                   'min_samples_split': IntegerRange(2, 5),
                                   'min_samples_leaf': IntegerRange(1, 5),
                                   'criterion': ['gini', 'entropy']
                               })

        my_pipe += Switch('EstimatorSwitch', [svm, tree])

        json_transformer = JsonTransformer()

        pipe_json = json_transformer.create_json(my_pipe)
        my_pipe_reload = json_transformer.from_json(pipe_json)

        self.assertDictEqual(elements_to_dict(my_pipe.copy_me()),
                             elements_to_dict(my_pipe_reload.copy_me()))