Exemple #1
0
    def generate_hyperpipes(self):
        """Build one inner Hyperpipe per atlas ROI and fuse them in a Stack.

        Does nothing when ``self.atlas_info_object.roi_names_runtime`` is
        falsy. Otherwise the ROI names are stored on ``self.rois`` and the
        Stack holding all inner pipes is stored on ``self.pipeline_fusion``.
        """
        roi_names = self.atlas_info_object.roi_names_runtime
        if not roi_names:
            return
        self.rois = roi_names

        # Keyed by ROI name; only the values are handed to the Stack below.
        pipes_by_roi = {}
        for roi_index, roi_name in enumerate(self.rois):
            roi_pipe = Hyperpipe(
                self.atlas_name + '_' + str(roi_name),
                optimizer='grid_search',
                inner_cv=ShuffleSplit(n_splits=1, test_size=0.2, random_state=3),
                eval_final_performance=False,
                verbose=logging.verbosity_level,
                best_config_metric=self.best_config_metric,
                metrics=self.metrics,
            )

            # The filter element is set first, keyed on the ROI's position.
            roi_pipe.filter_element = RoiFilterElement(roi_index)

            # Afterwards every configured (name, object, kwargs) item is added.
            for element_spec in self.hyperpipe_elements:
                roi_pipe += PipelineElement.create(element_spec[0], element_spec[1], **element_spec[2])

            pipes_by_roi[roi_name] = roi_pipe

        self.pipeline_fusion = Stack('multiple_source_pipes', pipes_by_roi.values(), voting=False)
Exemple #2
0
class AtlasStacker(BaseEstimator):
    """Estimator that fits one inner Hyperpipe per atlas ROI inside a Stack.

    The inner pipes are created lazily by ``generate_hyperpipes`` (called
    from ``fit`` when needed) and collected in ``self.pipeline_fusion``.
    """

    def __init__(self, atlas_info_object, hyperpipe_elements, best_config_metric=None, metrics=None):
        """Store the configuration; no pipes are built here.

        Args:
            atlas_info_object: Object providing ``atlas_name`` and
                ``roi_names_runtime``.
            hyperpipe_elements: Iterable of indexable items; items 0 and 1
                are passed positionally and item 2 as keyword arguments to
                ``PipelineElement.create`` for every inner pipe.
            best_config_metric: Forwarded to every inner Hyperpipe.
                ``None`` (the default) is stored as a fresh empty list.
            metrics: Forwarded to every inner Hyperpipe. ``None`` (the
                default) is stored as a fresh empty list.
        """
        # ToDo
        # - Stacker

        self.atlas_info_object = atlas_info_object
        self.atlas_name = self.atlas_info_object.atlas_name
        self.hyperpipe_elements = hyperpipe_elements
        self.pipeline_fusion = None
        # A literal [] default would be one list shared by every instance,
        # so each instance gets its own empty list instead.
        self.best_config_metric = [] if best_config_metric is None else best_config_metric
        self.metrics = [] if metrics is None else metrics

    def generate_hyperpipes(self):
        """Create the per-ROI Hyperpipes and store their Stack.

        Does nothing when the atlas reports no runtime ROI names.
        Otherwise sets ``self.rois`` and ``self.pipeline_fusion``.
        """
        if self.atlas_info_object.roi_names_runtime:
            self.rois = self.atlas_info_object.roi_names_runtime

            inner_pipe_list = {}
            for i, roi_name in enumerate(self.rois):
                tmp_inner_pipe = Hyperpipe(self.atlas_name + '_' + str(roi_name), optimizer='grid_search',
                                           inner_cv=ShuffleSplit(n_splits=1, test_size=0.2, random_state=3),
                                           eval_final_performance=False, verbose=logging.verbosity_level,
                                           best_config_metric=self.best_config_metric, metrics=self.metrics)

                # at first set a filter element
                roi_filter_element = RoiFilterElement(i)
                tmp_inner_pipe.filter_element = roi_filter_element

                # secondly add all other items
                for pipe_item in self.hyperpipe_elements:
                    tmp_inner_pipe += PipelineElement.create(pipe_item[0], pipe_item[1], **pipe_item[2])

                inner_pipe_list[roi_name] = tmp_inner_pipe

            self.pipeline_fusion = Stack('multiple_source_pipes', inner_pipe_list.values(), voting=False)
        # Todo: else raise Error

    def fit(self, X, y=None):
        """Fit the stacked ROI pipes, building them first if necessary.

        Args:
            X: Training data, passed through to the Stack's ``fit``.
            y: Optional targets, passed through to the Stack's ``fit``.

        Returns:
            This estimator.

        Raises:
            RuntimeError: If no pipes exist yet and the atlas provides no
                runtime ROI names to build them from.
        """
        if not self.pipeline_fusion:
            if not self.atlas_info_object.roi_names_runtime:
                # RuntimeError instead of a bare BaseException: it is still
                # caught by ``except BaseException`` but no longer bypasses
                # ordinary ``except Exception`` handlers.
                raise RuntimeError('No ROIs could be received from Brain Atlas')
            self.generate_hyperpipes()

        self.pipeline_fusion.fit(X, y)
        return self

    def transform(self, X, y=None):
        """Return the Stack's ``transform`` result for ``X`` (and ``y``)."""
        return self.pipeline_fusion.transform(X, y)
    def test_classification_6(self):
        """Run each hyperpipe with an SVC/RandomForest stack and a voting element."""
        for template_pipe in self.hyperpipes:
            pipe = template_pipe.copy_me()

            # Two estimators side by side in a stack.
            svc_grid = {
                "kernel": Categorical(["linear", "rbf"]),
                "C": Categorical([0.01, 1, 5]),
            }
            svc_element = PipelineElement("SVC", hyperparameters=svc_grid)

            forest_grid = {
                "min_samples_split": FloatRange(
                    start=0.05, step=0.1, stop=0.26, range_type="range"
                ),
            }
            forest_element = PipelineElement(
                "RandomForestClassifier", hyperparameters=forest_grid
            )

            pipe += Stack("estimator_stack",
                          elements=[svc_element, forest_element])
            # The stack output is passed on to the "PhotonVotingClassifier".
            pipe += PipelineElement("PhotonVotingClassifier")

            self.run_hyperpipe(pipe, self.classification)
Exemple #4
0
    def test_no_y_transformers(self):
        """Adding the covariate/y dummy transformer to a Stack must raise NotImplementedError."""
        forbidden_stack = Stack("forbidden_stack")
        dummy_element = PipelineElement.create(
            "dummy",
            DummyNeedsCovariatesAndYTransformer(),
            {},
        )

        with self.assertRaises(NotImplementedError):
            forbidden_stack += dummy_element
    def test_classification_12(self):
        """Fit each hyperpipe on the iris data with a probability-based stack.

        The stack holds two SVC elements (linear and rbf kernel) plus a
        RandomForestClassifier and is created with ``use_probabilities=True``;
        a second RandomForestClassifier is appended after the stack.
        """
        # return_X_y must be passed by keyword: current scikit-learn declares
        # it keyword-only and rejects the positional form.
        X, y = load_iris(return_X_y=True)
        # multiclass classification
        for original_hyperpipe in self.hyperpipes:
            pipe = original_hyperpipe.copy_me()
            # create estimator stack
            SVC1 = PipelineElement(
                "SVC",
                hyperparameters={
                    "kernel": Categorical(["linear"]),
                    "C": Categorical([0.01, 1, 5]),
                },
            )
            SVC2 = PipelineElement(
                "SVC",
                hyperparameters={
                    "kernel": Categorical(["rbf"]),
                    "C": Categorical([0.01, 1, 5]),
                },
            )
            RF = PipelineElement("RandomForestClassifier")
            # add to pipe
            pipe += Stack("estimator_stack",
                          elements=[SVC1, SVC2, RF],
                          use_probabilities=True)
            pipe += PipelineElement("RandomForestClassifier")

            # Accuracy is set as the only metric and as the selection metric.
            pipe.optimization.metrics = ["accuracy"]
            pipe.optimization.best_config_metric = "accuracy"

            pipe.fit(X, y)
Exemple #6
0
    def test_huge_combinations(self):
        """Fitting a pipe with a 20-element SVC stack is expected to raise a Warning."""
        hp = Hyperpipe(
            "huge_combinations",
            metrics=["accuracy"],
            best_config_metric="accuracy",
            output_settings=OutputSettings(
                project_folder=self.tmp_folder_path),
        )

        hp += PipelineElement("PCA", hyperparameters={"n_components": [5, 10]})
        stack = Stack("ensemble")
        # Twenty SVC elements, each with its own C range and kernel list.
        for _ in range(20):
            stack += PipelineElement(
                "SVC",
                hyperparameters={
                    "C": FloatRange(0.001, 5),
                    "kernel": ["linear", "rbf", "sigmoid", "polynomial"],
                },
            )
        hp += stack
        hp += PipelineElement(
            "SVC", hyperparameters={"kernel": ["linear", "rbf", "sigmoid"]})
        # return_X_y must be passed by keyword: current scikit-learn declares
        # it keyword-only and rejects the positional form.
        X, y = load_breast_cancer(return_X_y=True)
        with self.assertRaises(Warning):
            hp.fit(X, y)
Exemple #7
0
    def setup_crazy_pipe(self):
        """Replace the hyperpipe's elements with a nested stack/switch layout.

        Returns:
            The list of the five ParallelBranch objects that were created.
        """
        # Start from an empty pipe.
        self.hyperpipe.elements = []

        # Five parallel branches named "0".."4" using 3..7 processes,
        # each holding one PCA element.
        parallel_branches = []
        for number in range(5):
            parallel = ParallelBranch(name=str(number), nr_of_processes=number + 3)
            pca = PipelineElement(
                'PCA', hyperparameters={'n_components': IntegerRange(1, 50)})
            parallel += pca
            parallel_branches.append(parallel)

        # The first two parallel branches go into a switch ...
        switch = Switch('disabling_test_switch')
        for parallel in parallel_branches[:2]:
            switch += parallel

        # ... and the remaining three are each wrapped in a scaled branch.
        branch_stack = Stack('stack_of_branches')
        for number in range(2, 5):
            wrapper = Branch('branch_' + str(number))
            wrapper += PipelineElement('StandardScaler')
            wrapper += parallel_branches[number]
            branch_stack += wrapper

        self.hyperpipe.add(branch_stack)
        self.hyperpipe.add(PipelineElement('StandardScaler'))
        self.hyperpipe.add(switch)
        self.hyperpipe.add(PipelineElement('SVC'))
        return parallel_branches
Exemple #8
0
    def test_inverse_tansform(self):
        """Check PhotonPipeline.inverse_transform against sklearn and through a Stack."""
        # Plain two-step pipeline: sklearn result serves as the reference.
        reference_pipe = SKPipeline([("SS", self.sk_ss), ("PCA", self.sk_pca)])
        reference_pipe.fit(self.X, self.y)
        reference_transformed = reference_pipe.transform(self.X)
        reference_restored = reference_pipe.inverse_transform(
            reference_transformed)

        photon_pipe = PhotonPipeline([("SS", self.p_ss), ("PCA", self.p_pca)])
        photon_pipe.fit(self.X, self.y)
        photon_transformed, _, _ = photon_pipe.transform(self.X)
        photon_restored, _, _ = photon_pipe.inverse_transform(
            photon_transformed)

        arrays_match = np.array_equal(reference_restored, photon_restored)
        self.assertTrue(arrays_match)

        # Pipeline whose first step is a stack: mapping the feature
        # importances back must yield as many columns as the input has.
        steps = [
            ("stack", Stack("stack", [self.p_pca])),
            ("StandardScaler", PipelineElement("StandardScaler")),
            ("LinearSVC", PipelineElement("LinearSVC")),
        ]
        stacked_pipe = PhotonPipeline(steps)
        stacked_pipe.fit(self.X, self.y)
        importances = stacked_pipe.feature_importances_
        restored, _, _ = stacked_pipe.inverse_transform(importances)
        self.assertEqual(restored.shape[1], self.X.shape[1])
Exemple #9
0
    def test_classification_9(self):
        """Run each hyperpipe with scaling, sample pairing, two feature-half branches and a forest."""
        for template_pipe in self.hyperpipes:
            pipe = template_pipe.copy_me()

            # Preprocessing in front of the branches.
            pipe += PipelineElement('StandardScaler')
            pairing_grid = {'draw_limit': [100],
                            'generator': Categorical(['nearest_pair', 'random_pair'])}
            pipe += PipelineElement('SamplePairingClassification', pairing_grid,
                                    distance_metric='euclidean', test_disabled=True)

            # Column index at which the feature set is cut in two.
            n_features = self.X_shape[1]
            split_at = int(np.floor(n_features / 2))

            # Branch on the columns [0, split_at) with an optional PCA.
            first_half = Branch('source1_features')
            first_half += DataFilter(indices=np.arange(start=0, stop=split_at))
            first_half += PipelineElement('PCA',
                                          hyperparameters={'n_components': Categorical([None, 5])},
                                          test_disabled=True)

            # Branch on the columns [split_at, n_features) with an optional PCA.
            second_half = Branch('source2_features')
            second_half += DataFilter(indices=np.arange(start=split_at, stop=n_features))
            second_half += PipelineElement('PCA',
                                           hyperparameters={'n_components': Categorical([None, 5])},
                                           test_disabled=True)

            # Both branches are stacked; the stack output feeds the final estimator.
            pipe += Stack('source_stack', elements=[first_half, second_half])
            forest_grid = {
                'min_samples_split': FloatRange(start=.05, step=.1, stop=.26, range_type='range')}
            pipe += PipelineElement('RandomForestClassifier', hyperparameters=forest_grid)

            self.run_hyperpipe(pipe, self.classification)
    def test_copy_me(self):
        """A copied PhotonPipeline must keep its structure and settings.

        Builds a pipeline with an imputer, a switch, a stack containing a
        branch, a callback and a voting element, copies it with ``copy_me``
        and inspects the copy.
        """
        switch = Switch("my_copy_switch")
        switch += PipelineElement("StandardScaler")
        switch += PipelineElement("RobustScaler", test_disabled=True)

        stack = Stack("RandomStack")
        stack += PipelineElement("SVC")
        branch = Branch('Random_Branch')
        pca_hyperparameters = {'n_components': [5, 10]}
        branch += PipelineElement("PCA", hyperparameters=pca_hyperparameters)
        branch += PipelineElement("DecisionTreeClassifier")
        stack += branch

        photon_pipe = PhotonPipeline([("SimpleImputer", PipelineElement("SimpleImputer")),
                                      ("my_copy_switch", switch),
                                      ('RandomStack', stack),
                                      ('Callback1', CallbackElement('tmp_callback', np.mean)),
                                      ("PhotonVotingClassifier", PipelineElement("PhotonVotingClassifier"))])

        copy_of_the_pipe = photon_pipe.copy_me()

        # Dedicated assert methods report both values on failure, whereas
        # assertTrue(a == b) only reports "False is not true".
        self.assertEqual(photon_pipe.random_state, copy_of_the_pipe.random_state)
        self.assertEqual(len(copy_of_the_pipe.elements), 5)
        self.assertEqual(copy_of_the_pipe.elements[2][1].name, "RandomStack")
        self.assertTrue(copy_of_the_pipe.named_steps["my_copy_switch"].elements[1].test_disabled)
        # elements[2] is the stack, its element 1 the branch, whose element 0 is the PCA.
        self.assertDictEqual(copy_of_the_pipe.elements[2][1].elements[1].elements[0].hyperparameters,
                             {"PCA__n_components": [5, 10]})
        self.assertIsInstance(copy_of_the_pipe.elements[3][1], CallbackElement)
        self.assertEqual(copy_of_the_pipe.named_steps["tmp_callback"].delegate_function, np.mean)
Exemple #11
0
    def setup_crazy_pipe(self):
        """Replace the hyperpipe's elements with a nested stack/switch layout.

        Returns:
            The list of the five NeuroBranch objects that were created.
        """
        # Start from an empty pipe.
        self.hyperpipe.elements = []

        # Five neuro branches named "0".."4" using 3..7 processes,
        # each holding one "SmoothImages" element.
        neuro_branches = []
        for number in range(5):
            neuro = NeuroBranch(name=str(number), nr_of_processes=number + 3)
            neuro += PipelineElement("SmoothImages")
            neuro_branches.append(neuro)

        # The first two neuro branches go into a switch ...
        switch = Switch("disabling_test_switch")
        for neuro in neuro_branches[:2]:
            switch += neuro

        # ... and the remaining three are each wrapped in a scaled branch.
        branch_stack = Stack("stack_of_branches")
        for number in range(2, 5):
            wrapper = Branch("branch_" + str(number))
            wrapper += PipelineElement("StandardScaler")
            wrapper += neuro_branches[number]
            branch_stack += wrapper

        self.hyperpipe.add(branch_stack)
        self.hyperpipe.add(PipelineElement("StandardScaler"))
        self.hyperpipe.add(switch)
        self.hyperpipe.add(PipelineElement("SVC"))
        return neuro_branches
Exemple #12
0
    def test_classification_11(self):
        """Run each hyperpipe with a probability-based stack followed by a random forest."""
        for template_pipe in self.hyperpipes:
            pipe = template_pipe.copy_me()

            # Two SVC elements that differ only in their kernel.
            linear_grid = {
                "kernel": Categorical(["linear"]),
                "C": Categorical([0.01, 1, 5]),
            }
            linear_svc = PipelineElement("SVC", hyperparameters=linear_grid)

            rbf_grid = {
                "kernel": Categorical(["rbf"]),
                "C": Categorical([0.01, 1, 5]),
            }
            rbf_svc = PipelineElement("SVC", hyperparameters=rbf_grid)

            forest = PipelineElement("RandomForestClassifier")

            # The stack is created with use_probabilities=True and is
            # followed by a second random forest element.
            estimator_stack = Stack(
                "estimator_stack",
                elements=[linear_svc, rbf_svc, forest],
                use_probabilities=True,
            )
            pipe += estimator_stack
            pipe += PipelineElement("RandomForestClassifier")

            self.run_hyperpipe(pipe, self.classification)
    def test_branch_in_branch(self):
        """
        Test for deep Pipeline.

        Builds a Hyperpipe with a stack of three branches, serializes it with
        JsonTransformer, reloads it and checks that serializing the reloaded
        pipe yields the same JSON.
        """

        my_pipe = Hyperpipe(
            "basic_stacking",
            optimizer="grid_search",
            metrics=["accuracy", "precision", "recall"],
            best_config_metric="f1_score",
            outer_cv=KFold(n_splits=2),
            inner_cv=KFold(n_splits=3),
            verbosity=1,
            cache_folder="./cache/",
            output_settings=OutputSettings(project_folder="./tmp/"),
        )

        # BRANCH WITH QuantileTransformer AND DecisionTreeClassifier
        tree_qua_branch = Branch("tree_branch")
        tree_qua_branch += PipelineElement("QuantileTransformer")
        tree_qua_branch += PipelineElement(
            "DecisionTreeClassifier",
            {"min_samples_split": IntegerRange(2, 4)},
            criterion="gini",
        )

        # BRANCH WITH MinMaxScaler AND SVC
        svm_mima_branch = Branch("svm_branch")
        svm_mima_branch += PipelineElement("MinMaxScaler")
        svm_mima_branch += PipelineElement(
            "SVC",
            {
                "kernel": ["rbf", "linear"],  # Categorical(['rbf', 'linear']),
                # NOTE(review): float bounds are passed to IntegerRange here;
                # confirm whether a FloatRange was intended.
                "C": IntegerRange(0.01, 2.0),
            },
            gamma="auto",
        )

        # BRANCH WITH StandardScaler AND KNeighborsClassifier
        knn_sta_branch = Branch("neighbour_branch")
        knn_sta_branch += PipelineElement("StandardScaler")
        knn_sta_branch += PipelineElement("KNeighborsClassifier")

        # Stack the three branches and put a LogisticRegression behind them.
        my_pipe += Stack("final_stack",
                         [tree_qua_branch, svm_mima_branch, knn_sta_branch])
        my_pipe += PipelineElement("LogisticRegression", solver="lbfgs")

        json_transformer = JsonTransformer()
        pipe_json = json_transformer.create_json(my_pipe)
        my_pipe_reload = json_transformer.from_json(pipe_json)
        # The reloaded JSON gets its own variable. The former chained
        # assignment also overwrote pipe_json, so the assertion compared an
        # object with itself and could never fail.
        pipe_json_reload = json_transformer.create_json(my_pipe_reload)
        self.assertEqual(pipe_json, pipe_json_reload)
Exemple #14
0
    def test_classification_9(self):
        """Run each hyperpipe with scaling, sample pairing, two feature-half branches and a forest."""
        for template_pipe in self.hyperpipes:
            pipe = template_pipe.copy_me()

            # Preprocessing in front of the branches.
            pipe += PipelineElement("StandardScaler")
            pairing_grid = {
                "draw_limit": [100],
                "generator": Categorical(["nearest_pair", "random_pair"]),
            }
            pipe += PipelineElement(
                "SamplePairingClassification",
                pairing_grid,
                distance_metric="euclidean",
                test_disabled=True,
            )

            # Column index at which the feature set is cut in two.
            n_features = self.X_shape[1]
            split_at = int(np.floor(n_features / 2))

            # Branch on the columns [0, split_at) with an optional PCA.
            first_half = Branch("source1_features")
            first_half += DataFilter(
                indices=np.arange(start=0, stop=split_at))
            first_half += PipelineElement(
                "PCA",
                hyperparameters={"n_components": Categorical([None, 5])},
                test_disabled=True,
            )

            # Branch on the columns [split_at, n_features) with an optional PCA.
            second_half = Branch("source2_features")
            second_half += DataFilter(
                indices=np.arange(start=split_at, stop=n_features))
            second_half += PipelineElement(
                "PCA",
                hyperparameters={"n_components": Categorical([None, 5])},
                test_disabled=True,
            )

            # Both branches are stacked; the stack output feeds the final estimator.
            pipe += Stack("source_stack",
                          elements=[first_half, second_half])
            forest_grid = {
                "min_samples_split": FloatRange(
                    start=0.05, step=0.1, stop=0.26, range_type="range"
                ),
            }
            pipe += PipelineElement(
                "RandomForestClassifier", hyperparameters=forest_grid
            )

            self.run_hyperpipe(pipe, self.classification)
Exemple #15
0
    def test_classification_6(self):
        """Run each hyperpipe with an SVC/RandomForest stack and a voting element."""
        for template_pipe in self.hyperpipes:
            pipe = template_pipe.copy_me()

            # Two estimators side by side in a stack.
            svc_grid = {'kernel': Categorical(['linear', 'rbf']),
                        'C': Categorical([.01, 1, 5])}
            svc_element = PipelineElement('SVC', hyperparameters=svc_grid)

            forest_grid = {
                'min_samples_split': FloatRange(start=.05, step=.1, stop=.26, range_type='range')}
            forest_element = PipelineElement('RandomForestClassifier', hyperparameters=forest_grid)

            pipe += Stack('estimator_stack', elements=[svc_element, forest_element])
            # The stack output is passed on to the 'PhotonVotingClassifier'.
            pipe += PipelineElement('PhotonVotingClassifier')

            self.run_hyperpipe(pipe, self.classification)
Exemple #16
0
    def test_add(self):
        """Elements can be added to a Stack via the constructor or ``+=``."""
        single_pca_grid = {'MyStack__PCA__n_components': [5]}

        # Variant 1: elements handed to the constructor.
        pca = PipelineElement('PCA', {'n_components': [5]})
        ica = PipelineElement('FastICA')
        stack = Stack('MyStack', [pca, ica])
        self.assertEqual(len(stack.elements), 2)
        self.assertDictEqual(stack._hyperparameters, single_pca_grid)

        # Variant 2: same elements added one by one.
        stack = Stack('MyStack')
        stack += PipelineElement('PCA', {'n_components': [5]})
        stack += PipelineElement('FastICA')
        self.assertEqual(len(stack.elements), 2)
        self.assertDictEqual(stack._hyperparameters, single_pca_grid)

        def callback(X, y=None):
            pass

        # Mixed element types are accepted as well.
        mixed_elements = [
            PipelineElement('PCA'),
            CallbackElement('MyCallback', callback),
            Switch('MySwitch',
                   [PipelineElement('PCA'),
                    PipelineElement('FastICA')]),
            Branch('MyBranch', [PipelineElement('PCA')]),
        ]
        stack = Stack('MyStack', mixed_elements)
        self.assertEqual(len(stack.elements), 4)

        # Adding an element object that is already in the stack is rejected.
        with self.assertRaises(ValueError):
            stack += stack.elements[0]

        # A second, new PCA element is expected under the name 'PCA2'.
        stack += PipelineElement('PCA', {'n_components': [10, 20]})
        newest_element = stack.elements[-1]
        self.assertEqual(newest_element.name, 'PCA2')
        expected_grid = {
            'MyStack__MySwitch__current_element': [(0, 0), (1, 0)],
            'MyStack__PCA2__n_components': [10, 20],
        }
        self.assertDictEqual(stack.hyperparameters, expected_grid)
Exemple #17
0
    def test_classification_7(self):
        """Run each hyperpipe with a stack holding the same estimator type twice."""
        for template_pipe in self.hyperpipes:
            pipe = template_pipe.copy_me()

            # Two SVC elements that differ only in their kernel.
            linear_grid = {'kernel': Categorical(['linear']), 'C': Categorical([.01, 1, 5])}
            linear_svc = PipelineElement('SVC', hyperparameters=linear_grid)

            rbf_grid = {'kernel': Categorical(['rbf']), 'C': Categorical([.01, 1, 5])}
            rbf_svc = PipelineElement('SVC', hyperparameters=rbf_grid)

            pipe += Stack('estimator_stack', elements=[linear_svc, rbf_svc])
            pipe += PipelineElement('PhotonVotingClassifier')

            self.run_hyperpipe(pipe, self.classification)
    def test_branch_in_branch(self):
        """
        Test for deep Pipeline.

        Builds a Hyperpipe with a stack of three branches, serializes it with
        JsonTransformer, reloads it and checks that serializing the reloaded
        pipe yields the same JSON.
        """

        my_pipe = Hyperpipe(
            'basic_stacking',
            optimizer='grid_search',
            metrics=['accuracy', 'precision', 'recall'],
            best_config_metric='f1_score',
            outer_cv=KFold(n_splits=2),
            inner_cv=KFold(n_splits=3),
            verbosity=1,
            cache_folder="./cache/",
            output_settings=OutputSettings(project_folder='./tmp/'))

        # BRANCH WITH QuantileTransformer AND DecisionTreeClassifier
        tree_qua_branch = Branch('tree_branch')
        tree_qua_branch += PipelineElement('QuantileTransformer')
        tree_qua_branch += PipelineElement(
            'DecisionTreeClassifier',
            {'min_samples_split': IntegerRange(2, 4)},
            criterion='gini')

        # BRANCH WITH MinMaxScaler AND SVC
        svm_mima_branch = Branch('svm_branch')
        svm_mima_branch += PipelineElement('MinMaxScaler')
        svm_mima_branch += PipelineElement(
            'SVC',
            {
                'kernel': ['rbf', 'linear'],  # Categorical(['rbf', 'linear']),
                # NOTE(review): float bounds are passed to IntegerRange here;
                # confirm whether a FloatRange was intended.
                'C': IntegerRange(0.01, 2.0)
            },
            gamma='auto')

        # BRANCH WITH StandardScaler AND KNeighborsClassifier
        knn_sta_branch = Branch('neighbour_branch')
        knn_sta_branch += PipelineElement('StandardScaler')
        knn_sta_branch += PipelineElement('KNeighborsClassifier')

        # Stack the three branches and put a LogisticRegression behind them.
        my_pipe += Stack('final_stack',
                         [tree_qua_branch, svm_mima_branch, knn_sta_branch])
        my_pipe += PipelineElement('LogisticRegression', solver='lbfgs')

        json_transformer = JsonTransformer()
        pipe_json = json_transformer.create_json(my_pipe)
        my_pipe_reload = json_transformer.from_json(pipe_json)
        # The reloaded JSON gets its own variable. The former chained
        # assignment also overwrote pipe_json, so the assertion compared an
        # object with itself and could never fail.
        pipe_json_reload = json_transformer.create_json(my_pipe_reload)
        self.assertEqual(pipe_json, pipe_json_reload)
Exemple #19
0
    def test_prepare_photon_pipeline(self):
        """prepare_photon_pipe must reuse the branch's own element objects in order."""
        branch = Branch('my_test_branch')
        branch += PipelineElement('SimpleImputer')
        branch += Switch('my_crazy_switch_bitch')
        branch += Stack('my_stacking_stack')
        branch += PipelineElement('SVC')

        pipeline = branch.prepare_photon_pipe(branch.elements)

        step_count = len(pipeline.named_steps)
        self.assertEqual(step_count, 4)
        for position, child in enumerate(branch.elements):
            # Lookup by name and lookup by position must both return the
            # very same object that sits in the branch.
            self.assertIs(pipeline.named_steps[child.name], child)
            self.assertIs(pipeline.elements[position][1],
                          branch.elements[position])
Exemple #20
0
    def test_huge_combinations(self):
        """Fitting a pipe with a 20-element SVC stack is expected to raise a Warning."""
        cv = KFold(n_splits=3)
        output = OutputSettings(project_folder=self.tmp_folder_path)
        hp = Hyperpipe('huge_combinations', inner_cv=cv, metrics=['accuracy'],
                       best_config_metric='accuracy', output_settings=output)

        hp += PipelineElement("PCA", hyperparameters={'n_components': [5, 10]})

        # Twenty SVC elements, each with its own C range and kernel list.
        ensemble = Stack('ensemble')
        for _ in range(20):
            svc_grid = {'C': FloatRange(0.001, 5),
                        'kernel': ["linear", "rbf", "sigmoid", "polynomial"]}
            ensemble += PipelineElement('SVC', hyperparameters=svc_grid)
        hp += ensemble

        final_grid = {'kernel': ["linear", "rbf", "sigmoid"]}
        hp += PipelineElement("SVC", hyperparameters=final_grid)

        X, y = load_breast_cancer(return_X_y=True)
        with self.assertRaises(Warning):
            hp.fit(X, y)
Exemple #21
0
    def test_classification_11(self):
        """Run each hyperpipe with a probability-based stack followed by a random forest."""
        for template_pipe in self.hyperpipes:
            pipe = template_pipe.copy_me()

            # Two SVC elements that differ only in their kernel.
            linear_grid = {'kernel': Categorical(['linear']), 'C': Categorical([.01, 1, 5])}
            linear_svc = PipelineElement('SVC', hyperparameters=linear_grid)

            rbf_grid = {'kernel': Categorical(['rbf']), 'C': Categorical([.01, 1, 5])}
            rbf_svc = PipelineElement('SVC', hyperparameters=rbf_grid)

            forest = PipelineElement('RandomForestClassifier')

            # The stack is created with use_probabilities=True and is
            # followed by a second random forest element.
            estimator_stack = Stack('estimator_stack',
                                    elements=[linear_svc, rbf_svc, forest],
                                    use_probabilities=True)
            pipe += estimator_stack
            pipe += PipelineElement('RandomForestClassifier')

            self.run_hyperpipe(pipe, self.classification)
Exemple #22
0
    def test_classification_12(self):
        """Fit each hyperpipe on the iris data with a probability-based stack.

        The stack holds two SVC elements (linear and rbf kernel) plus a
        RandomForestClassifier and is created with ``use_probabilities=True``;
        a second RandomForestClassifier is appended after the stack.
        """
        # return_X_y must be passed by keyword: current scikit-learn declares
        # it keyword-only and rejects the positional form.
        X, y = load_iris(return_X_y=True)
        # multiclass classification
        for original_hyperpipe in self.hyperpipes:
            pipe = original_hyperpipe.copy_me()
            # create estimator stack
            SVC1 = PipelineElement('SVC',
                                   hyperparameters={'kernel': Categorical(['linear']), 'C': Categorical([.01, 1, 5])})
            SVC2 = PipelineElement('SVC',
                                   hyperparameters={'kernel': Categorical(['rbf']), 'C': Categorical([.01, 1, 5])})
            RF = PipelineElement('RandomForestClassifier')
            # add to pipe
            pipe += Stack('estimator_stack', elements=[SVC1, SVC2, RF], use_probabilities=True)
            pipe += PipelineElement('RandomForestClassifier')

            # Accuracy is set as the only metric and as the selection metric.
            pipe.optimization.metrics = ['accuracy']
            pipe.optimization.best_config_metric = 'accuracy'

            pipe.fit(X, y)
Exemple #23
0
    def test_set_random_state(self):
        """Setting random_state on a branch must reach its nested elements.

        The checks look at the second element of the switch and of the stack,
        and at the ``base_element`` wrapped by each of them.
        """
        # we handle all elements in one method that is inherited so we capture them all in this test
        random_state = 53
        my_branch = Branch("random_state_branch")
        my_branch += PipelineElement("StandardScaler")
        my_switch = Switch("transformer_Switch")
        my_switch += PipelineElement("LassoFeatureSelection")
        my_switch += PipelineElement("PCA")
        my_branch += my_switch
        my_stack = Stack("Estimator_Stack")
        my_stack += PipelineElement("SVR")
        my_stack += PipelineElement("Ridge")
        my_branch += my_stack
        my_branch += PipelineElement("ElasticNet")

        my_branch.random_state = random_state
        # assertEqual reports both values on failure, whereas
        # assertTrue(a == b) only reports "False is not true".
        self.assertEqual(my_switch.elements[1].random_state, random_state)
        self.assertEqual(
            my_switch.elements[1].base_element.random_state, random_state)
        self.assertEqual(my_stack.elements[1].random_state, random_state)
        self.assertEqual(
            my_stack.elements[1].base_element.random_state, random_state)
Exemple #24
0
    def setUp(self):
        """Load the breast cancer data and build the stacks used by the tests.

        ``self.stacks`` pairs each list of original elements (or branches)
        with a Stack that was built from copies of them.
        """
        # return_X_y must be passed by keyword: current scikit-learn declares
        # it keyword-only and rejects the positional form.
        self.X, self.y = load_breast_cancer(return_X_y=True)

        self.pca = PipelineElement('PCA', {'n_components': [5, 10]})
        self.scaler = PipelineElement('StandardScaler', {'with_mean': [True]})
        self.svc = PipelineElement('SVC', {'C': [1, 2]})
        self.tree = PipelineElement('DecisionTreeClassifier',
                                    {'min_samples_leaf': [3, 5]})

        # Branches and stacks receive copies, so the originals stay untouched.
        self.transformer_branch_1 = Branch('TransBranch1',
                                           [self.pca.copy_me()])
        self.transformer_branch_2 = Branch('TransBranch2',
                                           [self.scaler.copy_me()])

        self.estimator_branch_1 = Branch('EstBranch1', [self.svc.copy_me()])
        self.estimator_branch_2 = Branch('EstBranch2', [self.tree.copy_me()])

        self.transformer_stack = Stack(
            'TransformerStack',
            [self.pca.copy_me(), self.scaler.copy_me()])
        self.estimator_stack = Stack(
            'EstimatorStack',
            [self.svc.copy_me(), self.tree.copy_me()])
        self.transformer_branch_stack = Stack('TransBranchStack', [
            self.transformer_branch_1.copy_me(),
            self.transformer_branch_2.copy_me()
        ])
        self.estimator_branch_stack = Stack('EstBranchStack', [
            self.estimator_branch_1.copy_me(),
            self.estimator_branch_2.copy_me()
        ])

        # (original elements, stack built from their copies)
        self.stacks = [
            ([self.pca, self.scaler], self.transformer_stack),
            ([self.svc, self.tree], self.estimator_stack),
            ([self.transformer_branch_1,
              self.transformer_branch_2], self.transformer_branch_stack),
            ([self.estimator_branch_1,
              self.estimator_branch_2], self.estimator_branch_stack)
        ]
Exemple #25
0
    def test_classification_7(self):
        """Run each hyperpipe with a stack of two SVCs followed by a voting classifier."""
        for template_pipe in self.hyperpipes:
            pipe = template_pipe.copy_me()

            # The same estimator appears twice in one stack; the two copies
            # differ only in the kernel they are allowed to use.
            svc_pair = [
                PipelineElement(
                    "SVC",
                    hyperparameters={
                        "kernel": Categorical([kernel]),
                        "C": Categorical([0.01, 1, 5]),
                    },
                )
                for kernel in ("linear", "rbf")
            ]
            estimator_stack = Stack("estimator_stack", elements=svc_pair)

            pipe += estimator_stack
            pipe += PipelineElement("PhotonVotingClassifier")

            self.run_hyperpipe(pipe, self.classification)
Exemple #26
0
    def test_classification_8(self):
        """Run each hyperpipe with two feature-half branches stacked before an estimator switch."""
        for template_pipe in self.hyperpipes:
            pipe = template_pipe.copy_me()
            pipe += PipelineElement('StandardScaler')

            # Split the feature columns in two: the first branch gets columns
            # [0, half), the second gets [half, n_features).
            n_features = self.X_shape[1]
            half = int(np.floor(n_features / 2))
            feature_splits = (
                ('source1_features', np.arange(start=0, stop=half)),
                ('source2_features', np.arange(start=half, stop=n_features)),
            )

            # Both branches share the same layout: column filter, optional
            # confounder removal, optional PCA. If both PCAs are disabled the
            # features are simply concatenated and passed to the final estimator.
            source_branches = []
            for branch_name, column_indices in feature_splits:
                branch = Branch(branch_name)
                branch += DataFilter(indices=column_indices)
                branch += PipelineElement('ConfounderRemoval', {},
                                          standardize_covariates=True,
                                          test_disabled=True,
                                          confounder_names=['cov1', 'cov2'])
                branch += PipelineElement('PCA',
                                          hyperparameters={'n_components': Categorical([None, 5])},
                                          test_disabled=True)
                source_branches.append(branch)

            # stack the branch outputs (i.e. horizontal concatenation)
            pipe += Stack('source_stack', elements=source_branches)

            # final estimator: a switch between an SVC and a random forest,
            # fed with the stack output as features
            estimator_switch = Switch('estimator_switch')
            svc_grid = {'kernel': Categorical(['linear', 'rbf']),
                        'C': Categorical([.01, 1, 5])}
            estimator_switch += PipelineElement('SVC', hyperparameters=svc_grid)
            forest_grid = {
                'min_samples_split': FloatRange(start=.05, step=.1, stop=.26, range_type='range')}
            estimator_switch += PipelineElement('RandomForestClassifier', hyperparameters=forest_grid)
            pipe += estimator_switch

            self.run_hyperpipe(pipe, self.classification)
Exemple #27
0
# Hyperpipe whose final stage is a stack of two classifiers feeding a LinearSVC.
pipe_settings = dict(
    optimizer="sk_opt",
    optimizer_params={"n_configurations": 5},
    metrics=["accuracy", "precision", "recall"],
    best_config_metric="accuracy",
    outer_cv=KFold(n_splits=3),
    inner_cv=KFold(n_splits=3),
    verbosity=1,
    output_settings=OutputSettings(project_folder="./tmp/"),
)
my_pipe = Hyperpipe("basic_stack_pipe", **pipe_settings)

# normalise the features before they reach the stack
my_pipe += PipelineElement("StandardScaler")

# first stack member: decision tree with a tunable split threshold
tree_hyperparameters = {
    "criterion": ["gini"],
    "min_samples_split": IntegerRange(2, 4),
}
tree = PipelineElement("DecisionTreeClassifier", hyperparameters=tree_hyperparameters)

# second stack member: linear SVM with a tunable C
svc = PipelineElement("LinearSVC", hyperparameters={"C": FloatRange(0.5, 25)})

# For a stack that includes estimators you can choose whether predict or
# predict_proba is called for all estimators. If only some of them implement
# predict_proba, predict is called for the remaining ones.
final_stack = Stack("final_stack", [tree, svc], use_probabilities=True)
my_pipe += final_stack

# final estimator working on the stacked outputs
my_pipe += PipelineElement("LinearSVC")
my_pipe.fit(X, y)
                    output_settings=OutputSettings(project_folder='./tmp/'))

# Branch 1: QuantileTransformer followed by a DecisionTreeClassifier
tree_qua_branch = Branch('tree_branch')
tree_qua_branch += PipelineElement('QuantileTransformer')
tree_qua_branch += PipelineElement('DecisionTreeClassifier',
                                   {'min_samples_split': IntegerRange(2, 4)},
                                   criterion='gini')

# Branch 2: MinMaxScaler followed by an SVC
svm_mima_branch = Branch('svm_branch')
svm_mima_branch += PipelineElement('MinMaxScaler')
# C is a continuous regularisation strength and the bounds 0.01 and 2.0 are
# floats, so the search space has to be a FloatRange; an integer range cannot
# represent the lower bound.
svm_mima_branch += PipelineElement('SVC',
                                   {
                                       'kernel': Categorical(['rbf', 'linear']),
                                       'C': FloatRange(0.01, 2.0)
                                   },
                                   gamma='auto')

# Branch 3: StandardScaler followed by a KNeighborsClassifier
knn_sta_branch = Branch('neighbour_branch')
knn_sta_branch += PipelineElement('StandardScaler')
knn_sta_branch += PipelineElement('KNeighborsClassifier')

# Combine the three branches in one stack. No voting argument is passed here,
# so the Stack's default behaviour applies.
my_pipe += Stack('final_stack',
                 [tree_qua_branch, svm_mima_branch, knn_sta_branch])

# Final estimator that follows the stack in the pipeline
my_pipe += PipelineElement('LogisticRegression', solver='lbfgs')

my_pipe.fit(X, y)
Exemple #29
0
class StackTests(unittest.TestCase):
    """Unit tests for ``Stack``: copying, fitting, transforming, predicting and composition."""

    def setUp(self):
        """Build the fixtures shared by all tests.

        Loads the breast cancer data into ``self.X`` / ``self.y``, creates four
        base elements (PCA, StandardScaler, SVC, DecisionTreeClassifier), wraps
        copies of them in branches and stacks, and records in ``self.stacks``
        each stack next to the stand-alone elements it was built from.
        """
        # return_X_y is keyword-only in current scikit-learn releases; passing
        # it positionally raises a TypeError there.
        self.X, self.y = load_breast_cancer(return_X_y=True)

        # stand-alone elements, each with a small hyperparameter grid
        self.pca = PipelineElement('PCA', {'n_components': [5, 10]})
        self.scaler = PipelineElement('StandardScaler', {'with_mean': [True]})
        self.svc = PipelineElement('SVC', {'C': [1, 2]})
        self.tree = PipelineElement('DecisionTreeClassifier',
                                    {'min_samples_leaf': [3, 5]})

        # single-element branches built from copies of the elements above
        self.transformer_branch_1 = Branch('TransBranch1',
                                           [self.pca.copy_me()])
        self.transformer_branch_2 = Branch('TransBranch2',
                                           [self.scaler.copy_me()])

        self.estimator_branch_1 = Branch('EstBranch1', [self.svc.copy_me()])
        self.estimator_branch_2 = Branch('EstBranch2', [self.tree.copy_me()])

        # stacks of plain elements and stacks of branches; everything is
        # copied so the stand-alone fixtures are never shared with a stack
        self.transformer_stack = Stack(
            'TransformerStack',
            [self.pca.copy_me(), self.scaler.copy_me()])
        self.estimator_stack = Stack(
            'EstimatorStack',
            [self.svc.copy_me(), self.tree.copy_me()])
        self.transformer_branch_stack = Stack('TransBranchStack', [
            self.transformer_branch_1.copy_me(),
            self.transformer_branch_2.copy_me()
        ])
        self.estimator_branch_stack = Stack('EstBranchStack', [
            self.estimator_branch_1.copy_me(),
            self.estimator_branch_2.copy_me()
        ])

        # (stand-alone elements, stack built from copies of them) pairs
        self.stacks = [
            ([self.pca, self.scaler], self.transformer_stack),
            ([self.svc, self.tree], self.estimator_stack),
            ([self.transformer_branch_1,
              self.transformer_branch_2], self.transformer_branch_stack),
            ([self.estimator_branch_1,
              self.estimator_branch_2], self.estimator_branch_stack)
        ]

    def test_copy_me(self):
        """A copy keeps random state and element structure, but its first element's ``__dict__`` differs."""
        for _, stack in self.stacks:
            stack_copy = stack.copy_me()
            self.assertEqual(stack.random_state, stack_copy.random_state)
            self.assertFalse(
                stack.elements[0].__dict__ == stack_copy.elements[0].__dict__)
            self.assertDictEqual(elements_to_dict(stack),
                                 elements_to_dict(stack_copy))

    def test_horizontal_stacking(self):
        """The stack output has as many columns as both element outputs together."""
        for (element_1, element_2), stack in self.stacks:
            # fit elements
            Xt_1 = element_1.fit(self.X, self.y).transform(self.X, self.y)
            Xt_2 = element_2.fit(self.X, self.y).transform(self.X, self.y)

            Xt = stack.fit(self.X, self.y).transform(self.X, self.y)

            # output of transform() changes depending on whether it is an
            # estimator stack or a transformer stack
            if isinstance(Xt, tuple):
                Xt = Xt[0]
                Xt_1 = Xt_1[0]
                Xt_2 = Xt_2[0]

            # 1-d outputs count as a single column
            if len(Xt_1.shape) == 1:
                Xt_1 = np.reshape(Xt_1, (-1, 1))
                Xt_2 = np.reshape(Xt_2, (-1, 1))

            self.assertEqual(Xt.shape[1], Xt_1.shape[-1] + Xt_2.shape[-1])

    def recursive_assertion(self, element_a, element_b):
        """Assert that every key of ``element_a`` maps to an equal value in ``element_b``.

        Arrays are compared element-wise, nested dicts recursively and all
        other values with ``assertEqual``. Only the keys of ``element_a`` are
        visited, so additional keys in ``element_b`` are not checked.
        """
        for key, value_a in element_a.items():
            value_b = element_b[key]
            if isinstance(value_a, np.ndarray):
                np.testing.assert_array_equal(value_a, value_b)
            elif isinstance(value_a, dict):
                self.recursive_assertion(value_a, value_b)
            else:
                self.assertEqual(value_a, value_b)

    def test_fit(self):
        """Fitting a stack leaves its elements in the same state as fitting them alone."""
        cases = [
            ([self.pca, self.scaler], self.transformer_stack),
            ([self.svc, self.tree], self.estimator_stack),
        ]
        for elements, stack in cases:
            # identical seed for the stack fit and the stand-alone fits
            np.random.seed(42)
            fitted_stack = stack.fit(self.X, self.y)
            np.random.seed(42)
            for i, element in enumerate(elements):
                fitted_element = element.fit(self.X, self.y)
                element_dict = elements_to_dict(fitted_element)
                stack_dict = elements_to_dict(fitted_stack.elements[i])
                self.recursive_assertion(element_dict, stack_dict)

    def test_transform(self):
        """The stack's transform output equals the horizontally stacked element outputs."""
        for elements, stack in self.stacks:
            np.random.seed(42)
            Xt_stack, _, _ = stack.fit(self.X, self.y).transform(self.X)
            np.random.seed(42)
            Xt_elements = None
            for element in elements:
                Xt_element, _, _ = element.fit(self.X,
                                               self.y).transform(self.X)
                Xt_elements = PhotonDataHelper.stack_data_horizontally(
                    Xt_elements, Xt_element)
            np.testing.assert_array_equal(Xt_stack, Xt_elements)

    def test_predict(self):
        """The stack's predictions equal the horizontally stacked element predictions."""
        cases = [
            ([self.svc, self.tree], self.estimator_stack),
            ([self.estimator_branch_1,
              self.estimator_branch_2], self.estimator_branch_stack)
        ]
        for elements, stack in cases:
            np.random.seed(42)
            fitted_stack = stack.fit(self.X, self.y)
            yt_stack = fitted_stack.predict(self.X)
            np.random.seed(42)
            yt_elements = None
            for element in elements:
                yt_element = element.fit(self.X, self.y).predict(self.X)
                yt_elements = PhotonDataHelper.stack_data_horizontally(
                    yt_elements, yt_element)
            np.testing.assert_array_equal(yt_stack, yt_elements)

    def test_predict_proba(self):
        """The stack's probabilities equal the stacked element probabilities.

        When an element's ``predict_proba`` returns ``None``, its ``predict``
        output is used for the comparison instead.
        """
        cases = [
            ([self.svc, self.tree], self.estimator_stack),
            ([self.estimator_branch_1,
              self.estimator_branch_2], self.estimator_branch_stack)
        ]
        for elements, stack in cases:
            np.random.seed(42)
            fitted_stack = stack.fit(self.X, self.y)
            yt_stack = fitted_stack.predict_proba(self.X)
            np.random.seed(42)
            yt_elements = None
            for element in elements:
                yt_element = element.fit(self.X, self.y).predict_proba(self.X)
                if yt_element is None:
                    yt_element = element.fit(self.X, self.y).predict(self.X)
                yt_elements = PhotonDataHelper.stack_data_horizontally(
                    yt_elements, yt_element)
            np.testing.assert_array_equal(yt_stack, yt_elements)

    def test_inverse_transform(self):
        """``inverse_transform`` on a fitted stack raises ``NotImplementedError``."""
        with self.assertRaises(NotImplementedError):
            self.stacks[0][1].fit(self.X, self.y).inverse_transform(self.X)

    def test_set_params(self):
        """``set_params`` forwards values to the stack's elements and rejects unknown names."""
        trans_config = {
            'PCA__n_components': 2,
            'PCA__disabled': True,
            'StandardScaler__with_mean': True
        }
        est_config = {
            'SVC__C': 3,
            'DecisionTreeClassifier__min_samples_leaf': 1
        }

        # transformer stack
        self.transformer_stack.set_params(**trans_config)
        self.assertEqual(
            self.transformer_stack.elements[0].base_element.n_components, 2)
        self.assertEqual(self.transformer_stack.elements[0].disabled, True)
        self.assertEqual(
            self.transformer_stack.elements[1].base_element.with_mean, True)

        # estimator stack
        self.estimator_stack.set_params(**est_config)
        self.assertEqual(self.estimator_stack.elements[0].base_element.C, 3)
        self.assertEqual(
            self.estimator_stack.elements[1].base_element.min_samples_leaf, 1)

        # a parameter name that matches no element must be rejected
        with self.assertRaises(ValueError):
            self.estimator_stack.set_params(any_weird_param=1)

        with self.assertRaises(ValueError):
            self.transformer_stack.set_params(any_weird_param=1)

    def test_add(self):
        """Elements can be added via the constructor or ``+=``; a second PCA is renamed ``PCA2``."""
        # elements passed to the constructor
        stack = Stack('MyStack', [
            PipelineElement('PCA', {'n_components': [5]}),
            PipelineElement('FastICA')
        ])
        self.assertEqual(len(stack.elements), 2)
        self.assertDictEqual(stack._hyperparameters,
                             {'MyStack__PCA__n_components': [5]})

        # the same elements added one by one
        stack = Stack('MyStack')
        stack += PipelineElement('PCA', {'n_components': [5]})
        stack += PipelineElement('FastICA')
        self.assertEqual(len(stack.elements), 2)
        self.assertDictEqual(stack._hyperparameters,
                             {'MyStack__PCA__n_components': [5]})

        def callback(X, y=None):
            pass

        # a stack may mix plain elements, callbacks, switches and branches
        stack = Stack('MyStack', [
            PipelineElement('PCA'),
            CallbackElement('MyCallback', callback),
            Switch('MySwitch',
                   [PipelineElement('PCA'),
                    PipelineElement('FastICA')]),
            Branch('MyBranch', [PipelineElement('PCA')])
        ])
        self.assertEqual(len(stack.elements), 4)

        # test doubled item
        with self.assertRaises(ValueError):
            stack += stack.elements[0]

        # a new element whose name is already taken gets a numbered name
        stack += PipelineElement('PCA', {'n_components': [10, 20]})
        self.assertEqual(stack.elements[-1].name, 'PCA2')
        self.assertDictEqual(
            stack.hyperparameters, {
                'MyStack__MySwitch__current_element': [(0, 0), (1, 0)],
                'MyStack__PCA2__n_components': [10, 20]
            })

    def test_feature_importances(self):
        """Fitted estimator stacks report ``None`` as ``feature_importances_``."""
        # stack of plain estimators
        self.estimator_stack.fit(self.X, self.y)
        self.assertIsNone(self.estimator_stack.feature_importances_)

        # stack of estimator branches
        self.estimator_branch_stack.fit(self.X, self.y)
        self.assertIsNone(self.estimator_branch_stack.feature_importances_)

    def test_use_probabilities(self):
        """``use_probabilities`` changes the column count returned by ``predict``."""
        # with probabilities switched on, predict() is expected to return 3 columns
        self.estimator_stack.use_probabilities = True
        self.estimator_stack.fit(self.X, self.y)
        probas = self.estimator_stack.predict(self.X)
        self.assertEqual(probas.shape[1], 3)

        # switched off: one prediction column per estimator, while
        # predict_proba() is still expected to return 3 columns
        self.estimator_stack.use_probabilities = False
        self.estimator_stack.fit(self.X, self.y)
        preds = self.estimator_stack.predict(self.X)
        self.assertEqual(preds.shape[1], 2)
        probas = self.estimator_stack.predict_proba(self.X)
        self.assertEqual(probas.shape[1], 3)
    def test_class_with_data_02(self):
        """
        Check that a pipeline with a large estimator stack survives a JSON round trip.
        """

        X, y = load_breast_cancer(return_X_y=True)

        # DESIGN YOUR PIPELINE
        outer_split = StratifiedKFold(n_splits=2, shuffle=True, random_state=42)
        inner_split = StratifiedKFold(n_splits=2, shuffle=True, random_state=42)
        my_pipe = Hyperpipe(name='Estimator_pipe',
                            optimizer='grid_search',
                            metrics=['balanced_accuracy'],
                            best_config_metric='balanced_accuracy',
                            outer_cv=outer_split,
                            inner_cv=inner_split,
                            output_settings=OutputSettings(project_folder='./tmp/'),
                            random_seed=42)

        # ADD ELEMENTS TO YOUR PIPELINE
        # step 1: normalize all features
        my_pipe += PipelineElement('StandardScaler')

        # step 2: optional feature selection
        lasso_grid = {
            'percentile_to_keep': FloatRange(start=0.1,
                                             step=0.1,
                                             stop=0.7,
                                             range_type='range'),
            'alpha': FloatRange(0.5, 1)
        }
        my_pipe += PipelineElement('LassoFeatureSelection',
                                   hyperparameters=lasso_grid,
                                   test_disabled=True)

        # step 3: imbalanced group handling
        my_pipe += PipelineElement('ImbalancedDataTransformer',
                                   method_name='SMOTE',
                                   test_disabled=False)

        # step 4: one stack holding every estimator listed below
        est_stack = Stack(name='classifier_stack')
        classifier_names = (
            'RandomForestClassifier', 'LinearSVC', 'NuSVC', "SVC",
            "MLPClassifier", "KNeighborsClassifier", "Lasso",
            "PassiveAggressiveClassifier", "LogisticRegression", "Perceptron",
            "RidgeClassifier", "SGDClassifier", "GaussianProcessClassifier",
            "AdaBoostClassifier", "BaggingClassifier",
            "GradientBoostingClassifier",
        )
        for classifier_name in classifier_names:
            est_stack += PipelineElement(classifier_name)
        my_pipe += est_stack

        # step 5: voting classifier placed after the stack
        my_pipe += PipelineElement('PhotonVotingClassifier')

        # serialise to JSON and rebuild; both pipes must describe the same elements
        json_transformer = JsonTransformer()
        pipe_json = json_transformer.create_json(my_pipe)
        my_pipe_reload = json_transformer.from_json(pipe_json)

        original_description = elements_to_dict(my_pipe.copy_me())
        reloaded_description = elements_to_dict(my_pipe_reload.copy_me())
        self.assertDictEqual(original_description, reloaded_description)