Esempio n. 1
0
 def test_ask_advanced(self):
     """
     Exercise .ask() on a pipeline that nests branches and switches.

     The pipeline holds a plain scaler, a PCA flagged ``test_disabled``, a
     switch between two scalers, a PCA/SVC branch and an outer switch that
     wraps the branch and the inner switch.  The result of
     ``self.test_ask()`` must contain the parameter names of all of them.
     """
     svc_branch = Branch("branch")
     svc_branch += PipelineElement("PCA")
     svc_grid = {"C": [0.1, 1], "kernel": ["rbf", "sigmoid"]}
     svc_branch += PipelineElement("SVC", svc_grid)

     scaler_choices = [
         PipelineElement("StandardScaler"),
         PipelineElement("MaxAbsScaler"),
     ]
     scaler_switch = Switch("switch", scaler_choices)

     leading_scaler = PipelineElement("StandardScaler")
     optional_pca = PipelineElement(
         "PCA",
         hyperparameters={"n_components": IntegerRange(5, 20)},
         test_disabled=True,
     )
     # the outer switch reuses the very same branch and switch objects
     outer_switch = Switch("Switch_in_switch", [svc_branch, scaler_switch])
     self.pipeline_elements = [
         leading_scaler,
         optional_pca,
         scaler_switch,
         svc_branch,
         outer_switch,
     ]

     asked = self.test_ask()
     expected_names = (
         "PCA__n_components",
         "Switch_in_switch__current_element",
         "branch__SVC__C",
         "branch__SVC__kernel",
         "switch__current_element",
     )
     for expected_name in expected_names:
         self.assertIn(expected_name, asked)
Esempio n. 2
0
 def test_ask_advanced(self):
     """
     Verify that .ask() reports hyperparameters of nested elements.

     A branch, a switch and a switch containing both are placed in
     ``self.pipeline_elements``; the value returned by ``self.test_ask()``
     is then checked for the expected parameter names.
     """
     pca_then_svc = Branch('branch')
     pca_then_svc += PipelineElement('PCA')
     pca_then_svc += PipelineElement(
         'SVC', {'C': [0.1, 1], 'kernel': ['rbf', 'sigmoid']})

     first_scaler = PipelineElement("StandardScaler")
     second_scaler = PipelineElement("MaxAbsScaler")
     inner_switch = Switch('switch', [first_scaler, second_scaler])

     elements = [PipelineElement("StandardScaler")]
     elements.append(
         PipelineElement('PCA',
                         hyperparameters={'n_components': IntegerRange(5, 20)},
                         test_disabled=True))
     elements.append(inner_switch)
     elements.append(pca_then_svc)
     # branch and inner switch are shared with the wrapping switch
     elements.append(Switch('Switch_in_switch', [pca_then_svc, inner_switch]))
     self.pipeline_elements = elements

     reported = self.test_ask()
     required = [
         "PCA__n_components",
         "Switch_in_switch__current_element",
         "branch__SVC__C",
         "branch__SVC__kernel",
         "switch__current_element",
     ]
     for parameter_name in required:
         self.assertIn(parameter_name, reported)
Esempio n. 3
0
    def test_estimator_type(self):
        """
        Check the ``_estimator_type`` a Switch reports for its elements.

        Combinations expected to raise ``NotImplementedError``:
        PCA with SVR, and SVC with SVR.  Two transformers are expected to
        yield ``None``, two classifiers ``'classifier'`` and two regressors
        ``'regressor'``.  The switches stored on the test case are checked
        as well.
        """
        pca = PipelineElement('PCA')
        ica = PipelineElement('FastICA')
        svc = PipelineElement('SVC')
        svr = PipelineElement('SVR')
        tree_class = PipelineElement('DecisionTreeClassifier')
        tree_reg = PipelineElement('DecisionTreeRegressor')

        # incompatible element combinations: reading the property must raise
        for incompatible in ([pca, svr], [svc, svr]):
            switch = Switch('MySwitch', incompatible)
            with self.assertRaises(NotImplementedError):
                # only the attribute access matters, the value is discarded
                _ = switch._estimator_type

        switch = Switch('MySwitch', [pca, ica])
        self.assertIsNone(switch._estimator_type)

        switch = Switch('MySwitch', [tree_class, svc])
        self.assertEqual(switch._estimator_type, 'classifier')

        switch = Switch('MySwitch', [tree_reg, svr])
        self.assertEqual(switch._estimator_type, 'regressor')

        # switches stored on the test case
        self.assertEqual(self.estimator_switch._estimator_type, 'classifier')
        self.assertEqual(self.estimator_switch_with_branch._estimator_type,
                         'classifier')
        self.assertIsNone(self.transformer_switch_with_branch._estimator_type)
        self.assertIsNone(self.switch_in_switch._estimator_type)
Esempio n. 4
0
 def test_predict_proba(self):
     """
     Compare predict_proba of a Switch with that of ``self.gpc``.

     The switch is set to ``current_element=(0, 0)`` and both objects are
     fitted on ``self.X``/``self.y`` after seeding numpy's RNG with 42; the
     resulting probability arrays must be equal.
     """
     candidates = [
         PipelineElement('GaussianProcessClassifier'),
         PipelineElement('SVC'),
     ]
     estimator_switch = Switch('EstimatorSwitch', candidates)
     estimator_switch.set_params(current_element=(0, 0))

     np.random.seed(42)
     fitted_switch = estimator_switch.fit(self.X, self.y)
     probas_via_switch = fitted_switch.predict_proba(self.X)

     # same seed for the reference run
     np.random.seed(42)
     fitted_reference = self.gpc.fit(self.X, self.y)
     probas_reference = fitted_reference.predict_proba(self.X)

     self.assertTrue(np.array_equal(probas_via_switch, probas_reference))
Esempio n. 5
0
    def setUp(self):
        """
        Build the fixtures: an SVC element, a decision tree element, a switch
        over both, a PCA/SVC branch and a switch containing branch and switch.
        """
        svc_grid = {'C': [0.1, 1], 'kernel': ['rbf', 'sigmoid']}
        self.svc_pipe_element = PipelineElement('SVC', svc_grid)

        # NOTE: despite the attribute name this wraps a DecisionTreeClassifier
        tree_grid = {'min_samples_split': [2, 3, 4]}
        self.lr_pipe_element = PipelineElement('DecisionTreeClassifier', tree_grid)

        switch_members = [self.svc_pipe_element, self.lr_pipe_element]
        self.pipe_switch = Switch('switch', switch_members)

        # the branch reuses the SVC element that is already part of the switch
        pca_svc_branch = Branch('branch')
        pca_svc_branch += PipelineElement('PCA')
        pca_svc_branch += self.svc_pipe_element
        self.branch = pca_svc_branch

        self.switch_in_switch = Switch('Switch_in_switch',
                                       [self.branch, self.pipe_switch])
Esempio n. 6
0
    def setUp(self):
        """
        Load the breast cancer data and build the elements, branches and
        switches used by the tests.

        Branches and switches receive copies (``copy_me()``) of the base
        elements rather than the base elements themselves.
        """
        # return_X_y has to be given by keyword: current scikit-learn
        # releases no longer accept it as a positional argument.
        self.X, self.y = load_breast_cancer(return_X_y=True)
        self.svc = PipelineElement('SVC', {
            'C': [0.1, 1],
            'kernel': ['rbf', 'sigmoid']
        })
        self.tree = PipelineElement('DecisionTreeClassifier',
                                    {'min_samples_split': [2, 3, 4]})
        self.gpc = PipelineElement('GaussianProcessClassifier')
        self.pca = PipelineElement('PCA')

        # single-element branches, one with an estimator, one with a transformer
        self.estimator_branch = Branch('estimator_branch',
                                       [self.tree.copy_me()])
        self.transformer_branch = Branch('transformer_branch',
                                         [self.pca.copy_me()])

        # switch over three plain estimators
        self.estimator_switch = Switch(
            'estimator_switch',
            [self.svc.copy_me(),
             self.tree.copy_me(),
             self.gpc.copy_me()])
        # switches mixing a plain element with a branch
        self.estimator_switch_with_branch = Switch(
            'estimator_switch_with_branch',
            [self.tree.copy_me(),
             self.estimator_branch.copy_me()])
        self.transformer_switch_with_branch = Switch(
            'transformer_switch_with_branch',
            [self.pca.copy_me(),
             self.transformer_branch.copy_me()])
        # switch whose second element is itself a switch
        self.switch_in_switch = Switch('Switch_in_switch', [
            self.transformer_branch.copy_me(),
            self.transformer_switch_with_branch.copy_me()
        ])
Esempio n. 7
0
    def test_copy_me(self):
        """
        Check that ``PhotonPipeline.copy_me()`` reproduces a nested pipeline.

        The pipeline consists of an imputer, a switch, a stack holding an SVC
        and a branch, a callback element and a voting classifier.  The copy
        must keep the random state, the number and names of the steps, the
        ``test_disabled`` flag, the branch hyperparameters and the callback's
        delegate function.
        """
        switch = Switch("my_copy_switch")
        switch += PipelineElement("StandardScaler")
        switch += PipelineElement("RobustScaler", test_disabled=True)

        stack = Stack("RandomStack")
        stack += PipelineElement("SVC")
        branch = Branch('Random_Branch')
        pca_hyperparameters = {'n_components': [5, 10]}
        branch += PipelineElement("PCA", hyperparameters=pca_hyperparameters)
        branch += PipelineElement("DecisionTreeClassifier")
        stack += branch

        photon_pipe = PhotonPipeline([("SimpleImputer", PipelineElement("SimpleImputer")),
                                      ("my_copy_switch", switch),
                                      ('RandomStack', stack),
                                      ('Callback1', CallbackElement('tmp_callback', np.mean)),
                                      ("PhotonVotingClassifier", PipelineElement("PhotonVotingClassifier"))])

        copy_of_the_pipe = photon_pipe.copy_me()

        # dedicated assert methods report both values when a check fails
        self.assertEqual(photon_pipe.random_state, copy_of_the_pipe.random_state)
        self.assertEqual(len(copy_of_the_pipe.elements), 5)
        self.assertEqual(copy_of_the_pipe.elements[2][1].name, "RandomStack")
        self.assertTrue(copy_of_the_pipe.named_steps["my_copy_switch"].elements[1].test_disabled)
        # stack -> second element (the branch) -> first element (the PCA)
        self.assertDictEqual(copy_of_the_pipe.elements[2][1].elements[1].elements[0].hyperparameters,
                             {"PCA__n_components": [5, 10]})
        self.assertIsInstance(copy_of_the_pipe.elements[3][1], CallbackElement)
        # the callback is looked up under its own name, not the step label 'Callback1'
        self.assertEqual(copy_of_the_pipe.named_steps["tmp_callback"].delegate_function, np.mean)
Esempio n. 8
0
    def setup_crazy_pipe(self):
        """
        Replace the hyperpipe's elements with a deliberately complex layout.

        Five NeuroBranch objects are created.  The first two are put into a
        switch; each of the remaining three is placed behind a scaler in its
        own branch, and those branches are collected in a stack.

        Returns:
            The list holding the five NeuroBranch objects.
        """
        # start from scratch, a complex task is needed here
        self.hyperpipe.elements = []

        neuro_branches = []
        for number in range(5):
            neuro_branch = NeuroBranch(name=str(number),
                                       nr_of_processes=number + 3)
            neuro_branch += PipelineElement("SmoothImages")
            neuro_branches.append(neuro_branch)

        disabling_switch = Switch("disabling_test_switch")
        for switch_member in neuro_branches[:2]:
            disabling_switch += switch_member

        branch_stack = Stack("stack_of_branches")
        for number, neuro_branch in enumerate(neuro_branches[2:], start=2):
            wrapper = Branch("branch_" + str(number))
            wrapper += PipelineElement("StandardScaler")
            wrapper += neuro_branch
            branch_stack += wrapper

        hyperpipe = self.hyperpipe
        hyperpipe.add(branch_stack)
        hyperpipe.add(PipelineElement("StandardScaler"))
        hyperpipe.add(disabling_switch)
        hyperpipe.add(PipelineElement("SVC"))
        return neuro_branches
Esempio n. 9
0
    def test_classification_2(self):
        """Run every hyperpipe with a switch between an SVC and a random forest."""
        for template in self.hyperpipes:
            candidate = template.copy_me()

            # Simple estimator Switch
            estimator_switch = Switch("estimator_switch")

            svc_space = {
                "kernel": Categorical(["linear", "rbf"]),
                "C": Categorical([0.01, 1, 5]),
            }
            estimator_switch += PipelineElement("SVC", hyperparameters=svc_space)

            split_range = FloatRange(start=0.05, step=0.1, stop=0.26,
                                     range_type="range")
            estimator_switch += PipelineElement(
                "RandomForestClassifier",
                hyperparameters={"min_samples_split": split_range})

            candidate += estimator_switch
            self.run_hyperpipe(candidate, self.classification)
Esempio n. 10
0
    def setup_crazy_pipe(self):
        """
        Replace the hyperpipe's elements with a deliberately complex layout.

        Five ParallelBranch objects, each holding a PCA with a tunable number
        of components, are created.  The first two go into a switch; each of
        the other three is placed behind a scaler in its own branch, and the
        branches are gathered in a stack.

        Returns:
            The list holding the five ParallelBranch objects.
        """
        # start from scratch, a complex task is needed here
        self.hyperpipe.elements = []

        parallel_branches = []
        for number in range(5):
            parallel = ParallelBranch(name=str(number),
                                      nr_of_processes=number + 3)
            tunable_pca = PipelineElement(
                'PCA',
                hyperparameters={'n_components': IntegerRange(1, 50)})
            parallel += tunable_pca
            parallel_branches.append(parallel)

        disabling_switch = Switch('disabling_test_switch')
        for switch_member in parallel_branches[:2]:
            disabling_switch += switch_member

        branch_stack = Stack('stack_of_branches')
        for number, parallel in enumerate(parallel_branches[2:], start=2):
            wrapper = Branch('branch_' + str(number))
            wrapper += PipelineElement('StandardScaler')
            wrapper += parallel
            branch_stack += wrapper

        hyperpipe = self.hyperpipe
        hyperpipe.add(branch_stack)
        hyperpipe.add(PipelineElement('StandardScaler'))
        hyperpipe.add(disabling_switch)
        hyperpipe.add(PipelineElement('SVC'))
        return parallel_branches
Esempio n. 11
0
    def setUp(self):
        """
        Create the shared elements: an SVC, a decision tree, a switch over
        the two, a PCA/SVC branch and a switch wrapping branch and switch.
        """
        self.svc_pipe_element = PipelineElement(
            "SVC", {"C": [0.1, 1], "kernel": ["rbf", "sigmoid"]})
        # NOTE: the attribute name says "lr" but the element is a decision tree
        self.lr_pipe_element = PipelineElement(
            "DecisionTreeClassifier", {"min_samples_split": [2, 3, 4]})

        estimators = [self.svc_pipe_element, self.lr_pipe_element]
        self.pipe_switch = Switch("switch", estimators)

        # the SVC element is shared between the switch and this branch
        branch = Branch("branch")
        branch += PipelineElement("PCA")
        branch += self.svc_pipe_element
        self.branch = branch

        nested = [self.branch, self.pipe_switch]
        self.switch_in_switch = Switch("Switch_in_switch", nested)
Esempio n. 12
0
    def test_class_switch(self):
        """
        Check that a Hyperpipe with switches survives a JSON round trip.

        The pipe is serialized with ``JsonTransformer.create_json`` and
        rebuilt with ``from_json``; the element dictionaries of copies of
        both pipes must be equal.  No data is fitted, so no dataset is
        loaded here.
        """
        my_pipe = Hyperpipe(
            'basic_switch_pipe',
            optimizer='random_grid_search',
            optimizer_params={'n_configurations': 15},
            metrics=['accuracy', 'precision', 'recall'],
            best_config_metric='accuracy',
            outer_cv=KFold(n_splits=3),
            inner_cv=KFold(n_splits=5),
            verbosity=1,
            output_settings=OutputSettings(project_folder='./tmp/'))

        # Transformer Switch
        my_pipe += Switch('TransformerSwitch', [
            PipelineElement('StandardScaler'),
            PipelineElement('PCA', test_disabled=True)
        ])

        # Estimator Switch
        svm = PipelineElement('SVC',
                              hyperparameters={'kernel': ['rbf', 'linear']})

        tree = PipelineElement('DecisionTreeClassifier',
                               hyperparameters={
                                   'min_samples_split': IntegerRange(2, 5),
                                   'min_samples_leaf': IntegerRange(1, 5),
                                   'criterion': ['gini', 'entropy']
                               })

        my_pipe += Switch('EstimatorSwitch', [svm, tree])

        # serialize and restore the pipe definition
        json_transformer = JsonTransformer()

        pipe_json = json_transformer.create_json(my_pipe)
        my_pipe_reload = json_transformer.from_json(pipe_json)

        self.assertDictEqual(elements_to_dict(my_pipe.copy_me()),
                             elements_to_dict(my_pipe_reload.copy_me()))
Esempio n. 13
0
    def test_classification_5(self):
        """Run every hyperpipe with a preprocessing switch and an estimator switch."""
        for template in self.hyperpipes:
            candidate = template.copy_me()

            # multi-switch
            # first switch: PCA or simple feature selection
            preprocessing = Switch("preproc_switch")
            pca_space = {"n_components": Categorical([None, 5])}
            preprocessing += PipelineElement("PCA",
                                             hyperparameters=pca_space,
                                             test_disabled=True)
            percentile_range = IntegerRange(start=5, step=20, stop=66,
                                            range_type="range")
            preprocessing += PipelineElement(
                "FClassifSelectPercentile",
                hyperparameters={"percentile": percentile_range},
                test_disabled=True)
            candidate += preprocessing

            # second switch: SVC or random forest
            estimators = Switch("estimator_switch")
            svc_space = {
                "kernel": Categorical(["linear", "rbf"]),
                "C": Categorical([0.01, 1, 5]),
            }
            estimators += PipelineElement("SVC", hyperparameters=svc_space)
            split_range = FloatRange(start=0.05, step=0.1, stop=0.26,
                                     range_type="range")
            estimators += PipelineElement(
                "RandomForestClassifier",
                hyperparameters={"min_samples_split": split_range})
            candidate += estimators

            self.run_hyperpipe(candidate, self.classification)
Esempio n. 14
0
    def test_classification_3(self):
        """Run every hyperpipe with an estimator switch that has no hyperparameters."""
        for template in self.hyperpipes:
            candidate = template.copy_me()

            # estimator Switch without hyperparameters
            bare_switch = Switch('estimator_switch')
            for estimator_name in ('SVC', 'RandomForestClassifier'):
                bare_switch += PipelineElement(estimator_name)
            candidate += bare_switch

            self.run_hyperpipe(candidate, self.classification)
Esempio n. 15
0
    def test_regression_3(self):
        """Run every hyperpipe with a regressor switch that has no hyperparameters."""
        for template in self.hyperpipes:
            candidate = template.copy_me()

            # estimator Switch without hyperparameters
            bare_switch = Switch("estimator_switch")
            for regressor_name in ("SVR", "RandomForestRegressor"):
                bare_switch += PipelineElement(regressor_name)
            candidate += bare_switch

            self.run_hyperpipe(candidate, self.regression)
Esempio n. 16
0
    def test_regression_4(self):
        """Run every hyperpipe with a transformer switch ahead of a random forest regressor."""
        for template in self.hyperpipes:
            candidate = template.copy_me()

            # Transformer Switch: PCA or percentile-based feature selection
            transformer_switch = Switch('trans_switch')
            transformer_switch += PipelineElement('PCA')
            percentile_range = IntegerRange(start=5, step=20, stop=66,
                                            range_type='range')
            transformer_switch += PipelineElement(
                'FRegressionSelectPercentile',
                hyperparameters={'percentile': percentile_range},
                test_disabled=True)
            candidate += transformer_switch
            candidate += PipelineElement('RandomForestRegressor')

            self.run_hyperpipe(candidate, self.regression)
Esempio n. 17
0
    def test_classification_5(self):
        """Run every hyperpipe with a preprocessing switch followed by an estimator switch."""
        for template in self.hyperpipes:
            candidate = template.copy_me()

            # multi-switch
            # preprocessing switch: PCA or simple feature selection
            preprocessing = Switch('preproc_switch')
            pca_space = {'n_components': Categorical([None, 5])}
            preprocessing += PipelineElement(
                'PCA', hyperparameters=pca_space, test_disabled=True)
            selection_space = {
                'percentile': IntegerRange(start=5, step=20, stop=66,
                                           range_type='range'),
            }
            preprocessing += PipelineElement(
                'FClassifSelectPercentile',
                hyperparameters=selection_space,
                test_disabled=True)
            candidate += preprocessing

            # estimator switch: SVC or random forest
            estimators = Switch('estimator_switch')
            svc_space = {
                'kernel': Categorical(['linear', 'rbf']),
                'C': Categorical([.01, 1, 5]),
            }
            estimators += PipelineElement('SVC', hyperparameters=svc_space)
            forest_space = {
                'min_samples_split': FloatRange(start=.05, step=.1, stop=.26,
                                                range_type='range'),
            }
            estimators += PipelineElement('RandomForestClassifier',
                                          hyperparameters=forest_space)
            candidate += estimators

            self.run_hyperpipe(candidate, self.classification)
Esempio n. 18
0
    def test_classification_2(self):
        """Run every hyperpipe with a simple switch between SVC and random forest."""
        for template in self.hyperpipes:
            candidate = template.copy_me()

            # Simple estimator Switch
            estimators = Switch('estimator_switch')
            svc_space = {
                'kernel': Categorical(['linear', 'rbf']),
                'C': Categorical([.01, 1, 5]),
            }
            estimators += PipelineElement('SVC', hyperparameters=svc_space)
            forest_space = {
                'min_samples_split': FloatRange(start=.05, step=.1, stop=.26,
                                                range_type='range'),
            }
            estimators += PipelineElement('RandomForestClassifier',
                                          hyperparameters=forest_space)
            candidate += estimators

            self.run_hyperpipe(candidate, self.classification)
Esempio n. 19
0
    def test_prepare_photon_pipeline(self):
        """
        Check that ``prepare_photon_pipe`` builds a pipeline from the very
        same element objects the branch holds, in the same order.
        """
        branch = Branch('my_test_branch')
        branch += PipelineElement('SimpleImputer')
        branch += Switch('my_crazy_switch_bitch')
        branch += Stack('my_stacking_stack')
        branch += PipelineElement('SVC')

        pipeline = branch.prepare_photon_pipe(branch.elements)

        self.assertEqual(len(pipeline.named_steps), 4)
        for position, member in enumerate(branch.elements):
            # identity, not equality: lookup by name and by position
            by_name = pipeline.named_steps[member.name]
            self.assertIs(by_name, member)
            by_position = pipeline.elements[position][1]
            self.assertIs(by_position, branch.elements[position])
Esempio n. 20
0
    def test_add(self):
        """
        Check adding elements to a Switch, including repeated element names.

        Covers the element counts and ``elements_dict`` keys of the switches
        stored on the test case, construction via list and via ``+=``, the
        ValueError raised when the same element object is added twice, and
        the numeric suffix given to elements whose name is already in use.
        """
        # element counts of the switches stored on the test case
        self.assertEqual(len(self.estimator_switch.elements), 3)
        self.assertEqual(len(self.switch_in_switch.elements), 2)
        self.assertEqual(len(self.transformer_switch_with_branch.elements), 2)

        # elements_dict is keyed by element name, in insertion order
        self.assertEqual(
            list(self.estimator_switch.elements_dict.keys()),
            ['SVC', 'DecisionTreeClassifier', 'GaussianProcessClassifier'])
        self.assertEqual(
            list(self.switch_in_switch.elements_dict.keys()),
            ['transformer_branch', 'transformer_switch_with_branch'])

        # both construction styles are exercised; the first switch is only
        # built, not inspected, and is replaced right away
        switch = Switch('MySwitch',
                        [PipelineElement('PCA'),
                         PipelineElement('FastICA')])
        switch = Switch('MySwitch2')
        switch += PipelineElement('PCA')
        switch += PipelineElement('FastICA')

        # adding the very same element object a second time must raise
        with self.assertRaises(ValueError):
            self.estimator_switch += self.estimator_switch.elements[0]
        # a new element whose name is already taken gets a numeric suffix;
        # the statement order below determines the expected suffixes
        self.estimator_switch += PipelineElement("SVC")
        self.assertEqual(self.estimator_switch.elements[-1].name, "SVC2")
        self.estimator_switch += PipelineElement(
            "SVC", hyperparameters={'kernel': ['polynomial', 'sigmoid']})
        self.assertEqual(self.estimator_switch.elements[-1].name, "SVC3")
        # a name that is not in use yet stays unchanged
        self.estimator_switch += PipelineElement("SVR")
        self.assertEqual(self.estimator_switch.elements[-1].name, "SVR")
        self.estimator_switch += PipelineElement("SVC")
        self.assertEqual(self.estimator_switch.elements[-1].name, "SVC4")

        # check that the hyperparameters are renamed accordingly:
        # index 4 is the fifth element, i.e. the one renamed to SVC3
        self.assertEqual(
            self.estimator_switch.pipeline_element_configurations[4][0]
            ["SVC3__kernel"], 'polynomial')
Esempio n. 21
0
    def test_base_element(self):
        """
        Check that ``Switch.base_element`` is the selected tree element, both
        after setting ``current_element`` and after setting one of the tree's
        own hyperparameters.
        """
        switch = Switch('switch', [self.svc, self.tree])

        def assert_tree_is_selected():
            # the switch must expose the tree element and its wrapped estimator
            self.assertIs(switch.base_element, self.tree)
            self.assertIs(switch.base_element.base_element,
                          self.tree.base_element)

        switch.set_params(current_element=(1, 1))
        assert_tree_is_selected()

        # other optimizer
        switch.set_params(DecisionTreeClassifier__min_samples_split=2)
        assert_tree_is_selected()
Esempio n. 22
0
    def test_classification_4(self):
        """Run every hyperpipe with a transformer switch ahead of a random forest classifier."""
        for template in self.hyperpipes:
            candidate = template.copy_me()

            # Transformer Switch: PCA or percentile-based feature selection
            transformer_switch = Switch("trans_switch")
            transformer_switch += PipelineElement("PCA")
            percentile_range = IntegerRange(start=5, step=20, stop=66,
                                            range_type="range")
            transformer_switch += PipelineElement(
                "FClassifSelectPercentile",
                hyperparameters={"percentile": percentile_range},
                test_disabled=True)
            candidate += transformer_switch
            candidate += PipelineElement("RandomForestClassifier")

            self.run_hyperpipe(candidate, self.classification)
Esempio n. 23
0
    def test_classification_10(self):
        """
        Run every hyperpipe with scaling, sample pairing, a stack of two
        feature-subset branches and a final estimator switch.
        """
        for template in self.hyperpipes:
            candidate = template.copy_me()

            # crazy everything
            candidate += PipelineElement('StandardScaler')
            pairing_space = {
                'draw_limit': [100],
                'generator': Categorical(['nearest_pair', 'random_pair']),
            }
            candidate += PipelineElement('SamplePairingClassification',
                                         pairing_space,
                                         distance_metric='euclidean',
                                         test_disabled=True)

            # one branch per half of the feature columns; per the original
            # author, if both PCAs are disabled the features are simply
            # concatenated and passed to the final estimator
            source_branches = []
            for branch_name, take_first_half in (('source1_features', True),
                                                 ('source2_features', False)):
                source = Branch(branch_name)
                if take_first_half:
                    # columns 0 .. floor(n_features / 2) - 1
                    columns = np.arange(
                        start=0,
                        stop=int(np.floor(self.X_shape[1] / 2)))
                else:
                    # columns floor(n_features / 2) .. n_features - 1
                    columns = np.arange(
                        start=int(np.floor(self.X_shape[1] / 2)),
                        stop=self.X_shape[1])
                source += DataFilter(indices=columns)
                source += PipelineElement('ConfounderRemoval', {},
                                          standardize_covariates=True,
                                          test_disabled=True,
                                          confounder_names=['cov1', 'cov2'])
                source += PipelineElement(
                    'PCA',
                    hyperparameters={'n_components': Categorical([None, 5])},
                    test_disabled=True)
                source_branches.append(source)

            # stack the branch outputs (i.e. horizontal concatenation)
            candidate += Stack('source_stack', elements=source_branches)

            # final estimator switch working on the stack output
            estimators = Switch('estimator_switch')
            svc_space = {
                'kernel': Categorical(['linear', 'rbf']),
                'C': Categorical([.01, 1, 5]),
            }
            estimators += PipelineElement('SVC', hyperparameters=svc_space)
            forest_space = {
                'min_samples_split': FloatRange(start=.05, step=.1, stop=.26,
                                                range_type='range'),
            }
            estimators += PipelineElement('RandomForestClassifier',
                                          hyperparameters=forest_space)
            candidate += estimators

            self.run_hyperpipe(candidate, self.classification)
Esempio n. 24
0
    def test_set_random_state(self):
        """
        Check that setting ``random_state`` on a branch reaches nested elements.

        The branch contains a scaler, a switch, a stack and a final
        estimator.  After assigning the random state on the branch, the
        second element of the switch and of the stack, and their wrapped
        base elements, must carry the same value.
        """
        # we handle all elements in one method that is inherited so we capture them all in this test
        random_state = 53
        my_branch = Branch("random_state_branch")
        my_branch += PipelineElement("StandardScaler")
        my_switch = Switch("transformer_Switch")
        my_switch += PipelineElement("LassoFeatureSelection")
        my_switch += PipelineElement("PCA")
        my_branch += my_switch
        my_stack = Stack("Estimator_Stack")
        my_stack += PipelineElement("SVR")
        my_stack += PipelineElement("Ridge")
        my_branch += my_stack
        my_branch += PipelineElement("ElasticNet")

        my_branch.random_state = random_state

        # assertEqual reports both values on failure, unlike assertTrue(a == b)
        self.assertEqual(my_switch.elements[1].random_state, random_state)
        self.assertEqual(
            my_switch.elements[1].base_element.random_state, random_state)
        self.assertEqual(my_stack.elements[1].random_state, random_state)
        self.assertEqual(
            my_stack.elements[1].base_element.random_state, random_state)
Esempio n. 25
0
    def test_add(self):
        """
        Check adding elements to a Stack.

        Covers construction via list and via ``+=``, the prefixed
        hyperparameter names, mixed element types, the ValueError raised when
        the same element object is added twice and the renaming of an element
        whose name is already taken.
        """
        pca_grid_in_stack = {'MyStack__PCA__n_components': [5]}

        # construction from a list of elements
        from_list = Stack('MyStack', [
            PipelineElement('PCA', {'n_components': [5]}),
            PipelineElement('FastICA'),
        ])
        self.assertEqual(len(from_list.elements), 2)
        self.assertDictEqual(from_list._hyperparameters, pca_grid_in_stack)

        # incremental construction must give the same result
        incremental = Stack('MyStack')
        incremental += PipelineElement('PCA', {'n_components': [5]})
        incremental += PipelineElement('FastICA')
        self.assertEqual(len(incremental.elements), 2)
        self.assertDictEqual(incremental._hyperparameters, pca_grid_in_stack)

        def callback(X, y=None):
            pass

        # a stack may hold plain elements, callbacks, switches and branches
        inner_switch = Switch('MySwitch', [PipelineElement('PCA'),
                                           PipelineElement('FastICA')])
        mixed = Stack('MyStack', [
            PipelineElement('PCA'),
            CallbackElement('MyCallback', callback),
            inner_switch,
            Branch('MyBranch', [PipelineElement('PCA')]),
        ])
        self.assertEqual(len(mixed.elements), 4)

        # adding the same element object again must raise
        with self.assertRaises(ValueError):
            mixed += mixed.elements[0]

        # a second PCA is accepted under a suffixed name
        mixed += PipelineElement('PCA', {'n_components': [10, 20]})
        self.assertEqual(mixed.elements[-1].name, 'PCA2')
        expected_hyperparameters = {
            'MyStack__MySwitch__current_element': [(0, 0), (1, 0)],
            'MyStack__PCA2__n_components': [10, 20],
        }
        self.assertDictEqual(mixed.hyperparameters, expected_hyperparameters)
Esempio n. 26
0
# Load features and targets. return_X_y must be passed by keyword: current
# scikit-learn releases no longer accept it as a positional argument.
X, y = load_breast_cancer(return_X_y=True)

# CREATE HYPERPIPE
my_pipe = Hyperpipe('basic_switch_pipe',
                    optimizer='random_grid_search',
                    optimizer_params={'n_configurations': 15},
                    metrics=['accuracy', 'precision', 'recall'],
                    best_config_metric='accuracy',
                    outer_cv=KFold(n_splits=3),
                    inner_cv=KFold(n_splits=5),
                    verbosity=1,
                    output_settings=OutputSettings(project_folder='./tmp/'))

# Transformer Switch: a scaler or a PCA flagged test_disabled
my_pipe += Switch('TransformerSwitch', [
    PipelineElement('StandardScaler'),
    PipelineElement('PCA', test_disabled=True)
])

# Estimator Switch: SVC or decision tree, each with its own search space
svm = PipelineElement('SVC', hyperparameters={'kernel': ['rbf', 'linear']})

tree = PipelineElement('DecisionTreeClassifier',
                       hyperparameters={
                           'min_samples_split': IntegerRange(2, 5),
                           'min_samples_leaf': IntegerRange(1, 5),
                           'criterion': ['gini', 'entropy']
                       })

my_pipe += Switch('EstimatorSwitch', [svm, tree])

# run the hyperparameter search on the loaded data
my_pipe.fit(X, y)
Esempio n. 27
0
    "basic_switch_pipe",
    optimizer="random_grid_search",
    optimizer_params={"n_configurations": 15},
    metrics=["accuracy", "precision", "recall"],
    best_config_metric="accuracy",
    outer_cv=KFold(n_splits=3),
    inner_cv=KFold(n_splits=5),
    verbosity=1,
    output_settings=OutputSettings(project_folder="./tmp/"),
)

# Transformer Switch: a scaler or a PCA flagged test_disabled
my_pipe += Switch("TransformerSwitch",
                  [PipelineElement("StandardScaler"),
                   PipelineElement("PCA", test_disabled=True)])

# Estimator Switch elements, each with its own search space
svm = PipelineElement("SVC",
                      hyperparameters={"kernel": ["rbf", "linear"]})

tree = PipelineElement("DecisionTreeClassifier",
                       hyperparameters={"min_samples_split": IntegerRange(2, 5),
                                        "min_samples_leaf": IntegerRange(1, 5),
                                        "criterion": ["gini", "entropy"]})
Esempio n. 28
0
# DESIGN YOUR PIPELINE
my_pipe = Hyperpipe(
    'feature_selection',
    optimizer='grid_search',
    metrics=[
        'mean_squared_error',
        'pearson_correlation',
        'mean_absolute_error',
        'explained_variance',
    ],
    best_config_metric='mean_squared_error',
    outer_cv=KFold(n_splits=3),
    inner_cv=KFold(n_splits=3),
    verbosity=1,
    output_settings=OutputSettings(project_folder='./tmp/'),
)

my_pipe += PipelineElement('StandardScaler')

# two alternative feature selection strategies, combined in a switch below
lasso = PipelineElement(
    'LassoFeatureSelection',
    hyperparameters={'percentile_to_keep': [0.1, 0.2, 0.3], 'alpha': 1},
)

f_regression = PipelineElement(
    'FRegressionSelectPercentile',
    hyperparameters={'percentile': [10, 20, 30]},
)

my_pipe += Switch('FeatureSelection', [lasso, f_regression])

# final estimator
my_pipe += PipelineElement(
    'RandomForestRegressor',
    hyperparameters={'n_estimators': IntegerRange(10, 50)},
)


# X and y are expected to be defined earlier in the script
my_pipe.fit(X, y)



Esempio n. 29
0
class SwitchTests(unittest.TestCase):
    """Tests for ``Switch``: hyperparameters, delegation, copying and naming."""

    def setUp(self):
        """Load the breast cancer data and build the elements and switches under test."""
        # ``return_X_y`` is passed by keyword: recent scikit-learn releases
        # declare it keyword-only, so ``load_breast_cancer(True)`` fails there.
        self.X, self.y = load_breast_cancer(return_X_y=True)
        self.svc = PipelineElement('SVC', {
            'C': [0.1, 1],
            'kernel': ['rbf', 'sigmoid']
        })
        self.tree = PipelineElement('DecisionTreeClassifier',
                                    {'min_samples_split': [2, 3, 4]})
        self.gpc = PipelineElement('GaussianProcessClassifier')
        self.pca = PipelineElement('PCA')

        self.estimator_branch = Branch('estimator_branch',
                                       [self.tree.copy_me()])
        self.transformer_branch = Branch('transformer_branch',
                                         [self.pca.copy_me()])

        # Every switch receives copies, so the standalone elements above stay
        # separate objects that the tests can fit and compare against.
        self.estimator_switch = Switch(
            'estimator_switch',
            [self.svc.copy_me(),
             self.tree.copy_me(),
             self.gpc.copy_me()])
        self.estimator_switch_with_branch = Switch(
            'estimator_switch_with_branch',
            [self.tree.copy_me(),
             self.estimator_branch.copy_me()])
        self.transformer_switch_with_branch = Switch(
            'transformer_switch_with_branch',
            [self.pca.copy_me(),
             self.transformer_branch.copy_me()])
        self.switch_in_switch = Switch('Switch_in_switch', [
            self.transformer_branch.copy_me(),
            self.transformer_switch_with_branch.copy_me()
        ])

    def test_init(self):
        """The name given to the constructor is exposed as ``name``."""
        self.assertEqual(self.estimator_switch.name, 'estimator_switch')

    def test_hyperparams(self):
        """Check the per-element configurations and the resulting config grid."""
        # One configuration list per element: 4 for SVC (2 C values x 2
        # kernels) and 3 for the decision tree; together with the single
        # (2, 0) entry of the third element this yields 8 switch configs.
        self.assertEqual(
            len(self.estimator_switch.pipeline_element_configurations), 3)
        self.assertEqual(
            len(self.estimator_switch.pipeline_element_configurations[0]), 4)
        self.assertEqual(
            len(self.estimator_switch.pipeline_element_configurations[1]), 3)

        # (element index, configuration index) pairs
        expected_configs = [(0, 0), (0, 1), (0, 2), (0, 3),
                            (1, 0), (1, 1), (1, 2),
                            (2, 0)]

        # hyperparameters
        self.assertDictEqual(
            self.estimator_switch.hyperparameters,
            {'estimator_switch__current_element': expected_configs})

        # config grid: one single-key dict per configuration, same order
        self.assertListEqual(
            self.estimator_switch.generate_config_grid(),
            [{'estimator_switch__current_element': config}
             for config in expected_configs])

    def test_set_params(self):
        """``set_params`` accepts index tuples as well as direct parameters."""
        # test for grid search: a bare int instead of a tuple is rejected
        false_config = {'current_element': 1}
        with self.assertRaises(ValueError):
            self.estimator_switch.set_params(**false_config)

        correct_config = {'current_element': (0, 1)}
        self.estimator_switch.set_params(**correct_config)
        self.assertEqual(self.estimator_switch.base_element.base_element.C,
                         0.1)
        self.assertEqual(
            self.estimator_switch.base_element.base_element.kernel, 'sigmoid')

        # test for other optimizers: parameters addressed by element name
        smac_config = {'SVC__C': 2, 'SVC__kernel': 'rbf'}
        self.estimator_switch.set_params(**smac_config)
        self.assertEqual(self.estimator_switch.base_element.base_element.C, 2)
        self.assertEqual(
            self.estimator_switch.base_element.base_element.kernel, 'rbf')

    def test_fit(self):
        """Fitting the switch on config (1, 0) matches the standalone tree."""
        # Re-seed before each fit so both trees see the same random state.
        np.random.seed(42)
        self.estimator_switch.set_params(current_element=(1, 0))
        self.estimator_switch.fit(self.X, self.y)
        np.random.seed(42)
        self.tree.set_params(min_samples_split=2)
        self.tree.fit(self.X, self.y)
        np.testing.assert_array_equal(
            self.tree.base_element.feature_importances_,
            self.estimator_switch.base_element.feature_importances_)

    def test_transform(self):
        """Transforming through the switch on (0, 0) matches the standalone PCA."""
        self.transformer_switch_with_branch.set_params(current_element=(0, 0))
        self.transformer_switch_with_branch.fit(self.X, self.y)
        self.pca.fit(self.X, self.y)

        # transform() returns a 3-tuple here; only the first item is compared
        switch_Xt, _, _ = self.transformer_switch_with_branch.transform(self.X)
        pca_Xt, _, _ = self.pca.transform(self.X)
        self.assertTrue(np.array_equal(pca_Xt, switch_Xt))

    def test_predict(self):
        """Predictions of the switch on (1, 0) match the standalone tree."""
        self.estimator_switch.set_params(current_element=(1, 0))
        np.random.seed(42)
        self.estimator_switch.fit(self.X, self.y)
        self.tree.set_params(min_samples_split=2)
        np.random.seed(42)
        self.tree.fit(self.X, self.y)

        switch_preds = self.estimator_switch.predict(self.X)
        tree_preds = self.tree.predict(self.X)
        self.assertTrue(np.array_equal(switch_preds, tree_preds))

    def test_predict_proba(self):
        """``predict_proba`` of the switch on (0, 0) matches the standalone GPC."""
        gpc = PipelineElement('GaussianProcessClassifier')
        svc = PipelineElement('SVC')
        switch = Switch('EstimatorSwitch', [gpc, svc])
        switch.set_params(current_element=(0, 0))
        np.random.seed(42)
        switch_probas = switch.fit(self.X, self.y).predict_proba(self.X)
        np.random.seed(42)
        gpc_probas = self.gpc.fit(self.X, self.y).predict_proba(self.X)
        self.assertTrue(np.array_equal(switch_probas, gpc_probas))

    def test_inverse_transform(self):
        """Round trip through the switch matches the PCA and restores ``X``."""
        self.transformer_switch_with_branch.set_params(current_element=(0, 0))
        self.transformer_switch_with_branch.fit(self.X, self.y)
        self.pca.fit(self.X, self.y)
        Xt_pca, _, _ = self.pca.transform(self.X)
        Xt_switch, _, _ = self.transformer_switch_with_branch.transform(self.X)
        X_pca, _, _ = self.pca.inverse_transform(Xt_pca)
        X_switch, _, _ = self.transformer_switch_with_branch.inverse_transform(
            Xt_switch)

        self.assertTrue(np.array_equal(Xt_pca, Xt_switch))
        self.assertTrue(np.array_equal(X_pca, X_switch))
        # the reconstruction is only compared approximately with the input
        np.testing.assert_almost_equal(X_switch, self.X)

    def test_base_element(self):
        """``base_element`` is the very element object that was selected."""
        # No copies here: identity with self.tree is what gets asserted.
        switch = Switch('switch', [self.svc, self.tree])
        switch.set_params(current_element=(1, 1))
        self.assertIs(switch.base_element, self.tree)
        self.assertIs(switch.base_element.base_element, self.tree.base_element)

        # other optimizer
        switch.set_params(DecisionTreeClassifier__min_samples_split=2)
        self.assertIs(switch.base_element, self.tree)
        self.assertIs(switch.base_element.base_element, self.tree.base_element)

    def test_copy_me(self):
        """Copies hold distinct element objects but an equal dict representation."""
        switches = [
            self.estimator_switch, self.estimator_switch_with_branch,
            self.transformer_switch_with_branch, self.switch_in_switch
        ]

        for switch in switches:
            switch_copy = switch.copy_me()

            self.assertEqual(switch.random_state, switch_copy.random_state)

            # Guard the pairwise comparison below against silent truncation.
            self.assertEqual(len(switch_copy.elements), len(switch.elements))
            for copied_element, original_element in zip(switch_copy.elements,
                                                        switch.elements):
                self.assertNotEqual(copied_element, original_element)

            original_dict = elements_to_dict(switch)
            copy_dict = elements_to_dict(switch_copy)

            self.assertDictEqual(copy_dict, original_dict)

    def test_estimator_type(self):
        """``_estimator_type`` follows the elements and rejects mixed kinds."""
        pca = PipelineElement('PCA')
        ica = PipelineElement('FastICA')
        svc = PipelineElement('SVC')
        svr = PipelineElement('SVR')
        tree_class = PipelineElement('DecisionTreeClassifier')
        tree_reg = PipelineElement('DecisionTreeRegressor')

        # transformer mixed with an estimator: reading the attribute raises
        switch = Switch('MySwitch', [pca, svr])
        with self.assertRaises(NotImplementedError):
            _ = switch._estimator_type

        # classifier mixed with a regressor: reading the attribute raises
        switch = Switch('MySwitch', [svc, svr])
        with self.assertRaises(NotImplementedError):
            _ = switch._estimator_type

        switch = Switch('MySwitch', [pca, ica])
        self.assertIsNone(switch._estimator_type)

        switch = Switch('MySwitch', [tree_class, svc])
        self.assertEqual(switch._estimator_type, 'classifier')

        switch = Switch('MySwitch', [tree_reg, svr])
        self.assertEqual(switch._estimator_type, 'regressor')

        self.assertEqual(self.estimator_switch._estimator_type, 'classifier')
        self.assertEqual(self.estimator_switch_with_branch._estimator_type,
                         'classifier')
        self.assertIsNone(self.transformer_switch_with_branch._estimator_type)
        self.assertIsNone(self.switch_in_switch._estimator_type)

    def test_add(self):
        """Adding elements keeps order, rejects re-adding and renames duplicates."""
        self.assertEqual(len(self.estimator_switch.elements), 3)
        self.assertEqual(len(self.switch_in_switch.elements), 2)
        self.assertEqual(len(self.transformer_switch_with_branch.elements), 2)

        self.assertEqual(
            list(self.estimator_switch.elements_dict.keys()),
            ['SVC', 'DecisionTreeClassifier', 'GaussianProcessClassifier'])
        self.assertEqual(
            list(self.switch_in_switch.elements_dict.keys()),
            ['transformer_branch', 'transformer_switch_with_branch'])

        # Both construction paths (element list and ``+=``) are only
        # exercised here; nothing is asserted on the resulting objects.
        switch = Switch('MySwitch',
                        [PipelineElement('PCA'),
                         PipelineElement('FastICA')])
        switch = Switch('MySwitch2')
        switch += PipelineElement('PCA')
        switch += PipelineElement('FastICA')

        # test doubled names: re-adding an element already in the switch
        # raises, while new elements with a taken name get a numeric suffix
        with self.assertRaises(ValueError):
            self.estimator_switch += self.estimator_switch.elements[0]
        self.estimator_switch += PipelineElement("SVC")
        self.assertEqual(self.estimator_switch.elements[-1].name, "SVC2")
        self.estimator_switch += PipelineElement(
            "SVC", hyperparameters={'kernel': ['polynomial', 'sigmoid']})
        self.assertEqual(self.estimator_switch.elements[-1].name, "SVC3")
        self.estimator_switch += PipelineElement("SVR")
        self.assertEqual(self.estimator_switch.elements[-1].name, "SVR")
        self.estimator_switch += PipelineElement("SVC")
        self.assertEqual(self.estimator_switch.elements[-1].name, "SVC4")

        # check that hyperparameters are renamed respectively
        # (index 4 is the fifth element added overall, i.e. "SVC3")
        self.assertEqual(
            self.estimator_switch.pipeline_element_configurations[4][0]
            ["SVC3__kernel"], 'polynomial')

    def test_feature_importances(self):
        """``feature_importances_`` has one entry per feature or is ``None``."""
        n_features = self.X.shape[1]

        self.estimator_switch.set_params(current_element=(1, 0))
        self.estimator_switch.fit(self.X, self.y)
        self.assertEqual(
            len(self.estimator_switch.feature_importances_), n_features)

        self.estimator_switch_with_branch.set_params(current_element=(1, 0))
        self.estimator_switch_with_branch.fit(self.X, self.y)
        self.assertEqual(
            len(self.estimator_switch_with_branch.feature_importances_),
            n_features)

        self.estimator_switch.set_params(current_element=(2, 0))
        self.estimator_switch.fit(self.X, self.y)
        # NOTE(review): this inspects ``self.estimator_branch``, which this
        # test never fits, rather than the switch fitted just above - confirm
        # whether ``self.estimator_switch.feature_importances_`` was intended.
        self.assertIsNone(self.estimator_branch.feature_importances_)

        self.switch_in_switch.set_params(current_element=(1, 0))
        self.switch_in_switch.fit(self.X, self.y)
        self.assertIsNone(self.switch_in_switch.feature_importances_)
        # NOTE(review): the params are set on ``estimator_switch`` but
        # ``switch_in_switch`` is refitted - possibly a copy/paste slip; confirm.
        self.estimator_switch.set_params(current_element=(1, 0))
        self.switch_in_switch.fit(self.X, self.y)
        self.assertIsNone(self.switch_in_switch.feature_importances_)
Esempio n. 30
0
                    optimizer='smac',  # which optimizer PHOTON shall use, in this case smac
                    optimizer_params={'scenario_dict': scenario_dict},
                    metrics=['mean_squared_error', 'pearson_correlation'],
                    best_config_metric='mean_squared_error',
                    outer_cv=ShuffleSplit(n_splits=1, test_size=0.2),
                    inner_cv=KFold(n_splits=3),
                    verbosity=1,
                    output_settings=settings)


# ADD ELEMENTS TO YOUR PIPELINE
# Step 1: normalize all features.
my_pipe.add(PipelineElement("StandardScaler"))

# Step 2: PCA with n_components searched in IntegerRange(5, 10); the element
# is created with test_disabled=True.
my_pipe += PipelineElement(
    "PCA",
    hyperparameters={"n_components": IntegerRange(5, 10)},
    test_disabled=True,
)

# Step 3: a Switch holding the two regressors below.
switch = Switch("Test_Switch")

# SVR candidate. The original remark here says linspace and logspace ranges
# are converted to uniform and log-uniform priors in skopt.
# NOTE(review): the Hyperpipe call above sets optimizer='smac', so the skopt
# remark may be stale - confirm.
switch += PipelineElement(
    "SVR",
    hyperparameters={
        "C": FloatRange(0, 10, range_type="linspace"),
        "epsilon": FloatRange(0, 0.0001, range_type="linspace"),
        "tol": FloatRange(1e-4, 1e-2, range_type="linspace"),
        "kernel": Categorical(["linear", "rbf", "poly"]),
    },
)

# Random forest candidate with two n_estimators choices.
switch += PipelineElement(
    "RandomForestRegressor",
    hyperparameters={"n_estimators": Categorical([10, 20])},
)

my_pipe += switch

# NOW TRAIN YOUR PIPELINE
my_pipe.fit(X, y)