예제 #1
0
    def test_copy_me(self):
        """Exercise ``PipelineElement.copy_me`` on configured, fitted and
        custom elements.

        Covers four situations: a configured SVC element, the same element
        after fitting, an unconfigured element whose hyperparameter grid must
        be carried over, and custom elements created via
        ``PipelineElement.create`` (one that copies, one expected to raise).
        """

        def build_svc():
            # Fresh literal on every call, so elements never share a grid dict.
            return PipelineElement('SVC', {
                'C': [0.1, 1],
                'kernel': ['rbf', 'sigmoid']
            })

        # --- configured element ------------------------------------------
        original = build_svc()
        original.set_params(C=0.1, kernel='sigmoid')
        clone = original.copy_me()

        self.assertEqual(original.random_state, clone.random_state)
        self.assertNotEqual(clone.base_element, original.base_element)
        clone_as_dict = elements_to_dict(clone)
        original_as_dict = elements_to_dict(original)
        self.assertDictEqual(clone_as_dict, original_as_dict)
        self.assertEqual(clone.base_element.C, original.base_element.C)

        # --- copy taken from a fitted element -----------------------------
        # The earlier clone is compared against a clone made after fitting.
        fitted = original.fit(self.X, self.y)
        clone_of_fitted = fitted.copy_me()
        self.assertDictEqual(elements_to_dict(clone),
                             elements_to_dict(clone_of_fitted))

        # --- hyperparameter grid and independence of the base element -----
        original = build_svc()
        clone = original.copy_me()
        expected_grid = {
            'SVC__C': [0.1, 1],
            'SVC__kernel': ['rbf', 'sigmoid']
        }
        self.assertDictEqual(clone.hyperparameters, expected_grid)
        # Changing the clone's estimator must leave the original's untouched.
        clone.base_element.C = 3
        self.assertNotEqual(original.base_element.C, clone.base_element.C)

        # --- custom elements ----------------------------------------------
        copyable = PipelineElement.create(
            'CustomElement',
            base_element=DummyNeedsCovariatesEstimator(),
            hyperparameters={})
        clone = copyable.copy_me()
        self.assertDictEqual(elements_to_dict(copyable),
                             elements_to_dict(clone))

        not_copyable = PipelineElement.create(
            'MyUnDeepcopyableObject',
            base_element=GridSearchOptimizer(),
            hyperparameters={})
        with self.assertRaises(Exception):
            not_copyable.copy_me()
예제 #2
0
class StackTests(unittest.TestCase):
    """Tests for ``Stack``: copying, fitting, horizontal concatenation of the
    element outputs, parameter routing and element bookkeeping."""

    def setUp(self):
        """Load the breast cancer data and build one stack of each flavour.

        ``self.stacks`` pairs each stack with the stand-alone elements (or
        branches) it was assembled from, so tests can compare what the stack
        produces with what its parts produce on their own. The stacks hold
        copies, never the stand-alone objects themselves.
        """
        # ``return_X_y`` is keyword-only in current scikit-learn releases;
        # passing it positionally raises a TypeError there.
        self.X, self.y = load_breast_cancer(return_X_y=True)

        self.pca = PipelineElement('PCA', {'n_components': [5, 10]})
        self.scaler = PipelineElement('StandardScaler', {'with_mean': [True]})
        self.svc = PipelineElement('SVC', {'C': [1, 2]})
        self.tree = PipelineElement('DecisionTreeClassifier',
                                    {'min_samples_leaf': [3, 5]})

        self.transformer_branch_1 = Branch('TransBranch1',
                                           [self.pca.copy_me()])
        self.transformer_branch_2 = Branch('TransBranch2',
                                           [self.scaler.copy_me()])

        self.estimator_branch_1 = Branch('EstBranch1', [self.svc.copy_me()])
        self.estimator_branch_2 = Branch('EstBranch2', [self.tree.copy_me()])

        self.transformer_stack = Stack(
            'TransformerStack',
            [self.pca.copy_me(), self.scaler.copy_me()])
        self.estimator_stack = Stack(
            'EstimatorStack',
            [self.svc.copy_me(), self.tree.copy_me()])
        self.transformer_branch_stack = Stack('TransBranchStack', [
            self.transformer_branch_1.copy_me(),
            self.transformer_branch_2.copy_me()
        ])
        self.estimator_branch_stack = Stack('EstBranchStack', [
            self.estimator_branch_1.copy_me(),
            self.estimator_branch_2.copy_me()
        ])

        # (stand-alone parts, stack built from copies of those parts)
        self.stacks = [
            ([self.pca, self.scaler], self.transformer_stack),
            ([self.svc, self.tree], self.estimator_stack),
            ([self.transformer_branch_1,
              self.transformer_branch_2], self.transformer_branch_stack),
            ([self.estimator_branch_1,
              self.estimator_branch_2], self.estimator_branch_stack)
        ]

    def test_copy_me(self):
        """A copy keeps ``random_state`` and the ``elements_to_dict`` view,
        while the attribute dicts of the first elements must not be equal."""
        for _, stack in self.stacks:
            copy = stack.copy_me()
            self.assertEqual(stack.random_state, copy.random_state)
            self.assertNotEqual(stack.elements[0].__dict__,
                                copy.elements[0].__dict__)
            self.assertDictEqual(elements_to_dict(stack),
                                 elements_to_dict(copy))

    def test_horizontal_stacking(self):
        """The stack output has as many columns as both parts together."""
        for (element_1, element_2), stack in self.stacks:
            # fit elements
            Xt_1 = element_1.fit(self.X, self.y).transform(self.X, self.y)
            Xt_2 = element_2.fit(self.X, self.y).transform(self.X, self.y)

            Xt = stack.fit(self.X, self.y).transform(self.X, self.y)

            # output of transform() changes depending on whether it is an
            # estimator stack or a transformer stack
            if isinstance(Xt, tuple):
                Xt = Xt[0]
                Xt_1 = Xt_1[0]
                Xt_2 = Xt_2[0]

            # 1-d outputs count as a single column each
            if len(Xt_1.shape) == 1:
                Xt_1 = np.reshape(Xt_1, (-1, 1))
                Xt_2 = np.reshape(Xt_2, (-1, 1))

            self.assertEqual(Xt.shape[1], Xt_1.shape[-1] + Xt_2.shape[-1])

    def recursive_assertion(self, element_a, element_b):
        """Assert that two (possibly nested) dicts hold equal values.

        Walks the keys of ``element_a``; arrays are compared element-wise,
        nested dicts recursively, everything else with ``assertEqual``. Keys
        that exist only in ``element_b`` are not inspected.
        """
        for key, value_a in element_a.items():
            if isinstance(value_a, np.ndarray):
                np.testing.assert_array_equal(value_a, element_b[key])
            elif isinstance(value_a, dict):
                self.recursive_assertion(value_a, element_b[key])
            else:
                self.assertEqual(value_a, element_b[key])

    def test_fit(self):
        """Elements fitted inside a stack match the same elements fitted
        stand-alone (same NumPy seed for both runs)."""
        for elements, stack in [([self.pca,
                                  self.scaler], self.transformer_stack),
                                ([self.svc, self.tree], self.estimator_stack)]:
            np.random.seed(42)
            stack = stack.fit(self.X, self.y)
            np.random.seed(42)
            for i, element in enumerate(elements):
                element = element.fit(self.X, self.y)
                element_dict = elements_to_dict(element)
                stack_dict = elements_to_dict(stack.elements[i])
                self.recursive_assertion(element_dict, stack_dict)

    def test_transform(self):
        """``Stack.transform`` equals the parts' outputs joined column-wise."""
        for elements, stack in self.stacks:
            np.random.seed(42)
            Xt_stack, _, _ = stack.fit(self.X, self.y).transform(self.X)
            np.random.seed(42)
            Xt_elements = None
            for element in elements:
                Xt_element, _, _ = element.fit(self.X,
                                               self.y).transform(self.X)
                Xt_elements = PhotonDataHelper.stack_data_horizontally(
                    Xt_elements, Xt_element)
            np.testing.assert_array_equal(Xt_stack, Xt_elements)

    def test_predict(self):
        """``Stack.predict`` equals the parts' predictions joined column-wise."""
        for elements, stack in [
            ([self.svc, self.tree], self.estimator_stack),
            ([self.estimator_branch_1,
              self.estimator_branch_2], self.estimator_branch_stack)
        ]:
            np.random.seed(42)
            stack = stack.fit(self.X, self.y)
            yt_stack = stack.predict(self.X)
            np.random.seed(42)
            Xt_elements = None
            for element in elements:
                Xt_element = element.fit(self.X, self.y).predict(self.X)
                Xt_elements = PhotonDataHelper.stack_data_horizontally(
                    Xt_elements, Xt_element)
            np.testing.assert_array_equal(yt_stack, Xt_elements)

    def test_predict_proba(self):
        """``Stack.predict_proba`` equals the parts' probabilities joined
        column-wise, using ``predict`` for parts that return ``None``."""
        for elements, stack in [
            ([self.svc, self.tree], self.estimator_stack),
            ([self.estimator_branch_1,
              self.estimator_branch_2], self.estimator_branch_stack)
        ]:
            np.random.seed(42)
            stack = stack.fit(self.X, self.y)
            yt_stack = stack.predict_proba(self.X)
            np.random.seed(42)
            Xt_elements = None
            for element in elements:
                Xt_element = element.fit(self.X, self.y).predict_proba(self.X)
                if Xt_element is None:
                    # no probabilities available: fall back to predictions
                    Xt_element = element.fit(self.X, self.y).predict(self.X)
                Xt_elements = PhotonDataHelper.stack_data_horizontally(
                    Xt_elements, Xt_element)
            np.testing.assert_array_equal(yt_stack, Xt_elements)

    def test_inverse_transform(self):
        """``inverse_transform`` on a stack raises ``NotImplementedError``."""
        with self.assertRaises(NotImplementedError):
            self.stacks[0][1].fit(self.X, self.y).inverse_transform(self.X)

    def test_set_params(self):
        """Prefixed parameters reach the matching element; unknown names
        raise ``ValueError``."""
        trans_config = {
            'PCA__n_components': 2,
            'PCA__disabled': True,
            'StandardScaler__with_mean': True
        }
        est_config = {
            'SVC__C': 3,
            'DecisionTreeClassifier__min_samples_leaf': 1
        }

        # transformer stack
        self.transformer_stack.set_params(**trans_config)
        self.assertEqual(
            self.transformer_stack.elements[0].base_element.n_components, 2)
        self.assertEqual(self.transformer_stack.elements[0].disabled, True)
        self.assertEqual(
            self.transformer_stack.elements[1].base_element.with_mean, True)

        # estimator stack
        self.estimator_stack.set_params(**est_config)
        self.assertEqual(self.estimator_stack.elements[0].base_element.C, 3)
        self.assertEqual(
            self.estimator_stack.elements[1].base_element.min_samples_leaf, 1)

        with self.assertRaises(ValueError):
            self.estimator_stack.set_params(**{'any_weird_param': 1})

        with self.assertRaises(ValueError):
            self.transformer_stack.set_params(**{'any_weird_param': 1})

    def test_add(self):
        """Elements can be given at construction or appended with ``+=``;
        re-adding the same object fails and name clashes get a suffix."""
        stack = Stack('MyStack', [
            PipelineElement('PCA', {'n_components': [5]}),
            PipelineElement('FastICA')
        ])
        self.assertEqual(len(stack.elements), 2)
        self.assertDictEqual(stack._hyperparameters,
                             {'MyStack__PCA__n_components': [5]})

        # same content, built incrementally
        stack = Stack('MyStack')
        stack += PipelineElement('PCA', {'n_components': [5]})
        stack += PipelineElement('FastICA')
        self.assertEqual(len(stack.elements), 2)
        self.assertDictEqual(stack._hyperparameters,
                             {'MyStack__PCA__n_components': [5]})

        def callback(X, y=None):
            pass

        stack = Stack('MyStack', [
            PipelineElement('PCA'),
            CallbackElement('MyCallback', callback),
            Switch('MySwitch',
                   [PipelineElement('PCA'),
                    PipelineElement('FastICA')]),
            Branch('MyBranch', [PipelineElement('PCA')])
        ])
        self.assertEqual(len(stack.elements), 4)

        # test doubled item
        with self.assertRaises(ValueError):
            stack += stack.elements[0]

        # a second, distinct PCA element is accepted and renamed
        stack += PipelineElement('PCA', {'n_components': [10, 20]})
        self.assertEqual(stack.elements[-1].name, 'PCA2')
        self.assertDictEqual(
            stack.hyperparameters, {
                'MyStack__MySwitch__current_element': [(0, 0), (1, 0)],
                'MyStack__PCA2__n_components': [10, 20]
            })

    def test_feature_importances(self):
        """Fitted estimator stacks report ``None`` as feature importances."""
        # single item
        self.estimator_stack.fit(self.X, self.y)
        self.assertIsNone(self.estimator_stack.feature_importances_)

        self.estimator_branch_stack.fit(self.X, self.y)
        self.assertIsNone(self.estimator_branch_stack.feature_importances_)

    def test_use_probabilities(self):
        """``use_probabilities`` changes the column count of ``predict``
        (3 when enabled, 2 when disabled); ``predict_proba`` yields 3."""
        self.estimator_stack.use_probabilities = True
        self.estimator_stack.fit(self.X, self.y)
        probas = self.estimator_stack.predict(self.X)
        self.assertEqual(probas.shape[1], 3)

        self.estimator_stack.use_probabilities = False
        self.estimator_stack.fit(self.X, self.y)
        preds = self.estimator_stack.predict(self.X)
        self.assertEqual(preds.shape[1], 2)
        probas = self.estimator_stack.predict_proba(self.X)
        self.assertEqual(probas.shape[1], 3)
예제 #3
0
class SwitchTests(unittest.TestCase):
    """Tests for ``Switch``: configuration enumeration, parameter routing,
    delegation to the selected element, copying and element bookkeeping."""

    def setUp(self):
        """Load the breast cancer data and build the switches under test.

        The switches are assembled from copies, so the stand-alone elements
        (``self.svc``, ``self.tree``, ``self.gpc``, ``self.pca``) can be
        fitted independently and used as references.
        """
        # ``return_X_y`` is keyword-only in current scikit-learn releases;
        # passing it positionally raises a TypeError there.
        self.X, self.y = load_breast_cancer(return_X_y=True)
        self.svc = PipelineElement('SVC', {
            'C': [0.1, 1],
            'kernel': ['rbf', 'sigmoid']
        })
        self.tree = PipelineElement('DecisionTreeClassifier',
                                    {'min_samples_split': [2, 3, 4]})
        self.gpc = PipelineElement('GaussianProcessClassifier')
        self.pca = PipelineElement('PCA')

        self.estimator_branch = Branch('estimator_branch',
                                       [self.tree.copy_me()])
        self.transformer_branch = Branch('transformer_branch',
                                         [self.pca.copy_me()])

        self.estimator_switch = Switch(
            'estimator_switch',
            [self.svc.copy_me(),
             self.tree.copy_me(),
             self.gpc.copy_me()])
        self.estimator_switch_with_branch = Switch(
            'estimator_switch_with_branch',
            [self.tree.copy_me(),
             self.estimator_branch.copy_me()])
        self.transformer_switch_with_branch = Switch(
            'transformer_switch_with_branch',
            [self.pca.copy_me(),
             self.transformer_branch.copy_me()])
        self.switch_in_switch = Switch('Switch_in_switch', [
            self.transformer_branch.copy_me(),
            self.transformer_switch_with_branch.copy_me()
        ])

    def test_init(self):
        """The name passed to the constructor is stored on the switch."""
        self.assertEqual(self.estimator_switch.name, 'estimator_switch')

    def test_hyperparams(self):
        """Each element's grid is enumerated as (element, config) index pairs."""
        # assert number of different configs to test
        # one config list per element: 4 for SVC (2 x C, 2 x kernel), 3 for
        # the decision tree and a single one for the GPC = 8 in total
        self.assertEqual(
            len(self.estimator_switch.pipeline_element_configurations), 3)
        self.assertEqual(
            len(self.estimator_switch.pipeline_element_configurations[0]), 4)
        self.assertEqual(
            len(self.estimator_switch.pipeline_element_configurations[1]), 3)

        # hyperparameters
        self.assertDictEqual(
            self.estimator_switch.hyperparameters, {
                'estimator_switch__current_element': [(0, 0), (0, 1), (0, 2),
                                                      (0, 3), (1, 0), (1, 1),
                                                      (1, 2), (2, 0)]
            })

        # config grid
        self.assertListEqual(self.estimator_switch.generate_config_grid(), [{
            'estimator_switch__current_element': (0, 0)
        }, {
            'estimator_switch__current_element': (0, 1)
        }, {
            'estimator_switch__current_element': (0, 2)
        }, {
            'estimator_switch__current_element': (0, 3)
        }, {
            'estimator_switch__current_element': (1, 0)
        }, {
            'estimator_switch__current_element': (1, 1)
        }, {
            'estimator_switch__current_element': (1, 2)
        }, {
            'estimator_switch__current_element': (2, 0)
        }])

    def test_set_params(self):
        """``current_element`` must be an index pair; element-prefixed
        parameters are accepted as well."""

        # test for grid search
        false_config = {'current_element': 1}
        with self.assertRaises(ValueError):
            self.estimator_switch.set_params(**false_config)

        correct_config = {'current_element': (0, 1)}
        self.estimator_switch.set_params(**correct_config)
        self.assertEqual(self.estimator_switch.base_element.base_element.C,
                         0.1)
        self.assertEqual(
            self.estimator_switch.base_element.base_element.kernel, 'sigmoid')

        # test for other optimizers
        smac_config = {'SVC__C': 2, 'SVC__kernel': 'rbf'}
        self.estimator_switch.set_params(**smac_config)
        self.assertEqual(self.estimator_switch.base_element.base_element.C, 2)
        self.assertEqual(
            self.estimator_switch.base_element.base_element.kernel, 'rbf')

    def test_fit(self):
        """Fitting the switch with the tree selected gives the same feature
        importances as fitting the stand-alone tree (same NumPy seed)."""
        np.random.seed(42)
        self.estimator_switch.set_params(**{'current_element': (1, 0)})
        self.estimator_switch.fit(self.X, self.y)
        np.random.seed(42)
        self.tree.set_params(**{'min_samples_split': 2})
        self.tree.fit(self.X, self.y)
        np.testing.assert_array_equal(
            self.tree.base_element.feature_importances_,
            self.estimator_switch.base_element.feature_importances_)

    def test_transform(self):
        """With the PCA selected, the switch transforms like a plain PCA."""
        self.transformer_switch_with_branch.set_params(
            **{'current_element': (0, 0)})
        self.transformer_switch_with_branch.fit(self.X, self.y)
        self.pca.fit(self.X, self.y)

        switch_Xt, _, _ = self.transformer_switch_with_branch.transform(self.X)
        pca_Xt, _, _ = self.pca.transform(self.X)
        self.assertTrue(np.array_equal(pca_Xt, switch_Xt))

    def test_predict(self):
        """With the tree selected, the switch predicts like a plain tree."""
        self.estimator_switch.set_params(**{'current_element': (1, 0)})
        np.random.seed(42)
        self.estimator_switch.fit(self.X, self.y)
        self.tree.set_params(**{'min_samples_split': 2})
        np.random.seed(42)
        self.tree.fit(self.X, self.y)

        switch_preds = self.estimator_switch.predict(self.X)
        tree_preds = self.tree.predict(self.X)
        self.assertTrue(np.array_equal(switch_preds, tree_preds))

    def test_predict_proba(self):
        """With the GPC selected, the switch yields the same probabilities as
        the stand-alone GPC from ``setUp``."""
        gpc = PipelineElement('GaussianProcessClassifier')
        svc = PipelineElement('SVC')
        switch = Switch('EstimatorSwitch', [gpc, svc])
        switch.set_params(**{'current_element': (0, 0)})
        np.random.seed(42)
        switch_probas = switch.fit(self.X, self.y).predict_proba(self.X)
        np.random.seed(42)
        gpc_probas = self.gpc.fit(self.X, self.y).predict_proba(self.X)
        self.assertTrue(np.array_equal(switch_probas, gpc_probas))

    def test_inverse_transform(self):
        """The switch round-trips data exactly like the PCA it selected."""
        self.transformer_switch_with_branch.set_params(
            **{'current_element': (0, 0)})
        self.transformer_switch_with_branch.fit(self.X, self.y)
        self.pca.fit(self.X, self.y)
        Xt_pca, _, _ = self.pca.transform(self.X)
        Xt_switch, _, _ = self.transformer_switch_with_branch.transform(self.X)
        X_pca, _, _ = self.pca.inverse_transform(Xt_pca)
        X_switch, _, _ = self.transformer_switch_with_branch.inverse_transform(
            Xt_switch)

        self.assertTrue(np.array_equal(Xt_pca, Xt_switch))
        self.assertTrue(np.array_equal(X_pca, X_switch))
        np.testing.assert_almost_equal(X_switch, self.X)

    def test_base_element(self):
        """``base_element`` is the very object that was selected, not a copy."""
        switch = Switch('switch', [self.svc, self.tree])
        switch.set_params(**{'current_element': (1, 1)})
        self.assertIs(switch.base_element, self.tree)
        self.assertIs(switch.base_element.base_element, self.tree.base_element)

        # other optimizer
        switch.set_params(**{'DecisionTreeClassifier__min_samples_split': 2})
        self.assertIs(switch.base_element, self.tree)
        self.assertIs(switch.base_element.base_element, self.tree.base_element)

    def test_copy_me(self):
        """A copy keeps ``random_state`` and the ``elements_to_dict`` view,
        while none of its elements compares equal to the original's."""
        switches = [
            self.estimator_switch, self.estimator_switch_with_branch,
            self.transformer_switch_with_branch, self.switch_in_switch
        ]

        for switch in switches:
            copy = switch.copy_me()

            self.assertEqual(switch.random_state, copy.random_state)

            for i, copied_element in enumerate(copy.elements):
                self.assertNotEqual(copied_element, switch.elements[i])

            switch_dict = elements_to_dict(switch)
            copy_dict = elements_to_dict(copy)

            self.assertDictEqual(copy_dict, switch_dict)

    def test_estimator_type(self):
        """``_estimator_type`` is shared by all elements or raises when the
        elements disagree."""
        pca = PipelineElement('PCA')
        ica = PipelineElement('FastICA')
        svc = PipelineElement('SVC')
        svr = PipelineElement('SVR')
        tree_class = PipelineElement('DecisionTreeClassifier')
        tree_reg = PipelineElement('DecisionTreeRegressor')

        # mixing element kinds: reading the property itself must raise
        switch = Switch('MySwitch', [pca, svr])
        with self.assertRaises(NotImplementedError):
            _ = switch._estimator_type

        switch = Switch('MySwitch', [svc, svr])
        with self.assertRaises(NotImplementedError):
            _ = switch._estimator_type

        switch = Switch('MySwitch', [pca, ica])
        self.assertEqual(switch._estimator_type, None)

        switch = Switch('MySwitch', [tree_class, svc])
        self.assertEqual(switch._estimator_type, 'classifier')

        switch = Switch('MySwitch', [tree_reg, svr])
        self.assertEqual(switch._estimator_type, 'regressor')

        self.assertEqual(self.estimator_switch._estimator_type, 'classifier')
        self.assertEqual(self.estimator_switch_with_branch._estimator_type,
                         'classifier')
        self.assertEqual(self.transformer_switch_with_branch._estimator_type,
                         None)
        self.assertEqual(self.switch_in_switch._estimator_type, None)

    def test_add(self):
        """Elements are kept in insertion order; re-adding the same object
        fails and name clashes get a running numeric suffix."""
        self.assertEqual(len(self.estimator_switch.elements), 3)
        self.assertEqual(len(self.switch_in_switch.elements), 2)
        self.assertEqual(len(self.transformer_switch_with_branch.elements), 2)

        self.assertEqual(
            list(self.estimator_switch.elements_dict.keys()),
            ['SVC', 'DecisionTreeClassifier', 'GaussianProcessClassifier'])
        self.assertEqual(
            list(self.switch_in_switch.elements_dict.keys()),
            ['transformer_branch', 'transformer_switch_with_branch'])

        # both construction styles only have to run without raising
        switch = Switch('MySwitch',
                        [PipelineElement('PCA'),
                         PipelineElement('FastICA')])
        switch = Switch('MySwitch2')
        switch += PipelineElement('PCA')
        switch += PipelineElement('FastICA')

        # test doubled names
        with self.assertRaises(ValueError):
            self.estimator_switch += self.estimator_switch.elements[0]
        self.estimator_switch += PipelineElement("SVC")
        self.assertEqual(self.estimator_switch.elements[-1].name, "SVC2")
        self.estimator_switch += PipelineElement(
            "SVC", hyperparameters={'kernel': ['polynomial', 'sigmoid']})
        self.assertEqual(self.estimator_switch.elements[-1].name, "SVC3")
        self.estimator_switch += PipelineElement("SVR")
        self.assertEqual(self.estimator_switch.elements[-1].name, "SVR")
        self.estimator_switch += PipelineElement("SVC")
        self.assertEqual(self.estimator_switch.elements[-1].name, "SVC4")

        # check that hyperparameters are renamed respectively
        self.assertEqual(
            self.estimator_switch.pipeline_element_configurations[4][0]
            ["SVC3__kernel"], 'polynomial')

    def test_feature_importances(self):
        """Feature importances follow the selected element: one value per
        input column for the tree, ``None`` for the other cases checked."""

        self.estimator_switch.set_params(**{'current_element': (1, 0)})
        self.estimator_switch.fit(self.X, self.y)
        self.assertTrue(
            len(self.estimator_switch.feature_importances_) == self.X.shape[1])

        self.estimator_switch_with_branch.set_params(
            **{'current_element': (1, 0)})
        self.estimator_switch_with_branch.fit(self.X, self.y)
        self.assertTrue(
            len(self.estimator_switch_with_branch.feature_importances_) ==
            self.X.shape[1])

        self.estimator_switch.set_params(**{'current_element': (2, 0)})
        self.estimator_switch.fit(self.X, self.y)
        # NOTE(review): this checks ``estimator_branch``, which is not the
        # object configured and fitted just above; ``estimator_switch`` was
        # presumably intended - confirm before changing.
        self.assertIsNone(self.estimator_branch.feature_importances_)

        self.switch_in_switch.set_params(**{'current_element': (1, 0)})
        self.switch_in_switch.fit(self.X, self.y)
        self.assertIsNone(self.switch_in_switch.feature_importances_)
        # NOTE(review): the next line reconfigures ``estimator_switch`` while
        # the fit and assertion target ``switch_in_switch`` - possibly a
        # copy-paste slip; confirm the intended object.
        self.estimator_switch.set_params(**{'current_element': (1, 0)})
        self.switch_in_switch.fit(self.X, self.y)
        self.assertIsNone(self.switch_in_switch.feature_importances_)