Exemple #1
0
class BatchingTests(unittest.TestCase):
    """Tests for a PipelineElement that wraps DummyBatchTransformer with a batch size."""

    def setUp(self):
        """Create a batched dummy element and a 50 x 3 string data set.

        Every animal name contributes ``batch_size`` identical rows whose
        features are the name followed by the column index; the targets (and
        the ``animals`` kwarg) hold the plain animal name for each row.
        """
        self.batch_size = 10
        nr_features = 3
        origin_list = ["affe", "tiger", "schwein", "giraffe", "löwe"]

        self.neuro_batch = PipelineElement(
            "dummy_batch",
            batch_size=self.batch_size,
            base_element=DummyBatchTransformer())

        # Rows are grouped per animal, so each group of `batch_size` rows
        # belongs to exactly one animal.
        rows = [[animal + str(column) for column in range(0, nr_features)]
                for animal in origin_list
                for _ in range(self.batch_size)]
        labels = [animal
                  for animal in origin_list
                  for _ in range(self.batch_size)]

        self.data = np.array(rows)
        self.targets = np.array(labels)
        self.kwargs = {"animals": self.targets}

    def test_transform(self):
        """Check first/last transformed rows and kwargs, and the failure on bad input."""
        transformed = self.neuro_batch.transform(self.data, self.targets,
                                                 **self.kwargs)
        X_new, y_new, kwargs_new = transformed

        # Expected values as produced by the dummy transformer (presumably it
        # appends the animal name to each feature and reverses the kwarg).
        expected_first = ["affe0affe", "affe1affe", "affe2affe"]
        expected_last = ["löwe0löwe", "löwe1löwe", "löwe2löwe"]
        self.assertListEqual(X_new[0, :].tolist(), expected_first)
        self.assertListEqual(X_new[49, :].tolist(), expected_last)

        animals = kwargs_new["animals"]
        self.assertEqual(animals[0], "effa")
        self.assertEqual(animals[49], "ewöl")

        # input that cannot be batched is expected to raise a Warning
        with self.assertRaises(Warning):
            self.neuro_batch.transform('str', [0])

    def test_predict(self):
        """Check that predict is invoked once per batch and fails on bad input."""
        y_predicted = self.neuro_batch.predict(self.data, **self.kwargs)

        # The first prediction must be 1 and the last one must equal the
        # number of batches, which shows predict was called batch-wise.
        nr_batches = self.data.shape[0] / self.batch_size
        self.assertEqual(y_predicted[0], 1)
        self.assertEqual(y_predicted[-1], (nr_batches))

        with self.assertRaises(Warning):
            self.neuro_batch.predict('str')
Exemple #2
0
class SwitchTests(unittest.TestCase):
    def setUp(self):
        """Load the breast cancer data and build the elements, branches and switches under test.

        Every container receives copies (``copy_me``) of the shared elements,
        so the ``self.svc``/``self.tree``/``self.gpc``/``self.pca`` instances
        stay independent of the switches.
        """
        # return_X_y is keyword-only in current scikit-learn releases; passing
        # it positionally raises a TypeError there.
        self.X, self.y = load_breast_cancer(return_X_y=True)
        self.svc = PipelineElement('SVC', {
            'C': [0.1, 1],
            'kernel': ['rbf', 'sigmoid']
        })
        self.tree = PipelineElement('DecisionTreeClassifier',
                                    {'min_samples_split': [2, 3, 4]})
        self.gpc = PipelineElement('GaussianProcessClassifier')
        self.pca = PipelineElement('PCA')

        # single-element branches, one ending in an estimator and one in a
        # transformer
        self.estimator_branch = Branch('estimator_branch',
                                       [self.tree.copy_me()])
        self.transformer_branch = Branch('transformer_branch',
                                         [self.pca.copy_me()])

        # switch over three plain estimators
        self.estimator_switch = Switch(
            'estimator_switch',
            [self.svc.copy_me(),
             self.tree.copy_me(),
             self.gpc.copy_me()])
        # switches that mix a plain element with a branch
        self.estimator_switch_with_branch = Switch(
            'estimator_switch_with_branch',
            [self.tree.copy_me(),
             self.estimator_branch.copy_me()])
        self.transformer_switch_with_branch = Switch(
            'transformer_switch_with_branch',
            [self.pca.copy_me(),
             self.transformer_branch.copy_me()])
        # switch that contains another switch
        self.switch_in_switch = Switch('Switch_in_switch', [
            self.transformer_branch.copy_me(),
            self.transformer_switch_with_branch.copy_me()
        ])

    def test_init(self):
        """The switch keeps the name it was constructed with."""
        expected_name = 'estimator_switch'
        self.assertEqual(self.estimator_switch.name, expected_name)

    def test_hyperparams(self):
        """Check the per-element configurations, the hyperparameter dict and the config grid."""
        # One configuration list per switch element: 4 combinations for the
        # first element (SVC) and 3 for the second (decision tree).
        self.assertEqual(
            len(self.estimator_switch.pipeline_element_configurations), 3)
        self.assertEqual(
            len(self.estimator_switch.pipeline_element_configurations[0]), 4)
        self.assertEqual(
            len(self.estimator_switch.pipeline_element_configurations[1]), 3)

        # Every choice is an (element index, configuration index) tuple; the
        # third element contributes the single choice (2, 0), 8 in total.
        param_name = 'estimator_switch__current_element'
        choices = ([(0, config_nr) for config_nr in range(4)] +
                   [(1, config_nr) for config_nr in range(3)] +
                   [(2, 0)])

        # hyperparameters
        self.assertDictEqual(self.estimator_switch.hyperparameters,
                             {param_name: choices})

        # config grid: one single-entry dict per choice, in the same order
        expected_grid = [{param_name: choice} for choice in choices]
        self.assertListEqual(self.estimator_switch.generate_config_grid(),
                             expected_grid)

    def test_set_params(self):
        """set_params accepts a ``current_element`` tuple or element-prefixed parameters."""
        switch = self.estimator_switch

        # grid search style: a bare integer is rejected ...
        with self.assertRaises(ValueError):
            switch.set_params(current_element=1)

        # ... while a tuple selects the element and one of its configurations
        switch.set_params(current_element=(0, 1))
        selected = switch.base_element.base_element
        self.assertEqual(selected.C, 0.1)
        self.assertEqual(selected.kernel, 'sigmoid')

        # other optimizers pass the parameters prefixed with the element name
        switch.set_params(SVC__C=2, SVC__kernel='rbf')
        selected = switch.base_element.base_element
        self.assertEqual(selected.C, 2)
        self.assertEqual(selected.kernel, 'rbf')

    def test_fit(self):
        """Fitting the switch with the tree selected matches fitting the tree directly."""
        switch, tree = self.estimator_switch, self.tree

        # identical seed before each fit so both trees are grown the same way
        np.random.seed(42)
        switch.set_params(current_element=(1, 0))
        switch.fit(self.X, self.y)

        np.random.seed(42)
        tree.set_params(min_samples_split=2)
        tree.fit(self.X, self.y)

        np.testing.assert_array_equal(
            tree.base_element.feature_importances_,
            switch.base_element.feature_importances_)

    def test_transform(self):
        """Transforming through the switch with PCA selected equals a stand-alone PCA."""
        switch = self.transformer_switch_with_branch
        switch.set_params(current_element=(0, 0))
        switch.fit(self.X, self.y)
        self.pca.fit(self.X, self.y)

        # transform returns a triple; only the transformed data is compared
        via_switch, _, _ = switch.transform(self.X)
        via_pca, _, _ = self.pca.transform(self.X)
        self.assertTrue(np.array_equal(via_pca, via_switch))

    def test_predict(self):
        """Predictions of the switch with the tree selected equal those of the tree itself."""
        switch, tree = self.estimator_switch, self.tree

        switch.set_params(current_element=(1, 0))
        np.random.seed(42)
        switch.fit(self.X, self.y)

        tree.set_params(min_samples_split=2)
        np.random.seed(42)
        tree.fit(self.X, self.y)

        from_switch = switch.predict(self.X)
        from_tree = tree.predict(self.X)
        self.assertTrue(np.array_equal(from_switch, from_tree))

    def test_predict_proba(self):
        """predict_proba of a switch with the GPC selected equals that of a stand-alone GPC."""
        switch = Switch('EstimatorSwitch', [
            PipelineElement('GaussianProcessClassifier'),
            PipelineElement('SVC')
        ])
        switch.set_params(current_element=(0, 0))

        np.random.seed(42)
        fitted_switch = switch.fit(self.X, self.y)
        switch_probas = fitted_switch.predict_proba(self.X)

        # reference: the GPC element created in setUp, fitted with the same seed
        np.random.seed(42)
        fitted_gpc = self.gpc.fit(self.X, self.y)
        gpr_probas = fitted_gpc.predict_proba(self.X)

        self.assertTrue(np.array_equal(switch_probas, gpr_probas))

    def test_inverse_transform(self):
        """Round trip through the switch (PCA selected) matches plain PCA and restores X."""
        switch = self.transformer_switch_with_branch
        switch.set_params(current_element=(0, 0))
        switch.fit(self.X, self.y)
        self.pca.fit(self.X, self.y)

        # forward transformation
        Xt_pca, _, _ = self.pca.transform(self.X)
        Xt_switch, _, _ = switch.transform(self.X)

        # backward transformation
        X_pca, _, _ = self.pca.inverse_transform(Xt_pca)
        X_switch, _, _ = switch.inverse_transform(Xt_switch)

        self.assertTrue(np.array_equal(Xt_pca, Xt_switch))
        self.assertTrue(np.array_equal(X_pca, X_switch))
        # the reconstruction is only required to match up to rounding
        np.testing.assert_almost_equal(X_switch, self.X)

    def test_base_element(self):
        """The switch exposes the very same selected element object, not a copy."""
        tree = self.tree
        switch = Switch('switch', [self.svc, tree])

        # selection via (element index, configuration index)
        switch.set_params(current_element=(1, 1))
        self.assertIs(switch.base_element, tree)
        self.assertIs(switch.base_element.base_element, tree.base_element)

        # other optimizer: selection via an element-prefixed parameter
        switch.set_params(DecisionTreeClassifier__min_samples_split=2)
        self.assertIs(switch.base_element, tree)
        self.assertIs(switch.base_element.base_element, tree.base_element)

    def test_copy_me(self):
        """copy_me yields new element objects whose dict representation is unchanged."""
        candidates = [
            self.estimator_switch, self.estimator_switch_with_branch,
            self.transformer_switch_with_branch, self.switch_in_switch
        ]

        for original in candidates:
            duplicate = original.copy_me()

            self.assertEqual(original.random_state, duplicate.random_state)

            # every element of the copy must differ from its counterpart
            for position, copied_element in enumerate(duplicate.elements):
                self.assertNotEqual(copied_element,
                                    original.elements[position])

            # ... while both describe the same structure
            original_as_dict = elements_to_dict(original)
            duplicate_as_dict = elements_to_dict(duplicate)
            self.assertDictEqual(duplicate_as_dict, original_as_dict)

    def test_estimator_type(self):
        """_estimator_type is derived from the elements and fails for mixed types."""
        pca = PipelineElement('PCA')
        ica = PipelineElement('FastICA')
        svc = PipelineElement('SVC')
        svr = PipelineElement('SVR')
        tree_class = PipelineElement('DecisionTreeClassifier')
        tree_reg = PipelineElement('DecisionTreeRegressor')

        # transformer + estimator and classifier + regressor cannot be mixed
        for mixed_elements in ([pca, svr], [svc, svr]):
            switch = Switch('MySwitch', mixed_elements)
            with self.assertRaises(NotImplementedError):
                _ = switch._estimator_type

        # homogeneous switches report the common type (None for transformers)
        homogeneous_cases = (
            ([pca, ica], None),
            ([tree_class, svc], 'classifier'),
            ([tree_reg, svr], 'regressor'),
        )
        for same_type_elements, expected_type in homogeneous_cases:
            switch = Switch('MySwitch', same_type_elements)
            self.assertEqual(switch._estimator_type, expected_type)

        # switches from setUp, including those holding branches or switches
        self.assertEqual(self.estimator_switch._estimator_type, 'classifier')
        self.assertEqual(self.estimator_switch_with_branch._estimator_type,
                         'classifier')
        self.assertEqual(self.transformer_switch_with_branch._estimator_type,
                         None)
        self.assertEqual(self.switch_in_switch._estimator_type, None)

    def test_add(self):
        """Elements can be added at construction or via ``+=``; duplicate names get a counter."""
        self.assertEqual(len(self.estimator_switch.elements), 3)
        self.assertEqual(len(self.switch_in_switch.elements), 2)
        self.assertEqual(len(self.transformer_switch_with_branch.elements), 2)

        # elements_dict is keyed by element name, in insertion order
        self.assertEqual(
            list(self.estimator_switch.elements_dict.keys()),
            ['SVC', 'DecisionTreeClassifier', 'GaussianProcessClassifier'])
        self.assertEqual(
            list(self.switch_in_switch.elements_dict.keys()),
            ['transformer_branch', 'transformer_switch_with_branch'])

        # both ways of filling a switch must work without raising
        filled_on_init = Switch(
            'MySwitch',
            [PipelineElement('PCA'),
             PipelineElement('FastICA')])
        filled_later = Switch('MySwitch2')
        filled_later += PipelineElement('PCA')
        filled_later += PipelineElement('FastICA')

        # test doubled names: the identical object is rejected ...
        with self.assertRaises(ValueError):
            self.estimator_switch += self.estimator_switch.elements[0]

        # ... whereas new elements with an existing name are renamed
        additions = (
            ("SVC", {}, "SVC2"),
            ("SVC", {'hyperparameters': {'kernel': ['polynomial', 'sigmoid']}},
             "SVC3"),
            ("SVR", {}, "SVR"),
            ("SVC", {}, "SVC4"),
        )
        for element_name, element_kwargs, expected_name in additions:
            self.estimator_switch += PipelineElement(element_name,
                                                     **element_kwargs)
            self.assertEqual(self.estimator_switch.elements[-1].name,
                             expected_name)

        # check that hyperparameters are renamed respectively
        svc3_configs = self.estimator_switch.pipeline_element_configurations[4]
        self.assertEqual(svc3_configs[0]["SVC3__kernel"], 'polynomial')

    def test_feature_importances(self):
        """feature_importances_ has one entry per feature for trees and is None otherwise."""
        tree_choice = {'current_element': (1, 0)}

        # decision tree selected directly
        self.estimator_switch.set_params(**tree_choice)
        self.estimator_switch.fit(self.X, self.y)
        self.assertTrue(
            len(self.estimator_switch.feature_importances_) == self.X.shape[1])

        # branch (holding a decision tree) selected
        self.estimator_switch_with_branch.set_params(**tree_choice)
        self.estimator_switch_with_branch.fit(self.X, self.y)
        self.assertTrue(
            len(self.estimator_switch_with_branch.feature_importances_) ==
            self.X.shape[1])

        # third element selected
        self.estimator_switch.set_params(**{'current_element': (2, 0)})
        self.estimator_switch.fit(self.X, self.y)
        # NOTE(review): this inspects estimator_branch, not the switch fitted
        # just above - confirm whether estimator_switch was intended.
        self.assertIsNone(self.estimator_branch.feature_importances_)

        # switches over transformers expose no importances
        self.switch_in_switch.set_params(**tree_choice)
        self.switch_in_switch.fit(self.X, self.y)
        self.assertIsNone(self.switch_in_switch.feature_importances_)
        # NOTE(review): estimator_switch is reconfigured here, but
        # switch_in_switch is the one fitted and checked - confirm intent.
        self.estimator_switch.set_params(**tree_choice)
        self.switch_in_switch.fit(self.X, self.y)
        self.assertIsNone(self.switch_in_switch.feature_importances_)
Exemple #3
0
class PipelineElementTests(unittest.TestCase):
    def setUp(self):
        """Create a PCA and an SVC element plus data, kwargs and their shifted counterparts.

        ``Xt``, ``yt`` and ``kwargst`` hold the inputs incremented by one; the
        delegate-call tests compare transformer outputs against them.
        """
        self.pca_pipe_element = PipelineElement('PCA',
                                                {'n_components': [1, 2]},
                                                test_disabled=True)
        self.svc_pipe_element = PipelineElement('SVC', {
            'C': [0.1, 1],
            'kernel': ['rbf', 'sigmoid']
        })
        # return_X_y is keyword-only in current scikit-learn releases; passing
        # it positionally raises a TypeError there.
        self.X, self.y = load_breast_cancer(return_X_y=True)
        self.kwargs = {'covariates': self.y}
        self.Xt = self.X + 1
        self.yt = self.y + 1
        self.kwargst = {'covariates': self.y + 1}

    def test_create_failure(self):
        """An element name that cannot be resolved raises a NameError."""
        self.assertRaises(NameError, PipelineElement, 'NONSENSEName', {})

    def test_pipeline_element_create(self):
        """The element wraps a PCA instance and keeps its name and test_disabled flag."""
        element = self.pca_pipe_element

        # the wrapped object is the requested sklearn class
        self.assertIsInstance(element.base_element, PCA)

        # test_disabled=True from the constructor is stored
        self.assertTrue(element.test_disabled)

        # the element is named after the class it wraps
        self.assertEqual(element.name, 'PCA')

    def test_fit(self):
        """Fitting delegates to the base element and yields the known fitted values.

        Fitted floats are compared with a relative tolerance rather than
        exact equality, since the last bits of such results can vary between
        platforms and library versions.
        """
        self.pca_pipe_element.fit(self.X, self.y)
        fitted_pca = self.pca_pipe_element.base_element
        self.assertEqual(fitted_pca.components_.shape, (30, 30))
        np.testing.assert_allclose(fitted_pca.components_[0, 0],
                                   0.005086232018734175)

        self.svc_pipe_element.fit(self.X, self.y)
        # assert_allclose handles both a scalar and a one-element array here
        np.testing.assert_allclose(
            self.svc_pipe_element.base_element._intercept_,
            -0.3753900173819406)

    def test_transform(self):
        """A fitted PCA element transforms X to the expected shape and first value."""
        element = self.pca_pipe_element
        element.fit(self.X, self.y)

        transformed, _, _ = element.transform(self.X)
        self.assertEqual(transformed.shape, (569, 30))
        self.assertAlmostEqual(transformed[0, 0], 1160.1425737041347)

    def test_predict(self):
        """A fitted SVC element predicts one label per sample."""
        element = self.svc_pipe_element
        element.fit(self.X, self.y)

        predictions = element.predict(self.X)
        self.assertEqual(predictions.shape, (569, ))
        self.assertEqual(predictions[21], 1)

    def test_predict_proba(self):
        """predict_proba is None for the SVC element and yields probabilities for a GPC."""
        self.svc_pipe_element.fit(self.X, self.y)
        # assertIsNone fails cleanly if an array is returned, whereas
        # comparing an array with None via == raises an ambiguous-truth error.
        self.assertIsNone(self.svc_pipe_element.predict_proba(self.X))

        gpc = PipelineElement('GaussianProcessClassifier')
        gpc.fit(self.X, self.y)
        # probabilities are floating point results: compare with a tolerance
        np.testing.assert_allclose(
            gpc.predict_proba(self.X)[0],
            [0.5847072926551391, 0.4152927073448609])

    def test_inverse_transform(self):
        """inverse_transform of the transformed data restores X up to rounding."""
        fitted = self.pca_pipe_element.fit(self.X, self.y)
        transformed, _, _ = fitted.transform(self.X)
        restored, _, _ = self.pca_pipe_element.inverse_transform(transformed)
        np.testing.assert_array_almost_equal(restored, self.X)

    def test_one_hyperparameter_setup(self):
        """One hyperparameter plus test_disabled gives prefixed names and a three-entry grid."""
        # hyperparameter names are prefixed with the element name and the
        # disabled switch is added as a hyperparameter of its own
        expected_hyperparameters = {
            'PCA__n_components': [1, 2],
            'PCA__disabled': [False, True]
        }
        self.assertDictEqual(self.pca_pipe_element.hyperparameters,
                             expected_hyperparameters)

        # one enabled config per n_components value, then a single disabled
        # config that carries no n_components
        expected_grid = [{'PCA__n_components': n_components,
                          'PCA__disabled': False}
                         for n_components in (1, 2)]
        expected_grid.append({'PCA__disabled': True})
        self.assertListEqual(self.pca_pipe_element.generate_config_grid(),
                             expected_grid)

    def test_more_hyperparameters_setup(self):
        """Two hyperparameters give prefixed names and their full cartesian grid."""
        c_values = [0.1, 1]
        kernels = ['rbf', 'sigmoid']

        # hyperparameter names are prefixed with the element name
        self.assertDictEqual(self.svc_pipe_element.hyperparameters, {
            'SVC__C': c_values,
            'SVC__kernel': kernels
        })

        # the grid varies the kernel fastest and C slowest
        expected_grid = [{'SVC__C': c_value, 'SVC__kernel': kernel}
                         for c_value in c_values
                         for kernel in kernels]
        self.assertListEqual(self.svc_pipe_element.generate_config_grid(),
                             expected_grid)

    def test_no_hyperparameters(self):
        """Without hyperparameters the wrapped PCA has the same attributes as a plain PCA()."""
        plain_pca = PCA()
        wrapped_pca = PipelineElement('PCA').base_element

        self.assertDictEqual(plain_pca.__dict__, wrapped_pca.__dict__)

    def test_set_params(self):
        """set_params updates the element and its base element; unknown names raise."""
        element = self.pca_pipe_element
        element.set_params(n_components=3, disabled=False)

        self.assertFalse(element.disabled)
        self.assertEqual(element.base_element.n_components, 3)

        # a parameter the element does not know is rejected
        with self.assertRaises(ValueError):
            element.set_params(any_weird_param=1)

    def test_set_random_state(self):
        """Setting random_state on a branch reaches elements nested in a switch and a stack."""
        # random_state is handled by one inherited method, so a single nested
        # structure covers all element kinds
        random_state = 53

        my_switch = Switch("transformer_Switch")
        my_stack = Stack("Estimator_Stack")

        my_branch = Branch("random_state_branch")
        my_branch += PipelineElement("StandardScaler")
        my_switch += PipelineElement("LassoFeatureSelection")
        my_switch += PipelineElement("PCA")
        my_branch += my_switch
        my_stack += PipelineElement("SVR")
        my_stack += PipelineElement("Ridge")
        my_branch += my_stack
        my_branch += PipelineElement("ElasticNet")

        my_branch.random_state = random_state

        # the second element of each container must carry the value, both on
        # the wrapper and on the wrapped base element
        for container in (my_switch, my_stack):
            nested = container.elements[1]
            self.assertTrue(nested.random_state == random_state)
            self.assertTrue(nested.base_element.random_state == random_state)

    def test_adjusted_delegate_call_transformer(self):
        """transform always returns (X, y, kwargs), whatever the base element consumes.

        The expected outputs are the inputs shifted by one (``Xt``, ``yt``,
        ``kwargst``) for whatever the respective dummy transformer changes.
        """
        # standard transformer: only X is transformed
        plain = PipelineElement.create('Transformer',
                                       base_element=DummyTransformer(),
                                       hyperparameters={})
        X_out, y_out, kwargs_out = plain.transform(self.X, self.y,
                                                   **self.kwargs)
        self.assertTrue(np.array_equal(X_out, self.Xt))
        self.assertTrue(np.array_equal(y_out, self.y))
        self.assertDictEqual(kwargs_out, self.kwargs)

        # transformer that needs y: X and y change, kwargs pass through
        needs_y = PipelineElement.create('NeedsYTransformer',
                                         base_element=DummyNeedsYTransformer(),
                                         hyperparameters={})
        X_out, y_out, kwargs_out = needs_y.transform(self.X, self.y,
                                                     **self.kwargs)
        self.assertTrue(np.array_equal(X_out, self.Xt))
        self.assertTrue(np.array_equal(y_out, self.yt))
        self.assertDictEqual(kwargs_out, self.kwargs)

        # same transformer, this time called without any kwargs
        needs_y = PipelineElement.create('NeedsYTransformer',
                                         base_element=DummyNeedsYTransformer(),
                                         hyperparameters={})
        X_out, y_out, kwargs_out = needs_y.transform(self.X, self.y)
        self.assertTrue(np.array_equal(X_out, self.Xt))
        self.assertTrue(np.array_equal(y_out, self.yt))
        self.assertDictEqual(kwargs_out, {})

        # transformer that needs covariates: called without y, y stays None
        needs_covariates = PipelineElement.create(
            'NeedsCovariatesTransformer',
            base_element=DummyNeedsCovariatesTransformer(),
            hyperparameters={})
        X_out, y_out, kwargs_out = needs_covariates.transform(
            self.X, **self.kwargs)
        self.assertTrue(np.array_equal(X_out, self.Xt))
        self.assertTrue(
            np.array_equal(kwargs_out['covariates'],
                           self.kwargst['covariates']))
        self.assertEqual(y_out, None)

        # transformer that needs covariates and y: everything changes
        needs_both = PipelineElement.create(
            'NeedsCovariatesAndYTransformer',
            base_element=DummyNeedsCovariatesAndYTransformer(),
            hyperparameters={})
        X_out, y_out, kwargs_out = needs_both.transform(self.X, self.y,
                                                        **self.kwargs)
        self.assertTrue(np.array_equal(X_out, self.Xt))
        self.assertTrue(np.array_equal(y_out, self.yt))
        self.assertTrue(
            np.array_equal(kwargs_out['covariates'],
                           self.kwargst['covariates']))

    def test_adjusted_delegate_call_estimator(self):
        """predict works for estimators with and without covariates in their signature."""
        # standard estimator: the prediction is expected to equal Xt
        plain = PipelineElement.create('Estimator',
                                       base_element=DummyEstimator(),
                                       hyperparameters={})
        predicted = plain.predict(self.X)
        self.assertTrue(np.array_equal(predicted, self.Xt))

        # estimator that needs covariates: kwargs are passed along
        with_covariates = PipelineElement.create(
            'Estimator',
            base_element=DummyNeedsCovariatesEstimator(),
            hyperparameters={})
        predicted = with_covariates.predict(self.X, **self.kwargs)
        self.assertTrue(np.array_equal(predicted, self.Xt))

    def test_predict_when_no_transform(self):
        """Calling transform on an estimator element returns (X, y, kwargs) with y None.

        The first item is expected to equal ``Xt``, the same value the dummy
        estimators are expected to predict.
        """
        # standard estimator
        plain = PipelineElement.create('Estimator',
                                       base_element=DummyEstimator(),
                                       hyperparameters={})
        X_out, y_out, kwargs_out = plain.transform(self.X)
        self.assertTrue(np.array_equal(X_out, self.Xt))
        self.assertEqual(y_out, None)

        # estimator that needs covariates: the covariates come back unchanged
        with_covariates = PipelineElement.create(
            'Estimator',
            base_element=DummyNeedsCovariatesEstimator(),
            hyperparameters={})
        X_out, y_out, kwargs_out = with_covariates.transform(
            self.X, **self.kwargs)
        self.assertTrue(np.array_equal(X_out, self.Xt))
        self.assertTrue(
            np.array_equal(kwargs_out['covariates'],
                           self.kwargs['covariates']))
        self.assertEqual(y_out, None)

    def test_copy_me(self):
        """copy_me gives an independent but equally configured element."""
        original = PipelineElement('SVC', {
            'C': [0.1, 1],
            'kernel': ['rbf', 'sigmoid']
        })
        original.set_params(C=0.1, kernel='sigmoid')
        duplicate = original.copy_me()

        # same settings, different base element object
        self.assertEqual(original.random_state, duplicate.random_state)
        self.assertNotEqual(duplicate.base_element, original.base_element)
        self.assertDictEqual(elements_to_dict(duplicate),
                             elements_to_dict(original))
        self.assertEqual(duplicate.base_element.C, original.base_element.C)

        # a copy taken from a fitted element must look like the earlier copy
        fitted = original.fit(self.X, self.y)
        copy_after_fit = fitted.copy_me()
        self.assertDictEqual(elements_to_dict(duplicate),
                             elements_to_dict(copy_after_fit))

        # hyperparameters are carried over, and changing the copy must not
        # affect the element it was copied from
        original = PipelineElement('SVC', {
            'C': [0.1, 1],
            'kernel': ['rbf', 'sigmoid']
        })
        duplicate = original.copy_me()
        self.assertDictEqual(duplicate.hyperparameters, {
            'SVC__C': [0.1, 1],
            'SVC__kernel': ['rbf', 'sigmoid']
        })
        duplicate.base_element.C = 3
        self.assertNotEqual(original.base_element.C, duplicate.base_element.C)

        # test custom element
        custom_element = PipelineElement.create(
            'CustomElement',
            base_element=DummyNeedsCovariatesEstimator(),
            hyperparameters={})
        custom_copy = custom_element.copy_me()
        self.assertDictEqual(elements_to_dict(custom_element),
                             elements_to_dict(custom_copy))

        # copying this base element is expected to fail
        uncopyable_element = PipelineElement.create(
            'MyUnDeepcopyableObject',
            base_element=GridSearchOptimizer(),
            hyperparameters={})
        with self.assertRaises(Exception):
            uncopyable_element.copy_me()

    def test_estimator_type(self):
        """_estimator_type reports the type of known elements and raises for the faulty dummies."""
        known_types = (
            ('SVC', 'classifier'),
            ('SVR', 'regressor'),
            ('PCA', None),
        )
        for element_name, expected_type in known_types:
            estimator = PipelineElement(element_name)
            self.assertEqual(estimator._estimator_type, expected_type)

        # each of these dummies must make the type lookup fail
        faulty_dummies = (DummyEstimatorWrongType,
                          DummyTransformerWithPredict,
                          DummyEstimatorNoPredict)
        for dummy_class in faulty_dummies:
            estimator = PipelineElement.create('Dummy', dummy_class(), {})
            with self.assertRaises(NotImplementedError):
                _ = estimator._estimator_type

    def test_sanity_check_item_for_add(self):
        """Only photon elements may be added; raw sklearn objects and Preprocessing raise TypeError."""
        valid_type = PipelineElement('StandardScaler')
        valid_type2 = CallbackElement('my_callback', None)
        invalid_type = StandardScaler()
        invalid_type2 = Preprocessing()

        accepted = (valid_type, valid_type2)
        rejected = (invalid_type, invalid_type2)

        # the check itself
        check = PipelineElement.sanity_check_element_type_for_building_photon_pipes
        for item in accepted:
            check(item, PipelineElement)
        for item in rejected:
            with self.assertRaises(TypeError):
                check(item, PipelineElement)

        # the same rules hold for add() of every container class
        for photon_class in [Stack, Switch, Branch, Preprocessing]:
            # Preprocessing takes no name, the other containers need one
            if photon_class is Preprocessing:
                container = photon_class()
            else:
                container = photon_class('tmp_instance')

            for item in accepted:
                container.add(item)
            for item in rejected:
                with self.assertRaises(TypeError):
                    container.add(item)
Exemple #4
0
class PipelineTests(PhotonBaseTest):
    def setUp(self):
        # Shared fixtures for every test: the breast cancer data set plus
        # photon elements and their plain sklearn counterparts, all seeded
        # with the same random_state so that results can be compared.

        # return_X_y is given by keyword: scikit-learn declares the
        # parameters of its dataset loaders keyword-only, so the positional
        # form load_breast_cancer(True) raises a TypeError there.
        self.X, self.y = load_breast_cancer(return_X_y=True)

        # Photon Version
        self.p_pca = PipelineElement("PCA", {}, random_state=3)
        self.p_svm = PipelineElement("SVC", {}, random_state=3)
        self.p_ss = PipelineElement("StandardScaler", {})
        self.p_dt = PipelineElement("DecisionTreeClassifier", random_state=3)

        # photon element wrapping a dummy that transforms X, y and kwargs
        dummy_element = DummyYAndCovariatesTransformer()
        self.dummy_photon_element = PipelineElement.create(
            "DummyTransformer", dummy_element, {})

        # sklearn version
        self.sk_pca = PCA(random_state=3)
        self.sk_svc = SVC(random_state=3)
        self.sk_ss = StandardScaler()
        self.sk_dt = DecisionTreeClassifier(random_state=3)

    def test_regular_use(self):
        # A fitted photon pipeline has to agree with (a) its own elements
        # applied step by step and (b) an equivalent plain sklearn pipeline.
        photon_steps = [("PCA", self.p_pca), ("SVC", self.p_svm)]
        photon_pipe = PhotonPipeline(photon_steps)
        photon_pipe.fit(self.X, self.y)

        pipe_X, _, _ = photon_pipe.transform(self.X)
        pipe_y = photon_pipe.predict(self.X)

        # The elements are handed over by reference, so self.p_pca and
        # self.p_svm should already be fitted and usable on their own.
        elementwise_X, _, _ = self.p_pca.transform(self.X)
        elementwise_y = self.p_svm.predict(elementwise_X)

        self.assertTrue(np.array_equal(pipe_X, elementwise_X))
        self.assertTrue(np.array_equal(pipe_y, elementwise_y))

        sk_steps = [("PCA", self.sk_pca), ("SVC", self.sk_svc)]
        sk_pipe = SKPipeline(sk_steps)
        sk_pipe.fit(self.X, self.y)

        sk_y = sk_pipe.predict(self.X)
        self.assertTrue(np.array_equal(pipe_y, sk_y))

        # The transformed data is not compared against sklearn: per the
        # original author the sklearn pipeline offers no transform function
        # for this setup.

    def test_add_preprocessing(self):
        # _add_preprocessing is expected to put the Preprocessing object in
        # front of the existing steps and to expose it under the step name
        # "Preprocessing".
        preprocessing = Preprocessing()
        preprocessing += PipelineElement("LabelEncoder")

        pipe_steps = [("PCA", self.p_pca), ("SVC", self.p_svm)]
        photon_pipe = PhotonPipeline(pipe_steps)
        photon_pipe._add_preprocessing(preprocessing)

        # two original steps + the preprocessing
        self.assertEqual(len(photon_pipe.named_steps), 3)

        # elements holds (name, element) tuples; check the leading element
        leading_element = photon_pipe.elements[0][1]
        self.assertEqual(leading_element, preprocessing)
        self.assertEqual(photon_pipe.named_steps["Preprocessing"],
                         preprocessing)

    def test_no_estimator(self):
        # For a pipeline consisting of transformers only, predict() has to
        # return the same data as transform(), and both have to match the
        # sklearn scaler + PCA applied by hand.
        transformer_steps = [("StandardScaler", self.p_ss),
                             ("PCA", self.p_pca)]
        pipe = PhotonPipeline(transformer_steps)
        pipe.fit(self.X, self.y)

        transformed, _, _ = pipe.transform(self.X)
        predicted = pipe.predict(self.X)
        self.assertTrue(np.array_equal(predicted, transformed))

        # manual sklearn reference: fit and apply each step separately
        self.sk_ss.fit(self.X)
        scaled = self.sk_ss.transform(self.X)
        self.sk_pca.fit(scaled)
        reference = self.sk_pca.transform(scaled)

        for photon_output in (transformed, predicted):
            self.assertTrue(np.array_equal(photon_output, reference))

    def test_y_and_covariates_transformation(self):
        # The dummy element changes X, y and the kwargs. It must be skipped
        # when no y is given, and must receive and transform all three
        # inputs when y is present.
        X = np.ones((200, 50))
        y = np.ones((200, )) + 2
        kwargs = {"sample1": np.ones((200, 5))}

        photon_pipe = PhotonPipeline([("DummyTransformer",
                                       self.dummy_photon_element)])

        # if y is None, all y transformers should be ignored
        Xt2, yt2, kwargst2 = photon_pipe.transform(X, None, **kwargs)
        self.assertTrue(np.array_equal(Xt2, X))
        self.assertIsNone(yt2)
        # Compare the kwargs entry by entry: np.array_equal on two dicts
        # falls back to dict equality, which only succeeds while the values
        # are the very same array objects and raises on equal copies.
        self.assertEqual(set(kwargst2), set(kwargs))
        for key, expected in kwargs.items():
            self.assertTrue(np.array_equal(kwargst2[key], expected))

        # if y is given, all y transformers should be working
        Xt, yt, kwargst = photon_pipe.transform(X, y, **kwargs)

        # assure that data is delivered to the element correctly
        received = self.dummy_photon_element.base_element
        self.assertTrue(np.array_equal(X, received.X))
        self.assertTrue(np.array_equal(y, received.y))
        self.assertTrue(
            np.array_equal(kwargs["sample1"], received.kwargs["sample1"]))

        # assure that data is transformed correctly
        self.assertTrue(np.array_equal(Xt, X - 1))
        self.assertTrue(np.array_equal(yt, y + 1))
        self.assertIn("sample1_edit", kwargst)
        self.assertTrue(
            np.array_equal(kwargst["sample1_edit"], kwargs["sample1"] + 5))

    def test_predict_with_training_flag(self):
        # Reference: sklearn pipeline trained on labels shifted by hand.
        sk_pipe = SKPipeline([("SS", self.sk_ss), ("SVC", self.sk_svc)])
        sk_pipe.fit(self.X, self.y + 1)
        sk_pred = sk_pipe.predict(self.X)

        # Photon: the unmodified labels go in; the dummy element "YT"
        # between scaler and SVC edits the labels during the pipeline.
        photon_steps = [
            ("SS", self.p_ss),
            ("YT", self.dummy_photon_element),
            ("SVC", self.p_svm),
        ]
        p_pipe = PhotonPipeline(photon_steps)
        p_pipe.fit(self.X, self.y)
        p_pred = p_pipe.predict(self.X)

        # the dummy element must have been fed the standardized data
        expected_input = self.sk_ss.transform(self.X)
        seen_by_dummy = self.dummy_photon_element.base_element.X
        self.assertTrue(np.array_equal(expected_input, seen_by_dummy))

        # both routes must end in identical predictions
        self.assertTrue(np.array_equal(sk_pred, p_pred))

    def test_inverse_tansform(self):
        # Part 1 - scaler + PCA: photon has to map the transformed data
        # back exactly like the sklearn pipeline does.
        sk_pipe = SKPipeline([("SS", self.sk_ss), ("PCA", self.sk_pca)])
        sk_pipe.fit(self.X, self.y)
        sk_reduced = sk_pipe.transform(self.X)
        sk_roundtrip = sk_pipe.inverse_transform(sk_reduced)

        photon_pipe = PhotonPipeline([("SS", self.p_ss), ("PCA", self.p_pca)])
        photon_pipe.fit(self.X, self.y)
        photon_reduced, _, _ = photon_pipe.transform(self.X)
        photon_roundtrip, _, _ = photon_pipe.inverse_transform(photon_reduced)

        self.assertTrue(np.array_equal(sk_roundtrip, photon_roundtrip))

        # Part 2 - pipeline starting with a Stack: the feature importances
        # mapped back must have as many columns as the input data.
        stacked_steps = [
            ("stack", Stack("stack", [self.p_pca])),
            ("StandardScaler", PipelineElement("StandardScaler")),
            ("LinearSVC", PipelineElement("LinearSVC")),
        ]
        stack_pipeline = PhotonPipeline(stacked_steps)
        stack_pipeline.fit(self.X, self.y)

        importances = stack_pipeline.feature_importances_
        restored, _, _ = stack_pipeline.inverse_transform(importances)
        n_input_features = self.X.shape[1]
        self.assertEqual(restored.shape[1], n_input_features)

    # TODO: add tests for kwargs

    def test_predict_proba(self):
        # Class probabilities computed by photon must equal those of the
        # sklearn pipeline. Note that the final step is labelled "SVC" in
        # both pipelines although it holds the decision tree fixture.
        sk_steps = [("SS", self.sk_ss), ("SVC", self.sk_dt)]
        sk_pipe = SKPipeline(sk_steps)
        sk_pipe.fit(self.X, self.y)
        expected_proba = sk_pipe.predict_proba(self.X)

        photon_steps = [("SS", self.p_ss), ("SVC", self.p_dt)]
        photon_pipe = PhotonPipeline(photon_steps)
        photon_pipe.fit(self.X, self.y)
        actual_proba = photon_pipe.predict_proba(self.X)

        self.assertTrue(np.array_equal(expected_proba, actual_proba))

    def test_copy_me(self):
        # Build a nested pipeline (switch, stack containing a branch,
        # callback, estimator) and check that copy_me() carries over the
        # structure, flags, hyperparameters and the callback function.
        switch = Switch("my_copy_switch")
        switch += PipelineElement("StandardScaler")
        switch += PipelineElement("RobustScaler", test_disabled=True)

        stack = Stack("RandomStack")
        stack += PipelineElement("SVC")
        branch = Branch("Random_Branch")
        branch += PipelineElement("PCA",
                                  hyperparameters={"n_components": [5, 10]})
        branch += PipelineElement("DecisionTreeClassifier")
        stack += branch

        steps = [
            ("SimpleImputer", PipelineElement("SimpleImputer")),
            ("my_copy_switch", switch),
            ("RandomStack", stack),
            ("Callback1", CallbackElement("tmp_callback", np.mean)),
            ("PhotonVotingClassifier",
             PipelineElement("PhotonVotingClassifier")),
        ]
        photon_pipe = PhotonPipeline(steps)

        clone = photon_pipe.copy_me()

        self.assertEqual(photon_pipe.random_state, clone.random_state)
        self.assertEqual(len(clone.elements), 5)

        # third step: the stack
        cloned_stack = clone.elements[2][1]
        self.assertEqual(cloned_stack.name, "RandomStack")

        # second switch element was created with test_disabled=True
        cloned_switch = clone.named_steps["my_copy_switch"]
        self.assertTrue(cloned_switch.elements[1].test_disabled)

        # stack -> branch -> PCA keeps its (prefixed) hyperparameters
        cloned_pca = clone.elements[2][1].elements[1].elements[0]
        self.assertDictEqual(cloned_pca.hyperparameters,
                             {"PCA__n_components": [5, 10]})

        # fourth step: the callback with its delegate function
        self.assertIsInstance(clone.elements[3][1], CallbackElement)
        cloned_callback = clone.named_steps["tmp_callback"]
        self.assertEqual(cloned_callback.delegate_function, np.mean)

    def test_random_state(self):
        # A random_state set on the pipeline is expected to show up on the
        # pipeline's elements after fitting.
        photon_pipe = PhotonPipeline([("SS", self.p_ss),
                                      ("PCA", PipelineElement("PCA")),
                                      ("SVC", self.p_dt)])
        photon_pipe.random_state = 666
        photon_pipe.fit(self.X, self.y)
        # self.p_dt is created with random_state=3 in setUp, so a match here
        # means the pipeline's value replaced it
        self.assertEqual(self.p_dt.random_state, photon_pipe.random_state)
        # presumably elements[1] is the ("PCA", element) step and [-1] picks
        # the element out of that tuple - the PCA was built without a seed
        self.assertEqual(photon_pipe.elements[1][-1].random_state,
                         photon_pipe.random_state)
        self.assertEqual(self.p_dt.random_state, 666)