Ejemplo n.º 1
0
    def test_predict_proba(self):
        """Check ``predict_proba`` on an SVC and a GaussianProcessClassifier.

        The SVC element is expected to return ``None``; the Gaussian process
        element is expected to return class probabilities, of which the first
        row is compared against reference values.
        """
        self.svc_pipe_element.fit(self.X, self.y)
        # Identity check: the contract under test is "returns None", not
        # "returns something that compares equal to None".
        self.assertIsNone(self.svc_pipe_element.predict_proba(self.X))

        gpc = PipelineElement('GaussianProcessClassifier')
        gpc.fit(self.X, self.y)
        # Probabilities come out of a numerical optimisation, so compare with
        # a tolerance instead of bit-exact equality.
        np.testing.assert_allclose(
            gpc.predict_proba(self.X)[0],
            [0.5847072926551391, 0.4152927073448609])
Ejemplo n.º 2
0
class PipelineElementTests(unittest.TestCase):
    """Tests for ``PipelineElement`` and the containers that hold elements.

    Covers creation, fit/transform/predict delegation, hyperparameter grid
    generation, parameter setting, random-state propagation, copying,
    estimator-type detection and the type check applied when adding items.
    """

    def setUp(self):
        """Create a PCA element, an SVC element and the shared test data."""
        self.pca_pipe_element = PipelineElement('PCA',
                                                {'n_components': [1, 2]},
                                                test_disabled=True)
        self.svc_pipe_element = PipelineElement('SVC', {
            'C': [0.1, 1],
            'kernel': ['rbf', 'sigmoid']
        })
        # Passed by keyword because newer scikit-learn releases no longer
        # accept ``return_X_y`` positionally.
        self.X, self.y = load_breast_cancer(return_X_y=True)
        self.kwargs = {'covariates': self.y}
        # Expected results of the dummy elements, which add 1 to their input.
        self.Xt = self.X + 1
        self.yt = self.y + 1
        self.kwargst = {'covariates': self.y + 1}

    def test_create_failure(self):
        """An unknown element name raises ``NameError``."""
        with self.assertRaises(NameError):
            PipelineElement('NONSENSEName', {})

    def test_pipeline_element_create(self):
        """Base element, ``test_disabled`` flag and name are set on creation."""
        # base_element is an instance of the requested class
        self.assertIsInstance(self.pca_pipe_element.base_element, PCA)

        # test_disabled is passed correctly
        self.assertTrue(self.pca_pipe_element.test_disabled)

        # correct name
        self.assertEqual(self.pca_pipe_element.name, 'PCA')

    def test_fit(self):
        """Fitting delegates to the base element and yields known values."""
        self.pca_pipe_element.fit(self.X, self.y)
        pca_components = self.pca_pipe_element.base_element.components_
        self.assertEqual(pca_components.shape, (30, 30))
        # Fitted values are floating point results; compare with a tolerance
        # so the test does not depend on bit-exact linear algebra.
        self.assertAlmostEqual(pca_components[0, 0], 0.005086232018734175)

        self.svc_pipe_element.fit(self.X, self.y)
        # assert_allclose broadcasts, so this works whether the intercept is
        # stored as a scalar or as a one-element array.
        np.testing.assert_allclose(
            self.svc_pipe_element.base_element._intercept_,
            -0.3753900173819406)

    def test_transform(self):
        """``transform`` returns an ``(X, y, kwargs)`` triple with new X."""
        self.pca_pipe_element.fit(self.X, self.y)

        Xt, _, _ = self.pca_pipe_element.transform(self.X)
        self.assertEqual(Xt.shape, (569, 30))
        self.assertAlmostEqual(Xt[0, 0], 1160.1425737041347)

    def test_predict(self):
        """``predict`` returns one label per sample."""
        self.svc_pipe_element.fit(self.X, self.y)

        yt = self.svc_pipe_element.predict(self.X)
        self.assertEqual(yt.shape, (569, ))
        self.assertEqual(yt[21], 1)

    def test_predict_proba(self):
        """``predict_proba`` is ``None`` for the SVC and numeric for the GPC."""
        self.svc_pipe_element.fit(self.X, self.y)
        self.assertIsNone(self.svc_pipe_element.predict_proba(self.X))

        gpc = PipelineElement('GaussianProcessClassifier')
        gpc.fit(self.X, self.y)
        # Probabilities come out of a numerical optimisation, so compare with
        # a tolerance instead of bit-exact equality.
        np.testing.assert_allclose(
            gpc.predict_proba(self.X)[0],
            [0.5847072926551391, 0.4152927073448609])

    def test_inverse_transform(self):
        """``inverse_transform`` after ``transform`` restores the input."""
        Xt, _, _ = self.pca_pipe_element.fit(self.X, self.y).transform(self.X)
        X, _, _ = self.pca_pipe_element.inverse_transform(Xt)
        np.testing.assert_array_almost_equal(X, self.X)

    def test_one_hyperparameter_setup(self):
        """One hyperparameter plus ``test_disabled`` gives a 3-entry grid."""
        # sklearn attributes are generated
        self.assertDictEqual(self.pca_pipe_element.hyperparameters, {
            'PCA__n_components': [1, 2],
            'PCA__disabled': [False, True]
        })

        # config_grid is created as expected
        self.assertListEqual(self.pca_pipe_element.generate_config_grid(),
                             [{
                                 'PCA__n_components': 1,
                                 'PCA__disabled': False
                             }, {
                                 'PCA__n_components': 2,
                                 'PCA__disabled': False
                             }, {
                                 'PCA__disabled': True
                             }])

    def test_more_hyperparameters_setup(self):
        """Two hyperparameters give the full cartesian product as grid."""
        # sklearn attributes are generated
        self.assertDictEqual(self.svc_pipe_element.hyperparameters, {
            'SVC__C': [0.1, 1],
            'SVC__kernel': ['rbf', 'sigmoid']
        })

        # config_grid is created as expected
        self.assertListEqual(self.svc_pipe_element.generate_config_grid(),
                             [{
                                 'SVC__C': 0.1,
                                 'SVC__kernel': 'rbf'
                             }, {
                                 'SVC__C': 0.1,
                                 'SVC__kernel': 'sigmoid'
                             }, {
                                 'SVC__C': 1,
                                 'SVC__kernel': 'rbf'
                             }, {
                                 'SVC__C': 1,
                                 'SVC__kernel': 'sigmoid'
                             }])

    def test_no_hyperparameters(self):
        """Without hyperparameters the base element equals a default PCA."""
        pca_sklearn_element = PCA()
        pca_photon_element = PipelineElement('PCA')

        self.assertDictEqual(pca_sklearn_element.__dict__,
                             pca_photon_element.base_element.__dict__)

    def test_set_params(self):
        """``set_params`` updates known parameters and rejects unknown ones."""
        config = {'n_components': 3, 'disabled': False}
        self.pca_pipe_element.set_params(**config)
        self.assertFalse(self.pca_pipe_element.disabled)
        self.assertEqual(self.pca_pipe_element.base_element.n_components, 3)
        with self.assertRaises(ValueError):
            self.pca_pipe_element.set_params(**{'any_weird_param': 1})

    def test_set_random_state(self):
        """Setting ``random_state`` on a branch reaches nested elements."""
        # we handle all elements in one method that is inherited so we capture them all in this test
        random_state = 53
        my_branch = Branch("random_state_branch")
        my_branch += PipelineElement("StandardScaler")
        my_switch = Switch("transformer_Switch")
        my_switch += PipelineElement("LassoFeatureSelection")
        my_switch += PipelineElement("PCA")
        my_branch += my_switch
        my_stack = Stack("Estimator_Stack")
        my_stack += PipelineElement("SVR")
        my_stack += PipelineElement("Ridge")
        my_branch += my_stack
        my_branch += PipelineElement("ElasticNet")

        my_branch.random_state = random_state
        # assertEqual (rather than assertTrue(a == b)) reports both values
        # when the check fails.
        switch_pca = my_switch.elements[1]
        self.assertEqual(switch_pca.random_state, random_state)
        self.assertEqual(switch_pca.base_element.random_state, random_state)
        stack_ridge = my_stack.elements[1]
        self.assertEqual(stack_ridge.random_state, random_state)
        self.assertEqual(stack_ridge.base_element.random_state, random_state)

    def test_adjusted_delegate_call_transformer(self):
        """``transform`` forwards y / covariates only to elements needing them."""
        # check standard transformer
        trans = PipelineElement.create('Transformer',
                                       base_element=DummyTransformer(),
                                       hyperparameters={})
        X, y, kwargs = trans.transform(self.X, self.y, **self.kwargs)
        np.testing.assert_array_equal(X, self.Xt)  # only X should be transformed
        np.testing.assert_array_equal(y, self.y)
        self.assertDictEqual(kwargs, self.kwargs)

        # check transformer needs y
        trans = PipelineElement.create('NeedsYTransformer',
                                       base_element=DummyNeedsYTransformer(),
                                       hyperparameters={})
        X, y, kwargs = trans.transform(self.X, self.y, **self.kwargs)
        np.testing.assert_array_equal(X, self.Xt)
        np.testing.assert_array_equal(y, self.yt)
        self.assertDictEqual(kwargs, self.kwargs)

        trans = PipelineElement.create('NeedsYTransformer',
                                       base_element=DummyNeedsYTransformer(),
                                       hyperparameters={})
        # this time without any kwargs
        X, y, kwargs = trans.transform(self.X, self.y)
        np.testing.assert_array_equal(X, self.Xt)
        np.testing.assert_array_equal(y, self.yt)
        self.assertDictEqual(kwargs, {})

        # check transformer needs covariates
        trans = PipelineElement.create(
            'NeedsCovariatesTransformer',
            base_element=DummyNeedsCovariatesTransformer(),
            hyperparameters={})
        X, y, kwargs = trans.transform(self.X, **self.kwargs)
        np.testing.assert_array_equal(X, self.Xt)
        np.testing.assert_array_equal(kwargs['covariates'],
                                      self.kwargst['covariates'])
        self.assertIsNone(y)

        # check transformer needs covariates and needs y
        trans = PipelineElement.create(
            'NeedsCovariatesAndYTransformer',
            base_element=DummyNeedsCovariatesAndYTransformer(),
            hyperparameters={})
        X, y, kwargs = trans.transform(self.X, self.y, **self.kwargs)
        np.testing.assert_array_equal(X, self.Xt)
        np.testing.assert_array_equal(y, self.yt)
        np.testing.assert_array_equal(kwargs['covariates'],
                                      self.kwargst['covariates'])

    def test_adjusted_delegate_call_estimator(self):
        """``predict`` works with and without covariates passed as kwargs."""
        # check standard estimator
        est = PipelineElement.create('Estimator',
                                     base_element=DummyEstimator(),
                                     hyperparameters={})
        y_pred = est.predict(self.X)
        # DummyEstimator returns X as y predictions
        np.testing.assert_array_equal(y_pred, self.Xt)

        # check estimator needs covariates
        est = PipelineElement.create(
            'Estimator',
            base_element=DummyNeedsCovariatesEstimator(),
            hyperparameters={})
        y_pred = est.predict(self.X, **self.kwargs)
        # DummyEstimator returns X as y predictions
        np.testing.assert_array_equal(y_pred, self.Xt)

    def test_predict_when_no_transform(self):
        """Calling ``transform`` on an estimator yields its predictions as X."""
        # check standard estimator
        est = PipelineElement.create('Estimator',
                                     base_element=DummyEstimator(),
                                     hyperparameters={})
        X, y, _ = est.transform(self.X)
        # DummyEstimator returns X as y predictions
        np.testing.assert_array_equal(X, self.Xt)
        self.assertIsNone(y)

        # check estimator needs covariates
        est = PipelineElement.create(
            'Estimator',
            base_element=DummyNeedsCovariatesEstimator(),
            hyperparameters={})
        X, y, kwargs = est.transform(self.X, **self.kwargs)
        # DummyEstimator returns X as y predictions
        np.testing.assert_array_equal(X, self.Xt)
        np.testing.assert_array_equal(kwargs['covariates'],
                                      self.kwargs['covariates'])
        self.assertIsNone(y)

    def test_copy_me(self):
        """``copy_me`` yields an independent element with equal settings."""
        svc = PipelineElement('SVC', {
            'C': [0.1, 1],
            'kernel': ['rbf', 'sigmoid']
        })
        svc.set_params(**{'C': 0.1, 'kernel': 'sigmoid'})
        # Named ``svc_copy`` so the local does not shadow the stdlib ``copy``
        # module name.
        svc_copy = svc.copy_me()

        self.assertEqual(svc.random_state, svc_copy.random_state)
        self.assertNotEqual(svc_copy.base_element, svc.base_element)
        self.assertDictEqual(elements_to_dict(svc_copy), elements_to_dict(svc))
        self.assertEqual(svc_copy.base_element.C, svc.base_element.C)

        # check if copies are still the same, even when making a copy of a fitted PipelineElement
        copy_after_fit = svc.fit(self.X, self.y).copy_me()
        self.assertDictEqual(elements_to_dict(svc_copy),
                             elements_to_dict(copy_after_fit))

        svc = PipelineElement('SVC', {
            'C': [0.1, 1],
            'kernel': ['rbf', 'sigmoid']
        })
        svc_copy = svc.copy_me()
        self.assertDictEqual(svc_copy.hyperparameters, {
            'SVC__C': [0.1, 1],
            'SVC__kernel': ['rbf', 'sigmoid']
        })
        # changing the copy must not leak back into the original
        svc_copy.base_element.C = 3
        self.assertNotEqual(svc.base_element.C, svc_copy.base_element.C)

        # test custom element
        custom_element = PipelineElement.create(
            'CustomElement',
            base_element=DummyNeedsCovariatesEstimator(),
            hyperparameters={})
        custom_copy = custom_element.copy_me()
        self.assertDictEqual(elements_to_dict(custom_element),
                             elements_to_dict(custom_copy))

        custom_element2 = PipelineElement.create(
            'MyUnDeepcopyableObject',
            base_element=GridSearchOptimizer(),
            hyperparameters={})
        with self.assertRaises(Exception):
            custom_element2.copy_me()

    def test_estimator_type(self):
        """``_estimator_type`` reflects the base element or raises."""
        estimator = PipelineElement('SVC')
        self.assertEqual(estimator._estimator_type, 'classifier')

        estimator = PipelineElement('SVR')
        self.assertEqual(estimator._estimator_type, 'regressor')

        estimator = PipelineElement('PCA')
        self.assertIsNone(estimator._estimator_type)

        # For the dummies below merely reading the attribute must raise.
        estimator = PipelineElement.create('Dummy', DummyEstimatorWrongType(),
                                           {})
        with self.assertRaises(NotImplementedError):
            _ = estimator._estimator_type

        estimator = PipelineElement.create('Dummy',
                                           DummyTransformerWithPredict(), {})
        with self.assertRaises(NotImplementedError):
            _ = estimator._estimator_type

        estimator = PipelineElement.create('Dummy', DummyEstimatorNoPredict(),
                                           {})
        with self.assertRaises(NotImplementedError):
            _ = estimator._estimator_type

    def test_sanity_check_item_for_add(self):
        """Only PHOTON elements may be added; other objects raise TypeError."""
        valid_type = PipelineElement('StandardScaler')
        valid_type2 = CallbackElement('my_callback', None)
        invalid_type = StandardScaler()
        invalid_type2 = Preprocessing()

        PipelineElement.sanity_check_element_type_for_building_photon_pipes(
            valid_type, PipelineElement)
        PipelineElement.sanity_check_element_type_for_building_photon_pipes(
            valid_type2, PipelineElement)

        with self.assertRaises(TypeError):
            PipelineElement.sanity_check_element_type_for_building_photon_pipes(
                invalid_type, PipelineElement)

        with self.assertRaises(TypeError):
            PipelineElement.sanity_check_element_type_for_building_photon_pipes(
                invalid_type2, PipelineElement)

        classes_to_test = [Stack, Switch, Branch, Preprocessing]
        for photon_class in classes_to_test:
            # Preprocessing is constructed without a name; the other
            # containers get a throwaway one.
            if photon_class is Preprocessing:
                instance = photon_class()
            else:
                instance = photon_class('tmp_instance')
            instance.add(valid_type)
            instance.add(valid_type2)
            with self.assertRaises(TypeError):
                instance.add(invalid_type)
            with self.assertRaises(TypeError):
                instance.add(invalid_type2)