Example #1
 def test_balancing_get_weights_treed_single_label(self):
     Y = np.array([0] * 80 + [1] * 20)
     balancing = Balancing(strategy='weighting')
     init_params, fit_params = balancing.get_weights(
         Y, 'adaboost', None, None, None)
     self.assertTrue(np.allclose(fit_params['classifier:sample_weight'],
                                 np.array([0.4] * 80 + [1.6] * 20)))
Example #2
 def test_balancing_get_weights_treed_multilabel(self):
     Y = np.array([[0, 0, 0]] * 100 + [[1, 0, 0]] * 100 + [[0, 1, 0]] * 100 +
                  [[1, 1, 0]] * 100 + [[0, 0, 1]] * 100 + [[1, 0, 1]] * 10)
     balancing = Balancing(strategy='weighting')
     init_params, fit_params = balancing.get_weights(
         Y, 'adaboost', None, None, None)
     self.assertTrue(np.allclose(fit_params['classifier:sample_weight'],
                                 np.array([0.4] * 500 + [4.0] * 10)))
Example #3
 def test_balancing_get_weights_svm_sgd(self):
     Y = np.array([0] * 80 + [1] * 20)
     balancing = Balancing(strategy='weighting')
     init_params, fit_params = balancing.get_weights(
         Y, 'libsvm_svc', None, None, None)
     self.assertEqual(("classifier:class_weight", "balanced"),
                      list(init_params.items())[0])
     init_params, fit_params = balancing.get_weights(
         Y, None, 'liblinear_svc_preprocessor', None, None)
     self.assertEqual(("preprocessor:class_weight", "balanced"),
                      list(init_params.items())[0])
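Note on the two return values above: get_weights places constructor-level balancing (e.g. "classifier:class_weight") in init_params and per-sample weights (e.g. "classifier:sample_weight") in fit_params. The following is a minimal sketch of how these two channels are typically consumed, written against plain scikit-learn estimators for illustration only, not auto-sklearn internals:

    import numpy as np
    from sklearn.svm import SVC
    from sklearn.ensemble import AdaBoostClassifier
    from sklearn.utils.class_weight import compute_sample_weight

    Y = np.array([0] * 80 + [1] * 20)
    X = np.random.rand(100, 5)

    # init_params route ("classifier:class_weight"): estimators such as
    # libsvm_svc take class balancing as a constructor argument.
    svc = SVC(class_weight='balanced').fit(X, Y)

    # fit_params route ("classifier:sample_weight"): tree/boosting models
    # receive per-sample weights at fit() time instead.
    weights = compute_sample_weight('balanced', Y)  # 0.625 / 2.5 for the 80/20 split
    ada = AdaBoostClassifier().fit(X, Y, sample_weight=weights)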
Example #4
 def test_balancing_get_weights_treed_single_label(self):
     Y = np.array([0] * 80 + [1] * 20)
     balancing = Balancing(strategy='weighting')
     init_params, fit_params = balancing.get_weights(
         Y, 'adaboost', None, None, None)
     self.assertAlmostEqual(
         np.mean(fit_params['classifier:sample_weight']), 1,
     )
     np.testing.assert_allclose(
         fit_params['classifier:sample_weight'],
         np.array([0.625] * 80 + [2.5] * 20),
     )
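The weights asserted here follow the "balanced" rule n_samples / (n_classes * class_count), so the per-sample mean is 1 (Example #1 instead normalizes the inverse class frequencies by their mean across classes, which yields 0.4 / 1.6). A minimal sketch that reproduces the asserted values; an illustration of the rule, not the library's internal code:

    import numpy as np

    Y = np.array([0] * 80 + [1] * 20)
    classes, counts = np.unique(Y, return_counts=True)
    # weight for class c: n_samples / (n_classes * count_c)
    class_weight = {c: len(Y) / (len(classes) * n) for c, n in zip(classes, counts)}
    sample_weight = np.array([class_weight[y] for y in Y])

    assert np.isclose(sample_weight.mean(), 1.0)
    assert np.allclose(sample_weight, [0.625] * 80 + [2.5] * 20)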
Example #5
 def test_balancing_get_weights_treed_multilabel(self):
     Y = np.array([[0, 0, 0]] * 100 + [[1, 0, 0]] * 100 + [[0, 1, 0]] * 100 +
                  [[1, 1, 0]] * 100 + [[0, 0, 1]] * 100 + [[1, 0, 1]] * 10)
     balancing = Balancing(strategy='weighting')
     init_params, fit_params = balancing.get_weights(
         Y, 'adaboost', None, None, None)
     self.assertAlmostEqual(
         np.mean(fit_params['classifier:sample_weight']), 1,
     )
     np.testing.assert_allclose(
         fit_params['classifier:sample_weight'],
         np.array([0.85] * 500 + [8.5] * 10),
     )
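The multilabel weights asserted above can be reproduced by treating every distinct label combination as its own class and applying the same balanced rule; a sketch under that assumption, not auto-sklearn's internal implementation:

    import numpy as np

    Y = np.array([[0, 0, 0]] * 100 + [[1, 0, 0]] * 100 + [[0, 1, 0]] * 100 +
                 [[1, 1, 0]] * 100 + [[0, 0, 1]] * 100 + [[1, 0, 1]] * 10)
    _, inverse, counts = np.unique(Y, axis=0, return_inverse=True, return_counts=True)
    inverse = inverse.reshape(-1)  # guard against inverse-shape differences across NumPy versions
    sample_weight = len(Y) / (len(counts) * counts[inverse])

    assert np.isclose(sample_weight.mean(), 1.0)
    assert np.allclose(sample_weight, [0.85] * 500 + [8.5] * 10)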
Example #6
    def _get_pipeline(self):
        steps = []

        default_dataset_properties = {'target_type': 'classification'}

        # Add the always active preprocessing components

        steps.extend([["one_hot_encoding", OneHotEncoder()],
                      ["imputation", Imputation()],
                      [
                          "rescaling",
                          rescaling_components.RescalingChoice(
                              default_dataset_properties)
                      ], ["balancing", Balancing()]])

        # Add the preprocessing component
        steps.append([
            'preprocessor',
            feature_preprocessing_components.FeaturePreprocessorChoice(
                default_dataset_properties)
        ])

        # Add the classification component
        steps.append([
            'classifier',
            classification_components.ClassifierChoice(
                default_dataset_properties)
        ])
        return steps
Example #7
    def _get_pipeline_steps(self, dataset_properties):
        steps = []

        default_dataset_properties = {'target_type': 'classification'}
        if dataset_properties is not None and isinstance(
                dataset_properties, dict):
            default_dataset_properties.update(dataset_properties)

        steps.extend([
            ["data_preprocessing",
             DataPreprocessor(dataset_properties=default_dataset_properties)],
            ["balancing", Balancing()],
            ["feature_preprocessor",
             feature_preprocessing_components.FeaturePreprocessorChoice(
                 default_dataset_properties)],
            ["classifier",
             classification_components.ClassifierChoice(
                 default_dataset_properties)],
        ])

        return steps
Example #8
    def fit_transformer(self, X, y, fit_params=None):

        if fit_params is None:
            fit_params = {}

        if self.configuration['balancing:strategy'] == 'weighting':
            balancing = Balancing(strategy='weighting')
            _init_params, _fit_params = balancing.get_weights(
                y, self.configuration['classifier:__choice__'],
                self.configuration['preprocessor:__choice__'], {}, {})
            _init_params.update(self._init_params)
            self.set_hyperparameters(configuration=self.configuration,
                                     init_params=_init_params)

            if _fit_params is not None:
                fit_params.update(_fit_params)

        X, fit_params = super().fit_transformer(X, y, fit_params=fit_params)

        return X, fit_params
Example #9
    def pre_transform(self, X, y, fit_params=None):
        self.num_targets = 1 if len(y.shape) == 1 else y.shape[1]

        if fit_params is None:
            fit_params = {}

        if self.configuration['balancing:strategy'] == 'weighting':
            balancing = Balancing(strategy='weighting')
            _init_params, _fit_params = balancing.get_weights(
                y, self.configuration['classifier:__choice__'],
                self.configuration['preprocessor:__choice__'], {}, {})
            self.set_hyperparameters(configuration=self.configuration,
                                     init_params=_init_params)

            if _fit_params is not None:
                fit_params.update(_fit_params)

        X, fit_params = super(SimpleClassificationPipeline,
                              self).pre_transform(X, y, fit_params=fit_params)

        return X, fit_params
Example #10
    def _get_pipeline_steps(self):
        steps = []
        print(" going execute pipeline autosklearn")
        default_dataset_properties = {'target_type': 'classification'}

        steps.extend([
            ["feature_preprocessor",
             feature_preprocessing_components.FeaturePreprocessorChoice(
                 default_dataset_properties)],
            ["data_preprocessing",
                DataPreprocessor(dataset_properties=default_dataset_properties)],
            ["balancing",
                Balancing()],
            ['classifier',
                classification_components.ClassifierChoice(
                    default_dataset_properties)]
        ])

        return steps