Ejemplo n.º 1
0
    def test_performance_constraints(self):
        """Check that optimization constraints limit how many inner folds are computed.

        Three scenarios are exercised against the same fixture data:

        A. a single constraint that the first fold does not satisfy,
        B. a list of constraints of which one is not satisfied by the first fold,
        C. a list of constraints that are all satisfied.
        """

        def fit_with(constraints):
            # Build a fresh InnerFoldManager from the shared fixture with the
            # given constraints and return the result of fitting it.
            manager = InnerFoldManager(
                self.pipe.copy_me,
                self.config,
                self.optimization,
                self.cross_validation,
                self.outer_fold_id,
                optimization_constraints=constraints,
            )
            return manager.fit(self.X, self.y)

        # A: single constraint. The first fold has an accuracy of 0.874, which
        # is below the 0.95 threshold, so computation is expected to stop after
        # the first fold, i.e. only one inner fold is recorded.
        photon_results_config_item = fit_with(
            MinimumPerformance("accuracy", 0.95, "first"))
        self.assertEqual(len(photon_results_config_item.inner_folds), 1)

        # B: list of constraints. Accuracy passes (0.874 in the first fold is
        # above the 0.85 threshold), but specificity stops the computation
        # (0.78 in the first fold is below the 0.8 threshold).
        photon_results_config_item = fit_with([
            MinimumPerformance("accuracy", 0.85, "first"),
            MinimumPerformance("specificity", 0.8, "first"),
        ])
        self.assertEqual(len(photon_results_config_item.inner_folds), 1)

        # C: list of constraints that should all pass, so no fold is skipped
        # and four inner folds are expected.
        photon_results_config_item = fit_with([
            MinimumPerformance("accuracy", 0.75, "all"),
            MinimumPerformance("specificity", 0.75, "all"),
        ])
        self.assertEqual(len(photon_results_config_item.inner_folds), 4)
class MinimumPerformanceTest(PhotonBaseConstraintTest):
    """Tests for the MinimumPerformance constraint.

    Extends the checks of PhotonBaseConstraintTest with expectations that are
    specific to MinimumPerformance.
    """

    def setUp(self):
        """Run the base fixture setup and create the constraint under test."""
        super(MinimumPerformanceTest, self).setUp()
        self.constraint_object = MinimumPerformance(
            metric="f1_score",
            threshold=0,
            strategy="first",
        )

    def test_shall_continue(self):
        """Check shall_continue() for several metric/threshold/strategy settings.

        The base-class checks run first; afterwards the constraint is
        reconfigured step by step and evaluated against the fixture items
        ``dummy_config_item`` and ``dummy_linear_config_item`` (presumably
        provided by the base class setUp -- not visible here).
        """
        super(MinimumPerformanceTest, self).test_shall_continue()

        constraint = self.constraint_object

        # error metric on the plain dummy item (strategy is still "first")
        constraint.metric = "mean_squared_error"
        constraint.threshold = 0
        self.assertEqual(
            constraint.shall_continue(self.dummy_config_item), False)

        constraint.threshold = 1
        constraint.strategy = "mean"
        self.assertEqual(
            constraint.shall_continue(self.dummy_config_item), True)

        # dummy item with linear values, score metric
        constraint.metric = "f1_score"
        constraint.threshold = 0.5
        score_expectations = (
            ("first", False),
            ("mean", True),
            ("all", False),
        )
        for strategy_name, expected in score_expectations:
            constraint.strategy = strategy_name
            self.assertEqual(
                constraint.shall_continue(self.dummy_linear_config_item),
                expected)

        # dummy item with linear values, error metric
        constraint.metric = "mean_squared_error"
        constraint.threshold = 0.5
        error_expectations = (
            ("first", True),
            ("mean", True),
            ("all", False),
        )
        for strategy_name, expected in error_expectations:
            constraint.strategy = strategy_name
            self.assertEqual(
                constraint.shall_continue(self.dummy_linear_config_item),
                expected)
Ejemplo n.º 3
0
    def test_prepare(self):
        """Check the state an OuterFoldManager has after its two prepare steps.

        After ``_prepare_optimization()`` and ``_prepare_data(X, y)`` the test
        expects: copied constraint objects, a ready GridSearchOptimizer, an
        empty list of tested configs, and data split according to the outer
        fold's train/test indices.
        """
        self.optimization_info.performance_constraints = [
            DummyPerformance(self.optimization_info.best_config_metric),
            MinimumPerformance('mean_squared_error', 75)
        ]
        outer_fold_man = OuterFoldManager(self.pipe,
                                          self.optimization_info,
                                          self.outer_fold_id,
                                          self.cv_info,
                                          result_obj=MDBOuterFold(fold_nr=1))

        outer_fold_man._prepare_optimization()
        outer_fold_man._prepare_data(self.X, self.y)

        # test that performance constraints are copies
        # (assertIsInstance, not assertTrue(obj, type): the latter would treat
        # the type as the failure message and never check it)
        self.assertIsInstance(outer_fold_man.constraint_objects, list)
        self.assertEqual(len(outer_fold_man.constraint_objects), 2)
        for ico, copied_object in enumerate(outer_fold_man.constraint_objects):
            self.assertIsNot(
                self.optimization_info.performance_constraints[ico],
                copied_object)

        # test that the optimizer is prepared and can generate our two configs
        self.assertIsNotNone(outer_fold_man.optimizer)
        self.assertIsInstance(outer_fold_man.optimizer, GridSearchOptimizer)
        self.assertEqual(len(list(outer_fold_man.optimizer.ask)), 2)

        # make sure there are no leftovers in the result tree: no config has
        # been recorded as tested yet
        self.assertEqual(len(outer_fold_man.result_object.tested_config_list),
                         0)

        # test that data is split (we only check y because the split method is
        # already tested, we just make sure it is applied)
        current_fold = self.cv_info.outer_folds[self.outer_fold_id]
        nr_train = len(current_fold.train_indices)
        self.assertEqual(len(outer_fold_man._validation_y), nr_train)
        nr_test = len(current_fold.test_indices)
        self.assertEqual(len(outer_fold_man._test_y), nr_test)

        # test that the sample counts are stored in the result tree
        self.assertEqual(
            outer_fold_man.result_object.number_samples_validation, nr_train)
        self.assertEqual(outer_fold_man.result_object.number_samples_test,
                         nr_test)
Ejemplo n.º 4
0
    optimizer="sk_opt",
    optimizer_params={"n_configurations": 25},
    metrics=["mean_squared_error", "pearson_correlation"],
    best_config_metric="mean_squared_error",
    outer_cv=KFold(n_splits=3, shuffle=True),
    inner_cv=KFold(n_splits=3),
    eval_final_performance=True,
    verbosity=1,
    output_settings=OutputSettings(
        project_folder="./result_folder",
        mongodb_connect_url="mongodb://localhost:27017/photon_results",
        save_output=True,
        plots=True,
    ),
    performance_constraints=[
        MinimumPerformance("mean_squared_error", 35, "first"),
        MinimumPerformance("pearson_correlation", 0.7, "all"),
    ],
)

# Add the pipeline steps: a StandardScaler followed by a
# RandomForestRegressor whose n_estimators hyperparameter is declared
# as IntegerRange(5, 50).
my_pipe += PipelineElement("StandardScaler")
forest_hyperparameters = {"n_estimators": IntegerRange(5, 50)}
my_pipe += PipelineElement("RandomForestRegressor",
                           hyperparameters=forest_hyperparameters)

# Train the pipeline on the data
my_pipe.fit(X, y)

# Optionally show the results in the web-based PHOTON Investigator tool
# Investigator.show(my_pipe)
 def setUp(self):
     """Run the parent fixture setup and create the constraint under test."""
     super(MinimumPerformanceTest, self).setUp()
     constraint = MinimumPerformance(
         metric="f1_score",
         threshold=0,
         strategy="first",
     )
     self.constraint_object = constraint
Ejemplo n.º 6
0
# Design the pipeline.
# The nested settings objects are created first (in the same order in which
# the call below uses them) and then handed to the Hyperpipe constructor.
outer_cv = KFold(n_splits=3, shuffle=True)
inner_cv = KFold(n_splits=3)

output_settings = OutputSettings(
    project_folder="./result_folder",
    mongodb_connect_url="mongodb://localhost:27017/photon_results",
    save_output=True,
    plots=True,
)

# Performance constraints handed to the Hyperpipe, one per metric.
performance_constraints = [
    MinimumPerformance("mean_squared_error", 35, "first"),
    MinimumPerformance("pearson_correlation", 0.7, "all"),
]

my_pipe = Hyperpipe(
    name="basic_svm_pipe_no_performance",
    optimizer="sk_opt",
    optimizer_params={"n_configurations": 25},
    metrics=["mean_squared_error", "pearson_correlation"],
    best_config_metric="mean_squared_error",
    outer_cv=outer_cv,
    inner_cv=inner_cv,
    eval_final_performance=True,
    verbosity=1,
    output_settings=output_settings,
    performance_constraints=performance_constraints,
)

# Pipeline steps: scaling, then a random forest regressor whose
# n_estimators hyperparameter is declared as IntegerRange(5, 50).
my_pipe += PipelineElement("StandardScaler")
forest_hyperparameters = {"n_estimators": IntegerRange(5, 50)}
my_pipe += PipelineElement("RandomForestRegressor",
                           hyperparameters=forest_hyperparameters)


# Train the pipeline on the data
my_pipe.fit(X, y)

# Optionally show the results in the web-based PHOTON Investigator tool
# Investigator.show(my_pipe)

# The best performing pipeline can also be saved for further use
# my_pipe.save_optimum_pipe('/home/photon_user/photon_test/optimum_pipe.photon')