Example #1
    def test_named_configs(self, test_num):
        """Tests named optimizer configurations against named objective functions.

        It is prohibitively expensive to test the entire cross product, so we test only a subset of it, chosen so that
        each configuration is tested at least once. (A standalone sketch of this pairing scheme follows this example.)
        """
        optimizer_named_configs = bayesian_optimizer_config_store.list_named_configs()
        num_optimizer_configs = len(optimizer_named_configs)
        objective_function_named_configs = objective_function_config_store.list_named_configs()
        num_objective_function_configs = len(objective_function_named_configs)

        named_optimizer_config = optimizer_named_configs[test_num % num_optimizer_configs]
        named_objective_function_config = objective_function_named_configs[test_num % num_objective_function_configs]

        print("#####################################################################################################")
        print(named_optimizer_config)
        print(named_objective_function_config)

        optimizer_evaluator_config = optimizer_evaluator_config_store.get_config_by_name(name="parallel_unit_tests_config")
        optimizer_config = named_optimizer_config.config_point
        objective_function_config = named_objective_function_config.config_point

        optimizer_evaluator = OptimizerEvaluator(
            optimizer_evaluator_config=optimizer_evaluator_config,
            objective_function_config=objective_function_config,
            optimizer_config=optimizer_config)

        optimizer_evaluation_report = optimizer_evaluator.evaluate_optimizer()

        mlos.global_values.tracer.trace_events.extend(
            optimizer_evaluation_report.execution_trace)
        if not optimizer_evaluation_report.success:
            raise optimizer_evaluation_report.exception

        for objective_name, single_objective_fit_state in optimizer_evaluation_report.regression_model_fit_state:
            with pd.option_context('display.max_columns', 100):
                print(single_objective_fit_state.get_goodness_of_fit_dataframe(DataSetType.TRAIN).tail())
                for optimum_name, optimum_over_time in optimizer_evaluation_report.optima_over_time.items():
                    print("#####################################################################################################")
                    print(optimum_name)
                    print(optimum_over_time.get_dataframe().tail(10))
                    print("#####################################################################################################")
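
The docstring above argues that indexing both config lists with the same test number, modulo each list's length, touches every named config at least once, as long as the test numbers span at least max(num_optimizer_configs, num_objective_function_configs) values (Example #2 below builds that loop explicitly). A minimal, self-contained sketch of the pairing scheme, using made-up config names rather than anything from the MLOS config stores:

# Illustrative names only; the real lists come from the named-config stores.
optimizer_names = ["opt_a", "opt_b", "opt_c"]
objective_names = ["objective_1", "objective_2", "objective_3", "objective_4"]

num_tests = max(len(optimizer_names), len(objective_names))
pairs = [
    (optimizer_names[i % len(optimizer_names)], objective_names[i % len(objective_names)])
    for i in range(num_tests)
]

# Only num_tests pairs are evaluated instead of the full cross product,
# yet every entry of both lists shows up at least once.
assert {optimizer for optimizer, _ in pairs} == set(optimizer_names)
assert {objective for _, objective in pairs} == set(objective_names)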
Example #2
    def test_named_configs(self):
        """Tests named optimizer configurations against named objective functions.

        It is prohibitively expensive to test the entire cross product, so we test only a subset of it, chosen so that
        each configuration is tested at least once.
        """
        optimizer_named_configs = bayesian_optimizer_config_store.list_named_configs()
        num_optimizer_configs = len(optimizer_named_configs)
        objective_function_named_configs = objective_function_config_store.list_named_configs()
        num_objective_function_configs = len(objective_function_named_configs)

        num_tests = max(num_optimizer_configs, num_objective_function_configs)

        with traced(scope_name="parallel_tests"), concurrent.futures.ProcessPoolExecutor(max_workers=4) as executor:
            outstanding_futures = set()

            for i in range(num_tests):
                named_optimizer_config = optimizer_named_configs[i % num_optimizer_configs]
                named_objective_function_config = objective_function_named_configs[i % num_objective_function_configs]

                print("#####################################################################################################")
                print(named_optimizer_config)
                print(named_objective_function_config)

                optimizer_evaluator_config = optimizer_evaluator_config_store.get_config_by_name(name="parallel_unit_tests_config")
                optimizer_config = named_optimizer_config.config_point
                objective_function_config = named_objective_function_config.config_point

                optimizer_evaluator = OptimizerEvaluator(
                    optimizer_evaluator_config=optimizer_evaluator_config,
                    objective_function_config=objective_function_config,
                    optimizer_config=optimizer_config
                )

                future = executor.submit(optimizer_evaluator.evaluate_optimizer)
                outstanding_futures.add(future)

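            # Note: concurrent.futures.wait() returns a (done, not_done) pair of future sets;
            # with return_when=ALL_COMPLETED the not_done set reassigned below should be empty.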
            done_futures, outstanding_futures = concurrent.futures.wait(outstanding_futures, return_when=concurrent.futures.ALL_COMPLETED)

            for future in done_futures:
                optimizer_evaluation_report = future.result()
                assert optimizer_evaluation_report.success
                mlos.global_values.tracer.trace_events.extend(optimizer_evaluation_report.execution_trace)

                with pd.option_context('display.max_columns', 100):
                    print(optimizer_evaluation_report.regression_model_goodness_of_fit_state.get_goodness_of_fit_dataframe(DataSetType.TRAIN).tail())
                    for optimum_name, optimum_over_time in optimizer_evaluation_report.optima_over_time.items():
                        print("#####################################################################################################")
                        print(optimum_name)
                        print(optimum_over_time.get_dataframe().tail(10))
                        print("#####################################################################################################")
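
Example #2 fans the evaluations out to worker processes and then blocks until all of them finish. A stripped-down sketch of the same submit/wait pattern, with a placeholder task standing in for OptimizerEvaluator.evaluate_optimizer (the task is defined at module level so the ProcessPoolExecutor can pickle it):

import concurrent.futures


def evaluate(task_id):
    # Placeholder for optimizer_evaluator.evaluate_optimizer(); returns a dummy result.
    return task_id * task_id


if __name__ == "__main__":
    with concurrent.futures.ProcessPoolExecutor(max_workers=4) as executor:
        outstanding_futures = {executor.submit(evaluate, i) for i in range(8)}
        done_futures, outstanding_futures = concurrent.futures.wait(
            outstanding_futures, return_when=concurrent.futures.ALL_COMPLETED)
        assert not outstanding_futures
        print(sorted(future.result() for future in done_futures))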
Example #3
    def test_defaults(self):
        """Tests default optimizer configurations against default objective functions."""
        optimizer_evaluator_config = optimizer_evaluator_config_store.default
        optimizer_evaluator_config.num_iterations = 50

        # We want to test this functionality so let's make sure that nobody accidentally disables it in the default config.
        #
        assert optimizer_evaluator_config.include_pickled_optimizer_in_report
        assert optimizer_evaluator_config.include_pickled_objective_function_in_report
        assert optimizer_evaluator_config.report_regression_model_goodness_of_fit
        assert optimizer_evaluator_config.report_optima_over_time
        assert optimizer_evaluator_config.include_execution_trace_in_report

        optimizer_config = bayesian_optimizer_config_store.default
        objective_function_config = objective_function_config_store.default

        self.logger.info(optimizer_evaluator_config.to_json(indent=2))
        self.logger.info(optimizer_config.to_json(indent=2))
        self.logger.info(objective_function_config.to_json(indent=2))

        optimizer_evaluator = OptimizerEvaluator(
            optimizer_evaluator_config=optimizer_evaluator_config,
            objective_function_config=objective_function_config,
            optimizer_config=optimizer_config
        )

        optimizer_evaluation_report = optimizer_evaluator.evaluate_optimizer()
        assert optimizer_evaluation_report.success

        for objective_name, regression_model_fit_state in optimizer_evaluation_report.regression_model_fit_state:
            with pd.option_context('display.max_columns', 100):
                self.logger.info(regression_model_fit_state.get_goodness_of_fit_dataframe(DataSetType.TRAIN).tail())
                for optimum_name, optimum_over_time in optimizer_evaluation_report.optima_over_time.items():
                    self.logger.info("#####################################################################################################")
                    self.logger.info(optimum_name)
                    self.logger.info(optimum_over_time.get_dataframe().tail(10))
                    self.logger.info("#####################################################################################################")

        # Now let's do it again with the unpickled optimizer.
        #
        unpickled_objective_function = pickle.loads(optimizer_evaluation_report.pickled_objective_function_initial_state)
        unpickled_optimizer = pickle.loads(optimizer_evaluation_report.pickled_optimizer_initial_state)
        optimizer_evaluator_2 = OptimizerEvaluator(
            optimizer_evaluator_config=optimizer_evaluator_config,
            objective_function=unpickled_objective_function,
            optimizer=unpickled_optimizer
        )

        optimizer_evaluation_report_2 = optimizer_evaluator_2.evaluate_optimizer()
        for objective_name, single_objective_regression_model_fit_state in optimizer_evaluation_report_2.regression_model_fit_state:
            with pd.option_context('display.max_columns', 100):
                self.logger.info(single_objective_regression_model_fit_state.get_goodness_of_fit_dataframe(DataSetType.TRAIN).tail())
                for optimum_name, optimum_over_time in optimizer_evaluation_report_2.optima_over_time.items():
                    self.logger.info("#####################################################################################################")
                    self.logger.info(optimum_name)
                    self.logger.info(optimum_over_time.get_dataframe().tail(10))
                    self.logger.info("#####################################################################################################")



        optimizer_evaluation_report.write_to_disk(target_folder=self.temp_dir)
        restored_evaluation_report = OptimizerEvaluationReport.read_from_disk(target_folder=self.temp_dir)

        assert optimizer_evaluation_report.success == restored_evaluation_report.success
        assert optimizer_evaluation_report.num_optimization_iterations == restored_evaluation_report.num_optimization_iterations
        assert optimizer_evaluation_report.evaluation_frequency == restored_evaluation_report.evaluation_frequency
        assert optimizer_evaluation_report.exception == restored_evaluation_report.exception
        assert optimizer_evaluation_report.exception_traceback == restored_evaluation_report.exception_traceback

        with open(os.path.join(self.temp_dir, "execution_trace.json")) as in_file:
            trace = json.load(in_file)
        assert len(trace) > 100
        assert all(key in trace[0] for key in ["ts", "name", "ph", "cat", "pid", "tid", "args"])

        for objective_name, fit_state in restored_evaluation_report.regression_model_fit_state:
            gof_df = fit_state.get_goodness_of_fit_dataframe()
            assert len(gof_df.index) > 0
            assert all((col_name in gof_df.columns.values or col_name == "data_set_type") for col_name in GoodnessOfFitMetrics._fields)
            assert all(gof_df[col_name].is_monotonic for col_name in ["last_refit_iteration_number", "observation_count", "prediction_count"])

        for key, optimum_over_time in restored_evaluation_report.optima_over_time.items():
            assert optimizer_evaluation_report.optima_over_time[key].get_dataframe().equals(optimum_over_time.get_dataframe())


        assert restored_evaluation_report.optimizer_configuration == optimizer_config
        assert restored_evaluation_report.objective_function_configuration == objective_function_config

        unpickled_objective_function = pickle.loads(restored_evaluation_report.pickled_objective_function_initial_state)
        assert unpickled_objective_function.objective_function_config == objective_function_config.polynomial_objective_config

        for _ in range(10):
            random_pt = unpickled_objective_function.parameter_space.random()
            assert unpickled_objective_function.evaluate_point(random_pt) in unpickled_objective_function.output_space

        # Lastly, let's double-check the pickled optimizers.
        #
        assert len(restored_evaluation_report.pickled_optimizers_over_time) == 6

        # Finally, let's make sure that the optimizers serialized to disk are usable.
        #
        final_optimizer_from_disk = pickle.loads(restored_evaluation_report.pickled_optimizers_over_time[49])
        final_optimizer_from_report = pickle.loads(optimizer_evaluation_report.pickled_optimizers_over_time[49])

        for _ in range(20):
            assert final_optimizer_from_disk.suggest() in final_optimizer_from_report.optimization_problem.parameter_space
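
The execution-trace check in Examples #3 and #4 asserts exactly the field names used by the Chrome Trace Event JSON format (the format consumed by chrome://tracing and Perfetto), which is what the mlos tracer appears to emit. A minimal, hypothetical event that would pass the same key check, shown only to document what each field means:

import json

# Hypothetical event, not produced by mlos; field meanings follow the Chrome
# Trace Event format: ts = timestamp in microseconds, ph = phase ("B" begin,
# "E" end, "X" complete), cat = category, pid/tid = process/thread ids.
sample_event = {
    "ts": 1000,
    "name": "evaluate_optimizer",
    "ph": "B",
    "cat": "optimizer_evaluation",
    "pid": 1234,
    "tid": 5678,
    "args": {"iteration": 0},
}

trace = [sample_event]
assert all(key in trace[0] for key in ["ts", "name", "ph", "cat", "pid", "tid", "args"])
print(json.dumps(trace, indent=2))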
Example #4
    def test_defaults(self):
        """Tests default optimizer configurations against default objective functions."""
        optimizer_evaluator_config = optimizer_evaluator_config_store.default
        optimizer_evaluator_config.num_iterations = 100

        # We want to test this functionality so let's make sure that nobody accidentally disables it in the default config.
        #
        self.assertTrue(optimizer_evaluator_config.include_pickled_optimizer_in_report)
        self.assertTrue(optimizer_evaluator_config.include_pickled_objective_function_in_report)
        self.assertTrue(optimizer_evaluator_config.report_regression_model_goodness_of_fit)
        self.assertTrue(optimizer_evaluator_config.report_optima_over_time)
        self.assertTrue(optimizer_evaluator_config.include_execution_trace_in_report)

        optimizer_config = bayesian_optimizer_config_store.default
        objective_function_config = objective_function_config_store.default

        print(optimizer_evaluator_config.to_json(indent=2))
        print(optimizer_config.to_json(indent=2))
        print(objective_function_config.to_json(indent=2))

        optimizer_evaluator = OptimizerEvaluator(
            optimizer_evaluator_config=optimizer_evaluator_config,
            objective_function_config=objective_function_config,
            optimizer_config=optimizer_config)

        optimizer_evaluation_report = optimizer_evaluator.evaluate_optimizer()
        self.assertTrue(optimizer_evaluation_report.success)

        with pd.option_context('display.max_columns', 100):
            print(optimizer_evaluation_report.regression_model_goodness_of_fit_state.get_goodness_of_fit_dataframe(DataSetType.TRAIN).tail())
            for optimum_name, optimum_over_time in optimizer_evaluation_report.optima_over_time.items():
                print("#####################################################################################################")
                print(optimum_name)
                print(optimum_over_time.get_dataframe().tail(10))
                print("#####################################################################################################")

        # Now let's do it again with the unpickled optimizer.
        #
        unpickled_objective_function = pickle.loads(optimizer_evaluation_report.pickled_objective_function_initial_state)
        unpickled_optimizer = pickle.loads(optimizer_evaluation_report.pickled_optimizer_initial_state)
        optimizer_evaluator_2 = OptimizerEvaluator(
            optimizer_evaluator_config=optimizer_evaluator_config,
            objective_function=unpickled_objective_function,
            optimizer=unpickled_optimizer)

        optimizer_evaluation_report_2 = optimizer_evaluator_2.evaluate_optimizer()
        with pd.option_context('display.max_columns', 100):
            print(optimizer_evaluation_report_2.regression_model_goodness_of_fit_state.get_goodness_of_fit_dataframe(DataSetType.TRAIN).tail())
            for optimum_name, optimum_over_time in optimizer_evaluation_report_2.optima_over_time.items():
                print("#####################################################################################################")
                print(optimum_name)
                print(optimum_over_time.get_dataframe().tail(10))
                print("#####################################################################################################")

        optimizer_evaluation_report.write_to_disk(target_folder=self.temp_dir)

        # Validate that all files were created as expected.
        #
        with open(os.path.join(self.temp_dir, "execution_info.json")) as in_file:
            execution_info = json.load(in_file)
        self.assertTrue(execution_info["success"])
        self.assertTrue(execution_info["num_optimization_iterations"] == optimizer_evaluator_config.num_iterations)
        self.assertTrue(execution_info["evaluation_frequency"] == optimizer_evaluator_config.evaluation_frequency)
        self.assertTrue(execution_info["exception"] is None)
        self.assertTrue(execution_info["exception_stack_trace"] is None)

        with open(os.path.join(self.temp_dir, "execution_trace.json")) as in_file:
            trace = json.load(in_file)
        self.assertTrue(len(trace) > 100)
        self.assertTrue(all(key in trace[0] for key in ["ts", "name", "ph", "cat", "pid", "tid", "args"]))

        with open(os.path.join(self.temp_dir, "goodness_of_fit.pickle"), "rb") as in_file:
            unpickled_gof = pickle.load(in_file)

        gof_df = unpickled_gof.get_goodness_of_fit_dataframe()
        self.assertTrue(len(gof_df.index) > 0)
        self.assertTrue(all((col_name in gof_df.columns.values or col_name == "data_set_type") for col_name in GoodnessOfFitMetrics._fields))
        self.assertTrue(all(gof_df[col_name].is_monotonic for col_name in ["last_refit_iteration_number", "observation_count", "prediction_count"]))

        with open(os.path.join(self.temp_dir, "optima_over_time.pickle"), "rb") as in_file:
            unpickled_optima_over_time = pickle.load(in_file)

        for key, optimum_over_time in unpickled_optima_over_time.items():
            self.assertTrue(optimizer_evaluation_report.optima_over_time[key].get_dataframe().equals(optimum_over_time.get_dataframe()))

        with open(os.path.join(self.temp_dir, "optimizer_config.json")) as in_file:
            optimizer_config_json_str = in_file.read()
        deserialized_optimizer_config = Point.from_json(optimizer_config_json_str)
        self.assertTrue(deserialized_optimizer_config == optimizer_config)

        with open(os.path.join(self.temp_dir, "objective_function_config.json")) as in_file:
            objective_function_json_str = in_file.read()
        deserialized_objective_function_config = Point.from_json(objective_function_json_str)
        self.assertTrue(deserialized_objective_function_config == objective_function_config)

        objective_function_pickle_file_names = [
            "objective_function_final_state.pickle",
            "objective_function_initial_state.pickle"
        ]
        for file_name in objective_function_pickle_file_names:
            with open(os.path.join(self.temp_dir, file_name), "rb") as in_file:
                unpickled_objective_function = pickle.load(in_file)
                self.assertTrue(unpickled_objective_function.objective_function_config == objective_function_config.polynomial_objective_config)

                for _ in range(10):
                    random_pt = unpickled_objective_function.parameter_space.random()
                    self.assertTrue(unpickled_objective_function.evaluate_point(random_pt) in unpickled_objective_function.output_space)

        # Lastly, let's double-check the pickled optimizers.
        #
        pickled_optimizers_dir = os.path.join(self.temp_dir, "pickled_optimizers")
        num_pickled_optimizers = len([
            name for name in os.listdir(pickled_optimizers_dir)
            if os.path.isfile(os.path.join(pickled_optimizers_dir, name))
        ])
        self.assertTrue(num_pickled_optimizers == 11)

        # Finally, let's make sure that the optimizers serialized to disk are usable.
        #
        with open(os.path.join(pickled_optimizers_dir, "99.pickle"), "rb") as in_file:
            final_optimizer_from_disk = pickle.load(in_file)
        final_optimizer_from_report = pickle.loads(optimizer_evaluation_report.pickled_optimizers_over_time[99])

        for _ in range(100):
            self.assertTrue(final_optimizer_from_disk.suggest() == final_optimizer_from_report.suggest())
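
Both test_defaults variants assert that several goodness-of-fit columns grow monotonically as the optimizer sees more data. A small standalone illustration of that pandas check, using a toy dataframe with the same column names (note that Series.is_monotonic was an alias for is_monotonic_increasing and has been removed in newer pandas releases, so the explicit spelling is used here):

import pandas as pd

# Toy stand-in for the goodness-of-fit dataframe produced by the fit state.
gof_df = pd.DataFrame({
    "last_refit_iteration_number": [10, 10, 20, 30],
    "observation_count": [10, 15, 20, 30],
    "prediction_count": [5, 10, 15, 20],
})

# Non-decreasing check, equivalent to the .is_monotonic assertions above.
assert all(gof_df[col].is_monotonic_increasing for col in gof_df.columns)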