def test_creation(self):
    """Test creation"""
    ar1 = AverageResult()
    assert ar1.task_num == 1
    assert ar1.configuration == {"AverageResult": {"task_num": 1}}

    ar2 = AverageResult(task_num=5)
    assert ar2.task_num == 5
    assert ar2.configuration == {"AverageResult": {"task_num": 5}}
def test_analysis(self, experiment_config, trial_config):
    """Test assessment plot"""
    ar1 = AverageResult()

    with create_experiment(experiment_config, trial_config, ["completed"]) as (
        _,
        experiment,
        _,
    ):
        plot = ar1.analysis("task_name", [(0, experiment)])

        assert type(plot) is plotly.graph_objects.Figure
def test_creation_algorithms(self, benchmark):
    """Test study creation with all supported algorithm input formats"""
    algorithms = [
        {"algorithm": {"gridsearch": {"n_values": 1}}, "deterministic": True},
        {"algorithm": "tpe"},
        {"random": {"seed": 1}},
        "asha",
    ]
    study = Study(benchmark, algorithms, AverageResult(2), RosenBrock(25, dim=3))

    assert study.algorithms[0].name == "gridsearch"
    assert study.algorithms[0].experiment_algorithm == {
        "gridsearch": {"n_values": 1}
    }
    assert study.algorithms[0].is_deterministic

    assert study.algorithms[1].name == "tpe"
    assert study.algorithms[1].experiment_algorithm == "tpe"
    assert not study.algorithms[1].is_deterministic

    assert study.algorithms[2].name == "random"
    assert study.algorithms[2].experiment_algorithm == {"random": {"seed": 1}}
    assert not study.algorithms[2].is_deterministic

    assert study.algorithms[3].name == "asha"
    assert study.algorithms[3].experiment_algorithm == "asha"
    assert not study.algorithms[3].is_deterministic
def test_create_with_invalid_targets(self, benchmark_config_py):
    """Test creation with invalid Task and Assessment"""
    with OrionState():
        with pytest.raises(AttributeError) as exc:
            config = copy.deepcopy(benchmark_config_py)
            config["targets"] = [{"assess": [AverageResult(2)], "task": [DummyTask]}]
            get_or_create_benchmark(**config)

        assert "type object '{}' has no attribute ".format("DummyTask") in str(
            exc.value
        )

        with pytest.raises(AttributeError) as exc:
            config = copy.deepcopy(benchmark_config_py)
            config["targets"] = [
                {"assess": [DummyAssess], "task": [RosenBrock(25, dim=3)]}
            ]
            get_or_create_benchmark(**config)

        assert "type object '{}' has no attribute ".format("DummyAssess") in str(
            exc.value
        )
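# The test above relies on DummyTask and DummyAssess, which are not defined in this
# extract. A minimal sketch of such stand-ins (the exact bodies here are an assumption;
# the only requirement the test exercises is that they do not implement the benchmark
# Task/Assessment interface, so benchmark creation fails with an AttributeError):
class DummyTask:
    """Dummy invalid benchmark task"""


class DummyAssess:
    """Dummy invalid benchmark assessment"""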
def test_create_with_different_configure(self, benchmark_config_py, caplog):
    """Test creation with the same name but a different configuration"""
    with OrionState():
        config = copy.deepcopy(benchmark_config_py)
        bm1 = get_or_create_benchmark(**config)

        config = copy.deepcopy(benchmark_config_py)
        config["targets"][0]["assess"] = [AverageResult(2)]

        with caplog.at_level(
            logging.WARNING, logger="orion.benchmark.benchmark_client"
        ):
            bm2 = get_or_create_benchmark(**config)

        assert bm2.configuration == bm1.configuration
        assert (
            "Benchmark with same name is found but has different configuration, "
            "which will be used for this creation." in caplog.text
        )

        caplog.clear()
        config = copy.deepcopy(benchmark_config_py)
        config["targets"][0]["task"] = [RosenBrock(26, dim=3), CarromTable(20)]

        with caplog.at_level(
            logging.WARNING, logger="orion.benchmark.benchmark_client"
        ):
            bm3 = get_or_create_benchmark(**config)

        assert bm3.configuration == bm1.configuration
        assert (
            "Benchmark with same name is found but has different configuration, "
            "which will be used for this creation." in caplog.text
        )
def test_figure_layout(self, study_experiments_config):
    """Test assessment plot format"""
    ar1 = AverageResult()

    with create_study_experiments(**study_experiments_config) as experiments:
        plot = ar1.analysis("task_name", experiments)

        assert_regrets_plot(
            plot,
            [
                list(algorithm["algorithm"].keys())[0]
                for algorithm in study_experiments_config["algorithms"]
            ],
            balanced=study_experiments_config["max_trial"],
            with_avg=True,
        )
def benchmark_config_py(benchmark_algorithms):
    """Return a benchmark configuration as a Python dict"""
    config = dict(
        name="bm00001",
        algorithms=benchmark_algorithms,
        targets=[
            {
                "assess": [AverageResult(2), AverageRank(2)],
                "task": [RosenBrock(25, dim=3), CarromTable(20)],
            }
        ],
    )
    return config
def benchmark(benchmark_algorithms):
    """Return a benchmark instance"""
    return Benchmark(
        name="benchmark007",
        algorithms=benchmark_algorithms,
        targets=[
            {
                "assess": [AverageResult(2), AverageRank(2)],
                "task": [RosenBrock(25, dim=3), CarromTable(20)],
            }
        ],
    )
def main(config: ProfetExperimentConfig):
    if issubclass(config.task_type, ProfetTask):
        task = config.task_type(
            max_trials=config.max_trials,
            task_id=0,
            model_config=config.profet_train_config,
            input_dir=config.input_dir,
            checkpoint_dir=config.checkpoint_dir,
            seed=config.seed,
        )
    else:
        # NOTE: This doesn't normally happen when using this from the command-line.
        task = config.task_type(max_trials=config.max_trials)

    print(f"Storage file used: {config.storage_pickle_path}")

    benchmark = get_or_create_benchmark(
        name=config.name,
        algorithms=config.algorithms,
        targets=[{"assess": [AverageResult(config.n_repetitions)], "task": [task]}],
        storage={
            "type": "legacy",
            "database": {"type": "pickleddb", "host": str(config.storage_pickle_path)},
        },
        debug=config.debug,
    )
    benchmark.setup_studies()

    # Since we're using the QuadraticsTask, we can evaluate the 'similarity' between
    # them, hence we just show these figures rather than the (very large) number of
    # other potential figures.
    figures_dir = config.figures_dir / benchmark.name
    figures_dir.mkdir(exist_ok=True, parents=True)

    benchmark.process(n_workers=1)

    status = benchmark.status(False)
    print(status)

    # if all(isinstance(source_task, QuadraticsTask) for source_task in benchmark.source_tasks):
    figures = benchmark.analysis()

    # TODO: Instead of having like 30 figures, should try and create an interactive-ish
    # plotly thingy that can switch between the different quantities.
    # import plotly.io as pio
    # pio.renderers.default = "browser"

    for i, figure in enumerate(figures):
        figure.show()
        figure.write_image(str(figures_dir / f"fig_{i:02}.svg"))
        figure.write_image(str(figures_dir / f"fig_{i:02}.png"))
        figure.write_html(
            str(figures_dir / f"fig_{i:02}.html"), include_plotlyjs="cdn"
        )
def test_simple():
    """Test an end-to-end execution of a benchmark"""
    task_num = 2
    trial_num = 20
    assessments = [AverageResult(task_num), AverageRank(task_num)]
    tasks = [
        RosenBrock(trial_num, dim=3),
        EggHolder(trial_num, dim=4),
        CarromTable(trial_num),
        Branin(trial_num),
        BirdLike(trial_num),
    ]

    benchmark = get_or_create_benchmark(
        name="bm001",
        algorithms=algorithms,
        targets=[{"assess": assessments, "task": tasks}],
    )
    benchmark.process()

    assert len(benchmark.studies) == len(assessments) * len(tasks)

    status = benchmark.status()
    experiments = benchmark.experiments()
    assert len(experiments) == len(algorithms) * task_num * len(assessments) * len(tasks)
    assert len(status) == len(algorithms) * len(assessments) * len(tasks)

    figures = benchmark.analysis()
    assert len(figures) == len(benchmark.studies)
    assert type(figures[0]) is plotly.graph_objects.Figure

    benchmark = get_or_create_benchmark(name="bm001")
    figures = benchmark.analysis()
    assert len(figures) == len(benchmark.studies)
    assert type(figures[0]) is plotly.graph_objects.Figure
def test_simple():
    """Test an end-to-end execution of a benchmark"""
    task_num = 2
    max_trials = 10
    assessments = [AverageResult(task_num), AverageRank(task_num)]
    tasks = [
        Branin(max_trials),
        BirdLike(max_trials),
    ]

    benchmark = get_or_create_benchmark(
        name="bm001",
        algorithms=algorithms,
        targets=[{"assess": assessments, "task": tasks}],
    )
    benchmark.process()

    assert len(benchmark.studies) == len(assessments) * len(tasks)

    status = benchmark.status()
    experiments = benchmark.experiments()
    assert len(experiments) == len(algorithms) * task_num * len(assessments) * len(tasks)
    assert len(status) == len(algorithms) * len(assessments) * len(tasks)

    figures = benchmark.analysis()
    assert_benchmark_figures(figures, 4, assessments, tasks)

    benchmark = get_or_create_benchmark(name="bm001")
    figures = benchmark.analysis()
    assert_benchmark_figures(figures, 4, assessments, tasks)
    benchmark.close()
def study(benchmark, benchmark_algorithms):
    """Return a study instance"""
    with benchmark.executor:
        yield Study(
            benchmark, benchmark_algorithms, AverageResult(2), RosenBrock(25, dim=3)
        )
def study(benchmark, benchmark_algorithms):
    """Return a study instance"""
    return Study(
        benchmark, benchmark_algorithms, AverageResult(2), RosenBrock(25, dim=3)
    )
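# How such a study fixture is typically consumed in a test. This is a rough sketch, not
# part of the fixtures above; it assumes Study exposes setup_experiments(), execute(),
# status(), and analysis() methods, which are not shown in this extract.
def test_study_execution_sketch(study):
    """Hypothetical test: run a study end to end and check it produces output."""
    study.setup_experiments()
    study.execute()
    status = study.status()
    figures = study.analysis()
    assert status
    assert figures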