def test_get_performance(self):
    bench = LubyBenchmark()
    env = bench.get_environment()

    wrapped = PerformanceTrackingWrapper(env)
    wrapped.reset()
    done = False
    while not done:
        _, _, done, _ = wrapped.step(1)

    wrapped2 = PerformanceTrackingWrapper(env, 2)
    wrapped2.reset()
    done = False
    while not done:
        _, _, done, _ = wrapped2.step(1)

    wrapped3 = PerformanceTrackingWrapper(env, 2, track_instance_performance=False)
    wrapped3.reset()
    for i in range(5):
        wrapped3.step(i)

    wrapped4 = PerformanceTrackingWrapper(env, track_instance_performance=False)
    wrapped4.reset()
    for i in range(5):
        wrapped4.step(i)

    overall, instance_performance = wrapped.get_performance()
    overall_perf, interval_perf, instance_perf = wrapped2.get_performance()
    overall_performance_only = wrapped4.get_performance()
    overall_performance, intervals = wrapped3.get_performance()

    self.assertTrue(
        np.array_equal(
            np.round(overall_performance, decimals=2),
            np.round(overall_performance_only, decimals=2),
        )
    )
    self.assertTrue(
        np.array_equal(
            np.round(overall_perf, decimals=2), np.round(overall, decimals=2)
        )
    )

    self.assertTrue(len(instance_performance.keys()) == 1)
    self.assertTrue(len(list(instance_performance.values())[0]) == 1)
    self.assertTrue(len(instance_perf.keys()) == 1)
    self.assertTrue(len(list(instance_perf.values())[0]) == 1)

    self.assertTrue(len(intervals) == 1)
    self.assertTrue(len(intervals[0]) == 0)
    self.assertTrue(len(interval_perf) == 1)
    self.assertTrue(len(interval_perf[0]) == 1)
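# For reference outside the test class: a minimal standalone sketch of the same
# wrapper usage. The import paths below are assumed to mirror the rest of the
# dacbench test suite; everything else follows the test above.
from dacbench.benchmarks import LubyBenchmark
from dacbench.wrappers import PerformanceTrackingWrapper

env = PerformanceTrackingWrapper(LubyBenchmark().get_environment())
env.reset()
done = False
while not done:
    _, _, done, _ = env.step(1)

# With default settings get_performance() returns the per-episode overall
# performance and a dict mapping instances to their episode performances.
overall, per_instance = env.get_performance()
print(overall, per_instance)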
def run_optimal(results_path, benchmark_name, num_episodes, seeds=np.arange(10)):
    """
    Run the handcrafted comparison policy for a benchmark and save per-seed results.

    Parameters
    ----------
    results_path : str
        Path to where results should be saved
    benchmark_name : str
        Name of the benchmark class in dacbench.benchmarks
    num_episodes : int
        Number of episodes to run per seed
    seeds : list of int
        Seeds to evaluate
    """
    bench = getattr(benchmarks, benchmark_name)()
    if benchmark_name == "LubyBenchmark":
        policy = optimal_luby
    elif benchmark_name == "SigmoidBenchmark":
        policy = optimal_sigmoid
    elif benchmark_name == "FastDownwardBenchmark":
        policy = optimal_fd
    elif benchmark_name == "CMAESBenchmark":
        policy = csa
    else:
        print("No comparison policy found for this benchmark")
        return

    for s in seeds:
        env = bench.get_benchmark(seed=s)
        env = PerformanceTrackingWrapper(env)
        agent = GenericAgent(env, policy)
        run_benchmark(env, agent, num_episodes)
        performance = env.get_performance()[0]

        filedir = results_path + "/" + benchmark_name + "/optimal"
        filename = f"{filedir}/seed_{s}.json"

        if not os.path.exists(results_path):
            os.makedirs(results_path)
        if not os.path.exists(results_path + "/" + benchmark_name):
            os.makedirs(results_path + "/" + benchmark_name)
        if not os.path.exists(filedir):
            os.makedirs(filedir)

        with open(filename, "w+") as fp:
            json.dump(performance, fp)
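# Example invocation (illustrative sketch): evaluate the handcrafted comparison
# policy on the Luby benchmark for 10 episodes per seed. The output directory
# "results" is a placeholder, not a path mandated by the runner.
if __name__ == "__main__":
    run_optimal("results", "LubyBenchmark", 10)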
def run_dacbench(results_path, agent_method, num_episodes):
    """
    Run all benchmarks for 10 seeds for a given number of episodes with a given agent and save the results.

    Parameters
    ----------
    results_path : str
        Path to where results should be saved
    agent_method : function
        Method that takes an env as input and returns an agent
    num_episodes : int
        Number of episodes to run for each benchmark
    """
    if not os.path.exists(results_path):
        os.makedirs(results_path)

    for b in map(benchmarks.__dict__.get, benchmarks.__all__):
        overall = []
        print(f"Evaluating {b.__name__}")
        for i in range(10):
            print(f"Seed {i}/10")
            bench = b()
            env = bench.get_benchmark(seed=i)
            env = PerformanceTrackingWrapper(env)
            agent = agent_method(env)
            run_benchmark(env, agent, num_episodes)
            performance = env.get_performance()[0]
            overall.append(performance)
        print("\n")

        file_name = results_path + "/" + b.__name__ + ".json"
        with open(file_name, "w+") as fp:
            json.dump(overall, fp)
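# Example invocation (illustrative sketch): run the full benchmark suite with a
# trivial agent factory. StaticAgent is the same agent class used by run_static
# below; the constant action 0 and the output path "results" are placeholders.
def make_static_agent(env):
    return StaticAgent(env, 0)


if __name__ == "__main__":
    run_dacbench("results", make_static_agent, num_episodes=10)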
def run_static(results_path, benchmark_name, action, num_episodes, seeds=np.arange(10)):
    """
    Run a static policy that plays the same action in every step and save per-seed results.

    Parameters
    ----------
    results_path : str
        Path to where results should be saved
    benchmark_name : str
        Name of the benchmark class in dacbench.benchmarks
    action
        Action the static agent plays in every step
    num_episodes : int
        Number of episodes to run per seed
    seeds : list of int
        Seeds to evaluate
    """
    bench = getattr(benchmarks, benchmark_name)()
    for s in seeds:
        env = bench.get_benchmark(seed=s)
        env = PerformanceTrackingWrapper(env)
        agent = StaticAgent(env, action)
        run_benchmark(env, agent, num_episodes)
        performance = env.get_performance()[0]

        filedir = results_path + "/" + benchmark_name + "/static_" + str(action)
        filename = f"{filedir}/seed_{s}.json"

        if not os.path.exists(results_path):
            os.makedirs(results_path)
        if not os.path.exists(results_path + "/" + benchmark_name):
            os.makedirs(results_path + "/" + benchmark_name)
        if not os.path.exists(filedir):
            os.makedirs(filedir)

        with open(filename, "w+") as fp:
            json.dump(performance, fp)
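# Companion sketch: read one of the per-seed result files back for inspection.
# The path mirrors the layout written by run_static
# (results_path/benchmark_name/static_<action>/seed_<s>.json) and is otherwise
# a placeholder for illustration.
import json

with open("results/LubyBenchmark/static_1/seed_0.json") as fp:
    episode_rewards = json.load(fp)
print(len(episode_rewards), "episodes, mean reward:", sum(episode_rewards) / len(episode_rewards))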
        else:
            bench.config[k] = config[k]
    return bench.get_environment()


# Experiment configuration
# Play 5D scenario
action_values = (3, 3, 3, 3, 3)
env_config = {
    "seed": 0,
    "action_values": action_values,
    "instance_set_path": "../instance_sets/sigmoid/sigmoid_5D3M_train.csv",
}

# Make environment
# To track rewards we use our wrapper (this is only for simplicity)
env = make_sigmoid(env_config)
env = PerformanceTrackingWrapper(env)

# Make simple PPO policy
model = PPO2("MlpPolicy", env)

# Train for 200 timesteps
model.learn(total_timesteps=200)

performance = env.get_performance()[0]
for i in range(len(performance)):
    print(
        f"Episode {i+1}/{len(performance)}...........................................Reward: {performance[i]}"
    )
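# Follow-up sketch: persist the tracked per-episode rewards in the same JSON
# format the runner scripts above use. The file name is a placeholder chosen
# for illustration.
import json

with open("ppo_sigmoid_rewards.json", "w+") as fp:
    json.dump([float(r) for r in performance], fp)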