Example #1
    def test_get_performance(self):
        bench = LubyBenchmark()
        env = bench.get_environment()

        # Default settings: track overall and per-instance performance
        wrapped = PerformanceTrackingWrapper(env)
        wrapped.reset()
        done = False
        while not done:
            _, _, done, _ = wrapped.step(1)

        # Episode interval of 2: additionally track interval performance
        wrapped2 = PerformanceTrackingWrapper(env, 2)
        wrapped2.reset()
        done = False
        while not done:
            _, _, done, _ = wrapped2.step(1)

        # Interval tracking with per-instance tracking disabled
        wrapped3 = PerformanceTrackingWrapper(env,
                                              2,
                                              track_instance_performance=False)
        wrapped3.reset()
        for i in range(5):
            wrapped3.step(i)

        # Overall performance only
        wrapped4 = PerformanceTrackingWrapper(env,
                                              track_instance_performance=False)
        wrapped4.reset()
        for i in range(5):
            wrapped4.step(i)

        overall, instance_performance = wrapped.get_performance()
        overall_perf, interval_perf, instance_perf = wrapped2.get_performance()
        overall_performance_only = wrapped4.get_performance()
        overall_performance, intervals = wrapped3.get_performance()
        self.assertTrue(
            np.array_equal(
                np.round(overall_performance, decimals=2),
                np.round(overall_performance_only, decimals=2),
            ))

        self.assertTrue(
            np.array_equal(np.round(overall_perf, decimals=2),
                           np.round(overall, decimals=2)))

        self.assertTrue(len(instance_performance.keys()) == 1)
        self.assertTrue(len(list(instance_performance.values())[0]) == 1)
        self.assertTrue(len(instance_perf.keys()) == 1)
        self.assertTrue(len(list(instance_perf.values())[0]) == 1)

        self.assertTrue(len(intervals) == 1)
        self.assertTrue(len(intervals[0]) == 0)
        self.assertTrue(len(interval_perf) == 1)
        self.assertTrue(len(interval_perf[0]) == 1)
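Outside of a test, the minimal pattern exercised above looks roughly like the following sketch; the dacbench import paths are an assumption here, since the excerpt omits its imports:

from dacbench.benchmarks import LubyBenchmark
from dacbench.wrappers import PerformanceTrackingWrapper

# Wrap a benchmark environment and run one episode with a fixed action
env = PerformanceTrackingWrapper(LubyBenchmark().get_environment())
env.reset()
done = False
while not done:
    # Old gym step API: (state, reward, done, info)
    _, _, done, _ = env.step(1)

# With the default settings, get_performance() returns the overall
# per-episode performance and a dict of per-instance performance
overall, per_instance = env.get_performance()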
Example #2
def run_optimal(results_path,
                benchmark_name,
                num_episodes,
                seeds=np.arange(10)):
    """Run the benchmark-specific comparison policy for each seed and save its per-episode performance."""
    bench = getattr(benchmarks, benchmark_name)()
    if benchmark_name == "LubyBenchmark":
        policy = optimal_luby
    elif benchmark_name == "SigmoidBenchmark":
        policy = optimal_sigmoid
    elif benchmark_name == "FastDownwardBenchmark":
        policy = optimal_fd
    elif benchmark_name == "CMAESBenchmark":
        policy = csa
    else:
        print("No comparison policy found for this benchmark")
        return

    for s in seeds:
        env = bench.get_benchmark(seed=s)
        env = PerformanceTrackingWrapper(env)
        agent = GenericAgent(env, policy)
        run_benchmark(env, agent, num_episodes)
        performance = env.get_performance()[0]
        filedir = results_path + "/" + benchmark_name + "/optimal"
        filename = f"{filedir}/seed_{s}.json"

        # makedirs with exist_ok=True also creates the intermediate directories
        os.makedirs(filedir, exist_ok=True)

        with open(filename, "w+") as fp:
            json.dump(performance, fp)
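An invocation might look like the sketch below; the results directory, episode count, and seed list are illustrative rather than taken from the original:

# Writes results/LubyBenchmark/optimal/seed_<s>.json for each seed
run_optimal("results", "LubyBenchmark", num_episodes=10, seeds=[0, 1, 2])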
Example #3
def run_dacbench(results_path, agent_method, num_episodes):
    """
    Run all benchmarks with a given agent for 10 seeds and a given number of episodes, and save the results

    Parameters
    ----------
    results_path : str
        Path to where results should be saved
    agent_method : function
        Method that takes an env as input and returns an agent
    num_episodes : int
        Number of episodes to run for each benchmark
    """
    if not os.path.exists(results_path):
        os.makedirs(results_path)

    for b in map(benchmarks.__dict__.get, benchmarks.__all__):
        overall = []
        print(f"Evaluating {b.__name__}")
        for i in range(10):
            print(f"Seed {i}/10")
            bench = b()
            env = bench.get_benchmark(seed=i)
            env = PerformanceTrackingWrapper(env)
            agent = agent_method(env)
            run_benchmark(env, agent, num_episodes)
            performance = env.get_performance()[0]
            overall.append(performance)
        print("\n")
        file_name = results_path + "/" + b.__name__ + ".json"
        with open(file_name, "w+") as fp:
            json.dump(overall, fp)
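run_dacbench expects agent_method to map an environment to an agent. A minimal sketch, assuming dacbench's StaticAgent (the agent used in the other examples here) and an illustrative results directory:

from dacbench.agents import StaticAgent

def static_agent_method(env):
    # Always play the first action; purely illustrative
    return StaticAgent(env, 0)

run_dacbench("results", static_agent_method, num_episodes=10)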
Example #4
def run_static(results_path,
               benchmark_name,
               action,
               num_episodes,
               seeds=np.arange(10)):
    """Run a StaticAgent that always plays the given action for each seed and save its per-episode performance."""
    bench = getattr(benchmarks, benchmark_name)()
    for s in seeds:
        env = bench.get_benchmark(seed=s)
        env = PerformanceTrackingWrapper(env)
        agent = StaticAgent(env, action)
        run_benchmark(env, agent, num_episodes)
        performance = env.get_performance()[0]
        filedir = results_path + "/" + benchmark_name + "/static_" + str(
            action)
        filename = f"{filedir}/seed_{s}.json"

        # makedirs with exist_ok=True also creates the intermediate directories
        os.makedirs(filedir, exist_ok=True)

        with open(filename, "w+") as fp:
            json.dump(performance, fp)
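The same runner can be used to sweep several static baseline actions; the actions, directory, and episode count below are illustrative:

# Writes results/LubyBenchmark/static_<a>/seed_<s>.json for each action/seed pair
for a in [0, 1, 2]:
    run_static("results", "LubyBenchmark", a, num_episodes=10)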
Example #5
        else:
            bench.config[k] = config[k]
    return bench.get_environment()


# Experiment configuration
# 5D Sigmoid scenario with 3 actions per dimension
action_values = (3, 3, 3, 3, 3)
env_config = {
    "seed": 0,
    "action_values": action_values,
    "instance_set_path": "../instance_sets/sigmoid/sigmoid_5D3M_train.csv",
}

# Make the environment
# We wrap it in PerformanceTrackingWrapper to track per-episode rewards (purely for convenience)
env = make_sigmoid(env_config)
env = PerformanceTrackingWrapper(env)

# Make simple PPO policy
model = PPO2("MlpPolicy", env)

# Train for 200 timesteps
model.learn(total_timesteps=200)

performance = env.get_performance()[0]
for i, episode_reward in enumerate(performance):
    print(
        f"Episode {i+1}/{len(performance)}...........................................Reward: {episode_reward}"
    )
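To keep the tracked rewards, they can be dumped to JSON in the same way the earlier examples do; the path below is illustrative:

import json
import os

os.makedirs("results/SigmoidBenchmark", exist_ok=True)
with open("results/SigmoidBenchmark/ppo_rewards.json", "w") as fp:
    json.dump(performance, fp)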