Example #1
def run_optimal(results_path,
                benchmark_name,
                num_episodes,
                seeds=np.arange(10)):
    bench = getattr(benchmarks, benchmark_name)()
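    # Select the known comparison policy for the requested benchmark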
    if benchmark_name == "LubyBenchmark":
        policy = optimal_luby
    elif benchmark_name == "SigmoidBenchmark":
        policy = optimal_sigmoid
    elif benchmark_name == "FastDownwardBenchmark":
        policy = optimal_fd
    elif benchmark_name == "CMAESBenchmark":
        policy = csa
    else:
        print("No comparison policy found for this benchmark")
        return

    for s in seeds:
        env = bench.get_benchmark(seed=s)
        env = PerformanceTrackingWrapper(env)
        agent = GenericAgent(env, policy)
        run_benchmark(env, agent, num_episodes)
        performance = env.get_performance()[0]
        filedir = results_path + "/" + benchmark_name + "/optimal"
        filename = f"{filedir}/seed_{s}.json"

        os.makedirs(filedir, exist_ok=True)

        with open(filename, "w+") as fp:
            json.dump(performance, fp)
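
# Usage sketch for the function above: assumes the module-level imports it relies on
# (numpy as np, os, json, benchmarks, and the agents/wrappers referenced) are in scope;
# the path, benchmark name, and episode count are illustrative placeholders only.
run_optimal("results", "LubyBenchmark", num_episodes=10, seeds=np.arange(5))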
Example #2
def run_static(results_path,
               benchmark_name,
               action,
               num_episodes,
               seeds=np.arange(10)):
    bench = getattr(benchmarks, benchmark_name)()
    for s in seeds:
        env = bench.get_benchmark(seed=s)
        env = PerformanceTrackingWrapper(env)
        agent = StaticAgent(env, action)
        run_benchmark(env, agent, num_episodes)
        performance = env.get_performance()[0]
        filedir = results_path + "/" + benchmark_name + "/static_" + str(
            action)
        filename = f"{filedir}/seed_{s}.json"

        os.makedirs(filedir, exist_ok=True)

        with open(filename, "w+") as fp:
            json.dump(performance, fp)
Example #3
def run_dacbench(results_path, agent_method, num_episodes):
    """
    Run all benchmarks for 10 seeds for a given number of episodes with a given agent and save the results.

    Parameters
    ----------
    results_path : str
        Path to where results should be saved
    agent_method : function
        Method that takes an env as input and returns an agent
    num_episodes : int
        Number of episodes to run for each benchmark
    """
    if not os.path.exists(results_path):
        os.makedirs(results_path)

    for b in map(benchmarks.__dict__.get, benchmarks.__all__):
        overall = []
        print(f"Evaluating {b.__name__}")
        for i in range(10):
            print(f"Seed {i}/10")
            bench = b()
            env = bench.get_benchmark(seed=i)
            env = PerformanceTrackingWrapper(env)
            agent = agent_method(env)
            run_benchmark(env, agent, num_episodes)
            performance = env.get_performance()[0]
            overall.append(performance)
        print("\n")
        file_name = results_path + "/" + b.__name__ + ".json"
        with open(file_name, "w+") as fp:
            json.dump(overall, fp)
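
# Usage sketch: run_dacbench can be driven with any agent factory; here a constant-action
# StaticAgent (as used in the other examples) is assumed to be importable, and the action
# value 0 and the episode count are placeholders.
run_dacbench("results", lambda env: StaticAgent(env, 0), num_episodes=10)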
Example #4
    def test_init(self):
        bench = LubyBenchmark()
        env = bench.get_environment()
        wrapped = PerformanceTrackingWrapper(env)
        self.assertTrue(len(wrapped.overall_performance) == 0)
        self.assertTrue(wrapped.performance_interval is None)
        wrapped.instance = [0]
        self.assertTrue(wrapped.instance[0] == 0)

        wrapped2 = PerformanceTrackingWrapper(env, 10)
        self.assertTrue(len(wrapped2.overall_performance) == 0)
        self.assertTrue(wrapped2.performance_interval == 10)
        self.assertTrue(len(wrapped2.performance_intervals) == 0)
        self.assertTrue(len(wrapped2.current_performance) == 0)
Example #5
def run_policy(results_path,
               benchmark_name,
               num_episodes,
               policy,
               seeds=np.arange(10)):
    bench = getattr(benchmarks, benchmark_name)()

    for s in seeds:
        if benchmark_name == "CMAESBenchmark":
            experiment_name = f"csa_{s}"
        else:
            experiment_name = f"optimal_{s}"
        logger = Logger(experiment_name=experiment_name,
                        output_path=results_path / benchmark_name)

        env = bench.get_benchmark(seed=s)
        env = PerformanceTrackingWrapper(
            env, logger=logger.add_module(PerformanceTrackingWrapper))
        agent = GenericAgent(env, policy)

        logger.add_agent(agent)
        logger.add_benchmark(bench)
        logger.set_env(env)

        run_benchmark(env, agent, num_episodes, logger)

        logger.close()
Example #6
    def test_render(self, mock_plt):
        bench = LubyBenchmark()
        env = bench.get_environment()
        env = PerformanceTrackingWrapper(env)
        for _ in range(10):
            done = False
            env.reset()
            while not done:
                _, _, done, _ = env.step(1)
        env.render_performance()
        self.assertTrue(mock_plt.show.called)
        env.render_instance_performance()
        self.assertTrue(mock_plt.show.called)
Example #7
def make_benchmark(config):
    bench = getattr(benchmarks, config["benchmark"])()
    env = bench.get_benchmark(seed=config["seed"])
    if config["benchmark"] in ["SGDBenchmark", "CMAESBenchmark"]:
        env = ObservationWrapper(env)
    wrapped = PerformanceTrackingWrapper(env, logger=config["logger"])
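    # `logger` here is assumed to come from the enclosing scope; config["logger"] only
    # carries the PerformanceTrackingWrapper sub-logger created via logger.add_module.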
    logger.set_env(wrapped)
    return wrapped
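
# Usage sketch: a config dict matching the keys make_benchmark reads; the benchmark name,
# seed, and surrounding Logger instance (here called `logger`) are placeholders.
config = {
    "benchmark": "LubyBenchmark",
    "seed": 0,
    "logger": logger.add_module(PerformanceTrackingWrapper),
}
env = make_benchmark(config)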
Example #8
def run_dacbench(results_path,
                 agent_method,
                 num_episodes,
                 bench=None,
                 seeds=None):
    """
    Run benchmarks for a set of seeds for a given number of episodes with a given agent and save the results.

    Parameters
    ----------
    results_path : str
        Path to where results should be saved
    agent_method : function
        Method that takes an env as input and returns an agent
    num_episodes : int
        Number of episodes to run for each benchmark
    bench : list[str], optional
        Names of the benchmark classes to run. If None (default), all benchmarks are run.
    seeds : list[int], optional
        List of seeds to run all benchmarks for. If None (default), seeds [0, ..., 9] are used.
    """

    if bench is None:
        bench = map(benchmarks.__dict__.get, benchmarks.__all__)
    else:
        bench = [getattr(benchmarks, b) for b in bench]

    seeds = seeds if seeds is not None else range(10)
    for b in bench:
        print(f"Evaluating {b.__name__}")
        for i in seeds:
            print(f"Seed {i}/10")
            bench = b()
            try:
                env = bench.get_benchmark(seed=i)
            except Exception:
                # Skip benchmarks that cannot be constructed in this setup
                continue

            logger = Logger(
                experiment_name=f"seed_{i}",
                output_path=Path(results_path) / f"{b.__name__}",
            )
            perf_logger = logger.add_module(PerformanceTrackingWrapper)
            logger.add_benchmark(bench)
            logger.set_env(env)

            env = PerformanceTrackingWrapper(env, logger=perf_logger)
            agent = agent_method(env)
            logger.add_agent(agent)

            run_benchmark(env, agent, num_episodes, logger)

            logger.close()
Example #9
def run_random(results_path, benchmark_name, num_episodes, seeds, fixed):
    bench = getattr(benchmarks, benchmark_name)()
    for s in seeds:
        if fixed > 1:
            experiment_name = f"random_fixed{fixed}_{s}"
        else:
            experiment_name = f"random_{s}"
        logger = Logger(experiment_name=experiment_name,
                        output_path=results_path / benchmark_name)
        env = bench.get_benchmark(seed=s)
        env = PerformanceTrackingWrapper(
            env, logger=logger.add_module(PerformanceTrackingWrapper))
        agent = DynamicRandomAgent(env, fixed)

        logger.add_agent(agent)
        logger.add_benchmark(bench)
        logger.set_env(env)

        run_benchmark(env, agent, num_episodes, logger)

        logger.close()
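
# Usage sketch: results_path is joined with "/" as a pathlib operator above, so it is
# assumed to be a pathlib.Path; the remaining argument values are placeholders. `fixed`
# is forwarded to DynamicRandomAgent and, when > 1, is reflected in the experiment name.
run_random(Path("results"), "SigmoidBenchmark", num_episodes=10, seeds=range(5), fixed=1)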
Example #10
def run_optimal(results_path,
                benchmark_name,
                num_episodes,
                seeds=np.arange(10)):
    bench = getattr(benchmarks, benchmark_name)()
    if benchmark_name == "LubyBenchmark":
        policy = optimal_luby
    elif benchmark_name == "SigmoidBenchmark":
        policy = optimal_sigmoid
    elif benchmark_name == "FastDownwardBenchmark":
        policy = optimal_fd
    elif benchmark_name == "CMAESBenchmark":
        policy = csa
    else:
        print("No comparison policy found for this benchmark")
        return

    for s in seeds:
        if benchmark_name == "CMAESBenchmark":
            experiment_name = f"csa_{s}"
        else:
            experiment_name = f"optimal_{s}"
        logger = Logger(experiment_name=experiment_name,
                        output_path=results_path / benchmark_name)

        env = bench.get_benchmark(seed=s)
        env = PerformanceTrackingWrapper(
            env, logger=logger.add_module(PerformanceTrackingWrapper))
        agent = GenericAgent(env, policy)

        logger.add_agent(agent)
        logger.add_benchmark(bench)
        logger.set_env(env)
        logger.set_additional_info(seed=s)

        run_benchmark(env, agent, num_episodes, logger)

        logger.close()
Example #11
def run_dacbench(results_path, agent_method, num_episodes):
    """
    Run all benchmarks for 10 seeds for a given number of episodes with a given agent and save the results.

    Parameters
    ----------
    results_path : str
        Path to where results should be saved
    agent_method : function
        Method that takes an env as input and returns an agent
    num_episodes : int
        Number of episodes to run for each benchmark
    """

    for b in map(benchmarks.__dict__.get, benchmarks.__all__):
        print(f"Evaluating {b.__name__}")
        for i in range(10):
            print(f"Seed {i}/10")
            bench = b()
            env = bench.get_benchmark(seed=i)

            logger = Logger(
                experiment_name=f"seed_{i}",
                output_path=Path(results_path) / f"{b.__name__}",
            )
            perf_logger = logger.add_module(PerformanceTrackingWrapper)
            logger.add_benchmark(bench)
            logger.set_env(env)
            logger.set_additional_info(seed=i)

            env = PerformanceTrackingWrapper(env, logger=perf_logger)
            agent = agent_method(env)
            logger.add_agent(agent)

            run_benchmark(env, agent, num_episodes, logger)

            logger.close()
Example #12
def run_static(results_path,
               benchmark_name,
               action,
               num_episodes,
               seeds=np.arange(10)):
    bench = getattr(benchmarks, benchmark_name)()
    for s in seeds:
        logger = Logger(
            experiment_name=f"static_{action}_{s}",
            output_path=results_path / benchmark_name,
        )
        env = bench.get_benchmark(seed=s)
        env = PerformanceTrackingWrapper(
            env, logger=logger.add_module(PerformanceTrackingWrapper))
        agent = StaticAgent(env, action)

        logger.add_agent(agent)
        logger.add_benchmark(bench)
        logger.set_env(env)
        logger.set_additional_info(seed=s, action=action)

        run_benchmark(env, agent, num_episodes, logger)

        logger.close()
Example #13
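# Excerpt: tail of the benchmark-construction helper, presumably the make_sigmoid used
# below; the lines cut off above presumably loop over config and handle special keys,
# with the remaining entries copied into bench.config here.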
        else:
            bench.config[k] = config[k]
    return bench.get_environment()


# Experiment configuration
# Play 5D scenario
action_values = (3, 3, 3, 3, 3)
env_config = {
    "seed": 0,
    "action_values": action_values,
    "instance_set_path": "../instance_sets/sigmoid/sigmoid_5D3M_train.csv",
}

# Make environment
# To track rewards, we wrap it in our PerformanceTrackingWrapper (purely for convenience)
env = make_sigmoid(env_config)
env = PerformanceTrackingWrapper(env)

# Make simple PPO policy
model = PPO2("MlpPolicy", env)

# Train for 200 timesteps
model.learn(total_timesteps=200)

performance = env.get_performance()[0]
for i in range(len(performance)):
    print(
        f"Episode {i+1}/{len(performance)}...........................................Reward: {performance[i]}"
    )
Example #14
    def test_get_performance(self):
        bench = LubyBenchmark()
        env = bench.get_environment()
        wrapped = PerformanceTrackingWrapper(env)
        wrapped.reset()
        done = False
        while not done:
            _, _, done, _ = wrapped.step(1)
        wrapped2 = PerformanceTrackingWrapper(env, 2)
        wrapped2.reset()
        done = False
        while not done:
            _, _, done, _ = wrapped2.step(1)
        wrapped3 = PerformanceTrackingWrapper(env,
                                              2,
                                              track_instance_performance=False)
        wrapped3.reset()
        for i in range(5):
            wrapped3.step(i)
        wrapped4 = PerformanceTrackingWrapper(env,
                                              track_instance_performance=False)
        wrapped4.reset()
        for i in range(5):
            wrapped4.step(i)

        overall, instance_performance = wrapped.get_performance()
        overall_perf, interval_perf, instance_perf = wrapped2.get_performance()
        overall_performance_only = wrapped4.get_performance()
        overall_performance, intervals = wrapped3.get_performance()
        self.assertTrue(
            np.array_equal(
                np.round(overall_performance, decimals=2),
                np.round(overall_performance_only, decimals=2),
            ))

        self.assertTrue(
            np.array_equal(np.round(overall_perf, decimals=2),
                           np.round(overall, decimals=2)))

        self.assertTrue(len(instance_performance.keys()) == 1)
        self.assertTrue(len(list(instance_performance.values())[0]) == 1)
        self.assertTrue(len(instance_perf.keys()) == 1)
        self.assertTrue(len(list(instance_perf.values())[0]) == 1)

        self.assertTrue(len(intervals) == 1)
        self.assertTrue(len(intervals[0]) == 0)
        self.assertTrue(len(interval_perf) == 1)
        self.assertTrue(len(interval_perf[0]) == 1)
Example #15
    def test_step(self):
        bench = LubyBenchmark()
        bench.config.instance_set = {
            0: [0, 0],
            1: [1, 1],
            2: [3, 4],
            3: [5, 6]
        }
        env = bench.get_environment()
        wrapped = PerformanceTrackingWrapper(env, 2)

        state = wrapped.reset()
        self.assertTrue(len(state) > 1)

        state, reward, done, _ = wrapped.step(1)
        self.assertTrue(len(state) > 1)
        self.assertTrue(reward <= 0)
        self.assertFalse(done)

        while not done:
            _, _, done, _ = wrapped.step(1)

        self.assertTrue(len(wrapped.overall_performance) == 1)
        self.assertTrue(len(wrapped.performance_intervals) == 0)
        self.assertTrue(len(wrapped.current_performance) == 1)
        self.assertTrue(len(wrapped.instance_performances.keys()) == 1)

        done = False
        while not done:
            _, _, done, _ = wrapped.step(1)
        done = False
        while not done:
            _, _, done, _ = wrapped.step(1)

        self.assertTrue(len(wrapped.performance_intervals) == 1)
        self.assertTrue(len(wrapped.current_performance) == 1)

        wrapped.reset()
        done = False
        while not done:
            _, _, done, _ = wrapped.step(1)
        wrapped.reset()
        done = False
        while not done:
            _, _, done, _ = wrapped.step(1)
        self.assertTrue(len(wrapped.instance_performances.keys()) == 3)

        wrapped.reset()
        done = False
        while not done:
            _, _, done, _ = wrapped.step(1)
        wrapped.reset()
        done = False
        while not done:
            _, _, done, _ = wrapped.step(1)
        self.assertTrue(len(wrapped.instance_performances.keys()) == 4)
Example #16
    logger = Logger(
        experiment_name="sigmoid_example",
        output_path=Path("plotting/data"),
        step_write_frequency=None,
        episode_write_frequency=None,
    )
    state_logger = logger.add_module(StateTrackingWrapper)
    performance_logger = logger.add_module(PerformanceTrackingWrapper)

    for s in seeds:
        # Log the seed
        logger.set_additional_info(seed=s)

        # Make & wrap benchmark environment
        env = bench.get_benchmark(seed=s)
        env = PerformanceTrackingWrapper(env, logger=performance_logger)
        env = StateTrackingWrapper(env, logger=state_logger)

        # Add env to logger
        logger.set_env(env)

        # Run random agent
        agent = RandomAgent(env)
        run_benchmark(env, agent, num_episodes, logger)

    # Close logger object
    logger.close()

    # Load performance of last seed into pandas DataFrame
    logs = load_logs(performance_logger.get_logfile())
    dataframe = log2dataframe(logs, wide=True)
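
    # Small, hedged follow-up (log2dataframe already requires pandas): inspect the
    # wide-format performance table directly.
    print(dataframe.head())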
Example #17
# Assumed additional imports (module paths follow DACBench's usual layout)
from pathlib import Path

from examples.example_utils import make_chainer_a3c

from dacbench.benchmarks import CMAESBenchmark
from dacbench.logger import Logger
from dacbench.wrappers import ObservationWrapper, PerformanceTrackingWrapper

# Make logger object
logger = Logger(experiment_name="CMAESBenchmark",
                output_path=Path("../plotting/data"))

# Make CMA-ES environment
# We use the configuration from the "Learning Step-Size Adaptation in CMA-ES" paper by Shala et al.
bench = CMAESBenchmark()
env = bench.get_benchmark()
logger.set_env(env)

# Wrap to track performance
performance_logger = logger.add_module(PerformanceTrackingWrapper)
env = PerformanceTrackingWrapper(env=env, logger=performance_logger)

# Also wrap to make the dictionary observations into an easy to work with list
env = ObservationWrapper(env)

# Make chainer agent
obs_size = env.observation_space.low.size
action_size = env.action_space.low.size
agent = make_chainer_a3c(obs_size, action_size)

# Training
num_episodes = 3
for i in range(num_episodes):
    # Reset environment to begin episode
    state = env.reset()