Ejemplo n.º 1
0
def train_chainer(
    agent, env, num_episodes=10, flatten_state=False, logger: Logger = None
):
    """
    Train an agent on an environment for a number of episodes.

    The agent must provide ``act_and_train(state, reward)`` and
    ``stop_episode_and_train(state, reward, done=...)``; the environment must
    provide ``reset()`` and a ``step(action)`` returning a 4-tuple.

    Parameters
    -------
    agent
        Agent that is trained while acting
    env
        Environment to interact with
    num_episodes : int
        Number of episodes to run
    flatten_state : bool
        If True, every observation is treated as a mapping whose values are
        flattened into a single float32 numpy array before reaching the agent
    logger : Logger
        Optional logger; its step/episode counters are advanced when given
    """

    def _to_vector(observation):
        # Collect the mapping's values in key order and flatten them into
        # one float32 vector.
        values = [observation[key] for key in observation.keys()]
        return np.array(flatten(values)).astype(np.float32)

    for i in range(num_episodes):
        state = env.reset()
        if flatten_state:
            state = _to_vector(state)

        r = 0  # accumulated reward of this episode
        reward = 0
        done = False
        while not done:
            action = agent.act_and_train(state, reward)
            next_state, reward, done, _ = env.step(action)
            r += reward
            state = _to_vector(next_state) if flatten_state else next_state
            if logger is not None:
                logger.next_step()

        # Hand the final transition of the episode to the agent.
        agent.stop_episode_and_train(state, reward, done=done)
        if logger is not None:
            logger.next_episode()
        print(
            f"Episode {i}/{num_episodes}...........................................Reward: {r}"
        )
Ejemplo n.º 2
0
    def test_dict_logging(self):
        """
        Check state logging for an environment with dictionary observations.

        Runs a static agent (action 3.5) for two episodes on the CMA-ES
        benchmark wrapped in a StateTrackingWrapper, loads the written log and
        verifies how many distinct names were logged per state component.
        """
        temp_dir = tempfile.TemporaryDirectory()
        # Registered right away so the directory is removed even when the run
        # or one of the assertions below fails.
        self.addCleanup(temp_dir.cleanup)

        seed = 0
        episodes = 2
        logger = Logger(
            output_path=Path(temp_dir.name),
            experiment_name="test_dict_logging",
            step_write_frequency=None,
            episode_write_frequency=1,
        )

        bench = CMAESBenchmark()
        bench.set_seed(seed)
        env = bench.get_environment()
        state_logger = logger.add_module(StateTrackingWrapper)
        wrapped = StateTrackingWrapper(env, logger=state_logger)
        agent = StaticAgent(env, 3.5)
        logger.set_env(env)

        run_benchmark(wrapped, agent, episodes, logger)
        state_logger.close()

        logs = load_logs(state_logger.get_logfile())
        dataframe = log2dataframe(logs, wide=False)

        # Expected number of distinct logged names per state component.
        state_parts = {
            "Loc": 10,
            "Past Deltas": 40,
            "Population Size": 1,
            "Sigma": 1,
            "History Deltas": 80,
            "Past Sigma Deltas": 40,
        }

        names = dataframe.name.unique()

        def field(name: str):
            # The second "_"-separated piece of a logged name is used as the
            # state component (presumably names look like
            # "state_<component>_<index>" -- confirm against the wrapper).
            _prefix, field_, *_index = name.split("_")
            return field_

        # groupby only merges adjacent items, so the names have to be sorted
        # by the very key they are grouped by.
        parts = groupby(sorted(names, key=field), key=field)

        for part, group_members in parts:
            expected_number = state_parts[part]
            actual_number = len(list(group_members))

            self.assertEqual(expected_number, actual_number)
Ejemplo n.º 3
0
    def test_box_logging(self):
        """
        Check state logging for the Luby benchmark environment.

        Runs a static agent (action 1) for ten episodes with a
        StateTrackingWrapper, loads the log in wide format and verifies that
        every expected state column exists and contains no missing values.
        """
        temp_dir = tempfile.TemporaryDirectory()
        # Registered right away so the directory is removed even when the run
        # or one of the assertions below fails.
        self.addCleanup(temp_dir.cleanup)

        seed = 0
        episodes = 10
        logger = Logger(
            output_path=Path(temp_dir.name),
            experiment_name="test_box_logging",
            step_write_frequency=None,
            episode_write_frequency=1,
        )

        bench = LubyBenchmark()
        bench.set_seed(seed)
        env = bench.get_environment()
        state_logger = logger.add_module(StateTrackingWrapper)
        wrapped = StateTrackingWrapper(env, logger=state_logger)
        agent = StaticAgent(env, 1)
        logger.set_env(env)

        run_benchmark(wrapped, agent, episodes, logger)
        state_logger.close()

        logs = load_logs(state_logger.get_logfile())
        dataframe = log2dataframe(logs, wide=True)

        state_columns = [
            "state_Action t (current)",
            "state_Step t (current)",
            "state_Action t-1",
            "state_Action t-2",
            "state_Step t-1",
            "state_Step t-2",
        ]

        for state_column in state_columns:
            # assertIn reports the missing column name on failure.
            self.assertIn(state_column, dataframe.columns)
            self.assertTrue((~dataframe[state_column].isna()).all())
Ejemplo n.º 4
0
    def test_logging(self):
        """
        Check the step and episode durations logged by EpisodeTimeWrapper.

        Runs a static agent (action 1) for five episodes on the Luby
        benchmark and verifies that every step has a duration, that the last
        step of every episode carries an episode duration, and that this
        episode duration equals the sum of the episode's step durations.
        """
        temp_dir = tempfile.TemporaryDirectory()
        # Registered right away so the directory is removed even when the run
        # or one of the assertions below fails.
        self.addCleanup(temp_dir.cleanup)

        num_episodes = 5
        logger = Logger(
            output_path=Path(temp_dir.name),
            experiment_name="test_logging",
        )
        bench = LubyBenchmark()
        env = bench.get_environment()
        time_logger = logger.add_module(EpisodeTimeWrapper)
        wrapped = EpisodeTimeWrapper(env, logger=time_logger)
        agent = StaticAgent(env=env, action=1)
        run_benchmark(wrapped, agent, num_episodes, logger)

        logger.close()

        logs = load_logs(time_logger.get_logfile())
        dataframe = log2dataframe(logs, wide=True)

        # all steps must have logged time
        self.assertTrue((~dataframe.step_duration.isna()).all())

        # each episode has a recorded time
        by_episode = dataframe.groupby("episode")
        # NOTE(review): idxmax yields index labels while iloc is positional;
        # this relies on the dataframe having a default 0..n-1 index - confirm.
        last_steps_per_episode = dataframe.iloc[by_episode.step.idxmax()]
        self.assertTrue(
            (~last_steps_per_episode.episode_duration.isna()).all())

        # episode time equals the sum of the steps in episode
        calculated_episode_times = by_episode.step_duration.sum()
        recorded_episode_times = last_steps_per_episode.episode_duration
        self.assertListEqual(calculated_episode_times.tolist(),
                             recorded_episode_times.tolist())
Ejemplo n.º 5
0
def run_optimal(results_path,
                benchmark_name,
                num_episodes,
                seeds=np.arange(10)):
    """
    Run the comparison policy of a benchmark and log its performance.

    Parameters
    -------
    results_path : Path
        Base directory; logs are written below ``results_path / benchmark_name``
    benchmark_name : str
        Name of the benchmark class, looked up as an attribute of ``benchmarks``
    num_episodes : int
        Number of episodes to run per seed
    seeds : iterable of int
        Seeds to run; one experiment is logged per seed
    """
    bench = getattr(benchmarks, benchmark_name)()

    # Pick the policy and the prefix of the experiment name together.
    name_prefix = "optimal"
    if benchmark_name == "LubyBenchmark":
        policy = optimal_luby
    elif benchmark_name == "SigmoidBenchmark":
        policy = optimal_sigmoid
    elif benchmark_name == "FastDownwardBenchmark":
        policy = optimal_fd
    elif benchmark_name == "CMAESBenchmark":
        policy = csa
        name_prefix = "csa"
    else:
        print("No comparison policy found for this benchmark")
        return

    for s in seeds:
        experiment_name = f"{name_prefix}_{s}"
        output_dir = results_path / benchmark_name
        logger = Logger(experiment_name=experiment_name,
                        output_path=output_dir)

        raw_env = bench.get_benchmark(seed=s)
        performance_logger = logger.add_module(PerformanceTrackingWrapper)
        env = PerformanceTrackingWrapper(raw_env, logger=performance_logger)
        agent = GenericAgent(env, policy)

        # Register agent, benchmark, wrapped env and seed with the logger.
        logger.add_agent(agent)
        logger.add_benchmark(bench)
        logger.set_env(env)
        logger.set_additional_info(seed=s)

        run_benchmark(env, agent, num_episodes, logger)

        logger.close()
Ejemplo n.º 6
0
def run_static(results_path,
               benchmark_name,
               action,
               num_episodes,
               seeds=np.arange(10)):
    """
    Run a StaticAgent with a fixed action on a benchmark and log performance.

    Parameters
    -------
    results_path : Path
        Base directory; logs are written below ``results_path / benchmark_name``
    benchmark_name : str
        Name of the benchmark class, looked up as an attribute of ``benchmarks``
    action
        Action handed to the StaticAgent; also part of the experiment name
    num_episodes : int
        Number of episodes to run per seed
    seeds : iterable of int
        Seeds to run; one experiment is logged per seed
    """
    bench = getattr(benchmarks, benchmark_name)()
    for s in seeds:
        experiment_name = f"static_{action}_{s}"
        output_dir = results_path / benchmark_name
        logger = Logger(experiment_name=experiment_name,
                        output_path=output_dir)

        raw_env = bench.get_benchmark(seed=s)
        performance_logger = logger.add_module(PerformanceTrackingWrapper)
        env = PerformanceTrackingWrapper(raw_env, logger=performance_logger)
        agent = StaticAgent(env, action)

        # Register agent, benchmark, wrapped env and run metadata.
        logger.add_agent(agent)
        logger.add_benchmark(bench)
        logger.set_env(env)
        logger.set_additional_info(seed=s, action=action)

        run_benchmark(env, agent, num_episodes, logger)

        logger.close()
Ejemplo n.º 7
0
def run_random(results_path, benchmark_name, num_episodes, seeds, fixed):
    """
    Run a DynamicRandomAgent on a benchmark and log its performance.

    Parameters
    -------
    results_path : Path
        Base directory; logs are written below ``results_path / benchmark_name``
    benchmark_name : str
        Name of the benchmark class, looked up as an attribute of ``benchmarks``
    num_episodes : int
        Number of episodes to run per seed
    seeds : iterable of int
        Seeds to run; one experiment is logged per seed
    fixed : int
        Passed to DynamicRandomAgent; values above 1 are also encoded in the
        experiment name
    """
    bench = getattr(benchmarks, benchmark_name)()
    for s in seeds:
        experiment_name = (
            f"random_fixed{fixed}_{s}" if fixed > 1 else f"random_{s}"
        )
        output_dir = results_path / benchmark_name
        logger = Logger(experiment_name=experiment_name,
                        output_path=output_dir)

        raw_env = bench.get_benchmark(seed=s)
        performance_logger = logger.add_module(PerformanceTrackingWrapper)
        env = PerformanceTrackingWrapper(raw_env, logger=performance_logger)
        agent = DynamicRandomAgent(env, fixed)

        # Register agent, benchmark, wrapped env and seed with the logger.
        logger.add_agent(agent)
        logger.add_benchmark(bench)
        logger.set_env(env)
        logger.set_additional_info(seed=s)

        run_benchmark(env, agent, num_episodes, logger)

        logger.close()
Ejemplo n.º 8
0
    def test_logging_multi_discrete(self):
        """
        Check action logging for an environment with multi-component actions.

        Runs a seeded RandomAgent for one episode on the Modea benchmark with
        an ActionFrequencyWrapper and compares the first ten logged values of
        every action column against fixed expected values.
        """
        temp_dir = tempfile.TemporaryDirectory()
        # Registered right away so the directory is removed even when the run
        # or one of the assertions below fails.
        self.addCleanup(temp_dir.cleanup)

        seed = 0
        logger = Logger(
            output_path=Path(temp_dir.name),
            experiment_name="test_multi_discrete_logging",
            step_write_frequency=None,
            episode_write_frequency=1,
        )

        bench = ModeaBenchmark()
        bench.set_seed(seed)
        env = bench.get_environment()
        env.seed_action_space(seed)
        action_logger = logger.add_module(ActionFrequencyWrapper)
        wrapped = ActionFrequencyWrapper(env, logger=action_logger)
        agent = RandomAgent(env)
        logger.set_env(env)

        run_benchmark(wrapped, agent, 1, logger)
        action_logger.close()

        logs = load_logs(action_logger.get_logfile())
        dataframe = log2dataframe(logs, wide=True)

        # Expected values per action column for rows 0..10, in row order.
        expected_by_column = {
            "action_0": [0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0],
            "action_1": [1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1],
            "action_10": [0, 0, 1, 0, 0, 0, 0, 2, 1, 2, 1],
            "action_2": [1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1],
            "action_3": [0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1],
            "action_4": [0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0],
            "action_5": [1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1],
            "action_6": [0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0],
            "action_7": [1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0],
            "action_8": [0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1],
            "action_9": [1, 2, 1, 0, 0, 1, 1, 1, 2, 0, 2],
        }
        # dict(enumerate(...)) rebuilds the {row: value} mapping per column.
        expected_actions = pd.DataFrame({
            name: dict(enumerate(values))
            for name, values in expected_by_column.items()
        })

        for column in expected_actions.columns:
            # TODO: there seems to be a bug here. Every so often the last
            # action is missing, hence only the first ten rows are compared.
            # Double checked: not a logging problem. Could be a seeding issue.
            self.assertListEqual(
                dataframe[column].to_list()[:10],
                expected_actions[column].to_list()[:10],
                f"Column  {column}",
            )
Ejemplo n.º 9
0
    def test_logging_discrete(self):
        """
        Check action logging for the Luby benchmark environment.

        Runs a seeded RandomAgent for ten episodes with an
        ActionFrequencyWrapper and compares the complete logged ``action``
        column against a fixed expected sequence of 80 actions.
        """
        temp_dir = tempfile.TemporaryDirectory()
        # Registered right away so the directory is removed even when the run
        # or the assertion below fails.
        self.addCleanup(temp_dir.cleanup)

        seed = 0
        logger = Logger(
            output_path=Path(temp_dir.name),
            experiment_name="test_discrete_logging",
            step_write_frequency=None,
            episode_write_frequency=1,
        )

        bench = LubyBenchmark()
        bench.set_seed(seed)
        env = bench.get_environment()
        env.seed_action_space(seed)

        action_logger = logger.add_module(ActionFrequencyWrapper)
        wrapped = ActionFrequencyWrapper(env, logger=action_logger)
        agent = RandomAgent(env)
        logger.set_env(env)

        run_benchmark(wrapped, agent, 10, logger)
        action_logger.close()

        logs = load_logs(action_logger.get_logfile())
        dataframe = log2dataframe(logs, wide=True)

        # 80 expected actions in log order, written ten per row.
        expected_actions = [
            0, 3, 5, 4, 3, 5, 5, 5, 3, 3,
            2, 1, 0, 1, 2, 0, 1, 1, 0, 1,
            2, 4, 3, 0, 1, 3, 0, 3, 3, 3,
            4, 4, 4, 5, 4, 0, 4, 2, 1, 3,
            4, 2, 1, 3, 3, 2, 0, 5, 2, 5,
            2, 1, 5, 3, 2, 5, 1, 0, 2, 3,
            1, 3, 2, 3, 2, 4, 3, 4, 0, 5,
            5, 1, 5, 0, 1, 5, 5, 3, 3, 2,
        ]

        self.assertListEqual(dataframe.action.to_list(), expected_actions)
Ejemplo n.º 10
0
# --benchmarks is mandatory: the loop below iterates over it, so leaving it
# at None would only fail later with a TypeError.
parser.add_argument("--benchmarks",
                    nargs="+",
                    type=str,
                    required=True,
                    help="Benchmarks to run PPO for")
parser.add_argument("--timesteps",
                    type=int,
                    default=1000000,
                    help="Number of timesteps to run")
parser.add_argument(
    "--seeds",
    nargs="+",
    type=int,
    default=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
    help="Seeds for evaluation",
)
args = parser.parse_args()

# Train one PPO2 model per (benchmark, seed) pair, each with its own logger.
for b in args.benchmarks:
    for s in args.seeds:
        logger = Logger(experiment_name=f"PPO_{b}_s{s}",
                        output_path=Path(args.outdir))
        perf_logger = logger.add_module(PerformanceTrackingWrapper)
        logger.set_additional_info(seed=s)
        # The performance logger is handed to make_benchmark via the config.
        config = {"seed": s, "logger": perf_logger, "benchmark": b}
        env = make_benchmark(config)
        model = PPO2("MlpPolicy", env)
        # Named so that it does not shadow the standard-library logging module.
        logger_callback = LoggerCallback(logger)
        model.learn(total_timesteps=args.timesteps, callback=logger_callback)
        logger.close()
Ejemplo n.º 11
0
def run_policy(results_path,
               benchmark_name,
               num_episodes,
               policy,
               seeds=np.arange(10)):
    """
    Run a given policy on a benchmark through a GenericAgent and log performance.

    Parameters
    -------
    results_path : Path
        Base directory; logs are written below ``results_path / benchmark_name``
    benchmark_name : str
        Name of the benchmark class, looked up as an attribute of ``benchmarks``
    num_episodes : int
        Number of episodes to run per seed
    policy
        Policy handed to the GenericAgent
    seeds : iterable of int
        Seeds to run; one experiment is logged per seed
    """
    bench = getattr(benchmarks, benchmark_name)()

    # Experiments are called "csa_<seed>" for CMA-ES, "optimal_<seed>" otherwise.
    name_prefix = "csa" if benchmark_name == "CMAESBenchmark" else "optimal"

    for s in seeds:
        experiment_name = f"{name_prefix}_{s}"
        output_dir = results_path / benchmark_name
        logger = Logger(experiment_name=experiment_name,
                        output_path=output_dir)

        raw_env = bench.get_benchmark(seed=s)
        performance_logger = logger.add_module(PerformanceTrackingWrapper)
        env = PerformanceTrackingWrapper(raw_env, logger=performance_logger)
        agent = GenericAgent(env, policy)

        # Register agent, benchmark and wrapped env with the logger.
        logger.add_agent(agent)
        logger.add_benchmark(bench)
        logger.set_env(env)

        run_benchmark(env, agent, num_episodes, logger)

        logger.close()
Ejemplo n.º 12
0
from pathlib import Path
from dacbench.logger import Logger
from dacbench.wrappers import PerformanceTrackingWrapper, ObservationWrapper
from examples.example_utils import make_chainer_a3c
from dacbench.benchmarks import CMAESBenchmark

# Make logger object; output goes to ../plotting/data, resolved relative to
# the current working directory
logger = Logger(experiment_name="CMAESBenchmark",
                output_path=Path("../plotting/data"))

# Make CMA-ES environment
# We use the configuration from the "Learning to Optimize Step-size Adaption in CMA-ES" Paper by Shala et al.
bench = CMAESBenchmark()
env = bench.get_benchmark()
# The logger is given the environment before any wrapper is applied
logger.set_env(env)

# Wrap to track performance; the wrapper writes through its own logger module
performance_logger = logger.add_module(PerformanceTrackingWrapper)
env = PerformanceTrackingWrapper(env=env, logger=performance_logger)

# Also wrap to make the dictionary observations into an easy to work with list
env = ObservationWrapper(env)

# Make chainer agent; its input and output sizes are taken from the lower
# bounds of the wrapped env's observation and action spaces
obs_size = env.observation_space.low.size
action_size = env.action_space.low.size
agent = make_chainer_a3c(obs_size, action_size)

# Training
num_episodes = 3
for i in range(num_episodes):
Ejemplo n.º 13
0
import matplotlib.pyplot as plt

# Run an experiment and log the results
if __name__ == "__main__":

    # Make benchmark
    bench = SigmoidBenchmark()

    # Run for 10 episodes each on 10 seeds
    num_episodes = 10
    seeds = range(10)

    # Make logger object and add modules for performance & state logging
    # NOTE(review): both write frequencies are None - presumably this turns
    # off periodic writing so data is written when the logger is closed;
    # confirm against the Logger documentation
    logger = Logger(
        experiment_name="sigmoid_example",
        output_path=Path("plotting/data"),
        step_write_frequency=None,
        episode_write_frequency=None,
    )
    # One logger module per wrapper type, created once and shared by all seeds
    state_logger = logger.add_module(StateTrackingWrapper)
    performance_logger = logger.add_module(PerformanceTrackingWrapper)

    for s in seeds:
        # Log the seed
        logger.set_additional_info(seed=s)

        # Make & wrap benchmark environment
        # A fresh env is built per seed; performance tracking is the inner
        # wrapper and state tracking the outer one
        env = bench.get_benchmark(seed=s)
        env = PerformanceTrackingWrapper(env, logger=performance_logger)
        env = StateTrackingWrapper(env, logger=state_logger)

        # Add env to logger
Ejemplo n.º 14
0
    def setUp(self) -> None:
        """
        Produce a log file for the tests by running a random agent.

        A RandomAgent is run on the Sigmoid benchmark for 80 episodes on each
        of the seeds 0, 1, 3, 4 and 5. For every step the step counter, the
        episode counter, the reward and the done flag are logged through a
        module logger registered for the environment. The name of the
        resulting log file is stored in ``self.log_file``.
        """
        self.temp_dir = tempfile.TemporaryDirectory()

        num_episodes = 80
        seed_list = [0, 1, 3, 4, 5]
        logger = Logger(
            output_path=Path(self.temp_dir.name),
            experiment_name="test_env",
            step_write_frequency=None,
            episode_write_frequency=None,
        )

        env = SigmoidBenchmark().get_benchmark()
        agent = RandomAgent(env)
        logger.set_env(env)

        # The module logger is registered with the env instance itself.
        env_logger = logger.add_module(env)
        for seed in seed_list:
            env.seed(seed)
            logger.set_additional_info(seed=seed)
            logger.reset_episode()

            for episode in range(num_episodes):
                state = env.reset()
                reward = 0
                done = False
                step = 0
                while not done:
                    action = agent.act(state, reward)
                    # Counters are logged before the step, outcomes after it.
                    env_logger.log("logged_step", step)
                    env_logger.log("logged_episode", episode)
                    state, reward, done, _ = env.step(action)
                    env_logger.log("reward", reward)
                    env_logger.log("done", done)
                    agent.train(state, reward)
                    logger.next_step()

                    step += 1
                agent.end_episode(state, reward)
                logger.next_episode()

        env.close()
        logger.close()

        self.log_file = env_logger.log_file.name
Ejemplo n.º 15
0
def run_dacbench(results_path, agent_method, num_episodes):
    """
    Run every benchmark on seeds 0-9 with a given agent and save the results.

    Parameters
    -------
    results_path : str
        Path to where results should be saved
    agent_method : function
        Method that takes an env as input and returns an agent
    num_episodes : int
        Number of episodes to run for each benchmark
    """

    for benchmark_name in benchmarks.__all__:
        # Resolve the exported name to the benchmark class.
        b = benchmarks.__dict__.get(benchmark_name)
        print(f"Evaluating {b.__name__}")
        for i in range(10):
            print(f"Seed {i}/10")
            bench = b()
            env = bench.get_benchmark(seed=i)

            # One logger per benchmark/seed, stored in a folder per benchmark.
            output_dir = Path(results_path) / b.__name__
            logger = Logger(experiment_name=f"seed_{i}",
                            output_path=output_dir)
            perf_logger = logger.add_module(PerformanceTrackingWrapper)
            logger.add_benchmark(bench)
            logger.set_env(env)
            logger.set_additional_info(seed=i)

            # The agent is created for the performance-tracking env.
            env = PerformanceTrackingWrapper(env, logger=perf_logger)
            agent = agent_method(env)
            logger.add_agent(agent)

            run_benchmark(env, agent, num_episodes, logger)

            logger.close()
Ejemplo n.º 16
0
from pathlib import Path

from dacbench.agents import RandomAgent
from dacbench.logger import Logger
from dacbench.runner import run_benchmark
from dacbench.benchmarks.modcma_benchmark import ModCMABenchmark
from dacbench.wrappers import ActionFrequencyWrapper

if __name__ == "__main__":
    # Make the ModCMA benchmark and get its environment
    bench = ModCMABenchmark()
    env = bench.get_environment()

    # Make logger object, named after the benchmark class
    logger = Logger(experiment_name=type(bench).__name__,
                    output_path=Path("../plotting/data"))
    logger.set_env(env)
    logger.add_benchmark(bench)

    # Wrap environment to track action frequency
    env = ActionFrequencyWrapper(
        env, logger=logger.add_module(ActionFrequencyWrapper))

    # Run random agent for 5 episodes and log actions to file
    agent = RandomAgent(env)
    try:
        run_benchmark(env, agent, 5, logger=logger)
    finally:
        # Always close the logger once the run is over, also when it raised.
        logger.close()
Ejemplo n.º 17
0
from pathlib import Path

from dacbench.agents import RandomAgent
from dacbench.logger import Logger
from dacbench.runner import run_benchmark
from dacbench.benchmarks import CMAESBenchmark
from dacbench.wrappers import StateTrackingWrapper

# Build the CMA-ES benchmark and fetch its environment
bench = CMAESBenchmark()
env = bench.get_environment()

# Create the logger; the experiment is named after the benchmark class
experiment_name = type(bench).__name__
logger = Logger(
    experiment_name=experiment_name,
    output_path=Path("../plotting/data"),
)
logger.set_env(env)

# Register a state-tracking module with the logger and wrap the env with it
state_logger = logger.add_module(StateTrackingWrapper)
env = StateTrackingWrapper(env, logger=state_logger)

# Let a random agent act for 5 episodes while state information is logged
# The results can be plotted with the plotting examples
agent = RandomAgent(env)
run_benchmark(env, agent, 5, logger=logger)
logger.close()
Ejemplo n.º 18
0
def run_dacbench(results_path,
                 agent_method,
                 num_episodes,
                 bench=None,
                 seeds=None):
    """
    Run benchmarks on several seeds with a given agent and save the results.

    Benchmarks whose environment cannot be created for a seed are reported
    and skipped.

    Parameters
    -------
    results_path : str
        Path to where results should be saved
    agent_method : function
        Method that takes an env as input and returns an agent
    num_episodes : int
        Number of episodes to run for each benchmark
    bench : list[str]
        Names of the benchmark classes to run, looked up as attributes of the
        ``benchmarks`` module. If None (default) every name listed in
        ``benchmarks.__all__`` is run.
    seeds : list[int]
        Seeds to run all benchmarks for. If None (default) seeds
        [0, ..., 9] are used.
    """

    if bench is None:
        benchmark_classes = map(benchmarks.__dict__.get, benchmarks.__all__)
    else:
        benchmark_classes = [getattr(benchmarks, b) for b in bench]

    # Materialised once so a one-shot iterator is usable for every benchmark.
    seeds = list(range(10)) if seeds is None else list(seeds)
    for b in benchmark_classes:
        print(f"Evaluating {b.__name__}")
        for i in seeds:
            print(f"Seed {i}/10")
            benchmark = b()
            try:
                env = benchmark.get_benchmark(seed=i)
            except Exception as err:
                # Best effort: skip this combination, but say why, and let
                # KeyboardInterrupt / SystemExit pass through.
                print(f"Skipping {b.__name__} with seed {i}: {err!r}")
                continue

            logger = Logger(
                experiment_name=f"seed_{i}",
                output_path=Path(results_path) / f"{b.__name__}",
            )
            perf_logger = logger.add_module(PerformanceTrackingWrapper)
            logger.add_benchmark(benchmark)
            logger.set_env(env)

            # The agent is created for the performance-tracking env.
            env = PerformanceTrackingWrapper(env, logger=perf_logger)
            agent = agent_method(env)
            logger.add_agent(agent)

            run_benchmark(env, agent, num_episodes, logger)

            logger.close()