Code example #1
File: check_agent.py  Project: omardrwch/rlberry
def _fit_agent_manager(agent, env="continuous_state", init_kwargs=None):
    """
    Check that the agent is compatible with :class:`~rlberry.manager.AgentManager`.

    Parameters
    ----------
    agent: rlberry agent module
        Agent class to test.
    env: tuple (env_ctor, env_kwargs) or str in {"continuous_state", "discrete_state"}, default="continuous_state"
        If a tuple, it gives the constructor and keyword arguments of the environment on which to test.
        If a string in {"continuous_state", "discrete_state"}, a default benchmark environment is used.
    init_kwargs : dict
        Arguments required by the agent's constructor.
    """
    if init_kwargs is None:
        init_kwargs = {}

    train_env = _make_env(env)
    try:
        agent = AgentManager(
            agent, train_env, fit_budget=5, n_fit=1, seed=SEED, init_kwargs=init_kwargs
        )
        agent.fit()
    except Exception as exc:
        raise RuntimeError("Agent not compatible with Agent Manager") from exc

    return agent
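A usage illustration (not part of the source file): the checker could be invoked as sketched below, assuming rlberry's UCBVIAgent is importable from rlberry.agents and runs with its default constructor arguments.

from rlberry.agents import UCBVIAgent  # assumption: this import path is available

# Run the compatibility check on the default discrete-state benchmark;
# pass init_kwargs if the agent's constructor needs extra arguments.
manager = _fit_agent_manager(UCBVIAgent, env="discrete_state", init_kwargs={})
print(manager.agent_handlers[0])  # the single fitted instance (n_fit=1)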
Code example #2
def test_agent_manager_and_multiple_managers_seeding(env, agent_class):
    agent_manager = AgentManager(agent_class,
                                 env,
                                 fit_budget=2,
                                 init_kwargs={"horizon": 10},
                                 n_fit=6,
                                 seed=3456)
    agent_manager_test = AgentManager(agent_class,
                                      env,
                                      fit_budget=2,
                                      init_kwargs={"horizon": 10},
                                      n_fit=6,
                                      seed=3456)

    multimanagers = MultipleManagers()
    multimanagers.append(agent_manager)
    multimanagers.append(agent_manager_test)
    multimanagers.run()

    stats1, stats2 = multimanagers.managers

    for ii in range(2, agent_manager.n_fit):
        traj1 = get_env_trajectory(stats1.agent_handlers[ii - 2].env,
                                   horizon=10)
        traj2 = get_env_trajectory(stats1.agent_handlers[ii - 1].env,
                                   horizon=10)
        traj3 = get_env_trajectory(stats1.agent_handlers[ii].env, horizon=10)

        traj1_test = get_env_trajectory(stats2.agent_handlers[ii - 2].env,
                                        horizon=10)
        traj2_test = get_env_trajectory(stats2.agent_handlers[ii - 1].env,
                                        horizon=10)
        traj3_test = get_env_trajectory(stats2.agent_handlers[ii].env,
                                        horizon=10)

        assert not compare_trajectories(traj1, traj2)
        assert not compare_trajectories(traj1, traj3)
        assert not compare_trajectories(traj2, traj3)
        assert compare_trajectories(traj1, traj1_test)
        assert compare_trajectories(traj2, traj2_test)
        assert compare_trajectories(traj3, traj3_test)

    for ii in range(2, agent_manager.n_fit):
        rand1 = stats1.agent_handlers[ii - 2].seeder.rng.integers(2**32)
        rand2 = stats1.agent_handlers[ii - 1].seeder.rng.integers(2**32)
        rand3 = stats1.agent_handlers[ii].seeder.rng.integers(2**32)

        rand1_test = stats2.agent_handlers[ii - 2].seeder.rng.integers(2**32)
        rand2_test = stats2.agent_handlers[ii - 1].seeder.rng.integers(2**32)
        rand3_test = stats2.agent_handlers[ii].seeder.rng.integers(2**32)

        assert rand1 != rand2
        assert rand1 != rand3
        assert rand2 != rand3
        assert rand1 == rand1_test
        assert rand2 == rand2_test
        assert rand3 == rand3_test

    stats1.clear_output_dir()
    stats2.clear_output_dir()
Code example #3
def check_bandit_agent(Agent, environment=BernoulliBandit, seed=42):
    """
    Function used to check a bandit agent in rlberry on a given bandit problem (a Bernoulli bandit by default).

    Parameters
    ----------
    Agent: rlberry agent module
        Agent class that we want to test.

    environment: rlberry env module
        Environment (i.e. bandit instance) on which to test the agent.

    seed : Seed sequence from which to spawn the random number generator.


    Returns
    -------
    result : bool
        Whether the agent is a valid/compatible bandit agent.

    Examples
    --------
    >>> from rlberry.agents.bandits import IndexAgent
    >>> from rlberry.utils import check_bandit_agent
    >>> import numpy as np
    >>> class UCBAgent(IndexAgent):
    ...     name = "UCB"
    ...     def __init__(self, env, **kwargs):
    ...         def index(r, t):
    ...             return np.mean(r) + np.sqrt(np.log(t**2) / (2 * len(r)))
    ...         IndexAgent.__init__(self, env, index, **kwargs)
    >>> check_bandit_agent(UCBAgent)
    True

    """
    env_ctor = environment
    env_kwargs = {}

    agent1 = AgentManager(Agent, (env_ctor, env_kwargs),
                          fit_budget=10,
                          n_fit=1,
                          seed=seed)
    agent2 = AgentManager(Agent, (env_ctor, env_kwargs),
                          fit_budget=10,
                          n_fit=1,
                          seed=seed)

    agent1.fit()
    agent2.fit()
    env = env_ctor(**env_kwargs)
    state = env.reset()
    result = True
    for _ in range(5):
        # test reproducibility on 5 actions
        action1 = agent1.agent_handlers[0].policy(state)
        action2 = agent2.agent_handlers[0].policy(state)
        if action1 != action2:
            result = False

    return result
Code example #4
def test_recursive_vs_not_recursive():
    env_ctor = NormalBandit
    env_kwargs = {}

    agent1 = AgentManager(UCBAgent, (env_ctor, env_kwargs),
                          fit_budget=10,
                          n_fit=1,
                          seed=TEST_SEED)

    agent2 = AgentManager(
        RecursiveUCBAgent,
        (env_ctor, env_kwargs),
        fit_budget=10,
        n_fit=1,
        seed=TEST_SEED,
    )

    agent1.fit()
    agent2.fit()
    env = env_ctor(**env_kwargs)
    state = env.reset()
    for _ in range(5):
        # test reproducibility on 5 actions
        action1 = agent1.agent_handlers[0].policy(state)
        action2 = agent2.agent_handlers[0].policy(state)
        assert action1 == action2
Code example #5
def test_equality():
    # Define train env
    train_env = (GridWorld, {})

    # Parameters
    params = dict(hyperparameter1=-1, hyperparameter2=100)
    eval_kwargs = dict(eval_horizon=10)

    # Run AgentManager
    params_per_instance = [dict(hyperparameter2=ii) for ii in range(4)]
    stats_agent1 = AgentManager(
        DummyAgent,
        train_env,
        fit_budget=5,
        eval_kwargs=eval_kwargs,
        init_kwargs=params,
        n_fit=4,
        seed=123,
        init_kwargs_per_instance=params_per_instance,
    )

    stats_agent2 = AgentManager(
        DummyAgent,
        train_env,
        fit_budget=5,
        eval_kwargs=eval_kwargs,
        init_kwargs=params,
        n_fit=4,
        seed=123,
        init_kwargs_per_instance=params_per_instance,
    )

    stats_agent3 = AgentManager(
        DummyAgent,
        train_env,
        fit_budget=42,
        eval_kwargs=eval_kwargs,
        init_kwargs=params,
        n_fit=4,
        seed=123,
        init_kwargs_per_instance=params_per_instance,
    )

    assert stats_agent1 == stats_agent2
    assert stats_agent1 != stats_agent3
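For reference, a small sketch of how the shared init_kwargs and init_kwargs_per_instance presumably combine for the four fitted instances (assumption: per-instance entries override the shared ones):

# Hypothetical merge illustrating the expected effective kwargs per instance.
shared = dict(hyperparameter1=-1, hyperparameter2=100)
per_instance = [dict(hyperparameter2=ii) for ii in range(4)]
effective = [{**shared, **extra} for extra in per_instance]
# effective[0] == {"hyperparameter1": -1, "hyperparameter2": 0}, and so on.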
Code example #6
def _create_and_fit_agent_manager(output_dir, outdir_id_style):
    env_ctor = GridWorld
    env_kwargs = dict(nrows=2, ncols=2, reward_at={(1, 1): 0.1, (2, 2): 1.0})

    manager = AgentManager(
        VIAgent,
        (env_ctor, env_kwargs),
        fit_budget=10,
        n_fit=3,
        output_dir=output_dir,
        outdir_id_style=outdir_id_style,
    )
    manager.fit()
    manager.save()
    return manager
Code example #7
File: check_agent.py  Project: omardrwch/rlberry
def check_fit_additive(agent, env="continuous_state", init_kwargs=None):
    """
    Check that fitting the agent twice with a smaller budget is equivalent to
    fitting it once with the combined budget (here, two fits of 3 versus one fit of 6).

    Parameters
    ----------
    agent: rlberry agent module
        Agent class to test.
    env: tuple (env_ctor, env_kwargs) or str in ["continuous_state", "discrete_state"], default="continuous_state"
        If a tuple, it gives the constructor and keyword arguments of the environment on which to test.
        If a string in ["continuous_state", "discrete_state"], a default benchmark environment is used.
    init_kwargs : dict
        Arguments required by the agent's constructor.
    """
    if init_kwargs is None:
        init_kwargs = {}
    train_env = _make_env(env)

    agent1 = AgentManager(
        agent, train_env, fit_budget=5, n_fit=1, seed=SEED, init_kwargs=init_kwargs
    )
    agent1.fit(3)
    agent1.fit(3)

    agent2 = AgentManager(
        agent, train_env, fit_budget=5, n_fit=1, seed=SEED, init_kwargs=init_kwargs
    )
    agent2.fit(6)

    result = check_agents_almost_equal(
        agent1.agent_handlers[0], agent2.agent_handlers[0]
    )

    assert (
        result
    ), "Error: fitting the agent twice for 3 steps is not equivalent to fitting it once for 6 steps."
Code example #8
def test_version():
    # Define train env
    train_env = (GridWorld, {})

    # Parameters
    params = dict(hyperparameter1=-1, hyperparameter2=100)
    eval_kwargs = dict(eval_horizon=10)

    # Run AgentManager
    params_per_instance = [dict(hyperparameter2=ii) for ii in range(4)]
    stats_agent1 = AgentManager(
        DummyAgent,
        train_env,
        fit_budget=5,
        eval_kwargs=eval_kwargs,
        init_kwargs=params,
        n_fit=4,
        seed=123,
        init_kwargs_per_instance=params_per_instance,
    )
    version = stats_agent1.rlberry_version
    assert (version is not None) and (len(version) > 0)
Code example #9
def test_hyperparam_optim_cmaes():
    # Define train env
    train_env = (GridWorld, {})

    # Run AgentManager
    stats_agent = AgentManager(
        DummyAgent,
        train_env,
        init_kwargs={},
        fit_budget=1,
        eval_kwargs={"eval_horizon": 5},
        n_fit=4,
    )

    # test hyperparameter optimization with CMA-ES sampler
    stats_agent.optimize_hyperparams(sampler_method="cmaes", n_trials=5)
    stats_agent.clear_output_dir()
Code example #10
File: generator.py  Project: omardrwch/rlberry
def experiment_generator():
    """
    Parse command-line arguments and yield AgentManager instances.
    """
    args = docopt(__doc__)
    max_workers = int(args["--max_workers"])
    if max_workers == -1:
        max_workers = None
    for (_, agent_manager_kwargs) in parse_experiment_config(
            Path(args["<experiment_path>"]),
            n_fit=int(args["--n_fit"]),
            max_workers=max_workers,
            output_base_dir=args["--output_dir"],
            parallelization=args["--parallelization"],
    ):
        if args["--enable_tensorboard"]:
            if check_packages.TENSORBOARD_INSTALLED:
                agent_manager_kwargs.update(dict(enable_tensorboard=True))
            else:
                logger.warning(
                    "Option --enable_tensorboard is not available: tensorboard is not installed."
                )

        yield AgentManager(**agent_manager_kwargs)
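A hypothetical driver loop that consumes the generator; fit() and save() are the AgentManager methods already used in the examples above.

# Sketch: run every experiment described by the parsed configuration.
for manager in experiment_generator():
    manager.fit()
    manager.save()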
Code example #11
def test_discount_optimization():
    class ValueIterationAgentToOptimize(ValueIterationAgent):
        @classmethod
        def sample_parameters(cls, trial):
            """
            Sample hyperparameters for hyperparam optimization using Optuna (https://optuna.org/)
            """
            gamma = trial.suggest_categorical("gamma", [0.1, 0.99])
            return {"gamma": gamma}

    env = (
        GridWorld,
        dict(
            nrows=3,
            ncols=10,
            reward_at={
                (1, 1): 0.1,
                (2, 9): 1.0
            },
            walls=((1, 4), (2, 4), (1, 5)),
            success_probability=0.9,
        ),
    )

    vi_params = {"gamma": 0.1, "epsilon": 1e-3}

    vi_stats = AgentManager(
        ValueIterationAgentToOptimize,
        env,
        fit_budget=0,
        eval_kwargs=dict(eval_horizon=20),
        init_kwargs=vi_params,
        n_fit=4,
        seed=123,
    )

    vi_stats.optimize_hyperparams(n_trials=5,
                                  n_fit=1,
                                  sampler_method="random",
                                  pruner_method="none")

    assert vi_stats.optuna_study
    vi_stats.clear_output_dir()
Code example #12
def test_hyperparam_optim_tpe():
    # Define train env
    train_env = (GridWorld, {})

    # Run AgentManager
    stats_agent = AgentManager(
        DummyAgent,
        train_env,
        fit_budget=1,
        init_kwargs={},
        eval_kwargs={"eval_horizon": 5},
        n_fit=4,
    )

    # test hyperparameter optimization with TPE sampler
    # using hyperopt default values
    sampler_kwargs = TPESampler.hyperopt_parameters()
    stats_agent.optimize_hyperparams(sampler_kwargs=sampler_kwargs, n_trials=5)
    stats_agent.clear_output_dir()
Code example #13
def test_hyperparam_optim_random(parallelization):
    # Define train env
    train_env = (GridWorld, {})

    # Run AgentManager
    stats_agent = AgentManager(
        DummyAgent,
        train_env,
        init_kwargs={},
        fit_budget=1,
        eval_kwargs={"eval_horizon": 5},
        n_fit=4,
        parallelization=parallelization,
    )

    # test hyperparameter optimization with random sampler
    stats_agent.optimize_hyperparams(sampler_method="random",
                                     n_trials=5,
                                     optuna_parallelization=parallelization)
    stats_agent.clear_output_dir()
Code example #14
def test_jax_dqn(lambda_):
    if not _IMPORT_SUCCESSFUL:
        return

    env = (gym_make, dict(id="CartPole-v0"))
    params = dict(
        chunk_size=4, batch_size=128, target_update_interval=5, lambda_=lambda_
    )

    stats = AgentManager(
        DQNAgent,
        env,
        fit_budget=20,
        eval_env=env,
        init_kwargs=params,
        n_fit=1,
        parallelization="thread",
    )
    stats.fit()
    stats.clear_output_dir()
Code example #15
File: check_agent.py  Project: omardrwch/rlberry
def check_save_load(agent, env="continuous_state", init_kwargs=None):
    """
    Check that the agent saves a non-empty file and can be loaded.

    Parameters
    ----------
    agent: rlberry agent module
        Agent class to test.
    env: tuple (env_ctor, env_kwargs) or str in {"continuous_state", "discrete_state"}, default="continuous_state"
        If a tuple, it gives the constructor and keyword arguments of the environment on which to test.
        If a string in {"continuous_state", "discrete_state"}, a default benchmark environment is used.
    init_kwargs : dict
        Arguments required by the agent's constructor.
    """
    if init_kwargs is None:
        init_kwargs = {}

    train_env = _make_env(env)
    env = train_env[0](**train_env[1])
    with tempfile.TemporaryDirectory() as tmpdirname:
        agent = AgentManager(
            agent,
            train_env,
            fit_budget=5,
            n_fit=1,
            seed=SEED,
            init_kwargs=init_kwargs,
            output_dir=tmpdirname,
        )
        agent.fit(3)
        assert (
            os.path.getsize(str(agent.output_dir_) + "/agent_handlers/idx_0.pickle") > 1
        ), "The saved file is empty."
        try:
            agent.load(str(agent.output_dir_) + "/agent_handlers/idx_0.pickle")
        except Exception as exc:
            raise RuntimeError("Failed to load the agent file.") from exc
Code example #16
def test_hyperparam_optim_grid():
    # Define train env
    train_env = (GridWorld, {})

    # Run AgentManager
    stats_agent = AgentManager(
        DummyAgent,
        train_env,
        init_kwargs={},
        fit_budget=1,
        eval_kwargs={"eval_horizon": 5},
        n_fit=4,
    )

    # test hyperparameter optimization with grid sampler
    search_space = {
        "hyperparameter1": [1, 2, 3],
        "hyperparameter2": [-5, 0, 5]
    }
    sampler_kwargs = {"search_space": search_space}
    stats_agent.optimize_hyperparams(n_trials=3 * 3,
                                     sampler_method="grid",
                                     sampler_kwargs=sampler_kwargs)
    stats_agent.clear_output_dir()
Code example #17
    HORIZON = 50
    BONUS_SCALE_FACTOR = 0.1
    MIN_DIST = 0.1

    params_ppo = {"gamma": GAMMA, "horizon": HORIZON, "learning_rate": 0.0003}

    eval_kwargs = dict(eval_horizon=HORIZON, n_simulations=20)

    # -------------------------------
    # Run AgentManager and save results
    # --------------------------------
    ppo_stats = AgentManager(
        PPOAgent,
        train_env,
        fit_budget=N_EPISODES,
        init_kwargs=params_ppo,
        eval_kwargs=eval_kwargs,
        n_fit=4,
        output_dir="dev/",
        parallelization="process",
    )
    ppo_stats.fit()  # fit the 4 agents
    ppo_stats_fname = ppo_stats.save()
    del ppo_stats

    # -------------------------------
    # Load and plot results
    # --------------------------------
    ppo_stats = AgentManager.load(ppo_stats_fname)

    # learning curves
    plot_writer_data(
Code example #18
    # -----------------------------
    N_EPISODES = 100
    GAMMA = 0.99
    HORIZON = 50

    params_ppo = {"gamma": GAMMA, "horizon": HORIZON, "learning_rate": 0.0003}

    eval_kwargs = dict(eval_horizon=HORIZON, n_simulations=20)

    # -----------------------------
    # Run AgentManager
    # -----------------------------
    ppo_stats = AgentManager(
        PPOAgent,
        train_env,
        fit_budget=N_EPISODES,
        init_kwargs=params_ppo,
        eval_kwargs=eval_kwargs,
        n_fit=4,
    )

    ppo_stats.set_writer(0,
                         SummaryWriter,
                         writer_kwargs={"comment": "worker_0"})
    ppo_stats.set_writer(1,
                         SummaryWriter,
                         writer_kwargs={"comment": "worker_1"})

    agent_manager_list = [ppo_stats]

    agent_manager_list[0].fit()
    agent_manager_list[0].save(
Code example #19
    }

    params_a2c = {"gamma": GAMMA, "horizon": HORIZON, "learning_rate": 0.0003}

    eval_kwargs = dict(eval_horizon=HORIZON, n_simulations=20)

    # -----------------------------
    # Run AgentManager
    # -----------------------------
    rsucbvi_stats = AgentManager(
        RSUCBVIAgent,
        train_env,
        fit_budget=N_EPISODES,
        init_kwargs=params,
        eval_kwargs=eval_kwargs,
        n_fit=4,
        seed=123,
        enable_tensorboard=True,
        default_writer_kwargs=dict(
            maxlen=N_EPISODES - 10,
            log_interval=5.0,
        ),
    )
    rskernel_stats = AgentManager(
        RSKernelUCBVIAgent,
        train_env,
        fit_budget=N_EPISODES,
        init_kwargs=params_kernel,
        eval_kwargs=eval_kwargs,
        n_fit=4,
        seed=123,
        enable_tensorboard=True,
Code example #20
        # we could also record actions with
        # self.env = WriterWrapper(self.env, self.writer,
        #                          write_scalar = "action")


env_ctor = GridWorld
env_kwargs = dict(
    nrows=3,
    ncols=10,
    reward_at={(1, 1): 0.1, (2, 9): 1.0},
    walls=((1, 4), (2, 4), (1, 5)),
    success_probability=0.7,
)

env = env_ctor(**env_kwargs)
agent = AgentManager(VIAgent, (env_ctor, env_kwargs), fit_budget=10, n_fit=3)

agent.fit(budget=10)
# comment the line above if you only want to load data from rlberry_data.


# We use the following preprocessing function to plot the cumulative reward.
def compute_reward(rewards):
    return np.cumsum(rewards)


# Plot of the cumulative reward.
output = plot_writer_data(
    agent, tag="reward", preprocess_func=compute_reward, title="Cumulative Reward"
)
# The output covers 500 global steps, i.e. the fit_budget of 10 episodes times the episode horizon.
Code example #21
    env = VecFrameStack(env, n_stack=4)
    env = ScalarizeEnvWrapper(env)
    return env


#
# Testing single agent
#

if __name__ == "__main__":
    #
    # Training several agents and comparing different hyperparams
    #

    stats = AgentManager(
        A2CAgent,
        train_env=(env_constructor, None),
        eval_env=(eval_env_constructor, None),
        eval_kwargs=dict(eval_horizon=200),
        agent_name="A2C baseline",
        fit_budget=5000,
        init_kwargs=dict(policy="CnnPolicy", verbose=10),
        n_fit=4,
        parallelization="process",
        output_dir="dev/stable_baselines_atari",
        seed=123,
    )

    stats.fit()
    stats.optimize_hyperparams(timeout=60, n_fit=2)
Code example #22
    # env = env_ctor(**env_kwargs)
    # agent = A2CAgent(env, 'MlpPolicy', verbose=1)
    # agent.fit(budget=1000)

    #
    # Training several agents and comparing different hyperparams
    #
    from rlberry.manager import AgentManager, MultipleManagers, evaluate_agents

    stats = AgentManager(
        A2CAgent,
        (env_ctor, env_kwargs),
        agent_name="A2C baseline",
        init_kwargs=dict(policy="MlpPolicy", verbose=1),
        fit_kwargs=dict(log_interval=1000),
        fit_budget=2500,
        eval_kwargs=dict(eval_horizon=400),
        n_fit=4,
        parallelization="process",
        output_dir="dev/stable_baselines",
        seed=123,
    )

    stats_alternative = AgentManager(
        A2CAgent,
        (env_ctor, env_kwargs),
        agent_name="A2C optimized",
        init_kwargs=dict(policy="MlpPolicy", verbose=1),
        fit_kwargs=dict(log_interval=1000),
        fit_budget=2500,
        eval_kwargs=dict(eval_horizon=400),
Code example #23
    "gamma": GAMMA,
    "horizon": HORIZON,
}

params_ppo = {"gamma": GAMMA, "horizon": HORIZON, "learning_rate": 0.0003}

eval_kwargs = dict(eval_horizon=HORIZON, n_simulations=20)

# -----------------------------
# Run AgentManager
# -----------------------------
oracle_stats = AgentManager(
    MBQVIAgent,
    d_train_env,
    fit_budget=0,
    init_kwargs=params_oracle,
    eval_kwargs=eval_kwargs,
    n_fit=4,
    agent_name="Oracle",
)
ppo_stats = AgentManager(
    PPOAgent,
    train_env,
    fit_budget=N_EPISODES,
    init_kwargs=params_ppo,
    eval_kwargs=eval_kwargs,
    n_fit=4,
    agent_name="PPO",
)

agent_manager_list = [oracle_stats, ppo_stats]
Code example #24
File: demo_dqn.py  Project: omardrwch/rlberry
#
# Create managers
#
if __name__ == "__main__":
    managers = MultipleManagers()

    # Standard DQN
    managers.append(
        AgentManager(
            DQNAgent,
            TRAIN_ENV,
            agent_name="DQN",
            init_kwargs=DQN_PARAMS,
            fit_kwargs=FIT_KWARGS,
            n_fit=N_FIT,
            max_workers=MAX_WORKERS,
            parallelization="process",
            seed=42,
            enable_tensorboard=ENABLE_TENSORBOARD,
            output_dir="temp/dqn_example",
        ))

    # DQN with Q(lambda)
    managers.append(
        AgentManager(
            DQNAgent,
            TRAIN_ENV,
            agent_name="DQN + Q($\\lambda$)",
            init_kwargs=DQN_LAMBDA_PARAMS,
            fit_kwargs=FIT_KWARGS,
Code example #25
File: plot_exp3_bandit.py  Project: omardrwch/rlberry
rewards = switching_rewards(T, rate=5.0)


# Construction of the experiment

env_ctor = AdversarialBandit
env_kwargs = {"rewards": rewards}

Agents_class = [EXP3Agent, BernoulliTSAgent]

agents = [
    AgentManager(
        Agent,
        (env_ctor, env_kwargs),
        init_kwargs={},
        fit_budget=T,
        n_fit=M,
        parallelization="process",
        mp_context="fork",
    )
    for Agent in Agents_class
]

# These parameters (parallelization="process", mp_context="fork") should enable parallel computing even in notebooks.


# Agent training
for agent in agents:
    agent.fit()

Code example #26
# Parameters of the problem
means = np.array([0, 0.9, 1])  # means of the arms
T = 3000  # Horizon
M = 20  # number of Monte Carlo simulations

# Construction of the experiment

env_ctor = NormalBandit
env_kwargs = {"means": means, "stds": 2 * np.ones(len(means))}

agent = AgentManager(
    UCBAgent,
    (env_ctor, env_kwargs),
    fit_budget=T,
    init_kwargs={"B": 2},
    n_fit=M,
    parallelization="process",
    mp_context="fork",
)
# These parameters (parallelization="process", mp_context="fork") should enable parallel computing even in notebooks.

# Agent training

agent.fit()


# Compute and plot (pseudo-)regret
def compute_pseudo_regret(actions):
    return np.cumsum(np.max(means) - means[actions.astype(int)])
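Following the plot_writer_data pattern of code example #20, the pseudo-regret could then be plotted roughly as below (a sketch; the "action" tag is an assumption about what the agent's writer records):

from rlberry.manager import plot_writer_data  # assumption: same helper as in code example #20

output = plot_writer_data(
    agent,
    tag="action",
    preprocess_func=compute_pseudo_regret,
    title="Cumulative Pseudo-Regret",
)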
Code example #27
File: demo_ppo_bonus.py  Project: omardrwch/rlberry
    "use_bonus": True,
    "uncertainty_estimator_kwargs": {
        "uncertainty_estimator_fn": uncertainty_estimator_fn
    },
}

eval_kwargs = dict(eval_horizon=HORIZON, n_simulations=20)

# -----------------------------
# Run AgentManager
# -----------------------------
ppo_stats = AgentManager(
    PPOAgent,
    env,
    fit_budget=N_EPISODES,
    init_kwargs=params_ppo,
    eval_kwargs=eval_kwargs,
    n_fit=4,
    agent_name="PPO",
)
ppo_bonus_stats = AgentManager(
    PPOAgent,
    env,
    fit_budget=N_EPISODES,
    init_kwargs=params_ppo_bonus,
    eval_kwargs=eval_kwargs,
    n_fit=4,
    agent_name="PPO-Bonus",
)

agent_manager_list = [ppo_bonus_stats, ppo_stats]
Code example #28
File: server_utils.py  Project: omardrwch/rlberry
def execute_message(message: interface.Message,
                    resources: interface.Resources) -> interface.Message:
    response = interface.Message.create(command=interface.Command.ECHO)
    # LIST_RESOURCES
    if message.command == interface.Command.LIST_RESOURCES:
        info = {}
        for rr in resources:
            info[rr] = resources[rr]["description"]
        response = interface.Message.create(info=info)
    # AGENT_MANAGER_CREATE_INSTANCE
    elif message.command == interface.Command.AGENT_MANAGER_CREATE_INSTANCE:
        params = message.params
        base_dir = pathlib.Path(metadata_utils.RLBERRY_DEFAULT_DATA_DIR)
        if "output_dir" in params:
            params[
                "output_dir"] = base_dir / "server_data" / params["output_dir"]
        else:
            params["output_dir"] = base_dir / "server_data/"
        agent_manager = AgentManager(**params)
        filename = str(agent_manager.save())
        response = interface.Message.create(info=dict(
            filename=filename,
            agent_name=agent_manager.agent_name,
            output_dir=str(agent_manager.output_dir).replace(
                "server_data/", "client_data/"),
        ))
        del agent_manager
    # AGENT_MANAGER_FIT
    elif message.command == interface.Command.AGENT_MANAGER_FIT:
        filename = message.params["filename"]
        budget = message.params["budget"]
        extra_params = message.params["extra_params"]
        agent_manager = AgentManager.load(filename)
        agent_manager.fit(budget, **extra_params)
        agent_manager.save()
        response = interface.Message.create(command=interface.Command.ECHO)
        del agent_manager
    # AGENT_MANAGER_EVAL
    elif message.command == interface.Command.AGENT_MANAGER_EVAL:
        filename = message.params["filename"]
        agent_manager = AgentManager.load(filename)
        eval_output = agent_manager.eval_agents(
            message.params["n_simulations"])
        response = interface.Message.create(data=dict(output=eval_output))
        del agent_manager
    # AGENT_MANAGER_CLEAR_OUTPUT_DIR
    elif message.command == interface.Command.AGENT_MANAGER_CLEAR_OUTPUT_DIR:
        filename = message.params["filename"]
        agent_manager = AgentManager.load(filename)
        agent_manager.clear_output_dir()
        response = interface.Message.create(
            message=f"Cleared output dir: {agent_manager.output_dir}")
        del agent_manager
    # AGENT_MANAGER_CLEAR_HANDLERS
    elif message.command == interface.Command.AGENT_MANAGER_CLEAR_HANDLERS:
        filename = message.params["filename"]
        agent_manager = AgentManager.load(filename)
        agent_manager.clear_handlers()
        agent_manager.save()
        response = interface.Message.create(
            message=f"Cleared handlers: {filename}")
        del agent_manager
    # AGENT_MANAGER_SET_WRITER
    elif message.command == interface.Command.AGENT_MANAGER_SET_WRITER:
        filename = message.params["filename"]
        agent_manager = AgentManager.load(filename)
        agent_manager.set_writer(**message.params["kwargs"])
        agent_manager.save()
        del agent_manager
    # AGENT_MANAGER_OPTIMIZE_HYPERPARAMS
    elif message.command == interface.Command.AGENT_MANAGER_OPTIMIZE_HYPERPARAMS:
        filename = message.params["filename"]
        agent_manager = AgentManager.load(filename)
        best_params_dict = agent_manager.optimize_hyperparams(
            **message.params["kwargs"])
        agent_manager.save()
        del agent_manager
        response = interface.Message.create(data=best_params_dict)
    # AGENT_MANAGER_GET_WRITER_DATA
    elif message.command == interface.Command.AGENT_MANAGER_GET_WRITER_DATA:
        # writer scalar data
        filename = message.params["filename"]
        agent_manager = AgentManager.load(filename)
        writer_data = agent_manager.get_writer_data()
        writer_data = writer_data or dict()
        for idx in writer_data:
            writer_data[idx] = writer_data[idx].to_csv(index=False)
        # tensorboard data
        tensorboard_bin_data = None
        if agent_manager.tensorboard_dir is not None:
            tensorboard_zip_file = rlberry.utils.io.zipdir(
                agent_manager.tensorboard_dir,
                agent_manager.output_dir / "tensorboard_data.zip",
            )
            if tensorboard_zip_file is not None:
                with open(tensorboard_zip_file, "rb") as zip_file:
                    tensorboard_bin_data = zip_file.read()
                tensorboard_bin_data = base64.b64encode(
                    tensorboard_bin_data).decode("ascii")
        response = interface.Message.create(
            data=dict(writer_data=writer_data,
                      tensorboard_bin_data=tensorboard_bin_data))
        del agent_manager
    # end
    return response
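For illustration, a client request for the AGENT_MANAGER_FIT branch might be built as sketched below; passing params through Message.create and the filename value are assumptions beyond what execute_message itself shows.

# Hypothetical request (sketch only).
msg = interface.Message.create(
    command=interface.Command.AGENT_MANAGER_FIT,
    params=dict(filename="server_data/saved_manager", budget=100, extra_params={}),
)
response = execute_message(msg, resources={})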
Code example #29
params["optql"] = {
    "horizon": HORIZON,
    "gamma": GAMMA,
    "bonus_scale_factor": 1.0,
}

eval_kwargs = dict(eval_horizon=HORIZON, n_simulations=20)

multimanagers = MultipleManagers()

multimanagers.append(
    AgentManager(
        UCBVIAgent,
        env,
        fit_budget=N_EP,
        init_kwargs=params["ucbvi"],
        eval_kwargs=eval_kwargs,
    ))

multimanagers.append(
    AgentManager(
        OptQLAgent,
        env,
        fit_budget=N_EP,
        init_kwargs=params["optql"],
        eval_kwargs=eval_kwargs,
    ))

multimanagers.run()
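After run(), the two managers could be compared with evaluate_agents, the helper imported from rlberry.manager in code example #22 (a sketch; the returned object is assumed to be a DataFrame of evaluation results):

from rlberry.manager import evaluate_agents

# Sketch: evaluate and compare the fitted managers.
results = evaluate_agents(list(multimanagers.managers))
print(results.head())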
Code example #30
    )

    params_greedy = dict(
        feature_map_fn=feature_map_fn,
        horizon=horizon,
        bonus_scale_factor=0.0,
        gamma=gamma,
    )

    params_oracle = dict(horizon=horizon, gamma=gamma)

    stats = AgentManager(
        LSVIUCBAgent,
        env,
        init_kwargs=params,
        fit_budget=n_episodes,
        eval_kwargs=eval_kwargs,
        n_fit=4,
        parallelization=parallelization,
    )

    # UCBVI baseline
    stats_ucbvi = AgentManager(
        UCBVIAgent,
        env,
        init_kwargs=params_ucbvi,
        fit_budget=n_episodes,
        eval_kwargs=eval_kwargs,
        n_fit=4,
        parallelization=parallelization,
    )