Example #1
import numpy as np
import pandas as pd

# set_random_seed, evaluate_policy, store_value_function, and rollout_agent are
# helpers from the surrounding project / rllib (see the imports in Example #3).


def run_experiment(agents, environment, args, seed=None):
    """Run a set of experiments, one per agent, and collect the logged statistics."""
    seed = args.seed if seed is None else seed
    df = pd.DataFrame()
    for name, agent in agents.items():
        # Reset the seed so that every agent sees the same random sequence.
        set_random_seed(seed)
        print(f"Running agent {name} on {args.env_name}")
        evaluate_policy(agent, environment)
        store_value_function(agent, environment, episode=0)

        rollout_agent(
            agent=agent,
            environment=environment,
            num_episodes=args.num_episodes,
            max_steps=args.max_steps,
            callback_frequency=1,
            callbacks=[evaluate_policy, store_value_function],
        )

        # Collect the per-episode statistics logged by the agent.
        df_ = pd.DataFrame(agent.logger.statistics)
        df_["name"] = name
        df_["seed"] = seed
        df_["time"] = np.arange(len(df_))
        # Store each episode's dual losses (num_iter values) in an object column.
        df_["duals"] = np.empty(len(df_), dtype=object)
        duals = agent.logger.all["dual_loss"]
        for i in range(len(df_)):
            df_.at[i, "duals"] = duals[i * args.num_iter : (i + 1) * args.num_iter]
        df = pd.concat((df, df_), sort=False)
    return df
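
For context, a minimal usage sketch (not part of the original listing): it builds an args namespace with the fields run_experiment reads (env_name, seed, num_episodes, max_steps, num_iter) and calls the function directly; the concrete values and the output filename are illustrative assumptions.

from argparse import Namespace

# Illustrative values; only the attribute names are required by run_experiment.
args = Namespace(
    env_name="RiverSwim-v0",
    seed=0,
    num_episodes=100,
    max_steps=200,
    num_iter=10,
)
environment = GymEnvironment(args.env_name, seed=args.seed)
agents = {}  # name -> agent mapping, e.g. built as in Example #3
results = run_experiment(agents, environment, args)
results.to_pickle(f"results_{args.seed}.pkl")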
Example #2
import importlib

# parse_config_file, gym_envs, DMSuiteEnvironment, and RewardTransformer are
# helpers from the surrounding project / rllib; GymEnvironment and
# set_random_seed are imported as in Example #3.


def init_experiment(args, **kwargs):
    """Initialize the agent and the environment for an experiment."""
    # Merge command-line arguments, keyword overrides, and the agent config file.
    arg_dict = vars(args)
    arg_dict.update(kwargs)
    arg_dict.update(parse_config_file(args.agent_config))
    arg_dict = {k: v for k, v in arg_dict.items() if v is not None}

    env_config = parse_config_file(args.env_config)
    args.max_steps = env_config.get("max_steps", 1000)

    # %% Set random seeds.
    set_random_seed(args.seed)

    # %% Initialize environment.
    if env_config["name"] in gym_envs:
        environment = GymEnvironment(env_config["name"], seed=args.seed)
    else:
        # DM Control Suite environments are named as "domain/task".
        env_name, env_task = env_config["name"].split("/")
        environment = DMSuiteEnvironment(env_name, env_task, seed=args.seed)

    # %% Initialize agent.
    agent_module = importlib.import_module("rllib.agent")
    agent = getattr(agent_module, f"{args.agent}Agent").default(
        environment,
        reward_transformer=RewardTransformer(scale=arg_dict.get("reward_scale", 1.0)),
        **arg_dict,
    )
    agent.logger.save_hparams(arg_dict)

    return agent, environment
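
A hedged sketch of how init_experiment might be driven from the command line: the flag names mirror the attributes the function reads (agent, agent_config, env_config, seed); the default values and config-file paths are assumptions, not part of the source.

import argparse

parser = argparse.ArgumentParser()
# The agent name must match an rllib agent class "{agent}Agent".
parser.add_argument("--agent", type=str, default="SAC")
parser.add_argument("--agent-config", type=str, default="configs/agent.yaml")  # hypothetical path
parser.add_argument("--env-config", type=str, default="configs/env.yaml")  # hypothetical path
parser.add_argument("--seed", type=int, default=0)
args = parser.parse_args()

agent, environment = init_experiment(args)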
Example #3
"""Python Script Template."""
from rllib.environment import GymEnvironment
from rllib.util.utilities import set_random_seed
from qreps.environment.random_action_wrapper import RandomActionWrapper

import os

from exps.utilities import parse_arguments, run_experiment
from exps.environments.utilities import get_saddle_agents, get_benchmark_agents

args = parse_arguments()
args.env_name = "RiverSwim-v0"
args.lr = 0.01
args.eta = 5.0

set_random_seed(args.seed)
env = GymEnvironment(args.env_name, seed=args.seed)
env.add_wrapper(RandomActionWrapper, p=args.random_action_p)

agents = get_saddle_agents(env, **vars(args))
agents.update(get_benchmark_agents(env, **vars(args)))

df = run_experiment(agents, env, args)
df.to_pickle(f"river_swim_results_{args.seed}.pkl")

os.system("python river_swim_plot.py")
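
river_swim_plot.py itself is not shown in the listing. As an assumption, here is a minimal sketch of what such a script could do with the pickled results from run_experiment (Example #1), using only the columns that function writes (name, seed, time, duals).

import glob

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Load all pickled result files, one per seed.
frames = [pd.read_pickle(path) for path in glob.glob("river_swim_results_*.pkl")]
df = pd.concat(frames, sort=False)

# Reduce each episode's list of dual losses to its mean.
df["mean_dual"] = df["duals"].apply(np.mean)

# Plot the mean dual loss per episode, averaged over seeds, for each agent.
fig, ax = plt.subplots()
for name, group in df.groupby("name"):
    curve = group.groupby("time")["mean_dual"].mean()
    ax.plot(curve.index, curve.values, label=name)
ax.set_xlabel("Episode")
ax.set_ylabel("Mean dual loss")
ax.legend()
fig.savefig("river_swim_duals.png")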