def rollout_agent(environment, base_agent, extender, num_steps, td_k=True):
    torch.manual_seed(SEED)
    np.random.seed(SEED)
    environment = GymEnvironment(environment, SEED)
    agent = extender.default(
        environment,
        base_agent_name=base_agent,
        num_steps=num_steps,
        num_samples=2,
        num_iter=2,
        num_epochs=2,
        td_k=td_k,
    )
    train_agent(
        agent,
        environment,
        num_episodes=NUM_EPISODES,
        max_steps=MAX_STEPS,
        plot_flag=False,
    )
    evaluate_agent(
        agent, environment, num_episodes=NUM_EPISODES, max_steps=MAX_STEPS, render=False
    )
    agent.logger.delete_directory()  # Cleanup directory.

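# A sketch of how the helper above might be parametrized (hypothetical choices:
# MVEAgent as the extender and "SAC" as the base-agent name are assumptions, not
# taken from this file):
import pytest

from rllib.agent import MVEAgent


@pytest.mark.parametrize("td_k", [True, False])
def test_rollout_mve(td_k):
    rollout_agent(
        "Pendulum-v0", base_agent="SAC", extender=MVEAgent, num_steps=1, td_k=td_k
    )
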
def test_tabular_interaction(agent, policy):
    LEARNING_RATE = 0.1
    environment = EasyGridWorld()
    critic = TabularQFunction(
        num_states=environment.num_states, num_actions=environment.num_actions
    )
    policy = policy(critic, 0.1)
    optimizer = torch.optim.Adam(critic.parameters(), lr=LEARNING_RATE)
    criterion = torch.nn.MSELoss  # Intentionally the class, not an instance; the agent instantiates it.
    agent = agent(
        critic=critic,
        policy=policy,
        criterion=criterion,
        optimizer=optimizer,
        target_update_frequency=TARGET_UPDATE_FREQUENCY,
        gamma=GAMMA,
    )
    train_agent(
        agent,
        environment,
        num_episodes=NUM_EPISODES,
        max_steps=MAX_STEPS,
        plot_flag=False,
    )
    evaluate_agent(agent, environment, 1, MAX_STEPS, render=False)
    agent.logger.delete_directory()  # Cleanup directory.

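# One plausible instantiation of the helper above (a sketch: QLearningAgent with
# an EpsGreedy policy fits a tabular critic, and EpsGreedy(critic, 0.1) matches
# the policy(critic, 0.1) call; both names are assumptions here):
from rllib.agent import QLearningAgent
from rllib.policy import EpsGreedy


def test_q_learning_on_gridworld():
    test_tabular_interaction(QLearningAgent, EpsGreedy)
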
def test_policies(environment, policy, batch_size):
    environment = GymEnvironment(environment, SEED)
    critic = NNQFunction(
        dim_state=environment.dim_observation,
        dim_action=environment.dim_action,
        num_states=environment.num_states,
        num_actions=environment.num_actions,
        layers=LAYERS,
        tau=TARGET_UPDATE_TAU,
    )
    policy = policy(critic, 0.1)
    optimizer = torch.optim.Adam(critic.parameters(), lr=LEARNING_RATE)
    criterion = torch.nn.MSELoss
    agent = SARSAAgent(
        critic=critic,
        policy=policy,
        criterion=criterion,
        optimizer=optimizer,
        batch_size=batch_size,
        target_update_frequency=TARGET_UPDATE_FREQUENCY,
        gamma=GAMMA,
    )
    train_agent(
        agent,
        environment,
        num_episodes=NUM_EPISODES,
        max_steps=MAX_STEPS,
        plot_flag=False,
    )
    evaluate_agent(agent, environment, 1, MAX_STEPS, render=False)
    agent.logger.delete_directory()  # Cleanup directory.

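# A concrete call sketch (hypothetical values; "CartPole-v0" has discrete actions,
# which an EpsGreedy policy over a Q-function requires):
from rllib.policy import EpsGreedy


def test_sarsa_eps_greedy():
    test_policies("CartPole-v0", EpsGreedy, batch_size=1)
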
def train(agent, environment, args):
    """Train agent."""
    train_agent(
        agent=agent,
        environment=environment,
        num_episodes=args.num_train,
        max_steps=args.max_steps,
        eval_frequency=args.eval_frequency,
        print_frequency=args.print_frequency,
        render=args.render_train,
    )

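# A minimal sketch of the namespace train() expects (field names inferred from
# the attribute accesses above; the values are illustrative only):
from argparse import Namespace

args = Namespace(
    num_train=100,  # number of training episodes
    max_steps=1000,  # step cap per episode
    eval_frequency=10,  # evaluate every 10 episodes
    print_frequency=1,  # print progress every episode
    render_train=False,  # skip rendering during training
)
# train(agent, environment, args)  # agent and environment supplied elsewhere
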
def rollout_agent(environment, agent):
    torch.manual_seed(SEED)
    np.random.seed(SEED)
    environment = GymEnvironment(environment, SEED)
    agent = agent.default(environment, num_iter=2, num_epochs=2)
    train_agent(
        agent,
        environment,
        num_episodes=NUM_EPISODES,
        max_steps=MAX_STEPS,
        plot_flag=False,
    )
    evaluate_agent(agent, environment, 1, MAX_STEPS, render=False)
    agent.logger.delete_directory()  # Cleanup directory.

def train_student_on_task(
    self, student, training_task, action_limit, eval_task_params=None, pretrain=False
):
    """Train a student agent on a task; return the trajectory, rewards, and dones."""
    train_agent(
        student,
        environment=training_task,
        callbacks=[my_callback],
        plot_flag=False,
        callback_frequency=1,
    )
    # Return the trajectory and rewards collected by the callback.
    global rews, dones
    return (None,), rews, dones

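# The method above references my_callback, rews, and dones without defining them.
# A plausible module-level sketch (the callback signature is an assumption about
# what train_agent passes through; the logger key mirrors its usage elsewhere):
rews, dones = [], []


def my_callback(agent, environment, episode):
    """Accumulate the return of each finished episode and mark it done."""
    global rews, dones
    rews.append(agent.logger.get("environment_return")[-1])
    dones.append(True)
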
def train_and_evaluate(
    agent, environment, params, plot_callbacks=None, save_milestones=None
):
    """Train and evaluate agent on environment."""
    # %% Train Agent
    agent.logger.save_hparams(params.toDict())
    with gpytorch.settings.fast_computations(), gpytorch.settings.fast_pred_var(), (
        gpytorch.settings.fast_pred_samples()
    ), gpytorch.settings.memory_efficient():
        train_agent(
            agent,
            environment,
            num_episodes=params.train_episodes,
            max_steps=params.environment_max_steps,
            plot_flag=params.plot_train_results,
            callback_frequency=1,
            print_frequency=params.print_frequency,
            save_milestones=save_milestones,
            render=params.render_train,
            callbacks=plot_callbacks,
        )
    agent.logger.export_to_json()  # Save statistics.

    # %% Test agent.
    metrics = dict()
    evaluate_agent(
        agent,
        environment,
        num_episodes=params.test_episodes,
        max_steps=params.environment_max_steps,
        render=params.render_test,
    )

    returns = np.mean(agent.logger.get("environment_return")[-params.test_episodes:])
    metrics.update({"test/test_env_returns": returns})
    returns = np.mean(agent.logger.get("environment_return")[:-params.test_episodes])
    metrics.update({"test/train_env_returns": returns})

    agent.logger.log_hparams(params.toDict(), metrics)

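# params is assumed to be DotMap-like (attribute access plus a toDict() method);
# a minimal sketch of the fields train_and_evaluate() reads, with illustrative
# values only:
from dotmap import DotMap

params = DotMap(
    train_episodes=100,
    test_episodes=10,
    environment_max_steps=1000,
    plot_train_results=False,
    print_frequency=1,
    render_train=False,
    render_test=False,
)
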
def main(args):
    """Run experiment."""
    set_random_seed(args.seed)
    env_config = parse_config_file(args.env_config_file)

    environment = GymEnvironment(
        env_config["name"], ctrl_cost_weight=env_config["action_cost"], seed=args.seed
    )
    reward_model = environment.env.reward_model()

    if args.exploration == "optimistic":
        dynamical_model = HallucinatedModel.default(environment, beta=args.beta)
        environment.add_wrapper(HallucinationWrapper)
    else:
        dynamical_model = TransformedModel.default(environment)

    kwargs = parse_config_file(args.agent_config_file)
    agent = getattr(
        importlib.import_module("rllib.agent"), f"{args.agent}Agent"
    ).default(
        environment=environment,
        dynamical_model=dynamical_model,
        reward_model=reward_model,
        thompson_sampling=args.exploration == "thompson",
        **kwargs,
    )

    train_agent(
        agent=agent,
        environment=environment,
        max_steps=env_config["max_steps"],
        num_episodes=args.train_episodes,
        render=args.render,
        print_frequency=1,
    )
    evaluate_agent(
        agent=agent,
        environment=environment,
        max_steps=env_config["max_steps"],
        num_episodes=args.test_episodes,
    )

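# A sketch of the command-line interface main() expects (flag names inferred from
# the args attributes used above; choices and defaults are assumptions):
import argparse

parser = argparse.ArgumentParser(description="Run a model-based RL experiment.")
parser.add_argument("--seed", type=int, default=0)
parser.add_argument("--env-config-file", type=str, required=True)
parser.add_argument("--agent-config-file", type=str, required=True)
parser.add_argument("--agent", type=str, default="MPC")
parser.add_argument(
    "--exploration",
    type=str,
    default="greedy",
    choices=["optimistic", "thompson", "greedy"],
)
parser.add_argument("--beta", type=float, default=1.0)
parser.add_argument("--train-episodes", type=int, default=10)
parser.add_argument("--test-episodes", type=int, default=1)
parser.add_argument("--render", action="store_true")

# main(parser.parse_args())
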
def test_tabular_interaction(agent, policy):
    LEARNING_RATE = 0.1
    environment = EasyGridWorld()
    critic = TabularQFunction(
        num_states=environment.num_states, num_actions=environment.num_actions
    )
    policy = policy(critic, 0.1)
    optimizer = torch.optim.Adam(critic.parameters(), lr=LEARNING_RATE)
    criterion = torch.nn.MSELoss
    memory = ExperienceReplay(max_len=MEMORY_MAX_SIZE)
    agent = agent(
        critic=critic,
        policy=policy,
        criterion=criterion,
        optimizer=optimizer,
        memory=memory,
        batch_size=BATCH_SIZE,
        target_update_frequency=TARGET_UPDATE_FREQUENCY,
        gamma=GAMMA,
    )
    train_agent(
        agent,
        environment,
        num_episodes=NUM_EPISODES,
        max_steps=MAX_STEPS,
        plot_flag=False,
    )
    evaluate_agent(agent, environment, 1, MAX_STEPS, render=False)
    agent.logger.delete_directory()  # Cleanup directory.

    torch.testing.assert_allclose(
        critic.table.shape,
        torch.Size([environment.num_actions, environment.num_states]),
    )

import numpy as np
import torch.optim

from rllib.agent import DPGAgent, TD3Agent  # noqa: F401
from rllib.dataset import ExperienceReplay, PrioritizedExperienceReplay  # noqa: F401
from rllib.environment import GymEnvironment
from rllib.util.parameter_decay import ExponentialDecay
from rllib.util.training.agent_training import evaluate_agent, train_agent

ENVIRONMENT = ["MountainCarContinuous-v0", "Pendulum-v0"][0]
NUM_EPISODES = 25
MAX_STEPS = 2500
GAMMA = 0.99
EPS_START = 1.0
EPS_END = 0.1
EPS_DECAY = 1e6
SEED = 0

torch.manual_seed(SEED)
np.random.seed(SEED)

environment = GymEnvironment(ENVIRONMENT, SEED)
noise = ExponentialDecay(EPS_START, EPS_END, EPS_DECAY)
agent = DPGAgent.default(environment, exploration_noise=noise, gamma=GAMMA)

train_agent(
    agent, environment, num_episodes=NUM_EPISODES, max_steps=MAX_STEPS, render=True
)
evaluate_agent(agent, environment, 1, MAX_STEPS)

import numpy as np
import torch.optim

from rllib.agent import SACAgent
from rllib.dataset import ExperienceReplay, PrioritizedExperienceReplay  # noqa: F401
from rllib.environment import GymEnvironment
from rllib.util.training.agent_training import evaluate_agent, train_agent

ENVIRONMENT = ["MountainCarContinuous-v0", "Pendulum-v0"][1]
NUM_EPISODES = 40
MAX_STEPS = 1000
GAMMA = 0.99
SEED = 1

torch.manual_seed(SEED)
np.random.seed(SEED)

environment = GymEnvironment(ENVIRONMENT, SEED)
agent = SACAgent.default(environment, eta=1.0, regularization=True, gamma=GAMMA)

train_agent(
    agent,
    environment,
    num_episodes=NUM_EPISODES,
    max_steps=MAX_STEPS,
    print_frequency=1,
    render=True,
)
evaluate_agent(agent, environment, num_episodes=1, max_steps=MAX_STEPS)

"""Working example of REPS.""" import numpy as np import torch from rllib.agent import REPSAgent from rllib.environment import GymEnvironment from rllib.util.training.agent_training import evaluate_agent, train_agent ETA = 1.0 NUM_EPISODES = 100 GAMMA = 1 SEED = 0 ENVIRONMENT = "CartPole-v0" MAX_STEPS = 200 torch.manual_seed(SEED) np.random.seed(SEED) environment = GymEnvironment(ENVIRONMENT, SEED) agent = REPSAgent.default(environment, epsilon=ETA, regularization=True, gamma=GAMMA) train_agent(agent, environment, num_episodes=NUM_EPISODES, max_steps=MAX_STEPS + 1) evaluate_agent(agent, environment, num_episodes=1, max_steps=MAX_STEPS + 1)