def test_random_agent():
    from agentos import run_agent
    from agentos.agents import RandomAgent
    from gym.envs.classic_control import CartPoleEnv

    environment = CartPoleEnv()
    environment.reset()
    agent = RandomAgent(environment=environment)
    done = agent.advance()
    assert not done, "CartPole never finishes after one random step."
    run_agent(agent)
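# For reference, a hypothetical sketch of what a minimal RandomAgent
# could look like. This is NOT the packaged agentos.agents.RandomAgent;
# it only illustrates the advance() contract the test above relies on
# (advance() returns whether the episode finished), and it assumes the
# agentos.Agent base class stores the env as self.environment, as the
# other examples in this section do.
import agentos


class RandomAgentSketch(agentos.Agent):
    def advance(self):
        # Take one uniformly random action and report episode termination.
        action = self.environment.action_space.sample()
        _obs, _reward, done, _info = self.environment.step(action)
        return done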
if __name__ == "__main__": """Create a mouse agent and see what it learns as its best guess of the size of cookies it is seeing.""" import argparse parser = argparse.ArgumentParser(description=( "Run a MouseAgent that learns by looking at cookies " "using Friston's Free Energy principle. This agent " "is an implementation of the tutorial by Rafal Bogacz at " "https://sciencedirect.com/science/article/pii/S0022249615000759")) parser.add_argument("--max-iters", type=int, default=150) parser.add_argument("-p", "--plot-results", action="store_true") args = parser.parse_args() print(f"Running mouse agent for {args.max_iters} steps...") print("------------------------------------------------") mouse = Mouse(CookieSensorEnv()) agentos.run_agent(mouse, max_iters=args.max_iters) if args.plot_results: plt.figure(figsize=(15, 10)) for k, v in mouse_stats.items(): if k != "belief_light_var" and k != "belief_size_var": plt.plot(v, label=k) for k, v in env_stats.items(): plt.plot(v, label=k) plt.legend() plt.title("Mouse beliefs over time") plt.show()
        return int(
            max(0, round(self.nn(np.array(obs)[np.newaxis]).numpy()[0][0]))
        )


class RandomTFAgent(agentos.Agent):
    def __init__(self, environment, policy):
        super().__init__(environment=environment, policy=policy)
        self.ret_vals = []

    def advance(self):
        trajs = agentos.rollout(self.policy, self.environment, max_steps=2000)
        self.ret_vals.append(sum(trajs.rewards))


if __name__ == "__main__":
    from gym.envs.classic_control import CartPoleEnv

    random_nn_agent = RandomTFAgent(
        environment=CartPoleEnv,
        policy=SingleLayerTFPolicy(
            CartPoleEnv().action_space,
            CartPoleEnv().observation_space,
        ),
    )
    agentos.run_agent(random_nn_agent, max_iters=10)
    print(
        f"Agent done!\n"
        f"Num rollouts: {len(random_nn_agent.ret_vals)}\n"
        f"Avg return: {np.mean(random_nn_agent.ret_vals)}\n"
        f"Max return: {max(random_nn_agent.ret_vals)}\n"
        f"Median return: {np.median(random_nn_agent.ret_vals)}\n"
    )
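# Hypothetical sketch of the SingleLayerTFPolicy constructed above; the
# dangling `return` at the top of this fragment is the tail of such a
# policy's action method. Assumes TF2/Keras and CartPole's Discrete(2)
# action space; the class and method names here are illustrative, not
# the repo's actual implementation.
import numpy as np
import tensorflow as tf


class SingleLayerTFPolicySketch:
    def __init__(self, action_space, observation_space):
        self.action_space = action_space
        # A single randomly initialized dense unit: obs -> scalar score.
        self.nn = tf.keras.Sequential(
            [tf.keras.layers.Dense(1, input_shape=observation_space.shape)]
        )

    def compute_action(self, obs):
        # Round the scalar output to the nearest non-negative action index.
        return int(
            max(0, round(self.nn(np.array(obs)[np.newaxis]).numpy()[0][0]))
        )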
    def __init__(self, environment, policy):
        super().__init__(environment=environment, policy=policy)
        self.first_obs = self.environment.reset()

    def advance(self):
        print("Training")
        self.learn()
        print("Evaluating")
        t = agentos.rollout(
            self.policy, self.environment.__class__, max_steps=200
        )
        print(f"Finished evaluating policy, return: {sum(t.rewards)}")

    def learn(self):
        self.policy.improve(self.environment)


if __name__ == "__main__":
    from gym.envs.classic_control import CartPoleEnv

    env_class = CartPoleEnv
    my_agent = OnlineBatchAgent(
        environment=env_class(),
        policy=EpsilonGreedyTFPolicy(
            env_class().action_space, env_class().observation_space
        ),
    )
    agentos.run_agent(my_agent, max_iters=100)
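# The agents above treat agentos.rollout() as: take a policy and an env
# *class*, run up to max_steps, and return a trajectory object with a
# `rewards` list. A hypothetical minimal version, for illustration only
# (not the packaged agentos.rollout):
from collections import namedtuple

Trajectory = namedtuple("Trajectory", ["observations", "actions", "rewards"])


def rollout_sketch(policy, env_class, max_steps=200):
    env = env_class()
    obs = env.reset()
    observations, actions, rewards = [obs], [], []
    for _ in range(max_steps):
        action = policy.compute_action(obs)
        obs, reward, done, _info = env.step(action)
        observations.append(obs)
        actions.append(action)
        rewards.append(reward)
        if done:
            break
    return Trajectory(observations, actions, rewards)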
from collections import deque

from numpy import random as np_random

import agentos
from env import MultiChatEnv
from env_utils import CommandLineClient


class ChatBot(agentos.Agent):
    """A simple chatbot that speaks by parroting back things it has heard."""

    def __init__(self, env):
        super().__init__(env)
        self.memory = deque(maxlen=2048)
        self.reply_flag = False

    def advance(self):
        msg = ""
        if self.reply_flag:
            msg = np_random.choice(self.memory)
            self.reply_flag = False
        obs, reward, done, _ = self.environment.step(msg)
        if obs:
            self.memory.append(obs)
            self.reply_flag = True


if __name__ == "__main__":
    env_generator = MultiChatEnv()
    agentos.run_agent(ChatBot, env_generator, 1, as_thread=True)
    cmd_line = CommandLineClient(env_generator())
    cmd_line.start()
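# The ChatBot above leans on an env contract worth spelling out: each
# env.step(msg) sends the agent's outgoing message (possibly "") and
# returns (obs, reward, done, info), where obs is the latest incoming
# message, or "" when nothing was said. A hypothetical interaction:
#
#   env = env_generator()
#   obs, _, _, _ = env.step("")       # stay silent, just listen
#   obs, _, _, _ = env.step("hello")  # speak; obs carries any reply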
description="Run reinforce with a simple TF policy on gym CartPole. " "One rollout per call to agent.advance(), " "200 steps per rollout.", ) parser.add_argument( "max_iters", type=int, metavar="MAX_ITERS", help="How many times to call advance() on agent.", ) parser.add_argument("--rollouts_per_iter", type=int, default=1) parser.add_argument("--max_steps_per_rollout", type=int, default=200) parser.add_argument("--discount_rate", type=float, default=0.9) args = parser.parse_args() reinforce_agent = ReinforceAgent( CartPoleEnv(), TwoLayerTFPolicy(), rollouts_per_iter=args.rollouts_per_iter, max_steps_per_rollout=args.max_steps_per_rollout, discount_rate=args.discount_rate, ) agentos.run_agent( reinforce_agent, max_iters=args.max_iters, ) print("Agent done!") if reinforce_agent.ret_vals: print(f"Num rollouts: {len(reinforce_agent.ret_vals)}\n" f"Avg return: {np.mean(reinforce_agent.ret_vals)}\n" f"Max return: {max(reinforce_agent.ret_vals)}\n" f"Median return: {np.median(reinforce_agent.ret_vals)}\n")
def run(agent_file, hz, max_iters):
    """Run an agent by calling advance() on it until it returns True."""
    agent = load_agent_from_path(agent_file)
    agentos.run_agent(agent, hz=hz, max_iters=max_iters)
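# run() above depends on load_agent_from_path(), which is defined
# elsewhere. A hypothetical sketch of such a loader, assuming the agent
# file defines exactly one agentos.Agent subclass that is constructible
# with no arguments (the real loader may differ):
import importlib.util
import inspect

import agentos


def load_agent_from_path_sketch(agent_file):
    spec = importlib.util.spec_from_file_location("agent_module", agent_file)
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    for _name, obj in inspect.getmembers(module, inspect.isclass):
        if issubclass(obj, agentos.Agent) and obj is not agentos.Agent:
            return obj()
    raise ValueError(f"No agentos.Agent subclass found in {agent_file}")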
parser = argparse.ArgumentParser(description="Run an RLlibAgent.")
parser.add_argument(
    "env_module",
    metavar="ENV_MODULE",
    type=str,
    help="The python module of the env; it will be imported, so it "
    "must be on the pythonpath. If this is the empty string, "
    "ENV_CLASSNAME is assumed to be a Gym Env id "
    "(e.g., CartPole-v1) instead of a classname.",
)
parser.add_argument(
    "env_classname",
    metavar="ENV_CLASSNAME",
    type=str,
    help="The env class for the agent to use.",
)
parser.add_argument(
    "algorithm",
    metavar="ALGO",
    type=str,
    help="The name of an RLlib algo. For a list of algos, "
    "see https://github.com/ray-project/ray/blob/"
    "master/rllib/agents/registry.py",
)
args = parser.parse_args()

env = args.env_classname
if args.env_module:
    module = importlib.import_module(args.env_module)
    env = getattr(module, args.env_classname)
run_agent(RLlibAgent, env, algo_name=args.algorithm)
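# Hypothetical example invocations of the script above (the script and
# module names are illustrative; PPO and DQN are standard RLlib algos):
#
#   # Empty ENV_MODULE, so ENV_CLASSNAME is treated as a Gym Env id:
#   python main.py "" CartPole-v1 PPO
#
#   # Import MyEnv from my_envs.py (which must be on the pythonpath):
#   python main.py my_envs MyEnv DQN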