def evaluate(experiment):
    """
    Evaluate one planner configuration at a fixed accuracy and append the
    result as a single CSV row.

    :param experiment: a (seed, accuracy, (agent_name, agent_config),
                       env_config, path) tuple describing one run.

    The simple regret and action gap are computed against a value-iteration
    oracle on the same seeded initial state.
    """
    # Prepare workspace
    seed, accuracy, agent_config, env_config, path = experiment
    gym.logger.set_level(gym.logger.DISABLED)
    path = Path(path)
    path.parent.mkdir(parents=True, exist_ok=True)

    # Make environment
    env = load_environment(env_config)

    # Make agent
    agent_name, agent_config = agent_config
    agent_config["accuracy"] = float(accuracy)
    agent_config["budget"] = 10**9  # effectively unlimited; accuracy is the stopping criterion
    # FIX: `budget` was previously referenced (in the log call and, when the
    # planner lacks `budget_used`, in the result dict) without ever being
    # assigned, raising NameError. Bind it to the configured budget up front.
    budget = agent_config["budget"]
    agent = agent_factory(env, agent_config)
    logger.debug("Evaluating agent {} with budget {} on seed {}".format(agent_name, budget, seed))

    # Compute true value: seed and reset BEFORE building the oracle, so both
    # the oracle and the agent act on the same initial state.
    env.seed(seed)
    observation = env.reset()
    vi = agent_factory(env, agent_configs()["value_iteration"])
    best_action = vi.act(observation)
    action = agent.act(observation)
    q = vi.state_action_value
    simple_regret = q[vi.mdp.state, best_action] - q[vi.mdp.state, action]
    # Gap between the best and second-best action values at the initial state
    gap = q[vi.mdp.state, best_action] - np.sort(q[vi.mdp.state, :])[-2]
    # Report the budget the planner actually consumed, when available
    if hasattr(agent.planner, "budget_used"):
        budget = agent.planner.budget_used

    # Save results (append mode; write the header only for a new/empty file)
    result = {
        "agent": agent_name,
        "budget": budget,
        "accuracy": agent.planner.config["accuracy"],
        "horizon": agent.planner.config["horizon"],
        "seed": seed,
        "simple_regret": simple_regret,
        "gap": gap
    }
    df = pd.DataFrame.from_records([result])
    with open(path, 'a') as f:
        df.to_csv(f, sep=',', encoding='utf-8', header=f.tell() == 0, index=False)
def evaluate(env, agent_name, budget=2000, seed=None):
    """
    Instantiate the named agent with the given budget, optionally seed it,
    let it act once, and return it.

    :param env: the environment the agent plans in
    :param agent_name: key into the module-level `agents` config table
    :param budget: planning budget written into the agent config
    :param seed: optional seed for the agent's RNG
    :return: the constructed agent
    """
    print("Evaluating", agent_name)
    config = agents[agent_name]
    config["budget"] = budget
    agent = agent_factory(env, config)
    if seed is not None:
        agent.seed(seed)
    # NOTE(review): the environment object itself (not an observation) is
    # passed to act() here — confirm this is what the agent expects.
    agent.act(env)
    return agent
def evaluate(env, agent_name, budget, seed=None):
    """
    Build the named agent with the given budget, seed both environment and
    agent when a seed is provided, reset the environment, and act once.

    :param env: the environment the agent plans in
    :param agent_name: key into the module-level `agents` config table
    :param budget: planning budget written into the agent config
    :param seed: optional common seed for the environment and the agent
    :return: the constructed agent
    """
    print("Evaluating", agent_name, "with budget", budget)
    config = agents[agent_name]
    config["budget"] = budget
    agent = agent_factory(env, config)
    if seed is not None:
        env.seed(seed)
        agent.seed(seed)
    initial_observation = env.reset()
    agent.act(initial_observation)
    return agent
def evaluate(experiment):
    """
    Run a single test episode for one (seed, budget, agent, env) experiment
    and append its outcome (total reward, discounted return, length) as one
    CSV row at `path`.

    :param experiment: a (seed, budget, (agent_name, agent_config),
                       (env_name, env_config), path) tuple.
    """
    # Unpack the experiment and prepare the output location
    seed, budget, agent_config, env_config, path = experiment
    gym.logger.set_level(gym.logger.DISABLED)
    path = Path(path)
    path.parent.mkdir(parents=True, exist_ok=True)

    # Build the environment and the agent under evaluation
    env_name, env_config = env_config
    env = load_environment(env_config)
    agent_name, agent_config = agent_config
    agent_config["budget"] = int(budget)
    agent = agent_factory(env, agent_config)

    # Evaluate a single monitored episode
    print("Evaluating agent {} with budget {} on seed {}".format(agent_name, budget, seed))
    evaluation = Evaluation(env,
                            agent,
                            directory=Path("out") / "planners" / agent_name,
                            num_episodes=1,
                            sim_seed=seed,
                            display_env=False,
                            display_agent=False,
                            display_rewards=False)
    evaluation.test()

    # Collect episode statistics from the monitor
    rewards = evaluation.monitor.stats_recorder.episode_rewards_[0]
    length = evaluation.monitor.stats_recorder.episode_lengths[0]
    total_reward = np.sum(rewards)
    # Discounted return, using the module-level discount factor `gamma`
    return_ = np.sum([gamma ** t * r for t, r in enumerate(rewards)])

    # Assemble the result row and append it to the CSV file
    result = {
        "env": env_name,
        "agent": agent_name,
        "budget": budget,
        "seed": seed,
        "total_reward": total_reward,
        "return": return_,
        "length": length
    }
    if race_strategy:
        result["pit_count"] = evaluation.pits
    with open(path, 'a') as f:
        pd.DataFrame.from_records([result]).to_csv(
            f, sep=',', encoding='utf-8', header=f.tell() == 0, index=False)
def __init__(self, env, config=None):
    """
    Build an MCTS agent whose tree prior and rollout policy are supplied by
    a separately configured "prior" agent.

    :param env: The environment
    :param config: The agent configuration. It has to contains the field:
                   - prior_agent is the config used to create the agent,
                     whose class is specified in its __class__ field.
    """
    # First, initialize only the Configurable level of the hierarchy so that
    # self.config exists before the full agent constructor runs.
    super(AbstractAgent, self).__init__(config)
    prior_config = config['prior_agent']
    self.prior_agent = agent_factory(env, prior_config)
    # Load the prior agent from a file, if one is set
    if 'model_save' in prior_config:
        self.prior_agent.load(prior_config['model_save'])
    # Now run the regular agent constructor with the merged configuration.
    super(MCTSWithPriorPolicyAgent, self).__init__(env, self.config)
    # The prior agent's policy serves both as the tree prior and as the
    # rollout policy of the planner.
    self.planner.prior_policy = self.agent_policy_available
    self.planner.rollout_policy = self.agent_policy_available
# (fragment) Tail of a DQN-style agent configuration dict — the opening brace
# and the preceding keys live above this chunk — followed by a one-step
# smoke-test rollout of the resulting agent.
},  # closes a nested sub-dict opened above this fragment
# "double": False,
"gamma": 0.75,  # 0.8
"n_steps": 1,
"batch_size": 32,  # 32
"memory_capacity": 15000,
"target_update": 50,  # presumably target-network sync period — confirm against the agent class
"exploration": {
    "method": "EpsilonGreedy",
    "tau": 6000,
    "temperature": 1.0,
    "final_temperature": 0.05
},
"loss_function": "l2"
}
# Build the agent and run a single act/step cycle as a sanity check,
# printing intermediate values for debugging.
agent = agent_factory(env, agent_config)
print(type(env.observation_space))
obs, done = env.reset(), False
# print(obs)
print("main")
print(type(env.action_space))
action = agent.act(obs)
print("main2")
print(action)
Observation, reward, done, d = env.step(action)
# print("main3")
# print(Observation)
def evaluate(experiment):
    """
    Evaluate one planner configuration at a fixed budget and append the
    result as a single CSV row.

    :param experiment: a (seed, budget, (agent_name, agent_config),
                       env_config, path) tuple describing one run.

    Depending on the hard-coded compute_regret / compute_return flags, this
    measures the simple regret and action gap against a value-iteration
    oracle, and/or rolls out a full monitored episode to measure returns.
    """
    # Prepare workspace
    seed, budget, agent_config, env_config, path = experiment
    gym.logger.set_level(gym.logger.DISABLED)
    path = Path(path)
    path.parent.mkdir(parents=True, exist_ok=True)
    # Make environment
    env = load_environment(env_config)
    # Make agent
    agent_name, agent_config = agent_config
    agent_config["budget"] = int(budget)
    agent = agent_factory(env, agent_config)
    logger.debug("Evaluating agent {} with budget {} on seed {}".format(
        agent_name, budget, seed))
    # Compute true value
    # NOTE(review): these switches are hard-coded; flip them here to change
    # which metrics are computed (the others are reported as 0).
    compute_regret = True
    compute_return = False
    if compute_regret:
        # Seed and reset BEFORE creating the oracle so both the
        # value-iteration agent and the planner see the same initial state.
        env.seed(seed)
        observation = env.reset()
        vi = agent_factory(env, agent_configs()["value_iteration"])
        best_action = vi.act(observation)
        action = agent.act(observation)
        q = vi.state_action_value
        # Regret of the chosen action w.r.t. the oracle-optimal action
        simple_regret = q[vi.mdp.state, best_action] - q[vi.mdp.state, action]
        # Gap between the best and second-best action values
        gap = q[vi.mdp.state, best_action] - np.sort(q[vi.mdp.state, :])[-2]
    else:
        simple_regret = 0
        gap = 0
    if compute_return:
        # Evaluate: run one monitored episode
        evaluation = Evaluation(env,
                                agent,
                                directory=Path("out") / "planners" / agent_name,
                                num_episodes=1,
                                sim_seed=seed,
                                display_env=False,
                                display_agent=False,
                                display_rewards=False)
        evaluation.test()
        rewards = evaluation.monitor.stats_recorder.episode_rewards_[0]
        length = evaluation.monitor.stats_recorder.episode_lengths[0]
        total_reward = np.sum(rewards)
        # Discounted sum of a reward signal, using the module-level `gamma`
        cum_discount = lambda signal: np.sum(
            [gamma**t * signal[t] for t in range(len(signal))])
        return_ = cum_discount(rewards)
        # Average of the discounted returns from every step of the episode
        mean_return = np.mean(
            [cum_discount(rewards[t:]) for t in range(len(rewards))])
    else:
        length = 0
        total_reward = 0
        return_ = 0
        mean_return = 0
    # Save results (append mode; header only when the file is new/empty)
    result = {
        "agent": agent_name,
        "budget": budget,
        "seed": seed,
        "total_reward": total_reward,
        "return": return_,
        "mean_return": mean_return,
        "length": length,
        "simple_regret": simple_regret,
        "gap": gap
    }
    df = pd.DataFrame.from_records([result])
    with open(path, 'a') as f:
        df.to_csv(f, sep=',', encoding='utf-8', header=f.tell() == 0, index=False)
# Environment/teacher setup dispatched on the requested environment name.
if args.env_name in ['highway-v0']:
    import highway_env
    from rl_agents.agents.common.factory import agent_factory

    # Vectorized training environment for the highway task
    env = make_vec_envs(args.env_name, seed, 1, 0.99, f'{args.emo_data_dir}/tmp/gym', device,
                        True, stats_path=stats_path, hyperparams=hyperparams, time=time,
                        atari_max_steps=args.atari_max_steps)
    # Make agent: a deterministic tree-search planner used as the teacher model
    agent_config = {
        "__class__": "<class 'rl_agents.agents.tree_search.deterministic.DeterministicPlannerAgent'>",
        "budget": 50,
        "gamma": 0.7,
    }
    th_model = agent_factory(gym.make(args.env_name), agent_config)
    # NOTE(review): rebinds `time` right after passing it to make_vec_envs —
    # confirm this flag rebinding (rather than the stdlib module) is intended.
    time = False
elif args.env_name in ['duckietown']:
    from a2c_ppo_acktr.duckietown.env import launch_env
    from a2c_ppo_acktr.duckietown.wrappers import NormalizeWrapper, ImgWrapper,\
        DtRewardWrapper, ActionWrapper, ResizeWrapper
    from a2c_ppo_acktr.duckietown.teacher import PurePursuitExpert

    # Build the Duckietown environment and stack the preprocessing wrappers
    env = launch_env()
    env = ResizeWrapper(env)
    env = NormalizeWrapper(env)
    env = ImgWrapper(env)
    env = ActionWrapper(env)
    env = DtRewardWrapper(env)
    # Create an imperfect demonstrator
    expert = PurePursuitExpert(env=env)