def evaluate(n_episodes):
    """Evaluate the "rlps-tcpr" submission for `n_episodes` episodes.

    Wraps the trained RLlib agent in a shortest-path policy and routes its
    actions through a RobustFlatlandGymEnv that re-orders/filters actions by
    a distance-to-target priority, then steps the underlying rail env.

    :param n_episodes: number of evaluation episodes to run.
    :return: tuple ``(pcs, returns, malfs)`` — per-episode completion rate,
        normalized episode return, and total malfunction count.
    """
    run = SUBMISSIONS["rlps-tcpr"]
    config, run = init_run(run)
    agent = ShortestPathRllibAgent(get_agent(config, run))
    env = get_env(config, rl=True)
    env_renderer = RenderTool(env, screen_width=8800)
    returns = []
    pcs = []
    malfs = []
    for _ in tqdm(range(n_episodes)):
        obs, _ = env.reset(regenerate_schedule=True, regenerate_rail=True)
        if RENDER:
            env_renderer.reset()
            env_renderer.render_env(show=True, frames=True,
                                    show_observations=False)
        # An empty observation dict means the env produced no agents to act on.
        if not obs:
            break
        ep_return = 0
        done = defaultdict(lambda: False)
        # Fresh robust wrapper per episode: it holds per-episode agent state.
        robust_env = RobustFlatlandGymEnv(rail_env=env,
                                          max_nr_active_agents=200,
                                          observation_space=None,
                                          priorizer=DistToTargetPriorizer(),
                                          allow_noop=True)
        # Priority order is computed once per episode from the initial handles.
        sorted_handles = robust_env.priorizer.priorize(
            handles=list(obs.keys()), rail_env=env)
        while not done['__all__']:
            actions = agent.compute_actions(obs, env)
            robust_actions = robust_env.get_robust_actions(
                actions, sorted_handles)
            obs, all_rewards, done, info = env.step(robust_actions)
            if RENDER:
                env_renderer.render_env(show=True, frames=True,
                                        show_observations=False)
                print('.', end='', flush=True)
            ep_return += np.sum(list(all_rewards.values()))
        # Completion rate: fraction of agents that reached their target.
        pc = np.sum(np.array([1 for a in env.agents if is_done(a)
                              ])) / env.get_num_agents()
        print("EPISODE PC:", pc)
        pcs.append(pc)
        # Normalize by the per-agent step budget so returns are comparable
        # across env sizes. NOTE(review): relies on the private attribute
        # `env._max_episode_steps`.
        returns.append(ep_return / (env._max_episode_steps
                                    * env.get_num_agents()))
        malfs.append(np.sum([a.malfunction_data['nr_malfunctions']
                             for a in env.agents]))
    return pcs, returns, malfs
def evaluate(n_episodes):
    """Evaluate the "ato" submission for `n_episodes` episodes.

    Runs the trained agent directly (no robustness wrapper, exploration
    disabled) and collects per-episode statistics.

    :param n_episodes: number of evaluation episodes to run.
    :return: tuple ``(pcs, returns, malfs)`` — per-episode completion rate,
        normalized episode return, and total malfunction count.
    """
    run = SUBMISSIONS["ato"]
    config, run = init_run(run)
    agent = get_agent(config, run)
    env = get_env(config, rl=True)
    env_renderer = RenderTool(env, screen_width=8800)
    returns = []
    pcs = []
    malfs = []
    for _ in tqdm(range(n_episodes)):
        obs, _ = env.reset(regenerate_schedule=True, regenerate_rail=True)
        if RENDER:
            env_renderer.reset()
            env_renderer.render_env(show=True, frames=True,
                                    show_observations=False)
        # An empty observation dict means the env produced no agents to act on.
        if not obs:
            break
        ep_return = 0
        done = defaultdict(lambda: False)
        while not done['__all__']:
            # Deterministic policy during evaluation (explore=False).
            actions = agent.compute_actions(obs, explore=False)
            obs, all_rewards, done, info = env.step(actions)
            if RENDER:
                env_renderer.render_env(show=True, frames=True,
                                        show_observations=False)
                print('.', end='', flush=True)
            ep_return += np.sum(list(all_rewards.values()))
        # Completion rate: fraction of agents that reached their target.
        pc = np.sum(np.array([1 for a in env.agents if is_done(a)
                              ])) / env.get_num_agents()
        print("EPISODE PC:", pc)
        pcs.append(pc)
        # Normalize by the per-agent step budget so returns are comparable
        # across env sizes. NOTE(review): relies on the private attribute
        # `env._max_episode_steps`.
        returns.append(ep_return / (env._max_episode_steps
                                    * env.get_num_agents()))
        malfs.append(np.sum([a.malfunction_data['nr_malfunctions']
                             for a in env.agents]))
    return pcs, returns, malfs
def evaluate(n_episodes, rl_prio=True):
    """Evaluate a shortest-path policy with CPR-robust action ordering.

    Actions always come from ``ShortestPathAgent``; the CprFlatlandGymEnv
    wrapper re-orders them by a same-start priority heuristic.

    :param n_episodes: number of evaluation episodes to run.
    :param rl_prio: if True, load the trained RL run/agent and build the env
        from its config; otherwise build a non-RL env.
        NOTE(review): the loaded ``agent`` is currently unused — RL-based
        prioritization was disabled (see NOTE below); loading is kept so
        env construction via ``config`` still works.
    :return: tuple ``(pcs, returns, malfs)`` — per-episode completion rate,
        normalized episode return, and total malfunction count.
    """
    agent = None
    if rl_prio:
        config, run = init_run()
        agent = get_agent(config, run)
        env = get_env(config, rl=True)
    else:
        env = get_env(rl=False)
    env_renderer = RenderTool(env, screen_width=8800)
    returns = []
    pcs = []
    malfs = []
    for _ in tqdm(range(n_episodes)):
        obs, _ = env.reset(regenerate_schedule=True, regenerate_rail=True)
        if RENDER:
            env_renderer.reset()
            env_renderer.render_env(show=True, frames=True,
                                    show_observations=False)
        # An empty observation dict means the env produced no agents to act on.
        if not obs:
            break
        ep_return = 0
        done = defaultdict(lambda: False)
        # Fresh robust wrapper per episode: it holds per-episode agent state.
        robust_env = CprFlatlandGymEnv(rail_env=env,
                                       max_nr_active_agents=200,
                                       observation_space=None,
                                       priorizer=NrAgentsSameStart(),
                                       allow_noop=True)
        # NOTE: RL-based prioritization (ranking handles by the trained
        # agent's action scores when rl_prio is set) was tried and disabled;
        # the heuristic priorizer is used unconditionally.
        sorted_handles = robust_env.priorizer.priorize(
            handles=list(obs.keys()), rail_env=env)
        while not done['__all__']:
            actions = ShortestPathAgent().compute_actions(obs, env)
            robust_actions = robust_env.get_robust_actions(
                actions, sorted_handles)
            obs, all_rewards, done, info = env.step(robust_actions)
            if RENDER:
                env_renderer.render_env(show=True, frames=True,
                                        show_observations=False)
                print('.', end='', flush=True)
            ep_return += np.sum(list(all_rewards.values()))
        # Completion rate: fraction of agents that reached their target.
        pc = np.sum(np.array([1 for a in env.agents if is_done(a)
                              ])) / env.get_num_agents()
        print("EPISODE PC:", pc)
        pcs.append(pc)
        # Normalize by the per-agent step budget so returns are comparable
        # across env sizes. NOTE(review): relies on the private attribute
        # `env._max_episode_steps`.
        returns.append(ep_return / (env._max_episode_steps
                                    * env.get_num_agents()))
        malfs.append(np.sum([a.malfunction_data['nr_malfunctions']
                             for a in env.agents]))
    return pcs, returns, malfs
if (time() - start_time) > TIME_LIMIT: skip(done) break if done['__all__']: total_reward = episode_end_info( all_rewards, total_reward, evaluation_number, steps, remote_client=remote_client) break except TimeoutException as err: print( "Timeout! Will skip this episode and go to the next.", err) break except TimeoutException as err: print( "Timeout during planning time. Will skip to next evaluation!", err) print("Evaluation of all environments complete...") print(remote_client.submit()) if __name__ == "__main__": config, run = init_run() evaluate(config, run)