def render_env(env, fname):
    """Render the environment once and save the frame to `fname`."""
    env_renderer = RenderTool(env, gl="PGL")
    env_renderer.render_env()
    image = env_renderer.get_image()
    pil_image = PIL.Image.fromarray(image)
    pil_image.save(fname)
def render_env(env):
    """Render the environment once and append the frame to the module-level `images` list."""
    env_renderer = RenderTool(env, gl="PGL")
    env_renderer.render_env()
    image = env_renderer.get_image()
    pil_image = PIL.Image.fromarray(image)
    images.append(pil_image)
    print(len(images))
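# Minimal usage sketch for the two helpers above (an assumption, not from the
# original source): `env` is an existing RailEnv, `images` is the module-level
# list that render_env(env) appends to, and the collected frames are stitched
# into an animated GIF with PIL at the end of the episode.
images = []
obs, info = env.reset()
for step in range(100):
    action_dict = {a: 2 for a in env.get_agent_handles()}  # e.g. MOVE_FORWARD for all agents
    obs, rewards, done, info = env.step(action_dict)
    render_env(env)  # appends one frame to `images`
    if done['__all__']:
        break
images[0].save("episode.gif", save_all=True, append_images=images[1:],
               duration=60, loop=0)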
def render(self, mode='human'):
    # TODO: Merge both strategies (Jupyter vs .py)
    # In .py files:
    #   self.renderer.render_env(show=False, show_observations=False, show_predictions=False)
    # In Jupyter notebooks:
    env_renderer = RenderTool(self.flatland_env, gl="PILSVG")
    env_renderer.render_env()
    image = env_renderer.get_image()
    pil_image = Image.fromarray(image)
    display(pil_image)  # IPython.display.display: shows the frame inline in the notebook
    return image
def createEnvSet(nStart, nEnd, sDir, bSmall=True):
    print(f"Generate envs (small={bSmall}) in dir {sDir}:")

    # Save rendered previews in an images/ subdirectory of the target dir
    sDirImages = os.path.join(sDir, "images")
    if not os.path.exists(sDirImages):
        os.makedirs(sDirImages)

    for test_id in range(nStart, nEnd):
        env = create_test_env(RandomTestParams_small, test_id, sDir)

        oRender = RenderTool(env, gl="PILSVG")
        oRender.render_env()
        g2img = oRender.get_image()
        imgPIL = Image.fromarray(g2img)
        imgPIL.save(os.path.join(sDirImages, "Level_{}.png".format(test_id)))
def eval_policy(env_params, checkpoint, n_eval_episodes, max_steps, action_size,
                state_size, seed, render, allow_skipping, allow_caching):
    # Evaluation is faster on CPU (except if you use a really huge policy)
    parameters = {'use_gpu': False}

    # policy = DDDQNPolicy(state_size, action_size, Namespace(**parameters), evaluation_mode=True)
    # policy.qnetwork_local = torch.load(checkpoint, map_location={'cuda:0': 'cpu'})

    env_params = Namespace(**env_params)

    # Environment parameters
    n_agents = env_params.n_agents
    x_dim = env_params.x_dim
    y_dim = env_params.y_dim
    n_cities = env_params.n_cities
    max_rails_between_cities = env_params.max_rails_between_cities
    max_rails_in_city = env_params.max_rails_in_city

    # Load one pre-trained attention agent per train
    agents = []
    for agent_id in range(n_agents):
        agent = AttentionAgent(num_in_pol=state_size,
                               num_out_pol=action_size,
                               hidden_dim=256,
                               lr=0.001)
        agent.policy = torch.load(
            os.path.join(checkpoint, f'2300_agent{agent_id}.pth'),
            map_location=torch.device('cpu'))
        agent.policy.eval()
        agents.append(agent)

    # Malfunction and speed profiles
    # TODO pass these parameters properly from main!
    malfunction_parameters = MalfunctionParameters(
        malfunction_rate=1. / 2000,  # Rate of malfunctions
        min_duration=20,             # Minimal duration
        max_duration=50              # Max duration
    )

    # Only fast trains in Round 1
    speed_profiles = {
        1.: 1.0,       # Fast passenger train
        1. / 2.: 0.0,  # Fast freight train
        1. / 3.: 0.0,  # Slow commuter train
        1. / 4.: 0.0   # Slow freight train
    }

    # Observation parameters
    observation_tree_depth = env_params.observation_tree_depth
    observation_radius = env_params.observation_radius
    observation_max_path_depth = env_params.observation_max_path_depth

    # Observation builder
    predictor = ShortestPathPredictorForRailEnv(observation_max_path_depth)
    tree_observation = TreeObsForRailEnv(max_depth=observation_tree_depth,
                                         predictor=predictor)

    # Setup the environment
    # Alternative: complex_rail_generator(nr_start_goal=10, nr_extra=10,
    #              min_dist=10, max_dist=99999, seed=1)
    env = RailEnv(
        width=x_dim,
        height=y_dim,
        rail_generator=sparse_rail_generator(
            max_num_cities=n_cities,
            grid_mode=False,
            max_rails_between_cities=max_rails_between_cities,
            max_rails_in_city=max_rails_in_city,
        ),
        schedule_generator=sparse_schedule_generator(speed_profiles),
        number_of_agents=n_agents,
        malfunction_generator_and_process_data=malfunction_from_params(malfunction_parameters),
        obs_builder_object=tree_observation)

    if render:
        env_renderer = RenderTool(
            env,
            agent_render_variant=AgentRenderVariant.AGENT_SHOWS_OPTIONS_AND_BOX,
            show_debug=False,
            screen_height=600,  # Adjust these parameters to fit your resolution
            screen_width=800)

    action_dict = dict()
    scores = []
    completions = []
    nb_steps = []
    inference_times = []
    preproc_times = []
    agent_times = []
    step_times = []

    for agent_id in range(n_agents):
        action_dict[agent_id] = 0

    for episode_idx in range(n_eval_episodes):
        images = []
        seed += 1

        inference_timer = Timer()
        preproc_timer = Timer()
        agent_timer = Timer()
        step_timer = Timer()

        step_timer.start()
        obs, info = env.reset(regenerate_rail=True,
                              regenerate_schedule=True,
                              random_seed=seed)
        step_timer.end()

        agent_obs = [None] * env.get_num_agents()
        score = 0.0

        if render:
            env_renderer.set_new_rail()

        final_step = 0
        skipped = 0

        nb_hit = 0
        agent_last_obs = {}
        agent_last_action = {}

        for step in range(max_steps - 1):
            # FIXME why -1? bug where all agents are "done" after max_steps!
            if allow_skipping and check_if_all_blocked(env):
                skipped = max_steps - step - 1
                final_step = max_steps - 2
                n_unfinished_agents = sum(not done[idx]
                                          for idx in env.get_agent_handles())
                score -= skipped * n_unfinished_agents
                break

            agent_timer.start()
            for agent in env.get_agent_handles():
                agent_model = agents[agent]
                if obs[agent] and info['action_required'][agent]:
                    if agent in agent_last_obs and np.all(agent_last_obs[agent] == obs[agent]):
                        # Cache hit: the observation did not change, reuse the last action
                        nb_hit += 1
                        action = agent_last_action[agent]
                    else:
                        preproc_timer.start()
                        norm_obs = normalize_observation(
                            obs[agent],
                            tree_depth=observation_tree_depth,
                            observation_radius=observation_radius)
                        preproc_timer.end()

                        inference_timer.start()
                        action = act(agent_model, norm_obs)
                        inference_timer.end()

                    action_dict.update({agent: action})

                    if allow_caching:
                        agent_last_obs[agent] = obs[agent]
                        agent_last_action[agent] = action
            agent_timer.end()

            step_timer.start()
            obs, all_rewards, done, info = env.step(action_dict)
            step_timer.end()

            if render:
                env_renderer.render_env(show=True,
                                        frames=False,
                                        show_observations=False,
                                        show_predictions=False)
                im = env_renderer.get_image()
                im = PIL.Image.fromarray(im)
                images.append(im)

            if step % 100 == 0:
                print("{}/{}".format(step, max_steps - 1))

            for agent in env.get_agent_handles():
                score += all_rewards[agent]

            final_step = step

            if done['__all__']:
                break

        if render:
            # Hold the last frame for a moment, then save the episode as a GIF
            for _ in range(10):
                images.append(images[-1])

            images[0].save(
                f'/Users/nikhilvs/repos/nyu/flatland-reinforcement-learning/videos/maac-final/out_{episode_idx}.gif',
                save_all=True,
                append_images=images[1:],
                optimize=False,
                duration=60,
                loop=0)

        normalized_score = score / (max_steps * env.get_num_agents())
        scores.append(normalized_score)

        tasks_finished = sum(done[idx] for idx in env.get_agent_handles())
        completion = tasks_finished / max(1, env.get_num_agents())
        completions.append(completion)

        nb_steps.append(final_step)

        inference_times.append(inference_timer.get())
        preproc_times.append(preproc_timer.get())
        agent_times.append(agent_timer.get())
        step_times.append(step_timer.get())

        skipped_text = ""
        if skipped > 0:
            skipped_text = "\t⚡ Skipped {}".format(skipped)

        hit_text = ""
        if nb_hit > 0:
            # max(1, final_step) guards against division by zero
            hit_text = "\t⚡ Hit {} ({:.1f}%)".format(
                nb_hit, (100 * nb_hit) / (n_agents * max(1, final_step)))

        print(
            "☑️ Score: {:.3f} \tDone: {:.1f}% \tNb steps: {:.3f} "
            "\t🍭 Seed: {}"
            "\t🚉 Env: {:.3f}s "
            "\t🤖 Agent: {:.3f}s (per step: {:.3f}s) \t[preproc: {:.3f}s \tinfer: {:.3f}s]"
            "{}{}".format(normalized_score, completion * 100.0, final_step, seed,
                          step_timer.get(), agent_timer.get(),
                          agent_timer.get() / max(1, final_step),
                          preproc_timer.get(), inference_timer.get(),
                          skipped_text, hit_text))

    return scores, completions, nb_steps, agent_times, step_times
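# The eval loop above calls act(agent_model, norm_obs), which is not defined in
# this excerpt. A minimal sketch of what it might look like for the attention
# agents loaded here (an assumption, not the original implementation): run the
# loaded policy network greedily and return the argmax action index.
import torch

def act(agent_model, norm_obs):
    with torch.no_grad():
        obs_tensor = torch.from_numpy(norm_obs).float().unsqueeze(0)
        logits = agent_model.policy(obs_tensor)  # assumes the policy returns per-action scores
        return int(torch.argmax(logits, dim=1).item())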
class FlatlandEnv(gym.Env):

    def __init__(self, n_cars=3, n_acts=5, min_obs=-1, max_obs=1, n_nodes=2,
                 ob_radius=10, x_dim=36, y_dim=36, feats='all'):
        self.tree_obs = tree_observation.TreeObservation(n_nodes)
        self.n_cars = n_cars
        self.n_nodes = n_nodes
        self.ob_radius = ob_radius
        self.feats = feats

        rail_gen = sparse_rail_generator(max_num_cities=3,
                                         seed=666,
                                         grid_mode=False,
                                         max_rails_between_cities=2,
                                         max_rails_in_city=3)

        # speed_ration_map and stochastic_data are module-level settings
        self._rail_env = RailEnv(
            width=x_dim,
            height=y_dim,
            rail_generator=rail_gen,
            schedule_generator=sparse_schedule_generator(speed_ration_map),
            number_of_agents=n_cars,
            malfunction_generator_and_process_data=malfunction_from_params(stochastic_data),
            obs_builder_object=self.tree_obs)

        self.renderer = RenderTool(self._rail_env, gl="PILSVG")
        self.action_dict = dict()
        self.info = dict()
        self.old_obs = dict()

    def step(self, action):
        # Update the action of each agent
        for agent_id in range(self.n_cars):
            if action[agent_id] is None:
                action[agent_id] = 2
            # FIXME: Hack for ignoring action 0 (model only outputs 4 actions)
            self.action_dict.update({agent_id: action[agent_id] + 1})

        # Take actions, get observations
        next_obs, all_rewards, done, self.info = self._rail_env.step(self.action_dict)

        # Normalise observations for each agent
        for agent_id in range(self._rail_env.get_num_agents()):
            if not done[agent_id]:
                # Normalise the next observation
                next_obs[agent_id] = normalize_observation(
                    tree=next_obs[agent_id],
                    max_depth=self.n_nodes,
                    observation_radius=self.ob_radius,
                    feats=self.feats)
                # Keep track of the last observation for trains that finish
                self.old_obs[agent_id] = next_obs[agent_id].copy()
            else:
                # Reuse the last observation once an agent has finished
                next_obs[agent_id] = self.old_obs[agent_id]

        return next_obs, all_rewards, done, self.info

    def reset(self):
        """
        Reset the state of the environment and return an initial observation.

        return obs: initial observation of the space
        """
        self.action_dict = dict()
        self.info = dict()
        self.old_obs = dict()

        obs, self.info = self._rail_env.reset(True, True)
        for agent_id in range(self.n_cars):
            if obs[agent_id]:
                obs[agent_id] = normalize_observation(obs[agent_id],
                                                      self.n_nodes,
                                                      self.ob_radius,
                                                      feats=self.feats)
        self.renderer.reset()
        return obs, self.info

    def render(self, mode=None):
        self.renderer.render_env()
        image = self.renderer.get_image()
        cv2.imshow('Render', image)
        cv2.waitKey(20)
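# Minimal usage sketch for the wrapper above (an assumption, not from the
# original source): step the environment with random per-agent actions.
# Action values are 0..3 here because step() shifts them by +1 internally.
import random

env = FlatlandEnv(n_cars=3)
obs, info = env.reset()
for _ in range(50):
    action = {agent_id: random.randint(0, 3) for agent_id in range(env.n_cars)}
    obs, rewards, done, info = env.step(action)
    if done['__all__']:
        break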
# Give each agent a different speed profile: agent idx runs at speed 1 / (idx + 1)
for idx in range(env.get_num_agents()):
    tmp_agent = env.agents[idx]
    tmp_agent.speed_data["speed"] = 1 / (idx + 1)
env_renderer.reset()

# Here you can also further enhance the provided observation by means of normalization
# See the training navigation example in the baseline repository

images = []
score = 0

# Run episode
for step in range(1000):
    # Choose an action for each agent in the environment
    action_dict = controller.act(observation=obs)

    # Environment step, which returns the observations for all agents, their
    # corresponding rewards, and whether they are done
    next_obs, all_rewards, done, _ = env.step(action_dict)
    env_renderer.render_env(show=False, show_observations=False, show_predictions=False)
    images.append(env_renderer.get_image())
    score += sum(all_rewards.values())

    # Update replay buffer and train agent
    controller.env_reaction(obs, action_dict, all_rewards, next_obs, done)
    obs = next_obs.copy()
    if done['__all__']:
        break

controller.end_of_round()
print('Episode Nr. {}\t Score = {}'.format(trial, score))
log_video(images, trial)
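# log_video() is called above but not defined in this excerpt. A minimal sketch
# under the assumption that it should persist the collected frames as an
# animated GIF (the output directory and file naming are hypothetical):
import os
import PIL.Image

def log_video(images, trial, out_dir="videos"):
    os.makedirs(out_dir, exist_ok=True)
    frames = [PIL.Image.fromarray(im) for im in images]
    frames[0].save(os.path.join(out_dir, f"episode_{trial}.gif"),
                   save_all=True, append_images=frames[1:], duration=60, loop=0)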
import PIL

from flatland.envs.agent_utils import RailAgentStatus
from flatland.envs.malfunction_generators import malfunction_from_file
from flatland.envs.observations import TreeObsForRailEnv
from flatland.envs.predictions import ShortestPathPredictorForRailEnv
from flatland.envs.rail_env import RailEnv
from flatland.envs.rail_generators import rail_from_file
from flatland.envs.schedule_generators import schedule_from_file
from flatland.utils.rendertools import RenderTool

# Observation parameters
observation_tree_depth = 2
observation_radius = 10
observation_max_path_depth = 30

predictor = ShortestPathPredictorForRailEnv(observation_max_path_depth)
tree_observation = TreeObsForRailEnv(max_depth=observation_tree_depth,
                                     predictor=predictor)

env_file = "D:/Sudhish/FYP/Final-Year-Project-main/Sudhish/envs-100-999/envs/Level_100.pkl"

# width/height are placeholders: rail_from_file restores the real dimensions from the file
env = RailEnv(
    width=1,
    height=1,
    rail_generator=rail_from_file(env_file),
    schedule_generator=schedule_from_file(env_file),
    malfunction_generator_and_process_data=malfunction_from_file(env_file),
    obs_builder_object=tree_observation)

obs, info = env.reset(True, True)

env_renderer = RenderTool(env, gl="PILSVG")
env_renderer.render_env()
image = env_renderer.get_image()
pil_image = PIL.Image.fromarray(image)
pil_image.show()

print("Env Loaded")
class FlatlandMultiAgentEnv(MultiAgentEnv):
    """
    Wrap a flatland RailEnv as an RLlib MultiAgentEnv.

    width, height, number_of_agents: int
    remove_agents_at_target: bool
    """

    def __init__(self,
                 width,
                 height,
                 rail_generator,
                 number_of_agents,
                 remove_agents_at_target,
                 obs_builder_object,
                 wait_for_all_done,
                 schedule_generator=random_schedule_generator(),
                 name=None):
        super().__init__()
        self.env = RailEnv(
            width=width,
            height=height,
            rail_generator=rail_generator,
            schedule_generator=schedule_generator,
            number_of_agents=number_of_agents,
            obs_builder_object=obs_builder_object,
            remove_agents_at_target=remove_agents_at_target,
        )
        self.wait_for_all_done = wait_for_all_done
        self.env_renderer = None
        self.agents_done = []
        self.frame_step = 0
        self.name = name
        self.number_of_agents = number_of_agents

        # Track when targets are reached. Only used for correct reward propagation
        # when using wait_for_all_done=True
        self.at_target = {i: False for i in range(self.number_of_agents)}

    def _running_agents(self):
        """Return IDs of the agents that are not done."""
        agents = range(len(self.env.agents))
        return (i for i in agents if i not in self.agents_done)

    def _agents_not_at_target(self):
        """
        Return the number of agents that are not at their targets.
        Used when wait_for_all_done=True.
        """
        return max(1, list(self.at_target.values()).count(False))

    def step(self, action_dict):
        """
        Env step for each agent, like a gym.step() call.

        The action_dict object is a dict with str or int keys corresponding to
        agent IDs, e.g. {'0': ..., '1': ...} or {0: ..., 1: ...}.

        Return per-agent dicts of observations, rewards, dones and infos.
        """
        obs, rewards, dones, infos = self.env.step(action_dict)
        o, r, d, i = {}, {}, {}, {}

        for agent in self._running_agents():
            o[agent] = obs[agent]
            r[agent] = rewards[agent] / self._agents_not_at_target()
            i[agent] = infos
            if self.wait_for_all_done:
                dones, r, i = self._process_all_done(agent, dones, r, i)
            d[agent] = dones[agent]

        d["__all__"] = dones["__all__"]

        for agent, done in dones.items():
            if agent != "__all__" and done:
                self.agents_done.append(agent)

        self.frame_step += 1
        return o, r, d, i

    def reset(self):
        """Return a dict {agent_id: agent_obs, ...}."""
        self.agents_done = []
        obs, _ = self.env.reset()
        if self.env_renderer:
            self.env_renderer.set_new_rail()
        return obs

    def render(self, **kwargs):
        from flatland.utils.rendertools import RenderTool
        if not self.env_renderer:
            self.env_renderer = RenderTool(self.env, gl="PILSVG")
            self.env_renderer.set_new_rail()
        self.env_renderer.render_env(show=True,
                                     frames=False,
                                     show_observations=False,
                                     **kwargs)
        time.sleep(0.1)
        self.env_renderer.render_env(show=True,
                                     frames=False,
                                     show_observations=False,
                                     **kwargs)
        return self.env_renderer.get_image()

    def _process_all_done(self, agent, dones, r, i):
        # Do not count the target reward more than once
        if self.at_target[agent]:
            r[agent] = 0.0

        # If the agent is done, the group is not done, and the agent has not
        # previously reached the target, remember that it is at the target
        if dones[agent] and not dones['__all__']:
            self.at_target[agent] = True

        # Ensure each individual agent is only marked 'done' when all are done
        for a in list(dones.keys()):
            dones[a] = dones['__all__']

        return dones, r, i

    @property
    def action_space(self):
        return Discrete(5)

    @property
    def observation_space(self):
        # Size of a flattened tree observation: sum of 4^d nodes over all
        # depths d, times the per-node feature dimension
        size, pow4 = 0, 1
        for _ in range(self.env.obs_builder.max_depth + 1):
            size += pow4
            pow4 *= 4
        observation_size = size * self.env.obs_builder.observation_dim
        return Box(-np.inf, np.inf, shape=(observation_size, ))
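# Minimal usage sketch (an assumption, not from the original source): register
# the wrapper with RLlib via ray.tune so a trainer can construct it by name.
# The env name and config keys used here are hypothetical.
from ray.tune.registry import register_env

def make_flatland_env(config):
    return FlatlandMultiAgentEnv(
        width=config["width"],
        height=config["height"],
        rail_generator=config["rail_generator"],
        number_of_agents=config["number_of_agents"],
        remove_agents_at_target=True,
        obs_builder_object=config["obs_builder_object"],
        wait_for_all_done=False)

register_env("flatland_multi_agent", make_flatland_env)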