def reset(self, regenerate_rail=True, regenerate_schedule=True,
          activate_agents=False, random_seed=None):
    '''
    Reset the environment.

    Parameters
    ----------
    regenerate_rail : bool, optional
        regenerate the rail grid (also triggers schedule regeneration)
    regenerate_schedule : bool, optional
        regenerate the schedule and the static agents
    activate_agents : bool, optional
        mark every agent as active right after the reset
    random_seed : int, optional
        seed for the environment RNG; `None` leaves the RNG untouched

    Returns
    -------
    tuple
        (normalized observation per agent, info dict per agent)
    '''
    # Seed only when a seed was actually supplied.
    # Compare against None so that a seed of 0 is not silently ignored
    # (the old truthiness test `if random_seed:` dropped seed 0).
    if random_seed is not None:
        self._seed(random_seed)

    # Regenerate the rail, if necessary
    optionals = {}
    if regenerate_rail or self.rail is None:
        rail, optionals = self._generate_rail()
        self.rail = rail
        self.height, self.width = self.rail.grid.shape
        self.obs_builder.set_env(self)

    # Set the distance map, when the rail generator provided one
    if optionals and 'distance_map' in optionals:
        self.distance_map.set(optionals['distance_map'])

    # Regenerate the schedule, if necessary
    if regenerate_schedule or regenerate_rail or self.get_num_agents() == 0:
        agents_hints = None
        if optionals and 'agents_hints' in optionals:
            agents_hints = optionals['agents_hints']
        schedule = self.schedule_generator(
            self.rail, self.number_of_agents, agents_hints,
            self.num_resets, self.np_random)
        self.agents = EnvAgent.from_schedule(schedule)
        self._max_episode_steps = schedule.max_episode_steps

    # Reset agents positions (-1 means "cell not occupied")
    self.agent_positions = np.full(
        (self.height, self.width), -1, dtype=int)
    self.reset_agents()
    for i, agent in enumerate(self.agents):
        if activate_agents:
            self.set_agent_active(agent)
        # Induce malfunctions; a malfunctioning agent must not act on
        # cell exit until the malfunction is over
        self._break_agent(agent)
        if agent.malfunction_data["malfunction"] > 0:
            agent.speed_data['transition_action_on_cellexit'] = \
                RailEnvActions.DO_NOTHING
        # Fix agents that finished their malfunction
        self._fix_agent_after_malfunction(agent)
        # Reset partial rewards
        self.partial_rewards[i] = 0.0

    # Reset common variables
    self.num_resets += 1
    self._elapsed_steps = 0
    self.dones = dict.fromkeys(
        list(range(self.get_num_agents())) + ["__all__"], False)
    self.arrived_turns = [None] * self.get_num_agents()
    self.stop_actions = [0] * self.get_num_agents()

    # Build the cell orientation graph
    self.railway_encoding = CellOrientationGraph(
        grid=self.rail.grid, agents=self.agents)

    # Reset the state of the observation builder with the new environment
    self.obs_builder.reset()
    self.distance_map.reset(self.agents, self.rail)

    # Reset the malfunction generator: newer generators expose a
    # `generate` method, older ones are plain callables.
    # (hasattr is the idiomatic, cheaper form of `"generate" in dir(...)`)
    if hasattr(self.malfunction_generator, "generate"):
        self.malfunction_generator.generate(reset=True)
    else:
        self.malfunction_generator(reset=True)

    # Empty the episode store of agent positions
    self.cur_episode = []

    # Compute deadlocks
    self.deadlocks_detector.reset(self.get_num_agents())

    # Build the info dict
    self.current_info = {
        'action_required': {},
        'malfunction': {},
        'speed': {},
        'status': {},
        'deadlocks': {},
        'deadlock_turns': {},
        'finished': {},
        'first_time_deadlock': {},
        'first_time_finished': {}
    }
    for i, agent in enumerate(self.agents):
        self.current_info['action_required'][i] = self.action_required(
            agent)
        self.current_info['malfunction'][i] = agent.malfunction_data[
            'malfunction']
        self.current_info['speed'][i] = agent.speed_data['speed']
        self.current_info['status'][i] = agent.status
        self.current_info["deadlocks"][
            i] = self.deadlocks_detector.deadlocks[i]
        self.current_info["deadlock_turns"][
            i] = self.deadlocks_detector.deadlock_turns[i]
        # An agent is "finished" when it is done or irrecoverably deadlocked
        self.current_info["finished"][
            i] = self.dones[i] or self.deadlocks_detector.deadlocks[i]
        self.current_info["first_time_deadlock"][i] = (
            self.deadlocks_detector.deadlocks[i]
            and 0 == self.deadlocks_detector.deadlock_turns[i])
        self.current_info["first_time_finished"][i] = (
            self.dones[i] and 0 == self.arrived_turns[i])

    # Return the new observation vectors for each agent
    observation_dict = self._get_observations()
    return (self._normalize_obs(observation_dict), self.current_info)
def reset(self, regenerate_rail: bool = True, regenerate_schedule: bool = True,
          activate_agents: bool = False,
          random_seed: int = None) -> (Dict, Dict):
    """
    reset(regenerate_rail, regenerate_schedule, activate_agents, random_seed)

    The method resets the rail environment

    Parameters
    ----------
    regenerate_rail : bool, optional
        regenerate the rails
    regenerate_schedule : bool, optional
        regenerate the schedule and the static agents
    activate_agents : bool, optional
        activate the agents
    random_seed : int, optional
        random seed for environment (the old annotation `bool` was wrong)

    Returns
    -------
    observation_dict: Dict
        Dictionary with an observation for each agent
    info_dict: Dict with agent specific information

    """
    # Compare against None so that a seed of 0 is honored
    # (the old truthiness test `if random_seed:` dropped seed 0)
    if random_seed is not None:
        self._seed(random_seed)

    optionals = {}
    if regenerate_rail or self.rail is None:
        rail, optionals = self.rail_generator(
            self.width, self.height, self.number_of_agents,
            self.num_resets, self.np_random)
        self.rail = rail
        self.height, self.width = self.rail.grid.shape

        # Do a new set_env call on the obs_builder to ensure
        # that obs_builder specific instantiations are made according to the
        # specifications of the current environment : like width, height, etc
        self.obs_builder.set_env(self)

    if optionals and 'distance_map' in optionals:
        self.distance_map.set(optionals['distance_map'])

    if regenerate_schedule or regenerate_rail or self.get_num_agents() == 0:
        agents_hints = None
        if optionals and 'agents_hints' in optionals:
            agents_hints = optionals['agents_hints']

        schedule = self.schedule_generator(
            self.rail, self.number_of_agents, agents_hints,
            self.num_resets, self.np_random)
        self.agents = EnvAgent.from_schedule(schedule)

        # The episode horizon scales with the map size and with how
        # crowded the cities are (agents per city), when hints are given
        if agents_hints and 'city_orientations' in agents_hints:
            ratio_nr_agents_to_nr_cities = self.get_num_agents() / len(
                agents_hints['city_orientations'])
            self._max_episode_steps = self.compute_max_episode_steps(
                width=self.width, height=self.height,
                ratio_nr_agents_to_nr_cities=ratio_nr_agents_to_nr_cities)
        else:
            self._max_episode_steps = self.compute_max_episode_steps(
                width=self.width, height=self.height)

    # -1 means "cell not occupied"; np.full builds the array in one pass
    # instead of allocating zeros and subtracting 1 in a second pass
    self.agent_positions = np.full(
        (self.height, self.width), -1, dtype=int)

    # Reset agents to initial
    self.reset_agents()

    for agent in self.agents:
        # Induce malfunctions
        if activate_agents:
            self.set_agent_active(agent)

        self._break_agent(agent)

        if agent.malfunction_data["malfunction"] > 0:
            agent.speed_data[
                'transition_action_on_cellexit'] = RailEnvActions.DO_NOTHING

        # Fix agents that finished their malfunction
        self._fix_agent_after_malfunction(agent)

    self.num_resets += 1
    self._elapsed_steps = 0

    # TODO perhaps dones should be part of each agent.
    self.dones = dict.fromkeys(
        list(range(self.get_num_agents())) + ["__all__"], False)

    # Reset the state of the observation builder with the new environment
    self.obs_builder.reset()
    self.distance_map.reset(self.agents, self.rail)

    # Reset the malfunction generator
    self.malfunction_generator(reset=True)

    info_dict: Dict = {
        'action_required': {
            i: self.action_required(agent)
            for i, agent in enumerate(self.agents)
        },
        'malfunction': {
            i: agent.malfunction_data['malfunction']
            for i, agent in enumerate(self.agents)
        },
        'speed': {
            i: agent.speed_data['speed']
            for i, agent in enumerate(self.agents)
        },
        'status': {i: agent.status for i, agent in enumerate(self.agents)}
    }

    # Return the new observation vectors for each agent
    observation_dict: Dict = self._get_observations()
    return observation_dict, info_dict