# Imports assumed by the excerpts below (module paths follow highway-env's layout;
# the location of Obstacle varies across highway-env versions).
import copy
import os
from collections import deque
from typing import Callable, List, Optional, Tuple

import cv2 as cv
import gym
import numpy as np
import pandas as pd
from gym import spaces
from gym.utils import seeding

from highway_env import utils
from highway_env.envs.common.action import Action, ActionType, DiscreteMetaAction, action_factory
from highway_env.envs.common.finite_mdp import finite_mdp
from highway_env.envs.common.graphics import EnvViewer
from highway_env.envs.common.observation import ObservationType, observation_factory
from highway_env.road.lane import AbstractLane
from highway_env.road.objects import Obstacle
from highway_env.vehicle.behavior import IDMVehicle
from highway_env.vehicle.controller import MDPVehicle
from highway_env.vehicle.kinematics import Vehicle

Observation = np.ndarray  # type alias used in the annotations below


class GrayscaleObservation(ObservationType):

    """
    An observation class that collects directly what the simulator renders.

    Also stacks the collected frames as in the nature DQN. The observation shape is C x W x H.

    Specific keys are expected in the configuration dictionary passed.
    Example of observation dictionary in the environment config:

        "observation": {
            "type": "GrayscaleObservation",
            "observation_shape": (84, 84),
            "stack_size": 4,
            "weights": [0.2989, 0.5870, 0.1140],  # weights for RGB conversion
        }
    """

    def __init__(self, env: 'AbstractEnv',
                 observation_shape: Tuple[int, int],
                 stack_size: int,
                 weights: List[float],
                 scaling: Optional[float] = None,
                 centering_position: Optional[List[float]] = None,
                 **kwargs) -> None:
        super().__init__(env)
        self.observation_shape = observation_shape
        self.shape = (stack_size, ) + self.observation_shape
        self.weights = weights
        self.obs = np.zeros(self.shape)

        # The viewer configuration can be different between this observation and env.render()
        # (typically smaller)
        viewer_config = env.config.copy()
        viewer_config.update({
            "offscreen_rendering": True,
            "screen_width": self.observation_shape[0],
            "screen_height": self.observation_shape[1],
            "scaling": scaling or viewer_config["scaling"],
            "centering_position": centering_position or viewer_config["centering_position"]
        })
        self.viewer = EnvViewer(env, config=viewer_config)

    def space(self) -> spaces.Space:
        return spaces.Box(shape=self.shape, low=0, high=255, dtype=np.uint8)

    def observe(self) -> np.ndarray:
        new_obs = self._render_to_grayscale()
        self.obs = np.roll(self.obs, -1, axis=0)
        self.obs[-1, :, :] = new_obs
        return self.obs

    def _render_to_grayscale(self) -> np.ndarray:
        # TODO: center rendering on the observer vehicle
        self.viewer.display()
        raw_rgb = self.viewer.get_image()  # H x W x C
        raw_rgb = np.moveaxis(raw_rgb, 0, 1)  # W x H x C
        return np.dot(raw_rgb[..., :3], self.weights).clip(0, 255).astype(np.uint8)
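# A usage sketch (not from the source): requesting stacked grayscale frames through the
# environment configuration. Assumes highway-env is installed and registers "highway-v0",
# and that observation_factory resolves the "GrayscaleObservation" type string.
import gym
import highway_env  # noqa: F401  (registers the highway environments)

env = gym.make("highway-v0")
env.configure({
    "observation": {
        "type": "GrayscaleObservation",
        "observation_shape": (84, 84),
        "stack_size": 4,
        "weights": [0.2989, 0.5870, 0.1140],  # standard luma weights for RGB -> gray
    }
})
obs = env.reset()  # reset() rebuilds the spaces, so the new observation type takes effect
assert obs.shape == (4, 84, 84)  # stack_size x W x H, as described in the class docstring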
def render(self, mode: str = 'human') -> Optional[np.ndarray]:
    """
    Render the environment.

    Create a viewer if none exists, and use it to render an image.

    :param mode: the rendering mode
    """
    self.rendering_mode = mode

    if self.viewer is None:
        self.viewer = EnvViewer(self)

    self.enable_auto_render = True

    self.viewer.display()

    if not self.viewer.offscreen:
        self.viewer.handle_events()

    if mode == 'rgb_array':
        image = self.viewer.get_image()
        return image
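# Usage sketch (not from the source): with mode='rgb_array' the method above returns the
# rendered frame as an array (H x W x C, per the comment in GrayscaleObservation), while
# mode='human' only updates the window and returns None. "highway-v0" is assumed registered.
import gym
import highway_env  # noqa: F401

env = gym.make("highway-v0")
env.reset()
frame = env.render(mode='rgb_array')
print(frame.shape)  # e.g. (150, 600, 3) with the default 600 x 150 screen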
class AbstractEnv(gym.Env):

    """
    A generic environment for various tasks involving a vehicle driving on a road.

    The environment contains a road populated with vehicles, and a controlled ego-vehicle that can change lane and
    speed. The action space is fixed, but the observation space and reward function must be defined in the
    environment implementations.
    """

    observation_type: ObservationType
    action_type: ActionType
    automatic_rendering_callback: Optional[Callable]
    metadata = {'render.modes': ['human', 'rgb_array']}

    PERCEPTION_DISTANCE = 6.0 * MDPVehicle.SPEED_MAX
    """The maximum distance of any vehicle present in the observation [m]"""

    def __init__(self, config: dict = None) -> None:
        # Configuration
        self.config = self.default_config()
        if config:
            self.config.update(config)

        # Seeding
        self.np_random = None
        self.seed()

        # Scene
        self.road = None
        self.controlled_vehicles = []

        # Spaces
        self.action_type = None
        self.action_space = None
        self.observation_type = None
        self.observation_space = None
        self.define_spaces()

        # Running
        self.time = 0  # Simulation time
        self.steps = 0  # Actions performed
        self.done = False

        # Rendering
        self.viewer = None
        self.automatic_rendering_callback = None
        self.should_update_rendering = True
        self.rendering_mode = 'human'
        self.enable_auto_render = False

        self.reset()

    @property
    def vehicle(self) -> Vehicle:
        """First (default) controlled vehicle."""
        return self.controlled_vehicles[0] if self.controlled_vehicles else None

    @vehicle.setter
    def vehicle(self, vehicle: Vehicle) -> None:
        """Set a unique controlled vehicle."""
        self.controlled_vehicles = [vehicle]

    @classmethod
    def default_config(cls) -> dict:
        """
        Default environment configuration.

        Can be overloaded in environment implementations, or by calling configure().

        :return: a configuration dict
        """
        return {
            "observation": {
                "type": "TimeToCollision"
            },
            "action": {
                "type": "DiscreteMetaAction"
            },
            "simulation_frequency": 15,  # [Hz]
            "policy_frequency": 1,  # [Hz]
            "other_vehicles_type": "highway_env.vehicle.behavior.IDMVehicle",
            "screen_width": 600,  # [px]
            "screen_height": 150,  # [px]
            "centering_position": [0.3, 0.5],
            "scaling": 5.5,
            "show_trajectories": False,
            "render_agent": True,
            "offscreen_rendering": os.environ.get("OFFSCREEN_RENDERING", "0") == "1",
            "manual_control": False,
            "real_time_rendering": False
        }

    def seed(self, seed: int = None) -> List[int]:
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def configure(self, config: dict) -> None:
        if config:
            self.config.update(config)

    def define_spaces(self) -> None:
        """Set the types and spaces of observation and action from config."""
        self.observation_type = observation_factory(self, self.config["observation"])
        self.action_type = action_factory(self, self.config["action"])
        self.observation_space = self.observation_type.space()
        self.action_space = self.action_type.space()

    def _reward(self, action: Action) -> float:
        """
        Return the reward associated with performing a given action and ending up in the current state.

        :param action: the last action performed
        :return: the reward
        """
        raise NotImplementedError

    def _is_terminal(self) -> bool:
        """
        Check whether the current state is a terminal state.

        :return: is the state terminal
        """
        raise NotImplementedError

    def _info(self, obs: Observation, action: Action) -> dict:
        """
        Return a dictionary of additional information.

        :param obs: current observation
        :param action: current action
        :return: info dict
        """
        info = {
            "speed": self.vehicle.speed,
            "crashed": self.vehicle.crashed,
            "action": action,
            # Note: "duration" is expected to be provided by the environment implementation's
            # config, since it is not part of default_config()
            "time_limit_reached": (self.steps >= self.config["duration"] * self.config["policy_frequency"])
        }
        try:
            info["cost"] = self._cost(action)
        except NotImplementedError:
            pass
        return info

    def _cost(self, action: Action) -> float:
        """
        A constraint metric, for budgeted MDP.

        If a constraint is defined, it must be used with an alternate reward that doesn't contain it as a penalty.

        :param action: the last action performed
        :return: the constraint signal, the alternate (constraint-free) reward
        """
        raise NotImplementedError

    def reset(self) -> Observation:
        """
        Reset the environment to its initial configuration.

        :return: the observation of the reset state
        """
        self.define_spaces()  # First, to set the controlled vehicle class depending on action space
        self.time = self.steps = 0
        self.done = False
        self.should_update_rendering = True
        self._reset()
        self.define_spaces()  # Second, to link the obs and actions to the vehicles once the scene is created
        return self.observation_type.observe()

    def _reset(self) -> None:
        """
        Reset the scene: roads and vehicles.

        This method must be overloaded by the environments.
        """
        raise NotImplementedError()

    def step(self, action: Action) -> Tuple[Observation, float, bool, dict]:
        """
        Perform an action and step the environment dynamics.

        The action is executed by the ego-vehicle, and all other vehicles on the road perform their default
        behaviour for several simulation timesteps until the next decision-making step.

        :param action: the action performed by the ego-vehicle
        :return: a tuple (observation, reward, terminal, info)
        """
        if self.road is None or self.vehicle is None:
            raise NotImplementedError("The road and vehicle must be initialized in the environment implementation")

        self.steps += 1
        self._simulate(action)

        obs = self.observation_type.observe()
        reward = self._reward(action)
        terminal = self._is_terminal()
        info = self._info(obs, action)

        return obs, reward, terminal, info

    def _simulate(self, action: Optional[Action] = None) -> None:
        """Perform several steps of simulation with a constant action."""
        # With the defaults (15 Hz simulation, 1 Hz policy), each decision is held for 15 steps
        for _ in range(int(self.config["simulation_frequency"] // self.config["policy_frequency"])):
            # Forward the action to the vehicle
            if action is not None \
                    and not self.config["manual_control"] \
                    and self.time % int(self.config["simulation_frequency"] // self.config["policy_frequency"]) == 0:
                self.action_type.act(action)

            self.road.act()
            self.road.step(1 / self.config["simulation_frequency"])
            self.time += 1

            # Automatically render intermediate simulation steps if a viewer has been launched
            # Ignored if the rendering is done offscreen
            self._automatic_rendering()

        self.enable_auto_render = False

    def render(self, mode: str = 'human') -> Optional[np.ndarray]:
        """
        Render the environment.

        Create a viewer if none exists, and use it to render an image.

        :param mode: the rendering mode
        """
        self.rendering_mode = mode

        if self.viewer is None:
            self.viewer = EnvViewer(self)

        self.enable_auto_render = True

        # If the frame has already been rendered, do nothing
        if self.should_update_rendering:
            self.viewer.display()

        if not self.viewer.offscreen:
            self.viewer.handle_events()
        if mode == 'rgb_array':
            image = self.viewer.get_image()
            return image
        self.should_update_rendering = False

    def close(self) -> None:
        """
        Close the environment.

        Will close the environment viewer if it exists.
        """
        self.done = True
        if self.viewer is not None:
            self.viewer.close()
        self.viewer = None

    def get_available_actions(self) -> List[int]:
        """
        Get the list of currently available actions.

        Lane changes are not available on the boundary of the road, and speed changes are not available at
        maximal or minimal speed.

        :return: the list of available actions
        """
        if not isinstance(self.action_type, DiscreteMetaAction):
            raise ValueError("Only discrete meta-actions can be unavailable.")
        actions = [self.action_type.actions_indexes['IDLE']]
        for l_index in self.road.network.side_lanes(self.vehicle.lane_index):
            if l_index[2] < self.vehicle.lane_index[2] \
                    and self.road.network.get_lane(l_index).is_reachable_from(self.vehicle.position) \
                    and self.action_type.lateral:
                actions.append(self.action_type.actions_indexes['LANE_LEFT'])
            if l_index[2] > self.vehicle.lane_index[2] \
                    and self.road.network.get_lane(l_index).is_reachable_from(self.vehicle.position) \
                    and self.action_type.lateral:
                actions.append(self.action_type.actions_indexes['LANE_RIGHT'])
        if self.vehicle.speed_index < self.vehicle.SPEED_COUNT - 1 and self.action_type.longitudinal:
            actions.append(self.action_type.actions_indexes['FASTER'])
        if self.vehicle.speed_index > 0 and self.action_type.longitudinal:
            actions.append(self.action_type.actions_indexes['SLOWER'])
        return actions

    def _automatic_rendering(self) -> None:
        """
        Automatically render the intermediate frames while an action is still ongoing.

        This allows rendering of the whole video, and not only of the single steps corresponding to agent
        decision-making. If a callback has been set, use it to perform the rendering. This is useful for
        environment wrappers, such as a video-recording monitor, that need to access these intermediate renderings.
        """
        if self.viewer is not None and self.enable_auto_render:
            self.should_update_rendering = True

            if self.automatic_rendering_callback is not None:
                self.automatic_rendering_callback()
            else:
                self.render(self.rendering_mode)

    def simplify(self) -> 'AbstractEnv':
        """
        Return a simplified copy of the environment where distant vehicles have been removed from the road.

        This is meant to lower the policy computational load while preserving the optimal actions set.

        :return: a simplified environment state
        """
        state_copy = copy.deepcopy(self)
        state_copy.road.vehicles = [state_copy.vehicle] + state_copy.road.close_vehicles_to(
            state_copy.vehicle, self.PERCEPTION_DISTANCE)
        return state_copy

    def change_vehicles(self, vehicle_class_path: str) -> 'AbstractEnv':
        """
        Change the type of all vehicles on the road.

        :param vehicle_class_path: The path of the class of behavior for other vehicles
                                   Example: "highway_env.vehicle.behavior.IDMVehicle"
        :return: a new environment with modified behavior model for other vehicles
        """
        vehicle_class = utils.class_from_path(vehicle_class_path)

        env_copy = copy.deepcopy(self)
        vehicles = env_copy.road.vehicles
        for i, v in enumerate(vehicles):
            if v is not env_copy.vehicle:
                vehicles[i] = vehicle_class.create_from(v)
        return env_copy

    def set_preferred_lane(self, preferred_lane: int = None) -> 'AbstractEnv':
        env_copy = copy.deepcopy(self)
        if preferred_lane:
            for v in env_copy.road.vehicles:
                if isinstance(v, IDMVehicle):
                    v.route = [(lane[0], lane[1], preferred_lane) for lane in v.route]
                    # Vehicles with a lane preference are also less cautious
                    v.LANE_CHANGE_MAX_BRAKING_IMPOSED = 1000
        return env_copy

    def set_route_at_intersection(self, _to: str) -> 'AbstractEnv':
        env_copy = copy.deepcopy(self)
        for v in env_copy.road.vehicles:
            if isinstance(v, IDMVehicle):
                v.set_route_at_intersection(_to)
        return env_copy

    def set_vehicle_field(self, args: Tuple[str, object]) -> 'AbstractEnv':
        field, value = args
        env_copy = copy.deepcopy(self)
        for v in env_copy.road.vehicles:
            if v is not self.vehicle:
                setattr(v, field, value)
        return env_copy

    def call_vehicle_method(self, args: Tuple[str, Tuple[object]]) -> 'AbstractEnv':
        method, method_args = args
        env_copy = copy.deepcopy(self)
        for i, v in enumerate(env_copy.road.vehicles):
            if hasattr(v, method):
                env_copy.road.vehicles[i] = getattr(v, method)(*method_args)
        return env_copy

    def randomize_behaviour(self) -> 'AbstractEnv':
        env_copy = copy.deepcopy(self)
        for v in env_copy.road.vehicles:
            if isinstance(v, IDMVehicle):
                v.randomize_behavior()
        return env_copy

    def to_finite_mdp(self):
        return finite_mdp(self, time_quantization=1 / self.config["policy_frequency"])

    def __deepcopy__(self, memo):
        """Perform a deep copy but without copying the environment viewer."""
        cls = self.__class__
        result = cls.__new__(cls)
        memo[id(self)] = result
        for k, v in self.__dict__.items():
            if k not in ['viewer', 'automatic_rendering_callback']:
                setattr(result, k, copy.deepcopy(v, memo))
            else:
                setattr(result, k, None)
        return result
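# A minimal subclass sketch (not from the source): AbstractEnv leaves _reset(), _reward()
# and _is_terminal() abstract, so a concrete task only has to provide those. Road,
# RoadNetwork and create_random are assumed from highway-env of this era; the "duration"
# entry is added because _info() reads self.config["duration"].
from highway_env.road.road import Road, RoadNetwork


class MinimalEnv(AbstractEnv):

    @classmethod
    def default_config(cls) -> dict:
        config = super().default_config()
        config.update({"duration": 40})  # [steps] needed by _info()'s time_limit_reached flag
        return config

    def _reset(self) -> None:
        # A straight two-lane road with a single controlled vehicle
        self.road = Road(network=RoadNetwork.straight_road_network(lanes=2),
                         np_random=self.np_random,
                         record_history=self.config["show_trajectories"])
        vehicle = self.action_type.vehicle_class.create_random(self.road, speed=25)
        self.controlled_vehicles = [vehicle]
        self.road.vehicles.append(vehicle)

    def _reward(self, action: Action) -> float:
        # Toy reward: forward speed, with crashes strongly penalized
        return -1.0 if self.vehicle.crashed else self.vehicle.speed / MDPVehicle.SPEED_MAX

    def _is_terminal(self) -> bool:
        return self.vehicle.crashed \
            or self.steps >= self.config["duration"] * self.config["policy_frequency"]


env = MinimalEnv()
obs, reward, done, info = env.step(env.action_space.sample())  # standard gym 4-tuple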
class AbstractEnv(gym.Env):

    """
    A generic environment for various tasks involving a vehicle driving on a road.

    The environment contains a road populated with vehicles, and a controlled ego-vehicle that can change lane and
    velocity. The action space is fixed, but the observation space and reward function must be defined in the
    environment implementations.
    """

    metadata = {'render.modes': ['human', 'rgb_array']}

    ACTIONS = {0: 'LANE_LEFT',
               1: 'IDLE',
               2: 'LANE_RIGHT',
               3: 'FASTER',
               4: 'SLOWER'}
    """A mapping of action indexes to action labels."""

    ACTIONS_INDEXES = {v: k for k, v in ACTIONS.items()}
    """A mapping of action labels to action indexes."""

    SIMULATION_FREQUENCY = 15
    """The frequency at which the system dynamics are simulated [Hz]."""

    PERCEPTION_DISTANCE = 5.0 * MDPVehicle.SPEED_MAX
    """The maximum distance of any vehicle present in the observation [m]."""

    DEFAULT_CONFIG = {
        "observation": {
            "type": "TimeToCollision"
        },
        "policy_frequency": 1,  # [Hz]
        "screen_width": 600,  # [px]
        "screen_height": 150  # [px]
    }

    def __init__(self, config=None):
        # Configuration
        self.config = config
        if not self.config:
            self.config = self.DEFAULT_CONFIG.copy()

        # Seeding
        self.np_random = None
        self.seed()

        # Scene
        self.road = None
        self.vehicle = None

        # Spaces
        self.observation = None
        self.define_spaces()

        # Running
        self.time = 0
        self.done = False

        # Rendering
        self.viewer = None
        self.automatic_rendering_callback = None
        self.should_update_rendering = True
        self.rendering_mode = 'human'
        self.offscreen = self.config.get("offscreen_rendering", False)
        self.enable_auto_render = False

    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def configure(self, config):
        if config:
            self.config.update(config)

    def define_spaces(self):
        self.action_space = spaces.Discrete(len(self.ACTIONS))

        if "observation" not in self.config:
            raise ValueError("The observation configuration must be defined")
        self.observation = observation_factory(self, self.config["observation"])
        self.observation_space = self.observation.space()

    def _reward(self, action):
        """
        Return the reward associated with performing a given action and ending up in the current state.

        :param action: the last action performed
        :return: the reward
        """
        raise NotImplementedError()

    def _is_terminal(self):
        """
        Check whether the current state is a terminal state.

        :return: is the state terminal
        """
        raise NotImplementedError()

    def _cost(self, action):
        """
        A constraint metric, for budgeted MDP.

        If a constraint is defined, it must be used with an alternate reward that doesn't contain it as a penalty.

        :param action: the last action performed
        :return: the constraint signal, the alternate (constraint-free) reward
        """
        return None, self._reward(action)

    def reset(self):
        """
        Reset the environment to its initial configuration.

        :return: the observation of the reset state
        """
        self.time = 0
        self.done = False
        self.define_spaces()
        return self.observation.observe()

    def step(self, action):
        """
        Perform an action and step the environment dynamics.

        The action is executed by the ego-vehicle, and all other vehicles on the road perform their default
        behaviour for several simulation timesteps until the next decision-making step.

        :param int action: the action performed by the ego-vehicle
        :return: a tuple (observation, reward, terminal, info)
        """
        if self.road is None or self.vehicle is None:
            raise NotImplementedError("The road and vehicle must be initialized in the environment implementation")

        self._simulate(action)

        obs = self.observation.observe()
        reward = self._reward(action)
        terminal = self._is_terminal()

        cost = self._cost(action)
        info = {'cost': cost, 'c_': cost}

        return obs, reward, terminal, info

    def _simulate(self, action=None):
        """Perform several steps of simulation with a constant action."""
        for k in range(int(self.SIMULATION_FREQUENCY // self.config["policy_frequency"])):
            if action is not None and \
                    self.time % int(self.SIMULATION_FREQUENCY // self.config["policy_frequency"]) == 0:
                # Forward the action to the vehicle
                self.vehicle.act(self.ACTIONS[action])

            self.road.act()
            self.road.step(1 / self.SIMULATION_FREQUENCY)
            self.time += 1

            # Automatically render intermediate simulation steps if a viewer has been launched
            # Ignored if the rendering is done offscreen
            self._automatic_rendering()

            # Stop at terminal states
            if self.done or self._is_terminal():
                break
        self.enable_auto_render = False

    def render(self, mode='human'):
        """
        Render the environment.

        Create a viewer if none exists, and use it to render an image.

        :param mode: the rendering mode
        """
        self.rendering_mode = mode

        if self.viewer is None:
            self.viewer = EnvViewer(self, offscreen=self.offscreen)

        self.enable_auto_render = not self.offscreen

        # If the frame has already been rendered, do nothing
        if self.should_update_rendering:
            self.viewer.display()

        if mode == 'rgb_array':
            image = self.viewer.get_image()
            if not self.viewer.offscreen:
                self.viewer.handle_events()
            return image
        elif mode == 'human':
            if not self.viewer.offscreen:
                self.viewer.handle_events()
        self.should_update_rendering = False

    def close(self):
        """
        Close the environment.

        Will close the environment viewer if it exists.
        """
        self.done = True
        if self.viewer is not None:
            self.viewer.close()
        self.viewer = None

    def get_available_actions(self):
        """
        Get the list of currently available actions.

        Lane changes are not available on the boundary of the road, and velocity changes are not available at
        maximal or minimal velocity.

        :return: the list of available actions
        """
        actions = [self.ACTIONS_INDEXES['IDLE']]
        for l_index in self.road.network.side_lanes(self.vehicle.lane_index):
            if l_index[2] < self.vehicle.lane_index[2] \
                    and self.road.network.get_lane(l_index).is_reachable_from(self.vehicle.position):
                actions.append(self.ACTIONS_INDEXES['LANE_LEFT'])
            if l_index[2] > self.vehicle.lane_index[2] \
                    and self.road.network.get_lane(l_index).is_reachable_from(self.vehicle.position):
                actions.append(self.ACTIONS_INDEXES['LANE_RIGHT'])
        if self.vehicle.velocity_index < self.vehicle.SPEED_COUNT - 1:
            actions.append(self.ACTIONS_INDEXES['FASTER'])
        if self.vehicle.velocity_index > 0:
            actions.append(self.ACTIONS_INDEXES['SLOWER'])
        return actions

    def _automatic_rendering(self):
        """
        Automatically render the intermediate frames while an action is still ongoing.

        This allows rendering of the whole video, and not only of the single steps corresponding to agent
        decision-making. If a callback has been set, use it to perform the rendering. This is useful for
        environment wrappers, such as a video-recording monitor, that need to access these intermediate renderings.
        """
        if self.viewer is not None and self.enable_auto_render:
            self.should_update_rendering = True

            if self.automatic_rendering_callback:
                self.automatic_rendering_callback()
            else:
                self.render(self.rendering_mode)

    def simplify(self):
        """
        Return a simplified copy of the environment where distant vehicles have been removed from the road.

        This is meant to lower the policy computational load while preserving the optimal actions set.

        :return: a simplified environment state
        """
        state_copy = copy.deepcopy(self)
        state_copy.road.vehicles = [state_copy.vehicle] + state_copy.road.close_vehicles_to(
            state_copy.vehicle, self.PERCEPTION_DISTANCE)
        return state_copy

    def change_vehicles(self, vehicle_class_path):
        """
        Change the type of all vehicles on the road.

        :param vehicle_class_path: The path of the class of behavior for other vehicles
                                   Example: "highway_env.vehicle.behavior.IDMVehicle"
        :return: a new environment with modified behavior model for other vehicles
        """
        vehicle_class = utils.class_from_path(vehicle_class_path)

        env_copy = copy.deepcopy(self)
        vehicles = env_copy.road.vehicles
        for i, v in enumerate(vehicles):
            if v is not env_copy.vehicle and not isinstance(v, Obstacle):
                vehicles[i] = vehicle_class.create_from(v)
        return env_copy

    def set_preferred_lane(self, preferred_lane=None):
        env_copy = copy.deepcopy(self)
        if preferred_lane:
            for v in env_copy.road.vehicles:
                if isinstance(v, IDMVehicle):
                    raise NotImplementedError()
                    # Vehicles with a lane preference are also less cautious
                    v.LANE_CHANGE_MAX_BRAKING_IMPOSED = 1000
        return env_copy

    def set_route_at_intersection(self, _to):
        env_copy = copy.deepcopy(self)
        for v in env_copy.road.vehicles:
            if isinstance(v, IDMVehicle):
                v.set_route_at_intersection(_to)
        return env_copy

    def randomize_behaviour(self):
        env_copy = copy.deepcopy(self)
        for v in env_copy.road.vehicles:
            if isinstance(v, IDMVehicle):
                v.randomize_behavior()
        return env_copy

    def to_finite_mdp(self):
        return finite_mdp(self, time_quantization=1 / self.config["policy_frequency"])

    def __deepcopy__(self, memo):
        """Perform a deep copy but without copying the environment viewer."""
        cls = self.__class__
        result = cls.__new__(cls)
        memo[id(self)] = result
        for k, v in self.__dict__.items():
            if k not in ['viewer', 'automatic_rendering_callback']:
                setattr(result, k, copy.deepcopy(v, memo))
            else:
                setattr(result, k, None)
        return result
class AbstractEnv(gym.Env):

    """
    A generic environment for various tasks involving a vehicle driving on a road.

    This variant additionally produces trajectory-prediction inputs: a rasterized history of
    past vehicle positions and a simulated rollout of future vehicle positions.
    The environment contains a road populated with vehicles, and a controlled ego-vehicle that can change lane and
    speed. The action space is fixed, but the observation space and reward function must be defined in the
    environment implementations.
    """

    observation_type: ObservationType
    action_type: ActionType
    automatic_rendering_callback: Optional[Callable]
    metadata = {'render.modes': ['human', 'rgb_array']}

    PERCEPTION_DISTANCE = 6.0 * MDPVehicle.SPEED_MAX
    """The maximum distance of any vehicle present in the observation [m]"""

    FEATURES: List[str] = ['presence', 'x', 'y', 'vx', 'vy']
    """The vehicle features used when computing relative positions."""

    def __init__(self, config: dict = None) -> None:
        # Configuration
        self.config = self.default_config()
        if config:
            self.config.update(config)
        self.features = self.FEATURES

        # Seeding
        self.np_random = None
        self.seed()

        # Scene
        self.road = None
        self.controlled_vehicles = []

        # Spaces
        self.action_type = None
        self.action_space = None
        self.observation_type = None
        self.observation_space = None
        self.define_spaces()

        # Running
        self.time = 0  # Simulation time
        self.steps = 0  # Actions performed
        self.done = False

        # Rendering
        self.viewer = None
        self.automatic_rendering_callback = None
        self.should_update_rendering = True
        self.rendering_mode = 'human'
        self.enable_auto_render = False

        # Trajectory-prediction state: snapshots of the road vehicles (see _simulate) and
        # bookkeeping for optionally dumping the rasterized history images to disk
        self.record_past = deque(maxlen=5)
        self.store_img = False
        self.numimg = 0

        self.reset()

    @property
    def vehicle(self) -> Vehicle:
        """First (default) controlled vehicle."""
        return self.controlled_vehicles[0] if self.controlled_vehicles else None

    @vehicle.setter
    def vehicle(self, vehicle: Vehicle) -> None:
        """Set a unique controlled vehicle."""
        self.controlled_vehicles = [vehicle]

    @classmethod
    def default_config(cls) -> dict:
        """
        Default environment configuration.

        Can be overloaded in environment implementations, or by calling configure().

        :return: a configuration dict
        """
        return {
            "observation": {
                "type": "TimeToCollision"
            },
            "action": {
                "type": "DiscreteMetaAction"
            },
            "simulation_frequency": 15,  # [Hz]
            "policy_frequency": 1,  # [Hz]
            "other_vehicles_type": "highway_env.vehicle.behavior.IDMVehicle",
            "screen_width": 600,  # [px]
            "screen_height": 150,  # [px]
            "centering_position": [0.3, 0.5],
            "scaling": 5.5,
            "show_trajectories": False,
            "render_agent": True,
            "offscreen_rendering": os.environ.get("OFFSCREEN_RENDERING", "0") == "1",
            "manual_control": False,
            "real_time_rendering": False
        }

    def seed(self, seed: int = None) -> List[int]:
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def configure(self, config: dict) -> None:
        if config:
            self.config.update(config)

    def define_spaces(self) -> None:
        """Set the types and spaces of observation and action from config."""
        self.observation_type = observation_factory(self, self.config["observation"])
        self.action_type = action_factory(self, self.config["action"])
        self.observation_space = self.observation_type.space()
        self.action_space = self.action_type.space()

    def _reward(self, action: Action) -> float:
        """
        Return the reward associated with performing a given action and ending up in the current state.

        :param action: the last action performed
        :return: the reward
        """
        raise NotImplementedError

    def _is_terminal(self) -> bool:
        """
        Check whether the current state is a terminal state.

        :return: is the state terminal
        """
        raise NotImplementedError

    def _cost(self, action: Action) -> float:
        """
        A constraint metric, for budgeted MDP.

        If a constraint is defined, it must be used with an alternate reward that doesn't contain it as a penalty.

        :param action: the last action performed
        :return: the constraint signal, the alternate (constraint-free) reward
        """
        raise NotImplementedError

    def reset(self) -> Tuple[Observation, np.ndarray, np.ndarray]:
        """
        Reset the environment to its initial configuration.

        :return: a tuple (observation, future positions, past positions) of the reset state
        """
        self.define_spaces()  # First, to set the controlled vehicle class depending on action space
        self.time = self.steps = 0
        self.done = False
        self._reset()
        self.define_spaces()  # Second, to link the obs and actions to the vehicles once the scene is created
        deeproad = copy.deepcopy(self.road)
        futurePos = self._simulateFuture(deeproad).reshape(100)
        pastPos = np.zeros((5, 112, 112))  # no position history yet at reset
        obs = self.observation_type.observe().reshape(6, 5, 1)
        return obs, futurePos, pastPos

    def _reset(self) -> None:
        """
        Reset the scene: roads and vehicles.

        This method must be overloaded by the environments.
        """
        raise NotImplementedError()

    def step(self, action: Action) -> Tuple[Observation, np.ndarray, np.ndarray, float, bool, dict]:
        """
        Perform an action and step the environment dynamics.

        The action is executed by the ego-vehicle, and all other vehicles on the road perform their default
        behaviour for several simulation timesteps until the next decision-making step.

        :param action: the action performed by the ego-vehicle
        :return: a tuple (observation, future positions, past positions, reward, terminal, info)
        """
        if self.road is None or self.vehicle is None:
            raise NotImplementedError("The road and vehicle must be initialized in the environment implementation")

        self.steps += 1
        self._simulate(action)

        pastPos = self._renderPastPosition().reshape(5, 112, 112)
        deeproad = copy.deepcopy(self.road)
        futurePos = self._simulateFuture(deeproad)
        obs = self.observation_type.observe().reshape(6, 5, -1)
        reward = self._reward(action)
        terminal = self._is_terminal()
        info = {
            "speed": self.vehicle.speed,
            "crashed": self.vehicle.crashed,
            "action": action,
        }
        try:
            info["cost"] = self._cost(action)
        except NotImplementedError:
            pass

        return obs, futurePos, pastPos, reward, terminal, info

    def _simulate(self, action: Optional[Action] = None) -> None:
        """Perform several steps of simulation with a constant action."""
        for i in range(int(self.config["simulation_frequency"] // self.config["policy_frequency"])):
            # Forward the action to the vehicle
            if action is not None \
                    and not self.config["manual_control"] \
                    and self.time % int(self.config["simulation_frequency"] // self.config["policy_frequency"]) == 0:
                self.action_type.act(action)

            self.road.act()
            self.road.step(1 / self.config["simulation_frequency"])
            if i % 3 == 0:
                # Snapshot all vehicles every 3 simulation steps for the past-position history
                self.record_past.append(copy.deepcopy(self.road.vehicles))
            self.time += 1

            # Automatically render intermediate simulation steps if a viewer has been launched
            # Ignored if the rendering is done offscreen
            self._automatic_rendering()

            # Stop at terminal states
            if self.done or self._is_terminal():
                break
        self.enable_auto_render = False

    def _renderPastPosition(self):
        """Rasterize the recorded past vehicle positions into a stack of 112 x 112 grayscale images."""
        point_size = 1
        point_color = (225, 225, 225)  # BGR
        thickness = 4
        savepath = "./vis/"
        pastPosition = self._getPastPosition()
        img_list = []
        for tp in pastPosition:
            points_list = []
            # 224 x 224 canvas with the ego-vehicle at the center, later downscaled to 112 x 112
            img = np.zeros((224, 224, 3), np.uint8)
            for ip in tp:
                x_pos = int(round(ip[0])) + 112
                y_pos = int(round(ip[1])) + 112
                points_list.append((x_pos, y_pos))
            for point in points_list:
                cv.circle(img, point, point_size, point_color, thickness)
            img_gray = cv.cvtColor(img, cv.COLOR_RGB2GRAY)
            img_new = cv.resize(img_gray, (112, 112))
            if self.store_img:
                filename = savepath + str(self.numimg) + ".png"
                cv.imwrite(filename, img_new)
                self.numimg += 1
            img_list.append(img_new)
        # Stack along the first axis so that each channel is one past timestep
        return np.stack(img_list, axis=0)

    def _getClosestVehicles(self, vehicles_list):
        """Return the positions of the given vehicles relative to the ego-vehicle, within a 112 m range in x."""
        time_position = []
        ego_vehicle = pd.DataFrame.from_records(
            [self.road.vehicles[0].to_dict()])[self.features].values.copy()
        for vehicle in vehicles_list:
            vehicle_info = pd.DataFrame.from_records(
                [vehicle.to_dict()])[self.features].values.copy()
            relx = vehicle_info[0][1] - ego_vehicle[0][1]
            rely = vehicle_info[0][2] - ego_vehicle[0][2]
            if abs(relx) <= 112:
                time_position.append([relx, rely])
        return time_position

    def _getPastPosition(self):
        pastPosition = []
        for i in range(len(self.record_past)):
            pastPosition.append(self._getClosestVehicles(self.record_past[i]))
        return pastPosition

    def _simulateFuture(self, road):
        """Roll out a copy of the road for 10 steps of 0.2 s and record the relative positions of up to 5 close vehicles."""
        futurePosition = np.zeros((5, 20), dtype=float)
        ego_vehicle = pd.DataFrame.from_records(
            [road.vehicles[0].to_dict()])[self.features].values.copy()
        close_vehicles = road.close_vehicles_to(road.vehicles[0],
                                                self.PERCEPTION_DISTANCE,
                                                count=5,
                                                see_behind=True)
        for time in range(10):
            # Step the copied road forward
            road.act()
            road.step(0.2)
            for index, vehicle in enumerate(close_vehicles):
                if vehicle in road.vehicles:
                    vehicle_info = pd.DataFrame.from_records(
                        [vehicle.to_dict()])[self.features].values.copy()
                    relx = vehicle_info[0][1] - ego_vehicle[0][1]
                    rely = vehicle_info[0][2] - ego_vehicle[0][2]
                    futurePosition[index, 2 * time] = relx
                    futurePosition[index, 2 * time + 1] = rely
                else:
                    futurePosition[index, 2 * time] = 0.0
                    futurePosition[index, 2 * time + 1] = 0.0
        result_matrix = self.normalize_matrixTrajectory(self.road.vehicles[0], futurePosition)
        return result_matrix.reshape(100)

    def normalize_trajectory(self, relativePosition, ego_vehicle, road):
        """
        Normalize a relative position to [-1, 1].

        For now, assume that the road is straight along the x axis.

        :param relativePosition: an [x, y] position relative to the ego-vehicle
        :param ego_vehicle: the ego-vehicle
        :param road: the road the ego-vehicle drives on
        """
        side_lanes = road.network.all_side_lanes(ego_vehicle.lane_index)
        features_range = {
            "x": [-6.0 * MDPVehicle.SPEED_MAX, 6.0 * MDPVehicle.SPEED_MAX],
            "y": [-AbstractLane.DEFAULT_WIDTH * len(side_lanes),
                  AbstractLane.DEFAULT_WIDTH * len(side_lanes)],
        }
        relativePosition[0] = utils.lmap(relativePosition[0],
                                         [features_range['x'][0], features_range['x'][1]], [-1, 1])
        relativePosition[1] = utils.lmap(relativePosition[1],
                                         [features_range['y'][0], features_range['y'][1]], [-1, 1])
        return relativePosition

    def normalize_matrixTrajectory(self, ego_vehicle, posMatrix):
        """
        Normalize a matrix of future relative positions.

        Positions are first converted to per-step displacements, then scaled: x by the 112 m
        window (first step) or by the maximum distance covered in one 0.2 s step, y by the
        total width of the road. For now, assume that the road is straight along the x axis.

        :param ego_vehicle: the ego-vehicle
        :param posMatrix: a (vehicles x 2 * timesteps) matrix of relative positions
        """
        side_lanes = self.road.network.all_side_lanes(ego_vehicle.lane_index)
        normalized_matrix = copy.deepcopy(posMatrix)
        # Convert absolute relative positions to step-wise displacements
        for i in range(1, int(len(posMatrix[0]) / 2)):
            normalized_matrix[:, 2 * i] = posMatrix[:, 2 * i] - posMatrix[:, 2 * (i - 1)]
            normalized_matrix[:, 2 * i + 1] = posMatrix[:, 2 * i + 1] - posMatrix[:, 2 * (i - 1) + 1]
        for i in range(0, int(len(posMatrix[0]) / 2)):
            # Normalize the x axis
            if i == 0:
                normalized_matrix[:, 0] = np.clip(normalized_matrix[:, 0] / 112, -1, 1)
            else:
                normalized_matrix[:, 2 * i] = normalized_matrix[:, 2 * i] / (0.2 * MDPVehicle.MAX_SPEED)
            # Normalize the y axis
            normalized_matrix[:, 2 * i + 1] = normalized_matrix[:, 2 * i + 1] / (
                AbstractLane.DEFAULT_WIDTH * len(side_lanes))
        return normalized_matrix

    def render(self, mode: str = 'human') -> Optional[np.ndarray]:
        """
        Render the environment.

        Create a viewer if none exists, and use it to render an image.

        :param mode: the rendering mode
        """
        self.rendering_mode = mode

        if self.viewer is None:
            self.viewer = EnvViewer(self)

        self.enable_auto_render = True

        # If the frame has already been rendered, do nothing
        if self.should_update_rendering:
            self.viewer.display()

        if not self.viewer.offscreen:
            self.viewer.handle_events()
        if mode == 'rgb_array':
            image = self.viewer.get_image()
            return image
        self.should_update_rendering = False

    def close(self) -> None:
        """
        Close the environment.

        Will close the environment viewer if it exists.
        """
        self.done = True
        if self.viewer is not None:
            self.viewer.close()
        self.viewer = None

    def get_available_actions(self) -> List[int]:
        """
        Get the list of currently available actions.

        Lane changes are not available on the boundary of the road, and speed changes are not available at
        maximal or minimal speed.

        :return: the list of available actions
        """
        if not isinstance(self.action_type, DiscreteMetaAction):
            raise ValueError("Only discrete meta-actions can be unavailable.")
        actions = [self.action_type.actions_indexes['IDLE']]
        for l_index in self.road.network.side_lanes(self.vehicle.lane_index):
            if l_index[2] < self.vehicle.lane_index[2] \
                    and self.road.network.get_lane(l_index).is_reachable_from(self.vehicle.position) \
                    and self.action_type.lateral:
                actions.append(self.action_type.actions_indexes['LANE_LEFT'])
            if l_index[2] > self.vehicle.lane_index[2] \
                    and self.road.network.get_lane(l_index).is_reachable_from(self.vehicle.position) \
                    and self.action_type.lateral:
                actions.append(self.action_type.actions_indexes['LANE_RIGHT'])
        if self.vehicle.speed_index < self.vehicle.SPEED_COUNT - 1 and self.action_type.longitudinal:
            actions.append(self.action_type.actions_indexes['FASTER'])
        if self.vehicle.speed_index > 0 and self.action_type.longitudinal:
            actions.append(self.action_type.actions_indexes['SLOWER'])
        return actions

    def _automatic_rendering(self) -> None:
        """
        Automatically render the intermediate frames while an action is still ongoing.

        This allows rendering of the whole video, and not only of the single steps corresponding to agent
        decision-making. If a callback has been set, use it to perform the rendering. This is useful for
        environment wrappers, such as a video-recording monitor, that need to access these intermediate renderings.
        """
        if self.viewer is not None and self.enable_auto_render:
            self.should_update_rendering = True

            if self.automatic_rendering_callback is not None:
                self.automatic_rendering_callback()
            else:
                self.render(self.rendering_mode)

    def simplify(self) -> 'AbstractEnv':
        """
        Return a simplified copy of the environment where distant vehicles have been removed from the road.

        This is meant to lower the policy computational load while preserving the optimal actions set.

        :return: a simplified environment state
        """
        state_copy = copy.deepcopy(self)
        state_copy.road.vehicles = [state_copy.vehicle] + state_copy.road.close_vehicles_to(
            state_copy.vehicle, self.PERCEPTION_DISTANCE)
        return state_copy

    def change_vehicles(self, vehicle_class_path: str) -> 'AbstractEnv':
        """
        Change the type of all vehicles on the road.

        :param vehicle_class_path: The path of the class of behavior for other vehicles
                                   Example: "highway_env.vehicle.behavior.IDMVehicle"
        :return: a new environment with modified behavior model for other vehicles
        """
        vehicle_class = utils.class_from_path(vehicle_class_path)

        env_copy = copy.deepcopy(self)
        vehicles = env_copy.road.vehicles
        for i, v in enumerate(vehicles):
            if v is not env_copy.vehicle:
                vehicles[i] = vehicle_class.create_from(v)
        return env_copy

    def set_preferred_lane(self, preferred_lane: int = None) -> 'AbstractEnv':
        env_copy = copy.deepcopy(self)
        if preferred_lane:
            for v in env_copy.road.vehicles:
                if isinstance(v, IDMVehicle):
                    v.route = [(lane[0], lane[1], preferred_lane) for lane in v.route]
                    # Vehicles with a lane preference are also less cautious
                    v.LANE_CHANGE_MAX_BRAKING_IMPOSED = 1000
        return env_copy

    def set_route_at_intersection(self, _to: str) -> 'AbstractEnv':
        env_copy = copy.deepcopy(self)
        for v in env_copy.road.vehicles:
            if isinstance(v, IDMVehicle):
                v.set_route_at_intersection(_to)
        return env_copy

    def set_vehicle_field(self, args: Tuple[str, object]) -> 'AbstractEnv':
        field, value = args
        env_copy = copy.deepcopy(self)
        for v in env_copy.road.vehicles:
            if v is not self.vehicle:
                setattr(v, field, value)
        return env_copy

    def call_vehicle_method(self, args: Tuple[str, Tuple[object]]) -> 'AbstractEnv':
        method, method_args = args
        env_copy = copy.deepcopy(self)
        for i, v in enumerate(env_copy.road.vehicles):
            if hasattr(v, method):
                env_copy.road.vehicles[i] = getattr(v, method)(*method_args)
        return env_copy

    def randomize_behaviour(self) -> 'AbstractEnv':
        env_copy = copy.deepcopy(self)
        for v in env_copy.road.vehicles:
            if isinstance(v, IDMVehicle):
                v.randomize_behavior()
        return env_copy

    def to_finite_mdp(self):
        return finite_mdp(self, time_quantization=1 / self.config["policy_frequency"])

    def __deepcopy__(self, memo):
        """Perform a deep copy but without copying the environment viewer."""
        cls = self.__class__
        result = cls.__new__(cls)
        memo[id(self)] = result
        for k, v in self.__dict__.items():
            if k not in ['viewer', 'automatic_rendering_callback']:
                setattr(result, k, copy.deepcopy(v, memo))
            else:
                setattr(result, k, None)
        return result
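# Usage sketch for the trajectory-prediction variant above (not from the source). Note that
# it departs from the standard gym API: reset() returns a 3-tuple and step() a 6-tuple, so
# off-the-shelf gym wrappers will not work with it unmodified. SomeTrajectoryEnv is a
# hypothetical concrete subclass providing _reset(), _reward() and _is_terminal().
env = SomeTrajectoryEnv()
obs, future_pos, past_pos = env.reset()
# obs:        (6, 5, 1)     e.g. 6 vehicles x the 5 FEATURES
# future_pos: (100,)        normalized displacements of up to 5 vehicles over 10 steps of 0.2 s
# past_pos:   (5, 112, 112) rasterized position history; all zeros right after reset
obs, future_pos, past_pos, reward, done, info = env.step(env.action_space.sample())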