class Env(*classdef): """Base environment class. Provides the interface for interacting with various aspects of a traffic simulation. Using this class, you can start a simulation instance, provide a scenario to specify a configuration and controllers, perform simulation steps, and reset the simulation to an initial configuration. Env is Serializable to allow for pickling and replaying of the policy. This class cannot be used as is: you must extend it to implement an action applicator method, and properties to define the MDP if you choose to use it with an rl library (e.g. RLlib). This can be done by overloading the following functions in a child class: * action_space * observation_space * apply_rl_action * get_state * compute_reward Attributes ---------- env_params : flow.core.params.EnvParams see flow/core/params.py sim_params : flow.core.params.SimParams see flow/core/params.py net_params : flow.core.params.NetParams see flow/core/params.py initial_config : flow.core.params.InitialConfig see flow/core/params.py scenario : flow.scenarios.Scenario see flow/scenarios/base_scenario.py simulator : str the simulator used, one of {'traci', 'aimsun'} k : flow.core.kernel.Kernel Flow kernel object, using for state acquisition and issuing commands to the certain components of the simulator. For more information, see: flow/core/kernel/kernel.py state : to be defined in observation space state of the simulation obs_var_labels : list optional labels for each entries in observed state sim_step : float optional seconds per simulation step; 0.1 by default time_counter : int number of steps taken since the start of a rollout step_counter : int number of steps taken since the environment was initialized, or since `restart_simulation` was called initial_state : dict initial state information for all vehicles. The network is always initialized with the number of vehicles originally specified in VehicleParams * Key = Vehicle ID, * Entry = (vehicle type, starting edge, starting lane index, starting position on edge, starting speed) initial_ids : list of str name of the vehicles that will originally available in the network at the start of a rollout (i.e. after `env.reset()` is called). This also corresponds to `self.initial_state.keys()`. available_routes : dict the available_routes variable contains a dictionary of routes vehicles can traverse; to be used when routes need to be chosen dynamically. Equivalent to `scenario.rts`. renderer : flow.renderer.pyglet_renderer.PygletRenderer or None renderer class, used to collect image-based representations of the traffic network. This attribute is set to None if `sim_params.render` is set to True or False. """ def __init__(self, env_params, sim_params, scenario, simulator='traci'): """Initialize the environment class. Parameters ---------- env_params : flow.core.params.EnvParams see flow/core/params.py sim_params : flow.core.params.SimParams see flow/core/params.py scenario : flow.scenarios.Scenario see flow/scenarios/base_scenario.py simulator : str the simulator used, one of {'traci', 'aimsun'}. Defaults to 'traci' Raises ------ flow.utils.exceptions.FatalFlowError if the render mode is not set to a valid value """ # Invoke serializable if using rllab if serializable_flag: Serializable.quick_init(self, locals()) self.env_params = env_params self.scenario = scenario self.net_params = scenario.net_params self.initial_config = scenario.initial_config self.sim_params = sim_params time_stamp = ''.join(str(time.time()).split('.')) if os.environ.get("TEST_FLAG", 0): # 1.0 works with stress_test_start 10k times time.sleep(1.0 * int(time_stamp[-6:]) / 1e6) # FIXME: this is sumo-specific self.sim_params.port = sumolib.miscutils.getFreeSocketPort() # time_counter: number of steps taken since the start of a rollout self.time_counter = 0 # step_counter: number of total steps taken self.step_counter = 0 # initial_state: self.initial_state = {} self.state = None self.obs_var_labels = [] # simulation step size self.sim_step = sim_params.sim_step # the simulator used by this environment self.simulator = simulator # create the Flow kernel self.k = Kernel(simulator=self.simulator, sim_params=sim_params) # use the scenario class's network parameters to generate the necessary # scenario components within the scenario kernel self.k.scenario.generate_network(scenario) # initial the vehicles kernel using the VehicleParams object self.k.vehicle.initialize(deepcopy(scenario.vehicles)) # initialize the simulation using the simulation kernel. This will use # the scenario kernel as an input in order to determine what network # needs to be simulated. kernel_api = self.k.simulation.start_simulation( scenario=self.k.scenario, sim_params=sim_params) # pass the kernel api to the kernel and it's subclasses self.k.pass_api(kernel_api) # the available_routes variable contains a dictionary of routes # vehicles can traverse; to be used when routes need to be chosen # dynamically self.available_routes = self.k.scenario.rts # store the initial vehicle ids self.initial_ids = deepcopy(scenario.vehicles.ids) # store the initial state of the vehicles kernel (needed for restarting # the simulation) self.k.vehicle.kernel_api = None self.k.vehicle.master_kernel = None self.initial_vehicles = deepcopy(self.k.vehicle) self.k.vehicle.kernel_api = self.k.kernel_api self.k.vehicle.master_kernel = self.k self.setup_initial_state() # use pyglet to render the simulation if self.sim_params.render in ['gray', 'dgray', 'rgb', 'drgb']: save_render = self.sim_params.save_render sight_radius = self.sim_params.sight_radius pxpm = self.sim_params.pxpm show_radius = self.sim_params.show_radius # get network polygons network = [] # FIXME: add to scenario kernel instead of hack for lane_id in self.k.kernel_api.lane.getIDList(): _lane_poly = self.k.kernel_api.lane.getShape(lane_id) lane_poly = [i for pt in _lane_poly for i in pt] network.append(lane_poly) # instantiate a pyglet renderer self.renderer = Renderer(network, self.sim_params.render, save_render, sight_radius=sight_radius, pxpm=pxpm, show_radius=show_radius) # render a frame self.render(reset=True) elif self.sim_params.render in [True, False]: pass # default to sumo-gui (if True) or sumo (if False) else: raise FatalFlowError('Mode %s is not supported!' % self.sim_params.render) atexit.register(self.terminate) def restart_simulation(self, sim_params, render=None): """Restart an already initialized simulation instance. This is used when visualizing a rollout, in order to update the rendering with potentially a gui and export emission data from sumo. This is also used to handle cases when the runtime of an experiment is too long, causing the sumo instance Parameters ---------- sim_params : flow.core.params.SimParams simulation-specific parameters render : bool, optional specifies whether to use the gui """ self.k.close() # killed the sumo process if using sumo/TraCI if self.simulator == 'traci': self.k.simulation.sumo_proc.kill() if render is not None: self.sim_params.render = render if sim_params.emission_path is not None: ensure_dir(sim_params.emission_path) self.sim_params.emission_path = sim_params.emission_path self.k.scenario.generate_network(self.scenario) self.k.vehicle.initialize(deepcopy(self.scenario.vehicles)) kernel_api = self.k.simulation.start_simulation( scenario=self.k.scenario, sim_params=self.sim_params) self.k.pass_api(kernel_api) self.setup_initial_state() def setup_initial_state(self): """Store information on the initial state of vehicles in the network. This information is to be used upon reset. This method also adds this information to the self.vehicles class and starts a subscription with sumo to collect state information each step. """ # determine whether to shuffle the vehicles if self.initial_config.shuffle: random.shuffle(self.initial_ids) # generate starting position for vehicles in the network start_pos, start_lanes = self.k.scenario.generate_starting_positions( initial_config=self.initial_config, num_vehicles=len(self.initial_ids)) # save the initial state. This is used in the _reset function for i, veh_id in enumerate(self.initial_ids): type_id = self.k.vehicle.get_type(veh_id) pos = start_pos[i][1] lane = start_lanes[i] speed = self.k.vehicle.get_initial_speed(veh_id) edge = start_pos[i][0] self.initial_state[veh_id] = (type_id, edge, lane, pos, speed) def step(self, rl_actions): """Advance the environment by one step. Assigns actions to autonomous and human-driven agents (i.e. vehicles, traffic lights, etc...). Actions that are not assigned are left to the control of the simulator. The actions are then used to advance the simulator by the number of time steps requested per environment step. Results from the simulations are processed through various classes, such as the Vehicle and TrafficLight kernels, to produce standardized methods for identifying specific network state features. Finally, results from the simulator are used to generate appropriate observations. Parameters ---------- rl_actions : array_like an list of actions provided by the rl algorithm Returns ------- observation : array_like agent's observation of the current environment reward : float amount of reward associated with the previous state/action pair done : bool indicates whether the episode has ended info : dict contains other diagnostic information from the previous action """ for _ in range(self.env_params.sims_per_step): self.time_counter += 1 self.step_counter += 1 # perform acceleration actions for controlled human-driven vehicles if len(self.k.vehicle.get_controlled_ids()) > 0: accel = [] for veh_id in self.k.vehicle.get_controlled_ids(): action = self.k.vehicle.get_acc_controller( veh_id).get_action(self) accel.append(action) self.k.vehicle.apply_acceleration( self.k.vehicle.get_controlled_ids(), accel) # perform lane change actions for controlled human-driven vehicles if len(self.k.vehicle.get_controlled_lc_ids()) > 0: direction = [] for veh_id in self.k.vehicle.get_controlled_lc_ids(): target_lane = self.k.vehicle.get_lane_changing_controller( veh_id).get_action(self) direction.append(target_lane) self.k.vehicle.apply_lane_change( self.k.vehicle.get_controlled_lc_ids(), direction=direction) # perform (optionally) routing actions for all vehicles in the # network, including RL and SUMO-controlled vehicles routing_ids = [] routing_actions = [] for veh_id in self.k.vehicle.get_ids(): if self.k.vehicle.get_routing_controller(veh_id) \ is not None: routing_ids.append(veh_id) route_contr = self.k.vehicle.get_routing_controller(veh_id) routing_actions.append(route_contr.choose_route(self)) self.k.vehicle.choose_routes(routing_ids, routing_actions) self.apply_rl_actions(rl_actions) self.additional_command() # advance the simulation in the simulator by one step self.k.simulation.simulation_step() # store new observations in the vehicles and traffic lights class self.k.update(reset=False) # update the colors of vehicles if self.sim_params.render: self.k.vehicle.update_vehicle_colors() # crash encodes whether the simulator experienced a collision crash = self.k.simulation.check_collision() # stop collecting new simulation steps if there is a collision if crash: break # render a frame self.render() states = self.get_state() # collect information of the state of the network based on the # environment class used self.state = np.asarray(states).T # collect observation new state associated with action next_observation = np.copy(states) # test if the environment should terminate due to a collision or the # time horizon being met done = (self.time_counter >= self.env_params.warmup_steps + self.env_params.horizon ) # or crash # compute the info for each agent infos = {} # compute the reward if self.env_params.clip_actions: rl_clipped = self.clip_actions(rl_actions) reward = self.compute_reward(rl_clipped, fail=crash) else: reward = self.compute_reward(rl_actions, fail=crash) return next_observation, reward, done, infos def reset(self): """Reset the environment. This method is performed in between rollouts. It resets the state of the environment, and re-initializes the vehicles in their starting positions. If "shuffle" is set to True in InitialConfig, the initial positions of vehicles is recalculated and the vehicles are shuffled. Returns ------- observation : array_like the initial observation of the space. The initial reward is assumed to be zero. """ # reset the time counter self.time_counter = 0 # warn about not using restart_instance when using inflows if len(self.net_params.inflows.get()) > 0 and \ not self.sim_params.restart_instance: print( "**********************************************************\n" "**********************************************************\n" "**********************************************************\n" "WARNING: Inflows will cause computational performance to\n" "significantly decrease after large number of rollouts. In \n" "order to avoid this, set SumoParams(restart_instance=True).\n" "**********************************************************\n" "**********************************************************\n" "**********************************************************") if self.sim_params.restart_instance or \ (self.step_counter > 2e6 and self.simulator != 'aimsun'): self.step_counter = 0 # issue a random seed to induce randomness into the next rollout self.sim_params.seed = random.randint(0, 1e5) self.k.vehicle = deepcopy(self.initial_vehicles) self.k.vehicle.master_kernel = self.k # restart the sumo instance self.restart_simulation(self.sim_params) # perform shuffling (if requested) elif self.initial_config.shuffle: self.setup_initial_state() # clear all vehicles from the network and the vehicles class if self.simulator == 'traci': for veh_id in self.k.kernel_api.vehicle.getIDList(): # FIXME: hack try: self.k.vehicle.remove(veh_id) except (FatalTraCIError, TraCIException): print(traceback.format_exc()) # clear all vehicles from the network and the vehicles class # FIXME (ev, ak) this is weird and shouldn't be necessary for veh_id in list(self.k.vehicle.get_ids()): # do not try to remove the vehicles from the network in the first # step after initializing the network, as there will be no vehicles if self.step_counter == 0: continue try: self.k.vehicle.remove(veh_id) except (FatalTraCIError, TraCIException): print("Error during start: {}".format(traceback.format_exc())) # reintroduce the initial vehicles to the network for veh_id in self.initial_ids: type_id, edge, lane_index, pos, speed = \ self.initial_state[veh_id] try: self.k.vehicle.add(veh_id=veh_id, type_id=type_id, edge=edge, lane=lane_index, pos=pos, speed=speed) except (FatalTraCIError, TraCIException): # if a vehicle was not removed in the first attempt, remove it # now and then reintroduce it self.k.vehicle.remove(veh_id) if self.simulator == 'traci': self.k.kernel_api.vehicle.remove(veh_id) # FIXME: hack self.k.vehicle.add(veh_id=veh_id, type_id=type_id, edge=edge, lane=lane_index, pos=pos, speed=speed) # advance the simulation in the simulator by one step self.k.simulation.simulation_step() # update the information in each kernel to match the current state self.k.update(reset=True) # update the colors of vehicles if self.sim_params.render: self.k.vehicle.update_vehicle_colors() if self.simulator == 'traci': initial_ids = self.k.kernel_api.vehicle.getIDList() else: initial_ids = self.initial_ids # check to make sure all vehicles have been spawned if len(self.initial_ids) > len(initial_ids): missing_vehicles = list(set(self.initial_ids) - set(initial_ids)) msg = '\nNot enough vehicles have spawned! Bad start?\n' \ 'Missing vehicles / initial state:\n' for veh_id in missing_vehicles: msg += '- {}: {}\n'.format(veh_id, self.initial_state[veh_id]) raise FatalFlowError(msg=msg) states = self.get_state() # collect information of the state of the network based on the # environment class used self.state = np.asarray(states).T # observation associated with the reset (no warm-up steps) observation = np.copy(states) # perform (optional) warm-up steps before training for _ in range(self.env_params.warmup_steps): observation, _, _, _ = self.step(rl_actions=None) # render a frame self.render(reset=True) return observation def additional_command(self): """Additional commands that may be performed by the step method.""" pass def clip_actions(self, rl_actions=None): """Clip the actions passed from the RL agent. Parameters ---------- rl_actions : array_like list of actions provided by the RL algorithm Returns ------- array_like The rl_actions clipped according to the box """ # ignore if no actions are issued if rl_actions is None: return None # clip according to the action space requirements if isinstance(self.action_space, Box): rl_actions = np.clip(rl_actions, a_min=self.action_space.low, a_max=self.action_space.high) return rl_actions def apply_rl_actions(self, rl_actions=None): """Specify the actions to be performed by the rl agent(s). If no actions are provided at any given step, the rl agents default to performing actions specified by SUMO. Parameters ---------- rl_actions : array_like list of actions provided by the RL algorithm """ # ignore if no actions are issued if rl_actions is None: return rl_clipped = self.clip_actions(rl_actions) self._apply_rl_actions(rl_clipped) def _apply_rl_actions(self, rl_actions): raise NotImplementedError def get_state(self): """Return the state of the simulation as perceived by the RL agent. MUST BE implemented in new environments. Returns ------- state : array_like information on the state of the vehicles, which is provided to the agent """ raise NotImplementedError @property def action_space(self): """Identify the dimensions and bounds of the action space. MUST BE implemented in new environments. Returns ------- gym Box or Tuple type a bounded box depicting the shape and bounds of the action space """ raise NotImplementedError @property def observation_space(self): """Identify the dimensions and bounds of the observation space. MUST BE implemented in new environments. Returns ------- gym Box or Tuple type a bounded box depicting the shape and bounds of the observation space """ raise NotImplementedError def compute_reward(self, rl_actions, **kwargs): """Reward function for the RL agent(s). MUST BE implemented in new environments. Defaults to 0 for non-implemented environments. Parameters ---------- rl_actions : array_like actions performed by rl vehicles kwargs : dict other parameters of interest. Contains a "fail" element, which is True if a vehicle crashed, and False otherwise Returns ------- reward : float or list of float """ return 0 def terminate(self): """Close the TraCI I/O connection. Should be done at end of every experiment. Must be in Env because the environment opens the TraCI connection. """ try: # close everything within the kernel self.k.close() # close pyglet renderer if self.sim_params.render in ['gray', 'dgray', 'rgb', 'drgb']: self.renderer.close() except FileNotFoundError: # Skip automatic termination. Connection is probably already closed print(traceback.format_exc()) def render(self, reset=False, buffer_length=5): """Render a frame. Parameters ---------- reset : bool set to True to reset the buffer buffer_length : int length of the buffer """ if self.sim_params.render in ['gray', 'dgray', 'rgb', 'drgb']: # render a frame self.pyglet_render() # cache rendering if reset: self.frame_buffer = [self.frame.copy() for _ in range(5)] self.sights_buffer = [self.sights.copy() for _ in range(5)] else: if self.step_counter % int(1 / self.sim_step) == 0: self.frame_buffer.append(self.frame.copy()) self.sights_buffer.append(self.sights.copy()) if len(self.frame_buffer) > buffer_length: self.frame_buffer.pop(0) self.sights_buffer.pop(0) def pyglet_render(self): """Render a frame using pyglet.""" # get human and RL simulation status human_idlist = self.k.vehicle.get_human_ids() machine_idlist = self.k.vehicle.get_rl_ids() human_logs = [] human_orientations = [] human_dynamics = [] machine_logs = [] machine_orientations = [] machine_dynamics = [] max_speed = self.k.scenario.max_speed() for id in human_idlist: # Force tracking human vehicles by adding "track" in vehicle id. # The tracked human vehicles will be treated as machine vehicles. if 'track' in id: machine_logs.append([ self.k.vehicle.get_timestep(id), self.k.vehicle.get_timedelta(id), id ]) machine_orientations.append(self.k.vehicle.get_orientation(id)) machine_dynamics.append( self.k.vehicle.get_speed(id) / max_speed) else: human_logs.append([ self.k.vehicle.get_timestep(id), self.k.vehicle.get_timedelta(id), id ]) human_orientations.append(self.k.vehicle.get_orientation(id)) human_dynamics.append(self.k.vehicle.get_speed(id) / max_speed) for id in machine_idlist: machine_logs.append([ self.k.vehicle.get_timestep(id), self.k.vehicle.get_timedelta(id), id ]) machine_orientations.append(self.k.vehicle.get_orientation(id)) machine_dynamics.append(self.k.vehicle.get_speed(id) / max_speed) # step the renderer self.frame = self.renderer.render(human_orientations, machine_orientations, human_dynamics, machine_dynamics, human_logs, machine_logs) # get local observation of RL vehicles self.sights = [] for id in human_idlist: # Force tracking human vehicles by adding "track" in vehicle id. # The tracked human vehicles will be treated as machine vehicles. if "track" in id: orientation = self.k.vehicle.get_orientation(id) sight = self.renderer.get_sight(orientation, id) self.sights.append(sight) for id in machine_idlist: orientation = self.k.vehicle.get_orientation(id) sight = self.renderer.get_sight(orientation, id) self.sights.append(sight)
class Env(*classdef): """Base environment class. Provides the interface for controlling a SUMO simulation. Using this class, you can start sumo, provide a scenario to specify a configuration and controllers, perform simulation steps, and reset the simulation to an initial configuration. Env is Serializable to allow for pickling and replaying of the policy. This class cannot be used as is: you must extend it to implement an action applicator method, and properties to define the MDP if you choose to use it with an rl library (e.g. RLlib). This can be done by overloading the following functions in a child class: - action_space - observation_space - apply_rl_action - get_state - compute_reward Attributes ---------- env_params : flow.core.params.EnvParams see flow/core/params.py sim_params: flow.core.params.SimParams see flow/core/params.py scenario: Scenario type see flow/scenarios/base_scenario.py """ def __init__(self, env_params, sim_params, scenario): # Invoke serializable if using rllab if serializable_flag: Serializable.quick_init(self, locals()) self.env_params = env_params self.scenario = scenario self.sim_params = sim_params time_stamp = ''.join(str(time.time()).split('.')) if os.environ.get("TEST_FLAG", 0): # 1.0 works with stress_test_start 10k times time.sleep(1.0 * int(time_stamp[-6:]) / 1e6) # FIXME: this is sumo-specific self.sim_params.port = sumolib.miscutils.getFreeSocketPort() self.vehicles = scenario.vehicles self.traffic_lights = scenario.traffic_lights # time_counter: number of steps taken since the start of a rollout self.time_counter = 0 # step_counter: number of total steps taken self.step_counter = 0 # initial_state: # Key = Vehicle ID, # Entry = (type_id, route_id, lane_index, lane_pos, speed, pos) self.initial_state = {} self.state = None self.obs_var_labels = [] # simulation step size self.sim_step = sim_params.sim_step # TraCI connection used to communicate with sumo self.traci_connection = None # dictionary of initial observations used while resetting vehicles # after each rollout self.initial_observations = dict.fromkeys(self.vehicles.get_ids()) # store the initial vehicle ids self.initial_ids = deepcopy(self.vehicles.get_ids()) # store the initial state of the vehicles class (for restarting sumo) self.initial_vehicles = deepcopy(self.vehicles) # colors used to distinguish between types of vehicles in the network self.colors = {} # the simulator used by this environment self.simulator = 'traci' # create the Flow kernel self.k = Kernel(simulator=self.simulator, sim_params=sim_params) # use the scenario class's network parameters to generate the necessary # scenario components within the scenario kernel self.k.scenario.generate_network(scenario) # initialize the simulation using the simulation kernel. This will use # the scenario kernel as an input in order to determine what network # needs to be simulated. self.traci_connection = self.k.simulation.start_simulation( scenario=self.k.scenario, sim_params=sim_params) # pass the kernel api to the kernel and it's subclasses self.k.pass_api(self.traci_connection) # the available_routes variable contains a dictionary of routes # vehicles can traverse; to be used when routes need to be chosen # dynamically self.available_routes = self.k.scenario.rts self.setup_initial_state() # use pyglet to render the simulation if self.sim_params.render in ['gray', 'dgray', 'rgb', 'drgb']: save_render = self.sim_params.save_render sight_radius = self.sim_params.sight_radius pxpm = self.sim_params.pxpm show_radius = self.sim_params.show_radius # get network polygons network = [] for lane_id in self.traci_connection.lane.getIDList(): _lane_poly = self.traci_connection.lane.getShape(lane_id) lane_poly = [i for pt in _lane_poly for i in pt] network.append(lane_poly) # instantiate a pyglet renderer self.renderer = Renderer( network, self.sim_params.render, save_render, sight_radius=sight_radius, pxpm=pxpm, show_radius=show_radius) # render a frame self.render(reset=True) elif self.sim_params.render in [True, False]: pass # default to sumo-gui (if True) or sumo (if False) else: raise ValueError("Mode %s is not supported!" % self.sim_params.render) atexit.register(self.terminate) def restart_simulation(self, sim_params, render=None): """Restart an already initialized simulation instance. This is used when visualizing a rollout, in order to update the rendering with potentially a gui and export emission data from sumo. This is also used to handle cases when the runtime of an experiment is too long, causing the sumo instance Parameters ---------- sim_params : flow.core.params.SimParams sumo-specific parameters render: bool, optional specifies whether to use the gui """ self.k.close() # killed the sumo process if using sumo/TraCI if self.simulator == 'traci': self.k.simulation.sumo_proc.kill() if render is not None: self.sim_params.render = render if sim_params.emission_path is not None: ensure_dir(sim_params.emission_path) self.sim_params.emission_path = sim_params.emission_path self.k.scenario.generate_network(self.scenario) self.traci_connection = self.k.simulation.start_simulation( scenario=self.k.scenario, sim_params=self.sim_params) self.k.pass_api(self.traci_connection) self.setup_initial_state() def setup_initial_state(self): """Store information on the initial state of vehicles in the network. This information is to be used upon reset. This method also adds this information to the self.vehicles class and starts a subscription with sumo to collect state information each step. """ # determine whether to shuffle the vehicles if self.scenario.initial_config.shuffle: random.shuffle(self.initial_ids) # generate starting position for vehicles in the network start_pos, start_lanes = self.k.scenario.generate_starting_positions( initial_config=self.scenario.initial_config, num_vehicles=len(self.initial_ids)) # save the initial state. This is used in the _reset function for i, veh_id in enumerate(self.initial_ids): type_id = self.scenario.vehicles.get_type(veh_id) pos = start_pos[i][1] lane = start_lanes[i] speed = self.scenario.vehicles.get_initial_speed(veh_id) route_id = "route" + start_pos[i][0] self.initial_state[veh_id] = (type_id, route_id, lane, pos, speed) def step(self, rl_actions): """Advance the environment by one step. Assigns actions to autonomous and human-driven agents (i.e. vehicles, traffic lights, etc...). Actions that are not assigned are left to the control of the simulator. The actions are then used to advance the simulator by the number of time steps requested per environment step. Results from the simulations are processed through various classes, such as the Vehicle and TrafficLight kernels, to produce standardized methods for identifying specific network state features. Finally, results from the simulator are used to generate appropriate observations. Parameters ---------- rl_actions: numpy ndarray an list of actions provided by the rl algorithm Returns ------- observation: numpy ndarray agent's observation of the current environment reward: float amount of reward associated with the previous state/action pair done: bool indicates whether the episode has ended info: dict contains other diagnostic information from the previous action """ for _ in range(self.env_params.sims_per_step): self.time_counter += 1 self.step_counter += 1 # perform acceleration actions for controlled human-driven vehicles if len(self.vehicles.get_controlled_ids()) > 0: accel = [] for veh_id in self.vehicles.get_controlled_ids(): accel_contr = self.vehicles.get_acc_controller(veh_id) action = accel_contr.get_action(self) accel.append(action) self.apply_acceleration(self.vehicles.get_controlled_ids(), accel) # perform lane change actions for controlled human-driven vehicles if len(self.vehicles.get_controlled_lc_ids()) > 0: direction = [] for veh_id in self.vehicles.get_controlled_lc_ids(): lc_contr = self.vehicles.get_lane_changing_controller( veh_id) target_lane = lc_contr.get_action(self) direction.append(target_lane) self.apply_lane_change( self.vehicles.get_controlled_lc_ids(), direction=direction) # perform (optionally) routing actions for all vehicles in the # network, including RL and SUMO-controlled vehicles routing_ids = [] routing_actions = [] for veh_id in self.vehicles.get_ids(): if self.vehicles.get_routing_controller(veh_id) is not None: routing_ids.append(veh_id) route_contr = self.vehicles.get_routing_controller(veh_id) routing_actions.append(route_contr.choose_route(self)) self.choose_routes(routing_ids, routing_actions) self.apply_rl_actions(rl_actions) self.additional_command() # advance the simulation in the simulator by one step self.k.simulation.simulation_step() # collect subscription information from sumo vehicle_obs = \ self.traci_connection.vehicle.getSubscriptionResults() id_lists = \ self.traci_connection.simulation.getSubscriptionResults() # store new observations in the vehicles and traffic lights class self.vehicles.update(vehicle_obs, id_lists, self) # store new observations in the vehicles and traffic lights class self.k.update(reset=False) # update the colors of vehicles self.update_vehicle_colors() # crash encodes whether the simulator experienced a collision crash = self.k.simulation.check_collision() # stop collecting new simulation steps if there is a collision if crash: break # render a frame self.render() states = self.get_state() if isinstance(states, dict): self.state = {} next_observation = {} done = {} infos = {} temp_state = states for key, state in temp_state.items(): # collect information of the state of the network based on the # environment class used self.state[key] = np.asarray(state).T # collect observation new state associated with action next_observation[key] = np.copy(self.state[key]) # test if a crash has occurred done[key] = crash # test if the agent has exited the system, if so # its agent should be done # FIXME(ev) this assumes that agents are single vehicles if key in self.vehicles.get_arrived_ids(): done[key] = True # check if an agent is done if crash: done['__all__'] = True else: done['__all__'] = False infos[key] = {} else: # collect information of the state of the network based on the # environment class used self.state = np.asarray(states).T # collect observation new state associated with action next_observation = np.copy(states) # test if the agent should terminate due to a crash done = crash # compute the info for each agent infos = {} # compute the reward rl_clipped = self.clip_actions(rl_actions) reward = self.compute_reward(rl_clipped, fail=crash) return next_observation, reward, done, infos def reset(self): """Reset the environment. This method is performed in between rollouts. It resets the state of the environment, and re-initializes the vehicles in their starting positions. If "shuffle" is set to True in InitialConfig, the initial positions of vehicles is recalculated and the vehicles are shuffled. Returns ------- observation: numpy ndarray the initial observation of the space. The initial reward is assumed to be zero. """ # reset the time counter self.time_counter = 0 # warn about not using restart_instance when using inflows if len(self.scenario.net_params.inflows.get()) > 0 and \ not self.sim_params.restart_instance: print( "**********************************************************\n" "**********************************************************\n" "**********************************************************\n" "WARNING: Inflows will cause computational performance to\n" "significantly decrease after large number of rollouts. In \n" "order to avoid this, set SumoParams(restart_instance=True).\n" "**********************************************************\n" "**********************************************************\n" "**********************************************************" ) if self.sim_params.restart_instance or self.step_counter > 2e6: self.step_counter = 0 # issue a random seed to induce randomness into the next rollout self.sim_params.seed = random.randint(0, 1e5) # modify the vehicles class to match initial data self.vehicles = deepcopy(self.initial_vehicles) # restart the simulation instance self.restart_simulation(self.sim_params) elif self.scenario.initial_config.shuffle: # perform shuffling (if requested) self.setup_initial_state() # clear all vehicles from the network and the vehicles class for veh_id in self.traci_connection.vehicle.getIDList(): try: self.traci_connection.vehicle.remove(veh_id) self.traci_connection.vehicle.unsubscribe(veh_id) self.vehicles.remove(veh_id) except (FatalTraCIError, TraCIException): print("Error during start: {}".format(traceback.format_exc())) # clear all vehicles from the network and the vehicles class # FIXME (ev, ak) this is weird and shouldn't be necessary for veh_id in list(self.vehicles.get_ids()): self.vehicles.remove(veh_id) # do not try to remove the vehicles from the network in the first # step after initializing the network, as there will be no vehicles if self.step_counter == 0: continue try: self.traci_connection.vehicle.remove(veh_id) self.traci_connection.vehicle.unsubscribe(veh_id) except (FatalTraCIError, TraCIException): print("Error during start: {}".format(traceback.format_exc())) # reintroduce the initial vehicles to the network for veh_id in self.initial_ids: type_id, route_id, lane_index, pos, speed = \ self.initial_state[veh_id] try: self.traci_connection.vehicle.addFull( veh_id, route_id, typeID=str(type_id), departLane=str(lane_index), departPos=str(pos), departSpeed=str(speed)) except (FatalTraCIError, TraCIException): # if a vehicle was not removed in the first attempt, remove it # now and then reintroduce it self.traci_connection.vehicle.remove(veh_id) self.traci_connection.vehicle.addFull( veh_id, route_id, typeID=str(type_id), departLane=str(lane_index), departPos=str(pos), departSpeed=str(speed)) # advance the simulation in the simulator by one step self.k.simulation.simulation_step() # collect subscription information from sumo vehicle_obs = self.traci_connection.vehicle.getSubscriptionResults() id_lists = self.traci_connection.simulation.getSubscriptionResults() # store new observations in the vehicles and traffic lights class self.vehicles.update(vehicle_obs, id_lists, self) # store new observations in the vehicles and traffic lights class self.k.update(reset=True) # update the colors of vehicles self.update_vehicle_colors() # check to make sure all vehicles have been spawned if len(self.initial_ids) > self.vehicles.num_vehicles: missing_vehicles = list( set(self.initial_ids) - set(self.vehicles.get_ids())) logging.error('Not enough vehicles have spawned! Bad start?') logging.error('Missing vehicles / initial state:') for veh_id in missing_vehicles: logging.error('- {}: {}'.format(veh_id, self.initial_state[veh_id])) sys.exit() self.prev_last_lc = dict() for veh_id in self.vehicles.get_ids(): # re-initialize memory on last lc self.prev_last_lc[veh_id] = -float("inf") states = self.get_state() if isinstance(states, dict): self.state = {} observation = {} for key, state in states.items(): # collect information of the state of the network based on the # environment class used self.state[key] = np.asarray(state).T # collect observation new state associated with action observation[key] = np.copy(self.state[key]).tolist() else: # collect information of the state of the network based on the # environment class used self.state = np.asarray(states).T # observation associated with the reset (no warm-up steps) observation = np.copy(states) # perform (optional) warm-up steps before training for _ in range(self.env_params.warmup_steps): observation, _, _, _ = self.step(rl_actions=None) # render a frame self.render(reset=True) return observation def additional_command(self): """Additional commands that may be performed by the step method.""" pass def clip_actions(self, rl_actions=None): """Clip the actions passed from the RL agent. Parameters ---------- rl_actions : list or numpy ndarray list of actions provided by the RL algorithm Returns ------- numpy ndarray (float) The rl_actions clipped according to the box """ # ignore if no actions are issued if rl_actions is None: return None # clip according to the action space requirements if isinstance(self.action_space, Box): rl_actions = np.clip( rl_actions, a_min=self.action_space.low, a_max=self.action_space.high) return rl_actions def apply_rl_actions(self, rl_actions=None): """Specify the actions to be performed by the rl agent(s). If no actions are provided at any given step, the rl agents default to performing actions specified by SUMO. Parameters ---------- rl_actions : list or numpy ndarray list of actions provided by the RL algorithm """ # ignore if no actions are issued if rl_actions is None: return rl_clipped = self.clip_actions(rl_actions) self._apply_rl_actions(rl_clipped) def _apply_rl_actions(self, rl_actions): raise NotImplementedError def apply_acceleration(self, veh_ids, acc): """Apply the acceleration requested by a vehicle in SUMO. Note that, if the SUMO-specified speed mode of the vehicle is not "aggressive", the acceleration may be clipped by some safety velocity or maximum possible acceleration. Parameters ---------- veh_ids: list of str vehicles IDs associated with the requested accelerations acc: numpy ndarray or list of float requested accelerations from the vehicles """ for i, vid in enumerate(veh_ids): if acc[i] is not None: this_vel = self.vehicles.get_speed(vid) next_vel = max([this_vel + acc[i] * self.sim_step, 0]) self.traci_connection.vehicle.slowDown(vid, next_vel, 1) def apply_lane_change(self, veh_ids, direction): """Apply an instantaneous lane-change to a set of vehicles. This method also prevents vehicles from moving to lanes that do not exist, and set the "last_lc" variable for RL vehicles that lane changed to match the current time step, in order to assist in maintaining a lane change duration for these vehicles. Parameters ---------- veh_ids: list of str vehicles IDs associated with the requested accelerations direction: list of {-1, 0, 1} -1: lane change to the right 0: no lane change 1: lane change to the left Raises ------ ValueError If any of the direction values are not -1, 0, or 1. """ # if any of the directions are not -1, 0, or 1, raise a ValueError if any(d not in [-1, 0, 1] for d in direction): raise ValueError( "Direction values for lane changes may only be: -1, 0, or 1.") for i, veh_id in enumerate(veh_ids): # check for no lane change if direction[i] == 0: continue # compute the target lane, and clip it so vehicle don't try to lane # change out of range this_lane = self.vehicles.get_lane(veh_id) this_edge = self.vehicles.get_edge(veh_id) target_lane = min( max(this_lane + direction[i], 0), self.k.scenario.num_lanes(this_edge) - 1) # perform the requested lane action action in TraCI if target_lane != this_lane: self.traci_connection.vehicle.changeLane( veh_id, int(target_lane), 100000) if veh_id in self.vehicles.get_rl_ids(): self.prev_last_lc[veh_id] = \ self.vehicles.get_state(veh_id, "last_lc") def choose_routes(self, veh_ids, route_choices): """Update the route choice of vehicles in the network. Parameters ---------- veh_ids: list list of vehicle identifiers route_choices: numpy array or list of floats list of edges the vehicle wishes to traverse, starting with the edge the vehicle is currently on. If a value of None is provided, the vehicle does not update its route """ for i, veh_id in enumerate(veh_ids): if route_choices[i] is not None: self.traci_connection.vehicle.setRoute( vehID=veh_id, edgeList=route_choices[i]) def get_x_by_id(self, veh_id): """Provide a 1-D representation of the position of a vehicle. Note: These values are only meaningful if the specify_edge_starts method in the scenario is set appropriately; otherwise, a value of 0 is returned for all vehicles. Parameters ---------- veh_id: str vehicle identifier Returns ------- float position of a vehicle relative to a certain reference. """ if self.vehicles.get_edge(veh_id) == '': # occurs when a vehicle crashes is teleported for some other reason return 0. return self.k.scenario.get_x( self.vehicles.get_edge(veh_id), self.vehicles.get_position(veh_id)) def update_vehicle_colors(self): """Modify the color of vehicles if rendering is active. The colors of all vehicles are updated as follows: - red: autonomous (rl) vehicles - white: unobserved human-driven vehicles - cyan: observed human-driven vehicles """ # do not change the colors of vehicles if the sumo-gui is not active # (in order to avoid slow downs) if self.sim_params.render is not True: return for veh_id in self.vehicles.get_rl_ids(): try: # color rl vehicles red self.traci_connection.vehicle.setColor( vehID=veh_id, color=RED) except (FatalTraCIError, TraCIException): pass for veh_id in self.vehicles.get_human_ids(): try: if veh_id in self.vehicles.get_observed_ids(): # color observed human-driven vehicles cyan color = CYAN else: # color unobserved human-driven vehicles white color = WHITE self.traci_connection.vehicle.setColor( vehID=veh_id, color=color) except (FatalTraCIError, TraCIException): pass # clear the list of observed vehicles for veh_id in self.vehicles.get_observed_ids(): self.vehicles.remove_observed(veh_id) def get_state(self): """Return the state of the simulation as perceived by the RL agent. MUST BE implemented in new environments. Returns ------- state: numpy ndarray information on the state of the vehicles, which is provided to the agent """ raise NotImplementedError @property def action_space(self): """Identify the dimensions and bounds of the action space. MUST BE implemented in new environments. Returns ------- gym Box or Tuple type a bounded box depicting the shape and bounds of the action space """ raise NotImplementedError @property def observation_space(self): """Identify the dimensions and bounds of the observation space. MUST BE implemented in new environments. Returns ------- gym Box or Tuple type a bounded box depicting the shape and bounds of the observation space """ raise NotImplementedError def compute_reward(self, rl_actions, **kwargs): """Reward function for the RL agent(s). MUST BE implemented in new environments. Defaults to 0 for non-implemented environments. Parameters ---------- rl_actions: numpy ndarray actions performed by rl vehicles kwargs: dict other parameters of interest. Contains a "fail" element, which is True if a vehicle crashed, and False otherwise Returns ------- reward: float or list <float> """ return 0 def terminate(self): """Close the TraCI I/O connection. Should be done at end of every experiment. Must be in Env because the environment opens the TraCI connection. """ try: print( "Closing connection to TraCI and stopping simulation.\n" "Note, this may print an error message when it closes." ) self.k.close() # close pyglet renderer if self.sim_params.render in ['gray', 'dgray', 'rgb', 'drgb']: self.renderer.close() except FileNotFoundError: print("Skip automatic termination. " "Connection is probably already closed.") def render(self, reset=False, buffer_length=5): """Render a frame. Parameters ---------- reset: bool set to True to reset the buffer buffer_length: int length of the buffer """ if self.sim_params.render in ['gray', 'dgray', 'rgb', 'drgb']: # render a frame self.pyglet_render() # cache rendering if reset: self.frame_buffer = [self.frame.copy() for _ in range(5)] self.sights_buffer = [self.sights.copy() for _ in range(5)] else: if self.step_counter % int(1/self.sim_step) == 0: self.frame_buffer.append(self.frame.copy()) self.sights_buffer.append(self.sights.copy()) if len(self.frame_buffer) > buffer_length: self.frame_buffer.pop(0) self.sights_buffer.pop(0) def pyglet_render(self): """Render a frame using pyglet.""" # get human and RL simulation status human_idlist = self.vehicles.get_human_ids() machine_idlist = self.vehicles.get_rl_ids() human_logs = [] human_orientations = [] human_dynamics = [] machine_logs = [] machine_orientations = [] machine_dynamics = [] max_speed = self.k.scenario.max_speed() for id in human_idlist: # Force tracking human vehicles by adding "track" in vehicle id. # The tracked human vehicles will be treated as machine vehicles. if 'track' in id: machine_logs.append( [self.vehicles.get_timestep(id), self.vehicles.get_timedelta(id), id]) machine_orientations.append( self.vehicles.get_orientation(id)) machine_dynamics.append( self.vehicles.get_speed(id)/max_speed) else: human_logs.append( [self.vehicles.get_timestep(id), self.vehicles.get_timedelta(id), id]) human_orientations.append( self.vehicles.get_orientation(id)) human_dynamics.append( self.vehicles.get_speed(id)/max_speed) for id in machine_idlist: machine_logs.append( [self.vehicles.get_timestep(id), self.vehicles.get_timedelta(id), id]) machine_orientations.append( self.vehicles.get_orientation(id)) machine_dynamics.append( self.vehicles.get_speed(id)/max_speed) # step the renderer self.frame = self.renderer.render(human_orientations, machine_orientations, human_dynamics, machine_dynamics, human_logs, machine_logs) # get local observation of RL vehicles self.sights = [] for id in human_idlist: # Force tracking human vehicles by adding "track" in vehicle id. # The tracked human vehicles will be treated as machine vehicles. if "track" in id: orientation = self.vehicles.get_orientation(id) sight = self.renderer.get_sight( orientation, id) self.sights.append(sight) for id in machine_idlist: orientation = self.vehicles.get_orientation(id) sight = self.renderer.get_sight( orientation, id) self.sights.append(sight)