# NOTE: Test fixture helper; `self.kernel_data_1` is assumed to be loaded
# elsewhere in the enclosing test case (e.g. from a pickled data file).
def observation_space(self):
    observation_space = State(self.network, self.mdp_params)
    observation_space.reset()

    # Fake environment interaction with state object.
    timesteps = list(range(1, 60)) + [0]
    for t, data in zip(timesteps, self.kernel_data_1):
        observation_space.update(t, data)

    return observation_space
# NOTE: This module assumes the following names are imported from the
# surrounding ilurl package (exact paths depend on the project layout):
# Env, State, build_rewards, build_vehicles, DecentralizedMAS,
# CentralizedAgent, get_ts_controller, is_controller_periodic, lazy_property.
import numpy as np


class TrafficLightEnv(Env):
    """
        Environment used to train traffic light systems.
    """
    def __init__(self,
                 env_params,
                 sim_params,
                 mdp_params,
                 network,
                 exp_path,
                 seed,
                 simulator='traci'):

        super(TrafficLightEnv, self).__init__(env_params,
                                              sim_params,
                                              network,
                                              simulator=simulator)

        # Traffic light system type ('rl', 'random', 'centralized',
        # 'static', 'webster', 'actuated' or 'max_pressure').
        self.ts_type = env_params.additional_params.get('tls_type')

        # Cycle time.
        self.cycle_time = network.cycle_time

        # TLS programs (discrete action space).
        if mdp_params.action_space == 'discrete':
            self.programs = network.programs

        # Keeps the internal value of sim step.
        self.sim_step = sim_params.sim_step
        assert self.sim_step == 1  # Step size must equal 1.

        # Problem formulation params.
        self.mdp_params = mdp_params
        mdp_params.phases_per_traffic_light = network.phases_per_tls
        mdp_params.num_actions = network.num_signal_plans_per_tls

        if self.ts_type in ('rl', 'random'):
            self.tsc = DecentralizedMAS(mdp_params, exp_path, seed)
        elif self.ts_type == 'centralized':
            self.tsc = CentralizedAgent(mdp_params, exp_path, seed)
        elif self.ts_type in ('max_pressure', 'webster'):
            self.tsc = get_ts_controller(self.ts_type, network.phases_per_tls,
                                         self.tls_phases, self.cycle_time)
        elif self.ts_type in ('static', 'actuated'):
            pass  # Nothing to do here.
        else:
            raise ValueError(f'Unknown ts_type: {self.ts_type}')

        # Reward function.
        self.reward = build_rewards(mdp_params)

        self.actions_log = {}
        self.states_log = {}

        # Overrides GYM's observation space.
        self.observation_space = State(network, mdp_params)

        # Continuous action space signal plans.
        self.signal_plans_continuous = {}

        self._reset()

    # ABCMeta.
    def action_space(self):
        return self.mdp_params.action_space

    @property
    def duration(self):
        if self.time_counter == 0:
            return 0.0

        if self._duration_counter != self.time_counter:
            self._duration = \
                round(self._duration + self.sim_step, 2) % self.cycle_time
            self._duration_counter = self.time_counter
        return self._duration

    # Overrides GYM's observation space.
    @property
    def observation_space(self):
        return self._observation_space

    @observation_space.setter
    def observation_space(self, observation_space):
        self._observation_space = observation_space

    # TODO: create a delegate class.
    @property
    def stop(self):
        return self.tsc.stop

    @stop.setter
    def stop(self, stop):
        self.tsc.stop = stop

    # TODO: restrict delegate property to an instance of class.
    @lazy_property
    def tls_ids(self):
        return self.network.tls_ids

    @lazy_property
    def tls_phases(self):
        return self.network.tls_phases

    @lazy_property
    def tls_states(self):
        return self.network.tls_states

    @lazy_property
    def tls_durations(self):
        # Timings used by 'static' ts_type.
        return {
            tid: np.cumsum(durations).tolist()
            for tid, durations in self.network.tls_durations.items()
        }

    # Obtains the central agent's joint action (0 to 7^N - 1), unpacks each
    # agent's action (0-7) and returns whether each agent should switch
    # phase in this timestep.
    def get_joined_actions(self):
        dur = int(self.duration)
        joined_action = []
        action_index = self._current_rl_action()

        if dur == 0 and self.step_counter > 1:
            # New cycle.
            return [True] * len(self.tls_ids)

        for tid in self.tls_ids:
            # Mixed-radix decoding: each TLS's sub-action is one "digit"
            # in base len(self.programs[tid]).
            curr_action = action_index % len(self.programs[tid])
            joined_action.append(dur in self.programs[tid][curr_action])
            action_index //= len(self.programs[tid])
        return joined_action

    def update_observation_space(self):
        """
            Query kernel to retrieve vehicles' information and send it to
            the ilurl.State object for state computation. Updates
            self.observation_space in place.
        """
        # Query kernel and retrieve vehicles' data.
        vehs = {nid: {p: build_vehicles(nid, data['incoming'], self.k.vehicle)
                      for p, data in self.tls_phases[nid].items()}
                for nid in self.tls_ids}
        self.observation_space.update(self.duration, vehs)

    def get_state(self):
        """
            Return the state of the simulation as perceived by the RL agent(s).

            Returns:
            -------
            state : dict
        """
        obs = self.observation_space.feature_map(
            categorize=self.mdp_params.discretize_state_space,
            flatten=True
        )
        return obs

    def _rl_actions(self, state):
        """
            Return the selected action(s) given the state of the environment.

            Parameters:
            ----------
            state : dict
                Information on the state of the vehicles, which is
                provided to the agent.

            Returns:
            -------
            actions : dict
        """
        return self.tsc.act(state)  # Delegate to Multi-Agent System.

    def _periodic_control_actions(self):
        """
            Executes pre-timed periodic control actions.

            * Actions generate a plan for one period ahead, e.g. cycle time.

            Returns:
            -------
            controller_actions : tuple<bool>
                False; duration<state_k> < duration < duration<state_k+1>
                True; duration == duration<state_k+1>
        """
        ret = []
        dur = int(self.duration)

        def fn(tid):
            if self.ts_type in ('rl', 'random') and \
                    (dur == 1 or self.time_counter == 1) and \
                    self.mdp_params.action_space == 'continuous':

                # Calculate cycle length allocations for the
                # new cycle (continuous action space).

                # Get current action and respective number of phases.
                current_action = np.array(self._current_rl_action()[tid])
                num_phases = len(current_action)

                # Remove yellow time from cycle length
                # (yellow time = 6 seconds per phase).
                available_time = self.cycle_time - (6.0 * num_phases)

                # Allocate time for each of the phases. By default, 20% of
                # the cycle length is distributed equally among all phases
                # in order to ensure a minimum green time for each phase;
                # the remaining 80% is allocated by the agent.
                phases_durations = np.around(
                    0.2 * available_time * (1 / num_phases) +
                    current_action * 0.8 * available_time,
                    decimals=0)

                # Convert phase allocations into a signal plan.
                counter = 0
                timings = []
                for p in range(num_phases):
                    timings.append(counter + phases_durations[p])
                    timings.append(counter + phases_durations[p] + 6.0)
                    counter += phases_durations[p] + 6.0

                timings[-1] = self.cycle_time
                timings[-2] = self.cycle_time - 6.0

                # Store the signal plan to be executed throughout
                # the current cycle.
                self.signal_plans_continuous[tid] = timings

            if dur == 0 and self.step_counter > 1:
                # New cycle.
                return True

            if self.ts_type == 'static':
                return dur in self.tls_durations[tid]
            elif self.ts_type == 'webster':
                return dur in self.webster_timings[tid]
            elif self.ts_type in ('rl', 'random'):
                if self.mdp_params.action_space == 'discrete':
                    # Discrete action space: TLS programs.
                    progid = self._current_rl_action()[tid]
                    return dur in self.programs[tid][progid]
                else:
                    # Continuous action space: phases durations.
                    return dur in self.signal_plans_continuous[tid]
            else:
                raise ValueError(f'Unknown ts_type: {self.ts_type}')

        if self.ts_type == 'centralized':
            ret = self.get_joined_actions()
        else:
            ret = [fn(tid) for tid in self.tls_ids]

        return tuple(ret)

    def apply_rl_actions(self, rl_actions):
        """
            Specify the actions to be performed.

            Parameters:
            ----------
            rl_actions : dict or None
                Actions to be performed.
        """
        self.update_observation_space()

        if is_controller_periodic(self.ts_type):
            if self.ts_type in ('rl', 'random', 'centralized') and \
                    (self.duration == 0 or self.time_counter == 1):
                # New cycle.

                # Get the number of the current cycle.
                cycle_number = int(self.step_counter / self.cycle_time)

                # Get current state.
                state = self.get_state()

                # Select new action.
                if rl_actions is None:
                    rl_action = self._rl_actions(state)
                else:
                    rl_action = rl_actions

                self.actions_log[cycle_number] = rl_action
                self.states_log[cycle_number] = state

                if self.step_counter > 1:
                    # RL-agent update.
                    reward = self.compute_reward(None)
                    prev_state = self.states_log[cycle_number - 1]
                    prev_action = self.actions_log[cycle_number - 1]
                    self.tsc.update(prev_state, prev_action, reward, state)

            if self.ts_type == 'webster':
                kernel_data = {nid: {p: build_vehicles(nid, data['incoming'],
                                                       self.k.vehicle)
                                     for p, data in self.tls_phases[nid].items()}
                               for nid in self.tls_ids}
                self.webster_timings = self.tsc.act(kernel_data)

            if self.ts_type == 'static':
                pass  # Nothing to do here.

            self._apply_tsc_actions(self._periodic_control_actions())
        else:
            # Aperiodic controller.
            if self.ts_type == 'max_pressure':
                controller_actions = self.tsc.act(self.observation_space,
                                                  self.time_counter)
                # Update traffic lights' control signals.
                self._apply_tsc_actions(controller_actions)

            if self.ts_type == 'actuated':
                pass  # Nothing to do here.

    def _apply_tsc_actions(self, controller_actions):
        """
            For each TSC, shift phase or keep phase.

            Parameters:
            ----------
            controller_actions : list<bool>
                False; keep state.
                True; switch to next state.
        """
        for i, tid in enumerate(self.tls_ids):
            if controller_actions[i]:
                states = self.tls_states[tid]
                self._tls_phase_indicator[tid] = \
                    (self._tls_phase_indicator[tid] + 1) % len(states)
                next_state = states[self._tls_phase_indicator[tid]]
                self.k.traffic_light.set_state(node_id=tid, state=next_state)

    def _current_rl_action(self):
        """
            Returns the current action.
        """
        N = (self.cycle_time / self.sim_step)
        actid = int(max(0, self.step_counter - 1) / N)
        return self.actions_log[actid]

    def compute_reward(self, rl_actions, **kwargs):
        """
            Reward calculation.

            Parameters:
            ----------
            rl_actions : array_like
                Actions performed by each TSC.

            kwargs : dict
                Other parameters of interest.

            Returns:
            -------
            reward : dict<float>
                Reward at each TSC.
        """
        return self.reward(self.observation_space)

    def reset(self):
        super(TrafficLightEnv, self).reset()
        self._reset()

    def _reset(self):
        # The 'duration' variable measures the elapsed time since
        # the beginning of the cycle, i.e. it measures (in seconds)
        # for how long the current configuration has been going on.
        self._duration_counter = -1
        self._duration = self.time_counter * self.sim_step

        # Stores the state index.
        self._tls_phase_indicator = {}
        for node_id in self.tls_ids:
            if self.ts_type != 'actuated':
                self._tls_phase_indicator[node_id] = 0
                s0 = self.tls_states[node_id][0]
                # Notify controller.
                self.k.traffic_light.set_state(node_id=node_id, state=s0)

        # Observation space.
        self.observation_space.reset()
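# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the environment above): a minimal,
# self-contained demonstration of (a) the mixed-radix decoding used by
# `get_joined_actions` and (b) the continuous-action phase allocation used
# by `_periodic_control_actions`. All inputs are hypothetical examples,
# not project defaults.
if __name__ == '__main__':
    import numpy as np

    # (a) Mixed-radix decoding: with two traffic lights offering 3 and 4
    # programs respectively, joint action 10 decodes digit by digit:
    # 10 % 3 = 1 (first TLS), then 10 // 3 = 3 and 3 % 4 = 3 (second TLS).
    programs_per_tls = [3, 4]  # Hypothetical program counts.
    action_index = 10
    decoded = []
    for base in programs_per_tls:
        decoded.append(action_index % base)
        action_index //= base
    print(decoded)  # -> [1, 3]

    # (b) Continuous-action allocation: for a 90s cycle with 2 phases and
    # agent action (0.25, 0.75), 6s of yellow per phase is removed first;
    # 20% of the remainder is split evenly (minimum green) and 80% is
    # allocated according to the agent's action.
    cycle_time, num_phases = 90.0, 2
    current_action = np.array([0.25, 0.75])
    available_time = cycle_time - 6.0 * num_phases  # 78.0
    phases_durations = np.around(
        0.2 * available_time / num_phases + current_action * 0.8 * available_time,
        decimals=0)
    print(phases_durations)  # -> [23. 55.], sums to 78 = 90 - 2 * 6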
# NOTE: This test module uses `pickle` and `unittest` from the standard
# library; MDPParams, Network and State are assumed to be imported from
# the ilurl package (exact module paths depend on the project layout).
import pickle
import unittest


class TestStateReward(unittest.TestCase):
    """
        Set of tests that target the implemented problem formulations,
        i.e. state and reward function definitions.
    """
    def setUp(self):
        network_args = {
            'network_id': 'grid',
            'horizon': 999,
            'demand_type': 'constant',
            'tls_type': 'rl'
        }
        self.network = Network(**network_args)

    def test_time_period(self):
        """ Time period. """
        mdp_params = MDPParams(features=('delay',),
                               reward='reward_min_delay',
                               normalize_velocities=True,
                               discretize_state_space=False,
                               reward_rescale=0.01,
                               time_period=3600,
                               velocity_threshold=0.1)
        self.observation_space = State(self.network, mdp_params)
        self.observation_space.reset()

        with open('tests/unit/data/grid_kernel_data.dat', 'rb') as f:
            kernel_data = pickle.load(f)
        self.assertEqual(len(kernel_data), 60)

        # Fake environment interaction with state object.
        timesteps = list(range(1, 60)) + [0]
        for t, data in zip(timesteps, kernel_data):
            self.observation_space.update(t, data)

        # Get state.
        state = self.observation_space.feature_map(
            categorize=mdp_params.discretize_state_space,
            flatten=True)

        self.assertEqual(len(state['247123161']), 3)
        self.assertEqual(len(state['247123464']), 3)
        self.assertEqual(len(state['247123468']), 3)

        # State (the time variable is the first entry of each feature vector).
        # 247123161.
        self.assertEqual(state['247123161'][0], 0)  # Time variable.
        # 247123464.
        self.assertEqual(state['247123464'][0], 0)  # Time variable.
        # 247123468.
        self.assertEqual(state['247123468'][0], 0)  # Time variable.

        self.observation_space.reset()

        hours = list(range(24)) + [0, 1]  # Wraps around after 24 hours.
        for hour in hours:
            for minute in range(60):
                # Fake environment interaction with state object
                # (60 seconds = 1 minute).
                timesteps = list(range(1, 60)) + [0]
                for t, data in zip(timesteps, kernel_data):
                    self.observation_space.update(t, data)

            # Get state.
            state = self.observation_space.feature_map(
                categorize=mdp_params.discretize_state_space,
                flatten=True)

            self.assertEqual(state['247123161'][0], hour)
            self.assertEqual(state['247123464'][0], hour)
            self.assertEqual(state['247123468'][0], hour)

    def tearDown(self):
        pass
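# Standard unittest entry point, added so the test file can also be run
# directly (e.g. `python <this_file>.py`). Note that the data path used in
# test_time_period ('tests/unit/data/grid_kernel_data.dat') assumes the
# repository root as the working directory.
if __name__ == '__main__':
    unittest.main()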