Example #1
    def __init__(self,
                 env_params,
                 sim_params,
                 mdp_params,
                 network,
                 exp_path,
                 seed,
                 simulator='traci'):

        super(TrafficLightEnv, self).__init__(env_params,
                                              sim_params,
                                              network,
                                              simulator=simulator)

        # Traffic light system type.
        # ('rl', 'random', 'centralized', 'static', 'webster',
        #  'actuated' or 'max_pressure').
        self.ts_type = env_params.additional_params.get('tls_type')

        # Cycle time.
        self.cycle_time = network.cycle_time

        # TLS programs (discrete action space).
        if mdp_params.action_space == 'discrete':
            self.programs = network.programs

        # Keeps the internal value of sim step.
        self.sim_step = sim_params.sim_step
        assert self.sim_step == 1, 'Step size must equal 1.'

        # Problem formulation params.
        self.mdp_params = mdp_params
        mdp_params.phases_per_traffic_light = network.phases_per_tls
        mdp_params.num_actions = network.num_signal_plans_per_tls

        if self.ts_type in ('rl', 'random'):
            self.tsc = DecentralizedMAS(mdp_params, exp_path, seed)
        elif self.ts_type == "centralized":
            self.tsc = CentralizedAgent(mdp_params, exp_path, seed)
        elif self.ts_type in ('max_pressure', 'webster'):
            self.tsc = get_ts_controller(self.ts_type, network.phases_per_tls,
                                        self.tls_phases, self.cycle_time)
        elif self.ts_type in ('static', 'actuated'):
            pass # Nothing to do here.
        else:
            raise ValueError(f'Unknown ts_type: {self.ts_type}')

        # Reward function.
        self.reward = build_rewards(mdp_params)

        self.actions_log = {}
        self.states_log = {}

        # Overrides GYM's observation space.
        self.observation_space = State(network, mdp_params)

        # Continuous action space signal plans.
        self.signal_plans_continous = {}

        self._reset()
Example #2
    def observation_space(self):
        observation_space = State(self.network, self.mdp_params)
        observation_space.reset()
        # Fake environment interaction with state object.
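        # The trailing t = 0 entry mimics the duration wrapping back to the
        # start of a new cycle after 60 one-second updates.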
        timesteps = list(range(1, 60)) + [0]

        for t, data in zip(timesteps, self.kernel_data_1):
            observation_space.update(t, data)

        return observation_space
Example #3
    def test_time_period(self):
        """
            Time period.
        """
        mdp_params = MDPParams(features=('delay', ),
                               reward='reward_min_delay',
                               normalize_velocities=True,
                               discretize_state_space=False,
                               reward_rescale=0.01,
                               time_period=3600,
                               velocity_threshold=0.1)

        self.observation_space = State(self.network, mdp_params)
        self.observation_space.reset()

        with open('tests/unit/data/grid_kernel_data.dat', "rb") as f:
            kernel_data = pickle.load(f)

        self.assertEqual(len(kernel_data), 60)

        # Fake environment interaction with state object.
        timesteps = list(range(1, 60)) + [0]
        for t, data in zip(timesteps, kernel_data):
            self.observation_space.update(t, data)

        # Get state.
        state = self.observation_space.feature_map(
            categorize=mdp_params.discretize_state_space, flatten=True)

        self.assertEqual(len(state['247123161']), 3)
        self.assertEqual(len(state['247123464']), 3)
        self.assertEqual(len(state['247123468']), 3)

        # State.
        # 247123161.
        self.assertEqual(state['247123161'][0], 0)  # time variable

        # 247123464.
        self.assertEqual(state['247123464'][0], 0)  # time variable

        # 247123468.
        self.assertEqual(state['247123468'][0], 0)  # time variable

        self.observation_space.reset()

        hours = list(range(24)) + [0, 1]
        for hour in hours:
            for minute in range(60):

                # Fake environment interaction with state object.
                # (60 seconds = 1 minute).
                timesteps = list(range(1, 60)) + [0]
                for t, data in zip(timesteps, kernel_data):
                    self.observation_space.update(t, data)

                # Get state.
                state = self.observation_space.feature_map(
                    categorize=mdp_params.discretize_state_space, flatten=True)

                self.assertEqual(state['247123161'][0], hour)
                self.assertEqual(state['247123464'][0], hour)
                self.assertEqual(state['247123468'][0], hour)
Example #4
class TrafficLightEnv(Env):
    """
        Environment used to train traffic light systems.
    """
    def __init__(self,
                 env_params,
                 sim_params,
                 mdp_params,
                 network,
                 exp_path,
                 seed,
                 simulator='traci'):

        super(TrafficLightEnv, self).__init__(env_params,
                                              sim_params,
                                              network,
                                              simulator=simulator)

        # Traffic light system type.
        # ('rl', 'random', 'centralized', 'static', 'webster',
        #  'actuated' or 'max_pressure').
        self.ts_type = env_params.additional_params.get('tls_type')

        # Cycle time.
        self.cycle_time = network.cycle_time

        # TLS programs (discrete action space).
        if mdp_params.action_space == 'discrete':
            self.programs = network.programs

        # Keeps the internal value of sim step.
        self.sim_step = sim_params.sim_step
        assert self.sim_step == 1, 'Step size must equal 1.'

        # Problem formulation params.
        self.mdp_params = mdp_params
        mdp_params.phases_per_traffic_light = network.phases_per_tls
        mdp_params.num_actions = network.num_signal_plans_per_tls

        if self.ts_type in ('rl', 'random'):
            self.tsc = DecentralizedMAS(mdp_params, exp_path, seed)
        elif self.ts_type == "centralized":
            self.tsc = CentralizedAgent(mdp_params, exp_path, seed)
        elif self.ts_type in ('max_pressure', 'webster'):
            self.tsc = get_ts_controller(self.ts_type, network.phases_per_tls,
                                        self.tls_phases, self.cycle_time)
        elif self.ts_type in ('static', 'actuated'):
            pass # Nothing to do here.
        else:
            raise ValueError(f'Unknown ts_type: {self.ts_type}')

        # Reward function.
        self.reward = build_rewards(mdp_params)

        self.actions_log = {}
        self.states_log = {}

        # Overrides GYM's observation space.
        self.observation_space = State(network, mdp_params)

        # Continuous action space signal plans.
        self.signal_plans_continous = {}

        self._reset()

    # Abstract method override (ABCMeta).
    def action_space(self):
        return self.mdp_params.action_space

    @property
    def duration(self):
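        # Illustrative: with sim_step = 1 and cycle_time = 60 the duration
        # advances 1, 2, ..., 59 and wraps back to 0 at each new cycle.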
        if self.time_counter == 0:
            return 0.0

        if self._duration_counter != self.time_counter:
            self._duration = \
                round(self._duration + self.sim_step, 2) % self.cycle_time
            self._duration_counter = self.time_counter
        return self._duration

    # overrides GYM's observation space
    @property
    def observation_space(self):
        return self._observation_space

    @observation_space.setter
    def observation_space(self, observation_space):
        self._observation_space = observation_space

    # TODO: create a delegate class
    @property
    def stop(self):
        return self.tsc.stop

    @stop.setter
    def stop(self, stop):
        self.tsc.stop = stop

    # TODO: restrict delegate property to an
    # instance of class
    @lazy_property
    def tls_ids(self):
        return self.network.tls_ids

    @lazy_property
    def tls_phases(self):
        return self.network.tls_phases

    @lazy_property
    def tls_states(self):
        return self.network.tls_states

    @lazy_property
    def tls_durations(self):
        # Timings used by 'static' ts_type.
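        # Illustrative: phase durations [30, 6, 30, 6] (hypothetical values)
        # become cumulative switch points [30, 36, 66, 72] over a 72s cycle.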
        return {
            tid: np.cumsum(durations).tolist()
            for tid, durations in self.network.tls_durations.items()
        }

    # Obtains the central agent's joint action (0-7^N), unpacks each agent's
    # action (0-7) and returns whether each agent should switch in this
    # timestep.
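    # Illustrative decoding (mixed radix): with two TLS of 4 programs each,
    # joint action 6 unpacks to per-TLS programs [6 % 4, (6 // 4) % 4] = [2, 1].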
    def get_joined_actions(self):
        dur = int(self.duration)
        joined_action = []
        action_index = self._current_rl_action()
        if (dur == 0 and self.step_counter > 1):
            # New cycle.
            return [True]*len(self.tls_ids)

        for tid in self.tls_ids:
            curr_action = action_index % len(self.programs[tid])
            joined_action.append(dur in self.programs[tid][curr_action])
            action_index //= len(self.programs[tid])

        return joined_action


    def update_observation_space(self):
        """ Query kernel to retrieve vehicles' information
        and send it to ilurl.State object for state computation.

        Returns:
        -------
        observation_space: ilurl.State object

        """
        # Query kernel and retrieve vehicles' data.
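        # The resulting dict maps tls id -> phase -> vehicles currently on
        # that phase's incoming approaches.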
        vehs = {nid: {p: build_vehicles(nid, data['incoming'], self.k.vehicle)
                    for p, data in self.tls_phases[nid].items()}
                        for nid in self.tls_ids}

        self.observation_space.update(self.duration, vehs)

    def get_state(self):
        """ Return the state of the simulation as perceived by the RL agent(s).

        Returns:
        -------
        state : dict

        """
        obs = self.observation_space.feature_map(
            categorize=self.mdp_params.discretize_state_space,
            flatten=True
        )
        return obs

    def _rl_actions(self, state):
        """ Return the selected action(s) given the state of the environment.

        Parameters:
        ----------
        state : dict
            Information on the state of the vehicles, which is provided to the
            agent

        Returns:
        -------
        actions: dict

        """
        return self.tsc.act(state) # Delegate to Multi-Agent System.

    def _periodic_control_actions(self):
        """ Executes pre-timed periodic control actions.

        * Actions generate a plan for one period ahead, e.g. the cycle time.

        Returns:
        -------
        controller_actions: tuple<bool>
            False: duration<state_k> < duration < duration<state_k+1>
            True:  duration == duration<state_k+1>

        """
        ret = []
        dur = int(self.duration)

        def fn(tid):

            if self.ts_type in ('rl', 'random') and \
                (dur == 1 or self.time_counter == 1) and \
                self.mdp_params.action_space == 'continuous':
                # Calculate cycle length allocations for the
                # new cycle (continuous action space).

                # Get current action and respective number of phases.
                current_action = np.array(self._current_rl_action()[tid])
                num_phases = len(current_action)

                # Remove yellow time from cycle length (yellow time = 6 seconds).
                available_time = self.cycle_time - (6.0 * num_phases)

                # Allocate time for each of the phases.
                # By default, 20% of the cycle length is equally distributed
                # among all phases to ensure a minimum green time for every
                # phase. The remaining 80% is allocated by the agent.
                phases_durations = np.around(0.2 * available_time * (1 / num_phases) + \
                                    current_action * 0.8 * available_time, decimals=0)

                # Convert phases allocations into a signal plan.
                counter = 0
                timings = []
                for p in range(num_phases):
                    timings.append(counter + phases_durations[p])
                    timings.append(counter + phases_durations[p] + 6.0)
                    counter += phases_durations[p] + 6.0

                timings[-1] = self.cycle_time
                timings[-2] = self.cycle_time - 6.0
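                # 'timings' alternates green-end and yellow-end instants,
                # e.g. [g1, g1 + 6, g2, g2 + 6, ...]; the last two entries are
                # pinned so the plan ends exactly at cycle_time.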

                # Store the signal plan. This variable stores the signal
                # plan to be executed throughout the current cycle.
                self.signal_plans_continous[tid] = timings

            if (dur == 0 and self.step_counter > 1):
                # New cycle.
                return True

            if self.ts_type == 'static':
                return dur in self.tls_durations[tid]
            elif self.ts_type == 'webster':
                return dur in self.webster_timings[tid]
            elif self.ts_type in ('rl', 'random'):
                if self.mdp_params.action_space == 'discrete':
                    # Discrete action space: TLS programs.
                    progid = self._current_rl_action()[tid]
                    return dur in self.programs[tid][progid]
                else:
                    # Continuous action space: phases durations.
                    return dur in self.signal_plans_continous[tid]
            else:
                raise ValueError(f'Unknown ts_type:{self.ts_type}')

        if self.ts_type == 'centralized':
            ret = self.get_joined_actions()
        else:
            ret = [fn(tid) for tid in self.tls_ids]

        return tuple(ret)

    def apply_rl_actions(self, rl_actions):
        """ Specify the actions to be performed.

        Parameters:
        ----------
        rl_actions: dict or None
            actions to be performed

        """
        self.update_observation_space()

        if is_controller_periodic(self.ts_type):

            if self.ts_type in ('rl', 'random', 'centralized') and \
                (self.duration == 0 or self.time_counter == 1):
                # New cycle.

                # Get the number of the current cycle.
                cycle_number = \
                    int(self.step_counter / self.cycle_time)

                # Get current state.
                state = self.get_state()

                # Select new action.
                if rl_actions is None:
                    rl_action = self._rl_actions(state)
                else:
                    rl_action = rl_actions

                self.actions_log[cycle_number] = rl_action
                self.states_log[cycle_number] = state

                if self.step_counter > 1:
                    # RL-agent update.
                    reward = self.compute_reward(None)
                    prev_state = self.states_log[cycle_number - 1]
                    prev_action = self.actions_log[cycle_number - 1]
                    self.tsc.update(prev_state, prev_action, reward, state)

            if self.ts_type == 'webster':
                kernel_data = {nid: {p: build_vehicles(nid, data['incoming'], self.k.vehicle)
                                for p, data in self.tls_phases[nid].items()}
                                    for nid in self.tls_ids}
                self.webster_timings = self.tsc.act(kernel_data)

            if self.ts_type == 'static':
                pass # Nothing to do here.

            self._apply_tsc_actions(self._periodic_control_actions())

        else:
            # Aperiodic controller.
            if self.ts_type == 'max_pressure':
                controller_actions = self.tsc.act(self.observation_space, self.time_counter)

                # Update traffic lights' control signals.
                self._apply_tsc_actions(controller_actions)

            if self.ts_type == 'actuated':
                pass # Nothing to do here.

    def _apply_tsc_actions(self, controller_actions):
        """ For each TSC shift phase or keep phase.

        Parameters:
        ----------
        controller_actions: list<bool>
            False; keep state
            True; switch to next state

        """
        for i, tid in enumerate(self.tls_ids):
            if controller_actions[i]:
                states = self.tls_states[tid]
                self._tls_phase_indicator[tid] = \
                        (self._tls_phase_indicator[tid] + 1) % len(states)
                next_state = states[self._tls_phase_indicator[tid]]
                self.k.traffic_light.set_state(
                    node_id=tid, state=next_state)

    def _current_rl_action(self):
        """ Returns current action. """
        N = (self.cycle_time / self.sim_step)
        actid = \
            int(max(0, self.step_counter - 1) / N)
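        # Illustrative: with cycle_time = 60 and sim_step = 1, N = 60, so
        # steps 1..60 map to actid 0, steps 61..120 to actid 1, and so on.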

        return self.actions_log[actid]

    def compute_reward(self, rl_actions, **kwargs):
        """ Reward calculation.

        Parameters:
        ----------
        rl_actions : array_like
            Actions performed by each TSC.

        kwargs : dict
            Other parameters of interest.

        Returns:
        -------
        reward : dict<float>
            Reward at each TSC.

        """
        return self.reward(self.observation_space)

    def reset(self):
        super(TrafficLightEnv, self).reset()
        self._reset()

    def _reset(self):
        # The 'duration' variable measures the elapsed time since
        # the beginning of the cycle, i.e. how long (in seconds)
        # the current signal configuration has been active.
        self._duration_counter = -1
        self._duration = self.time_counter * self.sim_step

        # Stores the state index.
        self._tls_phase_indicator = {}
        for node_id in self.tls_ids:
            if self.ts_type != 'actuated':
                self._tls_phase_indicator[node_id] = 0
                s0 = self.tls_states[node_id][0]
                self.k.traffic_light.set_state(node_id=node_id, state=s0)

                # Notify controller.

        # Observation space.
        self.observation_space.reset()
Example #5
class TestStateReward(unittest.TestCase):
    """
        Set of tests that target the implemented
        problem formulations, i.e. state and reward
        function definitions.
    """
    def setUp(self):

        network_args = {
            'network_id': 'grid',
            'horizon': 999,
            'demand_type': 'constant',
            'tls_type': 'rl'
        }
        self.network = Network(**network_args)

    def test_time_period(self):
        """
            Time period.
        """
        mdp_params = MDPParams(features=('delay', ),
                               reward='reward_min_delay',
                               normalize_velocities=True,
                               discretize_state_space=False,
                               reward_rescale=0.01,
                               time_period=3600,
                               velocity_threshold=0.1)

        self.observation_space = State(self.network, mdp_params)
        self.observation_space.reset()

        with open('tests/unit/data/grid_kernel_data.dat', "rb") as f:
            kernel_data = pickle.load(f)

        self.assertEqual(len(kernel_data), 60)

        # Fake environment interaction with state object.
        timesteps = list(range(1, 60)) + [0]
        for t, data in zip(timesteps, kernel_data):
            self.observation_space.update(t, data)

        # Get state.
        state = self.observation_space.feature_map(
            categorize=mdp_params.discretize_state_space, flatten=True)

        self.assertEqual(len(state['247123161']), 3)
        self.assertEqual(len(state['247123464']), 3)
        self.assertEqual(len(state['247123468']), 3)

        # State.
        # 247123161.
        self.assertEqual(state['247123161'][0], 0)  # time variable

        # 247123464.
        self.assertEqual(state['247123464'][0], 0)  # time variable

        # 247123468.
        self.assertEqual(state['247123468'][0], 0)  # time variable

        self.observation_space.reset()

        hours = list(range(24)) + [0, 1]
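        # Two extra hours are appended to check that the time feature wraps
        # back to 0 after a full 24-hour period.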
        for hour in hours:
            for minute in range(60):

                # Fake environment interaction with state object.
                # (60 seconds = 1 minute).
                timesteps = list(range(1, 60)) + [0]
                for t, data in zip(timesteps, kernel_data):
                    self.observation_space.update(t, data)

                # Get state.
                state = self.observation_space.feature_map(
                    categorize=mdp_params.discretize_state_space, flatten=True)

                self.assertEqual(state['247123161'][0], hour)
                self.assertEqual(state['247123464'][0], hour)
                self.assertEqual(state['247123468'][0], hour)

    def tearDown(self):
        pass