Ejemplo n.º 1
0
    def parse_mdp_params(self, tls_ids):
        """
            Parses MDP parameters (mdp_args) from the config file located
            at self.config_path and returns an ilurl.params.MDPParams
            object built from the parsed values.

            Parameters:
            ----------
            * tls_ids: not used by this method; kept in the signature so
                existing callers keep working.

            Returns:
            -------
            * mdp_params: MDPParams
                object built from the options of the 'mdp_args' section.

            Raises:
            ------
            * KeyError: the config has no 'mdp_args' section, or a
                required option is missing from it. ConfigParser.read
                skips files it cannot open, so a wrong config_path also
                surfaces here.
        """
        # Load config file with parameters.
        train_config = configparser.ConfigParser()
        train_config.read(str(self.config_path))

        mdp_args = train_config['mdp_args']

        # 'time_period' is optional: a None-like entry (as judged by
        # isNone) disables it, anything else must be an integer.
        raw_time_period = mdp_args['time_period']
        if isNone(raw_time_period):
            time_period = None
        else:
            time_period = int(raw_time_period)

        # Every option arrives as a string; convert each one to the type
        # MDPParams is given here.
        mdp_params = MDPParams(
            discount_factor=float(mdp_args['discount_factor']),
            action_space=literal_eval(mdp_args['action_space']),
            features=literal_eval(mdp_args['features']),
            normalize_velocities=str2bool(mdp_args['normalize_velocities']),
            normalize_vehicles=str2bool(mdp_args['normalize_vehicles']),
            discretize_state_space=str2bool(
                mdp_args['discretize_state_space']),
            reward=literal_eval(mdp_args['reward']),
            reward_rescale=float(mdp_args['reward_rescale']),
            velocity_threshold=literal_eval(mdp_args['velocity_threshold']),
            time_period=time_period,
            # 'category' is the only option with a fallback (empty dict).
            categories=literal_eval(mdp_args.get('category', '{}')),
            # NOTE(review): eval() executes arbitrary code taken from the
            # config file. Every other option goes through literal_eval;
            # switch this one too once it is confirmed that
            # 'category_times' only ever holds a plain literal.
            category_times=eval(mdp_args['category_times']))

        return mdp_params
Ejemplo n.º 2
0
 def mdp_params(self):
     """Return MDPParams using the 'speed' and 'count' features and the
     'reward_min_speed_delta' reward."""
     settings = {
         'features': ('speed', 'count'),
         'reward': 'reward_min_speed_delta',
         'normalize_velocities': True,
         'discretize_state_space': False,
         'reward_rescale': 0.01,
         'time_period': None,
     }
     return MDPParams(**settings)
Ejemplo n.º 3
0
 def mdp_params(self):
     """Return MDPParams using the 'flow' feature and the
     'reward_max_flow' reward."""
     return MDPParams(
         features=('flow', ),
         reward='reward_max_flow',
         normalize_velocities=True,
         discretize_state_space=False,
         reward_rescale=0.01,
         time_period=None,
         velocity_threshold=0.1,
     )
Ejemplo n.º 4
0
 def mdp_params(self):
     """Return MDPParams using the 'waiting_time' feature and the
     'reward_min_waiting_time' reward.

     Vehicle normalization is taken from self.norm_vehs.
     """
     settings = dict(
         features=('waiting_time', ),
         reward='reward_min_waiting_time',
         normalize_velocities=True,
         normalize_vehicles=self.norm_vehs,
         discretize_state_space=False,
         reward_rescale=0.01,
         time_period=None,
         velocity_threshold=0.1,
     )
     return MDPParams(**settings)
Ejemplo n.º 5
0
 def mdp_params(self):
     """Return MDPParams using the 'delay' and 'lag[delay]' features and
     the 'reward_max_delay_reduction' reward."""
     feature_names = ('delay', 'lag[delay]')
     return MDPParams(features=feature_names,
                      reward='reward_max_delay_reduction',
                      normalize_velocities=True,
                      normalize_vehicles=False,
                      discretize_state_space=False,
                      reward_rescale=0.01,
                      time_period=None,
                      velocity_threshold=0.1)
Ejemplo n.º 6
0
    def mdp_params(self):
        """Return MDPParams using the 'queue' and 'lag[queue]' features
        and the 'reward_min_queue_squared' reward."""
        settings = {
            'features': ('queue', 'lag[queue]'),
            'reward': 'reward_min_queue_squared',
            'normalize_velocities': True,
            'normalize_vehicles': True,
            'discretize_state_space': False,
            'reward_rescale': 0.01,
            'time_period': None,
            'velocity_threshold': 0.1,
        }
        return MDPParams(**settings)
Ejemplo n.º 7
0
    def test_time_period(self):
        """
            Time period.

            Builds a State with time_period=3600 and replays the pickled
            kernel data one simulated minute (60 updates) at a time,
            checking the first entry of each state vector (the time
            variable) after every minute.
        """
        # Keys of the flattened state checked below
        # (presumably traffic light ids -- confirm against the network).
        state_keys = ('247123161', '247123464', '247123468')

        mdp_params = MDPParams(
            features=('delay', ),
            reward='reward_min_delay',
            normalize_velocities=True,
            discretize_state_space=False,
            reward_rescale=0.01,
            time_period=3600,
            velocity_threshold=0.1,
        )

        space = State(self.network, mdp_params)
        self.observation_space = space
        space.reset()

        with open('tests/unit/data/grid_kernel_data.dat', "rb") as f:
            kernel_data = pickle.load(f)

        self.assertEqual(len(kernel_data), 60)

        # One minute of simulation: time steps 1..59 followed by 0.
        timesteps = list(range(1, 60)) + [0]

        def play_one_minute():
            # Fake environment interaction with state object
            # (60 seconds = 1 minute), then read the state back.
            for t, data in zip(timesteps, kernel_data):
                space.update(t, data)
            return space.feature_map(
                categorize=mdp_params.discretize_state_space, flatten=True)

        state = play_one_minute()

        # Each state vector holds three entries ...
        for key in state_keys:
            self.assertEqual(len(state[key]), 3)

        # ... and the time variable is still 0 after the first minute.
        for key in state_keys:
            self.assertEqual(state[key][0], 0)

        space.reset()

        # A full day plus two extra hours: the expected time variable
        # goes 0..23 and then back to 0 and 1.
        for hour in list(range(24)) + [0, 1]:
            for minute in range(60):
                state = play_one_minute()

                for key in state_keys:
                    self.assertEqual(state[key][0], hour)