Example #1
    def test_num_runs(self):
        # run the experiment for 1 run and collect the final speeds of all
        # vehicles
        env, _, flow_params = ring_road_exp_setup()
        flow_params['sim'].render = False
        flow_params['env'].horizon = 10
        exp = Experiment(flow_params)
        exp.env = env
        exp.run(num_runs=1)

        vel1 = [exp.env.k.vehicle.get_speed(exp.env.k.vehicle.get_ids())]

        # run the experiment for 2 runs and collect the final speeds of all
        # vehicles
        env, _, flow_params = ring_road_exp_setup()
        flow_params['sim'].render = False
        flow_params['env'].horizon = 10

        exp = Experiment(flow_params)
        exp.env = env
        exp.run(num_runs=2)

        vel2 = [exp.env.k.vehicle.get_speed(exp.env.k.vehicle.get_ids())]

        # check that the final speeds are the same in both instances
        np.testing.assert_array_almost_equal(vel1, vel2)
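
A note on the pattern: the two blocks above duplicate the same setup/run/measure steps. A minimal refactoring sketch, relying only on the ring_road_exp_setup and Experiment behavior shown in the test (the helper name final_speeds is hypothetical):

def final_speeds(num_runs, horizon=10):
    # sketch only: build a fresh ring-road experiment, run it for
    # num_runs rollouts, and return the final speeds of all vehicles
    env, _, flow_params = ring_road_exp_setup()
    flow_params['sim'].render = False
    flow_params['env'].horizon = horizon
    exp = Experiment(flow_params)
    exp.env = env
    exp.run(num_runs=num_runs)
    return exp.env.k.vehicle.get_speed(exp.env.k.vehicle.get_ids())

# the assertion above then reduces to:
# np.testing.assert_array_almost_equal(final_speeds(1), final_speeds(2))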
Example #2
    def test_rl_actions(self):
        def rl_actions(*_):
            return [1]  # actions are always an acceleration of 1 for one veh

        # create an environment using AccelEnv with 1 RL vehicle
        vehicles = VehicleParams()
        vehicles.add(veh_id="rl",
                     acceleration_controller=(RLController, {}),
                     routing_controller=(ContinuousRouter, {}),
                     car_following_params=SumoCarFollowingParams(
                         speed_mode="aggressive"),
                     num_vehicles=1)

        env, _, flow_params = ring_road_exp_setup(vehicles=vehicles)
        flow_params['sim'].render = False
        flow_params['env'].horizon = 10
        exp = Experiment(flow_params)
        exp.env = env
        exp.run(1, rl_actions=rl_actions)

        # check that the speed of the RL vehicle matches the acceleration
        # specified by rl_actions: 10 steps of (the setup's default) 0.1 s
        # at 1 m/s^2 gives a speed of roughly 1 m/s
        self.assertAlmostEqual(exp.env.k.vehicle.get_speed("rl_0"),
                               1,
                               places=1)
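
Experiment.run invokes the rl_actions callable with the current observation and expects one action per RL vehicle in return. A hedged variant sketch (the name braking_rl_actions and the -0.5 deceleration are illustrative, not from the test):

def braking_rl_actions(_state):
    # hypothetical policy: decelerate at 0.5 m/s^2 instead of
    # accelerating; still one action for the single RL vehicle
    return [-0.5]

# exp.run(1, rl_actions=braking_rl_actions)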
Example #3
def evaluate_policy(benchmark, _get_actions, _get_states=None):
    """Evaluate the performance of a controller on a predefined benchmark.

    Parameters
    ----------
    benchmark : str
        name of the benchmark, spelled exactly as it appears in the
        benchmarks folder; otherwise a FatalFlowError will be raised
    _get_actions : method
        the mapping from states to actions for the RL agent(s)
    _get_states : method, optional
        a mapping from the environment object in Flow to some state, which
        overrides the get_state method of the environment. Note that the
        same cannot be done for the actions.

    Returns
    -------
    float
        mean evaluation return of the benchmark over NUM_RUNS simulations
    float
        standard deviation of the evaluation return of the benchmark over
        NUM_RUNS simulations

    Raises
    ------
    flow.utils.exceptions.FatalFlowError
        If the specified benchmark is not available.
    """
    if benchmark not in AVAILABLE_BENCHMARKS:
        raise FatalFlowError(
            "benchmark {} is not available. Check spelling?".format(benchmark))

    # get the flow params from the benchmark
    flow_params = AVAILABLE_BENCHMARKS[benchmark]

    exp_tag = flow_params["exp_tag"]
    sim_params = flow_params["sim"]
    vehicles = flow_params["veh"]
    env_params = flow_params["env"]
    env_params.evaluate = True  # Set to true to get evaluation returns
    net_params = flow_params["net"]
    initial_config = flow_params.get("initial", InitialConfig())
    traffic_lights = flow_params.get("tls", TrafficLightParams())

    # import the environment and network classes
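    # (__import__ with a non-empty fromlist returns the named submodule
    # itself, so getattr can then fetch the class by its string name)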
    module = __import__("flow.envs", fromlist=[flow_params["env_name"]])
    env_class = getattr(module, flow_params["env_name"])
    module = __import__("flow.networks", fromlist=[flow_params["network"]])
    network_class = getattr(module, flow_params["network"])

    # recreate the network and environment
    network = network_class(
        name=exp_tag,
        vehicles=vehicles,
        net_params=net_params,
        initial_config=initial_config,
        traffic_lights=traffic_lights)

    # if the user supplied a _get_states method, subclass the environment so
    # that its get_state method defers to the user's function
    if _get_states is not None:

        class _env_class(env_class):
            def get_state(self):
                return _get_states(self)

        env_class = _env_class

    env = env_class(
        env_params=env_params, sim_params=sim_params, network=network)

    flow_params = dict(
        # name of the experiment
        exp_tag=exp_tag,

        # environment class the experiment is running on
        env_name=env_class,

        # network class the experiment is running on
        network=network_class,

        # simulator that is used by the experiment
        simulator='traci',

        # sumo-related parameters (see flow.core.params.SumoParams)
        sim=sim_params,

        # environment related parameters (see flow.core.params.EnvParams)
        env=env_params,

        # network-related parameters (see flow.core.params.NetParams and the
        # network's documentation or ADDITIONAL_NET_PARAMS component)
        net=net_params,

        # vehicles to be placed in the network at the start of a rollout (see
        # flow.core.params.VehicleParams)
        veh=vehicles,

        # parameters specifying the positioning of vehicles upon initialization/
        # reset (see flow.core.params.InitialConfig)
        initial=initial_config,

        # traffic lights to be introduced to specific nodes (see
        # flow.core.params.TrafficLightParams)
        tls=traffic_lights,
    )

    # number of time steps
    flow_params['env'].horizon = env.env_params.horizon

    # create an Experiment object. Note that the states passed to rl_actions
    # may come from the user-specified _get_states rather than the
    # environment's own get_state method.
    exp = Experiment(flow_params)
    exp.env = env

    # run the experiment and return the reward
    res = exp.run(
        num_runs=NUM_RUNS,
        rl_actions=_get_actions)

    return np.mean(res["returns"]), np.std(res["returns"])
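
A hedged usage sketch: the benchmark name "figureeight0" is an assumption, and the policy and state functions below are hypothetical placeholders; only the call signature comes from evaluate_policy itself.

def _get_actions(state):
    # hypothetical policy: apply zero acceleration for the RL agent(s)
    return [0.0]

def _get_states(env):
    # hypothetical state override: return raw vehicle speeds instead of
    # the environment's own get_state output
    return env.k.vehicle.get_speed(env.k.vehicle.get_ids())

mean_ret, std_ret = evaluate_policy("figureeight0", _get_actions,
                                    _get_states=_get_states)
print("return: {:.2f} +/- {:.2f}".format(mean_ret, std_ret))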