def test_num_runs(self):
    """Check that the final speeds are identical for 1-run and 2-run experiments.

    Runs the same short ring-road experiment twice — once with a single
    run and once with two runs — and asserts that the vehicles end at the
    same speeds in both cases.
    """
    final_speeds = []
    for n_runs in (1, 2):
        # build a fresh ring-road setup for each experiment
        env, _, flow_params = ring_road_exp_setup()
        flow_params['sim'].render = False
        flow_params['env'].horizon = 10

        exp = Experiment(flow_params)
        exp.env = env
        exp.run(num_runs=n_runs)

        # collect the last speed of every vehicle in the network
        veh_ids = exp.env.k.vehicle.get_ids()
        final_speeds.append([exp.env.k.vehicle.get_speed(veh_ids)])

    # check that the final speeds are the same in both instances
    np.testing.assert_array_almost_equal(final_speeds[0], final_speeds[1])
def test_rl_actions(self):
    """Verify that an rl_actions callback passed to Experiment.run is applied.

    Creates an environment with a single RL vehicle, runs one short
    experiment with a callback that always commands an acceleration of 1,
    and checks the vehicle reaches the corresponding speed.
    """
    def constant_accel(*_):
        # actions are always an acceleration of 1 for one veh
        return [1]

    # create an environment using AccelEnv with 1 RL vehicle
    vehicles = VehicleParams()
    vehicles.add(
        veh_id="rl",
        acceleration_controller=(RLController, {}),
        routing_controller=(ContinuousRouter, {}),
        car_following_params=SumoCarFollowingParams(
            speed_mode="aggressive",
        ),
        num_vehicles=1)

    env, _, flow_params = ring_road_exp_setup(vehicles=vehicles)
    flow_params['sim'].render = False
    flow_params['env'].horizon = 10

    exp = Experiment(flow_params)
    exp.env = env
    exp.run(1, rl_actions=constant_accel)

    # check that the acceleration of the RL vehicle was that specified by
    # the rl_actions method
    self.assertAlmostEqual(
        exp.env.k.vehicle.get_speed("rl_0"), 1, places=1)
def evaluate_policy(benchmark, _get_actions, _get_states=None):
    """Evaluate the performance of a controller on a predefined benchmark.

    Parameters
    ----------
    benchmark : str
        name of the benchmark, must be printed as it is in the
        benchmarks folder; otherwise a FatalFlowError will be raised
    _get_actions : method
        the mapping from states to actions for the RL agent(s)
    _get_states : method, optional
        a mapping from the environment object in Flow to some state, which
        overrides the _get_states method of the environment. Note that the
        same cannot be done for the actions.

    Returns
    -------
    float
        mean of the evaluation return of the benchmark from NUM_RUNS number
        of simulations
    float
        standard deviation of the evaluation return of the benchmark from
        NUM_RUNS number of simulations

    Raises
    ------
    flow.utils.exceptions.FatalFlowError
        If the specified benchmark is not available.
    """
    # fail fast if the requested benchmark is unknown
    if benchmark not in AVAILABLE_BENCHMARKS.keys():
        raise FatalFlowError(
            "benchmark {} is not available. Check spelling?".format(benchmark))

    # get the flow params from the benchmark
    flow_params = AVAILABLE_BENCHMARKS[benchmark]

    exp_tag = flow_params["exp_tag"]
    sim_params = flow_params["sim"]
    vehicles = flow_params["veh"]
    env_params = flow_params["env"]
    env_params.evaluate = True  # Set to true to get evaluation returns
    net_params = flow_params["net"]
    initial_config = flow_params.get("initial", InitialConfig())
    traffic_lights = flow_params.get("tls", TrafficLightParams())

    # import the environment and network classes
    module = __import__("flow.envs", fromlist=[flow_params["env_name"]])
    env_class = getattr(module, flow_params["env_name"])
    module = __import__("flow.networks", fromlist=[flow_params["network"]])
    network_class = getattr(module, flow_params["network"])

    # recreate the network and environment
    network = network_class(
        name=exp_tag,
        vehicles=vehicles,
        net_params=net_params,
        initial_config=initial_config,
        traffic_lights=traffic_lights)

    # make sure the get_state method of the environment is the one
    # specified by the user
    if _get_states is not None:

        class _env_class(env_class):
            def get_state(self):
                return _get_states(self)

        env_class = _env_class

    env = env_class(
        env_params=env_params, sim_params=sim_params, network=network)

    flow_params = dict(
        # name of the experiment
        exp_tag=exp_tag,
        # name of the flow environment the experiment is running on
        env_name=env_class,
        # name of the network class the experiment is running on
        network=network_class,
        # simulator that is used by the experiment
        simulator='traci',
        # sumo-related parameters (see flow.core.params.SumoParams)
        sim=sim_params,
        # environment related parameters (see flow.core.params.EnvParams)
        env=env_params,
        # network-related parameters (see flow.core.params.NetParams and the
        # network's documentation or ADDITIONAL_NET_PARAMS component)
        net=net_params,
        # vehicles to be placed in the network at the start of a rollout (see
        # flow.core.params.VehicleParams)
        veh=vehicles,
        # parameters specifying the positioning of vehicles upon
        # initialization/reset (see flow.core.params.InitialConfig)
        initial=initial_config,
        # traffic lights to be introduced to specific nodes (see
        # flow.core.params.TrafficLightParams)
        tls=traffic_lights,
    )

    # number of time steps
    flow_params['env'].horizon = env.env_params.horizon

    # create an Experiment object. Note that the state may not be that which
    # is specified by the environment. (Fix: the original constructed the
    # Experiment twice in a row, wasting one full construction; one is enough.)
    exp = Experiment(flow_params)
    exp.env = env

    # run the experiment and return the reward
    res = exp.run(
        num_runs=NUM_RUNS,
        rl_actions=_get_actions)

    return np.mean(res["returns"]), np.std(res["returns"])