def run_task(*_):
    """Set up the Flow environment and train a TRPO agent on it with rllab.

    This is the ``run_task`` callable needed to run experiments with rllab.
    Every setting is read from the module-level ``flow_params`` dictionary;
    any positional arguments passed in are ignored.
    """
    def _from_scenarios(class_name):
        # resolve a class by name from the ``flow.scenarios`` package
        package = __import__("flow.scenarios", fromlist=[class_name])
        return getattr(package, class_name)

    # unzip the flow-specific parameters
    env_name = flow_params["env_name"]
    exp_tag = flow_params["exp_tag"]
    sumo_params = flow_params["sumo"]
    vehicles = flow_params["veh"]
    env_params = flow_params["env"]
    net_params = flow_params["net"]
    # optional entries fall back to default-constructed objects
    initial_config = flow_params.get("initial", InitialConfig())
    traffic_lights = flow_params.get("tls", TrafficLights())

    # both the scenario and the generator classes are looked up by name
    scenario_class = _from_scenarios(flow_params["scenario"])
    generator_class = _from_scenarios(flow_params["generator"])

    # create the scenario object
    scenario = scenario_class(
        name=exp_tag,
        generator_class=generator_class,
        vehicles=vehicles,
        net_params=net_params,
        initial_config=initial_config,
        traffic_lights=traffic_lights,
    )

    # parameters forwarded to the environment when it is registered
    pass_params = (
        env_name,
        sumo_params,
        vehicles,
        env_params,
        net_params,
        initial_config,
        scenario,
    )

    env = normalize(
        GymEnv(env_name, record_video=False, register_params=pass_params))

    policy = GaussianMLPPolicy(env_spec=env.spec, hidden_sizes=(100, 50, 25))
    baseline = LinearFeatureBaseline(env_spec=env.spec)

    # ``env_params`` is the same object as ``flow_params["env"]``
    horizon = env_params.horizon
    samples_per_batch = horizon * (N_ROLLOUTS - PARALLEL_ROLLOUTS + 1)

    algo = TRPO(
        env=env,
        policy=policy,
        baseline=baseline,
        batch_size=samples_per_batch,
        max_path_length=horizon,
        n_itr=500,
        discount=0.999,
        step_size=0.01,
    )
    algo.train()
def grid1_baseline(num_runs, render=True):
    """Evaluate the grid1 baseline using actuated traffic lights.

    Parameters
    ----------
    num_runs : int
        number of rollouts the performance of the environment is evaluated
        over
    render : bool, optional
        specifies whether to use sumo's gui during execution

    Returns
    -------
    scalar
        mean (via ``np.mean``) of the ``'returns'`` entry of the results
        produced by ``Experiment.run``
    """
    # unpack the experiment configuration
    exp_tag = flow_params['exp_tag']
    sumo_params = flow_params['sumo']
    vehicles = flow_params['veh']
    env_params = flow_params['env']
    net_params = flow_params['net']
    initial_config = flow_params.get('initial', InitialConfig())

    # define the traffic light logic: two green phases, each followed by a
    # short yellow phase; timing values are shared between like phases
    tl_logic = TrafficLightParams(baseline=False)
    green_timing = {'duration': '31', 'minDur': '5', 'maxDur': '45'}
    yellow_timing = {'duration': '2', 'minDur': '2', 'maxDur': '2'}
    phases = [
        dict(green_timing, state='GGGrrrGGGrrr'),
        dict(yellow_timing, state='yyyrrryyyrrr'),
        dict(green_timing, state='rrrGGGrrrGGG'),
        dict(yellow_timing, state='rrryyyrrryyy'),
    ]

    # one actuated program per intersection, all sharing the same phases
    for node_index in range(N_ROWS * N_COLUMNS):
        tl_logic.add(
            'center{}'.format(node_index),
            tls_type='actuated',
            phases=phases,
            programID=1,
        )

    # these assignments modify the objects stored in ``flow_params``
    sumo_params.render = render  # match the requested rendering
    env_params.evaluate = True  # set the evaluation flag

    # look up the scenario class by name and instantiate it
    scenario_name = flow_params['scenario']
    scenario_class = getattr(
        __import__('flow.scenarios', fromlist=[scenario_name]),
        scenario_name,
    )
    scenario = scenario_class(
        name=exp_tag,
        vehicles=vehicles,
        net_params=net_params,
        initial_config=initial_config,
        traffic_lights=tl_logic,
    )

    # look up the environment class by name and instantiate it
    env_name = flow_params['env_name']
    env_class = getattr(
        __import__('flow.envs', fromlist=[env_name]),
        env_name,
    )
    env = env_class(env_params, sumo_params, scenario)

    # run the rollouts and average the reported returns
    results = Experiment(env).run(num_runs, env_params.horizon)
    return np.mean(results['returns'])
def grid1_baseline(num_runs, render=True):
    """Evaluate the grid1 baseline using actuated traffic lights.

    Parameters
    ----------
    num_runs : int
        number of rollouts the performance of the environment is evaluated
        over
    render : bool, optional
        specifies whether to use the gui during execution

    Returns
    -------
    scalar
        mean (via ``np.mean``) of the ``'returns'`` entry of the results
        produced by ``Experiment.run``
    """
    # unpack the experiment configuration
    exp_tag = flow_params['exp_tag']
    sim_params = flow_params['sim']
    vehicles = flow_params['veh']
    env_params = flow_params['env']
    net_params = flow_params['net']
    initial_config = flow_params.get('initial', InitialConfig())

    # define the traffic light logic: two green phases, each followed by a
    # short yellow phase; timing values are shared between like phases
    tl_logic = TrafficLightParams(baseline=False)
    green_timing = {'duration': '31', 'minDur': '5', 'maxDur': '45'}
    yellow_timing = {'duration': '2', 'minDur': '2', 'maxDur': '2'}
    phases = [
        dict(green_timing, state="GrGr"),
        dict(yellow_timing, state="yryr"),
        dict(green_timing, state="rGrG"),
        dict(yellow_timing, state="ryry"),
    ]

    # one actuated program per intersection, all sharing the same phases
    for node_index in range(N_ROWS * N_COLUMNS):
        tl_logic.add(
            'center{}'.format(node_index),
            tls_type='actuated',
            phases=phases,
            programID=1,
        )

    # these assignments modify the objects stored in ``flow_params``
    sim_params.render = render  # match the requested rendering
    env_params.evaluate = True  # set the evaluation flag

    # the network class is stored directly in ``flow_params``
    network_class = flow_params['network']
    network = network_class(
        name=exp_tag,
        vehicles=vehicles,
        net_params=net_params,
        initial_config=initial_config,
        traffic_lights=tl_logic,
    )

    # the environment class is stored directly in ``flow_params`` as well
    env_class = flow_params['env_name']
    env = env_class(env_params, sim_params, network)

    # run the rollouts and average the reported returns
    results = Experiment(env).run(num_runs, env_params.horizon)
    return np.mean(results['returns'])