def test_single_agent_ring(self):
    # create the base environment
    env = FlowEnv(
        flow_params=ring(
            num_automated=5,
            simulator="traci",
            multiagent=False
        ),
        multiagent=False,
        shared=False,
        version=1
    )
    env.reset()

    # test observation space
    test_space(
        env.observation_space,
        expected_min=np.array([-float("inf") for _ in range(25)]),
        expected_max=np.array([float("inf") for _ in range(25)]),
        expected_size=25,
    )

    # test action space
    test_space(
        env.action_space,
        expected_min=np.array([-1 for _ in range(5)]),
        expected_max=np.array([1 for _ in range(5)]),
        expected_size=5,
    )

    # kill the environment
    env.wrapped_env.terminate()
def test_single_agent_highway_single(self):
    # create the base environment
    env = FlowEnv(
        flow_params=highway_single(
            multiagent=False
        ),
        multiagent=False,
        shared=False,
        version=1
    )
    env.reset()

    # test observation space
    test_space(
        env.observation_space,
        expected_min=np.array([-float("inf") for _ in range(50)]),
        expected_max=np.array([float("inf") for _ in range(50)]),
        expected_size=50,
    )

    # test action space
    test_space(
        env.action_space,
        expected_min=np.array([-1 for _ in range(10)]),
        expected_max=np.array([1 for _ in range(10)]),
        expected_size=10,
    )

    # kill the environment
    env.wrapped_env.terminate()
def test_single_agent_ring_small(self):
    # create the base environment
    env = FlowEnv(
        flow_params=ring_small(
            num_automated=1,
            horizon=1500,
            simulator="traci",
            multiagent=False
        ),
        version=0
    )
    env.reset()

    # test observation space
    test_space(
        env.observation_space,
        expected_min=np.array([-np.inf for _ in range(3)]),
        expected_max=np.array([np.inf for _ in range(3)]),
        expected_size=3,
    )

    # test action space
    test_space(
        env.action_space,
        expected_min=np.array([-1]),
        expected_max=np.array([1]),
        expected_size=1,
    )

    # kill the environment
    env.wrapped_env.terminate()
def test_single_agent_ring(self):
    # create the base environment
    env = FlowEnv(
        env_name="ring",
        env_params={
            "num_automated": 1,
            "horizon": 1500,
            "simulator": "traci",
            "multiagent": False
        },
        version=0
    )
    env.reset()

    # test observation space
    test_space(
        env.observation_space,
        expected_min=np.array([-np.inf for _ in range(3)]),
        expected_max=np.array([np.inf for _ in range(3)]),
        expected_size=3,
    )

    # test action space
    test_space(
        env.action_space,
        expected_min=np.array([-1]),
        expected_max=np.array([1]),
        expected_size=1,
    )

    # kill the environment
    env.wrapped_env.terminate()
def _get_ring_env_attributes(scale):
    """Return the environment parameters of the fast ring environment.

    Parameters
    ----------
    scale : int
        the scale of the ring environment. The length of the network and the
        number of human/RL vehicles is scaled by this value.

    Returns
    -------
    dict
        see ENV_ATTRIBUTES
    """
    return {
        "meta_ac_space": lambda relative_goals, multiagent: Box(
            low=-5 if relative_goals else 0,
            high=5 if relative_goals else 10,
            shape=(1 if multiagent else scale, ),
            dtype=np.float32),

        "state_indices": lambda multiagent:
        [0] if multiagent else [15 * i for i in range(scale)],

        "env": lambda evaluate, render, multiagent, shared, maddpg: FlowEnv(
            flow_params=ring(
                stopping_penalty=False,
                acceleration_penalty=False,
                scale=scale,
                evaluate=evaluate,
                multiagent=multiagent,
            ),
            render=render,
            multiagent=multiagent,
            shared=shared,
            maddpg=maddpg,
        ) if evaluate else (
            RingMultiAgentEnv if multiagent else RingSingleAgentEnv)(
            maddpg=maddpg,
            length=[250 * scale, 360 * scale],
            num_vehicles=22 * scale,
            dt=0.2,
            horizon=3000,
            gen_emission=False,
            rl_ids=[22 * i for i in range(scale)],
            warmup_steps=0,
            initial_state=os.path.join(
                hbaselines_config.PROJECT_PATH,
                "hbaselines/envs/mixed_autonomy/envs/initial_states/"
                "ring-v{}.json".format(scale - 1)),
            sims_per_step=1,
        ),
    }
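
# A minimal, hypothetical usage sketch (not taken from the repository): build
# the attribute dict for a scale-1 ring and instantiate its non-evaluation
# environment via the "env" lambda. With evaluate=False this resolves to the
# in-house RingSingleAgentEnv branch shown above; the variable names here are
# illustrative only.
attrs = _get_ring_env_attributes(scale=1)
train_env = attrs["env"](
    evaluate=False, render=False, multiagent=False, shared=False, maddpg=False)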
def test_multi_agent_ring(self):
    # create the base environment
    env = FlowEnv(
        flow_params=ring(
            num_automated=5,
            simulator="traci",
            multiagent=True
        ),
        multiagent=True,
        shared=False,
        version=1
    )
    env.reset()

    # test the agent IDs.
    self.assertListEqual(
        sorted(env.agents),
        ['rl_0_0', 'rl_0_1', 'rl_0_2', 'rl_0_3', 'rl_0_4'])

    # test observation space
    test_space(
        env.observation_space["rl_0_0"],
        expected_min=np.array([-float("inf") for _ in range(5)]),
        expected_max=np.array([float("inf") for _ in range(5)]),
        expected_size=5,
    )

    # test action space
    test_space(
        env.action_space["rl_0_0"],
        expected_min=np.array([-1]),
        expected_max=np.array([1]),
        expected_size=1,
    )

    # kill the environment
    env.wrapped_env.terminate()
def test_multi_agent_figure_eight(self):
    # create the base environment
    env = FlowEnv(
        env_name="figure_eight",
        env_params={
            "num_automated": 1,
            "horizon": 1500,
            "simulator": "traci",
            "multiagent": True
        },
        version=0
    )
    env.reset()

    # test observation space
    pass  # TODO

    # test action space
    pass  # TODO

    # kill the environment
    env.wrapped_env.terminate()

    # create the environment with multiple automated vehicles
    env = FlowEnv(
        env_name="figure_eight",
        env_params={
            "num_automated": 14,
            "horizon": 1500,
            "simulator": "traci",
            "multiagent": True
        },
        version=1
    )
    env.reset()

    # test observation space
    pass  # TODO

    # test action space
    pass  # TODO

    # kill the environment
    env.wrapped_env.terminate()
def test_multi_agent_figure_eight(self):
    # create the base environment
    env = FlowEnv(
        flow_params=figure_eight(
            num_automated=1,
            horizon=1500,
            simulator="traci",
            multiagent=True
        ),
        version=0
    )
    env.reset()

    # test observation space
    pass  # TODO

    # test action space
    pass  # TODO

    # kill the environment
    env.wrapped_env.terminate()

    # create the environment with multiple automated vehicles
    env = FlowEnv(
        flow_params=figure_eight(
            num_automated=14,
            horizon=1500,
            simulator="traci",
            multiagent=True
        ),
        version=1
    )
    env.reset()

    # test observation space
    pass  # TODO

    # test action space
    pass  # TODO

    # kill the environment
    env.wrapped_env.terminate()
def create_env(env, render=False, evaluate=False):
    """Return, and potentially create, the environment.

    Parameters
    ----------
    env : str or gym.Env
        the environment, or the name of a registered environment.
    render : bool
        whether to render the environment
    evaluate : bool
        specifies whether this is a training or evaluation environment

    Returns
    -------
    gym.Env or list of gym.Env
        gym-compatible environment(s)
    """
    if env == "AntGather":
        env = AntGatherEnv()

    elif env == "AntMaze":
        if evaluate:
            env = [
                AntMaze(use_contexts=True, context_range=[16, 0]),
                AntMaze(use_contexts=True, context_range=[16, 16]),
                AntMaze(use_contexts=True, context_range=[0, 16])
            ]
        else:
            env = AntMaze(use_contexts=True,
                          random_contexts=True,
                          context_range=[(-4, 20), (-4, 20)])

    elif env == "AntPush":
        if evaluate:
            env = AntPush(use_contexts=True, context_range=[0, 19])
        else:
            env = AntPush(use_contexts=True, context_range=[0, 19])
            # env = AntPush(use_contexts=True,
            #               random_contexts=True,
            #               context_range=[(-16, 16), (-4, 20)])

    elif env == "AntFall":
        if evaluate:
            env = AntFall(use_contexts=True, context_range=[0, 27, 4.5])
        else:
            env = AntFall(use_contexts=True, context_range=[0, 27, 4.5])
            # env = AntFall(use_contexts=True,
            #               random_contexts=True,
            #               context_range=[(-4, 12), (-4, 28), (0, 5)])

    elif env == "AntFourRooms":
        if evaluate:
            env = [
                AntFourRooms(use_contexts=True, context_range=[30, 0]),
                AntFourRooms(use_contexts=True, context_range=[0, 30]),
                AntFourRooms(use_contexts=True, context_range=[30, 30])
            ]
        else:
            env = AntFourRooms(use_contexts=True,
                               random_contexts=False,
                               context_range=[[30, 0], [0, 30], [30, 30]])

    elif env == "UR5":
        if evaluate:
            env = UR5(use_contexts=True,
                      random_contexts=True,
                      context_range=[(-np.pi, np.pi),
                                     (-np.pi / 4, 0),
                                     (-np.pi / 4, np.pi / 4)],
                      show=render)
        else:
            env = UR5(use_contexts=True,
                      random_contexts=True,
                      context_range=[(-np.pi, np.pi),
                                     (-np.pi / 4, 0),
                                     (-np.pi / 4, np.pi / 4)],
                      show=render)

    elif env == "Pendulum":
        if evaluate:
            env = Pendulum(use_contexts=True,
                           context_range=[0, 0],
                           show=render)
        else:
            env = Pendulum(use_contexts=True,
                           random_contexts=True,
                           context_range=[(np.deg2rad(-16), np.deg2rad(16)),
                                          (-0.6, 0.6)],
                           show=render)

    elif env in ["bottleneck0", "bottleneck1", "bottleneck2",
                 "grid0", "grid1"]:
        # Import the benchmark and fetch its flow_params
        benchmark = __import__("flow.benchmarks.{}".format(env),
                               fromlist=["flow_params"])
        flow_params = benchmark.flow_params

        # Get the env name and a creator for the environment.
        create_env, _ = make_create_env(flow_params, version=0, render=render)

        # Create the environment.
        env = create_env()

    elif env in ["ring0", "multi-ring0"]:
        env = FlowEnv("ring", render=render)  # FIXME

    elif env in ["merge0", "merge1", "merge2",
                 "multi-merge0", "multi-merge1", "multi-merge2"]:
        env_num = int(env[-1])
        env = FlowEnv(
            "merge",
            env_params={
                "exp_num": env_num,
                "horizon": 6000,
                "simulator": "traci",
                "multiagent": env[:5] == "multi"
            },
            render=render)

    elif env in ["figureeight0", "figureeight1", "figureeight02",
                 "multi-figureeight0", "multi-figureeight1",
                 "multi-figureeight02"]:
        env_num = int(env[-1])
        env = FlowEnv(
            "figure_eight",
            env_params={
                "num_automated": [1, 7, 14][env_num],
                "horizon": 750,
                "simulator": "traci",
                "multiagent": env[:5] == "multi"
            },
            render=render)

    elif env == "BipedalSoccer":
        env = BipedalSoccer(render=render)

    elif isinstance(env, str):
        # This is assuming the environment is registered with OpenAI gym.
        env = gym.make(env)

    # Reset the environment.
    if env is not None:
        if isinstance(env, list):
            for next_env in env:
                next_env.reset()
        else:
            env.reset()

    return env
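
# Hypothetical usage sketch of create_env (variable names are illustrative):
# request a training environment and a set of evaluation environments for
# the AntMaze task handled above. With evaluate=True the AntMaze branch
# returns a list of three fixed-context environments; with evaluate=False it
# returns a single random-context environment.
train_env = create_env("AntMaze", render=False, evaluate=False)
eval_envs = create_env("AntMaze", render=False, evaluate=True)
assert isinstance(eval_envs, list) and len(eval_envs) == 3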
def train_h_baselines(flow_params, args, multiagent):
    """Train policies using SAC and TD3 with h-baselines."""
    from hbaselines.algorithms import OffPolicyRLAlgorithm
    from hbaselines.utils.train import parse_options, get_hyperparameters
    from hbaselines.envs.mixed_autonomy import FlowEnv

    flow_params = deepcopy(flow_params)

    # Get the command-line arguments that are relevant here
    args = parse_options(description="", example_usage="", args=args)

    # the base directory that the logged data will be stored in
    base_dir = "training_data"

    # Create the training environment.
    env = FlowEnv(
        flow_params,
        multiagent=multiagent,
        shared=args.shared,
        maddpg=args.maddpg,
        render=args.render,
        version=0
    )

    # Create the evaluation environment.
    if args.evaluate:
        eval_flow_params = deepcopy(flow_params)
        eval_flow_params['env'].evaluate = True
        eval_env = FlowEnv(
            eval_flow_params,
            multiagent=multiagent,
            shared=args.shared,
            maddpg=args.maddpg,
            render=args.render_eval,
            version=1
        )
    else:
        eval_env = None

    for i in range(args.n_training):
        # value of the next seed
        seed = args.seed + i

        # The time when the current experiment started.
        now = strftime("%Y-%m-%d-%H:%M:%S")

        # Create a save directory folder (if it doesn't exist).
        dir_name = os.path.join(base_dir, '{}/{}'.format(args.env_name, now))
        ensure_dir(dir_name)

        # Get the policy class.
        if args.alg == "TD3":
            if multiagent:
                from hbaselines.multi_fcnet.td3 import MultiFeedForwardPolicy
                policy = MultiFeedForwardPolicy
            else:
                from hbaselines.fcnet.td3 import FeedForwardPolicy
                policy = FeedForwardPolicy
        elif args.alg == "SAC":
            if multiagent:
                from hbaselines.multi_fcnet.sac import MultiFeedForwardPolicy
                policy = MultiFeedForwardPolicy
            else:
                from hbaselines.fcnet.sac import FeedForwardPolicy
                policy = FeedForwardPolicy
        else:
            raise ValueError("Unknown algorithm: {}".format(args.alg))

        # Get the hyperparameters.
        hp = get_hyperparameters(args, policy)

        # Add the seed for logging purposes.
        params_with_extra = hp.copy()
        params_with_extra['seed'] = seed
        params_with_extra['env_name'] = args.env_name
        params_with_extra['policy_name'] = policy.__name__
        params_with_extra['algorithm'] = args.alg
        params_with_extra['date/time'] = now

        # Add the hyperparameters to the folder.
        with open(os.path.join(dir_name, 'hyperparameters.json'), 'w') as f:
            json.dump(params_with_extra, f, sort_keys=True, indent=4)

        # Create the algorithm object.
        alg = OffPolicyRLAlgorithm(
            policy=policy,
            env=env,
            eval_env=eval_env,
            **hp
        )

        # Perform training.
        alg.learn(
            total_timesteps=args.total_steps,
            log_dir=dir_name,
            log_interval=args.log_interval,
            eval_interval=args.eval_interval,
            save_interval=args.save_interval,
            initial_exploration_steps=args.initial_exploration_steps,
            seed=seed,
        )
def test_single_agent_merge(self):
    # create version 0 of the environment
    env = FlowEnv(
        flow_params=merge(
            exp_num=0,
            horizon=6000,
            simulator="traci",
            multiagent=False
        ),
        version=0
    )
    env.reset()

    # test observation space
    test_space(
        env.observation_space,
        expected_min=np.array([0 for _ in range(25)]),
        expected_max=np.array([1 for _ in range(25)]),
        expected_size=25,
    )

    # test action space
    test_space(
        env.action_space,
        expected_min=np.array([-1.5 for _ in range(5)]),
        expected_max=np.array([1.5 for _ in range(5)]),
        expected_size=5,
    )

    # kill the environment
    env.wrapped_env.terminate()

    # create version 1 of the environment
    env = FlowEnv(
        flow_params=merge(
            exp_num=1,
            horizon=6000,
            simulator="traci",
            multiagent=False
        ),
        version=1
    )
    env.reset()

    # test observation space
    test_space(
        env.observation_space,
        expected_min=np.array([0 for _ in range(65)]),
        expected_max=np.array([1 for _ in range(65)]),
        expected_size=65,
    )

    # test action space
    test_space(
        env.action_space,
        expected_min=np.array([-1.5 for _ in range(13)]),
        expected_max=np.array([1.5 for _ in range(13)]),
        expected_size=13,
    )

    # kill the environment
    env.wrapped_env.terminate()

    # create version 2 of the environment
    env = FlowEnv(
        flow_params=merge(
            exp_num=2,
            horizon=6000,
            simulator="traci",
            multiagent=False
        ),
        version=2
    )
    env.reset()

    # test observation space
    test_space(
        env.observation_space,
        expected_min=np.array([0 for _ in range(85)]),
        expected_max=np.array([1 for _ in range(85)]),
        expected_size=85,
    )

    # test action space
    test_space(
        env.action_space,
        expected_min=np.array([-1.5 for _ in range(17)]),
        expected_max=np.array([1.5 for _ in range(17)]),
        expected_size=17,
    )

    # kill the environment
    env.wrapped_env.terminate()
def test_single_agent_figure_eight(self):
    # create the base environment
    env = FlowEnv(
        env_name="figure_eight",
        env_params={
            "num_automated": 1,
            "horizon": 1500,
            "simulator": "traci",
            "multiagent": False
        },
        version=0
    )
    env.reset()

    # test observation space
    test_space(
        env.observation_space,
        expected_min=np.array([0 for _ in range(28)]),
        expected_max=np.array([1 for _ in range(28)]),
        expected_size=28,
    )

    # test action space
    test_space(
        env.action_space,
        expected_min=np.array([-3]),
        expected_max=np.array([3]),
        expected_size=1,
    )

    # kill the environment
    env.wrapped_env.terminate()

    # create the environment with multiple automated vehicles
    env = FlowEnv(
        env_name="figure_eight",
        env_params={
            "num_automated": 14,
            "horizon": 1500,
            "simulator": "traci",
            "multiagent": False
        },
        version=1
    )
    env.reset()

    # test observation space
    test_space(
        env.observation_space,
        expected_min=np.array([0 for _ in range(28)]),
        expected_max=np.array([1 for _ in range(28)]),
        expected_size=28,
    )

    # test action space
    test_space(
        env.action_space,
        expected_min=np.array([-3 for _ in range(14)]),
        expected_max=np.array([3 for _ in range(14)]),
        expected_size=14,
    )

    # kill the environment
    env.wrapped_env.terminate()
"ring-v0": { "meta_ac_space": lambda relative_goals, multiagent: Box(low=-5 if relative_goals else 0, high=5 if relative_goals else 10, shape=(5, ), dtype=np.float32), "state_indices": lambda multiagent: [0], "env": lambda evaluate, render, n_levels, multiagent, shared, maddpg: FlowEnv( flow_params=ring( stopping_penalty=True, acceleration_penalty=True, evaluate=evaluate, multiagent=multiagent, ), render=render, multiagent=multiagent, shared=shared, maddpg=maddpg, ), }, "merge-v0": { "meta_ac_space": lambda relative_goals, multiagent: Box(low=-.5 if relative_goals else 0, high=.5 if relative_goals else 1, shape=(1 if multiagent else 5, ), dtype=np.float32),
def test_single_agent_merge(self):
    # create version 0 of the environment
    env = FlowEnv(
        env_name="merge",
        env_params={
            "exp_num": 0,
            "horizon": 6000,
            "simulator": "traci",
            "multiagent": False
        },
        version=0
    )
    env.reset()

    # test observation space
    test_space(
        env.observation_space,
        expected_min=np.array([0 for _ in range(25)]),
        expected_max=np.array([1 for _ in range(25)]),
        expected_size=25,
    )

    # test action space
    test_space(
        env.action_space,
        expected_min=np.array([-1.5 for _ in range(5)]),
        expected_max=np.array([1.5 for _ in range(5)]),
        expected_size=5,
    )

    # kill the environment
    env.wrapped_env.terminate()

    # create version 1 of the environment
    env = FlowEnv(
        env_name="merge",
        env_params={
            "exp_num": 1,
            "horizon": 6000,
            "simulator": "traci",
            "multiagent": False
        },
        version=1
    )
    env.reset()

    # test observation space
    test_space(
        env.observation_space,
        expected_min=np.array([0 for _ in range(65)]),
        expected_max=np.array([1 for _ in range(65)]),
        expected_size=65,
    )

    # test action space
    test_space(
        env.action_space,
        expected_min=np.array([-1.5 for _ in range(13)]),
        expected_max=np.array([1.5 for _ in range(13)]),
        expected_size=13,
    )

    # kill the environment
    env.wrapped_env.terminate()

    # create version 2 of the environment
    env = FlowEnv(
        env_name="merge",
        env_params={
            "exp_num": 2,
            "horizon": 6000,
            "simulator": "traci",
            "multiagent": False
        },
        version=2
    )
    env.reset()

    # test observation space
    test_space(
        env.observation_space,
        expected_min=np.array([0 for _ in range(85)]),
        expected_max=np.array([1 for _ in range(85)]),
        expected_size=85,
    )

    # test action space
    test_space(
        env.action_space,
        expected_min=np.array([-1.5 for _ in range(17)]),
        expected_max=np.array([1.5 for _ in range(17)]),
        expected_size=17,
    )

    # kill the environment
    env.wrapped_env.terminate()
# ======================================================================= # "ring_small": { "meta_ac_space": lambda relative_goals: Box(low=-.5 if relative_goals else 0, high=.5 if relative_goals else 1, shape=(1, ), dtype=np.float32), "state_indices": [0], "env": lambda evaluate, render, multiagent, shared, maddpg: [ FlowEnv( flow_params=ring_small( ring_length=[230, 230], evaluate=True, multiagent=multiagent, ), render=render, multiagent=multiagent, shared=shared, maddpg=maddpg, ), FlowEnv( flow_params=ring_small( ring_length=[260, 260], evaluate=True, multiagent=multiagent, ), render=render, multiagent=multiagent, shared=shared, maddpg=maddpg,
# ======================================================================= # # Mixed autonomy traffic flow environments. # # ======================================================================= # "ring": { "meta_ac_space": lambda relative_goals: Box(low=-10 if relative_goals else 0, high=10 if relative_goals else 30, shape=(5, ), dtype=np.float32), "state_indices": [5 * i for i in range(5)], "env": lambda evaluate, render, multiagent, shared, maddpg: FlowEnv( flow_params=ring( evaluate=evaluate, multiagent=multiagent, ), render=render, multiagent=multiagent, shared=shared, maddpg=maddpg, ), }, "ring_small": { "meta_ac_space": lambda relative_goals: Box(low=-.5 if relative_goals else 0, high=.5 if relative_goals else 1, shape=(1, ), dtype=np.float32), "state_indices": [0], "env": lambda evaluate, render, multiagent, shared, maddpg: [ FlowEnv(
def test_multi_agent_ring_small(self):
    # create the base environment
    env = FlowEnv(
        flow_params=ring_small(
            num_automated=1,
            horizon=1500,
            simulator="traci",
            multiagent=True
        ),
        multiagent=True,
        shared=False,
        version=1
    )
    env.reset()

    # test the agent IDs.
    self.assertListEqual(env.agents, ["rl_0_0"])

    # test observation space
    test_space(
        env.observation_space["rl_0_0"],
        expected_min=np.array([-5 for _ in range(3)]),
        expected_max=np.array([5 for _ in range(3)]),
        expected_size=3,
    )

    # test action space
    test_space(
        env.action_space["rl_0_0"],
        expected_min=np.array([-1]),
        expected_max=np.array([1]),
        expected_size=1,
    )

    # kill the environment
    env.wrapped_env.terminate()

    # create the environment with multiple automated vehicles
    env = FlowEnv(
        flow_params=ring_small(
            num_automated=4,
            horizon=1500,
            simulator="traci",
            multiagent=True
        ),
        multiagent=True,
        shared=True,
    )
    env.reset()

    # test the agent IDs.
    self.assertListEqual(
        env.agents, ["rl_0_0", "rl_1_0", "rl_2_0", "rl_3_0"])

    # test observation space
    test_space(
        env.observation_space,
        expected_min=np.array([-5 for _ in range(3)]),
        expected_max=np.array([5 for _ in range(3)]),
        expected_size=3,
    )

    # test action space
    test_space(
        env.action_space,
        expected_min=np.array([-1]),
        expected_max=np.array([1]),
        expected_size=1,
    )

    # kill the environment
    env.wrapped_env.terminate()
def import_flow_env(env_name, render, shared, maddpg, evaluate):
    """Import an environment from the flow/examples folder.

    This method imports the flow_params dict from the exp_configs folders in
    this directory and generates an appropriate FlowEnv object.

    Parameters
    ----------
    env_name : str
        the environment name. Starts with "flow:" to signify that it should
        be imported from the flow/examples folder.
    render : bool
        whether to render the environment
    shared : bool
        specifies whether agents in an environment are meant to share
        policies. This is solely used by multi-agent Flow environments.
    maddpg : bool
        whether to use an environment variant that is compatible with the
        MADDPG algorithm
    evaluate : bool
        specifies whether this is a training or evaluation environment

    Returns
    -------
    hbaselines.envs.mixed_autonomy.FlowEnv
        the training/evaluation environment

    Raises
    ------
    ValueError
        if the environment is not available in flow/examples
    """
    # Parse the exp_config name from the environment name.
    exp_config = env_name[5:]

    # Add flow/examples to your path to locate the below modules.
    sys.path.append(os.path.join(config.PROJECT_PATH, "examples"))

    # Import relevant information from the exp_config script.
    module = __import__(
        "exp_configs.rl.singleagent", fromlist=[exp_config])
    module_ma = __import__(
        "exp_configs.rl.multiagent", fromlist=[exp_config])

    # Import the sub-module containing the specified exp_config and determine
    # whether the environment is single agent or multi-agent.
    if hasattr(module, exp_config):
        submodule = getattr(module, exp_config)
        multiagent = False
    elif hasattr(module_ma, exp_config):
        submodule = getattr(module_ma, exp_config)
        multiagent = True
    else:
        raise ValueError("Unable to find experiment config.")

    # Collect the flow_params object.
    flow_params = deepcopy(submodule.flow_params)

    # Update the evaluation flag to match what is requested.
    flow_params['env'].evaluate = evaluate

    # Return the environment.
    return FlowEnv(
        flow_params,
        multiagent=multiagent,
        shared=shared,
        maddpg=maddpg,
        render=render,
    )
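
# Hypothetical usage sketch of import_flow_env: the config name
# "singleagent_ring" after the "flow:" prefix is illustrative only and
# assumes an exp_config module of that name exists under
# flow/examples/exp_configs/rl; everything else follows the signature above.
env = import_flow_env(
    env_name="flow:singleagent_ring",
    render=False,
    shared=False,
    maddpg=False,
    evaluate=False,
)
env.reset()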
def test_single_agent_figure_eight(self):
    # create the base environment
    env = FlowEnv(
        flow_params=figure_eight(
            num_automated=1,
            horizon=1500,
            simulator="traci",
            multiagent=False
        ),
        version=0
    )
    env.reset()

    # test observation space
    test_space(
        env.observation_space,
        expected_min=np.array([0 for _ in range(28)]),
        expected_max=np.array([1 for _ in range(28)]),
        expected_size=28,
    )

    # test action space
    test_space(
        env.action_space,
        expected_min=np.array([-3]),
        expected_max=np.array([3]),
        expected_size=1,
    )

    # kill the environment
    env.wrapped_env.terminate()

    # create the environment with multiple automated vehicles
    env = FlowEnv(
        flow_params=figure_eight(
            num_automated=14,
            horizon=1500,
            simulator="traci",
            multiagent=False
        ),
        version=1
    )
    env.reset()

    # test observation space
    test_space(
        env.observation_space,
        expected_min=np.array([0 for _ in range(28)]),
        expected_max=np.array([1 for _ in range(28)]),
        expected_size=28,
    )

    # test action space
    test_space(
        env.action_space,
        expected_min=np.array([-3 for _ in range(14)]),
        expected_max=np.array([3 for _ in range(14)]),
        expected_size=14,
    )

    # kill the environment
    env.wrapped_env.terminate()