Esempio n. 1
0
    def test_single_agent_ring(self):
        """Check the spaces of the single-agent ring with 5 automated vehicles.

        The environment is created from the ``ring`` flow parameters, reset
        once, inspected, and finally terminated.
        """
        num_rl = 5
        obs_dim = 25

        # build and initialize the environment under test
        flow_params = ring(
            num_automated=num_rl,
            simulator="traci",
            multiagent=False,
        )
        env = FlowEnv(
            flow_params=flow_params,
            multiagent=False,
            shared=False,
            version=1,
        )
        env.reset()

        # observations are expected to be unbounded in both directions
        test_space(
            env.observation_space,
            expected_min=np.array([-float("inf")] * obs_dim),
            expected_max=np.array([float("inf")] * obs_dim),
            expected_size=obs_dim,
        )

        # one action in [-1, 1] is expected per automated vehicle
        test_space(
            env.action_space,
            expected_min=np.array([-1] * num_rl),
            expected_max=np.array([1] * num_rl),
            expected_size=num_rl,
        )

        # shut down the underlying simulation
        env.wrapped_env.terminate()
Esempio n. 2
0
    def test_single_agent_highway_single(self):
        """Check the spaces of the single-agent highway-single environment.

        The environment is created from the ``highway_single`` flow
        parameters, reset once, inspected, and finally terminated.
        """
        obs_dim = 50
        ac_dim = 10

        # build and initialize the environment under test
        flow_params = highway_single(multiagent=False)
        env = FlowEnv(
            flow_params=flow_params,
            multiagent=False,
            shared=False,
            version=1,
        )
        env.reset()

        # observations are expected to be unbounded in both directions
        test_space(
            env.observation_space,
            expected_min=np.array([-float("inf")] * obs_dim),
            expected_max=np.array([float("inf")] * obs_dim),
            expected_size=obs_dim,
        )

        # every action component is expected to lie in [-1, 1]
        test_space(
            env.action_space,
            expected_min=np.array([-1] * ac_dim),
            expected_max=np.array([1] * ac_dim),
            expected_size=ac_dim,
        )

        # shut down the underlying simulation
        env.wrapped_env.terminate()
Esempio n. 3
0
    def test_single_agent_ring_small(self):
        """Check the spaces of the single-agent small ring environment.

        A single automated vehicle is requested, so a 3-element observation
        and a 1-element action are expected.
        """
        obs_dim = 3

        # build and initialize the environment under test
        flow_params = ring_small(
            num_automated=1,
            horizon=1500,
            simulator="traci",
            multiagent=False,
        )
        env = FlowEnv(flow_params=flow_params, version=0)
        env.reset()

        # observations are expected to be unbounded in both directions
        test_space(
            env.observation_space,
            expected_min=np.array([-np.inf] * obs_dim),
            expected_max=np.array([np.inf] * obs_dim),
            expected_size=obs_dim,
        )

        # a single action in [-1, 1] is expected
        test_space(
            env.action_space,
            expected_min=np.array([-1]),
            expected_max=np.array([1]),
            expected_size=1,
        )

        # shut down the underlying simulation
        env.wrapped_env.terminate()
Esempio n. 4
0
    def test_single_agent_ring(self):
        """Check the spaces of the single-agent "ring" environment.

        The environment is selected by name and configured through an
        ``env_params`` dictionary with a single automated vehicle.
        """
        obs_dim = 3

        # build and initialize the environment under test
        env_params = {
            "num_automated": 1,
            "horizon": 1500,
            "simulator": "traci",
            "multiagent": False
        }
        env = FlowEnv(env_name="ring", env_params=env_params, version=0)
        env.reset()

        # observations are expected to be unbounded in both directions
        test_space(
            env.observation_space,
            expected_min=np.array([-np.inf] * obs_dim),
            expected_max=np.array([np.inf] * obs_dim),
            expected_size=obs_dim,
        )

        # a single action in [-1, 1] is expected
        test_space(
            env.action_space,
            expected_min=np.array([-1]),
            expected_max=np.array([1]),
            expected_size=1,
        )

        # shut down the underlying simulation
        env.wrapped_env.terminate()
Esempio n. 5
0
def _get_ring_env_attributes(scale):
    """Return the environment parameters of the fast ring environment.

    Parameters
    ----------
    scale : int
        the scale of the ring environment. The length of the network and the
        number of human/RL vehicles is scaled by this value.

    Returns
    -------
    dict
        see ENV_ATTRIBUTES
    """
    return {
        "meta_ac_space":
        lambda relative_goals, multiagent: Box(
            low=-5 if relative_goals else 0,
            high=5 if relative_goals else 10,
            shape=(1 if multiagent else scale, ),
            dtype=np.float32),
        "state_indices":
        lambda multiagent: [0]
        if multiagent else [15 * i for i in range(scale)],
        "env":
        lambda evaluate, render, multiagent, shared, maddpg: FlowEnv(
            flow_params=ring(
                stopping_penalty=False,
                acceleration_penalty=False,
                scale=scale,
                evaluate=evaluate,
                multiagent=multiagent,
            ),
            render=render,
            multiagent=multiagent,
            shared=shared,
            maddpg=maddpg,
        ) if evaluate else
        (RingMultiAgentEnv if multiagent else RingSingleAgentEnv)(
            maddpg=maddpg,
            length=[250 * scale, 360 * scale],
            num_vehicles=22 * scale,
            dt=0.2,
            horizon=3000,
            gen_emission=False,
            rl_ids=[22 * i for i in range(scale)],
            warmup_steps=0,
            initial_state=os.path.join(
                hbaselines_config.PROJECT_PATH,
                "hbaselines/envs/mixed_autonomy/envs/initial_states/"
                "ring-v{}.json".format(scale - 1)),
            sims_per_step=1,
        ),
    }
Esempio n. 6
0
    def test_multi_agent_ring(self):
        """Check the agents and spaces of the multi-agent ring environment.

        Five automated vehicles are requested with non-shared policies; the
        spaces of the agent "rl_0_0" are inspected.
        """
        obs_dim = 5
        agent_id = "rl_0_0"

        # build and initialize the environment under test
        flow_params = ring(
            num_automated=5,
            simulator="traci",
            multiagent=True,
        )
        env = FlowEnv(
            flow_params=flow_params,
            multiagent=True,
            shared=False,
            version=1,
        )
        env.reset()

        # the agent IDs are compared in sorted order
        expected_agents = ['rl_0_0', 'rl_0_1', 'rl_0_2', 'rl_0_3', 'rl_0_4']
        self.assertListEqual(sorted(env.agents), expected_agents)

        # per-agent observations are expected to be unbounded
        test_space(
            env.observation_space[agent_id],
            expected_min=np.array([-float("inf")] * obs_dim),
            expected_max=np.array([float("inf")] * obs_dim),
            expected_size=obs_dim,
        )

        # each agent is expected to issue a single action in [-1, 1]
        test_space(
            env.action_space[agent_id],
            expected_min=np.array([-1]),
            expected_max=np.array([1]),
            expected_size=1,
        )

        # shut down the underlying simulation
        env.wrapped_env.terminate()
Esempio n. 7
0
    def test_multi_agent_figure_eight(self):
        """Create, reset and terminate the multi-agent figure eight variants.

        The checks on the observation and action spaces are not implemented
        yet; only the construction/reset/termination cycle is exercised.
        """
        # (number of automated vehicles, environment version): first the base
        # environment, then the one with multiple automated vehicles
        cases = [(1, 0), (14, 1)]

        for num_automated, version in cases:
            env = FlowEnv(
                env_name="figure_eight",
                env_params={
                    "num_automated": num_automated,
                    "horizon": 1500,
                    "simulator": "traci",
                    "multiagent": True
                },
                version=version
            )
            env.reset()

            # TODO: test observation space

            # TODO: test action space

            # shut down the underlying simulation
            env.wrapped_env.terminate()
Esempio n. 8
0
    def test_multi_agent_figure_eight(self):
        """Create, reset and terminate the multi-agent figure eight variants.

        The checks on the observation and action spaces are not implemented
        yet; only the construction/reset/termination cycle is exercised.
        """
        # (number of automated vehicles, environment version): first the base
        # environment, then the one with multiple automated vehicles
        cases = [(1, 0), (14, 1)]

        for num_automated, version in cases:
            flow_params = figure_eight(
                num_automated=num_automated,
                horizon=1500,
                simulator="traci",
                multiagent=True
            )
            env = FlowEnv(flow_params=flow_params, version=version)
            env.reset()

            # TODO: test observation space

            # TODO: test action space

            # shut down the underlying simulation
            env.wrapped_env.terminate()
Esempio n. 9
0
def create_env(env, render=False, evaluate=False):
    """Return, and potentially create, the environment.

    If `env` is one of the names handled below, the matching environment is
    instantiated. Any other string is passed to ``gym.make``. Objects that are
    not strings (e.g. an already created environment, or a list of them) are
    used as they are. In all cases, every resulting environment is reset
    before being returned.

    Parameters
    ----------
    env : str or gym.Env
        the environment, or the name of a registered environment.
    render : bool
        whether to render the environment
    evaluate : bool
        specifies whether this is a training or evaluation environment

    Returns
    -------
    gym.Env or list of gym.Env
        gym-compatible environment(s). None is returned if `env` is None.
    """
    if env == "AntGather":
        env = AntGatherEnv()

    elif env == "AntMaze":
        if evaluate:
            # One evaluation environment per fixed context.
            env = [
                AntMaze(use_contexts=True, context_range=[16, 0]),
                AntMaze(use_contexts=True, context_range=[16, 16]),
                AntMaze(use_contexts=True, context_range=[0, 16])
            ]
        else:
            env = AntMaze(use_contexts=True,
                          random_contexts=True,
                          context_range=[(-4, 20), (-4, 20)])

    elif env == "AntPush":
        # NOTE: training and evaluation use the same fixed context. A
        # randomized training variant (random_contexts=True with
        # context_range=[(-16, 16), (-4, 20)]) is currently disabled.
        env = AntPush(use_contexts=True, context_range=[0, 19])

    elif env == "AntFall":
        # NOTE: training and evaluation use the same fixed context. A
        # randomized training variant (random_contexts=True with
        # context_range=[(-4, 12), (-4, 28), (0, 5)]) is currently disabled.
        env = AntFall(use_contexts=True, context_range=[0, 27, 4.5])

    elif env == "AntFourRooms":
        if evaluate:
            # One evaluation environment per fixed context.
            env = [
                AntFourRooms(use_contexts=True, context_range=[30, 0]),
                AntFourRooms(use_contexts=True, context_range=[0, 30]),
                AntFourRooms(use_contexts=True, context_range=[30, 30])
            ]
        else:
            env = AntFourRooms(use_contexts=True,
                               random_contexts=False,
                               context_range=[[30, 0], [0, 30], [30, 30]])

    elif env == "UR5":
        # Training and evaluation share the same configuration.
        env = UR5(use_contexts=True,
                  random_contexts=True,
                  context_range=[(-np.pi, np.pi), (-np.pi / 4, 0),
                                 (-np.pi / 4, np.pi / 4)],
                  show=render)

    elif env == "Pendulum":
        if evaluate:
            env = Pendulum(use_contexts=True,
                           context_range=[0, 0],
                           show=render)
        else:
            env = Pendulum(use_contexts=True,
                           random_contexts=True,
                           context_range=[(np.deg2rad(-16), np.deg2rad(16)),
                                          (-0.6, 0.6)],
                           show=render)

    elif env in [
            "bottleneck0", "bottleneck1", "bottleneck2", "grid0", "grid1"
    ]:
        # Import the benchmark and fetch its flow_params
        benchmark = __import__("flow.benchmarks.{}".format(env),
                               fromlist=["flow_params"])
        flow_params = benchmark.flow_params

        # Get the env name and a creator for the environment. The creator is
        # given its own name so that it does not shadow this function.
        env_creator, _ = make_create_env(flow_params, version=0, render=render)

        # Create the environment.
        env = env_creator()

    elif env in ["ring0", "multi-ring0"]:
        env = FlowEnv("ring", render=render)  # FIXME

    elif env in [
            "merge0", "merge1", "merge2", "multi-merge0", "multi-merge1",
            "multi-merge2"
    ]:
        # The trailing digit of the name selects the experiment number.
        env_num = int(env[-1])
        env = FlowEnv("merge",
                      env_params={
                          "exp_num": env_num,
                          "horizon": 6000,
                          "simulator": "traci",
                          "multiagent": env.startswith("multi")
                      },
                      render=render)

    elif env in [
            "figureeight0", "figureeight1", "figureeight2",
            "multi-figureeight0", "multi-figureeight1", "multi-figureeight2",
            # Misspelled names that were historically registered; they are
            # kept so that existing callers continue to work.
            "figureeight02", "multi-figureeight02"
    ]:
        # The trailing digit of the name selects the number of automated
        # vehicles.
        env_num = int(env[-1])
        env = FlowEnv("figure_eight",
                      env_params={
                          "num_automated": [1, 7, 14][env_num],
                          "horizon": 750,
                          "simulator": "traci",
                          "multiagent": env.startswith("multi")
                      },
                      render=render)

    elif env == "BipedalSoccer":
        env = BipedalSoccer(render=render)

    elif isinstance(env, str):
        # This is assuming the environment is registered with OpenAI gym.
        env = gym.make(env)

    # Reset the environment(s).
    if env is not None:
        if isinstance(env, list):
            for next_env in env:
                next_env.reset()
        else:
            env.reset()

    return env
Esempio n. 10
0
def train_h_baselines(flow_params, args, multiagent):
    """Train policies using SAC and TD3 with h-baselines.

    One training run is performed for each of the ``n_training`` seeds. Every
    run logs its hyperparameters and results to a new time-stamped folder
    under "training_data".

    Parameters
    ----------
    flow_params : dict
        the flow-specific parameters of the environment. A deep copy is used,
        so the object passed by the caller is not modified here.
    args : list of str
        command-line arguments, parsed via h-baselines' ``parse_options``
    multiagent : bool
        whether the environment (and hence the policy) is multi-agent

    Raises
    ------
    ValueError
        if the requested algorithm is neither "TD3" nor "SAC"
    """
    from hbaselines.algorithms import OffPolicyRLAlgorithm
    from hbaselines.utils.train import parse_options, get_hyperparameters
    from hbaselines.envs.mixed_autonomy import FlowEnv

    flow_params = deepcopy(flow_params)

    # Keep only the command-line arguments that are relevant here.
    args = parse_options(description="", example_usage="", args=args)

    # Root folder of all logged data.
    base_dir = "training_data"

    # Training environment.
    env = FlowEnv(
        flow_params,
        multiagent=multiagent,
        shared=args.shared,
        maddpg=args.maddpg,
        render=args.render,
        version=0
    )

    # Evaluation environment (only when evaluations are requested).
    eval_env = None
    if args.evaluate:
        eval_flow_params = deepcopy(flow_params)
        eval_flow_params['env'].evaluate = True
        eval_env = FlowEnv(
            eval_flow_params,
            multiagent=multiagent,
            shared=args.shared,
            maddpg=args.maddpg,
            render=args.render_eval,
            version=1
        )

    for run in range(args.n_training):
        # Each run uses its own seed.
        seed = args.seed + run

        # Time stamp marking the start of this run.
        now = strftime("%Y-%m-%d-%H:%M:%S")

        # Make sure that the folder of this run exists.
        dir_name = os.path.join(base_dir, '{}/{}'.format(args.env_name, now))
        ensure_dir(dir_name)

        # Pick the policy class matching the algorithm and agent setting.
        if args.alg == "TD3":
            if multiagent:
                from hbaselines.multi_fcnet.td3 import \
                    MultiFeedForwardPolicy as policy
            else:
                from hbaselines.fcnet.td3 import FeedForwardPolicy as policy
        elif args.alg == "SAC":
            if multiagent:
                from hbaselines.multi_fcnet.sac import \
                    MultiFeedForwardPolicy as policy
            else:
                from hbaselines.fcnet.sac import FeedForwardPolicy as policy
        else:
            raise ValueError("Unknown algorithm: {}".format(args.alg))

        # Hyperparameters of the algorithm and policy.
        hp = get_hyperparameters(args, policy)

        # Extend a copy of the hyperparameters with run-specific information
        # (used for logging only).
        params_with_extra = hp.copy()
        params_with_extra.update({
            'seed': seed,
            'env_name': args.env_name,
            'policy_name': policy.__name__,
            'algorithm': args.alg,
            'date/time': now,
        })

        # Store the hyperparameters next to the results.
        hp_path = os.path.join(dir_name, 'hyperparameters.json')
        with open(hp_path, 'w') as f:
            json.dump(params_with_extra, f, sort_keys=True, indent=4)

        # Build the algorithm and train it.
        alg = OffPolicyRLAlgorithm(
            policy=policy,
            env=env,
            eval_env=eval_env,
            **hp
        )
        alg.learn(
            total_timesteps=args.total_steps,
            log_dir=dir_name,
            log_interval=args.log_interval,
            eval_interval=args.eval_interval,
            save_interval=args.save_interval,
            initial_exploration_steps=args.initial_exploration_steps,
            seed=seed,
        )
Esempio n. 11
0
    def test_single_agent_merge(self):
        """Check the spaces of versions 0, 1 and 2 of the merge environment.

        For every version, the environment is created, reset, inspected and
        terminated before the next one is created.
        """
        # (experiment number / version, observation size, action size)
        cases = [(0, 25, 5), (1, 65, 13), (2, 85, 17)]

        for exp_num, obs_dim, ac_dim in cases:
            # create this version of the environment
            flow_params = merge(
                exp_num=exp_num,
                horizon=6000,
                simulator="traci",
                multiagent=False
            )
            env = FlowEnv(flow_params=flow_params, version=exp_num)
            env.reset()

            # observations are expected to lie in [0, 1]
            test_space(
                env.observation_space,
                expected_min=np.array([0] * obs_dim),
                expected_max=np.array([1] * obs_dim),
                expected_size=obs_dim,
            )

            # actions are expected to lie in [-1.5, 1.5]
            test_space(
                env.action_space,
                expected_min=np.array([-1.5] * ac_dim),
                expected_max=np.array([1.5] * ac_dim),
                expected_size=ac_dim,
            )

            # shut down the underlying simulation
            env.wrapped_env.terminate()
Esempio n. 12
0
    def test_single_agent_figure_eight(self):
        """Check the spaces of the single-agent figure eight environments.

        The base environment (1 automated vehicle) and the variant with 14
        automated vehicles are created, inspected and terminated in turn.
        """
        obs_dim = 28

        # (number of automated vehicles, environment version)
        cases = [(1, 0), (14, 1)]

        for num_automated, version in cases:
            env = FlowEnv(
                env_name="figure_eight",
                env_params={
                    "num_automated": num_automated,
                    "horizon": 1500,
                    "simulator": "traci",
                    "multiagent": False
                },
                version=version
            )
            env.reset()

            # observations are expected to lie in [0, 1]
            test_space(
                env.observation_space,
                expected_min=np.array([0] * obs_dim),
                expected_max=np.array([1] * obs_dim),
                expected_size=obs_dim,
            )

            # one action in [-3, 3] is expected per automated vehicle
            test_space(
                env.action_space,
                expected_min=np.array([-3] * num_automated),
                expected_max=np.array([3] * num_automated),
                expected_size=num_automated,
            )

            # shut down the underlying simulation
            env.wrapped_env.terminate()
Esempio n. 13
0
 "ring-v0": {
     "meta_ac_space":
     lambda relative_goals, multiagent: Box(low=-5 if relative_goals else 0,
                                            high=5
                                            if relative_goals else 10,
                                            shape=(5, ),
                                            dtype=np.float32),
     "state_indices":
     lambda multiagent: [0],
     "env":
     lambda evaluate, render, n_levels, multiagent, shared, maddpg: FlowEnv(
         flow_params=ring(
             stopping_penalty=True,
             acceleration_penalty=True,
             evaluate=evaluate,
             multiagent=multiagent,
         ),
         render=render,
         multiagent=multiagent,
         shared=shared,
         maddpg=maddpg,
     ),
 },
 "merge-v0": {
     "meta_ac_space":
     lambda relative_goals, multiagent: Box(low=-.5
                                            if relative_goals else 0,
                                            high=.5
                                            if relative_goals else 1,
                                            shape=(1
                                                   if multiagent else 5, ),
                                            dtype=np.float32),
Esempio n. 14
0
    def test_single_agent_merge(self):
        """Check the spaces of versions 0, 1 and 2 of the "merge" environment.

        For every version, the environment is created by name, reset,
        inspected and terminated before the next one is created.
        """
        # (experiment number / version, observation size, action size)
        cases = [(0, 25, 5), (1, 65, 13), (2, 85, 17)]

        for exp_num, obs_dim, ac_dim in cases:
            # create this version of the environment
            env = FlowEnv(
                env_name="merge",
                env_params={
                    "exp_num": exp_num,
                    "horizon": 6000,
                    "simulator": "traci",
                    "multiagent": False
                },
                version=exp_num
            )
            env.reset()

            # observations are expected to lie in [0, 1]
            test_space(
                env.observation_space,
                expected_min=np.array([0] * obs_dim),
                expected_max=np.array([1] * obs_dim),
                expected_size=obs_dim,
            )

            # actions are expected to lie in [-1.5, 1.5]
            test_space(
                env.action_space,
                expected_min=np.array([-1.5] * ac_dim),
                expected_max=np.array([1.5] * ac_dim),
                expected_size=ac_dim,
            )

            # shut down the underlying simulation
            env.wrapped_env.terminate()
Esempio n. 15
0
 # ======================================================================= #
 "ring_small": {
     "meta_ac_space":
     lambda relative_goals: Box(low=-.5 if relative_goals else 0,
                                high=.5 if relative_goals else 1,
                                shape=(1, ),
                                dtype=np.float32),
     "state_indices": [0],
     "env":
     lambda evaluate, render, multiagent, shared, maddpg: [
         FlowEnv(
             flow_params=ring_small(
                 ring_length=[230, 230],
                 evaluate=True,
                 multiagent=multiagent,
             ),
             render=render,
             multiagent=multiagent,
             shared=shared,
             maddpg=maddpg,
         ),
         FlowEnv(
             flow_params=ring_small(
                 ring_length=[260, 260],
                 evaluate=True,
                 multiagent=multiagent,
             ),
             render=render,
             multiagent=multiagent,
             shared=shared,
             maddpg=maddpg,
Esempio n. 16
0
 # ======================================================================= #
 # Mixed autonomy traffic flow environments.                               #
 # ======================================================================= #
 "ring": {
     "meta_ac_space":
     lambda relative_goals: Box(low=-10 if relative_goals else 0,
                                high=10 if relative_goals else 30,
                                shape=(5, ),
                                dtype=np.float32),
     "state_indices": [5 * i for i in range(5)],
     "env":
     lambda evaluate, render, multiagent, shared, maddpg: FlowEnv(
         flow_params=ring(
             evaluate=evaluate,
             multiagent=multiagent,
         ),
         render=render,
         multiagent=multiagent,
         shared=shared,
         maddpg=maddpg,
     ),
 },
 "ring_small": {
     "meta_ac_space":
     lambda relative_goals: Box(low=-.5 if relative_goals else 0,
                                high=.5 if relative_goals else 1,
                                shape=(1, ),
                                dtype=np.float32),
     "state_indices": [0],
     "env":
     lambda evaluate, render, multiagent, shared, maddpg: [
         FlowEnv(
Esempio n. 17
0
    def test_multi_agent_ring_small(self):
        """Check the agents and spaces of the multi-agent small ring.

        Two settings are covered: one automated vehicle with independent
        policies (spaces indexed by agent ID), and four automated vehicles
        with a shared policy (a single observation/action space).
        """
        obs_dim = 3
        obs_low = np.array([-5] * obs_dim)
        obs_high = np.array([5] * obs_dim)

        # --- independent policies, one automated vehicle ---
        flow_params = ring_small(
            num_automated=1,
            horizon=1500,
            simulator="traci",
            multiagent=True
        )
        env = FlowEnv(
            flow_params=flow_params,
            multiagent=True,
            shared=False,
            version=1
        )
        env.reset()

        # agent IDs
        self.assertListEqual(env.agents, ["rl_0_0"])

        # observation space of the only agent
        test_space(
            env.observation_space["rl_0_0"],
            expected_min=obs_low,
            expected_max=obs_high,
            expected_size=obs_dim,
        )

        # action space of the only agent
        test_space(
            env.action_space["rl_0_0"],
            expected_min=np.array([-1]),
            expected_max=np.array([1]),
            expected_size=1,
        )

        # shut down the underlying simulation
        env.wrapped_env.terminate()

        # --- shared policy, multiple automated vehicles ---
        flow_params = ring_small(
            num_automated=4,
            horizon=1500,
            simulator="traci",
            multiagent=True
        )
        env = FlowEnv(
            flow_params=flow_params,
            multiagent=True,
            shared=True,
        )
        env.reset()

        # agent IDs
        expected_agents = ["rl_0_0", "rl_1_0", "rl_2_0", "rl_3_0"]
        self.assertListEqual(env.agents, expected_agents)

        # observation space (not indexed by agent in the shared setting)
        test_space(
            env.observation_space,
            expected_min=obs_low,
            expected_max=obs_high,
            expected_size=obs_dim,
        )

        # action space (not indexed by agent in the shared setting)
        test_space(
            env.action_space,
            expected_min=np.array([-1]),
            expected_max=np.array([1]),
            expected_size=1,
        )

        # shut down the underlying simulation
        env.wrapped_env.terminate()
Esempio n. 18
0
def import_flow_env(env_name, render, shared, maddpg, evaluate):
    """Import an environment from the flow/examples folder.

    This method imports the flow_params dict from the exp_configs folders in
    the examples directory and generates an appropriate FlowEnv object.

    Parameters
    ----------
    env_name : str
        the environment name. Starts with "flow:" to signify that it should be
        imported from the flow/examples folder. The first five characters are
        dropped to obtain the name of the experiment config.
    render : bool
        whether to render the environment
    shared : bool
        specifies whether agents in an environment are meant to share policies.
        This is solely used by multi-agent Flow environments.
    maddpg : bool
        whether to use an environment variant that is compatible with the
        MADDPG algorithm
    evaluate : bool
        specifies whether this is a training or evaluation environment

    Returns
    -------
    hbaselines.envs.mixed_autonomy.FlowEnv
        the training/evaluation environment

    Raises
    ------
    ValueError
        if the environment is not available in flow/examples
    """
    # Parse the exp_config name from the environment name (i.e. strip the
    # leading "flow:" prefix).
    exp_config = env_name[5:]

    # Add flow/examples to the path in order to locate the modules below.
    # The entry is added only once, so that repeated calls do not keep
    # growing sys.path with duplicates.
    examples_dir = os.path.join(config.PROJECT_PATH, "examples")
    if examples_dir not in sys.path:
        sys.path.append(examples_dir)

    # Import relevant information from the exp_config script.
    module = __import__("exp_configs.rl.singleagent", fromlist=[exp_config])
    module_ma = __import__("exp_configs.rl.multiagent", fromlist=[exp_config])

    # Import the sub-module containing the specified exp_config and determine
    # whether the environment is single agent or multi-agent. Single-agent
    # configs take precedence if the name exists in both packages.
    if hasattr(module, exp_config):
        submodule = getattr(module, exp_config)
        multiagent = False
    elif hasattr(module_ma, exp_config):
        submodule = getattr(module_ma, exp_config)
        multiagent = True
    else:
        raise ValueError("Unable to find experiment config.")

    # Collect the flow_params object. A deep copy is used so that the update
    # below does not modify the imported module's own flow_params.
    flow_params = deepcopy(submodule.flow_params)

    # Update the evaluation flag to match what is requested.
    flow_params['env'].evaluate = evaluate

    # Return the environment.
    return FlowEnv(
        flow_params,
        multiagent=multiagent,
        shared=shared,
        maddpg=maddpg,
        render=render,
    )
Esempio n. 19
0
    def test_single_agent_figure_eight(self):
        """Check the spaces of the single-agent figure eight environments.

        The base environment (1 automated vehicle) and the variant with 14
        automated vehicles are created, inspected and terminated in turn.
        """
        obs_dim = 28

        # (number of automated vehicles, environment version)
        cases = [(1, 0), (14, 1)]

        for num_automated, version in cases:
            flow_params = figure_eight(
                num_automated=num_automated,
                horizon=1500,
                simulator="traci",
                multiagent=False
            )
            env = FlowEnv(flow_params=flow_params, version=version)
            env.reset()

            # observations are expected to lie in [0, 1]
            test_space(
                env.observation_space,
                expected_min=np.array([0] * obs_dim),
                expected_max=np.array([1] * obs_dim),
                expected_size=obs_dim,
            )

            # one action in [-3, 3] is expected per automated vehicle
            test_space(
                env.action_space,
                expected_min=np.array([-3] * num_automated),
                expected_max=np.array([3] * num_automated),
                expected_size=num_automated,
            )

            # shut down the underlying simulation
            env.wrapped_env.terminate()