예제 #1
0
    def test_contextual_reward(self):
        """Check the functionality of the contextual_reward method.

        For every environment, the reward is computed from an all-zeros, an
        all-ones and an all-twos vector (passed in that order) whose length
        matches the environment's context. The expected value is
        approximately -sqrt(dim) * REWARD_SCALE, i.e. about -1.41421 for the
        two-dimensional contexts and about -1.73205 for the three-dimensional
        one.

        This method is tested for the following environments:

        1. AntMaze
        2. AntPush
        3. AntFall
        4. AntFourRooms
        """
        from hbaselines.envs.efficient_hrl.envs import REWARD_SCALE

        # (environment class, context dimension, expected unscaled reward)
        cases = [
            (AntMaze, 2, -1.4142135624084504),       # test case 1
            (AntPush, 2, -1.4142135624084504),       # test case 2
            (AntFall, 3, -1.7320508075977448),       # test case 3
            (AntFourRooms, 2, -1.4142135624084504),  # test case 4
        ]

        for env_cls, dim, expected in cases:
            env = env_cls(use_contexts=True, context_range=[0] * dim)
            reward = env.contextual_reward(
                np.array([0] * dim),
                np.array([1] * dim),
                np.array([2] * dim),
            )
            # The message identifies which environment produced a mismatch.
            self.assertAlmostEqual(
                reward, expected * REWARD_SCALE, msg=env_cls.__name__)
예제 #2
0
    def test_context_space(self):
        """Validate the bounds exposed through the context_space attribute.

        The following scenarios are covered:

        1. contexts are disabled
        2. contexts are sampled randomly from a range
        3. a single fixed context is used
        4. several fixed contexts are used
        """
        # scenario 1: no context space is expected when contexts are off
        env = AntMaze(use_contexts=False)
        self.assertIsNone(env.context_space)

        # Each entry: (extra constructor arguments, expected low, expected high)
        scenarios = (
            # scenario 2: bounds follow the (min, max) pair of each element
            (dict(random_contexts=True, context_range=[(-4, 5), (4, 20)]),
             [-4, 4], [5, 20]),
            # scenario 3: both bounds equal the one fixed context
            (dict(random_contexts=False, context_range=[-4, 5]),
             [-4, 5], [-4, 5]),
            # scenario 4: bounds are the element-wise extremes of the contexts
            (dict(random_contexts=False,
                  context_range=[[-4, 5], [-3, 10], [-2, 7]]),
             [-4, 5], [-2, 10]),
        )

        for extra_kwargs, expected_low, expected_high in scenarios:
            env = AntMaze(use_contexts=True, **extra_kwargs)
            np.testing.assert_almost_equal(
                env.context_space.low, np.array(expected_low))
            np.testing.assert_almost_equal(
                env.context_space.high, np.array(expected_high))
예제 #3
0
     low=np.array([
         -10, -10, -0.5, -1, -1, -1, -1, -0.5, -0.3, -0.5, -0.3, -0.5,
         -0.3, -0.5, -0.3
     ]),
     high=np.array([
         10, 10, 0.5, 1, 1, 1, 1, 0.5, 0.3, 0.5, 0.3, 0.5, 0.3, 0.5, 0.3
     ]),
     dtype=np.float32,
 ),
 "state_indices":
 lambda multiagent: [i for i in range(15)],
 "env":
 lambda evaluate, render, n_levels, multiagent, shared, maddpg: [
     AntMaze(
         use_contexts=True,
         context_range=[16, 0],
         evaluate=True,
         num_levels=n_levels,
     ),
     AntMaze(
         use_contexts=True,
         context_range=[16, 16],
         evaluate=True,
         num_levels=n_levels,
     ),
     AntMaze(
         use_contexts=True,
         context_range=[0, 16],
         evaluate=True,
         num_levels=n_levels,
     )
 ] if evaluate else AntMaze(
예제 #4
0
 # Configuration entry for the "AntMaze" environment.
 "AntMaze": {
     # Callable that builds a Box with 15-element low/high bounds. The
     # `relative_goals` argument is accepted but not used by this lambda.
     "meta_ac_space":
     lambda relative_goals: Box(
         low=np.array([
             -10, -10, -0.5, -1, -1, -1, -1, -0.5, -0.3, -0.5, -0.3, -0.5,
             -0.3, -0.5, -0.3
         ]),
         high=np.array([
             10, 10, 0.5, 1, 1, 1, 1, 0.5, 0.3, 0.5, 0.3, 0.5, 0.3, 0.5, 0.3
         ]),
         dtype=np.float32,
     ),
     # Indices 0 through 14, matching the 15 bounds declared above.
     "state_indices": [i for i in range(15)],
     # Callable that builds the environment(s). When `evaluate` is truthy it
     # returns a list of three AntMaze instances, each with its own fixed
     # context; otherwise it returns a single AntMaze with random contexts
     # drawn from the given ranges. The `render`, `multiagent`, `shared` and
     # `maddpg` arguments are accepted but not used by this lambda.
     "env":
     lambda evaluate, render, multiagent, shared, maddpg: [
         AntMaze(use_contexts=True, context_range=[16, 0]),
         AntMaze(use_contexts=True, context_range=[16, 16]),
         AntMaze(use_contexts=True, context_range=[0, 16])
     ] if evaluate else AntMaze(use_contexts=True,
                                random_contexts=True,
                                context_range=[(-4, 20), (-4, 20)]),
 },
 "AntPush": {
     "meta_ac_space":
     lambda relative_goals: Box(
         low=np.array([
             -10, -10, -0.5, -1, -1, -1, -1, -0.5, -0.3, -0.5, -0.3, -0.5,
             -0.3, -0.5, -0.3
         ]),
         high=np.array([
             10, 10, 0.5, 1, 1, 1, 1, 0.5, 0.3, 0.5, 0.3, 0.5, 0.3, 0.5, 0.3
예제 #5
0
def create_env(env, render=False, evaluate=False):
    """Return, and potentially create, the environment.

    If `env` is one of the names handled below, the matching environment is
    instantiated. Any other string is forwarded to `gym.make`. Objects that
    are not strings (for instance an already constructed environment or a
    list of environments) are used as they are. Every resulting environment
    is reset before it is returned; `None` is returned untouched.

    Parameters
    ----------
    env : str or gym.Env
        the environment, or the name of a registered environment.
    render : bool
        whether to render the environment. Only forwarded to the UR5,
        Pendulum, Flow and BipedalSoccer environments.
    evaluate : bool
        specifies whether this is a training or evaluation environment

    Returns
    -------
    gym.Env or list of gym.Env
        gym-compatible environment(s). A list is produced for the evaluation
        variants of "AntMaze" and "AntFourRooms".
    """
    if env == "AntGather":
        env = AntGatherEnv()

    elif env == "AntMaze":
        if evaluate:
            # One environment per fixed evaluation goal.
            env = [
                AntMaze(use_contexts=True, context_range=[16, 0]),
                AntMaze(use_contexts=True, context_range=[16, 16]),
                AntMaze(use_contexts=True, context_range=[0, 16])
            ]
        else:
            env = AntMaze(use_contexts=True,
                          random_contexts=True,
                          context_range=[(-4, 20), (-4, 20)])

    elif env == "AntPush":
        # Training and evaluation share the same fixed context.
        # NOTE: a random training context with
        # context_range=[(-16, 16), (-4, 20)] is currently disabled.
        env = AntPush(use_contexts=True, context_range=[0, 19])

    elif env == "AntFall":
        # Training and evaluation share the same fixed context.
        # NOTE: a random training context with
        # context_range=[(-4, 12), (-4, 28), (0, 5)] is currently disabled.
        env = AntFall(use_contexts=True, context_range=[0, 27, 4.5])

    elif env == "AntFourRooms":
        if evaluate:
            # One environment per fixed evaluation goal.
            env = [
                AntFourRooms(use_contexts=True, context_range=[30, 0]),
                AntFourRooms(use_contexts=True, context_range=[0, 30]),
                AntFourRooms(use_contexts=True, context_range=[30, 30])
            ]
        else:
            env = AntFourRooms(use_contexts=True,
                               random_contexts=False,
                               context_range=[[30, 0], [0, 30], [30, 30]])

    elif env == "UR5":
        # Training and evaluation use the same random context ranges.
        env = UR5(use_contexts=True,
                  random_contexts=True,
                  context_range=[(-np.pi, np.pi), (-np.pi / 4, 0),
                                 (-np.pi / 4, np.pi / 4)],
                  show=render)

    elif env == "Pendulum":
        if evaluate:
            env = Pendulum(use_contexts=True,
                           context_range=[0, 0],
                           show=render)
        else:
            env = Pendulum(use_contexts=True,
                           random_contexts=True,
                           context_range=[(np.deg2rad(-16), np.deg2rad(16)),
                                          (-0.6, 0.6)],
                           show=render)

    elif env in [
            "bottleneck0", "bottleneck1", "bottleneck2", "grid0", "grid1"
    ]:
        # Import the benchmark and fetch its flow_params
        benchmark = __import__("flow.benchmarks.{}".format(env),
                               fromlist=["flow_params"])
        flow_params = benchmark.flow_params

        # Get a creator for the environment. The local is deliberately not
        # called `create_env`, which would shadow this function's own name.
        env_creator, _ = make_create_env(
            flow_params, version=0, render=render)

        # Create the environment.
        env = env_creator()

    elif env in ["ring0", "multi-ring0"]:
        env = FlowEnv("ring", render=render)  # FIXME

    elif env in [
            "merge0", "merge1", "merge2", "multi-merge0", "multi-merge1",
            "multi-merge2"
    ]:
        # The trailing digit of the name selects the experiment number.
        env_num = int(env[-1])
        env = FlowEnv("merge",
                      env_params={
                          "exp_num": env_num,
                          "horizon": 6000,
                          "simulator": "traci",
                          "multiagent": env.startswith("multi")
                      },
                      render=render)

    elif env in [
            "figureeight0", "figureeight1", "figureeight2",
            "multi-figureeight0", "multi-figureeight1", "multi-figureeight2",
            # Legacy spellings, kept so that existing callers keep working.
            "figureeight02", "multi-figureeight02"
    ]:
        # The trailing digit of the name indexes the number of automated
        # vehicles below.
        env_num = int(env[-1])
        env = FlowEnv("figure_eight",
                      env_params={
                          "num_automated": [1, 7, 14][env_num],
                          "horizon": 750,
                          "simulator": "traci",
                          "multiagent": env.startswith("multi")
                      },
                      render=render)

    elif env == "BipedalSoccer":
        env = BipedalSoccer(render=render)

    elif isinstance(env, str):
        # This is assuming the environment is registered with OpenAI gym.
        env = gym.make(env)

    # Reset the environment(s).
    if env is not None:
        if isinstance(env, list):
            for next_env in env:
                next_env.reset()
        else:
            env.reset()

    return env
예제 #6
0
    def _create_env(env, evaluate=False):
        """Return, and potentially create, the environment.

        Parameters
        ----------
        env : str or gym.Env
            the environment, or the name of a registered environment.
        evaluate : bool, optional
            specifies whether this is a training or evaluation environment

        Returns
        -------
        gym.Env
            a gym-compatible environment
        """
        if env == "AntMaze":
            if evaluate:
                env = AntMaze(use_contexts=True, context_range=[16, 0])
                # env = AntMaze(use_contexts=True, context_range=[16, 16])
                # env = AntMaze(use_contexts=True, context_range=[0, 16])
            else:
                env = AntMaze(use_contexts=True,
                              random_contexts=True,
                              context_range=[(-4, 20), (-4, 20)])

        elif env == "AntPush":
            if evaluate:
                env = AntPush(use_contexts=True, context_range=[0, 19])
            else:
                env = AntPush(use_contexts=True, context_range=[0, 19])
                # env = AntPush(use_contexts=True,
                #               random_contexts=True,
                #               context_range=[(-16, 16), (-4, 20)])

        elif env == "AntFall":
            if evaluate:
                env = AntFall(use_contexts=True, context_range=[0, 27, 4.5])
            else:
                env = AntFall(use_contexts=True, context_range=[0, 27, 4.5])
                # env = AntFall(use_contexts=True,
                #               random_contexts=True,
                #               context_range=[(-4, 12), (-4, 28), (0, 5)])

        elif env in [
                "figureeight0", "figureeight1", "figureeight2", "merge0",
                "merge1", "merge2", "bottleneck0", "bottleneck1",
                "bottleneck2", "grid0", "grid1"
        ]:
            # Import the benchmark and fetch its flow_params
            benchmark = __import__("flow.benchmarks.{}".format(env),
                                   fromlist=["flow_params"])
            flow_params = benchmark.flow_params

            # Get the env name and a creator for the environment.
            create_env, env_name = make_create_env(flow_params, version=0)

            # Create the environment.
            env = create_env()

        elif isinstance(env, str):
            # This is assuming the environment is registered with OpenAI gym.
            env = gym.make(env)

        # Reset the environment.
        if env is not None:
            env.reset()

        return env
예제 #7
0
    def test_current_context(self):
        """Validate the value exposed through the current_context attribute.

        The following scenarios are covered, each one after resetting the
        environment:

        1. contexts are disabled
        2. contexts are sampled randomly from a range
        3. a single fixed context is used
        4. several fixed contexts are used
        """
        # Seed both generators; the expected values in scenarios 2 and 4
        # presumably depend on these seeds.
        random.seed(0)
        np.random.seed(0)

        check_close = np.testing.assert_almost_equal

        # scenario 1: no context is reported when contexts are off
        maze = AntMaze(use_contexts=False)
        maze.reset()
        self.assertIsNone(maze.current_context)

        # scenario 2: a context sampled from the given ranges
        maze = AntMaze(use_contexts=True,
                       random_contexts=True,
                       context_range=[(-4, 5), (4, 20)])
        maze.reset()
        sampled = np.array([3.5997967, 16.1272704])
        check_close(maze.current_context, sampled)

        # scenario 3: the one fixed context is reported unchanged
        maze = AntMaze(use_contexts=True,
                       random_contexts=False,
                       context_range=[-4, 5])
        maze.reset()
        fixed = np.array([-4, 5])
        check_close(maze.current_context, fixed)

        # scenario 4: one of the listed contexts is picked on every reset
        maze = AntMaze(use_contexts=True,
                       random_contexts=False,
                       context_range=[[-4, 5], [-3, 6], [-2, 7]])
        for picked in ([-3, 6], [-4, 5]):
            maze.reset()
            check_close(maze.current_context, np.array(picked))