Example #1
    def test_custom_multi_action_distribution(self):
        class Model:
            pass

        ray.init(
            object_store_memory=1000 * 1024 * 1024, ignore_reinit_error=True
        )  # otherwise fails sometimes locally
        # registration
        ModelCatalog.register_custom_action_dist("test", CustomMultiActionDistribution)
        s1 = Discrete(5)
        s2 = Box(0, 1, shape=(3,), dtype=np.float32)
        spaces = dict(action_1=s1, action_2=s2)
        action_space = Dict(spaces)
        # test retrieving it
        model_config = MODEL_DEFAULTS.copy()
        model_config["custom_action_dist"] = "test"
        dist_cls, param_shape = ModelCatalog.get_action_dist(action_space, model_config)
        self.assertIsInstance(dist_cls, partial)
        self.assertEqual(param_shape, s1.n + 2 * s2.shape[0])

        # test the class works as a distribution
        dist_input = tf1.placeholder(tf.float32, (None, param_shape))
        model = Model()
        model.model_config = model_config
        dist = dist_cls(dist_input, model=model)
        self.assertIsInstance(dist.sample(), dict)
        self.assertIn("action_1", dist.sample())
        self.assertIn("action_2", dist.sample())
        self.assertEqual(dist.sample()["action_1"].dtype, tf.int64)
        self.assertEqual(dist.sample()["action_2"].shape[1:], s2.shape)

        with self.assertRaises(NotImplementedError):
            dist.entropy()
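
The `CustomMultiActionDistribution` registered above is not shown in this example. A minimal sketch of what it might look like, assuming it simply subclasses RLlib's built-in `MultiActionDistribution` and leaves `entropy()` unimplemented (which is what the final assertion relies on):

from ray.rllib.models.tf.tf_action_dist import MultiActionDistribution


# Illustrative reconstruction; the real test helper may differ.
class CustomMultiActionDistribution(MultiActionDistribution):
    """Dict-action distribution that behaves like the built-in one, except
    that entropy() is deliberately left unimplemented (see the assertion
    above)."""

    def entropy(self):
        raise NotImplementedError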
Example #2
    def testCustomActionDistribution(self):
        ray.init()
        # registration
        ModelCatalog.register_custom_action_dist("test",
                                                 CustomActionDistribution)
        action_space = Box(0, 1, shape=(5, 3), dtype=np.float32)

        # test retrieving it
        model_config = MODEL_DEFAULTS.copy()
        model_config["custom_action_dist"] = "test"
        dist_cls, param_shape = ModelCatalog.get_action_dist(
            action_space, model_config)
        self.assertEqual(str(dist_cls), str(CustomActionDistribution))
        self.assertEqual(param_shape, action_space.shape)

        # test the class works as a distribution
        dist_input = tf.placeholder(tf.float32, (None, ) + param_shape)
        dist = dist_cls(dist_input, model_config=model_config)
        self.assertEqual(dist.sample().shape[1:], dist_input.shape[1:])
        self.assertIsInstance(dist.sample(), tf.Tensor)
        with self.assertRaises(NotImplementedError):
            dist.entropy()

        # test passing the options to it
        model_config["custom_options"].update({"output_dim": (3, )})
        dist_cls, param_shape = ModelCatalog.get_action_dist(
            action_space, model_config)
        self.assertEqual(param_shape, (3, ))
        dist_input = tf.placeholder(tf.float32, (None, ) + param_shape)
        dist = dist_cls(dist_input, model_config=model_config)
        self.assertEqual(dist.sample().shape[1:], dist_input.shape[1:])
        self.assertIsInstance(dist.sample(), tf.Tensor)
        with self.assertRaises(NotImplementedError):
            dist.entropy()
Example #3
    def test_custom_action_distribution(self):
        class Model:
            pass

        ray.init(object_store_memory=1000 * 1024 * 1024,
                 ignore_reinit_error=True)  # otherwise fails sometimes locally
        # registration
        ModelCatalog.register_custom_action_dist("test",
                                                 CustomActionDistribution)
        action_space = Box(0, 1, shape=(5, 3), dtype=np.float32)

        # test retrieving it
        model_config = MODEL_DEFAULTS.copy()
        model_config["custom_action_dist"] = "test"
        dist_cls, param_shape = ModelCatalog.get_action_dist(
            action_space, model_config)
        self.assertEqual(str(dist_cls), str(CustomActionDistribution))
        self.assertEqual(param_shape, action_space.shape)

        # test the class works as a distribution
        dist_input = tf1.placeholder(tf.float32, (None, ) + param_shape)
        model = Model()
        model.model_config = model_config
        dist = dist_cls(dist_input, model=model)
        self.assertEqual(dist.sample().shape[1:], dist_input.shape[1:])
        self.assertIsInstance(dist.sample(), tf.Tensor)
        with self.assertRaises(NotImplementedError):
            dist.entropy()

        # test passing the options to it
        model_config["custom_model_config"].update({"output_dim": (3, )})
        dist_cls, param_shape = ModelCatalog.get_action_dist(
            action_space, model_config)
        self.assertEqual(param_shape, (3, ))
        dist_input = tf1.placeholder(tf.float32, (None, ) + param_shape)
        model.model_config = model_config
        dist = dist_cls(dist_input, model=model)
        self.assertEqual(dist.sample().shape[1:], dist_input.shape[1:])
        self.assertIsInstance(dist.sample(), tf.Tensor)
        with self.assertRaises(NotImplementedError):
            dist.entropy()
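
The `CustomActionDistribution` exercised in Examples #2 and #3 is likewise defined elsewhere. A minimal sketch of what such a class might look like, assuming RLlib's `TFActionDistribution` base class and the `custom_model_config` option dict used in Example #3 (Example #2 is an older variant that passes `model_config=` to the constructor and uses the `custom_options` key instead):

from ray.rllib.models.tf.tf_action_dist import TFActionDistribution
from ray.rllib.utils.framework import try_import_tf

tf1, tf, tfv = try_import_tf()


# Illustrative reconstruction; the real test helper may differ.
class CustomActionDistribution(TFActionDistribution):
    """Toy distribution that samples uniformly from a configurable shape."""

    def __init__(self, inputs, model):
        # "output_dim" in the model's custom options overrides the sample shape.
        custom_options = model.model_config.get("custom_model_config", {})
        if "output_dim" in custom_options:
            self.output_shape = tf.concat(
                [tf.shape(inputs)[:1], custom_options["output_dim"]], axis=0)
        else:
            self.output_shape = tf.shape(inputs)
        super().__init__(inputs, model)

    @staticmethod
    def required_model_output_shape(action_space, model_config):
        # Flat shape of the model output that feeds this distribution.
        custom_options = model_config.get("custom_model_config", {})
        return custom_options.get("output_dim", action_space.shape)

    def _build_sample_op(self):
        return tf.random.uniform(self.output_shape)

    def logp(self, x):
        return tf.zeros(self.output_shape[:1])

    # entropy() is intentionally not overridden, so the base class raises
    # NotImplementedError, which is what the tests assert.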
Example #4
def build_sac_model(policy: Policy, obs_space: gym.spaces.Space,
                    action_space: gym.spaces.Space,
                    config: TrainerConfigDict) -> ModelV2:
    """Constructs the necessary ModelV2 for the Policy and returns it.

    Args:
        policy (Policy): The Policy that will use the models.
        obs_space (gym.spaces.Space): The observation space.
        action_space (gym.spaces.Space): The action space.
        config (TrainerConfigDict): The SAC trainer's config dict.

    Returns:
        ModelV2: The ModelV2 to be used by the Policy. Note: An additional
            target model will be created in this function and assigned to
            `policy.target_model`.
    """
    # Force-ignore any additionally provided hidden layer sizes.
    # Everything should be configured using SAC's "Q_model" and "policy_model"
    # settings.
    policy_model_config = MODEL_DEFAULTS.copy()
    policy_model_config.update(config["policy_model"])
    q_model_config = MODEL_DEFAULTS.copy()
    q_model_config.update(config["Q_model"])

    default_model_cls = SACTorchModel if config["framework"] == "torch" \
        else SACTFModel

    model = ModelCatalog.get_model_v2(obs_space=obs_space,
                                      action_space=action_space,
                                      num_outputs=None,
                                      model_config=config["model"],
                                      framework=config["framework"],
                                      default_model=default_model_cls,
                                      name="sac_model",
                                      policy_model_config=policy_model_config,
                                      q_model_config=q_model_config,
                                      twin_q=config["twin_q"],
                                      initial_alpha=config["initial_alpha"],
                                      target_entropy=config["target_entropy"])

    assert isinstance(model, default_model_cls)

    # Create an exact copy of the model and store it in `policy.target_model`.
    # This will be used for tau-synched Q-target models that run behind the
    # actual Q-networks and are used for target q-value calculations in the
    # loss terms.
    policy.target_model = ModelCatalog.get_model_v2(
        obs_space=obs_space,
        action_space=action_space,
        num_outputs=None,
        model_config=config["model"],
        framework=config["framework"],
        default_model=default_model_cls,
        name="target_sac_model",
        policy_model_config=policy_model_config,
        q_model_config=q_model_config,
        twin_q=config["twin_q"],
        initial_alpha=config["initial_alpha"],
        target_entropy=config["target_entropy"])

    assert isinstance(policy.target_model, default_model_cls)

    return model
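
For context, a builder like `build_sac_model` is normally handed to a policy template rather than called directly. A rough sketch of that wiring, assuming the `build_tf_policy` template from the same Ray generation as this example; `dummy_loss` and `MySACTFPolicy` are placeholders, not SAC's real loss or policy:

from ray.rllib.agents.sac import DEFAULT_CONFIG as SAC_DEFAULT_CONFIG
from ray.rllib.policy.tf_policy_template import build_tf_policy
from ray.rllib.utils.framework import try_import_tf

tf1, tf, tfv = try_import_tf()


def dummy_loss(policy, model, dist_class, train_batch):
    # Stand-in loss so the template is well-formed; the real SAC losses live
    # in ray.rllib.agents.sac.sac_tf_policy.
    return tf.constant(0.0)


# Sketch only: the model builder above becomes `make_model`, so the resulting
# policy constructs both `model` and `policy.target_model` at instantiation.
MySACTFPolicy = build_tf_policy(
    name="MySACTFPolicy",
    get_default_config=lambda: SAC_DEFAULT_CONFIG,
    make_model=build_sac_model,
    loss_fn=dummy_loss,
)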
Example #5
from ray.rllib.agents.ppo import (
    PPOTrainer as Trainer,
    DEFAULT_CONFIG as AGENT_DEFAULT_CONFIG,
)

from gym_jiminy.toolbox.rllib.utilities import initialize, train, test

# Register learning environment
register_env("env", lambda env_config: gym.make(GYM_ENV_NAME, **env_config))

# ============= Initialize Ray and Tensorboard daemons =============

logger_creator = initialize(num_cpus=N_THREADS, num_gpus=N_GPU, debug=DEBUG)

# ======================== Configure model =========================

# Copy the default model configuration
mdl_cfg = MODEL_DEFAULTS.copy()

# Fully-connected network settings
# Nonlinearity for the built-in fully-connected net ("tanh", "relu" or "linear")
mdl_cfg["fcnet_activation"] = "tanh"
# Sizes of the hidden layers of the fully-connected net
mdl_cfg["fcnet_hiddens"] = [64, 64]
# Whether to skip the final linear layer used to resize the outputs to `num_outputs`
mdl_cfg["no_final_linear"] = False
# The last half of the output layer does not depend on the input
mdl_cfg["free_log_std"] = True
# Whether layers should be shared for the value function
mdl_cfg["vf_share_layers"] = False

# ========================= Configure RLlib ========================
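
The snippet stops at the RLlib configuration step. A hedged sketch of how the model settings above are typically plugged into the PPO trainer config; `agent_cfg` and `train_agent` are illustrative names, and the actual training loop depends on the gym_jiminy utilities imported above:

# Illustrative continuation: copy the default PPO configuration and plug in
# the custom model settings.
agent_cfg = AGENT_DEFAULT_CONFIG.copy()
agent_cfg["model"] = mdl_cfg

# Instantiate the trainer on the environment registered above.
train_agent = Trainer(agent_cfg, "env", logger_creator)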
Example #6
def build_rnnsac_model(
    policy: Policy,
    obs_space: gym.spaces.Space,
    action_space: gym.spaces.Space,
    config: AlgorithmConfigDict,
) -> ModelV2:
    """Constructs the necessary ModelV2 for the Policy and returns it.

    Args:
        policy: The Policy that will use the models.
        obs_space: The observation space.
        action_space: The action space.
        config: The RNNSAC algorithm's config dict.

    Returns:
        ModelV2: The ModelV2 to be used by the Policy. Note: An additional
            target model will be created in this function and assigned to
            `policy.target_model`.
    """
    # With separate state-preprocessor (before obs+action concat).
    num_outputs = int(np.prod(obs_space.shape))

    # Force-ignore any additionally provided hidden layer sizes.
    # Everything should be configured using SAC's `q_model_config` and
    # `policy_model_config` config settings.
    policy_model_config = MODEL_DEFAULTS.copy()
    policy_model_config.update(config["policy_model_config"])
    q_model_config = MODEL_DEFAULTS.copy()
    q_model_config.update(config["q_model_config"])

    default_model_cls = RNNSACTorchModel

    model = ModelCatalog.get_model_v2(
        obs_space=obs_space,
        action_space=action_space,
        num_outputs=num_outputs,
        model_config=config["model"],
        framework=config["framework"],
        default_model=default_model_cls,
        name="sac_model",
        policy_model_config=policy_model_config,
        q_model_config=q_model_config,
        twin_q=config["twin_q"],
        initial_alpha=config["initial_alpha"],
        target_entropy=config["target_entropy"],
    )

    assert isinstance(model, default_model_cls)

    # Create an exact copy of the model and store it in `policy.target_model`.
    # This will be used for tau-synched Q-target models that run behind the
    # actual Q-networks and are used for target q-value calculations in the
    # loss terms.
    policy.target_model = ModelCatalog.get_model_v2(
        obs_space=obs_space,
        action_space=action_space,
        num_outputs=num_outputs,
        model_config=config["model"],
        framework=config["framework"],
        default_model=default_model_cls,
        name="target_sac_model",
        policy_model_config=policy_model_config,
        q_model_config=q_model_config,
        twin_q=config["twin_q"],
        initial_alpha=config["initial_alpha"],
        target_entropy=config["target_entropy"],
    )

    assert isinstance(policy.target_model, default_model_cls)

    return model