Esempio n. 1
0
    def validate_config(self, config: TrainerConfigDict) -> None:
        """Validates `config`, patching or rejecting unsupported settings.

        The parent class's validation runs first. Afterwards:

        * With "ParameterNoise" exploration, `batch_mode` is forced to
          "complete_episodes" (a warning is logged) and `noisy=True` is
          rejected.
        * The config is handed to `validate_buffer_config`.
        * An info message is logged if multi-agent policies are configured
          while `simple_optimizer` is off.

        Args:
            config: The trainer config dict to check. `batch_mode` may be
                overwritten in place.

        Raises:
            ValueError: If "ParameterNoise" exploration is combined with a
                truthy `noisy` setting.
        """
        # Parent checks always come first.
        super().validate_config(config)

        exploration_type = config["exploration_config"]["type"]
        uses_param_noise = exploration_type == "ParameterNoise"

        # The batch mode is only inspected when ParameterNoise is active.
        if uses_param_noise and config["batch_mode"] != "complete_episodes":
            logger.warning(
                "ParameterNoise Exploration requires `batch_mode` to be "
                "'complete_episodes'. Setting batch_mode="
                "complete_episodes.")
            config["batch_mode"] = "complete_episodes"

        # Note: `batch_mode` above has already been patched if we raise here.
        if uses_param_noise and config.get("noisy", False):
            raise ValueError(
                "ParameterNoise Exploration and `noisy` network cannot be"
                " used at the same time!")

        validate_buffer_config(config)

        # Multi-agent mode and multi-GPU optimizer: nothing to report unless
        # policies are configured and the simple optimizer is disabled.
        if not config["multiagent"]["policies"] or config["simple_optimizer"]:
            return
        logger.info(
            "In multi-agent mode, policies will be optimized sequentially"
            " by the multi-GPU optimizer. Consider setting "
            "`simple_optimizer=True` if this doesn't work for you.")
Esempio n. 2
0
    def validate_config(self, config: TrainerConfigDict) -> None:
        """Validates `config` for MARWIL.

        Runs the parent class's validation and `validate_buffer_config`,
        then rejects unsupported settings.

        Args:
            config: The trainer config dict to check.

        Raises:
            ValueError: If `num_gpus` is greater than 1, or if
                `postprocess_inputs` is exactly `False` while `beta` is
                greater than 0.0.
        """
        # Parent validation first, then the buffer-config helper.
        super().validate_config(config)
        validate_buffer_config(config)

        if config["num_gpus"] > 1:
            raise ValueError("`num_gpus` > 1 not yet supported for MARWIL!")

        # Only an explicit `False` matters here; `beta` is not even looked
        # up otherwise.
        if config["postprocess_inputs"] is not False:
            return
        if config["beta"] > 0.0:
            raise ValueError(
                "`postprocess_inputs` must be True for MARWIL (to "
                "calculate accum., discounted returns)!")
Esempio n. 3
0
    def validate_config(self, config: AlgorithmConfigDict) -> None:
        """Validates `config` for MARWIL.

        Runs the parent class's validation and `validate_buffer_config`,
        then rejects unsupported settings.

        Args:
            config: The algorithm config dict to check.

        Raises:
            ValueError: If `beta` is below 0.0 or above 1.0, if `num_gpus`
                is greater than 1, or if `postprocess_inputs` is exactly
                `False` while `beta` is greater than 0.0.
        """
        # Parent validation always runs first.
        super().validate_config(config)

        # TODO: Move this to super()?
        validate_buffer_config(config)

        # Read `beta` once; it is needed by two of the checks below.
        beta = config["beta"]
        if beta < 0.0 or beta > 1.0:
            raise ValueError("`beta` must be within 0.0 and 1.0!")

        if config["num_gpus"] > 1:
            raise ValueError("`num_gpus` > 1 not yet supported for MARWIL!")

        # Only an explicit `False` counts as "postprocessing disabled".
        postprocessing_disabled = config["postprocess_inputs"] is False
        if postprocessing_disabled and beta > 0.0:
            raise ValueError(
                "`postprocess_inputs` must be True for MARWIL (to "
                "calculate accum., discounted returns)!")
Esempio n. 4
0
 def validate_config(self, config: TrainerConfigDict) -> None:
     """Checks and updates the config based on settings.

     Delegates to the parent class's `validate_config` and then passes the
     same config to `validate_buffer_config`.

     Args:
         config: The trainer config dict to validate.
     """
     # Call super's validation method.
     super().validate_config(config)
     # Then hand the same config to the `validate_buffer_config` helper.
     validate_buffer_config(config)
Esempio n. 5
0
 def validate_config(self, config: TrainerConfigDict) -> None:
     """Validates `config` via the parent class and `validate_buffer_config`.

     Args:
         config: The trainer config dict to validate.
     """
     # Parent validation runs first, then the buffer-config helper.
     super().validate_config(config)
     validate_buffer_config(config)
Esempio n. 6
0
# Variant 1: the buffer type is given as the `ReplayBuffer` object itself.
config = DQNConfig().training(replay_buffer_config={"type": ReplayBuffer}).to_dict()

# Variant 2: the buffer type is given as the bare name string.
another_config = (
    DQNConfig().training(replay_buffer_config={"type": "ReplayBuffer"}).to_dict()
)


# Variant 3: the buffer type is given as a dotted module path string.
yet_another_config = (
    DQNConfig()
    .training(
        replay_buffer_config={"type": "ray.rllib.utils.replay_buffers.ReplayBuffer"}
    )
    .to_dict()
)

# Validate each dict. The return values are unused, so presumably any
# normalization of the "type" entry happens on the dicts themselves.
validate_buffer_config(config)
validate_buffer_config(another_config)
validate_buffer_config(yet_another_config)

# After validation, all three configs yield the same effective config
assert config == another_config == yet_another_config
# __sphinx_doc_replay_buffer_type_specification__end__


# __sphinx_doc_replay_buffer_basic_interaction__begin__
# We choose fragments as the storage unit because it does not impose
# restrictions on the batch we add.
buffer = ReplayBuffer(capacity=2, storage_unit=StorageUnit.FRAGMENTS)
# A batch with a single entry in each of its two columns, "a" and "b".
dummy_batch = SampleBatch({"a": [1], "b": [2]})
buffer.add(dummy_batch)
# Request two items although only one batch was added; the result is not
# stored here.
buffer.sample(2)
# Because elements can be sampled multiple times, we receive a concatenated version