def validate_config(self, config: TrainerConfigDict) -> None:
    """Validate `config`, adjusting it in place where required.

    Runs the parent class's validation first. When the exploration type is
    "ParameterNoise", `batch_mode` is forced to "complete_episodes" (with a
    warning) and a simultaneously enabled `noisy` setting is rejected. The
    config is then passed to `validate_buffer_config`, and an informational
    message is logged for multi-agent setups that do not use the simple
    optimizer.

    Args:
        config: The config dict to check. `batch_mode` may be overwritten.

    Raises:
        ValueError: If ParameterNoise exploration is combined with a truthy
            `noisy` setting.
    """
    # Parent checks come first.
    super().validate_config(config)

    exploration_type = config["exploration_config"]["type"]
    if exploration_type == "ParameterNoise":
        if config["batch_mode"] != "complete_episodes":
            # Override the user's setting rather than failing outright.
            logger.warning(
                "ParameterNoise Exploration requires `batch_mode` to be "
                "'complete_episodes'. Setting batch_mode="
                "complete_episodes."
            )
            config["batch_mode"] = "complete_episodes"

        if config.get("noisy", False):
            raise ValueError(
                "ParameterNoise Exploration and `noisy` network cannot be"
                " used at the same time!"
            )

    validate_buffer_config(config)

    # Multi-agent mode combined with the multi-GPU optimizer: only a hint
    # is logged, nothing in the config is changed.
    if config["multiagent"]["policies"]:
        if not config["simple_optimizer"]:
            logger.info(
                "In multi-agent mode, policies will be optimized sequentially"
                " by the multi-GPU optimizer. Consider setting "
                "`simple_optimizer=True` if this doesn't work for you."
            )
def validate_config(self, config: TrainerConfigDict) -> None:
    """Validate a MARWIL config.

    Runs the parent class's validation, passes the config to
    `validate_buffer_config`, and then enforces the MARWIL-specific
    constraints below.

    Args:
        config: The config dict to check.

    Raises:
        ValueError: If `num_gpus` is greater than 1, or if
            `postprocess_inputs` is False while `beta` is greater than 0.0.
    """
    # Parent checks come first, then the replay buffer settings.
    super().validate_config(config)
    validate_buffer_config(config)

    if config["num_gpus"] > 1:
        raise ValueError("`num_gpus` > 1 not yet supported for MARWIL!")

    # Only an explicit `False` (identity check) triggers the beta test.
    if config["postprocess_inputs"] is False:
        if config["beta"] > 0.0:
            raise ValueError(
                "`postprocess_inputs` must be True for MARWIL (to "
                "calculate accum., discounted returns)!"
            )
def validate_config(self, config: AlgorithmConfigDict) -> None:
    """Validate a MARWIL config.

    Runs the parent class's validation, passes the config to
    `validate_buffer_config`, and then enforces the MARWIL-specific
    constraints on `beta`, `num_gpus` and `postprocess_inputs`.

    Args:
        config: The config dict to check.

    Raises:
        ValueError: If `beta` is below 0.0 or above 1.0, if `num_gpus` is
            greater than 1, or if `postprocess_inputs` is False while
            `beta` is greater than 0.0.
    """
    # Parent checks come first.
    super().validate_config(config)

    # TODO: Move this to super()?
    validate_buffer_config(config)

    beta = config["beta"]
    if beta < 0.0 or beta > 1.0:
        raise ValueError("`beta` must be within 0.0 and 1.0!")

    if config["num_gpus"] > 1:
        raise ValueError("`num_gpus` > 1 not yet supported for MARWIL!")

    # Only an explicit `False` (identity check) triggers the beta test.
    if config["postprocess_inputs"] is False:
        if beta > 0.0:
            raise ValueError(
                "`postprocess_inputs` must be True for MARWIL (to "
                "calculate accum., discounted returns)!"
            )
def validate_config(self, config: TrainerConfigDict) -> None:
    """Validate `config` via the parent class and `validate_buffer_config`.

    Args:
        config: The config dict to check.
    """
    # Parent validation must run before the buffer-specific validation.
    super().validate_config(config)

    validate_buffer_config(config)
def validate_config(self, config: TrainerConfigDict) -> None:
    """Run the parent's config validation, then `validate_buffer_config`.

    Args:
        config: The config dict to check.
    """
    # Order matters: the parent class validates first.
    super().validate_config(config)

    validate_buffer_config(config)
# Variant 1: the buffer type is given as the class object itself.
config = DQNConfig().training(replay_buffer_config={"type": ReplayBuffer}).to_dict()

# Variant 2: the buffer type is given as the bare class name.
another_config = (
    DQNConfig().training(replay_buffer_config={"type": "ReplayBuffer"}).to_dict()
)

# Variant 3: the buffer type is given as a fully qualified import path.
yet_another_config = (
    DQNConfig()
    .training(
        replay_buffer_config={"type": "ray.rllib.utils.replay_buffers.ReplayBuffer"}
    )
    .to_dict()
)

# Validate each variant in turn.
for _buffer_cfg in (config, another_config, yet_another_config):
    validate_buffer_config(_buffer_cfg)

# After validation, all three configs yield the same effective config
assert config == another_config
assert another_config == yet_another_config

# __sphinx_doc_replay_buffer_type_specification__end__

# __sphinx_doc_replay_buffer_basic_interaction__begin__

# We choose fragments because it does not impose restrictions on our batch to be added
buffer = ReplayBuffer(capacity=2, storage_unit=StorageUnit.FRAGMENTS)
dummy_batch = SampleBatch({"a": [1], "b": [2]})
buffer.add(dummy_batch)
buffer.sample(2)
# Because elements can be sampled multiple times, we receive a concatenated version