def __init__(self, *args, **kwargs):
    super().__init__(*args, **kwargs)
    # Create a new config by merging the dicts.
    # run_config is not a tunable hyperparameter, so it does not need to be
    # merged.
    run_config = base_config.pop("run_config", None)
    self._merged_config = merge_dicts(base_config, self.config)
    self._merged_config["run_config"] = run_config
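# The merge above relies on Ray's merge_dicts utility. A minimal,
# self-contained sketch of the assumed semantics (a deep copy of the first
# dict, recursively updated by the second; values in the second dict win):
import copy


def _merge_dicts_sketch(d1: dict, d2: dict) -> dict:
    merged = copy.deepcopy(d1)
    for key, value in d2.items():
        if isinstance(value, dict) and isinstance(merged.get(key), dict):
            merged[key] = _merge_dicts_sketch(merged[key], value)
        else:
            merged[key] = value
    return merged


# Example with hypothetical values: the trial's tunable config overrides
# the base config, while run_config is popped out and reattached unmerged.
base = {"lr": 1e-3, "model": {"hidden": 256}, "run_config": object()}
run_config = base.pop("run_config", None)
merged = _merge_dicts_sketch(base, {"lr": 1e-4, "model": {"depth": 2}})
merged["run_config"] = run_config
# merged == {"lr": 1e-4, "model": {"hidden": 256, "depth": 2}, "run_config": ...}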
@classmethod
def default_resource_request(
    cls, config: PartialTrainerConfigDict
) -> Union[Resources, PlacementGroupFactory]:
    resolved_config = merge_dicts(base_config, config)
    param_dict["config"] = resolved_config
    trainer = trainer_cls(**param_dict)
    rllib_config = trainer._get_rllib_config(process_datasets=False)
    return rllib_trainer.default_resource_request(rllib_config)
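# For context: Ray Tune calls a Trainable's default_resource_request()
# before starting a trial in order to reserve resources. A hedged sketch of
# the kind of value the delegated RLlib call typically resolves to: a
# PlacementGroupFactory with one driver bundle plus one bundle per rollout
# worker. The bundle keys and counts here are illustrative assumptions,
# not the exact layout RLlib computes.
from ray.tune import PlacementGroupFactory

num_workers = 2  # hypothetical rollout worker count
pgf = PlacementGroupFactory(
    [{"CPU": 1, "GPU": 1}] + [{"CPU": 1}] * num_workers,
    strategy="PACK",
)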
def __init__(self, *args, **kwargs):
    super().__init__(*args, **kwargs)
    # Create a new config by merging the dicts.
    # run_config is not a tunable hyperparameter, so it does not need to be
    # merged.
    run_config = base_config.pop("run_config", None)
    self._merged_config = merge_dicts(base_config, self.config)
    self._merged_config["run_config"] = run_config

    merged_scaling_config = self._merged_config.get("scaling_config")
    if isinstance(merged_scaling_config, dict):
        merged_scaling_config = ScalingConfig(**merged_scaling_config)
    self._merged_config[
        "scaling_config"
    ] = self._reconcile_scaling_config_with_trial_resources(merged_scaling_config)
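# A hedged sketch of the dict-to-dataclass promotion performed above. The
# import path assumes the Ray AIR era of this code; in newer releases
# ScalingConfig lives under ray.train instead.
from ray.air.config import ScalingConfig

scaling = {"num_workers": 2, "use_gpu": False}
if isinstance(scaling, dict):
    # Tune may hand scaling_config back as a plain dict (e.g. after the
    # search space is serialized), so it is rebuilt into the dataclass
    # before being reconciled against the trial's actual resources.
    scaling = ScalingConfig(**scaling)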
def __init__(
    self,
    config: Optional[PartialAlgorithmConfigDict] = None,
    env: Optional[Union[str, EnvType]] = None,
    logger_creator: Optional[Callable[[], Logger]] = None,
    remote_checkpoint_dir: Optional[str] = None,
    custom_syncer: Optional[Syncer] = None,
):
    resolved_config = merge_dicts(base_config, config or {})
    param_dict["config"] = resolved_config
    trainer = trainer_cls(**param_dict)
    rllib_config = trainer._get_rllib_config(process_datasets=True)
    super(AIRRLTrainer, self).__init__(
        config=rllib_config,
        env=env,
        logger_creator=logger_creator,
        remote_checkpoint_dir=remote_checkpoint_dir,
        custom_syncer=custom_syncer,
    )
def __init__(
    self,
    config: Optional[PartialTrainerConfigDict] = None,
    env: Optional[Union[str, EnvType]] = None,
    logger_creator: Optional[Callable[[], Logger]] = None,
    remote_checkpoint_dir: Optional[str] = None,
    sync_function_tpl: Optional[str] = None,
):
    # Guard against the `config=None` default; merge_dicts expects a dict.
    resolved_config = merge_dicts(base_config, config or {})
    param_dict["config"] = resolved_config
    trainer = trainer_cls(**param_dict)
    rllib_config = trainer._get_rllib_config(process_datasets=True)
    super(AIRRLTrainer, self).__init__(
        rllib_config,
        env,
        logger_creator,
        remote_checkpoint_dir,
        sync_function_tpl,
    )
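# Both constructor variants above resolve the user-supplied config against
# the captured base_config, instantiate a throwaway trainer_cls to derive
# the final RLlib config, and then delegate to the wrapped RLlib Trainer's
# constructor; the newer variant passes arguments by keyword and replaces
# sync_function_tpl with a custom_syncer. A hedged usage sketch, assuming
# Ray AIR's RLTrainer API of this era (parameter names may differ across
# Ray versions):
from ray.air.config import ScalingConfig
from ray.train.rl import RLTrainer

trainer = RLTrainer(
    algorithm="PPO",
    scaling_config=ScalingConfig(num_workers=2, use_gpu=False),
    config={"env": "CartPole-v1", "framework": "torch"},
)
# as_trainable() produces the AIRRLTrainer subclass whose __init__ is shown above.
trainable_cls = trainer.as_trainable()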
def __init__(self, algo_class=None):
    """Initializes an ApexConfig instance."""
    super().__init__(algo_class=algo_class or ApexDQN)

    # fmt: off
    # __sphinx_doc_begin__
    # APEX-DQN settings overriding DQN ones:
    # .training()
    self.optimizer = merge_dicts(
        DQNConfig().optimizer, {
            "max_weight_sync_delay": 400,
            "num_replay_buffer_shards": 4,
            "debug": False,
        })
    self.n_step = 3
    self.train_batch_size = 512
    self.target_network_update_freq = 500000
    self.training_intensity = 1
    # Max number of in-flight requests to each sampling worker.
    # See the AsyncRequestsManager class for more details.
    # Tuning these values is important when running experiments with large
    # sample batches: if the sample batches are large, the object store may
    # fill up and start spilling objects to disk, which makes any
    # asynchronous requests, and thus the whole experiment, very slow.
    # You can inspect the object store during your experiment via a call to
    # `ray memory` on your head node, and by using the Ray dashboard. If the
    # object store is filling up, turn down the number of remote requests in
    # flight, or enable compression of timesteps in your experiment.
    self.max_requests_in_flight_per_sampler_worker = 2
    self.max_requests_in_flight_per_replay_worker = float("inf")
    self.timeout_s_sampler_manager = 0.0
    self.timeout_s_replay_manager = 0.0
    # APEX-DQN uses a distributed (non-local) replay buffer.
    self.replay_buffer_config = {
        "no_local_replay_buffer": True,
        # Specify prioritized replay by supplying a buffer type that supports
        # prioritization.
        "type": "MultiAgentPrioritizedReplayBuffer",
        "capacity": 2000000,
        # Alpha parameter for prioritized replay buffer.
        "prioritized_replay_alpha": 0.6,
        # Beta parameter for sampling from prioritized replay buffer.
        "prioritized_replay_beta": 0.4,
        # Epsilon to add to the TD errors when updating priorities.
        "prioritized_replay_eps": 1e-6,
        "learning_starts": 50000,
        # Whether all shards of the replay buffer must be co-located
        # with the learner process (running the execution plan).
        # This is preferred b/c the learner process should have quick
        # access to the data from the buffer shards, avoiding network
        # traffic each time samples from the buffer(s) are drawn.
        # Set this to False to relax this constraint and allow replay
        # shards to be created on node(s) other than the one on which
        # the learner is located.
        "replay_buffer_shards_colocated_with_driver": True,
        "worker_side_prioritization": True,
        # Deprecated key.
        "prioritized_replay": DEPRECATED_VALUE,
    }

    # .rollouts()
    self.num_workers = 32
    self.rollout_fragment_length = 50
    self.exploration_config = {
        "type": "PerWorkerEpsilonGreedy",
    }

    # .resources()
    self.num_gpus = 1

    # .reporting()
    self.min_time_s_per_iteration = 30
    self.min_sample_timesteps_per_iteration = 25000
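# Hedged usage sketch: assumes ApexConfig follows RLlib's AlgorithmConfig
# builder pattern (fluent .training()/.resources() setters plus .build());
# the exact setter keywords vary across RLlib versions.
config = (
    ApexConfig()
    .training(n_step=1, train_batch_size=256)
    .resources(num_gpus=0)
)
config.num_workers = 8  # override the 32-worker default set above
algo = config.build(env="CartPole-v1")
results = algo.train()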
def __init__(self, *args, **kwargs):
    super().__init__(*args, **kwargs)
    # Create a new config by merging the dicts.
    self._merged_config = merge_dicts(base_config, self.config)
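# In all of the snippets above, names like base_config, param_dict,
# trainer_cls, and rllib_trainer are not globals: they are closure
# variables captured from an enclosing as_trainable()-style factory.
# A hedged sketch of that pattern (the factory name and structure are
# hypothetical, mirroring the snippets rather than quoting Ray's source):
from ray.tune.utils import merge_dicts  # assumption: Ray's merge utility


def make_trainable(trainer_cls, base_config: dict):
    class TrainTrainable(trainer_cls):
        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)
            # base_config is captured from make_trainable's scope, which is
            # why the methods above can reference it without defining it.
            self._merged_config = merge_dicts(base_config, self.config)

    return TrainTrainable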