def __init__(self, observation_space, action_space, config):
    config = dict(
        ray.rllib.algorithms.marwil.marwil.MARWILConfig().to_dict(), **config
    )

    TorchPolicyV2.__init__(
        self,
        observation_space,
        action_space,
        config,
        max_seq_len=config["model"]["max_seq_len"],
    )

    ValueNetworkMixin.__init__(self, config)
    PostprocessAdvantages.__init__(self)

    # Not needed for pure BC.
    if config["beta"] != 0.0:
        # Set up a torch-var for the squared moving avg. advantage norm.
        self._moving_average_sqd_adv_norm = torch.tensor(
            [config["moving_average_sqd_adv_norm_start"]],
            dtype=torch.float32,
            requires_grad=False,
        ).to(self.device)

    # TODO: Don't require users to call this manually.
    self._initialize_loss_from_dummy_batch()

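# A small self-contained illustration (plain dicts, no RLlib; the values are
# made up for the example) of the dict(defaults, **overrides) merge used in
# the constructors above and below: the algorithm's default config supplies
# every key, and user-supplied keys win.
defaults = {"lr": 5e-5, "gamma": 0.99, "model": {"max_seq_len": 20}}
overrides = {"lr": 1e-4}

merged = dict(defaults, **overrides)
assert merged["lr"] == 1e-4     # user override wins
assert merged["gamma"] == 0.99  # default kept
# Note: this is a shallow merge; a nested dict such as "model" would be
# replaced wholesale if the overrides contained a "model" key.
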
def __init__(self, observation_space, action_space, config):
    config = dict(ray.rllib.agents.ppo.appo.DEFAULT_CONFIG, **config)

    # Although this is a no-op, we call __init__ here to make it clear
    # that base.__init__ will use the make_model() call.
    VTraceOptimizer.__init__(self)
    LearningRateSchedule.__init__(self, config["lr"], config["lr_schedule"])

    TorchPolicyV2.__init__(
        self,
        observation_space,
        action_space,
        config,
        max_seq_len=config["model"]["max_seq_len"],
    )

    EntropyCoeffSchedule.__init__(
        self, config["entropy_coeff"], config["entropy_coeff_schedule"]
    )
    ValueNetworkMixin.__init__(self, config)
    KLCoeffMixin.__init__(self, config)

    # TODO: Don't require users to call this manually.
    self._initialize_loss_from_dummy_batch()

    # Initiate TargetNetwork ops after loss initialization.
    TargetNetworkMixin.__init__(self)

def __init__(
    self,
    observation_space: gym.spaces.Space,
    action_space: gym.spaces.Space,
    config: AlgorithmConfigDict,
):
    config = dict(ray.rllib.algorithms.ddpg.ddpg.DDPGConfig().to_dict(), **config)

    # Create global step for counting the number of update operations.
    self.global_step = 0

    # Validate action space for DDPG.
    validate_spaces(self, observation_space, action_space)

    TorchPolicyV2.__init__(
        self,
        observation_space,
        action_space,
        config,
        max_seq_len=config["model"]["max_seq_len"],
    )

    ComputeTDErrorMixin.__init__(self)

    # TODO: Don't require users to call this manually.
    self._initialize_loss_from_dummy_batch()

    TargetNetworkMixin.__init__(self)

def __init__(self, observation_space, action_space, config):
    config = dict(ray.rllib.algorithms.pg.PGConfig().to_dict(), **config)

    TorchPolicyV2.__init__(
        self,
        observation_space,
        action_space,
        config,
        max_seq_len=config["model"]["max_seq_len"],
    )

    # TODO: Don't require users to call this manually.
    self._initialize_loss_from_dummy_batch()

def __init__(self, observation_space, action_space, config):
    config = dict(ray.rllib.algorithms.maml.maml.DEFAULT_CONFIG, **config)
    validate_config(config)

    TorchPolicyV2.__init__(
        self,
        observation_space,
        action_space,
        config,
        max_seq_len=config["model"]["max_seq_len"],
    )

    KLCoeffMixin.__init__(self, config)
    ValueNetworkMixin.__init__(self, config)

    # TODO: Don't require users to call this manually.
    self._initialize_loss_from_dummy_batch()

def __init__(
    self,
    observation_space: gym.spaces.Space,
    action_space: gym.spaces.Space,
    config: TrainerConfigDict,
):
    # The target model is assigned later, in self.make_model().
    self.target_model = None
    self._is_action_discrete = isinstance(action_space, gym.spaces.Discrete)

    TorchPolicyV2.__init__(
        self,
        observation_space,
        action_space,
        config,
        max_seq_len=config["model"]["max_seq_len"],
    )

def __init__(self, observation_space, action_space, config):
    config = dict(ray.rllib.agents.a3c.a3c.A3CConfig().to_dict(), **config)

    TorchPolicyV2.__init__(
        self,
        observation_space,
        action_space,
        config,
        max_seq_len=config["model"]["max_seq_len"],
    )

    ValueNetworkMixin.__init__(self, config)
    LearningRateSchedule.__init__(self, config["lr"], config["lr_schedule"])
    EntropyCoeffSchedule.__init__(
        self, config["entropy_coeff"], config["entropy_coeff_schedule"]
    )

    # TODO: Don't require users to call this manually.
    self._initialize_loss_from_dummy_batch()

def __init__(self, observation_space, action_space, config):
    config = dict(ray.rllib.agents.impala.impala.DEFAULT_CONFIG, **config)

    VTraceOptimizer.__init__(self)
    # Need to initialize the learning rate variable before calling
    # TorchPolicyV2.__init__.
    LearningRateSchedule.__init__(self, config["lr"], config["lr_schedule"])
    EntropyCoeffSchedule.__init__(
        self, config["entropy_coeff"], config["entropy_coeff_schedule"]
    )

    TorchPolicyV2.__init__(
        self,
        observation_space,
        action_space,
        config,
        max_seq_len=config["model"]["max_seq_len"],
    )

    # TODO: Don't require users to call this manually.
    self._initialize_loss_from_dummy_batch()

def __init__(self, observation_space, action_space, config):
    config = dict(ray.rllib.algorithms.ppo.ppo.PPOConfig().to_dict(), **config)
    # TODO: Move into Policy API, if needed at all here. Why not move this
    #  into `PPOConfig`?
    validate_config(config)

    TorchPolicyV2.__init__(
        self,
        observation_space,
        action_space,
        config,
        max_seq_len=config["model"]["max_seq_len"],
    )

    ValueNetworkMixin.__init__(self, config)
    LearningRateSchedule.__init__(self, config["lr"], config["lr_schedule"])
    EntropyCoeffSchedule.__init__(
        self, config["entropy_coeff"], config["entropy_coeff_schedule"]
    )
    KLCoeffMixin.__init__(self, config)

    # TODO: Don't require users to call this manually.
    self._initialize_loss_from_dummy_batch()

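# A tiny generic illustration (not RLlib code; the class names here are
# invented) of the explicit-mixin-init style used in the constructors above:
# the mixins do not chain through super().__init__, so each policy calls every
# mixin's __init__ by hand, which keeps the initialization order explicit
# (e.g. schedules before or after the base class, target-network ops only
# after the loss has been initialized).
class Base:
    def __init__(self, cfg):
        self.cfg = cfg


class ScheduleMixin:
    def __init__(self, lr):
        self.cur_lr = lr


class MyPolicy(ScheduleMixin, Base):
    def __init__(self, cfg):
        # Initialize the base class first, then the mixin, in a chosen order.
        Base.__init__(self, cfg)
        ScheduleMixin.__init__(self, cfg["lr"])


p = MyPolicy({"lr": 1e-4})
assert p.cur_lr == 1e-4
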
def set_weights(self: TorchPolicyV2, weights):
    # Makes sure that whenever we restore weights for this policy's
    # model, we sync the target network (from the main model)
    # at the same time.
    TorchPolicyV2.set_weights(self, weights)
    self.update_target()

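# A minimal, generic sketch (illustrative names, not RLlib's actual
# implementation) of the "sync target net on weight restore" pattern that the
# override above implements: whenever the main model's weights are restored,
# the target network is immediately re-synced so that both copies agree.
import copy

import torch.nn as nn


class TargetSyncSketch:
    def __init__(self, model: nn.Module):
        self.model = model
        self.target_model = copy.deepcopy(model)

    def update_target(self) -> None:
        # Hard-copy the main model's parameters into the target model.
        self.target_model.load_state_dict(self.model.state_dict())

    def set_weights(self, weights) -> None:
        # Restore the main model, then sync the target so both stay consistent.
        self.model.load_state_dict(weights)
        self.update_target()


# Usage: restoring a saved state dict updates both the model and its target.
policy = TargetSyncSketch(nn.Linear(4, 2))
saved = copy.deepcopy(policy.model.state_dict())
policy.set_weights(saved)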