def __init__(
        self,
        *args,
        td3_kwargs,
        her_kwargs,
        base_kwargs,
        **kwargs
):
    """Initialize both parent parts of this trainer: HERTrainer, then TD3.

    Args:
        *args: Positional arguments forwarded unchanged to ``TD3.__init__``.
        td3_kwargs: Keyword-only mapping unpacked into ``TD3.__init__``.
        her_kwargs: Keyword-only mapping unpacked into
            ``HERTrainer.__init__``.
        base_kwargs: Keyword-only mapping also unpacked into
            ``TD3.__init__``.
        **kwargs: Any remaining keyword arguments, forwarded to
            ``TD3.__init__``.

    Raises:
        AssertionError: If ``self.replay_buffer`` is not an
            ``ObsDictRelabelingBuffer`` once both initializers have run.
        TypeError: If ``kwargs``, ``td3_kwargs`` and ``base_kwargs`` share a
            key (Python rejects duplicate keyword arguments in a call).
    """
    # The HER part is initialized first and only receives its own kwargs.
    HERTrainer.__init__(self, **her_kwargs)

    # The three mappings are unpacked separately (not merged beforehand) so
    # that a key appearing in more than one of them is reported as an error
    # rather than silently overridden.
    TD3.__init__(
        self,
        *args,
        **kwargs,
        **td3_kwargs,
        **base_kwargs
    )

    # NOTE(review): replay_buffer is presumably set by TD3.__init__ -- confirm.
    buffer = self.replay_buffer
    assert isinstance(buffer, ObsDictRelabelingBuffer)
def __init__(
        self,
        env,
        qf1,
        qf2,
        exploration_policy,
        td3_kwargs,
        tdm_kwargs,
        base_kwargs,
        policy=None,
        eval_policy=None,
        replay_buffer=None,
        optimizer_class=optim.Adam,
        use_policy_saturation_cost=False,
        **kwargs
):
    """Initialize the TD3 part explicitly, then the next class in the MRO.

    Args:
        env: Forwarded to ``TD3.__init__`` as ``env``.
        qf1: Forwarded to ``TD3.__init__`` as ``qf1``.
        qf2: Forwarded to ``TD3.__init__`` as ``qf2``.
        exploration_policy: Forwarded to ``TD3.__init__``.
        td3_kwargs: Mapping unpacked into ``TD3.__init__``.
        tdm_kwargs: Mapping unpacked into ``super().__init__``.
        base_kwargs: Mapping also unpacked into ``TD3.__init__``.
        policy: Forwarded to ``TD3.__init__``; defaults to ``None``.
        eval_policy: Forwarded to ``TD3.__init__``; defaults to ``None``.
        replay_buffer: Forwarded to ``TD3.__init__``; defaults to ``None``.
        optimizer_class: Forwarded to ``TD3.__init__``; defaults to
            ``optim.Adam``.
        use_policy_saturation_cost: Stored on the instance as
            ``self.use_policy_saturation_cost``.
        **kwargs: Accepted but not used anywhere in this method; extra
            keyword arguments are silently dropped.

    Raises:
        TypeError: If ``td3_kwargs`` or ``base_kwargs`` repeats one of the
            explicitly forwarded keywords, or they share a key with each
            other.
    """
    # Arguments this constructor names explicitly and hands to TD3.
    explicit_td3_args = dict(
        env=env,
        qf1=qf1,
        qf2=qf2,
        policy=policy,
        exploration_policy=exploration_policy,
        replay_buffer=replay_buffer,
        eval_policy=eval_policy,
        optimizer_class=optimizer_class,
    )
    # Kept as three separate unpackings so that duplicate keys still raise
    # instead of being overridden by a dict merge.
    TD3.__init__(self, **explicit_td3_args, **td3_kwargs, **base_kwargs)

    # NOTE(review): super() resolves to whichever class follows this one in
    # the MRO (presumably the TDM base, given the argument name) -- confirm
    # against the class header, which is not visible here.
    super().__init__(**tdm_kwargs)

    self.use_policy_saturation_cost = use_policy_saturation_cost
def __init__(
        self,
        *args,
        td3_kwargs,
        her_kwargs,
        base_kwargs,
        **kwargs
):
    """Initialize the HER part, then the TD3 part, and check the buffer type.

    Args:
        *args: Positional arguments forwarded unchanged to ``TD3.__init__``.
        td3_kwargs: Keyword-only mapping unpacked into ``TD3.__init__``.
        her_kwargs: Keyword-only mapping unpacked into ``HER.__init__``.
        base_kwargs: Keyword-only mapping also unpacked into
            ``TD3.__init__``.
        **kwargs: Any remaining keyword arguments, forwarded to
            ``TD3.__init__``.

    Raises:
        AssertionError: If ``self.replay_buffer`` is not an instance of
            ``SimpleHerReplayBuffer``, ``RelabelingReplayBuffer`` or
            ``ObsDictRelabelingBuffer``.
        TypeError: If ``kwargs``, ``td3_kwargs`` and ``base_kwargs`` share a
            key (Python rejects duplicate keyword arguments in a call).
    """
    HER.__init__(self, **her_kwargs)

    # Unpack the mappings separately so overlapping keys raise rather than
    # silently overriding one another.
    TD3.__init__(self, *args, **kwargs, **td3_kwargs, **base_kwargs)

    # A single isinstance() with a tuple replaces the chain of three
    # `isinstance(...) or isinstance(...)` checks; the accepted types and
    # the order in which they are tried are unchanged.
    # NOTE(review): replay_buffer is presumably set by TD3.__init__ -- confirm.
    assert isinstance(
        self.replay_buffer,
        (
            SimpleHerReplayBuffer,
            RelabelingReplayBuffer,
            ObsDictRelabelingBuffer,
        ),
    )