예제 #1
0
 def from_discrete_action_model_parameters(
     cls, params: rlp.DiscreteActionModelParameters
 ):
     """Create an instance of ``cls`` from a discrete-action parameter bundle.

     Args:
         params: Source parameters. The ``actions``, ``rl`` and
             ``evaluation`` fields are passed through as-is, while selected
             fields of the ``rainbow`` and ``training`` sub-sections are
             flattened into individual keyword arguments.

     Returns:
         Whatever ``cls(...)`` returns for the assembled keyword arguments.
     """
     # NOTE(review): takes ``cls`` and calls it, so this is presumably
     # decorated with @classmethod outside the visible lines -- confirm.
     rainbow = params.rainbow
     training = params.training

     # Both optimizers share the training section's optimizer and learning
     # rate; only the source of the L2 decay differs between them.
     cpe_optimizer = rlp.OptimizerParameters(
         optimizer=training.optimizer,
         learning_rate=training.learning_rate,
         l2_decay=training.l2_decay,
     )
     main_optimizer = rlp.OptimizerParameters(
         optimizer=training.optimizer,
         learning_rate=training.learning_rate,
         # The main optimizer reads its decay from the rainbow section.
         l2_decay=rainbow.c51_l2_decay,
     )

     return cls(
         actions=params.actions,
         rl=params.rl,
         double_q_learning=rainbow.double_q_learning,
         num_atoms=rainbow.num_atoms,
         minibatch_size=training.minibatch_size,
         minibatches_per_step=training.minibatches_per_step,
         cpe_optimizer=cpe_optimizer,
         optimizer=main_optimizer,
         evaluation=params.evaluation,
     )
예제 #2
0
 def from_discrete_action_model_parameters(
     cls, params: DiscreteActionModelParameters
 ):
     """Create an instance of ``cls`` from a discrete-action parameter bundle.

     Args:
         params: Source parameters. The ``actions``, ``rl`` and
             ``evaluation`` fields are passed through as-is, while selected
             fields of the ``rainbow`` and ``training`` sub-sections are
             flattened into individual keyword arguments.

     Returns:
         Whatever ``cls(...)`` returns for the assembled keyword arguments.
     """
     # NOTE(review): takes ``cls`` and calls it, so this is presumably
     # decorated with @classmethod outside the visible lines -- confirm.
     rainbow = params.rainbow
     training = params.training

     # A BCQ config is only built when the rainbow section's ``bcq`` flag is
     # truthy; otherwise ``None`` is passed for ``bcq``.
     if rainbow.bcq:
         bcq_config = BCQConfig(drop_threshold=rainbow.bcq_drop_threshold)
     else:
         bcq_config = None

     optimizer_params = rlp.OptimizerParameters(
         optimizer=training.optimizer,
         learning_rate=training.learning_rate,
         l2_decay=training.l2_decay,
     )

     return cls(
         actions=params.actions,
         rl=params.rl,
         double_q_learning=rainbow.double_q_learning,
         bcq=bcq_config,
         minibatch_size=training.minibatch_size,
         minibatches_per_step=training.minibatches_per_step,
         optimizer=optimizer_params,
         evaluation=params.evaluation,
     )