예제 #1
0
 def _should_switch_skills(self, time_step: TimeStep, state):
     """Tell whether a new skill should be picked at the current step.

     A switch is signalled when any of the following holds:

     * ``state.steps`` is an exact multiple of ``self._num_steps_per_skill``;
     * ``time_step.is_first()`` is true;
     * ``time_step.is_last()`` is true.

     Args:
         time_step: the current time step; only ``is_first()`` and
             ``is_last()`` are queried.
         state: object exposing a ``steps`` counter (presumably the number
             of steps taken under the current skill -- confirm with the
             code that updates it).

     Returns:
         The ``|`` combination of the three conditions above.
     """
     skill_period_elapsed = (state.steps % self._num_steps_per_skill) == 0
     episode_begins = time_step.is_first()
     # The is_last term is only needed by `rollout_step`: there it marks an
     # episode end in the replay buffer used to train the policy `self._rl`.
     episode_ends = time_step.is_last()
     return skill_period_elapsed | episode_begins | episode_ends
예제 #2
0
 def _should_switch_action(self, time_step: TimeStep, state):
     """Tell whether a new action should be picked at the current step.

     A switch is signalled when ``state.steps`` equals zero, or when
     ``time_step`` is the first or the last step of an episode.

     Args:
         time_step: the current time step; only ``is_first()`` and
             ``is_last()`` are queried.
         state: object exposing a ``steps`` counter (presumably reset to
             zero when the current action's repeat budget is used up --
             confirm with the code that updates it).

     Returns:
         The ``|`` combination of the three conditions above.
     """
     counter_is_zero = state.steps == 0
     episode_begins = time_step.is_first()
     episode_ends = time_step.is_last()
     return counter_is_zero | episode_begins | episode_ends