def _should_switch_skills(self, time_step: TimeStep, state): should_switch_skills = ((state.steps % self._num_steps_per_skill) == 0) # is_last is only necessary for `rollout_step` because it marks an # episode end in the replay buffer for training the policy `self._rl`. return should_switch_skills | time_step.is_first() | time_step.is_last( )
def _should_switch_action(self, time_step: TimeStep, state): repeat_last_step = (state.steps == 0) return repeat_last_step | time_step.is_first() | time_step.is_last()