Example #1
0
 def _protected_ProcessTransition(
     self,
     brain: base.Brain,
     transition: base.Transition,
     step_idx: int,
 ) -> None:
     """Updates the brain from the given transition alone.

     Args:
         brain: Object whose UpdateFromTransitions method receives the batch.
         transition: The transition to learn from; it is wrapped in a
             one-element list.
         step_idx: Not used by this implementation.
     """
     single_item_batch = [transition]
     brain.UpdateFromTransitions(single_item_batch)
Example #2
0
 def _protected_ProcessTransition(
     self,
     brain: base.Brain,
     transition: base.Transition,
     step_idx: int,
 ) -> None:
     """Hands the new transition to the brain as a batch of size one.

     Args:
         brain: Receives the batch through UpdateFromTransitions.
         transition: The newly observed transition.
         step_idx: Ignored here; no scheduling is applied.
     """
     # No buffering: every transition triggers an update immediately.
     batch = [transition]
     brain.UpdateFromTransitions(batch)
Example #3
0
 def _protected_ProcessTransition(
     self,
     brain: base.Brain,
     transition: base.Transition,
     step_idx: int,
 ) -> None:
     """Stores the transition and periodically updates the brain from a sample.

     The transition is always added to self._experience. The brain is only
     updated when step_idx is a multiple of self._train_every_n_steps.

     Args:
         brain: Receives the sampled transitions via UpdateFromTransitions.
         transition: The transition to add to the experience store.
         step_idx: Index of the current step; decides whether to train now.
     """
     self._experience.AddTransition(transition)

     # Not a training step: the transition has been stored, nothing more to do.
     if step_idx % self._train_every_n_steps != 0:
         return

     sampled = self._experience.Sample(self._experience_sample_batch_size)
     brain.UpdateFromTransitions(sampled)
Example #4
0
 def _protected_ProcessTransition(
     self,
     brain: base.Brain,
     transition: base.Transition,
     step_idx: int,
 ) -> None:
     """Buffers the transition and updates the brain once the buffer is full.

     Args:
         brain: Receives the buffered transitions via UpdateFromTransitions.
         transition: The transition to append to self._memory.
         step_idx: Not used by this implementation.
     """
     self._memory.append(transition)

     # NOTE(review): the trigger is an exact-equality check, so it only fires
     # when the buffer length hits self._batch_size exactly -- confirm that
     # nothing else can grow self._memory past that size.
     if len(self._memory) != self._batch_size:
         return

     brain.UpdateFromTransitions(self._memory)
     # Rebind to a fresh list instead of clearing in place, so the list just
     # handed to the brain is left untouched.
     self._memory = []
Example #5
0
    def _protected_ProcessTransition(
        self,
        brain: base.Brain,
        transition: base.Transition,
        step_idx: int,
    ) -> None:
        """Adds the transition to the window and trains on n-step transitions.

        Each call appends the transition to self._memory and collects one
        result of self._GetNStepTransition(). When transition.sp is None, the
        remaining window is drained, collecting one more result per entry.
        Everything collected is passed to the brain in a single update.

        Args:
            brain: Receives the collected transitions via
                UpdateFromTransitions.
            transition: The newly observed transition; sp being None triggers
                the drain of self._memory.
            step_idx: Not used by this implementation.
        """
        self._memory.append(transition)
        collected = [self._GetNStepTransition()]

        # Drop the oldest entry once the window has reached its configured
        # length.
        window_is_full = len(self._memory) >= self._n_step_return
        if window_is_full:
            self._memory.pop(0)

        if transition.sp is None:
            # NOTE(review): if the window was not full above, nothing was
            # popped, so the first call below sees the same window as the call
            # at the top of this method -- confirm whether emitting that
            # transition twice is intended.
            while self._memory:
                collected.append(self._GetNStepTransition())
                self._memory.pop(0)

        brain.UpdateFromTransitions(collected)