Example #1
    def _per_store(self, i: int, data: BatchExperiences) -> None:
        # TODO: optimize
        q = self.queue[i]
        if len(q) == 0:  # If the n-step temp buffer is empty, just append.
            q.append(data)
            return

        if len(q) == self.n:  # Buffer full: flush the oldest transition.
            self._store_op(q.pop(0))
        if not NamedTupleStaticClass.check_equal(q[-1].obs_, data.obs):
            # The episode was truncated (a non-regular done): discard the
            # experiences in the n-step temp buffer and restart it. This
            # guarantees the replay buffer never holds a sequence shorter
            # than N, except ones ending in done; there (1 - done) is 0,
            # so the inexact gamma exponent does no harm.
            q.clear()
            q.append(data)
        else:
            # Fold the newest reward into every buffered transition, then
            # append the latest experience to the n-step temp buffer.
            _len = len(q)
            for j in range(_len):
                q[j] = q[j]._replace(
                    reward=q[j].reward + data.reward * self.gamma**(_len - j),
                    obs_=data.obs_,
                    done=data.done)
            q.append(data)
            if data.done:  # The new data is done, so flush the temp buffer.
                while q:  # (1 - done) zeroes out any incorrect n-step value.
                    self._store_op(q.pop())
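
To see what the reward-folding loop above computes, here is a minimal, runnable sketch. The Exp namedtuple and its field names (obs, reward, obs_, done) are stand-ins inferred from the snippet, not the library's actual BatchExperiences; the numbers only verify one step of the fold.

from collections import namedtuple

# Hypothetical stand-in for BatchExperiences; field names are inferred
# from the example above and are an assumption.
Exp = namedtuple('Exp', ['obs', 'reward', 'obs_', 'done'])

gamma = 0.99
q = [Exp(obs=0, reward=1.0, obs_=1, done=False),
     Exp(obs=1, reward=2.0, obs_=2, done=False)]
data = Exp(obs=2, reward=3.0, obs_=3, done=False)

# The same update as the loop in Example #1: fold the newest reward
# into each buffered transition with the matching power of gamma.
_len = len(q)
for j in range(_len):
    q[j] = q[j]._replace(
        reward=q[j].reward + data.reward * gamma**(_len - j),
        obs_=data.obs_,
        done=data.done)
q.append(data)

print(q[0].reward)  # 1.0 + 3.0 * 0.99**2 = 3.9403
print(q[1].reward)  # 2.0 + 3.0 * 0.99**1 = 4.97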
Example #2
    def _per_store(self, i: int, data: BatchExperiences) -> None:
        q = self.queue[i]
        if len(q) == 0:  # Empty temp buffer: just append.
            q.append(data)
            return
        if not NamedTupleStaticClass.check_equal(q[-1].obs_, data.obs):
            # Truncated episode: flush a copy of the buffered trajectory,
            # then restart the temp buffer with the new experience.
            self._store_op(q.copy())
            q.clear()
            q.append(data)
            return
        if data.done:
            # Terminal transition: append it, flush the completed
            # trajectory, and clear the temp buffer.
            q.append(data)
            self._store_op(q.copy())
            q.clear()
            return
        q.append(data)
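
A note on the design difference: Example #1 maintains the n-step return incrementally, so every transition it flushes already carries its accumulated discounted reward, while Example #2 flushes a copy of the raw trajectory on truncation or done and leaves the return computation to whatever consumes _store_op. A minimal sketch of that deferred computation, reusing the hypothetical Exp type from above (the helper name nstep_return is made up for illustration):

# Hypothetical consumer of a trajectory flushed by Example #2. Walking
# the list in reverse implements G_t = r_t + gamma * (1 - done_t) * G_{t+1}.
def nstep_return(trajectory, gamma=0.99):
    g = 0.0
    for exp in reversed(trajectory):
        g = exp.reward + gamma * (1.0 - float(exp.done)) * g
    return g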