Esempio n. 1
0
 def test_sampling(self):
     """
     Check that sampling from a NoReplacementQueue splits the stored
     experiences into per-episode sequences.

     Each experience is a tuple ``(obs, r, a, e)``; the assertions read
     index 2 (the ``a`` column) to identify which experiences ended up in
     which sampled sequence.
     """
     queue = NoReplacementQueue()

     def check_sequence(seqs, idx, expected_actions):
         # The sequence at `idx` must hold exactly the expected actions,
         # in order.
         self.assertEqual(len(seqs[idx]), len(expected_actions))
         for step, action in enumerate(expected_actions):
             self.assertEqual(seqs[idx][step][2], [action])

     # Columns: (r, a, e). The second entry closes the first episode.
     initial_steps = [
         (1, 1, 0),
         (0, -1, 1),
         (1, 2, 0),
         (1, 3, 0),
         (1, 4, 0),
     ]
     for reward, action, episode_end in initial_steps:
         queue.add((np.zeros(10), [reward], [action], [episode_end]))

     sequences = queue.sample(self.is_episode_end)
     # One experience is expected to stay in the queue after sampling.
     self.assertEqual(len(queue), 1)
     self.assertEqual(len(sequences), 2)
     check_sequence(sequences, 0, [1, -1])
     check_sequence(sequences, 1, [2, 3, 4])

     # Add one more experience (r=0, a=-2, e=1) and sample again.
     queue.add((np.zeros(10), [0], [-2], [1]))
     sequences = queue.sample(self.is_episode_end)
     self.assertEqual(len(queue), 0)
     self.assertEqual(len(sequences), 1)
     # The new sequence starts with the experience whose action is [4].
     check_sequence(sequences, 0, [4, -2])
     self.assertEqual(len(queue), 0)
Esempio n. 2
0
class OnlineHelper(AgentHelper):
    """
    Helper for online learning: `learn()` is called once every
    `sample_interval` steps.

    The calling `Agent` stays blocked until the learning call returns.
    """

    def __init__(self, name, communicator, sample_interval=5):
        """
        Initialize the base `AgentHelper` with the given arguments and
        create an empty experience queue.
        """
        super(OnlineHelper, self).__init__(name, communicator, sample_interval)
        # Past experiences are buffered in a NoReplacementQueue.
        queue = NoReplacementQueue()
        self.exp_queue = queue

    @staticmethod
    def exp_replay():
        """
        Always return False.

        NOTE(review): presumably signals that this helper does not use
        experience replay -- confirm against the callers.
        """
        uses_replay = False
        return uses_replay

    def add_experience(self, e):
        """Add the experience `e` to the internal queue."""
        queue = self.exp_queue
        queue.add(e)

    def sample_experiences(self):
        """Return whatever the internal queue's `sample()` produces."""
        sampled = self.exp_queue.sample()
        return sampled