Beispiel #1
0
Datei: v8.py Projekt: ymd-h/cpprb
    def test_multi_add(self):
        """Add two transitions per call and check the second call's ``obs``.

        With ``size`` 4, the first two-step ``add`` is asserted to return
        ``None``; the second is asserted to return a mapping whose ``'obs'``
        values are all close to 1.
        """
        buffer = NstepBuffer({'obs': {}, 'done': {}}, {"size": 4})

        first_result = buffer.add(obs=(1, 1), done=(0, 0))
        self.assertIsNone(first_result)

        second_result = buffer.add(obs=(1, 1), done=(0, 0))
        # A scalar expectation: assert_allclose broadcasts it over the output.
        expected_obs = np.array((1), dtype=np.float32)
        np.testing.assert_allclose(second_result['obs'], expected_obs)
Beispiel #2
0
    def test_gamma_large_step_add(self):
        """Add five transitions in one call and check the ``discounts`` output.

        The returned ``'discounts'`` is asserted to be a two-row column in
        which every entry equals ``0.5 * 0.5 * 0.5``.
        """
        gamma = 0.5
        buffer = NstepBuffer({'rew': {}, 'done': {}},
                             {"size": 4, "rew": "rew", "gamma": gamma})

        result = buffer.add(rew=(1, 1, 1, 1, 1), done=(0, 0, 0, 0, 0))

        three_step_discount = gamma * gamma * gamma
        expected = np.array((three_step_discount, three_step_discount),
                            dtype=np.float32).reshape(-1, 1)
        np.testing.assert_allclose(result['discounts'], expected)
Beispiel #3
0
    def test_gamma_multi_step(self):
        """Add two transitions per call and check the ``discounts`` output.

        The first ``add`` is asserted to return ``None``; the second is
        asserted to return ``'discounts'`` close to ``0.5 * 0.5 * 0.5``.
        """
        gamma = 0.5
        buffer = NstepBuffer({'rew': {}, 'done': {}},
                             {"size": 4, "rew": "rew", "gamma": gamma})

        first_result = buffer.add(rew=(1, 1), done=(0, 0))
        self.assertIsNone(first_result)

        second_result = buffer.add(rew=(1, 1), done=(0, 0))
        expected = np.array((gamma * gamma * gamma), dtype=np.float32)
        np.testing.assert_allclose(second_result['discounts'], expected)
Beispiel #4
0
    def test_next(self):
        """Check the ``next_obs`` value returned once three steps are stored.

        The first three single-step ``add`` calls are asserted to return
        ``None``.  Each of the following five calls passes ``next_obs=i`` and
        is asserted to return ``'next_obs'`` equal to that same ``i``.
        """
        nstep_config = {"size": 4, "next": "next_obs"}
        buffer = NstepBuffer({'next_obs': {}, 'done': {}}, nstep_config)

        # Warm-up: these calls are expected to produce no output yet.
        for _ in range(3):
            self.assertIsNone(buffer.add(next_obs=1, done=0))

        for i in range(5):
            with self.subTest(i=i):
                result = buffer.add(next_obs=(i), done=0)
                expected = np.array(i, dtype=np.float32).reshape(-1, 1)
                np.testing.assert_allclose(result["next_obs"], expected)
Beispiel #5
0
    def test_nstep_buffer(self):
        """Smoke-test ``NstepBuffer.add`` with JAX arrays as inputs.

        No return value is inspected; the test only requires that four
        consecutive ``add`` calls complete without raising.
        """
        env_dict = {"obs": {}, "rew": {}, "done": {}, "next_obs": {}}
        nstep_config = {"size": 3, "rew": "rew", "next": "next_obs"}
        buffer = NstepBuffer(env_dict, Nstep=nstep_config)

        # One independent single-element JAX array per field, in the same
        # keyword order as the fields are passed to ``add``.
        transition = {
            field: jnp.asarray([1])
            for field in ("obs", "rew", "done", "next_obs")
        }

        for i in range(4):
            with self.subTest(i=i):
                buffer.add(**transition)
Beispiel #6
0
    def test_gamma(self):
        """Check ``discounts`` for single-step adds with no episode end.

        The first three ``add`` calls are asserted to return ``None``; each
        of the next five is asserted to return ``'discounts'`` close to
        ``0.5 * 0.5 * 0.5``.
        """
        gamma = 0.5
        buffer = NstepBuffer({'rew': {}, 'done': {}},
                             {"size": 4, "rew": "rew", "gamma": gamma})

        # Warm-up: these calls are expected to produce no output yet.
        for _ in range(3):
            self.assertIsNone(buffer.add(rew=1, done=0))

        expected_discount = gamma * gamma * gamma
        for i in range(5):
            with self.subTest(i=i):
                result = buffer.add(rew=1, done=0)
                np.testing.assert_allclose(result["discounts"],
                                           expected_discount)
Beispiel #7
0
    def test_rew(self):
        """Check the accumulated ``rew`` for single-step adds of reward 1.

        The first three ``add`` calls are asserted to return ``None``; each
        of the next five is asserted to return ``'rew'`` close to
        ``1 + 0.5 + 0.5*0.5 + 0.5*0.5*0.5``.
        """
        gamma = 0.5
        buffer = NstepBuffer({"rew": {}, "done": {}},
                             {"size": 4, "rew": "rew", "gamma": gamma})

        # Warm-up: these calls are expected to produce no output yet.
        for _ in range(3):
            self.assertIsNone(buffer.add(rew=1, done=0))

        expected_reward = 1 + gamma + gamma * gamma + gamma * gamma * gamma
        for i in range(5):
            with self.subTest(i=i):
                result = buffer.add(rew=1, done=0)
                np.testing.assert_allclose(result["rew"], expected_reward)
Beispiel #8
0
Datei: v8.py Projekt: ymd-h/cpprb
    def test_gamma_large_step_add(self):
        """Add five transitions in one call and check the ``done`` output.

        The returned ``'done'`` is asserted to be close to a two-row column
        of zeros.
        """
        env_dict = {'rew': {}, 'done': {}}
        nstep_config = {"size": 4, "rew": "rew", "gamma": 0.5}
        buffer = NstepBuffer(env_dict, nstep_config)

        result = buffer.add(rew=(1, 1, 1, 1, 1), done=(0, 0, 0, 0, 0))

        expected_done = np.asarray([[0], [0]])
        np.testing.assert_allclose(result['done'], expected_done)
Beispiel #9
0
Datei: v8.py Projekt: ymd-h/cpprb
    def test_gamma_multi_step(self):
        """Add two transitions per call and check the ``done`` output.

        The first ``add`` is asserted to return ``None``; the second is
        asserted to return ``'done'`` close to ``[[0]]``.
        """
        env_dict = {'rew': {}, 'done': {}}
        nstep_config = {"size": 4, "rew": "rew", "gamma": 0.5}
        buffer = NstepBuffer(env_dict, nstep_config)

        first_result = buffer.add(rew=(1, 1), done=(0, 0))
        self.assertIsNone(first_result)

        second_result = buffer.add(rew=(1, 1), done=(0, 0))
        np.testing.assert_allclose(second_result['done'], np.asarray([[0]]))
Beispiel #10
0
    def test_gamma_with_done(self):
        """Check ``discounts`` when the second stored step has ``done=1``.

        The first three ``add`` calls (the second with ``done=1``) are
        asserted to return ``None``.  The next three calls are asserted to
        return ``'discounts'`` close to 0.5, 1 and ``0.5 * 0.5 * 0.5`` in
        that order.
        """
        gamma = 0.5
        buffer = NstepBuffer({"rew": {}, "done": {}},
                             {"size": 4, "rew": "rew", "gamma": gamma})

        # Warm-up steps; only the second one is flagged as done.
        for done_flag in (0, 1, 0):
            self.assertIsNone(buffer.add(rew=1, done=done_flag))

        # NOTE(review): the shorter discounts presumably reflect truncation at
        # the done step -- confirm against the NstepBuffer implementation.
        expected_discounts = (0.5, 1, gamma * gamma * gamma)
        for expected in expected_discounts:
            result = buffer.add(rew=1, done=0)
            np.testing.assert_allclose(result["discounts"], expected)
Beispiel #11
0
Datei: v8.py Projekt: ymd-h/cpprb
    def test_gamma_with_done(self):
        """Check the ``done`` output when the second stored step has ``done=1``.

        The first three ``add`` calls (the second with ``done=1``) are
        asserted to return ``None``.  The next three calls are asserted to
        return ``'done'`` close to ``[[1]]``, ``[[1]]`` and ``[[0]]`` in that
        order.
        """
        env_dict = {"rew": {}, "done": {}}
        nstep_config = {"size": 4, "rew": "rew", "gamma": 0.5}
        buffer = NstepBuffer(env_dict, nstep_config)

        # Warm-up steps; only the second one is flagged as done.
        for done_flag in (0, 1, 0):
            self.assertIsNone(buffer.add(rew=1, done=done_flag))

        for expected_flag in (1, 1, 0):
            result = buffer.add(rew=1, done=0)
            np.testing.assert_allclose(result["done"],
                                       np.asarray([[expected_flag]]))
Beispiel #12
0
Datei: v8.py Projekt: ymd-h/cpprb
    def test_large_step_add(self):
        """Add five transitions in one call and check the ``obs`` output.

        The returned ``'obs'`` is asserted to be close to a two-row column
        of ones.
        """
        buffer = NstepBuffer({'obs': {}, 'done': {}}, {"size": 4})

        result = buffer.add(obs=(1, 1, 1, 1, 1), done=(0, 0, 0, 0, 0))

        expected_obs = np.array((1, 1), dtype=np.float32).reshape(-1, 1)
        np.testing.assert_allclose(result['obs'], expected_obs)