Code Example #1
File: save_load.py Project: ymd-h/cpprb
    def test_basic(self):
        """
        Basic Test Case

        The loaded buffer has the same transitions as the saved one.
        """
        buffer_size = 4
        env_dict = {"a": {}}

        rb1 = ReplayBuffer(buffer_size, env_dict)
        rb2 = ReplayBuffer(buffer_size, env_dict)
        rb3 = ReplayBuffer(buffer_size, env_dict)

        a = [1, 2, 3, 4]

        rb1.add(a=a)

        fname = "basic.npz"
        rb1.save_transitions(fname)
        rb2.load_transitions(fname)
        rb3.load_transitions(v(1,fname))

        t1 = rb1.get_all_transitions()
        t2 = rb2.get_all_transitions()
        t3 = rb3.get_all_transitions()

        np.testing.assert_allclose(t1["a"], t2["a"])
        np.testing.assert_allclose(t1["a"], t3["a"])
Code Example #2
from typing import Dict, Sequence

import numpy as np
from cpprb import ReplayBuffer as CPPRB  # assumed import: cpprb's ReplayBuffer


class ReplayBuffer:
    def __init__(self, size, env_dict, n_step_dict=None, min_storage=10000, done_string="done"):
        super().__init__()
        self.done_string = done_string
        self.min_storage = min_storage
        cpprb_args = {
            "size": size,
            "env_dict": env_dict,
            "Nstep": n_step_dict
        }
        self.buffer = CPPRB(**cpprb_args)

    def add(self, data: Sequence[Dict[str, np.ndarray]]) -> None:
        for d in data:
            self.buffer.add(**d)
            if d[self.done_string]:
                self.buffer.on_episode_end()

    def sample(self, size: int) -> Dict[str, np.ndarray]:
        if self.buffer.get_stored_size() < self.min_storage:
            print(
                f"stored sample {self.buffer.get_stored_size()} is smaller than minimum "
                f"storage size {self.min_storage}. Returning None."
            )
            return None
        else:
            return self.buffer.sample(size)
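A minimal usage sketch for this wrapper (the env_dict layout, sizes, and transition values are illustrative assumptions):

buf = ReplayBuffer(
    size=int(1e5),
    env_dict={"obs": {"shape": 4}, "act": {"shape": 1}, "rew": {}, "done": {}},
    min_storage=2,
)
buf.add([
    {"obs": np.zeros(4), "act": np.zeros(1), "rew": 0.0, "done": 0},
    {"obs": np.ones(4), "act": np.ones(1), "rew": 1.0, "done": 1},
])
batch = buf.sample(2)  # None until min_storage transitions are stored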
Code Example #3
import gym
from cpprb import ReplayBuffer


def explorer(global_rb, env_dict, is_training_done, queue):
    local_buffer_size = int(1e+2)
    local_rb = ReplayBuffer(local_buffer_size, env_dict)

    # MyModel is a user-defined model exposing get_action, abs_TD_error,
    # and a settable weights attribute.
    model = MyModel()
    env = gym.make("CartPole-v1")

    obs = env.reset()
    while not is_training_done.is_set():
        if not queue.empty():
            w = queue.get()
            model.weights = w

        action = model.get_action(obs)
        next_obs, reward, done, _ = env.step(action)
        local_rb.add(obs=obs,act=action,rew=reward,next_obs=next_obs,done=done)

        if done:
            local_rb.on_episode_end()
            obs = env.reset()
        else:
            obs = next_obs

        if local_rb.get_stored_size() == local_buffer_size:
            local_sample = local_rb.get_all_transitions()
            local_rb.clear()

            absTD = model.abs_TD_error(local_sample)
            global_rb.add(**local_sample,priorities=absTD)
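A sketch of how this explorer could be launched (assuming cpprb's multi-process MPPrioritizedReplayBuffer, since global_rb.add receives priorities; the env_dict shapes, buffer size, and the learner loop are placeholders):

from multiprocessing import Event, Process, Queue

from cpprb import MPPrioritizedReplayBuffer  # assumed multi-process prioritized buffer

env_dict = {"obs": {"shape": 4}, "act": {}, "rew": {},
            "next_obs": {"shape": 4}, "done": {}}
global_rb = MPPrioritizedReplayBuffer(int(1e6), env_dict)

is_training_done = Event()
q = Queue()

p = Process(target=explorer, args=(global_rb, env_dict, is_training_done, q))
p.start()
# ... a learner process would sample from global_rb and push fresh weights into q ...
is_training_done.set()
p.join()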
Code Example #4
    def test_buffer(self):

        buffer_size = 256
        obs_shape = (15,15)
        act_dim = 5

        N = 512

        erb = ReplayBuffer(buffer_size,{"obs":{"shape": obs_shape},
                                        "act":{"shape": act_dim},
                                        "rew":{},
                                        "next_obs":{"shape": obs_shape},
                                        "done":{}})

        for i in range(N):
            obs = np.full(obs_shape,i,dtype=np.double)
            act = np.full(act_dim,i,dtype=np.double)
            rew = i
            next_obs = obs + 1
            done = 0

            erb.add(obs=obs,act=act,rew=rew,next_obs=next_obs,done=done)

        es = erb._encode_sample(range(buffer_size))

        erb.sample(32)

        erb.clear()

        self.assertEqual(erb.get_next_index(),0)
        self.assertEqual(erb.get_stored_size(),0)
Code Example #5
import numpy
from cpprb import ReplayBuffer, create_env_dict, create_before_add_func


class buffer_class:
    def __init__(self, max_length, seed_number, env):
        env_dict = create_env_dict(env)

        #override the observation length in the replay memory
        env_dict['obs'] = {"dtype": numpy.float32, "shape": (17, )}
        env_dict['next_obs'] = {"dtype": numpy.float32, "shape": (17, )}
        print('!!!!', env_dict['obs'])
        self.before_add = create_before_add_func(env)
        self.storage = ReplayBuffer(max_length, env_dict)

    def append(self, s, a, r, done, sp):
        self.storage.add(
            **self.before_add(obs=s, act=a, rew=r, done=done, next_obs=sp))

    def sample(self, batch_size):
        batch = self.storage.sample(batch_size)
        s_matrix = batch['obs']
        a_matrix = batch['act']
        r_matrix = batch['rew']
        done_matrix = batch['done']
        sp_matrix = batch['next_obs']
        return s_matrix, a_matrix, r_matrix, done_matrix, sp_matrix

    def __len__(self):
        return self.storage.get_stored_size()
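A hypothetical usage sketch (the Walker2d environment is an assumption; the hard-coded (17,) observation shape above matches MuJoCo tasks with 17-dimensional observations):

import gym

env = gym.make("Walker2d-v2")  # 17-dimensional observations
buffer = buffer_class(max_length=int(1e6), seed_number=0, env=env)

s = env.reset()
a = env.action_space.sample()
sp, r, done, _ = env.step(a)
buffer.append(s, a, r, done, sp)

s_b, a_b, r_b, d_b, sp_b = buffer.sample(batch_size=1)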
Code Example #6
File: issue.py Project: ymd-h/cpprb
    def test_stack_compress(self):
        bsize = 10
        odim = 2
        ssize = 2
        rb = ReplayBuffer(bsize, {"a": {
            "shape": (odim, ssize)
        }},
                          stack_compress="a")
        a = np.random.rand(odim, bsize + ssize - 1)

        for i in range(bsize):
            rb.add(a=a[:, i:i + ssize])

        _a = rb.get_all_transitions()["a"]
        for i in range(bsize):
            with self.subTest(i=i, label="without cache"):
                np.testing.assert_allclose(_a[i], a[:, i:i + ssize])

        for i in range(bsize):
            rb._encode_sample([i])

        rb.clear()

        for i in range(bsize):
            rb.add(a=a[:, i:i + ssize])
            rb.on_episode_end()

        _a = rb.get_all_transitions()["a"]
        for i in range(bsize):
            with self.subTest(i=i, label="without cache"):
                np.testing.assert_allclose(_a[i], a[:, i:i + ssize])

        for i in range(bsize):
            rb._encode_sample([i])
Code Example #7
    def test_with_one(self):
        buffer_size = 32
        obs_shape = 3
        act_shape = 4

        rb = ReplayBuffer(buffer_size, {
            "obs": {
                "shape": obs_shape
            },
            "act": {
                "shape": act_shape
            },
            "done": {}
        })

        v = {
            "obs": np.ones(shape=obs_shape),
            "act": np.zeros(shape=act_shape),
            "done": 0
        }

        rb.add(**v)

        tx = rb.get_all_transitions()

        for key in ["obs", "act", "done"]:
            with self.subTest(key=key):
                np.testing.assert_allclose(tx[key],
                                           np.asarray(v[key]).reshape((1, -1)))
Code Example #8
import numpy as np
import torch
from cpprb import ReplayBuffer


# Network and Lerper are user-defined elsewhere: a Q-network with noisy layers
# (it exposes reset_noise) and a linear-interpolation epsilon scheduler.
class Agent:
    def __init__(self, learn_rate, state_shape, num_actions, batch_size):
        self.mem_size=100000
        self.gamma = 0.99
        self.action_space = list(range(num_actions))
        self.batch_size = batch_size

        self.epsilon = Lerper(start=1.0, end=0.01, num_steps=2000)

        self.memory = ReplayBuffer(
            self.mem_size, 
            {   "obs":      { "shape": state_shape  },
                "act":      { "shape": 1            },
                "rew":      {                       },
                "next_obs": { "shape": state_shape  },
                "done":     { "shape": 1            }})

        self.net = Network(learn_rate, state_shape, num_actions)

    def choose_action(self, observation):
        state = torch.tensor(observation).float().detach()
        state = state.to(self.net.device)
        state = state.unsqueeze(0)

        q_values = self.net(state)
        action = torch.argmax(q_values).item()
        return action

    def store_memory(self, state, action, reward, next_state, done):
        self.memory.add(obs=state, act=action, rew=reward, next_obs=next_state, done=done)  

    def learn(self):
        if self.memory.get_stored_size() < self.batch_size:
            return
    
        batch = self.memory.sample(self.batch_size)
            
        states  = torch.tensor( batch["obs"]                     ).to(self.net.device)
        actions = torch.tensor( batch["act"],   dtype=torch.int64).to(self.net.device).T[0]
        rewards = torch.tensor( batch["rew"]                     ).to(self.net.device).T[0]
        states_ = torch.tensor( batch["next_obs"]                ).to(self.net.device)
        dones   = torch.tensor( batch["done"],  dtype=torch.bool ).to(self.net.device).T[0]

        batch_index = np.arange(self.batch_size, dtype=np.int64)

        q_values  =   self.net(states)[batch_index, actions]
        q_values_ =   self.net(states_)

        action_qs_ = torch.max(q_values_, dim=1)[0]
        action_qs_[dones] = 0.0
        q_target = rewards + self.gamma * action_qs_

        td = q_target - q_values

        self.net.optimizer.zero_grad()
        loss = (td ** 2.0).mean()
        loss.backward()
        self.net.optimizer.step()

        self.net.reset_noise()
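A minimal training-loop sketch for this agent (a classic-gym CartPole environment and the hyperparameters are assumptions; Network and Lerper must be supplied):

import gym

env = gym.make("CartPole-v1")
agent = Agent(learn_rate=1e-3,
              state_shape=env.observation_space.shape,
              num_actions=env.action_space.n,
              batch_size=64)

obs = env.reset()
for _ in range(10000):
    act = agent.choose_action(obs)
    next_obs, rew, done, _ = env.step(act)
    agent.store_memory(obs, act, rew, next_obs, done)
    agent.learn()
    obs = env.reset() if done else next_obs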
Code Example #9
File: features.py Project: ymd-h/cpprb
    def test_memmap(self):
        rb = ReplayBuffer(32, {"done": {}}, mmap_prefix="mmap")

        for _ in range(1000):
            rb.add(done=0.0)

        self.assertTrue(os.path.exists("mmap_done.dat"))
Code Example #10
File: v8.py Project: ymd-h/cpprb
    def test_nstep_with_memory_compress(self):
        rb = ReplayBuffer(32, {
            "obs": {
                "shape": (16, 16)
            },
            'rew': {},
            'done': {}
        },
                          next_of="obs",
                          stack_compress="obs",
                          Nstep={
                              "size": 4,
                              "rew": "rew"
                          })

        self.assertIs(
            rb.add(obs=(np.ones((16, 16))),
                   next_obs=(np.ones((16, 16))),
                   rew=1,
                   done=0), None)
        self.assertIs(
            rb.add(obs=(np.ones((16, 16))),
                   next_obs=(np.ones((16, 16))),
                   rew=1,
                   done=0), None)
        self.assertIs(
            rb.add(obs=(np.ones((16, 16))),
                   next_obs=(np.ones((16, 16))),
                   rew=1,
                   done=0), None)
        self.assertEqual(
            rb.add(obs=(np.ones((16, 16))),
                   next_obs=(np.ones((16, 16))),
                   rew=1,
                   done=0), 0)
Code Example #11
File: save_load.py Project: ymd-h/cpprb
    def test_Nstep_incompatibility(self):
        """
        Raise ValueError when the Nstep settings are incompatible.
        """
        buffer_size = 10
        env_dict = {"done": {}}
        Nstep = {"size": 3, "gamma": 0.99}

        rb1 = ReplayBuffer(buffer_size, env_dict, Nstep=Nstep)
        rb2 = ReplayBuffer(buffer_size, env_dict)
        rb3 = ReplayBuffer(buffer_size, env_dict)

        d = [0, 0, 0, 0, 1]

        rb1.add(done=d)
        rb1.on_episode_end()

        fname="Nstep_raise.npz"
        rb1.save_transitions(fname)

        with self.assertRaises(ValueError):
            rb2.load_transitions(fname)

        with self.assertRaises(ValueError):
            rb3.load_transitions(v(1,fname))
Code Example #12
File: save_load.py Project: ymd-h/cpprb
    def test_unsafe_next_of_stack_compress(self):
        """
        Load next_of and stack_compress transitions
        """
        buffer_size = 10
        env_dict = {"a": {"shape": 3}}

        rb1 = ReplayBuffer(buffer_size, env_dict, next_of="a", stack_compress="a")
        rb2 = ReplayBuffer(buffer_size, env_dict, next_of="a", stack_compress="a")
        rb3 = ReplayBuffer(buffer_size, env_dict, next_of="a", stack_compress="a")

        a = [[1, 2, 3],
             [2, 3, 4],
             [3, 4, 5],
             [4, 5, 6],
             [5, 6, 7],
             [6, 7, 8]]

        rb1.add(a=a[:-1], next_a=a[1:])

        fname="unsafe_next_of_stack_compress.npz"
        rb1.save_transitions(fname, safe=False)
        rb2.load_transitions(fname)
        rb3.load_transitions(v(1,fname))

        t1 = rb1.get_all_transitions()
        t2 = rb2.get_all_transitions()
        t3 = rb3.get_all_transitions()

        np.testing.assert_allclose(t1["a"], t2["a"])
        np.testing.assert_allclose(t1["next_a"], t2["next_a"])
        np.testing.assert_allclose(t1["a"], t3["a"])
        np.testing.assert_allclose(t1["next_a"], t3["next_a"])
Code Example #13
File: save_load.py Project: ymd-h/cpprb
    def test_stack_compress(self):
        """
        Load stack_compress transitions
        """
        buffer_size = 10
        env_dict = {"a": {"shape": 3}}

        rb1 = ReplayBuffer(buffer_size, env_dict, stack_compress="a")
        rb2 = ReplayBuffer(buffer_size, env_dict, stack_compress="a")
        rb3 = ReplayBuffer(buffer_size, env_dict, stack_compress="a")

        a = [[1, 2, 3],
             [2, 3, 4],
             [3, 4, 5],
             [4, 5, 6]]

        rb1.add(a=a)

        fname="stack_compress.npz"
        rb1.save_transitions(fname)
        rb2.load_transitions(fname)
        rb3.load_transitions(v(1,fname))

        t1 = rb1.get_all_transitions()
        t2 = rb2.get_all_transitions()
        t3 = rb3.get_all_transitions()

        np.testing.assert_allclose(t1["a"], t2["a"])
        np.testing.assert_allclose(t1["a"], t3["a"])
Code Example #14
File: save_load.py Project: ymd-h/cpprb
    def test_incompatible_unsafe_stack_compress(self):
        """
        Load incompatible stack_compress transitions with unsafe mode
        """
        buffer_size = 10
        env_dict = {"a": {"shape": 3}}

        rb1 = ReplayBuffer(buffer_size, env_dict, stack_compress="a")
        rb2 = ReplayBuffer(buffer_size, env_dict)
        rb3 = ReplayBuffer(buffer_size, env_dict)

        a = [[1, 2, 3],
             [2, 3, 4],
             [3, 4, 5],
             [4, 5, 6]]

        rb1.add(a=a)

        fname="incompatible_unsafe_stack_compress.npz"
        rb1.save_transitions(fname, safe=False)
        rb2.load_transitions(fname)
        rb3.load_transitions(fname)

        t1 = rb1.get_all_transitions()
        t2 = rb2.get_all_transitions()
        t3 = rb3.get_all_transitions()

        np.testing.assert_allclose(t1["a"], t2["a"])
        np.testing.assert_allclose(t1["a"], t3["a"])
Code Example #15
File: save_load.py Project: ymd-h/cpprb
    def test_incompatible_unsafe_next_of(self):
        """
        Load incompatible next_of transitions with unsafe mode
        """
        buffer_size = 10
        env_dict1 = {"a": {}}
        env_dict2 = {"a": {}, "next_a": {}}

        rb1 = ReplayBuffer(buffer_size, env_dict1, next_of="a")
        rb2 = ReplayBuffer(buffer_size, env_dict2)
        rb3 = ReplayBuffer(buffer_size, env_dict2)

        a = [1, 2, 3, 4, 5, 6]

        rb1.add(a=a[:-1], next_a=a[1:])

        fname="unsafe_incompatible_next_of.npz"
        rb1.save_transitions(fname, safe=False)
        rb2.load_transitions(fname)
        rb3.load_transitions(v(1,fname))

        t1 = rb1.get_all_transitions()
        t2 = rb2.get_all_transitions()
        t3 = rb3.get_all_transitions()

        np.testing.assert_allclose(t1["a"], t2["a"])
        np.testing.assert_allclose(t1["next_a"], t2["next_a"])
        np.testing.assert_allclose(t1["a"], t3["a"])
        np.testing.assert_allclose(t1["next_a"], t3["next_a"])
Code Example #16
File: save_load.py Project: ymd-h/cpprb
    def test_fulled_unsafe_next_of(self):
        """
        Load into an already filled buffer.
        """
        buffer_size = 10
        env_dict = {"a": {}}

        rb1 = ReplayBuffer(buffer_size, env_dict, next_of="a")
        rb2 = ReplayBuffer(buffer_size, env_dict, next_of="a")
        rb3 = ReplayBuffer(buffer_size, env_dict, next_of="a")

        a = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]

        rb1.add(a=a[:-1], next_a=a[1:])

        fname="fulled_unsafe_next_of.npz"
        rb1.save_transitions(fname, safe=False)
        rb2.load_transitions(fname)
        rb3.load_transitions(v(1,fname))

        t1 = rb1.get_all_transitions()
        t2 = rb2.get_all_transitions()
        t3 = rb3.get_all_transitions()

        np.testing.assert_allclose(t1["a"], t2["a"])
        np.testing.assert_allclose(t1["next_a"], t2["next_a"])
        np.testing.assert_allclose(t1["a"], t3["a"])
        np.testing.assert_allclose(t1["next_a"], t3["next_a"])
Code Example #17
    def test_train(self):
        agent = DQN(
            state_shape=self.env.observation_space.shape,
            action_dim=self.env.action_space.n,
            memory_capacity=100,
            gpu=-1)
        from cpprb import ReplayBuffer
        # NOTE: the obs_dim/act_dim keywords reflect an older cpprb API;
        # recent cpprb versions take a size and an env_dict instead.
        replay_buffer = ReplayBuffer(
            obs_dim=self.env.observation_space.shape,
            act_dim=1,
            size=agent.memory_capacity)

        obs = self.env.reset()
        for _ in range(100):
            action = agent.get_action(obs)
            next_obs, reward, done, _ = self.env.step(action)
            replay_buffer.add(obs=obs, act=action, next_obs=next_obs, rew=reward, done=done)
            if done:
                next_obs = self.env.reset()
            obs = next_obs

        for _ in range(100):
            samples = replay_buffer.sample(agent.batch_size)
            agent.train(samples["obs"], samples["act"], samples["next_obs"],
                        samples["rew"], np.array(samples["done"], dtype=np.float64))
Code Example #18
File: save_load.py Project: ymd-h/cpprb
    def test_next_of(self):
        """
        Load next_of transitions with safe mode

        In safe mode, next_of is not necessary for the loaded buffer.
        """
        buffer_size = 10
        env_dict1 = {"a": {}}
        env_dict2 = {"a": {}, "next_a": {}}

        rb1 = ReplayBuffer(buffer_size, env_dict1, next_of="a")
        rb2 = ReplayBuffer(buffer_size, env_dict2)
        rb3 = ReplayBuffer(buffer_size, env_dict2)

        a = [1, 2, 3, 4, 5, 6]

        rb1.add(a=a[:-1], next_a=a[1:])

        fname="next_of.npz"
        rb1.save_transitions(fname)
        rb2.load_transitions(fname)
        rb3.load_transitions(v(1,fname))

        t1 = rb1.get_all_transitions()
        t2 = rb2.get_all_transitions()
        t3 = rb3.get_all_transitions()

        np.testing.assert_allclose(t1["a"], t2["a"])
        np.testing.assert_allclose(t1["next_a"], t2["next_a"])
        np.testing.assert_allclose(t1["a"], t3["a"])
        np.testing.assert_allclose(t1["next_a"], t3["next_a"])
Code Example #19
File: save_load.py Project: ymd-h/cpprb
    def test_load_Nstep(self):
        """
        Load Nstep transitions
        """
        buffer_size = 10
        env_dict = {"done": {}}
        Nstep = {"size": 3, "gamma": 0.99}

        rb1 = ReplayBuffer(buffer_size, env_dict, Nstep=Nstep)
        rb2 = ReplayBuffer(buffer_size, env_dict, Nstep=Nstep)
        rb3 = ReplayBuffer(buffer_size, env_dict, Nstep=Nstep)

        d = [0, 0, 0, 0, 1]

        rb1.add(done=d)
        rb1.on_episode_end()

        fname="Nstep.npz"
        rb1.save_transitions(fname)
        rb2.load_transitions(fname)
        rb3.load_transitions(v(1,fname))

        t1 = rb1.get_all_transitions()
        t2 = rb2.get_all_transitions()
        t3 = rb3.get_all_transitions()

        np.testing.assert_allclose(t1["done"], t2["done"])
        np.testing.assert_allclose(t1["done"], t3["done"])
Code Example #20
    def test(self):
        buffer_size = 256
        obs_dim = 3
        act_dim = 1
        rb = ReplayBuffer(
            buffer_size, {
                "obs": {
                    "shape": obs_dim
                },
                "act": {
                    "shape": act_dim
                },
                "rew": {},
                "next_obs": {
                    "shape": obs_dim
                },
                "done": {}
            })

        obs = np.ones(shape=(obs_dim))
        act = np.ones(shape=(act_dim))
        rew = 0
        next_obs = np.ones(shape=(obs_dim))
        done = 0

        for _ in range(500):
            rb.add(obs=obs, act=act, rew=rew, next_obs=next_obs, done=done)

        batch_size = 32
        sample = rb.sample(batch_size)
Code Example #21
File: issue.py Project: ymd-h/cpprb
    def test_has_next_of(self):
        bsize = 10
        rb = ReplayBuffer(bsize, {"a": {}}, next_of="a")
        a = np.random.rand(bsize + 1)

        for i in range(bsize):
            rb.add(a=a[i], next_a=a[i + 1])

        _next_a = np.ravel(rb.get_all_transitions()["next_a"])
        np.testing.assert_allclose(_next_a, a[1:bsize + 1])

        for i in range(bsize):
            rb._encode_sample([i])

        rb.clear()

        for i in range(bsize):
            rb.add(a=a[i], next_a=a[i + 1])
            rb.on_episode_end()

        _next_a = np.ravel(rb.get_all_transitions()["next_a"])
        np.testing.assert_allclose(_next_a, a[1:bsize + 1])

        for i in range(bsize):
            rb._encode_sample([i])
Code Example #22
    def test_nstep(self):
        rb = ReplayBuffer(32, {'rew': {}, 'done': {}},
                          Nstep={"size": 4, "rew": "rew"})

        # With an N-step size of 4, add() returns None until enough
        # transitions are buffered; the 4th add stores index 0.
        self.assertIs(rb.add(rew=1, done=0), None)
        self.assertIs(rb.add(rew=1, done=0), None)
        self.assertIs(rb.add(rew=1, done=0), None)
        self.assertEqual(rb.add(rew=1, done=0), 0)
Code Example #23
    def test_multistep_add(self):
        rb = ReplayBuffer(4, {"done": {}})

        done = jnp.asarray([1,1,1])

        for i in range(2):
            with self.subTest(i=i):
                rb.add(done=done)
Code Example #24
File: issue.py Project: ymd-h/cpprb
    def test_python_type(self):
        types = [bool, int, float]

        for d in types:
            with self.subTest(type=d):
                b = ReplayBuffer(10, {"a": {"dtype": d}})
                b.add(a=d(1))
                self.assertEqual(b.get_all_transitions()["a"].dtype, d)
Code Example #25
    def test_add(self):
        rb = ReplayBuffer(4, {"done": {}})

        done = jnp.asarray(1)

        for i in range(5):
            with self.subTest(i=i):
                rb.add(done=done)
Code Example #26
    def set_replay_buffer(self, env, get_from_file):

        obs_dim = env.observation_space.shape
        act_dim = env.action_space.shape

        if get_from_file:
            print(colorize("Pulling saved expert %s trajectories from file over %d episodes" %
                           (self.config_name, self.expert_episodes), 'blue', bold=True))

            with open(self._demo_dir + 'sim_data_' + str(self.expert_episodes) + '_buffer.pkl', "rb") as f:
                buffer_file = pickle.load(f)

            data = samples_from_cpprb(npsamples=buffer_file)

            # Reconstruct the data, then pass it to replay buffer
            np_states, np_rewards, np_actions, np_next_states, np_dones, np_next_dones = samples_to_np(data)

            # Build the before_add preprocessing function for this environment
            before_add = create_before_add_func(env)

            replay_buffer = ReplayBuffer(size=self.replay_buffer_size,
                                         env_dict={
                                             "obs": {"shape": obs_dim},
                                             "act": {"shape": act_dim},
                                             "rew": {},
                                             "next_obs": {"shape": obs_dim},
                                             "done": {}})

            replay_buffer.add(**before_add(obs=np_states[~np_dones],
                                           act=np_actions[~np_dones],
                                           rew=np_rewards[~np_dones],
                                           next_obs=np_next_states[~np_dones],
                                           done=np_next_dones[~np_dones]))
            self.replay_buffer = replay_buffer

        else:
            # Generate expert data
            print(colorize(
                "Generating expert %s trajectories from file over %d episodes" % (self.config_name, self.expert_episodes),
                'blue', bold=True))

            # Load trained policy
            _, get_action = load_policy_and_env(osp.join(self._root_data_path, self.file_name, self.file_name + '_s0/'),
                                                'last', False)
            expert_rb = run_policy(env,
                                   get_action,
                                   0,
                                   self.expert_episodes,
                                   False,
                                   record=not get_from_file,
                                   record_name='expert_' + self.file_name + '_' + str(self.expert_episodes) + '_runs',
                                   record_project='clone_benchmarking_' + self.config_name,
                                   data_path=self._expert_path,
                                   config_name=self.config_name,
                                   max_len_rb=self.replay_buffer_size)

            self.replay_buffer = expert_rb
Code Example #27
    def test_cache_next_of(self):
        stack_size = 3
        episode_len = 5
        rb = ReplayBuffer(32,
                          {"obs": {
                              "shape": (stack_size),
                              "dtype": np.int64
                          }},
                          next_of="obs",
                          stack_compress="obs")

        obs = np.arange(episode_len + stack_size + 2, dtype=np.int64)
        # [0,1,...,episode_len+stack_size+1]
        obs2 = obs + 3 * episode_len
        # [3*episode_len,...,4*episode_len+stack_size+1]

        # Add 1st episode
        for i in range(episode_len):
            rb.add(obs=obs[i:i + stack_size],
                   next_obs=obs[i + 1:i + 1 + stack_size])

        s = rb.get_all_transitions()
        self.assertEqual(rb.get_stored_size(), episode_len)
        for i in range(episode_len):
            with self.subTest(i=i):
                np.testing.assert_equal(s["obs"][i], obs[i:i + stack_size])
                np.testing.assert_equal(s["next_obs"][i],
                                        obs[i + 1:i + 1 + stack_size])

        # Reset environment
        rb.on_episode_end()
        s = rb.get_all_transitions()
        self.assertEqual(rb.get_stored_size(), episode_len)
        for i in range(episode_len):
            with self.subTest(i=i):
                np.testing.assert_equal(s["obs"][i], obs[i:i + stack_size])
                np.testing.assert_equal(s["next_obs"][i],
                                        obs[i + 1:i + 1 + stack_size])

        # Add 2nd episode
        for i in range(episode_len):
            rb.add(obs=obs2[i:i + stack_size],
                   next_obs=obs2[i + 1:i + 1 + stack_size])

        s = rb.get_all_transitions()
        self.assertEqual(rb.get_stored_size(), 2 * episode_len)
        for i in range(episode_len):
            with self.subTest(i=i):
                np.testing.assert_equal(s["obs"][i], obs[i:i + stack_size])
                np.testing.assert_equal(s["next_obs"][i],
                                        obs[i + 1:i + 1 + stack_size])
        for i in range(episode_len):
            with self.subTest(i=i + episode_len):
                np.testing.assert_equal(s["obs"][i + episode_len],
                                        obs2[i:i + stack_size])
                np.testing.assert_equal(s["next_obs"][i + episode_len],
                                        obs2[i + 1:i + 1 + stack_size])
Code Example #28
    def test_nstep_multistep_add(self):
        rb = ReplayBuffer(6, {"obs": {}, "rew": {}, "done": {}, "next_obs":{}},
                          Nstep={"size": 4, "rew": "rew", "next": "next_obs"})

        obs = jnp.asarray([1,1,1,1])
        rew = jnp.asarray([1,1,1,1])
        done = jnp.asarray([1,1,1,1])
        next_obs = jnp.asarray([1,1,1,1])

        for i in range(7):
            with self.subTest(i=i):
                rb.add(obs=obs, rew=rew, done=done, next_obs=next_obs)
Code Example #29
    def set_multiple_replay_buffers(self, env):
        print(self.config_name_list)

        obs_dim = env.observation_space.shape
        act_dim = env.action_space.shape

        print(colorize("Pulling saved trajectories from two experts ( %s and %s) from files over %d episodes" %
                       (self.config_name_list[0], self.config_name_list[1], self.expert_episodes), 'blue', bold=True))

        rb_list = []

        for v, x in enumerate(self.config_name_list):

            _expert_demo_dir = os.path.join(self._expert_path, x + '_episodes/')

            with open(_expert_demo_dir + 'sim_data_' + str(self.expert_episodes) + '_buffer.pkl', "rb") as f:
                buffer_file = pickle.load(f)

            data = samples_from_cpprb(npsamples=buffer_file)

            # Reconstruct the data, then pass it to replay buffer
            np_states, np_rewards, np_actions, np_next_states, np_dones, np_next_dones = samples_to_np(data)

            # Build the before_add preprocessing function for this environment
            before_add = create_before_add_func(env)

            replay_buffer = ReplayBuffer(size=self.replay_buffer_size,
                                         env_dict={
                                             "obs": {"shape": (obs_dim[0] + 2,)},
                                             "act": {"shape": act_dim},
                                             "rew": {},
                                             "next_obs": {"shape": (obs_dim[0] + 2,)},
                                             "done": {}})

            # Concatenate the states with one hot vectors depending on class
            extend1 = [one_hot(np.array([v]), self.n_experts)] * np_states[~np_dones].shape[0]

            appended_states = np.append(np_states[~np_dones], np.c_[extend1], 1)
            appended_next_states = np.append(np_next_states[~np_dones], np.c_[extend1], 1)

            replay_buffer.add(**before_add(obs=appended_states,
                                           act=np_actions[~np_dones],
                                           rew=np_rewards[~np_dones],
                                           next_obs=appended_next_states,
                                           done=np_next_dones[~np_dones]))

            rb_list.append(replay_buffer)
        self.rb_list = rb_list
Code Example #30
File: features.py Project: ymd-h/cpprb
    def test_shuffle_transitions(self):
        rb = ReplayBuffer(64, {"a": {}})

        a = np.arange(64)
        rb.add(a=a)

        s1 = rb.get_all_transitions()["a"]
        s2 = rb.get_all_transitions(shuffle=True)["a"]

        self.assertFalse((s1 == s2).all())

        s = np.intersect1d(s1, s2, assume_unique=True)
        np.testing.assert_allclose(np.ravel(s), np.ravel(s1))