Python ReplayBuffer.get 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: tianshou.data

클래스/타입: ReplayBuffer

메소드/함수: get

hotexamples.com에서의 예제들: 6

Python ReplayBuffer.get - 6개의 예제가 발견되었습니다. 이것들은 오픈소스 프로젝트에서 추출된 Python의 tianshou.data.ReplayBuffer.get에 대한 실세계 최고 등급의 예제들입니다. 예제들을 평가하여 예제의 품질 향상에 도움을 줄 수 있습니다.

자주 사용되는 메소드들

보기 숨기기

ReplayBuffer(30)

add(30)

sample(18)

update(9)

reset(6)

get(6)

unfinished_index(4)

from_data(3)

sample_index(3)

update_weight(3)

next(2)

sample_indices(2)

load_hdf5(1)

prev(1)

__init__(1)

rew(1)

save_hdf5(1)

stack_num(1)

_add_to_buffer(1)

예제 #1

파일 보기

def test_stack(size=5, bufsize=9, stack_num=4):
    """Exercise stacked ``get`` reads on buffers filled by a 16-step rollout.

    Three buffers are fed from the same ``MyTestEnv`` episode stream: a plain
    one, one created with ``sample_avail=True`` and one created with
    ``save_only_last_obs=True``. The test then checks

    * the stacked ``'obs'`` values returned by ``get`` for every index,
    * that the ``save_only_last_obs`` buffer returns matching ``'obs'`` and
      ``'obs_next'`` values,
    * which indices the ``sample_avail`` buffer hands out, and
    * that indexing far past the buffer size raises ``IndexError``.

    Args:
        size: Argument forwarded to ``MyTestEnv``.
        bufsize: Capacity passed to every ``ReplayBuffer``.
        stack_num: ``stack_num`` passed to every ``ReplayBuffer``.
    """
    env = MyTestEnv(size)
    plain = ReplayBuffer(bufsize, stack_num=stack_num)
    avail_only = ReplayBuffer(bufsize, stack_num=stack_num, sample_avail=True)
    last_obs_only = ReplayBuffer(
        bufsize, stack_num=stack_num, save_only_last_obs=True)

    obs = env.reset(1)
    for _ in range(16):
        obs_next, rew, done, info = env.step(1)
        plain.add(obs, 1, rew, done, None, info)
        avail_only.add(obs, 1, rew, done, None, info)
        # This buffer is fed list-valued obs / obs_next whose last entry is
        # the current observation.
        last_obs_only.add([None, None, obs], 1, rew, done, [None, obs], info)
        # Start a fresh episode once the current one has finished.
        obs = env.reset(1) if done else obs_next

    every_slot = np.arange(len(plain))
    expected_stacks = [
        [1, 1, 1, 2], [1, 1, 2, 3], [1, 2, 3, 4],
        [1, 1, 1, 1], [1, 1, 1, 2], [1, 1, 2, 3],
        [1, 2, 3, 4], [4, 4, 4, 4], [1, 1, 1, 1],
    ]
    assert np.allclose(plain.get(every_slot, 'obs')[..., 0], expected_stacks)
    assert np.allclose(
        plain.get(every_slot, 'obs'), last_obs_only.get(every_slot, 'obs'))
    assert np.allclose(
        plain.get(every_slot, 'obs'),
        last_obs_only.get(every_slot, 'obs_next'))

    # Batch size 0 is expected to return exactly indices 2 and 6 here.
    _, picked = avail_only.sample(0)
    assert picked.tolist() == [2, 6]
    _, picked = avail_only.sample(1)
    assert picked in [2, 6]

    with pytest.raises(IndexError):
        plain[2 * bufsize]

예제 #2

파일 보기

파일: test_buffer.py 프로젝트: nsarang/tianshou

def test_stack(size=5, bufsize=9, stack_num=4, cached_num=3):
    """Exercise stacked ``get`` reads on ``Batch``-fed buffers.

    A 16-step ``MyTestEnv`` rollout is written, one ``Batch`` per step, into
    a plain buffer, a ``sample_avail=True`` buffer and a
    ``save_only_last_obs=True`` buffer. Afterwards the test verifies the
    stacked ``'obs'`` values from ``get``, the agreement between the plain
    and ``save_only_last_obs`` buffers, the indices returned by ``sample``
    for batch sizes 0, 1 and -1, and that an out-of-range index raises
    ``IndexError``.

    Args:
        size: Argument forwarded to ``MyTestEnv``.
        bufsize: Capacity passed to every ``ReplayBuffer``.
        stack_num: ``stack_num`` passed to every ``ReplayBuffer``.
        cached_num: Not used inside this function body.
    """
    env = MyTestEnv(size)
    plain = ReplayBuffer(bufsize, stack_num=stack_num)
    avail_only = ReplayBuffer(bufsize, stack_num=stack_num, sample_avail=True)
    last_obs_only = ReplayBuffer(
        bufsize, stack_num=stack_num, save_only_last_obs=True)

    obs = env.reset(1)
    for _ in range(16):
        obs_next, rew, done, info = env.step(1)
        plain.add(Batch(obs=obs, act=1, rew=rew, done=done, info=info))
        avail_only.add(Batch(obs=obs, act=1, rew=rew, done=done, info=info))
        # This buffer receives list-valued obs / obs_next built from the
        # current observation.
        stacked_step = Batch(
            obs=[obs, obs, obs],
            act=1,
            rew=rew,
            done=done,
            obs_next=[obs, obs],
            info=info,
        )
        last_obs_only.add(stacked_step)
        # Start a fresh episode once the current one has finished.
        obs = env.reset(1) if done else obs_next

    every_slot = np.arange(len(plain))
    expected_stacks = [
        [1, 1, 1, 2], [1, 1, 2, 3], [1, 2, 3, 4],
        [1, 1, 1, 1], [1, 1, 1, 2], [1, 1, 2, 3],
        [1, 2, 3, 4], [4, 4, 4, 4], [1, 1, 1, 1],
    ]
    assert np.allclose(plain.get(every_slot, 'obs')[..., 0], expected_stacks)
    assert np.allclose(
        plain.get(every_slot, 'obs'), last_obs_only.get(every_slot, 'obs'))
    assert np.allclose(
        plain.get(every_slot, 'obs'),
        last_obs_only.get(every_slot, 'obs_next'))

    # Batch size 0 is expected to return exactly indices 2 and 6 here.
    _, picked = avail_only.sample(0)
    assert picked.tolist() == [2, 6]
    _, picked = avail_only.sample(1)
    assert picked[0] in [2, 6]

    # A negative batch size is expected to produce no data at all.
    empty_batch, picked = avail_only.sample(-1)
    assert picked.tolist() == []
    assert len(empty_batch) == 0

    with pytest.raises(IndexError):
        plain[2 * bufsize]

예제 #3

파일 보기

파일: demo_APIs.py 프로젝트: ZhangRui111/MiniProjects

def test_ReplayBuffer():
    """Print-driven walkthrough of the ``tianshou.data.ReplayBuffer`` API.

    Calls, in order: ``add``, ``len``, ``update``, ``get``, ``sample``,
    indexing with the sampled indices, and ``reset``. Nothing is asserted;
    the results are written to stdout for manual inspection.

    Returns:
        None.
    """

    def _fill(target, steps):
        # Add `steps` transitions whose fields are all derived from the
        # step counter.
        for step in range(steps):
            target.add(
                obs=step,
                act=step,
                rew=step,
                done=step,
                obs_next=step + 1,
                info={},
                weight=None,
            )

    separator = '--------------------'

    # Buffer with more capacity (15) than the 3 items added.
    buf1 = ReplayBuffer(size=15)
    _fill(buf1, 3)
    print(len(buf1))
    print(buf1.obs)

    # Buffer with less capacity (10) than the 15 items added.
    buf2 = ReplayBuffer(size=10)
    _fill(buf2, 15)
    print(buf2.obs)

    buf1.update(buf2)
    print(buf1.obs)

    index = [1, 3, 5]
    # Original author's note: `key` is a required argument of get().
    print(buf2.get(index, key='obs'))
    print(separator)

    sample_data, indice = buf2.sample(batch_size=4)
    print(sample_data, indice)
    print(sample_data.obs == buf2[indice].obs)
    print(separator)

    # Original author's note: buf.reset() only resets the index, not the
    # content.
    print(len(buf2))
    buf2.reset()
    print(len(buf2))
    print(buf2)
    print(separator)

예제 #4

파일 보기

파일: test_buffer.py 프로젝트: yisuoyanyudmj/tianshou

def test_stack(size=5, bufsize=9, stack_num=4):
    """Check the stacked ``'obs'`` values of a buffer after a 15-step rollout.

    The buffer is built with ``stack_num`` passed positionally, filled from
    ``MyTestEnv`` and then read back for every index through ``get``. The
    summed absolute difference from the expected matrix must be below 1e-6.
    The buffer is printed at the end.

    Args:
        size: Argument forwarded to ``MyTestEnv``.
        bufsize: Capacity passed to ``ReplayBuffer``.
        stack_num: Second positional argument passed to ``ReplayBuffer``.
    """
    env = MyTestEnv(size)
    replay = ReplayBuffer(bufsize, stack_num)

    obs = env.reset(1)
    for _ in range(15):
        obs_next, rew, done, info = env.step(1)
        replay.add(obs, 1, rew, done, None, info)
        # Start a fresh episode once the current one has finished.
        obs = env.reset(1) if done else obs_next

    every_slot = np.arange(len(replay))
    stacked = replay.get(every_slot, 'obs')
    expected_stacks = np.array([
        [1, 1, 1, 2], [1, 1, 2, 3], [1, 2, 3, 4],
        [1, 1, 1, 1], [1, 1, 1, 2], [1, 1, 2, 3],
        [3, 3, 3, 3], [3, 3, 3, 4], [1, 1, 1, 1],
    ])
    total_error = abs(stacked - expected_stacks).sum()
    assert total_error < 1e-6
    print(replay)

예제 #5

파일 보기

파일: test_buffer.py 프로젝트: qxwsniff/tianshou

def test_stack(size=5, bufsize=9, stack_num=4):
    """Check stacked ``get`` reads and ``sample_avail`` sampling (15 steps).

    A plain buffer and a ``sample_avail=True`` buffer are filled from the
    same 15-step ``MyTestEnv`` rollout. The plain buffer's stacked ``'obs'``
    values are compared against a fixed matrix and the buffer is printed;
    the ``sample_avail`` buffer is then sampled with batch sizes 0 and 1.

    Args:
        size: Argument forwarded to ``MyTestEnv``.
        bufsize: Capacity passed to both ``ReplayBuffer`` instances.
        stack_num: ``stack_num`` passed to both ``ReplayBuffer`` instances.
    """
    env = MyTestEnv(size)
    plain = ReplayBuffer(bufsize, stack_num=stack_num)
    avail_only = ReplayBuffer(bufsize, stack_num=stack_num, sample_avail=True)

    obs = env.reset(1)
    for _ in range(15):
        obs_next, rew, done, info = env.step(1)
        plain.add(obs, 1, rew, done, None, info)
        avail_only.add(obs, 1, rew, done, None, info)
        # Start a fresh episode once the current one has finished.
        obs = env.reset(1) if done else obs_next

    every_slot = np.arange(len(plain))
    expected_stacks = np.array([
        [1, 1, 1, 2], [1, 1, 2, 3], [1, 2, 3, 4],
        [1, 1, 1, 1], [1, 1, 1, 2], [1, 1, 2, 3],
        [3, 3, 3, 3], [3, 3, 3, 4], [1, 1, 1, 1],
    ])
    assert np.allclose(plain.get(every_slot, 'obs'), expected_stacks)
    print(plain)

    # Batch size 0 is expected to return the single index 2 here.
    _, picked = avail_only.sample(0)
    assert picked == [2]
    _, picked = avail_only.sample(1)
    assert picked.sum() == 2

예제 #6

파일 보기

파일: test_buffer.py 프로젝트: zzjun725/tianshou

def test_stack(size=5, bufsize=9, stack_num=4):
    """Check stacked ``get`` reads and ``sample_avail`` sampling (16 steps).

    A plain buffer and a ``sample_avail=True`` buffer are filled from the
    same 16-step ``MyTestEnv`` rollout. The plain buffer's stacked ``'obs'``
    values are compared against a fixed matrix that is given a trailing
    axis with ``np.expand_dims``; the ``sample_avail`` buffer is then
    sampled with batch sizes 0 and 1.

    Args:
        size: Argument forwarded to ``MyTestEnv``.
        bufsize: Capacity passed to both ``ReplayBuffer`` instances.
        stack_num: ``stack_num`` passed to both ``ReplayBuffer`` instances.
    """
    env = MyTestEnv(size)
    plain = ReplayBuffer(bufsize, stack_num=stack_num)
    avail_only = ReplayBuffer(bufsize, stack_num=stack_num, sample_avail=True)

    obs = env.reset(1)
    for _ in range(16):
        obs_next, rew, done, info = env.step(1)
        plain.add(obs, 1, rew, done, None, info)
        avail_only.add(obs, 1, rew, done, None, info)
        # Start a fresh episode once the current one has finished.
        obs = env.reset(1) if done else obs_next

    every_slot = np.arange(len(plain))
    stack_rows = [
        [1, 1, 1, 2], [1, 1, 2, 3], [1, 2, 3, 4],
        [1, 1, 1, 1], [1, 1, 1, 2], [1, 1, 2, 3],
        [1, 2, 3, 4], [4, 4, 4, 4], [1, 1, 1, 1],
    ]
    assert np.allclose(
        plain.get(every_slot, 'obs'),
        np.expand_dims(stack_rows, axis=-1))

    # Batch size 0 is expected to return exactly indices 2 and 6 here.
    _, picked = avail_only.sample(0)
    assert picked.tolist() == [2, 6]
    _, picked = avail_only.sample(1)
    assert picked in [2, 6]