Beispiel #1
0
def test_simple_get_frame():
    """ Check if get_frame returns frames from a buffer partially full """
    observation_space = gym.spaces.Box(low=0, high=255, shape=(2, 2, 1), dtype=int)
    action_space = gym.spaces.Discrete(4)
    buffer = DequeMultiEnvBufferBackend(20, num_envs=2, observation_space=observation_space, action_space=action_space)

    v1 = np.ones(8).reshape((2, 2, 2, 1))
    v1[1] *= 2

    v2 = v1 * 2
    v3 = v1 * 3

    buffer.store_transition(v1, 0, 0, False)
    buffer.store_transition(v2, 0, 0, False)
    buffer.store_transition(v3, 0, 0, False)

    assert np.all(buffer.get_frame(0, 0, 4).max(0).max(0) == np.array([0, 0, 0, 1]))
    assert np.all(buffer.get_frame(1, 0, 4).max(0).max(0) == np.array([0, 0, 1, 2]))
    assert np.all(buffer.get_frame(2, 0, 4).max(0).max(0) == np.array([0, 1, 2, 3]))

    assert np.all(buffer.get_frame(0, 1, 4).max(0).max(0) == np.array([0, 0, 0, 2]))
    assert np.all(buffer.get_frame(1, 1, 4).max(0).max(0) == np.array([0, 0, 2, 4]))
    assert np.all(buffer.get_frame(2, 1, 4).max(0).max(0) == np.array([0, 2, 4, 6]))

    with t.assert_raises(VelException):
        buffer.get_frame(3, 0, 4)

    with t.assert_raises(VelException):
        buffer.get_frame(4, 0, 4)

    with t.assert_raises(VelException):
        buffer.get_frame(3, 1, 4)

    with t.assert_raises(VelException):
        buffer.get_frame(4, 1, 4)
    def __init__(self, environment: VecEnv, device, number_of_steps,
                 discount_factor, buffer_capacity, buffer_initial_size,
                 frame_stack_compensation):
        self._environment = environment
        self.device = device
        self.number_of_steps = number_of_steps
        self.discount_factor = discount_factor
        self.buffer_capacity = buffer_capacity
        self.buffer_initial_size = buffer_initial_size
        self.frame_stack_compensation = frame_stack_compensation

        # Initial observation
        self.last_observation_cpu = self.environment.reset()
        self.last_observation = self._to_tensor(self.last_observation_cpu)

        # Replay buffer
        self.replay_buffer = DequeMultiEnvBufferBackend(
            buffer_capacity=self.buffer_capacity,
            num_envs=self.environment.num_envs,
            observation_space=self.environment.observation_space,
            action_space=self.environment.action_space,
            extra_data={
                'action_logits':
                np.zeros((self.buffer_capacity, self.environment.num_envs,
                          self.environment.action_space.n),
                         dtype=np.float32)
            },
            frame_stack_compensation=self.frame_stack_compensation is not None)
Beispiel #3
0
def get_half_filled_buffer():
    """ Return simple preinitialized buffer """
    observation_space = gym.spaces.Box(low=0, high=255, shape=(2, 2, 1), dtype=int)
    action_space = gym.spaces.Discrete(4)

    buffer = DequeMultiEnvBufferBackend(20, num_envs=2, observation_space=observation_space, action_space=action_space)

    v1 = np.ones(8).reshape((2, 2, 2, 1))

    for i in range(10):
        item = v1.copy()
        item[0] *= (i+1)
        item[1] *= 10 * (i+1)

        buffer.store_transition(item, 0, float(i)/2, False)

    return buffer
Beispiel #4
0
def get_filled_buffer3x3():
    """ Return simple preinitialized buffer """
    observation_space = gym.spaces.Box(low=0, high=255, shape=(2, 2, 2), dtype=int)
    action_space = gym.spaces.Box(low=-1.0, high=1.0, shape=(2, 2, 2), dtype=float)

    buffer = DequeMultiEnvBufferBackend(20, num_envs=2, observation_space=observation_space, action_space=action_space)

    v1 = np.ones(16).reshape((2, 2, 2, 2))
    a1 = np.arange(16).reshape((2, 2, 2, 2))

    for i in range(30):
        item = v1.copy()
        item[:, 0] *= (i+1)
        item[:, 1] *= 10 * (i+1)

        buffer.store_transition(item, i * a1, float(i)/2, False)

    return buffer
Beispiel #5
0
def test_buffer_filling_size():
    """ Check if buffer size is properly updated when we add items """
    observation_space = gym.spaces.Box(low=0, high=255, shape=(2, 2, 1), dtype=int)
    action_space = gym.spaces.Discrete(4)
    buffer = DequeMultiEnvBufferBackend(20, num_envs=2, observation_space=observation_space, action_space=action_space)

    v1 = np.ones(8).reshape((2, 2, 2, 1))

    t.eq_(buffer.current_size, 0)

    buffer.store_transition(v1, 0, 0, False)
    buffer.store_transition(v1, 0, 0, False)

    t.eq_(buffer.current_size, 2)

    for i in range(30):
        buffer.store_transition(v1 * (i+1), 0, float(i)/2, False)

    t.eq_(buffer.current_size, buffer.buffer_capacity)
Beispiel #6
0
def get_filled_buffer_with_dones():
    """ Return simple preinitialized buffer with some done's in there """
    observation_space = gym.spaces.Box(low=0, high=255, shape=(2, 2, 1), dtype=int)
    action_space = gym.spaces.Discrete(4)

    buffer = DequeMultiEnvBufferBackend(20, num_envs=2, observation_space=observation_space, action_space=action_space)

    v1 = np.ones(8).reshape((2, 2, 2, 1))

    done_set = {2, 5, 10, 13, 18, 22, 28}

    for i in range(30):
        item = v1.copy()
        item[0] *= (i+1)
        item[1] *= 10 * (i+1)

        done_array = np.array([i in done_set, (i+1) in done_set], dtype=bool)
        buffer.store_transition(item, 0, float(i)/2, done_array)

    return buffer
Beispiel #7
0
def get_filled_buffer_extra_info():
    """ Return simple preinitialized buffer """
    observation_space = gym.spaces.Box(low=0, high=255, shape=(2, 2, 1), dtype=int)
    action_space = gym.spaces.Discrete(4)

    buffer = DequeMultiEnvBufferBackend(
        20, num_envs=2, observation_space=observation_space, action_space=action_space,
        extra_data={
            'neglogp': np.zeros((20, 2), dtype=float)
        }
    )

    v1 = np.ones(8).reshape((2, 2, 2, 1))

    for i in range(30):
        item = v1.copy()
        item[0] *= (i+1)
        item[1] *= 10 * (i+1)
        buffer.store_transition(item, 0, float(i)/2, False, extra_info={
            'neglogp': np.array([i / 30.0, (i+1) / 30.0])
        })

    return buffer