Example #4

import numpy as np
import tensorflow as tf  # these tests target the TensorFlow 1.x API (tf.Session, global_variables_initializer)

from replay import Replay  # assumed import path; adjust to wherever the Replay class is defined


class TestReplay:
    def setup_method(self):
        self.sess = tf.Session()
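
    # Suggested hook (not in the original listing): close the per-test session
    # so consecutive tests do not leak TensorFlow resources.
    def teardown_method(self):
        self.sess.close()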

    def test_add_same_frame_repeatedly(self):
        """Mimic the initial state wherein the first frame is duplicated
        history_length times to populate replay for first retrieved state
        """
        # Replay(session, capacity, frame_shape, history_length, batch_size, num_actions);
        # the argument meanings are inferred from how these tests use them.
        self.replay = Replay(self.sess, 10, (3, 4), 4, 3, 6)
        self.sess.run(tf.global_variables_initializer())

        frame = np.array(
            [
                [0,   63,  127, 191],
                [255, 191, 127, 63],
                [127, 191, 255, 0]
            ]
        )
        action = 0
        reward = 0.0
        terminal = False

        for _ in range(4):
            self.replay.insert(frame, action, reward, terminal)

        expected = np.array(
            [
                [
                    [0,   0,   0,   0],
                    [63,  63,  63,  63],
                    [127, 127, 127, 127],
                    [191, 191, 191, 191]
                ],
                [
                    [255, 255, 255, 255],
                    [191, 191, 191, 191],
                    [127, 127, 127, 127],
                    [63,  63,  63,  63]
                ],
                [
                    [127, 127, 127, 127],
                    [191, 191, 191, 191],
                    [255, 255, 255, 255],
                    [0,   0,   0,   0]
                ]
            ]
        )

        result = self.replay.get_current_state()
        assert np.array_equal(result, expected)


    def test_add_unique_frames(self):
        """Mimic a state that is populated with unique consecutive frames"""
        self.replay = Replay(self.sess, 10, (3, 4), 4, 3, 6)
        self.sess.run(tf.global_variables_initializer())

        frame_1 = np.array(
            [
                [11, 12, 13, 14],
                [21, 22, 23, 24],
                [31, 32, 33, 34]
            ]
        )
        frame_2 = np.array(
            [
                [41, 42, 43, 44],
                [51, 52, 53, 54],
                [61, 62, 63, 64]
            ]
        )
        frame_3 = np.array(
            [
                [71, 72, 73, 74],
                [81, 82, 83, 84],
                [91, 92, 93, 94]
            ]
        )
        frame_4 = np.array(
            [
                [101, 102, 103, 104],
                [111, 112, 113, 114],
                [121, 122, 123, 124]
            ]
        )

        action = 0
        reward = 0.0
        terminal = False

        for frame in [frame_1, frame_2, frame_3, frame_4]:
            self.replay.insert(frame, action, reward, terminal)

        expected = np.array(
            [
                [
                    [11, 41, 71, 101],
                    [12, 42, 72, 102],
                    [13, 43, 73, 103],
                    [14, 44, 74, 104]
                ],
                [
                    [21, 51, 81, 111],
                    [22, 52, 82, 112],
                    [23, 53, 83, 113],
                    [24, 54, 84, 114]
                ],
                [
                    [31, 61, 91, 121],
                    [32, 62, 92, 122],
                    [33, 63, 93, 123],
                    [34, 64, 94, 124]
                ]
            ]
        )

        result = self.replay.get_current_state()
        assert np.array_equal(result, expected)


    def add_test_frames(self, actions, rewards, terminals):
        frame_1 = np.array(
            [
                [11, 12, 13, 14],
                [21, 22, 23, 24],
                [31, 32, 33, 34]
            ]
        )
        frame_2 = np.array(
            [
                [41, 42, 43, 44],
                [51, 52, 53, 54],
                [61, 62, 63, 64]
            ]
        )
        frame_3 = np.array(
            [
                [71, 72, 73, 74],
                [81, 82, 83, 84],
                [91, 92, 93, 94]
            ]
        )
        frame_4 = np.array(
            [
                [101, 102, 103, 104],
                [111, 112, 113, 114],
                [121, 122, 123, 124]
            ]
        )
        advance_frame = np.array(
            [
                [127, 127, 127, 127],
                [191, 191, 191, 191],
                [255, 255, 255, 255]
            ]
        )

        frames = [frame_1, frame_2, frame_3, frame_4, advance_frame]

        for i in range(5):
            self.replay.insert(frames[i], actions[i], rewards[i], terminals[i])


    def test_advance_state_window(self):
        """Does it return the most recent history_length frames, ignoring
        frames outside of the window?
        """
        self.replay = Replay(self.sess, 10, (3, 4), 4, 3, 6)
        self.sess.run(tf.global_variables_initializer())

        actions = [0] * 5
        rewards = [0.0] * 5
        terminals = [False] * 5

        self.add_test_frames(actions, rewards, terminals)

        expected = np.array(
            [
                [
                    [41, 71, 101, 127],
                    [42, 72, 102, 127],
                    [43, 73, 103, 127],
                    [44, 74, 104, 127]
                ],
                [
                    [51, 81, 111, 191],
                    [52, 82, 112, 191],
                    [53, 83, 113, 191],
                    [54, 84, 114, 191]
                ],
                [
                    [61, 91, 121, 255],
                    [62, 92, 122, 255],
                    [63, 93, 123, 255],
                    [64, 94, 124, 255]
                ]
            ]
        )

        result = self.replay.get_current_state()
        assert np.array_equal(result, expected)


    def test_wrap(self):
        """Mimic adding past the capacity of the replay memory"""
        self.replay = Replay(self.sess, 4, (3, 4), 4, 3, 6)
        self.sess.run(tf.global_variables_initializer())

        frame_1 = np.array(
            [
                [11, 12, 13, 14],
                [21, 22, 23, 24],
                [31, 32, 33, 34]
            ]
        )
        frame_2 = np.array(
            [
                [41, 42, 43, 44],
                [51, 52, 53, 54],
                [61, 62, 63, 64]
            ]
        )
        frame_3 = np.array(
            [
                [71, 72, 73, 74],
                [81, 82, 83, 84],
                [91, 92, 93, 94]
            ]
        )
        frame_4 = np.array(
            [
                [101, 102, 103, 104],
                [111, 112, 113, 114],
                [121, 122, 123, 124]
            ]
        )
        wrap_frame = np.array(
            [
                [127, 127, 127, 127],
                [191, 191, 191, 191],
                [255, 255, 255, 255]
            ]
        )

        frames = [frame_1, frame_2, frame_3, frame_4, wrap_frame]

        action = 0
        reward = 0.0
        terminal = False

        for frame in frames:
            self.replay.insert(frame, action, reward, terminal)

        expected = np.array(
            [
                [
                    [41, 71, 101, 127],
                    [42, 72, 102, 127],
                    [43, 73, 103, 127],
                    [44, 74, 104, 127]
                ],
                [
                    [51, 81, 111, 191],
                    [52, 82, 112, 191],
                    [53, 83, 113, 191],
                    [54, 84, 114, 191]
                ],
                [
                    [61, 91, 121, 255],
                    [62, 92, 122, 255],
                    [63, 93, 123, 255],
                    [64, 94, 124, 255]
                ]
            ]
        )

        result = self.replay.get_current_state()
        assert np.array_equal(result, expected)


    def test_single_sample(self):
        """Verify a single sample"""
        self.replay = Replay(self.sess, 10, (3, 4), 4, 1, 6)
        self.sess.run(tf.global_variables_initializer())

        actions = [0, 1, 2, 3, 4]
        rewards = [0.0, -1.0, 2.0, 0.0, 3.0]
        terminals = [False, False, False, False, True]

        self.add_test_frames(actions, rewards, terminals)

        sample = self.replay.sample()

        expected_first_state = np.array([
            [
                [
                    [11, 41, 71, 101],
                    [12, 42, 72, 102],
                    [13, 43, 73, 103],
                    [14, 44, 74, 104]
                ],
                [
                    [21, 51, 81, 111],
                    [22, 52, 82, 112],
                    [23, 53, 83, 113],
                    [24, 54, 84, 114]
                ],
                [
                    [31, 61, 91, 121],
                    [32, 62, 92, 122],
                    [33, 63, 93, 123],
                    [34, 64, 94, 124]
                ]
            ]
        ])

        expected_next_state = np.array([
            [
                [
                    [41, 71, 101, 127],
                    [42, 72, 102, 127],
                    [43, 73, 103, 127],
                    [44, 74, 104, 127]
                ],
                [
                    [51, 81, 111, 191],
                    [52, 82, 112, 191],
                    [53, 83, 113, 191],
                    [54, 84, 114, 191]
                ],
                [
                    [61, 91, 121, 255],
                    [62, 92, 122, 255],
                    [63, 93, 123, 255],
                    [64, 94, 124, 255]
                ]
            ]
        ])

        assert len(sample) == 5
        assert np.array_equal(sample[0], expected_first_state)
        assert np.array_equal(sample[1], np.array([[0, 0, 0, 0, 1, 0]]))
        assert np.array_equal(sample[2], np.array([3]))
        assert np.array_equal(sample[3], expected_next_state)
        assert np.array_equal(sample[4], np.array([True]))

    def test_no_terminals_in_first_state_1(self):
        """Verify that the sample is not the one with a terminal in the first
        state.
        """
        self.replay = Replay(self.sess, 10, (3, 4), 4, 1, 6)
        self.sess.run(tf.global_variables_initializer())

        actions = [0, 1, 2, 3, 4]
        rewards = [0.0, -1.0, 2.0, 0.0, 3.0]
        terminals = [False, False, False, False, True]

        self.add_test_frames(actions, rewards, terminals)

        frame = np.array(
            [
                [95, 95, 95, 95],
                [159, 159, 159, 159],
                [221, 221, 221, 221]
            ]
        )

        self.replay.insert(frame, 3, 1.0, False)

        sample = self.replay.sample()

        expected_first_state = np.array([
            [
                [
                    [11, 41, 71, 101],
                    [12, 42, 72, 102],
                    [13, 43, 73, 103],
                    [14, 44, 74, 104]
                ],
                [
                    [21, 51, 81, 111],
                    [22, 52, 82, 112],
                    [23, 53, 83, 113],
                    [24, 54, 84, 114]
                ],
                [
                    [31, 61, 91, 121],
                    [32, 62, 92, 122],
                    [33, 63, 93, 123],
                    [34, 64, 94, 124]
                ]
            ]
        ])

        expected_next_state = np.array([
            [
                [
                    [41, 71, 101, 127],
                    [42, 72, 102, 127],
                    [43, 73, 103, 127],
                    [44, 74, 104, 127]
                ],
                [
                    [51, 81, 111, 191],
                    [52, 82, 112, 191],
                    [53, 83, 113, 191],
                    [54, 84, 114, 191]
                ],
                [
                    [61, 91, 121, 255],
                    [62, 92, 122, 255],
                    [63, 93, 123, 255],
                    [64, 94, 124, 255]
                ]
            ]
        ])

        assert len(sample) == 5
        assert np.array_equal(sample[0], expected_first_state)
        assert np.array_equal(sample[1], np.array([[0, 0, 0, 0, 1, 0]]))
        assert np.array_equal(sample[2], np.array([3]))
        assert np.array_equal(sample[3], expected_next_state)
        assert np.array_equal(sample[4], np.array([True]))


    def test_no_terminals_in_first_state_2(self):
        """Verify that the sample is not the one with a terminal in the first
        state.
        """
        self.replay = Replay(self.sess, 10, (3, 4), 4, 1, 6)
        self.sess.run(tf.global_variables_initializer())

        actions = [0, 1, 2, 3, 4]
        rewards = [0.0, -1.0, 2.0, 0.0, 3.0]
        terminals = [True, False, False, False, False]

        self.add_test_frames(actions, rewards, terminals)

        frame = np.array(
            [
                [95, 95, 95, 95],
                [159, 159, 159, 159],
                [221, 221, 221, 221]
            ]
        )

        self.replay.insert(frame, 3, 1.0, True)

        sample = self.replay.sample()

        expected_first_state = np.array([
            [
                [
                    [41, 71, 101, 127],
                    [42, 72, 102, 127],
                    [43, 73, 103, 127],
                    [44, 74, 104, 127]
                ],
                [
                    [51, 81, 111, 191],
                    [52, 82, 112, 191],
                    [53, 83, 113, 191],
                    [54, 84, 114, 191]
                ],
                [
                    [61, 91, 121, 255],
                    [62, 92, 122, 255],
                    [63, 93, 123, 255],
                    [64, 94, 124, 255]
                ]
            ]
        ])

        expected_next_state = np.array([
            [
                [
                    [71, 101, 127, 95],
                    [72, 102, 127, 95],
                    [73, 103, 127, 95],
                    [74, 104, 127, 95]
                ],
                [
                    [81, 111, 191, 159],
                    [82, 112, 191, 159],
                    [83, 113, 191, 159],
                    [84, 114, 191, 159]
                ],
                [
                    [91, 121, 255, 221],
                    [92, 122, 255, 221],
                    [93, 123, 255, 221],
                    [94, 124, 255, 221]
                ]
            ]
        ])

        assert len(sample) == 5
        assert np.array_equal(sample[0], expected_first_state)
        assert np.array_equal(sample[1], np.array([[0, 0, 0, 1, 0, 0]]))
        assert np.array_equal(sample[2], np.array([1.0]))
        assert np.array_equal(sample[3], expected_next_state)
        assert np.array_equal(sample[4], np.array([True]))


    def test_sample_alignment(self):
        """Verify that sample data lines up"""
        self.replay = Replay(self.sess, 10, (3, 4), 4, 32, 6)
        self.sess.run(tf.global_variables_initializer())

        actions = [0, 1, 2, 3, 4, 5, 0, 1, 2, 3]
        rewards = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, -1.0, -2.0, -3.0, -4.0]
        terminals = [
            False, False, False, True, False, False, False, False, False, False
        ]

        frame = np.array(
            [
                [1, 1, 1, 1],
                [2, 2, 2, 2],
                [3, 3, 3, 3]
            ]
        )

        for i in range(len(actions)):
            self.replay.insert(frame * i, actions[i], rewards[i], terminals[i])

        batch = self.replay.sample()
        b_frames1, b_actions, b_rewards, b_frames2, b_terminals = batch

        for sample_i, r in enumerate(b_rewards):
            input_i = rewards.index(r)
            b_action = np.nonzero(b_actions[sample_i])[0][0]
            assert b_action == actions[input_i]
            assert b_terminals[sample_i] == terminals[input_i]
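

The Replay class itself is not part of this listing. A rough sketch of the interface the tests exercise, with argument names and meanings inferred from the calls above (so the names here are assumptions, not the upstream signature), follows; any concrete implementation run against these tests would need to match this behavior.

# Hypothetical interface sketch of the replay buffer exercised by TestReplay.
# Names and argument order are inferred from the test calls, not taken from the source.
class Replay:
    def __init__(self, sess, capacity, frame_shape, history_length, batch_size, num_actions):
        """capacity is 10 (4 in test_wrap), frame_shape is (3, 4), history_length is 4,
        batch_size is 3/1/32 depending on the test, num_actions is 6 (the one-hot width)."""

    def insert(self, frame, action, reward, terminal):
        """Append one transition, overwriting the oldest entry once capacity is reached."""

    def get_current_state(self):
        """Return the most recent history_length frames stacked along the last axis,
        i.e. an array of shape frame_shape + (history_length,)."""

    def sample(self):
        """Return a 5-tuple (states, one_hot_actions, rewards, next_states, terminals)
        of batch_size transitions whose first state contains no terminal frame."""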