def test_reward():
    """Check the reward for a step that does not end on the goal.

    Starting from state ``[0, 0]`` with goal ``[1, 0]``, taking action ``1``
    is expected to yield a reward of ``-1`` (presumably because the resulting
    state differs from the goal -- confirm against ``BitFlip.step``).
    """
    start = np.array([0, 0])
    target = np.array([1, 0])

    env = BitFlip(2)
    env.state = start
    env.goal = target

    # Only the reward (second element of the 4-tuple) matters here.
    _obs, reward, _done, _info = env.step(1)

    assert reward == -1
def test_reward_expected_success():
    """Check the reward and termination flag for a step expected to succeed.

    Starting from state ``[0, 0]`` with goal ``[1, 0]``, taking action ``0``
    is expected to yield a reward of ``0`` and a truthy termination flag
    (presumably because the resulting state equals the goal -- confirm
    against ``BitFlip.step``).
    """
    bit_flip = BitFlip(2)
    bit_flip.state = np.array([0, 0])
    bit_flip.goal = np.array([1, 0])

    _, reward, terminate, _ = bit_flip.step(0)

    assert reward == 0
    # Rely on truthiness instead of comparing to True with `==` (PEP 8);
    # this also works when the flag is a NumPy boolean.
    assert terminate
def test_bitflip_1():
    """Check that action ``0`` on a 1-bit environment turns ``[0]`` into ``[1]``."""
    expected = np.array([1])

    env = BitFlip(1)
    env.state = np.array([0])
    env.step(0)

    np.testing.assert_array_equal(env.state, expected)
def test_bitflip_2():
    """Check that action ``1`` on a 2-bit environment turns ``[0, 0]`` into ``[0, 1]``."""
    expected = np.array([0, 1])

    env = BitFlip(2)
    env.state = np.array([0, 0])
    env.step(1)

    np.testing.assert_array_equal(env.state, expected)