Ejemplo n.º 1
0
def test_reward():
    """Verify the reward of a step that does not reach the goal.

    With the state pinned to ``[0, 0]`` and the goal to ``[1, 0]``,
    calling ``step(1)`` is expected to report a reward of ``-1``.
    """
    expected_reward = -1

    env = BitFlip(2)
    # Overwrite state and goal directly so the scenario is fully
    # determined by this test rather than by the constructor.
    env.state = np.array([0, 0])
    env.goal = np.array([1, 0])

    # step() must return exactly four values; only the second one
    # (the reward) matters for this check.
    step_result = env.step(1)
    _, reward, _, _ = step_result

    assert reward == expected_reward
Ejemplo n.º 2
0
def test_reward_expected_success():
    """Verify reward and termination flag for a step that reaches the goal.

    With the state pinned to ``[0, 0]`` and the goal to ``[1, 0]``,
    calling ``step(0)`` is expected to report a reward of ``0`` and a
    truthy termination flag.
    """
    bit_flip = BitFlip(2)
    # Overwrite state and goal directly so the scenario is fully
    # determined by this test rather than by the constructor.
    bit_flip.state = np.array([0, 0])
    bit_flip.goal = np.array([1, 0])
    _, reward, terminate, _ = bit_flip.step(0)

    assert reward == 0
    # Rely on truthiness instead of ``== True`` (PEP 8); this also
    # accepts NumPy boolean scalars.
    assert terminate