def test_environment_actions():
    """Check that each step leaves the environment in the expected state.

    Starting from a fixed 10-element state, every action is applied in turn
    and ``env.next_state`` is copied back into ``env.state`` before the
    comparison, so each step builds on the result of the previous one.
    """
    env = Bit_Flipping_Environment(5)
    env.reset()
    env.state = [1, 0, 0, 1, 0, 1, 0, 0, 1, 0]

    # Each entry: (action, state expected once that action has been applied).
    transitions = (
        (0, [0, 0, 0, 1, 0, 1, 0, 0, 1, 0]),  # index 0 goes 1 -> 0
        (0, [1, 0, 0, 1, 0, 1, 0, 0, 1, 0]),  # index 0 goes back to 1
        (3, [1, 0, 0, 0, 0, 1, 0, 0, 1, 0]),  # index 3 goes 1 -> 0
        # Action 6 is expected to leave the state untouched -- presumably
        # because it is outside the 5 flippable bits; confirm against the
        # environment implementation.
        (6, [1, 0, 0, 0, 0, 1, 0, 0, 1, 0]),
    )

    for action, expected_state in transitions:
        env.step(action)
        env.state = env.next_state
        assert env.state == expected_state
def test_environment_goal_achievement():
    """Check the reward and achieved goal reported as bits are flipped.

    The desired goal is forced to all zeros. Three steps are expected to
    yield a reward of -1 each, and the final step is expected to yield a
    reward of 5.
    """
    env = Bit_Flipping_Environment(5)
    env.reset()
    env.state = [1, 0, 0, 1, 0, 0, 0, 0, 0, 0]
    env.desired_goal = [0, 0, 0, 0, 0]

    # Each entry: (action, achieved goal expected after that action).
    # NOTE(review): the expected values match the first five state entries
    # after the flip -- confirm that is how the env derives achieved_goal.
    non_goal_steps = (
        (0, [0, 0, 0, 1, 0]),
        (2, [0, 0, 1, 1, 0]),
        (2, [0, 0, 0, 1, 0]),
    )

    for action, expected_achieved in non_goal_steps:
        env.step(action)
        # The reward is checked before the state is advanced, as in every step.
        assert env.reward == -1
        env.state = env.next_state
        assert env.achieved_goal == expected_achieved

    # Flipping index 3 should clear the last set bit, matching the all-zero
    # desired goal; the test expects a reward of 5 for this step.
    env.step(3)
    assert env.reward == 5