def test_environment_actions():
    """Check that each call to ``step`` produces the expected next state.

    Starting from a fixed 10-element state, a sequence of actions is applied
    and the resulting ``next_state`` is compared with a hand-written
    expectation after every step.
    """
    env = Bit_Flipping_Environment(5)
    env.reset()
    # Overwrite whatever reset() produced so the expectations are deterministic.
    # NOTE(review): presumably the first five entries are the bits and the last
    # five the goal -- confirm against the environment implementation.
    env.state = [1, 0, 0, 1, 0, 1, 0, 0, 1, 0]

    # Pairs of (action, state expected once that action has been applied).
    transitions = (
        (0, [0, 0, 0, 1, 0, 1, 0, 0, 1, 0]),  # index 0 flips 1 -> 0
        (0, [1, 0, 0, 1, 0, 1, 0, 0, 1, 0]),  # index 0 flips back 0 -> 1
        (3, [1, 0, 0, 0, 0, 1, 0, 0, 1, 0]),  # index 3 flips 1 -> 0
        (6, [1, 0, 0, 0, 0, 1, 0, 0, 1, 0]),  # action 6 must leave the state as is
    )
    for action, expected_state in transitions:
        env.step(action)
        # The test advances the environment by hand: step() is only relied on
        # to populate next_state.
        env.state = env.next_state
        assert env.state == expected_state
def test_environment_goal_achievement():
    """Check the reward and achieved goal reported while working towards a goal.

    With an all-zero desired goal, three steps are expected to yield a reward
    of -1 each and a final step is expected to yield a reward of 5.
    """
    env = Bit_Flipping_Environment(5)
    env.reset()
    # Pin both the state and the goal so the outcome does not depend on reset().
    env.state = [1, 0, 0, 1, 0, 0, 0, 0, 0, 0]
    env.desired_goal = [0, 0, 0, 0, 0]

    # Pairs of (action, achieved_goal expected after that action); each of
    # these steps is expected to report a reward of -1.
    unfinished_steps = (
        (0, [0, 0, 0, 1, 0]),
        (2, [0, 0, 1, 1, 0]),
        (2, [0, 0, 0, 1, 0]),
    )
    for action, expected_achieved in unfinished_steps:
        env.step(action)
        assert env.reward == -1
        # Advance by hand before inspecting achieved_goal, as the original
        # sequence does.
        env.state = env.next_state
        assert env.achieved_goal == expected_achieved

    # Flipping index 3 presumably clears the last set bit so the achieved goal
    # matches the desired goal; the test only pins the resulting reward.
    env.step(3)
    assert env.reward == 5