def test_play_the_game_less_badly():
    """Measure the same bad seed on both steps of a two-step episode.

    The first step must not be terminal and must give zero reward; the
    second step must be terminal with a reward of 2.0. After each step the
    measurement counts must show only the chosen seed, with one more
    measurement than before, and exactly one distinct seed measured.
    """
    env = BadSeeds01(seed_count=5, bad_seed_count=3, max_episode_length=2)
    bad_ndx = env.bad_seed_indices[0]

    # Built lazily from the first counts array so it has the same shape/dtype.
    expected_counts = None

    # (time step, expected terminal flag, expected reward)
    step_expectations = (
        (0, False, 0.0),
        (1, True, 2.0),
    )
    for step, want_terminal, want_reward in step_expectations:
        next_state, terminal, reward = env.execute(actions=bad_ndx)
        assert next_state[step, bad_ndx] != 0.0
        assert terminal is want_terminal
        assert reward == want_reward

        counts, seeds_measured = count_measurements(env.state)
        print(counts)
        if expected_counts is None:
            expected_counts = np.zeros_like(counts)
        # The same seed is measured again on every step.
        expected_counts[0, bad_ndx] += 1.0
        assert np.all(counts == expected_counts)
        assert seeds_measured == 1
def test_play_the_game_badly():
    """Measure every seed except the last, then measure the first seed again.

    Each of the first four steps must be non-terminal with zero reward. The
    fifth step must be terminal, still with zero reward, and the final
    measurement counts must equal ``[[2, 1, 1, 1, 0]]`` with four distinct
    seeds measured.
    """
    bad_seeds_01_env = BadSeeds01(seed_count=5, bad_seed_count=3, max_episode_length=5)
    seed_count = len(bad_seeds_01_env.all_seeds)

    # measure all seeds but the last seed; seed i is measured at time step i
    for seed_i in range(seed_count - 1):
        time_i = seed_i
        next_state, terminal, reward = bad_seeds_01_env.execute(actions=seed_i)
        assert next_state[time_i, seed_i] != 0.0
        assert terminal is False
        assert reward == 0.0

        # measurement_counts looks like this
        #   time_i = 0: [1 0 0 0 0 ]
        #   time_i = 1: [1 1 0 0 0 ]
        #   ...
        #   time_i = 3: [1 1 1 1 0 ]
        measurement_counts, measured_seed_counts = count_measurements(
            bad_seeds_01_env.state)
        # Check every seed measured so far *including* seed_i itself;
        # range(seed_i) would skip the seed that was just measured and
        # check nothing at all on the first step.
        for seed_j in range(seed_i + 1):
            assert measurement_counts[0, seed_j] == 1
        assert measured_seed_counts == (seed_i + 1)

    # measure the first seed again
    # no reward because the last seed is never measured
    next_state, terminal, reward = bad_seeds_01_env.execute(actions=0)
    assert next_state[seed_count - 1, 0] != 0.0
    assert terminal is True
    assert reward == 0.0

    measurement_counts, measured_seed_counts = count_measurements(
        bad_seeds_01_env.state)
    assert np.all(measurement_counts == np.array([[2, 1, 1, 1, 0]]))
    assert measured_seed_counts == 4
def test_count_measurements():
    """Check count_measurements against a hand-built 6x4 state array.

    The columns of the array contain 1, 3, 2 and 0 nonzero entries
    respectively, so three columns have at least one nonzero entry.
    """
    expected_counts = np.array([1, 3, 2, 0])
    expected_measured_seeds = 3

    rows = [
        [0.0, 0.5, 0.0, 0.0],
        [0.0, 0.0, -0.5, 0.0],
        [0.5, 0.0, 0.0, 0.0],
        [0.0, -0.5, 0.0, 0.0],
        [0.0, 0.0, 0.5, 0.0],
        [0.0, 0.5, 0.0, 0.0],
    ]
    state = np.array(rows)

    result = count_measurements(time_steps_by_seeds_state=state)
    measurement_counts, measured_seed_counts = result

    assert np.all(measurement_counts == expected_counts)
    assert measured_seed_counts == expected_measured_seeds