def test_measure(self):
    """Check DistributionComparisonMetric against a RandomAgent rollout.

    Runs a seeded RandomAgent in a two-location environment with attention
    replacement enabled, then compares the measured state distribution,
    action distribution and their distance with the expected values.
    """
    # The rollout length and the value handed to the metric must agree.
    n_steps = 250

    params = attention_allocation.Params()
    params.incident_rates = [4.0, 2.0]
    params.attention_replacement = True

    # Build the environment once; a second construction would only discard
    # the first instance.
    env = attention_allocation.LocationAllocationEnv(params)
    env.seed(100)

    agent = random_agents.RandomAgent(
        env.action_space, None, env.observation_space
    )
    agent.seed(100)

    observation = env.reset()
    done = False
    for _ in range(n_steps):
        action = agent.act(observation, done)
        observation, _, done, _ = env.step(action)

    metric = distribution_comparison_metrics.DistributionComparisonMetric(
        env, "incidents_seen", n_steps
    )
    state_dist, action_dist, distance = metric.measure(env)

    # Expected state distribution is the incident rates normalised to sum to 1.
    expected_state_dist = env.state.params.incident_rates / np.sum(
        env.state.params.incident_rates
    )
    # Expected action distribution is uniform because RandomAgent is random.
    expected_action_dist = [0.5, 0.5]
    expected_distance = np.linalg.norm(
        expected_state_dist - expected_action_dist
    )

    self.assertTrue(
        np.all(np.isclose(state_dist, expected_state_dist, atol=0.05))
    )
    self.assertTrue(
        np.all(np.isclose(action_dist, expected_action_dist, atol=0.05))
    )
    self.assertTrue(np.isclose(distance, expected_distance, atol=0.1))
def test_MLE_rate_estimation(self):
    """Check that the agent's beliefs end up near the true incident rates.

    Runs a seeded MLEProbabilityMatchingAgent (epsilon 0, interval 200) for
    200 steps and requires every belief to be within 0.5 of the matching
    entry in the environment parameters' incident_rates.
    """
    n_steps = 200
    seed = 0

    env_params = attention_allocation.Params()
    env_params.prior_incident_counts = (500, 500)
    env_params.n_attention_units = 5

    def incidents_seen_features(obs):
        # Feature selection restricted to the "incidents_seen" key.
        return allocation_agents._get_added_vector_features(
            obs, env_params.n_locations, keys=["incidents_seen"]
        )

    agent_params = allocation_agents.MLEProbabilityMatchingAgentParams()
    agent_params.feature_selection_fn = incidents_seen_features
    agent_params.interval = 200
    agent_params.epsilon = 0

    env = attention_allocation.LocationAllocationEnv(env_params)
    agent = allocation_agents.MLEProbabilityMatchingAgent(
        action_space=env.action_space,
        reward_fn=lambda x: None,
        observation_space=env.observation_space,
        params=agent_params,
    )

    # Seed both sources of randomness so the rollout is reproducible.
    agent.rng.seed(seed)
    env.seed(seed)

    observation = env.reset()
    done = False
    for _ in range(n_steps):
        action = agent.act(observation, done)
        observation, _, done, _ = env.step(action)

    estimated_rates = list(agent.beliefs)
    true_rates = list(env_params.incident_rates)
    self.assertTrue(np.allclose(estimated_rates, true_rates, atol=0.5))
def test_episode_done_raises_error(self):
    """Check that acting with done=True raises core.EpisodeDoneError."""
    env = attention_allocation.LocationAllocationEnv()
    agent = allocation_agents.NaiveProbabilityMatchingAgent(
        action_space=env.action_space,
        observation_space=env.observation_space,
        reward_fn=None,
    )
    first_observation = env.reset()

    self.assertRaises(
        core.EpisodeDoneError, agent.act, first_observation, done=True
    )
def test_can_interact_with_attention_env(self):
    """Run the shared test simulation with an MLEProbabilityMatchingAgent.

    NOTE(review): a later test in this file has the same method name; if
    both live in one class the later definition replaces this one and this
    test never runs -- confirm they belong to different test classes.
    """
    env = attention_allocation.LocationAllocationEnv()
    agent_kwargs = {
        "action_space": env.action_space,
        "observation_space": env.observation_space,
        "reward_fn": None,
        "params": None,
    }
    agent = allocation_agents.MLEProbabilityMatchingAgent(**agent_kwargs)

    test_util.run_test_simulation(env=env, agent=agent)
def test_dynamic_rate_change(self):
    """Check the incident rates after one step with dynamic_rate set.

    Starts from rates [4.0, 2.0] with dynamic_rate 0.1 and two attention
    units, applies the action [2, 0] once, and expects the environment's
    incident rates to become [3.8, 2.1].
    """
    params = attention_allocation.Params()
    params.dynamic_rate = 0.1
    params.incident_rates = [4.0, 2.0]
    params.n_attention_units = 2

    env = attention_allocation.LocationAllocationEnv(params=params)
    env.seed(0)
    env.step(action=np.array([2, 0]))

    new_rates = env.state.params.incident_rates
    expected_rates = [3.8, 2.1]

    # The rates are produced by floating-point arithmetic, so compare with a
    # tight tolerance rather than exact equality. assert_allclose also fails
    # on a length mismatch, so the shape check of assertEqual is kept.
    np.testing.assert_allclose(new_rates, expected_rates)
def test__allocate_by_counts(self):
    """Check allocation proportions match probabilities from counts."""
    env = attention_allocation.LocationAllocationEnv()
    agent = allocation_agents.NaiveProbabilityMatchingAgent(
        action_space=env.action_space,
        observation_space=env.observation_space,
        reward_fn=None,
    )

    counts = [3, 6, 8]
    n_resource = 20
    n_samples = 100
    samples = [agent._allocate(n_resource, counts) for _ in range(n_samples)]

    # Share of the total count held by each entry of `counts`.
    counts_normalized = np.asarray(counts) / float(np.sum(counts))
    # Share of all allocated units that went to each position across samples.
    samples_normalized = np.sum(samples, axis=0) / float(np.sum(samples))

    self.assertTrue(
        np.allclose(counts_normalized, samples_normalized, atol=0.05)
    )
def test_allocate_by_counts_zero(self):
    """Check allocations are even when counts are zero.

    The mean allocation at each position must lie within one standard
    deviation (taken over all sampled values) of n_resource / len(counts).
    """
    env = attention_allocation.LocationAllocationEnv()
    agent = allocation_agents.NaiveProbabilityMatchingAgent(
        action_space=env.action_space,
        observation_space=env.observation_space,
        reward_fn=None,
    )

    counts = [0, 0, 0]
    n_resource = 15
    n_samples = 100
    samples = [agent._allocate(n_resource, counts) for _ in range(n_samples)]

    mean_samples = np.sum(samples, axis=0) / float(n_samples)
    expected_mean = n_resource / float(len(counts))
    std_dev = np.std(samples)

    deviations = np.abs(mean_samples - expected_mean)
    self.assertTrue(np.all(deviations < std_dev))
def test_update_counts(self):
    """Check that counts are updated correctly given an observation.

    With decay_prob set to 0, updating counts [3, 6, 8] with the observation
    [1, 2, 0] is expected to give [4, 8, 8].
    """
    env = attention_allocation.LocationAllocationEnv()

    agent_params = allocation_agents.NaiveProbabilityMatchingAgentParams()
    agent_params.decay_prob = 0
    agent = allocation_agents.NaiveProbabilityMatchingAgent(
        action_space=env.action_space,
        observation_space=env.observation_space,
        reward_fn=None,
        params=agent_params,
    )

    prior_counts = [3, 6, 8]
    observation = np.array([1, 2, 0])
    expected_counts = [4, 8, 8]

    updated_counts = agent._update_beliefs(observation, prior_counts)

    self.assertTrue(np.equal(updated_counts, expected_counts).all())
def test_allocate_beliefs_greedy(self):
    """Check the MLEGreedyAgent allocation for fixed beliefs.

    With epsilon 0.0, allocating 5 units over beliefs [5, 2, 1, 1] is
    expected to give [4, 1, 0, 0].
    """
    env_params = attention_allocation.Params(
        n_locations=4,
        prior_incident_counts=(10, 10, 10, 10),
        n_attention_units=5,
        incident_rates=[0, 0, 0, 0],
    )
    env = attention_allocation.LocationAllocationEnv(params=env_params)

    agent = allocation_agents.MLEGreedyAgent(
        action_space=env.action_space,
        observation_space=env.observation_space,
        reward_fn=rewards.VectorSumReward("incidents_seen"),
        params=allocation_agents.MLEGreedyAgentParams(epsilon=0.0),
    )

    beliefs = [5, 2, 1, 1]
    expected_allocation = [4, 1, 0, 0]

    allocation = agent._allocate(5, beliefs)

    self.assertTrue(np.equal(allocation, expected_allocation).all())
def test_allocate_beliefs_fair_unsatisfiable(self):
    """Check that _allocate raises gym.error.InvalidAction with alpha=0.25.

    NOTE(review): presumably alpha is a fairness constraint that cannot be
    met for beliefs [5, 2, 1, 1] with 5 units -- confirm against the agent.
    """
    env_params = attention_allocation.Params(
        n_locations=4,
        prior_incident_counts=(10, 10, 10, 10),
        n_attention_units=5,
        incident_rates=[0, 0, 0, 0],
    )
    env = attention_allocation.LocationAllocationEnv(params=env_params)

    agent = allocation_agents.MLEGreedyAgent(
        action_space=env.action_space,
        observation_space=env.observation_space,
        reward_fn=rewards.VectorSumReward("incidents_seen"),
        params=allocation_agents.MLEGreedyAgentParams(epsilon=0.0, alpha=0.25),
    )

    self.assertRaises(
        gym.error.InvalidAction, agent._allocate, 5, [5, 2, 1, 1]
    )
def test_metric_multiple(self):
    """Check that two Metric instances extract equal histories from one env."""
    env = attention_allocation.LocationAllocationEnv()
    agent = random_agents.RandomAgent(
        env.action_space, None, env.observation_space
    )
    env.seed(100)

    observation = env.reset()
    done = False
    for _ in range(2):
        action = agent.act(observation, done)
        observation, _, done, _ = env.step(action)

    # Build both metrics before extracting either history.
    metrics = [core.Metric(env) for _ in range(2)]
    first_history, second_history = [
        metric._extract_history(env) for metric in metrics
    ]

    self.assertEqual(first_history, second_history)
def test_update_state(self):
    """Check that state is correctly updated with incidents_seen.

    This test checks that the numbers of incidents_seen are no more than the
    incidents generated and, when allocating without attention replacement,
    no more than the attention deployed as specified in the action.
    """
    env = attention_allocation.LocationAllocationEnv()
    env.seed(0)
    agent = random_agents.RandomAgent(
        env.action_space, None, env.observation_space
    )

    observation = env.reset()
    action = agent.act(observation, False)

    sampled = attention_allocation._sample_incidents(
        env.state.rng, env.state.params
    )
    incidents_occurred, incidents_reported = sampled
    attention_allocation._update_state(
        env.state, incidents_occurred, incidents_reported, action
    )

    seen = env.state.incidents_seen
    self.assertTrue((seen <= incidents_occurred).all())

    if env.state.params.attention_replacement:
        # The per-location attention bound is only checked without replacement.
        return
    self.assertTrue((seen <= action).all())
def test_parties_can_interact(self):
    """Run the shared test simulation on a default LocationAllocationEnv."""
    default_env = attention_allocation.LocationAllocationEnv()
    test_util.run_test_simulation(env=default_env)
def test_can_interact_with_attention_env(self):
    """Run the shared test simulation with an MLEGreedyAgent.

    NOTE(review): an earlier test in this file has the same method name; if
    both live in one class this definition replaces the earlier one, which
    then never runs -- confirm they belong to different test classes.
    """
    env = attention_allocation.LocationAllocationEnv()
    agent_kwargs = {
        "action_space": env.action_space,
        "observation_space": env.observation_space,
        "reward_fn": None,
    }
    agent = allocation_agents.MLEGreedyAgent(**agent_kwargs)

    test_util.run_test_simulation(env=env, agent=agent)