  def test_episode_done_raises_error(self):
    """Check that acting after the episode is done raises EpisodeDoneError."""
    env = attention_allocation.LocationAllocationEnv()
    agent = allocation_agents.NaiveProbabilityMatchingAgent(
        action_space=env.action_space,
        observation_space=env.observation_space,
        reward_fn=rewards.VectorSumReward('incidents_seen'))
    observation = env.reset()
    with self.assertRaises(core.EpisodeDoneError):
      agent.act(observation, done=True)
  def test_update_counts(self):
    """Check that counts are updated correctly given an observation."""
    env = attention_allocation.LocationAllocationEnv()
    agent_params = allocation_agents.NaiveProbabilityMatchingAgentParams()
    agent_params.decay_prob = 0
    agent = allocation_agents.NaiveProbabilityMatchingAgent(
        action_space=env.action_space,
        observation_space=env.observation_space,
        reward_fn=rewards.VectorSumReward('incidents_seen'),
        params=agent_params)
    counts = [3, 6, 8]
    observation = np.array([1, 2, 0])
    updated_counts = agent._update_beliefs(observation, counts)
    # With decay disabled, each location's count grows by the observed
    # incidents: [3, 6, 8] + [1, 2, 0] = [4, 8, 8].
    self.assertTrue(np.all(np.equal(updated_counts, [4, 8, 8])))
  def test_allocate_beliefs_greedy(self):
    """Check the deterministic (epsilon=0) greedy allocation."""
    env_params = attention_allocation.Params(
        n_locations=4,
        prior_incident_counts=(10, 10, 10, 10),
        n_attention_units=5,
        incident_rates=[0, 0, 0, 0])
    env = attention_allocation.LocationAllocationEnv(params=env_params)
    agent_params = allocation_agents.MLEGreedyAgentParams(epsilon=0.0)
    agent = allocation_agents.MLEGreedyAgent(
        action_space=env.action_space,
        observation_space=env.observation_space,
        reward_fn=rewards.VectorSumReward('incidents_seen'),
        params=agent_params)
    allocation = agent._allocate(5, [5, 2, 1, 1])
    self.assertTrue(np.all(np.equal(allocation, [4, 1, 0, 0])))
  def test_allocate_beliefs_fair_unsatisfiable(self):
    """Check an unsatisfiable fairness constraint raises InvalidAction."""
    env_params = attention_allocation.Params(
        n_locations=4,
        prior_incident_counts=(10, 10, 10, 10),
        n_attention_units=5,
        incident_rates=[0, 0, 0, 0])
    env = attention_allocation.LocationAllocationEnv(params=env_params)
    agent_params = allocation_agents.MLEGreedyAgentParams(
        epsilon=0.0, alpha=0.25)
    agent = allocation_agents.MLEGreedyAgent(
        action_space=env.action_space,
        observation_space=env.observation_space,
        reward_fn=rewards.VectorSumReward('incidents_seen'),
        params=agent_params)
    with self.assertRaises(gym.error.InvalidAction):
      agent._allocate(5, [5, 2, 1, 1])
  def test_allocate_by_counts_zero(self):
    """Check allocations are even when counts are zero."""
    env = attention_allocation.LocationAllocationEnv()
    agent = allocation_agents.NaiveProbabilityMatchingAgent(
        action_space=env.action_space,
        observation_space=env.observation_space,
        reward_fn=rewards.VectorSumReward('incidents_seen'))
    counts = [0, 0, 0]
    n_resource = 15
    n_samples = 100
    samples = [agent._allocate(n_resource, counts) for _ in range(n_samples)]
    mean_samples = np.sum(samples, axis=0) / float(n_samples)
    expected_mean = n_resource / float(len(counts))
    std_dev = np.std(samples)
    # Each location's mean allocation should lie within one standard deviation
    # of an even split of the resource.
    means_close = [
        np.abs(mean - expected_mean) < std_dev for mean in mean_samples
    ]
    self.assertTrue(np.all(means_close))
  def test__allocate_by_counts(self):
    """Check allocation proportions match probabilities from counts."""
    env = attention_allocation.LocationAllocationEnv()
    agent = allocation_agents.NaiveProbabilityMatchingAgent(
        action_space=env.action_space,
        observation_space=env.observation_space,
        reward_fn=rewards.VectorSumReward('incidents_seen'))
    counts = [3, 6, 8]
    n_resource = 20
    n_samples = 100
    samples = [agent._allocate(n_resource, counts) for _ in range(n_samples)]
    # Compare normalized counts against the normalized empirical allocation
    # summed over all samples; the proportions should agree to within 0.05.
    counts_normalized = [count / float(np.sum(counts)) for count in counts]
    samples_normalized = [
        count / float(np.sum(samples)) for count in np.sum(samples, axis=0)
    ]
    self.assertTrue(
        np.all(np.isclose(counts_normalized, samples_normalized, atol=0.05)))
  def test_vector_sum_reward(self):
    reward = rewards.VectorSumReward('x')
    self.assertEqual(reward({'x': [1, 5, 2, 4, 7]}), 19)
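  # Illustrative sketch, not part of the original suite: exercises the normal
  # (not-done) path of agent.act using only APIs already used in the tests
  # above (env.reset, agent.act, VectorSumReward). The assumption that the
  # observation dict exposes an 'incidents_seen' vector with one entry per
  # location is inferred from the reward_fn used above.
  def test_act_returns_per_location_allocation_sketch(self):
    env = attention_allocation.LocationAllocationEnv()
    agent = allocation_agents.NaiveProbabilityMatchingAgent(
        action_space=env.action_space,
        observation_space=env.observation_space,
        reward_fn=rewards.VectorSumReward('incidents_seen'))
    observation = env.reset()
    action = agent.act(observation, done=False)
    # The allocation is expected to have one entry per location.
    self.assertEqual(len(action), len(observation['incidents_seen']))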