def test_episode_done_raises_error(self):
     """Check that acting with done=True raises core.EpisodeDoneError."""
     environment = attention_allocation.LocationAllocationEnv()
     agent_kwargs = {
         'action_space': environment.action_space,
         'observation_space': environment.observation_space,
         'reward_fn': rewards.VectorSumReward('incidents_seen'),
     }
     matching_agent = allocation_agents.NaiveProbabilityMatchingAgent(
         **agent_kwargs)
     initial_observation = environment.reset()
     # The agent is told the episode is already over, so act() must raise.
     with self.assertRaises(core.EpisodeDoneError):
         matching_agent.act(initial_observation, done=True)
Example #2
0
 def test_update_counts(self):
     """Check that counts are updated correctly given an observation.

     With ``decay_prob`` set to 0, prior counts [3, 6, 8] combined with the
     observation [1, 2, 0] are expected to give [4, 8, 8].
     """
     environment = attention_allocation.LocationAllocationEnv()
     # decay_prob is zeroed on the params object before the agent is built.
     params = allocation_agents.NaiveProbabilityMatchingAgentParams()
     params.decay_prob = 0
     action_space = environment.action_space
     observation_space = environment.observation_space
     incidents_reward = rewards.VectorSumReward('incidents_seen')
     matching_agent = allocation_agents.NaiveProbabilityMatchingAgent(
         action_space=action_space,
         observation_space=observation_space,
         reward_fn=incidents_reward,
         params=params)
     prior_counts = [3, 6, 8]
     seen = np.array([1, 2, 0])
     posterior_counts = matching_agent._update_beliefs(seen, prior_counts)
     matches = np.equal(posterior_counts, [4, 8, 8])
     self.assertTrue(np.all(matches))
 def test_allocate_beliefs_greedy(self):
   """Check the allocation of 5 units over beliefs [5, 2, 1, 1].

   With ``epsilon=0.0`` the agent's ``_allocate`` is expected to return
   [4, 1, 0, 0].
   """
   location_count = 4
   params_for_env = attention_allocation.Params(
       n_locations=location_count,
       prior_incident_counts=(10,) * location_count,
       n_attention_units=5,
       incident_rates=[0] * location_count)
   environment = attention_allocation.LocationAllocationEnv(
       params=params_for_env)
   greedy_agent = allocation_agents.MLEGreedyAgent(
       action_space=environment.action_space,
       observation_space=environment.observation_space,
       reward_fn=rewards.VectorSumReward('incidents_seen'),
       params=allocation_agents.MLEGreedyAgentParams(epsilon=0.0))
   result = greedy_agent._allocate(5, [5, 2, 1, 1])
   self.assertTrue(np.equal(result, [4, 1, 0, 0]).all())
 def test_allocate_beliefs_fair_unsatisfiable(self):
   """Check that ``_allocate`` raises InvalidAction with ``alpha=0.25``.

   Allocating 5 units over beliefs [5, 2, 1, 1] with ``epsilon=0.0`` and
   ``alpha=0.25`` is expected to raise ``gym.error.InvalidAction``.
   NOTE(review): the test name suggests alpha is a fairness constraint that
   cannot be met here -- confirm against the agent implementation.
   """
   location_count = 4
   params_for_env = attention_allocation.Params(
       n_locations=location_count,
       prior_incident_counts=(10,) * location_count,
       n_attention_units=5,
       incident_rates=[0] * location_count)
   environment = attention_allocation.LocationAllocationEnv(
       params=params_for_env)
   fair_params = allocation_agents.MLEGreedyAgentParams(
       epsilon=0.0, alpha=0.25)
   fair_agent = allocation_agents.MLEGreedyAgent(
       action_space=environment.action_space,
       observation_space=environment.observation_space,
       reward_fn=rewards.VectorSumReward('incidents_seen'),
       params=fair_params)
   self.assertRaises(
       gym.error.InvalidAction, fair_agent._allocate, 5, [5, 2, 1, 1])
Example #5
0
 def test_allocate_by_counts_zero(self):
     """Check allocations are even when counts are zero.

     Draws 100 allocations of 15 resource units over three zero counts and
     checks that each location's mean allocation lies within one standard
     deviation (taken over all drawn values) of an even split.
     """
     environment = attention_allocation.LocationAllocationEnv()
     matching_agent = allocation_agents.NaiveProbabilityMatchingAgent(
         action_space=environment.action_space,
         observation_space=environment.observation_space,
         reward_fn=rewards.VectorSumReward('incidents_seen'))
     counts = [0, 0, 0]
     n_resource = 15
     n_samples = 100
     draws = []
     for _ in range(n_samples):
         draws.append(matching_agent._allocate(n_resource, counts))
     per_location_mean = np.sum(draws, axis=0) / float(n_samples)
     even_share = n_resource / float(len(counts))
     # The tolerance is the spread of every drawn value, not of the means.
     spread = np.std(draws)
     deviations = np.abs(per_location_mean - even_share)
     self.assertTrue(np.all(deviations < spread))
Example #6
0
 def test__allocate_by_counts(self):
     """Check allocation proportions match probabilities from counts.

     Draws 100 allocations of 20 resource units for counts [3, 6, 8] and
     compares each location's share of everything allocated with its share
     of the counts, using an absolute tolerance of 0.05.
     """
     environment = attention_allocation.LocationAllocationEnv()
     matching_agent = allocation_agents.NaiveProbabilityMatchingAgent(
         action_space=environment.action_space,
         observation_space=environment.observation_space,
         reward_fn=rewards.VectorSumReward('incidents_seen'))
     counts = [3, 6, 8]
     n_resource = 20
     n_samples = 100
     draws = [
         matching_agent._allocate(n_resource, counts)
         for _ in range(n_samples)
     ]
     # Both totals are constant across locations, so compute them once.
     total_count = float(np.sum(counts))
     expected_shares = [count / total_count for count in counts]
     total_allocated = float(np.sum(draws))
     observed_shares = [
         allocated / total_allocated for allocated in np.sum(draws, axis=0)
     ]
     shares_close = np.isclose(expected_shares, observed_shares, atol=0.05)
     self.assertTrue(np.all(shares_close))
Example #7
0
 def test_vector_sum_reward(self):
   """Check VectorSumReward('x') gives 19, the sum of the values under 'x'."""
   sum_reward = rewards.VectorSumReward('x')
   observation = {'x': [1, 5, 2, 4, 7]}
   actual = sum_reward(observation)
   self.assertEqual(actual, 19)