def test_fixed_agent_simulation_runs_successfully(self):
     """Smoke test: a FixedJury can drive a default CollegeAdmissionsEnv.

     There are no explicit assertions; the test passes when
     `test_util.run_test_simulation` (called with `stackelberg=True`)
     completes without raising.
     """
     admissions_env = college_admission.CollegeAdmissionsEnv()
     jury = college_admission_jury.FixedJury(
         threshold=0.7,
         reward_fn=lambda x: 0,
         action_space=admissions_env.action_space,
         observation_space=admissions_env.observation_space)
     test_util.run_test_simulation(
         env=admissions_env, agent=jury, stackelberg=True)
Esempio n. 2
0
    def build_scenario(self):
        """Builds the environment and the jury agent chosen by `agent_type`.

        The environment is a `CollegeAdmissionsEnv` configured from
        `self.env_config`. The agent is selected as follows:

          * 'robust'     -> `RobustJury`, parameterised from the environment's
                            `initial_params`.
          * 'static'     -> `NaiveJury` with `freeze_classifier_after_burnin`
                            set to True.
          * 'continuous' -> `NaiveJury` with `freeze_classifier_after_burnin`
                            set to False.
          * anything else -> `FixedJury` using `self.agent_threshold`.

        Every agent is given a constant-zero reward function and the
        epsilon-greedy settings stored on this object.

        Returns:
          A tuple `(env, agent)`.
        """
        env = college_admission.CollegeAdmissionsEnv(
            user_params=self.env_config)
        kind = self.agent_type

        # Keyword arguments that every jury variant below receives.
        shared_kwargs = dict(
            action_space=env.action_space,
            reward_fn=(lambda x: 0),
            observation_space=env.observation_space,
            epsilon_greedy=self.epsilon_greedy,
            initial_epsilon_prob=self.initial_epsilon_prob,
            decay_steps=self.decay_steps,
            epsilon_prob_decay_rate=self.epsilon_prob_decay_rate)

        if kind == 'robust':
            env_params = env.initial_params
            return env, college_admission_jury.RobustJury(
                group_cost=env_params.group_cost,
                subsidize=env_params.subsidize,
                subsidy_beta=env_params.subsidy_beta,
                gaming_control=env_params.gaming_control,
                burnin=self.burnin,
                **shared_kwargs)

        if kind in ('static', 'continuous'):
            # The two naive variants differ only in whether the classifier
            # is frozen once burn-in is over.
            return env, college_admission_jury.NaiveJury(
                threshold=0,
                freeze_classifier_after_burnin=(kind == 'static'),
                burnin=self.burnin,
                **shared_kwargs)

        # Fallback for every other agent type: a fixed-threshold jury.
        return env, college_admission_jury.FixedJury(
            threshold=self.agent_threshold,
            **shared_kwargs)
 def test_agent_raises_invalid_observation_error(self):
     """`act` must raise InvalidObservationError for a malformed observation."""
     admissions_env = college_admission.CollegeAdmissionsEnv()
     jury = college_admission_jury.FixedJury(
         threshold=0.7,
         reward_fn=lambda x: 0,
         action_space=admissions_env.action_space,
         observation_space=admissions_env.observation_space)
     # A dict keyed by 0 with a string value, used as the invalid observation.
     bad_observation = {0: 'Invalid Observation'}
     with self.assertRaises(core.InvalidObservationError):
         jury.act(observation=bad_observation, done=False)
 def test_agent_produces_zero_no_epsilon_greedy(self):
     """With epsilon_greedy=False every initial action has epsilon_prob 0."""
     admissions_env = college_admission.CollegeAdmissionsEnv()
     jury = college_admission_jury.FixedJury(
         threshold=0.7,
         epsilon_greedy=False,
         reward_fn=lambda x: 0,
         action_space=admissions_env.action_space,
         observation_space=admissions_env.observation_space)
     num_samples = 10
     sampled_probs = []
     for _ in range(num_samples):
         sampled_probs.append(jury.initial_action()['epsilon_prob'])
     self.assertEqual(sampled_probs, [0] * num_samples)
 def test_agent_produces_different_epsilon_with_epsilon_greedy(self):
     """With epsilon_greedy=True the reported epsilon_prob is not constant.

     Collects epsilon_prob from one initial action and ten subsequent `act`
     calls on the same observation, and checks that more than one distinct
     value shows up.
     """
     admissions_env = college_admission.CollegeAdmissionsEnv()
     jury = college_admission_jury.FixedJury(
         threshold=0.7,
         epsilon_greedy=True,
         reward_fn=lambda x: 0,
         action_space=admissions_env.action_space,
         observation_space=admissions_env.observation_space)
     first_obs, _, is_done, _ = admissions_env.step(jury.initial_action())
     seen_probs = [float(jury.initial_action()['epsilon_prob'])]
     for _ in range(10):
         next_action = jury.act(first_obs, is_done)
         seen_probs.append(float(next_action['epsilon_prob']))
     distinct_probs = set(seen_probs)
     self.assertGreater(len(distinct_probs), 1)
 def test_agent_raises_episode_done_error(self):
     """`act` must raise EpisodeDoneError when called with done=True."""
     admissions_env = college_admission.CollegeAdmissionsEnv()
     jury = college_admission_jury.FixedJury(
         threshold=0.7,
         reward_fn=lambda x: 0,
         action_space=admissions_env.action_space,
         observation_space=admissions_env.observation_space)
     final_observation = {
         'threshold': np.array(0.5),
         'epsilon_prob': np.array(0),
     }
     with self.assertRaises(core.EpisodeDoneError):
         jury.act(observation=final_observation, done=True)
 def test_epsilon_prob_decays_as_expected(self):
     """Checks the first three epsilon_prob values against known numbers.

     With initial_epsilon_prob=0.3, decay_steps=5 and
     epsilon_prob_decay_rate=0.001, the initial action followed by two `act`
     calls should report roughly 0.3, 0.0753 and 0.0189 (absolute tolerance
     1e-2).
     """
     admissions_env = college_admission.CollegeAdmissionsEnv()
     jury = college_admission_jury.FixedJury(
         threshold=0.7,
         epsilon_greedy=True,
         initial_epsilon_prob=0.3,
         decay_steps=5,
         epsilon_prob_decay_rate=0.001,
         reward_fn=lambda x: 0,
         action_space=admissions_env.action_space,
         observation_space=admissions_env.observation_space)
     first_obs, _, is_done, _ = admissions_env.step(jury.initial_action())
     observed_probs = [float(jury.initial_action()['epsilon_prob'])]
     for _ in range(2):
         next_action = jury.act(first_obs, is_done)
         observed_probs.append(float(next_action['epsilon_prob']))
     expected_probs = [0.3, 0.0753, 0.0189]
     self.assertTrue(np.allclose(observed_probs, expected_probs, atol=1e-2))