Esempio n. 1
0
 def error_raised_when_noise_params_wrong(self):
   env = college_admission.CollegeAdmissionsEnv(
       user_params={
           'noise_params':
               params.BoundedGaussian(min=0, max=0.3, mu=0, sigma=0.00001),
       })
   with self.assertRaises(ValueError):
     env._add_noise()
Esempio n. 2
0
 def test_fixed_agent_simulation_runs_successfully(self):
     env = college_admission.CollegeAdmissionsEnv()
     agent = college_admission_jury.FixedJury(
         action_space=env.action_space,
         observation_space=env.observation_space,
         reward_fn=(lambda x: 0),
         threshold=0.7)
     test_util.run_test_simulation(env=env, agent=agent, stackelberg=True)
 def test_agent_raises_invalid_observation_error(self):
     env = college_admission.CollegeAdmissionsEnv()
     agent = college_admission_jury.FixedJury(
         action_space=env.action_space,
         observation_space=env.observation_space,
         reward_fn=(lambda x: 0),
         threshold=0.7)
     with self.assertRaises(core.InvalidObservationError):
         agent.act(observation={0: 'Invalid Observation'}, done=False)
 def test_robust_classifier_simulation_runs_successfully(self):
     env = college_admission.CollegeAdmissionsEnv()
     agent = college_admission_jury.RobustJury(
         action_space=env.action_space,
         observation_space=env.observation_space,
         reward_fn=(lambda x: 0),
         group_cost=env.initial_params.group_cost,
         burnin=10)
     test_util.run_test_simulation(env=env, agent=agent, stackelberg=True)
 def test_jury_successfully_initializes(self):
     env = college_admission.CollegeAdmissionsEnv()
     agent = college_admission_jury.NaiveJury(
         action_space=env.action_space,
         observation_space=env.observation_space,
         reward_fn=(lambda x: 0),
         threshold=0.7)
     self.assertEqual(agent.initial_action()['threshold'], 0.7)
     self.assertEqual(agent.initial_action()['epsilon_prob'], 0)
    def test_assertion_raised_when_burnin_less_than_2(self):
        env = college_admission.CollegeAdmissionsEnv()

        with self.assertRaises(ValueError):
            college_admission_jury.RobustJury(
                action_space=env.action_space,
                observation_space=env.observation_space,
                reward_fn=(lambda x: 0),
                group_cost=env.initial_params.group_cost,
                burnin=1)
 def test_agent_produces_zero_no_epsilon_greedy(self):
     env = college_admission.CollegeAdmissionsEnv()
     agent = college_admission_jury.FixedJury(
         action_space=env.action_space,
         observation_space=env.observation_space,
         reward_fn=(lambda x: 0),
         threshold=0.7,
         epsilon_greedy=False)
     epsilon_probs = [
         agent.initial_action()['epsilon_prob'] for _ in range(10)
     ]
     self.assertEqual(epsilon_probs, [0] * 10)
    def test_correct_robust_threshold_returned(self):
        env = college_admission.CollegeAdmissionsEnv()

        agent = college_admission_jury.RobustJury(
            action_space=env.action_space,
            observation_space=env.observation_space,
            reward_fn=(lambda x: 0),
            group_cost=env.initial_params.group_cost)
        agent._features = [0.1, 0.2, 0.4, 0.4, 0.5, 0.6, 0.7, 0.8]
        agent._labels = [0, 0, 1, 0, 0, 1, 1, 1]
        agent._train_model()
        self.assertEqual(agent._threshold, 0.6)
 def test_agent_produces_different_epsilon_with_epsilon_greedy(self):
     env = college_admission.CollegeAdmissionsEnv()
     agent = college_admission_jury.FixedJury(
         action_space=env.action_space,
         observation_space=env.observation_space,
         reward_fn=(lambda x: 0),
         threshold=0.7,
         epsilon_greedy=True)
     obs, _, done, _ = env.step(agent.initial_action())
     epsilon_probs = [float(agent.initial_action()['epsilon_prob'])]
     epsilon_probs.extend(
         [float(agent.act(obs, done)['epsilon_prob']) for _ in range(10)])
     self.assertGreater(len(set(epsilon_probs)), 1)
 def test_agent_raises_episode_done_error(self):
     env = college_admission.CollegeAdmissionsEnv()
     agent = college_admission_jury.FixedJury(
         action_space=env.action_space,
         observation_space=env.observation_space,
         reward_fn=(lambda x: 0),
         threshold=0.7)
     with self.assertRaises(core.EpisodeDoneError):
         agent.act(observation={
             'threshold': np.array(0.5),
             'epsilon_prob': np.array(0)
         },
                   done=True)
 def test_label_fn_returns_correct_labels(self):
     """Checks that the label function works as expected."""
     observations = {
         'test_scores_y': [0.2, 0.3, 0.4, 0.5, 0.6],
         'selected_ground_truth': [1, 0, 2, 1, 2],
         'selected_applicants': [1, 1, 0, 1, 0]
     }
     env = college_admission.CollegeAdmissionsEnv()
     agent = college_admission_jury.NaiveJury(
         action_space=env.action_space,
         observation_space=env.observation_space,
         reward_fn=(lambda x: 0),
         threshold=0.7)
     labels = agent._label_fn(observations)
     self.assertListEqual(labels, [1, 0, 1])
 def test_get_default_features_returns_same_features(self):
     """Checks that the feature selection fn works as expected."""
     observations = {
         'test_scores_y': [0.2, 0.3, 0.4, 0.5, 0.6],
         'selected_ground_truth': [1, 0, 2, 1, 2],
         'selected_applicants': [1, 1, 0, 1, 0]
     }
     env = college_admission.CollegeAdmissionsEnv()
     agent = college_admission_jury.NaiveJury(
         action_space=env.action_space,
         observation_space=env.observation_space,
         reward_fn=(lambda x: 0),
         threshold=0.7)
     features = agent._get_default_features(observations)
     self.assertListEqual(features, [0.2, 0.3, 0.5])
 def test_agent_returns_same_threshold_till_burnin_learns_and_freezes(self):
     """Tests that agent returns same threshold till burnin and freezes after."""
     env = college_admission.CollegeAdmissionsEnv()
     agent = college_admission_jury.NaiveJury(
         action_space=env.action_space,
         observation_space=env.observation_space,
         reward_fn=(lambda x: 0),
         threshold=0.3,
         burnin=4,
         freeze_classifier_after_burnin=True)
     test_util.run_test_simulation(env=env,
                                   agent=agent,
                                   num_steps=10,
                                   stackelberg=True)
     actions = [float(action['threshold']) for _, action in env.history]
     self.assertEqual(set(actions[:4]), {0.3})
     self.assertLen(set(actions), 3)
 def test_correct_max_score_change_calculated_with_subsidy(self):
     """Tests that the max gaming steps gives output as expected."""
     env = college_admission.CollegeAdmissionsEnv(
         user_params={
             'group_cost': {
                 0: 2,
                 1: 4
             },
             'subsidize': True,
             'subsidy_beta': 0.8,
             'gaming_control': np.inf
         })
     agent = college_admission_jury.RobustJury(
         action_space=env.action_space,
         observation_space=env.observation_space,
         reward_fn=(lambda x: 0),
         group_cost=env.initial_params.group_cost,
         subsidize=env.initial_params.subsidize,
         subsidy_beta=env.initial_params.subsidy_beta,
         gaming_control=env.initial_params.gaming_control)
     obs, _, _, _ = env.step(agent.initial_action())
     max_change = agent._get_max_allowed_score_change(obs)
     self.assertEqual(max_change, [0.5, 0.3125])