Example #1
0
 def test_features_manipulated_to_maximum_limit_no_control_epsilon_greedy(
         self):
     """An epsilon-greedy RobustJury with unbounded gaming control computes
     the expected maximum-manipulated features for the selected applicants."""
     user_params = {
         "num_applicants": 5,
         "gaming_control": np.inf,
         "group_cost": {0: 2, 1: 4},
     }
     env = college_admission.CollegeAdmissionsEnv(user_params=user_params)
     agent = college_admission_jury.RobustJury(
         action_space=env.action_space,
         observation_space=env.observation_space,
         reward_fn=(lambda x: 0),
         group_cost=env.initial_params.group_cost,
         gaming_control=env.initial_params.gaming_control,
         epsilon_greedy=True,
         initial_epsilon_prob=0.2,
     )
     observations = {
         "test_scores_y": np.asarray([0.2, 0.3, 0.4, 0.5, 0.4]),
         "selected_applicants": np.asarray([0, 1, 0, 1, 1]),
         "selected_ground_truth": np.asarray([2, 0, 2, 1, 1]),
         "applicant_groups": np.asarray([0, 1, 1, 0, 1]),
     }
     manipulated = agent._get_maximum_manipulated_features(observations)
     expected = [0.5, 0.9, 0.6]
     self.assertTrue(np.all(np.isclose(manipulated, expected, atol=1e-4)))
Example #2
0
 def test_features_manipulated_to_maximum_limit_with_no_control(self):
     """With unbounded gaming control, acting on observations stores the
     maximum-manipulated features for the selected applicants."""
     user_params = {
         "num_applicants": 5,
         "gaming_control": np.inf,
         "group_cost": {0: 2, 1: 4},
     }
     env = college_admission.CollegeAdmissionsEnv(user_params=user_params)
     agent = college_admission_jury.RobustJury(
         action_space=env.action_space,
         observation_space=env.observation_space,
         reward_fn=(lambda x: 0),
         group_cost=env.initial_params.group_cost,
     )
     observations = {
         "test_scores_y": np.asarray([0.2, 0.3, 0.4, 0.5, 0.4]),
         "selected_applicants": np.asarray([0, 1, 0, 1, 1]),
         "selected_ground_truth": np.asarray([2, 0, 2, 1, 1]),
         "applicant_groups": np.asarray([0, 1, 1, 0, 1]),
     }
     agent.act(observations, done=False)
     manipulated = agent._get_maximum_manipulated_features(observations)
     expected = [0.55, 1.0, 0.65]
     self.assertTrue(np.all(np.isclose(manipulated, expected, atol=1e-4)))
     # Acting should have cached exactly the maximum-manipulated features.
     self.assertEqual(agent._features,
                      agent._get_maximum_manipulated_features(observations))
Example #3
0
 def test_robust_classifier_simulation_runs_successfully(self):
     """Smoke test: a RobustJury with a burn-in completes a Stackelberg run."""
     env = college_admission.CollegeAdmissionsEnv()
     robust_agent = college_admission_jury.RobustJury(
         action_space=env.action_space,
         observation_space=env.observation_space,
         reward_fn=(lambda x: 0),
         group_cost=env.initial_params.group_cost,
         burnin=10,
     )
     test_util.run_test_simulation(env=env, agent=robust_agent, stackelberg=True)
Example #4
0
    def test_assertion_raised_when_burnin_less_than_2(self):
        """Constructing a RobustJury with burnin < 2 raises ValueError."""
        env = college_admission.CollegeAdmissionsEnv()
        jury_kwargs = dict(
            action_space=env.action_space,
            observation_space=env.observation_space,
            reward_fn=(lambda x: 0),
            group_cost=env.initial_params.group_cost,
            burnin=1,  # Too short: the jury needs at least 2 burn-in steps.
        )
        with self.assertRaises(ValueError):
            college_admission_jury.RobustJury(**jury_kwargs)
Example #5
0
    def test_correct_robust_threshold_returned(self):
        """Training on hand-seeded features/labels yields the expected threshold."""
        env = college_admission.CollegeAdmissionsEnv()
        agent = college_admission_jury.RobustJury(
            action_space=env.action_space,
            observation_space=env.observation_space,
            reward_fn=(lambda x: 0),
            group_cost=env.initial_params.group_cost,
        )
        # Bypass the environment loop and seed training data directly.
        seeded_features = [0.1, 0.2, 0.4, 0.4, 0.5, 0.6, 0.7, 0.8]
        seeded_labels = [0, 0, 1, 0, 0, 1, 1, 1]
        agent._features = seeded_features
        agent._labels = seeded_labels
        agent._train_model()
        self.assertEqual(agent._threshold, 0.6)
Example #6
0
 def test_correct_max_score_change_calculated_with_subsidy(self):
     """With subsidies enabled, the per-group max allowed score change
     matches the expected values."""
     user_params = {
         "group_cost": {0: 2, 1: 4},
         "subsidize": True,
         "subsidy_beta": 0.8,
         "gaming_control": np.inf,
     }
     env = college_admission.CollegeAdmissionsEnv(user_params=user_params)
     agent = college_admission_jury.RobustJury(
         action_space=env.action_space,
         observation_space=env.observation_space,
         reward_fn=(lambda x: 0),
         group_cost=env.initial_params.group_cost,
         subsidize=env.initial_params.subsidize,
         subsidy_beta=env.initial_params.subsidy_beta,
         gaming_control=env.initial_params.gaming_control,
     )
     # One env step to obtain observations for the change computation.
     obs, _, _, _ = env.step(agent.initial_action())
     self.assertEqual(agent._get_max_allowed_score_change(obs), [0.5, 0.3125])
Example #7
0
    def build_scenario(self):
        """Returns agent and env according to provided params."""
        env = college_admission.CollegeAdmissionsEnv(
            user_params=self.env_config)

        # Epsilon-greedy exploration settings shared by every jury type.
        exploration_kwargs = dict(
            epsilon_greedy=self.epsilon_greedy,
            initial_epsilon_prob=self.initial_epsilon_prob,
            epsilon_prob_decay_rate=self.epsilon_prob_decay_rate,
            decay_steps=self.decay_steps,
        )

        if self.agent_type == "robust":
            agent = college_admission_jury.RobustJury(
                action_space=env.action_space,
                reward_fn=(lambda x: 0),
                observation_space=env.observation_space,
                group_cost=env.initial_params.group_cost,
                subsidize=env.initial_params.subsidize,
                subsidy_beta=env.initial_params.subsidy_beta,
                gaming_control=env.initial_params.gaming_control,
                burnin=self.burnin,
                **exploration_kwargs,
            )
        elif self.agent_type in ("static", "continuous"):
            # "static" freezes the classifier after burn-in; "continuous"
            # keeps retraining it. Everything else is identical.
            agent = college_admission_jury.NaiveJury(
                action_space=env.action_space,
                reward_fn=(lambda x: 0),
                observation_space=env.observation_space,
                threshold=0,
                freeze_classifier_after_burnin=(self.agent_type == "static"),
                burnin=self.burnin,
                **exploration_kwargs,
            )
        else:
            agent = college_admission_jury.FixedJury(
                action_space=env.action_space,
                reward_fn=(lambda x: 0),
                observation_space=env.observation_space,
                threshold=self.agent_threshold,
                **exploration_kwargs,
            )

        return env, agent