def test_features_manipulated_to_maximum_limit_no_control_epsilon_greedy(
         self):
     env = college_admission.CollegeAdmissionsEnv(
         user_params={
             'num_applicants': 5,
             'gaming_control': np.inf,
             'group_cost': {
                 0: 2,
                 1: 4,
             }
         })
     agent = college_admission_jury.RobustJury(
         action_space=env.action_space,
         observation_space=env.observation_space,
         reward_fn=(lambda x: 0),
         group_cost=env.initial_params.group_cost,
         gaming_control=env.initial_params.gaming_control,
         epsilon_greedy=True,
         initial_epsilon_prob=0.2)
     observations = {
         'test_scores_y': np.asarray([0.2, 0.3, 0.4, 0.5, 0.4]),
         'selected_applicants': np.asarray([0, 1, 0, 1, 1]),
         'selected_ground_truth': np.asarray([2, 0, 2, 1, 1]),
         'applicant_groups': np.asarray([0, 1, 1, 0, 1])
     }
     self.assertTrue(
         np.all(
             np.isclose(
                 agent._get_maximum_manipulated_features(observations),
                 [0.5, 0.9, 0.6],
                 atol=1e-4)))
 def test_features_manipulated_to_maximum_limit_with_no_control(self):
     env = college_admission.CollegeAdmissionsEnv(
         user_params={
             'num_applicants': 5,
             'gaming_control': np.inf,
             'group_cost': {
                 0: 2,
                 1: 4
             }
         })
     agent = college_admission_jury.RobustJury(
         action_space=env.action_space,
         observation_space=env.observation_space,
         reward_fn=(lambda x: 0),
         group_cost=env.initial_params.group_cost)
     observations = {
         'test_scores_y': np.asarray([0.2, 0.3, 0.4, 0.5, 0.4]),
         'selected_applicants': np.asarray([0, 1, 0, 1, 1]),
         'selected_ground_truth': np.asarray([2, 0, 2, 1, 1]),
         'applicant_groups': np.asarray([0, 1, 1, 0, 1])
     }
     agent.act(observations, done=False)
     self.assertTrue(
         np.all(
             np.isclose(
                 agent._get_maximum_manipulated_features(observations),
                 [0.55, 1.0, 0.65],
                 atol=1e-4)))
     self.assertEqual(agent._features,
                      agent._get_maximum_manipulated_features(observations))
Example #3
    def build_scenario(self):
        """Returns agent and env according to provided params."""
        env = college_admission.CollegeAdmissionsEnv(
            user_params=self.env_config)

        if self.agent_type == 'robust':
            agent = college_admission_jury.RobustJury(
                action_space=env.action_space,
                reward_fn=(lambda x: 0),
                observation_space=env.observation_space,
                group_cost=env.initial_params.group_cost,
                subsidize=env.initial_params.subsidize,
                subsidy_beta=env.initial_params.subsidy_beta,
                gaming_control=env.initial_params.gaming_control,
                epsilon_greedy=self.epsilon_greedy,
                initial_epsilon_prob=self.initial_epsilon_prob,
                decay_steps=self.decay_steps,
                epsilon_prob_decay_rate=self.epsilon_prob_decay_rate,
                burnin=self.burnin)

        elif self.agent_type == 'static':
            agent = college_admission_jury.NaiveJury(
                action_space=env.action_space,
                reward_fn=(lambda x: 0),
                observation_space=env.observation_space,
                threshold=0,
                epsilon_greedy=self.epsilon_greedy,
                initial_epsilon_prob=self.initial_epsilon_prob,
                epsilon_prob_decay_rate=self.epsilon_prob_decay_rate,
                decay_steps=self.decay_steps,
                freeze_classifier_after_burnin=True,
                burnin=self.burnin)

        elif self.agent_type == 'continuous':
            agent = college_admission_jury.NaiveJury(
                action_space=env.action_space,
                reward_fn=(lambda x: 0),
                observation_space=env.observation_space,
                threshold=0,
                epsilon_greedy=self.epsilon_greedy,
                initial_epsilon_prob=self.initial_epsilon_prob,
                epsilon_prob_decay_rate=self.epsilon_prob_decay_rate,
                freeze_classifier_after_burnin=False,
                decay_steps=self.decay_steps,
                burnin=self.burnin)
        else:
            agent = college_admission_jury.FixedJury(
                action_space=env.action_space,
                reward_fn=(lambda x: 0),
                observation_space=env.observation_space,
                threshold=self.agent_threshold,
                epsilon_greedy=self.epsilon_greedy,
                decay_steps=self.decay_steps,
                initial_epsilon_prob=self.initial_epsilon_prob,
                epsilon_prob_decay_rate=self.epsilon_prob_decay_rate)

        return env, agent
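
For orientation, the env/agent pair returned by build_scenario is driven as a Stackelberg interaction, the same way test_util.run_test_simulation(env=env, agent=agent, stackelberg=True) drives it in the tests: the agent publishes an action (e.g. an admission threshold), applicants respond inside env.step, and the agent re-acts on the resulting observation. The sketch below uses only calls that appear in the surrounding examples (initial_action, env.step, agent.act); `runner` is a hypothetical stand-in for whatever experiment object defines build_scenario, not an API from the library.

# Sketch only: 'runner' is a hypothetical object exposing build_scenario().
env, agent = runner.build_scenario()
action = agent.initial_action()
for _ in range(10):
    # The env consumes the agent's action and returns the applicants'
    # (possibly gamed) responses as the next observation.
    observation, _, done, _ = env.step(action)
    # The agent updates on the observation and emits its next action.
    action = agent.act(observation, done)
    if done:
        break
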
 def test_robust_classifier_simulation_runs_successfully(self):
     env = college_admission.CollegeAdmissionsEnv()
     agent = college_admission_jury.RobustJury(
         action_space=env.action_space,
         observation_space=env.observation_space,
         reward_fn=(lambda x: 0),
         group_cost=env.initial_params.group_cost,
         burnin=10)
     test_util.run_test_simulation(env=env, agent=agent, stackelberg=True)
    def test_assertion_raised_when_burnin_less_than_2(self):
        env = college_admission.CollegeAdmissionsEnv()

        with self.assertRaises(ValueError):
            college_admission_jury.RobustJury(
                action_space=env.action_space,
                observation_space=env.observation_space,
                reward_fn=(lambda x: 0),
                group_cost=env.initial_params.group_cost,
                burnin=1)
    def test_correct_robust_threshold_returned(self):
        env = college_admission.CollegeAdmissionsEnv()

        agent = college_admission_jury.RobustJury(
            action_space=env.action_space,
            observation_space=env.observation_space,
            reward_fn=(lambda x: 0),
            group_cost=env.initial_params.group_cost)
        agent._features = [0.1, 0.2, 0.4, 0.4, 0.5, 0.6, 0.7, 0.8]
        agent._labels = [0, 0, 1, 0, 0, 1, 1, 1]
        agent._train_model()
        self.assertEqual(agent._threshold, 0.6)
 def test_correct_max_score_change_calculated_with_subsidy(self):
     """Tests that the max gaming steps gives output as expected."""
     env = college_admission.CollegeAdmissionsEnv(
         user_params={
             'group_cost': {
                 0: 2,
                 1: 4
             },
             'subsidize': True,
             'subsidy_beta': 0.8,
             'gaming_control': np.inf
         })
     agent = college_admission_jury.RobustJury(
         action_space=env.action_space,
         observation_space=env.observation_space,
         reward_fn=(lambda x: 0),
         group_cost=env.initial_params.group_cost,
         subsidize=env.initial_params.subsidize,
         subsidy_beta=env.initial_params.subsidy_beta,
         gaming_control=env.initial_params.gaming_control)
     obs, _, _, _ = env.step(agent.initial_action())
     max_change = agent._get_max_allowed_score_change(obs)
     self.assertEqual(max_change, [0.5, 0.3125])