def test_features_manipulated_to_maximum_limit_no_control_epsilon_greedy(
    self):
  """Max manipulated features are correct with epsilon-greedy, no gaming cap."""
  env = college_admission.CollegeAdmissionsEnv(
      user_params={
          "num_applicants": 5,
          "gaming_control": np.inf,
          "group_cost": {
              0: 2,
              1: 4,
          },
      })
  jury = college_admission_jury.RobustJury(
      action_space=env.action_space,
      observation_space=env.observation_space,
      reward_fn=(lambda x: 0),
      group_cost=env.initial_params.group_cost,
      gaming_control=env.initial_params.gaming_control,
      epsilon_greedy=True,
      initial_epsilon_prob=0.2,
  )
  obs = {
      "test_scores_y": np.asarray([0.2, 0.3, 0.4, 0.5, 0.4]),
      "selected_applicants": np.asarray([0, 1, 0, 1, 1]),
      "selected_ground_truth": np.asarray([2, 0, 2, 1, 1]),
      "applicant_groups": np.asarray([0, 1, 1, 0, 1]),
  }
  # Only the applicants with ground truth in {0, 1} contribute features.
  manipulated = jury._get_maximum_manipulated_features(obs)
  self.assertTrue(
      np.all(np.isclose(manipulated, [0.5, 0.9, 0.6], atol=1e-4)))
def test_features_manipulated_to_maximum_limit_with_no_control(self):
  """Acting without a gaming cap stores the maximally manipulated features."""
  env = college_admission.CollegeAdmissionsEnv(
      user_params={
          "num_applicants": 5,
          "gaming_control": np.inf,
          "group_cost": {
              0: 2,
              1: 4,
          },
      })
  jury = college_admission_jury.RobustJury(
      action_space=env.action_space,
      observation_space=env.observation_space,
      reward_fn=(lambda x: 0),
      group_cost=env.initial_params.group_cost,
  )
  obs = {
      "test_scores_y": np.asarray([0.2, 0.3, 0.4, 0.5, 0.4]),
      "selected_applicants": np.asarray([0, 1, 0, 1, 1]),
      "selected_ground_truth": np.asarray([2, 0, 2, 1, 1]),
      "applicant_groups": np.asarray([0, 1, 1, 0, 1]),
  }
  jury.act(obs, done=False)
  expected = jury._get_maximum_manipulated_features(obs)
  self.assertTrue(
      np.all(np.isclose(expected, [0.55, 1.0, 0.65], atol=1e-4)))
  # The agent should have recorded exactly these features during act().
  self.assertEqual(jury._features, expected)
def test_robust_classifier_simulation_runs_successfully(self):
  """Smoke test: a RobustJury completes a full Stackelberg simulation."""
  env = college_admission.CollegeAdmissionsEnv()
  jury = college_admission_jury.RobustJury(
      action_space=env.action_space,
      observation_space=env.observation_space,
      reward_fn=(lambda x: 0),
      group_cost=env.initial_params.group_cost,
      burnin=10,
  )
  test_util.run_test_simulation(env=env, agent=jury, stackelberg=True)
def test_assertion_raised_when_burnin_less_than_2(self):
  """Constructing a RobustJury with burnin < 2 must raise ValueError."""
  env = college_admission.CollegeAdmissionsEnv()
  with self.assertRaises(ValueError):
    college_admission_jury.RobustJury(
        action_space=env.action_space,
        observation_space=env.observation_space,
        reward_fn=(lambda x: 0),
        group_cost=env.initial_params.group_cost,
        burnin=1,
    )
def test_correct_robust_threshold_returned(self):
  """Training on hand-set features/labels yields the expected threshold."""
  env = college_admission.CollegeAdmissionsEnv()
  jury = college_admission_jury.RobustJury(
      action_space=env.action_space,
      observation_space=env.observation_space,
      reward_fn=(lambda x: 0),
      group_cost=env.initial_params.group_cost,
  )
  # Seed the jury's training buffers directly instead of stepping the env.
  jury._features = [0.1, 0.2, 0.4, 0.4, 0.5, 0.6, 0.7, 0.8]
  jury._labels = [0, 0, 1, 0, 0, 1, 1, 1]
  jury._train_model()
  self.assertEqual(jury._threshold, 0.6)
def test_correct_max_score_change_calculated_with_subsidy(self):
  """Tests that the max gaming steps gives output as expected."""
  env = college_admission.CollegeAdmissionsEnv(
      user_params={
          "group_cost": {
              0: 2,
              1: 4,
          },
          "subsidize": True,
          "subsidy_beta": 0.8,
          "gaming_control": np.inf,
      })
  jury = college_admission_jury.RobustJury(
      action_space=env.action_space,
      observation_space=env.observation_space,
      reward_fn=(lambda x: 0),
      group_cost=env.initial_params.group_cost,
      subsidize=env.initial_params.subsidize,
      subsidy_beta=env.initial_params.subsidy_beta,
      gaming_control=env.initial_params.gaming_control,
  )
  observation, _, _, _ = env.step(jury.initial_action())
  # With subsidy_beta = 0.8 the subsidized group's cost is scaled, so the
  # per-group max change differs: [1/2, 0.8 * 1/4 ...] -> [0.5, 0.3125].
  self.assertEqual(
      jury._get_max_allowed_score_change(observation), [0.5, 0.3125])
def build_scenario(self):
  """Returns agent and env according to provided params."""
  env = college_admission.CollegeAdmissionsEnv(user_params=self.env_config)
  # Keyword args shared by every jury flavor.
  common_kwargs = dict(
      action_space=env.action_space,
      reward_fn=(lambda x: 0),
      observation_space=env.observation_space,
      epsilon_greedy=self.epsilon_greedy,
      initial_epsilon_prob=self.initial_epsilon_prob,
      epsilon_prob_decay_rate=self.epsilon_prob_decay_rate,
      decay_steps=self.decay_steps,
  )
  if self.agent_type == "robust":
    agent = college_admission_jury.RobustJury(
        group_cost=env.initial_params.group_cost,
        subsidize=env.initial_params.subsidize,
        subsidy_beta=env.initial_params.subsidy_beta,
        gaming_control=env.initial_params.gaming_control,
        burnin=self.burnin,
        **common_kwargs)
  elif self.agent_type in ("static", "continuous"):
    # "static" freezes the classifier after burnin; "continuous" keeps
    # retraining it. Everything else about the NaiveJury is identical.
    agent = college_admission_jury.NaiveJury(
        threshold=0,
        freeze_classifier_after_burnin=(self.agent_type == "static"),
        burnin=self.burnin,
        **common_kwargs)
  else:
    agent = college_admission_jury.FixedJury(
        threshold=self.agent_threshold,
        **common_kwargs)
  return env, agent