def test_features_manipulated_to_maximum_limit_with_no_control(self):
    """Robust jury pushes selected applicants' features to the maximum with uncapped gaming."""
    environment = college_admission.CollegeAdmissionsEnv(
        user_params={
            "num_applicants": 5,
            "gaming_control": np.inf,
            "group_cost": {0: 2, 1: 4},
        })
    robust_jury = college_admission_jury.RobustJury(
        action_space=environment.action_space,
        observation_space=environment.observation_space,
        reward_fn=(lambda x: 0),
        group_cost=environment.initial_params.group_cost,
    )
    observations = {
        "test_scores_y": np.asarray([0.2, 0.3, 0.4, 0.5, 0.4]),
        "selected_applicants": np.asarray([0, 1, 0, 1, 1]),
        "selected_ground_truth": np.asarray([2, 0, 2, 1, 1]),
        "applicant_groups": np.asarray([0, 1, 1, 0, 1]),
    }
    robust_jury.act(observations, done=False)
    manipulated = robust_jury._get_maximum_manipulated_features(observations)
    self.assertTrue(np.all(np.isclose(manipulated, [0.55, 1.0, 0.65], atol=1e-4)))
    # After acting, the jury's stored features match the manipulated ones.
    self.assertEqual(
        robust_jury._features,
        robust_jury._get_maximum_manipulated_features(observations))
def test_agent_returns_correct_threshold(self):
    """A NaiveJury trained past burnin settles on a threshold near 0.55."""
    environment = college_admission.CollegeAdmissionsEnv(
        user_params={
            "gaming": False,
            "subsidize": False,
            "noise_params": params.BoundedGaussian(max=0.3, min=0, sigma=0, mu=0.1),
            "feature_params": params.GMM(
                mix_weight=[0.5, 0.5], mu=[0.5, 0.5], sigma=[0.1, 0.1]),
        })
    naive_jury = college_admission_jury.NaiveJury(
        action_space=environment.action_space,
        observation_space=environment.observation_space,
        reward_fn=(lambda x: 0),
        threshold=0,
        burnin=9,
        freeze_classifier_after_burnin=True,
    )
    test_util.run_test_simulation(
        env=environment, agent=naive_jury, num_steps=10, stackelberg=True)
    # The last recorded action holds the post-burnin learned threshold.
    learned_threshold = environment.history[-1].action["threshold"]
    self.assertTrue(np.isclose(learned_threshold, 0.55, atol=1e-2))
def test_manipulate_features_no_max_control(self):
    """Tests that features are manipulated as expected with no gaming control.

    With an infinite gaming control, scores below the 0.8 threshold are
    gamed up to it while scores already at or above it stay unchanged.
    """
    environment = college_admission.CollegeAdmissionsEnv(
        user_params={
            "num_applicants": 6,
            "gaming": True,
            "gaming_control": np.inf,
            "noise_params": params.BoundedGaussian(max=0, mu=0, min=0, sigma=0),
            "group_cost": {0: 3, 1: 4},
        })
    random_agent = random_agents.RandomAgent(
        environment.action_space,
        None,
        environment.observation_space,
        default_action={"threshold": np.array(0.8), "epsilon_prob": np.array(0)},
    )
    environment.set_scalar_reward(random_agent.reward_fn)
    action = random_agent.initial_action()
    environment.step(action)
    # Pin the state so the manipulation outcome is deterministic.
    environment.state.test_scores_x = [0.1, 0.3, 0.6, 0.7, 0.7, 0.9]
    environment.state.applicant_groups = [0, 1, 1, 1, 0, 0]
    environment.state.true_eligible = [0, 0, 1, 1, 0, 1]
    expected_scores = [0.1, 0.3, 0.8, 0.8, 0.8, 0.9]
    expected_burden = self._return_individual_burden(environment, random_agent)
    changed_scores, individual_burden = environment._manipulate_features(
        environment.state, action)
    self.assertTrue(np.all(np.isclose(expected_scores, changed_scores, atol=1e-4)))
    self.assertTrue(np.all(np.isclose(individual_burden, expected_burden, atol=1e-4)))
def test_features_manipulated_to_maximum_limit_no_control_epsilon_greedy(self):
    """Epsilon-greedy RobustJury yields the expected maximum manipulated features."""
    environment = college_admission.CollegeAdmissionsEnv(
        user_params={
            "num_applicants": 5,
            "gaming_control": np.inf,
            "group_cost": {0: 2, 1: 4},
        })
    robust_jury = college_admission_jury.RobustJury(
        action_space=environment.action_space,
        observation_space=environment.observation_space,
        reward_fn=(lambda x: 0),
        group_cost=environment.initial_params.group_cost,
        gaming_control=environment.initial_params.gaming_control,
        epsilon_greedy=True,
        initial_epsilon_prob=0.2,
    )
    observations = {
        "test_scores_y": np.asarray([0.2, 0.3, 0.4, 0.5, 0.4]),
        "selected_applicants": np.asarray([0, 1, 0, 1, 1]),
        "selected_ground_truth": np.asarray([2, 0, 2, 1, 1]),
        "applicant_groups": np.asarray([0, 1, 1, 0, 1]),
    }
    manipulated = robust_jury._get_maximum_manipulated_features(observations)
    self.assertTrue(np.all(np.isclose(manipulated, [0.5, 0.9, 0.6], atol=1e-4)))
def test_error_raised_when_noise_params_wrong(self):
    """Adding noise with these noise params raises ValueError.

    NOTE(review): the original method name lacked the ``test_`` prefix, so
    unittest discovery silently skipped it; renamed so it actually runs.
    NOTE(review): unlike test_one_sided_noise_generated_correctly, this call
    passes no rng to ``_add_noise`` — presumably the params are validated
    before the rng is used; confirm this is intentional.
    """
    env = college_admission.CollegeAdmissionsEnv(
        user_params={
            "noise_params": params.BoundedGaussian(
                min=0, max=0.3, mu=0, sigma=0.00001),
        })
    with self.assertRaises(ValueError):
        env._add_noise()
def test_cost_fn_does_not_subsidize_cost_for_group_1_with_no_subsidy(self):
    """Without subsidies each group pays its full per-group cost."""
    env = college_admission.CollegeAdmissionsEnv(
        user_params={"subsidize": False, "group_cost": {0: 3, 1: 4}})
    cost_group_0 = env._cost_function(0.8, 0)
    cost_group_1 = env._cost_function(0.8, 1)
    self.assertEqual(cost_group_0, 0.8 * 3)
    self.assertEqual(cost_group_1, 0.8 * 4)
def test_cost_fn_subsidies_cost_for_group_1_with_subsidy(self):
    """Test for groupwise cost function with subsidies enabled.

    Group 0 pays full cost; group 1's cost is scaled by subsidy_beta.
    """
    env = college_admission.CollegeAdmissionsEnv(
        user_params={
            "subsidize": True,
            "group_cost": {0: 3, 1: 4},
            "subsidy_beta": 0.6,
        })
    self.assertEqual(env._cost_function(0.8, 0), 0.8 * 3)
    self.assertEqual(env._cost_function(0.8, 1), 0.8 * 0.6 * 4)
def test_fixed_agent_simulation_runs_successfully(self):
    """A FixedJury drives a full Stackelberg simulation without errors."""
    env = college_admission.CollegeAdmissionsEnv()
    fixed_jury = college_admission_jury.FixedJury(
        action_space=env.action_space,
        observation_space=env.observation_space,
        reward_fn=(lambda x: 0),
        threshold=0.7,
    )
    test_util.run_test_simulation(env=env, agent=fixed_jury, stackelberg=True)
def test_one_sided_noise_generated_correctly(self):
    """Noise drawn with a tiny sigma concentrates at its mean of 0.2."""
    env = college_admission.CollegeAdmissionsEnv(
        user_params={
            "num_applicants": 4,
            "noise_params": params.BoundedGaussian(
                min=0, max=0.3, mu=0.2, sigma=0.00001),
        })
    sampled_noise = env._add_noise(env.state.rng)
    self.assertTrue(np.isclose(0.2, sampled_noise, atol=1e-3))
def test_robust_classifier_simulation_runs_successfully(self):
    """A RobustJury with a burnin period completes a Stackelberg simulation."""
    env = college_admission.CollegeAdmissionsEnv()
    robust_jury = college_admission_jury.RobustJury(
        action_space=env.action_space,
        observation_space=env.observation_space,
        reward_fn=(lambda x: 0),
        group_cost=env.initial_params.group_cost,
        burnin=10,
    )
    test_util.run_test_simulation(env=env, agent=robust_jury, stackelberg=True)
def test_jury_successfully_initializes(self):
    """The initial action carries the configured threshold and a zero epsilon."""
    env = college_admission.CollegeAdmissionsEnv()
    naive_jury = college_admission_jury.NaiveJury(
        action_space=env.action_space,
        observation_space=env.observation_space,
        reward_fn=(lambda x: 0),
        threshold=0.7,
    )
    self.assertEqual(naive_jury.initial_action()["threshold"], 0.7)
    self.assertEqual(naive_jury.initial_action()["epsilon_prob"], 0)
def test_agent_raises_invalid_observation_error(self):
    """Acting on a malformed observation raises InvalidObservationError."""
    env = college_admission.CollegeAdmissionsEnv()
    fixed_jury = college_admission_jury.FixedJury(
        action_space=env.action_space,
        observation_space=env.observation_space,
        reward_fn=(lambda x: 0),
        threshold=0.7,
    )
    with self.assertRaises(core.InvalidObservationError):
        fixed_jury.act(observation={0: "Invalid Observation"}, done=False)
def test_assertion_raised_when_burnin_less_than_2(self):
    """Constructing a RobustJury with burnin below 2 raises ValueError."""
    env = college_admission.CollegeAdmissionsEnv()
    with self.assertRaises(ValueError):
        college_admission_jury.RobustJury(
            action_space=env.action_space,
            observation_space=env.observation_space,
            reward_fn=(lambda x: 0),
            group_cost=env.initial_params.group_cost,
            burnin=1,
        )
def test_correct_robust_threshold_returned(self):
    """Training on hand-built features and labels yields a 0.6 threshold."""
    env = college_admission.CollegeAdmissionsEnv()
    robust_jury = college_admission_jury.RobustJury(
        action_space=env.action_space,
        observation_space=env.observation_space,
        reward_fn=(lambda x: 0),
        group_cost=env.initial_params.group_cost,
    )
    # Inject a fixed training set directly into the jury.
    robust_jury._features = [0.1, 0.2, 0.4, 0.4, 0.5, 0.6, 0.7, 0.8]
    robust_jury._labels = [0, 0, 1, 0, 0, 1, 1, 1]
    robust_jury._train_model()
    self.assertEqual(robust_jury._threshold, 0.6)
def test_is_done_when_max_steps_reached(self):
    """The simulation errors out once max_steps is exceeded."""
    env = college_admission.CollegeAdmissionsEnv(
        user_params={"num_applicants": 4, "max_steps": 8})
    random_agent = random_agents.RandomAgent(
        env.action_space,
        None,
        env.observation_space,
        default_action={"threshold": np.array(0.8), "epsilon_prob": np.array(0)},
    )
    with self.assertRaises(core.EpisodeDoneError):
        test_util.run_test_simulation(agent=random_agent, env=env, stackelberg=True)
    # The step counter stops one past max_steps.
    self.assertEqual(env.state.steps, 9)
def test_agent_produces_zero_no_epsilon_greedy(self):
    """epsilon_prob stays zero on every action when epsilon-greedy is off."""
    env = college_admission.CollegeAdmissionsEnv()
    fixed_jury = college_admission_jury.FixedJury(
        action_space=env.action_space,
        observation_space=env.observation_space,
        reward_fn=(lambda x: 0),
        threshold=0.7,
        epsilon_greedy=False,
    )
    probs = [fixed_jury.initial_action()["epsilon_prob"] for _ in range(10)]
    self.assertEqual(probs, [0] * 10)
def test_agent_raises_episode_done_error(self):
    """Acting after the episode has ended raises EpisodeDoneError."""
    env = college_admission.CollegeAdmissionsEnv()
    fixed_jury = college_admission_jury.FixedJury(
        action_space=env.action_space,
        observation_space=env.observation_space,
        reward_fn=(lambda x: 0),
        threshold=0.7,
    )
    final_observation = {
        "threshold": np.array(0.5),
        "epsilon_prob": np.array(0),
    }
    with self.assertRaises(core.EpisodeDoneError):
        fixed_jury.act(observation=final_observation, done=True)
def test_agent_produces_different_epsilon_with_epsilon_greedy(self):
    """epsilon_prob varies across successive actions when epsilon-greedy is on."""
    env = college_admission.CollegeAdmissionsEnv()
    fixed_jury = college_admission_jury.FixedJury(
        action_space=env.action_space,
        observation_space=env.observation_space,
        reward_fn=(lambda x: 0),
        threshold=0.7,
        epsilon_greedy=True,
    )
    obs, _, done, _ = env.step(fixed_jury.initial_action())
    probs = [float(fixed_jury.initial_action()["epsilon_prob"])]
    for _ in range(10):
        probs.append(float(fixed_jury.act(obs, done)["epsilon_prob"]))
    # More than one distinct value means the epsilon probability varied.
    self.assertGreater(len(set(probs)), 1)
def test_feature_noise_propagates_to_labels(self):
    """With heavy score noise, score order and eligibility order can disagree.

    NOTE(review): the original method name lacked the ``test_`` prefix, so
    unittest discovery silently skipped it; renamed so it actually runs.
    """
    env = college_admission.CollegeAdmissionsEnv(
        user_params={
            "num_applicants": 10,
            "noise_params": params.BoundedGaussian(min=0.5, max=0.5, mu=0, sigma=1),
        })
    # Fixed seed keeps the sampled scores deterministic.
    env.state.rng = np.random.RandomState(seed=100)
    env._sample_next_state_vars(env.state)
    scores = np.array(env.state.test_scores_x)
    eligible = np.array(env.state.true_eligible)
    # Check that at least one "eligible" candidate has a lower score than an
    # ineligible one.
    self.assertLess(np.min(scores[eligible == 1]), np.max(scores[eligible == 0]))
def test_label_fn_returns_correct_labels(self):
    """Checks that the label function works as expected.

    Labels are taken from the ground truth of selected applicants only.
    """
    observations = {
        "test_scores_y": [0.2, 0.3, 0.4, 0.5, 0.6],
        "selected_ground_truth": [1, 0, 2, 1, 2],
        "selected_applicants": [1, 1, 0, 1, 0],
    }
    env = college_admission.CollegeAdmissionsEnv()
    naive_jury = college_admission_jury.NaiveJury(
        action_space=env.action_space,
        observation_space=env.observation_space,
        reward_fn=(lambda x: 0),
        threshold=0.7,
    )
    self.assertListEqual(naive_jury._label_fn(observations), [1, 0, 1])
def test_get_default_features_returns_same_features(self):
    """Checks that the feature selection fn works as expected.

    Default features are the scores of the selected applicants, unchanged.
    """
    observations = {
        "test_scores_y": [0.2, 0.3, 0.4, 0.5, 0.6],
        "selected_ground_truth": [1, 0, 2, 1, 2],
        "selected_applicants": [1, 1, 0, 1, 0],
    }
    env = college_admission.CollegeAdmissionsEnv()
    naive_jury = college_admission_jury.NaiveJury(
        action_space=env.action_space,
        observation_space=env.observation_space,
        reward_fn=(lambda x: 0),
        threshold=0.7,
    )
    self.assertListEqual(
        naive_jury._get_default_features(observations), [0.2, 0.3, 0.5])
def test_unmanipulated_features_not_noisified_when_noisy_features_off(self):
    """Observed scores equal the original scores when noisy_features is off.

    NOTE(review): fixes the ``unmanipualted`` typo in the original test name.
    """
    env = college_admission.CollegeAdmissionsEnv(
        user_params={"gaming": False, "noisy_features": False})
    agent = random_agents.RandomAgent(
        env.action_space,
        None,
        env.observation_space,
        default_action={"threshold": np.array(0.8), "epsilon_prob": np.array(0)},
    )
    action = agent.initial_action()
    env.step(action)
    # Element-wise difference must be exactly zero everywhere.
    self.assertTrue(
        (np.array(env.state.original_test_scores)
         - np.array(env.state.test_scores_x) == 0).all())
def test_select_candidates(self):
    """Tests predictions by jury, given modified scores are as expected.

    Predictions follow the 0.8 threshold; ground truth of unselected
    applicants comes back masked as 2.
    """
    env = college_admission.CollegeAdmissionsEnv(user_params={"num_applicants": 4})
    random_agent = random_agents.RandomAgent(
        env.action_space,
        None,
        env.observation_space,
        default_action={"threshold": np.array(0.8), "epsilon_prob": np.array(0)},
    )
    env.set_scalar_reward(random_agent.reward_fn)
    action = random_agent.initial_action()
    _ = env.step(action)
    # Pin the post-step state for a deterministic selection.
    env.state.test_scores_y = [0.1, 0.9, 0.8, 0.79]
    env.state.true_eligible = [0, 1, 0, 1]
    predictions, selected_ground_truth = env._select_candidates(env.state, action)
    self.assertEqual(list(predictions), [0, 1, 1, 0])
    self.assertEqual(list(selected_ground_truth), [2, 1, 0, 2])
def test_agent_returns_same_threshold_till_burnin_learns_and_freezes(self):
    """Tests that agent returns same threshold till burnin and freezes after."""
    env = college_admission.CollegeAdmissionsEnv()
    naive_jury = college_admission_jury.NaiveJury(
        action_space=env.action_space,
        observation_space=env.observation_space,
        reward_fn=(lambda x: 0),
        threshold=0.3,
        burnin=4,
        freeze_classifier_after_burnin=True,
    )
    test_util.run_test_simulation(
        env=env, agent=naive_jury, num_steps=10, stackelberg=True)
    thresholds = [float(action["threshold"]) for _, action in env.history]
    # The burnin phase reuses the initial threshold on every step.
    self.assertEqual(set(thresholds[:4]), {0.3})
    self.assertLen(set(thresholds), 3)
def test_epsilon_prob_decays_as_expected(self):
    """epsilon_prob follows the configured decay schedule step by step."""
    env = college_admission.CollegeAdmissionsEnv()
    fixed_jury = college_admission_jury.FixedJury(
        action_space=env.action_space,
        observation_space=env.observation_space,
        reward_fn=(lambda x: 0),
        threshold=0.7,
        epsilon_greedy=True,
        initial_epsilon_prob=0.3,
        decay_steps=5,
        epsilon_prob_decay_rate=0.001,
    )
    obs, _, done, _ = env.step(fixed_jury.initial_action())
    probs = [float(fixed_jury.initial_action()["epsilon_prob"])]
    for _ in range(2):
        probs.append(float(fixed_jury.act(obs, done)["epsilon_prob"]))
    self.assertTrue(
        np.all(np.isclose(probs, [0.3, 0.0753, 0.0189], atol=1e-2)))
def test_manipulate_features_no_gaming(self):
    """Test features are not manipulated when gaming is off.

    Scores stay as set and every applicant's burden is zero.
    """
    environment = college_admission.CollegeAdmissionsEnv(
        user_params={
            "num_applicants": 6,
            "gaming": False,
            "group_cost": {0: 3, 1: 4},
        })
    random_agent = random_agents.RandomAgent(
        environment.action_space,
        None,
        environment.observation_space,
        default_action={"threshold": np.array(0.8), "epsilon_prob": np.array(0)},
    )
    action = random_agent.initial_action()
    environment.step(action)
    # Pin the state so the expected outputs are deterministic.
    environment.state.test_scores_x = [0.1, 0.3, 0.6, 0.7, 0.7, 0.9]
    environment.state.applicant_groups = [0, 1, 1, 1, 0, 0]
    environment.state.true_eligible = [0, 0, 1, 1, 0, 1]
    expected_scores = [0.1, 0.3, 0.6, 0.7, 0.7, 0.9]
    expected_burden = [0] * environment.state.params.num_applicants
    changed_scores, individual_burden = environment._manipulate_features(
        environment.state, action)
    self.assertTrue(np.all(np.isclose(expected_scores, changed_scores, atol=1e-4)))
    self.assertTrue(np.all(np.isclose(individual_burden, expected_burden, atol=1e-4)))
def test_correct_max_score_change_calculated_with_subsidy(self):
    """Tests that the max gaming steps gives output as expected.

    The per-group maximum score change reflects group costs and the subsidy.
    """
    env = college_admission.CollegeAdmissionsEnv(
        user_params={
            "group_cost": {0: 2, 1: 4},
            "subsidize": True,
            "subsidy_beta": 0.8,
            "gaming_control": np.inf,
        })
    robust_jury = college_admission_jury.RobustJury(
        action_space=env.action_space,
        observation_space=env.observation_space,
        reward_fn=(lambda x: 0),
        group_cost=env.initial_params.group_cost,
        subsidize=env.initial_params.subsidize,
        subsidy_beta=env.initial_params.subsidy_beta,
        gaming_control=env.initial_params.gaming_control,
    )
    obs, _, _, _ = env.step(robust_jury.initial_action())
    max_change = robust_jury._get_max_allowed_score_change(obs)
    self.assertEqual(max_change, [0.5, 0.3125])
def build_scenario(self):
    """Returns agent and env according to provided params."""
    env = college_admission.CollegeAdmissionsEnv(user_params=self.env_config)
    # Keyword arguments common to every jury type.
    common_kwargs = dict(
        action_space=env.action_space,
        reward_fn=(lambda x: 0),
        observation_space=env.observation_space,
        epsilon_greedy=self.epsilon_greedy,
        initial_epsilon_prob=self.initial_epsilon_prob,
        decay_steps=self.decay_steps,
        epsilon_prob_decay_rate=self.epsilon_prob_decay_rate,
    )
    if self.agent_type == "robust":
        agent = college_admission_jury.RobustJury(
            group_cost=env.initial_params.group_cost,
            subsidize=env.initial_params.subsidize,
            subsidy_beta=env.initial_params.subsidy_beta,
            gaming_control=env.initial_params.gaming_control,
            burnin=self.burnin,
            **common_kwargs)
    elif self.agent_type == "static":
        # Learns a threshold during burnin, then freezes it.
        agent = college_admission_jury.NaiveJury(
            threshold=0,
            freeze_classifier_after_burnin=True,
            burnin=self.burnin,
            **common_kwargs)
    elif self.agent_type == "continuous":
        # Keeps retraining after burnin.
        agent = college_admission_jury.NaiveJury(
            threshold=0,
            freeze_classifier_after_burnin=False,
            burnin=self.burnin,
            **common_kwargs)
    else:
        agent = college_admission_jury.FixedJury(
            threshold=self.agent_threshold,
            **common_kwargs)
    return env, agent
def test_parties_can_interact_no_gaming(self):
    """Test stackelberg simulation with no gaming."""
    env = college_admission.CollegeAdmissionsEnv(user_params={"gaming": False})
    random_agent = random_agents.RandomAgent(
        env.action_space, None, env.observation_space)
    test_util.run_test_simulation(agent=random_agent, env=env, stackelberg=True)
def test_invalid_noise_dist_raises_error(self):
    """An unrecognized noise distribution name is rejected at construction."""
    with self.assertRaises(ValueError):
        college_admission.CollegeAdmissionsEnv(
            user_params={"noise_dist": "random"})