# Example 1
    def build_scenario(self):
        """Returns agent and env according to provided params.

        Builds a CollegeAdmissionsEnv from ``self.env_config`` and pairs it
        with a jury agent selected by ``self.agent_type``:
          - 'robust': RobustJury configured from the env's initial params
            (group cost, subsidy, gaming control).
          - 'static': NaiveJury that freezes its classifier after burnin.
          - 'continuous': NaiveJury that keeps learning after burnin.
          - anything else: FixedJury pinned at ``self.agent_threshold``.

        Returns:
          A (env, agent) tuple.
        """
        env = college_admission.CollegeAdmissionsEnv(
            user_params=self.env_config)

        # Keyword arguments shared by every jury type; the per-type branches
        # below only add what differs.
        common_kwargs = dict(
            action_space=env.action_space,
            reward_fn=(lambda x: 0),
            observation_space=env.observation_space,
            epsilon_greedy=self.epsilon_greedy,
            initial_epsilon_prob=self.initial_epsilon_prob,
            decay_steps=self.decay_steps,
            epsilon_prob_decay_rate=self.epsilon_prob_decay_rate)

        if self.agent_type == 'robust':
            agent = college_admission_jury.RobustJury(
                group_cost=env.initial_params.group_cost,
                subsidize=env.initial_params.subsidize,
                subsidy_beta=env.initial_params.subsidy_beta,
                gaming_control=env.initial_params.gaming_control,
                burnin=self.burnin,
                **common_kwargs)

        elif self.agent_type in ('static', 'continuous'):
            # The two learning jury variants are identical except that the
            # 'static' one freezes its classifier once burnin completes.
            agent = college_admission_jury.NaiveJury(
                threshold=0,
                freeze_classifier_after_burnin=self.agent_type == 'static',
                burnin=self.burnin,
                **common_kwargs)

        else:
            agent = college_admission_jury.FixedJury(
                threshold=self.agent_threshold,
                **common_kwargs)

        return env, agent
 def test_agent_returns_correct_threshold(self):
     """A burnt-in, frozen NaiveJury should land on a threshold near 0.55."""
     user_params = {
         'gaming': False,
         'subsidize': False,
         'noise_params': params.BoundedGaussian(
             max=0.3, min=0, sigma=0, mu=0.1),
         'feature_params': params.GMM(
             mix_weight=[0.5, 0.5], mu=[0.5, 0.5], sigma=[0.1, 0.1]),
     }
     env = college_admission.CollegeAdmissionsEnv(user_params=user_params)
     jury = college_admission_jury.NaiveJury(
         action_space=env.action_space,
         observation_space=env.observation_space,
         reward_fn=lambda x: 0,
         threshold=0,
         burnin=9,
         freeze_classifier_after_burnin=True)
     test_util.run_test_simulation(
         env=env, agent=jury, num_steps=10, stackelberg=True)
     final_threshold = env.history[-1].action['threshold']
     self.assertTrue(np.isclose(final_threshold, 0.55, atol=1e-2))
 def test_simple_classifier_simulation_runs_successfully(self):
     """A fixed-threshold NaiveJury completes a simulation without error."""
     env = college_admission.CollegeAdmissionsEnv()
     jury = college_admission_jury.NaiveJury(
         action_space=env.action_space,
         observation_space=env.observation_space,
         reward_fn=lambda x: 0,
         threshold=0.7)
     test_util.run_test_simulation(env=env, agent=jury, stackelberg=True)
 def test_jury_successfully_initializes(self):
     """The jury's initial action reflects its configured threshold."""
     env = college_admission.CollegeAdmissionsEnv()
     jury = college_admission_jury.NaiveJury(
         action_space=env.action_space,
         observation_space=env.observation_space,
         reward_fn=lambda x: 0,
         threshold=0.7)
     # Query initial_action() once per field, matching the original test.
     self.assertEqual(jury.initial_action()['threshold'], 0.7)
     self.assertEqual(jury.initial_action()['epsilon_prob'], 0)
 def test_label_fn_returns_correct_labels(self):
     """Checks that the label function works as expected."""
     # Only entries where 'selected_applicants' is 1 should contribute a
     # label; their 'selected_ground_truth' values become the labels.
     observations = {
         'test_scores_y': [0.2, 0.3, 0.4, 0.5, 0.6],
         'selected_ground_truth': [1, 0, 2, 1, 2],
         'selected_applicants': [1, 1, 0, 1, 0],
     }
     env = college_admission.CollegeAdmissionsEnv()
     jury = college_admission_jury.NaiveJury(
         action_space=env.action_space,
         observation_space=env.observation_space,
         reward_fn=lambda x: 0,
         threshold=0.7)
     self.assertListEqual(jury._label_fn(observations), [1, 0, 1])
 def test_get_default_features_returns_same_features(self):
     """Checks that the feature selection fn works as expected."""
     # Only test scores for entries with 'selected_applicants' == 1 are
     # expected to survive feature selection.
     observations = {
         'test_scores_y': [0.2, 0.3, 0.4, 0.5, 0.6],
         'selected_ground_truth': [1, 0, 2, 1, 2],
         'selected_applicants': [1, 1, 0, 1, 0],
     }
     env = college_admission.CollegeAdmissionsEnv()
     jury = college_admission_jury.NaiveJury(
         action_space=env.action_space,
         observation_space=env.observation_space,
         reward_fn=lambda x: 0,
         threshold=0.7)
     self.assertListEqual(
         jury._get_default_features(observations), [0.2, 0.3, 0.5])
 def test_agent_returns_same_threshold_till_burnin_learns_and_freezes(self):
     """Tests that agent returns same threshold till burnin and freezes after."""
     env = college_admission.CollegeAdmissionsEnv()
     jury = college_admission_jury.NaiveJury(
         action_space=env.action_space,
         observation_space=env.observation_space,
         reward_fn=lambda x: 0,
         threshold=0.3,
         burnin=4,
         freeze_classifier_after_burnin=True)
     test_util.run_test_simulation(
         env=env, agent=jury, num_steps=10, stackelberg=True)
     thresholds = [float(action['threshold']) for _, action in env.history]
     # All pre-burnin steps use the initial threshold unchanged.
     self.assertEqual(set(thresholds[:4]), {0.3})
     # Only three distinct thresholds appear across the whole run.
     self.assertLen(set(thresholds), 3)