def test_fixed_agent_simulation_runs_successfully(self):
    """Smoke test: a FixedJury completes a Stackelberg test simulation."""
    environment = college_admission.CollegeAdmissionsEnv()
    jury = college_admission_jury.FixedJury(
        action_space=environment.action_space,
        observation_space=environment.observation_space,
        reward_fn=lambda unused: 0,
        threshold=0.7)
    test_util.run_test_simulation(
        env=environment, agent=jury, stackelberg=True)
def build_scenario(self):
    """Returns agent and env according to provided params.

    The jury type is selected by ``self.agent_type``:
      'robust'     -> RobustJury
      'static'     -> NaiveJury that freezes its classifier after burnin
      'continuous' -> NaiveJury that keeps updating after burnin
      anything else -> FixedJury at ``self.agent_threshold``

    Returns:
      A (CollegeAdmissionsEnv, agent) tuple.
    """
    env = college_admission.CollegeAdmissionsEnv(user_params=self.env_config)
    # Keyword arguments shared by every jury type; factored out so the
    # per-type branches only state what actually differs between them.
    common_kwargs = dict(
        action_space=env.action_space,
        observation_space=env.observation_space,
        reward_fn=(lambda x: 0),
        epsilon_greedy=self.epsilon_greedy,
        initial_epsilon_prob=self.initial_epsilon_prob,
        decay_steps=self.decay_steps,
        epsilon_prob_decay_rate=self.epsilon_prob_decay_rate)
    if self.agent_type == 'robust':
        agent = college_admission_jury.RobustJury(
            group_cost=env.initial_params.group_cost,
            subsidize=env.initial_params.subsidize,
            subsidy_beta=env.initial_params.subsidy_beta,
            gaming_control=env.initial_params.gaming_control,
            burnin=self.burnin,
            **common_kwargs)
    elif self.agent_type in ('static', 'continuous'):
        # These two types were duplicated branches differing only in
        # whether the classifier is frozen after the burnin period.
        agent = college_admission_jury.NaiveJury(
            threshold=0,
            freeze_classifier_after_burnin=(self.agent_type == 'static'),
            burnin=self.burnin,
            **common_kwargs)
    else:
        agent = college_admission_jury.FixedJury(
            threshold=self.agent_threshold, **common_kwargs)
    return env, agent
def test_agent_raises_invalid_observation_error(self):
    """A malformed observation dict must raise InvalidObservationError."""
    environment = college_admission.CollegeAdmissionsEnv()
    jury = college_admission_jury.FixedJury(
        action_space=environment.action_space,
        observation_space=environment.observation_space,
        reward_fn=lambda unused: 0,
        threshold=0.7)
    with self.assertRaises(core.InvalidObservationError):
        jury.act(observation={0: 'Invalid Observation'}, done=False)
def test_agent_produces_zero_no_epsilon_greedy(self):
    """With epsilon-greedy disabled, epsilon_prob is always zero."""
    environment = college_admission.CollegeAdmissionsEnv()
    jury = college_admission_jury.FixedJury(
        action_space=environment.action_space,
        observation_space=environment.observation_space,
        reward_fn=lambda unused: 0,
        threshold=0.7,
        epsilon_greedy=False)
    probs = []
    for _ in range(10):
        probs.append(jury.initial_action()['epsilon_prob'])
    self.assertEqual(probs, [0] * 10)
def test_agent_produces_different_epsilon_with_epsilon_greedy(self):
    """With epsilon-greedy enabled, epsilon_prob varies across actions."""
    environment = college_admission.CollegeAdmissionsEnv()
    jury = college_admission_jury.FixedJury(
        action_space=environment.action_space,
        observation_space=environment.observation_space,
        reward_fn=lambda unused: 0,
        threshold=0.7,
        epsilon_greedy=True)
    observation, _, done, _ = environment.step(jury.initial_action())
    probs = [float(jury.initial_action()['epsilon_prob'])]
    for _ in range(10):
        probs.append(float(jury.act(observation, done)['epsilon_prob']))
    # At least two distinct probabilities must have been produced.
    self.assertGreater(len(set(probs)), 1)
def test_agent_raises_episode_done_error(self):
    """Acting after the episode has ended must raise EpisodeDoneError."""
    environment = college_admission.CollegeAdmissionsEnv()
    jury = college_admission_jury.FixedJury(
        action_space=environment.action_space,
        observation_space=environment.observation_space,
        reward_fn=lambda unused: 0,
        threshold=0.7)
    final_observation = {
        'threshold': np.array(0.5),
        'epsilon_prob': np.array(0),
    }
    with self.assertRaises(core.EpisodeDoneError):
        jury.act(observation=final_observation, done=True)
def test_epsilon_prob_decays_as_expected(self):
    """epsilon_prob follows the configured exponential decay schedule."""
    environment = college_admission.CollegeAdmissionsEnv()
    jury = college_admission_jury.FixedJury(
        action_space=environment.action_space,
        observation_space=environment.observation_space,
        reward_fn=lambda unused: 0,
        threshold=0.7,
        epsilon_greedy=True,
        initial_epsilon_prob=0.3,
        decay_steps=5,
        epsilon_prob_decay_rate=0.001)
    observation, _, done, _ = environment.step(jury.initial_action())
    probs = [float(jury.initial_action()['epsilon_prob'])]
    for _ in range(2):
        probs.append(float(jury.act(observation, done)['epsilon_prob']))
    expected = [0.3, 0.0753, 0.0189]
    self.assertTrue(np.all(np.isclose(probs, expected, atol=1e-2)))