def test_agent_returns_correct_threshold(self):
    """Checks the threshold a NaiveJury plays on the last simulated step.

    Runs a 10-step Stackelberg simulation with a NaiveJury configured with
    burnin=9 and a classifier frozen after burn-in, then asserts that the
    threshold recorded in the last history entry is close to 0.55
    (np.isclose with atol=1e-2).
    """
    user_params = {
        'gaming': False,
        'subsidize': False,
        'noise_params': params.BoundedGaussian(
            max=0.3, min=0, sigma=0, mu=0.1),
        'feature_params': params.GMM(
            mix_weight=[0.5, 0.5], mu=[0.5, 0.5], sigma=[0.1, 0.1]),
    }
    admissions_env = college_admission.CollegeAdmissionsEnv(
        user_params=user_params)
    jury = college_admission_jury.NaiveJury(
        action_space=admissions_env.action_space,
        observation_space=admissions_env.observation_space,
        reward_fn=(lambda x: 0),
        threshold=0,
        burnin=9,
        freeze_classifier_after_burnin=True)

    test_util.run_test_simulation(
        env=admissions_env, agent=jury, num_steps=10, stackelberg=True)

    # Only the action taken on the final step is inspected.
    final_action = admissions_env.history[-1].action
    threshold_matches = np.isclose(final_action['threshold'], 0.55, atol=1e-2)
    self.assertTrue(threshold_matches)
def test_can_interact_with_attention_env(self):
    """Smoke test: an MLEGreedyAgent can be simulated on LocationAllocationEnv.

    There are no assertions; the test passes when the simulation helper
    returns without raising.
    """
    allocation_env = attention_allocation.LocationAllocationEnv()
    act_space = allocation_env.action_space
    obs_space = allocation_env.observation_space
    greedy_agent = allocation_agents.MLEGreedyAgent(
        action_space=act_space,
        observation_space=obs_space,
        reward_fn=None)
    test_util.run_test_simulation(env=allocation_env, agent=greedy_agent)
def test_disease_progresses_with_contact_si(self):
    """Checks that the "infected" count grows on a totally connected graph.

    Builds an SI model over a 5-node complete graph with
    infection_probability=1.0 and alternating initial health states, runs a
    RandomAgent for 10 steps, and asserts that more nodes are in the
    "infected" state afterwards than in the initial state.
    """
    population_size = 5
    num_steps = 10

    # A well-connected population: every pair of nodes shares an edge.
    contact_graph = nx.Graph()
    contact_graph.add_nodes_from(range(population_size))
    contact_graph.add_edges_from(complete_graph_edge_list(population_size))

    # Even-numbered nodes start in state 0, odd-numbered nodes in state 1.
    starting_health = [
        node % 2 for node in range(contact_graph.number_of_nodes())
    ]
    env = infectious_disease.build_si_model(
        population_graph=contact_graph,
        infection_probability=1.0,
        num_treatments=0,
        max_treatments=10,
        initial_health_state=starting_health,
    )
    agent = random_agents.RandomAgent(
        env.action_space, lambda x: 0, env.observation_space)

    # Snapshot the state now; the simulation below mutates env.state.
    state_before = copy.deepcopy(env.state)
    test_util.run_test_simulation(env=env, agent=agent, num_steps=num_steps)

    infected_after = num_in_health_state(
        env.state, env.state_name_to_index["infected"])
    infected_before = num_in_health_state(
        state_before, env.state_name_to_index["infected"])
    self.assertGreater(infected_after, infected_before)
def test_disease_does_not_progress_without_contact(self):
    """Checks that health states stay fixed when the graph has no edges.

    Builds an SI model over 50 isolated nodes with a mix of initial health
    states, runs a RandomAgent for 10 steps with num_treatments=0, and
    asserts that the population health state is identical before and after.
    """
    num_steps = 10

    # Set up an environment with some infected people but no contact between
    # members of the population (nodes only, no edges).
    graph = nx.Graph()
    graph.add_nodes_from(range(50))
    env = infectious_disease.build_si_model(
        population_graph=graph,
        infection_probability=0.5,
        num_treatments=0,
        max_treatments=10,
        # Alternate the two health states so that both are present. A uniform
        # initial state would leave the disease nothing to spread from (or
        # to), and the test would pass even if isolation were not respected.
        initial_health_state=[
            0 if i % 2 == 0 else 1 for i in range(graph.number_of_nodes())
        ],
    )
    agent = random_agents.RandomAgent(
        env.action_space, lambda x: 0, env.observation_space)
    initial_health_state = get_population_health_state(env.state)

    # Run the simulation and ensure that the population's health state does
    # not change (because there's no opportunity for disease to spread due to
    # the absence of contact between people).
    test_util.run_test_simulation(env=env, agent=agent, num_steps=num_steps)
    final_health_state = get_population_health_state(env.state)
    self.assertEqual(initial_health_state, final_health_state)
def test_oracle_maxutil_classifier_is_stable(self):
    """Checks that the oracle agent's positive global thresholds all agree.

    Simulates an OracleThresholdAgent with a SINGLE_THRESHOLD policy on
    DelayedImpactEnv, then asserts that every threshold greater than zero in
    agent.global_threshold_history is almost equal to the first such
    threshold, and that at least one such threshold exists.
    """
    env = lending.DelayedImpactEnv()
    cost_matrix = params.CostMatrix(
        fn=0, fp=-1, tp=env.initial_params.interest_rate, tn=0)
    agent_params = classifier_agents.ScoringAgentParams(
        feature_keys=['applicant_features'],
        group_key='group',
        default_action_fn=(lambda: 1),
        burnin=1,
        threshold_policy=threshold_policies.ThresholdPolicy.SINGLE_THRESHOLD,
        convert_one_hot_to_integer=True,
        cost_matrix=cost_matrix)
    agent = oracle_lending_agent.OracleThresholdAgent(
        action_space=env.action_space,
        observation_space=env.observation_space,
        reward_fn=rewards.BinarizedScalarDeltaReward('bank_cash'),
        params=agent_params,
        env=env)

    test_util.run_test_simulation(env=env, agent=agent)

    # Drop 0 threshold associated with burn-in.
    positive_thresholds = [
        thresh for thresh in agent.global_threshold_history if thresh > 0
    ]
    first_nonzero_threshold = (
        positive_thresholds[0] if positive_thresholds else None)
    for thresh in positive_thresholds:
        self.assertAlmostEqual(first_nonzero_threshold, thresh)

    # Make sure there is at least one non-zero threshold.
    self.assertIsNotNone(first_nonzero_threshold)
def test_can_run_with_env(self):
    """Smoke test: a RandomAgent can be simulated on test_util.DummyEnv.

    There are no assertions; the test passes when the simulation helper
    returns without raising.
    """
    dummy_env = test_util.DummyEnv()
    act_space = dummy_env.action_space
    obs_space = dummy_env.observation_space
    random_agent = random_agents.RandomAgent(
        action_space=act_space,
        observation_space=obs_space,
        reward_fn=None)
    test_util.run_test_simulation(env=dummy_env, agent=random_agent)
def test_can_interact_with_attention_env(self):
    """Smoke test: NaiveProbabilityMatchingAgent runs on LocationAllocationEnv.

    The agent is given a VectorSumReward over 'incidents_seen'. There are no
    assertions; the test passes when the simulation helper returns without
    raising.
    """
    allocation_env = attention_allocation.LocationAllocationEnv()
    act_space = allocation_env.action_space
    obs_space = allocation_env.observation_space
    incidents_reward = rewards.VectorSumReward('incidents_seen')
    matching_agent = allocation_agents.NaiveProbabilityMatchingAgent(
        action_space=act_space,
        observation_space=obs_space,
        reward_fn=incidents_reward)
    test_util.run_test_simulation(env=allocation_env, agent=matching_agent)
def test_fixed_agent_simulation_runs_successfully(self):
    """Smoke test: a FixedJury (threshold=0.7) runs in Stackelberg mode.

    There are no assertions; the test passes when the simulation helper
    returns without raising.
    """
    admissions_env = college_admission.CollegeAdmissionsEnv()
    act_space = admissions_env.action_space
    obs_space = admissions_env.observation_space
    fixed_jury = college_admission_jury.FixedJury(
        action_space=act_space,
        observation_space=obs_space,
        reward_fn=(lambda x: 0),
        threshold=0.7)
    test_util.run_test_simulation(
        env=admissions_env, agent=fixed_jury, stackelberg=True)
def test_final_credit_distribution_metric_can_interact_with_lending(self):
    """Smoke test: CreditDistribution metrics run against DelayedImpactEnv.

    Two metrics (step=-1 and step=0) are passed as a list to the simulation
    helper. There are no assertions; the test passes when the helper returns
    without raising.
    """
    lending_env = lending.DelayedImpactEnv()
    lending_env.set_scalar_reward(rewards.NullReward())

    # Use step=-1 to get the final credit distribution.
    final_distribution = lending_metrics.CreditDistribution(
        lending_env, step=-1)
    initial_distribution = lending_metrics.CreditDistribution(
        lending_env, step=0)

    metrics = [final_distribution, initial_distribution]
    test_util.run_test_simulation(env=lending_env, metric=metrics)
def test_robust_classifier_simulation_runs_successfully(self):
    """Smoke test: a RobustJury (burnin=10) runs in Stackelberg mode.

    The jury takes its group_cost from the environment's initial params.
    There are no assertions; the test passes when the simulation helper
    returns without raising.
    """
    admissions_env = college_admission.CollegeAdmissionsEnv()
    act_space = admissions_env.action_space
    obs_space = admissions_env.observation_space
    robust_jury = college_admission_jury.RobustJury(
        action_space=act_space,
        observation_space=obs_space,
        reward_fn=(lambda x: 0),
        group_cost=admissions_env.initial_params.group_cost,
        burnin=10)
    test_util.run_test_simulation(
        env=admissions_env, agent=robust_jury, stackelberg=True)
def test_can_interact_with_attention_env(self):
    """Smoke test: MLEProbabilityMatchingAgent runs on LocationAllocationEnv.

    The agent is built with reward_fn=None and params=None. There are no
    assertions; the test passes when the simulation helper returns without
    raising.
    """
    allocation_env = attention_allocation.LocationAllocationEnv()
    act_space = allocation_env.action_space
    obs_space = allocation_env.observation_space
    mle_agent = allocation_agents.MLEProbabilityMatchingAgent(
        action_space=act_space,
        observation_space=obs_space,
        reward_fn=None,
        params=None,
    )
    test_util.run_test_simulation(env=allocation_env, agent=mle_agent)
def test_accuracy_metric_can_interact_with_dummy(self):
    """Smoke test: AccuracyMetric can be evaluated against DummyEnv.

    The numerator function scores a history item 1 when its action equals 0
    and 0 otherwise. There are no assertions; the test passes when the
    simulation helper returns without raising.
    """

    def _is_zero(history_item):
        # A history item unpacks into two parts; only the action is used.
        _, chosen_action = history_item
        return int(chosen_action == 0)

    dummy_env = test_util.DummyEnv()
    dummy_env.set_scalar_reward(rewards.NullReward())
    accuracy = error_metrics.AccuracyMetric(
        env=dummy_env, numerator_fn=_is_zero)
    test_util.run_test_simulation(env=dummy_env, metric=accuracy)
def test_interest_exploration_can_run_with_resampling(self):
    """Smoke test: a wrapped interest-exploration env runs with resampling.

    Creates the recsim environment with 'resample_documents' enabled, wraps
    it in RecsimWrapper, and runs the simulation helper in Stackelberg mode.
    There are no assertions; the test passes when the helper returns without
    raising.
    """
    recsim_env = interest_exploration.create_environment({
        'num_candidates': 5,
        'slate_size': 2,
        'resample_documents': True,
        'seed': 100,
    })
    wrapper_params = recsim_wrapper.Params(recsim_env=recsim_env)
    wrapped_env = recsim_wrapper.RecsimWrapper(wrapper_params)
    test_util.run_test_simulation(env=wrapped_env, stackelberg=True)
def test_is_done_when_max_steps_reached(self):
    """Checks that simulating past max_steps raises EpisodeDoneError.

    The environment is configured with max_steps=8. The simulation helper is
    expected to raise core.EpisodeDoneError, after which env.state.steps is
    asserted to equal 9.
    """
    user_params = {"num_applicants": 4, "max_steps": 8}
    admissions_env = college_admission.CollegeAdmissionsEnv(
        user_params=user_params)
    fixed_action = {
        "threshold": np.array(0.8),
        "epsilon_prob": np.array(0),
    }
    agent = random_agents.RandomAgent(
        admissions_env.action_space,
        None,
        admissions_env.observation_space,
        default_action=fixed_action,
    )

    with self.assertRaises(core.EpisodeDoneError):
        test_util.run_test_simulation(
            agent=agent, env=admissions_env, stackelberg=True)

    # Checked after the `with` block: statements following the raising call
    # inside the block would never execute.
    self.assertEqual(admissions_env.state.steps, 9)
def test_interact_with_env_replicable(self):
    """Smoke test: a ClassifierAgent (burnin=5) can be simulated on DummyEnv.

    There are no assertions; the test passes when the simulation helper
    returns without raising.
    """
    dummy_env = test_util.DummyEnv()
    scoring_params = classifier_agents.ScoringAgentParams(
        default_action_fn=dummy_env.action_space.sample,
        feature_keys=['x'],
        burnin=5)
    delta_reward = rewards.BinarizedScalarDeltaReward('x')
    classifier_agent = classifier_agents.ClassifierAgent(
        action_space=dummy_env.action_space,
        observation_space=dummy_env.observation_space,
        reward_fn=delta_reward,
        params=scoring_params)
    test_util.run_test_simulation(env=dummy_env, agent=classifier_agent)
def test_interact_with_env_replicable_randomagent(self):
    """Smoke test: the infectious-disease RandomAgent runs on the karate graph.

    Nodes are ranked by eigenvector centrality (most central first). The
    nodes at positions 3 to 5 (0-based) of that ranking start in health
    state 1 and all others in state 0. There are no assertions; the test
    passes when the simulation helper returns without raising.
    """
    graph = nx.karate_club_graph()
    centrality = nx.eigenvector_centrality(graph)
    ranked_nodes = sorted(centrality, key=centrality.get, reverse=True)

    # Infect the 3rd through 5th most central people.
    seeded_nodes = ranked_nodes[3:6]
    initial_health_state = [
        int(node in seeded_nodes) for node in range(len(ranked_nodes))
    ]

    env, agent, _ = set_up_and_observe(
        population_graph=graph,
        initial_health_state=initial_health_state,
        agent_class=infectious_disease_agents.RandomAgent,
    )
    test_util.run_test_simulation(env=env, agent=agent)
def test_interact_with_env_replicable(self):
    """Smoke test: a ThresholdAgent (burnin=10) can be simulated on DummyEnv.

    The agent's classifier is not frozen after burn-in. There are no
    assertions; the test passes when the simulation helper returns without
    raising.
    """
    dummy_env = test_util.DummyEnv()
    scoring_params = classifier_agents.ScoringAgentParams(
        burnin=10,
        freeze_classifier_after_burnin=False,
        default_action_fn=dummy_env.action_space.sample,
        feature_keys=["x"],
    )
    delta_reward = rewards.BinarizedScalarDeltaReward("x")
    threshold_agent = classifier_agents.ThresholdAgent(
        action_space=dummy_env.action_space,
        observation_space=dummy_env.observation_space,
        reward_fn=delta_reward,
        params=scoring_params,
    )
    test_util.run_test_simulation(env=dummy_env, agent=threshold_agent)
def test_confusion_metric_correct_for_atomic_prediction_rule(self):
    """Checks ConfusionMetric counts for a constant scalar prediction of 1.

    Ground truth is the first element of state.x, and every step is
    stratified into group 1. Asserts 5 false positives and 5 true positives
    for group 1, and that group 0 is absent from the measurement.
    """

    def _ground_truth_fn(history_item):
        # Only the state half of the history item is needed.
        step_state, _ = history_item
        return step_state.x[0]

    env = test_util.DeterministicDummyEnv(test_util.DummyParams(dim=1))
    env.set_scalar_reward(rewards.NullReward())

    # Always predict 1.
    confusion = error_metrics.ConfusionMetric(
        env=env,
        prediction_fn=lambda x: 1,
        ground_truth_fn=_ground_truth_fn,
        stratify_fn=lambda x: 1,
    )
    measurement = test_util.run_test_simulation(
        env=env, agent=None, metric=confusion)
    logging.info("Measurement: %s.", measurement)

    # The keys in measurement are given by group membership, which in this
    # case is defined to always be 1.
    group_one = measurement[1]
    self.assertEqual(group_one.fp, 5)
    self.assertEqual(group_one.tp, 5)
    self.assertNotIn(0, measurement)
def test_confusion_metric_correct_for_sequence_prediction_rule(self):
    """Checks ConfusionMetric counts for a constant sequence prediction.

    Predictions and group labels are both length-10 lists of 1s, and ground
    truth is the whole of state.x. Asserts 50 false positives and 50 true
    positives for group 1, and that group 0 is absent from the measurement.
    """
    dim = 10

    def _ground_truth_fn(history_item):
        # Only the state half of the history item is needed.
        step_state, _ = history_item
        return step_state.x

    env = test_util.DeterministicDummyEnv(test_util.DummyParams(dim=dim))
    env.set_scalar_reward(rewards.NullReward())

    # Always predict a sequence of 1s.
    confusion = error_metrics.ConfusionMetric(
        env=env,
        prediction_fn=lambda x: [1] * dim,
        ground_truth_fn=_ground_truth_fn,
        stratify_fn=lambda x: [1] * dim,
    )
    measurement = test_util.run_test_simulation(
        env=env, agent=None, metric=confusion)
    logging.info("Measurement: %s.", measurement)

    group_one = measurement[1]
    self.assertEqual(group_one.fp, 50)
    self.assertEqual(group_one.tp, 50)
    self.assertNotIn(0, measurement)
def test_agent_returns_same_threshold_till_burnin_learns_and_freezes(self):
    """Tests that agent returns same threshold till burnin and freezes after.

    With burnin=4 and a frozen classifier, the first four recorded thresholds
    must all be 0.3 and the whole 10-step run must contain exactly three
    distinct threshold values.
    """
    admissions_env = college_admission.CollegeAdmissionsEnv()
    jury = college_admission_jury.NaiveJury(
        action_space=admissions_env.action_space,
        observation_space=admissions_env.observation_space,
        reward_fn=(lambda x: 0),
        threshold=0.3,
        burnin=4,
        freeze_classifier_after_burnin=True)
    test_util.run_test_simulation(
        env=admissions_env, agent=jury, num_steps=10, stackelberg=True)

    thresholds = [
        float(step_action['threshold'])
        for _, step_action in admissions_env.history
    ]
    burnin_thresholds = set(thresholds[:4])
    self.assertEqual(burnin_thresholds, {0.3})
    self.assertLen(set(thresholds), 3)
def test_agent_returns_same_threshold_till_burnin_and_then_change(self):
    """Tests that agent returns same threshold till burnin without freezing.

    With burnin=4 and an unfrozen classifier, the first four recorded
    thresholds must all be 0.3 and the whole 10-step run must contain more
    than four distinct threshold values.
    """
    admissions_env = college_admission.CollegeAdmissionsEnv()
    jury = college_admission_jury.NaiveJury(
        action_space=admissions_env.action_space,
        observation_space=admissions_env.observation_space,
        reward_fn=(lambda x: 0),
        threshold=0.3,
        burnin=4,
        freeze_classifier_after_burnin=False,
    )
    test_util.run_test_simulation(
        env=admissions_env, agent=jury, num_steps=10, stackelberg=True)

    thresholds = [
        float(step_action["threshold"])
        for _, step_action in admissions_env.history
    ]
    burnin_thresholds = set(thresholds[:4])
    self.assertEqual(burnin_thresholds, {0.3})

    distinct_count = len(set(thresholds))
    self.assertGreater(distinct_count, 4)
def test_oracle_lending_agent_interacts(self):
    """Smoke test: an OracleThresholdAgent can be simulated on DelayedImpactEnv.

    The cost matrix takes its true-positive value from the environment's
    interest rate. There are no assertions; the test passes when the
    simulation helper returns without raising.
    """
    lending_env = lending.DelayedImpactEnv()
    cost_matrix = params.CostMatrix(
        fn=0, fp=-1, tp=lending_env.initial_params.interest_rate, tn=0)
    agent_params = classifier_agents.ScoringAgentParams(
        feature_keys=['applicant_features'],
        group_key='group',
        default_action_fn=(lambda: 1),
        burnin=1,
        convert_one_hot_to_integer=True,
        cost_matrix=cost_matrix)
    oracle_agent = oracle_lending_agent.OracleThresholdAgent(
        action_space=lending_env.action_space,
        observation_space=lending_env.observation_space,
        reward_fn=rewards.BinarizedScalarDeltaReward('bank_cash'),
        params=agent_params,
        env=lending_env)
    test_util.run_test_simulation(env=lending_env, agent=oracle_agent)
def test_summing_metric_give_correct_sum_dummy_env(self):
    """Checks that SummingMetric measures [5] on a 1-dim deterministic env.

    The simulation runs with no agent and seed=0; every element of the
    measurement must equal 5.
    """
    dummy_env = test_util.DeterministicDummyEnv(test_util.DummyParams(dim=1))
    dummy_env.set_scalar_reward(rewards.NullReward())
    summing = value_tracking_metrics.SummingMetric(
        env=dummy_env, selection_fn=_selection_fn)

    measurement = test_util.run_test_simulation(
        dummy_env, agent=None, metric=summing, seed=0)

    elementwise_match = np.equal(measurement, [5])
    self.assertTrue(np.all(elementwise_match))
def test_metric_realigns_history(self):
    """Checks that a metric applies its realign_fn when extracting history.

    The realign function replaces every state with the constant 1, so the
    states in the extracted history must be ten 1s.
    """
    env = test_util.DummyEnv()
    agent = random_agents.RandomAgent(
        env.action_space, None, env.observation_space)
    env.set_scalar_reward(agent.reward_fn)

    def realign_fn(history):
        # Keep each action but overwrite its state with 1.
        return [(1, step_action) for _, step_action in history]

    metric = test_util.DummyMetric(env, realign_fn=realign_fn)
    test_util.run_test_simulation(env, agent, metric)

    realigned = metric._extract_history(env)
    realigned_states = [step_state for step_state, _ in realigned]
    self.assertCountEqual([1] * 10, realigned_states)
def test_recall_with_zero_denominator(self):
    """Checks that RecallMetric reports 0 when its denominator is zero.

    Predictions and ground truth are both constantly 0 and every step falls
    in group 1. After 50 steps the measurement must equal {1: 0}.
    """
    env = test_util.DeterministicDummyEnv(test_util.DummyParams(dim=1))
    env.set_scalar_reward(rewards.NullReward())

    # Ground truth is always 0, recall will have a zero denominator.
    recall = error_metrics.RecallMetric(
        env=env,
        prediction_fn=lambda x: 0,
        ground_truth_fn=lambda x: 0,
        stratify_fn=lambda x: 1)
    measurement = test_util.run_test_simulation(
        env=env, agent=None, metric=recall, num_steps=50)

    expected = {1: 0}
    self.assertEqual(expected, measurement)
def test_aggregate_metric_give_correct_sum_value_for_atomic_value(self):
    """Test aggregate metric with sum for atomic values.

    Expected values:
      group 0 = sum([-0.2 * (1 + 0)] * 1 for 5 steps) = -2
      group 1 = sum([0.3 * (1 + 1)] * 1 for 5 steps) = 1.5

    NOTE(review): the formulas above are carried over from the original
    docstring; they depend on helpers defined outside this block, so confirm
    them against _setup_test_simulation.
    """
    env, metric = _setup_test_simulation(dim=1, calc_mean=False)
    measurement = test_util.run_test_simulation(
        env=env, agent=None, metric=metric, num_steps=10)
    logging.info('Measurement result: %s.', measurement)

    # Group order is not relied upon; compare the sorted per-group values.
    per_group_values = sorted(measurement.values())
    self.assertSequenceAlmostEqual(per_group_values, [-2, 1.5], delta=1e-4)
def test_summing_metric_give_correct_sum_alloc_env(self):
    """Checks that SummingMetric measures [4, 5] on LocationAllocationEnv.

    The metric sums state.incidents_seen over a run with no agent and
    seed=0; the measurement must equal [4, 5] elementwise.
    """
    allocation_env = attention_allocation.LocationAllocationEnv()

    def _attn_alloc_selection_fn(step):
        # Only the state half of the history step is needed.
        step_state, _ = step
        return step_state.incidents_seen

    summing = value_tracking_metrics.SummingMetric(
        env=allocation_env, selection_fn=_attn_alloc_selection_fn)
    measurement = test_util.run_test_simulation(
        allocation_env, agent=None, metric=summing, seed=0)

    elementwise_match = np.equal(measurement, [4, 5])
    self.assertTrue(np.all(elementwise_match))
def test_aggregate_metric_give_correct_mean_value_for_list(self):
    """Test aggregate metric with mean for a list.

    Expected values:
      group 0 = mean([-0.2 * (1 + 0)] * 10 for 5 steps) = -0.4
      group 1 = mean([0.3 * (1 + 1)] * 10 for 5 steps) = 0.3

    NOTE(review): the formulas above are carried over from the original
    docstring; they depend on helpers defined outside this block, so confirm
    them against _setup_test_simulation.
    """
    env, metric = _setup_test_simulation(dim=10, calc_mean=True)
    measurement = test_util.run_test_simulation(
        env=env, agent=None, metric=metric, num_steps=10)
    logging.info('Measurement result: %s.', measurement)

    # Group order is not relied upon; compare the sorted per-group values.
    per_group_values = sorted(measurement.values())
    self.assertSequenceAlmostEqual(per_group_values, [-0.4, 0.3], delta=1e-4)
def test_aggregate_metric_give_correct_result_for_list_no_modifier(self):
    """Test aggregate metric with sum for a list with no modifier function.

    The metric is set up with calc_mean=False and modifier_fn=None.

    Expected values:
      group 0 = sum([0] * 10 for 5 steps) = 0
      group 1 = sum([1] * 10 for 5 steps) = 50
    """
    env, metric = _setup_test_simulation(
        dim=10, calc_mean=False, modifier_fn=None)
    measurement = test_util.run_test_simulation(
        env=env, agent=None, metric=metric, num_steps=10)
    logging.info('Measurement result: %s.', measurement)

    # Group order is not relied upon; compare the sorted per-group values.
    per_group_values = sorted(measurement.values())
    self.assertSequenceAlmostEqual(per_group_values, [0, 50], delta=1e-4)
def test_precision_with_zero_denominator(self):
    """Checks that PrecisionMetric reports 0 when its denominator is zero.

    The prediction is constantly 0, ground truth is the first element of
    state.x, and every step falls in group 1. After 50 steps the measurement
    must equal {1: 0}.
    """

    def _ground_truth_fn(history_item):
        # Only the state half of the history item is needed.
        step_state, _ = history_item
        return step_state.x[0]

    env = test_util.DeterministicDummyEnv(test_util.DummyParams(dim=1))
    env.set_scalar_reward(rewards.NullReward())

    # Always predict 0, precision will have a zero denominator.
    precision = error_metrics.PrecisionMetric(
        env=env,
        prediction_fn=lambda x: 0,
        ground_truth_fn=_ground_truth_fn,
        stratify_fn=lambda x: 1)
    measurement = test_util.run_test_simulation(
        env=env, agent=None, metric=precision, num_steps=50)

    expected = {1: 0}
    self.assertEqual(expected, measurement)