def test_disease_does_not_progress_without_contact(self):
    num_steps = 10
    # Set up an environment with some infected people but no contact between
    # members of the population.
    graph = nx.Graph()
    graph.add_nodes_from(range(50))
    env = infectious_disease.build_si_model(
        population_graph=graph,
        infection_probability=0.5,
        num_treatments=0,
        max_treatments=10,
        initial_health_state=[0 for _ in graph],
    )
    agent = random_agents.RandomAgent(env.action_space, lambda x: 0, env.observation_space)
    initial_health_state = get_population_health_state(env.state)

    # Run the simulation and ensure that the population's health state does
    # not change (because there's no opportunity for disease to spread in the
    # absence of contact between people).
    test_util.run_test_simulation(env=env, agent=agent, num_steps=num_steps)
    final_health_state = get_population_health_state(env.state)
    self.assertEqual(initial_health_state, final_health_state)

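# The infectious-disease tests rely on module-level helpers that are not shown
# in this excerpt. Below is a minimal sketch of `get_population_health_state`,
# assuming `env.state` exposes a `health_states` sequence with one entry per
# member of the population; the real helper may differ.
def get_population_health_state(state):
    """Returns a copy of the per-person health states for later comparison."""
    return copy.deepcopy(state.health_states)
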
def test_disease_progresses_with_contact_si(self):
    num_steps = 10
    population_size = 5
    # Set up a population that is well-connected (here, totally connected).
    graph = nx.Graph()
    graph.add_nodes_from(range(population_size))
    graph.add_edges_from(complete_graph_edge_list(population_size))
    env = infectious_disease.build_si_model(
        population_graph=graph,
        infection_probability=1.0,
        num_treatments=0,
        max_treatments=10,
        initial_health_state=[
            0 if i % 2 == 0 else 1 for i in range(graph.number_of_nodes())
        ],
    )
    agent = random_agents.RandomAgent(env.action_space, lambda x: 0, env.observation_space)
    initial_state = copy.deepcopy(env.state)

    # Ensure that there are more infected people after running the simulation
    # for some time.
    test_util.run_test_simulation(env=env, agent=agent, num_steps=num_steps)
    self.assertGreater(
        num_in_health_state(env.state, env.state_name_to_index["infected"]),
        num_in_health_state(initial_state, env.state_name_to_index["infected"]),
    )

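# Minimal sketches of the remaining infectious-disease helpers assumed by the
# tests above: `complete_graph_edge_list` enumerates every ordered pair of
# distinct nodes, and `num_in_health_state` assumes the same `health_states`
# attribute as above. Both are illustrative, not the canonical implementations.
def complete_graph_edge_list(graph_size):
    """Returns the edge list of a complete graph on `graph_size` nodes."""
    return [(i, j) for i in range(graph_size) for j in range(graph_size) if i != j]


def num_in_health_state(state, health_state):
    """Counts the members of the population currently in `health_state`."""
    return sum(1 for h in state.health_states if h == health_state)
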
def test_episode_done_raises_error(self):
    env = test_util.DummyEnv()
    agent = random_agents.RandomAgent(env.action_space, None, env.observation_space)
    obs = env.reset()
    with self.assertRaises(core.EpisodeDoneError):
        agent.act(obs, done=True)

def test_manipulate_features_no_max_control(self):
    """Tests that features are manipulated as expected with no gaming control."""
    env = college_admission.CollegeAdmissionsEnv(
        user_params={
            "num_applicants": 6,
            "gaming": True,
            "gaming_control": np.inf,
            "noise_params": params.BoundedGaussian(max=0, mu=0, min=0, sigma=0),
            "group_cost": {0: 3, 1: 4},
        }
    )
    agent = random_agents.RandomAgent(
        env.action_space,
        None,
        env.observation_space,
        default_action={"threshold": np.array(0.8), "epsilon_prob": np.array(0)},
    )
    env.set_scalar_reward(agent.reward_fn)
    action = agent.initial_action()
    env.step(action)
    env.state.test_scores_x = [0.1, 0.3, 0.6, 0.7, 0.7, 0.9]
    env.state.applicant_groups = [0, 1, 1, 1, 0, 0]
    env.state.true_eligible = [0, 0, 1, 1, 0, 1]
    expected_changed_scores = [0.1, 0.3, 0.8, 0.8, 0.8, 0.9]
    expected_individual_burden = self._return_individual_burden(env, agent)
    changed_scores, individual_burden = env._manipulate_features(env.state, action)
    self.assertTrue(np.all(np.isclose(expected_changed_scores, changed_scores, atol=1e-4)))
    self.assertTrue(
        np.all(np.isclose(individual_burden, expected_individual_burden, atol=1e-4))
    )

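# `_return_individual_burden` is a test-class helper not shown in this excerpt.
# A hypothetical sketch follows, assuming the burden of gaming is the score gap
# to the decision threshold scaled by the applicant's group cost; the actual
# helper may compute this differently.
def _return_individual_burden(self, env, agent):
    """Returns an assumed per-applicant cost of gaming up to the threshold."""
    threshold = agent.default_action["threshold"]
    return [
        max(threshold - score, 0) / env.state.params.group_cost[group]
        for score, group in zip(env.state.test_scores_x, env.state.applicant_groups)
    ]
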
def test_can_run_with_env(self):
    env = test_util.DummyEnv()
    agent = random_agents.RandomAgent(
        action_space=env.action_space,
        observation_space=env.observation_space,
        reward_fn=None,
    )
    test_util.run_test_simulation(env=env, agent=agent)

def test_measure(self):
    params = attention_allocation.Params()
    params.incident_rates = [4.0, 2.0]
    params.attention_replacement = True
    env = attention_allocation.LocationAllocationEnv(params)
    env.seed(100)
    agent = random_agents.RandomAgent(env.action_space, None, env.observation_space)
    agent.seed(100)
    observation = env.reset()
    done = False
    for _ in range(250):
        action = agent.act(observation, done)
        observation, _, done, _ = env.step(action)
    metric = distribution_comparison_metrics.DistributionComparisonMetric(
        env, "incidents_seen", 250
    )
    state_dist, action_dist, distance = metric.measure(env)
    expected_state_dist = env.state.params.incident_rates / np.sum(
        env.state.params.incident_rates
    )
    # The expected action distribution is uniform because RandomAgent samples
    # actions uniformly at random.
    expected_action_dist = [0.5, 0.5]
    expected_distance = np.linalg.norm(expected_state_dist - expected_action_dist)
    self.assertTrue(np.all(np.isclose(state_dist, expected_state_dist, atol=0.05)))
    self.assertTrue(np.all(np.isclose(action_dist, expected_action_dist, atol=0.05)))
    self.assertTrue(np.isclose(distance, expected_distance, atol=0.1))

def test_is_done_when_max_steps_reached(self):
    env = college_admission.CollegeAdmissionsEnv(
        user_params={"num_applicants": 4, "max_steps": 8}
    )
    agent = random_agents.RandomAgent(
        env.action_space,
        None,
        env.observation_space,
        default_action={"threshold": np.array(0.8), "epsilon_prob": np.array(0)},
    )
    with self.assertRaises(core.EpisodeDoneError):
        test_util.run_test_simulation(agent=agent, env=env, stackelberg=True)
    self.assertEqual(env.state.steps, 9)

def test_metric_realigns_history(self):
    env = test_util.DummyEnv()
    agent = random_agents.RandomAgent(env.action_space, None, env.observation_space)
    env.set_scalar_reward(agent.reward_fn)

    def realign_fn(history):
        return [(1, action) for _, action in history]

    metric = test_util.DummyMetric(env, realign_fn=realign_fn)
    _ = test_util.run_test_simulation(env, agent, metric)
    history = metric._extract_history(env)
    self.assertCountEqual([1] * 10, [state for state, _ in history])

def test_unmanipulated_features_not_noisified_when_noisy_features_off(self):
    env = college_admission.CollegeAdmissionsEnv(
        user_params={"gaming": False, "noisy_features": False}
    )
    agent = random_agents.RandomAgent(
        env.action_space,
        None,
        env.observation_space,
        default_action={"threshold": np.array(0.8), "epsilon_prob": np.array(0)},
    )
    action = agent.initial_action()
    env.step(action)
    self.assertTrue(
        (
            np.array(env.state.original_test_scores)
            - np.array(env.state.test_scores_x)
            == 0
        ).all()
    )

def test_select_candidates(self):
    """Tests that the jury's predictions on the modified scores are as expected."""
    env = college_admission.CollegeAdmissionsEnv(user_params={"num_applicants": 4})
    agent = random_agents.RandomAgent(
        env.action_space,
        None,
        env.observation_space,
        default_action={"threshold": np.array(0.8), "epsilon_prob": np.array(0)},
    )
    env.set_scalar_reward(agent.reward_fn)
    action = agent.initial_action()
    _ = env.step(action)
    env.state.test_scores_y = [0.1, 0.9, 0.8, 0.79]
    env.state.true_eligible = [0, 1, 0, 1]
    predictions, selected_ground_truth = env._select_candidates(env.state, action)
    self.assertEqual(list(predictions), [0, 1, 1, 0])
    self.assertEqual(list(selected_ground_truth), [2, 1, 0, 2])

def test_metric_multiple(self):
    env = attention_allocation.LocationAllocationEnv()
    agent = random_agents.RandomAgent(env.action_space, None, env.observation_space)
    env.seed(100)
    observation = env.reset()
    done = False
    for _ in range(2):
        action = agent.act(observation, done)
        observation, _, done, _ = env.step(action)
    metric1 = core.Metric(env)
    metric2 = core.Metric(env)
    history1 = metric1._extract_history(env)
    history2 = metric2._extract_history(env)
    self.assertEqual(history1, history2)

def test_error_on_scalar(self):
    """Confirms that an error is raised when actions are scalars."""
    env = test_util.DummyEnv()
    env.seed(100)
    agent = random_agents.RandomAgent(env.action_space, None, env.observation_space)
    agent.seed(100)
    observation = env.reset()
    done = False
    for _ in range(2):
        action = agent.act(observation, done)
        observation, _, done, _ = env.step(action)
    metric = distribution_comparison_metrics.DistributionComparisonMetric(
        env, "x", 100
    )
    with self.assertRaises(ValueError):
        metric.measure(env)

def test_update_state(self):
    """Checks that state is correctly updated with incidents_seen.

    This test checks that the number of incidents seen is no more than the
    number of incidents generated, and no more than the attention deployed by
    the action when allocating without attention replacement.
    """
    env = attention_allocation.LocationAllocationEnv()
    env.seed(0)
    agent = random_agents.RandomAgent(env.action_space, None, env.observation_space)
    observation = env.reset()
    action = agent.act(observation, False)
    crimes, reported_incidents = attention_allocation._sample_incidents(
        env.state.rng, env.state.params
    )
    attention_allocation._update_state(env.state, crimes, reported_incidents, action)
    incidents_seen = env.state.incidents_seen
    self.assertTrue((incidents_seen <= crimes).all())
    if not env.state.params.attention_replacement:
        self.assertTrue((incidents_seen <= action).all())

def test_manipulate_features_no_gaming(self):
    """Tests that features are not manipulated when gaming is off."""
    env = college_admission.CollegeAdmissionsEnv(
        user_params={"num_applicants": 6, "gaming": False, "group_cost": {0: 3, 1: 4}}
    )
    agent = random_agents.RandomAgent(
        env.action_space,
        None,
        env.observation_space,
        default_action={"threshold": np.array(0.8), "epsilon_prob": np.array(0)},
    )
    action = agent.initial_action()
    _, _, _, _ = env.step(action)
    env.state.test_scores_x = [0.1, 0.3, 0.6, 0.7, 0.7, 0.9]
    env.state.applicant_groups = [0, 1, 1, 1, 0, 0]
    env.state.true_eligible = [0, 0, 1, 1, 0, 1]
    # With gaming off, scores pass through unchanged and no burden is incurred.
    expected_changed_scores = [0.1, 0.3, 0.6, 0.7, 0.7, 0.9]
    expected_individual_burden = [0] * env.state.params.num_applicants
    changed_scores, individual_burden = env._manipulate_features(env.state, action)
    self.assertTrue(np.all(np.isclose(expected_changed_scores, changed_scores, atol=1e-4)))
    self.assertTrue(
        np.all(np.isclose(individual_burden, expected_individual_burden, atol=1e-4))
    )

def test_parties_can_interact_no_gaming(self):
    """Tests a Stackelberg simulation with no gaming."""
    env = college_admission.CollegeAdmissionsEnv(user_params={"gaming": False})
    agent = random_agents.RandomAgent(env.action_space, None, env.observation_space)
    test_util.run_test_simulation(agent=agent, env=env, stackelberg=True)