Example 1
    def test_confusion_metric_correct_for_sequence_prediction_rule(self):
        dim = 10

        def _ground_truth_fn(history_item):
            state, _ = history_item
            return state.x

        env = test_util.DeterministicDummyEnv(test_util.DummyParams(dim=dim))
        env.set_scalar_reward(rewards.NullReward())
        # Always predict a sequence of 1s.
        metric = error_metrics.ConfusionMetric(
            env=env,
            prediction_fn=lambda x: [1 for _ in range(dim)],
            ground_truth_fn=_ground_truth_fn,
            stratify_fn=lambda x: [1 for _ in range(dim)],
        )

        measurement = test_util.run_test_simulation(env=env,
                                                    agent=None,
                                                    metric=metric)

        logging.info("Measurement: %s.", measurement)

        self.assertEqual(measurement[1].fp, 50)
        self.assertEqual(measurement[1].tp, 50)
        self.assertNotIn(0, measurement)
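The expected counts can be reconstructed from the assertions: with a constant all-ones prediction over a dim-10 state, the 100 predictions split evenly into true and false positives, which is consistent with the deterministic environment alternating state.x between all-zeros and all-ones over a 10-step run (the step count is inferred from the counts, not stated in the excerpt):

num_steps = 10                       # inferred: 100 predictions / dim of 10
dim = 10
ones_steps = num_steps // 2          # ground truth alternates between 0 and 1
tp = ones_steps * dim                # predicted 1, truth 1
fp = (num_steps - ones_steps) * dim  # predicted 1, truth 0
assert (tp, fp) == (50, 50)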
Example 2
    def test_confusion_metric_correct_for_atomic_prediction_rule(self):
        def _ground_truth_fn(history_item):
            state, _ = history_item
            return state.x[0]

        env = test_util.DeterministicDummyEnv(test_util.DummyParams(dim=1))
        env.set_scalar_reward(rewards.NullReward())
        # Always predict 1.
        metric = error_metrics.ConfusionMetric(
            env=env,
            prediction_fn=lambda x: 1,
            ground_truth_fn=_ground_truth_fn,
            stratify_fn=lambda x: 1,
        )

        measurement = test_util.run_test_simulation(env=env,
                                                    agent=None,
                                                    metric=metric)

        logging.info("Measurement: %s.", measurement)

        # The keys in measurement are given by group membership, which in this case
        # is defined to always be 1.
        self.assertEqual(measurement[1].fp, 5)
        self.assertEqual(measurement[1].tp, 5)
        self.assertNotIn(0, measurement)
Example 3
def instantiate_environment_and_agent(
    agent_class,
    population_graph,
    initial_health_state,
    infection_probability=0.5,
    num_treatments=5,
    max_treatments=10,
    seed=100,
    agent_seed=50,
):
    env = infectious_disease.build_si_model(
        population_graph=population_graph,
        infection_probability=infection_probability,
        num_treatments=num_treatments,
        initial_health_state=initial_health_state,
        max_treatments=max_treatments,
    )
    agent = agent_class(
        env.action_space,
        rewards.NullReward(),
        env.observation_space,
        infectious_disease_agents.env_to_agent_params(env.initial_params),
    )
    env.seed(seed)
    agent.seed(agent_seed)
    _ = env.reset()
    return env, agent
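A minimal usage sketch for this helper, assuming the population graph is a networkx graph (as graph.number_of_nodes() in Example 5 suggests) and that agent_class is any class with the (action_space, reward_fn, observation_space, params) signature of Example 7's _BaseAgent; the module path and graph construction here are assumptions, not taken from the source:

import networkx as nx

# Hypothetical setup: a 10-node cycle graph where node 0 starts infected
# (health state 1) and everyone else starts susceptible (state 0).
graph = nx.cycle_graph(10)
initial_health = [1] + [0] * (graph.number_of_nodes() - 1)

env, agent = instantiate_environment_and_agent(
    agent_class=infectious_disease_agents._BaseAgent,
    population_graph=graph,
    initial_health_state=initial_health,
)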
Example 4
  def __init__(
      self,
      action_space,
      reward_fn,
      observation_space,
      threshold = 0.5,
      epsilon_greedy = False,
      initial_epsilon_prob = 0.7,
      decay_steps = 10,
      epsilon_prob_decay_rate = 0.02,
  ):
    """Initializes the agent.

    Args:
     action_space: a `gym.Space` that contains valid actions.
     reward_fn: a `RewardFn` object.
     observation_space: a `gym.Space` that contains valid observations.
     threshold: Fixed threshold.
     epsilon_greedy: Bool. Whether this agent should follow an epsilon-greedy
       policy.
     initial_epsilon_prob: Float. Initial value of the probability for an
       epsilon-greedy agent.
     decay_steps: A positive integer.
     epsilon_prob_decay_rate: A positive float.
    """
    if reward_fn is None:
      reward_fn = rewards.NullReward()
    super(FixedJury, self).__init__(action_space, reward_fn, observation_space)
    self._threshold = threshold
    self._epsilon_greedy = epsilon_greedy
    self._initial_epsilon_prob = initial_epsilon_prob
    self._decay_rate = epsilon_prob_decay_rate
    self._decay_steps = decay_steps
    self._steps = 0
    self.rng = np.random.RandomState()
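The stored decay parameters suggest an exponentially decaying epsilon schedule; a minimal sketch of how such a schedule might be computed, using the constructor's default values (the formula itself is an assumption, not taken from the source):

def epsilon_at_step(steps, initial_epsilon_prob=0.7,
                    epsilon_prob_decay_rate=0.02, decay_steps=10):
    # Hypothetical schedule: epsilon shrinks geometrically as steps accumulate.
    return initial_epsilon_prob * (
        1 - epsilon_prob_decay_rate) ** (steps / decay_steps)

# Roughly 0.7 at step 0 and roughly 0.57 after 100 steps.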
Example 5
    def scenario_builder(self):
        """Returns an agent and environment pair."""
        graph = GRAPHS[self.graph_name]

        env = infectious_disease.build_sir_model(
            population_graph=graph,
            infection_probability=self.infection_probability,
            infected_exit_probability=self.infected_exit_probability,
            num_treatments=self.num_treatments,
            max_treatments=1,
            burn_in=self.burn_in,
            # Treatments turn susceptible people into recovered without having them
            # get sick.
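            # Row/column order is (susceptible, infected, recovered): row 0
            # sends susceptible individuals to recovered, while rows 1 and 2
            # leave infected and recovered individuals unchanged.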
            treatment_transition_matrix=np.array([[0, 0, 1], [0, 1, 0],
                                                  [0, 0, 1]]),
            # Everybody starts out healthy.
            initial_health_state=[0] * graph.number_of_nodes(),
            initial_health_state_seed=self.env_seed,
        )

        agent = self.agent_constructor(
            env.action_space,
            rewards.NullReward(),
            env.observation_space,
            params=infectious_disease_agents.env_to_agent_params(
                env.initial_params),
        )

        return env, agent
Example 6
  def test_final_credit_distribution_metric_can_interact_with_lending(self):
    env = lending.DelayedImpactEnv()
    env.set_scalar_reward(rewards.NullReward())
    # Use step=-1 to get the final credit distribution.
    final_distribution = lending_metrics.CreditDistribution(env, step=-1)
    initial_distribution = lending_metrics.CreditDistribution(env, step=0)
    test_util.run_test_simulation(
        env=env, metric=[final_distribution, initial_distribution])
Example 7
    def __init__(self, action_space, reward_fn, observation_space, params):
        self.initial_params = copy.deepcopy(params)
        if reward_fn is None:
            reward_fn = rewards.NullReward()
        super(_BaseAgent, self).__init__(action_space, reward_fn,
                                         observation_space)

        self.rng = np.random.RandomState()
Example 8
    def test_accuracy_metric_can_interact_with_dummy(self):
        def _is_zero(history_item):
            _, action = history_item
            return int(action == 0)

        env = test_util.DummyEnv()
        env.set_scalar_reward(rewards.NullReward())
        metric = error_metrics.AccuracyMetric(env=env, numerator_fn=_is_zero)
        test_util.run_test_simulation(env=env, metric=metric)
Example 9
def _setup_test_simulation(dim=1, calc_mean=False, modifier_fn=_modifier_fn):
    env = test_util.DeterministicDummyEnv(test_util.DummyParams(dim=dim))
    env.set_scalar_reward(rewards.NullReward())
    metric = value_tracking_metrics.AggregatorMetric(
        env=env,
        modifier_fn=modifier_fn,
        selection_fn=_selection_fn,
        stratify_fn=_stratify_fn,
        calc_mean=calc_mean)
    return env, metric
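This helper relies on module-level _modifier_fn, _selection_fn, and _stratify_fn that are not part of the excerpt; plausible stand-ins, labeled hypothetical, might look like:

# Hypothetical stand-ins for the module-level helpers used above; the real
# definitions are outside this excerpt.
def _selection_fn(history_item):
    state, _ = history_item
    return state.x  # the value to aggregate

def _modifier_fn(value):
    return value  # identity: aggregate the raw selected value

def _stratify_fn(history_item):
    return 1  # single stratum, mirroring the stratify_fn lambdas above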
Example 10
    def test_summing_metric_give_correct_sum_dummy_env(self):
        env = test_util.DeterministicDummyEnv(test_util.DummyParams(dim=1))
        env.set_scalar_reward(rewards.NullReward())

        metric = value_tracking_metrics.SummingMetric(
            env=env, selection_fn=_selection_fn)
        measurement = test_util.run_test_simulation(env,
                                                    agent=None,
                                                    metric=metric,
                                                    seed=0)

        self.assertTrue(np.all(np.equal(measurement, [5])))
Example 11
  def test_recall_with_zero_denominator(self):
    env = test_util.DeterministicDummyEnv(test_util.DummyParams(dim=1))
    env.set_scalar_reward(rewards.NullReward())
    # Ground truth is always 0, so recall has a zero denominator.
    metric = error_metrics.RecallMetric(
        env=env,
        prediction_fn=lambda x: 0,
        ground_truth_fn=lambda x: 0,
        stratify_fn=lambda x: 1)

    measurement = test_util.run_test_simulation(
        env=env, agent=None, metric=metric, num_steps=50)
    self.assertEqual({1: 0}, measurement)
Example 12
    def test_one_hot_conversion(self):
        observation_space = gym.spaces.Dict({"x": multinomial.Multinomial(10, 1)})

        params = classifier_agents.ScoringAgentParams(
            default_action_fn=lambda: 0,
            feature_keys=["x"],
            convert_one_hot_to_integer=True,
            threshold_policy=threshold_policies.ThresholdPolicy.SINGLE_THRESHOLD,
        )

        agent = classifier_agents.ThresholdAgent(
            observation_space=observation_space, reward_fn=rewards.NullReward(), params=params
        )

        self.assertEqual(agent._get_features({"x": _one_hot(5)}), [5])
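_one_hot is used here and in Example 19 but is not defined in the excerpt; a plausible stand-in consistent with the Multinomial(10, 1) observation space:

import numpy as np

def _one_hot(index, dim=10):
    # Hypothetical helper: a length-dim vector with a single 1 at `index`.
    vec = np.zeros(dim)
    vec[index] = 1
    return vec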
Example 13
  def test_precision_with_zero_denominator(self):
    def _ground_truth_fn(history_item):
      state, _ = history_item
      return state.x[0]

    env = test_util.DeterministicDummyEnv(test_util.DummyParams(dim=1))
    env.set_scalar_reward(rewards.NullReward())
    # Always predict 0, so precision has a zero denominator.
    metric = error_metrics.PrecisionMetric(
        env=env,
        prediction_fn=lambda x: 0,
        ground_truth_fn=_ground_truth_fn,
        stratify_fn=lambda x: 1)

    measurement = test_util.run_test_simulation(
        env=env, agent=None, metric=metric, num_steps=50)

    self.assertEqual({1: 0}, measurement)
Example 14
  def test_recall_metric_correct_for_atomic_prediction_rule(self):
    def _ground_truth_fn(history_item):
      state, _ = history_item
      return state.x[0]

    env = test_util.DeterministicDummyEnv(test_util.DummyParams(dim=1))
    env.set_scalar_reward(rewards.NullReward())
    # Always predict 1.
    metric = error_metrics.RecallMetric(
        env=env,
        prediction_fn=lambda x: 1,
        ground_truth_fn=_ground_truth_fn,
        stratify_fn=lambda x: 1)

    measurement = test_util.run_test_simulation(
        env=env, agent=None, metric=metric, num_steps=50)

    logging.info('Measurement: %s.', measurement)
    self.assertEqual({1: 1}, measurement)
Example 15
    def __init__(self,
                 action_space,
                 reward_fn,
                 observation_space,
                 default_action=None):
        """Initializes the random agent, which takes randomly sampled actions.

    Args:
      action_space: A gym.space defining the space of possible actions.
      reward_fn: A function that takes an observation and calculates the agents'
        reward.
      observation_space: A gym.space defining the space of possible
        observations.
      default_action: The first action of the agent when no observation is
        given.
    """
        if reward_fn is None:
            reward_fn = rewards.NullReward()
        super(RandomAgent, self).__init__(action_space, reward_fn,
                                          observation_space)
        self.default_action = default_action
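Given the stored default_action and the docstring, the act step presumably falls back to default_action before any observation arrives and otherwise samples from the action space; a minimal sketch of such a method (assumed, not from the source):

    def _act_impl(self, observation, reward, done):
        # Assumed behavior: use default_action when no observation is given,
        # otherwise sample uniformly from the action space.
        if observation is None and self.default_action is not None:
            return self.default_action
        return self.action_space.sample()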
Example 16
  def test_cost_metric_correct_for_atomic_prediction_rule(self):

    def _ground_truth_fn(history_item):
      state, _ = history_item
      return state.x[0]

    env = test_util.DeterministicDummyEnv(test_util.DummyParams(dim=1))
    env.set_scalar_reward(rewards.NullReward())
    cost_metric = error_metrics.CostedConfusionMetric(
        env=env,
        prediction_fn=lambda x: 1,
        ground_truth_fn=_ground_truth_fn,
        stratify_fn=lambda x: 1,
        cost_matrix=params.CostMatrix(tp=1, fp=-2, tn=-1, fn=-1))
    measurement = test_util.run_test_simulation(
        env=env, agent=None, metric=cost_metric)

    logging.info('Cost measurement: %s.', measurement)

    self.assertEqual(measurement[1], -5)
    self.assertNotIn(0, measurement)
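The expected measurement follows from Example 2, where the same environment and always-1 prediction rule produced 5 true positives and 5 false positives; applying the cost matrix:

tp, fp = 5, 5              # counts established in Example 2's setup
cost = tp * 1 + fp * (-2)  # tn and fn never occur: the prediction is always 1
assert cost == -5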
Example 17
  def test_stratified_accuracy_metric_correct_sequence_prediction(self):
    """Check correctness when stratifying into (wrong, right) bins."""

    def _x_select(history_item):
      return [i == 1 for i in history_item.state.x]

    def _x_stratify(history_item):
      return history_item.state.x

    env = test_util.DeterministicDummyEnv(test_util.DummyParams(dim=10))
    env.set_scalar_reward(rewards.NullReward())
    metric = error_metrics.AccuracyMetric(
        env=env, numerator_fn=_x_select, stratify_fn=_x_stratify)

    measurement = test_util.run_test_simulation(
        env=env, agent=None, metric=metric)

    logging.info('Measurement: %s.', measurement)

    self.assertEqual(measurement[0], 0)
    self.assertEqual(measurement[1], 1)
Example 18
    def __init__(self,
                 action_space,
                 reward_fn,
                 observation_space,
                 params=None):

        if reward_fn is None:
            reward_fn = rewards.NullReward()
        super(AllocationAgent, self).__init__(action_space, reward_fn,
                                              observation_space)
        if params is None:
            params = AllocationAgentParams()
        self.params = params
        self._n_bins = len(action_space.nvec)

        self.rng = np.random.RandomState()
        self._n_resource = self.action_space.n

        self.beliefs = np.zeros(self._n_bins).tolist()
        self.feature_selection_fn = params.feature_selection_fn or (
            lambda obs: _get_added_vector_features(obs, self._n_bins)
        )  # type: Callable
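The fallback feature selector refers to _get_added_vector_features, which is not in the excerpt; a plausible reading is that it sums the observation's length-n_bins vector fields into a single feature vector (an assumption, not the source's definition):

import numpy as np

def _get_added_vector_features(observation, n_bins):
    # Hypothetical stand-in: element-wise sum of every observation field
    # whose flattened length matches n_bins.
    total = np.zeros(n_bins)
    for value in observation.values():
        value = np.asarray(value).flatten()
        if value.size == n_bins:
            total += value
    return total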
Example 19
    def test_agent_on_one_hot_vectors(self):

        # Space of 1-hot vectors of length 10.
        observation_space = gym.spaces.Dict(
            {'x': multinomial.Multinomial(10, 1)})

        params = classifier_agents.ScoringAgentParams(
            default_action_fn=lambda: 0,
            feature_keys=['x'],
            convert_one_hot_to_integer=True,
            burnin=999,
            threshold_policy=threshold_policies.ThresholdPolicy.SINGLE_THRESHOLD)

        agent = classifier_agents.ThresholdAgent(
            observation_space=observation_space,
            reward_fn=rewards.NullReward(),
            params=params)

        observation_space.seed(100)
        # Train a boundary at 3 using 1-hot vectors.
        observation = observation_space.sample()
        agent._act_impl(observation, reward=None, done=False)
        for _ in range(1000):
            last_observation = observation
            observation = observation_space.sample()
            agent._act_impl(observation,
                            reward=int(np.argmax(last_observation['x']) >= 3),
                            done=False)
            if agent._training_corpus.examples:
                example = agent._training_corpus.examples[-1]
                assert int(example.features[0] >= 3) == example.label

        agent.frozen = True

        self.assertTrue(agent.act({'x': _one_hot(3)}, done=False))
        self.assertFalse(agent.act({'x': _one_hot(2)}, done=False))
Example 20
    def test_stratified_accuracy_metric_correct_atomic_prediction(self):
        """Check correctness when stratifying into (wrong, right) bins."""
        def _x_select(history_item):
            state, _ = history_item
            return int(state.x[0] == 1)

        def _x_stratify(history_item):
            state, _ = history_item
            return state.x[0]

        env = test_util.DeterministicDummyEnv()
        env.set_scalar_reward(rewards.NullReward())
        metric = error_metrics.AccuracyMetric(env=env,
                                              numerator_fn=_x_select,
                                              stratify_fn=_x_stratify)

        measurement = test_util.run_test_simulation(env=env,
                                                    agent=None,
                                                    metric=metric)

        logging.info("Measurement: %s.", measurement)

        self.assertEqual(measurement[0], 0)
        self.assertEqual(measurement[1], 1)