    def test_agent_raises_with_improper_number_of_features(self):
        env = test_util.DummyEnv()

        single_feature_params = classifier_agents.ScoringAgentParams(
            default_action_fn=env.action_space.sample, feature_keys=['x'])

        many_feature_params = classifier_agents.ScoringAgentParams(
            default_action_fn=env.action_space.sample, feature_keys=['x', 'y'])

        no_feature_params = classifier_agents.ScoringAgentParams(
            default_action_fn=env.action_space.sample, feature_keys=[])

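        # Bind the shared constructor arguments once; each case below varies
        # only `params`.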
        initialize = functools.partial(
            classifier_agents.ThresholdAgent,
            action_space=env.action_space,
            observation_space=env.observation_space,
            reward_fn=rewards.BinarizedScalarDeltaReward('x'))

        agent = initialize(params=single_feature_params)
        # This should succeed.
        agent.act(env.observation_space.sample(), done=False)

        agent = initialize(params=many_feature_params)
        with self.assertRaises(ValueError):
            agent.act(env.observation_space.sample(), done=False)

        agent = initialize(params=no_feature_params)
        with self.assertRaises(ValueError):
            agent.act(env.observation_space.sample(), done=False)

    def test_agent_trains_with_two_features(self):
        params = classifier_agents.ScoringAgentParams(
            default_action_fn=lambda: 0, feature_keys=['x', 'y'], burnin=200)

        agent = classifier_agents.ClassifierAgent(
            action_space=gym.spaces.Discrete(2),
            observation_space=gym.spaces.Dict({
                'x':
                gym.spaces.Box(low=-np.inf, high=np.inf, shape=[1]),
                'y':
                gym.spaces.Box(low=-np.inf, high=np.inf, shape=[1])
            }),
            reward_fn=rewards.BinarizedScalarDeltaReward('x'),
            params=params)

        # Train with points that are nearly separable but have some overlap
        # between 0.3 and 0.4, with 1s in the lower region and 0s in the higher
        # region. A linear transform y -> -y is expected to be learned so that
        # a threshold classifier can succeed. `y` is the relevant feature;
        # `x` is held constant.
        const = np.array([1])

        for observation in np.linspace(0, 0.4, 100):
            agent._act_impl({
                'y': np.array([observation]),
                'x': const
            },
                            reward=1,
                            done=False)

        for observation in np.linspace(0.3, 0.8, 100):
            agent._act_impl({
                'y': np.array([observation]),
                'x': const
            },
                            reward=0,
                            done=False)

        # Add a positive point at the top of the range so that the training labels
        # are not fit perfectly by a threshold.
        agent._act_impl({
            'y': np.array([0.9]),
            'x': const
        },
                        reward=1,
                        done=False)

        agent.frozen = True
        actions = []
        for obs in np.linspace(0, 0.95, 100):
            actions.append(
                agent.act({
                    'y': np.array([obs]),
                    'x': const
                }, done=False))

        # Assert some actions are 0 and some are 1.
        self.assertSameElements(actions, {0, 1})
        # Assert actions are reverse-sorted - i.e., 1s followed by 0s.
        self.assertSequenceEqual(actions, sorted(actions, reverse=True))

    def test_threshold_history_is_recorded(self):
        observation_space = gym.spaces.Dict({
            'x':
            gym.spaces.Box(low=0, high=1, shape=(1, ), dtype=np.float32),
            'group':
            gym.spaces.MultiDiscrete([1])
        })
        observation_space.seed(100)

        params = classifier_agents.ScoringAgentParams(
            default_action_fn=lambda: 0,
            feature_keys=['x'],
            group_key='group',
            burnin=0,
            threshold_policy=threshold_policies.ThresholdPolicy.
            EQUALIZE_OPPORTUNITY)
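        # With EQUALIZE_OPPORTUNITY, the agent should record both a global
        # threshold and per-group thresholds keyed by 'group'; the assertions
        # below check both histories.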

        agent = classifier_agents.ThresholdAgent(
            observation_space=observation_space,
            reward_fn=rewards.BinarizedScalarDeltaReward('x'),
            params=params)

        for _ in range(10):
            agent.act(observation_space.sample(), False)

        self.assertLen(agent.global_threshold_history, 10)
        self.assertTrue(agent.group_specific_threshold_history)
        for _, history in agent.group_specific_threshold_history.items():
            # It takes 2 extra steps (one to observe features and one to
            # observe the label) before any learned group-specific threshold
            # is available, so 10 steps leave 8 recorded thresholds.
            self.assertLen(history, 8)

    def test_insufficient_burnin_raises(self):
        env = test_util.DummyEnv()
        burnin = 5
        params = classifier_agents.ScoringAgentParams(
            default_action_fn=env.action_space.sample,
            feature_keys=['x'],
            burnin=burnin)

        agent = classifier_agents.ClassifierAgent(
            action_space=env.action_space,
            observation_space=env.observation_space,
            reward_fn=rewards.BinarizedScalarDeltaReward('x'),
            params=params)

        # Only give positive points to train.
        for _ in range(burnin):
            agent._act_impl(env.observation_space.sample(),
                            reward=1,
                            done=False)

        # Should raise a ValueError: the burn-in has passed but the classifier
        # has seen only one class of labels, so it cannot train to make a
        # decision.
        with self.assertRaises(ValueError):
            agent._act_impl(env.observation_space.sample(),
                            reward=1,
                            done=False)

    def test_oracle_maxutil_classifier_is_stable(self):
        env = lending.DelayedImpactEnv()

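        # The cost matrix below encodes the bank's utility (our reading of the
        # intended semantics): a false positive is a defaulted loan that loses
        # the principal (-1), a true positive earns the interest rate, and
        # rejected applications cost nothing.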
        agent_params = classifier_agents.ScoringAgentParams(
            feature_keys=['applicant_features'],
            group_key='group',
            default_action_fn=(lambda: 1),
            burnin=1,
            threshold_policy=threshold_policies.ThresholdPolicy.SINGLE_THRESHOLD,
            convert_one_hot_to_integer=True,
            cost_matrix=params.CostMatrix(
                fn=0, fp=-1, tp=env.initial_params.interest_rate, tn=0))

        agent = oracle_lending_agent.OracleThresholdAgent(
            action_space=env.action_space,
            observation_space=env.observation_space,
            reward_fn=rewards.BinarizedScalarDeltaReward('bank_cash'),
            params=agent_params,
            env=env)

        test_util.run_test_simulation(env=env, agent=agent)
        # Skip the zero thresholds recorded during burn-in; every learned
        # (non-zero) threshold should be identical.
        first_nonzero_threshold = None
        for thresh in agent.global_threshold_history:
            if thresh > 0:
                if first_nonzero_threshold is None:
                    first_nonzero_threshold = thresh
                self.assertAlmostEqual(first_nonzero_threshold, thresh)
        # Make sure there is at least one non-zero threshold.
        self.assertIsNotNone(first_nonzero_threshold)

    def test_skip_retraining_fn(self):
        env = test_util.DummyEnv()
        burnin = 10

        def _skip_retraining(action, observation):
            """Always skip retraining."""
            del action, observation
            return True

        params = classifier_agents.ScoringAgentParams(
            burnin=burnin,
            freeze_classifier_after_burnin=False,
            default_action_fn=env.action_space.sample,
            feature_keys=['x'],
            skip_retraining_fn=_skip_retraining)

        agent = classifier_agents.ThresholdAgent(
            action_space=env.action_space,
            observation_space=env.observation_space,
            reward_fn=rewards.BinarizedScalarDeltaReward('x'),
            params=params)

        for _ in range(burnin + 1):
            self.assertFalse(agent.frozen)
            _ = agent.act(env.observation_space.sample(), False)

        self.assertFalse(agent.frozen)  # Agent is not frozen.
        self.assertFalse(agent.global_threshold)  # Agent has not learned.

    def scenario_builder(self):
        """Returns an environment and agent pair."""
        env_params = lending_params.DelayedImpactParams(
            applicant_distribution=lending_params.two_group_credit_clusters(
                cluster_probabilities=self.cluster_probabilities,
                group_likelihoods=[self.group_0_prob, 1 - self.group_0_prob]),
            bank_starting_cash=self.bank_starting_cash,
            interest_rate=self.interest_rate,
            cluster_shift_increment=self.cluster_shift_increment,
        )
        env = lending.DelayedImpactEnv(env_params)

        agent_params = classifier_agents.ScoringAgentParams(
            feature_keys=['applicant_features'],
            group_key='group',
            default_action_fn=(lambda: 1),
            burnin=self.burnin,
            convert_one_hot_to_integer=True,
            threshold_policy=self.threshold_policy,
            skip_retraining_fn=lambda action, observation: action == 0,
            cost_matrix=params.CostMatrix(
                fn=0, fp=-1, tp=env_params.interest_rate, tn=0))

        agent = oracle_lending_agent.OracleThresholdAgent(
            action_space=env.action_space,
            reward_fn=rewards.BinarizedScalarDeltaReward(
                'bank_cash', baseline=env.initial_params.bank_starting_cash),
            observation_space=env.observation_space,
            params=agent_params,
            env=env)
        agent.seed(100)
        return env, agent
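
    # Hypothetical usage of the builder above (not part of the original code):
    #
    #   env, agent = self.scenario_builder()
    #   test_util.run_test_simulation(env=env, agent=agent)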

    def test_agent_can_learn_different_thresholds(self):
        observation_space = gym.spaces.Dict({
            'x':
            gym.spaces.Box(low=0, high=1, shape=(1, ), dtype=np.float32),
            'group':
            gym.spaces.Discrete(2)
        })

        params = classifier_agents.ScoringAgentParams(
            default_action_fn=lambda: 0,
            feature_keys=['x'],
            group_key='group',
            threshold_policy=threshold_policies.ThresholdPolicy.
            EQUALIZE_OPPORTUNITY)

        agent = classifier_agents.ThresholdAgent(
            observation_space=observation_space,
            reward_fn=rewards.BinarizedScalarDeltaReward('x'),
            params=params)

        rng = np.random.RandomState(100)
        # Train over the whole range of observations. Labels flip at 0.5 for
        # group 0 and at 0.6 for group 1, so slightly different thresholds
        # should be learned.
        for observation in rng.rand(100):
            for group in [0, 1]:
                agent._act_impl(
                    {
                        'x': np.array([observation]),
                        'group': np.array([group])
                    },
                    reward=observation > 0.5 + 0.1 * group,
                    done=False)

        agent.frozen = True

        actions = {}
        for group in [0, 1]:
            actions[group] = []
            for observation in np.linspace(0, 1, 1000):
                actions[group].append(
                    agent.act(
                        {
                            'x': np.array([observation]),
                            'group': np.array([group])
                        },
                        done=False))

        for group, action_list in actions.items():
            # All actions are {0, 1}
            self.assertSameElements(action_list, {0, 1})
            # Actions are sorted - i.e., 0s followed by 1s.
            self.assertSequenceEqual(action_list, sorted(action_list))

        # The two groups are classified with different policies so they are not
        # exactly equal.
        self.assertNotEqual(actions[0], actions[1])
        self.assertLen(agent.group_specific_thresholds, 2)

    def test_interact_with_env_replicable(self):
        env = test_util.DummyEnv()
        params = classifier_agents.ScoringAgentParams(
            default_action_fn=env.action_space.sample,
            feature_keys=['x'],
            burnin=5)

        agent = classifier_agents.ClassifierAgent(
            action_space=env.action_space,
            observation_space=env.observation_space,
            reward_fn=rewards.BinarizedScalarDeltaReward('x'),
            params=params)
        test_util.run_test_simulation(env=env, agent=agent)
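
    # `_one_hot` (used below) is defined elsewhere in the original file. A
    # minimal sketch consistent with its usage here, assuming length-10
    # vectors to match multinomial.Multinomial(10, 1):
    #
    #   def _one_hot(index):
    #       vector = np.zeros(10)
    #       vector[index] = 1
    #       return vector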

    def test_one_hot_conversion(self):
        observation_space = gym.spaces.Dict(
            {'x': multinomial.Multinomial(10, 1)})

        params = classifier_agents.ScoringAgentParams(
            default_action_fn=lambda: 0,
            feature_keys=['x'],
            convert_one_hot_to_integer=True,
            threshold_policy=threshold_policies.ThresholdPolicy.
            SINGLE_THRESHOLD)

        agent = classifier_agents.ThresholdAgent(
            observation_space=observation_space,
            reward_fn=rewards.NullReward(),
            params=params)

        self.assertEqual(agent._get_features({'x': _one_hot(5)}), [5])

    def test_agent_trains(self):
        env = test_util.DummyEnv()
        params = classifier_agents.ScoringAgentParams(
            burnin=200,
            default_action_fn=env.action_space.sample,
            feature_keys=['x'])

        agent = classifier_agents.ThresholdAgent(
            action_space=env.action_space,
            observation_space=env.observation_space,
            reward_fn=rewards.BinarizedScalarDeltaReward('x'),
            params=params)

        # Train with points that are nearly separable but have some overlap between
        # 0.3 and 0.4.
        for observation in np.linspace(0, 0.4, 100):
            agent._act_impl({'x': np.array([observation])},
                            reward=0,
                            done=False)

        for observation in np.linspace(0.3, 0.8, 100):
            agent._act_impl({'x': np.array([observation])},
                            reward=1,
                            done=False)

        # Add a negative point at the top of the range so that the training labels
        # are not fit perfectly by a threshold.
        agent._act_impl({'x': np.array([0.9])}, reward=0, done=False)

        agent.frozen = True
        actions = [
            agent.act({'x': np.array([obs])}, done=False)
            for obs in np.linspace(0, 0.95, 100)
        ]

        # Assert some actions are 0 and some are 1.
        self.assertSameElements(actions, {0, 1})
        # Assert actions are sorted - i.e., 0s followed by 1s.
        self.assertSequenceEqual(actions, sorted(actions))

        self.assertGreater(agent.global_threshold, 0)
        self.assertFalse(agent.group_specific_thresholds)
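
        # Informally, the learned single-feature policy reduces to
        # act = int(x >= agent.global_threshold); the sortedness assertion
        # above checks exactly that monotone structure.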

    def test_agent_seed(self):
        env = test_util.DummyEnv()

        params = classifier_agents.ScoringAgentParams(
            burnin=10,
            freeze_classifier_after_burnin=False,
            default_action_fn=env.action_space.sample,
            feature_keys=['x'])

        agent = classifier_agents.ThresholdAgent(
            action_space=env.action_space,
            observation_space=env.observation_space,
            reward_fn=rewards.BinarizedScalarDeltaReward('x'),
            params=params)

        agent.seed(100)
        a = agent.rng.randint(0, 1000)
        agent.seed(100)
        b = agent.rng.randint(0, 1000)
        self.assertEqual(a, b)

    def test_oracle_lending_agent_interacts(self):
        env = lending.DelayedImpactEnv()

        agent_params = classifier_agents.ScoringAgentParams(
            feature_keys=['applicant_features'],
            group_key='group',
            default_action_fn=(lambda: 1),
            burnin=1,
            convert_one_hot_to_integer=True,
            cost_matrix=params.CostMatrix(
                fn=0, fp=-1, tp=env.initial_params.interest_rate, tn=0))

        agent = oracle_lending_agent.OracleThresholdAgent(
            action_space=env.action_space,
            observation_space=env.observation_space,
            reward_fn=rewards.BinarizedScalarDeltaReward('bank_cash'),
            params=agent_params,
            env=env)

        test_util.run_test_simulation(env=env, agent=agent)

    def test_frozen_classifier_never_trains(self):
        env = test_util.DummyEnv()
        params = classifier_agents.ScoringAgentParams(
            burnin=0,
            default_action_fn=env.action_space.sample,
            feature_keys=['x'])

        agent = classifier_agents.ThresholdAgent(
            action_space=env.action_space,
            observation_space=env.observation_space,
            reward_fn=rewards.BinarizedScalarDeltaReward('x'),
            params=params,
            frozen=True)
        # Initialize global_threshold with a distinctive value.
        agent.global_threshold = 0.123

        # Run for some number of steps, global_threshold should not change.
        for _ in range(10):
            agent.act(env.observation_space.sample(), False)
        self.assertEqual(agent.global_threshold, 0.123)

    def test_classifier_agent_trains(self):
        env = test_util.DummyEnv()
        params = classifier_agents.ScoringAgentParams(
            default_action_fn=env.action_space.sample,
            feature_keys=['x'],
            burnin=200)

        agent = classifier_agents.ClassifierAgent(
            action_space=env.action_space,
            observation_space=env.observation_space,
            reward_fn=rewards.BinarizedScalarDeltaReward('x'),
            params=params)

        # Train with points that are nearly separable but have some overlap
        # between 0.3 and 0.4, with 1s in the lower region and 0s in the higher
        # region. A linear transform x -> -x is expected to be learned so that
        # a threshold classifier can succeed.
        for observation in np.linspace(0, 0.4, 100):
            agent._act_impl({'x': np.array([observation])},
                            reward=1,
                            done=False)

        for observation in np.linspace(0.3, 0.8, 100):
            agent._act_impl({'x': np.array([observation])},
                            reward=0,
                            done=False)

        # Add a positive point at the top of the range so that the training labels
        # are not fit perfectly by a threshold.
        agent._act_impl({'x': np.array([0.9])}, reward=1, done=False)

        agent.frozen = True
        actions = [
            agent.act({'x': np.array([obs])}, done=False)
            for obs in np.linspace(0, 0.95, 100)
        ]

        # Assert some actions are 0 and some are 1.
        self.assertSameElements(actions, {0, 1})
        # Assert actions are reverse-sorted - i.e., 1s followed by 0s.
        self.assertSequenceEqual(actions, sorted(actions, reverse=True))

    def test_freeze_after_burnin(self):
        env = test_util.DummyEnv()
        burnin = 10
        params = classifier_agents.ScoringAgentParams(
            burnin=burnin,
            freeze_classifier_after_burnin=True,
            default_action_fn=env.action_space.sample,
            feature_keys=['x'])

        agent = classifier_agents.ThresholdAgent(
            action_space=env.action_space,
            observation_space=env.observation_space,
            reward_fn=rewards.BinarizedScalarDeltaReward('x'),
            params=params)

        for _ in range(burnin + 1):
            self.assertFalse(agent.frozen)
            _ = agent.act(env.observation_space.sample(), False)

        self.assertTrue(agent.frozen)
        self.assertTrue(agent.global_threshold)  # Agent has learned something.

    def test_agent_on_one_hot_vectors(self):
        # Space of 1-hot vectors of length 10.
        observation_space = gym.spaces.Dict(
            {'x': multinomial.Multinomial(10, 1)})

        params = classifier_agents.ScoringAgentParams(
            default_action_fn=lambda: 0,
            feature_keys=['x'],
            convert_one_hot_to_integer=True,
            burnin=999,
            threshold_policy=threshold_policies.ThresholdPolicy.
            SINGLE_THRESHOLD)

        agent = classifier_agents.ThresholdAgent(
            observation_space=observation_space,
            reward_fn=rewards.NullReward(),
            params=params)

        observation_space.seed(100)
        # Train a boundary at 3 using 1-hot vectors. Rewards arrive one step
        # late and label the previous observation, hence `last_observation`.
        observation = observation_space.sample()
        agent._act_impl(observation, reward=None, done=False)
        for _ in range(1000):
            last_observation = observation
            observation = observation_space.sample()
            agent._act_impl(observation,
                            reward=int(np.argmax(last_observation['x']) >= 3),
                            done=False)
            if agent._training_corpus.examples:
                last_example = agent._training_corpus.examples[-1]
                self.assertEqual(
                    int(last_example.features[0] >= 3), last_example.label)

        agent.frozen = True

        self.assertTrue(agent.act({'x': _one_hot(3)}, done=False))
        self.assertFalse(agent.act({'x': _one_hot(2)}, done=False))
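
        # With the boundary learned at 3, index 3 crosses the threshold
        # (action 1) while index 2 does not (action 0).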