    def test_insufficient_burnin_raises(self):
        env = test_util.DummyEnv()
        burnin = 5
        params = classifier_agents.ScoringAgentParams(
            default_action_fn=env.action_space.sample,
            feature_keys=['x'],
            burnin=burnin)

        agent = classifier_agents.ClassifierAgent(
            action_space=env.action_space,
            observation_space=env.observation_space,
            reward_fn=rewards.BinarizedScalarDeltaReward('x'),
            params=params)

        # Give only positive examples during burnin so the classifier sees a
        # single class and cannot be fit.
        for _ in range(burnin):
            agent._act_impl(env.observation_space.sample(),
                            reward=1,
                            done=False)

        # Should raise a ValueError once the burnin has passed, since the
        # classifier cannot be trained on a single class to make a decision.
        with self.assertRaises(ValueError):
            agent._act_impl(env.observation_space.sample(),
                            reward=1,
                            done=False)
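# These snippets appear to come from the ml-fairness-gym test suite. They are
# shown without their import headers; the module paths below are assumptions
# inferred from the identifiers used in the examples, not verbatim source:
import copy
import functools

import gym
import numpy as np

import core
import rewards
import test_util
from agents import classifier_agents
from agents import random_agents
from metrics import distribution_comparison_metrics
from metrics import error_metrics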
    def test_can_run_with_env(self):
        env = test_util.DummyEnv()
        agent = random_agents.RandomAgent(
            action_space=env.action_space,
            observation_space=env.observation_space,
            reward_fn=None)
        test_util.run_test_simulation(env=env, agent=agent)
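# run_test_simulation is used throughout these examples with an env plus an
# optional agent and/or metric. A combined call, assuming DummyMetric can be
# constructed with its realign_fn left at the default:
env = test_util.DummyEnv()
agent = random_agents.RandomAgent(env.action_space, None,
                                  env.observation_space)
metric = test_util.DummyMetric(env)
test_util.run_test_simulation(env=env, agent=agent, metric=metric)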
    def test_episode_done_raises_error(self):
        env = test_util.DummyEnv()
        agent = random_agents.RandomAgent(env.action_space, None,
                                          env.observation_space)
        obs = env.reset()
        with self.assertRaises(core.EpisodeDoneError):
            agent.act(obs, done=True)
    def test_agent_raises_with_improper_number_of_features(self):
        env = test_util.DummyEnv()

        single_feature_params = classifier_agents.ScoringAgentParams(
            default_action_fn=env.action_space.sample, feature_keys=['x'])

        many_feature_params = classifier_agents.ScoringAgentParams(
            default_action_fn=env.action_space.sample, feature_keys=['x', 'y'])

        no_feature_params = classifier_agents.ScoringAgentParams(
            default_action_fn=env.action_space.sample, feature_keys=[])

        initialize = functools.partial(
            classifier_agents.ThresholdAgent,
            action_space=env.action_space,
            observation_space=env.observation_space,
            reward_fn=rewards.BinarizedScalarDeltaReward('x'))

        agent = initialize(params=single_feature_params)
        # This should succeed.
        agent.act(env.observation_space.sample(), done=False)

        agent = initialize(params=many_feature_params)
        with self.assertRaises(ValueError):
            agent.act(env.observation_space.sample(), done=False)

        agent = initialize(params=no_feature_params)
        with self.assertRaises(ValueError):
            agent.act(env.observation_space.sample(), done=False)
    def test_invalid_env_interactions(self):
        env = test_util.DummyEnv()
        with self.assertRaises(gym.error.InvalidAction):
            env.step('not a real action')

        # Succeeds.
        env.step(0)
    def test_skip_retraining_fn(self):
        env = test_util.DummyEnv()
        burnin = 10

        def _skip_retraining(action, observation):
            """Always skip retraining."""
            del action, observation
            return True

        params = classifier_agents.ScoringAgentParams(
            burnin=burnin,
            freeze_classifier_after_burnin=False,
            default_action_fn=env.action_space.sample,
            feature_keys=['x'],
            skip_retraining_fn=_skip_retraining)

        agent = classifier_agents.ThresholdAgent(
            action_space=env.action_space,
            observation_space=env.observation_space,
            reward_fn=rewards.BinarizedScalarDeltaReward('x'),
            params=params)

        for _ in range(burnin + 1):
            self.assertFalse(agent.frozen)
            _ = agent.act(env.observation_space.sample(), False)

        self.assertFalse(agent.frozen)  # Agent is not frozen.
        self.assertFalse(agent.global_threshold)  # Agent has not learned.
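# skip_retraining_fn is called with the last action and the current
# observation, and returning True skips that retraining step. A hypothetical
# policy that retrains only after nonzero actions:
def _skip_unless_action_nonzero(action, observation):
    """Illustrative only: skip retraining whenever the action is 0."""
    del observation  # Unused, as in the example above.
    return action == 0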
    def test_accuracy_metric_can_interact_with_dummy(self):
        def _is_zero(history_item):
            _, action = history_item
            return int(action == 0)

        env = test_util.DummyEnv()
        env.set_scalar_reward(rewards.NullReward())
        metric = error_metrics.AccuracyMetric(env=env, numerator_fn=_is_zero)
        test_util.run_test_simulation(env=env, metric=metric)
    def test_metric_realigns_history(self):
        env = test_util.DummyEnv()
        agent = random_agents.RandomAgent(env.action_space, None,
                                          env.observation_space)
        env.set_scalar_reward(agent.reward_fn)

        def realign_fn(history):
            return [(1, action) for _, action in history]

        metric = test_util.DummyMetric(env, realign_fn=realign_fn)
        _ = test_util.run_test_simulation(env, agent, metric)
        history = metric._extract_history(env)
        self.assertCountEqual([1] * 10, [state for state, _ in history])
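# realign_fn receives the environment history as (state, action) pairs and
# returns a reindexed copy for the metric to consume. A hypothetical variant
# that pairs each action with the preceding state instead:
def shift_realign(history):
    states = [state for state, _ in history]
    actions = [action for _, action in history]
    return list(zip(states[:-1], actions[1:]))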
    def test_interact_with_env_replicable(self):
        env = test_util.DummyEnv()
        params = classifier_agents.ScoringAgentParams(
            default_action_fn=env.action_space.sample,
            feature_keys=['x'],
            burnin=5)

        agent = classifier_agents.ClassifierAgent(
            action_space=env.action_space,
            observation_space=env.observation_space,
            reward_fn=rewards.BinarizedScalarDeltaReward('x'),
            params=params)
        test_util.run_test_simulation(env=env, agent=agent)
    def test_interact_with_env_replicable(self):
        env = test_util.DummyEnv()
        params = classifier_agents.ScoringAgentParams(
            burnin=10,
            freeze_classifier_after_burnin=False,
            default_action_fn=env.action_space.sample,
            feature_keys=["x"],
        )

        agent = classifier_agents.ThresholdAgent(
            action_space=env.action_space,
            observation_space=env.observation_space,
            reward_fn=rewards.BinarizedScalarDeltaReward("x"),
            params=params,
        )
        test_util.run_test_simulation(env=env, agent=agent)
    def test_error_on_scalar(self):
        """Test confirms an error is raised when an actions are scalars."""
        env = test_util.DummyEnv()
        env.seed(100)

        agent = random_agents.RandomAgent(env.action_space, None,
                                          env.observation_space)
        agent.seed(100)

        observation = env.reset()
        done = False
        for _ in range(2):
            action = agent.act(observation, done)
            observation, _, done, _ = env.step(action)

        metric = distribution_comparison_metrics.DistributionComparisonMetric(
            env, "x", 100)
        with self.assertRaises(ValueError):
            metric.measure(env)
    def test_agent_trains(self):
        env = test_util.DummyEnv()
        params = classifier_agents.ScoringAgentParams(
            burnin=200,
            default_action_fn=env.action_space.sample,
            feature_keys=['x'])

        agent = classifier_agents.ThresholdAgent(
            action_space=env.action_space,
            observation_space=env.observation_space,
            reward_fn=rewards.BinarizedScalarDeltaReward('x'),
            params=params)

        # Train with points that are nearly separable but have some overlap between
        # 0.3 and 0.4.
        for observation in np.linspace(0, 0.4, 100):
            agent._act_impl({'x': np.array([observation])},
                            reward=0,
                            done=False)

        for observation in np.linspace(0.3, 0.8, 100):
            agent._act_impl({'x': np.array([observation])},
                            reward=1,
                            done=False)

        # Add a negative point at the top of the range so that the training labels
        # are not fit perfectly by a threshold.
        agent._act_impl({'x': np.array([0.9])}, reward=0, done=False)

        agent.frozen = True
        actions = [
            agent.act({'x': np.array([obs])}, done=False)
            for obs in np.linspace(0, 0.95, 100)
        ]

        # Assert some actions are 0 and some are 1.
        self.assertSameElements(actions, {0, 1})
        # Assert actions are sorted - i.e., 0s followed by 1s.
        self.assertSequenceEqual(actions, sorted(actions))

        self.assertGreater(agent.global_threshold, 0)
        self.assertFalse(agent.group_specific_thresholds)
    def test_agent_seed(self):
        env = test_util.DummyEnv()

        params = classifier_agents.ScoringAgentParams(
            burnin=10,
            freeze_classifier_after_burnin=False,
            default_action_fn=env.action_space.sample,
            feature_keys=['x'])

        agent = classifier_agents.ThresholdAgent(
            action_space=env.action_space,
            observation_space=env.observation_space,
            reward_fn=rewards.BinarizedScalarDeltaReward('x'),
            params=params)

        agent.seed(100)
        a = agent.rng.randint(0, 1000)
        agent.seed(100)
        b = agent.rng.randint(0, 1000)
        self.assertEqual(a, b)
    def test_frozen_classifier_never_trains(self):
        env = test_util.DummyEnv()
        params = classifier_agents.ScoringAgentParams(
            burnin=0,
            default_action_fn=env.action_space.sample,
            feature_keys=['x'])

        agent = classifier_agents.ThresholdAgent(
            action_space=env.action_space,
            observation_space=env.observation_space,
            reward_fn=rewards.BinarizedScalarDeltaReward('x'),
            params=params,
            frozen=True)
        # Initialize global_threshold with a distinctive value.
        agent.global_threshold = 0.123

        # Run for some number of steps, global_threshold should not change.
        for _ in range(10):
            agent.act(env.observation_space.sample(), False)
        self.assertEqual(agent.global_threshold, 0.123)
    def test_agent_trains(self):
        env = test_util.DummyEnv()
        params = classifier_agents.ScoringAgentParams(
            default_action_fn=env.action_space.sample,
            feature_keys=['x'],
            burnin=200)

        agent = classifier_agents.ClassifierAgent(
            action_space=env.action_space,
            observation_space=env.observation_space,
            reward_fn=rewards.BinarizedScalarDeltaReward('x'),
            params=params)

        # Train with points that are nearly separable but have some overlap
        # between 0.3 and 0.4, with 1s in the lower region and 0s in the higher
        # region. A linear transform x -> -x is expected to be learned so that
        # a threshold classifier can be successful.
        for observation in np.linspace(0, 0.4, 100):
            agent._act_impl({'x': np.array([observation])},
                            reward=1,
                            done=False)

        for observation in np.linspace(0.3, 0.8, 100):
            agent._act_impl({'x': np.array([observation])},
                            reward=0,
                            done=False)

        # Add a positive point at the top of the range so that the training labels
        # are not fit perfectly by a threshold.
        agent._act_impl({'x': np.array([0.9])}, reward=1, done=False)

        agent.frozen = True
        actions = [
            agent.act({'x': np.array([obs])}, done=False)
            for obs in np.linspace(0, 0.95, 100)
        ]

        # Assert some actions are 0 and some are 1.
        self.assertSameElements(actions, {0, 1})
        # Assert actions are reverse-sorted - i.e., 1s followed by 0s.
        self.assertSequenceEqual(actions, sorted(actions, reverse=True))
    def test_freeze_after_burnin(self):
        env = test_util.DummyEnv()
        burnin = 10
        params = classifier_agents.ScoringAgentParams(
            burnin=burnin,
            freeze_classifier_after_burnin=True,
            default_action_fn=env.action_space.sample,
            feature_keys=['x'])

        agent = classifier_agents.ThresholdAgent(
            action_space=env.action_space,
            observation_space=env.observation_space,
            reward_fn=rewards.BinarizedScalarDeltaReward('x'),
            params=params)

        for _ in range(burnin + 1):
            self.assertFalse(agent.frozen)
            _ = agent.act(env.observation_space.sample(), False)

        self.assertTrue(agent.frozen)
        self.assertTrue(agent.global_threshold)  # Agent has learned something.
    def test_noop_state_updater_does_nothing(self):
        env = test_util.DummyEnv()
        state = env._get_state()
        before = copy.deepcopy(state)
        core.NoUpdate().update(state, env.action_space.sample())
        self.assertEqual(state, before)
    def test_base_state_updater_raises(self):
        env = test_util.DummyEnv()
        state = env._get_state()
        with self.assertRaises(NotImplementedError):
            core.StateUpdater().update(state, env.action_space.sample())
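# core.StateUpdater is an abstract interface: subclasses override update(),
# which mutates the state in place given the chosen action. A hypothetical
# updater, assuming the state object allows attribute assignment:
class RecordLastAction(core.StateUpdater):
    """Illustrative only: stores the most recent action on the state."""

    def update(self, state, action):
        state.last_action = action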