def test_insufficient_burnin_raises(self):
    """Acting after a burn-in that saw only positive labels raises ValueError."""
    env = test_util.DummyEnv()
    burnin_steps = 5
    agent_params = classifier_agents.ScoringAgentParams(
        default_action_fn=env.action_space.sample,
        feature_keys=['x'],
        burnin=burnin_steps,
    )
    agent = classifier_agents.ClassifierAgent(
        action_space=env.action_space,
        observation_space=env.observation_space,
        reward_fn=rewards.BinarizedScalarDeltaReward('x'),
        params=agent_params,
    )

    # Every burn-in step carries reward=1, so the training data holds
    # positive examples only.
    for _ in range(burnin_steps):
        observation = env.observation_space.sample()
        agent._act_impl(observation, reward=1, done=False)

    # Burn-in is over; the classifier cannot be trained to make a decision,
    # so the next step is expected to fail with ValueError.
    with self.assertRaises(ValueError):
        agent._act_impl(env.observation_space.sample(), reward=1, done=False)
def test_can_run_with_env(self):
    """Smoke test: a RandomAgent runs through a test simulation on DummyEnv."""
    env = test_util.DummyEnv()
    random_agent = random_agents.RandomAgent(
        action_space=env.action_space,
        observation_space=env.observation_space,
        reward_fn=None,
    )
    test_util.run_test_simulation(env=env, agent=random_agent)
def test_episode_done_raises_error(self):
    """Calling act() with done=True raises core.EpisodeDoneError."""
    env = test_util.DummyEnv()
    random_agent = random_agents.RandomAgent(
        env.action_space, None, env.observation_space)
    first_observation = env.reset()

    with self.assertRaises(core.EpisodeDoneError):
        random_agent.act(first_observation, done=True)
def test_agent_raises_with_improper_number_of_features(self):
    """ThresholdAgent acts with one feature key; zero or several raise ValueError."""
    env = test_util.DummyEnv()

    one_key_params = classifier_agents.ScoringAgentParams(
        default_action_fn=env.action_space.sample, feature_keys=['x'])
    two_key_params = classifier_agents.ScoringAgentParams(
        default_action_fn=env.action_space.sample, feature_keys=['x', 'y'])
    zero_key_params = classifier_agents.ScoringAgentParams(
        default_action_fn=env.action_space.sample, feature_keys=[])

    # Built once so that every agent below receives the same spaces and the
    # same reward function instance.
    shared_kwargs = dict(
        action_space=env.action_space,
        observation_space=env.observation_space,
        reward_fn=rewards.BinarizedScalarDeltaReward('x'),
    )

    def build_agent(agent_params):
        """Create a ThresholdAgent that differs only in its params."""
        return classifier_agents.ThresholdAgent(
            params=agent_params, **shared_kwargs)

    # Exactly one feature key: acting should succeed.
    valid_agent = build_agent(one_key_params)
    valid_agent.act(env.observation_space.sample(), done=False)

    # Too many keys, then no keys at all: acting must raise in both cases.
    for bad_params in (two_key_params, zero_key_params):
        invalid_agent = build_agent(bad_params)
        with self.assertRaises(ValueError):
            invalid_agent.act(env.observation_space.sample(), done=False)
def test_invalid_env_interactions(self):
    """DummyEnv.step raises InvalidAction for a bogus action and accepts 0."""
    env = test_util.DummyEnv()
    bogus_action = 'not a real action'

    with self.assertRaises(gym.error.InvalidAction):
        env.step(bogus_action)

    # Stepping with the action 0 is expected to go through without error.
    env.step(0)
def test_skip_retraining_fn(self):
    """With an always-true skip_retraining_fn no global threshold is learned."""
    env = test_util.DummyEnv()
    burnin = 10

    def _always_skip(action, observation):
        """Tell the agent to skip retraining regardless of its inputs."""
        del action, observation  # Unused.
        return True

    agent_params = classifier_agents.ScoringAgentParams(
        burnin=burnin,
        freeze_classifier_after_burnin=False,
        default_action_fn=env.action_space.sample,
        feature_keys=['x'],
        skip_retraining_fn=_always_skip,
    )
    agent = classifier_agents.ThresholdAgent(
        action_space=env.action_space,
        observation_space=env.observation_space,
        reward_fn=rewards.BinarizedScalarDeltaReward('x'),
        params=agent_params,
    )

    # Step one past the burn-in period; the agent must stay unfrozen
    # before every step.
    step_count = burnin + 1
    for _ in range(step_count):
        self.assertFalse(agent.frozen)
        observation = env.observation_space.sample()
        agent.act(observation, False)

    # Still not frozen after burn-in ...
    self.assertFalse(agent.frozen)
    # ... and nothing has been learned: the global threshold is falsy.
    self.assertFalse(agent.global_threshold)
def test_accuracy_metric_can_interact_with_dummy(self):
    """Smoke test: AccuracyMetric runs in a test simulation on DummyEnv."""

    def _is_zero(history_item):
        """Return 1 when the history item's action equals 0, otherwise 0."""
        _unused_state, chosen_action = history_item
        action_is_zero = chosen_action == 0
        return int(action_is_zero)

    env = test_util.DummyEnv()
    null_reward = rewards.NullReward()
    env.set_scalar_reward(null_reward)

    accuracy = error_metrics.AccuracyMetric(env=env, numerator_fn=_is_zero)
    test_util.run_test_simulation(env=env, metric=accuracy)
def test_metric_realigns_history(self):
    """A metric's realign_fn is applied to the history it extracts."""
    env = test_util.DummyEnv()
    agent = random_agents.RandomAgent(
        env.action_space, None, env.observation_space)
    env.set_scalar_reward(agent.reward_fn)

    def realign_fn(history):
        """Replace every state in the history with the constant 1."""
        realigned = []
        for _, action in history:
            realigned.append((1, action))
        return realigned

    metric = test_util.DummyMetric(env, realign_fn=realign_fn)
    test_util.run_test_simulation(env, agent, metric)

    extracted = metric._extract_history(env)
    observed_states = [state for state, _ in extracted]
    expected_states = [1] * 10
    self.assertCountEqual(expected_states, observed_states)
def test_interact_with_env_replicable(self):
    """Smoke test: a ClassifierAgent runs in a test simulation on DummyEnv."""
    env = test_util.DummyEnv()
    agent_params = classifier_agents.ScoringAgentParams(
        default_action_fn=env.action_space.sample,
        feature_keys=['x'],
        burnin=5,
    )
    classifier_agent = classifier_agents.ClassifierAgent(
        action_space=env.action_space,
        observation_space=env.observation_space,
        reward_fn=rewards.BinarizedScalarDeltaReward('x'),
        params=agent_params,
    )
    test_util.run_test_simulation(env=env, agent=classifier_agent)
def test_interact_with_env_replicable(self):
    """Smoke test: a ThresholdAgent runs in a test simulation on DummyEnv."""
    env = test_util.DummyEnv()
    agent_params = classifier_agents.ScoringAgentParams(
        burnin=10,
        freeze_classifier_after_burnin=False,
        default_action_fn=env.action_space.sample,
        feature_keys=["x"],
    )
    threshold_agent = classifier_agents.ThresholdAgent(
        action_space=env.action_space,
        observation_space=env.observation_space,
        reward_fn=rewards.BinarizedScalarDeltaReward("x"),
        params=agent_params,
    )
    test_util.run_test_simulation(env=env, agent=threshold_agent)
def test_error_on_scalar(self):
    """Confirm that measuring raises ValueError when actions are scalars."""
    env = test_util.DummyEnv()
    env.seed(100)
    agent = random_agents.RandomAgent(
        env.action_space, None, env.observation_space)
    agent.seed(100)

    # Take two agent/environment steps so the environment has some history.
    observation, done = env.reset(), False
    for _ in range(2):
        chosen_action = agent.act(observation, done)
        observation, _, done, _ = env.step(chosen_action)

    metric = distribution_comparison_metrics.DistributionComparisonMetric(
        env, "x", 100)
    with self.assertRaises(ValueError):
        metric.measure(env)
def test_agent_trains(self):
    """ThresholdAgent learns a positive global threshold from noisy data."""
    env = test_util.DummyEnv()
    agent_params = classifier_agents.ScoringAgentParams(
        burnin=200,
        default_action_fn=env.action_space.sample,
        feature_keys=['x'],
    )
    agent = classifier_agents.ThresholdAgent(
        action_space=env.action_space,
        observation_space=env.observation_space,
        reward_fn=rewards.BinarizedScalarDeltaReward('x'),
        params=agent_params,
    )

    def feed(points, label):
        """Show the agent each point as feature 'x' with the given reward."""
        for point in points:
            agent._act_impl(
                {'x': np.array([point])}, reward=label, done=False)

    # Nearly separable training data: negatives on [0, 0.4], positives on
    # [0.3, 0.8], overlapping between 0.3 and 0.4.
    feed(np.linspace(0, 0.4, 100), 0)
    feed(np.linspace(0.3, 0.8, 100), 1)
    # One negative at the top of the range, so that no threshold fits the
    # training labels perfectly.
    feed([0.9], 0)

    agent.frozen = True

    actions = []
    for obs in np.linspace(0, 0.95, 100):
        actions.append(agent.act({'x': np.array([obs])}, done=False))

    # Both actions occur ...
    self.assertSameElements(actions, {0, 1})
    # ... and they are already sorted, i.e. all 0s precede all 1s.
    self.assertSequenceEqual(actions, sorted(actions))

    self.assertGreater(agent.global_threshold, 0)
    self.assertFalse(agent.group_specific_thresholds)
def test_agent_seed(self):
    """Re-seeding with the same value makes agent.rng repeat its draw."""
    env = test_util.DummyEnv()
    agent_params = classifier_agents.ScoringAgentParams(
        burnin=10,
        freeze_classifier_after_burnin=False,
        default_action_fn=env.action_space.sample,
        feature_keys=['x'],
    )
    agent = classifier_agents.ThresholdAgent(
        action_space=env.action_space,
        observation_space=env.observation_space,
        reward_fn=rewards.BinarizedScalarDeltaReward('x'),
        params=agent_params,
    )

    # Seed, draw, then seed again with the same value and draw again.
    draws = []
    for _ in range(2):
        agent.seed(100)
        draws.append(agent.rng.randint(0, 1000))

    first_draw, second_draw = draws
    self.assertEqual(first_draw, second_draw)
def test_frozen_classifier_never_trains(self):
    """An agent created with frozen=True keeps its global_threshold."""
    env = test_util.DummyEnv()
    agent_params = classifier_agents.ScoringAgentParams(
        burnin=0,
        default_action_fn=env.action_space.sample,
        feature_keys=['x'],
    )
    agent = classifier_agents.ThresholdAgent(
        action_space=env.action_space,
        observation_space=env.observation_space,
        reward_fn=rewards.BinarizedScalarDeltaReward('x'),
        params=agent_params,
        frozen=True,
    )

    # Plant a distinctive threshold so that any change would be visible.
    sentinel_threshold = 0.123
    agent.global_threshold = sentinel_threshold

    # Act for a number of steps; the planted threshold should not change.
    for _ in range(10):
        observation = env.observation_space.sample()
        agent.act(observation, False)

    self.assertEqual(agent.global_threshold, sentinel_threshold)
def test_agent_trains(self):
    """ClassifierAgent learns to accept low x when positives sit at low x."""
    env = test_util.DummyEnv()
    agent_params = classifier_agents.ScoringAgentParams(
        default_action_fn=env.action_space.sample,
        feature_keys=['x'],
        burnin=200,
    )
    agent = classifier_agents.ClassifierAgent(
        action_space=env.action_space,
        observation_space=env.observation_space,
        reward_fn=rewards.BinarizedScalarDeltaReward('x'),
        params=agent_params,
    )

    def feed(points, label):
        """Show the agent each point as feature 'x' with the given reward."""
        for point in points:
            agent._act_impl(
                {'x': np.array([point])}, reward=label, done=False)

    # Nearly separable training data with the labels inverted relative to x:
    # positives on [0, 0.4], negatives on [0.3, 0.8], overlapping between
    # 0.3 and 0.4. The classifier is expected to learn a transform like
    # x -> -x so that thresholding can succeed.
    feed(np.linspace(0, 0.4, 100), 1)
    feed(np.linspace(0.3, 0.8, 100), 0)
    # One positive at the top of the range, so that no threshold fits the
    # training labels perfectly.
    feed([0.9], 1)

    agent.frozen = True

    actions = []
    for obs in np.linspace(0, 0.95, 100):
        actions.append(agent.act({'x': np.array([obs])}, done=False))

    # Both actions occur ...
    self.assertSameElements(actions, {0, 1})
    # ... and they are reverse-sorted, i.e. all 1s precede all 0s.
    self.assertSequenceEqual(actions, sorted(actions, reverse=True))
def test_freeze_after_burnin(self):
    """With freeze_classifier_after_burnin the agent freezes after burn-in."""
    env = test_util.DummyEnv()
    burnin = 10
    agent_params = classifier_agents.ScoringAgentParams(
        burnin=burnin,
        freeze_classifier_after_burnin=True,
        default_action_fn=env.action_space.sample,
        feature_keys=['x'],
    )
    agent = classifier_agents.ThresholdAgent(
        action_space=env.action_space,
        observation_space=env.observation_space,
        reward_fn=rewards.BinarizedScalarDeltaReward('x'),
        params=agent_params,
    )

    # The agent must be unfrozen before each of the burnin + 1 steps.
    step_count = burnin + 1
    for _ in range(step_count):
        self.assertFalse(agent.frozen)
        observation = env.observation_space.sample()
        agent.act(observation, False)

    self.assertTrue(agent.frozen)
    # A truthy global threshold shows the agent has learned something.
    self.assertTrue(agent.global_threshold)
def test_noop_state_updater_does_nothing(self):
    """core.NoUpdate().update leaves the state equal to a prior deep copy."""
    env = test_util.DummyEnv()
    state = env._get_state()
    snapshot = copy.deepcopy(state)

    noop_updater = core.NoUpdate()
    sampled_action = env.action_space.sample()
    noop_updater.update(state, sampled_action)

    self.assertEqual(state, snapshot)
def test_base_state_updater_raises(self):
    """Using the base core.StateUpdater raises NotImplementedError."""
    env = test_util.DummyEnv()
    state = env._get_state()

    # Construction and the update call both stay inside the context so that
    # either one may be the source of the expected error.
    with self.assertRaises(NotImplementedError):
        base_updater = core.StateUpdater()
        base_updater.update(state, env.action_space.sample())