def test_agent_trains_with_two_features(self):
    """Trains on an informative `y` and a constant `x`, then checks the policy.

    Positive rewards are fed for low `y` values and zero rewards for higher
    ones (with an overlap between 0.3 and 0.4), plus a single positive point
    at 0.9. After freezing the agent, its actions over an increasing sweep of
    `y` must contain both 0 and 1 and must be ordered as 1s followed by 0s.
    """
    agent_params = classifier_agents.ScoringAgentParams(
        default_action_fn=lambda: 0, feature_keys=['x', 'y'], burnin=200)
    # One unbounded scalar Box per feature key.
    feature_spaces = {
        key: gym.spaces.Box(low=-np.inf, high=np.inf, shape=[1])
        for key in ('x', 'y')
    }
    agent = classifier_agents.ClassifierAgent(
        action_space=gym.spaces.Discrete(2),
        observation_space=gym.spaces.Dict(feature_spaces),
        reward_fn=rewards.BinarizedScalarDeltaReward('x'),
        params=agent_params)

    # `y` is the relevant feature; `x` is held constant, and the same array
    # object is reused in every observation.
    const = np.array([1])

    def make_observation(y_value):
        return {'y': np.array([y_value]), 'x': const}

    # Nearly separable training data: label 1 in the lower region and label 0
    # in the higher region, overlapping between 0.3 and 0.4. The classifier is
    # expected to learn a decreasing (negated) transform of the feature so
    # that a simple threshold can separate the labels.
    labelled_ranges = (
        (np.linspace(0, 0.4, 100), 1),
        (np.linspace(0.3, 0.8, 100), 0),
    )
    for y_values, label in labelled_ranges:
        for y_value in y_values:
            agent._act_impl(make_observation(y_value), reward=label, done=False)

    # A lone positive point at the top of the range keeps the training labels
    # from being fit perfectly by a threshold.
    agent._act_impl(make_observation(0.9), reward=1, done=False)

    agent.frozen = True
    actions = [
        agent.act(make_observation(y_value), done=False)
        for y_value in np.linspace(0, 0.95, 100)
    ]

    # Both actions must occur.
    self.assertSameElements(actions, {0, 1})
    # Actions must be reverse-sorted, i.e. 1s followed by 0s.
    self.assertSequenceEqual(actions, sorted(actions, reverse=True))
def test_insufficient_burnin_raises(self):
    """Expects a ValueError once burn-in ends with only positive examples."""
    env = test_util.DummyEnv()
    num_burnin_steps = 5
    agent_params = classifier_agents.ScoringAgentParams(
        default_action_fn=env.action_space.sample,
        feature_keys=['x'],
        burnin=num_burnin_steps)
    agent = classifier_agents.ClassifierAgent(
        action_space=env.action_space,
        observation_space=env.observation_space,
        reward_fn=rewards.BinarizedScalarDeltaReward('x'),
        params=agent_params)

    def step_with_positive_reward():
        # Every call feeds a freshly sampled observation labelled positive.
        return agent._act_impl(
            env.observation_space.sample(), reward=1, done=False)

    # The whole burn-in period sees positive points only.
    for _ in range(num_burnin_steps):
        step_with_positive_reward()

    # With burn-in over, the classifier cannot be trained to make a decision
    # from a single class, so the next step should raise a ValueError.
    with self.assertRaises(ValueError):
        step_with_positive_reward()
def test_interact_with_env_replicable(self):
    """Runs the shared test simulation with a ClassifierAgent on DummyEnv.

    The actual checks live in `test_util.run_test_simulation`; judging by the
    test name it presumably verifies that the interaction is replicable.
    """
    env = test_util.DummyEnv()
    agent = classifier_agents.ClassifierAgent(
        params=classifier_agents.ScoringAgentParams(
            default_action_fn=env.action_space.sample,
            feature_keys=['x'],
            burnin=5),
        action_space=env.action_space,
        observation_space=env.observation_space,
        reward_fn=rewards.BinarizedScalarDeltaReward('x'))
    test_util.run_test_simulation(env=env, agent=agent)
def test_agent_trains(self):
    """Trains on a single feature `x` and checks the resulting policy.

    Positive rewards are fed for low `x` values and zero rewards for higher
    ones (with an overlap between 0.3 and 0.4), plus a single positive point
    at 0.9. After freezing the agent, its actions over an increasing sweep of
    `x` must contain both 0 and 1 and must be ordered as 1s followed by 0s.
    """
    env = test_util.DummyEnv()
    agent_params = classifier_agents.ScoringAgentParams(
        default_action_fn=env.action_space.sample,
        feature_keys=['x'],
        burnin=200)
    agent = classifier_agents.ClassifierAgent(
        action_space=env.action_space,
        observation_space=env.observation_space,
        reward_fn=rewards.BinarizedScalarDeltaReward('x'),
        params=agent_params)

    def make_observation(x_value):
        return {'x': np.array([x_value])}

    # Nearly separable training data: label 1 in the lower region and label 0
    # in the higher region, overlapping between 0.3 and 0.4. A linear
    # transform of x -> -x is expected to be learned so that a threshold
    # classifier can be successful.
    labelled_ranges = (
        (np.linspace(0, 0.4, 100), 1),
        (np.linspace(0.3, 0.8, 100), 0),
    )
    for x_values, label in labelled_ranges:
        for x_value in x_values:
            agent._act_impl(make_observation(x_value), reward=label, done=False)

    # A lone positive point at the top of the range keeps the training labels
    # from being fit perfectly by a threshold.
    agent._act_impl(make_observation(0.9), reward=1, done=False)

    agent.frozen = True
    evaluation_grid = np.linspace(0, 0.95, 100)
    actions = [
        agent.act(make_observation(x_value), done=False)
        for x_value in evaluation_grid
    ]

    # Both actions must occur.
    self.assertSameElements(actions, {0, 1})
    # Actions must be reverse-sorted, i.e. 1s followed by 0s.
    self.assertSequenceEqual(actions, sorted(actions, reverse=True))