def test_agent_trains_with_two_features(self):
        """Agent learns a threshold on `y` when `x` is held constant.

        Mirrors `test_agent_trains` but with a two-feature observation space,
        where only `y` carries signal and `x` is a constant.
        """
        params = classifier_agents.ScoringAgentParams(
            default_action_fn=lambda: 0, feature_keys=['x', 'y'], burnin=200)

        agent = classifier_agents.ClassifierAgent(
            action_space=gym.spaces.Discrete(2),
            observation_space=gym.spaces.Dict({
                'x': gym.spaces.Box(low=-np.inf, high=np.inf, shape=[1]),
                'y': gym.spaces.Box(low=-np.inf, high=np.inf, shape=[1]),
            }),
            reward_fn=rewards.BinarizedScalarDeltaReward('x'),
            params=params)

        # Train with points that are nearly separable but have some overlap between
        # 0.3 and 0.4 with 1s in the lower region and 0s on the higher region.
        # A linear transform of x -> -x is expected to be learned so that a
        # threshold classifier can be successful.
        # `y` is the relevant feature. `x` is a constant.
        const = np.array([1])

        for observation in np.linspace(0, 0.4, 100):
            agent._act_impl({'y': np.array([observation]), 'x': const},
                            reward=1,
                            done=False)

        for observation in np.linspace(0.3, 0.8, 100):
            agent._act_impl({'y': np.array([observation]), 'x': const},
                            reward=0,
                            done=False)

        # Add a positive point at the top of the range so that the training labels
        # are not fit perfectly by a threshold.
        agent._act_impl({'y': np.array([0.9]), 'x': const},
                        reward=1,
                        done=False)

        # Freeze the agent and collect its decisions over a sweep of `y`.
        # Comprehension form matches the sibling test `test_agent_trains`.
        agent.frozen = True
        actions = [
            agent.act({'y': np.array([obs]), 'x': const}, done=False)
            for obs in np.linspace(0, 0.95, 100)
        ]

        # Assert some actions are 0 and some are 1.
        self.assertSameElements(actions, {0, 1})
        # Assert actions are reverse-sorted - i.e., 1s followed by 0s.
        self.assertSequenceEqual(actions, sorted(actions, reverse=True))
    def test_insufficient_burnin_raises(self):
        env = test_util.DummyEnv()
        burnin = 5
        params = classifier_agents.ScoringAgentParams(
            default_action_fn=env.action_space.sample,
            feature_keys=['x'],
            burnin=burnin)

        agent = classifier_agents.ClassifierAgent(
            action_space=env.action_space,
            observation_space=env.observation_space,
            reward_fn=rewards.BinarizedScalarDeltaReward('x'),
            params=params)

        # Only give positive points to train.
        for _ in range(burnin):
            agent._act_impl(env.observation_space.sample(),
                            reward=1,
                            done=False)

        # Should raise a ValueError since the burnin has passed and the classifier
        # cannot train to make a decision.
        with self.assertRaises(ValueError):
            agent._act_impl(env.observation_space.sample(),
                            reward=1,
                            done=False)
    def test_interact_with_env_replicable(self):
        env = test_util.DummyEnv()
        params = classifier_agents.ScoringAgentParams(
            default_action_fn=env.action_space.sample,
            feature_keys=['x'],
            burnin=5)

        agent = classifier_agents.ClassifierAgent(
            action_space=env.action_space,
            observation_space=env.observation_space,
            reward_fn=rewards.BinarizedScalarDeltaReward('x'),
            params=params)
        test_util.run_test_simulation(env=env, agent=agent)
    def test_agent_trains(self):
        env = test_util.DummyEnv()
        params = classifier_agents.ScoringAgentParams(
            default_action_fn=env.action_space.sample,
            feature_keys=['x'],
            burnin=200)

        agent = classifier_agents.ClassifierAgent(
            action_space=env.action_space,
            observation_space=env.observation_space,
            reward_fn=rewards.BinarizedScalarDeltaReward('x'),
            params=params)

        # Train with points that are nearly separable but have some overlap between
        # 0.3 and 0.4 with 1s in the lower region and 0s on the higher region.
        # A linear transform of x -> -x is expected to be learned so that a
        # threshold classifier can be successful.
        for observation in np.linspace(0, 0.4, 100):
            agent._act_impl({'x': np.array([observation])},
                            reward=1,
                            done=False)

        for observation in np.linspace(0.3, 0.8, 100):
            agent._act_impl({'x': np.array([observation])},
                            reward=0,
                            done=False)

        # Add a positive point at the top of the range so that the training labels
        # are not fit perfectly by a threshold.
        agent._act_impl({'x': np.array([0.9])}, reward=1, done=False)

        agent.frozen = True
        actions = [
            agent.act({'x': np.array([obs])}, done=False)
            for obs in np.linspace(0, 0.95, 100)
        ]

        # Assert some actions are 0 and some are 1.
        self.assertSameElements(actions, {0, 1})
        # Assert actions are reverse-sorted - i.e., 1s followed by 0s.
        self.assertSequenceEqual(actions, sorted(actions, reverse=True))