Python ClassifierAgent 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: agents.classifier_agents

메소드/함수: ClassifierAgent

hotexamples.com에서의 예제들: 4

Python ClassifierAgent - 4개의 예제를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한, Python의 agents.classifier_agents.ClassifierAgent 사용 예제 중 가장 높은 평가를 받은 실제 코드입니다. 예제를 평가해 주시면 예제의 품질을 높이는 데 도움이 됩니다.

예제 #1

파일 보기

파일: classifier_agents_test.py 프로젝트: y12uc231/ml-fairness-gym

    def test_agent_trains_with_two_features(self):
        """Trains on two features and checks that actions fall as `y` rises.

        Only `y` carries signal; `x` is held constant. Labels are 1 for `y` in
        [0, 0.4] and 0 for `y` in [0.3, 0.8], so the classes overlap between
        0.3 and 0.4. The agent is expected to learn a negating linear
        transform of the informative feature so that a threshold classifier
        can succeed.
        """
        scoring_params = classifier_agents.ScoringAgentParams(
            default_action_fn=lambda: 0, feature_keys=['x', 'y'], burnin=200)

        agent = classifier_agents.ClassifierAgent(
            params=scoring_params,
            action_space=gym.spaces.Discrete(2),
            observation_space=gym.spaces.Dict({
                key: gym.spaces.Box(low=-np.inf, high=np.inf, shape=[1])
                for key in ('x', 'y')
            }),
            reward_fn=rewards.BinarizedScalarDeltaReward('x'))

        const = np.array([1])

        def make_observation(y_value):
            # `y` varies per example; `x` always carries the same constant.
            return {'y': np.array([y_value]), 'x': const}

        # (y values, label) batches, fed to the agent in this exact order.
        training_batches = (
            (np.linspace(0, 0.4, 100), 1),
            (np.linspace(0.3, 0.8, 100), 0),
            # A lone positive at the top of the range keeps the training
            # labels from being fit perfectly by a threshold.
            ([0.9], 1),
        )
        for y_values, label in training_batches:
            for y_value in y_values:
                agent._act_impl(
                    make_observation(y_value), reward=label, done=False)

        # Freeze the agent before collecting its decisions.
        agent.frozen = True
        actions = [
            agent.act(make_observation(y_value), done=False)
            for y_value in np.linspace(0, 0.95, 100)
        ]

        # Both actions must appear ...
        self.assertSameElements(actions, {0, 1})
        # ... and in reverse-sorted order: a run of 1s followed by a run of 0s.
        self.assertSequenceEqual(actions, sorted(actions, reverse=True))

예제 #2

파일 보기

파일: classifier_agents_test.py 프로젝트: y12uc231/ml-fairness-gym

    def test_insufficient_burnin_raises(self):
        """Expects ValueError when burn-in ends with only positive examples.

        During burn-in the agent is fed nothing but reward=1 observations.
        The first call after burn-in must raise, because the classifier
        cannot train to make a decision from that data.
        """
        burnin = 5
        env = test_util.DummyEnv()

        agent = classifier_agents.ClassifierAgent(
            params=classifier_agents.ScoringAgentParams(
                default_action_fn=env.action_space.sample,
                feature_keys=['x'],
                burnin=burnin),
            action_space=env.action_space,
            observation_space=env.observation_space,
            reward_fn=rewards.BinarizedScalarDeltaReward('x'))

        def feed_positive_example():
            # One sampled observation, always labelled as a positive.
            return agent._act_impl(
                env.observation_space.sample(), reward=1, done=False)

        # Use up the whole burn-in period with positives only.
        for _ in range(burnin):
            feed_positive_example()

        # Burn-in has passed, so the next step has to train and must fail.
        with self.assertRaises(ValueError):
            feed_positive_example()

예제 #3

파일 보기

파일: classifier_agents_test.py 프로젝트: y12uc231/ml-fairness-gym

    def test_interact_with_env_replicable(self):
        """Runs the shared test simulation with a ClassifierAgent on DummyEnv.

        This method contains no assertions of its own; any checks happen
        inside `test_util.run_test_simulation`.
        """
        env = test_util.DummyEnv()
        agent = classifier_agents.ClassifierAgent(
            params=classifier_agents.ScoringAgentParams(
                default_action_fn=env.action_space.sample,
                feature_keys=['x'],
                burnin=5),
            action_space=env.action_space,
            observation_space=env.observation_space,
            reward_fn=rewards.BinarizedScalarDeltaReward('x'))

        test_util.run_test_simulation(env=env, agent=agent)

예제 #4

파일 보기

파일: classifier_agents_test.py 프로젝트: y12uc231/ml-fairness-gym

    def test_agent_trains(self):
        """Trains on the single feature `x` and checks actions fall as it rises.

        Labels are 1 for `x` in [0, 0.4] and 0 for `x` in [0.3, 0.8], so the
        classes overlap between 0.3 and 0.4. A linear transform x -> -x is
        expected to be learned so that a threshold classifier can succeed.
        """
        env = test_util.DummyEnv()

        agent = classifier_agents.ClassifierAgent(
            params=classifier_agents.ScoringAgentParams(
                default_action_fn=env.action_space.sample,
                feature_keys=['x'],
                burnin=200),
            action_space=env.action_space,
            observation_space=env.observation_space,
            reward_fn=rewards.BinarizedScalarDeltaReward('x'))

        def make_observation(x_value):
            return {'x': np.array([x_value])}

        # (x values, label) batches, fed to the agent in this exact order.
        training_batches = (
            (np.linspace(0, 0.4, 100), 1),
            (np.linspace(0.3, 0.8, 100), 0),
            # A lone positive at the top of the range keeps the training
            # labels from being fit perfectly by a threshold.
            ([0.9], 1),
        )
        for x_values, label in training_batches:
            for x_value in x_values:
                agent._act_impl(
                    make_observation(x_value), reward=label, done=False)

        # Freeze the agent before collecting its decisions.
        agent.frozen = True
        actions = []
        for x_value in np.linspace(0, 0.95, 100):
            actions.append(agent.act(make_observation(x_value), done=False))

        # Both actions must appear ...
        self.assertSameElements(actions, {0, 1})
        # ... and in reverse-sorted order: a run of 1s followed by a run of 0s.
        self.assertSequenceEqual(actions, sorted(actions, reverse=True))