Python MultiAgentMixInReplayBuffer.sample 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: ray.rllib.utils.replay_buffers.multi_agent_mixin_replay_buffer

메소드/함수: sample

hotexamples.com에서의 예제들: 3

Python MultiAgentMixInReplayBuffer.sample - 3개의 예제를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한 Python ray.rllib.utils.replay_buffers.multi_agent_mixin_replay_buffer.MultiAgentMixInReplayBuffer.sample의 실제 사용 예제 중 가장 높은 평가를 받은 것들입니다. 예제를 평가해 주시면 예제 품질을 높이는 데 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

MultiAgentMixInReplayBuffer(3)

add(3)

sample(3)

예제 #1

파일 보기

파일: test_multi_agent_mixin_replay_buffer.py 프로젝트: smorad/ray

    def test_mixin_sampling_sequences(self):
        """Test sampling of sequences.

        Repeatedly adds the same episode batch to a buffer configured with
        ``storage_unit="sequences"`` and a 50% replay ratio, samples after
        every add, and checks that the mean length of the returned
        default-policy batch is close to ``2 * len(batch)``.
        """
        # 50% replay ratio.
        buffer = MultiAgentMixInReplayBuffer(capacity=100,
                                             storage_unit="sequences",
                                             replay_ratio=0.5)

        # If we insert and replay n times, expect roughly return batches of
        # len 6 (replay_ratio=0.5 -> 50% replayed samples -> 2 new and 2
        # old sequences with an average length of 1.5 each).
        # NOTE(review): the assertion below expects 2 * len(batch); confirm
        # against `_generate_episodes` that this matches the "len 6" above.
        results = []
        batch = self._generate_episodes()
        for _ in range(400):
            buffer.add(batch)
            sample = buffer.sample(10)
            # Use a unittest assertion rather than a bare `assert`, so the
            # check is not stripped under `python -O` and failures report
            # the offending type.
            self.assertIsInstance(sample, MultiAgentBatch)
            results.append(len(sample.policy_batches[DEFAULT_POLICY_ID]))
        self.assertAlmostEqual(np.mean(results), 2 * len(batch), delta=0.1)

예제 #2

파일 보기

파일: test_multi_agent_mixin_replay_buffer.py 프로젝트: smorad/ray

    def test_mixin_sampling_episodes(self):
        """Test sampling of episodes.

        Repeatedly adds the same episode batch to a buffer configured with
        ``storage_unit="episodes"`` and a 50% replay ratio, samples after
        every add, and checks that the mean length of the returned
        default-policy batch equals ``2 * (len(batch) - 1)``.
        """
        # 50% replay ratio.
        buffer = MultiAgentMixInReplayBuffer(capacity=self.capacity,
                                             storage_unit="episodes",
                                             replay_ratio=0.5)

        # If we insert and replay n times, expect each returned value to
        # hold roughly 1 new and 1 old sample (replay_ratio=0.5 -> 50%
        # replayed samples), each of length two on average.
        # NOTE(review): the original comment quoted an expected length of 5,
        # which does not follow from 1 new + 1 old sample of length two;
        # confirm the intended figure against `_generate_episodes`.
        results = []
        batch = self._generate_episodes()
        for _ in range(20):
            buffer.add(batch)
            sample = buffer.sample(2)
            # Use a unittest assertion rather than a bare `assert`, so the
            # check is not stripped under `python -O` and failures report
            # the offending type.
            self.assertIsInstance(sample, MultiAgentBatch)
            results.append(len(sample.policy_batches[DEFAULT_POLICY_ID]))
        # One sample in the batch does not belong to a full episode and thus
        # gets dropped. Full episodes are of length two.
        self.assertAlmostEqual(np.mean(results), 2 * (len(batch) - 1))

예제 #3

파일 보기

파일: test_multi_agent_mixin_replay_buffer.py 프로젝트: smorad/ray

    def test_mixin_sampling_timesteps(self):
        """Test different mixin ratios with timesteps.

        Exercises replay ratios of 0.333, 0.9, 0.0 and 1.0. For each ratio
        the test runs 100 add/sample rounds and compares the mean length of
        the returned default-policy batch with the expected value. It also
        checks that sampling returns no policy batches when the buffer is
        empty, or when ``replay_ratio=0.0`` and nothing new was added.
        """

        def mean_sampled_len(buffer, batch, num_items, adds_per_round):
            """Return the mean default-policy batch length over 100 rounds.

            Each round adds `batch` to `buffer` `adds_per_round` times, then
            calls `buffer.sample(num_items)` and records the length of the
            returned DEFAULT_POLICY_ID batch.
            """
            lengths = []
            for _ in range(100):
                for _ in range(adds_per_round):
                    buffer.add(batch)
                sample = buffer.sample(num_items)
                # unittest assertion instead of a bare `assert`: it is not
                # stripped under `python -O`.
                self.assertIsInstance(sample, MultiAgentBatch)
                lengths.append(len(sample.policy_batches[DEFAULT_POLICY_ID]))
            return np.mean(lengths)

        # 33% replay ratio.
        buffer = MultiAgentMixInReplayBuffer(capacity=self.capacity,
                                             storage_unit="timesteps",
                                             replay_ratio=0.333)
        # Expect exactly 0 samples to be returned (buffer empty).
        sample = buffer.sample(10)
        self.assertEqual(len(sample.policy_batches), 0)

        batch = self._generate_single_timesteps()
        # If we insert-2x and replay n times, expect roughly return batches of
        # len 3 (replay_ratio=0.33 -> 33% replayed samples -> 2 new and 1
        # old sample on average in each returned value).
        self.assertAlmostEqual(
            mean_sampled_len(buffer, batch, num_items=3, adds_per_round=2),
            3.0,
            delta=0.2)

        # If we insert-1x and replay n times, expect roughly return batches of
        # len 1.5 (replay_ratio=0.33 -> 33% replayed samples -> 1 new and 0.5
        # old samples on average in each returned value).
        self.assertAlmostEqual(
            mean_sampled_len(buffer, batch, num_items=5, adds_per_round=1),
            1.5,
            delta=0.2)

        # 90% replay ratio.
        buffer = MultiAgentMixInReplayBuffer(capacity=self.capacity,
                                             replay_ratio=0.9)

        # If we insert and replay n times, expect roughly return batches of
        # len 10 (replay_ratio=0.9 -> 90% replayed samples -> 1 new and 9 old
        # samples on average in each returned value).
        self.assertAlmostEqual(
            mean_sampled_len(buffer, batch, num_items=10, adds_per_round=1),
            10.0,
            delta=0.2)

        # 0% replay ratio -> Only new samples.
        buffer = MultiAgentMixInReplayBuffer(capacity=self.capacity,
                                             replay_ratio=0.0)
        # Add a new batch.
        batch = self._generate_single_timesteps()
        buffer.add(batch)
        # Expect exactly 1 batch to be returned.
        sample = buffer.sample(1)
        self.assertIsInstance(sample, MultiAgentBatch)
        self.assertEqual(len(sample), 1)
        # Expect exactly 0 sample to be returned (nothing new to be returned;
        # no replay allowed (replay_ratio=0.0)).
        sample = buffer.sample(1)
        self.assertIsInstance(sample, MultiAgentBatch)
        self.assertEqual(len(sample.policy_batches), 0)
        # If we insert and replay n times, expect roughly return batches of
        # len 1 (replay_ratio=0.0 -> 0% replayed samples -> 1 new and 0 old
        # samples on average in each returned value).
        self.assertAlmostEqual(
            mean_sampled_len(buffer, batch, num_items=1, adds_per_round=1),
            1.0,
            delta=0.2)

        # 100% replay ratio -> Only replayed (old) samples once the buffer
        # holds data.
        buffer = MultiAgentMixInReplayBuffer(capacity=self.capacity,
                                             replay_ratio=1.0)
        # Expect exactly 0 samples to be returned (buffer empty).
        sample = buffer.sample(1)
        self.assertEqual(len(sample.policy_batches), 0)
        # Add a new batch.
        batch = self._generate_single_timesteps()
        buffer.add(batch)
        # Expect exactly 1 sample to be returned (the new batch).
        sample = buffer.sample(1)
        self.assertIsInstance(sample, MultiAgentBatch)
        self.assertEqual(len(sample), 1)
        # Another replay -> Expect exactly 1 sample to be returned.
        sample = buffer.sample(1)
        self.assertIsInstance(sample, MultiAgentBatch)
        self.assertEqual(len(sample), 1)
        # If we replay n times without adding anything, expect roughly return
        # batches of len 1 (replay_ratio=1.0 -> 100% replayed samples -> 0 new
        # and 1 old samples on average in each returned value).
        self.assertAlmostEqual(
            mean_sampled_len(buffer, batch, num_items=1, adds_per_round=0),
            1.0)