def test_ray_updating(self):
    """
    Tests update performance of Ray's prioritized replay memory.
    """
    assert get_distributed_backend() == "ray"
    memory = PrioritizedReplayBuffer(
        size=self.capacity,
        alpha=1.0,
        clip_rewards=True
    )
    records = [self.record_space.sample(size=1) for _ in range_(self.inserts)]
    for record in records:
        memory.add(
            obs_t=record['states'],
            action=record['actions'],
            reward=record['reward'],
            obs_tp1=record['states'],
            done=record['terminals'],
            weight=None
        )
    loss_values = [np.random.random(size=self.sample_batch_size)
                   for _ in range_(self.samples)]
    indices = [np.random.randint(low=0, high=self.inserts, size=self.sample_batch_size)
               for _ in range_(self.samples)]

    start = time.monotonic()
    for index, loss in zip(indices, loss_values):
        memory.update_priorities(index, loss)
    end = time.monotonic() - start
    tp = len(indices) / end
    print('#### Testing Ray Prioritized Replay memory ####')
    print('Testing updating performance:')
    print('Updated {} loss batches, throughput: {} updates/s, total time: {} s'.format(
        len(indices), tp, end
    ))
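# The test above depends on fixture attributes (self.capacity, self.inserts,
# self.sample_batch_size, self.samples, self.record_space) that the test
# class defines elsewhere. A self-contained stand-in for those fixtures is
# sketched below; all names, shapes, and values are illustrative
# assumptions, not the real setUp().
import numpy as np

capacity = 100000          # stands in for self.capacity
inserts = 10000            # stands in for self.inserts
sample_batch_size = 64     # stands in for self.sample_batch_size
samples = 100              # stands in for self.samples

class RecordSpace(object):
    # Stand-in for self.record_space: sampling returns a dict of arrays
    # keyed exactly like the records consumed by memory.add() above.
    def sample(self, size=1):
        return dict(
            states=np.random.random(size=(size, 4)),
            actions=np.random.randint(low=0, high=4, size=size),
            reward=np.random.random(size=size),
            terminals=np.zeros(size, dtype=bool),
        )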
def test_ray_combined_ops(self):
    """
    Tests a combined workflow of insert, sample, update on the
    prioritized replay memory.
    """
    assert get_distributed_backend() == "ray"
    memory = PrioritizedReplayBuffer(
        size=self.capacity,
        alpha=1.0,
        clip_rewards=True
    )
    chunksize = 32

    # Test chunked inserts -> done via external for loop in Ray.
    chunks = int(self.inserts / chunksize)
    records = [self.record_space.sample(size=chunksize) for _ in range_(chunks)]
    loss_values = [np.random.random(size=self.sample_batch_size)
                   for _ in range_(chunks)]

    start = time.monotonic()
    # Note: the loop variable must not shadow the `loss_values` list.
    for chunk, loss in zip(records, loss_values):
        # Insert.
        for i in range_(chunksize):
            memory.add(
                obs_t=ray_compress(chunk['states'][i]),
                action=chunk['actions'][i],
                reward=chunk['reward'][i],
                obs_tp1=ray_compress(chunk['states'][i]),
                done=chunk['terminals'][i],
                weight=None
            )
        # Sample.
        batch_tuple = memory.sample(self.sample_batch_size, beta=1.0)
        indices = batch_tuple[-1]
        # Update.
        memory.update_priorities(indices, loss)
    end = time.monotonic() - start
    tp = len(records) / end
    print('Ray: testing combined insert/sample/update performance:')
    print('Ran {} combined ops, throughput: {} combined ops/s, total time: {} s'.format(
        len(records), tp, end
    ))
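# `ray_compress` above is an imported helper that shrinks observations
# before they are stored in the buffer. A rough stand-in with the same
# contract (object in, compact bytes out) is sketched below; the real
# helper's codec is not taken from this source, so treat this as an
# assumption for illustration.
import pickle
import zlib

def ray_compress(data):
    # Serialize the observation, then compress it so large state arrays
    # occupy less memory inside the replay buffer.
    return zlib.compress(pickle.dumps(data))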
class ReplayActor(object):
    """A replay buffer shard.

    Ray actors are single-threaded, so for scalability multiple replay
    actors may be created to increase parallelism."""

    def __init__(self, num_shards, learning_starts, buffer_size,
                 train_batch_size, prioritized_replay_alpha,
                 prioritized_replay_beta, prioritized_replay_eps):
        self.replay_starts = learning_starts // num_shards
        self.buffer_size = buffer_size // num_shards
        self.train_batch_size = train_batch_size
        self.prioritized_replay_beta = prioritized_replay_beta
        self.prioritized_replay_eps = prioritized_replay_eps
        self.replay_buffer = PrioritizedReplayBuffer(
            self.buffer_size, alpha=prioritized_replay_alpha)

        # Metrics
        self.add_batch_timer = TimerStat()
        self.replay_timer = TimerStat()
        self.update_priorities_timer = TimerStat()

    def get_host(self):
        return os.uname()[1]

    def add_batch(self, batch):
        PolicyOptimizer._check_not_multiagent(batch)
        with self.add_batch_timer:
            for row in batch.rows():
                self.replay_buffer.add(row["obs"], row["actions"],
                                       row["rewards"], row["new_obs"],
                                       row["dones"], row["weights"])

    def replay(self):
        with self.replay_timer:
            if len(self.replay_buffer) < self.replay_starts:
                return None

            (obses_t, actions, rewards, obses_tp1, dones, weights,
             batch_indexes) = self.replay_buffer.sample(
                 self.train_batch_size, beta=self.prioritized_replay_beta)

            batch = SampleBatch({
                "obs": obses_t,
                "actions": actions,
                "rewards": rewards,
                "new_obs": obses_tp1,
                "dones": dones,
                "weights": weights,
                "batch_indexes": batch_indexes
            })
            return batch

    def update_priorities(self, batch_indexes, td_errors):
        with self.update_priorities_timer:
            new_priorities = (np.abs(td_errors) + self.prioritized_replay_eps)
            self.replay_buffer.update_priorities(batch_indexes, new_priorities)

    def stats(self):
        stat = {
            "add_batch_time_ms": round(1000 * self.add_batch_timer.mean, 3),
            "replay_time_ms": round(1000 * self.replay_timer.mean, 3),
            "update_priorities_time_ms": round(
                1000 * self.update_priorities_timer.mean, 3),
        }
        stat.update(self.replay_buffer.stats())
        return stat
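# A minimal sketch of how such replay shards might be created and used from
# a driver process. Wrapping a class with ray.remote() and calling
# .remote(...) is standard Ray usage; the shard count and hyperparameter
# values below are illustrative assumptions.
import ray

ray.init()
RemoteReplayActor = ray.remote(ReplayActor)
num_shards = 4
replay_actors = [
    # Args: num_shards, learning_starts, buffer_size, train_batch_size,
    # prioritized_replay_alpha, prioritized_replay_beta,
    # prioritized_replay_eps.
    RemoteReplayActor.remote(num_shards, 1000, 100000, 32, 0.6, 0.4, 1e-6)
    for _ in range(num_shards)
]
# Each shard then serves add_batch()/replay()/update_priorities() calls
# independently, so inserts and samples can proceed in parallel:
batches = ray.get([actor.replay.remote() for actor in replay_actors])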
def test_update_priorities(self):
    memory = PrioritizedReplayBuffer(size=self.capacity, alpha=self.alpha)

    # Insert n samples.
    num_records = 5
    for i in range(num_records):
        data = self._generate_data()
        memory.add(*data, weight=1.0)
        self.assertTrue(len(memory) == i + 1)
        self.assertTrue(memory._next_idx == i + 1)

    # Fetch records, their indices and weights.
    _, _, _, _, _, weights, indices = \
        memory.sample(3, beta=self.beta)
    check(weights, np.ones(shape=(3, )))
    self.assertEqual(3, len(indices))
    self.assertTrue(len(memory) == num_records)
    self.assertTrue(memory._next_idx == num_records)

    # Update weight of indices 0, 2, 3, 4 to very small.
    memory.update_priorities(
        np.array([0, 2, 3, 4]), np.array([0.01, 0.01, 0.01, 0.01]))
    # Expect to sample almost only index 1
    # (which still has a weight of 1.0).
    for _ in range(10):
        _, _, _, _, _, weights, indices = memory.sample(1000, beta=self.beta)
        self.assertTrue(970 < np.sum(indices) < 1100)

    # Update weight of indices 0 and 1 to >> 0.01.
    # Expect to sample 0 and 1 equally (and some 2s, 3s, and 4s).
    for _ in range(10):
        rand = np.random.random() + 0.2
        memory.update_priorities(np.array([0, 1]), np.array([rand, rand]))
        _, _, _, _, _, _, indices = memory.sample(1000, beta=self.beta)
        # Expect bias toward higher values due to some 2s, 3s, and 4s.
        self.assertTrue(400 < np.sum(indices) < 800)

    # Update weights to be 1:2.
    # Expect to sample index 1 twice as often as index 0,
    # plus very few times indices 2, 3, or 4.
    for _ in range(10):
        rand = np.random.random() + 0.2
        memory.update_priorities(
            np.array([0, 1]), np.array([rand, rand * 2]))
        _, _, _, _, _, _, indices = memory.sample(1000, beta=self.beta)
        self.assertTrue(600 < np.sum(indices) < 850)

    # Update weights to be 1:4.
    # Expect to sample index 1 four times as often as index 0,
    # plus very few times indices 2, 3, or 4.
    for _ in range(10):
        rand = np.random.random() + 0.2
        memory.update_priorities(
            np.array([0, 1]), np.array([rand, rand * 4]))
        _, _, _, _, _, _, indices = memory.sample(1000, beta=self.beta)
        self.assertTrue(750 < np.sum(indices) < 950)

    # Update weights to be 1:9.
    # Expect to sample index 1 nine times as often as index 0,
    # plus very few times indices 2, 3, or 4.
    for _ in range(10):
        rand = np.random.random() + 0.2
        memory.update_priorities(
            np.array([0, 1]), np.array([rand, rand * 9]))
        _, _, _, _, _, _, indices = memory.sample(1000, beta=self.beta)
        self.assertTrue(850 < np.sum(indices) < 1100)

    # Insert n more samples.
    num_records = 5
    for i in range(num_records):
        data = self._generate_data()
        memory.add(*data, weight=1.0)
        self.assertTrue(len(memory) == i + 6)
        self.assertTrue(memory._next_idx == (i + 6) % self.capacity)

    # Update all 10 weights to strictly increasing values and sample
    # batches of size >100.
    memory.update_priorities(
        np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
        np.array([0.001, 0.1, 2., 8., 16., 32., 64., 128., 256., 512.]))
    counts = Counter()
    for _ in range(10):
        _, _, _, _, _, _, indices = memory.sample(
            np.random.randint(100, 600), beta=self.beta)
        for i in indices:
            counts[i] += 1
    print(counts)
    # Expect an approximately correct distribution of indices.
    self.assertTrue(
        counts[9] >= counts[8] >= counts[7] >= counts[6] >=
        counts[5] >= counts[4] >= counts[3] >= counts[2] >=
        counts[1] >= counts[0])
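# Why the 1:2 / 1:4 / 1:9 ratios above translate into the asserted sampling
# counts: with proportional prioritization, P(i) = p_i**alpha / sum_k
# p_k**alpha. A minimal numpy sketch of that relationship (alpha = 1.0
# assumed here for simplicity):
import numpy as np

priorities = np.array([1.0, 2.0, 0.01, 0.01, 0.01])
alpha = 1.0
probs = priorities ** alpha / np.sum(priorities ** alpha)
samples = np.random.choice(len(priorities), size=1000, p=probs)
# Index 1 shows up roughly twice as often as index 0, matching the
# expectations asserted in the test above.
print(np.bincount(samples, minlength=len(priorities)))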
class ReplayActor(object):
    def __init__(self, num_shards, learning_starts, buffer_size,
                 train_batch_size, prioritized_replay_alpha,
                 prioritized_replay_beta, prioritized_replay_eps):
        self.replay_starts = learning_starts // num_shards
        self.buffer_size = buffer_size // num_shards
        self.train_batch_size = train_batch_size
        self.prioritized_replay_beta = prioritized_replay_beta
        self.prioritized_replay_eps = prioritized_replay_eps
        # Use the per-shard buffer size, not the global one.
        self.replay_buffer = PrioritizedReplayBuffer(
            self.buffer_size, alpha=prioritized_replay_alpha)

        # Metrics
        self.add_batch_timer = TimerStat()
        self.replay_timer = TimerStat()
        self.update_priorities_timer = TimerStat()

    def get_host(self):
        return os.uname()[1]

    def add_batch(self, batch):
        with self.add_batch_timer:
            for row in batch.rows():
                self.replay_buffer.add(row["obs"], row["actions"],
                                       row["rewards"], row["new_obs"],
                                       row["dones"], row["weights"])

    def replay(self):
        with self.replay_timer:
            if len(self.replay_buffer) < self.replay_starts:
                return None

            (obses_t, actions, rewards, obses_tp1, dones, weights,
             batch_indexes) = self.replay_buffer.sample(
                 self.train_batch_size, beta=self.prioritized_replay_beta)

            batch = SampleBatch({
                "obs": obses_t,
                "actions": actions,
                "rewards": rewards,
                "new_obs": obses_tp1,
                "dones": dones,
                "weights": weights,
                "batch_indexes": batch_indexes
            })
            return batch

    def update_priorities(self, batch, td_errors):
        with self.update_priorities_timer:
            new_priorities = (np.abs(td_errors) + self.prioritized_replay_eps)
            self.replay_buffer.update_priorities(batch["batch_indexes"],
                                                 new_priorities)

    def stats(self):
        stat = {
            "add_batch_time_ms": round(1000 * self.add_batch_timer.mean, 3),
            "replay_time_ms": round(1000 * self.replay_timer.mean, 3),
            "update_priorities_time_ms": round(
                1000 * self.update_priorities_timer.mean, 3),
        }
        stat.update(self.replay_buffer.stats())
        return stat
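# In this variant, update_priorities() receives the whole SampleBatch and
# extracts "batch_indexes" itself, so the learner hands back exactly the
# batch it trained on. A sketch of that round trip, assuming a single local
# shard and using random placeholder TD-errors in place of a real learner:
replay_actor = ReplayActor(1, 1000, 100000, 32, 0.6, 0.4, 1e-6)
batch = replay_actor.replay()
if batch is not None:
    td_errors = np.random.random(replay_actor.train_batch_size)  # placeholder
    replay_actor.update_priorities(batch, td_errors)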