Example #1
    def test_rlgraph_sampling(self):
        """
        Tests RLgraph's sampling performance.
        """
        memory = ApexMemory(
            capacity=self.capacity,
            alpha=1.0
        )

        records = [self.record_space.sample(size=1) for _ in range_(self.inserts)]
        for record in records:
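            # Record layout here: (state, action, reward, terminal, weight);
            # passing None means no explicit priority weight is given.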
            memory.insert_records((
                 ray_compress(record['states']),
                 record['actions'],
                 record['reward'],
                 record['terminals'],
                 None
            ))
        start = time.monotonic()
        for _ in range_(self.samples):
            batch_tuple = memory.get_records(self.sample_batch_size)
        end = time.monotonic() - start
        tp = self.samples / end
        print('#### Testing RLgraph Prioritized Replay memory ####')
        print('Testing sampling performance:')
        print('Sampled {} batches, throughput: {} batches/s, total time: {} s'.format(
            self.samples, tp, end
        ))
Example #2
    def test_prefixsum_idx(self):
        """
        Tests fetching the index corresponding to a prefix sum.
        """
        memory = ApexMemory(capacity=4)
        tree = memory.merged_segment_tree.sum_segment_tree
        tree.insert(2, 1.0)
        tree.insert(3, 3.0)
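        # Leaf values are now [0, 0, 1.0, 3.0]; index_of_prefixsum(p) returns the
        # first leaf index whose cumulative sum exceeds p (clamped to the last leaf).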

        self.assertEqual(tree.index_of_prefixsum(0.0), 2)
        self.assertEqual(tree.index_of_prefixsum(0.5), 2)
        self.assertEqual(tree.index_of_prefixsum(0.99), 2)
        self.assertEqual(tree.index_of_prefixsum(1.01), 3)
        self.assertEqual(tree.index_of_prefixsum(3.0), 3)
        self.assertEqual(tree.index_of_prefixsum(4.0), 3)

        memory = ApexMemory(capacity=4)
        tree = memory.merged_segment_tree.sum_segment_tree
        tree.insert(0, 0.5)
        tree.insert(1, 1.0)
        tree.insert(2, 1.0)
        tree.insert(3, 3.0)
        self.assertEqual(tree.index_of_prefixsum(0.0), 0)
        self.assertEqual(tree.index_of_prefixsum(0.55), 1)
        self.assertEqual(tree.index_of_prefixsum(0.99), 1)
        self.assertEqual(tree.index_of_prefixsum(1.51), 2)
        self.assertEqual(tree.index_of_prefixsum(3.0), 3)
        self.assertEqual(tree.index_of_prefixsum(5.50), 3)
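This prefix-sum lookup is what makes proportional prioritized sampling cheap: drawing a uniform value in [0, total priority mass) and mapping it back to a leaf index samples each record with probability proportional to its priority. A minimal sketch of that idea, using only the SumSegmentTree calls exercised in these tests (sample_proportional is a hypothetical helper, not part of RLgraph):

    import random

    def sample_proportional(sum_tree, batch_size):
        """Samples leaf indices with probability proportional to their priority."""
        indices = []
        for _ in range(batch_size):
            # Pick a random point in the total priority mass and map it to a leaf.
            prefixsum = random.random() * sum_tree.get_sum()
            indices.append(sum_tree.index_of_prefixsum(prefixsum))
        return indices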
Example #3
    def __init__(self, apex_replay_spec):
        """
        Args:
            apex_replay_spec (dict): Specifies behaviour of this replay actor. Must contain key "memory_spec".
        """
        # N.B.: the memory spec carries type PrioritizedReplay because that is
        # what the agent uses. We therefore do not call from_spec here but read
        # the relevant args directly.
        self.min_sample_memory_size = apex_replay_spec["min_sample_memory_size"]
        self.clip_rewards = apex_replay_spec.get("clip_rewards", True)
        self.sample_batch_size = apex_replay_spec["sample_batch_size"]
        self.memory = ApexMemory(**apex_replay_spec["memory_spec"])
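For reference, a spec of the shape this constructor expects could look as follows. The key names are taken from the code above; the concrete values, and the memory_spec contents (matching the ApexMemory arguments seen in the other examples), are illustrative assumptions:

    apex_replay_spec = {
        "min_sample_memory_size": 1000,  # start sampling only once this many records are stored
        "clip_rewards": True,            # optional; defaults to True above
        "sample_batch_size": 64,
        "memory_spec": {                 # expanded as ApexMemory(**memory_spec)
            "capacity": 100000,
            "alpha": 1.0,
            "beta": 0.6,
        },
    }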
Example #4
    def test_insert(self):
        """
        Simply tests insert op without checking internal logic.
        """
        memory = MemPrioritizedReplay(capacity=self.capacity,
                                      next_states=True,
                                      alpha=self.alpha,
                                      beta=self.beta)
        memory.create_variables(self.input_spaces)

        observation = memory.record_space_flat.sample(size=1)
        memory.insert_records(observation)

        # Test chunked insert
        observation = memory.record_space_flat.sample(size=5)
        memory.insert_records(observation)

        # Also test Apex version
        memory = ApexMemory(capacity=self.capacity,
                            alpha=self.alpha,
                            beta=self.beta)
        observation = self.apex_space.sample(size=5)
        for i in range_(5):
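            # Apex record tuple: (state, action, reward, terminal, next_state, weight);
            # the sampled state doubles as its own next_state in this test.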
            memory.insert_records(
                (observation['states'][i], observation['actions'][i],
                 observation['reward'][i], observation['terminals'][i],
                 observation['states'][i], observation["weights"][i]))
Example #5
    def test_rlgraph_combined_ops(self):
        """
        Tests a combined workflow of insert, sample, update on the prioritized replay memory.
        """
        memory = ApexMemory(
            capacity=self.capacity,
            alpha=1.0
        )

        chunksize = 32
        chunks = int(self.inserts / chunksize)
        records = [self.record_space.sample(size=chunksize) for _ in range_(chunks)]
        loss_values = [np.random.random(size=self.sample_batch_size) for _ in range_(chunks)]

        start = time.monotonic()
        for chunk, loss in zip(records, loss_values):
            # Each record now is a chunk.
            for i in range_(chunksize):
                memory.insert_records((
                    ray_compress(chunk['states'][i]),
                    chunk['actions'][i],
                    chunk['reward'][i],
                    chunk['terminals'][i],
                    None
                ))
            batch, indices, weights = memory.get_records(self.sample_batch_size)
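            # Re-prioritize exactly the records just sampled, using this chunk's loss values.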
            memory.update_records(indices, loss)

        end = time.monotonic() - start
        tp = len(records) / end
        print('RLgraph: Testing combined op performance:')
        print('Ran {} combined ops, throughput: {} combined ops/s, total time: {} s'.format(
            len(records), tp, end
        ))
Example #6
    def test_rlgraph_apex_insert(self):
        """
        Tests RLgraph's Python memory insert performance.
        """
        memory = ApexMemory(
            capacity=self.capacity,
            alpha=1.0
        )
        # Testing insert performance
        records = [self.record_space.sample(size=1) for _ in range_(self.inserts)]

        start = time.monotonic()
        for record in records:
            memory.insert_records((
                 record['states'],
                 record['actions'],
                 record['reward'],
                 record['terminals'],
                 None
            ))
        end = time.monotonic() - start
        tp = len(records) / end

        print('#### Testing RLgraph python prioritized replay ####')
        print('Testing insert performance:')
        print('Inserted {} separate records, throughput: {} records/s, total time: {} s'.format(
            len(records), tp, end
        ))

        memory = ApexMemory(
            capacity=self.capacity,
            alpha=1.0
        )
        chunks = int(self.inserts / self.chunksize)
        records = [self.record_space.sample(size=self.chunksize) for _ in range_(chunks)]
        start = time.monotonic()
        for chunk in records:
            for i in range_(self.chunksize):
                memory.insert_records((
                    chunk['states'][i],
                    chunk['actions'][i],
                    chunk['reward'][i],
                    chunk['terminals'][i],
                    None
                ))

        end = time.monotonic() - start
        tp = len(records) * self.chunksize / end
        print('Testing chunked insert performance:')
        print('Inserted {} chunks, throughput: {} records/s, total time: {} s'.format(
            len(records), tp, end
        ))
Example #7
    def test_rlgraph_updating(self):
        """
        Tests RLgraph's memory update performance.
        """
        memory = ApexMemory(
            capacity=self.capacity,
            alpha=1.0
        )

        records = [self.record_space.sample(size=1) for _ in range_(self.inserts)]
        for record in records:
            memory.insert_records((
                 record['states'],
                 record['actions'],
                 record['reward'],
                 record['terminals'],
                 None
            ))
        loss_values = [np.random.random(size=self.sample_batch_size) for _ in range_(self.samples)]
        indices = [np.random.randint(low=0, high=self.inserts, size=self.sample_batch_size) for _
                   in range_(self.samples)]
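        # Random index batches stand in for previously sampled records whose priorities get updated.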

        start = time.monotonic()
        for index, loss in zip(indices, loss_values):
            memory.update_records(index, loss)
        end = time.monotonic() - start
        tp = len(indices) / end
        print('#### Testing RLgraph Prioritized Replay memory ####')
        print('Testing updating performance:')
        print('Updated {} loss batches, throughput: {} updates/s, total time: {} s'.format(
            len(indices), tp, end
        ))
Example #8
    def test_tree_insert(self):
        """
        Tests inserting into the segment tree and querying segments.
        """
        memory = ApexMemory(capacity=4)
        tree = memory.merged_segment_tree.sum_segment_tree
        tree.insert(2, 1.0)
        tree.insert(3, 3.0)
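        # Leaf values are now [0, 0, 1.0, 3.0]; get_sum(start, stop) sums the
        # half-open leaf range [start, stop), and a negative stop counts from the end.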
        assert np.isclose(tree.get_sum(), 4.0)
        assert np.isclose(tree.get_sum(0, 2), 0.0)
        assert np.isclose(tree.get_sum(0, 3), 1.0)
        assert np.isclose(tree.get_sum(2, 3), 1.0)
        assert np.isclose(tree.get_sum(2, -1), 1.0)
        assert np.isclose(tree.get_sum(2, 4), 4.0)
Example #9
    def test_update_records(self):
        """
        Tests update records logic.
        """
        memory = MemPrioritizedReplay(capacity=self.capacity, next_states=True)
        memory.create_variables(self.input_spaces)

        # Insert a few Elements.
        observation = memory.record_space_flat.sample(size=2)
        memory.insert_records(observation)

        # Fetch elements and their indices.
        num_records = 2
        batch = memory.get_records(num_records)
        indices = batch[1]
        self.assertEqual(num_records, len(indices))

        # Does not return anything.
        memory.update_records(indices, np.asarray([0.1, 0.2]))

        # Test apex memory.
        memory = ApexMemory(capacity=self.capacity,
                            alpha=self.alpha,
                            beta=self.beta)
        observation = self.apex_space.sample(size=5)
        for i in range_(5):
            memory.insert_records(
                (ray_compress(observation["states"][i]),
                 observation["actions"][i], observation["reward"][i],
                 observation["terminals"][i], observation["weights"][i]))

        # Fetch elements and their indices.
        num_records = 5
        batch = memory.get_records(num_records)
        indices = batch[1]
        self.assertEqual(num_records, len(indices))

        # Does not return anything.
        memory.update_records(indices, np.random.uniform(size=num_records))
Example #10
class RayMemoryActor(RayActor):
    """
    An in-memory prioritized replay worker used to accelerate memory interaction in Ape-X.
    """
    def __init__(self, apex_replay_spec):
        """
        Args:
            apex_replay_spec (dict): Specifies behaviour of this replay actor. Must contain key "memory_spec".
        """
        # N.B.: the memory spec carries type PrioritizedReplay because that is
        # what the agent uses. We therefore do not call from_spec here but read
        # the relevant args directly.
        self.min_sample_memory_size = apex_replay_spec[
            "min_sample_memory_size"]
        self.clip_rewards = apex_replay_spec.get("clip_rewards", True)
        self.sample_batch_size = apex_replay_spec["sample_batch_size"]
        self.memory = ApexMemory(**apex_replay_spec["memory_spec"])

    @classmethod
    def as_remote(cls, num_cpus=None, num_gpus=None):
        return ray.remote(num_cpus=num_cpus, num_gpus=num_gpus)(cls)

    def get_batch(self):
        """
        Samples a batch from the replay memory.

        Returns:
            dict: Sample batch

        """
        if self.memory.size < self.min_sample_memory_size:
            return None
        else:
            batch, indices, weights = self.memory.get_records(
                self.sample_batch_size)
            # Merge into one dict to only return one future in ray.
            batch["indices"] = indices
            batch["importance_weights"] = weights
            return batch

    def observe(self, env_sample):
        """
        Observes experience(s).

        N.B.: for performance reasons, the data layout is slightly different for Ape-X.
        """
        records = env_sample.get_batch()
        num_records = len(records['states'])

        # TODO port to tf PR behaviour.
        if self.clip_rewards:
            rewards = np.sign(records["rewards"])
        else:
            rewards = records["rewards"]
        for i in range_(num_records):
            # If actions is a dict with one vector per key, convert it into a
            # single dict for this record.
            if isinstance(records["actions"], dict):
                action = {k: v[i] for k, v in records["actions"].items()}
            else:
                action = records["actions"][i]

            self.memory.insert_records(
                (records["states"][i], action, rewards[i],
                 records["terminals"][i], records["next_states"][i],
                 records["importance_weights"][i]))

    def update_priorities(self, indices, loss):
        """
        Updates priorities of provided indices in replay memory via externally
        provided loss.

        Args:
            indices (ndarray): Indices to update in replay memory.
            loss (ndarray): Loss values for indices.
        """
        loss = np.abs(loss) + SMALL_NUMBER
        self.memory.update_records(indices, loss)
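To tie the pieces together, a minimal usage sketch for this actor. The spec values are illustrative assumptions, and the experience feed is elided; workers would call observe() with env samples in the layout consumed above:

    import numpy as np
    import ray

    ray.init()

    # Instantiate the replay actor as a Ray remote actor (one CPU, no GPU).
    actor = RayMemoryActor.as_remote(num_cpus=1).remote({
        "min_sample_memory_size": 1000,
        "sample_batch_size": 64,
        "memory_spec": {"capacity": 100000, "alpha": 1.0, "beta": 0.6},
    })

    # ... workers call actor.observe.remote(env_sample) with collected experience ...

    # get_batch() returns None until min_sample_memory_size records are stored.
    batch = ray.get(actor.get_batch.remote())
    if batch is not None:
        # Compute per-record loss externally (dummy values here), then re-prioritize.
        loss = np.random.uniform(size=len(batch["indices"]))
        actor.update_priorities.remote(batch["indices"], loss)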