Example #1
    def step(self) -> Optional[_NextValueNotReady]:
        with self.queue_timer:
            try:
                batch, _ = self.minibatch_buffer.get()
            except queue.Empty:
                return _NextValueNotReady()

        with self.grad_timer:
            # Use LearnerInfoBuilder as a unified way to build the final
            # results dict from `learn_on_loaded_batch` call(s).
            # This makes sure results dicts always have the same structure
            # no matter the setup (multi-GPU, multi-agent, minibatch SGD,
            # tf vs torch).
            learner_info_builder = LearnerInfoBuilder(num_devices=1)
            multi_agent_results = self.local_worker.learn_on_batch(batch)
            for pid, results in multi_agent_results.items():
                learner_info_builder.add_learn_on_batch_results(results, pid)
            self.learner_info = learner_info_builder.finalize()
            learner_stats = {
                pid: info[LEARNER_STATS_KEY]
                for pid, info in self.learner_info.items()
            }
            self.weights_updated = True

        self.num_steps += 1
        self.outqueue.put((batch.count, learner_stats))
        self.learner_queue_size.push(self.inqueue.qsize())
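For context, `_NextValueNotReady` is a small sentinel class from `ray.util.iter`: a producer returns or yields an instance instead of blocking, which lets the surrounding iterator machinery interleave other work and poll again later. A minimal sketch of the idea (the class name matches Ray's; the body here is illustrative):

class _NextValueNotReady(Exception):
    """Sentinel: the iterator has no value available yet.

    Returning or yielding an instance (rather than raising it) tells
    the consumer to back off and poll again, without blocking.
    """
    pass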
Example #2
def gen_replay(_):
    # Replay forever from the local buffer; emit the not-ready sentinel
    # while the buffer cannot provide a sample yet.
    while True:
        item = local_buffer.replay()
        if item is None:
            yield _NextValueNotReady()
        else:
            yield item
Example #3
def gen_replay(_):
    # Same pattern, but each call draws a fixed number of items from
    # the buffer.
    while True:
        item = local_buffer.sample(num_items_to_replay)
        if item is None:
            yield _NextValueNotReady()
        else:
            yield item
Example #4
File: nfsp.py Project: indylab/nxdo
def gen_replay(_):
    # Same pattern against a remote replay-buffer actor; ray.get blocks
    # until the actor call returns.
    while True:
        item = ray.get(replay_buffer_actor.replay.remote())
        if item is None:
            yield _NextValueNotReady()
        else:
            yield item
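In RLlib these `gen_replay` generators are not iterated directly; they are wrapped into a `LocalIterator` so that downstream operators can deal with the sentinel. A sketch of the wiring, following the pattern in RLlib's replay ops (treat the exact `SharedMetrics` usage as an assumption):

from ray.util.iter import LocalIterator
from ray.util.iter_metrics import SharedMetrics

# Wrap the generator factory; operators chained on this iterator see
# both real samples and _NextValueNotReady sentinels.
replay_op = LocalIterator(gen_replay, SharedMetrics())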
Example #5
def base_iterator(timeout=None):
    # Drain the input queue without blocking; yield the not-ready
    # sentinel while it is empty. The loop ends (and raises) once
    # check() returns False.
    while check():
        try:
            item = input_queue.get_nowait()
            yield item
        except queue.Empty:
            yield _NextValueNotReady()
    raise RuntimeError("Error raised reading from queue")
Example #6
def base_iterator(timeout=None):
    # Blocking get with a short timeout instead of get_nowait();
    # otherwise identical to the variant above.
    while check():
        try:
            item = input_queue.get(timeout=0.001)
            yield item
        except queue.Empty:
            yield _NextValueNotReady()
    raise RuntimeError("Dequeue `check()` returned False! "
                       "Exiting with Exception from Dequeue iterator.")
Example #7
def base_iterator(timeout=None):
    while check():
        try:
            item = input_queue.get_nowait()
            yield item
        except queue.Empty:
            yield _NextValueNotReady()
    # Sleep to give time for any error to be printed
    time.sleep(3)
    raise RuntimeError("Error raised reading from queue")
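A caller driving one of these `base_iterator` variants by hand has to treat the sentinel as "try again later" rather than as data. A minimal polling loop, with `process()` standing in for a hypothetical downstream handler:

import time

for item in base_iterator():
    if isinstance(item, _NextValueNotReady):
        time.sleep(0.01)  # nothing queued yet; back off briefly
        continue
    process(item)  # hypothetical downstream handler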
Example #8
    def step(self) -> Optional[_NextValueNotReady]:
        with self.queue_timer:
            try:
                batch, _ = self.minibatch_buffer.get()
            except queue.Empty:
                return _NextValueNotReady()

        with self.grad_timer:
            fetches = self.local_worker.learn_on_batch(batch)
            self.weights_updated = True
            self.stats = get_learner_stats(fetches)

        self.num_steps += 1
        self.outqueue.put((batch.count, self.stats))
        self.learner_queue_size.push(self.inqueue.qsize())
Example #9
    def gen_replay(timeout):
        while True:
            samples = {}
            idxes = None
            for policy_id, replay_buffer in replay_buffers.buffers.items():
                policy_multiplier = 2 if policy_id == 'policy_team' else 1
                if len(replay_buffer) >= min_size_to_learn * policy_multiplier \
                        and replay_buffers.steps[policy_id] >= learn_every:

                    idxes = replay_buffer.sample_idxes(train_batch_size)
                    replay_buffers.steps[policy_id] = 0

                    if prioritized:
                        (obses_t, actions, rewards, obses_tp1, dones, w, ind) \
                            = replay_buffer.sample_with_idxes(idxes, beta)
                    else:
                        (obses_t, actions, rewards, obses_tp1,
                         dones) = replay_buffer.sample_with_idxes(idxes)

                    # NOTE: the prioritized weights `w` and indexes `ind`
                    # sampled above are discarded; uniform weights and
                    # dummy indexes are emitted instead.
                    weights = np.ones_like(rewards)
                    batch_indexes = -np.ones_like(rewards)
                    samples[policy_id] = SampleBatch({
                        "obs": obses_t,
                        "actions": actions,
                        "rewards": rewards,
                        "new_obs": obses_tp1,
                        "dones": dones,
                        "weights": weights,
                        "batch_indexes": batch_indexes,
                    })

            if samples == {}:
                yield _NextValueNotReady()
            else:
                yield MultiAgentBatch(samples, train_batch_size)
Example #10
    def gen_replay(timeout):
        while True:
            samples = {}

            for policy_id, reservoir_buffer in reservoir_buffers.buffers.items():
                if len(reservoir_buffer) >= min_size_to_learn and \
                        reservoir_buffers.steps[policy_id] >= learn_every:
                    (obses_t, actions) = \
                        reservoir_buffer.sample(train_batch_size)
                    samples[policy_id] = SampleBatch({
                        "obs": obses_t,
                        "actions": actions,
                    })

                    reservoir_buffers.steps[policy_id] = 0

            if samples == {}:
                yield _NextValueNotReady()
            else:
                yield MultiAgentBatch(samples, train_batch_size)
Example #11
    def gen_replay(timeout):
        while True:
            samples = {}
            for policy_id, replay_buffer in replay_buffers.buffers.items():
                if len(replay_buffer.replay_batches) >= min_size_to_learn and \
                     replay_buffers.steps[policy_id] >= learn_every:

                    batch = None
                    for _ in range(train_batch_size):
                        piece = replay_buffer.replay().decompress_if_needed()
                        batch = piece if batch is None else batch.concat(piece)

                    replay_buffers.steps[policy_id] = 0
                    samples[policy_id] = batch

            if samples == {}:
                yield _NextValueNotReady()
            else:
                yield MultiAgentBatch(samples, train_batch_size)
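Because these generators interleave real `MultiAgentBatch` values with sentinels, a downstream training operator typically filters the sentinels out first. A sketch using `LocalIterator.filter` (assuming the wrapping shown after Example #4):

# Keep only complete multi-agent batches; drop the not-ready sentinels.
train_batches = LocalIterator(gen_replay, SharedMetrics()) \
    .filter(lambda item: not isinstance(item, _NextValueNotReady))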
Example #12
def __call__(self, x):
    # Non-blocking enqueue; hand back the not-ready sentinel when the
    # queue is full instead of blocking the caller.
    try:
        self.queue.put_nowait(x)
    except queue.Full:
        return _NextValueNotReady()
Example #13
def __call__(self, x: Any) -> Any:
    # Enqueue with a short timeout; on queue.Full, signal not-ready so
    # the caller can retry later. Returns the item on success.
    try:
        self.queue.put(x, timeout=0.001)
    except queue.Full:
        return _NextValueNotReady()
    return x
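Taken together, the `__call__` producers (returning the sentinel on `queue.Full`) and the dequeue iterators above (yielding it on `queue.Empty`) are the two halves of a non-blocking queue handoff. A self-contained round trip illustrating both directions, assuming the `_NextValueNotReady` sketch from Example #1:

import queue

q = queue.Queue(maxsize=1)

def try_put(x):
    try:
        q.put_nowait(x)
        return x
    except queue.Full:
        return _NextValueNotReady()  # consumer is behind; retry later

def try_get():
    try:
        return q.get_nowait()
    except queue.Empty:
        return _NextValueNotReady()  # producer is behind; retry later

assert not isinstance(try_put("batch-0"), _NextValueNotReady)  # fits
assert isinstance(try_put("batch-1"), _NextValueNotReady)      # queue full
assert try_get() == "batch-0"
assert isinstance(try_get(), _NextValueNotReady)               # queue empty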