def step(self) -> Optional[_NextValueNotReady]:
    with self.queue_timer:
        try:
            batch, _ = self.minibatch_buffer.get()
        except queue.Empty:
            return _NextValueNotReady()

    with self.grad_timer:
        # Use LearnerInfoBuilder as a unified way to build the final
        # results dict from `learn_on_loaded_batch` call(s).
        # This makes sure results dicts always have the same structure
        # no matter the setup (multi-GPU, multi-agent, minibatch SGD,
        # tf vs torch).
        learner_info_builder = LearnerInfoBuilder(num_devices=1)
        multi_agent_results = self.local_worker.learn_on_batch(batch)
        for pid, results in multi_agent_results.items():
            learner_info_builder.add_learn_on_batch_results(results, pid)
        self.learner_info = learner_info_builder.finalize()
        learner_stats = {
            pid: info[LEARNER_STATS_KEY]
            for pid, info in self.learner_info.items()
        }
        self.weights_updated = True
        self.num_steps += 1

    self.outqueue.put((batch.count, learner_stats))
    self.learner_queue_size.push(self.inqueue.qsize())
def gen_replay(_):
    while True:
        # Replay from the local buffer; yield a "not ready" sentinel
        # until it can provide a batch.
        item = local_buffer.replay()
        if item is None:
            yield _NextValueNotReady()
        else:
            yield item
def gen_replay(_):
    while True:
        # Sample a fixed number of items from the local buffer.
        item = local_buffer.sample(num_items_to_replay)
        if item is None:
            yield _NextValueNotReady()
        else:
            yield item
def gen_replay(_):
    while True:
        # Replay from a remote replay-buffer actor via ray.get().
        item = ray.get(replay_buffer_actor.replay.remote())
        if item is None:
            yield _NextValueNotReady()
        else:
            yield item
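# A hedged sketch (an assumption, not shown in the snippets above) of how
# such a replay generator is wrapped into an iterator that downstream
# operators can consume; assumes Ray's `ray.util.iter` API (LocalIterator,
# SharedMetrics) as used by RLlib's execution ops, which varies by version.
from ray.util.iter import LocalIterator
from ray.util.iter_metrics import SharedMetrics

def Replay(local_buffer):
    def gen_replay(_):
        while True:
            item = local_buffer.replay()
            yield _NextValueNotReady() if item is None else item

    # The generator becomes an infinite LocalIterator; training ops pull
    # batches from it and simply skip the "not ready" sentinels.
    return LocalIterator(gen_replay, SharedMetrics())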
def base_iterator(timeout=None):
    while check():
        try:
            item = input_queue.get_nowait()
            yield item
        except queue.Empty:
            yield _NextValueNotReady()
    raise RuntimeError("Error raised reading from queue")
def base_iterator(timeout=None):
    while check():
        try:
            item = input_queue.get(timeout=0.001)
            yield item
        except queue.Empty:
            yield _NextValueNotReady()
    raise RuntimeError("Dequeue `check()` returned False! "
                       "Exiting with Exception from Dequeue iterator.")
def base_iterator(timeout=None):
    while check():
        try:
            item = input_queue.get_nowait()
            yield item
        except queue.Empty:
            yield _NextValueNotReady()
    # Sleep to give time for any error to be printed
    time.sleep(3)
    raise RuntimeError("Error raised reading from queue")
def step(self) -> Optional[_NextValueNotReady]:
    with self.queue_timer:
        try:
            batch, _ = self.minibatch_buffer.get()
        except queue.Empty:
            return _NextValueNotReady()

    with self.grad_timer:
        fetches = self.local_worker.learn_on_batch(batch)
        self.weights_updated = True
        self.stats = get_learner_stats(fetches)

    self.num_steps += 1
    self.outqueue.put((batch.count, self.stats))
    self.learner_queue_size.push(self.inqueue.qsize())
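# For context, a minimal sketch of the thread loop that would drive either
# `step()` variant above; the `stopped` flag and loop shape follow the usual
# RLlib learner-thread layout and are assumptions here.
def run(self):
    # Keep pulling minibatches and applying gradients until asked to stop;
    # `step()` returns a sentinel whenever no batch is ready yet.
    while not self.stopped:
        self.step()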
def gen_replay(timeout):
    while True:
        samples = {}
        idxes = None
        for policy_id, replay_buffer in replay_buffers.buffers.items():
            # The team policy's buffer must be twice as full before
            # learning starts.
            policy_multiplier = 2 if policy_id == 'policy_team' else 1
            if len(replay_buffer) >= min_size_to_learn * policy_multiplier \
                    and replay_buffers.steps[policy_id] >= learn_every:
                idxes = replay_buffer.sample_idxes(train_batch_size)
                replay_buffers.steps[policy_id] = 0
                if prioritized:
                    (obses_t, actions, rewards, obses_tp1, dones, w, ind) \
                        = replay_buffer.sample_with_idxes(idxes, beta)
                    # Keep the importance-sampling weights and batch indexes
                    # returned by the prioritized buffer.
                    weights = w
                    batch_indexes = ind
                else:
                    (obses_t, actions, rewards, obses_tp1,
                     dones) = replay_buffer.sample_with_idxes(idxes)
                    weights = np.ones_like(rewards)
                    batch_indexes = -np.ones_like(rewards)
                samples[policy_id] = SampleBatch({
                    "obs": obses_t,
                    "actions": actions,
                    "rewards": rewards,
                    "new_obs": obses_tp1,
                    "dones": dones,
                    "weights": weights,
                    "batch_indexes": batch_indexes,
                })
        if samples == {}:
            yield _NextValueNotReady()
        else:
            yield MultiAgentBatch(samples, train_batch_size)
def gen_replay(timeout):
    while True:
        samples = {}
        idxes = None
        for policy_id, reservoir_buffer in reservoir_buffers.buffers.items():
            if len(reservoir_buffer) >= min_size_to_learn and \
                    reservoir_buffers.steps[policy_id] >= learn_every:
                # idxes = reservoir_buffer.sample_idxes(train_batch_size)
                (obses_t, actions) = reservoir_buffer.sample(train_batch_size)
                samples[policy_id] = SampleBatch({
                    "obs": obses_t,
                    "actions": actions,
                })
                reservoir_buffers.steps[policy_id] = 0
        if samples == {}:
            yield _NextValueNotReady()
        else:
            yield MultiAgentBatch(samples, train_batch_size)
def gen_replay(timeout):
    while True:
        samples = {}
        for policy_id, replay_buffer in replay_buffers.buffers.items():
            if len(replay_buffer.replay_batches) >= min_size_to_learn and \
                    replay_buffers.steps[policy_id] >= learn_every:
                # Concatenate `train_batch_size` replayed batches
                # (decompressing them if needed) into one training batch.
                batch = None
                for _ in range(train_batch_size):
                    if batch is None:
                        batch = replay_buffer.replay().decompress_if_needed()
                    else:
                        batch = batch.concat(
                            replay_buffer.replay().decompress_if_needed())
                replay_buffers.steps[policy_id] = 0
                samples[policy_id] = batch
        if samples == {}:
            yield _NextValueNotReady()
        else:
            yield MultiAgentBatch(samples, train_batch_size)
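# Hedged illustration (toy data, not from the generators above) of the
# MultiAgentBatch these replay generators yield: a mapping from policy ID
# to SampleBatch plus an env-step count.
import numpy as np
from ray.rllib.policy.sample_batch import MultiAgentBatch, SampleBatch

toy_batch = MultiAgentBatch(
    {
        "policy_team": SampleBatch({
            "obs": np.zeros((32, 4), dtype=np.float32),
            "actions": np.zeros(32, dtype=np.int64),
        }),
    },
    32,  # Env steps covered by this batch (train_batch_size above).
)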
def __call__(self, x):
    try:
        # Non-blocking put: if the queue is full, signal back-pressure
        # with the sentinel instead of blocking the caller.
        self.queue.put_nowait(x)
    except queue.Full:
        return _NextValueNotReady()
def __call__(self, x: Any) -> Any:
    try:
        self.queue.put(x, timeout=0.001)
    except queue.Full:
        return _NextValueNotReady()
    # Pass the item through so downstream operators still receive it.
    return x
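# Hedged usage sketch (assumed wiring, not taken from the snippets above):
# pairing an Enqueue-style callable with a Dequeue-style generator around a
# single queue.Queue, with a local stand-in for the _NextValueNotReady
# sentinel.
import queue

class _NextValueNotReady:
    """Stand-in sentinel for 'no value available yet'."""

q = queue.Queue(maxsize=4)

def enqueue(x):
    # Mirrors __call__ above: report back-pressure instead of blocking.
    try:
        q.put(x, timeout=0.001)
    except queue.Full:
        return _NextValueNotReady()
    return x

def dequeue(check=lambda: True):
    # Mirrors base_iterator above: yield a sentinel while the queue is empty.
    while check():
        try:
            yield q.get_nowait()
        except queue.Empty:
            yield _NextValueNotReady()

# Fill the queue past its capacity, then drain until the first sentinel.
for i in range(6):
    enqueue(i)
for item in dequeue():
    if isinstance(item, _NextValueNotReady):
        break
    print(item)  # -> 0, 1, 2, 3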