def decompress_if_needed(self, columns=frozenset(["obs", "new_obs"])):
    """Decompresses compressed columns of ``self.data`` in place.

    Args:
        columns: The column names to check and decompress. Default: only
            the "obs" and "new_obs" columns.

    Returns:
        This very object (for call chaining), matching the fluent
        ``decompress_if_needed`` variant used elsewhere.
    """
    for key in columns:
        if key in self.data:
            arr = self.data[key]
            # Entire column compressed as one blob.
            if is_compressed(arr):
                self.data[key] = unpack(arr)
            # Each row compressed individually.
            elif len(arr) > 0 and is_compressed(arr[0]):
                self.data[key] = np.array([unpack(o) for o in arr])
    return self
def _decompress_in_place(path, value):
    # Closure over `self` and `columns`: only paths whose top-level key
    # is one of the requested columns are touched.
    if path[0] not in columns:
        return
    # Walk down to the container that directly holds the leaf entry.
    parent = self
    for step in path[:-1]:
        parent = parent[step]
    leaf = path[-1]
    if is_compressed(value):
        # Bulk compressed: the whole value is a single compressed blob.
        parent[leaf] = unpack(value)
    elif len(value) > 0 and is_compressed(value[0]):
        # Non bulk compressed: each element was compressed on its own.
        parent[leaf] = np.array([unpack(item) for item in value])
def _encode_sample(self, idxes):
    """Gather the transitions at `idxes` into batched numpy arrays.

    Observations are stored compressed and unpacked on read. As a side
    effect, the hit counter of every sampled index is incremented.
    """
    obs_batch = []
    act_batch = []
    rew_batch = []
    next_obs_batch = []
    done_batch = []
    for idx in idxes:
        obs_t, action, reward, obs_tp1, done = self._storage[idx]
        obs_batch.append(np.array(unpack(obs_t), copy=False))
        act_batch.append(np.array(action, copy=False))
        rew_batch.append(reward)
        next_obs_batch.append(np.array(unpack(obs_tp1), copy=False))
        done_batch.append(done)
        self._hit_count[idx] += 1
    return (np.array(obs_batch), np.array(act_batch), np.array(rew_batch),
            np.array(next_obs_batch), np.array(done_batch))
def _encode_sample(self, idxes):
    """Assemble a training batch from the stored transitions.

    Returns a 5-tuple of numpy arrays: (obs_t, actions, rewards,
    obs_tp1, dones). Also bumps `self._hit_count` for each drawn index.
    """
    columns = ([], [], [], [], [])
    for i in idxes:
        self._hit_count[i] += 1
        obs_t, action, reward, obs_tp1, done = self._storage[i]
        # Observations are kept compressed in storage; unpack on read.
        row = (np.array(unpack(obs_t), copy=False),
               np.array(action, copy=False),
               reward,
               np.array(unpack(obs_tp1), copy=False),
               done)
        for col, val in zip(columns, row):
            col.append(val)
    return tuple(np.array(col) for col in columns)
def worker_train(ps, replay_buffer, opt, learner_index, push_interval=100):
    """Run the learner loop: sync weights once, then train forever.

    Args:
        ps: Parameter-server actor handle (supports ``pull.remote(keys)``).
        replay_buffer: Replay buffer the Cache prefetches batches from.
        opt: Options object; ``opt.model == "cnn"`` triggers per-frame
            observation decompression before training.
        learner_index: Index of this learner worker (unused in the body;
            kept for the caller's signature).
        push_interval: Push updated weights to the cache every this many
            training steps. Defaults to the previously hard-coded 100.
    """
    agent = Learner(opt, job="learner")
    # Pull the initial weights from the parameter server.
    keys = agent.get_weights()[0]
    weights = ray.get(ps.pull.remote(keys))
    agent.set_weights(keys, weights)

    cache = Cache(replay_buffer)
    cache.start()

    cnt = 1
    while True:  # This worker never returns.
        batch = cache.q1.get()
        if opt.model == "cnn":
            # Observations arrive compressed; unpack every frame of
            # every stacked observation in the batch.
            batch['obs'] = np.array(
                [[unpack(o) for o in lno] for lno in batch['obs']])
        agent.train(batch, cnt)
        # Periodically publish fresh weights for the actors to pick up.
        if cnt % push_interval == 0:
            cache.q2.put(agent.get_weights())
        cnt += 1
def decompress_if_needed(
        self, columns: Set[str] = frozenset(["obs", "new_obs"])) -> "SampleBatch":
    """Decompresses data buffers (per column if not compressed) in place.

    Args:
        columns (Set[str]): The columns to decompress. Default: Only
            decompress the obs and new_obs columns.

    Returns:
        SampleBatch: This very SampleBatch.
    """
    for col in columns:
        if col not in self.keys():
            continue
        values = self[col]
        if is_compressed(values):
            # The whole column was compressed as a single blob.
            self[col] = unpack(values)
        elif len(values) > 0 and is_compressed(values[0]):
            # Row-wise compression: unpack every entry separately.
            self[col] = np.array([unpack(row) for row in self[col]])
    return self