class sIMapIterator(object):
    """Bounded iterator that hands out results in index order.

    Producers deliver results through ``_set(i, obj)`` in any order; the
    consumer receives them through ``next()`` strictly by increasing index.
    A semaphore created with ``maxsize`` permits limits how many results the
    iterator holds at once: every ``_set`` call takes a permit and every
    result handed to the consumer gives one back.

    NOTE(review): ``_set`` takes a permit even for results that arrive ahead
    of the expected index and are parked in ``_unsorted``.  If ``maxsize``
    such results arrive before the expected one, the ``_set`` call carrying
    the expected index blocks on the semaphore while the consumer has nothing
    to drain.  Confirm that callers cannot get that far out of order.
    """

    def __init__(self, cache, maxsize):
        """Create the iterator and register it in ``cache``.

        Args:
            cache: mutable mapping; this iterator stores itself under its job
                id and removes the entry once every result has been queued.
            maxsize: capacity of the internal queue and number of semaphore
                permits.
        """
        self._cond = threading.Condition(threading.Lock())
        # Free slots: acquired by _set(), released by next().
        self._empty_sema = threading.Semaphore(maxsize)
        # job_counter is a module-level object defined outside this class.
        self._job = job_counter.next()
        self._cache = cache
        self._items = Queue(maxsize)
        # Index of the next result that may be appended to _items.
        self._index = 0
        # Total number of results; None until _set_length() is called.
        self._length = None
        # Results that arrived ahead of _index, keyed by their index.
        self._unsorted = {}
        cache[self._job] = self

    def __iter__(self):
        return self

    def next(self, timeout=None):
        """Return the next result in index order.

        Each queued item is a ``(success, value)`` pair: ``value`` is returned
        when ``success`` is true and raised otherwise.

        Args:
            timeout: maximum number of seconds to wait for a result when none
                is queued; ``None`` waits until a producer notifies.

        Raises:
            StopIteration: all ``_length`` results have been queued and
                consumed.
            TimeoutError: the wait ended and no result was available.
        """
        with self._cond:
            try:
                item = self._items.get_nowait()
            except Empty:
                if self._index == self._length:
                    raise StopIteration
                self._cond.wait(timeout)
                # Retry without blocking.  The condition lock is held again
                # here, so a blocking get() could never be satisfied: _set()
                # needs this same lock to enqueue.  Blocking here also hung
                # forever when _set_length() woke us only to report the end.
                try:
                    item = self._items.get_nowait()
                except Empty:
                    if self._index == self._length:
                        raise StopIteration
                    raise TimeoutError
            # A slot is free again for the producers.
            self._empty_sema.release()

        success, value = item
        if success:
            return value
        raise value

    __next__ = next  # alias for the Python 3 iterator protocol

    def _set(self, i, obj):
        """Store result ``obj`` for position ``i``.

        Blocks until a semaphore permit is available.  A result for the
        expected index is queued at once, followed by any parked results that
        directly follow it; a result for any other index is parked in
        ``_unsorted``.

        Args:
            i: position of the result in the output order.
            obj: ``(success, value)`` pair as consumed by ``next()``.
        """
        self._empty_sema.acquire()
        with self._cond:
            if self._index == i:
                self._items.put_nowait(obj)
                self._index += 1
                # Flush parked results that are now contiguous.
                while self._index in self._unsorted:
                    obj = self._unsorted.pop(self._index)
                    self._items.put_nowait(obj)
                    self._index += 1
                self._cond.notify()
            else:
                self._unsorted[i] = obj

            # Everything has been queued: drop the registration.
            if self._index == self._length:
                del self._cache[self._job]

    def _set_length(self, length):
        """Record the total number of results.

        If every result has already been queued, a waiting consumer is woken
        so it can finish, and the iterator is removed from the cache.

        Args:
            length: total number of results that will be delivered.
        """
        with self._cond:
            self._length = length
            if self._index == self._length:
                self._cond.notify()
                del self._cache[self._job]
class GAETrajectoryBuffer(TrajectoryBuffer):
    """Trajectory buffer that hands finished trajectories to worker processes.

    Finished or closed trajectories are pushed onto an input queue read by
    ``enrich_trajectories`` workers; ``sample()`` collects the processed
    trajectories from the output queue and concatenates their arrays.
    """

    def __init__(self, capacity, horizon, gamma, gae_lambda, observation_shape, action_space, v_fn):
        """Set up the queues and start one worker process per CPU.

        Args:
            capacity: threshold that ``ready()`` compares ``len(self)`` against.
            horizon: forwarded to the base class and to the workers.
            gamma: forwarded to the workers.
            gae_lambda: forwarded to the workers.
            observation_shape: forwarded to the base class.
            action_space: forwarded to the base class.
            v_fn: passed to ``traj.update_vs`` before a trajectory is queued.
        """
        super().__init__(
            observation_shape=observation_shape,
            action_space=action_space,
            horizon=horizon,
        )
        self._gamma = gamma
        self._capacity = capacity
        print("\tlambda for GAE(lambda): {}".format(gae_lambda))
        self._enrich_queue = Queue()
        self._traj_queue = Queue()
        self._v_fn = v_fn

        worker_total = cpu_count()
        worker_args = (gamma, horizon, gae_lambda, self._enrich_queue, self._traj_queue)
        for _ in range(worker_total):
            Process(target=enrich_trajectories, args=worker_args).start()

        # Take one item per worker from the output queue before returning.
        # Presumably a start-up handshake posted by enrich_trajectories;
        # TODO confirm against that function.
        outstanding = worker_total
        while outstanding:
            self._traj_queue.get()
            outstanding -= 1

    def capacity(self):
        """Return the capacity given at construction."""
        return self._capacity

    def _create_trajectory(self, env_idx):
        """Build an empty ``GAETrajectory`` for environment ``env_idx``."""
        space = self._action_space
        return GAETrajectory(
            self._horizon,
            observation_shape=self._observation_shape,
            action_type=space.dtype,
            action_shape=space.shape,
            env_idx=env_idx,
        )

    def _enrich_traj(self, traj):
        """Queue ``traj`` for enrichment once it is done or closed.

        A trajectory that is neither done nor closed is returned untouched.
        Otherwise its values are updated with the stored value function and
        it is put on the input queue without blocking.  ``traj`` is returned
        in both cases.
        """
        if traj.done() or traj.closed:
            traj.update_vs(self._v_fn)
            self._enrich_queue.put_nowait(traj)
        return traj

    def ready(self):
        """Return True once ``len(self)`` has reached the capacity."""
        return len(self) >= self._capacity

    def sample(self):
        """Collect the processed trajectories and return their stacked arrays.

        Closes the open trajectories, then reads from the output queue until
        the lengths of the received trajectories add up to ``len(self)``.

        Returns:
            Tuple ``(states, actions, v_targets, gaes)``, each the
            concatenation along axis 0 of the matching attribute of every
            received trajectory.
        """
        self.close_trajectories()

        remaining = len(self)
        collected = []
        # NOTE(review): the loop ends only on an exact match; if the received
        # lengths overshoot, it keeps blocking on the queue.  Confirm that
        # the totals always agree.
        while remaining != 0:
            enriched = self._traj_queue.get()
            collected.append(enriched)
            remaining -= len(enriched)

        states, actions, v_targets, gaes = (
            np.concatenate([getattr(item, field) for item in collected], axis=0)
            for field in ("states", "actions", "v_targets", "gaes")
        )

        assert len(states) == len(actions)
        assert len(states) == len(v_targets)
        assert len(states) == len(gaes)
        return states, actions, v_targets, gaes