Ejemplo n.º 1
0
class sIMapIterator(object):
    """Bounded iterator that yields index-tagged results in index order.

    Producers hand over ``(success, value)`` pairs through ``_set(i, obj)`` in
    any order; consumers receive them through ``next()`` ordered by ``i``.
    A semaphore created with ``maxsize`` permits limits how many results
    ``_set`` accepts before ``next`` has taken earlier ones out again.
    """

    def __init__(self, cache, maxsize):
        """Create the iterator and register it in *cache* under a fresh job id.

        Args:
            cache: Mutable mapping. The iterator stores itself under its job
                id and deletes that entry once the number of queued results
                equals the announced length.
            maxsize: Number of results ``_set`` may accept ahead of the
                consumer; also the capacity of the internal queue.
        """
        self._cond = threading.Condition(threading.Lock())

        # One permit per accepted-but-not-yet-consumed result. `_set` takes a
        # permit, `next` gives one back for every item it removes.
        self._empty_sema = threading.Semaphore(maxsize)

        # The builtin next() drives both Python 2 (`.next`) and Python 3
        # (`.__next__`) iterators, unlike the method call `job_counter.next()`.
        self._job = next(job_counter)
        self._cache = cache
        self._items = Queue(maxsize)

        self._index = 0      # index of the next result to enqueue in order
        self._length = None  # total result count; unknown until _set_length()

        self._unsorted = {}  # results that arrived ahead of their turn, by index
        cache[self._job] = self

    def __iter__(self):
        """Return the iterator itself."""
        return self

    def next(self, timeout=None):
        """Return the next result in index order.

        Args:
            timeout: Seconds to wait for a result when none is queued yet;
                ``None`` waits until a producer notifies the condition.

        Returns:
            The ``value`` of the next ``(success, value)`` pair when
            ``success`` is true.

        Raises:
            StopIteration: All announced results have been queued and
                consumed.
            TimeoutError: The wait ended and still no result was available.
            Exception: The stored ``value`` itself when ``success`` is false.
        """
        with self._cond:
            try:
                item = self._items.get_nowait()
            except Empty:
                if self._index == self._length:
                    raise StopIteration
                self._cond.wait(timeout)
                # Do not block on the queue here. The condition's lock is held
                # again once wait() returns, so a blocking get() would keep
                # _set from enqueuing anything, and it would hang forever when
                # the wake-up came from _set_length() instead of a new item.
                try:
                    item = self._items.get_nowait()
                except Empty:
                    if self._index == self._length:
                        raise StopIteration
                    raise TimeoutError
            # An item left the queue, so a producer may deliver one more.
            self._empty_sema.release()

        success, value = item
        if success:
            return value
        raise value

    __next__ = next                    # XXX

    def _set(self, i, obj):
        """Accept result *obj* for index *i*, blocking while no permit is free.

        A result whose index is the next expected one is queued immediately,
        followed by any directly following results that were parked earlier;
        one waiting consumer is then notified. Any other result is parked in
        ``_unsorted`` until its turn comes.

        Args:
            i: Position of the result in the output order.
            obj: ``(success, value)`` pair to hand to the consumer.
        """
        # NOTE(review): the permit is taken before the ordering check, so
        # parked out-of-order results count against maxsize as well. If
        # maxsize of them arrive before the result for self._index, that
        # result blocks right here and nothing can ever be consumed.
        self._empty_sema.acquire()
        with self._cond:
            if self._index == i:
                self._items.put_nowait(obj)
                self._index += 1
                # Flush every parked result that is now in sequence.
                while self._index in self._unsorted:
                    obj = self._unsorted.pop(self._index)
                    self._items.put_nowait(obj)
                    self._index += 1
                self._cond.notify()
            else:
                self._unsorted[i] = obj

            # Everything has been queued: the cache entry is no longer needed.
            if self._index == self._length:
                del self._cache[self._job]

    def _set_length(self, length):
        """Announce the total number of results.

        If that many results have already been queued, a waiting consumer is
        woken so it can finish, and the cache entry is removed.

        Args:
            length: Total number of results this iterator will deliver.
        """
        with self._cond:
            self._length = length
            if self._index == self._length:
                self._cond.notify()
                del self._cache[self._job]
Ejemplo n.º 2
0
class GAETrajectoryBuffer(TrajectoryBuffer):
    """Trajectory buffer that hands finished trajectories to worker processes.

    Finished trajectories are pushed onto ``_enrich_queue`` for the
    ``enrich_trajectories`` workers; ``sample`` reads trajectories back from
    ``_traj_queue`` and stacks their states, actions, value targets and
    generalized advantage estimations (GAE).
    """

    def __init__(self, capacity, horizon, gamma, gae_lambda, observation_shape,
                 action_space, v_fn):
        """Set up the queues and start one worker process per CPU.

        Args:
            capacity: Number of buffered steps at which ``ready`` turns true.
            horizon: Forwarded to the base class and to every worker.
            gamma: Stored on the instance and forwarded to every worker.
            gae_lambda: Lambda of GAE(lambda); forwarded to every worker.
            observation_shape: Forwarded to the base class.
            action_space: Forwarded to the base class.
            v_fn: Passed to ``traj.update_vs`` before a trajectory is queued
                for enrichment.
        """
        super().__init__(observation_shape=observation_shape,
                         action_space=action_space,
                         horizon=horizon)
        self._gamma = gamma
        self._capacity = capacity
        print("\tlambda for GAE(lambda): {}".format(gae_lambda))
        self._enrich_queue = Queue()
        self._traj_queue = Queue()
        self._v_fn = v_fn

        enricher_count = cpu_count()
        # NOTE(review): the process handles are not kept, so the workers
        # cannot be joined or terminated from here.
        for _ in range(enricher_count):
            p = Process(target=enrich_trajectories,
                        args=(gamma, horizon, gae_lambda, self._enrich_queue,
                              self._traj_queue))
            p.start()
        # Block until one message per worker has arrived on the result queue
        # (presumably a start-up handshake; confirm in enrich_trajectories).
        for _ in range(enricher_count):
            self._traj_queue.get()

    def capacity(self):
        """Return the capacity given at construction."""
        return self._capacity

    def _create_trajectory(self, env_idx):
        """Build an empty ``GAETrajectory`` for environment *env_idx*."""
        # _horizon, _observation_shape and _action_space are not assigned in
        # this class; presumably the base-class constructor sets them.
        return GAETrajectory(self._horizon,
                             observation_shape=self._observation_shape,
                             action_type=self._action_space.dtype,
                             action_shape=self._action_space.shape,
                             env_idx=env_idx)

    def _enrich_traj(self, traj):
        """Queue *traj* for enrichment once it is done or closed.

        A trajectory that is neither done nor closed is returned untouched.
        Otherwise ``traj.update_vs`` is called with the stored value function
        and the trajectory is put on the enrichment queue.

        Returns:
            The same *traj* object in both cases.
        """
        if not traj.done() and not traj.closed:
            return traj
        traj.update_vs(self._v_fn)
        self._enrich_queue.put_nowait(traj)
        return traj

    def ready(self):
        """Return whether the buffer holds at least ``capacity`` entries."""
        return len(self) >= self._capacity

    def sample(self):
        """Collect the enriched trajectories and stack their contents.

        Closes all trajectories, then reads from the result queue until the
        summed length of the received trajectories covers ``len(self)``.

        Returns:
            Tuple ``(states, actions, v_targets, gaes)``; each element is the
            concatenation along axis 0 of the matching attribute of every
            received trajectory.

        Raises:
            ValueError: From ``np.concatenate`` when no trajectory was
                received (``len(self)`` was 0).
        """
        self.close_trajectories()

        remaining = len(self)

        trajectories = []
        # `> 0` instead of `!= 0`: if the received lengths overshoot the
        # expected total, the counter goes negative and an inequality test
        # would keep blocking on the queue forever.
        while remaining > 0:
            traj = self._traj_queue.get()
            trajectories.append(traj)
            remaining -= len(traj)

        states = np.concatenate([traj.states for traj in trajectories], axis=0)
        actions = np.concatenate([traj.actions for traj in trajectories],
                                 axis=0)
        v_targets = np.concatenate([traj.v_targets for traj in trajectories],
                                   axis=0)
        gaes = np.concatenate([traj.gaes for traj in trajectories], axis=0)

        assert len(states) == len(actions)
        assert len(states) == len(v_targets)
        assert len(states) == len(gaes)

        return states, actions, v_targets, gaes