def describe(self, app_id: str) -> Optional[DescribeAppResponse]:
    """
    Reports the current state of the app identified by ``app_id``.

    If the app's recorded state is not yet terminal, every replica of every
    role is polled and the state is recomputed: ``RUNNING`` if any replica
    is alive, otherwise ``FAILED`` if any replica failed, otherwise
    ``SUCCEEDED``. The recomputed state is stored back on the app. An app
    whose state is terminal after this step is closed.

    Returns:
        ``None`` if ``app_id`` is not in the apps cache, otherwise a
        ``DescribeAppResponse`` carrying the app id, state and structured
        error message (``num_restarts`` is always reported as 0).
    """
    if app_id not in self._apps:
        return None

    app = self._apps[app_id]
    # read the error message first, before any replica is polled
    error_msg = app.get_structured_error_msg()

    state = app.state
    if not is_terminal(state):
        # not known to be finished yet: derive the state from the replicas.
        # every replica is polled (no short-circuiting).
        any_alive = False
        any_failed = False
        all_replicas = (
            replica
            for role_replicas in app.role_replicas.values()
            for replica in role_replicas
        )
        for replica in all_replicas:
            any_alive |= replica.is_alive()
            any_failed |= replica.failed()

        if any_alive:
            state = AppState.RUNNING
        elif any_failed:
            state = AppState.FAILED
        else:
            state = AppState.SUCCEEDED
        app.set_state(state)

    if is_terminal(app.state):
        app.close()

    resp = DescribeAppResponse()
    resp.app_id = app_id
    resp.structured_error_msg = error_msg
    resp.state = state
    resp.num_restarts = 0
    return resp
def _evict_lru(self) -> bool:
    """
    Evicts one least recently used element from the apps cache. LRU is
    defined as the oldest app in a terminal state (e.g. oldest finished app),
    where "oldest" means the smallest ``last_updated`` value.

    Returns:
        ``True`` if an entry was evicted, ``False`` if no entries could
        be evicted (e.g. all apps are running)
    """
    lru_time = sys.maxsize
    lru_app_id = None
    for app_id, app in self._apps.items():
        if is_terminal(app.state) and app.last_updated <= lru_time:
            # remember the smallest timestamp seen so far; without this
            # update every terminal app would compare as "older" and the
            # last one iterated would be evicted instead of the oldest
            lru_time = app.last_updated
            lru_app_id = app_id

    # compare against None so that a falsy app id can still be evicted
    if lru_app_id is not None:
        # evict LRU finished app from the apps cache
        del self._apps[lru_app_id]
        log.debug(f"evicting app: {lru_app_id}, from local scheduler cache")
        return True
    else:
        log.debug(f"no apps evicted, all {len(self._apps)} apps are running")
        return False
def wait(self, app_id: str, timeout=-1) -> Optional[DescribeAppResponse]:
    """
    Waits for the app to finish or raise TimeoutError upon timeout (in seconds).
    If no timeout is specified (or ``timeout <= 0``) waits indefinitely.

    The app is polled via ``describe()`` every ``timeout / 10`` seconds when
    a timeout is given, otherwise once per second.

    Returns:
        The last return value from ``describe()``: ``None`` if the app is
        not known to ``describe()``, otherwise the response whose state is
        terminal.

    Raises:
        TimeoutError: if the app did not reach a terminal state within
            ``timeout`` seconds.
    """
    if timeout > 0:
        # the deadline is "now + timeout"; using "now" alone would make the
        # loop below exit immediately and always raise TimeoutError
        expiry = time.time() + timeout
        interval = timeout / 10
    else:
        expiry = sys.maxsize
        interval = 1

    while expiry > time.time():
        desc = self.describe(app_id)
        if desc is None:
            return None
        elif is_terminal(desc.state):
            return desc
        time.sleep(interval)

    raise TimeoutError(f"timed out waiting for app: {app_id} to finish")
def describe(self, app_id: str) -> Optional[DescribeAppResponse]:
    """
    Reports the current state of the app identified by ``app_id``.

    If the app's recorded state is not yet terminal, every process of every
    role is polled and the state is recomputed: ``RUNNING`` if any process
    is alive, otherwise ``FAILED`` if any process failed (in which case the
    app is terminated), otherwise ``SUCCEEDED``. The recomputed state is
    stored back on the app.

    Returns:
        ``None`` if ``app_id`` is not in the apps cache, otherwise a
        ``DescribeAppResponse`` carrying the app id and state
        (``num_restarts`` is always reported as 0).
    """
    if app_id not in self._apps:
        return None

    app = self._apps[app_id]
    state = app.state

    if not is_terminal(state):
        # not known to be finished yet: derive the state from the processes.
        # every process is polled (no short-circuiting).
        any_alive = False
        any_failed = False
        all_procs = (
            proc
            for role_procs in app.role_procs.values()
            for proc in role_procs
        )
        for proc in all_procs:
            any_alive |= self._is_alive(proc)
            any_failed |= self._failed(proc)

        if any_alive:
            state = AppState.RUNNING
        elif any_failed:
            state = AppState.FAILED
            self._terminate(app)  # terminate danglers
        else:
            state = AppState.SUCCEEDED
        app.set_state(state)

    resp = DescribeAppResponse()
    resp.app_id = app_id
    resp.state = state
    resp.num_restarts = 0
    return resp
def _evict_lru(self) -> bool:
    """
    Evicts one least recently used element from the apps cache. LRU is
    defined as the oldest app in a terminal state (e.g. oldest finished app),
    where "oldest" means the smallest ``last_updated`` value.

    Returns:
        ``True`` if an entry was evicted, ``False`` if no entries could
        be evicted (e.g. all apps are running)
    """
    lru_time = sys.maxsize
    lru_app_id = None
    for app_id, app in self._apps.items():
        if is_terminal(app.state) and app.last_updated <= lru_time:
            # remember the smallest timestamp seen so far; without this
            # update every terminal app would compare as "older" and the
            # last one iterated would be evicted instead of the oldest
            lru_time = app.last_updated
            lru_app_id = app_id

    # compare against None so that a falsy app id can still be evicted
    if lru_app_id is not None:
        # evict LRU finished app from the apps cache
        # do not remove the app name from the ids map so that the ids
        # remain unique throughout the lifespan of this scheduler
        # for example if cache size == 1
        #    app_id1 = submit(app)
        #    app_id2 = submit(app)  # app_id1 was evicted here
        #    app_id1 == "app.name_0"
        #    app_id2 == "app.name_1"
        del self._apps[lru_app_id]
        log.debug(f"evicting app: {lru_app_id}, from local scheduler cache")
        return True
    else:
        log.debug(f"no apps evicted, all {len(self._apps)} apps are running")
        return False
def wait(
    self,
    app_id: str,
    scheduler: Optional[LocalScheduler] = None,
    timeout: float = 30,
) -> Optional[DescribeAppResponse]:
    """
    Polls ``describe()`` until the app reaches a terminal state, raising
    TimeoutError once ``timeout`` seconds (default 30) have elapsed.

    The given ``scheduler`` is polled if provided (and truthy), otherwise
    ``self.scheduler`` is used. The polling interval is ``timeout / 100``
    seconds.

    Returns:
        The last return value from ``describe()``: ``None`` if the
        scheduler returned ``None`` for the app, otherwise the response
        whose state is terminal.
    """
    target = scheduler or self.scheduler
    deadline = time.time() + timeout
    poll_interval = timeout / 100

    while time.time() < deadline:
        desc = target.describe(app_id)
        if desc is None:
            return None
        if is_terminal(desc.state):
            return desc
        time.sleep(poll_interval)

    raise TimeoutError(f"timed out waiting for app: {app_id}")
def _check_finished(self):
    """
    Refreshes ``self._app_finished`` from the scheduler's view of the app.

    The app counts as finished when ``describe()`` returns nothing for it
    or when the described state is terminal.
    """
    desc = self._scheduler.describe(self._app_id)

    # a missing description means the (already finished) app was evicted
    # from the LRU cache; otherwise the app is still cached and is finished
    # only if it reached a terminal state
    if desc and not is_terminal(desc.state):
        self._app_finished = False
        return
    self._app_finished = True