def _job_clean(self, jid: RPCKey) -> str:
    """Detach job ``jid`` from the worker it is currently assigned to.

    The job is dropped from ``jobs_workers`` and from the owning worker's
    entry in ``workers_jobs``; afterwards ``push_push_assign`` is reset to 0.

    :param jid: key of the job to detach; it must be present in
        ``jobs_workers`` (the lookup is not guarded)
    :return: id of the worker the job had been assigned to
    """
    # NOTE(review): logged at error level although this runs on every
    # completion/resignation - confirm that is intended.
    trc('0').error('%s', jid)

    owner = self.jobs_workers[jid]
    del self.jobs_workers[jid]
    self.workers_jobs[owner].remove(jid)

    reset(self.push_push_assign, 0)

    return owner
def _job_new(self, jid: RPCKey, jreq: RequestType):
    """Register a new job and queue it as pending.

    :param jid: key under which the job is stored in ``jobs``
    :param jreq: request payload wrapped into a ``JobState`` together with
        the current time
    """
    trc('0').debug('%s %s', jid, jreq)

    state = JobState(jreq, time_now())
    self.jobs[jid] = state
    self.jobs_pending.push(jid)

    reset(self.push_push_assign, 0)
def push_push_assign(self):
    """Pair pending jobs with workers that still have spare capacity.

    Every worker contributes ``capacity - len(assigned jobs)`` slots (never
    fewer than zero). Jobs are popped from ``jobs_pending`` one per slot
    until either the slots or the pending jobs run out. Each paired job is
    marked as started, has its attempt counter bumped, is recorded in
    ``jobs_workers`` / ``workers_jobs`` and is queued on
    ``jobs_pending_assign``.
    """

    def free_slots() -> Iterable[str]:
        # Yield a worker id once per unit of spare capacity.
        for wid, wst in self.workers.items():
            spare = wst.load.capacity - len(self.workers_jobs[wid])
            for _ in range(max(0, spare)):
                yield wid

    def drain(queue: Deque[RPCKey]):
        # Pop items until the queue reports empty (or pop() says so).
        while len(queue):
            try:
                item = queue.pop()
            except IndexError:
                return
            yield item

    # zip() advances its arguments left to right, so a job is only popped
    # from the pending queue once a free slot has been obtained for it.
    for wid, jid in zip(free_slots(), drain(self.jobs_pending)):
        trc('1').debug('%s %s', wid, jid)

        job = self.jobs[jid]
        job.started = time_now()
        job.attempts += 1

        self.jobs_workers[jid] = wid
        self.workers_jobs[wid].append(jid)

        self.jobs_pending_assign.push(SchedKey.now(jid))
        # NOTE(review): kept per assignment; confirm this reset is not meant
        # to run once after the loop.
        reset(self.push_assign, 0)
def _job_done(self, jid: RPCKey, jres: ResponseType):
    """Record the result of a finished job and queue it for flushing.

    :param jid: key of the finished job
    :param jres: result stored on the job's state as ``res``
    """
    self.dones += 1

    # Detach the job from its worker before touching its state.
    self._job_clean(jid)

    job = self.jobs[jid]
    job.finished = time_now()
    job.res = jres

    self.jobs_pending_flush.push(SchedKey.now(jid))
    reset(self.push_flush, 0)
def done_ack(self, brid: RPCKey, jid: RPCKey):
    """Handle an acknowledgement for job ``jid`` sent under id ``brid``.

    If ``brid`` differs from ``self.brid`` the mismatch is logged,
    ``push_announce`` is reset to 0 and nothing else happens. Otherwise the
    job is evicted when a result is held for it, and its entry in
    ``jobs_pending_done`` (if any) is removed.
    """
    mismatch = brid != self.brid
    if mismatch:
        trc('brid').error('%s != %s %s', brid, self.brid, jid)
        reset(self.push_announce, 0)
        return

    if jid in self.jobs_res:
        self._evict(jid)

    if jid in self.jobs_pending_done:
        del self.jobs_pending_done[jid]
def resign(self, jid: RPCKey, reason: Optional[str] = None) -> bool:
    """Queue job ``jid`` for resignation.

    :param jid: key of the job to resign
    :param reason: free-form text, only used in the debug log
    :return: ``False`` when the job is unknown or not assigned to a worker,
        ``True`` once it has been queued on ``jobs_pending_resign``
    """
    # The job must be known ('0') and currently assigned to a worker ('1').
    guards = (('0', self.jobs), ('1', self.jobs_workers))
    for tag, registry in guards:
        if jid not in registry:
            trc(tag).debug('%s %s', jid, reason)
            return False

    self.jobs_pending_resign.push(SchedKey.now(jid))
    reset(self.push_resign, 0)

    return True
def _job_resign(self, jid: RPCKey):
    """Take job ``jid`` away from its worker and either drop or requeue it.

    The job is detached from its worker and removed from the pending,
    pending-assign and pending-resign queues. A job listed in
    ``jobs_cancel`` is then deleted altogether; any other job is pushed
    back onto ``jobs_pending`` with its ``started`` mark cleared.
    """
    self.resigns += 1
    self._job_clean(jid)

    queues = (
        self.jobs_pending,
        self.jobs_pending_assign,
        self.jobs_pending_resign,
    )
    for queue in queues:
        if jid in queue:
            del queue[jid]

    if jid in self.jobs_cancel:
        # Cancelled while assigned: forget the job completely.
        del self.jobs[jid]
        self.jobs_cancel.remove(jid)
    else:
        self.jobs_pending.push(jid)
        self.jobs[jid].started = None

        # NOTE(review): _job_clean() above already issued this same reset;
        # confirm this call belongs to the requeue branch only.
        reset(self.push_push_assign, 0)
def resign(self, brid: RPCKey, jid: RPCKey, reason: Optional[str] = None):
    """Handle a resignation request for job ``jid`` sent under id ``brid``.

    A request whose ``brid`` differs from ``self.brid`` is logged, triggers
    a reset of ``push_announce`` and is otherwise ignored. For a matching
    ``brid`` the method calls ``_push_done(jid)`` and then evicts the job
    and forks, unless the job is unknown or a result is already held for
    it.

    :param reason: accepted but not used by this method
    """
    if brid != self.brid:
        trc('brid').error('%s != %s %s', brid, self.brid, jid)
        reset(self.push_announce, 0)
        return

    # NOTE(review): runs before the membership checks below - confirm the
    # intended ordering.
    self._push_done(jid)

    known = jid in self.jobs
    if not known:
        # [w-1] a resignation notice may arrive after the worker has already
        # [w-1] finished the job successfully; in that scenario the broker
        # [w-1] must report the resignation as a failure by checking its
        # [w-1] finish log
        trc('unk').error('%s', jid)
        return

    if jid in self.jobs_res:
        trc('done').error('%s', jid)
        return

    self._evict(jid)
    self._fork()
    return
def _started(self):
    """Mark the instance as started and reset its timers.

    ``startup_timeout`` is reset to ``None`` and ``push_announce`` to 0;
    ``push_metrics`` is reset to 0 only when ``url_metrics`` is set.
    """
    self.has_started = True

    reset(self.startup_timeout, None)
    reset(self.push_announce, 0)

    metrics_enabled = self.url_metrics
    if metrics_enabled:
        reset(self.push_metrics, 0)
def bk_done(self, res: ResponseType):
    """Accept a result from the sending worker and free that worker.

    The sender is looked up in ``workers_jobs`` to find its job. The result
    is stored in ``jobs_res`` and queued on ``jobs_pending_done``; the
    job/worker bookkeeping is then removed, ``load.occupied`` is
    decremented and the worker is pushed onto ``workers_free``. Unknown
    senders or jobs are logged and ignored.
    """
    source = sender()

    if source not in self.workers_jobs:
        trc('1').error('%s', source)
        return

    jid = self.workers_jobs[source]

    if jid not in self.jobs_workers:
        trc('1').error('%s', jid)
        return

    self.jobs_res[jid] = res
    self.jobs_pending_done.push(SchedKey(time_now(), jid))
    reset(self.push_done, 0)

    # The worker that gets released is the one recorded for the job, which
    # is looked up again here rather than taken from the sender.
    owner = self.jobs_workers[jid]
    del self.jobs_workers[jid]
    del self.workers_jobs[owner]

    self.load.occupied -= 1
    self.workers_free.push(owner)
def assign(self, brid: RPCKey, jid: RPCKey, jreq: RequestType):
    """Try to accept job ``jid`` and report the outcome via ``bk_assign``.

    The reply is ``bk_assign(brid, jid, ok)`` where ``ok`` is:

    * ``False`` if ``brid`` does not match ``self.brid`` (``push_announce``
      is also reset to 0),
    * ``True`` if the job is already known,
    * ``False`` if there is no free worker or the number of held results
      has reached ``conf.pending_max``,
    * ``True`` after the request has been handed to a free worker.
    """
    # todo: currently sends to an address will cause an exception if we're
    # todo: lucky enough to send a task right after the worker had died
    # todo: (and before transport realised that)
    sb = service(Broker[self.cls_req, self.cls_res], self.url_broker)

    if brid != self.brid:
        trc('brid').error('%s != %s %s', brid, self.brid, jid)
        reset(self.push_announce, 0)
        sb.bk_assign(brid, jid, False)
        return

    if jid in self.jobs:
        # is there any scenario where a job may be assigned to something
        # else ?
        trc('kno').error('%s', jid)
        sb.bk_assign(brid, jid, True)
        return

    has_free_worker = len(self.workers_free) > 0
    if not has_free_worker or len(self.jobs_res) >= self.conf.pending_max:
        sb.bk_assign(brid, jid, False)
        return

    target = self.workers_free.pop()

    self.jobs[jid] = jreq
    self.jobs_workers[jid] = target
    self.workers_jobs[target] = jid

    inst = service(
        WorkerInst[self.cls_req, self.cls_res],
        target,
        group=BACKEND,
    )
    inst.put(jreq)

    self.load.occupied += 1

    sb.bk_assign(brid, jid, True)
def stop(self) -> bool:
    """Reset ``stopper`` to 0 and report success.

    :return: always ``True``
    """
    reset(self.stopper, 0)

    return True
def _worker_conf_changed(self):
    """React to a change in worker configuration.

    Resets ``push_metrics`` to 0 when ``url_metrics`` is set, and always
    resets ``push_push_assign`` to 0.
    """
    metrics_enabled = self.url_metrics
    if metrics_enabled:
        reset(self.push_metrics, 0)

    reset(self.push_push_assign, 0)
def startup(self):
    """Reset ``push_metrics`` to 0 when ``url_metrics`` is set.

    :return: always ``None``
    """
    metrics_enabled = self.url_metrics
    if metrics_enabled:
        reset(self.push_metrics, 0)

    return None
def bk_started(self):
    """Remember the sender's address and reset the related timers.

    The sender is stored in ``thread_addr``; ``check_started`` is reset to
    ``None`` and ``announce`` to 0.
    """
    origin = sender()
    self.thread_addr = origin

    reset(self.check_started, None)
    reset(self.announce, 0)
def _fork(self):
    """Pick a random key of ``workers_fork_addrs`` and queue it for forking.

    The chosen key is appended to ``workers_fork`` and ``push_fork`` is
    reset to 0. ``random.choice`` raises ``IndexError`` when
    ``workers_fork_addrs`` is empty.
    """
    candidates = list(self.workers_fork_addrs.keys())
    chosen = random.choice(candidates)

    self.workers_fork.append(chosen)

    reset(self.push_fork, 0)
def stop(self) -> bool:
    """Reset ``stopper`` to 0 and report success.

    :return: always ``True``
    """
    # Pass the bound ``stopper`` attribute rather than the string literal
    # 'stopper': every other ``reset`` call in this file hands over
    # ``self.<name>``, not a name.
    # NOTE(review): assumes this class defines ``stopper`` - confirm.
    reset(self.stopper, 0)

    return True