def _on_rpc_update_graph(self, task_id, peer_addr, state, is_final):
    """Apply a graph update received over RPC to the packet registered for `task_id`.

    Args:
        task_id: key looked up in ``self._by_task_id``.
        peer_addr: stored as the packet's ``_peer_addr`` if it has none yet.
        state: update mapping; ``state['state']`` is read here and the whole
            mapping is forwarded to ``pck._update_graph``.
        is_final: flag forwarded to ``pck._update_graph``; when truthy the
            packet may additionally be handed to ``_mark_task_fin_wait``.

    Raises:
        WrongTaskIdError: no packet is registered for ``task_id``.
        AssertionError: the packet is in a state listed in the assert below,
            or a peer address is already set when the packet first becomes
            STARTED.
    """
    pck = self._by_task_id.get(task_id)

    # Logged before the lookup is validated, so `pck and pck.id` deliberately
    # tolerates a missing packet.
    logging.debug('_on_rpc_update_graph: task_id=%s, pck_id=%s; status=%s; is_final=%s' % (
        task_id,
        pck and pck.id,
        GraphState.str(state['state']),
        is_final
    ))

    if not pck:
        raise WrongTaskIdError('No packet for sbx:%s' % task_id)

    # Everything below reads and mutates the packet under its own lock.
    with pck._lock:
        # FIXME Known problematic sequence of events
        # (presumably pck = remote packet side, rem = this side -- confirm):
        # pck: connect
        # pck: write
        # rem: enter _on_rpc_update_graph
        # pck: CRASHED
        # pck: Sandbox' task FAILURE
        # rem: _on_task_status_change (enter + exit)
        # rem: _on_rpc_update_graph with self.lock <-- OOPS

        # Updates are not expected in any of these states.
        assert pck._state not in [
            RemotePacketState.CREATING,
            RemotePacketState.TASK_FIN_WAIT,
            RemotePacketState.FINISHED,
            RemotePacketState.FETCHING_RESOURCE_LIST,
            RemotePacketState.FETCHING_FINAL_UPDATE
        ], "_on_rpc_update_graph in %s state" % pck._state

        if pck._state in [RemotePacketState.STARTING,
                          RemotePacketState.CHECKING_START_ERROR,
                          RemotePacketState.STARTED]:
            # First update seen for this packet: switch it to STARTED and
            # drop whatever was scheduled for it.
            if pck._state != RemotePacketState.STARTED:
                pck._set_state(RemotePacketState.STARTED, '_on_rpc_update_graph')
                pck._drop_sched_if_need()
                assert not pck._peer_addr
            #else:
                #assert not pck._sched # stop may be scheduled for example

            # Record the peer address only once.
            if not pck._peer_addr:
                pck._peer_addr = peer_addr
                logging.debug('SET pck._peer_addr = %s for %s' % (peer_addr, pck))

                # A stop mode was already requested for this packet: on a
                # final update just mark it as sent, otherwise start the stop.
                if pck._target_stop_mode:
                    if is_final:
                        pck._sent_stop_mode = pck._target_stop_mode # FIXME
                    else:
                        self._start_packet_stop(pck)

        # pck._update_graph is skipped while a CANCEL stop is targeted.
        if pck._target_stop_mode != StopMode.CANCEL:
            pck._update_graph(state, is_final)

        if is_final:
            # Only a cancelled or successful final update moves the packet on
            # via _mark_task_fin_wait; any other final state is left as is.
            if pck._target_stop_mode == StopMode.CANCEL \
                    or state['state'] == GraphState.SUCCESSFULL:
                self._mark_task_fin_wait(pck, '_on_rpc_update_graph(SUCCESSFULL)')
            else:
                pass # XXX WAITING for TaskStateGroups.TERMINATED
def _update_state(self):
    """Recompute the proxy state and notify ``_ops`` when it has changed.

    Nothing is stored, logged or signalled when ``_calc_state()`` returns a
    value equal to the current ``self.state``.
    """
    recalculated = self._calc_state()

    if not (recalculated == self.state):
        self.state = recalculated

        state_name = GraphState.str(self.state)
        logging.debug('SandboxJobGraphExecutorProxy.state => %s' % state_name)

        self._ops.on_state_change()
def _do_on_packet_terminated(self):
    """Record the outcome of the terminated remote packet and forget it.

    Clears ``stopping``.  If the run was cancelled, the ``cancelled`` flag is
    reset and the packet is simply dropped.  Otherwise the packet's outcome
    is recorded first:

    * packet error: copied to ``_error``; a permanent error zeroes ``_tries``,
      otherwise (while ``_tries`` is non-zero) a retry delay is picked from
      ``RETRY_INTERVALS`` and a time-wait stop is scheduled;
    * final state TIME_WAIT: the deadline is taken from the packet's last
      update and a time-wait stop is scheduled;
    * final state SUCCESSFULL / ERROR: ``result`` becomes True / False.

    In every case the packet's Sandbox task id is kept in ``_prev_task_id``
    and the ``_remote_*`` fields are reset.

    Raises:
        AssertionError: a time-wait deadline or schedule is still set.
    """
    def on_stop():
        # Remember the finished task, then drop all per-packet state.
        self._prev_task_id = self._remote_packet._sandbox_task_id
        self._remote_packet = None
        self._remote_state = None
        self._remote_time_wait_deadline = None

    assert not self.time_wait_deadline and not self.time_wait_sched

    packet = self._remote_packet

    self.stopping = False

    if self.cancelled:
        self.cancelled = False
        on_stop()
        return

    # TODO Handle Exception packet state
    # FIXME Rollback history/Status to prev state

    # Resolve the name first: `%` binds tighter than `... if ... else ...`,
    # so formatting inline would drop the message prefix whenever a final
    # state is present.
    if packet._final_state is None:
        final_state_name = None
    else:
        final_state_name = GraphState.str(packet._final_state)
    logging.debug('state for SandboxJobGraphExecutorProxy == %s' % final_state_name)

    if packet._error:
        self._error = packet._error

        if packet._is_error_permanent:
            self._tries = 0

        elif self._tries:
            retry_idx = self.MAX_TRY_COUNT - self._tries - 1

            # Indices past the end of the table reuse its last interval.
            delay = (
                self.RETRY_INTERVALS[retry_idx]
                if retry_idx < len(self.RETRY_INTERVALS)
                else self.RETRY_INTERVALS[-1]
            )

            logging.debug("%s failed. Will retry after %s seconds" % (packet, delay))

            self.time_wait_deadline = time.time() + delay

            self._schedule_time_wait_stop()

    elif packet._final_state == GraphState.TIME_WAIT:
        self.time_wait_deadline = packet._last_update['nearest_retry_deadline']
        self._schedule_time_wait_stop()

    elif packet._final_state == GraphState.SUCCESSFULL:
        self.result = True

    elif packet._final_state == GraphState.ERROR:
        self.result = False

    # The packet's result snapshot id is kept only when the run did not succeed.
    self._prev_snapshot_resource_id = (
        packet._result_snapshot_resource_id
        if packet._final_state != GraphState.SUCCESSFULL
        else None
    )

    on_stop()
def _update_graph(self, update, is_final):
    """Store a graph update on this packet and pass it on to ``_ops``.

    Args:
        update: mapping with a ``'succeed_jobs'`` iterable (items are
            converted with ``int``) and, for a final update, a ``'state'`` entry.
        is_final: when truthy, ``update['state']`` is saved as ``_final_state``.

    Must not be called while the target stop mode is CANCEL (asserted).
    ``_ops._on_sandbox_packet_update`` receives the update, the jobs that
    were not yet in ``_succeeded_jobs``, and ``is_final``.
    """
    assert self._target_stop_mode != StopMode.CANCEL

    self._last_update = update  # TODO

    reported_jobs = {int(job_id) for job_id in update['succeed_jobs']}
    fresh_jobs = reported_jobs - self._succeeded_jobs
    self._succeeded_jobs = reported_jobs

    if is_final:
        self._final_state = update['state']

        final_state_name = GraphState.str(self._final_state)
        logging.debug('%s._final_state = %s' % (self, final_state_name))

    self._ops._on_sandbox_packet_update(update, fresh_jobs, is_final)