def process_next_round(self, process, newstate=ProcessState.DIED_REQUESTED,
                       enqueue=True):
    """Tentatively advance a process to the next round

    The record is put into ``newstate``, its resource assignment is
    cleared and its round counter is bumped by one, then written to the
    store. If the write hits a conflict the record is reloaded and the
    attempt repeated, but only while the reloaded record is still below
    TERMINATING and still in the round that was observed on entry.

    @param process: process record to advance
    @param newstate: state to place the process in; must be truthy and
        below TERMINATING (checked with an assert)
    @param enqueue: when True, a successfully advanced process is also
        placed on the store's queue for its new round
    @return: tuple of (process record, whether this call wrote it). The
        record may be a freshly loaded one if a conflict occurred.
    """
    assert newstate and newstate < ProcessState.TERMINATING

    starting_round = process.round
    written = False

    while (process.state < ProcessState.TERMINATING and
            process.round == starting_round):
        process.state = newstate
        process.assigned = None
        process.round = starting_round + 1
        try:
            self.store.update_process(process)
            written = True
            log.info(get_process_state_message(process))
        except WriteConflictError:
            # the stored record changed underneath us; reload it and let
            # the loop condition decide whether another attempt is valid
            process = self.store.get_process(process.owner, process.upid)

    if not written:
        return process, written

    if enqueue:
        self.store.enqueue_process(process.owner, process.upid,
                                   process.round)
    self.notifier.notify_process(process)
    return process, written
def _maybe_update_assigned_process(self, process, resource):
    """Tentatively record that a process is assigned to a resource

    While the process record is below the ASSIGNED state, it is marked
    ASSIGNED to ``resource``, its dispatch counter is incremented, and the
    write is attempted. A write conflict causes the record to be reloaded
    and the state check repeated.

    @param process: process record to update
    @param resource: resource record supplying ``resource_id`` and
        ``properties``
    @return: tuple of (process record, whether this call wrote it). The
        record may be a freshly loaded one if a conflict occurred.
    """
    written = False

    while process.state < ProcessState.ASSIGNED:
        process.assigned = resource.resource_id
        process.state = ProcessState.ASSIGNED
        process.increment_dispatches()

        # Copy the hostname (when the resource has one) straight onto the
        # process record: it is frequently wanted and having it here
        # avoids a second lookup of the resource.
        process.hostname = resource.properties.get('hostname')

        try:
            self.store.update_process(process)
            written = True
            log.info(get_process_state_message(process))
        except WriteConflictError:
            # lost the race; reload and re-check the state
            process = self.store.get_process(process.owner, process.upid)

    if written:
        self.notifier.notify_process(process)

    return process, written
def process_change_state(self, process, newstate, **updates):
    """Tentatively move a process record to a new state

    Another worker may write the record before this one does, so the
    write is retried on conflict. Retrying continues only while the
    current record's state is less than ``newstate`` and its round is
    still the round observed on entry.

    @param process: process record to update
    @param newstate: the state to move to; nothing is written unless the
        current state is less than this
    @param updates: keyword arguments of additional fields, applied to
        the record via ``process.update`` before each write attempt
    @return: tuple of (process record, whether this call wrote it). The
        record may be a freshly loaded one if a conflict occurred.
    """
    starting_round = process.round
    written = False

    while process.state < newstate and process.round == starting_round:
        if newstate == ProcessState.RUNNING:
            process.increment_starts()
        process.state = newstate
        process.update(updates)

        try:
            self.store.update_process(process)
            written = True

            # a transition to FAILED is reported at error level, every
            # other transition at info level
            emit = log.error if newstate == ProcessState.FAILED else log.info
            emit(get_process_state_message(process))
        except WriteConflictError:
            # someone else wrote first; reload and re-evaluate
            process = self.store.get_process(process.owner, process.upid)

    if written:
        self.notifier.notify_process(process)

    return process, written
def terminate_process(self, owner, upid):
    """
    Kill a running process

    @param owner: owner of the process
    @param upid: ID of process
    @return: description of process termination status
    @raise NotFoundError: if the store has no process with this upid

    This is an RPC-style call that returns quickly, as soon as
    termination of the process has begun (TERMINATING state).

    Retry
    =====
    If a call to this operation times out without a reply, it can safely
    be retried. Termination of processes should be an idempotent operation
    here and at the EEAgent. It is important that eeids not be repeated to
    facilitate this.
    """
    validate_owner_upid(owner, upid)

    process = self.store.get_process(owner, upid)
    if process is None:
        raise NotFoundError("process %s does not exist" % upid)

    # if the process is already at rest -- UNSCHEDULED, or REJECTED
    # for example -- we do nothing and leave the process state as is.
    if process.state in ProcessState.TERMINAL_STATES:
        return process

    # unassigned processes can just be marked as terminated, but note
    # that we may be racing with the matchmaker.
    if process.assigned is None:

        # there could be a race where the process is assigned just
        # after we pulled the record. In this case our write will
        # fail. we keep trying until we either see an assignment
        # or we mark the process as terminated.
        updated = False
        while process.assigned is None and not updated:
            process.state = ProcessState.TERMINATED
            try:
                self.store.update_process(process)
                updated = True
            except WriteConflictError:
                process = self.store.get_process(process.owner,
                                                 process.upid)

        if updated:
            log.info(get_process_state_message(process))
            self.notifier.notify_process(process)

            # also try to remove process from queue
            try:
                self.store.remove_queued_process(process.owner,
                                                 process.upid, process.round)
            except NotFoundError:
                pass

            # EARLY RETURN: the process was never assigned to a resource
            return process

        # otherwise the reloaded record showed an assignment: fall through
        # and terminate it like any other assigned process.

    # same as above: we want to mark the process as terminating but
    # other players may also be updating this record. we keep trying
    # in the face of conflict until the process is >= TERMINATING --
    # but note that it may be another worker that actually makes the
    # write.
    #
    # Remember the assignment and round we read before the state change,
    # so the EEAgent is asked to terminate exactly that dispatch.
    resource_id = process.assigned
    process_round = process.round

    # process_change_state reloads the record into its own local variable
    # after a write conflict, so the current record is only visible through
    # its return value. Use it, rather than the object we passed in, as the
    # status handed back to the caller.
    process, _ = self.process_change_state(process,
                                           ProcessState.TERMINATING)

    self.eeagent_client.terminate_process(resource_id, upid, process_round)

    return process