def _on_exit(self):
    # Time out any jobs still marked RUNNING and release all owned locks
    # before the job-source thread exits.
    timeout_pks = list(
        self._manager.filter(state="RUNNING").values_list("pk", flat=True))
    logger.info(f"Timing out {len(timeout_pks)} running jobs.")
    BalsamJob.batch_update_state(timeout_pks, "RUN_TIMEOUT", release=True)
    self._manager.release_all_owned()
    logger.info("BalsamJobSource thread finished.")
def _handle_dones(self, done_pks):
    # Free the ranks that ran these jobs, then mark them RUN_DONE and release the locks.
    for pk in done_pks:
        rank = self.running_locations[pk]
        self.revert_assign(rank, pk)
    BalsamJob.batch_update_state(done_pks, 'RUN_DONE')
    self.job_source.release(done_pks)
    logger.info(f"RUN_DONE: {len(done_pks)} jobs")
def allocate_next_jobs(self):
    '''Assign cached runnable jobs to free ranks, send them out, and mark the
    corresponding nodes/ranks as busy. Returns True if any jobs were sent.'''
    self.refresh_job_cache()
    send_requests = []
    pre_assignments = defaultdict(list)
    min_packing_count = 1

    for job in self.job_cache:
        if job.node_packing_count < min_packing_count:
            continue
        job_occ = 1.0 / job.node_packing_count

        free_ranks = (i for i in range(1, comm.size)
                      if self.node_occupancy[i] + job_occ < 1.0001)
        rank = next(free_ranks, None)

        if rank is None:
            logger.debug(f'no free ranks to assign {job.cute_id}')
            min_packing_count = job.node_packing_count + 1
        else:
            pre_assignments[rank].append(job)
            self.pre_assign(rank, job)

    if len(pre_assignments) == 0:
        return False

    to_acquire = [
        job.pk for rank in pre_assignments
        for job in pre_assignments[rank]
    ]
    acquired_pks = self.job_source.acquire(to_acquire)
    logger.info(
        f'Acquired lock on {len(acquired_pks)} out of {len(to_acquire)} jobs marked for running'
    )

    # Make the actual assignments: send only the jobs whose lock was acquired;
    # revert the pre-assignment for the rest.
    for (rank, pre_jobs) in pre_assignments.items():
        runjobs = []
        for j in pre_jobs:
            if j.pk in acquired_pks:
                runjobs.append(j)
                self.job_cache.remove(j)
            else:
                self.revert_assign(rank, j.pk)

        if runjobs:
            mpiReq = self._send_jobs(runjobs, rank)
            logger.info(
                f"Sent {len(runjobs)} jobs to rank {rank}: "
                f"occupancy is now {self.node_occupancy[rank]}"
            )
            send_requests.append(mpiReq)

    BalsamJob.batch_update_state(acquired_pks, 'RUNNING', self.RUN_MESSAGE)
    logger.debug("allocate_next_jobs: waiting on all isends...")
    MPI.Request.waitall(send_requests)
    logger.debug("allocate_next_jobs: all isends completed.")
    return len(acquired_pks) > 0
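# Illustrative sketch (an assumption, not the actual implementation): pre_assign()
# and revert_assign() are taken here to maintain the two bookkeeping structures used
# above -- node_occupancy, the fraction of each rank's node in use, and
# running_locations, a map from job pk to the rank it was sent to. The per-job
# occupancy record (self.job_occupancy) is hypothetical. A minimal version:
#
#     def pre_assign(self, rank, job):
#         job_occ = 1.0 / job.node_packing_count   # a job packed N-per-node takes 1/N
#         self.node_occupancy[rank] += job_occ
#         self.job_occupancy[job.pk] = job_occ     # hypothetical per-job record
#         self.running_locations[job.pk] = rank
#
#     def revert_assign(self, rank, pk):
#         # Give the slice back when the job finishes or its lock was not acquired.
#         self.node_occupancy[rank] -= self.job_occupancy.pop(pk, 0.0)
#         self.running_locations.pop(pk, None)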
def update_states_from_cache(job_cache):
    # Update states of fast-forwarded jobs
    update_jobs = defaultdict(list)
    failed_jobs = []

    for job in job_cache:
        if job.state != job.__old_state:
            job.__old_state = job.state
            if job.state != 'FAILED':
                update_jobs[job.state].append(job.pk)
            else:
                failed_jobs.append(job)

    if failed_jobs:
        fail_update(failed_jobs)
    for newstate, joblist in update_jobs.items():
        BalsamJob.batch_update_state(joblist, newstate)
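# Illustrative sketch (an assumption, not the actual implementation): for the
# comparison above to detect changes, whatever refreshes job_cache must stamp each
# job with the state it had when it was fetched, e.g.:
#
#     for job in job_cache:
#         job.__old_state = job.state   # remember the state at fetch time
#
# Local ("fast-forwarded") transitions then change job.state in memory, and
# update_states_from_cache() pushes to the database only the jobs whose state
# actually differs from that snapshot, batched by new state.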
def exit(self):
    # Time out any jobs still running, release owned locks, tell the worker
    # ranks to exit, and finalize MPI.
    outstanding_job_pks = list(self.manager.running_locations.keys())
    num_timeout = len(outstanding_job_pks)
    logger.info(
        f"Shutting down with {num_timeout} jobs still running... timing out")
    BalsamJob.batch_update_state(outstanding_job_pks, 'RUN_TIMEOUT',
                                 'timed out in MPI Ensemble')
    self.manager.job_source.release_all_owned()
    self.manager.send_exit()
    logger.debug("send_exit: master done")
    logger.info("master calling MPI Finalize")
    MPI.Finalize()
    logger.info("ensemble master exiting gracefully")
    sys.exit(0)
def perform_updates(self, update_msgs):
    start_pks = []
    done_pks = []
    error_msgs = []

    for msg in update_msgs:
        if msg == 'exit':
            continue
        start_pks.extend(uuid.UUID(pk) for pk in msg['started'])  # pk list
        done_pks.extend(uuid.UUID(pk) for pk in msg['done'])      # pk list
        error_msgs.extend(msg['error'])  # list: (pk, retcode, tail)

    if start_pks:
        BalsamJob.batch_update_state(start_pks, 'RUNNING')
        logger.info(f"StatusUpdater marked {len(start_pks)} RUNNING")
    if done_pks:
        BalsamJob.batch_update_state(done_pks, 'RUN_DONE', release=True)
        logger.info(f"StatusUpdater marked {len(done_pks)} DONE")
    if error_msgs:
        self._handle_errors(error_msgs)
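# Illustrative example (shape inferred only from the keys read above, not taken
# from the worker-side code): each update message is assumed to be a dict like
#
#     msg = {
#         'started': ['<uuid-str>', ...],   # pks of jobs that began running
#         'done':    ['<uuid-str>', ...],   # pks of jobs that finished cleanly
#         'error':   [('<uuid-str>', 1, 'tail of stderr...')],  # (pk, retcode, tail)
#     }
#
# with the literal string 'exit' used as a sentinel that perform_updates() skips.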