def test_slurm_max_array_size():
    max_array_size = 2

    executor = cluster_tools.get_executor("slurm", debug=True)
    original_max_array_size = executor.get_max_array_size()

    command = f"MaxArraySize={max_array_size}"
    _, _, exit_code = call(
        f"echo -e '{command}' >> /etc/slurm/slurm.conf && scontrol reconfigure"
    )

    try:
        assert exit_code == 0

        new_max_array_size = executor.get_max_array_size()
        assert new_max_array_size == max_array_size

        with executor:
            futures = executor.map_to_futures(square, range(6))
            concurrent.futures.wait(futures)
            job_ids = [fut.cluster_jobid for fut in futures]

            # Count how often each job_id occurs, which corresponds to the array size of the job
            occurrences = list(Counter(job_ids).values())

            assert all(array_size <= max_array_size for array_size in occurrences)
    finally:
        _, _, exit_code = call(
            f"sed -i 's/{command}//g' /etc/slurm/slurm.conf && scontrol reconfigure"
        )
        assert exit_code == 0

        reset_max_array_size = executor.get_max_array_size()
        assert reset_max_array_size == original_max_array_size
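# The tests in this section rely on a few helpers that are not shown here. The
# following is a minimal sketch of the work-package functions `square` and `sleep`
# plus the imports they need, assuming they are trivial test helpers; the actual
# definitions live elsewhere in the cluster_tools test suite and may differ.
import concurrent.futures
import time
from collections import Counter


def square(n):
    # Trivial remote work package used by the array-size and submit-limit tests.
    return n * n


def sleep(duration):
    # Work package that simply blocks for the given number of seconds.
    time.sleep(duration)
    return duration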
def test_slurm_max_submit_user():
    max_submit_jobs = 6

    # MaxSubmitJobs can be defined either at the user or at the qos level
    for command in ["user root", "qos normal"]:
        executor = cluster_tools.get_executor("slurm", debug=True)
        original_max_submit_jobs = executor.get_max_submit_jobs()

        _, _, exit_code = call(
            f"echo y | sacctmgr modify {command} set MaxSubmitJobs={max_submit_jobs}"
        )
        try:
            assert exit_code == 0

            new_max_submit_jobs = executor.get_max_submit_jobs()
            assert new_max_submit_jobs == max_submit_jobs

            with executor:
                futures = executor.map_to_futures(square, range(10))

                result = [fut.result() for fut in futures]
                assert result == [i**2 for i in range(10)]

                job_ids = {fut.cluster_jobid for fut in futures}

                # The 10 work packages should have been scheduled as 5 separate jobs,
                # because cluster_tools schedules at most 1/3 of MaxSubmitJobs at once.
                assert len(job_ids) == 5
        finally:
            _, _, exit_code = call(
                f"echo y | sacctmgr modify {command} set MaxSubmitJobs=-1"
            )
            assert exit_code == 0

            reset_max_submit_jobs = executor.get_max_submit_jobs()
            assert reset_max_submit_jobs == original_max_submit_jobs
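# The expected job count in the test above follows directly from the batching rule
# mentioned in its comment: with MaxSubmitJobs=6, at most 6 / 3 = 2 work packages go
# into a single submission, so 10 packages require ceil(10 / 2) = 5 jobs. A minimal
# sketch of that arithmetic (the real batching logic in cluster_tools may differ):
import math


def expected_job_count(num_tasks, max_submit_jobs):
    # Schedule at most one third of MaxSubmitJobs per submission (assumed rule).
    batch_size = max(1, max_submit_jobs // 3)
    return math.ceil(num_tasks / batch_size)


assert expected_job_count(10, 6) == 5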
def test_slurm_deferred_submit():
    max_submit_jobs = 1  # Only one job can be scheduled at a time

    _, _, exit_code = call(
        f"echo y | sacctmgr modify qos normal set MaxSubmitJobs={max_submit_jobs}"
    )
    executor = cluster_tools.get_executor("slurm", debug=True)

    try:
        with executor:
            time_of_start = time.time()
            futures = executor.map_to_futures(sleep, [0.5, 0.5])
            time_of_futures = time.time()
            concurrent.futures.wait(futures)
            time_of_result = time.time()

            # The futures should be returned before each job was scheduled
            assert time_of_futures - time_of_start < 0.5

            # Computing the results should have taken at least one second,
            # since only one job is scheduled at a time and each job takes 0.5 seconds
            assert time_of_result - time_of_start > 1
    finally:
        _, _, exit_code = call(
            "echo y | sacctmgr modify qos normal set MaxSubmitJobs=-1"
        )
def get_max_submit_jobs():
    max_submit_jobs_env = os.environ.get("SLURM_MAX_SUBMIT_JOBS", None)
    if max_submit_jobs_env is not None:
        logging.debug(
            f"SLURM_MAX_SUBMIT_JOBS env variable specified which is {max_submit_jobs_env}."
        )
        return int(max_submit_jobs_env)

    max_submit_jobs = 2**32
    # Check whether there is a limit per user
    stdout_user, stderr_user, _ = call(
        "sacctmgr list -n user $USER withassoc format=maxsubmitjobsperuser"
    )
    try:
        max_submit_jobs = int(stdout_user.decode("utf8"))
    except ValueError:
        # If there is no limit per user, check whether there is a general limit
        stdout_qos, stderr_qos, _ = call(
            "sacctmgr list -n qos normal format=maxsubmitjobsperuser"
        )
        try:
            max_submit_jobs = int(stdout_qos.decode("utf8"))
        except ValueError:
            logging.warning(
                f"Slurm's MaxSubmitJobsPerUser couldn't be determined. Reason: {stderr_user}\n{stderr_qos}"
            )
            return max_submit_jobs

    logging.debug(f"Slurm MaxSubmitJobsPerUser is {max_submit_jobs}.")
    return max_submit_jobs
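# `call` is used throughout this section but is not defined here. It is assumed to be
# a small subprocess wrapper that runs a shell command and returns the tuple
# (stdout, stderr, exit_code) with stdout/stderr as bytes; a sketch under that
# assumption (the actual helper in cluster_tools may differ):
import subprocess


def call(command):
    # Run the command through a shell and capture its output.
    completed = subprocess.run(command, shell=True, capture_output=True)
    return completed.stdout, completed.stderr, completed.returncode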
def check_job_state(
    self, job_id_with_index
) -> Literal["failed", "ignore", "completed"]:
    job_states = []

    # If the output file was not found, we determine the job status so that
    # we can recognize jobs which failed hard (in this case, they don't produce output files)
    stdout, _, exit_code = call("scontrol show job {}".format(job_id_with_index))
    stdout = stdout.decode("utf8")

    if exit_code == 0:
        job_state_search = re.search("JobState=([a-zA-Z_]*)", stdout)
        if job_state_search:
            job_states = [job_state_search.group(1)]
        else:
            logging.error(
                "Could not extract slurm job state? {}".format(stdout[0:10])
            )
    else:
        stdout, _, exit_code = call(
            "sacct -j {} -o State -P".format(job_id_with_index)
        )
        stdout = stdout.decode("utf8")

        if exit_code == 0:
            job_states = stdout.split("\n")[1:]

    if len(job_states) == 0:
        logging.error(
            "Could call neither scontrol nor sacct to determine the job's status. "
            "Continuing to poll for the output file. This could be an indicator for a "
            "failed job which was already cleaned up from the slurm db. If this is the "
            "case, the process will hang forever."
        )
        return "ignore"

    def matches_states(slurm_states):
        return len(set(job_states) & set(slurm_states)) > 0

    if matches_states(SLURM_STATES["Failure"]):
        return "failed"
    elif matches_states(SLURM_STATES["Ignore"]):
        return "ignore"
    elif matches_states(SLURM_STATES["Unclear"]):
        logging.warning(
            "The job state for {} is {}. It's unclear whether the job will recover. "
            "Will wait further.".format(job_id_with_index, job_states)
        )
        return "ignore"
    elif matches_states(SLURM_STATES["Success"]):
        return "completed"
    else:
        logging.error(
            "Unhandled slurm job state for job id {}? {}".format(
                job_id_with_index, job_states
            )
        )
        return "ignore"
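# SLURM_STATES is referenced above but not defined in this section. It is assumed to
# map the four categories used by check_job_state to collections of Slurm state names.
# The grouping below is only an illustration based on states that Slurm documents; the
# actual mapping in cluster_tools may group them differently.
SLURM_STATES = {
    "Failure": {
        "FAILED",
        "CANCELLED",
        "TIMEOUT",
        "OUT_OF_MEMORY",
        "NODE_FAIL",
        "BOOT_FAIL",
        "DEADLINE",
    },
    "Ignore": {"PENDING", "CONFIGURING", "RUNNING", "COMPLETING", "REQUEUED"},
    "Unclear": {"SUSPENDED", "PREEMPTED", "RESIZING"},
    "Success": {"COMPLETED"},
}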
def test_slurm_deferred_submit_shutdown():
    # Test that the SlurmExecutor stops scheduling jobs in a separate thread
    # once it was killed, even if the executor was used multiple times and
    # therefore started multiple job submission threads
    max_submit_jobs = 1  # Only one job can be scheduled at a time

    _, _, exit_code = call(
        f"echo y | sacctmgr modify qos normal set MaxSubmitJobs={max_submit_jobs}"
    )
    executor = cluster_tools.get_executor("slurm", debug=True)

    try:
        # Use the executor twice to start multiple job submission threads
        executor.map_to_futures(sleep, [0.5] * 10)
        executor.map_to_futures(sleep, [0.5] * 10)

        wait_until_first_job_was_submitted(executor)

        for submit_thread in executor.submit_threads:
            assert submit_thread.is_alive()

        with pytest.raises(SystemExit) as pytest_wrapped_e:
            executor.handle_kill(None, None)
        assert pytest_wrapped_e.type == SystemExit
        assert pytest_wrapped_e.value.code == 130

        # Wait for the threads to die down, but for less time than it would take to
        # submit all jobs (~5 seconds, since only one job is scheduled at a time)
        for submit_thread in executor.submit_threads:
            submit_thread.join(1)
            assert not submit_thread.is_alive()

        # Wait for the scheduled jobs to finish, so that the queue is empty again
        while executor.get_number_of_submitted_jobs() > 0:
            time.sleep(0.5)
    finally:
        _, _, exit_code = call(
            "echo y | sacctmgr modify qos normal set MaxSubmitJobs=-1"
        )
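# wait_until_first_job_was_submitted is another test helper that is not shown here.
# Presumably it just polls the queue until the executor reports at least one submitted
# job; a minimal sketch under that assumption (the timeout and polling interval are
# illustrative, not the real helper):
import time


def wait_until_first_job_was_submitted(executor, timeout=30):
    start = time.time()
    while executor.get_number_of_submitted_jobs() <= 0:
        assert time.time() - start < timeout, "No job was submitted within the timeout"
        time.sleep(0.1)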
def get_max_submit_jobs():
    max_submit_jobs = 2**32
    # Check whether there is a limit per user
    stdout_user, stderr_user, _ = call(
        "sacctmgr list -n user $USER withassoc format=maxsubmitjobsperuser"
    )
    try:
        max_submit_jobs = int(stdout_user.decode("utf8"))
    except ValueError:
        # If there is no limit per user, check whether there is a general limit
        stdout_qos, stderr_qos, _ = call(
            "sacctmgr list -n qos normal format=maxsubmitjobsperuser"
        )
        try:
            max_submit_jobs = int(stdout_qos.decode("utf8"))
        except ValueError:
            logging.warning(
                f"Slurm's MaxSubmitJobsPerUser couldn't be determined. Reason: {stderr_user}\n{stderr_qos}"
            )
            return max_submit_jobs

    logging.debug(f"Slurm MaxSubmitJobsPerUser is {max_submit_jobs}.")
    return max_submit_jobs
def get_max_array_size():
    max_array_size = 2**32

    # See https://unix.stackexchange.com/a/364615
    stdout, stderr, exit_code = call(
        "scontrol show config | sed -n '/^MaxArraySize/s/.*= *//p'"
    )
    if exit_code == 0:
        max_array_size = int(stdout.decode("utf8"))
        logging.debug(f"Slurm MaxArraySize is {max_array_size}.")
    else:
        logging.warning(
            f"Slurm's MaxArraySize couldn't be determined. Reason: {stderr}"
        )

    return max_array_size
def get_number_of_submitted_jobs():
    number_of_submitted_jobs = 0
    # --array, so that each job array element is displayed on a separate line, and -h to hide the header
    stdout, stderr, exit_code = call("squeue --array -u $USER -h | wc -l")

    if exit_code == 0:
        number_of_submitted_jobs = int(stdout.decode("utf8"))
        logging.debug(
            f"Number of currently submitted jobs is {number_of_submitted_jobs}."
        )
    else:
        logging.warning(
            f"Number of currently submitted jobs couldn't be determined. Reason: {stderr}"
        )
    return number_of_submitted_jobs
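# get_max_submit_jobs and get_number_of_submitted_jobs are presumably combined by the
# deferred-submission loop (exercised by test_slurm_deferred_submit above): before the
# next batch of jobs is submitted, the queue length is compared against MaxSubmitJobs
# and submission is postponed otherwise. A simplified sketch of such a throttle; the
# real scheduling loop in cluster_tools is more involved:
import time


def wait_for_submission_slot(poll_interval=1.0):
    max_submit_jobs = get_max_submit_jobs()
    # Block until submitting more jobs would stay within Slurm's limit.
    while get_number_of_submitted_jobs() >= max_submit_jobs:
        time.sleep(poll_interval)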
def check_job_state(
    self, job_id_with_index
) -> Literal["failed", "ignore", "completed"]:
    if len(str(job_id_with_index).split("_")) >= 2:
        a, b = job_id_with_index.split("_")
        job_id_with_index = f"{a}[{b}]"

    # If the output file was not found, we determine the job status so that
    # we can recognize jobs which failed hard (in this case, they don't produce output files)
    stdout, _, exit_code = call("qstat -f {}".format(job_id_with_index))
    stdout = stdout.decode("utf8")

    if exit_code != 0:
        logging.error(
            "Couldn't call qstat to determine the status of job {}. Continuing to poll "
            "for the output file. This could be an indicator for a failed job which was "
            "already cleaned up from the pbs db. If this is the case, the process will "
            "hang forever.".format(job_id_with_index)
        )
        return "ignore"
    else:
        job_state_search = re.search("job_state = ([a-zA-Z_]*)", stdout)
        if job_state_search:
            job_state = job_state_search.group(1)

            if job_state in PBS_STATES["Failure"]:
                return "failed"
            elif job_state in PBS_STATES["Ignore"]:
                return "ignore"
            elif job_state in PBS_STATES["Unclear"]:
                logging.warning(
                    "The job state for {} is {}. It's unclear whether the job will "
                    "recover. Will wait further.".format(job_id_with_index, job_state)
                )
                return "ignore"
            elif job_state in PBS_STATES["Success"]:
                return "completed"
            else:
                logging.error("Unhandled pbs job state? {}".format(job_state))
                return "ignore"
        else:
            logging.error(
                "Could not extract pbs job state? {}...".format(stdout[0:10])
            )
            return "ignore"
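# PBS_STATES is referenced above but not defined in this section. It is assumed to map
# the same four categories to the single-letter job states reported by qstat. The
# grouping below is only an illustration based on documented PBS/Torque states; the
# actual mapping in cluster_tools may differ.
PBS_STATES = {
    # qstat has no dedicated "failed" letter; failures are typically detected via the
    # job's exit status instead, so this set may well be empty in practice.
    "Failure": set(),
    "Ignore": {"B", "H", "Q", "R", "S", "T", "W"},
    "Unclear": {"E"},
    "Success": {"C", "F", "X"},
}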
def get_max_array_size():
    max_array_size_env = os.environ.get("SLURM_MAX_ARRAY_SIZE", None)
    if max_array_size_env is not None:
        logging.debug(
            f"SLURM_MAX_ARRAY_SIZE env variable specified which is {max_array_size_env}."
        )
        return int(max_array_size_env)

    max_array_size = 2**32

    # See https://unix.stackexchange.com/a/364615
    stdout, stderr, exit_code = call(
        "scontrol show config | sed -n '/^MaxArraySize/s/.*= *//p'"
    )
    if exit_code == 0:
        max_array_size = int(stdout.decode("utf8"))
        logging.debug(f"Slurm MaxArraySize is {max_array_size}.")
    else:
        logging.warning(
            f"Slurm's MaxArraySize couldn't be determined. Reason: {stderr}"
        )

    return max_array_size
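# get_max_array_size is presumably used to split a large map_to_futures call into
# several array jobs whose index ranges stay within MaxArraySize, which is what
# test_slurm_max_array_size above verifies by counting futures per job id. A minimal
# sketch of such a chunking step, assuming tasks are split into consecutive batches:
def chunk_tasks(tasks, max_array_size):
    tasks = list(tasks)
    return [tasks[i : i + max_array_size] for i in range(0, len(tasks), max_array_size)]


# For example, 6 work packages with MaxArraySize=2 end up in 3 array jobs of size 2.
assert chunk_tasks(range(6), 2) == [[0, 1], [2, 3], [4, 5]]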
def investigate_failed_job(
    self, job_id_with_index
) -> Optional[Tuple[str, Type[RemoteException]]]:
    # We call `seff job_id`, which should return some output including a line
    # such as: "Memory Efficiency: 25019.18% of 1.00 GB"
    stdout, _, exit_code = call("seff {}".format(job_id_with_index))
    if exit_code != 0:
        return None

    # Parse stdout into a key-value object
    properties = {}
    stdout = stdout.decode("utf8")
    for line in stdout.split("\n"):
        if ":" not in line:
            continue
        key, value = line.split(":", 1)
        properties[key.strip()] = value.strip()

    def investigate_memory_consumption():
        if not properties.get("Memory Efficiency", None):
            return None

        # Extract the "25019.18% of 1.00 GB" part of the line
        efficiency_note = properties["Memory Efficiency"]
        PERCENTAGE_REGEX = r"([0-9]+(\.[0-9]+)?)%"

        # Extract the percentage to see whether it exceeds 100%.
        match = re.search(PERCENTAGE_REGEX, efficiency_note)
        if match is None:
            return None

        try:
            percentage = float(match.group(1))
        except ValueError:
            return None

        if percentage < 100:
            return None

        reason = f"The job was probably terminated because it consumed too much memory ({efficiency_note})."
        return (reason, RemoteOutOfMemoryException)

    def investigate_exit_code():
        if not properties.get("State", None):
            return None
        if "exit code 137" not in properties["State"]:
            return None
        reason = (
            "The job was probably terminated because it consumed too "
            "much memory (at least, the exit code 137 suggests this). Please "
            "use the `seff` utility to inspect the failed job and its potential "
            "job siblings (in case of an array job) to double-check the memory "
            "consumption."
        )
        return (reason, RemoteOutOfMemoryException)

    investigation = investigate_memory_consumption()
    if investigation:
        return investigation

    return investigate_exit_code()
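# The (reason, exception_type) pair returned above is presumably consumed by the code
# path that marks a job as failed, so that the corresponding future carries a more
# specific error such as RemoteOutOfMemoryException. A sketch of that hand-off; the
# method name and the plain-message exception constructors are assumptions, not the
# real cluster_tools API:
def report_job_failure(self, future, job_id_with_index):
    investigation = self.investigate_failed_job(job_id_with_index)
    if investigation is not None:
        reason, exception_type = investigation
        # Assumes the exception type accepts a plain message; the real constructor may differ.
        future.set_exception(exception_type(reason))
    else:
        # Fall back to a generic error if nothing more specific could be determined.
        future.set_exception(Exception("Job {} failed.".format(job_id_with_index)))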