def test_get_pending_jobs_info(
    pending_jobs, max_slots_filter, max_nodes_filter, filter_by_pending_reasons, expected_output, mocker
):
    """
    Verify the output of get_pending_jobs_info for a given set of filters.

    get_jobs_info is patched to return the ``pending_jobs`` input, so the test only
    exercises what get_pending_jobs_info does with that list.
    NOTE(review): the arguments are presumably supplied by a parametrize decorator
    that is not visible here.
    """
    get_jobs_info_mock = mocker.patch(
        "common.schedulers.slurm_commands.get_jobs_info",
        return_value=pending_jobs,
        autospec=True,
    )

    actual_output = get_pending_jobs_info(max_slots_filter, max_nodes_filter, filter_by_pending_reasons)

    # The scheduler must be queried for jobs in the "PD" state only.
    get_jobs_info_mock.assert_called_with(job_state_filter="PD")
    assert_that(actual_output).is_equal_to(expected_output)
def hasPendingJobs(instance_properties, max_size):
    """
    Check whether get_pending_jobs_info reports any pending job.

    :param instance_properties: mapping whose "slots" entry is passed as max_slots_filter
    :param max_size: value passed as max_nodes_filter
    :return: a pair (has_pending_job, has_error); has_error is True when the check
        itself failed, in which case has_pending_job is reported as False.
    """
    try:
        pending_jobs = get_pending_jobs_info(
            max_slots_filter=instance_properties.get("slots"),
            max_nodes_filter=max_size,
            filter_by_pending_reasons=PENDING_RESOURCES_REASONS,
        )
        return len(pending_jobs) > 0, False
    except Exception as e:
        # Deliberately best-effort: any failure is surfaced through the has_error
        # flag instead of propagating to the caller.
        log.error("Failed when checking for pending jobs with exception %s. Reporting no pending jobs.", e)
        return False, True
def get_required_nodes(instance_properties, max_size):
    """
    Compute the number of nodes required by the pending jobs.

    :param instance_properties: mapping whose "slots" entry is passed as max_slots_filter;
        also forwarded unchanged to get_optimal_nodes
    :param max_size: value passed as max_nodes_filter
    :return: whatever get_optimal_nodes returns for the collected requests
    """
    log.info("Computing number of required nodes for submitted jobs")
    pending_jobs = get_pending_jobs_info(
        max_slots_filter=instance_properties.get("slots"),
        max_nodes_filter=max_size,
        filter_by_pending_reasons=PENDING_RESOURCES_REASONS,
    )

    # Collect both values per job in a single pass, then split them into the two
    # parallel lists expected by get_optimal_nodes.
    job_demands = [(job.cpus_total, job.nodes) for job in pending_jobs]
    slots_requested = [slots for slots, _ in job_demands]
    nodes_requested = [nodes for _, nodes in job_demands]

    return get_optimal_nodes(nodes_requested, slots_requested, instance_properties)
def hasPendingJobs(instance_properties, max_size):
    """
    Check if there is any pending job in the queue.

    :param instance_properties: mapping whose "slots" entry is passed as max_slots_filter
    :param max_size: value passed as max_nodes_filter
    :return: a pair (has_pending_job, has_error) where has_error communicates if there was
        an error when checking for pending jobs. On error has_pending_job is False.
    """
    try:
        pending_jobs = get_pending_jobs_info(
            max_slots_filter=instance_properties.get("slots"),
            max_nodes_filter=max_size,
            filter_by_pending_reasons=PENDING_RESOURCES_REASONS,
        )
        return len(pending_jobs) > 0, False
    except Exception as e:
        # Deliberately best-effort: any failure is surfaced through the has_error
        # flag instead of propagating to the caller.
        log.error("Failed when checking for pending jobs with exception %s. Reporting no pending jobs.", e)
        return False, True
def get_required_nodes(instance_properties, max_size):
    """
    Compute the number of nodes required by the pending jobs, accounting for GPUs and slots.

    :param instance_properties: passed to get_pending_jobs_info as a filter and forwarded
        unchanged to get_optimal_nodes
    :param max_size: value passed as max_nodes_filter
    :return: whatever get_optimal_nodes returns for the collected requests
    """
    log.info("Computing number of required nodes for submitted jobs")
    pending_jobs = get_pending_jobs_info(
        instance_properties=instance_properties,
        max_nodes_filter=max_size,
        filter_by_pending_reasons=PENDING_RESOURCES_REASONS,
    )
    # Use the module logger (not the root logger) so both messages of this
    # function are attributed to the same logger.
    log.info("Found the following pending jobs:\n%s", pending_jobs)

    resources_requested = []
    nodes_requested = []
    for job in pending_jobs:
        # One resources dict per job; its position matches the job's entry in nodes_requested.
        resources_requested.append({"gpus": process_gpus_total_for_job(job), "slots": job.cpus_total})
        nodes_requested.append(job.nodes)

    return get_optimal_nodes(nodes_requested, resources_requested, instance_properties)