Example 1
def _test_job_arrays_and_parallel_jobs(remote_command_executor, region,
                                       stack_name, scaledown_idletime,
                                       max_slots):
    logging.info(
        "Testing cluster scales correctly with array jobs and parallel jobs")
    sge_commands = SgeCommands(remote_command_executor)

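    # Submit an array job with 5 tasks, each running "sleep 1"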
    result = remote_command_executor.run_remote_command(
        "echo 'sleep 1' | qsub -t 1-5", raise_on_error=False)
    array_job_id = sge_commands.assert_job_submitted(result.stdout,
                                                     is_array=True)

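    # Submit a parallel job requesting 4 slots in the "mpi" parallel environment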
    result = remote_command_executor.run_remote_command(
        "echo 'sleep 1' | qsub -pe mpi 4", raise_on_error=False)
    parallel_job_id = sge_commands.assert_job_submitted(result.stdout)

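    # 5 array tasks + 4 MPI slots must be scheduled, i.e. ceil(9 / max_slots) nodes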
    # Assert scaling worked as expected
    expected_max = math.ceil(float(5 + 4) / max_slots)
    assert_scaling_worked(sge_commands,
                          region,
                          stack_name,
                          scaledown_idletime,
                          expected_max=expected_max,
                          expected_final=0)
    # Assert jobs were completed
    sge_commands.assert_job_succeeded(array_job_id)
    sge_commands.assert_job_succeeded(parallel_job_id)
Example 2
def _test_job_dependencies(remote_command_executor, region, stack_name,
                           scaledown_idletime, max_queue_size):
    logging.info(
        "Testing cluster doesn't scale when job dependencies are not satisfied"
    )
    slurm_commands = SlurmCommands(remote_command_executor)
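    # Submit a 60-second job, then a second job that can only start after it completes successfully (afterok)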
    result = slurm_commands.submit_command("sleep 60", nodes=1)
    job_id = slurm_commands.assert_job_submitted(result.stdout)
    result = remote_command_executor.run_remote_command(
        "sbatch -N 1 --wrap='sleep 1' -d afterok:{0}".format(job_id))
    dependent_job_id = slurm_commands.assert_job_submitted(result.stdout)

    # Wait for reason to be computed
    time.sleep(3)
    assert_that(slurm_commands.get_job_info(job_id)).contains(
        "JobState=PENDING Reason=Nodes_required_for_job_are_DOWN,_DRAINED"
        "_or_reserved_for_jobs_in_higher_priority_partitions")
    assert_that(slurm_commands.get_job_info(dependent_job_id)).contains(
        "JobState=PENDING Reason=Dependency")

    assert_scaling_worked(slurm_commands,
                          region,
                          stack_name,
                          scaledown_idletime,
                          expected_max=1,
                          expected_final=0)
    # Assert scheduler configuration is correct
    _assert_dummy_nodes(remote_command_executor, max_queue_size)
    assert_that(
        _retrieve_slurm_compute_nodes_from_config(
            remote_command_executor)).is_empty()
    # Assert jobs were completed
    _assert_job_completed(remote_command_executor, job_id)
    _assert_job_completed(remote_command_executor, dependent_job_id)
Example 3
def _test_job_dependencies(remote_command_executor, region, stack_name,
                           scaledown_idletime):
    logging.info(
        "Testing cluster doesn't scale when job dependencies are not satisfied"
    )
    sge_commands = SgeCommands(remote_command_executor)
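    # Submit a 60-second job, then a second job held until the first one completes (-hold_jid)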
    result = sge_commands.submit_command("sleep 60", nodes=1)
    job_id = sge_commands.assert_job_submitted(result.stdout)
    result = remote_command_executor.run_remote_command(
        "echo 'sleep 1' | qsub -hold_jid {0}".format(job_id),
        raise_on_error=False)
    dependent_job_id = sge_commands.assert_job_submitted(result.stdout)

    assert_that(_get_job_state(remote_command_executor,
                               dependent_job_id)).is_equal_to("hqw")

    # Assert scaling worked as expected
    assert_scaling_worked(sge_commands,
                          region,
                          stack_name,
                          scaledown_idletime,
                          expected_max=1,
                          expected_final=0)
    # Assert jobs were completed
    sge_commands.assert_job_succeeded(job_id)
    sge_commands.assert_job_succeeded(dependent_job_id)
Example 4
def assert_overscaling_when_job_submitted_during_scaledown(
    remote_command_executor, scheduler, region, stack_name, scaledown_idletime
):
    """Test that if a job gets submitted when a node is locked the cluster does not overscale"""
    logging.info("Testing cluster does not overscale when a job is submitted and a node is being terminated.")
    scheduler_commands = get_scheduler_commands(scheduler, remote_command_executor)
    if scheduler_commands.compute_nodes_count() == 0:
        result = scheduler_commands.submit_command("sleep 1")
        job_id = scheduler_commands.assert_job_submitted(result.stdout)
        scheduler_commands.wait_job_completed(job_id)
    assert_that(scheduler_commands.compute_nodes_count()).is_equal_to(1)

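    # Wait for the idle compute node to be locked for scale-down before submitting the next job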
    scheduler_commands.wait_for_locked_node()

    result = scheduler_commands.submit_command("sleep 1")
    scheduler_commands.assert_job_submitted(result.stdout)
    # Do not check scheduler scaling, only the ASG.
    assert_scaling_worked(
        scheduler_commands,
        region,
        stack_name,
        scaledown_idletime,
        expected_max=1,
        expected_final=0,
        assert_scheduler=False,
    )
Example 5
def _test_job_dependencies(slurm_commands, region, stack_name,
                           scaledown_idletime):
    logging.info(
        "Testing cluster doesn't scale when job dependencies are not satisfied"
    )
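    # Submit a 60-second job, then a dependent job gated on its successful completion (afterok)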
    job_id = slurm_commands.submit_command_and_assert_job_accepted(
        submit_command_args={
            "command": "sleep 60",
            "nodes": 1
        })
    dependent_job_id = slurm_commands.submit_command_and_assert_job_accepted(
        submit_command_args={
            "command": "sleep 1",
            "nodes": 1,
            "after_ok": job_id
        })

    # Wait for reason to be computed
    time.sleep(3)
    # Job should be in CF and waiting for nodes to power_up
    assert_that(
        slurm_commands.get_job_info(job_id)).contains("JobState=CONFIGURING")
    assert_that(slurm_commands.get_job_info(dependent_job_id)).contains(
        "JobState=PENDING Reason=Dependency")

    assert_scaling_worked(slurm_commands,
                          region,
                          stack_name,
                          scaledown_idletime,
                          expected_max=1,
                          expected_final=0)
    # Assert jobs were completed
    _assert_job_completed(slurm_commands, job_id)
    _assert_job_completed(slurm_commands, dependent_job_id)
Example 6
def _test_job_arrays_and_parallel_jobs(remote_command_executor, region,
                                       stack_name, scaledown_idletime,
                                       max_slots):
    logging.info(
        "Testing cluster scales correctly with array jobs and parallel jobs")
    torque_commands = TorqueCommands(remote_command_executor)

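    # Submit an array job with max_slots tasks, each running "sleep 30"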
    result = remote_command_executor.run_remote_command(
        "echo 'sleep 30' | qsub -t 1-{0}".format(max_slots),
        raise_on_error=False)
    array_job_id = torque_commands.assert_job_submitted(result.stdout)

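    # Submit a parallel job requesting 2 nodes with 1 process per node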
    result = remote_command_executor.run_remote_command(
        "echo 'sleep 30' | qsub -l nodes=2:ppn=1", raise_on_error=False)
    parallel_job_id = torque_commands.assert_job_submitted(result.stdout)

    # Assert scaling worked as expected
    assert_scaling_worked(torque_commands,
                          region,
                          stack_name,
                          scaledown_idletime,
                          expected_max=3,
                          expected_final=0)
    # Assert jobs were completed
    for i in range(1, max_slots + 1):
        _assert_job_completed(remote_command_executor,
                              array_job_id.replace("[]", "[{0}]".format(i)))
    _assert_job_completed(remote_command_executor, parallel_job_id)
Example 7
def _gpu_test_scaleup(remote_command_executor, region, asg_name, stack_name,
                      scaledown_idletime, num_gpus):
    """Test cluster is scaling up correctly and GPU jobs are not aborted on slurmctld restart."""
    logging.info("Testing cluster scales correctly with GPU jobs")
    slurm_commands = SlurmCommands(remote_command_executor)
    # Assert initial conditions
    _assert_asg_has_no_node(region, asg_name)
    _assert_no_nodes_in_scheduler(slurm_commands)
    # g3.8xlarge has 32 vcpus and 2 GPUs, hardcoding tests for g3.8xlarge
    job_ids = []

    # sbatch --wrap 'sleep 10' -G 3
    result = slurm_commands.submit_command(command="sleep 10",
                                           nodes=-1,
                                           other_options="-G 3")
    job_ids.append(slurm_commands.assert_job_submitted(result.stdout))
    # Nodes/resources available after this job:
    # [{cpu:31, gpu:0}, {cpu:31, gpu:0}]

    # sbatch --wrap 'sleep 10' --cpus-per-gpu=10 --gpus-per-task=1
    result = slurm_commands.submit_command(
        command="sleep 10",
        nodes=-1,
        other_options="--cpus-per-gpu=10 --gpus-per-task=1")
    job_ids.append(slurm_commands.assert_job_submitted(result.stdout))
    # Nodes/resources available after this job:
    # [{cpu:31, gpu:0}, {cpu:31, gpu:0}, {cpu:22, gpu:1}]

    # sbatch --wrap 'sleep 10' -N 1 --gpus-per-node=1 -c 23 -n 1
    result = slurm_commands.submit_command(
        command="sleep 10",
        nodes=1,
        slots=1,
        other_options="--gpus-per-node=1 -c 23")
    job_ids.append(slurm_commands.assert_job_submitted(result.stdout))
    # Nodes/resources available after this job:
    # [{cpu:31, gpu:0}, {cpu:31, gpu:0}, {cpu:22, gpu:1}, {cpu:9, gpu:1}]

    # sbatch --wrap 'sleep 10' -c 31 -n 1
    result = slurm_commands.submit_command(command="sleep 10",
                                           nodes=-1,
                                           slots=1,
                                           other_options="-c 31")
    job_ids.append(slurm_commands.assert_job_submitted(result.stdout))
    # Nodes/resources available after this job:
    # [{cpu:0, gpu:0}, {cpu:31, gpu:0}, {cpu:22, gpu:1}, {cpu:9, gpu:1}]

    # Assert scaling worked as expected
    assert_scaling_worked(slurm_commands,
                          region,
                          stack_name,
                          scaledown_idletime,
                          expected_max=4,
                          expected_final=0)
    # Assert jobs were completed
    for job_id in job_ids:
        slurm_commands.assert_job_succeeded(job_id)
Example 8
def _test_non_runnable_jobs(remote_command_executor, max_queue_size, max_slots,
                            region, cluster, scaledown_idletime):
    logging.info("Testing jobs that violate scheduling requirements")
    torque_commands = TorqueCommands(remote_command_executor)

    # Make sure the cluster has at least 1 node in the queue so that we can verify cluster scales down correctly
    if torque_commands.compute_nodes_count() == 0:
        result = torque_commands.submit_command("sleep 1")
        job_id = torque_commands.assert_job_submitted(result.stdout)
        torque_commands.wait_job_completed(job_id)
    assert_that(torque_commands.compute_nodes_count()).is_greater_than(0)

    logging.info(
        "Testing cluster doesn't scale when job requires a capacity that is higher than the max available"
    )
    # nodes limit enforced by scheduler
    result = remote_command_executor.run_remote_command(
        "echo 'sleep 1000' | qsub -l nodes={0}".format(max_queue_size + 1),
        raise_on_error=False)
    assert_that(result.stdout).contains("Job exceeds queue resource limits")
    # ppn limit enforced by daemons
    result = remote_command_executor.run_remote_command(
        "echo 'sleep 1000' | qsub -l nodes=1:ppn={0}".format(max_slots + 1),
        raise_on_error=False)
    ppn_job_id = torque_commands.assert_job_submitted(result.stdout)
    # ppn total limit enforced by scheduler
    result = remote_command_executor.run_remote_command(
        "echo 'sleep 1000' | qsub -l nodes=1:ppn={0}".format(
            (max_slots * max_queue_size) + 1),
        raise_on_error=False)
    assert_that(result.stdout).contains("Job exceeds queue resource limits")
    # ncpus limit enforced by scheduler
    result = remote_command_executor.run_remote_command(
        "echo 'sleep 1000' | qsub -l ncpus={0}".format(max_slots + 1),
        raise_on_error=False)
    assert_that(result.stdout).contains("Job exceeds queue resource limits")

    logging.info("Testing cluster doesn't scale when job is set on hold")
    result = remote_command_executor.run_remote_command(
        "echo 'sleep 1000' | qsub -l nodes=1 -h", raise_on_error=False)
    hold_job_id = torque_commands.assert_job_submitted(result.stdout)

    logging.info(
        "Testing cluster scales down when pending jobs cannot be submitted")
    assert_scaling_worked(torque_commands,
                          region,
                          cluster.cfn_name,
                          scaledown_idletime,
                          expected_max=1,
                          expected_final=0)
    # Assert jobs are still pending
    assert_that(_get_job_state(remote_command_executor,
                               ppn_job_id)).is_equal_to("Q")
    assert_that(_get_job_state(remote_command_executor,
                               hold_job_id)).is_equal_to("H")
Example 9
def _test_mpi(
    remote_command_executor,
    slots_per_instance,
    scheduler,
    os,
    region=None,
    stack_name=None,
    scaledown_idletime=None,
    verify_scaling=False,
):
    logging.info("Testing mpi job")
    datadir = pathlib.Path(__file__).parent / "data/mpi/"
    mpi_module = OS_TO_OPENMPI_MODULE_MAP[os]
    # Compile mpi script
    command = "mpicc -o mpi_hello_world mpi_hello_world.c"
    if mpi_module != "no_module_available":
        command = "module load {0} && {1}".format(mpi_module, command)
    remote_command_executor.run_remote_command(
        command, additional_files=[str(datadir / "mpi_hello_world.c")])
    scheduler_commands = get_scheduler_commands(scheduler,
                                                remote_command_executor)

    # submit script using additional files
    result = scheduler_commands.submit_script(str(
        datadir / "mpi_submit_{0}.sh".format(mpi_module)),
                                              slots=2 * slots_per_instance)
    job_id = scheduler_commands.assert_job_submitted(result.stdout)

    if verify_scaling:
        assert_scaling_worked(scheduler_commands,
                              region,
                              stack_name,
                              scaledown_idletime,
                              expected_max=2,
                              expected_final=0)
        # not checking assert_job_succeeded after cluster scale down because the scheduler history might be gone
    else:
        scheduler_commands.wait_job_completed(job_id)
        scheduler_commands.assert_job_succeeded(job_id)

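    # Verify the MPI output: exactly one "Hello world" line per rank, 2 ranks in total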
    mpi_out = remote_command_executor.run_remote_command(
        "cat /shared/mpi.out").stdout
    assert_that(mpi_out.splitlines()).is_length(2)
    assert_that(mpi_out).matches(
        r"Hello world from processor ip-.+, rank 0 out of 2 processors")
    assert_that(mpi_out).matches(
        r"Hello world from processor ip-.+, rank 1 out of 2 processors")

    assert_no_errors_in_logs(remote_command_executor,
                             ["/var/log/sqswatcher", "/var/log/jobwatcher"])
Example 10
def _test_job_arrays_and_parallel_jobs(remote_command_executor, region, stack_name, scaledown_idletime):
    logging.info("Testing cluster scales correctly with array jobs and parallel jobs")
    slurm_commands = SlurmCommands(remote_command_executor)

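    # Submit an array job with 5 tasks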
    result = remote_command_executor.run_remote_command("sbatch --wrap 'sleep 1' -a 1-5")
    array_job_id = slurm_commands.assert_job_submitted(result.stdout)

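    # Submit a parallel job with 2 tasks, 3 CPUs per task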
    result = remote_command_executor.run_remote_command("sbatch --wrap 'sleep 1' -c 3 -n 2")
    parallel_job_id = slurm_commands.assert_job_submitted(result.stdout)

    # Assert scaling worked as expected
    assert_scaling_worked(slurm_commands, region, stack_name, scaledown_idletime, expected_max=3, expected_final=0)
    # Assert jobs were completed
    _assert_job_completed(remote_command_executor, array_job_id)
    _assert_job_completed(remote_command_executor, parallel_job_id)
Example 11
def _test_non_runnable_jobs(remote_command_executor, max_queue_size, max_slots,
                            region, cluster, scaledown_idletime):
    logging.info("Testing jobs that violate scheduling requirements")
    sge_commands = SgeCommands(remote_command_executor)

    # Make sure the cluster has at least 1 node in the queue so that we can verify cluster scales down correctly
    if sge_commands.compute_nodes_count() == 0:
        result = sge_commands.submit_command("sleep 1")
        job_id = sge_commands.assert_job_submitted(result.stdout)
        sge_commands.wait_job_completed(job_id)
    assert_that(sge_commands.compute_nodes_count()).is_greater_than(0)

    logging.info(
        "Testing cluster doesn't scale when job requires a capacity that is higher than the max available"
    )
    result = sge_commands.submit_command(
        "sleep 1000", slots=(max_slots * max_queue_size) + 1)
    max_slots_job_id = sge_commands.assert_job_submitted(result.stdout)
    assert_that(_get_job_state(remote_command_executor,
                               max_slots_job_id)).is_equal_to("qw")

    logging.info("Testing cluster doesn't scale when job is set on hold")
    result = sge_commands.submit_command("sleep 1000", hold=True)
    hold_job_id = sge_commands.assert_job_submitted(result.stdout)
    assert_that(_get_job_state(remote_command_executor,
                               hold_job_id)).is_equal_to("hqw")

    logging.info(
        "Testing cluster scales down when pending jobs cannot be submitted")
    assert_scaling_worked(sge_commands,
                          region,
                          cluster.cfn_name,
                          scaledown_idletime,
                          expected_max=1,
                          expected_final=0)
    # Assert jobs are still pending
    pending_jobs = remote_command_executor.run_remote_command(
        "qstat -s p | tail -n +3 | awk '{ print $1 }'").stdout
    pending_jobs = pending_jobs.splitlines()
    assert_that(pending_jobs).contains(max_slots_job_id, hold_job_id)
Example 12
def _test_job_arrays_and_parallel_jobs(slurm_commands, region, stack_name,
                                       scaledown_idletime, partition,
                                       instance_type, cpu_per_instance):
    logging.info(
        "Testing cluster scales correctly with array jobs and parallel jobs")

    # The following 2 jobs require a total of 3 nodes
    array_job_id = slurm_commands.submit_command_and_assert_job_accepted(
        submit_command_args={
            "command": "sleep 1",
            "nodes": -1,
            "partition": partition,
            "constraint": instance_type,
            "other_options": "-a 1-{0}".format(cpu_per_instance + 1),
        })

    parallel_job_id = slurm_commands.submit_command_and_assert_job_accepted(
        submit_command_args={
            "command": "sleep 1",
            "nodes": -1,
            "slots": 2,
            "partition": partition,
            "constraint": instance_type,
            "other_options": "-c {0}".format(cpu_per_instance - 1),
        })

    # Assert scaling worked as expected
    assert_scaling_worked(slurm_commands,
                          region,
                          stack_name,
                          scaledown_idletime,
                          expected_max=3,
                          expected_final=0)
    # Assert jobs were completed
    _assert_job_completed(slurm_commands, array_job_id)
    _assert_job_completed(slurm_commands, parallel_job_id)
Example 13
def _test_mpi(
    remote_command_executor,
    slots_per_instance,
    scheduler,
    region=None,
    stack_name=None,
    scaledown_idletime=None,
    verify_scaling=False,
    partition=None,
):
    logging.info("Testing mpi job")
    mpi_module = "openmpi"
    # Compile mpi script
    compile_mpi_ring(mpi_module, remote_command_executor)
    scheduler_commands = get_scheduler_commands(scheduler,
                                                remote_command_executor)

    if partition:
        # submit script using additional files
        result = scheduler_commands.submit_script(
            str(MPI_COMMON_DATADIR / "mpi_submit_{0}.sh".format(mpi_module)),
            slots=2 * slots_per_instance,
            partition=partition,
        )
    else:
        # submit script using additional files
        result = scheduler_commands.submit_script(str(
            MPI_COMMON_DATADIR / "mpi_submit_{0}.sh".format(mpi_module)),
                                                  slots=2 * slots_per_instance)
    job_id = scheduler_commands.assert_job_submitted(result.stdout)

    if verify_scaling:
        assert_scaling_worked(scheduler_commands,
                              region,
                              stack_name,
                              scaledown_idletime,
                              expected_max=2,
                              expected_final=0)
        # not checking assert_job_succeeded after cluster scale down because the scheduler history might be gone
    else:
        scheduler_commands.wait_job_completed(job_id)
        scheduler_commands.assert_job_succeeded(job_id)

    mpi_out = remote_command_executor.run_remote_command(
        "cat /shared/mpi.out").stdout
    # mpi_out expected output
    # Hello world from processor ip-192-168-53-169, rank 0 out of 2 processors
    # Process 0 received token -1 from process 1
    # Hello world from processor ip-192-168-60-9, rank 1 out of 2 processors
    # Process 1 received token -1 from process 0
    assert_that(mpi_out.splitlines()).is_length(4)
    # With Slurm HIT clusters, the DNS name matches the nodename and starts with the partition name
    # Example: efa-enabled-st-c5n18xlarge-2
    if partition:
        nodename_prefix = partition
    elif scheduler == "slurm":
        nodename_prefix = ""
    else:
        nodename_prefix = "ip-"
    assert_that(mpi_out).matches(
        r"Hello world from processor {0}.+, rank 0 out of 2 processors".format(
            nodename_prefix))
    assert_that(mpi_out).matches(
        r"Hello world from processor {0}.+, rank 1 out of 2 processors".format(
            nodename_prefix))
    assert_that(mpi_out).contains("Process 0 received token -1 from process 1")
    assert_that(mpi_out).contains("Process 1 received token -1 from process 0")

    assert_no_errors_in_logs(remote_command_executor, scheduler)