Example #1
0
def test_parse_output_valid():
    """Test `SlurmScheduler.parse_output` for valid arguments."""
    # Build minimal detailed job info: a header line plus one row whose
    # (empty) fields match the number of fields the parser expects.
    field_count = len(SlurmScheduler._detailed_job_info_fields)  # pylint: disable=protected-access
    detailed_job_info = {'stdout': 'Header\n' + '|' * field_count}

    assert SlurmScheduler().parse_output(detailed_job_info, '', '') is None
Example #2
0
    def test_submit_script_with_num_cores_per_machine_and_mpiproc1(self):  # pylint: disable=invalid-name
        """Verify the submit script when both core counts are consistent.

        Passing both ``num_cores_per_machine`` and ``num_cores_per_mpiproc``
        with values that satisfy
        ``num_cores_per_mpiproc * num_mpiprocs_per_machine == num_cores_per_machine``
        must be accepted and reflected in the generated script.
        """
        from aiida.schedulers.datastructures import JobTemplate
        from aiida.common.datastructures import CodeInfo, CodeRunMode

        scheduler = SlurmScheduler()

        template = JobTemplate()
        template.shebang = '#!/bin/bash'
        template.job_resource = scheduler.create_job_resource(
            num_machines=1, num_mpiprocs_per_machine=1, num_cores_per_machine=24, num_cores_per_mpiproc=24
        )
        template.uuid = str(uuid.uuid4())
        template.max_wallclock_seconds = 24 * 3600

        code = CodeInfo()
        code.cmdline_params = ['mpirun', '-np', '23', 'pw.x', '-npool', '1']
        code.stdin_name = 'aiida.in'
        template.codes_info = [code]
        template.codes_run_mode = CodeRunMode.SERIAL

        script = scheduler.get_submit_script(template)

        # Every resource-related SBATCH directive must appear in the script.
        for directive in (
            '#SBATCH --no-requeue',
            '#SBATCH --time=1-00:00:00',
            '#SBATCH --nodes=1',
            '#SBATCH --ntasks-per-node=1',
            '#SBATCH --cpus-per-task=24',
        ):
            assert directive in script

        assert "'mpirun' '-np' '23' 'pw.x' '-npool' '1' < 'aiida.in'" in script
Example #3
0
    def test_joblist_multi(self):
        """Test that asking for multiple jobs does not result in duplications."""
        # pylint: disable=protected-access
        command = SlurmScheduler()._get_joblist_command(jobs=['123', '456'])

        # Both ids appear once; neither is repeated.
        assert '123,456' in command
        assert '456,456' not in command
Example #4
0
    def test_submit_script_bad_shebang(self):
        """Test that first line of submit script is as expected."""
        from aiida.schedulers.datastructures import JobTemplate
        from aiida.common.datastructures import CodeInfo, CodeRunMode

        scheduler = SlurmScheduler()
        code_info = CodeInfo()
        code_info.cmdline_params = ['mpirun', '-np', '23', 'pw.x', '-npool', '1']
        code_info.stdin_name = 'aiida.in'

        # (shebang value, expected first line); 'NOSET' means do not assign
        # the attribute at all, so the scheduler default must kick in.
        cases = [
            (None, '#!/bin/bash'),
            ('', ''),
            ('NOSET', '#!/bin/bash'),
        ]
        for shebang, expected_first_line in cases:
            job_tmpl = JobTemplate()
            if shebang != 'NOSET':
                job_tmpl.shebang = shebang
            job_tmpl.job_resource = scheduler.create_job_resource(num_machines=1, num_mpiprocs_per_machine=1)
            job_tmpl.codes_info = [code_info]
            job_tmpl.codes_run_mode = CodeRunMode.SERIAL

            submit_script_text = scheduler.get_submit_script(job_tmpl)

            # This tests if the implementation correctly chooses the default:
            self.assertEqual(submit_script_text.split('\n')[0], expected_first_line)
Example #5
0
    def test_submit_script(self):
        """Test the creation of a simple submission script."""
        from aiida.schedulers.datastructures import JobTemplate
        from aiida.common.datastructures import CodeInfo, CodeRunMode

        scheduler = SlurmScheduler()

        template = JobTemplate()
        template.shebang = '#!/bin/bash'
        template.uuid = str(uuid.uuid4())
        template.job_resource = scheduler.create_job_resource(num_machines=1, num_mpiprocs_per_machine=1)
        template.max_wallclock_seconds = 24 * 3600

        code = CodeInfo()
        code.cmdline_params = ['mpirun', '-np', '23', 'pw.x', '-npool', '1']
        code.stdin_name = 'aiida.in'
        template.codes_info = [code]
        template.codes_run_mode = CodeRunMode.SERIAL

        script = scheduler.get_submit_script(template)

        # The requested shebang must be the very first thing in the script.
        assert script.startswith('#!/bin/bash')

        for directive in ('#SBATCH --no-requeue', '#SBATCH --time=1-00:00:00', '#SBATCH --nodes=1'):
            assert directive in script

        assert "'mpirun' '-np' '23' 'pw.x' '-npool' '1' < 'aiida.in'" in script
Example #6
0
def test_time_conversion(value, expected):
    """Test conversion of (relative) times.

    From docs, acceptable time formats include
    "minutes", "minutes:seconds", "hours:minutes:seconds",
    "days-hours", "days-hours:minutes" and "days-hours:minutes:seconds".
    """
    # pylint: disable=protected-access
    assert SlurmScheduler()._convert_time(value) == expected
Example #7
0
    def test_parse_failed_squeue_output(self):
        """Test that _parse_joblist_output reacts as expected to failures."""
        # pylint: disable=protected-access
        scheduler = SlurmScheduler()

        # A non-zero return value should raise.
        with self.assertRaises(SchedulerError):
            scheduler._parse_joblist_output(1, TEXT_SQUEUE_TO_TEST, '')

        # Non-empty stderr should produce a warning in the scheduler's logger.
        with self.assertLogs(scheduler.logger, 'WARNING'):
            scheduler._parse_joblist_output(0, TEXT_SQUEUE_TO_TEST, 'error message')
Example #8
0
def test_parse_out_of_memory():
    """Test that for job that failed due to OOM `parse_output` return the `ERROR_SCHEDULER_OUT_OF_MEMORY` code."""
    from aiida.engine import CalcJob

    # Detailed job info whose state field reads OUT_OF_MEMORY.
    detailed_job_info = {
        'retval': 0,
        'stderr': '',
        'stdout': """||||||||||||||||||||||||||||||||||||||||||||||||||
        |||||||||||||||||||||||||||||||||||||||||OUT_OF_MEMORY|||||||||"""
    }  # yapf: disable

    result = SlurmScheduler().parse_output(detailed_job_info, '', '')
    assert result == CalcJob.exit_codes.ERROR_SCHEDULER_OUT_OF_MEMORY  # pylint: disable=no-member
Example #9
0
    def test_parse_common_joblist_output(self):
        """
        Test whether _parse_joblist_output can parse the squeue output
        """
        scheduler = SlurmScheduler()

        retval = 0
        stdout = TEXT_SQUEUE_TO_TEST
        stderr = ''

        job_list = scheduler._parse_joblist_output(retval, stdout, stderr)  # pylint: disable=protected-access
        job_dict = {j.job_id: j for j in job_list}

        # The parameters are hard coded in the text to parse
        job_parsed = len(job_list)
        assert job_parsed == JOBS_ON_CLUSTER

        job_running_parsed = len([j for j in job_list if j.job_state \
                                  and j.job_state == JobState.RUNNING])
        assert len(JOBS_RUNNING) == job_running_parsed

        job_held_parsed = len([j for j in job_list if j.job_state and j.job_state == JobState.QUEUED_HELD])
        assert JOBS_HELD == job_held_parsed

        job_queued_parsed = len([j for j in job_list if j.job_state and j.job_state == JobState.QUEUED])
        assert JOBS_QUEUED == job_queued_parsed

        parsed_running_users = [j.job_owner for j in job_list if j.job_state and j.job_state == JobState.RUNNING]
        assert set(USERS_RUNNING) == set(parsed_running_users)

        parsed_running_jobs = [j.job_id for j in job_list if j.job_state and j.job_state == JobState.RUNNING]
        assert set(JOBS_RUNNING) == set(parsed_running_jobs)

        # Fix: these four assertions previously used ``assert value, expected``,
        # which treats the second operand as the assertion *message* and only
        # checks truthiness of the first; an explicit ``==``/``is`` comparison
        # is required to actually verify the parsed values.
        assert job_dict['863553'].requested_wallclock_time_seconds == 30 * 60  # pylint: disable=invalid-name
        assert job_dict['863553'].wallclock_time_seconds == 29 * 60 + 29
        assert job_dict['863553'].dispatch_time == datetime.datetime(2013, 5, 23, 11, 44, 11)
        assert job_dict['863553'].submission_time == datetime.datetime(2013, 5, 23, 10, 42, 11)

        assert job_dict['863100'].annotation == 'Resources'
        assert job_dict['863100'].num_machines == 32
        assert job_dict['863100'].num_mpiprocs == 1024
        assert job_dict['863100'].queue_name == 'normal'

        assert job_dict['861352'].title == 'Pressure_PBEsol_0'

        assert job_dict['863554'].requested_wallclock_time_seconds is None  # pylint: disable=invalid-name
Example #10
0
    def test_submit_script_with_num_cores_per_machine_and_mpiproc2(self):  # pylint: disable=invalid-name
        """Verify that inconsistent core counts are rejected.

        Passing ``num_cores_per_machine`` and ``num_cores_per_mpiproc`` values
        that violate
        ``num_cores_per_mpiproc * num_mpiprocs_per_machine == num_cores_per_machine``
        must raise when creating the job resource.
        """
        from aiida.schedulers.datastructures import JobTemplate

        scheduler = SlurmScheduler()
        job_tmpl = JobTemplate()

        # 23 * 1 != 24, so the resource creation must fail the consistency check.
        with pytest.raises(ValueError, match='`num_cores_per_machine` must be equal to'):
            job_tmpl.job_resource = scheduler.create_job_resource(
                num_machines=1, num_mpiprocs_per_machine=1, num_cores_per_machine=24, num_cores_per_mpiproc=23
            )
Example #11
0
def test_time_conversion_errors(caplog):
    """Test conversion of (relative) times for bad inputs."""
    # pylint: disable=protected-access
    scheduler = SlurmScheduler()

    # Disable logging to avoid excessive output during test
    with caplog.at_level(logging.CRITICAL):
        # '': empty string not valid
        # '1-': there should be something after the dash
        # '1:2-3': there cannot be a dash after the colons
        for bad_value in ('', '1-', '1:2-3'):
            with pytest.raises(ValueError, match='Unrecognized format for time string.'):
                scheduler._convert_time(bad_value)
Example #12
0
def test_parse_output_invalid(detailed_job_info, expected):
    """Test `SlurmScheduler.parse_output` for various invalid arguments."""
    # Each parametrized invalid input must raise the expected exception type.
    with pytest.raises(expected):
        SlurmScheduler().parse_output(detailed_job_info, '', '')
Example #13
0
    def test_joblist_single(self):
        """Test that asking for a single job results in duplication of the list."""
        # pylint: disable=protected-access
        command = SlurmScheduler()._get_joblist_command(jobs=['123'])

        # The single id must appear duplicated in the generated command.
        assert '123,123' in command
Example #14
0
    def test_parse_common_joblist_output(self):
        """
        Test that ``_parse_joblist_output`` correctly parses the squeue output.
        """
        # pylint: disable=protected-access
        scheduler = SlurmScheduler()

        job_list = scheduler._parse_joblist_output(0, TEXT_SQUEUE_TO_TEST, '')
        job_dict = {job.job_id: job for job in job_list}

        def in_state(state):
            """Return the parsed jobs whose state equals ``state``."""
            return [job for job in job_list if job.job_state and job.job_state == state]

        # The parameters are hard coded in the text to parse.
        self.assertEqual(len(job_list), JOBS_ON_CLUSTER)
        self.assertEqual(len(JOBS_RUNNING), len(in_state(JobState.RUNNING)))
        self.assertEqual(JOBS_HELD, len(in_state(JobState.QUEUED_HELD)))
        self.assertEqual(JOBS_QUEUED, len(in_state(JobState.QUEUED)))

        running = in_state(JobState.RUNNING)
        self.assertEqual(set(USERS_RUNNING), {job.job_owner for job in running})
        self.assertEqual(set(JOBS_RUNNING), {job.job_id for job in running})

        # Spot-check attributes of individual jobs hard coded in the fixture.
        # pylint: disable=invalid-name
        self.assertEqual(job_dict['863553'].requested_wallclock_time_seconds, 30 * 60)
        self.assertEqual(job_dict['863553'].wallclock_time_seconds, 29 * 60 + 29)
        self.assertEqual(job_dict['863553'].dispatch_time, datetime.datetime(2013, 5, 23, 11, 44, 11))
        self.assertEqual(job_dict['863553'].submission_time, datetime.datetime(2013, 5, 23, 10, 42, 11))

        self.assertEqual(job_dict['863100'].annotation, 'Resources')
        self.assertEqual(job_dict['863100'].num_machines, 32)
        self.assertEqual(job_dict['863100'].num_mpiprocs, 1024)
        self.assertEqual(job_dict['863100'].queue_name, 'normal')

        self.assertEqual(job_dict['861352'].title, 'Pressure_PBEsol_0')

        self.assertEqual(job_dict['863554'].requested_wallclock_time_seconds, None)
Example #15
0
    def test_time_conversion(self):
        """
        Test conversion of (relative) times.

        From docs, acceptable time formats include
        "minutes", "minutes:seconds", "hours:minutes:seconds",
        "days-hours", "days-hours:minutes" and "days-hours:minutes:seconds".
        """
        # pylint: disable=protected-access
        scheduler = SlurmScheduler()

        # Table of (input string, expected seconds) covering every documented format.
        cases = [
            # minutes
            ('2', 2 * 60),
            ('02', 2 * 60),
            # minutes:seconds
            ('02:3', 2 * 60 + 3),
            ('02:03', 2 * 60 + 3),
            # hours:minutes:seconds
            ('1:02:03', 3600 + 2 * 60 + 3),
            ('01:02:03', 3600 + 2 * 60 + 3),
            # days-hours
            ('1-3', 86400 + 3 * 3600),
            ('01-3', 86400 + 3 * 3600),
            ('01-03', 86400 + 3 * 3600),
            # days-hours:minutes
            ('1-3:5', 86400 + 3 * 3600 + 5 * 60),
            ('01-3:05', 86400 + 3 * 3600 + 5 * 60),
            ('01-03:05', 86400 + 3 * 3600 + 5 * 60),
            # days-hours:minutes:seconds
            ('1-3:5:7', 86400 + 3 * 3600 + 5 * 60 + 7),
            ('01-3:05:7', 86400 + 3 * 3600 + 5 * 60 + 7),
            ('01-03:05:07', 86400 + 3 * 3600 + 5 * 60 + 7),
            # special values
            ('UNLIMITED', 2**31 - 1),
            ('NOT_SET', None),
        ]
        for value, expected in cases:
            self.assertEqual(scheduler._convert_time(value), expected)

        # Disable logging to avoid excessive output during test
        logging.disable(logging.ERROR)
        # '': empty string not valid
        # '1-': there should be something after the dash
        # '1:2-3': there cannot be a dash after the colons
        for bad_value in ('', '1-', '1:2-3'):
            with self.assertRaises(ValueError):
                scheduler._convert_time(bad_value)
        # Reset logging level
        logging.disable(logging.NOTSET)