Example #1
0
 class FieldTester:
     """Holder class declaring two ``TimerField`` descriptors."""

     # Declared with an explicit field name and no extra accepted types.
     field = fields.TimerField('field')
     # Also passes ``type(None)`` after the field name; presumably this lets
     # the field hold ``None`` -- confirm against ``TimerField``.
     field_maybe_none = fields.TimerField('field_maybe_none', type(None))
Example #2
0
 class FieldTester:
     """Holder class declaring two ``TimerField`` descriptors."""

     # Declared without any arguments.
     field = fields.TimerField()
     # Passes ``type(None)`` as the only argument; presumably this lets the
     # field hold ``None`` -- confirm against ``TimerField``.
     field_maybe_none = fields.TimerField(type(None))
Example #3
0
class Job:
    '''A job descriptor.

    A job descriptor is created by the framework after the "setup" phase and
    is associated with the test.

    .. warning::
       Users may not create a job descriptor directly.

    '''

    num_tasks = fields.TypedField('num_tasks', int)
    num_tasks_per_node = fields.TypedField('num_tasks_per_node', int,
                                           type(None))
    num_tasks_per_core = fields.TypedField('num_tasks_per_core', int,
                                           type(None))
    num_tasks_per_socket = fields.TypedField('num_tasks_per_socket', int,
                                             type(None))

    num_cpus_per_task = fields.TypedField('num_cpus_per_task', int, type(None))
    use_smt = fields.TypedField('use_smt', bool, type(None))
    time_limit = fields.TimerField('time_limit', type(None))

    #: Options to be passed to the backend job scheduler.
    #:
    #: :type: :class:`List[str]`
    #: :default: ``[]``
    options = fields.TypedField('options', typ.List[str])

    #: The (parallel) program launcher that will be used to launch the
    #: (parallel) executable of this job.
    #:
    #: Users are allowed to explicitly set the current job launcher, but this
    #: is only relevant in rare situations, such as when you want to wrap the
    #: current launcher command. For this specific scenario, you may have a
    #: look at the :class:`reframe.core.launchers.LauncherWrapper` class.
    #:
    #: The following example shows how you can replace the current partition's
    #: launcher for this test with the "local" launcher:
    #:
    #: .. code-block:: python
    #:
    #:    from reframe.core.backends import getlauncher
    #:
    #:    @rfm.run_after('setup')
    #:    def set_launcher(self):
    #:        self.job.launcher = getlauncher('local')()
    #:
    #: :type: :class:`reframe.core.launchers.JobLauncher`
    launcher = fields.TypedField('launcher', JobLauncher)
    scheduler = fields.TypedField('scheduler', JobScheduler)

    #: .. versionadded:: 2.21
    #:
    #: The ID of the current job.
    #:
    #: :type: :class:`int` or :class:`None`.
    jobid = fields.TypedField('jobid', int, type(None))

    #: .. versionadded:: 2.21
    #:
    #: The exit code of the job.
    #:
    #: This may or may not be set depending on the scheduler backend.
    #:
    #: :type: :class:`int` or :class:`None`.
    exitcode = fields.TypedField('exitcode', int, type(None))

    #: .. versionadded:: 2.21
    #:
    #: The state of the job.
    #:
    #: The value of this field is scheduler-specific.
    #:
    #: :type: :class:`str` or :class:`None`.
    state = fields.TypedField('state', str, type(None))

    #: .. versionadded:: 2.17
    #:
    #: The list of node names assigned to this job.
    #:
    #: This attribute is :class:`None` if no nodes are assigned to the job
    #: yet.
    #: This attribute is set reliably only for the ``slurm`` backend, i.e.,
    #: Slurm *with* accounting enabled.
    #: The ``squeue`` scheduler backend, i.e., Slurm *without* accounting,
    #: might not set this attribute for jobs that finish very quickly.
    #: For the ``local`` scheduler backend, this returns a one-element list
    #: containing the hostname of the current host.
    #:
    #: This attribute might be useful in a flexible regression test for
    #: determining the actual nodes that were assigned to the test.
    #: For more information on flexible node allocation, see the
    #: |--flex-alloc-nodes|_ command-line option.
    #:
    #: This attribute is *not* supported by the ``pbs`` scheduler backend.
    nodelist = fields.TypedField('nodelist', typ.List[str], type(None))

    # The sched_* arguments are exposed also to the frontend
    def __init__(self,
                 name,
                 workdir='.',
                 script_filename=None,
                 stdout=None,
                 stderr=None,
                 max_pending_time=None,
                 sched_flex_alloc_nodes=None,
                 sched_access=None,
                 sched_account=None,
                 sched_partition=None,
                 sched_reservation=None,
                 sched_nodelist=None,
                 sched_exclude_nodelist=None,
                 sched_exclusive_access=None,
                 sched_options=None):
        '''Initialize the job descriptor.

        :arg name: The name of the job; it is also used to derive the
            default script, stdout and stderr file names.
        :arg workdir: The working directory of the job.
        :arg script_filename: The job script file name; defaults to
            ``'<name>.sh'``.
        :arg stdout: The standard output file name; defaults to
            ``'<name>.out'``.
        :arg stderr: The standard error file name; defaults to
            ``'<name>.err'``.
        :arg max_pending_time: Stored as is and exposed through
            :attr:`max_pending_time`.
        :arg sched_flex_alloc_nodes: Either an integer number of nodes or a
            string such as ``'idle'``; consulted by :func:`guess_num_tasks`.
        :arg sched_access: Stored as is and exposed through
            :attr:`sched_access`; a fresh empty list is used if
            :class:`None`.
        :arg sched_options: The initial value of :attr:`options`; an empty
            list is used if it is :class:`None` or empty.

        The remaining ``sched_*`` arguments are stored unchanged and exposed
        through the read-only properties of the same name.
        '''

        # Mutable fields
        self.num_tasks = 1
        self.num_tasks_per_node = None
        self.num_tasks_per_core = None
        self.num_tasks_per_socket = None
        self.num_cpus_per_task = None
        self.use_smt = None
        self.time_limit = None
        self.options = sched_options or []

        # Live job information; to be filled during job's lifetime by the
        # scheduler
        self.jobid = None
        self.exitcode = None
        self.state = None
        self.nodelist = None

        self._name = name
        self._workdir = workdir
        self._script_filename = script_filename or '%s.sh' % name
        self._stdout = stdout or '%s.out' % name
        self._stderr = stderr or '%s.err' % name
        self._max_pending_time = max_pending_time
        self._completion_time = None

        # Backend scheduler related information
        self._sched_flex_alloc_nodes = sched_flex_alloc_nodes

        # Create the default list per instance: a list literal used as the
        # default argument would be shared by every job relying on it, so
        # mutating one job's access options would leak into all the others.
        self._sched_access = [] if sched_access is None else sched_access
        self._sched_nodelist = sched_nodelist
        self._sched_exclude_nodelist = sched_exclude_nodelist
        self._sched_partition = sched_partition
        self._sched_reservation = sched_reservation
        self._sched_account = sched_account
        self._sched_exclusive_access = sched_exclusive_access

    @classmethod
    def create(cls, scheduler, launcher, *args, **kwargs):
        '''Create a job descriptor bound to a scheduler and a launcher.

        :arg scheduler: The scheduler to assign to the new job.
        :arg launcher: The launcher to assign to the new job.
        :arg args: Positional arguments forwarded to the constructor.
        :arg kwargs: Keyword arguments forwarded to the constructor.
        :returns: A new instance of the class this method was called on.
        '''
        # Instantiate ``cls`` rather than ``Job`` so that calling ``create()``
        # on a subclass yields an instance of that subclass.
        ret = cls(*args, **kwargs)
        ret.scheduler, ret.launcher = scheduler, launcher
        return ret

    @property
    def name(self):
        '''The name this job was constructed with.'''
        return self._name

    @property
    def workdir(self):
        '''The working directory passed at construction (``'.'`` by default).'''
        return self._workdir

    @property
    def max_pending_time(self):
        '''The ``max_pending_time`` value passed at construction
        (:class:`None` by default).'''
        return self._max_pending_time

    @property
    def script_filename(self):
        '''The job script file name (``'<name>.sh'`` unless one was given at
        construction).'''
        return self._script_filename

    @property
    def stdout(self):
        '''The standard output file name (``'<name>.out'`` unless one was
        given at construction).'''
        return self._stdout

    @property
    def stderr(self):
        '''The standard error file name (``'<name>.err'`` unless one was
        given at construction).'''
        return self._stderr

    @property
    def sched_flex_alloc_nodes(self):
        '''The ``sched_flex_alloc_nodes`` value passed at construction.

        :func:`guess_num_tasks` treats an integer as a node count and the
        string ``'idle'`` as a request to keep only available nodes.
        '''
        return self._sched_flex_alloc_nodes

    @property
    def sched_access(self):
        '''The ``sched_access`` value passed at construction (an empty list
        by default).'''
        return self._sched_access

    @property
    def sched_nodelist(self):
        '''The ``sched_nodelist`` value passed at construction
        (:class:`None` by default).'''
        return self._sched_nodelist

    @property
    def sched_exclude_nodelist(self):
        '''The ``sched_exclude_nodelist`` value passed at construction
        (:class:`None` by default).'''
        return self._sched_exclude_nodelist

    @property
    def sched_partition(self):
        '''The ``sched_partition`` value passed at construction
        (:class:`None` by default).'''
        return self._sched_partition

    @property
    def sched_reservation(self):
        '''The ``sched_reservation`` value passed at construction
        (:class:`None` by default).'''
        return self._sched_reservation

    @property
    def sched_account(self):
        '''The ``sched_account`` value passed at construction
        (:class:`None` by default).'''
        return self._sched_account

    @property
    def sched_exclusive_access(self):
        '''The ``sched_exclusive_access`` value passed at construction
        (:class:`None` by default).'''
        return self._sched_exclusive_access

    @property
    def completion_time(self):
        '''The completion time of this job.

        The value reported by the scheduler is preferred; if the scheduler
        reports nothing, the timestamp recorded by :func:`wait` or
        :func:`finished` is returned instead (:class:`None` if neither is
        available yet).
        '''
        reported = self.scheduler.completion_time(self)
        if reported:
            return reported

        return self._completion_time

    def prepare(self, commands, environs=None, **gen_opts):
        '''Resolve the task count, if flexible, and generate the job script.

        If :attr:`num_tasks` is zero or negative, the actual number of tasks
        is obtained from :func:`guess_num_tasks` and stored back in
        :attr:`num_tasks`. The script is then written to
        :attr:`script_filename`.

        :arg commands: The commands to write in the body of the script.
        :arg environs: The environments whose load commands are emitted
            before the body; none if :class:`None` or empty.
        :arg gen_opts: Extra options forwarded to the script generator.
        :raises JobError: If the backend does not support flexible node
            allocation or if fewer tasks than the required minimum were
            found.
        '''
        if not environs:
            environs = []

        if self.num_tasks <= 0:
            tasks_per_node = self.num_tasks_per_node or 1

            # A negative value encodes the minimum number of tasks, whereas
            # zero falls back to the number of tasks per node
            if self.num_tasks:
                required = -self.num_tasks
            else:
                required = tasks_per_node

            try:
                found = self.guess_num_tasks()
            except NotImplementedError as err:
                raise JobError('flexible node allocation is not supported by '
                               'this backend') from err

            if found < required:
                raise JobError(
                    'could not satisfy the minimum task requirement: '
                    'required %s, found %s' %
                    (required, found))

            self.num_tasks = found
            getlogger().debug('flex_alloc_nodes: setting num_tasks to %s' %
                              self.num_tasks)

        with shell.generate_script(self.script_filename,
                                   **gen_opts) as script:
            script.write_prolog(self.scheduler.emit_preamble(self))
            script.write(runtime.emit_loadenv_commands(*environs))
            for cmd in commands:
                script.write_body(cmd)

    def guess_num_tasks(self):
        '''Compute the number of tasks for a flexibly allocated job.

        If :attr:`sched_flex_alloc_nodes` is an integer, it is taken as the
        number of nodes. Otherwise the nodes returned by the scheduler are
        filtered for this job and, if ``'idle'`` was requested, reduced to
        those that report themselves as available.

        :returns: The number of selected nodes multiplied by the number of
            tasks per node (``1`` if :attr:`num_tasks_per_node` is not set).
        :raises JobError: If an integer number of nodes is not positive.
        '''
        tasks_per_node = self.num_tasks_per_node or 1
        requested = self.sched_flex_alloc_nodes
        if isinstance(requested, int):
            if requested <= 0:
                raise JobError('invalid number of flex_alloc_nodes: %s' %
                               requested)

            return requested * tasks_per_node

        nodes = self.scheduler.allnodes()
        getlogger().debug('flex_alloc_nodes: total available nodes %s ' %
                          len(nodes))

        # Narrow the candidates down to what this job may actually use
        nodes = self.scheduler.filternodes(self, nodes)
        if self.sched_flex_alloc_nodes == 'idle':
            nodes = {node for node in nodes if node.is_available()}
            getlogger().debug('flex_alloc_nodes: selecting idle nodes: '
                              'available nodes now: %s' % len(nodes))

        return len(nodes) * tasks_per_node

    def submit(self):
        '''Submit this job through its scheduler.

        :returns: Whatever the scheduler's ``submit()`` returns.
        '''
        return self.scheduler.submit(self)

    def wait(self):
        '''Wait on this job through its scheduler.

        Once the scheduler's ``wait()`` returns, the current time is recorded
        as the completion time, unless one has been recorded already.

        :raises JobNotStartedError: If the job has no job ID yet.
        '''
        if self.jobid is None:
            raise JobNotStartedError('cannot wait an unstarted job')

        self.scheduler.wait(self)
        if not self._completion_time:
            self._completion_time = time.time()

    def cancel(self):
        '''Cancel this job through its scheduler.

        :returns: Whatever the scheduler's ``cancel()`` returns.
        :raises JobNotStartedError: If the job has no job ID yet.
        '''
        if self.jobid is None:
            raise JobNotStartedError('cannot cancel an unstarted job')

        return self.scheduler.cancel(self)

    def finished(self):
        '''Poll the scheduler to check whether this job has finished.

        The first time the scheduler reports the job as finished, the current
        time is recorded as the completion time.

        :returns: The value returned by the scheduler's ``finished()``.
        :raises JobNotStartedError: If the job has no job ID yet.
        '''
        if self.jobid is None:
            raise JobNotStartedError('cannot poll an unstarted job')

        is_done = self.scheduler.finished(self)
        if is_done and not self._completion_time:
            self._completion_time = time.time()

        return is_done
Example #4
0
class RegressionTest:
    """Base class for regression tests.

    All regression tests must eventually inherit from this class.
    This class provides the implementation of the pipeline phases that the
    regression test goes through during its lifetime.

    :arg name: The name of the test.
        If :class:`None`, the framework will try to assign a unique and
        human-readable name to the test.

    :arg prefix: The directory prefix of the test.
        If :class:`None`, the framework will set it to the directory containing
        the test file.

    .. note::
        The ``name`` and ``prefix`` arguments are just maintained for backward
        compatibility to the old (prior to 2.13) syntax of regression tests.
        Users are advised to use the new simplified syntax for writing
        regression tests.
        Refer to the :doc:`ReFrame Tutorial </tutorial>` for more information.

        This class is also directly available under the top-level
        :mod:`reframe` module.

       .. versionchanged:: 2.13

    """
    #: The name of the test.
    #:
    #: :type: string that can contain any character except ``/``
    name = fields.TypedField('name', typ.Str[r'[^\/]+'])

    #: List of programming environments supported by this test.
    #:
    #: If ``*`` is in the list then all programming environments are supported
    #: by this test.
    #:
    #: :type: :class:`List[str]`
    #: :default: ``[]``
    #:
    #: .. note::
    #:     .. versionchanged:: 2.12
    #:        Programming environments can now be specified using wildcards.
    #:
    #:     .. versionchanged:: 2.17
    #:        Support for wildcards is dropped.
    valid_prog_environs = fields.TypedField('valid_prog_environs',
                                            typ.List[str])

    #: List of systems supported by this test.
    #: The general syntax for systems is ``<sysname>[:<partname>]``.
    #:
    #: :type: :class:`List[str]`
    #: :default: ``[]``
    valid_systems = fields.TypedField('valid_systems', typ.List[str])

    #: A detailed description of the test.
    #:
    #: :type: :class:`str`
    #: :default: ``self.name``
    descr = fields.TypedField('descr', str)

    #: The path to the source file or source directory of the test.
    #:
    #: It must be a path relative to the :attr:`sourcesdir`, pointing to a
    #: subfolder or a file contained in :attr:`sourcesdir`. This applies also
    #: in the case where :attr:`sourcesdir` is a Git repository.
    #:
    #: If it refers to a regular file, this file will be compiled using the
    #: :class:`SingleSource <reframe.core.buildsystems.SingleSource>` build
    #: system.
    #: If it refers to a directory, ReFrame will try to infer the build system
    #: to use for the project and will fall back in using the :class:`Make
    #: <reframe.core.buildsystems.Make>` build system, if it cannot find a more
    #: specific one.
    #:
    #: :type: :class:`str`
    #: :default: ``''``
    sourcepath = fields.TypedField('sourcepath', str)

    #: The directory containing the test's resources.
    #:
    #: This directory may be specified with an absolute path or with a path
    #: relative to the location of the test. Its contents will always be copied
    #: to the stage directory of the test.
    #:
    #: This attribute may also accept a URL, in which case ReFrame will treat
    #: it as a Git repository and will try to clone its contents in the stage
    #: directory of the test.
    #:
    #: If set to :class:`None`, the test has no resources and no action is
    #: taken.
    #:
    #: :type: :class:`str` or :class:`None`
    #: :default: ``'src'``
    #:
    #: .. note::
    #:     .. versionchanged:: 2.9
    #:        Allow :class:`None` values to be set also in regression tests
    #:        with a compilation phase
    #:
    #:     .. versionchanged:: 2.10
    #:        Support for Git repositories was added.
    sourcesdir = fields.TypedField('sourcesdir', str, type(None))

    #: The build system to be used for this test.
    #: If not specified, the framework will try to figure it out automatically
    #: based on the value of :attr:`sourcepath`.
    #:
    #: This field may be set using either a string referring to a concrete
    #: build system class name
    #: (see `build systems <reference.html#build-systems>`__) or an instance of
    #: :class:`reframe.core.buildsystems.BuildSystem`. The former is the
    #: recommended way.
    #:
    #:
    #: :type: :class:`str` or :class:`reframe.core.buildsystems.BuildSystem`.
    #: :default: :class:`None`.
    #:
    #: .. versionadded:: 2.14
    build_system = BuildSystemField('build_system', type(None))

    #: List of shell commands to be executed before compiling.
    #:
    #: These commands are executed during the compilation phase and from
    #: inside the stage directory. **Each entry in the list spawns a new
    #: shell.**
    #:
    #: :type: :class:`List[str]`
    #: :default: ``[]``
    prebuild_cmd = fields.TypedField('prebuild_cmd', typ.List[str])

    #: List of shell commands to be executed after a successful compilation.
    #:
    #: These commands are executed during the compilation phase and from inside
    #: the stage directory. **Each entry in the list spawns a new shell.**
    #:
    #: :type: :class:`List[str]`
    #: :default: ``[]``
    postbuild_cmd = fields.TypedField('postbuild_cmd', typ.List[str])

    #: The name of the executable to be launched during the run phase.
    #:
    #: :type: :class:`str`
    #: :default: ``os.path.join('.', self.name)``
    executable = fields.TypedField('executable', str)

    #: List of options to be passed to the :attr:`executable`.
    #:
    #: :type: :class:`List[str]`
    #: :default: ``[]``
    executable_opts = fields.TypedField('executable_opts', typ.List[str])

    #: List of shell commands to execute before launching this job.
    #:
    #: These commands do not execute in the context of ReFrame.
    #: Instead, they are emitted in the generated job script just before the
    #: actual job launch command.
    #:
    #: :type: :class:`List[str]`
    #: :default: ``[]``
    #:
    #: .. note::
    #:    .. versionadded:: 2.10
    pre_run = fields.TypedField('pre_run', typ.List[str])

    #: List of shell commands to execute after launching this job.
    #:
    #: See :attr:`pre_run` for a more detailed description of the semantics.
    #:
    #: :type: :class:`List[str]`
    #: :default: ``[]``
    #:
    #: .. note::
    #:    .. versionadded:: 2.10
    post_run = fields.TypedField('post_run', typ.List[str])

    #: List of files to be kept after the test finishes.
    #:
    #: By default, the framework saves the standard output, the standard error
    #: and the generated shell script that was used to run this test.
    #:
    #: These files will be copied over to the framework’s output directory
    #: during the :func:`cleanup` phase.
    #:
    #: Directories are also accepted in this field.
    #:
    #: Relative path names are resolved against the stage directory.
    #:
    #: :type: :class:`List[str]`
    #: :default: ``[]``
    keep_files = fields.TypedField('keep_files', typ.List[str])

    #: List of files or directories (relative to the :attr:`sourcesdir`) that
    #: will be symlinked in the stage directory and not copied.
    #:
    #: You can use this variable to avoid copying very large files to the stage
    #: directory.
    #:
    #: :type: :class:`List[str]`
    #: :default: ``[]``
    readonly_files = fields.TypedField('readonly_files', typ.List[str])

    #: Set of tags associated with this test.
    #:
    #: This test can be selected from the frontend using any of these tags.
    #:
    #: :type: :class:`Set[str]`
    #: :default: an empty set
    tags = fields.TypedField('tags', typ.Set[str])

    #: List of people responsible for this test.
    #:
    #: When the test fails, this contact list will be printed out.
    #:
    #: :type: :class:`List[str]`
    #: :default: ``[]``
    maintainers = fields.TypedField('maintainers', typ.List[str])

    #: Mark this test as a strict performance test.
    #:
    #: If a test is marked as non-strict, the performance checking phase will
    #: always succeed, unless the ``--strict`` command-line option is passed
    #: when invoking ReFrame.
    #:
    #: :type: boolean
    #: :default: :class:`True`
    strict_check = fields.TypedField('strict_check', bool)

    #: Number of tasks required by this test.
    #:
    #: If the number of tasks is set to a number ``<=0``, ReFrame will try
    #: to flexibly allocate the number of tasks, based on the command line
    #: option ``--flex-alloc-tasks``.
    #: A negative number is used to indicate the minimum number of tasks
    #: required for the test.
    #: In this case the minimum number of tasks is the absolute value of
    #: the number.
    #: Setting ``num_tasks`` to ``0`` is equivalent to setting it to
    #: ``-num_tasks_per_node``.
    #:
    #: :type: integral
    #: :default: ``1``
    #:
    #: .. note::
    #:     .. versionchanged:: 2.15
    #:        Added support for flexible allocation of the number of tasks
    #:        according to the ``--flex-alloc-tasks`` command line option
    #:        (see `Flexible task allocation
    #:        <running.html#flexible-task-allocation>`__)
    #:        if the number of tasks is set to ``0``.
    #:     .. versionchanged:: 2.16
    #:        Negative ``num_tasks`` is allowed for specifying the minimum
    #:        number of required tasks by the test.
    num_tasks = fields.TypedField('num_tasks', int)

    #: Number of tasks per node required by this test.
    #:
    #: Ignored if :class:`None`.
    #:
    #: :type: integral or :class:`None`
    #: :default: :class:`None`
    num_tasks_per_node = fields.TypedField('num_tasks_per_node', int,
                                           type(None))

    #: Number of GPUs per node required by this test.
    #:
    #: :type: integral
    #: :default: ``0``
    num_gpus_per_node = fields.TypedField('num_gpus_per_node', int)

    #: Number of CPUs per task required by this test.
    #:
    #: Ignored if :class:`None`.
    #:
    #: :type: integral or :class:`None`
    #: :default: :class:`None`
    num_cpus_per_task = fields.TypedField('num_cpus_per_task', int, type(None))

    #: Number of tasks per core required by this test.
    #:
    #: Ignored if :class:`None`.
    #:
    #: :type: integral or :class:`None`
    #: :default: :class:`None`
    num_tasks_per_core = fields.TypedField('num_tasks_per_core', int,
                                           type(None))

    #: Number of tasks per socket required by this test.
    #:
    #: Ignored if :class:`None`.
    #:
    #: :type: integral or :class:`None`
    #: :default: :class:`None`
    num_tasks_per_socket = fields.TypedField('num_tasks_per_socket', int,
                                             type(None))

    #: Specify whether this test needs simultaneous multithreading enabled.
    #:
    #: Ignored if :class:`None`.
    #:
    #: :type: boolean or :class:`None`
    #: :default: :class:`None`
    use_multithreading = fields.TypedField('use_multithreading', bool,
                                           type(None))

    #: Specify whether this test needs exclusive access to nodes.
    #:
    #: :type: boolean
    #: :default: :class:`False`
    exclusive_access = fields.TypedField('exclusive_access', bool)

    #: Always execute this test locally.
    #:
    #: :type: boolean
    #: :default: :class:`False`
    local = fields.TypedField('local', bool)

    #: The set of reference values for this test.
    #:
    #: The reference values are specified as a scoped dictionary keyed on the
    #: performance variables defined in :attr:`perf_patterns` and scoped under
    #: the system/partition combinations.
    #: The reference itself is a three- or four-tuple that contains the
    #: reference value, the lower and upper thresholds and, optionally, the
    #: measurement unit.
    #: An example follows:
    #:
    #: .. code:: python
    #:
    #:    self.reference = {
    #:        'sys0:part0': {
    #:            'perfvar0': (50, -0.1, 0.1, 'Gflop/s'),
    #:            'perfvar1': (20, -0.1, 0.1, 'GB/s')
    #:        },
    #:        'sys0:part1': {
    #:            'perfvar0': (100, -0.1, 0.1, 'Gflop/s'),
    #:            'perfvar1': (40, -0.1, 0.1, 'GB/s')
    #:        }
    #:    }
    #:
    #: :type: A scoped dictionary with system names as scopes or :class:`None`
    #: :default: ``{}``
    reference = fields.ScopedDictField('reference', typ.Tuple[object])
    # FIXME: There is no way currently to express tuples of `float`s or
    # `None`s, so we just use the very generic `object`

    #: Patterns for verifying the sanity of this test.
    #:
    #: Refer to the :doc:`ReFrame Tutorial </tutorial>` for concrete usage
    #: examples.
    #:
    #: If set to :class:`None`, a sanity error will be raised during sanity
    #: checking.
    #:
    #: :type: A deferrable expression (i.e., the result of a :doc:`sanity
    #:     function </sanity_functions_reference>`) or :class:`None`
    #: :default: :class:`None`
    #:
    #: .. note::
    #:    .. versionchanged:: 2.9
    #:       The default behaviour has changed and it is now considered a
    #:       sanity failure if this attribute is set to :class:`None`.
    #:
    #:       If a test doesn't care about its output, this must be stated
    #:       explicitly as follows:
    #:
    #:       ::
    #:
    #:           self.sanity_patterns = sn.assert_found(r'.*', self.stdout)
    #:
    sanity_patterns = fields.TypedField('sanity_patterns', _DeferredExpression,
                                        type(None))

    #: Patterns for verifying the performance of this test.
    #:
    #: Refer to the :doc:`ReFrame Tutorial </tutorial>` for concrete usage
    #: examples.
    #:
    #: If set to :class:`None`, no performance checking will be performed.
    #:
    #: :type: A dictionary with keys of type :class:`str` and deferrable
    #:     expressions (i.e., the result of a :doc:`sanity function
    #:     </sanity_functions_reference>`) as values.
    #:     :class:`None` is also allowed.
    #: :default: :class:`None`
    perf_patterns = fields.TypedField('perf_patterns',
                                      typ.Dict[str, _DeferredExpression],
                                      type(None))

    #: List of modules to be loaded before running this test.
    #:
    #: These modules will be loaded during the :func:`setup` phase.
    #:
    #: :type: :class:`List[str]`
    #: :default: ``[]``
    modules = fields.TypedField('modules', typ.List[str])

    #: Environment variables to be set before running this test.
    #:
    #: These variables will be set during the :func:`setup` phase.
    #:
    #: :type: :class:`Dict[str, str]`
    #: :default: ``{}``
    variables = fields.TypedField('variables', typ.Dict[str, str])

    #: Time limit for this test.
    #:
    #: Time limit is specified as a three-tuple in the form ``(hh, mm, ss)``,
    #: with ``hh >= 0``, ``0 <= mm <= 59`` and ``0 <= ss <= 59``.
    #: If set to :class:`None`, no time limit will be set.
    #: The default time limit of the system partition's scheduler will be used.
    #:
    #:
    #: :type: :class:`tuple[int]`
    #: :default: ``(0, 10, 0)``
    #:
    #: .. note::
    #:    .. versionchanged:: 2.15
    #:
    #:    This attribute may be set to :class:`None`.
    #:
    time_limit = fields.TimerField('time_limit', type(None))

    #: Extra resources for this test.
    #:
    #: This field is for specifying custom resources needed by this test.
    #: These resources are defined in the :doc:`configuration </configure>`
    #: of a system partition.
    #: For example, assume that two additional resources, named ``gpu`` and
    #: ``datawarp``, are defined in the configuration file as follows:
    #:
    #: ::
    #:
    #:     'resources': {
    #:         'gpu': [
    #:             '--gres=gpu:{num_gpus_per_node}'
    #:         ],
    #:         'datawarp': [
    #:             '#DW jobdw capacity={capacity}',
    #:             '#DW stage_in source={stagein_src}'
    #:         ]
    #:     }
    #:
    #: A regression test then may instantiate the above resources by setting
    #: the :attr:`extra_resources` attribute as follows:
    #:
    #: ::
    #:
    #:     self.extra_resources = {
    #:         'gpu': {'num_gpus_per_node': 2},
    #:         'datawarp': {
    #:             'capacity': '100GB',
    #:             'stagein_src': '/foo'
    #:         }
    #:     }
    #:
    #: The generated batch script (for Slurm) will then contain the following
    #: lines:
    #:
    #: ::
    #:
    #:     #SBATCH --gres=gpu:2
    #:     #DW jobdw capacity=100GB
    #:     #DW stage_in source=/foo
    #:
    #: Notice that if the resource specified in the configuration uses an
    #: alternative directive prefix (in this case ``#DW``), this will replace
    #: the standard prefix of the backend scheduler (in this case ``#SBATCH``).
    #:
    #: If the resource name specified in this variable does not match a
    #: resource name in the partition configuration, it will be simply ignored.
    #: The :attr:`num_gpus_per_node` attribute translates internally to the
    #: ``_rfm_gpu`` resource, so that setting
    #: ``self.num_gpus_per_node = 2`` is equivalent to the following:
    #:
    #: ::
    #:
    #:     self.extra_resources = {'_rfm_gpu': {'num_gpus_per_node': 2}}
    #:
    #: :type: :class:`Dict[str, Dict[str, object]]`
    #: :default: ``{}``
    #:
    #: .. note::
    #:    .. versionadded:: 2.8
    #:    .. versionchanged:: 2.9
    #:
    #:    A new more powerful syntax was introduced
    #:    that allows also custom job script directive prefixes.
    #:
    extra_resources = fields.TypedField('extra_resources',
                                        typ.Dict[str, typ.Dict[str, object]])

    # Private properties
    _prefix = fields.TypedField('_prefix', str)
    _stagedir = fields.TypedField('_stagedir', str, type(None))
    _stdout = fields.TypedField('_stdout', str, type(None))
    _stderr = fields.TypedField('_stderr', str, type(None))
    _current_partition = fields.TypedField('_current_partition',
                                           SystemPartition, type(None))
    _current_environ = fields.TypedField('_current_environ', Environment,
                                         type(None))
    _user_environ = fields.TypedField('_user_environ', Environment, type(None))
    _job = fields.TypedField('_job', Job, type(None))
    _build_job = fields.TypedField('_build_job', Job, type(None))

    def __new__(cls, *args, **kwargs):
        """Allocate the test and install its default attribute values.

        The test name is the qualified class name; if constructor arguments
        were given, their alphanumeric string forms are appended to it,
        joined by underscores. The test prefix is the absolute path of the
        directory containing the file that defines the class.
        """
        instance = super().__new__(cls)

        # Build the test name out of the class name and whatever was passed
        # to the constructor
        test_name = cls.__qualname__
        ctor_values = list(itertools.chain(args, kwargs.values()))
        if ctor_values:
            suffix = '_'.join(util.toalphanum(str(v)) for v in ctor_values)
            test_name += '_' + suffix

        class_dir = os.path.dirname(inspect.getfile(cls))
        instance._rfm_init(test_name, os.path.abspath(class_dir))
        return instance

    def __init__(self):
        # Intentionally empty: the default attribute values are installed by
        # `_rfm_init()`, which `__new__()` calls on the freshly allocated
        # object.
        pass

    def _rfm_init(self, name=None, prefix=None):
        """Install the default values of the test's attributes.

        This is called by ``__new__()`` with the generated test name and the
        directory of the file that defines the test class.

        :arg name: The name of the test.
            If :class:`None`, ``self.name`` is not assigned here.
        :arg prefix: The directory prefix of the test; it is stored as an
            absolute path.
            If :class:`None`, ``self._prefix`` is not assigned here.
        """
        if name is not None:
            self.name = name

        # `descr` and `executable` below default to values derived from the
        # name, so the name must be in place before them
        self.descr = self.name
        self.valid_prog_environs = []
        self.valid_systems = []
        self.sourcepath = ''
        self.prebuild_cmd = []
        self.postbuild_cmd = []
        self.executable = os.path.join('.', self.name)
        self.executable_opts = []
        self.pre_run = []
        self.post_run = []
        self.keep_files = []
        self.readonly_files = []
        self.tags = set()
        self.maintainers = []
        self._perfvalues = {}

        # Strict performance check, if applicable
        self.strict_check = True

        # Default is a single node check
        self.num_tasks = 1
        self.num_tasks_per_node = None
        self.num_gpus_per_node = 0
        self.num_cpus_per_task = None
        self.num_tasks_per_core = None
        self.num_tasks_per_socket = None
        self.use_multithreading = None
        self.exclusive_access = False

        # True only if check is to be run locally
        self.local = False

        # Static directories of the regression check
        if prefix is not None:
            self._prefix = os.path.abspath(prefix)

        self.sourcesdir = 'src'

        # Output patterns
        self.sanity_patterns = None

        # Performance patterns: None -> no performance checking
        self.perf_patterns = None
        self.reference = {}

        # Environment setup
        self.modules = []
        self.variables = {}

        # Time limit for the check
        self.time_limit = (0, 10, 0)

        # Runtime information of the test
        self._current_partition = None
        self._current_environ = None
        self._user_environ = None

        # Associated job
        self._job = None
        self.extra_resources = {}

        # Dynamic paths of the regression check; will be set in setup()
        self._stagedir = None
        self._outputdir = None
        self._stdout = None
        self._stderr = None

        # Compilation process output
        self._build_job = None
        self._compile_proc = None
        self.build_system = None

        # Performance logging
        self._perf_logger = logging.null_logger

        # List of dependencies specified by the user
        self._userdeps = []

        # Weak reference to the test case associated with this check
        self._case = None

    # Export read-only views to interesting fields
    @property
    def current_environ(self):
        """The programming environment this test is currently set up with.

        The framework assigns this attribute during the :func:`setup` phase.

        :type: :class:`reframe.core.environments.Environment`.
        """
        environ = self._current_environ
        return environ

    @property
    def current_partition(self):
        """The system partition this test is currently set up for.

        The framework assigns this attribute during the :func:`setup` phase.

        :type: :class:`reframe.core.systems.SystemPartition`.
        """
        partition = self._current_partition
        return partition

    @property
    def current_system(self):
        """The system this regression test is currently executing on.

        The value is looked up from the framework's runtime on every access;
        the runtime sets it during the initialization phase.

        :type: :class:`reframe.core.runtime.HostSystem`.
        """
        runtime = rt.runtime()
        return runtime.system

    @property
    def perfvalues(self):
        """A mapping view over the performance values recorded by this test.

        A new :class:`util.MappingView` wrapping the internal dictionary is
        created on every access.
        """
        recorded = self._perfvalues
        return util.MappingView(recorded)

    @property
    def job(self):
        """The job descriptor that belongs to this test.

        The framework assigns this attribute during the :func:`setup` phase.

        :type: :class:`reframe.core.schedulers.Job`.
        """
        descriptor = self._job
        return descriptor

    @property
    def logger(self):
        """A logger associated with this test.

        Use it to log information from within your test.
        """
        test_logger = logging.getlogger()
        return test_logger

    @property
    def prefix(self):
        """The directory prefix of this test.

        :type: :class:`str`.
        """
        directory = self._prefix
        return directory

    @property
    def stagedir(self):
        """The stage directory of this test.

        The framework assigns this attribute during the :func:`setup` phase.

        :type: :class:`str`.
        """
        directory = self._stagedir
        return directory

    @property
    def outputdir(self):
        """The output directory of this test.

        The framework assigns this attribute during the :func:`setup` phase.

        .. versionadded:: 2.13

        :type: :class:`str`.
        """
        directory = self._outputdir
        return directory

    @property
    @deferrable
    def stdout(self):
        """The name of the file that holds the standard output of the test.

        The value is taken from the job descriptor, which is set during the
        :func:`setup` phase.

        This attribute is evaluated lazily, so it can be used inside sanity
        expressions.

        :type: :class:`str`.
        """
        run_job = self._job
        return run_job.stdout

    @property
    @deferrable
    def stderr(self):
        """The name of the file that holds the standard error of the test.

        The value is taken from the job descriptor, which is set during the
        :func:`setup` phase.

        This attribute is evaluated lazily, so it can be used inside sanity
        expressions.

        :type: :class:`str`.
        """
        run_job = self._job
        return run_job.stderr

    @property
    @deferrable
    def build_stdout(self):
        """The standard output file name of the build job of this test.

        The value is read from the build job descriptor, which is created in
        :func:`compile`.
        """
        build_job = self._build_job
        return build_job.stdout

    @property
    @deferrable
    def build_stderr(self):
        """The standard error file name of the build job of this test.

        The value is read from the build job descriptor, which is created in
        :func:`compile`.
        """
        build_job = self._build_job
        return build_job.stderr

    def info(self):
        """Return a short status message describing this test.

        The front-end uses this method to print the status message while the
        test is executing.
        It is also called to provide the message for the ``check_info``
        `logging attribute <running.html#logging>`__.
        The default message consists of the test name, followed by the current
        partition and the current programming environment, if they are set.

        :returns: a string with an informational message about this test

        .. note ::
           When overriding this method, you should pay extra attention on how
           you use the :class:`RegressionTest`'s attributes, because this
           method may be called at any point of the test's lifetime.

           .. versionadded:: 2.10

        """
        partition = self.current_partition
        environ = self.current_environ
        message = self.name

        # Each part is appended only once the corresponding attribute is set
        if partition:
            message = message + ' on %s' % partition.fullname

        if environ:
            message = message + ' using %s' % environ.name

        return message

    def supports_system(self, partition_name):
        """Check whether this test may run on the given partition.

        The test supports the partition if :attr:`valid_systems` contains
        ``'*'``, the name of the current system, or the fully qualified
        ``<system>:<partition>`` name of the partition.

        :arg partition_name: The partition name; a name without a ``:`` is
            interpreted as relative to the current system.
        :returns: :class:`True` if the partition is supported,
            :class:`False` otherwise.
        """
        valid = self.valid_systems
        if '*' in valid:
            return True

        sysname = self.current_system.name
        if sysname in valid:
            return True

        # Qualify a relative partition name with the current system's name
        if ':' not in partition_name:
            partition_name = '%s:%s' % (sysname, partition_name)

        return partition_name in valid

    def supports_environ(self, env_name):
        """Check whether this test may run with the given environment.

        :arg env_name: The name of the programming environment.
        :returns: :class:`True` if :attr:`valid_prog_environs` contains
            ``'*'`` or ``env_name``, :class:`False` otherwise.
        """
        valid = self.valid_prog_environs
        return '*' in valid or env_name in valid

    def is_local(self):
        """Tell whether this test will execute locally.

        Before a partition is set, only the :attr:`local` attribute is
        consulted. Afterwards the test is also considered local if the
        ``is_local`` attribute of the current partition's scheduler is set.
        """
        partition = self._current_partition
        if partition is not None:
            return self.local or partition.scheduler.is_local

        return self.local

    def _setup_environ(self, environ):
        """Record the test's environments and load them once.

        :arg environ: The programming environment to set up this test for.
        """
        self._current_environ = environ

        # The user environment is built from the test's modules and variables
        # and is named after the test's class
        user_environ = Environment(
            type(self).__name__, self.modules, self.variables.items())
        self._user_environ = user_environ

        # Take a snapshot first; the environments are loaded only temporarily
        # in order to record the actual module load/unload sequence
        snapshot = EnvironmentSnapshot()

        # Load order: partition's local environment, programming environment,
        # user environment
        self.logger.debug('loading environment for the current partition')
        self._current_partition.local_env.load()

        self.logger.debug("loading current programming environment")
        self._current_environ.load()

        self.logger.debug("loading user's environment")
        self._user_environ.load()

        # Go back to the state recorded before loading anything
        snapshot.load()

    def _setup_paths(self):
        """Create the stage and output directories of this test.

        Both directories are requested from the runtime's resources for the
        combination of system, partition, environment and test name.

        :raises PipelineError: If the directories cannot be created.
        """
        self.logger.debug('setting up paths')
        try:
            resources = rt.runtime().resources
            path_parts = (self.current_system.name,
                          self._current_partition.name,
                          self._current_environ.name,
                          self.name)
            self._stagedir = resources.make_stagedir(*path_parts)
            self._outputdir = resources.make_outputdir(*path_parts)
        except OSError as err:
            raise PipelineError('failed to set up paths') from err

    def _setup_job(self, **job_opts):
        """Setup the job related to this check."""

        self.logger.debug('setting up the job descriptor')

        msg = 'job scheduler backend: {0}'
        self.logger.debug(
            msg.format('local' if self.is_local else self._current_partition.
                       scheduler.registered_name))

        # num_gpus_per_node is a managed resource
        if self.num_gpus_per_node > 0:
            self.extra_resources.setdefault(
                '_rfm_gpu', {'num_gpus_per_node': self.num_gpus_per_node})

        if self.local:
            scheduler_type = getscheduler('local')
            launcher_type = getlauncher('local')
        else:
            scheduler_type = self._current_partition.scheduler
            launcher_type = self._current_partition.launcher

        self._job = scheduler_type(
            name='rfm_%s_job' % self.name,
            launcher=launcher_type(),
            workdir=self._stagedir,
            num_tasks=self.num_tasks,
            num_tasks_per_node=self.num_tasks_per_node,
            num_tasks_per_core=self.num_tasks_per_core,
            num_tasks_per_socket=self.num_tasks_per_socket,
            num_cpus_per_task=self.num_cpus_per_task,
            use_smt=self.use_multithreading,
            time_limit=self.time_limit,
            sched_access=self._current_partition.access,
            sched_exclusive_access=self.exclusive_access,
            **job_opts)

        # Get job options from managed resources and prepend them to
        # job_opts. We want any user supplied options to be able to
        # override those set by the framework.
        resources_opts = []
        for r, v in self.extra_resources.items():
            resources_opts.extend(self._current_partition.get_resource(r, **v))

        self._job.options = resources_opts + self._job.options

    def _setup_perf_logging(self):
        """Attach the performance logger obtained for this test."""
        self.logger.debug('setting up performance logging')
        perf_logger = logging.getperflogger(self)
        self._perf_logger = perf_logger

    def setup(self, partition, environ, **job_opts):
        """The setup phase of the regression test pipeline.

        :arg partition: The system partition to set up this test for.
        :arg environ: The environment to set up this test for.
        :arg job_opts: Options to be passed through to the backend scheduler.
            When overriding this method users should always pass through
            ``job_opts`` to the base class method.
        :raises reframe.core.exceptions.ReframeError: In case of errors.
        """
        self._current_partition = partition
        self._setup_environ(environ)
        self._setup_paths()
        self._setup_job(**job_opts)
        if self.perf_patterns is not None:
            self._setup_perf_logging()

    def _copy_to_stagedir(self, path):
        """Virtually copy ``path`` into the stage directory.

        :attr:`readonly_files` is handed to the copy routine as the list of
        files to be symlinked.

        :arg path: The directory to copy from.
        :raises PipelineError: If the virtual copy fails.
        """
        stagedir = self._stagedir
        symlinks = self.readonly_files
        self.logger.debug('copying %s to stage directory (%s)' %
                          (path, stagedir))
        self.logger.debug('symlinking files: %s' % symlinks)
        try:
            os_ext.copytree_virtual(path, stagedir, symlinks)
        except (OSError, ValueError, TypeError) as err:
            raise PipelineError('virtual copying of files failed') from err

    def _clone_to_stagedir(self, url):
        """Clone a Git repository into the stage directory.

        :arg url: The URL of the repository to clone.
        """
        self.logger.debug('cloning URL %s to stage directory (%s)' %
                          (url, self._stagedir))

        # Clone the URL that was requested (and logged), not whatever
        # self.sourcesdir happens to hold
        os_ext.git_clone(url, self._stagedir)

    def compile(self):
        """The compilation phase of the regression test pipeline.

        This phase stages the test's sources (by cloning :attr:`sourcesdir`
        if it is a URL, otherwise by copying it from the test's prefix),
        selects a build system if none is set, and then prepares and submits
        a build job that always uses the ``local`` scheduler and launcher.
        This call does not wait for the build job; see :func:`compile_wait`.

        :raises reframe.core.exceptions.ReframeError: In case of errors.
        """
        if not self._current_environ:
            raise PipelineError('no programming environment set')

        # Copy the check's resources to the stage directory
        if self.sourcesdir:
            # os.path.commonpath() raises ValueError when the two paths
            # cannot be compared (e.g., absolute mixed with relative); this is
            # treated as "no common path"
            try:
                commonpath = os.path.commonpath(
                    [self.sourcesdir, self.sourcepath])
            except ValueError:
                commonpath = None

            # Only a warning: sourcepath is still taken relative to sourcesdir
            if commonpath:
                self.logger.warn(
                    "sourcepath `%s' seems to be a subdirectory of "
                    "sourcesdir `%s', but it will be interpreted "
                    "as relative to it." % (self.sourcepath, self.sourcesdir))

            if os_ext.is_url(self.sourcesdir):
                self._clone_to_stagedir(self.sourcesdir)
            else:
                self._copy_to_stagedir(
                    os.path.join(self._prefix, self.sourcesdir))

        # Verify the sourcepath and determine the sourcepath in the stagedir
        if (os.path.isabs(self.sourcepath)
                or os.path.normpath(self.sourcepath).startswith('..')):
            raise PipelineError(
                'self.sourcepath is an absolute path or does not point to a '
                'subfolder or a file contained in self.sourcesdir: ' +
                self.sourcepath)

        staged_sourcepath = os.path.join(self._stagedir, self.sourcepath)
        self.logger.debug('Staged sourcepath: %s' % staged_sourcepath)
        if os.path.isdir(staged_sourcepath):
            # sourcepath is a directory: a project-level build system is used
            if not self.build_system:
                # Try to guess the build system
                # NOTE(review): build_system is assigned a name and used as an
                # object right after; presumably the attribute converts the
                # name to a build system instance -- confirm
                cmakelists = os.path.join(staged_sourcepath, 'CMakeLists.txt')
                configure_ac = os.path.join(staged_sourcepath, 'configure.ac')
                configure_in = os.path.join(staged_sourcepath, 'configure.in')
                if os.path.exists(cmakelists):
                    self.build_system = 'CMake'
                    self.build_system.builddir = 'rfm_build'
                elif (os.path.exists(configure_ac)
                      or os.path.exists(configure_in)):
                    self.build_system = 'Autotools'
                    self.build_system.builddir = 'rfm_build'
                else:
                    self.build_system = 'Make'

            self.build_system.srcdir = self.sourcepath
        else:
            # sourcepath is not a directory: treat it as a single source file
            if not self.build_system:
                self.build_system = 'SingleSource'

            self.build_system.srcfile = self.sourcepath
            self.build_system.executable = self.executable

        # Prepare build job
        # The user's pre- and post-build commands wrap the commands emitted by
        # the build system
        build_commands = [
            *self.prebuild_cmd,
            *self.build_system.emit_build_commands(self._current_environ),
            *self.postbuild_cmd
        ]
        environs = [
            self._current_partition.local_env, self._current_environ,
            self._user_environ
        ]
        self._build_job = getscheduler('local')(
            name='rfm_%s_build' % self.name,
            launcher=getlauncher('local')(),
            workdir=self._stagedir)

        # The build job is prepared and submitted within the change_dir()
        # context of the stage directory
        with os_ext.change_dir(self._stagedir):
            try:
                self._build_job.prepare(build_commands,
                                        environs,
                                        login=True,
                                        trap_errors=True)
            except OSError as e:
                raise PipelineError('failed to prepare build job') from e

            self._build_job.submit()

    def compile_wait(self):
        """Block until the build job of this test has finished.

        :raises BuildError: If the exit code of the build job is not zero.

        .. versionadded:: 2.13
        """
        build_job = self._build_job
        build_job.wait()
        self.logger.debug('compilation finished')

        # FIXME: this check is not reliable for certain scheduler backends
        if build_job.exitcode == 0:
            return

        raise BuildError(build_job.stdout, build_job.stderr)

    def run(self):
        """The run phase of the regression test pipeline.

        The call does not block: it prepares the job script of the test,
        submits the job and returns.

        :raises PipelineError: If no system or partition is set, or if the
            job cannot be prepared.
        """
        if not (self.current_system and self._current_partition):
            raise PipelineError('no system or system partition is set')

        # The launcher command, the executable and its options make up a
        # single command line, wrapped by the pre- and post-run commands
        job = self.job
        launch_cmd = ' '.join([
            job.launcher.run_command(job), self.executable,
            *self.executable_opts
        ])
        commands = [*self.pre_run, launch_cmd, *self.post_run]
        environs = [
            self._current_partition.local_env, self._current_environ,
            self._user_environ
        ]
        with os_ext.change_dir(self._stagedir):
            try:
                self._job.prepare(commands, environs, login=True)
            except OSError as err:
                raise PipelineError('failed to prepare job') from err

            self._job.submit()

        id_kind = 'pid' if self.is_local() else 'jobid'
        self.logger.debug('spawned job (%s=%s)' % (id_kind, self._job.jobid))

    def poll(self):
        """Query whether the job of this test has finished.

        :returns: :class:`True` if the associated job has finished,
            :class:`False` otherwise.

            If no job descriptor is yet associated with this test,
            :class:`True` is returned.
        :raises reframe.core.exceptions.ReframeError: In case of errors.
        """
        job = self._job
        return job.finished() if job else True

    def wait(self):
        """Block until the job of this test has finished.

        :raises reframe.core.exceptions.ReframeError: In case of errors.
        """
        job = self._job
        job.wait()
        self.logger.debug('spawned job finished')

    def sanity(self):
        """Run the sanity check of this test by calling
        :func:`check_sanity`."""
        self.check_sanity()

    def performance(self):
        """Run the performance check of this test.

        A :class:`PerformanceError` raised by :func:`check_performance` is
        propagated only if :attr:`strict_check` is set; otherwise it is
        silently dropped.
        """
        try:
            self.check_performance()
        except PerformanceError:
            if not self.strict_check:
                return

            raise

    def check_sanity(self):
        """The sanity checking phase of the regression test pipeline.

        The sanity patterns are evaluated within the ``change_dir()`` context
        of the stage directory.

        :raises reframe.core.exceptions.SanityError: If the sanity check fails
            or if :attr:`sanity_patterns` is not set.
        """
        patterns = self.sanity_patterns
        if patterns is None:
            raise SanityError('sanity_patterns not set')

        with os_ext.change_dir(self._stagedir):
            if not evaluate(patterns):
                raise SanityError()

    def check_performance(self):
        """The performance checking phase of the regression test pipeline.

        Nothing is done if :attr:`perf_patterns` is :class:`None`.
        Otherwise every performance expression is evaluated, recorded and
        logged first, and only then checked against its reference.

        :raises reframe.core.exceptions.SanityError: If a performance tag
            cannot be resolved in the references for the current partition.
        :raises reframe.core.exceptions.PerformanceError: If a performance
            value fails to meet its reference.
        """
        if self.perf_patterns is None:
            return

        with os_ext.change_dir(self._stagedir):
            # Check if default reference perf values are provided and
            # store all the variables  tested in the performance check
            has_default = False
            variables = set()
            for key, ref in self.reference.items():
                # The first part of the key is the system and the last one the
                # name of the performance variable
                keyparts = key.split(self.reference.scope_separator)
                system = keyparts[0]
                varname = keyparts[-1]

                # The unit is the optional fourth element of a reference
                try:
                    unit = ref[3]
                except IndexError:
                    unit = None

                variables.add((varname, unit))

                # The scan stops at the first reference defined for '*'
                if system == '*':
                    has_default = True
                    break

            if not has_default:
                if not variables:
                    # If empty, it means that self.reference was empty, so try
                    # to infer their name from perf_patterns
                    variables = {(name, None)
                                 for name in self.perf_patterns.keys()}

                # Register a placeholder reference under '*' for each variable
                # NOTE(review): presumably a (0, None, None) reference never
                # fails the check -- confirm against assert_reference()
                for var in variables:
                    name, unit = var
                    ref_tuple = (0, None, None)
                    if unit:
                        ref_tuple += (unit, )

                    self.reference.update({'*': {name: ref_tuple}})

            # We first evaluate and log all performance values and then we
            # check them against the reference. This way we always log them
            # even if they don't meet the reference.
            for tag, expr in self.perf_patterns.items():
                value = evaluate(expr)
                key = '%s:%s' % (self._current_partition.fullname, tag)
                if key not in self.reference:
                    raise SanityError(
                        "tag `%s' not resolved in references for `%s'" %
                        (tag, self._current_partition.fullname))

                # Each record is the measured value followed by the reference
                # tuple
                self._perfvalues[key] = (value, *self.reference[key])
                self._perf_logger.log_performance(logging.INFO, tag, value,
                                                  *self.reference[key])

            for key, values in self._perfvalues.items():
                # Anything past the upper threshold (e.g., the unit) is
                # ignored here
                val, ref, low_thres, high_thres, *_ = values
                tag = key.split(':')[-1]
                try:
                    evaluate(
                        assert_reference(
                            val,
                            ref,
                            low_thres,
                            high_thres,
                            msg=('failed to meet reference: %s={0}, '
                                 'expected {1} (l={2}, u={3})' % tag)))
                except SanityError as e:
                    # Report a failed reference as a performance error
                    raise PerformanceError(e)

    def _copy_job_files(self, job, dst):
        if job is None:
            return

        stdout = os.path.join(self._stagedir, job.stdout)
        stderr = os.path.join(self._stagedir, job.stderr)
        script = os.path.join(self._stagedir, job.script_filename)
        shutil.copy(stdout, dst)
        shutil.copy(stderr, dst)
        shutil.copy(script, dst)

    def _copy_to_outputdir(self):
        """Copy checks interesting files to the output directory."""
        self.logger.debug('copying interesting files to output directory')
        self._copy_job_files(self._job, self.outputdir)
        self._copy_job_files(self._build_job, self.outputdir)

        # Copy files specified by the user
        for f in self.keep_files:
            f_orig = f
            if not os.path.isabs(f):
                f = os.path.join(self._stagedir, f)

            if os.path.isfile(f):
                shutil.copy(f, self.outputdir)
            elif os.path.isdir(f):
                shutil.copytree(f, os.path.join(self.outputdir, f_orig))

    def cleanup(self, remove_files=False, unload_env=True):
        """The cleanup phase of the regression test pipeline.

        The interesting files of the test are first copied to the output
        directory, unless the stage and output directories are the same.

        :arg remove_files: If :class:`True`, the stage directory associated
            with this test will be removed.
        :arg unload_env: If :class:`True`, the environment that was used to run
            this test will be unloaded.
        """
        if not os.path.samefile(self._stagedir, self._outputdir):
            self._copy_to_outputdir()
        else:
            self.logger.debug(
                'skipping copy to output dir since they alias each other')

        if remove_files:
            self.logger.debug('removing stage directory')
            os_ext.rmtree(self._stagedir)

        if unload_env:
            # Unload in the reverse order of loading
            self.logger.debug("unloading test's environment")
            self._user_environ.unload()
            self._current_environ.unload()
            self._current_partition.local_env.unload()

    # Dependency API
    def user_deps(self):
        """Return a sequence view over the dependencies declared by the user.

        Every entry is a ``(target, how, subdeps)`` tuple as registered by
        :func:`depends_on`.
        """
        declared = self._userdeps
        return util.SequenceView(declared)

    def depends_on(self, target, how=DEPEND_BY_ENV, subdeps=None):
        """Declare a dependency of this test on ``target``.

        The arguments are type checked and then recorded as a
        ``(target, how, subdeps)`` tuple.

        :arg target: The name of the target test; must be a :class:`str`.
        :arg how: How the dependency is formed; must be an :class:`int`.
        :arg subdeps: Either :class:`None` or a ``Dict[str, List[str]]``.
        :raises TypeError: If any of the arguments has a wrong type.
        """
        if not isinstance(target, str):
            raise TypeError("target argument must be of type: `str'")

        if not isinstance(how, int):
            raise TypeError("how argument must be of type: `int'")

        if subdeps is not None:
            if not isinstance(subdeps, typ.Dict[str, typ.List[str]]):
                raise TypeError("subdeps argument must be of type "
                                "`Dict[str, List[str]]' or `None'")

        dependency = (target, how, subdeps)
        self._userdeps.append(dependency)

    def getdep(self, target, environ):
        """Look up a resolved dependency of this test.

        :arg target: The name of the target test.
        :arg environ: The name of the environment of the target test case.
        :returns: The test of the first dependency of the associated test case
            that matches both names.
        :raises DependencyError: If no test case is associated with this test
            or if no dependency matches.
        """
        # self._case is a weak reference, so it must be called to get the case
        case = None if self._case is None else self._case()
        if case is None:
            raise DependencyError('no test case is associated with this test')

        for dep in case.deps:
            if dep.check.name == target and dep.environ.name == environ:
                return dep.check

        raise DependencyError('could not resolve dependency to (%s, %s)' %
                              (target, environ))

    def __str__(self):
        return "%s(name='%s', prefix='%s')" % (type(self).__name__, self.name,
                                               self.prefix)
Example #5
0
class Job(jsonext.JSONSerializable):
    '''A job descriptor.

    A job descriptor is created by the framework after the "setup" phase and
    is associated with the test.

    .. warning::
       Users may not create a job descriptor directly.

    '''

    num_tasks = fields.TypedField(int)
    num_tasks_per_node = fields.TypedField(int, type(None))
    num_tasks_per_core = fields.TypedField(int, type(None))
    num_tasks_per_socket = fields.TypedField(int, type(None))
    num_cpus_per_task = fields.TypedField(int, type(None))
    use_smt = fields.TypedField(bool, type(None))
    time_limit = fields.TimerField(type(None))

    #: Options to be passed to the backend job scheduler.
    #:
    #: :type: :class:`List[str]`
    #: :default: ``[]``
    options = fields.TypedField(typ.List[str])

    #: The (parallel) program launcher that will be used to launch the
    #: (parallel) executable of this job.
    #:
    #: Users are allowed to explicitly set the current job launcher, but this
    #: is only relevant in rare situations, such as when you want to wrap the
    #: current launcher command. For this specific scenario, you may have a
    #: look at the :class:`reframe.core.launchers.LauncherWrapper` class.
    #:
    #: The following example shows how you can replace the current partition's
    #: launcher for this test with the "local" launcher:
    #:
    #: .. code-block:: python
    #:
    #:    from reframe.core.backends import getlauncher
    #:
    #:    @rfm.run_after('setup')
    #:    def set_launcher(self):
    #:        self.job.launcher = getlauncher('local')()
    #:
    #: :type: :class:`reframe.core.launchers.JobLauncher`
    launcher = fields.TypedField(JobLauncher)

    # The sched_* arguments are exposed also to the frontend
    def __init__(self,
                 name,
                 workdir='.',
                 script_filename=None,
                 stdout=None,
                 stderr=None,
                 max_pending_time=None,
                 sched_flex_alloc_nodes=None,
                 sched_access=None,
                 sched_exclusive_access=None,
                 sched_options=None):
        '''Initialize the job descriptor.

        :arg name: The name of the job.
        :arg workdir: The working directory of the job.
        :arg script_filename: The name of the job script;
            defaults to ``<name>.sh``.
        :arg stdout: The name of the standard output file;
            defaults to ``<name>.out``.
        :arg stderr: The name of the standard error file;
            defaults to ``<name>.err``.
        :arg max_pending_time: Stored as is and exposed through
            :attr:`max_pending_time`.
        :arg sched_flex_alloc_nodes: Either a number of nodes or a string;
            see :func:`guess_num_tasks`.
        :arg sched_access: Stored as is and exposed through
            :attr:`sched_access`; defaults to a new empty list.
        :arg sched_exclusive_access: Stored as is and exposed through
            :attr:`sched_exclusive_access`.
        :arg sched_options: An iterable of options which is copied into
            ``cli_options``.
        '''

        # Mutable fields
        self.num_tasks = 1
        self.num_tasks_per_node = None
        self.num_tasks_per_core = None
        self.num_tasks_per_socket = None
        self.num_cpus_per_task = None
        self.use_smt = None
        self.time_limit = None
        self.cli_options = list(sched_options) if sched_options else []
        self.options = []

        self._name = name
        self._workdir = workdir
        self._script_filename = script_filename or '%s.sh' % name
        self._stdout = stdout or '%s.out' % name
        self._stderr = stderr or '%s.err' % name
        self._max_pending_time = max_pending_time

        # Backend scheduler related information
        self._sched_flex_alloc_nodes = sched_flex_alloc_nodes

        # Each job gets its own list by default; a mutable default argument
        # would be shared by all the jobs created without `sched_access'
        self._sched_access = [] if sched_access is None else sched_access
        self._sched_exclusive_access = sched_exclusive_access

        # Live job information; to be filled during job's lifetime by the
        # scheduler
        self._jobid = None
        self._exitcode = None
        self._state = None
        self._nodelist = None
        self._submit_time = None
        self._completion_time = None

        # Job errors discovered while polling; if not None this will be raised
        # in finished()
        self._exception = None

    @classmethod
    def create(cls, scheduler, launcher, *args, **kwargs):
        '''Create a job through ``scheduler`` and bind it to it.

        :arg scheduler: The scheduler that makes the job; its ``make_job()``
            method receives ``args`` and ``kwargs``.
        :arg launcher: The launcher to assign to the new job.
        :returns: The new job descriptor.
        '''
        ret = scheduler.make_job(*args, **kwargs)
        ret._scheduler = scheduler
        ret.launcher = launcher
        return ret

    @property
    def name(self):
        return self._name

    @property
    def workdir(self):
        return self._workdir

    @property
    def max_pending_time(self):
        return self._max_pending_time

    @property
    def script_filename(self):
        return self._script_filename

    @property
    def stdout(self):
        return self._stdout

    @property
    def stderr(self):
        return self._stderr

    @property
    def sched_flex_alloc_nodes(self):
        return self._sched_flex_alloc_nodes

    @property
    def sched_access(self):
        return self._sched_access

    @property
    def sched_exclusive_access(self):
        return self._sched_exclusive_access

    @property
    def completion_time(self):
        '''The completion time of this job as a floating point number
        expressed in seconds since the epoch, in UTC.

        This attribute is :class:`None` if the job hasn't been finished yet,
        or if ReFrame runtime hasn't perceived it yet.

        The accuracy of this timestamp depends on the backend scheduler.
        The ``slurm`` scheduler backend relies on job accounting and returns
        the actual termination time of the job. The rest of the backends
        report as completion time the moment when the framework realizes that
        the spawned job has finished. In this case, the accuracy depends on
        the execution policy used. If tests are executed with the serial
        execution policy, this is close to the real completion time, but
        if the asynchronous execution policy is used, it can differ
        significantly.

        :type: :class:`float` or :class:`None`
        '''
        return self._completion_time

    @property
    def scheduler(self):
        '''The scheduler this job is bound to.

        This is set only by :func:`create`; the constructor does not
        initialize it.
        '''
        return self._scheduler

    @property
    def exception(self):
        return self._exception

    @property
    def jobid(self):
        '''The ID of this job.

        .. versionadded:: 2.21

        .. versionchanged:: 3.2
           Job ID type is now a string.

        :type: :class:`str` or :class:`None`
        '''
        return self._jobid

    @property
    def exitcode(self):
        '''The exit code of this job.

        This may or may not be set depending on the scheduler backend.

        .. versionadded:: 2.21

        :type: :class:`int` or :class:`None`
        '''
        return self._exitcode

    @property
    def state(self):
        '''The state of this job.

        The value of this field is scheduler-specific.

        .. versionadded:: 2.21

        :type: :class`str` or :class:`None`
        '''
        return self._state

    @property
    def nodelist(self):
        '''The list of node names assigned to this job.

        This attribute is :class:`None` if no nodes are assigned to the job
        yet.
        This attribute is set reliably only for the ``slurm`` backend, i.e.,
        Slurm *with* accounting enabled.
        The ``squeue`` scheduler backend, i.e., Slurm *without* accounting,
        might not set this attribute for jobs that finish very quickly.
        For the ``local`` scheduler backend, this returns an one-element list
        containing the hostname of the current host.

        This attribute might be useful in a flexible regression test for
        determining the actual nodes that were assigned to the test.
        For more information on flexible node allocation, see the
        |--flex-alloc-nodes|_ command-line option

        This attribute is *not* supported by the ``pbs`` scheduler backend.

        .. versionadded:: 2.17

        :type: :class:`List[str]` or :class:`None`
        '''
        return self._nodelist

    @property
    def submit_time(self):
        '''The submission time of this job as a floating point number
        expressed in seconds since the epoch, in UTC.

        This attribute is :class:`None` if the job hasn't been submitted yet.

        This attribute is set right after the job is submitted and can vary
        significantly from the time the jobs starts running, depending on the
        scheduler.

        :type: :class:`float` or :class:`None`
        '''
        return self._submit_time

    def prepare(self, commands, environs=None, prepare_cmds=None, **gen_opts):
        '''Generate the job script of this job.

        If :attr:`num_tasks` is not positive, flexible node allocation is
        requested and the number of tasks is determined first through
        :func:`guess_num_tasks`.

        :arg commands: The commands to write in the body of the script.
        :arg environs: The environments whose load commands will be emitted
            before ``commands``.
        :arg prepare_cmds: Commands to write before the environment load
            commands.
        :arg gen_opts: Options passed through to the script generator.
        :raises JobError: If flexible node allocation is not supported by the
            scheduler backend or if the minimum task requirement cannot be
            satisfied.
        '''
        environs = environs or []
        if self.num_tasks <= 0:
            getlogger().debug('[F] Flexible node allocation requested')
            num_tasks_per_node = self.num_tasks_per_node or 1

            # A negative num_tasks encodes the minimum number of tasks
            # required; zero falls back to the tasks of a single node
            min_num_tasks = (-self.num_tasks if self.num_tasks else
                             num_tasks_per_node)

            try:
                guessed_num_tasks = self.guess_num_tasks()
            except NotImplementedError as e:
                raise JobError('flexible node allocation is not supported by '
                               'this scheduler backend') from e

            if guessed_num_tasks < min_num_tasks:
                raise JobError(
                    'could not satisfy the minimum task requirement: '
                    'required %s, found %s' %
                    (min_num_tasks, guessed_num_tasks)
                )

            self.num_tasks = guessed_num_tasks
            getlogger().debug(f'[F] Setting num_tasks to {self.num_tasks}')

        with shell.generate_script(self.script_filename,
                                   **gen_opts) as builder:
            builder.write_prolog(self.scheduler.emit_preamble(self))
            prepare_cmds = prepare_cmds or []
            for c in prepare_cmds:
                builder.write_body(c)

            builder.write(runtime.emit_loadenv_commands(*environs))
            for c in commands:
                builder.write_body(c)

    def guess_num_tasks(self):
        '''Determine the number of tasks for flexible node allocation.

        If :attr:`sched_flex_alloc_nodes` is an integer, it is taken as the
        number of nodes. Otherwise it is treated as a string: the nodes
        reported and filtered by the scheduler are counted, restricted to
        those in the requested state unless the string is ``all``.

        :returns: The number of nodes multiplied by the number of tasks per
            node (1, if :attr:`num_tasks_per_node` is not set).
        :raises JobError: If an integer :attr:`sched_flex_alloc_nodes` is not
            positive.
        '''
        num_tasks_per_node = self.num_tasks_per_node or 1
        if isinstance(self.sched_flex_alloc_nodes, int):
            if self.sched_flex_alloc_nodes <= 0:
                raise JobError('invalid number of flex_alloc_nodes: %s' %
                               self.sched_flex_alloc_nodes)

            return self.sched_flex_alloc_nodes * num_tasks_per_node

        available_nodes = self.scheduler.allnodes()
        getlogger().debug(
            f'[F] Total available nodes: {len(available_nodes)}'
        )

        # Try to guess the number of tasks now
        available_nodes = self.scheduler.filternodes(self, available_nodes)

        # NOTE(review): sched_flex_alloc_nodes must be a string at this point;
        # the constructor's default of None would fail here -- confirm that
        # callers always set it before requesting flexible allocation
        if self.sched_flex_alloc_nodes.casefold() != 'all':
            available_nodes = {n for n in available_nodes
                               if n.in_state(self.sched_flex_alloc_nodes)}
            getlogger().debug(
                f'[F] Selecting nodes in state '
                f'{self.sched_flex_alloc_nodes!r}: '
                f'available nodes now: {len(available_nodes)}'
            )

        return len(available_nodes) * num_tasks_per_node

    def submit(self):
        '''Submit this job through its scheduler.'''
        return self.scheduler.submit(self)

    def wait(self):
        '''Wait for this job to finish.

        The completion time is set to the current time, unless it has been
        set already.

        :raises JobNotStartedError: If the job has no job ID yet.
        '''
        if self.jobid is None:
            raise JobNotStartedError('cannot wait an unstarted job')

        self.scheduler.wait(self)
        self._completion_time = self._completion_time or time.time()

    def cancel(self):
        '''Cancel this job through its scheduler.

        :raises JobNotStartedError: If the job has no job ID yet.
        '''
        if self.jobid is None:
            raise JobNotStartedError('cannot cancel an unstarted job')

        return self.scheduler.cancel(self)

    def finished(self):
        '''Poll the scheduler to check whether this job has finished.

        When the job is found finished, the completion time is set to the
        current time, unless it has been set already.

        :returns: What the scheduler reports for this job.
        :raises JobNotStartedError: If the job has no job ID yet.
        '''
        if self.jobid is None:
            raise JobNotStartedError('cannot poll an unstarted job')

        done = self.scheduler.finished(self)
        if done:
            self._completion_time = self._completion_time or time.time()

        return done

    def __eq__(self, other):
        # Jobs are equal if they are of exactly the same type and share the
        # same job ID
        return type(self) is type(other) and self.jobid == other.jobid

    def __hash__(self):
        # Consistent with __eq__(): the hash depends on the job ID only
        return hash(self.jobid)
Example #6
0
 class FieldTester:
     # Timer field declared with its name only
     field = fields.TimerField('field')

     # Timer field declared with `allow_none' enabled
     field_maybe_none = fields.TimerField('field_maybe_none', allow_none=True)
Example #7
0
class RegressionTest:
    """Base class for regression tests.

    All regression tests must eventually inherit from this class.
    This class provides the implementation of the pipeline phases that the
    regression test goes through during its lifetime.

    :arg name: The name of the test.
        This is the only argument that the users may specify freely.
    :arg prefix: The directory prefix of the test.
        You should initialize this to the directory containing the file that
        defines the regression test.
        You can achieve this by always passing ``os.path.dirname(__file__)``.
    :arg system: The system that this regression test will run on.
        The framework takes care of initializing and passing correctly this
        argument.
    :arg resources: An object managing the framework's resources.
        The framework takes care of initializing and passing correctly this
        argument.

    Concrete regression test subclasses should call the base constructor as
    follows:

    ::

        class MyTest(RegressionTest):
            def __init__(self, my_test_args, **kwargs):
                super().__init__('mytest', os.path.dirname(__file__), **kwargs)
    """
    #: The name of the test.
    #:
    #: :type: Alphanumeric string.
    name = fields.AlphanumericField('name')

    #: List of programming environments supported by this test.
    #:
    #: :type: :class:`list[str]`
    #: :default: ``[]``
    #:
    #: .. note::
    #:     .. versionchanged:: 2.12
    #:        Programming environments can now be specified using wildcards.
    valid_prog_environs = fields.TypedListField('valid_prog_environs', str)

    #: List of systems supported by this test.
    #: The general syntax for systems is ``<sysname>[:<partname>]``.
    #:
    #: :type: :class:`list[str]`
    #: :default: ``[]``
    valid_systems = fields.TypedListField('valid_systems', str)

    #: A detailed description of the test.
    #:
    #: :type: :class:`str`
    #: :default: ``self.name``
    descr = fields.StringField('descr')

    #: The path to the source file or source directory of the test.
    #:
    #: It must be a path relative to the :attr:`sourcesdir`, pointing to a
    #: subfolder or a file contained in :attr:`sourcesdir`. This applies also
    #: in the case where :attr:`sourcesdir` is a Git repository.
    #:
    #: If it refers to a regular file, this file will be compiled (its language
    #: will be automatically recognized).
    #: If it refers to a directory, ``make`` will be invoked in that directory.
    #:
    #: :type: :class:`str`
    #: :default: ``''``
    sourcepath = fields.StringField('sourcepath')

    #: The directory containing the test's resources.
    #:
    #: This directory may be specified with an absolute path or with a path
    #: relative to the location of the test. Its contents will always be copied
    #: to the stage directory of the test.
    #:
    #: This attribute may also accept a URL, in which case ReFrame will treat it
    #: as a Git repository and will try to clone its contents in the stage
    #: directory of the test.
    #:
    #: If set to :class:`None`, the test has no resources and no action is taken.
    #:
    #: :type: :class:`str` or :class:`None`
    #: :default: ``'src'``
    #:
    #: .. note::
    #:     .. versionchanged:: 2.9
    #:        Allow :class:`None` values to be set also in regression tests
    #:        with a compilation phase
    #:
    #:     .. versionchanged:: 2.10
    #:        Support for Git repositories was added.
    sourcesdir = fields.StringField('sourcesdir', allow_none=True)

    #: List of shell commands to be executed before compiling.
    #:
    #: These commands are executed during the compilation phase and from
    #: inside the stage directory. **Each entry in the list spawns a new shell.**
    #:
    #: :type: :class:`list[str]`
    #: :default: ``[]``
    prebuild_cmd = fields.TypedListField('prebuild_cmd', str)

    #: List of shell commands to be executed after a successful compilation.
    #:
    #: These commands are executed during the compilation phase and from inside
    #: the stage directory. **Each entry in the list spawns a new shell.**
    #:
    #: :type: :class:`list[str]`
    #: :default: ``[]``
    postbuild_cmd = fields.TypedListField('postbuild_cmd', str)

    #: The name of the executable to be launched during the run phase.
    #:
    #: :type: :class:`str`
    #: :default: ``os.path.join('.', self.name)``
    executable = fields.StringField('executable')

    #: List of options to be passed to the :attr:`executable`.
    #:
    #: :type: :class:`list[str]`
    #: :default: ``[]``
    executable_opts = fields.TypedListField('executable_opts', str)

    #: List of shell commands to execute before launching this job.
    #:
    #: These commands do not execute in the context of ReFrame.
    #: Instead, they are emitted in the generated job script just before the
    #: actual job launch command.
    #:
    #: :type: :class:`list` of :class:`str`
    #: :default: ``[]``
    #:
    #: .. note::
    #:    .. versionadded:: 2.10
    pre_run = fields.TypedListField('pre_run', str)

    #: List of shell commands to execute after launching this job.
    #:
    #: See :attr:`pre_run` for a more detailed description of the semantics.
    #:
    #: :type: :class:`list` of :class:`str`
    #: :default: ``[]``
    #:
    #: .. note::
    #:    .. versionadded:: 2.10
    post_run = fields.TypedListField('post_run', str)

    #: List of files to be kept after the test finishes.
    #:
    #: By default, the framework saves the standard output, the standard error
    #: and the generated shell script that was used to run this test.
    #:
    #: These files will be copied over to the framework’s output directory
    #: during the :func:`cleanup` phase.
    #:
    #: Directories are also accepted in this field.
    #:
    #: Relative path names are resolved against the stage directory.
    #:
    #: :type: :class:`list[str]`
    #: :default: ``[]``
    keep_files = fields.TypedListField('keep_files', str)

    #: List of files or directories (relative to the :attr:`sourcesdir`) that
    #: will be symlinked in the stage directory and not copied.
    #:
    #: You can use this variable to avoid copying very large files to the stage
    #: directory.
    #:
    #: :type: :class:`list[str]`
    #: :default: ``[]``
    readonly_files = fields.TypedListField('readonly_files', str)

    #: Set of tags associated with this test.
    #:
    #: This test can be selected from the frontend using any of these tags.
    #:
    #: :type: :class:`set[str]`
    #: :default: an empty set
    tags = fields.TypedSetField('tags', str)

    #: List of people responsible for this test.
    #:
    #: When the test fails, this contact list will be printed out.
    #:
    #: :type: :class:`list[str]`
    #: :default: ``[]``
    maintainers = fields.TypedListField('maintainers', str)

    #: Mark this test as a strict performance test.
    #:
    #: If a test is marked as non-strict, the performance checking phase will
    #: always succeed, unless the ``--strict`` command-line option is passed
    #: when invoking ReFrame.
    #:
    #: :type: boolean
    #: :default: :class:`True`
    strict_check = fields.BooleanField('strict_check')

    #: Number of tasks required by this test.
    #:
    #: If the number of tasks is set to ``0``, ReFrame will try to use all
    #: the available nodes of a reservation. A reservation *must* be specified
    #: through the `--reservation` command-line option, otherwise the
    #: regression test will fail during submission. ReFrame will try to run the
    #: test on all the nodes of the reservation that satisfy the selection
    #: criteria of the current
    #: `virtual partition <configure.html#partition-configuration>`__
    #: (i.e., constraints and/or partitions).
    #:
    #: :type: integral
    #: :default: ``1``
    #:
    #: .. note::
    #:     .. versionchanged:: 2.9
    #:        Added support for running the test using all the nodes of the
    #:        specified reservation if the number of tasks is set to ``0``.
    num_tasks = fields.IntegerField('num_tasks')

    #: Number of tasks per node required by this test.
    #:
    #: Ignored if :class:`None`.
    #:
    #: :type: integral or :class:`None`
    #: :default: :class:`None`
    num_tasks_per_node = fields.IntegerField('num_tasks_per_node',
                                             allow_none=True)

    #: Number of GPUs per node required by this test.
    #:
    #: :type: integral
    #: :default: ``0``
    num_gpus_per_node = fields.IntegerField('num_gpus_per_node')

    #: Number of CPUs per task required by this test.
    #:
    #: Ignored if :class:`None`.
    #:
    #: :type: integral or :class:`None`
    #: :default: :class:`None`
    num_cpus_per_task = fields.IntegerField('num_cpus_per_task',
                                            allow_none=True)

    #: Number of tasks per core required by this test.
    #:
    #: Ignored if :class:`None`.
    #:
    #: :type: integral or :class:`None`
    #: :default: :class:`None`
    num_tasks_per_core = fields.IntegerField('num_tasks_per_core',
                                             allow_none=True)

    #: Number of tasks per socket required by this test.
    #:
    #: Ignored if :class:`None`.
    #:
    #: :type: integral or :class:`None`
    #: :default: :class:`None`
    num_tasks_per_socket = fields.IntegerField('num_tasks_per_socket',
                                               allow_none=True)

    #: Specify whether this test needs simultaneous multithreading enabled.
    #:
    #: Ignored if :class:`None`.
    #:
    #: :type: boolean or :class:`None`
    #: :default: :class:`None`
    use_multithreading = fields.BooleanField('use_multithreading',
                                             allow_none=True)

    #: Specify whether this test needs exclusive access to nodes.
    #:
    #: :type: boolean
    #: :default: :class:`False`
    exclusive_access = fields.BooleanField('exclusive_access')

    #: Always execute this test locally.
    #:
    #: :type: boolean
    #: :default: :class:`False`
    local = fields.BooleanField('local')

    #: The set of reference values for this test.
    #:
    #: Refer to the :doc:`ReFrame Tutorial </tutorial>` for concrete usage
    #: examples.
    #:
    #: :type: A scoped dictionary with system names as scopes or :class:`None`
    #: :default: ``{}``
    reference = fields.ScopedDictField('reference', (tuple, object))
    # FIXME: There is currently no way to express tuples of `float`s or
    # `None`s, so we just use the very generic `object`

    #: Patterns for verifying the sanity of this test.
    #:
    #: Refer to the :doc:`ReFrame Tutorial </tutorial>` for concrete usage
    #: examples.
    #:
    #: If set to :class:`None`, a sanity error will be raised during sanity
    #: checking.
    #:
    #: :type: A deferrable expression (i.e., the result of a :doc:`sanity
    #:     function </sanity_functions_reference>`) or :class:`None`
    #: :default: :class:`None`
    #:
    #: .. note::
    #:    .. versionchanged:: 2.9
    #:       The default behaviour has changed and it is now considered a
    #:       sanity failure if this attribute is set to :class:`None`.
    #:
    #:       If a test doesn't care about its output, this must be stated
    #:       explicitly as follows:
    #:
    #:       ::
    #:
    #:           self.sanity_patterns = sn.assert_found(r'.*', self.stdout)
    #:
    sanity_patterns = fields.TypedField('sanity_patterns',
                                        _DeferredExpression,
                                        allow_none=True)

    #: Patterns for verifying the performance of this test.
    #:
    #: Refer to the :doc:`ReFrame Tutorial </tutorial>` for concrete usage
    #: examples.
    #:
    #: If set to :class:`None`, no performance checking will be performed.
    #:
    #: :type: A dictionary with keys of type :class:`str` and deferrable
    #:     expressions (i.e., the result of a :doc:`sanity function
    #:     </sanity_functions_reference>`) as values.
    #:     :class:`None` is also allowed.
    #: :default: :class:`None`
    perf_patterns = fields.TypedDictField('perf_patterns',
                                          str,
                                          _DeferredExpression,
                                          allow_none=True)

    #: List of modules to be loaded before running this test.
    #:
    #: These modules will be loaded during the :func:`setup` phase.
    #:
    #: :type: :class:`list[str]`
    #: :default: ``[]``
    modules = fields.TypedListField('modules', str)

    #: Environment variables to be set before running this test.
    #:
    #: These variables will be set during the :func:`setup` phase.
    #:
    #: :type: :class:`dict[str, str]`
    #: :default: ``{}``
    variables = fields.TypedDictField('variables', str, str)

    #: Time limit for this test.
    #:
    #: Time limit is specified as a three-tuple in the form ``(hh, mm, ss)``,
    #: with ``hh >= 0``, ``0 <= mm <= 59`` and ``0 <= ss <= 59``.
    #:
    #: :type: :class:`tuple[int]`
    #: :default: ``(0, 10, 0)``
    time_limit = fields.TimerField('time_limit')

    #: Extra resources for this test.
    #:
    #: This field is for specifying custom resources needed by this test.
    #: These resources are defined in the :doc:`configuration </configure>`
    #: of a system partition.
    #: For example, assume that two additional resources, named ``gpu`` and
    #: ``datawarp``, are defined in the configuration file as follows:
    #:
    #: ::
    #:
    #:     'resources': {
    #:         'gpu': [
    #:             '--gres=gpu:{num_gpus_per_node}'
    #:         ],
    #:         'datawarp': [
    #:             '#DW jobdw capacity={capacity}',
    #:             '#DW stage_in source={stagein_src}'
    #:         ]
    #:     }
    #:
    #: A regression test then may instantiate the above resources by setting the
    #: :attr:`extra_resources` attribute as follows:
    #:
    #: ::
    #:
    #:     self.extra_resources = {
    #:         'gpu': {'num_gpus_per_node': 2},
    #:         'datawarp': {
    #:             'capacity': '100GB',
    #:             'stagein_src': '/foo'
    #:         }
    #:     }
    #:
    #: The generated batch script (for Slurm) will then contain the following
    #: lines:
    #:
    #: ::
    #:
    #:     #SBATCH --gres=gpu:2
    #:     #DW jobdw capacity=100GB
    #:     #DW stage_in source=/foo
    #:
    #: Notice that if the resource specified in the configuration uses an
    #: alternative directive prefix (in this case ``#DW``), this will replace
    #: the standard prefix of the backend scheduler (in this case ``#SBATCH``)
    #:
    #: If the resource name specified in this variable does not match a resource
    #: name in the partition configuration, it will be simply ignored.
    #: The :attr:`num_gpus_per_node` attribute translates internally to the
    #: ``_rfm_gpu`` resource, so that setting
    #: ``self.num_gpus_per_node = 2`` is equivalent to the following:
    #:
    #: ::
    #:
    #:     self.extra_resources = {'_rfm_gpu': {'num_gpus_per_node': 2}}
    #:
    #: :type: :class:`dict[str, dict[str, object]]`
    #: :default: ``{}``
    #:
    #: .. note::
    #:    .. versionadded:: 2.8
    #:    .. versionchanged:: 2.9
    #:
    #:    A new more powerful syntax was introduced
    #:    that allows also custom job script directive prefixes.
    #:
    extra_resources = fields.AggregateTypeField('extra_resources',
                                                (dict, (str, (dict,
                                                              (str, object)))))

    # Private properties
    _prefix = fields.StringField('_prefix')
    _stagedir = fields.StringField('_stagedir', allow_none=True)
    _stdout = fields.StringField('_stdout', allow_none=True)
    _stderr = fields.StringField('_stderr', allow_none=True)
    _perf_logfile = fields.StringField('_perf_logfile', allow_none=True)
    _current_system = fields.TypedField('_current_system', System)
    _current_partition = fields.TypedField('_current_partition',
                                           SystemPartition,
                                           allow_none=True)
    _current_environ = fields.TypedField('_current_environ',
                                         Environment,
                                         allow_none=True)
    _job = fields.TypedField('_job', Job, allow_none=True)

    def __init__(self, name, prefix, system, resources):
        """Initialize the test with its default settings.

        Every public attribute is given its default value here; the
        attributes that depend on the partition and environment (job, stage
        directory, standard output/error files) start as :class:`None`.

        :arg name: The name of the test. It is also used as the default
            description and, joined to ``.``, as the default executable.
        :arg prefix: The directory prefix of the test. It is stored as an
            absolute path.
        :arg system: The system object that is stored as the current system
            of this test.
        :arg resources: The resources manager that is later asked for the
            stage, output and log directories of this test.
        """
        self.name = name
        self.descr = name
        self.valid_prog_environs = []
        self.valid_systems = []
        self.sourcepath = ''
        self.prebuild_cmd = []
        self.postbuild_cmd = []
        self.executable = os.path.join('.', self.name)
        self.executable_opts = []
        self.pre_run = []
        self.post_run = []
        self.keep_files = []
        self.readonly_files = []
        self.tags = set()
        self.maintainers = []

        # Strict performance check, if applicable
        self.strict_check = True

        # Default is a single node check
        self.num_tasks = 1
        self.num_tasks_per_node = None
        self.num_gpus_per_node = 0
        self.num_cpus_per_task = None
        self.num_tasks_per_core = None
        self.num_tasks_per_socket = None
        self.use_multithreading = None
        self.exclusive_access = False

        # True only if check is to be run locally
        self.local = False

        # Static directories of the regression check
        self._prefix = os.path.abspath(prefix)
        self.sourcesdir = 'src'

        # Output patterns
        self.sanity_patterns = None

        # Performance patterns: None -> no performance checking
        self.perf_patterns = None
        self.reference = {}

        # Environment setup
        self.modules = []
        self.variables = {}

        # Time limit for the check
        self.time_limit = (0, 10, 0)

        # Runtime information of the test
        self._current_system = system
        self._current_partition = None
        self._current_environ = None

        # Associated job
        self._job = None
        self.extra_resources = {}

        # Dynamic paths of the regression check; will be set in setup()
        self._resources_mgr = resources
        self._stagedir = None
        self._stdout = None
        self._stderr = None

        # Compilation task output
        self._compile_task = None

        # Performance logging
        self._perf_logger = logging.null_logger
        self._perf_logfile = None

    # Export read-only views to interesting fields
    @property
    def current_environ(self):
        """The programming environment the test is currently executing with.

        This is set by the framework during the :func:`setup` phase and it is
        :class:`None` before that.

        :type: :class:`reframe.core.environments.Environment`.
        """
        return self._current_environ

    @property
    def current_partition(self):
        """The system partition the regression test is currently executing on.

        This is set by the framework during the :func:`setup` phase and it is
        :class:`None` before that.

        :type: :class:`reframe.core.systems.SystemPartition`.
        """
        return self._current_partition

    @property
    def current_system(self):
        """The system the regression test is currently executing on.

        This is set by the framework during the initialization phase, from
        the ``system`` argument of the constructor.

        :type: :class:`reframe.core.systems.System`.
        """
        return self._current_system

    @property
    def job(self):
        """The job descriptor associated with this test.

        This is set by the framework during the :func:`setup` phase and it is
        :class:`None` before that.

        :type: :class:`reframe.core.schedulers.Job`.
        """
        return self._job

    @property
    def logger(self):
        """A logger associated with this test.

        You can use this logger to log information for your test.
        It is obtained from ``logging.getlogger()`` on every access.
        """
        return logging.getlogger()

    @property
    def prefix(self):
        """The prefix directory of the test.

        This is the absolute path computed by the constructor from its
        ``prefix`` argument.

        :type: :class:`str`.
        """
        return self._prefix

    @property
    def stagedir(self):
        """The stage directory of the test.

        This is set during the :func:`setup` phase and it is :class:`None`
        before that.

        :type: :class:`str`.
        """
        return self._stagedir

    @property
    @deferrable
    def stdout(self):
        """The name of the file containing the standard output of the test.

        This is set during the :func:`setup` phase.

        This attribute is evaluated lazily, so it can be used inside sanity
        expressions.

        :type: :class:`str`.
        """
        return self._stdout

    @property
    @deferrable
    def stderr(self):
        """The name of the file containing the standard error of the test.

        This is set during the :func:`setup` phase.

        This attribute is evaluated lazily, so it can be used inside sanity
        expressions.

        :type: :class:`str`.
        """
        return self._stderr

    def __repr__(self):
        """Return the representation that ``debug.repr`` builds for this test."""
        return debug.repr(self)

    def info(self):
        """Return a live, human-readable description of this test.

        The front-end uses this method to print the status message while the
        test executes. It also provides the message for the ``check_info``
        `logging attribute <running.html#logging>`__.
        The default message consists of the test name, followed by the
        current partition and the current programming environment, if these
        are already set.

        :returns: a string with an informational message about this test

        .. note ::
           When overriding this method, you should pay extra attention on how
           you use the :class:`RegressionTest`'s attributes, because this
           method may be called at any point of the test's lifetime.

           .. versionadded:: 2.10

        """
        pieces = [self.name]

        partition = self.current_partition
        if partition:
            pieces.append(' on %s' % partition.fullname)

        environ = self.current_environ
        if environ:
            pieces.append(' using %s' % environ.name)

        return ''.join(pieces)

    def supports_system(self, partition_name):
        """Check whether this test may run on the given partition.

        The test supports a partition if :attr:`valid_systems` contains
        ``'*'``, the name of the current system, or the full
        ``<sysname>:<partname>`` name of the partition.

        :arg partition_name: The partition name, either fully qualified or
            relative to the current system (i.e., without a ``:``).
        :returns: :class:`True` if the partition is supported,
            :class:`False` otherwise.
        """
        supported = self.valid_systems
        if '*' in supported or self._current_system.name in supported:
            return True

        # A name without a colon is relative to the current system
        if ':' not in partition_name:
            partition_name = ':'.join(
                (self._current_system.name, partition_name))

        return partition_name in supported

    def supports_environ(self, env_name):
        """Check whether this test supports the given environment.

        :arg env_name: The name of the programming environment.
        :returns: :class:`True` if ``env_name`` matches any of the shell-style
            patterns listed in :attr:`valid_prog_environs`, :class:`False`
            otherwise.
        """
        return any(fnmatch.fnmatch(env_name, pattern)
                   for pattern in self.valid_prog_environs)

    def is_local(self):
        """Tell whether this test will execute locally.

        This is the case if the :attr:`local` attribute is set or, once a
        partition is available, if the scheduler of the current partition is
        flagged as local.
        """
        partition = self._current_partition
        if partition is not None:
            return self.local or partition.scheduler.is_local

        # No partition yet: only the test's own flag can be consulted
        return self.local

    def _sanitize_basename(self, name):
        """Create a basename safe to be used as path component

        Replace all path separator characters in `name` with underscores."""
        return name.replace(os.sep, '_')

    def _setup_environ(self, environ):
        """Setup the current environment and load it."""

        self._current_environ = environ

        # Add user modules and variables to the environment
        for m in self.modules:
            self._current_environ.add_module(m)

        for k, v in self.variables.items():
            self._current_environ.set_variable(k, v)

        # First load the local environment of the partition
        self.logger.debug('loading environment for the current partition')
        self._current_partition.local_env.load()

        self.logger.debug("loading test's environment")
        self._current_environ.load()

    def _setup_paths(self):
        """Setup the check's dynamic paths."""
        self.logger.debug('setting up paths')
        try:
            self._stagedir = self._resources_mgr.stagedir(
                self._sanitize_basename(self._current_partition.name),
                self.name, self._sanitize_basename(self._current_environ.name))

            self.outputdir = self._resources_mgr.outputdir(
                self._sanitize_basename(self._current_partition.name),
                self.name, self._sanitize_basename(self._current_environ.name))
        except OSError as e:
            raise PipelineError('failed to set up paths') from e

        self._stdout = os.path.join(self._stagedir, '%s.out' % self.name)
        self._stderr = os.path.join(self._stagedir, '%s.err' % self.name)

    def _setup_job(self, **job_opts):
        """Setup the job related to this check."""

        self.logger.debug('setting up the job descriptor')

        msg = 'job scheduler backend: {0}'
        self.logger.debug(
            msg.format('local' if self.is_local else self._current_partition.
                       scheduler.registered_name))

        # num_gpus_per_node is a managed resource
        if self.num_gpus_per_node > 0:
            self.extra_resources.setdefault(
                '_rfm_gpu', {'num_gpus_per_node': self.num_gpus_per_node})

        if self.local:
            scheduler_type = getscheduler('local')
            launcher_type = getlauncher('local')
        else:
            scheduler_type = self._current_partition.scheduler
            launcher_type = self._current_partition.launcher

        job_name = '%s_%s_%s_%s' % (
            self.name, self._sanitize_basename(self._current_system.name),
            self._sanitize_basename(self._current_partition.name),
            self._sanitize_basename(self._current_environ.name))
        job_script_filename = os.path.join(self._stagedir, job_name + '.sh')

        self._job = scheduler_type(
            name=job_name,
            command=' '.join([self.executable] + self.executable_opts),
            launcher=launcher_type(),
            environs=[
                self._current_partition.local_env, self._current_environ
            ],
            workdir=self._stagedir,
            num_tasks=self.num_tasks,
            num_tasks_per_node=self.num_tasks_per_node,
            num_tasks_per_core=self.num_tasks_per_core,
            num_tasks_per_socket=self.num_tasks_per_socket,
            num_cpus_per_task=self.num_cpus_per_task,
            use_smt=self.use_multithreading,
            time_limit=self.time_limit,
            script_filename=job_script_filename,
            stdout=self._stdout,
            stderr=self._stderr,
            pre_run=self.pre_run,
            post_run=self.post_run,
            sched_exclusive_access=self.exclusive_access,
            **job_opts)

        # Get job options from managed resources and prepend them to
        # job_opts. We want any user supplied options to be able to
        # override those set by the framework.
        resources_opts = []
        for r, v in self.extra_resources.items():
            resources_opts.extend(self._current_partition.get_resource(r, **v))

        self._job.options = (self._current_partition.access + resources_opts +
                             self._job.options)

    # FIXME: This is a temporary solution to address issue #157
    def _setup_perf_logging(self):
        """Set up the performance log file and logger of this test.

        The log file is named after the test and placed in the log directory
        that the resources manager returns for the current partition. The
        performance logger is an adapter, bound to this test, around the
        logger loaded from a configuration with a single handler for that
        file.
        """
        self.logger.debug('setting up performance logging')
        logdir = self._resources_mgr.logdir(self._current_partition.name)
        self._perf_logfile = os.path.join(logdir, self.name + '.log')

        file_handler = {
            'level': 'DEBUG',
            'format': ('[%(asctime)s] reframe %(version)s: '
                       '%(check_info)s '
                       '(jobid=%(check_jobid)s): %(message)s'),
            'append': True,
        }
        config = {
            'level': 'INFO',
            'handlers': {self._perf_logfile: file_handler},
        }

        perf_logger = logging.load_from_dict(config)
        self._perf_logger = logging.LoggerAdapter(logger=perf_logger,
                                                  check=self)

    def setup(self, partition, environ, **job_opts):
        """The setup phase of the regression test pipeline.

        :arg partition: The system partition to set up this test for.
        :arg environ: The environment to set up this test for.
        :arg job_opts: Options to be passed through to the backend scheduler.
            When overriding this method users should always pass through
            ``job_opts`` to the base class method.
        :raises reframe.core.exceptions.ReframeError: In case of errors.
        """
        self._current_partition = partition
        self._setup_environ(environ)
        self._setup_paths()
        self._setup_job(**job_opts)
        if self.perf_patterns is not None:
            self._setup_perf_logging()

    def _copy_to_stagedir(self, path):
        """Virtually copy the contents of `path` to the stage directory.

        The entries of :attr:`readonly_files` are passed on to
        ``os_ext.copytree_virtual``; the log message reports them as the
        files being symlinked.

        :raises PipelineError: if the copy fails with an :class:`OSError`,
            a :class:`ValueError` or a :class:`TypeError`.
        """
        stagedir = self._stagedir
        symlinked = self.readonly_files
        self.logger.debug(
            'copying %s to stage directory (%s)' % (path, stagedir))
        self.logger.debug('symlinking files: %s' % symlinked)
        try:
            os_ext.copytree_virtual(path, self._stagedir, self.readonly_files)
        except (OSError, ValueError, TypeError) as err:
            raise PipelineError('virtual copying of files failed') from err

    def _clone_to_stagedir(self, url):
        """Clone the Git repository at `url` into the stage directory.

        :arg url: The URL of the repository to clone.
        """
        self.logger.debug('cloning URL %s to stage directory (%s)' %
                          (url, self._stagedir))

        # Clone the URL that was requested (and logged) instead of silently
        # ignoring the argument in favour of `self.sourcesdir`.
        os_ext.git_clone(url, self._stagedir)

    def prebuild(self):
        """Execute the commands of :attr:`prebuild_cmd` in order.

        Each command is run through the shell with error checking enabled.
        """
        for command in self.prebuild_cmd:
            self.logger.debug('executing prebuild commands')
            os_ext.run_command(command, check=True, shell=True)

    def postbuild(self):
        """Execute the commands of :attr:`postbuild_cmd` in order.

        Each command is run through the shell with error checking enabled.
        """
        for command in self.postbuild_cmd:
            self.logger.debug('executing postbuild commands')
            os_ext.run_command(command, check=True, shell=True)

    def compile(self, **compile_opts):
        """The compilation phase of the regression test pipeline.

        The test's resources are first brought into the stage directory:
        they are cloned if :attr:`sourcesdir` is a URL or copied otherwise;
        nothing is staged if :attr:`sourcesdir` is empty or :class:`None`.
        Then, from inside the stage directory, the prebuild commands are run,
        the current environment compiles :attr:`sourcepath` and the
        postbuild commands are run.

        :arg compile_opts: Extra options to be passed to the programming
            environment for compiling the source code of the test.
            Any ``source`` and ``executable`` entries are discarded.
        :raises reframe.core.exceptions.ReframeError: In case of errors.
            A :class:`PipelineError` is raised here if no programming
            environment is set or if :attr:`sourcepath` is absolute or
            points outside of the sources directory.
        """
        if not self._current_environ:
            raise PipelineError('no programming environment set')

        # Copy the check's resources to the stage directory
        if self.sourcesdir:
            # A non-empty common path means that `sourcepath` repeats (part
            # of) `sourcesdir`; `os.path.commonpath()` raises a `ValueError`
            # for paths that cannot be compared, e.g., when absolute and
            # relative paths are mixed
            try:
                commonpath = os.path.commonpath(
                    [self.sourcesdir, self.sourcepath])
            except ValueError:
                commonpath = None

            if commonpath:
                self.logger.warn(
                    "sourcepath (`%s') seems to be a subdirectory of "
                    "sourcesdir (`%s'), but it will be interpreted "
                    "as relative to it." % (self.sourcepath, self.sourcesdir))

            if os_ext.is_url(self.sourcesdir):
                self._clone_to_stagedir(self.sourcesdir)
            else:
                # `sourcesdir` is joined to the test's prefix
                self._copy_to_stagedir(
                    os.path.join(self._prefix, self.sourcesdir))

        # Verify the sourcepath and determine the sourcepath in the stagedir
        if (os.path.isabs(self.sourcepath)
                or os.path.normpath(self.sourcepath).startswith('..')):
            raise PipelineError(
                'self.sourcepath is an absolute path or does not point to a '
                'subfolder or a file contained in self.sourcesdir: ' +
                self.sourcepath)

        staged_sourcepath = os.path.join(self._stagedir, self.sourcepath)
        self.logger.debug('Staged sourcepath: %s' % staged_sourcepath)

        # Remove source and executable from compile_opts, since these are
        # passed explicitly to the environment's compile() call below
        compile_opts.pop('source', None)
        compile_opts.pop('executable', None)

        # Change working dir to stagedir although absolute paths are used
        # everywhere in the compilation process. This is done to ensure that
        # any other files (besides the executable) generated during the
        # compilation will remain in the stage directory
        with os_ext.change_dir(self._stagedir):
            self.prebuild()

            # Add the directory of the staged sources to the include path
            if os.path.isdir(staged_sourcepath):
                includedir = staged_sourcepath
            else:
                includedir = os.path.dirname(staged_sourcepath)

            self._current_environ.include_search_path.append(includedir)
            self._compile_task = self._current_environ.compile(
                sourcepath=staged_sourcepath,
                executable=os.path.join(self._stagedir, self.executable),
                **compile_opts)
            self.logger.debug('compilation stdout:\n%s' %
                              self._compile_task.stdout)
            self.logger.debug('compilation stderr:\n%s' %
                              self._compile_task.stderr)
            self.postbuild()

        self.logger.debug('compilation finished')

    def run(self):
        """The run phase of the regression test pipeline.

        The job associated with this test is prepared and submitted from
        within the stage directory. This call is non-blocking: it returns as
        soon as the job has been submitted.

        :raises PipelineError: If no system or system partition is set, or
            if preparing the job fails with an :class:`OSError`.
        """
        if not (self._current_system and self._current_partition):
            raise PipelineError('no system or system partition is set')

        with os_ext.change_dir(self._stagedir):
            try:
                self._job.prepare(BashScriptBuilder(login=True))
            except OSError as err:
                raise PipelineError('failed to prepare job') from err

            self._job.submit()

        # Local jobs are identified by a pid, all others by a job id
        if self.is_local():
            id_kind = 'pid'
        else:
            id_kind = 'jobid'

        msg = 'spawned job (%s=%s)' % (id_kind, self._job.jobid)
        self.logger.debug(msg)

    def poll(self):
        """Poll the test's state.

        :returns: Whatever the ``finished()`` method of the associated job
            returns, i.e., :class:`True` if the job has finished and
            :class:`False` otherwise.

            A test without an associated job descriptor is reported as
            finished, i.e., :class:`True` is returned.
        :raises reframe.core.exceptions.ReframeError: In case of errors.
        """
        job = self._job
        return job.finished() if job else True

    def wait(self):
        """Block until the job associated with this test has finished.

        The waiting itself is delegated to the job descriptor; a debug
        message is logged once it returns.

        :raises reframe.core.exceptions.ReframeError: In case of errors.
        """
        job = self._job
        job.wait()
        self.logger.debug('spawned job finished')

    def sanity(self):
        """Run the sanity check of this test.

        This simply delegates to :func:`check_sanity` and discards its return
        value.
        """
        self.check_sanity()

    def performance(self):
        """Run the performance check of this test.

        A :class:`SanityError` raised by :func:`check_performance` is
        propagated only if ``self.strict_check`` is true; otherwise it is
        silently dropped.
        """
        try:
            self.check_performance()
        except SanityError:
            if not self.strict_check:
                return

            raise

    def check_sanity(self):
        """The sanity checking phase of the regression test pipeline.

        ``self.sanity_patterns`` is evaluated with the stage directory as the
        working directory.

        :raises reframe.core.exceptions.SanityError: If the sanity check
            fails, i.e., if ``sanity_patterns`` is not set or if it evaluates
            to a false value.
        """
        if self.sanity_patterns is None:
            raise SanityError('sanity_patterns not set')

        with os_ext.change_dir(self._stagedir):
            if not evaluate(self.sanity_patterns):
                raise SanityError('sanity failure')

    def check_performance(self):
        """The performance checking phase of the regression test pipeline.

        Every expression in ``self.perf_patterns`` is evaluated with the stage
        directory as the working directory. The resulting value is then
        checked against the reference found in ``self.reference`` under the
        key ``<partition fullname>:<tag>``. Nothing is done if
        ``self.perf_patterns`` is :class:`None`.

        :raises reframe.core.exceptions.SanityError: If the performance check
            fails or if no reference can be resolved for a tag.
        """
        if self.perf_patterns is None:
            return

        with os_ext.change_dir(self._stagedir):
            for tag, expr in self.perf_patterns.items():
                value = evaluate(expr)
                partname = self._current_partition.fullname
                key = '%s:%s' % (partname, tag)

                # Guard the reference lookup only, so that a `KeyError` raised
                # by any other statement is not misreported as an unresolved
                # tag
                try:
                    reference = self.reference[key]
                except KeyError:
                    raise SanityError(
                        "tag `%s' not resolved in references for `%s'" %
                        (tag, partname))

                ref, low_thres, high_thres = reference
                self._perf_logger.info('value: %s, reference: %s' %
                                       (value, reference))
                evaluate(assert_reference(value, ref, low_thres, high_thres))

    def _copy_to_outputdir(self):
        """Copy the interesting files of this check to the output directory.

        These are the standard output and standard error files of the test,
        the job script (if a job is associated with the test) and the files
        listed in ``self.keep_files``. Entries of ``keep_files`` that are not
        absolute paths are resolved against the stage directory.
        """
        self.logger.debug('copying interesting files to output directory')
        for stream_file in (self._stdout, self._stderr):
            shutil.copy(stream_file, self.outputdir)

        if self._job:
            shutil.copy(self._job.script_filename, self.outputdir)

        # Files that the user asked to keep
        for entry in self.keep_files:
            if os.path.isabs(entry):
                src = entry
            else:
                src = os.path.join(self._stagedir, entry)

            shutil.copy(src, self.outputdir)

    def cleanup(self, remove_files=False, unload_env=True):
        """The cleanup phase of the regression test pipeline.

        The interesting files of the test are copied to the output directory
        first, unless the stage and the output directories are the same.

        :arg remove_files: If :class:`True`, the stage directory associated
            with this test will be removed.
        :arg unload_env: If :class:`True`, the environment that was used to run
            this test will be unloaded, as well as the local environment of
            the current partition.
        """
        if not os.path.samefile(self._stagedir, self.outputdir):
            self._copy_to_outputdir()
        else:
            self.logger.debug('skipping copy to output dir '
                              'since they alias each other')

        if remove_files:
            self.logger.debug('removing stage directory')
            shutil.rmtree(self._stagedir)

        if unload_env:
            self.logger.debug("unloading test's environment")
            self._current_environ.unload()
            self._current_partition.local_env.unload()

    def __str__(self):
        """Return a two-line, human-readable summary of this test.

        The first line holds the name and the description of the test; the
        second one lists its tags and its maintainers.
        """
        template = ('%s (%s)\n'
                    '        tags: [%s], maintainers: [%s]')
        name, descr = self.name, self.descr
        tags = ', '.join(self.tags)
        maintainers = ', '.join(self.maintainers)
        return template % (name, descr, tags, maintainers)
Example #8
0
class Job(abc.ABC):
    '''A job descriptor.

    .. caution::
       This is an abstract class.
       Users may not create jobs directly.
    '''

    # Task and CPU layout of the job. Apart from `num_tasks`, all of these
    # fields are declared with `type(None)` as an additional type.
    num_tasks = fields.TypedField('num_tasks', int)
    num_tasks_per_node = fields.TypedField('num_tasks_per_node', int,
                                           type(None))
    num_tasks_per_core = fields.TypedField('num_tasks_per_core', int,
                                           type(None))
    num_tasks_per_socket = fields.TypedField('num_tasks_per_socket', int,
                                             type(None))

    # NOTE: The class attribute must be spelled exactly like the field name
    # and like the attribute assigned in `__init__()`; otherwise that
    # assignment does not go through this field at all.
    num_cpus_per_task = fields.TypedField('num_cpus_per_task', int,
                                          type(None))

    # Former, misspelled name of `num_cpus_per_task`; it is kept only for
    # backward compatibility with code that still refers to it.
    # NOTE(review): presumably both fields resolve to the same stored value
    # through the shared field name -- confirm against `fields.TypedField`.
    num_cpus_per_tasks = fields.TypedField('num_cpus_per_task', int,
                                           type(None))
    use_smt = fields.TypedField('use_smt', bool, type(None))
    time_limit = fields.TimerField('time_limit', type(None))

    #: Options to be passed to the backend job scheduler.
    #:
    #: :type: :class:`List[str]`
    #: :default: ``[]``
    options = fields.TypedField('options', typ.List[str])

    #: The parallel program launcher that will be used to launch the parallel
    #: executable of this job.
    #:
    #: :type: :class:`reframe.core.launchers.JobLauncher`
    launcher = fields.TypedField('launcher', JobLauncher)

    # Live job information; `__init__()` resets all three to `None`
    _jobid = fields.TypedField('_jobid', int, type(None))
    _exitcode = fields.TypedField('_exitcode', int, type(None))
    _state = fields.TypedField('_state', str, type(None))

    # The sched_* arguments are exposed also to the frontend
    def __init__(self,
                 name,
                 launcher,
                 workdir='.',
                 num_tasks=1,
                 num_tasks_per_node=None,
                 num_tasks_per_core=None,
                 num_tasks_per_socket=None,
                 num_cpus_per_task=None,
                 use_smt=None,
                 time_limit=None,
                 script_filename=None,
                 stdout=None,
                 stderr=None,
                 pre_run=[],
                 post_run=[],
                 sched_flex_alloc_tasks=None,
                 sched_access=[],
                 sched_account=None,
                 sched_partition=None,
                 sched_reservation=None,
                 sched_nodelist=None,
                 sched_exclude_nodelist=None,
                 sched_exclusive_access=None,
                 sched_options=[]):
        '''Initialize the job descriptor.

        :arg name: The name of the job. It also provides the default names of
            the job script (``<name>.sh``), of the standard output file
            (``<name>.out``) and of the standard error file (``<name>.err``).
        :arg launcher: The launcher stored in :attr:`launcher`.
        :arg workdir: The value exposed as :attr:`workdir`.
        :arg num_tasks: Initial value of :attr:`num_tasks`.
        :arg num_tasks_per_node: Initial value of :attr:`num_tasks_per_node`.
        :arg num_tasks_per_core: Initial value of :attr:`num_tasks_per_core`.
        :arg num_tasks_per_socket: Initial value of
            :attr:`num_tasks_per_socket`.
        :arg num_cpus_per_task: Initial value of :attr:`num_cpus_per_task`.
        :arg use_smt: Initial value of :attr:`use_smt`.
        :arg time_limit: Initial value of :attr:`time_limit`.
        :arg script_filename: The name of the job script; if not given,
            ``<name>.sh`` is used.
        :arg stdout: The name of the standard output file; if not given,
            ``<name>.out`` is used.
        :arg stderr: The name of the standard error file; if not given,
            ``<name>.err`` is used.
        :arg pre_run: Accepted, but not used by this constructor.
        :arg post_run: Accepted, but not used by this constructor.
        :arg sched_flex_alloc_tasks: The value exposed as
            :attr:`sched_flex_alloc_tasks`.
        :arg sched_access: The value exposed as :attr:`sched_access`. A copy
            of it is stored, unless it is :class:`None`.
        :arg sched_account: The value exposed as :attr:`sched_account`.
        :arg sched_partition: The value exposed as :attr:`sched_partition`.
        :arg sched_reservation: The value exposed as
            :attr:`sched_reservation`.
        :arg sched_nodelist: The value exposed as :attr:`sched_nodelist`.
        :arg sched_exclude_nodelist: The value exposed as
            :attr:`sched_exclude_nodelist`.
        :arg sched_exclusive_access: The value exposed as
            :attr:`sched_exclusive_access`.
        :arg sched_options: Initial contents of :attr:`options`; the list is
            copied.
        '''

        # Mutable fields
        self.num_tasks = num_tasks
        self.num_tasks_per_node = num_tasks_per_node
        self.num_tasks_per_core = num_tasks_per_core
        self.num_tasks_per_socket = num_tasks_per_socket
        self.num_cpus_per_task = num_cpus_per_task
        self.use_smt = use_smt
        self.time_limit = time_limit
        self.options = list(sched_options)
        self.launcher = launcher

        self._name = name
        self._workdir = workdir
        self._script_filename = script_filename or '%s.sh' % name
        self._stdout = stdout or '%s.out' % name
        self._stderr = stderr or '%s.err' % name
        self._nodelist = None

        # Backend scheduler related information
        self._sched_flex_alloc_tasks = sched_flex_alloc_tasks

        # Store a private copy, exactly as it is done for `sched_options`;
        # otherwise all the jobs created without an explicit `sched_access`
        # would share (and could modify) the very same default list object.
        self._sched_access = (None if sched_access is None
                              else list(sched_access))
        self._sched_nodelist = sched_nodelist
        self._sched_exclude_nodelist = sched_exclude_nodelist
        self._sched_partition = sched_partition
        self._sched_reservation = sched_reservation
        self._sched_account = sched_account
        self._sched_exclusive_access = sched_exclusive_access

        # Live job information; to be filled during job's lifetime by the
        # scheduler
        self._jobid = None
        self._exitcode = None
        self._state = None

    def __repr__(self):
        '''Return the representation of this job as built by ``debug.repr``.'''
        return debug.repr(self)

    # Read-only properties
    @property
    def exitcode(self):
        '''The exit code recorded for this job.

        This is :class:`None` until it is filled in during the job's lifetime.
        '''
        return self._exitcode

    @property
    def jobid(self):
        '''The ID recorded for this job.

        This is :class:`None` until it is filled in during the job's lifetime.
        '''
        return self._jobid

    @property
    def state(self):
        '''The state recorded for this job.

        This is :class:`None` until it is filled in during the job's lifetime.
        '''
        return self._state

    @property
    def name(self):
        '''The name this job was created with.'''
        return self._name

    @property
    def workdir(self):
        '''The working directory this job was created with.'''
        return self._workdir

    @property
    def script_filename(self):
        '''The file name of the script of this job.'''
        return self._script_filename

    @property
    def stdout(self):
        '''The file name of the standard output of this job.'''
        return self._stdout

    @property
    def stderr(self):
        '''The file name of the standard error of this job.'''
        return self._stderr

    @property
    def sched_flex_alloc_tasks(self):
        '''The ``sched_flex_alloc_tasks`` value this job was created with.'''
        return self._sched_flex_alloc_tasks

    @property
    def sched_access(self):
        '''The ``sched_access`` value stored for this job.'''
        return self._sched_access

    @property
    def sched_nodelist(self):
        '''The ``sched_nodelist`` value this job was created with.'''
        return self._sched_nodelist

    @property
    def sched_exclude_nodelist(self):
        '''The ``sched_exclude_nodelist`` value this job was created with.'''
        return self._sched_exclude_nodelist

    @property
    def sched_partition(self):
        '''The ``sched_partition`` value this job was created with.'''
        return self._sched_partition

    @property
    def sched_reservation(self):
        '''The ``sched_reservation`` value this job was created with.'''
        return self._sched_reservation

    @property
    def sched_account(self):
        '''The ``sched_account`` value this job was created with.'''
        return self._sched_account

    @property
    def sched_exclusive_access(self):
        '''The ``sched_exclusive_access`` value this job was created with.'''
        return self._sched_exclusive_access

    def prepare(self, commands, environs=None, **gen_opts):
        '''Resolve the number of tasks, if needed, and write the job script.

        A non-positive ``num_tasks`` requests a flexible task allocation: the
        actual number of tasks is then obtained from :func:`guess_num_tasks`
        and stored back in ``num_tasks``. The absolute value of a negative
        ``num_tasks`` is the minimum number of tasks required, whereas for a
        zero ``num_tasks`` the minimum is ``num_tasks_per_node`` (or 1, if
        that is not set).

        The job script consists of the preamble returned by
        :func:`emit_preamble`, the commands for loading ``environs`` and,
        finally, ``commands``.

        :arg commands: The commands to write to the body of the job script.
        :arg environs: The environments whose load commands are written to
            the job script; :class:`None` means no environments.
        :arg gen_opts: Options passed through to ``shell.generate_script``.
        :raises JobError: If flexible task allocation is not supported by the
            backend or if fewer tasks than the required minimum were found.
        '''
        environs = environs or []
        if self.num_tasks <= 0:
            tasks_per_node = self.num_tasks_per_node or 1
            if self.num_tasks:
                min_num_tasks = -self.num_tasks
            else:
                min_num_tasks = tasks_per_node

            try:
                guessed = self.guess_num_tasks()
            except NotImplementedError as err:
                raise JobError('flexible task allocation is not supported by '
                               'this backend') from err

            if guessed < min_num_tasks:
                # Report the shortage in terms of nodes instead of tasks
                raise JobError('could not find enough nodes: '
                               'required %s, found %s' %
                               (min_num_tasks // tasks_per_node,
                                guessed // tasks_per_node))

            self.num_tasks = guessed
            getlogger().debug('flex_alloc_tasks: setting num_tasks to %s' %
                              self.num_tasks)

        with shell.generate_script(self.script_filename,
                                   **gen_opts) as builder:
            builder.write_prolog(self.emit_preamble())
            builder.write(env.emit_load_commands(*environs))
            for cmd in commands:
                builder.write_body(cmd)

    @abc.abstractmethod
    def emit_preamble(self):
        '''Return the preamble of the job script.

        :func:`prepare` passes the return value of this method to the
        ``write_prolog()`` method of the script builder. This base
        implementation does nothing.
        '''

    def guess_num_tasks(self):
        '''Determine the number of tasks for a flexible task allocation.

        If ``sched_flex_alloc_tasks`` is an integer, it is returned as is.
        Otherwise, the number of tasks is the number of nodes that remain
        after filtering all the nodes by ``sched_access`` and ``options``
        (and, for ``'idle'``, after keeping only the nodes that report
        themselves as available), multiplied by ``num_tasks_per_node`` (or
        by 1, if that is not set).

        :returns: The guessed number of tasks.
        :raises JobError: If ``sched_flex_alloc_tasks`` is a non-positive
            integer.
        '''
        flex = self.sched_flex_alloc_tasks
        if isinstance(flex, int):
            if flex <= 0:
                raise JobError('invalid number of flex_alloc_tasks: %s' %
                               flex)

            return flex

        nodes = self.get_all_nodes()
        getlogger().debug('flex_alloc_tasks: total available nodes %s ' %
                          len(nodes))

        # Narrow the nodes down according to the scheduler options
        nodes = self.filter_nodes(nodes, self.sched_access + self.options)
        if flex == 'idle':
            nodes = {node for node in nodes if node.is_available()}
            getlogger().debug('flex_alloc_tasks: selecting idle nodes: '
                              'available nodes now: %s' % len(nodes))

        tasks_per_node = self.num_tasks_per_node or 1
        return len(nodes) * tasks_per_node

    @abc.abstractmethod
    def get_all_nodes(self):
        '''Get all the available nodes.

        :func:`guess_num_tasks` takes the length of the returned collection
        and hands it over to :func:`filter_nodes`. This base implementation
        does nothing.
        '''

    @abc.abstractmethod
    def filter_nodes(self, nodes, options):
        '''Filter nodes according to the scheduler options.

        :arg nodes: The nodes to filter; :func:`guess_num_tasks` passes the
            return value of :func:`get_all_nodes` here.
        :arg options: The scheduler options to filter by;
            :func:`guess_num_tasks` passes ``sched_access + options`` here.

        This base implementation does nothing.
        '''

    @abc.abstractmethod
    def submit(self):
        '''Submit this job.

        This base implementation does nothing.
        '''

    @abc.abstractmethod
    def wait(self):
        '''Wait for this job.

        This base implementation only checks that the job has been started,
        i.e., that it has a job ID.

        :raises JobNotStartedError: If the job has no job ID yet.
        '''
        if self._jobid is None:
            raise JobNotStartedError('cannot wait an unstarted job')

    @abc.abstractmethod
    def cancel(self):
        '''Cancel this job.

        This base implementation only checks that the job has been started,
        i.e., that it has a job ID.

        :raises JobNotStartedError: If the job has no job ID yet.
        '''
        if self._jobid is None:
            raise JobNotStartedError('cannot cancel an unstarted job')

    @abc.abstractmethod
    def finished(self):
        '''Poll this job.

        This base implementation only checks that the job has been started,
        i.e., that it has a job ID.

        :raises JobNotStartedError: If the job has no job ID yet.
        '''
        if self._jobid is None:
            raise JobNotStartedError('cannot poll an unstarted job')

    @property
    def nodelist(self):
        '''The names of the nodes that were assigned to this job.

        The value is :class:`None` as long as no nodes have been assigned to
        the job.

        How reliably this attribute is set depends on the scheduler backend:

        - ``slurm``, i.e., Slurm *with* accounting enabled: the attribute is
          set reliably.
        - ``squeue``, i.e., Slurm *without* accounting: the attribute might
          not be set for jobs that finish very quickly.
        - ``local``: the attribute is a one-element list containing the
          hostname of the current host.
        - ``pbs``: the attribute is *not* supported.

        A flexible regression test may use this attribute for determining
        the actual nodes that were assigned to it.
        For more information on flexible task allocation, please refer to the
        corresponding `section <advanced.html#flexible-regression-tests>`__ of
        the tutorial.

        .. versionadded:: 2.17

        '''
        return self._nodelist