class FieldTester:
    '''Minimal host class carrying two :class:`fields.TimerField` members.'''

    # Timer field declared with an explicit field name only.
    field = fields.TimerField('field')

    # Timer field that is additionally given ``type(None)``; presumably
    # this lets the field hold ``None`` as well -- confirm against
    # ``fields.TimerField``.
    field_maybe_none = fields.TimerField('field_maybe_none', type(None))
class FieldTester:
    '''Minimal host class carrying two :class:`fields.TimerField` members.

    Both fields are declared without passing a field name to the
    descriptor.
    '''

    # Timer field declared with no arguments.
    field = fields.TimerField()

    # Timer field that is additionally given ``type(None)``; presumably
    # this lets the field hold ``None`` as well -- confirm against
    # ``fields.TimerField``.
    field_maybe_none = fields.TimerField(type(None))
class Job:
    '''A job descriptor.

    A job descriptor is created by the framework after the "setup" phase and
    is associated with the test.

    .. warning::
       Users may not create a job descriptor directly.

    '''

    num_tasks = fields.TypedField('num_tasks', int)
    num_tasks_per_node = fields.TypedField('num_tasks_per_node',
                                           int, type(None))
    num_tasks_per_core = fields.TypedField('num_tasks_per_core',
                                           int, type(None))
    num_tasks_per_socket = fields.TypedField('num_tasks_per_socket',
                                             int, type(None))
    num_cpus_per_task = fields.TypedField('num_cpus_per_task',
                                          int, type(None))
    use_smt = fields.TypedField('use_smt', bool, type(None))
    time_limit = fields.TimerField('time_limit', type(None))

    #: Options to be passed to the backend job scheduler.
    #:
    #: :type: :class:`List[str]`
    #: :default: ``[]``
    options = fields.TypedField('options', typ.List[str])

    #: The (parallel) program launcher that will be used to launch the
    #: (parallel) executable of this job.
    #:
    #: Users are allowed to explicitly set the current job launcher, but this
    #: is only relevant in rare situations, such as when you want to wrap the
    #: current launcher command. For this specific scenario, you may have a
    #: look at the :class:`reframe.core.launchers.LauncherWrapper` class.
    #:
    #: The following example shows how you can replace the current partition's
    #: launcher for this test with the "local" launcher:
    #:
    #: .. code-block:: python
    #:
    #:    from reframe.core.backends import getlauncher
    #:
    #:    @rfm.run_after('setup')
    #:    def set_launcher(self):
    #:        self.job.launcher = getlauncher('local')()
    #:
    #: :type: :class:`reframe.core.launchers.JobLauncher`
    launcher = fields.TypedField('launcher', JobLauncher)
    scheduler = fields.TypedField('scheduler', JobScheduler)

    #: .. versionadded:: 2.21
    #:
    #: The ID of the current job.
    #:
    #: :type: :class:`int` or :class:`None`.
    jobid = fields.TypedField('jobid', int, type(None))

    #: .. versionadded:: 2.21
    #:
    #: The exit code of the job.
    #:
    #: This may or may not be set depending on the scheduler backend.
    #:
    #: :type: :class:`int` or :class:`None`.
    exitcode = fields.TypedField('exitcode', int, type(None))

    #: .. versionadded:: 2.21
    #:
    #: The state of the job.
    #:
    #: The value of this field is scheduler-specific.
    #:
    #: :type: :class:`str` or :class:`None`.
    state = fields.TypedField('state', str, type(None))

    #: .. versionadded:: 2.17
    #:
    #: The list of node names assigned to this job.
    #:
    #: This attribute is :class:`None` if no nodes are assigned to the job
    #: yet.
    #: This attribute is set reliably only for the ``slurm`` backend, i.e.,
    #: Slurm *with* accounting enabled.
    #: The ``squeue`` scheduler backend, i.e., Slurm *without* accounting,
    #: might not set this attribute for jobs that finish very quickly.
    #: For the ``local`` scheduler backend, this returns a one-element list
    #: containing the hostname of the current host.
    #:
    #: This attribute might be useful in a flexible regression test for
    #: determining the actual nodes that were assigned to the test.
    #: For more information on flexible node allocation, see the
    #: |--flex-alloc-nodes|_ command-line option.
    #:
    #: This attribute is *not* supported by the ``pbs`` scheduler backend.
    nodelist = fields.TypedField('nodelist', typ.List[str], type(None))

    # The sched_* arguments are exposed also to the frontend
    def __init__(self,
                 name,
                 workdir='.',
                 script_filename=None,
                 stdout=None,
                 stderr=None,
                 max_pending_time=None,
                 sched_flex_alloc_nodes=None,
                 sched_access=None,
                 sched_account=None,
                 sched_partition=None,
                 sched_reservation=None,
                 sched_nodelist=None,
                 sched_exclude_nodelist=None,
                 sched_exclusive_access=None,
                 sched_options=None):
        '''Initialize the job descriptor.

        :arg name: The name of the job; it is also used to derive the
            default script, stdout and stderr file names.
        :arg workdir: The working directory of the job.
        :arg script_filename: The job script file name. Defaults to
            ``<name>.sh``.
        :arg stdout: The standard output file name. Defaults to
            ``<name>.out``.
        :arg stderr: The standard error file name. Defaults to
            ``<name>.err``.
        :arg max_pending_time: Stored as is and exposed through
            :attr:`max_pending_time`.
        :arg sched_flex_alloc_nodes: Either an integer number of nodes or a
            string (``'idle'`` is treated specially); see
            :func:`guess_num_tasks`.
        :arg sched_access: Stored as is and exposed through
            :attr:`sched_access`. If :class:`None`, a fresh empty list is
            used.
        :arg sched_options: The initial value of :attr:`options`. A falsy
            value results in an empty list.

        The remaining ``sched_*`` arguments are stored as is and are exposed
        through the read-only properties of the same name.
        '''

        # Mutable fields
        self.num_tasks = 1
        self.num_tasks_per_node = None
        self.num_tasks_per_core = None
        self.num_tasks_per_socket = None
        self.num_cpus_per_task = None
        self.use_smt = None
        self.time_limit = None
        self.options = sched_options or []

        # Live job information; to be filled during job's lifetime by the
        # scheduler
        self.jobid = None
        self.exitcode = None
        self.state = None
        self.nodelist = None

        self._name = name
        self._workdir = workdir
        self._script_filename = script_filename or '%s.sh' % name
        self._stdout = stdout or '%s.out' % name
        self._stderr = stderr or '%s.err' % name
        self._max_pending_time = max_pending_time
        self._completion_time = None

        # Backend scheduler related information
        self._sched_flex_alloc_nodes = sched_flex_alloc_nodes

        # Never share a default list between job instances: create a new
        # one per job when the caller does not supply any.
        self._sched_access = [] if sched_access is None else sched_access
        self._sched_nodelist = sched_nodelist
        self._sched_exclude_nodelist = sched_exclude_nodelist
        self._sched_partition = sched_partition
        self._sched_reservation = sched_reservation
        self._sched_account = sched_account
        self._sched_exclusive_access = sched_exclusive_access

    @classmethod
    def create(cls, scheduler, launcher, *args, **kwargs):
        '''Create a job descriptor bound to a scheduler and a launcher.

        :arg scheduler: The value to assign to :attr:`scheduler`.
        :arg launcher: The value to assign to :attr:`launcher`.
        :arg args: Positional arguments forwarded to the constructor.
        :arg kwargs: Keyword arguments forwarded to the constructor.
        :returns: The newly created job descriptor, an instance of the class
            this method was called on.
        '''

        # Instantiate through ``cls`` so that subclasses get an instance of
        # their own type.
        ret = cls(*args, **kwargs)
        ret.scheduler, ret.launcher = scheduler, launcher
        return ret

    @property
    def name(self):
        '''The name of this job.'''
        return self._name

    @property
    def workdir(self):
        '''The working directory of this job.'''
        return self._workdir

    @property
    def max_pending_time(self):
        '''The ``max_pending_time`` value passed at construction.'''
        return self._max_pending_time

    @property
    def script_filename(self):
        '''The file name of the job script.'''
        return self._script_filename

    @property
    def stdout(self):
        '''The file name of the job's standard output.'''
        return self._stdout

    @property
    def stderr(self):
        '''The file name of the job's standard error.'''
        return self._stderr

    @property
    def sched_flex_alloc_nodes(self):
        '''The ``sched_flex_alloc_nodes`` value passed at construction.'''
        return self._sched_flex_alloc_nodes

    @property
    def sched_access(self):
        '''The ``sched_access`` value of this job (a list by default).'''
        return self._sched_access

    @property
    def sched_nodelist(self):
        '''The ``sched_nodelist`` value passed at construction.'''
        return self._sched_nodelist

    @property
    def sched_exclude_nodelist(self):
        '''The ``sched_exclude_nodelist`` value passed at construction.'''
        return self._sched_exclude_nodelist

    @property
    def sched_partition(self):
        '''The ``sched_partition`` value passed at construction.'''
        return self._sched_partition

    @property
    def sched_reservation(self):
        '''The ``sched_reservation`` value passed at construction.'''
        return self._sched_reservation

    @property
    def sched_account(self):
        '''The ``sched_account`` value passed at construction.'''
        return self._sched_account

    @property
    def sched_exclusive_access(self):
        '''The ``sched_exclusive_access`` value passed at construction.'''
        return self._sched_exclusive_access

    @property
    def completion_time(self):
        '''The completion time of this job.

        The value reported by the scheduler takes precedence; if that is
        falsy, the timestamp recorded by :func:`wait` or :func:`finished` is
        returned instead (:class:`None` if none has been recorded yet).
        '''
        return self.scheduler.completion_time(self) or self._completion_time

    def prepare(self, commands, environs=None, **gen_opts):
        '''Generate the script of this job.

        If :attr:`num_tasks` is not positive, the actual number of tasks is
        first determined through :func:`guess_num_tasks` and stored back in
        :attr:`num_tasks`.

        :arg commands: The commands to be written in the body of the script.
        :arg environs: The environments passed to
            ``runtime.emit_loadenv_commands()``; :class:`None` is treated as
            an empty list.
        :arg gen_opts: Options passed through to ``shell.generate_script()``.
        :raises JobError: If flexible node allocation is not implemented by
            the backend or if the minimum number of tasks cannot be
            satisfied.
        '''
        environs = environs or []
        if self.num_tasks <= 0:
            num_tasks_per_node = self.num_tasks_per_node or 1

            # A negative `num_tasks` denotes the minimum number of tasks
            # required; zero falls back to the number of tasks per node.
            min_num_tasks = (-self.num_tasks if self.num_tasks else
                             num_tasks_per_node)

            try:
                guessed_num_tasks = self.guess_num_tasks()
            except NotImplementedError as e:
                raise JobError('flexible node allocation is not supported by '
                               'this backend') from e

            if guessed_num_tasks < min_num_tasks:
                raise JobError(
                    'could not satisfy the minimum task requirement: '
                    'required %s, found %s' %
                    (min_num_tasks, guessed_num_tasks))

            self.num_tasks = guessed_num_tasks
            getlogger().debug('flex_alloc_nodes: setting num_tasks to %s' %
                              self.num_tasks)

        with shell.generate_script(self.script_filename,
                                   **gen_opts) as builder:
            builder.write_prolog(self.scheduler.emit_preamble(self))
            builder.write(runtime.emit_loadenv_commands(*environs))
            for c in commands:
                builder.write_body(c)

    def guess_num_tasks(self):
        '''Compute the number of tasks for a flexible node allocation.

        If :attr:`sched_flex_alloc_nodes` is an integer, it is taken as the
        number of nodes. Otherwise, the nodes returned by the scheduler's
        ``allnodes()`` are filtered through its ``filternodes()`` and, if
        :attr:`sched_flex_alloc_nodes` is ``'idle'``, further restricted to
        those for which ``is_available()`` is true.

        :returns: The number of selected nodes multiplied by
            :attr:`num_tasks_per_node` (treated as ``1`` if not set).
        :raises JobError: If :attr:`sched_flex_alloc_nodes` is a
            non-positive integer.
        '''
        num_tasks_per_node = self.num_tasks_per_node or 1
        if isinstance(self.sched_flex_alloc_nodes, int):
            if self.sched_flex_alloc_nodes <= 0:
                raise JobError('invalid number of flex_alloc_nodes: %s' %
                               self.sched_flex_alloc_nodes)

            return self.sched_flex_alloc_nodes * num_tasks_per_node

        available_nodes = self.scheduler.allnodes()
        getlogger().debug('flex_alloc_nodes: total available nodes %s ' %
                          len(available_nodes))

        # Try to guess the number of tasks now
        available_nodes = self.scheduler.filternodes(self, available_nodes)
        if self.sched_flex_alloc_nodes == 'idle':
            available_nodes = {n for n in available_nodes
                               if n.is_available()}
            getlogger().debug('flex_alloc_nodes: selecting idle nodes: '
                              'available nodes now: %s' %
                              len(available_nodes))

        return len(available_nodes) * num_tasks_per_node

    def submit(self):
        '''Submit this job through its scheduler.

        :returns: Whatever the scheduler's ``submit()`` returns.
        '''
        return self.scheduler.submit(self)

    def wait(self):
        '''Wait for this job through its scheduler.

        The completion timestamp is recorded once the scheduler's ``wait()``
        returns, unless one has been recorded already.

        :raises JobNotStartedError: If the job has no job ID yet.
        '''
        if self.jobid is None:
            raise JobNotStartedError('cannot wait an unstarted job')

        self.scheduler.wait(self)
        self._completion_time = self._completion_time or time.time()

    def cancel(self):
        '''Cancel this job through its scheduler.

        :returns: Whatever the scheduler's ``cancel()`` returns.
        :raises JobNotStartedError: If the job has no job ID yet.
        '''
        if self.jobid is None:
            raise JobNotStartedError('cannot cancel an unstarted job')

        return self.scheduler.cancel(self)

    def finished(self):
        '''Poll the scheduler for the completion of this job.

        The completion timestamp is recorded the first time the scheduler
        reports a true value.

        :returns: Whatever the scheduler's ``finished()`` returns.
        :raises JobNotStartedError: If the job has no job ID yet.
        '''
        if self.jobid is None:
            raise JobNotStartedError('cannot poll an unstarted job')

        done = self.scheduler.finished(self)
        if done:
            self._completion_time = self._completion_time or time.time()

        return done
class RegressionTest: """Base class for regression tests. All regression tests must eventually inherit from this class. This class provides the implementation of the pipeline phases that the regression test goes through during its lifetime. :arg name: The name of the test. If :class:`None`, the framework will try to assign a unique and human-readable name to the test. :arg prefix: The directory prefix of the test. If :class:`None`, the framework will set it to the directory containing the test file. .. note:: The ``name`` and ``prefix`` arguments are just maintained for backward compatibility to the old (prior to 2.13) syntax of regression tests. Users are advised to use the new simplified syntax for writing regression tests. Refer to the :doc:`ReFrame Tutorial </tutorial>` for more information. This class is also directly available under the top-level :mod:`reframe` module. .. versionchanged:: 2.13 """ #: The name of the test. #: #: :type: string that can contain any character except ``/`` name = fields.TypedField('name', typ.Str[r'[^\/]+']) #: List of programming environments supported by this test. #: #: If ``*`` is in the list then all programming environments are supported #: by this test. #: #: :type: :class:`List[str]` #: :default: ``[]`` #: #: .. note:: #: .. versionchanged:: 2.12 #: Programming environments can now be specified using wildcards. #: #: .. versionchanged:: 2.17 #: Support for wildcards is dropped. valid_prog_environs = fields.TypedField('valid_prog_environs', typ.List[str]) #: List of systems supported by this test. #: The general syntax for systems is ``<sysname>[:<partname]``. #: #: :type: :class:`List[str]` #: :default: ``[]`` valid_systems = fields.TypedField('valid_systems', typ.List[str]) #: A detailed description of the test. #: #: :type: :class:`str` #: :default: ``self.name`` descr = fields.TypedField('descr', str) #: The path to the source file or source directory of the test. 
#: #: It must be a path relative to the :attr:`sourcesdir`, pointing to a #: subfolder or a file contained in :attr:`sourcesdir`. This applies also #: in the case where :attr:`sourcesdir` is a Git repository. #: #: If it refers to a regular file, this file will be compiled using the #: :class:`SingleSource <reframe.core.buildsystems.SingleSource>` build #: system. #: If it refers to a directory, ReFrame will try to infer the build system #: to use for the project and will fall back in using the :class:`Make #: <reframe.core.buildsystems.Make>` build system, if it cannot find a more #: specific one. #: #: :type: :class:`str` #: :default: ``''`` sourcepath = fields.TypedField('sourcepath', str) #: The directory containing the test's resources. #: #: This directory may be specified with an absolute path or with a path #: relative to the location of the test. Its contents will always be copied #: to the stage directory of the test. #: #: This attribute may also accept a URL, in which case ReFrame will treat #: it as a Git repository and will try to clone its contents in the stage #: directory of the test. #: #: If set to :class:`None`, the test has no resources an no action is #: taken. #: #: :type: :class:`str` or :class:`None` #: :default: ``'src'`` #: #: .. note:: #: .. versionchanged:: 2.9 #: Allow :class:`None` values to be set also in regression tests #: with a compilation phase #: #: .. versionchanged:: 2.10 #: Support for Git repositories was added. sourcesdir = fields.TypedField('sourcesdir', str, type(None)) #: The build system to be used for this test. #: If not specified, the framework will try to figure it out automatically #: based on the value of :attr:`sourcepath`. #: #: This field may be set using either a string referring to a concrete #: build system class name #: (see `build systems <reference.html#build-systems>`__) or an instance of #: :class:`reframe.core.buildsystems.BuildSystem`. The former is the #: recommended way. 
#: #: #: :type: :class:`str` or :class:`reframe.core.buildsystems.BuildSystem`. #: :default: :class:`None`. #: #: .. versionadded:: 2.14 build_system = BuildSystemField('build_system', type(None)) #: List of shell commands to be executed before compiling. #: #: These commands are executed during the compilation phase and from #: inside the stage directory. **Each entry in the list spawns a new #: shell.** #: #: :type: :class:`List[str]` #: :default: ``[]`` prebuild_cmd = fields.TypedField('prebuild_cmd', typ.List[str]) #: List of shell commands to be executed after a successful compilation. #: #: These commands are executed during the compilation phase and from inside #: the stage directory. **Each entry in the list spawns a new shell.** #: #: :type: :class:`List[str]` #: :default: ``[]`` postbuild_cmd = fields.TypedField('postbuild_cmd', typ.List[str]) #: The name of the executable to be launched during the run phase. #: #: :type: :class:`str` #: :default: ``os.path.join('.', self.name)`` executable = fields.TypedField('executable', str) #: List of options to be passed to the :attr:`executable`. #: #: :type: :class:`List[str]` #: :default: ``[]`` executable_opts = fields.TypedField('executable_opts', typ.List[str]) #: List of shell commands to execute before launching this job. #: #: These commands do not execute in the context of ReFrame. #: Instead, they are emitted in the generated job script just before the #: actual job launch command. #: #: :type: :class:`List[str]` #: :default: ``[]`` #: #: .. note:: #: .. versionadded:: 2.10 pre_run = fields.TypedField('pre_run', typ.List[str]) #: List of shell commands to execute after launching this job. #: #: See :attr:`pre_run` for a more detailed description of the semantics. #: #: :type: :class:`List[str]` #: :default: ``[]`` #: #: .. note:: #: .. versionadded:: 2.10 post_run = fields.TypedField('post_run', typ.List[str]) #: List of files to be kept after the test finishes. 
#: #: By default, the framework saves the standard output, the standard error #: and the generated shell script that was used to run this test. #: #: These files will be copied over to the framework’s output directory #: during the :func:`cleanup` phase. #: #: Directories are also accepted in this field. #: #: Relative path names are resolved against the stage directory. #: #: :type: :class:`List[str]` #: :default: ``[]`` keep_files = fields.TypedField('keep_files', typ.List[str]) #: List of files or directories (relative to the :attr:`sourcesdir`) that #: will be symlinked in the stage directory and not copied. #: #: You can use this variable to avoid copying very large files to the stage #: directory. #: #: :type: :class:`List[str]` #: :default: ``[]`` readonly_files = fields.TypedField('readonly_files', typ.List[str]) #: Set of tags associated with this test. #: #: This test can be selected from the frontend using any of these tags. #: #: :type: :class:`Set[str]` #: :default: an empty set tags = fields.TypedField('tags', typ.Set[str]) #: List of people responsible for this test. #: #: When the test fails, this contact list will be printed out. #: #: :type: :class:`List[str]` #: :default: ``[]`` maintainers = fields.TypedField('maintainers', typ.List[str]) #: Mark this test as a strict performance test. #: #: If a test is marked as non-strict, the performance checking phase will #: always succeed, unless the ``--strict`` command-line option is passed #: when invoking ReFrame. #: #: :type: boolean #: :default: :class:`True` strict_check = fields.TypedField('strict_check', bool) #: Number of tasks required by this test. #: #: If the number of tasks is set to a number ``<=0``, ReFrame will try #: to flexibly allocate the number of tasks, based on the command line #: option ``--flex-alloc-tasks``. #: A negative number is used to indicate the minimum number of tasks #: required for the test. 
#: In this case the minimum number of tasks is the absolute value of #: the number, while #: Setting ``num_tasks`` to ``0`` is equivalent to setting it to #: ``-num_tasks_per_node``. #: #: :type: integral #: :default: ``1`` #: #: .. note:: #: .. versionchanged:: 2.15 #: Added support for flexible allocation of the number of tasks #: according to the ``--flex-alloc-tasks`` command line option #: (see `Flexible task allocation #: <running.html#flexible-task-allocation>`__) #: if the number of tasks is set to ``0``. #: .. versionchanged:: 2.16 #: Negative ``num_tasks`` is allowed for specifying the minimum #: number of required tasks by the test. num_tasks = fields.TypedField('num_tasks', int) #: Number of tasks per node required by this test. #: #: Ignored if :class:`None`. #: #: :type: integral or :class:`None` #: :default: :class:`None` num_tasks_per_node = fields.TypedField('num_tasks_per_node', int, type(None)) #: Number of GPUs per node required by this test. #: #: :type: integral #: :default: ``0`` num_gpus_per_node = fields.TypedField('num_gpus_per_node', int) #: Number of CPUs per task required by this test. #: #: Ignored if :class:`None`. #: #: :type: integral or :class:`None` #: :default: :class:`None` num_cpus_per_task = fields.TypedField('num_cpus_per_task', int, type(None)) #: Number of tasks per core required by this test. #: #: Ignored if :class:`None`. #: #: :type: integral or :class:`None` #: :default: :class:`None` num_tasks_per_core = fields.TypedField('num_tasks_per_core', int, type(None)) #: Number of tasks per socket required by this test. #: #: Ignored if :class:`None`. #: #: :type: integral or :class:`None` #: :default: :class:`None` num_tasks_per_socket = fields.TypedField('num_tasks_per_socket', int, type(None)) #: Specify whether this tests needs simultaneous multithreading enabled. #: #: Ignored if :class:`None`. 
#: #: :type: boolean or :class:`None` #: :default: :class:`None` use_multithreading = fields.TypedField('use_multithreading', bool, type(None)) #: Specify whether this test needs exclusive access to nodes. #: #: :type: boolean #: :default: :class:`False` exclusive_access = fields.TypedField('exclusive_access', bool) #: Always execute this test locally. #: #: :type: boolean #: :default: :class:`False` local = fields.TypedField('local', bool) #: The set of reference values for this test. #: #: The reference values are specified as a scoped dictionary keyed on the #: performance variables defined in :attr:`perf_patterns` and scoped under #: the system/partition combinations. #: The reference itself is a three- or four-tuple that contains the #: reference value, the lower and upper thresholds and, optionally, the #: measurement unit. #: An example follows: #: #: .. code:: python #: #: self.reference = { #: 'sys0:part0': { #: 'perfvar0': (50, -0.1, 0.1, 'Gflop/s'), #: 'perfvar1': (20, -0.1, 0.1, 'GB/s') #: }, #: 'sys0:part1': { #: 'perfvar0': (100, -0.1, 0.1, 'Gflop/s'), #: 'perfvar1': (40, -0.1, 0.1, 'GB/s') #: } #: } #: #: :type: A scoped dictionary with system names as scopes or :class:`None` #: :default: ``{}`` reference = fields.ScopedDictField('reference', typ.Tuple[object]) # FIXME: There is not way currently to express tuples of `float`s or # `None`s, so we just use the very generic `object` #: #: Refer to the :doc:`ReFrame Tutorial </tutorial>` for concrete usage #: examples. #: #: If set to :class:`None`, a sanity error will be raised during sanity #: checking. #: #: :type: A deferrable expression (i.e., the result of a :doc:`sanity #: function </sanity_functions_reference>`) or :class:`None` #: :default: :class:`None` #: #: .. note:: #: .. versionchanged:: 2.9 #: The default behaviour has changed and it is now considered a #: sanity failure if this attribute is set to :class:`None`. 
#: #: If a test doesn't care about its output, this must be stated #: explicitly as follows: #: #: :: #: #: self.sanity_patterns = sn.assert_found(r'.*', self.stdout) #: sanity_patterns = fields.TypedField('sanity_patterns', _DeferredExpression, type(None)) #: Patterns for verifying the performance of this test. #: #: Refer to the :doc:`ReFrame Tutorial </tutorial>` for concrete usage #: examples. #: #: If set to :class:`None`, no performance checking will be performed. #: #: :type: A dictionary with keys of type :class:`str` and deferrable #: expressions (i.e., the result of a :doc:`sanity function #: </sanity_functions_reference>`) as values. #: :class:`None` is also allowed. #: :default: :class:`None` perf_patterns = fields.TypedField('perf_patterns', typ.Dict[str, _DeferredExpression], type(None)) #: List of modules to be loaded before running this test. #: #: These modules will be loaded during the :func:`setup` phase. #: #: :type: :class:`List[str]` #: :default: ``[]`` modules = fields.TypedField('modules', typ.List[str]) #: Environment variables to be set before running this test. #: #: These variables will be set during the :func:`setup` phase. #: #: :type: :class:`Dict[str, str]` #: :default: ``{}`` variables = fields.TypedField('variables', typ.Dict[str, str]) #: Time limit for this test. #: #: Time limit is specified as a three-tuple in the form ``(hh, mm, ss)``, #: with ``hh >= 0``, ``0 <= mm <= 59`` and ``0 <= ss <= 59``. #: If set to :class:`None`, no time limit will be set. #: The default time limit of the system partition's scheduler will be used. #: #: #: :type: :class:`tuple[int]` #: :default: ``(0, 10, 0)`` #: #: .. note:: #: .. versionchanged:: 2.15 #: #: This attribute may be set to :class:`None`. #: time_limit = fields.TimerField('time_limit', type(None)) #: Extra resources for this test. #: #: This field is for specifying custom resources needed by this test. 
#: These resources are defined in the :doc:`configuration </configure>` #: of a system partition. #: For example, assume that two additional resources, named ``gpu`` and #: ``datawarp``, are defined in the configuration file as follows: #: #: :: #: #: 'resources': { #: 'gpu': [ #: '--gres=gpu:{num_gpus_per_node}' #: ], #: 'datawarp': [ #: '#DW jobdw capacity={capacity}', #: '#DW stage_in source={stagein_src}' #: ] #: } #: #: A regression test then may instantiate the above resources by setting #: the :attr:`extra_resources` attribute as follows: #: #: :: #: #: self.extra_resources = { #: 'gpu': {'num_gpus_per_node': 2} #: 'datawarp': { #: 'capacity': '100GB', #: 'stagein_src': '/foo' #: } #: } #: #: The generated batch script (for Slurm) will then contain the following #: lines: #: #: :: #: #: #SBATCH --gres=gpu:2 #: #DW jobdw capacity=100GB #: #DW stage_in source=/foo #: #: Notice that if the resource specified in the configuration uses an #: alternative directive prefix (in this case ``#DW``), this will replace #: the standard prefix of the backend scheduler (in this case ``#SBATCH``) #: #: If the resource name specified in this variable does not match a #: resource name in the partition configuration, it will be simply ignored. #: The :attr:`num_gpus_per_node` attribute translates internally to the #: ``_rfm_gpu`` resource, so that setting #: ``self.num_gpus_per_node = 2`` is equivalent to the following: #: #: :: #: #: self.extra_resources = {'_rfm_gpu': {'num_gpus_per_node': 2}} #: #: :type: :class:`Dict[str, Dict[str, object]]` #: :default: ``{}`` #: #: .. note:: #: .. versionadded:: 2.8 #: .. versionchanged:: 2.9 #: #: A new more powerful syntax was introduced #: that allows also custom job script directive prefixes. 
#: extra_resources = fields.TypedField('extra_resources', typ.Dict[str, typ.Dict[str, object]]) # Private properties _prefix = fields.TypedField('_prefix', str) _stagedir = fields.TypedField('_stagedir', str, type(None)) _stdout = fields.TypedField('_stdout', str, type(None)) _stderr = fields.TypedField('_stderr', str, type(None)) _current_partition = fields.TypedField('_current_partition', SystemPartition, type(None)) _current_environ = fields.TypedField('_current_environ', Environment, type(None)) _user_environ = fields.TypedField('_user_environ', Environment, type(None)) _job = fields.TypedField('_job', Job, type(None)) _build_job = fields.TypedField('_build_job', Job, type(None)) def __new__(cls, *args, **kwargs): obj = super().__new__(cls) # Create a test name from the class name and the constructor's # arguments name = cls.__qualname__ if args or kwargs: arg_names = map(lambda x: util.toalphanum(str(x)), itertools.chain(args, kwargs.values())) name += '_' + '_'.join(arg_names) obj._rfm_init(name, os.path.abspath(os.path.dirname(inspect.getfile(cls)))) return obj def __init__(self): pass def _rfm_init(self, name=None, prefix=None): if name is not None: self.name = name self.descr = self.name self.valid_prog_environs = [] self.valid_systems = [] self.sourcepath = '' self.prebuild_cmd = [] self.postbuild_cmd = [] self.executable = os.path.join('.', self.name) self.executable_opts = [] self.pre_run = [] self.post_run = [] self.keep_files = [] self.readonly_files = [] self.tags = set() self.maintainers = [] self._perfvalues = {} # Strict performance check, if applicable self.strict_check = True # Default is a single node check self.num_tasks = 1 self.num_tasks_per_node = None self.num_gpus_per_node = 0 self.num_cpus_per_task = None self.num_tasks_per_core = None self.num_tasks_per_socket = None self.use_multithreading = None self.exclusive_access = False # True only if check is to be run locally self.local = False # Static directories of the regression check if 
prefix is not None: self._prefix = os.path.abspath(prefix) self.sourcesdir = 'src' # Output patterns self.sanity_patterns = None # Performance patterns: None -> no performance checking self.perf_patterns = None self.reference = {} # Environment setup self.modules = [] self.variables = {} # Time limit for the check self.time_limit = (0, 10, 0) # Runtime information of the test self._current_partition = None self._current_environ = None self._user_environ = None # Associated job self._job = None self.extra_resources = {} # Dynamic paths of the regression check; will be set in setup() self._stagedir = None self._outputdir = None self._stdout = None self._stderr = None # Compilation process output self._build_job = None self._compile_proc = None self.build_system = None # Performance logging self._perf_logger = logging.null_logger # List of dependencies specified by the user self._userdeps = [] # Weak reference to the test case associated with this check self._case = None # Export read-only views to interesting fields @property def current_environ(self): """The programming environment that the regression test is currently executing with. This is set by the framework during the :func:`setup` phase. :type: :class:`reframe.core.environments.Environment`. """ return self._current_environ @property def current_partition(self): """The system partition the regression test is currently executing on. This is set by the framework during the :func:`setup` phase. :type: :class:`reframe.core.systems.SystemPartition`. """ return self._current_partition @property def current_system(self): """The system the regression test is currently executing on. This is set by the framework during the initialization phase. :type: :class:`reframe.core.runtime.HostSystem`. """ return rt.runtime().system @property def perfvalues(self): return util.MappingView(self._perfvalues) @property def job(self): """The job descriptor associated with this test. 
This is set by the framework during the :func:`setup` phase. :type: :class:`reframe.core.schedulers.Job`. """ return self._job @property def logger(self): """A logger associated with the this test. You can use this logger to log information for your test. """ return logging.getlogger() @property def prefix(self): """The prefix directory of the test. :type: :class:`str`. """ return self._prefix @property def stagedir(self): """The stage directory of the test. This is set during the :func:`setup` phase. :type: :class:`str`. """ return self._stagedir @property def outputdir(self): """The output directory of the test. This is set during the :func:`setup` phase. .. versionadded:: 2.13 :type: :class:`str`. """ return self._outputdir @property @deferrable def stdout(self): """The name of the file containing the standard output of the test. This is set during the :func:`setup` phase. This attribute is evaluated lazily, so it can by used inside sanity expressions. :type: :class:`str`. """ return self._job.stdout @property @deferrable def stderr(self): """The name of the file containing the standard error of the test. This is set during the :func:`setup` phase. This attribute is evaluated lazily, so it can by used inside sanity expressions. :type: :class:`str`. """ return self._job.stderr @property @deferrable def build_stdout(self): return self._build_job.stdout @property @deferrable def build_stderr(self): return self._build_job.stderr def info(self): """Provide live information of a running test. This method is used by the front-end to print the status message during the test's execution. This function is also called to provide the message for the ``check_info`` `logging attribute <running.html#logging>`__. By default, it returns a message reporting the test name, the current partition and the current programming environment that the test is currently executing on. :returns: a string with an informational message about this test .. 
note :: When overriding this method, you should pay extra attention on how you use the :class:`RegressionTest`'s attributes, because this method may be called at any point of the test's lifetime. .. versionadded:: 2.10 """ ret = self.name if self.current_partition: ret += ' on %s' % self.current_partition.fullname if self.current_environ: ret += ' using %s' % self.current_environ.name return ret def supports_system(self, partition_name): if '*' in self.valid_systems: return True if self.current_system.name in self.valid_systems: return True # Check if this is a relative name if partition_name.find(':') == -1: partition_name = '%s:%s' % (self.current_system.name, partition_name) return partition_name in self.valid_systems def supports_environ(self, env_name): if '*' in self.valid_prog_environs: return True return env_name in self.valid_prog_environs def is_local(self): """Check if the test will execute locally. A test executes locally if the :attr:`local` attribute is set or if the current partition's scheduler does not support job submission. 
""" if self._current_partition is None: return self.local return self.local or self._current_partition.scheduler.is_local def _setup_environ(self, environ): """Setup the current environment and load it.""" self._current_environ = environ # Set up user environment self._user_environ = Environment( type(self).__name__, self.modules, self.variables.items()) # Temporarily load the test's environment to record the actual module # load/unload sequence environ_save = EnvironmentSnapshot() # First load the local environment of the partition self.logger.debug('loading environment for the current partition') self._current_partition.local_env.load() self.logger.debug("loading current programming environment") self._current_environ.load() self.logger.debug("loading user's environment") self._user_environ.load() environ_save.load() def _setup_paths(self): """Setup the check's dynamic paths.""" self.logger.debug('setting up paths') try: resources = rt.runtime().resources self._stagedir = resources.make_stagedir( self.current_system.name, self._current_partition.name, self._current_environ.name, self.name) self._outputdir = resources.make_outputdir( self.current_system.name, self._current_partition.name, self._current_environ.name, self.name) except OSError as e: raise PipelineError('failed to set up paths') from e def _setup_job(self, **job_opts): """Setup the job related to this check.""" self.logger.debug('setting up the job descriptor') msg = 'job scheduler backend: {0}' self.logger.debug( msg.format('local' if self.is_local else self._current_partition. 
scheduler.registered_name)) # num_gpus_per_node is a managed resource if self.num_gpus_per_node > 0: self.extra_resources.setdefault( '_rfm_gpu', {'num_gpus_per_node': self.num_gpus_per_node}) if self.local: scheduler_type = getscheduler('local') launcher_type = getlauncher('local') else: scheduler_type = self._current_partition.scheduler launcher_type = self._current_partition.launcher self._job = scheduler_type( name='rfm_%s_job' % self.name, launcher=launcher_type(), workdir=self._stagedir, num_tasks=self.num_tasks, num_tasks_per_node=self.num_tasks_per_node, num_tasks_per_core=self.num_tasks_per_core, num_tasks_per_socket=self.num_tasks_per_socket, num_cpus_per_task=self.num_cpus_per_task, use_smt=self.use_multithreading, time_limit=self.time_limit, sched_access=self._current_partition.access, sched_exclusive_access=self.exclusive_access, **job_opts) # Get job options from managed resources and prepend them to # job_opts. We want any user supplied options to be able to # override those set by the framework. resources_opts = [] for r, v in self.extra_resources.items(): resources_opts.extend(self._current_partition.get_resource(r, **v)) self._job.options = resources_opts + self._job.options def _setup_perf_logging(self): self.logger.debug('setting up performance logging') self._perf_logger = logging.getperflogger(self) def setup(self, partition, environ, **job_opts): """The setup phase of the regression test pipeline. :arg partition: The system partition to set up this test for. :arg environ: The environment to set up this test for. :arg job_opts: Options to be passed through to the backend scheduler. When overriding this method users should always pass through ``job_opts`` to the base class method. :raises reframe.core.exceptions.ReframeError: In case of errors. 
""" self._current_partition = partition self._setup_environ(environ) self._setup_paths() self._setup_job(**job_opts) if self.perf_patterns is not None: self._setup_perf_logging() def _copy_to_stagedir(self, path): self.logger.debug('copying %s to stage directory (%s)' % (path, self._stagedir)) self.logger.debug('symlinking files: %s' % self.readonly_files) try: os_ext.copytree_virtual(path, self._stagedir, self.readonly_files) except (OSError, ValueError, TypeError) as e: raise PipelineError('virtual copying of files failed') from e def _clone_to_stagedir(self, url): self.logger.debug('cloning URL %s to stage directory (%s)' % (url, self._stagedir)) os_ext.git_clone(self.sourcesdir, self._stagedir) def compile(self): """The compilation phase of the regression test pipeline. :raises reframe.core.exceptions.ReframeError: In case of errors. """ if not self._current_environ: raise PipelineError('no programming environment set') # Copy the check's resources to the stage directory if self.sourcesdir: try: commonpath = os.path.commonpath( [self.sourcesdir, self.sourcepath]) except ValueError: commonpath = None if commonpath: self.logger.warn( "sourcepath `%s' seems to be a subdirectory of " "sourcesdir `%s', but it will be interpreted " "as relative to it." 
% (self.sourcepath, self.sourcesdir)) if os_ext.is_url(self.sourcesdir): self._clone_to_stagedir(self.sourcesdir) else: self._copy_to_stagedir( os.path.join(self._prefix, self.sourcesdir)) # Verify the sourcepath and determine the sourcepath in the stagedir if (os.path.isabs(self.sourcepath) or os.path.normpath(self.sourcepath).startswith('..')): raise PipelineError( 'self.sourcepath is an absolute path or does not point to a ' 'subfolder or a file contained in self.sourcesdir: ' + self.sourcepath) staged_sourcepath = os.path.join(self._stagedir, self.sourcepath) self.logger.debug('Staged sourcepath: %s' % staged_sourcepath) if os.path.isdir(staged_sourcepath): if not self.build_system: # Try to guess the build system cmakelists = os.path.join(staged_sourcepath, 'CMakeLists.txt') configure_ac = os.path.join(staged_sourcepath, 'configure.ac') configure_in = os.path.join(staged_sourcepath, 'configure.in') if os.path.exists(cmakelists): self.build_system = 'CMake' self.build_system.builddir = 'rfm_build' elif (os.path.exists(configure_ac) or os.path.exists(configure_in)): self.build_system = 'Autotools' self.build_system.builddir = 'rfm_build' else: self.build_system = 'Make' self.build_system.srcdir = self.sourcepath else: if not self.build_system: self.build_system = 'SingleSource' self.build_system.srcfile = self.sourcepath self.build_system.executable = self.executable # Prepare build job build_commands = [ *self.prebuild_cmd, *self.build_system.emit_build_commands(self._current_environ), *self.postbuild_cmd ] environs = [ self._current_partition.local_env, self._current_environ, self._user_environ ] self._build_job = getscheduler('local')( name='rfm_%s_build' % self.name, launcher=getlauncher('local')(), workdir=self._stagedir) with os_ext.change_dir(self._stagedir): try: self._build_job.prepare(build_commands, environs, login=True, trap_errors=True) except OSError as e: raise PipelineError('failed to prepare build job') from e self._build_job.submit() def 
compile_wait(self): """Wait for compilation phase to finish. .. versionadded:: 2.13 """ self._build_job.wait() self.logger.debug('compilation finished') # FIXME: this check is not reliable for certain scheduler backends if self._build_job.exitcode != 0: raise BuildError(self._build_job.stdout, self._build_job.stderr) def run(self): """The run phase of the regression test pipeline. This call is non-blocking. It simply submits the job associated with this test and returns. """ if not self.current_system or not self._current_partition: raise PipelineError('no system or system partition is set') exec_cmd = [ self.job.launcher.run_command(self.job), self.executable, *self.executable_opts ] commands = [*self.pre_run, ' '.join(exec_cmd), *self.post_run] environs = [ self._current_partition.local_env, self._current_environ, self._user_environ ] with os_ext.change_dir(self._stagedir): try: self._job.prepare(commands, environs, login=True) except OSError as e: raise PipelineError('failed to prepare job') from e self._job.submit() msg = ('spawned job (%s=%s)' % ('pid' if self.is_local() else 'jobid', self._job.jobid)) self.logger.debug(msg) def poll(self): """Poll the test's state. :returns: :class:`True` if the associated job has finished, :class:`False` otherwise. If no job descriptor is yet associated with this test, :class:`True` is returned. :raises reframe.core.exceptions.ReframeError: In case of errors. """ if not self._job: return True return self._job.finished() def wait(self): """Wait for this test to finish. :raises reframe.core.exceptions.ReframeError: In case of errors. """ self._job.wait() self.logger.debug('spawned job finished') def sanity(self): self.check_sanity() def performance(self): try: self.check_performance() except PerformanceError: if self.strict_check: raise def check_sanity(self): """The sanity checking phase of the regression test pipeline. :raises reframe.core.exceptions.SanityError: If the sanity check fails. 
""" if self.sanity_patterns is None: raise SanityError('sanity_patterns not set') with os_ext.change_dir(self._stagedir): success = evaluate(self.sanity_patterns) if not success: raise SanityError() def check_performance(self): """The performance checking phase of the regression test pipeline. :raises reframe.core.exceptions.SanityError: If the performance check fails. """ if self.perf_patterns is None: return with os_ext.change_dir(self._stagedir): # Check if default reference perf values are provided and # store all the variables tested in the performance check has_default = False variables = set() for key, ref in self.reference.items(): keyparts = key.split(self.reference.scope_separator) system = keyparts[0] varname = keyparts[-1] try: unit = ref[3] except IndexError: unit = None variables.add((varname, unit)) if system == '*': has_default = True break if not has_default: if not variables: # If empty, it means that self.reference was empty, so try # to infer their name from perf_patterns variables = {(name, None) for name in self.perf_patterns.keys()} for var in variables: name, unit = var ref_tuple = (0, None, None) if unit: ref_tuple += (unit, ) self.reference.update({'*': {name: ref_tuple}}) # We first evaluate and log all performance values and then we # check them against the reference. This way we always log them # even if the don't meet the reference. 
for tag, expr in self.perf_patterns.items(): value = evaluate(expr) key = '%s:%s' % (self._current_partition.fullname, tag) if key not in self.reference: raise SanityError( "tag `%s' not resolved in references for `%s'" % (tag, self._current_partition.fullname)) self._perfvalues[key] = (value, *self.reference[key]) self._perf_logger.log_performance(logging.INFO, tag, value, *self.reference[key]) for key, values in self._perfvalues.items(): val, ref, low_thres, high_thres, *_ = values tag = key.split(':')[-1] try: evaluate( assert_reference( val, ref, low_thres, high_thres, msg=('failed to meet reference: %s={0}, ' 'expected {1} (l={2}, u={3})' % tag))) except SanityError as e: raise PerformanceError(e) def _copy_job_files(self, job, dst): if job is None: return stdout = os.path.join(self._stagedir, job.stdout) stderr = os.path.join(self._stagedir, job.stderr) script = os.path.join(self._stagedir, job.script_filename) shutil.copy(stdout, dst) shutil.copy(stderr, dst) shutil.copy(script, dst) def _copy_to_outputdir(self): """Copy checks interesting files to the output directory.""" self.logger.debug('copying interesting files to output directory') self._copy_job_files(self._job, self.outputdir) self._copy_job_files(self._build_job, self.outputdir) # Copy files specified by the user for f in self.keep_files: f_orig = f if not os.path.isabs(f): f = os.path.join(self._stagedir, f) if os.path.isfile(f): shutil.copy(f, self.outputdir) elif os.path.isdir(f): shutil.copytree(f, os.path.join(self.outputdir, f_orig)) def cleanup(self, remove_files=False, unload_env=True): """The cleanup phase of the regression test pipeline. :arg remove_files: If :class:`True`, the stage directory associated with this test will be removed. :arg unload_env: If :class:`True`, the environment that was used to run this test will be unloaded. 
""" aliased = os.path.samefile(self._stagedir, self._outputdir) if aliased: self.logger.debug('skipping copy to output dir ' 'since they alias each other') else: self._copy_to_outputdir() if remove_files: self.logger.debug('removing stage directory') os_ext.rmtree(self._stagedir) if unload_env: self.logger.debug("unloading test's environment") self._user_environ.unload() self._current_environ.unload() self._current_partition.local_env.unload() # Dependency API def user_deps(self): return util.SequenceView(self._userdeps) def depends_on(self, target, how=DEPEND_BY_ENV, subdeps=None): if not isinstance(target, str): raise TypeError("target argument must be of type: `str'") if not isinstance(how, int): raise TypeError("how argument must be of type: `int'") if (subdeps is not None and not isinstance(subdeps, typ.Dict[str, typ.List[str]])): raise TypeError("subdeps argument must be of type " "`Dict[str, List[str]]' or `None'") self._userdeps.append((target, how, subdeps)) def getdep(self, target, environ): if self._case is None or self._case() is None: raise DependencyError('no test case is associated with this test') for d in self._case().deps: if d.check.name == target and d.environ.name == environ: return d.check raise DependencyError('could not resolve dependency to (%s, %s)' % (target, environ)) def __str__(self): return "%s(name='%s', prefix='%s')" % (type(self).__name__, self.name, self.prefix)
class Job(jsonext.JSONSerializable):
    '''A job descriptor.

    A job descriptor is created by the framework after the "setup" phase and
    is associated with the test.

    .. warning::
       Users may not create a job descriptor directly.
    '''

    num_tasks = fields.TypedField(int)
    num_tasks_per_node = fields.TypedField(int, type(None))
    num_tasks_per_core = fields.TypedField(int, type(None))
    num_tasks_per_socket = fields.TypedField(int, type(None))
    num_cpus_per_task = fields.TypedField(int, type(None))
    use_smt = fields.TypedField(bool, type(None))
    time_limit = fields.TimerField(type(None))

    #: Options to be passed to the backend job scheduler.
    #:
    #: :type: :class:`List[str]`
    #: :default: ``[]``
    options = fields.TypedField(typ.List[str])

    #: The (parallel) program launcher that will be used to launch the
    #: (parallel) executable of this job.
    #:
    #: Users are allowed to explicitly set the current job launcher, but this
    #: is only relevant in rare situations, such as when you want to wrap the
    #: current launcher command. For this specific scenario, you may have a
    #: look at the :class:`reframe.core.launchers.LauncherWrapper` class.
    #:
    #: The following example shows how you can replace the current partition's
    #: launcher for this test with the "local" launcher:
    #:
    #: .. code-block:: python
    #:
    #:    from reframe.core.backends import getlauncher
    #:
    #:    @rfm.run_after('setup')
    #:    def set_launcher(self):
    #:        self.job.launcher = getlauncher('local')()
    #:
    #: :type: :class:`reframe.core.launchers.JobLauncher`
    launcher = fields.TypedField(JobLauncher)

    # The sched_* arguments are exposed also to the frontend
    def __init__(self,
                 name,
                 workdir='.',
                 script_filename=None,
                 stdout=None,
                 stderr=None,
                 max_pending_time=None,
                 sched_flex_alloc_nodes=None,
                 sched_access=None,
                 sched_exclusive_access=None,
                 sched_options=None):
        '''Initialize the job descriptor.

        ``script_filename``, ``stdout`` and ``stderr`` default to
        ``<name>.sh``, ``<name>.out`` and ``<name>.err`` respectively.
        ``sched_access`` defaults to a fresh empty list and
        ``sched_options`` is copied into :attr:`cli_options`.
        '''

        # Mutable fields
        self.num_tasks = 1
        self.num_tasks_per_node = None
        self.num_tasks_per_core = None
        self.num_tasks_per_socket = None
        self.num_cpus_per_task = None
        self.use_smt = None
        self.time_limit = None
        self.cli_options = list(sched_options) if sched_options else []
        self.options = []

        self._name = name
        self._workdir = workdir
        self._script_filename = script_filename or '%s.sh' % name
        self._stdout = stdout or '%s.out' % name
        self._stderr = stderr or '%s.err' % name
        self._max_pending_time = max_pending_time

        # Backend scheduler related information
        self._sched_flex_alloc_nodes = sched_flex_alloc_nodes

        # A list passed by the caller is stored as is; only the default is
        # created per instance, so that jobs never share (and mutate) one
        # and the same default list through the `sched_access` property.
        self._sched_access = sched_access if sched_access is not None else []
        self._sched_exclusive_access = sched_exclusive_access

        # Live job information; to be filled during job's lifetime by the
        # scheduler
        self._jobid = None
        self._exitcode = None
        self._state = None
        self._nodelist = None
        self._submit_time = None
        self._completion_time = None

        # Job errors discovered while polling; if not None this will be raised
        # in finished()
        self._exception = None

    @classmethod
    def create(cls, scheduler, launcher, *args, **kwargs):
        '''Create a job through ``scheduler.make_job()`` and associate the
        scheduler and the launcher with it.

        ``args`` and ``kwargs`` are forwarded to ``scheduler.make_job()``.
        '''
        ret = scheduler.make_job(*args, **kwargs)
        ret._scheduler = scheduler
        ret.launcher = launcher
        return ret

    @property
    def name(self):
        '''The name of this job, as passed to the constructor.'''
        return self._name

    @property
    def workdir(self):
        '''The working directory of this job, as passed to the constructor.'''
        return self._workdir

    @property
    def max_pending_time(self):
        '''The ``max_pending_time`` value passed to the constructor.'''
        return self._max_pending_time

    @property
    def script_filename(self):
        '''The filename of the job script (``<name>.sh`` by default).'''
        return self._script_filename

    @property
    def stdout(self):
        '''The standard output file of this job (``<name>.out`` by
        default).'''
        return self._stdout

    @property
    def stderr(self):
        '''The standard error file of this job (``<name>.err`` by
        default).'''
        return self._stderr

    @property
    def sched_flex_alloc_nodes(self):
        '''The ``sched_flex_alloc_nodes`` value passed to the constructor.'''
        return self._sched_flex_alloc_nodes

    @property
    def sched_access(self):
        '''The ``sched_access`` list of this job.'''
        return self._sched_access

    @property
    def sched_exclusive_access(self):
        '''The ``sched_exclusive_access`` value passed to the constructor.'''
        return self._sched_exclusive_access

    @property
    def completion_time(self):
        '''The completion time of this job as a floating point number
        expressed in seconds since the epoch, in UTC.

        This attribute is :class:`None` if the job hasn't been finished yet,
        or if ReFrame runtime hasn't perceived it yet.

        The accuracy of this timestamp depends on the backend scheduler.
        The ``slurm`` scheduler backend relies on job accounting and returns
        the actual termination time of the job. The rest of the backends
        report as completion time the moment when the framework realizes that
        the spawned job has finished. In this case, the accuracy depends on
        the execution policy used. If tests are executed with the serial
        execution policy, this is close to the real completion time, but
        if the asynchronous execution policy is used, it can differ
        significantly.

        :type: :class:`float` or :class:`None`
        '''
        return self._completion_time

    @property
    def scheduler(self):
        '''The scheduler associated with this job.

        This is set by :func:`create`, not by the constructor.
        '''
        return self._scheduler

    @property
    def exception(self):
        '''The job error discovered while polling, or :class:`None`.'''
        return self._exception

    @property
    def jobid(self):
        '''The ID of this job.

        .. versionadded:: 2.21

        .. versionchanged:: 3.2
           Job ID type is now a string.

        :type: :class:`str` or :class:`None`
        '''
        return self._jobid

    @property
    def exitcode(self):
        '''The exit code of this job.

        This may or may not be set depending on the scheduler backend.

        .. versionadded:: 2.21

        :type: :class:`int` or :class:`None`
        '''
        return self._exitcode

    @property
    def state(self):
        '''The state of this job.

        The value of this field is scheduler-specific.

        .. versionadded:: 2.21

        :type: :class:`str` or :class:`None`
        '''
        return self._state

    @property
    def nodelist(self):
        '''The list of node names assigned to this job.

        This attribute is :class:`None` if no nodes are assigned to the job
        yet.
        This attribute is set reliably only for the ``slurm`` backend, i.e.,
        Slurm *with* accounting enabled.
        The ``squeue`` scheduler backend, i.e., Slurm *without* accounting,
        might not set this attribute for jobs that finish very quickly.
        For the ``local`` scheduler backend, this returns a one-element list
        containing the hostname of the current host.

        This attribute might be useful in a flexible regression test for
        determining the actual nodes that were assigned to the test.
        For more information on flexible node allocation, see the
        |--flex-alloc-nodes|_ command-line option.

        This attribute is *not* supported by the ``pbs`` scheduler backend.

        .. versionadded:: 2.17

        :type: :class:`List[str]` or :class:`None`
        '''
        return self._nodelist

    @property
    def submit_time(self):
        '''The submission time of this job as a floating point number
        expressed in seconds since the epoch, in UTC.

        This attribute is :class:`None` if the job hasn't been submitted yet.

        This attribute is set right after the job is submitted and can vary
        significantly from the time the jobs starts running, depending on the
        scheduler.

        :type: :class:`float` or :class:`None`
        '''
        return self._submit_time

    def prepare(self, commands, environs=None, prepare_cmds=None, **gen_opts):
        '''Generate the job script of this job.

        If :attr:`num_tasks` is not positive, the number of tasks is first
        guessed through :func:`guess_num_tasks` (flexible node allocation).

        :arg commands: The commands to write in the body of the script.
        :arg environs: The environments whose load commands are emitted
            before ``commands``.
        :arg prepare_cmds: Commands to write in the body of the script
            before the environment load commands.
        :arg gen_opts: Options forwarded to ``shell.generate_script()``.
        :raises JobError: If flexible node allocation is requested but it is
            not supported by the scheduler or the minimum task requirement
            cannot be satisfied.
        '''
        environs = environs or []
        if self.num_tasks <= 0:
            getlogger().debug('[F] Flexible node allocation requested')
            num_tasks_per_node = self.num_tasks_per_node or 1

            # A negative `num_tasks` encodes the minimum number of tasks
            # required; zero falls back to one node's worth of tasks.
            min_num_tasks = (-self.num_tasks if self.num_tasks else
                             num_tasks_per_node)

            try:
                guessed_num_tasks = self.guess_num_tasks()
            except NotImplementedError as e:
                raise JobError('flexible node allocation is not supported by '
                               'this scheduler backend') from e

            if guessed_num_tasks < min_num_tasks:
                raise JobError(
                    'could not satisfy the minimum task requirement: '
                    'required %s, found %s' %
                    (min_num_tasks, guessed_num_tasks)
                )

            self.num_tasks = guessed_num_tasks
            getlogger().debug(f'[F] Setting num_tasks to {self.num_tasks}')

        with shell.generate_script(self.script_filename,
                                   **gen_opts) as builder:
            builder.write_prolog(self.scheduler.emit_preamble(self))
            prepare_cmds = prepare_cmds or []
            for c in prepare_cmds:
                builder.write_body(c)

            builder.write(runtime.emit_loadenv_commands(*environs))
            for c in commands:
                builder.write_body(c)

    def guess_num_tasks(self):
        '''Guess the number of tasks for flexible node allocation.

        If :attr:`sched_flex_alloc_nodes` is an integer, it is treated as the
        number of nodes to use. Otherwise, it is treated as a node state (or
        ``'all'``) used for selecting among the nodes that the scheduler
        reports and filters for this job.

        :returns: The number of selected nodes multiplied by the number of
            tasks per node (``1`` if :attr:`num_tasks_per_node` is not set).
        :raises JobError: If an integer :attr:`sched_flex_alloc_nodes` is not
            positive.
        '''
        num_tasks_per_node = self.num_tasks_per_node or 1
        if isinstance(self.sched_flex_alloc_nodes, int):
            if self.sched_flex_alloc_nodes <= 0:
                raise JobError('invalid number of flex_alloc_nodes: %s' %
                               self.sched_flex_alloc_nodes)

            return self.sched_flex_alloc_nodes * num_tasks_per_node

        available_nodes = self.scheduler.allnodes()
        getlogger().debug(
            f'[F] Total available nodes: {len(available_nodes)}'
        )

        # Try to guess the number of tasks now
        available_nodes = self.scheduler.filternodes(self, available_nodes)

        # NOTE(review): `sched_flex_alloc_nodes` defaults to `None` in the
        # constructor, in which case `casefold()` raises `AttributeError`;
        # presumably callers always set it before requesting flexible
        # allocation -- confirm.
        if self.sched_flex_alloc_nodes.casefold() != 'all':
            available_nodes = {n for n in available_nodes
                               if n.in_state(self.sched_flex_alloc_nodes)}
            getlogger().debug(
                '[F] Selecting nodes in state '
                f'{self.sched_flex_alloc_nodes!r}: '
                f'available nodes now: {len(available_nodes)}'
            )

        return len(available_nodes) * num_tasks_per_node

    def submit(self):
        '''Submit this job through its scheduler.'''
        return self.scheduler.submit(self)

    def wait(self):
        '''Wait for this job to finish.

        :raises JobNotStartedError: If the job has no job ID yet.
        '''
        if self.jobid is None:
            raise JobNotStartedError('cannot wait an unstarted job')

        self.scheduler.wait(self)

        # Keep a completion time already set elsewhere; otherwise use now
        self._completion_time = self._completion_time or time.time()

    def cancel(self):
        '''Cancel this job through its scheduler.

        :raises JobNotStartedError: If the job has no job ID yet.
        '''
        if self.jobid is None:
            raise JobNotStartedError('cannot cancel an unstarted job')

        return self.scheduler.cancel(self)

    def finished(self):
        '''Poll the scheduler for the completion of this job.

        :returns: Whatever ``scheduler.finished()`` returns for this job.
        :raises JobNotStartedError: If the job has no job ID yet.
        '''
        if self.jobid is None:
            raise JobNotStartedError('cannot poll an unstarted job')

        done = self.scheduler.finished(self)
        if done:
            # Keep a completion time already set elsewhere; otherwise use now
            self._completion_time = self._completion_time or time.time()

        return done

    def __eq__(self, other):
        # Jobs are equal only if they are of exactly the same type and refer
        # to the same job ID
        return type(self) is type(other) and self.jobid == other.jobid

    def __hash__(self):
        return hash(self.jobid)
class FieldTester:
    '''Holder of two ``fields.TimerField`` descriptors as class attributes.'''

    # NOTE(review): other ``FieldTester`` definitions in this file construct
    # ``TimerField`` with different signatures (e.g. ``type(None)`` instead
    # of ``allow_none=True``); confirm which one matches the ``fields``
    # module in use.

    # Timer field declared with its name only
    field = fields.TimerField('field')

    # Timer field declared with its name and ``allow_none=True``
    field_maybe_none = fields.TimerField('field_maybe_none', allow_none=True)
class RegressionTest: """Base class for regression tests. All regression tests must eventually inherit from this class. This class provides the implementation of the pipeline phases that the regression test goes through during its lifetime. :arg name: The name of the test. This is the only argument that the users may specify freely. :arg prefix: The directory prefix of the test. You should initialize this to the directory containing the file that defines the regression test. You can achieve this by always passing ``os.path.dirname(__file__)``. :arg system: The system that this regression test will run on. The framework takes care of initializing and passing correctly this argument. :arg resources: An object managing the framework's resources. The framework takes care of initializing and passing correctly this argument. Concrete regression test subclasses should call the base constructor as follows: :: class MyTest(RegressionTest): def __init__(self, my_test_args, **kwargs): super().__init__('mytest', os.path.dirname(__file__), **kwargs) """ #: The name of the test. #: #: :type: Alphanumeric string. name = fields.AlphanumericField('name') #: List of programming environments supported by this test. #: #: :type: :class:`list[str]` #: :default: ``[]`` #: #: .. note:: #: .. versionchanged:: 2.12 #: Programming environments can now be specified using wildcards. valid_prog_environs = fields.TypedListField('valid_prog_environs', str) #: List of systems supported by this test. #: The general syntax for systems is ``<sysname>[:<partname]``. #: #: :type: :class:`list[str]` #: :default: ``[]`` valid_systems = fields.TypedListField('valid_systems', str) #: A detailed description of the test. #: #: :type: :class:`str` #: :default: ``self.name`` descr = fields.StringField('descr') #: The path to the source file or source directory of the test. #: #: It must be a path relative to the :attr:`sourcesdir`, pointing to a #: subfolder or a file contained in :attr:`sourcesdir`. 
This applies also #: in the case where :attr:`sourcesdir` is a Git repository. #: #: If it refers to a regular file, this file will be compiled (its language #: will be automatically recognized). #: If it refers to a directory, ``make`` will be invoked in that directory. #: #: :type: :class:`str` #: :default: ``''`` sourcepath = fields.StringField('sourcepath') #: The directory containing the test's resources. #: #: This directory may be specified with an absolute path or with a path #: relative to the location of the test. Its contents will always be copied #: to the stage directory of the test. #: #: This attribute may also accept a URL, in which case ReFrame will treat it #: as a Git repository and will try to clone its contents in the stage #: directory of the test. #: #: If set to :class:`None`, the test has no resources an no action is taken. #: #: :type: :class:`str` or :class:`None` #: :default: ``'src'`` #: #: .. note:: #: .. versionchanged:: 2.9 #: Allow :class:`None` values to be set also in regression tests #: with a compilation phase #: #: .. versionchanged:: 2.10 #: Support for Git repositories was added. sourcesdir = fields.StringField('sourcesdir', allow_none=True) #: List of shell commands to be executed before compiling. #: #: These commands are executed during the compilation phase and from #: inside the stage directory. **Each entry in the list spawns a new shell.** #: #: :type: :class:`list[str]` #: :default: ``[]`` prebuild_cmd = fields.TypedListField('prebuild_cmd', str) #: List of shell commands to be executed after a successful compilation. #: #: These commands are executed during the compilation phase and from inside #: the stage directory. **Each entry in the list spawns a new shell.** #: #: :type: :class:`list[str]` #: :default: ``[]`` postbuild_cmd = fields.TypedListField('postbuild_cmd', str) #: The name of the executable to be launched during the run phase. 
#: #: :type: :class:`str` #: :default: ``os.path.join('.', self.name)`` executable = fields.StringField('executable') #: List of options to be passed to the :attr:`executable`. #: #: :type: :class:`list[str]` #: :default: ``[]`` executable_opts = fields.TypedListField('executable_opts', str) #: List of shell commands to execute before launching this job. #: #: These commands do not execute in the context of ReFrame. #: Instead, they are emitted in the generated job script just before the #: actual job launch command. #: #: :type: :class:`list` of :class:`str` #: :default: ``[]`` #: #: .. note:: #: .. versionadded:: 2.10 pre_run = fields.TypedListField('pre_run', str) #: List of shell commands to execute after launching this job. #: #: See :attr:`pre_run` for a more detailed description of the semantics. #: #: :type: :class:`list` of :class:`str` #: :default: ``[]`` #: #: .. note:: #: .. versionadded:: 2.10 post_run = fields.TypedListField('post_run', str) #: List of files to be kept after the test finishes. #: #: By default, the framework saves the standard output, the standard error #: and the generated shell script that was used to run this test. #: #: These files will be copied over to the framework’s output directory #: during the :func:`cleanup` phase. #: #: Directories are also accepted in this field. #: #: Relative path names are resolved against the stage directory. #: #: :type: :class:`list[str]` #: :default: ``[]`` keep_files = fields.TypedListField('keep_files', str) #: List of files or directories (relative to the :attr:`sourcesdir`) that #: will be symlinked in the stage directory and not copied. #: #: You can use this variable to avoid copying very large files to the stage #: directory. #: #: :type: :class:`list[str]` #: :default: ``[]`` readonly_files = fields.TypedListField('readonly_files', str) #: Set of tags associated with this test. #: #: This test can be selected from the frontend using any of these tags. 
#: #: :type: :class:`set[str]` #: :default: an empty set tags = fields.TypedSetField('tags', str) #: List of people responsible for this test. #: #: When the test fails, this contact list will be printed out. #: #: :type: :class:`list[str]` #: :default: ``[]`` maintainers = fields.TypedListField('maintainers', str) #: Mark this test as a strict performance test. #: #: If a test is marked as non-strict, the performance checking phase will #: always succeed, unless the ``--strict`` command-line option is passed #: when invoking ReFrame. #: #: :type: boolean #: :default: :class:`True` strict_check = fields.BooleanField('strict_check') #: Number of tasks required by this test. #: #: If the number of tasks is set to ``0``, ReFrame will try to use all #: the available nodes of a reservation. A reservation *must* be specified #: through the `--reservation` command-line option, otherwise the #: regression test will fail during submission. ReFrame will try to run the #: test on all the nodes of the reservation that satisfy the selection #: criteria of the current #: `virtual partition <configure.html#partition-configuration>`__ #: (i.e., constraints and/or partitions). #: #: :type: integral #: :default: ``1`` #: #: .. note:: #: .. versionchanged:: 2.9 #: Added support for running the test using all the nodes of the #: specified reservation if the number of tasks is set to ``0``. num_tasks = fields.IntegerField('num_tasks') #: Number of tasks per node required by this test. #: #: Ignored if :class:`None`. #: #: :type: integral or :class:`None` #: :default: :class:`None` num_tasks_per_node = fields.IntegerField('num_tasks_per_node', allow_none=True) #: Number of GPUs per node required by this test. #: #: :type: integral #: :default: ``0`` num_gpus_per_node = fields.IntegerField('num_gpus_per_node') #: Number of CPUs per task required by this test. #: #: Ignored if :class:`None`. 
#: #: :type: integral or :class:`None` #: :default: :class:`None` num_cpus_per_task = fields.IntegerField('num_cpus_per_task', allow_none=True) #: Number of tasks per core required by this test. #: #: Ignored if :class:`None`. #: #: :type: integral or :class:`None` #: :default: :class:`None` num_tasks_per_core = fields.IntegerField('num_tasks_per_core', allow_none=True) #: Number of tasks per socket required by this test. #: #: Ignored if :class:`None`. #: #: :type: integral or :class:`None` #: :default: :class:`None` num_tasks_per_socket = fields.IntegerField('num_tasks_per_socket', allow_none=True) #: Specify whether this tests needs simultaneous multithreading enabled. #: #: Ignored if :class:`None`. #: #: :type: boolean or :class:`None` #: :default: :class:`None` use_multithreading = fields.BooleanField('use_multithreading', allow_none=True) #: Specify whether this test needs exclusive access to nodes. #: #: :type: boolean #: :default: :class:`False` exclusive_access = fields.BooleanField('exclusive_access') #: Always execute this test locally. #: #: :type: boolean #: :default: :class:`False` local = fields.BooleanField('local') #: The set of reference values for this test. #: #: Refer to the :doc:`ReFrame Tutorial </tutorial>` for concrete usage #: examples. #: #: :type: A scoped dictionary with system names as scopes or :class:`None` #: :default: ``{}`` reference = fields.ScopedDictField('reference', (tuple, object)) # FIXME: There is not way currently to express tuples of `float`s or # `None`s, so we just use the very generic `object` #: #: Refer to the :doc:`ReFrame Tutorial </tutorial>` for concrete usage #: examples. #: #: If set to :class:`None`, a sanity error will be raised during sanity #: checking. #: #: :type: A deferrable expression (i.e., the result of a :doc:`sanity #: function </sanity_functions_reference>`) or :class:`None` #: :default: :class:`None` #: #: .. note:: #: .. 
versionchanged:: 2.9 #: The default behaviour has changed and it is now considered a #: sanity failure if this attribute is set to :class:`None`. #: #: If a test doesn't care about its output, this must be stated #: explicitly as follows: #: #: :: #: #: self.sanity_patterns = sn.assert_found(r'.*', self.stdout) #: sanity_patterns = fields.TypedField('sanity_patterns', _DeferredExpression, allow_none=True) #: Patterns for verifying the performance of this test. #: #: Refer to the :doc:`ReFrame Tutorial </tutorial>` for concrete usage #: examples. #: #: If set to :class:`None`, no performance checking will be performed. #: #: :type: A dictionary with keys of type :class:`str` and deferrable #: expressions (i.e., the result of a :doc:`sanity function #: </sanity_functions_reference>`) as values. #: :class:`None` is also allowed. #: :default: :class:`None` perf_patterns = fields.TypedDictField('perf_patterns', str, _DeferredExpression, allow_none=True) #: List of modules to be loaded before running this test. #: #: These modules will be loaded during the :func:`setup` phase. #: #: :type: :class:`list[str]` #: :default: ``[]`` modules = fields.TypedListField('modules', str) #: Environment variables to be set before running this test. #: #: These variables will be set during the :func:`setup` phase. #: #: :type: :class:`dict[str, str]` #: :default: ``{}`` variables = fields.TypedDictField('variables', str, str) #: Time limit for this test. #: #: Time limit is specified as a three-tuple in the form ``(hh, mm, ss)``, #: with ``hh >= 0``, ``0 <= mm <= 59`` and ``0 <= ss <= 59``. #: #: :type: :class:`tuple[int]` #: :default: ``(0, 10, 0)`` time_limit = fields.TimerField('time_limit') #: Extra resources for this test. #: #: This field is for specifying custom resources needed by this test. #: These resources are defined in the :doc:`configuration </configure>` #: of a system partition. 
#: For example, assume that two additional resources, named ``gpu`` and #: ``datawarp``, are defined in the configuration file as follows: #: #: :: #: #: 'resources': { #: 'gpu': [ #: '--gres=gpu:{num_gpus_per_node}' #: ], #: 'datawarp': [ #: '#DW jobdw capacity={capacity}', #: '#DW stage_in source={stagein_src}' #: ] #: } #: #: A regression test then may instantiate the above resources by setting the #: :attr:`extra_resources` attribute as follows: #: #: :: #: #: self.extra_resources = { #: 'gpu': {'num_gpus_per_node': 2} #: 'datawarp': { #: 'capacity': '100GB', #: 'stagein_src': '/foo' #: } #: } #: #: The generated batch script (for Slurm) will then contain the following #: lines: #: #: :: #: #: #SBATCH --gres=gpu:2 #: #DW jobdw capacity=100GB #: #DW stage_in source=/foo #: #: Notice that if the resource specified in the configuration uses an #: alternative directive prefix (in this case ``#DW``), this will replace #: the standard prefix of the backend scheduler (in this case ``#SBATCH``) #: #: If the resource name specified in this variable does not match a resource #: name in the partition configuration, it will be simply ignored. #: The :attr:`num_gpus_per_node` attribute translates internally to the #: ``_rfm_gpu`` resource, so that setting #: ``self.num_gpus_per_node = 2`` is equivalent to the following: #: #: :: #: #: self.extra_resources = {'_rfm_gpu': {'num_gpus_per_node': 2}} #: #: :type: :class:`dict[str, dict[str, object]]` #: :default: ``{}`` #: #: .. note:: #: .. versionadded:: 2.8 #: .. versionchanged:: 2.9 #: #: A new more powerful syntax was introduced #: that allows also custom job script directive prefixes. 
#: extra_resources = fields.AggregateTypeField('extra_resources', (dict, (str, (dict, (str, object))))) # Private properties _prefix = fields.StringField('_prefix') _stagedir = fields.StringField('_stagedir', allow_none=True) _stdout = fields.StringField('_stdout', allow_none=True) _stderr = fields.StringField('_stderr', allow_none=True) _perf_logfile = fields.StringField('_perf_logfile', allow_none=True) _current_system = fields.TypedField('_current_system', System) _current_partition = fields.TypedField('_current_partition', SystemPartition, allow_none=True) _current_environ = fields.TypedField('_current_environ', Environment, allow_none=True) _job = fields.TypedField('_job', Job, allow_none=True) def __init__(self, name, prefix, system, resources): self.name = name self.descr = name self.valid_prog_environs = [] self.valid_systems = [] self.sourcepath = '' self.prebuild_cmd = [] self.postbuild_cmd = [] self.executable = os.path.join('.', self.name) self.executable_opts = [] self.pre_run = [] self.post_run = [] self.keep_files = [] self.readonly_files = [] self.tags = set() self.maintainers = [] # Strict performance check, if applicable self.strict_check = True # Default is a single node check self.num_tasks = 1 self.num_tasks_per_node = None self.num_gpus_per_node = 0 self.num_cpus_per_task = None self.num_tasks_per_core = None self.num_tasks_per_socket = None self.use_multithreading = None self.exclusive_access = False # True only if check is to be run locally self.local = False # Static directories of the regression check self._prefix = os.path.abspath(prefix) self.sourcesdir = 'src' # Output patterns self.sanity_patterns = None # Performance patterns: None -> no performance checking self.perf_patterns = None self.reference = {} # Environment setup self.modules = [] self.variables = {} # Time limit for the check self.time_limit = (0, 10, 0) # Runtime information of the test self._current_system = system self._current_partition = None self._current_environ = 
None # Associated job self._job = None self.extra_resources = {} # Dynamic paths of the regression check; will be set in setup() self._resources_mgr = resources self._stagedir = None self._stdout = None self._stderr = None # Compilation task output self._compile_task = None # Performance logging self._perf_logger = logging.null_logger self._perf_logfile = None # Export read-only views to interesting fields @property def current_environ(self): """The programming environment that the regression test is currently executing with. This is set by the framework during the :func:`setup` phase. :type: :class:`reframe.core.environments.Environment`. """ return self._current_environ @property def current_partition(self): """The system partition the regression test is currently executing on. This is set by the framework during the :func:`setup` phase. :type: :class:`reframe.core.systems.SystemPartition`. """ return self._current_partition @property def current_system(self): """The system the regression test is currently executing on. This is set by the framework during the initialization phase. :type: :class:`reframe.core.systems.System`. """ return self._current_system @property def job(self): """The job descriptor associated with this test. This is set by the framework during the :func:`setup` phase. :type: :class:`reframe.core.schedulers.Job`. """ return self._job @property def logger(self): """A logger associated with the this test. You can use this logger to log information for your test. """ return logging.getlogger() @property def prefix(self): """The prefix directory of the test. :type: :class:`str`. """ return self._prefix @property def stagedir(self): """The stage directory of the test. This is set during the :func:`setup` phase. :type: :class:`str`. """ return self._stagedir @property @deferrable def stdout(self): """The name of the file containing the standard output of the test. This is set during the :func:`setup` phase. 
This attribute is evaluated lazily, so it can by used inside sanity expressions. :type: :class:`str`. """ return self._stdout @property @deferrable def stderr(self): """The name of the file containing the standard error of the test. This is set during the :func:`setup` phase. This attribute is evaluated lazily, so it can by used inside sanity expressions. :type: :class:`str`. """ return self._stderr def __repr__(self): return debug.repr(self) def info(self): """Provide live information of a running test. This method is used by the front-end to print the status message during the test's execution. This function is also called to provide the message for the ``check_info`` `logging attribute <running.html#logging>`__. By default, it returns a message reporting the test name, the current partition and the current programming environment that the test is currently executing on. :returns: a string with an informational message about this test .. note :: When overriding this method, you should pay extra attention on how you use the :class:`RegressionTest`'s attributes, because this method may be called at any point of the test's lifetime. .. versionadded:: 2.10 """ ret = self.name if self.current_partition: ret += ' on %s' % self.current_partition.fullname if self.current_environ: ret += ' using %s' % self.current_environ.name return ret def supports_system(self, partition_name): if '*' in self.valid_systems: return True if self._current_system.name in self.valid_systems: return True # Check if this is a relative name if partition_name.find(':') == -1: partition_name = '%s:%s' % (self._current_system.name, partition_name) return partition_name in self.valid_systems def supports_environ(self, env_name): for env in self.valid_prog_environs: if fnmatch.fnmatch(env_name, env): return True return False def is_local(self): """Check if the test will execute locally. 
A test executes locally if the :attr:`local` attribute is set or if the current partition's scheduler does not support job submission. """ if self._current_partition is None: return self.local return self.local or self._current_partition.scheduler.is_local def _sanitize_basename(self, name): """Create a basename safe to be used as path component Replace all path separator characters in `name` with underscores.""" return name.replace(os.sep, '_') def _setup_environ(self, environ): """Setup the current environment and load it.""" self._current_environ = environ # Add user modules and variables to the environment for m in self.modules: self._current_environ.add_module(m) for k, v in self.variables.items(): self._current_environ.set_variable(k, v) # First load the local environment of the partition self.logger.debug('loading environment for the current partition') self._current_partition.local_env.load() self.logger.debug("loading test's environment") self._current_environ.load() def _setup_paths(self): """Setup the check's dynamic paths.""" self.logger.debug('setting up paths') try: self._stagedir = self._resources_mgr.stagedir( self._sanitize_basename(self._current_partition.name), self.name, self._sanitize_basename(self._current_environ.name)) self.outputdir = self._resources_mgr.outputdir( self._sanitize_basename(self._current_partition.name), self.name, self._sanitize_basename(self._current_environ.name)) except OSError as e: raise PipelineError('failed to set up paths') from e self._stdout = os.path.join(self._stagedir, '%s.out' % self.name) self._stderr = os.path.join(self._stagedir, '%s.err' % self.name) def _setup_job(self, **job_opts): """Setup the job related to this check.""" self.logger.debug('setting up the job descriptor') msg = 'job scheduler backend: {0}' self.logger.debug( msg.format('local' if self.is_local else self._current_partition. 
scheduler.registered_name)) # num_gpus_per_node is a managed resource if self.num_gpus_per_node > 0: self.extra_resources.setdefault( '_rfm_gpu', {'num_gpus_per_node': self.num_gpus_per_node}) if self.local: scheduler_type = getscheduler('local') launcher_type = getlauncher('local') else: scheduler_type = self._current_partition.scheduler launcher_type = self._current_partition.launcher job_name = '%s_%s_%s_%s' % ( self.name, self._sanitize_basename(self._current_system.name), self._sanitize_basename(self._current_partition.name), self._sanitize_basename(self._current_environ.name)) job_script_filename = os.path.join(self._stagedir, job_name + '.sh') self._job = scheduler_type( name=job_name, command=' '.join([self.executable] + self.executable_opts), launcher=launcher_type(), environs=[ self._current_partition.local_env, self._current_environ ], workdir=self._stagedir, num_tasks=self.num_tasks, num_tasks_per_node=self.num_tasks_per_node, num_tasks_per_core=self.num_tasks_per_core, num_tasks_per_socket=self.num_tasks_per_socket, num_cpus_per_task=self.num_cpus_per_task, use_smt=self.use_multithreading, time_limit=self.time_limit, script_filename=job_script_filename, stdout=self._stdout, stderr=self._stderr, pre_run=self.pre_run, post_run=self.post_run, sched_exclusive_access=self.exclusive_access, **job_opts) # Get job options from managed resources and prepend them to # job_opts. We want any user supplied options to be able to # override those set by the framework. 
resources_opts = [] for r, v in self.extra_resources.items(): resources_opts.extend(self._current_partition.get_resource(r, **v)) self._job.options = (self._current_partition.access + resources_opts + self._job.options) # FIXME: This is a temporary solution to address issue #157 def _setup_perf_logging(self): self.logger.debug('setting up performance logging') self._perf_logfile = os.path.join( self._resources_mgr.logdir(self._current_partition.name), self.name + '.log') perf_logging_config = { 'level': 'INFO', 'handlers': { self._perf_logfile: { 'level': 'DEBUG', 'format': '[%(asctime)s] reframe %(version)s: ' '%(check_info)s ' '(jobid=%(check_jobid)s): %(message)s', 'append': True, } } } self._perf_logger = logging.LoggerAdapter( logger=logging.load_from_dict(perf_logging_config), check=self) def setup(self, partition, environ, **job_opts): """The setup phase of the regression test pipeline. :arg partition: The system partition to set up this test for. :arg environ: The environment to set up this test for. :arg job_opts: Options to be passed through to the backend scheduler. When overriding this method users should always pass through ``job_opts`` to the base class method. :raises reframe.core.exceptions.ReframeError: In case of errors. 
""" self._current_partition = partition self._setup_environ(environ) self._setup_paths() self._setup_job(**job_opts) if self.perf_patterns is not None: self._setup_perf_logging() def _copy_to_stagedir(self, path): self.logger.debug('copying %s to stage directory (%s)' % (path, self._stagedir)) self.logger.debug('symlinking files: %s' % self.readonly_files) try: os_ext.copytree_virtual(path, self._stagedir, self.readonly_files) except (OSError, ValueError, TypeError) as e: raise PipelineError('virtual copying of files failed') from e def _clone_to_stagedir(self, url): self.logger.debug('cloning URL %s to stage directory (%s)' % (url, self._stagedir)) os_ext.git_clone(self.sourcesdir, self._stagedir) def prebuild(self): for cmd in self.prebuild_cmd: self.logger.debug('executing prebuild commands') os_ext.run_command(cmd, check=True, shell=True) def postbuild(self): for cmd in self.postbuild_cmd: self.logger.debug('executing postbuild commands') os_ext.run_command(cmd, check=True, shell=True) def compile(self, **compile_opts): """The compilation phase of the regression test pipeline. :arg compile_opts: Extra options to be passed to the programming environment for compiling the source code of the test. :raises reframe.core.exceptions.ReframeError: In case of errors. """ if not self._current_environ: raise PipelineError('no programming environment set') # Copy the check's resources to the stage directory if self.sourcesdir: try: commonpath = os.path.commonpath( [self.sourcesdir, self.sourcepath]) except ValueError: commonpath = None if commonpath: self.logger.warn( "sourcepath (`%s') seems to be a subdirectory of " "sourcesdir (`%s'), but it will be interpreted " "as relative to it." 
% (self.sourcepath, self.sourcesdir)) if os_ext.is_url(self.sourcesdir): self._clone_to_stagedir(self.sourcesdir) else: self._copy_to_stagedir( os.path.join(self._prefix, self.sourcesdir)) # Verify the sourcepath and determine the sourcepath in the stagedir if (os.path.isabs(self.sourcepath) or os.path.normpath(self.sourcepath).startswith('..')): raise PipelineError( 'self.sourcepath is an absolute path or does not point to a ' 'subfolder or a file contained in self.sourcesdir: ' + self.sourcepath) staged_sourcepath = os.path.join(self._stagedir, self.sourcepath) self.logger.debug('Staged sourcepath: %s' % staged_sourcepath) # Remove source and executable from compile_opts compile_opts.pop('source', None) compile_opts.pop('executable', None) # Change working dir to stagedir although absolute paths are used # everywhere in the compilation process. This is done to ensure that # any other files (besides the executable) generated during the the # compilation will remain in the stage directory with os_ext.change_dir(self._stagedir): self.prebuild() if os.path.isdir(staged_sourcepath): includedir = staged_sourcepath else: includedir = os.path.dirname(staged_sourcepath) self._current_environ.include_search_path.append(includedir) self._compile_task = self._current_environ.compile( sourcepath=staged_sourcepath, executable=os.path.join(self._stagedir, self.executable), **compile_opts) self.logger.debug('compilation stdout:\n%s' % self._compile_task.stdout) self.logger.debug('compilation stderr:\n%s' % self._compile_task.stderr) self.postbuild() self.logger.debug('compilation finished') def run(self): """The run phase of the regression test pipeline. This call is non-blocking. It simply submits the job associated with this test and returns. 
""" if not self._current_system or not self._current_partition: raise PipelineError('no system or system partition is set') with os_ext.change_dir(self._stagedir): try: self._job.prepare(BashScriptBuilder(login=True)) except OSError as e: raise PipelineError('failed to prepare job') from e self._job.submit() msg = ('spawned job (%s=%s)' % ('pid' if self.is_local() else 'jobid', self._job.jobid)) self.logger.debug(msg) def poll(self): """Poll the test's state. :returns: :class:`True` if the associated job has finished, :class:`False` otherwise. If no job descriptor is yet associated with this test, :class:`True` is returned. :raises reframe.core.exceptions.ReframeError: In case of errors. """ if not self._job: return True return self._job.finished() def wait(self): """Wait for this test to finish. :raises reframe.core.exceptions.ReframeError: In case of errors. """ self._job.wait() self.logger.debug('spawned job finished') def sanity(self): self.check_sanity() def performance(self): try: self.check_performance() except SanityError: if self.strict_check: raise def check_sanity(self): """The sanity checking phase of the regression test pipeline. :raises reframe.core.exceptions.SanityError: If the sanity check fails. """ if self.sanity_patterns is None: raise SanityError('sanity_patterns not set') with os_ext.change_dir(self._stagedir): success = evaluate(self.sanity_patterns) if not success: raise SanityError('sanity failure') def check_performance(self): """The performance checking phase of the regression test pipeline. :raises reframe.core.exceptions.SanityError: If the performance check fails. 
""" if self.perf_patterns is None: return with os_ext.change_dir(self._stagedir): for tag, expr in self.perf_patterns.items(): value = evaluate(expr) key = '%s:%s' % (self._current_partition.fullname, tag) try: ref, low_thres, high_thres = self.reference[key] self._perf_logger.info('value: %s, reference: %s' % (value, self.reference[key])) except KeyError: raise SanityError( "tag `%s' not resolved in references for `%s'" % (tag, self._current_partition.fullname)) evaluate(assert_reference(value, ref, low_thres, high_thres)) def _copy_to_outputdir(self): """Copy checks interesting files to the output directory.""" self.logger.debug('copying interesting files to output directory') shutil.copy(self._stdout, self.outputdir) shutil.copy(self._stderr, self.outputdir) if self._job: shutil.copy(self._job.script_filename, self.outputdir) # Copy files specified by the user for f in self.keep_files: if not os.path.isabs(f): f = os.path.join(self._stagedir, f) shutil.copy(f, self.outputdir) def cleanup(self, remove_files=False, unload_env=True): """The cleanup phase of the regression test pipeline. :arg remove_files: If :class:`True`, the stage directory associated with this test will be removed. :arg unload_env: If :class:`True`, the environment that was used to run this test will be unloaded. """ aliased = os.path.samefile(self._stagedir, self.outputdir) if aliased: self.logger.debug('skipping copy to output dir ' 'since they alias each other') else: self._copy_to_outputdir() if remove_files: self.logger.debug('removing stage directory') shutil.rmtree(self._stagedir) if unload_env: self.logger.debug("unloading test's environment") self._current_environ.unload() self._current_partition.local_env.unload() def __str__(self): return ('%s (%s)\n' ' tags: [%s], maintainers: [%s]' % (self.name, self.descr, ', '.join(self.tags), ', '.join( self.maintainers)))
class Job(abc.ABC):
    '''A job descriptor.

    .. caution::
       This is an abstract class.
       Users may not create jobs directly.
    '''

    num_tasks = fields.TypedField('num_tasks', int)
    num_tasks_per_node = fields.TypedField('num_tasks_per_node',
                                           int, type(None))
    num_tasks_per_core = fields.TypedField('num_tasks_per_core',
                                           int, type(None))
    num_tasks_per_socket = fields.TypedField('num_tasks_per_socket',
                                             int, type(None))

    # NOTE: The class attribute must carry the same name as the attribute
    # assigned in `__init__()`; otherwise the assignment bypasses the
    # descriptor and the value is never type checked.
    num_cpus_per_task = fields.TypedField('num_cpus_per_task',
                                          int, type(None))
    use_smt = fields.TypedField('use_smt', bool, type(None))
    time_limit = fields.TimerField('time_limit', type(None))

    #: Options to be passed to the backend job scheduler.
    #:
    #: :type: :class:`List[str]`
    #: :default: ``[]``
    options = fields.TypedField('options', typ.List[str])

    #: The parallel program launcher that will be used to launch the parallel
    #: executable of this job.
    #:
    #: :type: :class:`reframe.core.launchers.JobLauncher`
    launcher = fields.TypedField('launcher', JobLauncher)

    _jobid = fields.TypedField('_jobid', int, type(None))
    _exitcode = fields.TypedField('_exitcode', int, type(None))
    _state = fields.TypedField('_state', str, type(None))

    # The sched_* arguments are exposed also to the frontend
    def __init__(self,
                 name,
                 launcher,
                 workdir='.',
                 num_tasks=1,
                 num_tasks_per_node=None,
                 num_tasks_per_core=None,
                 num_tasks_per_socket=None,
                 num_cpus_per_task=None,
                 use_smt=None,
                 time_limit=None,
                 script_filename=None,
                 stdout=None,
                 stderr=None,
                 pre_run=[],
                 post_run=[],
                 sched_flex_alloc_tasks=None,
                 sched_access=[],
                 sched_account=None,
                 sched_partition=None,
                 sched_reservation=None,
                 sched_nodelist=None,
                 sched_exclude_nodelist=None,
                 sched_exclusive_access=None,
                 sched_options=[]):
        '''Create a new job descriptor.

        :arg name: The name of the job; the default script, standard output
            and standard error file names are derived from it.
        :arg launcher: The launcher of this job.
        :arg workdir: The working directory of the job.
        :arg script_filename: The job script file name
            (default: ``<name>.sh``).
        :arg stdout: The standard output file name (default: ``<name>.out``).
        :arg stderr: The standard error file name (default: ``<name>.err``).
        :arg pre_run: Accepted, but not used by this constructor.
        :arg post_run: Accepted, but not used by this constructor.
        :arg sched_options: Initial value of :attr:`options`; the list is
            copied.

        The ``num_*``, ``use_smt`` and ``time_limit`` arguments initialize
        the type-checked attributes of the same name, whereas the remaining
        ``sched_*`` arguments are stored as is and are exposed through the
        read-only properties of the same name.
        '''

        # Mutable fields
        self.num_tasks = num_tasks
        self.num_tasks_per_node = num_tasks_per_node
        self.num_tasks_per_core = num_tasks_per_core
        self.num_tasks_per_socket = num_tasks_per_socket
        self.num_cpus_per_task = num_cpus_per_task
        self.use_smt = use_smt
        self.time_limit = time_limit
        self.options = list(sched_options)
        self.launcher = launcher

        self._name = name
        self._workdir = workdir
        self._script_filename = script_filename or '%s.sh' % name
        self._stdout = stdout or '%s.out' % name
        self._stderr = stderr or '%s.err' % name
        self._nodelist = None

        # Backend scheduler related information
        self._sched_flex_alloc_tasks = sched_flex_alloc_tasks

        # Copy the list, as done for `sched_options`, so that the shared
        # default argument is never exposed through the `sched_access`
        # property
        self._sched_access = list(sched_access)
        self._sched_nodelist = sched_nodelist
        self._sched_exclude_nodelist = sched_exclude_nodelist
        self._sched_partition = sched_partition
        self._sched_reservation = sched_reservation
        self._sched_account = sched_account
        self._sched_exclusive_access = sched_exclusive_access

        # Live job information; to be filled during job's lifetime by the
        # scheduler
        self._jobid = None
        self._exitcode = None
        self._state = None

    def __repr__(self):
        return debug.repr(self)

    # Read-only properties
    @property
    def exitcode(self):
        return self._exitcode

    @property
    def jobid(self):
        return self._jobid

    @property
    def state(self):
        return self._state

    @property
    def name(self):
        return self._name

    @property
    def workdir(self):
        return self._workdir

    @property
    def script_filename(self):
        return self._script_filename

    @property
    def stdout(self):
        return self._stdout

    @property
    def stderr(self):
        return self._stderr

    @property
    def sched_flex_alloc_tasks(self):
        return self._sched_flex_alloc_tasks

    @property
    def sched_access(self):
        return self._sched_access

    @property
    def sched_nodelist(self):
        return self._sched_nodelist

    @property
    def sched_exclude_nodelist(self):
        return self._sched_exclude_nodelist

    @property
    def sched_partition(self):
        return self._sched_partition

    @property
    def sched_reservation(self):
        return self._sched_reservation

    @property
    def sched_account(self):
        return self._sched_account

    @property
    def sched_exclusive_access(self):
        return self._sched_exclusive_access

    def prepare(self, commands, environs=None, **gen_opts):
        '''Generate the job script of this job.

        If :attr:`num_tasks` is zero or negative, the number of tasks is
        first determined by :func:`guess_num_tasks` and :attr:`num_tasks` is
        updated accordingly.
        A negative value requests at least ``-num_tasks`` tasks, whereas zero
        requests at least as many tasks as fit in a single node.

        :arg commands: The commands to be written in the body of the script.
        :arg environs: The environments, whose load commands will be emitted
            in the script (default: no environments).
        :arg gen_opts: Options passed through to the script generator.
        :raises JobError: If the backend does not support flexible task
            allocation or if fewer tasks than required were found.
        '''
        environs = environs or []
        if self.num_tasks <= 0:
            num_tasks_per_node = self.num_tasks_per_node or 1
            min_num_tasks = (-self.num_tasks if self.num_tasks else
                             num_tasks_per_node)

            try:
                guessed_num_tasks = self.guess_num_tasks()
            except NotImplementedError as e:
                raise JobError('flexible task allocation is not supported by '
                               'this backend') from e

            if guessed_num_tasks < min_num_tasks:
                nodes_required = min_num_tasks // num_tasks_per_node
                nodes_found = guessed_num_tasks // num_tasks_per_node
                raise JobError('could not find enough nodes: '
                               'required %s, found %s' %
                               (nodes_required, nodes_found))

            self.num_tasks = guessed_num_tasks
            getlogger().debug('flex_alloc_tasks: setting num_tasks to %s' %
                              self.num_tasks)

        with shell.generate_script(self.script_filename,
                                   **gen_opts) as builder:
            builder.write_prolog(self.emit_preamble())
            builder.write(env.emit_load_commands(*environs))
            for c in commands:
                builder.write_body(c)

    @abc.abstractmethod
    def emit_preamble(self):
        pass

    def guess_num_tasks(self):
        '''Guess the number of tasks for a flexible job.

        If :attr:`sched_flex_alloc_tasks` is an integer, it is returned as is
        after having been verified that it is positive.
        Otherwise, the number of tasks is the number of nodes that pass
        :func:`filter_nodes` (further restricted to the available ones, if
        :attr:`sched_flex_alloc_tasks` is ``'idle'``) multiplied by the number
        of tasks per node (one, if not set).

        :raises JobError: If :attr:`sched_flex_alloc_tasks` is a non-positive
            integer.
        '''
        if isinstance(self.sched_flex_alloc_tasks, int):
            if self.sched_flex_alloc_tasks <= 0:
                raise JobError('invalid number of flex_alloc_tasks: %s' %
                               self.sched_flex_alloc_tasks)

            return self.sched_flex_alloc_tasks

        available_nodes = self.get_all_nodes()
        getlogger().debug('flex_alloc_tasks: total available nodes %s ' %
                          len(available_nodes))

        # Try to guess the number of tasks now
        available_nodes = self.filter_nodes(available_nodes,
                                            self.sched_access + self.options)

        if self.sched_flex_alloc_tasks == 'idle':
            available_nodes = {n for n in available_nodes
                               if n.is_available()}
            getlogger().debug('flex_alloc_tasks: selecting idle nodes: '
                              'available nodes now: %s' %
                              len(available_nodes))

        num_tasks_per_node = self.num_tasks_per_node or 1
        num_tasks = len(available_nodes) * num_tasks_per_node
        return num_tasks

    @abc.abstractmethod
    def get_all_nodes(self):
        # Gets all the available nodes
        pass

    @abc.abstractmethod
    def filter_nodes(self, nodes, options):
        # Filter nodes according to the scheduler options
        pass

    @abc.abstractmethod
    def submit(self):
        pass

    # The following abstract methods provide a common sanity check, which
    # concrete backends may reuse by calling the base class implementation

    @abc.abstractmethod
    def wait(self):
        if self._jobid is None:
            raise JobNotStartedError('cannot wait an unstarted job')

    @abc.abstractmethod
    def cancel(self):
        if self._jobid is None:
            raise JobNotStartedError('cannot cancel an unstarted job')

    @abc.abstractmethod
    def finished(self):
        if self._jobid is None:
            raise JobNotStartedError('cannot poll an unstarted job')

    @property
    def nodelist(self):
        '''The list of node names assigned to this job.

        This attribute is :class:`None` if no nodes are assigned to the job
        yet.
        This attribute is set reliably only for the ``slurm`` backend, i.e.,
        Slurm *with* accounting enabled.
        The ``squeue`` scheduler backend, i.e., Slurm *without* accounting,
        might not set this attribute for jobs that finish very quickly.
        For the ``local`` scheduler backend, this returns an one-element list
        containing the hostname of the current host.

        This attribute might be useful in a flexible regression test for
        determining the actual nodes that were assigned to the test.

        For more information on flexible task allocation, please refer to the
        corresponding `section <advanced.html#flexible-regression-tests>`__ of
        the tutorial.

        This attribute is *not* supported by the ``pbs`` scheduler backend.

        .. versionadded:: 2.17
        '''
        return self._nodelist